pax_global_header00006660000000000000000000000064136074350750014524gustar00rootroot0000000000000052 comment=10949f528c5ffc5c3a2cad47fe16a802afb021be blis-0.6.1/000077500000000000000000000000001360743507500124615ustar00rootroot00000000000000blis-0.6.1/.appveyor.yml000066400000000000000000000036171360743507500151360ustar00rootroot00000000000000environment: matrix: - LIB_TYPE: shared CONFIG: auto CC: gcc THREADING: pthreads CBLAS: no - LIB_TYPE: static CONFIG: auto CC: clang THREADING: no - LIB_TYPE: shared CONFIG: x86_64 CC: clang THREADING: pthreads - LIB_TYPE: static CONFIG: auto CC: clang THREADING: openmp install: - set "PATH=C:\msys64\mingw64\bin;C:\msys64\bin;%PATH%" - if [%CC%]==[clang] set "PATH=C:\Program Files\LLVM\bin;%PATH%" - if [%CC%]==[clang] set "AR=llvm-ar" - if [%CC%]==[clang] set "AS=llvm-as" - if [%CC%]==[clang] call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 build_script: - if [%LIB_TYPE%]==[shared] set "CONFIGURE_OPTS=%CONFIGURE_OPTS% --enable-shared --disable-static" - if [%LIB_TYPE%]==[static] set "CONFIGURE_OPTS=%CONFIGURE_OPTS% --disable-shared --enable-static" - if not [%CBLAS%]==[no] set "CONFIGURE_OPTS=%CONFIGURE_OPTS% --enable-cblas" - set RANLIB=echo - set LIBPTHREAD= - set "PATH=%PATH%;C:\blis\lib" - set "CFLAGS=-Wno-macro-redefined" - bash -lc "cd /c/projects/blis && ./configure %CONFIGURE_OPTS% --enable-threading=%THREADING% --enable-arg-max-hack --prefix=/c/blis %CONFIG%" - bash -lc "cd /c/projects/blis && mingw32-make -j4 V=1" - bash -lc "cd /c/projects/blis && mingw32-make install" - 7z a C:\blis.zip C:\blis - ps: Push-AppveyorArtifact C:\blis.zip test_script: - if [%LIB_TYPE%]==[shared] set "TEST_TARGET=checkblis-fast" - if [%LIB_TYPE%]==[static] set "TEST_TARGET=check" - bash -lc "cd /c/projects/blis && mingw32-make %TEST_TARGET% -j4 V=1" # Enable this to be able to login to the build worker. 
You can use the # `remmina` program in Ubuntu, use the login information that the line below # prints into the log. #on_finish: #- ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) blis-0.6.1/.dir-locals.el000066400000000000000000000005271360743507500151160ustar00rootroot00000000000000;; First (minimal) attempt at configuring Emacs CC mode for the BLIS ;; layout requirements. ((c-mode . ((c-file-style . "stroustrup") (c-basic-offset . 4) (comment-start . "// ") (comment-end . "") (indent-tabs-mode . t) (tab-width . 4) (parens-require-spaces . nil)))) blis-0.6.1/.gitignore000066400000000000000000000013071360743507500144520ustar00rootroot00000000000000# -- generic files to ignore -- # emacs backup files *~ # vim backup files *.swp # NFS file .nfs* # -- compiler-related -- # object files # NOTE: This will result in git also exluding the top-level obj directory # since its only contents are .o files. *.o # static library archives # NOTE: This will result in git also exluding the top-level lib directory # since its only contents are .a files. *.a *.so *.so.* # test executables *.x *.pexe *.nexe *.js # link map files *.map # -- build system files -- config.mk bli_config.h # -- monolithic headers -- include/*/*.h # -- makefile fragments -- .fragment.mk # -- misc. 
-- # BLIS testsuite output file output.testsuite # BLAS test output files out.* blis-0.6.1/.travis.yml000066400000000000000000000052441360743507500145770ustar00rootroot00000000000000language: c sudo: required dist: trusty matrix: include: # full testsuite (all tests except for mixed datatype) - os: linux compiler: gcc env: OOT=0 TEST=1 SDE=0 THR="none" CONF="auto" # mixed-datatype testsuite (gemm_nn only) - os: linux compiler: gcc env: OOT=0 TEST=MD SDE=0 THR="none" CONF="auto" # salt testsuite (fast set of operations+parameters) - os: linux compiler: gcc env: OOT=0 TEST=SALT SDE=0 THR="none" CONF="auto" # test x86_64 ukrs with SDE - os: linux compiler: gcc env: OOT=0 TEST=0 SDE=1 THR="none" CONF="x86_64" # openmp build - os: linux compiler: gcc env: OOT=0 TEST=0 SDE=0 THR="openmp" CONF="auto" # pthreads build - os: linux compiler: gcc env: OOT=0 TEST=0 SDE=0 THR="pthreads" CONF="auto" # out-of-tree build - os: linux compiler: gcc env: OOT=1 TEST=0 SDE=0 THR="none" CONF="auto" # clang build - os: linux compiler: clang env: OOT=0 TEST=0 SDE=0 THR="none" CONF="auto" # macOS with system compiler (clang) - os: osx compiler: clang env: OOT=0 TEST=1 SDE=0 THR="none" CONF="auto" # cortexa15 build and fast testsuite (qemu) - os: linux compiler: arm-linux-gnueabihf-gcc env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="cortexa15" \ PACKAGES="gcc-arm-linux-gnueabihf qemu-system-arm qemu-user" \ TESTSUITE_WRAPPER="qemu-arm -cpu cortex-a15 -L /usr/arm-linux-gnueabihf/" # cortexa57 build and fast testsuite (qemu) - os: linux compiler: aarch64-linux-gnu-gcc env: OOT=0 TEST=FAST SDE=0 THR="none" CONF="cortexa57" \ PACKAGES="gcc-aarch64-linux-gnu qemu-system-arm qemu-user" \ TESTSUITE_WRAPPER="qemu-aarch64 -L /usr/aarch64-linux-gnu/" install: - if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo rm -f /usr/bin/as; fi - if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo ln -s /usr/lib/binutils-2.26/bin/as /usr/bin/as; fi - if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo rm -f /usr/bin/ld; fi - if [ 
"$TRAVIS_OS_NAME" = "linux" ]; then sudo ln -s /usr/lib/binutils-2.26/bin/ld /usr/bin/ld; fi - if [ "$CC" = "gcc" ] && [ "$TRAVIS_OS_NAME" = "linux" ]; then export CC="gcc-6"; fi - if [ -n "$PACKAGES" ]; then sudo apt-get install -y $PACKAGES; fi addons: apt: sources: - ubuntu-toolchain-r-test packages: - gcc-6 - binutils-2.26 - clang script: - export DIST_PATH=. - pwd - if [ $OOT -eq 1 ]; then export DIST_PATH=`pwd`; mkdir ../oot; cd ../oot; chmod -R a-w $DIST_PATH; fi - pwd - $DIST_PATH/configure -t $THR CC=$CC $CONF - pwd - ls -l - $CC --version - make -j 2 - if [ "$TEST" != "0" ]; then travis_wait 30 $DIST_PATH/travis/do_testsuite.sh; fi - if [ "$SDE" = "1" ]; then travis_wait 30 $DIST_PATH/travis/do_sde.sh; fi blis-0.6.1/CHANGELOG000066400000000000000000031707341360743507500137120ustar00rootroot00000000000000commit 18c876b989fd0dcaa27becd14e4f16bdac7e89b3 (HEAD -> master, tag: 0.6.0) Author: Field G. Van Zee Date: Mon Jun 3 18:37:19 2019 -0500 Version file update (0.6.0) commit 0f1b3bf49eb593ca7bb08b68a7209f7cd550f912 (origin/master, origin/HEAD) Author: Field G. Van Zee Date: Mon Jun 3 18:35:19 2019 -0500 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. - CREDITS file update. commit 27da2e8400d900855da0d834b5417d7e83f21de1 Author: Field G. Van Zee Date: Mon Jun 3 17:14:56 2019 -0500 Minor edits to docs/PerformanceSmall.md. Details: - Added performance analysis to "Comments" section of both Kaby Lake and Epyc sections. - Added emphasis to certain passages. commit 09ba05c6f87efbaadf085497dc137845f16ee9c5 Author: Field G. Van Zee Date: Mon Jun 3 16:53:19 2019 -0500 Added sup performance graphs/document to 'docs'. Details: - Added a new markdown document, docs/PerformanceSmall.md, which publishes new performance graphs for Kaby Lake and Epyc showcasing the new BLIS sup (small/skinny/unpacked) framework logic and kernels. For now, only single-threaded dgemm performance is shown. 
- Reorganized graphs in docs/graphs into docs/graphs/large, with new graphs being placed in docs/graphs/sup. - Updates to scripts in test/sup/octave, mostly to allow decent output in both GNU octave and Matlab. - Updated README.md to mention and refer to the new PerformanceSmall.md document. commit 6bf449cc6941734748034de0e9af22b75f1d6ba1 Merge: abd8a9fa a4e8801d Author: Field G. Van Zee Date: Fri May 31 17:42:40 2019 -0500 Merge branch 'amd' commit a4e8801d08d81fa42ebea6a05a990de8dcedc803 (origin/amd, amd) Author: Field G. Van Zee Date: Fri May 31 17:30:51 2019 -0500 Increased MT sup threshold for double to 201. Details: - Fine-tuned the double-precision real MT threshold (which controls whether the sup implementation kicks for smaller m dimension values) from 180 to 201 for haswell and 180 to 256 for zen. - Updated octave scripts in test/sup/octave to include a seventh column to display performance for m = n = k. commit abd8a9fa7df4569aa2711964c19888b8e248901f (origin/pfhp) Author: Field G. Van Zee Date: Tue May 28 12:49:44 2019 -0500 Inadvertantly hidden xerbla_() in blastest (#313). Details: - Attempted a fix to issue #313, which reports that when building only a shared library (ie: static library build is disabled), running the BLAS test drivers can fail because those drivers provide their own local version of xerbla_() as a clever (albeit still rather hackish) way of checking the error codes that result from the individual tests. This local xerbla_() function is never found at link-time because the BLAS test drivers' Makefile imports BLIS compilation flags via the get-user-cflags-for() function, which currently conveys the -fvisibility=hidden flag, which hides symbols unless they are explicitly annotated for export. 
The -fvisibility=hidden flag was only ever intended for use when building BLIS (not for applications), and so the attempted solution here is to omit the symbol export flag(s) from get-user-cflags-for() by storing the symbol export flag(s) to a new BULID_SYMFLAGS variable instead of appending it to the subconfigurations' CMISCFLAGS variable (which is returned by every get-*-cflags-for() function). Thanks to M. Zhou for reporting this issue and also to Isuru Fernando for suggesting the fix. - Renamed BUILD_FLAGS to BUILD_CPPFLAGS to harmonize with the newly created BUILD_SYMFLAGS. - Fixed typo in entry for --export-shared flag in 'configure --help' text. commit 755730608d923538273a90c48bfdf77571f86519 Author: Field G. Van Zee Date: Thu May 23 17:34:36 2019 -0500 Minor rewording of language around mt env. vars. commit ba31abe73c97c16c78fffc59a215761b8d9fd1f6 Author: Field G. Van Zee Date: Thu May 23 14:59:53 2019 -0500 Added BLIS theading info to Performance.md. Details: - Documented the BLIS environment variables that were set (e.g. BLIS_JC_NT, BLIS_IC_NT, BLIS_JR_NT) for each machine and threading configuration in order to achieve the parallelism reported on in docs/Performance.md. commit cb788ffc89cac03b44803620412a5e83450ca949 Author: Field G. Van Zee Date: Thu May 23 13:00:53 2019 -0500 Increased MT sup threshold for double to 180. Details: - Increased the double-precision real MT threshold (which controls whether the sup implementation kicks for smaller m dimension values) from 80 to 180, and this change was made for both haswell and zen subconfigurations. This is less about the m dimension in particular and more about facilitating a smoother performance transition when m = n = k. commit 057f5f3d211e7513f457ee6ca6c9555d00ad1e57 Author: Field G. Van Zee Date: Thu May 23 12:51:17 2019 -0500 Minor build system housekeeping. Details: - Commented out redundant setting of LIBBLIS_LINK within all driver- level Makefiles. 
This variable is already set within common.mk, and so the only time it should be overridden is if the user wants to link to a different copy of libblis. - Very minor changes to build/gen-make-frags/gen-make-frag.sh. - Whitespace and inconsequential quoting change to configure. - Moved top-level 'windows' directory into a new 'attic' directory. commit 32392cfc72af7f42da817a129748349fb1951346 Author: Jeff Hammond Date: Tue May 14 15:52:30 2019 -0400 add info about CXX in configure (#311) commit fa7e6b182b8365465ade178b0e4cd344ff6f6460 Author: Field G. Van Zee Date: Wed May 1 19:13:00 2019 -0500 Define _POSIX_C_SOURCE in bli_system.h. Details: - Added #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif to bli_system.h so that an application that uses BLIS (specifically, an application that #includes blis.h) does not need to remember to #define the macro itself (either on the command line or in the code that includes blis.h) in order to activate things like the pthreads. Thanks to Christos Psarras for reporting this issue and suggesting this fix. - Commented out #include in bli_system.h, since I don't think this header is used/needed anymore. - Comment update to function macro for bli_?normiv_unb_var1() in frame/util/bli_util_unb_var1.c. commit 3df84f1b5d5e1146bb01bfc466ac20c60a9cc859 Author: Field G. Van Zee Date: Sat Apr 27 21:27:32 2019 -0500 Minor bugfixes in sup dgemm implementation. Details: - Fixed an obscure but in the bli_dgemmsup_rv_haswell_asm_5x8n() kernel that only affected the beta == 0, column-storage output case. Thanks to the BLAS test drivers for catching this bug. - Previously, bli_gemmsup_ref_var1n() and _var2m() were returning if k = 0, when the correct action would be to scale by beta (and then return). Thanks to the BLAS test drivers to catching this bug. 
- Changed the sup threshold behavior such that the sup implementation only kicks in if a matrix dimension is strictly less than (rather than less than or equal to) the threshold in question. - Initialize all thresholds to zero (instead of 10) by default in ref_kernels/bli_cntx_ref.c. This, combined with the above change to threshold testing means that calls to BLIS or BLAS with one or more matrix dimensions of zero will no longer trigger the sup implementation. - Added disabled debugging output to frame/3/bli_l3_sup.c (for future use, perhaps). commit ecbdd1c42dcebfecd729fe351e6bb0076aba7d81 Author: Field G. Van Zee Date: Sat Apr 27 19:38:11 2019 -0500 Ceased use of BLIS_ENABLE_SUP_MR/NR_EXT macros. Details: - Removed already limited use of the BLIS_ENABLE_SUP_MR_EXT and BLIS_ENABLE_SUP_NR_EXT macros in bli_gemmsup_ref_var1n() and bli_gemmsup_ref_var2m(). Their purpose was merely to avoid a long conditional that would determine whether to allow the last iteration to be merged with the second-to-last iteration. Functionally, the macros were not needed, and they ended up causing problems when building configuration families such as intel64 and x86_64. commit aa8a6bec3036a41e1bff2034f8ef6766a704ec49 Author: Field G. Van Zee Date: Sat Apr 27 18:53:33 2019 -0500 Fixed typo in --disable-sup-handling macro guard. Details: - Fixed an incorrectly-named macro guard that is intended to allow disabling of the sup framework via the configure option --disable-sup-handling. In this case, the preprocessor macro, BLIS_DISABLE_SUP_HANDLING, was still named by its name from an older uncommitted version of the code (BLIS_DISABLE_SM_HANDLING). commit b9c9f03502c78a63cfcc21654b06e9089e2a3822 Author: Field G. Van Zee Date: Sat Apr 27 18:44:50 2019 -0500 Implemented gemm on skinny/unpacked matrices. 
Details: - Implemented a new sub-framework within BLIS to support the management of code and kernels that specifically target matrix problems for which at least one dimension is deemed to be small, which can result in long and skinny matrix operands that are ill-suited for the conventional level-3 implementations in BLIS. The new framework tackles the problem in two ways. First the stripped-down algorithmic loops forgo the packing that is famously performed in the classic code path. That is, the computation is performed by a new family of kernels tailored specifically for operating on the source matrices as-is (unpacked). Second, these new kernels will typically (and in the case of haswell and zen, do in fact) include separate assembly sub-kernels for handling of edge cases, which helps smooth performance when performing problems whose m and n dimension are not naturally multiples of the register blocksizes. In a reference to the sub-framework's purpose of supporting skinny/unpacked level-3 operations, the "sup" operation suffix (e.g. gemmsup) is typically used to denote a separate namespace for related code and kernels. NOTE: Since the sup framework does not perform any packing, it targets row- and column-stored matrices A, B, and C. For now, if any matrix has non-unit strides in both dimensions, the problem is computed by the conventional implementation. - Implemented the default sup handler as a front-end to two variants. bli_gemmsup_ref_var2() provides a block-panel variant (in which the 2nd loop around the microkernel iterates over n and the 1st loop iterates over m), while bli_gemmsup_ref_var1() provides a panel-block variant (2nd loop over m and 1st loop over n). However, these variants are not used by default and provided for reference only. Instead, the default sup handler calls _var2m() and _var1n(), which are similar to _var2() and _var1(), respectively, except that they defer to the sup kernel itself to iterate over the m and n dimension, respectively. 
In other words, these variants rely not on microkernels, but on so-called "millikernels" that iterate along m and k, or n and k. The benefit of using millikernels is a reduction of function call and related (local integer typecast) overhead as well as the ability for the kernel to know which micropanel (A or B) will change during the next iteration of the 1st loop, which allows it to focus its prefetching on that micropanel. (In _var2m()'s millikernel, the upanel of A changes while the same upanel of B is reused. In _var1n()'s, the upanel of B changes while the upanel of A is reused.) - Added a new configure option, --[en|dis]able-sup-handling, which is enabled by default. However, the default thresholds at which the default sup handler is activated are set to zero for each of the m, n, and k dimensions, which effectively disables the implementation. (The default sup handler only accepts the problem if at least one dimension is smaller than or equal to its corresponding threshold. If all dimensions are larger than their thresholds, the problem is rejected by the sup front-end and control is passed back to the conventional implementation, which proceeds normally.) - Added support to the cntx_t structure to track new fields related to the sup framework, most notably: - sup thresholds: the thresholds at which the sup handler is called. - sup handlers: the address of the function to call to implement the level-3 skinny/unpacked matrix implementation. - sup blocksizes: the register and cache blocksizes used by the sup implementation (which may be the same or different from those used by the conventional packm-based approach). - sup kernels: the kernels that the handler will use in implementing the sup functionality. - sup kernel prefs: the IO preference of the sup kernels, which may differ from the preferences of the conventional gemm microkernels' IO preferences. - Added a bool_t to the rntm_t structure that indicates whether sup handling should be enabled/disabled. 
This allows per-call control of whether the sup implementation is used, which is useful for test drivers that wish to switch between the conventional and sup codes without having to link to different copies of BLIS. The corresponding accessor functions for this new bool_t are defined in bli_rntm.h. - Implemented several row-preferential gemmsup kernels in a new directory, kernels/haswell/3/sup. These kernels include two general implementation types--'rd' and 'rv'--for the 6x8 base shape, with two specialized millikernels that embed the 1st loop within the kernel itself. - Added ref_kernels/3/bli_gemmsup_ref.c, which provides reference gemmsup microkernels. NOTE: These microkernels, unlike the current crop of conventional (pack-based) microkernels, do not use constant loop bounds. Additionally, their inner loop iterates over the k dimension. - Defined new typedef enums: - stor3_t: captures the effective storage combination of the level-3 problem. Valid values are BLIS_RRR, BLIS_RRC, BLIS_RCR, etc. A special value of BLIS_XXX is used to denote an arbitrary combination which, in practice, means that at least one of the operands is stored according to general stride. - threshid_t: captures each of the three dimension thresholds. - Changed bli_adjust_strides() in bli_obj.c so that bli_obj_create() can be passed "-1, -1" as a lazy request for row storage. (Note that "0, 0" is still accepted as a lazy request for column storage.) - Added support for various instructions to bli_x86_asm_macros.h, including imul, vhaddps/pd, and other instructions related to integer vectors. - Disabled the older small matrix handling code inserted by AMD in bli_gemm_front.c, since the sup framework introduced in this commit is intended to provide a more generalized solution. - Added test/sup directory, which contains standalone performance test drivers, a Makefile, a runme.sh script, and an 'octave' directory containing scripts compatible with GNU Octave. 
(They also may work with matlab, but if not, they are probably close to working.) - Reinterpret the storage combination string (sc_str) in the various level-3 testsuite modules (e.g. src/test_gemm.c) so that the order of each matrix storage char is "cab" rather than "abc". - Comment updates in level-3 BLAS API wrappers in frame/compat. commit 0d549ceda822833bec192bbf80633599620c15d9 Author: Isuru Fernando Date: Sat Apr 27 22:56:02 2019 +0000 make unix friendly archives on appveyor (#310) commit 945928c650051c04d6900c7f4e9e29cd0e5b299f Merge: 663f6629 74e513eb Author: Field G. Van Zee Date: Wed Apr 17 15:58:56 2019 -0500 Merge branch 'amd' of github.com:flame/blis into amd commit 74e513eb6a6787a925d43cd1500277d54d86ab8f (origin/dev) Author: Field G. Van Zee Date: Wed Apr 17 13:34:44 2019 -0500 Support row storage in Eigen gemm test/3 driver. Details: - Added preprocessor branches to test/3/test_gemm.c to explicitly support row-stored matrices. Column-stored matrices are also still supported (and is the default for now). (This is mainly residual work leftover from initial integration of Eigen into the test drivers, so if we ever want to test Eigen with row-stored matrices, the code will be ready to use, even if it is not yet integrated into the Makefile in test/3.) commit b5d457fae9bd75c4ca67f7bc7214e527aa248127 Author: Field G. Van Zee Date: Tue Apr 16 12:50:01 2019 -0500 Applied forgotten variable rename from 89a70cc. Details: - Somehow the variable name change (root_file_name -> root_inputname) in flatten-headers.py mentioned in the commit log entry for 89a70cc didn't make it into the actual commit. This commit applies that change. commit 89a70cccf869333147eb2559cdfa5a23dc915824 Author: Field G. Van Zee Date: Thu Apr 11 18:33:08 2019 -0500 GNU-like handling of installation prefix et al. Details: - Changed the default installation prefix from $HOME/lib to /usr/local. 
- Modified the way configure internally handles the prefix, libdir, includedir, and sharedir (and also added an --exec-prefix option). The defaults to these variables are set as follows: prefix: /usr/local exec_prefix: ${prefix} libdir: ${exec_prefix}/lib includedir: ${prefix}/include sharedir: ${prefix}/share The key change, aside from the addition of exec_prefix and its use to define the default to libdir, is that the variables are substituted into config.mk with quoting that delays evaluation, meaning the substituted values may contain unevaluated references to other variables (namely, ${prefix} and ${exec_prefix}). This more closely follows GNU conventions, including those used by GNU autoconf, and also allows make to override any one of the variables *after* configure has already been run (e.g. during 'make install'). - Updates to build/config.mk.in pursuant to above changes. - Updates to output of 'configure --help' pursuant to above changes. - Updated docs/BuildSystem.md to reflect the new default installation prefix, as well as mention EXECPREFIX and SHAREDIR. - Changed the definitions of the UNINSTALL_OLD_* variables in the top-level Makefile to use $(wildcard ...) instead of 'find'. This was motivated by the new way of handling prefix and friends, which leads to the 'find' command being run on /usr/local (by default), which can take a while almost never yielding any benefit (since the user will very rarely use the uninstall-old targets). - Removed periods from the end of descriptive output statements (i.e., non-verbose output) since those statements often end with file or directory paths, which get confusing to read when puctuated by a period. - Trival change to 'make showconfig' output. - Removed my name from 'configure --help'. (Many have contributed to it over the years.) - In configure script, changed the default state of threading_model variable from 'no' to 'off' to match that of debug_type, where there are similarly more than two valid states. 
('no' is still accepted if given via the --enable-debug= option, though it will be standardized to 'off' prior to config.mk being written out.) - Minor variable name change in flatten-headers.py that was intended for 32812ff. - CREDITS file update. commit 32812ff5aba05d34c421fe1024a61f3e2d5e7052 Author: Field G. Van Zee Date: Tue Apr 9 12:20:19 2019 -0500 Minor bugfix to flatten-headers.py. Details: - Fixed a minor bug in flatten-headers.py whereby the script, upon encountering a #include directive for the root header file, would erroneously recurse and inline the conents of that root header. The script has been modified to avoid recursion into any headers that share the same name as the root-level header that was passed into the script. (Note: this bug didn't actually manifest in BLIS, so it's merely a precaution for usage of flatten-headers.py in other contexts.) commit bec90e0b6aeb3c9b19589c2b700fda2d66f6ccdf Author: Field G. Van Zee Date: Tue Apr 2 17:45:13 2019 -0500 Minor update to docs/HardwareSupport.md document. Details: - Added more details and clarifying language to implications of 1m and the recycling of microkernels between microarchitectures. commit 89cd650e7be01b59aefaa85885a3ea78970351e4 Author: Field G. Van Zee Date: Tue Apr 2 17:23:55 2019 -0500 Use void_fp for function pointers instead of void*. Change void*-typed function pointers to void_fp. - Updated all instances of void* variables that store function pointers to variables of a new type, void_fp. Originally, I wanted to define the type of void_fp as "void (*void_fp)( void )"--that is, a pointer to a function with no return value and no arguments. However, once I did this, I realized that gcc complains with incompatible pointer type (-Wincompatible-pointer-types) warnings every time any such a pointer is being assigned to its final, type-accurate function pointer type. That is, gcc will silently typecast a void* to another defined function pointer type (e.g. 
dscalv_ker_ft) during an assignment from the former to the latter, but the same statement will trigger a warning when typecasting from a void_fp type. I suspect an explicit typecast is needed in order to avoid the warning, which I'm not willing to insert at this time. - Added a typedef to bli_type_defs.h defining void_fp as void*, along with a commented-out version of the aborted definition described above. (Note that POSIX requires that void* and function pointers be interchangeable; it is the C standard that does not provide this guarantee.) - Comment updates to various _oapi.c files. commit ffce3d632b284eb52474036096815ec38ca8dd5f Author: Field G. Van Zee Date: Tue Apr 2 14:40:50 2019 -0500 Renamed armv8a gemm kernel filename. Details: - Renamed kernels/armv8a/3/bli_gemm_armv8a_opt_4x4.c to kernels/armv8a/3/bli_gemm_armv8a_asm_d6x8.c. This follows the naming convention used by other kernel sets, most notably haswell. commit 77867478af02144544b4e7b6df5d54d874f3f93b Author: Isuru Fernando Date: Tue Apr 2 13:33:11 2019 -0500 Use pthreads on MinGW and Cygwin (#307) commit 7bc75882f02ce3470a357950878492e87e688cec Author: Field G. Van Zee Date: Thu Mar 28 17:40:50 2019 -0500 Updated Eigen results in docs/graphs with 3.3.90. Details: - Updated the level-3 performance graphs in docs/graphs with new Eigen results, this time using a development version cloned from their git mirror on March 27, 2019 (version 3.3.90). Performance is improved over 3.3.7, though still noticeably short of BLIS/MKL in most cases. - Very minor updates to docs/Performance.md and matlab scripts in test/3/matlab. commit 20ea7a1217d3833db89a96158c42da2d6e968ed8 Author: Field G. Van Zee Date: Wed Mar 27 18:09:17 2019 -0500 Minor text updates (Eigen) to docs/Performance.md. Details: - Added/updated a few more details, mostly regarding Eigen. commit bfb7e1bc6af468e4ff22f7e27151ea400dcd318a Merge: 044df950 2c85e1dd Author: Field G. 
Van Zee Date: Wed Mar 27 17:58:19 2019 -0500 Merge branch 'dev' commit 2c85e1dd9d5d84da7228ea4ae6deec56a89b3a8f (dev) Author: Field G. Van Zee Date: Wed Mar 27 16:29:51 2019 -0500 Added Eigen results to performance graphs. Details: - Updated the Haswell, SkylakeX, and Epyc performance graphs in docs/graphs to report on Eigen implementations, where applicable. Specifically, Eigen implements all level-3 operations sequentially, however, of those operations it only provides multithreaded gemm. Thus, mt results for symm/hemm, syrk/herk, trmm, and trsm are omitted. Thanks to Sameer Agarwal for his help configuring and using Eigen. - Updated docs/Performance.md to note the new implementation tested. - CREDITS file update. commit bfac7e385f8061f2e6591de208b0acf852f04580 Author: Field G. Van Zee Date: Wed Mar 27 16:04:48 2019 -0500 Added ability to plot with Eigen in test/3/matlab. Details: - Updated matlab scripts in test/3/matlab to optionally plot/display Eigen performance curves. Whether Eigen is plotted is determined by a new boolean function parameter, with_eigen. - Updated runme.m scratchpad to reflect the latest invocations of the plot_panel_4x5() function (with Eigen plotting enabled). commit 67535317b9411c90de7fa4cb5b0fdb8f61fdcd79 Author: Field G. Van Zee Date: Wed Mar 27 13:32:18 2019 -0500 Fixed mislabeled eigen output from test/3 drivers. Details: - Fixed the Makefile in test/3 so that it no longer incorrectly labels the matlab output variables from Eigen-linked hemm, herk, trmm, and trsm driver output as "vendor". (The gemm drivers were already correctly outputing matlab variables containing the "eigen" label.) commit 044df9506f823643c0cdd53e81ad3c27a9f9d4ff Author: Isuru Fernando Date: Wed Mar 27 12:39:31 2019 -0500 Test with shared on windows (#306) Export macros can't support both shared and static at the same time. 
When blis is built with both shared and static, headers assume that shared is used at link time and dllimports the symbols with __imp_ prefix. To use the headers with static libraries a user can give -DBLIS_EXPORT= to import the symbol without the __imp_ prefix commit 5e6b160c8a85e5e23bab0f64958a8acf4918a4ed Author: Field G. Van Zee Date: Tue Mar 26 19:10:59 2019 -0500 Link to Eigen BLAS for non-gemm drivers in test/3. Details: - Adjusted test/3/Makefile so that the test drivers are linked against Eigen's BLAS library for hemm, herk, trmm, and trsm. We have to do this since Eigen's headers don't define implementations to the standard BLAS APIs. - Simplified #included headers in hemm, herk, trmm, and trsm source driver files, since nothing specific to Eigen is needed at compile-time for those operations. commit e593221383aae19dfdc3f30539de80ed05cfec7f Merge: 92fb9c87 c208b9dc Author: Field G. Van Zee Date: Tue Mar 26 15:51:45 2019 -0500 Merge branch 'master' into dev commit 92fb9c87bf88b9f9c401eeecd9aa9c3521bc2adb Author: Field G. Van Zee Date: Tue Mar 26 15:43:23 2019 -0500 Add more support for Eigen to drivers in test/3. Details: - Use compile-time implementations of Eigen in test_gemm.c via new EIGEN cpp macro, defined on command line. (Linking to Eigen's BLAS library is not necessary.) However, as of Eigen 3.3.7, Eigen only parallelizes the gemm operation and not hemm, herk, trmm, trsm, or any other level-3 operation. - Fixed a bug in trmm and trsm drivers whereby the wrong function (bli_does_trans()) was being called to determine whether the object for matrix A should be created for a left- or right-side case. This was corrected by changing the function to bli_is_left(), as is done in the hemm driver. - Added support for running Eigen test drivers from runme.sh. 
commit c208b9dc46852c877197d53b6dd913a046b6ebb6 Author: Isuru Fernando Date: Mon Mar 25 13:03:44 2019 -0500 Fix clang version detection (#305) clang -dumpversion gives 4.2.1 for all clang versions as clang was originally compatible with gcc 4.2.1 Apple clang version and clang version are two different things and the real clang version cannot be deduced from apple clang version programatically. Rely on wikipedia to map apple clang to clang version Also fixes assembly detection with clang clang 3.8 can't build knl as it doesn't recognize zmm0 commit feefcab4427a75b0b55af215486b85abcda314f7 Author: Field G. Van Zee Date: Thu Mar 21 18:11:20 2019 -0500 Allow disabling of BLAS prototypes at compile-time. Details: - Modified bli_blas.h so that: - By default, if the BLAS layer is enabled at configure-time, BLAS prototypes are also enabled within blis.h; - But if the user #defines BLIS_DISABLE_BLAS_DEFS prior to including blis.h, BLAS prototypes are skipped over entirely so that, for example, the application or some other header pulled in by the application may prototype the BLAS functions without causing any duplication. - Updated docs/BuildSystem.md to document the feature above, and related text. commit 288843b06d91e1b4fade337959aef773090bd1c9 Author: Field G. Van Zee Date: Wed Mar 20 17:52:23 2019 -0500 Added Eigen support to test/3 Makefile, runme.sh. Details: - Added targets to test/3/Makefile that link against a BLAS library build by Eigen. It appears, however, that Eigen's BLAS library does not support multithreading. (It may be that multithreading is only available when using the native C++ APIs.) - Updated runme.sh with a few Eigen-related tweaks. - Minor tweaks to docs/Performance.md. commit 153e0be21d9ff413e370511b68d553dd02abada9 Author: Field G. Van Zee Date: Tue Mar 19 17:53:18 2019 -0500 More minor tweaks to docs/Performance.md. 
Details: - Defined GFLOPS as billions of floating-point operations per second, and reworded the sentence after about normalization. commit 05c4e42642cc0c8dbfa94a6c21e975ac30c0517a Author: Field G. Van Zee Date: Tue Mar 19 17:07:20 2019 -0500 CHANGELOG update (0.5.2) commit 9204cd0cb0cc27790b8b5a2deb0233acd9edeb9b (tag: 0.5.2) Author: Field G. Van Zee Date: Tue Mar 19 17:07:18 2019 -0500 Version file update (0.5.2) commit 64560cd9248ebf4c02c4a1eeef958e1ca434e510 Author: Field G. Van Zee Date: Tue Mar 19 17:04:20 2019 -0500 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. commit ab5ad557ea69479d487c9a3cb516f43fa1089863 Author: Field G. Van Zee Date: Tue Mar 19 16:50:41 2019 -0500 Very minor tweaks to Performance.md. commit 03c4a25e1aa8a6c21abbb789baa599ac419c3641 Author: Field G. Van Zee Date: Tue Mar 19 16:47:15 2019 -0500 Minor fixes to docs/Performance.md. Details: - Fixed some incorrect labels associated with the pdf/png graphs, apparently the result of copy-pasting. commit fe6dd8b132f39ecb8893d54cd8e75d4bbf6dab83 Author: Field G. Van Zee Date: Tue Mar 19 16:30:23 2019 -0500 Fixed broken section links in docs/Performance.md. Details: - Fixed a few broken section links in the Contents section. commit 913cf97653f5f9a40aa89a5b79e2b0a8882dd509 Author: Field G. Van Zee Date: Tue Mar 19 16:15:24 2019 -0500 Added docs/Performance.md and docs/graphs subdir. Details: - Added a new markdown document, docs/Performance.md, which reports performance of a representative set of level-3 operations across a variety of hardware architectures, comparing BLIS to OpenBLAS and a vendor library (MKL on Intel/AMD, ARMPL on ARM). Performance graphs, in pdf and png formats, reside in docs/graphs. - Updated README.md to link to new Performance.md document. - Minor updates to CREDITS, docs/Multithreading.md. - Minor updates to matlab scripts in test/3/matlab. 
commit 9945ef24fd758396b698b19bb4e23e53b9d95725 Author: Field G. Van Zee Date: Tue Mar 19 15:28:44 2019 -0500 Adjusted cache blocksizes for zen subconfig. Details: - Adjusted the zen sub-configuration's cache blocksizes for float, scomplex, and dcomplex based on the existing values for double. (The previous values were taken directly from the haswell subconfig, which targets Intel Haswell/Broadwell/Skylake systems.) commit d202d008d51251609d08d3c278bb6f4ca9caf8e4 Author: Field G. Van Zee Date: Mon Mar 18 18:18:25 2019 -0500 Renamed --enable-export-all to --export-shared=[]. Details: - Replaced the existing --enable-export-all / --disable-export-all configure option with --export-shared=[public|all], with the 'public' instance of the latter corresponding to --disable-export-all and the 'all' instance corresponding to --enable-export-all. Nothing else semantically about the option, or its default, has changed. commit ff78089870f714663026a7136e696603b5259560 Author: Field G. Van Zee Date: Mon Mar 18 13:22:55 2019 -0500 Updates to docs/Multithreading.md. Details: - Made extra explicit the fact that: (a) multithreading in BLIS is disabled by default; and (b) even with multithreading enabled, the user must specify multithreading at runtime in order to observe parallelism. Thanks to M. Zhou for suggesting these clarifications in #292. - Also made explicit that only the environment variable and global runtime API methods are available when using the BLAS API. If the user wishes to use the local runtime API (specify multithreading on a per-call basis), one of the native BLIS APIs must be used. commit 663f662932c3f182fefc3c77daa1bf8c3394bb8b Merge: 938c05ef 6bfe3812 Author: Field G. Van Zee Date: Sat Mar 16 16:17:12 2019 -0500 Merge branch 'amd' of github.com:flame/blis into amd commit 938c05ef8654e2fc013d39a57f51d91d40cc40fb Merge: 4ed39c09 5a5f494e Author: Field G. 
Van Zee Date: Sat Mar 16 16:01:43 2019 -0500 Merge branch 'amd' of github.com:flame/blis into amd commit 6bfe3812e29b86c95b828822e4e5473b48891167 Author: Field G. Van Zee Date: Fri Mar 15 13:57:49 2019 -0500 Use -fvisibility=[...] with clang on Linux/BSD/OSX. Details: - Modified common.mk to use the -fvisibility=[hidden|default] option when compiling with clang on non-Windows platforms (Linux, BSD, OS X, etc.). Thanks to Isuru Fernando for pointing out this option works with clang on these OSes. commit 809395649c5bbf48778ede4c03c1df705dd49566 Author: Field G. Van Zee Date: Wed Mar 13 18:21:35 2019 -0500 Annotated additional symbols for export. Details: - Added export annotations to additional function prototypes in order to accommodate the testsuite. - Disabled calling bli_amaxv_check() from within the testsuite's test_amaxv.c. commit e095926c643fd9c9c2220ebecd749caae0f71d42 Author: Field G. Van Zee Date: Wed Mar 13 17:35:18 2019 -0500 Support shared lib export of only public symbols. Details: - Introduced a new configure option, --enable-export-all, which will cause all shared library symbols to be exported by default, or, alternatively, --disable-export-all, which will cause all symbols to be hidden by default, with only those symbols that are annotated for visibility, via BLIS_EXPORT_BLIS (and BLIS_EXPORT_BLAS for BLAS symbols), to be exported. The default for this configure option is --disable-export-all. Thanks to Isuru Fernando for consulting on this commit. - Removed BLIS_EXPORT_BLIS annotations from frame/1m/bli_l1m_unb_var1.h, which was intended for 5a5f494. - Relocated BLIS_EXPORT-related cpp logic from bli_config.h.in to frame/include/bli_config_macro_defs.h. - Provided appropriate logic within common.mk to implement variable symbol visibility for gcc, clang, and icc (to the extent that each of these compilers allows). - Relocated --help text associated with debug option (-d) to configure slightly further down in the list.
commit 5a5f494e428372c7c27ed1f14802e15a83221e87 Author: Field G. Van Zee Date: Tue Mar 12 18:45:09 2019 -0500 Removed export macros from all internal prototypes. Details: - After merging PR #303, at Isuru's request, I removed the use of BLIS_EXPORT_BLIS from all function prototypes *except* those that we potentially wish to be exported in shared/dynamic libraries. In other words, I removed the use of BLIS_EXPORT_BLIS from all prototypes of functions that can be considered private or for internal use only. This is likely the last big modification along the path towards implementing the functionality spelled out in issue #248. Thanks again to Isuru Fernando for his initial efforts of sprinkling the export macros throughout BLIS, which made removing them where necessary relatively painless. Also, I'd like to thank Tony Kelman, Nathaniel Smith, Ian Henriksen, Marat Dukhan, and Matthew Brett for participating in the initial discussion in issue #37 that was later summarized and restated in issue #248. - CREDITS file update. commit 3dc18920b6226026406f1d2a8b2c2b405a2649d5 Merge: b938c16b 766769ee Author: Field G. Van Zee Date: Tue Mar 12 11:20:25 2019 -0500 Merge branch 'master' into dev commit 766769eeb944bd28641a6f72c49a734da20da755 Author: Isuru Fernando Date: Mon Mar 11 19:05:32 2019 -0500 Export functions without def file (#303) * Revert "restore bli_extern_defs exporting for now" This reverts commit 09fb07c350b2acee17645e8e9e1b8d829c73dca8. * Remove symbols not intended to be public * No need of def file anymore * Fix whitespace * No need of configure option * Remove export macro from definitions * Remove blas export macro from definitions commit 4ed39c0971c7917e2675cf5449f563b1f4751ccc Merge: 540ec1b4 b938c16b Author: Field G. Van Zee Date: Fri Mar 8 11:56:58 2019 -0600 Merge branch 'amd' of github.com:flame/blis into amd commit b938c16b0c9e839335ac2c14944b82890143d02f Author: Field G. Van Zee Date: Thu Mar 7 16:40:39 2019 -0600 Renamed test/3m4m to test/3. 
Details: - Renamed '3m4m' directory to '3', which captures the directory nicely since it builds test drivers to test level-3 operations. - These test drivers ceased to be used to test the 3m and 4m (or even 1m) induced methods long ago, hence the name change. commit ab89a40582ec7acf802e59b0763bed099a02edd8 Author: Field G. Van Zee Date: Thu Mar 7 16:26:12 2019 -0600 More minor updates and edits to test/3m4m. Details: - Further updates to matlab scripts, mostly for compatibility with GNU Octave. - More tweaks to runme.sh. - Updates to runme.m that allow copy-paste into matlab interactive session to generate graphs. commit f0e70dfbf3fee4c4e382c2c4e87c25454cbc79a1 Author: Field G. Van Zee Date: Thu Mar 7 01:04:05 2019 +0000 Very minor updates to test/3m4m for ul252. Details: - Very minor updates to the newly revamped test/3m4m drivers when used on a Xeon Platinum (SkylakeX). commit 9f1dbe572b1fd5e7dd30d5649bdf59259ad770d5 Author: Field G. Van Zee Date: Tue Mar 5 17:47:55 2019 -0600 Overhauled test/3m4m Makefile and scripts. Details: - Rewrote much of Makefile to generate executables for single- and dual- socket multithreading as well as single-threaded. Each of the three can also use a different problem size range/increment, as is often appropriate when doubling/halving the number of threads. - Rewrote runme.sh script to flexibly execute as many threading parameter scenarios as is given in the input parameter string (currently set within the script itself). The string also encodes the maximum problem size for each threading scenario, which is used to identify the executable to run. Also improved the "progress" output of the script to reduce redundant info and improve readability in terminals that are not especially wide. - Minor updates to test_*.c source files. - Updated matlab scripts according to changes made to the Makefile, test drivers, and runme.sh script, and renamed 'plot_all.m' to 'runme.m'. 
commit 3bdab823fa93342895bf45d812439324a37db77c Merge: 70f12f20 e2a02ebd Author: Field G. Van Zee Date: Thu Feb 28 14:07:24 2019 -0600 Merge branch 'master' into dev commit e2a02ebd005503c63138d48a2b7d18978ee29205 Author: Field G. Van Zee Date: Thu Feb 28 13:58:59 2019 -0600 Updates (from ls5) to test/3m4m/runme.sh. Details: - Lonestar5-specific updates to runme.sh. commit f0dcc8944fa379d53770f5cae5d670140918f00c Author: Isuru Fernando Date: Wed Feb 27 17:27:23 2019 -0600 Add symbol export macro for all functions (#302) * initial export of blis functions * Regenerate def file for master * restore bli_extern_defs exporting for now commit 540ec1b479712d5e1da637a718927249c15d867f Author: Field G. Van Zee Date: Sun Feb 24 19:09:10 2019 -0600 Updated level-3 BLAS to call object API directly. Details: - Updated the BLAS compatibility layer for level-3 operations so that the corresponding BLIS object API is called directly rather than first calling the typed BLIS API. The previous code based on the typed BLIS API calls is still available in a deactivated cpp macro branch, which may be re-activated by #defining BLIS_BLAS3_CALLS_TAPI. (This does not yet correspond to a configure option. If it seems like people might want to toggle this behavior more regularly, a configure option can be added in the future.) - Updated the BLIS typed API to statically "pre-initialize" objects via new initializor macros. Initialization is then finished via calls to static functions bli_obj_init_finish_1x1() and bli_obj_init_finish(), which are similar to the previously-called functions, bli_obj_create_1x1_with_attached_buffer() and bli_obj_create_with_attached_buffer(), respectively. (The BLAS compatibility layer updates mentioned above employ this new technique as well.) - Transformed certain routines in bli_param_map.c--specifically, the ones that convert netlib-style parameters to BLIS equivalents--into static functions, now in bli_param_map.h. 
(The remaining three classes of conversion routines were left unchanged.) - Added the aforementioned pre-initializor macros to bli_type_defs.h. - Relocated bli_obj_init_const() and bli_obj_init_constdata() from bli_obj_macro_defs.h to bli_type_defs.h. - Added a few macros to bli_param_macro_defs.h for testing domains for real/complexness and precisions for single/double-ness. commit 8e023bc914e9b4ac1f13614feb360b105fbe44d2 Author: Field G. Van Zee Date: Fri Feb 22 16:55:30 2019 -0600 Updates to 3m4m/matlab scripts. Details: - Minor updates to matlab graph-generating scripts. - Added a plot_all.m script that is more of a scratchpad for copying and pasting function invocations into matlab to generate plots that are presently of interest to us. commit 70f12f209bc1901b5205902503707134cf2991a0 Author: Field G. Van Zee Date: Wed Feb 20 16:10:10 2019 -0600 Changed unsafe-loop to unsafe-math optimizations. Details: - Changed -funsafe-loop-optimizations (re-)introduced in 7690855 for make_defs.mk files' CRVECFLAGS to -funsafe-math-optimizations (to account for a miscommunication in issue #300). Thanks to Dave Love for this suggestion and Jeff Hammond for his feedback on the topic. commit 7690855c5106a56e5b341a350f8db1c78caacd89 Author: Field G. Van Zee Date: Mon Feb 18 19:16:01 2019 -0600 Restored -funsafe-loop-optimizations to subconfigs. Details: - Restored use of -funsafe-loop-optimizations in the definitions of CRVECFLAGS (when using gcc), but only for sub-configurations (and not configuration families such as amd64, intel64, and x86_64). This more or less reverts 5190d05 and 6cf1550. commit 44994d1490897b08cde52a615a2e37ddae8b2061 Author: Field G. Van Zee Date: Mon Feb 18 18:35:30 2019 -0600 Disable TBM, XOP, LWP instructions in AMD configs.
Details: - Added -mno-tbm -mno-xop -mno-lwp to CKVECFLAGS in bulldozer, piledriver, steamroller, and excavator configurations to explicitly disable AMD's bulldozer-era TBM, XOP, and LWP instruction sets in an attempt to fix the invalid instruction error that has plagued Travis CI builds since 6a014a3. Thanks to Devin Matthews for pointing out that the offending instruction was part of TBM (issue #300). - Restored -O3 to piledriver configuration's COPTFLAGS. commit 1e5b530744c1906140d47f43c5cad235eaa619cf Author: Field G. Van Zee Date: Mon Feb 18 18:04:38 2019 -0600 Reverted piledriver COPTFLAGS from -O3 to -O2. Details: - Debugging continues; changing COPTFLAGS for piledriver subconfig from -O3 to -O2, its original value prior to 6a014a3. commit 6cf155049168652c512aefdd16d74e7ff39b98df Author: Field G. Van Zee Date: Mon Feb 18 17:29:51 2019 -0600 Removed -funsafe-loop-optimizations from all configs. Details: - Error persists. Removed -funsafe-loop-optimizations from all remaining sub-configurations. commit 5190d05a27c5fa4c7942e20094f76eb9a9785c3e Author: Field G. Van Zee Date: Mon Feb 18 17:07:35 2019 -0600 Removed -funsafe-loop-optimizations from piledriver. Details: - Error persists; continuing debugging from bf0fb78c by removing -funsafe-loop-optimizations from piledriver configuration. commit bf0fb78c5e575372060d22f5ceeb5b332e8978ec Author: Field G. Van Zee Date: Mon Feb 18 16:51:38 2019 -0600 Removed -funsafe-loop-optimizations from families. Details: - Removed -funsafe-loop-optimizations from the configuration families affected by 6a014a3, specifically: intel64, amd64, and x86_64. This is part of an attempt to debug why the sde, as executed by Travis CI, is crashing via the following error: TID 0 SDE-ERROR: Executed instruction not valid for specified chip (ICELAKE): 0x9172a5: bextr_xop rax, rcx, 0x103 commit 6a014a3377a2e829dbc294b814ca257a2bfcb763 Author: Field G. Van Zee Date: Mon Feb 18 14:52:29 2019 -0600 Standardized optimization flags in make_defs.mk. 
Details: - Per Dave Love's recommendation in issue #300, this commit defines COPTFLAGS := -O3 and CRVECFLAGS := $(CKVECFLAGS) -funsafe-loop-optimizations in the make_defs.mk for all Intel- and AMD-based configurations. commit 565fa3853b381051ac92cff764625909d105644d Author: Field G. Van Zee Date: Mon Feb 18 11:43:58 2019 -0600 Redirect trsm pc, ir parallelism to ic, jr loops. Details: - trsm parallelization was temporarily simplified in 075143d to entirely ignore any parallelism specified via the pc or ir loops. Now, any parallelism specified to the pc loop will be redirected to the ic loop, and any parallelism specified to the ir loop will be redirected to the jr loop. (Note that because of inter-iteration dependencies, trsm cannot parallelize the ir loop. Parallelism via the pc loop is at least somewhat feasible in theory, but it would require tracking dependencies between blocks--something for which BLIS currently lacks the necessary supporting infrastructure.) commit a023c643f25222593f4c98c2166212561d030621 Author: Field G. Van Zee Date: Thu Feb 14 20:18:55 2019 -0600 Regenerated symbols in build/libblis-symbols.def. Details: - Reran ./build/regen-symbols.sh after running 'configure --enable-cblas auto' commit 075143dfd92194647da9022c1a58511b20fc11f3 Author: Field G. Van Zee Date: Thu Feb 14 18:52:45 2019 -0600 Added support for IC loop parallelism to trsm. Details: - Parallelism within the IC loop (3rd loop around the microkernel) is now supported within the trsm operation. This is done via a new branch on each of the control and thread trees, which guide execution of a new trsm-only subproblem from within bli_trsm_blk_var1(). This trsm subproblem corresponds to the macrokernel computation on only the block of A that contains the diagonal (labeled as A11 in algorithms with FLAME-like partitioning), and the corresponding row panel of C.
During the trsm subproblem, all threads within the JC communicator participate and parallelize along the JR loop, including any parallelism that was specified for the IC loop. (IR loop parallelism is not supported for trsm due to inter-iteration dependencies.) After this trsm subproblem is complete, a barrier synchronizes all participating threads and then they proceed to apply the prescribed BLIS_IC_NT (or equivalent) ways of parallelism (and any BLIS_JR_NT parallelism specified within) to the remaining gemm subproblem (the rank-k update that is performed using the newly updated row-panel of B). Thus, trsm now supports JC, IC, and JR loop parallelism. - Modified bli_trsm_l_cntl_create() to create the new "prenode" branch of the trsm_l cntl_t tree. The trsm_r tree was left unchanged, for now, since it is not currently used. (All trsm problems are cast in terms of left-side trsm.) - Updated bli_cntl_free_w_thrinfo() to be able to free the newly shaped trsm cntl_t trees. Fixed a potentially latent bug whereby a cntl_t subnode is only recursed upon if there existed a corresponding thrinfo_t node, which may not always exist (for problems too small to employ full parallelization due to the minimum granularity imposed by micropanels). - Updated other functions in frame/base/bli_cntl.c, such as bli_cntl_copy() and bli_cntl_mark_family(), to recurse on sub-prenodes if they exist. - Updated bli_thrinfo_free() to recurse into sub-nodes and prenodes when they exist, and added support for growing a prenode branch to bli_thrinfo_grow() via a corresponding set of help functions named with the _prenode() suffix. - Added a bszid_t field to thrinfo_t nodes. This field comes in handy when debugging the allocation/release of thrinfo_t nodes, as it helps trace the "identity" of each node as it is created/destroyed.
- Renamed bli_l3_thrinfo_print_paths() -> bli_l3_thrinfo_print_gemm_paths() and created a separate bli_l3_thrinfo_print_trsm_paths() function to print out the newly reconfigured thrinfo_t trees for the trsm operation. - Trivial changes to bli_gemm_blk_var?.c and bli_trsm_blk_var?.c regarding variable declarations. - Removed subpart_t enum values BLIS_SUBPART1T, BLIS_SUBPART1B, BLIS_SUBPART1L, BLIS_SUBPART1R. Then added support for two new labels (semantically speaking): BLIS_SUBPART1A and BLIS_SUBPART1B, which represent the subpartition ahead of and behind, respectively, BLIS_SUBPART1. Updated check functions in bli_check.c accordingly. - Shuffled layering/APIs for bli_acquire_mpart_[mn]dim() and bli_acquire_mpart_t2b/b2t(), _l2r/r2l(). - Deprecated old functions in frame/3/bli_l3_thrinfo.c. commit 78bc0bc8b6b528c79b11f81ea19250a1db7450ed Author: Nicholai Tukanov Date: Thu Feb 14 13:29:02 2019 -0600 Power9 sub-configuration (#298) Formally registered power9 sub-configuration. Details: - Added and registered power9 sub-configuration into the build system. Thanks to Nicholai Tukanov and Devangi Parikh for these contributions. - Note: The sub-configuration does not yet have a corresponding architecture-specific kernel set registered, and so for now the sub-config is using the generic kernel set. commit 6b832731261f9e7ad003a9ea4682e9ca973ef844 Author: Field G. Van Zee Date: Tue Feb 12 16:01:28 2019 -0600 Generalized ref kernels' pragma omp simd usage. Details: - Replaced direct usage of _Pragma( "omp simd" ) in reference kernels with PRAGMA_SIMD, which is defined as a function of the compiler being used in a new bli_pragma_macro_defs.h file. That definition is cleared when BLIS detects that the -fopenmp-simd command line option is unsupported. Thanks to Devin Matthews and Jeff Hammond for suggestions that guided this commit.
- Updated configure and bli_config.h.in so that the appropriate anchor is substituted in (when the corresponding pragma omp simd support is present). commit b1f5ce8622b682b79f956fed83f04a60daa8e0fc Author: Field G. Van Zee Date: Tue Feb 5 17:38:50 2019 -0600 Minor updates to scripts in test/mixeddt/matlab. commit 38203ecd15b1fa50897d733daeac6850d254e581 Author: Devangi N. Parikh Date: Mon Feb 4 15:28:28 2019 -0500 Added thunderx2 system in the mixeddt test scripts Details: - Added thunderx2 (tx2) as a system in the runme.sh in test/mixeddt commit dfc91843ea52297bf636147793029a0c1345be04 Author: Devangi N. Parikh Date: Mon Feb 4 15:23:40 2019 -0500 Fixed gcc flags for thunderx2 subconfiguration Details: - Fixed -march flag. Thunderx2 is an armv8.1a architecture not armv8a. commit c665eb9b888ec7e41bd0a28c4c8ac4094d0a01b5 Author: Field G. Van Zee Date: Mon Jan 28 16:22:23 2019 -0600 Minor updates to docs, Makefiles. Details: - Changed all occurrences of micro-kernel -> microkernel macro-kernel -> macrokernel micro-panel -> micropanel in all markdown documents in 'docs' directory. This change is being made since we've reached the point in adoption and acceptance of BLIS's insights where words such as "microkernel" are no longer new, and therefore now merit being unhyphenated. - Updated "Implementation Notes" sections of KernelsHowTo.md, which still contained references to nonexistent cpp macros such as BLIS_DEFAULT_MR_? and BLIS_PACKDIM_MR_?. - Added 'run-fast' and 'check-fast' targets to testsuite/Makefile. - Minor updates to Testsuite.md, including suggesting use of 'make check' and 'make check-fast' when running from the local testsuite directory. - Added a comment to top-level Makefile explaining the purpose behind the TESTSUITE_WRAPPER variable, which at first glance appears to serve no purpose. commit 1aa280d0520ed5eaea3b119b4e92b789ecad78a4 Author: M.
Zhou <5723047+cdluminate@users.noreply.github.com> Date: Sun Jan 27 21:40:48 2019 +0000 Amend OS detection for kFreeBSD. (#295) commit fffc23bb35d117a433886eb52ee684ff5cf6997f Author: Field G. Van Zee Date: Fri Jan 25 13:35:31 2019 -0600 CREDITS file update. commit 26c5cf495ce22521af5a36a1012491213d5a4551 Author: Field G. Van Zee Date: Thu Jan 24 18:49:31 2019 -0600 Fixed bug in skx subconfig related to bdd46f9. Details: - Fixed code in the skx subconfiguration that became a bug after committing bdd46f9. Specifically, the bli_cntx_init_skx() function was overwriting default blocksizes for the scomplex and dcomplex microkernels despite the fact that only single and double real microkernels were being registered. This was not a problem prior to bdd46f9 since all microkernels used dynamically-queried (at runtime) register blocksizes for loop bounds. However, post-bdd46f9, this became a bug because the reference ukernels for scomplex and dcomplex were written with their register blocksizes hard-coded as constant loop bounds, which conflicted with the erroneous scomplex and dcomplex values that bli_cntx_init_skx() was setting in the context. The lesson here is that going forward, all subconfigurations must not set any blocksizes for datatypes corresponding to default/reference microkernels. (Note that a blocksize is left unchanged by the bli_cntx_set_blkszs() function if it was set to -1.) commit 180f8e42e167b83a757340ad4bd4a5c7a1d6437b Author: Field G. Van Zee Date: Thu Jan 24 18:01:15 2019 -0600 Fixed undefined behavior trsm ukr bug in bdd46f9. Details: - Fixed a bug that manifested anytime a configuration was used in which optimized microkernels were registered and the trsm operation (or kernel) was invoked. The bug resulted from the optimized microkernels' register blocksizes conflicting with the hard-coded values--expressed in the form of constant loop bounds--used in the new reference trsm ukernels that were introduced in bdd46f9.
The fix was easy: reverting back to the implementation that uses variable-bound loops, which amounted to changing an #if 0 to #if 1 (since I preserved the older implementation in the file alongside the new code based on constant- bound loops). It should be noted that this fix must be permanent, since the trsm kernel code with constant-bound loops can never work with gemm ukernels that use different register blocksizes. commit bdd46f9ee88057d52610161966a11c224e5a026c Author: Field G. Van Zee Date: Thu Jan 24 17:23:18 2019 -0600 Rewrote reference kernels to use #pragma omp simd. Details: - Rewrote level-1v, -1f, and -3 reference kernels in terms of simplified indexing annotated by the #pragma omp simd directive, which a compiler can use to vectorize certain constant-bounded loops. (The new kernels actually use _Pragma("omp simd") since the kernels are defined via templatizing macros.) Modest speedup was observed in most cases using gcc 5.4.0, which may improve with newer versions. Thanks to Devin Matthews for suggesting this via issue #286 and #259. - Updated default blocksizes defined in ref_kernels/bli_cntx_ref.c to be 4x16, 4x8, 4x8, and 4x4 for single, double, scomplex and dcomplex, respectively, with a default row preference for the gemm ukernel. Also updated axpyf, dotxf, and dotxaxpyf fusing factors to 8, 6, and 4, respectively, for all datatypes. - Modified configure to verify that -fopenmp-simd is a valid compiler option (via a new detect/omp_simd/omp_simd_detect.c file). - Added a new header in which prefetch macros are defined according to which compiler is detected (via macros such as __GNUC__). These prefetch macros are not yet employed anywhere, though. - Updated the year in copyrights of template license headers in build/templates and removed AMD as a default copyright holder. commit 63de2b0090829677755eb5cdb27e73bc738da32d Author: Field G. Van Zee Date: Wed Jan 23 12:16:27 2019 -0600 Prevent redef of ftnlen in blastest f2c_types.h. 
Details: - Guard typedef of ftnlen in f2c_types.h with a #ifndef HAVE_BLIS_H directive to prevent the redefinition of that type. Thanks to Jeff Diamond for reporting this compiler warning (and apologies for the delay in committing a fix). commit eec2e183a7b7d67702dbd1f39c153f38148b2446 Author: Field G. Van Zee Date: Mon Jan 21 12:12:18 2019 -0600 Added escaping to '/' in os_name in configure. Details: - Add os_name to the list of variables into which the '/' character is escaped. This is meant to address (or at least make progress toward addressing) #293. Thanks to Isuru Fernando for spotting this as the potential fix, and also thanks to M. Zhou for the original report. commit adf5c17f0839fdbc1f4a1780f637928b1e78e389 Author: Field G. Van Zee Date: Fri Jan 18 15:14:45 2019 -0600 Formally registered thunderx2 subconfiguration. Details: - Added a separate subconfiguration for thunderx2, which now uses different optimization flags than cortexa57/cortexa53. commit 094cfdf7df6c2764c25fcbfce686ba29b933942c Author: M. Zhou <5723047+cdluminate@users.noreply.github.com> Date: Fri Jan 18 18:46:13 2019 +0000 Port BLIS to GNU Hurd OS. (#294) Prevent blis.h from misidentifying Hurd as OSX. commit 5d7d616e8e591c2f3c7c2d73220eb27ea484f9c9 Author: Field G. Van Zee Date: Tue Jan 15 20:52:51 2019 -0600 README.md update re: mixeddt TOMS paper. commit 58c7fb4788177487f73a3964b7a910fe4dc75941 Author: Field G. Van Zee Date: Tue Jan 8 17:00:27 2019 -0600 Added more matlab scripts for mixeddt paper. Details: - Added a variant set of matlab scripts geared to producing plots that reflect performance data gathered with and without extra memory optimizations enabled. These scripts reside (for now) in test/mixeddt/matlab/wawoxmem. commit 34286eb914b48b56cdda4dfce192608b9f86d053 Author: Field G. Van Zee Date: Tue Jan 8 11:41:20 2019 -0600 Minor update to docs/HardwareSupport.md. commit 108b04dc5b1b1288db95f24088d1e40407d7bc88 Author: Field G. 
Van Zee Date: Mon Jan 7 20:16:31 2019 -0600 Regenerated symbols in build/libblis-symbols.def. Details: - Reran ./build/regen-symbols.sh after running 'configure --enable-cblas auto' to reflect removal of bli_malloc_pool() and bli_free_pool(). commit 706cbd9d5622f4690e6332a89cf41ab5c8771899 Author: Field G. Van Zee Date: Mon Jan 7 18:28:19 2019 -0600 Minor tweaks/cleanups to bli_malloc.c, _apool.c. Details: - Removed malloc_ft and free_ft function pointer arguments from the interface to bli_apool_init() after deciding that there is no need to specify the malloc()/free() for blocks within the apool. (The apool blocks are actually just array_t structs.) Instead, we simply call bli_malloc_intl()/_free_intl() directly. This has the added benefit of allowing additional output when memory tracing is enabled via --enable-mem-tracing. Also made corresponding changes elsewhere in the apool API. - Changed the inner pools (elements of the array_t within the apool_t) to use BLIS_MALLOC_POOL and BLIS_FREE_POOL instead of BLIS_MALLOC_INTL and BLIS_FREE_INTL. - Disabled definitions of bli_malloc_pool() and bli_free_pool() since there are no longer any consumers of these functions. - Very minor comment / printf() updates. commit 579145039d945adbcad1177b1d53fb2d3f2e6573 Author: Minh Quan Ho <1337056+hominhquan@users.noreply.github.com> Date: Mon Jan 7 23:00:15 2019 +0100 Initialize error messages at compile time (#289) * Initialize error messages at compile time - Assigning strings directly to the bli_error_string array, instead of snprintf() at execution-time. * Retired bli_error_init(), _finalize(). Details: - Removed functions obviated by changes in 80e8dc6: bli_error_init(), bli_error_finalize(), and bli_error_init_msgs(), as well as calls to the former two in bli_init.c. * Regenerated symbols in build/libblis-symbols.def. Details: - Reran ./build/regen-symbols.sh after running 'configure --enable-cblas auto'. commit aafbca086e36b6727d7be67e21fef5bd9ff7bfd9 Author: Field G. 
Van Zee Date: Mon Jan 7 12:38:21 2019 -0600 Updated external package language in README.md. Details: - Updated/added comments about Fedora, OpenSUSE, and GNU Guix under the newly-renamed "External GNU/Linux packages" section. Thanks to Dave Love for providing these revisions. commit daacfe68404c9cc8078e5e7ba49a8c7d93e8cda3 Author: Field G. Van Zee Date: Mon Jan 7 12:12:47 2019 -0600 Allow running configure with python 3.4. Details: - Relax version blacklisting of python3 to allow 3.4 or later instead of 3.5 or later. Thanks to Dave Love for pointing out that 3.4 was sufficient for the purpose of BLIS's build system. (It should be noted that we're not sure which, if any, python3 versions prior to 3.4 are insufficient, and that the only thing stopping us from determining this is the fact that these earlier versions of python3 are not readily available for us to test with.) - Updated docs/BuildSystem.md to be explicit about current python2 vs python3 version requirements. commit ad8d9adb09a7dd267bbdeb2bd1fbbf9daf64ee76 Author: Field G. Van Zee Date: Thu Jan 3 16:08:24 2019 -0600 README.md, CREDITS update. Details: - Added "What's New" and "What People Are Saying About BLIS" sections to README.md. - Added missing github handles to various individuals' entries in the CREDITS file. commit 7052fca5aef430241278b67d24cef6fe33106904 Author: Field G. Van Zee Date: Wed Jan 2 13:48:40 2019 -0600 Apply f272c289 to bli_fmalloc_noalign(). Details: - Perform the same check for NULL return values and error message output in bli_fmalloc_noalign() as is performed by bli_fmalloc_align(). (This change was intended for f272c289.) commit 528e3ad16a42311a852a8376101959b4ccd801a5 Merge: 3126c52e f272c289 Author: Field G. Van Zee Date: Wed Jan 2 13:39:19 2019 -0600 Merge branch 'amd' commit 3126c52ea795ffb7d30b16b7f7ccc2a288a6158d Merge: 61441b24 8091998b Author: Field G. 
Van Zee Date: Wed Jan 2 13:37:37 2019 -0600 Merge branch 'amd' commit f272c2899a6764eedbe05cea874ee3bd258dbff3 Author: Field G. Van Zee Date: Wed Jan 2 12:34:15 2019 -0600 Add error message to malloc() check for NULL. Details: - Output an error message if and when the malloc()-equivalent called by bli_fmalloc_align() ever returns NULL. Everything was already in place for this to happen, including the error return code, the error string sprintf(), the error checking function bli_check_valid_malloc_buf() definition, and its prototype. Thanks to Minh Quan Ho for pointing out the missing error message. - Increased the default block_ptrs_len for each inner pool stored in the small block allocator from 10 to 25. Under normal execution, each thread uses only 21 blocks, so this change will prevent the sba from needing to resize the block_ptrs array of any given inner pool as threads initially populate the pool with small blocks upon first execution of a level-3 operation. - Nix stray newline echo in configure. commit eb97f778a1e13ee8d3b3aade05e479c4dfcfa7c0 Author: Field G. Van Zee Date: Tue Dec 25 20:17:09 2018 -0600 Added missing AMD copyrights to previous commit. Details: - Forgot to add AMD copyrights to several touched files that did not already have them in 2f31743. commit 2f3174330fb29164097d664b7c84e05c7ced7d95 Author: Field G. Van Zee Date: Tue Dec 25 19:35:01 2018 -0600 Implemented a pool-based small block allocator. Details: - Implemented a sophisticated data structure and set of APIs that track the small blocks of memory (around 80-100 bytes each) used when creating nodes for control and thread trees (cntl_t and thrinfo_t) as well as thread communicators (thrcomm_t). 
The purpose of the small block allocator, or sba, is to allow the library to transition into a runtime state in which it does not perform any calls to malloc() or free() during normal execution of level-3 operations, regardless of the threading environment (potentially multiple application threads as well as multiple BLIS threads). The functionality relies on a new data structure, apool_t, which is (roughly speaking) a pool of arrays, where each array element is a pool of small blocks. The outer pool, which is protected by a mutex, provides separate arrays for each application thread while the arrays each handle multiple BLIS threads for any given application thread. The design minimizes the potential for lock contention, as only concurrent application threads would need to fight for the apool_t lock, and only if they happen to begin their level-3 operations at precisely the same time. Thanks to Kiran Varaganti and AMD for requesting this feature. - Added a configure option to disable the sba pools, which are enabled by default; renamed the --[dis|en]able-packbuf-pools option to --[dis|en]able-pba-pools; and rewrote the --help text associated with this new option and consolidated it with the --help text for the option associated with the sba (--[dis|en]able-sba-pools). - Moved the membrk field from the cntx_t to the rntm_t. We now pass in a rntm_t* to the bli_membrk_acquire() and _release() APIs, just as we do for bli_sba_acquire() and _release(). - Replaced all calls to bli_malloc_intl() and bli_free_intl() that are used for small blocks with calls to bli_sba_acquire(), which takes a rntm (in addition to the bytes requested), and bli_sba_release(). These latter two functions reduce to the former two when the sba pools are disabled at configure-time. - Added rntm_t* arguments to various cntl_t and thrinfo_t functions, as required by the new usage of bli_sba_acquire() and _release(). 
- Moved the freeing of "old" blocks (those allocated prior to a change in the block_size) from bli_membrk_acquire_m() to the implementation of the pool_t checkout function. - Miscellaneous improvements to the pool_t API. - Added a block_size field to the pblk_t. - Harmonized the way that the trsm_ukr testsuite module performs packing relative to that of gemmtrsm_ukr, in part to avoid the need to create a packm control tree node, which now requires a rntm_t that has been initialized with an sba and membrk. - Re-enable explicit call to bli_finalize() in testsuite so that users who run the testsuite with memory tracing enabled can check for memory leaks. - Manually imported the compact/minor changes from 61441b24 that cause the rntm to be copied locally when it is passed in via one of the expert APIs. - Reordered parameters to various bli_thrcomm_*() functions so that the thrcomm_t* to the comm being modified is last, not first. - Added more descriptive tracing for allocating/freeing small blocks and formalized via a new configure option: --[dis|en]able-mem-tracing. - Moved some unused scalm code and headers into frame/1m/other. - Whitespace changes to bli_pthread.c. - Regenerated build/libblis-symbols.def. commit 61441b24f3244a4b202c29611a4899dd5c51d3a1 Author: Field G. Van Zee Date: Thu Dec 20 19:38:11 2018 -0600 Make local copy of user's rntm_t in level-3 ops. Details: - In the case that the caller passes in a non-NULL rntm_t pointer into one of the expert APIs for a level-3 operation (e.g. bli_gemm_ex()), make a local copy of the rntm_t and use the address of that local copy in all subsequent execution (which may change the contents of the rntm_t). This prevents a potentially confusing situation whereby a user-initialized rntm_t is used once (in, say, gemm), and then found by the user to be in a different state before it is used a second time. commit e809b5d2f1023b4249969e2f516291c9a3a00b80 Merge: 76016691 0476f706 Author: Field G.
Van Zee Date: Thu Dec 20 16:27:26 2018 -0600 Merge branch 'master' into amd commit 0476f706b93e83f6b74a3d7b7e6e9cc9a1a52c3b Author: Field G. Van Zee Date: Tue Dec 18 14:56:20 2018 -0600 CHANGELOG update (0.5.1) commit e0408c3ca3d53bc8e6fedac46ea42c86e06c922d (tag: 0.5.1) Author: Field G. Van Zee Date: Tue Dec 18 14:56:16 2018 -0600 Version file update (0.5.1) commit 3ab231afc9f69d14493908c53c85a84c5fba58aa Author: Field G. Van Zee Date: Tue Dec 18 14:53:37 2018 -0600 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. commit d1aa87164e1e82347d62aa98793963c5265ef7e7 Author: Field G. Van Zee Date: Tue Dec 18 14:52:40 2018 -0600 README.md update (External packages section). Details: - Updated External packages section in anticipation of introducing BLIS into Debian package universe. Thanks to M. Zhou for sponsoring BLIS in Debian. commit d2b2a0819a2fccad9165bc48c0e172d79a87542c Author: Field G. Van Zee Date: Mon Dec 17 19:26:35 2018 -0600 Removed stray sections from Multithreading.md. Details: - Removed unintended section headers from before table of contents. commit 93d56319f2953cf0e9df1ff2cda90b8e41351b2c Author: Field G. Van Zee Date: Mon Dec 17 19:17:30 2018 -0600 Added missing bli_init_once() in bli_thread API. Details: - Fixed an issue with specifying threading globally at runtime via bli_thread_set_num_threads() (the automatic way) or via bli_thread_set_ways() (the manual way), with bli_thread_init_rntm() also affected. These functions were not calling bli_init_once() prior to acting, and therefore their effects on the global rntm_t structure were being wiped out by the eventual call to bli_init_once(), by some other BLIS function. Thanks to Ali Emre Gülcü for reporting the behavior associated with this bug. - Added additional content to docs/Multithreading.md covering topics of choosing between OpenMP and pthreads, and specifying affinity via OpenMP. - CREDITS file update. 
commit 76016691e2c514fcb59f940c092475eda968daa2 Author: Field G. Van Zee Date: Thu Dec 13 17:23:09 2018 -0600 Improvements to bli_pool; malloc()/free() tracing. Details: - Added malloc_ft and free_ft fields to pool_t, which are provided when the pool is initialized, to allow bli_pool_alloc_block() and bli_pool_free_block() to call bli_fmalloc_align()/bli_ffree_align() with arbitrary align_size values (according to how the pool_t was initialized). - Added a block_ptrs_len argument to bli_pool_init(), which allows the caller to specify an initial length for the block_ptrs array, which previously suffered the cost of being reallocated, copied, and freed each time a new block was added to the pool. - Consolidated the "buf_sys" and "buf_align" pointer fields in pblk_t into a single "buf" field. Consolidated the bli_pblk API accordingly and also updated the bli_mem API implementation. This was done because I'd previously already implemented opaque alignment via bli_malloc_align(), which allocates extra space and stores the original pointer returned by malloc() one element before the element whose address is aligned. - Tweaked bli_membrk_acquire_m() and bli_membrk_release() to call bli_fmalloc_align() and bli_ffree_align(), which required adding an align_size field to the membrk_t struct. - Pass the pack schemas directly into bli_l3_cntl_create_if() rather than transmit them via objects for A and B. - Simplified bli_l3_cntl_free_if() and renamed to bli_l3_cntl_free(). The function had not been conditionally freeing control trees for quite some time. Also, removed obj_t* parameters since they aren't needed anymore (or never were). - Spun-off OpenMP nesting code in bli_l3_thread_decorator() to a separate function, bli_l3_thread_decorator_thread_check(). 
- Renamed: bli_malloc_align() -> bli_fmalloc_align() bli_free_align() -> bli_ffree_align() bli_malloc_noalign() -> bli_fmalloc_noalign() bli_free_noalign() -> bli_ffree_noalign() The 'f' is for "function" since they each take a malloc_ft or free_ft function pointer argument. - Inserted various printf() calls for the purposes of tracing memory allocation and freeing, guarded by cpp macro ENABLE_MEM_DEBUG, which, for now, is intended to be a "hidden" feature rather than one hooked up to a configure-time option. - Defined bli_rntm_equals(), which compares two rntm_t for equality. (There are no use cases for this function yet, but there may be soon.) - Whitespace changes to function parameter lists in bli_pool.c, .h. commit f808d829c58dc4194cc3ebc3825fbdde12cd3f93 Author: Field G. Van Zee Date: Wed Dec 12 15:22:59 2018 -0600 Handle edge cases, zero-filling in packm kernels. Details: - Updated the API and semantics of packm kernels such that they must now handle edge cases, meaning that a c-by-k packm kernel must be able to pack edge cases that are fewer than c rows/columns and be able to zero-fill the remaining elements. They must also be able to zero-fill the equivalent region when copying fewer than k columns/rows (which is needed by trsm). The new packm kernel API is generally: void packm_kernel ( conj_t conja, dim_t cdim, dim_t n, dim_t n_max, ctype* restrict kappa, ctype* restrict a, inc_t inca, inc_t lda, ctype* restrict p, inc_t ldp, cntx_t* restrict cntx ); where cdim and n are the dimensions (short and long, respectively) of the submatrix being copied from the source matrix A, and n_max is the "full" long dimension (corresponding to the k dimension in gemm) of the micropanel. The "full" short dimension (corresponding to the register blocksize MR or NR) is not part of the API because it is known intrinsically by the packm kernel implementation. Thanks to Devin Matthews for prompting us to make this change (#282). 
- Updated all reference packm kernels in ref_kernels/1m according to above changes, as well as all optimized packm kernels (which only consisted of those for knl). - Bumped the major soname version number in 'so_version' to 2. At first I was considering leaving it unchanged, but I couldn't escape the reality that the packm kernel API is much closer to an expert API than it is some obscure helper function interface within the framework that nobody would ever notice. - Removed reference packm kernels for mr/nr = 30. The only sub-config that would have been using those kernels is knc, which is likely no longer being used by very many people (if any). (This also mostly offset the larger object code footprint incurred by moving the edge-case handling into the individual packm kernels.) - Fixed an obscure race condition for 3mh and 4mh induced methods in which those implementations were modifying the contexts stored in the gks rather than a local copy. - Fixed a minor bug in the testsuite that prevented non-1m-based induced method implementations of trsm from executing. commit 02ec0be3ba0b0d6b4186386ae140906a96de919b Merge: e275def3 c534da62 Author: Field G. Van Zee Date: Wed Dec 5 19:33:53 2018 -0600 Merge branch 'master' into amd commit c534da62c0015f91391983da5376c9e091378010 Author: Field G. Van Zee Date: Wed Dec 5 15:51:05 2018 -0600 Disabled ARM configuration families in registry. Details: - Disabled (commented out) the arm32 and arm64 configuration families in the config_registry file. Having a configuration family registered only makes sense if BLIS is currently outfitted with runtime hardware detection logic to choose the appropriate sub-configuration. That logic is currently missing for ARM architectures, and thus having the ARM configuration families in the configuration registry only serves to confuse people. Thanks to Devangi Parikh for suggesting this change. commit 6885051a164628904fad0d8a3b39c82f9a7b193c Author: Field G.
Van Zee Date: Wed Dec 5 14:45:39 2018 -0600 Generalizations/cleanup to mixeddt matlab scripts. Details: - Parameterized, reorganized, and added comments to matlab scripts in test/mixeddt/matlab. - Reordered some lines of code and added comments to plot_l3_perf.m in test/3m4m/matlab. commit cbdb0566bf3201a495bbdcb8cb50342fa0098649 Author: Field G. Van Zee Date: Wed Dec 5 20:06:32 2018 +0000 Updates to 3m4m, mixeddt test driver files. Details: - Updated 3m4m and mixeddt Makefiles and runme.sh scripts, mostly to port recent changes to the former to the latter. - Disabled (for now) code in 3m4m/test_*.c files that disables all induced methods except for the one that is requested from the Makefile via the IND macro. This is done because usually, we want to test whatever method is enabled automatically for complex datatypes. (That is, when native complex microkernels are missing, we usually want to test performance of 1m.) commit 0645f239fbdf37ee9d2096ee3bb0e76b3302cfff Author: Field G. Van Zee Date: Tue Dec 4 14:31:06 2018 -0600 Remove UT-Austin from copyright headers' clause 3. Details: - Removed explicit reference to The University of Texas at Austin in the third clause of the license comment blocks of all relevant files and replaced it with a more all-encompassing "copyright holder(s)". - Removed duplicate words ("derived") from a few kernels' license comment blocks. - Homogenized license comment block in kernels/zen/3/bli_gemm_small.c with format of all other comment blocks. commit 9b688a2d69dd420f4d2582827c5ac87e422cd3bc Author: Field G. Van Zee Date: Tue Dec 4 13:30:25 2018 -0600 Refer to color mm algorithm in Multithreading.md. commit 22384fd2b749aa8cfdfad1084ce5e7dbd4ad2d64 Author: Field G. Van Zee Date: Tue Dec 4 13:09:04 2018 -0600 Minor updates to test_gemm.c in test/mixeddt. commit 2ba3b1780cbca58e43a3948d67bd07e637036125 Author: Field G. Van Zee Date: Mon Dec 3 19:40:39 2018 -0600 Removed symbols from libblis-symbols.def. 
Details: - Removed bli_gemm_md_front() and bli_gemm_md_zgemm() symbols from build/libblis-symbols.def, which will hopefully appease AppVeyor. commit dcb38c4e59c3395c258799e69bfe2104c578c528 Merge: dc184095 375eb30b Author: Field G. Van Zee Date: Mon Dec 3 18:06:19 2018 -0600 Merge branch 'dev' commit 375eb30b0a63ac06a363a5f75f283584258db48b Author: Field G. Van Zee Date: Mon Dec 3 17:49:52 2018 -0600 Added mixed-precision support to 1m method. Details: - Lifted the constraint that 1m only be used when all operands' storage datatypes (along with the computation datatype) are equal. Now, 1m may be used as long as all operands are stored in the complex domain. This change largely consisted of adding the ability to pack to 1e and 1r formats from one precision to another. It also required adding logic for handling complex values of alpha to bli_packm_blk_var1_md() (similar to the logic in bli_packm_blk_var1()). - Fixed a bug in several virtual microkernels (bli_gemm_md_c2r_ref.c, bli_gemm1m_ref.c, and bli_gemmtrsm1m_ref.c) that resulted in the wrong ukernel output preference field being read. Previously, the preference for the native complex ukernel was being read instead of the pref for the native real domain ukernel. This bug would not manifest if the preference for the native complex ukernel happened to be equal to that of the native real ukernel. - Added support for testing mixed-precision 1m execution via the gemm module of the testsuite. - Tweaked/simplified bli_gemm_front() and bli_gemm_md.c so that pack schemas are always read from the context, rather than trying to sometimes embed them directly to the A and B objects. (They are still embedded, but now uniformly only after reading the schemas from the context.) - Redefined cpp macro bli_l3_ind_recast_1m_params() as a static function and renamed to bli_gemm_ind_recast_1m_params() (since gemm is the only consumer). - Added 1m optimization logic (via bli_gemm_ind_recast_1m_params()) to bli_gemm_ker_var2_md(). 
- Added explicit handling for beta == 1 and beta == 0 in the reference gemm1m virtual microkernel in ref_kernels/ind/bli_gemm1m_ref.c. - Rewrote various level-0 macro defs, including axpyris, axpbyris, scal2ris, and xpbyris (and their conjugating counterparts) to explicitly support three operand types and updated invocations to xpbyris in bli_gemmtrsm1m_ref.c. - Query and use the storage datatype of the packed object instead of the storage datatype of the source object in bli_packm_blk_var1(). - Relocated and renamed frame/ind/misc/bli_l3_ind_opt.h to frame/3/gemm/ind/bli_gemm_ind_opt.h. - Various whitespace/comment updates. commit e275def30ac41cadce296560fa67282704f20a02 Merge: 8091998b dc184095 Author: Field G. Van Zee Date: Fri Nov 30 15:39:50 2018 -0600 Merge branch 'master' into amd commit dc18409551f341125169fe8d4d43ac45e81bdf28 Author: Field G. Van Zee Date: Wed Nov 28 11:58:40 2018 -0600 CREDITS file update. commit ee4d2712963816f84d7e3fdd39d93424e1aaf63d Merge: e81c4b56 3d7e8bc3 Author: Field G. Van Zee Date: Wed Nov 28 11:52:57 2018 -0600 Merge pull request #287 from SuperFluffy/fix_configuration_links Fix configuration links commit 3d7e8bc3b8e77693152138e75676f71573e5e6cd Author: Richard Janis Goldschmidt Date: Wed Nov 28 15:56:37 2018 +0100 Fix configuration links commit 6a4885f8be9ecd81423ebf2eb6da75d7981c979b Merge: 1d8aae22 e81c4b56 Author: Field G. Van Zee Date: Tue Nov 27 13:22:59 2018 -0600 Merge branch 'master' into dev commit e81c4b56660b25a39f8fdc09fbe07459c5bd8e8e Merge: 757043ea cfbdb58d Author: Field G. Van Zee Date: Wed Nov 21 17:00:49 2018 -0600 Merge pull request #285 from isuruf/pthread Move LDFLAGS to the end commit cfbdb58de2e44f2e3a3d8b14fceece7aef4b3006 Author: Isuru Fernando Date: Wed Nov 21 14:23:39 2018 -0600 Move LDFLAGS to the end Otherwise the linker will drop flags like -lpthread commit 757043eae8630c0a76e9bb04f2cb0bd72439a86a Merge: e769bf46 7af8fa01 Author: Field G. 
Van Zee Date: Wed Nov 21 13:07:26 2018 -0600 Merge pull request #283 from isuruf/patch-3 Fix MinGW and Cygwin build failures commit 7af8fa01373b7bb30fa3b1fd110fd201c87ea225 Author: Isuru Fernando Date: Wed Nov 21 02:10:05 2018 -0600 Fix blis dll path commit 2acd8dcd23805203a6821358c5e3e09d521fecdf Author: Isuru Fernando Date: Wed Nov 21 02:02:18 2018 -0600 Fix install path of dll.a commit b7b0ad22b151e89e2a6c7782cf4d8d47b4e60734 Author: Isuru Fernando Date: Wed Nov 21 01:54:44 2018 -0600 Test mingw commit bafe521ed0012b7b8814404b78a6c576d8386370 Author: Isuru Fernando Date: Wed Nov 21 01:54:36 2018 -0600 Fixes for mingw commit be831879bd03edcddff8a345161f749ad92215af Author: Isuru Fernando Date: Wed Nov 21 01:39:32 2018 -0600 test gcc shared commit f6b924648c79c4b1c3d3c7fbf85372680aff8362 Author: Isuru Fernando Date: Wed Nov 21 01:39:19 2018 -0600 Don't use .def for gcc commit ce6e4eae6d5e977e6f699acc9cf239be8ac53771 Author: Isuru Fernando Date: Wed Nov 21 01:34:56 2018 -0600 test no threading commit c9169b4685bfe81bc562cf9128b35a6a9884799b Author: Isuru Fernando Date: Wed Nov 21 01:17:36 2018 -0600 Add mingw64 path commit 0f753090eaf4264b743a49ce15de97514bcbe112 Author: Isuru Fernando Date: Wed Nov 21 01:14:52 2018 -0600 Fix PATH commit d424470b1f2fa8717fa54c0245b21341504665f6 Author: Isuru Fernando Date: Wed Nov 21 01:04:26 2018 -0600 Check openmp and pthreads threading commit c73e7601e58239e2dedec6c9f1b752e949254a42 Author: Isuru Fernando Date: Wed Nov 21 00:50:33 2018 -0600 Revert "enable rdp" This reverts commit 368274bcbd0c9232521d14fa28304f35ced0e6d7. 
commit 6209b2e6060b89e65f3405c31333af8952dd63c0 Author: Isuru Fernando Date: Wed Nov 21 00:50:22 2018 -0600 Remove conda commit 0b1b344447b8a2fcd635a48f0ce7ce89b2107dc4 Author: Isuru Fernando Date: Wed Nov 21 00:42:39 2018 -0600 Fix make name commit 7a9838983ba8dd32ac9f87712255721542ff561f Author: Isuru Fernando Date: Wed Nov 21 00:35:27 2018 -0600 Use m2w64-make commit 4c1dedd6a90087807f16353a5d0bcaaade35a7a5 Author: Isuru Fernando Date: Wed Nov 21 00:28:20 2018 -0600 No activate on gcc commit 368274bcbd0c9232521d14fa28304f35ced0e6d7 Author: Isuru Fernando Date: Tue Nov 20 23:40:26 2018 -0600 enable rdp commit 707a5e7f9b07f554e1e9289dd0ce3b7dc4fded6e Author: Isuru Fernando Date: Tue Nov 20 23:39:31 2018 -0600 No conda for mingw build commit 65b0565c0ad9162d4474bd84eabde491fa971538 Author: Isuru Fernando Date: Tue Nov 20 23:19:38 2018 -0600 Check MinGW-w64 commit 9ddffba5847080e0d77d9e6059d05dc4b1d89ba5 Author: Isuru Fernando Date: Wed Nov 21 00:23:34 2018 -0600 Fix MinGW build failure Fixes https://github.com/flame/blis/issues/278 commit 1d8aae220bc52ce8e3a8afaa64b57e5d83480bdc Author: Field G. Van Zee Date: Tue Nov 20 18:42:07 2018 -0600 Track internal scalar datatypes. Details: - Added a num_t datatype bitfield to the obj_t in the form of a new info2 field in the obj_t. This change was made primarily so that in the case of mixed-datatype gemm, the alpha scalar would not need to be cast to the storage datatype of B (or A) before then being cast to the computation datatype just before the macrokernel is called. This double-casting regime could result in loss of precision if the storage datatype of B (or A) is less than the computation precision. In practice, it was likely not going to be a big deal since most usage of alpha is for -1.0, 0.0, and 1.0 (or integer multiples thereof), which can all be represented exactly in single or double precision. 
- The type of objbits_t was changed to uint32_t, so the new format potentially takes up the same space as the previous obj_t definition, assuming no padding inserted by the compiler. Shrinking info to 32 bits and spilling over into a second field was chosen over using the high 32 bits of a single 64-bit objbits_t info field because many of the bitwise operations are performed with enums such as num_t, dom_t, and prec_t, which may take on the type of 32-bit ints. It's easier to just keep all of those bitwise operations in 32 bits than perform a million typecasts throughout bli_type_defs.h and bli_obj_macro_defs.h to ensure that the integers are treated as 64-bit for the purposes of the ANDs, ORs, and bitshifts. - Many comment updates. - Thanks to Devin Matthews and Devangi Parikh for their feedback and involvement during this commit cycle. commit e769bf46b0931d68031af212110484ec98e16908 Author: Field G. Van Zee Date: Tue Nov 20 16:16:53 2018 -0600 Tweak testsuite to issue FAIL for NaN, Inf (#279). Details: - Adjusted the definition for libblis_test_get_string_for_result() in testsuite/src/test_libblis.c so that the "FAIL" string is returned if the computed residual contains either NaN or Inf. Previously, a residual containing NaN would result in the selection of the "PASS" string. Thanks to Devin Matthews for reporting this issue (#279). - Expounded on comment for the macro definitions of bli_isnan() and bli_isinf() in bli_misc_macro_defs.h to make it more obvious why they must remain macros. commit 279deae18fb8b8106161863b46fcb38232314de4 Author: Field G. Van Zee Date: Fri Nov 16 11:34:19 2018 -0600 Added 4x5 matlab plotting scripts to test/3m4m. Details: - Added a new directory, test/3m4m/matlab, containing matlab scripts for plotting 4x5 panels of performance graphs (using the subplot() function) for gemm, hemm, herk, trmm, and trsm across all four floating-point datatypes.
I expect to further refine these scripts as time goes on, but their current state constitutes a good start. commit 7b02c726650336c12286c8ba166d1d0fdf7601a8 Author: Field G. Van Zee Date: Wed Nov 14 13:49:55 2018 -0600 CREDITS file update. commit 84dd298a27033945fa2d3b6e5dce1fe625cd2a0a Author: Field G. Van Zee Date: Wed Nov 14 13:47:45 2018 -0600 Patch to fix msys2/Windows build failure (#277). Details: - Expanded cpp guard in frame/include/bli_x86_asm_macros.h to also check __MINGW32__ in addition to _WIN32, __clang__, and __MIC__. Thanks to Isuru Fernando for suggesting this fix, and also to Costas Yamin for originally reporting the issue (#277). commit 8091998b6500e343c2024561c2b1aa73c3bafb0b Merge: 333d8562 7b5ba731 Author: Field G. Van Zee Date: Wed Nov 14 12:36:35 2018 -0600 Merge branch 'master' into amd commit 7b5ba7319b3901ad0e6c6b4fa3c1d96b579efbe9 Merge: ce719f81 52392932 Author: Field G. Van Zee Date: Wed Nov 14 12:32:01 2018 -0600 Merge branch 'dev' of github.com:flame/blis into dev commit 52392932dc1ea3c16220cc4e6978efcb2f5f0616 Author: Field G. Van Zee Date: Tue Nov 13 22:23:38 2018 +0000 Minor fixes to test/3m4m drivers. Details: - Cleanups to Makefile to allow all test drivers to be built for OpenBLAS and MKL in addition to BLIS. - Fixed copy-paste typos in test_hemm in calls to ssymm_() and dsymm_(). - Fixed incorrect types for betap in BLAS cpp macro branch of test_herk.c. commit 4f12e36a0d0e6df146314b4e50e36c5e7a1af3d3 Author: Field G. Van Zee Date: Tue Nov 13 14:23:12 2018 -0600 Fixed number of columns in first output line. Details: - In previous commit, forgot to remove output column corresponding to the k dimension. commit a2e0cdd7debf8109198536d55af05d5631072fb2 Author: Field G. Van Zee Date: Tue Nov 13 14:15:11 2018 -0600 Added hemm test driver to test/3m4m. Details: - Added a new test_hemm.c test driver to test/3m4m, which was modeled after the driver by the similar name in test. 
Also updated Makefile so that blis-nat-[sm]t would trigger builds for the new driver. commit 0f9b53e84b48d8d73a56cc9889eae3595ca58a78 Author: Field G. Van Zee Date: Tue Nov 13 13:03:15 2018 -0600 Fixed a bug in high-level mixeddt conditional. Details: - Fixed a bug in frame/3/bli_l3_oapi.c in the conditional that divides use of induced method (1m) execution from native execution. The former was intended to only be used in cases where all storage datatypes are complex and the datatype of C is equal to the computation datatype. (If mixed datatypes are detected, native execution would be used.) However, the code in bli_gemm() was erroneously checking the execution datatype instead of the computation datatype, which at that point is guaranteed to be equal to the storage datatype even if the computation datatype contains a different value. Thanks to Devangi Parikh for helping in isolating this bug. commit 333d8562f04eea0676139a10cb80a97f107b45b0 Author: Field G. Van Zee Date: Sun Nov 11 14:28:53 2018 -0600 Added debug output to bli_malloc.c. Details: - Added debug output to bli_malloc.c in order to debug certain kinds of memory behavior in BLIS. The printf() statements are disabled and must be enabled manually. - Whitespace/comment updates in bli_membrk.c. commit ce719f816d1237f5277527d7f61123e77180be54 Author: Field G. Van Zee Date: Sat Nov 10 14:48:43 2018 -0600 More edits to mixeddt matlab scripts. Details: - Renamed scripts in test/mixeddt/matlab: plot_case_all.m -> plot_dom_all.m plot_case_md.m -> plot_dom_case.m plot_all_md.m -> plot_dt_all.m - Added plot_dt_select.m in order to plot select graphs for the main body of the mixeddt paper, and added additional related legend handling in plot_gemm_perf.m. - Added test/mixeddt/matlab/output and a .gitkeep file within in order to force git to recognize the directory. commit bf99e7c14baf45725b698d06ad043b531e3a2763 Author: Field G. Van Zee Date: Thu Nov 8 18:47:17 2018 -0600 Minor updates to test/mixeddt driver. 
Details: - Cleaned up test/mixeddt Makefile in preparation for gathering new data for mixeddt paper, including renaming implementations to "internal" and "ad-hoc" to match the terminology to be used in the paper. - Added new matlab scripts for generating 8 figures, each covering all mixed-precision cases for each mixed-domain case. - Updated the runme.sh script according to changes to Makefile. - Fixed a minor bug in test_gemm.c that may have given incorrect performance in complex, homogeneous storage datatype cases where the computation precision was equal to the storage precisions. (Examples: zzzd, cccs.) commit 4bbb454bf3c361af9e97bfa394a73d610cd9002a Author: Field G. Van Zee Date: Sat Nov 3 19:11:01 2018 -0500 Testsuite docs update for mixed-datatype gemm. Details: - Updated docs/Testsuite.md to include mention of the new mixed-domain and mixed-precision settings, including descriptions. - Updated docs/MixedDatatypes.md to include a brief section on running the testsuite to exercise mixed-datatype functionality, which mostly amounts to a link to the Testsuite.md document. - Minor verbiage change to testsuite output to correct a misleading label associated with the value returned by the query function bli_info_get_simd_num_registers(). (The function does not return the number of SIMD registers present in the hardware, but rather a maximum assumed value for the purposes of allocating temporary microtile workspace on the function stack.) commit 16401ae922b1285437cf5f6867b2764650a95fb0 Merge: f19c33af 2d403a15 Author: Field G. Van Zee Date: Sat Nov 3 19:09:43 2018 -0500 Merge branch 'dev' commit 2d403a1535380a2ebe2ae2c0f5ac54ba7564fbeb Merge: e90e7f30 4a12979f Author: Field G. Van Zee Date: Thu Nov 1 20:18:53 2018 -0500 Merge pull request #275 from RhysU/patch-1 Spelling in FAQ commit 4a12979f65697ed79ba290efd59f4b994ac9429b Author: Rhys Ulerich Date: Thu Nov 1 20:20:59 2018 -0400 Spelling in FAQ commit f19c33af4cbe6f5705b96fbf2b8799c3c2bd75c3 Author: Field G. 
Van Zee Date: Fri Oct 26 17:07:15 2018 -0500 Disallow 64b BLAS integers + 32b BLIS integers. Details: - Print an error message from configure if the user attempts to explicitly configure BLIS for simultaneous use of 64-bit integers in the BLAS API with 32-bit integers in the BLIS API. - Added cpp macro conditional to bli_type_defs.h to mandate that BLIS integers be 64 bits if the BLAS integers are 64 bits. This and the above item take care of issue #274. Thanks to Devin Matthews and Jeff Hammond for suggesting these safeguards. - Slight reorganization and relabeling (for clarity) of BLAS/CBLAS sections and BLIS integer size line of the testsuite configuration output. - Very minor edits to docs/MixedDatatypes.md. commit e90e7f309b3f2760a01e8e09a29bf702754fa2b5 (origin/win-pthreads, win-pthreads) Author: Field G. Van Zee Date: Thu Oct 25 14:09:43 2018 -0500 CHANGELOG update (0.5.0) commit be7c57819cfd48adb175d9a480cc9f37928645c1 (tag: 0.5.0) Author: Field G. Van Zee Date: Thu Oct 25 14:09:40 2018 -0500 Version file update (0.5.0) commit 75da7f2a208ad7d26ed9c6d3e10d08b2a1caf9d6 Author: Field G. Van Zee Date: Thu Oct 25 14:02:41 2018 -0500 ReleaseNotes.md update in advance of next version. Details: - Updated ReleaseNotes.md in preparation for next version. - Updated docs/FAQ.md to reflect recent developments, and other edits. - Minor updates to RELEASING. commit 6fbc456fb3f4401ec951a618990f15a84fdfa236 Author: Field G. Van Zee Date: Thu Oct 25 13:20:25 2018 -0500 Added SALT testing to Travis CI. Details: - Modified .travis.yml to automatically employ the simulation of application-level threading within the testsuite, with supporting changes to common.mk, the top-level Makefile, and travis/do_testsuite.sh. - Added a new pair of input files to testsuite directory with the '.salt' suffix (similar to those with the '.fast' suffix) for testing application-level threading. - Updated docs/BuildSystem.md to document the new make targets 'testblis-salt' and 'checkblis-salt'. 
commit 0e27963a6770e6b64f3299ad0613d5df45d8b6ae Author: Field G. Van Zee Date: Wed Oct 24 12:16:19 2018 -0500 Add bli_pthread_mutex_trylock(). Details: - Added the missing bli_pthread_mutex_trylock() function and prototype to the non-Windows sections of bli_pthread.c and .h. This function isn't needed by BLIS, but I figured why not make the Windows and non-Windows sections consistent with one another. commit 4b683740c12f83804a51ec610b16ce28607d5c85 Author: Field G. Van Zee Date: Wed Oct 24 11:56:16 2018 -0500 Defined bli_pthread_cond_*() and related defs. Details: - Added function definitions for bli_pthread_cond_*() as well as related types and constants to bli_pthread.c, and corresponding prototypes to bli_pthread.h. commit 4b4f8072b9bb495b3e01d45698b0bad3dac31ba8 Author: Field G. Van Zee Date: Wed Oct 24 11:31:46 2018 -0500 Define bli_pthreads barrier types on OS X. Details: - Fully define bli_pthreads barrier-related types on OS X. Only typedef those types in terms of pthreads types on non-Windows, non-Apple OSes (i.e. Linux). commit ad98790dcef6bd9aab7f13d615b987b5daa58757 Author: Field G. Van Zee Date: Tue Oct 23 20:35:05 2018 -0500 Fix names of Windows pthread initializer macros. Details: - Renamed the PTHREAD_ initializer macros in the Windows cpp case to use BLIS_ prefixes to match their non-Windows counterparts. commit 06c23954e6b17219a50c3d37821544a46defaf89 Author: Field G. Van Zee Date: Tue Oct 23 19:16:54 2018 -0500 Defined unified bli_pthreads_*() API for all OSes. Details: - Expanded the bli_pthread_*() -> pthread_*() wrappers in frame/thread/bli_pthread.c to include cases for Windows taken from frame/base/bli_pthread_wrap.c. Now, bli_thread_*() is always defined and always used by BLIS and the BLIS testsuite (in lieu of calling pthreads directly, as before). The implementation used in this new API depends on whether we are building for Windows, and to a lesser extent, whether we are building on OS X. 
For the core API, Windows uses Windows threads, non-Windows (Linux, OS X) uses pthreads. OS X and Windows get barriers implemented in terms of other bli_pthread_*() functions, and Linux gets barriers implemented in terms of pthread_barrier*(). This commit addresses issue #273. - Fixed a bug in the Linux definition of bli_pthread_mutex_unlock(), which was erroneously calling pthread_mutex_lock(). - Minor changes to configure so that the auto-detection executable can be built given the above changes (most notably, turning on POSIX extensions via -D_GNU_SOURCE). - Removed temporary play-test code for shiftd that accidentally got committed into test/3m4m/test_gemm.c. commit eac7d267a017d646a2c5b4fa565f4637ebfd9da7 Author: Field G. Van Zee Date: Mon Oct 22 18:10:59 2018 -0500 Unconditionally define bli_l3_thread_entry(). Details: - Define a dummy bli_l3_thread_entry() function when multithreading is disabled altogether, or enabled via OpenMP. This function was originally necessary when multithreading is enabled via pthreads. By defining the function no matter the threading options given, it is less likely that an AppVeyor Windows build will complain due to a missing symbol in the DLL. (To be clear: AppVeyor was working fine before, but a problem may have arisen if it were switched to an OpenMP build.) - Removed the prototype for bli_l3_thread_entry() from bli_thrcomm_pthreads.c and placed it in bli_thrcomm.h. - Regenerated the symbols list file build/libblis-symbols.def. commit 4ee986f0a74207f4ca29df077929134725d62b80 Author: Field G. Van Zee Date: Mon Oct 22 14:09:44 2018 -0500 Added mixed-datatype testing to Travis CI (#271). Details: - Modified .travis.yml to automatically test the mixed-datatype support of the gemm operation, with supporting changes to common.mk, the top-level Makefile, and travis/do_testsuite.sh. 
- Added a new pair of input files to testsuite directory with the '.mixed' suffix (similar to those with the '.fast' suffix) for testing mixed-datatype gemm. - Updated docs/BuildSystem.md to document the new make targets 'testblis-md' and 'checkblis-md'. commit c3c6ebc9c6244053d654a9b0c955acb2fef42ee8 Author: Field G. Van Zee Date: Sun Oct 21 18:48:54 2018 -0500 Fixed thrinfo_t printing for small problems. Details: - Fixed a bug in the code that prints out the communicator and work ids from the various threads' thrinfo_t nodes. This bug manifested when the dimension being parallelized was not large enough such that every thread was assigned actual work (since the minimum amount of work is determined by the register blocksize in the dimension being parallelized). In those cases, the threads that receive no work in that dimension do not finish building their thrinfo_t tree, leaving lower-level nodes non-existent. (The bug itself was usually observed as a segfault when the printing code attempted to dereference all the way down the thrinfo_t tree.) The solution involves explicitly checking each node as it is dereferenced, and if at any time NULL is found, all subsequent communicator and work ids are set to -1. commit 73a222c0d99dcc221be7dea10eaebf844f31f72e Author: Field G. Van Zee Date: Sat Oct 20 14:13:04 2018 -0500 Minor edits to 'configure --help' text. commit 14f3d5e6df183819a0c393b2661ad15df0786544 Author: Field G. Van Zee Date: Fri Oct 19 20:39:35 2018 -0500 Refresh libblis-symbols.def post-merge 090e4f0. commit 090e4f08fc2f429a1b2db77b0a6f8276f892a7ac Merge: c9be5889 0854e880 Author: Field G. Van Zee Date: Fri Oct 19 18:41:10 2018 -0500 Merge branch 'master' into dev commit 0854e880b0848e0c2e3d0644c93c80b0fd13c0dc Merge: 4e38a8d4 343a2715 Author: Field G. Van Zee Date: Fri Oct 19 18:05:00 2018 -0500 Merge pull request #261 from flame/win-pthreads Implement missing pthreads function on Windows commit c9be5889fbe947c64ef75740662e4d63032f4c35 Author: Field G.
Van Zee Date: Fri Oct 19 17:42:40 2018 -0500 Added "Known issues" section to Multithreading.md. Details: - Added known issues section to Multithreading.md. - Trivial changes to MixedDatatypes.md, Sandboxes.md. commit 343a2715ebee28d250ee41b914abdcd1dc77c344 Author: Field G. Van Zee Date: Fri Oct 19 16:59:19 2018 -0500 Whitespace changes to configure, bli_pthread_wrap. Details: - Mostly whitespace changes (spaces to tabs) to configure and bli_pthread_wrap.c and .h. commit 3678a1cd518df9447b4b1ea86885eb2ba8abcf6e Merge: 85397cd4 4e38a8d4 Author: Field G. Van Zee Date: Fri Oct 19 16:11:31 2018 -0500 Merge branch 'master' into win-pthreads commit 4e38a8d4eebb18ead74e644fac76a4fde8e7f6c6 Author: Field G. Van Zee Date: Fri Oct 19 15:54:15 2018 -0500 Implemented python version checking in configure. Details: - Added python version checking to configure script. (Recall that python is needed to execute the flatten-headers.py script.) Minimum versions of python needed are currently as follows: python2: 2.7 or later python3: 3.5 or later The standard search order for python interpreters is: python python3 python2 The PYTHON environment variable is also supported and will be checked before the standard search order list. - Updated BuildSystem.md to include: a minimum make version; mention that the C compiler must actually be a C99 compiler; and the caveat that Windows builds do not require pthreads since BLIS can provide an implementation of pthreads internally. commit 85397cd4fa52f6c4c33f4fb715478c55533c680e Author: Field G. Van Zee Date: Fri Oct 19 13:12:43 2018 -0500 Added explanatory comment to bli_pthread.c. Details: - Added a verbose comment to bli_pthread.c that explains why a bli_ wrapper to pthreads APIs is useful. commit 53c07035ef61cc9b8469636d4d8fa5085f37652d Author: Field G. Van Zee Date: Fri Oct 19 12:53:03 2018 -0500 Refresh libblis-symbols.def from bb6df28.
Details: - Forgot to regenerate the symbols file after the previous commit (bb6df281) in which shiftd operation was introduced. commit 473ce54f5fbea4860ac0514e7e8b022c1ea03e63 Author: Field G. Van Zee Date: Thu Oct 18 19:03:56 2018 -0500 Added bli_pthread_*() API. Details: - Defined a bli_pthread_*() API so that the testsuite, when being linked against a Windows DLL, will be able to access pthreads functionality without those pthreads functions being explicitly exported by the DLL. Instead, we export the bli_pthread_*() layer, which uses types and functions that are identical to pthreads, but adds a 'bli_' prefix. Only a few basic functions are present in the bli_pthreads_*() API for now. Thanks to Devin Matthews and Isuru Fernando for their help on a related PR (#261) that this commit will hopefully facilitate. - Updated testsuite so that it calls bli_pthread_*() layer instead of pthread_*() functions directly. - Regenerated build/libblis-symbols.def. - Comment updated to build/regen-symbols.sh. commit bb6df2814fcaa2fa62a549379f61be2f8667a598 Author: Field G. Van Zee Date: Thu Oct 18 17:11:39 2018 -0500 Defined a new level-1d operation: shiftd. Details: - Defined a new level-1d operation called 'shiftd', including object and typed APIs. This operation adds a scalar value to every element along an arbitrary diagonal of a matrix. Currently, shiftd is implemented in terms of the addv kernel. (The scalar is passed in as the x vector with an increment of zero.) - Replaced ad-hoc usage of setd and addd (after creating a temporary matrix object) with use of shiftd, which is much more concise, in various test driver files in the testsuite. Similar changes were made to the standalone test drivers and the example code. - Added documentation entries in BLISObjectAPI.md and BLISTypedAPI.md for bli_shiftd() and bli_?shiftd(), respectively. - Added observed object properties to level-1d documentation in BLISObjectAPI.md. 
commit 53e0a0c9b38e8525c7224e280342ef56328af567 Merge: 1c7247b6 ec676799 Author: Field G. Van Zee Date: Thu Oct 18 14:54:59 2018 -0500 Merge branch 'master' into win-pthreads commit ec67679990660a60362a49406595383672812287 Author: Field G. Van Zee Date: Thu Oct 18 14:27:02 2018 -0500 Refreshed Windows symbol list; added regen script. Details: - Moved windows/build/libblis-symbols.def to build/libblis-symbols.def. Updated link commands in common.mk accordingly. - Added a new script build/regen-symbols.sh that will regenerate the libblis-symbols.def file in its new location after building a haswell-targeted shared library. Thanks to Isuru Fernando for providing the symbol generation command. - Ran the new script to refresh the symbols file. commit fdad54ab8eee4a7efd04ec4afb3e6902eb22e60a Author: Field G. Van Zee Date: Thu Oct 18 12:43:22 2018 -0500 Removed old symbol from libblis-symbols.def. Details: - Removed bli_gemm_ker_var1() from windows/build/libblis-symbols.def since this function is no longer compiled. commit 49d3f9fcbb4a75553439f97c099ea48d85763eea Merge: 779d64dc 3c527256 Author: Field G. Van Zee Date: Wed Oct 17 18:00:40 2018 -0500 Merge branch 'master' into dev commit 3c52725693d0d7726e1c8fb224f9b1ef786db8b9 Author: Field G. Van Zee Date: Wed Oct 17 14:56:22 2018 -0500 Renamed/moved l3 zen ukernels to haswell kernel set. Details: - Renamed the microkernels in kernels/zen/3 to kernels/haswell/3 and then updated the file contents to use the 'haswell' infix. - Updated bli_cntx_init_zen.c and bli_cntx_init_haswell.c according to above function renames. - Moved/updated the corresponding prototypes in bli_kernels_zen.h to bli_kernels_haswell.h. - Updated config_registry according to above changes. - NOTE: This rename reflects the fact that haswell microkernels are specifically written to overcome the floating-point latency for FMA instructions on Intel Haswell-like architectures, which can issue two FMA instructions per cycle. 
These ukernels happen to work fine on AMD Zen-based architectures. However, Zen only issues one FMA per cycle, which, while halving its floating-point throughput, gives it extra flexibility in the design of its microkernels--namely, mr and nr can be smaller and still overcome the floating-point latency for those single-issue cores. A smaller value of mr and nr allows for a larger value of kc, which may be useful in some situations. In the future, we may write such Zen-specific microkernels to take advantage of this additional flexibility. commit 71c5832d5f5596f25204980803423d08143a4010 Author: Field G. Van Zee Date: Wed Oct 17 14:11:01 2018 -0500 Consolidated slab/rr-explicit level-3 macrokernels. Details: - Consolidated the *sl.c and *rr.c level-3 macrokernels into a single file per sl/rr pair, with those files named as they were before c92762e. The consolidation does not take away the *option* of using slab or round-robin assignment of micropanels to threads; it merely *hides* the choice within the definitions of functions such as bli_thread_range_jrir(), bli_packm_my_iter(), and bli_is_last_iter() rather than expose that choice explicitly in the code. The choice of slab or rr is not always hidden, however; there are some cases involving herk and trmm, for example, that require some part of the computation to use rr unconditionally. (The --thread-part-jrir option controls the partitioning in all other cases.) - Note: Originally, the sl and rr macrokernels were separated out for clarity. However, aside from the additional binary code bloat, I later deemed that clarity not worth the price of maintaining the additional (mostly similar) codes. commit 57eab3a4f0e43099fc2ff189df9fcc0d7801c2cd Author: Field G. Van Zee Date: Wed Oct 17 11:29:20 2018 -0500 CREDITS file update. 
commit 6722ec21817cbab9d86ee63f00984eb407b5e627 Author: Ye Luo Date: Wed Oct 17 11:26:00 2018 -0500 Fix bgclang compilation on BGQ (#270) * Fix bgq kernels * Support bgq with bgclang commit 1c7247b6d146fc728d7c4240e4e069e33f8f8868 Merge: c1bc5530 6c5a1aaf Author: Devin Matthews Date: Tue Oct 16 14:44:32 2018 -0500 Merge branch 'win-pthreads' of github.com:flame/blis into win-pthreads commit c1bc5530d51bf55b4aa3c35165f6d4452a0fd779 Author: Devin Matthews Date: Tue Oct 16 14:44:10 2018 -0500 Don't call pthread_once in auto-detect. commit b9c61d03f542a2e92551ff0595415bec3076ab25 Merge: 5a1e461f 3612ecac Author: Field G. Van Zee Date: Tue Oct 16 14:39:57 2018 -0500 Merge branch 'nested-omp-patch' commit 5a1e461ffe09ed200ee2fc7aafccf6dd7e8c0080 Author: Field G. Van Zee Date: Tue Oct 16 14:21:45 2018 -0500 Execute flatten-headers.py via $(PYTHON). Details: - Execute build/flatten-headers.py python script via $(PYTHON) in common.mk. This allows distributions that define the current/preferred python interpreter in the PYTHON environment variable to use that interpreter when executing flatten-headers.py. Thanks to Isuru Fernando for this suggestion, and for Dave Love for submitting the initial issue/request. commit 6c5a1aaff540b19672e91501e894ed695aee322b Author: Devin Matthews Date: Tue Oct 16 10:15:59 2018 -0500 Fix type in bli_pthread_wrap.c commit 29e6245816760b1bd4ac738d7d3e11a9d9d13473 Merge: 0b73209f ed657714 Author: Devin Matthews Date: Tue Oct 16 10:12:25 2018 -0500 Merge branch 'master' into win-pthreads commit 0b73209f6b22cc024169146d343627f6999b63d8 Author: Devin Matthews Date: Tue Oct 16 10:02:06 2018 -0500 Add missing argument to WaitForSingleObject and use $is_win in configure to turn off pthreads. commit ed65771482a705f7ed028d822489766327b44e76 Author: Field G. Van Zee Date: Mon Oct 15 17:54:45 2018 -0500 Fixed merge fail on testsuite threading macros. 
Details: - Applied the following C preprocessor macro renames BLIS_DEFAULT_MR_THREAD_MAX -> BLIS_THREAD_MAX_IR BLIS_DEFAULT_NR_THREAD_MAX -> BLIS_THREAD_MAX_JR BLIS_DEFAULT_M_THREAD_RATIO -> BLIS_THREAD_RATIO_M BLIS_DEFAULT_N_THREAD_RATIO -> BLIS_THREAD_RATIO_N in src/test_libblis.c. This is apparently the result of a failure by git to properly merge the 'master' and 'amd' branches in the previous commit. (The 'master' branch contained a commit, 53a9ab1, in which these same cpp macros were renamed throughout the source distribution.) commit dc5fd898af8c74c2e2a75fc647157da0d04dd922 Merge: 667d3929 637c2ce7 Author: Field G. Van Zee Date: Mon Oct 15 17:41:35 2018 -0500 Merge branch 'amd' commit 779d64dc3091dea6b7530283304e52878151d218 Author: Field G. Van Zee Date: Mon Oct 15 17:13:18 2018 -0500 Added entry for xpbym to input.operations.fast. Details: - Forgot to add an entry for the new xpbym operation to input.operations.fast in previous commit. commit 5fec95b99f61761963834f62a9867f797687813c Author: Field G. Van Zee Date: Mon Oct 15 16:37:39 2018 -0500 Implemented mixed-datatype support for gemm. Details: - Implemented support for gemm where A, B, and C may have different storage datatypes, as well as a computational precision (and implied computation domain) that may be different from the storage precision of either A or B. This results in 128 different combinations, all which are implemented within this commit. (For now, the mixed-datatype functionality is only supported via the object API.) If desired, the mixed-datatype support may be disabled at configure-time. - Added a memory-intensive optimization to certain mixed-datatype cases that requires a single m-by-n matrix be allocated (temporarily) per call to gemm. This optimization aims to avoid the overhead involved in repeatedly updating C with general stride, or updating C after a typecast from the computation precision.
This memory optimization may be disabled at configure-time (provided that the mixed-datatype support is enabled in the first place). - Added support for testing mixed-datatype combinations to testsuite. The user may test gemm with mixed domains, precisions, both, or neither. - Added a standalone test driver directory for building and running mixed-datatype performance experiments. - Defined a new variation of castm, castnzm, which operates like castm except that imaginary values are not touched when casting a real operand to a complex operand. (By contrast, in these situations castm sets the imaginary components of the destination matrix to zero.) - Defined bli_obj_imag_is_zero() and substituted calls in lieu of all usages of bli_obj_imag_equals() that tested against BLIS_ZERO, and also simplified the implementation of bli_obj_imag_equals(). - Fixed bad behavior from bli_obj_is_real() and bli_obj_is_complex() when given BLIS_CONSTANT objects. - Disabled dt_on_output field in auxinfo_t structure as well as all accessor functions. Also commented out all usage of accessor functions within macrokernels. (Typecasting in the microkernel is still feasible, though probably unrealistic for now given the additional complexity required.) - Use void function pointer type (instead of void*) for storing function pointers in bli_l0_fpa.c. - Added documentation for using gemm with mixed datatypes in docs/MixedDatatypes.md and example code in examples/oapi/11gemm_md.c. - Defined level-1d operation xpbyd and level-1m operation xpbym. - Added xpbym test module to testsuite. - Updated frame/include/bli_x86_asm_macros.h with additional macros (courtesy of Devin Matthews). commit 3612ecac98a9d36c3fcd64154121d420bb69febd (origin/nested-omp-patch) Author: Field G. Van Zee Date: Thu Oct 11 15:16:41 2018 -0500 Added comments to nested OpenMP handling code. Details: - Added comments to bli_thrcomm_openmp.c relating to changes made in 6ac0c80 and 1064d79.
commit 667d3929ee20e94849b4e25b693b4037b7e3f350 Author: Field G. Van Zee Date: Thu Oct 11 11:47:57 2018 -0500 Added Fortran APIs for some thread functions. Details: - Defined Fortran-77 compatible APIs for bli_thread_set_num_threads() and bli_thread_set_ways(). These wrappers are defined in frame/compat/blis/thread/b77_thread.c. Thanks to Kay Dewhurst for suggesting these new interfaces. - Added missing prototype for bli_thread_set_ways() in bli_thread.h and removed prototypes for non-existent functions bli_thread_set_*_nt(). - CREDITS file update. commit 1064d79711f03a0541b92d8b8b9b7e25e04097a5 Author: Devin Matthews Date: Thu Oct 11 11:14:25 2018 -0500 Adjust rntm_t struct as well. commit 6ac0c805609b85616ddb32e50101c4f9feb25a35 Author: Devin Matthews Date: Thu Oct 11 10:45:07 2018 -0500 Fix OMP nesting problem. Detect when OpenMP uses fewer threads than requested and correct accordingly, so that we don't wait forever for nonexistent threads. Fixes #267. commit 53a9ab1c85be14dcfd2560f5b16e898e3e258797 Author: Field G. Van Zee Date: Wed Oct 10 15:11:09 2018 -0500 Renamed thread auto-factorization macro constants. Details: - Renamed the following C preprocessor macros whose fallback/default values are specified within frame/include/bli_kernel_macro_defs.h: BLIS_DEFAULT_MR_THREAD_MAX -> BLIS_THREAD_MAX_IR BLIS_DEFAULT_NR_THREAD_MAX -> BLIS_THREAD_MAX_JR BLIS_DEFAULT_M_THREAD_RATIO -> BLIS_THREAD_RATIO_M BLIS_DEFAULT_N_THREAD_RATIO -> BLIS_THREAD_RATIO_N - Renamed the above cpp macro overrides within the knl, skx, and zen sub-configurations, as well as invocations of those macros in bli_rntm.c. - Moved config/zen/bli_kernel.h to an 'old' directory as it is no longer used by any code within BLIS. commit 637c2ce794b0414ba8b25e9a452f7d64f825d63a Author: Field G. Van Zee Date: Tue Oct 9 17:18:04 2018 -0500 Updated column index range for irun.py -q. Details: - Forgot to apply the column index range fix in 10f179f to situations when "quiet" mode (-q) is requested. 
This commit applies the new column index range modifications to the quiet case. commit e2a59400bdda7ed7ee0ff00edea70c00ed593b6c Author: Field G. Van Zee Date: Tue Oct 9 15:29:48 2018 -0500 Allow trsm_l parallelism in the jc loop. Details: - Previously, trsm was consolidating all ways of parallelism into the jr loop. This was unnecessary and to some degree detrimental on some types of hardware. Now, any parallelism bound for the jc loop will be applied to the jc loop, while all other loops' parallelism is funneled to the jr loop. Thanks to Devangi Parikh for helping investigate this issue and suggesting the fix. - NOTE: This change affects only left-side trsm. However, right-side trsm is currently implemented in terms of the left-side case, and thus the change effectively applies to both left and right cases. commit f1dba506c970f14e612580d3c171e7c5ffd0a5fb Author: Field G. Van Zee Date: Mon Oct 8 17:59:41 2018 -0500 Output threading status/params from testsuite. Details: - Updated testsuite to output various parameters related to parallelism in BLIS. These parameters include: - threading status: disabled, openmp, or pthreads; - thread partitioning for jr/ir loops: slab or rr (round-robin); - ways of parallelism from environment variables, and also actual values used by gemm, herk, trmm_l, trmm_r, trsm_l, and trsm_r for square problems (assuming all dimensions are set to 1000); - automatic thread factorization parameters. - Also output the status of two relatively new configure-time options: libmemkind and the sandbox. commit 10f179fb13fc1179921a4ef8efdd2174f01e07da Author: Field G. Van Zee Date: Mon Oct 8 14:36:38 2018 -0500 Updated irun.py to use updated column index range. Details: - Updated the irun.py script so that it updates the matlab column index range (if found) to reflect the additional columns of data that are substituted in. Thanks to Devangi Parikh for recognizing and reporting this issue.
commit c244a716c97849dee41f52b5f424116aae1b710b Author: Field G. Van Zee Date: Sun Oct 7 20:59:40 2018 -0500 Added missing -r option to configure --help output. Details: - Added inadvertently-omitted mention of -r option-equivalent to --thread-part-jrir to the output for 'configure --help'. Also made minor edits to the same text. commit c92762ecdca1eb0b08c8acd583b4739a1e3fbd39 Author: Field G. Van Zee Date: Sun Oct 7 20:30:32 2018 -0500 Added option of slab or rr partitioning in jr/ir. Details: - Updated existing macrokernel function names and definitions to explicitly use slab assignment of micropanels to threads, then created duplicate versions of macrokernels that explicitly use round-robin assignment instead of slab. NOTE: As in ac18949, trsm_r macrokernels were not substantially updated in this commit because they are currently disabled in bli_trsm_front.c. - Updated existing packing function (in blk_packm_blk_var1.c) to explicitly use slab partitioning, and then duplicated for round-robin. - Updated control tree initialization to use the appropriate macrokernel and packm function pointers depending on which method (slab or rr) was enabled at configure-time. - Updated configure script to accept new --thread-part-jrir=[slab|rr] option (-m [slab|rr] for short), which allows the user to explicitly request either slab or round-robin assignment (partitioning) of micropanels to threads. - Updated sandbox/ref99 according to above changes. - Minor updates to build/add-copyright.py. commit 98e01ea04bfe1032e5bd4781043afd84f864a19e Merge: ac18949a 541b8a3b Author: Field G. Van Zee Date: Thu Oct 4 20:44:12 2018 -0500 Merge branch 'master' into amd commit 541b8a3b3e9af4078f5e6fb2f9608d681839952a Author: Field G. Van Zee Date: Thu Oct 4 20:39:06 2018 -0500 Removed 1h short-circuit from bli_clock_min_diff(). Details: - Removed a guard from bli_clock_min_diff() that would return 0 if the time delta was greater than 60 minutes.
This was originally intended to disregard extremely large values under the assumption that the user probably didn't intend to run a test that long. However, since it is in bli_clock_min_diff(), it doesn't actually help short-circuit an implementation that is hanging or looping infinitely, since such an implementation would first have to finish before the bli_clock_min_diff() is called. Thanks to Kiran Varaganti for reporting this issue. commit 8bf30eb4735872388b5317883d99b775a344ce25 Author: Devangi N. Parikh Date: Wed Oct 3 22:22:29 2018 -0400 Fixed runme.sh in test/studies/thunderx2 Details: - Fixed the setting of threads for a single core run. commit f6f2456ba2afa8f85f43c7c2c90acc439d61d94f Author: Devangi N. Parikh Date: Wed Oct 3 21:43:46 2018 -0400 Fixed the Makefile in test/studies/thunderx2 Details: - Fixed target for make-all-st and make-all-mt so that the armpl targets are built commit 743a1a6dec1bd3908f0f15513b501c9bd59715b3 Author: Field G. Van Zee Date: Wed Oct 3 14:40:10 2018 -0500 Fixed misleading version query from gcc 7+. Details: - gcc 7 introduced new behavior to the -dumpversion option whereby only the major version component is output. However, as part of this change, gcc 7 also introduced a new option, -dumpfullversion, which is guaranteed to always output the major, minor, and revision numbers. If we are using gcc 7 or later, we re-query the version string with this new option and then re-parse the result so as to avoid misleading output from configure (e.g. using gcc 7.3.0 is reported as 7.7.7). commit de07840ba5672b9d7b2ed2b918974e98c3f249fb Author: Field G. Van Zee Date: Wed Oct 3 13:57:25 2018 -0500 Whitespace, https updates to README.md. Details: - Reformatted to fit all lines within 80 columns, unless a link is too long to fit on a single line. - Changed some links from http to https. commit b8dfd82e0d1afda4ee5436662d63515a59b2dee3 Author: Devin Matthews Date: Tue Oct 2 15:37:12 2018 -0500 Get pthreads via blis.h in the test driver. 
commit d0c0c20b7bd3ecf914b5910a50f618fb7d7aa355 Author: Devin Matthews Date: Tue Oct 2 15:16:00 2018 -0500 There seems to be a problem with _POSIX_BARRIERS on Travis. commit 0904d9e4df0c8a256ac35c491f14a587ebe9fca2 Author: Devin Matthews Date: Tue Oct 2 15:04:36 2018 -0500 *Always* use Windows primitives instead of pthreads. commit 998317d309934cd7129f8c818ea6e5f07534ebc8 Author: Devin Matthews Date: Tue Oct 2 14:43:24 2018 -0500 Remove pthreads from appveyor build. commit 627d0c5bfd4b7b149803587391c93b164c11ced5 Author: Devin Matthews Date: Tue Oct 2 14:40:55 2018 -0500 Combine the alternative barrier implementation for macOS with the pthread wrapper for Windows. Also implement pthread_{create,join} for Windows. commit 81d2c064a209df7eca7d6103696ca3a137a7f82e Author: Devin Matthews Date: Tue Oct 2 11:46:36 2018 -0500 Add wrapper for basic pthreads functionality (mutex, once) with MSVC. commit d33f130ea621fca1dccb30631f454d237918eb04 Author: Devin Matthews Date: Tue Oct 2 11:45:43 2018 -0500 Some configure changes: 1) Allow environment variables to be set anywhere in the argument list. 2) Allow any environment variable to be set. 3) Allow LIBPTHREAD to be set to null without getting defaulted to -lpthread. commit 9d5f1c4f3bf70c2c0ea84bfa326a0113ae2d176c Author: Field G. Van Zee Date: Mon Oct 1 17:39:26 2018 -0500 Patch to avoid gcc warning in blastest/f2c/open.c. Details: - Use the modulo operator to limit the size of an integer that is given to sprintf(). This avoids a warning in some versions of gcc about the integer potentially overflowing the available space in the string into which the integer is being printed. commit 0c3cd00ba76de607e807f8deb04b1a2ce18ea7a8 Author: Field G. Van Zee Date: Mon Oct 1 16:18:25 2018 -0500 More README.md updates. Details: - Replaced much of "Getting Started" section with a shortened version of the bullet list of documentation currently shown in the github wiki page. Thanks to Devangi Parikh for her feedback in this change.
commit 8eaf34bd23b30a1857a50d7142ee9811895f24bf Author: Field G. Van Zee Date: Mon Oct 1 14:29:07 2018 -0500 Very minor README.md update. commit 599090e0eb41b2706fa1231fa7b90096f3281678 Author: Field G. Van Zee Date: Mon Oct 1 14:04:30 2018 -0500 README.md update. Details: - Added language mentioning SHPC group to Introduction. commit ac18949a4b9613741b9ea8e5026d8083acef6fe4 Author: Field G. Van Zee Date: Sun Sep 30 18:54:56 2018 -0500 Multithreading optimizations for l3 macrokernels. Details: - Adjusted the method by which micropanels are assigned to threads in the 2nd (jr) and 1st (ir) loops around the microkernel to (mostly) employ contiguous "slab" partitioning rather than interleaved (round robin) partitioning. The new partitioning schemes and related details for specific families of operations are listed below: - gemm: slab partitioning. - herk: slab partitioning for region corresponding to non-triangular region of C; round robin partitioning for triangular region. - trmm: slab partitioning for region corresponding to non-triangular region of B; round robin partitioning for triangular region. (NOTE: This affects both left- and right-side macrokernels: trmm_ll, trmm_lu, trmm_rl, trmm_ru.) - trsm: slab partitioning. (NOTE: This affects only left-side macrokernels trsm_ll, trsm_lu; right-side macrokernels were not touched.) Also note that the previous macrokernels were preserved inside of the 'other' directory of each operation family directory (e.g. frame/3/gemm/other, frame/3/herk/other, etc). - Updated gemm macrokernel in sandbox/ref99 in light of above changes and fixed a stale function pointer type in blx_gemm_int.c (gemm_voft -> gemm_var_oft). - Added standalone test drivers in test/3m4m for herk, trmm, and trsm and minor changes to test/3m4m/Makefile. - Updated the arguments and definitions of bli_*_get_next_[ab]_upanel() and bli_trmm_?_?r_my_iter() macros defined in bli_l3_thrinfo.h. - Renamed bli_thread_get_range*() APIs to bli_thread_range*().
commit b952ca8feb6f17f71a4512649c2aa72bdee9c8f4 Author: Field G. Van Zee Date: Fri Sep 28 16:12:32 2018 -0500 CREDITS file update. commit 7d96fc437ebaa9dd2d7071865b5df16402fadd64 Author: Field G. Van Zee Date: Fri Sep 28 15:40:45 2018 -0500 Allow slashes ('/') in version tags. Details: - Updated the configure script to allow slashes in version string. This is needed so that downstream maintainers (such as those for Debian) can create local tags such as "upstream/0.4.1". Thanks to M. Zhou for reporting this issue via PR #256 and providing me the information needed to debug the problem. commit 5fdddf6f37c64da093c7f59e3a85214e819ae652 Author: Field G. Van Zee Date: Fri Sep 28 11:25:54 2018 -0500 Removed 'debian' directory. Details: - Removed the top-level 'debian' directory. This directory is apparently no longer needed (issue #257). Thanks to M. Zhou and Nico Schlömer for their contributions. commit 60b2650d7406d266feffe232c2d5692a9e3886d0 Author: Field G. Van Zee Date: Mon Sep 24 15:04:45 2018 -0500 Added statistics-collecting irun.py script. Details: - Added irun.py script to 'build' directory. This irun.py script is a python script for repeatedly invoking a test driver executable, such as those found in test/3m4m, and replace the performance output column with four columns that aggregate statistics. Specifically, the script reports the minimum, average, maximum, and standard deviation for each problem size. This script is useful especially (though not exclusively) when trying to determine the impact of relatively minor changes to the code, or other small optimizations that may be difficult to distinguish from "noise." One way this "noise" manifests is that a test executable may run slightly slower or faster for all problem sizes (and all implementations) tested by the executable over the life of a single execution. 
The cause of these minor across-the-board perturbations in the overall performance signatures is unknown, though we hypothesize that it may relate to any number of issues such as operating system scheduling, where in memory the program is loaded, or how the CPU clock frequency is throttled at the time of execution. Regardless of the source of these subtle performance anomalies, the statistical properties reported by the irun.py script help the user to more precisely characterize the underlying performance exhibited by any given test driver, which allows him or her to make better judgments about the true difference in performance between two implementations, or minor changes within a single implementation. commit 807a654888117fb3a27ea36384f1c1c11b882cd5 Author: Field G. Van Zee Date: Thu Sep 20 15:41:05 2018 -0500 Fixed confusing configure message for libmemkind. Details: - Corrected feedback echoed to user by configure when libmemkind is found but not explicitly requested. In these cases, configure would echo a message that it had received an explicit request to enable libmemkind, which was not accurate, even if the end result was the same--that libmemkind is enabled by default when it is found. Thanks to Devangi Parikh for reporting this issue. commit 02adab427c779b0aaf38a5877a5f0246b1909e8f Author: Devangi N. Parikh Date: Thu Sep 20 14:38:50 2018 -0400 Created a 'thunderx2' subdirectory within test/studies Details: - Created a 'thunderx2' subdirectory within test/studies to house various level-3 test driver used to measure performance on ThunderX2. commit d7537fb51dac0636591fc7c68261a2322642ab3c Merge: dad07245 c03728f1 Author: Field G. Van Zee Date: Wed Sep 12 15:24:20 2018 -0500 Merge branch 'dev' commit dad07245dbcfaf35232ec379ba756eb133c361c1 Author: Devangi N. 
Parikh Date: Wed Sep 12 04:16:58 2018 -0500 Fixed yet another bug in runme script in test/studies Details: - Fixed another copy-paste bug commit e669057fe35f2037d8111af687d84a0ecf6d7a2a Author: Devangi N. Parikh Date: Tue Sep 11 22:29:42 2018 -0500 Fixed bug in runme script in test/studies Details: - Fixed bug in runme script for skx studies that set the number of threads incorrectly commit 232fdc3df3e01ae3f86d53767bd14eb93b511e6e Author: Devangi N. Parikh Date: Mon Sep 10 18:45:50 2018 -0500 Updated runme script in test/studies. Details: - Updated runme script for skx studies to run multithreading tests on 1 and 2 sockets. commit c03728f1f45edb5e434db90ab8a77ba0184a682b Author: Field G. Van Zee Date: Mon Sep 10 17:54:27 2018 -0500 Various minor cleanups. Details: - Rewrote bli_winsys.c to define bli_setenv() and bli_sleep() unconditionally, but differently for Windows and non-Windows, but then disabled the definition of bli_setenv() entirely since BLIS no longer needs to set environment variables. Updated bli_winsys.h accordingly, and call bli_sleep() from within testsuite instead of sleep() directly. - Use #if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS != 200809L) instead of #if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS < 0) when guarding against local definition of pthread barrier in testsuite. (The description for unistd.h implies that _POSIX_BARRIERS should always be set to 200809L when barriers are supported, though I won't be surprised if we encounter a case in the future where it is set to something else such as 1 while still supported.) - Removed old _VERS_CONF_INST definitions and installation rules in top-level Makefile. These are no longer needed because we no longer output libraries with the version and configuration name as substrings. - Comment/whitespace updates in Makefile, config.mk.in, common.mk, configure, bli_extern_defs.h, and test_libblis.h. - Added mention of 1m to README.md and other trivial tweaks. 
commit e249a00a82908054ecd307cf602c8801275903e8 Author: Field G. Van Zee Date: Mon Sep 10 16:48:35 2018 -0500 Imported skx dgemm ukernel from skx-redux branch. Details: - Added the new bli_dgemm_skx_asm_16x14.c microkernel from the skx-redux branch, along with appropriate blocksizes in bli_cntx_init_skx.c and a prototype in bli_kernels_skx.h. (Devin has not yet written the sgemm analogue, so for now we will continue using the older sgemm ukernel.) - Updated frame/include/bli_x86_asm_macros.h with a minor change that was present within the skx-redux branch. commit e93b01ff60bf9742baa5eefd93e208d1219e7a43 Author: Isuru Fernando Date: Sun Sep 9 15:57:43 2018 -0500 Windows DLL support (#246) * Enable shared * Enable rdp * Add support for dll * Use libblis-symbols.def * Fix building dlls * Fix libblis-symbols.def * Fix soname * Fix Makefile error * Fix install target * Fix missing symbols * Add BLIS_MINUS_TWO * Add path to dll * Fix OSX soname * Add declspec for dll * Add -DBLIS_BUILD_DLL * Replace @enable_shared@ in config * switch to auto for now * blis_ -> bli_ * Remove BLIS_BUILD_DLL in make check * change auto->haswell * enable_shared_01 * Add wno-macro-redefined * print out.cblat3 * BLIS_BUILD_DLL -> BLIS_IS_BUILDING_LIBRARY * Use V=1 * Remove fpic for windows * Remember LIBPTHREAD * Remove libm for windows * Remember AR * Fix remembering libpthread * Add Wno-maybe-uninitialized in only gcc * Don't do blastest for shared for now * Fix install target And remove unnecessary change * test auto and x86_64 * Fix install target again * Use IS_WIN variable * Remove leading dot from LIBBLIS_SO_MAJ_EXT * Make is_win yes/no * Add comments for windows builds * Change if else blocks location commit 1330d5c4bc3b644ec0af54c3939a5b9f00eacd9c Author: Field G. Van Zee Date: Fri Sep 7 19:37:59 2018 -0500 Employ "user" cflags for tl Makefile test targets. 
Details: - Use get-user-cflags-for() to generate cflags when compiling BLAS test drivers and BLIS testsuite from top-level Makefile. Meant to include these changes in previous commit (4b5437e). Thanks to Isuru Fernando for pointing out this oversight. commit 4b5437ec7afb2befffffbb83f7872bcb4fc61e51 Author: Field G. Van Zee Date: Fri Sep 7 17:24:32 2018 -0500 Define a cpp macro specific to BLIS compilation. Details: - Tweaked the cflags functions in common.mk so that a new preprocessor macro, BLIS_IS_BUILDING_LIBRARY, is defined, but only when BLIS itself is being built. This macro will not be defined when, for example, the testsuite or example code compiles code local to those applications. This was done in part by defining a new cflags function get-user-cflags-for(), which is now the designated function for application Makefiles if they wish to inherit a basic set of CFLAGS from BLIS. (The compiler flags returned are identical to that of get-frame-cflags-for() except that -DBLIS_IS_BUILDING_LIBRARY is omitted.) - Updated all test driver-like makefiles to call get-user-cflags-for() instead of get-frame-cflags-for(). commit cc2cca4f56eb30212a0dce3e5c121e64d9e59560 Merge: e19e7212 fb81c7fc Author: Field G. Van Zee Date: Thu Sep 6 17:12:13 2018 -0500 Merge branch 'dev' commit e19e7212872da3d464734199193436faa51f0da0 Merge: 97965b09 b3d0702c Author: Jeff Hammond Date: Thu Sep 6 14:58:49 2018 -0700 Merge pull request #244 from kali/pthread-barrier-osx add an adhoc impl for pthread_barrier commit b3d0702cf2ef6dda19a23dd8a677be1b6f73c322 Merge: 4e7d0670 97965b09 Author: Jeff Hammond Date: Thu Sep 6 14:58:23 2018 -0700 Merge branch 'master' into pthread-barrier-osx commit 4e7d06700f176a62952d7d51e41fdcbc6b7a9d5f Author: Mathieu Poumeyrol Date: Thu Sep 6 23:48:31 2018 +0200 second __APPLE__ commit fb81c7fc665d68e6a2add163feb29acc0bce8936 Author: Field G. Van Zee Date: Thu Sep 6 16:29:39 2018 -0500 Defined cortexa53 sub-configuration. 
Details: - Added a new sub-configuration 'cortexa53', which is a mirror image of cortexa57 except that it will use slightly different compiler flags. Thanks to Mathieu Poumeyrol for making this suggestion after discovering that the compiler flags being used by cortexa57 were not working properly in certain OS X environments (the fix to which is currently pending in pull request #245). commit 24ecc0d94aaa9ab4df1ae6d199c4ec6d7783169f Author: Mathieu Poumeyrol Date: Thu Sep 6 22:10:16 2018 +0200 use _POSIX_BARRIERS instead of __APPLE__ commit 97965b09059a610db06fb7a22bdfa79c0d37d673 Author: Mathieu Poumeyrol Date: Thu Sep 6 21:10:29 2018 +0200 cortexa9 and cortexa53 travis build + qemu test (#245) commit a6802eab7d94b5a9de633c53beca8245b74f5dc6 Author: Mathieu Poumeyrol Date: Thu Sep 6 17:16:35 2018 +0200 reinstantiate test on macos commit d688a2b7e5a19cba44ea398a99e325e19b8fce50 Author: Mathieu Poumeyrol Date: Thu Sep 6 15:25:16 2018 +0200 add an adhoc impl for pthread_barrier commit ab9f9e684dc3ffbb70cc45b21c67af5d916919e5 Author: Field G. Van Zee Date: Thu Aug 30 15:14:02 2018 -0500 CHANGELOG update (0.4.1) commit 10fd614031307c46db3d893528d4e5fc31f490b3 (tag: 0.4.1) Author: Field G. Van Zee Date: Thu Aug 30 15:13:59 2018 -0500 Version file update (0.4.1) commit 08dd67c4b21244851f8416bd59159bea7a9c5b3d Author: Field G. Van Zee Date: Thu Aug 30 15:12:13 2018 -0500 ReleaseNotes.md update in advance of next version. commit 4fa4cb0734e7de6505b5d6f1aeef3a5d5c89dcbb Author: Field G. Van Zee Date: Wed Aug 29 18:06:41 2018 -0500 Trivial comment header updates. Details: - Removed four trailing spaces after "BLIS" that occurs in most files' commented-out license headers. - Added UT copyright lines to some files. (These files previously had only AMD copyright lines but were contributed to by both UT and AMD.) - In some files' copyright lines, expanded 'The University of Texas' to 'The University of Texas at Austin'. - Fixed various typos/misspellings in some license headers. 
commit b051ffb815baf6c3ece2b5118b679fd9219d5780 Merge: 6f33d9de aaa549f4 Author: Field G. Van Zee Date: Wed Aug 29 17:06:48 2018 -0500 Merge branch 'dev' commit 6f33d9de21fbc2f579846b9104fb9d513753f79c Author: Mathieu Poumeyrol Date: Wed Aug 29 23:48:22 2018 +0200 fix compilation of armv7a kernels (#242) commit 8199e339aefdd27019c7f3d8c99818d375d5400b Author: Field G. Van Zee Date: Mon Aug 27 07:00:12 2018 -0500 Added testsuite threading to input.general.fast. Details: - Added lines associated with the testsuite's new threading option to input.general.fast. This change was intended for the previous commit (10d0735). commit 10d07357afbb2d468837aa97369ef9a6d0610817 Author: Field G. Van Zee Date: Sun Aug 26 20:34:30 2018 -0500 Better thread safety; added threading to testsuite. Details: - Replaced critical sections that were conditional upon multithreading being enabled (via pthreads or OpenMP) with unconditional use of pthreads mutexes. (Why pthreads? Because BLIS already requires it for its initialization mechanism: pthread_once().) This was done in bli_error.c, bli_gks.c, bli_l3_ind.c. Also, replaced usage of BLIS's mtx_t object and bli_mutex_*() API with pthread mutexes in bli_thread.c. The previous status quo could result in a race condition if the application called BLIS from more than one thread. The new pthread-based code should be completely agnostic to the application's threading configuration. Thanks to AMD for bringing to our attention the need for a thread-safety review. - Added an option to the testsuite to simulate application-level multithreading. Specifically, each thread maintains a counter that is incremented after each experiment. The thread only executes the experiment if: counter % n_threads == thread_id. In other words, the threads simply take turns executing each problem experiment. Also, POSIX guarantees that fprintf() will not intermingle output, so output was switched to fprintf() instead of libblis_test_fprintf(). 
- Changed membrk_t objects to use pthread_mutex_t instead of mtx_t and replaced use of bli_mutex_init()/_finalize() in bli_membrk.c with wrappers to pthread_mutex_init()/_destroy(). - Changed the implementation of bli_l3_ind_oper_enable_only() to fix a race condition; specifically, two threads calling the function with the same parameters could lead to a non-deterministic outcome. - Added #include to bli_cpuid.c and moved the same in bli_arch.c. - Added 'const' to declaration of OPT_MARKER in bli_getopt.c. - Added #include to bli_system.h. - Added add-copyright.py script to automate adding new copyright lines to (and updating existing lines of) source files. commit aaa549f4d1e63929fe2bea023ce849253cfbbb42 Author: Field G. Van Zee Date: Sun Aug 26 20:13:51 2018 -0500 Minor update to configure --help (--sharedir option). Details: - Fixed/tweaked description for --sharedir=SHAREDIR option. commit 573b8ac373f821a65cc8afd51cdbe03b8ec01081 Author: Field G. Van Zee Date: Sun Aug 26 13:51:32 2018 -0500 Fixed copy-paste typo in previous commit. Details: - Fixed a typo in travis/do_testsuite.sh introduced in 62ea1d3. commit 62ea1d33d3bc1e890420a1e828b9d0e87e87533b Author: Field G. Van Zee Date: Sun Aug 26 13:35:53 2018 -0500 Fixed broken out-of-tree builds. Details: - Fixed stale filepaths to check-blastest.sh and check-blistest.sh in travis/do_testsuite.sh and travis/do_sde.sh. - Create a symbolic link to the 'config' directory so that the top-level Makefile can find the configs' make_defs.mk files during out-of-tree builds. - Added additional case handling to out-of-tree scenario to handle situations where files 'Makefile', 'common.mk', or 'config' exist but are not symbolic links. In such cases, configure warns the user and exits. - Homogenized various error messages throughout configure. - Belated thanks to Victor Eijkhout for requesting the feature added in 0f491e9 whereby lesser Makefiles can compile and link against an existing installation of BLIS. 
commit 0f491e994a7e14d4dfce26e6a51dba2bccad29a3 Author: Field G. Van Zee Date: Sat Aug 25 20:12:36 2018 -0500 Allow lesser Makefiles to reference installed BLIS. Details: - Updated the build system so that "lesser" Makefiles, such as those belonging to example code or the testsuite, may be run even if the directory is orphaned from the original build tree. This allows a user to configure, compile, and install BLIS, delete the build tree (that is, the source distribution, or the build directory for out-of-tree builds) and then compile example or testsuite code and link against the installed copy of BLIS (provided the example or testsuite directory was preserved or obtained from another source). The only requirement is that make be invoked while setting the BLIS_INSTALL_PATH variable to the same installation prefix used when BLIS was configured. The easiest syntax is: make BLIS_INSTALL_PATH=/install/prefix though it's also permissible to set BLIS_INSTALL_PATH as an environment variable prior to running 'make'. - Updated all lesser Makefiles to implement the new aforementioned build behavior. - Relocated check-blastest.sh and check-blistest.sh from build to blastest and testsuite, respectively, so that if those directories are copied elsewhere the user can still run 'make check' locally. - Updated docs/Testsuite.md with language that mentions this new option of building/linking against an installed copy of BLIS. commit 36ff92ce0d3b428b15b6cddc6f5944afe22e43ec Author: Field G. Van Zee Date: Fri Aug 24 18:26:09 2018 -0500 Missing C++ compiler no longer fatal to configure. Details: - Changed configure so that the absence of any C++ compiler from the pre-defined search list does not result in an exit. Instead, in this situation, the found_cxx variable is assigned 'c++notfound' and the error message is changed to remind the user that C++ will not be available in the sandbox. Thanks to Devangi Parikh for reporting this issue. 
- Also tweaked the message when a C++ compiler *is* found to remind any would-be confused user that BLIS will only use C++ if it is needed by code in the sandbox. commit 658f0a129bdc565b072696b6ebddce501132091c Author: Field G. Van Zee Date: Fri Aug 24 17:49:37 2018 -0500 Fixed obscure integer size bug in va_arg() usage. Details: - Fixed a bug in the way that the variadic bli_cntx_set_l3_nat_ukrs() function was defined. This function is meant to take a microkernel id, microkernel datatype, microkernel address, and microkernel preference as arguments, and is typically called within the bli_cntx_init_*() function defined within a sub-configuration for initializing an appropriate context. The problem is with the final argument: the microkernel preference. These preferences are actually boolean values, 0 or 1 (encoded as FALSE or TRUE). Since the variadic function does not give the compiler any type information for any variadic arguments, they are "promoted" in the course of internal (macroized) processing according to default argument promotion rules. Thus, integer literals such as 0 and 1 become int and floating-point literals (such as 0.0 or 1.0) become double. Previous to this commit, we indicated to va_arg() that the ukernel preference was a 'bool_t', which is a typedef of int64_t on 64-bit systems. On systems where int is defined as 64 bits, no problems manifest since int is the same size as the type we passed in to va_arg(), but on systems where int is 32 bits, the ukernel preference could be misinterpreted as a garbage value. (This was observed on a modern armv8 system.) The fix was to interpret the bool_t value as int and then immediately typecast it to and store it as a bool_t. Special thanks to Devangi Parikh for helping track down this issue, including deciphering the use of va_arg() and its byzantine treatment of types. - Added explicit typecasts for all invocations of va_arg() in bli_cntx.c. commit e71dc389120b032e42091e4d1a928515ed6f7275 Author: Field G. 
Van Zee Date: Fri Aug 24 15:56:04 2018 -0500 Fixed a very minor memory leak in gks. Details: - Fixed a memory leak in the global kernel structure that resulted in 56 bytes per configured architecture (of which only 18 are presently supported by BLIS). The leak would only manifest if BLIS was initialized and then finalized before the application terminated. Thanks to Devangi Parikh for helping track down this leak. commit a7e3a5f9753468c8e665e6c5c3b38d22b7c92500 Author: Field G. Van Zee Date: Fri Aug 24 14:51:11 2018 -0500 Fixed uncallable bli_finalize(). Details: - Previously, bli_finalize_once()--which, like bli_init_once(), was implemented in terms of pthread_once()--was using the same pthread_once_t control object being used by bli_init(), thus guaranteeing that it would never be called as long as BLIS had already been initialized. This could manifest as a rather large memory leak to any application that attempted to finalize BLIS midway through its execution (since BLIS reserves several megabytes of storage for packing buffers per thread used). The fix entailed giving each function its own pthread_once_t object. Thanks to Devangi Parikh for helping track down this very quiet bug. commit a79c21c7c17fb4854fd24c73b81ec5543f74082d Author: Field G. Van Zee Date: Thu Aug 23 14:40:46 2018 -0500 Fixed cleanmk target post-1b0f8d6. Details: - Changed the cleanmk target to delete makefile fragments from their new home in obj/$(CONFIG_NAME). The old definition worked only because of a typo (REFERKN_PATH instead of REFKERN_PATH), and only in the non-verbose (V != 1) case. commit ffb57242f3eb1175c991fe1b492595fdaa175c27 Author: Field G. Van Zee Date: Wed Aug 22 18:22:41 2018 -0500 Cosmetic output changes to configure. Details: - Disable sandbox-related obj directory creation, directory mirroring, and makefile fragment generation when a sandbox is not enabled. - Prevent various duplicate actions by configure (such as those mentioned above for sandboxes). 
commit ac17454aae9ad430f05aa7c156919c6c695c300c Merge: a77bec76 7afd095a Author: Field G. Van Zee Date: Wed Aug 22 15:34:53 2018 -0500 Merge branch 'master' into dev commit a77bec766a01e42f13f8cacbec8c4cbde8ecefef Author: Field G. Van Zee Date: Wed Aug 22 15:31:29 2018 -0500 Whitespace changes, minor renames in build system. Details: - Minor whitespace cleanup, mostly in the form of spaces -> tabs. - Shortened certain variables' _FRAGMENT_ infixes to _FRAG_ in common.mk. commit 1b0f8d60d1132b56485cc202ebf1246898d3a2a4 Author: Devin Matthews Date: Wed Aug 22 13:19:29 2018 -0700 Generate makefile fragments in build tree (#240) * Make src dir read-only in out-of-tree build test. * Generate makefile fragments in the build tree. commit 7afd095af33690e0175903852b354c9fe46993f6 Author: Field G. Van Zee Date: Wed Aug 22 14:58:24 2018 -0500 Removed skx from code snippet in previous commit. Details: - The docs/ConfigurationHowTo.md document was written with examples that did not yet contain the skx sub-configuration, but the previous commit included bli_arch.c code copied and pasted from a recent commit that does support skx. To keep things consistent, I've removed skx from the recently-added ConfigurationHowTo.md code snippet. commit 48211a980d78673133076e8eced1007b1980f5e6 Author: Field G. Van Zee Date: Wed Aug 22 14:55:02 2018 -0500 Update to docs/ConfigurationHowTo.md. Details: - Added missing language directing the reader to modify the config_name string array in bli_arch.c when adding a new sub-configuration. Thanks to Devangi Parikh for reporting this missing section. commit 65c9096c6e21f3dc2947fa12be9ea3034f8662dc Author: Field G. Van Zee Date: Fri Aug 17 11:44:12 2018 -0500 Fixed broken -p option to configure. Details: - Fixed some stale code that was preventing the -p option to configure from working as expected (though the --prefix option was unaffected). This bug was most likely introduced in 7e5648c (May 7 2018). Thanks to Dave Love for reporting this issue. 
commit e358d5e497c77b305af462f44266370a596445e2 Author: Field G. Van Zee Date: Thu Aug 16 12:18:45 2018 -0500 README.md update (Funding section). commit a61dd5e7bcf23f7237d407a5e06dd44e1bec9ad0 Author: Field G. Van Zee Date: Tue Aug 14 17:08:03 2018 -0500 Changed 'test' target to be more like 'check'. Details: - Redefined the 'test' make target in the top-level Makefile so that the final result ("everything passed" or "at least one failure") is echoed to stdout. Note that 'check' is unchanged, and thus is now effectively a fast version of 'test'. - Updated docs/BuildSystem.md to reflect the above change. commit ce5c3a198a7ae1ca676c27da4541d51ed19d16e1 Merge: 4f6745d6 0bbe69d5 Author: Field G. Van Zee Date: Tue Aug 14 16:52:19 2018 -0500 Merge branch 'master' of github.com:flame/blis commit 4f6745d68a2c66511695eff0beb00a82ffc6bbbe Author: Field G. Van Zee Date: Tue Aug 14 16:50:47 2018 -0500 Fixed link error when building only shared library. Details: - Fixed a linker error that occurred when attempting to compile and link the testsuite and/or BLAS test drivers after having configured BLIS to only generate a shared library (no static library). The chosen solution involved (1) adding the local library path, $(BASE_LIB_PATH), to the search paths for the shared library via the link option -Wl,-rpath,$(BASE_LIB_PATH). (2) adding a local symlink to $(BASE_LIB_PATH) that uses the .so major version number so that ld would find the shared library at execution time. Thanks to Sajid Ali for reporting this issue, to Devin Matthews for pointing out the need for the -rpath option, and to Devangi Parikh for helping Sajid isolate the problem. - Added #include to bli_system.h to avoid a compiler warning resulting from using toupper() from bli_string.c without a prototype. Thanks again to Sajid Ali, whose build log revealed this compiler warning. - Added '*.so.*' to .gitignore. - CREDITS file update. commit 0bbe69d5ed260849297d8f2d35b7668d167482ed Author: Devangi N. 
Parikh Date: Tue Aug 14 14:49:58 2018 -0500 Updated plotting scripts in test/studies. Details: - Fixed indexing on plots to correspond to the removal of dtime in the test drivers. commit e93e0e149e087e08eca2885f1a748a4e88ffe55d Author: Field G. Van Zee Date: Tue Aug 7 15:54:30 2018 -0500 Removed redefinition of axpyv, scal2v func types. Details: - Removed a stray/accidental redefinition of axpyv and scal2v function types in frame/1d/bli_l1d_ft.h (probably a copy/paste leftover during development). commit 1deb33bd16349aaa643694d1bd685ff8a9a5f476 Author: Field G. Van Zee Date: Tue Aug 7 15:02:50 2018 -0500 Updated penryn kernels to use new _ker_ft type names. Details: - Updated older _ft kernel type suffixes used within penryn level-1v and -1f kernels to use the newer _ker_ft suffix that was introduced in 0175483. (Thank you Travis CI.) commit 9cb0b023ca91abdc056d726cdc070062e4954611 Author: Field G. Van Zee Date: Tue Aug 7 14:21:07 2018 -0500 INSTALL file update. commit 017548314f3f78f66fbe3264509ac5302bd8d62b Author: Field G. Van Zee Date: Tue Aug 7 14:13:25 2018 -0500 Replaced function chooser macros w/ func ptr arrays. Details: - Previously, most object API functions (_oapi.c) used a function chooser macro that would expand out to an if-elseif-elseif-else conditional that used a num_t datatype to call the appropriate type-specific API (_tapi.c). This always felt a little hackish, and would get in the way somewhat of adding support for new num_t datatypes in the future. So, I've replaced that functionality with code that queries a function pointer that is then typecast appropriately. This model of function calling was already pervasive for kernels queried from the cntx_t structure. It was also already in use in various other functions, such as macrokernels, and this commit simply extends that pattern. 
- The above change required many new files, mostly header files, that define the function types (mostly _ft.h) for the queriable functions as well as some source files to define the function pointer arrays and their corresponding query functions (_fpa.c). Various other function types, mostly for kernel function types, were renamed to reduce the potential for confusion with the function types for expert and basic (non-expert) typed API functions. - Removed definitions for all of the "bli_call_ft_*()" function chooser macros from bli_misc_macro_defs.h. commit addce089664561f9f63efa6f107e58fc48d29871 Author: Field G. Van Zee Date: Mon Aug 6 13:18:20 2018 -0500 Format spec and other updates in test, test/3m4m. Details: - Removed the dtime (delta time, or wallclock time) column from the matlab output of all test drivers in test, test/3m4m, test/studies. This value was rarely (if ever) really needed and usually only served to take up screen space. - Updated format specifier in test/studies/skx to use %7.2f instead of %6.3f. - For the test drivers in 'test' directory, added an initial line of output that sets last entry of matlab matrix to zero in order to induce a pre-allocation of the entire array of performance results. commit 94d5ef42c833a4d43e50a80d46dddbd7a56d2db6 Author: Field G. Van Zee Date: Sat Aug 4 15:57:17 2018 -0500 Adjusted gflops format spec in testsuite, test/3m4m. Details: - Changed the format specifier for the gflops column in the testsuite output from %7.3f to %7.2f. This was done mainly to keep the output aligned properly when the expected performance exceeded 1000 gflops. Also, two decimal places still conveys plenty of precision for all practical applications, including just eyeballing performance deltas between two executions (let alone two implementations). - Changed the format specifier for gflops in the test/3m4m drivers from %6.3f to %7.2f (for the same reasons listed above). 
commit c7ff06bae92b9b6c6656f2030d13486b95417821 Merge: 6074082c ebe998d0 Author: Devangi N. Parikh Date: Wed Aug 1 14:20:41 2018 -0500 Merge branch 'master' of https://github.com/flame/blis commit 6074082cd359dd775ef72478f8f3a281c5a6a6f9 Author: Devangi N. Parikh Date: Wed Aug 1 13:30:51 2018 -0500 Fixed bug in bli_cntx_set_packm_ker_dt() implementation. Details: - Fixed bug in static function bli_cntx_set_[packm/unpackm]_ker_dt(), which were incorrectly calling bli_cntx_get_[packm/unpackm]_ker_dt to get the corresponding func_t. commit ebe998d06cc56a9a9d66990b6ebf683d6fd0efdf Author: Field G. Van Zee Date: Wed Aug 1 13:24:00 2018 -0500 Fixed typos in BuildSystem.md from previous commit. commit e72a344e94c5ae253f69b60f41d92ca89a5d1d1c Author: Field G. Van Zee Date: Wed Aug 1 13:00:38 2018 -0500 Added table of 'make' targets to BuildSystem.md. Details: - Added a new section to BuildSystem.md that describes the most useful make targets defined in the top-level Makefile. commit 4f60d0288e00586dc921ff57db851f1266ff8e70 Author: Field G. Van Zee Date: Mon Jul 30 19:22:57 2018 -0500 README.md, comment updates. Details: - Added links, and sandbox language to README.md. - Adjusted some comments in high-level level-3 object functions to make clear what bli_thread_init_rntm() does. commit 455d3f49e5c8362395be14c79e6adb5123e29623 Author: Field G. Van Zee Date: Sun Jul 29 18:31:29 2018 -0500 Edits to object/typed API, multithreading docs. commit 922a1c05e06f52c97fb369870dce07233e61c4c9 Author: Field G. Van Zee Date: Sat Jul 28 20:15:55 2018 -0500 More tweaks to README.md. commit a7a0cf2b5d9f1dea5061c0f20eeaf371dfd4ea12 Author: Field G. Van Zee Date: Sat Jul 28 16:59:31 2018 -0500 More edits to docs/Multithreading.md. commit be21d0cf68c330fd0d2048465a43ddc59d0b9d6c Author: Field G. Van Zee Date: Sat Jul 28 16:46:51 2018 -0500 Fixed typos in docs/Multithreading.md. commit eac07c7b4f7a41c68d63f1e67141b2b58009609e Author: Field G. 
Van Zee Date: Sat Jul 28 16:45:28 2018 -0500 Edits to docs/Multithreading.md. commit 5438375a032273b46ae626fee909ffc05f48ab72 Author: Field G. Van Zee Date: Sat Jul 28 16:34:21 2018 -0500 Fixed link in README.md. commit 1f1a237d3f0b24d71ce2d7ee52d8a84f8e6a29ad Author: Field G. Van Zee Date: Sat Jul 28 16:33:28 2018 -0500 Fixed links in BLISTypedAPI.md. commit 89c8806e3aa49310f36c0314c5f6956c83a627a1 Author: Field G. Van Zee Date: Sat Jul 28 16:30:56 2018 -0500 Minor doc fixes to previous commit. commit b8c7574f84873b9c408f70c29c41ce464df57c2d Author: Field G. Van Zee Date: Sat Jul 28 16:27:09 2018 -0500 README.md, typed/object API updates. Details: - Updated the typed and object APIs to include language on the rntm_t parameters in the expert interfaces. - Updated README to include link to object API. commit 29c34c4adb02d91fb34d1ccc0e821d6cfb7ce5c5 Author: Field G. Van Zee Date: Fri Jul 27 16:26:19 2018 -0500 CREDITS file update. commit 55a04edf52ac4f16c51b738bc884684adc1f1777 Author: Field G. Van Zee Date: Fri Jul 27 16:10:46 2018 -0500 CHANGELOG update (0.4.0) commit 4ad61ce905d250dd3ef197f0d06a69ce6d99d309 (tag: 0.4.0) Author: Field G. Van Zee Date: Fri Jul 27 16:10:43 2018 -0500 Version file update (0.4.0) commit b86cf13793b07f35c027a56c9faec8f4b6279d3e Author: Field G. Van Zee Date: Fri Jul 27 16:08:21 2018 -0500 Release Notes update in advance of next version. commit a8b4084a0e04e47ac02ceae93a2018f5363e1205 Author: Field G. Van Zee Date: Fri Jul 27 16:07:26 2018 -0500 CREDITS file update. commit 8e10cac5f388ac961c3d77b0a465214e7c9dc91a Author: Field G. Van Zee Date: Fri Jul 27 14:45:35 2018 -0500 Updates to CREDITS, RELEASING, config/README.md. Details: - Added individuals' github handles to CREDITS file. - Updated RELEASING, config/README.md files. commit 401b69c8f26a86726ac5e1fb4f9fc2d2098ef204 Author: Field G. Van Zee Date: Wed Jul 25 17:55:13 2018 -0500 More indentation in docs/ConfigurationHowTo.md. 
commit 1c6a1b921ef96999bb449d657cca6d9a556f7245 Author: Field G. Van Zee Date: Wed Jul 25 17:14:58 2018 -0500 Trying new indentation in ConfigurationHowTo.md. Details: - Modified a few sections to take advantage of a feature of markdown that allows a bullet or enumeration to have multiple paragraphs. This is a trial run to make sure the indentation looks good when rendered in a web browser. commit 71f978719527fcf17617cb234e48bf349a76c12d Author: Field G. Van Zee Date: Wed Jul 25 15:55:36 2018 -0500 Whitespace changes to macrokernels' func ptr defs. commit 87d57c31c2bfcf4609dfe31ce915e9345150e613 Author: Field G. Van Zee Date: Wed Jul 25 14:20:18 2018 -0500 Various minor updates to typed, object API docs. commit fb6e16268aaafbab2fd78d47cbf821e2152261fd Author: Field G. Van Zee Date: Wed Jul 25 14:17:28 2018 -0500 Consolidated prototypes in bli_l1v_tapi.h. Details: - Consolidated typed API function prototypes in bli_l1v_tapi.h by leveraging identical function signatures between operations. - Removed 'restrict' keyword since it is not actually present in the function definitions. commit af60d738f21340ccb0903e6c87dbf6af4fc44fc0 Author: Field G. Van Zee Date: Tue Jul 24 15:35:52 2018 -0500 Finished object creation part of BLISObjectAPI.md. Details: - Filled in remaining section on object creation function reference of BLISObjectAPI.md. All object management functions demonstrated as part of the example code in examples/oapi are now documented, as well as some other functions that are not shown in the example code. - Updated various links (mostly in function index) to correctly point to the object API reference instead of the typed API reference. - Added documentation to getijm, setijm. commit 8217a6a3b68382c62f016c658d337e6086112fef Author: Field G. Van Zee Date: Tue Jul 24 13:13:10 2018 -0500 Moved sandbox README.md to docs/Sandboxes.md. Details: - Relocated sandbox/ref99/README.md to docs/Sandboxes.md and made minor edits to the document. 
commit b7db29332394324ffd1a73c3847a75e9a5b38c8d Author: Field G. Van Zee Date: Thu Jul 19 11:14:30 2018 -0500 Explicitly typecast return vals in static funcs. Details: - Added explicit typecasting to various functions (mostly static functions), primarily those in bli_param_macro_defs.h, bli_obj_macro_defs.h, bli_cntx.h, bli_cntl.h, and a few other header files. - This change was prompted by feedback from Jacob Gorm Hansen, who reported that #including "blis.h" from his application caused gcc to output error messages (relating to types being returned mismatching the declared return types) when used via the C++ compiler front-end. This is the first pass of fixes, and we may need to iterate with additional follow-up commits (#233). commit fa08e5ead95f9d757af6ab5b095a8bf131e3874d Author: Field G. Van Zee Date: Tue Jul 17 19:02:15 2018 -0500 Fixed minor issues in ecbebe7 with mt disabled. Details: - Fixed an unused variable warning in frame/base/bli_rntm.c when multithreading is disabled. - Fixed a missing variable declaration in bli_thread_init_rntm_from_env() when multithreading is disabled. commit ecbebe7c2e43950dfa369f71c2b83cabe348a046 Author: Field G. Van Zee Date: Tue Jul 17 18:37:32 2018 -0500 Defined rntm_t to relocate cntx_t.thrloop (#235). Details: - Defined a new struct datatype, rntm_t (runtime), to house the thrloop field of the cntx_t (context). The thrloop array holds the number of ways of parallelism (thread "splits") to extract per level-3 algorithmic loop until those values can be used to create a corresponding node in the thread control tree (thrinfo_t structure), which (for any given level-3 invocation) usually happens by the time the macrokernel is called for the first time. - Relocating the thrloop from the cntx_t remedies a thread-safety issue when invoking level-3 operations from two or more application threads.
The race condition existed because the cntx_t, a pointer to which is usually queried from the global kernel structure (gks), is supposed to be a read-only. However, the previous code would write to the cntx_t's thrloop field *after* it had been queried, thus violating its read-only status. In practice, this would not cause a problem when a sequential application made a multithreaded call to BLIS, nor when two or more application threads used the same parallelization scheme when calling BLIS, because in either case all application threads would be using the same ways of parallelism for each loop. The true effects of the race condition were limited to situations where two or more application threads used *different* parallelization schemes for any given level-3 call. - In remedying the above race condition, the application or calling library can now specify the parallelization scheme on a per-call basis. All that is required is that the thread encode its request for parallelism into the rntm_t struct prior to passing the address of the rntm_t to one of the expert interfaces of either the typed or object APIs. This allows, for example, one application thread to extract 4-way parallelism from a call to gemm while another application thread requests 2-way parallelism. Or, two threads could each request 4-way parallelism, but from different loops. - A rntm_t* parameter has been added to the function signatures of most of the level-3 implementation stack (with the most notable exception being packm) as well as all level-1v, -1d, -1f, -1m, and -2 expert APIs. (A few internal functions gained the rntm_t* parameter even though they currently have no use for it, such as bli_l3_packm().) This required some internal calls to some of those functions to be updated since BLIS was already using those operations internally via the expert interfaces.
For situations where a rntm_t object is not available, such as within packm/unpackm implementations, NULL is passed in to the relevant expert interfaces. This is acceptable for now since parallelism is not obtained for non-level-3 operations. - Revamped how global parallelism is encoded. First, the conventional environment variables such as BLIS_NUM_THREADS and BLIS_*_NT are only read once, at library initialization. (Thanks to Nathaniel Smith for suggesting this to avoid repeated calls to getenv(), which can be slow.) Those values are recorded to a global rntm_t object. Public APIs, in bli_thread.c, are still available to get/set these values from the global rntm_t, though now the "set" functions have additional logic to ensure that the values are set in a synchronous manner via a mutex. If/when NULL is passed into an expert API (meaning the user opted to not provide a custom rntm_t), the values from the global rntm_t are copied to a local rntm_t, which is then passed down the function stack. Calling a basic API is equivalent to calling the expert APIs with NULL for the cntx and rntm parameters, which means the semantic behavior of these basic APIs (vis-a-vis multithreading) is unchanged from before. - Renamed bli_cntx_set_thrloop_from_env() to bli_rntm_set_ways_for_op() and reimplemented, with the function now being able to treat the incoming rntm_t in a manner agnostic to its origin--whether it came from the application or is an internal copy of the global rntm_t. - Removed various global runtime APIs for setting the number of ways of parallelism for individual loops (e.g. bli_thread_set_*_nt()) as well as the corresponding "get" functions. The new model simplifies these interfaces so that one must either set the total number of threads, OR set all of the ways of parallelism for each loop simultaneously (in a single function call). - Updated sandbox/ref99 according to above changes.
- Rewrote/augmented docs/Multithreading.md to document the three methods (and two specific ways within each method) of requesting parallelism in BLIS. - Removed old, disabled code from bli_l3_thrinfo.c. - Whitespace changes to code (e.g. bli_obj.c) and docs/BuildSystem.md. commit 323eaaab99752858b12e81e2eb8e416f009a3028 Author: Devangi N. Parikh Date: Fri Jul 13 11:40:06 2018 -0500 Removed left over code from plotting scripts. commit 60c197736495b47ce974ffb9b43874d1ebcfe78c Author: Field G. Van Zee Date: Thu Jul 12 19:22:14 2018 -0500 Documented accessor functions in BLISObjectAPI.md. Details: - Added documentation to docs/BLISObjectAPI.md for a handful of commonly-used obj_t accessor functions. - Minor updates to docs/BLISTypedAPI.md. commit 77327ad796e11ef67df0cc91d45ed663598ba4df Merge: 73b0b2a3 9fef8575 Author: Devangi N. Parikh Date: Thu Jul 12 17:09:33 2018 -0500 Merge branch 'master' of https://github.com/flame/blis commit 73b0b2a3ac1be6dfbe85c116886b4e29d98ac945 Author: Devangi N. Parikh Date: Thu Jul 12 16:53:10 2018 -0500 Created hardware-specific test driver directory. Details: - Created a 'studies' subdirectory within 'test' to be used to house test drivers, makefiles, run scripts, matlab plot code, and related files that have been customized for collecting performance data on specific host machines or product lines. This new setup will help us catalog, track, and share test driver materials over time, and in a way that facilitates reproducibility. - Created an 'skx' subdirectory within 'test/studies' to house various level-3 test driver files used to measure performance on SkylakeX nodes (specifically, those nodes used by TACC's stampede2 system). commit 9fef85756d15ee0f977fff6e57acd01c20cba184 Author: Field G. Van Zee Date: Wed Jul 11 18:40:30 2018 -0500 Cleaned up loose ends in BLISObjectAPI.md. Details: - Deleted some lines from the API function signatures that did not belong (and were only left over from the copy-paste of the typed API). 
- Fixed some paragraph-in-bullet indentation. commit 80ddeae4629022b69fdf1f1b053a1fcba643c40c Author: Field G. Van Zee Date: Wed Jul 11 18:31:57 2018 -0500 Added BLISObjectAPI.md to docs. Details: - Added first draft of BLISObjectAPI.md. (Object management section is still missing.) - Small fixes to BLISTypedAPI.md found while writing BLISObjectAPI.md. - In various .md files, changed ``` verbatim blocks to language attributes (e.g. ```c for C code). commit 038442add39ce629fee0d960b212ce0c95138d46 Author: Field G. Van Zee Date: Wed Jul 11 12:24:18 2018 -0500 Added -lpthread to makefile example in BuildSystem.md. Details: - Added missing pthreads library linking to example makefile in docs/BuildSystem.md, as well as similar language to build requirements at the beginning of the document. Thanks to Stefanos Mavros for bringing this to our attention. - Updated CREDITS file. commit bf10d8624e7b5902c9d9189c7c93f318b8e1b9a5 Author: Field G. Van Zee Date: Mon Jul 9 18:40:13 2018 -0500 Small updates to KernelsHowTo.md, BLISTypedAPI.md. Details: - Minor updates to BLISTypedAPI.md, mostly to bring terminology up-to-date with the new "typed API" classification. - Added contents section to KernelsHowTo.md. commit 1fd3bce59e43b422e62f9684bca9d1296a29edc3 Author: Field G. Van Zee Date: Mon Jul 9 18:20:11 2018 -0500 Further updates to KernelsHowTo.md, BLISTypedAPI.md. Details: - Added missing level-1v operations to BLISTypedAPI (e.g. axpbyv, xpbyv). - Updated broken links in KernelsHowTo.md based on misnamed anchors. - Other minor changes. commit c40d30a6c920bd2e5a8353a3cd07a7e2b2265758 Author: Field G. Van Zee Date: Mon Jul 9 17:55:54 2018 -0500 Updated KernelsHowTo.md, BLISTypedAPI.md. Details: - Added missing (basic) information in KernelsHowTo.md for level-1f and level-1v kernels. - Updated section regarding contexts. commit f8913c2bf91c0e0fb4e68aedf64a242a19db92a0 Author: Field G. Van Zee Date: Sat Jul 7 20:35:13 2018 -0500 Fixed outdated scalv() calls in penryn l1f kernels.
Details: - Fixed stale calls to dscalv() from the dotxf and dotxaxpyf penryn kernels that were not updated during the basic/expert API separation in e88aeda. commit e78e71d549ac17ecd52c7b33008df1cd78f1b59e Author: Field G. Van Zee Date: Sat Jul 7 20:18:09 2018 -0500 Added README.md mention/link to examples/tapi. Details: - Added language to README.md to bring the reader's attention to the example code for the typed API (in addition to those for the object API). commit 419ffb158573a26bfec47bac73e4394e7926a7b8 Author: Field G. Van Zee Date: Sat Jul 7 20:14:23 2018 -0500 Updates to README.md. Details: - Updated wiki links according to renamed/relocated files in 'docs'. - Converted links to relative paths. - Added link to docs/Multithreading.md. commit 7d3e8a7e5f1ec299d009fb6c9071f0c1b089b460 Author: Field G. Van Zee Date: Sat Jul 7 20:01:29 2018 -0500 Reverted docs/*.md links to relative paths. Details: - Within the documents in docs/*.md, reverted links to other local documents to relative paths. - Fixed some links/documents that did not yet have the '.md' suffix. - Testing whether we can use relative links ('docs/BLISTypedAPI.md') from within README.md. commit d97c862c2b9170d774f414e63ae365488fffb4f5 Author: Field G. Van Zee Date: Sat Jul 7 19:40:41 2018 -0500 Updated links (URLs) in docs/*.md. Details: - Updated most markdown links in the documents/wikis to use absolute paths instead of the relative paths that were in use previously. A few links were not updated, except for adding a ".md" to reflect the documents' new names, in order to test whether relative linking still works. commit 3a0c12135875e0fb04de9798664e4fae632d994e Merge: 2c7960c8 bcacddfa Author: Field G. Van Zee Date: Sat Jul 7 16:51:38 2018 -0500 Merge branch 'dev' commit bcacddfad75b20969660606751eea6ead6c42ca9 Author: Field G. Van Zee Date: Sat Jul 7 16:45:29 2018 -0500 Added 'docs' directory with wiki markdown files. Details: - Exported all github wikis to a new 'docs' directory. 
- Renamed 'BLISAPIQuickReference' wiki to 'BLISTypedAPI' and removed all cntx_t* arguments from the (now non-expert) APIs (with the exception of the kernel APIs). - Added section to BuildSystem documenting new ARG_MAX hack. commit 3ee2bc0f7aa3b08da92331d64271bee99eaf8c1d Author: Field G. Van Zee Date: Sat Jul 7 16:02:16 2018 -0500 Renamed files that distinguish basic/expert APIs. Details: - Renamed various files that were previously named according to a "with context" or "without context" convention. For example, the following files in frame/3 were renamed: frame/3/bli_l3_oapi_woc.c -> frame/3/bli_l3_oapi_ba.c frame/3/bli_l3_oapi_wc.c -> frame/3/bli_l3_oapi_ex.c frame/3/bli_l3_tapi_woc.c -> frame/3/bli_l3_tapi_ba.c frame/3/bli_l3_tapi_wc.c -> frame/3/bli_l3_tapi_ex.c Here, the "ba" is for "basic" and "ex" is for "expert". This new naming scheme will make more sense especially if/when additional expert parameters are added to the expert APIs (typed and object). commit e88aedae735dfeb6fa5ac28d4527eb3ca58c6510 Author: Field G. Van Zee Date: Fri Jul 6 19:14:02 2018 -0500 Separated expert, non-expert typed APIs. Details: - Split existing typed APIs into two subsets of interfaces: one for use with expert parameters, such as the cntx_t*, and one without. This separation was already in place for the object APIs, and after this commit the typed and object APIs will have similar expert and non- expert APIs. The expert functions will be suffixed with "_ex" just as is the case for expert interfaces in the object APIs. - Updated internal invocations of typed APIs (functions such as bli_?setm() and bli_?scalv()) throughout BLIS to reflect use of the new explicitly expert APIs. - Updated example code in examples/tapi to reflect the existence (and usage) of non-expert APIs. - Bumped the major soname version number in 'so_version'.
While code compiled against a previous version/commit will likely still work (since the old typed function symbol names still exist in the new API, just with one less function argument) the semantics of the function have changed if the cntx_t* parameter the application passes in is non-NULL. For example, calling bli_daxpyv() with a non-NULL context does not behave the same way now as it did before; before, the context would be used in the computation, and now the context would be ignored since the interface for that function no longer expects a context argument. commit 331694e52414c0cd50048daf880a9ace9e29b94a Author: Isuru Fernando Date: Fri Jul 6 09:07:38 2018 -0600 Fix windows build and enable x86_64 on appveyor (#230) * Upload artifacts built on appveyor (#228) * Upload artifacts * Fix install in appveyor * Remove windows.h in bli_winsys.c (#229) Looks like it is unneeded. * Implemented ARG_MAX hack in configure, Makefile. Details: - Added support for --enable-arg-max-hack to configure, which will change the behavior of make when building BLIS so that rather than invoke the archiver/linker with all of the object files as command line arguments, those object files are echoed to a temporary file and then the archiver/linker is fed that temporary file via the @ notation. An example of this can be found in the GNU make docs at https://www.gnu.org/software/make/manual/make.html#File-Function - Thanks to Isuru Fernando for prompting this feature. * Enable x86_64 and arg-max-hack on appveyor * Use gas style assembly for clang on windows commit a64a780d28c99d35f237f59212772e9beff35b3e Merge: 89e178ce 3cb396d1 Author: Devin Matthews Date: Fri Jul 6 09:38:42 2018 -0500 Merge pull request #231 from flame/travis-pr Disable SDE for PRs commit 3cb396d1ae4ee569f862db201c6a976712fd128e Author: Devin Matthews Date: Fri Jul 6 09:19:44 2018 -0500 Disable SDE for PRs Pull requests cannot use Travis secret variables, so SDE needs to be disabled. This PR should suffice as a test.
commit 2c7960c8416ee9b67364be5f2b210fd7a0aec4b5 Author: Field G. Van Zee Date: Thu Jul 5 14:38:33 2018 -0500 Implemented ARG_MAX hack in configure, Makefile. Details: - Added support for --enable-arg-max-hack to configure, which will change the behavior of make when building BLIS so that rather than invoke the archiver/linker with all of the object files as command line arguments, those object files are echoed to a temporary file and then the archiver/linker is fed that temporary file via the @ notation. An example of this can be found in the GNU make docs at https://www.gnu.org/software/make/manual/make.html#File-Function - Thanks to Isuru Fernando for prompting this feature. commit c422a5cd191d47e6aeb9cea6de0e348f46e3e318 Merge: b6470262 89e178ce Author: Field G. Van Zee Date: Thu Jul 5 12:33:35 2018 -0500 Merge branch 'dev' commit b6470262ea66c0f48a5b4d85ca4bf85c1fb2b3af Author: Isuru Fernando Date: Wed Jul 4 19:14:29 2018 -0600 Remove windows.h in bli_winsys.c (#229) Looks like it is unneeded. commit eac4bdf98691c5ec784af0dc11d1ad2269840661 Author: Isuru Fernando Date: Wed Jul 4 18:31:01 2018 -0600 Upload artifacts built on appveyor (#228) * Upload artifacts * Fix install in appveyor commit 89e178ce380439dea951925e33703dc4b979e914 Merge: d868eb3e e32b2ef9 Author: Field G. Van Zee Date: Wed Jul 4 17:51:16 2018 -0500 Merge branch 'master' into dev commit e32b2ef983ea1c3521dd3821116c0078690f125e Author: Field G. Van Zee Date: Wed Jul 4 17:49:39 2018 -0500 Update to CREDITS file. 
commit 14648e137696484e0ff04f89b16c6b4183ea42b8 Author: Isuru Fernando Date: Wed Jul 4 16:48:42 2018 -0600 Native windows support using clang (#227) * Add appveyor file * Build script * Remove fPIC for now * copy as * set CC and CXX * Change the order of immintrin.h * Fix testsuite header * Move testsuite defs to .c * Fix appveyor file * Remove fPIC again and fix strerror_r missing bug * Remove appveyor script * cd to blis directory * Fix sleep implementation * Add f2c_types_win.h * Fix f2c compilation * Remove rdp and rename appveyor.yml * Remove setenv declaration in test header * set CPICFLAGS to empty * Fix another immintrin.h issue * Escape CFLAGS and LDFLAGS * Fix more ?mmintrin.h issues * Build x86_64 in appveyor * override LIBM LIBPTHREAD AR AS * override pthreads in configure * Move windows definitions to bli_winsys.h * Fix LIBPTHREAD default value * Build intel64 in appveyor for now commit b45ea92fc6f77f2313b50dbe95922f838cbead07 Author: Field G. Van Zee Date: Tue Jul 3 18:27:29 2018 -0500 Added typed (BLAS-like) API code examples. Details: - Added new example code to examples/tapi demonstrating how to use the BLIS typed API. These code examples directly mirror the corresponding example code files in examples/oapi. This setup provides a convenient opportunity for newcomers to BLIS to compare and contrast the typed and object APIs when they are used to perform the same tasks. - Minor cleanups to examples/oapi. commit d868eb3e200f657a1284c4cc933e7a4d25260dce Author: Field G. Van Zee Date: Fri Jun 29 12:36:04 2018 -0500 Implemented bli_obj_scalar_cast_to(). Details: - Implemented bli_obj_scalar_cast_to(), which will typecast the value in the internal scalar of an obj_t to a specified datatype. - Changed bli_obj_scalar_attach() so that the scalar value being attached is first typecast to the storage datatype of the destination object rather than the target datatype. 
- Reformatted function type signatures in bli_obj_scalar.c as well as prototypes in its corresponding header file. commit 52d80b5f09517d80ac8a7c96983a576c1ec2080b Author: Field G. Van Zee Date: Fri Jun 29 12:30:44 2018 -0500 Fixed static funcs related to target and exec dts. Details: - Fixed incorrect bit shifts in the following static functions: bli_obj_set_target_domain() bli_obj_set_target_prec() bli_obj_set_exec_domain() bli_obj_set_exec_prec() - Fixed incorrect bitmask in bli_dt_proj_to_single_prec(). - Updated bli_obj_real_part() and bli_obj_imag_part() so that it updates the target and exec datatypes (in addition to the storage datatypes). commit e006f2d0eeb229c1cd05a424496a774c29bdc5d7 Merge: bd8c55fe dafca7a0 Author: Field G. Van Zee Date: Wed Jun 27 15:54:38 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit bd8c55fe268e8e352508341ebd739ef4fc68eb92 Author: Field G. Van Zee Date: Wed Jun 27 15:52:37 2018 -0500 Added dt_on_output field to auxinfo_t. Details: - Added a new field to the auxinfo_t struct that can be used, in theory, to request type conversion before the microkernel stores/accumulates its microtile back to memory. - Added the appropriate get/set static functions to bli_type_defs.h. commit dafca7a0c2c72aaf15cb588b2bef6f246abb1905 Author: Devin Matthews Date: Mon Jun 25 16:20:10 2018 -0500 Fix botched memory addressing in Penryn kernel (no effect for GAS output). commit de493b0f349efebab98ab17f063d4d3d932c24c3 Merge: 195480be a7166feb Author: Devin Matthews Date: Mon Jun 25 14:26:06 2018 -0500 Merge pull request #226 from devinamatthews/dev Finish macroization of assembly ukernels. commit 195480beb589db7d582646f556e855c611d4c3a9 Merge: 07c3d0a9 3f387ca3 Author: Field G. Van Zee Date: Mon Jun 25 13:24:21 2018 -0500 Merge branch 'master' into dev commit 3f387ca35e42519f0d6a154814e4c8800fa2acb8 Author: Field G. Van Zee Date: Mon Jun 25 12:32:03 2018 -0500 Fixed bugs in configure's select_cc() function. 
Details: - This commit fixes several bugs in configure relating to selecting a C compiler. By dumb luck, two of the bugs sort of cancelled each other out in most use cases, which manifested as the expected behavior. Thanks to Mathieu Poumeyrol for bringing this issue to our attention, and to Devin Matthews for suggesting the more portable way of capturing both stdout and stderr and suggesting a return code check instead of testing stdout/stderr. - The first bug: As the values of the compiler search list are iterated over, only stderr is captured when querying a compiler with --version rather than both stdout and stderr. - The second bug: After each query, a conditional attempted to test whether the query resulted in anything being output. That conditional erroneously was using "-z" instead of "-n" for non-emptiness. Thus, most of the time, stderr was empty (because the --version info was being output on stdout), and since it was empty, the -z conditional (intended to execute only when a compiler was found to be responsive) executed. - A third bug was also fixed in the way that the merged stdout/stderr output was tested for non-emptiness (moving the 'cat' invocation to another line and testing the contents of a variable instead). - The three bugs above have been fixed as part of a partial rewrite of the select_cc() function in terms of a return code check, which obviated the need to save the output of stdout and stderr. - The fourth bug involved a misnamed variable in the right-hand side of a statement intended to prepend CC to search_list when CC was non-empty. This typically did not manifest as a bug since usually CC (if it was set) was set to a value that was known to work. commit a7166feb1053814b7dd27f3879ae38acfc9637fc Author: Devin Matthews Date: Mon Jun 25 12:09:18 2018 -0500 Finish macroization of assembly ukernels. commit f986396c2af5de06283b9834112782afd0a8907e Author: Field G.
Van Zee Date: Fri Jun 22 18:12:40 2018 -0500 Added 'configure --help' text for CFLAGS, LDFLAGS. Details: - Added mention of the new support for preset CFLAGS, LDFLAGS to the bottom of the text output by './configure --help'. - Updated usage example to use 'haswell' instead of 'sandybridge'. commit 884175d9ffb62e49535e6c1f7d58fb3b83e7e78f Author: Field G. Van Zee Date: Fri Jun 22 18:08:43 2018 -0500 Added configure support for preset CFLAGS, LDFLAGS. Details: - Any preexisting values set to the CFLAGS environment variable (or the CFLAGS variable if given on the command line) are saved by configure for later inclusion (prepending, to be precise) along with the compiler flags automatically determined by the BLIS build system. LDFLAGS is treated in a similar manner. Thanks to Dave Love for requesting this feature in issue #223 and Mathieu Poumeyrol for his support on this and a previous related issue. - Comment updates to build/config.mk.in. - Strip whitespace from return value of various cflags functions in common.mk. commit 07c3d0a95190bd23f0cd2ef220deb3384d8378d1 Author: Field G. Van Zee Date: Thu Jun 21 12:35:07 2018 -0500 Update to CREDITS file. commit a1ebbbf158c7b34c9032ef45431bc610b6f14858 Merge: 17928b1c c81c6f23 Author: Devin Matthews Date: Wed Jun 20 15:37:53 2018 -0500 Merge pull request #224 from devinamatthews/asm-macros Asm macros commit c81c6f23b9547b5d55ae68fd5a3bbd8a78290b6b Author: Devin Matthews Date: Wed Jun 20 15:20:44 2018 -0500 Fix problem with inc and dec macros. commit 5a63971c822fd452f97ba869625c8e87f6cbeebc Merge: b4d94e54 17928b1c Author: Devin Matthews Date: Wed Jun 20 14:07:49 2018 -0500 Merge remote-tracking branch 'upstream/dev' into asm-macros commit b4d94e54d44cf30e4bb452ca5263be3473c0582d Author: Devin Matthews Date: Wed Jun 20 14:07:24 2018 -0500 Convert x86 microkernels to assembly macros. commit 17928b1c9941aa58aef1f122c793e2b14e705267 Author: Field G.
Van Zee Date: Tue Jun 19 17:59:03 2018 -0500 Added static funcs bli_dt_domain(), bli_dt_prec(). Details: - Added definitions of static functions bli_dt_domain()/bli_dt_prec(), which extract a dom_t domain or prec_t precision value, respectively, from a num_t datatype. - Changed the return types of bli_obj_domain() and bli_obj_prec() from objbits_t to dom_t and prec_t. (Not sure why they were ever set to return objbits_t.) commit 5f7fbb7115b1bf532c169dfd9adef84c41a95031 Author: Field G. Van Zee Date: Tue Jun 19 15:38:55 2018 -0500 Static funcs for projecting dt to single/double. Details: - Added static functions for projecting a datatype to single precision or double precision, both for obj_t's storage datatypes and standalone datatypes. commit d4a22702c7a90273dc14f271db465c2e11e5b87e Author: Field G. Van Zee Date: Tue Jun 19 14:54:57 2018 -0500 Set up haswell config for optional col-pref ukrs. Details: - Added two presently-disabled cpp blocks in bli_cntx_init_haswell.c to easily allow one to switch to a set of column-preferential gemm microkernels (in the haswell subconfiguration). The second column- preferring block sets the register blocksizes to their appropriate values. However, cache blocksizes are left unchanged, and therefore are likely suboptimal. This should be addressed later. commit f317c2e31bfc329cb6bb4e06005e45b9c8a9d6a7 Author: Field G. Van Zee Date: Tue Jun 19 12:21:23 2018 -0500 Added get/set static funcs for exec dt/dom/prec. Details: - Added functions to bli_obj_macro_defs.h to get and set the target domain and target precision bits in the obj_t, and also added the appropriate support in bli_type_defs.h. commit e88a5b8da8c26caebd2b0fb73b30836fb5417c9c Author: Field G. Van Zee Date: Mon Jun 18 15:56:26 2018 -0500 Implemented castm, castv operations. Details: - Implemented castm and castv operations, which behave like copym and copyv except where the obj_t operands can be of different datatypes.
These new operations, however, unlike copym/copyv, do not build upon existing level-1v kernels. - Reorganized projm, projv into a 'proj' subdirectory of frame/base (to match the newly added frame/base/cast directory). - Added new macros to bli_gentfunc_macro_defs.h, _gentprot_macro_defs.h that insert GENTFUNC2/GENTPROT2 macros for all non-homogeneous datatype combinations. Previously, one had to invoke two additional macros--one which mixed domains only and another that included all remaining cases--in order to get full type combination coverage. - Defined a new static function, bli_set_dims_incs_2m(), to aid in the setting of various variables in the implementations of bli_??castm(). This static function joins others like it in bli_param_macro_defs.h. - Comment update to bli_copysc.h. commit 2000cdff59272974438e88e0e82d8e1a32710325 Author: Field G. Van Zee Date: Mon Jun 18 14:17:28 2018 -0500 Update to CREDITS file. commit ed2c8aed848ba2dede18df090cf2e0b6e4cc059f Author: Field G. Van Zee Date: Mon Jun 18 11:49:34 2018 -0500 Temporarily disabled small matrix handling on zen. Details: - Disabled small matrix handling in config/zen/bli_family_zen.h due to what appears to be a bug that manifests as failures in the single and double precision real level-3 BLAS test drivers (visible via out.sblat3 and out.dblat3). Thanks to Robin Christ for reporting this issue. commit ed20392c500940bfc0947795c1ff7c8c24f8e26f Author: Field G. Van Zee Date: Fri Jun 15 16:31:22 2018 -0500 Added get/set static funcs for exec dt/dom/prec. Details: - Added functions to bli_obj_macro_defs.h to get and set the execution domain and execution precision bits in the obj_t. - Added/rearranged a few functions in bli_obj_macro_defs.h. - Renamed some macros in bli_type_defs.h: EXECUTION -> EXEC. commit 22594e8e9ab55f5bc0e69d96a23e128502849999 Author: Field G. Van Zee Date: Thu Jun 14 17:35:23 2018 -0500 Updated sandbox/ref99 according to f97a86f. 
Details: - Applied changes to ref99 sandbox analogous to those applied to framework code in f97a86f. This involves setting the pack schemas of A and B objects temporarily to communicate those desired schemas to the control tree creation function in blx_gemm_cntl.c. This allows us to (henceforth) query the schemas from the control tree rather than the context. commit 1b5d0424d2c7e5eac33e02359c12917ef280949f Author: Field G. Van Zee Date: Wed Jun 13 18:41:32 2018 -0500 Prototype column-preferential zen gemm ukernels. Details: - Added prototypes to bli_kernels_zen.h for each of the four gemm microkernels that prefer outputting to column storage. commit f88c2e7a539e383297e846e6d4647058dd3db128 Author: Field G. Van Zee Date: Wed Jun 13 18:27:46 2018 -0500 Defined static function bli_blksz_scale_def_max(). Details: - Added a new static function to bli_blksz.h that scales both the default (regular) blocksize as well as the maximum blocksize in the blksz_t object. Reminder: maximum blocksizes have different meanings in different contexts. For register blocksizes, they refer to the packing register blocksizes (PACKMR or PACKNR) while for cache blocksizes, they refer to the maximum blocksize to use during the final iteration of a loop. commit 87db5c048e0c7f37351fda486abaf7d19fc5821c Author: Field G. Van Zee Date: Tue Jun 12 19:38:37 2018 -0500 Changed usage of virtual microkernel slots in cntx. Details: - Changed the way virtual microkernels are handled in the context. Previously, there were query routines such as bli_cntx_get_l3_ukr_dt() which returned the native ukernel for a datatype if the method was equal to BLIS_NAT, or the virtual ukernel for that datatype if the method was some other value. Going forward, the context native and virtual ukernel slots will both be initialized to native ukernel function pointers for native execution, and for non-native execution the virtual ukernel pointer will be something else.
This allows us to always query the virtual ukernel slot (from within, say, the macrokernel) without needing any logic in the query routine to decide which function pointer (native or virtual) to return. (Essentially, the logic has been shifted to init-time instead of compute-time.) This scheme will also allow generalized virtual ukernels as a way to insert extra logic in between the macrokernel and the native microkernel. - Initialize native contexts (in bli_cntx_ref.c) with native ukernel function addresses stored to the virtual ukernel slots pursuant to the above policy change. - Renamed all static functions that were native/virtual-ambiguous, such as bli_cntx_get_l3_ukr_dt() or bli_cntx_l3_ukr_prefers_cols_dt() pursuant to the above policy change. Those routines now use the substring "get_l3_vir_ukr" in their name instead of "get_l3_ukr". All of these functions were static functions defined in bli_cntx.h, and most uses were in level-3 front-ends and macrokernels. - Deprecated anti_pref bool_t in context, along with related functions such as bli_cntx_l3_ukr_eff_dislikes_storage_of(), now that 1m's panel-block execution is disabled. commit dbaf440540837b03643190cd685ed889fa7fd212 Merge: 22aa44eb 2610fff0 Author: Field G. Van Zee Date: Mon Jun 11 12:37:04 2018 -0500 Merge branch 'master' into dev commit 2610fff0b07bdb345cb2e334ef6bea0c63c8cead Author: Field G. Van Zee Date: Mon Jun 11 12:32:54 2018 -0500 Renamed 1m packm kernels from _1e to _1er. Details: - Renamed the reference packm kernels used by 1m. Previously, they used a _1e suffix, which was confusing since they packed to both 1e and 1r schemas. This was likely an artifact of the time when there were separate kernels for each schema before I decided to combine them into a single function (per datatype and panel dimension), and the 1e functions were the ones to inherit the 1r functionality. The kernels have now been renamed to use a _1er suffix.
commit 712de9b371a8727682352a2f52cd4880de905f0b Author: Field G. Van Zee Date: Sat Jun 9 14:36:30 2018 -0500 Added missing semicolon in 03obj_view.c Details: - Thanks to Tony Skjellum for pointing out this typo due to a last-minute change to the source prior to committing. commit 043d0cd37ef4a27b1901eeb89d40083cfb2a57ba Author: Field G. Van Zee Date: Sat Jun 9 13:46:49 2018 -0500 Implemented bli_acquire_mpart(), added example code. Details: - Implemented bli_acquire_mpart(), a general-purpose submatrix view function that will alias an obj_t to be a submatrix "view" of an existing obj_t. - Renumbered examples in examples/oapi and inserted a new example file, 03obj_view.c, which shows how to use bli_acquire_mpart() to obtain submatrix views of existing objects, which can then be used to indirectly modify the parent object. commit f1908d39767baef56077def69126d96f805ee27e Author: Field G. Van Zee Date: Fri Jun 8 14:22:22 2018 -0500 Fixed broken input.operations.fast. Details: - Removed three input lines from input.operations.fast (labeled "test sequential micro-kernel") that I intended to remove in bd02c4e. These lines prevented 'make check' (and 'make checkblis-fast') from completing correctly. Note: This bug was fixed in 3df39b3, but that commit has not yet been merged into master, hence this redundant commit. Thanks to Robert van de Geijn for reporting this issue. commit 262a62e3482c5caa947a89cabb562b5887555bd6 Author: Field G. Van Zee Date: Fri Jun 8 12:10:54 2018 -0500 Fixed undefined ref in steamroller/excavator configs. Details: - Fixed erroneous calls to bli_cntx_init_piledriver_ref() in bli_cntx_init_steamroller() and bli_cntx_init_excavator(), which should have been to their respectively-named bli_cntx_init_*() functions instead. Thanks to qnerd for bringing these bugs to our attention. commit 22aa44ebec2c7884bdc944775a1aa7534ab53f0d Merge: 65fae950 b65d0b84 Author: Field G. 
Van Zee Date: Thu Jun 7 17:42:59 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 65fae95074d239354737355bbe6f202d4f8b2871 Author: Field G. Van Zee Date: Thu Jun 7 17:41:09 2018 -0500 Implemented bli_setrm, _setim, _setrv, _setiv. Details: - Defined new wrappers to setm/setv operations in frame/base/bli_setri.c that will target only the real or only the imaginary parts of a matrix/vector object. - Updated bli_obj_real_part() so that the complex-specific portions of the function are not executed if the object is real. - Defined bli_obj_imag_part(). - Caveat: If bli_obj_imag_part() is called on a real object, it does nothing, leaving the destination object untouched. The caller must take care to only call the function on complex objects. - Reordered some of the static functions in bli_obj_macro_defs.h related to aliasing. commit b65d0b841b7e4357bc2cf743bbb03384a3ab0bfa Author: Field G. Van Zee Date: Thu Jun 7 14:38:41 2018 -0500 Fixed bug in bli_dt_proj_to_complex(). Details: - Fixed a bug identical to the one fixed in 0a4a27e, except this time in the bli_obj_param_defs.h header file. It looks like the only consumers of this static function were in bli_l0_oapi.c, and so this may not have been manifesting (yet). commit 55b6abdf7458e31df3ad01796d67c2332c776948 Author: Field G. Van Zee Date: Thu Jun 7 14:08:12 2018 -0500 Enforce consistent datatypes in most object APIs. Details: - Added logic to level-1v, -1d, -1f, -1m, -2, and -3 operations' _check() functions to ensure that all operands are of the same datatype. There are some exceptions that were left out, such as the _check() function for the various norm operations since they have a different idea of datatype consistency (ie: the norm object must be the real projection of the primary input vector/matrix object). commit 513138b1a1ecebd015580423c779810cae5c67f2 Author: Field G. Van Zee Date: Thu Jun 7 12:24:47 2018 -0500 Defined/implemented bli_projv(). 
Details: - Added an implementation for bli_projv() to go along with the implementation of bli_projm() added in 0a4a27e. The only difference between the two is that bli_projv() may only be used on vectors, whereas bli_projm() is general-purpose. - Added a _check() function corresponding to bli_projv(). commit 5f71c1e719eb482b2a4e40daa280c4f7d05b6963 Merge: b5a641e9 3df39b37 Author: Field G. Van Zee Date: Wed Jun 6 19:06:14 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit b5a641e968469805906eb2c971384d12ad1beac5 Author: Field G. Van Zee Date: Wed Jun 6 19:05:37 2018 -0500 Added char-to-dt and dt-to-char mapping functions. Details: - Defined additional functions in bli_param_map.c: bli_param_map_char_to_blis_dt() bli_param_map_blis_to_char_dt() which will map a char to its corresponding num_t, or vice versa. commit 0a4a27e1a4487480410bc0b1bb034bcf97583214 Author: Field G. Van Zee Date: Wed Jun 6 19:02:29 2018 -0500 Defined/implemented bli_projm(). Details: - Defined a new operation in frame/base/bli_proj.c, bli_projm(), which behaves like bli_copym(), except that operands a and b are allowed to contain data of differing domains (e.g. a is real while b is complex, or vice versa). The file is named bli_proj.c, rather than bli_projm.c, with the intention that a 'v' vector version of the function may be added to the same file (at some point in the future). - Added supporting bli_check_*() functions in bli_check.c to confirm consistent precisions between two datatypes/objects, as well as the appropriate error message in bli_error.c and a new error code in bli_type_defs.h. - Wrote a bli_projm_check() function to go along with bli_projm(). - Defined static function bli_obj_real_part() in bli_obj_macro_defs.h, which will initialize an obj_t alias to the real part of the source object. - Fixed a bug in the static function bli_dt_proj_to_complex(), found in bli_param_macro_defs.h. Thankfully, there were no calls to the function to produce buggy behavior.
commit 3df39b37a0134befa34b6b6259db98467c7bc965 Author: Field G. Van Zee Date: Wed Jun 6 15:35:05 2018 -0500 Fixed recently broken input.operations.fast. Details: - Removed "test sequential front-end" lines from microkernel test entries of input.operations.fast. This change was meant for inclusion in bd02c4e but was missed due to slightly different wording of the comment (I used "sed //d" to remove the lines). This fixes the broken 'make checkblis-fast' (and 'make check') targets. commit 3f48c38164b4135515b5c752c506fdccc4480be2 Author: Field G. Van Zee Date: Tue Jun 5 16:52:35 2018 -0500 Cosmetic fix to configure output in config.mk. Details: - Fixed configure so that MK_ENABLE_MEMKIND is assigned "no" when the option is disabled due to libmemkind not being present. This wasn't affecting anything since the one use of the variable (in common.mk) was formulated as "ifeq ($(MK_ENABLE_MEMKIND),yes)". That is, the variable being empty was effectively equivalent to it being set to "no". - Comment updates to build/config.mk.in, common.mk. commit 5df201260f64aa98a365931f6d2da70144d69932 Merge: 1b9af85e 96d2774b Author: Field G. Van Zee Date: Tue Jun 5 16:14:19 2018 -0500 Merge branch 'master' into dev commit 1b9af85ec98d91bb2b27aadaa3df344d18faff35 Author: Field G. Van Zee Date: Tue Jun 5 16:07:13 2018 -0500 Updated ref99 call to _cntx_set_thrloop_from_env(). Details: - Reordered the arguments in the ref99 sandbox's call to bli_cntx_set_thrloop_from_env() to be consistent with the updated function signature from f97a86f. Thanks to Devangi Parikh for reporting this issue. commit 96d2774b4cb44ff1e8b5798d7cfc83154a607624 Author: Tyler Michael Smith Date: Tue Jun 5 14:17:39 2018 +0200 Make bli_auxinfo_next_b() return b_next, not a_next (#216) commit bd02c4e9f7fe07487276e61507335d48c8e05f35 Author: Field G. Van Zee Date: Mon Jun 4 13:42:17 2018 -0500 Cleanups to testsuite, input.operations format. 
Details: - Removed the line in each operation entry in input.operations titled "test sequential front-end" and the corresponding support for the lines in the testsuite input parsing code. This line was included in some of the earliest versions of the testsuite, back when I intended to eventually have separate multithreaded APIs. Specifically, I envisioned that multithreaded and sequential testing could be enabled or disabled on an operation level. However, BLIS evolved in a different direction and still does not have multithreaded-specific APIs (even if it will eventually someday). But even if it did have such APIs, I doubt I would allow the user to enable/disable them on an operation level. Thus, this was a zombie future parameter that was never used and never made sense to begin with. The one instance of the front_seq variable, used in the various libblis_test_() functions to guard the call to the operation test driver, that remains was commented out instead of deleted so that someday it could be easily changed via sed, if desired. - Various minor cleanups to the testsuite code, including consolidating use of DISABLE and DISABLE_ALL and reexpressing certain conditional expressions in the libblis_test_() functions in terms of boolean functions. commit 2c6d99b99e50d70f904da298a0c59be16cc5c180 Author: Field G. Van Zee Date: Sun Jun 3 18:13:36 2018 -0500 Fixed names out of alphabetical order in CREDITS. commit 7a207e8f2c5046f8b295a78e029ff2de765c7409 Author: Field G. Van Zee Date: Sun Jun 3 18:04:27 2018 -0500 Disabled indirect blacklisting (issue #214). Details: - Return early from function, pass_config_kernel_registries(), that implements indirect blacklisting of subconfigurations (during pass 0). In short, I realized that indirect blacklisting is not needed in the situations I envisioned, and can actually cause problems under certain circumstances.
Thanks to Tony Skjellum for reporting the issue (#214) that led to this commit, and to Devin Matthews for prompting me to realize that indirect blacklisting was unnecessary, at least as originally envisioned. commit d7fb32682057c7458c8891c0eedafc374fd9beef Author: Field G. Van Zee Date: Sun Jun 3 13:20:37 2018 -0500 Fixed syntax artifacts from 4b36e85 in examples. Details: - Fixed artifacts of malformed recursive sed expressions used when preparing 4b36e85, in which most function-like macros were converted to static functions. The syntactically defective code was contained entirely in examples/oapi. Thanks to Tony Skjellum for reporting this issue. - Update to CREDITS file. commit ed7dedfd4a07eefeb5a038f9899afb8053b45383 Merge: f97a86f3 469727d4 Author: Field G. Van Zee Date: Sat Jun 2 20:29:53 2018 -0500 Merge branch 'master' into dev commit f97a86f322a6e3e31f33c89befc66189b0b8c64f Author: Field G. Van Zee Date: Sat Jun 2 20:28:20 2018 -0500 Updated setting/querying pack schema (cntx->cntl). - Query pack schemas in level-3 bli_*_front() functions and store those values in the schema bitfields of the corresponding obj_t's when the cntx's method is not BLIS_NAT. (When method is BLIS_NAT, the default native schemas are stored to the obj_t's.) - In bli_l3_cntl_create_if(), query the schemas stored to the obj_t's in bli_*_front(), clear the schema bitfields, and pass the queried values into bli_gemm_cntl_create() and bli_trsm_cntl_create(). - Updated APIs for bli_gemm_cntl_create() and bli_trsm_cntl_create() to take schemas for A and B, and use these values to initialize the appropriate control tree nodes. (Also cpp-disabled the panel-block cntl tree creation variant, bli_gemmpb_cntl_create(), as it has not been employed by BLIS in quite some time.) - Simplified querying of schema in bli_packm_init() thanks to above changes.
- Updated openmp and pthreads definitions of bli_l3_thread_decorator() so that thread-local aliases of matrix operands are guaranteed, even if aliasing is disabled within the internal back-end functions (e.g. bli_gemm_int.c). Also added a comment to bli_thrcomm_single.c explaining why the extra aliasing is not needed there. - Change bli_gemm() and level-3 friends so that the operation's ind() function is called only if all matrix operands have the same datatype, and only if that datatype is complex. The former condition is needed in preparation for work related to mixed domain operands, while the latter helps with readability, especially for those who don't want to venture into frame/ind. - Reshuffled arguments in bli_cntx_set_thrloop_from_env() to be consistent with BLIS calling conventions (modified argument(s) are last), and updated all invocations in the level-3 _front() functions. - Comment updates to bli_cntx_set_thrloop_from_env(). commit 965db85d29977d228ea744581edf2b682eb8e8a8 Author: Field G. Van Zee Date: Fri Jun 1 12:32:15 2018 -0500 Updated macro invocations in bli_gemm_ker_var2.c. Details: - Updated "get next a/b micropanel" macro invocations in bli_gemm_ker_var2.c according to changes in 9588625. - Comment update in bli_cntx.c. commit 8749fa0b48a7710f4115023e2c46bc80167bc8f9 Author: Field G. Van Zee Date: Thu May 31 12:34:01 2018 -0500 Cleanups to ref99/README.md, test/3m4m/Makefile. Details: - Minor edits to sandbox/ref99/README.md. - Removed cpp guards in sandbox/ref99/thread/blx_gemm_thread.h to be consistent with other headers in sandbox/ref99. - Additional targets and related cleanups in test/3m4m/Makefile. commit 9588625c43c86ef1bde8140f620a30f52420e6a6 Author: Field G. Van Zee Date: Wed May 30 15:19:53 2018 -0500 Renamed "next micropanel" macros in _l3_thrinfo.h. Details: - Renamed several macros defined in bli_l3_thrinfo.h designed to compute the values of a_next and b_next to insert into an auxinfo_t struct in level-3 macrokernels. 
(Previously, the macros did not use a bli_ prefix.) - Updated instances of above macro usage within various macrokernels. commit e4420591225fca2f63ca74ef6a23b962fcd4bec0 Merge: 34f974d1 850a8a46 Author: Field G. Van Zee Date: Tue May 29 17:12:22 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 34f974d1a83a7d29ba09f67e392d361231fdf99c Author: Field G. Van Zee Date: Tue May 29 17:11:52 2018 -0500 More tweaks/updates to sandbox/ref99/README.md. commit 850a8a46c0a569a2652d8c200e5c53b61bcf988d Author: Devin Matthews Date: Tue May 29 13:51:21 2018 -0500 Test all x86_64 configurations*... (#212) * Add custom SDE cpuid files. * Set up testing of all x86_64 architectures (except bulldozer) using SDE. * Update .travis.yml [ci skip] * Update do_testsuite.sh [ci skip] * Updated .travis.yml with my secret token. Details: - Replaced Devin's temporary secret token with my own, which is used by Travis when accessing the Intel SDE via Dropbox. * Work around CPUID dispatch in glibc/libm by patching ld.so. * Detect path of loader at runtime. * Attempt to make SDE run on Travis * Allow unpatched ld.so if we don't know how to patch it. I *think* this only happens for older glibc without the multi-arch stuff (e.g. Ubuntu 14.04 on Travis), but who knows? * Upgrade Travis to gcc-6 and binutils-2.26. * Try to get Travis to use the right assembler. * Apparently you need ld-2.26 too. * Try to also patch ld.so from Ubuntu 14.04. * Take the nuclear option. * Account for non-absolute dependencies in ldd output. * String manipulation fail. * Update patch-ld-so.py * Add Zen to SDE testing. * Removed dead variable from travis/do_testsuite.sh. Details: - Removed 'BLIS_ENABLE_TEST_OUTPUT=yes' from make invocations in travis/do_testsuite.sh. This variable is no longer present in the BLIS build system (if it ever was?), and therefore has no effect. commit 42ea02a34e5c144893fe239ae55daef895d92677 Author: Field G. 
Van Zee Date: Tue May 29 12:48:14 2018 -0500 Renamed c99 sandbox to ref99. Details: - Renamed sandbox/c99 to sandbox/ref99. I wanted to name the sandbox so that it would be thought of as a "reference" sandbox. I kept the "99" to differentiate it from future reference sandboxes that may be written in another language (such as C++). - Updates to sandbox/ref99/README.md. commit 0e7205ccef50dccd4306cf427a63633396472813 Author: Field G. Van Zee Date: Tue May 29 12:36:13 2018 -0500 Remove sandbox/.gitkeep now that dir is non-empty. commit 3a4603858e3819cbd6ed7dd67d0fc0b3f89ed254 Author: Field G. Van Zee Date: Sat May 26 15:51:08 2018 -0500 More README.md updates to sandbox/c99. Details: - Added a section that walks the reader through how to configure BLIS to use a gemm sandbox. commit 2bad97f6bdf4642884d60fc03970549902a54d74 Author: Field G. Van Zee Date: Sat May 26 15:31:16 2018 -0500 Updates to CREDITS, sandbox/c99/README.md. commit 2b4a447526effa3e847a7e5c15c3758573f12318 Author: Field G. Van Zee Date: Fri May 25 18:51:23 2018 -0500 Initial implementation of c99 "reference" sandbox. Details: - Added a c99 sandbox (in sandbox/c99) to serve as a starting point for others looking to experiment with alternative implementations of gemm in BLIS. Note that this sandbox implementation is a first draft and will be refined over time. - Minor updates to Makefile and common.mk to restrict what source files get recompiled when sandbox files are touched. - Added an initial draft of a README.md in sandbox/c99. commit 469727d4f8a976d8713afb4d0b6235c322498db0 Author: Field G. Van Zee Date: Fri May 25 16:17:13 2018 -0500 Very minor comment updates. commit 66dbe69a0f9359bf1e39b5672ee365213de2e3ee Author: Field G. Van Zee Date: Fri May 25 15:45:53 2018 -0500 Converted macros to static funcs in _packm_cntl.h. Details: - Converted various macros in frame/1m/packm/bli_packm_cntl.h (designed to access fields of a packm_params_t struct) to static functions.
commit 22deef2f5463a47e3b3c37fc313d17550f10ee06 Author: Field G. Van Zee Date: Thu May 24 14:28:55 2018 -0500 Support alternative gemm implementation sandboxes. Detail: - configure: - add support for --enable-sandbox=NAME to configure script, where NAME is a subdirectory of a new 'sandbox' directory that contains an alternative implementation of gemm. (For now, only implementations of gemm may be provided via a sandbox.); - add support for C++ compiler. C++ compilers are handled in a manner similar to that of C compilers, in that a default search order is used, and that CXX is searched for first, if the variable is set. In practice, the C++ compiler that is selected should correspond to the selected C compiler. (Example: If gcc is selected for C, g++ should be selected for C++.) The result of the search is output to config.mk via build/config.mk.in. NOTE: The use of C++ in BLIS is still hypothetical, but may eventually move to being experimental. This support was intended only for use of C++ within a gemm sandbox. - build/config.mk.in: - define SANDBOX variable containing sandbox subdirectory name. - build/bli_config.in: - define either of the BLIS_ENABLE_SANDBOX or BLIS_DISABLE_SANDBOX macros in bli_config.h. - common.mk: - include makefile fragments that were propagated into the specified sandbox subdirectory; - generate different CFLAGS for sandboxes, as well as a separate CXXFLAGS variable for sandboxes when C++ source files are compiled; - isolate into a single location lists of file suffixes for various purposes. - reorganized/clean up code related to identifying header files and paths. - Makefile: - generate object filepaths for and compile source code files found in sandbox sub-directory; - remove makefile fragments placed in sandbox sub-directory (cleanmk); - various other cleanups. - Added .cc, .cpp, and .cxx to list of suffixes of files to recognize in makefile fragments (via build/gen-make-frags/suffix_list). 
- Updated blis.h to conditionally #include bli_sandbox.h (via a new file, bli_sbox.h), which each sandbox is assumed to use for any type definitions and function prototypes it wishes to export out to blis.h. - Conditionally disable bli_gemmnat() implementation in frame/3 when BLIS_ENABLE_SANDBOX is defined. commit 25e3501ed57a0db7f860c88b7199b36049aec12a Merge: 216a4cb9 5140ee34 Author: Field G. Van Zee Date: Thu May 24 13:57:16 2018 -0500 Merge branch 'master' into dev commit 5140ee3424c744981a3fed3b5a748ebbfc111388 Author: Field G. Van Zee Date: Wed May 23 16:56:14 2018 -0500 Updated types of bli_is_[un]aligned_to() functions. Details: - Changed the void* arguments of the following static functions: bli_is_aligned_to() bli_is_unaligned_to() bli_offset_past_alignment() to siz_t, and the return type of bli_offset_past_alignment() from guint_t to siz_t. This allows for more versatile usage of these functions (e.g. when aligning both pointers and leading dimension). - Updated all invocations of these functions, mostly in kernels/penryn but also in kernels/bgq, to include explicit typecasts to siz_t when pointer arguments are passed in. - Thanks to Devin Matthews for pointing out this potential bug (via issue #211). - Deleted a few trailing spaces in various penryn kernels. - Removed duplicate instances of the words "derived" and "THEORY" from various kernel license headers, likely from a malformed recursive sed performed long ago. commit 216a4cb9cb87fa4c93f6ceb6ae90602e5018b305 Author: Field G. Van Zee Date: Fri May 18 18:47:03 2018 -0500 Minor update to flatten-headers.[py|sh] help text. Details: - Fixed a typo and removed some outdated language from the help text of flatten-headers.py and flatten-headers.sh. commit 962a706a6f56ea070ac4683f0af69c7e59af8ecb Author: Field G. Van Zee Date: Fri May 18 18:19:40 2018 -0500 Updated LICENSE file to mention HP Enterprise. Details: - Added HP Enterprise to the LICENSE file. 
Previously, only the source files touched by HPE contained the corresponding copyright notices. (This oversight was unintentional.) - Updated file-level copyright notices to include a comma, to match the formatting used for UT and AMD copyrights. commit efa43e13effe901ad31e734ac90f027e89473bd9 Author: Field G. Van Zee Date: Fri May 18 12:20:40 2018 -0500 More updates to CREDITS and RELEASING files. commit f94ab97af8e86baf9ee9a9cbaef8bb3712df2e11 Author: Field G. Van Zee Date: Thu May 17 17:45:31 2018 -0500 Update to CREDITS file. commit 4919b10c005e006a6d818eb8f865f9dbd8aa16df Author: Field G. Van Zee Date: Thu May 17 16:38:49 2018 -0500 Minor changes to README.md and CONTRIBUTING.md. commit b89451187e8321b673a1cf7603c8d48028d9d4c8 Author: Field G. Van Zee Date: Thu May 17 16:23:06 2018 -0500 README.md update. Details: - Added "Contributing" section with relevant links. commit af244194e7d76276a1b90fe59f9307dde0429e1d Author: Field G. Van Zee Date: Thu May 17 15:38:02 2018 -0500 Removed explicit critical sec. from bli_memsys.c. Details: - Removed critical sections protecting the initialization/finalization of bli_memsys.c. These synchronization mechanisms are no longer needed now that BLIS initializes all APIs via pthread_once(). commit 10c9e8f95254d8c6436c4d3cb093fa5544b45c90 Author: Field G. Van Zee Date: Thu May 17 15:22:51 2018 -0500 Cache hardware's arch_t id after querying once. Details: - Added logic to bli_arch.c that will call what was previously the body of bli_arch_query_id() only once and then cache the value in a static variable local to the file. (Previously, the arch_t associated with the hardware/configuration was queried every time bli_arch_query_id() was called, which was at least once per level-3 function call.) Thanks to Devin Matthews for suggesting this feature via issue #175.
- Added -lpthread to the compile/link command line of the compiler invocation that compiles build/detect/config/config_detect.c, which prints the string identifying the detected configuration, since it is now needed due to new pthread_once() logic in bli_arch.c. - Implementation note: I chose to implement this arch_t caching feature via pthread_once(), using a separate pthread_once_t variable local to the file, rather than calling bli_init_once(). The reason is that I did not want to require bli_init() as a prerequisite to this function. bli_init() already calls several sub-components, some of which make use of bli_arch_query_id(), and therefore it would be easy to fall into a circular self-init situation (which usually causes pthreads to hang indefinitely). commit f28a15293890ac6fbceac229fd204dbc9fec6e27 Author: Francisco Igual Date: Thu May 17 09:26:14 2018 +0000 Fixed clobber list bug in ARMv8 ukernel commit 2e31dd7852b4d6a9355899cf9659d4b8130461cb Author: Field G. Van Zee Date: Wed May 16 17:28:33 2018 -0500 Inserted missing integer typecasting into ukernels. Details: - Inserted missing safeguards into most microkernels to ensure that the integers read by the microkernel's assembly instructions are of the appropriate size. In many cases, this bug was going undetected likely because the compiler was inserting zero padding before the integers in the calling function, allowing the assembly code to read 64-bits in a way that did not corrupt the "lower" 32 integer bits with garbage in the higher bits. Thanks to Francisco Igual and Devangi Parikh for finding this issue. commit 12dfa9516428b4092554f0ce70b07571d35de222 Author: Field G. Van Zee Date: Wed May 16 12:46:57 2018 -0500 Fixed a bug in determining default integer size. Details: - Fixed a bug that would cause configurations to inadvertently define their integers to be 32 bits when those environments actually call for 64-bit integers.
While either BLIS_ARCH_64 or BLIS_ARCH_32 is defined in bli_system.h (based on whether preprocessor macros such as __x86_64 or __aarch64__ are defined by the environment), bli_system.h was being #included *after* bli_config_macro_defs.h, in which the BLIS_ARCH_64 macro was used to choose an integer type size in the event that BLIS_INT_TYPE_SIZE was not already defined by configure via bli_config.h. And due to the structure of the cpp code in that file, the 32-bit integer case was being chosen. Thanks to Francisco Igual and Devangi Parikh for their help in isolating this bug. - Moved the #include of hbwmalloc.h and related preprocessor code to bli_kernel_macro_defs.h to facilitate the reshuffling of the #include for bli_system.h in blis.h. commit f930cec0f35824c0f9ebbd218614209217d491cb Author: Field G. Van Zee Date: Tue May 15 17:47:08 2018 -0500 More tweaks to CONTRIBUTING.md. commit 173e30ff7d293ba31f3fab8ab0c0a695eda3d4fd Author: Field G. Van Zee Date: Tue May 15 14:48:34 2018 -0500 Added initial draft of CONTRIBUTING.md file. Details: - Thanks to the Ruby on Rails project for providing a good template off of which to build. commit 6e25e758b444bf725046674e1e64c6a52421749d Author: Nico Schlömer Date: Tue May 15 14:03:20 2018 +0200 Debian config (#206) * add debian config * correct wording in the README commit fcf6c6a3c87da08a7cdb92b102489b991ef7a644 Author: Alex Arslan Date: Mon May 14 18:41:03 2018 -0700 Fix shared library builds on platforms other than Linux and macOS (#209) * Fix detection of systems other than Linux and macOS The way the logic is currently laid out, any platform that isn't Linux gets assigned the .dylib shared library extension and the macOS-specific compiler flags. This reverses the logic to check for macOS first, and have the fallback use the Linux definitions, which apply to most other systems as well. * Use SHLIB_EXT instead of SO_SUF The former is more standard, as jakirkham pointed out in a comment. 
commit 6f7f51048c48f31d691c06451d0fd2cbc453ad03 Author: Field G. Van Zee Date: Mon May 14 18:41:56 2018 -0500 Echo cc_vendor when printing compiler version. Details: - Echo the ${cc_vendor} when informing the user of the compiler's version. Previously, the actual ${cc} (which could be a path to the executable) was being printed, which has already been printed by that point in the configure script. commit ad67dc4e348b0a381efc057573a6b03cc7e26db0 Author: Field G. Van Zee Date: Mon May 14 18:35:28 2018 -0500 Communicate cc, cc_vendor to make via config.mk. Details: - Historically, the compiler selection has happened statically in the various make_defs.mk and would only be overridden by setting CC (either prior to running configure or as a configure argument). However, in the last couple months, configure has evolved to contain rather sophisticated compiler detection logic for the purposes of blacklisting sub-configurations. It only makes sense that configure now fully take over the responsibility of selecting a compiler from the GNU make side of the build system. Thanks to Alex Arslan for his help exposing this issue. - Substitute found_cc into CC in config.mk via configure. - Set a new variable, CC_VENDOR, in config.mk via substitution from configure, and disable the corresponding CC_VENDOR code in common.mk. - Disabled default compiler selection (usually gcc) in the sub-configs' various make_def.mk files. commit 20af119fc97ec6120017a7a5ba5f9aaa920c7640 Author: Field G. Van Zee Date: Mon May 14 17:44:58 2018 -0500 Added README.md to 'config' directory. Details: - Added a brief README.md file to the config directory to redirect those who may be exploring the source tree to the ConfigurationHowTo wiki. (Included is a very brief explanation of configurations for those who don't have time to read the wiki.) Thanks to Nico Schlömer for this suggestion. commit 9dbce16269c3e1f27c7a0d64372cc76aed30dfc1 Author: Field G.
Van Zee Date: Mon May 14 17:04:54 2018 -0500 Search for 'cc clang gcc' on OpenBSD, FreeBSD. Details: - Swapped gcc and clang in the compiler search list for OpenBSD. - Use the same search list for FreeBSD as above. commit 55ebf24d63128b5fd15b10160485667415a02a55 Author: Field G. Van Zee Date: Mon May 14 16:19:08 2018 -0500 Change compiler search order on OpenBSD. Details: - Set a compiler search list (and order) as a function of the OS detected via 'uname -s'. By default, this list and order is 'gcc clang cc' for Linux and Darwin (OS X), and any other OS except OpenBSD. On OpenBSD, we use 'cc gcc clang' because OpenBSD's default installation of gcc (4.2.1) is too old for BLIS. Thanks to Alex Arslan for reporting this issue and suggesting a fix. commit 4fb353bd90e6642c8aeffd1b1e6329f54eee4bb4 Merge: 4b36e85b 8a2857b5 Author: Field G. Van Zee Date: Sun May 13 17:50:51 2018 -0500 Merge branch 'master' into dev commit 8a2857b5e3c633b18c24f2275110437a702a71d0 Author: Field G. Van Zee Date: Fri May 11 18:42:05 2018 -0500 Fixed README.md typo; mention 'make check'. commit 543935c02f9335142d2e485a15f37dbaebe012ed Author: Field G. Van Zee Date: Fri May 11 18:35:32 2018 -0500 Updated README.md with Ubuntu packages link. Details: - Created a separate section of README.md for external packages, with one bullet each for Dave Love's rpms and Nico Schlömer's Ubuntu apt packages. Thanks to Dave and Nico for their contributions. commit af1d8470b56d3b2a1c8513d366d788dddcb84baa Author: Field G. Van Zee Date: Fri May 11 17:49:58 2018 -0500 Better handling of shared libraries on OS X. Details: - Use the .dylib shared library suffix on OS X (instead of .so in Linux). - Link with the -dynamiclib and -install_name options on OS X (instead of -shared and -soname in Linux). - Determine operating system (e.g. Linux, Darwin) during configure and substitute into config.mk.in rather than run 'uname -s' during make. - Echo operating system during configure.
commit 4b72a462d7467cf815422aafac7b05037d2e3b13 Author: Field G. Van Zee Date: Thu May 10 18:35:38 2018 -0500 Enable building shared library by default. Details: - Tweaked configure so that the shared library is generated by default. - Updated --help text and configure's feedback messages reporting the status of the static/shared builds. - Changed the order of build product installation so that headers are installed last, after libraries and symlinks. commit b699bb1ff03c6e9baaa054805b4939983ae7145b Author: Field G. Van Zee Date: Thu May 10 15:54:17 2018 -0500 Adopt Linux-like .so versioning at install-time. Details: - Changed the naming conventions used for installed libraries and symlinks to more closely mirror patterns used by typical GNU/Linux libraries. Whereas previously static and shared libraries were installed and symlinked as follows: (library) libblis-0.3.2-15-haswell.a (library) libblis-0.3.2-15-haswell.so (symlink) libblis.a -> libblis-0.3.2-15-haswell.a (symlink) libblis.so -> libblis-0.3.2-15-haswell.so we now use the following naming conventions: (library) libblis.a (symlink) libblis.so -> libblis.so.0.1.2 (symlink) libblis.so.0 -> libblis.so.0.1.2 (library) libblis.so.0.1.2 where 0.1.2 indicates shared library major, minor, and build versions of 0, 1, and 2, respectively. The conventional version string can still be queried by linking to the library in question and then calling bli_info_get_version_str(). (The testsuite binary does this automatically at startup.) - Added logic to common.mk to set the soname field in the shared library via the -soname linker flag. - Added a 'so_version' file to the top-level directory containing two lines. The first line specifies the .so major version number, and the second line specifies the minor and build version numbers joined with a '.'. This file is read by configure and those values substituted into build/config.mk.in to define SO_MAJOR, SO_MINORB, and SO_MMB variables. 
commit fc2d9ec6bf46f6e5b19d196208415ce433e95b10 Author: Field G. Van Zee Date: Wed May 9 15:19:28 2018 -0500 Tweaks to top-level clean and distclean targets. Details: - Moved the removal of bli_config.h from cleanh to distclean. - Removed cleantest as a dependency of clean. commit bf0350305971e3991861b5117a13fda31ff97b6d Author: Field G. Van Zee Date: Tue May 8 16:49:22 2018 -0500 Renamed (shortened) a few build system variables. Details: - Renamed the following variables in config.mk (via build/config.mk.in): BLIS_ENABLE_VERBOSE_MAKE_OUTPUT -> ENABLE_VERBOSE BLIS_ENABLE_STATIC_BUILD -> MK_ENABLE_STATIC BLIS_ENABLE_SHARED_BUILD -> MK_ENABLE_SHARED BLIS_ENABLE_BLAS2BLIS -> MK_ENABLE_BLAS BLIS_ENABLE_CBLAS -> MK_ENABLE_CBLAS BLIS_ENABLE_MEMKIND -> MK_ENABLE_MEMKIND and also renamed all uses of these variables in makefiles and makefile fragments. Notice that we use the "MK_" prefix so that those variables can be easily differentiated (such as via grep) from their "BLIS_" C preprocessor macro counterparts. - Other whitespace changes to build/config.mk.in. - Renamed the following C preprocessor macros in bli_config.h (via build/bli_config.h.in): BLIS_ENABLE_BLAS2BLIS -> BLIS_ENABLE_BLAS BLIS_DISABLE_BLAS2BLIS -> BLIS_DISABLE_BLAS BLIS_BLAS2BLIS_INT_TYPE_SIZE -> BLIS_BLAS_INT_TYPE_SIZE and also renamed all relevant uses of these macros in BLIS source files. - Renamed "blas2blis" variable occurrences in configure to "blas", as was done in build/config.mk.in and build/bli_config.h.in. - Renamed the following functions in frame/base/bli_info.c: bli_info_get_enable_blas2blis() -> bli_info_get_enable_blas() bli_info_get_blas2blis_int_type_size() -> bli_info_get_blas_int_type_size() - Remove bli_config.h during 'make cleanh' target of top-level Makefile. commit 4b36e85be9b516b4089b24768f881dd976668997 Author: Field G. Van Zee Date: Tue May 8 14:26:30 2018 -0500 Converted function-like macros to static functions. 
Details: - Converted most C preprocessor macros in bli_param_macro_defs.h and bli_obj_macro_defs.h to static functions. - Reshuffled some functions/macros to bli_misc_macro_defs.h and also between bli_param_macro_defs.h and bli_obj_macro_defs.h. - Changed obj_t-initializing macros in bli_type_defs.h to static functions. - Removed some old references to BLIS_TWO and BLIS_MINUS_TWO from bli_constants.h. - Whitespace changes in select files (four spaces to single tab). commit 7e5648ca150757b874f6823da832f3798c40b9f9 Author: Field G. Van Zee Date: Mon May 7 18:59:19 2018 -0500 Add configure support for --libdir, --includedir. Details: - Added support for two new configure options: --libdir and --includedir. They specify the precise install directories for libraries and header files, respectively, and override any location implied by the --prefix option (including the default install prefix, if --prefix was not given). Thanks to Nico Schlömer for suggesting this via issue #195. - Removed the INSTALL_PREFIX definition/anchor from build/config.mk.in and replaced it with corresponding definitions/anchors for libdir and includedir. - Updated top-level Makefile to use the new variables, INSTALL_LIBDIR and INSTALL_INCDIR, instead of INSTALL_PREFIX (which is now no longer needed by make). - Set default sane values for INSTALL_LIBDIR and INSTALL_INCDIR in common.mk when configure has not been run, as is already done for DIST_PATH. This is to safeguard against statements in the top-level Makefile that use 'find' to locate old libraries and headers for the uninstall targets, which run regardless of make target. Without setting INSTALL_LIBDIR and INSTALL_INCDIR, those variables are empty and the 'find' ends up looking at '/', which is obviously not what we want. (Also enclosed those definitions in an IS_CONFIGURED guard so that they won't get evaluated unless configure has been run.) 
- Rearranged "ifeq ($(IS_CONFIGURED),yes)" conditionals in Makefile to reduce occurrences and separated "local" and top-level components of cleanblastest and cleanblistest targets to improve readability. - Adjusted out-of-tree builds so that they are no longer oblivious to the .git directories, if present, and thus now properly augment version strings with the appropriate patch number. - Include missing version string in 'configure --help' output. commit b09e4e8852a6c42895910e3bcb9041124dc8bf9f Author: Field G. Van Zee Date: Mon May 7 14:37:50 2018 -0500 Allow 'make clean' and friends without configuring. Details: - Modified top-level Makefile so that a user can run 'make distclean', 'make clean', or any of the other clean-related targets prior to running configure (or after a previous 'make distclean'). Thanks to Nico Schlömer for suggesting this via issue #197. - Made the cleanblastest and cleanblistest more comprehensive in that they now clean out build products that would have resulted from local compilation (ie: builds performed within the 'blastest' or 'testsuite' directories). - Added "cc" to list of expected compiler "vendors" since the CC variable seems to automatically be set to "cc" on Ubuntu 16.04 (which is just an alias to gcc). - Comment update to build/config.mk.in. commit 35c5a1449c3efe0b2ec43cdefcfdf00e71828149 Author: Field G. Van Zee Date: Mon May 7 12:04:57 2018 -0500 No longer update version file during configure. Details: - Recycled the core functionality of build/update-version-file.sh into a function in configure, disabling the updating of the 'version' file in the process. Instead of writing the patched version string back to the version file and then reading it again from within configure, the patched version string is now saved directly to a variable in the main() function in configure. This will prevent developers from accidentally committing configure-induced changes to the version file in between releases. 
commit 8adb2f919b62da4a2885ae04a10925e0e6a2e304 Author: Mathieu Poumeyrol Date: Sun May 6 19:58:16 2018 +0200 Some cross compilations fixes (#198) * cross-compilation fixes * add doc ranlib variable * icc support -dumpversion, posix compatible test, plus one stupid mistake * retab * revert version as requested commit 89acd9ebe516eeb97006dba344354bfc98826645 Merge: 4cff432d 0557eba7 Author: Field G. Van Zee Date: Wed May 2 12:53:35 2018 -0500 Merge branch 'amd' commit 4cff432d707891ada705b039a7e043558bbf3c51 Author: Nisanth M P <31736542+nisanthmpamd@users.noreply.github.com> Date: Wed May 2 23:20:42 2018 +0530 AMD specific optimizations for target 'zen' (#194) Re-enabled AMD-specific optimizations for zen. Details: - Re-enabled Zen-specific cache blocksizes for 'zen' sub-configuration. - Re-enabled small matrix gemm optimization for 'zen'. - These were both temporarily disabled during a previous merge simply due to lack of Zen hardware for testing. commit 8eda5fe7f678b413cb274bd84716995a7d0b87a9 Author: Field G. Van Zee Date: Wed May 2 12:20:37 2018 -0500 Typo fix in README.md. commit 0557eba78f5fcf28f0f039f28da79498ffde848c Author: Nisanth M P Date: Mon Mar 19 12:49:26 2018 +0530 Re-enabling the small matrix gemm optimization for target zen Change-Id: I13872784586984634d728cd99a00f71c3f904395 commit df78ceb3d6f33a27fe69017854405edaea7c40e5 Author: Nisanth M P Date: Mon Mar 19 11:34:32 2018 +0530 Re-enabling Zen optimized cache block sizes for config target zen Change-Id: I8191421b876755b31590323c66156d4a814575f1 commit 5e515f9a76f4aaf43dc21315a34d797726ca8069 Author: Field G. Van Zee Date: Tue May 1 13:44:10 2018 -0500 Tweaked new language in README.md. commit 1ddd9e316ad5024af8b606dfcebd1e7d587a130f Author: Field G. Van Zee Date: Tue May 1 13:36:28 2018 -0500 Added link to Dave Love's Fedora Copr page. Details: - Added a blurb to README.md advertising Dave Love's Copr homepage, which contains rpm packages for RHEL/Fedora-like distributions. 
commit 078a852f738c66c6468bd5e64b06467edc9057fd Author: Field G. Van Zee Date: Mon Apr 30 16:15:26 2018 -0500 Minor tweaks to top-level 'make clean' target. Details: - Execute 'cleanh' target as part of 'clean' - Remove cblas.h file from 'include//' as part of 'cleanh' target. - Updated the echoed (non-verbose) text for uniformity. commit 75d0d1057dda69c655bd1cd8f791cb39b54d99b8 Author: Field G. Van Zee Date: Mon Apr 30 14:57:33 2018 -0500 Renamed various datatype-related macros/functions. Details: - Renamed the following macros in bli_obj_macro_defs.h and bli_param_macro_defs.h: - bli_obj_datatype() -> bli_obj_dt() - bli_obj_target_datatype() -> bli_obj_target_dt() - bli_obj_execution_datatype() -> bli_obj_exec_dt() - bli_obj_set_datatype() -> bli_obj_set_dt() - bli_obj_set_target_datatype() -> bli_obj_set_target_dt() - bli_obj_set_execution_datatype() -> bli_obj_set_exec_dt() - bli_obj_datatype_proj_to_real() -> bli_obj_dt_proj_to_real() - bli_obj_datatype_proj_to_complex() -> bli_obj_dt_proj_to_complex() - bli_datatype_proj_to_real() -> bli_dt_proj_to_real() - bli_datatype_proj_to_complex() -> bli_dt_proj_to_complex() - Renamed the following functions in bli_obj.c: - bli_datatype_size() -> bli_dt_size() - bli_datatype_string() -> bli_dt_string() - bli_datatype_union() -> bli_dt_union() - Removed a pair of old level-1f penryn intrinsics kernels that were no longer in use. commit 01c4173238baf08e7f6700a3f91a2ea58cca50c1 Author: Field G. Van Zee Date: Sat Apr 28 14:07:34 2018 -0500 CHANGELOG update (0.3.2) commit 2fb440876690bdcec0c11a30e2b33ad100bab529 (tag: 0.3.2) Author: Field G. Van Zee Date: Sat Apr 28 14:07:31 2018 -0500 Version file update (0.3.2) commit cdf041ddadd8725e578e2f59f37ae341f26655af Author: Field G. Van Zee Date: Sat Apr 28 14:05:00 2018 -0500 Use config.mk instead of common.mk in bump-version.sh. Details: - Fixed inadvertent targeting of common.mk when testing whether configure had already been run, rather than config.mk. 
commit 6ded8f9f0364b3c07255e2532ada3eeb2ed2a715 Author: Field G. Van Zee Date: Sat Apr 28 14:01:29 2018 -0500 Account for recent 'make distclean' in bump-version.sh. Details: - Added logic to build/bump-version.sh that will run './configure auto' if 'common.mk' is not present (usually because 'make distclean' was run recently). commit 7c16fdce433f5dea0e83d5047553c955d8e46fd2 Author: Field G. Van Zee Date: Sat Apr 28 13:50:55 2018 -0500 Fixed typo in RELEASING file. commit 5e5ca4984fcf6d72d3036c338bb9cdc64520a325 Author: Field G. Van Zee Date: Sat Apr 28 13:48:01 2018 -0500 README updates. Details: - Updates to the top-level README files in the top-level directory as well as the 'examples/oapi' directory. commit 627b045e301defea6770dc5b64e1110cbec25153 Author: Field G. Van Zee Date: Fri Apr 27 18:11:19 2018 -0500 Added an example of using transposition with gemm. Details: - Added an example to examples/oapi/8level3.c to show how to indicate transposition when performing a gemm operation. commit 13a0eadc69d72933e322901f5b44944834e3c787 Author: Field G. Van Zee Date: Fri Apr 27 18:00:07 2018 -0500 Added more transposition/conjugation examples. Details: - Added code to examples/oapi/5level1m.c that demonstrates transposing (and conjugate-transposing) unstructured matrices. - Comment updates to 6level1m_diag.c to maintain consistency with new examples in 5level1m.c. commit 5606cd8881e75264a96af45dc8ea1905bab054f5 Author: Field G. Van Zee Date: Fri Apr 27 17:13:10 2018 -0500 Added utility module to examples/oapi. Details: - Added a new code example file to examples/oapi demonstrating how to use various utility operations. - Comment updates to other example files. - README updates. commit ff26c94c6486374c709f93c6965ea18903bd6a18 Author: Field G. Van Zee Date: Fri Apr 27 12:31:34 2018 -0500 Added missing gcc version constraint for knl. 
Details: - Previously forgot to add explicit enforcement of a minimum gcc version in configure script when 'knl' sub-configuration is requested. - Comment updates to configure. commit 4d97574e477b3e55ddbb6044b0542a92cd9bab30 Author: Field G. Van Zee Date: Tue Apr 24 18:48:09 2018 -0500 Added object API example code. Details: - Added an 'examples' directory at the top level. - Added an 'oapi' subdirectory in 'examples' that contains a tutorial-like sequence of example code demonstrating the core functionality of BLIS's object-based API, along with a Makefile and README. Thanks to Victor Eijkhout for being the first to suggest including such code in BLIS. commit d6ab25a3232aa52b9b855088fb4b0b46ff2c00c8 Author: Field G. Van Zee Date: Tue Apr 24 18:43:03 2018 -0500 Add setijm, getijm operations. Details: - Added bli_setgetijm.c, which defines bli_setijm(), bli_getijm(), and related functions that can be used to read and write individual elements of an obj_t. - Defined a new function, bli_obj_create_conf_to(), in bli_obj.c that will create a new object with dimensions conformal to an existing object. Transposition and conjugation states on the existing object are ignored, as are structure and uplo fields. - Defined a new function, bli_datatype_string(), in bli_obj.c that returns a char* to a string representation of the name of each num_t datatype. For example, BLIS_DOUBLE is "double" and BLIS_DCOMPLEX is "dcomplex". BLIS_INT is included (as "int"), but BLIS_CONSTANT is not, and thus is not a valid input argument to bli_datatype_string(). - Added calls to bli_init_once() to various functions in bli_obj.c, the most important of which was bli_obj_create_without_buffer(). - Removed unintended/extra newline from the end of printv output. - Whitespace changes to - frame/base/bli_machval.c - frame/base/bli_machval.h - frame/0/copysc/bli_copysc.c - Trivial changes to README.md and common.mk. commit a731a428f7fc02fd6ab4f953ead828c1d06fb5a1 Author: Field G.
Van Zee Date: Tue Apr 17 16:44:55 2018 -0500 Another README.md update. commit c734ee928a824b27d280a9a67b1b4bc8423d5795 Author: Field G. Van Zee Date: Tue Apr 17 16:40:05 2018 -0500 README.md update. commit 03ecad372d8eb603ee905a7b944d0544a813460a Author: Field G. Van Zee Date: Tue Apr 17 14:16:59 2018 -0500 Added RELEASING file. Details: - Added a file named 'RELEASING' that contains basic notes on how to create a new version/release of BLIS. This is mostly just a reminder to myself, but also may become useful if/when others take over development and administration of the project. commit 24b3c3149ce66546b9a1afc2cc794a637a86aa60 Merge: 60366a3f 817b67c0 Author: Field G. Van Zee Date: Mon Apr 16 18:49:38 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 60366a3faba4e60cee85c3b87a3f69625f4b9026 Author: Field G. Van Zee Date: Mon Apr 16 18:46:21 2018 -0500 Updates to knl kernels and related code. Details: - Imported the 24x16 knl sgemm microkernel (and its corresponding spackm kernel) from TBLIS and enabled its use in the knl sub-config. Also added sgemm microkernel prototype to bli_kernels_knl.h. - Updated dgemm and dpackm microkernels from TBLIS, which included an important change regarding the offsets array (changed from extern declaration to static declaration/definition). - Activated use of level-1v and -1f zen kernels in skx and knl sub-configs. - Removed some old macros no longer needed in bli_family_skx.h now that libmemkind support exists in configure. - Moved bli_avx512_macros.h to frame/include and adjusted #includes in skx and knl kernels accordingly. - Moved unused kernels in kernels/knl/3 to kernels/knl/3/other directory. - Fixed a minor bug in the 'make' output per compile when verboseness is not turned on.
The rule-generating function 'make-kernel-rule' was previously passing in the name of the config, rather than the name of the kernel set returned by get-config-for-kset, which could give misleading information to the user when the kconfig_map mapped a kernel set to a sub-configuration that did not share the same name. (This didn't affect the CFLAGS that were actually used.) - Updated test/3m4m/Makefile, removing acml targets and renaming the remaining targets. commit 817b67c01752e0ca8fe230bb8ad23afc7bd0f64e Merge: 67c9c2f8 2b7108a8 Author: Field G. Van Zee Date: Mon Apr 16 14:06:26 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 67c9c2f86d5ef2accc439b21581d73d82754a2e3 Author: Field G. Van Zee Date: Mon Apr 16 14:03:12 2018 -0500 Retired haswell gemm microkernels. Details: - Moved microkernels in kernels/haswell/3 to kernels/haswell/3/old. These microkernels were no longer being used and only sowed confusion to anyone inspecting the repository without being fully cognizant of the build system and how it works (and sometimes even to those who wrote the build system). Note that the haswell configuration currently employs the zen microkernels. commit 2b7108a8ef8ce958b3acad028ff07c85ff97fd63 Author: Field G. Van Zee Date: Mon Apr 16 12:35:53 2018 -0500 Minor updates to test driver makefiles. Details: - Cleaned up and homogenized the various test driver Makefiles in testsuite and test directories. - Very minor updates to test driver code. commit 9f56df95570a24587b910b169f342bd356ccbfb6 Author: Field G. Van Zee Date: Wed Apr 11 14:51:36 2018 -0500 Trivial tweaks to configure blacklisting output. Details: - Updated output of information vis-a-vis configuration blacklisting. commit f56481efebd9a7785c0618f3a12c0bec36f46333 Author: Field G. Van Zee Date: Tue Apr 10 19:02:21 2018 -0500 Cleaned up assembler version query on OS X. Details: - Swiched from querying version of 'objdump' to 'as' (e.g. the assembler). 
- Fixed the outputting of the version of 'as' on OS X, which required this beauty: ...=$(as -v /dev/null -o /dev/null 2>&1) - Only add sub-configs to blacklist if the sub-config hasn't already been added. commit 088c474e629535affbe111f141f895af50d109be Author: Field G. Van Zee Date: Tue Apr 10 18:09:56 2018 -0500 Added support for blacklisting via the assembler. Details: - Added logic to configure that attempts to assemble various small files containing select instructions designed to reveal whether binutils (specifically, the assembler) supports emitting those instruction sets. This information provides additional opportunities to blacklist sub- configurations that are unsupported by the environment. Thanks to Devin Matthews for pointing me towards a similar solution in TBLIS as an example. - Various other cleanups in configure. - Reorganized the detection code in the 'build' directory, bringing the "auto-detect" configuration detection, libmemkind detection, and new instruction set detection codes into a single new subdirectory named 'detect'. commit 78a24e7dada52a3582f8488795bd1a44993989d9 Author: Field G. Van Zee Date: Mon Apr 9 17:02:13 2018 -0500 Updated bli_avx512_macros.h in knl and skx configs. Details: - Downloaded updated version of bli_avx512_macros.h from TBLIS [1] in attempt to address issue #192. [1] https://github.com/devinamatthews/tblis/ commit 388f64d6ade14caa4a6c286845ad2d565378b2bb Author: Field G. Van Zee Date: Mon Apr 9 15:33:10 2018 -0500 Fixed failure to honor CC= argument to configure. Details: - Fixed a failure to observe the value of CC when selecting the compiler in configure. Thanks to Devangi Parikh for reporting this bug. - The semantics now also work for the CC environment variable. That is, if CC is set prior to running configure, that value is used, but will be overridden by specifying the CC= argument to configure. If the CC environment variable is not set, the CC= value is used.
If neither the environment variable nor CC= are specified, then the choice is made internally to configure: first attempting to find gcc, then clang, and then cc. commit 45fbe66b3e2ab92f0b4fdf437d57c5d06603803d Author: Field G. Van Zee Date: Mon Apr 9 14:01:08 2018 -0500 Fixed libmemkind dependency for x86_64. Details: - Removed some old conditional code in config/knl/make_defs.mk that added -lmemkind to LDFLAGS if DEBUG_TYPE was not 'sde' and inserted code into common.mk that affirmatively filters out -lmemkind from LDFLAGS if DEBUG_TYPE is 'sde'. (Thanks to Dave Love for reporting this issue.) Other minor cleanups to neighboring code in common.mk. - Updated CRVECFLAGS in knl/make_defs.mk to be based on -march=knl, and then AVX-512 functionality is manually removed via various -mno-avx512* flags. Also, make the setting of CRVECFLAGS conditional on CC_VENDOR. Similar change to skx/make_defs.mk. - Comment/whitespace updates. commit ca982148b3b419db063cad2fa74376ec383a5c80 Author: dnp Date: Sun Apr 8 21:27:10 2018 -0500 Fixed bug in SKX sgemm microkernel. Modified SKX dgemm microkernel to be consistent with the sgemm microkernel commit bd0276752ccdd56ff897b1a5ae022f2ffe6e0b38 Author: Field G. Van Zee Date: Fri Apr 6 18:51:43 2018 -0500 Track separate ref kernel flags for each sub-config. Details: - Renamed CVECFLAGS variables in sub-configurations' make_defs.mk files to CKVECFLAGS. - Added default definitions of two new make variables to most sub- configurations' make_defs.mk files--CROPTFLAGS and CRVECFLAGS-- which correspond to reference kernel analogues of the CKOPTFLAGS and CKVECFLAGS, which track optimization and vectorization flags for optimized kernels. Currently, two sub-configurations (knl and skx) explicitly set CRVECFLAGS to non-default values (using AVX2 instead of AVX-512 for reference kernels). Thanks to Jeff Hammond, whose feedback prompted me to make this change (issue #187).
- Changed common.mk so that the get-refkern-cflags-for function returns the flags associated with the given sub-configuration's CROPTFLAGS and CRVECFLAGS (instead of CKOPTFLAGS and CKVECFLAGS). commit b9aebce19480448817373e2df2b36bd090eae41a Author: Field G. Van Zee Date: Fri Apr 6 18:37:33 2018 -0500 De-verbosify makefile fragment generation. Details: - Changed from -v1 to -v0 when calling gen-make-frag.sh from configure. The directory-by-directory recursive output didn't add much value to the user, so now we just echo a line for each top-level directory into which we will recurse (e.g. 'config', 'ref_kernels', 'frame', etc.). This also helps keep more interesting information (from earlier in the execution of configure) from scrolling out of the terminal window. commit b549b91f26948991e13364f1f26a878da0f43aa0 Author: Field G. Van Zee Date: Fri Apr 6 16:31:33 2018 -0500 Added 64-bit integer support to BLAS test drivers. Details: - Updated the build system and BLAS test drivers to use 64-bit integers when BLIS is configured for 64-bit integers in the BLAS layer. Also updated blastest/Makefile accordingly. Thanks to Dave Love for reporting the need for this feature. - Added a 'check' target to blastest/Makefile so that the user can see a summary of the tests. - Commented out the initial definition of INCLUDE_PATHS in common.mk, which was used pre-monolithic header, back when BLIS needed paths to *all* headers, rather than just a select few. This line is no longer needed since the value of INCLUDE_PATHS is overwritten by a later definition limited to only the header paths that are needed now. commit d39fa1c04265869bdf8b6f453076359eec2f3c59 Author: Field G. Van Zee Date: Thu Apr 5 19:38:35 2018 -0500 Adjusted CFLAGS used to compile bli_cntx_ref.c. Details: - Removed CKOPTFLAGS and CVECFLAGS from the set of CFLAGS used to compile bli_cntx_ref.c for each configuration. 
This is necessary because the file defines functions like bli_cntx_init_skx_ref(), which are called during BLIS's initialization of the global kernel structure, potentially being executed by an architecture that lacks the instruction set used to compile the kernels for, in this example, skx, which would lead to an illegal instruction error. Thanks to Dave Love for reporting this issue. - Further adjusted CFLAGS used when compiling code in the 'config' directory (e.g. bli_cntx_init_skx.c) as well as code in 'frame' so as to avoid the aforementioned issue. commit 08b123084d35680beab379012f8f5a5a8b44a443 Author: Field G. Van Zee Date: Thu Apr 5 14:25:39 2018 -0500 Added color-coding to 'make check' output. Details: - Added color coding to output of check-blistest.sh, check-blastest.sh scripts. Success messages are coded green and failure are coded red. This helps draw the eye toward those messages as the 'make checkblis', 'make checkblis-fast', and 'make checkblas' targets are executed. - Changed top-level Makefile so that execution will not halt if 'checkblis', 'checkblis-fast', or 'checkblas' targets fail, which means that the second of the two tests (BLIS and BLAS) run by 'make check' will run even if the first test fails. commit c9e4d7db7410b03c1ffe8c9727e9f1b2ba7fecfe Author: Field G. Van Zee Date: Wed Apr 4 17:13:15 2018 -0500 CHANGELOG update (0.3.1) commit 1f28d7c86e17730f05bd239c8e8d67e3e7510a4f (tag: 0.3.1) Author: Field G. Van Zee Date: Wed Apr 4 17:13:15 2018 -0500 Version file update (0.3.1) commit e6cc9ee26bcf0450f1120d5d12985b04d9fb8516 Merge: 786d15c5 3c91c7ae Author: Field G. Van Zee Date: Wed Apr 4 16:08:18 2018 -0500 Merge branch 'dev' of github.com:flame/blis into dev commit 786d15c5ef09f1f647b126b63d57e76d5810c58e Author: Field G. Van Zee Date: Wed Apr 4 16:06:47 2018 -0500 Added skx, knl to x86_64 configuration family. Details: - Added 'skx' and 'knl' sub-configurations to the 'x86_64' configuration family in the config_registry file. 
- Added logic to configure that avoids committing certain sub-configs to the configuration/kernel registries if those sub-configs cannot be handled properly by the chosen compiler. (This was modeled after similar logic in TBLIS's configure; thanks to Devin Matthews for pointing this out.) First, the compiler and its version are inspected and, based on the results, certain configurations are added to a "blacklist". Then, as the configuration registries are being created, configurations and/or kernels that match items in the blacklist are skipped over and not committed to the registries. Under certain circumstances, omitting a blacklisted configuration will indirectly invalidate other configurations due to the loss of availability of the original blacklisted configuration's kernel set. This additional indirect blacklist is also accounted for. - Added output to the beginning of configure that echoes information about the chosen compiler as well as the configurations that are blacklisted and must be stripped from the registries. - Various other cleanups in configure, especially with respect to explicitly declaring local variables in functions. - Comment updates to config/zen/make_defs.mk regarding choice of -march flags based on compiler version. commit 3c91c7aebafb446a2582267beb3b22c8bb475b3b Author: Field G. Van Zee Date: Mon Apr 2 12:40:25 2018 -0500 Fixed 64b type mismatch warning in cblas_xerbla.c. Details: - Fixed a compiler warning concerning a type mismatch between the format specifier of the printf() call in cblas_xerbla.c and its corresponding (info) argument. The warning manifested when the CBLAS layer was enabled and the BLAS/CBLAS integer type size is set to 64 (the default is 32). The warning was fixed by changing the specifier from %d to %jd and typecasting the argument to intmax_t. Thanks to Dave Love for reporting this issue and submitting the patch. commit 71eaf449a812fe2bd640d21513ec83974b2edb45 Merge: 6a628184 ae9a5be5 Author: Field G.
Van Zee Date: Tue Mar 27 17:21:43 2018 -0500 Merge branch 'dev' commit ae9a5be56d6f9b87278d6032154d2dcf3fb7d54f Author: dnp Date: Tue Mar 27 17:01:23 2018 -0500 Fixed bug in skx sgemm microkernel commit 3f02af0905b1e2e2e065862f8afe5e9a52f282b2 Author: Field G. Van Zee Date: Mon Mar 26 17:40:04 2018 -0500 Row storage optimizations to zen dotxf kernels. Details: - Split the main loop bodies of zen's [sd]dotxf kernels into two cases: one to handle a column-stored matrix A and one to handle a row-stored matrix A. This allows vector instructions to be employed even if A is stored by rows (and A^T appears stored as columns). Both storage cases use a common edge case loop. Thanks to Devin Matthews for this idea and for prototyping the change needed for sdotxf kernel. commit 679dcc331dd870ec680e135a3fb65ffa6e3a91c2 Author: Field G. Van Zee Date: Mon Mar 26 15:35:17 2018 -0500 Make k_iter/k_left uint64_t in bulldozer fma ukrs. Details: - Changed the declaration of k_iter and k_left for d, c, z microkernels from dim_t to uint64_t. This is needed to ensure compatibility with the movq instruction used to load the value into registers. This change should have been made a long time ago, but for some reason only recently began showing up via Travis CI. commit 6a628184f6938673440e4cdd4fed0208c51fd1f9 Author: Field G. Van Zee Date: Mon Mar 26 14:48:16 2018 -0500 Fixed a memkind-related compile-time bug on knl. Details: - Fixed a compile-time error that occurred due to the fact that BLIS_ENABLE_MEMKIND, defined in bli_config.h, was not being defined soon enough to be used in bli_system.h where it is needed to determine whether hbwmalloc.h should be #included. bli_system.h is now included after bli_config.h (and bli_config_macro_defs.h). Thanks to Dave Love for reporting this issue. - Tweaked the language used by configure to echo the status of the --with[out]-memkind option. commit e2192a8fd58ec3657434ddd407033e097edad8f4 Author: Field G. 
Van Zee Date: Fri Mar 23 12:53:48 2018 -0500 Removed vzeroupper intrinsics from zen kernels. Details: - Fixed a bug in the zen (also used by haswell) dotxf kernels whereby a vzeroupper instruction destroyed part of the intermediate result stored by the vdpps instructions that came right before. (The vzeroupper intrinsic was removed.) - Removed remaining vzeroupper intrinsics from other zen kernels. Previously, the vzeroupper instructions were included because BLIS is typically compiled with -mfpmath=sse. But it was brought to my attention that inserting these vzeroupper instructions is unnecessary for our purposes, since (a) -mfpmath=sse results in VEX-encoded scalar code rather than literal SSE instructions, and (b) compilers already (likely) insert vzeroupper instructions where necessary. Thanks to Devin Matthews for zeroing in on the dotxf bug. - Removed -malign-double from bulldozer make_defs.mk. This alignment was already happening by default since bulldozer is an x86_64 system. commit 22289ad23cd10b81451ce82f60d84b5f97e7fd85 Author: Field G. Van Zee Date: Thu Mar 22 18:21:30 2018 -0500 Added build system support for libmemkind. Details: - Added support for libmemkind to configure. configure attempts to detect the presence of libmemkind by compiling a small program containing #include <hbwmalloc.h> and a call to hbw_malloc(). If successful, it is assumed that libmemkind is present and available. If present, use of libmemkind is enabled by default, and otherwise use is disabled by default. If libmemkind is present, the user may explicitly disable use of the library by running configure with the --without-memkind option. Furthermore, a configuration may disable libmemkind, perhaps conditional on some aspect of the build system, by including -DBLIS_DISABLE_MEMKIND in the configuration's CPPROCFLAGS make variable and setting the BLIS_ENABLE_MEMKIND makefile variable, set in config.mk, to 'no'. (The knl configuration makes use of this latter feature; see below.)
- If enabled at configure-time, bli_system.h will #include <hbwmalloc.h> and bli_kernel_macro_defs.h will define BLIS_MALLOC_POOL and BLIS_FREE_POOL to use hbw_malloc() and hbw_free(), respectively. - Deprecated explicit use of BLIS_NO_HBWMALLOC in config/knl/bli_family.knl.h and replaced use of -DBLIS_NO_HBWMALLOC in config/knl/make_defs.mk with -DBLIS_DISABLE_MEMKIND, which overrides (#undefs) the definition of BLIS_ENABLE_MEMKIND in bli_system.h, if it would otherwise be defined. Also, set the BLIS_ENABLE_MEMKIND makefile variable to 'no'. - common.mk now adds libmemkind to LDFLAGS if libmemkind is enabled. commit 7dc40eafdd9af3e8c4519a8d1b04d25830b4ca7a Author: Field G. Van Zee Date: Wed Mar 21 18:39:16 2018 -0500 Updates to top-level and test driver Makefiles. Details: - Added logic to common.mk that will choose a BLIS library against which to link (LIBBLIS_LINK). The default choice is the static (.a) library; the shared (.so) library is chosen only if the shared library build was enabled and the static one was disabled. - Updated the various test driver Makefiles to reference this common, pre-chosen library against which to link. (Previously, these drivers unconditionally linked against the static library and would have failed if the static library build was disabled at configure-time.) - Renamed many of the variables in common.mk and the top-level Makefile so that variables relating to the libblis.[a|so] files, including paths to those files, begin with "LIBBLIS". - Shuffled around some of the library definitions from the top-level Makefile to common.mk. - Renamed BLIS_ENABLE_DYNAMIC_BUILD to BLIS_ENABLE_SHARED_BUILD, and the @enable_dynamic@ anchor to @enable_shared@ in build/config.mk.in and in configure. - A few other cleanups in the top-level Makefile. commit 97e1eeade3c51df1bae574a9bc1da34b05bf2bd3 Author: Field G. Van Zee Date: Wed Mar 21 15:47:11 2018 -0500 Added input.operations.fast file for 'make check'.
Details: - Added an 'input.operations.fast' file to testsuite directory to go along with the 'input.general.fast' file used by the 'make check' target in the top-level Makefile. This will allow the "fast" check to prune operations and/or parameter combinations from the test space in order to save time. - Currently, input.operations.fast prunes trmm3 and all transposition and conjugation parameters from the level-3 test space. - Reduced problem size tested in input.general.fast to 100 and disabled testing of 1m method. commit c441caa95aabe69f54e2160eb67bf4ca76a66c34 Author: Field G. Van Zee Date: Tue Mar 20 17:56:02 2018 -0500 README update. Details: - Minor updates to README.md. - Minor change to blastest/Makefile. commit 6fe018eb4ac8c16f2edc916c24f5994848017b7f Author: Field G. Van Zee Date: Tue Mar 20 15:35:45 2018 -0500 Added .gitkeep file to blastest/obj. Details: - Added an empty file named '.gitkeep' to blastest/obj/ so that git will track the otherwise empty directory. (This is already done for the BLIS testsuite in testsuite/obj.) commit 0e6d000db9291342913dc5f8590a28c67bbcbc95 Author: Field G. Van Zee Date: Tue Mar 20 15:08:43 2018 -0500 Updated .gitignore to ignore BLAS test out.* files. commit 40c040a31d96fbadff11f761d0cad1ef03ef2cc5 Author: Field G. Van Zee Date: Tue Mar 20 14:33:50 2018 -0500 Fixes to .travis.yml. Details: - Invoke the full BLIS testsuite via 'make testblis' instead of the fast version via 'blistest-fast' (which was wrong anyway, since the correct fast target is 'testblis-fast'). - Invoke the BLAS tests via 'make testblas' instead of 'blastest'. commit 664ec4813d8b53121cce7a68bef47da656ece9cb Author: Field G. Van Zee Date: Tue Mar 20 13:54:58 2018 -0500 Integrated f2c'ed netlib BLAS test suite. Details: - Created a new test suite that exercises only the BLAS compatibility found in BLIS.
The test suite is a straightforward port of code obtained from netlib LAPACK, run through f2c and linked to a stripped- down version of libf2c that is compiled along with the test drivers (to prevent any obvious ABI issues). The new BLAS test suite can be run from within its new local directory, 'blastest' (through its local 'make ; make run' targets) or from the top-level Makefile (via the 'make testblas' target). Output files are created in whatever directory the test drivers are run, whether it be the 'blastest' directory, the top-level source distribution directory, or the out-of-tree directory in which 'configure' was run. Also, the results of the BLAS test suite can be checked via 'make checkblas', which summarizes the presence or absence of test failures in a single line printed to stdout. - Updated the 'test' target to run both 'testblis' and 'testblas'. - Added a new 'testblis-fast' target that runs the BLIS testsuite with smaller problem sizes, allowing it to finish more quickly. - Added a 'make check' target, which runs 'checkblis-fast' and 'checkblas'. - Changed .travis.yml so that Travis CI runs 'testblis-fast' instead of 'testblis' before (calling the check-blistest.sh script to check the result manually). - Renamed some targets in the top-level Makefile to be consistent between BLAS and BLIS. commit 40fa10396c0a3f9601cf49f6b6cd9922185c932e Author: Field G. Van Zee Date: Mon Mar 19 18:19:43 2018 -0500 Fixed a few obscure bugs in the BLAS API. Details: - Fixed a missing parameter in the definition of sdsdot_(). The 'sb' argument was missing. Strangely, the argument is omitted from dsdot_() in the BLAS API. - Fixed the missing 'c' or 'u' in the "?gerc" or "?geru" operation string passed to xerbla_() by the bla_ger_check() macro. - For bla_syrk_check() and bla_syr2k_check() macros, only allow conjugate-transpose (trans='c') as a valid argument for the real domain functions [sd]syrk_() and [sd]syr2k_(). 
(Previously, the argument was allowed even for the complex domain equivalents, which was inconsistent with the BLAS API.) commit fe7d7f1e43e4c26249eed83d4188beee1ba96202 Author: Field G. Van Zee Date: Sun Mar 18 19:43:06 2018 -0500 Fixed cpp macro parameter "ch" typo in bla_ger.c. Details: - Previously, the BLAS routine-generating macro in bla_ger.c was incorrectly passing MKSTR(ch) into the _check() macro when it should have been passing in the char that was available, chxy. I've instead changed the name of the macro parameter from chxy to ch. Similar change was made to bla_ger.h for consistency. Thanks to Dave Love for helping track this down. (NOTE: This is actually the root cause of the bug that was first patched by increasing the length of the operation name strings passed into xerbla_(), as defined by the constant BLIS_MAX_BLAS_FUNC_STR_LENGTH, in 3d1a5a7. In theory, that change could be backed out now.) - Applied aforementioned chxy->ch change to bla_dot.[ch], as well as frame/compat/cblas/f77_sub/f77_dot_sub.[ch] (not because it needed to happen, but for naming consistency). - Reformatted function signatures/prototypes of CBLAS functions and function calls to BLAS in frame/compat/cblas/f77_sub/*.c. commit cb7ed90752d1ddbac11368c4510641ca4f3a02eb Author: Field G. Van Zee Date: Fri Mar 16 13:05:56 2018 -0500 Convert op names to uppercase before calling xerbla_(). Details: - Defined a new function, bli_string_mkupper(), that calls toupper() on every non-NULL character in a string. - Call bli_string_mkupper() prior to calling xerbla_() in the level-2/-3 BLAS _check() macros. This prevents the BLAS testsuite from complaining that the operation name (e.g. "dgemm") does not match the expected value (e.g. "DGEMM"). Thanks to Dave Love for reporting this issue. commit 3d1a5a7c08fed3ba29f060fe1db2b0dc42dde223 Author: Field G. Van Zee Date: Fri Mar 16 12:24:07 2018 -0500 Fixed printf() format overflow.
Details: - Increased the length of operation name strings passed to xerbla_() in the level-2 and level-3 operation _check() functions, found in frame/compat/check. This avoids a format specifier overflow warning by gcc 7. Thanks to Dave Love for reporting this issue and suggesting the fix. commit c73055f028684d998e03b2392093c393782bbfe7 Author: Field G. Van Zee Date: Thu Mar 15 16:08:21 2018 -0500 Return after non-zero info in BLAS checks. Details: - Previously, when calling the BLAS compatibility layer, discovering a parameter check failure would result in the proper setting of the info parameter (printed by xerbla_()), but would also come with an immediate abort() rather than a return. This was incorrect behavior for two overlapping reasons. (1) BLAS should return gracefully to the caller in the event of a bad set of parameters, not abort(). (2) When BLIS was being tested via the BLAS testsuite, BLIS's xerbla_() would correctly get preempted/overridden by the xerbla_() in the BLAS testsuite, but execution would then erroneously continue on to the BLIS implementation with bad parameter values. - The previous issue was addressed by disabling the abort() in BLIS's xerbla_(), changing all of the BLAS _check() functions to cpp macros, and adding a return statement to the end of each _check() macro's "if ( info != 0 )" conditional. Thanks to Dave Love for reporting this issue. commit c4f1d18b97a6a8c3ea0366aa759db597a664062a Author: Field G. Van Zee Date: Wed Mar 14 19:10:09 2018 -0500 Minor typo fix to printing arch in testsuite. Details: - Mistakenly was calling bli_cpuid_query_id() instead of bli_arch_query_id() in the recent addition to the testsuite output that prints the active sub-configuration. The former function is only used for multi-architecture builds, whereas the latter is the more general option that also works for single configuration (including 'configure auto') builds. 
commit 8f2fabec800a720b3e94b33c0048cc8c4ead436d Author: Devin Matthews Date: Wed Mar 14 17:43:42 2018 -0500 Make arm32 and arm64 families work. (#176) commit fc6a1842518a0820c6708c285611346d5a1419da Author: Field G. Van Zee Date: Wed Mar 14 15:31:17 2018 -0500 Print sub-configuration name in testsuite output. Details: - Added a line to the testsuite output that prints the name of the current/active sub-configuration. This is useful when linking the testsuite against multi-configuration builds because it confirms the sub-configuration that is actually being employed at runtime. Thanks to Devin Matthews for suggesting this feature. commit 9943a899d64bf7ec4a24106f6f4c70629bbe1f6e Merge: 290dd4a9 b1a15ae6 Author: Devin Matthews Date: Wed Mar 14 13:27:44 2018 -0500 Merge pull request #173 from devinamatthews/dev Fix Cortex-A9 and Cortex-A15 configs. commit b1a15ae6ee0f46c9a95cf59f9555925e0e8e21ff Author: Devin Matthews Date: Wed Mar 14 13:26:44 2018 -0500 Use BLIS_H_FLAT commit 290dd4a9feee447e69b40ad108954af78e196f7e Author: Field G. Van Zee Date: Wed Mar 14 13:15:37 2018 -0500 Allow arbitrarily deep configuration families. Details: - Updated configure so that configuration families specified in the config_registry are no longer constrained as being only one level deep. For example, previously the x86_64 family could not be defined concisely in terms of, say, intel64 and amd64 families, and instead had to be defined as containing "haswell, sandybridge, penryn, zen, etc." In other words, families were constrained to only having singleton configurations as their members. That constraint is now lifted. - Redefined x86_64 family in config_registry in terms of intel64 and amd64. commit 9cee78e006d56543ac02fc9c488905c0434e60ae Author: Devin Matthews Date: Wed Mar 14 13:09:48 2018 -0500 Fix Cortex-A9 and Cortex-A15 configs. Tested with QEMU. commit 1a3031740f7fcbbcc2c99d5c4cb50d0413407455 Author: Field G. 
Van Zee Date: Tue Mar 13 16:04:40 2018 -0500 Updates to ARM hardware detection support. Details: - Updated/clarified the ARM preprocessor macro branch of bli_cpuid.c. Going forward, cortexa57 (64-bit), cortexa15, and cortexa9 (32-bit) sub-configurations are supported. However, the functions that detect features specific to a15 and a9 are identical, and since a15 is tested first, it will always be chosen for arm32 hardware (even if both sub-configurations were enabled at configure-time and the library is linked and run on an a9). Thus, more work needs to be done to distinguish these two. - Added cpp guard around x86_64 portions of bli_cpuid.c. Now, either the x86_64 or ARM code will be compiled (or neither, if neither environment is detected). - In bli_arch_query_id(), call bli_cpuid_query_id() when the BLIS_FAMILY_ARM64 or BLIS_FAMILY_ARM32 macros are defined. - Added arm64 and arm32 configuration families to config_registry. - Added a note to the arch_t typedef enum in bli_type_defs.h reminding the developer to update the string array in bli_arch.c whenever new enum values are added or existing values are reordered. commit 1442d06886ebdc34d8f1cb620229ddc6062c2ce8 Author: Field G. Van Zee Date: Sun Mar 11 16:59:50 2018 -0500 Fixed misnamed kernels in _cntx_init_cortexa57.c. Details: - Changed incorrect kernel function names in bli_cntx_init_cortexa57.c: bli_sgemm_cortexa57_asm_8x12 -> bli_sgemm_armv8a_asm_8x12 bli_dgemm_cortexa57_asm_6x8 -> bli_dgemm_armv8a_asm_6x8 Thanks to Jacob Gorm Hansen for reporting this issue. commit 48da9f5805f0a49f6ad181ae2bf57b4fde8e1b0a Author: Field G. Van Zee Date: Wed Mar 7 12:54:06 2018 -0600 Tweaked common.mk, Makefile, skx/knl make_defs.mk. Details: - Reorganized linker-related section of common.mk so that LDFLAGS set in a sub-configuration's make_defs.mk file will not be immediately (and erroneously) overridden by the default values. 
- Re-enabled redirected (to file) output of the testsuite when run from the top-level Makefile via 'make test'. (For some reason, it was commented-out for the non-verbose case.) - Removed old/unnecessary code from the make_defs.mk files of skx and knl sub-configurations. commit 8b0475a87daa177916e2caac0e530c6a57fa07cf Author: Field G. Van Zee Date: Tue Mar 6 06:39:44 2018 -0600 Fixed typo in attempted fix in 1a8350f7. Details: - Mistakenly entered 148 as knl mc blocksize for double real when the value should have been 144. Thanks to Dave Love for reporting this. commit 8912e6886b97eabb4ce0c35a3609a0fd994d347b Author: Field G. Van Zee Date: Mon Mar 5 18:00:45 2018 -0600 Fixed missing flags during shared object build. Details: - Fixed a bug in common.mk that caused warning, position-independent code, miscellaneous, and general preprocessor flags to be omitted from the configuration family-specific variables that hold those values, as registered by the family's make_defs.mk file. This would most obviously manifest when targeting a configuration family such as 'intel64' while simultaneously configuring for a shared object build, as the key '-fPIC' flag would be omitted at compile-time and prevent successful linking. Thanks to Dave Love for reporting this bug. - Other cleanups to common.mk for readability and clarity. commit 1a8350f70557fc53ca0c2eadf2076710dd0d9bc9 Author: Field G. Van Zee Date: Mon Mar 5 13:32:00 2018 -0600 Fixed cache blocksize bug in knl configuration. Details: - Changed the mc blocksize for double real execution in the knl sub-configuration from 160 to 148. The old value was not a multiple of mr (which is 24), and thus the safeguards in bli_gks_register_cntx() were tripping. Thanks to Dave Love for reporting this issue. - Switch knl sub-configuration to use default blocksizes for datatypes not supported by native kernels.
- Fixed typos in bli_error.c that prevented certain error strings (which report maximum cache blocksizes not being multiples of their corresponding register blocksize) from properly initializing. commit c09fffa827fe6241dc20193a1c404496664220de Author: Field G. Van Zee Date: Sat Mar 3 13:13:39 2018 -0600 Added missing cntx_t* arg in knl packm kernels. Details: - Added the missing cntx_t* argument to the function signature of packm kernels in kernels/knl/1m/. Thanks to Dave Love for reporting this issue. commit 1ef9360b1fd0209fbeb5766f7a35402fbd080fcb Author: Field G. Van Zee Date: Thu Mar 1 14:36:39 2018 -0600 Enable non-unit vector stride tests by default. Details: - Change "vector storage schemes to test" parameter in testsuite's input.general file to "cj". This means that both unit stride column vectors and non-unit stride column vectors will be tested in operations with vector operands (e.g. level-1v, level-1f, level-2). - Very minor comment (typo) changes to input.operations. commit 8c4e55a1a1ead9a5e970200fee027ffd2c7e8454 Author: Field G. Van Zee Date: Wed Feb 28 17:01:47 2018 -0600 Added individual operation overrides in testsuite. Details: - Updated the testsuite driver so that setting one or more individual operation test switches to "2" in input.operations will enable ONLY those operations and disable all others, regardless of the values of the section overrides and other operation switches. This makes it very easy to quickly test only one or two operations, and equally easy to revert back to the previous combination of operation tests. - Added more comments to input.operations describing the use of individual "enable only" overrides. commit 34862aed89e5d5a8f35aeecd49f3052ada1f337b Author: Field G. Van Zee Date: Wed Feb 28 15:30:14 2018 -0600 Use zen kernels in haswell sub-configuration. Details: - Register use of level-1v zen intrinsic kernels for amaxv, axpyv, dotv, dotxv, and scalv, as well as level-1f zen intrinsic kernels for axpyf and dotxf.
This works because these kernels simply target AVX/AVX2, and therefore work without modification on haswell hardware. - Switch to use of zen microkernels in bli_cntx_init_haswell.c. The zen kernels are essentially identical to those used by haswell, except that now zen kernels are a bit more up-to-date. In the future, I may continue to maintain duplicates, or I may keep the kernels named after one architecture (zen or haswell) but used by both sub-configurations. - In config_registry, enable use of both haswell and zen kernels for the haswell sub-configuration. This is necessary in order to make zen kernels visible when registering kernels in bli_cntx_init_haswell.c. - Enable use of assembly-based complex gemm microkernels for zen, bli_cgemm_zen_asm_3x8() and bli_zgemm_zen_asm_3x4(), in bli_cntx_init_zen.c. This was actually intended for 1681333. commit d9079655c9cbb903c6761d79194a21b7c0a322bc Author: Field G. Van Zee Date: Fri Feb 23 17:42:48 2018 -0600 CHANGELOG update (0.3.0) commit 709f8361ebc90b96b02ebe5c5ffb6fc3b1b25e58 (tag: 0.3.0) Author: Field G. Van Zee Date: Fri Feb 23 17:42:48 2018 -0600 Version file update (0.3.0) commit 3defc7265c12cf85e9de2d7a1f243c5e090a6f9d Author: Field G. Van Zee Date: Fri Feb 23 17:38:19 2018 -0600 Applied 34b72a3 to non-active/unused microkernels. Details: - Applied the read-beyond-bounds bugfix in 34b72a3 to other haswell and zen kernels (ie: other microtile shapes) which are not used by default. This was done mostly in case someone decided to pick up these kernels and start using them, not because it affects BLIS's behavior out-of-the-box. commit 34b72a351745aa0d47bb0b74ebcd0f0a616d613d Author: Field G. Van Zee Date: Fri Feb 23 16:33:32 2018 -0600 Fixed obscure read-beyond-bounds bug in sgemm ukrs. Details: - Fixed an obscure bug in the bli_sgemm_haswell_asm_6x16 and bli_sgemm_zen_asm_6x16 microkernels when the input/output matrix C is stored with general stride (ie: both rs and cs are non-unit). 
The bug was rooted in the way those microkernels read from matrix C-- namely, they used vmovlps/vmovhps instead of movss. By loading two floats at a time, even if one of them was treated as junk, the assembly code could be written in a more concise manner. However, under certain conditions--if m % mr == 0 and n % nr == 0 and the underlying matrix is not an internal "view" into a larger matrix-- this could result in the very last vmovhps of the last (bottom-right) microkernel invocation reading beyond valid memory. Specifically, the low 32 bits read would always be valid, but the high 32 bits could reside beyond the bounds of the array in which the output C matrix is contained. To remedy this situation, we now selectively use movss to load any element that could be the last element in the matrix. commit 5112e1859e7f8888f5555eb7bc02bd9fab9b4442 (origin/rt) Author: Field G. Van Zee Date: Fri Feb 23 14:31:26 2018 -0600 Added missing 'restrict' to some kernels' cntx_t*. Details: - Added missing 'restrict' keyword to cntx_t* argument of function signatures corresponding to level-1v, level-1f, and level-1m kernels. This affected bli_l1v_ker_prot.h, bli_l1f_ker_prot.h, and bli_l1m_ker_prot.h. (The 'restrict' was already being used to qualify cntx_t* arguments for kernels defined in bli_l3_ker_prot.h.) - Added comments to bli_l1v_ker.h, bli_l1f_ker.h, bli_l1m_ker.h, and bli_l3_ukr.h that help explain how those headers function to produce kernel prototypes using the prototype macros defined in the files mentioned above. commit 1fa8af95d807168e0849adb668492601e7009be0 Merge: c084b03b 16813335 Author: Field G. Van Zee Date: Wed Feb 21 17:54:02 2018 -0600 Merge branch 'rt' commit c084b03b31d84427a120e391963db5419f1911ee Merge: 5d03b6e6 fa74af4e Author: Field G. Van Zee Date: Wed Feb 21 17:52:17 2018 -0600 Merge branch 'rt' commit 16813335bdb5978bc9a26cd00a32bd5a130130c4 Merge: fa74af4e 5a7005dd Author: Field G. 
Van Zee Date: Wed Feb 21 17:43:32 2018 -0600 Merge branch 'amd' into rt Details: - Merged contributions made by AMD via 'amd' branch (see summary below). Special thanks to AMD for their contributions to-date, especially with regard to intrinsic- and assembly-based kernels. - Added column storage output cases to microkernels in bli_gemm_zen_asm_d6x8.c and bli_gemmtrsm_l_zen_asm_d6x8.c. Even with the extra cost of transposing the microtile in registers, this is much faster than using the general storage case when the underlying matrix is column-stored. - Added s and d assembly-based zen gemmtrsm_u microkernel (including column storage optimization mentioned above). - Updated zen sub-configuration to reflect presence of new native kernels. - Temporarily reverted zen sub-configuration's level-3 cache blocksizes to smaller haswell values. - Temporarily disabled small matrix handling for zen configuration family in config/zen/bli_family_zen.h. - Updated zen CFLAGS according to changes in 1e4365b. - Updated haswell microkernels such that: - only one vzeroupper instruction is called prior to returning - movapd/movupd are used in lieu of movaps/movups for double-real microkernels. (Note that single-real microkernels still use movaps/movups.) - Added kernel prototypes to kernels/zen/bli_kernels_zen.h, which is now included via frame/include/bli_arch_config.h. - Minor updates to bli_amaxv_ref.c (and to inlined "test" implementation in testsuite/src/test_amaxv.c). - Added early return for alpha == 0 in bli_dotxv_ref.c. - Integrated changes from f07b176, including a fix for undefined behavior when executing the 1m method under certain conditions. - Updated config_registry; no longer need haswell kernels for zen sub-configuration. - Tweaked marginal and pass thresholds for dotxf. - Reformatted level-1v, -1f, and -3 amd kernels and inserted additional comments. - Updated LICENSE file to explicitly mention that parts are copyright UT-Austin and AMD.
- Added AMD copyright to header templates in build/templates. Summary of previous changes from 'amd' branch. - Added s and d assembly-based zen gemm microkernels (d6x8 and d8x6) and s and d assembly-based zen gemmtrsm_l microkernels (d6x8). - Added s and d intrinsics-based zen kernels for amaxv, axpyv, dotv, dotxv, and scalv, with extra-unrolling variants for axpyv and scalv. - Added a small matrix handler to bli_gemm_front(), with the handler implemented in kernels/zen/3/bli_gemm_small_matrix.c. - Added additional logic to sumsqv that first attempts to compute the sum of the squares via dotv(). If there is a floating-point exception (FE_OVERFLOW), then the previous (numerically conservative) code is used; otherwise, the result of dotv() is square-rooted and stored as the result. This new implementation is only enabled when FE_OVERFLOW is #defined. If the macro is not #defined, then the previous implementation is used. - Added axpyv and dotv standalone test drivers to test directory. - Added zen support to old cpuid_x86.c driver in build/auto-detect/old. - Added thread-local and __attribute__-related macros to bli_macro_defs.h. commit 5d03b6e6e19d5a07f0cccf1a158f02fbd62dfd99 Author: Devin Matthews Date: Mon Feb 19 11:31:30 2018 -0600 Fix asm macro include line for KNL. Fixes #167. commit f07b176c84dc9ca38fb0d68805c28b69287c938a Author: Field G. Van Zee Date: Thu Feb 15 18:36:54 2018 -0600 Fixed an obscure bug in the 1m implementation. Details: - Fixed a bug in the way the bli_gemm1m_cntx_ref() function (defined in ref_kernels/bli_cntx_ref.c) initializes its context for 1m execution. Previously, the function probed the context that was in the process of being updated for use with 1m--this context being previously initialized/copied from a native context--for its storage preference to determine which "variant" (row- or column-oriented) of 1m would be needed. 
However, the _cntx_ref() function was not updating the method field of the context until AFTER this query, and the conditional which depended on it, had taken place, meaning the storage preference query function would mistakenly think the context was for native execution, since the context's method field would still be set to BLIS_NAT. This would lead it to incorrectly grab the storage preference of the complex domain microkernel rather than the corresponding real domain microkernel, which could cause the storage preference predicate to evaluate to the wrong value, which would lead to the _cntx_ref() function choosing the wrong variant. This could lead to undefined behavior at runtime. The method is now explicitly set within the context prior to calling the storage preference query function. - Updated comments in frame/ind/oapi/bli_l3_3m4m1m_oapi.c. - Fixed a typo in the commented-out CFLAGS in config/zen/make_defs.mk, which are appropriate for gcc 6.x and newer. (Mistakenly used -march=bdver4 instead of -march=znver1.) commit 1f94bb7b96eb2b67257e6c4df89e29c73e9ab386 Author: Field G. Van Zee Date: Fri Jan 19 12:46:53 2018 -0600 Document how to enable zen-specific instructions. Details: - Added as a comment in config/zen/make_defs.mk the list of compiler flags that could be added to manually enable the instructions provided by the Zen microarchitecture that are not already implied by -march=bdver4. This information, along with the previous commit's flags to selectively disable Bulldozer instructions no longer present in Zen, was gathered from [1]. I hesitate to enable use of these instructions since I don't have any Zen hardware to test on yet. [1] https://wiki.gentoo.org/wiki/Ryzen commit 1e4365b21bafa02bd108c5ac4705a25671fb9441 Author: Field G. Van Zee Date: Thu Jan 18 12:03:51 2018 -0600 Augment zen CFLAGS to prevent illegal instruction. 
Details: - Added various compiler flags (-mno-fma4 -mno-tbm -mno-xop -mno-lwp) so that compiling with -march=bdver4 on zen-based architectures does not result in an illegal instruction error at runtime. Note: This fix is only needed for gcc 5.4; gcc 6.3 or later supports the use of -march=znver1, which can be used in lieu of the augmented set of flags based on bdver4. Thanks to Nisanth Padinharepatt for reporting this error. commit fa74af4e1fa7385ac3f3089fe1ea7bb88c906029 Author: Field G. Van Zee Date: Tue Jan 9 13:43:15 2018 -0600 Minor labeling update for './configure -c' output. Details: - Print the name of the configuration in the output of the kernel-to-config map (and chosen pairs list) as a subtle way to remind the user that these only apply to the targeted configuration (whereas the config list and kernel list are printed without regard to which configuration was actually targeted). commit 5cdea756c7391e2c6cbfb38436ef9a205f860237 Merge: 9d8858b5 1e7a4896 Author: Field G. Van Zee Date: Sun Jan 7 19:45:20 2018 -0600 Merge branch 'rt' commit 9d8858b5cff4a4b078b87872847a5710073fff0a Merge: 0b3ca3cf f7df64da Author: Devin Matthews Date: Sun Jan 7 10:03:25 2018 -0600 Merge pull request #164 from devinamatthews/master Don't use memkind for skx configuration. commit f7df64daf6bbe6431effada6e13d8d1fab5aa221 Author: Devin Matthews Date: Sun Jan 7 09:37:25 2018 -0600 Don't use memkind for skx configuration. Fixes #163. commit 1e7a4896e0cbe73c4685fa956278e3f28273cdf9 Author: Field G. Van Zee Date: Fri Jan 5 12:33:48 2018 -0600 Minor error handling in update-version-file.sh. Details: - Added explicit handling of situations when 'git describe --tags' returns an error. This command is used by update-version-file.sh when deciding whether or not to update the version file prior to configuration. - Removed bli_packm.c and bli_unpackm.c, as they contained no source code. commit 0b3ca3cfb682715a3686fd93ebb10d4a695d1162 Author: Field G. 
Van Zee Date: Thu Jan 4 20:51:35 2018 -0600 Intelligently select compiler for auto-detection. Details: - Rewrote code that selects the compiler for the purposes of compiling the auto-detection executable. CC (if specified) is tried first. Then gcc. Then clang. The absolute fallback is cc. The previous code was sort of broken, and seemed to unintentionally always use gcc. - Moved various configuration-agnostic flags from config/*/make_defs.mk files to common.mk. The new mechanism appends the configuration-agnostic flags to the various compiler flag variables initialized in make_defs.mk. Flags specific to the sub-configuration are still set in make_defs.mk. - Added -Wno-tautological-compare to CMISCFLAGS when clang is in use. Also added the flag to the compiler instantiation during configure-time hardware detection (when clang is selected). - Added some missing (but mostly-optional) quotes to configure script. commit 5a7005dd44ed3174abbe360981e367fd41c99b4b Merge: 7be88705 3bc99a96 Author: Nisanth M P Date: Wed Jan 3 12:05:12 2018 +0530 Merge changes in AMD beta release 0.95 into amd branch commit 0b9c5127e91508c115228ca604ee2dac8de8f477 Author: Field G. Van Zee Date: Sat Dec 23 15:53:44 2017 -0600 Enabled C99, added stdint.h to auto-detect build. Details: - Added "-std=c99" to compiler arguments when building auto-detection driver in configure script. - Added #include <stdint.h> to all three source files needed by auto-detection program. commit 0ce5e19c318e04909d3e664d69accb3a0fc6b988 Author: Field G. Van Zee Date: Sat Dec 23 15:32:03 2017 -0600 Reimplemented configure-time hardware detection. Details: - Reimplemented the hardware detection functionality invoked when running "./configure auto". Previously, a standalone script in build/auto-detect that used CPUID was used. However, the script attempted to enumerate all models for each microarchitecture supported. The new approach recycles the same code used for runtime hardware detection introduced in 2c51356.
This has two immediate benefits. First, it reduces and consolidates the code required to detect microarchitectures via the CPUID instruction. Second, it provides an indirect way of testing at configure-time the code that is used to detect hardware at runtime. This code is (a) only activated when targeting a configuration family (such as intel64 or amd64) at configure-time and (b) somewhat difficult to test in practice, since it relies on having access to older microarchitectures. - The above change required placing conditional cpp macro blocks in bli_arch.c and bli_cpuid.c which either #include "blis.h" or #include a bare-bones set of headers that does not rely on the presence of a bli_config.h header. This is needed because bli_config.h has not been created yet when configure-time auto-detection takes place. - Defined a new function in bli_arch.c, bli_arch_string(), which takes an arch_t id and returns a pointer to a string that contains the lowercase name of the corresponding microarchitecture. This function is used by the auto-detection script to printf() the name of the sub-configuration corresponding to the detected hardware. commit 9804adfd405056ec332bb8e13d68c7b52bd3a6c1 (origin/selfinit) Author: Field G. Van Zee Date: Thu Dec 21 19:22:57 2017 -0600 Added option to disable pack buffer memory pools. Details: - Added a new configure option, --[en|dis]able-packbuf-pools, which will enable or disable the use of internal memory pools for managing buffers used for packing. When disabled, the function specified by the cpp macro BLIS_MALLOC_POOL is called whenever a packing buffer is needed (and BLIS_FREE_POOL is called when the buffer is ready to be released, usually at the end of a loop). When enabled, which was the status quo prior to this commit, a memory pool data structure is created and managed to provide threads with packing buffers.
The memory pool minimizes calls to bli_malloc_pool() (i.e., the wrapper that calls BLIS_MALLOC_POOL), but does so through a somewhat more complex mechanism that may incur additional overhead in some (but not all) situations. The new option defaults to --enable-packbuf-pools. - Removed the reinitialization of the memory pools from the level-3 front-ends and replaced it with automatic reinitialization within the pool API's implementation. This required an extra argument to bli_pool_checkout_block() in the form of a requested size, but hides the complexity entirely from BLIS. And since bli_pool_checkout_block() is only ever called within a critical section, this change fixes a potential race condition in which threads using contexts with different cache blocksizes--most likely a heterogeneous environment--can check out pool blocks that are too small for the submatrices it wishes to pack. Thanks to Nisanth Padinharepatt for reporting this potential issue. - Removed several functions in light of the relocation of pool reinit, including bli_membrk_reinit_pools(), bli_memsys_reinit(), bli_pool_reinit_if(), and bli_check_requested_block_size_for_pool(). - Updated the testsuite to print whether the memory pools are enabled or disabled. commit 107801aaae180c00022f1b990bc59038c14949d2 Merge: d9c05745 0084531d Author: Field G. Van Zee Date: Mon Dec 18 16:29:28 2017 -0600 Merge branch 'master' into selfinit commit 0084531d3eea730a319ecd7018428148c81bbba7 Author: Field G. Van Zee Date: Sun Dec 17 18:58:25 2017 -0600 Updated flatten-headers.py for python3. Details: - Modified flatten-headers.py to work with python 3.x. This mostly amounted to removing print statements (which I replaced with calls to my_print(), a wrapper to sys.stdout.write()). Thanks to Stefan Husmann for pointing out the script's incompatibility with python 3. - Other minor changes/cleanups. commit 90b11b79c302f208791bdfb1ed754873103c7ce5 Author: Field G.
Van Zee Date: Sun Dec 17 17:34:32 2017 -0600 Modest performance boost to flatten-headers.py. Details: - Updated flatten-headers.py to pre-compile the main regular expression used to isolate #include directives and the header filenames they reference. The compiled regex object is then used over and over on each header file in the tree of referenced headers. This appears to have provided a 1.7-2x performance increase in the best case. - Other minor tweaks, such as renaming the main recursive function from replace_pass() to flatten_header(). commit 99dee87f30b4d437fa6b5e4ba862526d07b9f08b Author: Field G. Van Zee Date: Sun Dec 17 16:47:27 2017 -0600 Reimplemented flatten-headers.sh in python. Details: - Added flatten-headers.py, a python implementation of the bash script flatten-headers.sh. The new script appears to be 25-100x faster, depending on the operating system, filesystem, etc. The python script abides by the same command line interface as its predecessor and targets python 2.7 or later. (Thanks to Devin Matthews for suggesting that I look into a python replacement for higher performance.) - Activated use of flatten-headers.py in common.mk via the FLATTEN_H variable. - Made minor tweaks to flatten-headers.sh such as spelling corrections in comments. commit d9c0574599c3f97c0f9b6c334a077bab9452e1f4 Author: Field G. Van Zee Date: Thu Dec 14 17:13:42 2017 -0600 Allow travis failures of OS X builds that run testsuite. Details: - Added an allowance for OS X builds that run the testsuite to fail. There seems to be an issue with 1m when running in Travis CI under OS X and clang, but only in double-precision. Haven't been able to reproduce the error on my own, and thus, I can't debug it. (Hopefully it is simply a version-specific compiler bug.) commit 86cd23b7379b00a42b4ecc04fa668f1e3f9b54ee Author: Field G. Van Zee Date: Thu Dec 14 15:47:41 2017 -0600 Fixed testsuite Makefile brokenness from 9091a207. 
Details: - Fixed a makefile error encountered when building the testsuite directly in its directory (as opposed to indirectly via 'make test'). The fix involves introducing a new variable, BUILD_PATH, alongside the existing DIST_PATH variable. By default, BUILD_PATH is set to the current directory, and is overridden by other Makefiles used by, for example, the testsuite and standalone test drivers in testsuite or test, respectively. - Some files/directories in common.mk were redefined in terms of BUILD_DIR, such as the locations of config.mk file and the intermediate include directory. commit 6a3a8924c04d25507fc4aa593df30c56c7dc12f7 Author: Field G. Van Zee Date: Thu Dec 14 13:20:02 2017 -0600 Temporarily show Makefile's testsuite output. Details: - Disabled redirection of testsuite output for 'test' target. This is part of an attempt to debug a segmentation fault on OS X via Travis. commit 9a01080dd426915bed18229f70401bfa639dc283 Merge: 83316485 a32e8a47 Author: Field G. Van Zee Date: Thu Dec 14 11:27:19 2017 -0600 Merge branch 'master' into selfinit commit a32e8a47c022b6071302b2956af5728976c83ca9 (origin/travis) Author: Field G. Van Zee Date: Wed Dec 13 16:31:36 2017 -0600 Added an exclusion to .travis.yml. Details: - Added exclusion for out-of-tree builds on OS X (clang). commit b9f7d987df548965c86e16e0ba94d5cad0d9b399 Author: Field G. Van Zee Date: Wed Dec 13 16:22:09 2017 -0600 Cleaned up after previous travis oot debugging. Details: - Removed debugging output from common.mk related to Travis CI out-of-tree builds. - Other minor cleanups to common.mk. commit 9091a207aa8c49e279676ea02be533480b3b0d5a Author: Field G. Van Zee Date: Wed Dec 13 16:12:34 2017 -0600 Attempted fix to travis oot build failure. Details: - Found the likely cause of the Travis CI out-of-tree build failures: config.mk was being read from DIST_PATH, rather than the current directory. commit c01c71c33e236e6c91f5ddd3ec1e3faec89368c1 Author: Field G. 
Van Zee Date: Wed Dec 13 15:58:50 2017 -0600 Added debugging output to Makefile. Details: - Added $(info ...) statements in key locations in an attempt to reveal why Travis CI doesn't like building BLIS out-of-tree. commit 784289d69dd6b3692444d3b3e290f6a014465b72 Author: Field G. Van Zee Date: Wed Dec 13 15:31:27 2017 -0600 Updated SHELL in common.mk from /bin/bash to bash. commit d9bb1d1d4ebc89ea75d9d927d09882162a914f77 Author: Field G. Van Zee Date: Wed Dec 13 15:27:54 2017 -0600 Defined SHELL in common.mk so "echo -n" works. Details: - Defined the SHELL variable in common.mk as "/bin/bash" so that the -n option can be used with echo in the Makefile rule for flattening blis.h. Thanks to Devin Matthews for suggesting this fix. commit 9289a08667df2044f3a37af54d893efe2b56d555 Author: Field G. Van Zee Date: Wed Dec 13 15:14:27 2017 -0600 Attempt 3 on .travis.yml. commit 720bfcf0ef54fdc41df0dcaa94503edb0d5c8972 Author: Field G. Van Zee Date: Wed Dec 13 14:52:28 2017 -0600 More fixes to .travis.yml. Details: - Fixed a mistake (hopefully) in d0c4dd0 that resulted in many more osx/clang sub-tests than intended. - Shortened the variable names in an effort to make them more readable via the Travis CI web interface. commit 8717c9c97fe9b1ecd3b3192049a73976f8390ca7 Author: Field G. Van Zee Date: Wed Dec 13 14:36:37 2017 -0600 Added 'pwd' commands to .travis.yml for debugging. Details: - Added 'pwd' commands to the script portion of the .travis.yml file in an attempt to uncover the problem with the recent out-of-tree build testing changes made in d0c4dd0. commit 83316485ce10f6fcafe92a1c146282de0dd8068a Author: Field G. Van Zee Date: Wed Dec 13 14:14:50 2017 -0600 Simplified/fixed self-initialization. Details: - Fixed a race condition in self-initialization whereby the bli_is_init static variable could be erroneously read as TRUE by thread 1 while thread 0 is still executing bli_init_apis(), thus allowing thread 1 to use the library before it is actually ready. 
Thanks to Minh Quan Ho and Devin Matthews for pointing out this issue. - Part of the solution to the aforementioned race condition involved replacing the runtime initialization of the global scalar constants (e.g., BLIS_ONE, BLIS_ZERO, etc.) in bli_const.c with a static initialization of those same constants. This eliminates the need for bli_const_init() altogether. (The static initialization is made concise via preprocessor macros.) - Defined bli_gks_query_cntx_noinit(), which behaves just like bli_gks_query_cntx(), except that it does not call bli_init_once(). This function is called in lieu of bli_gks_query_cntx() in bli_ind_init() and bli_memsys_init() so as to not result in any recursion into bli_init_once(). - Removed BLIS_ONE_HALF, BLIS_MINUS_ONE_HALF global scalar constants. They have no use in BLIS or its test products, and we have little reason to believe they are used by others. - Removed testsuite/out file, which was accidentally committed as part of 70640a3. commit 6526d1d4ae6dbfa854ca8d1e5f224cd6ab3fa958 Author: Field G. Van Zee Date: Tue Dec 12 13:50:43 2017 -0600 Added temp_dir argument to flatten-headers.sh. Details: - Added "temp_dir" argument to flatten-headers.sh so that the caller can specify where intermediate files should be created as the script runs. - Updated flatten-headers.sh to create intermediate files in temp_dir instead of alongside the corresponding source files. This should now (once again) allow out-of-tree builds where the BLIS distribution is read-only, or where the out-of-tree build is running concurrently with another out-of-tree build. (Thanks to Devin Matthews for pointing out the possibility of simultaneous out-of-tree builds.) commit 94755017c967630daf2e31c1f63ed5e88ab0d6ab Merge: d0c4dd00 5cf7b0c4 Author: Field G. Van Zee Date: Tue Dec 12 12:50:41 2017 -0600 Merge branch 'master' of github.com:flame/blis commit d0c4dd000ff38acc249e8acf7e0655a523991695 Author: Field G.
Van Zee Date: Tue Dec 12 12:47:53 2017 -0600 Added out-of-tree build test to .travis.yml file. Details: - Modified .travis.yml file to include an out-of-tree build test (using the "auto" configure target). Thanks to Devin Matthews for this suggestion. commit 5cf7b0c4e52922069183a87dc2aa177419644e04 Author: Devin Matthews Date: Tue Dec 12 12:38:48 2017 -0600 Ignore blis.h.interm [ci skip] commit 8d8ff74d15b4a584929cec36034ba6d3c53f7d27 Author: Field G. Van Zee Date: Tue Dec 12 12:32:50 2017 -0600 Further attempt to fix out-of-tree builds. Details: - Fix applied in 87978f6 was necessary but not sufficient to fix out-of-tree builds. It turns out that using a source tree that had already built the target erroneously gave the impression that out-of-tree builds were working again, when in fact they were still broken. The additional changes in this commit should complete the fix that was started in the aforementioned commit. Thanks to Devin Matthews and Shaden Smith for their help in isolating this issue. commit 70640a37109290b57c344083c00624e13c496e30 Author: Field G. Van Zee Date: Mon Dec 11 17:18:43 2017 -0600 Implemented library self-initialization. Details: - Defined two new functions in bli_init.c: bli_init_once() and bli_finalize_once(). Each is implemented with pthread_once(), which guarantees that, among the threads that pass in the same pthread_once_t data structure, exactly one thread will execute a user-defined function. (Thus, there is now a runtime dependency against libpthread even when multithreading is not enabled at configure-time.) - Added calls to bli_init_once() to top-level user APIs for all computational operations as well as many other functions in BLIS to all but guarantee that BLIS will self-initialize through the normal use of its functions. - Rewrote and simplified bli_init() and bli_finalize() and related functions. - Added -lpthread to LDFLAGS in common.mk. 
- Modified the bli_init_auto()/_finalize_auto() functions used by the BLAS compatibility layer to take and return no arguments. (The previous API that tracked whether BLIS was initialized, and then only finalized if it was initialized in the same function, was too cute by half and borderline useless because by default BLIS stays initialized when auto-initialized via the compatibility layer.) - Removed static variables that track initialization of the sub-APIs in bli_const.c, bli_error.c, bli_init.c, bli_memsys.c, bli_thread, and bli_ind.c. We don't need to track initialization at the sub-API level, especially now that BLIS can self-initialize. - Added a critical section around the changing of the error checking level in bli_error.c. - Deprecated bli_ind_oper_has_avail() as well as all functions bli_<opname>_ind_get_avail(), where <opname> is a level-3 operation name. These functions had no use cases within BLIS and likely none outside of BLIS. - Commented out calls to bli_init() and bli_finalize() in testsuite's main() function, and likewise for standalone test drivers in 'test' directory, so that self-initialization is exercised by default. commit 70a64432ee5a7adbee10fb7ff6d7b608c1940a7a Author: Field G. Van Zee Date: Mon Dec 11 13:14:20 2017 -0600 Fixed off-by-one indexing in bli_cpuid.c. Details: - In bli_cpuid.c, fixed an off-by-one indexing statement in vpu_count() whereby a string-terminating NULL character, '\0', is written beyond the bounds of the model_num string. - Minor whitespace and formatting edits to bli_cpuid.c. commit 87978f6261a080d261d01f9acf4e9cc18855c833 Author: Field G. Van Zee Date: Mon Dec 11 12:49:03 2017 -0600 Fixed broken out-of-tree builds since 52f9e6f. Details: - Added missing $(DIST_PATH)/ prefix to relative path to flatten-headers.sh script in common.mk so that the script could be found during out-of-tree builds. Thanks to Devin Matthews for reporting this bug. commit 513ef4d040f89a18dda5154e8c4cf1aaf7463999 Author: Field G.
Van Zee Date: Mon Dec 11 12:35:59 2017 -0600 Various typecasting fixes, mis-typed enums, etc. Details: - Fixed implicit typecasting of conj_t to trans_t in bli_[un]packm_cxk.c. - Properly typecast integer arguments to match format specifier in various calls to printf() in bli_l3_thrinfo.c, bli_cntx.c, bli_pool.c, and bli_util_oapi.c. - Fixed "unsigned less-than-comparison with zero" checks in bli_check.c, bli_cntx.h. - Fixed mis-typed enums in bli_cntx.c (e.g., l1mkr_t that should have been l1fkr_t or l1vkr_t). - Fixed instances of opid_t value BLIS_GEMM that should have been l3ukr_t value BLIS_GEMM_UKR in bli_cntx_ref.c. - NOTE: These issues were identified via compiler warnings when building BLIS with clang on a rather old installation of OS X: $ clang --version Apple LLVM version 5.0 (clang-500.2.79) (based on LLVM 3.3svn) Target: x86_64-apple-darwin15.2.0 Thread model: posix commit 3bc99a96a3648f51b9acdc8a8c7e1cf4eb815459 Merge: 3a441183 78199c53 Author: prangana Date: Mon Dec 11 12:53:03 2017 +0530 Fix merge conflicts after rebase with release branch Change-Id: I581b26c6d515f717ff0dce91c7c0c92553aa2630 commit 3a44118398955d6f872e01f73ae5bb4a4f8500f7 Author: Nisanth M P Date: Wed Nov 15 11:11:17 2017 +0530 Added AMD copyright line to the changed files in last 3 commits Change-Id: I37d5dbbbe1b199e07529610a5e9cc9e49d067c66 commit 268a56c06e94d1c388766dbfe81d54efbe432809 Author: Field G. Van Zee Date: Wed Nov 1 11:51:41 2017 -0500 Revert to default SIMD alignment for bulldozer. Details: - Removed the default-overriding #define of BLIS_SIMD_ALIGN_SIZE set in config/bulldozer/bli_kernel.h. Not sure where this value came from, but it would seem to allow for insufficient starting address alignment for any matrices created via bli_malloc_user(), such as via bli_obj_create(). Thanks to Rene Sitt for reporting the behavior that led us to this bug. - This commit is a manual patch of the same fix made to the 'rt' branch in 8f150f2. 
commit 510a6863e28277f9446abfb77f1aea9f01d37e7a Author: Devin Matthews Date: Mon Oct 30 10:04:42 2017 -0500 Fix CVECFLAGS for bulldozer config. commit c669716790bdda5d2b11ea0a026cbc121b228842 Author: Nisanth M P Date: Tue Oct 24 16:36:36 2017 +0530 Adding __attribute__((constructor/destructor)) for CLANG case. CLANG supports __attribute__, but its documentation doesn't mention support for constructor/destructor. Compiling with clang and testing shows that it does support this. Change-Id: Ie115b20634c26bda475cc09c20960d687fb7050b commit 24e64a9d0877d788357fc63d4b947e977f8697f7 Author: Field G. Van Zee Date: Wed Oct 18 13:41:25 2017 -0500 Removed a duplicate bli_avx512_macros.h header. Details: - Removed a duplicate header file that was causing problems during installation for the 'knl' configuration. Thanks to Victor Eijkhout for reporting this issue. commit 9c0a3c4c0260cbfefb9f11532f46508b4fd19ec2 Author: Nisanth M P Date: Mon Oct 16 22:06:57 2017 +0530 Thread Safety: Move bli_init() before and bli_finalize() after main() BLIS provides APIs to initialize and finalize its global context. One application thread can finalize BLIS, while other threads in the application are still using BLIS. This issue can be solved by removing bli_finalize() from API. One way to do this is by getting bli_finalize() to execute by default after application exits from main(). GCC supports this behaviour with the help of __attribute__((destructor)) added to the function that needs to be executed after main exits. Similarly bli_init() can be made to run before application enters main() so that application need not call it. Change-Id: I7ce6cfa28b384e92c0bdf772f3baea373fd9feac commit 83f31253eb21c5ecd8a5907835e57720daae0b8b Author: Nisanth M P Date: Mon Oct 16 21:07:50 2017 +0530 Thread safety: Make the global induced method status array local to thread BLIS retains a global status array for induced methods, and provides APIs to modify this state during runtime.
So, one application thread can modify the state, before another starts the corresponding BLIS operation. This patch solves this issue by making the induced method status array local to threads. Change-Id: Iff59b6f473771344054c010b4eda51b7aa4317fe commit e923402e68029be379a4297de3ac6fb155ffd928 Author: sthangar Date: Thu Sep 28 12:15:36 2017 +0530 The inner loop parallelization is turned off by default, the JR and IR loop parameters are set to 1 by default Change-Id: I8c3c2ecbbd636259f6ffb92768ec04148205c3e5 commit a64c15de19327c7595376d699be676c7003e850e Author: Field G. Van Zee Date: Tue Sep 26 19:02:53 2017 -0500 Fixed a pthread typo in previous commit. Details: - Misnamed 'pthread_mutex_t' type in bli_memsys.c as 'thread_mutex_t'. commit 42dcd589c37e1a2473ab2e1539207da97aebc07f Author: Field G. Van Zee Date: Tue Sep 26 17:00:04 2017 -0500 Fixed bugs in gemm/gemmtrsm ukr tests in testsuite. Details: - Fixed a bug in gemmtrsm test module that was due to improper partitioning into a k x k triangular matrix for the purposes of obtaining an mr x k micropanel of A with which to test. - Fixed a bug in gemm and gemmtrsm test modules that would only manifest for very large k (depending on the product of mr x kc on that architecture). The bug arose from the fact that the test module was triggering the allocation of blocks from the internal memory pools, which are limited in size. This allocation imposes an implicit assumption that the micro- panel being tested with will fit inside, and this assumption is violated for large values of k. Arbitrarily large k may now be tested for both operation tests. - Added OpenMP/pthread critical sections around the setting or getting of statuses from the induced method operation lookup table in bli_l3_ind.c. - Added the 'static' keyword to all pthread_mutex_t global variables in BLIS. - Thanks to Nisanth Padinharepatt of AMD for reporting the first and third issues. commit 206beb68ff73b75f5c382413967aacbb8a0aac3a Author: Field G.
Van Zee Date: Sat Sep 9 14:10:15 2017 -0500 Updated bibtex info for BLIS5 (3m4m) article. commit 0c8c0363aeb1f4aa88f7ec2d02403dab05a6e014 Author: sthangar Date: Mon Aug 28 16:44:42 2017 +0530 Bug fix for the testsuite build failing Change-Id: I7cd8c9d187387c48b2564e45cbfb8df985e93d77 commit 63d1c84465b50f64787808dd3e8494e683c16821 Author: sthangar Date: Wed Aug 23 13:01:14 2017 +0530 Adding auto hardware detection for Zen Change-Id: I40ce6705dd66b35000c4ccddffad1c5b65998caf commit 537fb2a895b09be94b11947696fd2da629be24dd Author: Devin Matthews Date: Tue Aug 15 10:02:25 2017 -0500 Add vzeroupper to Intel AVX kernels. commit 7628de3f76f78a44788807605a4601ddda445854 Author: Field G. Van Zee Date: Thu Aug 10 16:24:28 2017 -0500 Removed trailing enum commas from bli_type_defs.h. Details: - Removed trailing commas from enums in bli_type_defs.h. Thanks to Erling Andersen for pointing out this inconsistency and suggesting the change. commit a666fd4e267ffae3d4b21f38d569c61ff56adc9e Author: Field G. Van Zee Date: Sat Aug 5 13:04:31 2017 -0500 Added edge handling to _determine_blocksize_b(). Details: - Added explicit handling of situations where i == dim to bli_determine_blocksize_b_sub(). This isn't actually needed by any current use case within BLIS, but handling the situation is nonetheless prudent. Thanks to Minh Quan for reporting this issue and requesting the fix. commit 0c8afa546d7f33760415519ba328d7c49eb7aa06 Author: Field G. Van Zee Date: Fri Aug 4 14:17:44 2017 -0500 Fixed a minor bug in level-3 packm management. Details: - Fixed a bug in bli_l3_packm() that caused cntl_t-cached packed mem_t entries to be released and then re-acquired unnecessarily. (In essence, the "<" operands in the conditional that guards the release-and-reacquire code block simply needed to be swapped.) The bug should have only affected performance (rather than the computed result). Thanks to Minh Quan for identifying and reporting the bug. 
commit 6cf68a185d83fa46d438fcef65258ace78e24b13 Author: Devin Matthews Date: Mon Jul 31 15:19:51 2017 -0500 Change lsame_ signature to match lapacke. commit 6a9bd97295cc4fb1cbcd28f69824a43c073c9a76 Author: Field G. Van Zee Date: Sat Jul 29 20:17:05 2017 -0500 Fixed pthreads compile bug with previous commit. Details: - Erroneously passed family parameter into l3int_t function despite that function not taking the parameter. Oops. commit 95adc43d800431dc0a02ca83a51426dbef641ad6 Author: Field G. Van Zee Date: Sat Jul 29 14:53:39 2017 -0500 Moved 'family' field from cntx_t to cntl_t. Details: - Removed the family field inside the cntx_t struct and re-added it to the cntl_t struct. Updated all accessor functions/macros accordingly, as well as all consumers and intermediaries of the family parameter (such as bli_l3_thread_decorator(), bli_l3_direct(), and bli_l3_prune_*()). This change was motivated by the desire to keep the context limited, as much as possible, to information about the computing environment. (The family field, by contrast, is a descriptor about the operation being executed.) - Added additional functions to bli_blksz_*() API. - Added additional functions to bli_cntx_*() API. - Minor updates to bli_func.c, bli_mbool.c. - Removed 'obj' from bli_blksz_*() API names. - Removed 'obj' from bli_cntx_*() API names. - Removed 'obj' from bli_cntl_*(), bli_*_cntl_*() API names. Renamed routines that operate only on a single struct to contain the "_node" suffix to differentiate with those routines that operate on the entire tree. - Added enums for packm and unpackm kernels to bli_type_defs.h. - Removed BLIS_1F and BLIS_VF from bszid_t definition in bli_type_defs.h. They weren't being used and probably never will be. commit a98e4aa547f61ab09dd91d11478c2a2ef9882e11 Author: Devin Matthews Date: Thu Jul 20 14:50:13 2017 -0500 Clang can't make up its mind what to support.
commit 32eb36c3e8c2add2528514272044de16faed0c8f Author: Devin Matthews Date: Thu Jul 20 12:54:58 2017 -0500 Add default #define for __has_extension. commit 2a9aa134f7c29d3d4fdc160022ff257e61885a95 Author: Devin Matthews Date: Thu Jul 20 10:04:34 2017 -0500 Add fallbacks to __sync_* or __c11_atomic_* builtins when __atomic_* is not supported. Fixes #143. commit 6f07a034d575e1e9e30bb6417b8fcb77cf301297 Author: Field G. Van Zee Date: Wed Jul 19 15:40:48 2017 -0500 Updated ar option list used by all configurations. Details: - Dropped 'u' from the list of modifiers passed into the library archiver ar. Previously, "cru" was used, while now we employ only "cr". This change was prompted by a warning observed on Ubuntu 16.04: ar: `u' modifier ignored since `D' is the default (see `U') This caused me to realize that the default mode causes timestamps to be zero, and thus the 'u' option, which causes only changed object files to be inserted, is not applicable. commit 32bc03f9eed8795cfd2f2615d1c9f8673e039c57 Author: Field G. Van Zee Date: Wed Jul 19 13:51:53 2017 -0500 Added --force-version=STRING option to configure. Details: - Added an option to configure that allows the user to force an arbitrary version string at configure-time. The help text also now describes the usage information. - Changed the way the version string is communicated to the Makefile. Previously, it was read into the VERSION variable from the 'version' file via $(shell cat ...). Now, the VERSION variable is instead set in config.mk (via a configure-substituted anchor from config.mk.in). commit befaee6dd8b2a72de9e0461fe2ec1f36e9f88f3c Author: Field G. Van Zee Date: Tue Jul 18 17:56:00 2017 -0500 Updated openmp/pthread barriers with GNU atomics. Details: - Updated the non-tree openmp and pthreads barriers defined in bli_thrcomm_openmp.c and bli_thrcomm_pthreads.c to instead call a common implementation in bli_thrcomm.c, bli_thrcomm_barrier_atomic(). 
This new implementation goes through the same motions as the previous codes, but protects its loads and increments with GNU atomic built-ins. These atomic statements take memory ordering parameters that allow us to specify just enough constraints for the barrier to work as intended on weakly-ordered hardware. The prior implementation was only guaranteed to work on systems with strongly- ordered memory. (Thanks to Devin Matthews for suggesting this change and his crash-course in atomics and memory ordering.) - Removed 'volatile' from structs' barrier field declarations in bli_thrcomm_*.h. - Updated bli_thrcomm_pthread.? files to use renamed struct barrier fields consistent with that of the _openmp.? files. - Updated other bli_thrcomm_* files to rename "communicator" variables to simply "comm". commit 8f739cc847fcff2ddeeb336f8b2b9d080eb16f6c Author: Field G. Van Zee Date: Mon Jul 17 19:03:22 2017 -0500 Added API to set mt environment variables. Details: - Renamed bli_env_get_nway() -> bli_thread_get_env(). - Added bli_thread_set_env() to allow setting environment variables pertaining to multithreading, such as BLIS_JC_NT or BLIS_NUM_THREADS. - Added the following convenience wrapper routines: bli_thread_get_jc_nt() bli_thread_get_ic_nt() bli_thread_get_jr_nt() bli_thread_get_ir_nt() bli_thread_get_num_threads() bli_thread_set_jc_nt() bli_thread_set_ic_nt() bli_thread_set_jr_nt() bli_thread_set_ir_nt() bli_thread_set_num_threads() - Added #include "errno.h" to bli_system.h. - This commit addresses issue #140. - Thanks to Chris Goodyer for inspiring these updates. 
commit 10163833075fd42be5b5b503acc855f91a484cfd Author: Marat Dukhan Date: Thu Jul 13 21:39:24 2017 -0700 Fix Emscripten builds commit c09b30d115eade72f44f37bf90aa848c9c0e79af Author: Minh Quan HO Date: Fri Jul 7 10:52:05 2017 +0200 set missing free_fp in bli_membrk_init for free-ing GEN_USE buffers The membrk's free_fp is called when releasing GEN_USE buffers, but this free_fp is not set in bli_membrk_init commit 997628ed9793c72e9ef576dd8d715cfec27c4862 Author: sthangar Date: Fri Jun 30 12:23:19 2017 +0530 Reducing the framework overhead of GEMV routines Change-Id: I83607ad767bff74e305e915b54b0ea34ec3e5684 commit ee869066168239b710ad9938bb0e1ae454883f3a Author: Kiran Varaganti Date: Tue Jul 4 12:57:32 2017 +0530 Improved efficiency of dGEMM for large matrices by reducing TLB load misses and majorly L3 cache misses. This is achieved by changing the packed block sizes of matrix A & B. Now the optimum values are MC_D = 510 and KC_D = 1024. Change-Id: I2d8bdd5f62f2d1f8782ae2997f3d7a26587d1ca4 commit 7b933b90b1859c96de49a402d48de82909bc73e5 Author: Devin Matthews Date: Tue Jun 6 20:23:17 2017 -0500 Add new SSI acknowledgment commit 3485abba4b426fbf42b146a9611a0841f6d236c6 Author: sthangar Date: Wed May 24 11:48:16 2017 +0530 Checked in the small matrix code to compute GEMM called with A transpose case Change-Id: I29f40046d43d7a4b037c1cb322503ee26495f462 commit de16beb83b29b4b9748f70db985b0fe04db85f7d Author: Devin Matthews Date: Fri May 26 14:49:31 2017 -0400 PACKDIM_MR=8 didn't work out, but messing with the prefetching helps 2%. commit 25d0e618544b6eea7d3f13c7aec513ac0139801d Author: Devin Matthews Date: Fri May 26 14:47:36 2017 -0400 Revert "Change PACKDIM_MR (double) for haswell to 8." This reverts commit 681eec913d7c2ebcff637cec5c1627ced9a92b99. commit c5bdd84b35bc2a8ebf55b7763fb56c0c945be0cb Author: Devin Matthews Date: Fri May 26 12:28:09 2017 -0500 Change PACKDIM_MR (double) for haswell to 8. commit 172789d562001293b973bbdd8015bd27d37292e8 Author: Field G. 
Van Zee Date: Wed May 17 13:03:52 2017 -0500 Restored deleted lines from makefile fragments. commit 3ea9bd2c8e90dbd35655fa6a5b953dfea1f308fe Author: Devin Matthews Date: Wed May 17 12:29:44 2017 -0500 Change to /bin/sh. All scripts checked with Debian's checkbashisms. Also check for clang first in auto-detect.sh. commit 49438409eedb98d3f0ebf00b8d1eee0ae45f4f8c Author: Devin Matthews Date: Wed May 17 12:27:14 2017 -0500 Remove shebangs from makefiles. commit 497e2640474c016d576dce3530fa6a66891642a0 Author: J M Dieterich Date: Tue May 16 23:11:22 2017 -0400 Fix if/else structure. Thanks to TravisCI. commit 835035c56a8de36ad25bb8d1375db170d489ef57 Author: J M Dieterich Date: Tue May 16 22:23:27 2017 -0400 Mark piledriver compilable w/ clang. commit 6cdb533472ee61af297c1f948307abbf45828887 Author: J M Dieterich Date: Tue May 16 22:12:12 2017 -0400 Mark bulldozer compilable w/ clang. commit a85697d62272da06d28cd1c947f6cf1098df6467 Author: J M Dieterich Date: Tue May 16 22:06:59 2017 -0400 Correct error message. commit e0c64cad271058688a2b999caf8c2767dc3aef7e Author: J M Dieterich Date: Tue May 16 22:03:23 2017 -0400 Indeed one can compile for carrizo also using clang. commit 4aafe0505d3f0954d095ded5459a76976e5093b4 Author: J M Dieterich Date: Tue May 16 21:50:49 2017 -0400 A bunch of shebang fixes from unportable /bin/bash to portable /usr/bin/env bash commit abaeaa68ea11e84be1810f564d6f38d506cbeb6a Author: Field G. Van Zee Date: Fri May 5 15:06:56 2017 -0500 Fixed a bug in norm1v, norm1m. Details: - Fixed a bug that manifested as improperly-computed 1-norm for vectors and matrices. This is one of the few operations in BLIS that does not have its own test module within the testsuite, hence why it went undetected for so long. The bad 1-norms were being used to normalize matrices in the testsuite after initialization, which led to some matrices containing a combination of "large" and "small" values.
This tended to push the residuals computed after each test away from zero. In some cases, they were off *just* enough for the testsuite to label it a "failure". Many thanks to Jeff Hammond for reporting this bug. (Wonky details: the bug was due to improperly-defined level-0 scalar macros for abval2, an operation that computes the absolute square, or complex magnitude/modulus. Certain complex domain instances of abval2 were being incorrectly defined in terms of real-only solutions, leading to bad results. This level-0 operation forms the basis of norm1v/norm1m. absq2 was also affected, but almost nothing uses this operation.) commit cc3107ae1c2074f72b724aa748d2e5b4cb290ed5 Author: Devin Matthews Date: Thu May 4 10:35:22 2017 -0500 Setting any one of BLIS_NT_[IJ][CR] overrides BLIS_NUM_THREADS. Missing BLIS_NT_XX's are defaulted to 1. Fixes #123. commit c8ab91f70d399ee14edd30a3a5c46b24c5d2f910 Author: Field G. Van Zee Date: Wed May 3 15:04:51 2017 -0500 Disable complex 3m/4m in testsuite by default. Details: - Disabled testsuite tests of all level-3 implementations based on 3m and 4m. This will improve testing runtime on Travis CI as well as for anyone manually running the testsuite using default test parameters. Thanks to Devin Matthews for suggesting this change. commit 9700f0e5785007ddafb72a5ca83800dee61fd35c Author: Jeff Hammond Date: Tue May 2 19:25:21 2017 -0700 allow KNL build without hbwmalloc.h (i.e. emulated) we want to be able to run BLIS KNL binaries on non-KNL machines via SDE. although it is possible to install hbwmalloc implementation on such systems, it is easier not to, since obviously the performance of SDE execution is not representative so there is no reason to emulate HBW allocation. commit 17dcd5a33ff91967f67e7c0ba09b4f18754609a4 Author: Field G. Van Zee Date: Tue May 2 16:48:43 2017 -0500 Fixed stray parentheses in README citations. commit 2910d44ff9e1d951d3249313f4ab39d18ea1b48d Author: Field G.
Van Zee Date: Tue May 2 16:38:43 2017 -0500 CHANGELOG update (0.2.2) commit 5ca3863220e07972fcefc6682ddd3f6e54fe4a94 Author: Field G. Van Zee Date: Tue May 2 15:48:30 2017 -0500 Fixed a trsm1m bug that affected right-side cases. Details: - Fixed a bug introduced in 1c732d3 that affected trsm1m_r. The result was nondeterministic behavior (usually segmentation faults) for certain problem sizes beyond the 1m instance of kc (e.g. 128 on haswell). The cause of the bug was my commenting out lines in bli_gemm1m_ukr_ref.c which explicitly directed the virtual gemm micro-kernel to use temporary space if the storage preference of the [real domain] gemm ukernel did not match the storage of the output matrix C. In the context of gemm, this handling is not needed because agreement between the storage pref and the matrix is guaranteed by a high-level optimization in BLIS. However, this optimization is not applied to trsm because the storage of C is not necessarily the same as the storage of the micro-panels of B--both of which are updated by the micro-kernel during a trsm operation. Thus, the guarantee of storage/preference agreement is not in place for trsm, which means we must handle that case within the virtual gemm micro-kernel. - Comment updates and a minor macro change to bli_trsm*_cntx_init() for 3m1, 4m1a, and 1m. commit 1af0b09f5c275ee7bac896cc6f36f42af721d9b5 Author: Field G. Van Zee Date: Tue May 2 12:09:39 2017 -0500 README.md update. Details: - Updated bibtex entries for 4th BLIS paper, and adds entries for 5th and 6th BLIS papers. commit db4a0bb8ba7cd697d68be8e5632371ee3e59fd63 Author: Field G. Van Zee Date: Fri Mar 17 12:07:27 2017 -0500 Whitespace reformatting to armv8a kernels file. Details: - Updated formatting of function signature/header in kernels/armv8a/3/bli_gemm_opt_4x4.c. commit e3eb01f6b990e205b15edcbaffd3d54b3ddd1ca4 Author: Field G. Van Zee Date: Tue Feb 21 15:33:39 2017 -0600 Disabled experiment-related 1m code. 
Details: - Commented out code in frame/ind/oapi/bli_l3_3m4m1m_oapi.c that was specifically inserted to facilitate the benchmarking of 1m block-panel and panel-block algorithms. - Updates to test/3m4m/Makefile, runme.sh script, and test_gemm.c to reflect changes used/needed during benchmarking. commit 4f61528d56eed6a139eeac9db0c44e56f2d2d136 Author: Field G. Van Zee Date: Wed Jan 25 16:25:46 2017 -0600 Added 1m-specific APIs for bp, pb gemm algorithms. Details: - Defined bli_gemmbp_cntl_create(), bli_gemmpb_cntl_create(), with the body of bli_gemm_cntl_create() replaced with a call to the former. - Defined bli_cntl_free_w_thrinfo(), bli_cntl_free_wo_thrinfo(). Now, bli_cntl_free() can check if the thread parameter is NULL, and if so, call the latter, and otherwise call the former. - Defined bli_gemm1mbp_cntx_init(), bli_gemm1mpb_cntx_init(), both in terms of bli_gemm1mxx_cntx_init(), which behaves the same as bli_gemm1m_cntx_init() did before, except that an extra bool parameter (is_pb) is used to support both bp and pb algorithms (including to support the anti-preference field described below). - Added support for "anti-preference" in context. The anti_pref field, when true, will toggle the boolean return value of routines such as bli_cntx_l3_ukr_eff_prefers_storage_of(), which has the net effect of causing BLIS to transpose the operation to achieve disagreement (rather than agreement) between the storage of C and the micro-kernel output preference. This disagreement is needed for panel-block implementations, since they induce a transposition of the suboperation immediately before the macro-kernel is called, which changes the apparent storage of C. For now, anti-preference is used only with the pb algorithm for 1m (and not with any other non-1m implementation). 
- Defined new functions, bli_cntx_l3_ukr_eff_prefers_storage_of() bli_cntx_l3_ukr_eff_dislikes_storage_of() bli_cntx_l3_nat_ukr_eff_prefers_storage_of() bli_cntx_l3_nat_ukr_eff_dislikes_storage_of() which are identical to their non-"eff" (effectively) counterparts except that they take the anti-preference field of the context into account. - Explicitly initialize the anti-pref field to FALSE in bli_gks_cntx_set_l3_nat_ukr_prefs(). - Added bli_gemm_ker_var1.c, which implements a panel-block macro-kernel in terms of the existing block-panel macro-kernel _ker_var2(). This technique requires inducing transposes on all operands and swapping the A and B. - Changed bli_obj_induce_trans() macro so that pack-related fields are also changed to reflect the induced transposition. - Added a temporary hack to bli_l3_3m4m1m_oapi.c that allows us to easily specify the 1m algorithm (block-panel or panel-block). - Renamed the following cntx_t-related macros: bli_cntx_get_pack_schema_a() -> bli_cntx_get_pack_schema_a_block() bli_cntx_get_pack_schema_b() -> bli_cntx_get_pack_schema_b_panel() bli_cntx_get_pack_schema_c() -> bli_cntx_get_pack_schema_c_panel() and updated all instantiations. Also updated the field names in the cntx_t struct. - Comment updates. commit 1d728ccb2394e77365e7c42683db6579c5fba014 Author: Field G. Van Zee Date: Fri Nov 25 18:29:49 2016 -0600 Implemented the 1m method. Details: - Implemented the 1m method for inducing complex domain matrix multiplication. 1m support has been added to all level-3 operations, including trsm, and is now the default induced method when native complex domain gemm microkernels are omitted from the configuration. - Updated _cntx_init() operations to take a datatype parameter. 
This was needed for the corresponding function for 1m (because 1m requires us to choose between column-oriented or row-oriented execution, which requires us to query the context for the storage preference of the gemm microkernel, which requires knowing the datatype) but I decided that it made sense for consistency to add the parameter to all other cntx initialization functions as well, even though those functions don't use the parameter. - Updated bli_cntx_set_blkszs() and bli_gks_cntx_set_blkszs() to take a second scalar for each blocksize entry. The semantic meaning of the two scalars now is that the first will scale the default blocksize while the second will scale the maximum blocksize. This allows scaling the two independently, and was needed to support 1m, which requires scaling for a register blocksize but not the register storage blocksize (ie: "packdim") analogue. - Deprecated bli_blksz_reduce_dt_to() and defined two new functions, bli_blksz_reduce_def_to() and bli_blksz_reduce_max_to(), for reducing default and maximum blocksizes to some desired blocksize multiple. These functions are needed in the updated definitions of bli_cntx_set_blkszs() and bli_gks_cntx_set_blkszs(). - Added support for the 1e and 1r packing schemas to packm, including 1e/1r packing kernels. - Added a minor optimization to bli_gemm_ker_var2() that allows, under certain circumstances (specifically, real domain beta and row- or column-stored matrix C), the real domain macrokernel and microkernel to be called directly, rather than using the virtual microkernel via the complex domain macrokernel, which carries a slight additional amount of overhead. - Added 1m support to the testsuite. - Added 1m support to Makefile and runme.sh in test/3m4m. Also simplified some code in test_gemm.c driver. 
commit 0d1b90286e29aa8b768e280b5286d92c02ad87a1 Author: Jeff Hammond Date: Tue Oct 25 21:15:26 2016 -0700 never use libm with Intel compilers Intel compilers include a highly optimized math library (libimf) that should be used instead of GNU libm. yes, this change is for ALL targets, including those that are not supported by the Intel compiler. there is no harm in doing this, and it is future-proof in the event that the Intel compilers support other architectures. commit b150870397e7aee558e61d1bd72a0c0d1d99bee8 Author: Field G. Van Zee Date: Fri Dec 8 16:08:41 2017 -0600 Removed most "old" directories. Details: - Removed the vast majority of directories named "old", which contained deprecated code that I wasn't quite ready to jettison from the source tree. commit 270c65985df849297ba1951aa3b56c03948d7775 Author: Field G. Van Zee Date: Fri Dec 8 15:21:18 2017 -0600 Modified bli_getopt() for thread-safety. Details: - Changed the interface of bli_getopt() to take a new argument, a getopt_t struct, that stores the values of optarg, optind, opterr, and optopt, and updated the implementation accordingly. (Previously, these variables were assumed to be global.) - Added a function for initializing a getopt_t struct. - Changed test_libblis.c--currently the only consumer of bli_getopt()--to utilize the new getopt_t state object. commit ce4d8fabc2e39371f89c12192fb707be82ae021a Merge: 39be59f2 e05a8dfa Author: Field G. Van Zee Date: Thu Dec 7 17:36:44 2017 -0600 Merge branch 'master' of github.com:flame/blis commit 39be59f2a8470f40475907d9dd52639b8a911a92 Author: Field G. Van Zee Date: Thu Dec 7 17:35:20 2017 -0600 Replaced several macros with static function APIs. Details: - Reimplemented several sets of get/set-style preprocessor macros with static functions, including those in the following frame/base headers: auxinfo, cntl, mbool, mem, membrk, opid, and pool. A few headers in frame/thread were touched as well: mutex_*, thrcomm, and thrinfo. 
commit e05a8dfa7cc7df41e966c1ad04e51c482b308b23 Merge: 79507337 4423e33d Author: dnp Date: Wed Dec 6 16:45:24 2017 -0600 Merge branch 'rt' commit 4423e33dc593115cda92c5763d756d7ad1298aa9 Author: dnp Date: Wed Dec 6 16:35:03 2017 -0600 Adding SKX kernels and configuration. commit 79507337e140daec7639f6eb3ed9cfe6e123d342 Author: Field G. Van Zee Date: Wed Dec 6 16:21:35 2017 -0600 Various checks to ensure that arch_t id is in range. Details: - Expanded checking of the arch_t id in bli_gks.c--either passed in from the caller or as returned from bli_arch_query_id()--against the expected range of id values. Thanks to Devangi Parikh for suggesting these additional sanity checks. commit fde7c1126c58373ecde83471890b257399144876 Author: Field G. Van Zee Date: Mon Dec 4 16:11:01 2017 -0600 Added 'uninstall-old-headers' target to Makefile. Details: - Defined a new 'uninstall-old-headers' target that allows users of BLIS to uninstall no-longer-needed headers left over from previous installations. - Fixed the 'uninstall-old' target so that it will install both .a and .so libraries. - Renamed 'uninstall-old' to 'uninstall-old-libs'. - Added 'uninstall-old' target (different from previous 'uninstall-old' target) that combines 'uninstall-old-libs' and 'uninstall-old-headers'. commit d4ee770bde213a87aa6049245145318324dc6b51 Author: Field G. Van Zee Date: Mon Dec 4 14:53:43 2017 -0600 Create/install monolithic cblas.h. Details: - When CBLAS is enabled at configure-time, BLIS now creates a monolithic cblas.h using the same flatten-header.sh script that was recently introduced for creating monolithic blis.h header files. The top-level Makefile will also install this cblas.h file into the install prefix alongside blis.h when the 'install' target is invoked. The two header files are compatible with one another. Regardless whether the user's source #includes cblas.h, both blis.h and cblas.h, or just blis.h, the user will get the CBLAS function prototypes and enums, as expected. 
commit 52f9e6f1b6468785af8947317656445d4729fc8b Merge: ab57b979 21360dd8 Author: Field G. Van Zee Date: Fri Dec 1 12:28:09 2017 -0600 Merge branch 'rt' commit 21360dd8e2c7287100645e109acaabcc6ba1140c Author: Field G. Van Zee Date: Wed Nov 29 14:11:34 2017 -0600 Fixed cntx_t packm query when ker_id > _NUM_PACKM_KERS. Details: - Fixed a subtle bug in bli_cntx_get_[un]packm_ker_dt() in which the function fails to return NULL when passed a kernel id argument that is equal to or beyond BLIS_NUM_[UN]PACKM_KERS. Instead, the function was attempting to index into the cntx_t's packm kernel array, which resulted in undefined behavior. Thanks to Devangi Parikh for finding this bug. commit 244a6f4e66e8ff091e995f8090ce779c1928aa8b Author: Field G. Van Zee Date: Tue Nov 28 17:48:48 2017 -0600 Fixed POSIX sed non-compliance in flatten-header.sh. Details: - Changed GNU usage of 'i' and 'a' sed commands used in flatten-header.sh to POSIX-compliant usage that will work on OS X's sed. commit 45078621676833e53a2878af8f89479c4f93b8ab Author: Field G. Van Zee Date: Tue Nov 28 15:16:22 2017 -0600 Generate/compile with/install monolithic blis.h. Details: - Rewrote monolithify-header.sh (and renamed to flatten-header.sh) so that headers are inserted recursively. This improves performance by a factor of 3-4x. - Modified configure to create an 'include/' directory in which make can create a monolithic header. - Modified the top-level Makefile so that a monolithic header is generated unconditionally prior to compilation (stored in include/) and so that the single header is installed instead of the 450 or so header files that reside throughout the framework source tree. - Added "include/*/*.h" to .gitignore file. - Removed some pnacl/emscripten leftovers that I intended to include in a1caeba (mostly in testsuite/Makefile). - Trivial comment changes to frame/include/bli_f2c.h. commit 1f30b1301bf6d6047ec29e57a5fde8eb1072a0ee Author: Field G.
Van Zee Date: Sat Nov 25 16:54:26 2017 -0600 Added missing framework support for x86_64 family. Details: - Added support for the x86_64 configuration family to bli_arch.c and bli_arch_config.h. Thanks to Johannes Dieterich for reporting this issue. - Bumped the default value for BLIS_SIMD_NUM_REGISTERS from 16 to 32 and the default value for BLIS_SIMD_SIZE from 32 to 64. This will support configuration families that include Skylake and newer processors without any support needed in the bli_family_*.h file. The semantics of these values have always been "maximum" and not exact values; comments in bli_kernel_macro_defs.h and the github wiki have been adjusted accordingly. commit 9f39806c4ed484c9ed13edf96005838d977722a9 Author: Field G. Van Zee Date: Tue Nov 21 16:03:56 2017 -0600 Fixed a bug in e31f0b3/b131b9a. Details: - Erroneously placed the "don't overwrite existing blocksize" logic in bli_blksz_init*() rather than in bli_cntx_set_blkszs(). It belongs in the latter because that function copies blocksizes as-is from the blksz_t function argument to the appropriate field in the cntx_t. If the blksz_t was previously initialized selectively, based on the sign of the blocksize value passed into bli_blksz_init*(), that just leaves some fields possibly uninitialized (with garbage values), which definitely will not work. - The aforementioned logic has been moved to bli_cntx_set_blkszs() via a new function bli_blksz_copy_if_pos(), which selectively copies only the blocksizes that are greater than zero. commit b131b9a025c15f548d4c2952a9ec85eee3d139b1 Author: Field G. Van Zee Date: Tue Nov 21 14:30:26 2017 -0600 Updated configs to omit setting some blocksizes.
Details: - Employ the new semantics of bli_blksz_init*() in e31f0b3 in various sub-configurations' bli_cntx_init_*() functions by passing in 0 for register and cache blocksizes that correspond to gemm microkernel datatypes that were not registered, allowing the default values set by the bli_cntx_init_*_ref() function call to remain. commit 499a4c002f895744ecaf81ef7f62d2d6d0d7d594 Merge: e31f0b3e 6c3ba502 Author: Field G. Van Zee Date: Tue Nov 21 14:25:08 2017 -0600 Merge branch 'rt' of github.com:flame/blis into rt commit e31f0b3e2dba19ca8a2946bc21beb136a42d0f57 Author: Field G. Van Zee Date: Tue Nov 21 14:21:25 2017 -0600 Subtle update to bli_blksz_init*() API. Details: - Updated the semantics of bli_blksz_init() and bli_blksz_init_ed() so that non-positive blocksize values are ignored entirely. This provides an easy way to indicate that certain existing values should not be touched by the update. Thanks to Devangi Parikh for feedback that led to these changes. commit 6c3ba502a11f87bc67555d26154cfd39d0af1bac Author: Field G. Van Zee Date: Tue Nov 21 13:50:53 2017 -0600 Added 'x86_64' sub-config directory. Details: - Added missing x86_64 configuration directory, which was intended to be part of b7ca580. - Added -Wfatal-errors compiler warning flag to all configurations so that compilation stops after the first error. - Changed the vectorization flags for intel64 configuration to be compatible with 'penryn', the oldest sub-config included in that family. - Changed the vectorization flags for penryn to target the 'core2' microarchitecture and ssse3. commit 25eee3cc49b0631812485d4d5ceef0c23ed1b6dd Author: Field G. Van Zee Date: Tue Nov 21 12:34:20 2017 -0600 Added a dummy file to kernels/generic. Details: - Added a dummy file to kernels/generic, which was previously empty, so that git would begin tracking the otherwise-empty directory.
This directory's existence is necessary for proper execution of configure for any configuration family that contains the 'generic' sub-configuration. Thanks to Johannes Dieterich for reporting the issue that led to this fix. commit ef024ce4cafa217669eaabb31ff8ab6df93cca05 Author: Field G. Van Zee Date: Mon Nov 20 18:08:29 2017 -0600 More tweaks to monolithify-header.sh Details: - Further fixes monolithify-header.sh script. - Removed unnecessary #include "blis.h" from frame/3/bli_l3_packm.h. commit 5028e7dec269b62895511453272585da36e591b5 Author: Field G. Van Zee Date: Mon Nov 20 17:00:37 2017 -0600 Second attempt to implement travis_wait. Details: - Corrected accidental misplacement of the travis_wait prefix (on the wrong line of the .travis.yml file) in commit 13e5d91. commit 13e5d9107b3763cba46fb1bae87476852601b47c Author: Field G. Van Zee Date: Mon Nov 20 15:57:06 2017 -0600 Added travis_wait prefix to testsuite via Travis. Details: - It appears that Travis CI has implemented a new policy that results in a test failing if it does not produce any output for more than 10 minutes. (Two test instances are now failing in Travis despite the most recent commit not affecting the library or testsuite.) This issue can be worked around by executing the test run via travis_wait, which takes an optional time parameter. This commit attempts to use 'travis_wait 30' in the .travis.yml file to prevent the early failure at 10 minutes. commit a1caeba0ea79c8fecb1abadca1f91c6367ab3afb Author: Field G. Van Zee Date: Mon Nov 20 13:31:20 2017 -0600 Removed pnacl, emscripten support from Makefile. commit 78199c539beaa50f37893add220261ce0dcb921a Merge: b3d8ab2e ab57b979 Author: praveeng Date: Mon Nov 20 15:51:20 2017 +0530 Merge master code till 01-Nov-2017 to amd-staging Change-Id: I40b53f876db84c8b947b3f2385c9b882245c6603 commit 9df6dda9ec51a0d40166169d2d8a2f84b42266e6 Author: Field G. Van Zee Date: Sat Nov 18 19:03:26 2017 -0600 Improvements, bugfixes to monolithify-header.sh.
commit 21d26201f90b884eb8d5de279ed74bbd244ffcb5 Merge: 43baa3b3 b7ca5806 Author: Field G. Van Zee Date: Sat Nov 18 14:16:53 2017 -0600 Merge branch 'rt' of github.com:flame/blis into rt commit 43baa3b327d5ae1e2ba619432687b4dd849b05e3 Author: Field G. Van Zee Date: Sat Nov 18 14:14:44 2017 -0600 Removed unnecessary flags for generic config. Details: - Removed -D_POSIX_C_SOURCE=200112L and -m64 flags from make_defs.mk file of generic sub-configuration. These flags are generally not necessary, and particularly not desirable for the generic configuration since they unnecessarily restrict the environments in which the configuration can be built. commit b7ca580618f9382b7982168fd035ed058f83e4c2 Author: iotamudelta Date: Sat Nov 18 14:56:05 2017 -0500 [WIP] Add x86 and x86_64 processor families. (#154) * Add x86 and x86_64 processor families. * Use generic config as fallback for more families. After discussion with fgvanzee, a) it's "generic" and 2) use it for all the families as a fallback. Goal is that if a specific CPU is not yet supported by a family (say a new Intel microarchitecture on x86_64), it'll fall through to still work with the slower "generic" kernels commit 870597d1663aaba1b74d7654b1d4946280aa0d3f Author: Field G. Van Zee Date: Fri Nov 17 17:06:42 2017 -0600 Added bash script for creating monolithic headers. Details: - Added a new script, monolithify-header.sh, to the 'build' directory. This script recursively replaces all #include directives in a selected file with the contents of the header files referenced by each directive. The idea is to "flatten" a tree of .h files into a single file, with the script acting as a C preprocessor that only processes #include directives. commit c76f77f4cc1e71988251c5e63cf6ef137477bf9c Author: Field G. Van Zee Date: Fri Nov 17 15:10:52 2017 -0600 Removed unnecessary #include "blis.h" from header. Details: - Removed an errant #include "blis.h directive from bli_cntx_ind_stage.h. 
The general policy is that no header file in BLIS should include blis.h. This will be important in the near future when using a tool to recursively create a monolithic blis.h file from its constituent headers. commit 2bb9bc6e9536fa239fbc19a7efaaf151116e15b4 Author: Field G. Van Zee Date: Fri Nov 17 13:50:14 2017 -0600 Miscellaneous tweaks to gks, rt functionality. Details: - Updated bli_cpuid_query_id() so that BLIS_ARCH_GENERIC is always returned if the hardware fails to test positive for any supported sub-configuration. - Defined bli_gks_init_ref_cntx(), which will call the context initialization function bli_cntx_init_configname() for the sub-configuration 'configname' associated with the arch_t id returned by bli_arch_query_id(). This makes initializing a reference context easy for experts who wish to construct those contexts. commit b3d8ab2ea02c127ab241532abc214624f35bfaab Merge: 189ffbb0 fe71c06e Author: Santanu Thangaraj Date: Wed Nov 15 01:33:12 2017 -0500 Merge "Added AMD copyright line to the changed files in last 3 commits" into amd-staging commit fe71c06e42b072407c83112779055b0afb67173d Author: Nisanth M P Date: Wed Nov 15 11:11:17 2017 +0530 Added AMD copyright line to the changed files in last 3 commits Change-Id: I37d5dbbbe1b199e07529610a5e9cc9e49d067c66 commit d5bf79e50bf97072bbe7117c86b7c45e6e707ea0 Author: Field G. Van Zee Date: Mon Nov 13 14:24:29 2017 -0600 Miscellaneous tweaks and fixes. Details: - Fixed incorrect calling sequence in bli_cntx_init_knl.c--an instance of bli_blksz_init_easy() that should have been bli_blksz_init(). - Fixed a bug in code that is supposed to output the list of sub-directories in the 'config' directory when configure script is run with no arguments. - Expanded the output of "make showconfig" to include more info from config.mk. - Minor changes to build/auto-detect/cpuid_x86.c, mostly in preparation for someone to add excavator and zen support. - Added a link to the ConfigurationHowTo wiki to config_registry.
- Other minor tweaks to configure. commit 673e5184030532c4ebd9fdeecbaa6442bb3ad54f Merge: 2c51356a 8f150f28 Author: Field G. Van Zee Date: Wed Nov 1 17:37:42 2017 -0500 Merge branch 'rt' of github.com:flame/blis into rt commit 2c51356a8b2699c99f9507c80d69c08a35d45fe3 Author: Field G. Van Zee Date: Wed Nov 1 17:37:02 2017 -0500 Implemented runtime hardware detection via cpuid. Details: - Added runtime support for selecting an appropriate arch_t value based on the results of the cpuid instruction (for x86_64). This allows deferral of choosing a context (kernels, blocksizes, etc.) until runtime, which allows BLIS to be built with support for multiple microarchitectures. Currently, only amd64 and intel64 configurations are registered in the config_registry; however, one could create custom configuration families to support arbitrary sets of x86_64 microarchitectures. - Current Intel microarchitectures supported via cpuid are knl, haswell, sandybridge, and penryn. - Current AMD microarchitectures supported via cpuid are: zen, excavator, steamroller, piledriver, and bulldozer. commit ab57b979046479bcda7f83165838a80117c2ad95 Author: Field G. Van Zee Date: Wed Nov 1 11:51:41 2017 -0500 Revert to default SIMD alignment for bulldozer. Details: - Removed the default-overriding #define of BLIS_SIMD_ALIGN_SIZE set in config/bulldozer/bli_kernel.h. Not sure where this value came from, but it would seem to allow for insufficient starting address alignment for any matrices created via bli_malloc_user(), such as via bli_obj_create(). Thanks to Rene Sitt for reporting the behavior that led us to this bug. - This commit is a manual patch of the same fix made to the 'rt' branch in 8f150f2. commit 8f150f28a678c4a0c1591400177ad7cca81fcaec Author: Field G. Van Zee Date: Wed Nov 1 11:41:45 2017 -0500 Revert to default SIMD alignment for bulldozer. Details: - Removed the default-overriding #define of BLIS_SIMD_ALIGN_SIZE set in bli_family_bulldozer.h. 
Not sure where this value came from, but it would seem to allow for insufficient starting address alignment for any matrices created via bli_malloc_user(), such as via bli_obj_create(). Thanks to Rene Sitt for reporting the behavior that led us to this bug. commit e3f10557caf114441fbfff990e3ce3576c177bdc Author: Field G. Van Zee Date: Mon Oct 30 13:37:54 2017 -0500 Use perl for some substitution for OS X compatibility. Details: - Discovered that sed commands where the replacement string contains '\n' are problematic with the version of sed present in OS X. For these cases in the configure script, we instead use 'perl -pe' for search-and-replace functionality. - Various other minor comment/whitespace tweaks to configure. - Removed remaining lines of code related to setting/checking variables to track "unregistered" configurations. commit dd45cfdfc3d8f9acf4cf7f69138d9b83dafc8842 Merge: 3e4f42a4 f60c827b Author: Field G. Van Zee Date: Mon Oct 30 12:23:05 2017 -0500 Merge branch 'master' into rt commit f60c827ba95f452c8454fb914f5564f4895bf644 Author: Devin Matthews Date: Mon Oct 30 10:04:42 2017 -0500 Fix CVECFLAGS for bulldozer config. commit 3e4f42a4d2ebb37b95988933d92e561c5b2cc201 Author: Field G. Van Zee Date: Fri Oct 27 11:41:37 2017 -0500 Typecast l1mkr_t enum value prior to comparison. Details: - Typecast l1mkr_t enum value in bli_cntx.h to guint_t before testing for out-of-range value. This is an attempt to pacify a strange warning from clang on OS X that is seemingly the result of the following compiler warning flag: -Wtautological-constant-out-of-range-compare commit aec6e038d942d35b81bbd723a640cce2c054fb8e Author: Field G. Van Zee Date: Thu Oct 26 16:12:36 2017 -0500 Removed associative arrays from configure. Details: - Implemented a replacement for associative arrays in the configure script that does not utilize arrays, and therefore works in pre-4.0 versions of bash.
(It appears that Mac OS X will be stuck with version 3.2 indefinitely due to bash switching to the GPL 3.0 license starting with version 4.0.) commit 189ffbb0d37262b21acddc0d35b4a22f2cbbca94 Merge: 06e0e635 3eb44f67 Author: Santanu Thangaraj Date: Wed Oct 25 02:00:30 2017 -0400 Merge changes Ie115b206,I7ce6cfa2,Iff59b6f4 into amd-staging * changes: Adding __attribute__((constructor/destructor)) for CLANG case. Thread Safety: Move bli_init() before and bli_finalize() after main() Thread safety: Make the global induced method status array local to thread commit 3eb44f67618b91ae5f5f0aaaba67e38f16042ee4 Author: Nisanth M P Date: Tue Oct 24 16:36:36 2017 +0530 Adding __attribute__((constructor/destructor)) for CLANG case. CLANG supports __attribute__, but its documentation doesn't mention support for constructor/destructor. Compiling with clang and testing shows that it does support this. Change-Id: Ie115b20634c26bda475cc09c20960d687fb7050b commit 07c352188bf5265af242255f8e6fcb97050d973d Author: Field G. Van Zee Date: Mon Oct 23 16:59:22 2017 -0500 Added "generic" configuration. Details: - Added a "generic" configuration that leaves the default blocksizes and kernels unchanged. This replaces the older "reference" configuration. Updated auto-detect script and code accordingly. - Added support for generic configuration to arch_t (bli_type_defs.h), bli_gks_init() (bli_gks.c), and bli_arch_config.h - Moved bli_arch_query_id() to bli_arch.c (and prototype to bli_arch.h). - Whitespace changes to configurations' make_defs.mk files. commit c1a98d6f70608b02a1e6bcad6ba020a60773dace Author: Field G. Van Zee Date: Mon Oct 23 14:24:41 2017 -0500 Minor update to .travis.yml file. commit 75b9383f01caa8b83f8be0117e15085b0d807ba6 Author: Field G. Van Zee Date: Fri Oct 20 16:41:22 2017 -0500 Minor header renaming ahead of bli_arch.c. 
Details: - Renamed the various configurations' "bli_arch_.h" header files (replacing "arch" with "family") to free up the 'bli_arch' namespace for a different purpose (hardware detection). - Renamed "bli_arch.h" and "bli_arch_pre_macro_defs.h" in frame/include to "bli_arch_config.h" and "bli_arch_config_pre.h", respectively. commit 482af51add26d5ed103c3e3f167657f273b32c7a Author: Field G. Van Zee Date: Fri Oct 20 15:44:26 2017 -0500 Fixed 'make test' target from top-level Makefile. Details: - Updated the top-level Makefile's build rule for testsuite object files to properly obtain CFLAGS via get-frame-cflags-for() function instead of simply using the $(CFLAGS) variable (which is empty). This means that 'make test' should now work as expected. commit 3c269f700d207efe6c04193f09d519c88c1d4045 Author: Field G. Van Zee Date: Fri Oct 20 13:57:21 2017 -0500 Makefile updates for test drivers, testsuite. Details: - Fixed semi-broken testsuite Makefile and very-broken test driver Makefiles, as well as those for test/3m4m, test/thread_ranges, and test/exec_sizes sub-directories. - Factored out much of the top-level Makefile into common.mk. A Makefile needs only set DIST_PATH to the relative path to the top level of the BLIS source distribution before including common.mk in order to acquire all of the definitions typically needed in a Makefile that tests BLIS. commit 0557189d463446b4c32077cdcf0467fa71ca68dc Author: Field G. Van Zee Date: Wed Oct 18 15:05:27 2017 -0500 Minor updates to .travis.yml, configure script. commit 2553734d1d62043793f4e783a027349ef6d4d563 Merge: 453deb29 37534279 Author: Field G. Van Zee Date: Wed Oct 18 13:46:50 2017 -0500 Merge branch 'master' into rt commit 375342799cbae981c28d831793af588d7951f3f6 Author: Field G. Van Zee Date: Wed Oct 18 13:41:25 2017 -0500 Removed a duplicate bli_avx512_macros.h header. Details: - Removed a duplicate header file that was causing problems during installation for the 'knl' configuration. 
Thanks to Victor Eijkhout for reporting this issue. commit 453deb29068889698e274f269c9aa90eea99b527 Author: Field G. Van Zee Date: Wed Oct 18 13:29:32 2017 -0500 Implemented runtime kernel management. Details: - Reworked the build system around a configuration registry file, named 'config_registry', that identifies valid configuration targets, their constituent sub-configurations, and the kernel sets that are needed by those sub-configurations. The build system now facilitates the building of a single library that can contain kernels and cache/register blocksizes for multiple configurations (microarchitectures). Reference kernels are also built on a per-configuration basis. - Updated the Makefile to use new variables set by configure via the config.mk.in template, such as CONFIG_LIST, KERNEL_LIST, and KCONFIG_MAP, in determining which sub-configurations (CONFIG_LIST) and kernel sets (KERNEL_LIST) are included in the library, and which make_defs.mk files' CFLAGS (KCONFIG_MAP) are used when compiling kernels. - Reorganized 'kernels' directory into a "flat" structure. Renamed kernel functions into a standard format that includes the kernel set name (e.g. 'haswell'). Created a "bli_kernels_.h" file in each kernels sub-directory. These files exist to provide prototypes for the kernels present in those directories. - Reorganized reference kernels into a top-level 'ref_kernels' directory. This directory includes a new source file, bli_cntx_ref.c (compiled on a per-configuration basis), that defines the code needed to initialize a reference context and a context for induced methods for the microarchitecture in question. - Rewrote make_defs.mk files in each configuration so that the compiler variables (e.g. CFLAGS) are "stored" (renamed) on a per-configuration basis.
- Modified bli_config.h.in template so that bli_config.h is generated with #defines for the config (family) name, the sub-configurations that are associated with the family, and the kernel sets needed by those sub-configurations. - Deprecated all kernel-related information in bli_kernel.h and transferred what remains to new header files named "bli_arch_.h", which are conditionally #included from a new header bli_arch.h. These files are still needed to set library-wide parameters such as custom malloc()/free() functions or SIMD alignment values. - Added bli_cntx_init_.c files to each configuration directory. The files contain a function, named the same as the file, that initializes a "native" context for a particular configuration (microarchitecture). The idea is that optimized kernels, if available, will be initialized into these contexts. Other fields will retain pointers to reference functions, which will be compiled on a per-configuration basis. These bli_cntx_init_*() functions will be called during the initialization of the global kernel structure. They are thought of as initializing for "native" execution, but they also form the basis for contexts that use induced methods. These functions are prototyped, along with their _ref() and _ind() brethren, by prototype-generating macros in bli_arch.h. - Added a new typedef enum in bli_type_defs.h to define an arch_t, which identifies the various sub-configurations. - Redesigned the global kernel structure (gks) around a 2D array of cntx_t structures (pointers to cntx_t, actually). The first dimension is indexed over arch_t and the inner dimension is the ind_t (induced method) for each microarchitecture. When a microarchitecture (configuration) is "registered" at init-time, the inner array for that configuration in the 2D array is initialized (and allocated, if it hasn't been already). 
The cntx_t slot for BLIS_NAT is initialized immediately and those for other induced method types are initialized and cached on-demand, as needed. At cntx_t registration, we also store function pointers to cntx_init functions that will initialize (a) "reference" contexts and (b) contexts for use with induced methods. We don't cache the full contexts for reference contexts since they are rarely needed. The functions that initialize these two kinds of contexts are generated automatically for each targeted sub-configuration from cpp-templatized code at compile-time. Induced method contexts that need "stage" adjustments can still obtain them via functions in bli_cntx_ind_stage.c. - Added new functions and functionality to bli_cntx.c, such as for setting the level-1f, level-1v, and packm kernels, and for converting a native context into one for executing an induced method. - Moved the checking of register/cache blocksize consistency from being cpp macros in bli_kernel_macro_defs.h to being runtime checks defined in bli_check.c and called from bli_gks_register_cntx() at the time that the global kernel structure's internal context is initialized for a given microarchitecture/configuration. - Deprecated all of the old per-operation bli_*_cntx.c files and removed the previous operation-level cntx_t_init()/_finalize() invocations. Instead, we now query the gks for a suitable context, usually via bli_gks_query_cntx(). - Deprecated support for the 3m2 and 3m3 induced methods. (They required hackery that I was no longer willing to support.) - Consolidated the 1e and 1r packm kernels for any given register blocksize into a single kernel that will branch on the schema and support packing to both formats. - Added the cntx_t* argument to all packm kernel signatures. - Deprecated the local function pointer array in all bli_packm_cxk*.c files and instead obtain the packm kernel from the cntx_t. - Added bli_calloc_intl(), which serves as the calloc-equivalent to bli_malloc_intl().
Useful when we wish to allocate and initialize to zero/NULL. - Converted existing cpp macro functions defined in bli_blksz.h, bli_func.h, bli_cntx.h into static functions. commit 4607aac297e55ad540cbe5fffbe02e6b1889c181 Author: Nisanth M P Date: Mon Oct 16 22:06:57 2017 +0530 Thread Safety: Move bli_init() before and bli_finalize() after main() BLIS provides APIs to initialize and finalize its global context. One application thread can finalize BLIS, while other threads in the application are still using BLIS. This issue can be solved by removing bli_finalize() from API. One way to do this is by getting bli_finalize() to execute by default after application exits from main(). GCC supports this behaviour with the help of __attribute__((destructor)) added to the function that needs to be executed after main exits. Similarly bli_init() can be made to run before application enters main() so that application need not call it. Change-Id: I7ce6cfa28b384e92c0bdf772f3baea373fd9feac commit 0f5ce26fc597cda6e8ae93a7526f52eb8cba01e9 Author: Nisanth M P Date: Mon Oct 16 21:07:50 2017 +0530 Thread safety: Make the global induced method status array local to thread BLIS retains a global status array for induced methods, and provides APIs to modify this state during runtime. So, one application thread can modify the state, before another starts the corresponding BLIS operation. This patch solves this issue by making the induced method status array local to threads. Change-Id: Iff59b6f473771344054c010b4eda51b7aa4317fe commit b882648af87deb1b365fc6b3e94151e69c5ccfa4 Merge: 8b379069 e02d3cb8 Author: Field G.
Van Zee Date: Wed Oct 11 16:32:21 2017 -0500 Merge branch 'master' into rt commit 06e0e6351acb9481225975ad9a4e0b8925336621 Author: sthangar Date: Thu Sep 28 12:15:36 2017 +0530 The inner loop parallelization is turned off by default, the JR and IR loop parameters are set to 1 by default Change-Id: I8c3c2ecbbd636259f6ffb92768ec04148205c3e5 commit e02d3cb84190a345ebe9b32f53db03a1838976b1 Author: Field G. Van Zee Date: Tue Sep 26 19:02:53 2017 -0500 Fixed a pthread typo in previous commit. Details: - Misnamed 'pthread_mutex_t' type in bli_memsys.c as 'thread_mutex_t'. commit f5962a1aae0fb3c9be104d0035c0d73210e7f670 Author: Field G. Van Zee Date: Tue Sep 26 17:00:04 2017 -0500 Fixed bugs in gemm/gemmtrsm ukr tests in testsuite. Details: - Fixed a bug in gemmtrsm test module that was due to improper partitioning into a k x k triangular matrix for the purposes of obtaining an mr x k micropanel of A with which to test. - Fixed a bug in gemm and gemmtrsm test modules that would only manifest for very large k (depending on the product of mr x kc on that architecture). The bug arose from the fact that the test module was triggering the allocation of blocks from the internal memory pools, which are limited in size. This allocation imposes an implicit assumption that the micro-panel being tested with will fit inside, and this assumption is violated for large values of k. Arbitrarily large k may now be tested for both operation tests. - Added OpenMP/pthread critical sections around the setting or getting of statuses from the induced method operation lookup table in bli_l3_ind.c. - Added the 'static' keyword to all pthread_mutex_t global variables in BLIS. - Thanks to Nisanth Padinharepatt of AMD for reporting the first and third issues. commit 8e917b256ca2d4bcdc059fe98d86be8775c69561 Author: Field G. Van Zee Date: Sat Sep 9 14:10:15 2017 -0500 Updated bibtex info for BLIS5 (3m4m) article.
commit 7be887057358df4978a4833eeae0c17e15acd9d1 Author: Nisanth M P Date: Mon Aug 28 17:38:22 2017 +0530 Merging "Adding auto hardware detection for Zen" Change-Id: Id450fb0c4f91a5cd5cbdc06970f4f9ed28dd8520 commit e056d810d16621891ead032603de0c2105cfc0f7 Author: sthangar Date: Mon Aug 28 16:44:42 2017 +0530 Bug fix for the testsuite build failing Change-Id: I7cd8c9d187387c48b2564e45cbfb8df985e93d77 commit 83796b7caf745fafc263e9e5e1bfcf5eff00c025 Merge: 8176f4e4 d1ee7762 Author: Kiran Varaganti Date: Mon Aug 28 05:23:28 2017 -0400 Merge "Adding auto hardware detection for Zen" into amd-staging commit d1ee776202b26874333af7a91b6d2686342c4c81 Author: sthangar Date: Wed Aug 23 13:01:14 2017 +0530 Adding auto hardware detection for Zen Change-Id: I40ce6705dd66b35000c4ccddffad1c5b65998caf commit 8176f4e43872714b997f1a5f83056daadb0ff1a5 Merge: 12413018 adafe974 Author: praveeng Date: Mon Aug 28 12:21:16 2017 +0530 resolving conflicts bli_gemm_front.c and LICENCE Change-Id: Id24ce53896d4c1c7ceccc3e004014a0ecceb5474 commit 57e1e5cd51e7ffe8612c96a20b6a041b55426ddb Merge: f86ce54d d6ef56c6 Author: Nisanth M P Date: Tue Aug 22 17:07:44 2017 +0530 Merge AMD authored changes commit adafe974b4bc3fc0663bc2f6f4ce2fde71a97988 Merge: f86ce54d 7dc78b49 Author: Devin Matthews Date: Tue Aug 15 15:17:21 2017 -0500 Merge pull request #150 from devinamatthews/vzeroupper Add vzeroupper to Intel AVX kernels. commit 7dc78b49f97e6b3cd6d72fcdc588ace534d0e700 Author: Devin Matthews Date: Tue Aug 15 10:02:25 2017 -0500 Add vzeroupper to Intel AVX kernels. commit f86ce54d6f315006984534fe29e47a2deaacc9f5 Author: Field G. Van Zee Date: Thu Aug 10 16:24:28 2017 -0500 Removed trailing enum commas from bli_type_defs.h. Details: - Removed trailing commas from enums in bli_type_defs.h. Thanks to Erling Andersen for pointing out this inconsistency and suggesting the change. commit 60a1eeb2317939d732b9eb6ff1e0d6d668c9a1e5 Author: Field G. 
Van Zee Date: Sat Aug 5 13:04:31 2017 -0500 Added edge handling to _determine_blocksize_b(). Details: - Added explicit handling of situations where i == dim to bli_determine_blocksize_b_sub(). This isn't actually needed by any current use case within BLIS, but handling the situation is nonetheless prudent. Thanks to Minh Quan for reporting this issue and requesting the fix. commit b01c80829907d50ec79977fba8e7b53cfe7db80a Author: Field G. Van Zee Date: Fri Aug 4 14:17:44 2017 -0500 Fixed a minor bug in level-3 packm management. Details: - Fixed a bug in bli_l3_packm() that caused cntl_t-cached packed mem_t entries to be released and then re-acquired unnecessarily. (In essence, the "<" operands in the conditional that guards the release-and-reacquire code block simply needed to be swapped.) The bug should have only affected performance (rather than the computed result). Thanks to Minh Quan for identifying and reporting the bug. commit 8b379069fcd4811669855b1248ece831f190dff6 Merge: 1f3a5819 05925dd5 Author: Field G. Van Zee Date: Tue Aug 1 15:30:40 2017 -0500 Merge branch 'master' into rt commit 05925dd5d30e8f403bb671ce33029170d65ce7c0 Merge: 803bbef0 cecdc05d Author: Devin Matthews Date: Tue Aug 1 09:31:02 2017 -0500 Merge pull request #146 from devinamatthews/master Change lsame_ signature to match lapacke. commit cecdc05d2834786a84ff85775d3f99a958c0765a Author: Devin Matthews Date: Mon Jul 31 15:19:51 2017 -0500 Change lsame_ signature to match lapacke. commit 803bbef0a386dd0571ad389f69d55154dbfe3c50 Author: Field G. Van Zee Date: Sat Jul 29 20:17:05 2017 -0500 Fixed pthreads compile bug with previous commit. Details: - Erroneously passed family parameter into l3int_t function despite that function not taking the parameter. Oops. commit c63980f4ca750618f359031d0691289b1abf5146 Author: Field G. Van Zee Date: Sat Jul 29 14:53:39 2017 -0500 Moved 'family' field from cntx_t to cntl_t. 
Details: - Removed the family field inside the cntx_t struct and re-added it to the cntl_t struct. Updated all accessor functions/macros accordingly, as well as all consumers and intermediaries of the family parameter (such as bli_l3_thread_decorator(), bli_l3_direct(), and bli_l3_prune_*()). This change was motivated by the desire to keep the context limited, as much as possible, to information about the computing environment. (The family field, by contrast, is a descriptor about the operation being executed.) - Added additional functions to bli_blksz_*() API. - Added additional functions to bli_cntx_*() API. - Minor updates to bli_func.c, bli_mbool.c. - Removed 'obj' from bli_blksz_*() API names. - Removed 'obj' from bli_cntx_*() API names. - Removed 'obj' from bli_cntl_*(), bli_*_cntl_*() API names. Renamed routines that operate only on a single struct to contain the "_node" suffix to differentiate with those routines that operate on the entire tree. - Added enums for packm and unpackm kernels to bli_type_defs.h. - Removed BLIS_1F and BLIS_VF from bszid_t definition in bli_type_defs.h. They weren't being used and probably never will be. commit 07837395560d413a1ba828163b41186e21a7bcfe Merge: ca1d1d85 ad8610b4 Author: Field G. Van Zee Date: Fri Jul 21 16:49:48 2017 -0500 Merge pull request #139 from Maratyszcza/emscripten Fix Emscripten builds commit ad8610b4415cc7982804d74f9aba29875e9e2b6c Merge: 8772a0b3 ca1d1d85 Author: Field G. Van Zee Date: Fri Jul 21 15:18:33 2017 -0500 Merge branch 'master' into emscripten commit ca1d1d8560c9ab1a7e3b0ac43ac70d08075bf904 Merge: b537b5bb 733faf84 Author: Devin Matthews Date: Fri Jul 21 09:49:50 2017 -0500 Merge pull request #144 from devinamatthews/fix_atomics_on_bgq Add fallbacks to __sync_* or __c11_atomic_* builtins... commit 733faf848dcc54834fcdfbb0185dc644978d8864 Author: Devin Matthews Date: Thu Jul 20 14:50:13 2017 -0500 Clang can't make up its mind what to support.
commit 7425d0744d9e9cd29a887120e57c2b43ba287040 Author: Devin Matthews Date: Thu Jul 20 12:54:58 2017 -0500 Add default #define for __has_extension. commit b537b5bbe8cbee459a85bac11458498ae2bce4de Merge: 1f1ec0db 7f41bb0a Author: Devin Matthews Date: Thu Jul 20 10:58:39 2017 -0500 Merge pull request #133 from devinamatthews/haswell-packdim Fix prefetching in haswell ukernel commit 8823f91a14638ce6f4e45e67df03212bb61609d6 Author: Devin Matthews Date: Thu Jul 20 10:04:34 2017 -0500 Add fallbacks to __sync_* or __c11_atomic_* builtins when __atomic_* is not supported. Fixes #143. commit 1f1ec0db9380b87679d5c771c4594daa1cfc5f0d Author: Field G. Van Zee Date: Wed Jul 19 15:40:48 2017 -0500 Updated ar option list used by all configurations. Details: - Dropped 'u' from the list of modifiers passed into the library archiver ar. Previously, "cru" was used, while now we employ only "cr". This change was prompted by a warning observed on Ubuntu 16.04: ar: `u' modifier ignored since `D' is the default (see `U') This caused me to realize that the default mode causes timestamps to be zero, and thus the 'u' option, which causes only changed object files to be inserted, is not applicable. commit 5caaba2d61cbbc36d63102a0786ece28ff797f72 Author: Field G. Van Zee Date: Wed Jul 19 13:51:53 2017 -0500 Added --force-version=STRING option to configure. Details: - Added an option to configure that allows the user to force an arbitrary version string at configure-time. The help text also now describes the usage information. - Changed the way the version string is communicated to the Makefile. Previously, it was read into the VERSION variable from the 'version' file via $(shell cat ...). Now, the VERSION variable is instead set in config.mk (via a configure-substituted anchor from config.mk.in). commit 13175c5fb70fb6a378d5fff6ecede62e5ea6a1f6 Author: Field G. Van Zee Date: Tue Jul 18 17:56:00 2017 -0500 Updated openmp/pthread barriers with GNU atomics. 
Details: - Updated the non-tree openmp and pthreads barriers defined in bli_thrcomm_openmp.c and bli_thrcomm_pthreads.c to instead call a common implementation in bli_thrcomm.c, bli_thrcomm_barrier_atomic(). This new implementation goes through the same motions as the previous codes, but protects its loads and increments with GNU atomic built-ins. These atomic statements take memory ordering parameters that allow us to specify just enough constraints for the barrier to work as intended on weakly-ordered hardware. The prior implementation was only guaranteed to work on systems with strongly-ordered memory. (Thanks to Devin Matthews for suggesting this change and his crash-course in atomics and memory ordering.) - Removed 'volatile' from structs' barrier field declarations in bli_thrcomm_*.h. - Updated bli_thrcomm_pthread.? files to use renamed struct barrier fields consistent with that of the _openmp.? files. - Updated other bli_thrcomm_* files to rename "communicator" variables to simply "comm". commit 0e58ba1b3aa84700ca51a96f1c0eed6067562fba Author: Field G. Van Zee Date: Mon Jul 17 19:03:22 2017 -0500 Added API to set mt environment variables. Details: - Renamed bli_env_get_nway() -> bli_thread_get_env(). - Added bli_thread_set_env() to allow setting environment variables pertaining to multithreading, such as BLIS_JC_NT or BLIS_NUM_THREADS. - Added the following convenience wrapper routines: bli_thread_get_jc_nt() bli_thread_get_ic_nt() bli_thread_get_jr_nt() bli_thread_get_ir_nt() bli_thread_get_num_threads() bli_thread_set_jc_nt() bli_thread_set_ic_nt() bli_thread_set_jr_nt() bli_thread_set_ir_nt() bli_thread_set_num_threads() - Added #include "errno.h" to bli_system.h. - This commit addresses issue #140. - Thanks to Chris Goodyer for inspiring these updates.
commit 8772a0b33a90154c80d88b381dcdd66f824e041f Author: Marat Dukhan Date: Thu Jul 13 21:39:24 2017 -0700 Fix Emscripten builds commit 72c8b49bb8d3b9370b2cc37718da22f065de9c57 Merge: 70cc825b ba7cada5 Author: Field G. Van Zee Date: Wed Jul 12 14:58:12 2017 -0500 Merge pull request #138 from hominhquan/membrk_set_free_fp Set missing free_fp in bli_membrk_init for free-ing GEN_USE buffers commit ba7cada51a238d320528e3504ed0f0a17a6b022a Author: Minh Quan HO Date: Fri Jul 7 10:52:05 2017 +0200 set missing free_fp in bli_membrk_init for free-ing GEN_USE buffers The membrk's free_fp is called when releasing GEN_USE buffers, but this free_fp is not set in bli_membrk_init commit 1241301869957c96f16a2c6567e3ad70afa547de Merge: 969b67e8 25ead66f Author: Kiran Varaganti Date: Wed Jul 5 02:24:00 2017 -0400 Merge "Reducing the framework overhead of GEMV routines" into amd-staging commit 25ead66fb78557f73af48bac305724d5d8aa3309 Author: sthangar Date: Fri Jun 30 12:23:19 2017 +0530 Reducing the framework overhead of GEMV routines Change-Id: I83607ad767bff74e305e915b54b0ea34ec3e5684 commit 969b67e8800fbd5d14a086606f3b5afbf66ed093 Author: Kiran Varaganti Date: Tue Jul 4 12:57:32 2017 +0530 Improved efficiency of dGEMM for large matrices by reducing TLB load misses and majorly L3 cache misses. This is achieved by changing the packed block sizes of matrix A & B. Now the optimum values are MC_D = 510 and KC_D = 1024. Change-Id: I2d8bdd5f62f2d1f8782ae2997f3d7a26587d1ca4 commit 70cc825b552dec05165b9d70f9e6eb33d8abb118 Author: Devin Matthews Date: Tue Jun 6 21:58:21 2017 -0500 Update LICENSE Remove totally unnecessary first 9 lines and hopefully get Github to recognize it as 3BSD [ci skip]. 
commit cf54c77bc79a0f33a514be72c80a654c4e6e6f63 Author: Devin Matthews Date: Tue Jun 6 20:23:17 2017 -0500 Add new SSI acknowledgment commit d6ef56c6dbaf6df8ee1af1ca6a0f0792a811396a Author: prangana Date: Thu Jun 1 16:11:09 2017 +0530 Update version number Change-Id: Ib6e52d1d34c0791367ab9152dfab31f94deedeb4 commit 897bfa0e92082c30bbb74229562d7d7327cbbac8 Author: prangana Date: Thu Jun 1 16:11:09 2017 +0530 Update version number Change-Id: Ib6e52d1d34c0791367ab9152dfab31f94deedeb4 commit 99d0ba5606d4b63e6a9c639aa78d4defc2455f79 Merge: be2c7eb8 6d17e012 Author: Santanu Thangaraj Date: Thu Jun 1 02:19:02 2017 -0400 Merge "Checked in the small matrix code to compute GEMM called with A transpose case" into amd-staging commit 6d17e0120fe5c127b941136ad2c0c08e91439535 Author: sthangar Date: Wed May 24 11:48:16 2017 +0530 Checked in the small matrix code to compute GEMM called with A transpose case Change-Id: I29f40046d43d7a4b037c1cb322503ee26495f462 commit 9d93f8481a1404695f7b78a3ced8ca47e890b649 Author: prangana Date: Tue May 30 09:58:10 2017 +0530 Update Licence File Change-Id: I4c5cf1690d0cef92a68400f9a89e454ab6856ad2 commit be2c7eb85168937bd4318f4d05ded37620119310 Author: prangana Date: Tue May 30 09:58:10 2017 +0530 Update Licence File Change-Id: I4c5cf1690d0cef92a68400f9a89e454ab6856ad2 commit 7f41bb0a0becde6a7de7df0f99668d7b4686c3b0 Author: Devin Matthews Date: Fri May 26 14:49:31 2017 -0400 PACKDIM_MR=8 didn't work out, but messing with the prefetching helps 2%. commit d87614af3f3d9187be94d6e77984b282bf890928 Author: Devin Matthews Date: Fri May 26 14:47:36 2017 -0400 Revert "Change PACKDIM_MR (double) for haswell to 8." This reverts commit 681eec913d7c2ebcff637cec5c1627ced9a92b99. commit 681eec913d7c2ebcff637cec5c1627ced9a92b99 Author: Devin Matthews Date: Fri May 26 12:28:09 2017 -0500 Change PACKDIM_MR (double) for haswell to 8. 
commit 0a3ae0ecaa0ddcb5887005d7051fa234499f1120 Merge: 0f4e6652 6e04f9df Author: praveeng Date: Sat May 20 16:53:50 2017 +0530 frame/3/gemm/bli_gemm_front.c Change-Id: I52a0fbc1d33bb948d430942323bbc5fe44e3ca13 commit 6e04f9df01d79c1b0e673943ca0d5d0a6095eb2e Author: Field G. Van Zee Date: Wed May 17 13:03:52 2017 -0500 Restored deleted lines from makefile fragments. commit ec5c0c0448275280dca0991f6f33afeb73650450 Author: Devin Matthews Date: Wed May 17 12:29:44 2017 -0500 Change to /bin/sh. All scripts checked with Debian's checkbashisms. Also check for clang first in auto-detect.sh. commit 555ddc30d4c7e44f3f335e436c98606f56e1598b Author: Devin Matthews Date: Wed May 17 12:27:14 2017 -0500 Remove shebangs from makefiles. commit f26bd7f42e0c2a47fe321b2c452644990b689654 Merge: cbf8710a 169fb05f Author: Devin Matthews Date: Wed May 17 11:58:41 2017 -0500 Merge pull request #128 from iotamudelta/master Portability and clang commit 169fb05f225c2f060265bcaa872f7f80dc638b70 Author: J M Dieterich Date: Tue May 16 23:11:22 2017 -0400 Fix if/else structure. Thanks to TravisCI. commit 0579dfea0bcfbb90ebc073fcf78b92a5cf7238e1 Author: J M Dieterich Date: Tue May 16 22:58:07 2017 -0400 Restore version. commit a75b05c23dc786a1fdc45dc1627a5ce2299f1a7b Author: J M Dieterich Date: Tue May 16 22:23:27 2017 -0400 Mark piledriver compilable w/ clang. commit 7541d46e2ba8659bb2e36b444edef112fefa1345 Author: J M Dieterich Date: Tue May 16 22:12:12 2017 -0400 Mark bulldozer compilable w/ clang. commit 91f897073ec0df3330ede449c4d6af8158266ae3 Author: J M Dieterich Date: Tue May 16 22:06:59 2017 -0400 Correct error message. commit f5131e1e49167f948bddd714bb1af1761829c212 Author: J M Dieterich Date: Tue May 16 22:03:23 2017 -0400 Indeed one can compile for carrizo also using clang.
commit 5fa4e9439c04f35f89dd7d26ff742cb2dadc3180 Author: J M Dieterich Date: Tue May 16 21:50:49 2017 -0400 A bunch of shebang fixes from unportable /bin/bash to portable /usr/bin/env bash commit 1f3a58197e5d5f9ac862bda91e7527cbfbab5d76 Author: Field G. Van Zee Date: Mon May 8 16:10:03 2017 -0500 Housekeeping, induced method file/function renames. Details: - Renamed all level-3 induced method files to use the "_vir.c" suffix instead of "_ref.c". Also renamed functions within these files accordingly. - Renamed cpp macro definitions in frame/ind/include according to the above changes. - Removed frame/3/old. commit cbf8710a1ba63e25aadaa6fc5da51ea81b3d596d Merge: cf39d3ef fdc66f12 Author: Tyler Michael Smith Date: Mon May 8 11:21:20 2017 -0500 Merge pull request #127 from devinamatthews/fix_blis_nt_xx Setting any one of BLIS_NT_[IJ][CR] overrides BLIS_NUM_THREADS commit cf39d3ef3b29b8058c39fb4638c1a734fe64aaed Author: Field G. Van Zee Date: Fri May 5 15:06:56 2017 -0500 Fixed a bug in norm1v, norm1m. Details: - Fixed a bug that manifested as improperly-computed 1-norm for vectors and matrices. This is one of the few operations in BLIS that does not have its own test module within the testsuite, hence why it went undetected for so long. The bad 1-norms were being used to normalize matrices in the testsuite after initialization, which led to some matrices containing a combination of "large" and "small" values. This tended to push the residuals computed after each test away from zero. In some cases, they were off *just* enough for the testsuite to label it a "failure". Many thanks to Jeff Hammond for reporting this bug. (Wonky details: the bug was due to improperly-defined level-0 scalar macros for abval2, an operation that computes the absolute square, or complex magnitude/modulus. Certain complex domain instances of abval2 were being incorrectly defined in terms of real-only solutions, leading to bad results. This level-0 operation forms the basis of norm1v/norm1m.
absq2 was also affected, but almost nothing uses this operation.) commit 799485124f4d823e908d2e5d38b0c3a1e6172ade Merge: 773a24ef 0df3541f Author: Devin Matthews Date: Thu May 4 10:52:09 2017 -0500 Merge pull request #121 from jeffhammond/not-real-knl allow KNL build without hbwmalloc (i.e. emulated) commit fdc66f12d40754ff46179804bff592fddafbca02 Author: Devin Matthews Date: Thu May 4 10:35:22 2017 -0500 Setting any one of BLIS_NT_[IJ][CR] overrides BLIS_NUM_THREADS. Missing BLIS_NT_XX's are defaulted to 1. Fixes #123. commit 773a24efb2fa1c3a220bf0ce1dd621a3176196da Merge: dd58c954 b8854259 Author: Field G. Van Zee Date: Wed May 3 15:07:59 2017 -0500 Merge branch 'master' of github.com:flame/blis commit dd58c9545c877c3f7553eaebca7b5e9720a66f5d Author: Field G. Van Zee Date: Wed May 3 15:04:51 2017 -0500 Disable complex 3m/4m in testsuite by default. Details: - Disabled testsuite tests of all level-3 implementations based on 3m and 4m. This will improve testing runtime on Travis CI as well as for anyone manually running the testsuite using default test parameters. Thanks to Devin Matthews for suggesting this change. commit 0df3541f54b7fe0c604ab2ec47ba814f12391798 Author: Jeff Hammond Date: Tue May 2 19:25:21 2017 -0700 allow KNL build without hbwmalloc.h (i.e. emulated) we want to be able to run BLIS KNL binaries on non-KNL machines via SDE. although it is possible to install hbwmalloc implementation on such systems, it is easier not to, since obviously the performance of SDE execution is not representative so there is no reason to emulate HBW allocation. commit b88542591d4dd0cde366e5ae35afd3205cb81bdc Merge: 43007f7b c2c91e09 Author: Field G. Van Zee Date: Tue May 2 19:22:41 2017 -0500 Merge pull request #107 from jeffhammond/intel-compilers-no-use-libm never use libm with Intel compilers commit 43007f7b65ec7926cbbfc39965ff733fa251c15f Author: Field G. Van Zee Date: Tue May 2 16:48:43 2017 -0500 Fixed stray parentheses in README citations.
commit a4f1d0b8801c114e9ef8be39df01e1b8d27ebcb3 Author: Field G. Van Zee Date: Tue May 2 16:38:43 2017 -0500 CHANGELOG update (0.2.2) commit 940a707ac78de975110e17c95765e65b89aa5e10 (tag: 0.2.2) Author: Field G. Van Zee Date: Tue May 2 16:38:42 2017 -0500 Version file update (0.2.2) commit d5a5e003ea9b24bb6abf12e88862e8eb61ffb03d Author: Field G. Van Zee Date: Tue May 2 15:48:30 2017 -0500 Fixed a trsm1m bug that affected right-side cases. Details: - Fixed a bug introduced in 1c732d3 that affected trsm1m_r. The result was nondeterministic behavior (usually segmentation faults) for certain problem sizes beyond the 1m instance of kc (e.g. 128 on haswell). The cause of the bug was my commenting out lines in bli_gemm1m_ukr_ref.c which explicitly directed the virtual gemm micro-kernel to use temporary space if the storage preference of the [real domain] gemm ukernel did not match the storage of the output matrix C. In the context of gemm, this handling is not needed because agreement between the storage pref and the matrix is guaranteed by a high-level optimization in BLIS. However, this optimization is not applied to trsm because the storage of C is not necessarily the same as the storage of the micro-panels of B--both of which are updated by the micro-kernel during a trsm operation. Thus, the guarantee of storage/preference agreement is not in place for trsm, which means we must handle that case within the virtual gemm micro-kernel. - Comment updates and a minor macro change to bli_trsm*_cntx_init() for 3m1, 4m1a, and 1m. commit e80993e71f4d571e9650a8e90ed386e32059eae5 Merge: a509fbd5 ca3a7924 Author: Field G. Van Zee Date: Tue May 2 12:30:28 2017 -0500 Merge branch 'master' into 1m commit ca3a7924770d6cf203cce4ca9f5482e1d0d4e961 Author: Field G. Van Zee Date: Tue May 2 12:09:39 2017 -0500 README.md update. Details: - Updated bibtex entries for 4th BLIS paper, and adds entries for 5th and 6th BLIS papers. 
commit 0f4e6652dfe9b30105d3bab328ac26d9d5c11182 Merge: 42e7f6fb 6e7de6ef Author: praveeng Date: Wed Apr 19 17:54:10 2017 +0530 Merge master code till 2017_04_19 to amd-staging Change-Id: Ibebe83c8ea2e7eb15798c2bcf214b7228a1c9518 commit 42e7f6fb2a531429ee600b2fe0293b67371c7ccb Author: sthangar Date: Tue Mar 28 18:10:03 2017 +0530 fixed license attribute issues in AMD added files Change-Id: I303f870a777c7cd1c1af29ea0b93f3e0a27948e4 commit 5600001e973c6cea048bd3fdb28117f1d7c98b9d Merge: 0b190293 b3ed4933 Author: prangana Date: Mon Mar 20 13:56:33 2017 +0530 Fix merge conflicts after sync with release branch Change-Id: Icf14a09f728befb69a73fff9fa79c4128e728310 commit 6e7de6ef84babb273dc5528a9b9d01f0febe394b Author: Field G. Van Zee Date: Fri Mar 17 12:10:24 2017 -0500 Minor updates to test/3m4m. Details: - Updated initial problem size and increment in Makefile. - Updated code in test_gemm.c to correctly query kc from context. commit f484c6cd4389dc7ae5b972849e12e98ad5bbf9a4 Author: Field G. Van Zee Date: Fri Mar 17 12:07:27 2017 -0500 Whitespace reformatting to armv8a kernels file. Details: - Updated formatting of function signature/header in kernels/armv8a/3/bli_gemm_opt_4x4.c. 
commit 0b19029342ffc530fa22ef20398a26221cb8f6ec Author: Kiran Varaganti Date: Tue Mar 14 14:51:31 2017 +0530 Code cleanup, removed warnings from trsm, removed unused routines in axpyv & scalv Change-Id: I02867f394c5f416194c4b1769a6c75f39243ec81 commit 825363bd2a5a60a923d4a6d9691dc143845a9cab Merge: 093bdb80 513944e4 Author: praveeng Date: Wed Mar 8 15:42:49 2017 +0530 Merge code from master to amd-staging as on 2017_03_08 by praveeng Change-Id: I80740081b2cb54c9b77a3e78b9fe540e170be23d commit 093bdb80c86b06367e595aa17487139ae983822f Author: sthangar Date: Tue Mar 7 13:35:50 2017 +0530 Checked in Unpacked DGEMM code Change-Id: I39dcc7b238b328f73ee2675d21a5e521d0488723 commit 33923da9a108854590d386e74b6ee66b971e7796 Author: Kiran Varaganti Date: Mon Mar 6 14:31:31 2017 +0530 Added variant 10 for double precision axpyv microkernel Change-Id: I7a20cc113a422603250bc450825c965136354974 commit bc828f7f8e3ddb9f58af07edc0b935b21759fb0f Author: Kiran Varaganti Date: Fri Mar 3 14:45:35 2017 +0530 Added new axpyv (single precision) microkernel where it performs 10 FMAs per loop- This gives better performance than all other implementations of axpyv Change-Id: Ic4f0e4c67e367d67d0b24febcf34f81a70a39972 commit c9949f4603419267c10973adf1d63ec38497475d Author: sthangar Date: Fri Feb 17 14:16:33 2017 +0530 Checked in DGEMMTRSM and edge case handling routine in DDOTXF Change-Id: I65f00661af6c09b2507294fd43e0a10641c0597e commit a509fbd5ac04fafd4e51b43d2f59ca56432dc212 Merge: 69b4846a 513944e4 Author: Field G. Van Zee Date: Tue Feb 21 17:06:16 2017 -0600 Merge branch 'master' into 1m commit 69b4846ae9adb157c4171b52e159684db2867853 Author: Field G. Van Zee Date: Tue Feb 21 15:33:39 2017 -0600 Disabled experiment-related 1m code. Details: - Commented out code in frame/ind/oapi/bli_l3_3m4m1m_oapi.c that was specifically inserted to facilitate the benchmarking of 1m block-panel and panel-block algorithms. 
- Updates to test/3m4m/Makefile, runme.sh script, and test_gemm.c to reflect changes used/needed during benchmarking. commit 513944e4a951d8823b4de161b86ad7a965b4d99b Merge: 8b462a0e 0e18f68c Author: Devin Matthews Date: Mon Feb 20 10:04:33 2017 -0500 Merge pull request #118 from devinamatthews/master Handle k=0 correctly in KNL dgemm ukernel. commit 0e18f68cf12eb9189ba901a20040b1cdae417670 Author: Devin Matthews Date: Mon Feb 20 09:03:21 2017 -0600 Handle k=0 correctly in KNL dgemm ukernel. commit 8b462a0e8c3e9252f0401940849e53cc772256fa Merge: c362afc5 7d42fc07 Author: Devin Matthews Date: Sun Feb 19 23:03:03 2017 -0500 Merge pull request #117 from devinamatthews/master Cast dim_t and inc_t parameters to 64-bit in KNL microkernels. commit 7d42fc0796ef0c010375fd8e59b1240ba41ce4d2 Author: Devin Matthews Date: Sun Feb 19 21:10:55 2017 -0500 Cast dim_t and inc_t parameters to 64-bit in KNL microkernels. commit 04245c9ff7f8b3c70d61003029c964bb9a4320ee Author: Kiran Varaganti Date: Fri Feb 10 14:24:30 2017 +0530 Reoptimized scalv routines - two vector multiplies are done per iteration, and these routines are enabled in bli_kernel.h Change-Id: Ic5654508573d1f6bde2edef06aefe117e581feb5 commit c362afc525bab4050581d1b0fcea2fe4d582c608 Author: Field G. Van Zee Date: Thu Feb 9 11:54:59 2017 -0600 Added missing "level-0" BLAS [sd]cabs1_(). Details: - Fixed issue #115 by adding implementations for scabs1_() and dcabs1_() to the BLAS compatibility layer. Thanks to heroxbd for pointing out their absence. commit 018180c938c32efbeaaf626ba71ec5b780664db1 Author: Field G. Van Zee Date: Wed Feb 8 11:20:52 2017 -0600 Fixed a minor bug in configure (issue #114). Details: - Fixed a bug in the configure script whereby a non-preferred value for --enable-threading would cause problems in common.mk vis-a-vis detecting which threading model was chosen. Thanks to heroxbd for reporting this issue. 
commit 58b5b77e5fdb179ea465e398e416e6a00d917e05 Author: Kiran Varaganti Date: Wed Feb 8 21:43:34 2017 +0530 Fixed a bug in axpyv, the arguments passed to intrinsic fmad instruction are corrected Change-Id: If12f24c6bc74b22ac9e4acd6b9378e06d79f2f5e commit 85de4ebf74d0a5587d5a12724eb5489d51674db3 Author: Kiran Varaganti Date: Wed Feb 8 14:41:04 2017 +0530 variant 4 axpyv single precision modified: explicitly used FMA intrinsics, replaced vector multiply and add operations Change-Id: I975feef56696d479d2b9e9441b0660021cf4f6ff commit 3fa53e8af31d634779f40258c51483ae8af494fa Merge: b5291a44 95be7b04 Author: Kiran Varaganti Date: Wed Feb 8 11:46:34 2017 +0530 Merged axpyv and gemm small in bli_kernel.h Merge branch 'amd-staging' of ssh://git.amd.com:29418/cpulibraries/er/blis into amd-staging modified: config/zen/bli_kernel.h modified: frame/3/gemm/bli_gemm_front.c modified: kernels/x86_64/zen/3/bli_gemm_small_matrix.c Change-Id: If181cf9345178c448b3530beb8bef453917fe295 commit 95be7b04709e688a4cb01fba680081e30f4258ef Author: sthangar Date: Tue Feb 7 14:01:27 2017 +0530 Added logic for packing matrix A and prefetching matrix C in Unpacked SGEMM code Change-Id: I99efeca9eb5b4449286ec0ec133fd554ef1bb4f0 commit b5291a445b1313e01f1e0e8102c5f3660ab07f69 Author: Kiran Varaganti Date: Tue Feb 7 12:39:31 2017 +0530 Added optimization variant 4 for axpyv single precision - this performs 5 FMA per loop, keeping the IPC always full Change-Id: Ie77ed22584271136a257e673bcd3b1ba71136bc9 commit f4bfc1662af82aa4b98185334c44835e51f1cbec Author: Kiran Varaganti Date: Mon Feb 6 15:04:27 2017 +0530 New routines implemented for axpyv to improve performance for small vector sizes, vectorization is done for vectors as small as 8 (single precision) 4(double precision), since this operation has low compute to memory ratio, higher matrix sizes memory operations are dominating and hence not much gain - This still needs some work- added saxpyv and daxpyv var 3 routines in the file 
bli_axpyv_opt_var1.c Change-Id: Ic1b33bd5516e10113b00e44ab41b97eb19d46072 commit ddf45e71770c55ea4a58ca24ea4913fe5d8beb9b Merge: a6ab91bc 78e1b16e Author: Devin Matthews Date: Fri Jan 27 14:25:40 2017 -0600 Merge pull request #113 from devinamatthews/knl_thread_params Change default threading parameters for KNL. commit 78e1b16e16d589ed31b2e712115ee282097f114d Author: Devin Matthews Date: Fri Jan 27 14:22:20 2017 -0600 Change default threading parameters for KNL. commit 574472ba5a89924eca7dbd10055d0e1dcd7f4c71 Author: sthangar Date: Tue Jan 10 14:51:46 2017 +0530 checked in unpacked SGEMM optimization Change-Id: I8e4ea374415c0c402c660b656fb076af15354181 commit 1c732d3ddc4ac0861d3b0e0dd15eb7e071615502 Author: Field G. Van Zee Date: Wed Jan 25 16:25:46 2017 -0600 Added 1m-specific APIs for bp, pb gemm algorithms. Details: - Defined bli_gemmbp_cntl_create(), bli_gemmpb_cntl_create(), with the body of bli_gemm_cntl_create() replaced with a call to the former. - Defined bli_cntl_free_w_thrinfo(), bli_cntl_free_wo_thrinfo(). Now, bli_cntl_free() can check if the thread parameter is NULL, and if so, call the latter, and otherwise call the former. - Defined bli_gemm1mbp_cntx_init(), bli_gemm1mpb_cntx_init(), both in terms of bli_gemm1mxx_cntx_init(), which behaves the same as bli_gemm1m_cntx_init() did before, except that an extra bool parameter (is_pb) is used to support both bp and pb algorithms (including to support the anti-preference field described below). - Added support for "anti-preference" in context. The anti_pref field, when true, will toggle the boolean return value of routines such as bli_cntx_l3_ukr_eff_prefers_storage_of(), which has the net effect of causing BLIS to transpose the operation to achieve disagreement (rather than agreement) between the storage of C and the micro-kernel output preference. 
This disagreement is needed for panel-block implementations, since they induce a transposition of the suboperation immediately before the macro-kernel is called, which changes the apparent storage of C. For now, anti-preference is used only with the pb algorithm for 1m (and not with any other non-1m implementation). - Defined new functions, bli_cntx_l3_ukr_eff_prefers_storage_of() bli_cntx_l3_ukr_eff_dislikes_storage_of() bli_cntx_l3_nat_ukr_eff_prefers_storage_of() bli_cntx_l3_nat_ukr_eff_dislikes_storage_of() which are identical to their non-"eff" (effectively) counterparts except that they take the anti-preference field of the context into account. - Explicitly initialize the anti-pref field to FALSE in bli_gks_cntx_set_l3_nat_ukr_prefs(). - Added bli_gemm_ker_var1.c, which implements a panel-block macro-kernel in terms of the existing block-panel macro-kernel _ker_var2(). This technique requires inducing transposes on all operands and swapping the A and B. - Changed bli_obj_induce_trans() macro so that pack-related fields are also changed to reflect the induced transposition. - Added a temporary hack to bli_l3_3m4m1m_oapi.c that allows us to easily specify the 1m algorithm (block-panel or panel-block). - Renamed the following cntx_t-related macros: bli_cntx_get_pack_schema_a() -> bli_cntx_get_pack_schema_a_block() bli_cntx_get_pack_schema_b() -> bli_cntx_get_pack_schema_b_panel() bli_cntx_get_pack_schema_c() -> bli_cntx_get_pack_schema_c_panel() and updated all instantiations. Also updated the field names in the cntx_t struct. - Comment updates. 
commit 41595e98eedaf3f1f93802c14dcae490402f933f Merge: d625c49e a6ab91bc Author: praveeng Date: Wed Dec 7 15:13:21 2016 +0530 Merge master code as on 2016_12_07 to amd-staging Change-Id: I5d9ecef9bff960aeb9b51ca4e4b21714e789e44f commit d625c49e20bd3c50d6d44e330e34076cced114a3 Author: sthangar Date: Tue Nov 29 15:05:19 2016 +0530 checked-in SGEMMTRSM microkernel for Zen Change-Id: Ib61936418dea911b2154aa99f703b66e9669f94f commit a6ab91bc61432490fadf18d596de4589645f37dd Merge: 145a551d 7f31a630 Author: Field G. Van Zee Date: Wed Nov 30 09:26:58 2016 -0600 Merge pull request #111 from figual/master Fixed missing cntx argument in ARMv8 microkernels. commit 7f31a6307b7bd35f913c895947552c3a176f789b Author: Francisco Igual Date: Sun Nov 27 14:40:47 2016 +0100 Fixed missing cntx argument in ARMv8 microkernels. commit 126482a3b609b9ad7026ba348f6c4bf6a29be8a1 Author: Field G. Van Zee Date: Fri Nov 25 18:29:49 2016 -0600 Implemented the 1m method. Details: - Implemented the 1m method for inducing complex domain matrix multiplication. 1m support has been added to all level-3 operations, including trsm, and is now the default induced method when native complex domain gemm microkernels are omitted from the configuration. - Updated _cntx_init() operations to take a datatype parameter. This was needed for the corresponding function for 1m (because 1m requires us to choose between column-oriented or row-oriented execution, which requires us to query the context for the storage preference of the gemm microkernel, which requires knowing the datatype) but I decided that it made sense for consistency to add the parameter to all other cntx initialization functions as well, even though those functions don't use the parameter. - Updated bli_cntx_set_blkszs() and bli_gks_cntx_set_blkszs() to take a second scalar for each blocksize entry. The semantic meaning of the two scalars now is that the first will scale the default blocksize while the second will scale the maximum blocksize. 
This allows scaling the two independently, and was needed to support 1m, which requires scaling for a register blocksize but not the register storage blocksize (ie: "packdim") analogue. - Deprecated bli_blksz_reduce_dt_to() and defined two new functions, bli_blksz_reduce_def_to() and bli_blksz_reduce_max_to(), for reducing default and maximum blocksizes to some desired blocksize multiple. These functions are needed in the updated definitions of bli_cntx_set_blkszs() and bli_gks_cntx_set_blkszs(). - Added support for the 1e and 1r packing schemas to packm, including 1e/1r packing kernels. - Added a minor optimization to bli_gemm_ker_var2() that allows, under certain circumstances (specifically, real domain beta and row- or column-stored matrix C), the real domain macrokernel and microkernel to be called directly, rather than using the virtual microkernel via the complex domain macrokernel, which carries a slight additional amount of overhead. - Added 1m support to the testsuite. - Added 1m support to Makefile and runme.sh in test/3m4m. Also simplified some code in test_gemm.c driver. commit d8f13beeea90338e0ecb0a3aeaa2d59d8ebd6c36 Merge: c25a9205 145a551d Author: praveeng Date: Fri Nov 25 17:31:08 2016 +0530 Merge master code till 2016_11_25 to amd-staging commit c25a9205fd8c8d8de7fd81b1e5621e7ac79f4e87 Merge: 65298762 bdc0a264 Author: praveeng Date: Fri Nov 25 17:06:36 2016 +0530 Merge master code till Switched to simpler trsm_r 2016_11_25 to amd-staging Change-Id: Ibf71d224d8fb6cf0bc497f84d50c27d276512cc1 commit 145a551d524ae5492667a05fc248923d922df850 Author: Field G. Van Zee Date: Wed Nov 23 17:59:06 2016 -0600 Switched to simpler trsm_r implementation. Details: - Disabled the implementation of trsm_r that allows the right-hand matrix B to be triangular, and switched to the implementation that simply transposes the operation (and thus the storage of C) in order to recast the operation as trsm_l.
This avoids the need to use trsm_rl and trsm_ru macrokernels, which require an awkward swapping of MR and NR. For now, the support for trsm_r macrokernels, via separate control trees, remains. - Modified bli_config_macro_defs.h so that BLIS_RELAX_MCNR_NCMR_CONSTRAINTS is defined by default. This is mostly a safety precaution in case someone tries to switch back to the previous trsm_r implementation, but also serves as a convenience on some systems where one does not naturally choose blocksizes in a way that satisfies MC % NR = 0 and NC % MR = 0. commit b3e58ee30307cf1e11529f2113acb9abbeda25af Author: Field G. Van Zee Date: Wed Nov 23 17:58:26 2016 -0600 Reimplemented 4x12 haswell ukernels (real only). Details: - Replaced permutation-based implementations in bli_gemm_asm_d4x12.c, which defines 4x24 single real and 4x12 double real gemm microkernels, with broadcast-based implementations. (The previous microkernel file has been moved to an 'old' subdirectory.) commit 65298762ff15c45e8588e0c279a9feaa98c927a0 Author: sthangar Date: Tue Nov 22 12:15:33 2016 +0530 removed a redundant copy operation in DNRM2 Change-Id: I673b08efde4480e871779716f7715566740ad9ce commit d6863e851adeef037e4d1476fe63bb293fb9d987 Author: sthangar Date: Mon Nov 21 11:30:30 2016 +0530 checked-in DNRM2 optimizations Change-Id: I3b31d768bd7f4fbf43042aa5a0762995c73c4522 commit bdc0a264d2fb5940bfd09298b1de823674a39053 Author: Field G. Van Zee Date: Wed Nov 16 14:13:08 2016 -0600 Adjusted stride selection of ct in macrokernels. Details: - Updated the changes introduced in 618f433 so that the strides of the temporary microtile ct used in the macrokernels is determined based on the storage preference of the microkernel (via the new functions below), rather than the strides of c. In almost all cases, presently, this change results in no net effect, as a high-level optimization in the _front() functions aligns the storage of c to that of the microkernel's preference. 
However, I encountered some cases where this is not always the case in some development code that has yet to be committed, and therefore I'm generalizing the framework code in advance. - Defined two new functions in bli_cntx.c: bli_cntx_l3_ukr_prefers_rows_dt() bli_cntx_l3_ukr_prefers_cols_dt() which return bool_t's based on the current micro-kernel's storage preferences. For induced methods, the preference of the underlying real domain microkernel is returned. - Updated definition of bli_cntx_l3_ukr_dislikes_storage_of(), and by proxy bli_cntx_l3_ukr_prefers_storage_of(), to be in terms of the above functions, rather than querying the preferences of the native microkernel directly (which did the wrong thing for induced methods). commit 031978d2647cf08316858baf29c84ebba9c3133e Author: Field G. Van Zee Date: Wed Nov 16 14:04:33 2016 -0600 Fixed inactive trsm_r blocksize constraint code. Details: - Changed a cpp macro that was meant to prevent using certain trsm_r code if BLIS_RELAX_MCNR_NCMR_CONSTRAINTS was defined. It was actually coded incorrectly at first. I've now fixed its location and changed its consequence to a compile-time #error message. commit 9772218cae57d55c252595b01e3669d8bed84944 Author: sthangar Date: Wed Nov 16 15:19:19 2016 +0530 Added optimized DAMAX routines for Zen Change-Id: I499c0c8f0f4ce6c19235c47b86d5608db6ba50f8 commit 9c448e30174e5eb76a94b43b30819704a5dfcb3f Merge: 998d8240 e35d3c23 Author: Santanu Thangaraj Date: Wed Nov 16 04:18:57 2016 -0500 Merge "Added new optimized micro-kernel for dotxv routine" into amd-staging commit 998d824044adac0d54c921dcd44fb58f3d54aad2 Merge: 0d13e9a4 6b5a4032 Author: praveeng Date: Wed Nov 16 14:22:42 2016 +0530 Merge master code till devinamatthews/omp_num_thrds 2016_11_16 to amd-staging Change-Id: I601ff1d3ec8a680e1be039ffc7b299744e8a27c5 commit 6b5a4032d2e3ed29a272c7f738b7e3ed6657e556 Merge: 3b524a08 a8220e3a Author: Field G. 
Van Zee Date: Thu Nov 10 15:28:24 2016 -0600 Merge pull request #109 from devinamatthews/omp_num_threads Add automatic loop thread assignment. commit a8220e3a86433b5d76789e32ea7ca014a11b6d17 Author: Devin Matthews Date: Thu Nov 10 14:19:34 2016 -0600 - Fix typo in bli_cntx.c - Bump BLIS_DEFAULT_NR_THREAD_MAX to 4 commit e35d3c23f28784e50ee13d2e77a69d60e0c24c1f Author: Kiran Varaganti Date: Thu Nov 10 14:30:53 2016 +0530 Added new optimized micro-kernel for dotxv routine Change-Id: I2c544e9b25a454d971ad690353502a55cd668391 commit 0d13e9a4f6f2fcda08f205215240cdf86442d6c6 Merge: e044fa62 3b524a08 Author: praveeng Date: Mon Nov 7 14:40:41 2016 +0530 bli_kernel.h Change-Id: I425d089f79497a0de7d1622e829c3ca9edf7f091 commit c05b3862f6241486442b313eff0c8bee7b5e1274 Author: Devin Matthews Date: Fri Nov 4 15:48:02 2016 -0500 Add automatic loop thread assignment. - Number of threads is determined by BLIS_NUM_THREADS or OMP_NUM_THREADS, but can be overridden by BLIS_XX_NT as before. - Threads are assigned to loops (ic, jc, ir, and jr) automatically by weighted partitioning and heuristics, both of which are tunable via bli_kernel.h. - All level-3 BLAS covered. commit 3b524a08e3fb8380e7b8b2ba835312c51a331570 Author: Field G. Van Zee Date: Wed Nov 2 17:45:18 2016 -0500 Consolidated 3m1/4m1 gemmtrsm, trsm ukernel code. Details: - Consolidated the macros that define the lower and upper versions of the gemmtrsm microkernels into a single macro that is instantiated twice. Did this for both 3m1 and 4m1 microkernels. - Consolidated lower and upper versions of the trsm microkernels for 3m1 and 4m1 into single files (each). commit ead231aca635deb3db270f118454e4222c627f31 Merge: d25e6f8b 62987f60 Author: Field G.
Van Zee Date: Wed Nov 2 13:03:50 2016 -0500 Merge pull request #108 from devinamatthews/patch-2 Update .travis.yml with additional tests commit 62987f60a6a6ff0a75b31d0404f493593ce35ccc Author: Devin Matthews Date: Wed Nov 2 11:20:37 2016 -0500 Allow KNL to fail commit 8f9010542c751ae3cbfe6121cb011d8985c1e00d Author: Devin Matthews Date: Wed Nov 2 11:18:32 2016 -0500 Fix some problems with OSX builds: - Update CPU detection for Intel archs (esp. Skylake) - Allow clang for the reference config commit d25e6f8b63c57f30b8a67dffbf4995977cf9f235 Author: Field G. Van Zee Date: Tue Nov 1 14:35:15 2016 -0500 Can disable trsm_r-specific blocksize constraints. Details: - Added cpp guards around the constraints in bli_kernel_macro_defs.h that enforce MC % NR = 0 and NC % MR = 0. These constraints are ONLY needed when handling right-side trsm by allowing the matrix on the right (matrix B) to be triangular, because it involves swapping register, but not cache, blocksizes (packing A by NR and B by MR) and then swapping the operands to gemmtrsm just before that kernel is called. It may be useful to disable these constraints if, for example, the developer wishes to test the configuration with a different set of cache blocksizes where only MC % MR = 0 and NC % NR = 0 are enforced. - In summary, #defining BLIS_RELAX_MCNR_NCMR_CONSTRAINTS will bypass the enforcement of MC % NR = 0 and NC % MR = 0. commit 1a67e3688edb073a9d44c160e7b0798e08796b8a Author: Devin Matthews Date: Tue Nov 1 13:53:18 2016 -0500 Bogus commit Need to trigger another Travis build. commit 2cd82d67b372cad1bed50cfd99e524f1f40b4e24 Author: Devin Matthews Date: Tue Nov 1 13:25:50 2016 -0500 Some fixes for .travis.yml - Switch to gcc-5 to support knl - Don't run tests in parallel -- it is super slow. - Use clang on OSX since gcc is only a zombie husk. 
commit a3db4e6bdfe745083acf704ab0f51f74ea869538 Author: Devin Matthews Date: Tue Nov 1 10:33:18 2016 -0500 Update .travis.yml with additional tests - Test knl configuration (without running of course). - Test openmp and pthreads threading for auto configuration with 4 threads. - Test auto configuration with and without pthreads on OSX. - Also, run make in parallel. I don't know how the `addons:` section works on OSX; hopefully it is just ignored. commit 8a11a2174a1a5b9426f13bbc5338dc86ab138cdd Author: Field G. Van Zee Date: Mon Oct 31 19:07:55 2016 -0500 Updates to non-default haswell microkernels. Details: - Updated s and d microkernels in bli_gemm_asm_d8x6.c to relax alignment constraints. - Added missing c and z microkernels, which are based on the corresponding kernels in the d6x8 set. - This completes the d8x6 set (which may be used for situations when it is desirable to have a microkernel with a column preference). commit 618f4331eba209803ecab99747872eceb1b5f091 Author: Field G. Van Zee Date: Mon Oct 31 14:40:51 2016 -0500 Align strides of ct in macrokernels to that of c. Details: - Previously, rs_ct and cs_ct, the strides of the temporary microtile used primarily in the macrokernels' edge case handling, were unconditionally set to 1 and MR, respectively. However, Devin Matthews noted that this ought to be changed so that the strides of ct were in agreement with the strides of C. (That is, if C was row-stored, then ct should be accessed as by rows as well.) The implicit assumption is that the strides of C have already been adjusted, via induced transposition, if the storage preference of the microkernel is at odds with the storage of C. So, if the microkernel prefers row storage, the macrokernel's interior cases would present row-stored (ideal) microkernel subproblems to the microkernel, but for edge cases, it would still see column-stored subproblems (not ideal). This commit fixes this issue. Thanks to Devin for his suggestion. 
commit c2c91e09b4893cb81314774557f728a95080f81e Author: Jeff Hammond Date: Tue Oct 25 21:15:26 2016 -0700 never use libm with Intel compilers Intel compilers include a highly optimized math library (libimf) that should be used instead of GNU libm. yes, this change is for ALL targets, including those that are not supported by the Intel compiler. there is no harm in doing this, and it is future-proof in the event that the Intel compilers support other architectures. commit 630391002325a589063aec2ab0a7d89ef2e178c0 Merge: 956b3edf 216206c1 Author: Field G. Van Zee Date: Tue Oct 25 19:34:51 2016 -0500 Merge pull request #105 from devinamatthews/knl Support for Intel Knight's Landing. commit 216206c1d328a865c2192e35a4df6e9aff79a85b Author: Devin Matthews Date: Tue Oct 25 13:56:18 2016 -0500 Fix up for merge to master. commit 11eb7957abbcdf02d5e312898e094260eadb1209 Merge: cd5b6681 956b3edf Author: Devin Matthews Date: Tue Oct 25 13:51:07 2016 -0500 Merge branch 'master' into knl # Conflicts: # frame/thread/bli_thread.h commit cd5b6681838899283cd94e5427dfda206e7fbabe Author: Devin Matthews Date: Tue Oct 25 13:49:27 2016 -0500 Don't use %rbp in KNL packing kernels. commit 956b3edf8eb09480f31f2e861c1b10f9ecbb2e52 Merge: b7e41d71 0662a3c1 Author: Field G. Van Zee Date: Tue Oct 25 13:02:57 2016 -0500 Merge pull request #104 from devinamatthews/misspellings Add flexible options for thread model (pthread/posix for pthreads etc.). commit 0662a3c1b1f4644a86bf8e5073d1391808c91b4a Author: Devin Matthews Date: Tue Oct 25 12:42:44 2016 -0500 Add flexible options for thread model (pthread/posix for pthreads etc.). 
commit e044fa624008c161de32a39d734cddf1dd22dd41 Author: Kiran Varaganti Date: Tue Oct 25 13:03:05 2016 +0530 Changed double precision trsm kernel macro definition to bli_dtrsm_l_int_6x8 from 6x16 : it fixes the seg fault Change-Id: Ia8c1de5fe13a370d691570a50136d55ffb18908a commit b3ed4933aa0da72ad771fb0fdf1727e5ba9ad7b4 Author: Kiran Varaganti Date: Tue Oct 25 13:03:05 2016 +0530 Changed double precision trsm kernel macro definition to bli_dtrsm_l_int_6x8 from 6x16 : it fixes the seg fault Change-Id: Ia8c1de5fe13a370d691570a50136d55ffb18908a commit b7e41d71b07d2af6d22d632c70e0c5f7ce46852c Merge: 4bd905bd 5117d444 Author: Field G. Van Zee Date: Mon Oct 24 16:47:46 2016 -0500 Merge pull request #103 from devinamatthews/patch-1 Change .align to .p2align in Bulldozer ukernels. commit 5117d444f7f3a2bc327f067926eaf2398212edda Author: Devin Matthews Date: Mon Oct 24 16:20:47 2016 -0500 Change .align to .p2align in Bulldozer ukernels Apparently OSX doesn't allow .align directives for >16B, so I've changed these to their .p2align counterparts. commit 4bd905bd4597e0ad7bedf31e25e779d3e2dfda29 Merge: 936d5fdc 7f32dd57 Author: Field G. Van Zee Date: Fri Oct 21 14:48:44 2016 -0500 Merge pull request #93 from ShadenSmith/config_check Adds sanity check to configuration choice. commit 936d5fdc26c6c4dab199a8d11fde948975cfa1d6 Author: Field G. Van Zee Date: Fri Oct 21 14:34:27 2016 -0500 Fixed multithreading compilation bug in 970745a. Details: - Moved the definition of the cpp macro BLIS_ENABLE_MULTITHREADING from bli_thread.h to bli_config_macro_defs.h. Also moved the sanity check that OpenMP and POSIX threads are not both enabled. - Thanks to Krzysztof Drewniak for reporting this bug. commit d250e6a3af3af8beedcda28f508ac03e94efb3c8 Author: Kiran Varaganti Date: Thu Oct 20 14:34:39 2016 +0530 Merged TRSM and scalv routines into zen folder Change-Id: Ice897bc83e8fb70b90f23cc3ce892c39883aceb9 commit 8feb0f85a674e84bec2417486e3bcea584b14c04 Author: Field G. 
Van Zee Date: Wed Oct 19 16:05:41 2016 -0500 Removed auto-prototyping of malloc()/free() substitutes. Details: - Removed the header file, bli_malloc_prototypes.h, which automatically generated prototypes for the functions specified by the following cpp macros: BLIS_MALLOC_INTL BLIS_FREE_INTL BLIS_MALLOC_POOL BLIS_FREE_POOL BLIS_MALLOC_USER BLIS_FREE_USER These prototypes were originally provided primarily as a convenience to those developers who specified their own malloc()/free() substitutes for one or more of the following. However, we generated these prototypes regardless, even when the default values (malloc and free) of the macros above were used. A problem arose under certain circumstances (e.g., gcc in C++ mode on Linux with glibc) when including blis.h that stemmed from the "throw" specification which was added to the glibc's malloc() prototype, resulting in a prototype mismatch. Therefore, going forward, developers who specify their own custom malloc()/free() substitutes must also prototype those substitutes via bli_kernel.h. Thanks to Krzysztof Drewniak for reporting this bug, and Devin Matthews for researching the nature and potential solutions. commit 970745a5fc7c29de3e202988e5eb104fabca4fdc Author: Field G. Van Zee Date: Wed Oct 19 15:58:03 2016 -0500 Reorganized typedefs to avoid compiler warnings. Details: - Relocated membrk_t definition from bli_membrk.h to bli_type_defs.h. - Moved #include of bli_malloc.h from blis.h to bli_type_defs.h. - Removed standalone mtx_t and mutex_t typedefs in bli_type_defs.h. - Moved #include of bli_mutex.h from bli_thread.h to bli_typedefs.h. - The redundant typedefs of membrk_t and mtx_t caused a warning on some C compilers. Thanks to Tyler Smith for reporting this issue. 
commit 1c2f7b57d557c05f5ef6148cccafaf0f70d910da Author: sthangar Date: Tue Oct 18 15:06:35 2016 +0530 Removed symlinks to zen kernels from haswell kernel folder and also modified the bli_kernel.h file accordingly Change-Id: Ib3736af48e851c8243bbe10d937fb942c49ad048 commit d864ea9f4f039fe2b2dc395d0015bd9e8902bc8e Merge: 7045fcbf 28b2af8a Author: praveeng Date: Fri Oct 14 17:00:57 2016 +0530 Merge master code 2016_10_14 till Added disabled code thrinfo_t structures Change-Id: If7db98d286c1471fcd30f00757abee9b253ef987 commit 28b2af8a71133ce68774e153b6e05afb05affba8 Author: Field G. Van Zee Date: Thu Oct 13 14:50:08 2016 -0500 Added disabled code to print thrinfo_t structures. Details: - Added cpp-guarded code to bli_thrcomm_openmp.c that allows a curious developer to print the contents of the thrinfo_t structures of each thread, for verification purposes or just to study the way thread information and communicators are used in BLIS. - Enabled some previously-disabled code in bli_l3_thrinfo.c for freeing an array of thrinfo_t* values that is used in the new, cpp-guarded code mentioned above. - Removed some old commented lines from bli_gemm_front.c. commit 11eed3f683d09e65f721567b346b0f733bff9a64 Author: Field G. Van Zee Date: Thu Oct 13 14:23:23 2016 -0500 Fixed a configure -t omp/openmp bug from fd04869. Details: - Forgot to update certain occurrences of "omp" in common.mk during commit fd04869, which changed the preferred configure option string for enabling OpenMP from "omp" to "openmp".
commit 7045fcbf0bd349ebe6cb9ac4508c6a387bb05966 Merge: 7e044900 9cda6057 Author: praveeng Date: Thu Oct 13 12:02:28 2016 +0530 Merge master code 2016_10_13 Removed previously renamed/old files Change-Id: I8106d371afaa0af474a8967388d44481b05de923 commit 7e04490002206d3557fcfb7dd893838a7f36916f Author: sthangar Date: Wed Oct 12 16:43:02 2016 +0530 Checked in the SAMAX optimizations Change-Id: I7faf8c3adf52ff01432188ad3b9866ee4b9a9dfd commit 9cda6057eaa16a24ac8785a9fa167df6c9edba44 Author: Field G. Van Zee Date: Tue Oct 11 13:21:26 2016 -0500 Removed previously renamed/old files. Details: - Removed frame/base/bli_mem.c and frame/include/bli_auxinfo_macro_defs.h, both of which were renamed/removed in 701b9aa. For some reason, these files survived when the compose branch was merged back into master. (Clearly, git's merging algorithm is not perfect.) - Removed frame/base/bli_mem.c.prev (an artifact of the long-ago changed memory allocator that I was keeping around for no particular reason). commit 22377abd84b9e560ffe1c4e4d284eb443ddb7133 Author: Field G. Van Zee Date: Mon Oct 10 13:43:56 2016 -0500 Fixed bli_gemm() segfault on empty C matrices. Details: - Fixed a bug that would manifest in the form of a segmentation fault in bli_cntl_free() when calling any level-3 operation on an empty output matrix (ie: m = n = 0). Specifically, the code previously assumed that the entire control tree was built prior to it being freed. However, if the level-3 operation performs an early exit, the control tree will be incomplete, and this scenario is now handled. Thanks to Elmar Peise for reporting this bug. commit 0b571cd94d9b175331c9453258a6b1389a718ae8 Author: Field G. Van Zee Date: Thu Oct 6 14:48:15 2016 -0500 Fixed segfault in bli_free_align() for NULL ptrs. Details: - Fixed a bug in bli_free_align() caused by failing to handle NULL pointers up-front, which led to performing pointer arithmetic on NULL pointers in order to free the address immediately before the pointer. 
Thanks to Devin Matthews for reporting this bug. commit cd84fb95182514601d72c78ee0e36a394d0284d7 Author: praveeng Date: Thu Oct 6 15:08:21 2016 +0530 syntax errors in configure file Change-Id: Ibe8a6071aad97df550df64c009fec33a9d8f43a1 commit f2e7ea113aa93b74f1d42408d5db2c5a7b00a653 Merge: 133983c3 86969873 Author: praveeng Date: Thu Oct 6 12:35:30 2016 +0530 conflicts merge for bli_kernel.h Change-Id: I15d846bd34e11f86ebfd7ed091ff671a1f3366a0 commit 133983c36fa01c7acb6d666b3744f77f216314a5 Author: sthangar Date: Thu Oct 6 11:26:22 2016 +0530 code clean up in bli_kernel.h Change-Id: I11d9cdf2af8e8199209eb084f6c3a7c910b83d5d commit 4fb9b4ef2e4cf2626a6e000a41628fb823f16da8 Author: Field G. Van Zee Date: Wed Oct 5 14:41:35 2016 -0500 CHANGELOG update (0.2.1) commit 866b2dde3f41760121115fb25f096d4344e8b4f9 (tag: 0.2.1) Author: Field G. Van Zee Date: Wed Oct 5 14:41:34 2016 -0500 Version file update (0.2.1) commit 87fddeab3c8a5ccb1bbf02e5f89db1464e459ba9 Merge: 86969873 6f71cd34 Author: Field G. Van Zee Date: Wed Oct 5 13:35:01 2016 -0500 Merge branch 'compose' commit 6f71cd344951854e4cff9ea21bbdfe536e72611d (origin/compose) Merge: c0630c40 8d55033c Author: Field G. Van Zee Date: Tue Oct 4 15:53:46 2016 -0500 Merge pull request #94 from flame/distcomm Implemented distributed thrinfo_t management. commit 86969873b5b861966d717d8f9f370af39e3d9de6 Author: Field G. Van Zee Date: Tue Oct 4 14:24:59 2016 -0500 Reclassified amaxv operation as a level-1v kernel. Details: - Moved amaxv from being a utility operation to being a level-1v operation. This includes the establishment of a new amaxv kernel to live beside all of the other level-1v kernels. - Added two new functions to bli_part.c: bli_acquire_mij() bli_acquire_vi() The first acquires a scalar object for the (i,j) element of a matrix, and the second acquires a scalar object for the ith element of a vector. - Added integer support to bli_getsc level-0 operation.
This involved adding integer support to the bli_*gets level-0 scalar macros. - Added a new test module to test amaxv as a level-1v operation. The test module works by comparing the value identified by bli_amaxv() to the value found from a reference-like code local to the test module source file. In other words, it (intentionally) does not guarantee the same index is found; only the same value. This allows for different implementations in the case where a vector contains two or more elements containing exactly the same floating point value (or values, in the case of the complex domain). - Removed the directory frame/include/old/. commit 8d55033c966feed99fcca2a58017c3ab5b1646dc Author: Field G. Van Zee Date: Tue Sep 27 15:20:58 2016 -0500 Implemented distributed thrinfo_t management. Details: - Implemented Ricardo Magana's distributed thread info/communicator management. Rather than fully construct the thrinfo_t structures, from root to leaf, prior to spawning threads, the threads individually construct their thrinfo_t trees (or, chains), and do so incrementally, as needed, reusing the same structure nodes during subsequent blocked variant iterations. This required moving the initial creation of the thrinfo_t structure (now, the root nodes) from the _front() functions to the bli_l3_thread_decorator(). The incremental "growing" of the tree is performed in the internal back-end (ie: _int()) function, and so mostly invisible. Also, the incremental growth of the thrinfo_t tree is done as a function of the current and parent control tree nodes (as well as the parent thrinfo_t node), further reinforcing the parallel relationship between the two data structures. - Removed the "inner" communicator from thrinfo_t structure definition, as well as its id. Changed all APIs accordingly. Renamed bli_thrinfo_needs_free_comms() to bli_thrinfo_needs_free_comm(). - Defined bli_l3_thrinfo_print_paths(), which prints the information in an array of thrinfo_t* structure pointers.
(Used only as a debugging/verification tool.) - Deprecated the following thrinfo_t creation functions: bli_packm_thrinfo_create() bli_l3_thrinfo_create() because they are no longer used. bli_thrinfo_create() is now called directly when creating thrinfo_t nodes. commit fd04869ae4d4a3b0ebb9052557c296456bce7c0d Author: Field G. Van Zee Date: Tue Sep 27 14:14:11 2016 -0500 Changed configure's 'omp' threading to 'openmp'. Details: - Changed the configure script so that the expected string argument to the -t (or --enable-threading=) option that enables OpenMP multithreading is 'openmp'. The previous expected string, 'omp', is still supported but should be considered deprecated. commit 9424af87209e4e435e2e742430945152690170b0 Merge: efa7341d c0630c40 Author: Field G. Van Zee Date: Tue Sep 27 12:51:08 2016 -0500 Merge branch 'compose' commit 7f32dd57c6bd41c0704341752842277dd6a4c8eb Author: Shaden Smith Date: Sat Sep 17 11:33:57 2016 -0500 Adds sanity check to configuration choice. commit efa7341df0b0115926aa8a6e8a4ebfb24fdbf11e Merge: 121c39d4 e1453f68 Author: Field G. Van Zee Date: Fri Sep 16 11:01:57 2016 -0500 Merge pull request #92 from ShadenSmith/readme_fix Fixes broken URL in README.md commit e1453f68f6afd90ae9a29b7a5faa46aa79bbf741 Author: Shaden Smith Date: Fri Sep 16 09:29:28 2016 -0500 Fixes broken URL in README.md commit b922d7563422e14c49a4677bc6ae088a408861ed Author: Field G. Van Zee Date: Tue Aug 23 13:38:36 2016 -0500 Avoid compiling BLAS/CBLAS files when disabled. Details: - Updated the top-level Makefile, build/config.mk.in template, and configure script so that object files corresponding to source files belonging to the BLAS compatibility layer are not compiled (or archived) when the compatibility layer is disabled. (Same for CBLAS.) Thanks to Devin Matthews for suggesting this optimization. - Slight change to the way configure handles internal variables. 
Instead of converting (overwriting) some, such as enable_blas2blis and enable_cblas, from a "yes" or "no" to a "1" or "0" value, the latter are now stored in new variables that live alongside the originals (with the suffix "_01"). This is convenient since some values need to be sed-substituted into the config.mk.in template, which requires "yes" or "no", while some need to be written to the bli_config.h.in template, which requires "0" or "1". Updated BLIS4 TOMS citation in README.md. Added complex gemm micro-kernels for haswell. Details: - Defined cgemm (3x8) and zgemm (3x4) micro-kernels for haswell-based architectures. As with their real domain brethren, these kernels prefer row storage, (though this doesn't affect most users due to high-level optimizations in most level-3 operations that induce a transpose to whatever storage preference the kernel may have). Change-Id: I512ab90784ecbb7cdaee24928d2ccebb544ba5c1 commit 69826110bab2a064ec76457c24843d28f2581281 Merge: 64598ee4 a58dd35e Author: Pradeep Rao Date: Wed Sep 14 03:26:25 2016 -0400 Merge "Implemented trsm single precision for lower triangular matrices, files added bli_trsm_l_int_6x16.c; files modified bli_kernel.h to enable optimized trsm microkernel and test_trsm.c is modified to test trsm single precision" into amd-staging commit c0630c4024b08750043a2942a3e8a037aa6b6259 Author: Field G. Van Zee Date: Mon Sep 12 13:59:02 2016 -0500 Added debugging printf()'s to bli_l3_thrinfo.c. Details: - Added optional printf() statements to print out thread communicator info as the thrinfo_t structure is built in bli_l3_thrinfo.c. - Minor changes to frame/thread/bli_thrinfo.h. commit 7b3bf1ffcd7160ccbf6c2518af6d88f6742e4977 Merge: 35509818 121c39d4 Author: Field G. Van Zee Date: Tue Sep 6 15:47:13 2016 -0500 Merge branch 'master' into compose commit 121c39d455f2db6f7ce6802ba7f73ad5e088c68c Author: Field G. Van Zee Date: Mon Sep 5 13:11:42 2016 -0500 Added complex gemm micro-kernels for haswell.
Details: - Defined cgemm (3x8) and zgemm (3x4) micro-kernels for haswell-based architectures. As with their real domain brethren, these kernels perfer row storage, (though this doesn't affect most users due to high-level optimizations in most level-3 operations that induce a transpose to whatever storage preference the kernel may have). commit 35509818cbea1598b123421f81c42120889a03c3 Author: Field G. Van Zee Date: Wed Aug 31 17:34:15 2016 -0500 Added, moved some thread barriers. Details: - Removed thread barriers from the end of the loop bodies of bli_gemm_blk_var1(), bli_gemm_blk_var2(), bli_trsm_blk_var1(), and bli_trsm_blk_var2(). - Moved the thread barrier at the end of bli_packm_int() to the end of bli_l3_packm(), and added missing barriers to that function. - Removed the no longer necessary (and now incorrect) ochief guard in bli_gemm3m3_packa() on the bli_obj_scalar_reset() on C. - Thanks to Tyler Smith for help with these changes. commit 64598ee4cfb86f64abbd4bcef5a82ba0d5565b67 Author: sthangar Date: Wed Aug 31 12:54:50 2016 +0530 fixed the symlink issue Change-Id: I2186d529f295c576597c189e1ae219bc1a83f955 commit abd61f9fa75d77a96d1491b3e035451ee73238fe Author: Field G. Van Zee Date: Tue Aug 30 12:34:19 2016 -0500 Updated BLIS4 TOMS citation in README.md. commit 8a2373f26ba8fcd5b2d7b2cc72cb8b2e1f841a03 Author: sthangar Date: Mon Aug 29 14:10:45 2016 +0530 Norm 2 optimization Change-Id: Ide9decaccd20bf0ccc32c9abb6556e038dceed2b commit fdc663902347aa252ea88cf09ce24ab748958dff Author: sthangar Date: Mon Aug 29 10:43:38 2016 +0530 Placed 1 and 1f AMD optimized AVX routines under zen folder Change-Id: I26795211ef11d232ed794ce36dd0a9c1f8706328 commit 701b9aa3ff028decbf90efac0dca5bd64fe26269 Author: Field G. Van Zee Date: Fri Aug 26 19:04:45 2016 -0500 Redesigned control tree infrastructure. 
Details: - Altered control tree node struct definitions so that all nodes have the same struct definition, whose primary fields consist of a blocksize id, a variant function pointer, a pointer to an optional parameter struct, and a pointer to a (single) sub-node. This unified control tree type is now named cntl_t. - Changed the way control tree nodes are connected, and what computation they represent, such that, for example, packing operations are now associated with nodes that are "inline" in the tree, rather than off-shoot branches. The original tree for the classic Goto gemm algorithm was expressed (roughly) as: blk_var2 -> blk_var3 -> blk_var1 -> ker_var2 | | -> packb -> packa and now, the same tree would look like: blk_var2 -> blk_var3 -> packb -> blk_var1 -> packa -> ker_var2 Specifically, the packb and packa nodes perform their respective packing operations and then recurse (without any loop) to a subproblem. This means there are now two kinds of level-3 control tree nodes: partitioning and non-partitioning. The blocked variants are members of the former, because they iteratively partition off submatrices and perform suboperations on those partitions, while the packing variants belong to the latter group. (This change has the effect of allowing greatly simplified initialization of the nodes, which previously involved setting many unused node fields to NULL.) - Changed the way thrinfo_t tree nodes are arranged to mirror the new connective structure of control trees. That is, packm nodes are no longer off-shoot branches of the main algorithmic nodes, but rather connected "inline". - Simplified control tree creation functions. Partitioning nodes are created concisely with just a few fields needing initialization. By contrast, the packing nodes require additional parameters, which are stored in a packm-specific struct that is tracked via the optional parameters pointer within the control tree struct. 
(This parameter struct must always begin with a uint64_t that contains the byte size of the struct. This allows us to use a generic function to recursively copy control trees.) gemm, herk, and trmm control tree creation continues to be consolidated into a single function, with the operation family being used to select among the parameter-agnostic macro-kernel wrappers. A single routine, bli_cntl_free(), is provided to free control trees recursively, whereby the chief thread within a group releases the blocks associated with mem_t entries back to the memory broker from which they were acquired. - Updated internal back-ends, e.g. bli_gemm_int(), to query and call the function pointer stored in the current control tree node (rather than index into a local function pointer array). Before being invoked, these function pointers are first cast to a gemm_voft (for gemm, herk, or trmm families) or trsm_voft (for trsm family) type, which is defined in frame/3/bli_l3_var_oft.h. - Retired herk and trmm internal back-ends, since all execution now flows through gemm or trsm blocked variants. - Merged forwards- and backwards-moving variants by querying the direction from routines as a function of the variant's matrix operands. gemm and herk always move forward, while trmm and trsm move in a direction that is dependent on which operand (a or b) is triangular. - Added functions bli_thread_get_range_mdim(), bli_thread_get_range_ndim(), each of which takes additional arguments and hides complexity in managing the difference between the way ranges are computed for the four families of operations. - Simplified level-3 blocked variants according to the above changes, so that the only steps taken are: 1. Query partitioning direction (forwards or backwards). 2. Prune unreferenced regions, if they exist. 3. Determine the thread partitioning sub-ranges. 4. Determine the partitioning blocksize (passing in the partitioning direction) 5. 
Acquire the current iteration's partitions for the matrices affected by the current variant's partitioning dimension (m, k, n). 6. Call the subproblem. - Instantiate control trees once per thread, per operation invocation. (This is a change from the previous regime in which control trees were treated as stateless objects, initialized with the library, and shared as read-only objects between threads.) This once-per-thread allocation is done primarily to allow threads to use the control tree as a place to cache certain data for use in subsequent loop iterations. Presently, the only application of this caching is a mem_t entry for the packing blocks checked out from the memory broker (allocator). If a non-NULL control tree is passed in by the (expert) user, then the tree is copied by each thread. This is done in bli_l3_thread_decorator(), in bli_thrcomm_*.c. - Added a new field to the context, and opid_t which tracks the "family" of the operation being executed. For example, gemm, hemm, and symm are all part of the gemm family, while herk, syrk, her2k, and syr2k are all part of the herk family. Knowing the operation's family is necessary when conditionally executing the internal (beta) scalar reset on C in blocked variant 3, which is needed for gemm and herk families, but must not be performed for the trmm family (because beta has only been applied to the current row-panel of C after the first rank-kc iteration). - Reexpressed 3m3 induced method blocked variant in frame/3/gemm/ind to conform with the new control tree design, and renamed the macro- kernel codes corresponding to 3m2 and 4m1b. - Renamed bli_mem.c (and its APIs) to bli_memsys.c, and renamed/relocated bli_mem_macro_defs.h from frame/include to frame/base/bli_mem.h. - Renamed/relocated bli_auxinfo_macro_defs.h from frame/include to frame/base/bli_auxinfo.h. 
- Fixed a minor bug whereby the storage-to-ukr-preference matching optimization in the various level-3 front-ends was not being applied properly when the context indicated that execution would be via an induced method. (Before, we always checked the native micro-kernel corresponding to the datatype being executed, whereas now we check the native micro-kernel corresponding to the datatype's real projection, since that is the micro-kernel that is actually used by induced methods.) - Added an option to the testsuite to skip the testing of native level-3 complex implementations. Previously, it was always tested, provided that the c/z datatypes were enabled. However, some configurations use reference micro-kernels for complex datatypes, and testing these implementations can slow down the testsuite considerably. commit a58dd35ed7b5b77a6b272655d2edd7a822b8fa87 Author: Kiran Varaganti Date: Fri Aug 26 14:55:12 2016 +0530 Implemented trsm single precision for lower triangular matrices, files added bli_trsm_l_int_6x16.cfiles modified bli_kernel.h to enable optimized trsm microkernel and test_trsm.c is modified to test trsm single precision Change-Id: Ibddf989f4aad577e89558673e1038cf6ece654d9 commit 73517f522b69de429dd7f3df60a70c068149ab28 Merge: c6f5c215 50293da3 Author: Field G. Van Zee Date: Tue Aug 23 13:46:59 2016 -0500 Merge branch 'master' into compose commit 50293da38d5f2b7be9bbc94b9e85aacb6a10f672 Author: Field G. Van Zee Date: Tue Aug 23 13:38:36 2016 -0500 Avoid compiling BLAS/CBLAS files when disabled. Details: - Updated the top-level Makefile, build/config.mk.in template, and configure script so that object files corresponding to source files belonging to the BLAS compatibility layer are not compiled (or archived) when the compatibility layer is disabled. (Same for CBLAS.) Thanks to Devin Matthews for suggesting this optimization. - Slight change to the way configure handles internal variables. 
Instead of converting (overwriting) some, such as enable_blas2blis and enable_cblas, from a "yes" or "no" to a "1" or "0" value, the latter are now stored in new variables that live alongside the originals (with the suffix "_01"). This is convenient since some values need to be sed-substituted into the config.mk.in template, which requires "yes" or "no", while some need to be written to the bli_config.h.in template, which requires "0" or "1". commit 22dd6a353ddb56614309c01533b1a94c9fd32bca Merge: cdfb3c3f f20ed388 Author: praveeng Date: Tue Aug 23 15:15:35 2016 +0530 Merge master code as on 2016_08_23 to amd-staging branch by praveeng Changes to be committed: modified: frame/thread/bli_mutex_openmp.h modified: frame/thread/bli_mutex_pthreads.h Change-Id: Ica522edbb1d0173f53f38d5057b1f7aef73666be commit c6f5c215ee793d03ea834469fc2adc53feaffc42 Merge: d52cb767 16a4c7a8 Author: Field G. Van Zee Date: Mon Aug 22 17:33:02 2016 -0500 Merge branch 'master' into compose commit f20ed3885d628992fab88690f629a5a2bab3eb88 Merge: 02ac597e 4bc842ca Author: praveeng Date: Mon Aug 22 15:27:33 2016 +0530 Merge branch 'master' of https://github.com/clMathLibraries/blis-amd for "Fixed bugs in bli_mutex_init() and friends." 
commit 02ac597e4b9be2670d9fff65d28552f8e1ec81b3 Author: praveeng Date: Thu Jul 28 15:11:08 2016 +0530 Revert commits 357c990bdd7bd5667aac5adf1bab3712973e7414 Change-Id: I12a34456d7eed93fda4369e76bcddb42ba7ccb99 commit 84e41cc73c9c87ce64582acd4264b8e1b5316482 Author: praveeng Date: Thu Jul 28 15:01:36 2016 +0530 Revert commits 8aee306 Change-Id: I3dd999c77c6779332a40dbb84371ca487216f189 commit 30ccfcee82db93d0109d1571242e2db925e95d0a Author: praveeng Date: Mon Jul 25 14:14:00 2016 +0530 removed changes from readme file which are giving confilcts Change-Id: Ic71ad1313e1404fed444e899466043704d875af6 commit aeca25cd63fc8971f8fe7809599c57853f976548 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 6b2274864b36fd1019d97bcc4ca6dd7a57ef16d9 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit daa7a9ecb25982f2551adbd95e65f8ba97cfe944 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 5f66a4aa05aeffcb6eb587851d78d9527319466c Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit c6cbd78d2388c08824822b91a1c36ac4349bb67f Author: praveeng Date: Thu Jul 28 15:11:08 2016 +0530 Revert commits 357c990bdd7bd5667aac5adf1bab3712973e7414 Change-Id: I12a34456d7eed93fda4369e76bcddb42ba7ccb99 commit 9219a9060762525f87ebbf556d78fe8621858513 Author: praveeng Date: Thu Jul 28 15:01:36 2016 +0530 Revert commits 8aee306 Change-Id: I3dd999c77c6779332a40dbb84371ca487216f189 commit 728573296efa7cf14d2381570e116509dfe2a240 Author: praveeng Date: Mon Jul 25 14:14:00 2016 +0530 removed changes from readme file which are giving confilcts Change-Id: Ic71ad1313e1404fed444e899466043704d875af6 commit ad7862e291c240505c733a41d231b1a126ade73c Author: 
praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit ad4b471a25ce77867295e5529dfc787e7c18b03f Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit 55d641363fcd8bdfdabbd7c22822fa2d0b7f3fa6 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit f3b6b15f6d591d323802bd6c81c522a02056506d Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit 16a4c7a823d60707ed9272f5d36e5c5d54c0ba4b Author: Field G. Van Zee Date: Fri Aug 19 11:38:36 2016 -0500 Fixed bugs in bli_mutex_init() and friends. Details: - Fixed a couple of bugs that affected OpenMP and POSIX threads configurations that resulted in compiler errors and warnings due to type mismatch, and in the case of pthreads, a missing function argument. The bugs are fairly recent, introduced in a017062. commit c8e4ef93953ba2b79fb7e0973c08469c0e28a2cd Author: Devin Matthews Date: Wed Aug 3 16:13:03 2016 -0500 Add prefetchw to 30x8 kernel. commit 4b5a2f3d6e7ffeb5cc2be8448554f5c2083ad68f Merge: 380736bf 9f52a587 Author: Devin Matthews Date: Wed Aug 3 16:09:51 2016 -0500 Merge remote-tracking branch 'origin/knl' into knl # Conflicts: # kernels/x86_64/knl/3/bli_dgemm_opt_24x8.c commit 380736bfe955efbdd7274c90b6fd635688e83bc4 Author: Devin Matthews Date: Wed Aug 3 16:08:28 2016 -0500 Add (new) 30x8 KNL kernel and fix non-scatter prefetch bug. commit 9f52a587dee855daa73c194e41b6951416544e9a Author: Devin Matthews Date: Wed Aug 3 16:03:53 2016 -0500 Try prefetchw[t1] instead of regular prefetch for C. commit 8945a1512d366bc6a8a85718d12cbf5de6f2898b Author: Devin Matthews Date: Wed Aug 3 11:28:24 2016 -0500 This version gets ~1550 GFLOPs on KNL wuth 16x4. 
commit cdfb3c3f29d321033fca106aa58ab67ead90a95d Merge: 50a2f2ef 4bc842ca Author: praveeng Date: Fri Jul 29 12:45:04 2016 +0530 Merge master code as on 2016_07_29 to amd-staging branch by praveeng Change-Id: Ic78b84d8b8d10158fb2a612f9a64bbc7b1f9b486 commit 4bc842ca3a64e658c0808bfe4c5693a5ace97923 Merge: 117f8838 b0d510bf Author: praveeng Date: Thu Jul 28 17:32:12 2016 +0530 Merge branch 'master' of publicrepo commit 117f8838511a478aa16137e770d27dd21f4227c5 Author: praveeng Date: Thu Jul 28 15:11:08 2016 +0530 Revert commits 357c990bdd7bd5667aac5adf1bab3712973e7414 Change-Id: I12a34456d7eed93fda4369e76bcddb42ba7ccb99 commit 2fcdc28f1055d385b2e662aa920fb97c472394d7 Author: praveeng Date: Thu Jul 28 15:01:36 2016 +0530 Revert commits 8aee306 Change-Id: I3dd999c77c6779332a40dbb84371ca487216f189 commit 1b5d104afe0628b8b6c0650f1e58cfb08be67004 Author: praveeng Date: Mon Jul 25 14:14:00 2016 +0530 removed changes from readme file which are giving confilcts Change-Id: Ic71ad1313e1404fed444e899466043704d875af6 commit d81273047bff56501e9413a90991d3d1f8b56a06 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 65905c3011a11cda95761681d4ae84337e46bdb5 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit 23cca231be10fe1797aed451bcbc69d38c78bc0c Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 922e3091702f25e3287b417719a33adbd5bbf138 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit b0d510bf0e4dfd177f9e4ae0069f41921e2ecdc1 Author: praveeng Date: Thu Jul 28 15:11:08 2016 +0530 Revert commits 357c990bdd7bd5667aac5adf1bab3712973e7414 Change-Id: I12a34456d7eed93fda4369e76bcddb42ba7ccb99 commit 
5ebeece5b4a8df81d59ca7558b278a4263d15128 Author: praveeng Date: Thu Jul 28 15:01:36 2016 +0530 Revert commits 8aee306 Change-Id: I3dd999c77c6779332a40dbb84371ca487216f189 commit 6ce4c022ebdea00c2b951090e3c2e9e88735b9ce Author: Devin Matthews Date: Wed Jul 27 16:26:36 2016 -0500 Switch back to 24x8. I could only squeeze 24.5GFLOP out of 8x24, and scalability is not improved. commit d52cb7671509592a8078729477b40b60380518a2 Merge: 95abea46 c31b1e7b Author: Field G. Van Zee Date: Wed Jul 27 16:04:55 2016 -0500 Merge branch 'master' into compose commit c31b1e7b9d659b96433a87e5aecb90e457a104cc Author: Field G. Van Zee Date: Wed Jul 27 15:58:07 2016 -0500 Relax alignment restrictions for sandybridge ukrs. Details: - Relaxed the base pointer and leading dimension alignment restrictions in the sandybridge gemm microkernels, allowing the use of vmovups/vmovupd instead of vmovaps/vmovapd. These change mimic those made to the haswell microkernels in e0d2fa0 and ee2c139. - Updated testsuite modules as well as standalone test drivers in 'test' directory to use DBL_MAX as the initial time candidate. Thanks to Devin Matthews for suggesting this change. - Inserted #include "float.h" into bli_system.h (to gain access to DBL_MAX). - Minor update (vis-a-vis contexts) to driver code in test/3m4m. commit b8f2b55532849d45d379afbdd05a52ff6100800d Author: Devin Matthews Date: Wed Jul 27 15:22:55 2016 -0500 Try an 8x24 kernel for the hell of it. commit 7ede5863ae3567f7c0852efc2d5cd649ca19e0f3 Author: Devin Matthews Date: Wed Jul 27 13:41:27 2016 -0600 Allocate pack buffer on MCDRAM for KNL. commit ad89ed2e829c7b261d8ba0998a3cb83ad576ee04 Merge: 2c9de740 81e2b05f Author: Devin Matthews Date: Wed Jul 27 11:45:40 2016 -0500 Merge branch 'knl' of github.com:devinamatthews/blis into knl commit 2c9de740edb66c4692c200731763bbd1d3171ccb Author: Devin Matthews Date: Wed Jul 27 11:44:54 2016 -0500 This version gets ~26GF on one core. 
commit 81e2b05f31bca4e1e1676e7b533d1868d9f9be33 Author: Devin Matthews Date: Wed Jul 27 11:39:05 2016 -0500 Add optimized packing kernels for KNL. commit a7d8ca97b8d835c32d90ff20a565c82733f014a8 Author: Devin Matthews Date: Mon Jul 25 15:15:13 2016 -0500 All fixed. commit 963d0393b023f4134bb0c682923faf9964c0e645 Author: Devin Matthews Date: Mon Jul 25 14:40:53 2016 -0500 Add 24xk pack kernel. commit 117b76739afba481768897d2580f8365d3345417 Author: Devin Matthews Date: Mon Jul 25 13:53:07 2016 -0500 In the midst of debugging. commit 8c0a4fd1d3535d608a9a309a61ffee0a73c3646f Author: Devin Matthews Date: Mon Jul 25 13:09:24 2016 -0500 Fix some row/column confusion. commit c44f9f96930312125b15e64c326ab5ab5cc02633 Author: Devin Matthews Date: Mon Jul 25 12:02:24 2016 -0500 Simplify displacements -- clang assembler was badly botching EVEX compressed displacements giving false alarms for instruction length. commit e0cce177cc1b47ec9f11ac0556241feaa3564df1 Author: Devin Matthews Date: Mon Jul 25 10:02:25 2016 -0500 Minor fixes for 8x24 KNL kernel. commit 50a2f2efcbeb46537f1deaa8e44dc579a4e49eb8 Merge: 1aa77dfc cfd46c88 Author: praveeng Date: Mon Jul 25 17:01:20 2016 +0530 Merge master code as on 2016_07_25 to amd-staging branch by praveeng Change-Id: I84886ae241db2aac0bef6b7ef399f04aa8bca16d commit cfd46c88d59c8f61d5e7cf768d606e4c44623584 Merge: f493bf4d a017062f Author: praveeng Date: Mon Jul 25 15:38:13 2016 +0530 Merge remote-tracking branch 'publicrepo/master' commit f493bf4d704fe0e967783cd6e6877d3302c056a1 Author: praveeng Date: Mon Jul 25 14:14:00 2016 +0530 removed changes from readme file which are giving confilcts Change-Id: Ic71ad1313e1404fed444e899466043704d875af6 commit 65735bbedf75784c48bd11e05b3fdc98fc66b4bc Author: Devin Matthews Date: Sun Jul 24 21:50:32 2016 -0500 Switch to 24x8 kernel, unrolled by 16. commit 45d5dc97177117220bd9dd0abf85aafc185acad1 Author: Devin Matthews Date: Sun Jul 24 14:25:26 2016 -0500 Add 24x8 "KNC-style" kernel for KNL. 
commit 95abea46f86816fddfc9ff0abfa52880801461be Merge: d0dfe5b5 a017062f Author: Field G. Van Zee Date: Sat Jul 23 15:38:33 2016 -0500 Merge branch 'master' into compose commit a017062fdf763037da9d971a028bb07d47aa1c8a Author: Field G. Van Zee Date: Fri Jul 22 17:02:59 2016 -0500 Integrated "memory broker" (membrk_t) abstraction. Details: - Integrated a patch originally authored and submitted by Ricardo Magana of HP Enterprise. The changeset inserts use of a new object type, membrk_t, (memory broker) that allows multiple sets of memory pools on, for example, separate NUMA nodes, each of which has a separate memory space. - Added membrk field to cntx_t and defined corresponding accessor macros. - Added membrk field to mem_t object and defined corresponding accessor macros. - Created new bli_membrk.c file, which contains the new memory broker API, including: bli_membrk_init(), bli_membrk_finalize() bli_membrk_acquire_[mv](), bli_membrk_release(), bli_membrk_init_pools(), bli_membrk_reinit_pools(), bli_membrk_finalize_pools(), bli_membrk_pool_size() - In bli_mem.c, changed function calls to bli_mem_init_pools() -> bli_membrk_init() bli_mem_reinit_pools() -> bli_membrk_reinit() bli_mem_finalize_pools() -> bli_membrk_finalize() - In bli_packv_init.c, bli_packm_init.c, changed function calls to: bli_mem_acquire_[mv]() -> bli_membrk_acquire_[mv]() bli_mem_release() -> bli_membrk_release() - Added bli_mutex.c and related files to frame/thread. These files define abstract mutexes (locks) and corresponding APIs for pthreads, openmp, or single-threaded execution. This new API is employed within functions such as bli_membrk_acquire_[mv]() and bli_membrk_release(). commit 8ff2e069c48c12fd06b9c48c6b3aeb4ea9b0e6e1 Author: Devin Matthews Date: Fri Jul 22 16:22:26 2016 -0500 Add 4x unrolled variant for KNL microkernel. commit 9cb2ed9b0c25f31a22c1c9719b062fa665ad7adf Author: Devin Matthews Date: Fri Jul 22 16:10:30 2016 -0500 Git rid of one RBX update. 
commit 451bde076f0320d60cd2475cfb048ac4a2b798bb Author: Devin Matthews Date: Fri Jul 22 15:43:00 2016 -0500 Add some more knobs to twiddle for KNL microkernel. commit 8c6e621c099521e7a4d87e007bb8224faa5f33a3 Author: Devin Matthews Date: Fri Jul 22 15:05:15 2016 -0500 Make knl conform to new kernel dir structure. commit ce7214c6618d6f22f4ce2ee452336236916d1f30 Merge: 119d0399 ce59f811 Author: Devin Matthews Date: Fri Jul 22 14:59:53 2016 -0500 Merge remote-tracking branch 'origin/master' into knl commit ce59f81108ec9aea918a7e77030da8acfdd397ce Merge: ff41153f 707a2b7f Author: Field G. Van Zee Date: Fri Jul 22 14:48:14 2016 -0500 Merge pull request #88 from devinamatthews/32bit-dim_t Handle 32-bit dim_t in 64-bit microkernels. commit 707a2b7faca137cca7cab7b11a12c44ddaf7ad53 Author: Devin Matthews Date: Fri Jul 22 13:49:44 2016 -0500 Somehow forgot the most important microkernel. commit 47ec045056351ac4f0791c071fa0daaa81699c8c Merge: 08f1d6b6 ff41153f Author: Devin Matthews Date: Fri Jul 22 13:45:23 2016 -0500 Merge remote-tracking branch 'upstream/master' into 32bit-dim_t commit 08f1d6b6fa344275de0f675f69737145ccf6646a Author: Devin Matthews Date: Fri Jul 22 13:44:37 2016 -0500 Use 64-bit intermediate variable for k for architectures that do 64-bit loads in case dim_t is 32-bit. commit ff41153f4eb7f38ed94bdd9a3fd81fb979f3f401 Merge: f9214ced e0d2fa0d Author: Field G. Van Zee Date: Fri Jul 22 13:21:03 2016 -0500 Merge pull request #86 from devinamatthews/haswell-vmovups Remove alignment restrictions on C in haswell kernel. commit e0d2fa0d835ab49366aeb790363bb2b571d36ed8 Author: Devin Matthews Date: Fri Jul 22 12:56:51 2016 -0500 Relax alignment restrictions for haswell sgemm. commit f9214ced97392861f5a0ea72abfcf6f41faf674c Merge: 413d62ac 08666eaa Author: Field G. Van Zee Date: Fri Jul 22 12:16:39 2016 -0500 Merge pull request #85 from devinamatthews/qopenmp Change -openmp to -fopenmp for icc. 
commit ee2c139df6ad53c6aec8a67ab23b3b1912e8d259 Author: Devin Matthews Date: Fri Jul 22 12:06:03 2016 -0500 Remove alignment restrictions on C in haswell kernel. commit 08666eaa20d8a31f2f92f944e5bfa7c1558c53e4 Author: Devin Matthews Date: Fri Jul 22 11:07:34 2016 -0500 Change -openmp to -fopenmp for icc. commit 119d0399428905053265f3aca1cc8cc1fde3b363 Author: Devin Matthews Date: Fri Jul 22 10:23:31 2016 -0500 Add 8x24 KNL kernel. commit 1aa77dfc1dc183d16e0b6a1196d9c263f021e83d Merge: 9101a9c8 ec9f5983 Author: praveeng Date: Thu Jul 21 14:22:40 2016 +0530 Merge master code as on 2016_07_21 to amd-staging branch by praveeng Change-Id: Ic7d0a21101358f08147736e7f1884e7409937344 commit b58cda9eba0c1e175460aae109baf792d29ba5bf Merge: 318f063d 413d62ac Author: Devin Matthews Date: Tue Jul 19 14:09:09 2016 -0500 Merge remote-tracking branch 'origin/master' into knl # Conflicts: # frame/base/bli_threading.h # frame/include/blis.h # frame/thread/bli_thread.c commit ec9f59836b32260c29ff1cd24e629c7d8de14992 Merge: 197e182f 763babe4 Author: praveeng Date: Mon Jul 18 12:56:25 2016 +0530 Merge branch 'master' of https://github.com/clMathLibraries/blis-amd commit 197e182fcbf1340fd4a202fac58bea6cfcfa9e2f Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 41fb32711031e7ec86b062aa7f53255d1f5905e2 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit d0dfe5b5372cc7558ee9c4104b29f82eecc7ed61 Merge: 31def12e 413d62ac Author: Field G. Van Zee Date: Thu Jul 14 11:01:06 2016 -0500 Merge branch 'master' into compose commit 9101a9c880e3934f8a63ffc7fe15f5fc1077a73d Author: sthangar Date: Wed Jul 13 16:51:14 2016 +0530 Checked in optimized 1V kernels along with benchmark codes. 
Also incorporated review comments for 1F kernels Change-Id: I035c0d39e6b0bed28e6e2041242186c49f6ed55b commit 763babe488880b42c86c7fc207aa7665bd0ff9f7 Merge: 357c990b 413d62ac Author: praveeng Date: Wed Jul 13 11:57:19 2016 +0530 Merge remote-tracking branch 'publirepo/master' commit 413d62aca28edabba56605a9f87d5b715831e1db Author: Field G. Van Zee Date: Tue Jul 12 15:02:52 2016 -0500 README update (use official ACM TOMS links). commit dfa431f696db2df4065ea454df268a2e0bc02eac Author: Field G. Van Zee Date: Tue Jul 12 14:21:19 2016 -0500 README update (BLIS2 TOMS article now in-print). commit 357c990bdd7bd5667aac5adf1bab3712973e7414 Author: praveeng Date: Tue Jul 5 16:51:23 2016 +0530 first commit Change-Id: Ib50c81acda3b2c1583da3d421efc0ca547ef68e2 commit 8aee306300adb099b66036f2c2f7f3996433cf49 Author: praveeng Date: Tue Jul 5 15:00:31 2016 +0530 small modification to readme for git push test Change-Id: I68506a49586b07eaa907f3f85304ee40d4c92d0a commit 31def12e2629f187e40f93f6bae9e26a6c2660e2 Author: Field G. Van Zee Date: Thu Jun 30 15:19:20 2016 -0500 First phase of control tree redesign. Details: - These changes constitute the first set of changes in preparation to revamping the structure and use of control trees in BLIS. Modifications in this commit don't affect the control tree code yet, but rather lay the groundwork. - Defined wrappers for the following functions, where the wrappers each take a direction parameter of a new enumerated type (BLIS_BWD or BLIS_FWD), dir_t, and executes the correct underlying function. - bli_acquire_mpart_*() and _vpart_*() - bli_*_determine_kc_[fb]() - bli_thread_get_range_*() and bli_thread_get_range_weighted_*() - Consolidated all 'f' (forwards-moving) and 'b' (backwards-moving) blocked variants for trmm and trsm, and renamed gemm and herk variants accordingly. The direction is now queried via routines such as bli_trmm_direct(), which determines the direction from the implied side and uplo parameters. 
For gemm and herk, it is unconditionally BLIS_FWD. - Defined wrappers to parameter-specific macrokernels for herk, trmm, and trsm, e.g. bli_trmm_xx_ker_var2(), that execute the correct underlying macrokernel based on the implied parameters. The same logic used to choose the dir_t in _direct() functions is used here. - Simplified the function pointer arrays in _int() functions given the consolidation and dir_t querying mentioned above. - Function signature (whitespace) reformatting for various functions. - Removed old code in various 'old' directories. commit 405c9d46344d93c3eab5572b233900b50ca50d68 Author: sthangar Date: Wed Jun 22 12:18:54 2016 +0530 Check-in the fused kernels optimized for Zen Change-Id: I7b2f467b960e7b9a285f06e47be87de122e5fa24 commit 232754feecf29452987666b9f5ebba2619bfd0b0 Author: Field G. Van Zee Date: Tue Jun 21 14:25:39 2016 -0500 Fixed compiler warning in rand[vm], randn[vm]. Details: - Fixed compiler warnings about unused variables related to the disabling of normalization in the structured cases of the rand[vm] and randn[vm] operations. commit a89555d1605574f3685813dcc972b636dd61264d Author: Field G. Van Zee Date: Fri Jun 17 14:08:35 2016 -0500 Added randn[vm] operations, support in testsuite. Details: - Defined a new randomization operation, randn, on vectors and matrices. The randnv and randnm operations randomize each element of the target object with values from a narrow range of values. Presently, those values are all integer powers of two, but they do not need to be powers of two in order to achieve the primary goal, which is to initialize objects that can be operated on with plenty of precision "slack" available to allow computations that avoid roundoff. Using this method of randomization makes it much more likely that testsuite residuals of properly-functioning operations are close to zero, if not exactly zero. 
- Updated existing randomization operations randv and randm to skip special diagonal handling and normalization for matrices with structure. This is now handled by the testsuite modules by explicitly calling a testsuite function that loads the diagonal (and scales off-diagonal elements). - Added support for randnv and randnm in the testsuite with a new switch in input.general that universally toggles between use of the classic randv/randm, which use real values on the interval [-1,1], and randnv/randnm, which use only values from a narrow range. Currently, the narrow range is: +/-{2^0, 2^-1, 2^-2, 2^-3, 2^-4, 2^-5, 2^-6}, as well as 0.0. - Updated testsuite modules so that a testsuite wrapper function is called instead of directly calling the randomization operations (such as bli_randv() and bli_randm()). This wrapper also takes a bool_t that indicates whether the object's elements should be normalized. (NOTE: As alluded to above, in the test modules of triangular solve operations such as trsv and trsm, we perform the extra step of loading the diagonal.) - Defined a new level-0 operation, invertsc, which inverts a scalar. - Updated the abval2ris and sqrt2ris level-0 macros to avoid an unlikely but possible divide-by-zero. - Updated function signature and prototype formatting in testsuite. commit 318f063dcbd8b594969e401bc99146d24b01066a Author: Devin Matthews Date: Wed Jun 8 17:46:50 2016 -0500 Add new KNL microkernel derived from Haswell. commit 096895c5d538a7f8817603d7cf28c52e99340def Author: Field G. Van Zee Date: Mon Jun 6 13:32:04 2016 -0500 Reorganized code, APIs related to multithreading. Details: - Reorganized code and renamed files defining APIs related to multithreading. All code that is not specific to a particular operation is now located in a new directory: frame/thread. Code is now organized, roughly, by the namespace to which it belongs (see below). - Consolidated all operation-specific *_thrinfo_t object types into a single thrinfo_t object type. 
Operation-specific level-3 *_thrinfo_t APIs were also consolidated, leaving bli_l3_thrinfo_*() and bli_packm_thrinfo_*() functions (aside from a few general purpose bli_thrinfo_*() functions). - Renamed thread_comm_t object type to thrcomm_t. - Renamed many of the routines and functions (and macros) for multithreading. We now have the following API namespaces: - bli_thrinfo_*(): functions related to thrinfo_t objects - bli_thrcomm_*(): functions related to thrcomm_t objects. - bli_thread_*(): general-purpose functions, such as initialization, finalization, and computing ranges. (For now, some macros, such as bli_thread_[io]broadcast() and bli_thread_[io]barrier() use the bli_thread_ namespace prefix, even though bli_thrinfo_ may be more appropriate.) - Renamed thread-related macros so that they use a bli_ prefix. - Renamed control tree-related macros so that they use a bli_ prefix (to be consistent with the thread-related macros that were also renamed). - Removed #undef BLIS_SIMD_ALIGN_SIZE from dunnington's bli_kernel.h. This #undef was a temporary fix to some macro defaults which were being applied in the wrong order, which was recently fixed. commit 232530e88ff99f37abcae5b6fb5319a9a375a45f Merge: 4bcabd1b eef37f8b Author: Tyler Michael Smith Date: Wed Jun 1 15:14:10 2016 -0500 Merge commit 'refs/pull/81/head' of https://github.com/flame/blis Conflicts: frame/base/bli_threading_pthreads.c frame/base/bli_threading_pthreads.h commit 4bcabd1bf60688c38cf562459fc5e8be8b831756 Author: Tyler Michael Smith Date: Wed Jun 1 13:27:28 2016 -0500 Use spin locks instead of pthread barriers commit eef37f8b4d81845a6ba4bf25586d32b50c3e8a68 Author: Jeff Hammond Date: Sun May 29 22:28:13 2016 -0700 use GCC intrinsic instead of pthread_mutex for atomic increment and fetch commit 9dcd6f05c4c3ff2ce7cd87a9951a96ebef22681e Author: Field G. Van Zee Date: Tue May 24 13:15:32 2016 -0500 Implemented developer-configurable malloc()/free(). 
Details: - Replaced all instances of bli_malloc() and bli_free() with one of: - bli_malloc_pool()/bli_free_pool() - bli_malloc_user()/bli_free_user() - bli_malloc_intl()/bli_free_intl() each of which can be configured to call malloc()/free() substitutes, so long as the substitute functions have the same function type signatures as malloc() and free() defined by C's stdlib.h. The _pool() function is called when allocating blocks for the memory pools (used for packing buffers, primarily), the _user() function is called when obj_t's are created (via bli_obj_create() and friends), and the _intl() function is called for internal use by BLIS, such as when creating control tree nodes or temporary buffers for manipulating internal data structures. Substitutes for any of the three types of bli_malloc() may be specified by #defining the following pairs of cpp macros in bli_kernel.h: - BLIS_MALLOC_POOL/BLIS_FREE_POOL - BLIS_MALLOC_USER/BLIS_FREE_USER - BLIS_MALLOC_INTL/BLIS_FREE_INTL to be the name of the substitute functions. (Obviously, the object code that contains these functions must be provided at link-time.) These macros default to malloc() and free(). Substitute functions are also automatically prototyped by BLIS (in bli_malloc_prototypes.h). - Removed definitions for bli_malloc() and bli_free(). - Note that bli_malloc_pool() and bli_malloc_user() are now defined in terms of a new function, bli_malloc_align(), which aligns memory to an arbitrary (power of two) alignment boundary, but does so manually, whereas before alignment was performed behind the scenes by posix_memalign(). Currently, bli_malloc_intl() is defined in terms of bli_malloc_noalign(), which serves as a simple wrapper to the designated function that is passed in (e.g. BLIS_MALLOC_INTL). Similarly, there are bli_free_align() and bli_free_noalign(), which are used in concert with their bli_malloc_*() counterparts.
commit 9dd440109a9d964f5cd286e9f83c487ad703e1e4 Author: Jeff Hammond Date: Sat May 21 15:21:58 2016 -0700 fix 404 link to BuildSystem Google Code is dead. Long live GitHub! commit d309f20b7376a68efa3b864ad790c2021c071655 Author: Field G. Van Zee Date: Wed May 18 15:13:53 2016 -0500 Added alignment switch to testsuite. Details: - Added a new input parameter to input.general that globally toggles whether testsuite tests are performed on objects whose buffers and leading dimensions have been aligned, and changed the implementation of libblis_test_mobj_create() to employ alignment (or not) regardless of whether row, column, or general storage is being tested. - Updated configure script's "--help" text to indicate default behavior for internal integer type size and BLAS/CBLAS integer type size options. commit 32db0adc218ea4ae370164dbe8d23b41cd3526d3 Author: Field G. Van Zee Date: Tue May 17 15:20:16 2016 -0500 Generate prototypes for user-defined packm kernels. Details: - Created template prototypes for packm kernels (in bli_l1m_ker.h), and then redefined reference packm kernels' prototyping headers in terms of this template, as is already done for level-1v, -1f, and -3 kernels. - Automatically generate prototypes for user-defined packm kernels in bli_kernel_prototypes.h (using the new template prototypes in bli_l1m_ker.h). - Defined packm kernel function types in bli_l1m_ft.h, including for packm kernels specific to induced methods, which are now used in bli_packm_cxk.c and friends rather than using a locally-defined function type. - In bli_packm_cxk.c, extended function pointer for packm kernels array from out to index 31 (from previous maximum of 17). This allows us to store the unrolled 30xk kernel in the array for use (on knc, for example). Note: This should have been done a long time ago. commit e3bd5ca64ae7c190ba689396c0de687b829a11fe Author: Devin Matthews Date: Thu May 12 20:54:13 2016 -0500 Fix SIMD definitions in KNL config, and a couple of fixes to C update. 
commit 4fe02e3d497995d94d34d3fcf5af895084cfc8b9 Author: Devin Matthews Date: Thu May 12 20:53:58 2016 -0500 Move bli_kernel.h before bli_threading.h in order of inclusion in blis.h. commit 4bcf1b35abea3f3dfc8f2fe462dcf155cf199e55 Author: Field G. Van Zee Date: Wed May 11 16:09:49 2016 -0500 Fixed bli_get_range_*() bugs in trsm variants. Details: - Fixed incorrect calls to bli_get_range_*() from within trsm blocked variants 1f, 2b, and 2f. The bug somehow went undetected since the big commit (537a1f4), and, strangely, did not manifest via the BLIS testsuite. The bug finally came to our attention when running the libflame test suite while linking to BLIS. Thanks to Kiran Varaganti for submitting the initial report that led to this bug. commit 9cfa33023f123a6c17e987f72fba174ce073f0b6 Author: Field G. Van Zee Date: Wed May 11 16:02:30 2016 -0500 Minor updates to bli_f2c.h. Details: - Added #undef guards to certain #define statements in bli_f2c.h, and renamed the file guard to BLIS_F2C_H. This helps when #including "blis.h" from an application or library that already #includes an "f2c.h" header. commit a09a2e23eacf5328858c8318bb637c5ff3b71d08 Merge: 4dcd37eb 7c604e1c Author: Tyler Michael Smith Date: Wed May 11 10:47:11 2016 -0500 Merge pull request #76 from devinamatthews/move_simd_defs Move default SIMD-related definitions to bli_kernel_macro_defs.h commit 4dcd37eb1b12a6e08cc13df7b61391ef8363f5d8 Author: Tyler Smith Date: Tue May 10 16:28:59 2016 -0500 fixing knc simd align size commit 619dee0daec3474b4e5a55df90a61aabcae194f2 Merge: b790b3d9 7c604e1c Author: Devin Matthews Date: Tue May 10 12:13:24 2016 -0500 Merge branch 'move_simd_defs' into knl commit 7c604e1cbc1609b6e12d3ee973c08b7af5035be4 Author: Devin Matthews Date: Tue May 10 12:11:55 2016 -0500 Move default SIMD-related definitions to bli_kernel_macro_defs.h. Otherwise, configurations which customize these fail as these are now defined in bli_kernel.h.
commit b790b3d9e1820f3b691676de48c291cae083452d Merge: 4f8c05c9 a7be2d28 Author: Devin Matthews Date: Tue May 10 11:49:47 2016 -0500 Merge branch 'master' into knl commit a7be2d28e8930b154d0da1d6929b54a96e210af6 Merge: 97b512ef 4b1e55ed Author: Field G. Van Zee Date: Tue May 10 11:48:51 2016 -0500 Merge pull request #74 from devinamatthews/fix_common_symbols Default-initialize all extern global variables to avoid generating common symbols. commit 4b1e55edbfe0e1cb2e7b9428424903497cb7a841 Author: Devin Matthews Date: Tue May 10 10:08:47 2016 -0500 Default-initialize all extern global variables to avoid generating common symbols. Fixes #73. commit 97b512ef62c7e25c97ed5e9eca81cd7015b2ac91 Author: Field G. Van Zee Date: Fri May 6 10:24:30 2016 -0500 Include headers from cblas.h to pull in f77_int. Details: - Added #include statements for certain key BLIS headers so that the definition of f77_int is pulled in when a user compiles application code with only #include "cblas.h" (and no other BLIS header). This is necessary since f77_int is now used within the cblas API. commit c3a4d39d03665135f1616588b5ef7c3e9ef5688d Author: Field G. Van Zee Date: Wed May 4 17:22:56 2016 -0500 Updates to haswell gemm micro-kernels. Details: - Added two new sets of [sd]gemm micro-kernels for haswell architectures, one that is 4x24/4x12 (s and d) and one that is 6x16/6x8. - Changed the haswell configuration to use the 6x16/6x8 micro-kernels by default. - Updated various Makefiles, in test, test/3m4m, and testsuite. commit 0b01d355ae861754ae2da6c9a545474af010f02e Author: Field G. Van Zee Date: Wed Apr 27 15:21:10 2016 -0500 Miscellaneous cleanups, fixes to recent commits. Details: - Fixed a typo in bli_l1f_ref.h, introduced into bbb8569, that only manifested when non-reference level-1f kernels were used. - Added an #undef BLIS_SIMD_ALIGN_SIZE to bli_kernel.h of dunnington configuration to prevent a compile-time warning until I can figure out the proper permanent fix. 
- Moved frame/1f/kernels/bli_dotxaxpyf_ref_var1.c out of the compilation path (into 'other' directory). _ref_var2 is used by default, which is the variant that is built on axpyf and dotxf instead of dotaxpyv. - Removed section of frame/include/bli_config_macro_defs.h pertaining to mixed datatype support. commit ed7326c836f427e2f8420b015220ce293207b10c Author: Field G. Van Zee Date: Wed Apr 27 14:57:40 2016 -0500 Added 'restrict' to l1v/l1f code in 'kernels' dir. Details: - Added 'restrict' keyword to existing kernel definitions in 'kernels' directory. These changes were meant for inclusion in bbb8569. commit bbb8569b2a08c3bcd631d5a05eb389d01d94ac07 Author: Field G. Van Zee Date: Wed Apr 27 14:13:46 2016 -0500 Use 'restrict' in all kernel APIs; wspace changes. Details: - Updated level-1v, level-1f kernel function types (bli_l1?_ft.h) and generic kernel prototypes (bli_l1?_ker.h) to use 'restrict' for all numerical operand pointers (ie: all pointers except the cntx_t). - Updated level-1f reference kernel definitions to use 'restrict' for all numerical operand pointers. (Level-1v reference kernel definitions were already updated in bdbda6e.) - Rewrote the level-1v and level-1f reference kernel prototypes in bli_l1v_ref.h and bli_l1f_ref.h, respectively, to simply #include bli_l1v_ker.h and bli_l1f_ker.h with redefined function base names (as was already being done for the level-3 micro-kernel prototypes in bli_l3_ref.h), rather than duplicate the signatures from the _ker.h files. - Added definitions to frame/include/bli_kernel_prototypes.h for axpbyv and xpbyv, which were probably meant for inclusion in bdbda6e. - Converted a number of instances of four spaces, as introduced in bdbda6e, to tabs. commit 4ea419c72c789825e1f93a1eee88219bbf873930 Merge: f1e9be2a bdbda6e6 Author: Field G. 
Van Zee Date: Tue Apr 26 12:50:45 2016 -0500 Merge pull request #70 from devinamatthews/daxpby Give the level1v operations some love commit bdbda6e6acc682ab1b6ca680edebd09ae12a832c Author: Devin Matthews Date: Mon Apr 25 11:05:57 2016 -0500 Give the level1v operations some love: - Add missing axpby and xpby operations (plus test cases). - Add special case for scal2v with alpha=1. - Add restrict qualifiers. - Add special-case algorithms for incx=incy=1. commit f1e9be2aba1a057eedb947bbae96848597777408 Author: Field G. Van Zee Date: Fri Apr 22 15:34:02 2016 -0500 Minor tweak to test/Makefile. Details: - Just committing a minor change to test/Makefile that has been lingering in my local working copy for longer than I can remember. commit aa0bceec277938328dabeb744680623f24fb0b61 Merge: 4136553f e2784b4c Author: Field G. Van Zee Date: Fri Apr 22 12:01:31 2016 -0500 Merge branch 'master' of github.com:flame/blis commit 4136553f0d0661a668dfdb9edcd7ce1c5773dde7 Author: Field G. Van Zee Date: Fri Apr 22 11:53:53 2016 -0500 Clear level-3 cntx_t's via memset() before use. Details: - In all level-3 operations' _cntx_init() functions, replaced calls to bli_cntx_obj_init() with calls to bli_cntx_obj_clear(), and in all level-3 operations' _cntx_finalize() functions, removed calls to bli_cntx_obj_finalize(), leaving those function definitions empty. - Changed the definition of bli_cntx_obj_clear() so that the clearing occurs via a single call to memset(). commit 4f8c05c9e2ef4cbb82b35a3ebf1f0a0ac665830e Author: Devin Matthews Date: Thu Apr 21 10:00:59 2016 -0500 Rearrange KNL dgemm kernel again to streamline usage of ymm register. sgemm and dgemm now both working with Intel SDE. commit e2784b4c921f706e756df3e146e20a4cb63f53e3 Merge: dd0ab1d9 a9b6c3ab Author: Field G. 
Van Zee Date: Wed Apr 20 18:34:09 2016 -0500 Merge pull request #67 from devinamatthews/cblas-f77-int Change CBLAS integer type to f77_int commit a9b6c3abda6222a8b240361643932e83cf726c4f Merge: e4c54c81 dd0ab1d9 Author: Devin Matthews Date: Wed Apr 20 16:00:10 2016 -0500 Merge remote-tracking branch 'origin/master' into cblas-f77-int # Conflicts: # config/haswell/bli_config.h commit e4c54c81463c2a19c9bb6b1f0f1be3fa9d018a45 Author: Devin Matthews Date: Wed Apr 20 15:56:46 2016 -0500 Change integer type in CBLAS function signatures to f77_int, and add proper const-correctness to BLAS layer. commit dd0ab1d93f33abca6af9edd7b8e52da62dcfa5b1 Author: Field G. Van Zee Date: Wed Apr 20 14:38:23 2016 -0500 Converted some bli_cntx query functions to macros. Details: - Commented out several datatype-aware query functions (those ending in _dt) from bli_cntx.c, as well as their prototypes in bli_cntx.h, and added equivalent cpp query macros to bli_cntx.h. - Added 'bli_config.h' to .gitignore. commit 7193230f7d35edbd1d2f77842a613971f1603463 Author: Devin Matthews Date: Wed Apr 20 09:37:30 2016 -0500 Work around missing VPMULLQ on KNL. commit a30ccbc4c6a6e6460e78af6b5c530ee0d06f98fb Merge: eb2f18e4 0e1a9821 Author: Field G. Van Zee Date: Tue Apr 19 15:04:33 2016 -0500 Merge pull request #66 from devinamatthews/blas-configure Add configure options and generate bli_config.h automatically. commit bd44cf13e886069bc66c10ac0db178be96629a0d Author: Devin Matthews Date: Tue Apr 19 13:43:04 2016 -0500 Fix copy-paste errors in KNL kernels. commit eb2f18e4844d985715df20798f50f9cc12e3b5ad Author: Field G. Van Zee Date: Tue Apr 19 12:50:32 2016 -0500 More compile-time fixes to bgq gemm ukernel code. commit 0e1a9821d860f6c1d818baf4c48d21a23726c132 Author: Devin Matthews Date: Tue Apr 19 11:44:37 2016 -0500 Add configure options and generate bli_config.h automatically. Options to configure have been added for: - Setting the internal BLIS and BLAS/CBLAS integer sizes. 
- Enabling and disabling the BLAS and CBLAS layers. Additionally, configure options which require defining macros (the above plus the threading model), write their macros to the automatically-generated bli_config.h file in the top-level build directory. The old bli_config.h files in the config dirs were removed, and any kernel-related macros (SIMD size and alignment etc.) were moved to bli_kernel.h. The Makefiles were also modified to find the new bli_config.h file. Lastly, support for OMP in clang has been added (closes #56). commit a11eec05928ddc5c43fa5dbcd35f2edd24ff35a1 Author: Devin Matthews Date: Mon Apr 18 13:13:36 2016 -0500 Add sgemm ukernels for KNL. vpmullq is not implemented on KNL -- needs workaround. commit ff84469a4575f1ef8a0010046fde52240a312cae Author: Field G. Van Zee Date: Mon Apr 18 12:29:09 2016 -0500 Applied various compilation fixes to bgq kernels. commit c38e0dab05b2dc36672eab96e1248fb7fb2d785b Merge: bd5e2296 cbcd0b73 Author: Devin Matthews Date: Mon Apr 18 10:21:35 2016 -0500 Merge remote-tracking branch 'origin/master' into knl commit bd5e2296e98e042c31f1e8ece2c1ca8e4bdc2d4c Merge: 4745def0 49f85177 Author: Devin Matthews Date: Mon Apr 18 10:15:22 2016 -0500 Merge remote-tracking branch 'origin/knl' into knl commit 4745def0c87377ae83ad73ac514d7de08a96b2ac Author: Devin Matthews Date: Mon Apr 18 10:15:05 2016 -0500 Add 64-bit offset vector so we can use vgatherqpd. commit 49f85177f886f38889b60503a4e12fa7f04be1fd Author: Devin Matthews Date: Mon Apr 18 10:14:11 2016 -0500 KNL ukernel compiles with gcc. commit cbcd0b739dc54bd14fbb46aeda267c26725cd70f Author: Tyler Michael Smith Date: Mon Apr 18 03:12:57 2016 -0500 Changing ifdef for OSX pthread barriers commit 58b2c3cf040134d1be913c585a3c6905629116c0 Author: Devin Matthews Date: Sat Apr 16 16:12:24 2016 -0500 Rewrite of KNL kernel in GNU extended asm syntax. commit dd62080cea78f3a23616200d6640e52c102b2bb9 Author: Field G. 
Van Zee Date: Fri Apr 15 11:15:41 2016 -0500 Compile-time fix to bgq l1f kernels. Details: - Fixed an old reference to bli_daxpyf_fusefac, which no longer exists, by replacing it with the axpyf fusing factor (8), and cleaned up the relevant section of config/bgq/bli_kernel.h. - Removed most of the details of the level-3 kernels from the template kernel code in config/template/kernels/3 and replaced it with a reference to the relevant kernel wiki maintained on the BLIS github website. commit d5a915dd8d7a6ead42a68772e4420eb3647e6f1a Merge: 4320b725 41694675 Author: Field G. Van Zee Date: Thu Apr 14 12:56:36 2016 -0500 Merge branch 'master' of github.com:flame/blis commit 4320b725a1f8fd34101470b6cf52ad504a79c517 Author: Field G. Van Zee Date: Thu Apr 14 12:51:29 2016 -0500 Use kernel CFLAGS on "ukernels" directories. Details: - Updated the top-level Makefile so that the CFLAGS variable designated for kernel source code is applied not only to source code in directories named "kernels" but source code in any directory that contains the substring "kernels", such as "ukernels". - Formally disabled some code in gen-make-frag.sh script that was already effectively disabled. The code was related to handling "noopt" and "kernel" directories, which is now handled independently within the top-level Makefile without needing to place these source files into a separate makefile variable. commit 41694675e4cb56e2e0323c7a7db48e0819606a31 Author: Tyler Smith Date: Wed Apr 13 15:51:08 2016 -0500 pthreads bugfixes Getting pthreads to work on my Mac Implemented a pthread barrier when _POSIX_BARRIER isn't defined Now spawn n-1 threads instead of n threads so that master thread isn't just spinning the whole time Add -lpthread instead of -pthread to LDFLAGS (for clang) commit f756dbfa0d542cbc497724981520c83abf049c4b Author: Field G. Van Zee Date: Wed Apr 13 11:25:33 2016 -0500 Removed stale #include from bgq configuration.
Details: - Removed an old #include statement ("bli_gemm_8x8.h") from the bli_kernel.h file in the bgq configuration. It turns out this file was no longer needed even prior to 537a1f4. commit 0bd4169ea75f690714e7d2912229932a75d8a7e2 Author: Field G. Van Zee Date: Mon Apr 11 18:08:32 2016 -0500 Fixed context-broken dunnington/penryn kernels. Details: - Added missing context parameters to several instances where simpler kernels, or reference kernels, are called instead of executing the main body code contained in the kernel function in question. - Renamed axpyv and dotv kernel files to use "opt" instead of "int" substring, for consistency with level-1f kernels. commit 7912af5db45b7372d19a9a3dfeb82df302a05628 Author: Field G. Van Zee Date: Mon Apr 11 17:32:13 2016 -0500 CHANGELOG update (0.2.0) commit 898614a555ea0aa7de4ca07bb3cb8f5708b6a002 (tag: 0.2.0) Author: Field G. Van Zee Date: Mon Apr 11 17:32:09 2016 -0500 Version file update (0.2.0) commit 537a1f4f85ce1aa008901857cb3182e6b4546d7f Author: Field G. Van Zee Date: Mon Apr 11 17:21:28 2016 -0500 Implemented runtime contexts and reorganized code. Details: - Retrofitted a new data structure, known as a context, into virtually all internal APIs for computational operations in BLIS. The structure is now present within the type-aware APIs, as well as many supporting utility functions that require information stored in the context. User- level object APIs were unaffected and continue to be "context-free," however, these APIs were duplicated/mirrored so that "context-aware" APIs now also exist, differentiated with an "_ex" suffix (for "expert"). These new context-aware object APIs (along with the lower-level, type- aware, BLAS-like APIs) contain the address of a context as a last parameter, after all other operands.
Contexts, or specifically, cntx_t object pointers, are passed all the way down the function stack into the kernels and allow the code at any level to query information about the runtime, such as kernel addresses and blocksizes, in a thread- friendly manner--that is, one that allows thread-safety, even if the original source of the information stored in the context changes at run-time; see next bullet for more on this "original source" of info). (Special thanks go to Lee Killough for suggesting the use of this kind of data structure in discussions that transpired during the early planning stages of BLIS, and also for suggesting such a perfectly appropriate name.) - Added a new API, in frame/base/bli_gks.c, to define a "global kernel structure" (gks). This data structure and API will allow the caller to initialize a context with the kernel addresses, blocksizes, and other information associated with the currently active kernel configuration. The currently active kernel configuration within the gks cannot be changed (for now), and is initialized with the traditional cpp macros that define kernel function names, blocksizes, and the like. However, in the future, the gks API will be expanded to allow runtime management of kernels and runtime parameters. The most obvious application of this new infrastructure is the runtime detection of hardware (and the implied selection of appropriate kernels). With contexts in place, kernels may even be "hot swapped" at runtime within the gks. Once execution enters a level-3 _front() function, the memory allocator will be reinitialized on-the-fly, if necessary, to accommodate the new kernels' blocksizes. 
If another application thread is executing with another (previously loaded) kernel, it will finish in a deterministic fashion because its kernel information was loaded into its context before computation began, and also because the blocks it checked out from the internal memory pools will be unaffected by the newer threads' reinitialization of the allocator. - Reorganized and streamlined the 'ind' directory, which contains much of the code enabling use of induced methods for complex domain matrix multiplication; deprecated bli_bsv_query.c and bli_ukr_query.c, as those APIs' functionality is now mostly subsumed within the global kernel structure. - Updated bli_pool.c to define a new function, bli_pool_reinit_if(), that will reinitialize a memory pool if the necessary pool block size has increased. - Updated bli_mem.c to use bli_pool_reinit_if() instead of bli_pool_reinit() in the definition of bli_mem_pool_init(), and placed usage of contexts where appropriate to communicate cache and register blocksizes to bli_mem_compute_pool_block_sizes(). - Simplified control trees now that much of the information resides in the context and/or the global kernel structure: - Removed blocksize object pointers (blksz_t*) fields from all control tree node definitions and replaced them with blocksize id (bszid_t) values instead, which may be passed into a context query routine in order to extract the corresponding blocksize from the given context. - Removed micro-kernel function pointers (func_t*) fields from all control tree node definitions. Now, any code that needs these function pointers can query them from the local context, as identified by a level-3 micro-kernel id (l3ukr_t), level-1f kernel id, (l1fkr_t), or level-1v kernel id (l1vkr_t). 
- Removed blksz_t object creation and initialization, as well as kernel function object creation and initialization, from all operation- specific control tree initialization files (bli_*_cntl.c), since this information will now live in the gks and, secondarily, in the context. - Removed blocksize multiples from blksz_t objects. Now, we track blocksize multiples for each blocksize id (bszid_t) in the context object. - Removed the bool_t's that were required when a func_t was initialized. These bools are meant to allow one to track the micro-kernel's storage preferences (by rows or columns). This preference is now tracked separately within the gks and contexts. - Merged and reorganized many separate-but-related functions into single files. This reorganization affects frame/0, 1, 1d, 1m, 1f, 2, 3, and util directories, but has the most obvious effect of allowing BLIS to compile noticeably faster. - Reorganized execution paths for level-1v, -1d, -1m, and -2 operations in an attempt to reduce overhead for memory-bound operations. This includes removal of default use of object-based variants for level-2 operations. Now, by default, level-2 operations will directly call a low-level (non-object based) loop over a level-1v or -1f kernel. - Converted many common query functions in blk_blksz.c (renamed from bli_blocksize.c) and bli_func.c into cpp macros, now defined in their respective header files. - Defined bli_mbool.c API to create and query "multi-bools", or heterogeneous bool_t's (one for each floating-point datatype), in the same spirit as blksz_t and func_t. - Introduced two key parameters of the hardware: BLIS_SIMD_NUM_REGISTERS and BLIS_SIMD_SIZE. These values are needed in order to compute a third new parameter, which may be set indirectly via the aforementioned macros or directly: BLIS_STACK_BUF_MAX_SIZE. 
This value is used to statically allocate memory in macro-kernels and the induced methods' virtual kernels to be used as temporary space to hold a single micro-tile. These values are now output by the testsuite. The default value of BLIS_STACK_BUF_MAX_SIZE is computed as "2 * BLIS_SIMD_NUM_REGISTERS * BLIS_SIMD_SIZE". - Cleaned up top-level 'kernels' directory (for example, renaming the embarrassingly misleading "avx" and "avx2" directories to "sandybridge" and "haswell," respectively, and gave more consistent and meaningful names to many kernel files (as well as updating their interfaces to conform to the new context-aware kernel APIs). - Updated the testsuite to query blocksizes from a locally-initialized context for test modules that need those values: axpyf, dotxf, dotxaxpyf, gemm_ukr, gemmtrsm_ukr, and trsm_ukr. - Reformatted many function signatures into a standard format that will more easily facilitate future API-wide changes. - Updated many "mxn" level-0 macros (ie: those used to inline double loops for level-1m-like operations on small matrices) in frame/include/level0 to use more obscure local variable names in an effort to avoid variable shadowing. (Thanks to Devin Matthews for pointing out these gcc warnings, which are only output using -Wshadow.) - Added a conj argument to setm, so that its interface now mirrors that of scalm. The semantic meaning of the conj argument is to optionally allow implicit conjugation of the scalar prior to being populated into the object. - Deprecated all type-aware mixed domain and mixed precision APIs. Note that this does not preclude supporting mixed types via the object APIs, where it produces absolutely zero API code bloat. commit dd856c2cb75a2221a503a73dde27790c34b91570 Author: Devin Matthews Date: Mon Apr 11 10:39:18 2016 -0500 Translated MIC kernel to KNL and cleaned up a bit. Only real change is lack of swizzle modifiers for FMA instructions (used bcast from memory instead).
commit 7f27431d3fffdda99c282ec412731d0a90cb32a7 Author: Devin Matthews Date: Fri Apr 8 10:04:39 2016 -0500 Copy mic kernel to knl for transliteration. commit f8f02f0334ac020021e15a415bcd33aeea01deb4 Merge: 32c92d94 d1f8e5d9 Author: Devin Matthews Date: Wed Apr 6 11:37:05 2016 -0500 Merge branch 'master' into const_correctness commit 32c92d945c55708da0eb63be1771f8c5430e3910 Merge: 62914ccb 20af937b Author: Devin Matthews Date: Wed Apr 6 11:36:02 2016 -0500 Merge branch 'master' into const_correctness commit d1f8e5d9b2ecd054ed103f4d642d748db2d4f173 Merge: 20af937b c11d28ee Author: Field G. Van Zee Date: Tue Apr 5 12:21:27 2016 -0500 Merge pull request #60 from esauvage/master sgemm µkernel for bulldozer : bug correction for k%4 != 0 commit c11d28eed89d65494bc4019f04d046520866c0ff Author: Etienne Sauvage Date: Sat Apr 2 21:15:48 2016 +0200 cgemm µkernel for bulldozer : bug correction for k%4 != 0 commit 20af937b57f82bb3acb09418d5c0206e1b24f2c7 Merge: 36c3abb0 fc61a114 Author: Field G. Van Zee Date: Thu Mar 31 14:37:30 2016 -0500 Merge pull request #59 from devinamatthews/fix_testsuite_makefile Fix testsuite makefile commit fc61a1143edeba4946d4b9915f1775bb08e643fc Author: Devin Matthews Date: Thu Mar 31 10:53:01 2016 -0500 Fix formatting in configure. commit 26379b14de630e3a6c6eef5dfe87ff001558a8a6 Author: Devin Matthews Date: Thu Mar 31 10:45:48 2016 -0500 Adjust paths in common.mk to support building from testsuite dir. commit 36c3abb05fecb02d4a9ab13b2b69d133adf34583 Merge: 64b41fa5 917ce754 Author: Field G. Van Zee Date: Thu Mar 31 10:26:17 2016 -0500 Merge pull request #58 from esauvage/master cgemm & zgemm micro-kernels for FMA4 instruction set (bulldozer confi… commit 356d854fc9e34642cc46e0e02a8ceb56114878af Author: Devin Matthews Date: Wed Mar 30 16:33:15 2016 -0500 Make symlink to common.mk in build directory. 
commit edbb8470044f82ef959583ee09613a5a985292b5 Author: Devin Matthews Date: Wed Mar 30 16:27:11 2016 -0500 Refactor out some definitions which moved from make_defs.mk to Makefile for use in testsuite Makefile. commit 917ce75482a543fef46553efff6c246939761e59 Author: Etienne Sauvage Date: Wed Mar 30 22:03:09 2016 +0200 cgemm & zgemm micro-kernels for FMA4 instruction set (bulldozer configuration), based on x86_64/avx micro-kernel commit 62914ccbcdb3c594f065dcfa65bd7e7b95c79283 Merge: bbf704bf 64b41fa5 Author: Devin Matthews Date: Tue Mar 29 15:24:25 2016 -0500 Merge branch 'master' into const_correctness commit 64b41fa554dff44b2f9ad48901b67c63836407a8 Merge: 1b09e343 0171ad58 Author: Field G. Van Zee Date: Tue Mar 29 15:19:41 2016 -0500 Merge pull request #54 from devinamatthews/more_config_opts More config opts commit 1b09e343dfe5b48b4842e2cb96f41c8cc249bad0 Author: Field G. Van Zee Date: Tue Mar 29 12:55:28 2016 -0500 Updated gcc version from 4.8 to 4.9 in .travis.yml. commit 0171ad58997b3a5a9b76301511dbe0751fffc940 Author: Devin Matthews Date: Mon Mar 28 13:55:06 2016 -0500 Add icc and clang support for Intel architectures, fixes #47. 2bd036f fixes #49 BTW. commit 3090fff64cc87ff2519a09f38e6b8699cf3cba11 Merge: 8624e365 4ca5d5b1 Author: Field G. Van Zee Date: Mon Mar 28 12:36:25 2016 -0500 Merge pull request #44 from esauvage/master sgemm micro-kernel for FMA4 instruction set commit e6e566426ac3ded7ef87cd8ff9be98accfdc4acc Merge: 469429ec 8624e365 Author: Devin Matthews Date: Sat Mar 26 14:10:15 2016 -0500 Merge branch 'master' into more_config_opts commit 8624e36543160739d954c4dbcc5a5594458f3a12 Merge: a315833f 2bd036f1 Author: Field G. Van Zee Date: Sat Mar 26 13:56:28 2016 -0500 Merge pull request #50 from devinamatthews/fix_noopt_avx Fix configuration issue where instruction set flags are not specified for debug builds. commit 469429ec34e5b1a172ce35596f9c7afdaacac131 Author: Devin Matthews Date: Fri Mar 25 20:45:41 2016 -0500 Fix LD_FLAGS -> LDFLAGS. 
commit 8442d65c9ead0376fc5f2dfad62fd4862ab9b2b3 Author: Devin Matthews Date: Fri Mar 25 20:06:48 2016 -0500 Replace -march=native with specific architecture flags to support cross-compiling, and add icc support for Intel architectures. commit 76099f20be1b49ac960f7e3c5a8296bbf4e1782d Author: Devin Matthews Date: Fri Mar 25 17:22:58 2016 -0500 Add threading option to configure. commit ad43eab4c7899d56d8d7caa6e2d92bc0581ea5a5 Merge: 9452bdb3 2bd036f1 Author: Devin Matthews Date: Fri Mar 25 15:00:02 2016 -0500 Merge branch 'fix_noopt_avx' into more_config_opts commit 9452bdb3afbf2d7f898134a091d7790817e7be9c Author: Devin Matthews Date: Fri Mar 25 14:59:50 2016 -0500 Add options for verbose make output and static/shared linking to configure. commit 2bd036f1f9ce1ee0864365557f66d9415dd42de3 Author: Devin Matthews Date: Fri Mar 25 12:16:49 2016 -0500 Fix configuration issue where instruction set flags are not specified for debug builds. commit bbf704bf7501411964a63a68f1af541f612cf92d Author: Devin Matthews Date: Fri Mar 25 09:55:35 2016 -0500 Add missing const to bli_read_nway_from_env. commit a315833f067944fb0bc14cf60f0c7dcb5dc897b6 Merge: 1d1a426d af92773f Author: Field G. Van Zee Date: Thu Mar 24 12:30:21 2016 -0500 Merge pull request #48 from figual/master Updated and improved ARMv8 micro-kernels. commit af92773f4f85a2441fe0c6e3a52c31b07253d08e Author: figual Date: Wed Mar 23 22:07:02 2016 +0100 Updated and improved ARMv8 micro-kernels. commit a4d7729776d17d9bdf2341eacd70b9770b9ba8d2 Author: Devin Matthews Date: Mon Mar 21 09:55:21 2016 -0500 Set default value for debug_type variable. commit 0e2447fa55d8c5fa2b1fc4150073512495c5f9eb Author: Devin Matthews Date: Thu Mar 17 16:32:05 2016 -0500 Add const correctness to auxinfo_t struct (microkernels need update theoretically). commit 1d1a426d18ec03754021456862a1f4d1dfec1fbf Merge: 5a978fff d226dfa0 Author: Field G. 
Van Zee Date: Mon Mar 7 15:17:53 2016 -0600 Merge pull request #46 from devinamatthews/new-config-opts Add several changes to the build system. commit d226dfa05190eb477b33563b1edccf8603973336 Author: Devin Matthews Date: Sat Mar 5 16:18:14 2016 -0600 Add several changes to the build system. 1) Add -- options. 2) Add -d/--enable-debug option to enable debugging symbols with and without optimization. 3) Allow user to specify CC at configure time, and determine vendor (gcc/icc/etc.). For now configurations enforce a particular vendor. 4) Add make V=[0,1] option to control build verbosity. commit 5a978fffdb8f09a81c89541d541d4a6830cd70a4 Merge: adb2b4e0 63e26423 Author: Field G. Van Zee Date: Fri Mar 4 17:26:58 2016 -0600 Merge pull request #45 from devinamatthews/high_prec_timers Use clock_gettime(CLOCK_MONOTONIC) and mach_absolute_time instead of gettimeofday commit 63e264239053b913164a849dd8a45829087eaddc Author: Devin Matthews Date: Fri Mar 4 13:17:50 2016 -0600 Make sure that -lrt is linked on Linux. commit 44fddd48dc1708a956803d1948f04429ec0d8700 Author: Devin Matthews Date: Fri Mar 4 12:36:38 2016 -0600 Add missing \. commit 7cabd2131f953de23e7015d760b0ddfda51b1251 Author: Devin Matthews Date: Thu Mar 3 11:43:07 2016 -0600 Use clock_gettime(CLOCK_MONOTONIC) and mach_absolute_time instead of gettimeofday. commit adb2b4e096c78e8b2f85fd372cf0d5eb04af5be8 Author: Tyler Smith Date: Wed Mar 2 14:48:12 2016 -0600 Fixing guard for non implemented partitioning through packed matrices commit 4ca5d5b1fd6f2e4a8b2e139c5405475239581e51 Author: Etienne Sauvage Date: Tue Mar 1 21:33:01 2016 +0100 sgemm micro-kernel for FMA4 instruction set (bulldozer configuration), based on x86_64/avx micro-kernel commit 627d59b5ba06866b26f46e4434a0435b600925e3 Author: Etienne Sauvage Date: Mon Feb 29 21:53:12 2016 +0100 symbolic link for bulldozer configuration to kernels commit 2dc5c0ae038ed175fab85751803ada05734d1ba1 Merge: f2809fc5 3d0fae81 Author: Field G. 
Van Zee Date: Mon Feb 29 12:22:51 2016 -0600 Merge pull request #40 from tkelman/bulldozer-symlink Add symlink from config/bulldozer/kernels to kernels/x86_64/bulldozer commit f2809fc5f74466c755da6a5b4632853e634060b5 Merge: f86b94f2 8624a33c Author: Field G. Van Zee Date: Sat Feb 27 13:06:03 2016 -0600 Merge pull request #39 from devinamatthews/fix_f2c_conflicts Devin's f2c type namespace update. Details: - Added "bla_" prefix to f2c type names to prevent conflicts with external user code. - Removed most of the body of bli_f2c.h, which was unused. commit 3d0fae810d942085d8f2d389820b4e0027577db8 Author: Tony Kelman Date: Thu Feb 25 23:24:03 2016 -0800 Add symlink from config/bulldozer/kernels to kernels/x86_64/bulldozer to fix linking issue mentioned in #37 and https://groups.google.com/forum/#!topic/blis-devel/iypwljcaeEI commit 8624a33ccc12dff6f6c4f92992ca5636af1576a6 Author: Devin Matthews Date: Thu Feb 25 13:51:26 2016 -0600 Fix remaining f2c conflicts. commit 372eef0b6c0a535bf88d4b46b72f61266e8491ba Author: Devin Matthews Date: Thu Feb 25 12:01:58 2016 -0600 Fixed most conflicts after hack-n-slash ofr bli_f2c.h, cleanup in progress. commit f86b94f206e2e09fa3221cc55c3dc5b05ca4775a Author: Field G. Van Zee Date: Tue Feb 23 18:12:34 2016 -0600 Included missing blas2blis integer def to CBLAS. Details: - Added #include "bli_config_macro_defs" to all cblas_*.c files in compat/cblas/src. This has the effect of defining BLIS_BLAS2BLIS_INT_TYPE_SIZE to the default value if bli_config.h does not define it. Thanks to Tony Kelman for reporting this bug. - In cblas_i?amax.c, changed the type of the variable 'iamax' from 'int' to 'f77_int'. This eliminates a compiler warning and a potential runtime bug and/or crash when the size of an int differs from the size of f77_int (as determined by BLIS_BLAS2BLIS_INT_TYPE_SIZE). commit 0b126de1342c11c65623bcb38e258e21e9244e3d Author: Field G. Van Zee Date: Fri Nov 13 16:29:12 2015 -0600 Consolidated packm_blk_var1 and packm_blk_var2. 
Details: - Consolidated the two blocked variants for packm into a single implementation (packm_blk_var1) and removed the other variant. - Updated all induced method _cntl_init() functions in frame/cntl/ind/ to use the new blocked variant 1. - Defined two new macros, bli_is_ind_packed() and bli_is_nat_packed(), to detect pack_t schemas for induced methods and native execution, respectively. commit 30e5eb29e060b97752f702d2ea5d101d950f53b2 Author: Field G. Van Zee Date: Fri Nov 13 12:14:19 2015 -0600 Minor changes to treatment of rs, cs in bli_obj.c. Details: - Applied a patch submitted by Devin Matthews that: - implements subtle changes to handling of somewhat unusual cases of row and column strides to accommodate certain tensor cases, which includes adding dimension parameters to _is_col_tilted() and _is_row_tilted() macros, - simplifies how buffers are sized when requesting BLIS-allocated objects, - re-consolidates bli_adjust_strides_*() into one function, and - defines 'restrict' keyword as a "nothing" macro for C++ and pre-C99 environments. commit f0a4f41b5acf55b41707ec821c4c5f9076dfbc24 Author: Field G. Van Zee Date: Thu Nov 12 15:22:50 2015 -0600 Fixed unimplemented case in core2 sgemm ukernel. Details: - Implemented the "beta == 0" case for general stride output for the dunnington sgemm micro-kernel. This case had been, up until now, identical to the "beta != 0" case, which does not work when the output matrix has nan's and inf's. It had manifested as nan residuals in the test suite for right-side tests of ctrsm4m1a. Thanks to Devin Matthews for reporting this bug. commit 42810bbfa0b8f006ecc5128d903909ec13ea63f9 Author: Field G. Van Zee Date: Thu Nov 12 12:07:46 2015 -0600 Fixed minor bugs for uncommon obj_create cases. Details: - Separated bli_adjust_strides() into _alloc() and _attach() flavors so that the latter can avoid a test performed by the former, in which the rs and cs are overridden and set to zero if either matrix dimension is zero.
Actually, we also disable this overriding behavior, even for the _alloc() case, since keeping the original strides (probably) does not hurt anything. The original code has been kept commented-out, though, in case an unintended consequence is later discovered. - Fixed a typo in an error check for general stride cases where rs == cs. commit 3e6dd11467643fbc2cb45c13cec8dd6024232833 Author: Field G. Van Zee Date: Tue Nov 3 10:30:08 2015 -0600 Minor re-expression in quadratic partitioning code. Details: - Minor change to quadratic equation solution code that avoids recomputation of the sqrt() parameter when the compiler is not smart enough to perform this optimization automatically. commit 0694b722f7e4df00efb32639095a2aca80e67f52 Merge: 3e116f0a 33557ecc Author: Field G. Van Zee Date: Mon Nov 2 17:24:25 2015 -0600 Merge branch 'master' of github.com:flame/blis commit 3e116f0a2953f50b3c068759a775ad7ffae04e49 Author: Field G. Van Zee Date: Mon Nov 2 17:18:23 2015 -0600 Fixed imaginary bug in quadratic partitioning code. Details: - Fixed a bug in the relatively new quadratic partitioning code that, under the right conditions, would perform sqrt() on a negative value. If the solution is imaginary, we discard it and use an alternate partition width that assumes no diagonal intersection. That alternate width is actually already computed, so, the fix was quite simple. Thanks to Devangi Parikh for reporting this bug. commit 33557ecccaf49b2569b7f3d7bcea52c2aab94c68 Author: Jeff Hammond Date: Mon Nov 2 12:18:43 2015 -0800 add Travis CI build status icon to the README commit 4a502fbe77bd0f701108baaa559d9cfb483f88de Author: Field G. Van Zee Date: Mon Nov 2 13:28:34 2015 -0600 Laid groundwork for runtime memory pool resizing. Details: - Changed bli_pool_finalize() so that the freeing begins with the block at top_index instead of block 0. This allows us to use the function for terminal finalization as well as temporary cleanup prior to reinitialization.
Also, clear the pool_t struct upon _pool_finalize() in case it is called in the terminal case with some blocks still checked out to threads (in which case the threads will see the new block size as 0 and thus release the block as intended). - Added bli_pool_reinit(), which calls _pool_finalize() followed by _pool_init() with new parameters. - Added bli_mem_reinit(), which is based on bli_pool_reinit(). - Added new wrapper, _mem_compute_pool_block_sizes(), which calls _mem_compute_pool_block_sizes_dt(). - Updated bli_mem_release() so that the pblk_t is freed, via _pool_free_block(), if the block size recorded in the mem_t at the time the pblk_t was acquired is now different from the value in the pool_t. commit 37e55ca39bdbddaec03ad30d43e8ad2b3e549c96 Author: Field G. Van Zee Date: Fri Oct 30 18:25:04 2015 -0500 Fixed obscure 3m1/4m1a bugs in trmm[3] and trsm. Details: - Fixed a family of bugs in the triangular level-3 operations for certain complex implementations (3m1 and 4m1a) that only manifest if one of the register blocksizes (PACKMR/PACKNR, actually) is odd: - Fixed incorrect imaginary stride computation in bli_packm_blk_var2() for the triangular case. - Fixed the incorrect computation of imaginary stride, as stored in the auxinfo_t struct in trmm and trsm macro-kernels. - Fixed incorrect pointer arithmetic in the trsm macro-kernels in the cases where the the register blocksize for the triangular matrix is odd. Introduced a new byte-granular pointer arithmetic macro, bli_ptr_add(), that computes the correct value. - Added cpp macro to bli_macro_defs.h for typeof() operator, defined in terms of __typeof__, which is used by bli_ptr_add() macro. - Disabled the row- vs. column-storage optimization in bli_trmm_front() for singleton problems because the inherent ambiguity of whether a scalar is row-stored or column-stored causes the wrong parameter combination code to be executed (by dumb luck of our checking for row storage first). 
- Added commented-out debugging lines to 3m1/4m1a and reference micro-kernels, and trsm_ll macro-kernel. commit 46294d80e5a79c598e200e1c8ec2a642ff839971 Merge: d3159c57 a0a7b85a Author: Field G. Van Zee Date: Tue Oct 27 12:41:23 2015 -0500 Merge pull request #35 from figual/master Fixed incomplete code in the double precision ARMv8 microkernel. commit a0a7b85ac3e157af53cff8db0e008f4a3f90372c Author: Francisco Igual Date: Tue Oct 27 08:59:15 2015 +0000 Fixed incomplete code in the double precision ARMv8 microkernel. commit d3159c5740c9ee7f8c0b661003aab6f00646ad6f Merge: b489152e 7e03e45b Author: Field G. Van Zee Date: Wed Oct 21 14:54:00 2015 -0500 Merge branch 'master' of github.com:flame/blis commit b489152e112644ec3b6d19e687231a9607f7694f Author: Field G. Van Zee Date: Wed Oct 21 14:53:17 2015 -0500 Use vzeroall in haswell micro-kernels. commit 7e03e45bfe6c27c4fdbf06b1caa7f49e9a5fef49 Merge: 77ddb0b1 4f88c29f Author: Field G. Van Zee Date: Wed Oct 14 13:26:07 2015 -0500 Merge pull request #33 from xianyi/master Enable Travis CI commit 4f88c29f9e634cbb6fb22d8c88931f0ec78ad7db Author: Zhang Xianyi Date: Wed Oct 14 12:57:50 2015 -0500 Detect Intel Broadwell (using Haswell config). commit 4b0ac1a9984a93f7ad4369b10fca63991107d9f5 Merge: fe3e355c 77ddb0b1 Author: Zhang Xianyi Date: Wed Oct 14 12:51:05 2015 -0500 Merge branch 'upstream_master' commit 77ddb0b1d31ada111dadf392766ba6d9210ed9fb Author: Field G. Van Zee Date: Tue Oct 13 12:53:06 2015 -0500 Removed flop-counting mechanism. Details: - Removed the optional flop-counting feature introduced in commit 7574c994. commit 276da366187460a4c8e6e0910e79cb39ce780bfe Author: Field G. Van Zee Date: Mon Oct 12 11:43:03 2015 -0500 Minor formatting change to README.md. commit d17057446f5404824478e8a6cd08f242ab75544a Author: Field G. Van Zee Date: Mon Oct 12 11:39:49 2015 -0500 Added "Getting Started" section to README.md. Details: - Added section to README.md file containing links to wikis with brief descriptions. 
commit e7e1f2f7b601b21b50e3cdad8972cb3fe11018d3 Author: Field G. Van Zee Date: Fri Oct 2 16:51:52 2015 -0500 Minor updates to CREDITS, README files. commit 55329906ecd7ce1ab910e4d30a29354a9172e7ea Author: Field G. Van Zee Date: Sat Sep 26 20:47:19 2015 -0500 Minor edits to README.md, testsuite. Details: - Fixed typos in README.md. - Fixed column heading alignment for testsuite when matlab output is enabled. - Minor updates to test/3m4m/runme.sh and test/3m4m/Makefile. commit bbebdb5793a8fd6aaf257012ab0272beaa04a0de Author: Field G. Van Zee Date: Fri Sep 25 14:47:27 2015 -0500 Replaced README with README.md. Details: - Replaced the old (and short) README file with a much more comprehensive version written in github-flavored markdown. The new file is based on content taken from the old Google Code homepage. commit e2e9d64a63485461192d9c2a6dd0183a8b71013c Author: Field G. Van Zee Date: Thu Sep 24 12:14:03 2015 -0500 Load balance thread ranges for arbitrary diagonals. Details: - Expanded/updated interface for bli_get_range_weighted() and bli_get_range() so that the direction of movement is specified in the function name (e.g. bli_get_range_l2r(), bli_get_range_weighted_t2b()) and also so that the object being partitioned is passed instead of an uplo parameter. Updated invocations in level-3 blocked variants, as appropriate. - (Re)implemented bli_get_range_*() and bli_get_range_weighted_*() to carefully take into account the location of the diagonal when computing ranges so that the area of each subpartition (which, in all present level-3 operations, is proportional to the amount of computation engendered) is as equal as possible. - Added calls to a new class of routines to all non-gemm level-3 blocked variants: bli_<oper>_prune_unref_mparts_[mnk]() where <oper> is herk, trmm, or trsm and [mnk] is chosen based on which dimension is being partitioned.
These routines call a more basic routine, bli_prune_unref_mparts(), to prune unreferenced/unstored regions from matrices and simultaneously adjust other matrices which share the same dimension accordingly. - Simplified herk_blk_var2f, trmm_blk_var1f/b as a result of more the new pruning routines. - Fixed incorrect blocking factors passed into bli_get_range_*() in bli_trsm_blk_var[12][fb].c - Added a new test driver in test/thread_ranges that can exercise the new bli_get_range_*() and bli_get_range_weighted_*() under a range of conditions. - Reimplemented m and n fields of obj_t as elements in a "dim" array field so that dimensions could be queried via index constant (e.g. BLIS_M, BLIS_N). Adjusted/added query and modification macros accordingly. - Defined mdim_t type to enumerate BLIS_M and BLIS_N indexing values. - Added bli_round() macro, which calls C math library function round(), and bli_round_to_mult(), which rounds a value to the nearest multiple of some other value. - Added miscellaneous pruning- and mdim_t-related macros. - Renamed bli_obj_row_offset(), bli_obj_col_offset() macros to bli_obj_row_off(), bli_obj_col_off(). commit fe3e355c9c5a6f65b8736b009e2d501b62a83ea1 Merge: efa641e3 4dd9dd3e Author: Zhang Xianyi Date: Fri Aug 21 14:38:36 2015 -0500 Merge branch 'upstream_master' commit efa641e36b73abee34166a252e90e28a6281d92d Author: Zhang Xianyi Date: Sat Aug 22 03:15:50 2015 +0800 Try to fix the compiling bug on travis. commit 4dd9dd3e1de626b51bfe85d9ee65f193d60e8d38 Author: Field G. Van Zee Date: Fri Aug 21 11:52:37 2015 -0500 Fixed minor alignment ambiguity bug in bli_pool.c. Details: - Fixed a typecasting ambiguity in bli_pool_alloc_block() in which pointer arithmetic was performed on a void* as if it were a byte pointer (such as char*). Some compilers may have already been interpreting this situation as intended, despite the sloppiness. Thanks to Aleksei Rechinskii for reporting this issue. 
- Redefined pointer alignment macros to typecast to uintptr_t instead of siz_t. commit 12ffd568b04feda57147c13b67717416a01c82f8 Author: Zhang Xianyi Date: Sat Aug 22 00:24:28 2015 +0800 Add Travis CI. commit ecc3ebb749e0861c27deda52b5f87236ede4901b Author: Field G. Van Zee Date: Wed Jul 29 13:31:12 2015 -0500 CHANGELOG update (0.1.8) commit 47caa33485b91ea6f2a5e386e61210c90c5f489f (tag: 0.1.8) Author: Field G. Van Zee Date: Wed Jul 29 13:31:09 2015 -0500 Version file update (0.1.8) commit ef0fbbbdb6148b96938733fce72cb4ed7dad685e Merge: fdfe14f1 d4b89136 Author: Field G. Van Zee Date: Thu Jul 9 13:54:54 2015 -0500 Merge branch 'master' of github.com:flame/blis commit fdfe14f1e17ba5a2f8dfa0bdb799c6b0e730211b Author: Field G. Van Zee Date: Thu Jul 9 13:52:39 2015 -0500 Added support for Intel Haswell/Broadwell. Details: - Added sgemm and dgemm micro-kernels, which employ 256-bit AVX vectors and FMA instructions. (Complex support is currently provided by default induced method, 4m1a.) - Added a 'haswell' configuration, which uses the aforementioned kernels. - Inserted auto-detection support for haswell configuration in build/auto-detect/cpuid_x86.c. - Modified configure script to explicitly echo when automatic or manual configuration is in progress. - Changed beta scalar in test_gemm.c module of test suite to -1.0 to 0.9. commit d4b891369c1eb0879ade662ff896a5b9a7fca207 Author: Field G. Van Zee Date: Tue Jul 7 10:06:53 2015 -0500 Added 'carrizo' configuration. Details: - Added a new configuration for AMD Excavator-based hardware also known as Carrizo when referring to the entire APU. This configuration uses the same micro-kernels as the piledriver, but with different cache blocksizes. commit 0b7255a642d56723f02d7ca1f8f21809967b8515 Author: Field G. Van Zee Date: Fri Jun 19 12:01:50 2015 -0500 CHANGELOG update (0.1.7) commit 267253de8a7be546ce87626443ee38701c1d411f (tag: 0.1.7) Author: Field G. 
Van Zee Date: Fri Jun 19 12:01:49 2015 -0500 Version file update (0.1.7) commit 7cd01b71b5e757a6774625b3c9f427f5e7664a76 Author: Field G. Van Zee Date: Fri Jun 19 11:31:53 2015 -0500 Implemented dynamic allocation for packing buffers. Details: - Replaced the old memory allocator, which was based on statically- allocated arrays, with one based on a new internal pool_t type, which, combined with a new bli_pool_*() API, provides a new abstract data type that implements the same memory pool functionality but with blocks from the heap (ie: malloc() or equivalent). Hiding the details of the pool in a separate API also allows for a much simpler bli_mem.c family of functions. - Added a new internal header, bli_config_macro_defs.h, which enables sane defaults for the values previously found in bli_config. Those values can be overridden by #defining them in bli_config.h the same way kernel defaults can be overridden in bli_kernel.h. This file most resembles what was previously a typical configuration's bli_config.h. - Added a new configuration macro, BLIS_POOL_ADDR_ALIGN_SIZE, which defaults to BLIS_PAGE_SIZE, to specify the alignment of individual blocks in the memory pool. Also added a corresponding query routine to the bli_info API. - Deprecated (once again) the micro-panel alignment feature. Upon further reflection, it seems that the goal of more predictable L1 cache replacement behavior is outweighed by the harm caused by non-contiguous micro-panels when k % kc != 0. I honestly don't think anyone will even miss this feature. - Changed bli_ukr_get_funcs() and bli_ukr_get_ref_funcs() to call bli_cntl_init() instead of bli_init(). - Removed query functions from bli_info.c that are no longer applicable given the dynamic memory allocator. - Removed unnecessary definitions from configurations' bli_config.h files, which are now pleasantly sparse. - Fixed incorrect flop counts in addv, subv, scal2v, scal2m testsuite modules. 
Thanks to Devangi Parikh for pointing out these miscalculations. - Comment, whitespace changes. commit 9848f255a3bab17d1139c391cca13ff3f1ffe6ed Author: Field G. Van Zee Date: Thu Jun 11 19:14:22 2015 -0500 Added early return to API-level _init() routines. Details: - Added conditional code that returns early from the API-level _init() routines if the API is already initialized. Actually meant for this to be included in 5f93cbe8. commit 5f93cbe870f3478870e15581e7fd450dad5bba1e Author: Field G. Van Zee Date: Thu Jun 11 18:52:12 2015 -0500 Introduced API-level initialization. Details: - Added API-level initialization state to _const, _error, _mem, _thread, _ind, and _cntl APIs. While this functionality will mostly go unused, adding minuscule overhead at init-time, there will be at least one instance in the near future where, in order to avoid an infinite loop, a certain portion of the initialization will call a query function that itself attempts to call bli_init(). API-level initialization will allow this later stage to verify that an earlier stage of initialization has completed, even if the overall call to bli_init() has not yet returned. - Added _is_initialized() functions for each API, setting the underlying bool_t during _init() and unsetting it during _finalize(). - Comment, whitespace changes. commit ee129c6b028bc5ac88da7c74fde72c49803742ff Author: Field G. Van Zee Date: Wed Jun 10 12:53:28 2015 -0500 Fixed bugs in _get_range(), _get_range_weighted(). Details: - Fixed some bugs that only manifested in multithreaded instances of some (non-gemm) level-3 operations. The bugs were related to invalid allocation of "edge" cases to thread subpartitions. (Here, we define an "edge" case to be one where the dimension being partitioned for parallelism is not a whole multiple of whatever register blocksize is needed in that dimension.) In BLIS, we always require edge cases to be part of the bottom, right, or bottom-right subpartitions.
(This is so that zero-padding only has to happen at the bottom, right, or bottom-right edges of micro-panels.) The previous implementations of bli_get_range() and _get_range_weighted() did not adhere to this implicit policy and thus produced bad ranges for some combinations of operation, parameter cases, problem sizes, and n-way parallelism. - As part of the above fix, the functions bli_get_range() and _get_range_weighted() have been renamed to use _l2r, _r2l, _t2b, and _b2t suffixes, similar to the partitioning functions. This is an easy way to make sure that the variants are calling the right version of each function. The function signatures have also been changed slightly. - Comment/whitespace updates. - Removed unnecessary '/' from macros in bli_obj_macro_defs.h. commit 9135dfd69d39f3bbd75034f479f27a78dbfebcce Author: Field G. Van Zee Date: Fri Jun 5 13:37:44 2015 -0500 Minor updates to test/3m4m files. commit d62ceece943b20537ec4dd99f25136b9ba2ae340 Author: Field G. Van Zee Date: Wed Jun 3 12:56:45 2015 -0500 Minor update to test/3m4m/runme.sh. Details: - Removed some stale script code that should have been removed during 590bb3b8c. commit b6ee82a3d421c9c4f1eb6848c7c6e37aa46de799 Author: Field G. Van Zee Date: Wed Jun 3 12:14:23 2015 -0500 Minor cleanup to bli_init() and friends. Details: - Spun-off initialization of global scalar constants to bli_const_init() and of threading stuff to bli_thread_init(). - Added some missing _finalize() functions, even when there is nothing to do. commit 1213f5cebabc1637ce9dd45c4bfa87bb93677c29 Author: Field G. Van Zee Date: Tue Jun 2 13:27:47 2015 -0500 POSIX thread bugfixes/edits to bli_init.c, _mem.c. Details: - Fixed a sort-of bug in bli_init.c whereby the wrong pthread mutex was used to lock access to initialization/finalization actions. But everything worked out okay as long as bli_init() was called by single-threaded code. 
- Changed to static initialization for memory allocator mutex in bli_mem.c, and moved mutex to that file (from bli_init.c). - Fixed some type mismatches in bli_threading_pthreads.c that resulted in compiler warnings. - Fixed a small memory leak with allocated-but-never-freed (and unused) pthread_attr_t objects. - Whitespace changes to bli_init.c and bli_mem.c. commit 590bb3b8c5c0389159c5a9451b6c156c5f237e8a Author: Field G. Van Zee Date: Sun May 24 16:02:53 2015 -0500 Backed-out adjusted dim changes to test/3m4m. Details: - Reverted most changes applied during commit ec25807b. commit ec25807b26da943868f0d0517c3720e50181b8f9 Author: Field G. Van Zee Date: Fri Apr 10 13:23:50 2015 -0500 Tweaks to test/3m4m to test with adjusted dims. Details: - Updated test/3m4m driver files to build test drivers that allow comparision of real "asm_blis" results to complex "asm_blis" results, except with the latter's problem sizes adjusted so that problems are generated with equal flop counts. commit 426b6488580a92bf071a62dc319a9c837ce39821 Author: Field G. Van Zee Date: Wed Apr 8 15:12:21 2015 -0500 Fixed a packing bug that manifested in trsm_r. Details: - Fixed a bug that caused a memory leak in the contiguous memory allocator. Because packm_init() was using simple aliasing when a subpartition object was marked as zeros by bli_acquire_mpart_*(), the "destination" pack object's mem_t entry was being overwritten by the corresponding field of the "source" object (which was likely NULL). This prevented the block from being released back to the memory allocator. But this bug only manifested when changing the location of packing B from outside the var1 loop to inside the var3 loop, and only for trsm with triangular B (side = right). The bug was fixed by changing the type of alias used in packm_init() when handling zero partition cases. Specifically, we now use bli_obj_alias_for_packing(), which does not clobber the destination (pack) object's mem_t field. 
Thanks to Devangi Parikh for this bug report. commit c84286d5cef48f16d83831baac1f46b9856b9a36 Author: Field G. Van Zee Date: Sat Apr 4 15:39:14 2015 -0500 More minor tweaks to test/3m4m. Details: - Added a line of output that forces matlab to allocate the entire array up-front. - Re-enabled real domain benchmarks in runme.sh, which were temporarily disabled. commit 309717c8ebf4ef1369f15cf41340e13c25b41573 Author: Field G. Van Zee Date: Fri Apr 3 19:28:49 2015 -0500 More tweaks to test/3m4m, configurations. Details: - Fixed incorrect number of mc_x_kc memory blocks in sandybridge/bli_config.h. - Enabled OpenMP multithreding in piledriver/bli_config.h. - More updates to test/3m4m driver files. commit 4baf3b9c69b2f648be9e46e07ccc9859dd675828 Author: Field G. Van Zee Date: Fri Apr 3 16:44:32 2015 -0500 Tweaked test/3m4m driver, including acml support. Details: - Added ACML support to test/3m4m driver Makefile and runme.sh script. commit a32f7c49ca4ea869d2a6c66818780f4321743d67 Merge: 349e075a 4bfd1ce8 Author: Field G. Van Zee Date: Fri Apr 3 08:28:11 2015 -0500 Merge pull request #23 from xianyi/master Add auto-detecting CPU on configure stage. commit 349e075ad6a8e2a1211d94f36d24828c9d44b052 Author: Field G. Van Zee Date: Thu Apr 2 18:12:28 2015 -0500 Tweaks to sandybridge config, test/3m4m driver. Details: - Enable OpenMP support by default in sandybridge's bli_config.h. - Reorganized sandybridge's bli_kernel.h. - Updated 3m4m Makefile, runme.sh to also test MKL implementation. commit 4bfd1ce8ca93f93d170dd2715f0a32027b417b46 Author: Zhang Xianyi Date: Thu Apr 2 16:40:21 2015 -0500 Detect NEON for cortex-a9 and cortex-a15. commit aa6eec4f43137057276fe6119bdbfb5c52682527 Author: Zhang Xianyi Date: Thu Apr 2 16:03:44 2015 -0500 Detect the CPU architecture. Support ARM cores. Detect the CPU architecture by compiler's predefined macros. Then, detect the CPU cores. Support detecting x86 and ARM architectures. 
commit 2947cfb749c937b0f62fac36cc92f123bd45b53c Author: Zhang Xianyi Date: Wed Apr 1 12:24:00 2015 -0500 Add auto-detecting CPU on configure stage. e.g. /Path_to_BLIS/configure auto Now, it only supports detecting x86 CPUs. commit 26a4b8f6f985597f80e0174990bf541f1d9bafac Author: Field G. Van Zee Date: Wed Apr 1 10:44:54 2015 -0500 Implemented 3m2, 3m3 induced algorithms (gemm only). Details: - Defined a new "3ms" (separated 3m) pack schema and added appropriate support in packm_init(), packm_blk_var2(). - Generalized packm_struc_cxk_3mi to take the imaginary stride (is_p) as an argument instead of computing it locally. Exception: for trmm, is_p must be computed locally, since it changes for triangular packed matrices. Also exposed is_p in interface to dt-specific packm_blk_var2 (and _var1, even though it does not use imaginary stride). - Renamed many functions/variables from _3mi to _3mis to indicate that they work for either interleaved or separated 3m pack schemas. - Generalized gemm and herk macro-kernels to pass in imaginary stride rather than compute them locally. - Added support for 3m2 and 3m3 algorithms to frame/ind, including 3m2- and 3m3-specific virtual micro-kernels. - Added special gemm macro-kernels to support 3m2 and 3m3. - Added support for 3m2 and 3m3 to testsuite. - Corrected the type of the panel dimension (pd_) in various macro- kernels from inc_t to dim_t. - Renamed many functions defined in bli_blocksize.c. - Moved most induced-related macro defs from frame/include to frame/ind/include. - Updated the _ukernel.c files so that the micro-kernel function pointers are obtained from the func_t objects rather than the cpp macros that define the function names. - Updated test/3m4m driver, Makefile, and run script.
commit ddf62ba7d2da08225b201585b85e06c967767dea Author: Tyler Smith Date: Fri Mar 27 14:27:51 2015 -0500 Refuse to free the packm thread info if it uses the single threaded version commit 016fc587584d958a0e430a56a5e2c05022ac2f17 Author: Tyler Smith Date: Fri Mar 27 14:23:02 2015 -0500 Don't free packm thread info if it is null commit 00a443c529a60862a57b93e303a0b3212c9b1df4 Author: Tyler Smith Date: Fri Mar 27 14:11:07 2015 -0500 Use bli_malloc instead of malloc for the thread info paths commit f1a6b7d02861ccebdc500ea98778cc0f6cddad17 Author: Field G. Van Zee Date: Wed Mar 18 15:37:10 2015 -0500 Reorganized code for induced complex methods. Details: - Consolidated most of the code relating to induced complex methods (e.g. 4mh, 4m1, 3mh, 3m1, etc.) into frame/ind. Induced methods are now enabled on a per-operation basis. The current "available" (enabled and implemented) implementation can then be queried on an operation basis. Micro-kernel func_t objects as well as blksz_t objects can also be queried in a similar maner. - Redefined several micro-kernel and operation-related functions in bli_info_*() API, in accordance with above changes. - Added mr and nr fields to blksz_t object, which point to the mr and nr blksz_t objects for each cache blocksize (and are NULL for register blocksizes). Renamed the sub-blocksize field "sub" to "mult" since it is really expressing a blocksize multiple. - Updated bli_*_determine_kc_[fb]() for gemm/hemm/symm, trmm, and trsm to correctly query mr and nr (for purposes of nudging kc). - Introduced an enumerated opid_t in bli_type_defs.h that uniquely identifies an operation. For now, only level-3 id values are defined, along with a generic, catch-all BLIS_NOID value. - Reworked testsuite so that all induced methods that are enabled are tested (one at a time) rather than only testing the first available method. 
- Reformatted summary at the beginning of testsuite output so that blocksize and micro-kernel info is shown for each induced method that was requested (as well as native execution). - Reduced the number of columns needed to display non-matlab testsuite output (from approx. 90 to 80). commit 8d5169ccda954e5f72944308a036dcb7ebfc9097 Author: Field G. Van Zee Date: Wed Mar 18 11:38:08 2015 -0500 Fixed bug in release of mem_t buffer. Details: - Fixed a bug that affects all level-2 and level-3 blocked variants. The bug only manifested, however, if the packing of operands (A and B in gemm, for example) spanned multiple nodes in the control tree. Until recently, the main consumers of packm were level-3 operations, all of which packed both input operands from blocked variant 1 (B outside of the loop, and A within the loop). This particular usage masked a flaw in the code whereby bli_obj_release_pack() would always release the underlying mem_t buffer (provided it was allocated), even if the buffer was not allocated in the current variant. This has been fixed by replacing all calls to bli_obj_release_pack() with calls to a new function, bli_packm_release(), which takes the same control tree node argument passed into the object's corresponding call to packm_init() or packv_init(). bli_packm_release() then proceeds to invoke bli_obj_release_pack() only if the control tree node indicates that packing was requested. Thanks to Devangi Parikh for identifying this bug. commit c0acca0f5182ba96fd39c9d10b34a896a6e74206 Author: Field G. Van Zee Date: Tue Mar 3 10:56:22 2015 -0600 Clarified comments in testsuite input.operations. commit 03ba9a6b17861d9e1adc0cf924439c4d7e860d19 Author: Field G. Van Zee Date: Tue Feb 24 10:33:28 2015 -0600 Removed some 'old' directories. commit a86db60ee270cdeb745ae7cf68f9e0becc9f522d Author: Field G. Van Zee Date: Mon Feb 23 18:42:39 2015 -0600 Extensive renaming of 3m/4m-related files, symbols. 
Details: - Renamed all remaining 3m/4m packing files and symbols to 3mi/4mi ('i' for "interleaved"). Similar changes to 3M/4M macros. - Renamed all 3m/4m files and functions to 3m1/4m1. - Whitespace changes. commit 8cf8da291a0fb2f491f410969a76ec0fbda47faf Author: Field G. Van Zee Date: Fri Feb 20 15:24:27 2015 -0600 Minor updates to induced complex mode management. Details: - Relocated bli_4mh.c, bli_4mb.c, bli_4m.c, bli_3mh.c, bli_3m.c (and associated headers) from frame/base to frame/base/induced. - Added bli_xm.? to frame/base/induced, which implements bli_xm_is_enabled(), which detects whether ANY induced complex method is currently enabled. - The new function bli_xm_is_enabled() is now used in bli_info.c to detect when an induced complex method is used, so we know when to return blocksizes from one of the induced methods' blocksize objects. commit 411e637ee7d1083a84f58f08938d51e63d7c3c9a Merge: c2569b88 fc0b7712 Author: Tyler Michael Smith Date: Fri Feb 20 20:39:25 2015 -0600 Merge branch 'master' of http://github.com/flame/blis commit c2569b8803d4ccc1d7b6f391713461b51443601d Author: Tyler Michael Smith Date: Fri Feb 20 20:38:19 2015 -0600 Fixed a memory leak in freeing the thread infos commit fc0b771227abf86d81f505b324f69f6e83db1d8f Author: Field G. Van Zee Date: Fri Feb 20 11:47:44 2015 -0600 Added max(mr,nr) to kc in static mem pools. Details: - Changed the static memory definitions to compute the maximum register blocksize for each datatype and add it to kc when computing the size of blocks of A and B. This formally accounts for the nudging of kc up to a multiple of mr or nr at runtime for triangular operations (e.g. trmm). commit af32e3a608631953ef770341df10a14a991bf290 Author: Tyler Michael Smith Date: Thu Feb 19 22:51:11 2015 -0600 Fixed a bug with get_range_weighted would return end = 0 for small problem sizes commit 441d47542a64e131578d00da7404c1ed387a721c Author: Field G. 
Van Zee Date: Thu Feb 19 17:06:10 2015 -0600 Renamed 3m and 4m symbols/macros to 3mi and 4mi. Details: - Renamed several variables and macros from 3m/4m to 3mi/4mi. This is because those packing schemas were always implicitly "interleaved". This new naming scheme will make way for new schemas that separate instead of interleave the real and imaginary (and summed) parts. - Expanded the pack format sub-field of the pack schema field of the info_t to 4 bits (from 3). This will allow for more schema types going forward. - Removed old _cntl.c files for herk3m, herk4m, trmm3m, trmm4m. commit 518a1756ccf02122b96fc437b538604a597df42a Author: Field G. Van Zee Date: Thu Feb 19 14:27:09 2015 -0600 Fixed indexing bug for trmm3 via 3mh, 4mh. Details: - Fixed a bug that only affected trmm3 when performed via 3mh or 4mh, whereby micro-panels of the triangular matrix were packed with "dead space" between them due to failing to adjust for the fact that pointer arithmetic was occurring in units of complex elements while the data being packed consisted of real elements. It turns out that the macro- kernel suffered from the same bug, meaning the panels were actually being packed and read consistently. The only way I was able to discover the bug in the first place was because the packed block of A was overflowing into the beginning of the packed row panel of B using the sandybridge configuration. commit 493087d730f01d5169434f461644e5633f48a42f Merge: 650d2a6f 25021299 Author: Field G. Van Zee Date: Wed Feb 18 09:45:51 2015 -0600 Merge branch 'master' of github.com:flame/blis commit 25021299b670775df8ca9c87910c63d7e74ed946 Merge: fe2b8d39 f05a5763 Author: Field G. Van Zee Date: Wed Feb 11 20:03:21 2015 -0600 Merge branch 'master' of github.com:flame/blis commit fe2b8d39a445ac848686e78c7540fd046cb95492 Author: Field G. Van Zee Date: Wed Feb 11 19:33:10 2015 -0600 Fixed an obscure bug in 3mh/3m/4mh/4m packing. 
Details: - Modified bli_packm_blk_var1.c and _var2.c to increase the triangular case's panel increment by 1 if it would otherwise be odd. This is particularly necessary in _var2.c when handling the interleaved 3m or ro/io/rpi pack schemas, since division of an odd number by 2 can happen if both the panel length and the panel packing dimension (register packing blocksize) are odd, thus making their product odd. - Modified bli_packm_init.c so that panel strides are increased by 1 if they would otherwise be odd, even for non-3m related packing. - Modified the trmm and trsm macro-kernels so that triangular packed micro-panels are traversed with this new "increment by 1 if odd" policy. - Added sanity checks in trmm and trsm macro-kernels that would result in an abort() if the conditions that would lead to a "divide odd integer by 2" scenario ever manifest. - Defined bli_is_odd(), _is_even() macros in bli_scalar_macro_defs.h. commit 650d2a6ff2e593151a296ca86b5214afcc747afc Author: Field G. Van Zee Date: Mon Feb 9 14:59:20 2015 -0600 Added initial support for imaginary stride. Details: - Added an imaginary stride field ("is") to obj_t. - Renamed bli_obj_set_incs() macro to bli_obj_set_strides(). - Defined bli_obj_imag_stride() and bli_obj_set_imag_stride() and added invocations in key locations. - Added some basic error-checking related to imaginary stride. - For now, imaginary stride will not be exposed into the most-used BLIS APIs such as bli_obj_create(), and certainly not the computational APIs such as bli_dgemm(). commit f05a57634a7c8e3864b25b3335d1194c1ea1aeb9 Author: Field G. Van Zee Date: Sun Feb 8 19:40:34 2015 -0600 Defined gemm cntl function to query ukrs func_t. Details: - Added a new function, bli_gemm_cntl_ukrs(), that returns the func_t* for the gemm micro-kernels from the leaf node of the control tree. 
This allows all the func_t* fields from higher-level nodes in the tree to be NULL, which makes the function that builds the control trees slightly easier to read. - Call bli_gemm_cntl_ukrs() instead of the cntl_gemm_ukrs() macro in all bli_*_front() functions (which is needed to apply the row/column preference optimization). - In all level-3 bli_*_cntl_init() functions, changed the _obj_create() function arguments corresponding to the gemm_ukrs fields in higher- level cntl tree nodes to NULL. - Removed some old her2k macro-kernels. commit cefd3d5d2001264de17cf63dae541f890cb9daaf Author: Tyler Smith Date: Thu Feb 5 11:09:12 2015 -0600 A couple of functions were incorrectly ifdeffed away on Xeon Phi. Fixed this commit 7574c9947d57a19f613880e3b9f62f8c8f6df4ec Author: Field G. Van Zee Date: Wed Feb 4 12:11:55 2015 -0600 Added basic flop-counting mechanism (level-3 only). Details: - Added optional flop counting to all level-3 front-ends, which is enabled via BLIS_ENABLE_FLOP_COUNT. The flop count can be reset at any time via bli_flop_count_reset() and queried via bli_flop_count(). Caveats: - flop counts are approximate for her[2]k, syr[2]k, trmm, and trsm operations; - flop counts ignore extra flops due to non-unit alpha; - flop counts do not account for situations where beta is zero. commit ceda4f27d1f1bcf19320e09848e0f2e3b9941e6c Author: Field G. Van Zee Date: Thu Jan 29 13:22:54 2015 -0600 Implemented bli_obj_imag_equals(). Details: - Implemented a new function, bli_obj_imag_equals(), which compares the imaginary part of the first argument to the second argument, which may be a BLIS_CONSTANT or of a regular real datatype. commit 81114824a05a9053229efd577a8a94a856deda93 Author: Field G. Van Zee Date: Tue Jan 6 12:15:21 2015 -0600 Minor 4m/3m consolidation to mem_pool_macro_defs.h. Details: - Merged the 4m and 3m definitions in bli_mem_pool_macro_defs.h to reduce code and improve readability. 
commit 36a9b7b7436d9423ba4de2a9f85cfcd43577b783 Author: Tyler Michael Smith Date: Wed Dec 17 21:53:50 2014 +0000 reduced the default number of MC by KC blocks for bgq commit c60619c7c3568f044a849abbab60209aa7455423 Author: Field G. Van Zee Date: Tue Dec 16 17:08:22 2014 -0600 Minor tweaks for 3m4m test drivers. Details: - Changed gemm_kc blocksizes to be reduced by two-thirds instead of half. - Changed 3m4m/test_gemm.c driver to divide by 3 instead of 2 when computing the fixed k dimension. - Fixed runme.sh so that it would use multiple threads for s/dgemm cases. commit c6929ba6a5e6f633a7295e979a2b8df8c7ecdb1b Author: Field G. Van Zee Date: Tue Dec 16 11:27:50 2014 -0600 Added 4m_1b to test/3m4m test driver and script. commit 785d480805fc0d6f4251b5499933515740b6b2a7 Merge: 9456f330 4156c088 Author: Field G. Van Zee Date: Fri Dec 12 14:34:19 2014 -0600 Merge branch 'master' of github.com:flame/blis commit 9456f330af4617f9ee32972d51f974aa2d84f97b Author: Field G. Van Zee Date: Fri Dec 12 14:31:57 2014 -0600 Added 4m_1b implementation for gemm. Details: - Added yet another 4m-based implementation for complex domain level-3 operations. This method, which the 3m/4m paper identifies as Algorithm "4m_1b" fissures the first loop around the micro-kernel so that the real sub-panel of the current micro-panel of B is multiplied against (both sub-panels of) all micro-panels of A, before doing the same for the imaginary sub-panel of the micro-panel of B. For now, only gemm is supported, and 4m_1b (labeled "4mb" within the framework) is not yet integrated into the test suite. commit 4156c0880d9aea4ff04a9c4fa139ba8c437d8bfb Author: Field G. Van Zee Date: Tue Dec 9 16:03:14 2014 -0600 Fixed obscure level-2 packing / general stride bug. Details: - Fixed a bug in certain structured level-2 operations that manifested only when the structured matrix was provided to BLIS as matrix stored with general stride. 
The bug was introduced in c472993b when the densify field was removed from the packm control tree node and associated APIs. Since then, the packed object was unconditionally marked with an uplo field of BLIS_DENSE. This is fine for level-3 operations where micro-panels are always densified, but in level-2 contexts, the underlying unblocked variant (fused or unfused) of structured operations (e.g. trmv) still needs to know whether to execute its "lower" or "upper" branches of code. Since this field was unconditionally being set to BLIS_DENSE, the unblocked variants were always executed the "else" branch, which happened to be the "lower" case code. Thus, running an upper case produced the wrong answer. This most obviously manifested in the form of failures for trmm, trmm3, and trsm in the test suite. The bug was fixed by setting the packed object's uplo field to BLIS_DENSE only if the schema indicated that micro-panels were to be packed. Otherwise, we can assume we are packing to regular row or column storage, as is the case with level-2 packing. Thanks to Francisco Igual for reporting the testsuite failures and ultimately leading us to this bug. commit 689f60a578b461119e9ea90c74f642b9eb79addb Merge: bef24e67 483e4d6a Author: Field G. Van Zee Date: Sun Dec 7 14:03:30 2014 -0600 Merge pull request #21 from figual/master Adding armv8a configuration and micro-kernels. commit 483e4d6a3fdbef9d9ab47fb674c9476c70ca9f0f Author: Francisco D. Igual Date: Sun Dec 7 20:27:49 2014 +0100 Adding armv8a configuration and micro-kernels. Only sgemm micro-kernel is fully functional at this point. commit bef24e67e0f93579c2a80315348dc2e227f72a72 Author: Tyler Smith Date: Wed Nov 26 18:00:56 2014 -0600 Fixed a type of race condition exposed by pthreads implementation. Lead thread of the inner thread communicator could exit subproblem, move on the next iteration of the loop and modify a1_pack, b1_pack, or c1_pack while other threads were still using those. 
Barriers were inserted to fix this. commit 76bde44411f0e34266bab9d666a54ef22be97320 Merge: e56e6143 f3d729e5 Author: Field G. Van Zee Date: Wed Nov 26 17:25:24 2014 -0600 Merge branch 'master' of github.com:flame/blis commit f3d729e504ec012e7dc7e02b2ecd42e004c6894d Author: Tyler Michael Smith Date: Wed Nov 26 22:25:24 2014 -0600 Added static mutex to bli_init and bli_finalize commit d71cc797866ff502ad1127527016f463267eef80 Author: Tyler Michael Smith Date: Wed Nov 26 21:35:39 2014 -0600 Refactored bli_threading files and added support for pthreads commit e56e61438ff7fcf25a48c0b7603f18df782b50b6 Author: Field G. Van Zee Date: Wed Nov 26 17:20:35 2014 -0600 Minor cleanups to bli_threading.h and friends. Details: - No longer need to define BLIS_ENABLE_MULTITHREADING manually in bli_config.h; it now gets defined when BLIS_ENABLE_OPENMP or BLIS_ENABLE_PTHREADS is defined. - Added sanity check to prevent both BLIS__ENABLE_OPENMP and BLIS_ENABLE_PTHREADS from being enabled simultaneously. - Reorganization of bli_threading*.h header files, which led to simplification of threading-related part of blis.h. - added "-fopenmp -lpthread" to LDFLAGS of sandybridge make_defs.mk file. commit 3be2744cbe2c56d38c23fd818aa5c1f10cc7ea51 Author: Field G. Van Zee Date: Fri Nov 21 12:28:08 2014 -0600 Update to template gemm ukernel comments. Details: - Updated comments on alignment of a1 and b1 to match wiki. commit 994429c6881b2ade92d9d7949bcaebfbf2cc65eb Merge: 58796abd 694029d9 Author: Field G. Van Zee Date: Thu Nov 20 13:55:35 2014 -0600 Merge pull request #20 from TimmyLiu/master #define PASTEF773 required by cblas compatibility layer commit 694029d9d7db857d642ab536955c0621791108c8 Author: Timmy Date: Wed Nov 19 15:25:14 2014 -0600 #define PASTEF773 required by cblas compatiility layer commit 58796abda66b133346f8d523b39178afc336351f Author: Field G. Van Zee Date: Thu Nov 6 14:31:52 2014 -0600 Removed KC constraint comments from _kernel.h files. 
Details: - Since 4674ca8c, the constraint that KC be a multiple of both MR and NR has been relaxed, and thus it was time to remove the comments from the top of the bli_kernel.h files of all configurations. commit 7bbc95a54f706d43c7f7951f0e5995f86130cd52 Author: Field G. Van Zee Date: Wed Oct 29 10:52:23 2014 -0500 Added new piledriver micro-kernels. Details: - Added new micro-kernels for the AMD piledriver architecture (one for each datatype). - Updates and tweaks to piledriver configuration. - Added 3xk packm micro-kernel support. - Explicitly unrolled some of the smaller packm micro-kernels. - Added notes to avx/sandybridge and piledriver micro-kernel files acknowledging the influence of the corresponding kernel code in OpenBLAS. commit 59613f1d5500f6279963327db2fbc84bc9135183 Author: Field G. Van Zee Date: Thu Oct 23 17:21:37 2014 -0500 Added separate micro-panel alignment for A and B. Details: - Changed the recently-added micro-panel alignment macros so that we now have two sets--one for micro-panels of matrix A and one for micro- panels of matrix B: BLIS_UPANEL_[AB]_ALIGN_SIZE_?. - Store each set of alignment values into a separate blksz_t object in bli_gemm_cntl_init(). - Adjusted packm_init() to use the separate alignment values. - Added query routines for the new alignment values to bli_info.c. - Modified test suite output accordingly. commit a8e12884ee1fddd3fd77ca5a68aa0cb857f3af57 Author: Field G. Van Zee Date: Thu Oct 23 11:35:48 2014 -0500 CHANGELOG update (0.1.6) commit 38ea5022e4ed846112198c4e1672fcdaeb90dc71 (tag: 0.1.6) Author: Field G. Van Zee Date: Thu Oct 23 11:35:45 2014 -0500 Version file update (0.1.6) commit a3e6341bdb0e28411f935d6b4708a6389663e004 Author: Field G. Van Zee Date: Thu Oct 23 11:13:28 2014 -0500 Factored common code from blocksize functions. Details: - Split bli_determine_blocksize_[fb]() into two functions each, the newer ones ending with the _sub suffix. 
These new sub-functions are now called from bli_[gemm|trmm|trsm]_determine_kc_[fb](), which eliminates redundant code and will allow any future tweaks to the core sub-functions to automatically be inherited by the operation- specific versions. commit 4674ca8cffb58331ff7edf23bbe0e3f6a7558489 Author: Field G. Van Zee Date: Thu Oct 23 10:50:59 2014 -0500 Extended newly relaxed KC to hemm, symm. Details: - These changes were intended for the previous commit. - Defined bli_gemm_determine_kc_[fb]() and bli_gemm_determine_kc_[fb](), which determine blocksizes for gemm-based operations, taking special care to "nudge" the kc dimension up to a multiple of MR or NR for hemm and symm operations, as needed. - Changed bli_gemm_blk_var3f.c to call bli_gemm_determine_kc_f(). instead of bli_determine_blocksize_f(). - Comment updates to bli_trmm_blocksize.c, bli_trsm_blocksize.c. commit ab954ba6f874eaca7b001804491f866ef6b9b327 Author: Field G. Van Zee Date: Wed Oct 22 17:21:58 2014 -0500 Relaxed constraint that KC be multiple of MR, NR. Details: - Relaxed a long-held requirement in register blocksizes that required the kernel programmer to choose a KC that was divisible by both MR and NR. This was very constraining on some architectures that did not use register blocksizes that were powers of two. The constraint is now enforced only for trmm and trsm, where it is needed, and it is now handled by "nudging" kc upward at runtime, if necessary, to be a multiple of MR or NR, as needed. - Defined bli_trmm_determine_kc_[fb]() and bli_trsm_determine_kc_[fb](), which determine blocksizes for trmm and trsm, taking special care to "nudge" the kc dimension up to a multiple of MR or NR, as needed. - Changed bli_trmm_blk_var3[fb].c to call bli_trmm_determine_kc_[fb]() instead of bli_determine_blocksize_[fb](). - Added safeguard to bli_align_dim_to_mult() that returns the dimension unmodified if the dimension multiple is zero (to avoid division by zero). 
- Removed cpp guard/check for KC % MR == 0 and KC % NR == 0 from bli_kernel_macro_defs.h. - Whitespace, variable name changes to bli_blocksize.c. - Removed old commented code from bli_gemm_cntl.c. commit 95cdae65d6b88e043ee14bcd53cd2e800d7aecb4 Author: Tyler Smith Date: Wed Oct 22 16:30:16 2014 -0500 Fixed bug in KNC microkernel where k=0 and beta != 1 commit e64dba5633fc49b768b5edc7762f2b5d8a4d0588 Author: Field G. Van Zee Date: Mon Oct 20 19:23:06 2014 -0500 Re-implemented micro-panel alignment. Details: - This commit re-implements a feature that was removed in commit c2b2ab62. It was removed because, at the time, I wasn't sure how the micro-panel alignment feature would interact with the 4m method (when applied at the micro-kernel level), and so it seemed safer to disable the feature entirely rather than allow possible breakage. This commit revisits the issue and safely re-implements the feature in a way that is compatible with 4m, 3m, 4mh, and 3mh (and native execution). - Modified the static memory pool to account for micro-panel alignment space. - Modified packm_init and blocked variants to align whole micro-panels by a datatype-specific alignment value that may be set by the configuration. (If it is not set by the configuration, it will default to BLIS_SIZEOF_?.) - Modified macro-kernels so that: - storage stride is handled properly given the new micro-panel alignment behavior; - indexing through 3m/4m/rih-type sub-panels, as is done by trmm and trsm, is more robust (e.g. will work if the applicable packing register blocksize is odd); - imaginary strides are computed and stored within auxinfo_t structs, which allows the virtual micro-kernels to more easily determine how to index into the micro-panel operands. - Modified virtual 3m and 4m micro-kernels to use the imaginary strides within the auxinfo_t structs instead of panel strides. - Deprecated the panel stride fields from the auxinfo_t structs. 
- Updated test suite to print out the micro-panel alignment values. commit add16b0e5402924301e7078e4ca5e3ef725bff0b Author: Field G. Van Zee Date: Fri Oct 17 11:49:24 2014 -0500 Added 3m4m test driver subdir of 'test'. Details: - Added a modified test driver for [cz]gemm that will test all 3m/4m as well as assembly-based and OpenBLAS implementations of gemm in single and multithreaded modes. commit e171504a72406c61a173241d8bccf0a5ceb10582 Author: Field G. Van Zee Date: Fri Oct 17 11:25:59 2014 -0500 Use correct definition of bli_is_last_iter(). Details: - As intended for previous commit, the new definition of bli_is_last_iter() is now disabled in favor of the old definition. commit 0d954087b2b55d2f5f3c5e57d702b318ca2300f6 Author: Field G. Van Zee Date: Fri Oct 17 11:19:34 2014 -0500 Minor changes and fixes. Details: - Redefined bli_is_last_iter() to take thread_id and num_thread arguments, which allows the macro to correctly compute whether a given iteration is the last that the thread will compute in that particular loop. The new definition, however, remains disabled (commented out) until someone can look at this more closely, as the new definition seems to actually hurt performance slightly. - Whitespace and related updates to level-3 macro-kernels. - Updated test suite so that performance results in the hundreds of gigaflops does not disrupt the column alignment of the output. commit d1e86e1876e433f54b501ec5a005b4ba7c5ce4e6 Author: Field G. Van Zee Date: Sun Oct 12 13:43:47 2014 -0500 More minor tweaks to sandybridge/avx micro-kernel. Details: - Re-enabled use of b_next for dgemm and cgemm micro-kernels. commit 7b6fe4cae57cb22c09c1a97595e1a201a02cbcd2 Author: Field G. Van Zee Date: Sun Oct 12 12:01:51 2014 -0500 Minor tweaks to sandybridge/avx micro-kernels. Details: - Changed the MC blocksize for zgemm micro-kernel from 128 to 64. - Removed usage of b_next in all x86_64/avx gemm micro-kernels. commit a6a156e9feec47154e7a0fd43bcc006b1fc04aba Author: Field G. 
Van Zee Date: Fri Oct 10 14:26:41 2014 -0500 Added cgemm ukernel for avx/sandybridge. Details: - Implemented AVX-based cgemm micro-kernel (via GNU extended inline assembly syntax). - Updated sandybridge configuration accordingly. commit 6f8575ab2580e167a022293b76ddf0514f71b613 Author: Field G. Van Zee Date: Fri Oct 10 10:01:45 2014 -0500 Added zgemm ukernel for avx/sandybridge. Details: - Implemented AVX-based zgemm micro-kernel (via GNU extended inline assembly syntax). - Updated sandybridge configuration accordingly. commit 23ce7ee542a12ca40b4b6090ad2558d180e16d37 Merge: 99fd9a39 7a8ad47f Author: Field G. Van Zee Date: Thu Oct 9 16:41:22 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 99fd9a39718cb7281f6fb23f9fef7cca4fe514f4 Author: Field G. Van Zee Date: Thu Oct 9 16:38:04 2014 -0500 Fixed two minor bugs. Details: - Fixed a bug in the test suite for the trsm_ukr and gemmtrsm_ukr test modules whereby the uplo bits of some packed matrix objects were not being set properly, resulting in false FAILURE results for those tests. Thanks to Tyler Smith for bringing this issue to my attention. - Fixed a bug in bli_obj_alloc_buffer() that caused an unnecessary "not yet implemented" abort() when creating a 1x1 object with non-unit strides. commit 7a8ad47fb2d100a9da93aa8cab774fcceeaab733 Author: Tyler Smith Date: Wed Oct 8 15:52:13 2014 -0500 Minor changes to knc configuration, including preference row major storage Also fixed a bug in the knc micro-kernel where it would fail if k == 0 commit 76b7c34af0c09f47d9615b18857a356acddc788a Author: Field G. Van Zee Date: Thu Oct 2 14:15:38 2014 -0500 Fixed a bug in the pack schema-related bit macros. Details: - Expanded the BLIS_PACK_SCHEMA_BITS value in bli_type_defs.h to include all six bits presently used in the pack schema bitfield of the info field of obj_t structs. Prior to this commit, the macro constant only included the lowest five bits, which excluded the "is or is not packed" bit. 
This manifested as a strange bug in probably many level-2 codes that invoked packing, though we only observed it in ger before fixing. Thanks to Devin Matthews for finding and reporting this bug. commit a5763e332226598d70c47dfa9cad4578e15ef5f4 Author: Field G. Van Zee Date: Thu Oct 2 13:28:17 2014 -0500 Added extra output to bli_obj_print(). Details: - Print extra values from info field of obj_t struct within bli_obj_print(). commit 9bba209fc44fbfce943ba6a51cd8278a0cb6b159 Author: Tyler Smith Date: Mon Sep 29 14:56:36 2014 -0500 Fixed bug when packing anywhere besides in blk_var_1 for gemm. commit 614a4afc9272adb47e5a8b83b39d56c2804d95d6 Merge: b541b667 4a7df04e Author: Tyler Smith Date: Fri Sep 26 10:49:57 2014 -0500 Merge branch 'master' of http://github.com/flame/blis commit 4a7df04e8a4ffdb9561d26426afd35e4fe15b013 Author: Field G. Van Zee Date: Mon Sep 22 16:06:15 2014 -0500 Added 30xk support for packm ukernels. Details: - Updated bli_kernel_*_macro_defs.h headers to include default definitions for 30xk packm kernels. - Extended function pointer arrays in bli_packm_cxk_*() out to 31 and included 30xk kernels. - Added 30xk kernels to frame/1m/packm/ukernels/bli_packm_ref_cxk_*.c. commit b6d4bd792e0d44ce4b28afef343f5ff3ba89c285 Author: Field G. Van Zee Date: Mon Sep 22 16:02:37 2014 -0500 Fixed missing tabs from Makefile patch. commit 32630f9b6f0d5ba28d5b56dae4c7288a37158743 Author: Field G. Van Zee Date: Fri Sep 19 17:18:20 2014 -0500 Comment update to virtual micro-kernels. commit 13447cffead7c6d137a7a3ccbf9e552ed0477467 Author: Field G. Van Zee Date: Fri Sep 19 13:00:48 2014 -0500 Minor bugfix to top-level Makefile. Details: - Applied a patch that allows the top-level Makefile to work on certain systems. The patch simply separates out the source-to-object code generation rules for .c and .S files into two separate rules. Thanks to Devin Matthews for submitting this patch. commit e80a4537846416719c067ae08a53aeda978c572d Author: Field G. 
Van Zee Date: Thu Sep 18 10:24:20 2014 -0500 Fixed bug introduced by bugfix in 25b258d. Details: - We actually need to check alignment of lda*sizeof(double) and NOT a+lda because in the latter case, alignment could cancel out and still allow the optimized code to run when it shouldn't. Thanks to Devin for pointing this out. commit 25b258d61f9c8cee64e922f4131784b6edb196dd Author: Field G. Van Zee Date: Thu Sep 18 10:10:49 2014 -0500 Fixed a non-fatal problem with bugfix in a68b316c. Details: - The bugfix in a68b316c was inadvertently checking alignment of the leading dimension itself, rather than the byte size of the leading dimension. Now, we simply check alignment of a+lda. commit 96302d4fc81363410e41c3a3c43a65df44d97ad9 Author: Field G. Van Zee Date: Thu Sep 18 09:43:40 2014 -0500 Renamed bli_info_get_*_ukr_type() functions. Details: - Added _string() suffix to bli_info_get_*_ukr_type() function names. This makes them consistent with the bli_info_get_*_impl_string() functions. commit a68b316ca4852509f84ed50e01afac486bf70f58 Author: Field G. Van Zee Date: Wed Sep 17 11:10:07 2014 -0500 Fixed alignment bugs in level-1f kernels. Details: - Fixed bugs whereby the level-1f dotxf, axpyxf, and dotxaxpyf kernels were attempting to compute problems with unaligned leading dimensions with optimized code, rather than (correctly) using the reference implementations. Thanks to Devin Matthews for reporting this bug. commit 870761eb902e4866090d1d3446a345df3d6d4599 Merge: e9899be0 a2b59a37 Author: Field G. Van Zee Date: Tue Sep 16 18:20:49 2014 -0500 Merge branch 'master' of github.com:flame/blis commit e9899be09044829e23386bd73e394f1dd7778210 Author: Field G. Van Zee Date: Tue Sep 16 18:19:32 2014 -0500 Added high-level implementations of 4m, 3m. Details: - Added "4mh" and "3mh" APIs, which implement the 4m and 3m methods at high levels, respectively. 
APIs for trmm and trsm were NOT added due to the fact that these approaches are inherently incompatible with implementing 4m or 3m at high levels (because the input right-hand side matrix is overwritten). - Added 4mh, 3mh virtual micro-kernels, and updated the existing 4m and 3m so that all are stylistically consistent. - Added new "rih" packing kernels (both low-level and structure-aware) to support both 4mh and 3mh. - Defined new pack_t schemas to support real-only, imaginary-only, and real+imaginary packing formats. - Added various level0 scalar macros to support the rih packm kernels. - Minor tweaks to trmm macro-kernels to facilitate 4mh and 3mh. - Added the ability to enable/disable 4mh, 3m, and 3mh, and adjusted level-3 front-ends to check enabledness of 3mh, 3m, 4mh, and 4m (in that order) and execute the first one that is enabled, or the native implementation if none are enabled. - Added implementation query functions for each level-3 operation so that the user can query a string that describes the implementation that is currently enabled. - Updated test suite to output implementation types for each level-3 operation, as well as micro-kernel types for each of the five micro- kernels. - Renamed BLIS_ENABLE_?COMPLEX_VIA_4M macros to _ENABLE_VIRTUAL_?COMPLEX. - Fixed an obscure bug when packing Hermitian matrices (regular packing type) whereby the diagonal elements of the packed micro-panels could get tainted if the source matrix's imaginary diagonal part contained garbage. commit a2b59a37f166f70a6dd5793db2530823ef590c2b Author: Tyler Smith Date: Mon Sep 15 10:44:44 2014 -0500 Fixed make defs so that they actually compile for bulldozer commit 86fc7e40764f78ec217f50216ef4fa5b57dbfbc7 Author: Tyler Smith Date: Mon Sep 15 10:35:46 2014 -0500 Added bulldozer configuration and updated piledriver micro-kernel commit 0644e61a79a57f136be5f4c47b9099cff2af06e0 Author: Field G. Van Zee Date: Thu Sep 11 12:55:34 2014 -0500 Minor updates to bli_packm_init.c. 
commit 9dc9b44a057a08e20ad4d423344f0ecad54c1eb2 Author: Field G. Van Zee Date: Thu Sep 11 12:03:28 2014 -0500 Renamed bli_obj_pack_status() to _pack_schema(). Details: - Renamed the bli_obj_pack_status() macro to bli_obj_pack_schema() in order to help avoid confusion as to what the macro returns. commit cf5efdde0588a0d5b6ea57fe7d7be5000be06f8e Author: Field G. Van Zee Date: Thu Sep 11 11:47:56 2014 -0500 Pass pack_t schemas into ukernels via auxinfo_t. Details: - Modified macro-kernels to pass the pack_t schema values for matrices A and B into the datatype-specific functions, where they are now inserted into a newly-expanded auxinfo_t struct. This gives the micro-kernels access to the pack_t schema values embedded in the control trees, which determine the precise format into which the matrix elements are packed. - Updated a call to bli_packm_init_pack() in src/test_libblis.c to remove densify argument. Meant to include this in commit c472993b. commit cc8d2b82775cca3c2d51bf427f4e77c8024a6d15 Author: Field G. Van Zee Date: Tue Sep 9 13:48:22 2014 -0500 Updated old test drivers in 'test'. commit c472993bbccb69e9ffc409c79b742426c8ad2ad4 Author: Field G. Van Zee Date: Tue Sep 9 13:42:04 2014 -0500 Removed densify argument to packm_cntl_obj_create(). Details: - Removed the "densify" bool_t argument to bli_packm_cntl_obj_create(). This argument was inserted very early in BLIS's development, when it was anticipated that the developer may sometimes wish to pack a Hermitian, symmetric, or triangular matrix without making it dense. But as it turns out, if we are packing a matrix, we always want to make it dense in some way or another due to the fact that the micro- kernel only multiplies dense micro-panels. Thus, unless/until there is a real need for the feature, it seems reasonable to remove it from the packm_cntl API. commit 5c43ee387146cd76dc59b730dac6683a8446b834 Author: Field G. 
Van Zee Date: Mon Sep 8 15:19:29 2014 -0500 Moved trmm4m/3m_cntl files to 'old' directory. Details: - Meant to include this in previous commit. commit 7b2f469d5465ed73b1ca88124bc9a1987388aa27 Author: Field G. Van Zee Date: Mon Sep 8 14:49:50 2014 -0500 Retired trmm_t control tree definitions, usage. Details: - Replaced all trmm_t control tree instances and usage with that of gemm_t. This change is similar to the recent retirement of the herk_t control tree. - Tweaked packm blocked variants so that the triangular code does NOT assume that k is a multiple of MR (when A is triangular) or NR (when B is triangular). This means that bottom-right micro-panels packed for trmm will have different zero-padding when k is not already a multiple of the relevant register blocksize. While this creates a seemingly arbitrary and unnecessary distinction between trmm and trsm packing, it actually allows trmm to be handled with one control tree, instead of one for left and one for right side cases. Furthermore, since only one tree is required, it can now be handled by the gemm tree, and thus the trmm control tree definitions can be disposed of entirely. - Tweaked trmm macro-kernels so that they do NOT inflate k up to a multiple of MR (when A is triangular) or NR (when B is triangular). - Misc. tweaks and cleanups to bli_packm_struc_cxk_4m.c and _3m.c, some of which are to facilitate above-mentioned changes whereby k is no longer required to be a multiple of register blocksize when packing triangular micro-panels. - Adjusted trmm3 according to above changes. - Retired trmm_t control tree creation/initialization functions. commit 576e9e9255a79dba9cd3c804267f51e0b4aa6e8a Author: Field G. Van Zee Date: Sun Sep 7 16:12:52 2014 -0500 Retired herk_t control tree definitions, usage. Details: - Replaced all herk_t control tree instances and usage with that of gemm_t, since the two types presently have the same fields. 
This means that herk, her2k, syrk, and syr2k can simply use the gemm control tree as-is, just as hemm and symm have been doing for some time now. - Retired herk_t control tree creation/initialization functions. - Retired many _target.c and .h files into 'old' directories. commit b2fed052c9a23d858ef0afbe220b342bce9aa7f7 Author: Field G. Van Zee Date: Wed Sep 3 17:07:25 2014 -0500 Minor code cleanup to bli_packm_struc_cxk*.c Details: - Realized that we don't need to track rs_p11 and cs_p11 for Hermitian/symmetric case of bli_packm_struc_cxk*(). They are always equal to rs_p and cs_p. commit 023ce770966b3b5a98bba729c5af1f45e15ebb97 Author: Field G. Van Zee Date: Wed Sep 3 10:47:53 2014 -0500 Minor update to packm_cxk kernels. Details: - Changed m and n dimension parameter names to panel_dim and panel_len, respectively, in packm_cxk, packm_cxk_3m, packm_cxk_4m kernel wrapper functions. This makes the code a little easier to read since "m" and "n" have connotations that are not applicable here. - Comment updates. commit 189def3667d9218adbeec45e2801fd074341a679 Author: Field G. Van Zee Date: Mon Sep 1 16:23:17 2014 -0500 Retired portions of bli_kernel_3m/4m_macro_defs.h. Details: - Removed sections of bli_kernel_[4m|3m]_macro_defs.h that defined 4m/3m-specific blocksizes after realizing that this can be done in bli_gemm[4m|3m]_cntl.c, since that is (mostly) the only place they are used. - The maximum cache values for 4m/3m are still needed when computing mem pool dimensions in bli_mem_pool_macro_defs.h. As a workaround, "local" definitions in terms of the regular cache blocksizes are now in place. - Similarly, the register blocksizes for 4m/3m are still needed in bli_kernel_post_macro_defs.h. As a workaround, "local" definitions in terms of the regular register blocksizes are now in place. commit af521ee6f2a77d61c98b833e85c09969987bc00d Author: Field G. Van Zee Date: Mon Sep 1 14:06:46 2014 -0500 Changed semantics of blocksize extensions.
Details: - Changed semantics of cache and register blocksize extensions so that the extended values are tracked, rather than just the marginal extensions. - BLIS_EXTEND_[MKN]C_? has been renamed BLIS_MAXIMUM_[MKN]C_?. - BLIS_EXTEND_[MKN]R_? has been renamed BLIS_PACKDIM_[MKN]R_?. - bli_blksz_ext_*() APIs have been renamed to bli_blksz_max_*(). Note that these "max" query routines grab the maximum value for cache blocksizes and the packdim value for register blocksizes. - bli_info_*() API has been updated accordingly. - All configurations have been updated accordingly. commit 07f23aefd52f5ba4960dbd46e59b180a2136b8e9 Author: Field G. Van Zee Date: Sun Aug 31 11:58:50 2014 -0500 Pass pack schema into packm_struc_cxk*(). Details: - Changed the interface to the packm_struc_cxk*() kernels to include the pack_t schema. This allows the implementation to more easily determine how the micro-panel is stored (row-stored column panel or column-stored row panel). - Updated packm blocked variants to pass in the schema. - Updated packm_ker_t function pointer definition accordingly. commit f032ba9b1186cb02184574d339565f53d733aa42 Author: Field G. Van Zee Date: Sat Aug 30 16:21:20 2014 -0500 Reorganized packm implementation. Details: - Reorganized packm variants and structure-aware kernels so that all routines for a given pack format (4m, 3m, regular) reside in a single file. - Renamed _blk_var4 to _blk_var2 and generalized so that it will work for both 4m and 3m, and adjusted 4m/3m _cntl_init() functions accordingly. - Added a new packm_ker_t function pointer type to bli_kernel_type_defs.h to facilitate function pointer typecasting in the datatype-specific packm_blk_var2() functions. - Deprecated _blk_var3. - Fixed a bug in the triangular micro-panel packing facility that affected trmm and trmm3 with unit diagonals. commit c6793cecb70788bdf2c76ab8102504ea97be9d2a Author: Field G. Van Zee Date: Thu Aug 28 17:14:48 2014 -0500 Reorganized #includes for scalar macro headers. 
Details: - Reordered the #include statements in bli_scalar_macro_defs.h so that conventional, ri-, and ri3-based macros are grouped together. - Renamed bli_eqri.h (and macros within) to end with 'ris' suffix. commit b4da8907284345be4374f87a88679c4886ab866e Author: Field G. Van Zee Date: Thu Aug 28 14:10:32 2014 -0500 Whitespace, comments updates on packm_blk_var?.c. commit 46e46a1d83da586c3dd9fd7a01eb16067abbaee1 Author: Field G. Van Zee Date: Thu Aug 28 12:05:45 2014 -0500 Minor updates to packm blocked, cxk_3m/4m code. Details: - Added 'const' qualifier to inlined packing code that handles micro-panel packing that is too large for an existing packm ukernel. - Comment updates. commit 908dc688b5979995eaacb3aa937f241551a8df00 Author: Field G. Van Zee Date: Thu Aug 28 11:55:12 2014 -0500 Pass pack schema into blocked packm routines. Details: - Rather than passing the packm blocked routines a boolean value that represents whether the matrix is being packed to row or column storage, we now pass in the pack schema itself. commit a0ff6066e06075ab5f92b19247b39b92ed15f1bf Merge: c4c99c48 d40b32bc Author: Field G. Van Zee Date: Sun Aug 24 15:56:21 2014 -0500 Merge branch 'master' of github.com:flame/blis commit c4c99c4813bf9817592a7899c5d33412fe22313f Author: Field G. Van Zee Date: Sun Aug 24 15:52:22 2014 -0500 Renamed packm scalar from beta to kappa. Details: - The packm implementation (i.e. source files in frame/1m/packm and frame/1m/packm/ukernels), interchangeably used the names "beta" and "kappa" to refer to the optional scalar to be applied during packing. This commit renames all uses of "beta" to be "kappa", since "beta" sometimes evokes the scalar specifically on the output matrix of a level-2 or level-3 operation. commit d40b32bc24ffbae24123e054307b3138969bb095 Merge: 9331f794 6c25c379 Author: Field G. Van Zee Date: Sun Aug 24 13:46:36 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 6c25c379fadb50834146e1614f7b80c093c2aad0 Author: Field G.
Van Zee Date: Sun Aug 24 13:44:10 2014 -0500 Consolidated unpackm ukernels into single file. Details: - Reorganized unpackm ukernels into a single file, bli_unpackm_ref_cxk.c, in a manner similar to what was done for packm ukernels in commit 4cc2b46. commit 9331f79443223fe267676ee54c439e1ed320380c Merge: 7fc48a7d 670b6392 Author: Field G. Van Zee Date: Sun Aug 24 10:54:21 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 670b63926a7f4fc694abc5b1582ef8a4f367f5a8 Author: Field G. Van Zee Date: Sun Aug 24 10:46:27 2014 -0500 Added whitespace to bli_obj_scalar_ routine calls. Details: - Added extra spaces to align arguments of bli_obj_scalar_init_detached_copy_of(). This misalignment was due to the fact that the function was previously named bli_obj_init_scalar_copy_of() and the name change, performed in b444489f, was done via recursive sed commands which left subsequent lines untouched. commit 7fc48a7d920e07fd8e9528ab2565123f8f4e67f9 Author: Field G. Van Zee Date: Sat Aug 23 16:50:58 2014 -0500 Combined 4m/3m bits into an expanded bitfield. Details: - Combined the 4m/3m bits into an expanded bitfield, which will encode the packing "format" of the micro-panels. This will allow for more easily and compactly encoding additional formats. - Other minor comment/whitespace updates to bli_type_defs.h. - Updated bli_obj_macro_defs.h and bli_param_macro_defs.h to use the new format bitfield. - Comment update to bli_kernel_post_macro_defs.h. - Whitespace changes to bli_kernel_3m_macro_defs.h, _4m_macro_defs.h. commit ef0143cc1417e4815e4cafd5a464cc83fe7a1e86 Author: Field G. Van Zee Date: Sat Aug 23 14:02:27 2014 -0500 Renamed _ri, _ri3 packm ukernels to _4m, _3m. Details: - Renamed packm ukernels, _cxk dispatcher, and structure-aware _cxk helper functions to use _4m and _3m instead of _ri and _ri3 suffixes. - Updated names of cpp macros that correspond to packm ukernels. commit b0ccac116158b5ed3316d34798748ba0c6d78672 Author: Field G. 
Van Zee Date: Thu Aug 21 19:21:52 2014 -0500 Cleaned up front-end layering for 4m/3m. Details: - Added an extra layer to level-3 front-ends (examples: bli_gemm_entry() and bli_gemm4m_entry()) to hide the control trees from the code that decides whether to execute native or 4m-based implementations. The layering was also applied to 3m. - Branch to 4m code based on the return value of bli_4m_is_enabled(), rather than the cpp macros BLIS_ENABLE_?COMPLEX_VIA_4M. This lays the groundwork for users to be able to change at runtime which implementation is called by the main front-ends (e.g. bli_gemm()). - Retired some experimental gemm code that hadn't been touched in months. commit bedec95451cabfa7a8906b51018a5e0572998a5e Author: Field G. Van Zee Date: Thu Aug 21 18:25:48 2014 -0500 Added bli_4m API for querying 4m enabled state. Details: - Added bli_4m.c (and header), which defines a simple API that can be used to query, enable, and disable 4m-based complex support in BLIS. The macros BLIS_ENABLE_?COMPLEX_VIA_4M are now used to initialize the variable that determines the state (enabled or disabled). - Changed bli_info*() API so that all cache and register blocksize- related query routines return the blksz_t objects' values as they exist at runtime, rather than return the values as determined by the configuration system (e.g. bli_kernel.h, or defaults for those values not specified). This sets the foundation for being able to change those blocksizes at runtime. commit b541b667cabfa6d41b50ad1e49209651ee6812cc Merge: 699a8151 dd61307f Author: Tyler Smith Date: Wed Aug 20 14:44:51 2014 -0500 Merge branch 'master' of http://github.com/flame/blis Conflicts: frame/3/trsm/bli_trsm_blk_var2b.c frame/3/trsm/bli_trsm_blk_var2f.c commit 699a8151ca3d5021e834a1784ef45dcc3a3d17cd Author: Tyler Smith Date: Wed Aug 20 14:43:17 2014 -0500 Some improvements to trsm parallelism commit dd61307f55bb6bc762fe0ef0446479d6c0536723 Author: Field G. 
Van Zee Date: Wed Aug 20 09:52:16 2014 -0500 Minor update to sandybridge MC_S, KC_S. Details: - Changed sandybridge MC and KC for single-precision real to 128 and 384, respectively. - Updated comments in template configuration's gemm micro-kernel file to document the new "contiguous row preference" macro. commit d0eec4bddd740ce360d0f655362c551287cf925b Author: Field G. Van Zee Date: Tue Aug 19 15:49:19 2014 -0500 Added optional row preference to ukernel config. Details: - Added the ability for the kernel developer to indicate the gemm micro- kernel as having a preference for accessing the micro-tile of C via contiguous rows (as opposed to contiguous columns). This property may be encoded in bli_kernel.h as BLIS_?GEMM_UKERNEL_PREFERS_CONTIG_ROWS, which may be defined or left undefined. Leaving it undefined leads to the default assumption of column preference. - Changed conditionals in frame/3/*/*_front.c that induce transposition of the operation so that the transposition is induced only if there is disagreement between the storage of C and the preference of the micro-kernel. Previously, the only conditional that needed to be met was that C was row-stored, which is to say that we assumed the micro- kernel preferred column-contiguous access on C. - Added a "prefers_contig_rows" property to func_t objects, and updated calls to bli_func_obj_create() in _cntl.c files in order to support the above changes. - Removed the row-storage optimization from bli_trsm_front.c because it is actually ineffective. This is because the right-side case of trsm flips the A and B micro-panel operands (since BLIS only requires left-side gemmtrsm/trsm kernels), meaning any transposition done at the high level is then undone at the low level. - Tweaked trmm, trmm3 _front.c files to eliminate a possible redundant invocation of the bli_obj_swap() macro. commit 4cc2b464f29cafbfef9295b073b857fe0752f710 Author: Field G. Van Zee Date: Fri Aug 15 11:49:15 2014 -0500 Reorganized packm ukernels. 
Details: - Previously, packm micro-kernels were organized by the implied register blocksize (panel dimension) assumed by the kernel, meaning conventional, ri, and ri3 variations of some micro-kernel size were housed in the same file. This commit reorganizes the micro-kernels so that all sizes reside in the same file for each format type (conventional, ri, and ri3). commit fcc10054a11b6fc3976986f57feccf741596cbf6 Author: Field G. Van Zee Date: Wed Aug 13 12:32:06 2014 -0500 Tweaks to gemm4m, gemm3m virtual ukernels. Details: - Fixed a potential, but as-yet unobserved bug in gemm3m that would allow undesirable inf/NaN propagation, since C was being scaled by beta even if it was equal to zero. - In gemm3m micro-kernel, we now avoid copying C to the temporary micro-tile if beta is zero. - Rearranged computation in gemm4m so that the temporary C micro-tile is accessed less, and C is accessed only after the micro-kernel calls. This improves performance marginally in most situations. - Comment updates to both gemm4m and gemm3m micro-kernels. commit cdcbacc2fa871317c8e7ef961ecc6d70ab22dc34 Author: Field G. Van Zee Date: Tue Aug 12 12:45:38 2014 -0500 Removed redundant redef of packm ukr prototypes. Details: - Removed redundant macro code that redefined packm ukernel prototypes when the previous macro was already sufficient. This helps de-clutter the packm ukernel prototyping headers a little bit. commit 82dac98d9032ccb598068a55ddf23d7898491e9e Author: Field G. Van Zee Date: Tue Aug 12 12:36:25 2014 -0500 Relocated packm ukernel #includes. Details: - Consolidated the #include statements for packm ukernel headers from bli_packm_cxk.h, bli_packm_cxk_ri.h, and bli_packm_cxk_ri3.h to bli_packm.h. - Comment/whitespace updates to bli_packm_blk_var3.c, _var4.c. commit 7f77856e25aad5fc6f172ed3e57b6351804e31a4 Author: Field G. Van Zee Date: Tue Aug 12 12:20:15 2014 -0500 Removed unused 4m/3m-related packm macro defs.
Details: - Removed unused and unneeded s- and d-flavored macro definitions for packm ukernels related to the complex 4m and 3m methods, as implemented in BLIS. commit bc1d86b2d4d436b1dfba2d0098501aaca9cbb8b5 Author: Field G. Van Zee Date: Thu Aug 7 19:01:20 2014 -0500 Sandy Bridge configuration, micro-kernel update. Details: - Minor updates to bli_config and bli_kernel.h for sandybridge configuration. - Renamed existing AVX intrinsic-based micro-kernel file to bli_gemm_int_d8x4.c. - Added new file, bli_gemm_asm_d8x4.c, which provides assembly-based gemm micro-kernels for single- and double-precision real. commit 98ec95877a95242e159b2bf0c879115a59e4c6e2 Author: Field G. Van Zee Date: Thu Aug 7 18:28:32 2014 -0500 Corrected comment for _obj_is_[row|col]_stored(). Details: - Fixed a mistake in the comments introduced in the previous commit for bli_obj_is_row_stored() and bli_obj_is_col_stored(). commit 43d5e419e1b424d2143817103dbee8ead797e8aa Author: Field G. Van Zee Date: Thu Aug 7 18:20:40 2014 -0500 Reverted _obj_is_[row|col]_stored() macros. Details: - Rolled back recent changes to bli_obj_is_row_stored() and bli_obj_is_col_stored() so that those macros now only inspect the strides (row or column). It turns out that the more sophisticated definitions introduced in a51e32e are not necessary, because these "obj" macros are virtually never used on packed matrices, and when they are, they can use bli_obj_is_[row|col]_packed() macros, which inspect the info bitfield. commit 45692e3ad4b7e1d05ac4302398df4efce04b4284 Author: Field G. Van Zee Date: Thu Aug 7 13:21:15 2014 -0500 Reverted some accidental changes. Details: - Reverted some changes that were unintentionally included in the previous commit (9526ce98). Thanks to Tony Kelman for pointing this out. (Note: a few select changes were not reverted.) commit 9526ce98812be908bc4915f2849b657fb6ce1b49 Author: Field G. Van Zee Date: Wed Aug 6 14:13:46 2014 -0500 Updated copyright headers of emscripten configuration files.
commit 30833ed71d56f231ddba21e632bcbbc90b12a97c Author: Field G. Van Zee Date: Wed Aug 6 12:12:03 2014 -0500 Minor edits to configurations' make_defs.mk files. Details: - Redefined CFLAGS, CFLAGS_NOOPT, and CFLAGS_KERNELS so that CFLAGS_NOOPT is defined first and then the other two are defined in terms of CFLAGS_NOOPT. This textually cleans up the definitions and makes them a little easier to read. commit 9d61afeae2ba70fe1df07e7546f6954ea83aed12 Author: Field G. Van Zee Date: Mon Aug 4 16:01:59 2014 -0500 CHANGELOG update (0.1.5) commit bde56d0ecfd0ec20330fac290b91a6dca0cf94e9 (tag: 0.1.5) Author: Field G. Van Zee Date: Mon Aug 4 16:01:58 2014 -0500 Version file update (0.1.5) commit 4c6ceea4be35d089630986eb5b959b9e97214077 Author: Field G. Van Zee Date: Mon Aug 4 15:49:59 2014 -0500 Added CBLAS compatibility layer. Details: - Added a new section in bli_config.h files of all configurations for enabling CBLAS support. (Currently, the default is for the CBLAS layer to be disabled.) - Added a directory, frame/compat/cblas, to house CBLAS source code. A subdirectory 'f77_sub' holds subroutine wrappers corresponding to subroutines found in CBLAS that allow calling some BLAS routines with the return value passed as the last argument rather than as an actual (function) return value. This was probably intended to allow CBLAS to avoid the whole f2c debacle altogether. However, since BLIS does not assume the presence of a Fortran compiler, we had to provide similar routines in C. - A script, integrate-cblas-tarball.sh, is included to streamline the integration of future revisions of the CBLAS source code. - The current tarball, cblas.tgz, that was used with the above script to generate the present set of CBLAS source code is also included. - Updated blis.h to include necessary CBLAS-related headers. commit caab62dac0fb0bd0d674118f409c81680db94d29 Merge: 383631b5 db97ce97 Author: Field G. 
Van Zee Date: Sun Aug 3 14:36:18 2014 -0500 Merge pull request #19 from kevinoid/fix-install-perms-error Fix permissions error installing to non-owned directory commit db97ce979b88c051922c2f946ce52d523c7a12c6 Author: Kevin Locke Date: Sun Aug 3 12:48:04 2014 -0600 Fix permissions error installing to non-owned directory When installing to a directory which is not owned by the installing user, even when the user has write permission for the directory, the installation can fail with an error similar to the following: Installing libblis-0.1.4-7-sandybridge.a into /usr/local/lib/ install: cannot change permissions of ‘/usr/local/lib’: Operation not permitted Makefile:658: recipe for target '/usr/local/lib/libblis-0.1.4-7-sandybridge.a' failed make: *** [/usr/local/lib/libblis-0.1.4-7-sandybridge.a] Error 1 In the example case, the error occurred because the user attempted to install to /usr/local and /usr/local/lib is owned by root with mode 2755 which the Makefile unsuccessfully attempted to change to 0755. Given that installing to /usr/local is likely to be quite common and the ownership/permissions are the default for Debian and Debian-derived Linux distributions (perhaps others as well), this commit attempts to support that use case by using mkdir rather than install to create the directory (which is the same approach as Automake). Signed-off-by: Kevin Locke commit 383631b514c3d42b724640f57644eea276cc418c Author: Field G. Van Zee Date: Thu Jul 31 14:51:48 2014 -0500 Redefined bit field macros with bitshift operator. Details: - Redefined many of the macros that define bit fields and bit values in the obj_t info field using the bitshift operator (<<). This makes it easier to reorder bit fields, or expand existing bit fields, or add new fields. The bitshifting should be evaluated by the compiler at compile-time. commit 137143345dc93cc9a83da5ba88b25bac7502de86 Author: Field G. Van Zee Date: Thu Jul 31 12:12:45 2014 -0500 Reimplemented unit blocksize fix in prev commit. 
Details: - Instead of inferring the storage format of the micro-panels from within the packm variants, we now pass in a bool_t value that denotes whether the packed matrix contains row-stored column panels or column-stored row panels. This value can then be tested more easily inside the main packm variant loop. - Renumbered pack_t schema values in bli_type_defs.h so that there are now five bits, each with different meaning: - 4: packed or not packed? - 3: packed for 3m? - 2: packed for 4m? - 1: packed to panels? - 0: stored by rows or columns? - Added new macros that test for status of above bits in schema bit subfield, and renamed some existing macros related to 4m/3m. commit a51e32ec061941cd10119ea80115c82a40b1673f Author: Field G. Van Zee Date: Wed Jul 30 10:41:48 2014 -0500 Fixed unit register blocksize brokenness. Details: - Fixed a breakdown in BLIS's ability to differentiate between row-stored and column-stored micro-panels when MR or NR is unit. When either register blocksize (or both) is equal to one, inspecting the strides of the affected packed micro-panel is no longer sufficient to determine whether the micro-panel is a row-stored column panel or a column-stored row panel (because both strides are unit). At that point, dimension information is necessary when invoking the bli_is_row_stored_f() and bli_is_col_stored_f() macros (and their "obj" counterparts). Thanks to Ilya Polkovnichenko for reporting this bug. - Added panel dimensions (m and n) to obj_t, which are set in packm_init() and then passed into the blocked variants to support the aforementioned update. commit c2732272f0ac680a0ad19fa9db5d587398a1479a Author: Field G. Van Zee Date: Tue Jul 29 16:37:18 2014 -0500 Removed old/unused packm variants. commit b97fa9a5a70fe0123e5eebd999b947461d38445f Author: Field G. Van Zee Date: Sun Jul 27 18:54:09 2014 -0500 Minor usage update to build/bump-version.sh. commit b18ba5f62d98629cdd519ff4c96fc67ec1a62fb9 Author: Field G. 
Van Zee Date: Sun Jul 27 18:52:05 2014 -0500 Added missing 'bla_' prefix to r_imag(), d_imag(). Details: - Added "bla_" to f2c functions r_imag() and d_imag(). Thanks to Murtaza Ali for pointing out the mis-named functions. commit af7a8e6c042cade452130a6729377f1a3ef4e19e Author: Field G. Van Zee Date: Sun Jul 27 18:20:13 2014 -0500 CHANGELOG update (0.1.4) commit a7537071b152ecff671f8716595d37dc09e4fd51 (tag: 0.1.4) Author: Field G. Van Zee Date: Sun Jul 27 18:20:12 2014 -0500 Version file update (0.1.4) commit acff74041bf02c7b9fdfa24b507bca782a4c5fce Merge: cdb9413e 47b243ef Author: Tyler Smith Date: Wed Jul 23 15:07:30 2014 -0500 Merge branch 'master' of https://github.com/flame/blis commit cdb9413e140f8a198666250ec88fa34b5425a9c3 Author: Tyler Smith Date: Wed Jul 23 15:05:15 2014 -0500 Enabled threading for a couple more loops in TRSM JC loop is now enabled for the left-sided case IC loop is now enabled for the right-sided case commit 47b243ef08f4101de3d936f2373343e67eaa4dd5 Author: Field G. Van Zee Date: Wed Jul 23 13:41:13 2014 -0500 Call setid for early return from herk/her2k. Details: - Added setid call (to zero imaginary parts of diagonal elements) to early return branches of herk_front() and her2k_front() for cases where alpha is zero. Thanks to Murtaza Ali for suggesting this fix. - Comment update. commit 3e7b0db5b0e24f5fd66c60bacabc019885ddbec5 Merge: 2f8a357d ed3e33d5 Author: Tyler Smith Date: Wed Jul 23 13:40:44 2014 -0500 Merge branch 'master' of https://github.com/flame/blis commit 2f8a357de5fb55163a969d888cf059f24b78125c Author: Tyler Smith Date: Wed Jul 23 13:40:12 2014 -0500 Some TRSM threading fixes/additions commit ed3e33d548047be3283ff41268fdf716563bc542 Author: Field G. Van Zee Date: Tue Jul 22 14:40:43 2014 -0500 Tweaked behavior of herk, her2k for BLAS compat. Details: - Updated herk_front() and her2k_front() to explicitly set the imaginary components of the diagonal entries of C to zero after the computation is complete.
This is needed in case downstream applications read the full diagonal entries (i.e., including imaginary part), which could, in the absence of this modification, accumulate numerical error from subsequent rank-k/rank-2k updates. - Updated BLAS compatibility wrappers for herk and her2k to return early if: n == 0 || ( ( alpha == 0 || k == 0 ) && beta == 1 ) This also results in the imaginary components of diagonal entries NOT being set to zero (see above), which is consistent with BLAS. - Updated mkherm to use setid instead of an inlined loop over the diagonal. commit ea59a5c93cde1467a3715abc53dda4aecf961873 Author: Field G. Van Zee Date: Tue Jul 22 14:36:02 2014 -0500 Added new level-1d operation: setid. Details: - Defined a new level-1d operation, setid, which sets the imaginary elements of an object's diagonal to a single scalar. This can be useful, for example, when trying to make the diagonal of a Hermitian matrix real-valued. commit 8965a965931318619ceaebd7c32edccf3022d0c7 Merge: 1785efb5 5b73e80b Author: Field G. Van Zee Date: Tue Jul 22 14:34:32 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 1785efb5420bc7b9c850a068cb5d99837071e877 Author: Field G. Van Zee Date: Tue Jul 22 14:33:01 2014 -0500 Minor improvements to invertd and setd. Details: - Added missing call to invertd_check() from front-end. - Changed setd front-end call of scald_check() to setd_check(). commit 5b73e80b71c054c1945a06aff044ef629bc1a9a0 Merge: a41e68e0 20690fe3 Author: Field G. Van Zee Date: Fri Jul 18 12:21:20 2014 -0500 Merge pull request #16 from Maratyszcza/emscripten Emscripten port commit a41e68e09e73b999fab0bb430a43dccfc63aab45 Author: Field G. Van Zee Date: Thu Jul 17 13:25:56 2014 -0500 Reimplemented BLIS initialization/finalization. Details: - Rewrote bli_init() and bli_finalize() with OpenMP critical sections for thread-safety. Also added lots of explanatory comments. 
- Renamed bli_init_safe() and bli_finalize_safe() with the _auto() suffix, and reimplemented for simplicity. Updated all invocations in BLAS compatibility layer to use _auto() suffix. commit 36358948ea75074bda32a9f8c008f835b87d21db Author: Field G. Van Zee Date: Thu Jul 17 10:58:10 2014 -0500 Retired frame/3/gemm/other directory. Details: - Removed frame/3/gemm/other directory, which contained some outdated and/or experimental variants. commit c73261f17edf589e76bdbe297702a1fbbd69275f Author: Field G. Van Zee Date: Mon Jul 14 16:23:51 2014 -0500 More minor cleanups post-copyright update. commit 2a09d24463d358be6243b24f112fad057c2aefe0 Author: Field G. Van Zee Date: Mon Jul 14 16:17:09 2014 -0500 Reverted power7 symlinks destroyed by sed script. Details: - Reverted two symlinks, in kernels/power7/3/test, back to being symlinks after recursive-sed.sh mistakenly replaced them with copies of the actual files to which they referred. Meant to include this in previous commit. commit 7ed415824d3b2e78541b6f64e404ca5347c06d3d Author: Field G. Van Zee Date: Mon Jul 14 16:14:33 2014 -0500 Updated copyright headers (continued). Details: - Inserted "at Austin" into third clause of license declarations. Meant to include this change in previous commit. commit 5c2c6c85616834ff2716ece083118201d9df6dde Author: Field G. Van Zee Date: Mon Jul 14 16:05:03 2014 -0500 Updated copyright headers to contain "at Austin". Details: - Updated copyright headers to include "at Austin" in the name of the University of Texas. - Updated the copyright years of a few headers to 2014 (from 2011 and 2012). commit fcec68cda3f6e90ae055e7304e6674c1c5c8d010 Merge: 94c0df79 4a20ed1a Author: Field G. Van Zee Date: Mon Jul 14 11:35:34 2014 -0500 Merge branch 'master' of github.com:flame/blis commit 94c0df797eda377931f29a41ba6a89c0ed58daca Author: Field G. Van Zee Date: Mon Jul 14 11:24:36 2014 -0500 Changed order of zero dim / error checking. 
Details: - Updated level-2 and level-3 internal back-ends so that the operation's _check() function is called BEFORE any attempt to return early due to the presence of zero dimensions. This ordering makes more sense because (for example) object dimensions should match even if one of them is zero. Previously, a dimension mismatch could result in an early return with no error message. - Updated bli_check_object_buffer() so that NULL buffers result in an error only if the object is dimensionally non-empty (i.e., only if both of the object's dimensions are non-zero). This allows BLIS operations to be performed on dimensionally empty objects (i.e., where at least one dimension is zero). - Updated the error message associated with bli_check_object_buffer() to mention the newly relaxed constraint mentioned above, vis-a-vis non-zero dimensions. commit 20690fe3018ce17c8df61ce0bffecaa7911dc3a5 Author: Marat Dukhan Date: Sun Jul 13 22:50:56 2014 -0700 Emscripten port commit 4a20ed1a3f5e9e5232df30aa0e568e6c00c56ce1 Merge: 6a515e98 8ccdfaef Author: Field G. Van Zee Date: Sun Jul 13 17:45:01 2014 -0500 Merge pull request #14 from Maratyszcza/master Support "make test" for PNaCl configuration commit 6a515e988f2ae1628258a6dec2c0e9cf2d04790f Author: Field G. Van Zee Date: Sun Jul 13 17:38:33 2014 -0500 Implemented dsdot() and sdsdot() in compat layer. Details: - Replaced "not yet implemented" error messages in dsdot() and sdsdot() with actual implementations. (These routines are so rarely used that this log message will probably lead to some people learning of their existence for the first time.) commit 255668ddd1004552c6cc65035ec6486671ce99bb Author: Field G. Van Zee Date: Sun Jul 13 17:30:44 2014 -0500 Inserted gemv beta-scaling bug into compat layer. Details: - BLAS has a peculiar bug (or feature) whereby calling gemv on a vector y of non-zero length and a vector x of zero length results in no action. Given that the operation is y := beta*y + A*x, many (most?) 
individuals would expect vector y to still be scaled by beta. BLIS, when called natively, handles these cases intuitively (with beta scaling). Unfortunately, many BLAS test suites actually check for the way this situation is handled. Therefore, we have decided to implement this "bug" in the compatibility layer so as to provide "bug-for-bug" compatibility with BLAS. commit 570a154581bdb353fa13a219c7cb3c81d3dceffd Author: Field G. Van Zee Date: Sat Jul 12 17:51:05 2014 -0500 Comment/formatting updates to build scripts. Details: - Minor updates to comments and formatting in bump-version.sh and update-version-file.sh scripts. commit 26cd81990631ff799791629206e068126ff9e3a1 Author: Field G. Van Zee Date: Thu Jul 10 13:16:07 2014 -0500 Added bli_info_*() query functions. Details: - Added a new API family, bli_info_*(), which can be used to query information about how BLIS was configured. Most of these values are returned as gint_t, with the exception of the version string which is char*. - Changed how the testsuite driver queries information about how BLIS was configured (from using macro constants directly to using the new bli_info API). - Removed bli_version.c and its header file. - Added STRINGIFY_INT() macro to bli_macro_defs.h - Renamed info_t type in bli_type_defs.h to objbits_t (not because of an actual naming conflict, but because the name 'info_t' would now be somewhat misleading in the presence of the new bli_info API, as the two are unrelated). commit 970b43141697d8c31a033f59513bb59d7cc78ab0 Author: Field G. Van Zee Date: Thu Jul 10 09:30:00 2014 -0500 Minor bugfixes to BLAS compatibility layer. Details: - Changed bla_amax.c so that i?amax() routines now correctly return 0 if ( n < 1 || incx <= 0 ). - Changed bla_rotg.c and bla_rotmg.c to use bli_fabs() macro instead of f2c's abs() macro for float and double cases. - Thanks to Murtaza Ali for suggesting the two fixes above. - Updated label of fnormv to normfv in testsuite/input.operations. 
commit 8ccdfaef4c42ad8957af8607a1a9ee29b9277d4b Author: Marat Dukhan Date: Tue Jul 8 23:14:36 2014 -0700 Replicated logic from testsuite/Makefile in top-level Makefile to support make test commit caa6507ff3724c80d60987f309b8bbc5b50a9841 Author: Field G. Van Zee Date: Tue Jul 8 10:25:27 2014 -0500 Minor cleanup to standalone test drivers. Details: - Very minor code changes to standalone test drivers in 'test' directory. - Added *.so files to '.gitignore'. commit 6c65e9a58fe55990ebb99ec3986443e18af35338 Merge: cb12e456 daca500d Author: Field G. Van Zee Date: Tue Jul 8 10:13:49 2014 -0500 Merge branch 'master' of github.com:flame/blis commit cb12e456f94c196c093e52f02a7cbca0032fc86e Author: Field G. Van Zee Date: Tue Jul 8 10:07:46 2014 -0500 Fixed possible level-3 inf/NaN issue when beta=0. Details: - Redefined xpbys_mxn and xpbys_mxn_u/_l macros to employ a copy (instead of scaling by beta) when beta is zero. This will stamp out any possible infs or NaNs in the output matrix, if it happens to be uninitialized. Thanks to Tony Kelman for isolating this bug. commit daca500db5e2448ba0da8047b75eb0f88d9f40e3 Merge: ab3bc915 47023502 Author: Tyler Smith Date: Thu Jul 3 12:52:52 2014 -0500 Merge branch 'master' of http://github.com/flame/blis commit 4702350278af31f662b458127777dd4d85a3192f Author: Field G. Van Zee Date: Thu Jul 3 11:48:23 2014 -0500 Defined _ukernel_void() wrappers to micro-kernels. Details: - Added wrappers for micro-kernels so that users may invoke the micro-kernels without knowing what the function names actually are. This is useful when an application wishes to call the micro-kernel from a shared library instance of BLIS, where the application may not necessarily have the luxury of grabbing the micro-kernel name(s) from C preprocessor macros at compile-time. Also, since the wrappers use void* pointers, one's environment does not need to be aware of some BLIS types such as scomplex and dcomplex. 
These wrappers now join the level-1 and level-1f kernel wrappers, which pre-dated this commit. - Removed the wrapper definitions and prototypes from the micro-kernel test suite modules, and replaced calls to them with calls to the new wrappers mentioned above. commit ab3bc9153b914fbaf259e15b66c91d628e7c8661 Author: Tyler Smith Date: Thu Jul 3 11:19:43 2014 -0500 Fixed a bug for TRSM when BLIS_ENABLE_MULTITHREADING is not set but the multithreading environment variables are turned on commit b8134b720b985783ee6a582a3eb5d6c51f00d051 Author: Tyler Smith Date: Wed Jul 2 16:02:39 2014 -0500 Quick and dirty multithreading for TRSM Should work fine for small number of threads (up to 8 or maybe even 16). However, performance is yet untested. This parallelizes the "JR" loop for the left sided cases and the "IR" loop for the right sided cases. Future work is to parallelize the outer loops as well. commit e8ef69692831db07ddbe9485a5e504ac3f03e496 Author: Field G. Van Zee Date: Wed Jul 2 14:59:27 2014 -0500 Added shared library support to build system. Details: - Modified top-level Makefile to support building shared (dynamic) libraries. - Updated most configurations' make_defs.mk files to include necessary compiler/linker flags needed by top-level Makefile. - Note that by default, all configurations presently do NOT build shared libraries. To enable, one must change the value of BLIS_ENABLE_DYNAMIC_BUILD to 'yes'. commit b80df0f2cffb015da02e70a82b8512da9891ab67 Author: Field G. Van Zee Date: Mon Jun 23 13:52:39 2014 -0500 Added bump-version.sh script to 'build' directory. Details: - Added a bash script, bump-version.sh, to aid in incrementing the BLIS version string. commit 9ef1f1e21d083697fc730e48d7d9169c201f3da2 Author: Field G. Van Zee Date: Mon Jun 23 13:48:17 2014 -0500 CHANGELOG update (0.1.3) commit 036cc634918463b1caa0fd89c9a211f2f5639af7 (tag: 0.1.3) Author: Field G. 
Van Zee Date: Mon Jun 23 13:48:17 2014 -0500 Version file update (0.1.3) commit 09d9a3bf6763932d9f571085b2cfd1b8631eccba Author: Field G. Van Zee Date: Mon Jun 23 13:43:26 2014 -0500 Reverting version file to test new version script. Details: - Changed version file contents to 0.1.2 so that I can test out a new version file bumping script. commit ebb33965981dcb2b0bdee5fc7fdf6c959420f311 Author: Field G. Van Zee Date: Mon Jun 23 11:22:50 2014 -0500 Added 'version' file. commit 2cb9a5501a3cbeb6692cf68e896087ba73b6af69 Author: Field G. Van Zee Date: Mon Jun 23 10:42:29 2014 -0500 Removed 'version' from .gitignore file. commit b40dcefc5ee31f67aa3990e2e9d2ef8ed1386a25 Merge: 7101a8ee b693b0cd Author: Field G. Van Zee Date: Mon Jun 23 10:39:05 2014 -0500 Merge pull request #11 from Maratyszcza/stable [sc]axpy kernels for PNaCl commit b693b0cddcfb41450e3c09a3ab97acb44c1ccdec Author: Marat Dukhan Date: Sun Jun 22 13:44:25 2014 -0700 [SC]AXPY kernels for PNaCl commit 7101a8eec0327d6c3a7eb36eb4b0fd45c1c6d162 Merge: ad48dca2 020a831b Author: Field G. 
Van Zee Date: Thu Jun 19 21:46:50 2014 -0500 Merge pull request #10 from Maratyszcza/stable Portable Native Client port commit 020a831bc5f61744cb8354886aa679b99b1285f6 Author: Marat Dukhan Date: Thu Jun 19 00:58:26 2014 -0700 Code clean-up in PNaCl port commit 491be4f91ed725522f5cc7184053857c6c376ada Author: Marat Dukhan Date: Thu Jun 19 00:45:44 2014 -0700 Optimized dot product kernels for PNaCl commit 4b8e71aab80182873a2e138eb07902b8d8fd5480 Author: Marat Dukhan Date: Thu Jun 19 00:43:25 2014 -0700 Use AR rcs flags for PNaCl target to avoid warning commit 031deb2a5c718d569bde842590a791b812f4cf1d Author: Marat Dukhan Date: Wed Jun 18 03:11:34 2014 -0700 PNaCl configuration: use pnacl-ar instead of ar (fixes build issue on Mac) commit 68a02976e3c3638f0a9821342e269a1743e3ace3 Author: Marat Dukhan Date: Wed Jun 18 03:10:25 2014 -0700 Compile pnacl configuration in GNU11 mode to avoid warning about non-standard features commit 6f8462eb0ec278b89731e73ef583386a3371d095 Author: Marat Dukhan Date: Wed Jun 18 03:08:46 2014 -0700 Fix inconsistent VERBOSE macro in Makefile commit b2ffb4de8b6872cb23537ad282e557d11dcd9c8b Author: Marat Dukhan Date: Sun Jun 15 18:41:30 2014 -0400 Reformatted PNaCl GEMM kernels commit 6de2d472d98baa215264a776f3d5291780a6a085 Author: Marat Dukhan Date: Sun Jun 15 08:44:31 2014 -0400 CGEMM and ZGEMM kernels for PNaCl commit f064711a5e6fb3852c17c7520909b09dc27665f2 Author: Marat Dukhan Date: Sun Jun 15 06:27:37 2014 -0400 SGEMM and DGEMM kernels for PNaCl commit ad48dca22913a363899f0bef45553898718eebb1 Merge: ee2b6792 7118f87e Author: Field G. 
Van Zee Date: Sat Jun 14 15:10:13 2014 -0500 Merge pull request #9 from tkelman/memalign_windows Use _aligned_malloc instead of posix_memalign on Windows commit 7118f87e18b4941423472afc00215c1d1f2a1fcd Author: Tony Kelman Date: Sat Jun 14 06:53:20 2014 -0700 Use _aligned_malloc instead of posix_memalign on Windows commit ee2b679281ca45fb40b2198e293bc3bc3d446632 Author: Tyler Smith Date: Fri Jun 6 12:41:55 2014 -0500 Only include omp.h if BLIS_ENABLE_OPENMP is set commit 19c05dfaac43c627f86e897c8c00f1f9440754aa Author: Field G. Van Zee Date: Thu Jun 5 10:54:16 2014 -0500 CHANGELOG update (for 0.1.2). commit 00f232f8ed1f7c41619b12ebf779ebe2c3b2d3cd (tag: 0.1.2) Author: Tyler Smith Date: Mon Jun 2 13:40:57 2014 -0500 Added single-precision micro-kernel for Knights Corner aka MIC aka Xeon Phi commit 3fc60e491426f6248c0feae88d971e4d1f88fb95 Author: Field G. Van Zee Date: Wed May 21 11:34:42 2014 -0500 Fixed ldim alignment bug in core2 gemm ukernel. Details: - Fixed a bug in the dunnington/core2 gemm micro-kernels that resulted in a segmentation fault if a column-stored matrix's starting address was aligned, but its leading dimension was such that its second column was unaligned. Basically, the micro-kernel was assuming that aligned load instructions were safe when they actually were not. An extra condition that checks the alignment of cs_c (ie: the leading dimension in the column storage case) has now been added. Thanks to Michael Lehn for reporting this bug. commit 77a2d8dac8b242d7a202c9aabda3927ab68cf987 Merge: 8c5d6071 21fb0893 Author: Field G. Van Zee Date: Tue May 20 09:53:19 2014 -0500 Merge pull request #8 from tlrmchlsmth/master Added multithreading to most level-3 operations. 
commit 21fb089387ee7c87f6dc53b0f60f68b48d3ff3e8 Author: Tyler Smith Date: Mon May 19 20:38:55 2014 -0700 Reverting changes dunnington and reference configs Now they are unchanged from the main branch of BLIS commit 8a0ef0e0db5880730425926f8ba56b457a2ba764 Author: Tyler Smith Date: Fri May 16 13:44:14 2014 -0500 Fixed rounding error in bli_get_range_weighted commit 0b4b1680334528b1b60bc696537600f763198e92 Author: Tyler Smith Date: Fri May 16 12:23:37 2014 -0500 Fixed bug with disabling JC loop threading for right sided trmm commit 5c048a90d8dfa1dbde4e45fbc10ffcbdfe59d960 Author: Tyler Smith Date: Wed May 14 16:20:06 2014 -0500 Disabled parallelism for right-sided TRMM JC loop The loop has dependent iterations. commit 13a4c717ed0e273359dbaf5554cc4fa70b087d71 Author: Tyler Smith Date: Wed May 14 14:59:04 2014 -0500 Fixed bug with bli_get_range_weighted commit 45957cc7745e9bb1698408d72f53ef192e960820 Author: Tyler Smith Date: Tue May 13 17:14:46 2014 -0500 Allowed threading to be turned off No longer requires OpenMP to compile Define the following in bli_config.h in order to enable multithreading: BLIS_ENABLE_MULTITHREADING BLIS_ENABLE_OPENMP Also fixes a bug with bli_get_range_weighted commit bd1dc98ce599d74513a553fe3b37a2ebca1c3812 Author: Tyler Smith Date: Mon May 12 17:26:19 2014 -0500 Disabled multithreading of the kc loop commit 456df0372170bd7ca2c7e2d85365a69f1f04de88 Author: Tyler Smith Date: Wed Apr 30 12:28:00 2014 -0500 Replaced register blocksize hack with querying the register blocksize for determining parallelism granularity commit f4fdfe8fc573553eb36795b79cdf681270dab71b Merge: 31bb065b 8c5d6071 Author: Tyler Smith Date: Wed Apr 30 11:46:35 2014 -0500 Merge http://github.com/flame/blis commit 8c5d6071e24ba10a53669390a47287e86ff354ce Author: Field G. Van Zee Date: Tue Apr 29 12:26:12 2014 -0500 Added _check() routines for fprint[mv], rand[mv]. Details: - Added _check() routines for fprintm, fprintv, randm, and randv. 
- Added invocations to the above routines from their respective front-ends. commit 262cdabcc885bcf6636f4d8bb7d320f95e81d820 Author: Field G. Van Zee Date: Mon Apr 28 16:48:25 2014 -0500 Changed treatment of NULL object buffers. Details: - Relaxed the constraint in bli_obj_attach_buffer_check(), which required the buffer address being attached to be non-NULL. This is acceptable because the user was already able to create and use objects with NULL buffers (via bli_obj_create_without_buffer(), which initializes the buffer to NULL). - Inserted calls to newly defined function, bli_check_object_buffer(), into nearly all operations' _check() or _int_check() functions. This allows BLIS to abort peacefully if a computational routine is called with an object containing a NULL buffer. By contrast, under such conditions, BLAS would typically fail with a segmentation fault. - Within operation front-ends, moved the calls to _check()/_int_check() so that zero dimensions are checked first (and if found, execution returns with trivial or no computation). This resolves issue #7. Thanks to Jack Poulson for reporting this bug. commit 31bb065ba40ae0c5a614e743b8025abca012b99e Merge: 20e24430 7c619599 Author: Tyler Smith Date: Wed Apr 23 12:30:19 2014 -0500 Merge http://github.com/flame/blis commit 7c61959955c8ba78160d0ed4d1979022029d963b Author: Field G. Van Zee Date: Thu Apr 10 17:18:36 2014 -0500 Can now query register blocksizes from blk algs. Details: - Added a new field to blksz_t objects that allows one to attach a sub-object. Doing this allows us to associate a register blocksize with any given cache blocksize. That way, the register blocksize can be queried wherever the cache blocksize would normally be accessible (e.g. a blocked algorithm). - Modified bli_gemm_cntl.c (and 4m/3m variants) so that the register blocksizes are attached to the cache blocksizes after they are created. commit 58671597d3d450817b2eda576c05ed6dadd8af6d Author: Field G. 
Van Zee Date: Thu Apr 10 15:35:30 2014 -0500 Minor cleanups to level-2 _cntl.c files. Details: - Changed level-2 _cntl.c files so that the blocksizes for gemv are imported and used, rather than blocksizes being declared locally. - Whitespace changes to gemv_cntl.c and gemm_cntl.c files (as well as 4m/3m variants). - Removed test/old/test_blis2.c. commit 20e24430a772bc0fbaf24dec2f8c544096fd3f4e Author: Tyler Michael Smith Date: Tue Apr 8 17:50:44 2014 +0000 Some fixes for the bgq kernels commit bde697f75ec1e7f2decebee0c9bd620b4c134cd5 Author: Tyler Smith Date: Fri Apr 4 16:43:44 2014 -0500 Add -openmp to ldflags as well commit c332be8cd471eeace7b4fa4ae7443088b6a68ec3 Author: Tyler Smith Date: Fri Apr 4 16:37:50 2014 -0500 Added -openmp flag to Xeon Phi build for convenience commit e7ca9e4b4a24d585c9aec8293fc7bb79e4171ad0 Author: Tyler Smith Date: Fri Apr 4 16:31:15 2014 -0500 Used BLIS_DEFAULT_*_MR for rounding partitioning instead of BLIS_DEFAULT_*_MC commit 7b9b228c6fa4cfb70b1ebb855b009a036e85fac3 Author: Tyler Smith Date: Fri Apr 4 16:29:10 2014 -0500 Fix for tree barrier freeing bug commit 5ec93bd9a76096312d51c326ccde1e9bd0a436ab Author: Tyler Smith Date: Fri Apr 4 15:09:10 2014 -0500 Bunch of minor fixes Removed barrier after unpackm in all level3 blocked variants Now there is an implicit barrier inside unpackm that only occurs if C is packed (which is usually not the case) Moved the enabling of the tree barriers into bli_config.h Fed the default MR and NR for double precision into bli_get_range instead of the number 8 commit 575fb9b0b08f3bdb56ccde056da619d1585617c1 Author: Tyler Smith Date: Fri Apr 4 12:13:29 2014 -0500 Changed default blocking factor to default double precision MR and NR commit ab9c7880335c281432d5809fe0dec46753d22569 Author: Tyler Smith Date: Fri Apr 4 11:38:11 2014 -0500 Added faster tree barriers necessary for performance for Xeon Phi Fixed up some stuff in the thread info free functions Disabled threading for TRSM so that it actually 
works when threading environment variables are set commit ec58a7923cccac08632670caadf3cf6ff5dce766 Author: Tyler Smith Date: Fri Apr 4 10:22:48 2014 -0500 Freeing thread info paths. Also made herk IC and JC loops do weighted partitioning commit 2b6848b2397d6d84ca4e5f792fc51ad05e351a36 Merge: 4e3eb39a 21a0efb3 Author: Tyler Smith Date: Fri Apr 4 09:54:54 2014 -0500 Merge http://github.com/flame/blis Conflicts: kernels/bgq/1/bli_axpyv_opt_var1.c kernels/bgq/1/bli_dotv_opt_var1.c commit 4e3eb39aca4df0b9fdc003d468f368a2f2ba597d Author: Tyler Michael Smith Date: Fri Apr 4 14:50:03 2014 +0000 Some fixes to the bgq config MR and NR for double complex were wrong Default fusing factor for double precision was wrong as well commit 21a0efb33d7435139e9c43c1a4787a6bff533e26 Author: Field G. Van Zee Date: Thu Apr 3 16:38:44 2014 -0500 Fixed follow-up to issue #6. commit c318157a9bee8ea6e59be16f99f65d9271fe0d27 Author: Field G. Van Zee Date: Thu Apr 3 16:24:34 2014 -0500 Fixed issue #6 (incorrect 'restrict' usage). Details: - Fixed improper usage of restrict keyword in axpyv and dotv bgq kernels. (However, there may be other instances of similar misuse elsewhere in BLIS.) Thanks to Jeff Hammond for reporting this issue. commit b5150a1bf3bd89598e2b3aeac110eb5b44ac6c12 Author: Field G. Van Zee Date: Thu Apr 3 12:25:45 2014 -0500 Added #include "arm_neon.h" to ARM gemm ukernel. Details: - Inserted #include "arm_neon.h" into gemm ukernel source file for arm/neon. Thanks to Jean-Michel Hautbois for suggesting this fix. commit 2041c264517b6c590fd4f7e8253e6911b622d1c3 Author: Tyler Smith Date: Thu Apr 3 10:30:03 2014 -0500 Added barriers needed prior to doing scalar reset for rank-k updates. commit 47a90e69dfde3f4f8fdf90654248a6b499fbadbc Author: Field G. Van Zee Date: Tue Apr 1 14:34:31 2014 -0500 Attempted to fix uninitialized variable warnings. Details: - Added initialization statements to various macros used in level 1m and 1m-like operations. 
I wasn't able to reproduce the reported behavior, so hopefully this takes care of it. Thanks to Jeff Hammond for the report. commit d27b4f690c14b1f836f8c7a3c0e91e09d852f02e Author: Field G. Van Zee Date: Tue Apr 1 12:57:24 2014 -0500 Use generic paths for toolchain in POWER7. Details: - Fixed issue #4. Thanks to Jeff Hammond for contributing changes. commit 1584ae1c83c3a8c1af76acb46404747507650f19 Author: Tyler Smith Date: Fri Mar 28 15:15:48 2014 -0500 Fixed race condition involving scalar reset commit 459dde4acc09e49380da58fb7b246db488884ad9 Author: Tyler Smith Date: Thu Mar 27 17:06:45 2014 -0500 Made barrier after packing implicit. This also fixed a bug where barriers in the blocked variants were inserted after the inner packing routines, but not the outer packing routines. This allowed, for instance, the block of B to not be finished being packed before computation occurred. commit 9f78ec6e7e95fcad89a167b27cad7e2d74b6d122 Author: Tyler Smith Date: Thu Mar 27 14:18:46 2014 -0500 Some fixes for the internal functions, was inappropriately only having thread chief do some things. 
commit a6fd48345424e097f71652be013aa897e098b41e Author: Tyler Michael Smith Date: Wed Mar 26 17:19:46 2014 +0000 Added test drivers for level 3 BLAS that run tests in parallel using MPI commit 73b3db594864be0f9be9a0eb29bf961fa9c95f29 Author: Tyler Michael Smith Date: Wed Mar 26 15:39:05 2014 +0000 Some fixes for the bgq configuration commit f0824a04fc75e231c3a3d7757fa4e7294173282f Author: Tyler Smith Date: Mon Mar 24 15:21:42 2014 -0500 Initial commit to enable threading in TRSM, Also enabled weighted partitioning for herk, trmm Fixed bug where multiple threads would try to modify the same state in the internal level 3 functions Correctly computed a_next and b_next for gemm, herk macrokernels a_next and b_next point to the current micropanels in trmm commit 23d9eab354fbc88165889832955e126772bf8488 Merge: 5d5dc2ee fd3e32a5 Author: Tyler Smith Date: Thu Mar 20 16:54:35 2014 -0500 Merge https://github.com/flame/blis commit 5d5dc2eedef2f7c90d61371a1b457be5c06cf583 Author: Tyler Smith Date: Thu Mar 20 16:43:36 2014 -0500 Parallelized trmm and trmm3 Also fixed bugs in packm commit fd3e32a5f419fa412f46afe4dd1c3a26e15f3eb4 Author: Field G. Van Zee Date: Thu Mar 20 13:59:48 2014 -0500 Refined INSERT_GENTFUNC macro usage. Details: - Defined new INSERT_GENTFUNC macros so that the macro always takes exactly the number of arguments needed for the particular operation or variant being defined. Many operations were using INSERT_GENTFUNC macros that expected one auxiliary argument even though none were needed. Those instances have now been updated. Most of these instances were in the level-0 and -1v operations, as well as some operations defined in frame/util. commit 9b0e715f29338a1a1d6445907d2445c35f011121 Author: Field G. Van Zee Date: Wed Mar 19 15:47:54 2014 -0500 Minor simplifications to trmm, trsm macro-kernels. Details: - Simplified some code that would have allowed the diagonal of a trmm or trsm triangular matrix to intersect the short end of a micro-panel. 
This is disallowed via higher-level constraints on cache blocksizes, so this code was never needed and only served to obfuscate. - Updated some comments in trmm, trsm macro-kernels. commit a3902750b9ab4923433f7e353f3669c3c419f8e4 Author: Field G. Van Zee Date: Wed Mar 19 12:35:17 2014 -0500 Reorganized norm operations. Details: - Completely reorganized norm operations: - Renames: - fnormsc, fnormv, fnormm -> normfsc, normfv, normfm (2-norm) - absumv -> norm1v (vector 1-norm) - New operations: - norm1m (matrix 1-norm) - normiv, normim (infinity-norm) - amaxv (BLAS-like absolute maximum value index) - asumv (BLAS-like absolute sum) - Deprecated absumm, as it did not correspond to any actual norm. (However, an inlined version now exists in the testsuite module for randm.) commit c0140cb752f27e99742f85d23be2181c00a1335e Author: Tyler Smith Date: Wed Mar 19 11:21:16 2014 -0500 Fixed packm variants 3 and 4 where every thread was trying to manipulate the same state Now just performed by the master thread. 
commit fb42983bd9943711baa7d1c6496de1215bb816ef Author: Tyler Smith Date: Tue Mar 18 16:37:28 2014 -0500 Fixed a barrier bug and a thread decorator bug commit aa2405f8b23d0f8d2ec04790882f2176ef2e8fd8 Author: Tyler Smith Date: Tue Mar 18 15:23:09 2014 -0500 Fixing function pointer issues with thread decorator commit ec8b88f93533942d3711191873310e7ff281bda6 Author: Tyler Smith Date: Tue Mar 18 14:35:37 2014 -0500 Enabled threading for packm blocked variants 3 and 4 commit 0ac534cdf657bbf04601abfe719ba2887aab5da7 Author: Tyler Smith Date: Tue Mar 18 13:26:27 2014 -0500 Added decorator for calling parallelized internal functions Will allow for easy support for different threading models commit 5296f58975f7d351f88909cc80b6d0cffd73def7 Author: Tyler Smith Date: Mon Mar 17 17:15:35 2014 -0500 Fixing some bugs with herk parallelization commit c51d0110831eb89361b4720bf7ed75edbd26ebce Author: Tyler Smith Date: Mon Mar 17 15:00:47 2014 -0500 Initial multithreading support for HERK commit c720b141568d1f289146bf34ded08001f2c0dfbb Author: Tyler Smith Date: Mon Mar 17 11:39:32 2014 -0500 Switched to using environment variables to control threading. The environment variables all follow the format BLIS_X_NT, where X is the index of the loop as described in our paper Anatomy of High Performance Many-Threaded Matrix Multiplication. These indices are IR, JR, IC, KC, and JC. Also enabled parallelism for hemm and symm, but these are currently untested. 
commit 92233cf64274b27b2217c5cfffe75443ff6137a4 Author: Tyler Smith Date: Tue Mar 11 14:16:08 2014 -0500 Some fixes to gemm thread info tree creation, Changed microkernel tests to use the new BLIS_PACKM_SINGLE_THREADED instead of BLIS_SINGLE_THREADED commit 020f80c30289d8bcaa688bf600b01fae9b23b54f Author: Tyler Smith Date: Tue Mar 11 12:08:17 2014 -0500 Added files specific to threading for gemm and packm operations commit 8d8f4352a41926bc923e47be836365b6b726aff2 Author: Tyler Smith Date: Mon Mar 10 15:47:28 2014 -0500 Added single threaded thread info data structures specifically for gemm and packm commit 0e8677761175189583ca7d855e24b2bbdd2dada8 Merge: 2e727a02 b3bff631 Author: Tyler Smith Date: Mon Mar 10 15:16:21 2014 -0500 Merge branch 'master' of https://github.com/tlrmchlsmth/blis commit 2e727a025a8f796d2b6bd14f489d0ee72e7d1fc7 Author: Tyler Smith Date: Mon Mar 10 15:14:33 2014 -0500 Modifying the thread info data structures This change makes each operation have its own thread info type, allowing more fine control of threading in operations that have different types of suboperations commit a770590cf21a459f04bf941c58ee2afd272cc441 Author: Field G. Van Zee Date: Mon Mar 3 14:31:44 2014 -0600 Minor fixes to sumsqv, abmaxv. Details: - Minor update to bli_sumsqv_unb_var1() to bring it up-to-date with LAPACK 3.5.0's zlassq.f, which, starting with 3.4.2, returns NaN when the vector (or matrix) contains a NaN. - Minor change to bli_abmaxv_unb_var1() to more closely mimic the behavior of netlib BLAS's izamax(). There, a "less than or equal to" operator is used in the search instead of "less than", which would change the element index returned if there were multiple maximum values. - Added macro function definitions for bli_isinf() and bli_isnan(), which are currently implemented in terms of isinf() and isnan() from math.h. 
commit b3bff631eadf98b15cb422fb4a8e2f855c23e8a7 Merge: 2c158fb8 e8757b03 Author: Tyler Smith Date: Thu Feb 27 16:53:24 2014 -0600 Merge https://github.com/flame/blis commit 2c158fb885c27f7b599dc1e85b57edd684f19223 Merge: e4738c48 c2b2ab62 Author: Tyler Smith Date: Thu Feb 27 16:46:23 2014 -0600 Merge https://github.com/flame/blis Conflicts: frame/1m/packm/bli_packm_blk_var1.c commit e8757b03a74f9891632242e9a90efb32150826f5 Author: Field G. Van Zee Date: Thu Feb 27 16:40:07 2014 -0600 Use "%ld" as int format specifier in fprintm. Details: - Changed "%d" to "%ld" when printing integers via bli_fprintm(). - Meant to include this in previous commit. commit c663ce3b5170fee7dfb5b528b650d70c8e932cac Author: Field G. Van Zee Date: Thu Feb 27 16:32:57 2014 -0600 Fixed various bugs when C99 complex is enabled. Details: - Fixed various bugs in packm_*_cxk(), the 4m/3m micro-kernels, and elsewhere in the framework that were not yet set up to work properly when BLIS_ENABLE_C99_COMPLEX is defined in bli_config.h. - Extensive changes to f2c-derived files in frame/compat/f2c to allow C99 complex storage. Most of these changes center around accessing real and imaginary components via bli_?real()/bli_?imag() accessor macros, and setting of values via bli_?sets() assignment macros. (Thanks to Vladimir Sukarev for pointing out that _ENABLE_C99_COMPLEX was broken.) commit e4738c48e00b89391d9baa1fd0aa62d1ea2f95e6 Author: Tyler Smith Date: Thu Feb 27 16:29:46 2014 -0600 Added support for parallelism in gemm micro-kernel commit bfe214b633765ed40b57b330fbb84c332663aa40 Author: Tyler Smith Date: Thu Feb 27 15:53:10 2014 -0600 Fixed bug with parallel packing, and bug with allocating an array of thread infos In packm variant 1, the variable p_begin was incremented each iteration, causing a dependency. This dependency was removed, allowing each iteration to be executed in parallel. Somewhere in bli_threading.c, I was allocating an array of pointers instead of an array of structs. 
commit 6193d9ceea552e67170dba45abde04c64271c705 Author: Tyler Smith Date: Thu Feb 27 14:09:19 2014 -0600 Fixed bug in thread trees commit ac5a2de1d17ffd460b00fee9757898525a09abae Merge: 01b125e8 bd3c7ecf Author: Tyler Smith Date: Thu Feb 27 11:59:33 2014 -0600 Merge branch 'master' of https://github.com/tlrmchlsmth/blis commit 01b125e815f19410e8e0611d088b84570e499e93 Author: Tyler Smith Date: Thu Feb 27 11:55:45 2014 -0600 First pass at adding parallelism to BLIS. Added a multithreading infrastructure that should be independent of multithreading implementation in the future. Currently, gemm blocked variants 1f and 2f, and packm variant blocked variant 1 is parallelized. commit c2b2ab62707e4174892aff3ce65f36f54878fae5 Author: Field G. Van Zee Date: Wed Feb 26 12:46:45 2014 -0600 Deprecated panel stride alignment in bli_config.h. Details: - Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE from bli_config.h of all configurations. It was already going unused in packm_init() since the recent 4m/3m commit. This setting was rarely, if ever, useful, and its existence only posed a potential risk for 4m/3m-based implementations. - Removed BLIS_CONTIG_STRIDE_ALIGN_SIZE usage from mem_pool_macro_defs.h. - Updated comments regarding CONTIG_STRIDE_ALIGN_SIZE in template micro-kernels. commit f18aee83a5ac1b14808686fc3c5a3c846a1d99b9 Author: Field G. Van Zee Date: Tue Feb 25 17:58:42 2014 -0600 CHANGELOG update (for 0.1.1). commit fde5f1fdece19881f50b142e8611b772a647e6d2 (tag: 0.1.1) Author: Field G. Van Zee Date: Tue Feb 25 13:34:56 2014 -0600 Added extensive support for configuration defaults. Details: - Standard names for reference kernels (levels-1v, -1f and 3) are now macro constants. Examples: BLIS_SAXPYV_KERNEL_REF BLIS_DDOTXF_KERNEL_REF BLIS_ZGEMM_UKERNEL_REF - Developers no longer have to name all datatype instances of a kernel with a common base name; [sdcz] datatype flavors of each kernel or micro-kernel (level-1v, -1f, or 3) may now be named independently. 
This means you can now, if you wish, encode the datatype-specific register blocksizes in the name of the micro-kernel functions. - Any datatype instances of any kernel (1v, 1f, or 3) that is left undefined in bli_kernel.h will default to the corresponding reference implementation. For example, if BLIS_DGEMM_UKERNEL is left undefined, it will be defined to be BLIS_DGEMM_UKERNEL_REF. - Developers no longer need to name level-1v/-1f kernels with multiple datatype chars to match the number of types the kernel WOULD take in a mixed type environment, as in bli_dddaxpyv_opt(). Now, one char is sufficient, as in bli_daxpyv_opt(). - There is no longer a need to define an obj_t wrapper to go along with your level-1v/-1f kernels. The framework now provides a _kernel() function which serves as the obj_t wrapper for whatever kernels are specified (or defaulted to) via bli_kernel.h. - Developers no longer need to prototype their kernels, and thus no longer need to include any prototyping headers from within bli_kernel.h. The framework now generates kernel prototypes, with the proper type signature, based on the kernel names defined (or defaulted to) via bli_kernel.h. - If the complex datatype x (of [cz]) implementation of the gemm micro-kernel is left undefined by bli_kernel.h, but its same-precision real domain equivalent IS defined, BLIS will use a 4m-based implementation for the datatype x implementations of all level-3 operations, using only the real gemm micro-kernel. commit 15b51e990f1d21333b5f7af97c211756247336e5 Merge: 6363a9f6 fc04b5eb Author: Field G. Van Zee Date: Fri Feb 21 09:04:32 2014 -0600 Merge branch 'master' of github.com:fgvanzee/blis commit fc04b5eb69868c341ce03f5ef1f02de4b8c121b0 Merge: b29e1c2b d1813c9d Author: Field G. 
Van Zee Date: Fri Feb 21 09:04:13 2014 -0600 Merge pull request #3 from figual/master New ARM armv7a kernels and Assembly file consideration in Makefile commit d1813c9dee34410833db5061e6588ec1a6c9ecd4 Author: Francisco Igual Date: Fri Feb 21 15:14:31 2014 +0100 Added new armv7a micro-kernels and configuration files from Werner Saar. commit 0cd098c03a000ed9426a7e9135190696da8cadbc Author: Francisco Igual Date: Fri Feb 21 15:12:30 2014 +0100 o Modified Makefile to consider .S assembly microkernels. commit 6363a9f658257fe3d814a3dce5308f807adb54a2 Author: Field G. Van Zee Date: Wed Feb 19 17:00:52 2014 -0600 Added level-3 support for complex via 4m-/3m. Details: - Added the ability to induce complex domain level-3 operations via new virtual complex micro-kernels which are implemented via only real domain micro-kernels. Two new implementations are provided: 4m and 3m. 4m implements complex matrix multiplication in terms of four real matrix multiplications, whereas 3m uses only three and thus is capable of even higher (than peak) performance. However, the 3m method has somewhat weaker numerical properties, making it less desirable in general. - Further refined packing routines, which were recently revamped, and added packing functionality for 4m and 3m. - Some modifications to trmm and trsm macro-kernels to facilitate indexing into micro-panels which were packed for 4m/3m virtual kernels. - Added 4m and 3m interfaces for each level-3 operation. - Various other minor changes to facilitate 4m/3m methods. commit b29e1c2b278c177e104c84ba462820ee8296df6c Merge: ee60377e bd3c7ecf Author: Field G. Van Zee Date: Fri Feb 14 14:11:54 2014 -0600 Merge pull request #2 from tlrmchlsmth/master Fixes and improvements to xeon phi implementation. 
commit bd3c7ecfb54a9b9851c7d364f41c21e4cff52f6f Author: Tyler Smith Date: Fri Feb 14 14:05:57 2014 -0600 Removing changes to input.general and input.operations commit ce066863683cb4e910270cf8ab8e138b01ff3358 Author: Tyler Smith Date: Fri Feb 14 13:40:24 2014 -0600 Fixed more Xeon Phi bugs, especially with scattered update commit 31134b5c7076423aee1b4f494e925f27171d97e6 Author: Tyler Smith Date: Fri Feb 14 11:19:44 2014 -0600 Some fixes, changes, and improvements to the microkernel for the Xeon Phi commit ee60377e467862b9d8a7205c45dce5cf66c78c46 Author: Field G. Van Zee Date: Thu Feb 13 14:03:31 2014 -0600 Shifted some fields in info_t. Details: - Shifted the pack order, pack buffer type, and structure type fields to make room for an extra bit in the pack type/status field. commit bd3ab1ad4cf42f8bc30ab262acf8eccb49bb1a08 Author: Field G. Van Zee Date: Thu Feb 13 09:29:55 2014 -0600 Minor fixes to trsm consistent with prev on trmm. Details: - Removed use of bli_min() and bli_max() that were only being used to try to support situations where the diagonal would intersect the short end of some micro-panels, which is a situation that is disallowed at a higher level by various constraints on the register and cache blocksize. This only affected trsm_ll and trsm_lu. - Use panel stride as passed into the macro-kernel rather than compute it via k and PACKMR/PACKNR. This affects all macro-kernels of trsm. commit 6260b0b5f8bd248f3f66e5a1c6854bdbd9d02ad0 Author: Field G. Van Zee Date: Thu Feb 13 09:19:56 2014 -0600 Fixed obscure bug in trmm_ll, trmm_lu. Details: - Fixed an obscure bug in left-hand trmm that would only manifest when non-zero register blocksize extensions (PACKMR > MR or PACKNR > NR) are used. 
- Removed use of bli_min() and bli_max() that were only being used to try to support situations where the diagonal would intersect the short end of some micro-panels, which is a situation that is disallowed at a higher level by various constraints on the register and cache blocksize. This only affected trmm_ll and trmm_lu. - Use panel stride as passed into the macro-kernel rather than compute it via k and PACKMR/PACKNR. This affects all macro-kernels of trmm. commit 16915c1c1e55c660bf82141cdadf7c0860d5b464 Author: Field G. Van Zee Date: Tue Feb 11 10:54:19 2014 -0600 Fixed an obscure bug in packm_cxk(). Details: - Fixed a bug in packm_cxk() whereby the packm ukernel was being chosen from ldp, which is always equal to PACKMR or PACKNR. The problem with this is that the pack ukernels were implicitly assuming that the panel dimension of the panel being packed was equal to ldp, which is not the case when the register blocksize extensions are non-zero (ie: when PACKMR > MR or PACKNR > NR, whichever is applicable). This problem has been fixed by passing ldp into the pack ukernels, which now walk through the packed micro-panel region by incrementing by this value, rather than incrementing by the inherent panel dimension value assumed by each packm ukernel (e.g. 4 in the case of packm_ref_4xk). - Also fixed a very minor edge case inefficiency whereby pack ukernels smaller than the default were not being used in edge cases, and instead those situations were being handled by scal2m. This is related to the issue above, because the pack ukernel itself was being chosen based on ldp instead of the panel dimension. commit b7da57b282c5a5e2208946e60309d2352f55351d Author: Field G. Van Zee Date: Tue Feb 11 10:28:23 2014 -0600 Updated calls to packm_blk_var2() in testsuite. Details: - In ukernel testsuite modules, replaced calls to packm_blk_var2() with _var1(). Meant to include this in previous commit. commit c255a293e25b2223c88e8800267cd06ad2a90041 Author: Field G.
Van Zee Date: Mon Feb 10 14:31:24 2014 -0600 Consolidated packm_blk_var2 and var3. Details: - Consolidated the functionality previously supported by packm_blk_var2() and packm_blk_var3() into a new variant, packm_blk_var1(). - Updates to packm_gen_cxk(), packm_herm_cxk.c(), and packm_tri_cxk() to accommodate above changes. - Removed packm_blk_var3() and retired packm_blk_var2() to frame/1m/packm/old. - Updated all level-3 _cntl_init() functions so that the new, more versatile packm_blk_var1 is used for all level-3 matrix packing. commit 32d8f264ae7b28155f5d7b21dcc5ecb78da2e0ab Author: Field G. Van Zee Date: Sun Feb 9 10:07:37 2014 -0600 Refactored packm variants. Details: - Revised packm_blk_var2() and _var3() by encapsulating the general, hermitian/symmetric, and triangular panel-packing subproblems into separate functions: packm_gen_cxk(), packm_herm_cxk(), and packm_tri_cxk(), respectively. Also, homogenized the packm code as well as the new specialized packm_*_cxk() code to further improve readability. commit 6c8067028707947fcdf4f856a272e15bb9ed91e3 Author: Field G. Van Zee Date: Fri Feb 7 11:27:15 2014 -0600 Renamed enumerated type in testsuite and modules. Details: - Renamed the test suite's "mt_impl_t" enumerated type to "iface_t", and renamed all corresponding "impl" variables to "iface". commit 6c12598b1bc567f0b08f58aebdc753a1c1390378 Author: Field G. Van Zee Date: Thu Feb 6 18:26:35 2014 -0600 Employ simpler INSERT_ macro for ref ukernels. Details: - Defined a new macro, INSERT_GENTFUNC_BASIC0, which takes only one argument--the base name of the function--and employed this macro in the reference micro-kernel files instead of the _BASIC macro, which takes one auxiliary argument. That argument was not being used and probably just acted to unnecessarily obfuscate. commit 32cae66326b68706d0e695cfd60c9ca5bc32c534 Author: Field G. Van Zee Date: Thu Feb 6 18:06:42 2014 -0600 Fixed some instances of sloppy 'restrict' usage. 
Details: - Fixed some technical incorrectness with some usage of the 'restrict' keyword in the reference trsm micro-kernels. - Tweak to testsuite/Makefile that causes rebuild if libblis was touched. commit 7aceef7683e2a2aff3c7ec2a73508036af2e19e2 Author: Field G. Van Zee Date: Thu Feb 6 17:31:19 2014 -0600 Updated comments in macro-kernels. Details: - Updated (and fixed some errors in) the "Assumptions/assertions" comment section of macro-kernels. - Changed register blocksizes of reference configuration to MR = 8 and NR = 4. It's always good for MR != NR in the reference configuration since it may help uncover bugs related to non-square micro-kernels. commit 8fd292aa78950bcdf556605718f09d13f9575abc Author: Field G. Van Zee Date: Thu Feb 6 14:32:21 2014 -0600 Pass panel dimensions into macro-kernels. Details: - Modified the interfaces to the datatype-specific macro-kernels so that: - pd_a and pd_b are passed in (which contain the panel dimensions of packed panels of a and b). - rs_a and cs_b are no longer passed in (they were guaranteed to be 1). - Modified implementations of datatype-specific macro-kernels so pd_a, pd_b, cs_a, and rs_b are used instead of cpp macros for MR, NR, PACKMR, and PACKNR, respectively. - Declare temporary c matrices (ct) as being maxmr-by-maxnr, which for now is equivalent to being mr-by-nr. maxmr and maxnr are declared in a new header file bli_kernel_post_macro_defs.h. commit 3404e6657eabb017cd1580a2f1dd8e6fb13df923 Author: Field G. Van Zee Date: Wed Feb 5 11:19:10 2014 -0600 Deprecated incremental blocksize macro const defs. Details: - Removed macro constant definitions related to incremental blocksizes from all configurations' bli_kernel.h files. This change is minor and is mostly a cleanup related to a previous commit. commit 1e9afd39a63e0a58167d4439c1a0a880a4a35657 Author: Field G. Van Zee Date: Tue Feb 4 20:15:19 2014 -0600 Comment updates (removed vestiges of "bd"). commit 5cf58f7c2d5bc0d2d94d9576f7158d8f133b7aac Author: Field G. 
Van Zee Date: Tue Feb 4 09:15:19 2014 -0600 Added early returns for "object is zeros" case. Details: - Added some logic to packm_init(), pack_int() and gemm_int() so that (a) objects marked as BLIS_ZEROS are not packed, and (b) those objects are not computed with. This functionality is not currently needed by any existing implementations, but may be used in the future. commit 6bbd4be769a9b344a55abe5ddaca1a99fd29f7b4 Author: Field G. Van Zee Date: Mon Feb 3 13:15:25 2014 -0600 Added 'f' on some gemm and trmm blocked variants. Details: - Added 'f' to some block variant files/functions to be consistent with other file/functions' naming convention. Here, the f indicates partitioning in the "forward" direction. commit eb13cb2c6b182df5e2a9b88c76f50e2cee25b9e0 Author: Field G. Van Zee Date: Mon Feb 3 11:07:01 2014 -0600 Removed redundant non-gemm blksz_t creation. Details: - Removed code that creates duplicate blksz_t objects for herk, trmm, and trsm. Instead, the gemm blksz_t objects are accessed via extern and used directly. This reduces the amount of code associated with each of the three _cntl_init() and _cntl_finalize() function. commit 0a023a7d9e58e53b8c204a5f49aa8ca9afeba938 Author: Field G. Van Zee Date: Wed Jan 29 14:02:08 2014 -0600 Introduced new level-3 front-end layer. Details: - Added new _front() functions for each level-3 operation. This is done so that the choosing of the control tree (and *only* the choosing of the control tree) happens in what was previously the "front end" (e.g. bli_gemm()). That control tree is then passed into the _front() function, which then performs up-front tasks such as parameter checking. commit 251c5d112196d37b183e554bc9d406104aed65fb Author: Field G. Van Zee Date: Tue Jan 28 19:40:29 2014 -0600 Removed redundant hemm, her2k control trees. Details: - Removed code that generated a control tree specifically for hemm and symm. Instead, the gemm control tree is now configured so that it works for gemm, hemm, or symm. 
- Retired most her2k code, as it was not being used. (Currently, her2k is implemented as two invocations of herk.) I couldn't think of many situations where her2k variants were needed. - Removed some older her2k code. commit 5a36e5bf2f59d1e85d6dbce32a07d604c5e82d11 Author: Field G. Van Zee Date: Mon Jan 27 11:13:00 2014 -0600 Embed func_t microkernel objects in control trees. Details: - Modified all control tree node definitions to include a new field of type func_t*, which is similar to a blksz_t except that it contains one function pointer (each typed simply as void*) for each datatype. We use the func_t* to embed pointers to the micro-kernels to use for the leaf-level nodes of each control tree. This change is a natural extension of control trees and will allow more flexibility in the future. - Modified all macro-kernel wrappers to obtain the micro-kernel pointers from the incoming (previously ignored) control tree node and then pass the queried pointer into the datatype-specific macro-kernel code, which then casts the pointer to the appropriate type (new typedefs residing in bli_kernel_type_defs.h) and then uses the pointer to call the micro-kernel. Thus, the micro-kernel function is no longer "hard-coded" (that is, determined when the datatype-specific macro-kernel functions are instantiated by the C preprocessor). - Added macros to bli_kernel_macro_defs.h that build datatype-specific base names if they do not exist already, and then uses those to build datatype-specific micro-kernel function names. This will allow developers extra flexibility if they wanted to, for example, name each of their datatype-specific micro-kernels differently (e.g. double real might be named bli_dgemm_opt_4x4() while double complex might be named bli_zgemm_opt_2x2()). - Inserted appropriate code into _cntl_init() functions that allocates and initializes a func_t object for the corresponding micro-kernels.
The gemm ukernel func_t object is created once, in bli_gemm_cntl_init(), and then reused via extern wherever possible. commit 6cbd6f1c7f1915180aa28939833afde48665c5ae Author: Field G. Van Zee Date: Fri Jan 24 10:38:29 2014 -0600 Removed commented mixed domain macro-kernel code. Details: - Removed commented-out code from macro-kernels that was supposed to facilitate implementing mixed domain (complex times real) matrix multiplication. This functionality is still (probably possible), but I'm getting tired of looking at the code every time I edit a macro-kernel. Plus, there are probably ways of doing it at a higher level, via control trees. commit 29778be1119f1a884330d7f8dc424a2df4101d58 Author: Field G. Van Zee Date: Wed Jan 22 16:03:11 2014 -0600 Removed b_aux field from cntl nodes. Details: - Removed b_aux field from all control tree node definitions. This field was being used in certain optimizations (incremental blocking) that were not actually being employed within BLIS, and are probably not employed by others. - Updated all _cntl_obj_create() function definitions and invocations according to above change. - Retired bli_gemm_blk_var4.c, which was one such function that employed incremental blocking, but which was never called by BLIS itself. commit 06ac727a42ec9e832c7832745036702014638f99 Author: Field G. Van Zee Date: Wed Jan 15 16:44:52 2014 -0600 Updated some comments in level-3 front ends. commit d628bf1da1560f1f5126a1ddfed8714f0a4b8da3 Author: Field G. Van Zee Date: Wed Jan 15 11:40:12 2014 -0600 Consolidated pack_t enums; retired VECTOR value. Details: - Changed the pack_t enumerations so that BLIS_PACKED_VECTOR no longer has its own value, and instead simply aliases to BLIS_PACKED_UNSPEC. This makes room in the three pack_t bits of the info field of obj_t so that two values are now unused, and may be used for other future purposes. - Updated sloppy terminology usage in comments in level-2 front-ends. 
(Replaced "is contiguous" with more accurate "has unit stride".) commit ddc8c1c379b4787be5954802906593d7ea144452 Author: Field G. Van Zee Date: Mon Jan 13 14:55:43 2014 -0600 Suppress warning in Makefile (UNINSTALL_LIBS). Details: - Redirect errors to /dev/null when using 'find' to locate libraries that would be uninstalled upon executing "make uninstall-old". Before, if the Makefile was read before $(INSTALL_PREFIX)/lib existed, a "No such file or directory" message was emitted. This message was harmless, but is now suppressed in this situation. commit f8f67d7251bffc05020e20527c100c8115fd5e55 Author: Field G. Van Zee Date: Fri Jan 10 09:06:11 2014 -0600 Typecast bli_getopt() return value in testsuite. Details: - In the test suite driver, inserted an explicit typecast of the return value of bli_getopt() prior to parsing. The lack of typecast caused a problem on at least one system whereby a return value of -1 was interpreted as a garbage character. Thanks to Francisco Igual for finding and submitting this fix. commit e7f154fe2ed3e10e2323cefe5d25c2c23ac902c4 Author: Field G. Van Zee Date: Fri Jan 10 08:48:07 2014 -0600 Applied edge case fix to arm/neon microkernel. Details: - Applied an edge case bugfix, courtesy of Francisco Igual, to the current double precision real gemm microkernel in kernels/arm/neon/3. commit 89c76a8a51d070d263c13bfa5ace65769509f2b4 Author: Field G. Van Zee Date: Thu Jan 9 12:08:37 2014 -0600 Allow building outside source distribution. Details: - Modified build system (mostly configure and top-level Makefile) so that a user can build a BLIS library outside of the top-level directory of the source distribution. - Added "test" target to Makefile so that the user can run "make test", which will compile, link, and run the testsuite binary. This works even if the build directory is externally located, thanks to the test suite binary's new -g and -o command-line options.
Also, when creating the test suite via the top-level Makefile, the linking is against the local archive, in lib/, rather than at /lib. - Modified testsuite/Makefile so that it links against the library built locally, in ../lib/. - Added "-lm" to LDFLAGS of most configurations' make_defs.mk. - Various other cleanups to build system. commit 12fa82ec12cc340ab28552997d9d50f7c98691f8 Author: Field G. Van Zee Date: Wed Jan 8 16:09:26 2014 -0600 Implemented bli_getopt(). Details: - Added bli_getopt.c and .h files to frame/base. These files implement a custom version of getopt(), which may be used to parse command line options passed into a program via argc/argv. I am implementing this function myself, as opposed to using the version available via unistd.h, for portability reasons, as the only requirements are string.h (which is available via the standard C library). - Modified test suite to allow the user to specify the file name (and/or path) to the parameters and operations input files: -g may be used to specify the general input file and -o to specify the operations input file. If -g or -o or both are not given, default filenames are assumed (as well as their existence in the current directory). commit cafb58e86ea5cfb21b9eedc57ca8ebbf24252098 Author: Field G. Van Zee Date: Mon Jan 6 13:28:36 2014 -0600 Updated template micro-kernels to use auxinfo_t. Details: - Updated template micro-kernel implementations (located in config/template/kernels), to adhere to the new auxinfo_t interface. Meant to include this change in a0331fb1. - Changed template configuration to use 64-bit integers (for both BLIS and the BLAS compatibility layer). commit 9ab126b499c3805045020cb89a8a5848e28d3bf5 Author: Field G. Van Zee Date: Mon Jan 6 12:13:26 2014 -0600 Removed error checks in netlib->BLIS param mapping Details: - Disabled error checking in netlib-to-BLIS parameter mapping functions.
If the char value input to these functions was not one of the defined values, bli_check_error_code() with the appropriate error code value would be called, resulting in an abort(). This was unnecessary and redundant since these routines are currently only used within the BLAS compatibility layer, and they are only called AFTER parameter checking has already been performed on the original BLAS char values. If the application tried to override xerbla() to prevent an abort() from being called, this error checking would still get in the way. Thus, instead of reporting the error situation to the framework (ie: calling abort()), an arbitrary BLIS parameter value is now chosen and the function returns normally. Thanks to Jeff Hammond for finding and reporting this issue. commit 2cb13600f9f9601c60e7f96f4ca159d169ade9cb Author: Field G. Van Zee Date: Fri Jan 3 12:29:13 2014 -0600 Updated year in copyright headers to 2014. commit 290fa54e0083c9c837188b8321b13b1b282e7b0c Author: Field G. Van Zee Date: Fri Dec 20 14:10:26 2013 -0600 Store variable panel strides in trmm/trsm auxinfo. Details: - Changed the value being stored into the auxinfo_t structure in trmm and trsm macro-kernels. Whereas before we stored whatever value was provided to the macro-kernel implementation via ps_a/ps_b, now we store the stride that will advance to the next variable-length micro-panel of the triangular matrix A (left) or B (right). - Whitespace changes to the files affected above. commit e3a6c7e77667fd749248df3f75f880266c3136ec Author: Field G. Van Zee Date: Thu Dec 19 16:29:31 2013 -0600 Macroized conditionals for a2/b2 in macro-kernels. Details: - Replaced conditional expressions in macro-kernels related to computing the addresses a2 and b2 (a_next and b_next) with a preprocessor macro invocation, bli_is_last_iter(), that tests the same condition. - Updated gemm_ukr module to use auxinfo_t argument. - Whitespace changes in test suite ukr modules. 
commit a0331fb10a50393e31d16339053b75b944132da1 Author: Field G. Van Zee Date: Thu Dec 19 14:50:11 2013 -0600 Introduced auxinfo_t argument to micro-kernels. Details: - Removed a_next and b_next arguments to micro-kernels and replaced them with a pointer to a new datatype, auxinfo_t, which is simply a struct that holds a_next and b_next. The struct may hold other auxiliary information that may be useful to a micro-kernel, such as micro-panel stride. Micro-kernels may access struct fields via accessor macros defined in bli_auxinfo_macro_defs.h. - Updated all instances of micro-kernel definitions, micro-kernel calls, as well as macro-kernels (for declaring and initializing the structs) according to above change. commit 392428dea4001fe4384efe29f6cde32f8abeeb35 Author: Field G. Van Zee Date: Thu Dec 12 19:01:47 2013 -0600 Added "ri" scalar macros. Details: - Added set of basic scalar macros that take arguments' real and imaginary components separately, named like the previous set except with the "ris" (instead of "s") suffix. - Redefined the previous set of scalar macros (those that take arguments "whole") in terms of the new "ri" set. - Renamed setris and getris macros to sets and gets. - Renamed setimag0 macros to seti0s. - Use bli_?1 macro instead of a local constant in bla_trmv.c, bla_trsv.c. commit f60c8adc2f61eaba06b892f4e73000159de93056 Author: Field G. Van Zee Date: Tue Dec 10 14:39:56 2013 -0600 Minor updates to dunnington configuration. Details: - Added commented alternatives to dunnington configuration's bli_kernel.h. - Minor reformatting of optimization flag variables in make_defs.mk. commit 4ef20150492db254b5baf2368add62e19b0ac11b Author: Field G. Van Zee Date: Mon Dec 9 18:53:03 2013 -0600 Tweaks to dunnington configuration (x86_64/core2). Details: - Updated BLIS_DEFAULT_KC_D from 256 to 384. - Enabled cache blocksize extension of up to 25% for MC and KC (for double-precision real). commit 5ad2ce7bf5ba3ea955e6d517bfd270e02820263b Author: Field G. 
Van Zee Date: Mon Dec 9 18:30:49 2013 -0600 Minor x86_64 (core2) kernel fixes. Details: - Fixed copy-and-paste bug whereby [scz]gemmtrsm_u_opt_d4x4 kernels for x86_64/core2 were calling the wrong reference code (l instead of u). - Fixed some unused variables in x86_64/core2 dotaxpyv and dotxaxpyf kernels. - Minor typecasting fix in testsuite/src/test_libblis.c. - Makefile updates. commit d289f5d3a9c0e1a68a17c1c32b736e282a289c4c Author: Field G. Van Zee Date: Thu Dec 5 10:56:13 2013 -0600 Whitespace changes to level-2 blocked variants. Details: - Joined some lines in level-2 blocked variants to match formatting used in level-3 blocked variants. - Streamlined implementation of bli_obj_equals() in bli_query.c. commit b444489f100d218bc8ef29b01ff8489c358559f9 Author: Field G. Van Zee Date: Tue Dec 3 16:08:30 2013 -0600 Added new "attached" scalar representation. Details: - Added infrastructure to support a new scalar representation, whereby every object contains an internal scalar that defaults to 1.0. This facilitates passing scalars around without having to house them in separate objects. These "attached" scalars are stored in the internal atom_t field of the obj_t struct, and are always stored to be the same datatype as the object to which they are attached. Level-3 variants no longer take scalar arguments, however, level-3 internal back-ends still do; this is so that the calling function can perform subproblems such as C := C - alpha * A * B on-the-fly without needing to change either of the scalars attached to A or B. - Removed scalar argument from packm_int(). - Observe and apply attached scalars in scalm_int(), and removed scalar from interface of scalm_unb_var1().
- Renamed the following functions (and corresponding invocations): bli_obj_init_scalar_copy_of() -> bli_obj_scalar_init_detached_copy_of() bli_obj_init_scalar() -> bli_obj_scalar_init_detached() bli_obj_create_scalar_with_attached_buffer() -> bli_obj_create_1x1_with_attached_buffer() bli_obj_scalar_equals() -> bli_obj_equals() - Defined new functions: bli_obj_scalar_detach() bli_obj_scalar_attach() bli_obj_scalar_apply_scalar() bli_obj_scalar_reset() bli_obj_scalar_has_nonzero_imag() bli_obj_scalar_equals() - Placed all bli_obj_scalar_* functions in a new file, bli_obj_scalar.c. - Renamed the following macros: bli_obj_scalar_buffer() -> bli_obj_buffer_for_1x1() bli_obj_is_scalar() -> bli_obj_is_1x1() - Defined new macros to set and copy internal scalars between objects: bli_obj_set_internal_scalar() bli_obj_copy_internal_scalar() - In level-3 internal back-ends, added conditional blocks where alpha and beta are checked for non-unit-ness. Those values for alpha and beta are applied to the scalars attached to aliases of A/B/C, as appropriate, before being passed into the variant specified by the control tree. - In level-3 blocked variants, pass BLIS_ONE into subproblems instead of alpha and/or beta. - In level-3 macro-kernels, changed how scalars are obtained. Now, scalars attached to A and B are multiplied together to obtain alpha, while beta is obtained directly from C. - In level-3 front-ends, removed old function calls meant to provide future support for mixed domain/precision. These can be added back later once that functionality is given proper treatment. Also, removed the creating of copy-casts of alpha and beta since typecasting of scalars is now implicitly handled in the internal back-ends when alpha and beta are applied to the attached scalars. commit 992de486d6f23e69a623abd15ae77d7881d13871 Merge: 9552e6ee fd4ac636 Author: Field G. Van Zee Date: Mon Dec 2 13:58:46 2013 -0600 Unimplemented kernels now call reference. 
Details: - Updated arm, bgq, loongson3a, and x86_64 kernels so that unimplemented datatypes call the corresponding reference kernel. Previously, these kernel functions called abort() with a "not yet implemented" error message. commit fd4ac636d9a55cec1476a444bd4e70def219dc8f Author: Field G. Van Zee Date: Mon Dec 2 13:50:36 2013 -0600 Unimplemented kernels now call reference. Details: - Updated micro-kernels for arm, bgq, loongson3a, and x86_64 so that unimplemented kernel functions simply call the corresponding reference implementation. (Previously, these unimplemented functions would abort() with a "not yet implemented" message.) commit 9552e6ee824d4345d5e908e869e071d19829819a Author: Field G. Van Zee Date: Sun Nov 24 11:40:31 2013 -0600 Removed optional scaling from packm control tree. Details: - Removed does_scale field from packm control tree node and bli_packm_cntl_obj_create() interface. Adjusted all invocations of _cntl_obj_create() accordingly. - Redefined/renamed macros that are used in aliasing so that now, bli_obj_alias_to() does a full alias (shallow copy) while bli_obj_alias_for_packing() does a partial alias that preserves the pack_mem-related fields of the aliasing (destination) object. - Removed bli_trmm3_cntl.c, .h after realizing that the trmm control tree will work just fine for bli_trmm3(). - Removed some commented vestiges of the typecasting functionality needed to support heterogeneous datatypes. commit e65c476284db9ef64b23191a21c2584b1083342f Author: Field G. Van Zee Date: Tue Nov 19 10:05:35 2013 -0600 Minor updates to packm_blk_var2.c and _blk_var3.c. Details: - Comment updates to packm_blk_var2.c and packm_blk_var3.c. - In packm_blk_var2(), call setm_unb_var1(), scal2m_unb_var1() directly instead of setm(), scal2m(). commit 9e1d0d4bca48eda54301d8976f203e2544c9df3a Author: Field G. Van Zee Date: Mon Nov 18 18:11:07 2013 -0600 Added trsm_l, trsm_u ukernels for x86_64/core2.
Details: - Added standalone trsm_l/trsm_u micro-kernels for x86_64 (core2). These kernels are based on the gemmtrsm_l/gemmtrsm_u micro-kernels that already existed in kernels/x86_64/core2-sse3/3. commit 85e7e02ea3a9190b6fcff5d46b00d41c79cb1242 Merge: 67761e22 70720054 Author: Field G. Van Zee Date: Mon Nov 18 12:02:00 2013 -0600 Merge branch 'master'. Forgot to git-pull. commit 67761e224c92500eecf9c1540cc72bdd2fb27679 Author: Field G. Van Zee Date: Mon Nov 18 11:57:40 2013 -0600 Attempting to fix errors in bgq build. Details: - Removed restrict declaration from b_cast and c_cast from bli_trsm_lu_ker_var2.c and bli_trsm_rl_ker_var2.c. Curiously, they are causing problems for xlc only in those two files and no other macro-kernels. - Fixed (hopefully) kernel function parameter type declarations in kernels/bgq/1f/bli_axpyf_opt_var1.c and kernels/bgq/3/bli_gemm_8x8.c. commit 707200541d344f98cf34c9801954dbb36fbe0447 Author: Field G. Van Zee Date: Mon Nov 18 11:17:31 2013 -0600 Syntax error fix in x86_64/core2 gemmtrsm_u ukr. commit bbe2b84a49e7785d4d0c514cda34adfbe66478b0 Author: Field G. Van Zee Date: Mon Nov 18 11:11:06 2013 -0600 Updated Makefile in test, testsuite. Details: - Updated Makefiles in test and testsuite directories to use the new BLIS header installation directory scheme, which is to compile with -I/include/blis instead of -I/include. commit 9bd7fcfd436625ca2108128086671319362f4d92 Author: Field G. Van Zee Date: Mon Nov 18 10:58:09 2013 -0600 Outer-to-inner 'restrict' fix in macro-kernels. Details: - Fixed sloppy placement of 'restrict' pointer declarations in level-3 macro-kernels. Previously, all restricted pointers were being declared at the outer-most function scope level. While this violates the C99 standard, very few of the compilers used with BLIS so far have seemed to care. The lone exception has been IBM's xlc. Thanks to Tyler Smith for identifying this bug (and suggesting the fix). commit 50549a6a31dd26cf63a013e0ede16b2c7ce835b6 Author: Field G. 
Van Zee Date: Sun Nov 17 18:31:27 2013 -0600 Changed header install directory to include/blis. Details: - Changed top-level Makefile so that headers are installed to $(INSTALL_PREFIX)/include/blis/. (Header directories are no longer named by version/configuration and then symlinked.) - Added uninstall targets, including uninstall-old to clean out old library archives. - Added GREP makefile definitions to all configurations' make_defs.mk. commit d70733abddfb9a95661897e1e4f3c1f3cfa7cbaa Author: Field G. Van Zee Date: Sat Nov 16 17:34:25 2013 -0600 Added ARM kernels, configurations. Details: - Added kernels for ARM, and configurations for Cortex-A9 and Cortex-A15. Thanks to Francisco Igual for contributing these kernels and configurations. commit d37c2cff62089c86983c2f79762f4b5329037373 Author: Field G. Van Zee Date: Wed Nov 13 10:47:11 2013 -0600 Minor comment and Makefile changes. Details: - Added missing 'check-config' and 'check-make-defs' targets to testsuite/Makefile. - Removed unused 'test' target from top-level Makefile. - Comment changes to testsuite input files. commit 19885f893a17b91ee79bead0620d0f913392d4c5 Author: Field G. Van Zee Date: Mon Nov 11 12:09:21 2013 -0600 Updated some kernel comment headers. Details: - Updated bgq and piledriver comment headers to use BLIS copyright header instead of libflame. commit 1a4d698f42981d74fe5f29b980031e1ee7dc42d5 Author: Field G. Van Zee Date: Mon Nov 11 10:15:40 2013 -0600 CHANGELOG update (for 0.1.0). commit 089048d5895a30221b6b1976c9be93ad6443420d (tag: 0.1.0) Author: Field G. Van Zee Date: Sat Nov 9 17:18:00 2013 -0600 Added object wrappers to 1f test suite modules. Details: - Added missing object wrappers to level-1f test suite modules. This was only apparent if you were configuring with something other than the reference configuration. - Commented out object-wrappers in level-1f front-ends. 
These were not working as intended when the reference configuration was selected, because most kernel sets, such as those in the template set, do not have object wrappers. - Whitespace changes to template micro-kernels. - Comment changes to template level-1f kernel headers. commit 9ef3752079de10124bed906b5d28479d04aa8187 Author: Field G. Van Zee Date: Fri Nov 8 17:20:47 2013 -0600 Updated template kernels wrt KernelsHowTo wiki. Details: - Merged latest state of KernelsHowTo wiki into template micro-kernels located in config/template/kernels/3. commit 376bbb59c8944e29c5c1ff6637920d8451370afa Author: Field G. Van Zee Date: Fri Nov 8 11:17:34 2013 -0600 Removed support for duplication. Details: - Removed support for duplication from the gemmtrsm/trsm micro-kernels and all framework code. - Updated test suite modules according to above changes. commit 68a5910974b62b4df853fae2a68cb04df9d5a19c Author: Field G. Van Zee Date: Thu Nov 7 11:36:11 2013 -0600 Added comments to testsuite/input.operations. Details: - Added extensive comments to the top of testsuite/input.operations, which describe how to edit the file. - Removed input.operations.0 and input.operations.1. - Changed input.general to test all datatypes ("sdcz") by default. commit a98f78b715fb256a519870071bb5266130d70b21 Author: Field G. Van Zee Date: Wed Nov 6 15:32:47 2013 -0600 Changed dim_t and inc_t to be signed integers. Details: - Redefined dim_t and inc_t in terms of gint_t (instead of guint_t). This will facilitate interoperability with Fortran in the future. (Fortran does not support unsigned integers.) - Redefined many instances of stride-related macros so that they return or use the absolute value of the strides, rather than the raw strides which may now be signed. Added new macros bli_is_row_stored_f() and bli_is_col_stored_f(), which assume positive (forward-oriented) strides, and changed the packm_blk_var[23] variants to use these macros instead of the existing bli_is_row_stored(), bli_is_col_stored().
- Added/adjusted typecasting to various functions/macros, including bli_obj_alloc_buffer(), bli_obj_buffer_at_off(), and various pointer- related macros in bli_param_macro_defs.h. - Redefined bli_convert_blas_incv() macro so that the BLAS compatibility layer properly handles situations where vector increments are negative. Thanks to Vladimir Sukharev for pointing out this issue. - Changed type of increment parameters in bli_adjust_strides() from dim_t to inc_t. Likewise in bli_check_matrix_strides(). - Defined bli_check_matrix_object(), which checks for negative strides. - Redefined bli_check_scalar_object() and bli_check_vector_object() so that they also check for negative stride. - Added instances of bli_check_matrix_object() to various operations' _check routines. commit 1f8afc3e08a4312cfe810be86aedeacbc57275c5 Author: Field G. Van Zee Date: Wed Nov 6 10:09:10 2013 -0600 Minor comment update to BLAS compat files. commit 1abbf768afafc158d44e4d5c4a135cfd9e277f13 Author: Field G. Van Zee Date: Mon Nov 4 15:50:00 2013 -0600 Fixed bugs in scalv and setv. Details: - Fixed bugs similar to those addressed in cca1e1f51dc6, whereby a segmentation fault may occur if beta is not the same type as the vector operand for scalv and setv. - Changed axpyv and scal2v front-ends in a similar fashion. commit f5953259a1842ee48e5833c22ac86e68a337bfe1 Author: Field G. Van Zee Date: Mon Nov 4 14:43:55 2013 -0600 Fixed a bug related to Hermitian matrix diagonals. Details: - Fixed a bug whereby BLIS assumed that the imaginary components of the diagonal elements of Hermitian matrices were already zero. This property is now enforced when the matrix is packed (bli_packm_blk_var2). Thanks to Vladimir Sukharev for reporting this bug. - Minor comment updates to template kernels. commit d70f2b089dac8b9e4c19295dfa6014c36afee2ec Author: Field G. Van Zee Date: Sat Nov 2 17:19:40 2013 -0500 Added scaling to abval2s, sqrt2s macros.
Details: - Re-defined abval2s and sqrt2s macros to use scaling to avoid underflow and overflow from squaring the real and imaginary components. (This is the same technique used to fix recent bugs in invscals/invscaljs and inverts.) commit c5b1ed9409ae2f71d04041eef5da9a0080b5784a Author: Field G. Van Zee Date: Fri Nov 1 10:28:04 2013 -0500 Added new dotxaxpyf variant 2. Details: - Added a new variant for dotxaxpyf that is based on dotxf and axpyf kernels. By default, this variant is not used by any other operation. commit 97f89fbcf202d72fc440b614708e352ea31633e2 Author: Field G. Van Zee Date: Fri Nov 1 10:16:39 2013 -0500 Fixed bug in complex invscals. Details: - Fixed complex inversion in invscals and invscaljs whereby the imaginary component was being computed incorrectly. - Use bli_fmaxabs() instead of bli_fabs() when choosing the scalar in inverts, invscals, and invscaljs. - Changed bli_abs() and bli_fabs() macro definitions to use "<=" operator instead of "<". commit eda42a21d17a2742eab69ab801ed530b82488c8a Author: Field G. Van Zee Date: Thu Oct 31 18:00:44 2013 -0500 Defined missing symbols in bla_rotg.c Details: - Defined local equivalents of libf2c's r_sign(), d_sign(), c_abs(), and z_abs(), which are needed by bla_rotg.c. Also defined r_abs() and d_abs() for completeness. Thanks to Vladimir Sukharev for reporting these bugs. commit cca1e1f51dc67a2c3725d5c1837256831aaf70f8 Author: Field G. Van Zee Date: Wed Oct 30 14:39:01 2013 -0500 Fixed bugs in scalm and setm. Details: - Fixed bugs in scalm and setm that resulted in segmentation faults when beta is not the same type as the matrix operand. Thanks to Vladimir Sukharev for reporting this bug. - Changed axpym and scal2m front-ends in a fashion similar to that of scalm and setm; namely, the alpha scalar is copy-cast to the type of the first matrix operand.
- Changed the template and reference configurations' bli_config.h files so that the number of memory allocator blocks of A and B are set based on BLIS_MAX_NUM_THREADS. - Comment updates to bli_obj.c and variable rename in bla_nrm2.c. commit 2807013a4761c2b84b3944de64d23483ad7ef2fb Author: Field G. Van Zee Date: Thu Oct 24 14:32:20 2013 -0500 Fixed over/under-flow in complex inversion. Details: - Fixed the complex bli_?inverts() macros, which were inverting elements in an "unsafe" manner, such that very large and very small values were unnecessarily over/under-flowing. Thanks to Vladimir Sukharev for reporting this bug. - Comment update to bli_sumsqv_unb_var1.c. - Removed redundant bli_min() macro in bli_scalar_macro_defs.h. - Changed 1.0F to 1.0 for bli_drands() macro. commit 45a80c625f84edb2ade6ac25efe2b9c589d7e0df Author: Field G. Van Zee Date: Wed Oct 23 12:15:25 2013 -0500 Fixed parameter checking issue in BLAS syr[2]k. Details: - Fixed a minor parameter checking bug in the BLAS compatibility layer for [sd]syrk and [sd]syr2k. Specifically, if 'C' is passed in for the trans parameter of either operation, it is (a) allowed, and (b) treated as 'T' (whereas previously it was disallowed). Thanks to Vladimir Sukharev for finding and reporting this bug. commit a091a219bda55e56817acd4930c2aa4472e53ba5 Author: Field G. Van Zee Date: Mon Oct 14 10:11:29 2013 -0500 Minor fixes to piledriver configuration, ukernel. Details: - Applied a patch from Tyler that fixes minor staleness in the piledriver configuration and gemm micro-kernel. - Very minor changes to test suite input files. commit dacdde27aee4fb90b14880136d7f20c6b234e2c6 Author: Field G. Van Zee Date: Fri Oct 11 11:37:19 2013 -0500 Added Fran's Sandy Bridge kernels/configuration. Details: - Added a kernel directory for kernels developed by Francisco Igual for the Sandy Bridge architecture, including a dgemm ukernel coded with AVX intrinsics. - Added a configuration for Sandy Bridge using values supplied by Fran.
commit 03106d650e4030d4c9831683448376f92fc52d41 Author: Field G. Van Zee Date: Fri Oct 11 10:40:38 2013 -0500 Fixed minor perf bug in gemm_ker_var2. Details: - Fixed a minor performance bug in bli_gemm_ker_var2.c (and the experimental bli_gemm_ker_var5.c) whereby the addresses for a_next and b_next are not computed correctly (ie: do not wraparound) at the edge cases. Thanks to Tze Meng for helping me identify this bug. commit b053337387dbdef9035be03538222670a21707ca Author: Field G. Van Zee Date: Thu Oct 10 18:26:55 2013 -0500 Added fusing factors, MR/NR to test suite output. Details: - Updated the test suite driver (and modules where appropriate) so that the level-1f fusing factors are output along with the variable dimension. While this is not strictly necessary, since the fusing factors are output in the initial parameter summary, it allows extra reassurance to the user since the fusing factors appear alongside the variable dimension, which together give a complete picture of the problem size. Similar changes were made for outputting the register blocksizes when reporting results for the micro-kernel test modules. commit be4833bd91c5a58d0bfc52daaadf7ba543a77acf Author: Field G. Van Zee Date: Thu Oct 10 14:20:06 2013 -0500 Added test suite modules for level-1f, 3 kernels. Details: - Added test modules in test suite for level-1f kernels and level-3 micro-kernels. (Duplication in the micro-kernels, for now, is NOT supported by these test modules.) - Added section override switches to test suite's input.operations file. - Added obj_t APIs for level-1f front-ends and their unblocked variants to facilitate the level-1f test modules. Also added front-end for dupl operation. - Added obj_t-based check routines for level-1f operations, which are called from the new front-ends mentioned above. - Added query routines for axpyf, dotxf, and dotxaxpyf that return fusing factors as a function of datatype, which is needed by their respective test modules. 
- Whitespace changes to bli_kernel.h of all existing configurations. commit 680188d46bb15b9a1a2867638104939dc77ca2a1 Author: Field G. Van Zee Date: Thu Oct 10 13:23:37 2013 -0500 Cleaned up old test drivers. Details: - Minor updates to old test drivers in preparation for our participation in ACM TOMS's replicated results initiative. commit 3690bdd4f95769c935c410414112102cc3e108b1 Author: Field G. Van Zee Date: Thu Oct 10 11:45:33 2013 -0500 More updates to level-1f kernels for core2-sse3. Details: - Changed types in function signatures to match new prototypes. Meant to include this in previous commit. commit 661d5120cd7071f9b0c5cefc95f99f1361370ade Author: Field G. Van Zee Date: Thu Oct 10 11:27:27 2013 -0500 Fixed outdated fusing factor macros in 1f kernels. Details: - Updated level-1f kernels for x86_64 and bgq to use renamed fusing factor macros. Meant to include this in 5e54f46c. Thanks to Fran for pointing this out. commit 73aa1e9f31d1b2a319c7e711ced6db3f9835c832 Author: Field G. Van Zee Date: Tue Oct 1 17:01:18 2013 -0500 Added section overrides to test suite. Details: - Added new lines of input to the test suite's input.operations file, which allows the user to disable entire sections (levels) of tests. Before this change, the user had to manually disable each operation test's "master switch". (This is why input.operations.0 existed: to allow a more convenient starting point for someone who only wanted to test one or a few operations.) commit 5e54f46ccb76beab892d530b693e07c6bf6db7cf Author: Field G. Van Zee Date: Mon Sep 30 12:58:18 2013 -0500 Added template implementations and other tweaks. Details: - Added a 'template' configuration, which contains stub implementations of the level 1, 1f, and 3 kernels with one datatype implemented in C for each, with lots of in-file comments and documentation. - Modified some variable/parameter names for some 1/1f operations. (e.g. renaming vector length parameter from m to n.)
- Moved level-1f fusing factors from axpyf, dotxf, and dotxaxpyf header files to bli_kernel.h. - Modified test suite to print out fusing factors for axpyf, dotxf, and dotxaxpyf, as well as the default fusing factor (which are all equal in the reference and template implementations). - Cleaned up some sloppiness in the level-1f unb_var1.c files whereby these reference variants were implemented in terms of front-end routines rather than directly in terms of the kernels. (For example, axpy2v was implemented as two calls to axpyv rather than two calls to AXPYV_KERNEL.) - Changed the interface to dotxf so that it matches that of axpyf, in that A is assumed to be m x b_n in both cases, and for dotxf A is actually used as A^T. - Minor variable naming and comment changes to reference micro-kernels in frame/3/gemm/ukernels and frame/3/trsm/ukernels. commit 97aaf220a847363b4da35935eca17790c0ef71f6 Author: Field G. Van Zee Date: Tue Sep 17 10:51:36 2013 -0500 Added new kernels, configurations. Details: - Added various micro-kernels for the following architectures: Intel MIC IBM BG/Q IBM Power7 AMD Piledriver Loongson 3A and reorganized kernels directory. Thanks to Tyler Smith, Mike Kistler, and Xianyi Zhang for contributing these kernels. - Added configurations corresponding to above architectures, and renamed "clarksville" configuration to "dunnington". commit fe979c5a114c877506a5697cdab1fc8cf2bcd303 Author: Field G. Van Zee Date: Fri Sep 13 14:31:53 2013 -0500 Removed default configuration behavior. Details: - Changed the configure script so that it no longer defaults to the reference configuration. This change is being made so that the developer has a firm awareness of which configuration is being used to configure BLIS. Thanks to Mike Kistler and Bryan Marker for this suggested change. commit da77e9614f54f92f703f01e3b9bd67a83280150c Author: Field G. Van Zee Date: Fri Sep 13 12:00:37 2013 -0500 Minor improvements to static memory allocator.
Details: - Expanded on cpp macro definitions from bli_mem.c and relocated them to a new header file, frame/include/bli_mem_pool_macro_defs.h. The expanded functionality includes computing the pool size for each datatype (using that datatype's cache blocksizes) and using the maximum to size the actual pool array. This addresses the somewhat common pitfall whereby a developer updates cache blocksizes in bli_kernel.h for only one datatype (say, single-precision real), while the memory pools are sized using the double-precision real values. Then, when the developer attempts to link to and run a level-3 BLIS routine (e.g. dgemm), the library aborts with a message saying the static memory pool was exhausted. Clearly, this message is misleading when the pool was not sized properly to begin with. - Removed previously disabled code in bli_kernel_macro_defs.h that was meant to check for size consistency among the various cache blocksizes. (Obviously the memory pool size-based solution mentioned above is better.) - Added BLIS_SIZEOF_? cpp macros to bli_type_defs.h. This seemed like a reasonable place to put these constants, rather than further crowd up bli_config.h. - Updated testsuite driver to output memory pool sizes for A, B, and C. - Minor comment updates to bli_config.h. - Removed 'flame' configuration. It was beginning to get out-of-date, and I hadn't used it in months. We can always re-create it later. commit 631f347b7a99cb02757c534fd3ec5f723a2fdb0e Author: Field G. Van Zee Date: Tue Sep 10 17:17:28 2013 -0500 Added ESSL and Accelerate targets to test drivers. Details: - Added ESSL and Accelerate (OS X) targets to standalone test drivers' Makefile in "test" directory. Thanks to Jeff Hammond for suggesting / providing this patch. commit 7ae4d7a41d13ef5f1ceee217c000a5cf77a11128 Author: Field G. Van Zee Date: Tue Sep 10 16:35:12 2013 -0500 Various changes to treatment of integers. 
Details: - Added a new cpp macro in bli_config.h, BLIS_INT_TYPE_SIZE, which can be assigned values of 32, 64, or some other value. The former two result in defining gint_t/guint_t in terms of 32- or 64-bit integers, while the latter causes integers to be defined in terms of a default type (e.g. long int). - Updated bli_config.h in reference and clarksville configurations according to above changes. - Updated test drivers in test and testsuite to avoid type warnings associated with format specifiers not matching the types of their arguments to printf() and scanf(). - Inserted missing #include "bli_system.h" into blis.h (which was slated for inclusion in d141f9eeb6d1). - Added explicit typecasting of dim_t and inc_t to macros in bli_blas_macro_defs.h (which are used in BLAS compatibility layer). - Slight changes to CREDITS and INSTALL files. - Slight tweaks to Windows build system, mostly in the form of switching to Windows-style CRLF newlines for certain files. commit 068437736b41d51a1f5ec47839f059bf58a20413 Author: Field G. Van Zee Date: Mon Sep 9 14:07:58 2013 -0500 Fixed set-but-not-used compiler (gcc) warnings. Details: - Used void-casts of certain variables to appease gcc (and perhaps other compilers) when such variables are only used in the complex instances of the functions. Special thanks to Karl Rupp for suggesting a portable fix for these warnings. commit 6dc85f63dcd5282340c9e00d585e97d70a21edc3 Author: Field G. Van Zee Date: Mon Sep 9 13:48:52 2013 -0500 Small fix to Windows defs.mk makefile fragment. Details: - Commented out a !include statement that was attempting to include a version file that does not yet exist. For now, the version string is hard-coded into defs.mk. commit d141f9eeb6d1de7044b7429adf52d11c6fca620c Author: Field G. Van Zee Date: Mon Sep 9 13:09:16 2013 -0500 Added Windows build system. Details: - Added a 'windows' directory, which contains a Windows build system similar to that of libflame's. 
Thanks to Martin for getting this up and running. - Spun off system header #includes into bli_system.h, which is included in blis.h - Added a Windows section to bli_clock.c (similar to libflame's). commit 9b320e7406fb69e8b61a0085abe2ed89a96bdb68 Author: Field G. Van Zee Date: Mon Sep 9 11:04:46 2013 -0500 Edited bli_?lamch.c to avoid Windows keyword. Details: - Renamed "small" variable to "smnum" to avoid collision with Windows type by the same name. This change is needed in advance of the upcoming Windows build system. commit 9013ad6ff2e9ace35e0cf44c32795c2f3d5be628 Author: Field G. Van Zee Date: Wed Sep 4 13:36:07 2013 -0500 Switched integer typedefs (again) to C types. Details: - Redefined gint_t and guint_t in terms of the standard C types long int and unsigned long int, respectively. - Changed testsuite default max problem size to 500. - Changed testsuite input.operations to use square problems for level-3 operation tests. commit 981a60cfa07abac2e93697dfe12b0f076ab00a38 Author: Field G. Van Zee Date: Wed Sep 4 12:09:11 2013 -0500 Falling back to 32-bit integers for dim_t, etc. Details: - In light of recent segfaulting issues when compiling on 32-bit systems, I've changed the default typedef for gint_t and guint_t from int64_t and uint64_t to int32_t and uint32_t, respectively. - Disabled 64-bit integers in the blas2blis layer for the reference configuration. - Added type sizes of gint_t, guint_t, and the four floating-point datatypes to introductory output of the testsuite. commit b776ddcd4338b34f172ef78da0ac1d771a771ab4 Author: Field G. Van Zee Date: Tue Sep 3 21:58:07 2013 -0500 Applied temp fix to typecasting bug in testsuite. Details: - Applied a temporary fix to the typecasting bug in the testsuite driver. The fix involves casting both numerator and denominator to unsigned long. This fix is more voodoo than science, as I can't be sure why it even works. commit 9ee6e125373869c4213c017ce772c38ecefba103 Author: Field G. 
Van Zee Date: Tue Sep 3 21:53:27 2013 -0500 Changed dimension spec for gemm in testsuite. Details: - Encountered a bizarre typecasting bug whereby the test suite was not computing the proper dimension from the problem size and dimension specification when the latter was set to -3. Will investigate. Thanks to Fran for finding this "bug". commit e8be081e68c385ab44d0fea8dade21d40c200b79 Author: Field G. Van Zee Date: Wed Aug 28 15:52:34 2013 -0500 Generalized matlab and file output in testsuite. Details: - Added a new option in input.general that allows outputting in matlab/octave format so that one can output in matlab format independently from outputting to files. - Adjusted input.operations according to above. - Added input.operations.0 and input.operations.1 with all options disabled and enabled, respectively. commit d352c746e5683037d41b5061dfb5ce08e1d0843b Author: Field G. Van Zee Date: Tue Aug 27 13:41:46 2013 -0500 Added single/real gemm micro-kernel for x86_64. Details: - Added a single-precision real gemm micro-kernel in kernels/x86_64/3/bli_gemm_opt_d4x4.c. - Adjusted the single-precision real register blocksizes in config/clarksville/bli_kernel.h to be 8x4. - Added a missing comment to bli_packm_blk_var2.c that was present in bli_packm_blk_var3.c commit dedda523dc5dc779ecc34e6a03dc74cb8eb220de Author: Field G. Van Zee Date: Mon Aug 19 12:07:41 2013 -0500 Fixed bug in bli_acquire_mpart_t2b(), _l2r(). Details: - Fixed a bug in bli_acquire_mpart_t2b() and bli_acquire_mpart_l2r() that caused incorrect partitioning when SUBPART0 was requested. This bug was introduced in 46d3d09d49ad. Thanks to Bryan for isolating this bug. - Removed dupl kernels from kernels/x86_64/3 directory. - Uncommented beta == 0 optimization code in kernels/x86_64/3/bli_gemm_opt_d4x4.c. commit 12dbd2f33455e9384fe2070cbdd660fd4a7fceb5 Author: Field G. Van Zee Date: Thu Aug 8 14:39:35 2013 -0500 Moved init_safe(), finalize_safe() to BLAS compat.
Details: - Moved the bli_init_safe() and bli_finalize_safe() function calls from the BLAS-like BLIS layer to the BLAS compatibility layer. Having these auto- initializers in the BLIS layer wasn't buying us anything because the user could still call the library with uninitialized global scalar constants, for example. Thus, we will just have to live with the constraint that bli_init() MUST be called before calling ANY routine with a bli_ prefix. - Added the missing _init_safe() and finalize_safe() calls to the level-1 BLAS compatibility wrappers. commit 8abfe55f2ae5d89df18e1b26a5a28d94b0936683 Author: Field G. Van Zee Date: Thu Aug 8 13:30:19 2013 -0500 Miscellaneous updates. Details: - Changed the BLIS_HEAP_STRIDE_ALIGN_SIZE in the configurations from 16 to BLIS_CACHE_LINE_SIZE (typically 64). - Changed the use of nr in sizing of bd buffer to packnr in level-3 macro- kernels. - Reformulated gemm_ker_var2 to look more like the other level-3 macro- kernels, in that the interior and edge-case handling is expressed once inside the loops in the n and m dimensions, rather than the edge-case handling being "unrolled" and expressed as distinct code regions. The previous macro-kernel now lives in retired form in the subdirectory other/bli_gemm_ker_var2.c.old. - Updated experimental gemm_ker_var5 according to above change. - Fixed bug in bli_her2k.c whereby incorrect transformations were being applied to optimize the macro-kernel accesses pattern on C when C is row-stored. - Various updates inside of test/exec_sizes. commit 1aa05736ff49e7cc5f121acf615460fe9a87852c Author: Field G. Van Zee Date: Wed Aug 7 12:27:04 2013 -0500 Fixed bug in interface of bla_ger_check(). Details: - Fixed the misplaced lda parameter in the function signature of bla_ger_check(). Thanks to Tyler for finding this bug. commit 685aad25353fb200de4ca97a8bc0feeebde51d0f Author: Field G. Van Zee Date: Tue Aug 6 12:25:51 2013 -0500 Fixed cpp guard typos in frame/compat/check files. 
Details: - Fixed instances of BLIS_ENABLE_BLIS2BLAS that should have been BLIS_ENABLE_BLAS2BLIS. Thanks to Tyler for catching this. - Fixed various syntax errors in the code that had yet to be compiled due to the aforementioned bug. commit f4ec28e723d28d998f1038f82da6986e44320ef6 Author: Field G. Van Zee Date: Thu Aug 1 11:24:23 2013 -0500 Added basic OpenMP-based gemm and packm files. Details: - Integrated Tyler's parallelized packm_blk_var2 and gemm_ker_var2 into the following auxiliary files frame/1m/packm/other/bli_packm_blk_var2.c frame/3/gemm/other/bli_gemm_ker_var2.c The routine in the first file uses a basic OpenMP parallel region to parallelize the packing of blocks of A and panels of B, while the second uses a similar parallel region to parallelize along the n dimension of the gemm macro-kernel. commit f8980edf9c318453bb1962ac4939c06bf11e6d5e Merge: 67a8b949 6e7e4523 Author: Field G. Van Zee Date: Fri Jul 26 11:14:27 2013 -0500 Merge branch 'master' of https://code.google.com/p/blis commit 67a8b9498d13b038deb316ac163e62c5b17da2ec Author: Field G. Van Zee Date: Fri Jul 26 11:12:37 2013 -0500 Added missing cpp kernel blocksize constraints. Details: - Added missing C preprocessor guards in bli_kernel_macro_defs.h that enforce constraints on the register blocksizes relative to the cache blocksizes. Thanks to Tyler for helping me stumble across this issue. commit 6e7e452343014e8f86640874dc1dbadca4a642a1 Author: Field G. Van Zee Date: Mon Jul 22 14:50:57 2013 -0500 Fixed minor warnings and misc issues. Details: - Fixed various warnings output by gcc 4.6.3-1, including removing some set-but-not-used variables and addressing some instances of typecasting of pointer types to integer types of different sizes. commit 03f6c3599743bc837a7d40eb5b415b1bf4f2a4e9 Author: Field G. Van Zee Date: Mon Jul 22 12:54:32 2013 -0500 Tightened some macros that detect datatypes. 
Details: - Modified the definitions of some macros, such as bli_is_real(), so that the "special" bit is taken into account so that BLIS_INT is differentiated from BLIS_FLOAT. - Whitespace changes to bli_obj_macro_defs.h. - Removed BLIS_SPECIAL_BIT definition from bli_type_defs.h, since it wasn't being used. commit b33e2f4443b9043b554963320280ff7783773652 Author: Field G. Van Zee Date: Fri Jul 19 17:15:03 2013 -0500 CHANGELOG update (for 0.0.9). commit 0680916fdd532f7a4716b11a2515243b2c08d00f (tag: 0.0.9) Author: Field G. Van Zee Date: Thu Jul 18 18:04:34 2013 -0500 Added BLAS error checking to compatibility layer. Details: - Added frame/compat/check directory, which now houses companion _check() routines for each of the BLAS wrappers in frame/compat. These _check() routines are called from the compatibility wrappers and mimic the error-checking present in the netlib BLAS. - Edited bla_xerbla.c so that xerbla() translates the operation string to uppercase before printing. - Redefined util routines in frame/compat/f2c/util in terms of level0 macros. - Added prototypes for util routines, f2c routines, lsame(), and xerbla(). - Commented out prototypes in test/test_*.c since Fortran integers are now int64_t by default (and the prototypes that were present in the files used int). - Removed redundant #include "bli_f2c.h" in bli_?lamch.c and bli_lsame.c, since blis.h was already being included. - Other minor changes to code in frame/compat/f2c. commit 4e80ad28c97273db3366428ec44020da7944964d Author: Field G. Van Zee Date: Thu Jul 18 17:53:31 2013 -0500 Added support for C99 complex types/arithmetic. Details: - Added support for C99 complex types to bli_type_defs.h and overloaded complex arithmetic to the scalar-level macros in include/level0. This includes a somewhat substantial reorganization and re-layering of much of the existing machinery present in the level0 macros. 
- Added new #define for BLIS_ENABLE_C99_COMPLEX to bli_config.h files, commented-out by default, which optionally enables the use of built-in C99 complex types and arithmetic. - Minor changes to clarksville and reference configs' make_defs.mk files. - Removed macro definitions from bli_param_macro_defs.h which were not being used (bli_proj_dt_to_real_if_imag_eq0). commit 6072d7c848e837ba20d607f7b727438ada31bdcf Author: Field G. Van Zee Date: Wed Jul 17 12:27:45 2013 -0500 Fixed bugs in trsm, trmm macro-kernels. Details: - Fixed a bug in trsm_rl_ker_var2() caused by incorrect edge case handling. - Fixed a bug in trsm_rl_ker_var2() and trsm_ru_ker_var2() whereby k was incorrectly being adjusted upward by MR, instead of NR. The rl and ru trmm macro-kernels were updated in a similar fashion. - Fixed a bug in trsm_ru_ker_var2() that was due to a missing negation on diagoffb when recomputing k to skip a zero region below where the diagonal intersects the right side of the block. The corresponding trmm macro-kernel was also updated. - Fixed a bug in trsm_ru_ker_var2() where the adjustment of k (by NR) needed to be placed AFTER the block that recomputes k to skip the zero region (if present). The other three trsm macro-kernels, as well as the trmm macro-kernels, were updated in the same manner, for consistency. - Fixed a bug in trmm_lu_ker_var2() in which the wrong dimension (n) was being updated to skip a zero region to the left of where the diagonal of A intersects the top edge of the block. - Comment updates to all trsm and trmm macro-kernels. - Comment updates to bli_packm_init.c. commit 47410a48f9b91e94ce4c67633686ffd1f2ad0275 Author: Field G. Van Zee Date: Wed Jul 10 14:53:59 2013 -0500 Added f2c'ed Givens rotation wrappers. Details: - Retired (for now) existing ?rot*() BLAS compatibility wrappers to 'attic' along with other wrappers for which no BLIS implementation exists.
- Added f2c-generated codes for applicable datatype flavors of rot, rotg, rotm, and rotmg operations. commit e5f90f3a8dbe671104bcb9d8b4e3409de01805da Author: Field G. Van Zee Date: Wed Jul 10 13:40:12 2013 -0500 Removed copynz defs from bli_kernel.h files. Details: - Removed COPYNZ_KERNEL definition from the bli_kernel.h files in each configuration. (Meant to include this in previous commit.) commit aec12d90f596e8c04b1ad178258a1cd38108f59d Author: Field G. Van Zee Date: Wed Jul 10 13:33:30 2013 -0500 Removed copynzv, copynzm and related codes. Details: - Removed copynzv and copynzm operation directories. These operations implemented a variation of copyv/m that, in the case of real source and complex destination operands, leaves the imaginary component untouched (rather than setting it to zero). I realize now that the special case(s) (e.g. gemm with real A and B but complex C) that I thought required this operation actually can be handled more simply. - Removed level0 scalar macros implementing copynzs, copynzjs. commit b0a0a0f274a761788531b5d281cc3b411b7124ed Author: Field G. Van Zee Date: Tue Jul 9 17:15:38 2013 -0500 Added handling of restrict, stdint.h for non-C99. Details: - Removed the #include <stdint.h> from blis.h and inserted a cpp macro block in bli_type_defs.h that #includes <stdint.h> for C++ and C99, and otherwise manually typedefs the types we need (which, for now, are unconditionally int64_t and uint64_t). - Moved basic typedefs to top of bli_type_defs.h, and comment changes. - Added cpp macro block to bli_macro_defs.h that #defines restrict as nothing for C++ and non-C99. commit 4b7e7970f1af4a1ab121e07657e2b78b9fcd7671 Author: Field G. Van Zee Date: Mon Jul 8 15:20:34 2013 -0500 Migrated integer usage to stdint.h types. Details: - Changed the way bli_type_defs.h defines integer types so that dim_t, inc_t, doff_t, etc. are all defined in terms of gint_t (general signed integer) or guint_t (general unsigned integer).
- Renamed Fortran types fchar and fint to f77_char and f77_int. - Define f77_int as int64_t if a new configuration variable, BLIS_ENABLE_BLIS2BLAS_INT64, is defined, and int32_t otherwise. These types are defined in stdint.h, which is now included in blis.h. - Renamed "complex" type in f2c files to "singlecomplex" and typedef'ed in terms of scomplex. - Renamed "char" type in f2c files to "character" and typedef'ed in terms of char. - Updated bla_amax() wrappers so that the return type is defined directly as f77_int, rather than letting the prototype-generating macro decide the type. This was the only use of GENTFUNC2I/GENTPROT2I-related macros, so I removed them. Also, changed the body of the wrapper so that a gint_t is passed into abmaxv, which is THEN typecast to an f77_int before returning the value. - Updated f2c code that accessed .r and .i fields of complex and doublecomplex types so that they use .real and .imag instead (now that we are using scomplex and dcomplex). commit 372501398564fdba3d5a3db86c30bc1039b185ff Author: Field G. Van Zee Date: Mon Jul 8 11:24:18 2013 -0500 Added experimental bli_gemm_ker_var5(). Details: - Added support for an experimental gemm macro-kernel that incrementally packs one micro-panel of B at a time. This is useful for certain special cases of gemm where m is small. - Minor changes to default values of clarksville configuration. - Defined BLIS_PACKED_BLOCKS as part of pack_t type, even though we do not yet have any use (or implementation support) for block storage. - Comment update to bli_packm_init.c. commit 9915d667a79f23e3a2a2516247c560e9063a1646 Author: Field G. Van Zee Date: Sun Jul 7 13:28:39 2013 -0500 Defined "total" blocksize query functions. Details: - Defined bli_blksz_total_for_type() and bli_blksz_total_for_obj() to query the default blocksize plus blocksize extension (using the type or the type of an object). - Comment update in bli_packm_cxk.c. commit 46d3d09d49aded1d9f1b468c83fce75e07d631dc Author: Field G.
Van Zee Date: Thu Jun 27 13:19:56 2013 -0500 Consolidated lower/upper her[2]k blocked variants. Details: - Consolidated lower and upper blocked variants for herk and her2k, and renamed the resulting variants, according to the same changes recently made to trmm and trsm. - Implemented support for four new subpartitions types: BLIS_SUBPART1T BLIS_SUBPART1B BLIS_SUBPART1L BLIS_SUBPART1R which correspond to "merged" partitions that include the middle "1" partition as well as either the neighboring "0" or "2" partition. This is used to clean up code in herk/her2k var2 that attempts to partition away the strictly zero region above or below the diagonal of a matrix operand that is being marched through diagonally. - Added safeguards to herk macro-kernels that skip any leading or trailing zero region in the panel of C that is passed in. This is now needed given that herk/her2k var1 no longer partitions off this zero region before calling the macro-kernel (via bli_her[2]k_int()). - Updated comments and other whitespace changes to trmm/trsm macro-kernels. commit 02002ef6f3d2746665982793db36714bd69bccc9 Author: Field G. Van Zee Date: Mon Jun 24 17:08:14 2013 -0500 Added row-storage optimizations for trmm, trsm. Details: - Implemented algorithmic optimizations for trmm and trsm whereby the right side case is now handled explicitly, rather than induced indirectly by transposing and swapping strides on operands. This allows us to walk through the output matrix with favorable access patterns no matter how it is stored, for all parameter combinations. - Renamed trmm and trsm blocked variants so that there is no longer a lower/upper distinction. Instead, we simply label the variants by which dimension is partitioned and whether the variant marches forwards or backwards through the corresponding partitioned operands. - Added support for row-stored packing of lower and upper triangular matrices (as provided by bli_packm_blk_var3.c). 
- Fixed a performance bug in bli_determine_blocksize_b() whereby the cache blocksize extensions (if non-zero) were not being used to appropriately size the first iteration (ie: the bottom/right edge case). - Updated comments in bli_kernel.h to indicate that both MC and NC must be whole multiples of MR AND NR. This is needed for the case of trsm_r where, in order to reuse existing left-side gemmtrsm fused micro-kernels, the packing of A (left-hand operand) and B (right-hand operand) is done with NR and MR, respectively (instead of MR and NR). commit d1e81ddc848ee47bc188735883d14582bdd0cabc Author: Field G. Van Zee Date: Thu Jun 13 11:14:21 2013 -0500 Minor generalizing tweaks to trmm blk var1, var2. commit 0efb7974f104206ba3985276f2180a9b14fe9f9b Author: Field G. Van Zee Date: Wed Jun 12 16:40:04 2013 -0500 CHANGELOG update. commit 5b641c3bab31eac6a1795b9f6e3f86c59651ca50 (tag: 0.0.8) Author: Field G. Van Zee Date: Wed Jun 12 16:02:12 2013 -0500 Use separate CFLAGS for "kernels" directories. Details: - Added a new "special" directory type: any source code within directories named "kernels" will be compiled with a separate CFLAGS_KERNELS set of compiler flags. This allows the developer to specify a separate set of flags (e.g. optimization flags) for compiling kernels while maintaining a standard set for regular framework code. - Fixed a bug in the top-level Makefile that was causing "noopt" code to be compiled with the standard set of compilation flags. - Updated make_defs.mk in reference, flame, and clarksville configurations according to above changes. commit 08475e7c7653ba598665071a617d10f0d8f763c2 Author: Field G. Van Zee Date: Tue Jun 11 12:18:39 2013 -0500 Various level-3 optimizations for row storage. Details: - Implemented remaining two cases within bli_packm_blk_var2(), which allow packing from a lower or upper-stored symmetric/Hermitian matrix to column panels (which are row-stored). Previously one could only pack to row panels (which are column-stored). 
- Implemented various optimizations in the level-3 front-ends that allow more favorable access through row-stored matrices for gemm, hemm, herk, her2k, symm, syrk, and syr2k. - Cleaned up code in level-3 front-ends that has to do with setting target and execution datatypes. commit 05a657a6b92e8d34efa5c57ae6a18a4f35ec0841 Author: Field G. Van Zee Date: Fri Jun 7 11:04:10 2013 -0500 Added beta == 0 optimization to x86_64 ukernel. Details: - Modified x86_64 gemm microkernel so that when beta is zero, C is not read from memory (nor scaled by beta). - Fixed minor bug in test suite driver when "Test all combinations of storage schemes?" switch is disabled, which would result in redundant tests being executed for matrix-only (e.g. level-1m, level-3) operations if multiple vector storage schemes were specified. - Restored debug flags as default in clarksville configuration. commit f1aa6b81cc421516dd77dd0f18f7c432724e6ef2 Author: Field G. Van Zee Date: Thu Jun 6 13:36:06 2013 -0500 Whitespace changes to old test drivers. Details: - Replaced tabs with four spaces in places where indentation was already in place. commit 9feb4c23d2e36f3d8b5417a3802c69f94b29f749 Author: Field G. Van Zee Date: Tue Jun 4 14:57:46 2013 -0500 Fixed unaligned handling in axpyf, dotxaxpyf. Details: - Fixed over-cautious handling of unaligned operands in vector intrinsic implementation of axpyf kernel. - Fixed over- and under-cautious handling of unaligned operands in vector intrinsic implementation of dotxaxpyf kernel. commit 22b06cfcd2e3205c8325a246c2279e4b1047c066 Author: Field G. Van Zee Date: Mon Jun 3 16:54:52 2013 -0500 Updated level-1/-1f [vector intrinsic] kernels. Details: - Updated level-1/-1f kernels so that non-unit and un-aligned cases are handled by reference implementation (rather than aborted). - Added -fomit-frame-pointer to default make_defs.mk for clarksville configuration. - Defined bli_offset_from_alignment() macro. - Minor edits to old test drivers. 
commit 0288c827d3659bb225ac9c10f168b623ed0106a2 Author: Field G. Van Zee Date: Sat Jun 1 08:02:23 2013 -0500 Updated ukernels for x86_64. Details: - Tweaked micro-kernels and configuration for clarksville. - Updated/cleaned up old test drivers in test directory. - Fixed syntax bug in trsv_unb_var1 and trsv_unf_var1 (introduced recently). commit 85a6d1c9a52c2b27c71a3a3e341c51d7ba263749 Author: Field G. Van Zee Date: Mon May 6 11:05:08 2013 -0500 Replaced axpys usage with subs in trsv. Details: - Replaced instances of axpys with alpha equal to -1 with subs. - Use BLIS_MAX_TYPE_SIZE to define BLIS_CONSTANT_SLOT_SIZE instead of sizeof(dcomplex). commit 2d9c667f3c48a12cab64e5ad09d5fcb9f4c19d78 Author: Field G. Van Zee Date: Fri May 24 16:28:10 2013 -0500 Fixed x86_64 kernel bugs and other minor issues. Details: - Fixed bugs in trmv_l and trsv_u due to backwards iteration resulting in unaligned subpartitions. We were already going out of our way a bit to handle edge cases in the first iteration for blocked variants, and this was simply the unblocked-fused extension of that idea. - Fixed control tree handling in her/her2/syr/syr2 that was not taking into account how the choice of variant needed to be altered for upper-stored matrices (given that only lower-stored algorithms are explicitly implemented). - Added bli_determine_blocksize_dim_f(), bli_determine_blocksize_dim_b() macros to provide inlined versions of bli_determine_blocksize_[fb]() for use by unblocked-fused variants. - Integrated new blocksize_dim macros into gemv/hemv unf variants for consistency with that of the bugfix for trmv/trsv (both of which now use the same macros). - Modified bli_obj_vector_inc() so that 1 is returned if the object is a vector of length 1 (ie: 1 x 1). This fixes a bug whereby under certain conditions (e.g. 
dotv_opt_var1), an invalid increment was returned, which was invalid only because the code was expecting 1 (for purposes of performing contiguous vector loads) but got a value greater than 1 because the column stride of the object (e.g. rho) was inflated for alignment purposes (albeit unnecessarily since there is only one element in the object). - Replaced some old invocations of set0 with set0s. - Added alpha parameter to gemmtrsm ukernels for x86_64 and use accordingly. - Fixed increment bug in cleanup loop of gemm ukernel for x86_64. - Added safeguard to test modules so that testing a problem with a zero dimension does not result in a failure. - Tweaked handling of zero dimensions in level-2 and level-3 operations' internal back-ends to correctly handle cases where output operand still needs to be scaled (e.g. by beta, in the case of gemm with k = 0). commit d57ec42b34f8447c88adeffa95cf22f8c115ad51 Author: Field G. Van Zee Date: Fri May 3 17:35:32 2013 -0500 Renamed _trans_status() macro. Details: - Mistakenly forgot to rename the _trans_status() macro and instances in previous commit. commit 9e2b227866af429a4a6fb7dbb8c457bbdda2f136 Author: Field G. Van Zee Date: Fri May 3 17:24:58 2013 -0500 Renamed _set_trans(), _trans_status() macros. Details: - Renamed the following macros: bli_obj_set_trans() -> bli_obj_set_onlytrans() bli_obj_trans_status() -> bli_obj_onlytrans_status() to remove ambiguity as to which bits are read/updated. commit 2f8174509ea9f844db11ebd9389de5168e85b132 Author: Field G. Van Zee Date: Wed May 1 15:06:30 2013 -0500 Unconditionally check memory pool(s) for errors. Details: - Changed bli_mem_acquire_m() in bli_mem.c so that we still check if the memory pool is exhausted before checking out and returning a block, even if BLIS error checking has been disabled. These errors are useful because they likely indicate that BLIS was improperly configured for the code being run. commit 75405a2b83679b6aff38d7e7425199d623a7b0a9 Author: Field G. 
Van Zee Date: Wed May 1 15:00:30 2013 -0500 CHANGELOG update. commit 6bfa96f84887dec0b4cf8be5d38dd634c2f8951d (tag: 0.0.7) Author: Field G. Van Zee Date: Tue Apr 30 19:35:54 2013 -0500 Absorbed blocksize extensions into main objects. Details: - Revamped some parts of commit b6ef84fad1c9 by adding blocksize extension fields to the blksz_t object rather than have them as separate structs. - Updated all packm interfaces/invocations according to above change. - Generalized bli_determine_blocksize_?() so that edge case optimization happens if and only if cache blocksizes are created with non-zero extensions. - Updated comments in bli_kernel.h files to indicate that the edge case blocksize extension mechanism is now available for use. commit bc7c8005cedbe50961ac2a99aeeabf4e9f9a8e9e Author: Field G. Van Zee Date: Thu Apr 25 17:16:59 2013 -0500 Added option to disable err checking in testsuite. Details: - Added a new line to input.general that allows one to specify the error- checking level to use for each BLIS experiment. The only two levels supported for now are "no error checking" and "full error checking". commit 096b366ddcfe386f44419ef84d8df8be13825f86 Author: Field G. Van Zee Date: Thu Apr 25 16:43:43 2013 -0500 Use cntl trees that block in n dimension. Details: - Updated _cntl.c files for each level-3 operation to induce blocked algorithms that first partition in the n dimension with a blocksize of NC. Typically this is not an issue since only very large problems exceed that of NC. But developers often run very large problems, and so this extra blocking should be the default. - Removed some recently introduced but now unused macros from bli_param_macro_defs.h. commit b6e24b23cb4dfc488c1c9c70d596539c2287f72e Author: Field G. Van Zee Date: Thu Apr 25 12:06:12 2013 -0500 Use PASTEMAC in macro-kernels (over MAC2 or MAC3). Details: - Replaced multi-type invocations of copys_mxn, xpbys_mxn, etc. (PASTEMAC2 and PASTEMAC3) with those that only use a single type (PASTEMAC). 
- Added extra macros to bli_adds_mxn_uplo.h and bli_xpbys_mxn_uplo.h to accommodate above change. - Fixed comment typo in bli_config.h files. - Added .nfs* pattern to .gitignore. commit df80acf517dde180ddcc5835c6136b2fa7556d4b Author: Field G. Van Zee Date: Tue Apr 23 19:43:23 2013 -0500 Fixed computation of b_next in L3 macro-kernels. Details: - Restructured herk_l and herk_u macro-kernels in the image of trmm and trsm, in that the edge cases are captured by the main loop, rather than trying to have "cleanup" sections that result in four distinct parts (interior, bottom edge, right edge, bottom-right edge) of the code. - Fixed the way b_next was being computed in the non-gemm level-3 macro-kernels (herk, trmm, trsm). The way they are computed now matches that of gemm. commit 3671528cf8efe4b445d196665143a5c50c2c6048 Author: Field G. Van Zee Date: Tue Apr 23 19:12:14 2013 -0500 Fixed minor bug in computing b_next in gemm. commit db072a5b4a039a9a668ef951333ecfb5bd3a74b9 Author: Field G. Van Zee Date: Tue Apr 23 17:49:10 2013 -0500 Fixed rare edge case bug in herk_l macro-kernel. Details: - Fixed a potential bug in herk_l at the m_left edge case. If MR was chosen to be much larger than NR, then one could encounter edge cases in the MC dimension that fall entirely below the diagonal, which the previous implementation of the herk_l macro-kernel was not allowing for. commit 1dab11e37d1cb403cbe75b73a644c00de534f104 Author: Field G. Van Zee Date: Tue Apr 23 17:17:11 2013 -0500 Updated x86 gemmtrsm ukernels to use alpha. commit 9d10d7dd9bc92a993fea7162bfa5983f75506f49 Author: Field G. Van Zee Date: Tue Apr 23 16:00:18 2013 -0500 Added a_next, b_next arguments to micro-kernels. Details: - Added two more arguments to the gemm and gemmtrsm microkernels: the addresses of the next micro-panels of A and B. 
By passing these pointers into the micro-kernel, we allow the micro-kernel author to prefetch micro-panels of A and B as necessary (though this is completely optional; these addresses may also be safely ignored). - Updated all seven macro-kernels so that they compute and pass in a_next and b_next. Note that ONLY the gemm macro-kernel computes a_next and b_next with the precise semantics we want. I will go back and fix the other macro-kernels in the near future. - Added 'restrict' to various micro-kernels from which it was missing. commit f3815dc84d385c514a5acaf1e925424a57be2f51 Author: Field G. Van Zee Date: Tue Apr 23 11:12:33 2013 -0500 Added code for backward edge-case blocking. Disabled: - Edited bli_determine_blocksize_b() to include experimental (and currently disabled) code that computes extended blocks. - Updated comments related to above changes. - Enabled use of x86 gemmtrsm ukernel in config/flame/bli_kernel.h. commit 4fe1435f20e8fc7dd72f795ac58c8e236e6c631b Author: Field G. Van Zee Date: Mon Apr 22 19:00:43 2013 -0500 Updated dupl implementation to use PACKNR and NR. Details: - Updated frame/util/dupl/bli_dupl_unb_var1.c to utilize PACKNR and NR explicitly to navigate b1 so that situations where PACKNR > NR are supported. - Moved the 4x2 and 4x4 reference micro-kernels in frame/3/gemm/ukernels and frame/3/trsm/ukernels to kernels/c99/. - Updated clarksville and flame configurations. commit 2d6f9e83799a46d52d7901e275f8fd67f0a0edc6 Author: Field G. Van Zee Date: Sun Apr 21 15:10:34 2013 -0500 Disabled blocksize checks for memory pools. Details: - Temporarily disabled checks that ensure that enough memory will be allocated by the contiguous memory allocator for all types, given that the values for double precision real are the ones used to allocate the space. These checks can easily go awry in certain situations, especially if you are developing for only one datatype. So for now, they are probably more trouble than they are worth. 
commit b6ef84fad1c9884c84b7f1350a0bcdfe1737e8f2 Author: Field G. Van Zee Date: Sun Apr 21 15:00:24 2013 -0500 Allow ldim of packed micro-panels != MR, NR. Details: - Made substantial changes throughout the framework to decouple the leading dimension (row or column stride) used within each packed micro-panel from the corresponding register blocksize. It appears advantageous on some systems to use, for example, packed micro-panels of A where the column stride is greater than MR (whereas previously it was always equal to MR). - Changes include: - Added BLIS_EXTEND_[MNK]R_? macros, which specify how much extra padding to use when packing micro-panels of A and B. - Adjusted all packing routines and macro-kernels to use PACKMR and PACKNR where appropriate, instead of MR and NR. - Added pd field (panel dimension) to obj_t. - New interface to bli_packm_cntl_obj_create(). - Renamed bli_obj_packed_length()/_width() macros to bli_obj_padded_length()/_width(). - Removed local #defines for cache/register blocksizes in level-3 *_cntl.c. - Print out new cache and register blocksize extensions in test suite. - Also added new BLIS_EXTEND_[MNK]C_? macros for future use in using a larger blocksize for edge cases, which can improve performance at the margins. commit 59fca58dbe678d79c1df0916b022afbeac7c48fa Author: Field G. Van Zee Date: Fri Apr 19 15:26:29 2013 -0500 Fixed bug in compatibility layer (her2k/syr2k). Details: - Fixed a bug in the BLAS compatibility layer, specifically in bla_her2k.c and bla_syr2k.c, that caused incorrect computation to occur when the BLAS interface caller requests the [conjugate-]transpose case. Thanks to Bryan Marker for reporting the behavior that led to this bug. commit 09eacbd1ab1380a95a0e9625726b45e43ed102d6 Author: Field G. Van Zee Date: Thu Apr 18 19:39:13 2013 -0500 Changed old level3 test drivers to call front-ends. 
Details: - Changed old level-3 test drivers, in 'test' directory, to always call the front-end object API instead of the internal back-end with the locally defined control tree. commit 83e45de23e565138b8fde06fb11cfedc973b7246 Author: Field G. Van Zee Date: Thu Apr 18 18:33:03 2013 -0500 Allow packm_init() to reacquire a too-small mem_t. Details: - Changed bli_packm_init() to react differently to a situation where a pack obj_t has an already-allocated mem_t entry that has a buffer that is smaller than what will be needed to hold the block/panel that now needs to be packed. Previously, this situation was treated with an abort() since I assumed something was horribly wrong. I have changed the code so that it now reacts by releasing the previous mem_t and re-acquires a new mem_t with the new information. (This change was done at the request of Bryan Marker to facilitate code generation via DxT.) commit a6990434173b0cf651f8521194f3aef738deb7d2 Author: Field G. Van Zee Date: Thu Apr 18 13:52:47 2013 -0500 Fixed bug in packing block of A for hemm/symm. Details: - Fixed a bug in bli_packm_blk_var2() that affected the packing functionality of hemm and symm. The bug occurs whenever attempting to pack a Hermitian or symmetric matrix where the block of A being packed intersects the diagonal, but some of its micro-panels do not intersect the diagonal and lie completely in the unstored region. Thanks to Francisco Igual for reporting this bug. - Comment updates to both _blk_var2.c and _blk_var3.c. commit c92e7590e1934f830814ab614c794215ebe0c415 Author: Field G. Van Zee Date: Wed Apr 17 20:53:29 2013 -0500 Activated bli_packm_acquire_mpart_t2b(). Details: - Removed the overly-paranoid bli_abort() from the end of bli_packm_acquire_mpart_t2b(), to allow others to experiment with partitioning through packed blocks of A. Also, and more importantly, changed an earlier check that was causing an erroneous (but coincidentally redundant) abort(). 
Also, updated some of the comments in bli_packm_part.c. commit bea579e9f009a44e08008eb14d09f38748ab2b53 Author: Field G. Van Zee Date: Tue Apr 16 19:43:14 2013 -0500 Allow creation of "empty" objects. Details: - Modified bli_obj_alloc_buffer() to allow allocating an empty buffer, and modified bli_adjust_strides() to explicitly handle m = n = 0. - Updated bli_check_matrix_strides() to allow cases where m = n = 0. commit 7904e20f2e6908571ee5008da2a08084198eefae Author: Field G. Van Zee Date: Tue Apr 16 17:37:16 2013 -0500 Fixed "root" object bug in bli_her[2]k/syr[2]k. Details: - Fixed an obscure bug in the front-ends for herk, her2k, syrk, and syr2k, that manifested as the incorrect triangle being updated. It occurred when the user would pass in a matrix object that was correctly marked as symmetric/Hermitian and lower-stored, but whose root object was never marked as lower (or upper). We now alias and re-assign root status for matrix C within the front-ends. Note that trmm and trsm were already doing this, albeit for a slightly different reason (to allow the internal back-end to choose which algorithm to run--lower or upper--based on the uplo of the root object for both left and right side cases). Thanks to Bryan Marker for leading me to this bug. commit 19155a768dd97b57cfb59c32fa8e54a344ec66e1 Author: Field G. Van Zee Date: Tue Apr 16 11:24:03 2013 -0500 Fixed overzealous type-checking in bli_getsc(). Details: - Relaxed type checking in getsc so that the input object could be a constant and not just a proper floating-point type. (If it is a constant, default to extracting the dcomplex values.) Thanks to Bryan Marker for reporting this bug. - Added definition for bli_is_constant() in bli_param_macro_defs.h - Comment updates to various level-0 scalar routines. commit 2ee6bbca2953d04c967685da9735b3eaf8a4b813 Author: Field G. Van Zee Date: Mon Apr 15 19:27:57 2013 -0500 Fixed bug in bli_obj_is_packed() and renamed. 
Details: - This macro is used to determine whether the partitioning routines should call a corresponding packm_part routine instead. However, it was unintentionally catching matrices that were marked as "packed" by virtue of them simply being marked as BLIS_PACKED_UNSPEC in, say, bli_gemv(). The macro has now been renamed to bli_obj_is_panel_packed(), and now only checks for row or column panel packing. (Note that I first attempted to fix this bug in a571af816d72.) Thanks to Bryan Marker for reporting the erroneous behavior that led me to this bug. commit 99b99eebe70336b5f28039a4a084aa7f5fa7059d Author: Field G. Van Zee Date: Mon Apr 15 17:54:43 2013 -0500 Removed local reference ukernel blocksize macros. Details: - Removed locally defined gemm microkernel blocksize macros from _mxn reference microkernel definition and header. Meant to include this in a recent/previous commit (0020ef7c8271). commit 6a538fa7b164655f41cea5b9c8d3902438bda66b Author: Field G. Van Zee Date: Mon Apr 15 14:40:31 2013 -0500 Formatting change to mods in previous commit. commit ea079d35591e808971d2d98a1a7d9f89bc1f7c2f Author: Field G. Van Zee Date: Mon Apr 15 14:31:40 2013 -0500 Set structure of objects in level-2 BLIS APIs. Details: - Added missing statement to set structure field of local objects in top-level BLIS (BLAS-like) API wrappers. Thanks to Bryan Marker for reporting this bug. commit d9948c541c0446e20e249a1ccc83709ce51b7aa8 Author: Field G. Van Zee Date: Mon Apr 15 10:21:26 2013 -0500 Tweak to test suite function string construction. Details: - Fixed a minor bug in the way that the test suite would construct function name strings when the user anchored all parameters in input.operations. In this case, the test driver would mistake this situation for one where the operation simply had no parameters to begin with, and thus would not include the parameter string in the function string that is output for every result. commit ca9e435c57c5c7a000d2a32681dd8070ba850abd Author: Field G. 
Van Zee Date: Mon Apr 15 09:59:46 2013 -0500 Fixed a bug in reference implementation of dupl. Details: - Fixed a bug in reference implementation of dupl (bli_dupl_unb_var1.c), which resulted in incorrect duplication. - Updated old test drivers according to recently updated packm control tree creation interface. - Added 'restrict' to x86 gemm microkernel interface. commit 26cbd52e364bbe439e3744101cd5a6cbcb82dffd Author: Field G. Van Zee Date: Sun Apr 14 19:05:33 2013 -0500 Modified bli_kernel.h include order in blis.h. Details: - Delayed #include of bli_kernel.h in blis.h to prevent a situation where _kernel.h includes an optimized microkernel header, which uses BLIS types such as dim_t and inc_t, which would precede the definition of those types in bli_type_defs.h. - Moved the #include of bli_kernel_macro_defs.h in bli_macro_defs.h to blis.h (immediately after that of bli_kernel.h). commit 3414a23c38b0de45a8034b3dda2fc4b5a755e4e1 Author: Field G. Van Zee Date: Sat Apr 13 16:53:16 2013 -0500 CHANGELOG update. commit ec16c52f2ecf419c749175ce0a297441c10f1c68 (tag: 0.0.6) Author: Field G. Van Zee Date: Sat Apr 13 16:41:16 2013 -0500 Updated INSTALL file (now redirects to website). commit 0020ef7c82711a7ebf08e5174f939bee2563184c Author: Field G. Van Zee Date: Sat Apr 13 15:26:35 2013 -0500 Removed gemmtrsm-, trsm-specific blocksize macros. Details: - Modified gemmtrsm micro-kernel wrappers to use new aliased blocksize macros instead of operation-specific ones. - Removed local, gemmtrsm-specific blocksize macro definitions found in micro-kernel header files. (Meant to include above changes in 31b100e7bf4a.) - Added comments to reference gemmtrsm micro-kernel wrapper implementation. commit 1a9f427b85bb95aaa9e54c8ff8ecad8734b361ee Author: Field G. Van Zee Date: Fri Apr 12 15:25:54 2013 -0500 Added/renamed alignment constants to _config.h. 
Details: - Added new memory alignment constants: BLIS_HEAP_STRIDE_ALIGN_SIZE (previously assumed to be same as SYSTEM_MEM) BLIS_CONTIG_ADDR_ALIGN_SIZE (previously assumed to be same as PAGE_SIZE) BLIS_STACK_BUF_ALIGN_SIZE (previously not enforced) and renamed existing ones BLIS_SYSTEM_MEM_ALIGN_SIZE -> BLIS_HEAP_ADDR_ALIGN_SIZE BLIS_CONTIG_MEM_ALIGN_SIZE -> BLIS_CONTIG_STRIDE_ALIGN_SIZE to better convey what the alignment factor is used for (and what it is not used for). - Removed BLIS_ENABLE_SYSTEM_MEM_ALIGN. Dynamic memory alignment is now disabled by setting BLIS_HEAP_STRIDE_ALIGN_SIZE to 1. - Inserted instances of __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))) into macro-kernels to specify stack alignment of temporary buffers. - Modified test suite driver to output new constants. - Removed bli_align_dim_to_sys() and bli_align_dim_to_cmem(). Instead, we now use bli_align_dim_to_size(), which takes a third argument (the desired alignment). commit a77d10e87e3c0ab55ec14d74c285bc95c06285c3 Author: Field G. Van Zee Date: Fri Apr 12 11:40:55 2013 -0500 Fixed a bug in axpyv/axpym when alpha is unit. Details: - Fixed bug whereby axpyv and axpym were incorrectly simplifying to a copy, rather than an add, when alpha = 1. Thanks to Bryan Marker for identifying this bug. commit 0495bd1d6de5995fe2fb79b321eec79e961eb7a5 Author: Field G. Van Zee Date: Thu Apr 11 16:39:25 2013 -0500 Moved _POSIX_C_SOURCE def to compiler cmd line. Details: - Removed the #define of _POSIX_C_SOURCE in bli_config.h (for both reference and clarksville configurations) and added "-D_POSIX_C_SOURCE=200112L" to the compiler command line arguments in make_defs.mk (for both configs). Thanks to Devin Matthews for suggesting this change. commit d43d1a0a2ef6de4bc57627566aef8e3fdb458b8c Author: Field G. Van Zee Date: Thu Apr 11 16:28:17 2013 -0500 Appended 'f2c_' to abs, min, max macros in f2c.h. 
Details: - Renamed abs, min, max, dmin, and dmax macros in bli_f2c.h so that they would not conflict with anything defined by the user (or the language). Thanks to Devin Matthews for suggesting this fix. - Updated all instances of the above macros accordingly. commit 31b100e7bf4aeaa4ceafefd2b6c3102d5fbc4cbb Author: Field G. Van Zee Date: Thu Apr 11 11:11:52 2013 -0500 Added new kernel blocksize macro aliases. Details: - Added new macros that alias level-3 cache and register blocksize macros to names that can be constructed via the PASTEMAC macro. These aliased macro definitions live inside bli_kernel_macro_defs.h, which is now #included after bli_kernel.h. - Modified macro-kernels to use new aliased blocksize macros instead of operation-specific ones. - Removed local, operation-specific kernel blocksize macro definitions (found in macro-kernel header files). commit bd2b24ba65b36d7c07c5918a3838ce2ff57c4b48 Author: Field G. Van Zee Date: Thu Apr 11 10:35:39 2013 -0500 Updated CREDITS file. commit 79328c15410215737f3f14cd069328cf52aa11fd Author: Field G. Van Zee Date: Thu Apr 11 10:32:14 2013 -0500 Reverted testsuite object files' home to 'obj'. Details: - Removed 'obj' and 'lib' from .gitignore. - Added testsuite/obj/.gitkeep (which is an empty file). - Updated testsuite/Makefile accordingly. - Thanks to Vernon Austel for pointing out the .gitkeep trick to tracking empty directories in git. commit 4afe3bfd82c03e1e97b58b7d250588a0d28541e5 Author: Field G. Van Zee Date: Tue Apr 9 17:45:39 2013 -0500 Renamed/moved object scalar constant macros. Details: - Replaced scalar constant macro definitions in bli_const_defs.h with a single, simpler macro in bli_obj_macro_defs.h. - Updated invocations of old macros accordingly. - Removed bli_const_defs.h. commit 357893f5be5c56ab7b062874005e77e614b23f06 Author: Field G. 
Van Zee Date: Tue Apr 9 14:48:15 2013 -0500 Applied fix from prev commit to gemmtrsm_?_ref_4x4 Details: - Fixed hard-coded kernels in bli_gemmtrsm_l_ref_4x4.c and bli_gemmtrsm_u_ref_4x4.c. commit 54988e8dca44475610bcaee5a7bc1c40e8921402 Author: Field G. Van Zee Date: Mon Apr 8 19:08:43 2013 -0500 Fixed a performance bug in trsm. Details: - Fixed a bug in the reference implementations of the gemmtrsm wrappers (bli_gemmtrsm_l_ref_mxn.c and bli_gemmtrsm_u_ref_mxn.c) whereby the reference gemm microkernel was hard-coded, and thus always called, even when GEMM_UKERNEL was defined to point to an optimized microkernel. This manifested as artificially low trsm performance for all problem sizes, but especially for small problem sizes as it only affected blocks of A that intersected the diagonal. Thanks to Mike Kistler of IBM for helping me find this bug. commit a7252e40b5c351eef9a1df531ea0ef25cb5fb705 Author: Field G. Van Zee Date: Mon Apr 8 16:08:22 2013 -0500 Generate testsuite objects in 'src'. Details: - Tweaked the testsuite makefile so that object files are stored in 'src' rather than 'obj', since (a) the top-level .gitignore dictates that obj directories are to be ignored, and (b) since git has problems tracking empty directories. Now, users do not need to create their own obj directories within their own local clones of BLIS. commit 803871c55b60d3c225ad9a0607fa507a9c16aab7 Author: Field G. Van Zee Date: Mon Apr 8 15:18:42 2013 -0500 Minor formatting changes. commit a571af816d72727e16cad37007e7043b9d6fa362 Author: Field G. Van Zee Date: Mon Apr 8 15:00:13 2013 -0500 Fixed definition of bli_is_packed_object() macro. Details: - Changed the definition of bli_is_packed_object() so that it keys off of the value of the pack schema bits in the info field of obj_t, rather than comparing the obj_t buffer with that of the mem_t entry. 
This was the cause of a very low probability bug whereby uninitialized memory caused the macro to evaluate to TRUE even though the object in question was not packed. Thanks to Vernon Austel of IBM for helping discover this bug. - Changed an abort() in bli_packm_part() to a not-yet-implemented. commit 3be14c32f735ecc6169d3ab6370cf8b69162acec Author: Field G. Van Zee Date: Sat Apr 6 12:54:45 2013 -0500 Updated information in testsuite output header. Details: - Added to the information that is echoed at the beginning of the test suite's output, and also re-labeled some existing information. commit 874707c1b183a4dd9a91dbfd4ea1522384c190df Author: Field G. Van Zee Date: Fri Apr 5 17:19:43 2013 -0500 Fixed edge case handling bug in herk macrokernels. Details: - Fixed a bug present in bli_herk_l_ker_var2() and bli_herk_u_ker_var2() that only manifests when BLIS is configured such that MR != NR. The bug involves incorrectly detecting edge cases, which resulted in some parts of matrix C potentially being skipped and not updated, depending on the problem size. - Updated the default values of MR and NR in config/reference/bli_kernel.h to 8 and 4, respectively, so that I can better stress the framework on a day-to-day basis. (The fact that they were both equal to 4 for so long is why I did not stumble upon this bug much sooner.) commit 7cbda15291d3e01300e71c286b9657b7ef0708bf Author: Field G. Van Zee Date: Thu Apr 4 15:25:43 2013 -0500 Added reference microkernels for arbitrary MR, NR. Details: - Added a new set of reference gemm, gemmtrsm, and trsm micro-kernels that contain explicit loops over MR and NR, thus allowing them to be used unmodified by developers who want to build a reference library with custom register blocksizes. - Changed config/reference/bli_kernel.h to use above ukernels by default. - Changed interfaces of new and existing gemm, gemmtrsm, and trsm micro-kernels to use 'restrict' keyword. - Added -funroll-loops option to config/reference/make_defs.mk. 
- Updated comments in bli_kernel.h describing constraints on register and cache blocksizes. - Updated _adds_mxn.h, _copys_mxn.h, and _xpbys_mxn.h macro files so that single-char macros are also defined. commit 6684b73d5501f91d24a79e26655a42819c9b3114 Author: Field G. Van Zee Date: Tue Apr 2 13:06:20 2013 -0500 Implemented amax operation and related changes. Details: - Implemented amax operation in BLIS. - Activated BLAS2BLIS routine mapping for new amax BLIS implementation. - Added integer support to [f]printv, [f]printm. - Added integer support to level-0 copys macros. - Updated printing of configuration information in test suite driver. - Comment changes to _config.h files. - Added comments to bla_dot.c to remind the reader what sdsdot()/dsdot() are used for. commit fb68087f8727cd5fd656a742a110e54fb1c91db9 Author: Field G. Van Zee Date: Tue Mar 26 15:10:16 2013 -0500 More memory alignment-related tweaks. Details: - Renamed BLIS_MEMORY_ALIGNMENT_SIZE to BLIS_CONTIG_MEM_ALIGN_SIZE. - Renamed BLIS_ENABLE_MEMORY_ALIGNMENT to BLIS_ENABLE_SYSTEM_MEM_ALIGN. - Added BLIS_SYSTEM_MEM_ALIGN_SIZE, which controls only the alignment passed into posix_memalign() or equivalent. - Defined new function, bli_align_dim_to_cmem(), which applies the contiguous memory alignment (rather than the system/malloc alignment). commit 9682ef61dbf9a8846c8b0826d4de24bc216cd641 Author: Field G. Van Zee Date: Tue Mar 26 14:14:53 2013 -0500 Always define memory alignment size cpp constant. Details: - Removed guard around #define for memory alignment size constant. Memory alignment should always be enabled, and so this value should always be defined. commit 3a787cccaae16531474f34398e3c0cf4f49b8cd8 Author: Field G. Van Zee Date: Tue Mar 26 13:59:19 2013 -0500 Renamed memory alignment macro constant. Details: - Renamed all occurrences of BLIS_MEMORY_ALIGNMENT_BOUNDARY to BLIS_MEMORY_ALIGNMENT_SIZE. commit 37308f9a502b56d94fa52a7df71c676a46c3be3d Author: Field G. 
Van Zee Date: Tue Mar 26 12:43:14 2013 -0500 Align packed panel strides with system alignment. Details: - Pass panel strides through bli_align_dim_to_sys() to ensure that each subsequent packed panel of A and B begins at an aligned address. (The first panel is presumably aligned to system alignment because it is aligned to a page boundary, which is typically much larger.) - Rearranged code in packm_init_pack() to prevent additional conditional blocks as a result of the aforementioned change. - Adjusted contiguous memory allocator so that the system memory alignment is used to allocate enough space for each block no matter what kind of register blocking is used (even if register blocksize is unit and every row/column needs maximal padding). - Adjusted default blocksizes in reference configuration so that MC*KC and KC*NC result in identical footprints for all datatypes. commit 40a0654ada5f256beb3da80ebba015a3c71fb61f Author: Field G. Van Zee Date: Sun Mar 24 20:18:12 2013 -0500 CHANGELOG update. commit b65cdc57d9e51fa00e3c03539cfb7e045707d0f4 (tag: 0.0.5) Author: Field G. Van Zee Date: Sun Mar 24 20:01:49 2013 -0500 Migrated 'bl2' prefix to 'bli'. Details: - Changed all filename and function prefixes from 'bl2' to 'bli'. - Changed the "blis2.h" header filename to "blis.h" and changed all corresponding #include statements accordingly. - Fixed incorrect association for Fran in CREDITS file. commit 132bffcef7441f32d02cc7485aef6a0648e0ef1e Author: Field G. Van Zee Date: Sun Mar 24 18:49:36 2013 -0500 Removed several 'old' directories and files. Details: - Removed most of the 'old' directories scattered throughout the framework, which includes alternate/half-baked/broken implementations. commit 551ea4767a3ea6c263f12aaca94bc2642cee4cfa Author: Field G. Van Zee Date: Sun Mar 24 18:00:10 2013 -0500 Removed #include "blis2.h" from low-level headers. Details: - Removed #include of "blis2.h" from various lower-level, operation-specific header files throughout the framework. 
Given that these low-level headers are included within #blis2.h in a very specific order, #include'ing blis2.h within them directly is unnecessary. commit bc7b318ed0960edeb4537797dd8c91de0d942ca9 Author: Field G. Van Zee Date: Fri Mar 22 17:18:58 2013 -0500 Added cpp guards to conflicting libflame typedefs. Details: - Added cpp guards around the definitions of dim_t, scomplex, and dcomplex. This is a temporary hack to allow interoperability with libflame. (Similarly temporary changes are being made to libflame's type definitions file.) commit f469907503fcdc24dff0174c569170e6e756e045 Author: Field G. Van Zee Date: Fri Mar 22 15:20:15 2013 -0500 Renamed MAX_PREFETCH_BYTE_OFFSET to MAX_PRELOAD_. Details: - Renamed BLIS_MAX_PREFETCH_BYTE_OFFSET to BLIS_MAX_PRELOAD_BYTE_OFFSET since "prefetch" is kind of a loaded word (e.g. "prefetch" instructions, which are different than the particular kind of prefetching/preloading referred to by this constant). commit d1023bfbc6668a58a01ee4f82ded2319911e7b19 Author: Field G. Van Zee Date: Fri Mar 22 15:09:59 2013 -0500 Removed build/old directory. commit 718888849c48d99f83eea6b8f83bc1998cffef7e Author: Field G. Van Zee Date: Fri Mar 22 15:07:01 2013 -0500 Deprecated 'flame' configuration. Details: - Removed 'flame' configuration, as it was horribly out-of-date. - Comment changes to bl2_blocksize.c and bl2_mem.c. commit bba38cf4e9d28058c14483f44fa074a6d2852ad9 Author: Field G. Van Zee Date: Tue Mar 19 18:07:40 2013 -0500 Added missing conjbeta argument to scald. commit 1f82b51d06d0279dded3f2b87ba59403f3ed0af6 Author: Field G. Van Zee Date: Mon Mar 18 15:37:20 2013 -0500 Relocated packed mem_t dimension fields to obj_t. Details: - Removed the m and n (and elem_size) fields from the mem_t object, and added m_packed and n_packed fields to obj_t. These new fields track the same as the old ones. From an abstraction standpoint, it seemed awkward to store those dimensions inside the mem_t. 
- Updated interfaces to bl2_mem_acquire_*() so that only a byte size argument is passed in, instead of m, n, and elem_size. - Updated bl2_packm_init_pack() and bl2_packv_init_pack() to inline the functionality of bl2_mem_alloc_update_m() and bl2_mem_alloc_update_v(), respectively. - Updated packm variants to access the packed length and width fields from their new locations. commit 36c782857bf9b8ac1b1dac47a70f689a4407e2cc Author: Field G. Van Zee Date: Mon Mar 18 10:37:03 2013 -0500 CHANGELOG update. commit e7d41229d3b1674e74f47d7f29fae004a745201a (tag: 0.0.4) Author: Field G. Van Zee Date: Fri Mar 15 17:12:36 2013 -0500 Re-implemented contiguous memory allocator. Details: - Completely re-wrote the contiguous memory allocator (bl2_mem.c). The new allocator instantiates and initializes three separate memory pool objects, each one associated with a separate array of contiguous memory blocks, each block of fixed and uniform size. (The three pools are for allocating mc-by-kc blocks of A, kc-by-nc panels of B, and mc-by-nc panels of C.) The pool objects use a stack structure internally to track which blocks in the region have been "checked out" to a thread and which are still available. Critical regions are now clearly marked and adaptable to parallel environments (e.g. OpenMP). Memory pools are set up when bl2_init() is called. - Added a new field to the packm control tree node, which indicates what kind of packed buffer is being allocated. The enumerated type for this argument is defined as packbuf_t in bl2_type_defs.h. - Updated level-3 _cntl.c files to pass in the appropriate value for a new packbuf_t argument to bl2_packm_cntl_obj_create(). - Moved some macros called by packm_init_pack() from bl2_obj_macro_defs.h to bl2_mem_macro_defs.h. - Added BLIS_MAX_NUM_THREADS to bl2_config.h, which we use as the default number of blocks of A reserved for the memory allocator. - Deprecated bl2_align_dim(). Replaced usage with that of bl2_align_dim_to_mult(). 
Turns out that typically we don't need to align a dimension to the system alignment, since that value has to do with starting addresses, whereas the values we are dealing with are unitless dimensions. commit 1e76cae00cb0a04544aaae1ade878686b238d283 Author: Field G. Van Zee Date: Fri Mar 15 12:21:42 2013 -0500 Perform her2k var1 loops in sequence. Details: - Changed variant 1 of her2k so that the two rank-k products are computed and accumulated in sequence rather than fused into one loop. This is necessary if BLIS is to be configured to provide only enough contiguous memory for one panel of B. commit c95c270eba91ae4efc26603beddfd0292caa919b Author: Field G. Van Zee Date: Thu Mar 7 14:42:15 2013 -0600 Enhanced tracking of dimensions for mem_t objects. Details: - Added new fields to mem_t struct definition to track the allocated (as opposed to the currently used) dimensions of the memory region. This allows packm_init() to be more robust in situations where memory is already allocated but is more than needed for the current packing job. - Updated logic in bl2_obj_set_buffer_with_cached_packm_mem() macro, used in packm_init(), to update the "currently used" dimensions of the mem_t object if the requested dimensions are smaller than the allocated dimensions. commit e99281a0f41d482fddeffa239bfc8e13e6d13d4b Author: Field G. Van Zee Date: Thu Mar 7 14:00:10 2013 -0600 Fixed test suite flop formulas for ops with side. Details: - Fixed incorrect flop counts in test suite modules for hemm, symm, trmm, trmm3, and trsm. - Comment updates in herk macro-kernels. commit ef8cbfc44dd620fdcbdb51cdb173217194bebe31 Author: Field G. Van Zee Date: Sat Mar 2 12:47:06 2013 -0600 Added "version" to .gitignore. Details: - Added "version" to .gitignore file so that the file does not show up when running 'git status', or accidentally get pulled into the index when running 'git add' or 'git add --all'. commit e9e0747c2f6c178f53ac46ab794acbb7b8c4fea8 Author: Field G. 
Van Zee Date: Sat Mar 2 12:43:54 2013 -0600 Removed version file from version control. Details: - Removed version file from version control to prevent git errors that occur when trying to pull new commits. commit bb612f864e9c17dd9805e9446840f02259619469 Author: Field G. Van Zee Date: Fri Mar 1 12:55:42 2013 -0600 Updated behavior of bl2_obj_induce_trans() macro. Details: - Changed bl2_obj_induce_trans() so that the transposition bit is no longer updated as part of the macro. All current uses of the macro have been coupled with instances of bl2_obj_set_trans() to clear the bit. - Added Jed to CREDITS file. commit f24e29b789e7314764a818ceb3063126936c986f Author: Field G. Van Zee Date: Fri Feb 22 18:15:41 2013 -0600 Replaced banded/packed BLAS2 stubs with f2c code. Details: - Retired the blas2blis wrappers that simply called abort with a "not yet implemented" message. This includes all of the level-2 banded and packed routines. - Replaced the aforementioned with the corresponding netlib implementations having been run through f2c (with some customization). - Added directories named 'attic' to build/gen-make-frags/ignore_list. commit 1454c1a14207766dfed372b8e38b47fa384f5198 Author: Field G. Van Zee Date: Fri Feb 22 12:38:45 2013 -0600 Moved Fortran name-mangling macro to bl2_config.h. Details: - Moved the Fortran-77 name-mangling macros from bl2_blas_macro_defs.h to the configuration directory (bl2_config.h, specifically) given that it can be expected to be tweaked by some developers. commit ede75693e5a36c6006087c4a7df834175b604504 (tag: 0.0.3) Author: Field G. Van Zee Date: Fri Feb 22 12:11:24 2013 -0600 Implemented blas2blis compatibility layer. Details: - Added the blas2blis compatibility layer, located in frame/compat. This includes virtually all of the BLAS, including banded and packed level-2 operations. - Defined bl2_init_safe(), bl2_finalize_safe(). 
The former allows a conditional initialization, which stores the "exit status" in an err_t, which is then read by the latter function to determine whether finalization should actually take place. - Added calls to bl2_init_safe(), bl2_finalize_safe() to all level-2 and level-3 BLAS-like wrappers. - Added configuration option to instruct BLIS to remain initialized whenever it automatically initializes itself (via bl2_init_safe()), until/unless the application code explicitly calls bl2_finalize(). - Added INSERT_GENTFUNC* and INSERT_GENTPROT* macros to facilitate type templatization of blas2blis wrappers. - Defined level-0 scalar macro bl2_??swaps(). - Defined level-1v operation bl2_swapv(). - Defined some "Fortran" types to bl2_type_defs.h for use with BLAS wrappers. commit 995edf43e21c1868732dbdd7fee14b08730218bd Author: Field G. Van Zee Date: Thu Feb 21 14:30:50 2013 -0600 Updated version file. (Forgot to in prev commit). commit e823b08aaf7b65ecc6ddc30570709ea8a4b52aa7 Author: Field G. Van Zee Date: Thu Feb 21 12:00:17 2013 -0600 Fixed some scalar types in BLAS-like Herm APIs. Details: - Some of the scalars of Hermitian operations, such as alpha in her, alpha and beta in herk, and beta in her2k, need to be real. These arguments were typed incorrectly as the complex types. This has been fixed. Note the issue was only present in the BLAS-like APIs for these operations (not the native object-based interfaces). commit 5ece050a669e74ba4a711d1d4669239d22d45642 Author: Field G. Van Zee Date: Wed Feb 20 15:50:54 2013 -0600 Updated version file. (Forgot to in prev commit). commit f243034b8b430d4684680ea8eddfd246e73fefc0 Author: Field G. Van Zee Date: Wed Feb 20 14:11:36 2013 -0600 Changed API of packm_init_pack() to use blksz_t. Details: - Changed the interface of packm_init_pack() so that mult_m and mult_n are passed in as type blksz_t* instead of dim_t. - Make similar change for packv_init_pack(). commit da0c22f24107be9f33e0ea2dae52e5534b1fd0e5 Author: Field G. 
Van Zee Date: Fri Feb 15 09:59:48 2013 -0600 Minor changes to lower levels of scalm and setm. Details: - Removed diagx parameter from lower-level interfaces of scalm. - Modified scalm_basic_check() to expect an object with a nonunit diagonal. - Changed setm_unb_var1() so that having an implicit unit diagonal results in only the strictly lower or upper triangle of the matrix being modified. commit 2c836adadcd2a7d7f217033ac4d7fcad03d5bd55 Author: Field G. Van Zee Date: Thu Feb 14 10:42:56 2013 -0600 Updated beta == zero semantics of mulsc. Details: - Updated beta == zero semantics of mulsc. Hopefully this is the last operation that needed updating. - Added Devin to CREDITS file. commit 722b66c7dcaaaa1b109e7c8b1d53fd71a9af8240 Author: Field G. Van Zee Date: Thu Feb 14 10:18:00 2013 -0600 Removed some calls to setv() in test modules. Details: - Removed calls to setv() in test modules whose sole purpose was to initialize vectors to zero to ensure that nan's and inf's would not taint the computation. Now that beta == zero semantics have been updated to clear the output operand (when beta is zero), rather than multiply against it, these setv() calls are no longer needed. commit e6ac623a902f776c42f85eadbf76996d9770a0db Author: Field G. Van Zee Date: Wed Feb 13 18:44:59 2013 -0600 Properly implemented beta == 0 semantics. Details: - Changed name of set0 and set0_mxn macros to set0s and set0s_mxn, respectively. - Added code to the following operations that sets the output operand to zero if the corresponding scalar is zero (rather than performing the floating-point multiply, or in the case of setv, copying the value). This will prevent nan's and inf's from creeping into results from uninitialized memory. - axpy - dotxv - scalv - scal2v - setv - gemv - ger - hemv - her - her2 - gemm reference ukernels commit aedccbc85d491e41711a0c6eb0d246d8700a199a Author: Field G. Van Zee Date: Wed Feb 13 18:29:53 2013 -0600 Fixed stale interface to packm_unb_var1(). 
Details: - Removed the control tree from the interface to packm_unb_var1(), which I meant to do when it was un-deprecated. commit c23135669f7a8a545e2e11ef559bf284be8bc65c Author: Field G. Van Zee Date: Wed Feb 13 13:21:00 2013 -0600 Un-deprecated packm_unb_var1.c (needed by l2 ops). Details: - Added bl2_packm_unb_var1() back into the mix once I realized that level-2 operations still need this routine for packing matrices. Now, whether level-2 operations should be packing matrices to begin with is another matter. But this fixes the segmentation fault one would have gotten when running bl2_gemv() on a general stride matrix. commit cf49e35f9819f9d93ebdca4703ade5abab28f6f6 Author: Field G. Van Zee Date: Tue Feb 12 18:39:35 2013 -0600 Removed cntl tree usage from packm implementation. Details: - Added new fields to obj_t info field: - invert_diag - pack_order_if_upper - pack_order_if_lower These fields allow packm_init() to embed information that begins in the control tree into the object so that the packm implementation does not need to use control trees at all. This is being done to aid Bryan's DxT code generation. - Added macros that operate on above fields. - Changed packm_init(), packm_blk_var2(), and packm_blk_var3() according to above changes. - Made similar (but much simpler) changes to packv. - Deprecated packm_blk_var1(), packm_unb_var1(), and packm_densify(). These were part of prototype implementations and are no longer needed. commit eb139ae256651af7820b93ef982626180195b87f Author: Field G. Van Zee Date: Tue Feb 12 12:39:30 2013 -0600 Replaced bl2_abs() with _fabs() where appropriate. commit 474bac30c99928f9e87315972bcb45c632c0b7ec Author: Field G. Van Zee Date: Tue Feb 12 12:23:48 2013 -0600 Removed level-0 macros projrs, grabis. Details: - Replaced instances of projrs and grabis macros with newer, more general-purpose getris. commit 03a260a457c8964e4603a655cee0d40ac17affba Author: Field G. 
Van Zee Date: Tue Feb 12 11:45:34 2013 -0600 Restored executable permissions to scripts. Details: - Restored executable (0755) permissions to scripts that were touched by the recursive sed script that updated the copyright headers in the previous commit. commit 1274e1243775e5e705114257a43176f63635227f Author: Field G. Van Zee Date: Mon Feb 11 14:37:47 2013 -0600 Updated copyright headers from 2012 to 2013. commit 3b620cc8e90c53c79129bd9dd89ae6b77c2446f1 Author: Field G. Van Zee Date: Mon Feb 11 13:38:07 2013 -0600 CHANGELOG update. commit 768fcebaa8be0eb936a6e7a02cd8a19438c79d99 (tag: 0.0.2) Author: Field G. Van Zee Date: Mon Feb 11 13:20:44 2013 -0600 Added unified test suite, and many fixes. Details: - Added a highly configurable, unified test suite. - Removed DUPB configuration constant from bl2_kernel.h and macro-kernel header files. Now, instead, DUPB is computed as (NDUP != 1) within each macro-kernel. This fixes a bug in trmm/trsm whereby bp was indexed into incorrectly when DUPB was set to FALSE but the NDUP was still non-unit. By encoding both pieces of information into one constant in _kernel.h, it seems somewhat less likely others will encounter this bug in the future. - Added level-2 cache blocksizes to _kernel.h for reference configuration, and defined blocksizes in _cntl.c files to these default values. - Changed semantics of her2k and syr2k such that these operations no longer expect the B matrix to already be conjugate-transposed (or just transposed for syr2k). However, these semantics are preserved for the internal mechanics of the implementations, including the internal back-end and all blocked variants. - Inserted checks for real-valued alpha and beta for herk/her2k and herk, respectively. - Relaxed general object structure constraints in _basic_check() for gemv, ger. 
- Changed her front-end to NOT copy-cast to real projection; instead, this is replaced by selecting either the real part or both parts within the unblocked algorithm implementation, depending on the value of conjh. - Added conjh to all _check routines for her so that the code knows when to verify that alpha has an imaginary component equal to zero (for her, but not syr). - Changed control tree for her to forgo packing. - Added unit diagonal support to fnormm. - Redefined real versions of abval2s macros in terms of fabs(), fabsf(). - Redefined complex versions of sqrt2s macros using the actual "complex square root" formula. - Created new level-0 object-based routines, suffixed with "sc" (for "scalar"). - Defined new level-1v, -1d, and -1m versions of add and sub operations (two-operand add and subtract). - Added new scalar macros: - getris: acquire real and imaginary components. - setris: set real and imaginary components. - addjs: addition with conjugated x. - subjs: subtraction with conjugated x. - Defined new utility operations: - absumv: element-wise sum of absolute values for vector elements. - absumm: element-wise sum of absolute values for matrix elements. - mkherm: convert existing matrix to Hermitian. - mksymm: convert existing matrix to symmetric. - mktrim: convert existing matrix to triangular. - Added various error checking routines. - Added bl2_clock_min_diff(), which is used to more cleanly measure the wall clock time of a code block. - Added general stride support to bl2_obj_alloc_buffer(). - Added bl2_obj_init_scalar(). - Updated parameter mapping in bl2_param_map.c. - Added support for queriable version string. - Fixed a bug in the her2k macro-kernels (which currently are simply implemented in terms of two invocations of herk) whereby beta was being applied to both the first and second rank-k updates, rather than only the first. 
- Fixed a bug in trmm/trsm whereby transpose and right side cases were not properly implemented due to erroneous assumptions regarding aliasing and root objects. - Fixed a bug in the upper triangular trsm macro-kernel in which the wrong MR x NR block of B was being updated. - Fixed a bug in the inverts macro in the double real case whereby the value was typecast to float before inversion. This affected non-unit cases of dtrsm. - Fixed a bug in the reference kernels for gemmtrsm whereby the minus one constant was being applied incorrectly. - Fixed a bug in the overall treatment of non-unit alpha for trsm. The code now mimics the rank-k strategy of gemm, whereby alpha is applied during the first iteration of variant 3, with BLIS_ONE passed in instead for subsequent iterations. This also required passing alpha into the macro- kernels as well as the fused gemmtrsm micro-kernels. - Fixed a bug in trsm_u_blk_var1 whereby the gemm macro-kernel was being called for blocks strictly above the diagonal. While this sounds good in theory, this cannot be done because gemm_ker_var2 expects row panels of A to be packed from top to bottom, while for trsm_u, A is actually packed from bottom to top due to the reverse (BR->TL) nature of the algorithm. - Fixed a bug in packm_cxk() whereby panel packings with unit panel dimensions were mishandled due to incorrect arguments to the copyv kernel. Also changed the copyv kernel invocation to scal2v so that these edge cases are properly handled when scaling is requested. - Fixed a bug in packv_int() whereby an uninitialized object is passed in instead of the source object. - Fixed a bug whereby level-2 code could allocate memory dynamically via bl2_malloc() and then attempt to free it via bl2_mm_release(). Also fixed a potential future bug whereby a mem_t object that is actually no longer "allocated" from the static pool is mistaken for being allocated due to failure to NULLify the buffer when the block was most recently released.
- Fixed a bug in bl2_acquire_mpart_*() whereby the uplo field was mistakenly toggled when the requested subpartition needed to be "reflected" due to it residing in an unstored region. commit be94fb84c0351602d7585269f29998e3bf83f899 Author: Field G. Van Zee Date: Fri Jan 4 10:55:21 2013 -0600 Added missing 'd' to fused gemmtrsm function name. commit 879a179e1dee36f0c56765f2ab91a26861019b34 Author: Field G. Van Zee Date: Fri Jan 4 10:37:27 2013 -0600 Added debug statements to bl2_mm_acquire_m(). Details: - Added printf() statements to bl2_mm_acquire_m() to help debug issues with prematurely exhausted memory pool. - Removed 'd' from kernel names of reference kernels in clarksville configuration's bl2_kernel.h commit 806e74beb4eafeef620a555ffbb3f6779e29c7b6 Author: Field G. Van Zee Date: Thu Dec 20 17:07:50 2012 -0600 Defined Frobenius norm operations. Details: - Added level-0 grabis macro operation to grab imaginary component of one variable and copy it to the real component of another variable. - Defined sumsqv operation, which computes the sum of the absolute squares of the elements of a vector. This implementation is modeled after ?lassq in netlib LAPACK. - Defined fnormv and fnormm operations, which compute the Frobenius norm on vectors and matrices, respectively. These operations are treated as one- operand operations where the output norm value is the real projection of the datatype of the input operand. Both operations are implemented in terms of sumsqv. commit 66e80ce1aec099b2b2b0c4f295e38add2c921383 Author: Field G. Van Zee Date: Thu Dec 20 17:02:55 2012 -0600 Added GENT*R macros; tweaked bl2_machval defs. Details: - Added function and prototype macro-generating macros for GENTFUNCR and GENTPROTR, which are one-operand macros with auxiliary real projection types. - Tweaked bl2_machval files to use new macros. commit 2fecc88ca22142020573f168da715e8e9f3dd7de Author: Field G.
Van Zee Date: Thu Dec 20 11:35:14 2012 -0600 Fixed harmless macro bug in level-1m operations. Details: - Fixed some inconsistent usage of n_iter_max and n_iter in the two bl2_set_dims_incs_uplo_[12]m macros. The right thing ended up happening despite the bug, which is why I had not discovered it until now. commit 8945db6ec9f82168cf72411ad408b4fdb44ae0d1 Author: Field G. Van Zee Date: Tue Dec 18 15:07:36 2012 -0600 Renamed x86,x86_64 kernels to indicate 'd' fusing. Details: - Renamed x86 and x86_64 kernels to contain a 'd' before the fusing shape to emphasize that the fusing shape is not for all datatype instances, but rather just for one (that of double-precision real). Other fusing shapes would be proportional to their precision and domain "byte footprints". - Corresponding changes to config/clarksville/bl2_kernel.h. commit 6fbbdd4e194d06096ad08c5db61127be338067db Author: Field G. Van Zee Date: Tue Dec 18 14:34:02 2012 -0600 More tweaks to _config.h, _kernel.h; smem tweaks. Details: - Moved kernel-related definitions from bl2_config.h to bl2_kernel.h. - Replaced #define of _GNU_SOURCE with #define of _POSIX_C_SOURCE. This accomplishes the same thing (enabling posix_memalign()) without enabling all of the GNU extensions we don't need. - Defined the size of the static memory pool in terms of MC, KC, and NC, as well as two new constants that determine how many MCxKC blocks and how many KCxNC blocks should be allocated (defined in bl2_config.h). - In the case of static memory pool exhaustion, replaced the generic bl2_abort() with a specific error code call. commit 5d8bdb21c48e8fb11bef6128a242122cc1470a99 Author: Field G. Van Zee Date: Mon Dec 17 16:07:36 2012 -0600 Minor reordering of bl2_config.h definitions. commit 4a83f67490136a898f558e273b76a687aed8b893 Author: Field G. Van Zee Date: Mon Dec 17 12:35:54 2012 -0600 Consolidated configuration headers. Details: - Merged contents of bl2_arch.h into bl2_config.h for reference and clarksville configurations.
- Updated CREDITS, INSTALL, LICENSE, README files. commit 0670c33cc14612f636ef09ede4133404ae0af6ba Author: Field G. Van Zee Date: Fri Dec 14 12:45:26 2012 -0600 Fixed bug in reference gemm ukernels. Details: - Fixed a bug whereby, for the reference gemm ukernels, the matrix product was not correctly accumulated and scaled (by alpha) into the output matrix C. (Thanks to Fran for finding this bug.) - Whitespace changes to reference trsm kernels. commit e2e7cb2fbe615be4d375bc2dce88d03d98fadc9e Author: Field G. Van Zee Date: Thu Dec 13 18:17:54 2012 -0600 Expanded reference packm/unpackm kernel set to 16. Details: - Added 10xk, 12xk, 14xk, and 16xk reference kernels for packm and unpackm. - Updated bl2_[un]packm_cxk() to silently use scal2m if "out of range" kernel size is requested. (Thanks to Tyler for finding this bug.) - Updated bl2_kernel.h to contain new _KERNEL definitions, according to above changes, for 'reference' and 'clarksville' configurations. - Updated CHANGELOG. - Removed "output*.m" from .gitignore. commit 17455a8bce038dd570356ab0c5c11d9a89f20248 Author: Field G. Van Zee Date: Mon Dec 10 17:23:32 2012 -0600 Minor updates towards to 0.0.1. commit 7ad4ebef38b8e6eea9b6091844ba7294ec870271 (tag: 0.0.1) Author: Field G. Van Zee Date: Mon Dec 10 16:18:40 2012 -0600 Tweaks to get BLIS compiling again on clarksville. Details: - Updated header files and make_defs.mk in config/clarksville. - Fixes to bl2_mem.c (now that SMEM_M, SMEM_N are gone). - Moved definition of blksz_t from bl2_cntl.h to bl2_type_defs.h. - Shuffled include statements in blis2.h. commit cc58ea86010b1f046134d13b546c878389df9af5 Author: Field G. Van Zee Date: Mon Dec 10 14:55:12 2012 -0600 Added template fragment.mk; updated .gitignore. commit 714c527b0eb153b7e2040b79349edc8372f743fd Author: Field G. Van Zee Date: Fri Dec 7 19:54:04 2012 -0600 Added 'changelog' make target; other tweaks. Details: - Updated CHANGELOG. 
- Added 'changelog' target to Makefile that runs 'git log --decorate' and overwrites CHANGELOG with the output. - Other trivial changes. commit e4e5404d26aded4873278e85faf6f14ac32115b5 Author: Field G. Van Zee Date: Fri Dec 7 17:34:53 2012 -0600 Define static memory pool size in bl2_config.h. commit 19bb507d0de6a2bd3ce37cf616bdcd6b419ed641 Author: Field G. Van Zee Date: Fri Dec 7 17:18:00 2012 -0600 Refined INSTALL text; added 'showconfig' target. Details: - Added 'showconfig' target to Makefile. - Added header files and ./config//make_defs.mk as prerequisites to object file rules. - Added config.mk as prerequisite to library install rules. - Edited and added to INSTALL file. commit 26cb659dd79636489db5a051aa60fff80273a7b9 Author: Field G. Van Zee Date: Thu Dec 6 15:34:53 2012 -0600 Added auto-detection of version string (via git). Details: - Added build/update-version-file.sh script for auto-detecting "version" string and updating 'version' file accordingly. (If .git directory is not present, then it is assumed this copy of BLIS is a downloaded release, in which case 'version' file is left unchanged.) - Added invocation of update-version-file.sh to configure script. commit b0ecd0ff52fa6ffc9e1d9eb44c365f7f009a6204 Author: Field G. Van Zee Date: Thu Dec 6 14:27:11 2012 -0600 Wrote first draft of INSTALL file. commit bcbe81235a35ccfdbcc2f2319a0ca6e04f75a785 (tag: 0.0.0) Author: Field G. Van Zee Date: Thu Dec 6 12:42:35 2012 -0600 Updated standalone test Makefile and other fixes. Details: - Major edits to test/Makefile to bring up-to-date wrt new build system; should no longer be broken. - Minor edits to top-level Makefile. - Fixed copy-and-paste bugs in - frame/1m/packm/ukernels/bl2_packm_ref_?xk.c - frame/1m/unpackm/ukernels/bl2_unpackm_ref_?xk.c commit 2f272b40f43307909736327f49d17737c7a05d37 Author: Field G. Van Zee Date: Tue Dec 4 19:22:14 2012 -0600 Added build system and continued reorganization. Details: - Added/renamed packm, unpackm kernels. 
- Added machine value routines. - Added param_map facility. - Renamed AUTHORS to CREDITS. - Added Makefile; continued to expand upon existing configure script. - #define fuse_fac macros in operation headers if not defined already (by the user in bl2_kernels.h). commit 00f3498a8943be1b387f0d5c029c8c7891687ad5 Author: Field G. Van Zee Date: Mon Dec 3 12:36:11 2012 -0600 Initial commit. blis-0.6.1/CONTRIBUTING.md000066400000000000000000000114051360743507500147130ustar00rootroot00000000000000## How to contribute to BLIS First, we want to thank you for your interest in contributing to BLIS! Please read through the following guidelines to help you better understand how to best contribute your potential bug report, bugfix, feature, etc. #### **Did you find a bug?** * **Check if the bug has already been reported** by searching on GitHub under [Issues](https://github.com/flame/blis/issues). * If you can't find an open issue addressing the problem, please feel free to [open a new one](https://github.com/flame/blis/issues/new). Some things to keep in mind as you create your issue: * Be sure to include a **meaningful title**. Aim for a title that is neither overly general nor overly specific. * Putting some time into writing a **clear description** will help us understand your bug and how you found it. * You are welcome to include the BLIS version number (e.g. 0.3.2-15) if you wish, but please supplement it with the **actual git commit number** corresponding to the code that exhibits your reported behavior (the first seven or eight hex digits is fine). * Unless you are confident that it's not relevant, it's usually recommended that you **tell us how you configured BLIS** and **about your environment in general**. Your hardware microarchitecture, OS, compiler (including version), `configure` options, configuration target are all good examples of things you may wish to include.
If the bug involves elements of the build system such as bash or python functionality, please include those version numbers, too. * If your bug involves behavior observed after linking to BLIS and running an application, please provide a minimally illustrative **code sample** that developers can run to (hopefully) reproduce the error or other concerning behavior. #### **Did you write a patch that fixes a bug?** If so, great, and thanks for your efforts! Please submit a new GitHub [pull request](https://github.com/flame/blis/pulls) with the patch. * Ensure the PR description clearly describes the problem and solution. Include any relevant issue numbers, if applicable. * Please limit your PR to addressing one issue at a time. For example, if you are fixing a bug and in the process you find a second, unrelated bug, please open a separate PR for the second bug (or, if the bugfix to the second bug is not obvious, you can simply open an [issue](https://github.com/flame/blis/issues/new) for the second bug). * Before submitting new code, please read the [coding conventions](https://github.com/flame/blis/wiki/CodingConventions) guide to learn more about our preferred coding conventions. (It's unlikely that we will turn away your contributed code due to mismatched coding styles, but it will be **highly** appreciated by project maintainers since it will save them the time of digressing from their work--whether now or later--to reformat your code.) #### **Did you fix whitespace or reformat code?** Unlike some other projects, if you find code that does not abide by the project's [coding conventions](https://github.com/flame/blis/wiki/CodingConventions) and you would like to bring that code up to our standards, we will be happy to accept your contribution. Please note in the commit log the fixing of whitespace, formatting, etc. as applicable.
If you are making a more substantial contribution and in the vicinity of the affected code (i.e., within the same file) you stumble upon other code that works but could use some trivial changes or reformatting, you may combine the latter into the commit for the former. Just note in your commit log that you also fixed whitespace or applied reformatting. #### **Do you intend to add a new feature or change an existing one?** That's fine, we are interested to hear your ideas! * You may wish to introduce your idea by opening an [issue](https://github.com/flame/blis/issues/new) to describe your new feature, or how an existing feature is not sufficiently general-purpose. This allows you the chance to open a dialogue with other developers, who may provide you with useful feedback. * Before submitting new code, please read the [coding conventions](https://github.com/flame/blis/wiki/CodingConventions) guide to learn more about our preferred coding conventions. (See comments above regarding mismatched coding styles.) #### **Do you have questions about the source code?** * Feel free to ask questions on the [blis-devel mailing list](https://groups.google.com/d/forum/blis-devel). You'll have to join to post, but don't be shy! Most of the interesting discussion (outside of GitHub) happens on blis-devel. We also have a [blis-discuss mailing list](https://groups.google.com/d/forum/blis-discuss), but it is not the preferred venue for discussion these days. Here at the BLIS project, we :heart: our community. :) Thanks for helping to make BLIS better! 
Field blis-0.6.1/CREDITS000066400000000000000000000117471360743507500135130ustar00rootroot00000000000000 BLIS framework Acknowledgements --- The BLIS framework was primarily authored by Field Van Zee @fgvanzee (The University of Texas at Austin) but many others have contributed code and feedback, including Sameer Agarwal @sandwichmaker (Google) Murtaza Ali (Texas Instruments) Sajid Ali @s-sajid-ali (Northwestern University) Erling Andersen @erling-d-andersen Alex Arslan @ararslan Vernon Austel (IBM, T.J. Watson Research Center) Matthew Brett @matthew-brett (University of Birmingham) Jed Brown @jedbrown (Argonne National Laboratory) Robin Christ @robinchrist Kay Dewhurst @jkd2016 (Max Planck Institute, Halle, Germany) Jeff Diamond (Oracle) Johannes Dieterich @iotamudelta Krzysztof Drewniak @krzysz00 Marat Dukhan @Maratyszcza (Google) Victor Eijkhout @VictorEijkhout (Texas Advanced Computing Center) Evgeny Epifanovsky @epifanovsky (Q-Chem) Isuru Fernando @isuruf Roman Gareev @gareevroman Richard Goldschmidt @SuperFluffy Chris Goodyer John Gunnels @jagunnels (IBM, T.J. 
Watson Research Center) Ali Emre Gülcü @Lephar Jeff Hammond @jeffhammond (Intel) Jacob Gorm Hansen @jacobgorm Jérémie du Boisberranger @jeremiedbb Jean-Michel Hautbois @jhautbois Ian Henriksen @insertinterestingnamehere (The University of Texas at Austin) Minh Quan Ho @hominhquan Matthew Honnibal @honnibal Stefan Husmann @stefanhusmann Francisco Igual @figual (Universidad Complutense de Madrid) Tony Kelman @tkelman Lee Killough @leekillough (Cray) Mike Kistler @mkistler (IBM, Austin Research Laboratory) Kyungmin Lee @kyungminlee (Ohio State University) Michael Lehn @michael-lehn Shmuel Levine @ShmuelLevine Dave Love @loveshack Tze Meng Low (The University of Texas at Austin) Ye Luo @ye-luo (Argonne National Laboratory) Ricardo Magana @magania (Hewlett Packard Enterprise) Bryan Marker @bamarker (The University of Texas at Austin) Simon Lukas Märtens @ACSimon33 (RWTH Aachen University) Devin Matthews @devinamatthews (The University of Texas at Austin) Stefanos Mavros @smavros Nisanth Padinharepatt (AMD) Devangi Parikh @dnparikh (The University of Texas at Austin) Elmar Peise @elmar-peise (RWTH-Aachen) Clément Pernet @ClementPernet Ilya Polkovnichenko Jack Poulson @poulson (Stanford) Mathieu Poumeyrol @kali Christos Psarras @ChrisPsa (RWTH Aachen University) @qnerd Michael Rader @mrader1248 Pradeep Rao @pradeeptrgit (AMD) Aleksei Rechinskii Karl Rupp @karlrupp Martin Schatz (The University of Texas at Austin) Nico Schlömer @nschloe Rene Sitt Tony Skjellum @tonyskjellum (The University of Tennessee at Chattanooga) Mikhail Smelyanskiy (Intel, Parallel Computing Lab) Nathaniel Smith @njsmith Shaden Smith @ShadenSmith Tyler Smith @tlrmchlsmth (The University of Texas at Austin) Paul Springer @springer13 (RWTH Aachen University) Adam J. 
Stewart @adamjstewart (University of Illinois at Urbana-Champaign) Vladimir Sukarev Santanu Thangaraj (AMD) Nicholai Tukanov @nicholaiTukanov (The University of Texas at Austin) Rhys Ulerich @RhysU (The University of Texas at Austin) Robert van de Geijn @rvdg (The University of Texas at Austin) Kiran Varaganti @kvaragan (AMD) Natalia Vassilieva (Hewlett Packard Enterprise) Zhang Xianyi @xianyi (Chinese Academy of Sciences) Benda Xu @heroxbd Costas Yamin @cosstas Chenhan Yu @ChenhanYu (The University of Texas at Austin) Roman Yurchak @rth (Symerio) M. Zhou @cdluminate BLIS's development was partially funded by grants from industry partners, including AMD Hewlett Packard Enterprise Huawei Intel Microsoft Oracle Texas Instruments as well as the National Science Foundation (NSF Awards CCF-0917167, ACI-1148125/1340293, ACI-1550493, and CCF-1320112). blis-0.6.1/INSTALL000066400000000000000000000014651360743507500135200ustar00rootroot00000000000000 BLIS framework INSTALL --- For a detailed description of how to configure, compile, install, and link to a BLIS library on your local system, please read the build system documentation located in: docs/BuildSystem.md Note that the document's markdown content can be conveniently rendered by viewing the file over GitHub via a web browser: https://github.com/flame/blis/blob/master/docs/BuildSystem.md This document will always contain the most up-to-date information related to instantiating a BLIS library from the framework source code. If you have any further questions or wish to provide feedback, please contact the BLIS community by posting your message to the BLIS developer's mailing list: https://groups.google.com/d/forum/blis-devel Thanks for your interest in the BLIS framework! 
Field Van Zee blis-0.6.1/LICENSE000066400000000000000000000037451360743507500134770ustar00rootroot00000000000000NOTE: Portions of this project's code are copyrighted by The University of Texas at Austin while other portions are copyrighted by Hewlett Packard Enterprise Development LP Advanced Micro Devices, Inc. with some overlap. Please see file-level license headers for file-specific copyright info. All parties provide their portions of the code under the 3-clause BSD license, found below. --- Copyright (C) 2018, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. blis-0.6.1/Makefile000066400000000000000000001264471360743507500141370ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Top-level makefile for the BLIS framework. # # # # --- Makefile PHONY target definitions ---------------------------------------- # # Every name listed here is a command-style target that never corresponds to a # file on disk. Keep this list in sync with the prerequisites of the primary # targets (install, uninstall, uninstall-old) defined later in this makefile. # .PHONY: all \ libs libblis \ check-env check-env-mk check-env-fragments check-env-make-defs \ flat-header flat-cblas-header \ test \ testblas blastest-f2c blastest-bin blastest-run \ testsuite testsuite-bin \ testsuite-run testsuite-run-fast testsuite-run-md testsuite-run-salt \ testblis testblis-fast testblis-md testblis-salt \ check checkblas \ checkblis checkblis-fast checkblis-md checkblis-salt \ install-headers install-share install-libs install-lib-symlinks \ showconfig \ clean cleanmk cleanh cleanlib distclean \ cleantest cleanblastest cleanblistest \ changelog \ install uninstall uninstall-old \ uninstall-libs uninstall-lib-symlinks uninstall-headers uninstall-share \ uninstall-old-libs uninstall-old-symlinks uninstall-old-headers # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - We don't need to define DIST_PATH, LIB_PATH, INC_PATH, or SHARE_PATH since # the defaults in common.mk (and config.mk) are designed to work with the # top-level Makefile. #DIST_PATH := . #LIB_PATH = ./lib/$(CONFIG_NAME) #INC_PATH = ./include/$(CONFIG_NAME) #SHARE_PATH := . # # --- Include common makefile definitions -------------------------------------- # # Define the name of the common makefile. COMMON_MK_FILE := common.mk # Include the configuration file.
-include $(COMMON_MK_FILE) # Detect whether we actually got the configuration file. If we didn't, then # it is likely that the user has not yet generated it (via configure). ifeq ($(strip $(COMMON_MK_INCLUDED)),yes) COMMON_MK_PRESENT := yes else COMMON_MK_PRESENT := no endif # # --- Main target variable definitions ----------------------------------------- # # --- Object file paths --- # Construct the base object file path for the current configuration. BASE_OBJ_PATH := ./$(OBJ_DIR)/$(CONFIG_NAME) # Construct base object file paths corresponding to the five locations # of source code. BASE_OBJ_CONFIG_PATH := $(BASE_OBJ_PATH)/$(CONFIG_DIR) BASE_OBJ_FRAME_PATH := $(BASE_OBJ_PATH)/$(FRAME_DIR) BASE_OBJ_REFKERN_PATH := $(BASE_OBJ_PATH)/$(REFKERN_DIR) BASE_OBJ_KERNELS_PATH := $(BASE_OBJ_PATH)/$(KERNELS_DIR) BASE_OBJ_SANDBOX_PATH := $(BASE_OBJ_PATH)/$(SANDBOX_DIR) # --- Define install target names for static libraries --- LIBBLIS_A_INST := $(INSTALL_LIBDIR)/$(LIBBLIS_A) # --- Define install target names for shared libraries --- LIBBLIS_SO_INST := $(INSTALL_LIBDIR)/$(LIBBLIS_SO) LIBBLIS_SO_MAJ_INST := $(INSTALL_LIBDIR)/$(LIBBLIS_SONAME) ifeq ($(IS_WIN),yes) # The 'install' target does not create symlinks for Windows builds, so we don't # bother defining LIBBLIS_SO_MMB_INST. LIBBLIS_SO_MMB_INST := else LIBBLIS_SO_MMB_INST := $(INSTALL_LIBDIR)/$(LIBBLIS).$(LIBBLIS_SO_MMB_EXT) endif # --- Determine which libraries to build --- MK_LIBS := MK_LIBS_INST := MK_LIBS_SYML := ifeq ($(MK_ENABLE_STATIC),yes) MK_LIBS += $(LIBBLIS_A_PATH) MK_LIBS_INST += $(LIBBLIS_A_INST) MK_LIBS_SYML += endif ifeq ($(MK_ENABLE_SHARED),yes) MK_LIBS += $(LIBBLIS_SO_PATH) \ $(LIBBLIS_SO_MAJ_PATH) MK_LIBS_INST += $(LIBBLIS_SO_MMB_INST) MK_LIBS_SYML += $(LIBBLIS_SO_INST) \ $(LIBBLIS_SO_MAJ_INST) endif # Strip leading, internal, and trailing whitespace.
MK_LIBS_INST := $(strip $(MK_LIBS_INST)) MK_LIBS_SYML := $(strip $(MK_LIBS_SYML)) # --- Define install directory for headers --- # Set the path to the subdirectory of the include installation directory. MK_INCL_DIR_INST := $(INSTALL_INCDIR)/blis # --- Define install directory for public makefile fragments --- # Set the path to the subdirectory of the share installation directory. MK_SHARE_DIR_INST := $(INSTALL_SHAREDIR)/blis # # --- Library object definitions ----------------------------------------------- # # In this section, we will isolate the relevant source code filepaths and # convert them to lists of object filepaths. Relevant source code falls into # five categories: configuration source; architecture-specific kernel source; # reference kernel source; general framework source; and sandbox source. # $(call gen-obj-paths-from-src,file_exts,src_files,base_src_path,base_obj_path) gen-obj-paths-from-src = $(foreach ch, $(1), \ $(patsubst $(3)/%.$(ch), \ $(4)/%.o, \ $(filter %.$(ch), $(2)) ) ) # Generate object file paths for source code found in the sub-configuration # directories. MK_CONFIG_OBJS := $(call gen-obj-paths-from-src,$(CONFIG_SRC_SUFS),$(MK_CONFIG_SRC),$(CONFIG_PATH),$(BASE_OBJ_CONFIG_PATH)) # Generate object file paths for architecture-specific kernel source code. # We target only .c, .s, and .S files. Note that MK_KERNELS_SRC is already # limited to the kernel source corresponding to the kernel sets in # KERNEL_LIST. This is because the configure script only propagated makefile # fragments into those specific kernel subdirectories. MK_KERNELS_OBJS := $(call gen-obj-paths-from-src,$(KERNELS_SRC_SUFS),$(MK_KERNELS_SRC),$(KERNELS_PATH),$(BASE_OBJ_KERNELS_PATH)) # Generate object file paths for reference kernels, with one set of object # files for each sub-configuration in CONFIG_LIST. Note that due to the # nuances of naming the reference kernel files, we can't use the function # gen-obj-paths-from-src as we do above and below.
MK_REFKERN_C := $(filter %.c, $(MK_REFKERN_SRC)) MK_REFKERN_OBJS := $(foreach arch, $(CONFIG_LIST), \ $(patsubst $(REFKERN_PATH)/%_$(REFNM).c, \ $(BASE_OBJ_REFKERN_PATH)/$(arch)/%_$(arch)_$(REFNM).o, \ $(MK_REFKERN_C) \ ) \ ) # Generate object file paths for all of the portable framework source code. MK_FRAME_OBJS := $(call gen-obj-paths-from-src,$(FRAME_SRC_SUFS),$(MK_FRAME_SRC),$(FRAME_PATH),$(BASE_OBJ_FRAME_PATH)) # Generate object file paths for the sandbox source code. If a sandbox was not # enabled at configure-time, this variable will be empty. MK_SANDBOX_OBJS := $(call gen-obj-paths-from-src,$(SANDBOX_SRC_SUFS),$(MK_SANDBOX_SRC),$(SANDBOX_PATH),$(BASE_OBJ_SANDBOX_PATH)) # Combine all of the object files into some readily-accessible variables. MK_BLIS_OBJS := $(MK_CONFIG_OBJS) \ $(MK_KERNELS_OBJS) \ $(MK_REFKERN_OBJS) \ $(MK_FRAME_OBJS) \ $(MK_SANDBOX_OBJS) # Optionally filter out the BLAS and CBLAS compatibility layer object files. # This is not actually necessary, since each affected file is guarded by C # preprocessor macros, but it prevents "empty" object files from being # added into the library (and reduces compilation time). BASE_OBJ_BLAS_PATH := $(BASE_OBJ_FRAME_PATH)/compat BASE_OBJ_CBLAS_PATH := $(BASE_OBJ_FRAME_PATH)/compat/cblas ifeq ($(MK_ENABLE_CBLAS),no) MK_BLIS_OBJS := $(filter-out $(BASE_OBJ_CBLAS_PATH)/%.o, $(MK_BLIS_OBJS) ) endif ifeq ($(MK_ENABLE_BLAS),no) MK_BLIS_OBJS := $(filter-out $(BASE_OBJ_BLAS_PATH)/%.o, $(MK_BLIS_OBJS) ) endif # # --- Monolithic header definitions -------------------------------------------- # # Define a list of headers to install. The default is to only install blis.h. HEADERS_TO_INSTALL := $(BLIS_H_FLAT) # If CBLAS is enabled, we also install cblas.h so the user does not need to # change their source code to #include "blis.h" in order to access the CBLAS # function prototypes and enums.
ifeq ($(MK_ENABLE_CBLAS),yes) HEADERS_TO_INSTALL += $(CBLAS_H_FLAT) endif # # --- public makefile fragment definitions ------------------------------------- # # Define a list of makefile fragments to install. FRAGS_TO_INSTALL := $(CONFIG_MK_FILE) \ $(COMMON_MK_FILE) # # --- BLAS test drivers definitions -------------------------------------------- # # The location of the BLAS test suite's input files. BLASTEST_INPUT_PATH := $(DIST_PATH)/$(BLASTEST_DIR)/input # The location of the BLAS test suite object directory. BASE_OBJ_BLASTEST_PATH := $(BASE_OBJ_PATH)/$(BLASTEST_DIR) # The locations of the BLAS test suite source code (f2c and drivers). BLASTEST_F2C_SRC_PATH := $(DIST_PATH)/$(BLASTEST_DIR)/f2c BLASTEST_DRV_SRC_PATH := $(DIST_PATH)/$(BLASTEST_DIR)/src # The paths to object files we will create (f2c and drivers). BLASTEST_F2C_OBJS := $(sort \ $(patsubst $(BLASTEST_F2C_SRC_PATH)/%.c, \ $(BASE_OBJ_BLASTEST_PATH)/%.o, \ $(wildcard $(BLASTEST_F2C_SRC_PATH)/*.c)) \ ) BLASTEST_DRV_OBJS := $(sort \ $(patsubst $(BLASTEST_DRV_SRC_PATH)/%.c, \ $(BASE_OBJ_BLASTEST_PATH)/%.o, \ $(wildcard $(BLASTEST_DRV_SRC_PATH)/*.c)) \ ) # libf2c name and location. BLASTEST_F2C_LIB_NAME := libf2c.a BLASTEST_F2C_LIB := $(BASE_OBJ_BLASTEST_PATH)/$(BLASTEST_F2C_LIB_NAME) # The base names of each driver source file (ie: filename minus suffix). BLASTEST_DRV_BASES := $(basename $(notdir $(BLASTEST_DRV_OBJS))) # The binary executable driver names. BLASTEST_DRV_BINS := $(addsuffix .x,$(BLASTEST_DRV_BASES)) BLASTEST_DRV_BIN_PATHS := $(addprefix $(BASE_OBJ_BLASTEST_PATH)/,$(BLASTEST_DRV_BINS)) # Binary executable driver "run-" names BLASTEST_DRV_BINS_R := $(addprefix run-,$(BLASTEST_DRV_BASES)) # Filter level-1, level-2, and level-3 names to different variables. 
BLASTEST_DRV1_BASES := $(filter %1,$(BLASTEST_DRV_BASES)) BLASTEST_DRV2_BASES := $(filter %2,$(BLASTEST_DRV_BASES)) BLASTEST_DRV3_BASES := $(filter %3,$(BLASTEST_DRV_BASES)) # Define some CFLAGS that we'll only use when compiling BLAS test suite # files. BLAT_CFLAGS := -Wno-parentheses \ -I$(BLASTEST_F2C_SRC_PATH) \ -I. -DHAVE_BLIS_H # Suppress warnings about possibly uninitialized variables for the BLAS # test driver code (as output from f2c), which is riddled with such # variables, but only if the option to do so is supported. ifeq ($(CC_VENDOR),gcc) BLAT_CFLAGS += -Wno-maybe-uninitialized endif # The location of the script that checks the BLAS test output. BLASTEST_CHECK_PATH := $(DIST_PATH)/$(BLASTEST_DIR)/$(BLASTEST_CHECK) # # --- BLIS testsuite definitions ----------------------------------------------- # # The location of the test suite's general and operations-specific # input/configuration files. TESTSUITE_CONF_GEN_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_CONF_GEN) TESTSUITE_CONF_OPS_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_CONF_OPS) TESTSUITE_FAST_GEN_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_FAST_GEN) TESTSUITE_FAST_OPS_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_FAST_OPS) TESTSUITE_MIXD_GEN_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_MIXD_GEN) TESTSUITE_MIXD_OPS_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_MIXD_OPS) TESTSUITE_SALT_GEN_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_SALT_GEN) TESTSUITE_SALT_OPS_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_SALT_OPS) # The locations of the test suite source directory and the local object # directory. TESTSUITE_SRC_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/src BASE_OBJ_TESTSUITE_PATH := $(BASE_OBJ_PATH)/$(TESTSUITE_DIR) # Convert source file paths to object file paths by replacing the base source # directories with the base object directories, and also replacing the source # file suffix (eg: '.c') with '.o'. 
MK_TESTSUITE_OBJS := $(sort \ $(patsubst $(TESTSUITE_SRC_PATH)/%.c, \ $(BASE_OBJ_TESTSUITE_PATH)/%.o, \ $(wildcard $(TESTSUITE_SRC_PATH)/*.c)) \ ) # The test suite binary executable filename. # NOTE: The TESTSUITE_WRAPPER variable defaults to the empty string if it # is not already set, in which case it has no effect later on when the # testsuite binary is executed via lines such as # # $(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) ... > $(TESTSUITE_OUT_FILE) # # The reason TESTSUITE_WRAPPER is employed in this way is so that some # unusual environments (e.g. ARM) can run the testsuite through some other # binary. See .travis.yml for details on how the variable is employed in # practice. TESTSUITE_BIN := test_$(LIBBLIS).x TESTSUITE_WRAPPER ?= # The location of the script that checks the BLIS testsuite output. TESTSUITE_CHECK_PATH := $(DIST_PATH)/$(TESTSUITE_DIR)/$(TESTSUITE_CHECK) # # --- Uninstall definitions ---------------------------------------------------- # ifeq ($(IS_CONFIGURED),yes) # These make functions gather the filepaths to any library in the current # LIBDIR that might be left over from an old installation. We start with # including nothing for static libraries, since older static libraries are # always overwritten by newer ones. Then we add shared libraries, which are # named with three .so version numbers. UNINSTALL_OLD_LIBS := UNINSTALL_OLD_LIBS += $(filter-out $(INSTALL_LIBDIR)/$(LIBBLIS).$(LIBBLIS_SO_MMB_EXT),$(wildcard $(INSTALL_LIBDIR)/$(LIBBLIS_SO).?.?.?)) # These make functions gather the filepaths to any library symlink in the # current LIBDIR that might be left over from an old installation. We start # with symlinks named using the .so major version number. UNINSTALL_OLD_SYML := $(filter-out $(INSTALL_LIBDIR)/$(LIBBLIS_SO).$(SO_MAJOR),$(wildcard $(INSTALL_LIBDIR)/$(LIBBLIS_SO).?)) # We also prepare to uninstall older-style symlinks whose names contain the # BLIS version number and configuration family.
UNINSTALL_OLD_SYML += $(wildcard $(INSTALL_LIBDIR)/$(LIBBLIS)-*.a) UNINSTALL_OLD_SYML += $(wildcard $(INSTALL_LIBDIR)/$(LIBBLIS)-*.$(SHLIB_EXT)) # This shell command grabs all files named "*.h" that are not blis.h or cblas.h # in the installation directory. We consider this set of headers to be "old" and # eligible for removal upon running of the uninstall-old-headers target. UNINSTALL_OLD_HEADERS := $(filter-out $(BLIS_H),$(filter-out $(CBLAS_H),$(wildcard $(INSTALL_INCDIR)/blis/*.h))) endif # IS_CONFIGURED # # --- Targets/rules ------------------------------------------------------------ # # --- Primary targets --- all: libs libs: libblis test: checkblis checkblas check: checkblis-fast checkblas install: libs install-libs install-lib-symlinks install-headers install-share uninstall: uninstall-libs uninstall-lib-symlinks uninstall-headers uninstall-share uninstall-old: uninstall-old-libs uninstall-old-symlinks uninstall-old-headers clean: cleanh cleanlib # --- Environment check rules --- check-env: check-env-make-defs check-env-fragments check-env-mk check-env-mk: ifeq ($(CONFIG_MK_PRESENT),no) $(error Cannot proceed: config.mk not detected! Run configure first) endif check-env-fragments: check-env-mk ifeq ($(MAKEFILE_FRAGMENTS_PRESENT),no) $(error Cannot proceed: makefile fragments not detected! Run configure first) endif check-env-make-defs: check-env-fragments ifeq ($(ALL_MAKE_DEFS_MK_PRESENT),no) $(error Cannot proceed: Some make_defs.mk files not found or mislabeled!) 
endif # --- Consolidated blis.h header creation --- flat-header: check-env $(BLIS_H_FLAT) $(BLIS_H_FLAT): $(FRAME_H99_FILES) ifeq ($(ENABLE_VERBOSE),yes) $(FLATTEN_H) -c -v1 $(BLIS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" else @echo -n "Generating monolithic blis.h" @$(FLATTEN_H) -c -v1 $(BLIS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" @echo "Generated $@" endif # --- Consolidated cblas.h header creation --- flat-cblas-header: check-env $(CBLAS_H_FLAT) $(CBLAS_H_FLAT): $(FRAME_H99_FILES) ifeq ($(ENABLE_VERBOSE),yes) $(FLATTEN_H) -c -v1 $(CBLAS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" else @echo -n "Generating monolithic cblas.h" @$(FLATTEN_H) -c -v1 $(CBLAS_H_SRC_PATH) $@ "./$(INCLUDE_DIR)" "$(ALL_H99_DIRPATHS)" @echo "Generated $@" endif # --- General source code / object code rules --- # FGVZ: Add support for compiling .s and .S files in 'config'/'kernels' # directories. # - May want to add an extra foreach loop around function eval/call. # first argument: a configuration name from config_list, used to look up the # CFLAGS to use during compilation. define make-config-rule $(BASE_OBJ_CONFIG_PATH)/$(1)/%.o: $(CONFIG_PATH)/$(1)/%.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-config-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-config-text-for,$(1)) @$(CC) $(call get-config-cflags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a configuration name from the union of config_list and # config_name, used to look up the CFLAGS to use during compilation. define make-frame-rule $(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-frame-text-for,$(1)) @$(CC) $(call get-frame-cflags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a kernel set (name) being targeted (e.g. haswell). 
define make-refinit-rule $(BASE_OBJ_REFKERN_PATH)/$(1)/bli_cntx_$(1)_ref.o: $(REFKERN_PATH)/bli_cntx_ref.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-refinit-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-refinit-text-for,$(1)) @$(CC) $(call get-refinit-cflags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a kernel set (name) being targeted (e.g. haswell). define make-refkern-rule $(BASE_OBJ_REFKERN_PATH)/$(1)/%_$(1)_ref.o: $(REFKERN_PATH)/%_ref.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-refkern-cflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-refkern-text-for,$(1)) @$(CC) $(call get-refkern-cflags-for,$(1)) -c $$< -o $$@ endif endef # first argument: a kernel set (name) being targeted (e.g. haswell). # second argument: the configuration whose CFLAGS we should use in compilation. # third argument: the kernel file suffix being considered. define make-kernels-rule $(BASE_OBJ_KERNELS_PATH)/$(1)/%.o: $(KERNELS_PATH)/$(1)/%.$(3) $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-kernel-cflags-for,$(2)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-kernel-text-for,$(2)) @$(CC) $(call get-kernel-cflags-for,$(2)) -c $$< -o $$@ endif endef # first argument: a configuration name from the union of config_list and # config_name, used to look up the CFLAGS to use during compilation. 
define make-c99-sandbox-rule $(BASE_OBJ_SANDBOX_PATH)/%.o: $(SANDBOX_PATH)/%.$(2) $(BLIS_H_FLAT) $(SANDBOX_H99_FILES) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-sandbox-c99flags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-sandbox-c99text-for,$(1)) @$(CC) $(call get-sandbox-c99flags-for,$(1)) -c $$< -o $$@ endif endef define make-cxx-sandbox-rule $(BASE_OBJ_SANDBOX_PATH)/%.o: $(SANDBOX_PATH)/%.$(2) $(BLIS_H_FLAT) $(SANDBOX_HXX_FILES) $(MAKE_DEFS_MK_PATHS) ifeq ($(ENABLE_VERBOSE),yes) $(CXX) $(call get-sandbox-cxxflags-for,$(1)) -c $$< -o $$@ else @echo "Compiling $$@" $(call get-sandbox-cxxtext-for,$(1)) @$(CXX) $(call get-sandbox-cxxflags-for,$(1)) -c $$< -o $$@ endif endef # Define functions to choose the correct sub-configuration name for the # given kernel set. This function is called when instantiating the # make-kernels-rule. get-config-for-kset = $(lastword $(subst :, ,$(filter $(1):%,$(KCONFIG_MAP)))) # Instantiate the build rule for files in the configuration directory for # each of the sub-configurations in CONFIG_LIST with the CFLAGS designated # for that sub-configuration. $(foreach conf, $(CONFIG_LIST), $(eval $(call make-config-rule,$(conf)))) # Instantiate the build rule for framework files. Use the CFLAGS for the # configuration family, which exists in the directory whose name is equal to # CONFIG_NAME. Note that this doesn't need to be in a loop since we expect # CONFIG_NAME to only ever contain a single name. (BTW: If CONFIG_NAME refers # to a singleton family, then CONFIG_LIST contains CONFIG_NAME as its only # item.) $(foreach conf, $(CONFIG_NAME), $(eval $(call make-frame-rule,$(conf)))) # Instantiate the build rule for reference kernel initialization and # reference kernels for each of the sub-configurations in CONFIG_LIST with # the CFLAGS designated for that sub-configuration. 
$(foreach conf, $(CONFIG_LIST), $(eval $(call make-refinit-rule,$(conf)))) $(foreach conf, $(CONFIG_LIST), $(eval $(call make-refkern-rule,$(conf)))) # Instantiate the build rule for optimized kernels for each of the kernel # sets in KERNEL_LIST with the CFLAGS designated for the sub-configuration # specified by the KCONFIG_MAP. $(foreach suf, $(KERNELS_SRC_SUFS), \ $(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-rule,$(kset),$(call get-config-for-kset,$(kset)),$(suf))))) # Instantiate the build rule for C sandbox files. Use the CFLAGS for the # configuration family. $(foreach suf, $(SANDBOX_C99_SUFS), \ $(foreach conf, $(CONFIG_NAME), $(eval $(call make-c99-sandbox-rule,$(conf),$(suf))))) # Instantiate the build rule for C++ sandbox files. Use the CXXFLAGS for the # configuration family. $(foreach suf, $(SANDBOX_CXX_SUFS), \ $(foreach conf, $(CONFIG_NAME), $(eval $(call make-cxx-sandbox-rule,$(conf),$(suf))))) # --- All-purpose library rule (static and shared) --- libblis: check-env $(MK_LIBS) # --- Static library archiver rules --- $(LIBBLIS_A_PATH): $(MK_BLIS_OBJS) ifeq ($(ENABLE_VERBOSE),yes) ifeq ($(ARG_MAX_HACK),yes) $(file > $@.in,$^) $(AR) $(ARFLAGS) $@ @$@.in $(RM_F) $@.in $(RANLIB) $@ else $(AR) $(ARFLAGS) $@ $? $(RANLIB) $@ endif else # ifeq ($(ENABLE_VERBOSE),no) ifeq ($(ARG_MAX_HACK),yes) @echo "Archiving $@" @$(file > $@.in,$^) @$(AR) $(ARFLAGS) $@ @$@.in @$(RM_F) $@.in @$(RANLIB) $@ else @echo "Archiving $@" @$(AR) $(ARFLAGS) $@ $? @$(RANLIB) $@ endif endif # --- Shared library linker rules --- # NOTE: The linker is always given the complete object list ($^), never just # the prerequisites newer than the target ($?). Unlike the archiver, which # updates members of an existing archive in place, the linker rebuilds the # library from scratch, so every object must be named on each invocation. $(LIBBLIS_SO_PATH): $(MK_BLIS_OBJS) ifeq ($(ENABLE_VERBOSE),yes) ifeq ($(ARG_MAX_HACK),yes) $(file > $@.in,$^) $(LINKER) $(SOFLAGS) -o $(LIBBLIS_SO_OUTPUT_NAME) @$@.in $(LDFLAGS) $(RM_F) $@.in else $(LINKER) $(SOFLAGS) -o $(LIBBLIS_SO_OUTPUT_NAME) $^
$(LDFLAGS) endif else # ifeq ($(ENABLE_VERBOSE),no) ifeq ($(ARG_MAX_HACK),yes) @echo "Dynamically linking $@" @$(file > $@.in,$^) @$(LINKER) $(SOFLAGS) -o $(LIBBLIS_SO_OUTPUT_NAME) @$@.in $(LDFLAGS) @$(RM_F) $@.in else @echo "Dynamically linking $@" @$(LINKER) $(SOFLAGS) -o $(LIBBLIS_SO_OUTPUT_NAME) $^ $(LDFLAGS) endif endif # Local symlink for shared library. # NOTE: We use a '.loc' suffix to avoid filename collisions in case this # rule is executed concurrently with the install-lib-symlinks rule, which # also creates symlinks in the current directory (before installing them). # NOTE: We don't create any symlinks during Windows builds. $(LIBBLIS_SO_MAJ_PATH): $(LIBBLIS_SO_PATH) ifeq ($(IS_WIN),no) ifeq ($(ENABLE_VERBOSE),yes) $(SYMLINK) $( out.$(1) else @echo "Running $(1).x > 'out.$(1)'" @$(TESTSUITE_WRAPPER) $(BASE_OBJ_BLASTEST_PATH)/$(1).x > out.$(1) endif endef # Instantiate the rule above for each level-1 driver file. $(foreach name, $(BLASTEST_DRV1_BASES), $(eval $(call make-run-blat1-rule,$(name)))) # A rule to run ?blat2.x and ?blat3.x driver files. define make-run-blat23-rule run-$(1): $(BASE_OBJ_BLASTEST_PATH)/$(1).x ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) $(BASE_OBJ_BLASTEST_PATH)/$(1).x < $(BLASTEST_INPUT_PATH)/$(1).in else @echo "Running $(1).x < '$(BLASTEST_INPUT_PATH)/$(1).in' (output to 'out.$(1)')" @$(TESTSUITE_WRAPPER) $(BASE_OBJ_BLASTEST_PATH)/$(1).x < $(BLASTEST_INPUT_PATH)/$(1).in endif endef # Instantiate the rule above for each level-2 driver file. $(foreach name, $(BLASTEST_DRV2_BASES), $(eval $(call make-run-blat23-rule,$(name)))) # Instantiate the rule above for each level-3 driver file. $(foreach name, $(BLASTEST_DRV3_BASES), $(eval $(call make-run-blat23-rule,$(name)))) # Check the results of the BLAS test suite drivers.
checkblas: blastest-run ifeq ($(ENABLE_VERBOSE),yes) - $(BLASTEST_CHECK_PATH) else @- $(BLASTEST_CHECK_PATH) endif # --- BLIS test suite rules --- testblis: testsuite testblis-fast: testsuite-run-fast testblis-md: testsuite-run-md testblis-salt: testsuite-run-salt testsuite: testsuite-run testsuite-bin: check-env $(TESTSUITE_BIN) # Object file rule. $(BASE_OBJ_TESTSUITE_PATH)/%.o: $(TESTSUITE_SRC_PATH)/%.c ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(call get-user-cflags-for,$(CONFIG_NAME)) -c $< -o $@ else @echo "Compiling $@" @$(CC) $(call get-user-cflags-for,$(CONFIG_NAME)) -c $< -o $@ endif # Testsuite binary rule. $(TESTSUITE_BIN): $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(LINKER) $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ else @echo "Linking $@ against '$(LIBBLIS_LINK) $(LDFLAGS)'" @$(LINKER) $(MK_TESTSUITE_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ endif # A rule to run the testsuite using the normal input.* files. testsuite-run: testsuite-bin ifeq ($(ENABLE_VERBOSE),yes) $(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_CONF_GEN_PATH) \ -o $(TESTSUITE_CONF_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) else @echo "Running $(TESTSUITE_BIN) with output redirected to '$(TESTSUITE_OUT_FILE)'" @$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_CONF_GEN_PATH) \ -o $(TESTSUITE_CONF_OPS_PATH) \ > $(TESTSUITE_OUT_FILE) endif # A rule to run the testsuite using the input.*.fast files, which # run a set of tests designed to finish much more quickly. 
# Same shape as testsuite-run, but passes the $(TESTSUITE_FAST_*_PATH)
# input files. Output goes to the same $(TESTSUITE_OUT_FILE) as the other
# testsuite-run-* rules, so each run overwrites the previous results.
testsuite-run-fast: testsuite-bin
ifeq ($(ENABLE_VERBOSE),yes)
	$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_FAST_GEN_PATH) \
	                                        -o $(TESTSUITE_FAST_OPS_PATH) \
	                                         > $(TESTSUITE_OUT_FILE)
else
	@echo "Running $(TESTSUITE_BIN) (fast) with output redirected to '$(TESTSUITE_OUT_FILE)'"
	@$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_FAST_GEN_PATH) \
	                                         -o $(TESTSUITE_FAST_OPS_PATH) \
	                                          > $(TESTSUITE_OUT_FILE)
endif

# A rule to run the testsuite using the input.*.md files, which
# run a set of tests designed to only exercise mixed-datatype gemm.
testsuite-run-md: testsuite-bin
ifeq ($(ENABLE_VERBOSE),yes)
	$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_MIXD_GEN_PATH) \
	                                        -o $(TESTSUITE_MIXD_OPS_PATH) \
	                                         > $(TESTSUITE_OUT_FILE)
else
	@echo "Running $(TESTSUITE_BIN) (mixed dt) with output redirected to '$(TESTSUITE_OUT_FILE)'"
	@$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_MIXD_GEN_PATH) \
	                                         -o $(TESTSUITE_MIXD_OPS_PATH) \
	                                          > $(TESTSUITE_OUT_FILE)
endif

# A rule to run the testsuite using the input.*.salt files, which
# simulates application-level threading across operation tests.
testsuite-run-salt: testsuite-bin
ifeq ($(ENABLE_VERBOSE),yes)
	$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_SALT_GEN_PATH) \
	                                        -o $(TESTSUITE_SALT_OPS_PATH) \
	                                         > $(TESTSUITE_OUT_FILE)
else
	@echo "Running $(TESTSUITE_BIN) (salt) with output redirected to '$(TESTSUITE_OUT_FILE)'"
	@$(TESTSUITE_WRAPPER) ./$(TESTSUITE_BIN) -g $(TESTSUITE_SALT_GEN_PATH) \
	                                         -o $(TESTSUITE_SALT_OPS_PATH) \
	                                          > $(TESTSUITE_OUT_FILE)
endif

# Check the results of the BLIS testsuite.
# Runs the script at $(TESTSUITE_CHECK_PATH) on the output file written by
# testsuite-run. The leading '-' makes make ignore a non-zero exit status
# from the script, so this target does not fail the make invocation.
checkblis: testsuite-run
ifeq ($(ENABLE_VERBOSE),yes)
	- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE)
else
	@- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE)
endif

# Check the results of the BLIS testsuite (fast).
# Each checkblis-* target runs the matching testsuite-run-* target and
# then passes $(TESTSUITE_OUT_FILE) to the script at
# $(TESTSUITE_CHECK_PATH). The leading '-' makes make ignore a non-zero
# exit status from that script.
checkblis-fast: testsuite-run-fast
ifeq ($(ENABLE_VERBOSE),yes)
	- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE)
else
	@- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE)
endif

# Check the results of the BLIS testsuite (mixed-datatype).
checkblis-md: testsuite-run-md
ifeq ($(ENABLE_VERBOSE),yes)
	- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE)
else
	@- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE)
endif

# Check the results of the BLIS testsuite (salt).
checkblis-salt: testsuite-run-salt
ifeq ($(ENABLE_VERBOSE),yes)
	- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE)
else
	@- $(TESTSUITE_CHECK_PATH) $(TESTSUITE_OUT_FILE)
endif


# --- Install header rules ---

install-headers: check-env $(MK_INCL_DIR_INST)

# The install directory itself is the target, so $(@) in the recipe is the
# destination directory. Prerequisites are the headers to install and
# $(CONFIG_MK_FILE).
$(MK_INCL_DIR_INST): $(HEADERS_TO_INSTALL) $(CONFIG_MK_FILE)
ifeq ($(ENABLE_VERBOSE),yes)
	$(MKDIR) $(@)
	$(INSTALL) -m 0644 $(HEADERS_TO_INSTALL) $(@)
else
	@$(MKDIR) $(@)
	@echo "Installing $(notdir $(HEADERS_TO_INSTALL)) into $(@)/"
	@$(INSTALL) -m 0644 $(HEADERS_TO_INSTALL) $(@)
endif


# --- Install share rules ---

install-share: check-env $(MK_SHARE_DIR_INST)

# Two steps: (1) copy $(FRAGS_TO_INSTALL) into the share directory $(@);
# (2) create $(@)/$(CONFIG_DIR)/$(CONFIG_NAME) and copy that
# configuration's $(MAKE_DEFS_FILE) into it.
# NOTE(review): the two branches spell the final destination differently
# (the non-verbose branch has a trailing '/'); both name the directory
# created by the preceding $(MKDIR) -p.
$(MK_SHARE_DIR_INST): $(FRAGS_TO_INSTALL) $(CONFIG_MK_FILE)
ifeq ($(ENABLE_VERBOSE),yes)
	$(MKDIR) $(@)
	$(INSTALL) -m 0644 $(FRAGS_TO_INSTALL) $(@)
	$(MKDIR) -p $(@)/$(CONFIG_DIR)/$(CONFIG_NAME)
	$(INSTALL) -m 0644 $(CONFIG_DIR)/$(CONFIG_NAME)/$(MAKE_DEFS_FILE) \
	           $(@)/$(CONFIG_DIR)/$(CONFIG_NAME)
else
	@$(MKDIR) $(@)
	@echo "Installing $(notdir $(FRAGS_TO_INSTALL)) into $(@)/"
	@$(INSTALL) -m 0644 $(FRAGS_TO_INSTALL) $(@)
	@$(MKDIR) -p $(@)/$(CONFIG_DIR)/$(CONFIG_NAME)
	@echo "Installing $(CONFIG_DIR)/$(CONFIG_NAME)/$(MAKE_DEFS_FILE) into $(@)/$(CONFIG_DIR)/$(CONFIG_NAME)"
	@$(INSTALL) -m 0644 $(CONFIG_DIR)/$(CONFIG_NAME)/$(MAKE_DEFS_FILE) \
	            $(@)/$(CONFIG_DIR)/$(CONFIG_NAME)/
endif


# --- Install library rules ---

install-libs: check-env $(MK_LIBS_INST)

# Install static library.
# Pattern rule: copy a .a archive from $(BASE_LIB_PATH) into
# $(INSTALL_LIBDIR) with mode 0644, creating the destination directory
# ($(@D)) first. In verbose mode the commands are echoed by make; otherwise
# a one-line summary is printed and the commands themselves are silenced
# with '@'.
$(INSTALL_LIBDIR)/%.a: $(BASE_LIB_PATH)/%.a $(CONFIG_MK_FILE)
ifeq ($(ENABLE_VERBOSE),yes)
	$(MKDIR) $(@D)
	$(INSTALL) -m 0644 $< $@
else
	@echo "Installing $(@F) into $(INSTALL_LIBDIR)/"
	@$(MKDIR) $(@D)
	@$(INSTALL) -m 0644 $< $@
endif

# Install shared library containing .so major, minor, and build versions.
# Note: Installation rules for Windows do not include major, minor, and
# build version numbers.
ifeq ($(IS_WIN),no)

# Linux/OSX library (.so OR .dylib) installation rules.
# The installed file carries the $(LIBBLIS_SO_MMB_EXT) extension while the
# build-tree prerequisite uses the plain $(SHLIB_EXT) extension.
$(INSTALL_LIBDIR)/%.$(LIBBLIS_SO_MMB_EXT): $(BASE_LIB_PATH)/%.$(SHLIB_EXT) $(CONFIG_MK_FILE)
ifeq ($(ENABLE_VERBOSE),yes)
	$(MKDIR) $(@D)
	$(INSTALL) -m 0755 $< $@
else
	@echo "Installing $(@F) into $(INSTALL_LIBDIR)/"
	@$(MKDIR) $(@D)
	@$(INSTALL) -m 0755 $< $@
endif

else # ifeq ($(IS_WIN),yes)

# Windows library (.dll and .lib) installation rules.
# In verbose mode the commands are left un-silenced (no '@' prefix) so that
# make echoes them, consistent with every other install rule in this file.
$(INSTALL_LIBDIR)/%.$(SHLIB_EXT): $(BASE_LIB_PATH)/%.$(SHLIB_EXT)
ifeq ($(ENABLE_VERBOSE),yes)
	$(MKDIR) $(@D)
	$(INSTALL) -m 0644 $(BASE_LIB_PATH)/$(@F) $@
else
	@echo "Installing $(@F) into $(INSTALL_LIBDIR)/"
	@$(MKDIR) $(@D)
	@$(INSTALL) -m 0644 $(BASE_LIB_PATH)/$(@F) $@
endif

$(INSTALL_LIBDIR)/%.$(LIBBLIS_SO_MAJ_EXT): $(BASE_LIB_PATH)/%.$(LIBBLIS_SO_MAJ_EXT)
ifeq ($(ENABLE_VERBOSE),yes)
	$(MKDIR) $(@D)
	$(INSTALL) -m 0644 $(BASE_LIB_PATH)/$(@F) $@
else
	@echo "Installing $(@F) into $(INSTALL_LIBDIR)/"
	@$(MKDIR) $(@D)
	@$(INSTALL) -m 0644 $(BASE_LIB_PATH)/$(@F) $@
endif

endif # ifeq ($(IS_WIN),no)


# --- Install-symlinks rules ---

install-lib-symlinks: check-env $(MK_LIBS_SYML)

# Note: Symlinks are not installed on Windows.
ifeq ($(IS_WIN),no)

# Install generic shared library symlink.
$(INSTALL_LIBDIR)/%.$(SHLIB_EXT): $(INSTALL_LIBDIR)/%.$(LIBBLIS_SO_MMB_EXT) ifeq ($(ENABLE_VERBOSE),yes) $(SYMLINK) $( $(DIST_PATH)/$(CHANGELOG) # --- Uninstall rules --- # NOTE: We can't write these uninstall rules directly in terms of targets # $(MK_LIBS_VERS_CONF_INST), $(MK_LIBS_INST), and $(MK_INCL_DIR_INST) # because those targets are already defined in terms of rules that *build* # those products. uninstall-libs: check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_F) $(MK_LIBS_INST) else @echo "Uninstalling libraries $(notdir $(MK_LIBS_INST)) from $(dir $(firstword $(MK_LIBS_INST)))" @- $(RM_F) $(MK_LIBS_INST) endif uninstall-lib-symlinks: check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_F) $(MK_LIBS_SYML) else @echo "Uninstalling symlinks $(notdir $(MK_LIBS_SYML)) from $(dir $(firstword $(MK_LIBS_SYML)))" @- $(RM_F) $(MK_LIBS_SYML) endif uninstall-headers: check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_RF) $(MK_INCL_DIR_INST) else @echo "Uninstalling directory '$(notdir $(MK_INCL_DIR_INST))' from $(dir $(MK_INCL_DIR_INST))" @- $(RM_RF) $(MK_INCL_DIR_INST) endif uninstall-share: check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_RF) $(MK_SHARE_DIR_INST) else @echo "Uninstalling directory '$(notdir $(MK_SHARE_DIR_INST))' from $(dir $(MK_SHARE_DIR_INST))" @- $(RM_RF) $(MK_SHARE_DIR_INST) endif # --- Uninstall old rules --- uninstall-old-libs: $(UNINSTALL_OLD_LIBS) check-env uninstall-old-symlinks: $(UNINSTALL_OLD_SYML) check-env uninstall-old-headers: $(UNINSTALL_OLD_HEADERS) check-env $(UNINSTALL_OLD_LIBS) $(UNINSTALL_OLD_SYML) $(UNINSTALL_OLD_HEADERS): check-env ifeq ($(ENABLE_VERBOSE),yes) - $(RM_F) $@ else @echo "Uninstalling $(@F) from $(@D)/" @- $(RM_F) $@ endif blis-0.6.1/README.md000066400000000000000000001071521360743507500137460ustar00rootroot00000000000000![The BLIS cat is sleeping.](http://www.cs.utexas.edu/users/field/blis_cat.png) [![Build Status](https://travis-ci.org/flame/blis.svg?branch=master)](https://travis-ci.org/flame/blis) [![Build 
Status](https://ci.appveyor.com/api/projects/status/github/flame/blis?branch=master&svg=true)](https://ci.appveyor.com/project/shpc/blis/branch/master) Contents -------- * **[Introduction](#introduction)** * **[Education and Learning](#education-and-learning)** * **[What's New](#whats-new)** * **[What People Are Saying About BLIS](#what-people-are-saying-about-blis)** * **[Key Features](#key-features)** * **[How to Download BLIS](#how-to-download-blis)** * **[Getting Started](#getting-started)** * **[Documentation](#documentation)** * **[External Packages](#external-packages)** * **[Discussion](#discussion)** * **[Contributing](#contributing)** * **[Citations](#citations)** * **[Funding](#funding)** Introduction ------------ BLIS is a portable software framework for instantiating high-performance BLAS-like dense linear algebra libraries. The framework was designed to isolate essential kernels of computation that, when optimized, immediately enable optimized implementations of most of its commonly used and computationally intensive operations. BLIS is written in [ISO C99](http://en.wikipedia.org/wiki/C99) and available under a [new/modified/3-clause BSD license](http://opensource.org/licenses/BSD-3-Clause). While BLIS exports a [new BLAS-like API](docs/BLISTypedAPI.md), it also includes a BLAS compatibility layer which gives application developers access to BLIS implementations via traditional [BLAS routine calls](http://www.netlib.org/lapack/lug/node145.html). An [object-based API](docs/BLISObjectAPI.md) unique to BLIS is also available. For a thorough presentation of our framework, please read our [ACM Transactions on Mathematical Software (TOMS)](https://toms.acm.org/) journal article, ["BLIS: A Framework for Rapidly Instantiating BLAS Functionality"](http://dl.acm.org/authorize?N91172). For those who just want an executive summary, please see the [Key Features](#key-features) section below. 
In a follow-up article (also in [ACM TOMS](https://toms.acm.org/)), ["The BLIS Framework: Experiments in Portability"](http://dl.acm.org/authorize?N16240), we investigate using BLIS to instantiate level-3 BLAS implementations on a variety of general-purpose, low-power, and multicore architectures. An IPDPS'14 conference paper titled ["Anatomy of High-Performance Many-Threaded Matrix Multiplication"](http://www.cs.utexas.edu/users/flame/pubs/blis3_ipdps14.pdf) systematically explores the opportunities for parallelism within the five loops that BLIS exposes in its matrix multiplication algorithm. For other papers related to BLIS, please see the [Citations section](#citations) below. It is our belief that BLIS offers substantial benefits in productivity when compared to conventional approaches to developing BLAS libraries, as well as a much-needed refinement of the BLAS interface, and thus constitutes a major advance in dense linear algebra computation. While BLIS remains a work-in-progress, we are excited to continue its development and further cultivate its use within the community. The BLIS framework is primarily developed and maintained by individuals in the [Science of High-Performance Computing](http://shpc.ices.utexas.edu/) (SHPC) group in the [Institute for Computational Engineering and Sciences](https://www.ices.utexas.edu/) at [The University of Texas at Austin](https://www.utexas.edu/). Please visit the [SHPC](http://shpc.ices.utexas.edu/) website for more information about our research group, such as a list of [people](http://shpc.ices.utexas.edu/people.html) and [collaborators](http://shpc.ices.utexas.edu/collaborators.html), [funding sources](http://shpc.ices.utexas.edu/funding.html), [publications](http://shpc.ices.utexas.edu/publications.html), and [other educational projects](http://www.ulaff.net/) (such as MOOCs). Education and Learning ---------------------- Want to understand what's under the hood? 
Many of the same concepts and principles employed when developing BLIS are introduced and taught in a basic pedagogical setting as part of [LAFF-On Programming for High Performance (LAFF-On-PfHP)](http://www.ulaff.net/), one of several massive open online courses (MOOCs) in the [Linear Algebra: Foundations to Frontiers](http://www.ulaff.net/) series, all of which are available for free via the [edX platform](http://www.edx.org/). What's New ---------- * **BLIS awarded SIAM Activity Group on Supercomputing Best Paper Prize for 2020!** We are thrilled to announce that the paper that we internally refer to as the second BLIS paper, "The BLIS Framework: Experiments in Portability." Field G. Van Zee, Tyler Smith, Bryan Marker, Tze Meng Low, Robert A. van de Geijn, Francisco Igual, Mikhail Smelyanskiy, Xianyi Zhang, Michael Kistler, Vernon Austel, John A. Gunnels, Lee Killough. ACM Transactions on Mathematical Software (TOMS), 42(2):12:1--12:19, 2016. was selected for the [SIAM Activity Group on Supercomputing Best Paper Prize](https://www.siam.org/prizes-recognition/activity-group-prizes/detail/siag-sc-best-paper-prize) for 2020. The prize is awarded once every two years to a paper judged to be the most outstanding paper in the field of parallel scientific and engineering computing, and has only been awarded once before (in 2016) since its inception in 2015 (the committee did not award the prize in 2018). The prize will be awarded at the [SIAM Conference on Parallel Processing for Scientific Computing](https://www.siam.org/conferences/cm/conference/pp20) in Seattle next February. Robert will be present at the conference to accept the prize and give [a talk on BLIS](https://meetings.siam.org/sess/dsp_programsess.cfm?SESSIONCODE=68266). The selection committee sought to recognize the paper, "which validates BLIS, a framework relying on the notion of microkernels that enables both productivity and high performance." 
Their statement continues, "The framework will continue having an important influence on the design and the instantiation of dense linear algebra libraries." * **Small/skinny matrix support for dgemm now available!** Thanks to contributions made possible by our partnership with AMD, we have dramatically accelerated `gemm` for double-precision real matrix problems where one or two dimensions are exceedingly small. A natural byproduct of this optimization is that the traditional case of small _m = n = k_ (i.e. square matrices) is also accelerated, even though it was not targeted specifically. And though only `dgemm` was optimized for now, support for other datatypes, other operations, and/or multithreading may be implemented in the future. We've also added a new [PerformanceSmall](docs/PerformanceSmall.md) document to showcase the improvement in performance when some matrix dimensions are small. * **Performance comparisons now available!** We recently measured the performance of various level-3 operations on a variety of hardware architectures, as implemented within BLIS and other BLAS libraries for all four of the standard floating-point datatypes. The results speak for themselves! Check out our extensive performance graphs and background info in our new [Performance](docs/Performance.md) document. * **BLIS is now in Debian Unstable!** Thanks to Debian developer-maintainers [M. Zhou](https://github.com/cdluminate) and [Nico Schlömer](https://github.com/nschloe) for sponsoring our package in Debian. Their participation, contributions, and advocacy were key to getting BLIS into the second-most popular Linux distribution (behind Ubuntu, which Debian packages feed into). The Debian tracker page may be found [here](https://tracker.debian.org/pkg/blis). * **BLIS now supports mixed-datatype gemm!** The `gemm` operation may now be executed on operands of mixed domains and/or mixed precisions.
Any combination of storage datatype for A, B, and C is now supported, along with a separate computation precision that can differ from the storage precision of A and B. And even the 1m method now supports mixed-precision computation. For more details, please see our [ACM TOMS](https://toms.acm.org/) journal article submission ([current draft](http://www.cs.utexas.edu/users/flame/pubs/blis7_toms_rev0.pdf)). * **BLIS now implements the 1m method.** Let's face it: writing complex assembly `gemm` microkernels for a new architecture is never a priority--and now, it almost never needs to be. The 1m method leverages existing real domain `gemm` microkernels to implement all complex domain level-3 operations. For more details, please see our [ACM TOMS](https://toms.acm.org/) journal article submission ([current draft](http://www.cs.utexas.edu/users/flame/pubs/blis6_toms_rev2.pdf)). What People Are Saying About BLIS --------------------------------- *["This is an awesome library."](https://github.com/flame/blis/issues/288#issuecomment-447488637)* ... *["I want to thank you and the blis team for your efforts."](https://github.com/flame/blis/issues/288#issuecomment-448074704)* ([@Lephar](https://github.com/Lephar)) *["Any time somebody outside Intel beats MKL by a nontrivial amount, I report it to the MKL team. It is fantastic for any open-source project to get within 10% of MKL... [T]his is why Intel funds BLIS development."](https://github.com/flame/blis/issues/264#issuecomment-428673275)* ([@jeffhammond](https://github.com/jeffhammond)) *["So BLIS is now a part of Elk."](https://github.com/flame/blis/issues/267#issuecomment-429303902)* ... *["We have found that zgemm applied to a 15000x15000 matrix with multi-threaded BLIS on a 32-core Ryzen 2990WX processor is about twice as fast as MKL"](https://github.com/flame/blis/issues/264#issuecomment-428373946)* ... 
*["I'm starting to like this a lot."](https://github.com/flame/blis/issues/264#issuecomment-428926191)* ([@jdk2016](https://github.com/jdk2016)) *["I [found] BLIS because I was looking for BLAS operations on C-ordered arrays for NumPy. BLIS has that, but even better is the fact that it's developed in the open using a more modern language than Fortran."](https://github.com/flame/blis/issues/254#issuecomment-423838345)* ([@nschloe](https://github.com/nschloe)) *["The specific reason to have BLIS included [in Linux distributions] is the KNL and SKX [AVX-512] BLAS support, which OpenBLAS doesn't have."](https://github.com/flame/blis/issues/210#issuecomment-393126303)* ([@loveshack](https://github.com/loveshack)) *["All tests pass without errors on OpenBSD. Thanks!"](https://github.com/flame/blis/issues/202#issuecomment-389691543)* ([@ararslan](https://github.com/ararslan)) *["Thank you very much for your great help!... Looking forward to benchmarking."](https://github.com/flame/blis/issues/180#issuecomment-375895449)* ([@mrader1248](https://github.com/mrader1248)) *["Thanks for the beautiful work."](https://github.com/flame/blis/issues/163#issue-286575452)* ([@mmrmo](https://github.com/mmrmo)) *["[M]y software currently uses BLIS for its BLAS interface..."](https://github.com/flame/blis/issues/129#issuecomment-302904805)* ([@ShadenSmith](https://github.com/ShadenSmith)) *["[T]hanks so much for your work on this! Excited to test."](https://github.com/flame/blis/issues/129#issuecomment-341565071)* ... *["[On AMD Excavator], BLIS is competitive to / slightly faster than OpenBLAS for dgemms in my tests."](https://github.com/flame/blis/issues/129#issuecomment-341608673)* ([@iotamudelta](https://github.com/iotamudelta)) *["BLIS provided the only viable option on KNL, whose ecosystem is at present dominated by blackbox toolchains. Thanks again. 
Keep on this great work."](https://github.com/flame/blis/issues/116#issuecomment-281225101)* ([@heroxbd](https://github.com/heroxbd)) *["I want to definitely try this out..."](https://github.com/flame/blis/issues/12#issuecomment-48086295)* ([@ViralBShah](https://github.com/ViralBShah)) Key Features ------------ BLIS offers several advantages over traditional BLAS libraries: * **Portability that doesn't impede high performance.** Portability was a top priority of ours when creating BLIS. With virtually no additional effort on the part of the developer, BLIS is configurable as a fully-functional reference implementation. But more importantly, the framework identifies and isolates a key set of computational kernels which, when optimized, immediately and automatically optimize performance across virtually all level-2 and level-3 BLIS operations. In this way, the framework acts as a productivity multiplier. And since the optimized (non-portable) code is compartmentalized within these few kernels, instantiating a high-performance BLIS library on a new architecture is a relatively straightforward endeavor. * **Generalized matrix storage.** The BLIS framework exports interfaces that allow one to specify both the row stride and column stride of a matrix. This allows one to compute with matrices stored in column-major order, row-major order, or by general stride. (This latter storage format is important for those seeking to implement tensor contractions on multidimensional arrays.) Furthermore, since BLIS tracks stride information for each matrix, operands of different storage formats can be used within the same operation invocation. By contrast, BLAS requires column-major storage. And while the CBLAS interface supports row-major storage, it does not allow mixing storage formats. * **Rich support for the complex domain.** BLIS operations are developed and expressed in their most general form, which is typically in the complex domain. 
These formulations then simplify elegantly down to the real domain, with conjugations becoming no-ops. Unlike the BLAS, all input operands in BLIS that allow transposition and conjugate-transposition also support conjugation (without transposition), which obviates the need for thread-unsafe workarounds. Also, where applicable, both complex symmetric and complex Hermitian forms are supported. (BLAS omits some complex symmetric operations, such as `symv`, `syr`, and `syr2`.) Another great example of BLIS serving as a portability lever is its implementation of the 1m method for complex matrix multiplication, a novel mechanism of providing high-performance complex level-3 operations using only real domain microkernels. This new innovation guarantees automatic level-3 support in the complex domain even when the kernel developers entirely forgo writing complex kernels. * **Advanced multithreading support.** BLIS allows multiple levels of symmetric multithreading for nearly all level-3 operations. (Currently, users may choose to obtain parallelism via either OpenMP or POSIX threads). This means that matrices may be partitioned in multiple dimensions simultaneously to attain scalable, high-performance parallelism on multicore and many-core architectures. The key to this innovation is a thread-specific control tree infrastructure which encodes information about the logical thread topology and allows threads to query and communicate data amongst one another. BLIS also employs so-called "quadratic partitioning" when computing dimension sub-ranges for each thread, so that arbitrary diagonal offsets of structured matrices with unreferenced regions are taken into account to achieve proper load balance. More recently, BLIS introduced a runtime abstraction to specify parallelism on a per-call basis, which is useful for applications that want to handle most of the parallelism. 
* **Ease of use.** The BLIS framework, and the library of routines it generates, are easy to use for end users, experts, and vendors alike. An optional BLAS compatibility layer provides application developers with backwards compatibility to existing BLAS-dependent codes. Or, one may adjust or write their application to take advantage of new BLIS functionality (such as generalized storage formats or additional complex operations) by calling one of BLIS's native APIs directly. BLIS's typed API will feel familiar to many veterans of BLAS since these interfaces use BLAS-like calling sequences. And many will find BLIS's object-based APIs a delight to use when customizing or writing their own BLIS operations. (Objects are relatively lightweight `structs` and passed by address, which helps tame function calling overhead.) * **Multilayered API, exposed kernels, and sandboxes.** The BLIS framework exposes its implementations in various layers, allowing expert developers to access exactly the functionality desired. This layered interface includes that of the lowest-level kernels, for those who wish to bypass the bulk of the framework. Optimizations can occur at various levels, in part thanks to exposed packing and unpacking facilities, which by default are highly parameterized and flexible. And more recently, BLIS introduced sandboxes--a way to provide alternative implementations of `gemm` that do not use any more of the BLIS infrastructure than is desired. Sandboxes provide a convenient and straightforward way of modifying the `gemm` implementation without disrupting any other level-3 operation or any other part of the framework. This works especially well when the developer wants to experiment with new optimizations or try a different algorithm. 
* **Functionality that grows with the community's needs.** As its name suggests, the BLIS framework is not a single library or static API, but rather a nearly-complete template for instantiating high-performance BLAS-like libraries. Furthermore, the framework is extensible, allowing developers to leverage existing components to support new operations as they are identified. If such operations require new kernels for optimal efficiency, the framework and its APIs will be adjusted and extended accordingly. * **Code re-use.** Auto-generation approaches to achieving the aforementioned goals tend to quickly lead to code bloat due to the multiple dimensions of variation supported: operation (i.e. `gemm`, `herk`, `trmm`, etc.); parameter case (i.e. side, [conjugate-]transposition, upper/lower storage, unit/non-unit diagonal); datatype (i.e. single-/double-precision real/complex); matrix storage (i.e. row-major, column-major, generalized); and algorithm (i.e. partitioning path and kernel shape). These "brute force" approaches often consider and optimize each operation or case combination in isolation, which is less than ideal when the goal is to provide entire libraries. BLIS was designed to be a complete framework for implementing basic linear algebra operations, but supporting this vast amount of functionality in a manageable way required a holistic design that employed careful abstractions, layering, and recycling of generic (highly parameterized) codes, subject to the constraint that high performance remain attainable. * **A foundation for mixed domain and/or mixed precision operations.** BLIS was designed with the hope of one day allowing computation on real and complex operands within the same operation. Similarly, we wanted to allow mixing operands' numerical domains, floating-point precisions, or both domain and precision, and to optionally compute in a precision different than one or both operands' storage precisions. 
This feature has been implemented for the general matrix multiplication (`gemm`) operation, providing 128 different possible type combinations, which, when combined with existing transposition, conjugation, and storage parameters, enables 55,296 different `gemm` use cases. For more details, please see the documentation on [mixed datatype](docs/MixedDatatypes.md) support and/or our [ACM TOMS](https://toms.acm.org/) journal paper on mixed-domain/mixed-precision `gemm` ([linked below](#citations)). How to Download BLIS -------------------- There are a few ways to download BLIS. We list the most common four ways below. We **highly recommend** using either Option 1 or 2. Otherwise, we recommend Option 3 (over Option 4) so your compiler can perform optimizations specific to your hardware. 1. **Download a source repository with `git clone`.** Generally speaking, we prefer using `git clone` to clone a `git` repository. Having a repository allows the user to periodically pull in the latest changes and quickly rebuild BLIS whenever they wish. Also, implicit in cloning a repository is that the repository defaults to using the `master` branch, which contains the latest "stable" commits since the most recent release. (This is in contrast to Option 3 in which the user is opting for code that may be slightly out of date.) In order to clone a `git` repository of BLIS, please obtain a repository URL by clicking on the green button above the file/directory listing near the top of this page (as rendered by GitHub). Generally speaking, it will amount to executing the following command in your terminal shell: ``` git clone https://github.com/flame/blis.git ``` 2. **Download a source repository via a zip file.** If you are uncomfortable with using `git` but would still like the latest stable commits, we recommend that you download BLIS as a zip file. 
In order to download a zip file of the BLIS source distribution, please click on the green button above the file listing near the top of this page. This should reveal a link for downloading the zip file. 3. **Download a source release via a tarball/zip file.** Alternatively, if you would like to stick to the code that is included in official releases, you may download either a tarball or zip file of any of BLIS's previous [tagged releases](https://github.com/flame/blis/releases). We consider this option to be less than ideal for most people since it will likely mean you miss out on the latest bugfix or feature commits (in contrast to Options 1 or 2), and you also will not be able to update your code with a simple `git pull` command (in contrast to Option 1). 4. **Download a binary package specific to your OS.** While we don't recommend this as the first choice for most users, we provide links to community members who generously maintain BLIS packages for various Linux distributions such as Debian Unstable and EPEL/Fedora. Please see the [External Packages](#external-packages) section below for more information. 
Getting Started --------------- *NOTE: This section assumes you've either cloned a BLIS source code repository via `git`, downloaded the latest source code via a zip file, or downloaded the source code for a tagged version release---Options 1, 2, or 3, respectively, as discussed in [the previous section](#how-to-download-blis).* If you just want to build a sequential (not parallelized) version of BLIS in a hurry and come back and explore other topics later, you can configure and build BLIS as follows: ``` $ ./configure auto $ make [-j] ``` You can then verify your build by running BLAS- and BLIS-specific test drivers via `make check`: ``` $ make check [-j] ``` And if you would like to install BLIS to the directory specified to `configure` via the `--prefix` option, run the `install` target: ``` $ make install ``` Please read the output of `./configure --help` for a full list of configure-time options. If/when you have time, we *strongly* encourage you to read the detailed walkthrough of the build system found in our [Build System](docs/BuildSystem.md) guide. Documentation ------------- We provide extensive documentation on the BLIS build system, APIs, test infrastructure, and other important topics. All documentation is formatted in markdown and included in the BLIS source distribution (usually in the `docs` directory). Slightly longer descriptions of each document may be found in the project's [wiki](https://github.com/flame/blis/wiki) section. **Documents for everyone:** * **[Build System](docs/BuildSystem.md).** This document covers the basics of configuring and building BLIS libraries, as well as related topics. * **[Testsuite](docs/Testsuite.md).** This document describes how to run BLIS's highly parameterized and configurable test suite, as well as the included BLAS test drivers. * **[BLIS Typed API Reference](docs/BLISTypedAPI.md).** Here we document the so-called "typed" (or BLAS-like) API.
This is the API that many users who are already familiar with the BLAS will likely want to use. You can find lots of example code for the typed API in the [examples/tapi](examples/tapi) directory included in the BLIS source distribution. * **[BLIS Object API Reference](docs/BLISObjectAPI.md).** Here we document the object API. This API abstracts away properties of vectors and matrices within `obj_t` structs that can be queried with accessor functions. Many developers and experts prefer this API over the typed API. You can find lots of example code for the object API in the [examples/oapi](examples/oapi) directory included in the BLIS source distribution. * **[Hardware Support](docs/HardwareSupport.md).** This document maintains a table of supported microarchitectures. * **[Multithreading](docs/Multithreading.md).** This document describes how to use the multithreading features of BLIS. * **[Mixed-Datatypes](docs/MixedDatatypes.md).** This document provides an overview of BLIS's mixed-datatype functionality and provides a brief example of how to take advantage of this new code. * **[Performance](docs/Performance.md).** This document reports empirically measured performance of a representative set of level-3 operations on a variety of hardware architectures, as implemented within BLIS and other BLAS libraries for all four of the standard floating-point datatypes. * **[PerformanceSmall](docs/PerformanceSmall.md).** This document reports empirically measured performance of `gemm` on select hardware architectures within BLIS and other BLAS libraries when performing matrix problems where one or two dimensions are exceedingly small. * **[Release Notes](docs/ReleaseNotes.md).** This document tracks a summary of changes included with each new version of BLIS, along with contributor credits for key features. * **[Frequently Asked Questions](docs/FAQ.md).** If you have general questions about BLIS, please read this FAQ.
If you can't find the answer to your question, please feel free to join the [blis-devel](https://groups.google.com/group/blis-devel) mailing list and post a question. We also have a [blis-discuss](https://groups.google.com/group/blis-discuss) mailing list that anyone can post to (even without joining). **Documents for github contributors:** * **[Contributing bug reports, feature requests, PRs, etc](CONTRIBUTING.md).** Interested in contributing to BLIS? Please read this document before getting started. It provides a general overview of how best to report bugs, propose new features, and offer code patches. * **[Coding Conventions](docs/CodingConventions.md).** If you are interested in, or planning on, contributing code to BLIS, please read this document so that you can format your code in accordance with BLIS's standards. **Documents for BLIS developers:** * **[Kernels Guide](docs/KernelsHowTo.md).** If you would like to learn more about the types of kernels that BLIS exposes, their semantics, the operations that each kernel accelerates, and various implementation issues, please read this guide. * **[Configuration Guide](docs/ConfigurationHowTo.md).** If you would like to learn how to add new sub-configurations or configuration families, or are simply interested in learning how BLIS organizes its configurations and kernel sets, please read this thorough walkthrough of the configuration system. * **[Sandbox Guide](docs/Sandboxes.md).** If you are interested in learning about using sandboxes in BLIS--that is, providing alternative implementations of the `gemm` operation--please read this document. External Packages ----------------- Generally speaking, we **highly recommend** building from source whenever possible using the latest `git` clone. (Tarballs of each [tagged release](https://github.com/flame/blis/releases) are also available, but we consider them to be less ideal since they are not as easy to upgrade as `git` clones.)
That said, some users may prefer binary and/or source packages through their Linux distribution. Thanks to generous involvement/contributions from our community members, the following BLIS packages are now available: * **Debian**. [M. Zhou](https://github.com/cdluminate) has volunteered to sponsor and maintain BLIS packages within the Debian Linux distribution. The Debian package tracker can be found [here](https://tracker.debian.org/pkg/blis). (Also, thanks to [Nico Schlömer](https://github.com/nschloe) for previously volunteering his time to set up a standalone PPA.) * **EPEL/Fedora**. There are official BLIS packages in Fedora and EPEL (for RHEL7+ and compatible distributions) with versions for 64-bit integers, OpenMP, and pthreads, and shims which can be dynamically linked instead of reference BLAS. (NOTE: For architectures other than intel64, amd64, and maybe arm64, the performance of packaged BLIS will be low because it uses unoptimized generic kernels; for those architectures, [OpenBLAS](https://github.com/xianyi/OpenBLAS) may be a better solution.) [Dave Love](https://github.com/loveshack) provides additional packages for EPEL6 in a [Fedora Copr](https://copr.fedorainfracloud.org/coprs/loveshack/blis/), and possibly versions more recent than the official repo for other EPEL/Fedora releases. The source packages may build on other rpm-based distributions. * **OpenSuSE**. The copr referred to above has rpms for some OpenSuSE releases; the source rpms may build for others. * **GNU Guix**. Guix has BLIS packages, but provides builds only for the generic target and some specific x86_64 micro-architectures. * **Conda**. The conda channel [conda-forge](https://github.com/conda-forge/blis-feedstock) has Linux, OSX and Windows binary packages for x86_64.
Discussion ---------- You can keep in touch with developers and other users of the project by joining one of the following mailing lists: * [blis-devel](https://groups.google.com/group/blis-devel): Please join and post to this mailing list if you are a BLIS developer, or if you are trying to use BLIS beyond simply linking to it as a BLAS library. **Note:** Most of the interesting discussions happen here; don't be afraid to join! If you would like to submit a bug report, or discuss a possible bug, please consider opening a [new issue](https://github.com/flame/blis/issues) on github. * [blis-discuss](https://groups.google.com/group/blis-discuss): Please join and post to this mailing list if you have general questions or feedback regarding BLIS. Application developers (end users) may wish to post here, unless they have bug reports, in which case they should open a [new issue](https://github.com/flame/blis/issues) on github. Contributing ------------ For information on how to contribute to our project, including preferred [coding conventions](docs/CodingConventions.md), please refer to the [CONTRIBUTING](CONTRIBUTING.md) file at the top level of the BLIS source distribution. Citations --------- For those of you looking for the appropriate article to cite regarding BLIS, we recommend citing our [first ACM TOMS journal paper](http://dl.acm.org/authorize?N91172) ([unofficial backup link](http://www.cs.utexas.edu/users/flame/pubs/blis1_toms_rev3.pdf)): ``` @article{BLIS1, author = {Field G. {V}an~{Z}ee and Robert A.
{v}an~{d}e~{G}eijn}, title = {{BLIS}: A Framework for Rapidly Instantiating {BLAS} Functionality}, journal = {ACM Transactions on Mathematical Software}, volume = {41}, number = {3}, pages = {14:1--14:33}, month = jun, year = {2015}, issue_date = {June 2015}, url = {http://doi.acm.org/10.1145/2764454}, } ``` You may also cite the [second ACM TOMS journal paper](http://dl.acm.org/authorize?N16240) ([unofficial backup link](http://www.cs.utexas.edu/users/flame/pubs/blis2_toms_rev3.pdf)): ``` @article{BLIS2, author = {Field G. {V}an~{Z}ee and Tyler Smith and Francisco D. Igual and Mikhail Smelyanskiy and Xianyi Zhang and Michael Kistler and Vernon Austel and John Gunnels and Tze Meng Low and Bryan Marker and Lee Killough and Robert A. {v}an~{d}e~{G}eijn}, title = {The {BLIS} Framework: Experiments in Portability}, journal = {ACM Transactions on Mathematical Software}, volume = {42}, number = {2}, pages = {12:1--12:19}, month = jun, year = {2016}, issue_date = {June 2016}, url = {http://doi.acm.org/10.1145/2755561}, } ``` We also have a third paper, submitted to IPDPS 2014, on achieving [multithreaded parallelism in BLIS](http://www.cs.utexas.edu/users/flame/pubs/blis3_ipdps14.pdf): ``` @inproceedings{BLIS3, author = {Tyler M. Smith and Robert A. {v}an~{d}e~{G}eijn and Mikhail Smelyanskiy and Jeff R. Hammond and Field G. {V}an~{Z}ee}, title = {Anatomy of High-Performance Many-Threaded Matrix Multiplication}, booktitle = {28th IEEE International Parallel \& Distributed Processing Symposium (IPDPS 2014)}, year = 2014, } ``` A fourth paper, submitted to ACM TOMS, also exists, which proposes an [analytical model](http://dl.acm.org/citation.cfm?id=2925987) ([unofficial backup link](http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf)) for determining blocksize parameters in BLIS: ``` @article{BLIS4, author = {Tze Meng Low and Francisco D. Igual and Tyler M. Smith and Enrique S. 
Quintana-Ort\'{\i}}, title = {Analytical Modeling Is Enough for High-Performance {BLIS}}, journal = {ACM Transactions on Mathematical Software}, volume = {43}, number = {2}, pages = {12:1--12:18}, month = aug, year = {2016}, issue_date = {August 2016}, url = {http://doi.acm.org/10.1145/2925987}, } ``` A fifth paper, submitted to ACM TOMS, begins the study of so-called [induced methods for complex matrix multiplication](http://www.cs.utexas.edu/users/flame/pubs/blis5_toms_rev2.pdf): ``` @article{BLIS5, author = {Field G. {V}an~{Z}ee and Tyler Smith}, title = {Implementing High-performance Complex Matrix Multiplication via the 3m and 4m Methods}, journal = {ACM Transactions on Mathematical Software}, volume = {44}, number = {1}, pages = {7:1--7:36}, month = jul, year = {2017}, issue_date = {July 2017}, url = {http://doi.acm.org/10.1145/3086466}, } ``` A sixth paper, submitted to SISC, revisits the topic of the previous article and derives a [superior induced method](http://www.cs.utexas.edu/users/flame/pubs/blis6_sisc_rev1.pdf): ``` @article{BLIS6, author = {Field G. {V}an~{Z}ee}, title = {Implementing High-Performance Complex Matrix Multiplication via the 1m Method}, journal = {SIAM Journal on Scientific Computing}, note = {submitted} } ``` A seventh paper, submitted to ACM TOMS, explores the implementation of `gemm` for [mixed-domain and/or mixed-precision](http://www.cs.utexas.edu/users/flame/pubs/blis7_toms_rev0.pdf) operands: ``` @article{BLIS7, author = {Field G. {V}an~{Z}ee and Devangi N. Parikh and Robert A.
van~de~{G}eijn}, title = {Supporting Mixed-domain Mixed-precision Matrix Multiplication within the BLIS Framework}, journal = {ACM Transactions on Mathematical Software}, note = {submitted} } ``` Funding ------- This project and its associated research were partially sponsored by grants from [Microsoft](http://www.microsoft.com/), [Intel](http://www.intel.com/), [Texas Instruments](http://www.ti.com/), [AMD](http://www.amd.com/), [Oracle](http://www.oracle.com/), [Huawei](http://www.huawei.com/), and [Facebook](http://www.facebook.com/), as well as grants from the [National Science Foundation](http://www.nsf.gov/) (Awards CCF-0917167, ACI-1148125/1340293, CCF-1320112, and ACI-1550493). _Any opinions, findings and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the National Science Foundation (NSF)._ blis-0.6.1/RELEASING000066400000000000000000000021661360743507500137220ustar00rootroot00000000000000Here are the steps to follow to create a new release (version) of BLIS: 1. Make sure there are no commits that have yet to be pulled into local repository. $ git pull If there are any commits upstream, merge them as appropriate. 2. Verify that the code builds properly. $ ./configure auto; make 3. Verify that the code passes BLIS and BLAS tests: $ make check # BLIS testsuite (fast) + BLAS test drivers $ make checkblis # BLIS testsuite (full ex. mixed-datatype) $ make checkblis-md # BLIS testsuite (mixed-datatype only) $ make checkblis-salt # BLIS testsuite (fast + salt) 4. Draft a new announcement to blis-devel, crediting those who contributed towards this version by browsing 'git log'. 5. Update CREDITS file if 'git log' reveals any new contributors. 6. Update docs/ReleaseNotes.md file with body of finalized announcement and the date of the release. 7. Bump the version number: $ ./build/bump-version.sh "0.3.2" 8. 
Push the new commits and new tag associated with the new version: $ git push $ git push --tag 9. Send finalized announcement to blis-devel. blis-0.6.1/blastest/000077500000000000000000000000001360743507500143025ustar00rootroot00000000000000blis-0.6.1/blastest/Makefile000066400000000000000000000165471360743507500157570ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
#
#
# Makefile
#
# Field G. Van Zee
#
# Makefile for BLAS test drivers.
#

#
# --- Makefile PHONY target definitions ----------------------------------------
#

# These targets name actions rather than files, so make must always run them.
.PHONY: all f2c bin \
        clean cleanf2c cleanobj cleanbin cleanout \
        check-env check-env-mk check-env-fragments check-env-make-defs \
        run check

#
# --- Determine makefile fragment location -------------------------------------
#

# Comments:
# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given.
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in
#   the second case because CONFIG_NAME is not yet set.
# - When BLIS_INSTALL_PATH is non-empty, the library, headers, and makefile
#   fragments are taken from that installation; otherwise they are taken
#   from the parent directory (..).
ifneq ($(strip $(BLIS_INSTALL_PATH)),)
LIB_PATH := $(BLIS_INSTALL_PATH)/lib
INC_PATH := $(BLIS_INSTALL_PATH)/include/blis
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
else
DIST_PATH := ..
LIB_PATH = ../lib/$(CONFIG_NAME)
INC_PATH = ../include/$(CONFIG_NAME)
SHARE_PATH := ..
endif

#
# --- Include common makefile definitions --------------------------------------
#

# Include the common makefile fragment. The leading '-' makes make carry on
# without an error if the fragment cannot be read.
-include $(SHARE_PATH)/common.mk

#
# --- General build definitions ------------------------------------------------
#

# Local directory and file names used by the rules in this Makefile.
# NOTE(review): BUILD_PATH and BASE_INC_PATH are not defined in this file;
# presumably they come from common.mk -- confirm.
TEST_OBJ_PATH := obj
F2C_LIB := libf2c.a
F2C_PATH := f2c
DRIVER_PATH := src
BLIS_H_PATH := $(BUILD_PATH)/$(BASE_INC_PATH)
INPUT_DIR := input

# Gather all local object files: every .c file found in $(F2C_PATH) and
# $(DRIVER_PATH) is mapped to a .o file of the same base name in
# $(TEST_OBJ_PATH).
F2C_OBJS := $(sort $(patsubst $(F2C_PATH)/%.c, \
 $(TEST_OBJ_PATH)/%.o, \
 $(wildcard $(F2C_PATH)/*.c)))
DRIVER_OBJS := $(sort $(patsubst $(DRIVER_PATH)/%.c, \
 $(TEST_OBJ_PATH)/%.o, \
 $(wildcard $(DRIVER_PATH)/*.c)))

# Extract base names for each test driver file.
DRIVER_BASES := $(basename $(notdir $(DRIVER_OBJS)))

# Binary executable names (<base>.x).
DRIVER_BINS := $(addsuffix .x,$(DRIVER_BASES))

# Binary run-rule names (run-<base>).
DRIVER_BINS_R := $(addprefix run-,$(DRIVER_BASES))

# Filter level-1, level-2, and level-3 names to different variables.
# Driver base names are split by their trailing digit (1, 2 or 3).
DRIVER1_BASES := $(filter %1,$(DRIVER_BASES))
DRIVER2_BASES := $(filter %2,$(DRIVER_BASES))
DRIVER3_BASES := $(filter %3,$(DRIVER_BASES))

# The location of the script that checks the BLAS test output.
#BLASTEST_CHECK := $(DIST_PATH)/$(BUILD_DIR)/check-blastest.sh

# Override the value of CINCFLAGS so that the value of CFLAGS returned by
# get-user-cflags-for() is not cluttered up with include paths needed only
# while building BLIS.
CINCFLAGS := -I$(INC_PATH)

# Use the CFLAGS for the configuration family.
CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME))

# Suppress warnings about possibly-uninitialized variables and about
# suggested parentheses, stop at the first compile error (-Wfatal-errors),
# add the local header path and the path to blis.h to CFLAGS, and define
# HAVE_BLIS_H.
CFLAGS += -Wno-maybe-uninitialized -Wno-parentheses -Wfatal-errors \
 -I$(F2C_PATH) \
 -I$(INC_PATH) -DHAVE_BLIS_H

# Locate the libblis library to which we will link.
#LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L)

# Override the location of the check-blastest.sh script.
#BLASTEST_CHECK := ./check-blastest.sh

# Optional command prefix placed in front of each test driver when it is run.
# Empty unless the caller has already set it.
TESTSUITE_WRAPPER ?=

#
# --- Targets/rules ------------------------------------------------------------
#

# --- Primary targets ---

all: check-env f2c bin

f2c: check-env $(F2C_LIB)

bin: check-env $(DRIVER_BINS)

# -- Object file rules --

# Compile each f2c source file into $(TEST_OBJ_PATH). With ENABLE_VERBOSE=yes
# the full command is echoed; otherwise only a short "Compiling" message is
# shown.
$(TEST_OBJ_PATH)/%.o: $(F2C_PATH)/%.c
ifeq ($(ENABLE_VERBOSE),yes)
	$(CC) $(CFLAGS) -c $< -o $@
else
	@echo "Compiling $@"
	@$(CC) $(CFLAGS) -c $< -o $@
endif

# Compile each test driver source file into $(TEST_OBJ_PATH), in the same way.
$(TEST_OBJ_PATH)/%.o: $(DRIVER_PATH)/%.c
ifeq ($(ENABLE_VERBOSE),yes)
	$(CC) $(CFLAGS) -c $< -o $@
else
	@echo "Compiling $@"
	@$(CC) $(CFLAGS) -c $< -o $@
endif

# -- libf2c library archive rule --

# '$?' expands to only those prerequisites that are newer than the archive,
# so only changed objects are passed to $(AR).
$(F2C_LIB): $(F2C_OBJS)
ifeq ($(ENABLE_VERBOSE),yes)
	$(AR) $(ARFLAGS) $@ $?
	$(RANLIB) $@
else
	@echo "Archiving $@"
	@$(AR) $(ARFLAGS) $@ $?
	@$(RANLIB) $@
endif

# -- Executable file rules --

# first argument: the base name of the BLAS test driver.
# Template for the rule that links one test driver executable, <base>.x, from
# its object file, the libf2c archive, and libblis. Both the verbose and the
# quiet recipe link against $(F2C_LIB), which is also the prerequisite that
# the rule depends on, so the link line is the same whatever ENABLE_VERBOSE is.
# '$$@' is escaped so that it survives $(call) and is expanded as the target
# name only when the generated rule runs.
define make-blat-rule
$(1).x: $(TEST_OBJ_PATH)/$(1).o $(F2C_LIB) $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
	$(LINKER) $(TEST_OBJ_PATH)/$(1).o $(F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $$@
else
	@echo "Linking $$@ against '$(F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS)'"
	@$(LINKER) $(TEST_OBJ_PATH)/$(1).o $(F2C_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $$@
endif
endef

# Instantiate the rule above for each driver file.
$(foreach name, $(DRIVER_BASES), $(eval $(call make-blat-rule,$(name))))

# -- Test run rules --

# 'run' depends on one run-<base> target per driver.
run: $(DRIVER_BINS_R)

# A rule to run ?blat1.x driver files.
# first argument: the base name of the BLAS test driver.
# The driver's standard output is redirected to out.<base>.
define make-run-blat1-rule
run-$(1): $(1).x
ifeq ($(ENABLE_VERBOSE),yes)
	$(TESTSUITE_WRAPPER) ./$(1).x > out.$(1)
else
	@echo "Running $(1).x > 'out.$(1)'"
	@$(TESTSUITE_WRAPPER) ./$(1).x > out.$(1)
endif
endef

# Instantiate the rule above for each level-1 driver file.
$(foreach name, $(DRIVER1_BASES), $(eval $(call make-run-blat1-rule,$(name))))

# A rule to run ?blat2.x and ?blat3.x driver files.
# first argument: the base name of the BLAS test driver.
# The driver reads $(INPUT_DIR)/<base>.in on standard input.
# NOTE(review): the recipe does not redirect stdout, yet the message says the
# output goes to out.<base>; presumably the driver writes that file itself
# under a name taken from its .in file -- confirm.
define make-run-blat23-rule
run-$(1): $(1).x
ifeq ($(ENABLE_VERBOSE),yes)
	$(TESTSUITE_WRAPPER) ./$(1).x < $(INPUT_DIR)/$(1).in
else
	@echo "Running $(1).x < '$(INPUT_DIR)/$(1).in' (output to 'out.$(1)')"
	@$(TESTSUITE_WRAPPER) ./$(1).x < $(INPUT_DIR)/$(1).in
endif
endef

# Instantiate the rule above for each level-2 driver file.
$(foreach name, $(DRIVER2_BASES), $(eval $(call make-run-blat23-rule,$(name))))

# Instantiate the rule above for each level-3 driver file.
$(foreach name, $(DRIVER3_BASES), $(eval $(call make-run-blat23-rule,$(name)))) check: run ifeq ($(ENABLE_VERBOSE),yes) - $(BLASTEST_CHECK) else @- $(BLASTEST_CHECK) endif # -- Clean rules -- cleanf2c: - $(RM_F) $(F2C_OBJS) $(F2C_LIB) cleanobj: - $(RM_F) $(DRIVER_OBJS) cleanbin: - $(RM_F) $(DRIVER_BINS) cleanout: - $(RM_F) $(addprefix out.,$(DRIVER_BASES)) clean: cleanf2c cleanobj cleanbin cleanout blis-0.6.1/blastest/check-blastest.sh000077500000000000000000000042141360743507500175360ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # script_name=${0##*/} ansi_red="\033[0;31m" ansi_green="\033[0;32m" ansi_normal="\033[0m" passmsg="All BLAS tests passed!" failmsg0="At least one BLAS test failed. :(" failmsg1="Please see out.* files for details." grep -q '\*\*\*\*' ./out.* if [ $? -eq 0 ]; then printf "${ansi_red}""${script_name}: ${failmsg0}""${ansi_normal}\n" printf "${ansi_red}""${script_name}: ${failmsg1}""${ansi_normal}\n" exit 1 else printf "${ansi_green}""${script_name}: ${passmsg}""${ansi_normal}\n" exit 0 fi blis-0.6.1/blastest/f2c/000077500000000000000000000000001360743507500147545ustar00rootroot00000000000000blis-0.6.1/blastest/f2c/abs.c000066400000000000000000000041311360743507500156640ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. 
AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* Integer */ shortint h_abs(const shortint *x) { return ( shortint )( *x >= 0 ? (*x) : (- *x) ); //return ( shortint )abs( ( int )*x ); } integer i_abs(const integer *x) { return ( integer )( *x >= 0 ? (*x) : (- *x) ); //return ( integer )abs( ( int )*x ); } /* Double */ double r_abs(real *x) { return ( double )( *x >= 0 ? (*x) : (- *x) ); //return ( double )fabsf( ( float )*x ); } double d_abs(const doublereal *x) { return ( double )( *x >= 0 ? (*x) : (- *x) ); //return ( double )fabs( ( double )*x ); } /* Complex */ double c_abs(const complex *z) { return ( double )hypot(z->r, z->i); } double z_abs(const doublecomplex *z) { return ( double )hypot(z->r, z->i); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/acos.c000066400000000000000000000026441360743507500160530ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_acos(real *x) { return( acos(*x) ); } double d_acos(const doublereal *x) { return( acos(*x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/arith.h000066400000000000000000000042761360743507500162450ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #ifndef F2C_ARITH_H #define F2C_ARITH_H #include #include #ifdef _MSC_VER #define isnan _isnan #define isinf(x) (!_finite(x)) #endif #ifndef isnan # define isnan(x) \ (sizeof (x) == sizeof (long double) ? isnan_ld (x) \ : sizeof (x) == sizeof (double) ? isnan_d (x) \ : isnan_f (x)) static inline int isnan_f (float x) { return x != x; } static inline int isnan_d (double x) { return x != x; } static inline int isnan_ld (long double x) { return x != x; } #endif #ifndef isinf # define isinf(x) \ (sizeof (x) == sizeof (long double) ? isinf_ld (x) \ : sizeof (x) == sizeof (double) ? isinf_d (x) \ : isinf_f (x)) static inline int isinf_f (float x) { return !isnan (x) && isnan (x - x); } static inline int isinf_d (double x) { return !isnan (x) && isnan (x - x); } static inline int isinf_ld (long double x) { return !isnan (x) && isnan (x - x); } #endif #ifndef signbit #define signbit(x) (((x) < 0)? 
1 : 0) #endif #endif blis-0.6.1/blastest/f2c/asin.c000066400000000000000000000026441360743507500160600ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_asin(real *x) { return( asin(*x) ); } double d_asin(const doublereal *x) { return( asin(*x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/atan.c000066400000000000000000000026441360743507500160510ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_atan(real *x) { return( atan(*x) ); } double d_atan(const doublereal *x) { return( atan(*x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/atn2.c000066400000000000000000000027121360743507500157660ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_atn2(real *x, real *y) { return( atan2(*x,*y) ); } double d_atn2(const doublereal *x, const doublereal *y) { return( atan2(*x,*y) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/close.c000066400000000000000000000046101360743507500162260ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include #include "f2c.h" #include "fio.h" #undef abs #undef min #undef max #include #if defined(NON_UNIX_STDIO) || defined(_MSC_VER) || defined(__MINGW32__) # include # define unlink remove #else # include #endif integer f_clos(cllist *a) { unit *b; if(a->cunit >= MXUNIT) return(0); b= &f__units[a->cunit]; if(b->ufd==NULL) goto done; if (b->uscrtch == 1) goto Delete; if (!a->csta) goto Keep; switch(*a->csta) { default: Keep: case 'k': case 'K': if(b->uwrt == 1) t_runc((alist *)a); if(b->ufnm) { fclose(b->ufd); free(b->ufnm); } break; case 'd': case 'D': Delete: fclose(b->ufd); if(b->ufnm) { unlink(b->ufnm); /*SYSDEP*/ free(b->ufnm); } } b->ufd=NULL; done: b->uend=0; b->ufnm=NULL; return(0); } void f_exit(void) { static int run = 0; int i; static cllist xx; /* Do not execute f_exit() twice */ if (run) return; run = 1; if (!xx.cerr) { xx.cerr=1; xx.csta=NULL; for(i=0;ii; r->r = z->r; r->i = -zi; } void r_cnjg(complex *r, complex *z) { real zi = z->i; r->r = z->r; r->i = -zi; } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/cos.c000066400000000000000000000033611360743507500157070ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. 
AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_cos(real *x) { return( cos(*x) ); } double d_cos(const doublereal *x) { return( cos(*x) ); } void c_cos(complex *r, complex *z) { double zi = z->i, zr = z->r; r->r = cos(zr) * cosh(zi); r->i = - sin(zr) * sinh(zi); } void z_cos(doublecomplex *r, doublecomplex *z) { double zi = z->i, zr = z->r; r->r = cos(zr) * cosh(zi); r->i = - sin(zr) * sinh(zi); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/cosh.c000066400000000000000000000026451360743507500160630ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. 
In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_cosh(real *x) { return( cosh(*x) ); } double d_cosh(const doublereal *x) { return( cosh(*x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/dim.c000066400000000000000000000032611360743507500156730ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif shortint h_dim(const shortint *a, const shortint *b) { return( *a > *b ? *a - *b : 0); } integer i_dim(const integer *a, const integer *b) { return( *a > *b ? *a - *b : 0); } double r_dim(real *a, real *b) { return( *a > *b ? *a - *b : 0); } double d_dim(const doublereal *a, const doublereal *b) { return( *a > *b ? *a - *b : 0); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/div.c000066400000000000000000000063631360743507500157120ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif void c_div(complex *c, complex *a, complex *b) { double ratio, den; double abr, abi, cr; if( (abr = b->r) < 0.) 
abr = - abr; if( (abi = b->i) < 0.) abi = - abi; if( abr <= abi ) { if(abi == 0) { #ifdef IEEE_COMPLEX_DIVIDE float af, bf; af = bf = abr; if (a->i != 0 || a->r != 0) af = 1.; c->i = c->r = af / bf; return; #else sig_die("complex division by zero", 1); #endif } ratio = (double)b->r / b->i ; den = b->i * (1 + ratio*ratio); cr = (a->r*ratio + a->i) / den; c->i = (a->i*ratio - a->r) / den; } else { ratio = (double)b->i / b->r ; den = b->r * (1 + ratio*ratio); cr = (a->r + a->i*ratio) / den; c->i = (a->i - a->r*ratio) / den; } c->r = cr; } void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b) { double ratio, den; double abr, abi, cr; if( (abr = b->r) < 0.) abr = - abr; if( (abi = b->i) < 0.) abi = - abi; if( abr <= abi ) { if(abi == 0) { #ifdef IEEE_COMPLEX_DIVIDE if (a->i != 0 || a->r != 0) abi = 1.; c->i = c->r = abi / abr; return; #else sig_die("complex division by zero", 1); #endif } ratio = b->r / b->i ; den = b->i * (1 + ratio*ratio); cr = (a->r*ratio + a->i) / den; c->i = (a->i*ratio - a->r) / den; } else { ratio = b->i / b->r ; den = b->r * (1 + ratio*ratio); cr = (a->r + a->i*ratio) / den; c->i = (a->i - a->r*ratio) / den; } c->r = cr; } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/dolio.c000066400000000000000000000025551360743507500162350ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. 
AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include "f2c.h" #include "fio.h" integer do_lio(ftnint *type, ftnint *number, char *ptr, ftnlen len) { return((*f__lioproc)(number,ptr,len,*type)); } blis-0.6.1/blastest/f2c/endfile.c000066400000000000000000000066221360743507500165340ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/
#include
#include "f2c.h"
#include "fio.h"
#ifdef HAVE_FTRUNCATE
#include
#endif
#undef abs
#undef min
#undef max
#include "stdlib.h"
#include "string.h"

/*
 * f_end -- ENDFILE on the unit a->aunit.
 *
 * A unit number outside [0, MXUNIT) is reported through err() with code
 * 101.  If the unit has no open stream, a file named "fort.<unit>" is
 * opened with f__w_mode[0] and closed again at once, and 0 is returned.
 * Otherwise the unit's uend flag is set and, for units with useek set,
 * the result of t_runc() is returned; units without useek return 0.
 */
integer f_end(alist *a)
{
	unit *b;
	FILE *tf;

	if(a->aunit>=MXUNIT || a->aunit<0) err(a->aerr,101,"endfile");
	b = &f__units[a->aunit];
	if(b->ufd==NULL) {
		/* "fort." + unit digits + NUL; the unit is below MXUNIT here. */
		char nbuf[10];
		sprintf(nbuf,"fort.%ld",(long)a->aunit);
		if (tf = fopen(nbuf, f__w_mode[0]))
			fclose(tf);
		return(0);
		}
	b->uend=1;
	return(b->useek ? t_runc(a) : 0);
}

#if !defined(HAVE_FTRUNCATE)
/*
 * copy -- move up to len bytes from `from` to `to`, at most BUFSIZ bytes
 * per step.
 *
 * Returns 1 when an fwrite() reports failure, otherwise 0.  The loop also
 * ends (returning 0) as soon as fread() does not deliver a full chunk.
 */
static int copy(FILE *from, register long len, FILE *to)
{
	int len1;
	char buf[BUFSIZ];

	while(fread(buf, len1 = len > BUFSIZ ? BUFSIZ : (int)len, 1, from)) {
		if (!fwrite(buf, len1, 1, to))
			return 1;
		if ((len -= len1) <= 0)
			break;
		}
	return 0;
}
#endif /* !HAVE_FTRUNCATE */

/*
 * t_runc -- cut the file attached to unit a->aunit off at the stream's
 * current position.
 *
 * Nothing is done (return 0) for units with url set, for units without
 * useek, or when the current position is already at or past the end of
 * the file.
 *
 * With HAVE_FTRUNCATE the stream is flushed if (urw & 2) and ftruncate()
 * is applied to its descriptor.  Without it the file is rebuilt: the
 * first `loc` bytes are copied to a tmpfile(), the file is reopened with
 * f__w_mode[0], and the bytes are copied back; an unnamed unit cannot be
 * rebuilt this way and returns 0.
 *
 * Any failure (rc != 0) is reported through err() with code 111;
 * otherwise 0 is returned.
 */
int t_runc(alist *a)
{
	OFF_T loc, len;
	unit *b;
	int rc;
	FILE *bf;
#if !defined(HAVE_FTRUNCATE)
	FILE *tf;
#endif

	b = &f__units[a->aunit];
	if(b->url)
		return(0); /*don't truncate direct files*/
	/* Remember the current offset, then measure the file length. */
	loc=FTELL(bf = b->ufd);
	FSEEK(bf,(OFF_T)0,SEEK_END);
	len=FTELL(bf);
	if (loc >= len || b->useek == 0)
		return(0);
#ifndef HAVE_FTRUNCATE
	if (b->ufnm == NULL)
		return 0;
	rc = 0;
	fclose(b->ufd);
	if (!loc) {
		/* Truncating to zero length: reopening for writing is enough. */
		if (!(bf = fopen(b->ufnm, f__w_mode[b->ufmt])))
			rc = 1;
		if (b->uwrt)
			b->uwrt = 1;
		goto done;
		}
	if (!(bf = fopen(b->ufnm, f__r_mode[0])) || !(tf = tmpfile())) {
#ifdef NON_UNIX_STDIO
 bad:
#endif
		rc = 1;
		goto done;
		}
	/* Save the part to keep in the temporary file. */
	if (copy(bf, (long)loc, tf)) {
 bad1:
		rc = 1;
		goto done1;
		}
	/* Reopen for writing, then restore the saved prefix. */
	if (!(bf = freopen(b->ufnm, f__w_mode[0], bf)))
		goto bad1;
	rewind(tf);
	if (copy(tf, (long)loc, bf))
		goto bad1;
	b->uwrt = 1;
	b->urw = 2;
#ifdef NON_UNIX_STDIO
	if (b->ufmt) {
		fclose(bf);
		if (!(bf = fopen(b->ufnm, f__w_mode[3])))
			goto bad;
		FSEEK(bf,(OFF_T)0,SEEK_END);
		b->urw = 3;
		}
#endif
 done1:
	fclose(tf);
 done:
	/* Publish whichever stream (possibly NULL) is now attached. */
	f__cf = b->ufd = bf;
#else /* HAVE_FTRUNCATE */
	if (b->urw & 2)
		fflush(b->ufd); /* necessary on some Linux systems */
	rc = ftruncate(fileno(b->ufd), loc);
	/* The following FSEEK is unnecessary on some systems, */
	/* but should be harmless. */
	FSEEK(b->ufd, (OFF_T)0, SEEK_END);
#endif /* HAVE_FTRUNCATE */
	if (rc)
		err(a->aerr,111,"endfile");
	return 0;
}
blis-0.6.1/blastest/f2c/epsilon.c000066400000000000000000000026421360743507500165750ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/
#include "f2c.h"
#include "float.h"

#ifdef __cplusplus
extern "C" {
#endif

/* s_epsilon_ -- returns FLT_EPSILON; the argument x is not read. */
real s_epsilon_( real* x ) { return FLT_EPSILON; }

/* d_epsilon_ -- returns DBL_EPSILON; the argument x is not read. */
doublereal d_epsilon_( doublereal* x ) { return DBL_EPSILON; }

#ifdef __cplusplus
}
#endif
blis-0.6.1/blastest/f2c/err.c000066400000000000000000000153711360743507500157170ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore.
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/
#include
#include
#if defined(_MSC_VER) || defined(__MINGW32__)
# include
# include
#else
# ifdef HAVE_ISATTY
# include
# else
# define isatty(x) 0
# endif
#endif
#include "f2c.h"
#include "fio.h"
#include "fmt.h" /* for struct syl */

/*global definitions*/
unit f__units[MXUNIT]; /*unit table*/
flag f__init; /*0 on entry, 1 after initializations*/
cilist *f__elist; /*active external io list*/
icilist *f__svic; /*active internal io list*/
flag f__reading; /*1 if reading, 0 if writing*/
flag f__cplus,f__cblank;
const char *f__fmtbuf;
flag f__external; /*1 if external io, 0 if internal */
int (*f__getn)(void); /* for formatted input */
void (*f__putn)(int); /* for formatted output */
int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*);
int (*f__dorevert)(void),(*f__donewrec)(void),(*f__doend)(void);
flag f__sequential; /*1 if sequential io, 0 if direct*/
flag f__formatted; /*1 if formatted io, 0 if unformatted*/
FILE *f__cf; /*current file*/
unit *f__curunit; /*current unit*/
int f__recpos; /*place in current record*/
OFF_T f__cursor, f__hiwater;
int f__scale;
char *f__icptr;

/*error messages*/
/* Indexed by (error number - 100); see f__fatal(). */
const char *F_err[] =
{
	"error in format", /* 100 */
	"illegal unit number", /* 101 */
	"formatted io not allowed", /* 102 */
	"unformatted io not allowed", /* 103 */
	"direct io not allowed", /* 104 */
	"sequential io not allowed", /* 105 */
	"can't backspace file", /* 106 */
	"null file name", /* 107 */
	"can't stat file", /* 108 */
	"unit not connected", /* 109 */
	"off end of record", /* 110 */
	"truncation failed in endfile", /* 111 */
	"incomprehensible list input", /* 112 */
	"out of free space", /* 113 */
	"unit not connected", /* 114 */
	"read unexpected character", /* 115 */
	"bad logical input field", /* 116 */
	"bad variable type", /* 117 */
	"bad namelist name", /* 118 */
	"variable not in namelist", /* 119 */
	"no end record", /* 120 */
	"variable count incorrect", /* 121 */
	"subscript for scalar variable", /* 122 */
	"invalid array section", /* 123 */
	"substring out of bounds", /* 124 */
	"subscript out of bounds", /* 125 */
	"can't read file", /* 126 */
	"can't write file", /* 127 */
	"'new' file exists", /* 128 */
	"can't append to file", /* 129 */
	"non-positive record number", /* 130 */
	"nmLbuf overflow" /* 131 */
};
/* One past the largest error number that has an entry in F_err. */
#define MAXERR (sizeof(F_err)/sizeof(char *)+100)

#if defined(_MSC_VER) || defined(__MINGW32__)
#undef isatty
#define isatty _isatty
#undef fileno
#define fileno _fileno
#endif

/*
 * f__canseek -- report whether stream f can be repositioned.
 *
 * Returns 1 for "seekable" and 0 otherwise.  With NON_UNIX_STDIO the
 * answer is simply "not a terminal".  Otherwise the file type from
 * fstat() decides: regular files and directories with at least one link,
 * character devices that are not terminals, and block devices count as
 * seekable; an fstat() failure or any other file type yields 0.  The
 * S_IFMT and S_IS*() variants implement the same decision table.
 */
int f__canseek(FILE *f) /*SYSDEP*/
{
#ifdef NON_UNIX_STDIO
	return !isatty(fileno(f));
#else
	struct stat x;

	if (fstat(fileno(f),&x) < 0)
		return(0);
#ifdef S_IFMT
	switch(x.st_mode & S_IFMT) {
	case S_IFDIR:
	case S_IFREG:
		if(x.st_nlink > 0) /* !pipe */
			return(1);
		else
			return(0);
	case S_IFCHR:
		if(isatty(fileno(f)))
			return(0);
		return(1);
#ifdef S_IFBLK
	case S_IFBLK:
		return(1);
#endif
	}
#else
#ifdef S_ISDIR
	/* POSIX version */
	if (S_ISREG(x.st_mode) || S_ISDIR(x.st_mode)) {
		if(x.st_nlink > 0) /* !pipe */
			return(1);
		else
			return(0);
		}
	if (S_ISCHR(x.st_mode)) {
		if(isatty(fileno(f)))
			return(0);
		return(1);
		}
	if (S_ISBLK(x.st_mode))
		return(1);
#else
	/* Deliberately not valid C: stops the build on unknown platforms. */
	Help! How does fstat work on this system?
#endif
#endif
	return(0); /* who knows what it is? */
#endif
}

/*
 * f__fatal -- print a diagnostic for error number n on stderr and hand
 * control to sig_die().
 *
 * n in [0, 100) is treated as an errno-style code and reported through
 * perror(s); n == -1 means end of file; n in [100, MAXERR) selects a
 * message from F_err; anything else is reported as an illegal error
 * number.  The message is followed by a description of the current unit
 * (or "internal I/O"), the last format string if one is recorded, and the
 * current reading/writing, sequential/direct, formatted/unformatted and
 * external/internal state.
 */
void f__fatal(int n, const char *s)
{
	if(n<100 && n>=0) perror(s); /*SYSDEP*/
	else if(n >= (int)MAXERR || n < -1)
	{ fprintf(stderr,"%s: illegal error number %d\n",s,n);
	}
	else if(n == -1) fprintf(stderr,"%s: end of file\n",s);
	else
		fprintf(stderr,"%s: %s\n",s,F_err[n-100]);
	if (f__curunit) {
		/* The unit number is the entry's index within f__units. */
		fprintf(stderr,"apparent state: unit %d ",
			(int)(f__curunit-f__units));
		fprintf(stderr, f__curunit->ufnm ? "named %s\n" : "(unnamed)\n",
			f__curunit->ufnm);
		}
	else
		fprintf(stderr,"apparent state: internal I/O\n");
	if (f__fmtbuf)
		fprintf(stderr,"last format: %s\n",f__fmtbuf);
	fprintf(stderr,"lately %s %s %s %s",f__reading?"reading":"writing",
		f__sequential?"sequential":"direct",f__formatted?"formatted":"unformatted",
		f__external?"external":"internal");
	sig_die(" IO", 1);
}

/*
 * f_init -- mark the I/O library initialised and preconnect three units:
 * unit 0 to stderr, unit 5 to stdin and unit 6 to stdout.  Each is set to
 * ufmt = 1, with uwrt = 1 for the two output units and 0 for stdin, and
 * useek taken from f__canseek().
 */
void f_init(void)
{ unit *p;

	f__init=1;
	p= &f__units[0];
	p->ufd=stderr;
	p->useek=f__canseek(stderr);
	p->ufmt=1;
	p->uwrt=1;
	p = &f__units[5];
	p->ufd=stdin;
	p->useek=f__canseek(stdin);
	p->ufmt=1;
	p->uwrt=0;
	p= &f__units[6];
	p->ufd=stdout;
	p->useek=f__canseek(stdout);
	p->ufmt=1;
	p->uwrt=1;
}

/*
 * f__nowreading -- switch unit x over to reading.
 *
 * If (urw & 1) is already set only uwrt is cleared.  Otherwise the unit's
 * named file is reopened -- first with f__w_mode[ufmt|2], then, if that
 * fails, with f__r_mode[ufmt] -- and the previous offset is restored.
 * urw is set to 3 or 1 according to which reopen succeeded.
 *
 * Returns 0 on success; returns 1 with errno = 126 when the unit has no
 * file name or both reopen attempts fail.
 */
int f__nowreading(unit *x)
{
	OFF_T loc;
	int ufmt, urw;

	if (x->urw & 1)
		goto done;
	if (!x->ufnm)
		goto cantread;
	ufmt = x->url ? 0 : x->ufmt;
	loc = FTELL(x->ufd);
	urw = 3;
	if (!freopen(x->ufnm, f__w_mode[ufmt|2], x->ufd)) {
		urw = 1;
		if(!freopen(x->ufnm, f__r_mode[ufmt], x->ufd)) {
 cantread:
			errno = 126;
			return 1;
			}
		}
	FSEEK(x->ufd,loc,SEEK_SET);
	x->urw = urw;
 done:
	x->uwrt = 0;
	return 0;
}

/*
 * f__nowwriting -- switch unit x over to writing.
 *
 * If (urw & 2) is already set, the stream is only re-synchronised with a
 * zero-offset FSEEK when (urw & 1) is set as well.  Otherwise the named
 * file is reopened: with f__w_mode[ufmt] when uwrt == 3, else with
 * f__w_mode[ufmt|2] followed by a seek back to the previous offset.  The
 * reopened stream is also stored in f__cf.
 *
 * Returns 0 on success; returns 1 with errno = 127 when the unit has no
 * file name or the reopen fails.
 */
int f__nowwriting(unit *x)
{
	OFF_T loc;
	int ufmt;

	if (x->urw & 2) {
		if (x->urw & 1)
			FSEEK(x->ufd, (OFF_T)0, SEEK_CUR);
		goto done;
		}
	if (!x->ufnm)
		goto cantwrite;
	ufmt = x->url ? 0 : x->ufmt;
	if (x->uwrt == 3) { /* just did write, rewind */
		if (!(f__cf = x->ufd = freopen(x->ufnm,f__w_mode[ufmt],x->ufd)))
			goto cantwrite;
		x->urw = 2;
		}
	else {
		loc=FTELL(x->ufd);
		if (!(f__cf = x->ufd = freopen(x->ufnm, f__w_mode[ufmt | 2], x->ufd)))
			{
			x->ufd = NULL;
 cantwrite:
			errno = 127;
			return(1);
			}
		x->urw = 3;
		FSEEK(x->ufd,loc,SEEK_SET);
		}
 done:
	x->uwrt = 1;
	return 0;
}

/*
 * err__fl -- common error exit for I/O statements.
 *
 * When f is zero the error is fatal and f__fatal(m, s) is called.
 * Otherwise the f__doend hook, if set, is run, errno is set to m and m is
 * returned.
 */
int err__fl(int f, int m, const char *s)
{
	if (!f)
		f__fatal(m, s);
	if (f__doend)
		(*f__doend)();
	return errno = m;
}
blis-0.6.1/blastest/f2c/exit_.c000066400000000000000000000027631360743507500162400ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore.
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* This gives the effect of subroutine exit(rc) integer*4 rc stop end * with the added side effect of supplying rc as the program's exit code. */ #include #include "f2c.h" #ifdef __cplusplus extern "C" { #endif void exit_(integer *rc) { exit(*rc); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/exp.c000066400000000000000000000034051360743507500157160ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_exp(real *x) { return( exp(*x) ); } double d_exp(const doublereal *x) { return( exp(*x) ); } void c_exp(complex *r, complex *z) { double expx, zi = z->i; expx = exp(z->r); r->r = expx * cos(zi); r->i = expx * sin(zi); } void z_exp(doublecomplex *r, doublecomplex *z) { double expx, zi = z->i; expx = exp(z->r); r->r = expx * cos(zi); r->i = expx * sin(zi); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/f2c.h000066400000000000000000000265261360743507500156120ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* include/f2c.h. Generated from f2c.h.in by configure. */ /* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_INCLUDE #define F2C_INCLUDE #include #include #ifdef _MSC_VER # include #else # include #endif #ifdef __cplusplus extern "C" { #endif #ifdef INTEGER_STAR_8 /* Adjust for integer*8. 
*/ #define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) #define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) #endif #define TRUE_ (1) #define FALSE_ (0) /* Extern is for use with -E */ #ifndef Extern #define Extern extern #endif /* I/O stuff */ /*external read, write*/ typedef struct { flag cierr; ftnint ciunit; flag ciend; char *cifmt; ftnint cirec; } cilist; /*internal read, write*/ typedef struct { flag icierr; char *iciunit; flag iciend; char *icifmt; ftnint icirlen; ftnint icirnum; } icilist; /*open*/ typedef struct { flag oerr; ftnint ounit; char *ofnm; ftnlen ofnmlen; char *osta; char *oacc; char *ofm; ftnint orl; char *oblnk; } olist; /*close*/ typedef struct { flag cerr; ftnint cunit; char *csta; } cllist; /*rewind, backspace, endfile*/ typedef struct { flag aerr; ftnint aunit; } alist; /* inquire */ typedef struct { flag inerr; ftnint inunit; char *infile; ftnlen infilen; ftnint *inex; /*parameters in standard's order*/ ftnint *inopen; ftnint *innum; ftnint *innamed; char *inname; ftnlen innamlen; char *inacc; ftnlen inacclen; char *inseq; ftnlen inseqlen; char *indir; ftnlen indirlen; char *infmt; ftnlen infmtlen; char *inform; ftnint informlen; char *inunf; ftnlen inunflen; ftnint *inrecl; ftnint *innrec; char *inblank; ftnlen inblanklen; } inlist; union Multitype { /* for multiple entry points */ integer1 g; shortint h; integer i; /* longint j; */ real r; doublereal d; complex c; doublecomplex z; }; typedef union Multitype Multitype; struct Vardesc { /* for Namelist */ char *name; char *addr; ftnlen *dims; int type; }; typedef struct Vardesc Vardesc; struct Namelist { char *name; Vardesc **vars; int nvars; }; typedef struct Namelist Namelist; #define abs(x) ((x) >= 0 ? (x) : -(x)) #define dabs(x) (doublereal)abs(x) #define min(a,b) ((a) <= (b) ? (a) : (b)) #define max(a,b) ((a) >= (b) ? 
(a) : (b)) #define dmin(a,b) (doublereal)min(a,b) #define dmax(a,b) (doublereal)max(a,b) #define bit_test(a,b) ((a) >> (b) & 1) #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) /* undef any lower-case symbols that your C compiler predefines, e.g.: */ #ifndef Skip_f2c_Undefs /* #undef cray */ /* #undef gcos */ /* #undef mc68010 */ /* #undef mc68020 */ /* #undef mips */ /* #undef pdp11 */ /* #undef sgi */ /* #undef sparc */ /* #undef sun */ /* #undef sun2 */ /* #undef sun3 */ /* #undef sun4 */ /* #undef u370 */ /* #undef u3b */ /* #undef u3b2 */ /* #undef u3b5 */ /* #undef unix */ /* #undef vax */ #endif void libf2c_init(int argc, char **argv); void libf2c_close(); /************************************************************* * LIBF77 */ /* * Private functions and variables in libF77 */ extern int xargc; extern char **xargv; extern doublereal _0; double f__cabs(double, double); char *F77_aloc(integer Len, const char *whence); void sig_die(const char*, int); void _uninit_f2c(void *x, int type, long len); /* * Public functions in libF77 */ int abort_(void); void c_cos(complex *r, complex *z); void c_div(complex *c, complex *a, complex *b); void c_exp(complex *r, complex *z); void c_log(complex *r, complex *z); void c_sin(complex *r, complex *z); void c_sqrt(complex *r, complex *z); double dtime_(float *tarray); int ef1asc_(ftnint *a, ftnlen *la, ftnint *b, ftnlen *lb); integer ef1cmc_(ftnint *a, ftnlen *la, ftnint *b, ftnlen *lb); real etime_(real *tarray); int getarg_(ftnint *n, char *s, ftnlen ls); int getenv_(char *fname, char *value, ftnlen flen, ftnlen vlen); shortint h_indx(char *a, char *b, ftnlen la, ftnlen lb); integer i_indx(char *a, char *b, ftnlen la, ftnlen lb); logical l_ge(char *a, char *b, ftnlen la, ftnlen lb); logical l_gt(char *a, char *b, ftnlen la, ftnlen lb); logical l_le(char *a, char *b, ftnlen la, ftnlen lb); logical l_lt(char *a, char *b, ftnlen la, ftnlen lb); integer lbit_bits(integer a, 
integer b, integer len); integer lbit_shift(integer a, integer b); integer lbit_cshift(integer a, integer b, integer len); void pow_ci(complex *p, complex *a, integer *b); double pow_dd(doublereal *ap, doublereal *bp); double pow_di(doublereal *ap, integer *bp); shortint pow_hh(shortint *ap, shortint *bp); integer pow_ii(integer *ap, integer *bp); #ifdef INTEGER_STAR_8 longint pow_qq(longint *ap, longint *bp); #endif double pow_ri(real *ap, integer *bp); void pow_zi(doublecomplex*, doublecomplex*, integer*); void pow_zz(doublecomplex *r, doublecomplex *a, doublecomplex *b); #ifdef INTEGER_STAR_8 longint qbit_bits(longint a, integer b, integer len); longint qbit_cshift(longint a, integer b, integer len); longint qbit_shift(longint a, integer b); #endif double r_abs(real *x); double r_acos(real *x); double r_asin(real *x); double r_atan(real *x); double r_atn2(real *x, real *y); void r_cnjg(complex *r, complex *z); double r_cos(real *x); double r_cosh(real *x); double r_dim(real *a, real *b); double r_exp(real *x); double r_imag(complex *z); double r_int(real *x); double r_lg10(real *x); double r_log(real *x); double r_mod(real *x, real *y); double r_nint(real *x); double r_sign(real *a, real *b); double r_sin(real *x); double r_sinh(real *x); double r_sqrt(real *x); double r_tan(real *x); double r_tanh(real *x); int s_cat(char *lp, char *rpp[], ftnint rnp[], ftnint *np, ftnlen ll); integer s_cmp(const char *a0, const char *b0, ftnlen la, ftnlen lb); int s_paus(char *s, ftnlen n); integer s_rnge(char *varn, ftnint offset, char *procn, ftnint line); int s_stop(char *s, ftnlen n); ftnint signal_(integer *sigp, void *proc); integer system_(register char *s, ftnlen n); void z_div(doublecomplex*, doublecomplex*, doublecomplex*); void z_cos(doublecomplex *r, doublecomplex *z); void z_exp(doublecomplex *r, doublecomplex *z); void z_log(doublecomplex *r, doublecomplex *z); void z_sin(doublecomplex *r, doublecomplex *z); void z_sqrt(doublecomplex *r, doublecomplex *z); /* 
#ifndef F2C_NO_INLINE_H # if defined(__GNUC__) # include # endif #endif */ #if !defined(F2C_INLINE_H) double c_abs(const complex *z); double d_abs(const doublereal *x); double d_acos(const doublereal *x); double d_asin(const doublereal *x); double d_atan(const doublereal *x); double d_atn2(const doublereal *x, const doublereal *y); void d_cnjg(doublecomplex *r, const doublecomplex *z); double d_cos(const doublereal *x); double d_cosh(const doublereal *x); double d_dim(const doublereal *a, const doublereal *b); double d_exp(const doublereal *x); double d_imag(const doublecomplex *z); double d_int(const doublereal *x); double d_lg10(const doublereal *x); double d_log(const doublereal *x); double d_mod(const doublereal *x, const doublereal *y); double d_nint(const doublereal *x); double d_prod(const real *x, const real *y); double d_sign(const doublereal *a, const doublereal *b); double d_sin(const doublereal *x); double d_sinh(const doublereal *x); double d_sqrt(const doublereal *x); double d_tan(const doublereal *x); double d_tanh(const doublereal *x); double derf_(const doublereal *x); double derfc_(const doublereal *x); double erf_(const real *x); double erfc_(const real *x); shortint h_abs(const shortint *x); shortint h_dim(const shortint *a, const shortint *b); shortint h_dnnt(const doublereal *x); shortint h_len(const char *s, ftnlen n); shortint h_mod(const short *a, const short *b); shortint h_nint(const real *x); shortint h_sign(const shortint *a, const shortint *b); shortlogical hl_ge(const char *a, const char *b, ftnlen la, ftnlen lb); shortlogical hl_gt(const char *a, const char *b, ftnlen la, ftnlen lb); shortlogical hl_le(const char *a, const char *b, ftnlen la, ftnlen lb); shortlogical hl_lt(const char *a, const char *b, ftnlen la, ftnlen lb); integer i_abs(const integer *x); integer i_dceiling(const doublereal *x); integer i_dim(const integer *a, const integer *b); integer i_dnnt(const doublereal *x); integer i_len(const char *s, ftnlen n); integer 
i_len_trim(const char *s, ftnlen n); integer i_mod(const integer *a, const integer *b); integer i_nint(const real *x); integer i_sign(const integer *a, const integer *b); integer i_sceiling(const real *x); ftnint iargc_(void); int s_copy(char *a, const char *b, ftnlen la, ftnlen lb); double z_abs(const doublecomplex *z); #endif /* !F2C_INLINE_H */ /************************************************************* * LIBI77 * * Public functions */ int c_dfe(cilist *a); int c_due(cilist *a); int c_sfe(cilist *a); int c_sue(cilist *a); integer e_rdfe(void); integer e_rdue(void); integer e_rsfe(void); integer e_rsfi(void); integer e_rsle(void); integer e_rsli(void); integer e_rsue(void); integer e_wdfe(void); integer e_wdue(void); integer e_wsfi(void); integer e_wsfe(void); integer e_wsle(void); integer e_wsli(void); integer e_wsue(void); void exit_(integer *rc); integer f_back(alist *a); integer f_clos(cllist *a); integer f_end(alist *a); void f_exit(void); integer f_inqu(inlist *a); integer f_open(olist *a); integer f_rew(alist *a); int flush_(void); integer ftell_(integer *Unit); int fseek_(integer *Unit, integer *offset, integer *whence); #ifdef INTEGER_STAR_8 longint ftell64_(integer *Unit); int fseek64_(integer *Unit, longint *offset, integer *whence); #endif integer s_rdfe(cilist *a); integer s_rdue(cilist *a); integer s_rsfi(icilist *a); integer s_rsle(cilist *a); integer s_rsli(icilist *a); integer s_rsne(cilist *a); integer s_rsni(icilist *a); integer s_rsue(cilist *a); integer s_wdfe(cilist *a); integer s_wdue(cilist *a); integer s_wsfe(cilist *a); integer s_wsfi(icilist *a); integer s_wsle(cilist *a); integer s_wsli(icilist *a); integer s_wsne(cilist *a); integer s_wsni(icilist *a); integer s_wsue(cilist *a); real s_epsilon_( real* x ); double d_epsilon_( doublereal* x ); /* * Private functions in the F2C library */ extern const ftnlen f__typesize[]; #ifdef __cplusplus } #endif #endif 
blis-0.6.1/blastest/f2c/f2c_config.h000066400000000000000000000121211360743507500171210ustar00rootroot00000000000000/* config_aux/config.h. Generated from f2c_config.h.in by configure. */
/* config_aux/config.h.in. Generated from configure.ac by autoheader. */

/*
 * Feature-test macros for the bundled f2c runtime.  The values below are the
 * recorded output of a configure run, not something re-detected at build
 * time; only the _MSC_VER guards adapt to the compiler actually in use.
 */

/* Bit size of 'int' */
#define F2C_INT_BITS 32

/* Bit size of 'long' */
#define F2C_LONG_BITS 64

/* Bit size of 'long long' */
#define F2C_LONG_LONG_BITS 64

/* Define to 1 if you have the `atexit' function. */
#define HAVE_ATEXIT 1

/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1

/* Define to 1 if you have the <fenv.h> header file. */
#define HAVE_FENV_H 1

/* Define to 1 if you have the `floor' function. */
/* #undef HAVE_FLOOR */

/* Define to 1 if you have the `fork' function. */
#define HAVE_FORK 1

/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
#if !defined(_MSC_VER)
#define HAVE_FSEEKO 1
#endif

/* Define to 1 if you have the `ftruncate' function. */
#if !defined(_MSC_VER)
#define HAVE_FTRUNCATE 1
#endif

/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1

/* Define to 1 if you have the `isascii' function. */
#define HAVE_ISASCII 1

/* Define to 1 if you have the `isatty' function. */
#define HAVE_ISATTY 1

/* Define to 1 if your system has a GNU libc compatible `malloc' function, and
   to 0 otherwise. */
#define HAVE_MALLOC 1

/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1

/* Define to 1 if you have the `memset' function. */
#define HAVE_MEMSET 1

/* Define to 1 if you have the `mkdir' function. */
#define HAVE_MKDIR 1

/* Define to 1 if you have the `mkdtemp' function. */
#define HAVE_MKDTEMP 1

/* Define to 1 if you have the `mkstemp' function. */
#define HAVE_MKSTEMP 1

/* Define to 1 if you have the `onexit' function. */
/* #undef HAVE_ONEXIT */

/* Define to 1 if you have the `pow' function. */
/* #undef HAVE_POW */

/* Define to 1 if your system has a GNU libc compatible `realloc' function,
   and to 0 otherwise. */
#define HAVE_REALLOC 1

/* Define to 1 if you have the `rmdir' function. */
#define HAVE_RMDIR 1

/* Define to 1 if you have the `sqrt' function. */
/* #undef HAVE_SQRT */

/* Define to 1 if you have the <stddef.h> header file. */
#define HAVE_STDDEF_H 1

/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1

/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1

/* Define to 1 if you have the `strchr' function. */
#define HAVE_STRCHR 1

/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1

/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1

/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1

/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1

/* Define to 1 if you have the `tmpfile' function. */
#define HAVE_TMPFILE 1

/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1

/* Define to 1 if you have the `vfork' function. */
#define HAVE_VFORK 1

/* Define to 1 if you have the <vfork.h> header file. */
/* #undef HAVE_VFORK_H */

/* Define to 1 if `fork' works. */
#define HAVE_WORKING_FORK 1

/* Define to 1 if `vfork' works. */
#define HAVE_WORKING_VFORK 1

/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"

/* Name of package */
#define PACKAGE "f2c"

/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "jjgarcia@users.sourceforge.net"

/* Define to the full name of this package. */
#define PACKAGE_NAME "F2C Fortran to C99 compiler"

/* Define to the full name and version of this package. */
#define PACKAGE_STRING "F2C Fortran to C99 compiler 12.02.01"

/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "f2c"

/* Define to the home page for this package. */
#define PACKAGE_URL ""

/* Define to the version of this package. */
#define PACKAGE_VERSION "12.02.01"

/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1

/* Version number of package */
#define VERSION "12.02.01"

/* Enable large inode numbers on Mac OS X 10.5. */
#ifndef _DARWIN_USE_64_BIT_INODE
# define _DARWIN_USE_64_BIT_INODE 1
#endif

/* Number of bits in a file offset, on hosts where this is settable. */
/* #undef _FILE_OFFSET_BITS */

/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */
/* #undef _LARGEFILE_SOURCE */

/* Define for large files, on AIX-style hosts. */
/* #undef _LARGE_FILES */

/* Define to `__inline__' or `__inline' if that's what the C compiler
   calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
/* #undef inline */
#endif

/* Define to rpl_malloc if the replacement function should be used. */
/* #undef malloc */

/* Define to `int' if <sys/types.h> does not define. */
/* #undef pid_t */

/* Define to rpl_realloc if the replacement function should be used. */
/* #undef realloc */

/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */

/* Define as `fork' if `vfork' does not work. */
/* #undef vfork */

/* Hand-added (not autoheader output): select the non-Unix stdio code paths
   when building with the Microsoft compiler. */
#ifdef _MSC_VER
#define NON_UNIX_STDIO 1
#endifblis-0.6.1/blastest/f2c/f2c_inline.h000066400000000000000000000135271360743507500171450ustar00rootroot00000000000000/****************************************************************
Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore.

Permission to use, copy, modify, and distribute this software
and its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the names of AT&T, Bell Laboratories,
Lucent or Bellcore or any of their entities not be used in
advertising or publicity pertaining to distribution of the
software without specific, written prior permission.
AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* f2c_inline.h -- Standard Fortran to C header file */ #ifndef F2C_INLINE_H #define F2C_INLINE_H #ifndef F2C_INCLUDE #error f2c_include.h cannot be included as is #endif static inline double c_abs(const complex *z) { return hypot(z->r, z->i); } static inline double d_abs(const double *x) { return fabs(*x); } static inline double d_acos(const double *x) { return acos(*x); } static inline double d_acosh(const double *x) { return acosh(*x); } static inline double d_asin(const double *x) { return asin(*x); } static inline double d_asinh(const double *x) { return asinh(*x); } static inline double d_atan(const double *x) { return atan(*x); } static inline double d_atanh(const double *x) { return atanh(*x); } static inline double d_atn2(const double *x, double *y) { return atan2(*x, *y); } static inline void d_cnjg(doublecomplex *r, const doublecomplex *z) { r->r = z->r; r->i = -z->i; } static inline double d_cos(const double *x) { return cos(*x); } static inline double d_cosh(const double *x) { return cosh(*x); } static inline double d_dim(const double *a, double *b) { double d = (*a - *b); return (d > 0)? d : 0; } static inline double d_exp(const double *x) { return exp(*x); } static inline double d_imag(doublecomplex *x) { return x->i; } static inline double d_int(const double *x) { double y = *x; return (y < 0)? 
floor(y) : -floor(-y); } static inline double d_lg10(const double *x) { return log10(*x); } static inline double d_log(const double *x) { return log(*x); } static inline double d_nint(const double *x) { return round(*x); } static inline double d_prod(const float *x, const float *y) { return ((double)*x) * ((double)*x); } static inline double d_sin(const double *x) { return sin(*x); } static inline double d_tan(const double *x) { return tan(*x); } static inline double d_sinh(const double *x) { return sinh(*x); } static inline double d_sqrt(const double *x) { return sqrt(*x); } static inline double d_tanh(const double *x) { return tanh(*x); } static inline double d_sign(const double *a, const double *b) { double x = fabs(*a); return (*b >= 0 ? x : -x); } static inline double derfc_(const double *x) { return erfc(*x); } static inline double derf_(const double *x) { return erf(*x); } static inline double erf_(const float *x) { return erf((double)(*x)); } static inline double erfc_(const float *x) { return erfc((double)(*x)); } static inline shortint h_abs(const shortint *x) { return abs(*x); } static inline shortint h_dim(const shortint *a, const shortint *b) { shortint d = (*a - *b); return (d > 0)? d : 0; } static inline shortint h_len(const char *s, ftnlen n) { return n; } static inline shortint h_mod(const shortint *a, const shortint *b) { return *a % *b; } static inline shortint h_nint(const float *x) { return (shortint)round(*x); } static inline shortint h_dnnt(const doublereal *x) { return (shortint)round(*x); } static inline shortint h_sign(const shortint *a, const shortint *b) { shortint x = abs(*a); return *b >= 0 ? 
x : -x; } static inline shortlogical hl_ge(const char *a, const char *b, ftnlen la, ftnlen lb) { return s_cmp(a,b,la,lb) >= 0; } static inline shortlogical hl_le(const char *a, const char *b, ftnlen la, ftnlen lb) { return s_cmp(a,b,la,lb) >= 0; } static inline shortlogical hl_gt(const char *a, const char *b, ftnlen la, ftnlen lb) { return s_cmp(a,b,la,lb) > 0; } static inline shortlogical hl_lt(const char *a, const char *b, ftnlen la, ftnlen lb) { return s_cmp(a,b,la,lb) < 0; } static inline integer i_abs(const integer *x) { return abs(*x); } static inline integer i_dim(const integer *a, const integer *b) { integer d = (*a - *b); return (d > 0)? d : 0; } static inline integer i_len(const char *s, ftnlen n) { return n; } static inline integer i_mod(const integer *a, const integer *b) { return *a % *b; } static inline integer i_nint(const float *x) { return (integer)round(*x); } static inline integer i_dnnt(const doublereal *x) { return (integer)round(*x); } static inline integer i_sign(const integer *a, const integer *b) { integer x = abs(*a); return *b >= 0 ? x : -x; } static inline ftnint iargc_(void) { return xargc - 1; } static inline double z_abs(const doublecomplex *z) { return hypot(z->r, z->i); } static int s_copy(char *a, const char *b, ftnlen la, ftnlen lb) { if (la <= lb) { memmove(a, b, la); } else { memmove(a, b, lb); memset(a, ' ', la - lb); } return 0; } static inline integer i_sceiling(const real *r) { real x = *r; return ((integer)(x) + ((x) > 0 && (x) != (integer)(x))); } static inline integer i_dceiling(const doublereal *r) { doublereal x = *r; return ((integer)(x) + ((x) > 0 && (x) != (integer)(x))); } #endif /* !F2C_INLINE_H */ blis-0.6.1/blastest/f2c/f2c_types.h000066400000000000000000000104511360743507500170240ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* include/f2c_types.h. Generated from f2c_types.h.in by configure. */ /* include/f2c.h. Generated from f2c.h.in by configure. */ /* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_TYPES_H #define F2C_TYPES_H #ifdef HAVE_BLIS_H #include #define BLIS_VIA_BLASTEST #include "blis.h" #endif #ifdef __cplusplus extern "C" { #endif /* Define to the number of bits in an integer */ #define F2C_INT_BITS 32 /* Define to the number of bits in a long integer */ #define F2C_LONG_BITS 64 /* Define to the number of bits in a long long integer, if it exists */ #define F2C_LONG_LONG_BITS 64 #ifdef HAVE_BLIS_H #if BLIS_BLAS_INT_TYPE_SIZE == 32 typedef int32_t integer; #elif BLIS_BLAS_INT_TYPE_SIZE == 64 typedef int64_t integer; #else typedef long int integer; #endif //typedef int integer; typedef unsigned int uinteger; #endif #if F2C_INT_BITS == 32 # if F2C_LONG_BITS == 64 typedef long int longint; typedef unsigned long int ulongint; # define INTEGER_STAR_8 # elif defined(F2C_LONG_LONG_BITS) # if F2C_LONG_LONG_BITS == 64 typedef long long int longint; typedef unsigned long long int ulongint; # define INTEGER_STAR_8 # endif # endif #endif typedef char integer1; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef integer logical; typedef shortint shortlogical; typedef integer1 logical1; #ifdef f2c_i2 /* for -i2 */ typedef short flag; #ifndef HAVE_BLIS_H // don't re-typedef ftnlen typedef short ftnlen; #endif typedef short ftnint; #else typedef integer flag; #ifndef HAVE_BLIS_H // don't re-typedef ftnlen typedef integer ftnlen; #endif typedef integer ftnint; #endif /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ void (*C_fp)(...); typedef /* Double Complex */ void (*Z_fp)(...); typedef 
logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ void (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ void (*C_fp)(); typedef /* Double Complex */ void (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ void (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef void C_f; /* complex function */ typedef void H_f; /* character function */ typedef void Z_f; /* double complex function */ typedef doublereal E_f; /* real function with -R not specified */ #ifdef __cplusplus } #endif #endif /* F2C_TYPES_H */ blis-0.6.1/blastest/f2c/f2c_types_win.h000066400000000000000000000044701360743507500177050ustar00rootroot00000000000000/* include/f2c.h. Generated from f2c.h.in by configure. */ /* f2c.h -- Standard Fortran to C header file */ /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 
- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ #ifndef F2C_TYPES_WIN_H #define F2C_TYPES_WIN_H #ifdef __cplusplus extern "C" { #endif /* Define to the number of bits in an integer */ #define F2C_INT_BITS 32 /* Define to the number of bits in a long integer */ #define F2C_LONG_BITS 64 typedef int integer; typedef unsigned int uinteger; typedef __int64 longint; typedef unsigned __int64 ulongint; /*#define INTEGER_STAR_8*/ typedef char integer1; typedef char *address; typedef short int shortint; typedef float real; typedef double doublereal; typedef struct { real r, i; } complex; typedef struct { doublereal r, i; } doublecomplex; typedef integer logical; typedef shortint shortlogical; typedef integer1 logical1; #ifdef f2c_i2 /* for -i2 */ typedef short flag; typedef short ftnlen; typedef short ftnint; #else typedef integer flag; typedef integer ftnlen; typedef integer ftnint; #endif /* procedure parameter types for -A and -C++ */ #define F2C_proc_par_types 1 #ifdef __cplusplus typedef int /* Unknown procedure type */ (*U_fp)(...); typedef shortint (*J_fp)(...); typedef integer (*I_fp)(...); typedef real (*R_fp)(...); typedef doublereal (*D_fp)(...), (*E_fp)(...); typedef /* Complex */ void (*C_fp)(...); typedef /* Double Complex */ void (*Z_fp)(...); typedef logical (*L_fp)(...); typedef shortlogical (*K_fp)(...); typedef /* Character */ void (*H_fp)(...); typedef /* Subroutine */ int (*S_fp)(...); #else typedef int /* Unknown procedure type */ (*U_fp)(); typedef shortint (*J_fp)(); typedef integer (*I_fp)(); typedef real (*R_fp)(); typedef doublereal (*D_fp)(), (*E_fp)(); typedef /* Complex */ void (*C_fp)(); typedef /* Double Complex */ void (*Z_fp)(); typedef logical (*L_fp)(); typedef shortlogical (*K_fp)(); typedef /* Character */ void (*H_fp)(); typedef /* Subroutine */ int (*S_fp)(); #endif /* E_fp is for real functions when -R is not specified */ typedef void C_f; /* complex function */ typedef void H_f; /* character function */ typedef 
void Z_f; /* double complex function */
typedef doublereal E_f; /* real function with -R not specified */

#ifdef __cplusplus
}
#endif

#endif /* F2C_TYPES_WIN_H */
blis-0.6.1/blastest/f2c/fio.h000066400000000000000000000114521360743507500157050ustar00rootroot00000000000000/****************************************************************
Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore.

Permission to use, copy, modify, and distribute this software
and its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the names of AT&T, Bell Laboratories,
Lucent or Bellcore or any of their entities not be used in
advertising or publicity pertaining to distribution of the
software without specific, written prior permission.

AT&T, Lucent and Bellcore disclaim all warranties with regard to
this software, including all implied warranties of
merchantability and fitness. In no event shall AT&T, Lucent or
Bellcore be liable for any special, indirect or consequential
damages or any damages whatsoever resulting from loss of use,
data or profits, whether in an action of contract, negligence or
other tortious action, arising out of or in connection with the
use or performance of this software.
****************************************************************/ #include #include #include #include #include #include #ifdef HAVE_FSEEKO #define OFF_T off_t #define FSEEK fseeko #define FTELL ftello #else #define OFF_T long #define FSEEK fseek #define FTELL ftell #endif #ifdef MSDOS #ifndef NON_UNIX_STDIO #define NON_UNIX_STDIO #endif #endif typedef long uiolen; /*units*/ typedef struct { FILE *ufd; /*0=unconnected*/ char *ufnm; #ifndef MSDOS long uinode; int udev; #endif int url; /*0=sequential*/ flag useek; /*true=can backspace, use dir, ...*/ flag ufmt; flag urw; /* (1 for can read) | (2 for can write) */ flag ublnk; flag uend; flag uwrt; /*last io was write*/ flag uscrtch; } unit; extern int (*f__getn)(void); /* for formatted input */ extern void (*f__putn)(int); /* for formatted output */ extern void x_putc(int); extern long f__inode(char*,int*); extern void sig_die(const char*,int); extern void f__fatal(int, const char*); extern int t_runc(alist*); extern int f__nowreading(unit*), f__nowwriting(unit*); extern int fk_open(int,int,ftnint); extern int en_fio(void); extern void f_init(void); extern int (*f__donewrec)(void), t_putc(int), x_wSL(void); extern void b_char(const char*,char*,ftnlen), g_char(const char*,ftnlen,char*); extern int c_sfe(cilist*); extern int z_rnew(void); extern int err__fl(int,int,const char*); extern int xrd_SL(void); extern int f__putbuf(int); extern int f__canseek(FILE *f); extern int z_getc(void); extern void z_putc(int c); extern integer f_open(olist *a); #ifdef INTEGER_STAR_8 extern char *f__icvt(longint value, int *ndigit, int *sign, int base); #else extern char *f__icvt(integer value, int *ndigit, int *sign, int base); #endif extern int t_getc(void); extern flag f__init; extern cilist *f__elist; /*active external io list*/ extern flag f__reading,f__external,f__sequential,f__formatted; extern int (*f__doend)(void); extern FILE *f__cf; /*current file*/ extern unit *f__curunit; /*current unit*/ extern unit f__units[]; extern char 
*f__icptr; extern char *f__icend; extern icilist *f__svic; extern int f__icnum; #define err(f,m,s) {if(f) errno= m; else f__fatal(m,s); return(m);} #define errfl(f,m,s) return err__fl((int)f,m,s) /*Table sizes*/ #define MXUNIT 100 extern int f__recpos; /*position in current record*/ extern OFF_T f__cursor; /* offset to move to */ extern OFF_T f__hiwater; /* so TL doesn't confuse us */ #define WRITE 1 #define READ 2 #define SEQ 3 #define DIR 4 #define FMT 5 #define UNF 6 #define EXT 7 #define INT 8 #define buf_end(x) (x->_flag & _IONBF ? x->_ptr : x->_base + BUFSIZ) extern const char *f__fmtbuf; extern const char *f__r_mode[2]; extern const char *f__w_mode[]; extern int l_eof; extern int c_le(cilist *a); extern int l_read(ftnint *number, char *ptr, ftnlen len, ftnint type); extern int l_write(ftnint *number, char *ptr, ftnlen len, ftnint type); extern flag f__lquit; extern int f__lcount; extern char *f__icptr; extern char *f__icend; extern icilist *f__svic; extern int f__icnum, f__recpos; extern int f__Aquote; extern int x_rsne(cilist*); extern void x_wsne(cilist *a); extern flag f__lquit; extern int f__lcount, nml_read; extern int t_getc(void); extern uiolen f__reclen; extern ftnint L_len; extern int f__scale; extern int (*l_getc)(void); extern int (*l_ungetc)(int,FILE*); extern int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint); int do_us(ftnint *number, char *ptr, ftnlen len); integer do_ud(ftnint *number, char *ptr, ftnlen len); integer do_uio(ftnint *number, char *ptr, ftnlen len); integer do_fio(ftnint *number, char *ptr, ftnlen len); int en_fio(void); extern int x_wSL(void); extern int x_getc(void); extern int x_endp(void); blis-0.6.1/blastest/f2c/fmt.c000066400000000000000000000214211360743507500157060ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/

/*
 * fmt.c -- compiles a format string into the table f__syl (pars_f and its
 * static helpers) and then interprets that table once per I/O list item
 * (do_fio).
 */

#include /* NOTE(review): no header name is visible on this directive in the text under review -- confirm against upstream */
#include "f2c.h"
#include "fio.h"
#include "fmt.h"

/* Advance s past blanks.  Expands to a bare while statement and evaluates s
   several times, so s must be a plain pointer variable. */
#define skip(s) while(*s==' ') s++

/* Capacity of f__syl.  Every branch below selects the same value, 300. */
#ifdef interdata
#define SYLMX 300
#endif
#ifdef pdp11
#define SYLMX 300
#endif
#ifdef vax
#define SYLMX 300
#endif
#ifndef SYLMX
#define SYLMX 300
#endif

#define GLITCH '\2'
    /* special quote character for stu */
extern flag f__cblank,f__cplus; /*blanks in I and compulsory plus*/

/* The compiled format: one entry per op_gen() call. */
static struct syl f__syl[SYLMX];

/* f__parenlvl: current parenthesis depth while parsing.
   f__pc:       next free f__syl slot while parsing; index of the op being
                executed while do_fio runs.
   f__revloc:   location handed to the REVERT op emitted for the outermost
                closing parenthesis. */
int f__parenlvl,f__pc,f__revloc;

/*
 * Find the end of a quoted string.  s points at the opening quote character.
 * A doubled quote inside the string is skipped as a literal quote.  Returns
 * a pointer to the character after the closing quote.  If the text ends
 * first: with f__elist->cierr set, errno becomes 100 and NULL is returned;
 * otherwise f__fatal(100, "bad string") is called.
 */
static
const char *ap_end(const char *s)
{
    char quote;
    quote= *s++;
    for(;*s;s++)
    {
        if(*s!=quote) continue;
        /* a quote: the string ends here unless the next char is a quote too */
        if(*++s!=quote) return(s);
    }
    if(f__elist->cierr) {
        errno = 100;
        return(NULL);
    }
    f__fatal(100, "bad string");
    /*NOTREACHED*/ return 0;
}

/*
 * Append one op to f__syl: opcode a, p1 = b, p2.i[0] = c, p2.i[1] = d.
 * Returns the index of the new entry and advances f__pc.
 * When the table is full, a message is printed and sig_die() is called.
 * NOTE(review): the stores that follow are only safe if sig_die() does not
 * return -- presumably it terminates the program; confirm.
 */
static int op_gen(int a, int b, int c, int d)
{
    struct syl *p= &f__syl[f__pc];
    if(f__pc>=SYLMX)
    {
        fprintf(stderr,"format too complicated:\n");
        sig_die(f__fmtbuf, 1);
    }
    p->op=a;
    p->p1=b;
    p->p2.i[0]=c;
    p->p2.i[1]=d;
    return(f__pc++);
}

static const char *f_list(const char*);

/*
 * Read an unsigned decimal number at s into *n.  Blanks are skipped wherever
 * they occur, including between digits.  If no digit is found, *n receives
 * the default n1 and, when n1 is 0, NULL is returned to signal "number
 * required".  Otherwise returns a pointer to the first character that is
 * neither a blank nor a digit.
 * (The local f__cnt is a digit counter that shadows the file-scope array of
 * the same name defined further down.)
 */
static const char *gt_num(const char *s, int *n, int n1)
{
    int m=0,f__cnt=0;
    char c;
    for(c= *s;;c = *s)
    {
        if(c==' ')
        {
            s++;
            continue;
        }
        if(c>'9' || c<'0') break;
        m=10*m+c-'0';
        f__cnt++;
        s++;
    }
    if(f__cnt==0) {
        if (!n1)
            s = 0;
        *n=n1;
    }
    else *n=m;
    return(s);
}

/*
 * Parse one parenthesised group starting at s.  curloc is the f__syl index
 * recorded in the RET1 op that opens the group.  Returns a pointer past the
 * group (trailing blanks skipped), or NULL if s does not start with '(' or
 * the list inside fails to parse.
 */
static
const char *f_s(const char *s, int curloc)
{
    skip(s);
    if(*s++!='(')
    {
        return(NULL);
    }
    /* depth was 1 before this increment: a group directly inside the
       outermost parentheses, so remember it as the revert location */
    if(f__parenlvl++ ==1) f__revloc=curloc;
    /* op_gen as written returns a non-negative index, so the <0 test never
       fires; the real failure path is f_list returning NULL */
    if(op_gen(RET1,curloc,0,0)<0 ||
        (s=f_list(s))==NULL)
    {
        return(NULL);
    }
    skip(s);
    return(s);
}

/*
 * Try to parse a non-data edit descriptor at s (':', '$', B, S, '/', a
 * signed or unsigned count followed by P/X/H, a quoted string, T, X, P).
 * Returns 0 if s does not start one -- *p is then untouched, except in the
 * quoted-string case where it has already been set to NULL.
 * Returns 1 otherwise, with *p pointing past the descriptor, or with *p set
 * to NULL when a required number was missing (label "bad").
 */
static int ne_d(const char *s, const char **p)
{
    int n,x,sign=0;
    struct syl *sp;
    switch(*s)
    {
    default:
        return(0);
    case ':': (void) op_gen(COLON,0,0,0); break;
    case '$':
        (void) op_gen(NONL, 0, 0, 0); break;
    case 'B':
    case 'b':
        /* s is advanced onto the second letter here; the s++ at the bottom
           then steps past it */
        if(*++s=='z' || *s == 'Z') (void) op_gen(BZ,0,0,0);
        else (void) op_gen(BN,0,0,0);
        break;
    case 'S':
    case 's':
        if(*(s+1)=='s' || *(s+1) == 'S')
        {
            x=SS;
            s++;
        }
        else if(*(s+1)=='p' || *(s+1) == 'P')
        {
            x=SP;
            s++;
        }
        else x=S;
        (void) op_gen(x,0,0,0);
        break;
    case '/': (void) op_gen(SLASH,0,0,0); break;
    /* Deliberate fallthrough: '-' records the sign and falls into '+',
       which steps over the sign character and falls into the digit cases. */
    case '-': sign=1;
    case '+': s++; /*OUTRAGEOUS CODING TRICK*/
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        if (!(s=gt_num(s,&n,0))) {
 bad:
            /* shared error exit, also reached by goto from the T case */
            *p = 0;
            return 1;
        }
        switch(*s)
        {
        default:
            /* a number not followed by P, X or H: not ours (it may be the
               repeat count of a data descriptor or of a group) */
            return(0);
        case 'P':
        case 'p': if(sign) n= -n; (void) op_gen(P,n,0,0); break;
        case 'X':
        case 'x': (void) op_gen(X,n,0,0); break;
        case 'H':
        case 'h':
            /* nH: the n characters after the H are the literal text.
               NOTE(review): s is advanced by n with no check that the
               format text is at least that long. */
            sp = &f__syl[op_gen(H,n,0,0)];
            sp->p2.s = (char*)s + 1;
            s+=n;
            break;
        }
        break;
    case GLITCH:
    case '"':
    case '\'':
        /* quoted literal: the op keeps a pointer to the opening quote */
        sp = &f__syl[op_gen(APOS,0,0,0)];
        sp->p2.s = (char*)s;
        if((*p = ap_end(s)) == NULL)
            return(0);
        return(1);
    case 'T':
    case 't':
        if(*(s+1)=='l' || *(s+1) == 'L')
        {
            x=TL;
            s++;
        }
        else if(*(s+1)=='r'|| *(s+1) == 'R')
        {
            x=TR;
            s++;
        }
        else x=T;
        if (!(s=gt_num(s+1,&n,0)))
            goto bad;
        /* gt_num left s on the char after the number; back up one so the
           common s++ below lands there again */
        s--;
        (void) op_gen(x,n,0,0);
        break;
    case 'X':
    case 'x': (void) op_gen(X,1,0,0); break;
    case 'P':
    case 'p': (void) op_gen(P,1,0,0); break;
    }
    s++;
    *p=s;
    return(1);
}

/*
 * Try to parse a data edit descriptor with an optional repeat count
 * (E, G, O, Z, L, A, F, D, I).  A STACK op carrying the repeat count
 * (default 1) is emitted first, followed by the descriptor's own op.
 * Returns 0 with *p = the original s if no descriptor letter follows the
 * count; the STACK op is then discarded again.  Returns 1 otherwise, with
 * *p pointing past the descriptor, or with *p set to NULL when a required
 * width or digit count was missing (label "bad").
 * In every case a width of 0 leaves the switch without emitting the
 * descriptor's op.
 */
static int e_d(const char *s, const char **p)
{
    int i,im,n,w,d,e,found=0,x=0;
    const char *sv=s;
    s=gt_num(s,&n,1);
    (void) op_gen(STACK,n,0,0);
    switch(*s++)
    {
    default: break;
    /* E falls through into G; x remembers which of the two it was */
    case 'E':
    case 'e': x=1;
    case 'G':
    case 'g':
        found=1;
        if (!(s=gt_num(s,&w,0))) {
 bad:
            /* shared error exit for every case in this switch */
            *p = 0;
            return 1;
        }
        if(w==0) break;
        if(*s=='.') {
            if (!(s=gt_num(s+1,&d,0)))
                goto bad;
        }
        else d=0;
        if(*s!='E' && *s != 'e')
            (void) op_gen(x==1?E:G,w,d,0); /* default is Ew.dE2 */
        else {
            /* explicit exponent width: Ew.dEe / Gw.dEe */
            if (!(s=gt_num(s+1,&e,0)))
                goto bad;
            (void) op_gen(x==1?EE:GE,w,d,e);
        }
        break;
    /* O and Z share the I parser below; i is the op for the plain w form,
       im the op for the w.d form */
    case 'O':
    case 'o':
        i = O;
        im = OM;
        goto finish_I;
    case 'Z':
    case 'z':
        i = Z;
        im = ZM;
        goto finish_I;
    case 'L':
    case 'l':
        found=1;
        if (!(s=gt_num(s,&w,0)))
            goto bad;
        if(w==0) break;
        (void) op_gen(L,w,0,0);
        break;
    case 'A':
    case 'a':
        found=1;
        skip(s);
        /* A with a width becomes AW, a bare A becomes A */
        if(*s>='0' && *s<='9')
        {
            s=gt_num(s,&w,1);
            if(w==0) break;
            (void) op_gen(AW,w,0,0);
            break;
        }
        (void) op_gen(A,0,0,0);
        break;
    case 'F':
    case 'f':
        if (!(s=gt_num(s,&w,0)))
            goto bad;
        found=1;
        if(w==0) break;
        if(*s=='.') {
            if (!(s=gt_num(s+1,&d,0)))
                goto bad;
        }
        else d=0;
        (void) op_gen(F,w,d,0);
        break;
    case 'D':
    case 'd':
        found=1;
        if (!(s=gt_num(s,&w,0)))
            goto bad;
        if(w==0) break;
        if(*s=='.') {
            if (!(s=gt_num(s+1,&d,0)))
                goto bad;
        }
        else d=0;
        (void) op_gen(D,w,d,0);
        break;
    case 'I':
    case 'i':
        i = I;
        im = IM;
 finish_I:
        if (!(s=gt_num(s,&w,0)))
            goto bad;
        found=1;
        if(w==0) break;
        if(*s!='.')
        {
            (void) op_gen(i,w,0,0);
            break;
        }
        if (!(s=gt_num(s+1,&d,0)))
            goto bad;
        (void) op_gen(im,w,d,0);
        break;
    }
    if(found==0)
    {
        f__pc--; /*unSTACK*/
        *p=sv;
        return(0);
    }
    *p=s;
    return(1);
}

/*
 * Parse one list item at s.  A ')' is returned as-is for f_list to handle.
 * Otherwise the item is tried as a non-data descriptor, then as a data
 * descriptor, and finally as a repeat count followed by a nested
 * parenthesised group.  Returns the position after the item; NULL on
 * failure, or when ne_d/e_d reported a missing number through *p.
 */
static
const char *i_tem(const char *s)
{
    const char *t;
    int n,curloc;
    if(*s==')') return(s);
    if(ne_d(s,&t)) return(t);
    if(e_d(s,&t)) return(t);
    s=gt_num(s,&n,1);
    if((curloc=op_gen(STACK,n,0,0))<0) return(NULL);
    return(f_s(s,curloc));
}

/*
 * Parse list items up to and including the closing ')'.  A comma after an
 * item is consumed; any other character simply starts the next item.  At
 * ')', the outermost level emits REVERT (carrying f__revloc) and inner
 * levels emit GOTO.  Returns the position after the ')', or NULL if an item
 * fails or the text ends before the ')'.
 */
static
const char *f_list(const char *s)
{
    for(;*s!=0;)
    {
        skip(s);
        if((s=i_tem(s))==NULL) return(NULL);
        skip(s);
        if(*s==',') s++;
        else if(*s==')')
        {
            if(--f__parenlvl==0)
            {
                (void) op_gen(REVERT,f__revloc,0,0);
                return(++s);
            }
            (void) op_gen(GOTO,0,0,0);
            return(++s);
        }
    }
    return(NULL);
}

/*
 * Compile the format string s into f__syl, resetting the parser state first.
 * Returns 0 on success and -1 if the format could not be parsed.
 */
int
pars_f(const char *s)
{
    f__parenlvl=f__revloc=f__pc=0;
    if(f_s(s,0) == NULL)
    {
        return(-1);
    }
    return(0);
}

/* Interpreter state used by do_fio:
   f__cnt / f__cp: stack of remaining repeat counts and its top index.
   f__ret / f__rp: stack of group return locations and its top index.
   NOTE(review): neither index is checked against STKSZ when pushed. */
#define STKSZ 10
int f__cnt[STKSZ],f__ret[STKSZ],f__cp,f__rp;
flag f__workdone, f__nonl;

/*
 * Classify an opcode for do_fio's dispatch: NED for the non-data descriptors
 * listed, ED for the data descriptors listed, and the opcode itself for
 * everything else (RET1, REVERT, GOTO, STACK are spelled out; the remaining
 * codes pass through the default).
 */
static
int type_f(int n)
{
    switch(n)
    {
    default:
        return(n);
    case RET1:
        return(RET1);
    case REVERT: return(REVERT);
    case GOTO: return(GOTO);
    case STACK: return(STACK);
    case X:
    case SLASH:
    case APOS: case H:
    case T: case TL: case TR:
        return(NED);
    case F:
    case I:
    case IM:
    case A: case AW:
    case O: case OM:
    case L:
    case E: case EE: case D:
    case G: case GE:
    case Z: case ZM:
        return(ED);
    }
}

/*
 * Run the compiled format over *number items of len bytes each, starting at
 * ptr.  Ops are executed from f__syl[f__pc] onward; "goto loop" executes the
 * next op for the same item, while "continue" moves on to the next item.
 * A NULL ptr (see en_fio) makes the ED, REVERT and COLON cases return the
 * result of (*f__doend)().
 * Returns 0 when all items were handled; otherwise the value produced by
 * f__doend, f__dorevert, or the err / errfl macros.
 */
integer do_fio(ftnint *number, char *ptr, ftnlen len)
{
    struct syl *p;
    int n,i;
    for(i=0;i<*number;i++,ptr+=len)
    {
loop:
        switch(type_f((p= &f__syl[f__pc])->op))
        {
        default:
            fprintf(stderr,"unknown code in do_fio: %d\n%s\n",
                p->op,f__fmtbuf);
            /* err() returns from do_fio, so there is no fallthrough */
            err(f__elist->cierr,100,"do_fio");
        case NED:
            /* a non-zero result from f__doned keeps going for the same
               item; zero moves on to the next item */
            if((*f__doned)(p))
            {
                f__pc++;
                goto loop;
            }
            f__pc++;
            continue;
        case ED:
            /* repeat count used up: pop it and step past this descriptor */
            if(f__cnt[f__cp]<=0)
            {
                f__cp--;
                f__pc++;
                goto loop;
            }
            if(ptr==NULL)
                return((*f__doend)());
            f__cnt[f__cp]--;
            f__workdone=1;
            /* f__doed: >0 is reported through errfl with errno, <0 as EOF */
            if((n=(*f__doed)(p,ptr,len))>0)
                errfl(f__elist->cierr,errno,"fmt");
            if(n<0)
                err(f__elist->ciend,(EOF),"fmt");
            continue;
        case STACK:
            /* push a repeat count */
            f__cnt[++f__cp]=p->p1;
            f__pc++;
            goto loop;
        case RET1:
            /* push the location this group returns to */
            f__ret[++f__rp]=p->p1;
            f__pc++;
            goto loop;
        case GOTO:
            /* end of an inner group: leave it when its count is exhausted,
               otherwise jump to the op after the saved location */
            if(--f__cnt[f__cp]<=0)
            {
                f__cp--;
                f__rp--;
                f__pc++;
                goto loop;
            }
            f__pc=1+f__ret[f__rp--];
            goto loop;
        case REVERT:
            /* end of the whole format: restart from the revert location */
            f__rp=f__cp=0;
            f__pc = p->p1;
            if(ptr==NULL)
                return((*f__doend)());
            /* nothing was transferred on this pass: stop here rather than
               run the format again */
            if(!f__workdone) return(0);
            if((n=(*f__dorevert)()) != 0) return(n);
            goto loop;
        case COLON:
            if(ptr==NULL)
                return((*f__doend)());
            f__pc++;
            goto loop;
        case NONL:
            f__nonl = 1;
            f__pc++;
            goto loop;
        case S:
        case SS:
            f__cplus=0;
            f__pc++;
            goto loop;
        case SP:
            f__cplus = 1;
            f__pc++;
            goto loop;
        case P: f__scale=p->p1;
            f__pc++;
            goto loop;
        case BN:
            f__cblank=0;
            f__pc++;
            goto loop;
        case BZ:
            f__cblank=1;
            f__pc++;
            goto loop;
        }
    }
    return(0);
}

/*
 * Finish formatted I/O: one do_fio pass with a NULL data pointer, which
 * executes ops until a case that checks for the NULL pointer (ED, REVERT or
 * COLON) returns through f__doend.
 */
int
en_fio(void)
{
    ftnint one=1;
    return(do_fio(&one,(char *)NULL,(ftnint)0));
}

/* Reset the interpreter state before a format is executed. */
void
fmt_bg(void)
{
    f__workdone=f__cp=f__rp=f__pc=f__cursor=0;
    f__cnt[0]=f__ret[0]=0;
}
blis-0.6.1/blastest/f2c/fmt.h000066400000000000000000000054631360743507500157230ustar00rootroot00000000000000/****************************************************************
Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore.

Permission to use, copy, modify, and distribute this software
and its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the names of AT&T, Bell Laboratories,
Lucent or Bellcore or any of their entities not be used in
advertising or publicity pertaining to distribution of the
software without specific, written prior permission.
AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ struct syl { int op; int p1; union { int i[2]; char *s;} p2; }; #define RET1 1 #define REVERT 2 #define GOTO 3 #define X 4 #define SLASH 5 #define STACK 6 #define I 7 #define ED 8 #define NED 9 #define IM 10 #define APOS 11 #define H 12 #define TL 13 #define TR 14 #define T 15 #define COLON 16 #define S 17 #define SP 18 #define SS 19 #define P 20 #define BN 21 #define BZ 22 #define F 23 #define E 24 #define EE 25 #define D 26 #define G 27 #define GE 28 #define L 29 #define A 30 #define AW 31 #define O 32 #define NONL 33 #define OM 34 #define Z 35 #define ZM 36 typedef union { real pf; doublereal pd; } ufloat; typedef union { short is; signed char ic; integer il; #ifdef Allow_TYQUAD longint ili; #endif } Uint; #ifdef __cplusplus extern "C" { #define Cextern extern "C" #else #define Cextern extern #endif /* __cplusplus */ extern const char *f__fmtbuf; extern int (*f__doed)(struct syl*, char*, ftnlen),(*f__doned)(struct syl*); extern int (*f__dorevert)(void); extern void fmt_bg(void); extern int pars_f(const char*); extern int rd_ed(struct syl*, char*, ftnlen),rd_ned(struct syl*); extern int w_ed(struct syl*, char*, ftnlen),w_ned(struct syl*); extern int wrt_E(ufloat*, int, int, int, ftnlen); extern int wrt_F(ufloat*, int, int, ftnlen); extern int wrt_L(Uint*, int, ftnlen); extern int f__pc,f__parenlvl,f__revloc; extern flag f__cblank,f__cplus,f__workdone, f__nonl; extern int f__scale; #ifdef __cplusplus } #endif #define GET(x) 
if((x=(*f__getn)())<0) return(x) #define VAL(x) (x!='\n'?x:' ') #define PUT(x) (*f__putn)(x) #undef TYQUAD #ifndef Allow_TYQUAD #undef longint #define longint long #else #define TYQUAD 14 #endif blis-0.6.1/blastest/f2c/fmtlib.c000066400000000000000000000037271360743507500164060ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include /* @(#)fmtlib.c 1.2 */ #define MAXINTLENGTH 23 #include "f2c.h" #ifndef Allow_TYQUAD #undef longint #define longint long #undef ulongint #define ulongint unsigned long #endif #ifdef INTEGER_STAR_8 char *f__icvt(longint value, int *ndigit, int *sign, int base) #else char *f__icvt(integer value, int *ndigit, int *sign, int base) #endif { static char buf[MAXINTLENGTH+1]; register int i; ulongint uvalue; if(value > 0) { uvalue = value; *sign = 0; } else if (value < 0) { uvalue = -value; *sign = 1; } else { *sign = 0; *ndigit = 1; buf[MAXINTLENGTH-1] = '0'; return &buf[MAXINTLENGTH-1]; } i = MAXINTLENGTH; do { buf[--i] = (uvalue%base) + '0'; uvalue /= base; } while(uvalue > 0); *ndigit = MAXINTLENGTH - i; return &buf[i]; } blis-0.6.1/blastest/f2c/fp.h000066400000000000000000000035251360743507500155370ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. 
In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #define FMAX 40 #define EXPMAXDIGS 8 #define EXPMAX 99999999 /* FMAX = max number of nonzero digits passed to atof() */ /* EXPMAX = 10^EXPMAXDIGS - 1 = largest allowed exponent absolute value */ #ifdef V10 /* Research Tenth-Edition Unix */ #include "local.h" #endif /* MAXFRACDIGS and MAXINTDIGS are for wrt_F -- bounds (not necessarily tight) on the maximum number of digits to the right and left of * the decimal point. */ #ifdef VAX #define MAXFRACDIGS 56 #define MAXINTDIGS 38 #else #ifdef CRAY #define MAXFRACDIGS 9880 #define MAXINTDIGS 9864 #else /* values that suffice for IEEE double */ #define MAXFRACDIGS 344 #define MAXINTDIGS 308 #endif #endif blis-0.6.1/blastest/f2c/h_dnnt.c000066400000000000000000000025421360743507500163750ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. 
In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif shortint h_dnnt(const doublereal *x) { return (shortint)round(*x); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/hl_cmp.c000066400000000000000000000035201360743507500163620ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif extern integer s_cmp(const char *, const char *, ftnlen, ftnlen); shortlogical hl_ge(const char *a, const char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) >= 0); } shortlogical hl_gt(const char *a, const char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) > 0); } shortlogical hl_le(const char *a, const char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) <= 0); } shortlogical hl_lt(const char *a, const char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) < 0); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/i_dnnt.c000066400000000000000000000025401360743507500163740ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif integer i_dnnt(const doublereal *x) { return (integer)round(*x); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/i_len.c000066400000000000000000000025151360743507500162110ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif integer i_len(const char *s, ftnlen n) { return(n); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/imag.c000066400000000000000000000025671360743507500160470ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_imag(complex *z) { return z->i; } double d_imag(const doublecomplex *z) { return z->i; } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/int.c000066400000000000000000000027251360743507500157200ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double d_int(const doublereal *x) { return( (*x>0) ? floor(*x) : -floor(- *x) ); } double r_int(real *x) { return( (*x>0) ? floor(*x) : -floor(- *x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/l_cmp.c000066400000000000000000000034221360743507500162130ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif extern integer s_cmp(const char *a0, const char *b0, ftnlen la, ftnlen lb); logical l_ge(char *a, char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) >= 0); } logical l_gt(char *a, char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) > 0); } logical l_le(char *a, char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) <= 0); } logical l_lt(char *a, char *b, ftnlen la, ftnlen lb) { return(s_cmp(a,b,la,lb) < 0); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/lg10.c000066400000000000000000000026471360743507500156740ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_lg10(real *x) { return( log10(*x) ); } double d_lg10(const doublereal *x) { return( log10(*x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/lio.h000066400000000000000000000040501360743507500157070ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* copy of ftypes from the compiler */ /* variable types * numeric assumptions: * int < reals < complexes * TYDREAL-TYREAL = TYDCOMPLEX-TYCOMPLEX */ /* 0-10 retain their old (pre LOGICAL*1, etc.) */ /* values to allow mixing old and new objects. */ #define TYUNKNOWN 0 #define TYADDR 1 #define TYSHORT 2 #define TYLONG 3 #define TYREAL 4 #define TYDREAL 5 #define TYCOMPLEX 6 #define TYDCOMPLEX 7 #define TYLOGICAL 8 #define TYCHAR 9 #define TYSUBR 10 #define TYINT1 11 #define TYLOGICAL1 12 #define TYLOGICAL2 13 #ifdef Allow_TYQUAD #undef TYQUAD #define TYQUAD 14 #endif #define LINTW 24 #define LINE 80 #define LLOGW 2 #define LGFMT "%.9G" /* LEFBL 20 should suffice; 24 overcomes a NeXT bug. 
*/ #define LEFBL 24 typedef union { char flchar; short flshort; ftnint flint; #ifdef Allow_TYQUAD longint fllongint; #endif real flreal; doublereal fldouble; } flex; blis-0.6.1/blastest/f2c/log.c000066400000000000000000000033431360743507500157040ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_log(real *x) { return( log(*x) ); } double d_log(const doublereal *x) { return( log(*x) ); } void c_log(complex *r, complex *z) { double zi, zr; r->i = atan2(zi = z->i, zr = z->r); r->r = log( hypot(zr, zi) ); } void z_log(doublecomplex *r, doublecomplex *z) { double zi, zr; r->i = atan2(zi = z->i, zr = z->r); r->r = log( hypot(zr, zi) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/lread.c000066400000000000000000000351371360743507500162200ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include #include #include "f2c.h" #include "fio.h" /* Compile with -DF8X_NML_ELIDE_QUOTES to permit eliding quotation */ /* marks in namelist input a la the Fortran 8X Draft published in */ /* the May 1989 issue of Fortran Forum. */ #ifdef Allow_TYQUAD static longint f__llx; #endif #undef abs #undef min #undef max #include #include "fmt.h" #include "lio.h" #include "fp.h" int l_eof; int (*l_getc)(void); int (*l_ungetc)(int,FILE*); int (*f__lioproc)(ftnint*, char*, ftnlen, ftnint); #define isblnk(x) (f__ltab[x+1]&B) #define issep(x) (f__ltab[x+1]&SX) #define isapos(x) (f__ltab[x+1]&AX) #define isexp(x) (f__ltab[x+1]&EX) #define issign(x) (f__ltab[x+1]&SG) #define iswhit(x) (f__ltab[x+1]&WH) #define SX 1 #define B 2 #define AX 4 #define EX 8 #define SG 16 #define WH 32 static char f__ltab[128+1] = { /* offset one for EOF */ 0, 0,0,AX,0,0,0,0,0,0,WH|B,SX|WH,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, SX|B|WH,0,AX,0,0,0,0,AX,0,0,0,SG,SX,SG,0,SX, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,EX,EX,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, AX,0,0,0,EX,EX,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; #ifdef ungetc static int un_getc(int x, FILE *f__cf) { return ungetc(x,f__cf); } #else #define un_getc ungetc #endif int t_getc(void) { int ch; if(f__curunit->uend) return(EOF); if((ch=getc(f__cf))!=EOF) return(ch); if(feof(f__cf)) f__curunit->uend = l_eof = 1; return(EOF); } integer e_rsle(void) { int ch; if(f__curunit->uend) return(0); while((ch=t_getc())!='\n') if (ch == EOF) { if(feof(f__cf)) f__curunit->uend = l_eof = 1; return EOF; } return(0); } flag f__lquit; int f__lcount,f__ltype,nml_read; char *f__lchar; double f__lx,f__ly; #define ERR(x) if(n=(x)) return(n) #define GETC(x) (x=(*l_getc)()) #define Ungetc(x,y) (*l_ungetc)(x,y) static int l_R(int poststar, int reqint) { char s[FMAX+EXPMAXDIGS+4]; register int ch; register char *sp, *spe, *sp1; long e, exp; int havenum, havestar, se; 
if (!poststar) { if (f__lcount > 0) return(0); f__lcount = 1; } #ifdef Allow_TYQUAD f__llx = 0; #endif f__ltype = 0; exp = 0; havestar = 0; retry: sp1 = sp = s; spe = sp + FMAX; havenum = 0; switch(GETC(ch)) { case '-': *sp++ = ch; sp1++; spe++; case '+': GETC(ch); } while(ch == '0') { ++havenum; GETC(ch); } while(isdigit(ch)) { if (sp < spe) *sp++ = ch; else ++exp; GETC(ch); } if (ch == '*' && !poststar) { if (sp == sp1 || exp || *s == '-') { errfl(f__elist->cierr,112,"bad repetition count"); } poststar = havestar = 1; *sp = 0; f__lcount = atoi(s); goto retry; } if (ch == '.') { #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT if (reqint) errfl(f__elist->cierr,115,"invalid integer"); #endif GETC(ch); if (sp == sp1) while(ch == '0') { ++havenum; --exp; GETC(ch); } while(isdigit(ch)) { if (sp < spe) { *sp++ = ch; --exp; } GETC(ch); } } havenum += sp - sp1; se = 0; if (issign(ch)) goto signonly; if (havenum && isexp(ch)) { #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT if (reqint) errfl(f__elist->cierr,115,"invalid integer"); #endif GETC(ch); if (issign(ch)) { signonly: if (ch == '-') se = 1; GETC(ch); } if (!isdigit(ch)) { bad: errfl(f__elist->cierr,112,"exponent field"); } e = ch - '0'; while(isdigit(GETC(ch))) { e = 10*e + ch - '0'; if (e > EXPMAX) goto bad; } if (se) exp -= e; else exp += e; } (void) Ungetc(ch, f__cf); if (sp > sp1) { ++havenum; while(*--sp == '0') ++exp; if (exp) sprintf(sp+1, "e%ld", exp); else sp[1] = 0; f__lx = atof(s); #ifdef Allow_TYQUAD if (reqint&2 && (se = sp - sp1 + exp) > 14 && se < 20) { /* Assuming 64-bit longint and 32-bit long. 
*/ if (exp < 0) sp += exp; if (sp1 <= sp) { f__llx = *sp1 - '0'; while(++sp1 <= sp) f__llx = 10*f__llx + (*sp1 - '0'); } while(--exp >= 0) f__llx *= 10; if (*s == '-') f__llx = -f__llx; } #endif } else f__lx = 0.; if (havenum) f__ltype = TYLONG; else switch(ch) { case ',': case '/': break; default: if (havestar && ( ch == ' ' ||ch == '\t' ||ch == '\n')) break; if (nml_read > 1) { f__lquit = 2; return 0; } errfl(f__elist->cierr,112,"invalid number"); } return 0; } static int rd_count(register int ch) { if (ch < '0' || ch > '9') return 1; f__lcount = ch - '0'; while(GETC(ch) >= '0' && ch <= '9') f__lcount = 10*f__lcount + ch - '0'; Ungetc(ch,f__cf); return f__lcount <= 0; } static int l_C(void) { int ch, nml_save; double lz; if(f__lcount>0) return(0); f__ltype=0; GETC(ch); if(ch!='(') { if (nml_read > 1 && (ch < '0' || ch > '9')) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } if (rd_count(ch)) if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"complex format"); else err(f__elist->cierr,(EOF),"lread"); if(GETC(ch)!='*') { if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"no star"); else err(f__elist->cierr,(EOF),"lread"); } if(GETC(ch)!='(') { Ungetc(ch,f__cf); return(0); } } else f__lcount = 1; while(iswhit(GETC(ch))); Ungetc(ch,f__cf); nml_save = nml_read; nml_read = 0; if (ch = l_R(1,0)) return ch; if (!f__ltype) errfl(f__elist->cierr,112,"no real part"); lz = f__lx; while(iswhit(GETC(ch))); if(ch!=',') { (void) Ungetc(ch,f__cf); errfl(f__elist->cierr,112,"no comma"); } while(iswhit(GETC(ch))); (void) Ungetc(ch,f__cf); if (ch = l_R(1,0)) return ch; if (!f__ltype) errfl(f__elist->cierr,112,"no imaginary part"); while(iswhit(GETC(ch))); if(ch!=')') errfl(f__elist->cierr,112,"no )"); f__ly = f__lx; f__lx = lz; #ifdef Allow_TYQUAD f__llx = 0; #endif nml_read = nml_save; return(0); } static char nmLbuf[256], *nmL_next; static int (*nmL_getc_save)(void); static int (*nmL_ungetc_save)(int, FILE*); static int nmL_getc(void) { int rv; if (rv = *nmL_next++) return rv; 
l_getc = nmL_getc_save; l_ungetc = nmL_ungetc_save; return (*l_getc)(); } static int nmL_ungetc(int x, FILE *f) { /* f = f;*/ /* banish non-use warning */ ( void )f; return *--nmL_next = x; } static int Lfinish(int ch, int dot, int *rvp) { char *s, *se; static char what[] = "namelist input"; s = nmLbuf + 2; se = nmLbuf + sizeof(nmLbuf) - 1; *s++ = ch; while(!issep(GETC(ch)) && ch!=EOF) { if (s >= se) { nmLbuf_ovfl: return *rvp = err__fl(f__elist->cierr,131,what); } *s++ = ch; if (ch != '=') continue; if (dot) return *rvp = err__fl(f__elist->cierr,112,what); got_eq: *s = 0; nmL_getc_save = l_getc; l_getc = nmL_getc; nmL_ungetc_save = l_ungetc; l_ungetc = nmL_ungetc; nmLbuf[1] = *(nmL_next = nmLbuf) = ','; *rvp = f__lcount = 0; return 1; } if (dot) goto done; for(;;) { if (s >= se) goto nmLbuf_ovfl; *s++ = ch; if (!isblnk(ch)) break; if (GETC(ch) == EOF) goto done; } if (ch == '=') goto got_eq; done: Ungetc(ch, f__cf); return 0; } static int l_L(void) { int ch, rv, sawdot; if(f__lcount>0) return(0); f__lcount = 1; f__ltype=0; GETC(ch); if(isdigit(ch)) { rd_count(ch); if(GETC(ch)!='*') if(!f__cf || !feof(f__cf)) errfl(f__elist->cierr,112,"no star"); else err(f__elist->cierr,(EOF),"lread"); GETC(ch); } sawdot = 0; if(ch == '.') { sawdot = 1; GETC(ch); } switch(ch) { case 't': case 'T': if (nml_read && Lfinish(ch, sawdot, &rv)) return rv; f__lx=1; break; case 'f': case 'F': if (nml_read && Lfinish(ch, sawdot, &rv)) return rv; f__lx=0; break; default: if(isblnk(ch) || issep(ch) || ch==EOF) { (void) Ungetc(ch,f__cf); return(0); } if (nml_read > 1) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } errfl(f__elist->cierr,112,"logical"); } f__ltype=TYLONG; while(!issep(GETC(ch)) && ch!=EOF); Ungetc(ch, f__cf); return(0); } #define BUFSIZE 128 static int l_CHAR(void) { int ch,size,i; static char rafail[] = "realloc failure"; char quote,*p; if(f__lcount>0) return(0); f__ltype=0; if(f__lchar!=NULL) free(f__lchar); size=BUFSIZE; p=f__lchar = (char *)malloc((unsigned int)size); 
if(f__lchar == NULL) errfl(f__elist->cierr,113,"no space"); GETC(ch); if(isdigit(ch)) { /* allow Fortran 8x-style unquoted string... */ /* either find a repetition count or the string */ f__lcount = ch - '0'; *p++ = ch; for(i = 1;;) { switch(GETC(ch)) { case '*': if (f__lcount == 0) { f__lcount = 1; #ifndef F8X_NML_ELIDE_QUOTES if (nml_read) goto no_quote; #endif goto noquote; } p = f__lchar; goto have_lcount; case ',': case ' ': case '\t': case '\n': case '/': Ungetc(ch,f__cf); /* no break */ case EOF: f__lcount = 1; f__ltype = TYCHAR; return *p = 0; } if (!isdigit(ch)) { f__lcount = 1; #ifndef F8X_NML_ELIDE_QUOTES if (nml_read) { no_quote: errfl(f__elist->cierr,112, "undelimited character string"); } #endif goto noquote; } *p++ = ch; f__lcount = 10*f__lcount + ch - '0'; if (++i == size) { f__lchar = (char *)realloc(f__lchar, (unsigned int)(size += BUFSIZE)); if(f__lchar == NULL) errfl(f__elist->cierr,113,rafail); p = f__lchar + i; } } } else (void) Ungetc(ch,f__cf); have_lcount: if(GETC(ch)=='\'' || ch=='"') quote=ch; else if(isblnk(ch) || (issep(ch) && ch != '\n') || ch==EOF) { Ungetc(ch,f__cf); return 0; } #ifndef F8X_NML_ELIDE_QUOTES else if (nml_read > 1) { Ungetc(ch,f__cf); f__lquit = 2; return 0; } #endif else { /* Fortran 8x-style unquoted string */ *p++ = ch; for(i = 1;;) { switch(GETC(ch)) { case ',': case ' ': case '\t': case '\n': case '/': Ungetc(ch,f__cf); /* no break */ case EOF: f__ltype = TYCHAR; return *p = 0; } noquote: *p++ = ch; if (++i == size) { f__lchar = (char *)realloc(f__lchar, (unsigned int)(size += BUFSIZE)); if(f__lchar == NULL) errfl(f__elist->cierr,113,rafail); p = f__lchar + i; } } } f__ltype=TYCHAR; for(i=0;;) { while(GETC(ch)!=quote && ch!='\n' && ch!=EOF && ++icierr,113,rafail); p=f__lchar+i-1; *p++ = ch; } else if(ch==EOF) return(EOF); else if(ch=='\n') { if(*(p-1) != '\\') continue; i--; p--; if(++iciunit]; if(a->ciunit>=MXUNIT || a->ciunit<0) err(a->cierr,101,"stler"); f__scale=f__recpos=0; f__elist=a; 
if(f__curunit->ufd==NULL && fk_open(SEQ,FMT,a->ciunit)) err(a->cierr,102,"lio"); f__cf=f__curunit->ufd; if(!f__curunit->ufmt) err(a->cierr,103,"lio") return(0); } int l_read(ftnint *number, char *ptr, ftnlen len, ftnint type) { #define Ptr ((flex *)ptr) int i,n,ch; doublereal *yy; real *xx; for(i=0;i<*number;i++) { if(f__lquit) return(0); if(l_eof) err(f__elist->ciend, EOF, "list in") if(f__lcount == 0) { f__ltype = 0; for(;;) { GETC(ch); switch(ch) { case EOF: err(f__elist->ciend,(EOF),"list in") case ' ': case '\t': case '\n': continue; case '/': f__lquit = 1; goto loopend; case ',': f__lcount = 1; goto loopend; default: (void) Ungetc(ch, f__cf); goto rddata; } } } rddata: switch((int)type) { case TYINT1: case TYSHORT: case TYLONG: #ifndef ALLOW_FLOAT_IN_INTEGER_LIST_INPUT ERR(l_R(0,1)); break; #endif case TYREAL: case TYDREAL: ERR(l_R(0,0)); break; #ifdef TYQUAD case TYQUAD: n = l_R(0,2); if (n) return n; break; #endif case TYCOMPLEX: case TYDCOMPLEX: ERR(l_C()); break; case TYLOGICAL1: case TYLOGICAL2: case TYLOGICAL: ERR(l_L()); break; case TYCHAR: ERR(l_CHAR()); break; } while (GETC(ch) == ' ' || ch == '\t'); if (ch != ',' || f__lcount > 1) Ungetc(ch,f__cf); loopend: if(f__lquit) return(0); if(f__cf && ferror(f__cf)) { clearerr(f__cf); errfl(f__elist->cierr,errno,"list in"); } if(f__ltype==0) goto bump; switch((int)type) { case TYINT1: case TYLOGICAL1: Ptr->flchar = (char)f__lx; break; case TYLOGICAL2: case TYSHORT: Ptr->flshort = (short)f__lx; break; case TYLOGICAL: case TYLONG: Ptr->flint = (ftnint)f__lx; break; #ifdef Allow_TYQUAD case TYQUAD: if (!(Ptr->fllongint = f__llx)) Ptr->fllongint = f__lx; break; #endif case TYREAL: Ptr->flreal=f__lx; break; case TYDREAL: Ptr->fldouble=f__lx; break; case TYCOMPLEX: xx=(real *)ptr; *xx++ = f__lx; *xx = f__ly; break; case TYDCOMPLEX: yy=(doublereal *)ptr; *yy++ = f__lx; *yy = f__ly; break; case TYCHAR: b_char(f__lchar,ptr,len); break; } bump: if(f__lcount>0) f__lcount--; ptr += len; if (nml_read) nml_read++; } 
return(0); #undef Ptr } integer s_rsle(cilist *a) { int n; f__reading=1; f__external=1; f__formatted=1; if(n=c_le(a)) return(n); f__lioproc = l_read; f__lquit = 0; f__lcount = 0; l_eof = 0; if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,"read start"); if(f__curunit->uend) err(f__elist->ciend,(EOF),"read start"); l_getc = t_getc; l_ungetc = un_getc; f__doend = xrd_SL; return(0); } blis-0.6.1/blastest/f2c/lwrite.c000066400000000000000000000116141360743507500164310ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include #include "f2c.h" #include "fio.h" #include "fmt.h" #include "lio.h" #include "arith.h" ftnint L_len; int f__Aquote; static void donewrec(void) { if (f__recpos) (*f__donewrec)(); } static void lwrt_I(longint n) { char *p; int ndigit, sign; p = f__icvt(n, &ndigit, &sign, 10); if(f__recpos + ndigit >= L_len) donewrec(); PUT(' '); if (sign) PUT('-'); while(*p) PUT(*p++); } static void lwrt_L(ftnint n, ftnlen len) { if(f__recpos+LLOGW>=L_len) donewrec(); wrt_L((Uint *)&n,LLOGW, len); } static void lwrt_A(char *p, ftnlen len) { int a; char *p1, *pe; a = 0; pe = p + len; if (f__Aquote) { a = 3; if (len > 1 && p[len-1] == ' ') { while(--len > 1 && p[len-1] == ' '); pe = p + len; } p1 = p; while(p1 < pe) if (*p1++ == '\'') a++; } if(f__recpos+len+a >= L_len) donewrec(); if (a #ifndef OMIT_BLANK_CC || !f__recpos #endif ) PUT(' '); if (a) { PUT('\''); while(p < pe) { if (*p == '\'') PUT('\''); PUT(*p++); } PUT('\''); } else while(p < pe) PUT(*p++); } static int l_g(char *buf, double n) { register char *b, c, c1; b = buf; *b++ = ' '; if (n < 0) { *b++ = '-'; n = -n; } else *b++ = ' '; if (n == 0) { #ifdef SIGNED_ZEROS if (signbit(n)) *b++ = '-'; #endif *b++ = '0'; *b++ = '.'; *b = 0; goto f__ret; } sprintf(b, LGFMT, n); switch(*b) { #ifndef WANT_LEAD_0 case '0': while(b[0] = b[1]) b++; break; #endif case 'i': case 'I': /* Infinity */ case 'n': case 'N': /* NaN */ while(*++b); break; default: /* Fortran 77 insists on having a decimal point... 
*/ for(;; b++) switch(*b) { case 0: *b++ = '.'; *b = 0; goto f__ret; case '.': while(*++b); goto f__ret; case 'E': for(c1 = '.', c = 'E'; *b = c1; c1 = c, c = *++b); goto f__ret; } } f__ret: return b - buf; } static void l_put(register char *s) { #ifdef KR_headers register void (*pn)() = f__putn; #else register void (*pn)(int) = f__putn; #endif register int c; while(c = *s++) (*pn)(c); } static void lwrt_F(double n) { char buf[LEFBL]; if(f__recpos + l_g(buf,n) >= L_len) donewrec(); l_put(buf); } static void lwrt_C(double a, double b) { char *ba, *bb, bufa[LEFBL], bufb[LEFBL]; int al, bl; al = l_g(bufa, a); for(ba = bufa; *ba == ' '; ba++) --al; bl = l_g(bufb, b) + 1; /* intentionally high by 1 */ for(bb = bufb; *bb == ' '; bb++) --bl; if(f__recpos + al + bl + 3 >= L_len) donewrec(); #ifdef OMIT_BLANK_CC else #endif PUT(' '); PUT('('); l_put(ba); PUT(','); if (f__recpos + bl >= L_len) { (*f__donewrec)(); #ifndef OMIT_BLANK_CC PUT(' '); #endif } l_put(bb); PUT(')'); } int l_write(ftnint *number, char *ptr, ftnlen len, ftnint type) { #define Ptr ((flex *)ptr) int i; longint x; double y,z; real *xx; doublereal *yy; for(i=0;i< *number; i++) { switch((int)type) { default: f__fatal(117,"unknown type in lio"); case TYINT1: x = Ptr->flchar; goto xint; case TYSHORT: x=Ptr->flshort; goto xint; #ifdef Allow_TYQUAD case TYQUAD: x = Ptr->fllongint; goto xint; #endif case TYLONG: x=Ptr->flint; xint: lwrt_I(x); break; case TYREAL: y=Ptr->flreal; goto xfloat; case TYDREAL: y=Ptr->fldouble; xfloat: lwrt_F(y); break; case TYCOMPLEX: xx= &Ptr->flreal; y = *xx++; z = *xx; goto xcomplex; case TYDCOMPLEX: yy = &Ptr->fldouble; y= *yy++; z = *yy; xcomplex: lwrt_C(y,z); break; case TYLOGICAL1: x = Ptr->flchar; goto xlog; case TYLOGICAL2: x = Ptr->flshort; goto xlog; case TYLOGICAL: x = Ptr->flint; xlog: lwrt_L(Ptr->flint, len); break; case TYCHAR: lwrt_A(ptr,len); break; } ptr += len; } return(0); } 
blis-0.6.1/blastest/f2c/mod.c000066400000000000000000000037241360743507500157050ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif shortint h_mod(const short *a, const short *b) { return( *a % *b); } integer i_mod(const integer *a, const integer *b) { return( *a % *b); } double r_mod(real *x, real *y) { double quotient; if( (quotient = (double)*x / *y) >= 0) quotient = floor(quotient); else quotient = -floor(-quotient); return(*x - (*y) * quotient ); } double d_mod(const doublereal *x, const doublereal *y) { double quotient; if( (quotient = *x / *y) >= 0) quotient = floor(quotient); else quotient = -floor(-quotient); return(*x - (*y) * quotient ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/nint.c000066400000000000000000000033231360743507500160710ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double d_nint(const doublereal *x) { return( (*x)>=0 ? floor(*x + .5) : -floor(.5 - *x) ); } shortint h_nint(const real *x) { return (shortint)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); } integer i_nint(const real *x) { return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x)); } double r_nint(real *x) { return( (*x)>=0 ? floor(*x + .5) : -floor(.5 - *x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/open.c000066400000000000000000000141011360743507500160560ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include #include #include #ifndef NON_UNIX_STDIO #include #endif #ifdef _MSC_VER #define access _access #endif #include "f2c.h" #include "fio.h" const char *f__r_mode[2] = {"rb", "r"}; const char *f__w_mode[4] = {"wb", "w", "r+b", "r+"}; static char f__buf0[400], *f__buf = f__buf0; static int f__buflen = (int)sizeof(f__buf0); static void f__bufadj(int n, int c) { unsigned int len; char *nbuf, *s, *t, *te; if (f__buf == f__buf0) f__buflen = 1024; while(f__buflen <= n) f__buflen <<= 1; len = (unsigned int)f__buflen; if (len != f__buflen || !(nbuf = (char*)malloc(len))) { f__fatal(113, "malloc failure"); } else { s = nbuf; t = f__buf; te = t + c; while (t < te) *s++ = *t++; if (f__buf != f__buf0) free(f__buf); f__buf = nbuf; } } int f__putbuf(int c) { char *s, *se; int n; if (f__hiwater > f__recpos) f__recpos = f__hiwater; n = f__recpos + 1; if (n >= f__buflen) f__bufadj(n, f__recpos); s = f__buf; se = s + f__recpos; if (c) *se++ = c; *se = 0; for(;;) { fputs(s, f__cf); s += strlen(s); if (s >= se) break; /* normally happens the first time */ putc(*s++, f__cf); } return 0; } void x_putc(int c) { if (f__recpos >= f__buflen) f__bufadj(f__recpos, f__buflen); f__buf[f__recpos++] = c; } #define opnerr(f,m,s) {if(f) errno= m; else opn_err(m,s,a); return(m);} static void opn_err(int m, const char *s, olist *a) { if (a->ofnm) { /* supply file name to error message */ if (a->ofnmlen >= f__buflen) f__bufadj((int)a->ofnmlen, 0); g_char(a->ofnm, a->ofnmlen, f__curunit->ufnm = f__buf); } f__fatal(m, s); } integer f_open(olist *a) { unit *b; integer rv; char buf[256], *s; cllist x; int ufmt; FILE *tf; #ifndef NON_UNIX_STDIO int n; #endif f__external = 1; if(a->ounit>=MXUNIT || a->ounit<0) err(a->oerr,101,"open") if (!f__init) f_init(); f__curunit = b = &f__units[a->ounit]; if(b->ufd) { if(a->ofnm==0) { same: if (a->oblnk) b->ublnk = *a->oblnk == 'z' || *a->oblnk == 'Z'; return(0); } #ifdef NON_UNIX_STDIO if (b->ufnm 
&& strlen(b->ufnm) == a->ofnmlen && !strncmp(b->ufnm, a->ofnm, (unsigned)a->ofnmlen)) goto same; #else g_char(a->ofnm,a->ofnmlen,buf); if (f__inode(buf,&n) == b->uinode && n == b->udev) goto same; #endif x.cunit=a->ounit; x.csta=0; x.cerr=a->oerr; if ((rv = f_clos(&x)) != 0) return rv; } b->url = (int)a->orl; b->ublnk = a->oblnk && (*a->oblnk == 'z' || *a->oblnk == 'Z'); if(a->ofm==0) { if(b->url>0) b->ufmt=0; else b->ufmt=1; } else if(*a->ofm=='f' || *a->ofm == 'F') b->ufmt=1; else b->ufmt=0; ufmt = b->ufmt; #ifdef url_Adjust if (b->url && !ufmt) url_Adjust(b->url); #endif if (a->ofnm) { g_char(a->ofnm,a->ofnmlen,buf); if (!buf[0]) opnerr(a->oerr,107,"open") } else sprintf(buf, "fort.%ld", (long)a->ounit); b->uscrtch = 0; b->uend=0; b->uwrt = 0; b->ufd = 0; b->urw = 3; switch(a->osta ? *a->osta : 'u') { case 'o': case 'O': if (access(buf,0)) opnerr(a->oerr,errno,"open") break; case 's': case 'S': b->uscrtch=1; #ifdef HAVE_TMPFILE if (!(b->ufd = tmpfile())) opnerr(a->oerr,errno,"open") b->ufnm = 0; #ifndef NON_UNIX_STDIO b->uinode = b->udev = -1; #endif b->useek = 1; return 0; #else (void) strcpy(buf,"tmp.FXXXXXX"); (void) mktemp(buf); goto replace; #endif case 'n': case 'N': if (!access(buf,0)) opnerr(a->oerr,128,"open") /* no break */ case 'r': /* Fortran 90 replace option */ case 'R': #ifndef HAVE_TMPFILE replace: #endif if (tf = fopen(buf,f__w_mode[0])) fclose(tf); } b->ufnm=(char *) malloc((unsigned int)(strlen(buf)+1)); if(b->ufnm==NULL) opnerr(a->oerr,113,"no space"); (void) strcpy(b->ufnm,buf); if ((s = a->oacc) && b->url) ufmt = 0; if(!(tf = fopen(buf, f__w_mode[ufmt|2]))) { if (tf = fopen(buf, f__r_mode[ufmt])) b->urw = 1; else if (tf = fopen(buf, f__w_mode[ufmt])) { b->uwrt = 1; b->urw = 2; } else err(a->oerr, errno, "open"); } b->useek = f__canseek(b->ufd = tf); #ifndef NON_UNIX_STDIO if((b->uinode = f__inode(buf,&b->udev)) == -1) opnerr(a->oerr,108,"open") #endif if(b->useek) if (a->orl) rewind(b->ufd); else if ((s = a->oacc) && (*s == 'a' || *s == 
'A') && FSEEK(b->ufd, 0L, SEEK_END)) opnerr(a->oerr,129,"open"); return(0); } int fk_open(int seq, int fmt, ftnint n) { char nbuf[10]; olist a; // FGVZ: gcc 7.3 outputs a warning that the integer value corresponding // to the "%ld" format specifier could (in theory) use up 11 bytes in a // string that only allows for five additional bytes. I use the modulo // operator to reassure gcc that the integer will be very small. //(void) sprintf(nbuf,"fort.%ld",(long)n); (void) sprintf(nbuf,"fort.%ld",(long)n % 20); a.oerr=1; a.ounit=n; a.ofnm=nbuf; a.ofnmlen=strlen(nbuf); a.osta=NULL; a.oacc= (char*)(seq==SEQ?"s":"d"); a.ofm = (char*)(fmt==FMT?"f":"u"); a.orl = seq==DIR?1:0; a.oblnk=NULL; return(f_open(&a)); } blis-0.6.1/blastest/f2c/pow.c000066400000000000000000000066141360743507500157340ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* Integer */ shortint pow_hh(shortint *ap, shortint *bp) { return (shortint)(pow(*ap, *bp)); } integer pow_ii(integer *ap, integer *bp) { return (integer)(pow(*ap, *bp)); } #ifdef INTEGER_STAR_8 longint pow_qq(longint *ap, longint *bp) { return (longint)(pow(*ap, *bp)); } #endif /* Double */ double pow_ri(real *ap, integer *bp) { return (pow(*ap, *bp)); } double pow_dd(doublereal *ap, doublereal *bp) { return (pow(*ap, *bp)); } double pow_di(doublereal *ap, integer *bp) { return (pow(*ap, *bp)); } /* Complex */ void pow_ci(complex *p, complex *a, integer *b) { doublecomplex p1, a1; a1.r = a->r; a1.i = a->i; pow_zi(&p1, &a1, b); p->r = p1.r; p->i = p1.i; } void pow_zz(doublecomplex *r, doublecomplex *a, doublecomplex *b) { double logr, logi, x, y; logr = log( hypot(a->r, a->i) ); logi = atan2(a->i, a->r); x = exp( logr * b->r - logi * b->i ); y = logr * b->i + logi * b->r; r->r = x * cos(y); r->i = x * sin(y); } void pow_zi(doublecomplex *p, doublecomplex *a, integer *b) { integer n; unsigned long u; double t; doublecomplex q, x; static doublecomplex one = {1.0, 0.0}; n = *b; q.r = 1; q.i = 0; if(n == 0) goto done; if(n < 0) { n = -n; z_div(&x, &one, a); } else { x.r = a->r; x.i = a->i; } for(u = n; ; ) { if(u & 01) { t = q.r * x.r - q.i * x.i; q.i = q.r * x.i + q.i * x.r; q.r = t; } if(u >>= 1) { t = x.r * x.r - x.i * x.i; x.i = 2 * x.r * x.i; x.r = t; } else break; } done: p->i = q.i; p->r = q.r; } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/prod.c000066400000000000000000000025761360743507500160760ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double d_prod(const real *x, const real *y) { return( (double)(*x) * (double)(*y) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/rdfmt.c000066400000000000000000000220711360743507500162360ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include #include #include #include "f2c.h" #include "fio.h" #include "fmt.h" #include "fp.h" static int rd_Z(Uint *n, int w, ftnlen len) { long x[9]; char *s, *s0, *s1, *se, *t; const char *sc; int ch, i, w1, w2; static char hex[256]; static int one = 1; int bad = 0; if (!hex['0']) { sc = "0123456789"; while(ch = *sc++) hex[ch] = ch - '0' + 1; sc = "ABCDEF"; while(ch = *sc++) hex[ch] = hex[ch + 'a' - 'A'] = ch - 'A' + 11; } s = s0 = (char *)x; s1 = (char *)&x[4]; se = (char *)&x[8]; if (len > 4*sizeof(long)) return errno = 117; while (w) { GET(ch); if (ch==',' || ch=='\n') break; w--; if (ch > ' ') { if (!hex[ch & 0xff]) bad++; *s++ = ch; if (s == se) { /* discard excess characters */ for(t = s0, s = s1; t < s1;) *t++ = *s++; s = s1; } } } if (bad) return errno = 115; w = (int)len; w1 = s - s0; w2 = w1+1 >> 1; t = (char *)n; if (*(char *)&one) { /* little endian */ t += w - 1; i = -1; } else i = 1; for(; w > w2; t += i, --w) *t = 0; if (!w) return 0; if (w < w2) s0 = s - (w << 1); else if (w1 & 1) { *t = hex[*s0++ & 0xff] - 1; if (!--w) return 0; t += i; } do { *t = hex[*s0 & 0xff]-1 << 4 | hex[s0[1] & 0xff]-1; t += i; s0 += 2; } while(--w); return 0; } static int rd_I(Uint *n, int w, ftnlen len, register int base) { int ch, sign; longint x = 0; if (w <= 0) goto have_x; for(;;) { GET(ch); if (ch != ' ') break; if (!--w) goto have_x; } sign = 0; switch(ch) { case ',': case '\n': w = 0; goto have_x; case '-': sign = 1; case '+': break; default: if (ch >= '0' && ch <= '9') { x = ch - '0'; break; } goto have_x; } while(--w) { GET(ch); if (ch >= '0' && ch <= '9') { x = x*base + ch - '0'; continue; } if (ch != ' ') { if (ch == '\n' || ch == ',') w = 0; break; } if (f__cblank) x *= base; } if (sign) x = -x; have_x: if(len == sizeof(integer)) n->il=x; else if(len == sizeof(char)) n->ic = (char)x; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) n->ili = x; #endif else n->is = (short)x; if (w) { 
while(--w) GET(ch); return errno = 115; } return 0; }

/*
 * rd_L -- read a LOGICAL value from an input field of width w.
 *
 * Leading blanks are skipped and one optional '.' may precede the letter.
 * 't'/'T' yields 1 and 'f'/'F' yields 0.  The value is stored through n
 * using the width selected by len (char, short, or ftnint otherwise).
 * After the letter, the rest of the field is consumed up to a ',' or '\n'.
 *
 * Returns 0 on success.  On a non-positive width, an all-blank field, or
 * any other leading character the remaining field is consumed and
 * errno = 116 is returned; a ',' or '\n' found in place of the value
 * returns 116 without consuming anything further.
 */
static int rd_L(ftnint *n, int w, ftnlen len) { int ch, dot, lv; if (w <= 0) goto bad; for(;;) { GET(ch); --w; if (ch != ' ') break; if (!w) goto bad; } dot = 0; retry: switch(ch) { case '.': if (dot++ || !w) goto bad; GET(ch); --w; goto retry; case 't': case 'T': lv = 1; break; case 'f': case 'F': lv = 0; break; default: bad: for(; w > 0; --w) GET(ch); /* no break */ case ',': case '\n': return errno = 116; } switch(len) { case sizeof(char): *(char *)n = (char)lv; break; case sizeof(short): *(short *)n = (short)lv; break; default: *n = lv; } while(w-- > 0) { GET(ch); if (ch == ',' || ch == '\n') break; } return 0; }

/*
 * rd_F -- read a floating-point value from an input field of width w.
 *
 * Significant digits are collected into s[] (at most FMAX of them; further
 * integer digits only bump the decimal exponent in exp).  exp starts at -d
 * and has d added back when an explicit '.' is seen.  An exponent part may
 * be introduced by e/E/d/D or by a bare sign; when one is present the
 * scale factor is ignored (scale1 = 0), otherwise f__scale is subtracted
 * from exp.  Embedded blanks count as '0' when f__cblank is set and are
 * skipped otherwise.  The assembled text is converted with atof() and
 * stored in p->pf when len == sizeof(real), else in p->pd.  A field with
 * no significant digits stores 0.
 *
 * Returns 0 on success and errno = 115 on a malformed field.
 */
static int rd_F(ufloat *p, int w, int d, ftnlen len) { char s[FMAX+EXPMAXDIGS+4]; register int ch; register char *sp, *spe, *sp1; double x; int scale1, se; long e, exp; sp1 = sp = s; spe = sp + FMAX; exp = -d; x = 0.; do { GET(ch); w--; } while (ch == ' ' && w); switch(ch) { case '-': *sp++ = ch; sp1++; spe++; case '+': if (!w) goto zero; --w; GET(ch); } while(ch == ' ') { blankdrop: if (!w--) goto zero; GET(ch); } while(ch == '0') { if (!w--) goto zero; GET(ch); } if (ch == ' ' && f__cblank) goto blankdrop; scale1 = f__scale; while(isdigit(ch)) { digloop1: if (sp < spe) *sp++ = ch; else ++exp; digloop1e: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) { ch = '0'; goto digloop1; } goto digloop1e; } if (ch == '.') { exp += d; if (!w--) goto done; GET(ch); if (sp == sp1) { /* no digits yet */ while(ch == '0') { skip01: --exp; skip0: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) goto skip01; goto skip0; } } while(isdigit(ch)) { digloop2: if (sp < spe) { *sp++ = ch; --exp; } digloop2e: if (!w--) goto done; GET(ch); } if (ch == ' ') { if (f__cblank) { ch = '0'; goto digloop2; } goto digloop2e; } } switch(ch) { default: break; case '-': se = 1; goto signonly; case '+': se = 0; goto signonly; case 'e': case 'E': case 'd': case 'D': if (!w--) goto bad; GET(ch); while(ch == ' ') { if (!w--) goto bad; GET(ch); } se = 0; switch(ch) { case '-': se = 1; case '+': signonly: if (!w--) goto bad; GET(ch); } while(ch == ' ') { if (!w--) goto bad; GET(ch); } if (!isdigit(ch)) goto bad; e = ch - '0'; for(;;) { if (!w--) { ch = '\n'; break; } GET(ch); if (!isdigit(ch)) { if (ch == ' ') { if (f__cblank) ch = '0'; else continue; } else break; } e = 10*e + ch - '0'; if (e > EXPMAX && sp > sp1) goto bad; } if (se) exp -= e; else exp += e; scale1 = 0; } switch(ch) { case '\n': case ',': break; default: bad: return (errno = 115); } done: if (sp > sp1) { while(*--sp == '0') ++exp; if (exp -= scale1) sprintf(sp+1, "e%ld", exp); else sp[1] = 0; x = atof(s); } zero: if (len == sizeof(real)) p->pf = x; else p->pd = x; return(0); }

/*
 * NOTE(review): the text from here up to the start of rd_ned() is damaged
 * in this copy: everything between a '<' and the following '>' appears to
 * have been dropped, which fuses rd_A with the routines that followed it
 * and with the header of the data-edit dispatcher.  What survives of the
 * dispatcher shows that it first applies any pending cursor motion
 * (reading forward, or moving f__icptr / seeking backward), then switches
 * on p->op to call rd_I, rd_L, rd_A, rd_AW, rd_F or rd_Z, and finally
 * returns 0 on success, EOF on end of file, or errno after clearing the
 * stream error state.  Restore this span from a pristine rdfmt.c before
 * relying on it.
 */
static int rd_A(char *p, ftnlen len) { int i,ch; for(i=0;i=len) { for(i=0;i0;f__cursor--) if((ch=(*f__getn)())<0) return(ch); if(f__cursor<0) { if(f__recpos+f__cursor < 0) /*err(elist->cierr,110,"fmt")*/ f__cursor = -f__recpos; /* is this in the standard? */ if(f__external == 0) { extern char *f__icptr; f__icptr += f__cursor; } else if(f__curunit && f__curunit->useek) (void) FSEEK(f__cf, f__cursor,SEEK_CUR); else err(f__elist->cierr,106,"fmt"); f__recpos += f__cursor; f__cursor=0; } switch(p->op) { default: fprintf(stderr,"rd_ed, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case IM: case I: ch = rd_I((Uint *)ptr,p->p1,len, 10); break; /* O and OM don't work right for character, double, complex, */ /* or doublecomplex, and they differ from Fortran 90 in */ /* showing a minus sign for negative values. */ case OM: case O: ch = rd_I((Uint *)ptr, p->p1, len, 8); break; case L: ch = rd_L((ftnint *)ptr,p->p1,len); break; case A: ch = rd_A(ptr,len); break; case AW: ch = rd_AW(ptr,p->p1,len); break; case E: case EE: case D: case G: case GE: case F: ch = rd_F((ufloat *)ptr,p->p1,p->p2.i[0],len); break; /* Z and ZM assume 8-bit bytes. */ case ZM: case Z: ch = rd_Z((Uint *)ptr, p->p1, len); break; } if(ch == 0) return(ch); else if(ch == EOF) return(EOF); if (f__cf) clearerr(f__cf); return(errno); }

/*
 * rd_ned -- handle a non-data edit descriptor while reading.
 *
 * APOS and H go to rd_POS / rd_H and SLASH starts a new record through
 * f__donewrec.  The positioning codes only update the pending cursor
 * offset f__cursor and return 1: TR/X move right by p->p1, T sets the
 * offset that reaches column p->p1, and TL moves left but never before the
 * start of the record.  An unknown code is reported on stderr and passed
 * to sig_die().
 */
int rd_ned(struct syl *p) { switch(p->op) { default: fprintf(stderr,"rd_ned, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case APOS: return(rd_POS(p->p2.s)); case H: return(rd_H(p->p1,p->p2.s)); case SLASH: return((*f__donewrec)()); case TR: case X: f__cursor += p->p1; return(1); case T: f__cursor=p->p1-f__recpos - 1; return(1); case TL: f__cursor -= p->p1; if(f__cursor < -f__recpos) /* TL1000, 1X */ f__cursor = -f__recpos; return(1); } }
blis-0.6.1/blastest/f2c/rewind.c000066400000000000000000000030601360743507500164070ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ //#include #include "f2c.h" #include "fio.h" integer f_rew(alist *a) { unit *b; if(a->aunit>=MXUNIT || a->aunit<0) err(a->aerr,101,"rewind"); b = &f__units[a->aunit]; if(b->ufd == NULL || b->uwrt == 3) return(0); if(!b->useek) err(a->aerr,106,"rewind") if(b->uwrt) { (void) t_runc(a); b->uwrt = 3; } rewind(b->ufd); b->uend=0; return 0; } blis-0.6.1/blastest/f2c/rsfe.c000066400000000000000000000050211360743507500160550ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ /* read sequential formatted external */ #include #include "f2c.h" #include "fio.h" #include "fmt.h" int xrd_SL(void) { int ch; if(!f__curunit->uend) while((ch=getc(f__cf))!='\n') if (ch == EOF) { f__curunit->uend = 1; break; } f__cursor=f__recpos=0; return 1; } int x_getc(void) { int ch; if(f__curunit->uend) return EOF; ch = getc(f__cf); if(ch!=EOF && ch!='\n') { f__recpos++; return ch; } if(ch=='\n') { (void) ungetc(ch,f__cf); return ch; } if(f__curunit->uend || feof(f__cf)) { errno=0; f__curunit->uend=1; return -1; } return -1; } int x_endp(void) { xrd_SL(); return f__curunit->uend == 1 ? EOF : 0; } int x_rev(void) { (void) xrd_SL(); return 0; } integer s_rsfe(cilist *a) /* start */ { int n; if(!f__init) f_init(); f__reading=1; f__sequential=1; f__formatted=1; f__external=1; if(n=c_sfe(a)) return n; f__elist=a; f__cursor=f__recpos=0; f__scale=0; f__fmtbuf=a->cifmt; f__cf=f__curunit->ufd; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); f__getn= x_getc; f__doed= rd_ed; f__doned= rd_ned; fmt_bg(); f__doend=x_endp; f__donewrec=xrd_SL; f__dorevert=x_rev; f__cblank=f__curunit->ublnk; f__cplus=0; if(f__curunit->uwrt && f__nowreading(f__curunit)) err(a->cierr,errno,"read start"); if(f__curunit->uend) err(f__elist->ciend,(EOF),"read start"); return 0; } blis-0.6.1/blastest/f2c/s_cmp.c000066400000000000000000000044241360743507500162250ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* compare two strings */ integer s_cmp(const char *a0, const char *b0, ftnlen la, ftnlen lb) { register unsigned char *a, *aend, *b, *bend; a = (unsigned char *)a0; b = (unsigned char *)b0; aend = a + la; bend = b + lb; if(la <= lb) { while(a < aend) if(*a != *b) return( *a - *b ); else { ++a; ++b; } while(b < bend) if(*b != ' ') return( ' ' - *b ); else ++b; } else { while(b < bend) if(*a == *b) { ++a; ++b; } else return( *a - *b ); while(a < aend) if(*a != ' ') return(*a - ' '); else ++a; } return(0); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/s_copy.c000066400000000000000000000034021360743507500164130ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the * target of an assignment to appear on its right-hand side (contrary * to the Fortran 77 Standard, but in accordance with Fortran 90), * as in a(2:5) = a(4:7) . */ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif /* assign strings: a = b */ int s_copy(char *a, const char *b, ftnlen la, ftnlen lb) { if (la <= lb) { memmove(a, b, la); } else { memset((char *)memmove(a, b, lb) + lb, ' ', la - lb); } return 0; } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/s_stop.c000066400000000000000000000032511360743507500164300ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/
//#include
#include
#include "f2c.h"

/*
 * s_stop -- Fortran STOP: when n > 0, "STOP " is written to stderr ahead
 * of the message characters.
 *
 * NOTE(review): this copy is truncated inside the for-loop header below.
 * Everything from the loop condition to the first #include of the next
 * source file is missing, so the rest of s_stop is not available here;
 * restore it from a pristine copy.
 */
int s_stop(char *s, ftnlen n) { int i; if(n > 0) { fprintf(stderr, "STOP "); for(i = 0; i #include "f2c.h"
#include "fio.h"

/*
 * e_rsfe -- end a sequential formatted external READ.
 * Returns the result of en_fio() and clears f__fmtbuf.
 */
integer e_rsfe(void) { int n; n=en_fio(); f__fmtbuf=NULL; return(n); }

/*
 * c_sfe -- common unit check for sequential formatted external I/O.
 *
 * Makes a->ciunit the current unit, opening it through
 * fk_open(SEQ,FMT,...) when it has no stream yet.  Returns 0 on success;
 * otherwise goes through err() with 101 (unit number out of range), 114
 * (open failed) or 102 (unit is not formatted).
 *
 * NOTE(review): f__curunit is pointed at f__units[a->ciunit] before the
 * range check runs, so on the 101 path it refers to a slot outside the
 * table -- confirm nothing on the error path dereferences it.
 */
int c_sfe(cilist *a) { unit *p; f__curunit = p = &f__units[a->ciunit]; if(a->ciunit >= MXUNIT || a->ciunit<0) err(a->cierr,101,"startio"); if(p->ufd==NULL && fk_open(SEQ,FMT,a->ciunit)) err(a->cierr,114,"sfe") if(!p->ufmt) err(a->cierr,102,"sfe") return(0); }

/*
 * e_wsfe -- end a sequential formatted external WRITE.
 * Returns the result of en_fio() and clears f__fmtbuf.  When built with
 * ALWAYS_FLUSH, a successful write is followed by fflush(f__cf) and a
 * flush failure is reported through err() with errno.
 */
integer e_wsfe(void) { int n = en_fio(); f__fmtbuf = NULL; #ifdef ALWAYS_FLUSH if (!n && fflush(f__cf)) err(f__elist->cierr, errno, "write end"); #endif return n; }
blis-0.6.1/blastest/f2c/sig_die.c000066400000000000000000000032231360743507500165230ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include #include #include #include #include "f2c.h" #ifndef SIGIOT #ifdef SIGABRT #define SIGIOT SIGABRT #endif #endif void sig_die(const char *s, int kill) { /* print error message, then clear buffers */ fprintf(stderr, "%s\n", s); if(kill) { fflush(stderr); f_exit(); fflush(stderr); /* now get a core */ #ifdef SIGIOT signal(SIGIOT, SIG_DFL); #endif abort(); } else { f_exit(); exit(1); } } blis-0.6.1/blastest/f2c/sign.c000066400000000000000000000035171360743507500160660ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif shortint h_sign(const shortint *a, const shortint *b) { shortint x = (*a >= 0 ? *a : - *a); return ( *b >= 0 ? x : -x); } integer i_sign(const integer *a, const integer *b) { integer x = (*a >= 0 ? *a : - *a); return ( *b >= 0 ? x : -x); } double r_sign(real *a, real *b) { double x = (*a >= 0 ? *a : - *a); return ( *b >= 0 ? x : -x); } double d_sign(const doublereal *a, const doublereal *b) { double x = (*a >= 0 ? *a : - *a); return ( *b >= 0 ? x : -x); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/sin.c000066400000000000000000000033511360743507500157130ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_sin(real *x) { return( sin(*x) ); } double d_sin(const doublereal *x) { return( sin(*x) ); } void c_sin(complex *r, complex *z) { double zi = z->i, zr = z->r; r->r = sin(zr) * cosh(zi); r->i = cos(zr) * sinh(zi); } void z_sin(doublecomplex *r, doublecomplex *z) { double zi = z->i, zr = z->r; r->r = sin(zr) * cosh(zi); r->i = cos(zr) * sinh(zi); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/sinh.c000066400000000000000000000026451360743507500160700ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_sinh(real *x) { return( sinh(*x) ); } double d_sinh(const doublereal *x) { return( sinh(*x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/sqrt.c000066400000000000000000000030331360743507500161100ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_sqrt(real *x) { return ( sqrt(*x) ); } double d_sqrt(const doublereal *x) { return ( sqrt(*x) ); } void c_sqrt(complex *r, complex *z) { } void z_sqrt(doublecomplex *r, doublecomplex *z) { } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/tan.c000066400000000000000000000026411360743507500157050ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_tan(real *x) { return( tan(*x) ); } double d_tan(const doublereal *x) { return( tan(*x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/tanh.c000066400000000000000000000026451360743507500160610ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ #include "f2c.h" #ifdef __cplusplus extern "C" { #endif double r_tanh(real *x) { return( tanh(*x) ); } double d_tanh(const doublereal *x) { return( tanh(*x) ); } #ifdef __cplusplus } #endif blis-0.6.1/blastest/f2c/util.c000066400000000000000000000033751360743507500161050ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/
#include
#include "f2c.h"
#include "fio.h"

/*
 * g_char -- copy the alen-byte blank-padded string a into b as a C string.
 *
 * Trailing blanks of a are dropped and the result is NUL-terminated; an
 * empty or all-blank a yields the empty string.  The copy runs from the
 * last non-blank character backwards.  b must have room for the retained
 * characters plus the terminator.
 */
void g_char(const char *a, ftnlen alen, char *b) { const char *x = a + alen; char *y = b + alen; for(;; y--) { if (x <= a) { *b = 0; return; } if (*--x != ' ') break; } *y-- = 0; do *y-- = *x; while(x-- > a); }

/*
 * NOTE(review): b_char is truncated inside its for-loop header in this
 * copy; the rest of its body, and the start of the next source file up to
 * the bare #include directives below, are missing.  Restore from a
 * pristine copy before relying on it.
 */
void b_char(const char *a, char *b, ftnlen blen) { int i; for(i=0;i #include #include #include #include "f2c.h" #include "fio.h" #include "arith.h" #include "fmt.h" #include "fp.h"

/*
 * wrt_E -- write a value in E/D exponential form.
 *
 * p is read as float when len == sizeof(real) and as double otherwise.
 * w is the field width, d the number of fractional digits and e the
 * number of exponent digits (e <= 0 is treated as 2; the original value is
 * kept in e0 to tell whether an exponent width was given).  The scale
 * factor f__scale moves digits across the decimal point and is rejected
 * when it is >= d + 2 or <= -d.  The digits come from sprintf("%#.*E");
 * when d exceeds FMAX the excess is written as trailing '0's.  A result
 * that does not start with a digit (NaN, Infinity) is written
 * right-justified, without a sign for NaN.  Whenever the value cannot be
 * represented in the field, w '*' characters are written instead.
 * Always returns 0.
 */
int wrt_E(ufloat *p, int w, int d, int e, ftnlen len) { char buf[FMAX+EXPMAXDIGS+4], *s, *se; int d1, delta, e1, i, sign, signspace; double dd; #ifdef WANT_LEAD_0 int insert0 = 0; #endif int e0 = e; if(e <= 0) e = 2; if(f__scale) { if(f__scale >= d + 2 || f__scale <= -d) goto nogood; } if(f__scale <= 0) --d; if (len == sizeof(real)) dd = p->pf; else dd = p->pd; if (dd < 0.) { signspace = sign = 1; dd = -dd; } else { sign = 0; signspace = (int)f__cplus; if (!dd) { #ifdef SIGNED_ZEROS if (signbit(dd)) signspace = sign = 1; #endif dd = 0.; /* avoid -0 */ } } delta = w - (2 /* for the . and the d adjustment above */ + 2 /* for the E+ */ + signspace + d + e); #ifdef WANT_LEAD_0 if (f__scale <= 0 && delta > 0) { delta--; insert0 = 1; } else #endif if (delta < 0) { nogood: while(--w >= 0) PUT('*'); return(0); } if (f__scale < 0) d += f__scale; if (d > FMAX) { d1 = d - FMAX; d = FMAX; } else d1 = 0; sprintf(buf,"%#.*E", d, dd); /* check for NaN, Infinity */ if (!isdigit(buf[0])) { switch(buf[0]) { case 'n': case 'N': signspace = 0; /* no sign for NaNs */ } delta = w - strlen(buf) - signspace; if (delta < 0) goto nogood; while(--delta >= 0) PUT(' '); if (signspace) PUT(sign ? '-' : '+'); for(s = buf; *s; s++) PUT(*s); return 0; } se = buf + d + 3; #ifdef GOOD_SPRINTF_EXPONENT /* When possible, exponent has 2 digits. */ if (f__scale != 1 && dd) sprintf(se, "%+.2d", atoi(se) + 1 - f__scale); #else if (dd) sprintf(se, "%+.2d", atoi(se) + 1 - f__scale); else strcpy(se, "+00"); #endif s = ++se; if (e < 2) { if (*s != '0') goto nogood; } /* accommodate 3 significant digits in exponent */ if (s[2]) { #ifdef Pedantic if (!e0 && !s[3]) for(s -= 2, e1 = 2; s[0] = s[1]; s++); /* Pedantic gives the behavior that Fortran 77 specifies, */ /* i.e., requires that E be specified for exponent fields */ /* of more than 3 digits. With Pedantic undefined, we get */ /* the behavior that Cray displays -- you get a bigger */ /* exponent field if it fits. */ #else if (!e0) { for(s -= 2, e1 = 2; s[0] = s[1]; s++) #ifdef CRAY delta--; if ((delta += 4) < 0) goto nogood #endif ; } #endif else if (e0 >= 0) goto shift; else e1 = e; } else shift: for(s += 2, e1 = 2; *s; ++e1, ++s) if (e1 >= e) goto nogood; while(--delta >= 0) PUT(' '); if (signspace) PUT(sign ? '-' : '+'); s = buf; i = f__scale; if (f__scale <= 0) { #ifdef WANT_LEAD_0 if (insert0) PUT('0'); #endif PUT('.'); for(; i < 0; ++i) PUT('0'); PUT(*s); s += 2; } else if (f__scale > 1) { PUT(*s); s += 2; while(--i > 0) PUT(*s++); PUT('.'); } if (d1) { se -= 2; while(s < se) PUT(*s++); se += 2; do PUT('0'); while(--d1 > 0); } while(s < se) PUT(*s++); if (e < 2) PUT(s[1]); else { while(++e1 <= e) PUT('0'); while(*s) PUT(*s++); } return 0; }

/*
 * wrt_F -- write a value in F (fixed-point) form.
 *
 * p is read as float when len == sizeof(real) and as double otherwise.
 * w is the field width and d the number of fractional digits; digits
 * beyond MAXFRACDIGS are written as trailing '0's.  The value is first
 * multiplied by 10**f__scale.  Unless WANT_LEAD_0 is defined, a leading
 * "0" before the decimal point is dropped when d is non-zero.  A negative
 * value that prints as all zeros loses its minus sign.  If the text does
 * not fit, w '*' characters are written.  Always returns 0.
 */
int wrt_F(ufloat *p, int w, int d, ftnlen len) { int d1, sign, n; double x; char *b, buf[MAXINTDIGS+MAXFRACDIGS+4], *s; x= (len==sizeof(real)?p->pf:p->pd); if (d < MAXFRACDIGS) d1 = 0; else { d1 = d - MAXFRACDIGS; d = MAXFRACDIGS; } if (x < 0.) { x = -x; sign = 1; } else { sign = 0; if (!x) { #ifdef SIGNED_ZEROS if (signbit(x)) sign = 2; #endif x = 0.; } } if (n = f__scale) if (n > 0) do x *= 10.; while(--n > 0); else do x *= 0.1; while(++n < 0); n = sprintf(b = buf, "%#.*f", d, x) + d1; #ifndef WANT_LEAD_0 if (buf[0] == '0' && d) { ++b; --n; } #endif if (sign == 1) { /* check for all zeros */ for(s = b;;) { while(*s == '0') s++; switch(*s) { case '.': s++; continue; case 0: sign = 0; } break; } } if (sign || f__cplus) ++n; if (n > w) { #ifdef WANT_LEAD_0 if (buf[0] == '0' && --n == w) ++b; else #endif { while(--w >= 0) PUT('*'); return 0; } } for(w -= n; --w >= 0; ) PUT(' '); if (sign) PUT('-'); else if (f__cplus) PUT('+'); while(n = *b++) PUT(n); while(--d1 >= 0) PUT('0'); return 0; }
blis-0.6.1/blastest/f2c/wrtfmt.c000066400000000000000000000172341360743507500164520ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/
#include
#include "f2c.h"
#include "fio.h"
#include "fmt.h"

/*
 * mv_cur -- apply the pending cursor offset f__cursor to the record.
 *
 * The offset is taken and f__cursor reset to 0.  Moving left lowers
 * f__recpos (and f__icptr for an internal file, f__external == 0) after
 * recording the high-water mark f__hiwater; moving before the start of
 * the record is error 110 ("left off").  Moving right writes blanks
 * through f__putn for any part of the move that lies past f__hiwater and
 * simply advances the position over text that was already written; for
 * an internal file a move to or past the record length is error 110
 * ("recend").  Returns 0 on success.
 */
static int mv_cur(void) /* shouldn't use fseek because it insists on calling fflush */ /* instead we know too much about stdio */ { int cursor = f__cursor; f__cursor = 0; if(f__external == 0) { if(cursor < 0) { if(f__hiwater < f__recpos) f__hiwater = f__recpos; f__recpos += cursor; f__icptr += cursor; if(f__recpos < 0) err(f__elist->cierr, 110, "left off"); } else if(cursor > 0) { if(f__recpos + cursor >= f__svic->icirlen) err(f__elist->cierr, 110, "recend"); if(f__hiwater <= f__recpos) for(; cursor > 0; cursor--) (*f__putn)(' '); else if(f__hiwater <= f__recpos + cursor) { cursor -= f__hiwater - f__recpos; f__icptr += f__hiwater - f__recpos; f__recpos = f__hiwater; for(; cursor > 0; cursor--) (*f__putn)(' '); } else { f__icptr += cursor; f__recpos += cursor; } } return(0); } if (cursor > 0) { if(f__hiwater <= f__recpos) for(;cursor>0;cursor--) (*f__putn)(' '); else if(f__hiwater <= f__recpos + cursor) { cursor -= f__hiwater - f__recpos; f__recpos = f__hiwater; for(; cursor > 0; cursor--) (*f__putn)(' '); } else { f__recpos += cursor; } } else if (cursor < 0) { if(cursor + f__recpos < 0) err(f__elist->cierr,110,"left off"); if(f__hiwater < f__recpos) f__hiwater = f__recpos; f__recpos += cursor; } return(0); }

/*
 * wrt_Z -- write the len bytes at n in hexadecimal (Z edit descriptor).
 *
 * Byte order is detected at run time so that output starts with the most
 * significant byte.  Leading zero bytes, and a leading zero nibble, are
 * not printed.  The digits are right-justified in a field of width w and
 * zero-padded up to minlen digits; if they need more than w columns, w
 * '*' characters are written.  Always returns 0.
 */
static int wrt_Z(Uint *n, int w, int minlen, ftnlen len) { register char *s, *se; register int i, w1; static int one = 1; static char hex[] = "0123456789ABCDEF"; s = (char *)n; --len; if (*(char *)&one) { /* little endian */ se = s; s += len; i = -1; } else { se = s + len; i = 1; } for(;; s += i) if (s == se || *s) break; w1 = (i*(se-s) << 1) + 1; if (*s & 0xf0) w1++; if (w1 > w) for(i = 0; i < w; i++) (*f__putn)('*'); else { if ((minlen -= w1) > 0) w1 += minlen; while(--w >= w1) (*f__putn)(' '); while(--minlen >= 0) (*f__putn)('0'); if (!(*s & 0xf0)) { (*f__putn)(hex[*s & 0xf]); if (s == se) return 0; s += i; } for(;; s += i) { (*f__putn)(hex[*s >> 4 & 0xf]); (*f__putn)(hex[*s & 0xf]); if (s == se) break; } } return 0; }

/*
 * NOTE(review): the text from wrt_I up to wrt_AW is damaged in this copy:
 * everything between a '<' and the following '>' appears to have been
 * dropped, fusing wrt_I with the integer, logical and character writers
 * that followed it.  Restore from a pristine wrtfmt.c before relying on
 * it.
 */
static int wrt_I(Uint *n, int w, ftnlen len, register int base) { int ndigit,sign,spare,i; longint x; char *ans; if(len==sizeof(integer)) x=n->il; else if(len == sizeof(char)) x = n->ic; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) x = n->ili; #endif else x=n->is; ans=f__icvt(x,&ndigit,&sign, base); spare=w-ndigit; if(sign || f__cplus) spare--; if(spare<0) for(i=0;iil; else if(len == sizeof(char)) x = n->ic; #ifdef Allow_TYQUAD else if (len == sizeof(longint)) x = n->ili; #endif else x=n->is; ans=f__icvt(x,&ndigit,&sign, base); if(sign || f__cplus) xsign=1; else xsign=0; if(ndigit+xsign>w || m+xsign>w) { for(i=0;i=m) spare=w-ndigit-xsign; else spare=w-m-xsign; for(i=0;iil; else if(sz == sizeof(char)) x = n->ic; else x=n->is; for(i=0;i 0) (*f__putn)(*p++); return(0); }

/*
 * wrt_AW -- write a character item in a field of width w (Aw).
 * When w exceeds len the field is first padded on the left with
 * w - len blanks; then the remaining w characters are taken from the
 * start of p.  Always returns 0.
 */
static int wrt_AW(char * p, int w, ftnlen len) { while(w>len) { w--; (*f__putn)(' '); } while(w-- > 0) (*f__putn)(*p++); return(0); }

/*
 * NOTE(review): wrt_G is truncated inside its inner for-loop in this
 * copy, and the text that follows jumps into the switch of the data-edit
 * dispatcher (its header is missing).  The visible part of wrt_G sends
 * non-zero magnitudes below 0.1 to wrt_E and otherwise selects an
 * F-format width by comparing the magnitude against successive powers of
 * ten.  The visible part of the dispatcher switches on p->op and calls
 * wrt_I, wrt_IM, wrt_L, wrt_A, wrt_AW, wrt_E, wrt_G, wrt_F or wrt_Z.
 * Restore from a pristine copy before relying on it.
 */
static int wrt_G(ufloat *p, int w, int d, int e, ftnlen len) { double up = 1,x; int i=0,oldscale,n,j; x = len==sizeof(real)?p->pf:p->pd; if(x < 0 ) x = -x; if(x<.1) { if (x != 0.) return(wrt_E(p,w,d,e,len)); i = 1; goto have_i; } for(;i<=d;i++,up*=10) { if(x>=up) continue; have_i: oldscale = f__scale; f__scale = 0; if(e==0) n=4; else n=e+2; i=wrt_F(p,w-n,d-i,len); for(j=0;jop) { default: fprintf(stderr,"w_ed, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case I: return(wrt_I((Uint *)ptr,p->p1,len, 10)); case IM: return(wrt_IM((Uint *)ptr,p->p1,p->p2.i[0],len,10)); /* O and OM don't work right for character, double, complex, */ /* or doublecomplex, and they differ from Fortran 90 in */ /* showing a minus sign for negative values. */ case O: return(wrt_I((Uint *)ptr, p->p1, len, 8)); case OM: return(wrt_IM((Uint *)ptr,p->p1,p->p2.i[0],len,8)); case L: return(wrt_L((Uint *)ptr,p->p1, len)); case A: return(wrt_A(ptr,len)); case AW: return(wrt_AW(ptr,p->p1,len)); case D: case E: case EE: return(wrt_E((ufloat *)ptr,p->p1,p->p2.i[0],p->p2.i[1],len)); case G: case GE: return(wrt_G((ufloat *)ptr,p->p1,p->p2.i[0],p->p2.i[1],len)); case F: return(wrt_F((ufloat *)ptr,p->p1,p->p2.i[0],len)); /* Z and ZM assume 8-bit bytes. */ case Z: return(wrt_Z((Uint *)ptr,p->p1,0,len)); case ZM: return(wrt_Z((Uint *)ptr,p->p1,p->p2.i[0],len)); } }

/*
 * w_ned -- handle a non-data edit descriptor while writing.
 *
 * SLASH starts a new record through f__donewrec; APOS and H write their
 * literal text through wrt_AP / wrt_H.  The positioning codes only update
 * the pending cursor offset f__cursor and return 1: T sets the offset
 * that reaches column p->p1, TL moves left but never before the start of
 * the record, and TR/X move right by p->p1.  An unknown code is reported
 * on stderr and passed to sig_die().
 */
int w_ned(struct syl *p) { switch(p->op) { default: fprintf(stderr,"w_ned, unexpected code: %d\n", p->op); sig_die(f__fmtbuf, 1); case SLASH: return((*f__donewrec)()); case T: f__cursor = p->p1-f__recpos - 1; return(1); case TL: f__cursor -= p->p1; if(f__cursor < -f__recpos) /* TL1000, 1X */ f__cursor = -f__recpos; return(1); case TR: case X: f__cursor += p->p1; return(1); case APOS: return(wrt_AP(p->p2.s)); case H: return(wrt_H(p->p1,p->p2.s)); } }
blis-0.6.1/blastest/f2c/wsfe.c000066400000000000000000000045101360743507500160640ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. 
In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. ****************************************************************/ /*write sequential formatted external*/ #include #include "f2c.h" #include "fio.h" #include "fmt.h" int x_wSL(void) { int n = f__putbuf('\n'); f__hiwater = f__recpos = f__cursor = 0; return(n == 0); } static int xw_end(void) { int n; if(f__nonl) { f__putbuf(n = 0); fflush(f__cf); } else n = f__putbuf('\n'); f__hiwater = f__recpos = f__cursor = 0; return n; } static int xw_rev(void) { int n = 0; if(f__workdone) { n = f__putbuf('\n'); f__workdone = 0; } f__hiwater = f__recpos = f__cursor = 0; return n; } integer s_wsfe(cilist *a) /*start*/ { int n; if(!f__init) f_init(); f__reading=0; f__sequential=1; f__formatted=1; f__external=1; if(n=c_sfe(a)) return(n); f__elist=a; f__hiwater = f__cursor=f__recpos=0; f__nonl = 0; f__scale=0; f__fmtbuf=a->cifmt; f__cf=f__curunit->ufd; if(pars_f(f__fmtbuf)<0) err(a->cierr,100,"startio"); f__putn= x_putc; f__doed= w_ed; f__doned= w_ned; f__doend=xw_end; f__dorevert=xw_rev; f__donewrec=x_wSL; fmt_bg(); f__cplus=0; f__cblank=f__curunit->ublnk; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr,errno,"write start"); return(0); } blis-0.6.1/blastest/f2c/wsle.c000066400000000000000000000034141360743507500160740ustar00rootroot00000000000000/**************************************************************** Copyright 1990 - 1997 by AT&T, Lucent Technologies and Bellcore. 
Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appear in all copies and that both that the copyright notice and this permission notice and warranty disclaimer appear in supporting documentation, and that the names of AT&T, Bell Laboratories, Lucent or Bellcore or any of their entities not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. AT&T, Lucent and Bellcore disclaim all warranties with regard to this software, including all implied warranties of merchantability and fitness. In no event shall AT&T, Lucent or Bellcore be liable for any special, indirect or consequential damages or any damages whatsoever resulting from loss of use, data or profits, whether in an action of contract, negligence or other tortious action, arising out of or in connection with the use or performance of this software. 
****************************************************************/ #include #include #include "f2c.h" #include "fio.h" #include "fmt.h" #include "lio.h" integer s_wsle(cilist *a) { int n; if(n=c_le(a)) return(n); f__reading=0; f__external=1; f__formatted=1; f__putn = x_putc; f__lioproc = l_write; L_len = LINE; f__donewrec = x_wSL; if(f__curunit->uwrt != 1 && f__nowwriting(f__curunit)) err(a->cierr, errno, "list output start"); return(0); } integer e_wsle(void) { int n = f__putbuf('\n'); f__recpos=0; #ifdef ALWAYS_FLUSH if (!n && fflush(f__cf)) err(f__elist->cierr, errno, "write end"); #endif return(n); } blis-0.6.1/blastest/input/000077500000000000000000000000001360743507500154415ustar00rootroot00000000000000blis-0.6.1/blastest/input/cblat2.in000066400000000000000000000030121360743507500171340ustar00rootroot00000000000000'out.cblat2' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA CGEMV T PUT F FOR NO TEST. SAME COLUMNS. CGBMV T PUT F FOR NO TEST. SAME COLUMNS. CHEMV T PUT F FOR NO TEST. SAME COLUMNS. CHBMV T PUT F FOR NO TEST. SAME COLUMNS. CHPMV T PUT F FOR NO TEST. SAME COLUMNS. CTRMV T PUT F FOR NO TEST. SAME COLUMNS. CTBMV T PUT F FOR NO TEST. SAME COLUMNS. CTPMV T PUT F FOR NO TEST. SAME COLUMNS. CTRSV T PUT F FOR NO TEST. SAME COLUMNS. CTBSV T PUT F FOR NO TEST. SAME COLUMNS. CTPSV T PUT F FOR NO TEST. SAME COLUMNS. CGERC T PUT F FOR NO TEST. SAME COLUMNS. 
CGERU T PUT F FOR NO TEST. SAME COLUMNS. CHER T PUT F FOR NO TEST. SAME COLUMNS. CHPR T PUT F FOR NO TEST. SAME COLUMNS. CHER2 T PUT F FOR NO TEST. SAME COLUMNS. CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. blis-0.6.1/blastest/input/cblat3.in000066400000000000000000000020261360743507500171410ustar00rootroot00000000000000'out.cblat3' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA CGEMM T PUT F FOR NO TEST. SAME COLUMNS. CHEMM T PUT F FOR NO TEST. SAME COLUMNS. CSYMM T PUT F FOR NO TEST. SAME COLUMNS. CTRMM T PUT F FOR NO TEST. SAME COLUMNS. CTRSM T PUT F FOR NO TEST. SAME COLUMNS. CHERK T PUT F FOR NO TEST. SAME COLUMNS. CSYRK T PUT F FOR NO TEST. SAME COLUMNS. CHER2K T PUT F FOR NO TEST. SAME COLUMNS. CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. blis-0.6.1/blastest/input/dblat2.in000066400000000000000000000026721360743507500171500ustar00rootroot00000000000000'out.dblat2' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 0.9 VALUES OF BETA DGEMV T PUT F FOR NO TEST. SAME COLUMNS. 
DGBMV T PUT F FOR NO TEST. SAME COLUMNS. DSYMV T PUT F FOR NO TEST. SAME COLUMNS. DSBMV T PUT F FOR NO TEST. SAME COLUMNS. DSPMV T PUT F FOR NO TEST. SAME COLUMNS. DTRMV T PUT F FOR NO TEST. SAME COLUMNS. DTBMV T PUT F FOR NO TEST. SAME COLUMNS. DTPMV T PUT F FOR NO TEST. SAME COLUMNS. DTRSV T PUT F FOR NO TEST. SAME COLUMNS. DTBSV T PUT F FOR NO TEST. SAME COLUMNS. DTPSV T PUT F FOR NO TEST. SAME COLUMNS. DGER T PUT F FOR NO TEST. SAME COLUMNS. DSYR T PUT F FOR NO TEST. SAME COLUMNS. DSPR T PUT F FOR NO TEST. SAME COLUMNS. DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. blis-0.6.1/blastest/input/dblat3.in000066400000000000000000000015621360743507500171460ustar00rootroot00000000000000'out.dblat3' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 1.3 VALUES OF BETA DGEMM T PUT F FOR NO TEST. SAME COLUMNS. DSYMM T PUT F FOR NO TEST. SAME COLUMNS. DTRMM T PUT F FOR NO TEST. SAME COLUMNS. DTRSM T PUT F FOR NO TEST. SAME COLUMNS. DSYRK T PUT F FOR NO TEST. SAME COLUMNS. DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. blis-0.6.1/blastest/input/sblat2.in000066400000000000000000000026721360743507500171670ustar00rootroot00000000000000'out.sblat2' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 
16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 0.9 VALUES OF BETA SGEMV T PUT F FOR NO TEST. SAME COLUMNS. SGBMV T PUT F FOR NO TEST. SAME COLUMNS. SSYMV T PUT F FOR NO TEST. SAME COLUMNS. SSBMV T PUT F FOR NO TEST. SAME COLUMNS. SSPMV T PUT F FOR NO TEST. SAME COLUMNS. STRMV T PUT F FOR NO TEST. SAME COLUMNS. STBMV T PUT F FOR NO TEST. SAME COLUMNS. STPMV T PUT F FOR NO TEST. SAME COLUMNS. STRSV T PUT F FOR NO TEST. SAME COLUMNS. STBSV T PUT F FOR NO TEST. SAME COLUMNS. STPSV T PUT F FOR NO TEST. SAME COLUMNS. SGER T PUT F FOR NO TEST. SAME COLUMNS. SSYR T PUT F FOR NO TEST. SAME COLUMNS. SSPR T PUT F FOR NO TEST. SAME COLUMNS. SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. blis-0.6.1/blastest/input/sblat3.in000066400000000000000000000015621360743507500171650ustar00rootroot00000000000000'out.sblat3' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA 0.0 1.0 0.7 VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA 0.0 1.0 1.3 VALUES OF BETA SGEMM T PUT F FOR NO TEST. SAME COLUMNS. SSYMM T PUT F FOR NO TEST. SAME COLUMNS. STRMM T PUT F FOR NO TEST. SAME COLUMNS. STRSM T PUT F FOR NO TEST. SAME COLUMNS. SSYRK T PUT F FOR NO TEST. SAME COLUMNS. SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. 
blis-0.6.1/blastest/input/zblat2.in000066400000000000000000000030121360743507500171630ustar00rootroot00000000000000'out.zblat2' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 4 NUMBER OF VALUES OF K 0 1 2 4 VALUES OF K 4 NUMBER OF VALUES OF INCX AND INCY 1 2 -1 -2 VALUES OF INCX AND INCY 3 NUMBER OF VALUES OF ALPHA (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. ZGERC T PUT F FOR NO TEST. SAME COLUMNS. ZGERU T PUT F FOR NO TEST. SAME COLUMNS. ZHER T PUT F FOR NO TEST. SAME COLUMNS. ZHPR T PUT F FOR NO TEST. SAME COLUMNS. ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. blis-0.6.1/blastest/input/zblat3.in000066400000000000000000000020261360743507500171700ustar00rootroot00000000000000'out.zblat3' NAME OF SUMMARY OUTPUT FILE 6 UNIT NUMBER OF SUMMARY FILE 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. F LOGICAL FLAG, T TO STOP ON FAILURES. T LOGICAL FLAG, T TO TEST ERROR EXITS. 
16.0 THRESHOLD VALUE OF TEST RATIO 6 NUMBER OF VALUES OF N 0 1 2 3 5 9 VALUES OF N 3 NUMBER OF VALUES OF ALPHA (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA 3 NUMBER OF VALUES OF BETA (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. ZHERK T PUT F FOR NO TEST. SAME COLUMNS. ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. blis-0.6.1/blastest/obj/000077500000000000000000000000001360743507500150545ustar00rootroot00000000000000blis-0.6.1/blastest/obj/.gitkeep000066400000000000000000000000001360743507500164730ustar00rootroot00000000000000blis-0.6.1/blastest/src/000077500000000000000000000000001360743507500150715ustar00rootroot00000000000000blis-0.6.1/blastest/src/cblat1.c000066400000000000000000000706401360743507500164120ustar00rootroot00000000000000/* cblat1.f -- translated by f2c (version 20100827). 
You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ struct { integer icase, n, incx, incy, mode; logical pass; } combla_; #define combla_1 combla_ /* Table of constant values */ static integer c__1 = 1; static integer c__9 = 9; static integer c__5 = 5; static real c_b43 = 1.f; static real c_b52 = 0.f; /* > \brief \b CBLAT1 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM CBLAT1 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX Level 1 BLAS. */ /* > Based upon the original BLAS test routine together with: */ /* > */ /* > F06GAF Example Program Text */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. 
*/ /* > \date April 2012 */ /* > \ingroup complex_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static real sfac = 9.765625e-4f; /* Format strings */ static char fmt_99999[] = "(\002 Complex BLAS Test Program Results\002,/" "1x)"; static char fmt_99998[] = "(\002 ----" "- PASS -----\002)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer ic; extern /* Subroutine */ int check1_(real *), check2_(real *), header_( void); /* Fortran I/O blocks */ static cilist io___2 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___4 = { 0, 6, 0, fmt_99998, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___2); e_wsfe(); for (ic = 1; ic <= 10; ++ic) { combla_1.icase = ic; header_(); /* Initialize PASS, INCX, INCY, and MODE for a new case. */ /* The value 9999 for INCX, INCY or MODE will appear in the */ /* detailed output, if any, for cases that do not involve */ /* these parameters. 
*/ combla_1.pass = TRUE_; combla_1.incx = 9999; combla_1.incy = 9999; combla_1.mode = 9999; if (combla_1.icase <= 5) { check2_(&sfac); } else if (combla_1.icase >= 6) { check1_(&sfac); } /* -- Print */ if (combla_1.pass) { s_wsfe(&io___4); e_wsfe(); } /* L20: */ } s_stop("", (ftnlen)0); return 0; } /* main */ /* Subroutine */ int header_(void) { /* Initialized data */ static char l[6*10] = "CDOTC " "CDOTU " "CAXPY " "CCOPY " "CSWAP " "SCNR" "M2" "SCASUM" "CSCAL " "CSSCAL" "ICAMAX"; /* Format strings */ static char fmt_99999[] = "(/\002 Test of subprogram number\002,i3,12x,a" "6)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___6 = { 0, 6, 0, fmt_99999, 0 }; /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Arrays .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___6); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, l + (0 + (0 + (combla_1.icase - 1) * 6)), (ftnlen)6); e_wsfe(); return 0; } /* header_ */ /* Subroutine */ int check1_(real *sfac) { /* Initialized data */ static real strue2[5] = { 0.f,.5f,.6f,.7f,.8f }; static real strue4[5] = { 0.f,.7f,1.f,1.3f,1.6f }; static complex ctrue5[80] /* was [8][5][2] */ = { {.1f,.1f},{1.f,2.f},{ 1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{-.16f, -.37f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f} ,{3.f,4.f},{-.17f,-.19f},{.13f,-.39f},{5.f,6.f},{5.f,6.f},{5.f, 6.f},{5.f,6.f},{5.f,6.f},{5.f,6.f},{.11f,-.03f},{-.17f,.46f},{ -.17f,-.19f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{ .19f,-.17f},{.2f,-.35f},{.35f,.2f},{.14f,.08f},{2.f,3.f},{2.f,3.f} ,{2.f,3.f},{2.f,3.f},{.1f,.1f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f, 5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{-.16f,-.37f},{6.f,7.f},{6.f, 7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{-.17f, 
-.19f},{8.f,9.f},{.13f,-.39f},{2.f,5.f},{2.f,5.f},{2.f,5.f},{2.f, 5.f},{2.f,5.f},{.11f,-.03f},{3.f,6.f},{-.17f,.46f},{4.f,7.f},{ -.17f,-.19f},{7.f,2.f},{7.f,2.f},{7.f,2.f},{.19f,-.17f},{5.f,8.f}, {.2f,-.35f},{6.f,9.f},{.35f,.2f},{8.f,3.f},{.14f,.08f},{9.f,4.f} } ; static complex ctrue6[80] /* was [8][5][2] */ = { {.1f,.1f},{1.f,2.f},{ 1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{.09f, -.12f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f} ,{3.f,4.f},{.03f,-.09f},{.15f,-.03f},{5.f,6.f},{5.f,6.f},{5.f,6.f} ,{5.f,6.f},{5.f,6.f},{5.f,6.f},{.03f,.03f},{-.18f,.03f},{.03f, -.09f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{.09f, .03f},{.15f,0.f},{0.f,.15f},{0.f,.06f},{2.f,3.f},{2.f,3.f},{2.f, 3.f},{2.f,3.f},{.1f,.1f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{ 4.f,5.f},{4.f,5.f},{4.f,5.f},{.09f,-.12f},{6.f,7.f},{6.f,7.f},{ 6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{.03f,-.09f},{ 8.f,9.f},{.15f,-.03f},{2.f,5.f},{2.f,5.f},{2.f,5.f},{2.f,5.f},{ 2.f,5.f},{.03f,.03f},{3.f,6.f},{-.18f,.03f},{4.f,7.f},{.03f,-.09f} ,{7.f,2.f},{7.f,2.f},{7.f,2.f},{.09f,.03f},{5.f,8.f},{.15f,0.f},{ 6.f,9.f},{0.f,.15f},{8.f,3.f},{0.f,.06f},{9.f,4.f} }; static integer itrue3[5] = { 0,1,2,2,2 }; static real sa = .3f; static complex ca = {.4f,-.7f}; static complex cv[80] /* was [8][5][2] */ = { {.1f,.1f},{1.f,2.f},{ 1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{1.f,2.f},{.3f, -.4f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f},{3.f,4.f}, {3.f,4.f},{.1f,-.3f},{.5f,-.1f},{5.f,6.f},{5.f,6.f},{5.f,6.f},{ 5.f,6.f},{5.f,6.f},{5.f,6.f},{.1f,.1f},{-.6f,.1f},{.1f,-.3f},{7.f, 8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{7.f,8.f},{.3f,.1f},{.5f,0.f},{ 0.f,.5f},{0.f,.2f},{2.f,3.f},{2.f,3.f},{2.f,3.f},{2.f,3.f},{.1f, .1f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{4.f,5.f},{ 4.f,5.f},{.3f,-.4f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f,7.f},{6.f, 7.f},{6.f,7.f},{6.f,7.f},{.1f,-.3f},{8.f,9.f},{.5f,-.1f},{2.f,5.f} ,{2.f,5.f},{2.f,5.f},{2.f,5.f},{2.f,5.f},{.1f,.1f},{3.f,6.f},{ 
-.6f,.1f},{4.f,7.f},{.1f,-.3f},{7.f,2.f},{7.f,2.f},{7.f,2.f},{.3f, .1f},{5.f,8.f},{.5f,0.f},{6.f,9.f},{0.f,.5f},{8.f,3.f},{0.f,.2f},{ 9.f,4.f} }; /* System generated locals */ integer i__1, i__2, i__3; real r__1; complex q__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__; complex cx[8]; integer np1, len; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *), ctest_(integer *, complex *, complex *, complex *, real *); complex mwpcs[5], mwpct[5]; extern real scnrm2_(integer *, complex *, integer *); extern /* Subroutine */ int itest1_(integer *, integer *), stest1_(real *, real *, real *, real *); extern integer icamax_(integer *, complex *, integer *); extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer *); extern real scasum_(integer *, complex *, integer *); /* Fortran I/O blocks */ static cilist io___19 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) { for (np1 = 1; np1 <= 5; ++np1) { combla_1.n = np1 - 1; len = max(combla_1.n,1) << 1; /* .. Set vector arguments .. */ i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ - 1; i__3 = i__ + (np1 + combla_1.incx * 5 << 3) - 49; cx[i__2].r = cv[i__3].r, cx[i__2].i = cv[i__3].i; /* L20: */ } if (combla_1.icase == 6) { /* .. SCNRM2 .. */ r__1 = scnrm2_(&combla_1.n, cx, &combla_1.incx); stest1_(&r__1, &strue2[np1 - 1], &strue2[np1 - 1], sfac); } else if (combla_1.icase == 7) { /* .. SCASUM .. 
*/ r__1 = scasum_(&combla_1.n, cx, &combla_1.incx); stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); } else if (combla_1.icase == 8) { /* .. CSCAL .. */ cscal_(&combla_1.n, &ca, cx, &combla_1.incx); ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 9) { /* .. CSSCAL .. */ csscal_(&combla_1.n, &sa, cx, &combla_1.incx); ctest_(&len, cx, &ctrue6[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue6[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 10) { /* .. ICAMAX .. */ i__1 = icamax_(&combla_1.n, cx, &combla_1.incx); itest1_(&i__1, &itrue3[np1 - 1]); } else { s_wsle(&io___19); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } combla_1.incx = 1; if (combla_1.icase == 8) { /* CSCAL */ /* Add a test for alpha equal to zero. */ ca.r = 0.f, ca.i = 0.f; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; mwpct[i__1].r = 0.f, mwpct[i__1].i = 0.f; i__1 = i__ - 1; mwpcs[i__1].r = 1.f, mwpcs[i__1].i = 1.f; /* L80: */ } cscal_(&c__5, &ca, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } else if (combla_1.icase == 9) { /* CSSCAL */ /* Add a test for alpha equal to zero. */ sa = 0.f; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; mwpct[i__1].r = 0.f, mwpct[i__1].i = 0.f; i__1 = i__ - 1; mwpcs[i__1].r = 1.f, mwpcs[i__1].i = 1.f; /* L100: */ } csscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); /* Add a test for alpha equal to one. */ sa = 1.f; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; mwpct[i__1].r = cx[i__2].r, mwpct[i__1].i = cx[i__2].i; i__1 = i__ - 1; i__2 = i__ - 1; mwpcs[i__1].r = cx[i__2].r, mwpcs[i__1].i = cx[i__2].i; /* L120: */ } csscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); /* Add a test for alpha equal to minus one. 
*/ sa = -1.f; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; q__1.r = -cx[i__2].r, q__1.i = -cx[i__2].i; mwpct[i__1].r = q__1.r, mwpct[i__1].i = q__1.i; i__1 = i__ - 1; i__2 = i__ - 1; q__1.r = -cx[i__2].r, q__1.i = -cx[i__2].i; mwpcs[i__1].r = q__1.r, mwpcs[i__1].i = q__1.i; /* L140: */ } csscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } return 0; } /* check1_ */ /* Subroutine */ int check2_(real *sfac) { /* Initialized data */ static complex ca = {.4f,-.7f}; static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static complex cx1[7] = { {.7f,-.8f},{-.4f,-.7f},{-.1f,-.9f},{.2f,-.8f},{ -.9f,-.4f},{.1f,.4f},{-.6f,.6f} }; static complex cy1[7] = { {.6f,-.6f},{-.9f,.5f},{.7f,-.6f},{.1f,-.5f},{ -.1f,-.2f},{-.5f,-.3f},{.8f,-.7f} }; static complex ct8[112] /* was [7][4][4] */ = { {.6f,-.6f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.32f,-1.41f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.32f, -1.41f},{-1.55f,.5f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{.32f,-1.41f},{-1.55f,.5f},{.03f,-.89f},{-.38f,-.96f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{0.f,0.f},{.32f,-1.41f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{-.07f,-.89f},{-.9f,.5f},{ .42f,-1.41f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.78f,.06f},{ -.9f,.5f},{.06f,-.13f},{.1f,-.5f},{-.77f,-.49f},{-.5f,-.3f},{.52f, -1.51f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.32f,-1.41f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{-.07f,-.89f},{-1.18f,-.31f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.78f,.06f},{-1.54f,.97f},{ .03f,-.89f},{-.18f,-1.31f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f, -.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, 
{.32f,-1.41f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{.32f,-1.41f},{-.9f,.5f},{.05f,-.6f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{.32f,-1.41f},{-.9f,.5f},{.05f,-.6f},{.1f, -.5f},{-.77f,-.49f},{-.5f,-.3f},{.32f,-1.16f} }; static complex ct7[16] /* was [4][4] */ = { {0.f,0.f},{-.06f,-.9f},{ .65f,-.47f},{-.34f,-1.22f},{0.f,0.f},{-.06f,-.9f},{-.59f,-1.46f},{ -1.04f,-.04f},{0.f,0.f},{-.06f,-.9f},{-.83f,.59f},{.07f,-.37f},{ 0.f,0.f},{-.06f,-.9f},{-.76f,-1.15f},{-1.33f,-1.82f} }; static complex ct6[16] /* was [4][4] */ = { {0.f,0.f},{.9f,.06f},{ .91f,-.77f},{1.8f,-.1f},{0.f,0.f},{.9f,.06f},{1.45f,.74f},{.2f, .9f},{0.f,0.f},{.9f,.06f},{-.55f,.23f},{.83f,-.39f},{0.f,0.f},{ .9f,.06f},{1.04f,.79f},{1.95f,1.22f} }; static complex ct10x[112] /* was [7][4][4] */ = { {.7f,-.8f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f}, {-.9f,.5f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f, -.6f},{-.9f,.5f},{.7f,-.6f},{.1f,-.5f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.7f,-.6f},{-.4f,-.7f},{.6f,-.6f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{.8f,-.7f},{-.4f,-.7f},{-.1f,-.2f},{.2f, -.8f},{.7f,-.6f},{.1f,.4f},{.6f,-.6f},{.7f,-.8f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{-.9f,.5f},{ -.4f,-.7f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ .1f,-.5f},{-.4f,-.7f},{.7f,-.6f},{.2f,-.8f},{-.9f,.5f},{.1f,.4f},{ .6f,-.6f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{.6f,-.6f},{.7f,-.6f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{.6f,-.6f},{.7f,-.6f},{-.1f,-.2f},{ .8f,-.7f},{0.f,0.f},{0.f,0.f},{0.f,0.f} }; static complex ct10y[112] 
/* was [7][4][4] */ = { {.6f,-.6f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.7f,-.8f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.7f,-.8f}, {-.4f,-.7f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ .7f,-.8f},{-.4f,-.7f},{-.1f,-.9f},{.2f,-.8f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{-.1f,-.9f},{-.9f,.5f},{.7f,-.8f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{-.6f,.6f},{-.9f,.5f},{-.9f,-.4f},{ .1f,-.5f},{-.1f,-.9f},{-.5f,-.3f},{.7f,-.8f},{.6f,-.6f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{.7f,-.8f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{-.1f,-.9f} ,{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ -.6f,.6f},{-.9f,-.4f},{-.1f,-.9f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{.6f,-.6f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{.7f,-.8f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f}, {0.f,0.f},{0.f,0.f},{.7f,-.8f},{-.9f,.5f},{-.4f,-.7f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{.7f,-.8f},{-.9f,.5f},{-.4f,-.7f},{ .1f,-.5f},{-.1f,-.9f},{-.5f,-.3f},{.2f,-.8f} }; static complex csize1[4] = { {0.f,0.f},{.9f,.9f},{1.63f,1.73f},{2.9f, 2.78f} }; static complex csize3[14] = { {0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{ 0.f,0.f},{0.f,0.f},{0.f,0.f},{1.17f,1.17f},{1.17f,1.17f},{1.17f, 1.17f},{1.17f,1.17f},{1.17f,1.17f},{1.17f,1.17f},{1.17f,1.17f} }; static complex csize2[14] /* was [7][2] */ = { {0.f,0.f},{0.f,0.f},{0.f, 0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{0.f,0.f},{1.54f,1.54f},{1.54f, 1.54f},{1.54f,1.54f},{1.54f,1.54f},{1.54f,1.54f},{1.54f,1.54f},{ 1.54f,1.54f} }; /* System generated locals */ integer i__1, i__2; complex q__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, ki, kn; complex cx[7], 
cy[7]; integer mx, my; complex cdot[1]; integer lenx, leny; extern /* Complex */ complex cdotc_(integer *, complex *, integer *, complex *, integer *); extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *); extern /* Complex */ complex cdotu_(integer *, complex *, integer *, complex *, integer *); extern /* Subroutine */ int cswap_(integer *, complex *, integer *, complex *, integer *), ctest_(integer *, complex *, complex *, complex *, real *); integer ksize; extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, integer *, complex *, integer *); /* Fortran I/O blocks */ static cilist io___48 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; /* .. initialize all argument arrays .. */ for (i__ = 1; i__ <= 7; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; cx[i__1].r = cx1[i__2].r, cx[i__1].i = cx1[i__2].i; i__1 = i__ - 1; i__2 = i__ - 1; cy[i__1].r = cy1[i__2].r, cy[i__1].i = cy1[i__2].i; /* L20: */ } if (combla_1.icase == 1) { /* .. CDOTC .. */ q__1 = cdotc_(&combla_1.n, cx, &combla_1.incx, cy, & combla_1.incy); cdot[0].r = q__1.r, cdot[0].i = q__1.i; ctest_(&c__1, cdot, &ct6[kn + (ki << 2) - 5], &csize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. CDOTU .. 
*/ q__1 = cdotu_(&combla_1.n, cx, &combla_1.incx, cy, & combla_1.incy); cdot[0].r = q__1.r, cdot[0].i = q__1.i; ctest_(&c__1, cdot, &ct7[kn + (ki << 2) - 5], &csize1[kn - 1], sfac); } else if (combla_1.icase == 3) { /* .. CAXPY .. */ caxpy_(&combla_1.n, &ca, cx, &combla_1.incx, cy, & combla_1.incy); ctest_(&leny, cy, &ct8[(kn + (ki << 2)) * 7 - 35], &csize2[ ksize * 7 - 7], sfac); } else if (combla_1.icase == 4) { /* .. CCOPY .. */ ccopy_(&combla_1.n, cx, &combla_1.incx, cy, &combla_1.incy); ctest_(&leny, cy, &ct10y[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); } else if (combla_1.icase == 5) { /* .. CSWAP .. */ cswap_(&combla_1.n, cx, &combla_1.incx, cy, &combla_1.incy); ctest_(&lenx, cx, &ct10x[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); ctest_(&leny, cy, &ct10y[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); } else { s_wsle(&io___48); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK2", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } return 0; } /* check2_ */ /* Subroutine */ int stest_(integer *len, real *scomp, real *strue, real * ssize, real *sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY MODE I " " \002,\002 COMP(I) TRU" "E(I) DIFFERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,3i5,i3,2e36.8,2e12.4)"; /* System generated locals */ integer i__1; real r__1, r__2; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__; real sd; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___51 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___52 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___53 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. 
*/ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; --strue; --scomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { sd = scomp[i__] - strue[i__]; if ((r__2 = *sfac * sd, abs(r__2)) <= (r__1 = ssize[i__], abs(r__1)) * s_epsilon_(&c_b52)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___51); e_wsfe(); s_wsfe(&io___52); e_wsfe(); L20: s_wsfe(&io___53); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.mode, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&scomp[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&strue[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ssize[i__], (ftnlen)sizeof(real)); e_wsfe(); L40: ; } return 0; } /* stest_ */ /* Subroutine */ int stest1_(real *scomp1, real *strue1, real *ssize, real * sfac) { real scomp[1], strue[1]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); /* ************************* STEST1 ***************************** */ /* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN */ /* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE */ /* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Arrays .. */ /* .. 
External Subroutines .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; /* Function Body */ scomp[0] = *scomp1; strue[0] = *strue1; stest_(&c__1, scomp, strue, &ssize[1], sfac); return 0; } /* stest1_ */ real sdiff_(real *sa, real *sb) { /* System generated locals */ real ret_val; /* ********************************* SDIFF ************************** */ /* COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *sa - *sb; return ret_val; } /* sdiff_ */ /* Subroutine */ int ctest_(integer *len, complex *ccomp, complex *ctrue, complex *csize, real *sfac) { /* System generated locals */ integer i__1, i__2; /* Builtin functions */ double r_imag(complex *); /* Local variables */ integer i__; real scomp[20], ssize[20], strue[20]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); /* **************************** CTEST ***************************** */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ --csize; --ctrue; --ccomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; scomp[(i__ << 1) - 2] = ccomp[i__2].r; scomp[(i__ << 1) - 1] = r_imag(&ccomp[i__]); i__2 = i__; strue[(i__ << 1) - 2] = ctrue[i__2].r; strue[(i__ << 1) - 1] = r_imag(&ctrue[i__]); i__2 = i__; ssize[(i__ << 1) - 2] = csize[i__2].r; ssize[(i__ << 1) - 1] = r_imag(&csize[i__]); /* L20: */ } i__1 = *len << 1; stest_(&i__1, scomp, strue, ssize, sfac); return 0; } /* ctest_ */ /* Subroutine */ int itest1_(integer *icomp, integer *itrue) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY MODE " " \002,\002 COMP TRU" "E DIFFERENCE\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,3i5,2i36,i12)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer id; /* Fortran I/O blocks */ static cilist io___60 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___61 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___63 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* ITEST1 ************************* */ /* THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR */ /* EQUALITY. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ if (*icomp == *itrue) { goto L40; } /* HERE ICOMP IS NOT EQUAL TO ITRUE. */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. 
*/ combla_1.pass = FALSE_; s_wsfe(&io___60); e_wsfe(); s_wsfe(&io___61); e_wsfe(); L20: id = *icomp - *itrue; s_wsfe(&io___63); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.mode, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*icomp), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*itrue), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&id, (ftnlen)sizeof(integer)); e_wsfe(); L40: return 0; } /* itest1_ */ /* Main program alias */ int cblat1_ () { main (); return 0; } blis-0.6.1/blastest/src/cblat2.c000066400000000000000000004766721360743507500164320ustar00rootroot00000000000000/* cblat2.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static integer c__6 = 6; static real c_b122 = 0.f; static logical c_true = TRUE_; static integer c_n1 = -1; static integer 
c__0 = 0; static logical c_false = FALSE_; /* > \brief \b CBLAT2 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM CBLAT2 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX Level 2 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 18 records */ /* > of the file are read using list-directed input, the last 17 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 35 lines: */ /* > 'cblat2.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 4 NUMBER OF VALUES OF K */ /* > 0 1 2 4 VALUES OF K */ /* > 4 NUMBER OF VALUES OF INCX AND INCY */ /* > 1 2 -1 -2 VALUES OF INCX AND INCY */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA */ /* > CGEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CGBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTRMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTRSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTBSV T PUT F FOR NO TEST. 
SAME COLUMNS. */ /* > CTPSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CGERC T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CGERU T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHER T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHPR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHER2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. */ /* > An extended set of Fortran Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics */ /* > and Computer Science Division, Argonne National Laboratory, */ /* > 9700 South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > Or */ /* > */ /* > NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms */ /* > Group Ltd., NAG Central Office, 256 Banbury Road, Oxford */ /* > OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st */ /* > Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. */ /* > */ /* > */ /* > -- Written on 10-August-1987. */ /* > Richard Hanson, Sandia National Labs. */ /* > Jeremy Du Croz, NAG Central Office. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. 
*/ /* > \date April 2012 */ /* > \ingroup complex_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static char snames[6*17] = "CGEMV " "CGBMV " "CHEMV " "CHBMV " "CHPMV " "CTRMV " "CTBMV " "CTPMV " "CTRSV " "CTBSV " "CTPSV " "CGERC " "CGERU " "CHER " "CHPR " "CHER2 " "CHPR2 "; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 VALUE OF K IS LESS THAN 0\002)"; static char fmt_9994[] = "(\002 ABSOLUTE VALUE OF INCX OR INCY IS 0 OR G" "REATER THAN \002,i2)"; static char fmt_9993[] = "(\002 TESTS OF THE COMPLEX LEVEL 2 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9992[] = "(\002 FOR N \002,9i6)"; static char fmt_9991[] = "(\002 FOR K \002,7i6)"; static char fmt_9990[] = "(\002 FOR INCX AND INCY \002,7i6)"; static char fmt_9989[] = "(\002 FOR ALPHA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9988[] = "(\002 FOR BETA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9980[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9984[] = "(a6,l2)"; static char fmt_9986[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,e9.1)"; static char fmt_9985[] = "(\002 ERROR IN CMVCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 CMVCH WAS CALLED " "WITH TRANS = \002,a1,\002 AND RETURNED SAME = \002,l1,\002 AND E" "RR = \002,f12.3,\002.\002,/\002 THIS MAY BE DUE TO FAULTS IN THE" 
" ARITHMETIC OR THE COMPILER.\002,/\002 ******* TESTS ABANDONED *" "******\002)"; static char fmt_9983[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9982[] = "(/\002 END OF TESTS\002)"; static char fmt_9981[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9987[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ complex a[4225] /* was [65][65] */; real g[65]; integer i__, j, n; complex x[65], y[65], z__[130], aa[4225]; integer kb[7]; complex as[4225], xs[130], ys[130], yt[65], xx[130], yy[130], alf[7]; extern logical lce_(complex *, complex *, integer *); integer inc[7], nkb; complex bet[7]; real eps, err; integer nalf, idim[9]; logical same; integer ninc, nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int cchk1_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk2_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, 
complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk3_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchk4_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchk5_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchk6_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, integer *, integer *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchke_(integer * , char *, integer *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; logical ltest[17], sfatal; char snamet[6]; real thresh; logical ltestt, tsterr; char summry[32]; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; 
static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___35 = { 0, 5, 0, 0, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9988, 0 }; static cilist io___54 = { 0, 0, 0, 0, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9980, 0 }; static cilist io___56 = { 0, 0, 0, 0, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, 0, 0 }; static cilist io___60 = { 0, 5, 1, fmt_9984, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___81 = { 0, 0, 0, 0, 0 }; static cilist io___82 = { 0, 0, 0, fmt_9983, 0 }; static cilist io___83 = { 0, 0, 0, 0, 0 }; static cilist io___90 = { 0, 0, 0, fmt_9982, 0 }; static cilist io___91 = { 0, 0, 
0, fmt_9981, 0 }; static cilist io___92 = { 0, 0, 0, fmt_9987, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. 
*/ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__4, &c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L10: */ } /* Values of K */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nkb, (ftnlen)sizeof(integer)); e_rsle(); if (nkb < 1 || nkb > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___29); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { if (kb[i__ - 1] < 0) { io___31.ciunit = nout; s_wsfe(&io___31); e_wsfe(); goto L230; } /* L20: */ } /* Values of INCX and INCY */ s_rsle(&io___32); do_lio(&c__3, &c__1, (char *)&ninc, (ftnlen)sizeof(integer)); e_rsle(); if (ninc < 1 || ninc > 7) { io___34.ciunit = nout; s_wsfe(&io___34); do_fio(&c__1, "INCX AND INCY", (ftnlen)13); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___35); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); 
i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { if (inc[i__ - 1] == 0 || (i__2 = inc[i__ - 1], abs(i__2)) > 2) { io___37.ciunit = nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&c__2, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L30: */ } /* Values of ALPHA */ s_rsle(&io___38); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___40.ciunit = nout; s_wsfe(&io___40); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___41); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__6, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(complex)); } e_rsle(); /* Values of BETA */ s_rsle(&io___43); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___45.ciunit = nout; s_wsfe(&io___45); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___46); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__6, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(complex)); } e_rsle(); /* Report values of parameters. 
*/ io___48.ciunit = nout; s_wsfe(&io___48); e_wsfe(); io___49.ciunit = nout; s_wsfe(&io___49); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___50.ciunit = nout; s_wsfe(&io___50); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___51.ciunit = nout; s_wsfe(&io___51); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___52.ciunit = nout; s_wsfe(&io___52); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); io___53.ciunit = nout; s_wsfe(&io___53); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); if (! tsterr) { io___54.ciunit = nout; s_wsle(&io___54); e_wsle(); io___55.ciunit = nout; s_wsfe(&io___55); e_wsfe(); } io___56.ciunit = nout; s_wsle(&io___56); e_wsle(); io___57.ciunit = nout; s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_wsfe(); io___58.ciunit = nout; s_wsle(&io___58); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. 
*/ for (i__ = 1; i__ <= 17; ++i__) { ltest[i__ - 1] = FALSE_; /* L40: */ } L50: i__1 = s_rsfe(&io___60); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L80; } i__1 = e_rsfe(); if (i__1 != 0) { goto L80; } for (i__ = 1; i__ <= 17; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } /* L60: */ } io___63.ciunit = nout; s_wsfe(&io___63); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L70: ltest[i__ - 1] = ltestt; goto L50; L80: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = s_epsilon_(&c_b122); io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real)); e_wsfe(); /* Check the reliability of CMVCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * 65 - 66; /* Computing MAX */ i__5 = i__ - j + 1; i__4 = max(i__5,0); a[i__3].r = (real) i__4, a[i__3].i = 0.f; /* L110: */ } i__2 = j - 1; x[i__2].r = (real) j, x[i__2].i = 0.f; i__2 = j - 1; y[i__2].r = 0.f, y[i__2].i = 0.f; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; yy[i__2].r = (real) i__3, yy[i__2].i = 0.f; /* L130: */ } /* YY holds the exact result. On exit from CMVCH YT holds */ /* the result computed by CMVCH. */ *(unsigned char *)trans = 'N'; cmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c__1, &c_b1, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lce_(yy, yt, &n); if (! 
same || err != 0.f) { io___78.ciunit = nout; s_wsfe(&io___78); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; cmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c_n1, &c_b1, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lce_(yy, yt, &n); if (! same || err != 0.f) { io___79.ciunit = nout; s_wsfe(&io___79); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 17; ++isnum) { io___81.ciunit = nout; s_wsle(&io___81); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___82.ciunit = nout; s_wsfe(&io___82); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { cchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___83.ciunit = nout; s_wsle(&io___83); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L140; case 3: goto L150; case 4: goto L150; case 5: goto L150; case 6: goto L160; case 7: goto L160; case 8: goto L160; case 9: goto L160; case 10: goto L160; case 11: goto L160; case 12: goto L170; case 13: goto L170; case 14: goto L180; case 15: goto L180; case 16: goto L190; case 17: goto L190; } /* Test CGEMV, 01, and CGBMV, 02. */ L140: cchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test CHEMV, 03, CHBMV, 04, and CHPMV, 05. 
*/ L150: cchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test CTRMV, 06, CTBMV, 07, CTPMV, 08, */ /* CTRSV, 09, CTBSV, 10, and CTPSV, 11. */ L160: cchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test CGERC, 12, CGERU, 13. */ L170: cchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test CHER, 14, and CHPR, 15. */ L180: cchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test CHER2, 16, and CHPR2, 17. */ L190: cchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: if (fatal && sfatal) { goto L220; } } /* L210: */ } io___90.ciunit = nout; s_wsfe(&io___90); e_wsfe(); goto L240; L220: io___91.ciunit = nout; s_wsfe(&io___91); e_wsfe(); goto L240; L230: io___92.ciunit = nout; s_wsfe(&io___92); e_wsfe(); L240: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of CBLAT2. 
*/ return 0; } /* main */ /*
 * cchk1_ -- exercises CGEMV and CGBMV (f2c translation of the Fortran
 * test routine CCHK1).
 *
 * Routine selection: sname[2] == 'E' selects cgemv_ ("full", 11 checked
 * arguments); sname[2] == 'B' selects cgbmv_ ("banded", 13 checked
 * arguments).
 *
 * Loop nest, outermost first:
 *   in  : n = idim[in], in = 1..*nidim
 *   im  : two row counts per n: max(n - nd, 0) and min(n + nd, *nmax),
 *         with nd = n / 2 + 1
 *   iku : band widths kb[1..*nkb] (banded only; a single pass otherwise)
 *   ic  : trans taken from ich = "NTC"
 *   ix  : incx = inc[ix];  iy : incy = inc[iy]   (1..*ninc each)
 *   ia  : alpha = alf[ia] (1..*nalf);  ib : beta = bet[ib] (1..*nbet)
 *
 * Per innermost iteration:
 *   1. cmake_ is called for (a, aa), (x, xx) and (y, yy).  The aa/xx/yy
 *      arrays are what is passed to the routine under test; a/x/y are
 *      what is later passed to cmvch_.
 *   2. Every argument is snapshotted (as/xs/ys plus the "...s" scalars).
 *   3. When *trace is set the call is written to unit *ntra; when *rewi
 *      is set that unit is rewound; then cgemv_ or cgbmv_ is called.
 *   4. If infoc_1.ok is false, or any isame[] entry among the first
 *      nargs is false, *fatal is set and control goes to L130.
 *   5. Unless null (n <= 0 or m <= 0), cmvch_ checks the result and err
 *      is folded into errmax; a *fatal set by cmvch_ also goes to L130.
 *      When null, control jumps to L110 (next im).
 *
 * On normal completion a "PASSED" line (errmax < *thresh) or a
 * "SUSPECT" line is written to unit *nout.
 *
 * Arguments not referenced in this body: incmax, sname_len.
 * Returns 0 on every path.
 */ /* Subroutine */ int cchk1_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * nalf, complex *alf, integer *nbet, complex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "4(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, ic, nc, nd, im, in, kl, ml, nk, nl, ku, 
ix, iy, ms, lx, ly, ns, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als, bls; real err; integer iku, kls, kus; complex beta; integer ldas; logical same; integer incx, incy; logical full, tran, null; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cgbmv_(char *, integer *, integer *, integer * , integer *, complex *, complex *, integer *, complex *, integer * , complex *, complex *, integer *, ftnlen), cgemv_(char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cmvch_(char * , integer *, integer *, complex *, complex *, integer *, complex * , integer *, complex *, complex *, integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; char trans[1]; logical banded; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___139 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___140 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___141 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___144 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___146 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___147 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___148 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___149 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___150 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CGEMV and CGBMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments: each array pointer is moved back by one element (a by one row and one column of leading dimension *nmax) so that the 1-based subscripts used below address the caller's first element. */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; /* Define the number of arguments. nargs bounds the isame[] scan after each call; it is not assigned when sname[2] is neither 'E' nor 'B'. */ if (full) { nargs = 11; } else if (banded) { nargs = 13; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (iku = 1; iku <= i__2; ++iku) { if (banded) { ku = kb[iku]; /* Computing MAX */ i__3 = ku - 1; kl = max(i__3,0); } else { ku = n - 1; kl = m - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = kl + ku + 1; } else { lda = m; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } laa = lda * n; null = n <= 0 || m <= 0; /* Generate the matrix A. 
*/ transl.r = 0.f, transl.i = 0.f; cmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1] , &lda, &kl, &ku, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); for (ic = 1; ic <= 3; ++ic) { *(unsigned char *)trans = *(unsigned char *)&ich[ic - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char * )trans == 'C'; if (tran) { ml = n; nl = m; } else { ml = m; nl = n; } i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * nl; /* Generate the vector X. 
*/ transl.r = .5f, transl.i = 0.f; i__4 = abs(incx); i__5 = nl - 1; cmake_("GE", " ", " ", &c__1, &nl, &x[1], &c__1, &xx[ 1], &i__4, &c__0, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); if (nl > 1) { i__4 = nl / 2; x[i__4].r = 0.f, x[i__4].i = 0.f; i__4 = abs(incx) * (nl / 2 - 1) + 1; xx[i__4].r = 0.f, xx[i__4].i = 0.f; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * ml; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { i__6 = ia; alpha.r = alf[i__6].r, alpha.i = alf[i__6].i; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { i__7 = ib; beta.r = bet[i__7].r, beta.i = bet[i__7] .i; /* Generate the vector Y. */ transl.r = 0.f, transl.i = 0.f; i__7 = abs(incy); i__8 = ml - 1; cmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; kls = kl; kus = ku; als.r = alpha.r, als.i = alpha.i; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; as[i__8].r = aa[i__9].r, as[i__8].i = aa[i__9].i; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[i__9].i; /* L20: */ } incxs = incx; bls.r = beta.r, bls.i = beta.i; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[i__9].i; /* L30: */ } incys = incy; /* Call the subroutine. 
*/ if (full) { if (*trace) { io___139.ciunit = *ntra; s_wsfe(&io___139); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { if (*trace) { io___140.ciunit = *ntra; s_wsfe(&io___140); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&kl, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, ( ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgbmv_(trans, &m, &n, &kl, &ku, & alpha, &aa[1], &lda, &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___141.ciunit = *nout; s_wsfe(&io___141); e_wsfe(); *fatal = TRUE_; goto L130; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)trans == *( unsigned char *)transs; isame[1] = ms == m; isame[2] = ns == n; if (full) { isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lce_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lce_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lce_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[9] = lceres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[10] = incys == incy; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lce_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lce_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lce_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[11] = lceres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[12] = incys == incy; } /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___144.ciunit = *nout; s_wsfe(&io___144); do_fio(&c__1, (char *)&i__, ( ftnlen)sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L130; } if (! null) { /* Check the result. */ cmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L130; } } else { /* Avoid repeating tests with M.le.0 or */ /* N.le.0. */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* L120: */ } /* Report result. 
*/ if (errmax < *thresh) { io___146.ciunit = *nout; s_wsfe(&io___146); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___147.ciunit = *nout; s_wsfe(&io___147); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L140; /* Failure exit: write the routine name, then the argument list of the call that was in progress, to unit *nout. */ L130: io___148.ciunit = *nout; s_wsfe(&io___148); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___149.ciunit = *nout; s_wsfe(&io___149); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___150.ciunit = *nout; s_wsfe(&io___150); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&kl, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L140: return 0; /* End of CCHK1. 
*/ } /* cchk1_ */ /*
 * cchk2_ -- exercises CHEMV, CHBMV and CHPMV (f2c translation of the
 * Fortran test routine CCHK2).
 *
 * Routine selection by sname[2]:
 *   'E' -> chemv_ ("full",   10 checked arguments)
 *   'B' -> chbmv_ ("banded", 11 checked arguments)
 *   'P' -> chpmv_ ("packed",  9 checked arguments)
 *
 * Loop nest, outermost first:
 *   in : n = idim[in], in = 1..*nidim
 *   ik : k = kb[ik] for the banded routine (ik = 1..*nkb); otherwise a
 *        single pass with k = n - 1
 *   ic : uplo taken from ich = "UL"
 *   ix : incx = inc[ix];  iy : incy = inc[iy]   (1..*ninc each)
 *   ia : alpha = alf[ia] (1..*nalf);  ib : beta = bet[ib] (1..*nbet)
 *
 * lda is k + 1 (banded) or n, plus one when that still fits below
 * *nmax; a combination with lda > *nmax is skipped (goto L100).  laa,
 * the number of aa elements snapshotted and compared, is n * (n + 1) / 2
 * for the packed routine and lda * n otherwise.
 *
 * Per innermost iteration the routine calls cmake_ for (a, aa), (x, xx)
 * and (y, yy), snapshots the arguments into as/xs/ys and the "...s"
 * scalars, optionally traces the call to unit *ntra and rewinds it,
 * calls the routine under test on aa/xx/yy, and then:
 *   - sets *fatal and goes to L120 if infoc_1.ok is false or any of
 *     the first nargs isame[] entries is false;
 *   - unless null (n <= 0), checks the result with cmvch_ called with
 *     "N" on the n-by-n matrix a and folds err into errmax;
 *   - when null, jumps to L110 (next n).
 *
 * On normal completion a "PASSED" line (errmax < *thresh) or a
 * "SUSPECT" line is written to unit *nout.
 *
 * Arguments not referenced in this body: incmax, sname_len.
 * Returns 0 on every path.
 */ /* Subroutine */ int cchk2_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * nalf, complex *alf, integer *nbet, complex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3,\002, X,\002," "i2,\002,(\002,f4.1,\002,\002,f4.1,\002), \002,\002Y,\002,i2,\002" ") .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), AP, X,\002,i2,\002,(" "\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2,\002) " ".\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; /* Builtin 
functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als, bls; real err; complex beta; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int chbmv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, ftnlen), chemv_(char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int chpmv_(char *, integer *, complex *, complex * , complex *, integer *, complex *, complex *, integer *, ftnlen); logical reset; integer incxs, incys; char uplos[1]; logical banded, packed; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; /* Fortran I/O blocks */ static cilist io___189 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___190 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___191 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___192 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___195 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___197 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___198 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___199 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___200 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___201 = { 
0, 0, 0, fmt_9994, 0 }; static cilist io___202 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CHEMV, CHBMV and CHPMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments: each array pointer is moved back by one element (a by one row and one column of leading dimension *nmax) so that the 1-based subscripts used below address the caller's first element. */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. nargs bounds the isame[] scan after each call; it is not assigned when sname[2] is none of 'E', 'B', 'P'. */ if (full) { nargs = 10; } else if (banded) { nargs = 11; } else if (packed) { nargs = 9; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; /* Generate the matrix A. 
*/ transl.r = 0.f, transl.i = 0.f; cmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[ 1], &lda, &k, &k, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. 
*/ transl.r = .5f, transl.i = 0.f; i__4 = abs(incx); i__5 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; x[i__4].r = 0.f, x[i__4].i = 0.f; i__4 = abs(incx) * (n / 2 - 1) + 1; xx[i__4].r = 0.f, xx[i__4].i = 0.f; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * n; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { i__6 = ia; alpha.r = alf[i__6].r, alpha.i = alf[i__6].i; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { i__7 = ib; beta.r = bet[i__7].r, beta.i = bet[i__7].i; /* Generate the vector Y. */ transl.r = 0.f, transl.i = 0.f; i__7 = abs(incy); i__8 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)uplos = *(unsigned char *) uplo; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; as[i__8].r = aa[i__9].r, as[i__8].i = aa[ i__9].i; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[ i__9].i; /* L20: */ } incxs = incx; bls.r = beta.r, bls.i = beta.i; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[ i__9].i; /* L30: */ } incys = incy; /* Call the subroutine. 
*/ if (full) { if (*trace) { io___189.ciunit = *ntra; s_wsfe(&io___189); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chemv_(uplo, &n, &alpha, &aa[1], &lda, & xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (banded) { if (*trace) { io___190.ciunit = *ntra; s_wsfe(&io___190); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chbmv_(uplo, &n, &k, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (packed) { if (*trace) { io___191.ciunit = *ntra; s_wsfe(&io___191); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chpmv_(uplo, 
&n, &alpha, &aa[1], &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___192.ciunit = *nout; s_wsfe(&io___192); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *( unsigned char *)uplos; isame[1] = ns == n; if (full) { isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lce_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; isame[7] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[8] = lce_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[8] = lceres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lce_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lce_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lce_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[9] = lceres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[10] = incys == incy; } else if (packed) { isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&as[1], &aa[1], &laa); isame[4] = lce_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; isame[6] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[7] = lce_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[7] = lceres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[8] = incys == incy; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___195.ciunit = *nout; s_wsfe(&io___195); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! 
same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ cmvch_("N", &n, &n, &alpha, &a[a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0 */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___197.ciunit = *nout; s_wsfe(&io___197); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___198.ciunit = *nout; s_wsfe(&io___198); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; /* Failure exit: write the routine name, then the argument list of the call that was in progress, to unit *nout. */ L120: io___199.ciunit = *nout; s_wsfe(&io___199); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___200.ciunit = *nout; s_wsfe(&io___200); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___201.ciunit = *nout; s_wsfe(&io___201); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char 
*)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, 
(char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___202.ciunit = *nout; s_wsfe(&io___202); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of CCHK2. */ } /* cchk2_ */ /* Subroutine */ int cchk3_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer * ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *xt, real *g, complex *z__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, A,\002,i3,\002, X,\002,i2,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002 A,\002,i3,\002, X,\002,i2,\002" ") .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, AP, \002,\002X,\002,i2,\002) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 
C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda; extern logical lce_(complex *, complex *, integer *); integer ict, icu; real err; char diag[1]; integer ldas; logical same; integer incx; logical full, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); char diags[1]; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int ctbmv_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctbsv_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen); logical reset; integer incxs; char trans[1]; extern /* Subroutine */ int ctpmv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctrmv_( char *, char *, char *, integer *, complex *, integer *, complex * , integer *, ftnlen, ftnlen, ftnlen), ctpsv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *, 
ftnlen, ftnlen, ftnlen); logical banded, packed; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___239 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___240 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___241 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___242 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___243 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___244 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___245 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___248 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___250 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___251 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___252 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___253 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___254 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___255 = { 0, 0, 0, fmt_9995, 0 }; /* Tests CTRMV, CTBMV, CTPMV, CTRSV, CTBSV and CTPSV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --inc; --z__; --g; --xt; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'R'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. 
*/ if (full) { nargs = 8; } else if (banded) { nargs = 9; } else if (packed) { nargs = 7; } nc = 0; reset = TRUE_; errmax = 0.f; /* Set up zero vector for CMVCH. */ i__1 = *nmax; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; z__[i__2].r = 0.f, z__[i__2].i = 0.f; /* L10: */ } i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1] ; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; cmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__4 = abs(incx); i__5 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, & xx[1], &i__4, &c__0, &i__5, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; x[i__4].r = 0.f, x[i__4].i = 0.f; i__4 = abs(incx) * (n / 2 - 1) + 1; xx[i__4].r = 0.f, xx[i__4].i = 0.f; } ++nc; /* Save every datum before calling the subroutine. 
*/ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; *(unsigned char *)diags = *(unsigned char *)diag; ns = n; ks = k; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[i__6] .i; /* L20: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; xs[i__5].r = xx[i__6].r, xs[i__5].i = xx[i__6] .i; /* L30: */ } incxs = incx; /* Call the subroutine. */ if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { io___239.ciunit = *ntra; s_wsfe(&io___239); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctrmv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___240.ciunit = *ntra; s_wsfe(&io___240); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___241.ciunit = *ntra; s_wsfe(&io___241); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); 
do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctpmv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { if (full) { if (*trace) { io___242.ciunit = *ntra; s_wsfe(&io___242); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctrsv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___243.ciunit = *ntra; s_wsfe(&io___243); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___244.ciunit = *ntra; s_wsfe(&io___244); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char 
*)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ctpsv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___245.ciunit = *nout; s_wsfe(&io___245); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { isame[4] = lce_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; if (null) { isame[6] = lce_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[6] = lceres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[7] = incxs == incx; } else if (banded) { isame[4] = ks == k; isame[5] = lce_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (null) { isame[7] = lce_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[7] = lceres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[8] = incxs == incx; } else if (packed) { isame[4] = lce_(&as[1], &aa[1], &laa); if (null) { isame[5] = lce_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[5] = lceres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[6] = incxs == incx; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___248.ciunit = *nout; s_wsfe(&io___248); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen) 2) == 0) { /* Check the result. 
*/ cmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &x[1], &incx, &c_b1, &z__[ 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { /* Compute approximation to original vector. */ i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = (i__ - 1) * abs(incx) + 1; z__[i__5].r = xx[i__6].r, z__[i__5].i = xx[i__6].i; i__5 = (i__ - 1) * abs(incx) + 1; i__6 = i__; xx[i__5].r = x[i__6].r, xx[i__5].i = x[i__6].i; /* L50: */ } cmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &z__[1], &incx, &c_b1, &x[ 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0. */ goto L110; } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___250.ciunit = *nout; s_wsfe(&io___250); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___251.ciunit = *nout; s_wsfe(&io___251); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___252.ciunit = *nout; s_wsfe(&io___252); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___253.ciunit = *nout; s_wsfe(&io___253); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___254.ciunit = *nout; s_wsfe(&io___254); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, 
(ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___255.ciunit = *nout; s_wsfe(&io___255); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of CCHK3. */ } /* cchk3_ */ /* Subroutine */ int cchk4_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(i3,\002," "\002),\002(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y," "\002,i2,\002, A,\002,i3,\002) \002,\002 " ".\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " 
\002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, m, n; complex w[1]; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als; real err; integer ldas; logical same, conj; integer incx, incy; logical null; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen), cgerc_( integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen), cgeru_(integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); integer nargs; logical reset; integer incxs, incys; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; /* Fortran I/O blocks */ static cilist io___285 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___286 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___289 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___293 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___294 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___295 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___296 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___297 = { 0, 0, 0, fmt_9994, 0 }; /* Tests CGERC and CGERU. 
*/ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ conj = *(unsigned char *)&sname[4] == 'C'; /* Define the number of arguments. */ nargs = 9; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } /* Set LDA to 1 more than minimum value if room. */ lda = m; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * n; null = n <= 0 || m <= 0; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * m; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__3 = abs(incx); i__4 = m - 1; cmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { i__3 = m / 2; x[i__3].r = 0.f, x[i__3].i = 0.f; i__3 = abs(incx) * (m / 2 - 1) + 1; xx[i__3].r = 0.f, xx[i__3].i = 0.f; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. 
*/ transl.r = 0.f, transl.i = 0.f; i__4 = abs(incy); i__5 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; y[i__4].r = 0.f, y[i__4].i = 0.f; i__4 = abs(incy) * (n / 2 - 1) + 1; yy[i__4].r = 0.f, yy[i__4].i = 0.f; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; i__5 = m - 1; i__6 = n - 1; cmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7].i; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; xs[i__6].r = xx[i__7].r, xs[i__6].i = xx[i__7].i; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; ys[i__6].r = yy[i__7].r, ys[i__6].i = yy[i__7].i; /* L30: */ } incys = incy; /* Call the subroutine. 
*/ if (*trace) { io___285.ciunit = *ntra; s_wsfe(&io___285); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (conj) { if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgerc_(&m, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda); } else { if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cgeru_(&m, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___286.ciunit = *nout; s_wsfe(&io___286); e_wsfe(); *fatal = TRUE_; goto L140; } /* See what data changed inside subroutine. */ isame[0] = ms == m; isame[1] = ns == n; isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lce_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lce_(&as[1], &aa[1], &laa); } else { isame[7] = lceres_("GE", " ", &m, &n, &as[1], &aa[ 1], &lda, (ftnlen)2, (ftnlen)1); } isame[8] = ldas == lda; /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___289.ciunit = *nout; s_wsfe(&io___289); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L140; } if (! null) { /* Check the result column by column. 
*/ if (incx > 0) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L50: */ } } else { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = m - i__ + 1; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L60: */ } } i__5 = n; for (j = 1; j <= i__5; ++j) { if (incy > 0) { i__6 = j; w[0].r = y[i__6].r, w[0].i = y[i__6].i; } else { i__6 = n - j + 1; w[0].r = y[i__6].r, w[0].i = y[i__6].i; } if (conj) { r_cnjg(&q__1, w); w[0].r = q__1.r, w[0].i = q__1.i; } cmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b2, &a[j * a_dim1 + 1], & c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L130; } /* L70: */ } } else { /* Avoid repeating tests with M.le.0 or N.le.0. */ goto L110; } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___293.ciunit = *nout; s_wsfe(&io___293); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___294.ciunit = *nout; s_wsfe(&io___294); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L150; L130: io___295.ciunit = *nout; s_wsfe(&io___295); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L140: io___296.ciunit = *nout; s_wsfe(&io___296); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___297.ciunit = *nout; s_wsfe(&io___297); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char 
*)&lda, (ftnlen)sizeof(integer)); e_wsfe(); L150: return 0; /* End of CCHK4. */ } /* cchk4_ */ /* Subroutine */ int cchk5_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; complex q__1; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, n; complex w[1]; integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda; extern 
logical lce_(complex *, complex *, integer *); real err; extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, integer *, complex *, integer *, ftnlen); integer ldas; logical same; extern /* Subroutine */ int chpr_(char *, integer *, real *, complex *, integer *, complex *, ftnlen); real rals; integer incx; logical full, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs; logical upper; char uplos[1]; logical packed; real ralpha; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; /* Fortran I/O blocks */ static cilist io___326 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___327 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___328 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___331 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___338 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___340 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___341 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___342 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___343 = { 0, 0, 0, fmt_9994, 0 }; /* Tests CHER and CHPR. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. 
External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 7; } else if (packed) { nargs = 6; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__3 = abs(incx); i__4 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; x[i__3].r = 0.f, x[i__3].i = 0.f; i__3 = abs(incx) * (n / 2 - 1) + 1; xx[i__3].r = 0.f, xx[i__3].i = 0.f; } i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; ralpha = alf[i__4].r; q__1.r = ralpha, q__1.i = 0.f; alpha.r = q__1.r, alpha.i = q__1.i; null = n <= 0 || ralpha == 0.f; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; i__4 = n - 1; i__5 = n - 1; cmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, & aa[1], &lda, &i__4, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. 
*/ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; rals = ralpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[i__6].i; /* L10: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; xs[i__5].r = xx[i__6].r, xs[i__5].i = xx[i__6].i; /* L20: */ } incxs = incx; /* Call the subroutine. */ if (full) { if (*trace) { io___326.ciunit = *ntra; s_wsfe(&io___326); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cher_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___327.ciunit = *ntra; s_wsfe(&io___327); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chpr_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___328.ciunit = *nout; s_wsfe(&io___328); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *) uplos; isame[1] = ns == n; isame[2] = rals == ralpha; isame[3] = lce_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; if (null) { isame[5] = lce_(&as[1], &aa[1], &laa); } else { isame[5] = lceres_(sname + 1, uplo, &n, &n, &as[1], & aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! 
packed) { isame[6] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___331.ciunit = *nout; s_wsfe(&io___331); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; z__[i__5].r = x[i__6].r, z__[i__5].i = x[i__6] .i; /* L40: */ } } else { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = n - i__ + 1; z__[i__5].r = x[i__6].r, z__[i__5].i = x[i__6] .i; /* L50: */ } } ja = 1; i__4 = n; for (j = 1; j <= i__4; ++j) { r_cnjg(&q__1, &z__[j]); w[0].r = q__1.r, w[0].i = q__1.i; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } cmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L110; } /* L60: */ } } else { /* Avoid repeating tests if N.le.0. */ if (n <= 0) { goto L100; } } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } /* Report result. 
*/ if (errmax < *thresh) { io___338.ciunit = *nout; s_wsfe(&io___338); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L110: io___340.ciunit = *nout; s_wsfe(&io___340); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L120: io___341.ciunit = *nout; s_wsfe(&io___341); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___342.ciunit = *nout; s_wsfe(&io___342); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___343.ciunit = *nout; s_wsfe(&io___343); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of CCHK5. 
*/ } /* cchk5_ */
/*
 * cchk6_ -- f2c-translated test driver for the Hermitian rank-2 update
 * routines cher2_ and chpr2_ ("Tests CHER2 and CHPR2", see below).
 *
 * The third character of sname selects the routine under test:
 *   sname[2] == 'E'  sets `full`   and calls cher2_ (9 checked arguments);
 *   sname[2] == 'P'  sets `packed` and calls chpr2_ (8 checked arguments).
 *
 * For every combination of order n (idim[1..*nidim]), uplo ('U' then 'L'),
 * incx and incy (both taken from inc[1..*ninc]) and alpha (alf[1..*nalf])
 * the routine:
 *   1. generates X, Y and A with cmake_ and saves a copy of every argument;
 *   2. writes a trace record to unit *ntra when *trace is set, then calls
 *      f_rew on that unit when *rewi is set;
 *   3. calls cher2_ or chpr2_;
 *   4. treats a cleared infoc_1.ok as a fatal error;
 *   5. compares every argument with its saved copy (isame[]) and reports
 *      each one that differs;
 *   6. unless the call is a "null" one (n <= 0 or alpha == 0), checks the
 *      result one column at a time with cmvch_ and keeps the largest
 *      reported error in errmax.
 * After the loops it prints a PASSED line when errmax < *thresh and a
 * SUSPECT line otherwise.  On a fatal error it prints the failing call
 * instead (labels L150 / L160).
 *
 * Parameters (all by reference, Fortran calling convention):
 *   sname       routine name; 6 characters are printed, sname[2] selects
 *               the variant and sname + 1 is handed to cmake_ / lceres_
 *   eps         forwarded unchanged to cmvch_
 *   thresh      threshold compared against errmax in the final report
 *   nout        unit number for result and error messages
 *   ntra        unit number for trace records
 *   trace       non-zero enables the per-call trace record
 *   rewi        non-zero makes the routine call f_rew on *ntra before
 *               each call of the routine under test
 *   fatal       set to TRUE_ here on an unexpected error exit or a changed
 *               argument; also passed to cmvch_ and tested afterwards
 *   nidim, idim number of orders and the orders themselves
 *   nalf, alf   number of alpha values and the values themselves
 *   ninc, inc   number of increments and the increments themselves
 *   nmax        leading dimension used for a and z__, and upper limit
 *               for lda
 *   incmax      not referenced in this translated body
 *   a, aa, as   matrix generated by cmake_, the copy passed to the routine
 *               under test, and the saved copy of aa
 *   x, xx, xs   same three roles for the vector X
 *   y, yy, ys   same three roles for the vector Y
 *   yt, g       work arrays handed to cmvch_
 *   z__         two-column work array: column 1 receives x, column 2
 *               receives y before the column-by-column check
 *   sname_len   not referenced in this translated body
 *
 * Returns 0 on every path.
 */
/* Subroutine */ int cchk6_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, complex *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, complex *a, complex *aa, complex *as, complex *x, complex *xx, complex *xs, complex *y, complex *yy, complex *ys, complex *yt, real *g, complex * z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y,\002," "i2,\002, A,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y,\002," "i2,\002, AP) \002,\002 .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; complex q__1, q__2, q__3; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, n; complex w[2]; integer ia, ja, ic, nc, 
jj, lj, in, ix, iy, ns, lx, ly, laa, lda; extern logical lce_(complex *, complex *, integer *); complex als; real err; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex * , integer *, complex *, integer *, complex *, integer *, ftnlen), chpr2_(char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, ftnlen), cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, integer *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; logical isame[13]; extern /* Subroutine */ int cmvch_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, complex *, real *, complex *, real *, real *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; logical upper; char uplos[1]; logical packed; extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; complex transl; /* Fortran I/O blocks */ static cilist io___375 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___376 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___377 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___380 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___387 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___388 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___389 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___390 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___391 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___392 = { 0, 0, 0, fmt_9994, 0 }; /* Tests CHER2 and CHPR2. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. 
*/ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */
/* Every array pointer is moved back by one element (by one row plus one */
/* column for the two-dimensional a and z__) so that the 1-based */
/* subscripts used below address the caller's first element. */
--idim; --alf; --inc; z_dim1 = *nmax; z_offset = 1 + z_dim1; z__ -= z_offset; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */
/* NOTE(review): nargs is left unassigned when sname[2] is neither 'E' */
/* nor 'P'; it is later used as the upper bound of the isame[] scan. */
if (full) { nargs = 9; } else if (packed) { nargs = 8; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */
/* L140 labels the end of this loop body, so the goto below simply */
/* moves on to the next order in idim. */
if (lda > *nmax) { goto L140; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5f, transl.i = 0.f; i__3 = abs(incx); i__4 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; x[i__3].r = 0.f, x[i__3].i = 0.f; i__3 = abs(incx) * (n / 2 - 1) + 1; xx[i__3].r = 0.f, xx[i__3].i = 0.f; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. 
*/ transl.r = 0.f, transl.i = 0.f; i__4 = abs(incy); i__5 = n - 1; cmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; y[i__4].r = 0.f, y[i__4].i = 0.f; i__4 = abs(incy) * (n / 2 - 1) + 1; yy[i__4].r = 0.f, yy[i__4].i = 0.f; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i;
/* `null` marks a call after which aa must still equal its saved copy: */
/* it selects the plain lce_ comparison for isame[7] and skips the */
/* cmvch_ check further down. */
null = n <= 0 || alpha.r == 0.f && alpha.i == 0.f; /* Generate the matrix A. */ transl.r = 0.f, transl.i = 0.f; i__5 = n - 1; i__6 = n - 1; cmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7].i; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; xs[i__6].r = xx[i__7].r, xs[i__6].i = xx[i__7].i; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; ys[i__6].r = yy[i__7].r, ys[i__6].i = yy[i__7].i; /* L30: */ } incys = incy; /* Call the subroutine. 
*/ if (full) { if (*trace) { io___375.ciunit = *ntra; s_wsfe(&io___375); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cher2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___376.ciunit = *ntra; s_wsfe(&io___376); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } chpr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___377.ciunit = *nout; s_wsfe(&io___377); e_wsfe(); *fatal = TRUE_; goto L160; } /* See what data changed inside subroutines. */
/* isame[k] follows the argument order of the call above: uplo, n, */
/* alpha, x, incx, y, incy, a and, for the full-storage routine only, */
/* lda. */
isame[0] = *(unsigned char *)uplo == *(unsigned char * )uplos; isame[1] = ns == n; isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lce_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lce_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lce_(&as[1], &aa[1], &laa); } else { isame[7] = lceres_(sname + 1, uplo, &n, &n, &as[1] , &aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[8] = ldas == lda; } /* If data was incorrectly changed, report and return. 
*/ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___380.ciunit = *nout; s_wsfe(&io___380); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L160; } if (! null) { /* Check the result column by column. */
/* Column 1 of z__ receives x and column 2 receives y; each vector is */
/* copied in reverse order when its increment is not positive. */
if (incx > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + z_dim1; i__7 = i__; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L50: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + z_dim1; i__7 = n - i__ + 1; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L60: */ } } if (incy > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + (z_dim1 << 1); i__7 = i__; z__[i__6].r = y[i__7].r, z__[i__6].i = y[ i__7].i; /* L70: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + (z_dim1 << 1); i__7 = n - i__ + 1; z__[i__6].r = y[i__7].r, z__[i__6].i = y[ i__7].i; /* L80: */ } }
/* For column j: w[0] = alpha * conj(z(j,2)) and */
/* w[1] = conj(alpha) * conj(z(j,1)), the conjugates coming from the */
/* f2c builtin r_cnjg.  jj and lj are the starting row and the length */
/* passed to cmvch_ (row 1, length j for upper; row j, length n-j+1 for */
/* lower), and ja is the aa subscript passed for that column. */
ja = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { r_cnjg(&q__2, &z__[j + (z_dim1 << 1)]); q__1.r = alpha.r * q__2.r - alpha.i * q__2.i, q__1.i = alpha.r * q__2.i + alpha.i * q__2.r; w[0].r = q__1.r, w[0].i = q__1.i; r_cnjg(&q__2, &alpha); r_cnjg(&q__3, &z__[j + z_dim1]); q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = q__2.r * q__3.i + q__2.i * q__3.r; w[1].r = q__1.r, w[1].i = q__1.i; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } cmvch_("N", &lj, &c__2, &c_b2, &z__[jj + z_dim1], nmax, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], & aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L150; } /* L90: */ } } else { /* Avoid repeating tests with N.le.0. */ if (n <= 0) { goto L140; } } /* L100: */ } /* L110: */ } /* L120: */ } /* L130: */ } L140: ; } /* Report result. 
*/ if (errmax < *thresh) { io___387.ciunit = *nout; s_wsfe(&io___387); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___388.ciunit = *nout; s_wsfe(&io___388); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L170;
/* Failure exits: L150 first prints the failing column j and then falls */
/* into L160, which prints the failure banner followed by the arguments */
/* of the offending call. */
L150: io___389.ciunit = *nout; s_wsfe(&io___389); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L160: io___390.ciunit = *nout; s_wsfe(&io___390); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___391.ciunit = *nout; s_wsfe(&io___391); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___392.ciunit = *nout; s_wsfe(&io___392); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L170: return 0; /* End of CCHK6. 
*/ } /* cchk6_ */
/*
 * cchke_ -- f2c-translated check of the error exits of the complex
 * Level 2 BLAS routines.
 *
 * infoc_1.ok is set to TRUE_ and infoc_1.lerr to FALSE_, then *isnum
 * selects one group of calls:
 *    1 cgemv_   2 cgbmv_   3 chemv_   4 chbmv_   5 chpmv_   6 ctrmv_
 *    7 ctbmv_   8 ctpmv_   9 ctrsv_  10 ctbsv_  11 ctpsv_  12 cgerc_
 *   13 cgeru_  14 cher_   15 chpr_   16 cher2_  17 chpr2_
 * Within a group every call is preceded by an assignment to
 * infoc_1.infot and followed by a call of chkxer_ with srnamt, infot,
 * nout, lerr and ok.
 * NOTE(review): infot is presumably the position of the argument that
 * the special XERBLA is expected to flag -- XERBLA and chkxer_ are not
 * visible here, confirm against them.
 * All groups end at L180, which prints the PASSED line when infoc_1.ok is
 * still set and the FAILED line otherwise.
 *
 * The local scalars and one-element arrays alpha, ralpha, beta, a, x and
 * y are passed to the routines without being assigned (see the original
 * note "should not need to be defined" below).
 *
 * Parameters (all by reference, Fortran calling convention):
 *   isnum       selects the routine group as listed above
 *   srnamt      routine name; passed to chkxer_ and printed (6 characters)
 *   nout        unit number for the result line; also passed to chkxer_
 *   srnamt_len  not referenced in this translated body
 *
 * Returns 0 on every path.
 */
/* Subroutine */ int cchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ complex a[1] /* was [1][1] */, x[1], y[1], beta; extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, integer *, complex *, integer *, ftnlen), chpr_(char *, integer *, real *, complex *, integer *, complex *, ftnlen), cher2_(char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *, ftnlen), chpr2_(char *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, ftnlen), cgerc_(integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *); complex alpha; extern /* Subroutine */ int cgbmv_(char *, integer *, integer *, integer * , integer *, complex *, complex *, integer *, complex *, integer * , complex *, complex *, integer *, ftnlen), chbmv_(char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cgemv_(char * , integer *, integer *, complex *, complex *, integer *, complex * , integer *, complex *, complex *, integer *, ftnlen), chemv_( char *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen), cgeru_( integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *), ctbmv_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), chpmv_(char *, integer *, complex *, complex *, complex *, integer *, complex *, complex *, integer *, ftnlen), ctbsv_(char *, char *, char *, integer *, integer 
*, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctpmv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctrmv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctpsv_(char *, char *, char *, integer *, complex *, complex *, integer *, ftnlen, ftnlen, ftnlen), ctrsv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen); real ralpha; extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___399 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___400 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 2 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* ALPHA, RALPHA, BETA, A, X and Y should not need to be defined. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. 
*/ infoc_1.lerr = FALSE_; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; case 10: goto L100; case 11: goto L110; case 12: goto L120; case 13: goto L130; case 14: goto L140; case 15: goto L150; case 16: goto L160; case 17: goto L170; }
/* The switch has no default case, so a value of *isnum outside 1..17 */
/* falls through into the cgemv_ group at L10. */
/* isnum == 1: cgemv_ */
L10: infoc_1.infot = 1; cgemv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgemv_("N", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemv_("N", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; cgemv_("N", &c__2, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; cgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 2: cgbmv_ */
L20: infoc_1.infot = 1; cgbmv_("/", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgbmv_("N", &c_n1, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgbmv_("N", &c__0, &c_n1, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgbmv_("N", &c__0, &c__0, &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgbmv_("N", &c__2, &c__0, &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgbmv_("N", &c__0, &c__0, &c__1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 3: chemv_ */
L30: infoc_1.infot = 1; chemv_("/", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chemv_("U", &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; chemv_("U", &c__2, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemv_("U", &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; chemv_("U", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 4: chbmv_ */
L40: infoc_1.infot = 1; chbmv_("/", 
&c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chbmv_("U", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chbmv_("U", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; chbmv_("U", &c__0, &c__1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; chbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; chbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 5: chpmv_ */
L50: infoc_1.infot = 1; chpmv_("/", &c__0, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chpmv_("U", &c_n1, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; chpmv_("U", &c__0, &alpha, a, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chpmv_("U", &c__0, &alpha, a, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 6: ctrmv_ */
L60: infoc_1.infot = 1; ctrmv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctrmv_("U", "/", "N", &c__0, a, 
&c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctrmv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctrmv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ctrmv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 7: ctbmv_ */
L70: infoc_1.infot = 1; ctbmv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctbmv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctbmv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctbmv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctbmv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ctbmv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 9; ctbmv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 8: ctpmv_ */
L80: infoc_1.infot = 1; ctpmv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctpmv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctpmv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctpmv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ctpmv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 9: ctrsv_ */
L90: infoc_1.infot = 1; ctrsv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctrsv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctrsv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctrsv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, 
&infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ctrsv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 10: ctbsv_ */
L100: infoc_1.infot = 1; ctbsv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctbsv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctbsv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctbsv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctbsv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ctbsv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctbsv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 11: ctpsv_ */
L110: infoc_1.infot = 1; ctpsv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctpsv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctpsv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, 
(ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctpsv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ctpsv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 12: cgerc_ */
L120: infoc_1.infot = 1; cgerc_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgerc_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgerc_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cgerc_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cgerc_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 13: cgeru_ */
L130: infoc_1.infot = 1; cgeru_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgeru_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgeru_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cgeru_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cgeru_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, 
&infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 14: cher_ */
L140: infoc_1.infot = 1; cher_("/", &c__0, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cher_("U", &c_n1, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cher_("U", &c__0, &ralpha, x, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher_("U", &c__2, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 15: chpr_ */
L150: infoc_1.infot = 1; chpr_("/", &c__0, &ralpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chpr_("U", &c_n1, &ralpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; chpr_("U", &c__0, &ralpha, x, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 16: cher2_ */
L160: infoc_1.infot = 1; cher2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cher2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cher2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2_("U", &c__2, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, 
&infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180;
/* isnum == 17: chpr2_ */
L170: infoc_1.infot = 1; chpr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chpr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; chpr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chpr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6);
/* Common exit: print the PASSED line if infoc_1.ok is still set, the */
/* FAILED line otherwise. */
L180: if (infoc_1.ok) { io___399.ciunit = *nout; s_wsfe(&io___399); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___400.ciunit = *nout; s_wsfe(&io___400); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of CCHKE. */ } /* cchke_ */
/* Subroutine */ int cmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, complex *a, integer *nmax, complex *aa, integer *lda, integer *kl, integer *ku, logical *reset, complex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; real r__1; complex q__1, q__2; /* Builtin functions */ void r_cnjg(complex *, complex *); integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, i1, i2, i3, jj, kk; logical gen, tri, sym; extern /* Complex */ void cbeg_(complex *, logical *); integer ibeg, iend, ioff; logical unit, lower, upper; /* Generates values for an M by N matrix A within the bandwidth */ /* defined by KL and KU. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'GB', 'HE', 'HB', 'HP', 'TR', 'TB' OR 'TP'. */ /* Auxiliary routine for test program for Level 2 Blas. 
*/ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = *(unsigned char *)type__ == 'G'; sym = *(unsigned char *)type__ == 'H'; tri = *(unsigned char *)type__ == 'T'; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { i__3 = i__ + j * a_dim1; cbeg_(&q__2, reset); q__1.r = q__2.r + transl->r, q__1.i = q__2.i + transl->i; a[i__3].r = q__1.r, a[i__3].i = q__1.i; } else { i__3 = i__ + j * a_dim1; a[i__3].r = 0.f, a[i__3].i = 0.f; } if (i__ != j) { if (sym) { i__3 = j + i__ * a_dim1; r_cnjg(&q__1, &a[i__ + j * a_dim1]); a[i__3].r = q__1.r, a[i__3].i = q__1.i; } else if (tri) { i__3 = j + i__ * a_dim1; a[i__3].r = 0.f, a[i__3].i = 0.f; } } } /* L10: */ } if (sym) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; r__1 = a[i__3].r; q__1.r = r__1, q__1.i = 0.f; a[i__2].r = q__1.r, a[i__2].i = q__1.i; } if (tri) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; q__1.r = a[i__3].r + 1.f, q__1.i = a[i__3].i + 0.f; a[i__2].r = q__1.r, a[i__2].i = q__1.i; } if (unit) { i__2 = j + j * a_dim1; a[i__2].r = 1.f, a[i__2].i = 0.f; } /* L20: */ } /* Store elements in array AS in data structure required by routine. 
*/ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "GB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *ku + 1 - j; for (i1 = 1; i1 <= i__2; ++i1) { i__3 = i1 + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L60: */ } /* Computing MIN */ i__3 = *kl + *ku + 1, i__4 = *ku + 1 + *m - j; i__2 = min(i__3,i__4); for (i2 = i1; i2 <= i__2; ++i2) { i__3 = i2 + (j - 1) * *lda; i__4 = i2 + j - *ku - 1 + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L70: */ } i__2 = *lda; for (i3 = i2; i3 <= i__2; ++i3) { i__3 = i3 + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L80: */ } /* L90: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L100: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L110: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L120: */ } if (sym) { jj = j + (j - 1) * *lda; i__2 = jj; i__3 = jj; r__1 = aa[i__3].r; q__1.r = r__1, q__1.i = -1e10f; aa[i__2].r = q__1.r, aa[i__2].i = q__1.i; } /* L130: */ } } else if (s_cmp(type__, "HB", (ftnlen)2, (ftnlen)2) == 
0 || s_cmp(type__, "TB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { kk = *kl + 1; /* Computing MAX */ i__2 = 1, i__3 = *kl + 2 - j; ibeg = max(i__2,i__3); if (unit) { iend = *kl; } else { iend = *kl + 1; } } else { kk = 1; if (unit) { ibeg = 2; } else { ibeg = 1; } /* Computing MIN */ i__2 = *kl + 1, i__3 = *m + 1 - j; iend = min(i__2,i__3); } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L140: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j - kk + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L150: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L160: */ } if (sym) { jj = kk + (j - 1) * *lda; i__2 = jj; i__3 = jj; r__1 = aa[i__3].r; q__1.r = r__1, q__1.i = -1e10f; aa[i__2].r = q__1.r, aa[i__2].i = q__1.i; } /* L170: */ } } else if (s_cmp(type__, "HP", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TP", (ftnlen)2, (ftnlen)2) == 0) { ioff = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { ++ioff; i__3 = ioff; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; if (i__ == j) { if (unit) { i__3 = ioff; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; } if (sym) { i__3 = ioff; i__4 = ioff; r__1 = aa[i__4].r; q__1.r = r__1, q__1.i = -1e10f; aa[i__3].r = q__1.r, aa[i__3].i = q__1.i; } } /* L180: */ } /* L190: */ } } return 0; /* End of CMAKE. 
*/

} /* cmake_ */

/*
 * cmvch_ -- recompute a matrix-vector result independently and measure how
 * far the routine's own result YY is from it.
 *
 *   trans        'T' uses A transposed, 'C' uses the conjugate transpose,
 *                any other value uses A as is.
 *   m, n         dimensions of A; ml / nl below are the result and operand
 *                lengths after accounting for trans.
 *   alpha, beta  scalars applied to op(A)*x and to y.
 *   a, nmax      matrix and its leading dimension.
 *   x, incx      operand vector; only the sign of *incx is used, x itself
 *                is walked with stride +1 or -1.
 *   y, incy      second operand; walked the same way using the sign of
 *                *incy.
 *   yt           output: the expected result.
 *   g            output: gauge values used to scale the error.
 *   yy           result under test, read with stride abs(*incy).
 *   eps          divisor applied to the raw difference.
 *   err          output: largest error ratio seen so far.
 *   fatal        set to TRUE_ on the L60 path.
 *   nout         unit number used for the report.
 *   mv           selects the column order of the report.
 *   trans_len    hidden Fortran string length; not referenced in the body.
 *
 * Always returns 0.
 */
/* Subroutine */ int cmvch_(char *trans, integer *m, integer *n, complex *
        alpha, complex *a, integer *nmax, complex *x, integer *incx, complex *
        beta, complex *y, integer *incy, complex *yt, real *g, complex *yy,
        real *eps, real *err, logical *fatal, integer *nout, logical *mv,
        ftnlen trans_len)
{
    /* Format strings */
    static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS"
            " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 "
            " EXPECTED RE\002,\002SULT COMPUTED R"
            "ESULT\002)";
    static char fmt_9998[] = "(1x,i7,2(\002 (\002,g15.6,\002,\002,g15.6,"
            "\002)\002))";

    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
    real r__1, r__2, r__3, r__4, r__5, r__6;
    complex q__1, q__2, q__3;

    /* Builtin functions */
    double r_imag(complex *);
    void r_cnjg(complex *, complex *);
    double c_abs(const complex *), sqrt(doublereal);
    integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen);

    /* Local variables */
    integer i__, j, ml, nl, iy, jx, kx, ky;
    real erri;
    logical tran, ctran;
    integer incxl, incyl;

    /* Fortran I/O blocks */
    static cilist io___430 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___431 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___432 = { 0, 0, 0, fmt_9998, 0 };


/*  Checks the results of the computational tests. */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Intrinsic Functions .. */
/*     .. Statement Functions .. */
/*     .. Statement Function definitions .. */
/*     .. Executable Statements .. */
    /* Parameter adjustments */
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --x;
    --y;
    --yt;
    --g;
    --yy;

    /* Function Body */
    tran = *(unsigned char *)trans == 'T';
    ctran = *(unsigned char *)trans == 'C';
    /* ml = length of the result, nl = length of the operand x. */
    if (tran || ctran) {
        ml = *n;
        nl = *m;
    } else {
        ml = *m;
        nl = *n;
    }
    /* A negative increment means the vector is walked back to front. */
    if (*incx < 0) {
        kx = nl;
        incxl = -1;
    } else {
        kx = 1;
        incxl = 1;
    }
    if (*incy < 0) {
        ky = ml;
        incyl = -1;
    } else {
        ky = 1;
        incyl = 1;
    }

/*     Compute expected result in YT using data in A, X and Y. */
/*     Compute gauges in G. */

    iy = ky;
    i__1 = ml;
    for (i__ = 1; i__ <= i__1; ++i__) {
        i__2 = iy;
        yt[i__2].r = 0.f, yt[i__2].i = 0.f;
        g[iy] = 0.f;
        jx = kx;
        if (tran) {
            /* yt(iy) += a(j,i) * x(jx) */
            i__2 = nl;
            for (j = 1; j <= i__2; ++j) {
                i__3 = iy;
                i__4 = iy;
                i__5 = j + i__ * a_dim1;
                i__6 = jx;
                q__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i,
                        q__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6]
                        .r;
                q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i;
                yt[i__3].r = q__1.r, yt[i__3].i = q__1.i;
                i__3 = j + i__ * a_dim1;
                i__4 = jx;
                g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j
                        + i__ * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r,
                        abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4)));
                jx += incxl;
/* L10: */
            }
        } else if (ctran) {
            /* yt(iy) += conj(a(j,i)) * x(jx) */
            i__2 = nl;
            for (j = 1; j <= i__2; ++j) {
                i__3 = iy;
                i__4 = iy;
                r_cnjg(&q__3, &a[j + i__ * a_dim1]);
                i__5 = jx;
                q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, q__2.i =
                        q__3.r * x[i__5].i + q__3.i * x[i__5].r;
                q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i;
                yt[i__3].r = q__1.r, yt[i__3].i = q__1.i;
                i__3 = j + i__ * a_dim1;
                i__4 = jx;
                g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[j
                        + i__ * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r,
                        abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4)));
                jx += incxl;
/* L20: */
            }
        } else {
            /* yt(iy) += a(i,j) * x(jx) */
            i__2 = nl;
            for (j = 1; j <= i__2; ++j) {
                i__3 = iy;
                i__4 = iy;
                i__5 = i__ + j * a_dim1;
                i__6 = jx;
                q__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i,
                        q__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6]
                        .r;
                q__1.r = yt[i__4].r + q__2.r, q__1.i = yt[i__4].i + q__2.i;
                yt[i__3].r = q__1.r, yt[i__3].i = q__1.i;
                i__3 = i__ + j * a_dim1;
                i__4 = jx;
                g[iy] += ((r__1 = a[i__3].r, abs(r__1)) + (r__2 = r_imag(&a[
                        i__ + j * a_dim1]), abs(r__2))) * ((r__3 = x[i__4].r,
                        abs(r__3)) + (r__4 = r_imag(&x[jx]), abs(r__4)));
                jx += incxl;
/* L30: */
            }
        }
        /* yt(iy) = alpha * yt(iy) + beta * y(iy); the gauge is scaled */
        /* the same way using sums of absolute real and imaginary parts. */
        i__2 = iy;
        i__3 = iy;
        q__2.r = alpha->r * yt[i__3].r - alpha->i * yt[i__3].i, q__2.i =
                alpha->r * yt[i__3].i + alpha->i * yt[i__3].r;
        i__4 = iy;
        q__3.r = beta->r * y[i__4].r - beta->i * y[i__4].i, q__3.i = beta->r *
                 y[i__4].i + beta->i * y[i__4].r;
        q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
        yt[i__2].r = q__1.r, yt[i__2].i = q__1.i;
        i__2 = iy;
        g[iy] = ((r__1 = alpha->r, abs(r__1)) + (r__2 = r_imag(alpha), abs(
                r__2))) * g[iy] + ((r__3 = beta->r, abs(r__3)) + (r__4 =
                r_imag(beta), abs(r__4))) * ((r__5 = y[i__2].r, abs(r__5)) + (
                r__6 = r_imag(&y[iy]), abs(r__6)));
        iy += incyl;
/* L40: */
    }

/*     Compute the error ratio for this result. */

    *err = 0.f;
    i__1 = ml;
    for (i__ = 1; i__ <= i__1; ++i__) {
        i__2 = i__;
        i__3 = (i__ - 1) * abs(*incy) + 1;
        q__1.r = yt[i__2].r - yy[i__3].r, q__1.i = yt[i__2].i - yy[i__3].i;
        erri = c_abs(&q__1) / *eps;
        /* Scale by the gauge unless it is exactly zero. */
        if (g[i__] != 0.f) {
            erri /= g[i__];
        }
        *err = max(*err,erri);
        /* Stop scanning and report as soon as the running maximum */
        /* reaches 1/sqrt(eps). */
        if (*err * sqrt(*eps) >= 1.f) {
            goto L60;
        }
/* L50: */
    }
/*     If the loop completes, all results are at least half accurate. */
    goto L80;

/*     Report fatal error.
*/

L60:
    *fatal = TRUE_;
    io___430.ciunit = *nout;
    s_wsfe(&io___430);
    e_wsfe();
    /* One record per result element; *mv only swaps which of yt / yy is */
    /* written first. */
    i__1 = ml;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (*mv) {
            io___431.ciunit = *nout;
            s_wsfe(&io___431);
            do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer));
            do_fio(&c__2, (char *)&yt[i__], (ftnlen)sizeof(real));
            do_fio(&c__2, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen)
                    sizeof(real));
            e_wsfe();
        } else {
            io___432.ciunit = *nout;
            s_wsfe(&io___432);
            do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer));
            do_fio(&c__2, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen)
                    sizeof(real));
            do_fio(&c__2, (char *)&yt[i__], (ftnlen)sizeof(real));
            e_wsfe();
        }
/* L70: */
    }

L80:
    return 0;

/*     End of CMVCH. */

} /* cmvch_ */

/*
 * lce_ -- exact element-wise comparison of two complex arrays.
 *
 *   ri, rj   arrays to compare (shifted to 1-based indexing below).
 *   lr       number of elements to compare.
 *
 * Returns TRUE_ when every one of the first *lr elements has identical
 * real and imaginary parts in both arrays (including the case where the
 * loop body never runs), FALSE_ at the first mismatch.
 */
logical lce_(complex *ri, complex *rj, integer *lr)
{
    /* System generated locals */
    integer i__1, i__2, i__3;
    logical ret_val;

    /* Local variables */
    integer i__;


/*  Tests if two arrays are identical. */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Executable Statements .. */
    /* Parameter adjustments */
    --rj;
    --ri;

    /* Function Body */
    i__1 = *lr;
    for (i__ = 1; i__ <= i__1; ++i__) {
        i__2 = i__;
        i__3 = i__;
        if (ri[i__2].r != rj[i__3].r || ri[i__2].i != rj[i__3].i) {
            goto L20;
        }
/* L10: */
    }
    ret_val = TRUE_;
    goto L30;
L20:
    ret_val = FALSE_;
L30:
    return ret_val;

/*     End of LCE. */

} /* lce_ */

/*
 * lceres_ -- compare the entries of aa and as that lie outside the part of
 * the matrix selected by type__ / uplo.
 *
 *   type__   two-character type; the body has branches for "GE" and "HE"
 *            only, any other value falls through and yields TRUE_.
 *   uplo     'U' selects the upper triangle for "HE"; anything else is
 *            treated as lower.
 *   m, n     matrix dimensions.
 *   aa, as   arrays to compare; both are indexed with leading dimension
 *            *lda.
 *   type_len, uplo_len
 *            hidden Fortran string lengths; not referenced in the body.
 *
 * Returns TRUE_ when all compared entries match exactly, else FALSE_.
 */
logical lceres_(char *type__, char *uplo, integer *m, integer *n, complex *aa,
         complex *as, integer *lda, ftnlen type_len, ftnlen uplo_len)
{
    /* System generated locals */
    integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
    logical ret_val;

    /* Builtin functions */
    integer s_cmp(const char *, const char *, ftnlen, ftnlen);

    /* Local variables */
    integer i__, j, ibeg, iend;
    logical upper;


/*  Tests if selected elements in two arrays are equal.
*/

/*  TYPE is 'GE', 'HE' or 'HP'. */
/*  (Only "GE" and "HE" have a branch in the code below; for any other */
/*  TYPE nothing is compared and TRUE_ is returned.) */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Executable Statements .. */
    /* Parameter adjustments */
    as_dim1 = *lda;
    as_offset = 1 + as_dim1;
    as -= as_offset;
    aa_dim1 = *lda;
    aa_offset = 1 + aa_dim1;
    aa -= aa_offset;

    /* Function Body */
    upper = *(unsigned char *)uplo == 'U';
    if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) {
        /* Rows m+1..lda of every column. */
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *lda;
            for (i__ = *m + 1; i__ <= i__2; ++i__) {
                i__3 = i__ + j * aa_dim1;
                i__4 = i__ + j * as_dim1;
                if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) {
                    goto L70;
                }
/* L10: */
            }
/* L20: */
        }
    } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0) {
        /* Rows ibeg..iend of column j are skipped; rows before ibeg and */
        /* rows iend+1..lda are compared. */
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            if (upper) {
                ibeg = 1;
                iend = j;
            } else {
                ibeg = j;
                iend = *n;
            }
            i__2 = ibeg - 1;
            for (i__ = 1; i__ <= i__2; ++i__) {
                i__3 = i__ + j * aa_dim1;
                i__4 = i__ + j * as_dim1;
                if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) {
                    goto L70;
                }
/* L30: */
            }
            i__2 = *lda;
            for (i__ = iend + 1; i__ <= i__2; ++i__) {
                i__3 = i__ + j * aa_dim1;
                i__4 = i__ + j * as_dim1;
                if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) {
                    goto L70;
                }
/* L40: */
            }
/* L50: */
        }
    }

    ret_val = TRUE_;
    goto L80;
L70:
    ret_val = FALSE_;
L80:
    return ret_val;

/*     End of LCERES. */

} /* lceres_ */

/*
 * cbeg_ -- write the next value of a deterministic pseudo-random complex
 * sequence into *ret_val.
 *
 *   ret_val  output: the generated value.
 *   reset    when non-zero the generator state is re-initialised and
 *            *reset is cleared to FALSE_.
 *
 * The generator state is kept in function-static integers, so successive
 * calls continue the same sequence.
 */
/* Complex */ void cbeg_(complex * ret_val, logical *reset)
{
    /* System generated locals */
    real r__1, r__2;
    complex q__1;

    /* Local variables */
    static integer i__, j, ic, mi, mj;


/*  Generates complex numbers as pairs of random numbers uniformly */
/*  distributed between -0.5 and 0.5. */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/*     ..
Scalar Arguments .. */
/*     .. Local Scalars .. */
/*     .. Save statement .. */
/*     .. Intrinsic Functions .. */
/*     .. Executable Statements .. */
    if (*reset) {
/*        Initialize local variables. */
        mi = 891;
        mj = 457;
        i__ = 7;
        j = 7;
        ic = 0;
        *reset = FALSE_;
    }

/*     The sequence of values of I or J is bounded between 1 and 999. */
/*     If initial I or J = 1,2,3,6,7 or 9, the period will be 50. */
/*     If initial I or J = 4 or 8, the period will be 25. */
/*     If initial I or J = 5, the period will be 10. */
/*     IC is used to break up the period by skipping 1 value of I or J */
/*     in 6. */

    ++ic;
L10:
    /* Advance both generators: multiply, then reduce modulo 1000. */
    i__ *= mi;
    j *= mj;
    i__ -= i__ / 1000 * 1000;
    j -= j / 1000 * 1000;
    /* When the counter reaches 5, discard this pair and draw again. */
    if (ic >= 5) {
        ic = 0;
        goto L10;
    }
    r__1 = (i__ - 500) / 1001.f;
    r__2 = (j - 500) / 1001.f;
    q__1.r = r__1, q__1.i = r__2;
     ret_val->r = q__1.r,  ret_val->i = q__1.i;
    return ;

/*     End of CBEG. */

} /* cbeg_ */

/*
 * sdiff_ -- returns *x - *y.
 */
real sdiff_(real *x, real *y)
{
    /* System generated locals */
    real ret_val;


/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */

/*     .. Scalar Arguments .. */
/*     .. Executable Statements .. */
    ret_val = *x - *y;
    return ret_val;

/*     End of SDIFF. */

} /* sdiff_ */

/*
 * chkxer_ -- check the lerr flag after a call that was expected to raise
 * an error.
 *
 *   srnamt   routine name written into the message (6 characters).
 *   infot    parameter number written into the message.
 *   nout     unit number for the message.
 *   lerr     flag inspected here; always cleared to FALSE_ on return.
 *   ok       set to FALSE_ when *lerr was not set; otherwise untouched.
 *   srnamt_len  hidden Fortran string length; not referenced in the body.
 *
 * Always returns 0.
 */
/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout,
        logical *lerr, logical *ok, ftnlen srnamt_len)
{
    /* Format strings */
    static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER"
            " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)";

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Fortran I/O blocks */
    static cilist io___444 = { 0, 0, 0, fmt_9999, 0 };


/*  Tests whether XERBLA has detected an error when it should. */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/*     .. Scalar Arguments .. */
/*     .. Executable Statements .. */
    if (! (*lerr)) {
        io___444.ciunit = *nout;
        s_wsfe(&io___444);
        do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer));
        do_fio(&c__1, srnamt, (ftnlen)6);
        e_wsfe();
        *ok = FALSE_;
    }
    /* Re-arm the flag for the next check. */
    *lerr = FALSE_;
    return 0;

/*     End of CHKXER. */

} /* chkxer_ */

/*
 * xerbla_ -- test-harness replacement for the BLAS error handler.
 *
 *   srname   name of the routine reporting the error (6 characters are
 *            compared and printed).
 *   info     parameter number being reported.
 *   srname_len  hidden Fortran string length; not referenced in the body.
 *
 * Sets infoc_2.lerr, and clears infoc_2.ok (after writing a message to
 * unit infoc_2.nout) when *info differs from infoc_2.infot or when srname
 * differs from srnamc_1.srnamt.  Always returns 0.
 */
/* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len)
{
    /* Format strings */
    static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO ="
            " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)";
    static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO ="
            " \002,i6,\002 *******\002)";
    static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME ="
            " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)";

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
             s_cmp(const char *, const char *, ftnlen, ftnlen);

    /* Fortran I/O blocks */
    static cilist io___445 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___446 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___447 = { 0, 0, 0, fmt_9998, 0 };


/*  This is a special version of XERBLA to be used only as part of */
/*  the test program for testing error exits from the Level 2 BLAS */
/*  routines. */

/*  XERBLA  is an error handler for the Level 2 BLAS routines. */

/*  It is called by the Level 2 BLAS routines if an input parameter is */
/*  invalid. */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/*     .. Scalar Arguments .. */
/*     .. Scalars in Common .. */
/*     .. Common blocks .. */
/*     .. Executable Statements ..
*/
    /* Record that the handler ran; chkxer_ reads and clears this flag. */
    infoc_2.lerr = TRUE_;
    if (*info != infoc_2.infot) {
        /* Unexpected parameter number: the message names the expected */
        /* value only when one was set (infot != 0). */
        if (infoc_2.infot != 0) {
            io___445.ciunit = infoc_2.nout;
            s_wsfe(&io___445);
            do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer));
            do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer));
            e_wsfe();
        } else {
            io___446.ciunit = infoc_2.nout;
            s_wsfe(&io___446);
            do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer));
            e_wsfe();
        }
        infoc_2.ok = FALSE_;
    }
    /* The reporting routine must be the one currently under test. */
    if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) {
        io___447.ciunit = infoc_2.nout;
        s_wsfe(&io___447);
        do_fio(&c__1, srname, (ftnlen)6);
        do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6);
        e_wsfe();
        infoc_2.ok = FALSE_;
    }
    return 0;

/*     End of XERBLA */

} /* xerbla_ */

/* Main program alias */
/* cblat2_ simply invokes main() and returns 0. */
int cblat2_ () { main (); return 0; }
blis-0.6.1/blastest/src/cblat3.c000066400000000000000000005737351360743507500164310ustar00rootroot00000000000000/* cblat3.f -- translated by f2c (version 20100827).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/

#include "f2c.h"

/* Common Block Declarations */

/* infoc_ is declared as a union of two structs whose members differ only */
/* in the name of the second field (noutc / nout). */
union {
    struct {
	integer infot, noutc;
	logical ok, lerr;
    } _1;
    struct {
	integer infot, nout;
	logical ok, lerr;
    } _2;
} infoc_;

#define infoc_1 (infoc_._1)
#define infoc_2 (infoc_._2)

struct {
    char srnamt[6];
} srnamc_;

#define srnamc_1 srnamc_

/* Table of constant values */

static complex c_b1 = {0.f,0.f};
static complex c_b2 = {1.f,0.f};
static integer c__9 = 9;
static integer c__1 = 1;
static integer c__3 = 3;
static integer c__8 = 8;
static integer c__4 = 4;
static integer c__65 = 65;
static integer c__7 = 7;
static integer c__6 = 6;
static integer c__2 = 2;
static real c_b86 = 0.f;
static logical c_true = TRUE_;
static
logical c_false = FALSE_; static integer c__0 = 0; static integer c_n1 = -1; /* > \brief \b CBLAT3 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM CBLAT3 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX Level 3 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 14 records */ /* > of the file are read using list-directed input, the last 9 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 23 lines: */ /* > 'cblat3.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA */ /* > CGEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CSYMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTRMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CTRSM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHERK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CSYRK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CHER2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Duff I. S. 
and Hammarling S. */ /* > A Set of Level 3 Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memorandum No.88 (Revision 1), Mathematics and */ /* > Computer Science Division, Argonne National Laboratory, 9700 */ /* > South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > -- Written on 8-February-1989. */ /* > Jack Dongarra, Argonne National Laboratory. */ /* > Iain Duff, AERE Harwell. */ /* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* > Sven Hammarling, Numerical Algorithms Group Ltd. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static char snames[6*9] = "CGEMM " "CHEMM " "CSYMM " "CTRMM " "CTRSM " "CHERK " "CSYRK " "CHER2K" "CSYR2K"; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 TESTS OF THE COMPLEX LEVEL 3 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9994[] = "(\002 FOR N \002,9i6)"; static char fmt_9993[] = "(\002 FOR ALPHA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9992[] = "(\002 FOR BETA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9984[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; 
static char fmt_9988[] = "(a6,l2)"; static char fmt_9990[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,e9.1)"; static char fmt_9989[] = "(\002 ERROR IN CMMCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 CMMCH WAS CALLED " "WITH TRANSA = \002,a1,\002 AND TRANSB = \002,a1,/\002 AND RETURN" "ED SAME = \002,l1,\002 AND \002,\002ERR = \002,f12.3,\002.\002," "/\002 THIS MAY BE DUE TO FAULTS IN THE \002,\002ARITHMETIC OR TH" "E COMPILER.\002,/\002 ******* TESTS ABANDONED \002,\002******" "*\002)"; static char fmt_9987[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9986[] = "(/\002 END OF TESTS\002)"; static char fmt_9985[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9991[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ complex c__[4225] /* was [65][65] */; real g[65]; integer i__, j, n; complex w[130], aa[4225], ab[8450] /* was [65][130] */, bb[4225], cc[ 4225], as[4225], bs[4225], cs[4225], ct[65], alf[7]; extern logical lce_(complex *, complex *, integer *); complex bet[7]; real eps, err; integer nalf, idim[9]; logical same; integer nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int cchk1_(char *, real *, real *, 
integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk2_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk3_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchk4_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, ftnlen), cchk5_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, complex *, real *, complex *, ftnlen), cchke_(integer *, char *, integer *, ftnlen); logical fatal; extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); logical trace; integer nidim; char snaps[32]; integer isnum; logical ltest[9], sfatal; char snamet[6], transa[1], transb[1]; real thresh; logical ltestt, tsterr; char summry[32]; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static 
cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___40 = { 0, 0, 0, 0, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9984, 0 }; static cilist io___42 = { 0, 0, 0, 0, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, 0, 0 }; static cilist io___46 = { 0, 5, 1, fmt_9988, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___67 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___69 = { 0, 0, 0, 0, 0 }; static cilist io___70 = { 0, 0, 0, fmt_9987, 0 }; static cilist io___71 = { 0, 0, 0, 0, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___80 = { 0, 0, 0, fmt_9991, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = 0; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = 0; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__4, &c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_rsle(); /* Read and check the parameter values for the tests. 
*/ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } /* L10: */ } /* Values of ALPHA */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___29); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__6, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(complex)); } e_rsle(); /* Values of BETA */ s_rsle(&io___31); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___33.ciunit = nout; s_wsfe(&io___33); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___34); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__6, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(complex)); } e_rsle(); /* Report values of parameters. 
*/ io___36.ciunit = nout; s_wsfe(&io___36); e_wsfe(); io___37.ciunit = nout; s_wsfe(&io___37); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___38.ciunit = nout; s_wsfe(&io___38); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); io___39.ciunit = nout; s_wsfe(&io___39); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); if (! tsterr) { io___40.ciunit = nout; s_wsle(&io___40); e_wsle(); io___41.ciunit = nout; s_wsfe(&io___41); e_wsfe(); } io___42.ciunit = nout; s_wsle(&io___42); e_wsle(); io___43.ciunit = nout; s_wsfe(&io___43); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_wsfe(); io___44.ciunit = nout; s_wsle(&io___44); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 9; ++i__) { ltest[i__ - 1] = FALSE_; /* L20: */ } L30: i__1 = s_rsfe(&io___46); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L60; } i__1 = e_rsfe(); if (i__1 != 0) { goto L60; } for (i__ = 1; i__ <= 9; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } /* L40: */ } io___49.ciunit = nout; s_wsfe(&io___49); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L50: ltest[i__ - 1] = ltestt; goto L30; L60: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = s_epsilon_(&c_b86); io___51.ciunit = nout; s_wsfe(&io___51); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real)); e_wsfe(); /* Check the reliability of CMMCH using exact data. 
*/ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * 65 - 66; /* Computing MAX */ i__5 = i__ - j + 1; i__4 = max(i__5,0); ab[i__3].r = (real) i__4, ab[i__3].i = 0.f; /* L90: */ } i__2 = j + 4224; ab[i__2].r = (real) j, ab[i__2].i = 0.f; i__2 = (j + 65) * 65 - 65; ab[i__2].r = (real) j, ab[i__2].i = 0.f; i__2 = j - 1; c__[i__2].r = 0.f, c__[i__2].i = 0.f; /* L100: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; cc[i__2].r = (real) i__3, cc[i__2].i = 0.f; /* L110: */ } /* CC holds the exact result. On exit from CMMCH CT holds */ /* the result computed by CMMCH. */ *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { io___64.ciunit = nout; s_wsfe(&io___64); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'C'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! 
same || err != 0.f) { io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j + 4224; i__3 = n - j + 1; ab[i__2].r = (real) i__3, ab[i__2].i = 0.f; i__2 = (j + 65) * 65 - 65; i__3 = n - j + 1; ab[i__2].r = (real) i__3, ab[i__2].i = 0.f; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n - j; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; cc[i__2].r = (real) i__3, cc[i__2].i = 0.f; /* L130: */ } *(unsigned char *)transa = 'C'; *(unsigned char *)transb = 'N'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { io___66.ciunit = nout; s_wsfe(&io___66); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'C'; cmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lce_(cc, ct, &n); if (! same || err != 0.f) { io___67.ciunit = nout; s_wsfe(&io___67); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 9; ++isnum) { io___69.ciunit = nout; s_wsle(&io___69); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. 
*/ io___70.ciunit = nout; s_wsfe(&io___70); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { cchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___71.ciunit = nout; s_wsle(&io___71); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L150; case 3: goto L150; case 4: goto L160; case 5: goto L160; case 6: goto L170; case 7: goto L170; case 8: goto L180; case 9: goto L180; } /* Test CGEMM, 01. */ L140: cchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CHEMM, 02, CSYMM, 03. */ L150: cchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CTRMM, 04, CTRSM, 05. */ L160: cchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test CHERK, 06, CSYRK, 07. */ L170: cchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test CHER2K, 08, CSYR2K, 09. 
*/
L180:
      cchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, &
          trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet,
          bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, (
          ftnlen)6);
      goto L190;
L190:
      if (fatal && sfatal) {
        goto L210;
      }
    }
/* L200: */
  }
  io___78.ciunit = nout;
  s_wsfe(&io___78);
  e_wsfe();
  goto L230;
L210:
  io___79.ciunit = nout;
  s_wsfe(&io___79);
  e_wsfe();
  goto L230;
L220:
  io___80.ciunit = nout;
  s_wsfe(&io___80);
  e_wsfe();
L230:
  if (trace) {
    cl__1.cerr = 0;
    cl__1.cunit = ntra;
    cl__1.csta = 0;
    f_clos(&cl__1);
  }
  cl__1.cerr = 0;
  cl__1.cunit = nout;
  cl__1.csta = 0;
  f_clos(&cl__1);
  s_stop("", (ftnlen)0);
/* End of CBLAT3. */
  return 0;
} /* main */

/*
 * cchk1_ -- test driver for cgemm_.
 *
 * For every combination of
 *   - m, n and k taken from the first *nidim entries of idim,
 *   - transa and transb taken from the three characters of ich ("NTC"),
 *   - alpha taken from the first *nalf entries of alf,
 *   - beta taken from the first *nbet entries of bet,
 * the routine builds A, B and C with cmake_, saves a copy of every
 * argument, calls cgemm_, compares each argument with its saved copy and,
 * unless m or n is non-positive, hands the result to cmmch_.
 *
 * Arguments:
 *   sname      routine name; six characters of it are printed in reports.
 *   eps        forwarded unchanged to cmmch_.
 *   thresh     if the largest err returned by cmmch_ is below *thresh the
 *              fmt_9999 ("PASSED") report is written, otherwise the
 *              fmt_9997 ("SUSPECT") report.
 *   nout       unit number used for all result and error reports.
 *   ntra       unit number for the per-call trace record.
 *   trace      when true, a fmt_9995 record is written to *ntra before
 *              each cgemm_ call.
 *   rewi       when true, unit *ntra is rewound (f_rew) before each call.
 *   fatal      set to TRUE_ here when infoc_1.ok is false after the call
 *              or when an argument differs from its saved copy; it is also
 *              passed to cmmch_, and the routine stops testing if it is
 *              set on return from cmmch_.
 *   nidim,idim count and values of the dimensions to try.
 *   nalf,alf   count and values of alpha.
 *   nbet,bet   count and values of beta.
 *   nmax       leading dimension used to index a, b and c__; also the
 *              upper limit for lda, ldb and ldc.
 *   a,b,c__    matrices filled by cmake_ and later passed to cmmch_.
 *   aa,bb,cc   arrays filled by cmake_ and passed to cgemm_.
 *   as,bs,cs   receive element-by-element copies of aa, bb, cc before
 *              the call.
 *   ct,g       passed to cmmch_.
 *   sname_len  not referenced in the body.
 *
 * Returns 0 on every path.
 */
/* Subroutine */ int cchk1_(char *sname, real *eps, real *thresh, integer *
    nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
    integer *nidim, integer *idim, integer *nalf, complex *alf, integer *
    nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex *
    as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc,
    complex *cs, complex *ct, real *g, ftnlen sname_len)
{
  /* Initialized data */
  /* The three values tried for each of transa and transb. */
  static char ich[3] = "NTC";

  /* Format strings */
  /* fmt_9995: one-line description of a cgemm_ call (trace and failure). */
  static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002','\002"
      ",a1,\002',\002,3(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,"
      "\002), A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1"
      ",\002), C,\002,i3,\002).\002)";
  static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
      "N VALID CALL *\002,\002******\002)";
  static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
      " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
  static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
      "STS (\002,i6,\002 CALL\002,\002S)\002)";
  static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
      " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
      "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
  /* The next literal is reproduced exactly as the source has it, */
  /* including the line break that falls inside it. */
  static char fmt_9996[] = "(\002 ******* 
\002,a6,\002 FAILED ON CALL NUMB" "ER:\002)";

  /* System generated locals */
  integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
      i__3, i__4, i__5, i__6, i__7, i__8;
  alist al__1;

  /* Builtin functions */
  integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
      f_rew(alist *);

  /* Local variables */
  integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns,
      ica, icb, laa, lbb, lda, lcc, ldb, ldc;
  extern logical lce_(complex *, complex *, integer *);
  complex als, bls;
  real err;
  complex beta;
  integer ldas, ldbs, ldcs;
  logical same, null;
  extern /* Subroutine */ int cmake_(char *, char *, char *, integer *,
      integer *, complex *, integer *, complex *, integer *, logical *,
      complex *, ftnlen, ftnlen, ftnlen);
  complex alpha;
  extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
      integer *, complex *, complex *, integer *, complex *, integer *,
      complex *, complex *, integer *, ftnlen, ftnlen), cmmch_(char *,
      char *, integer *, integer *, integer *, complex *, complex *,
      integer *, complex *, integer *, complex *, complex *, integer *,
      complex *, real *, complex *, integer *, real *, real *, logical *
      , integer *, logical *, ftnlen, ftnlen);
  logical isame[13], trana, tranb;
  integer nargs;
  logical reset;
  extern logical lceres_(char *, char *, integer *, integer *, complex *,
      complex *, integer *, ftnlen, ftnlen);
  char tranas[1], tranbs[1], transa[1], transb[1];
  real errmax;

  /* Fortran I/O blocks */
  /* The unit field (ciunit) of each block is filled in just before use. */
  static cilist io___124 = { 0, 0, 0, fmt_9995, 0 };
  static cilist io___125 = { 0, 0, 0, fmt_9994, 0 };
  static cilist io___128 = { 0, 0, 0, fmt_9998, 0 };
  static cilist io___130 = { 0, 0, 0, fmt_9999, 0 };
  static cilist io___131 = { 0, 0, 0, fmt_9997, 0 };
  static cilist io___132 = { 0, 0, 0, fmt_9996, 0 };
  static cilist io___133 = { 0, 0, 0, fmt_9995, 0 };

/* Tests CGEMM. */
/* Auxiliary routine for test program for Level 3 Blas. */
/* -- Written on 8-February-1989. */
/* Jack Dongarra, Argonne National Laboratory. */
/* Iain Duff, AERE Harwell. */
/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/* Sven Hammarling, Numerical Algorithms Group Ltd. */
/* .. Parameters .. */
/* .. Scalar Arguments .. */
/* .. Array Arguments .. */
/* .. Local Scalars .. */
/* .. Local Arrays .. */
/* .. External Functions .. */
/* .. External Subroutines .. */
/* .. Intrinsic Functions .. */
/* .. Scalars in Common .. */
/* .. Common blocks .. */
/* .. Data statements .. */

  /* Parameter adjustments */
  /* Each vector pointer is moved back by one element so that the code */
  /* below can index it starting at 1. The matrices a, b and c__ are */
  /* moved back by 1 + *nmax, so element [a_offset] (likewise b_offset, */
  /* c_offset) is the first element the caller passed in. */
  --idim;
  --alf;
  --bet;
  --g;
  --ct;
  --cs;
  --cc;
  c_dim1 = *nmax;
  c_offset = 1 + c_dim1;
  c__ -= c_offset;
  --bs;
  --bb;
  b_dim1 = *nmax;
  b_offset = 1 + b_dim1;
  b -= b_offset;
  --as;
  --aa;
  a_dim1 = *nmax;
  a_offset = 1 + a_dim1;
  a -= a_offset;

  /* Function Body */
/* .. Executable Statements .. */

  /* nargs is the number of isame entries examined after each call. */
  nargs = 13;
  /* nc counts the cgemm_ calls made; it is printed in every report. */
  nc = 0;
  reset = TRUE_;
  errmax = 0.f;

  i__1 = *nidim;
  for (im = 1; im <= i__1; ++im) {
    m = idim[im];

    i__2 = *nidim;
    for (in = 1; in <= i__2; ++in) {
      n = idim[in];
/* Set LDC to 1 more than minimum value if room. */
      ldc = m;
      if (ldc < *nmax) {
        ++ldc;
      }
/* Skip tests if not enough room. */
      /* L100 sits at the end of this loop body: go on to the next n. */
      if (ldc > *nmax) {
        goto L100;
      }
      lcc = ldc * n;
      /* null selects lce_ instead of lceres_ for the C comparison and */
      /* suppresses the cmmch_ result check further down. */
      null = n <= 0 || m <= 0;

      i__3 = *nidim;
      for (ik = 1; ik <= i__3; ++ik) {
        k = idim[ik];

        for (ica = 1; ica <= 3; ++ica) {
          *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1]
              ;
          trana = *(unsigned char *)transa == 'T' || *(unsigned
              char *)transa == 'C';

          /* A is generated as ma-by-na: k-by-m when transa is */
          /* 'T' or 'C', m-by-k otherwise. */
          if (trana) {
            ma = k;
            na = m;
          } else {
            ma = m;
            na = k;
          }
/* Set LDA to 1 more than minimum value if room. */
          lda = ma;
          if (lda < *nmax) {
            ++lda;
          }
/* Skip tests if not enough room. */
          /* L80 sits at the end of the ica loop body. */
          if (lda > *nmax) {
            goto L80;
          }
          laa = lda * na;

/* Generate the matrix A. */
          cmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[
              1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (
              ftnlen)1);

          for (icb = 1; icb <= 3; ++icb) {
            *(unsigned char *)transb = *(unsigned char *)&ich[icb
                - 1];
            tranb = *(unsigned char *)transb == 'T' || *(unsigned
                char *)transb == 'C';

            /* B is generated as mb-by-nb: n-by-k when transb */
            /* is 'T' or 'C', k-by-n otherwise. */
            if (tranb) {
              mb = n;
              nb = k;
            } else {
              mb = k;
              nb = n;
            }
/* Set LDB to 1 more than minimum value if room. */
            ldb = mb;
            if (ldb < *nmax) {
              ++ldb;
            }
/* Skip tests if not enough room. */
            /* L70 sits at the end of the icb loop body. */
            if (ldb > *nmax) {
              goto L70;
            }
            lbb = ldb * nb;

/* Generate the matrix B. */
            cmake_("GE", " ", " ", &mb, &nb, &b[b_offset], nmax, &
                bb[1], &ldb, &reset, &c_b1, (ftnlen)2, (
                ftnlen)1, (ftnlen)1);

            i__4 = *nalf;
            for (ia = 1; ia <= i__4; ++ia) {
              i__5 = ia;
              alpha.r = alf[i__5].r, alpha.i = alf[i__5].i;

              i__5 = *nbet;
              for (ib = 1; ib <= i__5; ++ib) {
                i__6 = ib;
                beta.r = bet[i__6].r, beta.i = bet[i__6].i;

/* Generate the matrix C. */
                cmake_("GE", " ", " ", &m, &n, &c__[c_offset],
                    nmax, &cc[1], &ldc, &reset, &c_b1, (
                    ftnlen)2, (ftnlen)1, (ftnlen)1);

                ++nc;

/* Save every datum before calling the */
/* subroutine. */
                *(unsigned char *)tranas = *(unsigned char *)
                    transa;
                *(unsigned char *)tranbs = *(unsigned char *)
                    transb;
                ms = m;
                ns = n;
                ks = k;
                als.r = alpha.r, als.i = alpha.i;
                /* Copy the laa elements of aa into as. */
                i__6 = laa;
                for (i__ = 1; i__ <= i__6; ++i__) {
                  i__7 = i__;
                  i__8 = i__;
                  as[i__7].r = aa[i__8].r, as[i__7].i = aa[
                      i__8].i;
/* L10: */
                }
                ldas = lda;
                /* Copy the lbb elements of bb into bs. */
                i__6 = lbb;
                for (i__ = 1; i__ <= i__6; ++i__) {
                  i__7 = i__;
                  i__8 = i__;
                  bs[i__7].r = bb[i__8].r, bs[i__7].i = bb[
                      i__8].i;
/* L20: */
                }
                ldbs = ldb;
                bls.r = beta.r, bls.i = beta.i;
                /* Copy the lcc elements of cc into cs. */
                i__6 = lcc;
                for (i__ = 1; i__ <= i__6; ++i__) {
                  i__7 = i__;
                  i__8 = i__;
                  cs[i__7].r = cc[i__8].r, cs[i__7].i = cc[
                      i__8].i;
/* L30: */
                }
                ldcs = ldc;

/* Call the subroutine. */
                /* Optional trace record describing this call. */
                if (*trace) {
                  io___124.ciunit = *ntra;
                  s_wsfe(&io___124);
                  do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                      integer));
                  do_fio(&c__1, sname, (ftnlen)6);
                  do_fio(&c__1, transa, (ftnlen)1);
                  do_fio(&c__1, transb, (ftnlen)1);
                  do_fio(&c__1, (char *)&m, (ftnlen)sizeof(
                      integer));
                  do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                      integer));
                  do_fio(&c__1, (char *)&k, (ftnlen)sizeof(
                      integer));
                  /* alpha and beta are each written as two */
                  /* items of size sizeof(real). */
                  do_fio(&c__2, (char *)&alpha, (ftnlen)
                      sizeof(real));
                  do_fio(&c__1, (char *)&lda, (ftnlen)
                      sizeof(integer));
                  do_fio(&c__1, (char *)&ldb, (ftnlen)
                      sizeof(integer));
                  do_fio(&c__2, (char *)&beta, (ftnlen)
                      sizeof(real));
                  do_fio(&c__1, (char *)&ldc, (ftnlen)
                      sizeof(integer));
                  e_wsfe();
                }
                if (*rewi) {
                  al__1.aerr = 0;
                  al__1.aunit = *ntra;
                  f_rew(&al__1);
                }
                cgemm_(transa, transb, &m, &n, &k, &alpha, &
                    aa[1], &lda, &bb[1], &ldb, &beta, &cc[
                    1], &ldc, (ftnlen)1, (ftnlen)1);

/* Check if error-exit was taken incorrectly. */
                if (! infoc_1.ok) {
                  io___125.ciunit = *nout;
                  s_wsfe(&io___125);
                  e_wsfe();
                  *fatal = TRUE_;
                  goto L120;
                }

/* See what data changed inside subroutines. */
                /* isame[i] corresponds to cgemm_ argument i+1. */
                isame[0] = *(unsigned char *)transa == *(
                    unsigned char *)tranas;
                isame[1] = *(unsigned char *)transb == *(
                    unsigned char *)tranbs;
                isame[2] = ms == m;
                isame[3] = ns == n;
                isame[4] = ks == k;
                isame[5] = als.r == alpha.r && als.i ==
                    alpha.i;
                isame[6] = lce_(&as[1], &aa[1], &laa);
                isame[7] = ldas == lda;
                isame[8] = lce_(&bs[1], &bb[1], &lbb);
                isame[9] = ldbs == ldb;
                isame[10] = bls.r == beta.r && bls.i ==
                    beta.i;
                if (null) {
                  isame[11] = lce_(&cs[1], &cc[1], &lcc);
                } else {
                  isame[11] = lceres_("GE", " ", &m, &n, &
                      cs[1], &cc[1], &ldc, (ftnlen)2, (
                      ftnlen)1);
                }
                isame[12] = ldcs == ldc;

/* If data was incorrectly changed, report */
/* and return. */
                /* Every failing argument number is reported, */
                /* not only the first one. */
                same = TRUE_;
                i__6 = nargs;
                for (i__ = 1; i__ <= i__6; ++i__) {
                  same = same && isame[i__ - 1];
                  if (! isame[i__ - 1]) {
                    io___128.ciunit = *nout;
                    s_wsfe(&io___128);
                    do_fio(&c__1, (char *)&i__, (ftnlen)
                        sizeof(integer));
                    e_wsfe();
                  }
/* L40: */
                }
                if (! same) {
                  *fatal = TRUE_;
                  goto L120;
                }

                if (! null) {
/* Check the result. */
                  cmmch_(transa, transb, &m, &n, &k, &alpha,
                      &a[a_offset], nmax, &b[b_offset],
                      nmax, &beta, &c__[c_offset],
                      nmax, &ct[1], &g[1], &cc[1], &ldc,
                      eps, &err, fatal, nout, &c_true,
                      (ftnlen)1, (ftnlen)1);
                  /* Keep the largest err seen so far. */
                  errmax = max(errmax,err);
/* If got really bad answer, report and */
/* return. */
                  if (*fatal) {
                    goto L120;
                  }
                }
/* L50: */
              }
/* L60: */
            }
L70:
            ;
          }
L80:
          ;
        }
/* L90: */
      }
L100:
      ;
    }
/* L110: */
  }

/* Report result. */
  if (errmax < *thresh) {
    io___130.ciunit = *nout;
    s_wsfe(&io___130);
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    e_wsfe();
  } else {
    io___131.ciunit = *nout;
    s_wsfe(&io___131);
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real));
    e_wsfe();
  }
  goto L130;

/* Failure exit: name the routine, then describe the call that failed */
/* using the same fmt_9995 layout as the trace record. */
L120:
  io___132.ciunit = *nout;
  s_wsfe(&io___132);
  do_fio(&c__1, sname, (ftnlen)6);
  e_wsfe();
  io___133.ciunit = *nout;
  s_wsfe(&io___133);
  do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
  do_fio(&c__1, sname, (ftnlen)6);
  do_fio(&c__1, transa, (ftnlen)1);
  do_fio(&c__1, transb, (ftnlen)1);
  do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
  do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
  do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer));
  do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real));
  do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
  do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
  do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real));
  do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
  e_wsfe();

L130:
  return 0;

/* End of CCHK1. 
*/
} /* cchk1_ */

/*
 * cchk2_ -- test driver for chemm_ and csymm_.
 *
 * The routine under test is chosen from sname: when the two characters
 * starting at sname[1] compare equal to "HE", chemm_ is called, otherwise
 * csymm_ is called.
 *
 * For every combination of
 *   - m and n taken from the first *nidim entries of idim,
 *   - side taken from ichs ("LR") and uplo taken from ichu ("UL"),
 *   - alpha taken from the first *nalf entries of alf,
 *   - beta taken from the first *nbet entries of bet,
 * the routine builds A, B and C with cmake_, saves a copy of every
 * argument, calls the routine under test, compares each argument with its
 * saved copy and, unless m or n is non-positive, hands the result to
 * cmmch_.
 *
 * Arguments:
 *   sname      routine name; six characters of it are printed in reports,
 *              and sname + 1 is also passed to cmake_ as the type of A.
 *   eps        forwarded unchanged to cmmch_.
 *   thresh     if the largest err returned by cmmch_ is below *thresh the
 *              fmt_9999 ("PASSED") report is written, otherwise the
 *              fmt_9997 ("SUSPECT") report.
 *   nout       unit number used for all result and error reports.
 *   ntra       unit number for the per-call trace record.
 *   trace      when true, a fmt_9995 record is written to *ntra before
 *              each call.
 *   rewi       when true, unit *ntra is rewound (f_rew) before each call.
 *   fatal      set to TRUE_ here when infoc_1.ok is false after the call
 *              or when an argument differs from its saved copy; it is also
 *              passed to cmmch_, and the routine stops testing if it is
 *              set on return from cmmch_.
 *   nidim,idim count and values of the dimensions to try.
 *   nalf,alf   count and values of alpha.
 *   nbet,bet   count and values of beta.
 *   nmax       leading dimension used to index a, b and c__; also the
 *              upper limit for lda, ldb and ldc.
 *   a,b,c__    matrices filled by cmake_ and later passed to cmmch_.
 *   aa,bb,cc   arrays filled by cmake_ and passed to chemm_ / csymm_.
 *   as,bs,cs   receive element-by-element copies of aa, bb, cc before
 *              the call.
 *   ct,g       passed to cmmch_.
 *   sname_len  not referenced in the body.
 *
 * Returns 0 on every path.
 */
/* Subroutine */ int cchk2_(char *sname, real *eps, real *thresh, integer *
    nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
    integer *nidim, integer *idim, integer *nalf, complex *alf, integer *
    nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex *
    as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc,
    complex *cs, complex *ct, real *g, ftnlen sname_len)
{
  /* Initialized data */
  /* Values tried for side and for uplo, respectively. */
  static char ichs[2] = "LR";
  static char ichu[2] = "UL";

  /* Format strings */
  /* fmt_9995: one-line description of a call (trace and failure). */
  static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
      ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)"
      ", A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002)"
      ", C,\002,i3,\002) .\002)";
  static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
      "N VALID CALL *\002,\002******\002)";
  static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
      " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
  static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
      "STS (\002,i6,\002 CALL\002,\002S)\002)";
  static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
      " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
      "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
  static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
      "ER:\002)";

  /* System generated locals */
  integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
      i__3, i__4, i__5, i__6, i__7;
  alist al__1;

  /* Builtin functions */
  integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio(
      integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *);

  /* Local variables */
  integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc,
      ldb, ldc;
  extern logical lce_(complex *, complex *, integer *);
  integer ics;
  complex als, bls;
  integer icu;
  real err;
  complex beta;
  integer ldas, ldbs, ldcs;
  logical same;
  char
      side[1];
  logical conj, left, null;
  char uplo[1];
  extern /* Subroutine */ int cmake_(char *, char *, char *, integer *,
      integer *, complex *, integer *, complex *, integer *, logical *,
      complex *, ftnlen, ftnlen, ftnlen);
  complex alpha;
  extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *,
      integer *, complex *, complex *, integer *, complex *, integer *,
      complex *, complex *, integer *, complex *, real *, complex *,
      integer *, real *, real *, logical *, integer *, logical *,
      ftnlen, ftnlen), chemm_(char *, char *, integer *, integer *,
      complex *, complex *, integer *, complex *, integer *, complex *,
      complex *, integer *, ftnlen, ftnlen);
  logical isame[13];
  char sides[1];
  integer nargs;
  logical reset;
  extern /* Subroutine */ int csymm_(char *, char *, integer *, integer *,
      complex *, complex *, integer *, complex *, integer *, complex *,
      complex *, integer *, ftnlen, ftnlen);
  char uplos[1];
  extern logical lceres_(char *, char *, integer *, integer *, complex *,
      complex *, integer *, ftnlen, ftnlen);
  real errmax;

  /* Fortran I/O blocks */
  /* The unit field (ciunit) of each block is filled in just before use. */
  static cilist io___172 = { 0, 0, 0, fmt_9995, 0 };
  static cilist io___173 = { 0, 0, 0, fmt_9994, 0 };
  static cilist io___176 = { 0, 0, 0, fmt_9998, 0 };
  static cilist io___178 = { 0, 0, 0, fmt_9999, 0 };
  static cilist io___179 = { 0, 0, 0, fmt_9997, 0 };
  static cilist io___180 = { 0, 0, 0, fmt_9996, 0 };
  static cilist io___181 = { 0, 0, 0, fmt_9995, 0 };

/* Tests CHEMM and CSYMM. */
/* Auxiliary routine for test program for Level 3 Blas. */
/* -- Written on 8-February-1989. */
/* Jack Dongarra, Argonne National Laboratory. */
/* Iain Duff, AERE Harwell. */
/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/* Sven Hammarling, Numerical Algorithms Group Ltd. */
/* .. Parameters .. */
/* .. Scalar Arguments .. */
/* .. Array Arguments .. */
/* .. Local Scalars .. */
/* .. Local Arrays .. */
/* .. External Functions .. */
/* .. External Subroutines .. */
/* .. Intrinsic Functions .. */
/* .. Scalars in Common .. */
/* .. Common blocks .. */
/* .. Data statements .. */

  /* Parameter adjustments */
  /* Each vector pointer is moved back by one element so that the code */
  /* below can index it starting at 1. The matrices a, b and c__ are */
  /* moved back by 1 + *nmax, so element [a_offset] (likewise b_offset, */
  /* c_offset) is the first element the caller passed in. */
  --idim;
  --alf;
  --bet;
  --g;
  --ct;
  --cs;
  --cc;
  c_dim1 = *nmax;
  c_offset = 1 + c_dim1;
  c__ -= c_offset;
  --bs;
  --bb;
  b_dim1 = *nmax;
  b_offset = 1 + b_dim1;
  b -= b_offset;
  --as;
  --aa;
  a_dim1 = *nmax;
  a_offset = 1 + a_dim1;
  a -= a_offset;

  /* Function Body */
/* .. Executable Statements .. */
  /* conj is true when sname[1..2] is "HE"; it selects chemm_ below. */
  conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0;

  /* Only the first nargs (12) entries of isame are filled and examined. */
  nargs = 12;
  /* nc counts the calls made; it is printed in every report. */
  nc = 0;
  reset = TRUE_;
  errmax = 0.f;

  i__1 = *nidim;
  for (im = 1; im <= i__1; ++im) {
    m = idim[im];

    i__2 = *nidim;
    for (in = 1; in <= i__2; ++in) {
      n = idim[in];
/* Set LDC to 1 more than minimum value if room. */
      ldc = m;
      if (ldc < *nmax) {
        ++ldc;
      }
/* Skip tests if not enough room. */
      /* L90 sits at the end of this loop body: go on to the next n. */
      if (ldc > *nmax) {
        goto L90;
      }
      lcc = ldc * n;
      /* null selects lce_ instead of lceres_ for the C comparison and */
      /* suppresses the cmmch_ result check further down. */
      null = n <= 0 || m <= 0;
/* Set LDB to 1 more than minimum value if room. */
      ldb = m;
      if (ldb < *nmax) {
        ++ldb;
      }
/* Skip tests if not enough room. */
      if (ldb > *nmax) {
        goto L90;
      }
      lbb = ldb * n;

/* Generate the matrix B. */
      /* B is built once per (m, n) pair, outside the side/uplo loops. */
      cmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &
          reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1);

      for (ics = 1; ics <= 2; ++ics) {
        *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1];
        left = *(unsigned char *)side == 'L';

        /* A is na-by-na: order m for side 'L', order n otherwise. */
        if (left) {
          na = m;
        } else {
          na = n;
        }
/* Set LDA to 1 more than minimum value if room. */
        lda = na;
        if (lda < *nmax) {
          ++lda;
        }
/* Skip tests if not enough room. */
        /* L80 sits at the end of the ics loop body. */
        if (lda > *nmax) {
          goto L80;
        }
        laa = lda * na;

        for (icu = 1; icu <= 2; ++icu) {
          *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];

/* Generate the hermitian or symmetric matrix A. */
          cmake_(sname + 1, uplo, " ", &na, &na, &a[a_offset], nmax,
              &aa[1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen)
              1, (ftnlen)1);

          i__3 = *nalf;
          for (ia = 1; ia <= i__3; ++ia) {
            i__4 = ia;
            alpha.r = alf[i__4].r, alpha.i = alf[i__4].i;

            i__4 = *nbet;
            for (ib = 1; ib <= i__4; ++ib) {
              i__5 = ib;
              beta.r = bet[i__5].r, beta.i = bet[i__5].i;

/* Generate the matrix C. */
              cmake_("GE", " ", " ", &m, &n, &c__[c_offset],
                  nmax, &cc[1], &ldc, &reset, &c_b1, (
                  ftnlen)2, (ftnlen)1, (ftnlen)1);

              ++nc;

/* Save every datum before calling the */
/* subroutine. */
              *(unsigned char *)sides = *(unsigned char *)side;
              *(unsigned char *)uplos = *(unsigned char *)uplo;
              ms = m;
              ns = n;
              als.r = alpha.r, als.i = alpha.i;
              /* Copy the laa elements of aa into as. */
              i__5 = laa;
              for (i__ = 1; i__ <= i__5; ++i__) {
                i__6 = i__;
                i__7 = i__;
                as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7]
                    .i;
/* L10: */
              }
              ldas = lda;
              /* Copy the lbb elements of bb into bs. */
              i__5 = lbb;
              for (i__ = 1; i__ <= i__5; ++i__) {
                i__6 = i__;
                i__7 = i__;
                bs[i__6].r = bb[i__7].r, bs[i__6].i = bb[i__7]
                    .i;
/* L20: */
              }
              ldbs = ldb;
              bls.r = beta.r, bls.i = beta.i;
              /* Copy the lcc elements of cc into cs. */
              i__5 = lcc;
              for (i__ = 1; i__ <= i__5; ++i__) {
                i__6 = i__;
                i__7 = i__;
                cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7]
                    .i;
/* L30: */
              }
              ldcs = ldc;

/* Call the subroutine. */
              /* Optional trace record describing this call. */
              if (*trace) {
                io___172.ciunit = *ntra;
                s_wsfe(&io___172);
                do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                    integer));
                do_fio(&c__1, sname, (ftnlen)6);
                do_fio(&c__1, side, (ftnlen)1);
                do_fio(&c__1, uplo, (ftnlen)1);
                do_fio(&c__1, (char *)&m, (ftnlen)sizeof(
                    integer));
                do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                    integer));
                /* alpha and beta are each written as two */
                /* items of size sizeof(real). */
                do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(
                    real));
                do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(
                    integer));
                do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(
                    integer));
                do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(
                    real));
                do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(
                    integer));
                e_wsfe();
              }
              if (*rewi) {
                al__1.aerr = 0;
                al__1.aunit = *ntra;
                f_rew(&al__1);
              }
              /* Both routines receive the same argument list. */
              if (conj) {
                chemm_(side, uplo, &m, &n, &alpha, &aa[1], &
                    lda, &bb[1], &ldb, &beta, &cc[1], &
                    ldc, (ftnlen)1, (ftnlen)1);
              } else {
                csymm_(side, uplo, &m, &n, &alpha, &aa[1], &
                    lda, &bb[1], &ldb, &beta, &cc[1], &
                    ldc, (ftnlen)1, (ftnlen)1);
              }

/* Check if error-exit was taken incorrectly. */
              if (! infoc_1.ok) {
                io___173.ciunit = *nout;
                s_wsfe(&io___173);
                e_wsfe();
                *fatal = TRUE_;
                goto L110;
              }

/* See what data changed inside subroutines. */
              /* isame[i] corresponds to argument i+1 of the */
              /* chemm_ / csymm_ call above. */
              isame[0] = *(unsigned char *)sides == *(unsigned
                  char *)side;
              isame[1] = *(unsigned char *)uplos == *(unsigned
                  char *)uplo;
              isame[2] = ms == m;
              isame[3] = ns == n;
              isame[4] = als.r == alpha.r && als.i == alpha.i;
              isame[5] = lce_(&as[1], &aa[1], &laa);
              isame[6] = ldas == lda;
              isame[7] = lce_(&bs[1], &bb[1], &lbb);
              isame[8] = ldbs == ldb;
              isame[9] = bls.r == beta.r && bls.i == beta.i;
              if (null) {
                isame[10] = lce_(&cs[1], &cc[1], &lcc);
              } else {
                isame[10] = lceres_("GE", " ", &m, &n, &cs[1],
                    &cc[1], &ldc, (ftnlen)2, (ftnlen)1);
              }
              isame[11] = ldcs == ldc;

/* If data was incorrectly changed, report and */
/* return. */
              /* Every failing argument number is reported, */
              /* not only the first one. */
              same = TRUE_;
              i__5 = nargs;
              for (i__ = 1; i__ <= i__5; ++i__) {
                same = same && isame[i__ - 1];
                if (! isame[i__ - 1]) {
                  io___176.ciunit = *nout;
                  s_wsfe(&io___176);
                  do_fio(&c__1, (char *)&i__, (ftnlen)
                      sizeof(integer));
                  e_wsfe();
                }
/* L40: */
              }
              if (! same) {
                *fatal = TRUE_;
                goto L110;
              }

              if (! null) {
/* Check the result. */
                /* For side 'L' the check is made with a as */
                /* the first matrix and b as the second, */
                /* inner dimension m; otherwise b comes */
                /* first, a second, inner dimension n. */
                if (left) {
                  cmmch_("N", "N", &m, &n, &m, &alpha, &a[
                      a_offset], nmax, &b[b_offset],
                      nmax, &beta, &c__[c_offset], nmax,
                      &ct[1], &g[1], &cc[1], &ldc, eps,
                      &err, fatal, nout, &c_true, (
                      ftnlen)1, (ftnlen)1);
                } else {
                  cmmch_("N", "N", &m, &n, &n, &alpha, &b[
                      b_offset], nmax, &a[a_offset],
                      nmax, &beta, &c__[c_offset], nmax,
                      &ct[1], &g[1], &cc[1], &ldc, eps,
                      &err, fatal, nout, &c_true, (
                      ftnlen)1, (ftnlen)1);
                }
                /* Keep the largest err seen so far. */
                errmax = max(errmax,err);
/* If got really bad answer, report and */
/* return. */
                if (*fatal) {
                  goto L110;
                }
              }
/* L50: */
            }
/* L60: */
          }
/* L70: */
        }
L80:
        ;
      }
L90:
      ;
    }
/* L100: */
  }

/* Report result. */
  if (errmax < *thresh) {
    io___178.ciunit = *nout;
    s_wsfe(&io___178);
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    e_wsfe();
  } else {
    io___179.ciunit = *nout;
    s_wsfe(&io___179);
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real));
    e_wsfe();
  }
  goto L120;

/* Failure exit: name the routine, then describe the call that failed */
/* using the same fmt_9995 layout as the trace record. */
L110:
  io___180.ciunit = *nout;
  s_wsfe(&io___180);
  do_fio(&c__1, sname, (ftnlen)6);
  e_wsfe();
  io___181.ciunit = *nout;
  s_wsfe(&io___181);
  do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
  do_fio(&c__1, sname, (ftnlen)6);
  do_fio(&c__1, side, (ftnlen)1);
  do_fio(&c__1, uplo, (ftnlen)1);
  do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
  do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
  do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real));
  do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
  do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
  do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real));
  do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
  e_wsfe();

L120:
  return 0;

/* End of CCHK2. 
*/ } /* cchk2_ */

/*
 * cchk3_ -- test driver for CTRMM and CTRSM.
 *
 * Characters 4-5 of sname ("MM" or "SM") select which routine is called.
 * The routine loops over every pair (m, n) taken from idim[], both SIDE
 * values ("LR"), both UPLO values ("UL"), three TRANSA values ("NTC"),
 * both DIAG values ("UN") and every alpha in alf[].  For each combination
 * it
 *   1. builds A and B with cmake_,
 *   2. saves a copy of every argument,
 *   3. calls ctrmm_ or ctrsm_,
 *   4. checks infoc_1.ok and that no input argument was modified,
 *   5. passes the result to cmmch_ and folds err into errmax.
 *
 * Arguments (all passed by pointer):
 *   sname      routine name; 6 characters are written to the reports
 *   eps        forwarded unchanged to cmmch_
 *   thresh     errmax < *thresh selects the "PASSED" report, otherwise
 *              the "SUSPECT" report is written
 *   nout       unit number used for result and error reports
 *   ntra       unit number used for the call trace
 *   trace      when true, each call is written to unit *ntra first
 *   rewi       when true, f_rew is called on unit *ntra before each call
 *   fatal      set to TRUE_ here on a bad error-exit or a changed
 *              argument; also handed to cmmch_
 *   nidim,idim number of dimension values and the values themselves
 *   nalf,alf   number of alpha values and the values themselves
 *   nmax       leading dimension used to index a, b and c__
 *   a, b, c__  two-dimensional work arrays (indexed with nmax)
 *   aa, bb     arrays actually passed to the routine under test
 *   as, bs     saved copies of aa and bb
 *   ct, g      forwarded to cmmch_
 *   sname_len  not referenced in this routine (presumably the f2c hidden
 *              string length of sname)
 *
 * Always returns 0.
 */
/* Subroutine */ int cchk3_(char *sname, real *eps, real *thresh, integer *
        nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
        integer *nidim, integer *idim, integer *nalf, complex *alf, integer *
        nmax, complex *a, complex *aa, complex *as, complex *b, complex *bb,
        complex *bs, complex *ct, real *g, complex *c__, ftnlen sname_len)
{
    /* Initialized data */
    static char ichu[2] = "UL";
    static char icht[3] = "NTC";
    static char ichd[2] = "UN";
    static char ichs[2] = "LR";
    /* Format strings */
    static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,4(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)"
            ", A,\002,i3,\002, B,\002,i3,\002) \002,\002 .\002)";
    static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
            i__3, i__4, i__5, i__6, i__7;
    complex q__1;
    alist al__1;
    /* Builtin functions */
    integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio(
            integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *);
    /* Local variables */
    integer i__, j, m, n, ia, na, nc, im, in, ms, ns, laa, icd, lbb, lda, ldb;
    extern logical lce_(complex *, complex *, integer *);
    integer ics;
    complex als;
    integer ict, icu;
    real err;
    char diag[1];
    integer ldas, ldbs;
    logical same;
    char side[1];
    logical left, null;
    char uplo[1];
    extern /* Subroutine */ int cmake_(char *, char *, char *, integer *,
            integer *, complex *, integer *, complex *, integer *, logical *,
            complex *, ftnlen, ftnlen, ftnlen);
    complex alpha;
    char diags[1];
    extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *,
            integer *, complex *, complex *, integer *, complex *, integer *,
            complex *, complex *, integer *, complex *, real *, complex *,
            integer *, real *, real *, logical *, integer *, logical *,
            ftnlen, ftnlen);
    logical isame[13];
    char sides[1];
    integer nargs;
    logical reset;
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *,
            integer *, integer *, complex *, complex *, integer *, complex *,
            integer *, ftnlen, ftnlen, ftnlen, ftnlen), ctrsm_(char *, char *,
            char *, char *, integer *, integer *, complex *, complex *,
            integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen);
    char uplos[1];
    extern logical lceres_(char *, char *, integer *, integer *, complex *,
            complex *, integer *, ftnlen, ftnlen);
    char tranas[1], transa[1];
    real errmax;
    /* Fortran I/O blocks */
    static cilist io___222 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___223 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___224 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___227 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___229 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___230 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___231 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___232 = { 0, 0, 0, fmt_9995, 0 };
    /* Tests CTRMM and CTRSM. */
    /* Auxiliary routine for test program for Level 3 Blas. */
    /* -- Written on 8-February-1989. */
    /* Jack Dongarra, Argonne National Laboratory. */
    /* Iain Duff, AERE Harwell. */
    /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
    /* Sven Hammarling, Numerical Algorithms Group Ltd. */
    /* .. Parameters .. */
    /* .. Scalar Arguments .. */
    /* .. Array Arguments .. */
    /* .. Local Scalars .. */
    /* .. Local Arrays .. */
    /* .. External Functions .. */
    /* .. External Subroutines .. */
    /* .. Intrinsic Functions .. */
    /* .. Scalars in Common .. */
    /* .. Common blocks .. */
    /* .. Data statements .. */
    /* Parameter adjustments */
    /* Shift every array pointer so the code below can use 1-based */
    /* (and, for a, b, c__, column-major [i + j * dim1]) indexing. */
    --idim;
    --alf;
    c_dim1 = *nmax;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --g;
    --ct;
    --bs;
    --bb;
    b_dim1 = *nmax;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    /* Function Body */
    /* .. Executable Statements .. */
    /* nargs is the number of isame[] entries examined after each call. */
    nargs = 11;
    nc = 0;
    reset = TRUE_;
    errmax = 0.f;
    /* Set up zero matrix for CMMCH. */
    i__1 = *nmax;
    for (j = 1; j <= i__1; ++j) {
        i__2 = *nmax;
        for (i__ = 1; i__ <= i__2; ++i__) {
            i__3 = i__ + j * c_dim1;
            c__[i__3].r = 0.f, c__[i__3].i = 0.f;
            /* L10: */
        }
        /* L20: */
    }
    i__1 = *nidim;
    for (im = 1; im <= i__1; ++im) {
        m = idim[im];
        i__2 = *nidim;
        for (in = 1; in <= i__2; ++in) {
            n = idim[in];
            /* Set LDB to 1 more than minimum value if room. */
            ldb = m;
            if (ldb < *nmax) {
                ++ldb;
            }
            /* Skip tests if not enough room. */
            if (ldb > *nmax) {
                goto L130;
            }
            lbb = ldb * n;
            /* null marks a degenerate problem (no rows or no columns). */
            null = m <= 0 || n <= 0;
            for (ics = 1; ics <= 2; ++ics) {
                *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1];
                left = *(unsigned char *)side == 'L';
                /* A is na-by-na: order m for SIDE = 'L', order n otherwise. */
                if (left) {
                    na = m;
                } else {
                    na = n;
                }
                /* Set LDA to 1 more than minimum value if room. */
                lda = na;
                if (lda < *nmax) {
                    ++lda;
                }
                /* Skip tests if not enough room. */
                if (lda > *nmax) {
                    goto L130;
                }
                laa = lda * na;
                for (icu = 1; icu <= 2; ++icu) {
                    *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];
                    for (ict = 1; ict <= 3; ++ict) {
                        *(unsigned char *)transa = *(unsigned char *)&icht[
                                ict - 1];
                        for (icd = 1; icd <= 2; ++icd) {
                            *(unsigned char *)diag = *(unsigned char *)&ichd[
                                    icd - 1];
                            i__3 = *nalf;
                            for (ia = 1; ia <= i__3; ++ia) {
                                i__4 = ia;
                                alpha.r = alf[i__4].r, alpha.i = alf[i__4].i;
                                /* Generate the matrix A. */
                                cmake_("TR", uplo, diag, &na, &na, &a[
                                        a_offset], nmax, &aa[1], &lda, &reset,
                                        &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)
                                        1);
                                /* Generate the matrix B. */
                                cmake_("GE", " ", " ", &m, &n, &b[b_offset],
                                        nmax, &bb[1], &ldb, &reset, &c_b1, (
                                        ftnlen)2, (ftnlen)1, (ftnlen)1);
                                /* nc counts calls; it is printed in the */
                                /* trace and in every report. */
                                ++nc;
                                /* Save every datum before calling the */
                                /* subroutine. */
                                *(unsigned char *)sides = *(unsigned char *)
                                        side;
                                *(unsigned char *)uplos = *(unsigned char *)
                                        uplo;
                                *(unsigned char *)tranas = *(unsigned char *)
                                        transa;
                                *(unsigned char *)diags = *(unsigned char *)
                                        diag;
                                ms = m;
                                ns = n;
                                als.r = alpha.r, als.i = alpha.i;
                                i__4 = laa;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    i__5 = i__;
                                    i__6 = i__;
                                    as[i__5].r = aa[i__6].r, as[i__5].i = aa[
                                            i__6].i;
                                    /* L30: */
                                }
                                ldas = lda;
                                i__4 = lbb;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    i__5 = i__;
                                    i__6 = i__;
                                    bs[i__5].r = bb[i__6].r, bs[i__5].i = bb[
                                            i__6].i;
                                    /* L40: */
                                }
                                ldbs = ldb;
                                /* Call the subroutine. */
                                if (s_cmp(sname + 3, "MM", (ftnlen)2, (ftnlen)
                                        2) == 0) {
                                    if (*trace) {
                                        io___222.ciunit = *ntra;
                                        s_wsfe(&io___222);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, side, (ftnlen)1);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, transa, (ftnlen)1);
                                        do_fio(&c__1, diag, (ftnlen)1);
                                        do_fio(&c__1, (char *)&m, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__2, (char *)&alpha, (ftnlen)
                                                sizeof(real));
                                        do_fio(&c__1, (char *)&lda, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&ldb, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    ctrmm_(side, uplo, transa, diag, &m, &n, &
                                            alpha, &aa[1], &lda, &bb[1], &ldb,
                                            (ftnlen)1, (ftnlen)1, (ftnlen)1,
                                            (ftnlen)1);
                                } else if (s_cmp(sname + 3, "SM", (ftnlen)2, (
                                        ftnlen)2) == 0) {
                                    if (*trace) {
                                        io___223.ciunit = *ntra;
                                        s_wsfe(&io___223);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, side, (ftnlen)1);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, transa, (ftnlen)1);
                                        do_fio(&c__1, diag, (ftnlen)1);
                                        do_fio(&c__1, (char *)&m, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__2, (char *)&alpha, (ftnlen)
                                                sizeof(real));
                                        do_fio(&c__1, (char *)&lda, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&ldb, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    ctrsm_(side, uplo, transa, diag, &m, &n, &
                                            alpha, &aa[1], &lda, &bb[1], &ldb,
                                            (ftnlen)1, (ftnlen)1, (ftnlen)1,
                                            (ftnlen)1);
                                }
                                /* Check if error-exit was taken incorrectly. */
                                /* infoc_1 is declared outside this routine. */
                                if (! infoc_1.ok) {
                                    io___224.ciunit = *nout;
                                    s_wsfe(&io___224);
                                    e_wsfe();
                                    *fatal = TRUE_;
                                    goto L150;
                                }
                                /* See what data changed inside subroutines. */
                                /* isame[k-1] corresponds to argument k of */
                                /* the routine under test. */
                                isame[0] = *(unsigned char *)sides == *(
                                        unsigned char *)side;
                                isame[1] = *(unsigned char *)uplos == *(
                                        unsigned char *)uplo;
                                isame[2] = *(unsigned char *)tranas == *(
                                        unsigned char *)transa;
                                isame[3] = *(unsigned char *)diags == *(
                                        unsigned char *)diag;
                                isame[4] = ms == m;
                                isame[5] = ns == n;
                                isame[6] = als.r == alpha.r && als.i ==
                                        alpha.i;
                                isame[7] = lce_(&as[1], &aa[1], &laa);
                                isame[8] = ldas == lda;
                                /* B is the output argument: for a degenerate */
                                /* problem it is compared via lce_, otherwise */
                                /* via lceres_. */
                                if (null) {
                                    isame[9] = lce_(&bs[1], &bb[1], &lbb);
                                } else {
                                    isame[9] = lceres_("GE", " ", &m, &n, &bs[
                                            1], &bb[1], &ldb, (ftnlen)2, (
                                            ftnlen)1);
                                }
                                isame[10] = ldbs == ldb;
                                /* If data was incorrectly changed, report and */
                                /* return. */
                                same = TRUE_;
                                i__4 = nargs;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    same = same && isame[i__ - 1];
                                    if (! isame[i__ - 1]) {
                                        io___227.ciunit = *nout;
                                        s_wsfe(&io___227);
                                        do_fio(&c__1, (char *)&i__, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
                                    /* L50: */
                                }
                                if (! same) {
                                    *fatal = TRUE_;
                                    goto L150;
                                }
                                if (! null) {
                                    if (s_cmp(sname + 3, "MM", (ftnlen)2, (
                                            ftnlen)2) == 0) {
                                        /* Check the result. */
                                        /* The operand order passed to cmmch_ */
                                        /* follows SIDE: (a, b) for left, */
                                        /* (b, a) for right. */
                                        if (left) {
                                            cmmch_(transa, "N", &m, &n, &m, &
                                                    alpha, &a[a_offset], nmax,
                                                    &b[b_offset], nmax, &
                                                    c_b1, &c__[c_offset],
                                                    nmax, &ct[1], &g[1], &bb[
                                                    1], &ldb, eps, &err,
                                                    fatal, nout, &c_true, (
                                                    ftnlen)1, (ftnlen)1);
                                        } else {
                                            cmmch_("N", transa, &m, &n, &n, &
                                                    alpha, &b[b_offset], nmax,
                                                    &a[a_offset], nmax, &
                                                    c_b1, &c__[c_offset],
                                                    nmax, &ct[1], &g[1], &bb[
                                                    1], &ldb, eps, &err,
                                                    fatal, nout, &c_true, (
                                                    ftnlen)1, (ftnlen)1);
                                        }
                                    } else if (s_cmp(sname + 3, "SM", (ftnlen)
                                            2, (ftnlen)2) == 0) {
                                        /* Compute approximation to original */
                                        /* matrix. */
                                        /* The computed result in bb is moved */
                                        /* into c__, and bb is overwritten */
                                        /* with alpha * b before cmmch_ runs. */
                                        i__4 = n;
                                        for (j = 1; j <= i__4; ++j) {
                                            i__5 = m;
                                            for (i__ = 1; i__ <= i__5; ++i__)
                                            {
                                                i__6 = i__ + j * c_dim1;
                                                i__7 = i__ + (j - 1) * ldb;
                                                c__[i__6].r = bb[i__7].r,
                                                        c__[i__6].i = bb[i__7].i;
                                                i__6 = i__ + (j - 1) * ldb;
                                                i__7 = i__ + j * b_dim1;
                                                q__1.r = alpha.r * b[i__7].r -
                                                        alpha.i * b[i__7].i,
                                                        q__1.i = alpha.r * b[i__7].i +
                                                        alpha.i * b[
                                                        i__7].r;
                                                bb[i__6].r = q__1.r, bb[i__6].i =
                                                        q__1.i;
                                                /* L60: */
                                            }
                                            /* L70: */
                                        }
                                        if (left) {
                                            cmmch_(transa, "N", &m, &n, &m, &
                                                    c_b2, &a[a_offset], nmax,
                                                    &c__[c_offset], nmax, &
                                                    c_b1, &b[b_offset], nmax,
                                                    &ct[1], &g[1], &bb[1], &
                                                    ldb, eps, &err, fatal,
                                                    nout, &c_false, (ftnlen)1,
                                                    (ftnlen)1);
                                        } else {
                                            cmmch_("N", transa, &m, &n, &n, &
                                                    c_b2, &c__[c_offset],
                                                    nmax, &a[a_offset], nmax,
                                                    &c_b1, &b[b_offset], nmax,
                                                    &ct[1], &g[1], &bb[1], &
                                                    ldb, eps, &err, fatal,
                                                    nout, &c_false, (ftnlen)1,
                                                    (ftnlen)1);
                                        }
                                    }
                                    errmax = max(errmax,err);
                                    /* If got really bad answer, report and */
                                    /* return. */
                                    if (*fatal) {
                                        goto L150;
                                    }
                                }
                                /* L80: */
                            }
                            /* L90: */
                        }
                        /* L100: */
                    }
                    /* L110: */
                }
                /* L120: */
            }
L130:
            ;
        }
        /* L140: */
    }
    /* Report result. */
    if (errmax < *thresh) {
        io___229.ciunit = *nout;
        s_wsfe(&io___229);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___230.ciunit = *nout;
        s_wsfe(&io___230);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real));
        e_wsfe();
    }
    goto L160;
    /* Failure exit: report the routine name and the arguments of the */
    /* call that was in progress. */
L150:
    io___231.ciunit = *nout;
    s_wsfe(&io___231);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    io___232.ciunit = *nout;
    s_wsfe(&io___232);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, side, (ftnlen)1);
    do_fio(&c__1, uplo, (ftnlen)1);
    do_fio(&c__1, transa, (ftnlen)1);
    do_fio(&c__1, diag, (ftnlen)1);
    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real));
    do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
    e_wsfe();
L160:
    return 0;
    /* End of CCHK3. */
} /* cchk3_ */

/* Subroutine */ int cchk4_(char *sname, real *eps, real *thresh, integer *
        nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
        integer *nidim, integer *idim, integer *nalf, complex *alf, integer *
        nbet, complex *bet, integer *nmax, complex *a, complex *aa, complex *
        as, complex *b, complex *bb, complex *bs, complex *c__, complex *cc,
        complex *cs, complex *ct, real *g, ftnlen sname_len)
{
    /* Initialized data */
    static char icht[2] = "NC";
    static char ichu[2] = "UL";
    /* Format strings */
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002,\002,f4.1,"
            "\002, C,\002,i3,\002) \002,\002 .\002)";
    static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)"
            " , A,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002), C,\002,i3,"
            "\002) .\002)";
    static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN"
            " \002,i3)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
            i__3, i__4, i__5, i__6, i__7;
    complex q__1;
    alist al__1;
    /* Builtin functions */
    integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio(
            integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *);
    /* Local
variables */
    integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns,
            laa, lda, lcc, ldc;
    extern logical lce_(complex *, complex *, integer *);
    complex als;
    integer ict, icu;
    real err;
    complex beta;
    integer ldas, ldcs;
    logical same, conj;
    complex bets;
    real rals;
    logical tran, null;
    char uplo[1];
    extern /* Subroutine */ int cmake_(char *, char *, char *, integer *,
            integer *, complex *, integer *, complex *, integer *, logical *,
            complex *, ftnlen, ftnlen, ftnlen);
    complex alpha;
    extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *,
            integer *, complex *, complex *, integer *, complex *, integer *,
            complex *, complex *, integer *, complex *, real *, complex *,
            integer *, real *, real *, logical *, integer *, logical *,
            ftnlen, ftnlen), cherk_(char *, char *, integer *, integer *,
            real *, complex *, integer *, real *, complex *, integer *,
            ftnlen, ftnlen);
    real rbeta;
    logical isame[13];
    integer nargs;
    real rbets;
    logical reset;
    char trans[1];
    logical upper;
    extern /* Subroutine */ int csyrk_(char *, char *, integer *, integer *,
            complex *, complex *, integer *, complex *, complex *, integer *,
            ftnlen, ftnlen);
    char uplos[1];
    real ralpha;
    extern logical lceres_(char *, char *, integer *, integer *, complex *,
            complex *, integer *, ftnlen, ftnlen);
    real errmax;
    char transs[1], transt[1];
    /* Fortran I/O blocks */
    static cilist io___274 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___275 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___276 = { 0, 0, 0, fmt_9992, 0 };
    static cilist io___279 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___286 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___287 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___288 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___289 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___290 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___291 = { 0, 0, 0, fmt_9993, 0 };
    /* Tests CHERK and CSYRK. */
    /*
     * When characters 2-3 of sname are "HE" (conj is true) cherk_ is
     * called with the real scalars ralpha / rbeta (the real parts of the
     * alf[] / bet[] entries); otherwise csyrk_ is called with the complex
     * alpha / beta.  The loops cover every (n, k) pair from idim[], both
     * TRANS choices, both UPLO choices, every alpha and every beta.  After
     * each call the arguments are compared with their saved copies and
     * the result is checked one column at a time through cmmch_.
     *
     * Arguments (all passed by pointer):
     *   sname      routine name; 6 characters are written to the reports
     *   eps        forwarded unchanged to cmmch_
     *   thresh     errmax < *thresh selects the "PASSED" report,
     *              otherwise the "SUSPECT" report is written
     *   nout       unit number used for result and error reports
     *   ntra       unit number used for the call trace
     *   trace      when true, each call is written to unit *ntra first
     *   rewi       when true, f_rew is called on unit *ntra before each call
     *   fatal      set to TRUE_ here on a bad error-exit or a changed
     *              argument; also handed to cmmch_
     *   nidim,idim number of dimension values and the values themselves
     *   nalf,alf   number of alpha values and the values themselves
     *   nbet,bet   number of beta values and the values themselves
     *   nmax       leading dimension used to index a, b and c__
     *   a, c__     two-dimensional work arrays (indexed with nmax)
     *   aa, cc     arrays actually passed to the routine under test
     *   as, cs     saved copies of aa and cc
     *   b, bb, bs  index-adjusted below but not otherwise referenced here
     *   ct, g      forwarded to cmmch_
     *   sname_len  not referenced in this routine
     *
     * Always returns 0.
     */
    /* Auxiliary routine for test program for Level 3 Blas. */
    /* -- Written on 8-February-1989. */
    /* Jack Dongarra, Argonne National Laboratory. */
    /* Iain Duff, AERE Harwell. */
    /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
    /* Sven Hammarling, Numerical Algorithms Group Ltd. */
    /* .. Parameters .. */
    /* .. Scalar Arguments .. */
    /* .. Array Arguments .. */
    /* .. Local Scalars .. */
    /* .. Local Arrays .. */
    /* .. External Functions .. */
    /* .. External Subroutines .. */
    /* .. Intrinsic Functions .. */
    /* .. Scalars in Common .. */
    /* .. Common blocks .. */
    /* .. Data statements .. */
    /* Parameter adjustments */
    /* Shift every array pointer so the code below can use 1-based */
    /* (and, for a, b, c__, column-major [i + j * dim1]) indexing. */
    --idim;
    --alf;
    --bet;
    --g;
    --ct;
    --cs;
    --cc;
    c_dim1 = *nmax;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --bs;
    --bb;
    b_dim1 = *nmax;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    /* Function Body */
    /* .. Executable Statements .. */
    conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0;
    /* nargs is the number of isame[] entries examined after each call. */
    nargs = 10;
    nc = 0;
    reset = TRUE_;
    errmax = 0.f;
    i__1 = *nidim;
    for (in = 1; in <= i__1; ++in) {
        n = idim[in];
        /* Set LDC to 1 more than minimum value if room. */
        ldc = n;
        if (ldc < *nmax) {
            ++ldc;
        }
        /* Skip tests if not enough room. */
        if (ldc > *nmax) {
            goto L100;
        }
        lcc = ldc * n;
        i__2 = *nidim;
        for (ik = 1; ik <= i__2; ++ik) {
            k = idim[ik];
            for (ict = 1; ict <= 2; ++ict) {
                *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1];
                tran = *(unsigned char *)trans == 'C';
                /* For the non-"HE" routine the second TRANS value is */
                /* 'T' rather than 'C'. */
                if (tran && ! conj) {
                    *(unsigned char *)trans = 'T';
                }
                /* A is ma-by-na: k-by-n when tran is set, n-by-k otherwise. */
                if (tran) {
                    ma = k;
                    na = n;
                } else {
                    ma = n;
                    na = k;
                }
                /* Set LDA to 1 more than minimum value if room. */
                lda = ma;
                if (lda < *nmax) {
                    ++lda;
                }
                /* Skip tests if not enough room. */
                if (lda > *nmax) {
                    goto L80;
                }
                laa = lda * na;
                /* Generate the matrix A. */
                cmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[1], &
                        lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1);
                for (icu = 1; icu <= 2; ++icu) {
                    *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];
                    upper = *(unsigned char *)uplo == 'U';
                    i__3 = *nalf;
                    for (ia = 1; ia <= i__3; ++ia) {
                        i__4 = ia;
                        alpha.r = alf[i__4].r, alpha.i = alf[i__4].i;
                        /* In the "HE" case only the real part of alpha */
                        /* is kept (alpha becomes ralpha + 0i). */
                        if (conj) {
                            ralpha = alpha.r;
                            q__1.r = ralpha, q__1.i = 0.f;
                            alpha.r = q__1.r, alpha.i = q__1.i;
                        }
                        i__4 = *nbet;
                        for (ib = 1; ib <= i__4; ++ib) {
                            i__5 = ib;
                            beta.r = bet[i__5].r, beta.i = bet[i__5].i;
                            /* Same treatment for beta. */
                            if (conj) {
                                rbeta = beta.r;
                                q__1.r = rbeta, q__1.i = 0.f;
                                beta.r = q__1.r, beta.i = q__1.i;
                            }
                            /* null marks calls whose result is compared */
                            /* with lce_ and not checked by cmmch_. */
                            null = n <= 0;
                            if (conj) {
                                null = null || (k <= 0 || ralpha == 0.f) &&
                                        rbeta == 1.f;
                            }
                            /* Generate the matrix C. */
                            cmake_(sname + 1, uplo, " ", &n, &n, &c__[
                                    c_offset], nmax, &cc[1], &ldc, &reset, &
                                    c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1);
                            ++nc;
                            /* Save every datum before calling the subroutine. */
                            *(unsigned char *)uplos = *(unsigned char *)uplo;
                            *(unsigned char *)transs = *(unsigned char *)
                                    trans;
                            ns = n;
                            ks = k;
                            if (conj) {
                                rals = ralpha;
                            } else {
                                als.r = alpha.r, als.i = alpha.i;
                            }
                            i__5 = laa;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                i__6 = i__;
                                i__7 = i__;
                                as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7]
                                        .i;
                                /* L10: */
                            }
                            ldas = lda;
                            if (conj) {
                                rbets = rbeta;
                            } else {
                                bets.r = beta.r, bets.i = beta.i;
                            }
                            i__5 = lcc;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                i__6 = i__;
                                i__7 = i__;
                                cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7]
                                        .i;
                                /* L20: */
                            }
                            ldcs = ldc;
                            /* Call the subroutine. */
                            if (conj) {
                                if (*trace) {
                                    io___274.ciunit = *ntra;
                                    s_wsfe(&io___274);
                                    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                                            integer));
                                    do_fio(&c__1, sname, (ftnlen)6);
                                    do_fio(&c__1, uplo, (ftnlen)1);
                                    do_fio(&c__1, trans, (ftnlen)1);
                                    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                                            integer));
                                    do_fio(&c__1, (char *)&k, (ftnlen)sizeof(
                                            integer));
                                    do_fio(&c__1, (char *)&ralpha, (ftnlen)
                                            sizeof(real));
                                    do_fio(&c__1, (char *)&lda, (ftnlen)
                                            sizeof(integer));
                                    do_fio(&c__1, (char *)&rbeta, (ftnlen)
                                            sizeof(real));
                                    do_fio(&c__1, (char *)&ldc, (ftnlen)
                                            sizeof(integer));
                                    e_wsfe();
                                }
                                if (*rewi) {
                                    al__1.aerr = 0;
                                    al__1.aunit = *ntra;
                                    f_rew(&al__1);
                                }
                                cherk_(uplo, trans, &n, &k, &ralpha, &aa[1], &
                                        lda, &rbeta, &cc[1], &ldc, (ftnlen)1,
                                        (ftnlen)1);
                            } else {
                                if (*trace) {
                                    io___275.ciunit = *ntra;
                                    s_wsfe(&io___275);
                                    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                                            integer));
                                    do_fio(&c__1, sname, (ftnlen)6);
                                    do_fio(&c__1, uplo, (ftnlen)1);
                                    do_fio(&c__1, trans, (ftnlen)1);
                                    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                                            integer));
                                    do_fio(&c__1, (char *)&k, (ftnlen)sizeof(
                                            integer));
                                    do_fio(&c__2, (char *)&alpha, (ftnlen)
                                            sizeof(real));
                                    do_fio(&c__1, (char *)&lda, (ftnlen)
                                            sizeof(integer));
                                    do_fio(&c__2, (char *)&beta, (ftnlen)
                                            sizeof(real));
                                    do_fio(&c__1, (char *)&ldc, (ftnlen)
                                            sizeof(integer));
                                    e_wsfe();
                                }
                                if (*rewi) {
                                    al__1.aerr = 0;
                                    al__1.aunit = *ntra;
                                    f_rew(&al__1);
                                }
                                csyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &
                                        lda, &beta, &cc[1], &ldc, (ftnlen)1, (
                                        ftnlen)1);
                            }
                            /* Check if error-exit was taken incorrectly. */
                            /* infoc_1 is declared outside this routine. */
                            if (! infoc_1.ok) {
                                io___276.ciunit = *nout;
                                s_wsfe(&io___276);
                                e_wsfe();
                                *fatal = TRUE_;
                                goto L120;
                            }
                            /* See what data changed inside subroutines. */
                            /* isame[k-1] corresponds to argument k of the */
                            /* routine under test. */
                            isame[0] = *(unsigned char *)uplos == *(unsigned
                                    char *)uplo;
                            isame[1] = *(unsigned char *)transs == *(unsigned
                                    char *)trans;
                            isame[2] = ns == n;
                            isame[3] = ks == k;
                            if (conj) {
                                isame[4] = rals == ralpha;
                            } else {
                                isame[4] = als.r == alpha.r && als.i ==
                                        alpha.i;
                            }
                            isame[5] = lce_(&as[1], &aa[1], &laa);
                            isame[6] = ldas == lda;
                            if (conj) {
                                isame[7] = rbets == rbeta;
                            } else {
                                isame[7] = bets.r == beta.r && bets.i ==
                                        beta.i;
                            }
                            if (null) {
                                isame[8] = lce_(&cs[1], &cc[1], &lcc);
                            } else {
                                isame[8] = lceres_(sname + 1, uplo, &n, &n, &
                                        cs[1], &cc[1], &ldc, (ftnlen)2, (
                                        ftnlen)1);
                            }
                            isame[9] = ldcs == ldc;
                            /* If data was incorrectly changed, report and */
                            /* return. */
                            same = TRUE_;
                            i__5 = nargs;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                same = same && isame[i__ - 1];
                                if (! isame[i__ - 1]) {
                                    io___279.ciunit = *nout;
                                    s_wsfe(&io___279);
                                    do_fio(&c__1, (char *)&i__, (ftnlen)
                                            sizeof(integer));
                                    e_wsfe();
                                }
                                /* L30: */
                            }
                            if (! same) {
                                *fatal = TRUE_;
                                goto L120;
                            }
                            if (! null) {
                                /* Check the result column by column. */
                                if (conj) {
                                    *(unsigned char *)transt = 'C';
                                } else {
                                    *(unsigned char *)transt = 'T';
                                }
                                /* jc is the position in cc[] where the */
                                /* checked part of column j starts. */
                                jc = 1;
                                i__5 = n;
                                for (j = 1; j <= i__5; ++j) {
                                    /* Rows jj .. jj+lj-1 of column j are */
                                    /* checked: 1..j for upper, j..n for */
                                    /* lower. */
                                    if (upper) {
                                        jj = 1;
                                        lj = j;
                                    } else {
                                        jj = j;
                                        lj = n - j + 1;
                                    }
                                    if (tran) {
                                        cmmch_(transt, "N", &lj, &c__1, &k, &
                                                alpha, &a[jj * a_dim1 + 1],
                                                nmax, &a[j * a_dim1 + 1],
                                                nmax, &beta, &c__[jj + j *
                                                c_dim1], nmax, &ct[1], &g[1],
                                                &cc[jc], &ldc, eps, &err,
                                                fatal, nout, &c_true, (ftnlen)
                                                1, (ftnlen)1);
                                    } else {
                                        cmmch_("N", transt, &lj, &c__1, &k, &
                                                alpha, &a[jj + a_dim1], nmax,
                                                &a[j + a_dim1], nmax, &beta, &
                                                c__[jj + j * c_dim1], nmax, &
                                                ct[1], &g[1], &cc[jc], &ldc,
                                                eps, &err, fatal, nout, &
                                                c_true, (ftnlen)1, (ftnlen)1);
                                    }
                                    if (upper) {
                                        jc += ldc;
                                    } else {
                                        jc = jc + ldc + 1;
                                    }
                                    errmax = max(errmax,err);
                                    /* If got really bad answer, report and */
                                    /* return. */
                                    if (*fatal) {
                                        goto L110;
                                    }
                                    /* L40: */
                                }
                            }
                            /* L50: */
                        }
                        /* L60: */
                    }
                    /* L70: */
                }
L80:
                ;
            }
            /* L90: */
        }
L100:
        ;
    }
    /* Report result. */
    if (errmax < *thresh) {
        io___286.ciunit = *nout;
        s_wsfe(&io___286);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___287.ciunit = *nout;
        s_wsfe(&io___287);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real));
        e_wsfe();
    }
    goto L130;
    /* Reached when cmmch_ set *fatal: name the failing column (only */
    /* when n > 1), then fall through to the common failure report. */
L110:
    if (n > 1) {
        io___288.ciunit = *nout;
        s_wsfe(&io___288);
        do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer));
        e_wsfe();
    }
    /* Failure exit: report the routine name and the arguments of the */
    /* call that was in progress. */
L120:
    io___289.ciunit = *nout;
    s_wsfe(&io___289);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    if (conj) {
        io___290.ciunit = *nout;
        s_wsfe(&io___290);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, trans, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&rbeta, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___291.ciunit = *nout;
        s_wsfe(&io___291);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, trans, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer));
        do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
        do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
        e_wsfe();
    }
L130:
    return 0;
    /* End of CCHK4. */
} /* cchk4_ */

/* Subroutine */ int cchk5_(char *sname, real *eps, real *thresh, integer *
        nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
        integer *nidim, integer *idim, integer *nalf, complex *alf, integer *
        nbet, complex *bet, integer *nmax, complex *ab, complex *aa, complex *
        as, complex *bb, complex *bs, complex *c__, complex *cc, complex *cs,
        complex *ct, real *g, complex *w, ftnlen sname_len)
{
    /* Initialized data */
    static char icht[2] = "NC";
    static char ichu[2] = "UL";
    /* Format strings */
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)"
            ", A,\002,i3,\002, B,\002,i3,\002,\002,f4.1,\002, C,\002,i3,\002)"
            " .\002)";
    static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)"
            ", A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002)"
            ", C,\002,i3,\002) .\002)";
    static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN"
            " \002,i3)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";
    /* System generated locals */
    integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
    complex q__1, q__2;
    alist al__1;
    /* Builtin functions */
    integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio(
            integer *, char *, ftnlen), e_wsfe(void),
f_rew(alist *); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lbb, lda, lcc, ldb, ldc; extern logical lce_(complex *, complex *, integer *); complex als; integer ict, icu; real err; integer jjab; complex beta; integer ldas, ldbs, ldcs; logical same, conj; complex bets; logical tran, null; char uplo[1]; extern /* Subroutine */ int cmake_(char *, char *, char *, integer *, integer *, complex *, integer *, complex *, integer *, logical *, complex *, ftnlen, ftnlen, ftnlen); complex alpha; extern /* Subroutine */ int cmmch_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, real *, complex *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); real rbeta; logical isame[13]; integer nargs; real rbets; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int cher2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen), csyr2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); extern logical lceres_(char *, char *, integer *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real errmax; char transs[1], transt[1]; /* Fortran I/O blocks */ static cilist io___334 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___335 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___347 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___348 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___349 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___350 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___351 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___352 = { 0, 0, 0, fmt_9993, 0 }; /* Tests 
CHER2K and CSYR2K. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --w; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; --as; --aa; --ab; /* Function Body */ /* .. Executable Statements .. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; nargs = 12; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L130; } lcc = ldc * n; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 2; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'C'; if (tran && ! conj) { *(unsigned char *)trans = 'T'; } if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * na; /* Generate the matrix A. */ if (tran) { i__3 = *nmax << 1; cmake_("GE", " ", " ", &ma, &na, &ab[1], &i__3, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); } else { cmake_("GE", " ", " ", &ma, &na, &ab[1], nmax, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); } /* Generate the matrix B. 
*/ ldb = lda; lbb = laa; if (tran) { i__3 = *nmax << 1; cmake_("GE", " ", " ", &ma, &na, &ab[k + 1], &i__3, &bb[1] , &ldb, &reset, &c_b1, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { cmake_("GE", " ", " ", &ma, &na, &ab[k * *nmax + 1], nmax, &bb[1], &ldb, &reset, &c_b1, (ftnlen)2, (ftnlen) 1, (ftnlen)1); } for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; if (conj) { rbeta = beta.r; q__1.r = rbeta, q__1.i = 0.f; beta.r = q__1.r, beta.i = q__1.i; } null = n <= 0; if (conj) { null = null || (k <= 0 || alpha.r == 0.f && alpha.i == 0.f) && rbeta == 1.f; } /* Generate the matrix C. */ cmake_(sname + 1, uplo, " ", &n, &n, &c__[ c_offset], nmax, &cc[1], &ldc, &reset, & c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; bs[i__6].r = bb[i__7].r, bs[i__6].i = bb[i__7] .i; /* L20: */ } ldbs = ldb; if (conj) { rbets = rbeta; } else { bets.r = beta.r, bets.i = beta.i; } i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L30: */ } ldcs = ldc; /* Call the subroutine. 
*/ if (conj) { if (*trace) { io___334.ciunit = *ntra; s_wsfe(&io___334); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } cher2k_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &bb[1], &ldb, &rbeta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { io___335.ciunit = *ntra; s_wsfe(&io___335); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } csyr2k_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___336.ciunit = *nout; s_wsfe(&io___336); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als.r == alpha.r && als.i == alpha.i; isame[5] = lce_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lce_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; if (conj) { isame[9] = rbets == rbeta; } else { isame[9] = bets.r == beta.r && bets.i == beta.i; } if (null) { isame[10] = lce_(&cs[1], &cc[1], &lcc); } else { isame[10] = lceres_("HE", uplo, &n, &n, &cs[1] , &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { /* Check the result column by column. 
*/ if (conj) { *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transt = 'T'; } jjab = 1; jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = (j - 1 << 1) * *nmax + k + i__; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8].i, q__1.i = alpha.r * ab[ i__8].i + alpha.i * ab[ i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; if (conj) { i__7 = k + i__; r_cnjg(&q__2, &alpha); i__8 = (j - 1 << 1) * *nmax + i__; q__1.r = q__2.r * ab[i__8].r - q__2.i * ab[i__8].i, q__1.i = q__2.r * ab[i__8].i + q__2.i * ab[ i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; } else { i__7 = k + i__; i__8 = (j - 1 << 1) * *nmax + i__; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, q__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; } /* L50: */ } i__6 = k << 1; i__7 = *nmax << 1; i__8 = *nmax << 1; cmmch_(transt, "N", &lj, &c__1, &i__6, &c_b2, &ab[jjab], &i__7, &w[ 1], &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1] , &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { if (conj) { i__7 = i__; r_cnjg(&q__2, &ab[(k + i__ - 1) * *nmax + j]); q__1.r = alpha.r * q__2.r - alpha.i * q__2.i, q__1.i = alpha.r * q__2.i + alpha.i * q__2.r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; q__2.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, q__2.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; r_cnjg(&q__1, &q__2); w[i__7].r = q__1.r, w[i__7].i = q__1.i; } else { i__7 = i__; i__8 = (k + i__ - 1) * *nmax + j; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, q__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; q__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, q__1.i = alpha.r * ab[i__8].i + 
alpha.i * ab[i__8].r; w[i__7].r = q__1.r, w[i__7].i = q__1.i; } /* L60: */ } i__6 = k << 1; i__7 = *nmax << 1; cmmch_("N", "N", &lj, &c__1, &i__6, & c_b2, &ab[jj], nmax, &w[1], & i__7, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; if (tran) { jjab += *nmax << 1; } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L140; } /* L70: */ } } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } L130: ; } /* Report result. */ if (errmax < *thresh) { io___347.ciunit = *nout; s_wsfe(&io___347); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___348.ciunit = *nout; s_wsfe(&io___348); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L160; L140: if (n > 1) { io___349.ciunit = *nout; s_wsfe(&io___349); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: io___350.ciunit = *nout; s_wsfe(&io___350); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (conj) { io___351.ciunit = *nout; s_wsfe(&io___351); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___352.ciunit = *nout; s_wsfe(&io___352); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, 
(ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } L160: return 0; /* End of CCHK5. */ } /* cchk5_ */ /* Subroutine */ int cchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ complex a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen), chemm_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen), cherk_(char *, char *, integer *, integer *, real *, complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen); real rbeta; extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), csymm_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen), ctrsm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), csyrk_(char *, char *, integer *, integer *, complex *, complex *, integer *, 
complex *, complex *, integer *, ftnlen, ftnlen), cher2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, real *, complex *, integer *, ftnlen, ftnlen), csyr2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *, ftnlen, ftnlen); real ralpha; extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___360 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___361 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 3 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* A, B and C should not need to be defined. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) */ /* 3-19-92: Fix argument 12 in calls to CSYMM and CHEMM */ /* with INFOT = 9 (eca) */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; /* Initialize ALPHA, BETA, RALPHA, and RBETA. 
*/ alpha.r = 1.f, alpha.i = -1.f; beta.r = 2.f, beta.i = -2.f; ralpha = 1.f; rbeta = 2.f; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; } L10: infoc_1.infot = 1; cgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; cgemm_("/", "C", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; cgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgemm_("C", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("N", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, 
b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("C", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("C", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("C", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("T", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("N", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot 
= 4; cgemm_("C", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("C", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("C", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("T", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("N", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("C", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, 
nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("C", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("C", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("T", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; cgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("C", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, 
&c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("C", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("T", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; cgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("N", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("C", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("T", 
"C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("C", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("C", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("C", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("C", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, 
&infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("T", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; cgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L20: infoc_1.infot = 1; chemm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; chemm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chemm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chemm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chemm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; chemm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; chemm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; chemm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; chemm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; chemm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; chemm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chemm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chemm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chemm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; chemm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; chemm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; chemm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; chemm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; chemm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L30: infoc_1.infot = 1; csymm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; csymm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csymm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csymm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csymm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, 
(ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csymm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csymm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csymm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csymm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csymm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csymm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csymm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csymm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csymm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, 
(ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L40: infoc_1.infot = 1; ctrmm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctrmm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctrmm_("L", "U", "/", "N", 
&c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctrmm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, 
(ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrmm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrmm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, 
a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "U", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "L", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrmm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, 
&infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 11; ctrmm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrmm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L50: infoc_1.infot = 1; ctrsm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ctrsm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ctrsm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ctrsm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "U", "T", "N", &c_n1, &c__0, 
&alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ctrsm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 6; ctrsm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ctrsm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "U", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, 
&c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "L", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ctrsm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, 
&infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ctrsm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L60: infoc_1.infot = 1; cherk_("/", "N", &c__0, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 
2; cherk_("U", "T", &c__0, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cherk_("U", "N", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cherk_("U", "C", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cherk_("L", "N", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cherk_("L", "C", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cherk_("U", "N", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cherk_("U", "C", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cherk_("L", "N", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cherk_("L", "C", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cherk_("U", "N", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cherk_("U", "C", &c__0, &c__2, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( 
ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cherk_("L", "N", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cherk_("L", "C", &c__0, &c__2, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cherk_("U", "N", &c__2, &c__0, &ralpha, a, &c__2, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cherk_("U", "C", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cherk_("L", "N", &c__2, &c__0, &ralpha, a, &c__2, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; cherk_("L", "C", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L70: infoc_1.infot = 1; csyrk_("/", "N", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; csyrk_("U", "C", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyrk_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyrk_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, 
&infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyrk_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyrk_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyrk_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyrk_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyrk_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyrk_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyrk_("U", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyrk_("L", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; csyrk_("U", "N", &c__2, &c__0, &alpha, 
a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; csyrk_("U", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; csyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; csyrk_("L", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L80: infoc_1.infot = 1; cher2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; cher2k_("U", "T", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cher2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cher2k_("U", "C", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cher2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; cher2k_("L", "C", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cher2k_("U", "N", &c__0, &c_n1, &alpha, a, 
&c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cher2k_("U", "C", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cher2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; cher2k_("L", "C", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2k_("U", "C", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; cher2k_("L", "C", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2k_("U", "C", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2k_("L", "N", 
&c__2, &c__0, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; cher2k_("L", "C", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; cher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; cher2k_("U", "C", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; cher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; cher2k_("L", "C", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L90: infoc_1.infot = 1; csyr2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; csyr2k_("U", "C", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyr2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyr2k_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 3; csyr2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; csyr2k_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyr2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyr2k_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyr2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; csyr2k_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; csyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 9; csyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; csyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csyr2k_("U", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; csyr2k_("L", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L100: if (infoc_1.ok) { io___360.ciunit = *nout; s_wsfe(&io___360); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___361.ciunit = *nout; s_wsfe(&io___361); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of CCHKE. 
*/ } /* cchke_ */ /* Subroutine */ int cmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, complex *a, integer *nmax, complex *aa, integer *lda, logical *reset, complex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; real r__1; complex q__1, q__2; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); void r_cnjg(complex *, complex *); /* Local variables */ integer i__, j, jj; logical gen, her, tri, sym; extern /* Complex */ void cbeg_(complex *, logical *); integer ibeg, iend; logical unit, lower, upper; /* Generates values for an M by N matrix A. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'HE', 'SY' or 'TR'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0; her = s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0; sym = s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0; tri = s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0; upper = (her || sym || tri) && *(unsigned char *)uplo == 'U'; lower = (her || sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. 
*/ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { i__3 = i__ + j * a_dim1; cbeg_(&q__2, reset); q__1.r = q__2.r + transl->r, q__1.i = q__2.i + transl->i; a[i__3].r = q__1.r, a[i__3].i = q__1.i; if (i__ != j) { /* Set some elements to zero */ if (*n > 3 && j == *n / 2) { i__3 = i__ + j * a_dim1; a[i__3].r = 0.f, a[i__3].i = 0.f; } if (her) { i__3 = j + i__ * a_dim1; r_cnjg(&q__1, &a[i__ + j * a_dim1]); a[i__3].r = q__1.r, a[i__3].i = q__1.i; } else if (sym) { i__3 = j + i__ * a_dim1; i__4 = i__ + j * a_dim1; a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; } else if (tri) { i__3 = j + i__ * a_dim1; a[i__3].r = 0.f, a[i__3].i = 0.f; } } } /* L10: */ } if (her) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; r__1 = a[i__3].r; q__1.r = r__1, q__1.i = 0.f; a[i__2].r = q__1.r, a[i__2].i = q__1.i; } if (tri) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; q__1.r = a[i__3].r + 1.f, q__1.i = a[i__3].i + 0.f; a[i__2].r = q__1.r, a[i__2].i = q__1.i; } if (unit) { i__2 = j + j * a_dim1; a[i__2].r = 1.f, a[i__2].i = 0.f; } /* L20: */ } /* Store elements in array AS in data structure required by routine. 
*/ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen) 2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L60: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L70: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10f, aa[i__3].i = 1e10f; /* L80: */ } if (her) { jj = j + (j - 1) * *lda; i__2 = jj; i__3 = jj; r__1 = aa[i__3].r; q__1.r = r__1, q__1.i = -1e10f; aa[i__2].r = q__1.r, aa[i__2].i = q__1.i; } /* L90: */ } } return 0; /* End of CMAKE. 
*/ } /* cmake_ */ /* Subroutine */ int cmmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, complex *beta, complex *c__, integer *ldc, complex *ct, real *g, complex *cc, integer *ldcc, real *eps, real *err, logical * fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 " " EXPECTED RE\002,\002SULT COMPUTED R" "ESULT\002)"; static char fmt_9998[] = "(1x,i7,2(\002 (\002,g15.6,\002,\002,g15.6," "\002)\002))"; static char fmt_9997[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; real r__1, r__2, r__3, r__4, r__5, r__6; complex q__1, q__2, q__3, q__4; /* Builtin functions */ double r_imag(complex *); void r_cnjg(complex *, complex *); double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, k; real erri; logical trana, tranb, ctrana, ctranb; /* Fortran I/O blocks */ static cilist io___382 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___383 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___384 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___385 = { 0, 0, 0, fmt_9997, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Statement Functions .. */ /* .. 
Statement Function definitions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --ct; --g; cc_dim1 = *ldcc; cc_offset = 1 + cc_dim1; cc -= cc_offset; /* Function Body */ trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; ctrana = *(unsigned char *)transa == 'C'; ctranb = *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ /* in A, B and C. */ /* Compute gauges in G. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; ct[i__3].r = 0.f, ct[i__3].i = 0.f; g[i__] = 0.f; /* L10: */ } if (! trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = k + j * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, q__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[ i__7].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag( &a[i__ + k * a_dim1]), abs(r__2))) * ((r__3 = b[ i__5].r, abs(r__3)) + (r__4 = r_imag(&b[k + j * b_dim1]), abs(r__4))); /* L20: */ } /* L30: */ } } else if (trana && ! 
tranb) { if (ctrana) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); i__6 = k + j * b_dim1; q__2.r = q__3.r * b[i__6].r - q__3.i * b[i__6].i, q__2.i = q__3.r * b[i__6].i + q__3.i * b[i__6] .r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[k + j * b_dim1]), abs(r__4))); /* L40: */ } /* L50: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; i__7 = k + j * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, q__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[k + j * b_dim1]), abs(r__4))); /* L60: */ } /* L70: */ } } } else if (! 
trana && tranb) { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; r_cnjg(&q__3, &b[j + k * b_dim1]); q__2.r = a[i__6].r * q__3.r - a[i__6].i * q__3.i, q__2.i = a[i__6].r * q__3.i + a[i__6].i * q__3.r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[j + k * b_dim1]), abs(r__4))); /* L80: */ } /* L90: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = j + k * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, q__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[i__ + k * a_dim1]), abs(r__2))) * (( r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag( &b[j + k * b_dim1]), abs(r__4))); /* L100: */ } /* L110: */ } } } else if (trana && tranb) { if (ctrana) { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); r_cnjg(&q__4, &b[j + k * b_dim1]); q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i + q__3.i * q__4.r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L120: */ } /* L130: */ } } else { 
i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; r_cnjg(&q__3, &a[k + i__ * a_dim1]); i__6 = j + k * b_dim1; q__2.r = q__3.r * b[i__6].r - q__3.i * b[i__6].i, q__2.i = q__3.r * b[i__6].i + q__3.i * b[ i__6].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L140: */ } /* L150: */ } } } else { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; r_cnjg(&q__3, &b[j + k * b_dim1]); q__2.r = a[i__6].r * q__3.r - a[i__6].i * q__3.i, q__2.i = a[i__6].r * q__3.i + a[i__6].i * q__3.r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L160: */ } /* L170: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; i__7 = j + k * b_dim1; q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[ i__7].i, q__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; q__1.r = ct[i__5].r + q__2.r, q__1.i = ct[i__5].i + q__2.i; ct[i__4].r = q__1.r, ct[i__4].i = q__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((r__1 = a[i__4].r, abs(r__1)) + (r__2 = r_imag(&a[k + i__ * a_dim1]), abs(r__2))) * ((r__3 = b[i__5].r, abs(r__3)) + (r__4 = r_imag(&b[j + k * b_dim1]), abs(r__4))); /* L180: */ } /* L190: */ } } } } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; 
q__2.r = alpha->r * ct[i__4].r - alpha->i * ct[i__4].i, q__2.i = alpha->r * ct[i__4].i + alpha->i * ct[i__4].r; i__5 = i__ + j * c_dim1; q__3.r = beta->r * c__[i__5].r - beta->i * c__[i__5].i, q__3.i = beta->r * c__[i__5].i + beta->i * c__[i__5].r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; ct[i__3].r = q__1.r, ct[i__3].i = q__1.i; i__3 = i__ + j * c_dim1; g[i__] = ((r__1 = alpha->r, abs(r__1)) + (r__2 = r_imag(alpha), abs(r__2))) * g[i__] + ((r__3 = beta->r, abs(r__3)) + ( r__4 = r_imag(beta), abs(r__4))) * ((r__5 = c__[i__3].r, abs(r__5)) + (r__6 = r_imag(&c__[i__ + j * c_dim1]), abs( r__6))); /* L200: */ } /* Compute the error ratio for this result. */ *err = 0.f; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__ + j * cc_dim1; q__2.r = ct[i__3].r - cc[i__4].r, q__2.i = ct[i__3].i - cc[i__4] .i; q__1.r = q__2.r, q__1.i = q__2.i; erri = ((r__1 = q__1.r, abs(r__1)) + (r__2 = r_imag(&q__1), abs( r__2))) / *eps; if (g[i__] != 0.f) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.f) { goto L230; } /* L210: */ } /* L220: */ } /* If the loop completes, all results are at least half accurate. */ goto L250; /* Report fatal error. */ L230: *fatal = TRUE_; io___382.ciunit = *nout; s_wsfe(&io___382); e_wsfe(); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___383.ciunit = *nout; s_wsfe(&io___383); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&ct[i__], (ftnlen)sizeof(real)); do_fio(&c__2, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof(real) ); e_wsfe(); } else { io___384.ciunit = *nout; s_wsfe(&io___384); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof(real) ); do_fio(&c__2, (char *)&ct[i__], (ftnlen)sizeof(real)); e_wsfe(); } /* L240: */ } if (*n > 1) { io___385.ciunit = *nout; s_wsfe(&io___385); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L250: return 0; /* End of CMMCH. 
*/ } /* cmmch_ */ logical lce_(complex *ri, complex *rj, integer *lr) { /* System generated locals */ integer i__1, i__2, i__3; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; if (ri[i__2].r != rj[i__3].r || ri[i__2].i != rj[i__3].i) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LCE. */ } /* lce_ */ logical lceres_(char *type__, char *uplo, integer *m, integer *n, complex *aa, complex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE' or 'HE' or 'SY'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LCERES. */ } /* lceres_ */ /* Complex */ void cbeg_(complex * ret_val, logical *reset) { /* System generated locals */ real r__1, r__2; complex q__1; /* Local variables */ static integer i__, j, ic, mi, mj; /* Generates complex numbers as pairs of random numbers uniformly */ /* distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. 
*/ mi = 891; mj = 457; i__ = 7; j = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I or J is bounded between 1 and 999. */ /* If initial I or J = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I or J = 4 or 8, the period will be 25. */ /* If initial I or J = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I or J */ /* in 6. */ ++ic; L10: i__ *= mi; j *= mj; i__ -= i__ / 1000 * 1000; j -= j / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } r__1 = (i__ - 500) / 1001.f; r__2 = (j - 500) / 1001.f; q__1.r = r__1, q__1.i = r__2; ret_val->r = q__1.r, ret_val->i = q__1.i; return ; /* End of CBEG. */ } /* cbeg_ */ real sdiff_(real *x, real *y) { /* System generated locals */ real ret_val; /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of SDIFF. */ } /* sdiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___397 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. 
Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___397.ciunit = *nout; s_wsfe(&io___397); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___398 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___399 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___400 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 3 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 3 BLAS routines. */ /* It is called by the Level 3 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. 
*/ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___398.ciunit = infoc_2.nout; s_wsfe(&io___398); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___399.ciunit = infoc_2.nout; s_wsfe(&io___399); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___400.ciunit = infoc_2.nout; s_wsfe(&io___400); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int cblat3_ () { main (); return 0; } blis-0.6.1/blastest/src/dblat1.c000066400000000000000000001172761360743507500164220ustar00rootroot00000000000000/* dblat1.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ struct { integer icase, n, incx, incy; logical pass; } combla_; #define combla_1 combla_ /* Table of constant values */ static integer c__1 = 1; static integer c__9 = 9; static doublereal c_b35 = 1.; static real c_b39 = .03125f; static integer c__5 = 5; static doublereal c_b63 = 0.; static real c_b81 = 0.f; /* > \brief \b DBLAT1 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM DBLAT1 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the DOUBLE 
PRECISION Level 1 BLAS. */ /* > */ /* > Based upon the original BLAS test routine together with: */ /* > F06EAF Example Program Text */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup double_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static doublereal sfac = 9.765625e-4; /* Format strings */ static char fmt_99999[] = "(\002 Real BLAS Test Program Results\002,/1x)"; static char fmt_99998[] = "(\002 ----" "- PASS -----\002)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer ic; extern /* Subroutine */ int check0_(doublereal *), check1_(doublereal *), check2_(doublereal *), check3_(doublereal *), header_(void); /* Fortran I/O blocks */ static cilist io___2 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___4 = { 0, 6, 0, fmt_99998, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___2); e_wsfe(); for (ic = 1; ic <= 13; ++ic) { combla_1.icase = ic; header_(); /* .. Initialize PASS, INCX, and INCY for a new case. .. */ /* .. the value 9999 for INCX or INCY will appear in the .. */ /* .. detailed output, if any, for cases that do not involve .. */ /* .. these parameters .. 
*/ combla_1.pass = TRUE_; combla_1.incx = 9999; combla_1.incy = 9999; if (combla_1.icase == 3 || combla_1.icase == 11) { check0_(&sfac); } else if (combla_1.icase == 7 || combla_1.icase == 8 || combla_1.icase == 9 || combla_1.icase == 10) { check1_(&sfac); } else if (combla_1.icase == 1 || combla_1.icase == 2 || combla_1.icase == 5 || combla_1.icase == 6 || combla_1.icase == 12 || combla_1.icase == 13) { check2_(&sfac); } else if (combla_1.icase == 4) { check3_(&sfac); } /* -- Print */ if (combla_1.pass) { s_wsfe(&io___4); e_wsfe(); } /* L20: */ } s_stop("", (ftnlen)0); return 0; } /* main */ /* Subroutine */ int header_(void) { /* Initialized data */ static char l[6*13] = " DDOT " "DAXPY " "DROTG " " DROT " "DCOPY " "DSWA" "P " "DNRM2 " "DASUM " "DSCAL " "IDAMAX" "DROTMG" "DROTM " "DSDOT " ; /* Format strings */ static char fmt_99999[] = "(/\002 Test of subprogram number\002,i3,12x,a" "6)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___6 = { 0, 6, 0, fmt_99999, 0 }; /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Arrays .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___6); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, l + (0 + (0 + (combla_1.icase - 1) * 6)), (ftnlen)6); e_wsfe(); return 0; } /* header_ */ /* Subroutine */ int check0_(doublereal *sfac) { /* Initialized data */ static doublereal ds1[8] = { .8,.6,.8,-.6,.8,0.,1.,0. }; static doublereal datrue[8] = { .5,.5,.5,-.5,-.5,0.,1.,1. }; static doublereal dbtrue[8] = { 0.,.6,0.,-.6,0.,0.,1.,0. }; static doublereal dab[36] /* was [4][9] */ = { .1,.3,1.2,.2,.7,.2,.6, 4.2,0.,0.,0.,0.,4.,-1.,2.,4.,6e-10,.02,1e5,10.,4e10,.02,1e-5,10., 2e-10,.04,1e5,10.,2e10,.04,1e-5,10.,4.,-2.,8.,4. 
}; static doublereal dtrue[81] /* was [9][9] */ = { 0.,0.,1.3,.2,0.,0.,0.,.5, 0.,0.,0.,4.5,4.2,1.,.5,0.,0.,0.,0.,0.,0.,0.,-2.,0.,0.,0.,0.,0.,0., 0.,4.,-1.,0.,0.,0.,0.,0.,.015,0.,10.,-1.,0.,-1e-4,0.,1.,0.,0., .06144,10.,-1.,4096.,-1e6,0.,1.,0.,0.,15.,10.,-1.,5e-5,0.,1.,0., 0.,0.,15.,10.,-1.,5e5,-4096.,1.,.004096,0.,0.,7.,4.,0.,0.,-.5, -.25,0. }; static doublereal d12 = 4096.; static doublereal da1[8] = { .3,.4,-.3,-.4,-.3,0.,0.,1. }; static doublereal db1[8] = { .4,.3,.4,.3,-.4,0.,1.,0. }; static doublereal dc1[8] = { .6,.8,-.6,.8,.6,1.,0.,1. }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k; doublereal sa, sb, sc, ss, dtemp[9]; extern /* Subroutine */ int drotg_(doublereal *, doublereal *, doublereal *, doublereal *), stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *), stest1_(doublereal *, doublereal *, doublereal *, doublereal *), drotmg_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); /* Fortran I/O blocks */ static cilist io___23 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* INPUT FOR MODIFIED GIVENS */ /* TRUE RESULTS FOR MODIFIED GIVENS */ /* 4096 = 2 ** 12 */ dtrue[0] = .092307692307692313; dtrue[1] = .27692307692307694; dtrue[6] = -.16666666666666666; dtrue[9] = .18666666666666668; dtrue[10] = .65333333333333332; dtrue[17] = .14285714285714285; dtrue[36] = d12 * d12 * 4.5e-10; dtrue[38] = 4e5 / (d12 * 3.); dtrue[41] = 1. 
/ d12; dtrue[43] = 1e4 / (d12 * 3.); dtrue[45] = 4e10 / (d12 * 1.5 * d12); dtrue[46] = .013333333333333334; dtrue[52] = d12 * 5e-7; dtrue[54] = .026666666666666668; dtrue[55] = d12 * d12 * 1.3333333333333334e-10; dtrue[60] = -dtrue[41]; dtrue[62] = 1e4 / d12; dtrue[63] = dtrue[54]; dtrue[64] = 2e10 / (d12 * 1.5 * d12); dtrue[72] = 4.5714285714285712; dtrue[73] = -2.2857142857142856; /* .. Executable Statements .. */ /* Compute true values which cannot be prestored */ /* in decimal notation */ dbtrue[0] = 1.6666666666666667; dbtrue[2] = -1.6666666666666667; dbtrue[4] = 1.6666666666666667; for (k = 1; k <= 8; ++k) { /* .. Set N=K for identification in output if any .. */ combla_1.n = k; if (combla_1.icase == 3) { /* .. DROTG .. */ if (k > 8) { goto L40; } sa = da1[k - 1]; sb = db1[k - 1]; drotg_(&sa, &sb, &sc, &ss); stest1_(&sa, &datrue[k - 1], &datrue[k - 1], sfac); stest1_(&sb, &dbtrue[k - 1], &dbtrue[k - 1], sfac); stest1_(&sc, &dc1[k - 1], &dc1[k - 1], sfac); stest1_(&ss, &ds1[k - 1], &ds1[k - 1], sfac); } else if (combla_1.icase == 11) { /* .. DROTMG .. */ for (i__ = 1; i__ <= 4; ++i__) { dtemp[i__ - 1] = dab[i__ + (k << 2) - 5]; dtemp[i__ + 3] = 0.f; } dtemp[8] = 0.f; drotmg_(dtemp, &dtemp[1], &dtemp[2], &dtemp[3], &dtemp[4]); stest_(&c__9, dtemp, &dtrue[k * 9 - 9], &dtrue[k * 9 - 9], sfac); } else { s_wsle(&io___23); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK0", (ftnlen)28); e_wsle(); s_stop("", (ftnlen)0); } /* L20: */ } L40: return 0; } /* check0_ */ /* Subroutine */ int check1_(doublereal *sfac) { /* Initialized data */ static doublereal sa[10] = { .3,-1.,0.,1.,.3,.3,.3,.3,.3,.3 }; static doublereal dv[80] /* was [8][5][2] */ = { .1,2.,2.,2.,2.,2.,2., 2.,.3,3.,3.,3.,3.,3.,3.,3.,.3,-.4,4.,4.,4.,4.,4.,4.,.2,-.6,.3,5., 5.,5.,5.,5.,.1,-.3,.5,-.1,6.,6.,6.,6.,.1,8.,8.,8.,8.,8.,8.,8.,.3, 9.,9.,9.,9.,9.,9.,9.,.3,2.,-.4,2.,2.,2.,2.,2.,.2,3.,-.6,5.,.3,2., 2.,2.,.1,4.,-.3,6.,-.5,7.,-.1,3. 
}; static doublereal dtrue1[5] = { 0.,.3,.5,.7,.6 }; static doublereal dtrue3[5] = { 0.,.3,.7,1.1,1. }; static doublereal dtrue5[80] /* was [8][5][2] */ = { .1,2.,2.,2., 2.,2.,2.,2.,-.3,3.,3.,3.,3.,3.,3.,3.,0.,0.,4.,4.,4.,4.,4.,4.,.2, -.6,.3,5.,5.,5.,5.,5.,.03,-.09,.15,-.03,6.,6.,6.,6.,.1,8.,8.,8., 8.,8.,8.,8.,.09,9.,9.,9.,9.,9.,9.,9.,.09,2.,-.12,2.,2.,2.,2.,2., .06,3.,-.18,5.,.09,2.,2.,2.,.03,4.,-.09,6.,-.15,7.,-.03,3. }; static integer itrue2[5] = { 0,1,2,2,3 }; /* System generated locals */ integer i__1; doublereal d__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__; doublereal sx[8]; integer np1, len; extern doublereal dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); extern doublereal dasum_(integer *, doublereal *, integer *); doublereal stemp[1], strue[8]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *), itest1_(integer *, integer *), stest1_(doublereal *, doublereal *, doublereal *, doublereal *); extern integer idamax_(integer *, doublereal *, integer *); /* Fortran I/O blocks */ static cilist io___36 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) { for (np1 = 1; np1 <= 5; ++np1) { combla_1.n = np1 - 1; len = max(combla_1.n,1) << 1; /* .. Set vector arguments .. */ i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { sx[i__ - 1] = dv[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L20: */ } if (combla_1.icase == 7) { /* .. DNRM2 .. 
*/ stemp[0] = dtrue1[np1 - 1]; d__1 = dnrm2_(&combla_1.n, sx, &combla_1.incx); stest1_(&d__1, stemp, stemp, sfac); } else if (combla_1.icase == 8) { /* .. DASUM .. */ stemp[0] = dtrue3[np1 - 1]; d__1 = dasum_(&combla_1.n, sx, &combla_1.incx); stest1_(&d__1, stemp, stemp, sfac); } else if (combla_1.icase == 9) { /* .. DSCAL .. */ dscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], sx, &combla_1.incx); i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L40: */ } stest_(&len, sx, strue, strue, sfac); } else if (combla_1.icase == 10) { /* .. IDAMAX .. */ i__1 = idamax_(&combla_1.n, sx, &combla_1.incx); itest1_(&i__1, &itrue2[np1 - 1]); } else { s_wsle(&io___36); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L60: */ } /* L80: */ } return 0; } /* check1_ */ /* Subroutine */ int check2_(doublereal *sfac) { /* Initialized data */ static doublereal sa = .3; static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static doublereal dx1[7] = { .6,.1,-.5,.8,.9,-.3,-.4 }; static doublereal dy1[7] = { .5,-.9,.3,.7,-.6,.2,.8 }; static real sx1[7] = { .6f,.1f,-.5f,.8f,.9f,-.3f,-.4f }; static real sy1[7] = { .5f,-.9f,.3f,.7f,-.6f,.2f,.8f }; static doublereal dt7[16] /* was [4][4] */ = { 0.,.3,.21,.62,0.,.3,-.07, .85,0.,.3,-.79,-.74,0.,.3,.33,1.27 }; static doublereal dt8[112] /* was [7][4][4] */ = { .5,0.,0.,0.,0.,0.,0., .68,0.,0.,0.,0.,0.,0.,.68,-.87,0.,0.,0.,0.,0.,.68,-.87,.15,.94,0., 0.,0.,.5,0.,0.,0.,0.,0.,0.,.68,0.,0.,0.,0.,0.,0.,.35,-.9,.48,0., 0.,0.,0.,.38,-.9,.57,.7,-.75,.2,.98,.5,0.,0.,0.,0.,0.,0.,.68,0., 0.,0.,0.,0.,0.,.35,-.72,0.,0.,0.,0.,0.,.38,-.63,.15,.88,0.,0.,0., .5,0.,0.,0.,0.,0.,0.,.68,0.,0.,0.,0.,0.,0.,.68,-.9,.33,0.,0.,0., 0.,.68,-.9,.33,.7,-.75,.2,1.04 }; static doublereal dt10x[112] /* was [7][4][4] */ = { 
.6,0.,0.,0., 0.,0.,0.,.5,0.,0.,0.,0.,0.,0.,.5,-.9,0.,0.,0.,0.,0.,.5,-.9,.3,.7, 0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,.5,0.,0.,0.,0.,0.,0.,.3,.1,.5,0.,0., 0.,0.,.8,.1,-.6,.8,.3,-.3,.5,.6,0.,0.,0.,0.,0.,0.,.5,0.,0.,0.,0., 0.,0.,-.9,.1,.5,0.,0.,0.,0.,.7,.1,.3,.8,-.9,-.3,.5,.6,0.,0.,0.,0., 0.,0.,.5,0.,0.,0.,0.,0.,0.,.5,.3,0.,0.,0.,0.,0.,.5,.3,-.6,.8,0., 0.,0. }; static doublereal dt10y[112] /* was [7][4][4] */ = { .5,0.,0.,0., 0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,.6,.1,0.,0.,0.,0.,0.,.6,.1,-.5,.8, 0.,0.,0.,.5,0.,0.,0.,0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,-.5,-.9,.6,0., 0.,0.,0.,-.4,-.9,.9,.7,-.5,.2,.6,.5,0.,0.,0.,0.,0.,0.,.6,0.,0.,0., 0.,0.,0.,-.5,.6,0.,0.,0.,0.,0.,-.4,.9,-.5,.6,0.,0.,0.,.5,0.,0.,0., 0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,.6,-.9,.1,0.,0.,0.,0.,.6,-.9,.1,.7, -.5,.2,.8 }; static doublereal ssize1[4] = { 0.,.3,1.6,3.2 }; static doublereal ssize2[28] /* was [14][2] */ = { 0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0.,0.,0.,0.,1.17,1.17,1.17,1.17,1.17,1.17,1.17, 1.17,1.17,1.17,1.17,1.17,1.17,1.17 }; static doublereal dpar[20] /* was [5][4] */ = { -2.,0.,0.,0.,0.,-1.,2., -3.,-4.,5.,0.,0.,2.,-3.,0.,1.,5.,2.,0.,-4. 
}; static struct { doublereal e_1[448]; } equiv_3 = {{ .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, 0., 0., 0., 0., 0., -.8, 3.8, 0., 0., 0., 0., 0., -.9, 2.8, 0., 0., 0., 0., 0., 3.5, -.4, 0., 0., 0., 0., 0., .6, .1, -.5, .8, 0., 0., 0., -.8, 3.8, -2.2, -1.2, 0., 0., 0., -.9, 2.8, -1.4, -1.3, 0., 0., 0., 3.5, -.4, -2.2, 4.7, 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, -.5, 0., 0., 0., 0., 0., .1, -3., 0., 0., 0., 0., -.3, .1, -2., 0., 0., 0., 0., 3.3, .1, -2., 0., 0., 0., 0., .6, .1, -.5, .8, .9, -.3, -.4, -2., .1, 1.4, .8, .6, -.3, -2.8, -1.8, .1, 1.3, .8, 0., -.3, -1.9, 3.8, .1, -3.1, .8, 4.8, -.3, -1.5, .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, -.5, 0., 0., 0., 0., 4.8, .1, -3., 0., 0., 0., 0., 3.3, .1, -2., 0., 0., 0., 0., 2.1, .1, -2., 0., 0., 0., 0., .6, .1, -.5, .8, .9, -.3, -.4, -1.6, .1, -2.2, .8, 5.4, -.3, -2.8, -1.5, .1, -1.4, .8, 3.6, -.3, -1.9, 3.7, .1, -2.2, .8, 3.6, -.3, -1.5, .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., .6, 0., 0., 0., 0., 0., 0., -.8, 0., 0., 0., 0., 0., 0., -.9, 0., 0., 0., 0., 0., 0., 3.5, 0., 0., 0., 0., 0., 0., .6, .1, 0., 0., 0., 0., 0., -.8, -1., 0., 0., 0., 0., 0., -.9, -.8, 0., 0., 0., 0., 0., 3.5, .8, 0., 0., 0., 0., 0., .6, .1, -.5, .8, 0., 0., 0., -.8, -1., 1.4, -1.6, 0., 0., 0., -.9, -.8, 1.3, -1.6, 0., 0., 0., 3.5, .8, -3.1, 4.8, 0., 0., 0. 
}}; static struct { doublereal e_1[448]; } equiv_7 = {{ .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, 0., 0., 0., 0., 0., .7, -4.8, 0., 0., 0., 0., 0., 1.7, -.7, 0., 0., 0., 0., 0., -2.6, 3.5, 0., 0., 0., 0., 0., .5, -.9, .3, .7, 0., 0., 0., .7, -4.8, 3., 1.1, 0., 0., 0., 1.7, -.7, -.7, 2.3, 0., 0., 0., -2.6, 3.5, -.7, -3.6, 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, .3, 0., 0., 0., 0., 4., -.9, -.3, 0., 0., 0., 0., -.5, -.9, 1.5, 0., 0., 0., 0., -1.5, -.9, -1.8, 0., 0., 0., 0., .5, -.9, .3, .7, -.6, .2, .8, 3.7, -.9, -1.2, .7, -1.5, .2, 2.2, -.3, -.9, 2.1, .7, -1.6, .2, 2., -1.6, -.9, -2.1, .7, 2.9, .2, -3.8, .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, 0., 0., 0., 0., 0., 4., -6.3, 0., 0., 0., 0., 0., -.5, .3, 0., 0., 0., 0., 0., -1.5, 3., 0., 0., 0., 0., 0., .5, -.9, .3, .7, 0., 0., 0., 3.7, -7.2, 3., 1.7, 0., 0., 0., -.3, .9, -.7, 1.9, 0., 0., 0., -1.6, 2.7, -.7, -3.4, 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .5, 0., 0., 0., 0., 0., 0., .7, 0., 0., 0., 0., 0., 0., 1.7, 0., 0., 0., 0., 0., 0., -2.6, 0., 0., 0., 0., 0., 0., .5, -.9, .3, 0., 0., 0., 0., .7, -.9, 1.2, 0., 0., 0., 0., 1.7, -.9, .5, 0., 0., 0., 0., -2.6, -.9, -1.3, 0., 0., 0., 0., .5, -.9, .3, .7, -.6, .2, .8, .7, -.9, 1.2, .7, -1.5, .2, 1.6, 1.7, -.9, .5, .7, -1.6, .2, 2.4, -2.6, -.9, -1.3, .7, 2.9, .2, -4. 
}}; /* System generated locals */ integer i__1; real r__1, r__2, r__3; doublereal d__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, j; extern /* Subroutine */ int testdsdot_(real *, real *, real *, real *); integer ki, kn, mx, my; doublereal sx[7], sy[7]; integer kni; doublereal stx[7], sty[7]; extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *); integer kpar, lenx, leny; #define dt19x ((doublereal *)&equiv_3) #define dt19y ((doublereal *)&equiv_7) doublereal dtemp[5]; #define dt19xa ((doublereal *)&equiv_3) #define dt19xb ((doublereal *)&equiv_3 + 112) #define dt19xc ((doublereal *)&equiv_3 + 224) #define dt19xd ((doublereal *)&equiv_3 + 336) #define dt19ya ((doublereal *)&equiv_7) #define dt19yb ((doublereal *)&equiv_7 + 112) #define dt19yc ((doublereal *)&equiv_7 + 224) #define dt19yd ((doublereal *)&equiv_7 + 336) extern doublereal dsdot_(integer *, real *, integer *, real *, integer *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); integer ksize; extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), drotm_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *), dswap_( integer *, doublereal *, integer *, doublereal *, integer *); doublereal ssize[7]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *), stest1_(doublereal *, doublereal *, doublereal *, doublereal *); /* Fortran I/O blocks */ static cilist io___80 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. 
*/ /* **** FGVZ: We have to add separate REAL arrays for DSDOT() because */ /* **** REAL() on an array argument does not translate via f2c. */ /* FOR DROTM */ /* TRUE X RESULTS F0R ROTATIONS DROTM */ /* TRUE Y RESULTS FOR ROTATIONS DROTM */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; /* .. Initialize all argument arrays .. */ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; /* **** FGVZ: We have to add a loop to initialize separate REAL arrays */ /* **** for DSDOT() because REAL() on an array argument does not */ /* **** translate via f2c. */ sx1[i__ - 1] = dx1[i__ - 1]; sy1[i__ - 1] = dy1[i__ - 1]; /* L20: */ } if (combla_1.icase == 1) { /* .. DDOT .. */ d__1 = ddot_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy); stest1_(&d__1, &dt7[kn + (ki << 2) - 5], &ssize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. DAXPY .. */ daxpy_(&combla_1.n, &sa, sx, &combla_1.incx, sy, & combla_1.incy); i__1 = leny; for (j = 1; j <= i__1; ++j) { sty[j - 1] = dt8[j + (kn + (ki << 2)) * 7 - 36]; /* L40: */ } stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else if (combla_1.icase == 5) { /* .. DCOPY .. */ for (i__ = 1; i__ <= 7; ++i__) { sty[i__ - 1] = dt10y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L60: */ } dcopy_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy); stest_(&leny, sy, sty, ssize2, &c_b35); } else if (combla_1.icase == 6) { /* .. DSWAP .. 
*/ dswap_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy); for (i__ = 1; i__ <= 7; ++i__) { stx[i__ - 1] = dt10x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt10y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L80: */ } stest_(&lenx, sx, stx, ssize2, &c_b35); stest_(&leny, sy, sty, ssize2, &c_b35); } else if (combla_1.icase == 12) { /* .. DROTM .. */ kni = kn + (ki - 1 << 2); for (kpar = 1; kpar <= 4; ++kpar) { for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt19x[i__ + (kpar + (kni << 2)) * 7 - 36]; sty[i__ - 1] = dt19y[i__ + (kpar + (kni << 2)) * 7 - 36]; } for (i__ = 1; i__ <= 5; ++i__) { dtemp[i__ - 1] = dpar[i__ + kpar * 5 - 6]; } i__1 = lenx; for (i__ = 1; i__ <= i__1; ++i__) { ssize[i__ - 1] = stx[i__ - 1]; } /* SEE REMARK ABOVE ABOUT DT11X(1,2,7) */ /* AND DT11X(5,3,8). */ if (kpar == 2 && kni == 7) { ssize[0] = 2.4; } if (kpar == 3 && kni == 8) { ssize[4] = 1.8; } drotm_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy, dtemp); stest_(&lenx, sx, stx, ssize, sfac); stest_(&leny, sy, sty, sty, sfac); } } else if (combla_1.icase == 13) { /* .. DSDOT .. 
*/ /* **** CALL TESTDSDOT(REAL(DSDOT(N,REAL(SX),INCX,REAL(SY),INCY)), */ r__1 = (real) dsdot_(&combla_1.n, sx1, &combla_1.incx, sy1, & combla_1.incy); r__2 = (real) dt7[kn + (ki << 2) - 5]; r__3 = (real) ssize1[kn - 1]; testdsdot_(&r__1, &r__2, &r__3, &c_b39); } else { s_wsle(&io___80); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK2", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L100: */ } /* L120: */ } return 0; } /* check2_ */ #undef dt19yd #undef dt19yc #undef dt19yb #undef dt19ya #undef dt19xd #undef dt19xc #undef dt19xb #undef dt19xa #undef dt19y #undef dt19x /* Subroutine */ int check3_(doublereal *sfac) { /* Initialized data */ static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static doublereal dx1[7] = { .6,.1,-.5,.8,.9,-.3,-.4 }; static doublereal dy1[7] = { .5,-.9,.3,.7,-.6,.2,.8 }; static doublereal sc = .8; static doublereal ss = .6; static doublereal dt9x[112] /* was [7][4][4] */ = { .6,0.,0.,0.,0.,0.,0., .78,0.,0.,0.,0.,0.,0.,.78,-.46,0.,0.,0.,0.,0.,.78,-.46,-.22,1.06, 0.,0.,0.,.6,0.,0.,0.,0.,0.,0.,.78,0.,0.,0.,0.,0.,0.,.66,.1,-.1,0., 0.,0.,0.,.96,.1,-.76,.8,.9,-.3,-.02,.6,0.,0.,0.,0.,0.,0.,.78,0., 0.,0.,0.,0.,0.,-.06,.1,-.1,0.,0.,0.,0.,.9,.1,-.22,.8,.18,-.3,-.02, .6,0.,0.,0.,0.,0.,0.,.78,0.,0.,0.,0.,0.,0.,.78,.26,0.,0.,0.,0.,0., .78,.26,-.76,1.12,0.,0.,0. 
}; static doublereal dt9y[112] /* was [7][4][4] */ = { .5,0.,0.,0.,0.,0.,0., .04,0.,0.,0.,0.,0.,0.,.04,-.78,0.,0.,0.,0.,0.,.04,-.78,.54,.08,0., 0.,0.,.5,0.,0.,0.,0.,0.,0.,.04,0.,0.,0.,0.,0.,0.,.7,-.9,-.12,0., 0.,0.,0.,.64,-.9,-.3,.7,-.18,.2,.28,.5,0.,0.,0.,0.,0.,0.,.04,0., 0.,0.,0.,0.,0.,.7,-1.08,0.,0.,0.,0.,0.,.64,-1.26,.54,.2,0.,0.,0., .5,0.,0.,0.,0.,0.,0.,.04,0.,0.,0.,0.,0.,0.,.04,-.9,.18,0.,0.,0., 0.,.04,-.9,.18,.7,-.18,.2,.16 }; static doublereal ssize2[28] /* was [14][2] */ = { 0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0.,0.,0.,0.,1.17,1.17,1.17,1.17,1.17,1.17,1.17, 1.17,1.17,1.17,1.17,1.17,1.17,1.17 }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k, ki, kn, mx, my; doublereal sx[7], sy[7], stx[7], sty[7]; integer lenx, leny; doublereal mwpc[11]; extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer mwpn[11]; doublereal mwps[11], mwpx[5], mwpy[5]; integer ksize; doublereal copyx[5], copyy[5]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); doublereal mwptx[55] /* was [11][5] */, mwpty[55] /* was [11][5] */; integer mwpinx[11], mwpiny[11]; doublereal mwpstx[5], mwpsty[5]; /* Fortran I/O blocks */ static cilist io___104 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. 
*/ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; if (combla_1.icase == 4) { /* .. DROT .. */ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt9x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt9y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L20: */ } drot_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy, & sc, &ss); stest_(&lenx, sx, stx, &ssize2[ksize * 14 - 14], sfac); stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else { s_wsle(&io___104); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK3", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } mwpc[0] = 1.; for (i__ = 2; i__ <= 11; ++i__) { mwpc[i__ - 1] = 0.; /* L80: */ } mwps[0] = 0.; for (i__ = 2; i__ <= 6; ++i__) { mwps[i__ - 1] = 1.; /* L100: */ } for (i__ = 7; i__ <= 11; ++i__) { mwps[i__ - 1] = -1.; /* L120: */ } mwpinx[0] = 1; mwpinx[1] = 1; mwpinx[2] = 1; mwpinx[3] = -1; mwpinx[4] = 1; mwpinx[5] = -1; mwpinx[6] = 1; mwpinx[7] = 1; mwpinx[8] = -1; mwpinx[9] = 1; mwpinx[10] = -1; mwpiny[0] = 1; mwpiny[1] = 1; mwpiny[2] = -1; mwpiny[3] = -1; mwpiny[4] = 2; mwpiny[5] = 1; mwpiny[6] = 1; mwpiny[7] = -1; mwpiny[8] = -1; mwpiny[9] = 2; mwpiny[10] = 1; for (i__ = 1; i__ <= 11; ++i__) { mwpn[i__ - 1] = 5; /* L140: */ } mwpn[4] = 3; mwpn[9] = 3; for (i__ = 1; i__ <= 5; ++i__) { mwpx[i__ - 1] = (doublereal) i__; mwpy[i__ - 1] = (doublereal) i__; mwptx[i__ * 11 - 11] = (doublereal) i__; mwpty[i__ * 11 - 11] = (doublereal) i__; mwptx[i__ * 11 - 10] = (doublereal) i__; mwpty[i__ * 11 - 10] = (doublereal) (-i__); mwptx[i__ * 11 - 9] = (doublereal) (6 - i__); mwpty[i__ * 11 - 9] = (doublereal) (i__ - 6); mwptx[i__ * 11 - 8] = (doublereal) i__; mwpty[i__ * 11 - 8] = (doublereal) (-i__); mwptx[i__ * 11 
- 6] = (doublereal) (6 - i__); mwpty[i__ * 11 - 6] = (doublereal) (i__ - 6); mwptx[i__ * 11 - 5] = (doublereal) (-i__); mwpty[i__ * 11 - 5] = (doublereal) i__; mwptx[i__ * 11 - 4] = (doublereal) (i__ - 6); mwpty[i__ * 11 - 4] = (doublereal) (6 - i__); mwptx[i__ * 11 - 3] = (doublereal) (-i__); mwpty[i__ * 11 - 3] = (doublereal) i__; mwptx[i__ * 11 - 1] = (doublereal) (i__ - 6); mwpty[i__ * 11 - 1] = (doublereal) (6 - i__); /* L160: */ } mwptx[4] = 1.; mwptx[15] = 3.; mwptx[26] = 5.; mwptx[37] = 4.; mwptx[48] = 5.; mwpty[4] = -1.; mwpty[15] = 2.; mwpty[26] = -2.; mwpty[37] = 4.; mwpty[48] = -3.; mwptx[9] = -1.; mwptx[20] = -3.; mwptx[31] = -5.; mwptx[42] = 4.; mwptx[53] = 5.; mwpty[9] = 1.; mwpty[20] = 2.; mwpty[31] = 2.; mwpty[42] = 4.; mwpty[53] = 3.; for (i__ = 1; i__ <= 11; ++i__) { combla_1.incx = mwpinx[i__ - 1]; combla_1.incy = mwpiny[i__ - 1]; for (k = 1; k <= 5; ++k) { copyx[k - 1] = mwpx[k - 1]; copyy[k - 1] = mwpy[k - 1]; mwpstx[k - 1] = mwptx[i__ + k * 11 - 12]; mwpsty[k - 1] = mwpty[i__ + k * 11 - 12]; /* L180: */ } drot_(&mwpn[i__ - 1], copyx, &combla_1.incx, copyy, &combla_1.incy, & mwpc[i__ - 1], &mwps[i__ - 1]); stest_(&c__5, copyx, mwpstx, mwpstx, sfac); stest_(&c__5, copyy, mwpsty, mwpsty, sfac); /* L200: */ } return 0; } /* check3_ */ /* Subroutine */ int stest_(integer *len, doublereal *scomp, doublereal * strue, doublereal *ssize, doublereal *sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY I " " \002,\002 COMP(I) TRUE(I) " " DIFFERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,2i5,i3,2d36.8,2d12.4)"; /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__; doublereal sd; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___121 = { 0, 6, 0, fmt_99999, 0 }; static cilist 
io___122 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___123 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; --strue; --scomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { sd = scomp[i__] - strue[i__]; if ((d__2 = *sfac * sd, abs(d__2)) <= (d__1 = ssize[i__], abs(d__1)) * d_epsilon_(&c_b63)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___121); e_wsfe(); s_wsfe(&io___122); e_wsfe(); L20: s_wsfe(&io___123); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&scomp[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&strue[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ssize[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); L40: ; } return 0; } /* stest_ */ /* Subroutine */ int testdsdot_(real *scomp, real *strue, real *ssize, real * sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY " " \002,\002 COMP(I) TRUE(I) DIF" "FERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = 
"(1x,i4,i3,1i5,i3,2e36.8,2e12.4)"; /* System generated locals */ real r__1; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ real sd; extern real s_epsilon_(); /* Fortran I/O blocks */ static cilist io___125 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___126 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___127 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ sd = *scomp - *strue; if ((r__1 = *sfac * sd, abs(r__1)) <= abs(*ssize) * s_epsilon_(&c_b81)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. 
*/ combla_1.pass = FALSE_; s_wsfe(&io___125); e_wsfe(); s_wsfe(&io___126); e_wsfe(); L20: s_wsfe(&io___127); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*scomp), (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&(*strue), (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&(*ssize), (ftnlen)sizeof(real)); e_wsfe(); L40: return 0; } /* testdsdot_ */ /* Subroutine */ int stest1_(doublereal *scomp1, doublereal *strue1, doublereal *ssize, doublereal *sfac) { doublereal scomp[1], strue[1]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* ************************* STEST1 ***************************** */ /* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN */ /* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE */ /* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --ssize; /* Function Body */ scomp[0] = *scomp1; strue[0] = *strue1; stest_(&c__1, scomp, strue, &ssize[1], sfac); return 0; } /* stest1_ */ doublereal sdiff_(doublereal *sa, doublereal *sb) { /* System generated locals */ doublereal ret_val; /* ********************************* SDIFF ************************** */ /* COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. 
*/ ret_val = *sa - *sb; return ret_val; } /* sdiff_ */ /* Subroutine */ int itest1_(integer *icomp, integer *itrue) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY " " \002,\002 COMP TRUE " " DIFFERENCE\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,2i5,2i36,i12)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer id; /* Fortran I/O blocks */ static cilist io___130 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___131 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___133 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* ITEST1 ************************* */ /* THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR */ /* EQUALITY. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ if (*icomp == *itrue) { goto L40; } /* HERE ICOMP IS NOT EQUAL TO ITRUE. */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___130); e_wsfe(); s_wsfe(&io___131); e_wsfe(); L20: id = *icomp - *itrue; s_wsfe(&io___133); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*icomp), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*itrue), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&id, (ftnlen)sizeof(integer)); e_wsfe(); L40: return 0; } /* itest1_ */ /* Main program alias */ int dblat1_ () { main (); return 0; } blis-0.6.1/blastest/src/dblat2.c000066400000000000000000004540561360743507500164230ustar00rootroot00000000000000/* dblat2.f -- translated by f2c (version 20100827). 
You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__5 = 5; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static doublereal c_b120 = 0.; static doublereal c_b128 = 1.; static logical c_true = TRUE_; static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; /* > \brief \b DBLAT2 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM DBLAT2 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the DOUBLE PRECISION Level 2 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 18 records */ /* > of the file are read using list-directed input, the last 16 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 34 lines: */ /* > 'dblat2.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 
0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 4 NUMBER OF VALUES OF K */ /* > 0 1 2 4 VALUES OF K */ /* > 4 NUMBER OF VALUES OF INCX AND INCY */ /* > 1 2 -1 -2 VALUES OF INCX AND INCY */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > 0.0 1.0 0.7 VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > 0.0 1.0 0.9 VALUES OF BETAC */ /* > DGEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DGBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTRMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTRSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTBSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTPSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DGER T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSPR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. */ /* > An extended set of Fortran Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics */ /* > and Computer Science Division, Argonne National Laboratory, */ /* > 9700 South Cass Avenue, Argonne, Illinois 60439, US. 
*/ /* > */ /* > Or */ /* > */ /* > NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms */ /* > Group Ltd., NAG Central Office, 256 Banbury Road, Oxford */ /* > OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st */ /* > Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. */ /* > */ /* > */ /* > -- Written on 10-August-1987. */ /* > Richard Hanson, Sandia National Labs. */ /* > Jeremy Du Croz, NAG Central Office. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup double_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static char snames[6*16] = "DGEMV " "DGBMV " "DSYMV " "DSBMV " "DSPMV " "DTRMV " "DTBMV " "DTPMV " "DTRSV " "DTBSV " "DTPSV " "DGER " "DSYR " "DSPR " "DSYR2 " "DSPR2 "; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 VALUE OF K IS LESS THAN 0\002)"; static char fmt_9994[] = "(\002 ABSOLUTE VALUE OF INCX OR INCY IS 0 OR G" "REATER THAN \002,i2)"; static char fmt_9993[] = "(\002 TESTS OF THE DOUBLE PRECISION LEVEL 2 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9992[] = "(\002 FOR N \002,9i6)"; static char fmt_9991[] = "(\002 FOR K \002,7i6)"; static char fmt_9990[] = "(\002 FOR INCX AND INCY \002,7i6)"; static char fmt_9989[] = "(\002 FOR ALPHA \002,7f6.1)"; static char fmt_9988[] = "(\002 FOR BETA \002,7f6.1)"; static 
char fmt_9980[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9984[] = "(a6,l2)"; static char fmt_9986[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,d9.1)"; static char fmt_9985[] = "(\002 ERROR IN DMVCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 DMVCH WAS CALLED " "WITH TRANS = \002,a1,\002 AND RETURNED SAME = \002,l1,\002 AND E" "RR = \002,f12.3,\002.\002,/\002 THIS MAY BE DUE TO FAULTS IN THE" " ARITHMETIC OR THE COMPILER.\002,/\002 ******* TESTS ABANDONED *" "******\002)"; static char fmt_9983[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9982[] = "(/\002 END OF TESTS\002)"; static char fmt_9981[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9987[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ doublereal a[4225] /* was [65][65] */, g[65]; integer i__, j, n; doublereal x[65], y[65], z__[130], aa[4225]; integer kb[7]; doublereal as[4225], xs[130], ys[130], yt[65], xx[130], yy[130], alf[7]; extern logical lde_(doublereal *, doublereal *, integer *); integer inc[7], nkb; doublereal bet[7], eps, err; 
integer nalf, idim[9]; logical same; integer ninc, nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int dchk1_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk2_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk3_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk4_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk5_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk6_(char *, doublereal *, 
doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchke_(integer *, char *, integer *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; logical ltest[16], sfatal; char snamet[6]; doublereal thresh; logical ltestt, tsterr; char summry[32]; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___35 = { 0, 5, 0, 0, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist 
io___45 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9988, 0 }; static cilist io___54 = { 0, 0, 0, 0, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9980, 0 }; static cilist io___56 = { 0, 0, 0, 0, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, 0, 0 }; static cilist io___60 = { 0, 5, 1, fmt_9984, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___81 = { 0, 0, 0, 0, 0 }; static cilist io___82 = { 0, 0, 0, fmt_9983, 0 }; static cilist io___83 = { 0, 0, 0, 0, 0 }; static cilist io___90 = { 0, 0, 0, fmt_9982, 0 }; static cilist io___91 = { 0, 0, 0, fmt_9981, 0 }; static cilist io___92 = { 0, 0, 0, fmt_9987, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. 
*/ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); /* Read and check the parameter values for the tests. 
*/ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L10: */ } /* Values of K */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nkb, (ftnlen)sizeof(integer)); e_rsle(); if (nkb < 1 || nkb > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___29); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { if (kb[i__ - 1] < 0) { io___31.ciunit = nout; s_wsfe(&io___31); e_wsfe(); goto L230; } /* L20: */ } /* Values of INCX and INCY */ s_rsle(&io___32); do_lio(&c__3, &c__1, (char *)&ninc, (ftnlen)sizeof(integer)); e_rsle(); if (ninc < 1 || ninc > 7) { io___34.ciunit = nout; s_wsfe(&io___34); do_fio(&c__1, "INCX AND INCY", (ftnlen)13); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___35); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { if (inc[i__ - 1] == 0 || (i__2 = inc[i__ - 1], abs(i__2)) > 2) { io___37.ciunit = nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&c__2, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L30: */ } /* Values of ALPHA */ s_rsle(&io___38); 
do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___40.ciunit = nout; s_wsfe(&io___40); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___41); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__5, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal) ); } e_rsle(); /* Values of BETA */ s_rsle(&io___43); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___45.ciunit = nout; s_wsfe(&io___45); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___46); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__5, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal) ); } e_rsle(); /* Report values of parameters. */ io___48.ciunit = nout; s_wsfe(&io___48); e_wsfe(); io___49.ciunit = nout; s_wsfe(&io___49); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___50.ciunit = nout; s_wsfe(&io___50); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___51.ciunit = nout; s_wsfe(&io___51); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___52.ciunit = nout; s_wsfe(&io___52); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); io___53.ciunit = nout; s_wsfe(&io___53); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); if (! 
tsterr) { io___54.ciunit = nout; s_wsle(&io___54); e_wsle(); io___55.ciunit = nout; s_wsfe(&io___55); e_wsfe(); } io___56.ciunit = nout; s_wsle(&io___56); e_wsle(); io___57.ciunit = nout; s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); io___58.ciunit = nout; s_wsle(&io___58); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 16; ++i__) { ltest[i__ - 1] = FALSE_; /* L40: */ } L50: i__1 = s_rsfe(&io___60); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L80; } i__1 = e_rsfe(); if (i__1 != 0) { goto L80; } for (i__ = 1; i__ <= 16; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } /* L60: */ } io___63.ciunit = nout; s_wsfe(&io___63); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L70: ltest[i__ - 1] = ltestt; goto L50; L80: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = d_epsilon_(&c_b120); io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Check the reliability of DMVCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - j + 1; a[i__ + j * 65 - 66] = (doublereal) max(i__3,0); /* L110: */ } x[j - 1] = (doublereal) j; y[j - 1] = 0.; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { yy[j - 1] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3); /* L130: */ } /* YY holds the exact result. On exit from DMVCH YT holds */ /* the result computed by DMVCH. 
*/ *(unsigned char *)trans = 'N'; dmvch_(trans, &n, &n, &c_b128, a, &c__65, x, &c__1, &c_b120, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lde_(yy, yt, &n); if (! same || err != 0.) { io___78.ciunit = nout; s_wsfe(&io___78); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; dmvch_(trans, &n, &n, &c_b128, a, &c__65, x, &c_n1, &c_b120, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lde_(yy, yt, &n); if (! same || err != 0.) { io___79.ciunit = nout; s_wsfe(&io___79); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 16; ++isnum) { io___81.ciunit = nout; s_wsle(&io___81); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___82.ciunit = nout; s_wsfe(&io___82); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { dchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___83.ciunit = nout; s_wsle(&io___83); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L140; case 3: goto L150; case 4: goto L150; case 5: goto L150; case 6: goto L160; case 7: goto L160; case 8: goto L160; case 9: goto L160; case 10: goto L160; case 11: goto L160; case 12: goto L170; case 13: goto L180; case 14: goto L180; case 15: goto L190; case 16: goto L190; } /* Test DGEMV, 01, and DGBMV, 02. 
*/ L140: dchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test DSYMV, 03, DSBMV, 04, and DSPMV, 05. */ L150: dchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test DTRMV, 06, DTBMV, 07, DTPMV, 08, */ /* DTRSV, 09, DTBSV, 10, and DTPSV, 11. */ L160: dchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test DGER, 12. */ L170: dchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test DSYR, 13, and DSPR, 14. */ L180: dchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test DSYR2, 15, and DSPR2, 16. */ L190: dchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: if (fatal && sfatal) { goto L220; } } /* L210: */ } io___90.ciunit = nout; s_wsfe(&io___90); e_wsfe(); goto L240; L220: io___91.ciunit = nout; s_wsfe(&io___91); e_wsfe(); goto L240; L230: io___92.ciunit = nout; s_wsfe(&io___92); e_wsfe(); L240: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of DBLAT2. 
*/ return 0; } /* main */ /* Subroutine */ int dchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "4(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, m, n, ia, ib, ic, nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns, laa, lda; extern 
logical lde_(doublereal *, doublereal *, integer *); doublereal als, bls, err; integer iku, kls, kus; doublereal beta; integer ldas; logical same; integer incx, incy; logical full, tran, null; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dgemv_( char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; char trans[1]; logical banded; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___139 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___140 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___141 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___144 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___146 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___147 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___148 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___149 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___150 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DGEMV and DGBMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. 
*/ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; /* Define the number of arguments. */ if (full) { nargs = 11; } else if (banded) { nargs = 13; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (iku = 1; iku <= i__2; ++iku) { if (banded) { ku = kb[iku]; /* Computing MAX */ i__3 = ku - 1; kl = max(i__3,0); } else { ku = n - 1; kl = m - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = kl + ku + 1; } else { lda = m; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } laa = lda * n; null = n <= 0 || m <= 0; /* Generate the matrix A. */ transl = 0.; dmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1] , &lda, &kl, &ku, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); for (ic = 1; ic <= 3; ++ic) { *(unsigned char *)trans = *(unsigned char *)&ich[ic - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char * )trans == 'C'; if (tran) { ml = n; nl = m; } else { ml = m; nl = n; } i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * nl; /* Generate the vector X. 
*/ transl = .5; i__4 = abs(incx); i__5 = nl - 1; dmake_("GE", " ", " ", &c__1, &nl, &x[1], &c__1, &xx[ 1], &i__4, &c__0, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); if (nl > 1) { x[nl / 2] = 0.; xx[abs(incx) * (nl / 2 - 1) + 1] = 0.; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * ml; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { alpha = alf[ia]; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { beta = bet[ib]; /* Generate the vector Y. */ transl = 0.; i__7 = abs(incy); i__8 = ml - 1; dmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; kls = kl; kus = ku; als = alpha; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; bls = beta; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. 
*/ if (full) { if (*trace) { io___139.ciunit = *ntra; s_wsfe(&io___139); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { if (*trace) { io___140.ciunit = *ntra; s_wsfe(&io___140); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&kl, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dgbmv_(trans, &m, &n, &kl, &ku, & alpha, &aa[1], &lda, &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___141.ciunit = *nout; s_wsfe(&io___141); e_wsfe(); *fatal = TRUE_; goto L130; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)trans == *( unsigned char *)transs; isame[1] = ms == m; isame[2] = ns == n; if (full) { isame[3] = als == alpha; isame[4] = lde_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lde_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls == beta; if (null) { isame[9] = lde_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[9] = lderes_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[10] = incys == incy; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; isame[5] = als == alpha; isame[6] = lde_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lde_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; isame[10] = bls == beta; if (null) { isame[11] = lde_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[11] = lderes_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[12] = incys == incy; } /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___144.ciunit = *nout; s_wsfe(&io___144); do_fio(&c__1, (char *)&i__, ( ftnlen)sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L130; } if (! null) { /* Check the result. */ dmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L130; } } else { /* Avoid repeating tests with M.le.0 or */ /* N.le.0. */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* L120: */ } /* Report result. 
*/ if (errmax < *thresh) { io___146.ciunit = *nout; s_wsfe(&io___146); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___147.ciunit = *nout; s_wsfe(&io___147); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L140; L130: io___148.ciunit = *nout; s_wsfe(&io___148); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___149.ciunit = *nout; s_wsfe(&io___149); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___150.ciunit = *nout; s_wsfe(&io___150); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&kl, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L140: return 0; /* End of DCHK1. 
*/ } /* dchk1_ */ /* dchk2_ -- driver that exercises DSYMV, DSBMV and DSPMV. sname[2] picks the variant: 'Y' full (nargs = 10), 'B' banded (nargs = 11), 'P' packed (nargs = 9). For every n in idim[], every k in kb[] (banded only; otherwise k = n - 1), both uplo characters in ich ("UL"), every incx and incy in inc[], every alpha in alf[] and every beta in bet[], it builds A, X and Y with dmake_, snapshots the arguments (uplos, ns, ks, als, as, ldas, xs, incxs, bls, ys, incys), calls the routine, records in isame[] whether each argument still agrees with its snapshot, and checks the result with dmvch_, keeping the largest err in errmax. When *trace is set every call is written to unit *ntra. If infoc_1.ok is false after the call, if any isame[] entry is false, or if *fatal is set after dmvch_, the failing call is printed on unit *nout (label L120) with *fatal left TRUE_; otherwise a summary that depends on errmax < *thresh is printed on *nout. eps, yt and g are only forwarded to dmvch_; incmax and sname_len are not referenced in this routine. Always returns 0. */ /* Subroutine */ int dchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f4.1," "\002, Y,\002,i2,\002) .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f" "4.1,\002, Y,\002,i2,\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, AP\002,\002, X,\002,i2,\002,\002,f4.1" ",\002, Y,\002,i2,\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), 
e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, bls, err, beta; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int dsbmv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); logical reset; integer incxs, incys; extern /* Subroutine */ int dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); char uplos[1]; extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen); logical banded, packed; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; /* Fortran I/O blocks */ static cilist io___189 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___190 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___191 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___192 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___195 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___197 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___198 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___199 = { 0, 0, 0, fmt_9996, 0 }; static 
cilist io___200 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___201 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___202 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DSYMV, DSBMV and DSPMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ /* Each pointer is moved back one element so that the 1-based subscripts used below (idim[in], alf[ia], aa[1], ...) land on the caller's data; a is moved back by a_offset = 1 + a_dim1, so &a[a_offset] is the caller's first element. */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 10; } else if (banded) { nargs = 11; } else if (packed) { nargs = 9; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; /* null flags a degenerate problem (n <= 0): the routine is still called and its arguments are still compared, but the dmvch_ check is skipped and control jumps to L110. */ for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; /* Generate the matrix A. 
*/ transl = 0.; dmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[ 1], &lda, &k, &k, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5; i__4 = abs(incx); i__5 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); /* Force element n / 2 of X to zero, both in the unit-stride copy x and at the matching strided position in xx. */ if (n > 1) { x[n / 2] = 0.; xx[abs(incx) * (n / 2 - 1) + 1] = 0.; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * n; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { alpha = alf[ia]; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { beta = bet[ib]; /* Generate the vector Y. */ transl = 0.; i__7 = abs(incy); i__8 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)uplos = *(unsigned char *) uplo; ns = n; ks = k; als = alpha; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; bls = beta; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. 
*/ if (full) { if (*trace) { io___189.ciunit = *ntra; s_wsfe(&io___189); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } /* When *rewi is set, f_rew is applied to unit *ntra before every call (same pattern in the banded and packed branches). */ if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsymv_(uplo, &n, &alpha, &aa[1], &lda, & xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (banded) { if (*trace) { io___190.ciunit = *ntra; s_wsfe(&io___190); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dsbmv_(uplo, &n, &k, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (packed) { if (*trace) { io___191.ciunit = *ntra; s_wsfe(&io___191); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = 
*ntra; f_rew(&al__1); } dspmv_(uplo, &n, &alpha, &aa[1], &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___192.ciunit = *nout; s_wsfe(&io___192); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *( unsigned char *)uplos; isame[1] = ns == n; if (full) { isame[2] = als == alpha; isame[3] = lde_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lde_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; isame[7] = bls == beta; /* Y is the output argument: for a null problem the whole saved vector is compared with lde_, otherwise lderes_ is used. NOTE(review): presumably lderes_ ignores the elements the routine is allowed to overwrite -- confirm against lderes_. The banded and packed branches follow the same scheme. */ if (null) { isame[8] = lde_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[8] = lderes_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; isame[3] = als == alpha; isame[4] = lde_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lde_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls == beta; if (null) { isame[9] = lde_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[9] = lderes_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[10] = incys == incy; } else if (packed) { isame[2] = als == alpha; isame[3] = lde_(&as[1], &aa[1], &laa); isame[4] = lde_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; isame[6] = bls == beta; if (null) { isame[7] = lde_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[7] = lderes_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[8] = incys == incy; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___195.ciunit = *nout; s_wsfe(&io___195); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. 
*/ dmvch_("N", &n, &n, &alpha, &a[a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0 */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___197.ciunit = *nout; s_wsfe(&io___197); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___198.ciunit = *nout; s_wsfe(&io___198); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; /* Failure path: print the routine name with format 9996, then the argument list of the failing call using the same format as the trace output for the active variant. */ L120: io___199.ciunit = *nout; s_wsfe(&io___199); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___200.ciunit = *nout; s_wsfe(&io___200); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___201.ciunit = *nout; s_wsfe(&io___201); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); 
e_wsfe(); } else if (packed) { io___202.ciunit = *nout; s_wsfe(&io___202); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of DCHK2. */ } /* dchk2_ */ /* dchk3_ -- driver that exercises DTRMV, DTBMV, DTPMV, DTRSV, DTBSV and DTPSV. sname[2] picks the storage variant ('R' full, nargs = 8; 'B' banded, nargs = 9; 'P' packed, nargs = 7) and the two characters at sname + 3 pick "MV" or "SV". For every n in idim[], every k in kb[] (banded only; otherwise k = n - 1), every uplo in ichu ("UL"), trans in icht ("NTC"), diag in ichd ("UN") and every incx in inc[], it builds A and X with dmake_, snapshots the arguments (uplos, transs, diags, ns, ks, as, ldas, xs, incxs), calls the routine, records in isame[] whether each argument still agrees with its snapshot, and checks the result with dmvch_, keeping the largest err in errmax. z__ is zeroed up front (first *nmax elements) for use in the dmvch_ calls. When *trace is set every call is written to unit *ntra. If infoc_1.ok is false after the call, if any isame[] entry is false, or if *fatal is set after dmvch_, the failing call is printed on unit *nout (label L120) with *fatal left TRUE_; otherwise a summary that depends on errmax < *thresh is printed on *nout. eps, xt and g are only forwarded to dmvch_; incmax and sname_len are not referenced in this routine. Always returns 0. */ /* Subroutine */ int dchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *xt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, A,\002,i3,\002, X,\002,i2,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002 A,\002,i3,\002, X,\002,i2,\002" ") .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, AP, \002,\002X,\002,i2,\002) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 
C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda; extern logical lde_(doublereal *, doublereal *, integer *); integer ict, icu; doublereal err; char diag[1]; integer ldas; logical same; integer incx; logical full, null; char uplo[1]; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); char diags[1]; logical isame[13]; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; extern /* Subroutine */ int dtbmv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); logical reset; extern /* Subroutine */ int dtbsv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); integer incxs; char trans[1]; extern /* Subroutine */ int dtpmv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; extern /* 
Subroutine */ int dtrsv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; char transs[1]; /* Fortran I/O blocks */ static cilist io___239 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___240 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___241 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___242 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___243 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___244 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___245 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___248 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___250 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___251 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___252 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___253 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___254 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___255 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DTRMV, DTBMV, DTPMV, DTRSV, DTBSV and DTPSV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ /* Each pointer is moved back one element so that the 1-based subscripts used below land on the caller's data; a is moved back by a_offset = 1 + a_dim1, so &a[a_offset] is the caller's first element. */ --idim; --kb; --inc; --z__; --g; --xt; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'R'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. 
*/ if (full) { nargs = 8; } else if (banded) { nargs = 9; } else if (packed) { nargs = 7; } nc = 0; reset = TRUE_; errmax = 0.; /* Set up zero vector for DMVCH. */ i__1 = *nmax; for (i__ = 1; i__ <= i__1; ++i__) { z__[i__] = 0.; /* L10: */ } i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; /* null flags a degenerate problem (n <= 0): the routine is still called and its arguments are still compared, but the dmvch_ check is skipped and control jumps to L110. */ for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1] ; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl = 0.; dmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5; i__4 = abs(incx); i__5 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, & xx[1], &i__4, &c__0, &i__5, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); /* Force element n / 2 of X to zero, both in the unit-stride copy x and at the matching strided position in xx. */ if (n > 1) { x[n / 2] = 0.; xx[abs(incx) * (n / 2 - 1) + 1] = 0.; } ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; *(unsigned char *)diags = *(unsigned char *)diag; ns = n; ks = k; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L20: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { xs[i__] = xx[i__]; /* L30: */ } incxs = incx; /* Call the subroutine. 
*/ if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { io___239.ciunit = *ntra; s_wsfe(&io___239); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } /* When *rewi is set, f_rew is applied to unit *ntra before every call (same pattern in all six branches). */ if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtrmv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___240.ciunit = *ntra; s_wsfe(&io___240); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___241.ciunit = *ntra; s_wsfe(&io___241); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtpmv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { if (full) { if (*trace) { io___242.ciunit = *ntra; s_wsfe(&io___242); do_fio(&c__1, 
(char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtrsv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___243.ciunit = *ntra; s_wsfe(&io___243); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___244.ciunit = *ntra; s_wsfe(&io___244); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dtpsv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___245.ciunit = *nout; s_wsfe(&io___245); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { isame[4] = lde_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; if (null) { isame[6] = lde_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[6] = lderes_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[7] = incxs == incx; } else if (banded) { isame[4] = ks == k; isame[5] = lde_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (null) { isame[7] = lde_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[7] = lderes_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[8] = incxs == incx; } else if (packed) { isame[4] = lde_(&as[1], &aa[1], &laa); if (null) { isame[5] = lde_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[5] = lderes_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[6] = incxs == incx; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___248.ciunit = *nout; s_wsfe(&io___248); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen) 2) == 0) { /* Check the result. */ dmvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &x[1], &incx, & c_b120, &z__[1], &incx, &xt[1], & g[1], &xx[1], eps, &err, fatal, nout, &c_true, (ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { /* Compute approximation to original vector. 
*/ i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = xx[(i__ - 1) * abs(incx) + 1]; xx[(i__ - 1) * abs(incx) + 1] = x[i__] ; /* L50: */ } /* After the loop z__ holds what the routine left in xx, and the strided slots of xx hold the reference x again. NOTE(review): presumably dmvch_ then applies op(A) to z__ and compares with x -- confirm against dmvch_ and the values of c_b128 / c_b120, which are defined outside this routine. */ dmvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &z__[1], &incx, & c_b120, &x[1], &incx, &xt[1], &g[ 1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0. */ goto L110; } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___250.ciunit = *nout; s_wsfe(&io___250); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___251.ciunit = *nout; s_wsfe(&io___251); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; /* Failure path: print the routine name with format 9996, then the argument list of the failing call using the same format as the trace output for the active variant. */ L120: io___252.ciunit = *nout; s_wsfe(&io___252); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___253.ciunit = *nout; s_wsfe(&io___253); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___254.ciunit = *nout; s_wsfe(&io___254); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___255.ciunit = *nout; s_wsfe(&io___255); 
do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of DCHK3. */ } /* dchk3_ */ /* Subroutine */ int dchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *x, doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy, doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__, ftnlen sname_len) { /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(i3,\002,\002)" ",f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, 
n; doublereal w[1]; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, err; extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); integer ldas; logical same; integer incx, incy; logical null; extern /* Subroutine */ int dmake_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; logical isame[13]; extern /* Subroutine */ int dmvch_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer nargs; logical reset; integer incxs, incys; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); doublereal errmax, transl; /* Fortran I/O blocks */ static cilist io___284 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___285 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___288 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___292 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___293 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___294 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___295 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___296 = { 0, 0, 0, fmt_9994, 0 }; /* Tests DGER. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. 
*/ /* .. Executable Statements .. */ /* Define the number of arguments. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ nargs = 9; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } /* Set LDA to 1 more than minimum value if room. */ lda = m; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * n; null = n <= 0 || m <= 0; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * m; /* Generate the vector X. */ transl = .5; i__3 = abs(incx); i__4 = m - 1; dmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { x[m / 2] = 0.; xx[abs(incx) * (m / 2 - 1) + 1] = 0.; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl = 0.; i__4 = abs(incy); i__5 = n - 1; dmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { y[n / 2] = 0.; yy[abs(incy) * (n / 2 - 1) + 1] = 0.; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; /* Generate the matrix A. */ transl = 0.; i__5 = m - 1; i__6 = n - 1; dmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. 
*/ ms = m; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (*trace) { io___284.ciunit = *ntra; s_wsfe(&io___284); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dger_(&m, &n, &alpha, &xx[1], &incx, &yy[1], &incy, & aa[1], &lda); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___285.ciunit = *nout; s_wsfe(&io___285); e_wsfe(); *fatal = TRUE_; goto L140; } /* See what data changed inside subroutine. */ isame[0] = ms == m; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lde_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lde_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lde_(&as[1], &aa[1], &laa); } else { isame[7] = lderes_("GE", " ", &m, &n, &as[1], &aa[ 1], &lda, (ftnlen)2, (ftnlen)1); } isame[8] = ldas == lda; /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___288.ciunit = *nout; s_wsfe(&io___288); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L140; } if (! null) { /* Check the result column by column. 
*/ if (incx > 0) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__] = x[i__]; /* L50: */ } } else { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__] = x[m - i__ + 1]; /* L60: */ } } i__5 = n; for (j = 1; j <= i__5; ++j) { if (incy > 0) { w[0] = y[j]; } else { w[0] = y[n - j + 1]; } dmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b128, &a[j * a_dim1 + 1], &c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L130; } /* L70: */ } } else { /* Avoid repeating tests with M.le.0 or N.le.0. */ goto L110; } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___292.ciunit = *nout; s_wsfe(&io___292); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___293.ciunit = *nout; s_wsfe(&io___293); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L150; L130: io___294.ciunit = *nout; s_wsfe(&io___294); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L140: io___295.ciunit = *nout; s_wsfe(&io___295); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___296.ciunit = *nout; s_wsfe(&io___296); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); L150: return 0; /* End of DCHK4. 
*/ } /* dchk4_ */

/*
 * dchk5_ -- driver that calls dsyr_ (full storage) or dspr_ (packed storage)
 * for every combination of dimension, UPLO, X increment and alpha supplied
 * by the caller, and checks the arguments and the result after each call.
 *
 * Arguments, as they are used by the code below:
 *   sname     routine name; written to the reports as 6 characters.
 *             sname[2] == 'Y' selects the dsyr_ path, sname[2] == 'P' the
 *             dspr_ path; sname + 1 is handed to dmake_ and lderes_ as a
 *             2-character type string.
 *   eps       forwarded unchanged to dmvch_.
 *   thresh    errmax < *thresh selects the "PASSED" report, otherwise the
 *             "SUSPECT" report (which also prints errmax) is written.
 *   nout      Fortran unit for result and failure reports.
 *   ntra      Fortran unit for the per-call trace (written when *trace is
 *             set) and the unit that is rewound before each call when *rewi
 *             is set.
 *   fatal     set to TRUE_ here when infoc_1.ok is false after a call or
 *             when a saved argument differs after the call; it is also
 *             passed to dmvch_ and tested after dmvch_ returns.
 *   nidim, idim   count and list of the N values to test.
 *   nalf, alf     count and list of the alpha values to test.
 *   ninc, inc     count and list of the increments to test.
 *   nmax      leading dimension used for a; an N whose lda would exceed
 *             *nmax is skipped.
 *   incmax, sname_len   not referenced in this routine.
 *   a, aa, as, x, xx, xs, yt, g, z__   caller-supplied work arrays.
 *   y, yy, ys     only index-adjusted here; never read or written.
 *
 * Always returns 0; the outcome is communicated through *fatal and the
 * reports written to *nout.
 */
/* Subroutine */ int dchk5_(char *sname, doublereal *eps, doublereal *thresh,
    integer *nout, integer *ntra, logical *trace, logical *rewi, logical *
    fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf,
    integer *ninc, integer *inc, integer *nmax, integer *incmax,
    doublereal *a, doublereal *aa, doublereal *as, doublereal *x,
    doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy,
    doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__,
    ftnlen sname_len)
{
    /* Initialized data */

    /* The two UPLO characters iterated over by the ic loop. */
    static char ich[2] = "UL";

    /* Format strings */
    static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
        "i3,\002,\002,f4.1,\002, X,\002,i2,\002, A,\002,i3,\002) "
        " .\002)";
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
        "i3,\002,\002,f4.1,\002, X,\002,i2,\002, AP) "
        " .\002)";
    static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
        "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
        " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
        "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
        " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
        "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN"
        " \002,i3)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
        "ER:\002)";

    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    alist al__1;

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
        f_rew(alist *);

    /* Local variables */
    integer i__, j, n;
    doublereal w[1];
    integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda;
    extern logical lde_(doublereal *, doublereal *, integer *);
    doublereal als, err;
    integer ldas;
    logical same;
    integer incx;
    logical full;
    extern /* Subroutine */ int dspr_(char *, integer *, doublereal *,
        doublereal *, integer *, doublereal *, ftnlen);
    logical null;
    char uplo[1];
    extern /* Subroutine */ int dsyr_(char *, integer *, doublereal *,
        doublereal *, integer *, doublereal *, integer *, ftnlen), dmake_(
        char *, char *, char *, integer *, integer *, doublereal *,
        integer *, doublereal *, integer *, integer *, integer *,
        logical *, doublereal *, ftnlen, ftnlen, ftnlen);
    doublereal alpha;
    logical isame[13];
    extern /* Subroutine */ int dmvch_(char *, integer *, integer *,
        doublereal *, doublereal *, integer *, doublereal *, integer *,
        doublereal *, doublereal *, integer *, doublereal *, doublereal *,
        doublereal *, doublereal *, doublereal *, logical *, integer *,
        logical *, ftnlen);
    integer nargs;
    logical reset;
    integer incxs;
    logical upper;
    char uplos[1];
    logical packed;
    extern logical lderes_(char *, char *, integer *, integer *,
        doublereal *, doublereal *, integer *, ftnlen, ftnlen);
    doublereal errmax, transl;

    /* Fortran I/O blocks */
    /* One control block per write statement; ciunit is filled in with */
    /* *ntra or *nout immediately before each use. */
    static cilist io___324 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___325 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___326 = { 0, 0, 0, fmt_9992, 0 };
    static cilist io___329 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___336 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___337 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___338 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___339 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___340 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___341 = { 0, 0, 0, fmt_9994, 0 };

    /* Tests DSYR and DSPR. */
    /* Auxiliary routine for test program for Level 2 Blas. */
    /* -- Written on 10-August-1987. */
    /* Richard Hanson, Sandia National Labs. */
    /* Jeremy Du Croz, NAG Central Office. */
    /* .. Parameters .. */
    /* .. Scalar Arguments .. */
    /* .. Array Arguments .. */
    /* .. Local Scalars .. */
    /* .. Local Arrays .. */
    /* .. External Functions .. */
    /* .. External Subroutines .. */
    /* .. Intrinsic Functions .. */
    /* .. Scalars in Common .. */
    /* .. Common blocks .. */
    /* .. Data statements .. */

    /* Parameter adjustments */
    /* Each array pointer is moved back one element so the code below can */
    /* use 1-based subscripts; a is shifted so that a[i + j * a_dim1] */
    /* addresses element (i,j) with both subscripts starting at 1. */
    --idim;
    --alf;
    --inc;
    --z__;
    --g;
    --yt;
    --y;
    --x;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ys;
    --yy;
    --xs;
    --xx;

    /* Function Body */
    /* .. Executable Statements .. */
    full = *(unsigned char *)&sname[2] == 'Y';
    packed = *(unsigned char *)&sname[2] == 'P';
    /* Define the number of arguments. */
    /* NOTE(review): nargs stays unassigned when sname[2] is neither 'Y' */
    /* nor 'P'; presumably callers only pass the DSYR/DSPR names -- confirm. */
    if (full) {
        nargs = 7;
    } else if (packed) {
        nargs = 6;
    }

    nc = 0;
    reset = TRUE_;
    errmax = 0.;

    i__1 = *nidim;
    for (in = 1; in <= i__1; ++in) {
        n = idim[in];
        /* Set LDA to 1 more than minimum value if room. */
        lda = n;
        if (lda < *nmax) {
            ++lda;
        }
        /* Skip tests if not enough room. */
        if (lda > *nmax) {
            goto L100;
        }
        /* Number of aa elements saved and compared around each call. */
        if (packed) {
            laa = n * (n + 1) / 2;
        } else {
            laa = lda * n;
        }

        for (ic = 1; ic <= 2; ++ic) {
            *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1];
            upper = *(unsigned char *)uplo == 'U';

            i__2 = *ninc;
            for (ix = 1; ix <= i__2; ++ix) {
                incx = inc[ix];
                lx = abs(incx) * n;

                /* Generate the vector X. */
                transl = .5;
                i__3 = abs(incx);
                i__4 = n - 1;
                dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3,
                    &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1,
                    (ftnlen)1);
                /* Zero element n/2 of x and the element at the matching */
                /* strided position of xx. */
                if (n > 1) {
                    x[n / 2] = 0.;
                    xx[abs(incx) * (n / 2 - 1) + 1] = 0.;
                }

                i__3 = *nalf;
                for (ia = 1; ia <= i__3; ++ia) {
                    alpha = alf[ia];
                    /* null selects the "aa must be unchanged" check below. */
                    null = n <= 0 || alpha == 0.;

                    /* Generate the matrix A. */
                    transl = 0.;
                    i__4 = n - 1;
                    i__5 = n - 1;
                    dmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &
                        aa[1], &lda, &i__4, &i__5, &reset, &transl, (
                        ftnlen)2, (ftnlen)1, (ftnlen)1);

                    /* nc counts calls and is printed in every report. */
                    ++nc;

                    /* Save every datum before calling the subroutine. */
                    *(unsigned char *)uplos = *(unsigned char *)uplo;
                    ns = n;
                    als = alpha;
                    i__4 = laa;
                    for (i__ = 1; i__ <= i__4; ++i__) {
                        as[i__] = aa[i__];
                        /* L10: */
                    }
                    ldas = lda;
                    i__4 = lx;
                    for (i__ = 1; i__ <= i__4; ++i__) {
                        xs[i__] = xx[i__];
                        /* L20: */
                    }
                    incxs = incx;

                    /* Call the subroutine. */
                    if (full) {
                        if (*trace) {
                            io___324.ciunit = *ntra;
                            s_wsfe(&io___324);
                            do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)
                                );
                            do_fio(&c__1, sname, (ftnlen)6);
                            do_fio(&c__1, uplo, (ftnlen)1);
                            do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer))
                                ;
                            do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(
                                doublereal));
                            do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(
                                integer));
                            do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(
                                integer));
                            e_wsfe();
                        }
                        if (*rewi) {
                            al__1.aerr = 0;
                            al__1.aunit = *ntra;
                            f_rew(&al__1);
                        }
                        dsyr_(uplo, &n, &alpha, &xx[1], &incx, &aa[1], &lda, (
                            ftnlen)1);
                    } else if (packed) {
                        if (*trace) {
                            io___325.ciunit = *ntra;
                            s_wsfe(&io___325);
                            do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)
                                );
                            do_fio(&c__1, sname, (ftnlen)6);
                            do_fio(&c__1, uplo, (ftnlen)1);
                            do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer))
                                ;
                            do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(
                                doublereal));
                            do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(
                                integer));
                            e_wsfe();
                        }
                        if (*rewi) {
                            al__1.aerr = 0;
                            al__1.aunit = *ntra;
                            f_rew(&al__1);
                        }
                        dspr_(uplo, &n, &alpha, &xx[1], &incx, &aa[1], (
                            ftnlen)1);
                    }

                    /* Check if error-exit was taken incorrectly. */
                    if (! infoc_1.ok) {
                        io___326.ciunit = *nout;
                        s_wsfe(&io___326);
                        e_wsfe();
                        *fatal = TRUE_;
                        goto L120;
                    }

                    /* See what data changed inside subroutines. */
                    /* isame[k] corresponds to argument number k + 1 in the */
                    /* "PARAMETER NUMBER" report written below. */
                    isame[0] = *(unsigned char *)uplo == *(unsigned char *)
                        uplos;
                    isame[1] = ns == n;
                    isame[2] = als == alpha;
                    isame[3] = lde_(&xs[1], &xx[1], &lx);
                    isame[4] = incxs == incx;
                    if (null) {
                        isame[5] = lde_(&as[1], &aa[1], &laa);
                    } else {
                        isame[5] = lderes_(sname + 1, uplo, &n, &n, &as[1], &
                            aa[1], &lda, (ftnlen)2, (ftnlen)1);
                    }
                    /* The packed call has no LDA argument, matching */
                    /* nargs == 6 set above. */
                    if (! packed) {
                        isame[6] = ldas == lda;
                    }

                    /* If data was incorrectly changed, report and return. */
                    same = TRUE_;
                    i__4 = nargs;
                    for (i__ = 1; i__ <= i__4; ++i__) {
                        same = same && isame[i__ - 1];
                        if (! isame[i__ - 1]) {
                            io___329.ciunit = *nout;
                            s_wsfe(&io___329);
                            do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(
                                integer));
                            e_wsfe();
                        }
                        /* L30: */
                    }
                    if (! same) {
                        *fatal = TRUE_;
                        goto L120;
                    }

                    if (! null) {
                        /* Check the result column by column. */
                        /* z__ receives x in call order: copied forwards for */
                        /* a positive increment, reversed otherwise. */
                        if (incx > 0) {
                            i__4 = n;
                            for (i__ = 1; i__ <= i__4; ++i__) {
                                z__[i__] = x[i__];
                                /* L40: */
                            }
                        } else {
                            i__4 = n;
                            for (i__ = 1; i__ <= i__4; ++i__) {
                                z__[i__] = x[n - i__ + 1];
                                /* L50: */
                            }
                        }
                        /* ja is the 1-based position in aa of the first */
                        /* element checked for column j. */
                        ja = 1;
                        i__4 = n;
                        for (j = 1; j <= i__4; ++j) {
                            w[0] = z__[j];
                            /* Rows jj .. jj + lj - 1 of column j are checked: */
                            /* 1..j when upper, j..n otherwise. */
                            if (upper) {
                                jj = 1;
                                lj = j;
                            } else {
                                jj = j;
                                lj = n - j + 1;
                            }
                            dmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w,
                                &c__1, &c_b128, &a[jj + j * a_dim1], &
                                c__1, &yt[1], &g[1], &aa[ja], eps, &err,
                                fatal, nout, &c_true, (ftnlen)1);
                            /* Advance ja: by lda (full, upper), by lda + 1 */
                            /* (full, lower), or by the lj elements just */
                            /* checked (packed). */
                            if (full) {
                                if (upper) {
                                    ja += lda;
                                } else {
                                    ja = ja + lda + 1;
                                }
                            } else {
                                ja += lj;
                            }
                            errmax = max(errmax,err);
                            /* If got really bad answer, report and return. */
                            if (*fatal) {
                                goto L110;
                            }
                            /* L60: */
                        }
                    } else {
                        /* Avoid repeating tests if N.le.0. */
                        if (n <= 0) {
                            goto L100;
                        }
                    }

                    /* L70: */
                }

                /* L80: */
            }

            /* L90: */
        }

L100:
        ;
    }

    /* Report result. */
    if (errmax < *thresh) {
        io___336.ciunit = *nout;
        s_wsfe(&io___336);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___337.ciunit = *nout;
        s_wsfe(&io___337);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal));
        e_wsfe();
    }
    goto L130;

    /* L110: reached when *fatal is set after dmvch_; prints the column */
    /* number j and then falls through to the common failure report. */
L110:
    io___338.ciunit = *nout;
    s_wsfe(&io___338);
    do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer));
    e_wsfe();

    /* L120: common failure report -- the routine name followed by the */
    /* arguments of the failing call, in the same layout as the trace. */
L120:
    io___339.ciunit = *nout;
    s_wsfe(&io___339);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    if (full) {
        io___340.ciunit = *nout;
        s_wsfe(&io___340);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
        e_wsfe();
    } else if (packed) {
        io___341.ciunit = *nout;
        s_wsfe(&io___341);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        e_wsfe();
    }

L130:
    return 0;

    /* End of DCHK5. */

} /* dchk5_ */

/*
 * dchk6_ -- driver that calls dsyr2_ (full storage) or dspr2_ (packed
 * storage) for every combination of dimension, UPLO, X increment,
 * Y increment and alpha supplied by the caller, and checks the arguments
 * and the result after each call.
 *
 * Arguments, as they are used by the function body:
 *   sname     routine name; written to the reports as 6 characters.
 *             sname[2] == 'Y' selects the dsyr2_ path, sname[2] == 'P' the
 *             dspr2_ path; sname + 1 is handed to dmake_ and lderes_ as a
 *             2-character type string.
 *   eps       forwarded unchanged to dmvch_.
 *   thresh    errmax < *thresh selects the "PASSED" report, otherwise the
 *             "SUSPECT" report (which also prints errmax) is written.
 *   nout      Fortran unit for result and failure reports.
 *   ntra      Fortran unit for the per-call trace (written when *trace is
 *             set) and the unit that is rewound before each call when *rewi
 *             is set.
 *   fatal     set to TRUE_ when infoc_1.ok is false after a call or when a
 *             saved argument differs after the call; it is also passed to
 *             dmvch_ and tested after dmvch_ returns.
 *   nidim, idim   count and list of the N values to test.
 *   nalf, alf     count and list of the alpha values to test.
 *   ninc, inc     count and list of the increments, used for both X and Y.
 *   nmax      leading dimension used for a and for z__; an N whose lda
 *             would exceed *nmax is skipped.
 *   incmax, sname_len   not referenced in this routine.
 *   a, aa, as, x, xx, xs, y, yy, ys, yt, g   caller-supplied work arrays.
 *   z__       work array indexed as z__[i + k * z_dim1] with k = 1 for the
 *             copy of x and k = 2 for the copy of y.
 *
 * Always returns 0; the outcome is communicated through *fatal and the
 * reports written to *nout.
 */
/* Subroutine */ int dchk6_(char *sname, doublereal *eps, doublereal *thresh,
    integer *nout, integer *ntra, logical *trace, logical *rewi, logical *
    fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf,
    integer *ninc, integer *inc, integer *nmax, integer *incmax,
    doublereal *a, doublereal *aa, doublereal *as, doublereal *x,
    doublereal *xx, doublereal *xs, doublereal *y, doublereal *yy,
    doublereal *ys, doublereal *yt, doublereal *g, doublereal *z__,
    ftnlen sname_len)
{
    /* Initialized data */

    static char ich[2] = "UL";

    /* Format strings */
    static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
        "i3,\002,\002,f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, A,\002,i"
        "3,\002) .\002)";
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
        "i3,\002,\002,f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, AP) "
        " .\002)";
    static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
        "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
        " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
        "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
        " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
        "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN"
        " \002,i3)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
        "ER:\002)";

    /* System generated locals */
    integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5,
        i__6;
alist al__1;

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
        f_rew(alist *);

    /* Local variables */
    integer i__, j, n;
    doublereal w[2];
    integer ia, ja, ic, nc, jj, lj, in, ix, iy, ns, lx, ly, laa, lda;
    extern logical lde_(doublereal *, doublereal *, integer *);
    doublereal als, err;
    integer ldas;
    logical same;
    integer incx, incy;
    logical full, null;
    char uplo[1];
    extern /* Subroutine */ int dspr2_(char *, integer *, doublereal *,
        doublereal *, integer *, doublereal *, integer *, doublereal *,
        ftnlen), dsyr2_(char *, integer *, doublereal *, doublereal *,
        integer *, doublereal *, integer *, doublereal *, integer *,
        ftnlen), dmake_(char *, char *, char *, integer *, integer *,
        doublereal *, integer *, doublereal *, integer *, integer *,
        integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen);
    doublereal alpha;
    logical isame[13];
    extern /* Subroutine */ int dmvch_(char *, integer *, integer *,
        doublereal *, doublereal *, integer *, doublereal *, integer *,
        doublereal *, doublereal *, integer *, doublereal *, doublereal *,
        doublereal *, doublereal *, doublereal *, logical *, integer *,
        logical *, ftnlen);
    integer nargs;
    logical reset;
    integer incxs, incys;
    logical upper;
    char uplos[1];
    logical packed;
    extern logical lderes_(char *, char *, integer *, integer *,
        doublereal *, doublereal *, integer *, ftnlen, ftnlen);
    doublereal errmax, transl;

    /* Fortran I/O blocks */
    /* One control block per write statement; ciunit is filled in with */
    /* *ntra or *nout immediately before each use. */
    static cilist io___373 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___374 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___375 = { 0, 0, 0, fmt_9992, 0 };
    static cilist io___378 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___385 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___386 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___387 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___388 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___389 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___390 = { 0, 0, 0, fmt_9994, 0 };

    /* Tests DSYR2 and DSPR2. */
    /* Auxiliary routine for test program for Level 2 Blas. */
    /* -- Written on 10-August-1987. */
    /* Richard Hanson, Sandia National Labs. */
    /* Jeremy Du Croz, NAG Central Office. */
    /* .. Parameters .. */
    /* .. Scalar Arguments .. */
    /* .. Array Arguments .. */
    /* .. Local Scalars .. */
    /* .. Local Arrays .. */
    /* .. External Functions .. */
    /* .. External Subroutines .. */
    /* .. Intrinsic Functions .. */
    /* .. Scalars in Common .. */
    /* .. Common blocks .. */
    /* .. Data statements .. */

    /* Parameter adjustments */
    /* Each array pointer is moved back so the code below can use 1-based */
    /* subscripts; a and z__ are shifted so that a[i + j * a_dim1] and */
    /* z__[i + k * z_dim1] address element (i,j) / (i,k) with both */
    /* subscripts starting at 1. */
    --idim;
    --alf;
    --inc;
    z_dim1 = *nmax;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --g;
    --yt;
    --y;
    --x;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ys;
    --yy;
    --xs;
    --xx;

    /* Function Body */
    /* .. Executable Statements .. */
    full = *(unsigned char *)&sname[2] == 'Y';
    packed = *(unsigned char *)&sname[2] == 'P';
    /* Define the number of arguments. */
    /* NOTE(review): nargs stays unassigned when sname[2] is neither 'Y' */
    /* nor 'P'; presumably callers only pass the DSYR2/DSPR2 names -- confirm. */
    if (full) {
        nargs = 9;
    } else if (packed) {
        nargs = 8;
    }

    nc = 0;
    reset = TRUE_;
    errmax = 0.;

    i__1 = *nidim;
    for (in = 1; in <= i__1; ++in) {
        n = idim[in];
        /* Set LDA to 1 more than minimum value if room. */
        lda = n;
        if (lda < *nmax) {
            ++lda;
        }
        /* Skip tests if not enough room. */
        if (lda > *nmax) {
            goto L140;
        }
        /* Number of aa elements saved and compared around each call. */
        if (packed) {
            laa = n * (n + 1) / 2;
        } else {
            laa = lda * n;
        }

        for (ic = 1; ic <= 2; ++ic) {
            *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1];
            upper = *(unsigned char *)uplo == 'U';

            i__2 = *ninc;
            for (ix = 1; ix <= i__2; ++ix) {
                incx = inc[ix];
                lx = abs(incx) * n;

                /* Generate the vector X. */
                transl = .5;
                i__3 = abs(incx);
                i__4 = n - 1;
                dmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3,
                    &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1,
                    (ftnlen)1);
                /* Zero element n/2 of x and the element at the matching */
                /* strided position of xx. */
                if (n > 1) {
                    x[n / 2] = 0.;
                    xx[abs(incx) * (n / 2 - 1) + 1] = 0.;
                }

                i__3 = *ninc;
                for (iy = 1; iy <= i__3; ++iy) {
                    incy = inc[iy];
                    ly = abs(incy) * n;

                    /* Generate the vector Y. */
                    transl = 0.;
                    i__4 = abs(incy);
                    i__5 = n - 1;
                    dmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], &
                        i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, (
                        ftnlen)1, (ftnlen)1);
                    /* Same zeroing as for x, applied to y and yy. */
                    if (n > 1) {
                        y[n / 2] = 0.;
                        yy[abs(incy) * (n / 2 - 1) + 1] = 0.;
                    }

                    i__4 = *nalf;
                    for (ia = 1; ia <= i__4; ++ia) {
                        alpha = alf[ia];
                        /* null selects the "aa must be unchanged" check. */
                        null = n <= 0 || alpha == 0.;

                        /* Generate the matrix A. */
                        transl = 0.;
                        i__5 = n - 1;
                        i__6 = n - 1;
                        dmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset],
                            nmax, &aa[1], &lda, &i__5, &i__6, &reset, &
                            transl, (ftnlen)2, (ftnlen)1, (ftnlen)1);

                        /* nc counts calls and is printed in every report. */
                        ++nc;

                        /* Save every datum before calling the subroutine. */
                        *(unsigned char *)uplos = *(unsigned char *)uplo;
                        ns = n;
                        als = alpha;
                        i__5 = laa;
                        for (i__ = 1; i__ <= i__5; ++i__) {
                            as[i__] = aa[i__];
                            /* L10: */
                        }
                        ldas = lda;
                        i__5 = lx;
                        for (i__ = 1; i__ <= i__5; ++i__) {
                            xs[i__] = xx[i__];
                            /* L20: */
                        }
                        incxs = incx;
                        i__5 = ly;
                        for (i__ = 1; i__ <= i__5; ++i__) {
                            ys[i__] = yy[i__];
                            /* L30: */
                        }
                        incys = incy;

                        /* Call the subroutine. */
                        if (full) {
                            if (*trace) {
                                io___373.ciunit = *ntra;
                                s_wsfe(&io___373);
                                do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, sname, (ftnlen)6);
                                do_fio(&c__1, uplo, (ftnlen)1);
                                do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(
                                    doublereal));
                                do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(
                                    integer));
                                e_wsfe();
                            }
                            if (*rewi) {
                                al__1.aerr = 0;
                                al__1.aunit = *ntra;
                                f_rew(&al__1);
                            }
                            dsyr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], &
                                incy, &aa[1], &lda, (ftnlen)1);
                        } else if (packed) {
                            if (*trace) {
                                io___374.ciunit = *ntra;
                                s_wsfe(&io___374);
                                do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, sname, (ftnlen)6);
                                do_fio(&c__1, uplo, (ftnlen)1);
                                do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(
                                    doublereal));
                                do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(
                                    integer));
                                e_wsfe();
                            }
                            if (*rewi) {
                                al__1.aerr = 0;
                                al__1.aunit = *ntra;
                                f_rew(&al__1);
                            }
                            dspr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], &
                                incy, &aa[1], (ftnlen)1);
                        }

                        /* Check if error-exit was taken incorrectly. */
                        if (! infoc_1.ok) {
                            io___375.ciunit = *nout;
                            s_wsfe(&io___375);
                            e_wsfe();
                            *fatal = TRUE_;
                            goto L160;
                        }

                        /* See what data changed inside subroutines. */
                        /* isame[k] corresponds to argument number k + 1 in */
                        /* the "PARAMETER NUMBER" report written below. */
                        isame[0] = *(unsigned char *)uplo == *(unsigned char *
                            )uplos;
                        isame[1] = ns == n;
                        isame[2] = als == alpha;
                        isame[3] = lde_(&xs[1], &xx[1], &lx);
                        isame[4] = incxs == incx;
                        isame[5] = lde_(&ys[1], &yy[1], &ly);
                        isame[6] = incys == incy;
                        if (null) {
                            isame[7] = lde_(&as[1], &aa[1], &laa);
                        } else {
                            isame[7] = lderes_(sname + 1, uplo, &n, &n, &as[1]
                                , &aa[1], &lda, (ftnlen)2, (ftnlen)1);
                        }
                        /* The packed call has no LDA argument, matching */
                        /* nargs == 8 set above. */
                        if (! packed) {
                            isame[8] = ldas == lda;
                        }

                        /* If data was incorrectly changed, report and return. */
                        same = TRUE_;
                        i__5 = nargs;
                        for (i__ = 1; i__ <= i__5; ++i__) {
                            same = same && isame[i__ - 1];
                            if (! isame[i__ - 1]) {
                                io___378.ciunit = *nout;
                                s_wsfe(&io___378);
                                do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(
                                    integer));
                                e_wsfe();
                            }
                            /* L40: */
                        }
                        if (! same) {
                            *fatal = TRUE_;
                            goto L160;
                        }

                        if (! null) {
                            /* Check the result column by column. */
                            /* Column 1 of z__ receives x and column 2 */
                            /* receives y, each copied forwards for a */
                            /* positive increment and reversed otherwise. */
                            if (incx > 0) {
                                i__5 = n;
                                for (i__ = 1; i__ <= i__5; ++i__) {
                                    z__[i__ + z_dim1] = x[i__];
                                    /* L50: */
                                }
                            } else {
                                i__5 = n;
                                for (i__ = 1; i__ <= i__5; ++i__) {
                                    z__[i__ + z_dim1] = x[n - i__ + 1];
                                    /* L60: */
                                }
                            }
                            if (incy > 0) {
                                i__5 = n;
                                for (i__ = 1; i__ <= i__5; ++i__) {
                                    z__[i__ + (z_dim1 << 1)] = y[i__];
                                    /* L70: */
                                }
                            } else {
                                i__5 = n;
                                for (i__ = 1; i__ <= i__5; ++i__) {
                                    z__[i__ + (z_dim1 << 1)] = y[n - i__ + 1];
                                    /* L80: */
                                }
                            }
                            /* ja is the 1-based position in aa of the first */
                            /* element checked for column j. */
                            ja = 1;
                            i__5 = n;
                            for (j = 1; j <= i__5; ++j) {
                                /* w pairs the j-th y value (from column 2) */
                                /* with the j-th x value (from column 1). */
                                w[0] = z__[j + (z_dim1 << 1)];
                                w[1] = z__[j + z_dim1];
                                /* Rows jj .. jj + lj - 1 of column j are */
                                /* checked: 1..j when upper, j..n otherwise. */
                                if (upper) {
                                    jj = 1;
                                    lj = j;
                                } else {
                                    jj = j;
                                    lj = n - j + 1;
                                }
                                dmvch_("N", &lj, &c__2, &alpha, &z__[jj +
                                    z_dim1], nmax, w, &c__1, &c_b128, &a[
                                    jj + j * a_dim1], &c__1, &yt[1], &g[1]
                                    , &aa[ja], eps, &err, fatal, nout, &
                                    c_true, (ftnlen)1);
                                /* Advance ja: by lda (full, upper), by */
                                /* lda + 1 (full, lower), or by the lj */
                                /* elements just checked (packed). */
                                if (full) {
                                    if (upper) {
                                        ja += lda;
                                    } else {
                                        ja = ja + lda + 1;
                                    }
                                } else {
                                    ja += lj;
                                }
                                errmax = max(errmax,err);
                                /* If got really bad answer, report and return. */
                                if (*fatal) {
                                    goto L150;
                                }
                                /* L90: */
                            }
                        } else {
                            /* Avoid repeating tests with N.le.0. */
                            if (n <= 0) {
                                goto L140;
                            }
                        }

                        /* L100: */
                    }

                    /* L110: */
                }

                /* L120: */
            }

            /* L130: */
        }

L140:
        ;
    }

    /* Report result. */
    if (errmax < *thresh) {
        io___385.ciunit = *nout;
        s_wsfe(&io___385);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___386.ciunit = *nout;
        s_wsfe(&io___386);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal));
        e_wsfe();
    }
    goto L170;

    /* L150: reached when *fatal is set after dmvch_; prints the column */
    /* number j and then falls through to the common failure report. */
L150:
    io___387.ciunit = *nout;
    s_wsfe(&io___387);
    do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer));
    e_wsfe();

    /* L160: common failure report -- the routine name followed by the */
    /* arguments of the failing call, in the same layout as the trace. */
L160:
    io___388.ciunit = *nout;
    s_wsfe(&io___388);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    if (full) {
        io___389.ciunit = *nout;
        s_wsfe(&io___389);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
        e_wsfe();
    } else if (packed) {
        io___390.ciunit = *nout;
        s_wsfe(&io___390);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer));
        e_wsfe();
    }

L170:
    return 0;

    /* End of DCHK6. */

} /* dchk6_ */

/* Subroutine */ int dchke_(integer *isnum, char *srnamt, integer *nout,
    ftnlen srnamt_len)
{
    /* Format strings */
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E"
        "XITS\002)";
    static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF"
        " ERROR-EXITS *****\002,\002**\002)";

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    doublereal a[1] /* was [1][1] */, x[1], y[1], beta;
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *,
        doublereal *, integer *, doublereal *, integer *, doublereal *,
        integer *), dspr_(char *, integer *, doublereal *, doublereal *,
        integer *, doublereal *, ftnlen), dsyr_(char *, integer *,
        doublereal *, doublereal *, integer *, doublereal *, integer *,
        ftnlen), dspr2_(char *, integer *, doublereal *, doublereal *,
        integer *, doublereal *, integer *, doublereal *, ftnlen), dsyr2_(
        char *, integer *, doublereal *, doublereal *, integer *,
        doublereal *, integer *, doublereal *, integer *, ftnlen);
    doublereal alpha;
    extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer *
        , integer *, doublereal *, doublereal *, integer *, doublereal *,
        integer *, doublereal *, doublereal *, integer *, ftnlen), dgemv_(
        char *, integer *, integer *, doublereal *, doublereal *, integer *,
        doublereal *, integer *, doublereal *, doublereal *, integer *,
        ftnlen), dsbmv_(char *, integer *, integer *, doublereal *,
        doublereal *, integer *, doublereal *, integer *, doublereal *,
        doublereal *, integer *, ftnlen), dtbmv_(char *, char *, char *,
        integer *, integer *, doublereal *, integer *, doublereal *,
        integer *, ftnlen, ftnlen, ftnlen), dtbsv_(char *, char *, char *,
        integer *, integer *, doublereal *, integer *, doublereal *,
        integer *, ftnlen, ftnlen, ftnlen), dspmv_(char *, integer *,
        doublereal *, doublereal *, doublereal *, integer *, doublereal *,
        doublereal *, integer *, ftnlen), dtpmv_(char *, char *,
char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dtpsv_(char *, char *, char *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen), dtrsv_( char *, char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___396 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___397 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 2 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* ALPHA, BETA, A, X and Y should not need to be defined. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. 
*/ infoc_1.lerr = FALSE_; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; case 10: goto L100; case 11: goto L110; case 12: goto L120; case 13: goto L130; case 14: goto L140; case 15: goto L150; case 16: goto L160; } L10: infoc_1.infot = 1; dgemv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dgemv_("N", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemv_("N", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dgemv_("N", &c__2, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L20: infoc_1.infot = 1; dgbmv_("/", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dgbmv_("N", &c_n1, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgbmv_("N", &c__0, &c_n1, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, 
nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgbmv_("N", &c__0, &c__0, &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgbmv_("N", &c__2, &c__0, &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgbmv_("N", &c__0, &c__0, &c__1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L30: infoc_1.infot = 1; dsymv_("/", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsymv_("U", &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dsymv_("U", &c__2, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymv_("U", &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsymv_("U", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L40: infoc_1.infot = 1; dsbmv_("/", &c__0, &c__0, &alpha, a, &c__1, x, 
&c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsbmv_("U", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsbmv_("U", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dsbmv_("U", &c__0, &c__1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dsbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dsbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L50: infoc_1.infot = 1; dspmv_("/", &c__0, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dspmv_("U", &c_n1, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dspmv_("U", &c__0, &alpha, a, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dspmv_("U", &c__0, &alpha, a, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L60: infoc_1.infot = 1; dtrmv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtrmv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, 
( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtrmv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtrmv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dtrmv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L70: infoc_1.infot = 1; dtbmv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtbmv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtbmv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtbmv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtbmv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dtbmv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; 
dtbmv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L80: infoc_1.infot = 1; dtpmv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtpmv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtpmv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtpmv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dtpmv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L90: infoc_1.infot = 1; dtrsv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtrsv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtrsv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtrsv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 8; dtrsv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L100: infoc_1.infot = 1; dtbsv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtbsv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtbsv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtbsv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtbsv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dtbsv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtbsv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L110: infoc_1.infot = 1; dtpsv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtpsv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtpsv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtpsv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dtpsv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L120: infoc_1.infot = 1; dger_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dger_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dger_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dger_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dger_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L130: infoc_1.infot = 1; dsyr_("/", &c__0, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsyr_("U", &c_n1, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dsyr_("U", &c__0, &alpha, x, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr_("U", &c__2, &alpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L140: infoc_1.infot = 1; dspr_("/", &c__0, &alpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, 
&infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dspr_("U", &c_n1, &alpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dspr_("U", &c__0, &alpha, x, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L150: infoc_1.infot = 1; dsyr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsyr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dsyr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2_("U", &c__2, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L170; L160: infoc_1.infot = 1; dspr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dspr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dspr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dspr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L170: if (infoc_1.ok) { io___396.ciunit = *nout; s_wsfe(&io___396); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___397.ciunit = *nout; 
s_wsfe(&io___397); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of DCHKE. */ } /* dchke_ */ /* Subroutine */ int dmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *nmax, doublereal *aa, integer * lda, integer *kl, integer *ku, logical *reset, doublereal *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, i1, i2, i3, kk; logical gen, tri, sym; extern doublereal dbeg_(logical *); integer ibeg, iend, ioff; logical unit, lower, upper; /* Generates values for an M by N matrix A within the bandwidth */ /* defined by KL and KU. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'GB', 'SY', 'SB', 'SP', 'TR', 'TB' OR 'TP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = *(unsigned char *)type__ == 'G'; sym = *(unsigned char *)type__ == 'S'; tri = *(unsigned char *)type__ == 'T'; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. 
*/ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { a[i__ + j * a_dim1] = dbeg_(reset) + *transl; } else { a[i__ + j * a_dim1] = 0.; } if (i__ != j) { if (sym) { a[j + i__ * a_dim1] = a[i__ + j * a_dim1]; } else if (tri) { a[j + i__ * a_dim1] = 0.; } } } /* L10: */ } if (tri) { a[j + j * a_dim1] += 1.; } if (unit) { a[j + j * a_dim1] = 1.; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "GB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *ku + 1 - j; for (i1 = 1; i1 <= i__2; ++i1) { aa[i1 + (j - 1) * *lda] = -1e10; /* L60: */ } /* Computing MIN */ i__3 = *kl + *ku + 1, i__4 = *ku + 1 + *m - j; i__2 = min(i__3,i__4); for (i2 = i1; i2 <= i__2; ++i2) { aa[i2 + (j - 1) * *lda] = a[i2 + j - *ku - 1 + j * a_dim1]; /* L70: */ } i__2 = *lda; for (i3 = i2; i3 <= i__2; ++i3) { aa[i3 + (j - 1) * *lda] = -1e10; /* L80: */ } /* L90: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L100: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L110: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = 
-1e10; /* L120: */ } /* L130: */ } } else if (s_cmp(type__, "SB", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { kk = *kl + 1; /* Computing MAX */ i__2 = 1, i__3 = *kl + 2 - j; ibeg = max(i__2,i__3); if (unit) { iend = *kl; } else { iend = *kl + 1; } } else { kk = 1; if (unit) { ibeg = 2; } else { ibeg = 1; } /* Computing MIN */ i__2 = *kl + 1, i__3 = *m + 1 - j; iend = min(i__2,i__3); } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L140: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j - kk + j * a_dim1]; /* L150: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10; /* L160: */ } /* L170: */ } } else if (s_cmp(type__, "SP", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TP", (ftnlen)2, (ftnlen)2) == 0) { ioff = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { ++ioff; aa[ioff] = a[i__ + j * a_dim1]; if (i__ == j) { if (unit) { aa[ioff] = -1e10; } } /* L180: */ } /* L190: */ } } return 0; /* End of DMAKE. 
*/ } /* dmake_ */ /* Subroutine */ int dmvch_(char *trans, integer *m, integer *n, doublereal * alpha, doublereal *a, integer *nmax, doublereal *x, integer *incx, doublereal *beta, doublereal *y, integer *incy, doublereal *yt, doublereal *g, doublereal *yy, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 EX" "PECTED RESULT COMPU\002,\002TED RESULT\002)"; static char fmt_9998[] = "(1x,i7,2g18.6)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, ml, nl, iy, jx, kx, ky; doublereal erri; logical tran; integer incxl, incyl; /* Fortran I/O blocks */ static cilist io___425 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___426 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___427 = { 0, 0, 0, fmt_9998, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; --yt; --g; --yy; /* Function Body */ tran = *(unsigned char *)trans == 'T' || *(unsigned char *)trans == 'C'; if (tran) { ml = *n; nl = *m; } else { ml = *m; nl = *n; } if (*incx < 0) { kx = nl; incxl = -1; } else { kx = 1; incxl = 1; } if (*incy < 0) { ky = ml; incyl = -1; } else { ky = 1; incyl = 1; } /* Compute expected result in YT using data in A, X and Y. */ /* Compute gauges in G. 
*/ iy = ky; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { yt[iy] = 0.; g[iy] = 0.; jx = kx; if (tran) { i__2 = nl; for (j = 1; j <= i__2; ++j) { yt[iy] += a[j + i__ * a_dim1] * x[jx]; g[iy] += (d__1 = a[j + i__ * a_dim1] * x[jx], abs(d__1)); jx += incxl; /* L10: */ } } else { i__2 = nl; for (j = 1; j <= i__2; ++j) { yt[iy] += a[i__ + j * a_dim1] * x[jx]; g[iy] += (d__1 = a[i__ + j * a_dim1] * x[jx], abs(d__1)); jx += incxl; /* L20: */ } } yt[iy] = *alpha * yt[iy] + *beta * y[iy]; g[iy] = abs(*alpha) * g[iy] + (d__1 = *beta * y[iy], abs(d__1)); iy += incyl; /* L30: */ } /* Compute the error ratio for this result. */ *err = 0.; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { erri = (d__1 = yt[i__] - yy[(i__ - 1) * abs(*incy) + 1], abs(d__1)) / *eps; if (g[i__] != 0.) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.) { goto L50; } /* L40: */ } /* If the loop completes, all results are at least half accurate. */ goto L70; /* Report fatal error. */ L50: *fatal = TRUE_; io___425.ciunit = *nout; s_wsfe(&io___425); e_wsfe(); i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___426.ciunit = *nout; s_wsfe(&io___426); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&yt[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(doublereal)); e_wsfe(); } else { io___427.ciunit = *nout; s_wsfe(&io___427); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&yt[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); } /* L60: */ } L70: return 0; /* End of DMVCH. */ } /* dmvch_ */ logical lde_(doublereal *ri, doublereal *rj, integer *lr) { /* System generated locals */ integer i__1; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. 
*/ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { if (ri[i__] != rj[i__]) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LDE. */ } /* lde_ */ logical lderes_(char *type__, char *uplo, integer *m, integer *n, doublereal * aa, doublereal *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE', 'SY' or 'SP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LDERES. */ } /* lderes_ */ doublereal dbeg_(logical *reset) { /* System generated locals */ doublereal ret_val; /* Local variables */ static integer i__, ic, mi; /* Generates random numbers uniformly distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; i__ = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I is bounded between 1 and 999. */ /* If initial I = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I = 4 or 8, the period will be 25. */ /* If initial I = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I in 6. 
*/ ++ic; L10: i__ *= mi; i__ -= i__ / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } ret_val = (doublereal) (i__ - 500) / 1001.; return ret_val; /* End of DBEG. */ } /* dbeg_ */ doublereal ddiff_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val; /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of DDIFF. */ } /* ddiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___437 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___437.ciunit = *nout; s_wsfe(&io___437); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. 
*/ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___438 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___439 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___440 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 2 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 2 BLAS routines. */ /* It is called by the Level 2 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. 
*/ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___438.ciunit = infoc_2.nout; s_wsfe(&io___438); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___439.ciunit = infoc_2.nout; s_wsfe(&io___439); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___440.ciunit = infoc_2.nout; s_wsfe(&io___440); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int dblat2_ () { main (); return 0; } blis-0.6.1/blastest/src/dblat3.c000066400000000000000000004360641360743507500164230ustar00rootroot00000000000000/* dblat3.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__5 = 5; static integer c__65 = 65; static integer c__7 = 7; static doublereal c_b86 = 0.; static doublereal c_b96 = 1.; static logical c_true = TRUE_; static logical c_false = FALSE_; static integer c__0 = 0; static integer c_n1 = -1; static 
integer c__2 = 2; /* > \brief \b DBLAT3 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM DBLAT3 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the DOUBLE PRECISION Level 3 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 14 records */ /* > of the file are read using list-directed input, the last 6 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 20 lines: */ /* > 'dblat3.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > 0.0 1.0 0.7 VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > 0.0 1.0 1.3 VALUES OF BETA */ /* > DGEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTRMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DTRSM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYRK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. */ /* > A Set of Level 3 Basic Linear Algebra Subprograms. 
*/ /* > */ /* > Technical Memorandum No.88 (Revision 1), Mathematics and */ /* > Computer Science Division, Argonne National Laboratory, 9700 */ /* > South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > -- Written on 8-February-1989. */ /* > Jack Dongarra, Argonne National Laboratory. */ /* > Iain Duff, AERE Harwell. */ /* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* > Sven Hammarling, Numerical Algorithms Group Ltd. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup double_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static char snames[6*6] = "DGEMM " "DSYMM " "DTRMM " "DTRSM " "DSYRK " "DSYR2K"; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 TESTS OF THE DOUBLE PRECISION LEVEL 3 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9994[] = "(\002 FOR N \002,9i6)"; static char fmt_9993[] = "(\002 FOR ALPHA \002,7f6.1)"; static char fmt_9992[] = "(\002 FOR BETA \002,7f6.1)"; static char fmt_9984[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9988[] = "(a6,l2)"; static char fmt_9990[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static 
char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,d9.1)"; static char fmt_9989[] = "(\002 ERROR IN DMMCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 DMMCH WAS CALLED " "WITH TRANSA = \002,a1,\002 AND TRANSB = \002,a1,/\002 AND RETURN" "ED SAME = \002,l1,\002 AND \002,\002ERR = \002,f12.3,\002.\002," "/\002 THIS MAY BE DUE TO FAULTS IN THE \002,\002ARITHMETIC OR TH" "E COMPILER.\002,/\002 ******* TESTS ABANDONED \002,\002******" "*\002)"; static char fmt_9987[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9986[] = "(/\002 END OF TESTS\002)"; static char fmt_9985[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9991[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ doublereal c__[4225] /* was [65][65] */, g[65]; integer i__, j, n; doublereal w[130], aa[4225], ab[8450] /* was [65][130] */, bb[4225], cc[4225], as[4225], bs[4225], cs[4225], ct[65], alf[7]; extern logical lde_(doublereal *, doublereal *, integer *); doublereal bet[7], eps, err; integer nalf, idim[9]; logical same; integer nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int dchk1_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, 
doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk2_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk3_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk4_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchk5_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, ftnlen), dchke_(integer *, char *, integer *, ftnlen); logical fatal; extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical trace; integer nidim; char snaps[32]; integer isnum; logical ltest[6], sfatal; char snamet[6], transa[1], transb[1]; doublereal thresh; logical ltestt, 
tsterr; char summry[32]; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___40 = { 0, 0, 0, 0, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9984, 0 }; static cilist io___42 = { 0, 0, 0, 0, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, 0, 0 }; static cilist io___46 = { 0, 5, 1, fmt_9988, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___67 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___69 = { 0, 0, 0, 0, 0 }; static cilist io___70 = { 0, 0, 0, fmt_9987, 0 }; static cilist io___71 = { 0, 0, 0, 0, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___80 = { 0, 0, 0, fmt_9991, 0 }; /* -- Reference BLAS test routine (version 
3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. 
*/ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); /* Read and check the parameter values for the tests. */ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } /* L10: */ } /* Values of ALPHA */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___29); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__5, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal) ); } e_rsle(); /* Values of BETA */ s_rsle(&io___31); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___33.ciunit = nout; s_wsfe(&io___33); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___34); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__5, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal) ); } e_rsle(); /* Report values of parameters. 
*/ io___36.ciunit = nout; s_wsfe(&io___36); e_wsfe(); io___37.ciunit = nout; s_wsfe(&io___37); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___38.ciunit = nout; s_wsfe(&io___38); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); io___39.ciunit = nout; s_wsfe(&io___39); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); if (! tsterr) { io___40.ciunit = nout; s_wsle(&io___40); e_wsle(); io___41.ciunit = nout; s_wsfe(&io___41); e_wsfe(); } io___42.ciunit = nout; s_wsle(&io___42); e_wsle(); io___43.ciunit = nout; s_wsfe(&io___43); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); io___44.ciunit = nout; s_wsle(&io___44); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 6; ++i__) { ltest[i__ - 1] = FALSE_; /* L20: */ } L30: i__1 = s_rsfe(&io___46); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L60; } i__1 = e_rsfe(); if (i__1 != 0) { goto L60; } for (i__ = 1; i__ <= 6; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } /* L40: */ } io___49.ciunit = nout; s_wsfe(&io___49); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L50: ltest[i__ - 1] = ltestt; goto L30; L60: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = d_epsilon_(&c_b86); io___51.ciunit = nout; s_wsfe(&io___51); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Check the reliability of DMMCH using exact data. 
*/

    /* Build a 32-by-32 integer-valued test problem: fill the leading */
    /* n-by-n part of ab, two further runs of ab with 1..n, and zero the */
    /* first n entries of c__. */
    n = 32;
    i__1 = n;
    for (j = 1; j <= i__1; ++j) {
        i__2 = n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
            i__3 = i__ - j + 1;
            ab[i__ + j * 65 - 66] = (doublereal) max(i__3,0);
/* L90: */
        }
        ab[j + 4224] = (doublereal) j;
        ab[(j + 65) * 65 - 65] = (doublereal) j;
        c__[j - 1] = 0.;
/* L100: */
    }
    /* Expected result, computed in integer arithmetic. */
    i__1 = n;
    for (j = 1; j <= i__1; ++j) {
        cc[j - 1] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j -
                1) / 3);
/* L110: */
    }
/*     CC holds the exact result. On exit from DMMCH CT holds */
/*     the result computed by DMMCH. */
    /* Case 1 of 4: TRANSA = 'N', TRANSB = 'N'.  Each case requires ct to */
    /* match cc exactly (lde_) and err to be exactly zero; otherwise the */
    /* fmt_9989 report is written to nout and the program stops. */
    *(unsigned char *)transa = 'N';
    *(unsigned char *)transb = 'N';
    dmmch_(transa, transb, &n, &c__1, &n, &c_b96, ab, &c__65, &ab[4225], &
            c__65, &c_b86, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal,
            &nout, &c_true, (ftnlen)1, (ftnlen)1);
    same = lde_(cc, ct, &n);
    if (! same || err != 0.) {
        io___64.ciunit = nout;
        s_wsfe(&io___64);
        do_fio(&c__1, transa, (ftnlen)1);
        do_fio(&c__1, transb, (ftnlen)1);
        do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical));
        do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal));
        e_wsfe();
        s_stop("", (ftnlen)0);
    }
    /* Case 2 of 4: TRANSA = 'N', TRANSB = 'T' (same data). */
    *(unsigned char *)transb = 'T';
    dmmch_(transa, transb, &n, &c__1, &n, &c_b96, ab, &c__65, &ab[4225], &
            c__65, &c_b86, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal,
            &nout, &c_true, (ftnlen)1, (ftnlen)1);
    same = lde_(cc, ct, &n);
    if (! same || err != 0.)
    {
        io___65.ciunit = nout;
        s_wsfe(&io___65);
        do_fio(&c__1, transa, (ftnlen)1);
        do_fio(&c__1, transb, (ftnlen)1);
        do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical));
        do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal));
        e_wsfe();
        s_stop("", (ftnlen)0);
    }
    /* Reverse the 1..n runs in ab (now n..1) and store the expected */
    /* values in cc in reverse order for the TRANSA = 'T' cases. */
    i__1 = n;
    for (j = 1; j <= i__1; ++j) {
        ab[j + 4224] = (doublereal) (n - j + 1);
        ab[(j + 65) * 65 - 65] = (doublereal) (n - j + 1);
/* L120: */
    }
    i__1 = n;
    for (j = 1; j <= i__1; ++j) {
        cc[n - j] = (doublereal) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j -
                1) / 3);
/* L130: */
    }
    /* Case 3 of 4: TRANSA = 'T', TRANSB = 'N'. */
    *(unsigned char *)transa = 'T';
    *(unsigned char *)transb = 'N';
    dmmch_(transa, transb, &n, &c__1, &n, &c_b96, ab, &c__65, &ab[4225], &
            c__65, &c_b86, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal,
            &nout, &c_true, (ftnlen)1, (ftnlen)1);
    same = lde_(cc, ct, &n);
    if (! same || err != 0.) {
        io___66.ciunit = nout;
        s_wsfe(&io___66);
        do_fio(&c__1, transa, (ftnlen)1);
        do_fio(&c__1, transb, (ftnlen)1);
        do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical));
        do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal));
        e_wsfe();
        s_stop("", (ftnlen)0);
    }
    /* Case 4 of 4: TRANSA = 'T', TRANSB = 'T'. */
    *(unsigned char *)transb = 'T';
    dmmch_(transa, transb, &n, &c__1, &n, &c_b96, ab, &c__65, &ab[4225], &
            c__65, &c_b86, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal,
            &nout, &c_true, (ftnlen)1, (ftnlen)1);
    same = lde_(cc, ct, &n);
    if (! same || err != 0.) {
        io___67.ciunit = nout;
        s_wsfe(&io___67);
        do_fio(&c__1, transa, (ftnlen)1);
        do_fio(&c__1, transb, (ftnlen)1);
        do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical));
        do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal));
        e_wsfe();
        s_stop("", (ftnlen)0);
    }

/*     Test each subroutine in turn. */

    /* isnum is the 1-based index into snames (6 characters per name). */
    for (isnum = 1; isnum <= 6; ++isnum) {
        io___69.ciunit = nout;
        s_wsle(&io___69);
        e_wsle();
        if (! ltest[isnum - 1]) {
/*           Subprogram is not to be tested. */
            io___70.ciunit = nout;
            s_wsfe(&io___70);
            do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6);
            e_wsfe();
        } else {
            /* Publish the routine name in the SRNAMC common block. */
            s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, (
                    ftnlen)6);
/*           Test error exits.
*/
            /* Error-exit tests run only when requested in the data file. */
            if (tsterr) {
                dchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6);
                io___71.ciunit = nout;
                s_wsle(&io___71);
                e_wsle();
            }
/*           Test computations. */
            /* Reset the shared error-handler state and the fatal flag */
            /* before each computational test. */
            infoc_1.infot = 0;
            infoc_1.ok = TRUE_;
            fatal = FALSE_;
            /* Dispatch on the routine index; 3 (DTRMM) and 4 (DTRSM) */
            /* share the same checker. */
            switch (isnum) {
                case 1:  goto L140;
                case 2:  goto L150;
                case 3:  goto L160;
                case 4:  goto L160;
                case 5:  goto L170;
                case 6:  goto L180;
            }
/*           Test DGEMM, 01. */
L140:
            dchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, &
                    trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet,
                    bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs,
                    ct, g, (ftnlen)6);
            goto L190;
/*           Test DSYMM, 02. */
L150:
            dchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, &
                    trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet,
                    bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs,
                    ct, g, (ftnlen)6);
            goto L190;
/*           Test DTRMM, 03, DTRSM, 04. */
L160:
            dchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, &
                    trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65,
                    ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6);
            goto L190;
/*           Test DSYRK, 05. */
L170:
            dchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, &
                    trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet,
                    bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs,
                    ct, g, (ftnlen)6);
            goto L190;
/*           Test DSYR2K, 06. */
L180:
            dchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, &
                    trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet,
                    bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, (
                    ftnlen)6);
            goto L190;

L190:
            /* Abandon the remaining routines only when a checker flagged */
            /* a fatal error AND the stop-on-failure flag was set. */
            if (fatal && sfatal) {
                goto L210;
            }
        }
/* L200: */
    }
    /* Normal completion: write the fmt_9986 end-of-tests line. */
    io___78.ciunit = nout;
    s_wsfe(&io___78);
    e_wsfe();
    goto L230;

L210:
    /* Fatal error in a computational test: write the fmt_9985 message. */
    io___79.ciunit = nout;
    s_wsfe(&io___79);
    e_wsfe();
    goto L230;

L220:
    /* Target of the parameter-validation failures earlier in main: */
    /* write the fmt_9991 "amend data file" message. */
    io___80.ciunit = nout;
    s_wsfe(&io___80);
    e_wsfe();

L230:
    /* Common exit path: close the snapshot unit (if one was opened) and */
    /* the summary unit, then stop. */
    if (trace) {
        cl__1.cerr = 0;
        cl__1.cunit = ntra;
        cl__1.csta = 0;
        f_clos(&cl__1);
    }
    cl__1.cerr = 0;
    cl__1.cunit = nout;
    cl__1.csta = 0;
    f_clos(&cl__1);
    s_stop("", (ftnlen)0);

/*     End of DBLAT3.
*/ return 0; } /* main */ /* Subroutine */ int dchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf, integer *nbet, doublereal *bet, integer *nmax, doublereal *a, doublereal *aa, doublereal *as, doublereal *b, doublereal *bb, doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct, doublereal *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002','\002" ",a1,\002',\002,3(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002" ",i3,\002,\002,f4.1,\002, \002,\002C,\002,i3,\002).\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; extern logical lde_(doublereal *, doublereal *, integer *); doublereal als, bls, err, beta; integer ldas, ldbs, ldcs; logical same, null; extern /* Subroutine */ int dmake_(char *, char *, char *, 
integer *, integer *, doublereal *, integer *, doublereal *, integer *, logical *, doublereal *, ftnlen, ftnlen, ftnlen); doublereal alpha; extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen), dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); logical isame[13], trana, tranb; integer nargs; logical reset; extern logical lderes_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen); char tranas[1], tranbs[1], transa[1], transb[1]; doublereal errmax; /* Fortran I/O blocks */ static cilist io___124 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___125 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___128 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___130 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___131 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___132 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___133 = { 0, 0, 0, fmt_9995, 0 }; /* Tests DGEMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. 
*/ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 13; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0 || m <= 0; i__3 = *nidim; for (ik = 1; ik <= i__3; ++ik) { k = idim[ik]; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { ma = k; na = m; } else { ma = m; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ dmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, ( ftnlen)1); for (icb = 1; icb <= 3; ++icb) { *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { mb = n; nb = k; } else { mb = k; nb = n; } /* Set LDB to 1 more than minimum value if room. */ ldb = mb; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L70; } lbb = ldb * nb; /* Generate the matrix B. */ dmake_("GE", " ", " ", &mb, &nb, &b[b_offset], nmax, & bb[1], &ldb, &reset, &c_b86, (ftnlen)2, ( ftnlen)1, (ftnlen)1); i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; i__5 = *nbet; for (ib = 1; ib <= i__5; ++ib) { beta = bet[ib]; /* Generate the matrix C. 
*/ dmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b86, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)tranbs = *(unsigned char *) transb; ms = m; ns = n; ks = k; als = alpha; i__6 = laa; for (i__ = 1; i__ <= i__6; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__6 = lbb; for (i__ = 1; i__ <= i__6; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bls = beta; i__6 = lcc; for (i__ = 1; i__ <= i__6; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___124.ciunit = *ntra; s_wsfe(&io___124); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } dgemm_(transa, transb, &m, &n, &k, &alpha, & aa[1], &lda, &bb[1], &ldb, &beta, &cc[ 1], &ldc, (ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___125.ciunit = *nout; s_wsfe(&io___125); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)transa == *( unsigned char *)tranas; isame[1] = *(unsigned char *)transb == *( unsigned char *)tranbs; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; isame[5] = als == alpha; isame[6] = lde_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lde_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; isame[10] = bls == beta; if (null) { isame[11] = lde_(&cs[1], &cc[1], &lcc); } else { isame[11] = lderes_("GE", " ", &m, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[12] = ldcs == ldc; /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__6 = nargs; for (i__ = 1; i__ <= i__6; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___128.ciunit = *nout; s_wsfe(&io___128); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ dmmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } /* L50: */ } /* L60: */ } L70: ; } L80: ; } /* L90: */ } L100: ; } /* L110: */ } /* Report result. 
*/

    /* All calls completed without a fatal error.  A maximum test ratio */
    /* below *thresh is reported as a pass (fmt_9999: name and call count); */
    /* otherwise the run is reported as suspect (fmt_9997: name, call */
    /* count and the maximum ratio). */
    if (errmax < *thresh) {
        io___130.ciunit = *nout;
        s_wsfe(&io___130);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___131.ciunit = *nout;
        s_wsfe(&io___131);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal));
        e_wsfe();
    }
    goto L130;

L120:
    /* Failure path (reached by goto from the test loops): write the */
    /* fmt_9996 "failed on call number" line, then the fmt_9995 record */
    /* giving the call count and the argument values of the failing call. */
    io___132.ciunit = *nout;
    s_wsfe(&io___132);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    io___133.ciunit = *nout;
    s_wsfe(&io___133);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, transa, (ftnlen)1);
    do_fio(&c__1, transb, (ftnlen)1);
    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
    e_wsfe();

L130:
    /* Single return point for both the normal and the failure path. */
    return 0;


/*     End of DCHK1.
*/
} /* dchk1_ */

/*
 * dchk2_ -- checking routine for DSYMM (see the "Tests DSYMM" banner below).
 *
 * Iterates over every (m, n) taken from idim[1..*nidim], both SIDE codes in
 * ichs ("LR"), both UPLO codes in ichu ("UL") and every alpha / beta taken
 * from alf[] / bet[].  For each combination it:
 *   1. builds B, the symmetric A and C through dmake_();
 *   2. copies every argument into a shadow variable (sides, uplos, ms, ns,
 *      als, as[], ldas, bs[], ldbs, bls, cs[], ldcs);
 *   3. optionally writes a trace record to unit *ntra and/or rewinds it;
 *   4. calls dsymm_();
 *   5. treats a false infoc_1.ok as a fatal error;
 *   6. compares every argument with its shadow copy and reports the 1-based
 *      position of each one that differs;
 *   7. unless m <= 0 or n <= 0, hands the result to dmmch_() and keeps the
 *      largest returned err in errmax.
 *
 * Arguments:
 *   sname        routine name; 6 characters of it are written in reports.
 *   eps          forwarded unchanged to dmmch_().
 *   thresh       errmax < *thresh selects the "PASSED" report (fmt_9999),
 *                otherwise the "SUSPECT" report (fmt_9997) is written.
 *   nout         Fortran unit for result and error reports.
 *   ntra         Fortran unit for trace records.
 *   trace        when true, each call is written to *ntra before it is made.
 *   rewi         when true, *ntra is rewound before each call.
 *   fatal        set to TRUE_ on an error exit or a changed argument; also
 *                forwarded to dmmch_(), which may set it.
 *   nidim, idim  number of dimension values and the values themselves.
 *   nalf, alf    number of alpha values and the values themselves.
 *   nbet, bet    number of beta values and the values themselves.
 *   nmax         column stride used to index a, b and c__, and the upper
 *                bound accepted for lda / ldb / ldc.
 *   a, b, c__    matrices filled by dmake_() and later given to dmmch_().
 *   aa, bb, cc   arrays filled by dmake_() and passed to dsymm_().
 *   as, bs, cs   receive the pre-call copies of aa, bb, cc.
 *   ct, g        forwarded to dmmch_() (presumably workspace -- confirm
 *                against dmmch_).
 *   sname_len    not referenced in the body.
 *
 * Returns 0 on every path.
 */
/* Subroutine */ int dchk2_(char *sname, doublereal *eps, doublereal *thresh,
        integer *nout, integer *ntra, logical *trace, logical *rewi, logical *
        fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf,
        integer *nbet, doublereal *bet, integer *nmax, doublereal *a,
        doublereal *aa, doublereal *as, doublereal *b, doublereal *bb,
        doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs,
        doublereal *ct, doublereal *g, ftnlen sname_len)
{
    /* Initialized data */
    /* Candidate SIDE and UPLO codes, indexed by ics / icu below. */
    static char ichs[2] = "LR";
    static char ichu[2] = "UL";

    /* Format strings */
    static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i"
            "3,\002,\002,f4.1,\002, C,\002,i3,\002) \002,\002 .\002)";
    static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
            i__3, i__4, i__5;
    alist al__1;

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
            f_rew(alist *);

    /* Local variables */
    integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc,
            ldb, ldc;
    extern logical lde_(doublereal *, doublereal *, integer *);
    integer ics;
    doublereal als, bls;
    integer icu;
    doublereal err, beta;
    integer ldas, ldbs, ldcs;
    logical same;
    char side[1];
    logical left, null;
    char uplo[1];
    extern /* Subroutine */ int dmake_(char *, char *, char *, integer *,
            integer *, doublereal *, integer *, doublereal *, integer *,
            logical *, doublereal *, ftnlen, ftnlen, ftnlen);
    doublereal alpha;
    extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *,
            integer *, doublereal *, doublereal *, integer *, doublereal *,
            integer *, doublereal *, doublereal *, integer *, doublereal *,
            doublereal *, doublereal *, integer *, doublereal *, doublereal *,
            logical *, integer *, logical *, ftnlen, ftnlen);
    logical isame[13];
    char sides[1];
    integer nargs;
    logical reset;
    extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *,
            doublereal *, doublereal *, integer *, doublereal *, integer *,
            doublereal *, doublereal *, integer *, ftnlen, ftnlen);
    char uplos[1];
    extern logical lderes_(char *, char *, integer *, integer *, doublereal *,
            doublereal *, integer *, ftnlen, ftnlen);
    doublereal errmax;

    /* Fortran I/O blocks */
    /* One control block per formatted write; ciunit is filled in just */
    /* before each use. */
    static cilist io___171 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___172 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___175 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___177 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___178 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___179 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___180 = { 0, 0, 0, fmt_9995, 0 };

/*  Tests DSYMM. */

/*  Auxiliary routine for test program for Level 3 Blas. */

/*  -- Written on 8-February-1989. */
/*     Jack Dongarra, Argonne National Laboratory. */
/*     Iain Duff, AERE Harwell. */
/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/*     Sven Hammarling, Numerical Algorithms Group Ltd. */

/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Local Arrays .. */
/*     .. External Functions .. */
/*     .. External Subroutines .. */
/*     .. Intrinsic Functions .. */
/*     .. Scalars in Common .. */
/*     .. Common blocks .. */
/*     .. Data statements .. */
    /* Parameter adjustments */
    /* Shift every array pointer so the 1-based subscripts used below */
    /* (e.g. idim[im] with im starting at 1) address the first element. */
    --idim;
    --alf;
    --bet;
    --g;
    --ct;
    --cs;
    --cc;
    c_dim1 = *nmax;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --bs;
    --bb;
    b_dim1 = *nmax;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
/*     .. Executable Statements .. */

    /* nargs is how many isame[] entries are filled and checked per call. */
    nargs = 12;
    nc = 0;
    reset = TRUE_;
    errmax = 0.;

    i__1 = *nidim;
    for (im = 1; im <= i__1; ++im) {
        m = idim[im];

        i__2 = *nidim;
        for (in = 1; in <= i__2; ++in) {
            n = idim[in];
/*           Set LDC to 1 more than minimum value if room. */
            ldc = m;
            if (ldc < *nmax) {
                ++ldc;
            }
/*           Skip tests if not enough room. */
            if (ldc > *nmax) {
                goto L90;
            }
            lcc = ldc * n;
            /* null marks an empty problem: the dmmch_ check is skipped. */
            null = n <= 0 || m <= 0;

/*           Set LDB to 1 more than minimum value if room. */
            ldb = m;
            if (ldb < *nmax) {
                ++ldb;
            }
/*           Skip tests if not enough room. */
            if (ldb > *nmax) {
                goto L90;
            }
            lbb = ldb * n;

/*           Generate the matrix B. */

            dmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &
                    reset, &c_b86, (ftnlen)2, (ftnlen)1, (ftnlen)1);

            for (ics = 1; ics <= 2; ++ics) {
                *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1];
                left = *(unsigned char *)side == 'L';

                /* A is m-by-m for SIDE='L' and n-by-n otherwise. */
                if (left) {
                    na = m;
                } else {
                    na = n;
                }
/*              Set LDA to 1 more than minimum value if room. */
                lda = na;
                if (lda < *nmax) {
                    ++lda;
                }
/*              Skip tests if not enough room. */
                if (lda > *nmax) {
                    goto L80;
                }
                laa = lda * na;

                for (icu = 1; icu <= 2; ++icu) {
                    *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];

/*                 Generate the symmetric matrix A. */

                    dmake_("SY", uplo, " ", &na, &na, &a[a_offset], nmax, &aa[
                            1], &lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, (
                            ftnlen)1);

                    i__3 = *nalf;
                    for (ia = 1; ia <= i__3; ++ia) {
                        alpha = alf[ia];

                        i__4 = *nbet;
                        for (ib = 1; ib <= i__4; ++ib) {
                            beta = bet[ib];

/*                       Generate the matrix C. */

                            dmake_("GE", " ", " ", &m, &n, &c__[c_offset],
                                    nmax, &cc[1], &ldc, &reset, &c_b86, (
                                    ftnlen)2, (ftnlen)1, (ftnlen)1);

                            /* nc counts calls; it is printed in reports. */
                            ++nc;

/*                       Save every datum before calling the */
/*                       subroutine. */

                            *(unsigned char *)sides = *(unsigned char *)side;
                            *(unsigned char *)uplos = *(unsigned char *)uplo;
                            ms = m;
                            ns = n;
                            als = alpha;
                            i__5 = laa;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                as[i__] = aa[i__];
/* L10: */
                            }
                            ldas = lda;
                            i__5 = lbb;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                bs[i__] = bb[i__];
/* L20: */
                            }
                            ldbs = ldb;
                            bls = beta;
                            i__5 = lcc;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                cs[i__] = cc[i__];
/* L30: */
                            }
                            ldcs = ldc;

/*                       Call the subroutine. */

                            if (*trace) {
                                io___171.ciunit = *ntra;
                                s_wsfe(&io___171);
                                do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, sname, (ftnlen)6);
                                do_fio(&c__1, side, (ftnlen)1);
                                do_fio(&c__1, uplo, (ftnlen)1);
                                do_fio(&c__1, (char *)&m, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(
                                        doublereal));
                                do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(
                                        doublereal));
                                do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(
                                        integer));
                                e_wsfe();
                            }
                            if (*rewi) {
                                al__1.aerr = 0;
                                al__1.aunit = *ntra;
                                f_rew(&al__1);
                            }
                            dsymm_(side, uplo, &m, &n, &alpha, &aa[1], &lda, &
                                    bb[1], &ldb, &beta, &cc[1], &ldc, (ftnlen)
                                    1, (ftnlen)1);

/*                       Check if error-exit was taken incorrectly. */

                            /* infoc_1 is file-scope state defined outside */
                            /* this routine. */
                            if (! infoc_1.ok) {
                                io___172.ciunit = *nout;
                                s_wsfe(&io___172);
                                e_wsfe();
                                *fatal = TRUE_;
                                goto L110;
                            }

/*                       See what data changed inside subroutines. */

                            /* isame[k-1] corresponds to the k-th argument */
                            /* of the dsymm_ call above. */
                            isame[0] = *(unsigned char *)sides == *(unsigned
                                    char *)side;
                            isame[1] = *(unsigned char *)uplos == *(unsigned
                                    char *)uplo;
                            isame[2] = ms == m;
                            isame[3] = ns == n;
                            isame[4] = als == alpha;
                            isame[5] = lde_(&as[1], &aa[1], &laa);
                            isame[6] = ldas == lda;
                            isame[7] = lde_(&bs[1], &bb[1], &lbb);
                            isame[8] = ldbs == ldb;
                            isame[9] = bls == beta;
                            /* NOTE(review): lderes_ is defined elsewhere; */
                            /* presumably it compares only the part of C */
                            /* outside the m-by-n result -- confirm. */
                            if (null) {
                                isame[10] = lde_(&cs[1], &cc[1], &lcc);
                            } else {
                                isame[10] = lderes_("GE", " ", &m, &n, &cs[1],
                                        &cc[1], &ldc, (ftnlen)2, (ftnlen)1);
                            }
                            isame[11] = ldcs == ldc;

/*                       If data was incorrectly changed, report and */
/*                       return. */

                            same = TRUE_;
                            i__5 = nargs;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                same = same && isame[i__ - 1];
                                if (! isame[i__ - 1]) {
                                    io___175.ciunit = *nout;
                                    s_wsfe(&io___175);
                                    do_fio(&c__1, (char *)&i__, (ftnlen)
                                            sizeof(integer));
                                    e_wsfe();
                                }
/* L40: */
                            }
                            if (! same) {
                                *fatal = TRUE_;
                                goto L110;
                            }

                            if (! null) {

/*                          Check the result. */

                                /* The operand order handed to dmmch_ */
                                /* follows SIDE: (a, b) for 'L', (b, a) */
                                /* otherwise. */
                                if (left) {
                                    dmmch_("N", "N", &m, &n, &m, &alpha, &a[
                                            a_offset], nmax, &b[b_offset],
                                            nmax, &beta, &c__[c_offset], nmax,
                                            &ct[1], &g[1], &cc[1], &ldc, eps,
                                            &err, fatal, nout, &c_true, (
                                            ftnlen)1, (ftnlen)1);
                                } else {
                                    dmmch_("N", "N", &m, &n, &n, &alpha, &b[
                                            b_offset], nmax, &a[a_offset],
                                            nmax, &beta, &c__[c_offset], nmax,
                                            &ct[1], &g[1], &cc[1], &ldc, eps,
                                            &err, fatal, nout, &c_true, (
                                            ftnlen)1, (ftnlen)1);
                                }
                                errmax = max(errmax,err);
/*                          If got really bad answer, report and */
/*                          return. */
                                if (*fatal) {
                                    goto L110;
                                }
                            }

/* L50: */
                        }

/* L60: */
                    }

/* L70: */
                }

L80:
                ;
            }

L90:
            ;
        }

/* L100: */
    }

/*     Report result. */

    if (errmax < *thresh) {
        io___177.ciunit = *nout;
        s_wsfe(&io___177);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___178.ciunit = *nout;
        s_wsfe(&io___178);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal));
        e_wsfe();
    }
    goto L120;

    /* Failure exit: name the routine, then echo the failing call. */
L110:
    io___179.ciunit = *nout;
    s_wsfe(&io___179);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    io___180.ciunit = *nout;
    s_wsfe(&io___180);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, side, (ftnlen)1);
    do_fio(&c__1, uplo, (ftnlen)1);
    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
    e_wsfe();

L120:
    return 0;

/*     End of DCHK2. 
*/
} /* dchk2_ */

/*
 * dchk3_ -- checking routine for DTRMM and DTRSM (see the banner below).
 *
 * Which routine is exercised is decided by comparing the two characters at
 * sname + 3 with "MM" and "SM".
 *
 * Iterates over every (m, n) taken from idim[1..*nidim], both SIDE codes
 * ("LR"), both UPLO codes ("UL"), the three TRANSA codes ("NTC"), both DIAG
 * codes ("UN") and every alpha in alf[].  For each combination it:
 *   1. builds the triangular A and the general B through dmake_();
 *   2. copies every argument into a shadow variable (sides, uplos, tranas,
 *      diags, ms, ns, als, as[], ldas, bs[], ldbs);
 *   3. optionally traces to unit *ntra and/or rewinds it, then calls
 *      dtrmm_() or dtrsm_();
 *   4. treats a false infoc_1.ok as a fatal error;
 *   5. compares every argument with its shadow copy and reports the 1-based
 *      position of each one that differs;
 *   6. unless m <= 0 or n <= 0, checks the result with dmmch_() and keeps
 *      the largest returned err in errmax.
 *
 * Arguments:
 *   sname        routine name; 6 characters are written in reports.
 *   eps          forwarded unchanged to dmmch_().
 *   thresh       errmax < *thresh selects the "PASSED" report (fmt_9999),
 *                otherwise the "SUSPECT" report (fmt_9997) is written.
 *   nout, ntra   Fortran units for reports and for trace records.
 *   trace, rewi  enable tracing / rewinding of *ntra before each call.
 *   fatal        set to TRUE_ on an error exit or a changed argument; also
 *                forwarded to dmmch_(), which may set it.
 *   nidim, idim  number of dimension values and the values themselves.
 *   nalf, alf    number of alpha values and the values themselves.
 *   nmax         column stride used to index a, b and c__, and the upper
 *                bound accepted for lda / ldb.
 *   a, b         matrices filled by dmake_() and later given to dmmch_().
 *   aa, bb       arrays filled by dmake_() and passed to dtrmm_/dtrsm_.
 *   as, bs       receive the pre-call copies of aa, bb.
 *   ct, g        forwarded to dmmch_() (presumably workspace -- confirm
 *                against dmmch_).
 *   c__          zeroed over its full *nmax by *nmax extent on entry; in the
 *                "SM" branch it is then overwritten with the computed result.
 *   sname_len    not referenced in the body.
 *
 * Returns 0 on every path.
 */
/* Subroutine */ int dchk3_(char *sname, doublereal *eps, doublereal *thresh,
        integer *nout, integer *ntra, logical *trace, logical *rewi, logical *
        fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf,
        integer *nmax, doublereal *a, doublereal *aa, doublereal *as,
        doublereal *b, doublereal *bb, doublereal *bs, doublereal *ct,
        doublereal *g, doublereal *c__, ftnlen sname_len)
{
    /* Initialized data */
    /* Candidate UPLO, TRANSA, DIAG and SIDE codes, indexed by the loop */
    /* counters icu, ict, icd and ics below. */
    static char ichu[2] = "UL";
    static char icht[3] = "NTC";
    static char ichd[2] = "UN";
    static char ichs[2] = "LR";

    /* Format strings */
    static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,4(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i"
            "3,\002) .\002)";
    static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
            i__3, i__4, i__5;
    alist al__1;

    /* Builtin functions */
    integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio(
            integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *);

    /* Local variables */
    integer i__, j, m, n, ia, na, nc, im, in, ms, ns, laa, icd, lbb, lda, ldb;
    extern logical lde_(doublereal *, doublereal *, integer *);
    integer ics;
    doublereal als;
    integer ict, icu;
    doublereal err;
    char diag[1];
    integer ldas, ldbs;
    logical same;
    char side[1];
    logical left, null;
    char uplo[1];
    extern /* Subroutine */ int dmake_(char *, char *, char *, integer *,
            integer *, doublereal *, integer *, doublereal *, integer *,
            logical *, doublereal *, ftnlen, ftnlen, ftnlen);
    doublereal alpha;
    char diags[1];
    extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *,
            integer *, doublereal *, doublereal *, integer *, doublereal *,
            integer *, doublereal *, doublereal *, integer *, doublereal *,
            doublereal *, doublereal *, integer *, doublereal *, doublereal *,
            logical *, integer *, logical *, ftnlen, ftnlen);
    logical isame[13];
    char sides[1];
    integer nargs;
    logical reset;
    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *,
            integer *, integer *, doublereal *, doublereal *, integer *,
            doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dtrsm_(
            char *, char *, char *, char *, integer *, integer *, doublereal *
            , doublereal *, integer *, doublereal *, integer *, ftnlen,
            ftnlen, ftnlen, ftnlen);
    char uplos[1];
    extern logical lderes_(char *, char *, integer *, integer *, doublereal *,
            doublereal *, integer *, ftnlen, ftnlen);
    char tranas[1], transa[1];
    doublereal errmax;

    /* Fortran I/O blocks */
    /* One control block per formatted write; ciunit is filled in just */
    /* before each use. */
    static cilist io___221 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___222 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___223 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___226 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___228 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___229 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___230 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___231 = { 0, 0, 0, fmt_9995, 0 };

/*  Tests DTRMM and DTRSM. */

/*  Auxiliary routine for test program for Level 3 Blas. */

/*  -- Written on 8-February-1989. */
/*     Jack Dongarra, Argonne National Laboratory. */
/*     Iain Duff, AERE Harwell. */
/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/*     Sven Hammarling, Numerical Algorithms Group Ltd. */

/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Local Arrays .. */
/*     .. External Functions .. */
/*     .. External Subroutines .. */
/*     .. Intrinsic Functions .. */
/*     .. Scalars in Common .. */
/*     .. Common blocks .. */
/*     .. Data statements .. */
    /* Parameter adjustments */
    /* Shift every array pointer so the 1-based subscripts used below */
    /* (e.g. c__[i__ + j * c_dim1] with i__, j starting at 1) address the */
    /* first element. */
    --idim;
    --alf;
    c_dim1 = *nmax;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --g;
    --ct;
    --bs;
    --bb;
    b_dim1 = *nmax;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
/*     .. Executable Statements .. */

    /* nargs is how many isame[] entries are filled and checked per call. */
    nargs = 11;
    nc = 0;
    reset = TRUE_;
    errmax = 0.;
/*     Set up zero matrix for DMMCH. */
    i__1 = *nmax;
    for (j = 1; j <= i__1; ++j) {
        i__2 = *nmax;
        for (i__ = 1; i__ <= i__2; ++i__) {
            c__[i__ + j * c_dim1] = 0.;
/* L10: */
        }
/* L20: */
    }

    i__1 = *nidim;
    for (im = 1; im <= i__1; ++im) {
        m = idim[im];

        i__2 = *nidim;
        for (in = 1; in <= i__2; ++in) {
            n = idim[in];
/*           Set LDB to 1 more than minimum value if room. */
            ldb = m;
            if (ldb < *nmax) {
                ++ldb;
            }
/*           Skip tests if not enough room. */
            if (ldb > *nmax) {
                goto L130;
            }
            lbb = ldb * n;
            /* null marks an empty problem: the dmmch_ check is skipped. */
            null = m <= 0 || n <= 0;

            for (ics = 1; ics <= 2; ++ics) {
                *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1];
                left = *(unsigned char *)side == 'L';
                /* A is m-by-m for SIDE='L' and n-by-n otherwise. */
                if (left) {
                    na = m;
                } else {
                    na = n;
                }
/*              Set LDA to 1 more than minimum value if room. */
                lda = na;
                if (lda < *nmax) {
                    ++lda;
                }
/*              Skip tests if not enough room. */
                /* L130 ends the current `in` iteration, so this jump */
                /* also abandons any remaining SIDE value for this (m, n). */
                if (lda > *nmax) {
                    goto L130;
                }
                laa = lda * na;

                for (icu = 1; icu <= 2; ++icu) {
                    *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];

                    for (ict = 1; ict <= 3; ++ict) {
                        *(unsigned char *)transa = *(unsigned char *)&icht[
                                ict - 1];

                        for (icd = 1; icd <= 2; ++icd) {
                            *(unsigned char *)diag = *(unsigned char *)&ichd[
                                    icd - 1];

                            i__3 = *nalf;
                            for (ia = 1; ia <= i__3; ++ia) {
                                alpha = alf[ia];

/*                          Generate the matrix A. */

                                dmake_("TR", uplo, diag, &na, &na, &a[
                                        a_offset], nmax, &aa[1], &lda, &reset,
                                        &c_b86, (ftnlen)2, (ftnlen)1, (
                                        ftnlen)1);

/*                          Generate the matrix B. */

                                dmake_("GE", " ", " ", &m, &n, &b[b_offset],
                                        nmax, &bb[1], &ldb, &reset, &c_b86, (
                                        ftnlen)2, (ftnlen)1, (ftnlen)1);

                                /* nc counts calls; printed in reports. */
                                ++nc;

/*                          Save every datum before calling the */
/*                          subroutine. */

                                *(unsigned char *)sides = *(unsigned char *)
                                        side;
                                *(unsigned char *)uplos = *(unsigned char *)
                                        uplo;
                                *(unsigned char *)tranas = *(unsigned char *)
                                        transa;
                                *(unsigned char *)diags = *(unsigned char *)
                                        diag;
                                ms = m;
                                ns = n;
                                als = alpha;
                                i__4 = laa;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    as[i__] = aa[i__];
/* L30: */
                                }
                                ldas = lda;
                                i__4 = lbb;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    bs[i__] = bb[i__];
/* L40: */
                                }
                                ldbs = ldb;

/*                          Call the subroutine. */

                                /* Dispatch on the two characters at */
                                /* sname + 3. */
                                if (s_cmp(sname + 3, "MM", (ftnlen)2, (ftnlen)
                                        2) == 0) {
                                    if (*trace) {
                                        io___221.ciunit = *ntra;
                                        s_wsfe(&io___221);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, side, (ftnlen)1);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, transa, (ftnlen)1);
                                        do_fio(&c__1, diag, (ftnlen)1);
                                        do_fio(&c__1, (char *)&m, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&alpha, (ftnlen)
                                                sizeof(doublereal));
                                        do_fio(&c__1, (char *)&lda, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&ldb, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    dtrmm_(side, uplo, transa, diag, &m, &n, &
                                            alpha, &aa[1], &lda, &bb[1], &ldb,
                                            (ftnlen)1, (ftnlen)1, (ftnlen)1,
                                            (ftnlen)1);
                                } else if (s_cmp(sname + 3, "SM", (ftnlen)2, (
                                        ftnlen)2) == 0) {
                                    if (*trace) {
                                        io___222.ciunit = *ntra;
                                        s_wsfe(&io___222);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, side, (ftnlen)1);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, transa, (ftnlen)1);
                                        do_fio(&c__1, diag, (ftnlen)1);
                                        do_fio(&c__1, (char *)&m, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&alpha, (ftnlen)
                                                sizeof(doublereal));
                                        do_fio(&c__1, (char *)&lda, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&ldb, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    dtrsm_(side, uplo, transa, diag, &m, &n, &
                                            alpha, &aa[1], &lda, &bb[1], &ldb,
                                            (ftnlen)1, (ftnlen)1, (ftnlen)1,
                                            (ftnlen)1);
                                }

/*                          Check if error-exit was taken incorrectly. */

                                /* infoc_1 is file-scope state defined */
                                /* outside this routine. */
                                if (! infoc_1.ok) {
                                    io___223.ciunit = *nout;
                                    s_wsfe(&io___223);
                                    e_wsfe();
                                    *fatal = TRUE_;
                                    goto L150;
                                }

/*                          See what data changed inside subroutines. */

                                /* isame[k-1] corresponds to the k-th */
                                /* argument of the call above. */
                                isame[0] = *(unsigned char *)sides == *(
                                        unsigned char *)side;
                                isame[1] = *(unsigned char *)uplos == *(
                                        unsigned char *)uplo;
                                isame[2] = *(unsigned char *)tranas == *(
                                        unsigned char *)transa;
                                isame[3] = *(unsigned char *)diags == *(
                                        unsigned char *)diag;
                                isame[4] = ms == m;
                                isame[5] = ns == n;
                                isame[6] = als == alpha;
                                isame[7] = lde_(&as[1], &aa[1], &laa);
                                isame[8] = ldas == lda;
                                /* NOTE(review): lderes_ is defined */
                                /* elsewhere; presumably it compares only */
                                /* the part of B outside the m-by-n result */
                                /* -- confirm. */
                                if (null) {
                                    isame[9] = lde_(&bs[1], &bb[1], &lbb);
                                } else {
                                    isame[9] = lderes_("GE", " ", &m, &n, &bs[
                                            1], &bb[1], &ldb, (ftnlen)2, (
                                            ftnlen)1);
                                }
                                isame[10] = ldbs == ldb;

/*                          If data was incorrectly changed, report and */
/*                          return. */

                                same = TRUE_;
                                i__4 = nargs;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    same = same && isame[i__ - 1];
                                    if (! isame[i__ - 1]) {
                                        io___226.ciunit = *nout;
                                        s_wsfe(&io___226);
                                        do_fio(&c__1, (char *)&i__, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
/* L50: */
                                }
                                if (! same) {
                                    *fatal = TRUE_;
                                    goto L150;
                                }

                                if (! null) {
                                    if (s_cmp(sname + 3, "MM", (ftnlen)2, (
                                            ftnlen)2) == 0) {

/*                             Check the result. */

                                        /* c_b86 is a file-scope constant */
                                        /* defined outside this routine. */
                                        if (left) {
                                            dmmch_(transa, "N", &m, &n, &m, &
                                                    alpha, &a[a_offset], nmax,
                                                    &b[b_offset], nmax, &
                                                    c_b86, &c__[c_offset],
                                                    nmax, &ct[1], &g[1], &bb[
                                                    1], &ldb, eps, &err,
                                                    fatal, nout, &c_true, (
                                                    ftnlen)1, (ftnlen)1);
                                        } else {
                                            dmmch_("N", transa, &m, &n, &n, &
                                                    alpha, &b[b_offset], nmax,
                                                    &a[a_offset], nmax, &
                                                    c_b86, &c__[c_offset],
                                                    nmax, &ct[1], &g[1], &bb[
                                                    1], &ldb, eps, &err,
                                                    fatal, nout, &c_true, (
                                                    ftnlen)1, (ftnlen)1);
                                        }
                                    } else if (s_cmp(sname + 3, "SM", (ftnlen)
                                            2, (ftnlen)2) == 0) {

/*                             Compute approximation to original */
/*                             matrix. */

                                        /* Move the computed result from bb */
                                        /* into c__ and refill bb with */
                                        /* alpha * b, so dmmch_ below is */
                                        /* given c__ as an operand and bb */
                                        /* as the matrix to check. */
                                        i__4 = n;
                                        for (j = 1; j <= i__4; ++j) {
                                            i__5 = m;
                                            for (i__ = 1; i__ <= i__5; ++i__)
                                                    {
                                                c__[i__ + j * c_dim1] = bb[i__
                                                        + (j - 1) * ldb];
                                                bb[i__ + (j - 1) * ldb] =
                                                        alpha * b[i__ + j *
                                                        b_dim1];
/* L60: */
                                            }
/* L70: */
                                        }

                                        /* c_b96 and c_b86 are file-scope */
                                        /* constants defined outside this */
                                        /* routine. */
                                        if (left) {
                                            dmmch_(transa, "N", &m, &n, &m, &
                                                    c_b96, &a[a_offset], nmax,
                                                    &c__[c_offset], nmax, &
                                                    c_b86, &b[b_offset], nmax,
                                                    &ct[1], &g[1], &bb[1], &
                                                    ldb, eps, &err, fatal,
                                                    nout, &c_false, (ftnlen)1,
                                                    (ftnlen)1);
                                        } else {
                                            dmmch_("N", transa, &m, &n, &n, &
                                                    c_b96, &c__[c_offset],
                                                    nmax, &a[a_offset], nmax,
                                                    &c_b86, &b[b_offset],
                                                    nmax, &ct[1], &g[1], &bb[
                                                    1], &ldb, eps, &err,
                                                    fatal, nout, &c_false, (
                                                    ftnlen)1, (ftnlen)1);
                                        }
                                    }
                                    /* NOTE(review): if sname matched */
                                    /* neither "MM" nor "SM", err has not */
                                    /* been assigned when it is read here. */
                                    errmax = max(errmax,err);
/*                          If got really bad answer, report and */
/*                          return. */
                                    if (*fatal) {
                                        goto L150;
                                    }
                                }

/* L80: */
                            }

/* L90: */
                        }

/* L100: */
                    }

/* L110: */
                }

/* L120: */
            }

L130:
            ;
        }

/* L140: */
    }

/*     Report result. */

    if (errmax < *thresh) {
        io___228.ciunit = *nout;
        s_wsfe(&io___228);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___229.ciunit = *nout;
        s_wsfe(&io___229);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal));
        e_wsfe();
    }
    goto L160;

    /* Failure exit: name the routine, then echo the failing call. */
L150:
    io___230.ciunit = *nout;
    s_wsfe(&io___230);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    io___231.ciunit = *nout;
    s_wsfe(&io___231);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, side, (ftnlen)1);
    do_fio(&c__1, uplo, (ftnlen)1);
    do_fio(&c__1, transa, (ftnlen)1);
    do_fio(&c__1, diag, (ftnlen)1);
    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
    e_wsfe();

L160:
    return 0;

/*     End of DCHK3. 
*/
} /* dchk3_ */

/*
 * dchk4_ -- checking routine for DSYRK (see the "Tests DSYRK" banner below).
 *
 * Iterates over every n and k taken from idim[1..*nidim], the three TRANS
 * codes in icht ("NTC"), both UPLO codes in ichu ("UL") and every
 * alpha / beta taken from alf[] / bet[].  For each combination it:
 *   1. builds the general A and the symmetric C through dmake_();
 *   2. copies every argument into a shadow variable (uplos, transs, ns, ks,
 *      als, as[], ldas, bets, cs[], ldcs);
 *   3. optionally traces to unit *ntra and/or rewinds it, then calls
 *      dsyrk_();
 *   4. treats a false infoc_1.ok as a fatal error;
 *   5. compares every argument with its shadow copy and reports the 1-based
 *      position of each one that differs;
 *   6. unless n <= 0, checks the result one column at a time with dmmch_()
 *      and keeps the largest returned err in errmax.
 *
 * Arguments:
 *   sname        routine name; 6 characters are written in reports.
 *   eps          forwarded unchanged to dmmch_().
 *   thresh       errmax < *thresh selects the "PASSED" report (fmt_9999),
 *                otherwise the "SUSPECT" report (fmt_9997) is written.
 *   nout, ntra   Fortran units for reports and for trace records.
 *   trace, rewi  enable tracing / rewinding of *ntra before each call.
 *   fatal        set to TRUE_ on an error exit or a changed argument; also
 *                forwarded to dmmch_(), which may set it.
 *   nidim, idim  number of dimension values and the values themselves.
 *   nalf, alf    number of alpha values and the values themselves.
 *   nbet, bet    number of beta values and the values themselves.
 *   nmax         column stride used to index a and c__, and the upper bound
 *                accepted for lda / ldc.
 *   a, c__       matrices filled by dmake_() and later given to dmmch_().
 *   aa, cc       arrays filled by dmake_() and passed to dsyrk_().
 *   as, cs       receive the pre-call copies of aa, cc.
 *   b, bb, bs    only pointer-adjusted below; not otherwise used here.
 *   ct, g        forwarded to dmmch_() (presumably workspace -- confirm
 *                against dmmch_).
 *   sname_len    not referenced in the body.
 *
 * Returns 0 on every path.
 */
/* Subroutine */ int dchk4_(char *sname, doublereal *eps, doublereal *thresh,
        integer *nout, integer *ntra, logical *trace, logical *rewi, logical *
        fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf,
        integer *nbet, doublereal *bet, integer *nmax, doublereal *a,
        doublereal *aa, doublereal *as, doublereal *b, doublereal *bb,
        doublereal *bs, doublereal *c__, doublereal *cc, doublereal *cs,
        doublereal *ct, doublereal *g, ftnlen sname_len)
{
    /* Initialized data */
    /* Candidate TRANS and UPLO codes, indexed by ict / icu below. */
    static char icht[3] = "NTC";
    static char ichu[2] = "UL";

    /* Format strings */
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002,\002,f4.1,"
            "\002, C,\002,i3,\002) .\002)";
    static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN"
            " \002,i3)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
            i__3, i__4, i__5;
    alist al__1;

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
            f_rew(alist *);

    /* Local variables */
    integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa,
            lda, lcc, ldc;
    extern logical lde_(doublereal *, doublereal *, integer *);
    doublereal als;
    integer ict, icu;
    doublereal err, beta;
    integer ldas, ldcs;
    logical same;
    doublereal bets;
    logical tran, null;
    char uplo[1];
    extern /* Subroutine */ int dmake_(char *, char *, char *, integer *,
            integer *, doublereal *, integer *, doublereal *, integer *,
            logical *, doublereal *, ftnlen, ftnlen, ftnlen);
    doublereal alpha;
    extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *,
            integer *, doublereal *, doublereal *, integer *, doublereal *,
            integer *, doublereal *, doublereal *, integer *, doublereal *,
            doublereal *, doublereal *, integer *, doublereal *, doublereal *,
            logical *, integer *, logical *, ftnlen, ftnlen);
    logical isame[13];
    integer nargs;
    logical reset;
    char trans[1];
    logical upper;
    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *,
            doublereal *, doublereal *, integer *, doublereal *, doublereal *,
            integer *, ftnlen, ftnlen);
    char uplos[1];
    extern logical lderes_(char *, char *, integer *, integer *, doublereal *,
            doublereal *, integer *, ftnlen, ftnlen);
    doublereal errmax;
    char transs[1];

    /* Fortran I/O blocks */
    /* One control block per formatted write; ciunit is filled in just */
    /* before each use. */
    static cilist io___268 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___269 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___272 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___278 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___279 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___280 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___281 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___282 = { 0, 0, 0, fmt_9994, 0 };

/*  Tests DSYRK. */

/*  Auxiliary routine for test program for Level 3 Blas. */

/*  -- Written on 8-February-1989. */
/*     Jack Dongarra, Argonne National Laboratory. */
/*     Iain Duff, AERE Harwell. */
/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/*     Sven Hammarling, Numerical Algorithms Group Ltd. */

/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Local Arrays .. */
/*     .. External Functions .. */
/*     .. External Subroutines .. */
/*     .. Intrinsic Functions .. */
/*     .. Scalars in Common .. */
/*     .. Common blocks .. */
/*     .. Data statements .. */
    /* Parameter adjustments */
    /* Shift every array pointer so the 1-based subscripts used below */
    /* (e.g. idim[in] with in starting at 1) address the first element. */
    --idim;
    --alf;
    --bet;
    --g;
    --ct;
    --cs;
    --cc;
    c_dim1 = *nmax;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --bs;
    --bb;
    b_dim1 = *nmax;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
/*     .. Executable Statements .. */

    /* nargs is how many isame[] entries are filled and checked per call. */
    nargs = 10;
    nc = 0;
    reset = TRUE_;
    errmax = 0.;

    i__1 = *nidim;
    for (in = 1; in <= i__1; ++in) {
        n = idim[in];
/*        Set LDC to 1 more than minimum value if room. */
        ldc = n;
        if (ldc < *nmax) {
            ++ldc;
        }
/*        Skip tests if not enough room. */
        if (ldc > *nmax) {
            goto L100;
        }
        lcc = ldc * n;
        /* null marks an empty problem: the dmmch_ check is skipped. */
        null = n <= 0;

        i__2 = *nidim;
        for (ik = 1; ik <= i__2; ++ik) {
            k = idim[ik];

            for (ict = 1; ict <= 3; ++ict) {
                *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1];
                tran = *(unsigned char *)trans == 'T' || *(unsigned char *)
                        trans == 'C';
                /* A is generated as k-by-n when transposed, n-by-k */
                /* otherwise. */
                if (tran) {
                    ma = k;
                    na = n;
                } else {
                    ma = n;
                    na = k;
                }
/*              Set LDA to 1 more than minimum value if room. */
                lda = ma;
                if (lda < *nmax) {
                    ++lda;
                }
/*              Skip tests if not enough room. */
                if (lda > *nmax) {
                    goto L80;
                }
                laa = lda * na;

/*              Generate the matrix A. */

                dmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[1], &
                        lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, (ftnlen)1);

                for (icu = 1; icu <= 2; ++icu) {
                    *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];
                    upper = *(unsigned char *)uplo == 'U';

                    i__3 = *nalf;
                    for (ia = 1; ia <= i__3; ++ia) {
                        alpha = alf[ia];

                        i__4 = *nbet;
                        for (ib = 1; ib <= i__4; ++ib) {
                            beta = bet[ib];

/*                       Generate the matrix C. */

                            dmake_("SY", uplo, " ", &n, &n, &c__[c_offset],
                                    nmax, &cc[1], &ldc, &reset, &c_b86, (
                                    ftnlen)2, (ftnlen)1, (ftnlen)1);

                            /* nc counts calls; it is printed in reports. */
                            ++nc;

/*                       Save every datum before calling the subroutine. */

                            *(unsigned char *)uplos = *(unsigned char *)uplo;
                            *(unsigned char *)transs = *(unsigned char *)
                                    trans;
                            ns = n;
                            ks = k;
                            als = alpha;
                            i__5 = laa;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                as[i__] = aa[i__];
/* L10: */
                            }
                            ldas = lda;
                            bets = beta;
                            i__5 = lcc;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                cs[i__] = cc[i__];
/* L20: */
                            }
                            ldcs = ldc;

/*                       Call the subroutine. */

                            if (*trace) {
                                io___268.ciunit = *ntra;
                                s_wsfe(&io___268);
                                do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, sname, (ftnlen)6);
                                do_fio(&c__1, uplo, (ftnlen)1);
                                do_fio(&c__1, trans, (ftnlen)1);
                                do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&k, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(
                                        doublereal));
                                do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(
                                        doublereal));
                                do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(
                                        integer));
                                e_wsfe();
                            }
                            if (*rewi) {
                                al__1.aerr = 0;
                                al__1.aunit = *ntra;
                                f_rew(&al__1);
                            }
                            dsyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &lda,
                                    &beta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1)
                                    ;

/*                       Check if error-exit was taken incorrectly. */

                            /* infoc_1 is file-scope state defined outside */
                            /* this routine. */
                            if (! infoc_1.ok) {
                                io___269.ciunit = *nout;
                                s_wsfe(&io___269);
                                e_wsfe();
                                *fatal = TRUE_;
                                goto L120;
                            }

/*                       See what data changed inside subroutines. */

                            /* isame[k-1] corresponds to the k-th argument */
                            /* of the dsyrk_ call above. */
                            isame[0] = *(unsigned char *)uplos == *(unsigned
                                    char *)uplo;
                            isame[1] = *(unsigned char *)transs == *(unsigned
                                    char *)trans;
                            isame[2] = ns == n;
                            isame[3] = ks == k;
                            isame[4] = als == alpha;
                            isame[5] = lde_(&as[1], &aa[1], &laa);
                            isame[6] = ldas == lda;
                            isame[7] = bets == beta;
                            /* NOTE(review): lderes_ is defined elsewhere; */
                            /* presumably it compares only the part of C */
                            /* outside the referenced triangle -- confirm. */
                            if (null) {
                                isame[8] = lde_(&cs[1], &cc[1], &lcc);
                            } else {
                                isame[8] = lderes_("SY", uplo, &n, &n, &cs[1],
                                        &cc[1], &ldc, (ftnlen)2, (ftnlen)1);
                            }
                            isame[9] = ldcs == ldc;

/*                       If data was incorrectly changed, report and */
/*                       return. */

                            same = TRUE_;
                            i__5 = nargs;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                same = same && isame[i__ - 1];
                                if (! isame[i__ - 1]) {
                                    io___272.ciunit = *nout;
                                    s_wsfe(&io___272);
                                    do_fio(&c__1, (char *)&i__, (ftnlen)
                                            sizeof(integer));
                                    e_wsfe();
                                }
/* L30: */
                            }
                            if (! same) {
                                *fatal = TRUE_;
                                goto L120;
                            }

                            if (! null) {

/*                          Check the result column by column. */

                                /* jc is the index in cc of the first */
                                /* element of column j that is checked. */
                                jc = 1;
                                i__5 = n;
                                for (j = 1; j <= i__5; ++j) {
                                    /* Upper: rows 1..j of column j. */
                                    /* Lower: rows j..n of column j. */
                                    if (upper) {
                                        jj = 1;
                                        lj = j;
                                    } else {
                                        jj = j;
                                        lj = n - j + 1;
                                    }
                                    if (tran) {
                                        dmmch_("T", "N", &lj, &c__1, &k, &
                                                alpha, &a[jj * a_dim1 + 1],
                                                nmax, &a[j * a_dim1 + 1],
                                                nmax, &beta, &c__[jj + j *
                                                c_dim1], nmax, &ct[1], &g[1],
                                                &cc[jc], &ldc, eps, &err,
                                                fatal, nout, &c_true, (ftnlen)
                                                1, (ftnlen)1);
                                    } else {
                                        dmmch_("N", "T", &lj, &c__1, &k, &
                                                alpha, &a[jj + a_dim1], nmax,
                                                &a[j + a_dim1], nmax, &beta, &
                                                c__[jj + j * c_dim1], nmax, &
                                                ct[1], &g[1], &cc[jc], &ldc,
                                                eps, &err, fatal, nout, &
                                                c_true, (ftnlen)1, (ftnlen)1);
                                    }
                                    /* Next column start: the top of the */
                                    /* column for upper, the diagonal */
                                    /* element for lower. */
                                    if (upper) {
                                        jc += ldc;
                                    } else {
                                        jc = jc + ldc + 1;
                                    }
                                    errmax = max(errmax,err);
/*                          If got really bad answer, report and */
/*                          return. */
                                    if (*fatal) {
                                        goto L110;
                                    }
/* L40: */
                                }
                            }

/* L50: */
                        }

/* L60: */
                    }

/* L70: */
                }

L80:
                ;
            }

/* L90: */
        }

L100:
        ;
    }

/*     Report result. */

    if (errmax < *thresh) {
        io___278.ciunit = *nout;
        s_wsfe(&io___278);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___279.ciunit = *nout;
        s_wsfe(&io___279);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal));
        e_wsfe();
    }
    goto L130;

    /* Reached only from the column check: report the failing column j */
    /* when n > 1, then fall through to the common failure report. */
L110:
    if (n > 1) {
        io___280.ciunit = *nout;
        s_wsfe(&io___280);
        do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer));
        e_wsfe();
    }

    /* Failure exit: name the routine, then echo the failing call. */
L120:
    io___281.ciunit = *nout;
    s_wsfe(&io___281);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    io___282.ciunit = *nout;
    s_wsfe(&io___282);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, uplo, (ftnlen)1);
    do_fio(&c__1, trans, (ftnlen)1);
    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
    e_wsfe();

L130:
    return 0;

/*     End of DCHK4. 
*/ } /* dchk4_ */

/*
 * dchk5_ -- test driver for DSYR2K (f2c translation of a Fortran routine).
 *
 * For every n and k taken from idim[], every trans in "NTC", every uplo in
 * "UL", every alpha in alf[] and every beta in bet[], the routine:
 *   1. builds A, B (once per n/k/trans) and C (once per case) with dmake_,
 *   2. saves copies of every argument,
 *   3. optionally writes a trace line to unit *ntra,
 *   4. calls dsyr2k_,
 *   5. checks infoc_1.ok and that no input argument was modified,
 *   6. unless n <= 0, checks the result one column at a time with dmmch_,
 *      keeping the largest reported err in errmax.
 * After all cases it writes a summary to unit *nout; on a fatal condition it
 * writes the parameters of the failing call instead.
 *
 * Arguments (all passed by reference, Fortran style):
 *   sname      routine name; 6 characters of it are written in trace and
 *              report lines.
 *   eps        passed through unchanged to dmmch_.
 *   thresh     if errmax < *thresh the "PASSED" format is used for the
 *              summary, otherwise the "SUSPECT" format with errmax.
 *   nout       unit number for summary and error output.
 *   ntra       unit number for the optional per-call trace.
 *   trace      when true, each call is written to *ntra before it is made.
 *   rewi       when true, f_rew is invoked on *ntra before each call.
 *   fatal      set to TRUE_ here on an unexpected error exit or a changed
 *              input; also handed to dmmch_ and tested after each of its
 *              calls.
 *   nidim,idim number of dimension values and the values themselves; the
 *              same list supplies both n and k.
 *   nalf,alf   number of alpha values and the values.
 *   nbet,bet   number of beta values and the values.
 *   nmax       upper limit for lda and ldc; also used as the column stride
 *              of c__ and in the index arithmetic on ab.
 *   ab         source array handed to dmake_ for A and B, and read directly
 *              when filling w for the column check.
 *   aa,bb,cc   arrays passed to dsyr2k_ as its A, B and C arguments.
 *   as,bs,cs   copies of aa, bb, cc taken before the call.
 *   c__        passed to dmake_ when generating C and to dmmch_.
 *   ct,g       only passed on to dmmch_.
 *   w          filled here with 2*k values from ab per column and passed to
 *              dmmch_.
 *   sname_len  not referenced in the body.
 *
 * Returns 0 on every path.
 *
 * NOTE(review): c__1, c_b86, c_true, infoc_1, TRUE_ and max are not defined
 * inside this function; they come from elsewhere in the file or from f2c
 * headers. The exact contracts of dmake_, dmmch_, lde_ and lderes_ are not
 * visible here -- confirm against their definitions.
 */
/* Subroutine */ int dchk5_(char *sname, doublereal *eps, doublereal *thresh,
        integer *nout, integer *ntra, logical *trace, logical *rewi, logical *
        fatal, integer *nidim, integer *idim, integer *nalf, doublereal *alf,
        integer *nbet, doublereal *bet, integer *nmax, doublereal *ab,
        doublereal *aa, doublereal *as, doublereal *bb, doublereal *bs,
        doublereal *c__, doublereal *cc, doublereal *cs, doublereal *ct,
        doublereal *g, doublereal *w, ftnlen sname_len)
{
    /* Initialized data */

    /* Candidate values for trans and uplo; note neither array has room for */
    /* a terminating NUL, they are indexed one character at a time. */
    static char icht[3] = "NTC";
    static char ichu[2] = "UL";

    /* Format strings */
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i"
            "3,\002,\002,f4.1,\002, C,\002,i3,\002) \002,\002 .\002)";
    static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN"
            " \002,i3)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";

    /* System generated locals */
    integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
    alist al__1;

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
            f_rew(alist *);

    /* Local variables */
    integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa,
            lbb, lda, lcc, ldb, ldc;
    extern logical lde_(doublereal *, doublereal *, integer *);
    doublereal als;
    integer ict, icu;
    doublereal err;
    integer jjab;
    doublereal beta;
    integer ldas, ldbs, ldcs;
    logical same;
    doublereal bets;
    logical tran, null;
    char uplo[1];
    extern /* Subroutine */ int dmake_(char *, char *, char *, integer *,
            integer *, doublereal *, integer *, doublereal *, integer *,
            logical *, doublereal *, ftnlen, ftnlen, ftnlen);
    doublereal alpha;
    extern /* Subroutine */ int dmmch_(char *, char *, integer *, integer *,
            integer *, doublereal *, doublereal *, integer *, doublereal *,
            integer *, doublereal *, doublereal *, integer *, doublereal *,
            doublereal *, doublereal *, integer *, doublereal *, doublereal *,
            logical *, integer *, logical *, ftnlen, ftnlen);
    logical isame[13];
    integer nargs;
    logical reset;
    char trans[1];
    logical upper;
    char uplos[1];
    extern /* Subroutine */ int dsyr2k_(char *, char *, integer *, integer *,
            doublereal *, doublereal *, integer *, doublereal *, integer *,
            doublereal *, doublereal *, integer *, ftnlen, ftnlen);
    extern logical lderes_(char *, char *, integer *, integer *,
            doublereal *, doublereal *, integer *, ftnlen, ftnlen);
    doublereal errmax;
    char transs[1];

    /* Fortran I/O blocks */
    /* The unit field (ciunit) of each block is filled in immediately */
    /* before the block is used. */
    static cilist io___322 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___323 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___326 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___333 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___334 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___335 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___336 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___337 = { 0, 0, 0, fmt_9994, 0 };

/* Tests DSYR2K. */
/* Auxiliary routine for test program for Level 3 Blas. */
/* -- Written on 8-February-1989. */
/* Jack Dongarra, Argonne National Laboratory. */
/* Iain Duff, AERE Harwell. */
/* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/* Sven Hammarling, Numerical Algorithms Group Ltd. */
/* .. Parameters .. */
/* .. Scalar Arguments .. */
/* .. Array Arguments .. */
/* .. Local Scalars .. */
/* .. Local Arrays .. */
/* .. External Functions .. */
/* .. External Subroutines .. */
/* .. Intrinsic Functions .. */
/* .. Scalars in Common .. */
/* .. Common blocks .. */
/* .. Data statements .. */

    /* Parameter adjustments */
    /* Each array pointer is moved back one element (c__ by 1 + c_dim1) so */
    /* that the 1-based subscripts used below, e.g. idim[in] with in */
    /* starting at 1 and c__[jj + j * c_dim1], reach the caller's data. */
    --idim;
    --alf;
    --bet;
    --w;
    --g;
    --ct;
    --cs;
    --cc;
    c_dim1 = *nmax;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --bs;
    --bb;
    --as;
    --aa;
    --ab;

    /* Function Body */
/* .. Executable Statements .. */

    /* nargs is the number of isame[] entries examined after each call; */
    /* nc counts the calls made; errmax accumulates the largest err. */
    nargs = 12;
    nc = 0;
    reset = TRUE_;
    errmax = 0.;

    i__1 = *nidim;
    for (in = 1; in <= i__1; ++in) {
        n = idim[in];
/* Set LDC to 1 more than minimum value if room. */
        ldc = n;
        if (ldc < *nmax) {
            ++ldc;
        }
/* Skip tests if not enough room. */
        /* L130 is the empty statement ending this loop body, so the jump */
        /* moves on to the next value of n. */
        if (ldc > *nmax) {
            goto L130;
        }
        lcc = ldc * n;
        /* null marks the n <= 0 case, for which the column-by-column */
        /* check further down is skipped. */
        null = n <= 0;

        i__2 = *nidim;
        for (ik = 1; ik <= i__2; ++ik) {
            k = idim[ik];

            for (ict = 1; ict <= 3; ++ict) {
                *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1];
                tran = *(unsigned char *)trans == 'T' || *(unsigned char *)
                        trans == 'C';
                /* A and B are generated as ma by na: k by n when tran, */
                /* n by k otherwise. */
                if (tran) {
                    ma = k;
                    na = n;
                } else {
                    ma = n;
                    na = k;
                }
/* Set LDA to 1 more than minimum value if room. */
                lda = ma;
                if (lda < *nmax) {
                    ++lda;
                }
/* Skip tests if not enough room. */
                /* L110 is the empty statement ending this loop body, so */
                /* the jump moves on to the next trans value. */
                if (lda > *nmax) {
                    goto L110;
                }
                laa = lda * na;

/* Generate the matrix A. */

                /* In the tran case 2 * *nmax (computed as *nmax << 1) is */
                /* passed where the other branch passes nmax. */
                if (tran) {
                    i__3 = *nmax << 1;
                    dmake_("GE", " ", " ", &ma, &na, &ab[1], &i__3, &aa[1], &
                            lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, (
                            ftnlen)1);
                } else {
                    dmake_("GE", " ", " ", &ma, &na, &ab[1], nmax, &aa[1], &
                            lda, &reset, &c_b86, (ftnlen)2, (ftnlen)1, (
                            ftnlen)1);
                }

/* Generate the matrix B. */

                /* B uses the same leading dimension and length as A but */
                /* starts at a different offset in ab: k + 1 when tran, */
                /* k * *nmax + 1 otherwise. */
                ldb = lda;
                lbb = laa;
                if (tran) {
                    i__3 = *nmax << 1;
                    dmake_("GE", " ", " ", &ma, &na, &ab[k + 1], &i__3, &bb[1]
                            , &ldb, &reset, &c_b86, (ftnlen)2, (ftnlen)1, (
                            ftnlen)1);
                } else {
                    dmake_("GE", " ", " ", &ma, &na, &ab[k * *nmax + 1], nmax,
                            &bb[1], &ldb, &reset, &c_b86, (ftnlen)2, (ftnlen)
                            1, (ftnlen)1);
                }

                for (icu = 1; icu <= 2; ++icu) {
                    *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];
                    upper = *(unsigned char *)uplo == 'U';

                    i__3 = *nalf;
                    for (ia = 1; ia <= i__3; ++ia) {
                        alpha = alf[ia];

                        i__4 = *nbet;
                        for (ib = 1; ib <= i__4; ++ib) {
                            beta = bet[ib];

/* Generate the matrix C. */

                            dmake_("SY", uplo, " ", &n, &n, &c__[c_offset],
                                    nmax, &cc[1], &ldc, &reset, &c_b86, (
                                    ftnlen)2, (ftnlen)1, (ftnlen)1);

                            ++nc;

/* Save every datum before calling the subroutine. */

                            *(unsigned char *)uplos = *(unsigned char *)uplo;
                            *(unsigned char *)transs = *(unsigned char *)
                                    trans;
                            ns = n;
                            ks = k;
                            als = alpha;
                            i__5 = laa;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                as[i__] = aa[i__];
/* L10: */
                            }
                            ldas = lda;
                            i__5 = lbb;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                bs[i__] = bb[i__];
/* L20: */
                            }
                            ldbs = ldb;
                            bets = beta;
                            i__5 = lcc;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                cs[i__] = cc[i__];
/* L30: */
                            }
                            ldcs = ldc;

/* Call the subroutine. */

                            /* Optional trace: call number, name and the */
                            /* scalar arguments, written with fmt_9994. */
                            if (*trace) {
                                io___322.ciunit = *ntra;
                                s_wsfe(&io___322);
                                do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, sname, (ftnlen)6);
                                do_fio(&c__1, uplo, (ftnlen)1);
                                do_fio(&c__1, trans, (ftnlen)1);
                                do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&k, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(
                                        doublereal));
                                do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(
                                        integer));
                                do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(
                                        doublereal));
                                do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(
                                        integer));
                                e_wsfe();
                            }
                            /* Rewind the trace unit before the call when */
                            /* requested. */
                            if (*rewi) {
                                al__1.aerr = 0;
                                al__1.aunit = *ntra;
                                f_rew(&al__1);
                            }
                            dsyr2k_(uplo, trans, &n, &k, &alpha, &aa[1], &lda,
                                    &bb[1], &ldb, &beta, &cc[1], &ldc, (
                                    ftnlen)1, (ftnlen)1);

/* Check if error-exit was taken incorrectly. */

                            /* infoc_1 is defined outside this function. */
                            if (! infoc_1.ok) {
                                io___323.ciunit = *nout;
                                s_wsfe(&io___323);
                                e_wsfe();
                                *fatal = TRUE_;
                                goto L150;
                            }

/* See what data changed inside subroutines. */

                            /* isame[i] corresponds to argument i + 1 of */
                            /* the dsyr2k_ call above. For C, lde_ is used */
                            /* when n <= 0 and lderes_ otherwise. */
                            isame[0] = *(unsigned char *)uplos == *(unsigned
                                    char *)uplo;
                            isame[1] = *(unsigned char *)transs == *(unsigned
                                    char *)trans;
                            isame[2] = ns == n;
                            isame[3] = ks == k;
                            isame[4] = als == alpha;
                            isame[5] = lde_(&as[1], &aa[1], &laa);
                            isame[6] = ldas == lda;
                            isame[7] = lde_(&bs[1], &bb[1], &lbb);
                            isame[8] = ldbs == ldb;
                            isame[9] = bets == beta;
                            if (null) {
                                isame[10] = lde_(&cs[1], &cc[1], &lcc);
                            } else {
                                isame[10] = lderes_("SY", uplo, &n, &n, &cs[1]
                                        , &cc[1], &ldc, (ftnlen)2, (ftnlen)1);
                            }
                            isame[11] = ldcs == ldc;

/* If data was incorrectly changed, report and */
/* return. */

                            /* Every offending argument number is printed */
                            /* before the routine bails out. */
                            same = TRUE_;
                            i__5 = nargs;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                same = same && isame[i__ - 1];
                                if (! isame[i__ - 1]) {
                                    io___326.ciunit = *nout;
                                    s_wsfe(&io___326);
                                    do_fio(&c__1, (char *)&i__, (ftnlen)
                                            sizeof(integer));
                                    e_wsfe();
                                }
/* L40: */
                            }
                            if (! same) {
                                *fatal = TRUE_;
                                goto L150;
                            }

                            if (! null) {

/* Check the result column by column. */

                                /* jc indexes the current column's start */
                                /* in cc; jjab indexes into ab for the */
                                /* tran case. */
                                jjab = 1;
                                jc = 1;
                                i__5 = n;
                                for (j = 1; j <= i__5; ++j) {
                                    /* For 'U' rows 1..j of column j are */
                                    /* checked, otherwise rows j..n. */
                                    if (upper) {
                                        jj = 1;
                                        lj = j;
                                    } else {
                                        jj = j;
                                        lj = n - j + 1;
                                    }
                                    /* w receives 2*k entries from ab: */
                                    /* the first k come from the region */
                                    /* offset by k, the second k from the */
                                    /* region without that offset. */
                                    if (tran) {
                                        i__6 = k;
                                        for (i__ = 1; i__ <= i__6; ++i__) {
                                            w[i__] = ab[(j - 1 << 1) * *nmax
                                                    + k + i__];
                                            w[k + i__] = ab[(j - 1 << 1) * *
                                                    nmax + i__];
/* L50: */
                                        }
                                        i__6 = k << 1;
                                        i__7 = *nmax << 1;
                                        i__8 = *nmax << 1;
                                        dmmch_("T", "N", &lj, &c__1, &i__6, &
                                                alpha, &ab[jjab], &i__7, &w[1]
                                                , &i__8, &beta, &c__[jj + j *
                                                c_dim1], nmax, &ct[1], &g[1],
                                                &cc[jc], &ldc, eps, &err,
                                                fatal, nout, &c_true, (ftnlen)
                                                1, (ftnlen)1);
                                    } else {
                                        i__6 = k;
                                        for (i__ = 1; i__ <= i__6; ++i__) {
                                            w[i__] = ab[(k + i__ - 1) * *nmax
                                                    + j];
                                            w[k + i__] = ab[(i__ - 1) * *nmax
                                                    + j];
/* L60: */
                                        }
                                        i__6 = k << 1;
                                        i__7 = *nmax << 1;
                                        dmmch_("N", "N", &lj, &c__1, &i__6, &
                                                alpha, &ab[jj], nmax, &w[1], &
                                                i__7, &beta, &c__[jj + j *
                                                c_dim1], nmax, &ct[1], &g[1],
                                                &cc[jc], &ldc, eps, &err,
                                                fatal, nout, &c_true, (ftnlen)
                                                1, (ftnlen)1);
                                    }
                                    /* Advance to the next column: by ldc */
                                    /* for 'U', by ldc + 1 otherwise; */
                                    /* jjab moves by 2 * *nmax only in */
                                    /* the non-upper tran case. */
                                    if (upper) {
                                        jc += ldc;
                                    } else {
                                        jc = jc + ldc + 1;
                                        if (tran) {
                                            jjab += *nmax << 1;
                                        }
                                    }
                                    errmax = max(errmax,err);
/* If got really bad answer, report and */
/* return. */
                                    if (*fatal) {
                                        goto L140;
                                    }
/* L70: */
                                }
                            }

/* L80: */
                        }

/* L90: */
                    }

/* L100: */
                }

L110:
                ;
            }

/* L120: */
        }

L130:
        ;
    }

/* Report result. */

    /* Normal completion: one summary line, then return through L160. */
    if (errmax < *thresh) {
        io___333.ciunit = *nout;
        s_wsfe(&io___333);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___334.ciunit = *nout;
        s_wsfe(&io___334);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal));
        e_wsfe();
    }
    goto L160;

    /* L140 is reached only from inside the column loop when *fatal is set */
    /* after dmmch_; it names the column and falls through to L150. */
L140:
    if (n > 1) {
        io___335.ciunit = *nout;
        s_wsfe(&io___335);
        do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer));
        e_wsfe();
    }

    /* L150: common failure report -- the "FAILED ON CALL NUMBER" banner */
    /* followed by the failing call's arguments in the trace format. */
L150:
    io___336.ciunit = *nout;
    s_wsfe(&io___336);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    io___337.ciunit = *nout;
    s_wsfe(&io___337);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, uplo, (ftnlen)1);
    do_fio(&c__1, trans, (ftnlen)1);
    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(doublereal));
    do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
    e_wsfe();

L160:
    return 0;

/* End of DCHK5. */

} /* dchk5_ */
/* Subroutine */ int dchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublereal a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), dsyrk_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), dsyr2k_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___343 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___344 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 3 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* A, B and C should not need to be defined. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. 
*/ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* 3-19-92: Initialize ALPHA and BETA (eca) */ /* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; /* Initialize ALPHA and BETA. */ alpha = 1.; beta = 2.; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; } L10: infoc_1.infot = 1; dgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; dgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, 
&infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, 
(ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; dgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgemm_("N", "N", 
&c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; dgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L20: infoc_1.infot = 1; dsymm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsymm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsymm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsymm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsymm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsymm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 4; dsymm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsymm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsymm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsymm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsymm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 9; dsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L30: infoc_1.infot = 1; dtrmm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtrmm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtrmm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtrmm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, 
(ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrmm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 6; dtrmm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrmm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("R", "U", "N", "N", &c__0, &c__2, &alpha, 
a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrmm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrmm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L40: infoc_1.infot = 1; dtrsm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dtrsm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dtrsm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dtrsm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, 
&infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; dtrsm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("R", "U", "N", "N", &c__0, 
&c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; dtrsm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dtrsm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 
6); infoc_1.infot = 11; dtrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; dtrsm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L50: infoc_1.infot = 1; dsyrk_("/", "N", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsyrk_("U", "/", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyrk_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyrk_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyrk_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyrk_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyrk_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 4; dsyrk_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyrk_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyrk_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyrk_("U", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyrk_("L", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsyrk_("U", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; dsyrk_("L", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( 
ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L60: infoc_1.infot = 1; dsyr2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; dsyr2k_("U", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyr2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyr2k_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyr2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; dsyr2k_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyr2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyr2k_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyr2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; dsyr2k_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, b, 
&c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; dsyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; dsyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; dsyr2k_("U", "T", &c__2, &c__0, &alpha, 
a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1);
    chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6);
    infoc_1.infot = 12;
    dsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1);
    chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6);
    infoc_1.infot = 12;
    dsyr2k_("L", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1);
    chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6);

L70:
    if (infoc_1.ok) {
        io___343.ciunit = *nout;
        s_wsfe(&io___343);
        do_fio(&c__1, srnamt, (ftnlen)6);
        e_wsfe();
    } else {
        io___344.ciunit = *nout;
        s_wsfe(&io___344);
        do_fio(&c__1, srnamt, (ftnlen)6);
        e_wsfe();
    }
    return 0;

/*     End of DCHKE. */

} /* dchke_ */

/*
 * dmake_ -- build a test matrix in A and a padded copy of it in AA.
 *
 *   type__   matrix kind; its first two characters are compared against
 *            "GE", "SY" and "TR".
 *   uplo     first character 'U' / 'L' selects the upper / lower triangle;
 *            only consulted when the kind is "SY" or "TR".
 *   diag     first character 'U' requests a unit diagonal; only consulted
 *            when the kind is "TR".
 *   m, n     row and column counts driving the generation loops.
 *   a        work matrix, addressed 1-based with leading dimension *nmax.
 *   aa       output array, addressed 1-based with leading dimension *lda.
 *   reset    forwarded unchanged to dbeg_(), which produces the values.
 *   transl   constant added to every generated value.
 *   type_len, uplo_len, diag_len
 *            Fortran string lengths; not referenced in the body.
 *
 * Always returns 0.
 */
/* Subroutine */ int dmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublereal *a, integer *nmax, doublereal *aa, integer * lda, logical *reset, doublereal *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;

    /* Builtin functions */
    integer s_cmp(const char *, const char *, ftnlen, ftnlen);

    /* Local variables */
    integer i__, j;
    logical gen, tri, sym;
    extern doublereal dbeg_(logical *);
    integer ibeg, iend;
    logical unit, lower, upper;

/*  Generates values for an M by N matrix A. */
/*  Stores the values in the array AA in the data structure required */
/*  by the routine, with unwanted elements set to rogue value. */

/*  TYPE is 'GE', 'SY' or 'TR'. */

/*  Auxiliary routine for test program for Level 3 Blas. */

/*  -- Written on 8-February-1989. */
/*     Jack Dongarra, Argonne National Laboratory. */
/*     Iain Duff, AERE Harwell. */
/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/*     Sven Hammarling, Numerical Algorithms Group Ltd. */

/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. External Functions .. */
/*     .. Executable Statements .. */
    /* Parameter adjustments: shift both pointers so that the Fortran-style */
    /* 1-based subscripts a[i + j*a_dim1] and aa[i] can be used below. */
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --aa;

    /* Function Body */
    /* Decode the matrix kind and the triangle / diagonal options once. */
    gen = s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0;
    sym = s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0;
    tri = s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0;
    upper = (sym || tri) && *(unsigned char *)uplo == 'U';
    lower = (sym || tri) && *(unsigned char *)uplo == 'L';
    unit = tri && *(unsigned char *)diag == 'U';

/*     Generate data in array A. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
        i__2 = *m;
        for (i__ = 1; i__ <= i__2; ++i__) {
            /* && binds tighter than ||: an element is generated for a */
            /* general matrix, or when it lies in the selected triangle */
            /* (diagonal included). */
            if (gen || upper && i__ <= j || lower && i__ >= j) {
                a[i__ + j * a_dim1] = dbeg_(reset) + *transl;
                if (i__ != j) {
/*                 Set some elements to zero */
                    /* When n > 3, the off-diagonal entries of column */
                    /* n/2 (integer division) are cleared. */
                    if (*n > 3 && j == *n / 2) {
                        a[i__ + j * a_dim1] = 0.;
                    }
                    /* Fill the transposed position: mirrored for a */
                    /* symmetric matrix, zero for a triangular one. */
                    if (sym) {
                        a[j + i__ * a_dim1] = a[i__ + j * a_dim1];
                    } else if (tri) {
                        a[j + i__ * a_dim1] = 0.;
                    }
                }
            }
/* L10: */
        }
        /* Triangular matrices get 1 added to the diagonal entry ... */
        if (tri) {
            a[j + j * a_dim1] += 1.;
        }
        /* ... and a unit-diagonal request overwrites it with exactly 1. */
        if (unit) {
            a[j + j * a_dim1] = 1.;
        }
/* L20: */
    }

/*     Store elements in array AA in data structure required by routine. */
/*     Every position of AA that does not receive a value from A is set */
/*     to the rogue value -1e10. */

    if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            /* Rows 1..m of column j come from A. */
            i__2 = *m;
            for (i__ = 1; i__ <= i__2; ++i__) {
                aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1];
/* L30: */
            }
            /* Rows m+1..lda are padding. */
            i__2 = *lda;
            for (i__ = *m + 1; i__ <= i__2; ++i__) {
                aa[i__ + (j - 1) * *lda] = -1e10;
/* L40: */
            }
/* L50: */
        }
    } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            /* [ibeg, iend] is the row range of column j that is copied; */
            /* with a unit diagonal the diagonal entry itself is left out. */
            if (upper) {
                ibeg = 1;
                if (unit) {
                    iend = j - 1;
                } else {
                    iend = j;
                }
            } else {
                if (unit) {
                    ibeg = j + 1;
                } else {
                    ibeg = j;
                }
                iend = *n;
            }
            /* Rows above the copied range. */
            i__2 = ibeg - 1;
            for (i__ = 1; i__ <= i__2; ++i__) {
                aa[i__ + (j - 1) * *lda] = -1e10;
/* L60: */
            }
            /* The copied range itself. */
            i__2 = iend;
            for (i__ = ibeg; i__ <= i__2; ++i__) {
                aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1];
/* L70: */
            }
            /* Rows below the copied range, up to lda. */
            i__2 = *lda;
            for (i__ = iend + 1; i__ <= i__2; ++i__) {
                aa[i__ + (j - 1) * *lda] = -1e10;
/* L80: */
            }
/* L90: */
        }
    }
    return 0;

/*     End of DMAKE. */

} /* dmake_ */

/*
 * dmmch_ -- check the columns of a computed matrix product against a
 * reference value formed here.
 *
 * For each column j the body accumulates, in ct, the sum over k of
 * op(A)(i,k) * op(B)(k,j), where op() is a transpose when the first
 * character of transa / transb is 'T' or 'C'.  ct is then scaled to
 * alpha*ct + beta*C(i,j), and g receives the same expression built from
 * absolute values.  The error ratio |ct - cc| / eps (divided by g when g
 * is non-zero) is tracked in *err; as soon as *err * sqrt(*eps) >= 1 the
 * routine sets *fatal, writes the offending column to unit *nout and
 * returns.  *mv selects the order in which ct and cc are printed.
 *
 * transa_len and transb_len are Fortran string lengths and are not
 * referenced in the body.  Always returns 0.
 */
/* Subroutine */ int dmmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, doublereal *alpha, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, doublereal *ct, doublereal *g, doublereal *cc, integer * ldcc, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len)
{
    /* Format strings */
    static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 EX" "PECTED RESULT COMPU\002,\002TED RESULT\002)";
    static char fmt_9998[] = "(1x,i7,2g18.6)";
    static char fmt_9997[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)";

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3;
    doublereal d__1, d__2;

    /* Builtin functions */
    double sqrt(doublereal);
    integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen);

    /* Local
variables */
    integer i__, j, k;
    doublereal erri;
    logical trana, tranb;

    /* Fortran I/O blocks */
    /* io___361 prints the fatal-error banner, io___362 / io___363 print */
    /* one result row each, io___364 prints the column number. */
    static cilist io___361 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___362 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___363 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___364 = { 0, 0, 0, fmt_9997, 0 };

/*  Checks the results of the computational tests. */

/*  Auxiliary routine for test program for Level 3 Blas. */

/*  -- Written on 8-February-1989. */
/*     Jack Dongarra, Argonne National Laboratory. */
/*     Iain Duff, AERE Harwell. */
/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/*     Sven Hammarling, Numerical Algorithms Group Ltd. */

/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Intrinsic Functions .. */
/*     .. Executable Statements .. */
    /* Parameter adjustments: shift every array pointer so the Fortran */
    /* 1-based subscripts (x[i + j*x_dim1], ct[i], g[i]) work unchanged. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --ct;
    --g;
    cc_dim1 = *ldcc;
    cc_offset = 1 + cc_dim1;
    cc -= cc_offset;

    /* Function Body */
    /* 'T' and 'C' are treated alike: both select the transposed operand. */
    trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C';
    tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C';

/*     Compute expected result, one column at a time, in CT using data */
/*     in A, B and C. */
/*     Compute gauges in G. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {

        /* Clear the accumulators for column j. */
        i__2 = *m;
        for (i__ = 1; i__ <= i__2; ++i__) {
            ct[i__] = 0.;
            g[i__] = 0.;
/* L10: */
        }
        /* The four branches differ only in which subscript order is used */
        /* for A and B; each adds the product to ct and the product of */
        /* absolute values to g. */
        /* NOTE(review): abs() and max() are presumably the f2c.h macros; */
        /* they are not defined in this part of the file -- confirm. */
        if (! trana && ! tranb) {
            /* A(i,k) * B(k,j) */
            i__2 = *kk;
            for (k = 1; k <= i__2; ++k) {
                i__3 = *m;
                for (i__ = 1; i__ <= i__3; ++i__) {
                    ct[i__] += a[i__ + k * a_dim1] * b[k + j * b_dim1];
                    g[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = b[k + j * b_dim1], abs(d__2));
/* L20: */
                }
/* L30: */
            }
        } else if (trana && ! tranb) {
            /* A(k,i) * B(k,j) */
            i__2 = *kk;
            for (k = 1; k <= i__2; ++k) {
                i__3 = *m;
                for (i__ = 1; i__ <= i__3; ++i__) {
                    ct[i__] += a[k + i__ * a_dim1] * b[k + j * b_dim1];
                    g[i__] += (d__1 = a[k + i__ * a_dim1], abs(d__1)) * (d__2 = b[k + j * b_dim1], abs(d__2));
/* L40: */
                }
/* L50: */
            }
        } else if (! trana && tranb) {
            /* A(i,k) * B(j,k) */
            i__2 = *kk;
            for (k = 1; k <= i__2; ++k) {
                i__3 = *m;
                for (i__ = 1; i__ <= i__3; ++i__) {
                    ct[i__] += a[i__ + k * a_dim1] * b[j + k * b_dim1];
                    g[i__] += (d__1 = a[i__ + k * a_dim1], abs(d__1)) * (d__2 = b[j + k * b_dim1], abs(d__2));
/* L60: */
                }
/* L70: */
            }
        } else if (trana && tranb) {
            /* A(k,i) * B(j,k) */
            i__2 = *kk;
            for (k = 1; k <= i__2; ++k) {
                i__3 = *m;
                for (i__ = 1; i__ <= i__3; ++i__) {
                    ct[i__] += a[k + i__ * a_dim1] * b[j + k * b_dim1];
                    g[i__] += (d__1 = a[k + i__ * a_dim1], abs(d__1)) * (d__2 = b[j + k * b_dim1], abs(d__2));
/* L80: */
                }
/* L90: */
            }
        }
        /* Apply the scalars: ct = alpha*ct + beta*C(:,j), and the */
        /* matching absolute-value form for the gauge. */
        i__2 = *m;
        for (i__ = 1; i__ <= i__2; ++i__) {
            ct[i__] = *alpha * ct[i__] + *beta * c__[i__ + j * c_dim1];
            g[i__] = abs(*alpha) * g[i__] + abs(*beta) * (d__1 = c__[i__ + j * c_dim1], abs(d__1));
/* L100: */
        }

/*        Compute the error ratio for this result. */
/*        *err is reset for every column, so on a normal return it holds */
/*        the largest ratio of the last column processed. */

        *err = 0.;
        i__2 = *m;
        for (i__ = 1; i__ <= i__2; ++i__) {
            erri = (d__1 = ct[i__] - cc[i__ + j * cc_dim1], abs(d__1)) / *eps;
            /* Scale by the gauge unless it is exactly zero. */
            if (g[i__] != 0.) {
                erri /= g[i__];
            }
            *err = max(*err,erri);
            /* Leave both loops at the first ratio that fails the test; */
            /* i__ and j keep their values for the report at L130. */
            if (*err * sqrt(*eps) >= 1.) {
                goto L130;
            }
/* L110: */
        }
/* L120: */
    }

/*     If the loop completes, all results are at least half accurate. */
    goto L150;

/*     Report fatal error.
*/ L130: *fatal = TRUE_; io___361.ciunit = *nout; s_wsfe(&io___361); e_wsfe(); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___362.ciunit = *nout; s_wsfe(&io___362); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ct[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof( doublereal)); e_wsfe(); } else { io___363.ciunit = *nout; s_wsfe(&io___363); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&ct[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); } /* L140: */ } if (*n > 1) { io___364.ciunit = *nout; s_wsfe(&io___364); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: return 0; /* End of DMMCH. */ } /* dmmch_ */ logical lde_(doublereal *ri, doublereal *rj, integer *lr) { /* System generated locals */ integer i__1; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { if (ri[i__] != rj[i__]) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LDE. 
*/ } /* lde_ */ logical lderes_(char *type__, char *uplo, integer *m, integer *n, doublereal * aa, doublereal *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE' or 'SY'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LDERES. 
*/ } /* lderes_ */ doublereal dbeg_(logical *reset) { /* System generated locals */ doublereal ret_val; /* Local variables */ static integer i__, ic, mi; /* Generates random numbers uniformly distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; i__ = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I is bounded between 1 and 999. */ /* If initial I = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I = 4 or 8, the period will be 25. */ /* If initial I = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I in 6. */ ++ic; L10: i__ *= mi; i__ -= i__ / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } ret_val = (i__ - 500) / 1001.; return ret_val; /* End of DBEG. */ } /* dbeg_ */ doublereal ddiff_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val; /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of DDIFF. 
*/

} /* ddiff_ */

/*
 * chkxer_ -- test whether XERBLA has detected an error when it should.
 *
 *   srnamt  name of the routine under test; 6 characters are printed.
 *   infot   number of the parameter that was made illegal.
 *   nout    output unit for the diagnostic.
 *   lerr    flag expected to be true on entry if the error was detected;
 *           always false on return.
 *   ok      set to FALSE_ when the error was not detected, otherwise
 *           left untouched.
 *   srnamt_len  Fortran string length; not referenced in the body.
 *
 * Always returns 0.
 *
 * Auxiliary routine for test program for Level 3 Blas.
 * Written on 8-February-1989 by Jack Dongarra (Argonne National
 * Laboratory), Iain Duff (AERE Harwell), Jeremy Du Croz and Sven
 * Hammarling (Numerical Algorithms Group Ltd.).
 */
/* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len)
{
    /* Format strings */
    static char missed_fmt[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)";

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Fortran I/O blocks */
    static cilist missed_io = { 0, 0, 0, missed_fmt, 0 };

    /* Error was flagged as expected: just re-arm the flag. */
    if (*lerr) {
        *lerr = FALSE_;
        return 0;
    }

    /* Error went unnoticed: say so and mark the run as failed. */
    missed_io.ciunit = *nout;
    s_wsfe(&missed_io);
    do_fio(&c__1, (char *)infot, (ftnlen)sizeof(integer));
    do_fio(&c__1, srnamt, (ftnlen)6);
    e_wsfe();
    *ok = FALSE_;

    *lerr = FALSE_;
    return 0;

/*     End of CHKXER.
*/ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___375 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___376 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___377 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 3 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 3 BLAS routines. */ /* It is called by the Level 3 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. 
*/ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___375.ciunit = infoc_2.nout; s_wsfe(&io___375); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___376.ciunit = infoc_2.nout; s_wsfe(&io___376); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___377.ciunit = infoc_2.nout; s_wsfe(&io___377); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int dblat3_ () { main (); return 0; } blis-0.6.1/blastest/src/fortran/000077500000000000000000000000001360743507500165445ustar00rootroot00000000000000blis-0.6.1/blastest/src/fortran/cblat1.f000066400000000000000000000765551360743507500201030ustar00rootroot00000000000000*> \brief \b CBLAT1 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM CBLAT1 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX Level 1 BLAS. *> Based upon the original BLAS test routine together with: *> *> F06GAF Example Program Text *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex_blas_testing * * ===================================================================== PROGRAM CBLAT1 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. 
Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. REAL SFAC INTEGER IC * .. External Subroutines .. EXTERNAL CHECK1, CHECK2, HEADER * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA SFAC/9.765625E-4/ * .. Executable Statements .. WRITE (NOUT,99999) DO 20 IC = 1, 10 ICASE = IC CALL HEADER * * Initialize PASS, INCX, INCY, and MODE for a new case. * The value 9999 for INCX, INCY or MODE will appear in the * detailed output, if any, for cases that do not involve * these parameters. * PASS = .TRUE. INCX = 9999 INCY = 9999 MODE = 9999 IF (ICASE.LE.5) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.GE.6) THEN CALL CHECK1(SFAC) END IF * -- Print IF (PASS) WRITE (NOUT,99998) 20 CONTINUE STOP * 99999 FORMAT (' Complex BLAS Test Program Results',/1X) 99998 FORMAT (' ----- PASS -----') END SUBROUTINE HEADER * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Arrays .. CHARACTER*6 L(10) * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA L(1)/'CDOTC '/ DATA L(2)/'CDOTU '/ DATA L(3)/'CAXPY '/ DATA L(4)/'CCOPY '/ DATA L(5)/'CSWAP '/ DATA L(6)/'SCNRM2'/ DATA L(7)/'SCASUM'/ DATA L(8)/'CSCAL '/ DATA L(9)/'CSSCAL'/ DATA L(10)/'ICAMAX'/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN * 99999 FORMAT (/' Test of subprogram number',I3,12X,A6) END SUBROUTINE CHECK1(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. COMPLEX CA REAL SA INTEGER I, J, LEN, NP1 * .. Local Arrays .. COMPLEX CTRUE5(8,5,2), CTRUE6(8,5,2), CV(8,5,2), CX(8), + MWPCS(5), MWPCT(5) REAL STRUE2(5), STRUE4(5) INTEGER ITRUE3(5) * .. External Functions .. 
REAL SCASUM, SCNRM2 INTEGER ICAMAX EXTERNAL SCASUM, SCNRM2, ICAMAX * .. External Subroutines .. EXTERNAL CSCAL, CSSCAL, CTEST, ITEST1, STEST1 * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA SA, CA/0.3E0, (0.4E0,-0.7E0)/ DATA ((CV(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (0.3E0,-0.4E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (0.1E0,-0.3E0), (0.5E0,-0.1E0), (5.0E0,6.0E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (0.1E0,0.1E0), + (-0.6E0,0.1E0), (0.1E0,-0.3E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (0.3E0,0.1E0), (0.5E0,0.0E0), + (0.0E0,0.5E0), (0.0E0,0.2E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (0.3E0,-0.4E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (0.1E0,-0.3E0), (8.0E0,9.0E0), (0.5E0,-0.1E0), + (2.0E0,5.0E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (2.0E0,5.0E0), (2.0E0,5.0E0), (0.1E0,0.1E0), + (3.0E0,6.0E0), (-0.6E0,0.1E0), (4.0E0,7.0E0), + (0.1E0,-0.3E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.3E0,0.1E0), (5.0E0,8.0E0), + (0.5E0,0.0E0), (6.0E0,9.0E0), (0.0E0,0.5E0), + (8.0E0,3.0E0), (0.0E0,0.2E0), (9.0E0,4.0E0)/ DATA STRUE2/0.0E0, 0.5E0, 0.6E0, 0.7E0, 0.8E0/ DATA STRUE4/0.0E0, 0.7E0, 1.0E0, 1.3E0, 1.6E0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (-0.16E0,-0.37E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), 
(3.0E0,4.0E0), (3.0E0,4.0E0), + (-0.17E0,-0.19E0), (0.13E0,-0.39E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (0.11E0,-0.03E0), (-0.17E0,0.46E0), + (-0.17E0,-0.19E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (0.19E0,-0.17E0), (0.20E0,-0.35E0), + (0.35E0,0.20E0), (0.14E0,0.08E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0), + (2.0E0,3.0E0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (-0.16E0,-0.37E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (-0.17E0,-0.19E0), (8.0E0,9.0E0), + (0.13E0,-0.39E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (2.0E0,5.0E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (0.11E0,-0.03E0), (3.0E0,6.0E0), + (-0.17E0,0.46E0), (4.0E0,7.0E0), + (-0.17E0,-0.19E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.19E0,-0.17E0), (5.0E0,8.0E0), + (0.20E0,-0.35E0), (6.0E0,9.0E0), + (0.35E0,0.20E0), (8.0E0,3.0E0), + (0.14E0,0.08E0), (9.0E0,4.0E0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1E0,0.1E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (1.0E0,2.0E0), (1.0E0,2.0E0), + (1.0E0,2.0E0), (0.09E0,-0.12E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (3.0E0,4.0E0), (3.0E0,4.0E0), (3.0E0,4.0E0), + (0.03E0,-0.09E0), (0.15E0,-0.03E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (5.0E0,6.0E0), (5.0E0,6.0E0), (5.0E0,6.0E0), + (0.03E0,0.03E0), (-0.18E0,0.03E0), + (0.03E0,-0.09E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (7.0E0,8.0E0), (7.0E0,8.0E0), (7.0E0,8.0E0), + (0.09E0,0.03E0), (0.15E0,0.00E0), + (0.00E0,0.15E0), (0.00E0,0.06E0), (2.0E0,3.0E0), + (2.0E0,3.0E0), (2.0E0,3.0E0), (2.0E0,3.0E0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1E0,0.1E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (4.0E0,5.0E0), (4.0E0,5.0E0), + (4.0E0,5.0E0), (0.09E0,-0.12E0), 
(6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (6.0E0,7.0E0), (6.0E0,7.0E0), (6.0E0,7.0E0), + (0.03E0,-0.09E0), (8.0E0,9.0E0), + (0.15E0,-0.03E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (2.0E0,5.0E0), (2.0E0,5.0E0), (2.0E0,5.0E0), + (0.03E0,0.03E0), (3.0E0,6.0E0), + (-0.18E0,0.03E0), (4.0E0,7.0E0), + (0.03E0,-0.09E0), (7.0E0,2.0E0), (7.0E0,2.0E0), + (7.0E0,2.0E0), (0.09E0,0.03E0), (5.0E0,8.0E0), + (0.15E0,0.00E0), (6.0E0,9.0E0), (0.00E0,0.15E0), + (8.0E0,3.0E0), (0.00E0,0.06E0), (9.0E0,4.0E0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 DO 40 NP1 = 1, 5 N = NP1 - 1 LEN = 2*MAX(N,1) * .. Set vector arguments .. DO 20 I = 1, LEN CX(I) = CV(I,NP1,INCX) 20 CONTINUE IF (ICASE.EQ.6) THEN * .. SCNRM2 .. CALL STEST1(SCNRM2(N,CX,INCX),STRUE2(NP1),STRUE2(NP1), + SFAC) ELSE IF (ICASE.EQ.7) THEN * .. SCASUM .. CALL STEST1(SCASUM(N,CX,INCX),STRUE4(NP1),STRUE4(NP1), + SFAC) ELSE IF (ICASE.EQ.8) THEN * .. CSCAL .. CALL CSCAL(N,CA,CX,INCX) CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.9) THEN * .. CSSCAL .. CALL CSSCAL(N,SA,CX,INCX) CALL CTEST(LEN,CX,CTRUE6(1,NP1,INCX),CTRUE6(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.10) THEN * .. ICAMAX .. CALL ITEST1(ICAMAX(N,CX,INCX),ITRUE3(NP1)) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' STOP END IF * 40 CONTINUE 60 CONTINUE * INCX = 1 IF (ICASE.EQ.8) THEN * CSCAL * Add a test for alpha equal to zero. CA = (0.0E0,0.0E0) DO 80 I = 1, 5 MWPCT(I) = (0.0E0,0.0E0) MWPCS(I) = (1.0E0,1.0E0) 80 CONTINUE CALL CSCAL(5,CA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) ELSE IF (ICASE.EQ.9) THEN * CSSCAL * Add a test for alpha equal to zero. SA = 0.0E0 DO 100 I = 1, 5 MWPCT(I) = (0.0E0,0.0E0) MWPCS(I) = (1.0E0,1.0E0) 100 CONTINUE CALL CSSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) * Add a test for alpha equal to one. 
SA = 1.0E0 DO 120 I = 1, 5 MWPCT(I) = CX(I) MWPCS(I) = CX(I) 120 CONTINUE CALL CSSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) * Add a test for alpha equal to minus one. SA = -1.0E0 DO 140 I = 1, 5 MWPCT(I) = -CX(I) MWPCS(I) = -CX(I) 140 CONTINUE CALL CSSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) END IF RETURN END SUBROUTINE CHECK2(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. COMPLEX CA INTEGER I, J, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. COMPLEX CDOT(1), CSIZE1(4), CSIZE2(7,2), CSIZE3(14), + CT10X(7,4,4), CT10Y(7,4,4), CT6(4,4), CT7(4,4), + CT8(7,4,4), CX(7), CX1(7), CY(7), CY1(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. COMPLEX CDOTC, CDOTU EXTERNAL CDOTC, CDOTU * .. External Subroutines .. EXTERNAL CAXPY, CCOPY, CSWAP, CTEST * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. 
DATA CA/(0.4E0,-0.7E0)/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA CX1/(0.7E0,-0.8E0), (-0.4E0,-0.7E0), + (-0.1E0,-0.9E0), (0.2E0,-0.8E0), + (-0.9E0,-0.4E0), (0.1E0,0.4E0), (-0.6E0,0.6E0)/ DATA CY1/(0.6E0,-0.6E0), (-0.9E0,0.5E0), + (0.7E0,-0.6E0), (0.1E0,-0.5E0), (-0.1E0,-0.2E0), + (-0.5E0,-0.3E0), (0.8E0,-0.7E0)/ DATA ((CT8(I,J,1),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.32E0,-1.41E0), + (-1.55E0,0.5E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (-1.55E0,0.5E0), + (0.03E0,-0.89E0), (-0.38E0,-0.96E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/ DATA ((CT8(I,J,2),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.07E0,-0.89E0), + (-0.9E0,0.5E0), (0.42E0,-1.41E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.78E0,0.06E0), (-0.9E0,0.5E0), + (0.06E0,-0.13E0), (0.1E0,-0.5E0), + (-0.77E0,-0.49E0), (-0.5E0,-0.3E0), + (0.52E0,-1.51E0)/ DATA ((CT8(I,J,3),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.32E0,-1.41E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.07E0,-0.89E0), + (-1.18E0,-0.31E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.78E0,0.06E0), (-1.54E0,0.97E0), + (0.03E0,-0.89E0), (-0.18E0,-1.31E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/ DATA ((CT8(I,J,4),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), 
(0.0E0,0.0E0), + (0.32E0,-1.41E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.32E0,-1.41E0), (-0.9E0,0.5E0), + (0.05E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.32E0,-1.41E0), + (-0.9E0,0.5E0), (0.05E0,-0.6E0), (0.1E0,-0.5E0), + (-0.77E0,-0.49E0), (-0.5E0,-0.3E0), + (0.32E0,-1.16E0)/ DATA CT7/(0.0E0,0.0E0), (-0.06E0,-0.90E0), + (0.65E0,-0.47E0), (-0.34E0,-1.22E0), + (0.0E0,0.0E0), (-0.06E0,-0.90E0), + (-0.59E0,-1.46E0), (-1.04E0,-0.04E0), + (0.0E0,0.0E0), (-0.06E0,-0.90E0), + (-0.83E0,0.59E0), (0.07E0,-0.37E0), + (0.0E0,0.0E0), (-0.06E0,-0.90E0), + (-0.76E0,-1.15E0), (-1.33E0,-1.82E0)/ DATA CT6/(0.0E0,0.0E0), (0.90E0,0.06E0), + (0.91E0,-0.77E0), (1.80E0,-0.10E0), + (0.0E0,0.0E0), (0.90E0,0.06E0), (1.45E0,0.74E0), + (0.20E0,0.90E0), (0.0E0,0.0E0), (0.90E0,0.06E0), + (-0.55E0,0.23E0), (0.83E0,-0.39E0), + (0.0E0,0.0E0), (0.90E0,0.06E0), (1.04E0,0.79E0), + (1.95E0,1.22E0)/ DATA ((CT10X(I,J,1),I=1,7),J=1,4)/(0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.6E0,-0.6E0), (-0.9E0,0.5E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.6E0,-0.6E0), + (-0.9E0,0.5E0), (0.7E0,-0.6E0), (0.1E0,-0.5E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/ DATA ((CT10X(I,J,2),I=1,7),J=1,4)/(0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.7E0,-0.6E0), (-0.4E0,-0.7E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.8E0,-0.7E0), + (-0.4E0,-0.7E0), (-0.1E0,-0.2E0), + (0.2E0,-0.8E0), (0.7E0,-0.6E0), (0.1E0,0.4E0), + (0.6E0,-0.6E0)/ DATA ((CT10X(I,J,3),I=1,7),J=1,4)/(0.7E0,-0.8E0), + 
(0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.9E0,0.5E0), (-0.4E0,-0.7E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.1E0,-0.5E0), + (-0.4E0,-0.7E0), (0.7E0,-0.6E0), (0.2E0,-0.8E0), + (-0.9E0,0.5E0), (0.1E0,0.4E0), (0.6E0,-0.6E0)/ DATA ((CT10X(I,J,4),I=1,7),J=1,4)/(0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.6E0,-0.6E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.6E0,-0.6E0), (0.7E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.6E0,-0.6E0), + (0.7E0,-0.6E0), (-0.1E0,-0.2E0), (0.8E0,-0.7E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0)/ DATA ((CT10Y(I,J,1),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.7E0,-0.8E0), (-0.4E0,-0.7E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.7E0,-0.8E0), + (-0.4E0,-0.7E0), (-0.1E0,-0.9E0), + (0.2E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0)/ DATA ((CT10Y(I,J,2),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.1E0,-0.9E0), (-0.9E0,0.5E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (-0.6E0,0.6E0), + (-0.9E0,0.5E0), (-0.9E0,-0.4E0), (0.1E0,-0.5E0), + (-0.1E0,-0.9E0), (-0.5E0,-0.3E0), + (0.7E0,-0.8E0)/ DATA ((CT10Y(I,J,3),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + 
(0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (-0.1E0,-0.9E0), (0.7E0,-0.8E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (-0.6E0,0.6E0), + (-0.9E0,-0.4E0), (-0.1E0,-0.9E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0)/ DATA ((CT10Y(I,J,4),I=1,7),J=1,4)/(0.6E0,-0.6E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.7E0,-0.8E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.7E0,-0.8E0), (-0.9E0,0.5E0), + (-0.4E0,-0.7E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.7E0,-0.8E0), + (-0.9E0,0.5E0), (-0.4E0,-0.7E0), (0.1E0,-0.5E0), + (-0.1E0,-0.9E0), (-0.5E0,-0.3E0), + (0.2E0,-0.8E0)/ DATA CSIZE1/(0.0E0,0.0E0), (0.9E0,0.9E0), + (1.63E0,1.73E0), (2.90E0,2.78E0)/ DATA CSIZE3/(0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (1.17E0,1.17E0), + (1.17E0,1.17E0), (1.17E0,1.17E0), + (1.17E0,1.17E0), (1.17E0,1.17E0), + (1.17E0,1.17E0), (1.17E0,1.17E0)/ DATA CSIZE2/(0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (0.0E0,0.0E0), + (0.0E0,0.0E0), (0.0E0,0.0E0), (1.54E0,1.54E0), + (1.54E0,1.54E0), (1.54E0,1.54E0), + (1.54E0,1.54E0), (1.54E0,1.54E0), + (1.54E0,1.54E0), (1.54E0,1.54E0)/ * .. Executable Statements .. DO 60 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 40 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * .. initialize all argument arrays .. DO 20 I = 1, 7 CX(I) = CX1(I) CY(I) = CY1(I) 20 CONTINUE IF (ICASE.EQ.1) THEN * .. CDOTC .. CDOT(1) = CDOTC(N,CX,INCX,CY,INCY) CALL CTEST(1,CDOT,CT6(KN,KI),CSIZE1(KN),SFAC) ELSE IF (ICASE.EQ.2) THEN * .. CDOTU .. 
CDOT(1) = CDOTU(N,CX,INCX,CY,INCY) CALL CTEST(1,CDOT,CT7(KN,KI),CSIZE1(KN),SFAC) ELSE IF (ICASE.EQ.3) THEN * .. CAXPY .. CALL CAXPY(N,CA,CX,INCX,CY,INCY) CALL CTEST(LENY,CY,CT8(1,KN,KI),CSIZE2(1,KSIZE),SFAC) ELSE IF (ICASE.EQ.4) THEN * .. CCOPY .. CALL CCOPY(N,CX,INCX,CY,INCY) CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0E0) ELSE IF (ICASE.EQ.5) THEN * .. CSWAP .. CALL CSWAP(N,CX,INCX,CY,INCY) CALL CTEST(LENX,CX,CT10X(1,KN,KI),CSIZE3,1.0E0) CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0E0) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP END IF * 40 CONTINUE 60 CONTINUE RETURN END SUBROUTINE STEST(LEN,SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT REAL ZERO PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC INTEGER LEN * .. Array Arguments .. REAL SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. REAL SD INTEGER I * .. External Functions .. REAL SDIFF EXTERNAL SDIFF * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. 
WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY MODE I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,3I5,I3,2E36.8,2E12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** * * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. REAL SCOMP1, SFAC, STRUE1 * .. Array Arguments .. REAL SSIZE(*) * .. Local Arrays .. REAL SCOMP(1), STRUE(1) * .. External Subroutines .. EXTERNAL STEST * .. Executable Statements .. * SCOMP(1) = SCOMP1 STRUE(1) = STRUE1 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC) * RETURN END REAL FUNCTION SDIFF(SA,SB) * ********************************* SDIFF ************************** * COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 * * .. Scalar Arguments .. REAL SA, SB * .. Executable Statements .. SDIFF = SA - SB RETURN END SUBROUTINE CTEST(LEN,CCOMP,CTRUE,CSIZE,SFAC) * **************************** CTEST ***************************** * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. REAL SFAC INTEGER LEN * .. Array Arguments .. COMPLEX CCOMP(LEN), CSIZE(LEN), CTRUE(LEN) * .. Local Scalars .. INTEGER I * .. Local Arrays .. REAL SCOMP(20), SSIZE(20), STRUE(20) * .. External Subroutines .. EXTERNAL STEST * .. Intrinsic Functions .. INTRINSIC AIMAG, REAL * .. Executable Statements .. 
DO 20 I = 1, LEN SCOMP(2*I-1) = REAL(CCOMP(I)) SCOMP(2*I) = AIMAG(CCOMP(I)) STRUE(2*I-1) = REAL(CTRUE(I)) STRUE(2*I) = AIMAG(CTRUE(I)) SSIZE(2*I-1) = REAL(CSIZE(I)) SSIZE(2*I) = AIMAG(CSIZE(I)) 20 CONTINUE * CALL STEST(2*LEN,SCOMP,STRUE,SSIZE,SFAC) RETURN END SUBROUTINE ITEST1(ICOMP,ITRUE) * ********************************* ITEST1 ************************* * * THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR * EQUALITY. * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Executable Statements .. IF (ICOMP.EQ.ITRUE) GO TO 40 * * HERE ICOMP IS NOT EQUAL TO ITRUE. * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY MODE ', + ' COMP TRUE DIFFERENCE', + /1X) 99997 FORMAT (1X,I4,I3,3I5,2I36,I12) END blis-0.6.1/blastest/src/fortran/cblat2.f000066400000000000000000003436611360743507500200770ustar00rootroot00000000000000*> \brief \b CBLAT2 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM CBLAT2 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX Level 2 Blas. *> *> The program must be driven by a short data file. The first 18 records *> of the file are read using list-directed input, the last 17 records *> are read using the format ( A6, L2 ). 
An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 35 lines: *> 'cblat2.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 4 NUMBER OF VALUES OF K *> 0 1 2 4 VALUES OF K *> 4 NUMBER OF VALUES OF INCX AND INCY *> 1 2 -1 -2 VALUES OF INCX AND INCY *> 3 NUMBER OF VALUES OF ALPHA *> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA *> CGEMV T PUT F FOR NO TEST. SAME COLUMNS. *> CGBMV T PUT F FOR NO TEST. SAME COLUMNS. *> CHEMV T PUT F FOR NO TEST. SAME COLUMNS. *> CHBMV T PUT F FOR NO TEST. SAME COLUMNS. *> CHPMV T PUT F FOR NO TEST. SAME COLUMNS. *> CTRMV T PUT F FOR NO TEST. SAME COLUMNS. *> CTBMV T PUT F FOR NO TEST. SAME COLUMNS. *> CTPMV T PUT F FOR NO TEST. SAME COLUMNS. *> CTRSV T PUT F FOR NO TEST. SAME COLUMNS. *> CTBSV T PUT F FOR NO TEST. SAME COLUMNS. *> CTPSV T PUT F FOR NO TEST. SAME COLUMNS. *> CGERC T PUT F FOR NO TEST. SAME COLUMNS. *> CGERU T PUT F FOR NO TEST. SAME COLUMNS. *> CHER T PUT F FOR NO TEST. SAME COLUMNS. *> CHPR T PUT F FOR NO TEST. SAME COLUMNS. *> CHER2 T PUT F FOR NO TEST. SAME COLUMNS. *> CHPR2 T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. *> An extended set of Fortran Basic Linear Algebra Subprograms. *> *> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics *> and Computer Science Division, Argonne National Laboratory, *> 9700 South Cass Avenue, Argonne, Illinois 60439, US. 
*> *> Or *> *> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms *> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford *> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st *> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. *> *> *> -- Written on 10-August-1987. *> Richard Hanson, Sandia National Labs. *> Jeremy Du Croz, NAG Central Office. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex_blas_testing * * ===================================================================== PROGRAM CBLAT2 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 17 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. REAL EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NINC, NKB, $ NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANS CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. 
COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), BET( NBEMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( 2*NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDMAX ), INC( NINMAX ), KB( NKBMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. REAL SDIFF LOGICAL LCE EXTERNAL SDIFF, LCE * .. External Subroutines .. EXTERNAL CCHK1, CCHK2, CCHK3, CCHK4, CCHK5, CCHK6, $ CCHKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'CGEMV ', 'CGBMV ', 'CHEMV ', 'CHBMV ', $ 'CHPMV ', 'CTRMV ', 'CTBMV ', 'CTPMV ', $ 'CTRSV ', 'CTBSV ', 'CTPSV ', 'CGERC ', $ 'CGERU ', 'CHER ', 'CHPR ', 'CHER2 ', $ 'CHPR2 '/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. 
* * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 230 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 230 END IF 10 CONTINUE * Values of K READ( NIN, FMT = * )NKB IF( NKB.LT.1.OR.NKB.GT.NKBMAX )THEN WRITE( NOUT, FMT = 9997 )'K', NKBMAX GO TO 230 END IF READ( NIN, FMT = * )( KB( I ), I = 1, NKB ) DO 20 I = 1, NKB IF( KB( I ).LT.0 )THEN WRITE( NOUT, FMT = 9995 ) GO TO 230 END IF 20 CONTINUE * Values of INCX and INCY READ( NIN, FMT = * )NINC IF( NINC.LT.1.OR.NINC.GT.NINMAX )THEN WRITE( NOUT, FMT = 9997 )'INCX AND INCY', NINMAX GO TO 230 END IF READ( NIN, FMT = * )( INC( I ), I = 1, NINC ) DO 30 I = 1, NINC IF( INC( I ).EQ.0.OR.ABS( INC( I ) ).GT.INCMAX )THEN WRITE( NOUT, FMT = 9994 )INCMAX GO TO 230 END IF 30 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 230 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 230 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9991 )( KB( I ), I = 1, NKB ) WRITE( NOUT, FMT = 9990 )( INC( I ), I = 1, NINC ) WRITE( NOUT, FMT = 9989 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9988 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 40 I = 1, NSUBS LTEST( I ) = .FALSE. 
40 CONTINUE 50 READ( NIN, FMT = 9984, END = 80 )SNAMET, LTESTT DO 60 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 70 60 CONTINUE WRITE( NOUT, FMT = 9986 )SNAMET STOP 70 LTEST( I ) = LTESTT GO TO 50 * 80 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMVCH using exact data. * N = MIN( 32, NMAX ) DO 120 J = 1, N DO 110 I = 1, N A( I, J ) = MAX( I - J + 1, 0 ) 110 CONTINUE X( J ) = J Y( J ) = ZERO 120 CONTINUE DO 130 J = 1, N YY( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE * YY holds the exact result. On exit from CMVCH YT holds * the result computed by CMVCH. TRANS = 'N' CALL CMVCH( TRANS, N, N, ONE, A, NMAX, X, 1, ZERO, Y, 1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF TRANS = 'T' CALL CMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 210 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9983 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL CCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 140, 150, 150, 150, 160, 160, $ 160, 160, 160, 160, 170, 170, 180, $ 180, 190, 190 )ISNUM * Test CGEMV, 01, and CGBMV, 02. 140 CALL CCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test CHEMV, 03, CHBMV, 04, and CHPMV, 05. 
150 CALL CCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test CTRMV, 06, CTBMV, 07, CTPMV, 08, * CTRSV, 09, CTBSV, 10, and CTPSV, 11. 160 CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z ) GO TO 200 * Test CGERC, 12, CGERU, 13. 170 CALL CCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test CHER, 14, and CHPR, 15. 180 CALL CCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test CHER2, 16, and CHPR2, 17. 190 CALL CCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) * 200 IF( FATAL.AND.SFATAL ) $ GO TO 220 END IF 210 CONTINUE WRITE( NOUT, FMT = 9982 ) GO TO 240 * 220 CONTINUE WRITE( NOUT, FMT = 9981 ) GO TO 240 * 230 CONTINUE WRITE( NOUT, FMT = 9987 ) * 240 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, E9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' VALUE OF K IS LESS THAN 0' ) 9994 FORMAT( ' ABSOLUTE VALUE OF INCX OR INCY IS 0 OR GREATER THAN ', $ I2 ) 9993 FORMAT( ' TESTS OF THE COMPLEX LEVEL 2 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9992 FORMAT( ' FOR N ', 9I6 ) 9991 FORMAT( ' FOR K ', 7I6 ) 9990 FORMAT( ' FOR INCX AND INCY ', 7I6 ) 
9989 FORMAT( ' FOR ALPHA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9988 FORMAT( ' FOR BETA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9987 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9986 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9985 FORMAT( ' ERROR IN CMVCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' CMVCH WAS CALLED WITH TRANS = ', A1, $ ' AND RETURNED SAME = ', L1, ' AND ERR = ', F12.3, '.', / $ ' THIS MAY BE DUE TO FAULTS IN THE ARITHMETIC OR THE COMPILER.' $ , /' ******* TESTS ABANDONED *******' ) 9984 FORMAT( A6, L2 ) 9983 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9982 FORMAT( /' END OF TESTS' ) 9981 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9980 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of CBLAT2. * END SUBROUTINE CCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests CGEMV and CGBMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. 
COMPLEX ALPHA, ALS, BETA, BLS, TRANSL REAL ERR, ERRMAX INTEGER I, IA, IB, IC, IKU, IM, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, KL, KLS, KU, KUS, LAA, LDA, $ LDAS, LX, LY, M, ML, MS, N, NARGS, NC, ND, NK, $ NL, NS LOGICAL BANDED, FULL, NULL, RESET, SAME, TRAN CHARACTER*1 TRANS, TRANSS CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CGBMV, CGEMV, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' * Define the number of arguments. IF( FULL )THEN NARGS = 11 ELSE IF( BANDED )THEN NARGS = 13 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IKU = 1, NK IF( BANDED )THEN KU = KB( IKU ) KL = MAX( KU - 1, 0 ) ELSE KU = N - 1 KL = M - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = KL + KU + 1 ELSE LDA = M END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, AA, $ LDA, KL, KU, RESET, TRANSL ) * DO 90 IC = 1, 3 TRANS = ICH( IC: IC ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' * IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*NL * * Generate the vector X. 
* TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, NL, X, 1, XX, $ ABS( INCX ), 0, NL - 1, RESET, TRANSL ) IF( NL.GT.1 )THEN X( NL/2 ) = ZERO XX( 1 + ABS( INCX )*( NL/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*ML * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL CMAKE( 'GE', ' ', ' ', 1, ML, Y, 1, $ YY, ABS( INCY ), 0, ML - 1, $ RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANSS = TRANS MS = M NS = N KLS = KL KUS = KU ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ TRANS, M, N, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL CGEMV( TRANS, M, N, ALPHA, AA, $ LDA, XX, INCX, BETA, YY, $ INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANS, M, N, KL, KU, ALPHA, LDA, $ INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL CGBMV( TRANS, M, N, KL, KU, ALPHA, $ AA, LDA, XX, INCX, BETA, $ YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 130 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = TRANS.EQ.TRANSS ISAME( 2 ) = MS.EQ.M ISAME( 3 ) = NS.EQ.N IF( FULL )THEN ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LCE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LCE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LCE( YS, YY, LY ) ELSE ISAME( 10 ) = LCERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 4 ) = KLS.EQ.KL ISAME( 5 ) = KUS.EQ.KU ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LCE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LCE( XS, XX, LX ) ISAME( 10 ) = INCXS.EQ.INCX ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LCE( YS, YY, LY ) ELSE ISAME( 12 ) = LCERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 13 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 130 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL CMVCH( TRANS, M, N, ALPHA, A, $ NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 130 ELSE * Avoid repeating tests with M.le.0 or * N.le.0. GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 140 * 130 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, TRANS, M, N, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANS, M, N, KL, KU, $ ALPHA, LDA, INCX, BETA, INCY END IF * 140 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 4( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK1. * END SUBROUTINE CCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests CHEMV, CHBMV and CHPMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BLS, TRANSL REAL ERR, ERRMAX INTEGER I, IA, IB, IC, IK, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, K, KS, LAA, LDA, LDAS, LX, LY, $ N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHBMV, CHEMV, CHPMV, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 10 ELSE IF( BANDED )THEN NARGS = 11 ELSE IF( PACKED )THEN NARGS = 9 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, AA, $ LDA, K, K, RESET, TRANSL ) * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. 
* TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL CMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * UPLOS = UPLO NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, N, ALPHA, LDA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL CHEMV( UPLO, N, ALPHA, AA, LDA, XX, $ INCX, BETA, YY, INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, N, K, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL CHBMV( UPLO, N, K, ALPHA, AA, LDA, $ XX, INCX, BETA, YY, INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, N, ALPHA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL CHPMV( UPLO, N, ALPHA, AA, XX, INCX, $ BETA, YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N IF( FULL )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LCE( AS, AA, LAA ) ISAME( 5 ) = LDAS.EQ.LDA ISAME( 6 ) = LCE( XS, XX, LX ) ISAME( 7 ) = INCXS.EQ.INCX ISAME( 8 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LCE( YS, YY, LY ) ELSE ISAME( 9 ) = LCERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 10 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 3 ) = KS.EQ.K ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LCE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LCE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LCE( YS, YY, LY ) ELSE ISAME( 10 ) = LCERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( PACKED )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LCE( AS, AA, LAA ) ISAME( 5 ) = LCE( XS, XX, LX ) ISAME( 6 ) = INCXS.EQ.INCX ISAME( 7 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 8 ) = LCE( YS, YY, LY ) ELSE ISAME( 8 ) = LCERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 9 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL CMVCH( 'N', N, N, ALPHA, A, NMAX, X, $ INCX, BETA, Y, INCY, YT, G, $ YY, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0 GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, LDA, INCX, $ BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, K, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, N, ALPHA, INCX, $ BETA, INCY END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), AP, X,', I2, ',(', F4.1, ',', F4.1, '), Y,', I2, $ ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', F4.1, '), ', $ 'Y,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK2. * END SUBROUTINE CCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, XT, G, Z ) * * Tests CTRMV, CTBMV, CTPMV, CTRSV, CTBSV and CTPSV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ), $ ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. 
Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XT( NMAX ), XX( NMAX*INCMAX ), Z( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX TRANSL REAL ERR, ERRMAX INTEGER I, ICD, ICT, ICU, IK, IN, INCX, INCXS, IX, K, $ KS, LAA, LDA, LDAS, LX, N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 DIAG, DIAGS, TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHD, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CMAKE, CMVCH, CTBMV, CTBSV, CTPMV, CTPSV, $ CTRMV, CTRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'R' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 8 ELSE IF( BANDED )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 7 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * Set up zero vector for CMVCH. DO 10 I = 1, NMAX Z( I ) = ZERO 10 CONTINUE * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. 
IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) * DO 70 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), UPLO, DIAG, N, N, A, $ NMAX, AA, LDA, K, K, RESET, TRANSL ) * DO 60 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, $ TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS DIAGS = DIAG NS = N KS = K DO 20 I = 1, LAA AS( I ) = AA( I ) 20 CONTINUE LDAS = LDA DO 30 I = 1, LX XS( I ) = XX( I ) 30 CONTINUE INCXS = INCX * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL CTRMV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL CTBMV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL CTPMV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL CTRSV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL CTBSV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, 
INCX IF( REWI ) $ REWIND NTRA CALL CTPSV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = TRANS.EQ.TRANSS ISAME( 3 ) = DIAG.EQ.DIAGS ISAME( 4 ) = NS.EQ.N IF( FULL )THEN ISAME( 5 ) = LCE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 7 ) = LCE( XS, XX, LX ) ELSE ISAME( 7 ) = LCERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 8 ) = INCXS.EQ.INCX ELSE IF( BANDED )THEN ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = LCE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 8 ) = LCE( XS, XX, LX ) ELSE ISAME( 8 ) = LCERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 9 ) = INCXS.EQ.INCX ELSE IF( PACKED )THEN ISAME( 5 ) = LCE( AS, AA, LAA ) IF( NULL )THEN ISAME( 6 ) = LCE( XS, XX, LX ) ELSE ISAME( 6 ) = LCERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 7 ) = INCXS.EQ.INCX END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MV' )THEN * * Check the result. * CALL CMVCH( TRANS, N, N, ONE, A, NMAX, X, $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN * * Compute approximation to original vector. * DO 50 I = 1, N Z( I ) = XX( 1 + ( I - 1 )* $ ABS( INCX ) ) XX( 1 + ( I - 1 )*ABS( INCX ) ) $ = X( I ) 50 CONTINUE CALL CMVCH( TRANS, N, N, ONE, A, NMAX, Z, $ INCX, ZERO, X, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .FALSE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0. 
GO TO 110 END IF * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, DIAG, N, LDA, $ INCX ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, DIAG, N, K, $ LDA, INCX ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANS, DIAG, N, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', AP, ', $ 'X,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), 2( I3, ',' ), $ ' A,', I3, ', X,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', A,', $ I3, ', X,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK3. * END SUBROUTINE CCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests CGERC and CGERU. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ), $ ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. 
REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX ALPHA, ALS, TRANSL REAL ERR, ERRMAX INTEGER I, IA, IM, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, LAA, LDA, LDAS, LX, LY, M, MS, N, NARGS, $ NC, ND, NS LOGICAL CONJ, NULL, RESET, SAME * .. Local Arrays .. COMPLEX W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CGERC, CGERU, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, CONJG, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. CONJ = SNAME( 5: 5 ).EQ.'C' * Define the number of arguments. NARGS = 9 * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * * Set LDA to 1 more than minimum value if room. LDA = M IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * DO 100 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*M * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, M, X, 1, XX, ABS( INCX ), $ 0, M - 1, RESET, TRANSL ) IF( M.GT.1 )THEN X( M/2 ) = ZERO XX( 1 + ABS( INCX )*( M/2 - 1 ) ) = ZERO END IF * DO 90 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. 
* TRANSL = ZERO CALL CMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, $ AA, LDA, M - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, M, N, $ ALPHA, INCX, INCY, LDA IF( CONJ )THEN IF( REWI ) $ REWIND NTRA CALL CGERC( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) ELSE IF( REWI ) $ REWIND NTRA CALL CGERU( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 140 END IF * * See what data changed inside subroutine. * ISAME( 1 ) = MS.EQ.M ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LCE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LCE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LCE( AS, AA, LAA ) ELSE ISAME( 8 ) = LCERES( 'GE', ' ', M, N, AS, AA, $ LDA ) END IF ISAME( 9 ) = LDAS.EQ.LDA * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 140 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. 
* IF( INCX.GT.0 )THEN DO 50 I = 1, M Z( I ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, M Z( I ) = X( M - I + 1 ) 60 CONTINUE END IF DO 70 J = 1, N IF( INCY.GT.0 )THEN W( 1 ) = Y( J ) ELSE W( 1 ) = Y( N - J + 1 ) END IF IF( CONJ ) $ W( 1 ) = CONJG( W( 1 ) ) CALL CMVCH( 'N', M, 1, ALPHA, Z, NMAX, W, 1, $ ONE, A( 1, J ), 1, YT, G, $ AA( 1 + ( J - 1 )*LDA ), EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 130 70 CONTINUE ELSE * Avoid repeating tests with M.le.0 or N.le.0. GO TO 110 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 150 * 130 CONTINUE WRITE( NOUT, FMT = 9995 )J * 140 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, M, N, ALPHA, INCX, INCY, LDA * 150 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( I3, ',' ), '(', F4.1, ',', F4.1, $ '), X,', I2, ', Y,', I2, ', A,', I3, ') ', $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK4. * END SUBROUTINE CCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests CHER and CHPR. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. 
* Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ), $ ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX ALPHA, TRANSL REAL ERR, ERRMAX, RALPHA, RALS INTEGER I, IA, IC, IN, INCX, INCXS, IX, J, JA, JJ, LAA, $ LDA, LDAS, LJ, LX, N, NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. COMPLEX W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHER, CHPR, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, CMPLX, CONJG, MAX, REAL * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 7 ELSE IF( PACKED )THEN NARGS = 6 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. 
* TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IA = 1, NALF RALPHA = REAL( ALF( IA ) ) ALPHA = CMPLX( RALPHA, RZERO ) NULL = N.LE.0.OR.RALPHA.EQ.RZERO * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, $ AA, LDA, N - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N RALS = RALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ RALPHA, INCX, LDA IF( REWI ) $ REWIND NTRA CALL CHER( UPLO, N, RALPHA, XX, INCX, AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ RALPHA, INCX IF( REWI ) $ REWIND NTRA CALL CHPR( UPLO, N, RALPHA, XX, INCX, AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = RALS.EQ.RALPHA ISAME( 4 ) = LCE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX IF( NULL )THEN ISAME( 6 ) = LCE( AS, AA, LAA ) ELSE ISAME( 6 ) = LCERES( SNAME( 2: 3 ), UPLO, N, N, AS, $ AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 7 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. 
* IF( INCX.GT.0 )THEN DO 40 I = 1, N Z( I ) = X( I ) 40 CONTINUE ELSE DO 50 I = 1, N Z( I ) = X( N - I + 1 ) 50 CONTINUE END IF JA = 1 DO 60 J = 1, N W( 1 ) = CONJG( Z( J ) ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL CMVCH( 'N', LJ, 1, ALPHA, Z( JJ ), LJ, W, $ 1, ONE, A( JJ, J ), 1, YT, G, $ AA( JA ), EPS, ERR, FATAL, NOUT, $ .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 110 60 CONTINUE ELSE * Avoid repeating tests if N.le.0. IF( N.LE.0 ) $ GO TO 100 END IF * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, RALPHA, INCX, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, RALPHA, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK5. 
* END SUBROUTINE CCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests CHER2 and CHPR2. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), HALF = ( 0.5, 0.0 ), $ ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX, 2 ) REAL G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX ALPHA, ALS, TRANSL REAL ERR, ERRMAX INTEGER I, IA, IC, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, JA, JJ, LAA, LDA, LDAS, LJ, LX, LY, N, $ NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. COMPLEX W( 2 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHER2, CHPR2, CMAKE, CMVCH * .. Intrinsic Functions .. INTRINSIC ABS, CONJG, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 8 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 140 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. 
LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 140 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 130 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 120 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL CMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 110 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL CMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 100 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, $ NMAX, AA, LDA, N - 1, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL CHER2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY IF( REWI ) $ REWIND NTRA CALL CHPR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 160 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LCE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LCE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LCE( AS, AA, LAA ) ELSE ISAME( 8 ) = LCERES( SNAME( 2: 3 ), UPLO, N, N, $ AS, AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 9 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 160 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, N Z( I, 1 ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, N Z( I, 1 ) = X( N - I + 1 ) 60 CONTINUE END IF IF( INCY.GT.0 )THEN DO 70 I = 1, N Z( I, 2 ) = Y( I ) 70 CONTINUE ELSE DO 80 I = 1, N Z( I, 2 ) = Y( N - I + 1 ) 80 CONTINUE END IF JA = 1 DO 90 J = 1, N W( 1 ) = ALPHA*CONJG( Z( J, 2 ) ) W( 2 ) = CONJG( ALPHA )*CONJG( Z( J, 1 ) ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL CMVCH( 'N', LJ, 2, ONE, Z( JJ, 1 ), $ NMAX, W, 1, ONE, A( JJ, J ), 1, $ YT, G, AA( JA ), EPS, ERR, FATAL, $ NOUT, .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 150 90 CONTINUE ELSE * Avoid repeating tests with N.le.0. IF( N.LE.0 ) $ GO TO 140 END IF * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 170 * 150 CONTINUE WRITE( NOUT, FMT = 9995 )J * 160 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, $ INCY, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX, INCY END IF * 170 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), X,', I2, ', Y,', I2, ', AP) ', $ ' .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), X,', I2, ', Y,', I2, ', A,', I3, ') ', $ ' .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK6. * END SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 2 Blas. * Requires a special version of the error-handling routine XERBLA. * ALPHA, RALPHA, BETA, A, X and Y should not need to be defined. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Local Scalars .. COMPLEX ALPHA, BETA REAL RALPHA * .. Local Arrays .. COMPLEX A( 1, 1 ), X( 1 ), Y( 1 ) * .. External Subroutines .. 
EXTERNAL CGBMV, CGEMV, CGERC, CGERU, CHBMV, CHEMV, CHER, $ CHER2, CHKXER, CHPMV, CHPR, CHPR2, CTBMV, $ CTBSV, CTPMV, CTPSV, CTRMV, CTRSV * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90, 100, 110, 120, 130, 140, 150, 160, $ 170 )ISNUM 10 INFOT = 1 CALL CGEMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGEMV( 'N', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMV( 'N', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CGEMV( 'N', 2, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMV( 'N', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CGEMV( 'N', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 20 INFOT = 1 CALL CGBMV( '/', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGBMV( 'N', -1, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGBMV( 'N', 0, -1, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGBMV( 'N', 0, 0, -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGBMV( 'N', 2, 0, 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGBMV( 'N', 0, 0, 1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 
0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 30 INFOT = 1 CALL CHEMV( '/', 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHEMV( 'U', -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHEMV( 'U', 2, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMV( 'U', 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHEMV( 'U', 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 40 INFOT = 1 CALL CHBMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHBMV( 'U', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHBMV( 'U', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CHBMV( 'U', 0, 1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CHBMV( 'U', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CHBMV( 'U', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 50 INFOT = 1 CALL CHPMV( '/', 0, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHPMV( 'U', -1, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CHPMV( 'U', 0, ALPHA, A, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHPMV( 'U', 0, ALPHA, A, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 60 INFOT = 1 CALL CTRMV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTRMV( 'U', '/', 'N', 0, A, 1, X, 1 ) 
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTRMV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTRMV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CTRMV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 70 INFOT = 1 CALL CTBMV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTBMV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTBMV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTBMV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTBMV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CTBMV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTBMV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 80 INFOT = 1 CALL CTPMV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTPMV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTPMV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTPMV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CTPMV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 90 INFOT = 1 CALL CTRSV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTRSV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTRSV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTRSV( 'U', 'N', 'N', -1, A, 
1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CTRSV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 100 INFOT = 1 CALL CTBSV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTBSV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTBSV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTBSV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTBSV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CTBSV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTBSV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 110 INFOT = 1 CALL CTPSV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTPSV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTPSV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTPSV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CTPSV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 120 INFOT = 1 CALL CGERC( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGERC( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGERC( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CGERC( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CGERC( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 130 INFOT = 1 
CALL CGERU( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGERU( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGERU( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CGERU( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CGERU( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 140 INFOT = 1 CALL CHER( '/', 0, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHER( 'U', -1, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHER( 'U', 0, RALPHA, X, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER( 'U', 2, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 150 INFOT = 1 CALL CHPR( '/', 0, RALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHPR( 'U', -1, RALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHPR( 'U', 0, RALPHA, X, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 160 INFOT = 1 CALL CHER2( '/', 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHER2( 'U', -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHER2( 'U', 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2( 'U', 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2( 'U', 2, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 170 INFOT = 1 CALL CHPR2( '/', 0, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHPR2( 'U', -1, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CHPR2( 'U', 0, ALPHA, X, 0, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK 
) INFOT = 7 CALL CHPR2( 'U', 0, ALPHA, X, 1, Y, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 180 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of CCHKE. * END SUBROUTINE CMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, KL, $ KU, RESET, TRANSL ) * * Generates values for an M by N matrix A within the bandwidth * defined by KL and KU. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'GB', 'HE', 'HB', 'HP', 'TR', 'TB' OR 'TP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) COMPLEX ROGUE PARAMETER ( ROGUE = ( -1.0E10, 1.0E10 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) REAL RROGUE PARAMETER ( RROGUE = -1.0E10 ) * .. Scalar Arguments .. COMPLEX TRANSL INTEGER KL, KU, LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, I1, I2, I3, IBEG, IEND, IOFF, J, JJ, KK LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. COMPLEX CBEG EXTERNAL CBEG * .. Intrinsic Functions .. INTRINSIC CMPLX, CONJG, MAX, MIN, REAL * .. Executable Statements .. GEN = TYPE( 1: 1 ).EQ.'G' SYM = TYPE( 1: 1 ).EQ.'H' TRI = TYPE( 1: 1 ).EQ.'T' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN IF( ( I.LE.J.AND.J - I.LE.KU ).OR. 
$ ( I.GE.J.AND.I - J.LE.KL ) )THEN A( I, J ) = CBEG( RESET ) + TRANSL ELSE A( I, J ) = ZERO END IF IF( I.NE.J )THEN IF( SYM )THEN A( J, I ) = CONJG( A( I, J ) ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( SYM ) $ A( J, J ) = CMPLX( REAL( A( J, J ) ), RZERO ) IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'GB' )THEN DO 90 J = 1, N DO 60 I1 = 1, KU + 1 - J AA( I1 + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I2 = I1, MIN( KL + KU + 1, KU + 1 + M - J ) AA( I2 + ( J - 1 )*LDA ) = A( I2 + J - KU - 1, J ) 70 CONTINUE DO 80 I3 = I2, LDA AA( I3 + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'TR' )THEN DO 130 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 100 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 100 CONTINUE DO 110 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 110 CONTINUE DO 120 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 120 CONTINUE IF( SYM )THEN JJ = J + ( J - 1 )*LDA AA( JJ ) = CMPLX( REAL( AA( JJ ) ), RROGUE ) END IF 130 CONTINUE ELSE IF( TYPE.EQ.'HB'.OR.TYPE.EQ.'TB' )THEN DO 170 J = 1, N IF( UPPER )THEN KK = KL + 1 IBEG = MAX( 1, KL + 2 - J ) IF( UNIT )THEN IEND = KL ELSE IEND = KL + 1 END IF ELSE KK = 1 IF( UNIT )THEN IBEG = 2 ELSE IBEG = 1 END IF IEND = MIN( KL + 1, 1 + M - J ) END IF DO 140 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 140 CONTINUE DO 150 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I + J - KK, J ) 150 CONTINUE DO 160 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 160 CONTINUE IF( SYM )THEN JJ = KK + ( J - 1 )*LDA AA( JJ ) = CMPLX( REAL( AA( JJ ) ), RROGUE ) 
END IF 170 CONTINUE ELSE IF( TYPE.EQ.'HP'.OR.TYPE.EQ.'TP' )THEN IOFF = 0 DO 190 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 180 I = IBEG, IEND IOFF = IOFF + 1 AA( IOFF ) = A( I, J ) IF( I.EQ.J )THEN IF( UNIT ) $ AA( IOFF ) = ROGUE IF( SYM ) $ AA( IOFF ) = CMPLX( REAL( AA( IOFF ) ), RROGUE ) END IF 180 CONTINUE 190 CONTINUE END IF RETURN * * End of CMAKE. * END SUBROUTINE CMVCH( TRANS, M, N, ALPHA, A, NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, FATAL, NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RZERO, RONE PARAMETER ( RZERO = 0.0, RONE = 1.0 ) * .. Scalar Arguments .. COMPLEX ALPHA, BETA REAL EPS, ERR INTEGER INCX, INCY, M, N, NMAX, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANS * .. Array Arguments .. COMPLEX A( NMAX, * ), X( * ), Y( * ), YT( * ), YY( * ) REAL G( * ) * .. Local Scalars .. COMPLEX C REAL ERRI INTEGER I, INCXL, INCYL, IY, J, JX, KX, KY, ML, NL LOGICAL CTRAN, TRAN * .. Intrinsic Functions .. INTRINSIC ABS, AIMAG, CONJG, MAX, REAL, SQRT * .. Statement Functions .. REAL ABS1 * .. Statement Function definitions .. ABS1( C ) = ABS( REAL( C ) ) + ABS( AIMAG( C ) ) * .. Executable Statements .. TRAN = TRANS.EQ.'T' CTRAN = TRANS.EQ.'C' IF( TRAN.OR.CTRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF IF( INCX.LT.0 )THEN KX = NL INCXL = -1 ELSE KX = 1 INCXL = 1 END IF IF( INCY.LT.0 )THEN KY = ML INCYL = -1 ELSE KY = 1 INCYL = 1 END IF * * Compute expected result in YT using data in A, X and Y. * Compute gauges in G. 
* IY = KY DO 40 I = 1, ML YT( IY ) = ZERO G( IY ) = RZERO JX = KX IF( TRAN )THEN DO 10 J = 1, NL YT( IY ) = YT( IY ) + A( J, I )*X( JX ) G( IY ) = G( IY ) + ABS1( A( J, I ) )*ABS1( X( JX ) ) JX = JX + INCXL 10 CONTINUE ELSE IF( CTRAN )THEN DO 20 J = 1, NL YT( IY ) = YT( IY ) + CONJG( A( J, I ) )*X( JX ) G( IY ) = G( IY ) + ABS1( A( J, I ) )*ABS1( X( JX ) ) JX = JX + INCXL 20 CONTINUE ELSE DO 30 J = 1, NL YT( IY ) = YT( IY ) + A( I, J )*X( JX ) G( IY ) = G( IY ) + ABS1( A( I, J ) )*ABS1( X( JX ) ) JX = JX + INCXL 30 CONTINUE END IF YT( IY ) = ALPHA*YT( IY ) + BETA*Y( IY ) G( IY ) = ABS1( ALPHA )*G( IY ) + ABS1( BETA )*ABS1( Y( IY ) ) IY = IY + INCYL 40 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 50 I = 1, ML ERRI = ABS( YT( I ) - YY( 1 + ( I - 1 )*ABS( INCY ) ) )/EPS IF( G( I ).NE.RZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ GO TO 60 50 CONTINUE * If the loop completes, all results are at least half accurate. GO TO 80 * * Report fatal error. * 60 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 70 I = 1, ML IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, YT( I ), $ YY( 1 + ( I - 1 )*ABS( INCY ) ) ELSE WRITE( NOUT, FMT = 9998 )I, $ YY( 1 + ( I - 1 )*ABS( INCY ) ), YT( I ) END IF 70 CONTINUE * 80 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RE', $ 'SULT COMPUTED RESULT' ) 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) * * End of CMVCH. * END LOGICAL FUNCTION LCE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. COMPLEX RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LCE = .TRUE. 
GO TO 30 20 CONTINUE LCE = .FALSE. 30 RETURN * * End of LCE. * END LOGICAL FUNCTION LCERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE', 'HE' or 'HP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'HE' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LCERES = .TRUE. GO TO 80 70 CONTINUE LCERES = .FALSE. 80 RETURN * * End of LCERES. * END COMPLEX FUNCTION CBEG( RESET ) * * Generates complex numbers as pairs of random numbers uniformly * distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, J, MI, MJ * .. Save statement .. SAVE I, IC, J, MI, MJ * .. Intrinsic Functions .. INTRINSIC CMPLX * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 MJ = 457 I = 7 J = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I or J is bounded between 1 and 999. * If initial I or J = 1,2,3,6,7 or 9, the period will be 50. * If initial I or J = 4 or 8, the period will be 25. * If initial I or J = 5, the period will be 10. 
* IC is used to break up the period by skipping 1 value of I or J * in 6. * IC = IC + 1 10 I = I*MI J = J*MJ I = I - 1000*( I/1000 ) J = J - 1000*( J/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF CBEG = CMPLX( ( I - 500 )/1001.0, ( J - 500 )/1001.0 ) RETURN * * End of CBEG. * END REAL FUNCTION SDIFF( X, Y ) * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * * .. Scalar Arguments .. REAL X, Y * .. Executable Statements .. SDIFF = X - Y RETURN * * End of SDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 2 BLAS * routines. * * XERBLA is an error handler for the Level 2 BLAS routines. * * It is called by the Level 2 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. 
IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-0.6.1/blastest/src/fortran/cblat3.f000066400000000000000000004007361360743507500200750ustar00rootroot00000000000000*> \brief \b CBLAT3 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM CBLAT3 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records *> of the file are read using list-directed input, the last 9 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 23 lines: *> 'cblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 3 NUMBER OF VALUES OF ALPHA *> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA *> CGEMM T PUT F FOR NO TEST. SAME COLUMNS. *> CHEMM T PUT F FOR NO TEST. SAME COLUMNS. *> CSYMM T PUT F FOR NO TEST. SAME COLUMNS. 
*> CTRMM T PUT F FOR NO TEST. SAME COLUMNS. *> CTRSM T PUT F FOR NO TEST. SAME COLUMNS. *> CHERK T PUT F FOR NO TEST. SAME COLUMNS. *> CSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> CHER2K T PUT F FOR NO TEST. SAME COLUMNS. *> CSYR2K T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. *> A Set of Level 3 Basic Linear Algebra Subprograms. *> *> Technical Memorandum No.88 (Revision 1), Mathematics and *> Computer Science Division, Argonne National Laboratory, 9700 *> South Cass Avenue, Argonne, Illinois 60439, US. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex_blas_testing * * ===================================================================== PROGRAM CBLAT3 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 9 ) COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. 
REAL EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANSA, TRANSB CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. COMPLEX AA( NMAX*NMAX ), AB( NMAX, 2*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), $ BB( NMAX*NMAX ), BET( NBEMAX ), $ BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ W( 2*NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. REAL SDIFF LOGICAL LCE EXTERNAL SDIFF, LCE * .. External Subroutines .. EXTERNAL CCHK1, CCHK2, CCHK3, CCHK4, CCHK5, CCHKE, CMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'CGEMM ', 'CHEMM ', 'CSYMM ', 'CTRMM ', $ 'CTRSM ', 'CHERK ', 'CSYRK ', 'CHER2K', $ 'CSYR2K'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. 
* * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 220 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 220 END IF 10 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 220 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 220 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9993 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9992 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 20 I = 1, NSUBS LTEST( I ) = .FALSE. 20 CONTINUE 30 READ( NIN, FMT = 9988, END = 60 )SNAMET, LTESTT DO 40 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 50 40 CONTINUE WRITE( NOUT, FMT = 9990 )SNAMET STOP 50 LTEST( I ) = LTESTT GO TO 30 * 60 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of CMMCH using exact data. * N = MIN( 32, NMAX ) DO 100 J = 1, N DO 90 I = 1, N AB( I, J ) = MAX( I - J + 1, 0 ) 90 CONTINUE AB( J, NMAX + 1 ) = J AB( 1, NMAX + J ) = J C( J, 1 ) = ZERO 100 CONTINUE DO 110 J = 1, N CC( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 110 CONTINUE * CC holds the exact result. On exit from CMMCH CT holds * the result computed by CMMCH. 
TRANSA = 'N' TRANSB = 'N' CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'C' CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF DO 120 J = 1, N AB( J, NMAX + 1 ) = N - J + 1 AB( 1, NMAX + J ) = N - J + 1 120 CONTINUE DO 130 J = 1, N CC( N - J + 1 ) = J*( ( J + 1 )*J )/2 - $ ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE TRANSA = 'C' TRANSB = 'N' CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'C' CALL CMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LCE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 200 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9987 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL CCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 150, 160, 160, 170, 170, $ 180, 180 )ISNUM * Test CGEMM, 01. 
140 CALL CCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test CHEMM, 02, CSYMM, 03. 150 CALL CCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test CTRMM, 04, CTRSM, 05. 160 CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C ) GO TO 190 * Test CHERK, 06, CSYRK, 07. 170 CALL CCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test CHER2K, 08, CSYR2K, 09. 180 CALL CCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 END IF 200 CONTINUE WRITE( NOUT, FMT = 9986 ) GO TO 230 * 210 CONTINUE WRITE( NOUT, FMT = 9985 ) GO TO 230 * 220 CONTINUE WRITE( NOUT, FMT = 9991 ) * 230 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, E9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' TESTS OF THE COMPLEX LEVEL 3 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9994 FORMAT( ' FOR N ', 9I6 ) 9993 FORMAT( ' FOR ALPHA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9992 FORMAT( ' FOR BETA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9991 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9990 
FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9989 FORMAT( ' ERROR IN CMMCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' CMMCH WAS CALLED WITH TRANSA = ', A1, $ ' AND TRANSB = ', A1, /' AND RETURNED SAME = ', L1, ' AND ', $ 'ERR = ', F12.3, '.', /' THIS MAY BE DUE TO FAULTS IN THE ', $ 'ARITHMETIC OR THE COMPILER.', /' ******* TESTS ABANDONED ', $ '*******' ) 9988 FORMAT( A6, L2 ) 9987 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9986 FORMAT( /' END OF TESTS' ) 9985 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9984 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of CBLAT3. * END SUBROUTINE CCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests CGEMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BLS REAL ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, $ MA, MB, MS, N, NA, NARGS, NB, NC, NS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. 
LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CGEMM, CMAKE, CMMCH * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. * NARGS = 13 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 110 IM = 1, NIDIM M = IDIM( IM ) * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICA = 1, 3 TRANSA = ICH( ICA: ICA ) TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' * IF( TRANA )THEN MA = K NA = M ELSE MA = M NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL CMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICB = 1, 3 TRANSB = ICH( ICB: ICB ) TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * IF( TRANB )THEN MB = N NB = K ELSE MB = K NB = N END IF * Set LDB to 1 more than minimum value if room. LDB = MB IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 70 LBB = LDB*NB * * Generate the matrix B. * CALL CMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, $ LDB, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL CMAKE( 'GE', ' ', ' ', M, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. 
* TRANAS = TRANSA TRANBS = TRANSB MS = M NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) $ REWIND NTRA CALL CGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ AA, LDA, BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANSA.EQ.TRANAS ISAME( 2 ) = TRANSB.EQ.TRANBS ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LCE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LCE( BS, BB, LBB ) ISAME( 10 ) = LDBS.EQ.LDB ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LCE( CS, CC, LCC ) ELSE ISAME( 12 ) = LCERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL CMMCH( TRANSA, TRANSB, M, N, K, $ ALPHA, A, NMAX, B, NMAX, BETA, $ C, NMAX, CT, G, CC, LDC, EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',''', A1, ''',', $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK1. * END SUBROUTINE CCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests CHEMM and CSYMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. 
COMPLEX ALPHA, ALS, BETA, BLS REAL ERR, ERRMAX INTEGER I, IA, IB, ICS, ICU, IM, IN, LAA, LBB, LCC, $ LDA, LDAS, LDB, LDBS, LDC, LDCS, M, MS, N, NA, $ NARGS, NC, NS LOGICAL CONJ, LEFT, NULL, RESET, SAME CHARACTER*1 SIDE, SIDES, UPLO, UPLOS CHARACTER*2 ICHS, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHEMM, CMAKE, CMMCH, CSYMM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHS/'LR'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IM = 1, NIDIM M = IDIM( IM ) * DO 90 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 90 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 90 LBB = LDB*N * * Generate the matrix B. * CALL CMAKE( 'GE', ' ', ' ', M, N, B, NMAX, BB, LDB, RESET, $ ZERO ) * DO 80 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' * IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * * Generate the hermitian or symmetric matrix A. * CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', NA, NA, A, NMAX, $ AA, LDA, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL CMAKE( 'GE', ' ', ' ', M, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. 
* SIDES = SIDE UPLOS = UPLO MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, SIDE, $ UPLO, M, N, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA IF( CONJ )THEN CALL CHEMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) ELSE CALL CSYMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 110 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LCE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LCE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LCE( CS, CC, LCC ) ELSE ISAME( 11 ) = LCERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 110 END IF * IF( .NOT.NULL )THEN * * Check the result. * IF( LEFT )THEN CALL CMMCH( 'N', 'N', M, N, M, ALPHA, A, $ NMAX, B, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL CMMCH( 'N', 'N', M, N, N, ALPHA, B, $ NMAX, A, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 120 * 110 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, M, N, ALPHA, LDA, $ LDB, BETA, LDC * 120 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1, $ ',', F4.1, '), C,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK2. * END SUBROUTINE CCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NMAX, A, AA, AS, $ B, BB, BS, CT, G, C ) * * Tests CTRMM and CTRSM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO, ONE PARAMETER ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) ) REAL RZERO PARAMETER ( RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CT( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. 
COMPLEX ALPHA, ALS REAL ERR, ERRMAX INTEGER I, IA, ICD, ICS, ICT, ICU, IM, IN, J, LAA, LBB, $ LDA, LDAS, LDB, LDBS, M, MS, N, NA, NARGS, NC, $ NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 DIAG, DIAGS, SIDE, SIDES, TRANAS, TRANSA, UPLO, $ UPLOS CHARACTER*2 ICHD, ICHS, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CMAKE, CMMCH, CTRMM, CTRSM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/, ICHS/'LR'/ * .. Executable Statements .. * NARGS = 11 NC = 0 RESET = .TRUE. ERRMAX = RZERO * Set up zero matrix for CMMCH. DO 20 J = 1, NMAX DO 10 I = 1, NMAX C( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * DO 140 IM = 1, NIDIM M = IDIM( IM ) * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 130 LBB = LDB*N NULL = M.LE.0.OR.N.LE.0 * DO 120 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 130 LAA = LDA*NA * DO 110 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 100 ICT = 1, 3 TRANSA = ICHT( ICT: ICT ) * DO 90 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * CALL CMAKE( 'TR', UPLO, DIAG, NA, NA, A, $ NMAX, AA, LDA, RESET, ZERO ) * * Generate the matrix B. * CALL CMAKE( 'GE', ' ', ' ', M, N, B, NMAX, $ BB, LDB, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. 
* SIDES = SIDE UPLOS = UPLO TRANAS = TRANSA DIAGS = DIAG MS = M NS = N ALS = ALPHA DO 30 I = 1, LAA AS( I ) = AA( I ) 30 CONTINUE LDAS = LDA DO 40 I = 1, LBB BS( I ) = BB( I ) 40 CONTINUE LDBS = LDB * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL CTRMM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL CTRSM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = TRANAS.EQ.TRANSA ISAME( 4 ) = DIAGS.EQ.DIAG ISAME( 5 ) = MS.EQ.M ISAME( 6 ) = NS.EQ.N ISAME( 7 ) = ALS.EQ.ALPHA ISAME( 8 ) = LCE( AS, AA, LAA ) ISAME( 9 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 10 ) = LCE( BS, BB, LBB ) ELSE ISAME( 10 ) = LCERES( 'GE', ' ', M, N, BS, $ BB, LDB ) END IF ISAME( 11 ) = LDBS.EQ.LDB * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 50 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 50 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MM' )THEN * * Check the result. * IF( LEFT )THEN CALL CMMCH( TRANSA, 'N', M, N, M, $ ALPHA, A, NMAX, B, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL CMMCH( 'N', TRANSA, M, N, N, $ ALPHA, B, NMAX, A, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN * * Compute approximation to original * matrix. 
* DO 70 J = 1, N DO 60 I = 1, M C( I, J ) = BB( I + ( J - 1 )* $ LDB ) BB( I + ( J - 1 )*LDB ) = ALPHA* $ B( I, J ) 60 CONTINUE 70 CONTINUE * IF( LEFT )THEN CALL CMMCH( TRANSA, 'N', M, N, M, $ ONE, A, NMAX, C, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) ELSE CALL CMMCH( 'N', TRANSA, M, N, N, $ ONE, C, NMAX, A, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) END IF END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 150 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, LDA, LDB * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 4( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ', $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of CCHK3. * END SUBROUTINE CCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests CHERK and CSYRK. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. 
* Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX ZERO PARAMETER ( ZERO = ( 0.0, 0.0 ) ) REAL RONE, RZERO PARAMETER ( RONE = 1.0, RZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) REAL G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX ALPHA, ALS, BETA, BETS REAL ERR, ERRMAX, RALPHA, RALS, RBETA, RBETS INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, K, KS, $ LAA, LCC, LDA, LDAS, LDC, LDCS, LJ, MA, N, NA, $ NARGS, NC, NS LOGICAL CONJ, NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, TRANST, UPLO, UPLOS CHARACTER*2 ICHT, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LCE, LCERES EXTERNAL LCE, LCERES * .. External Subroutines .. EXTERNAL CHERK, CMAKE, CMMCH, CSYRK * .. Intrinsic Functions .. INTRINSIC CMPLX, MAX, REAL * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NC'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 10 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICT = 1, 2 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'C' IF( TRAN.AND..NOT.CONJ ) $ TRANS = 'T' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. 
IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL CMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 60 IA = 1, NALF ALPHA = ALF( IA ) IF( CONJ )THEN RALPHA = REAL( ALPHA ) ALPHA = CMPLX( RALPHA, RZERO ) END IF * DO 50 IB = 1, NBET BETA = BET( IB ) IF( CONJ )THEN RBETA = REAL( BETA ) BETA = CMPLX( RBETA, RZERO ) END IF NULL = N.LE.0 IF( CONJ ) $ NULL = NULL.OR.( ( K.LE.0.OR.RALPHA.EQ. $ RZERO ).AND.RBETA.EQ.RONE ) * * Generate the matrix C. * CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, C, $ NMAX, CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K IF( CONJ )THEN RALS = RALPHA ELSE ALS = ALPHA END IF DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA IF( CONJ )THEN RBETS = RBETA ELSE BETS = BETA END IF DO 20 I = 1, LCC CS( I ) = CC( I ) 20 CONTINUE LDCS = LDC * * Call the subroutine. * IF( CONJ )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, RALPHA, LDA, RBETA, LDC IF( REWI ) $ REWIND NTRA CALL CHERK( UPLO, TRANS, N, K, RALPHA, AA, $ LDA, RBETA, CC, LDC ) ELSE IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, BETA, LDC IF( REWI ) $ REWIND NTRA CALL CSYRK( UPLO, TRANS, N, K, ALPHA, AA, $ LDA, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K IF( CONJ )THEN ISAME( 5 ) = RALS.EQ.RALPHA ELSE ISAME( 5 ) = ALS.EQ.ALPHA END IF ISAME( 6 ) = LCE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( CONJ )THEN ISAME( 8 ) = RBETS.EQ.RBETA ELSE ISAME( 8 ) = BETS.EQ.BETA END IF IF( NULL )THEN ISAME( 9 ) = LCE( CS, CC, LCC ) ELSE ISAME( 9 ) = LCERES( SNAME( 2: 3 ), UPLO, N, $ N, CS, CC, LDC ) END IF ISAME( 10 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( CONJ )THEN TRANST = 'C' ELSE TRANST = 'T' END IF JC = 1 DO 40 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN CALL CMMCH( TRANST, 'N', LJ, 1, K, $ ALPHA, A( 1, JJ ), NMAX, $ A( 1, J ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL CMMCH( 'N', TRANST, LJ, 1, K, $ ALPHA, A( JJ, 1 ), NMAX, $ A( J, 1 ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 40 CONTINUE END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. 
*
      IF( ERRMAX.LT.THRESH )THEN
         WRITE( NOUT, FMT = 9999 )SNAME, NC
      ELSE
         WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX
      END IF
      GO TO 130
*
  110 CONTINUE
      IF( N.GT.1 )
     $   WRITE( NOUT, FMT = 9995 )J
*
  120 CONTINUE
      WRITE( NOUT, FMT = 9996 )SNAME
      IF( CONJ )THEN
         WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, RALPHA,
     $      LDA, RBETA, LDC
      ELSE
         WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, N, K, ALPHA,
     $      LDA, BETA, LDC
      END IF
*
  130 CONTINUE
      RETURN
*
 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL',
     $      'S)' )
 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
     $      'ANGED INCORRECTLY *******' )
 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
     $      'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
     $      ' - SUSPECT *******' )
 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' )
 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
     $      ' .' )
 9993 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
     $      '), C,', I3, ') .' )
 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
     $      '******' )
*
*  End of CCHK4.
*
      END
      SUBROUTINE CCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI,
     $                  FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX,
     $                  AB, AA, AS, BB, BS, C, CC, CS, CT, G, W )
*
*  Tests CHER2K and CSYR2K.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  Purpose
*  -------
*  SNAME( 2: 3 ) selects the routine under test: 'HE' exercises
*  CHER2K (CONJ is .TRUE.), any other value exercises CSYR2K.
*
*  For every combination of N and K taken from IDIM, of TRANS ('N',
*  then 'C' for CHER2K or 'T' for CSYR2K), of UPLO ('U', 'L'), of
*  ALPHA taken from ALF and of BETA taken from BET, this routine
*     1. builds A, B and C by calling CMAKE,
*     2. saves a copy of every argument of the call,
*     3. calls the routine under test,
*     4. checks OK (COMMON /INFOC/) to see whether an error exit
*        was reported for this valid call,
*     5. checks that the input arguments were not changed, and
*     6. checks the computed C one column at a time with CMMCH.
*  Combinations for which LDA or LDC would exceed NMAX are skipped.
*
*  NOTE(review): CMAKE, CMMCH, LCE and LCERES are defined outside
*  this routine; what the comments below say about them is inferred
*  from how they are called here and should be confirmed against
*  their definitions.
*
*  Arguments (as used by this routine)
*  ---------
*  SNAME   name of the routine under test; printed in all reports.
*  EPS     passed through to CMMCH.
*  THRESH  the final report is 'PASSED' only if ERRMAX.LT.THRESH.
*  NOUT    unit for the result and error reports.
*  NTRA    unit for the call trace.
*  TRACE   if .TRUE., every call is written to NTRA before it is
*          made.
*  REWI    if .TRUE., NTRA is rewound before every call.
*  FATAL   set to .TRUE. here when an error exit is reported or an
*          input argument was changed; it is also passed to CMMCH
*          and tested after each CMMCH call.
*  NIDIM, IDIM   number of and values for N and K.
*  NALF, ALF     number of and values for ALPHA.
*  NBET, BET     number of and values for BETA (only the real part
*                is used when testing CHER2K).
*  NMAX    bound on LDA and LDC; leading dimension of C.
*  AB      workspace of 2*NMAX*NMAX elements in which CMAKE is asked
*          to generate both A and B (see 'Generate the matrix B').
*  AA, BB, CC    the arrays actually passed to the routine under
*                test, with leading dimensions LDA, LDB and LDC.
*  AS, BS, CS    copies of AA, BB and CC taken before the call.
*  C       NMAX by NMAX array passed to CMAKE and to CMMCH.
*  CT, G   work arrays passed to CMMCH.
*  W       work vector of 2*NMAX elements built here for CMMCH.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Parameters ..
      COMPLEX            ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) )
      REAL               RONE, RZERO
      PARAMETER          ( RONE = 1.0, RZERO = 0.0 )
*     .. Scalar Arguments ..
      REAL               EPS, THRESH
      INTEGER            NALF, NBET, NIDIM, NMAX, NOUT, NTRA
      LOGICAL            FATAL, REWI, TRACE
      CHARACTER*6        SNAME
*     .. Array Arguments ..
      COMPLEX            AA( NMAX*NMAX ), AB( 2*NMAX*NMAX ),
     $                   ALF( NALF ), AS( NMAX*NMAX ), BB( NMAX*NMAX ),
     $                   BET( NBET ), BS( NMAX*NMAX ), C( NMAX, NMAX ),
     $                   CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ),
     $                   W( 2*NMAX )
      REAL               G( NMAX )
      INTEGER            IDIM( NIDIM )
*     .. Local Scalars ..
      COMPLEX            ALPHA, ALS, BETA, BETS
      REAL               ERR, ERRMAX, RBETA, RBETS
      INTEGER            I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, JJAB,
     $                   K, KS, LAA, LBB, LCC, LDA, LDAS, LDB, LDBS,
     $                   LDC, LDCS, LJ, MA, N, NA, NARGS, NC, NS
      LOGICAL            CONJ, NULL, RESET, SAME, TRAN, UPPER
      CHARACTER*1        TRANS, TRANSS, TRANST, UPLO, UPLOS
      CHARACTER*2        ICHT, ICHU
*     .. Local Arrays ..
*     Only the first NARGS ( = 12 ) entries of ISAME are set and
*     tested below.
      LOGICAL            ISAME( 13 )
*     .. External Functions ..
      LOGICAL            LCE, LCERES
      EXTERNAL           LCE, LCERES
*     .. External Subroutines ..
      EXTERNAL           CHER2K, CMAKE, CMMCH, CSYR2K
*     .. Intrinsic Functions ..
      INTRINSIC          CMPLX, CONJG, MAX, REAL
*     .. Scalars in Common ..
      INTEGER            INFOT, NOUTC
      LOGICAL            LERR, OK
*     .. Common blocks ..
      COMMON             /INFOC/INFOT, NOUTC, OK, LERR
*     .. Data statements ..
*     ICHT and ICHU list the TRANS and UPLO values to be tested.
      DATA               ICHT/'NC'/, ICHU/'UL'/
*     .. Executable Statements ..
*     CONJ is .TRUE. when the Hermitian routine CHER2K is under test.
      CONJ = SNAME( 2: 3 ).EQ.'HE'
*
*     NARGS is the number of arguments of the routine under test,
*     NC counts the calls made and ERRMAX holds the largest ERR
*     returned by CMMCH so far.
      NARGS = 12
      NC = 0
      RESET = .TRUE.
      ERRMAX = RZERO
*
      DO 130 IN = 1, NIDIM
         N = IDIM( IN )
*        Set LDC to 1 more than minimum value if room.
         LDC = N
         IF( LDC.LT.NMAX )
     $      LDC = LDC + 1
*        Skip tests if not enough room.
         IF( LDC.GT.NMAX )
     $      GO TO 130
         LCC = LDC*N
*
         DO 120 IK = 1, NIDIM
            K = IDIM( IK )
*
            DO 110 ICT = 1, 2
               TRANS = ICHT( ICT: ICT )
               TRAN = TRANS.EQ.'C'
*              CSYR2K is called with 'T' in place of 'C'.
               IF( TRAN.AND..NOT.CONJ )
     $            TRANS = 'T'
*              A and B are MA by NA: K by N when transposed, N by K
*              otherwise.
               IF( TRAN )THEN
                  MA = K
                  NA = N
               ELSE
                  MA = N
                  NA = K
               END IF
*              Set LDA to 1 more than minimum value if room.
               LDA = MA
               IF( LDA.LT.NMAX )
     $            LDA = LDA + 1
*              Skip tests if not enough room.
               IF( LDA.GT.NMAX )
     $            GO TO 110
               LAA = LDA*NA
*
*              Generate the matrix A.
*
*              A goes to the start of AB, with leading dimension
*              2*NMAX when transposed and NMAX otherwise.
               IF( TRAN )THEN
                  CALL CMAKE( 'GE', ' ', ' ', MA, NA, AB, 2*NMAX, AA,
     $                        LDA, RESET, ZERO )
               ELSE
                  CALL CMAKE( 'GE', ' ', ' ', MA, NA, AB, NMAX, AA, LDA,
     $                        RESET, ZERO )
               END IF
*
*              Generate the matrix B.
*
*              B shares AB with A: it starts at AB( K + 1 ), i.e. K
*              rows below A, when transposed, and at
*              AB( K*NMAX + 1 ), i.e. K columns to the right of A,
*              otherwise.
               LDB = LDA
               LBB = LAA
               IF( TRAN )THEN
                  CALL CMAKE( 'GE', ' ', ' ', MA, NA, AB( K + 1 ),
     $                        2*NMAX, BB, LDB, RESET, ZERO )
               ELSE
                  CALL CMAKE( 'GE', ' ', ' ', MA, NA, AB( K*NMAX + 1 ),
     $                        NMAX, BB, LDB, RESET, ZERO )
               END IF
*
               DO 100 ICU = 1, 2
                  UPLO = ICHU( ICU: ICU )
                  UPPER = UPLO.EQ.'U'
*
                  DO 90 IA = 1, NALF
                     ALPHA = ALF( IA )
*
                     DO 80 IB = 1, NBET
                        BETA = BET( IB )
*                       CHER2K takes a real BETA: keep only the real
*                       part, and make BETA agree with it for CMMCH.
                        IF( CONJ )THEN
                           RBETA = REAL( BETA )
                           BETA = CMPLX( RBETA, RZERO )
                        END IF
*                       When NULL is .TRUE. the whole of CC is
*                       compared with its saved copy and the
*                       numerical check is skipped.
                        NULL = N.LE.0
                        IF( CONJ )
     $                     NULL = NULL.OR.( ( K.LE.0.OR.ALPHA.EQ.
     $                            ZERO ).AND.RBETA.EQ.RONE )
*
*                       Generate the matrix C.
*
                        CALL CMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, C,
     $                              NMAX, CC, LDC, RESET, ZERO )
*
                        NC = NC + 1
*
*                       Save every datum before calling the subroutine.
*
                        UPLOS = UPLO
                        TRANSS = TRANS
                        NS = N
                        KS = K
                        ALS = ALPHA
                        DO 10 I = 1, LAA
                           AS( I ) = AA( I )
   10                   CONTINUE
                        LDAS = LDA
                        DO 20 I = 1, LBB
                           BS( I ) = BB( I )
   20                   CONTINUE
                        LDBS = LDB
                        IF( CONJ )THEN
                           RBETS = RBETA
                        ELSE
                           BETS = BETA
                        END IF
                        DO 30 I = 1, LCC
                           CS( I ) = CC( I )
   30                   CONTINUE
                        LDCS = LDC
*
*                       Call the subroutine.
*
                        IF( CONJ )THEN
                           IF( TRACE )
     $                        WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO,
     $                        TRANS, N, K, ALPHA, LDA, LDB, RBETA, LDC
                           IF( REWI )
     $                        REWIND NTRA
                           CALL CHER2K( UPLO, TRANS, N, K, ALPHA, AA,
     $                                  LDA, BB, LDB, RBETA, CC, LDC )
                        ELSE
                           IF( TRACE )
     $                        WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO,
     $                        TRANS, N, K, ALPHA, LDA, LDB, BETA, LDC
                           IF( REWI )
     $                        REWIND NTRA
                           CALL CSYR2K( UPLO, TRANS, N, K, ALPHA, AA,
     $                                  LDA, BB, LDB, BETA, CC, LDC )
                        END IF
*
*                       Check if error-exit was taken incorrectly.
*
                        IF( .NOT.OK )THEN
                           WRITE( NOUT, FMT = 9992 )
                           FATAL = .TRUE.
                           GO TO 150
                        END IF
*
*                       See what data changed inside subroutines.
*
*                       ISAME( I ) refers to argument number I of the
*                       routine under test.
                        ISAME( 1 ) = UPLOS.EQ.UPLO
                        ISAME( 2 ) = TRANSS.EQ.TRANS
                        ISAME( 3 ) = NS.EQ.N
                        ISAME( 4 ) = KS.EQ.K
                        ISAME( 5 ) = ALS.EQ.ALPHA
                        ISAME( 6 ) = LCE( AS, AA, LAA )
                        ISAME( 7 ) = LDAS.EQ.LDA
                        ISAME( 8 ) = LCE( BS, BB, LBB )
                        ISAME( 9 ) = LDBS.EQ.LDB
                        IF( CONJ )THEN
                           ISAME( 10 ) = RBETS.EQ.RBETA
                        ELSE
                           ISAME( 10 ) = BETS.EQ.BETA
                        END IF
*                       C is the output argument, so LCERES rather
*                       than LCE is used unless NULL is set.  The
*                       type 'HE' is passed for both routines.
                        IF( NULL )THEN
                           ISAME( 11 ) = LCE( CS, CC, LCC )
                        ELSE
                           ISAME( 11 ) = LCERES( 'HE', UPLO, N, N, CS,
     $                                   CC, LDC )
                        END IF
                        ISAME( 12 ) = LDCS.EQ.LDC
*
*                       If data was incorrectly changed, report and
*                       return.
*
                        SAME = .TRUE.
                        DO 40 I = 1, NARGS
                           SAME = SAME.AND.ISAME( I )
                           IF( .NOT.ISAME( I ) )
     $                        WRITE( NOUT, FMT = 9998 )I
   40                   CONTINUE
                        IF( .NOT.SAME )THEN
                           FATAL = .TRUE.
                           GO TO 150
                        END IF
*
                        IF( .NOT.NULL )THEN
*
*                          Check the result column by column.
*
                           IF( CONJ )THEN
                              TRANST = 'C'
                           ELSE
                              TRANST = 'T'
                           END IF
*                          JJAB indexes AB and JC indexes CC at the
*                          first element checked in column J.
                           JJAB = 1
                           JC = 1
                           DO 70 J = 1, N
*                             Rows JJ to JJ + LJ - 1 of column J are
*                             checked: 1 to J for 'U', J to N for 'L'.
                              IF( UPPER )THEN
                                 JJ = 1
                                 LJ = J
                              ELSE
                                 JJ = J
                                 LJ = N - J + 1
                              END IF
                              IF( TRAN )THEN
*                                W( 1: K ) is ALPHA times column J of
*                                B, W( K + 1: 2*K ) is ALPHA (or its
*                                conjugate for CHER2K) times column J
*                                of A, both read from AB.
                                 DO 50 I = 1, K
                                    W( I ) = ALPHA*AB( ( J - 1 )*2*
     $                                       NMAX + K + I )
                                    IF( CONJ )THEN
                                       W( K + I ) = CONJG( ALPHA )*
     $                                              AB( ( J - 1 )*2*
     $                                              NMAX + I )
                                    ELSE
                                       W( K + I ) = ALPHA*
     $                                              AB( ( J - 1 )*2*
     $                                              NMAX + I )
                                    END IF
   50                            CONTINUE
                                 CALL CMMCH( TRANST, 'N', LJ, 1, 2*K,
     $                                       ONE, AB( JJAB ), 2*NMAX, W,
     $                                       2*NMAX, BETA, C( JJ, J ),
     $                                       NMAX, CT, G, CC( JC ), LDC,
     $                                       EPS, ERR, FATAL, NOUT,
     $                                       .TRUE. )
                              ELSE
*                                W( 1: K ) is built from row J of B
*                                and W( K + 1: 2*K ) from row J of A,
*                                conjugated as shown for CHER2K.
                                 DO 60 I = 1, K
                                    IF( CONJ )THEN
                                       W( I ) = ALPHA*CONJG( AB( ( K +
     $                                          I - 1 )*NMAX + J ) )
                                       W( K + I ) = CONJG( ALPHA*
     $                                              AB( ( I - 1 )*NMAX +
     $                                              J ) )
                                    ELSE
                                       W( I ) = ALPHA*AB( ( K + I - 1 )*
     $                                          NMAX + J )
                                       W( K + I ) = ALPHA*
     $                                              AB( ( I - 1 )*NMAX +
     $                                              J )
                                    END IF
   60                            CONTINUE
                                 CALL CMMCH( 'N', 'N', LJ, 1, 2*K, ONE,
     $                                       AB( JJ ), NMAX, W, 2*NMAX,
     $                                       BETA, C( JJ, J ), NMAX, CT,
     $                                       G, CC( JC ), LDC, EPS, ERR,
     $                                       FATAL, NOUT, .TRUE. )
                              END IF
*                             Step JC to the top of the next column
*                             of CC for 'U', or to its next diagonal
*                             element for 'L'.  JJAB moves on by one
*                             column of AB only in the 'L',
*                             transposed case.
                              IF( UPPER )THEN
                                 JC = JC + LDC
                              ELSE
                                 JC = JC + LDC + 1
                                 IF( TRAN )
     $                              JJAB = JJAB + 2*NMAX
                              END IF
                              ERRMAX = MAX( ERRMAX, ERR )
*                             If got really bad answer, report and
*                             return.
                              IF( FATAL )
     $                           GO TO 140
   70                      CONTINUE
                        END IF
*
   80                CONTINUE
*
   90             CONTINUE
*
  100          CONTINUE
*
  110       CONTINUE
*
  120    CONTINUE
*
  130 CONTINUE
*
*     Report result.
*
      IF( ERRMAX.LT.THRESH )THEN
         WRITE( NOUT, FMT = 9999 )SNAME, NC
      ELSE
         WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX
      END IF
      GO TO 160
*
*     Failure exits.  Label 140 is reached when FATAL is set after a
*     CMMCH call and first names the column; it then falls through
*     to label 150, which prints the failing call.
*
  140 CONTINUE
      IF( N.GT.1 )
     $   WRITE( NOUT, FMT = 9995 )J
*
  150 CONTINUE
      WRITE( NOUT, FMT = 9996 )SNAME
      IF( CONJ )THEN
         WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA,
     $      LDA, LDB, RBETA, LDC
      ELSE
         WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, N, K, ALPHA,
     $      LDA, LDB, BETA, LDC
      END IF
*
  160 CONTINUE
      RETURN
*
 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL',
     $      'S)' )
 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
     $      'ANGED INCORRECTLY *******' )
 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
     $      'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
     $      ' - SUSPECT *******' )
 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' )
 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
     $      ', C,', I3, ') .' )
 9993 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
     $      ',', F4.1, '), C,', I3, ') .' )
 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
     $      '******' )
*
*  End of CCHK5.
*
      END
      SUBROUTINE CCHKE( ISNUM, SRNAMT, NOUT )
*
*  Tests the error exits from the Level 3 Blas.
*  Requires a special version of the error-handling routine XERBLA.
*  A, B and C should not need to be defined.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
* * 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) * 3-19-92: Fix argument 12 in calls to CSYMM and CHEMM * with INFOT = 9 (eca) * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. COMPLEX ALPHA, BETA REAL RALPHA, RBETA * .. Local Arrays .. COMPLEX A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL CGEMM, CHEMM, CHER2K, CHERK, CHKXER, CSYMM, $ CSYR2K, CSYRK, CTRMM, CTRSM * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. * * Initialize ALPHA, BETA, RALPHA, and RBETA. * ALPHA = CMPLX( ONE, -ONE ) BETA = CMPLX( TWO, -TWO ) RALPHA = ONE RBETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 CALL CGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL CGEMM( '/', 'C', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL CGEMM( '/', 'T', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGEMM( 'N', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGEMM( 'C', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CGEMM( 'T', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'N', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'N', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL 
CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'N', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'C', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'C', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'C', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'T', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'T', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CGEMM( 'T', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'N', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'N', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'N', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'C', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'C', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'C', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'T', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'T', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CGEMM( 'T', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'N', 'N', 0, 0, -1, ALPHA, A, 1, 
B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'N', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'N', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'C', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'C', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'C', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'T', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'T', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CGEMM( 'T', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'N', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'C', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'C', 'C', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'C', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'T', 'C', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL CGEMM( 'T', 'T', 0, 0, 
2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'N', 'N', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'C', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'N', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'C', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'T', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'N', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'C', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CGEMM( 'T', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'N', 'C', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'C', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'C', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'C', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'T', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL 
CGEMM( 'T', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL CGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 20 INFOT = 1 CALL CHEMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHEMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHEMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHEMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHEMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHEMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHEMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHEMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHEMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHEMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHEMM( 'R', 'U', 
2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 30 INFOT = 1 CALL CSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CSYMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 1, 
B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 40 INFOT = 1 CALL CTRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTRMM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTRMM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTRMM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( 
SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRMM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 
'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRMM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'L', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRMM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, 
NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 50 INFOT = 1 CALL CTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CTRSM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CTRSM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CTRSM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'U', 
'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL CTRSM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, 
INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL CTRSM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'L', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CTRSM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'U', 'C', 'N', 2, 0, 
ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL CTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 60 INFOT = 1 CALL CHERK( '/', 'N', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHERK( 'U', 'T', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHERK( 'U', 'N', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHERK( 'U', 'C', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHERK( 'L', 'N', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHERK( 'L', 'C', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHERK( 'U', 'N', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) 
INFOT = 4 CALL CHERK( 'U', 'C', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHERK( 'L', 'N', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHERK( 'L', 'C', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHERK( 'U', 'N', 2, 0, RALPHA, A, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHERK( 'U', 'C', 0, 2, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHERK( 'L', 'N', 2, 0, RALPHA, A, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHERK( 'L', 'C', 0, 2, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHERK( 'U', 'N', 2, 0, RALPHA, A, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHERK( 'U', 'C', 2, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHERK( 'L', 'N', 2, 0, RALPHA, A, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CHERK( 'L', 'C', 2, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 70 INFOT = 1 CALL CSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CSYRK( 'U', 'C', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYRK( 'U', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYRK( 'U', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYRK( 'L', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYRK( 'L', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYRK( 'U', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, 
LERR, OK ) INFOT = 4 CALL CSYRK( 'U', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYRK( 'L', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYRK( 'L', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYRK( 'U', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYRK( 'U', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYRK( 'L', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYRK( 'L', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CSYRK( 'U', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CSYRK( 'U', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CSYRK( 'L', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL CSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 80 INFOT = 1 CALL CHER2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CHER2K( 'U', 'T', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHER2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHER2K( 'U', 'C', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHER2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CHER2K( 'L', 'C', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHER2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, 
RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHER2K( 'U', 'C', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHER2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CHER2K( 'L', 'C', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2K( 'U', 'C', 0, 2, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CHER2K( 'L', 'C', 0, 2, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2K( 'U', 'C', 0, 2, ALPHA, A, 2, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CHER2K( 'L', 'C', 0, 2, ALPHA, A, 2, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHER2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHER2K( 'U', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHER2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CHER2K( 'L', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 90 INFOT = 1 CALL CSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL CSYR2K( 'U', 'C', 0, 0, ALPHA, A, 
1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYR2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYR2K( 'U', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYR2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL CSYR2K( 'L', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYR2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYR2K( 'U', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYR2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL CSYR2K( 'L', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYR2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYR2K( 'U', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYR2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL CSYR2K( 'L', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYR2K( 'U', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL CSYR2K( 'L', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL CSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 
1 )
      CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
      INFOT = 12
      CALL CSYR2K( 'U', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
      CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
      INFOT = 12
      CALL CSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 )
      CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
      INFOT = 12
      CALL CSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 )
      CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
*
*     Common exit: report whether every error exit was detected.
  100 IF( OK )THEN
         WRITE( NOUT, FMT = 9999 )SRNAMT
      ELSE
         WRITE( NOUT, FMT = 9998 )SRNAMT
      END IF
      RETURN
*
 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' )
 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****',
     $      '**' )
*
*     End of CCHKE.
*
      END
      SUBROUTINE CMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, RESET,
     $                  TRANSL )
*
*  Generates values for an M by N matrix A.
*  Stores the values in the array AA in the data structure required
*  by the routine, with unwanted elements set to rogue value.
*
*  TYPE is 'GE', 'HE', 'SY' or 'TR'.
*
*  Arguments (as used below):
*  TYPE   - 'GE': all M by N elements are generated and stored.
*           'HE', 'SY', 'TR': only the triangle selected by UPLO is
*           generated; the other triangle of A is filled with the
*           conjugate ('HE'), a copy ('SY') or zero ('TR').
*  UPLO   - 'U' or 'L'; ignored when TYPE is 'GE'.
*  DIAG   - 'U' together with TYPE = 'TR' gives a unit diagonal,
*           which is not copied into AA.
*  A      - receives the generated matrix (leading dimension NMAX).
*  AA     - receives the matrix column by column with leading
*           dimension LDA; every element that is not copied from A
*           is set to ROGUE.
*  RESET  - passed on to CBEG on every call.
*  TRANSL - added to every generated element.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Parameters ..
      COMPLEX            ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0, 0.0 ), ONE = ( 1.0, 0.0 ) )
      COMPLEX            ROGUE
      PARAMETER          ( ROGUE = ( -1.0E10, 1.0E10 ) )
      REAL               RZERO
      PARAMETER          ( RZERO = 0.0 )
      REAL               RROGUE
      PARAMETER          ( RROGUE = -1.0E10 )
*     .. Scalar Arguments ..
      COMPLEX            TRANSL
      INTEGER            LDA, M, N, NMAX
      LOGICAL            RESET
      CHARACTER*1        DIAG, UPLO
      CHARACTER*2        TYPE
*     .. Array Arguments ..
      COMPLEX            A( NMAX, * ), AA( * )
*     .. Local Scalars ..
      INTEGER            I, IBEG, IEND, J, JJ
      LOGICAL            GEN, HER, LOWER, SYM, TRI, UNIT, UPPER
*     .. External Functions ..
      COMPLEX            CBEG
      EXTERNAL           CBEG
*     .. Intrinsic Functions ..
      INTRINSIC          CMPLX, CONJG, REAL
*     .. Executable Statements ..
*     Decode the matrix type once; UPPER/LOWER are only ever true for
*     the non-general types.
      GEN = TYPE.EQ.'GE'
      HER = TYPE.EQ.'HE'
      SYM = TYPE.EQ.'SY'
      TRI = TYPE.EQ.'TR'
      UPPER = ( HER.OR.SYM.OR.TRI ).AND.UPLO.EQ.'U'
      LOWER = ( HER.OR.SYM.OR.TRI ).AND.UPLO.EQ.'L'
      UNIT = TRI.AND.DIAG.EQ.'U'
*
*     Generate data in array A.
*
      DO 20 J = 1, N
         DO 10 I = 1, M
            IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) )
     $          THEN
               A( I, J ) = CBEG( RESET ) + TRANSL
               IF( I.NE.J )THEN
*                 Set some elements to zero
*                 (the off-diagonal part of column N/2 when N > 3).
                  IF( N.GT.3.AND.J.EQ.N/2 )
     $               A( I, J ) = ZERO
*                 Fill the mirror element in the other triangle.
                  IF( HER )THEN
                     A( J, I ) = CONJG( A( I, J ) )
                  ELSE IF( SYM )THEN
                     A( J, I ) = A( I, J )
                  ELSE IF( TRI )THEN
                     A( J, I ) = ZERO
                  END IF
               END IF
            END IF
   10    CONTINUE
*        Diagonal fix-ups: real diagonal for 'HE', diagonal plus ONE
*        for 'TR', and exactly ONE for a unit triangular matrix.
         IF( HER )
     $      A( J, J ) = CMPLX( REAL( A( J, J ) ), RZERO )
         IF( TRI )
     $      A( J, J ) = A( J, J ) + ONE
         IF( UNIT )
     $      A( J, J ) = ONE
   20 CONTINUE
*
*     Store elements in array AA in data structure required by routine.
*
      IF( TYPE.EQ.'GE' )THEN
         DO 50 J = 1, N
            DO 30 I = 1, M
               AA( I + ( J - 1 )*LDA ) = A( I, J )
   30       CONTINUE
*           Rows M+1..LDA of each column are padding.
            DO 40 I = M + 1, LDA
               AA( I + ( J - 1 )*LDA ) = ROGUE
   40       CONTINUE
   50    CONTINUE
      ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN
         DO 90 J = 1, N
*           IBEG..IEND is the row range of column J that is copied
*           from A; a unit diagonal is excluded from that range.
            IF( UPPER )THEN
               IBEG = 1
               IF( UNIT )THEN
                  IEND = J - 1
               ELSE
                  IEND = J
               END IF
            ELSE
               IF( UNIT )THEN
                  IBEG = J + 1
               ELSE
                  IBEG = J
               END IF
               IEND = N
            END IF
            DO 60 I = 1, IBEG - 1
               AA( I + ( J - 1 )*LDA ) = ROGUE
   60       CONTINUE
            DO 70 I = IBEG, IEND
               AA( I + ( J - 1 )*LDA ) = A( I, J )
   70       CONTINUE
            DO 80 I = IEND + 1, LDA
               AA( I + ( J - 1 )*LDA ) = ROGUE
   80       CONTINUE
*           For 'HE' the imaginary part of the stored diagonal is
*           replaced by the rogue value RROGUE.
            IF( HER )THEN
               JJ = J + ( J - 1 )*LDA
               AA( JJ ) = CMPLX( REAL( AA( JJ ) ), RROGUE )
            END IF
   90    CONTINUE
      END IF
      RETURN
*
*     End of CMAKE.
*
      END
      SUBROUTINE CMMCH( TRANSA, TRANSB, M, N, KK, ALPHA, A, LDA, B, LDB,
     $                  BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL,
     $                  NOUT, MV )
*
*  Checks the results of the computational tests.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*  Arguments (as used below):
*  TRANSA, TRANSB - 'T' or 'C' selects the transposed operand; 'C'
*                   additionally conjugates it. Any other value
*                   means the operand is used as stored.
*  M, N, KK       - the result is M by N; KK is the summation length.
*  ALPHA, BETA    - scalars in ALPHA*op(A)*op(B) + BETA*C.
*  CT             - workspace of length M for one expected column.
*  G              - workspace of length M for the error gauges.
*  CC             - computed result that is compared against CT.
*  EPS            - divisor used in the error ratio (presumably the
*                   machine precision; it is supplied by the caller).
*  ERR            - on return, the largest error ratio found in the
*                   last column processed (reset for every column).
*  FATAL          - set to .TRUE. when a result is less than half
*                   accurate; it is never cleared here.
*  NOUT           - unit number used for the error report.
*  MV             - only selects the order in which CT and CC are
*                   printed in the error report.
*
*     .. Parameters ..
      COMPLEX            ZERO
      PARAMETER          ( ZERO = ( 0.0, 0.0 ) )
      REAL               RZERO, RONE
      PARAMETER          ( RZERO = 0.0, RONE = 1.0 )
*     .. Scalar Arguments ..
      COMPLEX            ALPHA, BETA
      REAL               EPS, ERR
      INTEGER            KK, LDA, LDB, LDC, LDCC, M, N, NOUT
      LOGICAL            FATAL, MV
      CHARACTER*1        TRANSA, TRANSB
*     .. Array Arguments ..
      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * ),
     $                   CC( LDCC, * ), CT( * )
      REAL               G( * )
*     .. Local Scalars ..
      COMPLEX            CL
      REAL               ERRI
      INTEGER            I, J, K
      LOGICAL            CTRANA, CTRANB, TRANA, TRANB
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, AIMAG, CONJG, MAX, REAL, SQRT
*     .. Statement Functions ..
      REAL               ABS1
*     .. Statement Function definitions ..
*     ABS1 is the sum of the absolute real and imaginary parts.
      ABS1( CL ) = ABS( REAL( CL ) ) + ABS( AIMAG( CL ) )
*     .. Executable Statements ..
      TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C'
      TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C'
      CTRANA = TRANSA.EQ.'C'
      CTRANB = TRANSB.EQ.'C'
*
*     Compute expected result, one column at a time, in CT using data
*     in A, B and C.
*     Compute gauges in G.
*
      DO 220 J = 1, N
*
         DO 10 I = 1, M
            CT( I ) = ZERO
            G( I ) = RZERO
   10    CONTINUE
*        One pair of loops per combination of TRANSA / TRANSB; the
*        gauge G accumulates the same products with ABS1 applied.
         IF( .NOT.TRANA.AND..NOT.TRANB )THEN
            DO 30 K = 1, KK
               DO 20 I = 1, M
                  CT( I ) = CT( I ) + A( I, K )*B( K, J )
                  G( I ) = G( I ) + ABS1( A( I, K ) )*ABS1( B( K, J ) )
   20          CONTINUE
   30       CONTINUE
         ELSE IF( TRANA.AND..NOT.TRANB )THEN
            IF( CTRANA )THEN
               DO 50 K = 1, KK
                  DO 40 I = 1, M
                     CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( K, J )
                     G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                        ABS1( B( K, J ) )
   40             CONTINUE
   50          CONTINUE
            ELSE
               DO 70 K = 1, KK
                  DO 60 I = 1, M
                     CT( I ) = CT( I ) + A( K, I )*B( K, J )
                     G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                        ABS1( B( K, J ) )
   60             CONTINUE
   70          CONTINUE
            END IF
         ELSE IF( .NOT.TRANA.AND.TRANB )THEN
            IF( CTRANB )THEN
               DO 90 K = 1, KK
                  DO 80 I = 1, M
                     CT( I ) = CT( I ) + A( I, K )*CONJG( B( J, K ) )
                     G( I ) = G( I ) + ABS1( A( I, K ) )*
     $                        ABS1( B( J, K ) )
   80             CONTINUE
   90          CONTINUE
            ELSE
               DO 110 K = 1, KK
                  DO 100 I = 1, M
                     CT( I ) = CT( I ) + A( I, K )*B( J, K )
                     G( I ) = G( I ) + ABS1( A( I, K ) )*
     $                        ABS1( B( J, K ) )
  100             CONTINUE
  110          CONTINUE
            END IF
         ELSE IF( TRANA.AND.TRANB )THEN
            IF( CTRANA )THEN
               IF( CTRANB )THEN
                  DO 130 K = 1, KK
                     DO 120 I = 1, M
                        CT( I ) = CT( I ) + CONJG( A( K, I ) )*
     $                            CONJG( B( J, K ) )
                        G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                           ABS1( B( J, K ) )
  120                CONTINUE
  130             CONTINUE
               ELSE
                  DO 150 K = 1, KK
                     DO 140 I = 1, M
                        CT( I ) = CT( I ) + CONJG( A( K, I ) )*B( J, K )
                        G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                           ABS1( B( J, K ) )
  140                CONTINUE
  150             CONTINUE
               END IF
            ELSE
               IF( CTRANB )THEN
                  DO 170 K = 1, KK
                     DO 160 I = 1, M
                        CT( I ) = CT( I ) + A( K, I )*CONJG( B( J, K ) )
                        G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                           ABS1( B( J, K ) )
  160                CONTINUE
  170             CONTINUE
               ELSE
                  DO 190 K = 1, KK
                     DO 180 I = 1, M
                        CT( I ) = CT( I ) + A( K, I )*B( J, K )
                        G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                           ABS1( B( J, K ) )
  180                CONTINUE
  190             CONTINUE
               END IF
            END IF
         END IF
         DO 200 I = 1, M
            CT( I ) = ALPHA*CT( I ) + BETA*C( I, J )
            G( I ) = ABS1( ALPHA )*G( I ) +
     $               ABS1( BETA )*ABS1( C( I, J ) )
  200    CONTINUE
*
*        Compute the error ratio for this result.
*
*        ERR is REAL, so it is cleared with the REAL constant RZERO
*        rather than the COMPLEX constant ZERO (same value, but no
*        implicit complex-to-real conversion).  Note that ERR is
*        reset for every column J.
         ERR = RZERO
         DO 210 I = 1, M
            ERRI = ABS1( CT( I ) - CC( I, J ) )/EPS
            IF( G( I ).NE.RZERO )
     $         ERRI = ERRI/G( I )
            ERR = MAX( ERR, ERRI )
*           Abandon the check as soon as one element is less than
*           half accurate.
            IF( ERR*SQRT( EPS ).GE.RONE )
     $         GO TO 230
  210    CONTINUE
*
  220 CONTINUE
*
*     If the loop completes, all results are at least half accurate.
      GO TO 250
*
*     Report fatal error.
*
*     I and J still identify the failing column here; the whole
*     column J is printed.
  230 FATAL = .TRUE.
      WRITE( NOUT, FMT = 9999 )
      DO 240 I = 1, M
         IF( MV )THEN
            WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J )
         ELSE
            WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I )
         END IF
  240 CONTINUE
      IF( N.GT.1 )
     $   WRITE( NOUT, FMT = 9997 )J
*
  250 CONTINUE
      RETURN
*
 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL',
     $      'F ACCURATE *******', /' EXPECTED RE',
     $      'SULT COMPUTED RESULT' )
 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) )
 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
*
*     End of CMMCH.
*
      END
      LOGICAL FUNCTION LCE( RI, RJ, LR )
*
*  Tests if two arrays are identical.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*  LCE is .TRUE. when the first LR elements of RI and RJ compare
*  equal (also when LR is less than 1, since nothing is compared).
*
*     .. Scalar Arguments ..
      INTEGER            LR
*     .. Array Arguments ..
      COMPLEX            RI( * ), RJ( * )
*     .. Local Scalars ..
      INTEGER            I
*     .. Executable Statements ..
*     Leave the loop at the first mismatch.
      DO 10 I = 1, LR
         IF( RI( I ).NE.RJ( I ) )
     $      GO TO 20
   10 CONTINUE
      LCE = .TRUE.
      GO TO 30
   20 CONTINUE
      LCE = .FALSE.
   30 RETURN
*
*     End of LCE.
*
      END
      LOGICAL FUNCTION LCERES( TYPE, UPLO, M, N, AA, AS, LDA )
*
*  Tests if selected elements in two arrays are equal.
*
*  TYPE is 'GE' or 'HE' or 'SY'.
*
*  The elements compared are those outside the stored matrix:
*  for 'GE' rows M+1..LDA of each of the N columns, for 'HE' and
*  'SY' the rows of each column that lie outside the triangle
*  selected by UPLO ('U' = upper, anything else = lower), up to row
*  LDA.  For any other TYPE nothing is compared and the result is
*  .TRUE..
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      INTEGER            LDA, M, N
      CHARACTER*1        UPLO
      CHARACTER*2        TYPE
*     .. Array Arguments ..
      COMPLEX            AA( LDA, * ), AS( LDA, * )
*     .. Local Scalars ..
      INTEGER            I, IBEG, IEND, J
      LOGICAL            UPPER
*     .. Executable Statements ..
      UPPER = UPLO.EQ.'U'
      IF( TYPE.EQ.'GE' )THEN
         DO 20 J = 1, N
            DO 10 I = M + 1, LDA
               IF( AA( I, J ).NE.AS( I, J ) )
     $            GO TO 70
   10       CONTINUE
   20    CONTINUE
      ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'SY' )THEN
         DO 50 J = 1, N
*           IBEG..IEND is the referenced part of column J; only the
*           rows before IBEG and after IEND are compared.
            IF( UPPER )THEN
               IBEG = 1
               IEND = J
            ELSE
               IBEG = J
               IEND = N
            END IF
            DO 30 I = 1, IBEG - 1
               IF( AA( I, J ).NE.AS( I, J ) )
     $            GO TO 70
   30       CONTINUE
            DO 40 I = IEND + 1, LDA
               IF( AA( I, J ).NE.AS( I, J ) )
     $            GO TO 70
   40       CONTINUE
   50    CONTINUE
      END IF
*
      LCERES = .TRUE.
      GO TO 80
*     Label 70 is reached from the first mismatch found above.
   70 CONTINUE
      LCERES = .FALSE.
   80 RETURN
*
*     End of LCERES.
*
      END
      COMPLEX FUNCTION CBEG( RESET )
*
*  Generates complex numbers as pairs of random numbers uniformly
*  distributed between -0.5 and 0.5.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*  RESET is both input and output: when it is .TRUE. on entry the
*  generator state is re-initialised and RESET is set to .FALSE.
*  before returning, so the actual argument must be a variable.
*  The state (I, IC, J, MI, MJ) is kept between calls by SAVE.
*
*     .. Scalar Arguments ..
      LOGICAL            RESET
*     .. Local Scalars ..
      INTEGER            I, IC, J, MI, MJ
*     .. Save statement ..
      SAVE               I, IC, J, MI, MJ
*     .. Intrinsic Functions ..
      INTRINSIC          CMPLX
*     .. Executable Statements ..
      IF( RESET )THEN
*        Initialize local variables.
         MI = 891
         MJ = 457
         I = 7
         J = 7
         IC = 0
         RESET = .FALSE.
      END IF
*
*     The sequence of values of I or J is bounded between 1 and 999.
*     If initial I or J = 1,2,3,6,7 or 9, the period will be 50.
*     If initial I or J = 4 or 8, the period will be 25.
*     If initial I or J = 5, the period will be 10.
*     IC is used to break up the period by skipping 1 value of I or J
*     in 6.
*
      IC = IC + 1
*     Advance both sequences: multiply, then reduce modulo 1000.
   10 I = I*MI
      J = J*MJ
      I = I - 1000*( I/1000 )
      J = J - 1000*( J/1000 )
*     Every fifth call takes one extra step (the skipped value).
      IF( IC.GE.5 )THEN
         IC = 0
         GO TO 10
      END IF
      CBEG = CMPLX( ( I - 500 )/1001.0, ( J - 500 )/1001.0 )
      RETURN
*
*     End of CBEG.
*
      END
      REAL FUNCTION SDIFF( X, Y )
*
*  Returns the difference X - Y.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      REAL               X, Y
*     .. Executable Statements ..
      SDIFF = X - Y
      RETURN
*
*     End of SDIFF.
*
      END
      SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
*
*  Tests whether XERBLA has detected an error when it should.
*
*  If LERR is .FALSE. on entry, a message naming parameter number
*  INFOT and routine SRNAMT is written to unit NOUT and OK is set to
*  .FALSE..  LERR is always .FALSE. on return, ready for the next
*  error-exit test.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      INTEGER            INFOT, NOUT
      LOGICAL            LERR, OK
      CHARACTER*6        SRNAMT
*     .. Executable Statements ..
      IF( .NOT.LERR )THEN
         WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT
         OK = .FALSE.
      END IF
      LERR = .FALSE.
      RETURN
*
 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D',
     $      'ETECTED BY ', A6, ' *****' )
*
*     End of CHKXER.
*
      END
      SUBROUTINE XERBLA( SRNAME, INFO )
*
*  This is a special version of XERBLA to be used only as part of
*  the test program for testing error exits from the Level 3 BLAS
*  routines.
*
*  XERBLA is an error handler for the Level 3 BLAS routines.
*
*  It is called by the Level 3 BLAS routines if an input parameter is
*  invalid.
*
*  This version does not stop the program.  It sets LERR in common
*  block /INFOC/ to .TRUE. and compares its arguments with the
*  expected values held in common: INFO against INFOT and SRNAME
*  against SRNAMT.  Each mismatch is reported on unit NOUT and sets
*  OK to .FALSE..
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      INTEGER            INFO
      CHARACTER*6        SRNAME
*     .. Scalars in Common ..
      INTEGER            INFOT, NOUT
      LOGICAL            LERR, OK
      CHARACTER*6        SRNAMT
*     .. Common blocks ..
      COMMON             /INFOC/INFOT, NOUT, OK, LERR
      COMMON             /SRNAMC/SRNAMT
*     .. Executable Statements ..
*     Record that the error handler was reached.
      LERR = .TRUE.
      IF( INFO.NE.INFOT )THEN
*        INFOT = 0 gets the shorter message without an expected
*        parameter number.
         IF( INFOT.NE.0 )THEN
            WRITE( NOUT, FMT = 9999 )INFO, INFOT
         ELSE
            WRITE( NOUT, FMT = 9997 )INFO
         END IF
         OK = .FALSE.
      END IF
      IF( SRNAME.NE.SRNAMT )THEN
         WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT
         OK = .FALSE.
      END IF
      RETURN
*
 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD',
     $      ' OF ', I2, ' *******' )
 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE',
     $      'AD OF ', A6, ' *******' )
 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6,
     $      ' *******' )
*
*     End of XERBLA
*
      END
blis-0.6.1/blastest/src/fortran/dblat1.f000066400000000000000000001306651360743507500200750ustar00rootroot00000000000000*> \brief \b DBLAT1
*
*  =========== DOCUMENTATION ===========
*
* Online html documentation available at
*            http://www.netlib.org/lapack/explore-html/
*
*  Definition:
*  ===========
*
*       PROGRAM DBLAT1
*
*
*> \par Purpose:
*  =============
*>
*> \verbatim
*>
*>    Test program for the DOUBLE PRECISION Level 1 BLAS.
*>
*>    Based upon the original BLAS test routine together with:
*>    F06EAF Example Program Text
*> \endverbatim
*
*  Authors:
*  ========
*
*> \author Univ.
of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date April 2012
*
*> \ingroup double_blas_testing
*
*  =====================================================================
      PROGRAM DBLAT1
*
*  -- Reference BLAS test routine (version 3.4.1) --
*  -- Reference BLAS is a software package provided by Univ. of Tennessee, --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*     April 2012
*
*  =====================================================================
*
*  Driver: runs test cases 1 to 13 in turn.  For each case it prints
*  a header, resets the shared state in common block /COMBLA/, calls
*  the CHECKn routine that handles the case and prints the PASS line
*  if PASS is still .TRUE. afterwards.
*
*     .. Parameters ..
*     All output goes to unit NOUT.
      INTEGER          NOUT
      PARAMETER        (NOUT=6)
*     .. Scalars in Common ..
      INTEGER          ICASE, INCX, INCY, N
      LOGICAL          PASS
*     .. Local Scalars ..
      DOUBLE PRECISION SFAC
      INTEGER          IC
*     .. External Subroutines ..
      EXTERNAL         CHECK0, CHECK1, CHECK2, CHECK3, HEADER
*     .. Common blocks ..
      COMMON           /COMBLA/ICASE, N, INCX, INCY, PASS
*     .. Data statements ..
*     SFAC = 2**(-10); it is passed unchanged to every CHECKn routine.
      DATA             SFAC/9.765625D-4/
*     .. Executable Statements ..
      WRITE (NOUT,99999)
      DO 20 IC = 1, 13
         ICASE = IC
         CALL HEADER
*
*        .. Initialize PASS, INCX, and INCY for a new case. ..
*        .. the value 9999 for INCX or INCY will appear in the ..
*        .. detailed output, if any, for cases that do not involve ..
*        .. these parameters ..
*
         PASS = .TRUE.
         INCX = 9999
         INCY = 9999
*        Dispatch on the case number: 3 and 11 go to CHECK0, 7-10 to
*        CHECK1, 1, 2, 5, 6, 12 and 13 to CHECK2, and 4 to CHECK3.
         IF (ICASE.EQ.3 .OR. ICASE.EQ.11) THEN
            CALL CHECK0(SFAC)
         ELSE IF (ICASE.EQ.7 .OR. ICASE.EQ.8 .OR. ICASE.EQ.9 .OR.
     +            ICASE.EQ.10) THEN
            CALL CHECK1(SFAC)
         ELSE IF (ICASE.EQ.1 .OR. ICASE.EQ.2 .OR. ICASE.EQ.5 .OR.
     +            ICASE.EQ.6 .OR. ICASE.EQ.12 .OR. ICASE.EQ.13) THEN
            CALL CHECK2(SFAC)
         ELSE IF (ICASE.EQ.4) THEN
            CALL CHECK3(SFAC)
         END IF
*        -- Print
         IF (PASS) WRITE (NOUT,99998)
   20 CONTINUE
      STOP
*
99999 FORMAT (' Real BLAS Test Program Results',/1X)
99998 FORMAT (' ----- PASS -----')
      END
      SUBROUTINE HEADER
*     Prints the heading for the current test case: the case number
*     ICASE (from common block /COMBLA/) and the matching name L(ICASE).
*     .. Parameters ..
      INTEGER          NOUT
      PARAMETER        (NOUT=6)
*     .. Scalars in Common ..
      INTEGER          ICASE, INCX, INCY, N
      LOGICAL          PASS
*     .. Local Arrays ..
      CHARACTER*6      L(13)
*     .. Common blocks ..
      COMMON           /COMBLA/ICASE, N, INCX, INCY, PASS
*     .. Data statements ..
*     L(k) is the six-character name printed for test case k.
      DATA             L(1)/' DDOT '/
      DATA             L(2)/'DAXPY '/
      DATA             L(3)/'DROTG '/
      DATA             L(4)/' DROT '/
      DATA             L(5)/'DCOPY '/
      DATA             L(6)/'DSWAP '/
      DATA             L(7)/'DNRM2 '/
      DATA             L(8)/'DASUM '/
      DATA             L(9)/'DSCAL '/
      DATA             L(10)/'IDAMAX'/
      DATA             L(11)/'DROTMG'/
      DATA             L(12)/'DROTM '/
      DATA             L(13)/'DSDOT '/
*     .. Executable Statements ..
      WRITE (NOUT,99999) ICASE, L(ICASE)
      RETURN
*
99999 FORMAT (/' Test of subprogram number',I3,12X,A6)
      END
      SUBROUTINE CHECK0(SFAC)
*
*     Tests DROTG (ICASE = 3) and DROTMG (ICASE = 11).
*
*     SFAC is passed through unchanged to STEST / STEST1, which are
*     defined elsewhere in this file (presumably they compare the
*     computed values with the expected ones -- confirm there).
*
*     DROTG is run on the 8 input pairs DA1(K), DB1(K).  DROTMG is
*     run on the 9 input columns DAB(1:4,K) and the 9 values left in
*     DTEMP are checked against DTRUE(1:9,K).  N is set to the data
*     set number K so that it appears in any detailed output.
*
*     .. Parameters ..
      INTEGER           NOUT
      PARAMETER         (NOUT=6)
*     .. Scalar Arguments ..
      DOUBLE PRECISION  SFAC
*     .. Scalars in Common ..
      INTEGER           ICASE, INCX, INCY, N
      LOGICAL           PASS
*     .. Local Scalars ..
      DOUBLE PRECISION  SA, SB, SC, SS, D12
      INTEGER           I, K
*     .. Local Arrays ..
      DOUBLE PRECISION  DA1(8), DATRUE(8), DB1(8), DBTRUE(8), DC1(8),
     $                  DS1(8), DAB(4,9), DTEMP(9), DTRUE(9,9)
*     .. External Subroutines ..
*     STEST is called below as well, so it is listed with the others.
      EXTERNAL          DROTG, DROTMG, STEST, STEST1
*     .. Common blocks ..
      COMMON            /COMBLA/ICASE, N, INCX, INCY, PASS
*     .. Data statements ..
      DATA              DA1/0.3D0, 0.4D0, -0.3D0, -0.4D0, -0.3D0, 0.0D0,
     +                  0.0D0, 1.0D0/
      DATA              DB1/0.4D0, 0.3D0, 0.4D0, 0.3D0, -0.4D0, 0.0D0,
     +                  1.0D0, 0.0D0/
      DATA              DC1/0.6D0, 0.8D0, -0.6D0, 0.8D0, 0.6D0, 1.0D0,
     +                  0.0D0, 1.0D0/
      DATA              DS1/0.8D0, 0.6D0, 0.8D0, -0.6D0, 0.8D0, 0.0D0,
     +                  1.0D0, 0.0D0/
      DATA              DATRUE/0.5D0, 0.5D0, 0.5D0, -0.5D0, -0.5D0,
     +                  0.0D0, 1.0D0, 1.0D0/
      DATA              DBTRUE/0.0D0, 0.6D0, 0.0D0, -0.6D0, 0.0D0,
     +                  0.0D0, 1.0D0, 0.0D0/
*     INPUT FOR MODIFIED GIVENS
      DATA DAB/ .1D0,.3D0,1.2D0,.2D0,
     A          .7D0, .2D0, .6D0, 4.2D0,
     B          0.D0,0.D0,0.D0,0.D0,
     C          4.D0, -1.D0, 2.D0, 4.D0,
     D          6.D-10, 2.D-2, 1.D5, 10.D0,
     E          4.D10, 2.D-2, 1.D-5, 10.D0,
     F          2.D-10, 4.D-2, 1.D5, 10.D0,
     G          2.D10, 4.D-2, 1.D-5, 10.D0,
     H          4.D0, -2.D0, 8.D0, 4.D0 /
*     TRUE RESULTS FOR MODIFIED GIVENS
      DATA DTRUE/0.D0,0.D0, 1.3D0, .2D0, 0.D0,0.D0,0.D0, .5D0, 0.D0,
     A           0.D0,0.D0, 4.5D0, 4.2D0, 1.D0, .5D0, 0.D0,0.D0,0.D0,
     B           0.D0,0.D0,0.D0,0.D0, -2.D0, 0.D0,0.D0,0.D0,0.D0,
     C           0.D0,0.D0,0.D0, 4.D0, -1.D0, 0.D0,0.D0,0.D0,0.D0,
     D           0.D0, 15.D-3, 0.D0, 10.D0, -1.D0, 0.D0, -1.D-4,
     E           0.D0, 1.D0,
     F           0.D0,0.D0, 6144.D-5, 10.D0, -1.D0, 4096.D0, -1.D6,
     G           0.D0, 1.D0,
     H           0.D0,0.D0,15.D0,10.D0,-1.D0, 5.D-5, 0.D0,1.D0,0.D0,
     I           0.D0,0.D0, 15.D0, 10.D0, -1.D0, 5.D5, -4096.D0,
     J           1.D0, 4096.D-6,
     K           0.D0,0.D0, 7.D0, 4.D0, 0.D0,0.D0, -.5D0, -.25D0, 0.D0/
*                   4096 = 2 ** 12
      DATA D12  /4096.D0/
*     Entries of DTRUE that are not exact decimals are filled in here;
*     they overwrite the placeholders given in the DATA statement.
      DTRUE(1,1) = 12.D0 / 130.D0
      DTRUE(2,1) = 36.D0 / 130.D0
      DTRUE(7,1) = -1.D0 / 6.D0
      DTRUE(1,2) = 14.D0 / 75.D0
      DTRUE(2,2) = 49.D0 / 75.D0
      DTRUE(9,2) = 1.D0 / 7.D0
      DTRUE(1,5) = 45.D-11 * (D12 * D12)
      DTRUE(3,5) = 4.D5 / (3.D0 * D12)
      DTRUE(6,5) = 1.D0 / D12
      DTRUE(8,5) = 1.D4 / (3.D0 * D12)
      DTRUE(1,6) = 4.D10 / (1.5D0 * D12 * D12)
      DTRUE(2,6) = 2.D-2 / 1.5D0
      DTRUE(8,6) = 5.D-7 * D12
      DTRUE(1,7) = 4.D0 / 150.D0
      DTRUE(2,7) = (2.D-10 / 1.5D0) * (D12 * D12)
      DTRUE(7,7) = -DTRUE(6,5)
      DTRUE(9,7) = 1.D4 / D12
      DTRUE(1,8) = DTRUE(1,7)
      DTRUE(2,8) = 2.D10 / (1.5D0 * D12 * D12)
      DTRUE(1,9) = 32.D0 / 7.D0
      DTRUE(2,9) = -16.D0 / 7.D0
*     .. Executable Statements ..
*
*     Compute true values which cannot be prestored
*     in decimal notation
*
      DBTRUE(1) = 1.0D0/0.6D0
      DBTRUE(3) = -1.0D0/0.6D0
      DBTRUE(5) = 1.0D0/0.6D0
*
*     The loop runs over all 9 data sets.  DROTMG has 9 (DAB and
*     DTRUE have 9 columns); DROTG has only 8 and leaves the loop
*     through the K.GT.8 guard.  With the former upper bound of 8
*     that guard could never fire and the ninth DROTMG data set was
*     never exercised.
      DO 20 K = 1, 9
*        .. Set N=K for identification in output if any ..
         N = K
         IF (ICASE.EQ.3) THEN
*           .. DROTG ..
            IF (K.GT.8) GO TO 40
            SA = DA1(K)
            SB = DB1(K)
            CALL DROTG(SA,SB,SC,SS)
            CALL STEST1(SA,DATRUE(K),DATRUE(K),SFAC)
            CALL STEST1(SB,DBTRUE(K),DBTRUE(K),SFAC)
            CALL STEST1(SC,DC1(K),DC1(K),SFAC)
            CALL STEST1(SS,DS1(K),DS1(K),SFAC)
         ELSEIF (ICASE.EQ.11) THEN
*           .. DROTMG ..
*           DTEMP(1:4) holds the four scalar arguments and
*           DTEMP(5:9) the parameter array, cleared before the call.
            DO I=1,4
               DTEMP(I)= DAB(I,K)
               DTEMP(I+4) = 0.0D0
            END DO
            DTEMP(9) = 0.0D0
            CALL DROTMG(DTEMP(1),DTEMP(2),DTEMP(3),DTEMP(4),DTEMP(5))
            CALL STEST(9,DTEMP,DTRUE(1,K),DTRUE(1,K),SFAC)
         ELSE
            WRITE (NOUT,*) ' Shouldn''t be here in CHECK0'
            STOP
         END IF
   20 CONTINUE
   40 RETURN
      END
      SUBROUTINE CHECK1(SFAC)
*     .. Parameters ..
      INTEGER           NOUT
      PARAMETER         (NOUT=6)
*     .. Scalar Arguments ..
      DOUBLE PRECISION  SFAC
*     .. Scalars in Common ..
      INTEGER           ICASE, INCX, INCY, N
      LOGICAL           PASS
*     .. Local Scalars ..
      INTEGER           I, LEN, NP1
*     .. Local Arrays ..
*     DV(:,NP1,INCX) is the input vector for vector length N = NP1-1
*     and increment INCX.  DTRUE1, DTRUE3 and ITRUE2 are indexed by
*     NP1 and hold the expected DNRM2, DASUM and IDAMAX results;
*     DTRUE5(:,NP1,INCX) is the expected vector after DSCAL.
      DOUBLE PRECISION  DTRUE1(5), DTRUE3(5), DTRUE5(8,5,2), DV(8,5,2),
     +                  SA(10), STEMP(1), STRUE(8), SX(8)
      INTEGER           ITRUE2(5)
*     .. External Functions ..
      DOUBLE PRECISION  DASUM, DNRM2
      INTEGER           IDAMAX
      EXTERNAL          DASUM, DNRM2, IDAMAX
*     .. External Subroutines ..
      EXTERNAL          ITEST1, DSCAL, STEST, STEST1
*     .. Intrinsic Functions ..
      INTRINSIC         MAX
*     .. Common blocks ..
      COMMON            /COMBLA/ICASE, N, INCX, INCY, PASS
*     .. Data statements ..
      DATA              SA/0.3D0, -1.0D0, 0.0D0, 1.0D0, 0.3D0, 0.3D0,
     +                  0.3D0, 0.3D0, 0.3D0, 0.3D0/
      DATA              DV/0.1D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0,
     +                  2.0D0, 2.0D0, 0.3D0, 3.0D0, 3.0D0, 3.0D0, 3.0D0,
     +                  3.0D0, 3.0D0, 3.0D0, 0.3D0, -0.4D0, 4.0D0,
     +                  4.0D0, 4.0D0, 4.0D0, 4.0D0, 4.0D0, 0.2D0,
     +                  -0.6D0, 0.3D0, 5.0D0, 5.0D0, 5.0D0, 5.0D0,
     +                  5.0D0, 0.1D0, -0.3D0, 0.5D0, -0.1D0, 6.0D0,
     +                  6.0D0, 6.0D0, 6.0D0, 0.1D0, 8.0D0, 8.0D0, 8.0D0,
     +                  8.0D0, 8.0D0, 8.0D0, 8.0D0, 0.3D0, 9.0D0, 9.0D0,
     +                  9.0D0, 9.0D0, 9.0D0, 9.0D0, 9.0D0, 0.3D0, 2.0D0,
     +                  -0.4D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0,
     +                  0.2D0, 3.0D0, -0.6D0, 5.0D0, 0.3D0, 2.0D0,
     +                  2.0D0, 2.0D0, 0.1D0, 4.0D0, -0.3D0, 6.0D0,
     +                  -0.5D0, 7.0D0, -0.1D0, 3.0D0/
      DATA              DTRUE1/0.0D0, 0.3D0, 0.5D0, 0.7D0, 0.6D0/
      DATA              DTRUE3/0.0D0, 0.3D0, 0.7D0, 1.1D0, 1.0D0/
      DATA              DTRUE5/0.10D0, 2.0D0, 2.0D0, 2.0D0, 2.0D0,
     +                  2.0D0, 2.0D0, 2.0D0, -0.3D0, 3.0D0, 3.0D0,
     +                  3.0D0, 3.0D0, 3.0D0, 3.0D0, 3.0D0, 0.0D0, 0.0D0,
     +                  4.0D0, 4.0D0, 4.0D0, 4.0D0, 4.0D0, 4.0D0,
     +                  0.20D0, -0.60D0, 0.30D0, 5.0D0, 5.0D0, 5.0D0,
     +                  5.0D0, 5.0D0, 0.03D0, -0.09D0, 0.15D0, -0.03D0,
     +                  6.0D0, 6.0D0, 6.0D0, 6.0D0, 0.10D0, 8.0D0,
     +                  8.0D0, 8.0D0, 8.0D0, 8.0D0, 8.0D0, 8.0D0,
     +                  0.09D0, 9.0D0, 9.0D0, 9.0D0, 9.0D0, 9.0D0,
     +                  9.0D0, 9.0D0, 0.09D0, 2.0D0, -0.12D0, 2.0D0,
     +                  2.0D0, 2.0D0, 2.0D0, 2.0D0, 0.06D0, 3.0D0,
     +                  -0.18D0, 5.0D0, 0.09D0, 2.0D0, 2.0D0, 2.0D0,
     +                  0.03D0, 4.0D0, -0.09D0, 6.0D0, -0.15D0, 7.0D0,
     +                  -0.03D0, 3.0D0/
      DATA              ITRUE2/0, 1, 2, 2, 3/
*     .. Executable Statements ..
*     Every routine is tested with increments 1 and 2 and with vector
*     lengths N = 0..4.  INCX and N live in common block /COMBLA/.
      DO 80 INCX = 1, 2
         DO 60 NP1 = 1, 5
            N = NP1 - 1
*           LEN is the number of elements copied and, for DSCAL,
*           compared; it is at least 2 even when N = 0.
            LEN = 2*MAX(N,1)
*           .. Set vector arguments ..
            DO 20 I = 1, LEN
               SX(I) = DV(I,NP1,INCX)
   20       CONTINUE
*
            IF (ICASE.EQ.7) THEN
*              .. DNRM2 ..
               STEMP(1) = DTRUE1(NP1)
               CALL STEST1(DNRM2(N,SX,INCX),STEMP(1),STEMP,SFAC)
            ELSE IF (ICASE.EQ.8) THEN
*              .. DASUM ..
               STEMP(1) = DTRUE3(NP1)
               CALL STEST1(DASUM(N,SX,INCX),STEMP(1),STEMP,SFAC)
            ELSE IF (ICASE.EQ.9) THEN
*              .. DSCAL ..
*              The scale factor is SA(NP1) for INCX = 1 and
*              SA(5+NP1) for INCX = 2.
               CALL DSCAL(N,SA((INCX-1)*5+NP1),SX,INCX)
               DO 40 I = 1, LEN
                  STRUE(I) = DTRUE5(I,NP1,INCX)
   40          CONTINUE
               CALL STEST(LEN,SX,STRUE,STRUE,SFAC)
            ELSE IF (ICASE.EQ.10) THEN
*              .. IDAMAX ..
               CALL ITEST1(IDAMAX(N,SX,INCX),ITRUE2(NP1))
            ELSE
               WRITE (NOUT,*) ' Shouldn''t be here in CHECK1'
               STOP
            END IF
   60    CONTINUE
   80 CONTINUE
      RETURN
      END
      SUBROUTINE CHECK2(SFAC)
*     .. Parameters ..
      INTEGER           NOUT
      PARAMETER         (NOUT=6)
*     .. Scalar Arguments ..
      DOUBLE PRECISION  SFAC
*     .. Scalars in Common ..
      INTEGER           ICASE, INCX, INCY, N
      LOGICAL           PASS
*     .. Local Scalars ..
      DOUBLE PRECISION  SA
      INTEGER           I, J, KI, KN, KNI, KPAR, KSIZE, LENX, LENY,
     $                  MX, MY
*     .. Local Arrays ..
      DOUBLE PRECISION  DT10X(7,4,4), DT10Y(7,4,4), DT7(4,4),
     $                  DT8(7,4,4), DX1(7),
     $                  DY1(7), SSIZE1(4), SSIZE2(14,2), SSIZE(7),
     $                  STX(7), STY(7), SX(7), SY(7),
     $                  DPAR(5,4), DT19X(7,4,16),DT19XA(7,4,4),
     $                  DT19XB(7,4,4), DT19XC(7,4,4),DT19XD(7,4,4),
     $                  DT19Y(7,4,16), DT19YA(7,4,4),DT19YB(7,4,4),
     $                  DT19YC(7,4,4), DT19YD(7,4,4), DTEMP(5)
      REAL              SX1(7), SY1(7)
      INTEGER           INCXS(4), INCYS(4), LENS(4,2), NS(4)
*     .. External Functions ..
      DOUBLE PRECISION  DDOT, DSDOT
      EXTERNAL          DDOT, DSDOT
*     .. External Subroutines ..
      EXTERNAL          DAXPY, DCOPY, DROTM, DSWAP, STEST, STEST1
*     .. Intrinsic Functions ..
      INTRINSIC         ABS, MIN
*     .. Common blocks ..
      COMMON            /COMBLA/ICASE, N, INCX, INCY, PASS
*     .. Data statements ..
EQUIVALENCE (DT19X(1,1,1),DT19XA(1,1,1)),(DT19X(1,1,5), A DT19XB(1,1,1)),(DT19X(1,1,9),DT19XC(1,1,1)), B (DT19X(1,1,13),DT19XD(1,1,1)) EQUIVALENCE (DT19Y(1,1,1),DT19YA(1,1,1)),(DT19Y(1,1,5), A DT19YB(1,1,1)),(DT19Y(1,1,9),DT19YC(1,1,1)), B (DT19Y(1,1,13),DT19YD(1,1,1)) DATA SA/0.3D0/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA DX1/0.6D0, 0.1D0, -0.5D0, 0.8D0, 0.9D0, -0.3D0, + -0.4D0/ DATA DY1/0.5D0, -0.9D0, 0.3D0, 0.7D0, -0.6D0, 0.2D0, + 0.8D0/ ***** FGVZ: We have to add separate REAL arrays for DSDOT() because ***** REAL() on an array argument does not translate via f2c. DATA SX1/0.6E0, 0.1E0, -0.5E0, 0.8E0, 0.9E0, -0.3E0, + -0.4E0/ DATA SY1/0.5E0, -0.9E0, 0.3E0, 0.7E0, -0.6E0, 0.2E0, + 0.8E0/ DATA DT7/0.0D0, 0.30D0, 0.21D0, 0.62D0, 0.0D0, + 0.30D0, -0.07D0, 0.85D0, 0.0D0, 0.30D0, -0.79D0, + -0.74D0, 0.0D0, 0.30D0, 0.33D0, 1.27D0/ DATA DT8/0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.68D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.68D0, -0.87D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.68D0, -0.87D0, 0.15D0, + 0.94D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.68D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.35D0, -0.9D0, 0.48D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.38D0, -0.9D0, 0.57D0, 0.7D0, -0.75D0, + 0.2D0, 0.98D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.68D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.35D0, -0.72D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.38D0, + -0.63D0, 0.15D0, 0.88D0, 0.0D0, 0.0D0, 0.0D0, + 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.68D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.68D0, -0.9D0, 0.33D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.68D0, -0.9D0, 0.33D0, 0.7D0, + -0.75D0, 0.2D0, 1.04D0/ DATA DT10X/0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.5D0, -0.9D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.5D0, -0.9D0, 0.3D0, 0.7D0, + 0.0D0, 0.0D0, 0.0D0, 0.6D0, 
0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.3D0, 0.1D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.8D0, 0.1D0, -0.6D0, + 0.8D0, 0.3D0, -0.3D0, 0.5D0, 0.6D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, -0.9D0, + 0.1D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.7D0, + 0.1D0, 0.3D0, 0.8D0, -0.9D0, -0.3D0, 0.5D0, + 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.5D0, 0.3D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.5D0, 0.3D0, -0.6D0, 0.8D0, 0.0D0, 0.0D0, + 0.0D0/ DATA DT10Y/0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, 0.1D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, 0.1D0, -0.5D0, 0.8D0, 0.0D0, + 0.0D0, 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, -0.5D0, -0.9D0, 0.6D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, -0.4D0, -0.9D0, 0.9D0, + 0.7D0, -0.5D0, 0.2D0, 0.6D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.6D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, -0.5D0, + 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + -0.4D0, 0.9D0, -0.5D0, 0.6D0, 0.0D0, 0.0D0, + 0.0D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.6D0, -0.9D0, 0.1D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.6D0, -0.9D0, 0.1D0, 0.7D0, + -0.5D0, 0.2D0, 0.8D0/ DATA SSIZE1/0.0D0, 0.3D0, 1.6D0, 3.2D0/ DATA SSIZE2/0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0/ * * FOR DROTM * DATA DPAR/-2.D0, 0.D0,0.D0,0.D0,0.D0, A -1.D0, 2.D0, -3.D0, -4.D0, 5.D0, B 0.D0, 0.D0, 2.D0, -3.D0, 0.D0, C 1.D0, 5.D0, 2.D0, 0.D0, -4.D0/ * TRUE X RESULTS F0R ROTATIONS DROTM DATA DT19XA/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .6D0, 
0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .6D0, .1D0, 0.D0,0.D0,0.D0,0.D0,0.D0, I -.8D0, 3.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, J -.9D0, 2.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, K 3.5D0, -.4D0, 0.D0,0.D0,0.D0,0.D0,0.D0, L .6D0, .1D0, -.5D0, .8D0, 0.D0,0.D0,0.D0, M -.8D0, 3.8D0, -2.2D0, -1.2D0, 0.D0,0.D0,0.D0, N -.9D0, 2.8D0, -1.4D0, -1.3D0, 0.D0,0.D0,0.D0, O 3.5D0, -.4D0, -2.2D0, 4.7D0, 0.D0,0.D0,0.D0/ * DATA DT19XB/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .6D0, .1D0, -.5D0, 0.D0,0.D0,0.D0,0.D0, I 0.D0, .1D0, -3.0D0, 0.D0,0.D0,0.D0,0.D0, J -.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, K 3.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, L .6D0, .1D0, -.5D0, .8D0, .9D0, -.3D0, -.4D0, M -2.0D0, .1D0, 1.4D0, .8D0, .6D0, -.3D0, -2.8D0, N -1.8D0, .1D0, 1.3D0, .8D0, 0.D0, -.3D0, -1.9D0, O 3.8D0, .1D0, -3.1D0, .8D0, 4.8D0, -.3D0, -1.5D0 / * DATA DT19XC/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .6D0, .1D0, -.5D0, 0.D0,0.D0,0.D0,0.D0, I 4.8D0, .1D0, -3.0D0, 0.D0,0.D0,0.D0,0.D0, J 3.3D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, K 2.1D0, .1D0, -2.0D0, 0.D0,0.D0,0.D0,0.D0, L .6D0, .1D0, -.5D0, .8D0, .9D0, -.3D0, -.4D0, M -1.6D0, .1D0, -2.2D0, .8D0, 5.4D0, -.3D0, -2.8D0, N -1.5D0, .1D0, -1.4D0, .8D0, 3.6D0, -.3D0, -1.9D0, O 3.7D0, .1D0, -2.2D0, .8D0, 
3.6D0, -.3D0, -1.5D0 / * DATA DT19XD/.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .6D0, .1D0, 0.D0,0.D0,0.D0,0.D0,0.D0, I -.8D0, -1.0D0, 0.D0,0.D0,0.D0,0.D0,0.D0, J -.9D0, -.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, K 3.5D0, .8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, L .6D0, .1D0, -.5D0, .8D0, 0.D0,0.D0,0.D0, M -.8D0, -1.0D0, 1.4D0, -1.6D0, 0.D0,0.D0,0.D0, N -.9D0, -.8D0, 1.3D0, -1.6D0, 0.D0,0.D0,0.D0, O 3.5D0, .8D0, -3.1D0, 4.8D0, 0.D0,0.D0,0.D0/ * TRUE Y RESULTS FOR ROTATIONS DROTM DATA DT19YA/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .5D0, -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0, I .7D0, -4.8D0, 0.D0,0.D0,0.D0,0.D0,0.D0, J 1.7D0, -.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0, K -2.6D0, 3.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0, L .5D0, -.9D0, .3D0, .7D0, 0.D0,0.D0,0.D0, M .7D0, -4.8D0, 3.0D0, 1.1D0, 0.D0,0.D0,0.D0, N 1.7D0, -.7D0, -.7D0, 2.3D0, 0.D0,0.D0,0.D0, O -2.6D0, 3.5D0, -.7D0, -3.6D0, 0.D0,0.D0,0.D0/ * DATA DT19YB/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .5D0, -.9D0, .3D0, 0.D0,0.D0,0.D0,0.D0, I 4.0D0, -.9D0, -.3D0, 0.D0,0.D0,0.D0,0.D0, J -.5D0, -.9D0, 1.5D0, 0.D0,0.D0,0.D0,0.D0, K -1.5D0, -.9D0, -1.8D0, 0.D0,0.D0,0.D0,0.D0, L .5D0, -.9D0, .3D0, .7D0, -.6D0, .2D0, .8D0, M 3.7D0, -.9D0, -1.2D0, .7D0, -1.5D0, 
.2D0, 2.2D0, N -.3D0, -.9D0, 2.1D0, .7D0, -1.6D0, .2D0, 2.0D0, O -1.6D0, -.9D0, -2.1D0, .7D0, 2.9D0, .2D0, -3.8D0 / * DATA DT19YC/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .5D0, -.9D0, 0.D0,0.D0,0.D0,0.D0,0.D0, I 4.0D0, -6.3D0, 0.D0,0.D0,0.D0,0.D0,0.D0, J -.5D0, .3D0, 0.D0,0.D0,0.D0,0.D0,0.D0, K -1.5D0, 3.0D0, 0.D0,0.D0,0.D0,0.D0,0.D0, L .5D0, -.9D0, .3D0, .7D0, 0.D0,0.D0,0.D0, M 3.7D0, -7.2D0, 3.0D0, 1.7D0, 0.D0,0.D0,0.D0, N -.3D0, .9D0, -.7D0, 1.9D0, 0.D0,0.D0,0.D0, O -1.6D0, 2.7D0, -.7D0, -3.4D0, 0.D0,0.D0,0.D0/ * DATA DT19YD/.5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, A .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, B .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, C .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, D .5D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, E .7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, F 1.7D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, G -2.6D0, 0.D0,0.D0,0.D0,0.D0,0.D0,0.D0, H .5D0, -.9D0, .3D0, 0.D0,0.D0,0.D0,0.D0, I .7D0, -.9D0, 1.2D0, 0.D0,0.D0,0.D0,0.D0, J 1.7D0, -.9D0, .5D0, 0.D0,0.D0,0.D0,0.D0, K -2.6D0, -.9D0, -1.3D0, 0.D0,0.D0,0.D0,0.D0, L .5D0, -.9D0, .3D0, .7D0, -.6D0, .2D0, .8D0, M .7D0, -.9D0, 1.2D0, .7D0, -1.5D0, .2D0, 1.6D0, N 1.7D0, -.9D0, .5D0, .7D0, -1.6D0, .2D0, 2.4D0, O -2.6D0, -.9D0, -1.3D0, .7D0, 2.9D0, .2D0, -4.0D0 / * * .. Executable Statements .. * DO 120 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 100 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * .. Initialize all argument arrays .. DO 20 I = 1, 7 SX(I) = DX1(I) SY(I) = DY1(I) ***** FGVZ: We have to add a loop to initialize separate REAL arrays ***** for DSDOT() because REAL() on an array argument does not ***** translate via f2c. 
SX1(I) = DX1(I) SY1(I) = DY1(I) 20 CONTINUE * IF (ICASE.EQ.1) THEN * .. DDOT .. CALL STEST1(DDOT(N,SX,INCX,SY,INCY),DT7(KN,KI),SSIZE1(KN) + ,SFAC) ELSE IF (ICASE.EQ.2) THEN * .. DAXPY .. CALL DAXPY(N,SA,SX,INCX,SY,INCY) DO 40 J = 1, LENY STY(J) = DT8(J,KN,KI) 40 CONTINUE CALL STEST(LENY,SY,STY,SSIZE2(1,KSIZE),SFAC) ELSE IF (ICASE.EQ.5) THEN * .. DCOPY .. DO 60 I = 1, 7 STY(I) = DT10Y(I,KN,KI) 60 CONTINUE CALL DCOPY(N,SX,INCX,SY,INCY) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0D0) ELSE IF (ICASE.EQ.6) THEN * .. DSWAP .. CALL DSWAP(N,SX,INCX,SY,INCY) DO 80 I = 1, 7 STX(I) = DT10X(I,KN,KI) STY(I) = DT10Y(I,KN,KI) 80 CONTINUE CALL STEST(LENX,SX,STX,SSIZE2(1,1),1.0D0) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0D0) ELSE IF (ICASE.EQ.12) THEN * .. DROTM .. KNI=KN+4*(KI-1) DO KPAR=1,4 DO I=1,7 SX(I) = DX1(I) SY(I) = DY1(I) STX(I)= DT19X(I,KPAR,KNI) STY(I)= DT19Y(I,KPAR,KNI) END DO * DO I=1,5 DTEMP(I) = DPAR(I,KPAR) END DO * DO I=1,LENX SSIZE(I)=STX(I) END DO * SEE REMARK ABOVE ABOUT DT11X(1,2,7) * AND DT11X(5,3,8). IF ((KPAR .EQ. 2) .AND. (KNI .EQ. 7)) $ SSIZE(1) = 2.4D0 IF ((KPAR .EQ. 3) .AND. (KNI .EQ. 8)) $ SSIZE(5) = 1.8D0 * CALL DROTM(N,SX,INCX,SY,INCY,DTEMP) CALL STEST(LENX,SX,STX,SSIZE,SFAC) CALL STEST(LENY,SY,STY,STY,SFAC) END DO ELSE IF (ICASE.EQ.13) THEN * .. DSDOT .. ***** CALL TESTDSDOT(REAL(DSDOT(N,REAL(SX),INCX,REAL(SY),INCY)), CALL TESTDSDOT(REAL(DSDOT(N,SX1,INCX,SY1,INCY)), $ REAL(DT7(KN,KI)),REAL(SSIZE1(KN)), .3125E-1) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP END IF 100 CONTINUE 120 CONTINUE RETURN END SUBROUTINE CHECK3(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SC, SS INTEGER I, K, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. 
DOUBLE PRECISION COPYX(5), COPYY(5), DT9X(7,4,4), DT9Y(7,4,4), + DX1(7), DY1(7), MWPC(11), MWPS(11), MWPSTX(5), + MWPSTY(5), MWPTX(11,5), MWPTY(11,5), MWPX(5), + MWPY(5), SSIZE2(14,2), STX(7), STY(7), SX(7), + SY(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), MWPINX(11), + MWPINY(11), MWPN(11), NS(4) * .. External Subroutines .. EXTERNAL DROT, STEST * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA DX1/0.6D0, 0.1D0, -0.5D0, 0.8D0, 0.9D0, -0.3D0, + -0.4D0/ DATA DY1/0.5D0, -0.9D0, 0.3D0, 0.7D0, -0.6D0, 0.2D0, + 0.8D0/ DATA SC, SS/0.8D0, 0.6D0/ DATA DT9X/0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.78D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.78D0, -0.46D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.78D0, -0.46D0, -0.22D0, + 1.06D0, 0.0D0, 0.0D0, 0.0D0, 0.6D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.78D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.66D0, 0.1D0, -0.1D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.96D0, 0.1D0, -0.76D0, 0.8D0, 0.90D0, + -0.3D0, -0.02D0, 0.6D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.78D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, -0.06D0, 0.1D0, + -0.1D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.90D0, + 0.1D0, -0.22D0, 0.8D0, 0.18D0, -0.3D0, -0.02D0, + 0.6D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.78D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.78D0, 0.26D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.78D0, 0.26D0, -0.76D0, 1.12D0, + 0.0D0, 0.0D0, 0.0D0/ DATA DT9Y/0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.04D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.04D0, -0.78D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.04D0, -0.78D0, 0.54D0, + 0.08D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.04D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.7D0, + -0.9D0, -0.12D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.64D0, 
-0.9D0, -0.30D0, 0.7D0, -0.18D0, 0.2D0, + 0.28D0, 0.5D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.04D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.7D0, -1.08D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.64D0, -1.26D0, + 0.54D0, 0.20D0, 0.0D0, 0.0D0, 0.0D0, 0.5D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.04D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.04D0, -0.9D0, 0.18D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.04D0, -0.9D0, 0.18D0, 0.7D0, + -0.18D0, 0.2D0, 0.16D0/ DATA SSIZE2/0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, 0.0D0, + 0.0D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, 1.17D0, + 1.17D0, 1.17D0, 1.17D0/ * .. Executable Statements .. * DO 60 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 40 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * IF (ICASE.EQ.4) THEN * .. DROT .. DO 20 I = 1, 7 SX(I) = DX1(I) SY(I) = DY1(I) STX(I) = DT9X(I,KN,KI) STY(I) = DT9Y(I,KN,KI) 20 CONTINUE CALL DROT(N,SX,INCX,SY,INCY,SC,SS) CALL STEST(LENX,SX,STX,SSIZE2(1,KSIZE),SFAC) CALL STEST(LENY,SY,STY,SSIZE2(1,KSIZE),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK3' STOP END IF 40 CONTINUE 60 CONTINUE * MWPC(1) = 1 DO 80 I = 2, 11 MWPC(I) = 0 80 CONTINUE MWPS(1) = 0 DO 100 I = 2, 6 MWPS(I) = 1 100 CONTINUE DO 120 I = 7, 11 MWPS(I) = -1 120 CONTINUE MWPINX(1) = 1 MWPINX(2) = 1 MWPINX(3) = 1 MWPINX(4) = -1 MWPINX(5) = 1 MWPINX(6) = -1 MWPINX(7) = 1 MWPINX(8) = 1 MWPINX(9) = -1 MWPINX(10) = 1 MWPINX(11) = -1 MWPINY(1) = 1 MWPINY(2) = 1 MWPINY(3) = -1 MWPINY(4) = -1 MWPINY(5) = 2 MWPINY(6) = 1 MWPINY(7) = 1 MWPINY(8) = -1 MWPINY(9) = -1 MWPINY(10) = 2 MWPINY(11) = 1 DO 140 I = 1, 11 MWPN(I) = 5 140 CONTINUE MWPN(5) = 3 MWPN(10) = 3 DO 160 I = 1, 5 MWPX(I) = I MWPY(I) = I MWPTX(1,I) = I MWPTY(1,I) = I MWPTX(2,I) = I MWPTY(2,I) = -I MWPTX(3,I) = 6 - I MWPTY(3,I) = I - 6 MWPTX(4,I) = I MWPTY(4,I) = -I MWPTX(6,I) = 6 - I MWPTY(6,I) = I - 
6 MWPTX(7,I) = -I MWPTY(7,I) = I MWPTX(8,I) = I - 6 MWPTY(8,I) = 6 - I MWPTX(9,I) = -I MWPTY(9,I) = I MWPTX(11,I) = I - 6 MWPTY(11,I) = 6 - I 160 CONTINUE MWPTX(5,1) = 1 MWPTX(5,2) = 3 MWPTX(5,3) = 5 MWPTX(5,4) = 4 MWPTX(5,5) = 5 MWPTY(5,1) = -1 MWPTY(5,2) = 2 MWPTY(5,3) = -2 MWPTY(5,4) = 4 MWPTY(5,5) = -3 MWPTX(10,1) = -1 MWPTX(10,2) = -3 MWPTX(10,3) = -5 MWPTX(10,4) = 4 MWPTX(10,5) = 5 MWPTY(10,1) = 1 MWPTY(10,2) = 2 MWPTY(10,3) = 2 MWPTY(10,4) = 4 MWPTY(10,5) = 3 DO 200 I = 1, 11 INCX = MWPINX(I) INCY = MWPINY(I) DO 180 K = 1, 5 COPYX(K) = MWPX(K) COPYY(K) = MWPY(K) MWPSTX(K) = MWPTX(I,K) MWPSTY(K) = MWPTY(I,K) 180 CONTINUE CALL DROT(MWPN(I),COPYX,INCX,COPYY,INCY,MWPC(I),MWPS(I)) CALL STEST(5,COPYX,MWPSTX,MWPSTX,SFAC) CALL STEST(5,COPYY,MWPSTY,MWPSTY,SFAC) 200 CONTINUE RETURN END SUBROUTINE STEST(LEN,SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT DOUBLE PRECISION ZERO PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN * .. Array Arguments .. DOUBLE PRECISION SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SD INTEGER I * .. External Functions .. DOUBLE PRECISION SDIFF EXTERNAL SDIFF * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. 
WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,2I5,I3,2D36.8,2D12.4) END SUBROUTINE TESTDSDOT(SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT REAL ZERO PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC, SCOMP, SSIZE, STRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SD * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * SD = SCOMP - STRUE IF (ABS(SFAC*SD) .LE. ABS(SSIZE) * EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, SCOMP, + STRUE, SD, SSIZE 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,1I5,I3,2E36.8,2E12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** * * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. DOUBLE PRECISION SCOMP1, SFAC, STRUE1 * .. Array Arguments .. DOUBLE PRECISION SSIZE(*) * .. Local Arrays .. DOUBLE PRECISION SCOMP(1), STRUE(1) * .. External Subroutines .. EXTERNAL STEST * .. 
Executable Statements .. * SCOMP(1) = SCOMP1 STRUE(1) = STRUE1 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC) * RETURN END DOUBLE PRECISION FUNCTION SDIFF(SA,SB) * ********************************* SDIFF ************************** * COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 * * .. Scalar Arguments .. DOUBLE PRECISION SA, SB * .. Executable Statements .. SDIFF = SA - SB RETURN END SUBROUTINE ITEST1(ICOMP,ITRUE) * ********************************* ITEST1 ************************* * * THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR * EQUALITY. * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * IF (ICOMP.EQ.ITRUE) GO TO 40 * * HERE ICOMP IS NOT EQUAL TO ITRUE. * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE WRITE (NOUT,99997) ICASE, N, INCX, INCY, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY ', + ' COMP TRUE DIFFERENCE', + /1X) 99997 FORMAT (1X,I4,I3,2I5,2I36,I12) END blis-0.6.1/blastest/src/fortran/dblat2.f000066400000000000000000003333171360743507500200750ustar00rootroot00000000000000*> \brief \b DBLAT2 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM DBLAT2 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the DOUBLE PRECISION Level 2 Blas. *> *> The program must be driven by a short data file. The first 18 records *> of the file are read using list-directed input, the last 16 records *> are read using the format ( A6, L2 ). 
An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 34 lines: *> 'dblat2.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 4 NUMBER OF VALUES OF K *> 0 1 2 4 VALUES OF K *> 4 NUMBER OF VALUES OF INCX AND INCY *> 1 2 -1 -2 VALUES OF INCX AND INCY *> 3 NUMBER OF VALUES OF ALPHA *> 0.0 1.0 0.7 VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> 0.0 1.0 0.9 VALUES OF BETAC *> DGEMV T PUT F FOR NO TEST. SAME COLUMNS. *> DGBMV T PUT F FOR NO TEST. SAME COLUMNS. *> DSYMV T PUT F FOR NO TEST. SAME COLUMNS. *> DSBMV T PUT F FOR NO TEST. SAME COLUMNS. *> DSPMV T PUT F FOR NO TEST. SAME COLUMNS. *> DTRMV T PUT F FOR NO TEST. SAME COLUMNS. *> DTBMV T PUT F FOR NO TEST. SAME COLUMNS. *> DTPMV T PUT F FOR NO TEST. SAME COLUMNS. *> DTRSV T PUT F FOR NO TEST. SAME COLUMNS. *> DTBSV T PUT F FOR NO TEST. SAME COLUMNS. *> DTPSV T PUT F FOR NO TEST. SAME COLUMNS. *> DGER T PUT F FOR NO TEST. SAME COLUMNS. *> DSYR T PUT F FOR NO TEST. SAME COLUMNS. *> DSPR T PUT F FOR NO TEST. SAME COLUMNS. *> DSYR2 T PUT F FOR NO TEST. SAME COLUMNS. *> DSPR2 T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. *> An extended set of Fortran Basic Linear Algebra Subprograms. *> *> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics *> and Computer Science Division, Argonne National Laboratory, *> 9700 South Cass Avenue, Argonne, Illinois 60439, US. 
*> *> Or *> *> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms *> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford *> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st *> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. *> *> *> -- Written on 10-August-1987. *> Richard Hanson, Sandia National Labs. *> Jeremy Du Croz, NAG Central Office. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup double_blas_testing * * ===================================================================== PROGRAM DBLAT2 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NINC, NKB, $ NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANS CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. 
DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), BET( NBEMAX ), $ G( NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( 2*NMAX ) INTEGER IDIM( NIDMAX ), INC( NINMAX ), KB( NKBMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. DOUBLE PRECISION DDIFF LOGICAL LDE EXTERNAL DDIFF, LDE * .. External Subroutines .. EXTERNAL DCHK1, DCHK2, DCHK3, DCHK4, DCHK5, DCHK6, $ DCHKE, DMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'DGEMV ', 'DGBMV ', 'DSYMV ', 'DSBMV ', $ 'DSPMV ', 'DTRMV ', 'DTBMV ', 'DTPMV ', $ 'DTRSV ', 'DTBSV ', 'DTPSV ', 'DGER ', $ 'DSYR ', 'DSPR ', 'DSYR2 ', 'DSPR2 '/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. 
* * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 230 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 230 END IF 10 CONTINUE * Values of K READ( NIN, FMT = * )NKB IF( NKB.LT.1.OR.NKB.GT.NKBMAX )THEN WRITE( NOUT, FMT = 9997 )'K', NKBMAX GO TO 230 END IF READ( NIN, FMT = * )( KB( I ), I = 1, NKB ) DO 20 I = 1, NKB IF( KB( I ).LT.0 )THEN WRITE( NOUT, FMT = 9995 ) GO TO 230 END IF 20 CONTINUE * Values of INCX and INCY READ( NIN, FMT = * )NINC IF( NINC.LT.1.OR.NINC.GT.NINMAX )THEN WRITE( NOUT, FMT = 9997 )'INCX AND INCY', NINMAX GO TO 230 END IF READ( NIN, FMT = * )( INC( I ), I = 1, NINC ) DO 30 I = 1, NINC IF( INC( I ).EQ.0.OR.ABS( INC( I ) ).GT.INCMAX )THEN WRITE( NOUT, FMT = 9994 )INCMAX GO TO 230 END IF 30 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 230 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 230 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9991 )( KB( I ), I = 1, NKB ) WRITE( NOUT, FMT = 9990 )( INC( I ), I = 1, NINC ) WRITE( NOUT, FMT = 9989 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9988 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 40 I = 1, NSUBS LTEST( I ) = .FALSE. 
40 CONTINUE 50 READ( NIN, FMT = 9984, END = 80 )SNAMET, LTESTT DO 60 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 70 60 CONTINUE WRITE( NOUT, FMT = 9986 )SNAMET STOP 70 LTEST( I ) = LTESTT GO TO 50 * 80 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMVCH using exact data. * N = MIN( 32, NMAX ) DO 120 J = 1, N DO 110 I = 1, N A( I, J ) = MAX( I - J + 1, 0 ) 110 CONTINUE X( J ) = J Y( J ) = ZERO 120 CONTINUE DO 130 J = 1, N YY( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE * YY holds the exact result. On exit from DMVCH YT holds * the result computed by DMVCH. TRANS = 'N' CALL DMVCH( TRANS, N, N, ONE, A, NMAX, X, 1, ZERO, Y, 1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF TRANS = 'T' CALL DMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 210 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9983 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL DCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 140, 150, 150, 150, 160, 160, $ 160, 160, 160, 160, 170, 180, 180, $ 190, 190 )ISNUM * Test DGEMV, 01, and DGBMV, 02. 140 CALL DCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test DSYMV, 03, DSBMV, 04, and DSPMV, 05. 
150 CALL DCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test DTRMV, 06, DTBMV, 07, DTPMV, 08, * DTRSV, 09, DTBSV, 10, and DTPSV, 11. 160 CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z ) GO TO 200 * Test DGER, 12. 170 CALL DCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test DSYR, 13, and DSPR, 14. 180 CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test DSYR2, 15, and DSPR2, 16. 190 CALL DCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) * 200 IF( FATAL.AND.SFATAL ) $ GO TO 220 END IF 210 CONTINUE WRITE( NOUT, FMT = 9982 ) GO TO 240 * 220 CONTINUE WRITE( NOUT, FMT = 9981 ) GO TO 240 * 230 CONTINUE WRITE( NOUT, FMT = 9987 ) * 240 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, D9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' VALUE OF K IS LESS THAN 0' ) 9994 FORMAT( ' ABSOLUTE VALUE OF INCX OR INCY IS 0 OR GREATER THAN ', $ I2 ) 9993 FORMAT( ' TESTS OF THE DOUBLE PRECISION LEVEL 2 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9992 FORMAT( ' FOR N ', 9I6 ) 9991 FORMAT( ' FOR K ', 7I6 ) 9990 FORMAT( ' FOR INCX AND INCY ', 7I6 ) 
9989 FORMAT( ' FOR ALPHA ', 7F6.1 ) 9988 FORMAT( ' FOR BETA ', 7F6.1 ) 9987 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9986 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9985 FORMAT( ' ERROR IN DMVCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' DMVCH WAS CALLED WITH TRANS = ', A1, $ ' AND RETURNED SAME = ', L1, ' AND ERR = ', F12.3, '.', / $ ' THIS MAY BE DUE TO FAULTS IN THE ARITHMETIC OR THE COMPILER.' $ , /' ******* TESTS ABANDONED *******' ) 9984 FORMAT( A6, L2 ) 9983 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9982 FORMAT( /' END OF TESTS' ) 9981 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9980 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of DBLAT2. * END SUBROUTINE DCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests DGEMV and DGBMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), G( NMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. 
DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX, TRANSL INTEGER I, IA, IB, IC, IKU, IM, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, KL, KLS, KU, KUS, LAA, LDA, $ LDAS, LX, LY, M, ML, MS, N, NARGS, NC, ND, NK, $ NL, NS LOGICAL BANDED, FULL, NULL, RESET, SAME, TRAN CHARACTER*1 TRANS, TRANSS CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DGBMV, DGEMV, DMAKE, DMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' * Define the number of arguments. IF( FULL )THEN NARGS = 11 ELSE IF( BANDED )THEN NARGS = 13 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IKU = 1, NK IF( BANDED )THEN KU = KB( IKU ) KL = MAX( KU - 1, 0 ) ELSE KU = N - 1 KL = M - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = KL + KU + 1 ELSE LDA = M END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, AA, $ LDA, KL, KU, RESET, TRANSL ) * DO 90 IC = 1, 3 TRANS = ICH( IC: IC ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' * IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*NL * * Generate the vector X. 
* TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, NL, X, 1, XX, $ ABS( INCX ), 0, NL - 1, RESET, TRANSL ) IF( NL.GT.1 )THEN X( NL/2 ) = ZERO XX( 1 + ABS( INCX )*( NL/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*ML * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL DMAKE( 'GE', ' ', ' ', 1, ML, Y, 1, $ YY, ABS( INCY ), 0, ML - 1, $ RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANSS = TRANS MS = M NS = N KLS = KL KUS = KU ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ TRANS, M, N, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL DGEMV( TRANS, M, N, ALPHA, AA, $ LDA, XX, INCX, BETA, YY, $ INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANS, M, N, KL, KU, ALPHA, LDA, $ INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL DGBMV( TRANS, M, N, KL, KU, ALPHA, $ AA, LDA, XX, INCX, BETA, $ YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 130 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = TRANS.EQ.TRANSS ISAME( 2 ) = MS.EQ.M ISAME( 3 ) = NS.EQ.N IF( FULL )THEN ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LDE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LDE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LDE( YS, YY, LY ) ELSE ISAME( 10 ) = LDERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 4 ) = KLS.EQ.KL ISAME( 5 ) = KUS.EQ.KU ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LDE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LDE( XS, XX, LX ) ISAME( 10 ) = INCXS.EQ.INCX ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LDE( YS, YY, LY ) ELSE ISAME( 12 ) = LDERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 13 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 130 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL DMVCH( TRANS, M, N, ALPHA, A, $ NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 130 ELSE * Avoid repeating tests with M.le.0 or * N.le.0. GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 140 * 130 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, TRANS, M, N, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANS, M, N, KL, KU, $ ALPHA, LDA, INCX, BETA, INCY END IF * 140 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 4( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, $ ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK1. * END SUBROUTINE DCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests DSYMV, DSBMV and DSPMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), G( NMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX, TRANSL INTEGER I, IA, IB, IC, IK, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, K, KS, LAA, LDA, LDAS, LX, LY, $ N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, DSBMV, DSPMV, DSYMV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 10 ELSE IF( BANDED )THEN NARGS = 11 ELSE IF( PACKED )THEN NARGS = 9 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, AA, $ LDA, K, K, RESET, TRANSL ) * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. 
* TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL DMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * UPLOS = UPLO NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, N, ALPHA, LDA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL DSYMV( UPLO, N, ALPHA, AA, LDA, XX, $ INCX, BETA, YY, INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, N, K, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL DSBMV( UPLO, N, K, ALPHA, AA, LDA, $ XX, INCX, BETA, YY, INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, N, ALPHA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL DSPMV( UPLO, N, ALPHA, AA, XX, INCX, $ BETA, YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N IF( FULL )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( AS, AA, LAA ) ISAME( 5 ) = LDAS.EQ.LDA ISAME( 6 ) = LDE( XS, XX, LX ) ISAME( 7 ) = INCXS.EQ.INCX ISAME( 8 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LDE( YS, YY, LY ) ELSE ISAME( 9 ) = LDERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 10 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 3 ) = KS.EQ.K ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LDE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LDE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LDE( YS, YY, LY ) ELSE ISAME( 10 ) = LDERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( PACKED )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( AS, AA, LAA ) ISAME( 5 ) = LDE( XS, XX, LX ) ISAME( 6 ) = INCXS.EQ.INCX ISAME( 7 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 8 ) = LDE( YS, YY, LY ) ELSE ISAME( 8 ) = LDERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 9 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL DMVCH( 'N', N, N, ALPHA, A, NMAX, X, $ INCX, BETA, Y, INCY, YT, G, $ YY, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0 GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, LDA, INCX, $ BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, K, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, N, ALPHA, INCX, $ BETA, INCY END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', AP', $ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, $ ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', A,', $ I3, ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK2. * END SUBROUTINE DCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, XT, G, Z ) * * Tests DTRMV, DTBMV, DTPMV, DTRSV, DTBSV and DTPSV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) * .. Scalar Arguments .. 
DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XT( NMAX ), $ XX( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. DOUBLE PRECISION ERR, ERRMAX, TRANSL INTEGER I, ICD, ICT, ICU, IK, IN, INCX, INCXS, IX, K, $ KS, LAA, LDA, LDAS, LX, N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 DIAG, DIAGS, TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHD, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, DTBMV, DTBSV, DTPMV, DTPSV, $ DTRMV, DTRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'R' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 8 ELSE IF( BANDED )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 7 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * Set up zero vector for DMVCH. DO 10 I = 1, NMAX Z( I ) = ZERO 10 CONTINUE * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. 
IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) * DO 70 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), UPLO, DIAG, N, N, A, $ NMAX, AA, LDA, K, K, RESET, TRANSL ) * DO 60 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, $ TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS DIAGS = DIAG NS = N KS = K DO 20 I = 1, LAA AS( I ) = AA( I ) 20 CONTINUE LDAS = LDA DO 30 I = 1, LX XS( I ) = XX( I ) 30 CONTINUE INCXS = INCX * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL DTRMV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL DTBMV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL DTPMV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL DTRSV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL DTBSV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, 
INCX IF( REWI ) $ REWIND NTRA CALL DTPSV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = TRANS.EQ.TRANSS ISAME( 3 ) = DIAG.EQ.DIAGS ISAME( 4 ) = NS.EQ.N IF( FULL )THEN ISAME( 5 ) = LDE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 7 ) = LDE( XS, XX, LX ) ELSE ISAME( 7 ) = LDERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 8 ) = INCXS.EQ.INCX ELSE IF( BANDED )THEN ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = LDE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 8 ) = LDE( XS, XX, LX ) ELSE ISAME( 8 ) = LDERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 9 ) = INCXS.EQ.INCX ELSE IF( PACKED )THEN ISAME( 5 ) = LDE( AS, AA, LAA ) IF( NULL )THEN ISAME( 6 ) = LDE( XS, XX, LX ) ELSE ISAME( 6 ) = LDERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 7 ) = INCXS.EQ.INCX END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MV' )THEN * * Check the result. * CALL DMVCH( TRANS, N, N, ONE, A, NMAX, X, $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN * * Compute approximation to original vector. * DO 50 I = 1, N Z( I ) = XX( 1 + ( I - 1 )* $ ABS( INCX ) ) XX( 1 + ( I - 1 )*ABS( INCX ) ) $ = X( I ) 50 CONTINUE CALL DMVCH( TRANS, N, N, ONE, A, NMAX, Z, $ INCX, ZERO, X, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .FALSE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0. 
GO TO 110 END IF * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, DIAG, N, LDA, $ INCX ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, DIAG, N, K, $ LDA, INCX ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANS, DIAG, N, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', AP, ', $ 'X,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), 2( I3, ',' ), $ ' A,', I3, ', X,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', A,', $ I3, ', X,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK3. * END SUBROUTINE DCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests DGER. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) * .. Scalar Arguments .. 
DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IM, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, LAA, LDA, LDAS, LX, LY, M, MS, N, NARGS, $ NC, ND, NS LOGICAL NULL, RESET, SAME * .. Local Arrays .. DOUBLE PRECISION W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DGER, DMAKE, DMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * Define the number of arguments. NARGS = 9 * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * * Set LDA to 1 more than minimum value if room. LDA = M IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * DO 100 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*M * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, M, X, 1, XX, ABS( INCX ), $ 0, M - 1, RESET, TRANSL ) IF( M.GT.1 )THEN X( M/2 ) = ZERO XX( 1 + ABS( INCX )*( M/2 - 1 ) ) = ZERO END IF * DO 90 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. 
* TRANSL = ZERO CALL DMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, $ AA, LDA, M - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, M, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL DGER( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 140 END IF * * See what data changed inside subroutine. * ISAME( 1 ) = MS.EQ.M ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LDE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LDE( AS, AA, LAA ) ELSE ISAME( 8 ) = LDERES( 'GE', ' ', M, N, AS, AA, $ LDA ) END IF ISAME( 9 ) = LDAS.EQ.LDA * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 140 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, M Z( I ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, M Z( I ) = X( M - I + 1 ) 60 CONTINUE END IF DO 70 J = 1, N IF( INCY.GT.0 )THEN W( 1 ) = Y( J ) ELSE W( 1 ) = Y( N - J + 1 ) END IF CALL DMVCH( 'N', M, 1, ALPHA, Z, NMAX, W, 1, $ ONE, A( 1, J ), 1, YT, G, $ AA( 1 + ( J - 1 )*LDA ), EPS, $ ERR, FATAL, NOUT, .TRUE. 
) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 130 70 CONTINUE ELSE * Avoid repeating tests with M.le.0 or N.le.0. GO TO 110 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 150 * 130 CONTINUE WRITE( NOUT, FMT = 9995 )J * 140 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, M, N, ALPHA, INCX, INCY, LDA * 150 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( I3, ',' ), F4.1, ', X,', I2, $ ', Y,', I2, ', A,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK4. * END SUBROUTINE DCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests DSYR and DSPR. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IC, IN, INCX, INCXS, IX, J, JA, JJ, LAA, $ LDA, LDAS, LJ, LX, N, NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. DOUBLE PRECISION W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, DSPR, DSYR * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 7 ELSE IF( PACKED )THEN NARGS = 6 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, $ AA, LDA, N - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. 
* UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, LDA IF( REWI ) $ REWIND NTRA CALL DSYR( UPLO, N, ALPHA, XX, INCX, AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX IF( REWI ) $ REWIND NTRA CALL DSPR( UPLO, N, ALPHA, XX, INCX, AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX IF( NULL )THEN ISAME( 6 ) = LDE( AS, AA, LAA ) ELSE ISAME( 6 ) = LDERES( SNAME( 2: 3 ), UPLO, N, N, AS, $ AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 7 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 40 I = 1, N Z( I ) = X( I ) 40 CONTINUE ELSE DO 50 I = 1, N Z( I ) = X( N - I + 1 ) 50 CONTINUE END IF JA = 1 DO 60 J = 1, N W( 1 ) = Z( J ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL DMVCH( 'N', LJ, 1, ALPHA, Z( JJ ), LJ, W, $ 1, ONE, A( JJ, J ), 1, YT, G, $ AA( JA ), EPS, ERR, FATAL, NOUT, $ .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 110 60 CONTINUE ELSE * Avoid repeating tests if N.le.0. 
IF( N.LE.0 ) $ GO TO 100 END IF * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK5. * END SUBROUTINE DCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests DSYR2 and DSPR2. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, HALF, ONE PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX, 2 ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IC, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, JA, JJ, LAA, LDA, LDAS, LJ, LX, LY, N, $ NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. DOUBLE PRECISION W( 2 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMVCH, DSPR2, DSYR2 * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 8 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 140 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 140 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 130 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 120 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL DMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 110 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. 
* TRANSL = ZERO CALL DMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 100 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL DMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, $ NMAX, AA, LDA, N - 1, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL DSYR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY IF( REWI ) $ REWIND NTRA CALL DSPR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 160 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LDE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LDE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LDE( AS, AA, LAA ) ELSE ISAME( 8 ) = LDERES( SNAME( 2: 3 ), UPLO, N, N, $ AS, AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 9 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 160 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. 
* IF( INCX.GT.0 )THEN DO 50 I = 1, N Z( I, 1 ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, N Z( I, 1 ) = X( N - I + 1 ) 60 CONTINUE END IF IF( INCY.GT.0 )THEN DO 70 I = 1, N Z( I, 2 ) = Y( I ) 70 CONTINUE ELSE DO 80 I = 1, N Z( I, 2 ) = Y( N - I + 1 ) 80 CONTINUE END IF JA = 1 DO 90 J = 1, N W( 1 ) = Z( J, 2 ) W( 2 ) = Z( J, 1 ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL DMVCH( 'N', LJ, 2, ALPHA, Z( JJ, 1 ), $ NMAX, W, 1, ONE, A( JJ, J ), 1, $ YT, G, AA( JA ), EPS, ERR, FATAL, $ NOUT, .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 150 90 CONTINUE ELSE * Avoid repeating tests with N.le.0. IF( N.LE.0 ) $ GO TO 140 END IF * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 170 * 150 CONTINUE WRITE( NOUT, FMT = 9995 )J * 160 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, $ INCY, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX, INCY END IF * 170 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', Y,', I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', Y,', I2, ', A,', I3, ') .' 
) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK6. * END SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 2 Blas. * Requires a special version of the error-handling routine XERBLA. * ALPHA, BETA, A, X and Y should not need to be defined. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Local Scalars .. DOUBLE PRECISION ALPHA, BETA * .. Local Arrays .. DOUBLE PRECISION A( 1, 1 ), X( 1 ), Y( 1 ) * .. External Subroutines .. EXTERNAL CHKXER, DGBMV, DGEMV, DGER, DSBMV, DSPMV, DSPR, $ DSPR2, DSYMV, DSYR, DSYR2, DTBMV, DTBSV, DTPMV, $ DTPSV, DTRMV, DTRSV * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. 
GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90, 100, 110, 120, 130, 140, 150, $ 160 )ISNUM 10 INFOT = 1 CALL DGEMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGEMV( 'N', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMV( 'N', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DGEMV( 'N', 2, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMV( 'N', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DGEMV( 'N', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 20 INFOT = 1 CALL DGBMV( '/', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGBMV( 'N', -1, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGBMV( 'N', 0, -1, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGBMV( 'N', 0, 0, -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGBMV( 'N', 2, 0, 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGBMV( 'N', 0, 0, 1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 30 INFOT = 1 CALL DSYMV( '/', 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYMV( 'U', -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSYMV( 'U', 2, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL 
CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMV( 'U', 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYMV( 'U', 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 40 INFOT = 1 CALL DSBMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSBMV( 'U', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSBMV( 'U', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DSBMV( 'U', 0, 1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DSBMV( 'U', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DSBMV( 'U', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 50 INFOT = 1 CALL DSPMV( '/', 0, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSPMV( 'U', -1, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DSPMV( 'U', 0, ALPHA, A, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSPMV( 'U', 0, ALPHA, A, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 60 INFOT = 1 CALL DTRMV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTRMV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTRMV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTRMV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DTRMV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 70 INFOT = 1 CALL DTBMV( 
'/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTBMV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTBMV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTBMV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTBMV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DTBMV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTBMV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 80 INFOT = 1 CALL DTPMV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTPMV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTPMV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTPMV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DTPMV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 90 INFOT = 1 CALL DTRSV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTRSV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTRSV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTRSV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DTRSV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 100 INFOT = 1 CALL DTBSV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTBSV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT 
= 3 CALL DTBSV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTBSV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTBSV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DTBSV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTBSV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 110 INFOT = 1 CALL DTPSV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTPSV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTPSV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTPSV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DTPSV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 120 INFOT = 1 CALL DGER( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGER( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGER( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DGER( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DGER( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 130 INFOT = 1 CALL DSYR( '/', 0, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYR( 'U', -1, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSYR( 'U', 0, ALPHA, X, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR( 'U', 2, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 140 INFOT = 1 CALL DSPR( '/', 0, ALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT 
= 2 CALL DSPR( 'U', -1, ALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSPR( 'U', 0, ALPHA, X, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 150 INFOT = 1 CALL DSYR2( '/', 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYR2( 'U', -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSYR2( 'U', 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2( 'U', 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2( 'U', 2, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 160 INFOT = 1 CALL DSPR2( '/', 0, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSPR2( 'U', -1, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DSPR2( 'U', 0, ALPHA, X, 0, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSPR2( 'U', 0, ALPHA, X, 1, Y, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 170 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of DCHKE. * END SUBROUTINE DMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, KL, $ KU, RESET, TRANSL ) * * Generates values for an M by N matrix A within the bandwidth * defined by KL and KU. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'GB', 'SY', 'SB', 'SP', 'TR', 'TB' OR 'TP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. 
DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) DOUBLE PRECISION ROGUE PARAMETER ( ROGUE = -1.0D10 ) * .. Scalar Arguments .. DOUBLE PRECISION TRANSL INTEGER KL, KU, LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. DOUBLE PRECISION A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, I1, I2, I3, IBEG, IEND, IOFF, J, KK LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. DOUBLE PRECISION DBEG EXTERNAL DBEG * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Executable Statements .. GEN = TYPE( 1: 1 ).EQ.'G' SYM = TYPE( 1: 1 ).EQ.'S' TRI = TYPE( 1: 1 ).EQ.'T' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN IF( ( I.LE.J.AND.J - I.LE.KU ).OR. $ ( I.GE.J.AND.I - J.LE.KL ) )THEN A( I, J ) = DBEG( RESET ) + TRANSL ELSE A( I, J ) = ZERO END IF IF( I.NE.J )THEN IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. 
* IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'GB' )THEN DO 90 J = 1, N DO 60 I1 = 1, KU + 1 - J AA( I1 + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I2 = I1, MIN( KL + KU + 1, KU + 1 + M - J ) AA( I2 + ( J - 1 )*LDA ) = A( I2 + J - KU - 1, J ) 70 CONTINUE DO 80 I3 = I2, LDA AA( I3 + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE ELSE IF( TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 130 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 100 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 100 CONTINUE DO 110 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 110 CONTINUE DO 120 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 120 CONTINUE 130 CONTINUE ELSE IF( TYPE.EQ.'SB'.OR.TYPE.EQ.'TB' )THEN DO 170 J = 1, N IF( UPPER )THEN KK = KL + 1 IBEG = MAX( 1, KL + 2 - J ) IF( UNIT )THEN IEND = KL ELSE IEND = KL + 1 END IF ELSE KK = 1 IF( UNIT )THEN IBEG = 2 ELSE IBEG = 1 END IF IEND = MIN( KL + 1, 1 + M - J ) END IF DO 140 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 140 CONTINUE DO 150 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I + J - KK, J ) 150 CONTINUE DO 160 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 160 CONTINUE 170 CONTINUE ELSE IF( TYPE.EQ.'SP'.OR.TYPE.EQ.'TP' )THEN IOFF = 0 DO 190 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 180 I = IBEG, IEND IOFF = IOFF + 1 AA( IOFF ) = A( I, J ) IF( I.EQ.J )THEN IF( UNIT ) $ AA( IOFF ) = ROGUE END IF 180 CONTINUE 190 CONTINUE END IF RETURN * * End of DMAKE. * END SUBROUTINE DMVCH( TRANS, M, N, ALPHA, A, NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, FATAL, NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. 
* Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION ALPHA, BETA, EPS, ERR INTEGER INCX, INCY, M, N, NMAX, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANS * .. Array Arguments .. DOUBLE PRECISION A( NMAX, * ), G( * ), X( * ), Y( * ), YT( * ), $ YY( * ) * .. Local Scalars .. DOUBLE PRECISION ERRI INTEGER I, INCXL, INCYL, IY, J, JX, KX, KY, ML, NL LOGICAL TRAN * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. Executable Statements .. TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF IF( INCX.LT.0 )THEN KX = NL INCXL = -1 ELSE KX = 1 INCXL = 1 END IF IF( INCY.LT.0 )THEN KY = ML INCYL = -1 ELSE KY = 1 INCYL = 1 END IF * * Compute expected result in YT using data in A, X and Y. * Compute gauges in G. * IY = KY DO 30 I = 1, ML YT( IY ) = ZERO G( IY ) = ZERO JX = KX IF( TRAN )THEN DO 10 J = 1, NL YT( IY ) = YT( IY ) + A( J, I )*X( JX ) G( IY ) = G( IY ) + ABS( A( J, I )*X( JX ) ) JX = JX + INCXL 10 CONTINUE ELSE DO 20 J = 1, NL YT( IY ) = YT( IY ) + A( I, J )*X( JX ) G( IY ) = G( IY ) + ABS( A( I, J )*X( JX ) ) JX = JX + INCXL 20 CONTINUE END IF YT( IY ) = ALPHA*YT( IY ) + BETA*Y( IY ) G( IY ) = ABS( ALPHA )*G( IY ) + ABS( BETA*Y( IY ) ) IY = IY + INCYL 30 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 40 I = 1, ML ERRI = ABS( YT( I ) - YY( 1 + ( I - 1 )*ABS( INCY ) ) )/EPS IF( G( I ).NE.ZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ GO TO 50 40 CONTINUE * If the loop completes, all results are at least half accurate. GO TO 70 * * Report fatal error. * 50 FATAL = .TRUE. 
WRITE( NOUT, FMT = 9999 ) DO 60 I = 1, ML IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, YT( I ), $ YY( 1 + ( I - 1 )*ABS( INCY ) ) ELSE WRITE( NOUT, FMT = 9998 )I, $ YY( 1 + ( I - 1 )*ABS( INCY ) ), YT( I ) END IF 60 CONTINUE * 70 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', $ 'TED RESULT' ) 9998 FORMAT( 1X, I7, 2G18.6 ) * * End of DMVCH. * END LOGICAL FUNCTION LDE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. DOUBLE PRECISION RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LDE = .TRUE. GO TO 30 20 CONTINUE LDE = .FALSE. 30 RETURN * * End of LDE. * END LOGICAL FUNCTION LDERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE', 'SY' or 'SP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. DOUBLE PRECISION AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LDERES = .TRUE. 
GO TO 80 70 CONTINUE LDERES = .FALSE. 80 RETURN * * End of LDERES. * END DOUBLE PRECISION FUNCTION DBEG( RESET ) * * Generates random numbers uniformly distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, MI * .. Save statement .. SAVE I, IC, MI * .. Intrinsic Functions .. INTRINSIC DBLE * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 I = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I is bounded between 1 and 999. * If initial I = 1,2,3,6,7 or 9, the period will be 50. * If initial I = 4 or 8, the period will be 25. * If initial I = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I in 6. * IC = IC + 1 10 I = I*MI I = I - 1000*( I/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF DBEG = DBLE( I - 500 )/1001.0D0 RETURN * * End of DBEG. * END DOUBLE PRECISION FUNCTION DDIFF( X, Y ) * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. Executable Statements .. DDIFF = X - Y RETURN * * End of DDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. 
* END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 2 BLAS * routines. * * XERBLA is an error handler for the Level 2 BLAS routines. * * It is called by the Level 2 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-0.6.1/blastest/src/fortran/dblat3.f000066400000000000000000003135061360743507500200740ustar00rootroot00000000000000*> \brief \b DBLAT3 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM DBLAT3 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the DOUBLE PRECISION Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records *> of the file are read using list-directed input, the last 6 records *> are read using the format ( A6, L2 ). 
An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 20 lines: *> 'dblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 3 NUMBER OF VALUES OF ALPHA *> 0.0 1.0 0.7 VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> 0.0 1.0 1.3 VALUES OF BETA *> DGEMM T PUT F FOR NO TEST. SAME COLUMNS. *> DSYMM T PUT F FOR NO TEST. SAME COLUMNS. *> DTRMM T PUT F FOR NO TEST. SAME COLUMNS. *> DTRSM T PUT F FOR NO TEST. SAME COLUMNS. *> DSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> DSYR2K T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. *> A Set of Level 3 Basic Linear Algebra Subprograms. *> *> Technical Memorandum No.88 (Revision 1), Mathematics and *> Computer Science Division, Argonne National Laboratory, 9700 *> South Cass Avenue, Argonne, Illinois 60439, US. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* *> \date April 2012 * *> \ingroup double_blas_testing * * ===================================================================== PROGRAM DBLAT3 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANSA, TRANSB CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. DOUBLE PRECISION AA( NMAX*NMAX ), AB( NMAX, 2*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), $ BB( NMAX*NMAX ), BET( NBEMAX ), $ BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ G( NMAX ), W( 2*NMAX ) INTEGER IDIM( NIDMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. DOUBLE PRECISION DDIFF LOGICAL LDE EXTERNAL DDIFF, LDE * .. External Subroutines .. EXTERNAL DCHK1, DCHK2, DCHK3, DCHK4, DCHK5, DCHKE, DMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'DGEMM ', 'DSYMM ', 'DTRMM ', 'DTRSM ', $ 'DSYRK ', 'DSYR2K'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. 
* READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 220 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 220 END IF 10 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 220 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 220 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9993 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9992 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 20 I = 1, NSUBS LTEST( I ) = .FALSE. 
20 CONTINUE 30 READ( NIN, FMT = 9988, END = 60 )SNAMET, LTESTT DO 40 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 50 40 CONTINUE WRITE( NOUT, FMT = 9990 )SNAMET STOP 50 LTEST( I ) = LTESTT GO TO 30 * 60 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of DMMCH using exact data. * N = MIN( 32, NMAX ) DO 100 J = 1, N DO 90 I = 1, N AB( I, J ) = MAX( I - J + 1, 0 ) 90 CONTINUE AB( J, NMAX + 1 ) = J AB( 1, NMAX + J ) = J C( J, 1 ) = ZERO 100 CONTINUE DO 110 J = 1, N CC( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 110 CONTINUE * CC holds the exact result. On exit from DMMCH CT holds * the result computed by DMMCH. TRANSA = 'N' TRANSB = 'N' CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'T' CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF DO 120 J = 1, N AB( J, NMAX + 1 ) = N - J + 1 AB( 1, NMAX + J ) = N - J + 1 120 CONTINUE DO 130 J = 1, N CC( N - J + 1 ) = J*( ( J + 1 )*J )/2 - $ ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE TRANSA = 'T' TRANSB = 'N' CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LDE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'T' CALL DMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. 
) SAME = LDE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 200 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9987 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL DCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 160, 160, 170, 180 )ISNUM * Test DGEMM, 01. 140 CALL DCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test DSYMM, 02. 150 CALL DCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test DTRMM, 03, DTRSM, 04. 160 CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C ) GO TO 190 * Test DSYRK, 05. 170 CALL DCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test DSYR2K, 06. 
180 CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 END IF 200 CONTINUE WRITE( NOUT, FMT = 9986 ) GO TO 230 * 210 CONTINUE WRITE( NOUT, FMT = 9985 ) GO TO 230 * 220 CONTINUE WRITE( NOUT, FMT = 9991 ) * 230 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, D9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' TESTS OF THE DOUBLE PRECISION LEVEL 3 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9994 FORMAT( ' FOR N ', 9I6 ) 9993 FORMAT( ' FOR ALPHA ', 7F6.1 ) 9992 FORMAT( ' FOR BETA ', 7F6.1 ) 9991 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9990 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9989 FORMAT( ' ERROR IN DMMCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' DMMCH WAS CALLED WITH TRANSA = ', A1, $ ' AND TRANSB = ', A1, /' AND RETURNED SAME = ', L1, ' AND ', $ 'ERR = ', F12.3, '.', /' THIS MAY BE DUE TO FAULTS IN THE ', $ 'ARITHMETIC OR THE COMPILER.', /' ******* TESTS ABANDONED ', $ '*******' ) 9988 FORMAT( A6, L2 ) 9987 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9986 FORMAT( /' END OF TESTS' ) 9985 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9984 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of DBLAT3. * END SUBROUTINE DCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests DGEMM. * * Auxiliary routine for test program for Level 3 Blas. 
* * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, $ MA, MB, MS, N, NA, NARGS, NB, NC, NS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DGEMM, DMAKE, DMMCH * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. * NARGS = 13 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 110 IM = 1, NIDIM M = IDIM( IM ) * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICA = 1, 3 TRANSA = ICH( ICA: ICA ) TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' * IF( TRANA )THEN MA = K NA = M ELSE MA = M NA = K END IF * Set LDA to 1 more than minimum value if room. 
LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL DMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICB = 1, 3 TRANSB = ICH( ICB: ICB ) TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * IF( TRANB )THEN MB = N NB = K ELSE MB = K NB = N END IF * Set LDB to 1 more than minimum value if room. LDB = MB IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 70 LBB = LDB*NB * * Generate the matrix B. * CALL DMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, $ LDB, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL DMAKE( 'GE', ' ', ' ', M, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANAS = TRANSA TRANBS = TRANSB MS = M NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) $ REWIND NTRA CALL DGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ AA, LDA, BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = TRANSA.EQ.TRANAS ISAME( 2 ) = TRANSB.EQ.TRANBS ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LDE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LDE( BS, BB, LBB ) ISAME( 10 ) = LDBS.EQ.LDB ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LDE( CS, CC, LCC ) ELSE ISAME( 12 ) = LDERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL DMMCH( TRANSA, TRANSB, M, N, K, $ ALPHA, A, NMAX, B, NMAX, BETA, $ C, NMAX, CT, G, CC, LDC, EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',''', A1, ''',', $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', $ 'C,', I3, ').' 
) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK1. * END SUBROUTINE DCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests DSYMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, BETA, BLS, ERR, ERRMAX INTEGER I, IA, IB, ICS, ICU, IM, IN, LAA, LBB, LCC, $ LDA, LDAS, LDB, LDBS, LDC, LDCS, M, MS, N, NA, $ NARGS, NC, NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 SIDE, SIDES, UPLO, UPLOS CHARACTER*2 ICHS, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMMCH, DSYMM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHS/'LR'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IM = 1, NIDIM M = IDIM( IM ) * DO 90 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. 
IF( LDC.GT.NMAX ) $ GO TO 90 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 90 LBB = LDB*N * * Generate the matrix B. * CALL DMAKE( 'GE', ' ', ' ', M, N, B, NMAX, BB, LDB, RESET, $ ZERO ) * DO 80 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' * IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * * Generate the symmetric matrix A. * CALL DMAKE( 'SY', UPLO, ' ', NA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL DMAKE( 'GE', ' ', ' ', M, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, SIDE, $ UPLO, M, N, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL DSYMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 110 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LDE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LDE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LDE( CS, CC, LCC ) ELSE ISAME( 11 ) = LDERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 110 END IF * IF( .NOT.NULL )THEN * * Check the result. * IF( LEFT )THEN CALL DMMCH( 'N', 'N', M, N, M, ALPHA, A, $ NMAX, B, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL DMMCH( 'N', 'N', M, N, N, ALPHA, B, $ NMAX, A, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 120 * 110 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, M, N, ALPHA, LDA, $ LDB, BETA, LDC * 120 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' 
) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK2. * END SUBROUTINE DCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NMAX, A, AA, AS, $ B, BB, BS, CT, G, C ) * * Tests DTRMM and DTRSM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, ALS, ERR, ERRMAX INTEGER I, IA, ICD, ICS, ICT, ICU, IM, IN, J, LAA, LBB, $ LDA, LDAS, LDB, LDBS, M, MS, N, NA, NARGS, NC, $ NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 DIAG, DIAGS, SIDE, SIDES, TRANAS, TRANSA, UPLO, $ UPLOS CHARACTER*2 ICHD, ICHS, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMMCH, DTRMM, DTRSM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/, ICHS/'LR'/ * .. Executable Statements .. * NARGS = 11 NC = 0 RESET = .TRUE. ERRMAX = ZERO * Set up zero matrix for DMMCH. 
DO 20 J = 1, NMAX DO 10 I = 1, NMAX C( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * DO 140 IM = 1, NIDIM M = IDIM( IM ) * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 130 LBB = LDB*N NULL = M.LE.0.OR.N.LE.0 * DO 120 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 130 LAA = LDA*NA * DO 110 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 100 ICT = 1, 3 TRANSA = ICHT( ICT: ICT ) * DO 90 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * CALL DMAKE( 'TR', UPLO, DIAG, NA, NA, A, $ NMAX, AA, LDA, RESET, ZERO ) * * Generate the matrix B. * CALL DMAKE( 'GE', ' ', ' ', M, N, B, NMAX, $ BB, LDB, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO TRANAS = TRANSA DIAGS = DIAG MS = M NS = N ALS = ALPHA DO 30 I = 1, LAA AS( I ) = AA( I ) 30 CONTINUE LDAS = LDA DO 40 I = 1, LBB BS( I ) = BB( I ) 40 CONTINUE LDBS = LDB * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL DTRMM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL DTRSM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = TRANAS.EQ.TRANSA ISAME( 4 ) = DIAGS.EQ.DIAG ISAME( 5 ) = MS.EQ.M ISAME( 6 ) = NS.EQ.N ISAME( 7 ) = ALS.EQ.ALPHA ISAME( 8 ) = LDE( AS, AA, LAA ) ISAME( 9 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 10 ) = LDE( BS, BB, LBB ) ELSE ISAME( 10 ) = LDERES( 'GE', ' ', M, N, BS, $ BB, LDB ) END IF ISAME( 11 ) = LDBS.EQ.LDB * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 50 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 50 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MM' )THEN * * Check the result. * IF( LEFT )THEN CALL DMMCH( TRANSA, 'N', M, N, M, $ ALPHA, A, NMAX, B, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL DMMCH( 'N', TRANSA, M, N, N, $ ALPHA, B, NMAX, A, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN * * Compute approximation to original * matrix. * DO 70 J = 1, N DO 60 I = 1, M C( I, J ) = BB( I + ( J - 1 )* $ LDB ) BB( I + ( J - 1 )*LDB ) = ALPHA* $ B( I, J ) 60 CONTINUE 70 CONTINUE * IF( LEFT )THEN CALL DMMCH( TRANSA, 'N', M, N, M, $ ONE, A, NMAX, C, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) ELSE CALL DMMCH( 'N', TRANSA, M, N, N, $ ONE, C, NMAX, A, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) END IF END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 150 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, LDA, LDB * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 4( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK3. * END SUBROUTINE DCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests DSYRK. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. 
DOUBLE PRECISION ALPHA, ALS, BETA, BETS, ERR, ERRMAX INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, K, KS, $ LAA, LCC, LDA, LDAS, LDC, LDCS, LJ, MA, N, NA, $ NARGS, NC, NS LOGICAL NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMMCH, DSYRK * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NTC'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 10 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL DMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL DMAKE( 'SY', UPLO, ' ', N, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA BETS = BETA DO 20 I = 1, LCC CS( I ) = CC( I ) 20 CONTINUE LDCS = LDC * * Call the subroutine. 
* IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, BETA, LDC IF( REWI ) $ REWIND NTRA CALL DSYRK( UPLO, TRANS, N, K, ALPHA, AA, LDA, $ BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LDE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = BETS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LDE( CS, CC, LCC ) ELSE ISAME( 9 ) = LDERES( 'SY', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 10 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * JC = 1 DO 40 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN CALL DMMCH( 'T', 'N', LJ, 1, K, ALPHA, $ A( 1, JJ ), NMAX, $ A( 1, J ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL DMMCH( 'N', 'T', LJ, 1, K, ALPHA, $ A( JJ, 1 ), NMAX, $ A( J, 1 ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 40 CONTINUE END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK4. * END SUBROUTINE DCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) * * Tests DSYR2K. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO PARAMETER ( ZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. DOUBLE PRECISION AA( NMAX*NMAX ), AB( 2*NMAX*NMAX ), $ ALF( NALF ), AS( NMAX*NMAX ), BB( NMAX*NMAX ), $ BET( NBET ), BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ G( NMAX ), W( 2*NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. 
DOUBLE PRECISION ALPHA, ALS, BETA, BETS, ERR, ERRMAX INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, JJAB, $ K, KS, LAA, LBB, LCC, LDA, LDAS, LDB, LDBS, $ LDC, LDCS, LJ, MA, N, NA, NARGS, NC, NS LOGICAL NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LDE, LDERES EXTERNAL LDE, LDERES * .. External Subroutines .. EXTERNAL DMAKE, DMMCH, DSYR2K * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NTC'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 130 LCC = LDC*N NULL = N.LE.0 * DO 120 IK = 1, NIDIM K = IDIM( IK ) * DO 110 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*NA * * Generate the matrix A. * IF( TRAN )THEN CALL DMAKE( 'GE', ' ', ' ', MA, NA, AB, 2*NMAX, AA, $ LDA, RESET, ZERO ) ELSE CALL DMAKE( 'GE', ' ', ' ', MA, NA, AB, NMAX, AA, LDA, $ RESET, ZERO ) END IF * * Generate the matrix B. * LDB = LDA LBB = LAA IF( TRAN )THEN CALL DMAKE( 'GE', ' ', ' ', MA, NA, AB( K + 1 ), $ 2*NMAX, BB, LDB, RESET, ZERO ) ELSE CALL DMAKE( 'GE', ' ', ' ', MA, NA, AB( K*NMAX + 1 ), $ NMAX, BB, LDB, RESET, ZERO ) END IF * DO 100 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 90 IA = 1, NALF ALPHA = ALF( IA ) * DO 80 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. 
* CALL DMAKE( 'SY', UPLO, ' ', N, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BETS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL DSYR2K( UPLO, TRANS, N, K, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LDE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LDE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BETS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LDE( CS, CC, LCC ) ELSE ISAME( 11 ) = LDERES( 'SY', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * JJAB = 1 JC = 1 DO 70 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN DO 50 I = 1, K W( I ) = AB( ( J - 1 )*2*NMAX + K + $ I ) W( K + I ) = AB( ( J - 1 )*2*NMAX + $ I ) 50 CONTINUE CALL DMMCH( 'T', 'N', LJ, 1, 2*K, $ ALPHA, AB( JJAB ), 2*NMAX, $ W, 2*NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. 
) ELSE DO 60 I = 1, K W( I ) = AB( ( K + I - 1 )*NMAX + $ J ) W( K + I ) = AB( ( I - 1 )*NMAX + $ J ) 60 CONTINUE CALL DMMCH( 'N', 'N', LJ, 1, 2*K, $ ALPHA, AB( JJ ), NMAX, W, $ 2*NMAX, BETA, C( JJ, J ), $ NMAX, CT, G, CC( JC ), LDC, $ EPS, ERR, FATAL, NOUT, $ .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 IF( TRAN ) $ JJAB = JJAB + 2*NMAX END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 140 70 CONTINUE END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, LDB, BETA, LDC * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of DCHK5. * END SUBROUTINE DCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. * A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. 
* Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * 3-19-92: Initialize ALPHA and BETA (eca) * 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. DOUBLE PRECISION ONE, TWO PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. DOUBLE PRECISION ALPHA, BETA * .. Local Arrays .. DOUBLE PRECISION A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL CHKXER, DGEMM, DSYMM, DSYR2K, DSYRK, DTRMM, $ DTRSM * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. * * Initialize ALPHA and BETA. 
* ALPHA = ONE BETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL DGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL DGEMM( '/', 'T', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGEMM( 'N', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DGEMM( 'T', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMM( 'N', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMM( 'N', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMM( 'T', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DGEMM( 'T', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGEMM( 'N', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGEMM( 'N', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGEMM( 'T', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DGEMM( 'T', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGEMM( 'N', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGEMM( 'N', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGEMM( 'T', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DGEMM( 'T', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMM( 'N', 'N', 
2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL DGEMM( 'T', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGEMM( 'N', 'N', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGEMM( 'N', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DGEMM( 'T', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGEMM( 'T', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL DGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 20 INFOT = 1 CALL DSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL 
DSYMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 30 INFOT = 1 CALL DTRMM( '/', 'U', 'N', 
'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTRMM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTRMM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTRMM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRMM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, 
LERR, OK ) INFOT = 6 CALL DTRMM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRMM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRMM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 
1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 40 INFOT = 1 CALL DTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DTRSM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DTRSM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DTRSM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL DTRSM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK 
) INFOT = 6 CALL DTRSM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL DTRSM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DTRSM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 
) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL DTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 50 INFOT = 1 CALL DSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYRK( 'U', '/', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYRK( 'U', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYRK( 'U', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYRK( 'L', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYRK( 'L', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYRK( 'U', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYRK( 'U', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYRK( 'L', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYRK( 'L', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYRK( 'U', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYRK( 'U', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYRK( 'L', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYRK( 'L', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYRK( 'U', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYRK( 'U', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYRK( 'L', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL 
CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL DSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 60 INFOT = 1 CALL DSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL DSYR2K( 'U', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYR2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYR2K( 'U', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYR2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL DSYR2K( 'L', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYR2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYR2K( 'U', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYR2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL DSYR2K( 'L', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2K( 'U', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL DSYR2K( 'L', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2K( 'U', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( 
SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL DSYR2K( 'L', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYR2K( 'U', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL DSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 70 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of DCHKE. * END SUBROUTINE DMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, RESET, $ TRANSL ) * * Generates values for an M by N matrix A. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'SY' or 'TR'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) DOUBLE PRECISION ROGUE PARAMETER ( ROGUE = -1.0D10 ) * .. Scalar Arguments .. DOUBLE PRECISION TRANSL INTEGER LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. DOUBLE PRECISION A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. 
DOUBLE PRECISION DBEG EXTERNAL DBEG * .. Executable Statements .. GEN = TYPE.EQ.'GE' SYM = TYPE.EQ.'SY' TRI = TYPE.EQ.'TR' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN A( I, J ) = DBEG( RESET ) + TRANSL IF( I.NE.J )THEN * Set some elements to zero IF( N.GT.3.AND.J.EQ.N/2 ) $ A( I, J ) = ZERO IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 90 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 60 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 70 CONTINUE DO 80 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE END IF RETURN * * End of DMAKE. * END SUBROUTINE DMMCH( TRANSA, TRANSB, M, N, KK, ALPHA, A, LDA, B, LDB, $ BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL, $ NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. DOUBLE PRECISION ZERO, ONE PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) * .. Scalar Arguments .. 
DOUBLE PRECISION ALPHA, BETA, EPS, ERR INTEGER KK, LDA, LDB, LDC, LDCC, M, N, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANSA, TRANSB * .. Array Arguments .. DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ), $ CC( LDCC, * ), CT( * ), G( * ) * .. Local Scalars .. DOUBLE PRECISION ERRI INTEGER I, J, K LOGICAL TRANA, TRANB * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. Executable Statements .. TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * * Compute expected result, one column at a time, in CT using data * in A, B and C. * Compute gauges in G. * DO 120 J = 1, N * DO 10 I = 1, M CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE IF( .NOT.TRANA.AND..NOT.TRANB )THEN DO 30 K = 1, KK DO 20 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( K, J ) G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( K, J ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA.AND..NOT.TRANB )THEN DO 50 K = 1, KK DO 40 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( K, J ) G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( K, J ) ) 40 CONTINUE 50 CONTINUE ELSE IF( .NOT.TRANA.AND.TRANB )THEN DO 70 K = 1, KK DO 60 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( J, K ) G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( J, K ) ) 60 CONTINUE 70 CONTINUE ELSE IF( TRANA.AND.TRANB )THEN DO 90 K = 1, KK DO 80 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( J, K ) G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( J, K ) ) 80 CONTINUE 90 CONTINUE END IF DO 100 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( I, J ) ) 100 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 110 I = 1, M ERRI = ABS( CT( I ) - CC( I, J ) )/EPS IF( G( I ).NE.ZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ GO TO 130 110 CONTINUE * 120 CONTINUE * * If the loop completes, all results are at least half accurate. GO TO 150 * * Report fatal error. * 130 FATAL = .TRUE. 
WRITE( NOUT, FMT = 9999 ) DO 140 I = 1, M IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) ELSE WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) END IF 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9997 )J * 150 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', $ 'TED RESULT' ) 9998 FORMAT( 1X, I7, 2G18.6 ) 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) * * End of DMMCH. * END LOGICAL FUNCTION LDE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. DOUBLE PRECISION RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LDE = .TRUE. GO TO 30 20 CONTINUE LDE = .FALSE. 30 RETURN * * End of LDE. * END LOGICAL FUNCTION LDERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE' or 'SY'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. DOUBLE PRECISION AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. 
UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LDERES = .TRUE. GO TO 80 70 CONTINUE LDERES = .FALSE. 80 RETURN * * End of LDERES. * END DOUBLE PRECISION FUNCTION DBEG( RESET ) * * Generates random numbers uniformly distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, MI * .. Save statement .. SAVE I, IC, MI * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 I = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I is bounded between 1 and 999. * If initial I = 1,2,3,6,7 or 9, the period will be 50. * If initial I = 4 or 8, the period will be 25. * If initial I = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I in 6. * IC = IC + 1 10 I = I*MI I = I - 1000*( I/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF DBEG = ( I - 500 )/1001.0D0 RETURN * * End of DBEG. * END DOUBLE PRECISION FUNCTION DDIFF( X, Y ) * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. Executable Statements .. DDIFF = X - Y RETURN * * End of DDIFF. 
* END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 3 BLAS * routines. * * XERBLA is an error handler for the Level 3 BLAS routines. * * It is called by the Level 3 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. 
END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-0.6.1/blastest/src/fortran/run-f2c.sh000077500000000000000000000060661360743507500203670ustar00rootroot00000000000000#!/bin/bash # This script converts netlib [sdcz]blat[123].f files from Fortran to C. # # Start by converting to C with f2c. # Options used: # -A Produce ANSI C (instead of old-style C). # -R Do not promote REAL functions and operations to DOUBLE PRECISION. # -a Make local variables automatic rather than static (unless they # appear in a DATA, EQUIVALENCE, NAMELIST, or SAVE statement). f2c -A -R -a *.f # Add 'const' qualifier to certain function delcarations so they match # the prototypes taken from libf2c. recursive-sed.sh -c "s/s_cmp(char \*, char/s_cmp(const char \*, const char/g" -p "*.c" recursive-sed.sh -c "s/s_copy(char \*, char/s_copy(char \*, const char/g" -p "*.c" recursive-sed.sh -c "s/d_cnjg(doublecomplex \*, doublecomplex/d_cnjg(doublecomplex *, const doublecomplex/g" -p "*.c" recursive-sed.sh -c "s/d_imag(doublecomplex/d_imag(const doublecomplex/g" -p "*.c" recursive-sed.sh -c "s/c_abs(complex/c_abs(const complex/g" -p "*.c" recursive-sed.sh -c "s/z_abs(doublecomplex/c_abs(const doublecomplex/g" -p "*.c" # Use main() and 'void' instead of MAIN__ and VOID. recursive-sed.sh -c "s/MAIN__/main/g" -p "*.c" recursive-sed.sh -c "s/VOID/void/g" -p "*.c" # Add prefix to calls to epsilon_() based on the file in which the # function is called. [sd]_epsilon_() are not libf2c functions, but # they are present in the local subset of libf2c used to link the # BLAS testsuite drivers. 
recursive-sed.sh -c "s/epsilon_/s_epsilon_/g" -p "[sc]*.c" recursive-sed.sh -c "s/epsilon_/d_epsilon_/g" -p "[dz]*.c" # The dsdot_() check needs s_epsilon_(), not d_epsilon_(). recursive-sed.sh -c "s/real d_epsilon_()/real s_epsilon_()/g" -p "d*1.c" recursive-sed.sh -c "s/d_epsilon_(\&c_b81)/s_epsilon_(\&c_b81)/g" -p "d*1.c" # Fix type inconsistencies in the original Fortran file vis-a-vis # epsilon() and abs(). recursive-sed.sh -c "s/real d_epsilon_(doublereal/double d_epsilon_(doublereal/g" -p "[dz]*.c" recursive-sed.sh -c "s/c_abs/z_abs/g" -p "z*.c" # Fix missing braces around struct initializers. recursive-sed.sh -c "s/equiv_3 = {/equiv_3 = {{/g" -p "[sd]*1.c" recursive-sed.sh -c "s/equiv_7 = {/equiv_7 = {{/g" -p "[sd]*1.c" recursive-sed.sh -c "s/0., 0., 0. }/0., 0., 0. }}/g" -p "d*1.c" recursive-sed.sh -c "s/2.9, .2, -4. }/2.9, .2, -4. }}/g" -p "d*1.c" recursive-sed.sh -c "s/0.f, 0.f, 0.f }/0.f, 0.f, 0.f }}/g" -p "s*1.c" recursive-sed.sh -c "s/-4.f };/-4.f }};/g" -p "s*1.c" # Convert from brain-dead f2c complex calling conventions to normal # return-based conventions. 
recursive-sed.sh -c "s/void cdotc_(complex \*, /complex cdotc_(/g" -p "c*1.c" recursive-sed.sh -c "s/void cdotu_(complex \*, /complex cdotu_(/g" -p "c*1.c" recursive-sed.sh -c "s/cdotc_(&q__1, /q__1 = cdotc_(/g" -p "c*1.c" recursive-sed.sh -c "s/cdotu_(&q__1, /q__1 = cdotu_(/g" -p "c*1.c" recursive-sed.sh -c "s/void zdotc_(doublecomplex \*, /doublecomplex zdotc_(/g" -p "z*1.c" recursive-sed.sh -c "s/void zdotu_(doublecomplex \*, /doublecomplex zdotu_(/g" -p "z*1.c" recursive-sed.sh -c "s/zdotc_(\&z__1, /z__1 = zdotc_(/g" -p "z*1.c" recursive-sed.sh -c "s/zdotu_(\&z__1, /z__1 = zdotu_(/g" -p "z*1.c" blis-0.6.1/blastest/src/fortran/sblat1.f000066400000000000000000001245741360743507500201160ustar00rootroot00000000000000*> \brief \b SBLAT1 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM SBLAT1 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the REAL Level 1 BLAS. *> *> Based upon the original BLAS test routine together with: *> F06EAF Example Program Text *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup single_blas_testing * * ===================================================================== PROGRAM SBLAT1 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SFAC INTEGER IC * .. External Subroutines .. EXTERNAL CHECK0, CHECK1, CHECK2, CHECK3, HEADER * .. Common blocks .. 
COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA SFAC/9.765625E-4/ * .. Executable Statements .. WRITE (NOUT,99999) DO 20 IC = 1, 13 ICASE = IC CALL HEADER * * .. Initialize PASS, INCX, and INCY for a new case. .. * .. the value 9999 for INCX or INCY will appear in the .. * .. detailed output, if any, for cases that do not involve .. * .. these parameters .. * PASS = .TRUE. INCX = 9999 INCY = 9999 IF (ICASE.EQ.3 .OR. ICASE.EQ.11) THEN CALL CHECK0(SFAC) ELSE IF (ICASE.EQ.7 .OR. ICASE.EQ.8 .OR. ICASE.EQ.9 .OR. + ICASE.EQ.10) THEN CALL CHECK1(SFAC) ELSE IF (ICASE.EQ.1 .OR. ICASE.EQ.2 .OR. ICASE.EQ.5 .OR. + ICASE.EQ.6 .OR. ICASE.EQ.12 .OR. ICASE.EQ.13) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.EQ.4) THEN CALL CHECK3(SFAC) END IF * -- Print IF (PASS) WRITE (NOUT,99998) 20 CONTINUE STOP * 99999 FORMAT (' Real BLAS Test Program Results',/1X) 99998 FORMAT (' ----- PASS -----') END SUBROUTINE HEADER * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Arrays .. CHARACTER*6 L(13) * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA L(1)/' SDOT '/ DATA L(2)/'SAXPY '/ DATA L(3)/'SROTG '/ DATA L(4)/' SROT '/ DATA L(5)/'SCOPY '/ DATA L(6)/'SSWAP '/ DATA L(7)/'SNRM2 '/ DATA L(8)/'SASUM '/ DATA L(9)/'SSCAL '/ DATA L(10)/'ISAMAX'/ DATA L(11)/'SROTMG'/ DATA L(12)/'SROTM '/ DATA L(13)/'SDSDOT'/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN * 99999 FORMAT (/' Test of subprogram number',I3,12X,A6) END SUBROUTINE CHECK0(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL D12, SA, SB, SC, SS INTEGER I, K * .. Local Arrays .. REAL DA1(8), DATRUE(8), DB1(8), DBTRUE(8), DC1(8), + DS1(8), DAB(4,9), DTEMP(9), DTRUE(9,9) * .. External Subroutines .. EXTERNAL SROTG, SROTMG, STEST1 * .. Common blocks .. 
COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. DATA DA1/0.3E0, 0.4E0, -0.3E0, -0.4E0, -0.3E0, 0.0E0, + 0.0E0, 1.0E0/ DATA DB1/0.4E0, 0.3E0, 0.4E0, 0.3E0, -0.4E0, 0.0E0, + 1.0E0, 0.0E0/ DATA DC1/0.6E0, 0.8E0, -0.6E0, 0.8E0, 0.6E0, 1.0E0, + 0.0E0, 1.0E0/ DATA DS1/0.8E0, 0.6E0, 0.8E0, -0.6E0, 0.8E0, 0.0E0, + 1.0E0, 0.0E0/ DATA DATRUE/0.5E0, 0.5E0, 0.5E0, -0.5E0, -0.5E0, + 0.0E0, 1.0E0, 1.0E0/ DATA DBTRUE/0.0E0, 0.6E0, 0.0E0, -0.6E0, 0.0E0, + 0.0E0, 1.0E0, 0.0E0/ * INPUT FOR MODIFIED GIVENS DATA DAB/ .1E0,.3E0,1.2E0,.2E0, A .7E0, .2E0, .6E0, 4.2E0, B 0.E0,0.E0,0.E0,0.E0, C 4.E0, -1.E0, 2.E0, 4.E0, D 6.E-10, 2.E-2, 1.E5, 10.E0, E 4.E10, 2.E-2, 1.E-5, 10.E0, F 2.E-10, 4.E-2, 1.E5, 10.E0, G 2.E10, 4.E-2, 1.E-5, 10.E0, H 4.E0, -2.E0, 8.E0, 4.E0 / * TRUE RESULTS FOR MODIFIED GIVENS DATA DTRUE/0.E0,0.E0, 1.3E0, .2E0, 0.E0,0.E0,0.E0, .5E0, 0.E0, A 0.E0,0.E0, 4.5E0, 4.2E0, 1.E0, .5E0, 0.E0,0.E0,0.E0, B 0.E0,0.E0,0.E0,0.E0, -2.E0, 0.E0,0.E0,0.E0,0.E0, C 0.E0,0.E0,0.E0, 4.E0, -1.E0, 0.E0,0.E0,0.E0,0.E0, D 0.E0, 15.E-3, 0.E0, 10.E0, -1.E0, 0.E0, -1.E-4, E 0.E0, 1.E0, F 0.E0,0.E0, 6144.E-5, 10.E0, -1.E0, 4096.E0, -1.E6, G 0.E0, 1.E0, H 0.E0,0.E0,15.E0,10.E0,-1.E0, 5.E-5, 0.E0,1.E0,0.E0, I 0.E0,0.E0, 15.E0, 10.E0, -1. E0, 5.E5, -4096.E0, J 1.E0, 4096.E-6, K 0.E0,0.E0, 7.E0, 4.E0, 0.E0,0.E0, -.5E0, -.25E0, 0.E0/ * 4096 = 2 ** 12 DATA D12 /4096.E0/ DTRUE(1,1) = 12.E0 / 130.E0 DTRUE(2,1) = 36.E0 / 130.E0 DTRUE(7,1) = -1.E0 / 6.E0 DTRUE(1,2) = 14.E0 / 75.E0 DTRUE(2,2) = 49.E0 / 75.E0 DTRUE(9,2) = 1.E0 / 7.E0 DTRUE(1,5) = 45.E-11 * (D12 * D12) DTRUE(3,5) = 4.E5 / (3.E0 * D12) DTRUE(6,5) = 1.E0 / D12 DTRUE(8,5) = 1.E4 / (3.E0 * D12) DTRUE(1,6) = 4.E10 / (1.5E0 * D12 * D12) DTRUE(2,6) = 2.E-2 / 1.5E0 DTRUE(8,6) = 5.E-7 * D12 DTRUE(1,7) = 4.E0 / 150.E0 DTRUE(2,7) = (2.E-10 / 1.5E0) * (D12 * D12) DTRUE(7,7) = -DTRUE(6,5) DTRUE(9,7) = 1.E4 / D12 DTRUE(1,8) = DTRUE(1,7) DTRUE(2,8) = 2.E10 / (1.5E0 * D12 * D12) DTRUE(1,9) = 32.E0 / 7.E0 DTRUE(2,9) = -16.E0 / 7.E0 * .. 
Executable Statements .. * * Compute true values which cannot be prestored * in decimal notation * DBTRUE(1) = 1.0E0/0.6E0 DBTRUE(3) = -1.0E0/0.6E0 DBTRUE(5) = 1.0E0/0.6E0 * DO 20 K = 1, 8 * .. Set N=K for identification in output if any .. N = K IF (ICASE.EQ.3) THEN * .. SROTG .. IF (K.GT.8) GO TO 40 SA = DA1(K) SB = DB1(K) CALL SROTG(SA,SB,SC,SS) CALL STEST1(SA,DATRUE(K),DATRUE(K),SFAC) CALL STEST1(SB,DBTRUE(K),DBTRUE(K),SFAC) CALL STEST1(SC,DC1(K),DC1(K),SFAC) CALL STEST1(SS,DS1(K),DS1(K),SFAC) ELSEIF (ICASE.EQ.11) THEN * .. SROTMG .. DO I=1,4 DTEMP(I)= DAB(I,K) DTEMP(I+4) = 0.0 END DO DTEMP(9) = 0.0 CALL SROTMG(DTEMP(1),DTEMP(2),DTEMP(3),DTEMP(4),DTEMP(5)) CALL STEST(9,DTEMP,DTRUE(1,K),DTRUE(1,K),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK0' STOP END IF 20 CONTINUE 40 RETURN END SUBROUTINE CHECK1(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER I, LEN, NP1 * .. Local Arrays .. REAL DTRUE1(5), DTRUE3(5), DTRUE5(8,5,2), DV(8,5,2), + SA(10), STEMP(1), STRUE(8), SX(8) INTEGER ITRUE2(5) * .. External Functions .. REAL SASUM, SNRM2 INTEGER ISAMAX EXTERNAL SASUM, SNRM2, ISAMAX * .. External Subroutines .. EXTERNAL ITEST1, SSCAL, STEST, STEST1 * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. 
DATA SA/0.3E0, -1.0E0, 0.0E0, 1.0E0, 0.3E0, 0.3E0, + 0.3E0, 0.3E0, 0.3E0, 0.3E0/ DATA DV/0.1E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, + 2.0E0, 2.0E0, 0.3E0, 3.0E0, 3.0E0, 3.0E0, 3.0E0, + 3.0E0, 3.0E0, 3.0E0, 0.3E0, -0.4E0, 4.0E0, + 4.0E0, 4.0E0, 4.0E0, 4.0E0, 4.0E0, 0.2E0, + -0.6E0, 0.3E0, 5.0E0, 5.0E0, 5.0E0, 5.0E0, + 5.0E0, 0.1E0, -0.3E0, 0.5E0, -0.1E0, 6.0E0, + 6.0E0, 6.0E0, 6.0E0, 0.1E0, 8.0E0, 8.0E0, 8.0E0, + 8.0E0, 8.0E0, 8.0E0, 8.0E0, 0.3E0, 9.0E0, 9.0E0, + 9.0E0, 9.0E0, 9.0E0, 9.0E0, 9.0E0, 0.3E0, 2.0E0, + -0.4E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, + 0.2E0, 3.0E0, -0.6E0, 5.0E0, 0.3E0, 2.0E0, + 2.0E0, 2.0E0, 0.1E0, 4.0E0, -0.3E0, 6.0E0, + -0.5E0, 7.0E0, -0.1E0, 3.0E0/ DATA DTRUE1/0.0E0, 0.3E0, 0.5E0, 0.7E0, 0.6E0/ DATA DTRUE3/0.0E0, 0.3E0, 0.7E0, 1.1E0, 1.0E0/ DATA DTRUE5/0.10E0, 2.0E0, 2.0E0, 2.0E0, 2.0E0, + 2.0E0, 2.0E0, 2.0E0, -0.3E0, 3.0E0, 3.0E0, + 3.0E0, 3.0E0, 3.0E0, 3.0E0, 3.0E0, 0.0E0, 0.0E0, + 4.0E0, 4.0E0, 4.0E0, 4.0E0, 4.0E0, 4.0E0, + 0.20E0, -0.60E0, 0.30E0, 5.0E0, 5.0E0, 5.0E0, + 5.0E0, 5.0E0, 0.03E0, -0.09E0, 0.15E0, -0.03E0, + 6.0E0, 6.0E0, 6.0E0, 6.0E0, 0.10E0, 8.0E0, + 8.0E0, 8.0E0, 8.0E0, 8.0E0, 8.0E0, 8.0E0, + 0.09E0, 9.0E0, 9.0E0, 9.0E0, 9.0E0, 9.0E0, + 9.0E0, 9.0E0, 0.09E0, 2.0E0, -0.12E0, 2.0E0, + 2.0E0, 2.0E0, 2.0E0, 2.0E0, 0.06E0, 3.0E0, + -0.18E0, 5.0E0, 0.09E0, 2.0E0, 2.0E0, 2.0E0, + 0.03E0, 4.0E0, -0.09E0, 6.0E0, -0.15E0, 7.0E0, + -0.03E0, 3.0E0/ DATA ITRUE2/0, 1, 2, 2, 3/ * .. Executable Statements .. DO 80 INCX = 1, 2 DO 60 NP1 = 1, 5 N = NP1 - 1 LEN = 2*MAX(N,1) * .. Set vector arguments .. DO 20 I = 1, LEN SX(I) = DV(I,NP1,INCX) 20 CONTINUE * IF (ICASE.EQ.7) THEN * .. SNRM2 .. STEMP(1) = DTRUE1(NP1) CALL STEST1(SNRM2(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.8) THEN * .. SASUM .. STEMP(1) = DTRUE3(NP1) CALL STEST1(SASUM(N,SX,INCX),STEMP(1),STEMP,SFAC) ELSE IF (ICASE.EQ.9) THEN * .. SSCAL .. 
CALL SSCAL(N,SA((INCX-1)*5+NP1),SX,INCX) DO 40 I = 1, LEN STRUE(I) = DTRUE5(I,NP1,INCX) 40 CONTINUE CALL STEST(LEN,SX,STRUE,STRUE,SFAC) ELSE IF (ICASE.EQ.10) THEN * .. ISAMAX .. CALL ITEST1(ISAMAX(N,SX,INCX),ITRUE2(NP1)) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' STOP END IF 60 CONTINUE 80 CONTINUE RETURN END SUBROUTINE CHECK2(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SA INTEGER I, J, KI, KN, KNI, KPAR, KSIZE, LENX, LENY, $ MX, MY * .. Local Arrays .. REAL DT10X(7,4,4), DT10Y(7,4,4), DT7(4,4), $ DT8(7,4,4), DX1(7), $ DY1(7), SSIZE1(4), SSIZE2(14,2), SSIZE3(4), $ SSIZE(7), STX(7), STY(7), SX(7), SY(7), $ DPAR(5,4), DT19X(7,4,16),DT19XA(7,4,4), $ DT19XB(7,4,4), DT19XC(7,4,4),DT19XD(7,4,4), $ DT19Y(7,4,16), DT19YA(7,4,4),DT19YB(7,4,4), $ DT19YC(7,4,4), DT19YD(7,4,4), DTEMP(5), $ ST7B(4,4) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. REAL SDOT, SDSDOT EXTERNAL SDOT, SDSDOT * .. External Subroutines .. EXTERNAL SAXPY, SCOPY, SROTM, SSWAP, STEST, STEST1 * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. 
EQUIVALENCE (DT19X(1,1,1),DT19XA(1,1,1)),(DT19X(1,1,5), A DT19XB(1,1,1)),(DT19X(1,1,9),DT19XC(1,1,1)), B (DT19X(1,1,13),DT19XD(1,1,1)) EQUIVALENCE (DT19Y(1,1,1),DT19YA(1,1,1)),(DT19Y(1,1,5), A DT19YB(1,1,1)),(DT19Y(1,1,9),DT19YC(1,1,1)), B (DT19Y(1,1,13),DT19YD(1,1,1)) DATA SA/0.3E0/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA DX1/0.6E0, 0.1E0, -0.5E0, 0.8E0, 0.9E0, -0.3E0, + -0.4E0/ DATA DY1/0.5E0, -0.9E0, 0.3E0, 0.7E0, -0.6E0, 0.2E0, + 0.8E0/ DATA DT7/0.0E0, 0.30E0, 0.21E0, 0.62E0, 0.0E0, + 0.30E0, -0.07E0, 0.85E0, 0.0E0, 0.30E0, -0.79E0, + -0.74E0, 0.0E0, 0.30E0, 0.33E0, 1.27E0/ DATA ST7B/ .1, .4, .31, .72, .1, .4, .03, .95, + .1, .4, -.69, -.64, .1, .4, .43, 1.37/ DATA DT8/0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.68E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.68E0, -0.87E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.68E0, -0.87E0, 0.15E0, + 0.94E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.68E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.35E0, -0.9E0, 0.48E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.38E0, -0.9E0, 0.57E0, 0.7E0, -0.75E0, + 0.2E0, 0.98E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.68E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.35E0, -0.72E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.38E0, + -0.63E0, 0.15E0, 0.88E0, 0.0E0, 0.0E0, 0.0E0, + 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.68E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.68E0, -0.9E0, 0.33E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.68E0, -0.9E0, 0.33E0, 0.7E0, + -0.75E0, 0.2E0, 1.04E0/ DATA DT10X/0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.5E0, -0.9E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.5E0, -0.9E0, 0.3E0, 0.7E0, + 0.0E0, 0.0E0, 0.0E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.3E0, 0.1E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.8E0, 0.1E0, 
-0.6E0, + 0.8E0, 0.3E0, -0.3E0, 0.5E0, 0.6E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, -0.9E0, + 0.1E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.7E0, + 0.1E0, 0.3E0, 0.8E0, -0.9E0, -0.3E0, 0.5E0, + 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.5E0, 0.3E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.5E0, 0.3E0, -0.6E0, 0.8E0, 0.0E0, 0.0E0, + 0.0E0/ DATA DT10Y/0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, 0.1E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, 0.1E0, -0.5E0, 0.8E0, 0.0E0, + 0.0E0, 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, -0.5E0, -0.9E0, 0.6E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, -0.4E0, -0.9E0, 0.9E0, + 0.7E0, -0.5E0, 0.2E0, 0.6E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.6E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, -0.5E0, + 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + -0.4E0, 0.9E0, -0.5E0, 0.6E0, 0.0E0, 0.0E0, + 0.0E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.6E0, -0.9E0, 0.1E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.6E0, -0.9E0, 0.1E0, 0.7E0, + -0.5E0, 0.2E0, 0.8E0/ DATA SSIZE1/0.0E0, 0.3E0, 1.6E0, 3.2E0/ DATA SSIZE2/0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0/ DATA SSIZE3/ .1, .4, 1.7, 3.3 / * * FOR DROTM * DATA DPAR/-2.E0, 0.E0,0.E0,0.E0,0.E0, A -1.E0, 2.E0, -3.E0, -4.E0, 5.E0, B 0.E0, 0.E0, 2.E0, -3.E0, 0.E0, C 1.E0, 5.E0, 2.E0, 0.E0, -4.E0/ * TRUE X RESULTS F0R ROTATIONS DROTM DATA DT19XA/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E 
-.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .6E0, .1E0, 0.E0,0.E0,0.E0,0.E0,0.E0, I -.8E0, 3.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, J -.9E0, 2.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, K 3.5E0, -.4E0, 0.E0,0.E0,0.E0,0.E0,0.E0, L .6E0, .1E0, -.5E0, .8E0, 0.E0,0.E0,0.E0, M -.8E0, 3.8E0, -2.2E0, -1.2E0, 0.E0,0.E0,0.E0, N -.9E0, 2.8E0, -1.4E0, -1.3E0, 0.E0,0.E0,0.E0, O 3.5E0, -.4E0, -2.2E0, 4.7E0, 0.E0,0.E0,0.E0/ * DATA DT19XB/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .6E0, .1E0, -.5E0, 0.E0,0.E0,0.E0,0.E0, I 0.E0, .1E0, -3.0E0, 0.E0,0.E0,0.E0,0.E0, J -.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, K 3.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, L .6E0, .1E0, -.5E0, .8E0, .9E0, -.3E0, -.4E0, M -2.0E0, .1E0, 1.4E0, .8E0, .6E0, -.3E0, -2.8E0, N -1.8E0, .1E0, 1.3E0, .8E0, 0.E0, -.3E0, -1.9E0, O 3.8E0, .1E0, -3.1E0, .8E0, 4.8E0, -.3E0, -1.5E0 / * DATA DT19XC/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .6E0, .1E0, -.5E0, 0.E0,0.E0,0.E0,0.E0, I 4.8E0, .1E0, -3.0E0, 0.E0,0.E0,0.E0,0.E0, J 3.3E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, K 2.1E0, .1E0, -2.0E0, 0.E0,0.E0,0.E0,0.E0, L .6E0, .1E0, -.5E0, .8E0, .9E0, -.3E0, -.4E0, M -1.6E0, .1E0, -2.2E0, .8E0, 5.4E0, -.3E0, -2.8E0, N -1.5E0, .1E0, -1.4E0, .8E0, 3.6E0, -.3E0, -1.9E0, O 3.7E0, .1E0, -2.2E0, .8E0, 3.6E0, -.3E0, -1.5E0 / * DATA DT19XD/.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .6E0, 
0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .6E0, .1E0, 0.E0,0.E0,0.E0,0.E0,0.E0, I -.8E0, -1.0E0, 0.E0,0.E0,0.E0,0.E0,0.E0, J -.9E0, -.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, K 3.5E0, .8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, L .6E0, .1E0, -.5E0, .8E0, 0.E0,0.E0,0.E0, M -.8E0, -1.0E0, 1.4E0, -1.6E0, 0.E0,0.E0,0.E0, N -.9E0, -.8E0, 1.3E0, -1.6E0, 0.E0,0.E0,0.E0, O 3.5E0, .8E0, -3.1E0, 4.8E0, 0.E0,0.E0,0.E0/ * TRUE Y RESULTS FOR ROTATIONS DROTM DATA DT19YA/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .5E0, -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0, I .7E0, -4.8E0, 0.E0,0.E0,0.E0,0.E0,0.E0, J 1.7E0, -.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0, K -2.6E0, 3.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0, L .5E0, -.9E0, .3E0, .7E0, 0.E0,0.E0,0.E0, M .7E0, -4.8E0, 3.0E0, 1.1E0, 0.E0,0.E0,0.E0, N 1.7E0, -.7E0, -.7E0, 2.3E0, 0.E0,0.E0,0.E0, O -2.6E0, 3.5E0, -.7E0, -3.6E0, 0.E0,0.E0,0.E0/ * DATA DT19YB/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .5E0, -.9E0, .3E0, 0.E0,0.E0,0.E0,0.E0, I 4.0E0, -.9E0, -.3E0, 0.E0,0.E0,0.E0,0.E0, J -.5E0, -.9E0, 1.5E0, 0.E0,0.E0,0.E0,0.E0, K -1.5E0, -.9E0, -1.8E0, 0.E0,0.E0,0.E0,0.E0, L .5E0, -.9E0, .3E0, .7E0, -.6E0, .2E0, .8E0, M 3.7E0, -.9E0, -1.2E0, .7E0, -1.5E0, .2E0, 2.2E0, N -.3E0, -.9E0, 2.1E0, .7E0, -1.6E0, .2E0, 2.0E0, O -1.6E0, -.9E0, -2.1E0, .7E0, 2.9E0, .2E0, -3.8E0 / * 
DATA DT19YC/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .5E0, -.9E0, 0.E0,0.E0,0.E0,0.E0,0.E0, I 4.0E0, -6.3E0, 0.E0,0.E0,0.E0,0.E0,0.E0, J -.5E0, .3E0, 0.E0,0.E0,0.E0,0.E0,0.E0, K -1.5E0, 3.0E0, 0.E0,0.E0,0.E0,0.E0,0.E0, L .5E0, -.9E0, .3E0, .7E0, 0.E0,0.E0,0.E0, M 3.7E0, -7.2E0, 3.0E0, 1.7E0, 0.E0,0.E0,0.E0, N -.3E0, .9E0, -.7E0, 1.9E0, 0.E0,0.E0,0.E0, O -1.6E0, 2.7E0, -.7E0, -3.4E0, 0.E0,0.E0,0.E0/ * DATA DT19YD/.5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, A .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, B .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, C .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, D .5E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, E .7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, F 1.7E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, G -2.6E0, 0.E0,0.E0,0.E0,0.E0,0.E0,0.E0, H .5E0, -.9E0, .3E0, 0.E0,0.E0,0.E0,0.E0, I .7E0, -.9E0, 1.2E0, 0.E0,0.E0,0.E0,0.E0, J 1.7E0, -.9E0, .5E0, 0.E0,0.E0,0.E0,0.E0, K -2.6E0, -.9E0, -1.3E0, 0.E0,0.E0,0.E0,0.E0, L .5E0, -.9E0, .3E0, .7E0, -.6E0, .2E0, .8E0, M .7E0, -.9E0, 1.2E0, .7E0, -1.5E0, .2E0, 1.6E0, N 1.7E0, -.9E0, .5E0, .7E0, -1.6E0, .2E0, 2.4E0, O -2.6E0, -.9E0, -1.3E0, .7E0, 2.9E0, .2E0, -4.0E0 / * * .. Executable Statements .. * DO 120 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 100 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * .. Initialize all argument arrays .. DO 20 I = 1, 7 SX(I) = DX1(I) SY(I) = DY1(I) 20 CONTINUE * IF (ICASE.EQ.1) THEN * .. SDOT .. CALL STEST1(SDOT(N,SX,INCX,SY,INCY),DT7(KN,KI),SSIZE1(KN) + ,SFAC) ELSE IF (ICASE.EQ.2) THEN * .. SAXPY .. CALL SAXPY(N,SA,SX,INCX,SY,INCY) DO 40 J = 1, LENY STY(J) = DT8(J,KN,KI) 40 CONTINUE CALL STEST(LENY,SY,STY,SSIZE2(1,KSIZE),SFAC) ELSE IF (ICASE.EQ.5) THEN * .. SCOPY .. 
DO 60 I = 1, 7 STY(I) = DT10Y(I,KN,KI) 60 CONTINUE CALL SCOPY(N,SX,INCX,SY,INCY) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0E0) ELSE IF (ICASE.EQ.6) THEN * .. SSWAP .. CALL SSWAP(N,SX,INCX,SY,INCY) DO 80 I = 1, 7 STX(I) = DT10X(I,KN,KI) STY(I) = DT10Y(I,KN,KI) 80 CONTINUE CALL STEST(LENX,SX,STX,SSIZE2(1,1),1.0E0) CALL STEST(LENY,SY,STY,SSIZE2(1,1),1.0E0) ELSEIF (ICASE.EQ.12) THEN * .. SROTM .. KNI=KN+4*(KI-1) DO KPAR=1,4 DO I=1,7 SX(I) = DX1(I) SY(I) = DY1(I) STX(I)= DT19X(I,KPAR,KNI) STY(I)= DT19Y(I,KPAR,KNI) END DO * DO I=1,5 DTEMP(I) = DPAR(I,KPAR) END DO * DO I=1,LENX SSIZE(I)=STX(I) END DO * SEE REMARK ABOVE ABOUT DT11X(1,2,7) * AND DT11X(5,3,8). IF ((KPAR .EQ. 2) .AND. (KNI .EQ. 7)) $ SSIZE(1) = 2.4E0 IF ((KPAR .EQ. 3) .AND. (KNI .EQ. 8)) $ SSIZE(5) = 1.8E0 * CALL SROTM(N,SX,INCX,SY,INCY,DTEMP) CALL STEST(LENX,SX,STX,SSIZE,SFAC) CALL STEST(LENY,SY,STY,STY,SFAC) END DO ELSEIF (ICASE.EQ.13) THEN * .. SDSROT .. CALL STEST1 (SDSDOT(N,.1,SX,INCX,SY,INCY), $ ST7B(KN,KI),SSIZE3(KN),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP END IF 100 CONTINUE 120 CONTINUE RETURN END SUBROUTINE CHECK3(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. REAL SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SC, SS INTEGER I, K, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. REAL COPYX(5), COPYY(5), DT9X(7,4,4), DT9Y(7,4,4), + DX1(7), DY1(7), MWPC(11), MWPS(11), MWPSTX(5), + MWPSTY(5), MWPTX(11,5), MWPTY(11,5), MWPX(5), + MWPY(5), SSIZE2(14,2), STX(7), STY(7), SX(7), + SY(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), MWPINX(11), + MWPINY(11), MWPN(11), NS(4) * .. External Subroutines .. EXTERNAL SROT, STEST * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Data statements .. 
DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA DX1/0.6E0, 0.1E0, -0.5E0, 0.8E0, 0.9E0, -0.3E0, + -0.4E0/ DATA DY1/0.5E0, -0.9E0, 0.3E0, 0.7E0, -0.6E0, 0.2E0, + 0.8E0/ DATA SC, SS/0.8E0, 0.6E0/ DATA DT9X/0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.78E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.78E0, -0.46E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.78E0, -0.46E0, -0.22E0, + 1.06E0, 0.0E0, 0.0E0, 0.0E0, 0.6E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.78E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.66E0, 0.1E0, -0.1E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.96E0, 0.1E0, -0.76E0, 0.8E0, 0.90E0, + -0.3E0, -0.02E0, 0.6E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.78E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, -0.06E0, 0.1E0, + -0.1E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.90E0, + 0.1E0, -0.22E0, 0.8E0, 0.18E0, -0.3E0, -0.02E0, + 0.6E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.78E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.78E0, 0.26E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.78E0, 0.26E0, -0.76E0, 1.12E0, + 0.0E0, 0.0E0, 0.0E0/ DATA DT9Y/0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.04E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.04E0, -0.78E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.04E0, -0.78E0, 0.54E0, + 0.08E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.04E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.7E0, + -0.9E0, -0.12E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.64E0, -0.9E0, -0.30E0, 0.7E0, -0.18E0, 0.2E0, + 0.28E0, 0.5E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.04E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.7E0, -1.08E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.64E0, -1.26E0, + 0.54E0, 0.20E0, 0.0E0, 0.0E0, 0.0E0, 0.5E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.04E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.04E0, -0.9E0, 0.18E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.04E0, -0.9E0, 0.18E0, 0.7E0, + -0.18E0, 0.2E0, 0.16E0/ DATA 
SSIZE2/0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, 0.0E0, + 0.0E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, 1.17E0, + 1.17E0, 1.17E0, 1.17E0/ * .. Executable Statements .. * DO 60 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 40 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * IF (ICASE.EQ.4) THEN * .. SROT .. DO 20 I = 1, 7 SX(I) = DX1(I) SY(I) = DY1(I) STX(I) = DT9X(I,KN,KI) STY(I) = DT9Y(I,KN,KI) 20 CONTINUE CALL SROT(N,SX,INCX,SY,INCY,SC,SS) CALL STEST(LENX,SX,STX,SSIZE2(1,KSIZE),SFAC) CALL STEST(LENY,SY,STY,SSIZE2(1,KSIZE),SFAC) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK3' STOP END IF 40 CONTINUE 60 CONTINUE * MWPC(1) = 1 DO 80 I = 2, 11 MWPC(I) = 0 80 CONTINUE MWPS(1) = 0 DO 100 I = 2, 6 MWPS(I) = 1 100 CONTINUE DO 120 I = 7, 11 MWPS(I) = -1 120 CONTINUE MWPINX(1) = 1 MWPINX(2) = 1 MWPINX(3) = 1 MWPINX(4) = -1 MWPINX(5) = 1 MWPINX(6) = -1 MWPINX(7) = 1 MWPINX(8) = 1 MWPINX(9) = -1 MWPINX(10) = 1 MWPINX(11) = -1 MWPINY(1) = 1 MWPINY(2) = 1 MWPINY(3) = -1 MWPINY(4) = -1 MWPINY(5) = 2 MWPINY(6) = 1 MWPINY(7) = 1 MWPINY(8) = -1 MWPINY(9) = -1 MWPINY(10) = 2 MWPINY(11) = 1 DO 140 I = 1, 11 MWPN(I) = 5 140 CONTINUE MWPN(5) = 3 MWPN(10) = 3 DO 160 I = 1, 5 MWPX(I) = I MWPY(I) = I MWPTX(1,I) = I MWPTY(1,I) = I MWPTX(2,I) = I MWPTY(2,I) = -I MWPTX(3,I) = 6 - I MWPTY(3,I) = I - 6 MWPTX(4,I) = I MWPTY(4,I) = -I MWPTX(6,I) = 6 - I MWPTY(6,I) = I - 6 MWPTX(7,I) = -I MWPTY(7,I) = I MWPTX(8,I) = I - 6 MWPTY(8,I) = 6 - I MWPTX(9,I) = -I MWPTY(9,I) = I MWPTX(11,I) = I - 6 MWPTY(11,I) = 6 - I 160 CONTINUE MWPTX(5,1) = 1 MWPTX(5,2) = 3 MWPTX(5,3) = 5 MWPTX(5,4) = 4 MWPTX(5,5) = 5 MWPTY(5,1) = -1 MWPTY(5,2) = 2 MWPTY(5,3) = -2 MWPTY(5,4) = 4 MWPTY(5,5) = -3 MWPTX(10,1) = -1 MWPTX(10,2) = -3 MWPTX(10,3) = -5 MWPTX(10,4) = 4 MWPTX(10,5) = 5 MWPTY(10,1) = 1 MWPTY(10,2) = 2 MWPTY(10,3) = 2 MWPTY(10,4) = 4 MWPTY(10,5) = 3 DO 200 I = 
1, 11 INCX = MWPINX(I) INCY = MWPINY(I) DO 180 K = 1, 5 COPYX(K) = MWPX(K) COPYY(K) = MWPY(K) MWPSTX(K) = MWPTX(I,K) MWPSTY(K) = MWPTY(I,K) 180 CONTINUE CALL SROT(MWPN(I),COPYX,INCX,COPYY,INCY,MWPC(I),MWPS(I)) CALL STEST(5,COPYX,MWPSTX,MWPSTX,SFAC) CALL STEST(5,COPYY,MWPSTY,MWPSTY,SFAC) 200 CONTINUE RETURN END SUBROUTINE STEST(LEN,SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT REAL ZERO PARAMETER (NOUT=6, ZERO=0.0E0) * .. Scalar Arguments .. REAL SFAC INTEGER LEN * .. Array Arguments .. REAL SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. REAL SD INTEGER I * .. External Functions .. REAL SDIFF EXTERNAL SDIFF * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,2I5,I3,2E36.8,2E12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** * * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. REAL SCOMP1, SFAC, STRUE1 * .. 
Array Arguments .. REAL SSIZE(*) * .. Local Arrays .. REAL SCOMP(1), STRUE(1) * .. External Subroutines .. EXTERNAL STEST * .. Executable Statements .. * SCOMP(1) = SCOMP1 STRUE(1) = STRUE1 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC) * RETURN END REAL FUNCTION SDIFF(SA,SB) * ********************************* SDIFF ************************** * COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 * * .. Scalar Arguments .. REAL SA, SB * .. Executable Statements .. SDIFF = SA - SB RETURN END SUBROUTINE ITEST1(ICOMP,ITRUE) * ********************************* ITEST1 ************************* * * THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR * EQUALITY. * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, PASS * .. Executable Statements .. * IF (ICOMP.EQ.ITRUE) GO TO 40 * * HERE ICOMP IS NOT EQUAL TO ITRUE. * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE WRITE (NOUT,99997) ICASE, N, INCX, INCY, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY ', + ' COMP TRUE DIFFERENCE', + /1X) 99997 FORMAT (1X,I4,I3,2I5,2I36,I12) END blis-0.6.1/blastest/src/fortran/sblat2.f000066400000000000000000003331731360743507500201140ustar00rootroot00000000000000*> \brief \b SBLAT2 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM SBLAT2 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the REAL Level 2 Blas. *> *> The program must be driven by a short data file. 
The first 18 records *> of the file are read using list-directed input, the last 16 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 34 lines: *> 'sblat2.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 4 NUMBER OF VALUES OF K *> 0 1 2 4 VALUES OF K *> 4 NUMBER OF VALUES OF INCX AND INCY *> 1 2 -1 -2 VALUES OF INCX AND INCY *> 3 NUMBER OF VALUES OF ALPHA *> 0.0 1.0 0.7 VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> 0.0 1.0 0.9 VALUES OF BETA *> SGEMV T PUT F FOR NO TEST. SAME COLUMNS. *> SGBMV T PUT F FOR NO TEST. SAME COLUMNS. *> SSYMV T PUT F FOR NO TEST. SAME COLUMNS. *> SSBMV T PUT F FOR NO TEST. SAME COLUMNS. *> SSPMV T PUT F FOR NO TEST. SAME COLUMNS. *> STRMV T PUT F FOR NO TEST. SAME COLUMNS. *> STBMV T PUT F FOR NO TEST. SAME COLUMNS. *> STPMV T PUT F FOR NO TEST. SAME COLUMNS. *> STRSV T PUT F FOR NO TEST. SAME COLUMNS. *> STBSV T PUT F FOR NO TEST. SAME COLUMNS. *> STPSV T PUT F FOR NO TEST. SAME COLUMNS. *> SGER T PUT F FOR NO TEST. SAME COLUMNS. *> SSYR T PUT F FOR NO TEST. SAME COLUMNS. *> SSPR T PUT F FOR NO TEST. SAME COLUMNS. *> SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. *> SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. *> An extended set of Fortran Basic Linear Algebra Subprograms. *> *> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics *> and Computer Science Division, Argonne National Laboratory, *> 9700 South Cass Avenue, Argonne, Illinois 60439, US. 
*> *> Or *> *> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms *> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford *> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st *> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. *> *> *> -- Written on 10-August-1987. *> Richard Hanson, Sandia National Labs. *> Jeremy Du Croz, NAG Central Office. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup single_blas_testing * * ===================================================================== PROGRAM SBLAT2 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 16 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. REAL EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NINC, NKB, $ NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANS CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. 
REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), BET( NBEMAX ), $ G( NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( 2*NMAX ) INTEGER IDIM( NIDMAX ), INC( NINMAX ), KB( NKBMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. REAL SDIFF LOGICAL LSE EXTERNAL SDIFF, LSE * .. External Subroutines .. EXTERNAL SCHK1, SCHK2, SCHK3, SCHK4, SCHK5, SCHK6, $ SCHKE, SMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'SGEMV ', 'SGBMV ', 'SSYMV ', 'SSBMV ', $ 'SSPMV ', 'STRMV ', 'STBMV ', 'STPMV ', $ 'STRSV ', 'STBSV ', 'STPSV ', 'SGER ', $ 'SSYR ', 'SSPR ', 'SSYR2 ', 'SSPR2 '/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. 
* * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 230 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 230 END IF 10 CONTINUE * Values of K READ( NIN, FMT = * )NKB IF( NKB.LT.1.OR.NKB.GT.NKBMAX )THEN WRITE( NOUT, FMT = 9997 )'K', NKBMAX GO TO 230 END IF READ( NIN, FMT = * )( KB( I ), I = 1, NKB ) DO 20 I = 1, NKB IF( KB( I ).LT.0 )THEN WRITE( NOUT, FMT = 9995 ) GO TO 230 END IF 20 CONTINUE * Values of INCX and INCY READ( NIN, FMT = * )NINC IF( NINC.LT.1.OR.NINC.GT.NINMAX )THEN WRITE( NOUT, FMT = 9997 )'INCX AND INCY', NINMAX GO TO 230 END IF READ( NIN, FMT = * )( INC( I ), I = 1, NINC ) DO 30 I = 1, NINC IF( INC( I ).EQ.0.OR.ABS( INC( I ) ).GT.INCMAX )THEN WRITE( NOUT, FMT = 9994 )INCMAX GO TO 230 END IF 30 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 230 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 230 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9991 )( KB( I ), I = 1, NKB ) WRITE( NOUT, FMT = 9990 )( INC( I ), I = 1, NINC ) WRITE( NOUT, FMT = 9989 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9988 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 40 I = 1, NSUBS LTEST( I ) = .FALSE. 
40 CONTINUE 50 READ( NIN, FMT = 9984, END = 80 )SNAMET, LTESTT DO 60 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 70 60 CONTINUE WRITE( NOUT, FMT = 9986 )SNAMET STOP 70 LTEST( I ) = LTESTT GO TO 50 * 80 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMVCH using exact data. * N = MIN( 32, NMAX ) DO 120 J = 1, N DO 110 I = 1, N A( I, J ) = MAX( I - J + 1, 0 ) 110 CONTINUE X( J ) = J Y( J ) = ZERO 120 CONTINUE DO 130 J = 1, N YY( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE * YY holds the exact result. On exit from SMVCH YT holds * the result computed by SMVCH. TRANS = 'N' CALL SMVCH( TRANS, N, N, ONE, A, NMAX, X, 1, ZERO, Y, 1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF TRANS = 'T' CALL SMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 210 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9983 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL SCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 140, 150, 150, 150, 160, 160, $ 160, 160, 160, 160, 170, 180, 180, $ 190, 190 )ISNUM * Test SGEMV, 01, and SGBMV, 02. 140 CALL SCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test SSYMV, 03, SSBMV, 04, and SSPMV, 05. 
150 CALL SCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test STRMV, 06, STBMV, 07, STPMV, 08, * STRSV, 09, STBSV, 10, and STPSV, 11. 160 CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z ) GO TO 200 * Test SGER, 12. 170 CALL SCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test SSYR, 13, and SSPR, 14. 180 CALL SCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test SSYR2, 15, and SSPR2, 16. 190 CALL SCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) * 200 IF( FATAL.AND.SFATAL ) $ GO TO 220 END IF 210 CONTINUE WRITE( NOUT, FMT = 9982 ) GO TO 240 * 220 CONTINUE WRITE( NOUT, FMT = 9981 ) GO TO 240 * 230 CONTINUE WRITE( NOUT, FMT = 9987 ) * 240 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, E9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' VALUE OF K IS LESS THAN 0' ) 9994 FORMAT( ' ABSOLUTE VALUE OF INCX OR INCY IS 0 OR GREATER THAN ', $ I2 ) 9993 FORMAT( ' TESTS OF THE REAL LEVEL 2 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9992 FORMAT( ' FOR N ', 9I6 ) 9991 FORMAT( ' FOR K ', 7I6 ) 9990 FORMAT( ' FOR INCX AND INCY ', 7I6 ) 9989 FORMAT( ' 
FOR ALPHA ', 7F6.1 ) 9988 FORMAT( ' FOR BETA ', 7F6.1 ) 9987 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9986 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9985 FORMAT( ' ERROR IN SMVCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' SMVCH WAS CALLED WITH TRANS = ', A1, $ ' AND RETURNED SAME = ', L1, ' AND ERR = ', F12.3, '.', / $ ' THIS MAY BE DUE TO FAULTS IN THE ARITHMETIC OR THE COMPILER.' $ , /' ******* TESTS ABANDONED *******' ) 9984 FORMAT( A6, L2 ) 9983 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9982 FORMAT( /' END OF TESTS' ) 9981 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9980 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of SBLAT2. * END SUBROUTINE SCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests SGEMV and SGBMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF PARAMETER ( ZERO = 0.0, HALF = 0.5 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), G( NMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. 
REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX, TRANSL INTEGER I, IA, IB, IC, IKU, IM, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, KL, KLS, KU, KUS, LAA, LDA, $ LDAS, LX, LY, M, ML, MS, N, NARGS, NC, ND, NK, $ NL, NS LOGICAL BANDED, FULL, NULL, RESET, SAME, TRAN CHARACTER*1 TRANS, TRANSS CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SGBMV, SGEMV, SMAKE, SMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' * Define the number of arguments. IF( FULL )THEN NARGS = 11 ELSE IF( BANDED )THEN NARGS = 13 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IKU = 1, NK IF( BANDED )THEN KU = KB( IKU ) KL = MAX( KU - 1, 0 ) ELSE KU = N - 1 KL = M - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = KL + KU + 1 ELSE LDA = M END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, AA, $ LDA, KL, KU, RESET, TRANSL ) * DO 90 IC = 1, 3 TRANS = ICH( IC: IC ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' * IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*NL * * Generate the vector X. 
* TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, NL, X, 1, XX, $ ABS( INCX ), 0, NL - 1, RESET, TRANSL ) IF( NL.GT.1 )THEN X( NL/2 ) = ZERO XX( 1 + ABS( INCX )*( NL/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*ML * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL SMAKE( 'GE', ' ', ' ', 1, ML, Y, 1, $ YY, ABS( INCY ), 0, ML - 1, $ RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANSS = TRANS MS = M NS = N KLS = KL KUS = KU ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ TRANS, M, N, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL SGEMV( TRANS, M, N, ALPHA, AA, $ LDA, XX, INCX, BETA, YY, $ INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANS, M, N, KL, KU, ALPHA, LDA, $ INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL SGBMV( TRANS, M, N, KL, KU, ALPHA, $ AA, LDA, XX, INCX, BETA, $ YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 130 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = TRANS.EQ.TRANSS ISAME( 2 ) = MS.EQ.M ISAME( 3 ) = NS.EQ.N IF( FULL )THEN ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LSE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LSE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LSE( YS, YY, LY ) ELSE ISAME( 10 ) = LSERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 4 ) = KLS.EQ.KL ISAME( 5 ) = KUS.EQ.KU ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LSE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LSE( XS, XX, LX ) ISAME( 10 ) = INCXS.EQ.INCX ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LSE( YS, YY, LY ) ELSE ISAME( 12 ) = LSERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 13 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 130 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL SMVCH( TRANS, M, N, ALPHA, A, $ NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 130 ELSE * Avoid repeating tests with M.le.0 or * N.le.0. GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 140 * 130 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, TRANS, M, N, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANS, M, N, KL, KU, $ ALPHA, LDA, INCX, BETA, INCY END IF * 140 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 4( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, $ ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK1. * END SUBROUTINE SCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests SSYMV, SSBMV and SSPMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF PARAMETER ( ZERO = 0.0, HALF = 0.5 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), G( NMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX, TRANSL INTEGER I, IA, IB, IC, IK, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, K, KS, LAA, LDA, LDAS, LX, LY, $ N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, SSBMV, SSPMV, SSYMV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 10 ELSE IF( BANDED )THEN NARGS = 11 ELSE IF( PACKED )THEN NARGS = 9 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, AA, $ LDA, K, K, RESET, TRANSL ) * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. 
* TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL SMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * UPLOS = UPLO NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, N, ALPHA, LDA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL SSYMV( UPLO, N, ALPHA, AA, LDA, XX, $ INCX, BETA, YY, INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, N, K, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL SSBMV( UPLO, N, K, ALPHA, AA, LDA, $ XX, INCX, BETA, YY, INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, N, ALPHA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL SSPMV( UPLO, N, ALPHA, AA, XX, INCX, $ BETA, YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N IF( FULL )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( AS, AA, LAA ) ISAME( 5 ) = LDAS.EQ.LDA ISAME( 6 ) = LSE( XS, XX, LX ) ISAME( 7 ) = INCXS.EQ.INCX ISAME( 8 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LSE( YS, YY, LY ) ELSE ISAME( 9 ) = LSERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 10 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 3 ) = KS.EQ.K ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LSE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LSE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LSE( YS, YY, LY ) ELSE ISAME( 10 ) = LSERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( PACKED )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( AS, AA, LAA ) ISAME( 5 ) = LSE( XS, XX, LX ) ISAME( 6 ) = INCXS.EQ.INCX ISAME( 7 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 8 ) = LSE( YS, YY, LY ) ELSE ISAME( 8 ) = LSERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 9 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL SMVCH( 'N', N, N, ALPHA, A, NMAX, X, $ INCX, BETA, Y, INCY, YT, G, $ YY, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0 GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, LDA, INCX, $ BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, K, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, N, ALPHA, INCX, $ BETA, INCY END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', AP', $ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), F4.1, $ ', A,', I3, ', X,', I2, ',', F4.1, ', Y,', I2, $ ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', A,', $ I3, ', X,', I2, ',', F4.1, ', Y,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK2. * END SUBROUTINE SCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, XT, G, Z ) * * Tests STRMV, STBMV, STPMV, STRSV, STBSV and STPSV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF, ONE PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XT( NMAX ), $ XX( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. REAL ERR, ERRMAX, TRANSL INTEGER I, ICD, ICT, ICU, IK, IN, INCX, INCXS, IX, K, $ KS, LAA, LDA, LDAS, LX, N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 DIAG, DIAGS, TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHD, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, STBMV, STBSV, STPMV, STPSV, $ STRMV, STRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'R' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 8 ELSE IF( BANDED )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 7 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * Set up zero vector for SMVCH. DO 10 I = 1, NMAX Z( I ) = ZERO 10 CONTINUE * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) * DO 70 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * * Generate the matrix A. 
* TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), UPLO, DIAG, N, N, A, $ NMAX, AA, LDA, K, K, RESET, TRANSL ) * DO 60 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, $ TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS DIAGS = DIAG NS = N KS = K DO 20 I = 1, LAA AS( I ) = AA( I ) 20 CONTINUE LDAS = LDA DO 30 I = 1, LX XS( I ) = XX( I ) 30 CONTINUE INCXS = INCX * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL STRMV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL STBMV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL STPMV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL STRSV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL STBSV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL STPSV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = TRANS.EQ.TRANSS ISAME( 3 ) = DIAG.EQ.DIAGS ISAME( 4 ) = NS.EQ.N IF( FULL )THEN ISAME( 5 ) = LSE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 7 ) = LSE( XS, XX, LX ) ELSE ISAME( 7 ) = LSERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 8 ) = INCXS.EQ.INCX ELSE IF( BANDED )THEN ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = LSE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 8 ) = LSE( XS, XX, LX ) ELSE ISAME( 8 ) = LSERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 9 ) = INCXS.EQ.INCX ELSE IF( PACKED )THEN ISAME( 5 ) = LSE( AS, AA, LAA ) IF( NULL )THEN ISAME( 6 ) = LSE( XS, XX, LX ) ELSE ISAME( 6 ) = LSERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 7 ) = INCXS.EQ.INCX END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MV' )THEN * * Check the result. * CALL SMVCH( TRANS, N, N, ONE, A, NMAX, X, $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN * * Compute approximation to original vector. * DO 50 I = 1, N Z( I ) = XX( 1 + ( I - 1 )* $ ABS( INCX ) ) XX( 1 + ( I - 1 )*ABS( INCX ) ) $ = X( I ) 50 CONTINUE CALL SMVCH( TRANS, N, N, ONE, A, NMAX, Z, $ INCX, ZERO, X, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .FALSE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0. GO TO 110 END IF * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, DIAG, N, LDA, $ INCX ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, DIAG, N, K, $ LDA, INCX ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANS, DIAG, N, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', AP, ', $ 'X,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), 2( I3, ',' ), $ ' A,', I3, ', X,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', A,', $ I3, ', X,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK3. * END SUBROUTINE SCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests SGER. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF, ONE PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. REAL ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IM, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, LAA, LDA, LDAS, LX, LY, M, MS, N, NARGS, $ NC, ND, NS LOGICAL NULL, RESET, SAME * .. Local Arrays .. REAL W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SGER, SMAKE, SMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * Define the number of arguments. NARGS = 9 * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * * Set LDA to 1 more than minimum value if room. LDA = M IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * DO 100 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*M * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, M, X, 1, XX, ABS( INCX ), $ 0, M - 1, RESET, TRANSL ) IF( M.GT.1 )THEN X( M/2 ) = ZERO XX( 1 + ABS( INCX )*( M/2 - 1 ) ) = ZERO END IF * DO 90 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL SMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. 
* TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, $ AA, LDA, M - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, M, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL SGER( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 140 END IF * * See what data changed inside subroutine. * ISAME( 1 ) = MS.EQ.M ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LSE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LSE( AS, AA, LAA ) ELSE ISAME( 8 ) = LSERES( 'GE', ' ', M, N, AS, AA, $ LDA ) END IF ISAME( 9 ) = LDAS.EQ.LDA * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 140 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, M Z( I ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, M Z( I ) = X( M - I + 1 ) 60 CONTINUE END IF DO 70 J = 1, N IF( INCY.GT.0 )THEN W( 1 ) = Y( J ) ELSE W( 1 ) = Y( N - J + 1 ) END IF CALL SMVCH( 'N', M, 1, ALPHA, Z, NMAX, W, 1, $ ONE, A( 1, J ), 1, YT, G, $ AA( 1 + ( J - 1 )*LDA ), EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 130 70 CONTINUE ELSE * Avoid repeating tests with M.le.0 or N.le.0. GO TO 110 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 150 * 130 CONTINUE WRITE( NOUT, FMT = 9995 )J * 140 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, M, N, ALPHA, INCX, INCY, LDA * 150 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( I3, ',' ), F4.1, ', X,', I2, $ ', Y,', I2, ', A,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK4. * END SUBROUTINE SCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests SSYR and SSPR. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF, ONE PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. 
REAL ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IC, IN, INCX, INCXS, IX, J, JA, JJ, LAA, $ LDA, LDAS, LJ, LX, N, NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. REAL W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, SSPR, SSYR * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 7 ELSE IF( PACKED )THEN NARGS = 6 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, $ AA, LDA, N - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX * * Call the subroutine. 
* IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, LDA IF( REWI ) $ REWIND NTRA CALL SSYR( UPLO, N, ALPHA, XX, INCX, AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX IF( REWI ) $ REWIND NTRA CALL SSPR( UPLO, N, ALPHA, XX, INCX, AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX IF( NULL )THEN ISAME( 6 ) = LSE( AS, AA, LAA ) ELSE ISAME( 6 ) = LSERES( SNAME( 2: 3 ), UPLO, N, N, AS, $ AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 7 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 40 I = 1, N Z( I ) = X( I ) 40 CONTINUE ELSE DO 50 I = 1, N Z( I ) = X( N - I + 1 ) 50 CONTINUE END IF JA = 1 DO 60 J = 1, N W( 1 ) = Z( J ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL SMVCH( 'N', LJ, 1, ALPHA, Z( JJ ), LJ, W, $ 1, ONE, A( JJ, J ), 1, YT, G, $ AA( JA ), EPS, ERR, FATAL, NOUT, $ .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 110 60 CONTINUE ELSE * Avoid repeating tests if N.le.0. IF( N.LE.0 ) $ GO TO 100 END IF * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK5. * END SUBROUTINE SCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests SSYR2 and SSPR2. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. REAL ZERO, HALF, ONE PARAMETER ( ZERO = 0.0, HALF = 0.5, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), G( NMAX ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX, 2 ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. REAL ALPHA, ALS, ERR, ERRMAX, TRANSL INTEGER I, IA, IC, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, JA, JJ, LAA, LDA, LDAS, LJ, LX, LY, N, $ NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. REAL W( 2 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMVCH, SSPR2, SSYR2 * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'Y' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 8 END IF * NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 140 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 140 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 130 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 120 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL SMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 110 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. 
* TRANSL = ZERO CALL SMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 100 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL SMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, $ NMAX, AA, LDA, N - 1, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL SSYR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY IF( REWI ) $ REWIND NTRA CALL SSPR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 160 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LSE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LSE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LSE( AS, AA, LAA ) ELSE ISAME( 8 ) = LSERES( SNAME( 2: 3 ), UPLO, N, N, $ AS, AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 9 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 160 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. 
* IF( INCX.GT.0 )THEN DO 50 I = 1, N Z( I, 1 ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, N Z( I, 1 ) = X( N - I + 1 ) 60 CONTINUE END IF IF( INCY.GT.0 )THEN DO 70 I = 1, N Z( I, 2 ) = Y( I ) 70 CONTINUE ELSE DO 80 I = 1, N Z( I, 2 ) = Y( N - I + 1 ) 80 CONTINUE END IF JA = 1 DO 90 J = 1, N W( 1 ) = Z( J, 2 ) W( 2 ) = Z( J, 1 ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL SMVCH( 'N', LJ, 2, ALPHA, Z( JJ, 1 ), $ NMAX, W, 1, ONE, A( JJ, J ), 1, $ YT, G, AA( JA ), EPS, ERR, FATAL, $ NOUT, .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 150 90 CONTINUE ELSE * Avoid repeating tests with N.le.0. IF( N.LE.0 ) $ GO TO 140 END IF * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 170 * 150 CONTINUE WRITE( NOUT, FMT = 9995 )J * 160 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, $ INCY, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX, INCY END IF * 170 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', Y,', I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', Y,', I2, ', A,', I3, ') .' 
) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK6. * END SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 2 Blas. * Requires a special version of the error-handling routine XERBLA. * ALPHA, BETA, A, X and Y should not need to be defined. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Local Scalars .. REAL ALPHA, BETA * .. Local Arrays .. REAL A( 1, 1 ), X( 1 ), Y( 1 ) * .. External Subroutines .. EXTERNAL CHKXER, SGBMV, SGEMV, SGER, SSBMV, SSPMV, SSPR, $ SSPR2, SSYMV, SSYR, SSYR2, STBMV, STBSV, STPMV, $ STPSV, STRMV, STRSV * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. 
GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90, 100, 110, 120, 130, 140, 150, $ 160 )ISNUM 10 INFOT = 1 CALL SGEMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGEMV( 'N', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMV( 'N', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL SGEMV( 'N', 2, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMV( 'N', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL SGEMV( 'N', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 20 INFOT = 1 CALL SGBMV( '/', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGBMV( 'N', -1, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGBMV( 'N', 0, -1, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGBMV( 'N', 0, 0, -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGBMV( 'N', 2, 0, 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGBMV( 'N', 0, 0, 1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 30 INFOT = 1 CALL SSYMV( '/', 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYMV( 'U', -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSYMV( 'U', 2, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL 
CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMV( 'U', 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYMV( 'U', 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 40 INFOT = 1 CALL SSBMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSBMV( 'U', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSBMV( 'U', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL SSBMV( 'U', 0, 1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SSBMV( 'U', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL SSBMV( 'U', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 50 INFOT = 1 CALL SSPMV( '/', 0, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSPMV( 'U', -1, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL SSPMV( 'U', 0, ALPHA, A, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSPMV( 'U', 0, ALPHA, A, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 60 INFOT = 1 CALL STRMV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STRMV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STRMV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STRMV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL STRMV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 70 INFOT = 1 CALL STBMV( 
'/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STBMV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STBMV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STBMV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STBMV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL STBMV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STBMV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 80 INFOT = 1 CALL STPMV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STPMV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STPMV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STPMV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL STPMV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 90 INFOT = 1 CALL STRSV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STRSV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STRSV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STRSV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL STRSV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 100 INFOT = 1 CALL STBSV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STBSV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT 
= 3 CALL STBSV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STBSV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STBSV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL STBSV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STBSV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 110 INFOT = 1 CALL STPSV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STPSV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STPSV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STPSV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL STPSV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 120 INFOT = 1 CALL SGER( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGER( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGER( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SGER( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SGER( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 130 INFOT = 1 CALL SSYR( '/', 0, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYR( 'U', -1, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSYR( 'U', 0, ALPHA, X, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR( 'U', 2, ALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 140 INFOT = 1 CALL SSPR( '/', 0, ALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT 
= 2 CALL SSPR( 'U', -1, ALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSPR( 'U', 0, ALPHA, X, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 150 INFOT = 1 CALL SSYR2( '/', 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYR2( 'U', -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSYR2( 'U', 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2( 'U', 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2( 'U', 2, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 170 160 INFOT = 1 CALL SSPR2( '/', 0, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSPR2( 'U', -1, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SSPR2( 'U', 0, ALPHA, X, 0, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSPR2( 'U', 0, ALPHA, X, 1, Y, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 170 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of SCHKE. * END
      SUBROUTINE SMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, KL,
     $                  KU, RESET, TRANSL )
*
*  Builds one test matrix for the Level 2 BLAS test program.
*
*  Step 1 fills the two-dimensional work array A.  For TYPE 'G?' every
*  element is visited; for TYPE 'S?' and 'T?' only the triangle named
*  by UPLO is visited.  A visited element that lies within KU
*  super-diagonals or KL sub-diagonals of the main diagonal receives
*  SBEG( RESET ) + TRANSL, any other visited element receives zero.
*  Off-diagonal elements are mirrored for symmetric types and the
*  opposite triangle is zeroed for triangular types.  Triangular
*  types then get ONE added to each diagonal element, or the diagonal
*  replaced by ONE when DIAG = 'U'.
*
*  Step 2 copies A into the one-dimensional array AA using the
*  storage layout selected by TYPE; positions of AA that belong to
*  the layout but carry no matrix element are set to ROGUE.
*
*  Arguments
*     TYPE    'GE', 'GB', 'SY', 'SB', 'SP', 'TR', 'TB' or 'TP'.
*     UPLO    'U' or 'L'; only consulted for 'S?' and 'T?' types.
*     DIAG    'U' marks a unit diagonal; only used for 'T?' types.
*     M, N    dimensions of the generated matrix.
*     A       work array with leading dimension NMAX (output).
*     AA      matrix in the layout required by the routine (output).
*     LDA     leading dimension used when indexing AA.
*     KL, KU  lower and upper bandwidths.
*     RESET   passed unchanged to every SBEG call.
*     TRANSL  constant added to every generated in-band value.
*
*  Auxiliary routine for test program for Level 2 Blas.
*
*     .. Parameters ..
      REAL               ZERO, ONE, ROGUE
      PARAMETER          ( ZERO = 0.0, ONE = 1.0, ROGUE = -1.0E10 )
*     .. Scalar Arguments ..
      REAL               TRANSL
      INTEGER            KL, KU, LDA, M, N, NMAX
      LOGICAL            RESET
      CHARACTER*1        DIAG, UPLO
      CHARACTER*2        TYPE
*     .. Array Arguments ..
      REAL               A( NMAX, * ), AA( * )
*     .. Local Scalars ..
      INTEGER            IC, IFRST, ILAST, IR, JOFF, KDIAG, NPACK
      LOGICAL            GEN, INBAND, LOWER, PICKED, SYM, TRI, UNIT,
     $                   UPPER
*     .. External Functions ..
      REAL               SBEG
      EXTERNAL           SBEG
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN
*     .. Executable Statements ..
      GEN = TYPE( 1: 1 ).EQ.'G'
      SYM = TYPE( 1: 1 ).EQ.'S'
      TRI = TYPE( 1: 1 ).EQ.'T'
      UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U'
      LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L'
      UNIT = TRI.AND.DIAG.EQ.'U'
*
*     Step 1: generate the matrix in A, column by column.  The order
*     in which SBEG is called is part of the generated data, so the
*     traversal order must stay column-major.
*
      DO 20 IC = 1, N
         DO 10 IR = 1, M
            PICKED = GEN.OR.( UPPER.AND.IR.LE.IC ).OR.
     $               ( LOWER.AND.IR.GE.IC )
            IF( PICKED )THEN
               INBAND = ( IR.LE.IC.AND.IC - IR.LE.KU ).OR.
     $                  ( IR.GE.IC.AND.IR - IC.LE.KL )
               IF( INBAND )THEN
                  A( IR, IC ) = SBEG( RESET ) + TRANSL
               ELSE
                  A( IR, IC ) = ZERO
               END IF
*              Fill in the element on the other side of the diagonal.
               IF( IR.NE.IC.AND.SYM ) A( IC, IR ) = A( IR, IC )
               IF( IR.NE.IC.AND.TRI ) A( IC, IR ) = ZERO
            END IF
   10    CONTINUE
         IF( UNIT )THEN
            A( IC, IC ) = ONE
         ELSE IF( TRI )THEN
            A( IC, IC ) = A( IC, IC ) + ONE
         END IF
   20 CONTINUE
*
*     Step 2: copy A into AA in the layout selected by TYPE.
*
      IF( TYPE.EQ.'GE' )THEN
*
*        Full storage: rows 1..M hold data, rows M+1..LDA are padding.
*
         DO 50 IC = 1, N
            JOFF = ( IC - 1 )*LDA
            DO 30 IR = 1, M
               AA( IR + JOFF ) = A( IR, IC )
   30       CONTINUE
            DO 40 IR = M + 1, LDA
               AA( IR + JOFF ) = ROGUE
   40       CONTINUE
   50    CONTINUE
      ELSE IF( TYPE.EQ.'GB' )THEN
*
*        General band storage: band row IR of column IC holds
*        A( IR + IC - KU - 1, IC ).
*
         DO 90 IC = 1, N
            JOFF = ( IC - 1 )*LDA
            IFRST = MAX( 1, KU + 2 - IC )
            ILAST = MIN( KL + KU + 1, KU + 1 + M - IC )
            DO 60 IR = 1, IFRST - 1
               AA( IR + JOFF ) = ROGUE
   60       CONTINUE
            DO 70 IR = IFRST, ILAST
               AA( IR + JOFF ) = A( IR + IC - KU - 1, IC )
   70       CONTINUE
*           When the data range is empty the padding restarts at IFRST.
            DO 80 IR = MAX( IFRST, ILAST + 1 ), LDA
               AA( IR + JOFF ) = ROGUE
   80       CONTINUE
   90    CONTINUE
      ELSE IF( TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN
*
*        Full storage of one triangle; with a unit diagonal the
*        diagonal element itself is also replaced by ROGUE.
*
         DO 130 IC = 1, N
            JOFF = ( IC - 1 )*LDA
            IF( UPPER )THEN
               IFRST = 1
               ILAST = IC
               IF( UNIT ) ILAST = IC - 1
            ELSE
               IFRST = IC
               IF( UNIT ) IFRST = IC + 1
               ILAST = N
            END IF
            DO 100 IR = 1, IFRST - 1
               AA( IR + JOFF ) = ROGUE
  100       CONTINUE
            DO 110 IR = IFRST, ILAST
               AA( IR + JOFF ) = A( IR, IC )
  110       CONTINUE
            DO 120 IR = ILAST + 1, LDA
               AA( IR + JOFF ) = ROGUE
  120       CONTINUE
  130    CONTINUE
      ELSE IF( TYPE.EQ.'SB'.OR.TYPE.EQ.'TB' )THEN
*
*        Band storage of one triangle: band row IR of column IC holds
*        A( IR + IC - KDIAG, IC ), KDIAG being the band row of the
*        main diagonal.
*
         DO 170 IC = 1, N
            JOFF = ( IC - 1 )*LDA
            IF( UPPER )THEN
               KDIAG = KL + 1
               IFRST = MAX( 1, KL + 2 - IC )
               ILAST = KL + 1
               IF( UNIT ) ILAST = KL
            ELSE
               KDIAG = 1
               IFRST = 1
               IF( UNIT ) IFRST = 2
               ILAST = MIN( KL + 1, 1 + M - IC )
            END IF
            DO 140 IR = 1, IFRST - 1
               AA( IR + JOFF ) = ROGUE
  140       CONTINUE
            DO 150 IR = IFRST, ILAST
               AA( IR + JOFF ) = A( IR + IC - KDIAG, IC )
  150       CONTINUE
            DO 160 IR = ILAST + 1, LDA
               AA( IR + JOFF ) = ROGUE
  160       CONTINUE
  170    CONTINUE
      ELSE IF( TYPE.EQ.'SP'.OR.TYPE.EQ.'TP' )THEN
*
*        Packed storage: the columns of the chosen triangle follow one
*        another without padding; a unit diagonal is stored as ROGUE.
*
         NPACK = 0
         DO 190 IC = 1, N
            IF( UPPER )THEN
               IFRST = 1
               ILAST = IC
            ELSE
               IFRST = IC
               ILAST = N
            END IF
            DO 180 IR = IFRST, ILAST
               NPACK = NPACK + 1
               IF( UNIT.AND.IR.EQ.IC )THEN
                  AA( NPACK ) = ROGUE
               ELSE
                  AA( NPACK ) = A( IR, IC )
               END IF
  180       CONTINUE
  190    CONTINUE
      END IF
      RETURN
*
*     End of SMAKE.
*
      END
      SUBROUTINE SMVCH( TRANS, M, N, ALPHA, A, NMAX, X, INCX, BETA, Y,
     $                  INCY, YT, G, YY, EPS, ERR, FATAL, NOUT, MV )
*
*  Checks one matrix-vector result of the computational tests.
*
*  The expected vector  ALPHA*op( A )*X + BETA*Y  is formed in YT,
*  where op( A ) is the transpose of A when TRANS is 'T' or 'C' and A
*  itself otherwise.  For each element a gauge
*  ABS( ALPHA )*sum( ABS( a*x ) ) + ABS( BETA*y )  is formed in G.
*  Each element of YT is then compared with the corresponding element
*  of YY, which is read with stride ABS( INCY ).
*
*  On return ERR holds the largest value seen of
*  ABS( YT - YY )/EPS, divided by the gauge when the gauge is
*  non-zero.  As soon as ERR*SQRT( EPS ) reaches ONE the scan stops,
*  FATAL is set to .TRUE. and a table of all ML element pairs is
*  written to unit NOUT.  FATAL is left untouched otherwise.
*
*  MV only selects the column order of that table: YT first when MV
*  is .TRUE., YY first when it is .FALSE..
*
*  Auxiliary routine for test program for Level 2 Blas.
*
*     .. Parameters ..
      REAL               ZERO, ONE
      PARAMETER          ( ZERO = 0.0, ONE = 1.0 )
*     .. Scalar Arguments ..
      REAL               ALPHA, BETA, EPS, ERR
      INTEGER            INCX, INCY, M, N, NMAX, NOUT
      LOGICAL            FATAL, MV
      CHARACTER*1        TRANS
*     .. Array Arguments ..
      REAL               A( NMAX, * ), G( * ), X( * ), Y( * ), YT( * ),
     $                   YY( * )
*     .. Local Scalars ..
      REAL               ERRI, GAUGE, YSUM
      INTEGER            I, INCXL, INCYL, ISTEP, IY, IYY, J, JX, K, KX,
     $                   KY, KYY, ML, NL
      LOGICAL            TRAN
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, MAX, SQRT
*     .. Executable Statements ..
      TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C'
*
*     ML is the length of the result, NL the length of X.
*
      ML = M
      NL = N
      IF( TRAN )THEN
         ML = N
         NL = M
      END IF
*
*     X and Y are walked with unit stride; a negative increment only
*     reverses the direction of travel.
*
      KX = 1
      INCXL = 1
      IF( INCX.LT.0 )THEN
         KX = NL
         INCXL = -1
      END IF
      KY = 1
      INCYL = 1
      IF( INCY.LT.0 )THEN
         KY = ML
         INCYL = -1
      END IF
*
*     Compute expected result in YT using data in A, X and Y.
*     Compute gauges in G.
*
      IY = KY
      DO 30 I = 1, ML
         YSUM = ZERO
         GAUGE = ZERO
         JX = KX
         IF( TRAN )THEN
            DO 10 J = 1, NL
               YSUM = YSUM + A( J, I )*X( JX )
               GAUGE = GAUGE + ABS( A( J, I )*X( JX ) )
               JX = JX + INCXL
   10       CONTINUE
         ELSE
            DO 20 J = 1, NL
               YSUM = YSUM + A( I, J )*X( JX )
               GAUGE = GAUGE + ABS( A( I, J )*X( JX ) )
               JX = JX + INCXL
   20       CONTINUE
         END IF
         YT( IY ) = ALPHA*YSUM + BETA*Y( IY )
         G( IY ) = ABS( ALPHA )*GAUGE + ABS( BETA*Y( IY ) )
         IY = IY + INCYL
   30 CONTINUE
*
*     Compute the error ratio for this result.
*
      ISTEP = ABS( INCY )
      ERR = ZERO
      IYY = 1
      DO 50 I = 1, ML
         ERRI = ABS( YT( I ) - YY( IYY ) )/EPS
         IF( G( I ).NE.ZERO ) ERRI = ERRI/G( I )
         ERR = MAX( ERR, ERRI )
         IF( ERR*SQRT( EPS ).GE.ONE )THEN
*
*           Less than half accurate: report every element and stop.
*
            FATAL = .TRUE.
            WRITE( NOUT, FMT = 9999 )
            KYY = 1
            DO 40 K = 1, ML
               IF( MV )THEN
                  WRITE( NOUT, FMT = 9998 )K, YT( K ), YY( KYY )
               ELSE
                  WRITE( NOUT, FMT = 9998 )K, YY( KYY ), YT( K )
               END IF
               KYY = KYY + ISTEP
   40       CONTINUE
            RETURN
         END IF
         IYY = IYY + ISTEP
   50 CONTINUE
*
*     The scan completed, so every element passed the check above.
*
      RETURN
*
 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL',
     $      'F ACCURATE *******', /' EXPECTED RESULT COMPU',
     $      'TED RESULT' )
 9998 FORMAT( 1X, I7, 2G18.6 )
*
*     End of SMVCH.
*
      END
      LOGICAL FUNCTION LSE( RI, RJ, LR )
*
*  Returns .TRUE. when the first LR elements of RI and RJ are equal
*  element by element, and .FALSE. at the first position where they
*  differ.  With LR less than 1 nothing is compared and the result
*  is .TRUE..
*
*  Auxiliary routine for test program for Level 2 Blas.
*
*     .. Scalar Arguments ..
      INTEGER            LR
*     .. Array Arguments ..
      REAL               RI( * ), RJ( * )
*     .. Local Scalars ..
      INTEGER            K
*     .. Executable Statements ..
      LSE = .FALSE.
      DO 10 K = 1, LR
         IF( RI( K ).NE.RJ( K ) ) RETURN
   10 CONTINUE
      LSE = .TRUE.
      RETURN
*
*     End of LSE.
*
      END
LOGICAL FUNCTION LSERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE', 'SY' or 'SP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. REAL AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LSERES = .TRUE. GO TO 80 70 CONTINUE LSERES = .FALSE.
80 RETURN * * End of LSERES. * END REAL FUNCTION SBEG( RESET ) * * Generates random numbers uniformly distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, MI * .. Save statement .. SAVE I, IC, MI * .. Intrinsic Functions .. INTRINSIC REAL * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 I = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I is bounded between 1 and 999. * If initial I = 1,2,3,6,7 or 9, the period will be 50. * If initial I = 4 or 8, the period will be 25. * If initial I = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I in 6. * IC = IC + 1 10 I = I*MI I = I - 1000*( I/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF SBEG = REAL( I - 500 )/1001.0 RETURN * * End of SBEG. * END REAL FUNCTION SDIFF( X, Y ) * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * * .. Scalar Arguments .. REAL X, Y * .. Executable Statements .. SDIFF = X - Y RETURN * * End of SDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. 
* END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 2 BLAS * routines. * * XERBLA is an error handler for the Level 2 BLAS routines. * * It is called by the Level 2 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-0.6.1/blastest/src/fortran/sblat3.f000066400000000000000000003133541360743507500201140ustar00rootroot00000000000000*> \brief \b SBLAT3 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM SBLAT3 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the REAL Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records *> of the file are read using list-directed input, the last 6 records *> are read using the format ( A6, L2 ). 
An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 20 lines: *> 'sblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 3 NUMBER OF VALUES OF ALPHA *> 0.0 1.0 0.7 VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> 0.0 1.0 1.3 VALUES OF BETA *> SGEMM T PUT F FOR NO TEST. SAME COLUMNS. *> SSYMM T PUT F FOR NO TEST. SAME COLUMNS. *> STRMM T PUT F FOR NO TEST. SAME COLUMNS. *> STRSM T PUT F FOR NO TEST. SAME COLUMNS. *> SSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. *> A Set of Level 3 Basic Linear Algebra Subprograms. *> *> Technical Memorandum No.88 (Revision 1), Mathematics and *> Computer Science Division, Argonne National Laboratory, 9700 *> South Cass Avenue, Argonne, Illinois 60439, US. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* *> \date April 2012 * *> \ingroup single_blas_testing * * ===================================================================== PROGRAM SBLAT3 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 6 ) REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. REAL EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANSA, TRANSB CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. REAL AA( NMAX*NMAX ), AB( NMAX, 2*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), $ BB( NMAX*NMAX ), BET( NBEMAX ), $ BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ G( NMAX ), W( 2*NMAX ) INTEGER IDIM( NIDMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. REAL SDIFF LOGICAL LSE EXTERNAL SDIFF, LSE * .. External Subroutines .. EXTERNAL SCHK1, SCHK2, SCHK3, SCHK4, SCHK5, SCHKE, SMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'SGEMM ', 'SSYMM ', 'STRMM ', 'STRSM ', $ 'SSYRK ', 'SSYR2K'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. 
* READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. * * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 220 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 220 END IF 10 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 220 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 220 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9993 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9992 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 20 I = 1, NSUBS LTEST( I ) = .FALSE. 20 CONTINUE 30 READ( NIN, FMT = 9988, END = 60 )SNAMET, LTESTT DO 40 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 50 40 CONTINUE WRITE( NOUT, FMT = 9990 )SNAMET STOP 50 LTEST( I ) = LTESTT GO TO 30 * 60 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). 
* EPS = EPSILON(ZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of SMMCH using exact data. * N = MIN( 32, NMAX ) DO 100 J = 1, N DO 90 I = 1, N AB( I, J ) = MAX( I - J + 1, 0 ) 90 CONTINUE AB( J, NMAX + 1 ) = J AB( 1, NMAX + J ) = J C( J, 1 ) = ZERO 100 CONTINUE DO 110 J = 1, N CC( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 110 CONTINUE * CC holds the exact result. On exit from SMMCH CT holds * the result computed by SMMCH. TRANSA = 'N' TRANSB = 'N' CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'T' CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF DO 120 J = 1, N AB( J, NMAX + 1 ) = N - J + 1 AB( 1, NMAX + J ) = N - J + 1 120 CONTINUE DO 130 J = 1, N CC( N - J + 1 ) = J*( ( J + 1 )*J )/2 - $ ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE TRANSA = 'T' TRANSB = 'N' CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'T' CALL SMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LSE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.ZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 200 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. 
WRITE( NOUT, FMT = 9987 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL SCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 160, 160, 170, 180 )ISNUM * Test SGEMM, 01. 140 CALL SCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test SSYMM, 02. 150 CALL SCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test STRMM, 03, STRSM, 04. 160 CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C ) GO TO 190 * Test SSYRK, 05. 170 CALL SCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test SSYR2K, 06. 
180 CALL SCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 END IF 200 CONTINUE WRITE( NOUT, FMT = 9986 ) GO TO 230 * 210 CONTINUE WRITE( NOUT, FMT = 9985 ) GO TO 230 * 220 CONTINUE WRITE( NOUT, FMT = 9991 ) * 230 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, E9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' TESTS OF THE REAL LEVEL 3 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9994 FORMAT( ' FOR N ', 9I6 ) 9993 FORMAT( ' FOR ALPHA ', 7F6.1 ) 9992 FORMAT( ' FOR BETA ', 7F6.1 ) 9991 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9990 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9989 FORMAT( ' ERROR IN SMMCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' SMMCH WAS CALLED WITH TRANSA = ', A1, $ ' AND TRANSB = ', A1, /' AND RETURNED SAME = ', L1, ' AND ', $ 'ERR = ', F12.3, '.', /' THIS MAY BE DUE TO FAULTS IN THE ', $ 'ARITHMETIC OR THE COMPILER.', /' ******* TESTS ABANDONED ', $ '*******' ) 9988 FORMAT( A6, L2 ) 9987 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9986 FORMAT( /' END OF TESTS' ) 9985 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9984 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of SBLAT3. * END SUBROUTINE SCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests SGEMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. 
* Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, $ MA, MB, MS, N, NA, NARGS, NB, NC, NS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SGEMM, SMAKE, SMMCH * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. * NARGS = 13 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 110 IM = 1, NIDIM M = IDIM( IM ) * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICA = 1, 3 TRANSA = ICH( ICA: ICA ) TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' * IF( TRANA )THEN MA = K NA = M ELSE MA = M NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. 
IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL SMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICB = 1, 3 TRANSB = ICH( ICB: ICB ) TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * IF( TRANB )THEN MB = N NB = K ELSE MB = K NB = N END IF * Set LDB to 1 more than minimum value if room. LDB = MB IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 70 LBB = LDB*NB * * Generate the matrix B. * CALL SMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, $ LDB, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL SMAKE( 'GE', ' ', ' ', M, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANAS = TRANSA TRANBS = TRANSB MS = M NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) $ REWIND NTRA CALL SGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ AA, LDA, BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANSA.EQ.TRANAS ISAME( 2 ) = TRANSB.EQ.TRANBS ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LSE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LSE( BS, BB, LBB ) ISAME( 10 ) = LDBS.EQ.LDB ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LSE( CS, CC, LCC ) ELSE ISAME( 12 ) = LSERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report * and return. * SAME = .TRUE. 
DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL SMMCH( TRANSA, TRANSB, M, N, K, $ ALPHA, A, NMAX, B, NMAX, BETA, $ C, NMAX, CT, G, CC, LDC, EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K, $ ALPHA, LDA, LDB, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',''', A1, ''',', $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ', $ 'C,', I3, ').' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK1. * END SUBROUTINE SCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests SSYMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. 
REAL ZERO PARAMETER ( ZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BLS, ERR, ERRMAX INTEGER I, IA, IB, ICS, ICU, IM, IN, LAA, LBB, LCC, $ LDA, LDAS, LDB, LDBS, LDC, LDCS, M, MS, N, NA, $ NARGS, NC, NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 SIDE, SIDES, UPLO, UPLOS CHARACTER*2 ICHS, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMMCH, SSYMM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHS/'LR'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IM = 1, NIDIM M = IDIM( IM ) * DO 90 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 90 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 90 LBB = LDB*N * * Generate the matrix B. * CALL SMAKE( 'GE', ' ', ' ', M, N, B, NMAX, BB, LDB, RESET, $ ZERO ) * DO 80 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' * IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. 
IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * * Generate the symmetric matrix A. * CALL SMAKE( 'SY', UPLO, ' ', NA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL SMAKE( 'GE', ' ', ' ', M, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, SIDE, $ UPLO, M, N, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL SSYMM( SIDE, UPLO, M, N, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 110 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LSE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LSE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LSE( CS, CC, LCC ) ELSE ISAME( 11 ) = LSERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 110 END IF * IF( .NOT.NULL )THEN * * Check the result. * IF( LEFT )THEN CALL SMMCH( 'N', 'N', M, N, M, ALPHA, A, $ NMAX, B, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. 
) ELSE CALL SMMCH( 'N', 'N', M, N, N, ALPHA, B, $ NMAX, A, NMAX, BETA, C, NMAX, $ CT, G, CC, LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 120 * 110 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, M, N, ALPHA, LDA, $ LDB, BETA, LDC * 120 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK2. * END SUBROUTINE SCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NMAX, A, AA, AS, $ B, BB, BS, CT, G, C ) * * Tests STRMM and STRSM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, ERR, ERRMAX INTEGER I, IA, ICD, ICS, ICT, ICU, IM, IN, J, LAA, LBB, $ LDA, LDAS, LDB, LDBS, M, MS, N, NA, NARGS, NC, $ NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 DIAG, DIAGS, SIDE, SIDES, TRANAS, TRANSA, UPLO, $ UPLOS CHARACTER*2 ICHD, ICHS, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMMCH, STRMM, STRSM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/, ICHS/'LR'/ * .. Executable Statements .. * NARGS = 11 NC = 0 RESET = .TRUE. ERRMAX = ZERO * Set up zero matrix for SMMCH. DO 20 J = 1, NMAX DO 10 I = 1, NMAX C( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * DO 140 IM = 1, NIDIM M = IDIM( IM ) * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 130 LBB = LDB*N NULL = M.LE.0.OR.N.LE.0 * DO 120 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 130 LAA = LDA*NA * DO 110 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 100 ICT = 1, 3 TRANSA = ICHT( ICT: ICT ) * DO 90 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * CALL SMAKE( 'TR', UPLO, DIAG, NA, NA, A, $ NMAX, AA, LDA, RESET, ZERO ) * * Generate the matrix B. 
* CALL SMAKE( 'GE', ' ', ' ', M, N, B, NMAX, $ BB, LDB, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * SIDES = SIDE UPLOS = UPLO TRANAS = TRANSA DIAGS = DIAG MS = M NS = N ALS = ALPHA DO 30 I = 1, LAA AS( I ) = AA( I ) 30 CONTINUE LDAS = LDA DO 40 I = 1, LBB BS( I ) = BB( I ) 40 CONTINUE LDBS = LDB * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL STRMM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL STRSM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = TRANAS.EQ.TRANSA ISAME( 4 ) = DIAGS.EQ.DIAG ISAME( 5 ) = MS.EQ.M ISAME( 6 ) = NS.EQ.N ISAME( 7 ) = ALS.EQ.ALPHA ISAME( 8 ) = LSE( AS, AA, LAA ) ISAME( 9 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 10 ) = LSE( BS, BB, LBB ) ELSE ISAME( 10 ) = LSERES( 'GE', ' ', M, N, BS, $ BB, LDB ) END IF ISAME( 11 ) = LDBS.EQ.LDB * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 50 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 50 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MM' )THEN * * Check the result. * IF( LEFT )THEN CALL SMMCH( TRANSA, 'N', M, N, M, $ ALPHA, A, NMAX, B, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL SMMCH( 'N', TRANSA, M, N, N, $ ALPHA, B, NMAX, A, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. 
) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN * * Compute approximation to original * matrix. * DO 70 J = 1, N DO 60 I = 1, M C( I, J ) = BB( I + ( J - 1 )* $ LDB ) BB( I + ( J - 1 )*LDB ) = ALPHA* $ B( I, J ) 60 CONTINUE 70 CONTINUE * IF( LEFT )THEN CALL SMMCH( TRANSA, 'N', M, N, M, $ ONE, A, NMAX, C, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) ELSE CALL SMMCH( 'N', TRANSA, M, N, N, $ ONE, C, NMAX, A, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) END IF END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 150 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, LDA, LDB * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 4( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ') .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK3. * END SUBROUTINE SCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests SSYRK. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. 
* Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0 ) * .. Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ), G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BETS, ERR, ERRMAX INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, K, KS, $ LAA, LCC, LDA, LDAS, LDC, LDCS, LJ, MA, N, NA, $ NARGS, NC, NS LOGICAL NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMMCH, SSYRK * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NTC'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 10 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. 
* CALL SMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL SMAKE( 'SY', UPLO, ' ', N, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA BETS = BETA DO 20 I = 1, LCC CS( I ) = CC( I ) 20 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, BETA, LDC IF( REWI ) $ REWIND NTRA CALL SSYRK( UPLO, TRANS, N, K, ALPHA, AA, LDA, $ BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LSE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = BETS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LSE( CS, CC, LCC ) ELSE ISAME( 9 ) = LSERES( 'SY', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 10 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * JC = 1 DO 40 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN CALL SMMCH( 'T', 'N', LJ, 1, K, ALPHA, $ A( 1, JJ ), NMAX, $ A( 1, J ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. 
) ELSE CALL SMMCH( 'N', 'T', LJ, 1, K, ALPHA, $ A( JJ, 1 ), NMAX, $ A( J, 1 ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 40 CONTINUE END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, BETA, LDC * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK4. * END SUBROUTINE SCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) * * Tests SSYR2K. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO PARAMETER ( ZERO = 0.0 ) * .. 
Scalar Arguments .. REAL EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. REAL AA( NMAX*NMAX ), AB( 2*NMAX*NMAX ), $ ALF( NALF ), AS( NMAX*NMAX ), BB( NMAX*NMAX ), $ BET( NBET ), BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ G( NMAX ), W( 2*NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. REAL ALPHA, ALS, BETA, BETS, ERR, ERRMAX INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, JJAB, $ K, KS, LAA, LBB, LCC, LDA, LDAS, LDB, LDBS, $ LDC, LDCS, LJ, MA, N, NA, NARGS, NC, NS LOGICAL NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LSE, LSERES EXTERNAL LSE, LSERES * .. External Subroutines .. EXTERNAL SMAKE, SMMCH, SSYR2K * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NTC'/, ICHU/'UL'/ * .. Executable Statements .. * NARGS = 12 NC = 0 RESET = .TRUE. ERRMAX = ZERO * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 130 LCC = LDC*N NULL = N.LE.0 * DO 120 IK = 1, NIDIM K = IDIM( IK ) * DO 110 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*NA * * Generate the matrix A. * IF( TRAN )THEN CALL SMAKE( 'GE', ' ', ' ', MA, NA, AB, 2*NMAX, AA, $ LDA, RESET, ZERO ) ELSE CALL SMAKE( 'GE', ' ', ' ', MA, NA, AB, NMAX, AA, LDA, $ RESET, ZERO ) END IF * * Generate the matrix B. 
* LDB = LDA LBB = LAA IF( TRAN )THEN CALL SMAKE( 'GE', ' ', ' ', MA, NA, AB( K + 1 ), $ 2*NMAX, BB, LDB, RESET, ZERO ) ELSE CALL SMAKE( 'GE', ' ', ' ', MA, NA, AB( K*NMAX + 1 ), $ NMAX, BB, LDB, RESET, ZERO ) END IF * DO 100 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 90 IA = 1, NALF ALPHA = ALF( IA ) * DO 80 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL SMAKE( 'SY', UPLO, ' ', N, N, C, NMAX, CC, $ LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BETS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, LDB, BETA, LDC IF( REWI ) $ REWIND NTRA CALL SSYR2K( UPLO, TRANS, N, K, ALPHA, AA, LDA, $ BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K ISAME( 5 ) = ALS.EQ.ALPHA ISAME( 6 ) = LSE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA ISAME( 8 ) = LSE( BS, BB, LBB ) ISAME( 9 ) = LDBS.EQ.LDB ISAME( 10 ) = BETS.EQ.BETA IF( NULL )THEN ISAME( 11 ) = LSE( CS, CC, LCC ) ELSE ISAME( 11 ) = LSERES( 'SY', UPLO, N, N, CS, $ CC, LDC ) END IF ISAME( 12 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. 
* JJAB = 1 JC = 1 DO 70 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN DO 50 I = 1, K W( I ) = AB( ( J - 1 )*2*NMAX + K + $ I ) W( K + I ) = AB( ( J - 1 )*2*NMAX + $ I ) 50 CONTINUE CALL SMMCH( 'T', 'N', LJ, 1, 2*K, $ ALPHA, AB( JJAB ), 2*NMAX, $ W, 2*NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE DO 60 I = 1, K W( I ) = AB( ( K + I - 1 )*NMAX + $ J ) W( K + I ) = AB( ( I - 1 )*NMAX + $ J ) 60 CONTINUE CALL SMMCH( 'N', 'N', LJ, 1, 2*K, $ ALPHA, AB( JJ ), NMAX, W, $ 2*NMAX, BETA, C( JJ, J ), $ NMAX, CT, G, CC( JC ), LDC, $ EPS, ERR, FATAL, NOUT, $ .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 IF( TRAN ) $ JJAB = JJAB + 2*NMAX END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 140 70 CONTINUE END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9995 )J * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA, $ LDA, LDB, BETA, LDC * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ), $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ', $ ' .' 
) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of SCHK5. * END SUBROUTINE SCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 3 Blas. * Requires a special version of the error-handling routine XERBLA. * A, B and C should not need to be defined. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * 3-19-92: Initialize ALPHA and BETA (eca) * 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0E0, TWO = 2.0E0 ) * .. Local Scalars .. REAL ALPHA, BETA * .. Local Arrays .. REAL A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL CHKXER, SGEMM, SSYMM, SSYR2K, SSYRK, STRMM, $ STRSM * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. * * Initialize ALPHA and BETA. 
* ALPHA = ONE BETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60 )ISNUM 10 INFOT = 1 CALL SGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL SGEMM( '/', 'T', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGEMM( 'N', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SGEMM( 'T', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMM( 'N', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMM( 'N', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMM( 'T', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SGEMM( 'T', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGEMM( 'N', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGEMM( 'N', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGEMM( 'T', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SGEMM( 'T', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGEMM( 'N', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGEMM( 'N', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGEMM( 'T', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL SGEMM( 'T', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMM( 'N', 'N', 
2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL SGEMM( 'T', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGEMM( 'N', 'N', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGEMM( 'N', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SGEMM( 'T', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGEMM( 'T', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL SGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 20 INFOT = 1 CALL SSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL 
SSYMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 30 INFOT = 1 CALL STRMM( '/', 'U', 'N', 
'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STRMM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STRMM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STRMM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRMM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, 
LERR, OK ) INFOT = 6 CALL STRMM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRMM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRMM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 
1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 40 INFOT = 1 CALL STRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL STRSM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL STRSM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL STRSM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL STRSM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK 
) INFOT = 6 CALL STRSM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL STRSM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL STRSM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 
) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL STRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 50 INFOT = 1 CALL SSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYRK( 'U', '/', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYRK( 'U', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYRK( 'U', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYRK( 'L', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYRK( 'L', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYRK( 'U', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYRK( 'U', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYRK( 'L', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYRK( 'L', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYRK( 'U', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYRK( 'U', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYRK( 'L', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYRK( 'L', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYRK( 'U', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYRK( 'U', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYRK( 'L', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL 
CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL SSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 70 60 INFOT = 1 CALL SSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL SSYR2K( 'U', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYR2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYR2K( 'U', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYR2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL SSYR2K( 'L', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYR2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYR2K( 'U', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYR2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL SSYR2K( 'L', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2K( 'U', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL SSYR2K( 'L', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2K( 'U', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( 
SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL SSYR2K( 'L', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYR2K( 'U', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL SSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 70 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of SCHKE. * END SUBROUTINE SMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, RESET, $ TRANSL ) * * Generates values for an M by N matrix A. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'SY' or 'TR'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) REAL ROGUE PARAMETER ( ROGUE = -1.0E10 ) * .. Scalar Arguments .. REAL TRANSL INTEGER LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. REAL A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. REAL SBEG EXTERNAL SBEG * .. Executable Statements .. 
GEN = TYPE.EQ.'GE' SYM = TYPE.EQ.'SY' TRI = TYPE.EQ.'TR' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN A( I, J ) = SBEG( RESET ) + TRANSL IF( I.NE.J )THEN * Set some elements to zero IF( N.GT.3.AND.J.EQ.N/2 ) $ A( I, J ) = ZERO IF( SYM )THEN A( J, I ) = A( I, J ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN DO 90 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 60 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 70 CONTINUE DO 80 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE END IF RETURN * * End of SMAKE. * END SUBROUTINE SMMCH( TRANSA, TRANSB, M, N, KK, ALPHA, A, LDA, B, LDB, $ BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL, $ NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. REAL ZERO, ONE PARAMETER ( ZERO = 0.0, ONE = 1.0 ) * .. Scalar Arguments .. REAL ALPHA, BETA, EPS, ERR INTEGER KK, LDA, LDB, LDC, LDCC, M, N, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANSA, TRANSB * .. Array Arguments .. 
REAL A( LDA, * ), B( LDB, * ), C( LDC, * ), $ CC( LDCC, * ), CT( * ), G( * ) * .. Local Scalars .. REAL ERRI INTEGER I, J, K LOGICAL TRANA, TRANB * .. Intrinsic Functions .. INTRINSIC ABS, MAX, SQRT * .. Executable Statements .. TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * * Compute expected result, one column at a time, in CT using data * in A, B and C. * Compute gauges in G. * DO 120 J = 1, N * DO 10 I = 1, M CT( I ) = ZERO G( I ) = ZERO 10 CONTINUE IF( .NOT.TRANA.AND..NOT.TRANB )THEN DO 30 K = 1, KK DO 20 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( K, J ) G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( K, J ) ) 20 CONTINUE 30 CONTINUE ELSE IF( TRANA.AND..NOT.TRANB )THEN DO 50 K = 1, KK DO 40 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( K, J ) G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( K, J ) ) 40 CONTINUE 50 CONTINUE ELSE IF( .NOT.TRANA.AND.TRANB )THEN DO 70 K = 1, KK DO 60 I = 1, M CT( I ) = CT( I ) + A( I, K )*B( J, K ) G( I ) = G( I ) + ABS( A( I, K ) )*ABS( B( J, K ) ) 60 CONTINUE 70 CONTINUE ELSE IF( TRANA.AND.TRANB )THEN DO 90 K = 1, KK DO 80 I = 1, M CT( I ) = CT( I ) + A( K, I )*B( J, K ) G( I ) = G( I ) + ABS( A( K, I ) )*ABS( B( J, K ) ) 80 CONTINUE 90 CONTINUE END IF DO 100 I = 1, M CT( I ) = ALPHA*CT( I ) + BETA*C( I, J ) G( I ) = ABS( ALPHA )*G( I ) + ABS( BETA )*ABS( C( I, J ) ) 100 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 110 I = 1, M ERRI = ABS( CT( I ) - CC( I, J ) )/EPS IF( G( I ).NE.ZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.ONE ) $ GO TO 130 110 CONTINUE * 120 CONTINUE * * If the loop completes, all results are at least half accurate. GO TO 150 * * Report fatal error. * 130 FATAL = .TRUE. 
WRITE( NOUT, FMT = 9999 ) DO 140 I = 1, M IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J ) ELSE WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I ) END IF 140 CONTINUE IF( N.GT.1 ) $ WRITE( NOUT, FMT = 9997 )J * 150 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RESULT COMPU', $ 'TED RESULT' ) 9998 FORMAT( 1X, I7, 2G18.6 ) 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) * * End of SMMCH. * END LOGICAL FUNCTION LSE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. REAL RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LSE = .TRUE. GO TO 30 20 CONTINUE LSE = .FALSE. 30 RETURN * * End of LSE. * END LOGICAL FUNCTION LSERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE' or 'SY'. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. REAL AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. 
UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'SY' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LSERES = .TRUE. GO TO 80 70 CONTINUE LSERES = .FALSE. 80 RETURN * * End of LSERES. * END REAL FUNCTION SBEG( RESET ) * * Generates random numbers uniformly distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, MI * .. Save statement .. SAVE I, IC, MI * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 I = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I is bounded between 1 and 999. * If initial I = 1,2,3,6,7 or 9, the period will be 50. * If initial I = 4 or 8, the period will be 25. * If initial I = 5, the period will be 10. * IC is used to break up the period by skipping 1 value of I in 6. * IC = IC + 1 10 I = I*MI I = I - 1000*( I/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF SBEG = ( I - 500 )/1001.0 RETURN * * End of SBEG. * END REAL FUNCTION SDIFF( X, Y ) * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. REAL X, Y * .. Executable Statements .. SDIFF = X - Y RETURN * * End of SDIFF. 
* END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 3 BLAS * routines. * * XERBLA is an error handler for the Level 3 BLAS routines. * * It is called by the Level 3 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. 
END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-0.6.1/blastest/src/fortran/zblat1.f000066400000000000000000000765621360743507500201300ustar00rootroot00000000000000*> \brief \b ZBLAT1 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM ZBLAT1 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX*16 Level 1 BLAS. *> *> Based upon the original BLAS test routine together with: *> F06GAF Example Program Text *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex16_blas_testing * * ===================================================================== PROGRAM ZBLAT1 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SFAC INTEGER IC * .. External Subroutines .. EXTERNAL CHECK1, CHECK2, HEADER * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA SFAC/9.765625D-4/ * .. Executable Statements .. WRITE (NOUT,99999) DO 20 IC = 1, 10 ICASE = IC CALL HEADER * * Initialize PASS, INCX, INCY, and MODE for a new case. 
* The value 9999 for INCX, INCY or MODE will appear in the * detailed output, if any, for cases that do not involve * these parameters. * PASS = .TRUE. INCX = 9999 INCY = 9999 MODE = 9999 IF (ICASE.LE.5) THEN CALL CHECK2(SFAC) ELSE IF (ICASE.GE.6) THEN CALL CHECK1(SFAC) END IF * -- Print IF (PASS) WRITE (NOUT,99998) 20 CONTINUE STOP * 99999 FORMAT (' Complex BLAS Test Program Results',/1X) 99998 FORMAT (' ----- PASS -----') END SUBROUTINE HEADER * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Arrays .. CHARACTER*6 L(10) * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA L(1)/'ZDOTC '/ DATA L(2)/'ZDOTU '/ DATA L(3)/'ZAXPY '/ DATA L(4)/'ZCOPY '/ DATA L(5)/'ZSWAP '/ DATA L(6)/'DZNRM2'/ DATA L(7)/'DZASUM'/ DATA L(8)/'ZSCAL '/ DATA L(9)/'ZDSCAL'/ DATA L(10)/'IZAMAX'/ * .. Executable Statements .. WRITE (NOUT,99999) ICASE, L(ICASE) RETURN * 99999 FORMAT (/' Test of subprogram number',I3,12X,A6) END SUBROUTINE CHECK1(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. COMPLEX*16 CA DOUBLE PRECISION SA INTEGER I, J, LEN, NP1 * .. Local Arrays .. COMPLEX*16 CTRUE5(8,5,2), CTRUE6(8,5,2), CV(8,5,2), CX(8), + MWPCS(5), MWPCT(5) DOUBLE PRECISION STRUE2(5), STRUE4(5) INTEGER ITRUE3(5) * .. External Functions .. DOUBLE PRECISION DZASUM, DZNRM2 INTEGER IZAMAX EXTERNAL DZASUM, DZNRM2, IZAMAX * .. External Subroutines .. EXTERNAL ZSCAL, ZDSCAL, CTEST, ITEST1, STEST1 * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. 
DATA SA, CA/0.3D0, (0.4D0,-0.7D0)/ DATA ((CV(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (0.3D0,-0.4D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (0.1D0,-0.3D0), (0.5D0,-0.1D0), (5.0D0,6.0D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (0.1D0,0.1D0), + (-0.6D0,0.1D0), (0.1D0,-0.3D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (0.3D0,0.1D0), (0.5D0,0.0D0), + (0.0D0,0.5D0), (0.0D0,0.2D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CV(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (0.3D0,-0.4D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (0.1D0,-0.3D0), (8.0D0,9.0D0), (0.5D0,-0.1D0), + (2.0D0,5.0D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (2.0D0,5.0D0), (2.0D0,5.0D0), (0.1D0,0.1D0), + (3.0D0,6.0D0), (-0.6D0,0.1D0), (4.0D0,7.0D0), + (0.1D0,-0.3D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.3D0,0.1D0), (5.0D0,8.0D0), + (0.5D0,0.0D0), (6.0D0,9.0D0), (0.0D0,0.5D0), + (8.0D0,3.0D0), (0.0D0,0.2D0), (9.0D0,4.0D0)/ DATA STRUE2/0.0D0, 0.5D0, 0.6D0, 0.7D0, 0.8D0/ DATA STRUE4/0.0D0, 0.7D0, 1.0D0, 1.3D0, 1.6D0/ DATA ((CTRUE5(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (-0.16D0,-0.37D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (-0.17D0,-0.19D0), (0.13D0,-0.39D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (0.11D0,-0.03D0), (-0.17D0,0.46D0), + (-0.17D0,-0.19D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), 
(7.0D0,8.0D0), + (0.19D0,-0.17D0), (0.20D0,-0.35D0), + (0.35D0,0.20D0), (0.14D0,0.08D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0), + (2.0D0,3.0D0)/ DATA ((CTRUE5(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (-0.16D0,-0.37D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (-0.17D0,-0.19D0), (8.0D0,9.0D0), + (0.13D0,-0.39D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (2.0D0,5.0D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (0.11D0,-0.03D0), (3.0D0,6.0D0), + (-0.17D0,0.46D0), (4.0D0,7.0D0), + (-0.17D0,-0.19D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.19D0,-0.17D0), (5.0D0,8.0D0), + (0.20D0,-0.35D0), (6.0D0,9.0D0), + (0.35D0,0.20D0), (8.0D0,3.0D0), + (0.14D0,0.08D0), (9.0D0,4.0D0)/ DATA ((CTRUE6(I,J,1),I=1,8),J=1,5)/(0.1D0,0.1D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (1.0D0,2.0D0), (1.0D0,2.0D0), + (1.0D0,2.0D0), (0.09D0,-0.12D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (3.0D0,4.0D0), (3.0D0,4.0D0), (3.0D0,4.0D0), + (0.03D0,-0.09D0), (0.15D0,-0.03D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (5.0D0,6.0D0), (5.0D0,6.0D0), (5.0D0,6.0D0), + (0.03D0,0.03D0), (-0.18D0,0.03D0), + (0.03D0,-0.09D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (7.0D0,8.0D0), (7.0D0,8.0D0), (7.0D0,8.0D0), + (0.09D0,0.03D0), (0.15D0,0.00D0), + (0.00D0,0.15D0), (0.00D0,0.06D0), (2.0D0,3.0D0), + (2.0D0,3.0D0), (2.0D0,3.0D0), (2.0D0,3.0D0)/ DATA ((CTRUE6(I,J,2),I=1,8),J=1,5)/(0.1D0,0.1D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (4.0D0,5.0D0), (4.0D0,5.0D0), + (4.0D0,5.0D0), (0.09D0,-0.12D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (6.0D0,7.0D0), (6.0D0,7.0D0), (6.0D0,7.0D0), + (0.03D0,-0.09D0), (8.0D0,9.0D0), + (0.15D0,-0.03D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (2.0D0,5.0D0), (2.0D0,5.0D0), (2.0D0,5.0D0), + (0.03D0,0.03D0), (3.0D0,6.0D0), + 
(-0.18D0,0.03D0), (4.0D0,7.0D0), + (0.03D0,-0.09D0), (7.0D0,2.0D0), (7.0D0,2.0D0), + (7.0D0,2.0D0), (0.09D0,0.03D0), (5.0D0,8.0D0), + (0.15D0,0.00D0), (6.0D0,9.0D0), (0.00D0,0.15D0), + (8.0D0,3.0D0), (0.00D0,0.06D0), (9.0D0,4.0D0)/ DATA ITRUE3/0, 1, 2, 2, 2/ * .. Executable Statements .. DO 60 INCX = 1, 2 DO 40 NP1 = 1, 5 N = NP1 - 1 LEN = 2*MAX(N,1) * .. Set vector arguments .. DO 20 I = 1, LEN CX(I) = CV(I,NP1,INCX) 20 CONTINUE IF (ICASE.EQ.6) THEN * .. DZNRM2 .. CALL STEST1(DZNRM2(N,CX,INCX),STRUE2(NP1),STRUE2(NP1), + SFAC) ELSE IF (ICASE.EQ.7) THEN * .. DZASUM .. CALL STEST1(DZASUM(N,CX,INCX),STRUE4(NP1),STRUE4(NP1), + SFAC) ELSE IF (ICASE.EQ.8) THEN * .. ZSCAL .. CALL ZSCAL(N,CA,CX,INCX) CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.9) THEN * .. ZDSCAL .. CALL ZDSCAL(N,SA,CX,INCX) CALL CTEST(LEN,CX,CTRUE6(1,NP1,INCX),CTRUE6(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.10) THEN * .. IZAMAX .. CALL ITEST1(IZAMAX(N,CX,INCX),ITRUE3(NP1)) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK1' STOP END IF * 40 CONTINUE 60 CONTINUE * INCX = 1 IF (ICASE.EQ.8) THEN * ZSCAL * Add a test for alpha equal to zero. CA = (0.0D0,0.0D0) DO 80 I = 1, 5 MWPCT(I) = (0.0D0,0.0D0) MWPCS(I) = (1.0D0,1.0D0) 80 CONTINUE CALL ZSCAL(5,CA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) ELSE IF (ICASE.EQ.9) THEN * ZDSCAL * Add a test for alpha equal to zero. SA = 0.0D0 DO 100 I = 1, 5 MWPCT(I) = (0.0D0,0.0D0) MWPCS(I) = (1.0D0,1.0D0) 100 CONTINUE CALL ZDSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) * Add a test for alpha equal to one. SA = 1.0D0 DO 120 I = 1, 5 MWPCT(I) = CX(I) MWPCS(I) = CX(I) 120 CONTINUE CALL ZDSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) * Add a test for alpha equal to minus one. SA = -1.0D0 DO 140 I = 1, 5 MWPCT(I) = -CX(I) MWPCS(I) = -CX(I) 140 CONTINUE CALL ZDSCAL(5,SA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) END IF RETURN END SUBROUTINE CHECK2(SFAC) * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. 
DOUBLE PRECISION SFAC * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. COMPLEX*16 CA INTEGER I, J, KI, KN, KSIZE, LENX, LENY, MX, MY * .. Local Arrays .. COMPLEX*16 CDOT(1), CSIZE1(4), CSIZE2(7,2), CSIZE3(14), + CT10X(7,4,4), CT10Y(7,4,4), CT6(4,4), CT7(4,4), + CT8(7,4,4), CX(7), CX1(7), CY(7), CY1(7) INTEGER INCXS(4), INCYS(4), LENS(4,2), NS(4) * .. External Functions .. COMPLEX*16 ZDOTC, ZDOTU EXTERNAL ZDOTC, ZDOTU * .. External Subroutines .. EXTERNAL ZAXPY, ZCOPY, ZSWAP, CTEST * .. Intrinsic Functions .. INTRINSIC ABS, MIN * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Data statements .. DATA CA/(0.4D0,-0.7D0)/ DATA INCXS/1, 2, -2, -1/ DATA INCYS/1, -2, 1, -2/ DATA LENS/1, 1, 2, 4, 1, 1, 3, 7/ DATA NS/0, 1, 2, 4/ DATA CX1/(0.7D0,-0.8D0), (-0.4D0,-0.7D0), + (-0.1D0,-0.9D0), (0.2D0,-0.8D0), + (-0.9D0,-0.4D0), (0.1D0,0.4D0), (-0.6D0,0.6D0)/ DATA CY1/(0.6D0,-0.6D0), (-0.9D0,0.5D0), + (0.7D0,-0.6D0), (0.1D0,-0.5D0), (-0.1D0,-0.2D0), + (-0.5D0,-0.3D0), (0.8D0,-0.7D0)/ DATA ((CT8(I,J,1),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.32D0,-1.41D0), + (-1.55D0,0.5D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (-1.55D0,0.5D0), + (0.03D0,-0.89D0), (-0.38D0,-0.96D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0)/ DATA ((CT8(I,J,2),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.07D0,-0.89D0), + (-0.9D0,0.5D0), (0.42D0,-1.41D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.78D0,0.06D0), (-0.9D0,0.5D0), + (0.06D0,-0.13D0), (0.1D0,-0.5D0), + 
(-0.77D0,-0.49D0), (-0.5D0,-0.3D0), + (0.52D0,-1.51D0)/ DATA ((CT8(I,J,3),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.07D0,-0.89D0), + (-1.18D0,-0.31D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.78D0,0.06D0), (-1.54D0,0.97D0), + (0.03D0,-0.89D0), (-0.18D0,-1.31D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0)/ DATA ((CT8(I,J,4),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.32D0,-1.41D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.32D0,-1.41D0), (-0.9D0,0.5D0), + (0.05D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.32D0,-1.41D0), + (-0.9D0,0.5D0), (0.05D0,-0.6D0), (0.1D0,-0.5D0), + (-0.77D0,-0.49D0), (-0.5D0,-0.3D0), + (0.32D0,-1.16D0)/ DATA CT7/(0.0D0,0.0D0), (-0.06D0,-0.90D0), + (0.65D0,-0.47D0), (-0.34D0,-1.22D0), + (0.0D0,0.0D0), (-0.06D0,-0.90D0), + (-0.59D0,-1.46D0), (-1.04D0,-0.04D0), + (0.0D0,0.0D0), (-0.06D0,-0.90D0), + (-0.83D0,0.59D0), (0.07D0,-0.37D0), + (0.0D0,0.0D0), (-0.06D0,-0.90D0), + (-0.76D0,-1.15D0), (-1.33D0,-1.82D0)/ DATA CT6/(0.0D0,0.0D0), (0.90D0,0.06D0), + (0.91D0,-0.77D0), (1.80D0,-0.10D0), + (0.0D0,0.0D0), (0.90D0,0.06D0), (1.45D0,0.74D0), + (0.20D0,0.90D0), (0.0D0,0.0D0), (0.90D0,0.06D0), + (-0.55D0,0.23D0), (0.83D0,-0.39D0), + (0.0D0,0.0D0), (0.90D0,0.06D0), (1.04D0,0.79D0), + (1.95D0,1.22D0)/ DATA ((CT10X(I,J,1),I=1,7),J=1,4)/(0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.6D0,-0.6D0), (-0.9D0,0.5D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), 
(0.0D0,0.0D0), (0.6D0,-0.6D0), + (-0.9D0,0.5D0), (0.7D0,-0.6D0), (0.1D0,-0.5D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0)/ DATA ((CT10X(I,J,2),I=1,7),J=1,4)/(0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.7D0,-0.6D0), (-0.4D0,-0.7D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.8D0,-0.7D0), + (-0.4D0,-0.7D0), (-0.1D0,-0.2D0), + (0.2D0,-0.8D0), (0.7D0,-0.6D0), (0.1D0,0.4D0), + (0.6D0,-0.6D0)/ DATA ((CT10X(I,J,3),I=1,7),J=1,4)/(0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.9D0,0.5D0), (-0.4D0,-0.7D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.1D0,-0.5D0), + (-0.4D0,-0.7D0), (0.7D0,-0.6D0), (0.2D0,-0.8D0), + (-0.9D0,0.5D0), (0.1D0,0.4D0), (0.6D0,-0.6D0)/ DATA ((CT10X(I,J,4),I=1,7),J=1,4)/(0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.6D0,-0.6D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.6D0,-0.6D0), (0.7D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.6D0,-0.6D0), + (0.7D0,-0.6D0), (-0.1D0,-0.2D0), (0.8D0,-0.7D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0)/ DATA ((CT10Y(I,J,1),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.7D0,-0.8D0), (-0.4D0,-0.7D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.7D0,-0.8D0), + (-0.4D0,-0.7D0), 
(-0.1D0,-0.9D0), + (0.2D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0)/ DATA ((CT10Y(I,J,2),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.1D0,-0.9D0), (-0.9D0,0.5D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (-0.6D0,0.6D0), + (-0.9D0,0.5D0), (-0.9D0,-0.4D0), (0.1D0,-0.5D0), + (-0.1D0,-0.9D0), (-0.5D0,-0.3D0), + (0.7D0,-0.8D0)/ DATA ((CT10Y(I,J,3),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (-0.1D0,-0.9D0), (0.7D0,-0.8D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (-0.6D0,0.6D0), + (-0.9D0,-0.4D0), (-0.1D0,-0.9D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0)/ DATA ((CT10Y(I,J,4),I=1,7),J=1,4)/(0.6D0,-0.6D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.7D0,-0.8D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.7D0,-0.8D0), (-0.9D0,0.5D0), + (-0.4D0,-0.7D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.7D0,-0.8D0), + (-0.9D0,0.5D0), (-0.4D0,-0.7D0), (0.1D0,-0.5D0), + (-0.1D0,-0.9D0), (-0.5D0,-0.3D0), + (0.2D0,-0.8D0)/ DATA CSIZE1/(0.0D0,0.0D0), (0.9D0,0.9D0), + (1.63D0,1.73D0), (2.90D0,2.78D0)/ DATA CSIZE3/(0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (1.17D0,1.17D0), + (1.17D0,1.17D0), (1.17D0,1.17D0), + (1.17D0,1.17D0), (1.17D0,1.17D0), + (1.17D0,1.17D0), (1.17D0,1.17D0)/ DATA CSIZE2/(0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), (0.0D0,0.0D0), + (0.0D0,0.0D0), (0.0D0,0.0D0), 
(1.54D0,1.54D0), + (1.54D0,1.54D0), (1.54D0,1.54D0), + (1.54D0,1.54D0), (1.54D0,1.54D0), + (1.54D0,1.54D0), (1.54D0,1.54D0)/ * .. Executable Statements .. DO 60 KI = 1, 4 INCX = INCXS(KI) INCY = INCYS(KI) MX = ABS(INCX) MY = ABS(INCY) * DO 40 KN = 1, 4 N = NS(KN) KSIZE = MIN(2,KN) LENX = LENS(KN,MX) LENY = LENS(KN,MY) * .. initialize all argument arrays .. DO 20 I = 1, 7 CX(I) = CX1(I) CY(I) = CY1(I) 20 CONTINUE IF (ICASE.EQ.1) THEN * .. ZDOTC .. CDOT(1) = ZDOTC(N,CX,INCX,CY,INCY) CALL CTEST(1,CDOT,CT6(KN,KI),CSIZE1(KN),SFAC) ELSE IF (ICASE.EQ.2) THEN * .. ZDOTU .. CDOT(1) = ZDOTU(N,CX,INCX,CY,INCY) CALL CTEST(1,CDOT,CT7(KN,KI),CSIZE1(KN),SFAC) ELSE IF (ICASE.EQ.3) THEN * .. ZAXPY .. CALL ZAXPY(N,CA,CX,INCX,CY,INCY) CALL CTEST(LENY,CY,CT8(1,KN,KI),CSIZE2(1,KSIZE),SFAC) ELSE IF (ICASE.EQ.4) THEN * .. ZCOPY .. CALL ZCOPY(N,CX,INCX,CY,INCY) CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0D0) ELSE IF (ICASE.EQ.5) THEN * .. ZSWAP .. CALL ZSWAP(N,CX,INCX,CY,INCY) CALL CTEST(LENX,CX,CT10X(1,KN,KI),CSIZE3,1.0D0) CALL CTEST(LENY,CY,CT10Y(1,KN,KI),CSIZE3,1.0D0) ELSE WRITE (NOUT,*) ' Shouldn''t be here in CHECK2' STOP END IF * 40 CONTINUE 60 CONTINUE RETURN END SUBROUTINE STEST(LEN,SCOMP,STRUE,SSIZE,SFAC) * ********************************* STEST ************************** * * THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO * SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE * NEGLIGIBLE. * * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT DOUBLE PRECISION ZERO PARAMETER (NOUT=6, ZERO=0.0D0) * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN * .. Array Arguments .. DOUBLE PRECISION SCOMP(LEN), SSIZE(LEN), STRUE(LEN) * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. DOUBLE PRECISION SD INTEGER I * .. External Functions .. DOUBLE PRECISION SDIFF EXTERNAL SDIFF * .. Intrinsic Functions .. INTRINSIC ABS * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. 
Executable Statements .. * DO 40 I = 1, LEN SD = SCOMP(I) - STRUE(I) IF (ABS(SFAC*SD) .LE. ABS(SSIZE(I))*EPSILON(ZERO)) + GO TO 40 * * HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, I, SCOMP(I), + STRUE(I), SD, SSIZE(I) 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY MODE I ', + ' COMP(I) TRUE(I) DIFFERENCE', + ' SIZE(I)',/1X) 99997 FORMAT (1X,I4,I3,3I5,I3,2D36.8,2D12.4) END SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC) * ************************* STEST1 ***************************** * * THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN * REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE * ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. DOUBLE PRECISION SCOMP1, SFAC, STRUE1 * .. Array Arguments .. DOUBLE PRECISION SSIZE(*) * .. Local Arrays .. DOUBLE PRECISION SCOMP(1), STRUE(1) * .. External Subroutines .. EXTERNAL STEST * .. Executable Statements .. * SCOMP(1) = SCOMP1 STRUE(1) = STRUE1 CALL STEST(1,SCOMP,STRUE,SSIZE,SFAC) * RETURN END DOUBLE PRECISION FUNCTION SDIFF(SA,SB) * ********************************* SDIFF ************************** * COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 * * .. Scalar Arguments .. DOUBLE PRECISION SA, SB * .. Executable Statements .. SDIFF = SA - SB RETURN END SUBROUTINE CTEST(LEN,CCOMP,CTRUE,CSIZE,SFAC) * **************************** CTEST ***************************** * * C.L. LAWSON, JPL, 1978 DEC 6 * * .. Scalar Arguments .. DOUBLE PRECISION SFAC INTEGER LEN * .. Array Arguments .. COMPLEX*16 CCOMP(LEN), CSIZE(LEN), CTRUE(LEN) * .. Local Scalars .. INTEGER I * .. Local Arrays .. DOUBLE PRECISION SCOMP(20), SSIZE(20), STRUE(20) * .. External Subroutines .. EXTERNAL STEST * .. Intrinsic Functions .. INTRINSIC DIMAG, DBLE * .. 
Executable Statements .. DO 20 I = 1, LEN SCOMP(2*I-1) = DBLE(CCOMP(I)) SCOMP(2*I) = DIMAG(CCOMP(I)) STRUE(2*I-1) = DBLE(CTRUE(I)) STRUE(2*I) = DIMAG(CTRUE(I)) SSIZE(2*I-1) = DBLE(CSIZE(I)) SSIZE(2*I) = DIMAG(CSIZE(I)) 20 CONTINUE * CALL STEST(2*LEN,SCOMP,STRUE,SSIZE,SFAC) RETURN END SUBROUTINE ITEST1(ICOMP,ITRUE) * ********************************* ITEST1 ************************* * * THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR * EQUALITY. * C. L. LAWSON, JPL, 1974 DEC 10 * * .. Parameters .. INTEGER NOUT PARAMETER (NOUT=6) * .. Scalar Arguments .. INTEGER ICOMP, ITRUE * .. Scalars in Common .. INTEGER ICASE, INCX, INCY, MODE, N LOGICAL PASS * .. Local Scalars .. INTEGER ID * .. Common blocks .. COMMON /COMBLA/ICASE, N, INCX, INCY, MODE, PASS * .. Executable Statements .. IF (ICOMP.EQ.ITRUE) GO TO 40 * * HERE ICOMP IS NOT EQUAL TO ITRUE. * IF ( .NOT. PASS) GO TO 20 * PRINT FAIL MESSAGE AND HEADER. PASS = .FALSE. WRITE (NOUT,99999) WRITE (NOUT,99998) 20 ID = ICOMP - ITRUE WRITE (NOUT,99997) ICASE, N, INCX, INCY, MODE, ICOMP, ITRUE, ID 40 CONTINUE RETURN * 99999 FORMAT (' FAIL') 99998 FORMAT (/' CASE N INCX INCY MODE ', + ' COMP TRUE DIFFERENCE', + /1X) 99997 FORMAT (1X,I4,I3,3I5,2I36,I12) END blis-0.6.1/blastest/src/fortran/zblat2.f000066400000000000000000003444131360743507500201220ustar00rootroot00000000000000*> \brief \b ZBLAT2 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM ZBLAT2 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX*16 Level 2 Blas. *> *> The program must be driven by a short data file. The first 18 records *> of the file are read using list-directed input, the last 17 records *> are read using the format ( A6, L2 ). 
An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 35 lines: *> 'zblat2.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 4 NUMBER OF VALUES OF K *> 0 1 2 4 VALUES OF K *> 4 NUMBER OF VALUES OF INCX AND INCY *> 1 2 -1 -2 VALUES OF INCX AND INCY *> 3 NUMBER OF VALUES OF ALPHA *> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA *> ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTBSV T PUT F FOR NO TEST. SAME COLUMNS. *> ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. *> ZGERC T PUT F FOR NO TEST. SAME COLUMNS. *> ZGERU T PUT F FOR NO TEST. SAME COLUMNS. *> ZHER T PUT F FOR NO TEST. SAME COLUMNS. *> ZHPR T PUT F FOR NO TEST. SAME COLUMNS. *> ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. *> ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. *> An extended set of Fortran Basic Linear Algebra Subprograms. *> *> Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics *> and Computer Science Division, Argonne National Laboratory, *> 9700 South Cass Avenue, Argonne, Illinois 60439, US. 
*> *> Or *> *> NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms *> Group Ltd., NAG Central Office, 256 Banbury Road, Oxford *> OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st *> Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. *> *> *> -- Written on 10-August-1987. *> Richard Hanson, Sandia National Labs. *> Jeremy Du Croz, NAG Central Office. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex16_blas_testing * * ===================================================================== PROGRAM ZBLAT2 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 17 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX, INCMAX PARAMETER ( NMAX = 65, INCMAX = 2 ) INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7, $ NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NINC, NKB, $ NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANS CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. 
COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), BET( NBEMAX ), $ X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( 2*NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDMAX ), INC( NINMAX ), KB( NKBMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. DOUBLE PRECISION DDIFF LOGICAL LZE EXTERNAL DDIFF, LZE * .. External Subroutines .. EXTERNAL ZCHK1, ZCHK2, ZCHK3, ZCHK4, ZCHK5, ZCHK6, $ ZCHKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'ZGEMV ', 'ZGBMV ', 'ZHEMV ', 'ZHBMV ', $ 'ZHPMV ', 'ZTRMV ', 'ZTBMV ', 'ZTPMV ', $ 'ZTRSV ', 'ZTBSV ', 'ZTPSV ', 'ZGERC ', $ 'ZGERU ', 'ZHER ', 'ZHPR ', 'ZHER2 ', $ 'ZHPR2 '/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. 
* * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 230 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 230 END IF 10 CONTINUE * Values of K READ( NIN, FMT = * )NKB IF( NKB.LT.1.OR.NKB.GT.NKBMAX )THEN WRITE( NOUT, FMT = 9997 )'K', NKBMAX GO TO 230 END IF READ( NIN, FMT = * )( KB( I ), I = 1, NKB ) DO 20 I = 1, NKB IF( KB( I ).LT.0 )THEN WRITE( NOUT, FMT = 9995 ) GO TO 230 END IF 20 CONTINUE * Values of INCX and INCY READ( NIN, FMT = * )NINC IF( NINC.LT.1.OR.NINC.GT.NINMAX )THEN WRITE( NOUT, FMT = 9997 )'INCX AND INCY', NINMAX GO TO 230 END IF READ( NIN, FMT = * )( INC( I ), I = 1, NINC ) DO 30 I = 1, NINC IF( INC( I ).EQ.0.OR.ABS( INC( I ) ).GT.INCMAX )THEN WRITE( NOUT, FMT = 9994 )INCMAX GO TO 230 END IF 30 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 230 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 230 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9993 ) WRITE( NOUT, FMT = 9992 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9991 )( KB( I ), I = 1, NKB ) WRITE( NOUT, FMT = 9990 )( INC( I ), I = 1, NINC ) WRITE( NOUT, FMT = 9989 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9988 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9980 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 40 I = 1, NSUBS LTEST( I ) = .FALSE. 
40 CONTINUE 50 READ( NIN, FMT = 9984, END = 80 )SNAMET, LTESTT DO 60 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 70 60 CONTINUE WRITE( NOUT, FMT = 9986 )SNAMET STOP 70 LTEST( I ) = LTESTT GO TO 50 * 80 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMVCH using exact data. * N = MIN( 32, NMAX ) DO 120 J = 1, N DO 110 I = 1, N A( I, J ) = MAX( I - J + 1, 0 ) 110 CONTINUE X( J ) = J Y( J ) = ZERO 120 CONTINUE DO 130 J = 1, N YY( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE * YY holds the exact result. On exit from ZMVCH YT holds * the result computed by ZMVCH. TRANS = 'N' CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, X, 1, ZERO, Y, 1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF TRANS = 'T' CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, X, -1, ZERO, Y, -1, YT, G, $ YY, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( YY, YT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9985 )TRANS, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 210 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9983 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL ZCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 140, 150, 150, 150, 160, 160, $ 160, 160, 160, 160, 170, 170, 180, $ 180, 190, 190 )ISNUM * Test ZGEMV, 01, and ZGBMV, 02. 140 CALL ZCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test ZHEMV, 03, ZHBMV, 04, and ZHPMV, 05. 
150 CALL ZCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, $ NBET, BET, NINC, INC, NMAX, INCMAX, A, AA, AS, $ X, XX, XS, Y, YY, YS, YT, G ) GO TO 200 * Test ZTRMV, 06, ZTBMV, 07, ZTPMV, 08, * ZTRSV, 09, ZTBSV, 10, and ZTPSV, 11. 160 CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, $ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z ) GO TO 200 * Test ZGERC, 12, ZGERU, 13. 170 CALL ZCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test ZHER, 14, and ZHPR, 15. 180 CALL ZCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) GO TO 200 * Test ZHER2, 16, and ZHPR2, 17. 190 CALL ZCHK6( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, $ NMAX, INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, $ YT, G, Z ) * 200 IF( FATAL.AND.SFATAL ) $ GO TO 220 END IF 210 CONTINUE WRITE( NOUT, FMT = 9982 ) GO TO 240 * 220 CONTINUE WRITE( NOUT, FMT = 9981 ) GO TO 240 * 230 CONTINUE WRITE( NOUT, FMT = 9987 ) * 240 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, D9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' VALUE OF K IS LESS THAN 0' ) 9994 FORMAT( ' ABSOLUTE VALUE OF INCX OR INCY IS 0 OR GREATER THAN ', $ I2 ) 9993 FORMAT( ' TESTS OF THE COMPLEX*16 LEVEL 2 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9992 FORMAT( ' FOR N ', 9I6 ) 9991 FORMAT( ' FOR K ', 7I6 ) 9990 FORMAT( ' FOR INCX AND INCY ', 
7I6 ) 9989 FORMAT( ' FOR ALPHA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9988 FORMAT( ' FOR BETA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9987 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 9986 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9985 FORMAT( ' ERROR IN ZMVCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' ZMVCH WAS CALLED WITH TRANS = ', A1, $ ' AND RETURNED SAME = ', L1, ' AND ERR = ', F12.3, '.', / $ ' THIS MAY BE DUE TO FAULTS IN THE ARITHMETIC OR THE COMPILER.' $ , /' ******* TESTS ABANDONED *******' ) 9984 FORMAT( A6, L2 ) 9983 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9982 FORMAT( /' END OF TESTS' ) 9981 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9980 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of ZBLAT2. * END SUBROUTINE ZCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests ZGEMV and ZGBMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. 
COMPLEX*16 ALPHA, ALS, BETA, BLS, TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IB, IC, IKU, IM, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, KL, KLS, KU, KUS, LAA, LDA, $ LDAS, LX, LY, M, ML, MS, N, NARGS, NC, ND, NK, $ NL, NS LOGICAL BANDED, FULL, NULL, RESET, SAME, TRAN CHARACTER*1 TRANS, TRANSS CHARACTER*3 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZGBMV, ZGEMV, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' * Define the number of arguments. IF( FULL )THEN NARGS = 11 ELSE IF( BANDED )THEN NARGS = 13 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IKU = 1, NK IF( BANDED )THEN KU = KB( IKU ) KL = MAX( KU - 1, 0 ) ELSE KU = N - 1 KL = M - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = KL + KU + 1 ELSE LDA = M END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, AA, $ LDA, KL, KU, RESET, TRANSL ) * DO 90 IC = 1, 3 TRANS = ICH( IC: IC ) TRAN = TRANS.EQ.'T'.OR.TRANS.EQ.'C' * IF( TRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*NL * * Generate the vector X. 
* TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, NL, X, 1, XX, $ ABS( INCX ), 0, NL - 1, RESET, TRANSL ) IF( NL.GT.1 )THEN X( NL/2 ) = ZERO XX( 1 + ABS( INCX )*( NL/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*ML * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL ZMAKE( 'GE', ' ', ' ', 1, ML, Y, 1, $ YY, ABS( INCY ), 0, ML - 1, $ RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * TRANSS = TRANS MS = M NS = N KLS = KL KUS = KU ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ TRANS, M, N, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL ZGEMV( TRANS, M, N, ALPHA, AA, $ LDA, XX, INCX, BETA, YY, $ INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANS, M, N, KL, KU, ALPHA, LDA, $ INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL ZGBMV( TRANS, M, N, KL, KU, ALPHA, $ AA, LDA, XX, INCX, BETA, $ YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 130 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = TRANS.EQ.TRANSS ISAME( 2 ) = MS.EQ.M ISAME( 3 ) = NS.EQ.N IF( FULL )THEN ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LZE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LZE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LZE( YS, YY, LY ) ELSE ISAME( 10 ) = LZERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 4 ) = KLS.EQ.KL ISAME( 5 ) = KUS.EQ.KU ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LZE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LZE( XS, XX, LX ) ISAME( 10 ) = INCXS.EQ.INCX ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LZE( YS, YY, LY ) ELSE ISAME( 12 ) = LZERES( 'GE', ' ', 1, $ ML, YS, YY, $ ABS( INCY ) ) END IF ISAME( 13 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 130 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL ZMVCH( TRANS, M, N, ALPHA, A, $ NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 130 ELSE * Avoid repeating tests with M.le.0 or * N.le.0. GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 140 * 130 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, TRANS, M, N, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANS, M, N, KL, KU, $ ALPHA, LDA, INCX, BETA, INCY END IF * 140 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 4( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK1. * END SUBROUTINE ZCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NALF, ALF, NBET, $ BET, NINC, INC, NMAX, INCMAX, A, AA, AS, X, XX, $ XS, Y, YY, YS, YT, G ) * * Tests ZHEMV, ZHBMV and ZHPMV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NBET, NIDIM, NINC, NKB, NMAX, $ NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. 
COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), BET( NBET ), X( NMAX ), $ XS( NMAX*INCMAX ), XX( NMAX*INCMAX ), $ Y( NMAX ), YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BLS, TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IB, IC, IK, IN, INCX, INCXS, INCY, $ INCYS, IX, IY, K, KS, LAA, LDA, LDAS, LX, LY, $ N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHBMV, ZHEMV, ZHPMV, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 10 ELSE IF( BANDED )THEN NARGS = 11 ELSE IF( PACKED )THEN NARGS = 9 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, AA, $ LDA, K, K, RESET, TRANSL ) * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. 
* TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the vector Y. * TRANSL = ZERO CALL ZMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the * subroutine. * UPLOS = UPLO NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX BLS = BETA DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, N, ALPHA, LDA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL ZHEMV( UPLO, N, ALPHA, AA, LDA, XX, $ INCX, BETA, YY, INCY ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, N, K, ALPHA, LDA, INCX, BETA, $ INCY IF( REWI ) $ REWIND NTRA CALL ZHBMV( UPLO, N, K, ALPHA, AA, LDA, $ XX, INCX, BETA, YY, INCY ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, N, ALPHA, INCX, BETA, INCY IF( REWI ) $ REWIND NTRA CALL ZHPMV( UPLO, N, ALPHA, AA, XX, INCX, $ BETA, YY, INCY ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N IF( FULL )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LZE( AS, AA, LAA ) ISAME( 5 ) = LDAS.EQ.LDA ISAME( 6 ) = LZE( XS, XX, LX ) ISAME( 7 ) = INCXS.EQ.INCX ISAME( 8 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 9 ) = LZE( YS, YY, LY ) ELSE ISAME( 9 ) = LZERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 10 ) = INCYS.EQ.INCY ELSE IF( BANDED )THEN ISAME( 3 ) = KS.EQ.K ISAME( 4 ) = ALS.EQ.ALPHA ISAME( 5 ) = LZE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA ISAME( 7 ) = LZE( XS, XX, LX ) ISAME( 8 ) = INCXS.EQ.INCX ISAME( 9 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 10 ) = LZE( YS, YY, LY ) ELSE ISAME( 10 ) = LZERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 11 ) = INCYS.EQ.INCY ELSE IF( PACKED )THEN ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LZE( AS, AA, LAA ) ISAME( 5 ) = LZE( XS, XX, LX ) ISAME( 6 ) = INCXS.EQ.INCX ISAME( 7 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 8 ) = LZE( YS, YY, LY ) ELSE ISAME( 8 ) = LZERES( 'GE', ' ', 1, N, $ YS, YY, ABS( INCY ) ) END IF ISAME( 9 ) = INCYS.EQ.INCY END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL ZMVCH( 'N', N, N, ALPHA, A, NMAX, X, $ INCX, BETA, Y, INCY, YT, G, $ YY, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0 GO TO 110 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, LDA, INCX, $ BETA, INCY ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, K, ALPHA, LDA, $ INCX, BETA, INCY ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, N, ALPHA, INCX, $ BETA, INCY END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), AP, X,', I2, ',(', F4.1, ',', F4.1, '), Y,', I2, $ ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', 2( I3, ',' ), '(', $ F4.1, ',', F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', $ F4.1, '), Y,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), A,', I3, ', X,', I2, ',(', F4.1, ',', F4.1, '), ', $ 'Y,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK2. * END SUBROUTINE ZCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NKB, KB, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, XT, G, Z ) * * Tests ZTRMV, ZTBMV, ZTPMV, ZTRSV, ZTBSV and ZTPSV. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. 
COMPLEX*16 ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XT( NMAX ), XX( NMAX*INCMAX ), Z( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ), KB( NKB ) * .. Local Scalars .. COMPLEX*16 TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, ICD, ICT, ICU, IK, IN, INCX, INCXS, IX, K, $ KS, LAA, LDA, LDAS, LX, N, NARGS, NC, NK, NS LOGICAL BANDED, FULL, NULL, PACKED, RESET, SAME CHARACTER*1 DIAG, DIAGS, TRANS, TRANSS, UPLO, UPLOS CHARACTER*2 ICHD, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZMAKE, ZMVCH, ZTBMV, ZTBSV, ZTPMV, ZTPSV, $ ZTRMV, ZTRSV * .. Intrinsic Functions .. INTRINSIC ABS, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'R' BANDED = SNAME( 3: 3 ).EQ.'B' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 8 ELSE IF( BANDED )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 7 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * Set up zero vector for ZMVCH. DO 10 I = 1, NMAX Z( I ) = ZERO 10 CONTINUE * DO 110 IN = 1, NIDIM N = IDIM( IN ) * IF( BANDED )THEN NK = NKB ELSE NK = 1 END IF DO 100 IK = 1, NK IF( BANDED )THEN K = KB( IK ) ELSE K = N - 1 END IF * Set LDA to 1 more than minimum value if room. IF( BANDED )THEN LDA = K + 1 ELSE LDA = N END IF IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. 
IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF NULL = N.LE.0 * DO 90 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 80 ICT = 1, 3 TRANS = ICHT( ICT: ICT ) * DO 70 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), UPLO, DIAG, N, N, A, $ NMAX, AA, LDA, K, K, RESET, TRANSL ) * DO 60 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, $ ABS( INCX ), 0, N - 1, RESET, $ TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS DIAGS = DIAG NS = N KS = K DO 20 I = 1, LAA AS( I ) = AA( I ) 20 CONTINUE LDAS = LDA DO 30 I = 1, LX XS( I ) = XX( I ) 30 CONTINUE INCXS = INCX * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL ZTRMV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL ZTBMV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, INCX IF( REWI ) $ REWIND NTRA CALL ZTPMV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, $ UPLO, TRANS, DIAG, N, LDA, INCX IF( REWI ) $ REWIND NTRA CALL ZTRSV( UPLO, TRANS, DIAG, N, AA, LDA, $ XX, INCX ) ELSE IF( BANDED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, $ UPLO, TRANS, DIAG, N, K, LDA, INCX IF( REWI ) $ REWIND NTRA CALL ZTBSV( UPLO, TRANS, DIAG, N, K, AA, $ LDA, XX, INCX ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ UPLO, TRANS, DIAG, N, 
INCX IF( REWI ) $ REWIND NTRA CALL ZTPSV( UPLO, TRANS, DIAG, N, AA, XX, $ INCX ) END IF END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = TRANS.EQ.TRANSS ISAME( 3 ) = DIAG.EQ.DIAGS ISAME( 4 ) = NS.EQ.N IF( FULL )THEN ISAME( 5 ) = LZE( AS, AA, LAA ) ISAME( 6 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 7 ) = LZE( XS, XX, LX ) ELSE ISAME( 7 ) = LZERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 8 ) = INCXS.EQ.INCX ELSE IF( BANDED )THEN ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = LZE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 8 ) = LZE( XS, XX, LX ) ELSE ISAME( 8 ) = LZERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 9 ) = INCXS.EQ.INCX ELSE IF( PACKED )THEN ISAME( 5 ) = LZE( AS, AA, LAA ) IF( NULL )THEN ISAME( 6 ) = LZE( XS, XX, LX ) ELSE ISAME( 6 ) = LZERES( 'GE', ' ', 1, N, XS, $ XX, ABS( INCX ) ) END IF ISAME( 7 ) = INCXS.EQ.INCX END IF * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MV' )THEN * * Check the result. * CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, X, $ INCX, ZERO, Z, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .TRUE. ) ELSE IF( SNAME( 4: 5 ).EQ.'SV' )THEN * * Compute approximation to original vector. * DO 50 I = 1, N Z( I ) = XX( 1 + ( I - 1 )* $ ABS( INCX ) ) XX( 1 + ( I - 1 )*ABS( INCX ) ) $ = X( I ) 50 CONTINUE CALL ZMVCH( TRANS, N, N, ONE, A, NMAX, Z, $ INCX, ZERO, X, INCX, XT, G, $ XX, EPS, ERR, FATAL, NOUT, $ .FALSE. ) END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 120 ELSE * Avoid repeating tests with N.le.0. 
GO TO 110 END IF * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, DIAG, N, LDA, $ INCX ELSE IF( BANDED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, DIAG, N, K, $ LDA, INCX ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9995 )NC, SNAME, UPLO, TRANS, DIAG, N, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', AP, ', $ 'X,', I2, ') .' ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), 2( I3, ',' ), $ ' A,', I3, ', X,', I2, ') .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(', 3( '''', A1, ''',' ), I3, ', A,', $ I3, ', X,', I2, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK3. * END SUBROUTINE ZCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests ZGERC and ZGERU. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. 
DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IM, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, LAA, LDA, LDAS, LX, LY, M, MS, N, NARGS, $ NC, ND, NS LOGICAL CONJ, NULL, RESET, SAME * .. Local Arrays .. COMPLEX*16 W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZGERC, ZGERU, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, DCONJG, MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. CONJ = SNAME( 5: 5 ).EQ.'C' * Define the number of arguments. NARGS = 9 * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 120 IN = 1, NIDIM N = IDIM( IN ) ND = N/2 + 1 * DO 110 IM = 1, 2 IF( IM.EQ.1 ) $ M = MAX( N - ND, 0 ) IF( IM.EQ.2 ) $ M = MIN( N + ND, NMAX ) * * Set LDA to 1 more than minimum value if room. LDA = M IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 110 LAA = LDA*N NULL = N.LE.0.OR.M.LE.0 * DO 100 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*M * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, M, X, 1, XX, ABS( INCX ), $ 0, M - 1, RESET, TRANSL ) IF( M.GT.1 )THEN X( M/2 ) = ZERO XX( 1 + ABS( INCX )*( M/2 - 1 ) ) = ZERO END IF * DO 90 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. 
* TRANSL = ZERO CALL ZMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), ' ', ' ', M, N, A, NMAX, $ AA, LDA, M - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * MS = M NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, M, N, $ ALPHA, INCX, INCY, LDA IF( CONJ )THEN IF( REWI ) $ REWIND NTRA CALL ZGERC( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) ELSE IF( REWI ) $ REWIND NTRA CALL ZGERU( M, N, ALPHA, XX, INCX, YY, INCY, AA, $ LDA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9993 ) FATAL = .TRUE. GO TO 140 END IF * * See what data changed inside subroutine. * ISAME( 1 ) = MS.EQ.M ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LZE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LZE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LZE( AS, AA, LAA ) ELSE ISAME( 8 ) = LZERES( 'GE', ' ', M, N, AS, AA, $ LDA ) END IF ISAME( 9 ) = LDAS.EQ.LDA * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 140 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. 
* IF( INCX.GT.0 )THEN DO 50 I = 1, M Z( I ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, M Z( I ) = X( M - I + 1 ) 60 CONTINUE END IF DO 70 J = 1, N IF( INCY.GT.0 )THEN W( 1 ) = Y( J ) ELSE W( 1 ) = Y( N - J + 1 ) END IF IF( CONJ ) $ W( 1 ) = DCONJG( W( 1 ) ) CALL ZMVCH( 'N', M, 1, ALPHA, Z, NMAX, W, 1, $ ONE, A( 1, J ), 1, YT, G, $ AA( 1 + ( J - 1 )*LDA ), EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 130 70 CONTINUE ELSE * Avoid repeating tests with M.le.0 or N.le.0. GO TO 110 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 150 * 130 CONTINUE WRITE( NOUT, FMT = 9995 )J * 140 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9994 )NC, SNAME, M, N, ALPHA, INCX, INCY, LDA * 150 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( I3, ',' ), '(', F4.1, ',', F4.1, $ '), X,', I2, ', Y,', I2, ', A,', I3, ') ', $ ' .' ) 9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK4. * END SUBROUTINE ZCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests ZHER and ZHPR. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. 
* Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX*16 ALPHA, TRANSL DOUBLE PRECISION ERR, ERRMAX, RALPHA, RALS INTEGER I, IA, IC, IN, INCX, INCXS, IX, J, JA, JJ, LAA, $ LDA, LDAS, LJ, LX, N, NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. COMPLEX*16 W( 1 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHER, ZHPR, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCMPLX, DCONJG, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 7 ELSE IF( PACKED )THEN NARGS = 6 END IF * NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 100 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 90 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 80 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. 
* TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 70 IA = 1, NALF RALPHA = DBLE( ALF( IA ) ) ALPHA = DCMPLX( RALPHA, RZERO ) NULL = N.LE.0.OR.RALPHA.EQ.RZERO * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, NMAX, $ AA, LDA, N - 1, N - 1, RESET, TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N RALS = RALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ RALPHA, INCX, LDA IF( REWI ) $ REWIND NTRA CALL ZHER( UPLO, N, RALPHA, XX, INCX, AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ RALPHA, INCX IF( REWI ) $ REWIND NTRA CALL ZHPR( UPLO, N, RALPHA, XX, INCX, AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = RALS.EQ.RALPHA ISAME( 4 ) = LZE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX IF( NULL )THEN ISAME( 6 ) = LZE( AS, AA, LAA ) ELSE ISAME( 6 ) = LZERES( SNAME( 2: 3 ), UPLO, N, N, AS, $ AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 7 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. 
* IF( INCX.GT.0 )THEN DO 40 I = 1, N Z( I ) = X( I ) 40 CONTINUE ELSE DO 50 I = 1, N Z( I ) = X( N - I + 1 ) 50 CONTINUE END IF JA = 1 DO 60 J = 1, N W( 1 ) = DCONJG( Z( J ) ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL ZMVCH( 'N', LJ, 1, ALPHA, Z( JJ ), LJ, W, $ 1, ONE, A( JJ, J ), 1, YT, G, $ AA( JA ), EPS, ERR, FATAL, NOUT, $ .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 110 60 CONTINUE ELSE * Avoid repeating tests if N.le.0. IF( N.LE.0 ) $ GO TO 100 END IF * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 130 * 110 CONTINUE WRITE( NOUT, FMT = 9995 )J * 120 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, RALPHA, INCX, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, RALPHA, INCX END IF * 130 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', AP) .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',', F4.1, ', X,', $ I2, ', A,', I3, ') .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK5. 
* END SUBROUTINE ZCHK6( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NINC, INC, NMAX, $ INCMAX, A, AA, AS, X, XX, XS, Y, YY, YS, YT, G, $ Z ) * * Tests ZHER2 and ZHPR2. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, HALF, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ HALF = ( 0.5D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), X( NMAX ), XS( NMAX*INCMAX ), $ XX( NMAX*INCMAX ), Y( NMAX ), $ YS( NMAX*INCMAX ), YT( NMAX ), $ YY( NMAX*INCMAX ), Z( NMAX, 2 ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ), INC( NINC ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, TRANSL DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IC, IN, INCX, INCXS, INCY, INCYS, IX, $ IY, J, JA, JJ, LAA, LDA, LDAS, LJ, LX, LY, N, $ NARGS, NC, NS LOGICAL FULL, NULL, PACKED, RESET, SAME, UPPER CHARACTER*1 UPLO, UPLOS CHARACTER*2 ICH * .. Local Arrays .. COMPLEX*16 W( 2 ) LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHER2, ZHPR2, ZMAKE, ZMVCH * .. Intrinsic Functions .. INTRINSIC ABS, DCONJG, MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'UL'/ * .. Executable Statements .. FULL = SNAME( 3: 3 ).EQ.'E' PACKED = SNAME( 3: 3 ).EQ.'P' * Define the number of arguments. IF( FULL )THEN NARGS = 9 ELSE IF( PACKED )THEN NARGS = 8 END IF * NC = 0 RESET = .TRUE. 
ERRMAX = RZERO * DO 140 IN = 1, NIDIM N = IDIM( IN ) * Set LDA to 1 more than minimum value if room. LDA = N IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 140 IF( PACKED )THEN LAA = ( N*( N + 1 ) )/2 ELSE LAA = LDA*N END IF * DO 130 IC = 1, 2 UPLO = ICH( IC: IC ) UPPER = UPLO.EQ.'U' * DO 120 IX = 1, NINC INCX = INC( IX ) LX = ABS( INCX )*N * * Generate the vector X. * TRANSL = HALF CALL ZMAKE( 'GE', ' ', ' ', 1, N, X, 1, XX, ABS( INCX ), $ 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN X( N/2 ) = ZERO XX( 1 + ABS( INCX )*( N/2 - 1 ) ) = ZERO END IF * DO 110 IY = 1, NINC INCY = INC( IY ) LY = ABS( INCY )*N * * Generate the vector Y. * TRANSL = ZERO CALL ZMAKE( 'GE', ' ', ' ', 1, N, Y, 1, YY, $ ABS( INCY ), 0, N - 1, RESET, TRANSL ) IF( N.GT.1 )THEN Y( N/2 ) = ZERO YY( 1 + ABS( INCY )*( N/2 - 1 ) ) = ZERO END IF * DO 100 IA = 1, NALF ALPHA = ALF( IA ) NULL = N.LE.0.OR.ALPHA.EQ.ZERO * * Generate the matrix A. * TRANSL = ZERO CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, A, $ NMAX, AA, LDA, N - 1, N - 1, RESET, $ TRANSL ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO NS = N ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LX XS( I ) = XX( I ) 20 CONTINUE INCXS = INCX DO 30 I = 1, LY YS( I ) = YY( I ) 30 CONTINUE INCYS = INCY * * Call the subroutine. * IF( FULL )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY, LDA IF( REWI ) $ REWIND NTRA CALL ZHER2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA, LDA ) ELSE IF( PACKED )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, N, $ ALPHA, INCX, INCY IF( REWI ) $ REWIND NTRA CALL ZHPR2( UPLO, N, ALPHA, XX, INCX, YY, INCY, $ AA ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 160 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLO.EQ.UPLOS ISAME( 2 ) = NS.EQ.N ISAME( 3 ) = ALS.EQ.ALPHA ISAME( 4 ) = LZE( XS, XX, LX ) ISAME( 5 ) = INCXS.EQ.INCX ISAME( 6 ) = LZE( YS, YY, LY ) ISAME( 7 ) = INCYS.EQ.INCY IF( NULL )THEN ISAME( 8 ) = LZE( AS, AA, LAA ) ELSE ISAME( 8 ) = LZERES( SNAME( 2: 3 ), UPLO, N, N, $ AS, AA, LDA ) END IF IF( .NOT.PACKED )THEN ISAME( 9 ) = LDAS.EQ.LDA END IF * * If data was incorrectly changed, report and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 160 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( INCX.GT.0 )THEN DO 50 I = 1, N Z( I, 1 ) = X( I ) 50 CONTINUE ELSE DO 60 I = 1, N Z( I, 1 ) = X( N - I + 1 ) 60 CONTINUE END IF IF( INCY.GT.0 )THEN DO 70 I = 1, N Z( I, 2 ) = Y( I ) 70 CONTINUE ELSE DO 80 I = 1, N Z( I, 2 ) = Y( N - I + 1 ) 80 CONTINUE END IF JA = 1 DO 90 J = 1, N W( 1 ) = ALPHA*DCONJG( Z( J, 2 ) ) W( 2 ) = DCONJG( ALPHA )*DCONJG( Z( J, 1 ) ) IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF CALL ZMVCH( 'N', LJ, 2, ONE, Z( JJ, 1 ), $ NMAX, W, 1, ONE, A( JJ, J ), 1, $ YT, G, AA( JA ), EPS, ERR, FATAL, $ NOUT, .TRUE. ) IF( FULL )THEN IF( UPPER )THEN JA = JA + LDA ELSE JA = JA + LDA + 1 END IF ELSE JA = JA + LJ END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and return. IF( FATAL ) $ GO TO 150 90 CONTINUE ELSE * Avoid repeating tests with N.le.0. IF( N.LE.0 ) $ GO TO 140 END IF * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. 
* IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 170 * 150 CONTINUE WRITE( NOUT, FMT = 9995 )J * 160 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME IF( FULL )THEN WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, N, ALPHA, INCX, $ INCY, LDA ELSE IF( PACKED )THEN WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, N, ALPHA, INCX, INCY END IF * 170 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 ) 9994 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), X,', I2, ', Y,', I2, ', AP) ', $ ' .' ) 9993 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',', I3, ',(', F4.1, ',', $ F4.1, '), X,', I2, ', Y,', I2, ', A,', I3, ') ', $ ' .' ) 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK6. * END SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT ) * * Tests the error exits from the Level 2 Blas. * Requires a special version of the error-handling routine XERBLA. * ALPHA, RALPHA, BETA, A, X and Y should not need to be defined. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Local Scalars .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION RALPHA * .. Local Arrays .. COMPLEX*16 A( 1, 1 ), X( 1 ), Y( 1 ) * .. External Subroutines .. 
EXTERNAL CHKXER, ZGBMV, ZGEMV, ZGERC, ZGERU, ZHBMV, $ ZHEMV, ZHER, ZHER2, ZHPMV, ZHPR, ZHPR2, ZTBMV, $ ZTBSV, ZTPMV, ZTPSV, ZTRMV, ZTRSV * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90, 100, 110, 120, 130, 140, 150, 160, $ 170 )ISNUM 10 INFOT = 1 CALL ZGEMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGEMV( 'N', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMV( 'N', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZGEMV( 'N', 2, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMV( 'N', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZGEMV( 'N', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 20 INFOT = 1 CALL ZGBMV( '/', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGBMV( 'N', -1, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGBMV( 'N', 0, -1, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGBMV( 'N', 0, 0, -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGBMV( 'N', 2, 0, 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGBMV( 'N', 0, 0, 1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 
0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGBMV( 'N', 0, 0, 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 30 INFOT = 1 CALL ZHEMV( '/', 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHEMV( 'U', -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHEMV( 'U', 2, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMV( 'U', 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHEMV( 'U', 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 40 INFOT = 1 CALL ZHBMV( '/', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHBMV( 'U', -1, 0, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHBMV( 'U', 0, -1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZHBMV( 'U', 0, 1, ALPHA, A, 1, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZHBMV( 'U', 0, 0, ALPHA, A, 1, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZHBMV( 'U', 0, 0, ALPHA, A, 1, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 50 INFOT = 1 CALL ZHPMV( '/', 0, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHPMV( 'U', -1, ALPHA, A, X, 1, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZHPMV( 'U', 0, ALPHA, A, X, 0, BETA, Y, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHPMV( 'U', 0, ALPHA, A, X, 1, BETA, Y, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 60 INFOT = 1 CALL ZTRMV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTRMV( 'U', '/', 'N', 0, A, 1, X, 1 ) 
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTRMV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTRMV( 'U', 'N', 'N', -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZTRMV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 70 INFOT = 1 CALL ZTBMV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTBMV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTBMV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTBMV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTBMV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZTBMV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTBMV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 80 INFOT = 1 CALL ZTPMV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTPMV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTPMV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTPMV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZTPMV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 90 INFOT = 1 CALL ZTRSV( '/', 'N', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTRSV( 'U', '/', 'N', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTRSV( 'U', 'N', '/', 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTRSV( 'U', 'N', 'N', -1, A, 
1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSV( 'U', 'N', 'N', 2, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZTRSV( 'U', 'N', 'N', 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 100 INFOT = 1 CALL ZTBSV( '/', 'N', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTBSV( 'U', '/', 'N', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTBSV( 'U', 'N', '/', 0, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTBSV( 'U', 'N', 'N', -1, 0, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTBSV( 'U', 'N', 'N', 0, -1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZTBSV( 'U', 'N', 'N', 0, 1, A, 1, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTBSV( 'U', 'N', 'N', 0, 0, A, 1, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 110 INFOT = 1 CALL ZTPSV( '/', 'N', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTPSV( 'U', '/', 'N', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTPSV( 'U', 'N', '/', 0, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTPSV( 'U', 'N', 'N', -1, A, X, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZTPSV( 'U', 'N', 'N', 0, A, X, 0 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 120 INFOT = 1 CALL ZGERC( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGERC( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGERC( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZGERC( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZGERC( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 130 INFOT = 1 
CALL ZGERU( -1, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGERU( 0, -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGERU( 0, 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZGERU( 0, 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZGERU( 2, 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 140 INFOT = 1 CALL ZHER( '/', 0, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHER( 'U', -1, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHER( 'U', 0, RALPHA, X, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER( 'U', 2, RALPHA, X, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 150 INFOT = 1 CALL ZHPR( '/', 0, RALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHPR( 'U', -1, RALPHA, X, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHPR( 'U', 0, RALPHA, X, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 160 INFOT = 1 CALL ZHER2( '/', 0, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHER2( 'U', -1, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHER2( 'U', 0, ALPHA, X, 0, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2( 'U', 0, ALPHA, X, 1, Y, 0, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2( 'U', 2, ALPHA, X, 1, Y, 1, A, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 180 170 INFOT = 1 CALL ZHPR2( '/', 0, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHPR2( 'U', -1, ALPHA, X, 1, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZHPR2( 'U', 0, ALPHA, X, 0, Y, 1, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK 
) INFOT = 7 CALL ZHPR2( 'U', 0, ALPHA, X, 1, Y, 0, A ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 180 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of ZCHKE. * END SUBROUTINE ZMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, KL, $ KU, RESET, TRANSL ) * * Generates values for an M by N matrix A within the bandwidth * defined by KL and KU. * Stores the values in the array AA in the data structure required * by the routine, with unwanted elements set to rogue value. * * TYPE is 'GE', 'GB', 'HE', 'HB', 'HP', 'TR', 'TB' OR 'TP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) COMPLEX*16 ROGUE PARAMETER ( ROGUE = ( -1.0D10, 1.0D10 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) DOUBLE PRECISION RROGUE PARAMETER ( RROGUE = -1.0D10 ) * .. Scalar Arguments .. COMPLEX*16 TRANSL INTEGER KL, KU, LDA, M, N, NMAX LOGICAL RESET CHARACTER*1 DIAG, UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX*16 A( NMAX, * ), AA( * ) * .. Local Scalars .. INTEGER I, I1, I2, I3, IBEG, IEND, IOFF, J, JJ, KK LOGICAL GEN, LOWER, SYM, TRI, UNIT, UPPER * .. External Functions .. COMPLEX*16 ZBEG EXTERNAL ZBEG * .. Intrinsic Functions .. INTRINSIC DBLE, DCMPLX, DCONJG, MAX, MIN * .. Executable Statements .. GEN = TYPE( 1: 1 ).EQ.'G' SYM = TYPE( 1: 1 ).EQ.'H' TRI = TYPE( 1: 1 ).EQ.'T' UPPER = ( SYM.OR.TRI ).AND.UPLO.EQ.'U' LOWER = ( SYM.OR.TRI ).AND.UPLO.EQ.'L' UNIT = TRI.AND.DIAG.EQ.'U' * * Generate data in array A. * DO 20 J = 1, N DO 10 I = 1, M IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) ) $ THEN IF( ( I.LE.J.AND.J - I.LE.KU ).OR. 
$ ( I.GE.J.AND.I - J.LE.KL ) )THEN A( I, J ) = ZBEG( RESET ) + TRANSL ELSE A( I, J ) = ZERO END IF IF( I.NE.J )THEN IF( SYM )THEN A( J, I ) = DCONJG( A( I, J ) ) ELSE IF( TRI )THEN A( J, I ) = ZERO END IF END IF END IF 10 CONTINUE IF( SYM ) $ A( J, J ) = DCMPLX( DBLE( A( J, J ) ), RZERO ) IF( TRI ) $ A( J, J ) = A( J, J ) + ONE IF( UNIT ) $ A( J, J ) = ONE 20 CONTINUE * * Store elements in array AS in data structure required by routine. * IF( TYPE.EQ.'GE' )THEN DO 50 J = 1, N DO 30 I = 1, M AA( I + ( J - 1 )*LDA ) = A( I, J ) 30 CONTINUE DO 40 I = M + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 40 CONTINUE 50 CONTINUE ELSE IF( TYPE.EQ.'GB' )THEN DO 90 J = 1, N DO 60 I1 = 1, KU + 1 - J AA( I1 + ( J - 1 )*LDA ) = ROGUE 60 CONTINUE DO 70 I2 = I1, MIN( KL + KU + 1, KU + 1 + M - J ) AA( I2 + ( J - 1 )*LDA ) = A( I2 + J - KU - 1, J ) 70 CONTINUE DO 80 I3 = I2, LDA AA( I3 + ( J - 1 )*LDA ) = ROGUE 80 CONTINUE 90 CONTINUE ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'TR' )THEN DO 130 J = 1, N IF( UPPER )THEN IBEG = 1 IF( UNIT )THEN IEND = J - 1 ELSE IEND = J END IF ELSE IF( UNIT )THEN IBEG = J + 1 ELSE IBEG = J END IF IEND = N END IF DO 100 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 100 CONTINUE DO 110 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I, J ) 110 CONTINUE DO 120 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 120 CONTINUE IF( SYM )THEN JJ = J + ( J - 1 )*LDA AA( JJ ) = DCMPLX( DBLE( AA( JJ ) ), RROGUE ) END IF 130 CONTINUE ELSE IF( TYPE.EQ.'HB'.OR.TYPE.EQ.'TB' )THEN DO 170 J = 1, N IF( UPPER )THEN KK = KL + 1 IBEG = MAX( 1, KL + 2 - J ) IF( UNIT )THEN IEND = KL ELSE IEND = KL + 1 END IF ELSE KK = 1 IF( UNIT )THEN IBEG = 2 ELSE IBEG = 1 END IF IEND = MIN( KL + 1, 1 + M - J ) END IF DO 140 I = 1, IBEG - 1 AA( I + ( J - 1 )*LDA ) = ROGUE 140 CONTINUE DO 150 I = IBEG, IEND AA( I + ( J - 1 )*LDA ) = A( I + J - KK, J ) 150 CONTINUE DO 160 I = IEND + 1, LDA AA( I + ( J - 1 )*LDA ) = ROGUE 160 CONTINUE IF( SYM )THEN JJ = KK + ( J - 1 )*LDA AA( JJ ) = DCMPLX( DBLE( AA( JJ ) ), 
RROGUE ) END IF 170 CONTINUE ELSE IF( TYPE.EQ.'HP'.OR.TYPE.EQ.'TP' )THEN IOFF = 0 DO 190 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 180 I = IBEG, IEND IOFF = IOFF + 1 AA( IOFF ) = A( I, J ) IF( I.EQ.J )THEN IF( UNIT ) $ AA( IOFF ) = ROGUE IF( SYM ) $ AA( IOFF ) = DCMPLX( DBLE( AA( IOFF ) ), RROGUE ) END IF 180 CONTINUE 190 CONTINUE END IF RETURN * * End of ZMAKE. * END SUBROUTINE ZMVCH( TRANS, M, N, ALPHA, A, NMAX, X, INCX, BETA, Y, $ INCY, YT, G, YY, EPS, ERR, FATAL, NOUT, MV ) * * Checks the results of the computational tests. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO, RONE PARAMETER ( RZERO = 0.0D0, RONE = 1.0D0 ) * .. Scalar Arguments .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION EPS, ERR INTEGER INCX, INCY, M, N, NMAX, NOUT LOGICAL FATAL, MV CHARACTER*1 TRANS * .. Array Arguments .. COMPLEX*16 A( NMAX, * ), X( * ), Y( * ), YT( * ), YY( * ) DOUBLE PRECISION G( * ) * .. Local Scalars .. COMPLEX*16 C DOUBLE PRECISION ERRI INTEGER I, INCXL, INCYL, IY, J, JX, KX, KY, ML, NL LOGICAL CTRAN, TRAN * .. Intrinsic Functions .. INTRINSIC ABS, DBLE, DCONJG, DIMAG, MAX, SQRT * .. Statement Functions .. DOUBLE PRECISION ABS1 * .. Statement Function definitions .. ABS1( C ) = ABS( DBLE( C ) ) + ABS( DIMAG( C ) ) * .. Executable Statements .. TRAN = TRANS.EQ.'T' CTRAN = TRANS.EQ.'C' IF( TRAN.OR.CTRAN )THEN ML = N NL = M ELSE ML = M NL = N END IF IF( INCX.LT.0 )THEN KX = NL INCXL = -1 ELSE KX = 1 INCXL = 1 END IF IF( INCY.LT.0 )THEN KY = ML INCYL = -1 ELSE KY = 1 INCYL = 1 END IF * * Compute expected result in YT using data in A, X and Y. * Compute gauges in G. 
* IY = KY DO 40 I = 1, ML YT( IY ) = ZERO G( IY ) = RZERO JX = KX IF( TRAN )THEN DO 10 J = 1, NL YT( IY ) = YT( IY ) + A( J, I )*X( JX ) G( IY ) = G( IY ) + ABS1( A( J, I ) )*ABS1( X( JX ) ) JX = JX + INCXL 10 CONTINUE ELSE IF( CTRAN )THEN DO 20 J = 1, NL YT( IY ) = YT( IY ) + DCONJG( A( J, I ) )*X( JX ) G( IY ) = G( IY ) + ABS1( A( J, I ) )*ABS1( X( JX ) ) JX = JX + INCXL 20 CONTINUE ELSE DO 30 J = 1, NL YT( IY ) = YT( IY ) + A( I, J )*X( JX ) G( IY ) = G( IY ) + ABS1( A( I, J ) )*ABS1( X( JX ) ) JX = JX + INCXL 30 CONTINUE END IF YT( IY ) = ALPHA*YT( IY ) + BETA*Y( IY ) G( IY ) = ABS1( ALPHA )*G( IY ) + ABS1( BETA )*ABS1( Y( IY ) ) IY = IY + INCYL 40 CONTINUE * * Compute the error ratio for this result. * ERR = ZERO DO 50 I = 1, ML ERRI = ABS( YT( I ) - YY( 1 + ( I - 1 )*ABS( INCY ) ) )/EPS IF( G( I ).NE.RZERO ) $ ERRI = ERRI/G( I ) ERR = MAX( ERR, ERRI ) IF( ERR*SQRT( EPS ).GE.RONE ) $ GO TO 60 50 CONTINUE * If the loop completes, all results are at least half accurate. GO TO 80 * * Report fatal error. * 60 FATAL = .TRUE. WRITE( NOUT, FMT = 9999 ) DO 70 I = 1, ML IF( MV )THEN WRITE( NOUT, FMT = 9998 )I, YT( I ), $ YY( 1 + ( I - 1 )*ABS( INCY ) ) ELSE WRITE( NOUT, FMT = 9998 )I, $ YY( 1 + ( I - 1 )*ABS( INCY ) ), YT( I ) END IF 70 CONTINUE * 80 CONTINUE RETURN * 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL', $ 'F ACCURATE *******', /' EXPECTED RE', $ 'SULT COMPUTED RESULT' ) 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) ) * * End of ZMVCH. * END LOGICAL FUNCTION LZE( RI, RJ, LR ) * * Tests if two arrays are identical. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LR * .. Array Arguments .. COMPLEX*16 RI( * ), RJ( * ) * .. Local Scalars .. INTEGER I * .. Executable Statements .. DO 10 I = 1, LR IF( RI( I ).NE.RJ( I ) ) $ GO TO 20 10 CONTINUE LZE = .TRUE. 
GO TO 30 20 CONTINUE LZE = .FALSE. 30 RETURN * * End of LZE. * END LOGICAL FUNCTION LZERES( TYPE, UPLO, M, N, AA, AS, LDA ) * * Tests if selected elements in two arrays are equal. * * TYPE is 'GE', 'HE' or 'HP'. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER LDA, M, N CHARACTER*1 UPLO CHARACTER*2 TYPE * .. Array Arguments .. COMPLEX*16 AA( LDA, * ), AS( LDA, * ) * .. Local Scalars .. INTEGER I, IBEG, IEND, J LOGICAL UPPER * .. Executable Statements .. UPPER = UPLO.EQ.'U' IF( TYPE.EQ.'GE' )THEN DO 20 J = 1, N DO 10 I = M + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 10 CONTINUE 20 CONTINUE ELSE IF( TYPE.EQ.'HE' )THEN DO 50 J = 1, N IF( UPPER )THEN IBEG = 1 IEND = J ELSE IBEG = J IEND = N END IF DO 30 I = 1, IBEG - 1 IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 30 CONTINUE DO 40 I = IEND + 1, LDA IF( AA( I, J ).NE.AS( I, J ) ) $ GO TO 70 40 CONTINUE 50 CONTINUE END IF * LZERES = .TRUE. GO TO 80 70 CONTINUE LZERES = .FALSE. 80 RETURN * * End of LZERES. * END COMPLEX*16 FUNCTION ZBEG( RESET ) * * Generates complex numbers as pairs of random numbers uniformly * distributed between -0.5 and 0.5. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. LOGICAL RESET * .. Local Scalars .. INTEGER I, IC, J, MI, MJ * .. Save statement .. SAVE I, IC, J, MI, MJ * .. Intrinsic Functions .. INTRINSIC DCMPLX * .. Executable Statements .. IF( RESET )THEN * Initialize local variables. MI = 891 MJ = 457 I = 7 J = 7 IC = 0 RESET = .FALSE. END IF * * The sequence of values of I or J is bounded between 1 and 999. * If initial I or J = 1,2,3,6,7 or 9, the period will be 50. * If initial I or J = 4 or 8, the period will be 25. * If initial I or J = 5, the period will be 10. 
* IC is used to break up the period by skipping 1 value of I or J * in 6. * IC = IC + 1 10 I = I*MI J = J*MJ I = I - 1000*( I/1000 ) J = J - 1000*( J/1000 ) IF( IC.GE.5 )THEN IC = 0 GO TO 10 END IF ZBEG = DCMPLX( ( I - 500 )/1001.0D0, ( J - 500 )/1001.0D0 ) RETURN * * End of ZBEG. * END DOUBLE PRECISION FUNCTION DDIFF( X, Y ) * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * * .. Scalar Arguments .. DOUBLE PRECISION X, Y * .. Executable Statements .. DDIFF = X - Y RETURN * * End of DDIFF. * END SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * * Tests whether XERBLA has detected an error when it should. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Executable Statements .. IF( .NOT.LERR )THEN WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT OK = .FALSE. END IF LERR = .FALSE. RETURN * 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D', $ 'ETECTED BY ', A6, ' *****' ) * * End of CHKXER. * END SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 2 BLAS * routines. * * XERBLA is an error handler for the Level 2 BLAS routines. * * It is called by the Level 2 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 2 Blas. * * -- Written on 10-August-1987. * Richard Hanson, Sandia National Labs. * Jeremy Du Croz, NAG Central Office. * * .. Scalar Arguments .. INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. 
IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-0.6.1/blastest/src/fortran/zblat3.f000066400000000000000000004016331360743507500201210ustar00rootroot00000000000000*> \brief \b ZBLAT3 * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * PROGRAM ZBLAT3 * * *> \par Purpose: * ============= *> *> \verbatim *> *> Test program for the COMPLEX*16 Level 3 Blas. *> *> The program must be driven by a short data file. The first 14 records *> of the file are read using list-directed input, the last 9 records *> are read using the format ( A6, L2 ). An annotated example of a data *> file can be obtained by deleting the first 3 characters from the *> following 23 lines: *> 'zblat3.out' NAME OF SUMMARY OUTPUT FILE *> 6 UNIT NUMBER OF SUMMARY FILE *> 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE *> -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) *> F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. *> F LOGICAL FLAG, T TO STOP ON FAILURES. *> T LOGICAL FLAG, T TO TEST ERROR EXITS. *> 16.0 THRESHOLD VALUE OF TEST RATIO *> 6 NUMBER OF VALUES OF N *> 0 1 2 3 5 9 VALUES OF N *> 3 NUMBER OF VALUES OF ALPHA *> (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA *> 3 NUMBER OF VALUES OF BETA *> (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA *> ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. *> ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. *> ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. 
*> ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. *> ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. *> ZHERK T PUT F FOR NO TEST. SAME COLUMNS. *> ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. *> ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. *> ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. *> *> *> Further Details *> =============== *> *> See: *> *> Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. *> A Set of Level 3 Basic Linear Algebra Subprograms. *> *> Technical Memorandum No.88 (Revision 1), Mathematics and *> Computer Science Division, Argonne National Laboratory, 9700 *> South Cass Avenue, Argonne, Illinois 60439, US. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> *> 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers *> can be run multiple times without deleting generated *> output files (susan) *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date April 2012 * *> \ingroup complex16_blas_testing * * ===================================================================== PROGRAM ZBLAT3 * * -- Reference BLAS test routine (version 3.4.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * April 2012 * * ===================================================================== * * .. Parameters .. INTEGER NIN PARAMETER ( NIN = 5 ) INTEGER NSUBS PARAMETER ( NSUBS = 9 ) COMPLEX*16 ZERO, ONE PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ), $ ONE = ( 1.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) INTEGER NMAX PARAMETER ( NMAX = 65 ) INTEGER NIDMAX, NALMAX, NBEMAX PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 ) * .. Local Scalars .. 
DOUBLE PRECISION EPS, ERR, THRESH INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NOUT, NTRA LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE, $ TSTERR CHARACTER*1 TRANSA, TRANSB CHARACTER*6 SNAMET CHARACTER*32 SNAPS, SUMMRY * .. Local Arrays .. COMPLEX*16 AA( NMAX*NMAX ), AB( NMAX, 2*NMAX ), $ ALF( NALMAX ), AS( NMAX*NMAX ), $ BB( NMAX*NMAX ), BET( NBEMAX ), $ BS( NMAX*NMAX ), C( NMAX, NMAX ), $ CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ), $ W( 2*NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDMAX ) LOGICAL LTEST( NSUBS ) CHARACTER*6 SNAMES( NSUBS ) * .. External Functions .. DOUBLE PRECISION DDIFF LOGICAL LZE EXTERNAL DDIFF, LZE * .. External Subroutines .. EXTERNAL ZCHK1, ZCHK2, ZCHK3, ZCHK4, ZCHK5, ZCHKE, ZMMCH * .. Intrinsic Functions .. INTRINSIC MAX, MIN * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR COMMON /SRNAMC/SRNAMT * .. Data statements .. DATA SNAMES/'ZGEMM ', 'ZHEMM ', 'ZSYMM ', 'ZTRMM ', $ 'ZTRSM ', 'ZHERK ', 'ZSYRK ', 'ZHER2K', $ 'ZSYR2K'/ * .. Executable Statements .. * * Read name and unit number for summary output file and open file. * READ( NIN, FMT = * )SUMMRY READ( NIN, FMT = * )NOUT OPEN( NOUT, FILE = SUMMRY, STATUS = 'UNKNOWN' ) NOUTC = NOUT * * Read name and unit number for snapshot output file and open file. * READ( NIN, FMT = * )SNAPS READ( NIN, FMT = * )NTRA TRACE = NTRA.GE.0 IF( TRACE )THEN OPEN( NTRA, FILE = SNAPS, STATUS = 'UNKNOWN' ) END IF * Read the flag that directs rewinding of the snapshot file. READ( NIN, FMT = * )REWI REWI = REWI.AND.TRACE * Read the flag that directs stopping on any failure. READ( NIN, FMT = * )SFATAL * Read the flag that indicates whether error exits are to be tested. READ( NIN, FMT = * )TSTERR * Read the threshold value of the test ratio READ( NIN, FMT = * )THRESH * * Read and check the parameter values for the tests. 
* * Values of N READ( NIN, FMT = * )NIDIM IF( NIDIM.LT.1.OR.NIDIM.GT.NIDMAX )THEN WRITE( NOUT, FMT = 9997 )'N', NIDMAX GO TO 220 END IF READ( NIN, FMT = * )( IDIM( I ), I = 1, NIDIM ) DO 10 I = 1, NIDIM IF( IDIM( I ).LT.0.OR.IDIM( I ).GT.NMAX )THEN WRITE( NOUT, FMT = 9996 )NMAX GO TO 220 END IF 10 CONTINUE * Values of ALPHA READ( NIN, FMT = * )NALF IF( NALF.LT.1.OR.NALF.GT.NALMAX )THEN WRITE( NOUT, FMT = 9997 )'ALPHA', NALMAX GO TO 220 END IF READ( NIN, FMT = * )( ALF( I ), I = 1, NALF ) * Values of BETA READ( NIN, FMT = * )NBET IF( NBET.LT.1.OR.NBET.GT.NBEMAX )THEN WRITE( NOUT, FMT = 9997 )'BETA', NBEMAX GO TO 220 END IF READ( NIN, FMT = * )( BET( I ), I = 1, NBET ) * * Report values of parameters. * WRITE( NOUT, FMT = 9995 ) WRITE( NOUT, FMT = 9994 )( IDIM( I ), I = 1, NIDIM ) WRITE( NOUT, FMT = 9993 )( ALF( I ), I = 1, NALF ) WRITE( NOUT, FMT = 9992 )( BET( I ), I = 1, NBET ) IF( .NOT.TSTERR )THEN WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9984 ) END IF WRITE( NOUT, FMT = * ) WRITE( NOUT, FMT = 9999 )THRESH WRITE( NOUT, FMT = * ) * * Read names of subroutines and flags which indicate * whether they are to be tested. * DO 20 I = 1, NSUBS LTEST( I ) = .FALSE. 20 CONTINUE 30 READ( NIN, FMT = 9988, END = 60 )SNAMET, LTESTT DO 40 I = 1, NSUBS IF( SNAMET.EQ.SNAMES( I ) ) $ GO TO 50 40 CONTINUE WRITE( NOUT, FMT = 9990 )SNAMET STOP 50 LTEST( I ) = LTESTT GO TO 30 * 60 CONTINUE CLOSE ( NIN ) * * Compute EPS (the machine precision). * EPS = EPSILON(RZERO) WRITE( NOUT, FMT = 9998 )EPS * * Check the reliability of ZMMCH using exact data. * N = MIN( 32, NMAX ) DO 100 J = 1, N DO 90 I = 1, N AB( I, J ) = MAX( I - J + 1, 0 ) 90 CONTINUE AB( J, NMAX + 1 ) = J AB( 1, NMAX + J ) = J C( J, 1 ) = ZERO 100 CONTINUE DO 110 J = 1, N CC( J ) = J*( ( J + 1 )*J )/2 - ( ( J + 1 )*J*( J - 1 ) )/3 110 CONTINUE * CC holds the exact result. On exit from ZMMCH CT holds * the result computed by ZMMCH. 
TRANSA = 'N' TRANSB = 'N' CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'C' CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF DO 120 J = 1, N AB( J, NMAX + 1 ) = N - J + 1 AB( 1, NMAX + J ) = N - J + 1 120 CONTINUE DO 130 J = 1, N CC( N - J + 1 ) = J*( ( J + 1 )*J )/2 - $ ( ( J + 1 )*J*( J - 1 ) )/3 130 CONTINUE TRANSA = 'C' TRANSB = 'N' CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF TRANSB = 'C' CALL ZMMCH( TRANSA, TRANSB, N, 1, N, ONE, AB, NMAX, $ AB( 1, NMAX + 1 ), NMAX, ZERO, C, NMAX, CT, G, CC, $ NMAX, EPS, ERR, FATAL, NOUT, .TRUE. ) SAME = LZE( CC, CT, N ) IF( .NOT.SAME.OR.ERR.NE.RZERO )THEN WRITE( NOUT, FMT = 9989 )TRANSA, TRANSB, SAME, ERR STOP END IF * * Test each subroutine in turn. * DO 200 ISNUM = 1, NSUBS WRITE( NOUT, FMT = * ) IF( .NOT.LTEST( ISNUM ) )THEN * Subprogram is not to be tested. WRITE( NOUT, FMT = 9987 )SNAMES( ISNUM ) ELSE SRNAMT = SNAMES( ISNUM ) * Test error exits. IF( TSTERR )THEN CALL ZCHKE( ISNUM, SNAMES( ISNUM ), NOUT ) WRITE( NOUT, FMT = * ) END IF * Test computations. INFOT = 0 OK = .TRUE. FATAL = .FALSE. GO TO ( 140, 150, 150, 160, 160, 170, 170, $ 180, 180 )ISNUM * Test ZGEMM, 01. 
140 CALL ZCHK1( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test ZHEMM, 02, ZSYMM, 03. 150 CALL ZCHK2( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test ZTRMM, 04, ZTRSM, 05. 160 CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB, $ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C ) GO TO 190 * Test ZHERK, 06, ZSYRK, 07. 170 CALL ZCHK4( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, AB( 1, NMAX + 1 ), BB, BS, C, $ CC, CS, CT, G ) GO TO 190 * Test ZHER2K, 08, ZSYR2K, 09. 180 CALL ZCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE, $ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, $ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W ) GO TO 190 * 190 IF( FATAL.AND.SFATAL ) $ GO TO 210 END IF 200 CONTINUE WRITE( NOUT, FMT = 9986 ) GO TO 230 * 210 CONTINUE WRITE( NOUT, FMT = 9985 ) GO TO 230 * 220 CONTINUE WRITE( NOUT, FMT = 9991 ) * 230 CONTINUE IF( TRACE ) $ CLOSE ( NTRA ) CLOSE ( NOUT ) STOP * 9999 FORMAT( ' ROUTINES PASS COMPUTATIONAL TESTS IF TEST RATIO IS LES', $ 'S THAN', F8.2 ) 9998 FORMAT( ' RELATIVE MACHINE PRECISION IS TAKEN TO BE', 1P, D9.1 ) 9997 FORMAT( ' NUMBER OF VALUES OF ', A, ' IS LESS THAN 1 OR GREATER ', $ 'THAN ', I2 ) 9996 FORMAT( ' VALUE OF N IS LESS THAN 0 OR GREATER THAN ', I2 ) 9995 FORMAT( ' TESTS OF THE COMPLEX*16 LEVEL 3 BLAS', //' THE F', $ 'OLLOWING PARAMETER VALUES WILL BE USED:' ) 9994 FORMAT( ' FOR N ', 9I6 ) 9993 FORMAT( ' FOR ALPHA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9992 FORMAT( ' FOR BETA ', $ 7( '(', F4.1, ',', F4.1, ') ', : ) ) 9991 FORMAT( ' AMEND DATA FILE OR INCREASE ARRAY SIZES IN PROGRAM', $ /' ******* TESTS ABANDONED *******' ) 
9990 FORMAT( ' SUBPROGRAM NAME ', A6, ' NOT RECOGNIZED', /' ******* T', $ 'ESTS ABANDONED *******' ) 9989 FORMAT( ' ERROR IN ZMMCH - IN-LINE DOT PRODUCTS ARE BEING EVALU', $ 'ATED WRONGLY.', /' ZMMCH WAS CALLED WITH TRANSA = ', A1, $ ' AND TRANSB = ', A1, /' AND RETURNED SAME = ', L1, ' AND ', $ 'ERR = ', F12.3, '.', /' THIS MAY BE DUE TO FAULTS IN THE ', $ 'ARITHMETIC OR THE COMPILER.', /' ******* TESTS ABANDONED ', $ '*******' ) 9988 FORMAT( A6, L2 ) 9987 FORMAT( 1X, A6, ' WAS NOT TESTED' ) 9986 FORMAT( /' END OF TESTS' ) 9985 FORMAT( /' ******* FATAL ERROR - TESTS ABANDONED *******' ) 9984 FORMAT( ' ERROR-EXITS WILL NOT BE TESTED' ) * * End of ZBLAT3. * END SUBROUTINE ZCHK1( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests ZGEMM. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ) ) DOUBLE PRECISION RZERO PARAMETER ( RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BLS DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, IB, ICA, ICB, IK, IM, IN, K, KS, LAA, $ LBB, LCC, LDA, LDAS, LDB, LDBS, LDC, LDCS, M, $ MA, MB, MS, N, NA, NARGS, NB, NC, NS LOGICAL NULL, RESET, SAME, TRANA, TRANB CHARACTER*1 TRANAS, TRANBS, TRANSA, TRANSB CHARACTER*3 ICH * .. 
Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZGEMM, ZMAKE, ZMMCH * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICH/'NTC'/ * .. Executable Statements .. * NARGS = 13 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 110 IM = 1, NIDIM M = IDIM( IM ) * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = M IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N NULL = N.LE.0.OR.M.LE.0 * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICA = 1, 3 TRANSA = ICH( ICA: ICA ) TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C' * IF( TRANA )THEN MA = K NA = M ELSE MA = M NA = K END IF * Set LDA to 1 more than minimum value if room. LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL ZMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICB = 1, 3 TRANSB = ICH( ICB: ICB ) TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C' * IF( TRANB )THEN MB = N NB = K ELSE MB = K NB = N END IF * Set LDB to 1 more than minimum value if room. LDB = MB IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 70 LBB = LDB*NB * * Generate the matrix B. * CALL ZMAKE( 'GE', ' ', ' ', MB, NB, B, NMAX, BB, $ LDB, RESET, ZERO ) * DO 60 IA = 1, NALF ALPHA = ALF( IA ) * DO 50 IB = 1, NBET BETA = BET( IB ) * * Generate the matrix C. * CALL ZMAKE( 'GE', ' ', ' ', M, N, C, NMAX, $ CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. 
* TRANAS = TRANSA TRANBS = TRANSB MS = M NS = N KS = K ALS = ALPHA DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA DO 20 I = 1, LBB BS( I ) = BB( I ) 20 CONTINUE LDBS = LDB BLS = BETA DO 30 I = 1, LCC CS( I ) = CC( I ) 30 CONTINUE LDCS = LDC * * Call the subroutine. * IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ TRANSA, TRANSB, M, N, K, ALPHA, LDA, LDB, $ BETA, LDC IF( REWI ) $ REWIND NTRA CALL ZGEMM( TRANSA, TRANSB, M, N, K, ALPHA, $ AA, LDA, BB, LDB, BETA, CC, LDC ) * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = TRANSA.EQ.TRANAS ISAME( 2 ) = TRANSB.EQ.TRANBS ISAME( 3 ) = MS.EQ.M ISAME( 4 ) = NS.EQ.N ISAME( 5 ) = KS.EQ.K ISAME( 6 ) = ALS.EQ.ALPHA ISAME( 7 ) = LZE( AS, AA, LAA ) ISAME( 8 ) = LDAS.EQ.LDA ISAME( 9 ) = LZE( BS, BB, LBB ) ISAME( 10 ) = LDBS.EQ.LDB ISAME( 11 ) = BLS.EQ.BETA IF( NULL )THEN ISAME( 12 ) = LZE( CS, CC, LCC ) ELSE ISAME( 12 ) = LZERES( 'GE', ' ', M, N, CS, $ CC, LDC ) END IF ISAME( 13 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report * and return. * SAME = .TRUE. DO 40 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 40 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result. * CALL ZMMCH( TRANSA, TRANSB, M, N, K, $ ALPHA, A, NMAX, B, NMAX, BETA, $ C, NMAX, CT, G, CC, LDC, EPS, $ ERR, FATAL, NOUT, .TRUE. ) ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 120 END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * * Report result. 
*
      IF( ERRMAX.LT.THRESH )THEN
         WRITE( NOUT, FMT = 9999 )SNAME, NC
      ELSE
         WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX
      END IF
      GO TO 130
*
  120 CONTINUE
      WRITE( NOUT, FMT = 9996 )SNAME
      WRITE( NOUT, FMT = 9995 )NC, SNAME, TRANSA, TRANSB, M, N, K,
     $      ALPHA, LDA, LDB, BETA, LDC
*
  130 CONTINUE
      RETURN
*
 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL',
     $      'S)' )
 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
     $      'ANGED INCORRECTLY *******' )
 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
     $      'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
     $      ' - SUSPECT *******' )
 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' )
 9995 FORMAT( 1X, I6, ': ', A6, '(''', A1, ''',''', A1, ''',',
     $      3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
     $      ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
     $      '******' )
*
*  End of ZCHK1.
*
      END
      SUBROUTINE ZCHK2( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI,
     $                  FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX,
     $                  A, AA, AS, B, BB, BS, C, CC, CS, CT, G )
*
*  Tests ZHEMM and ZSYMM.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*  Outline
*  -------
*  For every M and N taken from IDIM, each SIDE in 'LR', each UPLO in
*  'UL', each ALPHA in ALF and each BETA in BET, the routine
*    1. generates B, A and C through ZMAKE,
*    2. saves a copy of every argument,
*    3. calls ZHEMM when SNAME( 2: 3 ) is 'HE', ZSYMM otherwise,
*    4. checks that OK is still .TRUE. and that no input argument
*       differs from its saved copy,
*    5. unless the case is null (M or N not positive), hands the
*       result to ZMMCH and folds the returned ERR into ERRMAX.
*  After all cases, ERRMAX is compared with THRESH and a summary line
*  is written to NOUT.
*
*  Arguments (as used in this routine)
*  -----------------------------------
*  SNAME    name of the routine under test (printed in reports).
*  EPS      passed on to ZMMCH.
*  THRESH   the summary says PASSED only when ERRMAX.LT.THRESH.
*  NOUT     unit number for reports.
*  NTRA     unit number for the call trace.
*  TRACE    when .TRUE., each call is written to NTRA beforehand.
*  REWI     when .TRUE., NTRA is rewound before each call.
*  FATAL    set to .TRUE. here on a failed check; also passed to
*           ZMMCH, and tested after ZMMCH returns.
*  IDIM     the NIDIM values used for M and N.
*  ALF      the NALF values used for ALPHA.
*  BET      the NBET values used for BETA.
*  NMAX     leading dimension of A, B and C; a case is skipped when
*           its LDA, LDB or LDC would exceed NMAX.
*  A, B, C  matrices filled by ZMAKE and handed to ZMMCH.
*  AA,BB,CC the arrays actually passed to ZHEMM / ZSYMM.
*  AS,BS,CS copies of AA, BB, CC taken before the call.
*  CT, G    passed on to ZMMCH.
*
*     .. Parameters ..
      COMPLEX*16         ZERO
      PARAMETER          ( ZERO = ( 0.0D0, 0.0D0 ) )
      DOUBLE PRECISION   RZERO
      PARAMETER          ( RZERO = 0.0D0 )
*     .. Scalar Arguments ..
      DOUBLE PRECISION   EPS, THRESH
      INTEGER            NALF, NBET, NIDIM, NMAX, NOUT, NTRA
      LOGICAL            FATAL, REWI, TRACE
      CHARACTER*6        SNAME
*     .. Array Arguments ..
      COMPLEX*16         A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ),
     $                   AS( NMAX*NMAX ), B( NMAX, NMAX ),
     $                   BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ),
     $                   C( NMAX, NMAX ), CC( NMAX*NMAX ),
     $                   CS( NMAX*NMAX ), CT( NMAX )
      DOUBLE PRECISION   G( NMAX )
      INTEGER            IDIM( NIDIM )
*     .. Local Scalars ..
      COMPLEX*16         ALPHA, ALS, BETA, BLS
      DOUBLE PRECISION   ERR, ERRMAX
      INTEGER            I, IA, IB, ICS, ICU, IM, IN, LAA, LBB, LCC,
     $                   LDA, LDAS, LDB, LDBS, LDC, LDCS, M, MS, N, NA,
     $                   NARGS, NC, NS
      LOGICAL            CONJ, LEFT, NULL, RESET, SAME
      CHARACTER*1        SIDE, SIDES, UPLO, UPLOS
      CHARACTER*2        ICHS, ICHU
*     .. Local Arrays ..
      LOGICAL            ISAME( 13 )
*     .. External Functions ..
      LOGICAL            LZE, LZERES
      EXTERNAL           LZE, LZERES
*     .. External Subroutines ..
      EXTERNAL           ZHEMM, ZMAKE, ZMMCH, ZSYMM
*     .. Intrinsic Functions ..
      INTRINSIC          MAX
*     .. Scalars in Common ..
      INTEGER            INFOT, NOUTC
      LOGICAL            LERR, OK
*     .. Common blocks ..
      COMMON             /INFOC/INFOT, NOUTC, OK, LERR
*     .. Data statements ..
      DATA               ICHS/'LR'/, ICHU/'UL'/
*     .. Executable Statements ..
*     CONJ selects ZHEMM rather than ZSYMM at the call site below.
      CONJ = SNAME( 2: 3 ).EQ.'HE'
*
*     NARGS is the number of ISAME entries examined after each call;
*     NC counts the calls made so far.
      NARGS = 12
      NC = 0
      RESET = .TRUE.
      ERRMAX = RZERO
*
      DO 100 IM = 1, NIDIM
         M = IDIM( IM )
*
         DO 90 IN = 1, NIDIM
            N = IDIM( IN )
*           Set LDC to 1 more than minimum value if room.
            LDC = M
            IF( LDC.LT.NMAX )
     $         LDC = LDC + 1
*           Skip tests if not enough room.
            IF( LDC.GT.NMAX )
     $         GO TO 90
            LCC = LDC*N
*           A null case is one where M or N is not positive.
            NULL = N.LE.0.OR.M.LE.0
*           Set LDB to 1 more than minimum value if room.
            LDB = M
            IF( LDB.LT.NMAX )
     $         LDB = LDB + 1
*           Skip tests if not enough room.
            IF( LDB.GT.NMAX )
     $         GO TO 90
            LBB = LDB*N
*
*           Generate the matrix B.
*
            CALL ZMAKE( 'GE', ' ', ' ', M, N, B, NMAX, BB, LDB, RESET,
     $                  ZERO )
*
            DO 80 ICS = 1, 2
               SIDE = ICHS( ICS: ICS )
               LEFT = SIDE.EQ.'L'
*
*              The order NA of A is M for SIDE = 'L', N otherwise.
               IF( LEFT )THEN
                  NA = M
               ELSE
                  NA = N
               END IF
*              Set LDA to 1 more than minimum value if room.
               LDA = NA
               IF( LDA.LT.NMAX )
     $            LDA = LDA + 1
*              Skip tests if not enough room.
               IF( LDA.GT.NMAX )
     $            GO TO 80
               LAA = LDA*NA
*
               DO 70 ICU = 1, 2
                  UPLO = ICHU( ICU: ICU )
*
*                 Generate the hermitian or symmetric matrix A.
*
                  CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', NA, NA, A, NMAX,
     $                        AA, LDA, RESET, ZERO )
*
                  DO 60 IA = 1, NALF
                     ALPHA = ALF( IA )
*
                     DO 50 IB = 1, NBET
                        BETA = BET( IB )
*
*                       Generate the matrix C.
*
                        CALL ZMAKE( 'GE', ' ', ' ', M, N, C, NMAX, CC,
     $                              LDC, RESET, ZERO )
*
                        NC = NC + 1
*
*                       Save every datum before calling the
*                       subroutine.
*
                        SIDES = SIDE
                        UPLOS = UPLO
                        MS = M
                        NS = N
                        ALS = ALPHA
                        DO 10 I = 1, LAA
                           AS( I ) = AA( I )
   10                   CONTINUE
                        LDAS = LDA
                        DO 20 I = 1, LBB
                           BS( I ) = BB( I )
   20                   CONTINUE
                        LDBS = LDB
                        BLS = BETA
                        DO 30 I = 1, LCC
                           CS( I ) = CC( I )
   30                   CONTINUE
                        LDCS = LDC
*
*                       Call the subroutine.
*
                        IF( TRACE )
     $                     WRITE( NTRA, FMT = 9995 )NC, SNAME, SIDE,
     $                     UPLO, M, N, ALPHA, LDA, LDB, BETA, LDC
                        IF( REWI )
     $                     REWIND NTRA
                        IF( CONJ )THEN
                           CALL ZHEMM( SIDE, UPLO, M, N, ALPHA, AA, LDA,
     $                                 BB, LDB, BETA, CC, LDC )
                        ELSE
                           CALL ZSYMM( SIDE, UPLO, M, N, ALPHA, AA, LDA,
     $                                 BB, LDB, BETA, CC, LDC )
                        END IF
*
*                       Check if error-exit was taken incorrectly.
*                       OK is in COMMON /INFOC/ and is not assigned
*                       anywhere in this routine.
*
                        IF( .NOT.OK )THEN
                           WRITE( NOUT, FMT = 9994 )
                           FATAL = .TRUE.
                           GO TO 110
                        END IF
*
*                       See what data changed inside subroutines.
*                       ISAME( I ) refers to the I-th argument of the
*                       call above.
*
                        ISAME( 1 ) = SIDES.EQ.SIDE
                        ISAME( 2 ) = UPLOS.EQ.UPLO
                        ISAME( 3 ) = MS.EQ.M
                        ISAME( 4 ) = NS.EQ.N
                        ISAME( 5 ) = ALS.EQ.ALPHA
                        ISAME( 6 ) = LZE( AS, AA, LAA )
                        ISAME( 7 ) = LDAS.EQ.LDA
                        ISAME( 8 ) = LZE( BS, BB, LBB )
                        ISAME( 9 ) = LDBS.EQ.LDB
                        ISAME( 10 ) = BLS.EQ.BETA
*                       In a null case all of CC is compared with its
*                       copy; otherwise the comparison is delegated to
*                       LZERES with type 'GE'.
                        IF( NULL )THEN
                           ISAME( 11 ) = LZE( CS, CC, LCC )
                        ELSE
                           ISAME( 11 ) = LZERES( 'GE', ' ', M, N, CS,
     $                                   CC, LDC )
                        END IF
                        ISAME( 12 ) = LDCS.EQ.LDC
*
*                       If data was incorrectly changed, report and
*                       return.
*
                        SAME = .TRUE.
                        DO 40 I = 1, NARGS
                           SAME = SAME.AND.ISAME( I )
                           IF( .NOT.ISAME( I ) )
     $                        WRITE( NOUT, FMT = 9998 )I
   40                   CONTINUE
                        IF( .NOT.SAME )THEN
                           FATAL = .TRUE.
                           GO TO 110
                        END IF
*
                        IF( .NOT.NULL )THEN
*
*                          Check the result.
*                          A and B swap places in the ZMMCH call
*                          according to SIDE.
*
                           IF( LEFT )THEN
                              CALL ZMMCH( 'N', 'N', M, N, M, ALPHA, A,
     $                                    NMAX, B, NMAX, BETA, C, NMAX,
     $                                    CT, G, CC, LDC, EPS, ERR,
     $                                    FATAL, NOUT, .TRUE. )
                           ELSE
                              CALL ZMMCH( 'N', 'N', M, N, N, ALPHA, B,
     $                                    NMAX, A, NMAX, BETA, C, NMAX,
     $                                    CT, G, CC, LDC, EPS, ERR,
     $                                    FATAL, NOUT, .TRUE. )
                           END IF
                           ERRMAX = MAX( ERRMAX, ERR )
*                          If got really bad answer, report and
*                          return.
                           IF( FATAL )
     $                        GO TO 110
                        END IF
*
   50                CONTINUE
*
   60             CONTINUE
*
   70          CONTINUE
*
   80       CONTINUE
*
   90    CONTINUE
*
  100 CONTINUE
*
*     Report result.
*
      IF( ERRMAX.LT.THRESH )THEN
         WRITE( NOUT, FMT = 9999 )SNAME, NC
      ELSE
         WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX
      END IF
      GO TO 120
*
*     Failure exit: print the arguments of the call that failed.
  110 CONTINUE
      WRITE( NOUT, FMT = 9996 )SNAME
      WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, M, N, ALPHA, LDA,
     $      LDB, BETA, LDC
*
  120 CONTINUE
      RETURN
*
 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL',
     $      'S)' )
 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
     $      'ANGED INCORRECTLY *******' )
 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
     $      'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
     $      ' - SUSPECT *******' )
 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' )
 9995 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
     $      ',', F4.1, '), C,', I3, ') .' )
 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
     $      '******' )
*
*  End of ZCHK2.
*
      END
      SUBROUTINE ZCHK3( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI,
     $                  FATAL, NIDIM, IDIM, NALF, ALF, NMAX, A, AA, AS,
     $                  B, BB, BS, CT, G, C )
*
*  Tests ZTRMM and ZTRSM.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Parameters ..
      COMPLEX*16         ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0D0, 0.0D0 ),
     $                   ONE = ( 1.0D0, 0.0D0 ) )
      DOUBLE PRECISION   RZERO
      PARAMETER          ( RZERO = 0.0D0 )
*     .. Scalar Arguments ..
      DOUBLE PRECISION   EPS, THRESH
      INTEGER            NALF, NIDIM, NMAX, NOUT, NTRA
      LOGICAL            FATAL, REWI, TRACE
      CHARACTER*6        SNAME
*     .. Array Arguments ..
      COMPLEX*16         A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ),
     $                   AS( NMAX*NMAX ), B( NMAX, NMAX ),
     $                   BB( NMAX*NMAX ), BS( NMAX*NMAX ),
     $                   C( NMAX, NMAX ), CT( NMAX )
      DOUBLE PRECISION   G( NMAX )
      INTEGER            IDIM( NIDIM )
*     .. Local Scalars ..
COMPLEX*16 ALPHA, ALS DOUBLE PRECISION ERR, ERRMAX INTEGER I, IA, ICD, ICS, ICT, ICU, IM, IN, J, LAA, LBB, $ LDA, LDAS, LDB, LDBS, M, MS, N, NA, NARGS, NC, $ NS LOGICAL LEFT, NULL, RESET, SAME CHARACTER*1 DIAG, DIAGS, SIDE, SIDES, TRANAS, TRANSA, UPLO, $ UPLOS CHARACTER*2 ICHD, ICHS, ICHU CHARACTER*3 ICHT * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZMAKE, ZMMCH, ZTRMM, ZTRSM * .. Intrinsic Functions .. INTRINSIC MAX * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHU/'UL'/, ICHT/'NTC'/, ICHD/'UN'/, ICHS/'LR'/ * .. Executable Statements .. * NARGS = 11 NC = 0 RESET = .TRUE. ERRMAX = RZERO * Set up zero matrix for ZMMCH. DO 20 J = 1, NMAX DO 10 I = 1, NMAX C( I, J ) = ZERO 10 CONTINUE 20 CONTINUE * DO 140 IM = 1, NIDIM M = IDIM( IM ) * DO 130 IN = 1, NIDIM N = IDIM( IN ) * Set LDB to 1 more than minimum value if room. LDB = M IF( LDB.LT.NMAX ) $ LDB = LDB + 1 * Skip tests if not enough room. IF( LDB.GT.NMAX ) $ GO TO 130 LBB = LDB*N NULL = M.LE.0.OR.N.LE.0 * DO 120 ICS = 1, 2 SIDE = ICHS( ICS: ICS ) LEFT = SIDE.EQ.'L' IF( LEFT )THEN NA = M ELSE NA = N END IF * Set LDA to 1 more than minimum value if room. LDA = NA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 130 LAA = LDA*NA * DO 110 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) * DO 100 ICT = 1, 3 TRANSA = ICHT( ICT: ICT ) * DO 90 ICD = 1, 2 DIAG = ICHD( ICD: ICD ) * DO 80 IA = 1, NALF ALPHA = ALF( IA ) * * Generate the matrix A. * CALL ZMAKE( 'TR', UPLO, DIAG, NA, NA, A, $ NMAX, AA, LDA, RESET, ZERO ) * * Generate the matrix B. * CALL ZMAKE( 'GE', ' ', ' ', M, N, B, NMAX, $ BB, LDB, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the * subroutine. 
* SIDES = SIDE UPLOS = UPLO TRANAS = TRANSA DIAGS = DIAG MS = M NS = N ALS = ALPHA DO 30 I = 1, LAA AS( I ) = AA( I ) 30 CONTINUE LDAS = LDA DO 40 I = 1, LBB BS( I ) = BB( I ) 40 CONTINUE LDBS = LDB * * Call the subroutine. * IF( SNAME( 4: 5 ).EQ.'MM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL ZTRMM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9995 )NC, SNAME, $ SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, $ LDA, LDB IF( REWI ) $ REWIND NTRA CALL ZTRSM( SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, AA, LDA, BB, LDB ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9994 ) FATAL = .TRUE. GO TO 150 END IF * * See what data changed inside subroutines. * ISAME( 1 ) = SIDES.EQ.SIDE ISAME( 2 ) = UPLOS.EQ.UPLO ISAME( 3 ) = TRANAS.EQ.TRANSA ISAME( 4 ) = DIAGS.EQ.DIAG ISAME( 5 ) = MS.EQ.M ISAME( 6 ) = NS.EQ.N ISAME( 7 ) = ALS.EQ.ALPHA ISAME( 8 ) = LZE( AS, AA, LAA ) ISAME( 9 ) = LDAS.EQ.LDA IF( NULL )THEN ISAME( 10 ) = LZE( BS, BB, LBB ) ELSE ISAME( 10 ) = LZERES( 'GE', ' ', M, N, BS, $ BB, LDB ) END IF ISAME( 11 ) = LDBS.EQ.LDB * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 50 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 50 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 150 END IF * IF( .NOT.NULL )THEN IF( SNAME( 4: 5 ).EQ.'MM' )THEN * * Check the result. * IF( LEFT )THEN CALL ZMMCH( TRANSA, 'N', M, N, M, $ ALPHA, A, NMAX, B, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL ZMMCH( 'N', TRANSA, M, N, N, $ ALPHA, B, NMAX, A, NMAX, $ ZERO, C, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF ELSE IF( SNAME( 4: 5 ).EQ.'SM' )THEN * * Compute approximation to original * matrix. 
* DO 70 J = 1, N DO 60 I = 1, M C( I, J ) = BB( I + ( J - 1 )* $ LDB ) BB( I + ( J - 1 )*LDB ) = ALPHA* $ B( I, J ) 60 CONTINUE 70 CONTINUE * IF( LEFT )THEN CALL ZMMCH( TRANSA, 'N', M, N, M, $ ONE, A, NMAX, C, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) ELSE CALL ZMMCH( 'N', TRANSA, M, N, N, $ ONE, C, NMAX, A, NMAX, $ ZERO, B, NMAX, CT, G, $ BB, LDB, EPS, ERR, $ FATAL, NOUT, .FALSE. ) END IF END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 150 END IF * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * 110 CONTINUE * 120 CONTINUE * 130 CONTINUE * 140 CONTINUE * * Report result. * IF( ERRMAX.LT.THRESH )THEN WRITE( NOUT, FMT = 9999 )SNAME, NC ELSE WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX END IF GO TO 160 * 150 CONTINUE WRITE( NOUT, FMT = 9996 )SNAME WRITE( NOUT, FMT = 9995 )NC, SNAME, SIDE, UPLO, TRANSA, DIAG, M, $ N, ALPHA, LDA, LDB * 160 CONTINUE RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL', $ 'S)' ) 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH', $ 'ANGED INCORRECTLY *******' ) 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C', $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2, $ ' - SUSPECT *******' ) 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' ) 9995 FORMAT( 1X, I6, ': ', A6, '(', 4( '''', A1, ''',' ), 2( I3, ',' ), $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ', $ ' .' ) 9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *', $ '******' ) * * End of ZCHK3. * END SUBROUTINE ZCHK4( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI, $ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX, $ A, AA, AS, B, BB, BS, C, CC, CS, CT, G ) * * Tests ZHERK and ZSYRK. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. 
* Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Parameters .. COMPLEX*16 ZERO PARAMETER ( ZERO = ( 0.0D0, 0.0D0 ) ) DOUBLE PRECISION RONE, RZERO PARAMETER ( RONE = 1.0D0, RZERO = 0.0D0 ) * .. Scalar Arguments .. DOUBLE PRECISION EPS, THRESH INTEGER NALF, NBET, NIDIM, NMAX, NOUT, NTRA LOGICAL FATAL, REWI, TRACE CHARACTER*6 SNAME * .. Array Arguments .. COMPLEX*16 A( NMAX, NMAX ), AA( NMAX*NMAX ), ALF( NALF ), $ AS( NMAX*NMAX ), B( NMAX, NMAX ), $ BB( NMAX*NMAX ), BET( NBET ), BS( NMAX*NMAX ), $ C( NMAX, NMAX ), CC( NMAX*NMAX ), $ CS( NMAX*NMAX ), CT( NMAX ) DOUBLE PRECISION G( NMAX ) INTEGER IDIM( NIDIM ) * .. Local Scalars .. COMPLEX*16 ALPHA, ALS, BETA, BETS DOUBLE PRECISION ERR, ERRMAX, RALPHA, RALS, RBETA, RBETS INTEGER I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, K, KS, $ LAA, LCC, LDA, LDAS, LDC, LDCS, LJ, MA, N, NA, $ NARGS, NC, NS LOGICAL CONJ, NULL, RESET, SAME, TRAN, UPPER CHARACTER*1 TRANS, TRANSS, TRANST, UPLO, UPLOS CHARACTER*2 ICHT, ICHU * .. Local Arrays .. LOGICAL ISAME( 13 ) * .. External Functions .. LOGICAL LZE, LZERES EXTERNAL LZE, LZERES * .. External Subroutines .. EXTERNAL ZHERK, ZMAKE, ZMMCH, ZSYRK * .. Intrinsic Functions .. INTRINSIC DCMPLX, MAX, DBLE * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Data statements .. DATA ICHT/'NC'/, ICHU/'UL'/ * .. Executable Statements .. CONJ = SNAME( 2: 3 ).EQ.'HE' * NARGS = 10 NC = 0 RESET = .TRUE. ERRMAX = RZERO * DO 100 IN = 1, NIDIM N = IDIM( IN ) * Set LDC to 1 more than minimum value if room. LDC = N IF( LDC.LT.NMAX ) $ LDC = LDC + 1 * Skip tests if not enough room. IF( LDC.GT.NMAX ) $ GO TO 100 LCC = LDC*N * DO 90 IK = 1, NIDIM K = IDIM( IK ) * DO 80 ICT = 1, 2 TRANS = ICHT( ICT: ICT ) TRAN = TRANS.EQ.'C' IF( TRAN.AND..NOT.CONJ ) $ TRANS = 'T' IF( TRAN )THEN MA = K NA = N ELSE MA = N NA = K END IF * Set LDA to 1 more than minimum value if room. 
LDA = MA IF( LDA.LT.NMAX ) $ LDA = LDA + 1 * Skip tests if not enough room. IF( LDA.GT.NMAX ) $ GO TO 80 LAA = LDA*NA * * Generate the matrix A. * CALL ZMAKE( 'GE', ' ', ' ', MA, NA, A, NMAX, AA, LDA, $ RESET, ZERO ) * DO 70 ICU = 1, 2 UPLO = ICHU( ICU: ICU ) UPPER = UPLO.EQ.'U' * DO 60 IA = 1, NALF ALPHA = ALF( IA ) IF( CONJ )THEN RALPHA = DBLE( ALPHA ) ALPHA = DCMPLX( RALPHA, RZERO ) END IF * DO 50 IB = 1, NBET BETA = BET( IB ) IF( CONJ )THEN RBETA = DBLE( BETA ) BETA = DCMPLX( RBETA, RZERO ) END IF NULL = N.LE.0 IF( CONJ ) $ NULL = NULL.OR.( ( K.LE.0.OR.RALPHA.EQ. $ RZERO ).AND.RBETA.EQ.RONE ) * * Generate the matrix C. * CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, C, $ NMAX, CC, LDC, RESET, ZERO ) * NC = NC + 1 * * Save every datum before calling the subroutine. * UPLOS = UPLO TRANSS = TRANS NS = N KS = K IF( CONJ )THEN RALS = RALPHA ELSE ALS = ALPHA END IF DO 10 I = 1, LAA AS( I ) = AA( I ) 10 CONTINUE LDAS = LDA IF( CONJ )THEN RBETS = RBETA ELSE BETS = BETA END IF DO 20 I = 1, LCC CS( I ) = CC( I ) 20 CONTINUE LDCS = LDC * * Call the subroutine. * IF( CONJ )THEN IF( TRACE ) $ WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO, $ TRANS, N, K, RALPHA, LDA, RBETA, LDC IF( REWI ) $ REWIND NTRA CALL ZHERK( UPLO, TRANS, N, K, RALPHA, AA, $ LDA, RBETA, CC, LDC ) ELSE IF( TRACE ) $ WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO, $ TRANS, N, K, ALPHA, LDA, BETA, LDC IF( REWI ) $ REWIND NTRA CALL ZSYRK( UPLO, TRANS, N, K, ALPHA, AA, $ LDA, BETA, CC, LDC ) END IF * * Check if error-exit was taken incorrectly. * IF( .NOT.OK )THEN WRITE( NOUT, FMT = 9992 ) FATAL = .TRUE. GO TO 120 END IF * * See what data changed inside subroutines. 
* ISAME( 1 ) = UPLOS.EQ.UPLO ISAME( 2 ) = TRANSS.EQ.TRANS ISAME( 3 ) = NS.EQ.N ISAME( 4 ) = KS.EQ.K IF( CONJ )THEN ISAME( 5 ) = RALS.EQ.RALPHA ELSE ISAME( 5 ) = ALS.EQ.ALPHA END IF ISAME( 6 ) = LZE( AS, AA, LAA ) ISAME( 7 ) = LDAS.EQ.LDA IF( CONJ )THEN ISAME( 8 ) = RBETS.EQ.RBETA ELSE ISAME( 8 ) = BETS.EQ.BETA END IF IF( NULL )THEN ISAME( 9 ) = LZE( CS, CC, LCC ) ELSE ISAME( 9 ) = LZERES( SNAME( 2: 3 ), UPLO, N, $ N, CS, CC, LDC ) END IF ISAME( 10 ) = LDCS.EQ.LDC * * If data was incorrectly changed, report and * return. * SAME = .TRUE. DO 30 I = 1, NARGS SAME = SAME.AND.ISAME( I ) IF( .NOT.ISAME( I ) ) $ WRITE( NOUT, FMT = 9998 )I 30 CONTINUE IF( .NOT.SAME )THEN FATAL = .TRUE. GO TO 120 END IF * IF( .NOT.NULL )THEN * * Check the result column by column. * IF( CONJ )THEN TRANST = 'C' ELSE TRANST = 'T' END IF JC = 1 DO 40 J = 1, N IF( UPPER )THEN JJ = 1 LJ = J ELSE JJ = J LJ = N - J + 1 END IF IF( TRAN )THEN CALL ZMMCH( TRANST, 'N', LJ, 1, K, $ ALPHA, A( 1, JJ ), NMAX, $ A( 1, J ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) ELSE CALL ZMMCH( 'N', TRANST, LJ, 1, K, $ ALPHA, A( JJ, 1 ), NMAX, $ A( J, 1 ), NMAX, BETA, $ C( JJ, J ), NMAX, CT, G, $ CC( JC ), LDC, EPS, ERR, $ FATAL, NOUT, .TRUE. ) END IF IF( UPPER )THEN JC = JC + LDC ELSE JC = JC + LDC + 1 END IF ERRMAX = MAX( ERRMAX, ERR ) * If got really bad answer, report and * return. IF( FATAL ) $ GO TO 110 40 CONTINUE END IF * 50 CONTINUE * 60 CONTINUE * 70 CONTINUE * 80 CONTINUE * 90 CONTINUE * 100 CONTINUE * * Report result. 
*
      IF( ERRMAX.LT.THRESH )THEN
         WRITE( NOUT, FMT = 9999 )SNAME, NC
      ELSE
         WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX
      END IF
      GO TO 130
*
  110 CONTINUE
      IF( N.GT.1 )
     $   WRITE( NOUT, FMT = 9995 )J
*
  120 CONTINUE
      WRITE( NOUT, FMT = 9996 )SNAME
      IF( CONJ )THEN
         WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, RALPHA,
     $      LDA, RBETA, LDC
      ELSE
         WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, N, K, ALPHA,
     $      LDA, BETA, LDC
      END IF
*
  130 CONTINUE
      RETURN
*
 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL',
     $      'S)' )
 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
     $      'ANGED INCORRECTLY *******' )
 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
     $      'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
     $      ' - SUSPECT *******' )
 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' )
 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
     $      ' .' )
 9993 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
     $      '), C,', I3, ') .' )
 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
     $      '******' )
*
*  End of ZCHK4.
*
      END
      SUBROUTINE ZCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI,
     $                  FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX,
     $                  AB, AA, AS, BB, BS, C, CC, CS, CT, G, W )
*
*  Tests ZHER2K and ZSYR2K.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*  Outline
*  -------
*  For every N and K taken from IDIM, both TRANS settings, each UPLO
*  in 'UL', each ALPHA in ALF and each BETA in BET, the routine
*    1. generates A and B (both into AB) and C through ZMAKE,
*    2. saves a copy of every argument,
*    3. calls ZHER2K (with the real RBETA) when SNAME( 2: 3 ) is
*       'HE', ZSYR2K (with the complex BETA) otherwise,
*    4. checks that OK is still .TRUE. and that no input argument
*       differs from its saved copy,
*    5. unless the case is null, checks the result one column at a
*       time through ZMMCH and folds each ERR into ERRMAX.
*  After all cases, ERRMAX is compared with THRESH and a summary line
*  is written to NOUT.
*
*  Arguments (as used in this routine)
*  -----------------------------------
*  SNAME    name of the routine under test (printed in reports).
*  EPS      passed on to ZMMCH.
*  THRESH   the summary says PASSED only when ERRMAX.LT.THRESH.
*  NOUT     unit number for reports.
*  NTRA     unit number for the call trace.
*  TRACE    when .TRUE., each call is written to NTRA beforehand.
*  REWI     when .TRUE., NTRA is rewound before each call.
*  FATAL    set to .TRUE. here on a failed check; also passed to
*           ZMMCH, and tested after ZMMCH returns.
*  IDIM     the NIDIM values used for N and K.
*  ALF      the NALF values used for ALPHA.
*  BET      the NBET values used for BETA.
*  NMAX     leading dimension of C; a case is skipped when its LDA
*           or LDC would exceed NMAX.
*  AB       receives both generated matrices.  With TRANS = 'N' it
*           is addressed with leading dimension NMAX and B starts
*           at element K*NMAX + 1; otherwise the leading dimension
*           is 2*NMAX and B starts at element K + 1.
*  AA,BB,CC the arrays actually passed to ZHER2K / ZSYR2K.
*  AS,BS,CS copies of AA, BB, CC taken before the call.
*  C        matrix filled by ZMAKE and handed to ZMMCH.
*  CT, G    passed on to ZMMCH.
*  W        2*K scaled entries built for each column and handed to
*           ZMMCH as its second matrix operand.
*
*  The post-call comparison of C passes SNAME( 2: 3 ) to LZERES, the
*  same type code ZMAKE was given when C was generated, rather than
*  a fixed 'HE'.  This matches the equivalent check in ZCHK4.
*
*     .. Parameters ..
      COMPLEX*16         ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0D0, 0.0D0 ),
     $                   ONE = ( 1.0D0, 0.0D0 ) )
      DOUBLE PRECISION   RONE, RZERO
      PARAMETER          ( RONE = 1.0D0, RZERO = 0.0D0 )
*     .. Scalar Arguments ..
      DOUBLE PRECISION   EPS, THRESH
      INTEGER            NALF, NBET, NIDIM, NMAX, NOUT, NTRA
      LOGICAL            FATAL, REWI, TRACE
      CHARACTER*6        SNAME
*     .. Array Arguments ..
      COMPLEX*16         AA( NMAX*NMAX ), AB( 2*NMAX*NMAX ),
     $                   ALF( NALF ), AS( NMAX*NMAX ), BB( NMAX*NMAX ),
     $                   BET( NBET ), BS( NMAX*NMAX ), C( NMAX, NMAX ),
     $                   CC( NMAX*NMAX ), CS( NMAX*NMAX ), CT( NMAX ),
     $                   W( 2*NMAX )
      DOUBLE PRECISION   G( NMAX )
      INTEGER            IDIM( NIDIM )
*     .. Local Scalars ..
      COMPLEX*16         ALPHA, ALS, BETA, BETS
      DOUBLE PRECISION   ERR, ERRMAX, RBETA, RBETS
      INTEGER            I, IA, IB, ICT, ICU, IK, IN, J, JC, JJ, JJAB,
     $                   K, KS, LAA, LBB, LCC, LDA, LDAS, LDB, LDBS,
     $                   LDC, LDCS, LJ, MA, N, NA, NARGS, NC, NS
      LOGICAL            CONJ, NULL, RESET, SAME, TRAN, UPPER
      CHARACTER*1        TRANS, TRANSS, TRANST, UPLO, UPLOS
      CHARACTER*2        ICHT, ICHU
*     .. Local Arrays ..
      LOGICAL            ISAME( 13 )
*     .. External Functions ..
      LOGICAL            LZE, LZERES
      EXTERNAL           LZE, LZERES
*     .. External Subroutines ..
      EXTERNAL           ZHER2K, ZMAKE, ZMMCH, ZSYR2K
*     .. Intrinsic Functions ..
      INTRINSIC          DCMPLX, DCONJG, MAX, DBLE
*     .. Scalars in Common ..
      INTEGER            INFOT, NOUTC
      LOGICAL            LERR, OK
*     .. Common blocks ..
      COMMON             /INFOC/INFOT, NOUTC, OK, LERR
*     .. Data statements ..
      DATA               ICHT/'NC'/, ICHU/'UL'/
*     .. Executable Statements ..
*     CONJ selects ZHER2K rather than ZSYR2K at the call site below.
      CONJ = SNAME( 2: 3 ).EQ.'HE'
*
*     NARGS is the number of ISAME entries examined after each call;
*     NC counts the calls made so far.
      NARGS = 12
      NC = 0
      RESET = .TRUE.
      ERRMAX = RZERO
*
      DO 130 IN = 1, NIDIM
         N = IDIM( IN )
*        Set LDC to 1 more than minimum value if room.
         LDC = N
         IF( LDC.LT.NMAX )
     $      LDC = LDC + 1
*        Skip tests if not enough room.
         IF( LDC.GT.NMAX )
     $      GO TO 130
         LCC = LDC*N
*
         DO 120 IK = 1, NIDIM
            K = IDIM( IK )
*
            DO 110 ICT = 1, 2
               TRANS = ICHT( ICT: ICT )
               TRAN = TRANS.EQ.'C'
*              ICHT only holds 'N' and 'C'; the transposed case is
*              passed as 'T' when ZSYR2K is under test.
               IF( TRAN.AND..NOT.CONJ )
     $            TRANS = 'T'
*              A and B are generated as MA by NA.
               IF( TRAN )THEN
                  MA = K
                  NA = N
               ELSE
                  MA = N
                  NA = K
               END IF
*              Set LDA to 1 more than minimum value if room.
               LDA = MA
               IF( LDA.LT.NMAX )
     $            LDA = LDA + 1
*              Skip tests if not enough room.
               IF( LDA.GT.NMAX )
     $            GO TO 110
               LAA = LDA*NA
*
*              Generate the matrix A.
*
               IF( TRAN )THEN
                  CALL ZMAKE( 'GE', ' ', ' ', MA, NA, AB, 2*NMAX, AA,
     $                        LDA, RESET, ZERO )
               ELSE
                  CALL ZMAKE( 'GE', ' ', ' ', MA, NA, AB, NMAX, AA, LDA,
     $                        RESET, ZERO )
               END IF
*
*              Generate the matrix B.
*
               LDB = LDA
               LBB = LAA
               IF( TRAN )THEN
                  CALL ZMAKE( 'GE', ' ', ' ', MA, NA, AB( K + 1 ),
     $                        2*NMAX, BB, LDB, RESET, ZERO )
               ELSE
                  CALL ZMAKE( 'GE', ' ', ' ', MA, NA, AB( K*NMAX + 1 ),
     $                        NMAX, BB, LDB, RESET, ZERO )
               END IF
*
               DO 100 ICU = 1, 2
                  UPLO = ICHU( ICU: ICU )
                  UPPER = UPLO.EQ.'U'
*
                  DO 90 IA = 1, NALF
                     ALPHA = ALF( IA )
*
                     DO 80 IB = 1, NBET
                        BETA = BET( IB )
*                       ZHER2K is called with the real RBETA, so BETA
*                       is reduced to its real part for the check.
                        IF( CONJ )THEN
                           RBETA = DBLE( BETA )
                           BETA = DCMPLX( RBETA, RZERO )
                        END IF
*                       A case is null when N is not positive or, for
*                       ZHER2K only, when K is not positive or ALPHA
*                       is zero while RBETA is one.
                        NULL = N.LE.0
                        IF( CONJ )
     $                     NULL = NULL.OR.( ( K.LE.0.OR.ALPHA.EQ.
     $                            ZERO ).AND.RBETA.EQ.RONE )
*
*                       Generate the matrix C.
*
                        CALL ZMAKE( SNAME( 2: 3 ), UPLO, ' ', N, N, C,
     $                              NMAX, CC, LDC, RESET, ZERO )
*
                        NC = NC + 1
*
*                       Save every datum before calling the subroutine.
*
                        UPLOS = UPLO
                        TRANSS = TRANS
                        NS = N
                        KS = K
                        ALS = ALPHA
                        DO 10 I = 1, LAA
                           AS( I ) = AA( I )
   10                   CONTINUE
                        LDAS = LDA
                        DO 20 I = 1, LBB
                           BS( I ) = BB( I )
   20                   CONTINUE
                        LDBS = LDB
                        IF( CONJ )THEN
                           RBETS = RBETA
                        ELSE
                           BETS = BETA
                        END IF
                        DO 30 I = 1, LCC
                           CS( I ) = CC( I )
   30                   CONTINUE
                        LDCS = LDC
*
*                       Call the subroutine.
*
                        IF( CONJ )THEN
                           IF( TRACE )
     $                        WRITE( NTRA, FMT = 9994 )NC, SNAME, UPLO,
     $                        TRANS, N, K, ALPHA, LDA, LDB, RBETA, LDC
                           IF( REWI )
     $                        REWIND NTRA
                           CALL ZHER2K( UPLO, TRANS, N, K, ALPHA, AA,
     $                                  LDA, BB, LDB, RBETA, CC, LDC )
                        ELSE
                           IF( TRACE )
     $                        WRITE( NTRA, FMT = 9993 )NC, SNAME, UPLO,
     $                        TRANS, N, K, ALPHA, LDA, LDB, BETA, LDC
                           IF( REWI )
     $                        REWIND NTRA
                           CALL ZSYR2K( UPLO, TRANS, N, K, ALPHA, AA,
     $                                  LDA, BB, LDB, BETA, CC, LDC )
                        END IF
*
*                       Check if error-exit was taken incorrectly.
*                       OK is in COMMON /INFOC/ and is not assigned
*                       anywhere in this routine.
*
                        IF( .NOT.OK )THEN
                           WRITE( NOUT, FMT = 9992 )
                           FATAL = .TRUE.
                           GO TO 150
                        END IF
*
*                       See what data changed inside subroutines.
*                       ISAME( I ) refers to the I-th argument of the
*                       call above.
*
                        ISAME( 1 ) = UPLOS.EQ.UPLO
                        ISAME( 2 ) = TRANSS.EQ.TRANS
                        ISAME( 3 ) = NS.EQ.N
                        ISAME( 4 ) = KS.EQ.K
                        ISAME( 5 ) = ALS.EQ.ALPHA
                        ISAME( 6 ) = LZE( AS, AA, LAA )
                        ISAME( 7 ) = LDAS.EQ.LDA
                        ISAME( 8 ) = LZE( BS, BB, LBB )
                        ISAME( 9 ) = LDBS.EQ.LDB
                        IF( CONJ )THEN
                           ISAME( 10 ) = RBETS.EQ.RBETA
                        ELSE
                           ISAME( 10 ) = BETS.EQ.BETA
                        END IF
*                       In a null case all of CC is compared with its
*                       copy; otherwise LZERES is given the type code
*                       C was generated with.
                        IF( NULL )THEN
                           ISAME( 11 ) = LZE( CS, CC, LCC )
                        ELSE
                           ISAME( 11 ) = LZERES( SNAME( 2: 3 ), UPLO,
     $                                   N, N, CS, CC, LDC )
                        END IF
                        ISAME( 12 ) = LDCS.EQ.LDC
*
*                       If data was incorrectly changed, report and
*                       return.
*
                        SAME = .TRUE.
                        DO 40 I = 1, NARGS
                           SAME = SAME.AND.ISAME( I )
                           IF( .NOT.ISAME( I ) )
     $                        WRITE( NOUT, FMT = 9998 )I
   40                   CONTINUE
                        IF( .NOT.SAME )THEN
                           FATAL = .TRUE.
                           GO TO 150
                        END IF
*
                        IF( .NOT.NULL )THEN
*
*                          Check the result column by column.
*
                           IF( CONJ )THEN
                              TRANST = 'C'
                           ELSE
                              TRANST = 'T'
                           END IF
*                          JJAB and JC index the first element of AB
*                          and of CC used for the current column.
                           JJAB = 1
                           JC = 1
                           DO 70 J = 1, N
*                             Column J is checked over LJ rows
*                             starting at row JJ.
                              IF( UPPER )THEN
                                 JJ = 1
                                 LJ = J
                              ELSE
                                 JJ = J
                                 LJ = N - J + 1
                              END IF
                              IF( TRAN )THEN
*                                Build the 2*K entries of W from
*                                column J of AB, scaled by ALPHA
*                                (conjugated for the second half when
*                                CONJ is set).
                                 DO 50 I = 1, K
                                    W( I ) = ALPHA*AB( ( J - 1 )*2*
     $                                       NMAX + K + I )
                                    IF( CONJ )THEN
                                       W( K + I ) = DCONJG( ALPHA )*
     $                                              AB( ( J - 1 )*2*
     $                                              NMAX + I )
                                    ELSE
                                       W( K + I ) = ALPHA*
     $                                              AB( ( J - 1 )*2*
     $                                              NMAX + I )
                                    END IF
   50                            CONTINUE
                                 CALL ZMMCH( TRANST, 'N', LJ, 1, 2*K,
     $                              ONE, AB( JJAB ), 2*NMAX, W,
     $                              2*NMAX, BETA, C( JJ, J ),
     $                              NMAX, CT, G, CC( JC ), LDC,
     $                              EPS, ERR, FATAL, NOUT,
     $                              .TRUE. )
                              ELSE
*                                Build the 2*K entries of W from row
*                                J of AB, scaled by ALPHA (with
*                                conjugation when CONJ is set).
                                 DO 60 I = 1, K
                                    IF( CONJ )THEN
                                       W( I ) = ALPHA*DCONJG( AB( ( K +
     $                                          I - 1 )*NMAX + J ) )
                                       W( K + I ) = DCONJG( ALPHA*
     $                                    AB( ( I - 1 )*NMAX +
     $                                    J ) )
                                    ELSE
                                       W( I ) = ALPHA*AB( ( K + I - 1 )*
     $                                          NMAX + J )
                                       W( K + I ) = ALPHA*
     $                                    AB( ( I - 1 )*NMAX +
     $                                    J )
                                    END IF
   60                            CONTINUE
                                 CALL ZMMCH( 'N', 'N', LJ, 1, 2*K, ONE,
     $                              AB( JJ ), NMAX, W, 2*NMAX,
     $                              BETA, C( JJ, J ), NMAX, CT,
     $                              G, CC( JC ), LDC, EPS, ERR,
     $                              FATAL, NOUT, .TRUE. )
                              END IF
*                             Advance to the next column of CC (and
*                             one row further down when UPLO = 'L').
                              IF( UPPER )THEN
                                 JC = JC + LDC
                              ELSE
                                 JC = JC + LDC + 1
                                 IF( TRAN )
     $                              JJAB = JJAB + 2*NMAX
                              END IF
                              ERRMAX = MAX( ERRMAX, ERR )
*                             If got really bad answer, report and
*                             return.
                              IF( FATAL )
     $                           GO TO 140
   70                      CONTINUE
                        END IF
*
   80                CONTINUE
*
   90             CONTINUE
*
  100          CONTINUE
*
  110       CONTINUE
*
  120    CONTINUE
*
  130 CONTINUE
*
*     Report result.
*
      IF( ERRMAX.LT.THRESH )THEN
         WRITE( NOUT, FMT = 9999 )SNAME, NC
      ELSE
         WRITE( NOUT, FMT = 9997 )SNAME, NC, ERRMAX
      END IF
      GO TO 160
*
*     Failure exit from the column check: name the column first.
  140 CONTINUE
      IF( N.GT.1 )
     $   WRITE( NOUT, FMT = 9995 )J
*
*     Failure exit: print the arguments of the call that failed.
  150 CONTINUE
      WRITE( NOUT, FMT = 9996 )SNAME
      IF( CONJ )THEN
         WRITE( NOUT, FMT = 9994 )NC, SNAME, UPLO, TRANS, N, K, ALPHA,
     $      LDA, LDB, RBETA, LDC
      ELSE
         WRITE( NOUT, FMT = 9993 )NC, SNAME, UPLO, TRANS, N, K, ALPHA,
     $      LDA, LDB, BETA, LDC
      END IF
*
  160 CONTINUE
      RETURN
*
 9999 FORMAT( ' ', A6, ' PASSED THE COMPUTATIONAL TESTS (', I6, ' CALL',
     $      'S)' )
 9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
     $      'ANGED INCORRECTLY *******' )
 9997 FORMAT( ' ', A6, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
     $      'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
     $      ' - SUSPECT *******' )
 9996 FORMAT( ' ******* ', A6, ' FAILED ON CALL NUMBER:' )
 9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
 9994 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
     $      ', C,', I3, ') .' )
 9993 FORMAT( 1X, I6, ': ', A6, '(', 2( '''', A1, ''',' ), 2( I3, ',' ),
     $      '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
     $      ',', F4.1, '), C,', I3, ') .' )
 9992 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
     $      '******' )
*
*  End of ZCHK5.
*
      END
      SUBROUTINE ZCHKE( ISNUM, SRNAMT, NOUT )
*
*  Tests the error exits from the Level 3 Blas.
*  Requires a special version of the error-handling routine XERBLA.
*  A, B and C should not need to be defined.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
* * 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) * 3-19-92: Fix argument 12 in calls to ZSYMM and ZHEMM * with INFOT = 9 (eca) * 10-9-00: Declared INTRINSIC DCMPLX (susan) * * .. Scalar Arguments .. INTEGER ISNUM, NOUT CHARACTER*6 SRNAMT * .. Scalars in Common .. INTEGER INFOT, NOUTC LOGICAL LERR, OK * .. Parameters .. REAL ONE, TWO PARAMETER ( ONE = 1.0D0, TWO = 2.0D0 ) * .. Local Scalars .. COMPLEX*16 ALPHA, BETA DOUBLE PRECISION RALPHA, RBETA * .. Local Arrays .. COMPLEX*16 A( 2, 1 ), B( 2, 1 ), C( 2, 1 ) * .. External Subroutines .. EXTERNAL ZGEMM, ZHEMM, ZHER2K, ZHERK, CHKXER, ZSYMM, $ ZSYR2K, ZSYRK, ZTRMM, ZTRSM * .. Intrinsic Functions .. INTRINSIC DCMPLX * .. Common blocks .. COMMON /INFOC/INFOT, NOUTC, OK, LERR * .. Executable Statements .. * OK is set to .FALSE. by the special version of XERBLA or by CHKXER * if anything is wrong. OK = .TRUE. * LERR is set to .TRUE. by the special version of XERBLA each time * it is called, and is then tested and re-set by CHKXER. LERR = .FALSE. * * Initialize ALPHA, BETA, RALPHA, and RBETA. 
* ALPHA = DCMPLX( ONE, -ONE ) BETA = DCMPLX( TWO, -TWO ) RALPHA = ONE RBETA = TWO * GO TO ( 10, 20, 30, 40, 50, 60, 70, 80, $ 90 )ISNUM 10 INFOT = 1 CALL ZGEMM( '/', 'N', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL ZGEMM( '/', 'C', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 1 CALL ZGEMM( '/', 'T', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGEMM( 'N', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGEMM( 'C', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZGEMM( 'T', '/', 0, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'N', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'N', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'N', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'C', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'C', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'C', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'T', 'N', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'T', 'C', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZGEMM( 'T', 'T', -1, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'N', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL 
CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'N', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'N', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'C', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'C', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'C', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'T', 'N', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'T', 'C', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZGEMM( 'T', 'T', 0, -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'N', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'N', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'N', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'C', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'C', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'C', 'T', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'T', 'N', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'T', 'C', 0, 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZGEMM( 'T', 'T', 0, 0, -1, ALPHA, A, 1, 
B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'N', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'C', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'C', 'C', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'C', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'T', 'C', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 8 CALL ZGEMM( 'T', 'T', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'N', 'N', 0, 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'C', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'T', 'N', 0, 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'N', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'C', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'T', 'C', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'N', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'C', 'T', 0, 2, 
0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZGEMM( 'T', 'T', 0, 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'N', 'N', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'N', 'C', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'N', 'T', 2, 0, 0, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'C', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'C', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'C', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'T', 'N', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'T', 'C', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 13 CALL ZGEMM( 'T', 'T', 2, 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 20 INFOT = 1 CALL ZHEMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHEMM( 'L', '/', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHEMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHEMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHEMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHEMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHEMM( 
'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHEMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHEMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHEMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHEMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHEMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 30 INFOT = 1 CALL ZSYMM( '/', 'U', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZSYMM( 'L', '/', 0, 0, ALPHA, 
A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYMM( 'L', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYMM( 'R', 'U', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYMM( 'L', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYMM( 'R', 'L', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYMM( 'L', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYMM( 'R', 'U', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYMM( 'L', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYMM( 'R', 'L', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYMM( 'R', 'U', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYMM( 'R', 'L', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'L', 'U', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL 
CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'R', 'U', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'L', 'L', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYMM( 'R', 'L', 2, 0, ALPHA, A, 1, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 40 INFOT = 1 CALL ZTRMM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTRMM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTRMM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTRMM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT 
= 5 CALL ZTRMM( 'R', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRMM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRMM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) 
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'L', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRMM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 
'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRMM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 50 INFOT = 1 CALL ZTRSM( '/', 'U', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZTRSM( 'L', '/', 'N', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZTRSM( 'L', 'U', '/', 'N', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZTRSM( 'L', 'U', 'N', '/', 0, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'U', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'U', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'U', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'L', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'L', 'N', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'L', 'C', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL 
CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 5 CALL ZTRSM( 'R', 'L', 'T', 'N', -1, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'U', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'U', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'U', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'L', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'L', 'N', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'L', 'C', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 6 CALL ZTRSM( 'R', 'L', 'T', 'N', 0, -1, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'U', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 
'R', 'U', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'U', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'L', 'N', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'L', 'C', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZTRSM( 'R', 'L', 'T', 'N', 0, 2, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'U', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'U', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'U', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'U', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'U', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'U', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'L', 'N', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'L', 'C', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'L', 'L', 'T', 'N', 2, 0, ALPHA, A, 2, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'L', 'N', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, 
INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'L', 'C', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 11 CALL ZTRSM( 'R', 'L', 'T', 'N', 2, 0, ALPHA, A, 1, B, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 60 INFOT = 1 CALL ZHERK( '/', 'N', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHERK( 'U', 'T', 0, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHERK( 'U', 'N', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHERK( 'U', 'C', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHERK( 'L', 'N', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHERK( 'L', 'C', -1, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHERK( 'U', 'N', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHERK( 'U', 'C', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHERK( 'L', 'N', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHERK( 'L', 'C', 0, -1, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHERK( 'U', 'N', 2, 0, RALPHA, A, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHERK( 'U', 'C', 0, 2, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHERK( 'L', 'N', 2, 0, RALPHA, A, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHERK( 'L', 'C', 0, 2, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHERK( 'U', 'N', 2, 0, RALPHA, A, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHERK( 'U', 'C', 2, 0, RALPHA, A, 1, RBETA, 
C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHERK( 'L', 'N', 2, 0, RALPHA, A, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZHERK( 'L', 'C', 2, 0, RALPHA, A, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 70 INFOT = 1 CALL ZSYRK( '/', 'N', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZSYRK( 'U', 'C', 0, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYRK( 'U', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYRK( 'U', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYRK( 'L', 'N', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYRK( 'L', 'T', -1, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYRK( 'U', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYRK( 'U', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYRK( 'L', 'N', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYRK( 'L', 'T', 0, -1, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYRK( 'U', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYRK( 'U', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYRK( 'L', 'N', 2, 0, ALPHA, A, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYRK( 'L', 'T', 0, 2, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZSYRK( 'U', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZSYRK( 'U', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) 
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZSYRK( 'L', 'N', 2, 0, ALPHA, A, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 10 CALL ZSYRK( 'L', 'T', 2, 0, ALPHA, A, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 80 INFOT = 1 CALL ZHER2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZHER2K( 'U', 'T', 0, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHER2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHER2K( 'U', 'C', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHER2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZHER2K( 'L', 'C', -1, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHER2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHER2K( 'U', 'C', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHER2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZHER2K( 'L', 'C', 0, -1, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2K( 'U', 'C', 0, 2, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZHER2K( 'L', 'C', 0, 2, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, RBETA, C, 2 ) 
CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2K( 'U', 'C', 0, 2, ALPHA, A, 2, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, RBETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZHER2K( 'L', 'C', 0, 2, ALPHA, A, 2, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHER2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHER2K( 'U', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHER2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZHER2K( 'L', 'C', 2, 0, ALPHA, A, 1, B, 1, RBETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) GO TO 100 90 INFOT = 1 CALL ZSYR2K( '/', 'N', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 2 CALL ZSYR2K( 'U', 'C', 0, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYR2K( 'U', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYR2K( 'U', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYR2K( 'L', 'N', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 3 CALL ZSYR2K( 'L', 'T', -1, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYR2K( 'U', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYR2K( 'U', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYR2K( 'L', 'N', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 4 CALL ZSYR2K( 'L', 'T', 0, -1, ALPHA, A, 1, B, 1, BETA, C, 
1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYR2K( 'U', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYR2K( 'U', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYR2K( 'L', 'N', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 7 CALL ZSYR2K( 'L', 'T', 0, 2, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYR2K( 'U', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 1, BETA, C, 2 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 9 CALL ZSYR2K( 'L', 'T', 0, 2, ALPHA, A, 2, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYR2K( 'U', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYR2K( 'U', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYR2K( 'L', 'N', 2, 0, ALPHA, A, 2, B, 2, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) INFOT = 12 CALL ZSYR2K( 'L', 'T', 2, 0, ALPHA, A, 1, B, 1, BETA, C, 1 ) CALL CHKXER( SRNAMT, INFOT, NOUT, LERR, OK ) * 100 IF( OK )THEN WRITE( NOUT, FMT = 9999 )SRNAMT ELSE WRITE( NOUT, FMT = 9998 )SRNAMT END IF RETURN * 9999 FORMAT( ' ', A6, ' PASSED THE TESTS OF ERROR-EXITS' ) 9998 FORMAT( ' ******* ', A6, ' FAILED THE TESTS OF ERROR-EXITS *****', $ '**' ) * * End of ZCHKE. * END
      SUBROUTINE ZMAKE( TYPE, UPLO, DIAG, M, N, A, NMAX, AA, LDA, RESET,
     $                  TRANSL )
*
*  Generates values for an M by N matrix A.
*  Stores the values in the array AA in the data structure required
*  by the routine, with unwanted elements set to rogue value.
*
*  TYPE is 'GE', 'HE', 'SY' or 'TR'.
*
*  Arguments
*  =========
*  TYPE   (in)  'GE' general, 'HE' Hermitian, 'SY' symmetric or
*               'TR' triangular.
*  UPLO   (in)  For 'HE', 'SY' and 'TR': 'U' selects the upper
*               triangle, 'L' the lower triangle.
*  DIAG   (in)  For 'TR' only: 'U' forces a unit diagonal.
*  M, N   (in)  Number of rows and columns to generate.
*  A      (out) NMAX by * work matrix holding the generated values.
*  NMAX   (in)  Leading dimension of A.
*  AA     (out) Copy of A stored column by column with leading
*               dimension LDA; unreferenced entries are set to ROGUE.
*  LDA    (in)  Leading dimension used when indexing AA.
*  RESET  (i/o) Passed straight to ZBEG, which restarts its sequence
*               and clears the flag when it is .TRUE. on entry.
*  TRANSL (in)  Constant added to every generated element.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Parameters ..
      COMPLEX*16         ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0D0, 0.0D0 ),
     $                   ONE = ( 1.0D0, 0.0D0 ) )
      COMPLEX*16         ROGUE
      PARAMETER          ( ROGUE = ( -1.0D10, 1.0D10 ) )
      DOUBLE PRECISION   RZERO
      PARAMETER          ( RZERO = 0.0D0 )
      DOUBLE PRECISION   RROGUE
      PARAMETER          ( RROGUE = -1.0D10 )
*     .. Scalar Arguments ..
      COMPLEX*16         TRANSL
      INTEGER            LDA, M, N, NMAX
      LOGICAL            RESET
      CHARACTER*1        DIAG, UPLO
      CHARACTER*2        TYPE
*     .. Array Arguments ..
      COMPLEX*16         A( NMAX, * ), AA( * )
*     .. Local Scalars ..
      INTEGER            I, IBEG, IEND, J, JJ
      LOGICAL            GEN, HER, LOWER, SYM, TRI, UNIT, UPPER
*     .. External Functions ..
      COMPLEX*16         ZBEG
      EXTERNAL           ZBEG
*     .. Intrinsic Functions ..
      INTRINSIC          DCMPLX, DCONJG, DBLE
*     .. Executable Statements ..
      GEN = TYPE.EQ.'GE'
      HER = TYPE.EQ.'HE'
      SYM = TYPE.EQ.'SY'
      TRI = TYPE.EQ.'TR'
      UPPER = ( HER.OR.SYM.OR.TRI ).AND.UPLO.EQ.'U'
      LOWER = ( HER.OR.SYM.OR.TRI ).AND.UPLO.EQ.'L'
      UNIT = TRI.AND.DIAG.EQ.'U'
*
*     Generate data in array A.
*
      DO 20 J = 1, N
         DO 10 I = 1, M
            IF( GEN.OR.( UPPER.AND.I.LE.J ).OR.( LOWER.AND.I.GE.J ) )
     $          THEN
               A( I, J ) = ZBEG( RESET ) + TRANSL
               IF( I.NE.J )THEN
*                 Set some elements to zero
                  IF( N.GT.3.AND.J.EQ.N/2 )
     $               A( I, J ) = ZERO
*                 Fill the mirror element so that A is a complete
*                 Hermitian, symmetric or triangular matrix.
                  IF( HER )THEN
                     A( J, I ) = DCONJG( A( I, J ) )
                  ELSE IF( SYM )THEN
                     A( J, I ) = A( I, J )
                  ELSE IF( TRI )THEN
                     A( J, I ) = ZERO
                  END IF
               END IF
            END IF
   10    CONTINUE
*        Diagonal fix-ups: real diagonal for Hermitian, shifted
*        diagonal for triangular, exact ONE for unit triangular.
         IF( HER )
     $      A( J, J ) = DCMPLX( DBLE( A( J, J ) ), RZERO )
         IF( TRI )
     $      A( J, J ) = A( J, J ) + ONE
         IF( UNIT )
     $      A( J, J ) = ONE
   20 CONTINUE
*
*     Store elements in array AA in data structure required by routine.
*
      IF( TYPE.EQ.'GE' )THEN
         DO 50 J = 1, N
            DO 30 I = 1, M
               AA( I + ( J - 1 )*LDA ) = A( I, J )
   30       CONTINUE
*           Rows M+1 to LDA are padding.
            DO 40 I = M + 1, LDA
               AA( I + ( J - 1 )*LDA ) = ROGUE
   40       CONTINUE
   50    CONTINUE
      ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'SY'.OR.TYPE.EQ.'TR' )THEN
         DO 90 J = 1, N
*           IBEG to IEND is the row range of column J that is copied;
*           a unit diagonal is excluded from it.
            IF( UPPER )THEN
               IBEG = 1
               IF( UNIT )THEN
                  IEND = J - 1
               ELSE
                  IEND = J
               END IF
            ELSE
               IF( UNIT )THEN
                  IBEG = J + 1
               ELSE
                  IBEG = J
               END IF
               IEND = N
            END IF
            DO 60 I = 1, IBEG - 1
               AA( I + ( J - 1 )*LDA ) = ROGUE
   60       CONTINUE
            DO 70 I = IBEG, IEND
               AA( I + ( J - 1 )*LDA ) = A( I, J )
   70       CONTINUE
            DO 80 I = IEND + 1, LDA
               AA( I + ( J - 1 )*LDA ) = ROGUE
   80       CONTINUE
            IF( HER )THEN
*              Put a rogue value in the imaginary part of the
*              diagonal element of the Hermitian matrix.
               JJ = J + ( J - 1 )*LDA
               AA( JJ ) = DCMPLX( DBLE( AA( JJ ) ), RROGUE )
            END IF
   90    CONTINUE
      END IF
      RETURN
*
*     End of ZMAKE.
*
      END
      SUBROUTINE ZMMCH( TRANSA, TRANSB, M, N, KK, ALPHA, A, LDA, B, LDB,
     $                  BETA, C, LDC, CT, G, CC, LDCC, EPS, ERR, FATAL,
     $                  NOUT, MV )
*
*  Checks the results of the computational tests.
*
*  For each column J = 1, ..., N the routine forms
*     CT = ALPHA*op( A )*op( B )( :, J ) + BETA*C( :, J )
*  together with a gauge G built from the same sums using ABS1, and
*  compares CT with column J of CC.
*
*  Arguments
*  =========
*  TRANSA, TRANSB (in)  'T' or 'C' selects the transposed operand,
*                       'C' additionally conjugates it; any other
*                       value uses the operand as stored.
*  M, N, KK       (in)  Rows of the result, columns of the result
*                       and length of the inner product.
*  ALPHA, BETA    (in)  Scalars of the update.
*  A, B, C        (in)  Operands with leading dimensions LDA, LDB, LDC.
*  CT             (wk)  Length-M workspace for the expected column.
*  G              (wk)  Length-M workspace for the gauges.
*  CC             (in)  Result under test, leading dimension LDCC.
*  EPS            (in)  Scale used in the error ratio.
*  ERR            (out) Largest error ratio of the last column
*                       examined; it is reset for every column.
*  FATAL          (i/o) Set to .TRUE. when ERR*SQRT( EPS ) reaches
*                       one; never cleared here.
*  NOUT           (in)  Unit number for the failure report.
*  MV             (in)  Selects the order in which CT and CC are
*                       printed in the failure report.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Parameters ..
      COMPLEX*16         ZERO
      PARAMETER          ( ZERO = ( 0.0D0, 0.0D0 ) )
      DOUBLE PRECISION   RZERO, RONE
      PARAMETER          ( RZERO = 0.0D0, RONE = 1.0D0 )
*     .. Scalar Arguments ..
      COMPLEX*16         ALPHA, BETA
      DOUBLE PRECISION   EPS, ERR
      INTEGER            KK, LDA, LDB, LDC, LDCC, M, N, NOUT
      LOGICAL            FATAL, MV
      CHARACTER*1        TRANSA, TRANSB
*     .. Array Arguments ..
      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * ),
     $                   CC( LDCC, * ), CT( * )
      DOUBLE PRECISION   G( * )
*     .. Local Scalars ..
      COMPLEX*16         CL
      DOUBLE PRECISION   ERRI
      INTEGER            I, J, K
      LOGICAL            CTRANA, CTRANB, TRANA, TRANB
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, DIMAG, DCONJG, MAX, DBLE, SQRT
*     .. Statement Functions ..
      DOUBLE PRECISION   ABS1
*     .. Statement Function definitions ..
      ABS1( CL ) = ABS( DBLE( CL ) ) + ABS( DIMAG( CL ) )
*     .. Executable Statements ..
      TRANA = TRANSA.EQ.'T'.OR.TRANSA.EQ.'C'
      TRANB = TRANSB.EQ.'T'.OR.TRANSB.EQ.'C'
      CTRANA = TRANSA.EQ.'C'
      CTRANB = TRANSB.EQ.'C'
*
*     Compute expected result, one column at a time, in CT using data
*     in A, B and C.
*     Compute gauges in G.
*
      DO 220 J = 1, N
*
         DO 10 I = 1, M
            CT( I ) = ZERO
            G( I ) = RZERO
   10    CONTINUE
         IF( .NOT.TRANA.AND..NOT.TRANB )THEN
            DO 30 K = 1, KK
               DO 20 I = 1, M
                  CT( I ) = CT( I ) + A( I, K )*B( K, J )
                  G( I ) = G( I ) + ABS1( A( I, K ) )*ABS1( B( K, J ) )
   20          CONTINUE
   30       CONTINUE
         ELSE IF( TRANA.AND..NOT.TRANB )THEN
            IF( CTRANA )THEN
               DO 50 K = 1, KK
                  DO 40 I = 1, M
                     CT( I ) = CT( I ) + DCONJG( A( K, I ) )*B( K, J )
                     G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                        ABS1( B( K, J ) )
   40             CONTINUE
   50          CONTINUE
            ELSE
               DO 70 K = 1, KK
                  DO 60 I = 1, M
                     CT( I ) = CT( I ) + A( K, I )*B( K, J )
                     G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                        ABS1( B( K, J ) )
   60             CONTINUE
   70          CONTINUE
            END IF
         ELSE IF( .NOT.TRANA.AND.TRANB )THEN
            IF( CTRANB )THEN
               DO 90 K = 1, KK
                  DO 80 I = 1, M
                     CT( I ) = CT( I ) + A( I, K )*DCONJG( B( J, K ) )
                     G( I ) = G( I ) + ABS1( A( I, K ) )*
     $                        ABS1( B( J, K ) )
   80             CONTINUE
   90          CONTINUE
            ELSE
               DO 110 K = 1, KK
                  DO 100 I = 1, M
                     CT( I ) = CT( I ) + A( I, K )*B( J, K )
                     G( I ) = G( I ) + ABS1( A( I, K ) )*
     $                        ABS1( B( J, K ) )
  100             CONTINUE
  110          CONTINUE
            END IF
         ELSE IF( TRANA.AND.TRANB )THEN
            IF( CTRANA )THEN
               IF( CTRANB )THEN
                  DO 130 K = 1, KK
                     DO 120 I = 1, M
                        CT( I ) = CT( I ) + DCONJG( A( K, I ) )*
     $                            DCONJG( B( J, K ) )
                        G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                           ABS1( B( J, K ) )
  120                CONTINUE
  130             CONTINUE
               ELSE
                  DO 150 K = 1, KK
                     DO 140 I = 1, M
                        CT( I ) = CT( I ) + DCONJG( A( K, I ) )*
     $                            B( J, K )
                        G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                           ABS1( B( J, K ) )
  140                CONTINUE
  150             CONTINUE
               END IF
            ELSE
               IF( CTRANB )THEN
                  DO 170 K = 1, KK
                     DO 160 I = 1, M
                        CT( I ) = CT( I ) + A( K, I )*
     $                            DCONJG( B( J, K ) )
                        G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                           ABS1( B( J, K ) )
  160                CONTINUE
  170             CONTINUE
               ELSE
                  DO 190 K = 1, KK
                     DO 180 I = 1, M
                        CT( I ) = CT( I ) + A( K, I )*B( J, K )
                        G( I ) = G( I ) + ABS1( A( K, I ) )*
     $                           ABS1( B( J, K ) )
  180                CONTINUE
  190             CONTINUE
               END IF
            END IF
         END IF
         DO 200 I = 1, M
            CT( I ) = ALPHA*CT( I ) + BETA*C( I, J )
            G( I ) = ABS1( ALPHA )*G( I ) +
     $               ABS1( BETA )*ABS1( C( I, J ) )
  200    CONTINUE
*
*        Compute the error ratio for this result.
*
*        ERR is DOUBLE PRECISION, so it is initialised with the real
*        zero RZERO rather than the COMPLEX*16 constant ZERO.
         ERR = RZERO
         DO 210 I = 1, M
            ERRI = ABS1( CT( I ) - CC( I, J ) )/EPS
            IF( G( I ).NE.RZERO )
     $         ERRI = ERRI/G( I )
            ERR = MAX( ERR, ERRI )
            IF( ERR*SQRT( EPS ).GE.RONE )
     $         GO TO 230
  210    CONTINUE
*
  220 CONTINUE
*
*     If the loop completes, all results are at least half accurate.
      GO TO 250
*
*     Report fatal error.
*
  230 FATAL = .TRUE.
      WRITE( NOUT, FMT = 9999 )
      DO 240 I = 1, M
         IF( MV )THEN
            WRITE( NOUT, FMT = 9998 )I, CT( I ), CC( I, J )
         ELSE
            WRITE( NOUT, FMT = 9998 )I, CC( I, J ), CT( I )
         END IF
  240 CONTINUE
      IF( N.GT.1 )
     $   WRITE( NOUT, FMT = 9997 )J
*
  250 CONTINUE
      RETURN
*
 9999 FORMAT( ' ******* FATAL ERROR - COMPUTED RESULT IS LESS THAN HAL',
     $      'F ACCURATE *******', /' EXPECTED RE',
     $      'SULT COMPUTED RESULT' )
 9998 FORMAT( 1X, I7, 2( ' (', G15.6, ',', G15.6, ')' ) )
 9997 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
*
*     End of ZMMCH.
*
      END
      LOGICAL FUNCTION LZE( RI, RJ, LR )
*
*  Tests if two arrays are identical.
*
*  Returns .TRUE. when RI( I ) equals RJ( I ) for every I from 1 to
*  LR, and .FALSE. at the first difference.  With LR less than 1 no
*  element is compared and the result is .TRUE..
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      INTEGER            LR
*     .. Array Arguments ..
      COMPLEX*16         RI( * ), RJ( * )
*     .. Local Scalars ..
      INTEGER            I
*     .. Executable Statements ..
      DO 10 I = 1, LR
         IF( RI( I ).NE.RJ( I ) )
     $      GO TO 20
   10 CONTINUE
      LZE = .TRUE.
      GO TO 30
   20 CONTINUE
      LZE = .FALSE.
   30 RETURN
*
*     End of LZE.
*
      END
      LOGICAL FUNCTION LZERES( TYPE, UPLO, M, N, AA, AS, LDA )
*
*  Tests if selected elements in two arrays are equal.
*
*  TYPE is 'GE' or 'HE' or 'SY'.
*
*  For 'GE' the rows M+1 to LDA of columns 1 to N are compared.
*  For 'HE' and 'SY' the elements of columns 1 to N lying outside
*  the triangle selected by UPLO ('U' upper, otherwise lower) are
*  compared, again down to row LDA.
*  For any other TYPE nothing is compared and the result is .TRUE..
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      INTEGER            LDA, M, N
      CHARACTER*1        UPLO
      CHARACTER*2        TYPE
*     .. Array Arguments ..
      COMPLEX*16         AA( LDA, * ), AS( LDA, * )
*     .. Local Scalars ..
      INTEGER            I, IBEG, IEND, J
      LOGICAL            UPPER
*     .. Executable Statements ..
      UPPER = UPLO.EQ.'U'
      IF( TYPE.EQ.'GE' )THEN
         DO 20 J = 1, N
            DO 10 I = M + 1, LDA
               IF( AA( I, J ).NE.AS( I, J ) )
     $            GO TO 70
   10       CONTINUE
   20    CONTINUE
      ELSE IF( TYPE.EQ.'HE'.OR.TYPE.EQ.'SY' )THEN
         DO 50 J = 1, N
*           IBEG to IEND is the referenced part of column J; only the
*           rows outside that range are checked.
            IF( UPPER )THEN
               IBEG = 1
               IEND = J
            ELSE
               IBEG = J
               IEND = N
            END IF
            DO 30 I = 1, IBEG - 1
               IF( AA( I, J ).NE.AS( I, J ) )
     $            GO TO 70
   30       CONTINUE
            DO 40 I = IEND + 1, LDA
               IF( AA( I, J ).NE.AS( I, J ) )
     $            GO TO 70
   40       CONTINUE
   50    CONTINUE
      END IF
*
      LZERES = .TRUE.
      GO TO 80
   70 CONTINUE
      LZERES = .FALSE.
   80 RETURN
*
*     End of LZERES.
*
      END
      COMPLEX*16 FUNCTION ZBEG( RESET )
*
*  Generates complex numbers as pairs of random numbers uniformly
*  distributed between -0.5 and 0.5.
*
*  RESET (i/o)  When .TRUE. on entry the generator state is
*               re-initialised and RESET is set to .FALSE., so the
*               actual argument must be a variable.
*
*  The state I, IC, J, MI, MJ is kept between calls by the SAVE
*  statement.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      LOGICAL            RESET
*     .. Local Scalars ..
      INTEGER            I, IC, J, MI, MJ
*     .. Save statement ..
      SAVE               I, IC, J, MI, MJ
*     .. Intrinsic Functions ..
      INTRINSIC          DCMPLX
*     .. Executable Statements ..
      IF( RESET )THEN
*        Initialize local variables.
         MI = 891
         MJ = 457
         I = 7
         J = 7
         IC = 0
         RESET = .FALSE.
      END IF
*
*     The sequence of values of I or J is bounded between 1 and 999.
*     If initial I or J = 1,2,3,6,7 or 9, the period will be 50.
*     If initial I or J = 4 or 8, the period will be 25.
*     If initial I or J = 5, the period will be 10.
*     IC is used to break up the period by skipping 1 value of I or J
*     in 6.
*
      IC = IC + 1
   10 I = I*MI
      J = J*MJ
*     Reduce I and J modulo 1000.
      I = I - 1000*( I/1000 )
      J = J - 1000*( J/1000 )
      IF( IC.GE.5 )THEN
         IC = 0
         GO TO 10
      END IF
      ZBEG = DCMPLX( ( I - 500 )/1001.0D0, ( J - 500 )/1001.0D0 )
      RETURN
*
*     End of ZBEG.
*
      END
      DOUBLE PRECISION FUNCTION DDIFF( X, Y )
*
*  Returns the difference X - Y.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      DOUBLE PRECISION   X, Y
*     .. Executable Statements ..
      DDIFF = X - Y
      RETURN
*
*     End of DDIFF.
*
      END
      SUBROUTINE CHKXER( SRNAMT, INFOT, NOUT, LERR, OK )
*
*  Tests whether XERBLA has detected an error when it should.
*
*  SRNAMT (in)  Six-character routine name printed in the report.
*  INFOT  (in)  Parameter number printed in the report.
*  NOUT   (in)  Unit number for the report.
*  LERR   (i/o) If .FALSE. on entry a report is written and OK is
*               cleared; LERR is always .FALSE. on exit.
*  OK     (i/o) Set to .FALSE. when LERR was .FALSE. on entry,
*               otherwise left unchanged.
*
*  Auxiliary routine for test program for Level 3 Blas.
*
*  -- Written on 8-February-1989.
*     Jack Dongarra, Argonne National Laboratory.
*     Iain Duff, AERE Harwell.
*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
*     Sven Hammarling, Numerical Algorithms Group Ltd.
*
*     .. Scalar Arguments ..
      INTEGER            INFOT, NOUT
      LOGICAL            LERR, OK
      CHARACTER*6        SRNAMT
*     .. Executable Statements ..
      IF( .NOT.LERR )THEN
         WRITE( NOUT, FMT = 9999 )INFOT, SRNAMT
         OK = .FALSE.
      END IF
      LERR = .FALSE.
      RETURN
*
 9999 FORMAT( ' ***** ILLEGAL VALUE OF PARAMETER NUMBER ', I2, ' NOT D',
     $      'ETECTED BY ', A6, ' *****' )
*
*     End of CHKXER.
*
      END
SUBROUTINE XERBLA( SRNAME, INFO ) * * This is a special version of XERBLA to be used only as part of * the test program for testing error exits from the Level 3 BLAS * routines. * * XERBLA is an error handler for the Level 3 BLAS routines. * * It is called by the Level 3 BLAS routines if an input parameter is * invalid. * * Auxiliary routine for test program for Level 3 Blas. * * -- Written on 8-February-1989. * Jack Dongarra, Argonne National Laboratory. * Iain Duff, AERE Harwell. * Jeremy Du Croz, Numerical Algorithms Group Ltd. * Sven Hammarling, Numerical Algorithms Group Ltd. * * .. Scalar Arguments ..
INTEGER INFO CHARACTER*6 SRNAME * .. Scalars in Common .. INTEGER INFOT, NOUT LOGICAL LERR, OK CHARACTER*6 SRNAMT * .. Common blocks .. COMMON /INFOC/INFOT, NOUT, OK, LERR COMMON /SRNAMC/SRNAMT * .. Executable Statements .. LERR = .TRUE. IF( INFO.NE.INFOT )THEN IF( INFOT.NE.0 )THEN WRITE( NOUT, FMT = 9999 )INFO, INFOT ELSE WRITE( NOUT, FMT = 9997 )INFO END IF OK = .FALSE. END IF IF( SRNAME.NE.SRNAMT )THEN WRITE( NOUT, FMT = 9998 )SRNAME, SRNAMT OK = .FALSE. END IF RETURN * 9999 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, ' INSTEAD', $ ' OF ', I2, ' *******' ) 9998 FORMAT( ' ******* XERBLA WAS CALLED WITH SRNAME = ', A6, ' INSTE', $ 'AD OF ', A6, ' *******' ) 9997 FORMAT( ' ******* XERBLA WAS CALLED WITH INFO = ', I6, $ ' *******' ) * * End of XERBLA * END blis-0.6.1/blastest/src/sblat1.c000066400000000000000000001133361360743507500164320ustar00rootroot00000000000000/* sblat1.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ struct { integer icase, n, incx, incy; logical pass; } combla_; #define combla_1 combla_ /* Table of constant values */ static integer c__1 = 1; static integer c__9 = 9; static real c_b35 = 1.f; static real c_b39 = .1f; static integer c__5 = 5; static real c_b63 = 0.f; /* > \brief \b SBLAT1 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM SBLAT1 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the REAL Level 1 BLAS. 
*/ /* > */ /* > Based upon the original BLAS test routine together with: */ /* > F06EAF Example Program Text */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup single_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static real sfac = 9.765625e-4f; /* Format strings */ static char fmt_99999[] = "(\002 Real BLAS Test Program Results\002,/1x)"; static char fmt_99998[] = "(\002 ----" "- PASS -----\002)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer ic; extern /* Subroutine */ int check0_(real *), check1_(real *), check2_( real *), check3_(real *), header_(void); /* Fortran I/O blocks */ static cilist io___2 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___4 = { 0, 6, 0, fmt_99998, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___2); e_wsfe(); for (ic = 1; ic <= 13; ++ic) { combla_1.icase = ic; header_(); /* .. Initialize PASS, INCX, and INCY for a new case. .. */ /* .. the value 9999 for INCX or INCY will appear in the .. */ /* .. detailed output, if any, for cases that do not involve .. */ /* .. these parameters .. 
*/ combla_1.pass = TRUE_; combla_1.incx = 9999; combla_1.incy = 9999; if (combla_1.icase == 3 || combla_1.icase == 11) { check0_(&sfac); } else if (combla_1.icase == 7 || combla_1.icase == 8 || combla_1.icase == 9 || combla_1.icase == 10) { check1_(&sfac); } else if (combla_1.icase == 1 || combla_1.icase == 2 || combla_1.icase == 5 || combla_1.icase == 6 || combla_1.icase == 12 || combla_1.icase == 13) { check2_(&sfac); } else if (combla_1.icase == 4) { check3_(&sfac); } /* -- Print */ if (combla_1.pass) { s_wsfe(&io___4); e_wsfe(); } /* L20: */ } s_stop("", (ftnlen)0); return 0; } /* main */ /* Subroutine */ int header_(void) { /* Initialized data */ static char l[6*13] = " SDOT " "SAXPY " "SROTG " " SROT " "SCOPY " "SSWA" "P " "SNRM2 " "SASUM " "SSCAL " "ISAMAX" "SROTMG" "SROTM " "SDSDOT" ; /* Format strings */ static char fmt_99999[] = "(/\002 Test of subprogram number\002,i3,12x,a" "6)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___6 = { 0, 6, 0, fmt_99999, 0 }; /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Arrays .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. 
*/ s_wsfe(&io___6); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, l + (0 + (0 + (combla_1.icase - 1) * 6)), (ftnlen)6); e_wsfe(); return 0; } /* header_ */ /* Subroutine */ int check0_(real *sfac) { /* Initialized data */ static real ds1[8] = { .8f,.6f,.8f,-.6f,.8f,0.f,1.f,0.f }; static real datrue[8] = { .5f,.5f,.5f,-.5f,-.5f,0.f,1.f,1.f }; static real dbtrue[8] = { 0.f,.6f,0.f,-.6f,0.f,0.f,1.f,0.f }; static real dab[36] /* was [4][9] */ = { .1f,.3f,1.2f,.2f,.7f,.2f,.6f, 4.2f,0.f,0.f,0.f,0.f,4.f,-1.f,2.f,4.f,6e-10f,.02f,1e5f,10.f,4e10f, .02f,1e-5f,10.f,2e-10f,.04f,1e5f,10.f,2e10f,.04f,1e-5f,10.f,4.f, -2.f,8.f,4.f }; static real dtrue[81] /* was [9][9] */ = { 0.f,0.f,1.3f,.2f,0.f,0.f, 0.f,.5f,0.f,0.f,0.f,4.5f,4.2f,1.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,0.f, -2.f,0.f,0.f,0.f,0.f,0.f,0.f,0.f,4.f,-1.f,0.f,0.f,0.f,0.f,0.f, .015f,0.f,10.f,-1.f,0.f,-1e-4f,0.f,1.f,0.f,0.f,.06144f,10.f,-1.f, 4096.f,-1e6f,0.f,1.f,0.f,0.f,15.f,10.f,-1.f,5e-5f,0.f,1.f,0.f,0.f, 0.f,15.f,10.f,-1.f,5e5f,-4096.f,1.f,.004096f,0.f,0.f,7.f,4.f,0.f, 0.f,-.5f,-.25f,0.f }; static real d12 = 4096.f; static real da1[8] = { .3f,.4f,-.3f,-.4f,-.3f,0.f,0.f,1.f }; static real db1[8] = { .4f,.3f,.4f,.3f,-.4f,0.f,1.f,0.f }; static real dc1[8] = { .6f,.8f,-.6f,.8f,.6f,1.f,0.f,1.f }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k; real sa, sb, sc, ss, dtemp[9]; extern /* Subroutine */ int srotg_(real *, real *, real *, real *), stest_(integer *, real *, real *, real *, real *), stest1_(real *, real *, real *, real *), srotmg_(real *, real *, real *, real *, real *); /* Fortran I/O blocks */ static cilist io___23 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. 
Data statements .. */ /* INPUT FOR MODIFIED GIVENS */ /* TRUE RESULTS FOR MODIFIED GIVENS */ /* 4096 = 2 ** 12 */ dtrue[0] = .092307692307692313f; dtrue[1] = .27692307692307694f; dtrue[6] = -.16666666666666666f; dtrue[9] = .18666666666666668f; dtrue[10] = .65333333333333332f; dtrue[17] = .14285714285714285f; dtrue[36] = d12 * d12 * 4.5e-10f; dtrue[38] = 4e5f / (d12 * 3.f); dtrue[41] = 1.f / d12; dtrue[43] = 1e4f / (d12 * 3.f); dtrue[45] = 4e10f / (d12 * 1.5f * d12); dtrue[46] = .013333333333333334f; dtrue[52] = d12 * 5e-7f; dtrue[54] = .026666666666666668f; dtrue[55] = d12 * d12 * 1.3333333333333334e-10f; dtrue[60] = -dtrue[41]; dtrue[62] = 1e4f / d12; dtrue[63] = dtrue[54]; dtrue[64] = 2e10f / (d12 * 1.5f * d12); dtrue[72] = 4.5714285714285712f; dtrue[73] = -2.2857142857142856f; /* .. Executable Statements .. */ /* Compute true values which cannot be prestored */ /* in decimal notation */ dbtrue[0] = 1.6666666666666667f; dbtrue[2] = -1.6666666666666667f; dbtrue[4] = 1.6666666666666667f; for (k = 1; k <= 8; ++k) { /* .. Set N=K for identification in output if any .. */ combla_1.n = k; if (combla_1.icase == 3) { /* .. SROTG .. */ if (k > 8) { goto L40; } sa = da1[k - 1]; sb = db1[k - 1]; srotg_(&sa, &sb, &sc, &ss); stest1_(&sa, &datrue[k - 1], &datrue[k - 1], sfac); stest1_(&sb, &dbtrue[k - 1], &dbtrue[k - 1], sfac); stest1_(&sc, &dc1[k - 1], &dc1[k - 1], sfac); stest1_(&ss, &ds1[k - 1], &ds1[k - 1], sfac); } else if (combla_1.icase == 11) { /* .. SROTMG .. 
*/ for (i__ = 1; i__ <= 4; ++i__) { dtemp[i__ - 1] = dab[i__ + (k << 2) - 5]; dtemp[i__ + 3] = 0.f; } dtemp[8] = 0.f; srotmg_(dtemp, &dtemp[1], &dtemp[2], &dtemp[3], &dtemp[4]); stest_(&c__9, dtemp, &dtrue[k * 9 - 9], &dtrue[k * 9 - 9], sfac); } else { s_wsle(&io___23); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK0", (ftnlen)28); e_wsle(); s_stop("", (ftnlen)0); } /* L20: */ } L40: return 0; } /* check0_ */ /* Subroutine */ int check1_(real *sfac) { /* Initialized data */ static real sa[10] = { .3f,-1.f,0.f,1.f,.3f,.3f,.3f,.3f,.3f,.3f }; static real dv[80] /* was [8][5][2] */ = { .1f,2.f,2.f,2.f,2.f,2.f,2.f, 2.f,.3f,3.f,3.f,3.f,3.f,3.f,3.f,3.f,.3f,-.4f,4.f,4.f,4.f,4.f,4.f, 4.f,.2f,-.6f,.3f,5.f,5.f,5.f,5.f,5.f,.1f,-.3f,.5f,-.1f,6.f,6.f, 6.f,6.f,.1f,8.f,8.f,8.f,8.f,8.f,8.f,8.f,.3f,9.f,9.f,9.f,9.f,9.f, 9.f,9.f,.3f,2.f,-.4f,2.f,2.f,2.f,2.f,2.f,.2f,3.f,-.6f,5.f,.3f,2.f, 2.f,2.f,.1f,4.f,-.3f,6.f,-.5f,7.f,-.1f,3.f }; static real dtrue1[5] = { 0.f,.3f,.5f,.7f,.6f }; static real dtrue3[5] = { 0.f,.3f,.7f,1.1f,1.f }; static real dtrue5[80] /* was [8][5][2] */ = { .1f,2.f,2.f,2.f,2.f, 2.f,2.f,2.f,-.3f,3.f,3.f,3.f,3.f,3.f,3.f,3.f,0.f,0.f,4.f,4.f,4.f, 4.f,4.f,4.f,.2f,-.6f,.3f,5.f,5.f,5.f,5.f,5.f,.03f,-.09f,.15f, -.03f,6.f,6.f,6.f,6.f,.1f,8.f,8.f,8.f,8.f,8.f,8.f,8.f,.09f,9.f, 9.f,9.f,9.f,9.f,9.f,9.f,.09f,2.f,-.12f,2.f,2.f,2.f,2.f,2.f,.06f, 3.f,-.18f,5.f,.09f,2.f,2.f,2.f,.03f,4.f,-.09f,6.f,-.15f,7.f,-.03f, 3.f }; static integer itrue2[5] = { 0,1,2,2,3 }; /* System generated locals */ integer i__1; real r__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__; real sx[8]; integer np1, len; extern real snrm2_(integer *, real *, integer *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); real stemp[1]; extern real sasum_(integer *, real *, integer *); real strue[8]; extern /* Subroutine */ int stest_(integer *, real 
*, real *, real *, real *), itest1_(integer *, integer *), stest1_(real *, real *, real *, real *); extern integer isamax_(integer *, real *, integer *); /* Fortran I/O blocks */ static cilist io___36 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) { for (np1 = 1; np1 <= 5; ++np1) { combla_1.n = np1 - 1; len = max(combla_1.n,1) << 1; /* .. Set vector arguments .. */ i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { sx[i__ - 1] = dv[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L20: */ } if (combla_1.icase == 7) { /* .. SNRM2 .. */ stemp[0] = dtrue1[np1 - 1]; r__1 = snrm2_(&combla_1.n, sx, &combla_1.incx); stest1_(&r__1, stemp, stemp, sfac); } else if (combla_1.icase == 8) { /* .. SASUM .. */ stemp[0] = dtrue3[np1 - 1]; r__1 = sasum_(&combla_1.n, sx, &combla_1.incx); stest1_(&r__1, stemp, stemp, sfac); } else if (combla_1.icase == 9) { /* .. SSCAL .. */ sscal_(&combla_1.n, &sa[(combla_1.incx - 1) * 5 + np1 - 1], sx, &combla_1.incx); i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { strue[i__ - 1] = dtrue5[i__ + (np1 + combla_1.incx * 5 << 3) - 49]; /* L40: */ } stest_(&len, sx, strue, strue, sfac); } else if (combla_1.icase == 10) { /* .. ISAMAX .. 
*/ i__1 = isamax_(&combla_1.n, sx, &combla_1.incx); itest1_(&i__1, &itrue2[np1 - 1]); } else { s_wsle(&io___36); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L60: */ } /* L80: */ } return 0; } /* check1_ */ /* Subroutine */ int check2_(real *sfac) { /* Initialized data */ static real sa = .3f; static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static real dx1[7] = { .6f,.1f,-.5f,.8f,.9f,-.3f,-.4f }; static real dy1[7] = { .5f,-.9f,.3f,.7f,-.6f,.2f,.8f }; static real dt7[16] /* was [4][4] */ = { 0.f,.3f,.21f,.62f,0.f,.3f,-.07f, .85f,0.f,.3f,-.79f,-.74f,0.f,.3f,.33f,1.27f }; static real st7b[16] /* was [4][4] */ = { .1f,.4f,.31f,.72f,.1f, .4f,.03f,.95f,.1f,.4f,-.69f,-.64f,.1f,.4f,.43f,1.37f }; static real dt8[112] /* was [7][4][4] */ = { .5f,0.f,0.f,0.f,0.f, 0.f,0.f,.68f,0.f,0.f,0.f,0.f,0.f,0.f,.68f,-.87f,0.f,0.f,0.f,0.f, 0.f,.68f,-.87f,.15f,.94f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f, .68f,0.f,0.f,0.f,0.f,0.f,0.f,.35f,-.9f,.48f,0.f,0.f,0.f,0.f,.38f, -.9f,.57f,.7f,-.75f,.2f,.98f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.68f,0.f, 0.f,0.f,0.f,0.f,0.f,.35f,-.72f,0.f,0.f,0.f,0.f,0.f,.38f,-.63f, .15f,.88f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.68f,0.f,0.f, 0.f,0.f,0.f,0.f,.68f,-.9f,.33f,0.f,0.f,0.f,0.f,.68f,-.9f,.33f,.7f, -.75f,.2f,1.04f }; static real dt10x[112] /* was [7][4][4] */ = { .6f,0.f,0.f,0.f,0.f, 0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,-.9f,0.f,0.f,0.f,0.f,0.f, .5f,-.9f,.3f,.7f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,0.f, 0.f,0.f,0.f,0.f,0.f,.3f,.1f,.5f,0.f,0.f,0.f,0.f,.8f,.1f,-.6f,.8f, .3f,-.3f,.5f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f, 0.f,-.9f,.1f,.5f,0.f,0.f,0.f,0.f,.7f,.1f,.3f,.8f,-.9f,-.3f,.5f, .6f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.5f,.3f, 0.f,0.f,0.f,0.f,0.f,.5f,.3f,-.6f,.8f,0.f,0.f,0.f }; static real dt10y[112] /* was [7][4][4] */ = { 
.5f,0.f,0.f,0.f,0.f, 0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.6f,.1f,0.f,0.f,0.f,0.f,0.f, .6f,.1f,-.5f,.8f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.6f,0.f, 0.f,0.f,0.f,0.f,0.f,-.5f,-.9f,.6f,0.f,0.f,0.f,0.f,-.4f,-.9f,.9f, .7f,-.5f,.2f,.6f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f, 0.f,0.f,-.5f,.6f,0.f,0.f,0.f,0.f,0.f,-.4f,.9f,-.5f,.6f,0.f,0.f, 0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.6f, -.9f,.1f,0.f,0.f,0.f,0.f,.6f,-.9f,.1f,.7f,-.5f,.2f,.8f }; static real ssize1[4] = { 0.f,.3f,1.6f,3.2f }; static real ssize2[28] /* was [14][2] */ = { 0.f,0.f,0.f,0.f,0.f,0.f, 0.f,0.f,0.f,0.f,0.f,0.f,0.f,0.f,1.17f,1.17f,1.17f,1.17f,1.17f, 1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f }; static real ssize3[4] = { .1f,.4f,1.7f,3.3f }; static real dpar[20] /* was [5][4] */ = { -2.f,0.f,0.f,0.f,0.f, -1.f,2.f,-3.f,-4.f,5.f,0.f,0.f,2.f,-3.f,0.f,1.f,5.f,2.f,0.f,-4.f } ; static struct { real e_1[448]; } equiv_3 = {{ .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 3.8f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 2.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, -.4f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, 0.f, 0.f, 0.f, -.8f, 3.8f, -2.2f, -1.2f, 0.f, 0.f, 0.f, -.9f, 2.8f, -1.4f, -1.3f, 0.f, 0.f, 0.f, 3.5f, -.4f, -2.2f, 4.7f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, 0.f, 0.f, 0.f, 0.f, 0.f, .1f, -3.f, 0.f, 0.f, 0.f, 0.f, -.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, 3.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, .9f, -.3f, -.4f, -2.f, .1f, 
1.4f, .8f, .6f, -.3f, -2.8f, -1.8f, .1f, 1.3f, .8f, 0.f, -.3f, -1.9f, 3.8f, .1f, -3.1f, .8f, 4.8f, -.3f, -1.5f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, 0.f, 0.f, 0.f, 0.f, 4.8f, .1f, -3.f, 0.f, 0.f, 0.f, 0.f, 3.3f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, 2.1f, .1f, -2.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, .9f, -.3f, -.4f, -1.6f, .1f, -2.2f, .8f, 5.4f, -.3f, -2.8f, -1.5f, .1f, -1.4f, .8f, 3.6f, -.3f, -1.9f, 3.7f, .1f, -2.2f, .8f, 3.6f, -.3f, -1.5f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, 0.f, 0.f, 0.f, 0.f, 0.f, -.8f, -1.f, 0.f, 0.f, 0.f, 0.f, 0.f, -.9f, -.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 3.5f, .8f, 0.f, 0.f, 0.f, 0.f, 0.f, .6f, .1f, -.5f, .8f, 0.f, 0.f, 0.f, -.8f, -1.f, 1.4f, -1.6f, 0.f, 0.f, 0.f, -.9f, -.8f, 1.3f, -1.6f, 0.f, 0.f, 0.f, 3.5f, .8f, -3.1f, 4.8f, 0.f, 0.f, 0.f }}; static struct { real e_1[448]; } equiv_7 = {{ .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, -4.8f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, -.7f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 3.5f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, 0.f, 0.f, 0.f, .7f, -4.8f, 3.f, 1.1f, 0.f, 0.f, 0.f, 1.7f, -.7f, -.7f, 2.3f, 0.f, 0.f, 0.f, -2.6f, 3.5f, -.7f, -3.6f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, 0.f, 0.f, 0.f, 0.f, 4.f, -.9f, -.3f, 0.f, 0.f, 0.f, 0.f, -.5f, -.9f, 1.5f, 0.f, 0.f, 0.f, 0.f, -1.5f, -.9f, -1.8f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, -.6f, .2f, .8f, 3.7f, -.9f, -1.2f, .7f, -1.5f, .2f, 2.2f, -.3f, -.9f, 2.1f, .7f, -1.6f, .2f, 2.f, -1.6f, -.9f, -2.1f, .7f, 2.9f, .2f, -3.8f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, 0.f, 0.f, 0.f, 0.f, 0.f, 4.f, -6.3f, 0.f, 0.f, 0.f, 0.f, 0.f, -.5f, .3f, 0.f, 0.f, 0.f, 0.f, 0.f, -1.5f, 3.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, 0.f, 0.f, 0.f, 3.7f, -7.2f, 3.f, 1.7f, 0.f, 0.f, 0.f, -.3f, .9f, -.7f, 1.9f, 0.f, 0.f, 0.f, -1.6f, 2.7f, -.7f, -3.4f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.7f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, -2.6f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, 0.f, 0.f, 0.f, 0.f, .7f, -.9f, 1.2f, 0.f, 0.f, 0.f, 0.f, 1.7f, -.9f, .5f, 0.f, 0.f, 0.f, 0.f, -2.6f, -.9f, -1.3f, 0.f, 0.f, 0.f, 0.f, .5f, -.9f, .3f, .7f, -.6f, .2f, .8f, .7f, -.9f, 1.2f, .7f, -1.5f, .2f, 1.6f, 1.7f, -.9f, .5f, .7f, -1.6f, .2f, 2.4f, -2.6f, -.9f, -1.3f, .7f, 2.9f, .2f, -4.f }}; /* System generated locals */ integer i__1; real r__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, j, ki, kn, mx, my; real sx[7], sy[7]; integer kni; real 
stx[7], sty[7]; integer kpar, lenx, leny; #define dt19x ((real *)&equiv_3) #define dt19y ((real *)&equiv_7) extern real sdot_(integer *, real *, integer *, real *, integer *); real dtemp[5]; #define dt19xa ((real *)&equiv_3) #define dt19xb ((real *)&equiv_3 + 112) #define dt19xc ((real *)&equiv_3 + 224) #define dt19xd ((real *)&equiv_3 + 336) #define dt19ya ((real *)&equiv_7) #define dt19yb ((real *)&equiv_7 + 112) #define dt19yc ((real *)&equiv_7 + 224) #define dt19yd ((real *)&equiv_7 + 336) integer ksize; real ssize[7]; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ), stest_(integer *, real *, real *, real *, real *), saxpy_( integer *, real *, real *, integer *, real *, integer *), srotm_( integer *, real *, integer *, real *, integer *, real *), stest1_( real *, real *, real *, real *); extern real sdsdot_(integer *, real *, real *, integer *, real *, integer *); /* Fortran I/O blocks */ static cilist io___80 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* FOR DROTM */ /* TRUE X RESULTS F0R ROTATIONS DROTM */ /* TRUE Y RESULTS FOR ROTATIONS DROTM */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; /* .. Initialize all argument arrays .. */ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; /* L20: */ } if (combla_1.icase == 1) { /* .. SDOT .. 
*/ r__1 = sdot_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy); stest1_(&r__1, &dt7[kn + (ki << 2) - 5], &ssize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. SAXPY .. */ saxpy_(&combla_1.n, &sa, sx, &combla_1.incx, sy, & combla_1.incy); i__1 = leny; for (j = 1; j <= i__1; ++j) { sty[j - 1] = dt8[j + (kn + (ki << 2)) * 7 - 36]; /* L40: */ } stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else if (combla_1.icase == 5) { /* .. SCOPY .. */ for (i__ = 1; i__ <= 7; ++i__) { sty[i__ - 1] = dt10y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L60: */ } scopy_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy); stest_(&leny, sy, sty, ssize2, &c_b35); } else if (combla_1.icase == 6) { /* .. SSWAP .. */ sswap_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy); for (i__ = 1; i__ <= 7; ++i__) { stx[i__ - 1] = dt10x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt10y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L80: */ } stest_(&lenx, sx, stx, ssize2, &c_b35); stest_(&leny, sy, sty, ssize2, &c_b35); } else if (combla_1.icase == 12) { /* .. SROTM .. */ kni = kn + (ki - 1 << 2); for (kpar = 1; kpar <= 4; ++kpar) { for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt19x[i__ + (kpar + (kni << 2)) * 7 - 36]; sty[i__ - 1] = dt19y[i__ + (kpar + (kni << 2)) * 7 - 36]; } for (i__ = 1; i__ <= 5; ++i__) { dtemp[i__ - 1] = dpar[i__ + kpar * 5 - 6]; } i__1 = lenx; for (i__ = 1; i__ <= i__1; ++i__) { ssize[i__ - 1] = stx[i__ - 1]; } /* SEE REMARK ABOVE ABOUT DT11X(1,2,7) */ /* AND DT11X(5,3,8). */ if (kpar == 2 && kni == 7) { ssize[0] = 2.4f; } if (kpar == 3 && kni == 8) { ssize[4] = 1.8f; } srotm_(&combla_1.n, sx, &combla_1.incx, sy, & combla_1.incy, dtemp); stest_(&lenx, sx, stx, ssize, sfac); stest_(&leny, sy, sty, sty, sfac); } } else if (combla_1.icase == 13) { /* .. SDSROT .. 
*/ r__1 = sdsdot_(&combla_1.n, &c_b39, sx, &combla_1.incx, sy, & combla_1.incy); stest1_(&r__1, &st7b[kn + (ki << 2) - 5], &ssize3[kn - 1], sfac); } else { s_wsle(&io___80); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK2", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L100: */ } /* L120: */ } return 0; } /* check2_ */ #undef dt19yd #undef dt19yc #undef dt19yb #undef dt19ya #undef dt19xd #undef dt19xc #undef dt19xb #undef dt19xa #undef dt19y #undef dt19x /* Subroutine */ int check3_(real *sfac) { /* Initialized data */ static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static real dx1[7] = { .6f,.1f,-.5f,.8f,.9f,-.3f,-.4f }; static real dy1[7] = { .5f,-.9f,.3f,.7f,-.6f,.2f,.8f }; static real sc = .8f; static real ss = .6f; static real dt9x[112] /* was [7][4][4] */ = { .6f,0.f,0.f,0.f,0.f, 0.f,0.f,.78f,0.f,0.f,0.f,0.f,0.f,0.f,.78f,-.46f,0.f,0.f,0.f,0.f, 0.f,.78f,-.46f,-.22f,1.06f,0.f,0.f,0.f,.6f,0.f,0.f,0.f,0.f,0.f, 0.f,.78f,0.f,0.f,0.f,0.f,0.f,0.f,.66f,.1f,-.1f,0.f,0.f,0.f,0.f, .96f,.1f,-.76f,.8f,.9f,-.3f,-.02f,.6f,0.f,0.f,0.f,0.f,0.f,0.f, .78f,0.f,0.f,0.f,0.f,0.f,0.f,-.06f,.1f,-.1f,0.f,0.f,0.f,0.f,.9f, .1f,-.22f,.8f,.18f,-.3f,-.02f,.6f,0.f,0.f,0.f,0.f,0.f,0.f,.78f, 0.f,0.f,0.f,0.f,0.f,0.f,.78f,.26f,0.f,0.f,0.f,0.f,0.f,.78f,.26f, -.76f,1.12f,0.f,0.f,0.f }; static real dt9y[112] /* was [7][4][4] */ = { .5f,0.f,0.f,0.f,0.f, 0.f,0.f,.04f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,-.78f,0.f,0.f,0.f,0.f, 0.f,.04f,-.78f,.54f,.08f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f, .04f,0.f,0.f,0.f,0.f,0.f,0.f,.7f,-.9f,-.12f,0.f,0.f,0.f,0.f,.64f, -.9f,-.3f,.7f,-.18f,.2f,.28f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,0.f, 0.f,0.f,0.f,0.f,0.f,.7f,-1.08f,0.f,0.f,0.f,0.f,0.f,.64f,-1.26f, .54f,.2f,0.f,0.f,0.f,.5f,0.f,0.f,0.f,0.f,0.f,0.f,.04f,0.f,0.f,0.f, 0.f,0.f,0.f,.04f,-.9f,.18f,0.f,0.f,0.f,0.f,.04f,-.9f,.18f,.7f, -.18f,.2f,.16f }; static real ssize2[28] /* was [14][2] */ 
= { 0.f,0.f,0.f,0.f,0.f,0.f, 0.f,0.f,0.f,0.f,0.f,0.f,0.f,0.f,1.17f,1.17f,1.17f,1.17f,1.17f, 1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f,1.17f }; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, k, ki, kn, mx, my; real sx[7], sy[7], stx[7], sty[7]; integer lenx, leny; real mwpc[11]; integer mwpn[11]; real mwps[11]; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); real mwpx[5], mwpy[5]; integer ksize; real copyx[5], copyy[5]; extern /* Subroutine */ int stest_(integer *, real *, real *, real *, real *); real mwptx[55] /* was [11][5] */, mwpty[55] /* was [11][5] */; integer mwpinx[11], mwpiny[11]; real mwpstx[5], mwpsty[5]; /* Fortran I/O blocks */ static cilist io___104 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; if (combla_1.icase == 4) { /* .. SROT .. 
*/ for (i__ = 1; i__ <= 7; ++i__) { sx[i__ - 1] = dx1[i__ - 1]; sy[i__ - 1] = dy1[i__ - 1]; stx[i__ - 1] = dt9x[i__ + (kn + (ki << 2)) * 7 - 36]; sty[i__ - 1] = dt9y[i__ + (kn + (ki << 2)) * 7 - 36]; /* L20: */ } srot_(&combla_1.n, sx, &combla_1.incx, sy, &combla_1.incy, & sc, &ss); stest_(&lenx, sx, stx, &ssize2[ksize * 14 - 14], sfac); stest_(&leny, sy, sty, &ssize2[ksize * 14 - 14], sfac); } else { s_wsle(&io___104); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK3", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } mwpc[0] = 1.f; for (i__ = 2; i__ <= 11; ++i__) { mwpc[i__ - 1] = 0.f; /* L80: */ } mwps[0] = 0.f; for (i__ = 2; i__ <= 6; ++i__) { mwps[i__ - 1] = 1.f; /* L100: */ } for (i__ = 7; i__ <= 11; ++i__) { mwps[i__ - 1] = -1.f; /* L120: */ } mwpinx[0] = 1; mwpinx[1] = 1; mwpinx[2] = 1; mwpinx[3] = -1; mwpinx[4] = 1; mwpinx[5] = -1; mwpinx[6] = 1; mwpinx[7] = 1; mwpinx[8] = -1; mwpinx[9] = 1; mwpinx[10] = -1; mwpiny[0] = 1; mwpiny[1] = 1; mwpiny[2] = -1; mwpiny[3] = -1; mwpiny[4] = 2; mwpiny[5] = 1; mwpiny[6] = 1; mwpiny[7] = -1; mwpiny[8] = -1; mwpiny[9] = 2; mwpiny[10] = 1; for (i__ = 1; i__ <= 11; ++i__) { mwpn[i__ - 1] = 5; /* L140: */ } mwpn[4] = 3; mwpn[9] = 3; for (i__ = 1; i__ <= 5; ++i__) { mwpx[i__ - 1] = (real) i__; mwpy[i__ - 1] = (real) i__; mwptx[i__ * 11 - 11] = (real) i__; mwpty[i__ * 11 - 11] = (real) i__; mwptx[i__ * 11 - 10] = (real) i__; mwpty[i__ * 11 - 10] = (real) (-i__); mwptx[i__ * 11 - 9] = (real) (6 - i__); mwpty[i__ * 11 - 9] = (real) (i__ - 6); mwptx[i__ * 11 - 8] = (real) i__; mwpty[i__ * 11 - 8] = (real) (-i__); mwptx[i__ * 11 - 6] = (real) (6 - i__); mwpty[i__ * 11 - 6] = (real) (i__ - 6); mwptx[i__ * 11 - 5] = (real) (-i__); mwpty[i__ * 11 - 5] = (real) i__; mwptx[i__ * 11 - 4] = (real) (i__ - 6); mwpty[i__ * 11 - 4] = (real) (6 - i__); mwptx[i__ * 11 - 3] = (real) (-i__); mwpty[i__ * 11 - 3] = (real) i__; mwptx[i__ * 11 - 1] = (real) (i__ - 6); mwpty[i__ * 11 - 1] = (real) (6 - i__); /* L160: */ } 
mwptx[4] = 1.f; mwptx[15] = 3.f; mwptx[26] = 5.f; mwptx[37] = 4.f; mwptx[48] = 5.f; mwpty[4] = -1.f; mwpty[15] = 2.f; mwpty[26] = -2.f; mwpty[37] = 4.f; mwpty[48] = -3.f; mwptx[9] = -1.f; mwptx[20] = -3.f; mwptx[31] = -5.f; mwptx[42] = 4.f; mwptx[53] = 5.f; mwpty[9] = 1.f; mwpty[20] = 2.f; mwpty[31] = 2.f; mwpty[42] = 4.f; mwpty[53] = 3.f; for (i__ = 1; i__ <= 11; ++i__) { combla_1.incx = mwpinx[i__ - 1]; combla_1.incy = mwpiny[i__ - 1]; for (k = 1; k <= 5; ++k) { copyx[k - 1] = mwpx[k - 1]; copyy[k - 1] = mwpy[k - 1]; mwpstx[k - 1] = mwptx[i__ + k * 11 - 12]; mwpsty[k - 1] = mwpty[i__ + k * 11 - 12]; /* L180: */ } srot_(&mwpn[i__ - 1], copyx, &combla_1.incx, copyy, &combla_1.incy, & mwpc[i__ - 1], &mwps[i__ - 1]); stest_(&c__5, copyx, mwpstx, mwpstx, sfac); stest_(&c__5, copyy, mwpsty, mwpsty, sfac); /* L200: */ } return 0; } /* check3_ */

/* stest_ -- element-wise comparison of a computed vector with its */
/* expected value. */
/*   len    number of elements to compare (read through the pointer). */
/*   scomp  computed values. */
/*   strue  expected values. */
/*   ssize  per-element magnitudes used to scale the tolerance. */
/*   sfac   factor applied to each difference before the comparison. */
/* Element i passes when */
/*   abs(*sfac * (scomp[i] - strue[i])) <= abs(ssize[i]) * s_epsilon_(&c_b63). */
/* The first failing element seen while combla_1.pass is still set clears */
/* that flag and writes the FAIL banner and the column header; every */
/* failing element then writes one detail record.  Always returns 0. */
/* combla_1 and c_b63 are declared outside the visible part of the file. */
/* Subroutine */ int stest_(integer *len, real *scomp, real *strue, real *
        ssize, real *sfac)
{
    /* Format strings */
    static char fmt_99999[] = "(\002 F"
            "AIL\002)";
    static char fmt_99998[] = "(/\002 CASE N INCX INCY I "
            " \002,\002 COMP(I) TRUE(I) "
            " DIFFERENCE\002,\002 SIZE(I)\002,/1x)";
    static char fmt_99997[] = "(1x,i4,i3,2i5,i3,2e36.8,2e12.4)";

    /* System generated locals */
    integer i__1;
    real r__1, r__2;

    /* Builtin functions */
    integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen);

    /* Local variables */
    integer i__;
    real sd;
    extern real s_epsilon_(real *);

    /* Fortran I/O blocks */
    /* All three are initialised with 6 in the second (unit) slot. */
    static cilist io___121 = { 0, 6, 0, fmt_99999, 0 };
    static cilist io___122 = { 0, 6, 0, fmt_99998, 0 };
    static cilist io___123 = { 0, 6, 0, fmt_99997, 0 };

/* ********************************* STEST ************************** */

/* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */
/* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */
/* NEGLIGIBLE. */

/* C. L. LAWSON, JPL, 1974 DEC 10 */

/* .. Parameters .. */
/* .. Scalar Arguments .. */
/* .. Array Arguments .. */
/* .. Scalars in Common .. */
/* .. Local Scalars .. */
/* .. External Functions .. */
/* .. Intrinsic Functions .. */
/* .. Common blocks .. */
/* .. Executable Statements .. */

    /* Parameter adjustments */
    /* Shift each array pointer back by one so that index 1 addresses the */
    /* caller's first element. */
    --ssize;
    --strue;
    --scomp;

    /* Function Body */
    i__1 = *len;
    for (i__ = 1; i__ <= i__1; ++i__) {
        sd = scomp[i__] - strue[i__];
        if ((r__2 = *sfac * sd, abs(r__2)) <= (r__1 = ssize[i__], abs(r__1))
                * s_epsilon_(&c_b63)) {
            /* Within tolerance: nothing to report for this element. */
            goto L40;
        }

/* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */

        if (! combla_1.pass) {
            /* Banner and header were already written by an earlier failure. */
            goto L20;
        }
/* PRINT FAIL MESSAGE AND HEADER. */
        combla_1.pass = FALSE_;
        s_wsfe(&io___121);
        e_wsfe();
        s_wsfe(&io___122);
        e_wsfe();
L20:
        /* Detail record: case, n, incx, incy, index, computed, expected, */
        /* difference, size. */
        s_wsfe(&io___123);
        do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&scomp[i__], (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&strue[i__], (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&ssize[i__], (ftnlen)sizeof(real));
        e_wsfe();
L40:
        ;
    }
    return 0;
} /* stest_ */

/* stest1_ -- scalar front end for stest_. */
/* Copies *scomp1 and *strue1 into one-element local arrays and calls */
/* stest_ with a length of one (c__1), forwarding ssize and sfac. */
/* Always returns 0. */
/* Subroutine */ int stest1_(real *scomp1, real *strue1, real *ssize, real *
        sfac)
{
    real scomp[1], strue[1];
    extern /* Subroutine */ int stest_(integer *, real *, real *, real *,
            real *);

/* ************************* STEST1 ***************************** */

/* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN */
/* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE */
/* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. */

/* C.L. LAWSON, JPL, 1978 DEC 6 */

/* .. Scalar Arguments .. */
/* .. Array Arguments .. */
/* .. Local Arrays .. */
/* .. External Subroutines .. */
/* .. Executable Statements .. */

    /* Parameter adjustments */
    /* After the decrement, &ssize[1] below is the pointer the caller passed. */
    --ssize;

    /* Function Body */
    scomp[0] = *scomp1;
    strue[0] = *strue1;
    stest_(&c__1, scomp, strue, &ssize[1], sfac);

    return 0;
} /* stest1_ */

/* sdiff_ -- returns *sa - *sb. */
real sdiff_(real *sa, real *sb)
{
    /* System generated locals */
    real ret_val;

/* ********************************* SDIFF ************************** */
/* COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 */

/* .. Scalar Arguments .. */
/* .. Executable Statements .. */
    ret_val = *sa - *sb;
    return ret_val;
} /* sdiff_ */

/* itest1_ -- integer equality check. */
/* Returns immediately when *icomp == *itrue.  Otherwise the first failure */
/* seen while combla_1.pass is still set clears that flag and writes the */
/* FAIL banner and the column header; every failure then writes one detail */
/* record.  Always returns 0. */
/* Subroutine */ int itest1_(integer *icomp, integer *itrue)
{
    /* Format strings */
    static char fmt_99999[] = "(\002 F"
            "AIL\002)";
    static char fmt_99998[] = "(/\002 CASE N INCX INCY "
            " \002,\002 COMP TRUE "
            " DIFFERENCE\002,/1x)";
    static char fmt_99997[] = "(1x,i4,i3,2i5,2i36,i12)";

    /* Builtin functions */
    integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen);

    /* Local variables */
    integer id;

    /* Fortran I/O blocks */
    static cilist io___126 = { 0, 6, 0, fmt_99999, 0 };
    static cilist io___127 = { 0, 6, 0, fmt_99998, 0 };
    static cilist io___129 = { 0, 6, 0, fmt_99997, 0 };

/* ********************************* ITEST1 ************************* */

/* THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR */
/* EQUALITY. */
/* C. L. LAWSON, JPL, 1974 DEC 10 */

/* .. Parameters .. */
/* .. Scalar Arguments .. */
/* .. Scalars in Common .. */
/* .. Local Scalars .. */
/* .. Common blocks .. */
/* .. Executable Statements .. */

    if (*icomp == *itrue) {
        goto L40;
    }

/* HERE ICOMP IS NOT EQUAL TO ITRUE. */

    if (! combla_1.pass) {
        /* Banner and header were already written by an earlier failure. */
        goto L20;
    }
/* PRINT FAIL MESSAGE AND HEADER. */
    combla_1.pass = FALSE_;
    s_wsfe(&io___126);
    e_wsfe();
    s_wsfe(&io___127);
    e_wsfe();
L20:
    id = *icomp - *itrue;
    /* Detail record: case, n, incx, incy, computed, expected, difference. */
    s_wsfe(&io___129);
    do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&(*icomp), (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&(*itrue), (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&id, (ftnlen)sizeof(integer));
    e_wsfe();
L40:
    return 0;
} /* itest1_ */

/* sblat1_ -- calls main() and returns 0. */
/* Main program alias */ int sblat1_ () { main (); return 0; }
blis-0.6.1/blastest/src/sblat2.c000066400000000000000000004457001360743507500164360ustar00rootroot00000000000000/* sblat2.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static real c_b120 = 0.f; static real c_b128 = 1.f; static logical c_true = TRUE_; static integer c_n1 = -1; static integer c__0 = 0; static logical c_false = FALSE_; /* > \brief \b SBLAT2 */ /* =========== DOCUMENTATION =========== */ /* Online html
documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM SBLAT2 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the REAL Level 2 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 18 records */ /* > of the file are read using list-directed input, the last 16 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 34 lines: */ /* > 'sblat2.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 4 NUMBER OF VALUES OF K */ /* > 0 1 2 4 VALUES OF K */ /* > 4 NUMBER OF VALUES OF INCX AND INCY */ /* > 1 2 -1 -2 VALUES OF INCX AND INCY */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > 0.0 1.0 0.7 VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > 0.0 1.0 0.9 VALUES OF BETA */ /* > SGEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SGBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STRMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STRSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STBSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STPSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SGER T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYR T PUT F FOR NO TEST. SAME COLUMNS. 
*/ /* > SSPR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSPR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. */ /* > An extended set of Fortran Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics */ /* > and Computer Science Division, Argonne National Laboratory, */ /* > 9700 South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > Or */ /* > */ /* > NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms */ /* > Group Ltd., NAG Central Office, 256 Banbury Road, Oxford */ /* > OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st */ /* > Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. */ /* > */ /* > */ /* > -- Written on 10-August-1987. */ /* > Richard Hanson, Sandia National Labs. */ /* > Jeremy Du Croz, NAG Central Office. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. 
*/ /* > \date April 2012 */ /* > \ingroup single_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static char snames[6*16] = "SGEMV " "SGBMV " "SSYMV " "SSBMV " "SSPMV " "STRMV " "STBMV " "STPMV " "STRSV " "STBSV " "STPSV " "SGER " "SSYR " "SSPR " "SSYR2 " "SSPR2 "; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 VALUE OF K IS LESS THAN 0\002)"; static char fmt_9994[] = "(\002 ABSOLUTE VALUE OF INCX OR INCY IS 0 OR G" "REATER THAN \002,i2)"; static char fmt_9993[] = "(\002 TESTS OF THE REAL LEVEL 2 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9992[] = "(\002 FOR N \002,9i6)"; static char fmt_9991[] = "(\002 FOR K \002,7i6)"; static char fmt_9990[] = "(\002 FOR INCX AND INCY \002,7i6)"; static char fmt_9989[] = "(\002 FOR ALPHA \002,7f6.1)"; static char fmt_9988[] = "(\002 FOR BETA \002,7f6.1)"; static char fmt_9980[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9984[] = "(a6,l2)"; static char fmt_9986[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,e9.1)"; static char fmt_9985[] = "(\002 ERROR IN SMVCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 SMVCH WAS CALLED " "WITH TRANS = \002,a1,\002 AND RETURNED SAME = \002,l1,\002 AND E" "RR = \002,f12.3,\002.\002,/\002 THIS MAY BE DUE TO FAULTS IN THE" " ARITHMETIC OR THE COMPILER.\002,/\002 ******* TESTS ABANDONED *" "******\002)"; static char 
fmt_9983[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9982[] = "(/\002 END OF TESTS\002)"; static char fmt_9981[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9987[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ real a[4225] /* was [65][65] */, g[65]; integer i__, j, n; real x[65], y[65], z__[130], aa[4225]; integer kb[7]; real as[4225], xs[130], ys[130], yt[65], xx[130], yy[130], alf[7]; integer inc[7], nkb; real bet[7]; extern logical lse_(real *, real *, integer *); real eps, err; integer nalf, idim[9]; logical same; integer ninc, nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int schk1_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real * , real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk2_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk3_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, 
integer *, integer *, integer *, integer *, integer *, integer *, real *, real *, real * , real *, real *, real *, real *, real *, real *, ftnlen), schk4_( char *, real *, real *, integer *, integer *, logical *, logical * , logical *, integer *, integer *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk5_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, integer *, integer *, integer *, real *, real * , real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk6_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, integer *, integer *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen); logical fatal; extern /* Subroutine */ int schke_(integer *, char *, integer *, ftnlen); logical trace; integer nidim; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); char snaps[32], trans[1]; integer isnum; logical ltest[16], sfatal; char snamet[6]; real thresh; logical ltestt, tsterr; char summry[32]; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static 
cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___35 = { 0, 5, 0, 0, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9988, 0 }; static cilist io___54 = { 0, 0, 0, 0, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9980, 0 }; static cilist io___56 = { 0, 0, 0, 0, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, 0, 0 }; static cilist io___60 = { 0, 5, 1, fmt_9984, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___81 = { 0, 0, 0, 0, 0 }; static cilist io___82 = { 0, 0, 0, fmt_9983, 0 }; static cilist io___83 = { 0, 0, 0, 0, 0 }; static cilist io___90 = { 0, 0, 0, fmt_9982, 0 }; static cilist io___91 = { 0, 0, 0, fmt_9981, 0 }; static cilist io___92 = { 0, 0, 0, fmt_9987, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__4, &c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_rsle(); /* Read and check the parameter values for the tests. 
*/ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L10: */ } /* Values of K */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nkb, (ftnlen)sizeof(integer)); e_rsle(); if (nkb < 1 || nkb > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___29); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { if (kb[i__ - 1] < 0) { io___31.ciunit = nout; s_wsfe(&io___31); e_wsfe(); goto L230; } /* L20: */ } /* Values of INCX and INCY */ s_rsle(&io___32); do_lio(&c__3, &c__1, (char *)&ninc, (ftnlen)sizeof(integer)); e_rsle(); if (ninc < 1 || ninc > 7) { io___34.ciunit = nout; s_wsfe(&io___34); do_fio(&c__1, "INCX AND INCY", (ftnlen)13); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___35); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { if (inc[i__ - 1] == 0 || (i__2 = inc[i__ - 1], abs(i__2)) > 2) { io___37.ciunit = nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&c__2, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L30: */ } /* Values of ALPHA */ s_rsle(&io___38); 
do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___40.ciunit = nout; s_wsfe(&io___40); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___41); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__4, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_rsle(); /* Values of BETA */ s_rsle(&io___43); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___45.ciunit = nout; s_wsfe(&io___45); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___46); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__4, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_rsle(); /* Report values of parameters. */ io___48.ciunit = nout; s_wsfe(&io___48); e_wsfe(); io___49.ciunit = nout; s_wsfe(&io___49); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___50.ciunit = nout; s_wsfe(&io___50); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___51.ciunit = nout; s_wsfe(&io___51); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___52.ciunit = nout; s_wsfe(&io___52); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); io___53.ciunit = nout; s_wsfe(&io___53); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); if (! 
tsterr) { io___54.ciunit = nout; s_wsle(&io___54); e_wsle(); io___55.ciunit = nout; s_wsfe(&io___55); e_wsfe(); } io___56.ciunit = nout; s_wsle(&io___56); e_wsle(); io___57.ciunit = nout; s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_wsfe(); io___58.ciunit = nout; s_wsle(&io___58); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 16; ++i__) { ltest[i__ - 1] = FALSE_; /* L40: */ } L50: i__1 = s_rsfe(&io___60); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L80; } i__1 = e_rsfe(); if (i__1 != 0) { goto L80; } for (i__ = 1; i__ <= 16; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } /* L60: */ } io___63.ciunit = nout; s_wsfe(&io___63); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L70: ltest[i__ - 1] = ltestt; goto L50; L80: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = s_epsilon_(&c_b120); io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real)); e_wsfe(); /* Check the reliability of SMVCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - j + 1; a[i__ + j * 65 - 66] = (real) max(i__3,0); /* L110: */ } x[j - 1] = (real) j; y[j - 1] = 0.f; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { yy[j - 1] = (real) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3) ; /* L130: */ } /* YY holds the exact result. On exit from SMVCH YT holds */ /* the result computed by SMVCH. */ *(unsigned char *)trans = 'N'; smvch_(trans, &n, &n, &c_b128, a, &c__65, x, &c__1, &c_b120, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lse_(yy, yt, &n); if (! 
same || err != 0.f) { io___78.ciunit = nout; s_wsfe(&io___78); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; smvch_(trans, &n, &n, &c_b128, a, &c__65, x, &c_n1, &c_b120, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lse_(yy, yt, &n); if (! same || err != 0.f) { io___79.ciunit = nout; s_wsfe(&io___79); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 16; ++isnum) { io___81.ciunit = nout; s_wsle(&io___81); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___82.ciunit = nout; s_wsfe(&io___82); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { schke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___83.ciunit = nout; s_wsle(&io___83); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L140; case 3: goto L150; case 4: goto L150; case 5: goto L150; case 6: goto L160; case 7: goto L160; case 8: goto L160; case 9: goto L160; case 10: goto L160; case 11: goto L160; case 12: goto L170; case 13: goto L180; case 14: goto L180; case 15: goto L190; case 16: goto L190; } /* Test SGEMV, 01, and SGBMV, 02. */ L140: schk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test SSYMV, 03, SSBMV, 04, and SSPMV, 05. 
*/ L150: schk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test STRMV, 06, STBMV, 07, STPMV, 08, */ /* STRSV, 09, STBSV, 10, and STPSV, 11. */ L160: schk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test SGER, 12. */ L170: schk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test SSYR, 13, and SSPR, 14. */ L180: schk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test SSYR2, 15, and SSPR2, 16. */ L190: schk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: if (fatal && sfatal) { goto L220; } } /* L210: */ } io___90.ciunit = nout; s_wsfe(&io___90); e_wsfe(); goto L240; L220: io___91.ciunit = nout; s_wsfe(&io___91); e_wsfe(); goto L240; L230: io___92.ciunit = nout; s_wsfe(&io___92); e_wsfe(); L240: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of SBLAT2. 
*/ return 0; } /* main */

/*
 * schk1_ -- test driver for SGEMV (sname[2] == 'E') and SGBMV
 * (sname[2] == 'B').
 *
 * The routine loops over every combination of
 *     n           idim[1 .. *nidim]
 *     m           max(n - nd, 0) and min(n + nd, *nmax), nd = n / 2 + 1
 *     ku          kb[1 .. *nkb] when banded (kl = max(ku - 1, 0)),
 *                 otherwise a single pass with ku = n - 1, kl = m - 1
 *     trans       'N', 'T', 'C' (the characters of ich)
 *     incx, incy  inc[1 .. *ninc]
 *     alpha       alf[1 .. *nalf]
 *     beta        bet[1 .. *nbet]
 * For each combination it builds A, x and y with smake_, keeps a copy of
 * every argument, calls sgemv_ or sgbmv_, compares every argument with its
 * saved copy (lse_ / lseres_) and hands the result to smvch_.
 *
 * Arguments as used by the body:
 *   sname   name of the routine under test; written with width 6, and
 *           sname + 1 is passed to smake_ as the matrix type.
 *   eps     forwarded unchanged to smvch_.
 *   thresh  errmax < *thresh selects the "PASSED" message, anything else
 *           the "SUSPECT" message.
 *   nout    unit number for result and error messages.
 *   ntra    unit number for the call trace (and the unit that is rewound).
 *   trace   when true, each call is written to *ntra before it is made.
 *   rewi    when true, *ntra is rewound (f_rew) before each call.
 *   fatal   set to TRUE_ when the error-exit flag is raised or an argument
 *           was changed; also passed to smvch_, which may set it.
 *   nmax    leading dimension of a and upper bound for lda.
 *   a, aa, as   matrix as generated, as passed to the routine, saved copy.
 *   x, xx, xs   the same three roles for the vector x.
 *   y, yy, ys   the same three roles for the vector y.
 *   yt, g   work arrays forwarded to smvch_.
 *   incmax, sname_len   not referenced in the body.
 *
 * Always returns 0.
 */
/* Subroutine */ int schk1_(char *sname, real *eps, real *thresh, integer *
        nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
        integer *nidim, integer *idim, integer *nkb, integer *kb, integer *
        nalf, real *alf, integer *nbet, real *bet, integer *ninc, integer *
        inc, integer *nmax, integer *incmax, real *a, real *aa, real *as,
        real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt,
        real *g, ftnlen sname_len)
{
    /* Initialized data */

    /* The three values tried for the trans argument. */
    static char ich[3] = "NTC";

    /* Format strings */
    /* fmt_9994 / fmt_9995: one traced call (SGEMV / SGBMV argument list). */
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
            "2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f"
            "4.1,\002, Y,\002,i2,\002) .\002)";
    static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
            "4(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f"
            "4.1,\002, Y,\002,i2,\002) .\002)";
    static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";

    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
    alist al__1;

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
            f_rew(alist *);

    /* Local variables */
    /* Names ending in "s" (ms, ns, kls, kus, als, bls, ldas, incxs, incys, */
    /* transs) hold the copies taken before each call. */
    integer i__, m, n, ia, ib, ic, nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy,
            ms, lx, ly, ns, laa, lda;
    real als, bls;
    extern logical lse_(real *, real *, integer *);
    real err;
    integer iku, kls, kus;
    real
            beta;
    integer ldas;
    logical same;
    integer incx, incy;
    logical full, tran, null;
    real alpha;
    logical isame[13];
    extern /* Subroutine */ int smake_(char *, char *, char *, integer *,
            integer *, real *, integer *, real *, integer *, integer *,
            integer *, logical *, real *, ftnlen, ftnlen, ftnlen);
    integer nargs;
    extern /* Subroutine */ int sgbmv_(char *, integer *, integer *, integer *
            , integer *, real *, real *, integer *, real *, integer *, real *,
            real *, integer *, ftnlen), smvch_(char *, integer *, integer *,
            real *, real *, integer *, real *, integer *, real *, real *,
            integer *, real *, real *, real *, real *, real *, logical *,
            integer *, logical *, ftnlen), sgemv_(char *, integer *, integer *
            , real *, real *, integer *, real *, integer *, real *, real *,
            integer *, ftnlen);
    logical reset;
    integer incxs, incys;
    char trans[1];
    logical banded;
    real errmax;
    extern logical lseres_(char *, char *, integer *, integer *, real *, real
            *, integer *, ftnlen, ftnlen);
    real transl;
    char transs[1];

    /* Fortran I/O blocks */
    /* io___139 / io___140 are used for the trace, io___149 / io___150 for */
    /* the failure report; both pairs share fmt_9994 / fmt_9995. */
    static cilist io___139 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___140 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___141 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___144 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___146 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___147 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___148 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___149 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___150 = { 0, 0, 0, fmt_9995, 0 };

/*  Tests SGEMV and SGBMV. */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Local Arrays .. */
/*     .. External Functions .. */
/*     .. External Subroutines .. */
/*     .. Intrinsic Functions .. */
/*     .. Scalars in Common .. */
/*     .. Common blocks .. */
/*     .. Data statements .. */
    /* Parameter adjustments */
    /* Every array pointer is moved back by one element (a by one row and */
    /* one column) so that the body can index from 1. */
    --idim;
    --kb;
    --alf;
    --bet;
    --inc;
    --g;
    --yt;
    --y;
    --x;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ys;
    --yy;
    --xs;
    --xx;

    /* Function Body */
/*     .. Executable Statements .. */
    /* The third character of the routine name selects the variant. */
    full = *(unsigned char *)&sname[2] == 'E';
    banded = *(unsigned char *)&sname[2] == 'B';
/*     Define the number of arguments. */
    /* NOTE(review): nargs is left unset when sname[2] is neither 'E' nor */
    /* 'B'; the code relies on the caller passing one of the two names. */
    if (full) {
        nargs = 11;
    } else if (banded) {
        nargs = 13;
    }

    /* nc counts the calls made; errmax tracks the largest err from smvch_. */
    nc = 0;
    reset = TRUE_;
    errmax = 0.f;

    i__1 = *nidim;
    for (in = 1; in <= i__1; ++in) {
        n = idim[in];
        nd = n / 2 + 1;

        /* Two row counts per n: one below n and one above it. */
        for (im = 1; im <= 2; ++im) {
            if (im == 1) {
/* Computing MAX */
                i__2 = n - nd;
                m = max(i__2,0);
            }
            if (im == 2) {
/* Computing MIN */
                i__2 = n + nd;
                m = min(i__2,*nmax);
            }

            /* Only the banded variant iterates over the kb values. */
            if (banded) {
                nk = *nkb;
            } else {
                nk = 1;
            }
            i__2 = nk;
            for (iku = 1; iku <= i__2; ++iku) {
                if (banded) {
                    ku = kb[iku];
/* Computing MAX */
                    i__3 = ku - 1;
                    kl = max(i__3,0);
                } else {
                    ku = n - 1;
                    kl = m - 1;
                }
/*              Set LDA to 1 more than minimum value if room. */
                if (banded) {
                    lda = kl + ku + 1;
                } else {
                    lda = m;
                }
                if (lda < *nmax) {
                    ++lda;
                }
/*              Skip tests if not enough room. */
                if (lda > *nmax) {
                    /* L100 sits at the end of the iku loop body. */
                    goto L100;
                }
                /* laa is the number of elements of aa saved and compared. */
                laa = lda * n;
                null = n <= 0 || m <= 0;

/*              Generate the matrix A. */

                transl = 0.f;
                smake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1]
                        , &lda, &kl, &ku, &reset, &transl, (ftnlen)2, (ftnlen)
                        1, (ftnlen)1);

                for (ic = 1; ic <= 3; ++ic) {
                    *(unsigned char *)trans = *(unsigned char *)&ich[ic - 1];
                    tran = *(unsigned char *)trans == 'T' || *(unsigned char *
                            )trans == 'C';

                    /* ml is the length of y, nl the length of x. */
                    if (tran) {
                        ml = n;
                        nl = m;
                    } else {
                        ml = m;
                        nl = n;
                    }

                    i__3 = *ninc;
                    for (ix = 1; ix <= i__3; ++ix) {
                        incx = inc[ix];
                        lx = abs(incx) * nl;

/*                      Generate the vector X. */

                        transl = .5f;
                        i__4 = abs(incx);
                        i__5 = nl - 1;
                        smake_("GE", " ", " ", &c__1, &nl, &x[1], &c__1, &xx[
                                1], &i__4, &c__0, &i__5, &reset, &transl, (
                                ftnlen)2, (ftnlen)1, (ftnlen)1);
                        /* Zero element nl / 2 of x and the matching strided */
                        /* element of xx. */
                        if (nl > 1) {
                            x[nl / 2] = 0.f;
                            xx[abs(incx) * (nl / 2 - 1) + 1] = 0.f;
                        }

                        i__4 = *ninc;
                        for (iy = 1; iy <= i__4; ++iy) {
                            incy = inc[iy];
                            ly = abs(incy) * ml;

                            i__5 = *nalf;
                            for (ia = 1; ia <= i__5; ++ia) {
                                alpha = alf[ia];

                                i__6 = *nbet;
                                for (ib = 1; ib <= i__6; ++ib) {
                                    beta = bet[ib];

/*                                  Generate the vector Y. */

                                    transl = 0.f;
                                    i__7 = abs(incy);
                                    i__8 = ml - 1;
                                    smake_("GE", " ", " ", &c__1, &ml, &y[1],
                                            &c__1, &yy[1], &i__7, &c__0, &
                                            i__8, &reset, &transl, (ftnlen)2,
                                            (ftnlen)1, (ftnlen)1);

                                    ++nc;

/*                                  Save every datum before calling the */
/*                                  subroutine. */

                                    *(unsigned char *)transs = *(unsigned
                                            char *)trans;
                                    ms = m;
                                    ns = n;
                                    kls = kl;
                                    kus = ku;
                                    als = alpha;
                                    i__7 = laa;
                                    for (i__ = 1; i__ <= i__7; ++i__) {
                                        as[i__] = aa[i__];
/* L10: */
                                    }
                                    ldas = lda;
                                    i__7 = lx;
                                    for (i__ = 1; i__ <= i__7; ++i__) {
                                        xs[i__] = xx[i__];
/* L20: */
                                    }
                                    incxs = incx;
                                    bls = beta;
                                    i__7 = ly;
                                    for (i__ = 1; i__ <= i__7; ++i__) {
                                        ys[i__] = yy[i__];
/* L30: */
                                    }
                                    incys = incy;

/*                                  Call the subroutine. */

                                    if (full) {
                                        /* Optional trace of the call, */
                                        /* written to unit *ntra. */
                                        if (*trace) {
                                            io___139.ciunit = *ntra;
                                            s_wsfe(&io___139);
                                            do_fio(&c__1, (char *)&nc, (
                                                    ftnlen)sizeof(integer));
                                            do_fio(&c__1, sname, (ftnlen)6);
                                            do_fio(&c__1, trans, (ftnlen)1);
                                            do_fio(&c__1, (char *)&m, (ftnlen)
                                                    sizeof(integer));
                                            do_fio(&c__1, (char *)&n, (ftnlen)
                                                    sizeof(integer));
                                            do_fio(&c__1, (char *)&alpha, (
                                                    ftnlen)sizeof(real));
                                            do_fio(&c__1, (char *)&lda, (
                                                    ftnlen)sizeof(integer));
                                            do_fio(&c__1, (char *)&incx, (
                                                    ftnlen)sizeof(integer));
                                            do_fio(&c__1, (char *)&beta, (
                                                    ftnlen)sizeof(real));
                                            do_fio(&c__1, (char *)&incy, (
                                                    ftnlen)sizeof(integer));
                                            e_wsfe();
                                        }
                                        if (*rewi) {
                                            al__1.aerr = 0;
                                            al__1.aunit = *ntra;
                                            f_rew(&al__1);
                                        }
                                        sgemv_(trans, &m, &n, &alpha, &aa[1],
                                                &lda, &xx[1], &incx, &beta, &
                                                yy[1], &incy, (ftnlen)1);
                                    } else if (banded) {
                                        /* Same trace as above, with kl and */
                                        /* ku added after n. */
                                        if (*trace) {
                                            io___140.ciunit = *ntra;
                                            s_wsfe(&io___140);
                                            do_fio(&c__1, (char *)&nc, (
                                                    ftnlen)sizeof(integer));
                                            do_fio(&c__1, sname, (ftnlen)6);
                                            do_fio(&c__1, trans, (ftnlen)1);
                                            do_fio(&c__1, (char *)&m, (ftnlen)
                                                    sizeof(integer));
                                            do_fio(&c__1, (char *)&n, (ftnlen)
                                                    sizeof(integer));
                                            do_fio(&c__1, (char *)&kl, (
                                                    ftnlen)sizeof(integer));
                                            do_fio(&c__1, (char *)&ku, (
                                                    ftnlen)sizeof(integer));
                                            do_fio(&c__1, (char *)&alpha, (
                                                    ftnlen)sizeof(real));
                                            do_fio(&c__1, (char *)&lda, (
                                                    ftnlen)sizeof(integer));
                                            do_fio(&c__1, (char *)&incx, (
                                                    ftnlen)sizeof(integer));
                                            do_fio(&c__1, (char *)&beta, (
                                                    ftnlen)sizeof(real));
                                            do_fio(&c__1, (char *)&incy, (
                                                    ftnlen)sizeof(integer));
                                            e_wsfe();
                                        }
                                        if (*rewi) {
                                            al__1.aerr = 0;
                                            al__1.aunit = *ntra;
                                            f_rew(&al__1);
                                        }
                                        sgbmv_(trans, &m, &n, &kl, &ku, &
                                                alpha, &aa[1], &lda, &xx[1], &
                                                incx, &beta, &yy[1], &incy, (
                                                ftnlen)1);
                                    }

/*                                  Check if error-exit was taken incorrectly. */

                                    /* infoc_1 is declared outside this */
                                    /* routine. NOTE(review): its ok flag is */
                                    /* presumably cleared by the test error */
                                    /* handler -- confirm. */
                                    if (! infoc_1.ok) {
                                        io___141.ciunit = *nout;
                                        s_wsfe(&io___141);
                                        e_wsfe();
                                        *fatal = TRUE_;
                                        goto L130;
                                    }

/*                                  See what data changed inside subroutines. */

                                    /* isame[i] is true when argument i + 1 */
                                    /* still matches its saved copy. */
                                    isame[0] = *(unsigned char *)trans == *(
                                            unsigned char *)transs;
                                    isame[1] = ms == m;
                                    isame[2] = ns == n;
                                    if (full) {
                                        isame[3] = als == alpha;
                                        isame[4] = lse_(&as[1], &aa[1], &laa);
                                        isame[5] = ldas == lda;
                                        isame[6] = lse_(&xs[1], &xx[1], &lx);
                                        isame[7] = incxs == incx;
                                        isame[8] = bls == beta;
                                        /* For an empty problem y is compared */
                                        /* as a whole with lse_; otherwise */
                                        /* lseres_ does the comparison. */
                                        if (null) {
                                            isame[9] = lse_(&ys[1], &yy[1], &
                                                    ly);
                                        } else {
                                            i__7 = abs(incy);
                                            isame[9] = lseres_("GE", " ", &
                                                    c__1, &ml, &ys[1], &yy[1],
                                                    &i__7, (ftnlen)2, (
                                                    ftnlen)1);
                                        }
                                        isame[10] = incys == incy;
                                    } else if (banded) {
                                        isame[3] = kls == kl;
                                        isame[4] = kus == ku;
                                        isame[5] = als == alpha;
                                        isame[6] = lse_(&as[1], &aa[1], &laa);
                                        isame[7] = ldas == lda;
                                        isame[8] = lse_(&xs[1], &xx[1], &lx);
                                        isame[9] = incxs == incx;
                                        isame[10] = bls == beta;
                                        if (null) {
                                            isame[11] = lse_(&ys[1], &yy[1], &
                                                    ly);
                                        } else {
                                            i__7 = abs(incy);
                                            isame[11] = lseres_("GE", " ", &
                                                    c__1, &ml, &ys[1], &yy[1],
                                                    &i__7, (ftnlen)2, (
                                                    ftnlen)1);
                                        }
                                        isame[12] = incys == incy;
                                    }

/*                                  If data was incorrectly changed, report */
/*                                  and return. */

                                    same = TRUE_;
                                    i__7 = nargs;
                                    for (i__ = 1; i__ <= i__7; ++i__) {
                                        same = same && isame[i__ - 1];
                                        /* Report the 1-based number of each */
                                        /* changed argument. */
                                        if (! isame[i__ - 1]) {
                                            io___144.ciunit = *nout;
                                            s_wsfe(&io___144);
                                            do_fio(&c__1, (char *)&i__, (
                                                    ftnlen)sizeof(integer));
                                            e_wsfe();
                                        }
/* L40: */
                                    }
                                    if (! same) {
                                        *fatal = TRUE_;
                                        goto L130;
                                    }

                                    if (! null) {

/*                                      Check the result. */

                                        smvch_(trans, &m, &n, &alpha, &a[
                                                a_offset], nmax, &x[1], &incx,
                                                &beta, &y[1], &incy, &yt[1],
                                                &g[1], &yy[1], eps, &err,
                                                fatal, nout, &c_true, (ftnlen)
                                                1);
                                        errmax = max(errmax,err);
/*                                      If got really bad answer, report and */
/*                                      return. */
                                        if (*fatal) {
                                            goto L130;
                                        }
                                    } else {
/*                                      Avoid repeating tests with M.le.0 or */
/*                                      N.le.0. */
                                        /* L110 sits at the end of the im */
                                        /* loop body. */
                                        goto L110;
                                    }

/* L50: */
                                }

/* L60: */
                            }

/* L70: */
                        }

/* L80: */
                    }

/* L90: */
                }

L100:
                ;
            }

L110:
            ;
        }

/* L120: */
    }

/*     Report result. */

    if (errmax < *thresh) {
        io___146.ciunit = *nout;
        s_wsfe(&io___146);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___147.ciunit = *nout;
        s_wsfe(&io___147);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real));
        e_wsfe();
    }
    goto L140;

    /* Failure exit: name the routine, then echo the arguments of the call */
    /* that was being checked, in the same layout as the trace. */
L130:
    io___148.ciunit = *nout;
    s_wsfe(&io___148);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    if (full) {
        io___149.ciunit = *nout;
        s_wsfe(&io___149);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, trans, (ftnlen)1);
        do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer));
        e_wsfe();
    } else if (banded) {
        io___150.ciunit = *nout;
        s_wsfe(&io___150);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, trans, (ftnlen)1);
        do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&kl, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&ku, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer));
        e_wsfe();
    }

L140:
    return 0;


/*     End of SCHK1. */

} /* schk1_ */

/* Subroutine */ int schk2_(char *sname, real *eps, real *thresh, integer *
        nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
        integer *nidim, integer *idim, integer *nkb, integer *kb, integer *
        nalf, real *alf, integer *nbet, real *bet, integer *ninc, integer *
        inc, integer *nmax, integer *incmax, real *a, real *aa, real *as,
        real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt,
        real *g, ftnlen sname_len)
{
    /* Initialized data */

    static char ich[2] = "UL";

    /* Format strings */
    static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
            "i3,\002,\002,f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f4.1,"
            "\002, Y,\002,i2,\002) .\002)";
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
            "2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, X,\002,i2,\002,\002,f"
            "4.1,\002, Y,\002,i2,\002) .\002)";
    static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002,"
            "i3,\002,\002,f4.1,\002, AP\002,\002, X,\002,i2,\002,\002,f4.1"
            ",\002, Y,\002,i2,\002) .\002)";
    static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
            "ER:\002)";

    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
    alist al__1;

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
            f_rew(alist *);

    /* Local variables */
    integer i__, k, n, ia, ib, ic, nc, ik, in,
            nk, ks, ix, iy, ns, lx, ly, laa, lda;
    real als, bls;
    extern logical lse_(real *, real *, integer *);
    real err, beta;
    integer ldas;
    logical same;
    integer incx, incy;
    logical full, null;
    char uplo[1];
    real alpha;
    logical isame[13];
    extern /* Subroutine */ int smake_(char *, char *, char *, integer *,
            integer *, real *, integer *, real *, integer *, integer *,
            integer *, logical *, real *, ftnlen, ftnlen, ftnlen);
    integer nargs;
    extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *,
            real *, integer *, real *, integer *, real *, real *, integer *,
            real *, real *, real *, real *, real *, logical *, integer *,
            logical *, ftnlen);
    logical reset;
    integer incxs, incys;
    extern /* Subroutine */ int ssbmv_(char *, integer *, integer *, real *,
            real *, integer *, real *, integer *, real *, real *, integer *,
            ftnlen);
    char uplos[1];
    extern /* Subroutine */ int sspmv_(char *, integer *, real *, real *,
            real *, integer *, real *, real *, integer *, ftnlen), ssymv_(
            char *, integer *, real *, real *, integer *, real *, integer *,
            real *, real *, integer *, ftnlen);
    logical banded, packed;
    real errmax;
    extern logical lseres_(char *, char *, integer *, integer *, real *, real
            *, integer *, ftnlen, ftnlen);
    real transl;

    /* Fortran I/O blocks */
    /* io___189 .. io___191 are used for the trace, io___200 .. io___202 for */
    /* the failure report; both groups share fmt_9993 .. fmt_9995. */
    static cilist io___189 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___190 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___191 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___192 = { 0, 0, 0, fmt_9992, 0 };
    static cilist io___195 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___197 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___198 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___199 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___200 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___201 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___202 = { 0, 0, 0, fmt_9995, 0 };

/*  Tests SSYMV, SSBMV and SSPMV. */

/*  The third character of sname selects the routine: 'Y' calls ssymv_, */
/*  'B' calls ssbmv_ and 'P' calls sspmv_.  The body loops over every */
/*  combination of */
/*      n           idim[1 .. *nidim] */
/*      k           kb[1 .. *nkb] when banded, otherwise a single pass with */
/*                  k = n - 1 */
/*      uplo        'U', 'L' (the characters of ich) */
/*      incx, incy  inc[1 .. *ninc] */
/*      alpha       alf[1 .. *nalf] */
/*      beta        bet[1 .. *nbet] */
/*  For each combination it builds A, x and y with smake_, keeps a copy of */
/*  every argument, makes the call, compares every argument with its saved */
/*  copy (lse_ / lseres_) and hands the result to smvch_. */

/*  Results and errors are written to unit *nout.  When *trace is true each */
/*  call is written to unit *ntra first, and when *rewi is true that unit is */
/*  rewound before each call.  *fatal is set to TRUE_ when the error-exit */
/*  flag is raised or an argument was changed; it is also passed to smvch_. */
/*  incmax and sname_len are not referenced in the body.  The routine */
/*  always returns 0. */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/*     .. Parameters .. */
/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. Local Scalars .. */
/*     .. Local Arrays .. */
/*     .. External Functions .. */
/*     .. External Subroutines .. */
/*     .. Intrinsic Functions .. */
/*     .. Scalars in Common .. */
/*     .. Common blocks .. */
/*     .. Data statements .. */
    /* Parameter adjustments */
    /* Every array pointer is moved back by one element (a by one row and */
    /* one column) so that the body can index from 1. */
    --idim;
    --kb;
    --alf;
    --bet;
    --inc;
    --g;
    --yt;
    --y;
    --x;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ys;
    --yy;
    --xs;
    --xx;

    /* Function Body */
/*     .. Executable Statements .. */
    full = *(unsigned char *)&sname[2] == 'Y';
    banded = *(unsigned char *)&sname[2] == 'B';
    packed = *(unsigned char *)&sname[2] == 'P';
/*     Define the number of arguments. */
    /* NOTE(review): nargs is left unset when sname[2] is none of 'Y', 'B' */
    /* or 'P'; the code relies on the caller passing one of those names. */
    if (full) {
        nargs = 10;
    } else if (banded) {
        nargs = 11;
    } else if (packed) {
        nargs = 9;
    }

    /* nc counts the calls made; errmax tracks the largest err from smvch_. */
    nc = 0;
    reset = TRUE_;
    errmax = 0.f;

    i__1 = *nidim;
    for (in = 1; in <= i__1; ++in) {
        n = idim[in];

        /* Only the banded variant iterates over the kb values. */
        if (banded) {
            nk = *nkb;
        } else {
            nk = 1;
        }
        i__2 = nk;
        for (ik = 1; ik <= i__2; ++ik) {
            if (banded) {
                k = kb[ik];
            } else {
                k = n - 1;
            }
/*           Set LDA to 1 more than minimum value if room. */
            if (banded) {
                lda = k + 1;
            } else {
                lda = n;
            }
            if (lda < *nmax) {
                ++lda;
            }
/*           Skip tests if not enough room. */
            if (lda > *nmax) {
                /* L100 sits at the end of the ik loop body. */
                goto L100;
            }
            /* laa is the number of elements of aa saved and compared. */
            if (packed) {
                laa = n * (n + 1) / 2;
            } else {
                laa = lda * n;
            }
            null = n <= 0;

            for (ic = 1; ic <= 2; ++ic) {
                *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1];

/*              Generate the matrix A. */

                transl = 0.f;
                smake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[
                        1], &lda, &k, &k, &reset, &transl, (ftnlen)2, (ftnlen)
                        1, (ftnlen)1);

                i__3 = *ninc;
                for (ix = 1; ix <= i__3; ++ix) {
                    incx = inc[ix];
                    lx = abs(incx) * n;

/*                  Generate the vector X. */

                    transl = .5f;
                    i__4 = abs(incx);
                    i__5 = n - 1;
                    smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &
                            i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, (
                            ftnlen)1, (ftnlen)1);
                    /* Zero element n / 2 of x and the matching strided */
                    /* element of xx. */
                    if (n > 1) {
                        x[n / 2] = 0.f;
                        xx[abs(incx) * (n / 2 - 1) + 1] = 0.f;
                    }

                    i__4 = *ninc;
                    for (iy = 1; iy <= i__4; ++iy) {
                        incy = inc[iy];
                        ly = abs(incy) * n;

                        i__5 = *nalf;
                        for (ia = 1; ia <= i__5; ++ia) {
                            alpha = alf[ia];

                            i__6 = *nbet;
                            for (ib = 1; ib <= i__6; ++ib) {
                                beta = bet[ib];

/*                              Generate the vector Y. */

                                transl = 0.f;
                                i__7 = abs(incy);
                                i__8 = n - 1;
                                smake_("GE", " ", " ", &c__1, &n, &y[1], &
                                        c__1, &yy[1], &i__7, &c__0, &i__8, &
                                        reset, &transl, (ftnlen)2, (ftnlen)1,
                                        (ftnlen)1);

                                ++nc;

/*                              Save every datum before calling the */
/*                              subroutine. */

                                *(unsigned char *)uplos = *(unsigned char *)
                                        uplo;
                                ns = n;
                                ks = k;
                                als = alpha;
                                i__7 = laa;
                                for (i__ = 1; i__ <= i__7; ++i__) {
                                    as[i__] = aa[i__];
/* L10: */
                                }
                                ldas = lda;
                                i__7 = lx;
                                for (i__ = 1; i__ <= i__7; ++i__) {
                                    xs[i__] = xx[i__];
/* L20: */
                                }
                                incxs = incx;
                                bls = beta;
                                i__7 = ly;
                                for (i__ = 1; i__ <= i__7; ++i__) {
                                    ys[i__] = yy[i__];
/* L30: */
                                }
                                incys = incy;

/*                              Call the subroutine. */

                                if (full) {
                                    /* Optional trace of the call, written */
                                    /* to unit *ntra. */
                                    if (*trace) {
                                        io___189.ciunit = *ntra;
                                        s_wsfe(&io___189);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&alpha, (ftnlen)
                                                sizeof(real));
                                        do_fio(&c__1, (char *)&lda, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&incx, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&beta, (ftnlen)
                                                sizeof(real));
                                        do_fio(&c__1, (char *)&incy, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    ssymv_(uplo, &n, &alpha, &aa[1], &lda, &
                                            xx[1], &incx, &beta, &yy[1], &
                                            incy, (ftnlen)1);
                                } else if (banded) {
                                    /* Same trace, with k added after n. */
                                    if (*trace) {
                                        io___190.ciunit = *ntra;
                                        s_wsfe(&io___190);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&k, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&alpha, (ftnlen)
                                                sizeof(real));
                                        do_fio(&c__1, (char *)&lda, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&incx, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&beta, (ftnlen)
                                                sizeof(real));
                                        do_fio(&c__1, (char *)&incy, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    ssbmv_(uplo, &n, &k, &alpha, &aa[1], &lda,
                                            &xx[1], &incx, &beta, &yy[1], &
                                            incy, (ftnlen)1);
                                } else if (packed) {
                                    /* The packed call has no lda, so the */
                                    /* trace omits it as well. */
                                    if (*trace) {
                                        io___191.ciunit = *ntra;
                                        s_wsfe(&io___191);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&alpha, (ftnlen)
                                                sizeof(real));
                                        do_fio(&c__1, (char *)&incx, (ftnlen)
                                                sizeof(integer));
                                        do_fio(&c__1, (char *)&beta, (ftnlen)
                                                sizeof(real));
                                        do_fio(&c__1, (char *)&incy, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    sspmv_(uplo,
                                            &n, &alpha, &aa[1], &xx[1], &
                                            incx, &beta, &yy[1], &incy, (
                                            ftnlen)1);
                                }

/*                              Check if error-exit was taken incorrectly. */

                                /* infoc_1 is declared outside this routine. */
                                /* NOTE(review): its ok flag is presumably */
                                /* cleared by the test error handler -- */
                                /* confirm. */
                                if (! infoc_1.ok) {
                                    io___192.ciunit = *nout;
                                    s_wsfe(&io___192);
                                    e_wsfe();
                                    *fatal = TRUE_;
                                    goto L120;
                                }

/*                              See what data changed inside subroutines. */

                                /* isame[i] is true when argument i + 1 */
                                /* still matches its saved copy; the slot */
                                /* numbering follows each routine's own */
                                /* argument list. */
                                isame[0] = *(unsigned char *)uplo == *(
                                        unsigned char *)uplos;
                                isame[1] = ns == n;
                                if (full) {
                                    isame[2] = als == alpha;
                                    isame[3] = lse_(&as[1], &aa[1], &laa);
                                    isame[4] = ldas == lda;
                                    isame[5] = lse_(&xs[1], &xx[1], &lx);
                                    isame[6] = incxs == incx;
                                    isame[7] = bls == beta;
                                    /* For an empty problem y is compared as */
                                    /* a whole with lse_; otherwise lseres_ */
                                    /* does the comparison. */
                                    if (null) {
                                        isame[8] = lse_(&ys[1], &yy[1], &ly);
                                    } else {
                                        i__7 = abs(incy);
                                        isame[8] = lseres_("GE", " ", &c__1, &
                                                n, &ys[1], &yy[1], &i__7, (
                                                ftnlen)2, (ftnlen)1);
                                    }
                                    isame[9] = incys == incy;
                                } else if (banded) {
                                    isame[2] = ks == k;
                                    isame[3] = als == alpha;
                                    isame[4] = lse_(&as[1], &aa[1], &laa);
                                    isame[5] = ldas == lda;
                                    isame[6] = lse_(&xs[1], &xx[1], &lx);
                                    isame[7] = incxs == incx;
                                    isame[8] = bls == beta;
                                    if (null) {
                                        isame[9] = lse_(&ys[1], &yy[1], &ly);
                                    } else {
                                        i__7 = abs(incy);
                                        isame[9] = lseres_("GE", " ", &c__1, &
                                                n, &ys[1], &yy[1], &i__7, (
                                                ftnlen)2, (ftnlen)1);
                                    }
                                    isame[10] = incys == incy;
                                } else if (packed) {
                                    isame[2] = als == alpha;
                                    isame[3] = lse_(&as[1], &aa[1], &laa);
                                    isame[4] = lse_(&xs[1], &xx[1], &lx);
                                    isame[5] = incxs == incx;
                                    isame[6] = bls == beta;
                                    if (null) {
                                        isame[7] = lse_(&ys[1], &yy[1], &ly);
                                    } else {
                                        i__7 = abs(incy);
                                        isame[7] = lseres_("GE", " ", &c__1, &
                                                n, &ys[1], &yy[1], &i__7, (
                                                ftnlen)2, (ftnlen)1);
                                    }
                                    isame[8] = incys == incy;
                                }

/*                              If data was incorrectly changed, report and */
/*                              return. */

                                same = TRUE_;
                                i__7 = nargs;
                                for (i__ = 1; i__ <= i__7; ++i__) {
                                    same = same && isame[i__ - 1];
                                    /* Report the 1-based number of each */
                                    /* changed argument. */
                                    if (! isame[i__ - 1]) {
                                        io___195.ciunit = *nout;
                                        s_wsfe(&io___195);
                                        do_fio(&c__1, (char *)&i__, (ftnlen)
                                                sizeof(integer));
                                        e_wsfe();
                                    }
/* L40: */
                                }
                                if (! same) {
                                    *fatal = TRUE_;
                                    goto L120;
                                }

                                if (! null) {

/*                                  Check the result. */

                                    /* The check always passes "N" and an */
                                    /* n-by-n problem to smvch_. */
                                    smvch_("N", &n, &n, &alpha, &a[a_offset],
                                            nmax, &x[1], &incx, &beta, &y[1],
                                            &incy, &yt[1], &g[1], &yy[1], eps,
                                            &err, fatal, nout, &c_true, (
                                            ftnlen)1);
                                    errmax = max(errmax,err);
/*                                  If got really bad answer, report and */
/*                                  return. */
                                    if (*fatal) {
                                        goto L120;
                                    }
                                } else {
/*                                  Avoid repeating tests with N.le.0 */
                                    /* L110 sits at the end of the in loop */
                                    /* body. */
                                    goto L110;
                                }

/* L50: */
                            }

/* L60: */
                        }

/* L70: */
                    }

/* L80: */
                }

/* L90: */
            }

L100:
            ;
        }

L110:
        ;
    }

/*     Report result. */

    if (errmax < *thresh) {
        io___197.ciunit = *nout;
        s_wsfe(&io___197);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___198.ciunit = *nout;
        s_wsfe(&io___198);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real));
        e_wsfe();
    }
    goto L130;

    /* Failure exit: name the routine, then echo the arguments of the call */
    /* that was being checked, in the same layout as the trace. */
L120:
    io___199.ciunit = *nout;
    s_wsfe(&io___199);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    if (full) {
        io___200.ciunit = *nout;
        s_wsfe(&io___200);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer));
        e_wsfe();
    } else if (banded) {
        io___201.ciunit = *nout;
        s_wsfe(&io___201);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer));
        e_wsfe();
    } else if (packed) {
        io___202.ciunit = *nout;
        s_wsfe(&io___202);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, uplo, (ftnlen)1);
        do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real));
        do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer));
        e_wsfe();
    }

L130:
    return 0;


/*     End of SCHK2. */

} /* schk2_ */

/* Subroutine */ int schk3_(char *sname, real *eps, real *thresh, integer *
        nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
        integer *nidim, integer *idim, integer *nkb, integer *kb, integer *
        ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa,
        real *as, real *x, real *xx, real *xs, real *xt, real *g, real *z__,
        ftnlen sname_len)
{
    /* Initialized data */

    static char ichu[2] = "UL";
    static char icht[3] = "NTC";
    static char ichd[2] = "UN";

    /* Format strings */
    static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1"
            ",\002',\002),i3,\002, A,\002,i3,\002, X,\002,i2,\002) "
            " .\002)";
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1"
            ",\002',\002),2(i3,\002,\002),\002 A,\002,i3,\002, X,\002,i2,\002"
            ") .\002)";
    static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1"
            ",\002',\002),i3,\002, AP, \002,\002X,\002,i2,\002) "
            " .\002)";
    static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
            "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
            " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
            "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
            " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
            "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char
fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda, ict, icu; extern logical lse_(real *, real *, integer *); real err; char diag[1]; integer ldas; logical same; integer incx; logical full, null; char uplo[1], diags[1]; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs; char trans[1]; extern /* Subroutine */ int stbmv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), stbsv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen); char uplos[1]; extern /* Subroutine */ int stpmv_(char *, char *, char *, integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), strmv_(char *, char *, char *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen), stpsv_(char *, char *, char *, integer *, real *, real *, integer *, ftnlen, ftnlen, ftnlen), strsv_(char * , char *, char *, integer *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; char transs[1]; /* Fortran I/O blocks */ static cilist 
io___239 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___240 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___241 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___242 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___243 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___244 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___245 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___248 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___250 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___251 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___252 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___253 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___254 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___255 = { 0, 0, 0, fmt_9995, 0 }; /* Tests STRMV, STBMV, STPMV, STRSV, STBSV and STPSV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --inc; --z__; --g; --xt; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'R'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 8; } else if (banded) { nargs = 9; } else if (packed) { nargs = 7; } nc = 0; reset = TRUE_; errmax = 0.f; /* Set up zero vector for SMVCH. 
*/ i__1 = *nmax; for (i__ = 1; i__ <= i__1; ++i__) { z__[i__] = 0.f; /* L10: */ } i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1] ; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl = 0.f; smake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5f; i__4 = abs(incx); i__5 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, & xx[1], &i__4, &c__0, &i__5, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; xx[abs(incx) * (n / 2 - 1) + 1] = 0.f; } ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; *(unsigned char *)diags = *(unsigned char *)diag; ns = n; ks = k; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L20: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { xs[i__] = xx[i__]; /* L30: */ } incxs = incx; /* Call the subroutine. 
*/ if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { io___239.ciunit = *ntra; s_wsfe(&io___239); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } strmv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___240.ciunit = *ntra; s_wsfe(&io___240); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } stbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___241.ciunit = *ntra; s_wsfe(&io___241); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } stpmv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { if (full) { if (*trace) { io___242.ciunit = *ntra; s_wsfe(&io___242); do_fio(&c__1, 
(char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } strsv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___243.ciunit = *ntra; s_wsfe(&io___243); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } stbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___244.ciunit = *ntra; s_wsfe(&io___244); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } stpsv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___245.ciunit = *nout; s_wsfe(&io___245); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { isame[4] = lse_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; if (null) { isame[6] = lse_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[6] = lseres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[7] = incxs == incx; } else if (banded) { isame[4] = ks == k; isame[5] = lse_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (null) { isame[7] = lse_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[7] = lseres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[8] = incxs == incx; } else if (packed) { isame[4] = lse_(&as[1], &aa[1], &laa); if (null) { isame[5] = lse_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[5] = lseres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[6] = incxs == incx; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___248.ciunit = *nout; s_wsfe(&io___248); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen) 2) == 0) { /* Check the result. */ smvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &x[1], &incx, & c_b120, &z__[1], &incx, &xt[1], & g[1], &xx[1], eps, &err, fatal, nout, &c_true, (ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { /* Compute approximation to original vector. 
*/ i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = xx[(i__ - 1) * abs(incx) + 1]; xx[(i__ - 1) * abs(incx) + 1] = x[i__] ; /* L50: */ } smvch_(trans, &n, &n, &c_b128, &a[ a_offset], nmax, &z__[1], &incx, & c_b120, &x[1], &incx, &xt[1], &g[ 1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0. */ goto L110; } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___250.ciunit = *nout; s_wsfe(&io___250); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___251.ciunit = *nout; s_wsfe(&io___251); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___252.ciunit = *nout; s_wsfe(&io___252); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___253.ciunit = *nout; s_wsfe(&io___253); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___254.ciunit = *nout; s_wsfe(&io___254); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___255.ciunit = *nout; s_wsfe(&io___255); 
do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of SCHK3. */ } /* schk3_ */ /* Subroutine schk4_ -- computational test driver for SGER.
   For every size n in idim, two row counts m derived from n, every pair of
   increments (incx, incy) taken from inc and every alpha in alf, this routine
   builds X, Y and A with smake_, saves copies of all arguments, calls sger_,
   checks infoc_1.ok, fills isame[] by comparing each argument with its saved
   copy (lse_ and lseres_ for the arrays), and then checks each column of the
   result with smvch_ while keeping the largest err value in errmax.
   Arguments:
     sname        6 characters are written in the reports; sname + 1 is passed
                  to smake_ as its first argument when A is generated.
     eps          forwarded to smvch_.
     thresh       errmax below *thresh selects the PASSED report, otherwise
                  the SUSPECT report (which includes errmax) is written.
     nout, ntra   values stored in ciunit for report output and trace output.
     trace, rewi  when set, write a trace record before each call and rewind
                  *ntra with f_rew.
     fatal        set to TRUE_ on a wrong error exit or a changed argument;
                  also handed to smvch_.
     nidim, idim  number of sizes and the sizes themselves.
     nalf, alf    number of alpha values and the values themselves.
     ninc, inc    number of increments and the increments themselves.
     nmax         used as the leading dimension of a and as the upper limit
                  for m and lda.
     a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__
                  arrays supplied by the caller; aa, xx, yy are passed to
                  sger_ and as, xs, ys hold the saved copies.
     incmax, sname_len   not referenced in this routine.
   Always returns 0. */ int schk4_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(i3,\002,\002)" ",f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, n; real w[1]; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; real als; 
extern logical lse_(real *, real *, integer *); real err; integer ldas; logical same; extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *); integer incx, incy; logical null; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs, incys; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; /* Fortran I/O blocks: one cilist per formatted write below; its ciunit field is assigned just before each s_wsfe call. */ static cilist io___284 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___285 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___288 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___292 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___293 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___294 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___295 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___296 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SGER. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Define the number of arguments. 
*/ /* Parameter adjustments: every array pointer is shifted so that the 1-based indices used below address the storage passed in; a is indexed as a[i + j * a_dim1] with a_dim1 = *nmax. */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body: nargs is the number of isame[] entries examined after each call; nc counts the calls made. */ nargs = 9; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX: first row count is n - nd, but not below 0. */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN: second row count is n + nd, but not above *nmax. */ i__2 = n + nd; m = min(i__2,*nmax); } /* Set LDA to 1 more than minimum value if room. */ lda = m; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room (L110 is the end of the im loop body, so the next im value is tried). */ if (lda > *nmax) { goto L110; } laa = lda * n; null = n <= 0 || m <= 0; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * m; /* Generate the vector X, then zero element m / 2 of x and the matching element of xx when m > 1. */ transl = .5f; i__3 = abs(incx); i__4 = m - 1; smake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { x[m / 2] = 0.f; xx[abs(incx) * (m / 2 - 1) + 1] = 0.f; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y, then zero element n / 2 of y and the matching element of yy when n > 1. */ transl = 0.f; i__4 = abs(incy); i__5 = n - 1; smake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { y[n / 2] = 0.f; yy[abs(incy) * (n / 2 - 1) + 1] = 0.f; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; /* Generate the matrix A. */ transl = 0.f; i__5 = m - 1; i__6 = n - 1; smake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. 
*/ ms = m; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine (after an optional trace record on *ntra and an optional rewind of *ntra). */ if (*trace) { io___284.ciunit = *ntra; s_wsfe(&io___284); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sger_(&m, &n, &alpha, &xx[1], &incx, &yy[1], &incy, & aa[1], &lda); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___285.ciunit = *nout; s_wsfe(&io___285); e_wsfe(); *fatal = TRUE_; goto L140; } /* See what data changed inside subroutine: isame[k] holds the comparison result for argument k + 1 of the sger_ call against its saved copy. */ isame[0] = ms == m; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lse_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lse_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lse_(&as[1], &aa[1], &laa); } else { isame[7] = lseres_("GE", " ", &m, &n, &as[1], &aa[ 1], &lda, (ftnlen)2, (ftnlen)1); } isame[8] = ldas == lda; /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___288.ciunit = *nout; s_wsfe(&io___288); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L140; } if (! null) { /* Check the result column by column. z__ receives x, in reverse order when incx is not positive; for column j, w[0] receives y[j], or y[n - j + 1] when incy is not positive. 
*/ if (incx > 0) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__] = x[i__]; /* L50: */ } } else { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__] = x[m - i__ + 1]; /* L60: */ } } i__5 = n; for (j = 1; j <= i__5; ++j) { if (incy > 0) { w[0] = y[j]; } else { w[0] = y[n - j + 1]; } smvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b128, &a[j * a_dim1 + 1], &c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L130; } /* L70: */ } } else { /* Avoid repeating tests with M.le.0 or N.le.0. */ goto L110; } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Report result: the PASSED message (fmt_9999) when errmax < *thresh, otherwise the SUSPECT message with errmax (fmt_9997). Failure paths: L130 writes the failing column j, then L140 writes the failing call number nc and the arguments of that call. */ if (errmax < *thresh) { io___292.ciunit = *nout; s_wsfe(&io___292); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___293.ciunit = *nout; s_wsfe(&io___293); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L150; L130: io___294.ciunit = *nout; s_wsfe(&io___294); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L140: io___295.ciunit = *nout; s_wsfe(&io___295); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___296.ciunit = *nout; s_wsfe(&io___296); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); L150: return 0; /* End of SCHK4. 
*/ } /* schk4_ */ /* Subroutine schk5_ -- computational test driver for SSYR and SSPR.
   sname[2] selects the variant: Y sets full (ssyr_ is called and 7 isame[]
   entries are examined) and P sets packed (sspr_ is called and 6 entries
   are examined).
   For every size n in idim, both letters of ich (U and L) as uplo, every
   increment in inc and every alpha in alf, this routine builds X and A with
   smake_, saves copies of the arguments, calls the selected routine, checks
   infoc_1.ok, fills isame[] by comparing each argument with its saved copy
   (lse_ and lseres_ for the arrays), and, unless null is set (n <= 0 or
   alpha == 0), checks the stored part of each column with smvch_ while
   keeping the largest err value in errmax.
   Arguments:
     sname        6 characters are written in the reports; sname + 1 is passed
                  to smake_ and lseres_ as their first argument.
     eps          forwarded to smvch_.
     thresh       errmax below *thresh selects the PASSED report, otherwise
                  the SUSPECT report (which includes errmax) is written.
     nout, ntra   values stored in ciunit for report output and trace output.
     trace, rewi  when set, write a trace record before each call and rewind
                  *ntra with f_rew.
     fatal        set to TRUE_ on a wrong error exit or a changed argument;
                  also handed to smvch_.
     nidim, idim  number of sizes and the sizes themselves.
     nalf, alf    number of alpha values and the values themselves.
     ninc, inc    number of increments and the increments themselves.
     nmax         used as the leading dimension of a and as the upper limit
                  for lda.
     a, aa, as, x, xx, xs, yt, g, z__
                  arrays supplied by the caller; aa and xx are passed to the
                  routine under test and as, xs hold the saved copies.
     y, yy, ys    only pointer-adjusted here, never indexed.
     incmax, sname_len   not referenced in this routine.
   Always returns 0. */ int schk5_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Initialized data: the two uplo letters tried by the ic loop. */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, n; real w[1]; integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda; real als; extern logical lse_(real *, real *, integer *); real err; integer ldas; logical same; integer incx; logical full, null; char uplo[1]; extern /* Subroutine */ int sspr_(char 
*, integer *, real *, real *, integer *, real *, ftnlen), ssyr_(char *, integer *, real *, real *, integer *, real *, integer *, ftnlen); real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs; logical upper; char uplos[1]; logical packed; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; /* Fortran I/O blocks: one cilist per formatted write below; its ciunit field is assigned just before each s_wsfe call. */ static cilist io___324 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___325 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___326 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___329 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___337 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___338 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___340 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___341 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SSYR and SSPR. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. 
*/ /* Parameter adjustments: every array pointer is shifted so that the 1-based indices used below address the storage passed in; a is indexed as a[i + j * a_dim1] with a_dim1 = *nmax. */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. NOTE(review): nargs is assigned only when full or packed is true; it is left unset for any other sname[2]. */ if (full) { nargs = 7; } else if (packed) { nargs = 6; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room (L100 is the end of the in loop body, so the next size is tried). */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X, then zero element n / 2 of x and the matching element of xx when n > 1. */ transl = .5f; i__3 = abs(incx); i__4 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; xx[abs(incx) * (n / 2 - 1) + 1] = 0.f; } i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; null = n <= 0 || alpha == 0.f; /* Generate the matrix A. */ transl = 0.f; i__4 = n - 1; i__5 = n - 1; smake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, & aa[1], &lda, &i__4, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als = alpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; /* Call the subroutine. 
*/ if (full) { if (*trace) { io___324.ciunit = *ntra; s_wsfe(&io___324); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssyr_(uplo, &n, &alpha, &xx[1], &incx, &aa[1], &lda, ( ftnlen)1); } else if (packed) { if (*trace) { io___325.ciunit = *ntra; s_wsfe(&io___325); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sspr_(uplo, &n, &alpha, &xx[1], &incx, &aa[1], ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___326.ciunit = *nout; s_wsfe(&io___326); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines: isame[k] holds the comparison result for argument k + 1 of the call against its saved copy; isame[6] (lda) is set only when not packed. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *) uplos; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lse_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; if (null) { isame[5] = lse_(&as[1], &aa[1], &laa); } else { isame[5] = lseres_(sname + 1, uplo, &n, &n, &as[1], & aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[6] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___329.ciunit = *nout; s_wsfe(&io___329); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! 
null) { /* Check the result column by column. z__ receives x, in reverse order when incx is not positive. For column j the checked rows start at jj and number lj: rows 1..j when upper, rows j..n otherwise. ja is the index in aa where that part of column j starts; it advances by lda (full, upper), lda + 1 (full, not upper) or lj (packed). */ if (incx > 0) { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = x[i__]; /* L40: */ } } else { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { z__[i__] = x[n - i__ + 1]; /* L50: */ } } ja = 1; i__4 = n; for (j = 1; j <= i__4; ++j) { w[0] = z__[j]; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } smvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b128, &a[jj + j * a_dim1], & c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L110; } /* L60: */ } } else { /* Avoid repeating tests if N.le.0. */ if (n <= 0) { goto L100; } } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } /* Report result: the PASSED message (fmt_9999) when errmax < *thresh, otherwise the SUSPECT message with errmax (fmt_9997). Failure paths: L110 writes the failing column j, then L120 writes the failing call number nc and the arguments of that call (with lda only when full). */ if (errmax < *thresh) { io___336.ciunit = *nout; s_wsfe(&io___336); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___337.ciunit = *nout; s_wsfe(&io___337); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L110: io___338.ciunit = *nout; s_wsfe(&io___338); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L120: io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___340.ciunit = *nout; s_wsfe(&io___340); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___341.ciunit = *nout; s_wsfe(&io___341); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, 
sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of SCHK5. */ } /* schk5_ */ /* Subroutine */ int schk6_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * ninc, integer *inc, integer *nmax, integer *incmax, real *a, real *aa, real *as, real *x, real *xx, real *xs, real *y, real *yy, real *ys, real *yt, real *g, real *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, A,\002,i" "3,\002) .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, Y,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), 
e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, n; real w[2]; integer ia, ja, ic, nc, jj, lj, in, ix, iy, ns, lx, ly, laa, lda; real als; extern logical lse_(real *, real *, integer *); real err; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int sspr2_(char *, integer *, real *, real *, integer *, real *, integer *, real *, ftnlen), ssyr2_(char *, integer *, real *, real *, integer *, real *, integer *, real *, integer *, ftnlen); real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, integer *, integer *, logical *, real *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int smvch_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, real *, real *, logical *, integer *, logical *, ftnlen); logical reset; integer incxs, incys; logical upper; char uplos[1]; logical packed; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); real transl; /* Fortran I/O blocks */ static cilist io___373 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___374 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___375 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___378 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___385 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___386 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___387 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___388 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___389 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___390 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SSYR2 and SSPR2. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. 
Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; z_dim1 = *nmax; z_offset = 1 + z_dim1; z__ -= z_offset; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'Y'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 9; } else if (packed) { nargs = 8; } nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L140; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl = .5f; i__3 = abs(incx); i__4 = n - 1; smake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { x[n / 2] = 0.f; xx[abs(incx) * (n / 2 - 1) + 1] = 0.f; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl = 0.f; i__4 = abs(incy); i__5 = n - 1; smake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { y[n / 2] = 0.f; yy[abs(incy) * (n / 2 - 1) + 1] = 0.f; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; null = n <= 0 || alpha == 0.f; /* Generate the matrix A. 
*/ transl = 0.f; i__5 = n - 1; i__6 = n - 1; smake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { xs[i__] = xx[i__]; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { ys[i__] = yy[i__]; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___373.ciunit = *ntra; s_wsfe(&io___373); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssyr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___374.ciunit = *ntra; s_wsfe(&io___374); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sspr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! 
infoc_1.ok) { io___375.ciunit = *nout; s_wsfe(&io___375); e_wsfe(); *fatal = TRUE_; goto L160; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char * )uplos; isame[1] = ns == n; isame[2] = als == alpha; isame[3] = lse_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lse_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lse_(&as[1], &aa[1], &laa); } else { isame[7] = lseres_(sname + 1, uplo, &n, &n, &as[1] , &aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[8] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___378.ciunit = *nout; s_wsfe(&io___378); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L160; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + z_dim1] = x[i__]; /* L50: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + z_dim1] = x[n - i__ + 1]; /* L60: */ } } if (incy > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + (z_dim1 << 1)] = y[i__]; /* L70: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { z__[i__ + (z_dim1 << 1)] = y[n - i__ + 1]; /* L80: */ } } ja = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { w[0] = z__[j + (z_dim1 << 1)]; w[1] = z__[j + z_dim1]; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } smvch_("N", &lj, &c__2, &alpha, &z__[jj + z_dim1], nmax, w, &c__1, &c_b128, &a[ jj + j * a_dim1], &c__1, &yt[1], &g[1] , &aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L150; } /* L90: */ } } else { /* Avoid repeating tests with N.le.0. 
*/ if (n <= 0) { goto L140; } } /* L100: */ } /* L110: */ } /* L120: */ } /* L130: */ } L140: ; } /* Report result. */ if (errmax < *thresh) { io___385.ciunit = *nout; s_wsfe(&io___385); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___386.ciunit = *nout; s_wsfe(&io___386); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L170; L150: io___387.ciunit = *nout; s_wsfe(&io___387); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L160: io___388.ciunit = *nout; s_wsfe(&io___388); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___389.ciunit = *nout; s_wsfe(&io___389); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___390.ciunit = *nout; s_wsfe(&io___390); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L170: return 0; /* End of SCHK6. 
*/ } /* schk6_ */

/*  SCHKE: tests the error exits from the Level 2 Blas. */

/*  *isnum selects one of sixteen routines.  For the selected routine a */
/*  series of calls is made; before each call infoc_1.infot is set to the */
/*  parameter number that chkxer_ will report if the call did not raise */
/*  infoc_1.lerr.  A PASSED or FAILED line for srnamt is then written to */
/*  unit *nout according to infoc_1.ok. */

/*  Requires a special version of the error-handling routine XERBLA. */
/*  ALPHA, BETA, A, X and Y should not need to be defined. */

/*  Auxiliary routine for test program for Level 2 Blas. */

/*  -- Written on 10-August-1987. */
/*     Richard Hanson, Sandia National Labs. */
/*     Jeremy Du Croz, NAG Central Office. */

/* Subroutine */ int schke_(integer *isnum, char *srnamt, integer *nout,
	ftnlen srnamt_len)
{
    /* Format strings */
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E"
	    "XITS\002)";
    static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF"
	    " ERROR-EXITS *****\002,\002**\002)";

    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Dummy operands: only their addresses are handed to the routines. */
    real alpha, beta;
    real a[1]	/* was [1][1] */, x[1], y[1];

    /* Routines under test */
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
	    real *, integer *, real *, integer *, real *, real *, integer *,
	    ftnlen);
    extern /* Subroutine */ int sgbmv_(char *, integer *, integer *,
	    integer *, integer *, real *, real *, integer *, real *,
	    integer *, real *, real *, integer *, ftnlen);
    extern /* Subroutine */ int ssymv_(char *, integer *, real *, real *,
	    integer *, real *, integer *, real *, real *, integer *, ftnlen);
    extern /* Subroutine */ int ssbmv_(char *, integer *, integer *, real *,
	    real *, integer *, real *, integer *, real *, real *, integer *,
	    ftnlen);
    extern /* Subroutine */ int sspmv_(char *, integer *, real *, real *,
	    real *, integer *, real *, real *, integer *, ftnlen);
    extern /* Subroutine */ int strmv_(char *, char *, char *, integer *,
	    real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen);
    extern /* Subroutine */ int stbmv_(char *, char *, char *, integer *,
	    integer *, real *, integer *, real *, integer *, ftnlen, ftnlen,
	    ftnlen);
    extern /* Subroutine */ int stpmv_(char *, char *, char *, integer *,
	    real *, real *, integer *, ftnlen, ftnlen, ftnlen);
    extern /* Subroutine */ int strsv_(char *, char *, char *, integer *,
	    real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen);
    extern /* Subroutine */ int stbsv_(char *, char *, char *, integer *,
	    integer *, real *, integer *, real *, integer *, ftnlen, ftnlen,
	    ftnlen);
    extern /* Subroutine */ int stpsv_(char *, char *, char *, integer *,
	    real *, real *, integer *, ftnlen, ftnlen, ftnlen);
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *,
	    integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int ssyr_(char *, integer *, real *, real *,
	    integer *, real *, integer *, ftnlen);
    extern /* Subroutine */ int sspr_(char *, integer *, real *, real *,
	    integer *, real *, ftnlen);
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *,
	    integer *, real *, integer *, real *, integer *, ftnlen);
    extern /* Subroutine */ int sspr2_(char *, integer *, real *, real *,
	    integer *, real *, integer *, real *, ftnlen);
    extern /* Subroutine */ int chkxer_(char *, integer *, integer *,
	    logical *, logical *, ftnlen);

    /* Fortran I/O blocks */
    static cilist io___396 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___397 = { 0, 0, 0, fmt_9998, 0 };

/*     One probe: record the parameter number, make the call, then let */
/*     chkxer_ inspect and clear infoc_1.lerr. */
#define SCHKE_PROBE(argpos, call) \
    do { \
	infoc_1.infot = (argpos); \
	call; \
	chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, \
		(ftnlen)6); \
    } while (0)

/*     OK is set to .FALSE. by the special version of XERBLA or by CHKXER */
/*     if anything is wrong. */
    infoc_1.ok = TRUE_;
/*     LERR is set to .TRUE. by the special version of XERBLA each time */
/*     it is called, and is then tested and re-set by CHKXER. */
    infoc_1.lerr = FALSE_;

    switch (*isnum) {
    default:
	/* A selector outside 1..16 runs the first group, as the original */
	/* computed GOTO fell through to its first label. */
    case 1:
	SCHKE_PROBE(1, sgemv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(2, sgemv_("N", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(3, sgemv_("N", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(6, sgemv_("N", &c__2, &c__0, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(8, sgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__0,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(11, sgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__0, (ftnlen)1));
	break;

    case 2:
	SCHKE_PROBE(1, sgbmv_("/", &c__0, &c__0, &c__0, &c__0, &alpha, a,
		&c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(2, sgbmv_("N", &c_n1, &c__0, &c__0, &c__0, &alpha, a,
		&c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(3, sgbmv_("N", &c__0, &c_n1, &c__0, &c__0, &alpha, a,
		&c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(4, sgbmv_("N", &c__0, &c__0, &c_n1, &c__0, &alpha, a,
		&c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(5, sgbmv_("N", &c__2, &c__0, &c__0, &c_n1, &alpha, a,
		&c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(8, sgbmv_("N", &c__0, &c__0, &c__1, &c__0, &alpha, a,
		&c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(10, sgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a,
		&c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(13, sgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a,
		&c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1));
	break;

    case 3:
	SCHKE_PROBE(1, ssymv_("/", &c__0, &alpha, a, &c__1, x, &c__1, &beta,
		y, &c__1, (ftnlen)1));
	SCHKE_PROBE(2, ssymv_("U", &c_n1, &alpha, a, &c__1, x, &c__1, &beta,
		y, &c__1, (ftnlen)1));
	SCHKE_PROBE(5, ssymv_("U", &c__2, &alpha, a, &c__1, x, &c__1, &beta,
		y, &c__1, (ftnlen)1));
	SCHKE_PROBE(7, ssymv_("U", &c__0, &alpha, a, &c__1, x, &c__0, &beta,
		y, &c__1, (ftnlen)1));
	SCHKE_PROBE(10, ssymv_("U", &c__0, &alpha, a, &c__1, x, &c__1, &beta,
		y, &c__0, (ftnlen)1));
	break;

    case 4:
	SCHKE_PROBE(1, ssbmv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(2, ssbmv_("U", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(3, ssbmv_("U", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(6, ssbmv_("U", &c__0, &c__1, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(8, ssbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__0,
		&beta, y, &c__1, (ftnlen)1));
	SCHKE_PROBE(11, ssbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__1,
		&beta, y, &c__0, (ftnlen)1));
	break;

    case 5:
	SCHKE_PROBE(1, sspmv_("/", &c__0, &alpha, a, x, &c__1, &beta, y,
		&c__1, (ftnlen)1));
	SCHKE_PROBE(2, sspmv_("U", &c_n1, &alpha, a, x, &c__1, &beta, y,
		&c__1, (ftnlen)1));
	SCHKE_PROBE(6, sspmv_("U", &c__0, &alpha, a, x, &c__0, &beta, y,
		&c__1, (ftnlen)1));
	SCHKE_PROBE(9, sspmv_("U", &c__0, &alpha, a, x, &c__1, &beta, y,
		&c__0, (ftnlen)1));
	break;

    case 6:
	SCHKE_PROBE(1, strmv_("/", "N", "N", &c__0, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(2, strmv_("U", "/", "N", &c__0, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(3, strmv_("U", "N", "/", &c__0, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(4, strmv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(6, strmv_("U", "N", "N", &c__2, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(8, strmv_("U", "N", "N", &c__0, a, &c__1, x, &c__0,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	break;

    case 7:
	SCHKE_PROBE(1, stbmv_("/", "N", "N", &c__0, &c__0, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(2, stbmv_("U", "/", "N", &c__0, &c__0, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(3, stbmv_("U", "N", "/", &c__0, &c__0, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(4, stbmv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(5, stbmv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(7, stbmv_("U", "N", "N", &c__0, &c__1, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(9, stbmv_("U", "N", "N", &c__0, &c__0, a, &c__1, x,
		&c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	break;

    case 8:
	SCHKE_PROBE(1, stpmv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(2, stpmv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(3, stpmv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(4, stpmv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(7, stpmv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	break;

    case 9:
	SCHKE_PROBE(1, strsv_("/", "N", "N", &c__0, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(2, strsv_("U", "/", "N", &c__0, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(3, strsv_("U", "N", "/", &c__0, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(4, strsv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(6, strsv_("U", "N", "N", &c__2, a, &c__1, x, &c__1,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(8, strsv_("U", "N", "N", &c__0, a, &c__1, x, &c__0,
		(ftnlen)1, (ftnlen)1, (ftnlen)1));
	break;

    case 10:
	SCHKE_PROBE(1, stbsv_("/", "N", "N", &c__0, &c__0, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(2, stbsv_("U", "/", "N", &c__0, &c__0, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(3, stbsv_("U", "N", "/", &c__0, &c__0, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(4, stbsv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(5, stbsv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(7, stbsv_("U", "N", "N", &c__0, &c__1, a, &c__1, x,
		&c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(9, stbsv_("U", "N", "N", &c__0, &c__0, a, &c__1, x,
		&c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1));
	break;

    case 11:
	SCHKE_PROBE(1, stpsv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(2, stpsv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(3, stpsv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(4, stpsv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	SCHKE_PROBE(7, stpsv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1,
		(ftnlen)1, (ftnlen)1));
	break;

    case 12:
	SCHKE_PROBE(1, sger_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a,
		&c__1));
	SCHKE_PROBE(2, sger_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a,
		&c__1));
	SCHKE_PROBE(5, sger_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a,
		&c__1));
	SCHKE_PROBE(7, sger_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a,
		&c__1));
	SCHKE_PROBE(9, sger_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a,
		&c__1));
	break;

    case 13:
	SCHKE_PROBE(1, ssyr_("/", &c__0, &alpha, x, &c__1, a, &c__1,
		(ftnlen)1));
	SCHKE_PROBE(2, ssyr_("U", &c_n1, &alpha, x, &c__1, a, &c__1,
		(ftnlen)1));
	SCHKE_PROBE(5, ssyr_("U", &c__0, &alpha, x, &c__0, a, &c__1,
		(ftnlen)1));
	SCHKE_PROBE(7, ssyr_("U", &c__2, &alpha, x, &c__1, a, &c__1,
		(ftnlen)1));
	break;

    case 14:
	SCHKE_PROBE(1, sspr_("/", &c__0, &alpha, x, &c__1, a, (ftnlen)1));
	SCHKE_PROBE(2, sspr_("U", &c_n1, &alpha, x, &c__1, a, (ftnlen)1));
	SCHKE_PROBE(5, sspr_("U", &c__0, &alpha, x, &c__0, a, (ftnlen)1));
	break;

    case 15:
	SCHKE_PROBE(1, ssyr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a,
		&c__1, (ftnlen)1));
	SCHKE_PROBE(2, ssyr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a,
		&c__1, (ftnlen)1));
	SCHKE_PROBE(5, ssyr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a,
		&c__1, (ftnlen)1));
	SCHKE_PROBE(7, ssyr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a,
		&c__1, (ftnlen)1));
	SCHKE_PROBE(9, ssyr2_("U", &c__2, &alpha, x, &c__1, y, &c__1, a,
		&c__1, (ftnlen)1));
	break;

    case 16:
	SCHKE_PROBE(1, sspr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a,
		(ftnlen)1));
	SCHKE_PROBE(2, sspr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a,
		(ftnlen)1));
	SCHKE_PROBE(5, sspr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a,
		(ftnlen)1));
	SCHKE_PROBE(7, sspr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a,
		(ftnlen)1));
	break;
    }

#undef SCHKE_PROBE

/*     Write the PASSED line (format 9999) or the FAILED line (format 9998). */
    if (infoc_1.ok) {
	io___396.ciunit = *nout;
	s_wsfe(&io___396);
	do_fio(&c__1, srnamt, (ftnlen)6);
	e_wsfe();
    } else {
	io___397.ciunit = *nout;
s_wsfe(&io___397); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of SCHKE. */ } /* schke_ */ /* Subroutine */ int smake_(char *type__, char *uplo, char *diag, integer *m, integer *n, real *a, integer *nmax, real *aa, integer *lda, integer * kl, integer *ku, logical *reset, real *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, i1, i2, i3, kk; logical gen, tri, sym; integer ibeg, iend; extern real sbeg_(logical *); integer ioff; logical unit, lower, upper; /* Generates values for an M by N matrix A within the bandwidth */ /* defined by KL and KU. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'GB', 'SY', 'SB', 'SP', 'TR', 'TB' OR 'TP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = *(unsigned char *)type__ == 'G'; sym = *(unsigned char *)type__ == 'S'; tri = *(unsigned char *)type__ == 'T'; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. 
*/ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { a[i__ + j * a_dim1] = sbeg_(reset) + *transl; } else { a[i__ + j * a_dim1] = 0.f; } if (i__ != j) { if (sym) { a[j + i__ * a_dim1] = a[i__ + j * a_dim1]; } else if (tri) { a[j + i__ * a_dim1] = 0.f; } } } /* L10: */ } if (tri) { a[j + j * a_dim1] += 1.f; } if (unit) { a[j + j * a_dim1] = 1.f; } /* L20: */ } /* Store elements in array AS in data structure required by routine. */ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "GB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *ku + 1 - j; for (i1 = 1; i1 <= i__2; ++i1) { aa[i1 + (j - 1) * *lda] = -1e10f; /* L60: */ } /* Computing MIN */ i__3 = *kl + *ku + 1, i__4 = *ku + 1 + *m - j; i__2 = min(i__3,i__4); for (i2 = i1; i2 <= i__2; ++i2) { aa[i2 + (j - 1) * *lda] = a[i2 + j - *ku - 1 + j * a_dim1]; /* L70: */ } i__2 = *lda; for (i3 = i2; i3 <= i__2; ++i3) { aa[i3 + (j - 1) * *lda] = -1e10f; /* L80: */ } /* L90: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L100: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L110: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * 
*lda] = -1e10f; /* L120: */ } /* L130: */ } } else if (s_cmp(type__, "SB", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { kk = *kl + 1; /* Computing MAX */ i__2 = 1, i__3 = *kl + 2 - j; ibeg = max(i__2,i__3); if (unit) { iend = *kl; } else { iend = *kl + 1; } } else { kk = 1; if (unit) { ibeg = 2; } else { ibeg = 1; } /* Computing MIN */ i__2 = *kl + 1, i__3 = *m + 1 - j; iend = min(i__2,i__3); } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L140: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j - kk + j * a_dim1]; /* L150: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L160: */ } /* L170: */ } } else if (s_cmp(type__, "SP", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TP", (ftnlen)2, (ftnlen)2) == 0) { ioff = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { ++ioff; aa[ioff] = a[i__ + j * a_dim1]; if (i__ == j) { if (unit) { aa[ioff] = -1e10f; } } /* L180: */ } /* L190: */ } } return 0; /* End of SMAKE. 
*/ } /* smake_ */ /* Subroutine */ int smvch_(char *trans, integer *m, integer *n, real *alpha, real *a, integer *nmax, real *x, integer *incx, real *beta, real *y, integer *incy, real *yt, real *g, real *yy, real *eps, real *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 EX" "PECTED RESULT COMPU\002,\002TED RESULT\002)"; static char fmt_9998[] = "(1x,i7,2g18.6)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, ml, nl, iy, jx, kx, ky; real erri; logical tran; integer incxl, incyl; /* Fortran I/O blocks */ static cilist io___425 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___426 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___427 = { 0, 0, 0, fmt_9998, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --x; --y; --yt; --g; --yy; /* Function Body */ tran = *(unsigned char *)trans == 'T' || *(unsigned char *)trans == 'C'; if (tran) { ml = *n; nl = *m; } else { ml = *m; nl = *n; } if (*incx < 0) { kx = nl; incxl = -1; } else { kx = 1; incxl = 1; } if (*incy < 0) { ky = ml; incyl = -1; } else { ky = 1; incyl = 1; } /* Compute expected result in YT using data in A, X and Y. */ /* Compute gauges in G. 
*/ iy = ky; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { yt[iy] = 0.f; g[iy] = 0.f; jx = kx; if (tran) { i__2 = nl; for (j = 1; j <= i__2; ++j) { yt[iy] += a[j + i__ * a_dim1] * x[jx]; g[iy] += (r__1 = a[j + i__ * a_dim1] * x[jx], abs(r__1)); jx += incxl; /* L10: */ } } else { i__2 = nl; for (j = 1; j <= i__2; ++j) { yt[iy] += a[i__ + j * a_dim1] * x[jx]; g[iy] += (r__1 = a[i__ + j * a_dim1] * x[jx], abs(r__1)); jx += incxl; /* L20: */ } } yt[iy] = *alpha * yt[iy] + *beta * y[iy]; g[iy] = abs(*alpha) * g[iy] + (r__1 = *beta * y[iy], abs(r__1)); iy += incyl; /* L30: */ } /* Compute the error ratio for this result. */ *err = 0.f; i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { erri = (r__1 = yt[i__] - yy[(i__ - 1) * abs(*incy) + 1], abs(r__1)) / *eps; if (g[i__] != 0.f) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.f) { goto L50; } /* L40: */ } /* If the loop completes, all results are at least half accurate. */ goto L70; /* Report fatal error. */ L50: *fatal = TRUE_; io___425.ciunit = *nout; s_wsfe(&io___425); e_wsfe(); i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___426.ciunit = *nout; s_wsfe(&io___426); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&yt[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(real)); e_wsfe(); } else { io___427.ciunit = *nout; s_wsfe(&io___427); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&yt[i__], (ftnlen)sizeof(real)); e_wsfe(); } /* L60: */ } L70: return 0; /* End of SMVCH. */ } /* smvch_ */ logical lse_(real *ri, real *rj, integer *lr) { /* System generated locals */ integer i__1; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. 
*/ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { if (ri[i__] != rj[i__]) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LSE. */ } /* lse_ */ logical lseres_(char *type__, char *uplo, integer *m, integer *n, real *aa, real *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE', 'SY' or 'SP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LSERES. */ } /* lseres_ */ real sbeg_(logical *reset) { /* System generated locals */ real ret_val; /* Local variables */ static integer i__, ic, mi; /* Generates random numbers uniformly distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; i__ = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I is bounded between 1 and 999. */ /* If initial I = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I = 4 or 8, the period will be 25. */ /* If initial I = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I in 6. 
*/ ++ic; L10: i__ *= mi; i__ -= i__ / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } ret_val = (real) (i__ - 500) / 1001.f; return ret_val; /* End of SBEG. */ } /* sbeg_ */ real sdiff_(real *x, real *y) { /* System generated locals */ real ret_val; /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of SDIFF. */ } /* sdiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___437 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___437.ciunit = *nout; s_wsfe(&io___437); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. 
*/ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___438 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___439 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___440 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 2 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 2 BLAS routines. */ /* It is called by the Level 2 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. 
*/ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___438.ciunit = infoc_2.nout; s_wsfe(&io___438); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___439.ciunit = infoc_2.nout; s_wsfe(&io___439); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___440.ciunit = infoc_2.nout; s_wsfe(&io___440); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int sblat2_ () { main (); return 0; } blis-0.6.1/blastest/src/sblat3.c000066400000000000000000004311631360743507500164350ustar00rootroot00000000000000/* sblat3.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__4 = 4; static integer c__65 = 65; static integer c__7 = 7; static real c_b84 = 0.f; static real c_b94 = 1.f; static logical c_true = TRUE_; static logical c_false = FALSE_; static integer c__0 = 0; static integer c_n1 = -1; static integer c__2 
= 2; /* > \brief \b SBLAT3 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM SBLAT3 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the REAL Level 3 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 14 records */ /* > of the file are read using list-directed input, the last 6 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 20 lines: */ /* > 'sblat3.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > 0.0 1.0 0.7 VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > 0.0 1.0 1.3 VALUES OF BETA */ /* > SGEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STRMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > STRSM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYRK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > SSYR2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. */ /* > A Set of Level 3 Basic Linear Algebra Subprograms. 
*/ /* > */ /* > Technical Memorandum No.88 (Revision 1), Mathematics and */ /* > Computer Science Division, Argonne National Laboratory, 9700 */ /* > South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > -- Written on 8-February-1989. */ /* > Jack Dongarra, Argonne National Laboratory. */ /* > Iain Duff, AERE Harwell. */ /* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* > Sven Hammarling, Numerical Algorithms Group Ltd. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup single_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static char snames[6*6] = "SGEMM " "SSYMM " "STRMM " "STRSM " "SSYRK " "SSYR2K"; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 TESTS OF THE REAL LEVEL 3 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9994[] = "(\002 FOR N \002,9i6)"; static char fmt_9993[] = "(\002 FOR ALPHA \002,7f6.1)"; static char fmt_9992[] = "(\002 FOR BETA \002,7f6.1)"; static char fmt_9984[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9988[] = "(a6,l2)"; static char fmt_9990[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char 
fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,e9.1)"; static char fmt_9989[] = "(\002 ERROR IN SMMCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 SMMCH WAS CALLED " "WITH TRANSA = \002,a1,\002 AND TRANSB = \002,a1,/\002 AND RETURN" "ED SAME = \002,l1,\002 AND \002,\002ERR = \002,f12.3,\002.\002," "/\002 THIS MAY BE DUE TO FAULTS IN THE \002,\002ARITHMETIC OR TH" "E COMPILER.\002,/\002 ******* TESTS ABANDONED \002,\002******" "*\002)"; static char fmt_9987[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9986[] = "(/\002 END OF TESTS\002)"; static char fmt_9985[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9991[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ real c__[4225] /* was [65][65] */, g[65]; integer i__, j, n; real w[130], aa[4225], ab[8450] /* was [65][130] */, bb[4225], cc[ 4225], as[4225], bs[4225], cs[4225], ct[65], alf[7], bet[7]; extern logical lse_(real *, real *, integer *); real eps, err; integer nalf, idim[9]; logical same; integer nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int schk1_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, 
ftnlen), schk2_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk3_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk4_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen), schk5_(char *, real *, real *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, integer *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, real *, ftnlen); logical fatal; extern /* Subroutine */ int schke_(integer *, char *, integer *, ftnlen); logical trace; integer nidim; extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); char snaps[32]; integer isnum; logical ltest[6], sfatal; char snamet[6], transa[1], transb[1]; real thresh; logical ltestt, tsterr; char summry[32]; extern real s_epsilon_(real *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, 
fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___40 = { 0, 0, 0, 0, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9984, 0 }; static cilist io___42 = { 0, 0, 0, 0, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, 0, 0 }; static cilist io___46 = { 0, 5, 1, fmt_9988, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___67 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___69 = { 0, 0, 0, 0, 0 }; static cilist io___70 = { 0, 0, 0, fmt_9987, 0 }; static cilist io___71 = { 0, 0, 0, 0, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___80 = { 0, 0, 0, fmt_9991, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. 
Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = 0; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = 0; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__4, &c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_rsle(); /* Read and check the parameter values for the tests. 
*/ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } /* L10: */ } /* Values of ALPHA */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___29); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__4, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_rsle(); /* Values of BETA */ s_rsle(&io___31); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___33.ciunit = nout; s_wsfe(&io___33); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___34); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__4, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_rsle(); /* Report values of parameters. 
*/ io___36.ciunit = nout; s_wsfe(&io___36); e_wsfe(); io___37.ciunit = nout; s_wsfe(&io___37); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___38.ciunit = nout; s_wsfe(&io___38); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); io___39.ciunit = nout; s_wsfe(&io___39); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof(real)); } e_wsfe(); if (! tsterr) { io___40.ciunit = nout; s_wsle(&io___40); e_wsle(); io___41.ciunit = nout; s_wsfe(&io___41); e_wsfe(); } io___42.ciunit = nout; s_wsle(&io___42); e_wsle(); io___43.ciunit = nout; s_wsfe(&io___43); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(real)); e_wsfe(); io___44.ciunit = nout; s_wsle(&io___44); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 6; ++i__) { ltest[i__ - 1] = FALSE_; /* L20: */ } L30: i__1 = s_rsfe(&io___46); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L60; } i__1 = e_rsfe(); if (i__1 != 0) { goto L60; } for (i__ = 1; i__ <= 6; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } /* L40: */ } io___49.ciunit = nout; s_wsfe(&io___49); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L50: ltest[i__ - 1] = ltestt; goto L30; L60: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = s_epsilon_(&c_b84); io___51.ciunit = nout; s_wsfe(&io___51); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(real)); e_wsfe(); /* Check the reliability of SMMCH using exact data. 
*/ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ i__3 = i__ - j + 1; ab[i__ + j * 65 - 66] = (real) max(i__3,0); /* L90: */ } ab[j + 4224] = (real) j; ab[(j + 65) * 65 - 65] = (real) j; c__[j - 1] = 0.f; /* L100: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { cc[j - 1] = (real) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3) ; /* L110: */ } /* CC holds the exact result. On exit from SMMCH CT holds */ /* the result computed by SMMCH. */ *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; smmch_(transa, transb, &n, &c__1, &n, &c_b94, ab, &c__65, &ab[4225], & c__65, &c_b84, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lse_(cc, ct, &n); if (! same || err != 0.f) { io___64.ciunit = nout; s_wsfe(&io___64); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'T'; smmch_(transa, transb, &n, &c__1, &n, &c_b94, ab, &c__65, &ab[4225], & c__65, &c_b84, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lse_(cc, ct, &n); if (! 
same || err != 0.f) { io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } i__1 = n; for (j = 1; j <= i__1; ++j) { ab[j + 4224] = (real) (n - j + 1); ab[(j + 65) * 65 - 65] = (real) (n - j + 1); /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { cc[n - j] = (real) (j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3) ; /* L130: */ } *(unsigned char *)transa = 'T'; *(unsigned char *)transb = 'N'; smmch_(transa, transb, &n, &c__1, &n, &c_b94, ab, &c__65, &ab[4225], & c__65, &c_b84, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lse_(cc, ct, &n); if (! same || err != 0.f) { io___66.ciunit = nout; s_wsfe(&io___66); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'T'; smmch_(transa, transb, &n, &c__1, &n, &c_b94, ab, &c__65, &ab[4225], & c__65, &c_b84, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lse_(cc, ct, &n); if (! same || err != 0.f) { io___67.ciunit = nout; s_wsfe(&io___67); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(real)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 6; ++isnum) { io___69.ciunit = nout; s_wsle(&io___69); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___70.ciunit = nout; s_wsfe(&io___70); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. 
*/ if (tsterr) { schke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___71.ciunit = nout; s_wsle(&io___71); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L150; case 3: goto L160; case 4: goto L160; case 5: goto L170; case 6: goto L180; } /* Test SGEMM, 01. */ L140: schk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test SSYMM, 02. */ L150: schk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test STRMM, 03, STRSM, 04. */ L160: schk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test SSYRK, 05. */ L170: schk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test SSYR2K, 06. */ L180: schk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; L190: if (fatal && sfatal) { goto L210; } } /* L200: */ } io___78.ciunit = nout; s_wsfe(&io___78); e_wsfe(); goto L230; L210: io___79.ciunit = nout; s_wsfe(&io___79); e_wsfe(); goto L230; L220: io___80.ciunit = nout; s_wsfe(&io___80); e_wsfe(); L230: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of SBLAT3. 
*/ return 0; } /* main */ /* Subroutine */ int schk1_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b, real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002','\002" ",a1,\002',\002,3(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002" ",i3,\002,\002,f4.1,\002, \002,\002C,\002,i3,\002).\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; real als, bls; extern logical lse_(real *, real *, integer *); real err, beta; integer ldas, ldbs, ldcs; logical same, null; real alpha; logical isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, 
real * , ftnlen, ftnlen, ftnlen); logical trana, tranb; extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen), sgemm_( char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); integer nargs; logical reset; char tranas[1], tranbs[1], transa[1], transb[1]; real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___124 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___125 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___128 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___130 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___131 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___132 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___133 = { 0, 0, 0, fmt_9995, 0 }; /* Tests SGEMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. 
*/ nargs = 13; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0 || m <= 0; i__3 = *nidim; for (ik = 1; ik <= i__3; ++ik) { k = idim[ik]; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { ma = k; na = m; } else { ma = m; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ smake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); for (icb = 1; icb <= 3; ++icb) { *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { mb = n; nb = k; } else { mb = k; nb = n; } /* Set LDB to 1 more than minimum value if room. */ ldb = mb; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L70; } lbb = ldb * nb; /* Generate the matrix B. */ smake_("GE", " ", " ", &mb, &nb, &b[b_offset], nmax, & bb[1], &ldb, &reset, &c_b84, (ftnlen)2, ( ftnlen)1, (ftnlen)1); i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { alpha = alf[ia]; i__5 = *nbet; for (ib = 1; ib <= i__5; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ smake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. 
*/ *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)tranbs = *(unsigned char *) transb; ms = m; ns = n; ks = k; als = alpha; i__6 = laa; for (i__ = 1; i__ <= i__6; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__6 = lbb; for (i__ = 1; i__ <= i__6; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bls = beta; i__6 = lcc; for (i__ = 1; i__ <= i__6; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___124.ciunit = *ntra; s_wsfe(&io___124); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen) sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } sgemm_(transa, transb, &m, &n, &k, &alpha, & aa[1], &lda, &bb[1], &ldb, &beta, &cc[ 1], &ldc, (ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___125.ciunit = *nout; s_wsfe(&io___125); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)transa == *( unsigned char *)tranas; isame[1] = *(unsigned char *)transb == *( unsigned char *)tranbs; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; isame[5] = als == alpha; isame[6] = lse_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lse_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; isame[10] = bls == beta; if (null) { isame[11] = lse_(&cs[1], &cc[1], &lcc); } else { isame[11] = lseres_("GE", " ", &m, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[12] = ldcs == ldc; /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__6 = nargs; for (i__ = 1; i__ <= i__6; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___128.ciunit = *nout; s_wsfe(&io___128); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ smmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } /* L50: */ } /* L60: */ } L70: ; } L80: ; } /* L90: */ } L100: ; } /* L110: */ } /* Report result. 
*/ if (errmax < *thresh) { io___130.ciunit = *nout; s_wsfe(&io___130); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___131.ciunit = *nout; s_wsfe(&io___131); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L120: io___132.ciunit = *nout; s_wsfe(&io___132); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___133.ciunit = *nout; s_wsfe(&io___133); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of SCHK1. 
*/
} /* schk1_ */

/*
 * schk2_ -- exercises SSYMM over a grid of problem sizes and options
 * (f2c translation; auxiliary routine of the Level 3 BLAS test program).
 *
 * For every (m, n) pair taken from idim, each SIDE in "LR", each UPLO in
 * "UL" and every alpha/beta taken from alf/bet, the routine builds B, A
 * and C with smake_, snapshots every argument, calls ssymm_, verifies
 * that no input argument was modified and, unless m or n is <= 0, checks
 * the computed C with smmch_.  The largest error ratio seen is compared
 * with *thresh once all calls have been made.
 *
 * Arguments (as used in the body):
 *   sname       routine name; 6 characters of it are written in reports.
 *   eps         forwarded unchanged to smmch_.
 *   thresh      errmax < *thresh selects the "PASSED" report, otherwise
 *               the "SUSPECT" report (with errmax) is written.
 *   nout        unit number used for result and error reports.
 *   ntra        unit number used for the call trace; rewound before each
 *               call when *rewi is set.
 *   trace       when nonzero, each call is traced on unit *ntra.
 *   rewi        when nonzero, unit *ntra is rewound before each call.
 *   fatal       set to TRUE_ on an unexpected error exit or a modified
 *               argument; also handed to smmch_ and tested afterwards.
 *   nidim,idim  number of dimension values and the values themselves.
 *   nalf,alf    number of alpha values and the values themselves.
 *   nbet,bet    number of beta values and the values themselves.
 *   nmax        leading dimension used for a, b and c__; upper bound
 *               accepted for lda/ldb/ldc.
 *   a, b, c__   matrices handed to smake_ with leading dimension *nmax
 *               and later to smmch_.
 *   aa, bb, cc  arrays handed to smake_ and to ssymm_ with leading
 *               dimensions lda/ldb/ldc.
 *   as, bs, cs  receive copies of aa, bb, cc taken before the call.
 *   ct, g       passed through to smmch_ (presumably workspace --
 *               confirm against smmch_).
 *   sname_len   not referenced in the body.
 *
 * Always returns 0; failure is signalled through *fatal and the report.
 * c_b84, c__1, c_true and infoc_1 are defined outside this function.
 */
/* Subroutine */ int schk2_(char *sname, real *eps, real *thresh, integer *
    nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
    integer *nidim, integer *idim, integer *nalf, real *alf, integer *
    nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b,
    real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g,
    ftnlen sname_len)
{
    /* Initialized data */
    /* Candidate values for the SIDE and UPLO arguments of ssymm_. */
    static char ichs[2] = "LR";
    static char ichu[2] = "UL";
    /* Format strings */
    /* 9995: trace / failing-call line listing the call's arguments. */
    static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
        ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i"
        "3,\002,\002,f4.1,\002, C,\002,i3,\002) \002,\002 .\002)";
    /* 9994: error exit taken on a valid call. */
    static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
        "N VALID CALL *\002,\002******\002)";
    /* 9998: an argument was changed by the routine under test. */
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
        " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    /* 9999: all computational tests passed. */
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
        "STS (\002,i6,\002 CALL\002,\002S)\002)";
    /* 9997: tests completed but the maximum ratio reached the threshold. */
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
        " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
        "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    /* 9996: heading printed before the failing call is listed. */
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
        "ER:\002)";
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
        i__3, i__4, i__5;
    alist al__1;
    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
        f_rew(alist *);
    /* Local variables */
    integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc,
        ldb, ldc, ics;
    real als, bls;
    integer icu;
    extern logical lse_(real *, real *, integer *);
    real err, beta;
    integer ldas, ldbs, ldcs;
    logical same;
    char side[1];
    logical left, null;
    char uplo[1];
    real alpha;
    logical isame[13];
    extern /* Subroutine */ int smake_(char *, char *, char *, integer *,
        integer *, real *,
        integer *, real *, integer *, logical *, real * , ftnlen, ftnlen,
        ftnlen);
    char sides[1];
    extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *,
        integer *, real *, real *, integer *, real *, integer *, real *, real
        *, integer *, real *, real *, real *, integer *, real *, real *,
        logical *, integer *, logical *, ftnlen, ftnlen);
    integer nargs;
    logical reset;
    char uplos[1];
    extern /* Subroutine */ int ssymm_(char *, char *, integer *, integer *,
        real *, real *, integer *, real *, integer *, real *, real *, integer
        *, ftnlen, ftnlen);
    real errmax;
    extern logical lseres_(char *, char *, integer *, integer *, real *, real
        *, integer *, ftnlen, ftnlen);
    /* Fortran I/O blocks */
    /* One control block per write statement; ciunit is filled in before use. */
    static cilist io___171 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___172 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___175 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___177 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___178 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___179 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___180 = { 0, 0, 0, fmt_9995, 0 };
    /* Tests SSYMM. */
    /* Auxiliary routine for test program for Level 3 Blas. */
    /* -- Written on 8-February-1989. */
    /* Jack Dongarra, Argonne National Laboratory. */
    /* Iain Duff, AERE Harwell. */
    /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
    /* Sven Hammarling, Numerical Algorithms Group Ltd. */
    /* .. Parameters .. */
    /* .. Scalar Arguments .. */
    /* .. Array Arguments .. */
    /* .. Local Scalars .. */
    /* .. Local Arrays .. */
    /* .. External Functions .. */
    /* .. External Subroutines .. */
    /* .. Intrinsic Functions .. */
    /* .. Scalars in Common .. */
    /* .. Common blocks .. */
    /* .. Data statements .. */
    /* Parameter adjustments */
    /* Shift every array pointer so the 1-based subscripts used below
       (x[1], a[i + j*a_dim1]) address the caller's first element. */
    --idim;
    --alf;
    --bet;
    --g;
    --ct;
    --cs;
    --cc;
    c_dim1 = *nmax;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --bs;
    --bb;
    b_dim1 = *nmax;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    /* Function Body */
    /* .. Executable Statements ..
*/
    /* nargs: number of isame[] entries checked after each call.
       nc: running count of calls made; errmax: largest err seen so far. */
    nargs = 12;
    nc = 0;
    reset = TRUE_;
    errmax = 0.f;
    i__1 = *nidim;
    for (im = 1; im <= i__1; ++im) {
        m = idim[im];
        i__2 = *nidim;
        for (in = 1; in <= i__2; ++in) {
            n = idim[in];
            /* Set LDC to 1 more than minimum value if room. */
            ldc = m;
            if (ldc < *nmax) {
                ++ldc;
            }
            /* Skip tests if not enough room. */
            if (ldc > *nmax) {
                goto L90;
            }
            lcc = ldc * n;
            /* null: degenerate problem; the numerical check is skipped. */
            null = n <= 0 || m <= 0;
            /* Set LDB to 1 more than minimum value if room. */
            ldb = m;
            if (ldb < *nmax) {
                ++ldb;
            }
            /* Skip tests if not enough room. */
            if (ldb > *nmax) {
                goto L90;
            }
            lbb = ldb * n;
            /* Generate the matrix B. */
            smake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &
                reset, &c_b84, (ftnlen)2, (ftnlen)1, (ftnlen)1);
            /* Loop over SIDE = 'L', 'R'. */
            for (ics = 1; ics <= 2; ++ics) {
                *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1];
                left = *(unsigned char *)side == 'L';
                /* A is na-by-na: order m when applied on the left, n otherwise. */
                if (left) {
                    na = m;
                } else {
                    na = n;
                }
                /* Set LDA to 1 more than minimum value if room. */
                lda = na;
                if (lda < *nmax) {
                    ++lda;
                }
                /* Skip tests if not enough room. */
                if (lda > *nmax) {
                    goto L80;
                }
                laa = lda * na;
                /* Loop over UPLO = 'U', 'L'. */
                for (icu = 1; icu <= 2; ++icu) {
                    *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];
                    /* Generate the symmetric matrix A. */
                    smake_("SY", uplo, " ", &na, &na, &a[a_offset], nmax, &aa[
                        1], &lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, (
                        ftnlen)1);
                    i__3 = *nalf;
                    for (ia = 1; ia <= i__3; ++ia) {
                        alpha = alf[ia];
                        i__4 = *nbet;
                        for (ib = 1; ib <= i__4; ++ib) {
                            beta = bet[ib];
                            /* Generate the matrix C. */
                            smake_("GE", " ", " ", &m, &n, &c__[c_offset],
                                nmax, &cc[1], &ldc, &reset, &c_b84, (
                                ftnlen)2, (ftnlen)1, (ftnlen)1);
                            ++nc;
                            /* Save every datum before calling the */
                            /* subroutine.
*/
                            *(unsigned char *)sides = *(unsigned char *)side;
                            *(unsigned char *)uplos = *(unsigned char *)uplo;
                            ms = m;
                            ns = n;
                            als = alpha;
                            i__5 = laa;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                as[i__] = aa[i__];
                                /* L10: */
                            }
                            ldas = lda;
                            i__5 = lbb;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                bs[i__] = bb[i__];
                                /* L20: */
                            }
                            ldbs = ldb;
                            bls = beta;
                            i__5 = lcc;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                cs[i__] = cc[i__];
                                /* L30: */
                            }
                            ldcs = ldc;
                            /* Call the subroutine. */
                            /* Optionally write the call's arguments to the trace unit first. */
                            if (*trace) {
                                io___171.ciunit = *ntra;
                                s_wsfe(&io___171);
                                do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, sname, (ftnlen)6);
                                do_fio(&c__1, side, (ftnlen)1);
                                do_fio(&c__1, uplo, (ftnlen)1);
                                do_fio(&c__1, (char *)&m, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&n, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(
                                    real));
                                do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(
                                    integer));
                                do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(
                                    real));
                                do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(
                                    integer));
                                e_wsfe();
                            }
                            /* Optionally rewind the trace unit before the call. */
                            if (*rewi) {
                                al__1.aerr = 0;
                                al__1.aunit = *ntra;
                                f_rew(&al__1);
                            }
                            ssymm_(side, uplo, &m, &n, &alpha, &aa[1], &lda, &
                                bb[1], &ldb, &beta, &cc[1], &ldc, (ftnlen)
                                1, (ftnlen)1);
                            /* Check if error-exit was taken incorrectly. */
                            /* infoc_1.ok is maintained outside this function. */
                            if (! infoc_1.ok) {
                                io___172.ciunit = *nout;
                                s_wsfe(&io___172);
                                e_wsfe();
                                *fatal = TRUE_;
                                goto L110;
                            }
                            /* See what data changed inside subroutines. */
                            /* isame[k] records the comparison for argument k+1
                               of the ssymm_ call, in argument order. */
                            isame[0] = *(unsigned char *)sides == *(unsigned
                                char *)side;
                            isame[1] = *(unsigned char *)uplos == *(unsigned
                                char *)uplo;
                            isame[2] = ms == m;
                            isame[3] = ns == n;
                            isame[4] = als == alpha;
                            isame[5] = lse_(&as[1], &aa[1], &laa);
                            isame[6] = ldas == lda;
                            isame[7] = lse_(&bs[1], &bb[1], &lbb);
                            isame[8] = ldbs == ldb;
                            isame[9] = bls == beta;
                            /* C is the output argument: it is compared with lse_
                               when the problem is degenerate and with lseres_
                               otherwise. */
                            if (null) {
                                isame[10] = lse_(&cs[1], &cc[1], &lcc);
                            } else {
                                isame[10] = lseres_("GE", " ", &m, &n, &cs[1],
                                    &cc[1], &ldc, (ftnlen)2, (ftnlen)1);
                            }
                            isame[11] = ldcs == ldc;
                            /* If data was incorrectly changed, report and */
                            /* return.
*/
                            same = TRUE_;
                            i__5 = nargs;
                            for (i__ = 1; i__ <= i__5; ++i__) {
                                same = same && isame[i__ - 1];
                                if (! isame[i__ - 1]) {
                                    io___175.ciunit = *nout;
                                    s_wsfe(&io___175);
                                    do_fio(&c__1, (char *)&i__, (ftnlen)
                                        sizeof(integer));
                                    e_wsfe();
                                }
                                /* L40: */
                            }
                            if (! same) {
                                *fatal = TRUE_;
                                goto L110;
                            }
                            if (! null) {
                                /* Check the result. */
                                /* smmch_ receives A and B in product order:
                                   (a, b) with inner dimension m for SIDE = 'L',
                                   (b, a) with inner dimension n for SIDE = 'R'. */
                                if (left) {
                                    smmch_("N", "N", &m, &n, &m, &alpha, &a[
                                        a_offset], nmax, &b[b_offset], nmax,
                                        &beta, &c__[c_offset], nmax, &ct[1],
                                        &g[1], &cc[1], &ldc, eps, &err,
                                        fatal, nout, &c_true, (
                                        ftnlen)1, (ftnlen)1);
                                } else {
                                    smmch_("N", "N", &m, &n, &n, &alpha, &b[
                                        b_offset], nmax, &a[a_offset], nmax,
                                        &beta, &c__[c_offset], nmax, &ct[1],
                                        &g[1], &cc[1], &ldc, eps, &err,
                                        fatal, nout, &c_true, (
                                        ftnlen)1, (ftnlen)1);
                                }
                                errmax = max(errmax,err);
                                /* If got really bad answer, report and */
                                /* return. */
                                if (*fatal) {
                                    goto L110;
                                }
                            }
                            /* L50: */
                        }
                        /* L60: */
                    }
                    /* L70: */
                }
/* L80: reached when LDA does not fit; moves on to the next SIDE. */
L80:
                ;
            }
/* L90: reached when LDC or LDB does not fit; moves on to the next n. */
L90:
            ;
        }
        /* L100: */
    }
    /* Report result. */
    if (errmax < *thresh) {
        io___177.ciunit = *nout;
        s_wsfe(&io___177);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___178.ciunit = *nout;
        s_wsfe(&io___178);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real));
        e_wsfe();
    }
    goto L120;
/* L110: failure exit -- print the heading, then the arguments of the
   call that failed (same layout as the trace line). */
L110:
    io___179.ciunit = *nout;
    s_wsfe(&io___179);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    io___180.ciunit = *nout;
    s_wsfe(&io___180);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, side, (ftnlen)1);
    do_fio(&c__1, uplo, (ftnlen)1);
    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real));
    do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real));
    do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer));
    e_wsfe();
L120:
    return 0;
    /* End of SCHK2. */
} /* schk2_ */

/*
 * schk3_ -- exercises STRMM and STRSM over a grid of problem sizes and
 * options (f2c translation; auxiliary routine of the Level 3 BLAS test
 * program).  Characters 4-5 of sname ("MM" or "SM") select which of
 * strmm_ / strsm_ is called and how the result is checked.
 *
 * Arguments (as used in the body):
 *   sname       routine name; 6 characters of it are written in reports
 *               and sname + 3 is compared with "MM" / "SM".
 *   eps         forwarded unchanged to smmch_.
 *   thresh      errmax < *thresh selects the "PASSED" report, otherwise
 *               the "SUSPECT" report (with errmax) is written.
 *   nout        unit number used for result and error reports.
 *   ntra        unit number used for the call trace; rewound before each
 *               call when *rewi is set.
 *   trace       when nonzero, each call is traced on unit *ntra.
 *   rewi        when nonzero, unit *ntra is rewound before each call.
 *   fatal       set to TRUE_ on an unexpected error exit or a modified
 *               argument; also handed to smmch_ and tested afterwards.
 *   nidim,idim  number of dimension values and the values themselves.
 *   nalf,alf    number of alpha values and the values themselves.
 *   nmax        leading dimension used for a, b and c__; upper bound
 *               accepted for lda/ldb.
 *   a, b        matrices handed to smake_ with leading dimension *nmax
 *               and later to smmch_.
 *   aa, bb      arrays handed to smake_ and to the routine under test
 *               with leading dimensions lda/ldb.
 *   as, bs      receive copies of aa, bb taken before the call.
 *   ct, g       passed through to smmch_ (presumably workspace --
 *               confirm against smmch_).
 *   c__         *nmax-by-*nmax matrix zeroed on entry and handed to
 *               smmch_; in the "SM" case it also receives the computed
 *               solution.
 *   sname_len   not referenced in the body.
 *
 * Always returns 0; failure is signalled through *fatal and the report.
 */
/* Subroutine */ int schk3_(char *sname, real *eps, real *thresh, integer *
    nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
    integer *nidim, integer *idim, integer *nalf, real *alf, integer *
    nmax, real *a, real *aa, real *as, real *b, real *bb, real *bs, real *
    ct, real *g, real *c__, ftnlen sname_len)
{
    /* Initialized data */
    /* Candidate values for the UPLO, TRANSA, DIAG and SIDE arguments. */
    static char ichu[2] = "UL";
    static char icht[3] = "NTC";
    static char ichd[2] = "UN";
    static char ichs[2] = "LR";
    /* Format strings */
    /* 9995: trace / failing-call line listing the call's arguments. */
    static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,4(\002'\002,a1"
        ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i"
        "3,\002) .\002)";
    /* 9994: error exit taken on a valid call. */
    static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
        "N VALID CALL *\002,\002******\002)";
    /* 9998: an argument was changed by the routine under test. */
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
        " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    /* 9999: all computational tests passed. */
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
        "STS (\002,i6,\002 CALL\002,\002S)\002)";
    /* 9997: tests completed but the maximum ratio reached the threshold. */
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
        " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
        "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    /* 9996: heading printed before the failing call is listed. */
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
        "ER:\002)";
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
        i__3, i__4, i__5;
    alist al__1;
    /* Builtin functions */
    integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio(
        integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *);
    /* Local variables */
    integer i__, j, m, n, ia, na, nc, im, in, ms, ns, laa, icd, lbb, lda, ldb,
        ics;
    real als;
    integer ict, icu;
    extern logical lse_(real *, real *, integer *);
    real err;
    char diag[1];
    integer ldas, ldbs;
    logical same;
    char side[1];
    logical left, null;
    char uplo[1];
    real alpha;
    char diags[1];
    logical isame[13];
    extern /* Subroutine */ int
        smake_(char *, char *, char *, integer *, integer *, real *,
        integer *, real *, integer *, logical *, real * , ftnlen, ftnlen,
        ftnlen);
    char sides[1];
    extern /* Subroutine */ int smmch_(char *, char *, integer *, integer *,
        integer *, real *, real *, integer *, real *, integer *, real *, real
        *, integer *, real *, real *, real *, integer *, real *, real *,
        logical *, integer *, logical *, ftnlen, ftnlen);
    integer nargs;
    logical reset;
    char uplos[1];
    /* The two routines under test; which one runs is chosen from sname. */
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *,
        integer *, integer *, real *, real *, integer *, real *, integer *
        , ftnlen, ftnlen, ftnlen, ftnlen), strsm_(char *, char *, char *,
        char *, integer *, integer *, real *, real *, integer *, real *,
        integer *, ftnlen, ftnlen, ftnlen, ftnlen);
    char tranas[1], transa[1];
    real errmax;
    extern logical lseres_(char *, char *, integer *, integer *, real *, real
        *, integer *, ftnlen, ftnlen);
    /* Fortran I/O blocks */
    /* One control block per write statement; ciunit is filled in before use. */
    static cilist io___221 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___222 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___223 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___226 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___228 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___229 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___230 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___231 = { 0, 0, 0, fmt_9995, 0 };
    /* Tests STRMM and STRSM. */
    /* Auxiliary routine for test program for Level 3 Blas. */
    /* -- Written on 8-February-1989. */
    /* Jack Dongarra, Argonne National Laboratory. */
    /* Iain Duff, AERE Harwell. */
    /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
    /* Sven Hammarling, Numerical Algorithms Group Ltd. */
    /* .. Parameters .. */
    /* .. Scalar Arguments .. */
    /* .. Array Arguments .. */
    /* .. Local Scalars .. */
    /* .. Local Arrays .. */
    /* .. External Functions .. */
    /* .. External Subroutines .. */
    /* .. Intrinsic Functions .. */
    /* .. Scalars in Common .. */
    /* .. Common blocks .. */
    /* .. Data statements ..
*/
    /* Parameter adjustments */
    /* Shift every array pointer so the 1-based subscripts used below
       (x[1], a[i + j*a_dim1]) address the caller's first element. */
    --idim;
    --alf;
    c_dim1 = *nmax;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --g;
    --ct;
    --bs;
    --bb;
    b_dim1 = *nmax;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --as;
    --aa;
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    /* Function Body */
    /* .. Executable Statements .. */
    /* nargs: number of isame[] entries checked after each call.
       nc: running count of calls made; errmax: largest err seen so far. */
    nargs = 11;
    nc = 0;
    reset = TRUE_;
    errmax = 0.f;
    /* Set up zero matrix for SMMCH. */
    i__1 = *nmax;
    for (j = 1; j <= i__1; ++j) {
        i__2 = *nmax;
        for (i__ = 1; i__ <= i__2; ++i__) {
            c__[i__ + j * c_dim1] = 0.f;
            /* L10: */
        }
        /* L20: */
    }
    i__1 = *nidim;
    for (im = 1; im <= i__1; ++im) {
        m = idim[im];
        i__2 = *nidim;
        for (in = 1; in <= i__2; ++in) {
            n = idim[in];
            /* Set LDB to 1 more than minimum value if room. */
            ldb = m;
            if (ldb < *nmax) {
                ++ldb;
            }
            /* Skip tests if not enough room. */
            if (ldb > *nmax) {
                goto L130;
            }
            lbb = ldb * n;
            /* null: degenerate problem; the numerical check is skipped. */
            null = m <= 0 || n <= 0;
            /* Loop over SIDE = 'L', 'R'. */
            for (ics = 1; ics <= 2; ++ics) {
                *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1];
                left = *(unsigned char *)side == 'L';
                /* A is na-by-na: order m when applied on the left, n otherwise. */
                if (left) {
                    na = m;
                } else {
                    na = n;
                }
                /* Set LDA to 1 more than minimum value if room. */
                lda = na;
                if (lda < *nmax) {
                    ++lda;
                }
                /* Skip tests if not enough room. */
                /* Note: this jumps to L130 (end of the n loop), so the
                   remaining SIDE value is skipped for this (m, n) too. */
                if (lda > *nmax) {
                    goto L130;
                }
                laa = lda * na;
                /* Loop over UPLO, TRANSA, DIAG and alpha. */
                for (icu = 1; icu <= 2; ++icu) {
                    *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1];
                    for (ict = 1; ict <= 3; ++ict) {
                        *(unsigned char *)transa = *(unsigned char *)&icht[
                            ict - 1];
                        for (icd = 1; icd <= 2; ++icd) {
                            *(unsigned char *)diag = *(unsigned char *)&ichd[
                                icd - 1];
                            i__3 = *nalf;
                            for (ia = 1; ia <= i__3; ++ia) {
                                alpha = alf[ia];
                                /* Generate the matrix A. */
                                smake_("TR", uplo, diag, &na, &na, &a[
                                    a_offset], nmax, &aa[1], &lda, &reset,
                                    &c_b84, (ftnlen)2, (ftnlen)1, (
                                    ftnlen)1);
                                /* Generate the matrix B. */
                                smake_("GE", " ", " ", &m, &n, &b[b_offset],
                                    nmax, &bb[1], &ldb, &reset, &c_b84, (
                                    ftnlen)2, (ftnlen)1, (ftnlen)1);
                                ++nc;
                                /* Save every datum before calling the */
                                /* subroutine.
*/
                                *(unsigned char *)sides = *(unsigned char *)
                                    side;
                                *(unsigned char *)uplos = *(unsigned char *)
                                    uplo;
                                *(unsigned char *)tranas = *(unsigned char *)
                                    transa;
                                *(unsigned char *)diags = *(unsigned char *)
                                    diag;
                                ms = m;
                                ns = n;
                                als = alpha;
                                i__4 = laa;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    as[i__] = aa[i__];
                                    /* L30: */
                                }
                                ldas = lda;
                                i__4 = lbb;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    bs[i__] = bb[i__];
                                    /* L40: */
                                }
                                ldbs = ldb;
                                /* Call the subroutine. */
                                /* sname + 3 selects the routine: "MM" -> strmm_,
                                   "SM" -> strsm_.  Each branch optionally traces
                                   and rewinds before making the call. */
                                if (s_cmp(sname + 3, "MM", (ftnlen)2, (ftnlen)
                                    2) == 0) {
                                    if (*trace) {
                                        io___221.ciunit = *ntra;
                                        s_wsfe(&io___221);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                            sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, side, (ftnlen)1);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, transa, (ftnlen)1);
                                        do_fio(&c__1, diag, (ftnlen)1);
                                        do_fio(&c__1, (char *)&m, (ftnlen)
                                            sizeof(integer));
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                            sizeof(integer));
                                        do_fio(&c__1, (char *)&alpha, (ftnlen)
                                            sizeof(real));
                                        do_fio(&c__1, (char *)&lda, (ftnlen)
                                            sizeof(integer));
                                        do_fio(&c__1, (char *)&ldb, (ftnlen)
                                            sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    strmm_(side, uplo, transa, diag, &m, &n, &
                                        alpha, &aa[1], &lda, &bb[1], &ldb,
                                        (ftnlen)1, (ftnlen)1, (ftnlen)1,
                                        (ftnlen)1);
                                } else if (s_cmp(sname + 3, "SM", (ftnlen)2, (
                                    ftnlen)2) == 0) {
                                    if (*trace) {
                                        io___222.ciunit = *ntra;
                                        s_wsfe(&io___222);
                                        do_fio(&c__1, (char *)&nc, (ftnlen)
                                            sizeof(integer));
                                        do_fio(&c__1, sname, (ftnlen)6);
                                        do_fio(&c__1, side, (ftnlen)1);
                                        do_fio(&c__1, uplo, (ftnlen)1);
                                        do_fio(&c__1, transa, (ftnlen)1);
                                        do_fio(&c__1, diag, (ftnlen)1);
                                        do_fio(&c__1, (char *)&m, (ftnlen)
                                            sizeof(integer));
                                        do_fio(&c__1, (char *)&n, (ftnlen)
                                            sizeof(integer));
                                        do_fio(&c__1, (char *)&alpha, (ftnlen)
                                            sizeof(real));
                                        do_fio(&c__1, (char *)&lda, (ftnlen)
                                            sizeof(integer));
                                        do_fio(&c__1, (char *)&ldb, (ftnlen)
                                            sizeof(integer));
                                        e_wsfe();
                                    }
                                    if (*rewi) {
                                        al__1.aerr = 0;
                                        al__1.aunit = *ntra;
                                        f_rew(&al__1);
                                    }
                                    strsm_(side, uplo, transa, diag, &m, &n, &
                                        alpha,
                                        &aa[1], &lda, &bb[1], &ldb,
                                        (ftnlen)1, (ftnlen)1, (ftnlen)1,
                                        (ftnlen)1);
                                }
                                /* Check if error-exit was taken incorrectly. */
                                /* infoc_1.ok is maintained outside this function. */
                                if (! infoc_1.ok) {
                                    io___223.ciunit = *nout;
                                    s_wsfe(&io___223);
                                    e_wsfe();
                                    *fatal = TRUE_;
                                    goto L150;
                                }
                                /* See what data changed inside subroutines. */
                                /* isame[k] records the comparison for argument
                                   k+1 of the call, in argument order. */
                                isame[0] = *(unsigned char *)sides == *(
                                    unsigned char *)side;
                                isame[1] = *(unsigned char *)uplos == *(
                                    unsigned char *)uplo;
                                isame[2] = *(unsigned char *)tranas == *(
                                    unsigned char *)transa;
                                isame[3] = *(unsigned char *)diags == *(
                                    unsigned char *)diag;
                                isame[4] = ms == m;
                                isame[5] = ns == n;
                                isame[6] = als == alpha;
                                isame[7] = lse_(&as[1], &aa[1], &laa);
                                isame[8] = ldas == lda;
                                /* B is the output argument: it is compared with
                                   lse_ when the problem is degenerate and with
                                   lseres_ otherwise. */
                                if (null) {
                                    isame[9] = lse_(&bs[1], &bb[1], &lbb);
                                } else {
                                    isame[9] = lseres_("GE", " ", &m, &n, &bs[
                                        1], &bb[1], &ldb, (ftnlen)2, (
                                        ftnlen)1);
                                }
                                isame[10] = ldbs == ldb;
                                /* If data was incorrectly changed, report and */
                                /* return. */
                                same = TRUE_;
                                i__4 = nargs;
                                for (i__ = 1; i__ <= i__4; ++i__) {
                                    same = same && isame[i__ - 1];
                                    if (! isame[i__ - 1]) {
                                        io___226.ciunit = *nout;
                                        s_wsfe(&io___226);
                                        do_fio(&c__1, (char *)&i__, (ftnlen)
                                            sizeof(integer));
                                        e_wsfe();
                                    }
                                    /* L50: */
                                }
                                if (! same) {
                                    *fatal = TRUE_;
                                    goto L150;
                                }
                                /* NOTE(review): err is only assigned through the
                                   smmch_ calls below; if sname + 3 matched neither
                                   "MM" nor "SM" it would be read unset by max(). */
                                if (! null) {
                                    if (s_cmp(sname + 3, "MM", (ftnlen)2, (
                                        ftnlen)2) == 0) {
                                        /* Check the result. */
                                        /* The product is checked against bb, with
                                           the zeroed c__ and c_b84 in the C/beta
                                           slots of smmch_. */
                                        if (left) {
                                            smmch_(transa, "N", &m, &n, &m, &
                                                alpha, &a[a_offset], nmax,
                                                &b[b_offset], nmax, &
                                                c_b84, &c__[c_offset],
                                                nmax, &ct[1], &g[1], &bb[
                                                1], &ldb, eps, &err,
                                                fatal, nout, &c_true, (
                                                ftnlen)1, (ftnlen)1);
                                        } else {
                                            smmch_("N", transa, &m, &n, &n, &
                                                alpha, &b[b_offset], nmax,
                                                &a[a_offset], nmax, &
                                                c_b84, &c__[c_offset],
                                                nmax, &ct[1], &g[1], &bb[
                                                1], &ldb, eps, &err,
                                                fatal, nout, &c_true, (
                                                ftnlen)1, (ftnlen)1);
                                        }
                                    } else if (s_cmp(sname + 3, "SM", (ftnlen)
                                        2, (ftnlen)2) == 0) {
                                        /* Compute approximation to original */
                                        /* matrix.
*/
                                        /* Move the computed solution from bb into
                                           c__ and overwrite bb with alpha * b, the
                                           value the product is compared against. */
                                        i__4 = n;
                                        for (j = 1; j <= i__4; ++j) {
                                            i__5 = m;
                                            for (i__ = 1; i__ <= i__5; ++i__) {
                                                c__[i__ + j * c_dim1] = bb[i__ + (j
                                                    - 1) * ldb];
                                                bb[i__ + (j - 1) * ldb] = alpha *
                                                    b[i__ + j * b_dim1];
                                                /* L60: */
                                            }
                                            /* L70: */
                                        }
                                        /* Multiply A by the solution held in c__
                                           (scalars c_b94 and c_b84 are defined
                                           outside this function); b receives the
                                           C-slot argument of smmch_. */
                                        if (left) {
                                            smmch_(transa, "N", &m, &n, &m, &
                                                c_b94, &a[a_offset], nmax,
                                                &c__[c_offset], nmax, &
                                                c_b84, &b[b_offset], nmax,
                                                &ct[1], &g[1], &bb[1], &
                                                ldb, eps, &err, fatal,
                                                nout, &c_false, (ftnlen)1,
                                                (ftnlen)1);
                                        } else {
                                            smmch_("N", transa, &m, &n, &n, &
                                                c_b94, &c__[c_offset],
                                                nmax, &a[a_offset], nmax,
                                                &c_b84, &b[b_offset],
                                                nmax, &ct[1], &g[1], &bb[
                                                1], &ldb, eps, &err,
                                                fatal, nout, &c_false, (
                                                ftnlen)1, (ftnlen)1);
                                        }
                                    }
                                    errmax = max(errmax,err);
                                    /* If got really bad answer, report and */
                                    /* return. */
                                    if (*fatal) {
                                        goto L150;
                                    }
                                }
                                /* L80: */
                            }
                            /* L90: */
                        }
                        /* L100: */
                    }
                    /* L110: */
                }
                /* L120: */
            }
/* L130: reached when LDB or LDA does not fit; moves on to the next n. */
L130:
            ;
        }
        /* L140: */
    }
    /* Report result. */
    if (errmax < *thresh) {
        io___228.ciunit = *nout;
        s_wsfe(&io___228);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        io___229.ciunit = *nout;
        s_wsfe(&io___229);
        do_fio(&c__1, sname, (ftnlen)6);
        do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
        do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real));
        e_wsfe();
    }
    goto L160;
/* L150: failure exit -- print the heading, then the arguments of the
   call that failed (same layout as the trace line). */
L150:
    io___230.ciunit = *nout;
    s_wsfe(&io___230);
    do_fio(&c__1, sname, (ftnlen)6);
    e_wsfe();
    io___231.ciunit = *nout;
    s_wsfe(&io___231);
    do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer));
    do_fio(&c__1, sname, (ftnlen)6);
    do_fio(&c__1, side, (ftnlen)1);
    do_fio(&c__1, uplo, (ftnlen)1);
    do_fio(&c__1, transa, (ftnlen)1);
    do_fio(&c__1, diag, (ftnlen)1);
    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real));
    do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer));
    do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer));
    e_wsfe();
L160:
    return 0;
    /* End of SCHK3.
*/
} /* schk3_ */

/* Subroutine */ int schk4_(char *sname, real *eps, real *thresh, integer *
    nout, integer *ntra, logical *trace, logical *rewi, logical *fatal,
    integer *nidim, integer *idim, integer *nalf, real *alf, integer *
    nbet, real *bet, integer *nmax, real *a, real *aa, real *as, real *b,
    real *bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g,
    ftnlen sname_len)
{
    /* Initialized data */
    static char icht[3] = "NTC";
    static char ichu[2] = "UL";
    /* Format strings */
    static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1"
        ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002,\002,f4.1,"
        "\002, C,\002,i3,\002) .\002)";
    static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O"
        "N VALID CALL *\002,\002******\002)";
    static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER"
        " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)";
    static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE"
        "STS (\002,i6,\002 CALL\002,\002S)\002)";
    static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL"
        " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH "
        "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)";
    static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN"
        " \002,i3)";
    static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB"
        "ER:\002)";
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
        i__3, i__4, i__5;
    alist al__1;
    /* Builtin functions */
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
        f_rew(alist *);
    /* Local variables */
    integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa,
        lda, lcc, ldc;
    real als;
    integer ict, icu;
    extern logical lse_(real *, real *, integer *);
    real err, beta;
    integer ldas, ldcs;
    logical same;
    real bets;
    logical tran, null;
    char uplo[1];
    real alpha;
    logical isame[13];
    extern /* Subroutine */ int smake_(char *,
char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen), smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer * , real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); char transs[1]; /* Fortran I/O blocks */ static cilist io___268 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___269 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___272 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___278 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___279 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___280 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___281 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___282 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SSYRK. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. 
*/ /* Parameter adjustments */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 10; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char *) trans == 'C'; if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. */ smake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[1], & lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { beta = bet[ib]; /* Generate the matrix C. */ smake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. 
*/ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; bets = beta; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { cs[i__] = cc[i__]; /* L20: */ } ldcs = ldc; /* Call the subroutine. */ if (*trace) { io___268.ciunit = *ntra; s_wsfe(&io___268); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssyrk_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, &beta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1) ; /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___269.ciunit = *nout; s_wsfe(&io___269); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als == alpha; isame[5] = lse_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = bets == beta; if (null) { isame[8] = lse_(&cs[1], &cc[1], &lcc); } else { isame[8] = lseres_("SY", uplo, &n, &n, &cs[1], &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[9] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! 
isame[i__ - 1]) { io___272.ciunit = *nout; s_wsfe(&io___272); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { smmch_("T", "N", &lj, &c__1, &k, & alpha, &a[jj * a_dim1 + 1], nmax, &a[j * a_dim1 + 1], nmax, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { smmch_("N", "T", &lj, &c__1, &k, & alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } /* L40: */ } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } /* L90: */ } L100: ; } /* Report result. 
*/ if (errmax < *thresh) { io___278.ciunit = *nout; s_wsfe(&io___278); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___279.ciunit = *nout; s_wsfe(&io___279); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L130; L110: if (n > 1) { io___280.ciunit = *nout; s_wsfe(&io___280); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L120: io___281.ciunit = *nout; s_wsfe(&io___281); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___282.ciunit = *nout; s_wsfe(&io___282); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of SCHK4. 
*/ } /* schk4_ */ /* schk5_: test driver for ssyr2k_. */ /* For each n and k from idim[], each trans in icht (N, T, C), each uplo in ichu (U, L), each alpha from alf[] and each beta from bet[], it builds A, B and C with smake_, saves a copy of every argument, calls ssyr2k_, fills isame[] by comparing the saved copies with the current values, and checks the result column by column with smmch_. */ /* sname: name printed (6 characters) in traces and reports. eps: forwarded to smmch_. thresh: errmax < *thresh selects the PASSED report, otherwise the SUSPECT report. nout: unit for reports. ntra: unit for the call trace, written when *trace is set and rewound when *rewi is set. fatal: set to TRUE_ on an unexpected error exit or a changed argument; also handed to smmch_ and tested after every column. */ /* nidim, nalf, nbet: number of entries used from idim[], alf[], bet[]. nmax: leading dimension of c__ and the basis of the leading dimensions used with ab; sizes whose ldc or lda would exceed it are skipped. ab: storage in which smake_ generates both A (from ab[1]) and B (from ab[k + 1] or ab[k * *nmax + 1]). w: scratch vector whose first 2 * k entries are rebuilt for every column. sname_len is unused here. Returns 0 on every path. */ /* Subroutine */ int schk5_(char *sname, real *eps, real *thresh, integer * nout, integer *ntra, logical *trace, logical *rewi, logical *fatal, integer *nidim, integer *idim, integer *nalf, real *alf, integer * nbet, real *bet, integer *nmax, real *ab, real *aa, real *as, real * bb, real *bs, real *c__, real *cc, real *cs, real *ct, real *g, real * w, ftnlen sname_len) { /* Initialized data */ static char icht[3] = "NTC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002, B,\002,i" "3,\002,\002,f4.1,\002, C,\002,i3,\002) \002,\002 .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lbb, lda, lcc, ldb, ldc; real als; integer ict, icu; extern logical lse_(real *, real *, integer *); real err; integer jjab; real beta; integer ldas, ldbs, ldcs; logical same; real bets; logical tran, null; char uplo[1]; real alpha; logical 
isame[13]; extern /* Subroutine */ int smake_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *, logical *, real * , ftnlen, ftnlen, ftnlen), smmch_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer * , real *, real *, integer *, real *, real *, real *, integer *, real *, real *, logical *, integer *, logical *, ftnlen, ftnlen); integer nargs; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int ssyr2k_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); real errmax; extern logical lseres_(char *, char *, integer *, integer *, real *, real *, integer *, ftnlen, ftnlen); char transs[1]; /* Fortran I/O blocks */ /* One cilist per formatted write; only its ciunit field is assigned in this routine, just before each s_wsfe call. */ static cilist io___322 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___323 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___326 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___333 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___334 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___335 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___337 = { 0, 0, 0, fmt_9994, 0 }; /* Tests SSYR2K. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. 
*/ /* Parameter adjustments */ /* Every array pointer is shifted so that 1-based indices can be used below; c__ is then addressed as c__[i + j * c_dim1], while ab is indexed as a flat vector. */ --idim; --alf; --bet; --w; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; --as; --aa; --ab; /* Function Body */ /* .. Executable Statements .. */ /* nargs: number of isame[] entries examined after each call; nc: running count of calls made. */ nargs = 12; nc = 0; reset = TRUE_; errmax = 0.f; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L130; } lcc = ldc * n; /* null: n <= 0; in that case C is compared with lse_ and the column check is skipped. */ null = n <= 0; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char *) trans == 'C'; /* ma, na: dimensions given to smake_ for A and B (k by n when tran is set, n by k otherwise). */ if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * na; /* Generate the matrix A. */ /* In the transposed case smake_ is given 2 * *nmax (i__3) as the leading dimension of ab, otherwise *nmax. */ if (tran) { i__3 = *nmax << 1; smake_("GE", " ", " ", &ma, &na, &ab[1], &i__3, &aa[1], & lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { smake_("GE", " ", " ", &ma, &na, &ab[1], nmax, &aa[1], & lda, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } /* Generate the matrix B. */ /* B reuses the leading dimension and size of A and is generated in ab starting k entries (transposed) or k * *nmax entries (otherwise) after the start of A. */ ldb = lda; lbb = laa; if (tran) { i__3 = *nmax << 1; smake_("GE", " ", " ", &ma, &na, &ab[k + 1], &i__3, &bb[1] , &ldb, &reset, &c_b84, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { smake_("GE", " ", " ", &ma, &na, &ab[k * *nmax + 1], nmax, &bb[1], &ldb, &reset, &c_b84, (ftnlen)2, (ftnlen) 1, (ftnlen)1); } for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { alpha = alf[ia]; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { beta = bet[ib]; /* Generate the matrix C. 
*/ smake_("SY", uplo, " ", &n, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b84, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als = alpha; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { as[i__] = aa[i__]; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { bs[i__] = bb[i__]; /* L20: */ } ldbs = ldb; bets = beta; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { cs[i__] = cc[i__]; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ /* When *trace is set, the call number and the call arguments are first written to unit *ntra. */ if (*trace) { io___322.ciunit = *ntra; s_wsfe(&io___322); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof( real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } /* When *rewi is set, unit *ntra is rewound before the call. */ if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ssyr2k_(uplo, trans, &n, &k, &alpha, &aa[1], &lda, &bb[1], &ldb, &beta, &cc[1], &ldc, ( ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___323.ciunit = *nout; s_wsfe(&io___323); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. 
*/ /* isame[i] records whether argument i + 1 of the ssyr2k_ call still matches its saved copy. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als == alpha; isame[5] = lse_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lse_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; isame[9] = bets == beta; if (null) { isame[10] = lse_(&cs[1], &cc[1], &lcc); } else { isame[10] = lseres_("SY", uplo, &n, &n, &cs[1] , &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___326.ciunit = *nout; s_wsfe(&io___326); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { /* Check the result column by column. */ /* For column j, jj is the first row index and lj the length handed to smmch_ (jj = 1, lj = j when upper; jj = j, lj = n - j + 1 otherwise); jc is the matching offset into cc[]; jjab is the offset into ab[] used in the transposed case and only advances (by 2 * *nmax) for lower, transposed runs. */ jjab = 1; jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { /* Fill w[1..2k] from ab for column j; (j - 1 << 1) parses as (j - 1) * 2 because binary minus binds tighter than the shift. */ i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { w[i__] = ab[(j - 1 << 1) * *nmax + k + i__]; w[k + i__] = ab[(j - 1 << 1) * * nmax + i__]; /* L50: */ } i__6 = k << 1; i__7 = *nmax << 1; i__8 = *nmax << 1; smmch_("T", "N", &lj, &c__1, &i__6, & alpha, &ab[jjab], &i__7, &w[1] , &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { /* Fill w[1..2k] from ab for row j, taking entries k + 1 .. 2k of that row first and entries 1 .. k second. */ i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { w[i__] = ab[(k + i__ - 1) * *nmax + j]; w[k + i__] = ab[(i__ - 1) * *nmax + j]; /* L60: */ } i__6 = k << 1; i__7 = *nmax << 1; smmch_("N", "N", &lj, &c__1, &i__6, & alpha, &ab[jj], nmax, &w[1], & i__7, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } if (upper) { jc += ldc; } else { jc = jc + ldc + 1; if (tran) { jjab += *nmax << 1; } } /* errmax keeps the largest err seen over all column checks. */ errmax = max(errmax,err); /* If got really bad answer, report and 
*/ /* return. */ if (*fatal) { goto L140; } /* L70: */ } } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } L130: ; } /* Report result. */ /* Normal completion: the PASSED report when errmax is below *thresh, otherwise the SUSPECT report that also prints errmax. */ if (errmax < *thresh) { io___333.ciunit = *nout; s_wsfe(&io___333); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___334.ciunit = *nout; s_wsfe(&io___334); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(real)); e_wsfe(); } goto L160; /* L140: reached when *fatal is set after a column check; prints column j when n > 1 and then falls through to L150. */ L140: if (n > 1) { io___335.ciunit = *nout; s_wsfe(&io___335); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } /* L150: failure report; prints sname, then the call number and the arguments of the last call. */ L150: io___336.ciunit = *nout; s_wsfe(&io___336); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___337.ciunit = *nout; s_wsfe(&io___337); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&alpha, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&beta, (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L160: return 0; /* End of SCHK5. 
*/ } /* schk5_ */ /* Subroutine */ int schke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ real a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), ssyr2k_( char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen), chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___343 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___344 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 3 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* A, B and C should not need to be defined. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. 
*/ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* 3-19-92: Initialize ALPHA and BETA (eca) */ /* 3-19-92: Fix argument 12 in calls to SSYMM with INFOT = 9 (eca) */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; /* Initialize ALPHA and BETA. */ alpha = 1.f; beta = 2.f; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; } L10: infoc_1.infot = 1; sgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; sgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; sgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, 
&c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; sgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgemm_("N", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; sgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; sgemm_("T", "T", 
&c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; sgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; sgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, 
&infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; sgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L20: infoc_1.infot = 1; ssymm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssymm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssymm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssymm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssymm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssymm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssymm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssymm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssymm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssymm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssymm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L30: infoc_1.infot = 1; strmm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; strmm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; strmm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; strmm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("L", "U", "N", "N", 
&c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strmm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, 
(ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strmm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 9; strmm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strmm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("L", "L", "N", "N", &c__2, &c__0, 
&alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strmm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L40: infoc_1.infot = 1; strsm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; strsm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; strsm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; strsm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, 
(ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; strsm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 6; strsm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; strsm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, 
a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; strsm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; strsm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L50: infoc_1.infot = 1; ssyrk_("/", "N", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssyrk_("U", "/", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyrk_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyrk_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyrk_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyrk_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyrk_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyrk_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, 
nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyrk_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyrk_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyrk_("U", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyrk_("L", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssyrk_("U", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; ssyrk_("L", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L70; L60: infoc_1.infot = 1; ssyr2k_("/", 
"N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ssyr2k_("U", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyr2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyr2k_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyr2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ssyr2k_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyr2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyr2k_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyr2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ssyr2k_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; 
ssyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ssyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ssyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssyr2k_("U", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 12; ssyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; ssyr2k_("L", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L70: if (infoc_1.ok) { io___343.ciunit = *nout; s_wsfe(&io___343); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___344.ciunit = *nout; s_wsfe(&io___344); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of SCHKE. */ } /* schke_ */ /* Subroutine */ int smake_(char *type__, char *uplo, char *diag, integer *m, integer *n, real *a, integer *nmax, real *aa, integer *lda, logical * reset, real *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j; logical gen, tri, sym; integer ibeg, iend; extern real sbeg_(logical *); logical unit, lower, upper; /* Generates values for an M by N matrix A. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'SY' or 'TR'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0; sym = s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0; tri = s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { a[i__ + j * a_dim1] = sbeg_(reset) + *transl; if (i__ != j) { /* Set some elements to zero */ if (*n > 3 && j == *n / 2) { a[i__ + j * a_dim1] = 0.f; } if (sym) { a[j + i__ * a_dim1] = a[i__ + j * a_dim1]; } else if (tri) { a[j + i__ * a_dim1] = 0.f; } } } /* L10: */ } if (tri) { a[j + j * a_dim1] += 1.f; } if (unit) { a[j + j * a_dim1] = 1.f; } /* L20: */ } /* Store elements in array AS in data structure required by routine. 
*/ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L60: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = a[i__ + j * a_dim1]; /* L70: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { aa[i__ + (j - 1) * *lda] = -1e10f; /* L80: */ } /* L90: */ } } return 0; /* End of SMAKE. */ } /* smake_ */ /* Subroutine */ int smmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, real *alpha, real *a, integer *lda, real *b, integer * ldb, real *beta, real *c__, integer *ldc, real *ct, real *g, real *cc, integer *ldcc, real *eps, real *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 EX" "PECTED RESULT COMPU\002,\002TED RESULT\002)"; static char fmt_9998[] = "(1x,i7,2g18.6)"; static char fmt_9997[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, k; real erri; logical trana, tranb; /* 
Fortran I/O blocks */ static cilist io___361 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___362 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___363 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___364 = { 0, 0, 0, fmt_9997, 0 }; /* Checks the results of the computational tests: for each column j the expected result is recomputed into CT from A, B and C and compared against column j of the computed result CC. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments: each array pointer is shifted so that the 1-based subscripts used below (i + j * dim1) address the caller's storage. */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --ct; --g; cc_dim1 = *ldcc; cc_offset = 1 + cc_dim1; cc -= cc_offset; /* Function Body: a TRANSA/TRANSB of 'T' or 'C' selects the transposed operand; any other value is treated as not transposed. */ trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ /* in A, B and C. */ /* Compute gauges in G: G(i) accumulates the same sum with every factor replaced by its absolute value, and is used below to scale the error. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { ct[i__] = 0.f; g[i__] = 0.f; /* L10: */ } if (! trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[k + j * b_dim1]; g[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = b[k + j * b_dim1], abs(r__2)); /* L20: */ } /* L30: end of the case where neither A nor B is transposed */ } } else if (trana && ! 
tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[k + j * b_dim1]; g[i__] += (r__1 = a[k + i__ * a_dim1], abs(r__1)) * (r__2 = b[k + j * b_dim1], abs(r__2)); /* L40: */ } /* L50: end of the case where only A is transposed */ } } else if (! trana && tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[i__ + k * a_dim1] * b[j + k * b_dim1]; g[i__] += (r__1 = a[i__ + k * a_dim1], abs(r__1)) * (r__2 = b[j + k * b_dim1], abs(r__2)); /* L60: */ } /* L70: end of the case where only B is transposed */ } } else if (trana && tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { ct[i__] += a[k + i__ * a_dim1] * b[j + k * b_dim1]; g[i__] += (r__1 = a[k + i__ * a_dim1], abs(r__1)) * (r__2 = b[j + k * b_dim1], abs(r__2)); /* L80: */ } /* L90: end of the case where both A and B are transposed */ } } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { ct[i__] = *alpha * ct[i__] + *beta * c__[i__ + j * c_dim1]; g[i__] = abs(*alpha) * g[i__] + abs(*beta) * (r__1 = c__[i__ + j * c_dim1], abs(r__1)); /* L100: CT now holds alpha * sum + beta * C(i,j); G holds the same expression built from absolute values */ } /* Compute the error ratio for this result. ERR is reset for every column, so when the loop finishes without a fatal error it reflects only the last column processed. */ *err = 0.f; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { erri = (r__1 = ct[i__] - cc[i__ + j * cc_dim1], abs(r__1)) / *eps; if (g[i__] != 0.f) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.f) { goto L130; } /* L110: the ratio is |CT(i) - CC(i,j)| / EPS, divided by G(i) when G(i) is nonzero; ERR * sqrt(EPS) >= 1 is treated as fatal */ } /* L120: */ } /* If the loop completes, all results are at least half accurate. */ goto L150; /* Report fatal error. 
*/ L130: *fatal = TRUE_; io___361.ciunit = *nout; s_wsfe(&io___361); e_wsfe(); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___362.ciunit = *nout; s_wsfe(&io___362); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ct[i__], (ftnlen)sizeof(real)); do_fio(&c__1, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof(real) ); e_wsfe(); } else { io___363.ciunit = *nout; s_wsfe(&io___363); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof(real) ); do_fio(&c__1, (char *)&ct[i__], (ftnlen)sizeof(real)); e_wsfe(); } /* L140: */ } if (*n > 1) { io___364.ciunit = *nout; s_wsfe(&io___364); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: return 0; /* End of SMMCH. */ } /* smmch_ */ logical lse_(real *ri, real *rj, integer *lr) { /* System generated locals */ integer i__1; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { if (ri[i__] != rj[i__]) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LSE. 
*/ } /* lse_ */ logical lseres_(char *type__, char *uplo, integer *m, integer *n, real *aa, real *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE' or 'SY'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { if (aa[i__ + j * aa_dim1] != as[i__ + j * as_dim1]) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LSERES. 
*/ } /* lseres_ */ real sbeg_(logical *reset) { /* System generated locals */ real ret_val; /* Local variables */ static integer i__, ic, mi; /* Generates random numbers uniformly distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; i__ = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I is bounded between 1 and 999. */ /* If initial I = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I = 4 or 8, the period will be 25. */ /* If initial I = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I in 6. */ ++ic; L10: i__ *= mi; i__ -= i__ / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } ret_val = (i__ - 500) / 1001.f; return ret_val; /* End of SBEG. */ } /* sbeg_ */ real sdiff_(real *x, real *y) { /* System generated locals */ real ret_val; /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of SDIFF. 
*/ } /* sdiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___374 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___374.ciunit = *nout; s_wsfe(&io___374); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. 
*/ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___375 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___376 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___377 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 3 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 3 BLAS routines. */ /* It is called by the Level 3 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. 
*/ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___375.ciunit = infoc_2.nout; s_wsfe(&io___375); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___376.ciunit = infoc_2.nout; s_wsfe(&io___376); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___377.ciunit = infoc_2.nout; s_wsfe(&io___377); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int sblat3_ () { main (); return 0; } blis-0.6.1/blastest/src/zblat1.c000066400000000000000000000671541360743507500164470ustar00rootroot00000000000000/* zblat1.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ struct { integer icase, n, incx, incy, mode; logical pass; } combla_; #define combla_1 combla_ /* Table of constant values */ static integer c__1 = 1; static integer c__9 = 9; static integer c__5 = 5; static doublereal c_b43 = 1.; static doublereal c_b52 = 0.; /* > \brief \b ZBLAT1 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM ZBLAT1 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX*16 Level 1 BLAS. 
*/ /* > */ /* > Based upon the original BLAS test routine together with: */ /* > F06GAF Example Program Text */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex16_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static doublereal sfac = 9.765625e-4; /* Format strings */ static char fmt_99999[] = "(\002 Complex BLAS Test Program Results\002,/" "1x)"; static char fmt_99998[] = "(\002 ----" "- PASS -----\002)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer ic; extern /* Subroutine */ int check1_(doublereal *), check2_(doublereal *), header_(void); /* Fortran I/O blocks */ static cilist io___2 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___4 = { 0, 6, 0, fmt_99998, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___2); e_wsfe(); for (ic = 1; ic <= 10; ++ic) { combla_1.icase = ic; header_(); /* Initialize PASS, INCX, INCY, and MODE for a new case. */ /* The value 9999 for INCX, INCY or MODE will appear in the */ /* detailed output, if any, for cases that do not involve */ /* these parameters. 
*/ combla_1.pass = TRUE_; combla_1.incx = 9999; combla_1.incy = 9999; combla_1.mode = 9999; if (combla_1.icase <= 5) { check2_(&sfac); } else if (combla_1.icase >= 6) { check1_(&sfac); } /* -- Print */ if (combla_1.pass) { s_wsfe(&io___4); e_wsfe(); } /* L20: */ } s_stop("", (ftnlen)0); return 0; } /* main */ /* Subroutine */ int header_(void) { /* Initialized data */ static char l[6*10] = "ZDOTC " "ZDOTU " "ZAXPY " "ZCOPY " "ZSWAP " "DZNR" "M2" "DZASUM" "ZSCAL " "ZDSCAL" "IZAMAX"; /* Format strings */ static char fmt_99999[] = "(/\002 Test of subprogram number\002,i3,12x,a" "6)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___6 = { 0, 6, 0, fmt_99999, 0 }; /* .. Parameters .. */ /* .. Scalars in Common .. */ /* .. Local Arrays .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ s_wsfe(&io___6); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, l + (0 + (0 + (combla_1.icase - 1) * 6)), (ftnlen)6); e_wsfe(); return 0; } /* header_ */ /* Subroutine */ int check1_(doublereal *sfac) { /* Initialized data */ static doublereal strue2[5] = { 0.,.5,.6,.7,.8 }; static doublereal strue4[5] = { 0.,.7,1.,1.3,1.6 }; static doublecomplex ctrue5[80] /* was [8][5][2] */ = { {.1,.1},{1., 2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{-.16,-.37},{ 3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{-.17,-.19} ,{.13,-.39},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{.11, -.03},{-.17,.46},{-.17,-.19},{7.,8.},{7.,8.},{7.,8.},{7.,8.},{7., 8.},{.19,-.17},{.2,-.35},{.35,.2},{.14,.08},{2.,3.},{2.,3.},{2., 3.},{2.,3.},{.1,.1},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4., 5.},{4.,5.},{-.16,-.37},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{ 6.,7.},{6.,7.},{-.17,-.19},{8.,9.},{.13,-.39},{2.,5.},{2.,5.},{2., 5.},{2.,5.},{2.,5.},{.11,-.03},{3.,6.},{-.17,.46},{4.,7.},{-.17, 
-.19},{7.,2.},{7.,2.},{7.,2.},{.19,-.17},{5.,8.},{.2,-.35},{6.,9.} ,{.35,.2},{8.,3.},{.14,.08},{9.,4.} }; static doublecomplex ctrue6[80] /* was [8][5][2] */ = { {.1,.1},{1., 2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{.09,-.12},{ 3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{.03,-.09}, {.15,-.03},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{.03, .03},{-.18,.03},{.03,-.09},{7.,8.},{7.,8.},{7.,8.},{7.,8.},{7.,8.} ,{.09,.03},{.15,0.},{0.,.15},{0.,.06},{2.,3.},{2.,3.},{2.,3.},{2., 3.},{.1,.1},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4., 5.},{.09,-.12},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{ 6.,7.},{.03,-.09},{8.,9.},{.15,-.03},{2.,5.},{2.,5.},{2.,5.},{2., 5.},{2.,5.},{.03,.03},{3.,6.},{-.18,.03},{4.,7.},{.03,-.09},{7., 2.},{7.,2.},{7.,2.},{.09,.03},{5.,8.},{.15,0.},{6.,9.},{0.,.15},{ 8.,3.},{0.,.06},{9.,4.} }; static integer itrue3[5] = { 0,1,2,2,2 }; static doublereal sa = .3; static doublecomplex ca = {.4,-.7}; static doublecomplex cv[80] /* was [8][5][2] */ = { {.1,.1},{1.,2.},{1., 2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{1.,2.},{.3,-.4},{3.,4.},{3., 4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{3.,4.},{.1,-.3},{.5,-.1},{5., 6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{5.,6.},{.1,.1},{-.6,.1},{.1, -.3},{7.,8.},{7.,8.},{7.,8.},{7.,8.},{7.,8.},{.3,.1},{.5,0.},{0., .5},{0.,.2},{2.,3.},{2.,3.},{2.,3.},{2.,3.},{.1,.1},{4.,5.},{4., 5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{4.,5.},{.3,-.4},{6.,7.},{6., 7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{6.,7.},{.1,-.3},{8.,9.},{.5, -.1},{2.,5.},{2.,5.},{2.,5.},{2.,5.},{2.,5.},{.1,.1},{3.,6.},{-.6, .1},{4.,7.},{.1,-.3},{7.,2.},{7.,2.},{7.,2.},{.3,.1},{5.,8.},{.5, 0.},{6.,9.},{0.,.5},{8.,3.},{0.,.2},{9.,4.} }; /* System generated locals */ integer i__1, i__2, i__3; doublereal d__1; doublecomplex z__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__; doublecomplex cx[8]; integer np1, 
len; extern /* Subroutine */ int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), ctest_(integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *); doublecomplex mwpcs[5], mwpct[5]; extern /* Subroutine */ int itest1_(integer *, integer *); extern doublereal dznrm2_(integer *, doublecomplex *, integer *); extern /* Subroutine */ int stest1_(doublereal *, doublereal *, doublereal *, doublereal *), zdscal_(integer *, doublereal *, doublecomplex *, integer *); extern integer izamax_(integer *, doublecomplex *, integer *); extern doublereal dzasum_(integer *, doublecomplex *, integer *); /* Fortran I/O blocks */ static cilist io___19 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (combla_1.incx = 1; combla_1.incx <= 2; ++combla_1.incx) { for (np1 = 1; np1 <= 5; ++np1) { combla_1.n = np1 - 1; len = max(combla_1.n,1) << 1; /* .. Set vector arguments .. */ i__1 = len; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__ - 1; i__3 = i__ + (np1 + combla_1.incx * 5 << 3) - 49; cx[i__2].r = cv[i__3].r, cx[i__2].i = cv[i__3].i; /* L20: */ } if (combla_1.icase == 6) { /* .. DZNRM2 .. */ d__1 = dznrm2_(&combla_1.n, cx, &combla_1.incx); stest1_(&d__1, &strue2[np1 - 1], &strue2[np1 - 1], sfac); } else if (combla_1.icase == 7) { /* .. DZASUM .. */ d__1 = dzasum_(&combla_1.n, cx, &combla_1.incx); stest1_(&d__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); } else if (combla_1.icase == 8) { /* .. ZSCAL .. */ zscal_(&combla_1.n, &ca, cx, &combla_1.incx); ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 9) { /* .. ZDSCAL .. 
*/ zdscal_(&combla_1.n, &sa, cx, &combla_1.incx); ctest_(&len, cx, &ctrue6[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue6[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 10) { /* .. IZAMAX .. */ i__1 = izamax_(&combla_1.n, cx, &combla_1.incx); itest1_(&i__1, &itrue3[np1 - 1]); } else { s_wsle(&io___19); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK1", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } combla_1.incx = 1; if (combla_1.icase == 8) { /* ZSCAL */ /* Add a test for alpha equal to zero. */ ca.r = 0., ca.i = 0.; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; mwpct[i__1].r = 0., mwpct[i__1].i = 0.; i__1 = i__ - 1; mwpcs[i__1].r = 1., mwpcs[i__1].i = 1.; /* L80: */ } zscal_(&c__5, &ca, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } else if (combla_1.icase == 9) { /* ZDSCAL */ /* Add a test for alpha equal to zero. */ sa = 0.; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; mwpct[i__1].r = 0., mwpct[i__1].i = 0.; i__1 = i__ - 1; mwpcs[i__1].r = 1., mwpcs[i__1].i = 1.; /* L100: */ } zdscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); /* Add a test for alpha equal to one. */ sa = 1.; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; mwpct[i__1].r = cx[i__2].r, mwpct[i__1].i = cx[i__2].i; i__1 = i__ - 1; i__2 = i__ - 1; mwpcs[i__1].r = cx[i__2].r, mwpcs[i__1].i = cx[i__2].i; /* L120: */ } zdscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); /* Add a test for alpha equal to minus one. 
*/ sa = -1.; for (i__ = 1; i__ <= 5; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; z__1.r = -cx[i__2].r, z__1.i = -cx[i__2].i; mwpct[i__1].r = z__1.r, mwpct[i__1].i = z__1.i; i__1 = i__ - 1; i__2 = i__ - 1; z__1.r = -cx[i__2].r, z__1.i = -cx[i__2].i; mwpcs[i__1].r = z__1.r, mwpcs[i__1].i = z__1.i; /* L140: */ } zdscal_(&c__5, &sa, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } return 0; } /* check1_ */ /* Subroutine */ int check2_(doublereal *sfac) { /* Initialized data */ static doublecomplex ca = {.4,-.7}; static integer incxs[4] = { 1,2,-2,-1 }; static integer incys[4] = { 1,-2,1,-2 }; static integer lens[8] /* was [4][2] */ = { 1,1,2,4,1,1,3,7 }; static integer ns[4] = { 0,1,2,4 }; static doublecomplex cx1[7] = { {.7,-.8},{-.4,-.7},{-.1,-.9},{.2,-.8},{ -.9,-.4},{.1,.4},{-.6,.6} }; static doublecomplex cy1[7] = { {.6,-.6},{-.9,.5},{.7,-.6},{.1,-.5},{-.1, -.2},{-.5,-.3},{.8,-.7} }; static doublecomplex ct8[112] /* was [7][4][4] */ = { {.6,-.6},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{-1.55,.5},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{-1.55,.5},{.03, -.89},{-.38,-.96},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.} ,{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{-.07,-.89},{-.9,.5},{.42,-1.41},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{.78,.06},{-.9,.5},{.06,-.13},{.1,-.5} ,{-.77,-.49},{-.5,-.3},{.52,-1.51},{.6,-.6},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{-.07,-.89},{-1.18,-.31},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{.78,.06},{-1.54,.97},{.03,-.89},{-.18, -1.31},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{.32,-1.41},{0.,0.},{0.,0.},{0.,0.},{0.,0.} ,{0.,0.},{0.,0.},{.32,-1.41},{-.9,.5},{.05,-.6},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{.32,-1.41},{-.9,.5},{.05,-.6},{.1,-.5},{-.77,-.49} ,{-.5,-.3},{.32,-1.16} }; static 
doublecomplex ct7[16] /* was [4][4] */ = { {0.,0.},{-.06, -.9},{.65,-.47},{-.34,-1.22},{0.,0.},{-.06,-.9},{-.59,-1.46},{ -1.04,-.04},{0.,0.},{-.06,-.9},{-.83,.59},{.07,-.37},{0.,0.},{ -.06,-.9},{-.76,-1.15},{-1.33,-1.82} }; static doublecomplex ct6[16] /* was [4][4] */ = { {0.,0.},{.9,.06}, {.91,-.77},{1.8,-.1},{0.,0.},{.9,.06},{1.45,.74},{.2,.9},{0.,0.},{ .9,.06},{-.55,.23},{.83,-.39},{0.,0.},{.9,.06},{1.04,.79},{1.95, 1.22} }; static doublecomplex ct10x[112] /* was [7][4][4] */ = { {.7,-.8},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{-.9,.5},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{-.9,.5},{.7,-.6},{.1,-.5},{ 0.,0.},{0.,0.},{0.,0.},{.7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{.7,-.6},{-.4,-.7},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0.,0.} ,{.8,-.7},{-.4,-.7},{-.1,-.2},{.2,-.8},{.7,-.6},{.1,.4},{.6,-.6},{ .7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{-.9,.5},{-.4,-.7}, {.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.1,-.5},{-.4,-.7},{.7, -.6},{.2,-.8},{-.9,.5},{.1,.4},{.6,-.6},{.7,-.8},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{.6,-.6},{.7,-.6},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{.6,-.6},{.7,-.6},{-.1,-.2},{.8,-.7},{0.,0.},{0., 0.},{0.,0.} }; static doublecomplex ct10y[112] /* was [7][4][4] */ = { {.6,-.6},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.4,-.7},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.4,-.7},{-.1,-.9},{.2, -.8},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{.7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{-.1,-.9},{-.9,.5},{.7,-.8},{0.,0.},{0.,0.},{0.,0.},{ 0.,0.},{-.6,.6},{-.9,.5},{-.9,-.4},{.1,-.5},{-.1,-.9},{-.5,-.3},{ 
.7,-.8},{.6,-.6},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{ .7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{-.1,-.9}, {.7,-.8},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{-.6,.6},{-.9, -.4},{-.1,-.9},{.7,-.8},{0.,0.},{0.,0.},{0.,0.},{.6,-.6},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.9,.5},{-.4,-.7},{0.,0.} ,{0.,0.},{0.,0.},{0.,0.},{.7,-.8},{-.9,.5},{-.4,-.7},{.1,-.5},{ -.1,-.9},{-.5,-.3},{.2,-.8} }; static doublecomplex csize1[4] = { {0.,0.},{.9,.9},{1.63,1.73},{2.9,2.78} }; static doublecomplex csize3[14] = { {0.,0.},{0.,0.},{0.,0.},{0.,0.},{0., 0.},{0.,0.},{0.,0.},{1.17,1.17},{1.17,1.17},{1.17,1.17},{1.17, 1.17},{1.17,1.17},{1.17,1.17},{1.17,1.17} }; static doublecomplex csize2[14] /* was [7][2] */ = { {0.,0.},{0.,0.},{ 0.,0.},{0.,0.},{0.,0.},{0.,0.},{0.,0.},{1.54,1.54},{1.54,1.54},{ 1.54,1.54},{1.54,1.54},{1.54,1.54},{1.54,1.54},{1.54,1.54} }; /* System generated locals */ integer i__1, i__2; doublecomplex z__1; /* Builtin functions */ integer s_wsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_wsle(void); /* Subroutine */ int s_stop(char *, ftnlen); /* Local variables */ integer i__, ki, kn; doublecomplex cx[7], cy[7]; integer mx, my; doublecomplex cdot[1]; integer lenx, leny; extern /* Subroutine */ int ctest_(integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *); extern /* Double Complex */ doublecomplex zdotc_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); integer ksize; extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); extern /* Double Complex */ doublecomplex zdotu_(integer *, doublecomplex *, integer *, doublecomplex *, integer *); extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *); /* Fortran I/O blocks */ static 
cilist io___48 = { 0, 6, 0, 0, 0 }; /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ for (ki = 1; ki <= 4; ++ki) { combla_1.incx = incxs[ki - 1]; combla_1.incy = incys[ki - 1]; mx = abs(combla_1.incx); my = abs(combla_1.incy); for (kn = 1; kn <= 4; ++kn) { combla_1.n = ns[kn - 1]; ksize = min(2,kn); lenx = lens[kn + (mx << 2) - 5]; leny = lens[kn + (my << 2) - 5]; /* .. initialize all argument arrays .. */ for (i__ = 1; i__ <= 7; ++i__) { i__1 = i__ - 1; i__2 = i__ - 1; cx[i__1].r = cx1[i__2].r, cx[i__1].i = cx1[i__2].i; i__1 = i__ - 1; i__2 = i__ - 1; cy[i__1].r = cy1[i__2].r, cy[i__1].i = cy1[i__2].i; /* L20: */ } if (combla_1.icase == 1) { /* .. ZDOTC .. */ z__1 = zdotc_(&combla_1.n, cx, &combla_1.incx, cy, & combla_1.incy); cdot[0].r = z__1.r, cdot[0].i = z__1.i; ctest_(&c__1, cdot, &ct6[kn + (ki << 2) - 5], &csize1[kn - 1], sfac); } else if (combla_1.icase == 2) { /* .. ZDOTU .. */ z__1 = zdotu_(&combla_1.n, cx, &combla_1.incx, cy, & combla_1.incy); cdot[0].r = z__1.r, cdot[0].i = z__1.i; ctest_(&c__1, cdot, &ct7[kn + (ki << 2) - 5], &csize1[kn - 1], sfac); } else if (combla_1.icase == 3) { /* .. ZAXPY .. */ zaxpy_(&combla_1.n, &ca, cx, &combla_1.incx, cy, & combla_1.incy); ctest_(&leny, cy, &ct8[(kn + (ki << 2)) * 7 - 35], &csize2[ ksize * 7 - 7], sfac); } else if (combla_1.icase == 4) { /* .. ZCOPY .. */ zcopy_(&combla_1.n, cx, &combla_1.incx, cy, &combla_1.incy); ctest_(&leny, cy, &ct10y[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); } else if (combla_1.icase == 5) { /* .. ZSWAP .. 
*/ zswap_(&combla_1.n, cx, &combla_1.incx, cy, &combla_1.incy); ctest_(&lenx, cx, &ct10x[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); ctest_(&leny, cy, &ct10y[(kn + (ki << 2)) * 7 - 35], csize3, & c_b43); } else { s_wsle(&io___48); do_lio(&c__9, &c__1, " Shouldn't be here in CHECK2", (ftnlen) 28); e_wsle(); s_stop("", (ftnlen)0); } /* L40: */ } /* L60: */ } return 0; } /* check2_ */ /* Subroutine */ int stest_(integer *len, doublereal *scomp, doublereal * strue, doublereal *ssize, doublereal *sfac) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY MODE I " " \002,\002 COMP(I) TRU" "E(I) DIFFERENCE\002,\002 SIZE(I)\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,3i5,i3,2d36.8,2d12.4)"; /* System generated locals */ integer i__1; doublereal d__1, d__2; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__; doublereal sd; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___51 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___52 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___53 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* STEST ************************** */ /* THIS SUBR COMPARES ARRAYS SCOMP() AND STRUE() OF LENGTH LEN TO */ /* SEE IF THE TERM BY TERM DIFFERENCES, MULTIPLIED BY SFAC, ARE */ /* NEGLIGIBLE. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ --ssize; --strue; --scomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { sd = scomp[i__] - strue[i__]; if ((d__2 = *sfac * sd, abs(d__2)) <= (d__1 = ssize[i__], abs(d__1)) * d_epsilon_(&c_b52)) { goto L40; } /* HERE SCOMP(I) IS NOT CLOSE TO STRUE(I). */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. */ combla_1.pass = FALSE_; s_wsfe(&io___51); e_wsfe(); s_wsfe(&io___52); e_wsfe(); L20: s_wsfe(&io___53); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.mode, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&scomp[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&strue[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&sd, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ssize[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); L40: ; } return 0; } /* stest_ */ /* Subroutine */ int stest1_(doublereal *scomp1, doublereal *strue1, doublereal *ssize, doublereal *sfac) { doublereal scomp[1], strue[1]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* ************************* STEST1 ***************************** */ /* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN */ /* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE */ /* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT. */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ --ssize; /* Function Body */ scomp[0] = *scomp1; strue[0] = *strue1; stest_(&c__1, scomp, strue, &ssize[1], sfac); return 0; } /* stest1_ */ doublereal sdiff_(doublereal *sa, doublereal *sb) { /* System generated locals */ doublereal ret_val; /* ********************************* SDIFF ************************** */ /* COMPUTES DIFFERENCE OF TWO NUMBERS. C. L. LAWSON, JPL 1974 FEB 15 */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *sa - *sb; return ret_val; } /* sdiff_ */ /* Subroutine */ int ctest_(integer *len, doublecomplex *ccomp, doublecomplex *ctrue, doublecomplex *csize, doublereal *sfac) { /* System generated locals */ integer i__1, i__2; /* Builtin functions */ double d_imag(const doublecomplex *); /* Local variables */ integer i__; doublereal scomp[20], ssize[20], strue[20]; extern /* Subroutine */ int stest_(integer *, doublereal *, doublereal *, doublereal *, doublereal *); /* **************************** CTEST ***************************** */ /* C.L. LAWSON, JPL, 1978 DEC 6 */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ --csize; --ctrue; --ccomp; /* Function Body */ i__1 = *len; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; scomp[(i__ << 1) - 2] = ccomp[i__2].r; scomp[(i__ << 1) - 1] = d_imag(&ccomp[i__]); i__2 = i__; strue[(i__ << 1) - 2] = ctrue[i__2].r; strue[(i__ << 1) - 1] = d_imag(&ctrue[i__]); i__2 = i__; ssize[(i__ << 1) - 2] = csize[i__2].r; ssize[(i__ << 1) - 1] = d_imag(&csize[i__]); /* L20: */ } i__1 = *len << 1; stest_(&i__1, scomp, strue, ssize, sfac); return 0; } /* ctest_ */ /* Subroutine */ int itest1_(integer *icomp, integer *itrue) { /* Format strings */ static char fmt_99999[] = "(\002 F" "AIL\002)"; static char fmt_99998[] = "(/\002 CASE N INCX INCY MODE " " \002,\002 COMP TRU" "E DIFFERENCE\002,/1x)"; static char fmt_99997[] = "(1x,i4,i3,3i5,2i36,i12)"; /* Builtin functions */ integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer id; /* Fortran I/O blocks */ static cilist io___60 = { 0, 6, 0, fmt_99999, 0 }; static cilist io___61 = { 0, 6, 0, fmt_99998, 0 }; static cilist io___63 = { 0, 6, 0, fmt_99997, 0 }; /* ********************************* ITEST1 ************************* */ /* THIS SUBROUTINE COMPARES THE VARIABLES ICOMP AND ITRUE FOR */ /* EQUALITY. */ /* C. L. LAWSON, JPL, 1974 DEC 10 */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ if (*icomp == *itrue) { goto L40; } /* HERE ICOMP IS NOT EQUAL TO ITRUE. */ if (! combla_1.pass) { goto L20; } /* PRINT FAIL MESSAGE AND HEADER. 
*/ combla_1.pass = FALSE_; s_wsfe(&io___60); e_wsfe(); s_wsfe(&io___61); e_wsfe(); L20: id = *icomp - *itrue; s_wsfe(&io___63); do_fio(&c__1, (char *)&combla_1.icase, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&combla_1.mode, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*icomp), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&(*itrue), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&id, (ftnlen)sizeof(integer)); e_wsfe(); L40: return 0; } /* itest1_ */ /* Main program alias */ int zblat1_ () { main (); return 0; } blis-0.6.1/blastest/src/zblat2.c000066400000000000000000005055711360743507500164500ustar00rootroot00000000000000/* zblat2.f -- translated by f2c (version 20100827). You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static doublecomplex c_b1 = {0.,0.}; static doublecomplex c_b2 = {1.,0.}; static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__5 = 5; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static doublereal c_b122 = 0.; static logical c_true = TRUE_; static integer c_n1 = -1; static integer c__0 = 0; 
static logical c_false = FALSE_; /* > \brief \b ZBLAT2 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM ZBLAT2 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX*16 Level 2 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 18 records */ /* > of the file are read using list-directed input, the last 17 records */ /* > are read using the format ( A6, L2 ). An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 35 lines: */ /* > 'zblat2.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'CBLA2T.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 4 NUMBER OF VALUES OF K */ /* > 0 1 2 4 VALUES OF K */ /* > 4 NUMBER OF VALUES OF INCX AND INCY */ /* > 1 2 -1 -2 VALUES OF INCX AND INCY */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA */ /* > ZGEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZGBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHEMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTRMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTBMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTPMV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTRSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTBSV T PUT F FOR NO TEST. 
SAME COLUMNS. */ /* > ZTPSV T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZGERC T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZGERU T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHER T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHPR T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHER2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHPR2 T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Hammarling S. and Hanson R. J.. */ /* > An extended set of Fortran Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memoranda Nos. 41 (revision 3) and 81, Mathematics */ /* > and Computer Science Division, Argonne National Laboratory, */ /* > 9700 South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > Or */ /* > */ /* > NAG Technical Reports TR3/87 and TR4/87, Numerical Algorithms */ /* > Group Ltd., NAG Central Office, 256 Banbury Road, Oxford */ /* > OX2 7DE, UK, and Numerical Algorithms Group Inc., 1101 31st */ /* > Street, Suite 100, Downers Grove, Illinois 60515-1263, USA. */ /* > */ /* > */ /* > -- Written on 10-August-1987. */ /* > Richard Hanson, Sandia National Labs. */ /* > Jeremy Du Croz, NAG Central Office. */ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. 
*/ /* > \date April 2012 */ /* > \ingroup complex16_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static char snames[6*17] = "ZGEMV " "ZGBMV " "ZHEMV " "ZHBMV " "ZHPMV " "ZTRMV " "ZTBMV " "ZTPMV " "ZTRSV " "ZTBSV " "ZTPSV " "ZGERC " "ZGERU " "ZHER " "ZHPR " "ZHER2 " "ZHPR2 "; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 VALUE OF K IS LESS THAN 0\002)"; static char fmt_9994[] = "(\002 ABSOLUTE VALUE OF INCX OR INCY IS 0 OR G" "REATER THAN \002,i2)"; static char fmt_9993[] = "(\002 TESTS OF THE COMPLEX*16 LEVEL 2 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9992[] = "(\002 FOR N \002,9i6)"; static char fmt_9991[] = "(\002 FOR K \002,7i6)"; static char fmt_9990[] = "(\002 FOR INCX AND INCY \002,7i6)"; static char fmt_9989[] = "(\002 FOR ALPHA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9988[] = "(\002 FOR BETA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9980[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9984[] = "(a6,l2)"; static char fmt_9986[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,d9.1)"; static char fmt_9985[] = "(\002 ERROR IN ZMVCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 ZMVCH WAS CALLED " "WITH TRANS = \002,a1,\002 AND RETURNED SAME = \002,l1,\002 AND E" "RR = \002,f12.3,\002.\002,/\002 THIS MAY BE DUE TO FAULTS IN 
THE" " ARITHMETIC OR THE COMPILER.\002,/\002 ******* TESTS ABANDONED *" "******\002)"; static char fmt_9983[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9982[] = "(/\002 END OF TESTS\002)"; static char fmt_9981[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9987[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ doublecomplex a[4225] /* was [65][65] */; doublereal g[65]; integer i__, j, n; doublecomplex x[65], y[65], z__[130], aa[4225]; integer kb[7]; doublecomplex as[4225], xs[130], ys[130], yt[65], xx[130], yy[130], alf[7] ; integer inc[7], nkb; doublecomplex bet[7]; doublereal eps, err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer nalf, idim[9]; logical same; integer ninc, nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int zchk1_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk2_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, 
integer *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk3_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk4_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk5_( char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk6_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int zchke_(integer *, char *, integer *, ftnlen); char 
snaps[32], trans[1]; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); integer isnum; logical ltest[17], sfatal; char snamet[6]; doublereal thresh; logical ltestt, tsterr; char summry[32]; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___32 = { 0, 5, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___35 = { 0, 5, 0, 0, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 5, 0, 0, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___41 = { 0, 5, 0, 0, 0 }; static cilist io___43 = { 0, 5, 0, 0, 0 }; static cilist io___45 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___46 = { 0, 5, 0, 0, 0 }; static cilist io___48 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___50 = { 0, 0, 0, fmt_9991, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___52 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___53 = { 0, 0, 0, fmt_9988, 0 }; static cilist io___54 = 
{ 0, 0, 0, 0, 0 }; static cilist io___55 = { 0, 0, 0, fmt_9980, 0 }; static cilist io___56 = { 0, 0, 0, 0, 0 }; static cilist io___57 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___58 = { 0, 0, 0, 0, 0 }; static cilist io___60 = { 0, 5, 1, fmt_9984, 0 }; static cilist io___63 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___81 = { 0, 0, 0, 0, 0 }; static cilist io___82 = { 0, 0, 0, fmt_9983, 0 }; static cilist io___83 = { 0, 0, 0, 0, 0 }; static cilist io___90 = { 0, 0, 0, fmt_9982, 0 }; static cilist io___91 = { 0, 0, 0, fmt_9981, 0 }; static cilist io___92 = { 0, 0, 0, fmt_9987, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. 
*/ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); /* Read and check the parameter values for the tests. 
*/ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L10: */ } /* Values of K */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nkb, (ftnlen)sizeof(integer)); e_rsle(); if (nkb < 1 || nkb > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "K", (ftnlen)1); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___29); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { if (kb[i__ - 1] < 0) { io___31.ciunit = nout; s_wsfe(&io___31); e_wsfe(); goto L230; } /* L20: */ } /* Values of INCX and INCY */ s_rsle(&io___32); do_lio(&c__3, &c__1, (char *)&ninc, (ftnlen)sizeof(integer)); e_rsle(); if (ninc < 1 || ninc > 7) { io___34.ciunit = nout; s_wsfe(&io___34); do_fio(&c__1, "INCX AND INCY", (ftnlen)13); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___35); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { if (inc[i__ - 1] == 0 || (i__2 = inc[i__ - 1], abs(i__2)) > 2) { io___37.ciunit = nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&c__2, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } /* L30: */ } /* Values of ALPHA */ s_rsle(&io___38); 
do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___40.ciunit = nout; s_wsfe(&io___40); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___41); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__7, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof( doublecomplex)); } e_rsle(); /* Values of BETA */ s_rsle(&io___43); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___45.ciunit = nout; s_wsfe(&io___45); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L230; } s_rsle(&io___46); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__7, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof( doublecomplex)); } e_rsle(); /* Report values of parameters. */ io___48.ciunit = nout; s_wsfe(&io___48); e_wsfe(); io___49.ciunit = nout; s_wsfe(&io___49); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___50.ciunit = nout; s_wsfe(&io___50); i__1 = nkb; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&kb[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___51.ciunit = nout; s_wsfe(&io___51); i__1 = ninc; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&inc[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___52.ciunit = nout; s_wsfe(&io___52); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); io___53.ciunit = nout; s_wsfe(&io___53); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); if (! 
tsterr) { io___54.ciunit = nout; s_wsle(&io___54); e_wsle(); io___55.ciunit = nout; s_wsfe(&io___55); e_wsfe(); } io___56.ciunit = nout; s_wsle(&io___56); e_wsle(); io___57.ciunit = nout; s_wsfe(&io___57); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); io___58.ciunit = nout; s_wsle(&io___58); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 17; ++i__) { ltest[i__ - 1] = FALSE_; /* L40: */ } L50: i__1 = s_rsfe(&io___60); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L80; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L80; } i__1 = e_rsfe(); if (i__1 != 0) { goto L80; } for (i__ = 1; i__ <= 17; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L70; } /* L60: */ } io___63.ciunit = nout; s_wsfe(&io___63); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L70: ltest[i__ - 1] = ltestt; goto L50; L80: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = d_epsilon_(&c_b122); io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Check the reliability of ZMVCH using exact data. */ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * 65 - 66; /* Computing MAX */ i__5 = i__ - j + 1; i__4 = max(i__5,0); a[i__3].r = (doublereal) i__4, a[i__3].i = 0.; /* L110: */ } i__2 = j - 1; x[i__2].r = (doublereal) j, x[i__2].i = 0.; i__2 = j - 1; y[i__2].r = 0., y[i__2].i = 0.; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; yy[i__2].r = (doublereal) i__3, yy[i__2].i = 0.; /* L130: */ } /* YY holds the exact result. On exit from ZMVCH YT holds */ /* the result computed by ZMVCH. 
*/ *(unsigned char *)trans = 'N'; zmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c__1, &c_b1, y, &c__1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lze_(yy, yt, &n); if (! same || err != 0.) { io___78.ciunit = nout; s_wsfe(&io___78); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)trans = 'T'; zmvch_(trans, &n, &n, &c_b2, a, &c__65, x, &c_n1, &c_b1, y, &c_n1, yt, g, yy, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1); same = lze_(yy, yt, &n); if (! same || err != 0.) { io___79.ciunit = nout; s_wsfe(&io___79); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 17; ++isnum) { io___81.ciunit = nout; s_wsle(&io___81); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. */ io___82.ciunit = nout; s_wsfe(&io___82); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { zchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___83.ciunit = nout; s_wsle(&io___83); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L140; case 3: goto L150; case 4: goto L150; case 5: goto L150; case 6: goto L160; case 7: goto L160; case 8: goto L160; case 9: goto L160; case 10: goto L160; case 11: goto L160; case 12: goto L170; case 13: goto L170; case 14: goto L180; case 15: goto L180; case 16: goto L190; case 17: goto L190; } /* Test ZGEMV, 01, and ZGBMV, 02. 
*/ L140: zchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test ZHEMV, 03, ZHBMV, 04, and ZHPMV, 05. */ L150: zchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &nalf, alf, &nbet, bet, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, (ftnlen)6); goto L200; /* Test ZTRMV, 06, ZTBMV, 07, ZTPMV, 08, */ /* ZTRSV, 09, ZTBSV, 10, and ZTPSV, 11. */ L160: zchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nkb, kb, &ninc, inc, &c__65, &c__2, a, aa, as, y, yy, ys, yt, g, z__, (ftnlen) 6); goto L200; /* Test ZGERC, 12, ZGERU, 13. */ L170: zchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test ZHER, 14, and ZHPR, 15. */ L180: zchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); goto L200; /* Test ZHER2, 16, and ZHPR2, 17. */ L190: zchk6_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &ninc, inc, &c__65, &c__2, a, aa, as, x, xx, xs, y, yy, ys, yt, g, z__, (ftnlen)6); L200: if (fatal && sfatal) { goto L220; } } /* L210: */ } io___90.ciunit = nout; s_wsfe(&io___90); e_wsfe(); goto L240; L220: io___91.ciunit = nout; s_wsfe(&io___91); e_wsfe(); goto L240; L230: io___92.ciunit = nout; s_wsfe(&io___92); e_wsfe(); L240: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of ZBLAT2. 
*/ return 0; } /* main */ /* Subroutine */ int zchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *nalf, doublecomplex *alf, integer *nbet, doublecomplex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, ftnlen sname_len) { /* Initialized data */ static char ich[3] = "NTC"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "4(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); 
/* Local variables */ integer i__, m, n, ia, ib, ic, nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns, laa, lda; doublecomplex als, bls; doublereal err; integer iku, kls; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer kus; doublecomplex beta; integer ldas; logical same; integer incx, incy; logical full, tran, null; doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs, incys; extern /* Subroutine */ int zgbmv_(char *, integer *, integer *, integer * , integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); char trans[1]; extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical banded; doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1]; /* Fortran I/O blocks */ static cilist io___139 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___140 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___141 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___144 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___146 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___147 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___148 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___149 = { 0, 0, 0, fmt_9994, 0 }; static cilist 
io___150 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZGEMV and ZGBMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; /* Define the number of arguments. */ if (full) { nargs = 11; } else if (banded) { nargs = 13; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (iku = 1; iku <= i__2; ++iku) { if (banded) { ku = kb[iku]; /* Computing MAX */ i__3 = ku - 1; kl = max(i__3,0); } else { ku = n - 1; kl = m - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = kl + ku + 1; } else { lda = m; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } laa = lda * n; null = n <= 0 || m <= 0; /* Generate the matrix A. 
*/ transl.r = 0., transl.i = 0.; zmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1] , &lda, &kl, &ku, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); for (ic = 1; ic <= 3; ++ic) { *(unsigned char *)trans = *(unsigned char *)&ich[ic - 1]; tran = *(unsigned char *)trans == 'T' || *(unsigned char * )trans == 'C'; if (tran) { ml = n; nl = m; } else { ml = m; nl = n; } i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * nl; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__4 = abs(incx); i__5 = nl - 1; zmake_("GE", " ", " ", &c__1, &nl, &x[1], &c__1, &xx[ 1], &i__4, &c__0, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); if (nl > 1) { i__4 = nl / 2; x[i__4].r = 0., x[i__4].i = 0.; i__4 = abs(incx) * (nl / 2 - 1) + 1; xx[i__4].r = 0., xx[i__4].i = 0.; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * ml; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { i__6 = ia; alpha.r = alf[i__6].r, alpha.i = alf[i__6].i; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { i__7 = ib; beta.r = bet[i__7].r, beta.i = bet[i__7] .i; /* Generate the vector Y. */ transl.r = 0., transl.i = 0.; i__7 = abs(incy); i__8 = ml - 1; zmake_("GE", " ", " ", &c__1, &ml, &y[1], &c__1, &yy[1], &i__7, &c__0, & i__8, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. 
*/ *(unsigned char *)transs = *(unsigned char *)trans; ms = m; ns = n; kls = kl; kus = ku; als.r = alpha.r, als.i = alpha.i; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; as[i__8].r = aa[i__9].r, as[i__8].i = aa[i__9].i; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[i__9].i; /* L20: */ } incxs = incx; bls.r = beta.r, bls.i = beta.i; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[i__9].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___139.ciunit = *ntra; s_wsfe(&io___139); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgemv_(trans, &m, &n, &alpha, &aa[1], &lda, &xx[1], &incx, &beta, & yy[1], &incy, (ftnlen)1); } else if (banded) { if (*trace) { io___140.ciunit = *ntra; s_wsfe(&io___140); do_fio(&c__1, (char *)&nc, ( ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&kl, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, ( ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&lda, ( ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, ( ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, ( 
ftnlen)sizeof(doublereal)) ; do_fio(&c__1, (char *)&incy, ( ftnlen)sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgbmv_(trans, &m, &n, &kl, &ku, & alpha, &aa[1], &lda, &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___141.ciunit = *nout; s_wsfe(&io___141); e_wsfe(); *fatal = TRUE_; goto L130; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)trans == *( unsigned char *)transs; isame[1] = ms == m; isame[2] = ns == n; if (full) { isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lze_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lze_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lze_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[9] = lzeres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[10] = incys == incy; } else if (banded) { isame[3] = kls == kl; isame[4] = kus == ku; isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lze_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lze_(&xs[1], &xx[1], &lx); isame[9] = incxs == incx; isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lze_(&ys[1], &yy[1], & ly); } else { i__7 = abs(incy); isame[11] = lzeres_("GE", " ", & c__1, &ml, &ys[1], &yy[1], &i__7, (ftnlen)2, ( ftnlen)1); } isame[12] = incys == incy; } /* If data was incorrectly changed, report */ /* and return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___144.ciunit = *nout; s_wsfe(&io___144); do_fio(&c__1, (char *)&i__, ( ftnlen)sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L130; } if (! null) { /* Check the result. 
*/ zmvch_(trans, &m, &n, &alpha, &a[ a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, (ftnlen) 1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L130; } } else { /* Avoid repeating tests with M.le.0 or */ /* N.le.0. */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___146.ciunit = *nout; s_wsfe(&io___146); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___147.ciunit = *nout; s_wsfe(&io___147); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L140; L130: io___148.ciunit = *nout; s_wsfe(&io___148); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___149.ciunit = *nout; s_wsfe(&io___149); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___150.ciunit = *nout; s_wsfe(&io___150); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&kl, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ku, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, 
(ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L140: return 0; /* End of ZCHK1. */ } /* zchk1_ */ /* Subroutine zchk2_
 *
 * Test driver for the Hermitian matrix-vector routines ZHEMV, ZHBMV and
 * ZHPMV.  The third character of sname selects the variant:
 *   E = full   (zhemv_, nargs = 10 checked arguments)
 *   B = banded (zhbmv_, nargs = 11)
 *   P = packed (zhpmv_, nargs = 9)
 * NOTE(review): nargs is left unset when sname[2] is none of E, B, P.
 *
 * Iteration space: every n in idim[1..*nidim]; every k (kb[1..*nkb] when
 * banded, otherwise n - 1); both UPLO characters; every incx and incy in
 * inc[1..*ninc]; every alpha in alf[1..*nalf]; every beta in bet[1..*nbet].
 * LDA is k + 1 (banded) or n, incremented once when it is smaller than
 * *nmax; the current k is skipped when lda exceeds *nmax.
 *
 * For each combination the routine
 *   1. fills a and aa, x and xx, y and yy through zmake_ (entry n div 2 of
 *      x and its counterpart in xx are zeroed when n > 1);
 *   2. saves every argument (uplos, ns, ks, als, as, ldas, xs, incxs, bls,
 *      ys, incys);
 *   3. when *trace is set writes the call to unit *ntra, and when *rewi is
 *      set rewinds that unit;
 *   4. calls the routine under test on aa, xx and yy;
 *   5. sets *fatal and jumps to the failure report if infoc_1.ok is false
 *      or if any of the first nargs isame[] flags is false;
 *   6. when n > 0 hands a, x, y, yt, g and yy to zmvch_ and folds err into
 *      errmax; when n <= 0 it moves on to the next n.
 *
 * Reporting goes to unit *nout: the PASSED message when errmax < *thresh,
 * otherwise the SUSPECT message with errmax; on a fatal condition the
 * FAILED ON CALL NUMBER message followed by the arguments of that call.
 *
 * Arguments not described above: eps is forwarded to zmvch_; incmax and
 * sname_len are not referenced in the body.  The function always returns 0;
 * failure is signalled through *fatal.
 */ int zchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *nalf, doublecomplex *alf, integer *nbet, doublecomplex *bet, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, ftnlen sname_len) { /* Initialized data: the two UPLO characters tried by the ic loop */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3,\002, X,\002," "i2,\002,(\002,f4.1,\002,\002,f4.1,\002), \002,\002Y,\002,i2,\002" ") .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002), A,\002,i3" ",\002, X,\002,i2,\002,(\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2," "\002) .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), AP, X,\002,i2,\002,(" "\002,f4.1,\002,\002,f4.1,\002), Y,\002,i2,\002) " ".\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 
C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, i__9; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, ia, ib, ic, nc, ik, in, nk, ks, ix, iy, ns, lx, ly, laa, lda; doublecomplex als, bls; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); doublecomplex beta; integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs, incys; extern /* Subroutine */ int zhbmv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen), zhemv_(char *, integer * , doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); char uplos[1]; extern /* Subroutine */ int zhpmv_(char *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen); logical banded, packed; doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, 
doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___189 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___190 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___191 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___192 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___195 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___197 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___198 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___199 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___200 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___201 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___202 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZHEMV, ZHBMV and ZHPMV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --alf; --bet; --inc; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 10; } else if (banded) { nargs = 11; } else if (packed) { nargs = 9; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. 
*/ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; zmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[ 1], &lda, &k, &k, &reset, &transl, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__4 = abs(incx); i__5 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; x[i__4].r = 0., x[i__4].i = 0.; i__4 = abs(incx) * (n / 2 - 1) + 1; xx[i__4].r = 0., xx[i__4].i = 0.; } i__4 = *ninc; for (iy = 1; iy <= i__4; ++iy) { incy = inc[iy]; ly = abs(incy) * n; i__5 = *nalf; for (ia = 1; ia <= i__5; ++ia) { i__6 = ia; alpha.r = alf[i__6].r, alpha.i = alf[i__6].i; i__6 = *nbet; for (ib = 1; ib <= i__6; ++ib) { i__7 = ib; beta.r = bet[i__7].r, beta.i = bet[i__7].i; /* Generate the vector Y. */ transl.r = 0., transl.i = 0.; i__7 = abs(incy); i__8 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &y[1], & c__1, &yy[1], &i__7, &c__0, &i__8, & reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. 
*/ *(unsigned char *)uplos = *(unsigned char *) uplo; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__7 = laa; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; as[i__8].r = aa[i__9].r, as[i__8].i = aa[ i__9].i; /* L10: */ } ldas = lda; i__7 = lx; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; xs[i__8].r = xx[i__9].r, xs[i__8].i = xx[ i__9].i; /* L20: */ } incxs = incx; bls.r = beta.r, bls.i = beta.i; i__7 = ly; for (i__ = 1; i__ <= i__7; ++i__) { i__8 = i__; i__9 = i__; ys[i__8].r = yy[i__9].r, ys[i__8].i = yy[ i__9].i; /* L30: */ } incys = incy; /* Call the subroutine. */ if (full) { if (*trace) { io___189.ciunit = *ntra; s_wsfe(&io___189); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhemv_(uplo, &n, &alpha, &aa[1], &lda, & xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (banded) { if (*trace) { io___190.ciunit = *ntra; s_wsfe(&io___190); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhbmv_(uplo, &n, &k, 
&alpha, &aa[1], &lda, &xx[1], &incx, &beta, &yy[1], & incy, (ftnlen)1); } else if (packed) { if (*trace) { io___191.ciunit = *ntra; s_wsfe(&io___191); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhpmv_(uplo, &n, &alpha, &aa[1], &xx[1], & incx, &beta, &yy[1], &incy, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___192.ciunit = *nout; s_wsfe(&io___192); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *( unsigned char *)uplos; isame[1] = ns == n; if (full) { isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&as[1], &aa[1], &laa); isame[4] = ldas == lda; isame[5] = lze_(&xs[1], &xx[1], &lx); isame[6] = incxs == incx; isame[7] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[8] = lze_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[8] = lzeres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[9] = incys == incy; } else if (banded) { isame[2] = ks == k; isame[3] = als.r == alpha.r && als.i == alpha.i; isame[4] = lze_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; isame[6] = lze_(&xs[1], &xx[1], &lx); isame[7] = incxs == incx; isame[8] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[9] = lze_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[9] = lzeres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[10] = incys == incy; } else if (packed) { isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&as[1], &aa[1], &laa); 
isame[4] = lze_(&xs[1], &xx[1], &lx); isame[5] = incxs == incx; isame[6] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[7] = lze_(&ys[1], &yy[1], &ly); } else { i__7 = abs(incy); isame[7] = lzeres_("GE", " ", &c__1, & n, &ys[1], &yy[1], &i__7, ( ftnlen)2, (ftnlen)1); } isame[8] = incys == incy; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__7 = nargs; for (i__ = 1; i__ <= i__7; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___195.ciunit = *nout; s_wsfe(&io___195); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result. */ zmvch_("N", &n, &n, &alpha, &a[a_offset], nmax, &x[1], &incx, &beta, &y[1], &incy, &yt[1], &g[1], &yy[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0 */ goto L110; } /* L50: */ } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. 
*/ if (errmax < *thresh) { io___197.ciunit = *nout; s_wsfe(&io___197); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___198.ciunit = *nout; s_wsfe(&io___198); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___199.ciunit = *nout; s_wsfe(&io___199); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___200.ciunit = *nout; s_wsfe(&io___200); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___201.ciunit = *nout; s_wsfe(&io___201); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___202.ciunit = *nout; s_wsfe(&io___202); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, 
(char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of ZCHK2. */ } /* zchk2_ */ /* Subroutine zchk3_
 *
 * Test driver for the triangular routines ZTRMV, ZTBMV, ZTPMV, ZTRSV,
 * ZTBSV and ZTPSV.  The third character of sname selects the storage:
 *   R = full   (ztrmv_ or ztrsv_, nargs = 8 checked arguments)
 *   B = banded (ztbmv_ or ztbsv_, nargs = 9)
 *   P = packed (ztpmv_ or ztpsv_, nargs = 7)
 * and the two characters at sname + 3 are compared with MV and SV to pick
 * the multiply or the solve routine.
 * NOTE(review): nargs is left unset when sname[2] is none of R, B, P.
 *
 * On entry the first *nmax entries of z__ are set to zero (zero vector for
 * ZMVCH).
 *
 * Iteration space: every n in idim[1..*nidim]; every k (kb[1..*nkb] when
 * banded, otherwise n - 1); UPLO in ichu; TRANS in icht; DIAG in ichd;
 * every incx in inc[1..*ninc].  LDA is k + 1 (banded) or n, incremented
 * once when it is smaller than *nmax; the current k is skipped when lda
 * exceeds *nmax.
 *
 * For each combination the routine
 *   1. fills a and aa, x and xx through zmake_ (entry n div 2 of x and its
 *      counterpart in xx are zeroed when n > 1);
 *   2. saves every argument (uplos, transs, diags, ns, ks, as, ldas, xs,
 *      incxs);
 *   3. when *trace is set writes the call to unit *ntra, and when *rewi is
 *      set rewinds that unit;
 *   4. calls the selected routine on aa and xx;
 *   5. sets *fatal and jumps to the failure report if infoc_1.ok is false
 *      or if any of the first nargs isame[] flags is false;
 *   6. when n > 0: for MV calls zmvch_ with a, x, the zero vector z__, xt,
 *      g and xx; for SV first copies the n strided entries of xx into z__
 *      and refills xx from x, then calls zmvch_ with a, z__, x, xt, g and
 *      xx and the flag c_false.  err is folded into errmax.  When n <= 0
 *      it moves on to the next n.
 *
 * Reporting goes to unit *nout: the PASSED message when errmax < *thresh,
 * otherwise the SUSPECT message with errmax; on a fatal condition the
 * FAILED ON CALL NUMBER message followed by the arguments of that call.
 *
 * Arguments not described above: eps is forwarded to zmvch_; incmax and
 * sname_len are not referenced in the body.  The function always returns 0;
 * failure is signalled through *fatal.
 */ int zchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nkb, integer *kb, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *xt, doublereal *g, doublecomplex *z__, ftnlen sname_len) { /* Initialized data: UPLO (ichu), TRANS (icht) and DIAG (ichd) characters tried by the icu, ict and icd loops */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, A,\002,i3,\002, X,\002,i2,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002 A,\002,i3,\002, X,\002,i2,\002" ") .\002)"; static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,3(\002'\002,a1" ",\002',\002),i3,\002, AP, \002,\002X,\002,i2,\002) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, 
char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, k, n, nc, ik, in, nk, ks, ix, ns, lx, laa, icd, lda, ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); char diag[1]; integer ldas; logical same; integer incx; logical full, null; char uplo[1], diags[1]; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs; char trans[1]; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); char uplos[1]; extern /* Subroutine */ int ztbmv_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztbsv_(char *, char *, char *, integer * , integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpmv_(char *, char *, char *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpsv_(char *, char *, char *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrsv_(char *, char *, char *, integer *, doublecomplex * , integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen); logical banded, packed; doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1]; /* Fortran I/O blocks */ static cilist io___239 = { 0, 0, 0, fmt_9993, 0 }; static cilist 
io___240 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___241 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___242 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___243 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___244 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___245 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___248 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___250 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___251 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___252 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___253 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___254 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___255 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZTRMV, ZTBMV, ZTPMV, ZTRSV, ZTBSV and ZTPSV. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --kb; --inc; --z__; --g; --xt; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'R'; banded = *(unsigned char *)&sname[2] == 'B'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 8; } else if (banded) { nargs = 9; } else if (packed) { nargs = 7; } nc = 0; reset = TRUE_; errmax = 0.; /* Set up zero vector for ZMVCH. 
*/ i__1 = *nmax; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; z__[i__2].r = 0., z__[i__2].i = 0.; /* L10: */ } i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; if (banded) { nk = *nkb; } else { nk = 1; } i__2 = nk; for (ik = 1; ik <= i__2; ++ik) { if (banded) { k = kb[ik]; } else { k = n - 1; } /* Set LDA to 1 more than minimum value if room. */ if (banded) { lda = k + 1; } else { lda = n; } if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } null = n <= 0; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1] ; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[icd - 1]; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; zmake_(sname + 1, uplo, diag, &n, &n, &a[a_offset], nmax, &aa[1], &lda, &k, &k, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); i__3 = *ninc; for (ix = 1; ix <= i__3; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__4 = abs(incx); i__5 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, & xx[1], &i__4, &c__0, &i__5, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; x[i__4].r = 0., x[i__4].i = 0.; i__4 = abs(incx) * (n / 2 - 1) + 1; xx[i__4].r = 0., xx[i__4].i = 0.; } ++nc; /* Save every datum before calling the subroutine. 
*/ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; *(unsigned char *)diags = *(unsigned char *)diag; ns = n; ks = k; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[i__6] .i; /* L20: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; xs[i__5].r = xx[i__6].r, xs[i__5].i = xx[i__6] .i; /* L30: */ } incxs = incx; /* Call the subroutine. */ if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen)2) == 0) { if (full) { if (*trace) { io___239.ciunit = *ntra; s_wsfe(&io___239); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztrmv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___240.ciunit = *ntra; s_wsfe(&io___240); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztbmv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___241.ciunit = *ntra; s_wsfe(&io___241); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); 
do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztpmv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { if (full) { if (*trace) { io___242.ciunit = *ntra; s_wsfe(&io___242); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztrsv_(uplo, trans, diag, &n, &aa[1], & lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (banded) { if (*trace) { io___243.ciunit = *ntra; s_wsfe(&io___243); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztbsv_(uplo, trans, diag, &n, &k, &aa[1], &lda, &xx[1], &incx, (ftnlen)1, ( ftnlen)1, (ftnlen)1); } else if (packed) { if (*trace) { io___244.ciunit = *ntra; s_wsfe(&io___244); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__1, (char 
*)&incx, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztpsv_(uplo, trans, diag, &n, &aa[1], &xx[ 1], &incx, (ftnlen)1, (ftnlen)1, ( ftnlen)1); } } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___245.ciunit = *nout; s_wsfe(&io___245); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char *)uplos; isame[1] = *(unsigned char *)trans == *(unsigned char *)transs; isame[2] = *(unsigned char *)diag == *(unsigned char *)diags; isame[3] = ns == n; if (full) { isame[4] = lze_(&as[1], &aa[1], &laa); isame[5] = ldas == lda; if (null) { isame[6] = lze_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[6] = lzeres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[7] = incxs == incx; } else if (banded) { isame[4] = ks == k; isame[5] = lze_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (null) { isame[7] = lze_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[7] = lzeres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[8] = incxs == incx; } else if (packed) { isame[4] = lze_(&as[1], &aa[1], &laa); if (null) { isame[5] = lze_(&xs[1], &xx[1], &lx); } else { i__4 = abs(incx); isame[5] = lzeres_("GE", " ", &c__1, &n, & xs[1], &xx[1], &i__4, (ftnlen)2, ( ftnlen)1); } isame[6] = incxs == incx; } /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___248.ciunit = *nout; s_wsfe(&io___248); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { if (s_cmp(sname + 3, "MV", (ftnlen)2, (ftnlen) 2) == 0) { /* Check the result. 
*/ zmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &x[1], &incx, &c_b1, &z__[ 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_true, ( ftnlen)1); } else if (s_cmp(sname + 3, "SV", (ftnlen)2, ( ftnlen)2) == 0) { /* Compute approximation to original vector. */ i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = (i__ - 1) * abs(incx) + 1; z__[i__5].r = xx[i__6].r, z__[i__5].i = xx[i__6].i; i__5 = (i__ - 1) * abs(incx) + 1; i__6 = i__; xx[i__5].r = x[i__6].r, xx[i__5].i = x[i__6].i; /* L50: */ } zmvch_(trans, &n, &n, &c_b2, &a[a_offset], nmax, &z__[1], &incx, &c_b1, &x[ 1], &incx, &xt[1], &g[1], &xx[1], eps, &err, fatal, nout, &c_false, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L120; } } else { /* Avoid repeating tests with N.le.0. */ goto L110; } /* L60: */ } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } L110: ; } /* Report result. */ if (errmax < *thresh) { io___250.ciunit = *nout; s_wsfe(&io___250); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___251.ciunit = *nout; s_wsfe(&io___251); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___252.ciunit = *nout; s_wsfe(&io___252); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___253.ciunit = *nout; s_wsfe(&io___253); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (banded) { io___254.ciunit = *nout; s_wsfe(&io___254); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, 
uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___255.ciunit = *nout; s_wsfe(&io___255); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of ZCHK3. */ } /* zchk3_ */ /* Subroutine */ int zchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(i3,\002," "\002),\002(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y," "\002,i2,\002, A,\002,i3,\002) \002,\002 " ".\002)"; static char fmt_9993[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - 
SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, m, n; doublecomplex w[1]; integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly, laa, lda; doublecomplex als; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer ldas; logical same, conj; integer incx, incy; logical null; doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); logical reset; integer incxs, incys; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen), zgeru_( integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___285 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___286 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___289 = { 0, 
0, 0, fmt_9998, 0 }; static cilist io___293 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___294 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___295 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___296 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___297 = { 0, 0, 0, fmt_9994, 0 }; /* Tests ZGERC and ZGERU. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ conj = *(unsigned char *)&sname[4] == 'C'; /* Define the number of arguments. */ nargs = 9; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; nd = n / 2 + 1; for (im = 1; im <= 2; ++im) { if (im == 1) { /* Computing MAX */ i__2 = n - nd; m = max(i__2,0); } if (im == 2) { /* Computing MIN */ i__2 = n + nd; m = min(i__2,*nmax); } /* Set LDA to 1 more than minimum value if room. */ lda = m; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L110; } laa = lda * n; null = n <= 0 || m <= 0; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * m; /* Generate the vector X. 
*/ transl.r = .5, transl.i = 0.; i__3 = abs(incx); i__4 = m - 1; zmake_("GE", " ", " ", &c__1, &m, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (m > 1) { i__3 = m / 2; x[i__3].r = 0., x[i__3].i = 0.; i__3 = abs(incx) * (m / 2 - 1) + 1; xx[i__3].r = 0., xx[i__3].i = 0.; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. */ transl.r = 0., transl.i = 0.; i__4 = abs(incy); i__5 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; y[i__4].r = 0., y[i__4].i = 0.; i__4 = abs(incy) * (n / 2 - 1) + 1; yy[i__4].r = 0., yy[i__4].i = 0.; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; i__5 = m - 1; i__6 = n - 1; zmake_(sname + 1, " ", " ", &m, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7].i; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; xs[i__6].r = xx[i__7].r, xs[i__6].i = xx[i__7].i; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; ys[i__6].r = yy[i__7].r, ys[i__6].i = yy[i__7].i; /* L30: */ } incys = incy; /* Call the subroutine. 
*/ if (*trace) { io___285.ciunit = *ntra; s_wsfe(&io___285); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (conj) { if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgerc_(&m, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda); } else { if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgeru_(&m, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___286.ciunit = *nout; s_wsfe(&io___286); e_wsfe(); *fatal = TRUE_; goto L140; } /* See what data changed inside subroutine. */ isame[0] = ms == m; isame[1] = ns == n; isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lze_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lze_(&as[1], &aa[1], &laa); } else { isame[7] = lzeres_("GE", " ", &m, &n, &as[1], &aa[ 1], &lda, (ftnlen)2, (ftnlen)1); } isame[8] = ldas == lda; /* If data was incorrectly changed, report and return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___289.ciunit = *nout; s_wsfe(&io___289); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L140; } if (! null) { /* Check the result column by column. 
*/ if (incx > 0) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L50: */ } } else { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = m - i__ + 1; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L60: */ } } i__5 = n; for (j = 1; j <= i__5; ++j) { if (incy > 0) { i__6 = j; w[0].r = y[i__6].r, w[0].i = y[i__6].i; } else { i__6 = n - j + 1; w[0].r = y[i__6].r, w[0].i = y[i__6].i; } if (conj) { d_cnjg(&z__1, w); w[0].r = z__1.r, w[0].i = z__1.i; } zmvch_("N", &m, &c__1, &alpha, &z__[1], nmax, w, &c__1, &c_b2, &a[j * a_dim1 + 1], & c__1, &yt[1], &g[1], &aa[(j - 1) * lda + 1], eps, &err, fatal, nout, & c_true, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L130; } /* L70: */ } } else { /* Avoid repeating tests with M.le.0 or N.le.0. */ goto L110; } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Report result. */ if (errmax < *thresh) { io___293.ciunit = *nout; s_wsfe(&io___293); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___294.ciunit = *nout; s_wsfe(&io___294); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L150; L130: io___295.ciunit = *nout; s_wsfe(&io___295); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L140: io___296.ciunit = *nout; s_wsfe(&io___296); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___297.ciunit = *nout; s_wsfe(&io___297); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); 
do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); L150: return 0; /* End of ZCHK4. */ } /* zchk4_ */ /* Subroutine */ int zchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, A,\002,i3,\002) " " .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,\002,f4.1,\002, X,\002,i2,\002, AP) " " .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublecomplex z__1; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void d_cnjg(doublecomplex *, const 
doublecomplex *); /* Local variables */ integer i__, j, n; doublecomplex w[1]; integer ia, ja, ic, nc, jj, lj, in, ix, ns, lx, laa, lda; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer ldas; logical same; doublereal rals; integer incx; logical full; extern /* Subroutine */ int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen); logical null; char uplo[1]; extern /* Subroutine */ int zhpr_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical upper; char uplos[1]; logical packed; doublereal ralpha, errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___326 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___327 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___328 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___331 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___338 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___340 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___341 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___342 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___343 = { 0, 0, 0, fmt_9994, 0 }; /* Tests ZHER and ZHPR. 
*/ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; --z__; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 7; } else if (packed) { nargs = 6; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L100; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__3 = abs(incx); i__4 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; x[i__3].r = 0., x[i__3].i = 0.; i__3 = abs(incx) * (n / 2 - 1) + 1; xx[i__3].r = 0., xx[i__3].i = 0.; } i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; ralpha = alf[i__4].r; z__1.r = ralpha, z__1.i = 0.; alpha.r = z__1.r, alpha.i = z__1.i; null = n <= 0 || ralpha == 0.; /* Generate the matrix A. 
*/ transl.r = 0., transl.i = 0.; i__4 = n - 1; i__5 = n - 1; zmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, & aa[1], &lda, &i__4, &i__5, &reset, &transl, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; rals = ralpha; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[i__6].i; /* L10: */ } ldas = lda; i__4 = lx; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; xs[i__5].r = xx[i__6].r, xs[i__5].i = xx[i__6].i; /* L20: */ } incxs = incx; /* Call the subroutine. */ if (full) { if (*trace) { io___326.ciunit = *ntra; s_wsfe(&io___326); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zher_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___327.ciunit = *ntra; s_wsfe(&io___327); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer) ); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhpr_(uplo, &n, &ralpha, &xx[1], &incx, &aa[1], ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___328.ciunit = *nout; s_wsfe(&io___328); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)uplo == *(unsigned char *) uplos; isame[1] = ns == n; isame[2] = rals == ralpha; isame[3] = lze_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; if (null) { isame[5] = lze_(&as[1], &aa[1], &laa); } else { isame[5] = lzeres_(sname + 1, uplo, &n, &n, &as[1], & aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[6] = ldas == lda; } /* If data was incorrectly changed, report and return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___331.ciunit = *nout; s_wsfe(&io___331); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; z__[i__5].r = x[i__6].r, z__[i__5].i = x[i__6] .i; /* L40: */ } } else { i__4 = n; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = n - i__ + 1; z__[i__5].r = x[i__6].r, z__[i__5].i = x[i__6] .i; /* L50: */ } } ja = 1; i__4 = n; for (j = 1; j <= i__4; ++j) { d_cnjg(&z__1, &z__[j]); w[0].r = z__1.r, w[0].i = z__1.i; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } zmvch_("N", &lj, &c__1, &alpha, &z__[jj], &lj, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], &aa[ja], eps, &err, fatal, nout, &c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L110; } /* L60: */ } } else { /* Avoid repeating tests if N.le.0. */ if (n <= 0) { goto L100; } } /* L70: */ } /* L80: */ } /* L90: */ } L100: ; } /* Report result. 
*/ if (errmax < *thresh) { io___338.ciunit = *nout; s_wsfe(&io___338); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L110: io___340.ciunit = *nout; s_wsfe(&io___340); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L120: io___341.ciunit = *nout; s_wsfe(&io___341); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___342.ciunit = *nout; s_wsfe(&io___342); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___343.ciunit = *nout; s_wsfe(&io___343); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of ZCHK5. 
*/ } /* zchk5_ */ /* Subroutine */ int zchk6_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *ninc, integer *inc, integer *nmax, integer *incmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *x, doublecomplex *xx, doublecomplex *xs, doublecomplex *y, doublecomplex *yy, doublecomplex *ys, doublecomplex *yt, doublereal * g, doublecomplex *z__, ftnlen sname_len) { /* Initialized data */ static char ich[2] = "UL"; /* Format strings */ static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y,\002," "i2,\002, A,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002',\002," "i3,\002,(\002,f4.1,\002,\002,f4.1,\002), X,\002,i2,\002, Y,\002," "i2,\002, AP) \002,\002 .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1, z__2, z__3; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void 
d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, n; doublecomplex w[2]; integer ia, ja, ic, nc, jj, lj, in, ix, iy, ns, lx, ly, laa, lda; doublecomplex als; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer ldas; logical same; integer incx, incy; logical full, null; char uplo[1]; extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen), zhpr2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; logical reset; integer incxs, incys; extern /* Subroutine */ int zmvch_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen); logical upper; char uplos[1]; logical packed; doublereal errmax; doublecomplex transl; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___375 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___376 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___377 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___380 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___387 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___388 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___389 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___390 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___391 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___392 = { 0, 0, 0, fmt_9994, 0 }; /* 
Tests ZHER2 and ZHPR2. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; --inc; z_dim1 = *nmax; z_offset = 1 + z_dim1; z__ -= z_offset; --g; --yt; --y; --x; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --ys; --yy; --xs; --xx; /* Function Body */ /* .. Executable Statements .. */ full = *(unsigned char *)&sname[2] == 'E'; packed = *(unsigned char *)&sname[2] == 'P'; /* Define the number of arguments. */ if (full) { nargs = 9; } else if (packed) { nargs = 8; } nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDA to 1 more than minimum value if room. */ lda = n; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L140; } if (packed) { laa = n * (n + 1) / 2; } else { laa = lda * n; } for (ic = 1; ic <= 2; ++ic) { *(unsigned char *)uplo = *(unsigned char *)&ich[ic - 1]; upper = *(unsigned char *)uplo == 'U'; i__2 = *ninc; for (ix = 1; ix <= i__2; ++ix) { incx = inc[ix]; lx = abs(incx) * n; /* Generate the vector X. */ transl.r = .5, transl.i = 0.; i__3 = abs(incx); i__4 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &x[1], &c__1, &xx[1], &i__3, &c__0, &i__4, &reset, &transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); if (n > 1) { i__3 = n / 2; x[i__3].r = 0., x[i__3].i = 0.; i__3 = abs(incx) * (n / 2 - 1) + 1; xx[i__3].r = 0., xx[i__3].i = 0.; } i__3 = *ninc; for (iy = 1; iy <= i__3; ++iy) { incy = inc[iy]; ly = abs(incy) * n; /* Generate the vector Y. 
*/ transl.r = 0., transl.i = 0.; i__4 = abs(incy); i__5 = n - 1; zmake_("GE", " ", " ", &c__1, &n, &y[1], &c__1, &yy[1], & i__4, &c__0, &i__5, &reset, &transl, (ftnlen)2, ( ftnlen)1, (ftnlen)1); if (n > 1) { i__4 = n / 2; y[i__4].r = 0., y[i__4].i = 0.; i__4 = abs(incy) * (n / 2 - 1) + 1; yy[i__4].r = 0., yy[i__4].i = 0.; } i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; null = n <= 0 || alpha.r == 0. && alpha.i == 0.; /* Generate the matrix A. */ transl.r = 0., transl.i = 0.; i__5 = n - 1; i__6 = n - 1; zmake_(sname + 1, uplo, " ", &n, &n, &a[a_offset], nmax, &aa[1], &lda, &i__5, &i__6, &reset, & transl, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7].i; /* L10: */ } ldas = lda; i__5 = lx; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; xs[i__6].r = xx[i__7].r, xs[i__6].i = xx[i__7].i; /* L20: */ } incxs = incx; i__5 = ly; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; ys[i__6].r = yy[i__7].r, ys[i__6].i = yy[i__7].i; /* L30: */ } incys = incy; /* Call the subroutine. 
*/ if (full) { if (*trace) { io___375.ciunit = *ntra; s_wsfe(&io___375); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zher2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], &lda, (ftnlen)1); } else if (packed) { if (*trace) { io___376.ciunit = *ntra; s_wsfe(&io___376); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zhpr2_(uplo, &n, &alpha, &xx[1], &incx, &yy[1], & incy, &aa[1], (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___377.ciunit = *nout; s_wsfe(&io___377); e_wsfe(); *fatal = TRUE_; goto L160; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplo == *(unsigned char * )uplos; isame[1] = ns == n; isame[2] = als.r == alpha.r && als.i == alpha.i; isame[3] = lze_(&xs[1], &xx[1], &lx); isame[4] = incxs == incx; isame[5] = lze_(&ys[1], &yy[1], &ly); isame[6] = incys == incy; if (null) { isame[7] = lze_(&as[1], &aa[1], &laa); } else { isame[7] = lzeres_(sname + 1, uplo, &n, &n, &as[1] , &aa[1], &lda, (ftnlen)2, (ftnlen)1); } if (! packed) { isame[8] = ldas == lda; } /* If data was incorrectly changed, report and return. 
*/ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___380.ciunit = *nout; s_wsfe(&io___380); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof( integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L160; } if (! null) { /* Check the result column by column. */ if (incx > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + z_dim1; i__7 = i__; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L50: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + z_dim1; i__7 = n - i__ + 1; z__[i__6].r = x[i__7].r, z__[i__6].i = x[ i__7].i; /* L60: */ } } if (incy > 0) { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + (z_dim1 << 1); i__7 = i__; z__[i__6].r = y[i__7].r, z__[i__6].i = y[ i__7].i; /* L70: */ } } else { i__5 = n; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + (z_dim1 << 1); i__7 = n - i__ + 1; z__[i__6].r = y[i__7].r, z__[i__6].i = y[ i__7].i; /* L80: */ } } ja = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { d_cnjg(&z__2, &z__[j + (z_dim1 << 1)]); z__1.r = alpha.r * z__2.r - alpha.i * z__2.i, z__1.i = alpha.r * z__2.i + alpha.i * z__2.r; w[0].r = z__1.r, w[0].i = z__1.i; d_cnjg(&z__2, &alpha); d_cnjg(&z__3, &z__[j + z_dim1]); z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = z__2.r * z__3.i + z__2.i * z__3.r; w[1].r = z__1.r, w[1].i = z__1.i; if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } zmvch_("N", &lj, &c__2, &c_b2, &z__[jj + z_dim1], nmax, w, &c__1, &c_b2, &a[jj + j * a_dim1], &c__1, &yt[1], &g[1], & aa[ja], eps, &err, fatal, nout, & c_true, (ftnlen)1); if (full) { if (upper) { ja += lda; } else { ja = ja + lda + 1; } } else { ja += lj; } errmax = max(errmax,err); /* If got really bad answer, report and return. */ if (*fatal) { goto L150; } /* L90: */ } } else { /* Avoid repeating tests with N.le.0. */ if (n <= 0) { goto L140; } } /* L100: */ } /* L110: */ } /* L120: */ } /* L130: */ } L140: ; } /* Report result. 
*/ if (errmax < *thresh) { io___387.ciunit = *nout; s_wsfe(&io___387); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___388.ciunit = *nout; s_wsfe(&io___388); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L170; L150: io___389.ciunit = *nout; s_wsfe(&io___389); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); L160: io___390.ciunit = *nout; s_wsfe(&io___390); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (full) { io___391.ciunit = *nout; s_wsfe(&io___391); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); e_wsfe(); } else if (packed) { io___392.ciunit = *nout; s_wsfe(&io___392); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&incx, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&incy, (ftnlen)sizeof(integer)); e_wsfe(); } L170: return 0; /* End of ZCHK6. 
*/ } /* zchk6_ */ /* Subroutine */ int zchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublecomplex a[1] /* was [1][1] */, x[1], y[1], beta; extern /* Subroutine */ int zher_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen), zhpr_(char *, integer *, doublereal *, doublecomplex *, integer *, doublecomplex *, ftnlen), zher2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen), zhpr2_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, ftnlen); doublecomplex alpha; extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zgbmv_(char *, integer *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zhbmv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zhemv_(char *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), zgeru_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), ztbmv_(char *, char *, char 
*, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), zhpmv_(char *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen), ztbsv_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpmv_( char *, char *, char *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrmv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztpsv_(char *, char *, char *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen), ztrsv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen); doublereal ralpha; extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___399 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___400 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 2 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* ALPHA, RALPHA, BETA, A, X and Y should not need to be defined. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. 
*/ infoc_1.lerr = FALSE_; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; case 10: goto L100; case 11: goto L110; case 12: goto L120; case 13: goto L130; case 14: goto L140; case 15: goto L150; case 16: goto L160; case 17: goto L170; } L10: infoc_1.infot = 1; zgemv_("/", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgemv_("N", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemv_("N", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; zgemv_("N", &c__2, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; zgemv_("N", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L20: infoc_1.infot = 1; zgbmv_("/", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgbmv_("N", &c_n1, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgbmv_("N", &c__0, &c_n1, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgbmv_("N", &c__0, &c__0, &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgbmv_("N", &c__2, &c__0, &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgbmv_("N", &c__0, &c__0, &c__1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgbmv_("N", &c__0, &c__0, &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L30: infoc_1.infot = 1; zhemv_("/", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhemv_("U", &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zhemv_("U", &c__2, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemv_("U", &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zhemv_("U", &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L40: infoc_1.infot = 1; zhbmv_("/", 
&c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhbmv_("U", &c_n1, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhbmv_("U", &c__0, &c_n1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; zhbmv_("U", &c__0, &c__1, &alpha, a, &c__1, x, &c__1, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zhbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__0, &beta, y, &c__1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; zhbmv_("U", &c__0, &c__0, &alpha, a, &c__1, x, &c__1, &beta, y, &c__0, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L50: infoc_1.infot = 1; zhpmv_("/", &c__0, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhpmv_("U", &c_n1, &alpha, a, x, &c__1, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; zhpmv_("U", &c__0, &alpha, a, x, &c__0, &beta, y, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhpmv_("U", &c__0, &alpha, a, x, &c__1, &beta, y, &c__0, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L60: infoc_1.infot = 1; ztrmv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztrmv_("U", "/", "N", &c__0, a, 
&c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztrmv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztrmv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ztrmv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L70: infoc_1.infot = 1; ztbmv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztbmv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztbmv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztbmv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztbmv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ztbmv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, 
(ftnlen) 6); infoc_1.infot = 9; ztbmv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L80: infoc_1.infot = 1; ztpmv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztpmv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztpmv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztpmv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ztpmv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L90: infoc_1.infot = 1; ztrsv_("/", "N", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztrsv_("U", "/", "N", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztrsv_("U", "N", "/", &c__0, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztrsv_("U", "N", "N", &c_n1, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsv_("U", "N", "N", &c__2, a, &c__1, x, &c__1, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, 
&infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; ztrsv_("U", "N", "N", &c__0, a, &c__1, x, &c__0, (ftnlen)1, (ftnlen)1, ( ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L100: infoc_1.infot = 1; ztbsv_("/", "N", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztbsv_("U", "/", "N", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztbsv_("U", "N", "/", &c__0, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztbsv_("U", "N", "N", &c_n1, &c__0, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztbsv_("U", "N", "N", &c__0, &c_n1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ztbsv_("U", "N", "N", &c__0, &c__1, a, &c__1, x, &c__1, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztbsv_("U", "N", "N", &c__0, &c__0, a, &c__1, x, &c__0, (ftnlen)1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L110: infoc_1.infot = 1; ztpsv_("/", "N", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztpsv_("U", "/", "N", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztpsv_("U", "N", "/", &c__0, a, x, &c__1, (ftnlen)1, (ftnlen)1, 
(ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztpsv_("U", "N", "N", &c_n1, a, x, &c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; ztpsv_("U", "N", "N", &c__0, a, x, &c__0, (ftnlen)1, (ftnlen)1, (ftnlen)1) ; chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L120: infoc_1.infot = 1; zgerc_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgerc_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgerc_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zgerc_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zgerc_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L130: infoc_1.infot = 1; zgeru_(&c_n1, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgeru_(&c__0, &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgeru_(&c__0, &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zgeru_(&c__0, &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zgeru_(&c__2, &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1); chkxer_(srnamt, 
&infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L140: infoc_1.infot = 1; zher_("/", &c__0, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zher_("U", &c_n1, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zher_("U", &c__0, &ralpha, x, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher_("U", &c__2, &ralpha, x, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L150: infoc_1.infot = 1; zhpr_("/", &c__0, &ralpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhpr_("U", &c_n1, &ralpha, x, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zhpr_("U", &c__0, &ralpha, x, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L160: infoc_1.infot = 1; zher2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zher2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zher2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2_("U", &c__2, &alpha, x, &c__1, y, &c__1, a, &c__1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, 
&infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L180; L170: infoc_1.infot = 1; zhpr2_("/", &c__0, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhpr2_("U", &c_n1, &alpha, x, &c__1, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zhpr2_("U", &c__0, &alpha, x, &c__0, y, &c__1, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhpr2_("U", &c__0, &alpha, x, &c__1, y, &c__0, a, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L180: if (infoc_1.ok) { io___399.ciunit = *nout; s_wsfe(&io___399); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___400.ciunit = *nout; s_wsfe(&io___400); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of ZCHKE. */ } /* zchke_ */ /* Subroutine */ int zmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublecomplex *a, integer *nmax, doublecomplex *aa, integer *lda, integer *kl, integer *ku, logical *reset, doublecomplex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; doublereal d__1; doublecomplex z__1, z__2; /* Builtin functions */ void d_cnjg(doublecomplex *, const doublecomplex *); integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, i1, i2, i3, jj, kk; logical gen, tri, sym; integer ibeg, iend, ioff; extern /* Double Complex */ void zbeg_(doublecomplex *, logical *); logical unit, lower, upper; /* Generates values for an M by N matrix A within the bandwidth */ /* defined by KL and KU. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'GB', 'HE', 'HB', 'HP', 'TR', 'TB' OR 'TP'. 
*/ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = *(unsigned char *)type__ == 'G'; sym = *(unsigned char *)type__ == 'H'; tri = *(unsigned char *)type__ == 'T'; upper = (sym || tri) && *(unsigned char *)uplo == 'U'; lower = (sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { if (i__ <= j && j - i__ <= *ku || i__ >= j && i__ - j <= *kl) { i__3 = i__ + j * a_dim1; zbeg_(&z__2, reset); z__1.r = z__2.r + transl->r, z__1.i = z__2.i + transl->i; a[i__3].r = z__1.r, a[i__3].i = z__1.i; } else { i__3 = i__ + j * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; } if (i__ != j) { if (sym) { i__3 = j + i__ * a_dim1; d_cnjg(&z__1, &a[i__ + j * a_dim1]); a[i__3].r = z__1.r, a[i__3].i = z__1.i; } else if (tri) { i__3 = j + i__ * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; } } } /* L10: */ } if (sym) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; d__1 = a[i__3].r; z__1.r = d__1, z__1.i = 0.; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (tri) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; z__1.r = a[i__3].r + 1., z__1.i = a[i__3].i + 0.; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (unit) { i__2 = j + j * a_dim1; a[i__2].r = 1., a[i__2].i = 0.; } /* L20: */ } /* Store elements in array AS in data structure required by routine. 
*/ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "GB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *ku + 1 - j; for (i1 = 1; i1 <= i__2; ++i1) { i__3 = i1 + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L60: */ } /* Computing MIN */ i__3 = *kl + *ku + 1, i__4 = *ku + 1 + *m - j; i__2 = min(i__3,i__4); for (i2 = i1; i2 <= i__2; ++i2) { i__3 = i2 + (j - 1) * *lda; i__4 = i2 + j - *ku - 1 + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L70: */ } i__2 = *lda; for (i3 = i2; i3 <= i__2; ++i3) { i__3 = i3 + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L80: */ } /* L90: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L100: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L110: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L120: */ } if (sym) { jj = j + (j - 1) * *lda; i__2 = jj; i__3 = jj; d__1 = aa[i__3].r; z__1.r = d__1, z__1.i = -1e10; aa[i__2].r = z__1.r, aa[i__2].i = z__1.i; } /* L130: */ } } else if (s_cmp(type__, "HB", (ftnlen)2, (ftnlen)2) == 0 || 
s_cmp(type__, "TB", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { kk = *kl + 1; /* Computing MAX */ i__2 = 1, i__3 = *kl + 2 - j; ibeg = max(i__2,i__3); if (unit) { iend = *kl; } else { iend = *kl + 1; } } else { kk = 1; if (unit) { ibeg = 2; } else { ibeg = 1; } /* Computing MIN */ i__2 = *kl + 1, i__3 = *m + 1 - j; iend = min(i__2,i__3); } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L140: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j - kk + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L150: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L160: */ } if (sym) { jj = kk + (j - 1) * *lda; i__2 = jj; i__3 = jj; d__1 = aa[i__3].r; z__1.r = d__1, z__1.i = -1e10; aa[i__2].r = z__1.r, aa[i__2].i = z__1.i; } /* L170: */ } } else if (s_cmp(type__, "HP", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TP", (ftnlen)2, (ftnlen)2) == 0) { ioff = 0; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { ++ioff; i__3 = ioff; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; if (i__ == j) { if (unit) { i__3 = ioff; aa[i__3].r = -1e10, aa[i__3].i = 1e10; } if (sym) { i__3 = ioff; i__4 = ioff; d__1 = aa[i__4].r; z__1.r = d__1, z__1.i = -1e10; aa[i__3].r = z__1.r, aa[i__3].i = z__1.i; } } /* L180: */ } /* L190: */ } } return 0; /* End of ZMAKE. 
*/ } /* zmake_ */ /* Subroutine */ int zmvch_(char *trans, integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, integer *nmax, doublecomplex * x, integer *incx, doublecomplex *beta, doublecomplex *y, integer * incy, doublecomplex *yt, doublereal *g, doublecomplex *yy, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen trans_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 " " EXPECTED RE\002,\002SULT COMPUTED R" "ESULT\002)"; static char fmt_9998[] = "(1x,i7,2(\002 (\002,g15.6,\002,\002,g15.6," "\002)\002))"; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; doublereal d__1, d__2, d__3, d__4, d__5, d__6; doublecomplex z__1, z__2, z__3; /* Builtin functions */ double d_imag(const doublecomplex *); void d_cnjg(doublecomplex *, const doublecomplex *); double z_abs(const doublecomplex *), sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, ml, nl, iy, jx, kx, ky; doublereal erri; logical tran, ctran; integer incxl, incyl; /* Fortran I/O blocks */ static cilist io___430 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___431 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___432 = { 0, 0, 0, fmt_9998, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Statement Functions .. */ /* .. Statement Function definitions .. */ /* .. Executable Statements .. 
*/
    /* Parameter adjustments */
    /* Rebase every array pointer so that the 1-based Fortran subscripts used */
    /* below land on the caller's elements: a is addressed as */
    /* a[row + col * a_dim1] with a_dim1 = *nmax, the vectors as v[1], v[2].. */
    a_dim1 = *nmax;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --x;
    --y;
    --yt;
    --g;
    --yy;

    /* Function Body */
    /* Only the first character of trans is inspected: 'T' selects the */
    /* transposed product, 'C' the conjugate-transposed product, anything */
    /* else the untransposed product. */
    tran = *(unsigned char *)trans == 'T';
    ctran = *(unsigned char *)trans == 'C';
    /* ml = number of results written to yt, nl = number of x entries used */
    /* per result; *m and *n swap roles for the two transposed cases. */
    if (tran || ctran) {
	ml = *n;
	nl = *m;
    } else {
	ml = *m;
	nl = *n;
    }
    /* Only the sign of the increments matters here: a negative increment */
    /* starts at the last entry and steps by -1, otherwise start at 1 and */
    /* step by +1. */
    if (*incx < 0) {
	kx = nl;
	incxl = -1;
    } else {
	kx = 1;
	incxl = 1;
    }
    if (*incy < 0) {
	ky = ml;
	incyl = -1;
    } else {
	ky = 1;
	incyl = 1;
    }

/*     Compute expected result in YT using data in A, X and Y. */
/*     Compute gauges in G. */

    iy = ky;
    i__1 = ml;
    for (i__ = 1; i__ <= i__1; ++i__) {
	/* Start both the running sum and its gauge at zero for this entry. */
	i__2 = iy;
	yt[i__2].r = 0., yt[i__2].i = 0.;
	g[iy] = 0.;
	jx = kx;
	if (tran) {
	    /* yt(iy) += a(j,i) * x(jx); g(iy) accumulates */
	    /* (|Re a| + |Im a|) * (|Re x| + |Im x|) for the same pair. */
	    i__2 = nl;
	    for (j = 1; j <= i__2; ++j) {
		i__3 = iy;
		i__4 = iy;
		i__5 = j + i__ * a_dim1;
		i__6 = jx;
		z__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i,
			z__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6]
			.r;
		z__1.r = yt[i__4].r + z__2.r, z__1.i = yt[i__4].i + z__2.i;
		yt[i__3].r = z__1.r, yt[i__3].i = z__1.i;
		i__3 = j + i__ * a_dim1;
		i__4 = jx;
		g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j
			+ i__ * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r,
			abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4)));
		jx += incxl;
/* L10: */
	    }
	} else if (ctran) {
	    /* Same as the 'T' case, but a(j,i) is conjugated (via d_cnjg) */
	    /* before the multiplication; the gauge is formed identically. */
	    i__2 = nl;
	    for (j = 1; j <= i__2; ++j) {
		i__3 = iy;
		i__4 = iy;
		d_cnjg(&z__3, &a[j + i__ * a_dim1]);
		i__5 = jx;
		z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i, z__2.i =
			z__3.r * x[i__5].i + z__3.i * x[i__5].r;
		z__1.r = yt[i__4].r + z__2.r, z__1.i = yt[i__4].i + z__2.i;
		yt[i__3].r = z__1.r, yt[i__3].i = z__1.i;
		i__3 = j + i__ * a_dim1;
		i__4 = jx;
		g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j
			+ i__ * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r,
			abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4)));
		jx += incxl;
/* L20: */
	    }
	} else {
	    /* Untransposed case: yt(iy) += a(i,j) * x(jx). */
	    i__2 = nl;
	    for (j = 1; j <= i__2; ++j) {
		i__3 = iy;
		i__4 = iy;
		i__5 = i__ + j * a_dim1;
		i__6 = jx;
		z__2.r = a[i__5].r * x[i__6].r - a[i__5].i * x[i__6].i,
			z__2.i = a[i__5].r * x[i__6].i + a[i__5].i * x[i__6]
			.r;
		z__1.r = yt[i__4].r + z__2.r, z__1.i =
			yt[i__4].i + z__2.i;
		yt[i__3].r = z__1.r, yt[i__3].i = z__1.i;
		i__3 = i__ + j * a_dim1;
		i__4 = jx;
		g[iy] += ((d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[
			i__ + j * a_dim1]), abs(d__2))) * ((d__3 = x[i__4].r,
			abs(d__3)) + (d__4 = d_imag(&x[jx]), abs(d__4)));
		jx += incxl;
/* L30: */
	    }
	}
	/* Finish the entry: yt(iy) = alpha * yt(iy) + beta * y(iy). */
	i__2 = iy;
	i__3 = iy;
	z__2.r = alpha->r * yt[i__3].r - alpha->i * yt[i__3].i, z__2.i =
		alpha->r * yt[i__3].i + alpha->i * yt[i__3].r;
	i__4 = iy;
	z__3.r = beta->r * y[i__4].r - beta->i * y[i__4].i, z__3.i = beta->r *
		y[i__4].i + beta->i * y[i__4].r;
	z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
	yt[i__2].r = z__1.r, yt[i__2].i = z__1.i;
	/* Matching gauge, with |z|1 = |Re z| + |Im z|: */
	/* g(iy) = |alpha|1 * g(iy) + |beta|1 * |y(iy)|1. */
	i__2 = iy;
	g[iy] = ((d__1 = alpha->r, abs(d__1)) + (d__2 = d_imag(alpha), abs(
		d__2))) * g[iy] + ((d__3 = beta->r, abs(d__3)) + (d__4 =
		d_imag(beta), abs(d__4))) * ((d__5 = y[i__2].r, abs(d__5)) + (
		d__6 = d_imag(&y[iy]), abs(d__6)));
	iy += incyl;
/* L40: */
    }

/*     Compute the error ratio for this result. */
/*     yt is read with unit stride, yy with stride |*incy|.  Each */
/*     difference is scaled by 1 / *eps and, when the gauge is nonzero, by */
/*     1 / g(i); *err keeps the largest ratio seen so far. */

    *err = 0.;
    i__1 = ml;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__;
	i__3 = (i__ - 1) * abs(*incy) + 1;
	z__1.r = yt[i__2].r - yy[i__3].r, z__1.i = yt[i__2].i - yy[i__3].i;
	erri = z_abs(&z__1) / *eps;
	if (g[i__] != 0.) {
	    erri /= g[i__];
	}
	*err = max(*err,erri);
	/* Stop scanning at the first entry that pushes *err * sqrt(*eps) */
	/* to 1 or beyond and jump to the fatal-error report. */
	if (*err * sqrt(*eps) >= 1.) {
	    goto L60;
	}
/* L50: */
    }
/*     If the loop completes, all results are at least half accurate. */
    goto L80;

/*     Report fatal error. 
*/ L60: *fatal = TRUE_; io___430.ciunit = *nout; s_wsfe(&io___430); e_wsfe(); i__1 = ml; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___431.ciunit = *nout; s_wsfe(&io___431); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&yt[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__2, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(doublereal)); e_wsfe(); } else { io___432.ciunit = *nout; s_wsfe(&io___432); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&yy[(i__ - 1) * abs(*incy) + 1], (ftnlen) sizeof(doublereal)); do_fio(&c__2, (char *)&yt[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); } /* L70: */ } L80: return 0; /* End of ZMVCH. */ } /* zmvch_ */ logical lze_(doublecomplex *ri, doublecomplex *rj, integer *lr) { /* System generated locals */ integer i__1, i__2, i__3; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; if (ri[i__2].r != rj[i__3].r || ri[i__2].i != rj[i__3].i) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LZE. 
*/ } /* lze_ */ logical lzeres_(char *type__, char *uplo, integer *m, integer *n, doublecomplex *aa, doublecomplex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE', 'HE' or 'HP'. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LZERES. 
*/ } /* lzeres_ */ /* Double Complex */ void zbeg_(doublecomplex * ret_val, logical *reset) { /* System generated locals */ doublereal d__1, d__2; doublecomplex z__1; /* Local variables */ static integer i__, j, ic, mi, mj; /* Generates complex numbers as pairs of random numbers uniformly */ /* distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. */ if (*reset) { /* Initialize local variables. */ mi = 891; mj = 457; i__ = 7; j = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I or J is bounded between 1 and 999. */ /* If initial I or J = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I or J = 4 or 8, the period will be 25. */ /* If initial I or J = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I or J */ /* in 6. */ ++ic; L10: i__ *= mi; j *= mj; i__ -= i__ / 1000 * 1000; j -= j / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } d__1 = (i__ - 500) / 1001.; d__2 = (j - 500) / 1001.; z__1.r = d__1, z__1.i = d__2; ret_val->r = z__1.r, ret_val->i = z__1.i; return ; /* End of ZBEG. */ } /* zbeg_ */ doublereal ddiff_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val; /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of DDIFF. 
*/ } /* ddiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___444 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___444.ciunit = *nout; s_wsfe(&io___444); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___445 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___446 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___447 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 2 BLAS */ /* routines. 
*/ /* XERBLA is an error handler for the Level 2 BLAS routines. */ /* It is called by the Level 2 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 2 Blas. */ /* -- Written on 10-August-1987. */ /* Richard Hanson, Sandia National Labs. */ /* Jeremy Du Croz, NAG Central Office. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___445.ciunit = infoc_2.nout; s_wsfe(&io___445); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___446.ciunit = infoc_2.nout; s_wsfe(&io___446); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___447.ciunit = infoc_2.nout; s_wsfe(&io___447); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int zblat2_ () { main (); return 0; } blis-0.6.1/blastest/src/zblat3.c000066400000000000000000006015111360743507500164400ustar00rootroot00000000000000/* zblat3.f -- translated by f2c (version 20100827). 
You must link the resulting object file with libf2c: on Microsoft Windows system, link with libf2c.lib; on Linux or Unix systems, link with .../path/to/libf2c.a -lm or, if you install libf2c.a in a standard place, with -lf2c -lm -- in that order, at the end of the command line, as in cc *.o -lf2c -lm Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., http://www.netlib.org/f2c/libf2c.zip */ #include "f2c.h" /* Common Block Declarations */ union { struct { integer infot, noutc; logical ok, lerr; } _1; struct { integer infot, nout; logical ok, lerr; } _2; } infoc_; #define infoc_1 (infoc_._1) #define infoc_2 (infoc_._2) struct { char srnamt[6]; } srnamc_; #define srnamc_1 srnamc_ /* Table of constant values */ static doublecomplex c_b1 = {0.,0.}; static doublecomplex c_b2 = {1.,0.}; static integer c__9 = 9; static integer c__1 = 1; static integer c__3 = 3; static integer c__8 = 8; static integer c__5 = 5; static integer c__65 = 65; static integer c__7 = 7; static integer c__2 = 2; static doublereal c_b88 = 0.; static logical c_true = TRUE_; static logical c_false = FALSE_; static integer c__0 = 0; static integer c_n1 = -1; /* > \brief \b ZBLAT3 */ /* =========== DOCUMENTATION =========== */ /* Online html documentation available at */ /* http://www.netlib.org/lapack/explore-html/ */ /* Definition: */ /* =========== */ /* PROGRAM ZBLAT3 */ /* > \par Purpose: */ /* ============= */ /* > */ /* > \verbatim */ /* > */ /* > Test program for the COMPLEX*16 Level 3 Blas. */ /* > */ /* > The program must be driven by a short data file. The first 14 records */ /* > of the file are read using list-directed input, the last 9 records */ /* > are read using the format ( A6, L2 ). 
An annotated example of a data */ /* > file can be obtained by deleting the first 3 characters from the */ /* > following 23 lines: */ /* > 'zblat3.out' NAME OF SUMMARY OUTPUT FILE */ /* > 6 UNIT NUMBER OF SUMMARY FILE */ /* > 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE */ /* > -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) */ /* > F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. */ /* > F LOGICAL FLAG, T TO STOP ON FAILURES. */ /* > T LOGICAL FLAG, T TO TEST ERROR EXITS. */ /* > 16.0 THRESHOLD VALUE OF TEST RATIO */ /* > 6 NUMBER OF VALUES OF N */ /* > 0 1 2 3 5 9 VALUES OF N */ /* > 3 NUMBER OF VALUES OF ALPHA */ /* > (0.0,0.0) (1.0,0.0) (0.7,-0.9) VALUES OF ALPHA */ /* > 3 NUMBER OF VALUES OF BETA */ /* > (0.0,0.0) (1.0,0.0) (1.3,-1.1) VALUES OF BETA */ /* > ZGEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHEMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZSYMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTRMM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZTRSM T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHERK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZSYRK T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZHER2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > ZSYR2K T PUT F FOR NO TEST. SAME COLUMNS. */ /* > */ /* > */ /* > Further Details */ /* > =============== */ /* > */ /* > See: */ /* > */ /* > Dongarra J. J., Du Croz J. J., Duff I. S. and Hammarling S. */ /* > A Set of Level 3 Basic Linear Algebra Subprograms. */ /* > */ /* > Technical Memorandum No.88 (Revision 1), Mathematics and */ /* > Computer Science Division, Argonne National Laboratory, 9700 */ /* > South Cass Avenue, Argonne, Illinois 60439, US. */ /* > */ /* > -- Written on 8-February-1989. */ /* > Jack Dongarra, Argonne National Laboratory. */ /* > Iain Duff, AERE Harwell. */ /* > Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* > Sven Hammarling, Numerical Algorithms Group Ltd. 
*/ /* > */ /* > 10-9-00: Change STATUS='NEW' to 'UNKNOWN' so that the testers */ /* > can be run multiple times without deleting generated */ /* > output files (susan) */ /* > \endverbatim */ /* Authors: */ /* ======== */ /* > \author Univ. of Tennessee */ /* > \author Univ. of California Berkeley */ /* > \author Univ. of Colorado Denver */ /* > \author NAG Ltd. */ /* > \date April 2012 */ /* > \ingroup complex16_blas_testing */ /* ===================================================================== */ /* Main program */ int main(void) { /* Initialized data */ static char snames[6*9] = "ZGEMM " "ZHEMM " "ZSYMM " "ZTRMM " "ZTRSM " "ZHERK " "ZSYRK " "ZHER2K" "ZSYR2K"; /* Format strings */ static char fmt_9997[] = "(\002 NUMBER OF VALUES OF \002,a,\002 IS LESS " "THAN 1 OR GREATER \002,\002THAN \002,i2)"; static char fmt_9996[] = "(\002 VALUE OF N IS LESS THAN 0 OR GREATER THA" "N \002,i2)"; static char fmt_9995[] = "(\002 TESTS OF THE COMPLEX*16 LEVEL 3 BL" "AS\002,//\002 THE F\002,\002OLLOWING PARAMETER VALUES WILL BE US" "ED:\002)"; static char fmt_9994[] = "(\002 FOR N \002,9i6)"; static char fmt_9993[] = "(\002 FOR ALPHA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9992[] = "(\002 FOR BETA \002,7(\002(\002,f4" ".1,\002,\002,f4.1,\002) \002,:))"; static char fmt_9984[] = "(\002 ERROR-EXITS WILL NOT BE TESTED\002)"; static char fmt_9999[] = "(\002 ROUTINES PASS COMPUTATIONAL TESTS IF TES" "T RATIO IS LES\002,\002S THAN\002,f8.2)"; static char fmt_9988[] = "(a6,l2)"; static char fmt_9990[] = "(\002 SUBPROGRAM NAME \002,a6,\002 NOT RECOGNI" "ZED\002,/\002 ******* T\002,\002ESTS ABANDONED *******\002)"; static char fmt_9998[] = "(\002 RELATIVE MACHINE PRECISION IS TAKEN TO" " BE\002,1p,d9.1)"; static char fmt_9989[] = "(\002 ERROR IN ZMMCH - IN-LINE DOT PRODUCTS A" "RE BEING EVALU\002,\002ATED WRONGLY.\002,/\002 ZMMCH WAS CALLED " "WITH TRANSA = \002,a1,\002 AND TRANSB = \002,a1,/\002 AND RETURN" "ED SAME = \002,l1,\002 AND \002,\002ERR = 
\002,f12.3,\002.\002," "/\002 THIS MAY BE DUE TO FAULTS IN THE \002,\002ARITHMETIC OR TH" "E COMPILER.\002,/\002 ******* TESTS ABANDONED \002,\002******" "*\002)"; static char fmt_9987[] = "(1x,a6,\002 WAS NOT TESTED\002)"; static char fmt_9986[] = "(/\002 END OF TESTS\002)"; static char fmt_9985[] = "(/\002 ******* FATAL ERROR - TESTS ABANDONED *" "******\002)"; static char fmt_9991[] = "(\002 AMEND DATA FILE OR INCREASE ARRAY SIZES " "IN PROGRAM\002,/\002 ******* TESTS ABANDONED *******\002)"; /* System generated locals */ integer i__1, i__2, i__3, i__4, i__5; olist o__1; cllist cl__1; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), f_open(olist *), s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void), s_rsfe(cilist *), e_rsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Subroutine */ int s_stop(char *, ftnlen); integer f_clos(cllist *); /* Subroutine */ int s_copy(char *, const char *, ftnlen, ftnlen); /* Local variables */ doublecomplex c__[4225] /* was [65][65] */; doublereal g[65]; integer i__, j, n; doublecomplex w[130], aa[4225], ab[8450] /* was [65][130] */, bb[4225], cc[4225], as[4225], bs[4225], cs[4225], ct[65], alf[7], bet[7]; doublereal eps, err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer nalf, idim[9]; logical same; integer nbet, ntra; logical rewi; integer nout; extern /* Subroutine */ int zchk1_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk2_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, 
doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk3_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex * , doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen), zchk4_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, ftnlen), zchk5_(char *, doublereal *, doublereal *, integer *, integer *, logical *, logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, ftnlen); logical fatal, trace; integer nidim; extern /* Subroutine */ int zchke_(integer *, char *, integer *, ftnlen), zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); char snaps[32]; integer isnum; logical ltest[9], sfatal; char snamet[6], transa[1], transb[1]; doublereal thresh; logical ltestt, tsterr; char summry[32]; extern double d_epsilon_(doublereal *); /* Fortran I/O blocks */ static cilist io___2 = { 0, 
5, 0, 0, 0 }; static cilist io___4 = { 0, 5, 0, 0, 0 }; static cilist io___6 = { 0, 5, 0, 0, 0 }; static cilist io___8 = { 0, 5, 0, 0, 0 }; static cilist io___11 = { 0, 5, 0, 0, 0 }; static cilist io___13 = { 0, 5, 0, 0, 0 }; static cilist io___15 = { 0, 5, 0, 0, 0 }; static cilist io___17 = { 0, 5, 0, 0, 0 }; static cilist io___19 = { 0, 5, 0, 0, 0 }; static cilist io___21 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___22 = { 0, 5, 0, 0, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___26 = { 0, 5, 0, 0, 0 }; static cilist io___28 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___29 = { 0, 5, 0, 0, 0 }; static cilist io___31 = { 0, 5, 0, 0, 0 }; static cilist io___33 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___34 = { 0, 5, 0, 0, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___40 = { 0, 0, 0, 0, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9984, 0 }; static cilist io___42 = { 0, 0, 0, 0, 0 }; static cilist io___43 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___44 = { 0, 0, 0, 0, 0 }; static cilist io___46 = { 0, 5, 1, fmt_9988, 0 }; static cilist io___49 = { 0, 0, 0, fmt_9990, 0 }; static cilist io___51 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___64 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___65 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___66 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___67 = { 0, 0, 0, fmt_9989, 0 }; static cilist io___69 = { 0, 0, 0, 0, 0 }; static cilist io___70 = { 0, 0, 0, fmt_9987, 0 }; static cilist io___71 = { 0, 0, 0, 0, 0 }; static cilist io___78 = { 0, 0, 0, fmt_9986, 0 }; static cilist io___79 = { 0, 0, 0, fmt_9985, 0 }; static cilist io___80 = { 0, 0, 0, fmt_9991, 0 }; /* -- Reference BLAS test routine (version 3.4.1) -- */ /* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* April 2012 */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* .. Executable Statements .. */ /* Read name and unit number for summary output file and open file. */ s_rsle(&io___2); do_lio(&c__9, &c__1, summry, (ftnlen)32); e_rsle(); s_rsle(&io___4); do_lio(&c__3, &c__1, (char *)&nout, (ftnlen)sizeof(integer)); e_rsle(); o__1.oerr = 0; o__1.ounit = nout; o__1.ofnmlen = 32; o__1.ofnm = summry; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); infoc_1.noutc = nout; /* Read name and unit number for snapshot output file and open file. */ s_rsle(&io___6); do_lio(&c__9, &c__1, snaps, (ftnlen)32); e_rsle(); s_rsle(&io___8); do_lio(&c__3, &c__1, (char *)&ntra, (ftnlen)sizeof(integer)); e_rsle(); trace = ntra >= 0; if (trace) { o__1.oerr = 0; o__1.ounit = ntra; o__1.ofnmlen = 32; o__1.ofnm = snaps; o__1.orl = 0; o__1.osta = "UNKNOWN"; o__1.oacc = 0; o__1.ofm = 0; o__1.oblnk = 0; f_open(&o__1); } /* Read the flag that directs rewinding of the snapshot file. */ s_rsle(&io___11); do_lio(&c__8, &c__1, (char *)&rewi, (ftnlen)sizeof(logical)); e_rsle(); rewi = rewi && trace; /* Read the flag that directs stopping on any failure. */ s_rsle(&io___13); do_lio(&c__8, &c__1, (char *)&sfatal, (ftnlen)sizeof(logical)); e_rsle(); /* Read the flag that indicates whether error exits are to be tested. */ s_rsle(&io___15); do_lio(&c__8, &c__1, (char *)&tsterr, (ftnlen)sizeof(logical)); e_rsle(); /* Read the threshold value of the test ratio */ s_rsle(&io___17); do_lio(&c__5, &c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_rsle(); /* Read and check the parameter values for the tests. 
*/ /* Values of N */ s_rsle(&io___19); do_lio(&c__3, &c__1, (char *)&nidim, (ftnlen)sizeof(integer)); e_rsle(); if (nidim < 1 || nidim > 9) { io___21.ciunit = nout; s_wsfe(&io___21); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___22); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__3, &c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_rsle(); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { if (idim[i__ - 1] < 0 || idim[i__ - 1] > 65) { io___25.ciunit = nout; s_wsfe(&io___25); do_fio(&c__1, (char *)&c__65, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } /* L10: */ } /* Values of ALPHA */ s_rsle(&io___26); do_lio(&c__3, &c__1, (char *)&nalf, (ftnlen)sizeof(integer)); e_rsle(); if (nalf < 1 || nalf > 7) { io___28.ciunit = nout; s_wsfe(&io___28); do_fio(&c__1, "ALPHA", (ftnlen)5); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___29); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__7, &c__1, (char *)&alf[i__ - 1], (ftnlen)sizeof( doublecomplex)); } e_rsle(); /* Values of BETA */ s_rsle(&io___31); do_lio(&c__3, &c__1, (char *)&nbet, (ftnlen)sizeof(integer)); e_rsle(); if (nbet < 1 || nbet > 7) { io___33.ciunit = nout; s_wsfe(&io___33); do_fio(&c__1, "BETA", (ftnlen)4); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof(integer)); e_wsfe(); goto L220; } s_rsle(&io___34); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_lio(&c__7, &c__1, (char *)&bet[i__ - 1], (ftnlen)sizeof( doublecomplex)); } e_rsle(); /* Report values of parameters. 
*/ io___36.ciunit = nout; s_wsfe(&io___36); e_wsfe(); io___37.ciunit = nout; s_wsfe(&io___37); i__1 = nidim; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__1, (char *)&idim[i__ - 1], (ftnlen)sizeof(integer)); } e_wsfe(); io___38.ciunit = nout; s_wsfe(&io___38); i__1 = nalf; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&alf[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); io___39.ciunit = nout; s_wsfe(&io___39); i__1 = nbet; for (i__ = 1; i__ <= i__1; ++i__) { do_fio(&c__2, (char *)&bet[i__ - 1], (ftnlen)sizeof(doublereal)); } e_wsfe(); if (! tsterr) { io___40.ciunit = nout; s_wsle(&io___40); e_wsle(); io___41.ciunit = nout; s_wsfe(&io___41); e_wsfe(); } io___42.ciunit = nout; s_wsle(&io___42); e_wsle(); io___43.ciunit = nout; s_wsfe(&io___43); do_fio(&c__1, (char *)&thresh, (ftnlen)sizeof(doublereal)); e_wsfe(); io___44.ciunit = nout; s_wsle(&io___44); e_wsle(); /* Read names of subroutines and flags which indicate */ /* whether they are to be tested. */ for (i__ = 1; i__ <= 9; ++i__) { ltest[i__ - 1] = FALSE_; /* L20: */ } L30: i__1 = s_rsfe(&io___46); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, snamet, (ftnlen)6); if (i__1 != 0) { goto L60; } i__1 = do_fio(&c__1, (char *)<estt, (ftnlen)sizeof(logical)); if (i__1 != 0) { goto L60; } i__1 = e_rsfe(); if (i__1 != 0) { goto L60; } for (i__ = 1; i__ <= 9; ++i__) { if (s_cmp(snamet, snames + (i__ - 1) * 6, (ftnlen)6, (ftnlen)6) == 0) { goto L50; } /* L40: */ } io___49.ciunit = nout; s_wsfe(&io___49); do_fio(&c__1, snamet, (ftnlen)6); e_wsfe(); s_stop("", (ftnlen)0); L50: ltest[i__ - 1] = ltestt; goto L30; L60: cl__1.cerr = 0; cl__1.cunit = 5; cl__1.csta = 0; f_clos(&cl__1); /* Compute EPS (the machine precision). */ eps = d_epsilon_(&c_b88); io___51.ciunit = nout; s_wsfe(&io___51); do_fio(&c__1, (char *)&eps, (ftnlen)sizeof(doublereal)); e_wsfe(); /* Check the reliability of ZMMCH using exact data. 
*/ n = 32; i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * 65 - 66; /* Computing MAX */ i__5 = i__ - j + 1; i__4 = max(i__5,0); ab[i__3].r = (doublereal) i__4, ab[i__3].i = 0.; /* L90: */ } i__2 = j + 4224; ab[i__2].r = (doublereal) j, ab[i__2].i = 0.; i__2 = (j + 65) * 65 - 65; ab[i__2].r = (doublereal) j, ab[i__2].i = 0.; i__2 = j - 1; c__[i__2].r = 0., c__[i__2].i = 0.; /* L100: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; cc[i__2].r = (doublereal) i__3, cc[i__2].i = 0.; /* L110: */ } /* CC holds the exact result. On exit from ZMMCH CT holds */ /* the result computed by ZMMCH. */ *(unsigned char *)transa = 'N'; *(unsigned char *)transb = 'N'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { io___64.ciunit = nout; s_wsfe(&io___64); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'C'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) 
{ io___65.ciunit = nout; s_wsfe(&io___65); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = j + 4224; i__3 = n - j + 1; ab[i__2].r = (doublereal) i__3, ab[i__2].i = 0.; i__2 = (j + 65) * 65 - 65; i__3 = n - j + 1; ab[i__2].r = (doublereal) i__3, ab[i__2].i = 0.; /* L120: */ } i__1 = n; for (j = 1; j <= i__1; ++j) { i__2 = n - j; i__3 = j * ((j + 1) * j) / 2 - (j + 1) * j * (j - 1) / 3; cc[i__2].r = (doublereal) i__3, cc[i__2].i = 0.; /* L130: */ } *(unsigned char *)transa = 'C'; *(unsigned char *)transb = 'N'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { io___66.ciunit = nout; s_wsfe(&io___66); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } *(unsigned char *)transb = 'C'; zmmch_(transa, transb, &n, &c__1, &n, &c_b2, ab, &c__65, &ab[4225], & c__65, &c_b1, c__, &c__65, ct, g, cc, &c__65, &eps, &err, &fatal, &nout, &c_true, (ftnlen)1, (ftnlen)1); same = lze_(cc, ct, &n); if (! same || err != 0.) { io___67.ciunit = nout; s_wsfe(&io___67); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&same, (ftnlen)sizeof(logical)); do_fio(&c__1, (char *)&err, (ftnlen)sizeof(doublereal)); e_wsfe(); s_stop("", (ftnlen)0); } /* Test each subroutine in turn. */ for (isnum = 1; isnum <= 9; ++isnum) { io___69.ciunit = nout; s_wsle(&io___69); e_wsle(); if (! ltest[isnum - 1]) { /* Subprogram is not to be tested. 
*/ io___70.ciunit = nout; s_wsfe(&io___70); do_fio(&c__1, snames + (isnum - 1) * 6, (ftnlen)6); e_wsfe(); } else { s_copy(srnamc_1.srnamt, snames + (isnum - 1) * 6, (ftnlen)6, ( ftnlen)6); /* Test error exits. */ if (tsterr) { zchke_(&isnum, snames + (isnum - 1) * 6, &nout, (ftnlen)6); io___71.ciunit = nout; s_wsle(&io___71); e_wsle(); } /* Test computations. */ infoc_1.infot = 0; infoc_1.ok = TRUE_; fatal = FALSE_; switch (isnum) { case 1: goto L140; case 2: goto L150; case 3: goto L150; case 4: goto L160; case 5: goto L160; case 6: goto L170; case 7: goto L170; case 8: goto L180; case 9: goto L180; } /* Test ZGEMM, 01. */ L140: zchk1_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZHEMM, 02, ZSYMM, 03. */ L150: zchk2_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZTRMM, 04, ZTRSM, 05. */ L160: zchk3_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &c__65, ab, aa, as, &ab[4225], bb, bs, ct, g, c__, (ftnlen)6); goto L190; /* Test ZHERK, 06, ZSYRK, 07. */ L170: zchk4_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, &ab[4225], bb, bs, c__, cc, cs, ct, g, (ftnlen)6); goto L190; /* Test ZHER2K, 08, ZSYR2K, 09. 
*/ L180: zchk5_(snames + (isnum - 1) * 6, &eps, &thresh, &nout, &ntra, & trace, &rewi, &fatal, &nidim, idim, &nalf, alf, &nbet, bet, &c__65, ab, aa, as, bb, bs, c__, cc, cs, ct, g, w, ( ftnlen)6); goto L190; L190: if (fatal && sfatal) { goto L210; } } /* L200: */ } io___78.ciunit = nout; s_wsfe(&io___78); e_wsfe(); goto L230; L210: io___79.ciunit = nout; s_wsfe(&io___79); e_wsfe(); goto L230; L220: io___80.ciunit = nout; s_wsfe(&io___80); e_wsfe(); L230: if (trace) { cl__1.cerr = 0; cl__1.cunit = ntra; cl__1.csta = 0; f_clos(&cl__1); } cl__1.cerr = 0; cl__1.cunit = nout; cl__1.csta = 0; f_clos(&cl__1); s_stop("", (ftnlen)0); /* End of ZBLAT3. */ return 0; } /* main */ /* Subroutine zchk1_: computational test driver for ZGEMM.

   For every m, n and k taken from idim[1..*nidim], every transa and transb
   option in ich ("NTC"), every alpha in alf[1..*nalf] and every beta in
   bet[1..*nbet], the routine builds A, B and C with zmake_, saves a copy of
   every argument, calls zgemm_, and then checks that
     (1) infoc_1.ok is true,
     (2) every isame[] flag is true (arguments are compared with their saved
         copies directly, or through lze_ and lzeres_),
     (3) when m > 0 and n > 0, zmmch_ is run on the result and *fatal is
         tested afterwards.

   Arguments (all passed by reference, as in the Fortran original):
     sname       6-character routine name written in trace and report lines.
     eps         passed unchanged to zmmch_.
     thresh      errmax < *thresh selects the PASSED report; otherwise the
                 SUSPECT report is written.
     nout        unit number for report output.
     ntra        unit number for the call trace.
     trace       when set, each call is written to unit *ntra first.
     rewi        when set, unit *ntra is rewound before each call.
     fatal       set to TRUE_ when check (1) or (2) fails; also handed to
                 zmmch_ and tested again after it returns.
     nidim, idim count and values of the dimensions to try.
     nalf, alf   count and values of alpha.
     nbet, bet   count and values of beta.
     nmax        leading dimension of a, b and c__, and the upper limit for
                 lda, ldb and ldc.
     a, b, c__   matrices with leading dimension *nmax given to zmake_ and
                 zmmch_.
     aa, bb, cc  arrays handed to zgemm_ together with lda, ldb and ldc.
     as, bs, cs  copies of aa, bb and cc taken before the call.
     ct, g       passed to zmmch_ (NOTE(review): presumably workspace;
                 confirm against zmmch_).
     sname_len   string-length argument; not referenced in the body.

   Always returns 0. A failure is signalled through *fatal and the text
   written to unit *nout at label L120. */ int zchk1_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { /* Initialized data: ich holds the transpose options tried for both transa and transb. */ static char ich[3] = "NTC"; /* Format strings: Fortran FORMAT text referenced by the cilist I/O blocks declared below. */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002('\002,a1,\002','\002" ",a1,\002',\002,3(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1," "\002), A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1" ",\002), C,\002,i3,\002).\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM 
TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; alist al__1; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables: tranas, tranbs, ms, ns, ks, als, bls, ldas, ldbs and ldcs hold the argument values saved before each zgemm_ call; isame[] holds one flag per zgemm_ argument checked. */ integer i__, k, m, n, ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns, ica, icb, laa, lbb, lda, lcc, ldb, ldc; doublecomplex als, bls; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); doublecomplex beta; integer ldas, ldbs, ldcs; logical same, null; doublecomplex alpha; logical isame[13], trana, tranb; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen), zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); logical reset; char tranas[1], tranbs[1], transa[1], transb[1]; doublereal errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks: one per formatted write; the ciunit field is assigned just before each use. */ static cilist io___124 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___125 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___128 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___130 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___131 = { 0, 0, 0, fmt_9997, 0 }; static 
cilist io___132 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___133 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZGEMM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments: each pointer is moved back so that index 1 (index a_offset, b_offset, c_offset for the two-dimensional a, b, c__) addresses the first element supplied by the caller. */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body: nargs is the number of isame[] flags examined, nc counts the zgemm_ calls, errmax is the largest err seen from zmmch_. */ /* .. Executable Statements .. */ nargs = 13; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room: L100 continues with the next n. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; null = n <= 0 || m <= 0; i__3 = *nidim; for (ik = 1; ik <= i__3; ++ik) { k = idim[ik]; for (ica = 1; ica <= 3; ++ica) { *(unsigned char *)transa = *(unsigned char *)&ich[ica - 1] ; trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; if (trana) { ma = k; na = m; } else { ma = m; na = k; } /* A is ma by na: k by m when transa is 'T' or 'C', m by k otherwise. Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room: L80 continues with the next transa. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. 
*/ zmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[ 1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, ( ftnlen)1); for (icb = 1; icb <= 3; ++icb) { *(unsigned char *)transb = *(unsigned char *)&ich[icb - 1]; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; if (tranb) { mb = n; nb = k; } else { mb = k; nb = n; } /* B is mb by nb: n by k when transb is 'T' or 'C', k by n otherwise. Set LDB to 1 more than minimum value if room. */ ldb = mb; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room: L70 continues with the next transb. */ if (ldb > *nmax) { goto L70; } lbb = ldb * nb; /* Generate the matrix B. */ zmake_("GE", " ", " ", &mb, &nb, &b[b_offset], nmax, & bb[1], &ldb, &reset, &c_b1, (ftnlen)2, ( ftnlen)1, (ftnlen)1); i__4 = *nalf; for (ia = 1; ia <= i__4; ++ia) { i__5 = ia; alpha.r = alf[i__5].r, alpha.i = alf[i__5].i; i__5 = *nbet; for (ib = 1; ib <= i__5; ++ib) { i__6 = ib; beta.r = bet[i__6].r, beta.i = bet[i__6].i; /* Generate the matrix C (regenerated for every alpha and beta pair) and count the call. */ zmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)tranbs = *(unsigned char *) transb; ms = m; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__6 = laa; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; as[i__7].r = aa[i__8].r, as[i__7].i = aa[ i__8].i; /* L10: */ } ldas = lda; i__6 = lbb; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; bs[i__7].r = bb[i__8].r, bs[i__7].i = bb[ i__8].i; /* L20: */ } ldbs = ldb; bls.r = beta.r, bls.i = beta.i; i__6 = lcc; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = i__; cs[i__7].r = cc[i__8].r, cs[i__7].i = cc[ i__8].i; /* L30: */ } ldcs = ldc; /* Call the subroutine. 
*/ if (*trace) { io___124.ciunit = *ntra; s_wsfe(&io___124); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zgemm_(transa, transb, &m, &n, &k, &alpha, & aa[1], &lda, &bb[1], &ldb, &beta, &cc[ 1], &ldc, (ftnlen)1, (ftnlen)1); /* Check if error-exit was taken incorrectly; on failure set *fatal and report the call at L120. */ if (! infoc_1.ok) { io___125.ciunit = *nout; s_wsfe(&io___125); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines: isame[i] is the flag for zgemm_ argument i + 1. C is compared with lze_ when m or n is not positive and with lzeres_ otherwise. */ isame[0] = *(unsigned char *)transa == *( unsigned char *)tranas; isame[1] = *(unsigned char *)transb == *( unsigned char *)tranbs; isame[2] = ms == m; isame[3] = ns == n; isame[4] = ks == k; isame[5] = als.r == alpha.r && als.i == alpha.i; isame[6] = lze_(&as[1], &aa[1], &laa); isame[7] = ldas == lda; isame[8] = lze_(&bs[1], &bb[1], &lbb); isame[9] = ldbs == ldb; isame[10] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[11] = lze_(&cs[1], &cc[1], &lcc); } else { isame[11] = lzeres_("GE", " ", &m, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[12] = ldcs == ldc; /* If data was incorrectly changed, report */ /* and return. The number written is the 1-based position of the changed argument. */ same = TRUE_; i__6 = nargs; for (i__ = 1; i__ <= i__6; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___128.ciunit = *nout; s_wsfe(&io___128); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! 
null) { /* Check the result: err from zmmch_ is folded into errmax and *fatal is tested afterwards. */ zmmch_(transa, transb, &m, &n, &k, &alpha, &a[a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen)1, (ftnlen)1); errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L120; } } /* L50: */ } /* L60: */ } L70: ; } L80: ; } /* L90: */ } L100: ; } /* L110: */ } /* Report result: fmt_9999 (PASSED) when errmax is below *thresh, otherwise fmt_9997 (SUSPECT) with errmax. L120 further down instead writes the FAILED line followed by the parameters of the failing call. */ if (errmax < *thresh) { io___130.ciunit = *nout; s_wsfe(&io___130); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___131.ciunit = *nout; s_wsfe(&io___131); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; L120: io___132.ciunit = *nout; s_wsfe(&io___132); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___133.ciunit = *nout; s_wsfe(&io___133); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, transb, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L130: return 0; /* End of ZCHK1. 
*/ } /* zchk1_ */ /* Subroutine zchk2_: computational test driver for ZHEMM and ZSYMM.

   zhemm_ is called when the two characters starting at sname + 1 are "HE",
   zsymm_ otherwise. For every m and n taken from idim[1..*nidim], every
   side in ichs ("LR"), every uplo in ichu ("UL"), every alpha in
   alf[1..*nalf] and every beta in bet[1..*nbet], the routine builds B, A
   and C with zmake_, saves a copy of every argument, makes the call, and
   then checks that
     (1) infoc_1.ok is true,
     (2) every isame[] flag is true (arguments are compared with their saved
         copies directly, or through lze_ and lzeres_),
     (3) when m > 0 and n > 0, zmmch_ is run on the result and *fatal is
         tested afterwards.

   Arguments (all passed by reference, as in the Fortran original):
     sname       6-character routine name written in trace and report lines;
                 sname + 1 is also passed to zmake_ as the type of A.
     eps         passed unchanged to zmmch_.
     thresh      errmax < *thresh selects the PASSED report; otherwise the
                 SUSPECT report is written.
     nout        unit number for report output.
     ntra        unit number for the call trace.
     trace       when set, each call is written to unit *ntra first.
     rewi        when set, unit *ntra is rewound before each call.
     fatal       set to TRUE_ when check (1) or (2) fails; also handed to
                 zmmch_ and tested again after it returns.
     nidim, idim count and values of the dimensions to try.
     nalf, alf   count and values of alpha.
     nbet, bet   count and values of beta.
     nmax        leading dimension of a, b and c__, and the upper limit for
                 lda, ldb and ldc.
     a, b, c__   matrices with leading dimension *nmax given to zmake_ and
                 zmmch_.
     aa, bb, cc  arrays handed to zhemm_ or zsymm_ together with lda, ldb
                 and ldc.
     as, bs, cs  copies of aa, bb and cc taken before the call.
     ct, g       passed to zmmch_ (NOTE(review): presumably workspace;
                 confirm against zmmch_).
     sname_len   string-length argument; not referenced in the body.

   Always returns 0. A failure is signalled through *fatal and the text
   written to unit *nout at label L110. */ int zchk2_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { /* Initialized data: ichs holds the side options and ichu the uplo options tried. */ static char ichs[2] = "LR"; static char ichu[2] = "UL"; /* Format strings: Fortran FORMAT text referenced by the cilist I/O blocks declared below. */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002)" ", C,\002,i3,\002) .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables: sides, uplos, ms, ns, als, bls, ldas, ldbs and ldcs hold the argument values saved before each call; isame[] holds one flag per argument checked. */ integer i__, m, n, ia, ib, na, nc, im, in, ms, ns, laa, lbb, lda, lcc, ldb, ldc, ics; doublecomplex als, bls; integer icu; doublereal err; extern logical 
lze_(doublecomplex *, doublecomplex *, integer *); doublecomplex beta; integer ldas, ldbs, ldcs; logical same; char side[1]; logical conj, left, null; char uplo[1]; doublecomplex alpha; logical isame[13]; char sides[1]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen), zhemm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); logical reset; char uplos[1]; extern /* Subroutine */ int zsymm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks: one per formatted write; the ciunit field is assigned just before each use. */ static cilist io___172 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___173 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___176 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___178 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___179 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___180 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___181 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZHEMM and ZSYMM; conj, set at the top of the function body, selects which of the two is called. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. 
*/ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments: each pointer is moved back so that index 1 (index a_offset, b_offset, c_offset for the two-dimensional a, b, c__) addresses the first element supplied by the caller. */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body: conj is true when the two characters at sname + 1 are "HE"; nargs is the number of isame[] flags examined, nc counts the calls, errmax is the largest err seen from zmmch_. */ /* .. Executable Statements .. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; nargs = 12; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = m; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room: L90 continues with the next n. */ if (ldc > *nmax) { goto L90; } lcc = ldc * n; null = n <= 0 || m <= 0; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room: L90 continues with the next n. */ if (ldb > *nmax) { goto L90; } lbb = ldb * n; /* Generate the matrix B (m by n), once per pair of m and n. */ zmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, & reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* A is square of order na: m when side is 'L', n otherwise. Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room: L80 continues with the next side. */ if (lda > *nmax) { goto L80; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; /* Generate the hermitian or symmetric matrix A. 
*/ zmake_(sname + 1, uplo, " ", &na, &na, &a[a_offset], nmax, &aa[1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen) 1, (ftnlen)1); i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; /* Generate the matrix C (regenerated for every alpha and beta pair) and count the call. */ zmake_("GE", " ", " ", &m, &n, &c__[c_offset], nmax, &cc[1], &ldc, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *)side; *(unsigned char *)uplos = *(unsigned char *)uplo; ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; bs[i__6].r = bb[i__7].r, bs[i__6].i = bb[i__7] .i; /* L20: */ } ldbs = ldb; bls.r = beta.r, bls.i = beta.i; i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L30: */ } ldcs = ldc; /* Call the subroutine. 
*/ if (*trace) { io___172.ciunit = *ntra; s_wsfe(&io___172); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof( doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof( integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } if (conj) { zhemm_(side, uplo, &m, &n, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } else { zsymm_(side, uplo, &m, &n, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly; on failure set *fatal and report the call at L110. */ if (! infoc_1.ok) { io___173.ciunit = *nout; s_wsfe(&io___173); e_wsfe(); *fatal = TRUE_; goto L110; } /* See what data changed inside subroutines: isame[i] is the flag for argument i + 1 of the routine called. C is compared with lze_ when m or n is not positive and with lzeres_ otherwise. */ isame[0] = *(unsigned char *)sides == *(unsigned char *)side; isame[1] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[2] = ms == m; isame[3] = ns == n; isame[4] = als.r == alpha.r && als.i == alpha.i; isame[5] = lze_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lze_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; isame[9] = bls.r == beta.r && bls.i == beta.i; if (null) { isame[10] = lze_(&cs[1], &cc[1], &lcc); } else { isame[10] = lzeres_("GE", " ", &m, &n, &cs[1], &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. The number written is the 1-based position of the changed argument. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___176.ciunit = *nout; s_wsfe(&io___176); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! 
same) { *fatal = TRUE_; goto L110; } if (! null) { /* Check the result: for side 'L' zmmch_ is given a then b with m as its third dimension argument; otherwise b then a with n. err is folded into errmax and *fatal is tested afterwards. */ if (left) { zmmch_("N", "N", &m, &n, &m, &alpha, &a[ a_offset], nmax, &b[b_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { zmmch_("N", "N", &m, &n, &n, &alpha, &b[ b_offset], nmax, &a[a_offset], nmax, &beta, &c__[c_offset], nmax, &ct[1], &g[1], &cc[1], &ldc, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } L90: ; } /* L100: */ } /* Report result: fmt_9999 (PASSED) when errmax is below *thresh, otherwise fmt_9997 (SUSPECT) with errmax. L110 further down instead writes the FAILED line followed by the parameters of the failing call. */ if (errmax < *thresh) { io___178.ciunit = *nout; s_wsfe(&io___178); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___179.ciunit = *nout; s_wsfe(&io___179); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L120; L110: io___180.ciunit = *nout; s_wsfe(&io___180); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___181.ciunit = *nout; s_wsfe(&io___181); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); L120: return 0; /* End of ZCHK2. 
*/ } /* zchk2_ */ /* Subroutine */ int zchk3_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nmax, doublecomplex *a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex *bs, doublecomplex *ct, doublereal *g, doublecomplex *c__, ftnlen sname_len) { /* Initialized data */ static char ichu[2] = "UL"; static char icht[3] = "NTC"; static char ichd[2] = "UN"; static char ichs[2] = "LR"; /* Format strings */ static char fmt_9995[] = "(1x,i6,\002: \002,a6,\002(\002,4(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002) \002,\002 .\002)"; static char fmt_9994[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, m, n, ia, na, nc, im, in, ms, ns, laa, icd, lbb, lda, ldb, ics; doublecomplex als; integer ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); 
char diag[1]; integer ldas, ldbs; logical same; char side[1]; logical left, null; char uplo[1]; doublecomplex alpha; char diags[1]; logical isame[13]; char sides[1]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); logical reset; char uplos[1]; extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen); char tranas[1], transa[1]; doublereal errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___222 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___223 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___224 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___227 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___229 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___230 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___231 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___232 = { 0, 0, 0, fmt_9995, 0 }; /* Tests ZTRMM and ZTRSM. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. 
*/ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ --idim; --alf; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --g; --ct; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ nargs = 11; nc = 0; reset = TRUE_; errmax = 0.; /* Set up zero matrix for ZMMCH. */ i__1 = *nmax; for (j = 1; j <= i__1; ++j) { i__2 = *nmax; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; c__[i__3].r = 0., c__[i__3].i = 0.; /* L10: */ } /* L20: */ } i__1 = *nidim; for (im = 1; im <= i__1; ++im) { m = idim[im]; i__2 = *nidim; for (in = 1; in <= i__2; ++in) { n = idim[in]; /* Set LDB to 1 more than minimum value if room. */ ldb = m; if (ldb < *nmax) { ++ldb; } /* Skip tests if not enough room. */ if (ldb > *nmax) { goto L130; } lbb = ldb * n; null = m <= 0 || n <= 0; for (ics = 1; ics <= 2; ++ics) { *(unsigned char *)side = *(unsigned char *)&ichs[ics - 1]; left = *(unsigned char *)side == 'L'; if (left) { na = m; } else { na = n; } /* Set LDA to 1 more than minimum value if room. */ lda = na; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. 
*/ if (lda > *nmax) { goto L130; } laa = lda * na; for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; for (ict = 1; ict <= 3; ++ict) { *(unsigned char *)transa = *(unsigned char *)&icht[ ict - 1]; for (icd = 1; icd <= 2; ++icd) { *(unsigned char *)diag = *(unsigned char *)&ichd[ icd - 1]; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; /* Generate the matrix A. */ zmake_("TR", uplo, diag, &na, &na, &a[ a_offset], nmax, &aa[1], &lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); /* Generate the matrix B. */ zmake_("GE", " ", " ", &m, &n, &b[b_offset], nmax, &bb[1], &ldb, &reset, &c_b1, ( ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the */ /* subroutine. */ *(unsigned char *)sides = *(unsigned char *) side; *(unsigned char *)uplos = *(unsigned char *) uplo; *(unsigned char *)tranas = *(unsigned char *) transa; *(unsigned char *)diags = *(unsigned char *) diag; ms = m; ns = n; als.r = alpha.r, als.i = alpha.i; i__4 = laa; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; as[i__5].r = aa[i__6].r, as[i__5].i = aa[ i__6].i; /* L30: */ } ldas = lda; i__4 = lbb; for (i__ = 1; i__ <= i__4; ++i__) { i__5 = i__; i__6 = i__; bs[i__5].r = bb[i__6].r, bs[i__5].i = bb[ i__6].i; /* L40: */ } ldbs = ldb; /* Call the subroutine. 
*/ if (s_cmp(sname + 3, "MM", (ftnlen)2, (ftnlen) 2) == 0) { if (*trace) { io___222.ciunit = *ntra; s_wsfe(&io___222); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztrmm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } else if (s_cmp(sname + 3, "SM", (ftnlen)2, ( ftnlen)2) == 0) { if (*trace) { io___223.ciunit = *ntra; s_wsfe(&io___223); do_fio(&c__1, (char *)&nc, (ftnlen) sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } ztrsm_(side, uplo, transa, diag, &m, &n, & alpha, &aa[1], &lda, &bb[1], &ldb, (ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___224.ciunit = *nout; s_wsfe(&io___224); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)sides == *( unsigned char *)side; isame[1] = *(unsigned char *)uplos == *( unsigned char *)uplo; isame[2] = *(unsigned char *)tranas == *( unsigned char *)transa; isame[3] = *(unsigned char *)diags == *( unsigned char *)diag; isame[4] = ms == m; isame[5] = ns == n; isame[6] = als.r == alpha.r && als.i == alpha.i; isame[7] = lze_(&as[1], &aa[1], &laa); isame[8] = ldas == lda; if (null) { isame[9] = lze_(&bs[1], &bb[1], &lbb); } else { isame[9] = lzeres_("GE", " ", &m, &n, &bs[ 1], &bb[1], &ldb, (ftnlen)2, ( ftnlen)1); } isame[10] = ldbs == ldb; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__4 = nargs; for (i__ = 1; i__ <= i__4; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___227.ciunit = *nout; s_wsfe(&io___227); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L50: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { if (s_cmp(sname + 3, "MM", (ftnlen)2, ( ftnlen)2) == 0) { /* Check the result. */ if (left) { zmmch_(transa, "N", &m, &n, &m, & alpha, &a[a_offset], nmax, &b[b_offset], nmax, & c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } else { zmmch_("N", transa, &m, &n, &n, & alpha, &b[b_offset], nmax, &a[a_offset], nmax, & c_b1, &c__[c_offset], nmax, &ct[1], &g[1], &bb[ 1], &ldb, eps, &err, fatal, nout, &c_true, ( ftnlen)1, (ftnlen)1); } } else if (s_cmp(sname + 3, "SM", (ftnlen) 2, (ftnlen)2) == 0) { /* Compute approximation to original */ /* matrix. 
*/ i__4 = n; for (j = 1; j <= i__4; ++j) { i__5 = m; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__ + j * c_dim1; i__7 = i__ + (j - 1) * ldb; c__[i__6].r = bb[i__7].r, c__[i__6].i = bb[i__7].i; i__6 = i__ + (j - 1) * ldb; i__7 = i__ + j * b_dim1; z__1.r = alpha.r * b[i__7].r - alpha.i * b[i__7].i, z__1.i = alpha.r * b[i__7].i + alpha.i * b[ i__7].r; bb[i__6].r = z__1.r, bb[i__6].i = z__1.i; /* L60: */ } /* L70: */ } if (left) { zmmch_(transa, "N", &m, &n, &m, & c_b2, &a[a_offset], nmax, &c__[c_offset], nmax, & c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } else { zmmch_("N", transa, &m, &n, &n, & c_b2, &c__[c_offset], nmax, &a[a_offset], nmax, &c_b1, &b[b_offset], nmax, &ct[1], &g[1], &bb[1], & ldb, eps, &err, fatal, nout, &c_false, (ftnlen)1, (ftnlen)1); } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L150; } } /* L80: */ } /* L90: */ } /* L100: */ } /* L110: */ } /* L120: */ } L130: ; } /* L140: */ } /* Report result. 
*/ if (errmax < *thresh) { io___229.ciunit = *nout; s_wsfe(&io___229); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___230.ciunit = *nout; s_wsfe(&io___230); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L160; L150: io___231.ciunit = *nout; s_wsfe(&io___231); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); io___232.ciunit = *nout; s_wsfe(&io___232); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, side, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, transa, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); e_wsfe(); L160: return 0; /* End of ZCHK3. 
*/ } /* zchk3_ */ /* zchk4_: test driver for zherk_ and zsyrk_. For every n and k taken from idim[], both TRANS choices, both UPLO choices and every alf[]/bet[] value it builds A and C with zmake_, saves copies of all arguments, calls zherk_ (when characters 2-3 of sname are "HE") or zsyrk_ (otherwise), verifies through isame[] that no input argument changed, and checks the result column by column with zmmch_. When the error-exit or argument checks fail *fatal is set here; when *fatal comes back set from zmmch_ the loops are abandoned as well; in both cases the failing call is written to unit *nout. Otherwise a summary based on errmax versus *thresh is written. Always returns 0. */ /* Subroutine */ int zchk4_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * a, doublecomplex *aa, doublecomplex *as, doublecomplex *b, doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal * g, ftnlen sname_len) { /* Initialized data */ /* icht holds the candidate TRANS characters ('C' is replaced by 'T' below when conj is false); ichu holds the candidate UPLO characters. */ static char icht[2] = "NC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),f4.1,\002, A,\002,i3,\002,\002,f4.1," "\002, C,\002,i3,\002) \002,\002 .\002)"; static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" " , A,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002), C,\002,i3," "\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublecomplex z__1; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen), 
s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lda, lcc, ldc; doublecomplex als; integer ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); doublecomplex beta; integer ldas, ldcs; logical same, conj; doublecomplex bets; doublereal rals; logical tran, null; char uplo[1]; doublecomplex alpha; doublereal rbeta; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); doublereal rbets; logical reset; extern /* Subroutine */ int zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen); char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int zsyrk_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal ralpha, errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1], transt[1]; /* Fortran I/O blocks */ static cilist io___274 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___275 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___276 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___279 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___286 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___287 = { 0, 0, 0, fmt_9997, 0 }; static 
cilist io___288 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___289 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___290 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___291 = { 0, 0, 0, fmt_9993, 0 }; /* Tests ZHERK and ZSYRK. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ /* Each array pointer is shifted so that the 1-based indices used below (for example idim[in] with in starting at 1) address the caller's first element; a, b and c__ are indexed as [row + column * nmax]. */ --idim; --alf; --bet; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; b_dim1 = *nmax; b_offset = 1 + b_dim1; b -= b_offset; --as; --aa; a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ /* .. Executable Statements .. */ /* conj selects the routine under test: zherk_ when characters 2-3 of sname are "HE", zsyrk_ otherwise. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; /* nargs is the number of isame[] entries examined after each call. */ nargs = 10; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L100; } lcc = ldc * n; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 2; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'C'; if (tran && ! conj) { *(unsigned char *)trans = 'T'; } if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. */ if (lda > *nmax) { goto L80; } laa = lda * na; /* Generate the matrix A. 
*/ zmake_("GE", " ", " ", &ma, &na, &a[a_offset], nmax, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; /* zherk_ is declared with real scalars, so for conj only the real part of alpha (and of beta below) is kept and the imaginary part is zeroed. */ if (conj) { ralpha = alpha.r; z__1.r = ralpha, z__1.i = 0.; alpha.r = z__1.r, alpha.i = z__1.i; } i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; if (conj) { rbeta = beta.r; z__1.r = rbeta, z__1.i = 0.; beta.r = z__1.r, beta.i = z__1.i; } /* When null is set, cc is compared with its saved copy cs through lze_ and the zmmch_ result check is skipped. */ null = n <= 0; if (conj) { null = null || (k <= 0 || ralpha == 0.) && rbeta == 1.; } /* Generate the matrix C. */ zmake_(sname + 1, uplo, " ", &n, &n, &c__[ c_offset], nmax, &cc[1], &ldc, &reset, & c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. */ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; if (conj) { rals = ralpha; } else { als.r = alpha.r, als.i = alpha.i; } i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; if (conj) { rbets = rbeta; } else { bets.r = beta.r, bets.i = beta.i; } i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L20: */ } ldcs = ldc; /* Call the subroutine. 
*/ if (conj) { if (*trace) { io___274.ciunit = *ntra; s_wsfe(&io___274); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zherk_(uplo, trans, &n, &k, &ralpha, &aa[1], & lda, &rbeta, &cc[1], &ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { io___275.ciunit = *ntra; s_wsfe(&io___275); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zsyrk_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &beta, &cc[1], &ldc, (ftnlen)1, ( ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___276.ciunit = *nout; s_wsfe(&io___276); e_wsfe(); *fatal = TRUE_; goto L120; } /* See what data changed inside subroutines. 
*/ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; if (conj) { isame[4] = rals == ralpha; } else { isame[4] = als.r == alpha.r && als.i == alpha.i; } isame[5] = lze_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; if (conj) { isame[7] = rbets == rbeta; } else { isame[7] = bets.r == beta.r && bets.i == beta.i; } if (null) { isame[8] = lze_(&cs[1], &cc[1], &lcc); } else { isame[8] = lzeres_(sname + 1, uplo, &n, &n, & cs[1], &cc[1], &ldc, (ftnlen)2, ( ftnlen)1); } isame[9] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___279.ciunit = *nout; s_wsfe(&io___279); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L30: */ } if (! same) { *fatal = TRUE_; goto L120; } if (! null) { /* Check the result column by column. */ if (conj) { *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transt = 'T'; } jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { /* Rows jj .. jj+lj-1 of column j are checked: rows 1..j when upper is set, rows j..n otherwise. */ if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } if (tran) { zmmch_(transt, "N", &lj, &c__1, &k, & alpha, &a[jj * a_dim1 + 1], nmax, &a[j * a_dim1 + 1], nmax, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { zmmch_("N", transt, &lj, &c__1, &k, & alpha, &a[jj + a_dim1], nmax, &a[j + a_dim1], nmax, &beta, & c__[jj + j * c_dim1], nmax, & ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, & c_true, (ftnlen)1, (ftnlen)1); } /* Advance jc, the index into cc handed to zmmch_ for the next column. */ if (upper) { jc += ldc; } else { jc = jc + ldc + 1; } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L110; } /* L40: */ } } /* L50: */ } /* L60: */ } /* L70: */ } L80: ; } /* L90: */ } L100: ; } /* Report result. 
*/ if (errmax < *thresh) { io___286.ciunit = *nout; s_wsfe(&io___286); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___287.ciunit = *nout; s_wsfe(&io___287); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L130; /* L110 is reached when *fatal is set after a zmmch_ call; it writes the column index j (only when n > 1) and then falls through to L120. */ L110: if (n > 1) { io___288.ciunit = *nout; s_wsfe(&io___288); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } /* L120 is the common failure report: it writes sname followed by the arguments of the failing call, using the real-scalar format for conj and the complex-scalar format otherwise. */ L120: io___289.ciunit = *nout; s_wsfe(&io___289); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (conj) { io___290.ciunit = *nout; s_wsfe(&io___290); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ralpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___291.ciunit = *nout; s_wsfe(&io___291); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } L130: return 0; /* End of ZCHK4. 
*/ } /* zchk4_ */ /* zchk5_: test driver for zher2k_ and zsyr2k_. For every n and k taken from idim[], both TRANS choices, both UPLO choices and every alf[]/bet[] value it builds A and B (both inside the work array ab) and C with zmake_, saves copies of all arguments, calls zher2k_ (when characters 2-3 of sname are "HE") or zsyr2k_ (otherwise), verifies through isame[] that no input argument changed, and checks the result column by column with zmmch_ using the scratch vector w. When the error-exit or argument checks fail *fatal is set here; when *fatal comes back set from zmmch_ the loops are abandoned as well; in both cases the failing call is written to unit *nout. Otherwise a summary based on errmax versus *thresh is written. Always returns 0. */ /* Subroutine */ int zchk5_(char *sname, doublereal *eps, doublereal *thresh, integer *nout, integer *ntra, logical *trace, logical *rewi, logical * fatal, integer *nidim, integer *idim, integer *nalf, doublecomplex * alf, integer *nbet, doublecomplex *bet, integer *nmax, doublecomplex * ab, doublecomplex *aa, doublecomplex *as, doublecomplex *bb, doublecomplex *bs, doublecomplex *c__, doublecomplex *cc, doublecomplex *cs, doublecomplex *ct, doublereal *g, doublecomplex *w, ftnlen sname_len) { /* Initialized data */ /* icht holds the candidate TRANS characters ('C' is replaced by 'T' below when conj is false); ichu holds the candidate UPLO characters. */ static char icht[2] = "NC"; static char ichu[2] = "UL"; /* Format strings */ static char fmt_9994[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,\002,f4.1,\002, C,\002,i3,\002)" " .\002)"; static char fmt_9993[] = "(1x,i6,\002: \002,a6,\002(\002,2(\002'\002,a1" ",\002',\002),2(i3,\002,\002),\002(\002,f4.1,\002,\002,f4.1,\002)" ", A,\002,i3,\002, B,\002,i3,\002,(\002,f4.1,\002,\002,f4.1,\002)" ", C,\002,i3,\002) .\002)"; static char fmt_9992[] = "(\002 ******* FATAL ERROR - ERROR-EXIT TAKEN O" "N VALID CALL *\002,\002******\002)"; static char fmt_9998[] = "(\002 ******* FATAL ERROR - PARAMETER NUMBER" " \002,i2,\002 WAS CH\002,\002ANGED INCORRECTLY *******\002)"; static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE COMPUTATIONAL TE" "STS (\002,i6,\002 CALL\002,\002S)\002)"; static char fmt_9997[] = "(\002 \002,a6,\002 COMPLETED THE COMPUTATIONAL" " TESTS (\002,i6,\002 C\002,\002ALLS)\002,/\002 ******* BUT WITH " "MAXIMUM TEST RATIO\002,f8.2,\002 - SUSPECT *******\002)"; static char fmt_9995[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; static char fmt_9996[] = "(\002 ******* \002,a6,\002 FAILED ON CALL NUMB" "ER:\002)"; /* System generated locals */ integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8; doublecomplex z__1, z__2; alist al__1; /* Builtin functions */ integer s_cmp(const char *, const 
char *, ftnlen, ftnlen), s_wsfe(cilist *), do_fio( integer *, char *, ftnlen), e_wsfe(void), f_rew(alist *); void d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, k, n, ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns, laa, lbb, lda, lcc, ldb, ldc; doublecomplex als; integer ict, icu; doublereal err; extern logical lze_(doublecomplex *, doublecomplex *, integer *); integer jjab; doublecomplex beta; integer ldas, ldbs, ldcs; logical same, conj; doublecomplex bets; logical tran, null; char uplo[1]; doublecomplex alpha; doublereal rbeta; logical isame[13]; extern /* Subroutine */ int zmake_(char *, char *, char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, logical *, doublecomplex *, ftnlen, ftnlen, ftnlen); integer nargs; extern /* Subroutine */ int zmmch_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublereal *, doublecomplex *, integer *, doublereal *, doublereal *, logical *, integer *, logical *, ftnlen, ftnlen); doublereal rbets; logical reset; char trans[1]; logical upper; char uplos[1]; extern /* Subroutine */ int zher2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen), zsyr2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal errmax; extern logical lzeres_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); char transs[1], transt[1]; /* Fortran I/O blocks */ static cilist io___334 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___335 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___336 = { 0, 0, 0, fmt_9992, 0 }; static cilist io___339 = { 0, 0, 0, fmt_9998, 0 
}; static cilist io___347 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___348 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___349 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___350 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___351 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___352 = { 0, 0, 0, fmt_9993, 0 }; /* Tests ZHER2K and ZSYR2K. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Data statements .. */ /* Parameter adjustments */ /* Each array pointer is shifted so that the 1-based indices used below address the caller's first element; c__ is indexed as [row + column * nmax], while ab is indexed linearly. */ --idim; --alf; --bet; --w; --g; --ct; --cs; --cc; c_dim1 = *nmax; c_offset = 1 + c_dim1; c__ -= c_offset; --bs; --bb; --as; --aa; --ab; /* Function Body */ /* .. Executable Statements .. */ /* conj selects the routine under test: zher2k_ when characters 2-3 of sname are "HE", zsyr2k_ otherwise. */ conj = s_cmp(sname + 1, "HE", (ftnlen)2, (ftnlen)2) == 0; /* nargs is the number of isame[] entries examined after each call. */ nargs = 12; nc = 0; reset = TRUE_; errmax = 0.; i__1 = *nidim; for (in = 1; in <= i__1; ++in) { n = idim[in]; /* Set LDC to 1 more than minimum value if room. */ ldc = n; if (ldc < *nmax) { ++ldc; } /* Skip tests if not enough room. */ if (ldc > *nmax) { goto L130; } lcc = ldc * n; i__2 = *nidim; for (ik = 1; ik <= i__2; ++ik) { k = idim[ik]; for (ict = 1; ict <= 2; ++ict) { *(unsigned char *)trans = *(unsigned char *)&icht[ict - 1]; tran = *(unsigned char *)trans == 'C'; if (tran && ! conj) { *(unsigned char *)trans = 'T'; } if (tran) { ma = k; na = n; } else { ma = n; na = k; } /* Set LDA to 1 more than minimum value if room. */ lda = ma; if (lda < *nmax) { ++lda; } /* Skip tests if not enough room. 
*/ if (lda > *nmax) { goto L110; } laa = lda * na; /* Generate the matrix A. */ /* A and B are both generated into ab: with tran set the leading dimension passed to zmake_ is 2 * nmax and B starts at ab[k + 1]; otherwise it is nmax and B starts at ab[k * nmax + 1]. */ if (tran) { i__3 = *nmax << 1; zmake_("GE", " ", " ", &ma, &na, &ab[1], &i__3, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); } else { zmake_("GE", " ", " ", &ma, &na, &ab[1], nmax, &aa[1], & lda, &reset, &c_b1, (ftnlen)2, (ftnlen)1, (ftnlen) 1); } /* Generate the matrix B. */ ldb = lda; lbb = laa; if (tran) { i__3 = *nmax << 1; zmake_("GE", " ", " ", &ma, &na, &ab[k + 1], &i__3, &bb[1] , &ldb, &reset, &c_b1, (ftnlen)2, (ftnlen)1, ( ftnlen)1); } else { zmake_("GE", " ", " ", &ma, &na, &ab[k * *nmax + 1], nmax, &bb[1], &ldb, &reset, &c_b1, (ftnlen)2, (ftnlen) 1, (ftnlen)1); } for (icu = 1; icu <= 2; ++icu) { *(unsigned char *)uplo = *(unsigned char *)&ichu[icu - 1]; upper = *(unsigned char *)uplo == 'U'; i__3 = *nalf; for (ia = 1; ia <= i__3; ++ia) { i__4 = ia; alpha.r = alf[i__4].r, alpha.i = alf[i__4].i; i__4 = *nbet; for (ib = 1; ib <= i__4; ++ib) { i__5 = ib; beta.r = bet[i__5].r, beta.i = bet[i__5].i; /* zher2k_ is declared with a real beta, so for conj only the real part of beta is kept and the imaginary part is zeroed. */ if (conj) { rbeta = beta.r; z__1.r = rbeta, z__1.i = 0.; beta.r = z__1.r, beta.i = z__1.i; } /* When null is set, cc is compared with its saved copy cs through lze_ and the zmmch_ result check is skipped. */ null = n <= 0; if (conj) { null = null || (k <= 0 || alpha.r == 0. && alpha.i == 0.) && rbeta == 1.; } /* Generate the matrix C. */ zmake_(sname + 1, uplo, " ", &n, &n, &c__[ c_offset], nmax, &cc[1], &ldc, &reset, & c_b1, (ftnlen)2, (ftnlen)1, (ftnlen)1); ++nc; /* Save every datum before calling the subroutine. 
*/ *(unsigned char *)uplos = *(unsigned char *)uplo; *(unsigned char *)transs = *(unsigned char *) trans; ns = n; ks = k; als.r = alpha.r, als.i = alpha.i; i__5 = laa; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; as[i__6].r = aa[i__7].r, as[i__6].i = aa[i__7] .i; /* L10: */ } ldas = lda; i__5 = lbb; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; bs[i__6].r = bb[i__7].r, bs[i__6].i = bb[i__7] .i; /* L20: */ } ldbs = ldb; if (conj) { rbets = rbeta; } else { bets.r = beta.r, bets.i = beta.i; } i__5 = lcc; for (i__ = 1; i__ <= i__5; ++i__) { i__6 = i__; i__7 = i__; cs[i__6].r = cc[i__7].r, cs[i__6].i = cc[i__7] .i; /* L30: */ } ldcs = ldc; /* Call the subroutine. */ if (conj) { if (*trace) { io___334.ciunit = *ntra; s_wsfe(&io___334); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zher2k_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &bb[1], &ldb, &rbeta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } else { if (*trace) { io___335.ciunit = *ntra; s_wsfe(&io___335); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof( integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__2, (char *)&alpha, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen) sizeof(integer)); 
do_fio(&c__2, (char *)&beta, (ftnlen) sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen) sizeof(integer)); e_wsfe(); } if (*rewi) { al__1.aerr = 0; al__1.aunit = *ntra; f_rew(&al__1); } zsyr2k_(uplo, trans, &n, &k, &alpha, &aa[1], & lda, &bb[1], &ldb, &beta, &cc[1], & ldc, (ftnlen)1, (ftnlen)1); } /* Check if error-exit was taken incorrectly. */ if (! infoc_1.ok) { io___336.ciunit = *nout; s_wsfe(&io___336); e_wsfe(); *fatal = TRUE_; goto L150; } /* See what data changed inside subroutines. */ isame[0] = *(unsigned char *)uplos == *(unsigned char *)uplo; isame[1] = *(unsigned char *)transs == *(unsigned char *)trans; isame[2] = ns == n; isame[3] = ks == k; isame[4] = als.r == alpha.r && als.i == alpha.i; isame[5] = lze_(&as[1], &aa[1], &laa); isame[6] = ldas == lda; isame[7] = lze_(&bs[1], &bb[1], &lbb); isame[8] = ldbs == ldb; if (conj) { isame[9] = rbets == rbeta; } else { isame[9] = bets.r == beta.r && bets.i == beta.i; } /* NOTE(review): the type string passed to lzeres_ is the literal "HE" even when conj is false, whereas zchk4_ passes sname + 1 - presumably lzeres_ treats both types alike; confirm. */ if (null) { isame[10] = lze_(&cs[1], &cc[1], &lcc); } else { isame[10] = lzeres_("HE", uplo, &n, &n, &cs[1] , &cc[1], &ldc, (ftnlen)2, (ftnlen)1); } isame[11] = ldcs == ldc; /* If data was incorrectly changed, report and */ /* return. */ same = TRUE_; i__5 = nargs; for (i__ = 1; i__ <= i__5; ++i__) { same = same && isame[i__ - 1]; if (! isame[i__ - 1]) { io___339.ciunit = *nout; s_wsfe(&io___339); do_fio(&c__1, (char *)&i__, (ftnlen) sizeof(integer)); e_wsfe(); } /* L40: */ } if (! same) { *fatal = TRUE_; goto L150; } if (! null) { /* Check the result column by column. 
*/ if (conj) { *(unsigned char *)transt = 'C'; } else { *(unsigned char *)transt = 'T'; } jjab = 1; jc = 1; i__5 = n; for (j = 1; j <= i__5; ++j) { /* Rows jj .. jj+lj-1 of column j are checked: rows 1..j when upper is set, rows j..n otherwise. */ if (upper) { jj = 1; lj = j; } else { jj = j; lj = n - j + 1; } /* Fill w[1..2k] from alpha and the entries of ab belonging to column j (conjugated via d_cnjg where conj requires it); zmmch_ is then called with inner dimension 2k on ab and w. */ if (tran) { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { i__7 = i__; i__8 = (j - 1 << 1) * *nmax + k + i__; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8].i, z__1.i = alpha.r * ab[ i__8].i + alpha.i * ab[ i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; if (conj) { i__7 = k + i__; d_cnjg(&z__2, &alpha); i__8 = (j - 1 << 1) * *nmax + i__; z__1.r = z__2.r * ab[i__8].r - z__2.i * ab[i__8].i, z__1.i = z__2.r * ab[i__8].i + z__2.i * ab[ i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; } else { i__7 = k + i__; i__8 = (j - 1 << 1) * *nmax + i__; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, z__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; } /* L50: */ } i__6 = k << 1; i__7 = *nmax << 1; i__8 = *nmax << 1; zmmch_(transt, "N", &lj, &c__1, &i__6, &c_b2, &ab[jjab], &i__7, &w[ 1], &i__8, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1] , &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } else { i__6 = k; for (i__ = 1; i__ <= i__6; ++i__) { if (conj) { i__7 = i__; d_cnjg(&z__2, &ab[(k + i__ - 1) * *nmax + j]); z__1.r = alpha.r * z__2.r - alpha.i * z__2.i, z__1.i = alpha.r * z__2.i + alpha.i * z__2.r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; z__2.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, z__2.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; d_cnjg(&z__1, &z__2); w[i__7].r = z__1.r, w[i__7].i = z__1.i; } else { i__7 = i__; i__8 = (k + i__ - 1) * *nmax + j; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, z__1.i = alpha.r * ab[i__8].i + alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; i__7 = k + i__; i__8 = (i__ - 1) * *nmax + j; z__1.r = alpha.r * ab[i__8].r - alpha.i * ab[i__8] .i, z__1.i = alpha.r * ab[i__8].i + 
alpha.i * ab[i__8].r; w[i__7].r = z__1.r, w[i__7].i = z__1.i; } /* L60: */ } i__6 = k << 1; i__7 = *nmax << 1; zmmch_("N", "N", &lj, &c__1, &i__6, & c_b2, &ab[jj], nmax, &w[1], & i__7, &beta, &c__[jj + j * c_dim1], nmax, &ct[1], &g[1], &cc[jc], &ldc, eps, &err, fatal, nout, &c_true, (ftnlen) 1, (ftnlen)1); } /* Advance jc, the index into cc handed to zmmch_ for the next column; for the lower triangle with tran set, jjab also moves on by 2 * nmax. */ if (upper) { jc += ldc; } else { jc = jc + ldc + 1; if (tran) { jjab += *nmax << 1; } } errmax = max(errmax,err); /* If got really bad answer, report and */ /* return. */ if (*fatal) { goto L140; } /* L70: */ } } /* L80: */ } /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } L130: ; } /* Report result. */ if (errmax < *thresh) { io___347.ciunit = *nout; s_wsfe(&io___347); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___348.ciunit = *nout; s_wsfe(&io___348); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&errmax, (ftnlen)sizeof(doublereal)); e_wsfe(); } goto L160; /* L140 is reached when *fatal is set after a zmmch_ call; it writes the column index j (only when n > 1) and then falls through to L150, the common failure report that writes sname and the arguments of the failing call. */ L140: if (n > 1) { io___349.ciunit = *nout; s_wsfe(&io___349); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L150: io___350.ciunit = *nout; s_wsfe(&io___350); do_fio(&c__1, sname, (ftnlen)6); e_wsfe(); if (conj) { io___351.ciunit = *nout; s_wsfe(&io___351); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&rbeta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___352.ciunit = *nout; s_wsfe(&io___352); do_fio(&c__1, (char *)&nc, (ftnlen)sizeof(integer)); do_fio(&c__1, sname, (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); 
do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&alpha, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&lda, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldb, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&beta, (ftnlen)sizeof(doublereal)); do_fio(&c__1, (char *)&ldc, (ftnlen)sizeof(integer)); e_wsfe(); } L160: return 0; /* End of ZCHK5. */ } /* zchk5_ */ /* Subroutine */ int zchke_(integer *isnum, char *srnamt, integer *nout, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 \002,a6,\002 PASSED THE TESTS OF ERROR-E" "XITS\002)"; static char fmt_9998[] = "(\002 ******* \002,a6,\002 FAILED THE TESTS OF" " ERROR-EXITS *****\002,\002**\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ doublecomplex a[2] /* was [2][1] */, b[2] /* was [2][1] */, c__[2] /* was [2][1] */, beta, alpha; doublereal rbeta; extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), zhemm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), zherk_(char *, char *, integer *, integer *, doublereal *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen), ztrmm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), zsymm_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), ztrsm_(char *, char *, char *, char *, integer *, integer *, doublecomplex *, 
doublecomplex *, integer * , doublecomplex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), zsyrk_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen), zher2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublereal *, doublecomplex *, integer *, ftnlen, ftnlen), zsyr2k_(char *, char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, ftnlen, ftnlen); doublereal ralpha; extern /* Subroutine */ int chkxer_(char *, integer *, integer *, logical *, logical *, ftnlen); /* Fortran I/O blocks */ static cilist io___360 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___361 = { 0, 0, 0, fmt_9998, 0 }; /* Tests the error exits from the Level 3 Blas. */ /* Requires a special version of the error-handling routine XERBLA. */ /* A, B and C should not need to be defined. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* 3-19-92: Initialize ALPHA, BETA, RALPHA, and RBETA (eca) */ /* 3-19-92: Fix argument 12 in calls to ZSYMM and ZHEMM */ /* with INFOT = 9 (eca) */ /* 10-9-00: Declared INTRINSIC DCMPLX (susan) */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. Local Arrays .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. Common blocks .. */ /* .. Executable Statements .. */ /* OK is set to .FALSE. by the special version of XERBLA or by CHKXER */ /* if anything is wrong. */ infoc_1.ok = TRUE_; /* LERR is set to .TRUE. 
by the special version of XERBLA each time */ /* it is called, and is then tested and re-set by CHKXER. */ infoc_1.lerr = FALSE_; /* Initialize ALPHA, BETA, RALPHA, and RBETA. */ alpha.r = 1., alpha.i = -1.; beta.r = 2., beta.i = -2.; ralpha = 1.f; rbeta = 2.f; switch (*isnum) { case 1: goto L10; case 2: goto L20; case 3: goto L30; case 4: goto L40; case 5: goto L50; case 6: goto L60; case 7: goto L70; case 8: goto L80; case 9: goto L90; } L10: infoc_1.infot = 1; zgemm_("/", "N", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; zgemm_("/", "C", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 1; zgemm_("/", "T", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgemm_("N", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgemm_("C", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zgemm_("T", "/", &c__0, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("N", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("N", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, 
(ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("N", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("C", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("C", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("C", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("T", "N", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("T", "C", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zgemm_("T", "T", &c_n1, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("N", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("N", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("N", "T", &c__0, 
&c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("C", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("C", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("C", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("T", "N", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("T", "C", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zgemm_("T", "T", &c__0, &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("N", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("N", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("N", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, 
&infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("C", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("C", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("C", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("T", "N", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("T", "C", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; zgemm_("T", "T", &c__0, &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, 
(ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("C", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("C", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__2, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("T", "C", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 8; zgemm_("T", "T", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("N", "N", &c__0, &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("C", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("T", "N", &c__0, &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("N", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("C", "C", &c__0, &c__2, 
&c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("T", "C", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("N", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("C", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zgemm_("T", "T", &c__0, &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("N", "N", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("N", "C", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("N", "T", &c__2, &c__0, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("C", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("C", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, 
&infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("C", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("T", "N", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("T", "C", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 13; zgemm_("T", "T", &c__2, &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, &c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L20: infoc_1.infot = 1; zhemm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zhemm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhemm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhemm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhemm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zhemm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zhemm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zhemm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zhemm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zhemm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zhemm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhemm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhemm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhemm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zhemm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zhemm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zhemm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zhemm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zhemm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L30: infoc_1.infot = 1; zsymm_("/", "U", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zsymm_("L", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsymm_("L", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsymm_("R", "U", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, 
(ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsymm_("L", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsymm_("R", "L", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsymm_("L", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsymm_("R", "U", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsymm_("L", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsymm_("R", "L", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsymm_("R", "U", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsymm_("R", "L", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, 
(ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsymm_("L", "U", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsymm_("R", "U", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsymm_("L", "L", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsymm_("R", "L", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L40: infoc_1.infot = 1; ztrmm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztrmm_("L", "/", "N", "N", &c__0, &c__0, 
&alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztrmm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztrmm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrmm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 6; ztrmm_("R", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrmm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, 
&c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "U", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "L", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, 
&infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrmm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 11; ztrmm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrmm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L50: infoc_1.infot = 1; ztrsm_("/", "U", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; ztrsm_("L", "/", "N", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; ztrsm_("L", "U", "/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; ztrsm_("L", "U", "N", "/", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "U", "C", "N", &c_n1, &c__0, 
&alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "U", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "U", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "U", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("L", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "L", "N", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "L", "C", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); 
chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 5; ztrsm_("R", "L", "T", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "U", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "U", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "U", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("L", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); 
infoc_1.infot = 6; ztrsm_("L", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "L", "N", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "L", "C", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 6; ztrsm_("R", "L", "T", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "U", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "U", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "U", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, 
&c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__2, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "L", "N", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "L", "C", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; ztrsm_("R", "L", "T", "N", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, 
&infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "U", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "U", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "U", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("L", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "L", "N", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "L", "C", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 11; ztrsm_("R", "L", "T", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, ( ftnlen)1, (ftnlen)1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; 
L60: infoc_1.infot = 1; zherk_("/", "N", &c__0, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zherk_("U", "T", &c__0, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zherk_("U", "N", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zherk_("U", "C", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zherk_("L", "N", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zherk_("L", "C", &c_n1, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zherk_("U", "N", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zherk_("U", "C", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zherk_("L", "N", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zherk_("L", "C", &c__0, &c_n1, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zherk_("U", "N", &c__2, &c__0, &ralpha, a, &c__1, 
&rbeta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zherk_("U", "C", &c__0, &c__2, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zherk_("L", "N", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zherk_("L", "C", &c__0, &c__2, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zherk_("U", "N", &c__2, &c__0, &ralpha, a, &c__2, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zherk_("U", "C", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zherk_("L", "N", &c__2, &c__0, &ralpha, a, &c__2, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zherk_("L", "C", &c__2, &c__0, &ralpha, a, &c__1, &rbeta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L70: infoc_1.infot = 1; zsyrk_("/", "N", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zsyrk_("U", "C", &c__0, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyrk_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, 
&infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyrk_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyrk_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyrk_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyrk_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyrk_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyrk_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyrk_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyrk_("U", "T", &c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__2, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyrk_("L", "T", 
&c__0, &c__2, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zsyrk_("U", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zsyrk_("U", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zsyrk_("L", "N", &c__2, &c__0, &alpha, a, &c__2, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 10; zsyrk_("L", "T", &c__2, &c__0, &alpha, a, &c__1, &beta, c__, &c__1, ( ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L80: infoc_1.infot = 1; zher2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zher2k_("U", "T", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zher2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zher2k_("U", "C", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zher2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zher2k_("L", "C", &c_n1, &c__0, 
&alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zher2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zher2k_("U", "C", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zher2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zher2k_("L", "C", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2k_("U", "C", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zher2k_("L", "C", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; 
zher2k_("U", "C", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zher2k_("L", "C", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zher2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zher2k_("U", "C", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zher2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zher2k_("L", "C", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &rbeta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); goto L100; L90: infoc_1.infot = 1; zsyr2k_("/", "N", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 2; zsyr2k_("U", "C", &c__0, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyr2k_("U", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, 
&infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyr2k_("U", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyr2k_("L", "N", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 3; zsyr2k_("L", "T", &c_n1, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyr2k_("U", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyr2k_("U", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyr2k_("L", "N", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 4; zsyr2k_("L", "T", &c__0, &c_n1, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, 
&infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 7; zsyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsyr2k_("U", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__2, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 9; zsyr2k_("L", "T", &c__0, &c__2, &alpha, a, &c__2, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsyr2k_("U", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsyr2k_("U", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsyr2k_("L", "N", &c__2, &c__0, &alpha, a, &c__2, b, &c__2, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); infoc_1.infot = 12; zsyr2k_("L", "T", &c__2, &c__0, &alpha, a, &c__1, b, &c__1, &beta, c__, & c__1, (ftnlen)1, (ftnlen)1); chkxer_(srnamt, &infoc_1.infot, nout, &infoc_1.lerr, &infoc_1.ok, (ftnlen) 6); L100: if (infoc_1.ok) { io___360.ciunit = *nout; s_wsfe(&io___360); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } else { io___361.ciunit = *nout; 
s_wsfe(&io___361); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); } return 0; /* End of ZCHKE. */ } /* zchke_ */ /* Subroutine */ int zmake_(char *type__, char *uplo, char *diag, integer *m, integer *n, doublecomplex *a, integer *nmax, doublecomplex *aa, integer *lda, logical *reset, doublecomplex *transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; doublereal d__1; doublecomplex z__1, z__2; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); void d_cnjg(doublecomplex *, const doublecomplex *); /* Local variables */ integer i__, j, jj; logical gen, her, tri, sym; integer ibeg, iend; extern /* Double Complex */ void zbeg_(doublecomplex *, logical *); logical unit, lower, upper; /* Generates values for an M by N matrix A. */ /* Stores the values in the array AA in the data structure required */ /* by the routine, with unwanted elements set to rogue value. */ /* TYPE is 'GE', 'HE', 'SY' or 'TR'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ a_dim1 = *nmax; a_offset = 1 + a_dim1; a -= a_offset; --aa; /* Function Body */ gen = s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0; her = s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0; sym = s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0; tri = s_cmp(type__, "TR", (ftnlen)2, (ftnlen)2) == 0; upper = (her || sym || tri) && *(unsigned char *)uplo == 'U'; lower = (her || sym || tri) && *(unsigned char *)uplo == 'L'; unit = tri && *(unsigned char *)diag == 'U'; /* Generate data in array A. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { if (gen || upper && i__ <= j || lower && i__ >= j) { i__3 = i__ + j * a_dim1; zbeg_(&z__2, reset); z__1.r = z__2.r + transl->r, z__1.i = z__2.i + transl->i; a[i__3].r = z__1.r, a[i__3].i = z__1.i; if (i__ != j) { /* Set some elements to zero */ if (*n > 3 && j == *n / 2) { i__3 = i__ + j * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; } if (her) { i__3 = j + i__ * a_dim1; d_cnjg(&z__1, &a[i__ + j * a_dim1]); a[i__3].r = z__1.r, a[i__3].i = z__1.i; } else if (sym) { i__3 = j + i__ * a_dim1; i__4 = i__ + j * a_dim1; a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i; } else if (tri) { i__3 = j + i__ * a_dim1; a[i__3].r = 0., a[i__3].i = 0.; } } } /* L10: */ } if (her) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; d__1 = a[i__3].r; z__1.r = d__1, z__1.i = 0.; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (tri) { i__2 = j + j * a_dim1; i__3 = j + j * a_dim1; z__1.r = a[i__3].r + 1., z__1.i = a[i__3].i + 0.; a[i__2].r = z__1.r, a[i__2].i = z__1.i; } if (unit) { i__2 = j + j * a_dim1; a[i__2].r = 1., a[i__2].i = 0.; } /* L20: */ } /* Store elements in array AS in data structure required by routine. 
*/ if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L30: */ } i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L40: */ } /* L50: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "TR", (ftnlen) 2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; if (unit) { iend = j - 1; } else { iend = j; } } else { if (unit) { ibeg = j + 1; } else { ibeg = j; } iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L60: */ } i__2 = iend; for (i__ = ibeg; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; i__4 = i__ + j * a_dim1; aa[i__3].r = a[i__4].r, aa[i__3].i = a[i__4].i; /* L70: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + (j - 1) * *lda; aa[i__3].r = -1e10, aa[i__3].i = 1e10; /* L80: */ } if (her) { jj = j + (j - 1) * *lda; i__2 = jj; i__3 = jj; d__1 = aa[i__3].r; z__1.r = d__1, z__1.i = -1e10; aa[i__2].r = z__1.r, aa[i__2].i = z__1.i; } /* L90: */ } } return 0; /* End of ZMAKE. 
*/ } /* zmake_ */ /* Subroutine */ int zmmch_(char *transa, char *transb, integer *m, integer * n, integer *kk, doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex * c__, integer *ldc, doublecomplex *ct, doublereal *g, doublecomplex * cc, integer *ldcc, doublereal *eps, doublereal *err, logical *fatal, integer *nout, logical *mv, ftnlen transa_len, ftnlen transb_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* FATAL ERROR - COMPUTED RESULT IS" " LESS THAN HAL\002,\002F ACCURATE *******\002,/\002 " " EXPECTED RE\002,\002SULT COMPUTED R" "ESULT\002)"; static char fmt_9998[] = "(1x,i7,2(\002 (\002,g15.6,\002,\002,g15.6," "\002)\002))"; static char fmt_9997[] = "(\002 THESE ARE THE RESULTS FOR COLUMN" " \002,i3)"; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1, cc_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublereal d__1, d__2, d__3, d__4, d__5, d__6; doublecomplex z__1, z__2, z__3, z__4; /* Builtin functions */ double d_imag(const doublecomplex *); void d_cnjg(doublecomplex *, const doublecomplex *); double sqrt(doublereal); integer s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ integer i__, j, k; doublereal erri; logical trana, tranb, ctrana, ctranb; /* Fortran I/O blocks */ static cilist io___382 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___383 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___384 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___385 = { 0, 0, 0, fmt_9997, 0 }; /* Checks the results of the computational tests. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Parameters .. */ /* .. Scalar Arguments .. */ /* .. 
Array Arguments .. */ /* .. Local Scalars .. */ /* .. Intrinsic Functions .. */ /* .. Statement Functions .. */ /* .. Statement Function definitions .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --ct; --g; cc_dim1 = *ldcc; cc_offset = 1 + cc_dim1; cc -= cc_offset; /* Function Body */ trana = *(unsigned char *)transa == 'T' || *(unsigned char *)transa == 'C'; tranb = *(unsigned char *)transb == 'T' || *(unsigned char *)transb == 'C'; ctrana = *(unsigned char *)transa == 'C'; ctranb = *(unsigned char *)transb == 'C'; /* Compute expected result, one column at a time, in CT using data */ /* in A, B and C. */ /* Compute gauges in G. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; ct[i__3].r = 0., ct[i__3].i = 0.; g[i__] = 0.; /* L10: */ } if (! trana && ! tranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = k + j * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7].i, z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[ i__7].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag( &a[i__ + k * a_dim1]), abs(d__2))) * ((d__3 = b[ i__5].r, abs(d__3)) + (d__4 = d_imag(&b[k + j * b_dim1]), abs(d__4))); /* L20: */ } /* L30: */ } } else if (trana && ! 
tranb) { if (ctrana) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); i__6 = k + j * b_dim1; z__2.r = z__3.r * b[i__6].r - z__3.i * b[i__6].i, z__2.i = z__3.r * b[i__6].i + z__3.i * b[i__6] .r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[k + j * b_dim1]), abs(d__4))); /* L40: */ } /* L50: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; i__7 = k + j * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, z__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = k + j * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[k + j * b_dim1]), abs(d__4))); /* L60: */ } /* L70: */ } } } else if (! 
trana && tranb) { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; d_cnjg(&z__3, &b[j + k * b_dim1]); z__2.r = a[i__6].r * z__3.r - a[i__6].i * z__3.i, z__2.i = a[i__6].r * z__3.i + a[i__6].i * z__3.r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[j + k * b_dim1]), abs(d__4))); /* L80: */ } /* L90: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = i__ + k * a_dim1; i__7 = j + k * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[i__7] .i, z__2.i = a[i__6].r * b[i__7].i + a[i__6] .i * b[i__7].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = i__ + k * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + k * a_dim1]), abs(d__2))) * (( d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag( &b[j + k * b_dim1]), abs(d__4))); /* L100: */ } /* L110: */ } } } else if (trana && tranb) { if (ctrana) { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); d_cnjg(&z__4, &b[j + k * b_dim1]); z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i + z__3.i * z__4.r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L120: */ } /* L130: */ } } else { 
i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; d_cnjg(&z__3, &a[k + i__ * a_dim1]); i__6 = j + k * b_dim1; z__2.r = z__3.r * b[i__6].r - z__3.i * b[i__6].i, z__2.i = z__3.r * b[i__6].i + z__3.i * b[ i__6].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L140: */ } /* L150: */ } } } else { if (ctranb) { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; d_cnjg(&z__3, &b[j + k * b_dim1]); z__2.r = a[i__6].r * z__3.r - a[i__6].i * z__3.i, z__2.i = a[i__6].r * z__3.i + a[i__6].i * z__3.r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L160: */ } /* L170: */ } } else { i__2 = *kk; for (k = 1; k <= i__2; ++k) { i__3 = *m; for (i__ = 1; i__ <= i__3; ++i__) { i__4 = i__; i__5 = i__; i__6 = k + i__ * a_dim1; i__7 = j + k * b_dim1; z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * b[ i__7].i, z__2.i = a[i__6].r * b[i__7].i + a[i__6].i * b[i__7].r; z__1.r = ct[i__5].r + z__2.r, z__1.i = ct[i__5].i + z__2.i; ct[i__4].r = z__1.r, ct[i__4].i = z__1.i; i__4 = k + i__ * a_dim1; i__5 = j + k * b_dim1; g[i__] += ((d__1 = a[i__4].r, abs(d__1)) + (d__2 = d_imag(&a[k + i__ * a_dim1]), abs(d__2))) * ((d__3 = b[i__5].r, abs(d__3)) + (d__4 = d_imag(&b[j + k * b_dim1]), abs(d__4))); /* L180: */ } /* L190: */ } } } } i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; 
z__2.r = alpha->r * ct[i__4].r - alpha->i * ct[i__4].i, z__2.i = alpha->r * ct[i__4].i + alpha->i * ct[i__4].r; i__5 = i__ + j * c_dim1; z__3.r = beta->r * c__[i__5].r - beta->i * c__[i__5].i, z__3.i = beta->r * c__[i__5].i + beta->i * c__[i__5].r; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; ct[i__3].r = z__1.r, ct[i__3].i = z__1.i; i__3 = i__ + j * c_dim1; g[i__] = ((d__1 = alpha->r, abs(d__1)) + (d__2 = d_imag(alpha), abs(d__2))) * g[i__] + ((d__3 = beta->r, abs(d__3)) + ( d__4 = d_imag(beta), abs(d__4))) * ((d__5 = c__[i__3].r, abs(d__5)) + (d__6 = d_imag(&c__[i__ + j * c_dim1]), abs( d__6))); /* L200: */ } /* Compute the error ratio for this result. */ *err = 0.; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__ + j * cc_dim1; z__2.r = ct[i__3].r - cc[i__4].r, z__2.i = ct[i__3].i - cc[i__4] .i; z__1.r = z__2.r, z__1.i = z__2.i; erri = ((d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), abs( d__2))) / *eps; if (g[i__] != 0.) { erri /= g[i__]; } *err = max(*err,erri); if (*err * sqrt(*eps) >= 1.) { goto L230; } /* L210: */ } /* L220: */ } /* If the loop completes, all results are at least half accurate. */ goto L250; /* Report fatal error. */ L230: *fatal = TRUE_; io___382.ciunit = *nout; s_wsfe(&io___382); e_wsfe(); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { if (*mv) { io___383.ciunit = *nout; s_wsfe(&io___383); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&ct[i__], (ftnlen)sizeof(doublereal)); do_fio(&c__2, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof( doublereal)); e_wsfe(); } else { io___384.ciunit = *nout; s_wsfe(&io___384); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__2, (char *)&cc[i__ + j * cc_dim1], (ftnlen)sizeof( doublereal)); do_fio(&c__2, (char *)&ct[i__], (ftnlen)sizeof(doublereal)); e_wsfe(); } /* L240: */ } if (*n > 1) { io___385.ciunit = *nout; s_wsfe(&io___385); do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer)); e_wsfe(); } L250: return 0; /* End of ZMMCH. 
*/ } /* zmmch_ */ logical lze_(doublecomplex *ri, doublecomplex *rj, integer *lr) { /* System generated locals */ integer i__1, i__2, i__3; logical ret_val; /* Local variables */ integer i__; /* Tests if two arrays are identical. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ --rj; --ri; /* Function Body */ i__1 = *lr; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; if (ri[i__2].r != rj[i__3].r || ri[i__2].i != rj[i__3].i) { goto L20; } /* L10: */ } ret_val = TRUE_; goto L30; L20: ret_val = FALSE_; L30: return ret_val; /* End of LZE. */ } /* lze_ */ logical lzeres_(char *type__, char *uplo, integer *m, integer *n, doublecomplex *aa, doublecomplex *as, integer *lda, ftnlen type_len, ftnlen uplo_len) { /* System generated locals */ integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4; logical ret_val; /* Builtin functions */ integer s_cmp(const char *, const char *, ftnlen, ftnlen); /* Local variables */ integer i__, j, ibeg, iend; logical upper; /* Tests if selected elements in two arrays are equal. */ /* TYPE is 'GE' or 'HE' or 'SY'. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. Local Scalars .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ as_dim1 = *lda; as_offset = 1 + as_dim1; as -= as_offset; aa_dim1 = *lda; aa_offset = 1 + aa_dim1; aa -= aa_offset; /* Function Body */ upper = *(unsigned char *)uplo == 'U'; if (s_cmp(type__, "GE", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *lda; for (i__ = *m + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L10: */ } /* L20: */ } } else if (s_cmp(type__, "HE", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(type__, "SY", (ftnlen)2, (ftnlen)2) == 0) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (upper) { ibeg = 1; iend = j; } else { ibeg = j; iend = *n; } i__2 = ibeg - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L30: */ } i__2 = *lda; for (i__ = iend + 1; i__ <= i__2; ++i__) { i__3 = i__ + j * aa_dim1; i__4 = i__ + j * as_dim1; if (aa[i__3].r != as[i__4].r || aa[i__3].i != as[i__4].i) { goto L70; } /* L40: */ } /* L50: */ } } ret_val = TRUE_; goto L80; L70: ret_val = FALSE_; L80: return ret_val; /* End of LZERES. */ } /* lzeres_ */ /* Double Complex */ void zbeg_(doublecomplex * ret_val, logical *reset) { /* System generated locals */ doublereal d__1, d__2; doublecomplex z__1; /* Local variables */ static integer i__, j, ic, mi, mj; /* Generates complex numbers as pairs of random numbers uniformly */ /* distributed between -0.5 and 0.5. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Local Scalars .. */ /* .. Save statement .. */ /* .. Intrinsic Functions .. */ /* .. Executable Statements .. 
*/ if (*reset) { /* Initialize local variables. */ mi = 891; mj = 457; i__ = 7; j = 7; ic = 0; *reset = FALSE_; } /* The sequence of values of I or J is bounded between 1 and 999. */ /* If initial I or J = 1,2,3,6,7 or 9, the period will be 50. */ /* If initial I or J = 4 or 8, the period will be 25. */ /* If initial I or J = 5, the period will be 10. */ /* IC is used to break up the period by skipping 1 value of I or J */ /* in 6. */ ++ic; L10: i__ *= mi; j *= mj; i__ -= i__ / 1000 * 1000; j -= j / 1000 * 1000; if (ic >= 5) { ic = 0; goto L10; } d__1 = (i__ - 500) / 1001.; d__2 = (j - 500) / 1001.; z__1.r = d__1, z__1.i = d__2; ret_val->r = z__1.r, ret_val->i = z__1.i; return ; /* End of ZBEG. */ } /* zbeg_ */ doublereal ddiff_(doublereal *x, doublereal *y) { /* System generated locals */ doublereal ret_val; /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ ret_val = *x - *y; return ret_val; /* End of DDIFF. */ } /* ddiff_ */ /* Subroutine */ int chkxer_(char *srnamt, integer *infot, integer *nout, logical *lerr, logical *ok, ftnlen srnamt_len) { /* Format strings */ static char fmt_9999[] = "(\002 ***** ILLEGAL VALUE OF PARAMETER NUMBER" " \002,i2,\002 NOT D\002,\002ETECTED BY \002,a6,\002 *****\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Fortran I/O blocks */ static cilist io___397 = { 0, 0, 0, fmt_9999, 0 }; /* Tests whether XERBLA has detected an error when it should. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. 
*/ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Executable Statements .. */ if (! (*lerr)) { io___397.ciunit = *nout; s_wsfe(&io___397); do_fio(&c__1, (char *)&(*infot), (ftnlen)sizeof(integer)); do_fio(&c__1, srnamt, (ftnlen)6); e_wsfe(); *ok = FALSE_; } *lerr = FALSE_; return 0; /* End of CHKXER. */ } /* chkxer_ */ /* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len) { /* Format strings */ static char fmt_9999[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 INSTEAD\002,\002 OF \002,i2,\002 *******\002)"; static char fmt_9997[] = "(\002 ******* XERBLA WAS CALLED WITH INFO =" " \002,i6,\002 *******\002)"; static char fmt_9998[] = "(\002 ******* XERBLA WAS CALLED WITH SRNAME =" " \002,a6,\002 INSTE\002,\002AD OF \002,a6,\002 *******\002)"; /* Builtin functions */ integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_cmp(const char *, const char *, ftnlen, ftnlen); /* Fortran I/O blocks */ static cilist io___398 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___399 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___400 = { 0, 0, 0, fmt_9998, 0 }; /* This is a special version of XERBLA to be used only as part of */ /* the test program for testing error exits from the Level 3 BLAS */ /* routines. */ /* XERBLA is an error handler for the Level 3 BLAS routines. */ /* It is called by the Level 3 BLAS routines if an input parameter is */ /* invalid. */ /* Auxiliary routine for test program for Level 3 Blas. */ /* -- Written on 8-February-1989. */ /* Jack Dongarra, Argonne National Laboratory. */ /* Iain Duff, AERE Harwell. */ /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */ /* Sven Hammarling, Numerical Algorithms Group Ltd. */ /* .. Scalar Arguments .. */ /* .. Scalars in Common .. */ /* .. Common blocks .. */ /* .. Executable Statements .. 
*/ infoc_2.lerr = TRUE_; if (*info != infoc_2.infot) { if (infoc_2.infot != 0) { io___398.ciunit = infoc_2.nout; s_wsfe(&io___398); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&infoc_2.infot, (ftnlen)sizeof(integer)); e_wsfe(); } else { io___399.ciunit = infoc_2.nout; s_wsfe(&io___399); do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); e_wsfe(); } infoc_2.ok = FALSE_; } if (s_cmp(srname, srnamc_1.srnamt, (ftnlen)6, (ftnlen)6) != 0) { io___400.ciunit = infoc_2.nout; s_wsfe(&io___400); do_fio(&c__1, srname, (ftnlen)6); do_fio(&c__1, srnamc_1.srnamt, (ftnlen)6); e_wsfe(); infoc_2.ok = FALSE_; } return 0; /* End of XERBLA */ } /* xerbla_ */ /* Main program alias */ int zblat3_ () { main (); return 0; } blis-0.6.1/build/000077500000000000000000000000001360743507500135605ustar00rootroot00000000000000blis-0.6.1/build/add-copyright.py000077500000000000000000000254141360743507500167010ustar00rootroot00000000000000#!/usr/bin/env python3 # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Import modules import os import sys import getopt import re import subprocess import datetime def print_usage(): my_print( " " ) my_print( " %s" % script_name ) my_print( " " ) my_print( " Field G. Van Zee" ) my_print( " " ) my_print( " Update copyright lines of all created or modified source files currently" ) my_print( " staged in the git index, and also insert new copyright lines where they" ) my_print( " currently are missing. This script targets copyright lines for one" ) my_print( " organization at a time." ) my_print( " " ) my_print( " Usage:" ) my_print( " " ) my_print( " %s [options]" % script_name ) my_print( " " ) my_print( " Arguments:" ) my_print( " " ) my_print( " " ) my_print( " The following options are accepted:" ) my_print( " " ) my_print( " -o org organization name" ) my_print( " Update and add copyrights for an organization named ." ) my_print( " By default, is 'Advanced Micro Devices, Inc.'" ) my_print( " " ) my_print( " -u update only" ) my_print( " Update existing copyrights to reflect the current year," ) my_print( " but do not add any additional copyright lines. 
With this" ) my_print( " option, the script still only updates copyright lines for" ) my_print( " the specified (or default) organization. The default is" ) my_print( " to update but also add copyright lines where missing." ) my_print( " " ) my_print( " -d dry run" ) my_print( " Go through all of the motions, but don't actually modify" ) my_print( " any files. The default behavior is to not enable dry run." ) my_print( " " ) my_print( " -q quiet" ) my_print( " Do not output feedback while processing each file. The" ) my_print( " default behavior is to output one line of text to stdout" ) my_print( " per file updated." ) my_print( " " ) my_print( " -h help" ) my_print( " Output this information and exit." ) my_print( " " ) # ------------------------------------------------------------------------------ def my_print( s ): sys.stdout.write( "%s\n" % s ) #sys.stdout.flush() def my_echo( s ): if not quiet: sys.stdout.write( "%s: %s\n" % ( output_name, s ) ) #sys.stdout.flush() # ------------------------------------------------------------------------------ def main(): global script_name global output_name global quiet # Obtain the script name. path, script_name = os.path.split(sys.argv[0]) output_name = script_name # Default values for optional arguments. the_org = 'Advanced Micro Devices, Inc.' update_only = False dry_run = False quiet = False # Process our command line options. try: opts, args = getopt.getopt( sys.argv[1:], "do:uhq" ) except getopt.GetoptError as err: # print help information and exit: my_print( str(err) ) # will print something like "option -a not recognized" print_usage() sys.exit(2) for opt, optarg in opts: if opt == "-o": the_org = optarg elif opt == "-u": update_only = True elif opt == "-d": dry_run = True elif opt == "-q": quiet = True elif opt == "-h": print_usage() sys.exit() else: print_usage() sys.exit() # Print usage if we don't have exactly zero arguments. if len( args ) != 0: print_usage() sys.exit() # Acquire our only mandatory argument. 
#driverfile = args[0] # Query the current year. the_time = datetime.datetime.now() cur_year = str(the_time.year) # We run 'git status' with --porcelain to make the output easily parseable. gitstatus = 'git status --porcelain' # Run the 'git status' command and capture the output. p = subprocess.run( gitstatus, stdout=subprocess.PIPE, shell=True ) git_lines = p.stdout.decode().splitlines() git_num_lines = int( len( git_lines ) ) # Consider each line of output from 'git status' for i in range( git_num_lines ): # Parse the current line to find the performance value. git_line = git_lines[i] git_words = git_line.split() mod_char = git_line[0] # Check the first character of the git output. We want to only update # files that are new ('A'), modified ('M'), or renamed ('R'). if mod_char != 'A' and \ mod_char != 'M' and \ mod_char != 'R': continue # Identify the filename for the current line of 'git status' output. if mod_char == 'R': # For renamed files, we need to reference them by their new names, # which appear after the "->" char sequence in git_words[2]. filename = git_words[3] else: filename = git_words[1] #my_echo( "-debug---- %s" % filename ) # Start by opening the file. (We can assume it exists since it # was found by 'git status', so no need to check for existence.) # Read all lines in the file and then close it. f = open( filename, "r" ) file_lines = f.readlines() f.close() # Concatenate all lines in the file into one string. file_string = "".join( file_lines ) # Search for an existing copyright line. has_cr = re.search( 'Copyright \(C\)', file_string ) # If the file does not have any copyright notice in it already, we # assume we don't need to update it. if not has_cr: my_echo( "[nocrline] %s" % filename ) continue # Check whether the file already has a copyright for the_org. We may # need to use this information later. 
has_org_cr = re.search( 'Copyright \(C\) ([0-9][0-9][0-9][0-9]), %s' % the_org, file_string ) # Initialize the list of processed (potentially modified) file lines. mod_file_lines = [] # At this point we know that the file has at least one copyright, and # has_org_cr encodes whether it already has a copyright for the_org. # We process the files that we know already have copyrights for the_org # differently from the files that do not yet have them. if has_org_cr: # Iterate through the lines in the current file. for line in file_lines: result = re.search( 'Copyright \(C\) ([0-9][0-9][0-9][0-9]), %s' % the_org, line ) # If the current line matches a copyright line for the_org... if result: # Extract the year saved as the first/only group in the # regular expression. old_year = result.group(1) # Don't need to update the year if it's already up-to-date. if old_year != cur_year: # Substitute the old year for the current year. find_line = ' %s, ' % old_year repl_line = ' %s, ' % cur_year line_ny = re.sub( find_line, repl_line, line ) my_echo( "[updated ] %s" % filename ) # Add the updated line to the running list. mod_file_lines += line_ny else: my_echo( "[up2date ] %s" % filename ) # Add the unchanged line to the running list. mod_file_lines += line else: # Add the unchanged line to the running list. mod_file_lines += line # endif result # endfor else: # Don't go any further if we're only updating existing copyright # lines. if update_only: my_echo( "[nocrline] %s" % filename ) continue num_file_lines = len( file_lines ) # Iterate through the lines in the current file. for i in range( int(num_file_lines) ): line = file_lines[i] # Only look at the next line if we are not at the last line. if i < int(num_file_lines) - 1: line_next = file_lines[i+1] else: line_next = file_lines[i] # Try to match both the current line and the next line. 
result = re.search( 'Copyright \(C\) ([0-9][0-9][0-9][0-9]), (.*)', line ) resnext = re.search( 'Copyright \(C\) ([0-9][0-9][0-9][0-9]), (.*)', line_next ) # Parse the results. if result: if resnext: # The current line matches but so does the next. Add the # current line unchanged to the running list. mod_file_lines += line else: # The current line matches but the next does not. Thus, # this branch only executes for the *last* copyright line # in the file. # Extract the year and organization from the matched # string. old_year = result.group(1) old_org = result.group(2) # Set up search/replace strings to convert the current # line into one that serves as copyright for the_org. find_line = '%s, %s' % (old_year, old_org) repl_line = '%s, %s' % (cur_year, the_org) line_nyno = re.sub( find_line, repl_line, line ) # Add the current line and then also insert our new # copyright line for the_org into the running list. mod_file_lines += line mod_file_lines += line_nyno my_echo( "[added ] %s" % filename ) # endif resnext else: # The current line does not match. Pass it through unchanged. mod_file_lines += line # endif result # endfor # endif has_org_cr if not dry_run: # Open the file for writing. f = open( filename, "w" ) # Join the modified file lines into a single string. final_string = "".join( mod_file_lines ) # Write the lines to the file. f.write( final_string ) # Close the file. f.close() # endif not dry_run # Return from main(). return 0 if __name__ == "__main__": main() blis-0.6.1/build/bli_config.h.in000066400000000000000000000076731360743507500164460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
 - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
 - Neither the name(s) of the copyright holder(s) nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// This file is a template, not a compilable header: every @name@ token below
// is a placeholder that must be replaced with real text before the result can
// be included.
// NOTE(review): presumably the configure script performs the substitution
// (0/1 for the @enable_*@ switches, an integer for the @*int_type_size@
// values, and #define lines for the three @*_define(s)@ tokens) and writes
// the result out as bli_config.h -- confirm against configure.

#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H

// Enabled configuration "family" (config_name)
@config_name_define@

// Enabled sub-configurations (config_list)
@config_list_defines@

// Enabled kernel sets (kernel_list)
@kernel_list_defines@

// One-way switches: each of the next four macros is defined only when its
// placeholder evaluates non-zero. There is no BLIS_DISABLE_* counterpart for
// them, so "disabled" simply means "not defined".

#if @enable_openmp@
#define BLIS_ENABLE_OPENMP
#endif

#if @enable_pthreads@
#define BLIS_ENABLE_PTHREADS
#endif

#if @enable_jrir_slab@
#define BLIS_ENABLE_JRIR_SLAB
#endif

#if @enable_jrir_rr@
#define BLIS_ENABLE_JRIR_RR
#endif

// Two-way switches: exactly one of BLIS_ENABLE_<X> / BLIS_DISABLE_<X> is
// defined for each of the following features.

#if @enable_pba_pools@
#define BLIS_ENABLE_PBA_POOLS
#else
#define BLIS_DISABLE_PBA_POOLS
#endif

#if @enable_sba_pools@
#define BLIS_ENABLE_SBA_POOLS
#else
#define BLIS_DISABLE_SBA_POOLS
#endif

#if @enable_mem_tracing@
#define BLIS_ENABLE_MEM_TRACING
#else
#define BLIS_DISABLE_MEM_TRACING
#endif

// Integer sizes: the macro is defined only when the placeholder is exactly 64
// or 32. For any other value the macro is left undefined here.

#if @int_type_size@ == 64
#define BLIS_INT_TYPE_SIZE 64
#elif @int_type_size@ == 32
#define BLIS_INT_TYPE_SIZE 32
#else
// determine automatically
#endif

#if @blas_int_type_size@ == 64
#define BLIS_BLAS_INT_TYPE_SIZE 64
#elif @blas_int_type_size@ == 32
#define BLIS_BLAS_INT_TYPE_SIZE 32
#else
// determine automatically
#endif

// Overridable two-way switches: the pair of #ifndef guards means the
// configured value is applied only if neither BLIS_ENABLE_<X> nor
// BLIS_DISABLE_<X> was already defined before this point (for example on the
// compiler command line); an existing definition of either one wins.

#ifndef BLIS_ENABLE_BLAS
#ifndef BLIS_DISABLE_BLAS
#if @enable_blas@
#define BLIS_ENABLE_BLAS
#else
#define BLIS_DISABLE_BLAS
#endif
#endif
#endif

#ifndef BLIS_ENABLE_CBLAS
#ifndef BLIS_DISABLE_CBLAS
#if @enable_cblas@
#define BLIS_ENABLE_CBLAS
#else
#define BLIS_DISABLE_CBLAS
#endif
#endif
#endif

#ifndef BLIS_ENABLE_MIXED_DT
#ifndef BLIS_DISABLE_MIXED_DT
#if @enable_mixed_dt@
#define BLIS_ENABLE_MIXED_DT
#else
#define BLIS_DISABLE_MIXED_DT
#endif
#endif
#endif

#ifndef BLIS_ENABLE_MIXED_DT_EXTRA_MEM
#ifndef BLIS_DISABLE_MIXED_DT_EXTRA_MEM
#if @enable_mixed_dt_extra_mem@
#define BLIS_ENABLE_MIXED_DT_EXTRA_MEM
#else
#define BLIS_DISABLE_MIXED_DT_EXTRA_MEM
#endif
#endif
#endif

// Remaining two-way switches (not overridable: no #ifndef guards, so each
// one is always set from its placeholder).

#if @enable_sup_handling@
#define BLIS_ENABLE_SUP_HANDLING
#else
#define BLIS_DISABLE_SUP_HANDLING
#endif

#if @enable_memkind@
#define BLIS_ENABLE_MEMKIND
#else
#define BLIS_DISABLE_MEMKIND
#endif

#if @enable_pragma_omp_simd@
#define BLIS_ENABLE_PRAGMA_OMP_SIMD
#else
#define BLIS_DISABLE_PRAGMA_OMP_SIMD
#endif

#if @enable_sandbox@
#define BLIS_ENABLE_SANDBOX
#else
#define BLIS_DISABLE_SANDBOX
#endif

#if @enable_shared@
#define BLIS_ENABLE_SHARED
#else
#define BLIS_DISABLE_SHARED
#endif

// Closes the BLIS_CONFIG_H include guard.
#endif
blis-0.6.1/build/bump-version.sh000077500000000000000000000144011360743507500165450ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # bump-version.sh # # Field G. Van Zee # print_usage() { #local script_name # Get the script name #script_name=${0##*/} # Echo usage info echo " " echo " "$script_name echo " " echo " Field G. Van Zee" echo " " echo " Performs a series of actions needed when incrementing (bumping) the" echo " BLIS version number:" echo " 1. Overwrite the version file with the version string passed" echo " into this script (new_vers)." echo " 2. Commit the updated version file." echo " 3. Create a new tag (named the same as new_vers) which refers to" echo " the commit created in (2)." echo " 4. Update the CHANGELOG file." echo " 5. Commit the updated CHANGELOG file." echo " " echo " Usage:" echo " ${script_name} [options] new_vers" echo " " echo " Arguments:" echo " " echo " new_vers The new version string." echo " " echo " Options:" echo " " echo " -d dry-run" echo " Go through all the motions, but don't actually make any" echo " changes to files or perform any git commits. Note that" echo " this will result in the commits for (2) and (5) above" echo " being equal to the initial commit in the script output." echo " -f VERSFILE version file name" echo " Update VERSFILE with new version string instead of default" echo " 'version' file." # Exit with non-zero exit status exit 1 } main() { # -- BEGIN GLOBAL VARIABLE DECLARATIONS -- # The name of the script, stripped of any preceeding path. script_name=${0##*/} # The name of the config.mk file. 
configmk_file='config.mk' # The name of the CHANGELOG file. changelog_file='CHANGELOG' # The name of the default version file. version_file_def='version' # The name of the specified version file. version_file='' # Strings used during version query. git_commit_str='' new_version_str='' # The script name to use instead of the $0 when outputting messages. output_name='' # The git directory. gitdir='.git' # Whether we are performing a dry run or not. dry_run_flag="" # -- END GLOBAL VARIABLE DECLARATIONS -- # Process our command line options. while getopts ":dhf:" opt; do case $opt in d ) dry_run_flag="1" ;; f ) version_file=$OPTARG ;; h ) print_usage ;; \? ) print_usage esac done shift $(($OPTIND - 1)) # If a version file name was not given, set version_file to the default # value. if [ -n "${version_file}" ]; then echo "${script_name}: version file specified: '${version_file}'." else echo "${script_name}: no version file specified; defaulting to '${version_file_def}'." version_file="${version_file_def}" fi # Check the number of arguments after command line option processing. if [ $# = "1" ]; then new_version_str=$1 echo "${script_name}: preparing to bump to version '${new_version_str}'." else print_usage fi # Check if the .git dir exists; if it does not, we do nothing. if [ -d "${gitdir}" ]; then echo "${script_name}: found '${gitdir}' directory; assuming git clone." git_commit_str=$(git describe --always) echo "${script_name}: initial commit: ${git_commit_str}." echo "${script_name}: updating version file '${version_file}'." if [ -z "$dry_run_flag" ]; then echo "${new_version_str}" > ${version_file} fi echo "${script_name}: executing: git commit -m \"Version file update (${new_version_str})\" ${version_file}." if [ -z "$dry_run_flag" ]; then git commit -m "Version file update (${new_version_str})" ${version_file} fi git_commit_str=$(git describe --always) echo "${script_name}: commit to be tagged: ${git_commit_str}." 
echo "${script_name}: executing: git tag ${new_version_str} ${git_commit_str}." if [ -z "$dry_run_flag" ]; then git tag ${new_version_str} ${git_commit_str} fi echo "${script_name}: updating ${changelog_file}." if [ -z "$dry_run_flag" ]; then # If 'make distclean' was run recently, we need to re-run # configure in order for 'make changelog' to work properly. if [ ! -f "${configmk_file}" ]; then ./configure auto fi make changelog fi echo "${script_name}: executing: git commit -m \"CHANGELOG update (${new_version_str})\" ${changelog_file}." if [ -z "$dry_run_flag" ]; then git commit -m "CHANGELOG update (${new_version_str})" ${changelog_file} fi git_commit_str=$(git describe --always) echo "${script_name}: latest commit: ${git_commit_str}." else echo "${script_name}: could not find '${gitdir}' directory; bailing out." fi # Exit peacefully. return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-0.6.1/build/config.mk.in000066400000000000000000000170261360743507500157710ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Only include this block of code once ifndef CONFIG_MK_INCLUDED CONFIG_MK_INCLUDED := yes # The version string. This could be the official string or a custom # string forced at configure-time. VERSION := @version@ # The shared library .so major and minor.build version numbers. SO_MAJOR := @so_version_major@ SO_MINORB := @so_version_minorbuild@ SO_MMB := $(SO_MAJOR).$(SO_MINORB) # The name of the configuration family. CONFIG_NAME := @config_name@ # The list of sub-configurations associated with CONFIG_NAME. Each # sub-configuration in CONFIG_LIST corresponds to a configuration # sub-directory in the 'config' directory. See the 'config_registry' # file for the full list of registered configurations. CONFIG_LIST := @config_list@ # This list of kernels needed for the configurations in CONFIG_LIST. # Each item in this list corresponds to a sub-directory in the top-level # 'kernels' directory. Oftentimes, this list is identical to CONFIG_LIST, # but not always. For example, if configuration X and Y use the same # kernel set X, and configuration W uses kernel set Q, and the CONFIG_LIST # might contained "X Y Z W", then the KERNEL_LIST would contain "X Z Q". 
KERNEL_LIST := @kernel_list@ # This list contains some number of "kernel:config" pairs, where "config" # specifies which configuration's compilation flags (CFLAGS) should be # used to compile the source code for the kernel set named "kernel". KCONFIG_MAP := @kconfig_map@ # The operating system name, which should be either 'Linux' or 'Darwin'. OS_NAME := @os_name@ # Check for whether the operating system is Windows. IS_WIN := @is_win@ # The directory path to the top level of the source distribution. When # building in-tree, this path is ".". When building out-of-tree, this path # is path used to identify the location of configure. We also allow the # includer of config.mk to override this value by setting DIST_PATH prior # to including this file. This override option is employed, for example, # when common.mk (and therefore config.mk) is included by the Makefile # local to the 'testsuite' directory, or the 'test' directory containing # individual test drivers. ifeq ($(strip $(DIST_PATH)),) DIST_PATH := @dist_path@ endif # The C compiler. CC_VENDOR := @CC_VENDOR@ CC := @CC@ # Important C compiler ranges. GCC_OT_4_9_0 := @gcc_older_than_4_9_0@ GCC_OT_6_1_0 := @gcc_older_than_6_1_0@ GCC_OT_9_1_0 := @gcc_older_than_9_1_0@ # The C++ compiler. NOTE: A C++ is typically not needed. CXX := @CXX@ # Static library indexer. RANLIB := @RANLIB@ # Archiver. AR := @AR@ # Preset (required) CFLAGS and LDFLAGS. These variables capture the value # of the CFLAGS and LDFLAGS environment variables at configure-time (and/or # the value of CFLAGS/LDFLAGS if either was specified on the command line). # These flags are used in addition to the flags automatically determined # by the build system. CFLAGS_PRESET := @cflags_preset@ LDFLAGS_PRESET := @ldflags_preset@ # The level of debugging info to generate. DEBUG_TYPE := @debug_type@ # The requested threading model. THREADING_MODEL := @threading_model@ # Whether the compiler supports "#pragma omp simd" via the -fopenmp-simd option. 
PRAGMA_OMP_SIMD := @pragma_omp_simd@ # The installation prefix, exec_prefix, libdir, includedir, and shareddir # values from configure tell us where to install the libraries, header files, # and public makefile fragments. We must first assign each substituted # @anchor@ to its own variable. Why? Because the subsitutions may contain # unevaluated variable expressions. For example, '@libdir@' may be replaced # with '${exec_prefix}/lib'. By assigning the anchors to variables first, and # then assigning them to their final INSTALL_* variables, we allow prefix and # exec_prefix to be used in the definitions of exec_prefix, libdir, # includedir, and sharedir. prefix := @prefix@ exec_prefix := @exec_prefix@ libdir := @libdir@ includedir := @includedir@ sharedir := @sharedir@ # Notice that we support the use of DESTDIR so that advanced users may install # to a temporary location. INSTALL_LIBDIR := $(DESTDIR)$(libdir) INSTALL_INCDIR := $(DESTDIR)$(includedir) INSTALL_SHAREDIR := $(DESTDIR)$(sharedir) #$(info prefix = $(prefix) ) #$(info exec_prefix = $(exec_prefix) ) #$(info libdir = $(libdir) ) #$(info includedir = $(includedir) ) #$(info sharedir = $(sharedir) ) #$(error .) # Whether to output verbose command-line feedback as the Makefile is # processed. ENABLE_VERBOSE := @enable_verbose@ # Whether we are building out-of-tree. BUILDING_OOT := @configured_oot@ # Whether we need to employ an alternate method for passing object files to # ar and/or the linker to work around a small value of ARG_MAX. ARG_MAX_HACK := @enable_arg_max_hack@ # Whether to build the static and shared libraries. # NOTE: The "MK_" prefix, which helps differentiate these variables from # their corresonding cpp macros that use the BLIS_ prefix. MK_ENABLE_STATIC := @enable_static@ MK_ENABLE_SHARED := @enable_shared@ # Whether to export all symbols within the shared library, even those symbols # that are considered to be for internal use only. 
EXPORT_SHARED := @export_shared@ # Whether to enable either the BLAS or CBLAS compatibility layers. MK_ENABLE_BLAS := @enable_blas@ MK_ENABLE_CBLAS := @enable_cblas@ # Whether libblis will depend on libmemkind for certain memory allocations. MK_ENABLE_MEMKIND := @enable_memkind@ # The name of a sandbox defining an alternative gemm implementation. If empty, # no sandbox will be used and the conventional gemm implementation will remain # enabled. SANDBOX := @sandbox@ # The name of the pthreads library. LIBPTHREAD := @libpthread@ # end of ifndef CONFIG_MK_INCLUDED conditional block endif blis-0.6.1/build/detect/000077500000000000000000000000001360743507500150305ustar00rootroot00000000000000blis-0.6.1/build/detect/config/000077500000000000000000000000001360743507500162755ustar00rootroot00000000000000blis-0.6.1/build/detect/config/config_detect.c000066400000000000000000000037171360743507500212460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define BLIS_EXPORT_BLIS #include "bli_system.h" #include "bli_type_defs.h" #include "bli_arch.h" #include "bli_cpuid.h" int main( int argc, char** argv ) { arch_t id = bli_cpuid_query_id(); char* s = bli_arch_string( id ); printf( "%s\n", s ); return 0; } blis-0.6.1/build/detect/config/old/000077500000000000000000000000001360743507500170535ustar00rootroot00000000000000blis-0.6.1/build/detect/config/old/arch_detect.c000066400000000000000000000003131360743507500214610ustar00rootroot00000000000000#if defined(__i386) || defined(_X86) ARCH_X86 #endif #if defined(__x86_64__) || defined(__amd64__) ARCH_X86_64 #endif #if defined(__arm__) ARCH_ARM #endif #if defined(__aarch64__) ARCH_AARCH64 #endif blis-0.6.1/build/detect/config/old/auto-detect.sh000077500000000000000000000060371360743507500216360ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2015, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # auto-detect.sh # # Zhang Xianyi # main() { if [ clang -v > /dev/null 2>&1 ]; then CC=clang else CC=gcc fi CPUID_SRC=cpuid_x86.c CPUID_BIN=blis_cpu_detect ARCH=generic # The name of the script, stripped of any preceeding path. script_name=${0##*/} # The path to the script. We need this to find the top-level directory # of the source distribution in the event that the user has chosen to # build elsewhere. 
dist_path=${0%/${script_name}} # The path to the directory in which we are building. We do this to # make explicit that we distinguish between the top-level directory # of the distribution and the directory in which we are building. cur_dirpath="." # # Detect architecture by predefined macros # out1=`$CC -E ${dist_path}/arch_detect.c` ARCH=`echo $out1 | grep -o "ARCH_[a-zA-Z0-9_]*" | head -n1` if [ $ARCH = "ARCH_X86_64" ]; then CPUID_SRC=cpuid_x86.c elif [ $ARCH = "ARCH_X86" ]; then CPUID_SRC=cpuid_x86.c elif [ $ARCH = "ARCH_ARM" ]; then CPUID_SRC=cpuid_arm.c elif [ $ARCH = "ARCH_AARCH64" ]; then # Only support armv8 now echo "armv8a" return 0 else echo "generic" return 0 fi # # Detect CPU cores # $CC -o ${cur_dirpath}/$CPUID_BIN ${dist_path}/$CPUID_SRC ${cur_dirpath}/$CPUID_BIN rm -rf ${cur_dirpath}/$CPUID_BIN # Exit peacefully. return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-0.6.1/build/detect/config/old/cpuid_arm.c000066400000000000000000000071521360743507500211670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2015, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #define CPUNAME_REFERENCE 0 #define CPUNAME_ARMV7 1 #define CPUNAME_CORTEXA9 2 #define CPUNAME_CORTEXA15 3 static char *cpuname[] = { "reference", "armv7a", "cortex-a9", "cortex-a15", }; int get_feature(char *search) { FILE *infile; char buffer[2048], *p,*t; p = (char *) NULL; infile = fopen("/proc/cpuinfo", "r"); if (infile == NULL) { return 0; } while (fgets(buffer, sizeof(buffer), infile)) { if (!strncmp("Features", buffer, 8)) { p = strchr(buffer, ':') + 2; break; } } fclose(infile); if( p == NULL ) return 0; t = strtok(p," "); if (t != NULL) { if (!strcmp(t, search)) { return 1; } } while( t = strtok(NULL," ")){ if (!strcmp(t, search)) { return 1; } } return 0; } int cpu_detect(void) { FILE *infile; char buffer[512], *p; p = (char *) NULL ; infile = fopen("/proc/cpuinfo", "r"); if (infile == NULL) { return CPUNAME_REFERENCE; } while (fgets(buffer, sizeof(buffer), infile)) { if (!strncmp("CPU part", buffer, 8)) { p = strchr(buffer, ':') + 2; break; } } fclose(infile); if(p != NULL) { if (strstr(p, "0xc09")) { if(get_feature("neon")) return CPUNAME_CORTEXA9; else return CPUNAME_ARMV7; } if (strstr(p, "0xc0f")) { if(get_feature("neon")) return CPUNAME_CORTEXA15; else return CPUNAME_ARMV7; } } p = 
(char *) NULL ; infile = fopen("/proc/cpuinfo", "r"); if (infile == NULL) { return CPUNAME_REFERENCE; } while (fgets(buffer, sizeof(buffer), infile)) { if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9))) { p = strchr(buffer, ':') + 2; break; } } fclose(infile); if(p != NULL) { if (strstr(p, "ARMv7")) { return CPUNAME_ARMV7; } } return CPUNAME_REFERENCE; } int main() { int cpuname_id; cpuname_id=cpu_detect(); printf("%s\n", cpuname[cpuname_id]); return 0; } blis-0.6.1/build/detect/config/old/cpuid_x86.c000066400000000000000000000155471360743507500210440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2015, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #define VENDOR_UNKNOWN 0 #define VENDOR_INTEL 1 #define VENDOR_AMD 2 #define CPUNAME_GENERIC 0 #define CPUNAME_PENRYN 1 #define CPUNAME_SANDYBRIDGE 2 #define CPUNAME_HASWELL 3 #define CPUNAME_KNC 4 #define CPUNAME_KNL 5 #define CPUNAME_BULLDOZER 6 #define CPUNAME_PILEDRIVER 7 #define CPUNAME_STEAMROLLER 8 #define CPUNAME_EXCAVATOR 9 #define CPUNAME_ZEN 10 static char *cpuname[] = { "generic", "penryn", "sandybridge", "haswell", "knc", "knl", "bulldozer", "piledriver", "steamroller", "excavator", "zen", }; #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ #if defined(__i386__) && defined(__PIC__) __asm__ __volatile__ ("mov %%ebx, %%edi;" "cpuid;" "xchgl %%ebx, %%edi;" : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); #else __asm__ __volatile__ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); #endif } static inline int have_cpuid(void){ int eax, ebx, ecx, edx; cpuid(0, &eax, &ebx, &ecx, &edx); return eax; } int get_vendor(void){ int eax, ebx, ecx, edx; char vendor[13]; cpuid(0, &eax, &ebx, &ecx, &edx); *(int *)(&vendor[0]) = ebx; *(int *)(&vendor[4]) = edx; *(int *)(&vendor[8]) = ecx; vendor[12] = (char)0; if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL; if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD; if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL; return VENDOR_UNKNOWN; } static inline 
void xgetbv(int op, int * eax, int * edx){ //Use binary code for xgetbv __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); } int support_avx(){ int eax, ebx, ecx, edx; int ret=0; cpuid(1, &eax, &ebx, &ecx, &edx); if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){ xgetbv(0, &eax, &edx); if((eax & 6) == 6){ ret=1; //OS support AVX } } return ret; } int support_avx512(){ int eax, ebx, ecx, edx; int ret=0; cpuid(1, &eax, &ebx, &ecx, &edx); if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){ xgetbv(0, &eax, &edx); if((eax & 0xE6) == 0xE6){ ret=1; //OS support AVX-512 } } return ret; } int cpu_detect() { int eax, ebx, ecx, edx; int vendor, family, extend_family, model, extend_model; if ( !have_cpuid() ) return CPUNAME_GENERIC; vendor = get_vendor(); cpuid( 1, &eax, &ebx, &ecx, &edx ); extend_family = BITMASK( eax, 20, 0xff ); extend_model = BITMASK( eax, 16, 0x0f ); family = BITMASK( eax, 8, 0x0f ); model = BITMASK( eax, 4, 0x0f ); if (vendor == VENDOR_INTEL){ model |= extend_model<<4; switch (family) { case 0x6: switch (model) { case 0x0F: //Core2 case 0x16: //Core2 case 0x17: //Penryn case 0x1D: //Penryn case 0x1A: //Nehalem case 0x1E: //Nehalem case 0x2E: //Nehalem case 0x25: //Westmere case 0x2C: //Westmere case 0x2F: //Westmere return CPUNAME_PENRYN; case 0x2A: //Sandy Bridge case 0x2D: //Sandy Bridge case 0x3A: //Ivy Bridge case 0x3E: //Ivy Bridge if(support_avx()) { return CPUNAME_SANDYBRIDGE; }else{ return CPUNAME_GENERIC; //OS doesn't support AVX } case 0x3C: //Haswell case 0x3F: //Haswell case 0x3D: //Broadwell case 0x47: //Broadwell case 0x4F: //Broadwell case 0x56: //Broadwell case 0x4E: //Skylake case 0x5E: //Skylake if(support_avx()) { return CPUNAME_HASWELL; }else{ return CPUNAME_GENERIC; //OS doesn't support AVX } case 0x57: //KNL if(support_avx512()) { return CPUNAME_KNL; }else{ return CPUNAME_GENERIC; //OS doesn't support AVX } } break; case 0xB: switch 
(model) { case 0x01: //KNC return CPUNAME_KNC; } } }else if (vendor == VENDOR_AMD){ switch (family) { case 0xf: switch (extend_family) { case 6: switch (model) { case 1: if(support_avx()) return CPUNAME_BULLDOZER; else return CPUNAME_GENERIC; //OS don't support AVX. case 2: if(support_avx()) return CPUNAME_PILEDRIVER; else return CPUNAME_GENERIC; //OS don't support AVX. case 0: // Steamroller. Temp use Piledriver. if(support_avx()) return CPUNAME_STEAMROLLER; else return CPUNAME_GENERIC; //OS don't support AVX. } case 8: switch (model){ case 1: if(support_avx()) return CPUNAME_ZEN; else return CPUNAME_REFERENCE; //OS don't support AVX. } } break; } } return CPUNAME_GENERIC; } int main() { int cpuname_id; cpuname_id=cpu_detect(); printf("%s\n", cpuname[cpuname_id]); return 0; } blis-0.6.1/build/detect/iset/000077500000000000000000000000001360743507500157745ustar00rootroot00000000000000blis-0.6.1/build/detect/iset/avx.s000066400000000000000000000001431360743507500167540ustar00rootroot00000000000000// // Test for AVX instruction set. // vzeroall vmovapd %ymm0, %ymm1 vmulpd %ymm0, %ymm0, %ymm1 blis-0.6.1/build/detect/iset/avx512dq.s000066400000000000000000000001561360743507500175350ustar00rootroot00000000000000// // Test for AVX-512dq instruction set. // vzeroall vpmullq %zmm0, %zmm0, %zmm1 vpmullw %zmm0, %zmm0, %zmm1 blis-0.6.1/build/detect/iset/avx512f.s000066400000000000000000000002321360743507500173510ustar00rootroot00000000000000// // Test for AVX-512f instruction set. // vzeroall vmovapd %zmm0, %zmm1 vmulpd %zmm0, %zmm0, %zmm1 vfmadd213pd 0x400(%rax,%rsi,8) {1to8}, %zmm1, %zmm2 blis-0.6.1/build/detect/iset/fma3.s000066400000000000000000000001211360743507500170000ustar00rootroot00000000000000// // Test for FMA3 instruction set. // vzeroall vfmadd213pd %ymm0, %ymm1, %ymm2 blis-0.6.1/build/detect/iset/fma4.s000066400000000000000000000001521360743507500170050ustar00rootroot00000000000000// // Test for FMA4 instruction set (AMD Bulldozer only). 
// vzeroall vfmaddpd %ymm0, %ymm1, %ymm2, %ymm3 blis-0.6.1/build/detect/memkind/000077500000000000000000000000001360743507500164545ustar00rootroot00000000000000blis-0.6.1/build/detect/memkind/libmemkind_detect.c000066400000000000000000000002761360743507500222700ustar00rootroot00000000000000#include #include int main( int argc, char **argv ) { void* p = hbw_malloc( 4096 ); printf( "%s: hbw_malloc() returned %p\n", __FILE__, p ); return 0; } blis-0.6.1/build/detect/omp_simd/000077500000000000000000000000001360743507500166375ustar00rootroot00000000000000blis-0.6.1/build/detect/omp_simd/omp_simd_detect.c000066400000000000000000000007311360743507500221430ustar00rootroot00000000000000#include #include #define ARRAY_LEN 4096 double x[ ARRAY_LEN ]; double y[ ARRAY_LEN ]; int main( int argc, char **argv ) { const double alpha = 2.1; for ( int i = 0; i < ARRAY_LEN; ++i ) { y[ i ] = 0.0; x[ i ] = 1.0; } #pragma omp simd for ( int i = 0; i < ARRAY_LEN; ++i ) { y[ i ] += alpha * x[ i ]; } #if 0 _Pragma( "omp simd" ) for ( int i = 0; i < ARRAY_LEN; ++i ) { x[ i ] += alpha * y[ i ]; } #endif return 0; } blis-0.6.1/build/flatten-headers.py000077500000000000000000000370661360743507500172170ustar00rootroot00000000000000#!/usr/bin/env python # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Import modules import os import sys import getopt import re def print_usage(): my_print( " " ) my_print( " %s" % script_name ) my_print( " " ) my_print( " Field G. Van Zee" ) my_print( " " ) my_print( " Generate a monolithic header by recursively replacing all #include" ) my_print( " directives in a selected file with the contents of the header files" ) my_print( " they reference." ) my_print( " " ) my_print( " Usage:" ) my_print( " " ) my_print( " %s header header_out temp_dir dir_list" % script_name ) my_print( " " ) my_print( " Arguments:" ) my_print( " " ) my_print( " header The filepath to the top-level header, which is the file" ) my_print( " that will #include all other header files." ) my_print( " " ) my_print( " header_out The filepath of the file into which the script will output" ) my_print( " the monolithic header." ) my_print( " " ) my_print( " temp_dir A directory in which temporary files may be created." 
) my_print( " NOTE: No temporary files are created in the current" ) my_print( " implementation, but this argument must still be specified." ) my_print( " " ) my_print( " dir_list The list of directory paths in which to search for the" ) my_print( " headers that are #included by 'header'. By default, these" ) my_print( " directories are scanned for .h files, but sub-directories" ) my_print( " within the various directories are not inspected. If the" ) my_print( " -r option is given, these directories are recursively" ) my_print( " scanned. In either case, the subset of directories scanned" ) my_print( " that actually contains .h files is then searched whenever" ) my_print( " a #include directive is encountered in 'header' (or any" ) my_print( " file subsequently #included). If a referenced header file" ) my_print( " is not found, the #include directive is left untouched and" ) my_print( " translated directly into 'header_out'." ) my_print( " " ) my_print( " The following options are accepted:" ) my_print( " " ) my_print( " -r recursive" ) my_print( " Scan the directories listed in 'dir_list' recursively when" ) my_print( " searching for .h header files. By default, the directories" ) my_print( " are not searched recursively." ) my_print( " " ) my_print( " -c strip C-style comments" ) my_print( " Strip comments enclosed in /* */ delimiters from the" ) my_print( " output, including multi-line comments. By default, C-style" ) my_print( " comments are not stripped." ) my_print( " " ) my_print( " -o SCRIPT output script name" ) my_print( " Use SCRIPT as a prefix when outputting messages instead" ) my_print( " the script's actual name. Useful when the current script" ) my_print( " is going to be called from within another, higher-level" ) my_print( " driver script and seeing the current script's name might" ) my_print( " unnecessarily confuse the user." 
) my_print( " " ) my_print( " -v [0|1|2] verboseness level" ) my_print( " level 0: silent (no output)" ) my_print( " level 1: default (single character '.' per header)" ) my_print( " level 2: verbose (several lines per header)." ) my_print( " " ) my_print( " -h help" ) my_print( " Output this information and exit." ) my_print( " " ) # ------------------------------------------------------------------------------ def canonicalize_ws( s ): return re.sub( '\s+', ' ', s ).strip() # --- def my_print( s ): sys.stdout.write( "%s\n" % s ) # --- #def echov1( s ): # # if verbose_flag == "1": # print "%s: %s" % ( output_name, s ) def echov1_n( s ): if verbose_flag == "1": sys.stdout.write( s ) sys.stdout.flush() def echov1_n2( s ): if verbose_flag == "1": sys.stdout.write( "%s\n" % s ) sys.stdout.flush() # --- def echov2( s ): if verbose_flag == "2": sys.stdout.write( "%s: %s\n" % ( output_name, s ) ) sys.stdout.flush() def echov2_n( s ): if verbose_flag == "2": sys.stdout.write( output_name ) sys.stdout.write( ": " ) sys.stdout.write( s ) sys.stdout.flush() def echov2_n2( s ): if verbose_flag == "2": sys.stdout.write( "%s\n" % s ) sys.stdout.flush() # ------------------------------------------------------------------------------ def list_contains_header( items ): rval = False for item in items: is_h = re.search( "\.h", item ) if is_h: rval = True break return rval # ------------------------------------------------------------------------------ def get_header_path( filename, header_dirpaths ): filepath = None # Search each directory path for the filename given. for dirpath in header_dirpaths: # Construct a possible path to the sought-after file. cur_filepath = "%s/%s" % ( dirpath, filename ) # Check whether the file exists. 
found = os.path.exists( cur_filepath ) if found: filepath = cur_filepath break return filepath # ------------------------------------------------------------------------------ def strip_cstyle_comments( string ): return re.sub( "/\*.*?\*/", "", string, flags=re.S ) # ------------------------------------------------------------------------------ def flatten_header( inputfile, header_dirpaths, cursp ): # This string is inserted after #include directives after having # determined that they are not present in the directory tree. skipstr = "// skipped" beginstr = "// begin " endstr = "// end " ostring = "" # Open the input file to process. ifile = open( inputfile, "r" ) # Iterate over the lines in the file. while True: # Read a line in the file. line = ifile.readline() # Check for EOF. if line == '': break # Check for the #include directive and isolate the header name within # a group (parentheses). #result = re.search( '^[\s]*#include (["<])([\w\.\-/]*)([">])', line ) result = regex.search( line ) # If the line contained a #include directive, we must try to replace # it with the contents of the header referenced by the directive. if result: # Extract the header file referenced in the #include directive, # saved as the second group in the regular expression # above. header = result.group(2) echov2( "%sfound reference to '%s'." % ( cursp, header ) ) # Search for the path to the header referenced in the #include # directive. header_path = get_header_path( header, header_dirpaths ) # First, check if the header is our root header (and if so, ignore it). # Otherwise, if the header was found, we recurse. Otherwise, we output # the #include directive with a comment indicating that it as skipped if header == root_inputfile: markl = result.group(1) markr = result.group(3) echov2( "%sthis is the root header '%s'; commenting out / skipping." \ % ( cursp, header ) ) # If the header found is our root header, then we cannot # recurse into it lest we enter an infinite loop. 
Output the # line but make sure it's commented out entirely. ostring += "%s #include %c%s%c %c" \ % ( skipstr, markl, header, markr, '\n' ) elif header_path: echov2( "%slocated file '%s'; recursing." \ % ( cursp, header_path ) ) # Mark the beginning of the header being inserted. ostring += "%s%s%c" % ( beginstr, header, '\n' ) # Recurse on the header, accumulating the string. ostring += flatten_header( header_path, header_dirpaths, cursp + " " ) # Mark the end of the header being inserted. ostring += "%s%s%c" % ( endstr, header, '\n' ) echov2( "%sheader file '%s' fully processed." \ % ( cursp, header_path ) ) else: markl = result.group(1) markr = result.group(3) echov2( "%scould not locate file '%s'; marking as skipped." \ % ( cursp, header ) ) # If the header was not found, output the line with a # comment that the header was skipped. ostring += "#include %c%s%c %s%c" \ % ( markl, header, markr, skipstr, '\n' ) # endif else: # If the line did not contain a #include directive, simply output # the line verbatim. ostring += "%s" % line # endif # endwhile # Close the input file. ifile.close() echov1_n( "." ) return ostring # ------------------------------------------------------------------------------ def find_header_dirs( dirpath ): header_dirpaths = [] for root, dirs, files in os.walk( dirpath, topdown=True ): echov2_n( "scanning contents of %s" % root ) if list_contains_header( files ): echov2_n2( "...found headers" ) header_dirpaths.append( root ) else: echov2_n2( "" ) #endif #endfor return header_dirpaths # ------------------------------------------------------------------------------ # Global variables. script_name = None output_name = None strip_comments = None recursive_flag = None verbose_flag = None regex = None root_inputfile = None def main(): global script_name global output_name global strip_comments global recursive_flag global verbose_flag global regex global root_inputfile # Obtain the script name. 
path, script_name = os.path.split(sys.argv[0]) output_name = script_name strip_comments = False recursive_flag = False verbose_flag = "1" nestsp = " " # Process our command line options. try: opts, args = getopt.getopt( sys.argv[1:], "o:rchv:" ) except getopt.GetoptError as err: # print help information and exit: my_print( str(err) ) # will print something like "option -a not recognized" print_usage() sys.exit(2) for opt, optarg in opts: if opt == "-o": output_name = optarg elif opt == "-r": recursive_flag = True elif opt == "-c": strip_comments = True elif opt == "-v": verbose_flag = optarg elif opt == "-h": print_usage() sys.exit() else: print_usage() sys.exit() # Make sure that the verboseness level is valid. if ( verbose_flag != "0" and verbose_flag != "1" and verbose_flag != "2" ): my_print( "%s Invalid verboseness argument: %s" \ % output_name, verbose_flag ) sys.exit() # Print usage if we don't have exactly four arguments. if len( args ) != 4: print_usage() sys.exit() # Acquire the four required arguments: # - the input header file, # - the output header file, # - the temporary directory in which we can write intermediate files, # - the list of directories in which to search for the headers. inputfile = args[0] outputfile = args[1] temp_dir = args[2] dir_list = args[3] # Save the filename (basename) part of the input file (or root file) into a # global variable that we can access later from within flatten_header(). root_inputfile = os.path.basename( inputfile ) # Separate the directories into distinct strings. dir_list = dir_list.split() # First, confirm that the directories in dir_list are valid. dir_list_checked = [] for item in dir_list: #absitem = os.path.abspath( item ) echov2_n( "checking " + item ) if os.path.exists( item ): dir_list_checked.append( item ) echov2_n2( "...directory exists." ) else: echov2_n2( "...invalid directory; omitting." ) # endfor # Overwrite the original dir_list with the updated copy that omits # invalid directories. 
dir_list = dir_list_checked echov2( "check summary:" ) echov2( " accessible directories:" ) echov2( " %s" % ' '.join( dir_list ) ) # Generate a list of directories (header_dirpaths) which will be searched # whenever a #include directive is encountered. The method by which # header_dirpaths is compiled will depend on whether the recursive flag # was given. if recursive_flag: header_dirpaths = [] for d in dir_list: # For each directory in dir_list, recursively walk that directory # and return a list of directories that contain headers. d_dirpaths = find_header_dirs( d ) # Add the list resulting from the current search to the running # list of directory paths that contain headers. header_dirpaths += d_dirpaths # endfor else: # If the recursive flag was not given, we can just use dir_list # as-is, though we opt to filter out the directories that don't # contain .h files. header_dirpaths = [] for d in dir_list: echov2_n( "scanning %s" % d ) # Acquire a list of the directory's contents. sub_items = os.listdir( d ) # If there is at least one header present, add the current # directory to the list of header directories. if list_contains_header( sub_items ): header_dirpaths.append( d ) echov2_n2( "...found headers." ) else: echov2_n2( "...no headers found." ) # endif # endfor # endfor echov2( "scan summary:" ) echov2( " headers found in:" ) echov2( " %s" % ' '.join( header_dirpaths ) ) echov2( "preparing to monolithify '%s'" % inputfile ) echov2( "new header will be saved to '%s'" % outputfile ) echov1_n( "." ) # Open the output file. ofile = open( outputfile, "w" ) # Precompile the main regular expression used to isolate #include # directives and the headers they reference. This regex object will # get reused over and over again in flatten_header(). regex = re.compile( '^[\s]*#include (["<])([\w\.\-/]*)([">])' ) # Recursively substitute headers for occurrences of #include directives. 
final_string = flatten_header( inputfile, header_dirpaths, nestsp ) # Strip C-style comments from the final output, if requested. if strip_comments: final_string = strip_cstyle_comments( final_string ) # Write the lines to the file. ofile.write( final_string ) # Close the output file. ofile.close() echov2( "substitution complete." ) echov2( "monolithic header saved as '%s'" % outputfile ) echov1_n2( "." ) return 0 if __name__ == "__main__": main() blis-0.6.1/build/flatten-headers.sh000077500000000000000000000435711360743507500171770ustar00rootroot00000000000000#!/usr/bin/env bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
# IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

#
# -- Helper functions ----------------------------------------------------------
#

# Print the usage text to stdout and exit with status 1 (also for -h).
print_usage()
{
	# Echo usage info.
	echo " "
	echo " ${script_name}"
	echo " "
	echo " Field G. Van Zee"
	echo " "
	echo " Generate a monolithic header by recursively replacing all #include"
	echo " directives in a selected file with the contents of the header files"
	echo " they reference."
	echo " "
	echo " Usage:"
	echo " "
	echo " ${script_name} header header_out temp_dir dir_list"
	echo " "
	echo " Arguments:"
	echo " "
	echo " header The filepath to the top-level header, which is the file"
	echo " that will #include all other header files."
	echo " "
	echo " header_out The filepath of the file into which the script will output"
	echo " the monolithic header."
	echo " "
	echo " temp_dir A directory in which temporary files may be created."
	echo " "
	echo " dir_list The list of directory paths in which to search for the"
	echo " headers that are #included by 'header'. By default, these"
	echo " directories are scanned for .h files, but sub-directories"
	echo " within the various directories are not inspected. If the"
	echo " -r option is given, these directories are recursively"
	echo " scanned. In either case, the subset of directories scanned"
	echo " that actually contains .h files is then searched whenever"
	echo " a #include directive is encountered in 'header' (or any"
	echo " file subsequently #included). If a referenced header file"
	echo " is not found, the #include directive is left untouched and"
	echo " translated directly into 'header_out'."
	echo " "
	echo " The following options are accepted:"
	echo " "
	echo " -r recursive"
	echo " Scan the directories listed in 'dir_list' recursively when"
	echo " searching for .h header files. By default, the directories"
	echo " are not searched recursively."
	echo " "
	echo " -c strip C-style comments"
	echo " Strip comments enclosed in /* */ delimiters from the"
	echo " output, including multi-line comments. By default, C-style"
	echo " comments are not stripped."
	echo " "
	echo " -o SCRIPT output script name"
	echo " Use SCRIPT as a prefix when outputting messages instead"
	echo " the script's actual name. Useful when the current script"
	echo " is going to be called from within another, higher-level"
	echo " driver script and seeing the current script's name might"
	echo " unnecessarily confuse the user."
	echo " "
	echo " -v [0|1|2] verboseness level"
	echo " level 0: silent (no output)"
	echo " level 1: default (single character '.' per header)"
	echo " level 2: verbose (several lines per header)."
	echo " "
	echo " -h help"
	echo " Output this information and exit."
	echo " "

	# Exit with non-zero exit status
	exit 1
}

# Print $1 with leading/trailing whitespace removed and runs of spaces
# squeezed to a single space.
canonicalize_ws()
{
	local str="$1"

	# Remove leading and trailing whitespace. printf is used instead of
	# 'echo -e' so that backslashes in the value are not interpreted.
	str=$(printf '%s\n' "${str}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')

	# Remove duplicate spaces between words.
	str=$(printf '%s\n' "${str}" | tr -s " ")

	# Return the result to the caller via stdout.
	printf '%s\n' "${str}"
}

# Print $1 if it equals one of the whitespace-separated words in $2;
# otherwise print an empty line.
is_word_in_list()
{
	local word list rval item

	word="$1"
	list="$2"
	rval=""

	# ${list} is intentionally unquoted so that it splits into words.
	for item in ${list}; do

		if [ "${item}" == "${word}" ]; then
			rval="${word}"
			break
		fi
	done

	echo "${rval}"
}

# Verbosity level 1: print "<output_name>: $1" to stderr.
echovo()
{
	if [ "${verbose_flag}" == "1" ]; then
		# Echo the argument string to stderr instead of stdout.
		echo "${output_name}: $1" 1>&2;
	fi
}

# Verbosity level 1: print $1 to stderr without a trailing newline.
echovo_n()
{
	if [ "${verbose_flag}" == "1" ]; then
		# Echo the argument string to stderr instead of stdout.
		echo -n "$1" 1>&2;
	fi
}

# Verbosity level 1: print $1 to stderr (no name prefix).
echovo_n2()
{
	if [ "${verbose_flag}" == "1" ]; then
		# Echo the argument string to stderr instead of stdout.
		echo "$1" 1>&2;
	fi
}

# ---

# Verbosity level 2: print "<output_name>: $1" to stderr.
echovt()
{
	if [ "${verbose_flag}" == "2" ]; then
		# Echo the argument string to stderr instead of stdout.
		echo "${output_name}: $1" 1>&2;
	fi
}

# Verbosity level 2: print "<output_name>: $1" to stderr, no newline.
echovt_n()
{
	if [ "${verbose_flag}" == "2" ]; then
		# Echo the argument string to stderr instead of stdout.
		echo -n "${output_name}: $1" 1>&2;
	fi
}

# Verbosity level 2: print $1 to stderr (no name prefix).
echovt_n2()
{
	if [ "${verbose_flag}" == "2" ]; then
		# Echo the argument string to stderr instead of stdout.
		echo "$1" 1>&2;
	fi
}

# Print the space-separated list of directories at or below $1 whose
# listing contains ".h", skipping entries named in the global ignore_list.
find_header_dirs()
{
	local cur_dirpath sub_items result cur_list item child_list

	# Extract the argument: the current directory.
	cur_dirpath="$1"

	echovt_n "scanning contents of ${cur_dirpath}"

	# Acquire a list of the directory's contents.
	sub_items=$(ls "${cur_dirpath}")

	# If there is at least one header present, add the current directory to
	# the list header of directories. Otherwise, the current directory does
	# not contribute to the list returned to the caller.
	if printf '%s\n' "${sub_items}" | grep -q "\.h"; then
		cur_list="${cur_dirpath}"
		echovt_n2 " ...found headers"
	else
		cur_list=""
		echovt_n2 ""
	fi

	# Iterate over the list of directory contents. ${sub_items} is
	# intentionally unquoted so that it splits into names.
	for item in ${sub_items}; do

		# Check whether the current item is in the ignore_list. If so, we
		# ignore it.
		result=$(is_word_in_list "${item}" "${ignore_list}")
		if [ -n "${result}" ]; then
			echovt "ignoring directory '${item}'."
			continue
		fi

		# If the current item is a directory, recursively accumulate header
		# directories for that sub-directory.
		if [ -d "${cur_dirpath}/${item}" ]; then

			# Recursively find header directories within the sub-directory
			# ${item} and store the directory list to child_list.
			child_list=$(find_header_dirs "${cur_dirpath}/${item}")

			# Accumulate the sub-directory's header list with the running list
			# of header directories
			cur_list="${cur_list} ${child_list}"
		fi

	done

	# Return the list of header directories.
	echo "${cur_list}"
}

# Print "<dir>/$1" for the first directory in the space-separated list $2
# that contains a regular file named $1; print an empty line if none does.
get_header_path()
{
	local filename dirpaths filepath dirpath

	filename="$1"
	dirpaths="$2"

	filepath=""

	# Search each directory path for the filename given.
	for dirpath in ${dirpaths}; do

		if [ -f "${dirpath}/${filename}" ]; then
			filepath="${dirpath}/${filename}"
			break
		fi
	done

	# Return the filepath that was found. Note that if no filepath was found
	# in the loop above, the empty string gets returned.
	echo "${filepath}"
}

# Flatten the header file $1 into ${temp_dir}/<basename of $1>.interm and
# print that path on stdout.
#   $1 - header file to process
#   $2 - space-separated list of directories searched for #included headers
#   $3 - indentation prefix used for level-2 messages
# Reads the globals temp_dir, strip_comments and nestsp.
replace_pass()
{
	local inputfile dirpaths cursp intermfile skipstr
	local header headerlist header_filepath header_esc subintermfile
	local words curline

	inputfile="$1"
	dirpaths="$2"
	cursp="$3"

	# Set the output filename, which we will return to the caller. Starting
	# with the input filepath, we strip it down to just the filename and
	# reconstruct it with the .interm suffix in temp_dir.
	intermfile="${inputfile##*/}"
	intermfile="${temp_dir}/${intermfile}.interm"

	# This string is inserted after #include directives after having
	# determined that they are not present in the directory tree.
	skipstr="\/\/ skipped"

	# Initialize the list of headers referenced in #include directives
	# found in the current header file.
	headerlist=""

	# Only iterate through the file line-by-line if it contains at least
	# one #include directive. If it does not contain any #include directives,
	# then we can leave headerlist initialized to empty and proceed.
	if grep -q '^[[:space:]]*#include ' "${inputfile}"; then

		# Iterate through each line of the header file, accumulating the
		# names of header files referenced in #include directives. Each line
		# is split into words and re-joined with single spaces, which is the
		# normalization the unquoted 'echo ${curline}' used to perform, but
		# without pathname expansion of text such as "/*". The '||' clause
		# keeps a final line that lacks a trailing newline.
		while read -r -a words || [ "${#words[@]}" -gt 0 ]; do

			curline="${words[*]}"

			# If the line begins with a #include directive...
			if [[ "${curline}" == "#include "* ]]; then

				# Isolate the header filename. We must take care to include all
				# characters that might appear between the "" or <>.
				header=$(printf '%s\n' "${curline}" | sed -e "s/#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g")

				# Add the header file to a list.
				headerlist=$(canonicalize_ws "${headerlist} ${header}")
			fi

		done < "${inputfile}"
	fi

	if [ -n "${headerlist}" ]; then
		echovt "${cursp}found references to: ${headerlist}"
	else
		echovt "${cursp}no header references found."
	fi

	# Before we go any further, we strip C-style comments from the file,
	# if requested.
	if [ -n "${strip_comments}" ]; then

		# Make a copy of inputfile stripped of its C-style comments and
		# save it to intermfile. This substitution leaves behind a single
		# blank line.
		perl -0777 -pe "s/\/\*.*?\*\///gs" \
			< "${inputfile}" \
			> "${intermfile}"

	else

		# Otherwise, just copy inputfile to intermfile verbatim.
		cp "${inputfile}" "${intermfile}"
	fi

	# Iterate over each header file found in the previous loop.
	for header in ${headerlist}; do

		# Find the path to the header.
		header_filepath=$(get_header_path "${header}" "${dirpaths}")

		# If the header has a slash, escape it so that sed doesn't get confused
		# (since we use '/' as our search-and-replace delimiter).
		header_esc=$(printf '%s\n' "${header}" | sed -e 's/\//\\\//g')

		# If the header file was not found, get_header_path() returns an
		# empty string. This probably means that the header file is a
		# system header and thus we skip it since we don't want to inline
		# the contents of system headers anyway.
		if [ -z "${header_filepath}" ]; then

			echovt "${cursp}could not locate file '${header}'; marking as skipped."

			# Insert a comment after the #include so we know it was ignored.
			# Notice that we mimic the quotes or angle brackets around the
			# header name, whichever pair was used in the input.
			sed -e "s/^[[:space:]]*#include \([\"<]\)\(${header_esc}\)\([\">]\).*/#include \1\2\3 ${skipstr}/" \
				< "${intermfile}" \
				> "${intermfile}.tmp"
			mv "${intermfile}.tmp" "${intermfile}"

		else

			echovt "${cursp}located file '${header_filepath}'; recursing."

			# Recursively produce an inlined/flattened intermediate file at
			# ${header_filepath}.
			subintermfile=$(replace_pass "${header_filepath}" "${dirpaths}" "${cursp}${nestsp}")

			echovt "${cursp}inserting '${subintermfile}'."

			# Replace the #include directive for the current header file with the
			# contents of that header file, saving the result to a temporary file.
			# We also insert begin and end markers to allow for more readability.
			# NOTE: We use the 'i\...' and 'a\...' notation with '$', which causes
			# bash to interpret '\n' as a newline, as needed for the 'a\' and 'i\'
			# commands in POSIX (e.g. OS X) sed. (GNU sed allows a much more
			# natural usage that does not require the backslash or newline.)
			sed -e "/^[[:space:]]*#include \"${header_esc}\"/ {" \
			    -e 'i\'$'\n'"// begin ${header}"$'\n' \
			    -e "r ${subintermfile}" \
			    -e 'a\'$'\n'"// end ${header}"$'\n' \
			    -e "d" \
			    -e "}" \
				< "${intermfile}" \
				> "${intermfile}.tmp"
			mv "${intermfile}.tmp" "${intermfile}"

			echovt "${cursp}removing intermediate file '${subintermfile}'."

			# Remove the recursive call's intermediate file now that it has been
			# inserted into this level's intermediate.
			rm "${subintermfile}"
		fi

	done

	# works, but leaves blank line:
	#cat "test.h" | sed -e "/^#include \"foo.h\"/r foo.h" -e "s///" > "test.new.h"
	# works:
	#cat "test.h" | sed -e '/^#include \"foo.h\"/ {' -e 'r foo.h' -e 'd' -e '}' > "test.new.h"
	# works:
	#cat "test.h" | sed -e '/^#include \"foo.h\"/r foo.h' -e '/^#include \"foo.h\"/d' > "test.new.h"
	#cat zorn/header.h | sed -e '/^#include \"header1.h\"/ {' -e 'i // begin insertion' -e 'r alice/header1.h' -e 'a // end insertion' -e 'd' -e '}'

	echovt "${cursp}header file '${inputfile}' fully processed."
	echovt "${cursp}returning via '${intermfile}'."

	echovo_n "."

	# Return the intermediate filename so the caller knows the name of this
	# invocation's output file.
	echo "${intermfile}"
}

#
# -- main function -------------------------------------------------------------
#

# Parse the options, validate the four positional arguments, build the list
# of header directories and flatten the root header into the output file.
main()
{
	# The name of the script, stripped of any preceding path.
	script_name=${0##*/}

	# The script name to use in informational output. Defaults to ${script_name}.
	output_name=${script_name}

	# Whether or not we should strip C-style comments from the output. (Default
	# is to not strip C-style comments.)
	strip_comments=""

	# Whether or not we search the directories in dir_list recursively. (Default
	# is to not search recursively.)
	recursive_flag=""

	# The verboseness level. The documented default is 1, but it was never
	# assigned, so running without -v failed the validity check below. A
	# value inherited from the environment is still honored, as before.
	verbose_flag="${verbose_flag:-1}"

	# The list of directories to ignore
	ignore_list="old other temp test testsuite windows"

	# The amount to nest each level of recursion in the output.
	nestsp=" "

	# Process our command line options.
	while getopts ":o:rchv:" opt; do
		case $opt in
			o  ) output_name=$OPTARG ;;
			r  ) recursive_flag="1" ;;
			c  ) strip_comments="1" ;;
			v  ) verbose_flag=$OPTARG ;;
			h  ) print_usage ;;
			:  ) print_usage ;;  # -o or -v given without its argument
			\? ) print_usage ;;
		esac
	done
	shift $((OPTIND - 1))

	# Make sure that the verboseness level is valid.
	if [ "${verbose_flag}" != "0" ] &&
	   [ "${verbose_flag}" != "1" ] &&
	   [ "${verbose_flag}" != "2" ]; then
		echo "${output_name}: Invalid verboseness argument '${verbose_flag}'." 1>&2;
		exit 1
	fi

	# Print usage if we don't have exactly four arguments.
	if [ $# != "4" ]; then
		print_usage
	fi

	# Acquire the four required arguments:
	# - the input header file,
	# - the output header file,
	# - the temporary directory in which we can write intermediate files,
	# - the list of directories in which to search for the headers
	inputfile="$1"
	outputfile="$2"
	temp_dir="$3"
	dir_list="$4"

	# First, confirm that the directories in dir_list are valid.
	dir_list2=""
	for item in ${dir_list}; do

		# Strip a trailing slash from the path, if it has one.
		item=${item%/}

		echovt_n "checking ${item} "

		if [ -d "${item}" ]; then
			echovt_n2 " ...directory exists."
			dir_list2="${dir_list2} ${item}"
		else
			echovt_n2 " ...invalid directory; omitting."
		fi
	done
	dir_list2=$(canonicalize_ws "${dir_list2}")

	# Overwrite the original dir_list with the updated copy that omits
	# invalid directories.
	dir_list="${dir_list2}"

	echovt "check summary:"
	echovt " accessible directories:"
	echovt " ${dir_list}"

	# Generate a list of directories (dirpaths) which will be searched whenever
	# a #include directive is encountered. The method by which dirpaths is
	# compiled will depend on whether the recursive flag was given.
	if [ -n "${recursive_flag}" ]; then

		# If the recursive flag was given, we need to recursively scan each
		# directory in dir_list for directories with headers via the
		# function find_header_dirs().
		dirpaths=""
		for item in ${dir_list}; do

			item_dirpaths=$(find_header_dirs "${item}")
			dirpaths="${dirpaths} ${item_dirpaths}"
		done
		dirpaths=$(canonicalize_ws "${dirpaths}")

	else

		# If the recursive flag was not given, we can just use dir_list
		# as-is, though we opt to filter out the directories that don't
		# contain .h files.
		dirpaths=""
		for item in ${dir_list}; do

			echovt_n "scanning ${item}"

			# Acquire a list of the directory's contents.
			sub_items=$(ls "${item}")

			# If there is at least one header present, add the current directory to
			# the list header of directories.
			if printf '%s\n' "${sub_items}" | grep -q "\.h"; then
				dirpaths="${dirpaths} ${item}"
				echovt_n2 " ...found headers."
			else
				echovt_n2 " ...no headers found."
			fi
		done
		dirpaths=$(canonicalize_ws "${dirpaths}")

	fi

	echovt "scan summary:"
	echovt " headers found in:"
	echovt " ${dirpaths}"

	echovt "preparing to monolithify '${inputfile}'."

	# Make a copy of the inputfile.
	#cp ${inputfile} ${outputfile}

	echovt "new header will be saved to '${outputfile}'."

	echovo_n "."

	# Recursively substitute headers for occurrences of #include directives.
	intermfile=$(replace_pass "${inputfile}" "${dirpaths}" "${nestsp}")

	# Rename the intermediate file(path) to the output file(path).
	mv "${intermfile}" "${outputfile}"

	echovt "substitution complete."
	echovt "monolithic header saved as '${outputfile}'."

	echovo_n2 "."

	# Exit peacefully.
	return 0
}

# The script's main entry point, passing all parameters given.
main "$@"
blis-0.6.1/build/gen-make-frags/000077500000000000000000000000001360743507500163445ustar00rootroot00000000000000blis-0.6.1/build/gen-make-frags/fragment.mk000066400000000000000000000066231360743507500205070ustar00rootroot00000000000000#
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

#
# fragment.mk
#
# This is an automatically-generated makefile fragment and will likely get
# overwritten or deleted if the user is not careful. Modify at your own risk.
#
# In the template form of this file, every token of the form
# _mkfile_fragment_*_ is a placeholder that is textually replaced when a
# concrete fragment is generated from the template.
#

# These two makefile variables need to be set in order for the recursive
# include process to work!
CURRENT_DIR_NAME := _mkfile_fragment_cur_dir_name_
CURRENT_SUB_DIRS := _mkfile_fragment_sub_dir_names_

# Source files local to this fragment
LOCAL_SRC_FILES  := _mkfile_fragment_local_src_files_

# Add the fragment's local source files to the _global_variable_ variable.
# Each file name is prefixed with the path of this fragment's directory.
_mkfile_fragment_src_var_name_ += $(addprefix $(PARENT_SRC_PATH)/$(CURRENT_DIR_NAME)/, $(LOCAL_SRC_FILES))




# -----------------------------------------------------------------------------
# NOTE: The code below is generic and should remain in all fragment.mk files!
# -----------------------------------------------------------------------------

# Add the current fragment to the global list of fragments so the top-level
# Makefile knows which directories are participating in the build.
FRAGMENT_DIR_PATHS += $(PARENT_SRC_PATH)/$(CURRENT_DIR_NAME)

# Recursively descend into other subfragments' local makefiles and include them.
#
# Make has no local variables, so the parent paths are saved on a simulated
# stack before descending: key1/key2 each grow by one suffix (.x/.y) per
# nesting level, and the current PARENT_PATH/PARENT_SRC_PATH are stored in
# variables named stack_<key>. After the sub-fragments have been included,
# the saved values are restored and $(basename ...) strips the last suffix
# from each key again, which pops the stack.
ifneq ($(strip $(CURRENT_SUB_DIRS)),)
key1                := $(key1).x
key2                := $(key2).y
stack_$(key1)       := $(PARENT_PATH)
stack_$(key2)       := $(PARENT_SRC_PATH)
PARENT_PATH         := $(PARENT_PATH)/$(CURRENT_DIR_NAME)
PARENT_SRC_PATH     := $(PARENT_SRC_PATH)/$(CURRENT_DIR_NAME)
FRAGMENT_SUB_DIRS   := $(addprefix $(PARENT_PATH)/, $(CURRENT_SUB_DIRS))
# The leading '-' makes a missing sub-fragment a non-error.
-include  $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_SUB_DIRS))
PARENT_PATH         := $(stack_$(key1))
PARENT_SRC_PATH     := $(stack_$(key2))
key1                := $(basename $(key1))
key2                := $(basename $(key2))
endif
#
# print_usage()
#
# Writes the help text for gen-make-frag.sh to standard output and then
# terminates the script.
#
# Globals:   script_name (read) - the name shown in the help text
# Arguments: none
# Returns:   does not return; exits with status 1
#
print_usage()
{
	# Echo usage info. script_name is quoted so that it is printed verbatim
	# rather than being word-split or glob-expanded.
	printf '%s\n' \
		" " \
		" ${script_name}" \
		" " \
		" Field G. Van Zee" \
		" " \
		" Automatically generates makefile fragments for a specified directory" \
		" tree. " \
		" " \
		" Usage:" \
		" ${script_name} [options] root_dir frag_dir templ.mk suff_list ign_list" \
		" " \
		" Arguments (mandatory):" \
		" " \
		" root_dir The root directory to scan when generating makefile" \
		" fragments." \
		" " \
		" frag_dir The root directory in which makefile fragments will be" \
		" generated." \
		" " \
		" templ.mk The template makefile fragment used to generate the actual" \
		" fragments." \
		" " \
		" suff_list File containing a newline-separated list of file suffixes" \
		" of source files that the top-level makefile expects to" \
		" access." \
		" " \
		" ign_list File containing a newline-separated list of directory names" \
		" to ignore when descending recursively into root_dir." \
		" " \
		" The following options are accepted:" \
		" " \
		" -d dry-run" \
		" Go through all the motions, but don't actually generate any" \
		" makefile fragments." \
		" -r recursive" \
		" Also generate makefile fragments for subdirectories of" \
		" root_dir." \
		" -h hide" \
		" Hide the makefile fragments by prepending filenames with '.'." \
		" -p PREFIX prefix name" \
		" Use PREFIX instead of uppercased root_dir in the makefile" \
		" variable name. If the root_dir were 'stuff' and -p was not" \
		" used, then source would be accumulated into a makefile" \
		" variable named 'MK_STUFF', but if -p JUNK were given, then" \
		" the variable name would instead be MK_JUNK." \
		" -o SCRIPT output script name" \
		" Use SCRIPT when outputting messages instead of the script's" \
		" actual name." \
		" -v [0|1|2] verboseness level" \
		" level 0: silent (no output)" \
		" level 1: default (one line per directory)" \
		" level 2: verbose (several lines per directory)." \
		" "

	# Exit with non-zero exit status
	exit 1
}
#
# gen_mkfile_escape_repl()
#
# Prints its argument with the characters that are special in the
# replacement half of a sed 's/.../.../' command ('&', '/', and '\')
# backslash-escaped, so the value is inserted literally.
#
gen_mkfile_escape_repl()
{
	printf '%s' "$1" | sed -e 's|[&/\\]|\\&|g'
}

#
# gen_mkfile()
#
# Creates a single makefile fragment in a user-specified directory and adds
# any local source files found to a top-level Makefile variable.
#
# Globals (read):
#   mkfile_frag_tmpl_path   path to the template fragment
#   mkfile_fragment_*_anchor  the four placeholder strings in the template
#   hide_flag               if non-empty, prefix the fragment name with '.'
#   dry_run_flag            if non-empty, do not write the fragment
#   verbose_flag            "2" prints the values computed below
#   src_file_suffixes       whitespace-separated list of source suffixes
#   ignore_dirs             whitespace-separated list of directories to skip
# Arguments:
#   $1  name of the makefile variable that accumulates the source files
#   $2  directory to scan
#   $3  directory in which to write the fragment (must already exist)
# Returns: 0 on success (and on dry runs); 1 if the fragment could not be
#          written.
#
gen_mkfile()
{
	# Local variable declarations
	local mkfile_frag_var_name
	local this_dir
	local this_frag_dir
	local mkfile_frag_tmpl_name
	local mkfile_frag_path
	local cur_frag_dir
	local cur_frag_path
	local local_src_files
	local cur_frag_sub_dirs
	local item
	local item_path
	local item_suffix
	local repl_dir
	local repl_sub_dirs
	local repl_src_files
	local repl_var_name

	# Extract our arguments to local variables
	mkfile_frag_var_name=$1
	this_dir=$2
	this_frag_dir=$3

	# Strip the leading path from the template makefile path to get its
	# simple filename. Hide the output makefile fragment filename, if
	# requested.
	mkfile_frag_tmpl_name=${mkfile_frag_tmpl_path##*/}
	if [ -n "$hide_flag" ]; then
		mkfile_frag_path="$this_frag_dir/.$mkfile_frag_tmpl_name"
	else
		mkfile_frag_path="$this_frag_dir/$mkfile_frag_tmpl_name"
	fi

	# Determine the directory in which the fragment will reside.
	cur_frag_path=$this_dir
	cur_frag_dir=${this_dir##*/}

	# Initialize the local source list and the subdirectory list to empty.
	local_src_files=""
	cur_frag_sub_dirs=""

	# Walk the contents of $this_dir once. A glob is used rather than the
	# output of 'ls' so that paths containing whitespace are handled.
	for item_path in "$this_dir"/*; do

		# An unmatched glob is left as the literal pattern; skip it.
		if [ ! -e "$item_path" ] && [ ! -L "$item_path" ]; then
			continue
		fi

		item=${item_path##*/}

		# If item is a directory, and it's not in the ignore list, record it
		# as a subdirectory of this fragment. Directories are never treated
		# as source files.
		if [ -d "$item_path" ]; then
			if ! is_in_list "$item" "$ignore_dirs"; then
				cur_frag_sub_dirs="${cur_frag_sub_dirs:+${cur_frag_sub_dirs} }${item}"
			fi
			continue
		fi

		# Acquire the item's suffix, if it has one
		case $item in
			*.* ) item_suffix=${item##*.} ;;
			*   ) item_suffix="" ;;
		esac

		# If the suffix matches, then add it to our list
		if [ -n "$item_suffix" ] && is_in_list "$item_suffix" "$src_file_suffixes"; then
			local_src_files="${local_src_files:+${local_src_files} }${item}"
		fi
	done

	# Be verbose, if level 2 was requested.
	if [ "$verbose_flag" = "2" ]; then
		echo "mkf frag tmpl path: $mkfile_frag_tmpl_path"
		echo "mkf frag path: $mkfile_frag_path"
		echo "cur frag path: $cur_frag_path"
		echo "cur frag dir: $cur_frag_dir"
		echo "cur frag sub dirs: $cur_frag_sub_dirs"
		echo "local src files: $local_src_files"
		echo "src file suffixes: $src_file_suffixes"
		echo "mkf frag var name: $mkfile_frag_var_name"
		echo "--------------------------------------------------"
	fi

	# Copy the template makefile to the directory given, using the new
	# makefile name we just created above, replacing each anchor with the
	# corresponding value computed above.
	if [ -z "$dry_run_flag" ]; then

		repl_dir=$(gen_mkfile_escape_repl "$cur_frag_dir")
		repl_sub_dirs=$(gen_mkfile_escape_repl "$cur_frag_sub_dirs")
		repl_src_files=$(gen_mkfile_escape_repl "$local_src_files")
		repl_var_name=$(gen_mkfile_escape_repl "$mkfile_frag_var_name")

		sed -e "s/${mkfile_fragment_cur_dir_name_anchor}/${repl_dir}/g" \
		    -e "s/${mkfile_fragment_sub_dir_names_anchor}/${repl_sub_dirs}/g" \
		    -e "s/${mkfile_fragment_local_src_files_anchor}/${repl_src_files}/g" \
		    -e "s/${mkfile_fragment_src_var_name_anchor}/${repl_var_name}/g" \
		    "$mkfile_frag_tmpl_path" > "$mkfile_frag_path" || return 1
	fi

	# Return peacefully.
	return 0
}
#
# gen_mkfiles
#
# Recursively generates makefile fragments for a directory and all
# subdirectories. All of the actual work happens in gen_mkfile().
#
# Globals (read): verbose_flag, script_name, src_var_name, SRC, ignore_dirs
# Arguments:
#   $1  directory to scan
#   $2  directory in which the corresponding fragment is generated
# Returns: 0
#
gen_mkfiles()
{
	# Local variable declarations
	local item item_path cur_dir this_frag_dir

	# Extract our argument
	cur_dir=$1
	this_frag_dir=$2

	# Append a relevant suffix to the makefile variable name, if necessary
	# NOTE: This step is disabled because special directories are presently
	# ignored when generating makefile variable names.
	#all_add_src_var_name "$cur_dir"

	# Be verbose if level 2 was requested
	if [ "$verbose_flag" = "2" ]; then
		echo ">>> $script_name ${src_var_name}_$SRC $cur_dir $this_frag_dir"
	elif [ "$verbose_flag" = "1" ]; then
		echo "$script_name: creating makefile fragment in $this_frag_dir from $cur_dir"
	fi

	# Call our function to generate a makefile in the directory given.
	gen_mkfile "${src_var_name}_$SRC" "$cur_dir" "$this_frag_dir"

	# Descend into the contents of cur_dir to generate the subdirectories'
	# makefile fragments. A glob is used rather than the output of 'ls' so
	# that paths containing whitespace are handled; an unmatched glob is
	# not a directory and is therefore skipped by the -d test.
	for item_path in "$cur_dir"/*; do

		[ -d "$item_path" ] || continue

		item=${item_path##*/}

		# If item is a directory, and it's not in the ignore list, descend into it.
		if ! is_in_list "$item" "$ignore_dirs"; then
			gen_mkfiles "$item_path" "$this_frag_dir/$item"
		fi
	done

	# Remove a relevant suffix from the makefile variable name, if necessary
	# NOTE: This step is disabled because special directories are presently
	# ignored when generating makefile variable names.
	#all_del_src_var_name "$cur_dir"

	# Return peacefully
	return 0
}
#
# read_mkfile_config()
#
# Loads the two list files named on the command line into single-line,
# space-separated strings.
#
# Globals (read):    suffix_file, ignore_file, script_name
# Globals (written): src_file_suffixes, ignore_dirs
# Arguments: none
# Returns:   0 on success; exits with status 1 if either file cannot be read.
#
read_mkfile_config()
{
	local cfg_file

	# Refuse to continue without both lists; otherwise every fragment would
	# silently be generated with no source files.
	for cfg_file in "${suffix_file}" "${ignore_file}"; do
		if [ ! -r "${cfg_file}" ] || [ -d "${cfg_file}" ]; then
			echo "${script_name}: cannot read list file '${cfg_file}'." 1>&2
			exit 1
		fi
	done

	# Read the file describing file suffixes, and the file listing the
	# directories to ignore. Runs of whitespace (including the newlines that
	# separate the entries) are squeezed into single spaces, and the space
	# left at either end is removed, so the values appear on one line when
	# printed. The entries themselves are taken literally; they are never
	# subjected to pathname expansion.
	src_file_suffixes=$(tr -s '[:space:]' ' ' < "${suffix_file}" | sed -e 's/^ //' -e 's/ $//')
	ignore_dirs=$(tr -s '[:space:]' ' ' < "${ignore_file}" | sed -e 's/^ //' -e 's/ $//')
}
#
# main()
#
# Entry point of gen-make-frag.sh. Initializes the script's global
# variables, parses the command line, reads the suffix and ignore lists,
# derives the name of the makefile source variable, and generates a
# makefile fragment for root_dir (and, with -r, for its subdirectories).
#
# Options:   -d dry run, -h hide fragments, -r recursive,
#            -o SCRIPT name used in messages, -p PREFIX variable name prefix,
#            -v [0|1|2] verboseness level (anything else is treated as 1)
# Arguments: root_dir frag_dir templ.mk suff_list ign_list
# Returns:   0
#
main()
{
	# -- BEGIN GLOBAL VARIABLE DECLARATIONS --

	# Define these makefile template "anchors" used in gen_mkfile().
	mkfile_fragment_cur_dir_name_anchor="_mkfile_fragment_cur_dir_name_"
	mkfile_fragment_sub_dir_names_anchor="_mkfile_fragment_sub_dir_names_"
	mkfile_fragment_local_src_files_anchor="_mkfile_fragment_local_src_files_"
	mkfile_fragment_src_var_name_anchor="_mkfile_fragment_src_var_name_"

	# The name of the script, stripped of any preceding path.
	script_name=${0##*/}

	# The prefix for all makefile variables.
	src_var_name_prefix='MK'

	# The variable that always holds the string that will be passed to
	# gen_mkfile() as the source variable to insert into the fragment.mk.
	src_var_name=''

	# The suffix appended to all makefile fragment source variables.
	SRC='SRC'

	# The list of source file suffixes to add to the makefile variables.
	src_file_suffixes=''

	# The lists of directories to ignore.
	ignore_dirs=''

	# The arguments to this function. They'll get assigned meaningful
	# values after getopts.
	root_dir=""
	frag_dir=""
	mkfile_frag_tmpl_path=""
	suffix_file=""
	ignore_file=""

	# Flags set by getopts.
	dry_run_flag=""
	hide_flag=""
	recursive_flag=""
	output_name=""
	prefix_flag=""
	verbose_flag=""

	# -- END GLOBAL VARIABLE DECLARATIONS --

	# Local variable declarations.
	local item item_path

	# Process our command line options.
	while getopts ":dho:p:rv:" opt; do
		case $opt in
			d  ) dry_run_flag="1" ;;
			h  ) hide_flag="1" ;;
			r  ) recursive_flag="1" ;;
			o  ) output_name=$OPTARG ;;
			p  ) prefix_flag=$OPTARG ;;
			v  ) verbose_flag=$OPTARG ;;
			# With a leading ':' in the optstring, a missing option argument
			# is reported as ':' (with the option letter in OPTARG).
			:  ) echo "${script_name}: option -${OPTARG} requires an argument." 1>&2
			     print_usage ;;
			\? ) print_usage ;;
		esac
	done
	shift $((OPTIND - 1))

	# Make sure that verboseness level is valid; fall back to level 1
	# otherwise.
	case "$verbose_flag" in
		0 | 1 | 2 ) ;;
		*         ) verbose_flag="1" ;;
	esac

	# Check the number of arguments after command line option processing.
	if [ $# -ne 5 ]; then
		print_usage
	fi

	# If an output script name was given, overwrite script_name with it.
	if [ -n "${output_name}" ]; then
		script_name="${output_name}"
	fi

	# Extract our arguments.
	root_dir=$1
	frag_dir=$2
	mkfile_frag_tmpl_path=$3
	suffix_file=$4
	ignore_file=$5

	# Read the makefile config files to be used in the makefile fragment
	# generation.
	read_mkfile_config

	# Strip / from end of directory path, if there is one.
	root_dir=${root_dir%/}
	frag_dir=${frag_dir%/}

	# Initialize the name of the makefile source variable.
	if [ -n "$prefix_flag" ]; then

		# If prefix_flag is not null, then we construct src_var_name using
		# it instead of root_dir. So if the prefix is 'junk', we will get
		# makefile variables that begin with 'MK_JUNK'.
		root_dir_upper=$(echo "$prefix_flag" | tr '[:lower:]' '[:upper:]')
	else

		# Otherwise, we use root_dir. If the root directory is 'foo' then
		# makefile variables will begin with 'MK_FOO'.
		# We are also careful to convert forward slashes into underscore so
		# root directories such as foo/bar result in makefile variables
		# that begin with 'MK_FOO_BAR'.
		root_dir_upper=$(echo "$root_dir" | tr '[:lower:]' '[:upper:]' | tr '/' '_')
	fi
	src_var_name="${src_var_name_prefix}_${root_dir_upper}"

	# Be verbose if level 2 was requested.
	if [ "$verbose_flag" = "2" ]; then
		echo ">>> $script_name ${src_var_name}_$SRC $root_dir $frag_dir"
	elif [ "$verbose_flag" = "1" ]; then
		echo "$script_name: creating makefile fragment in $frag_dir from $root_dir"
	fi

	# Call our function to generate a makefile in the root directory given.
	gen_mkfile "${src_var_name}_$SRC" "$root_dir" "$frag_dir"

	# If we were asked to act recursively, then continue processing
	# root_dir's contents.
	if [ -n "$recursive_flag" ]; then

		# Descend into the contents of root_dir to generate the makefile
		# fragments. A glob is used rather than the output of 'ls' so that
		# paths containing whitespace are handled; an unmatched glob is not
		# a directory and is therefore skipped by the -d test.
		for item_path in "$root_dir"/*; do

			[ -d "$item_path" ] || continue

			item=${item_path##*/}

			# If item is a directory, and it's not in the ignore list, descend into it.
			if ! is_in_list "$item" "$ignore_dirs"; then
				gen_mkfiles "$item_path" "$frag_dir/$item"
			fi
		done
	fi

	# Exit peacefully.
	return 0
}
#
# is_in_list()
#
# Tests whether a string is one of the whitespace-separated entries of a
# list. Entries are compared literally: they are not treated as patterns
# and are not expanded against the file system.
#
# Arguments:
#   $1  the item to look for
#   $2  the whitespace-separated list to search
# Returns: 0 if the item is in the list, 1 otherwise.
#
is_in_list()
{
	local cur_item the_item item_list found restore_glob

	# Extract argument.
	the_item="$1"
	item_list="$2"

	# The list is split by leaving ${item_list} unquoted below. Disable
	# pathname expansion for the duration of the loop so that an entry such
	# as '*' is compared as-is, and remember whether it has to be re-enabled.
	case $- in
		*f* ) restore_glob="" ;;
		*   ) restore_glob="1"; set -f ;;
	esac

	# Check each item in the list against the item of interest.
	found=1
	for cur_item in ${item_list}; do

		# If the current item in the list matches the one of interest, stop
		# searching and report success (ie: item was found).
		if [ "${cur_item}" = "${the_item}" ]; then
			found=0
			break
		fi
	done

	if [ -n "${restore_glob}" ]; then
		set +f
	fi

	# Return success if the item was found, and failure otherwise.
	return ${found}
}

# The script's main entry point, passing all parameters given.
main "$@"
# Import modules
import os
import sys
import getopt
import math
import re
import subprocess
import time
import statistics


def print_usage():
    """Print the command-line help text to standard output."""
    usage = (
        " ",
        " %s" % script_name,
        " ",
        " Field G. Van Zee",
        " ",
        " Repeatedly run a test driver and accumulate statistics for the",
        " output.",
        " ",
        " Usage:",
        " ",
        " %s [options] drivername" % script_name,
        " ",
        " Arguments:",
        " ",
        " drivername The filename/path of the test driver to run. The",
        " test driver must output its performance data to",
        " standard output.",
        " ",
        " The following options are accepted:",
        " ",
        " -c num performance column index",
        " Find the performance result in column index num of",
        " the test driver's output. Here, a column is defined",
        " as a contiguous sequence of non-whitespace characters,",
        " with the column indices beginning at 0. By default,",
        " the second-to-last column index in the output is used.",
        " ",
        " -d delay sleep() delay",
        " Wait delay seconds after each execution of the",
        " test driver. The default delay is 0.",
        " ",
        " -n niter number of iterations",
        " Execute the test driver niter times. The default",
        " value is 10.",
        " ",
        " -q quiet; summary only",
        " Do not output statistics after every new execution of",
        " the test driver; instead, only output the final values",
        " after all iterations are complete. The default is to",
        " output updated statistics after each iteration.",
        " ",
        " -h help",
        " Output this information and exit.",
        " ",
    )
    for text in usage:
        my_print( text )


# ------------------------------------------------------------------------------

def my_print( s ):
    """Write s, followed by a newline, to standard output."""
    sys.stdout.write( "%s\n" % s )
    #sys.stdout.flush()


# ------------------------------------------------------------------------------

# Global variables.
script_name = None
output_name = None

# The four values (min, avg, max, stdev) that replace the performance column.
_STATS_FORMAT = ' %7.2f %7.2f %7.2f %6.2f'

# The performance value is treated as right-justified in a field of this
# width; the blanks that pad it are replaced together with the value.
_PERF_FIELD_WIDTH = 8


def _run_driver( driverfile ):
    """Run the test driver once and return its standard output as lines."""
    p = subprocess.run( driverfile, stdout=subprocess.PIPE )
    return p.stdout.decode().splitlines()


def _find_perf_column( line, perf_col ):
    """Locate the performance value in one line of driver output.

    perf_col is the column index to use, or None for the second-to-last
    column. Returns a tuple (start, end, value), where start/end delimit the
    column's text within line, or None if the line has no such column or the
    column is not a number. Inf and NaN are reported as 0.0.
    """
    cols = list( re.finditer( r'\S+', line ) )
    index = len( cols ) - 2 if perf_col is None else perf_col

    try:
        col = cols[ index ]
        value = float( col.group() )
    except ( IndexError, ValueError ):
        return None

    # As unlikely as it is, guard against Inf and NaN. (NaN never compares
    # equal to anything, so it must be detected with isnan().)
    if math.isnan( value ) or math.isinf( value ):
        value = 0.0

    return col.start(), col.end(), value


def _format_stats_line( line, span, samples ):
    """Return line with its performance column replaced by statistics.

    span is the (start, end) extent of the performance column within line
    and samples is the non-empty list of values collected for it so far. The
    column is replaced by the min, avg, max, and stdev of samples. If the
    line contains a matlab-style column index range (a word beginning with
    1:n), n is increased by three to account for the added columns.
    """
    start, end = span

    # Compute stats for the current line.
    avgp = statistics.mean( samples )
    maxp = max( samples )
    minp = min( samples )

    # Only compute stdev() when we have two or more data points.
    if len( samples ) > 1:
        stdp = statistics.stdev( samples )
    else:
        stdp = 0.0

    # Absorb the blanks that pad the value out to its field width so that
    # the replacement columns line up.
    room = _PERF_FIELD_WIDTH - ( end - start )
    while room > 0 and start > 0 and line[ start - 1 ] == ' ':
        start -= 1
        room -= 1

    edits = [ ( start, end, _STATS_FORMAT % ( minp, avgp, maxp, stdp ) ) ]

    # Search for the column index range that would be present if this were
    # matlab-compatible output. The index range will typically be 1:n,
    # where n is the number of columns of data. All of the digits of n are
    # read, so ranges of ten or more columns are updated correctly.
    for col in re.finditer( r'\S+', line ):
        m = re.match( r'1:(\d+)', col.group() )
        if m and ( col.end() <= start or col.start() >= end ):
            last_col = int( m.group( 1 ) ) + 3
            edits.append( ( col.start(), col.start() + m.end(),
                            '1:%d' % last_col ) )
            break

    # Apply the replacements from right to left so that the positions of
    # the edits that are still pending remain valid.
    for first, last, text in sorted( edits, reverse=True ):
        line = line[ :first ] + text + line[ last: ]

    return line


def main():
    """Parse the command line, run the test driver, and print statistics.

    Returns 0. Exits with status 2 on a usage error and with status 0 after
    printing the help text for -h.
    """
    global script_name
    global output_name

    # Obtain the script name.
    script_name = os.path.split( sys.argv[0] )[1]
    output_name = script_name

    # Default values for optional arguments. A perf_col of None selects the
    # second-to-last column.
    perf_col = None
    delay = 0.0
    niter = 10
    quiet = False

    # Process our command line options.
    try:
        opts, args = getopt.getopt( sys.argv[1:], "c:d:n:hq" )
    except getopt.GetoptError as err:
        # print help information and exit:
        my_print( str( err ) ) # will print something like "option -a not recognized"
        print_usage()
        sys.exit( 2 )

    try:
        for opt, optarg in opts:
            if opt == "-c":
                perf_col = int( optarg )
            elif opt == "-d":
                delay = float( optarg )
                if delay < 0:
                    raise ValueError( "delay must not be negative" )
            elif opt == "-n":
                niter = int( optarg )
            elif opt == "-q":
                quiet = True
            elif opt == "-h":
                print_usage()
                sys.exit()
            else:
                print_usage()
                sys.exit()
    except ValueError as err:
        my_print( "%s: invalid option argument: %s" % ( output_name, err ) )
        print_usage()
        sys.exit( 2 )

    # Print usage if we don't have exactly one argument.
    if len( args ) != 1:
        print_usage()
        sys.exit( 2 )

    # Acquire our only mandatory argument: the name of the test driver.
    driverfile = args[0]

    # Run the test driver once to detect the number of lines of output. The
    # values reported by this first run are not added to the statistics.
    lines0 = _run_driver( driverfile )
    num_lines0 = len( lines0 )

    # Initialize the list of lists (one list per performance result).
    aperf = [ [] for _ in range( num_lines0 ) ]

    for it in range( niter ):

        # Run the test driver and acquire the lines of output.
        lines = _run_driver( driverfile )

        # Accumulate the test driver's latest results into aperf. Lines
        # beyond those seen in the first run, and lines missing from this
        # run, are ignored.
        for i in range( min( num_lines0, len( lines ) ) ):

            line = lines[i]

            # Parse the current line to find the performance value. Lines
            # without one are passed through unchanged.
            found = _find_perf_column( line, perf_col )
            if found is None:
                if not quiet:
                    print( line )
                continue

            start, end, perf = found

            # Add the performance value to the list at the ith entry of aperf.
            aperf[i].append( perf )

            # If the quiet flag was not given, output the intermediate results.
            if not quiet:
                print( _format_stats_line( line, ( start, end ), aperf[i] ) )

        # Flush stdout after each set of output prior to sleeping.
        sys.stdout.flush()

        # Sleep for a bit until the next iteration.
        time.sleep( delay )

    # If the quiet flag was given, output the final results.
    if quiet:

        for i in range( num_lines0 ):

            # The text of the first run is used as the template into which
            # the final statistics are inserted.
            line = lines0[i]
            found = _find_perf_column( line, perf_col )

            if found is None or not aperf[i]:
                print( line )
            else:
                print( _format_stats_line( line, found[:2], aperf[i] ) )

        # Flush stdout afterwards.
        sys.stdout.flush()

    # Return from main().
    return 0


if __name__ == "__main__":
    main()
bli_castnzm bli_castnzm_check bli_castv bli_castv_check bli_casumv bli_casumv_ex bli_casumv_unb_var1 bli_caxpbyv bli_caxpbyv_ex bli_caxpy2v bli_caxpy2v_ex bli_caxpyd bli_caxpyd_ex bli_caxpyf bli_caxpyf_ex bli_caxpym bli_caxpym_ex bli_caxpym_unb_var1 bli_caxpyv bli_caxpyv_ex bli_cccastm bli_cccastnzm bli_cccastv bli_cccopysc bli_ccgemm_ker_var2_md bli_ccopyd bli_ccopyd_ex bli_ccopym bli_ccopym_ex bli_ccopym_unb_var1 bli_ccopyv bli_ccopyv_ex bli_ccpackm_blk_var1_md bli_ccpackm_cxk_1e_md bli_ccpackm_cxk_1r_md bli_ccpackm_struc_cxk_md bli_ccxpbym_md bli_ccxpbym_md_ex bli_ccxpbym_md_unb_var1 bli_cdcastm bli_cdcastnzm bli_cdcastv bli_cdcopysc bli_cdgemm_ker_var2_md bli_cdivsc bli_cdotaxpyv bli_cdotaxpyv_ex bli_cdotv bli_cdotv_ex bli_cdotxaxpyf bli_cdotxaxpyf_ex bli_cdotxf bli_cdotxf_ex bli_cdotxv bli_cdotxv_ex bli_cdpackm_blk_var1_md bli_cdpackm_cxk_1e_md bli_cdpackm_cxk_1r_md bli_cdpackm_struc_cxk_md bli_cdxpbym_md bli_cdxpbym_md_ex bli_cdxpbym_md_unb_var1 bli_cfprintm bli_cfprintv bli_cgemm bli_cgemm1m bli_cgemm3m1 bli_cgemm3mh bli_cgemm4m1 bli_cgemm4mb bli_cgemm4mb_ker_var2 bli_cgemm4mh bli_cgemm_ex bli_cgemm_ker_var2 bli_cgemm_md_c2r_ref bli_cgemmtrsm_l_ukernel bli_cgemmtrsm_u_ukernel bli_cgemm_ukernel bli_cgemv bli_cgemv_ex bli_cgemv_unb_var1 bli_cgemv_unb_var2 bli_cgemv_unf_var1 bli_cgemv_unf_var2 bli_cger bli_cger_ex bli_cger_unb_var1 bli_cger_unb_var2 bli_cgetijm bli_cgetsc bli_check_alignment_is_mult_of_ptr_size bli_check_alignment_is_power_of_two bli_check_conformal_dims bli_check_consistent_datatypes bli_check_consistent_object_datatypes bli_check_consistent_object_precisions bli_check_consistent_precisions bli_check_datatype_real_proj_of bli_check_equal_vector_lengths bli_check_error_code_helper bli_check_floating_datatype bli_check_floating_object bli_check_general_object bli_check_hermitian_object bli_check_if_exhausted_pool bli_check_integer_datatype bli_check_integer_object bli_check_level3_dims bli_check_matrix_object bli_check_matrix_strides 
bli_check_nonconstant_datatype bli_check_nonconstant_object bli_check_noninteger_datatype bli_check_noninteger_object bli_check_nonunit_diag bli_check_null_pointer bli_check_object_alias_of bli_check_object_buffer bli_check_object_diag_offset_equals bli_check_object_length_equals bli_check_object_real_proj_of bli_check_object_struc bli_check_object_valid_datatype bli_check_object_width_equals bli_check_packm_schema_on_unpack bli_check_packv_schema_on_unpack bli_check_real_datatype bli_check_real_object bli_check_real_valued_object bli_check_scalar_object bli_check_square_object bli_check_sufficient_stack_buf_size bli_check_symmetric_object bli_check_triangular_object bli_check_upper_or_lower_object bli_check_valid_1x3_subpart bli_check_valid_3x1_subpart bli_check_valid_3x3_subpart bli_check_valid_arch_id bli_check_valid_cntl bli_check_valid_datatype bli_check_valid_diag bli_check_valid_error_level bli_check_valid_kc_mod_mult bli_check_valid_malloc_buf bli_check_valid_mc_mod_mult bli_check_valid_nc_mod_mult bli_check_valid_packbuf bli_check_valid_side bli_check_valid_trans bli_check_valid_uplo bli_check_vector_dim_equals bli_check_vector_object bli_chemm bli_chemm1m bli_chemm3m1 bli_chemm3mh bli_chemm4m1 bli_chemm4mh bli_chemm_ex bli_chemv bli_chemv_ex bli_chemv_unb_var1 bli_chemv_unb_var2 bli_chemv_unb_var3 bli_chemv_unb_var4 bli_chemv_unf_var1 bli_chemv_unf_var1a bli_chemv_unf_var3 bli_chemv_unf_var3a bli_cher bli_cher2 bli_cher2_ex bli_cher2k bli_cher2k1m bli_cher2k3m1 bli_cher2k3mh bli_cher2k4m1 bli_cher2k4mh bli_cher2k_ex bli_cher2_unb_var1 bli_cher2_unb_var2 bli_cher2_unb_var3 bli_cher2_unb_var4 bli_cher2_unf_var1 bli_cher2_unf_var4 bli_cher_ex bli_cherk bli_cherk1m bli_cherk3m1 bli_cherk3mh bli_cherk4m1 bli_cherk4mh bli_cherk_ex bli_cherk_l_ker_var2 bli_cherk_u_ker_var2 bli_cher_unb_var1 bli_cher_unb_var2 bli_cinvertd bli_cinvertd_ex bli_cinvertsc bli_cinvertv bli_cinvertv_ex bli_clock bli_clock_helper bli_clock_min_diff bli_cmachval bli_cmkherm 
bli_cmkherm_ex bli_cmkherm_unb_var1 bli_cmksymm bli_cmksymm_ex bli_cmksymm_unb_var1 bli_cmktrim bli_cmktrim_ex bli_cmktrim_unb_var1 bli_cmulsc bli_cnorm1m bli_cnorm1m_ex bli_cnorm1m_unb_var1 bli_cnorm1v bli_cnorm1v_ex bli_cnorm1v_unb_var1 bli_cnormfm bli_cnormfm_ex bli_cnormfm_unb_var1 bli_cnormfsc bli_cnormfv bli_cnormfv_ex bli_cnormfv_unb_var1 bli_cnormim bli_cnormim_ex bli_cnormim_unb_var1 bli_cnormiv bli_cnormiv_ex bli_cnormiv_unb_var1 bli_cntl_calc_num_threads_in bli_cntl_clear_node bli_cntl_copy bli_cntl_create_node bli_cntl_free bli_cntl_free_node bli_cntl_free_wo_thrinfo bli_cntl_free_w_thrinfo bli_cntl_mark_family bli_cntx_1m_stage bli_cntx_3m1_stage bli_cntx_3mh_stage bli_cntx_4m1_stage bli_cntx_4mb_stage bli_cntx_4mh_stage bli_cntx_clear bli_cntx_ind_stage bli_cntx_nat_stage bli_cntx_print bli_cntx_set_blkszs bli_cntx_set_ind_blkszs bli_cntx_set_l1f_kers bli_cntx_set_l1v_kers bli_cntx_set_l3_nat_ukrs bli_cntx_set_packm_kers bli_copyd bli_copyd_check bli_copyd_ex bli_copyd_ex_qfp bli_copym bli_copym_check bli_copym_ex bli_copym_ex_qfp bli_copysc bli_copysc_check bli_copyv bli_copyv_check bli_copyv_ex bli_copyv_ex_qfp bli_cpackm_blk_var1 bli_cpackm_cxk bli_cpackm_cxk_1er bli_cpackm_cxk_3mis bli_cpackm_cxk_4mi bli_cpackm_cxk_rih bli_cpackm_herm_cxk bli_cpackm_herm_cxk_1er bli_cpackm_herm_cxk_3mis bli_cpackm_herm_cxk_4mi bli_cpackm_herm_cxk_rih bli_cpackm_struc_cxk bli_cpackm_struc_cxk_1er bli_cpackm_struc_cxk_3mis bli_cpackm_struc_cxk_4mi bli_cpackm_struc_cxk_rih bli_cpackm_tri_cxk bli_cpackm_tri_cxk_1er bli_cpackm_tri_cxk_3mis bli_cpackm_tri_cxk_4mi bli_cpackm_tri_cxk_rih bli_cpackm_unb_var1 bli_cprintm bli_cprintm_ex bli_cprintv bli_cprintv_ex bli_cpuid_is_bulldozer bli_cpuid_is_excavator bli_cpuid_is_haswell bli_cpuid_is_knl bli_cpuid_is_penryn bli_cpuid_is_piledriver bli_cpuid_is_sandybridge bli_cpuid_is_skx bli_cpuid_is_steamroller bli_cpuid_is_zen bli_cpuid_query bli_cpuid_query_id bli_crandm bli_crandm_ex bli_crandm_unb_var1 bli_crandnm 
bli_crandnm_ex bli_crandnm_unb_var1 bli_crandnv bli_crandnv_ex bli_crandnv_unb_var1 bli_crandv bli_crandv_ex bli_crandv_unb_var1 bli_cscal2d bli_cscal2d_ex bli_cscal2m bli_cscal2m_ex bli_cscal2m_unb_var1 bli_cscal2v bli_cscal2v_ex bli_cscald bli_cscald_ex bli_cscalm bli_cscalm_ex bli_cscalm_unb_var1 bli_cscalv bli_cscalv_ex bli_cscastm bli_cscastnzm bli_cscastv bli_cscopysc bli_csetd bli_csetd_ex bli_csetid bli_csetid_ex bli_csetijm bli_csetm bli_csetm_ex bli_csetm_unb_var1 bli_csetsc bli_csetv bli_csetv_ex bli_csgemm_ker_var2_md bli_cshiftd bli_cshiftd_ex bli_cspackm_blk_var1_md bli_cspackm_cxk_1e_md bli_cspackm_cxk_1r_md bli_cspackm_struc_cxk_md bli_csqrtsc bli_csubd bli_csubd_ex bli_csubm bli_csubm_ex bli_csubm_unb_var1 bli_csubsc bli_csubv bli_csubv_ex bli_csumsqv bli_csumsqv_ex bli_csumsqv_unb_var1 bli_cswapv bli_cswapv_ex bli_csxpbym_md bli_csxpbym_md_ex bli_csxpbym_md_unb_var1 bli_csymm bli_csymm1m bli_csymm3m1 bli_csymm3mh bli_csymm4m1 bli_csymm4mh bli_csymm_ex bli_csymv bli_csymv_ex bli_csyr bli_csyr2 bli_csyr2_ex bli_csyr2k bli_csyr2k1m bli_csyr2k3m1 bli_csyr2k3mh bli_csyr2k4m1 bli_csyr2k4mh bli_csyr2k_ex bli_csyr_ex bli_csyrk bli_csyrk1m bli_csyrk3m1 bli_csyrk3mh bli_csyrk4m1 bli_csyrk4mh bli_csyrk_ex bli_ctrmm bli_ctrmm1m bli_ctrmm3 bli_ctrmm31m bli_ctrmm33m1 bli_ctrmm33mh bli_ctrmm34m1 bli_ctrmm34mh bli_ctrmm3_ex bli_ctrmm3m1 bli_ctrmm4m1 bli_ctrmm_ex bli_ctrmm_ll_ker_var2 bli_ctrmm_lu_ker_var2 bli_ctrmm_rl_ker_var2 bli_ctrmm_ru_ker_var2 bli_ctrmv bli_ctrmv_ex bli_ctrmv_unb_var1 bli_ctrmv_unb_var2 bli_ctrmv_unf_var1 bli_ctrmv_unf_var2 bli_ctrsm bli_ctrsm1m bli_ctrsm3m1 bli_ctrsm4m1 bli_ctrsm_ex bli_ctrsm_ll_ker_var2 bli_ctrsm_l_ukernel bli_ctrsm_lu_ker_var2 bli_ctrsm_rl_ker_var2 bli_ctrsm_ru_ker_var2 bli_ctrsm_u_ukernel bli_ctrsv bli_ctrsv_ex bli_ctrsv_unb_var1 bli_ctrsv_unb_var2 bli_ctrsv_unf_var1 bli_ctrsv_unf_var2 bli_cunpackm_blk_var1 bli_cunpackm_cxk bli_cunpackm_unb_var1 bli_cunzipsc bli_cxpbyd bli_cxpbyd_ex bli_cxpbym bli_cxpbym_ex 
bli_cxpbym_unb_var1 bli_cxpbyv bli_cxpbyv_ex bli_czcastm bli_czcastnzm bli_czcastv bli_czcopysc bli_czgemm_ker_var2_md bli_czipsc bli_czpackm_blk_var1_md bli_czpackm_cxk_1e_md bli_czpackm_cxk_1r_md bli_czpackm_struc_cxk_md bli_czxpbym_md bli_czxpbym_md_ex bli_czxpbym_md_unb_var1 bli_dabsqsc bli_daddd bli_daddd_ex bli_daddm bli_daddm_ex bli_daddm_unb_var1 bli_daddsc bli_daddv bli_daddv_ex bli_damaxv bli_damaxv_ex bli_dasumv bli_dasumv_ex bli_dasumv_unb_var1 bli_daxpbyv bli_daxpbyv_ex bli_daxpy2v bli_daxpy2v_ex bli_daxpyd bli_daxpyd_ex bli_daxpyf bli_daxpyf_ex bli_daxpym bli_daxpym_ex bli_daxpym_unb_var1 bli_daxpyv bli_daxpyv_ex bli_dccastm bli_dccastnzm bli_dccastv bli_dccopysc bli_dcgemm_ker_var2_md bli_dcopyd bli_dcopyd_ex bli_dcopym bli_dcopym_ex bli_dcopym_unb_var1 bli_dcopyv bli_dcopyv_ex bli_dcpackm_blk_var1_md bli_dcpackm_cxk_1e_md bli_dcpackm_cxk_1r_md bli_dcpackm_struc_cxk_md bli_dcxpbym_md bli_dcxpbym_md_ex bli_dcxpbym_md_unb_var1 bli_ddcastm bli_ddcastnzm bli_ddcastv bli_ddcopysc bli_ddgemm_ker_var2_md bli_ddivsc bli_ddotaxpyv bli_ddotaxpyv_ex bli_ddotv bli_ddotv_ex bli_ddotxaxpyf bli_ddotxaxpyf_ex bli_ddotxf bli_ddotxf_ex bli_ddotxv bli_ddotxv_ex bli_ddpackm_blk_var1_md bli_ddpackm_cxk_1e_md bli_ddpackm_cxk_1r_md bli_ddpackm_struc_cxk_md bli_ddxpbym_md bli_ddxpbym_md_ex bli_ddxpbym_md_unb_var1 bli_determine_blocksize bli_determine_blocksize_b bli_determine_blocksize_b_sub bli_determine_blocksize_f bli_determine_blocksize_f_sub bli_dfprintm bli_dfprintv bli_dgemm bli_dgemm1m bli_dgemm3m1 bli_dgemm3mh bli_dgemm4m1 bli_dgemm4mb bli_dgemm4mb_ker_var2 bli_dgemm4mh bli_dgemm_ex bli_dgemm_ker_var2 bli_dgemmtrsm_l_ukernel bli_dgemmtrsm_u_ukernel bli_dgemm_ukernel bli_dgemv bli_dgemv_ex bli_dgemv_unb_var1 bli_dgemv_unb_var2 bli_dgemv_unf_var1 bli_dgemv_unf_var2 bli_dger bli_dger_ex bli_dger_unb_var1 bli_dger_unb_var2 bli_dgetijm bli_dgetsc bli_dhemm bli_dhemm1m bli_dhemm3m1 bli_dhemm3mh bli_dhemm4m1 bli_dhemm4mh bli_dhemm_ex bli_dhemv bli_dhemv_ex 
bli_dhemv_unb_var1 bli_dhemv_unb_var2 bli_dhemv_unb_var3 bli_dhemv_unb_var4 bli_dhemv_unf_var1 bli_dhemv_unf_var1a bli_dhemv_unf_var3 bli_dhemv_unf_var3a bli_dher bli_dher2 bli_dher2_ex bli_dher2k bli_dher2k1m bli_dher2k3m1 bli_dher2k3mh bli_dher2k4m1 bli_dher2k4mh bli_dher2k_ex bli_dher2_unb_var1 bli_dher2_unb_var2 bli_dher2_unb_var3 bli_dher2_unb_var4 bli_dher2_unf_var1 bli_dher2_unf_var4 bli_dher_ex bli_dherk bli_dherk1m bli_dherk3m1 bli_dherk3mh bli_dherk4m1 bli_dherk4mh bli_dherk_ex bli_dherk_l_ker_var2 bli_dherk_u_ker_var2 bli_dher_unb_var1 bli_dher_unb_var2 bli_dinvertd bli_dinvertd_ex bli_dinvertsc bli_dinvertv bli_dinvertv_ex bli_divsc bli_divsc_check bli_divsc_qfp bli_dlamch bli_dmachval bli_dmkherm bli_dmkherm_ex bli_dmkherm_unb_var1 bli_dmksymm bli_dmksymm_ex bli_dmksymm_unb_var1 bli_dmktrim bli_dmktrim_ex bli_dmktrim_unb_var1 bli_dmulsc bli_dnorm1m bli_dnorm1m_ex bli_dnorm1m_unb_var1 bli_dnorm1v bli_dnorm1v_ex bli_dnorm1v_unb_var1 bli_dnormfm bli_dnormfm_ex bli_dnormfm_unb_var1 bli_dnormfsc bli_dnormfv bli_dnormfv_ex bli_dnormfv_unb_var1 bli_dnormim bli_dnormim_ex bli_dnormim_unb_var1 bli_dnormiv bli_dnormiv_ex bli_dnormiv_unb_var1 bli_dotaxpyv bli_dotaxpyv_check bli_dotaxpyv_ex bli_dotaxpyv_ex_qfp bli_dotv bli_dotv_check bli_dotv_ex bli_dotv_ex_qfp bli_dotxaxpyf bli_dotxaxpyf_check bli_dotxaxpyf_ex bli_dotxaxpyf_ex_qfp bli_dotxf bli_dotxf_check bli_dotxf_ex bli_dotxf_ex_qfp bli_dotxv bli_dotxv_check bli_dotxv_ex bli_dotxv_ex_qfp bli_dpackm_blk_var1 bli_dpackm_cxk bli_dpackm_herm_cxk bli_dpackm_struc_cxk bli_dpackm_tri_cxk bli_dpackm_unb_var1 bli_dprintm bli_dprintm_ex bli_dprintv bli_dprintv_ex bli_drandm bli_drandm_ex bli_drandm_unb_var1 bli_drandnm bli_drandnm_ex bli_drandnm_unb_var1 bli_drandnv bli_drandnv_ex bli_drandnv_unb_var1 bli_drandv bli_drandv_ex bli_drandv_unb_var1 bli_dscal2d bli_dscal2d_ex bli_dscal2m bli_dscal2m_ex bli_dscal2m_unb_var1 bli_dscal2v bli_dscal2v_ex bli_dscald bli_dscald_ex bli_dscalm bli_dscalm_ex bli_dscalm_unb_var1 
bli_dscalv bli_dscalv_ex bli_dscastm bli_dscastnzm bli_dscastv bli_dscopysc bli_dsetd bli_dsetd_ex bli_dsetid bli_dsetid_ex bli_dsetijm bli_dsetm bli_dsetm_ex bli_dsetm_unb_var1 bli_dsetsc bli_dsetv bli_dsetv_ex bli_dsgemm_ker_var2_md bli_dshiftd bli_dshiftd_ex bli_dspackm_blk_var1_md bli_dspackm_cxk_1e_md bli_dspackm_cxk_1r_md bli_dspackm_struc_cxk_md bli_dsqrtsc bli_dsubd bli_dsubd_ex bli_dsubm bli_dsubm_ex bli_dsubm_unb_var1 bli_dsubsc bli_dsubv bli_dsubv_ex bli_dsumsqv bli_dsumsqv_ex bli_dsumsqv_unb_var1 bli_dswapv bli_dswapv_ex bli_dsxpbym_md bli_dsxpbym_md_ex bli_dsxpbym_md_unb_var1 bli_dsymm bli_dsymm1m bli_dsymm3m1 bli_dsymm3mh bli_dsymm4m1 bli_dsymm4mh bli_dsymm_ex bli_dsymv bli_dsymv_ex bli_dsyr bli_dsyr2 bli_dsyr2_ex bli_dsyr2k bli_dsyr2k1m bli_dsyr2k3m1 bli_dsyr2k3mh bli_dsyr2k4m1 bli_dsyr2k4mh bli_dsyr2k_ex bli_dsyr_ex bli_dsyrk bli_dsyrk1m bli_dsyrk3m1 bli_dsyrk3mh bli_dsyrk4m1 bli_dsyrk4mh bli_dsyrk_ex bli_dtrmm bli_dtrmm1m bli_dtrmm3 bli_dtrmm31m bli_dtrmm33m1 bli_dtrmm33mh bli_dtrmm34m1 bli_dtrmm34mh bli_dtrmm3_ex bli_dtrmm3m1 bli_dtrmm4m1 bli_dtrmm_ex bli_dtrmm_ll_ker_var2 bli_dtrmm_lu_ker_var2 bli_dtrmm_rl_ker_var2 bli_dtrmm_ru_ker_var2 bli_dtrmv bli_dtrmv_ex bli_dtrmv_unb_var1 bli_dtrmv_unb_var2 bli_dtrmv_unf_var1 bli_dtrmv_unf_var2 bli_dtrsm bli_dtrsm1m bli_dtrsm3m1 bli_dtrsm4m1 bli_dtrsm_ex bli_dtrsm_ll_ker_var2 bli_dtrsm_l_ukernel bli_dtrsm_lu_ker_var2 bli_dtrsm_rl_ker_var2 bli_dtrsm_ru_ker_var2 bli_dtrsm_u_ukernel bli_dtrsv bli_dtrsv_ex bli_dtrsv_unb_var1 bli_dtrsv_unb_var2 bli_dtrsv_unf_var1 bli_dtrsv_unf_var2 bli_dt_size bli_dt_size_check bli_dt_string bli_dt_string_check bli_dt_union_check bli_dunpackm_blk_var1 bli_dunpackm_cxk bli_dunpackm_unb_var1 bli_dunzipsc bli_dxpbyd bli_dxpbyd_ex bli_dxpbym bli_dxpbym_ex bli_dxpbym_unb_var1 bli_dxpbyv bli_dxpbyv_ex bli_dzcastm bli_dzcastnzm bli_dzcastv bli_dzcopysc bli_dzgemm_ker_var2_md bli_dzipsc bli_dzpackm_blk_var1_md bli_dzpackm_cxk_1e_md bli_dzpackm_cxk_1r_md bli_dzpackm_struc_cxk_md 
bli_dzxpbym_md bli_dzxpbym_md_ex bli_dzxpbym_md_unb_var1 bli_error_checking_is_enabled bli_error_checking_level bli_error_checking_level_set bli_error_string_for_code bli_ffree_align bli_ffree_noalign bli_finalize bli_finalize_apis bli_finalize_auto bli_finalize_once bli_find_area_trap_l bli_fmalloc_align bli_fmalloc_align_check bli_fmalloc_noalign bli_fmalloc_post_check bli_fprintm bli_fprintm_check bli_fprintm_ex bli_fprintm_qfp bli_fprintv bli_fprintv_check bli_fprintv_ex bli_fprintv_qfp bli_free_intl bli_free_user bli_func_create bli_func_free bli_func_init bli_func_init_null bli_func_is_null bli_func_is_null_dt bli_gcd bli_gemm bli_gemm1m bli_gemm3m1 bli_gemm3mh bli_gemm4m1 bli_gemm4mb bli_gemm4mb_ker_var2 bli_gemm4mh bli_gemm_basic_check bli_gemm_blk_var1 bli_gemm_blk_var2 bli_gemm_blk_var3 bli_gemmbp_cntl_create bli_gemm_check bli_gemm_cntl_create bli_gemm_cntl_create_node bli_gemm_cntl_free bli_gemm_determine_kc bli_gemm_determine_kc_b bli_gemm_determine_kc_f bli_gemm_direct bli_gemm_ex bli_gemm_front bli_gemmind bli_gemmind_get_avail bli_gemm_int bli_gemm_ker_var2 bli_gemm_ker_var2_md bli_gemm_md bli_gemm_md_ccc bli_gemm_md_ccr bli_gemm_md_crc bli_gemm_md_crr bli_gemm_md_rcc bli_gemm_md_rcr bli_gemm_md_rrc bli_gemm_md_rrr bli_gemmnat bli_gemm_packa bli_gemm_packb bli_gemm_prune_unref_mparts_k bli_gemm_prune_unref_mparts_m bli_gemm_prune_unref_mparts_n bli_gemmtrsm_l_ukernel_qfp bli_gemmtrsm_ukernel bli_gemmtrsm_u_ukernel_qfp bli_gemm_ukernel bli_gemm_ukernel_qfp bli_gemv bli_gemv_check bli_gemv_ex bli_gemv_ex_qfp bli_gemv_unb_var1 bli_gemv_unb_var1_qfp bli_gemv_unb_var2 bli_gemv_unb_var2_qfp bli_gemv_unf_var1 bli_gemv_unf_var1_qfp bli_gemv_unf_var2 bli_gemv_unf_var2_qfp bli_ger bli_ger_check bli_ger_ex bli_ger_ex_qfp bli_ger_unb_var1 bli_ger_unb_var1_qfp bli_ger_unb_var2 bli_ger_unb_var2_qfp bli_getijm bli_getopt bli_getopt_init_state bli_getsc bli_getsc_check bli_getsc_qfp bli_gks_cntx_l3_nat_ukr_is_ref bli_gks_finalize bli_gks_init bli_gks_init_index 
bli_gks_init_ref_cntx bli_gks_l3_ukr_impl_string bli_gks_l3_ukr_impl_type bli_gks_lookup_ind_cntx bli_gks_lookup_nat_cntx bli_gks_query_cntx bli_gks_query_cntx_noinit bli_gks_query_ind_cntx bli_gks_query_nat_cntx bli_gks_register_cntx bli_hemm bli_hemm1m bli_hemm3m1 bli_hemm3mh bli_hemm4m1 bli_hemm4mh bli_hemm_basic_check bli_hemm_check bli_hemm_ex bli_hemm_front bli_hemmind bli_hemmind_get_avail bli_hemmnat bli_hemv bli_hemv_check bli_hemv_ex bli_hemv_ex_qfp bli_hemv_unb_var1 bli_hemv_unb_var1_qfp bli_hemv_unb_var2 bli_hemv_unb_var2_qfp bli_hemv_unb_var3 bli_hemv_unb_var3_qfp bli_hemv_unb_var4 bli_hemv_unb_var4_qfp bli_hemv_unf_var1 bli_hemv_unf_var1a bli_hemv_unf_var1a_qfp bli_hemv_unf_var1_qfp bli_hemv_unf_var3 bli_hemv_unf_var3a bli_hemv_unf_var3a_qfp bli_hemv_unf_var3_qfp bli_her bli_her2 bli_her2_check bli_her2_ex bli_her2_ex_qfp bli_her2k bli_her2k1m bli_her2k3m1 bli_her2k3mh bli_her2k4m1 bli_her2k4mh bli_her2k_basic_check bli_her2k_check bli_her2k_ex bli_her2k_front bli_her2kind bli_her2kind_get_avail bli_her2knat bli_her2_unb_var1 bli_her2_unb_var1_qfp bli_her2_unb_var2 bli_her2_unb_var2_qfp bli_her2_unb_var3 bli_her2_unb_var3_qfp bli_her2_unb_var4 bli_her2_unb_var4_qfp bli_her2_unf_var1 bli_her2_unf_var1_qfp bli_her2_unf_var4 bli_her2_unf_var4_qfp bli_her_check bli_her_ex bli_her_ex_qfp bli_herk bli_herk1m bli_herk3m1 bli_herk3mh bli_herk4m1 bli_herk4mh bli_herk_basic_check bli_herk_check bli_herk_determine_kc bli_herk_determine_kc_b bli_herk_determine_kc_f bli_herk_direct bli_herk_ex bli_herk_front bli_herkind bli_herkind_get_avail bli_herk_l_ker_var2 bli_herknat bli_herk_prune_unref_mparts_k bli_herk_prune_unref_mparts_m bli_herk_prune_unref_mparts_n bli_herk_u_ker_var2 bli_herk_x_ker_var2 bli_her_unb_var1 bli_her_unb_var1_qfp bli_her_unb_var2 bli_her_unb_var2_qfp bli_ifprintm bli_ifprintv bli_igetsc bli_ind_disable bli_ind_disable_all bli_ind_disable_all_dt bli_ind_disable_dt bli_ind_enable bli_ind_enable_dt bli_ind_finalize bli_ind_get_impl_string 
bli_ind_init bli_ind_map_cdt_to_index bli_ind_oper_enable_only bli_ind_oper_find_avail bli_ind_oper_get_avail bli_ind_oper_get_avail_impl_string bli_ind_oper_is_impl bli_info_get_blas_int_type_size bli_info_get_enable_blas bli_info_get_enable_cblas bli_info_get_enable_memkind bli_info_get_enable_openmp bli_info_get_enable_pba_pools bli_info_get_enable_pthreads bli_info_get_enable_sandbox bli_info_get_enable_sba_pools bli_info_get_enable_stay_auto_init bli_info_get_enable_threading bli_info_get_gemm_impl_string bli_info_get_gemmtrsm_l_ukr_impl_string bli_info_get_gemmtrsm_u_ukr_impl_string bli_info_get_gemm_ukr_impl_string bli_info_get_heap_addr_align_size bli_info_get_heap_stride_align_size bli_info_get_hemm_impl_string bli_info_get_her2k_impl_string bli_info_get_herk_impl_string bli_info_get_int_type_size bli_info_get_int_type_size_str bli_info_get_max_type_size bli_info_get_num_fp_types bli_info_get_page_size bli_info_get_pool_addr_align_size bli_info_get_simd_align_size bli_info_get_simd_num_registers bli_info_get_simd_size bli_info_get_stack_buf_align_size bli_info_get_stack_buf_max_size bli_info_get_symm_impl_string bli_info_get_syr2k_impl_string bli_info_get_syrk_impl_string bli_info_get_thread_part_jrir_rr bli_info_get_thread_part_jrir_slab bli_info_get_trmm3_impl_string bli_info_get_trmm_impl_string bli_info_get_trsm_impl_string bli_info_get_trsm_l_ukr_impl_string bli_info_get_trsm_u_ukr_impl_string bli_info_get_version_str bli_init bli_init_apis bli_init_auto bli_init_once bli_invertd bli_invertd_check bli_invertd_ex bli_invertd_ex_qfp bli_invertsc bli_invertsc_check bli_invertsc_qfp bli_invertv bli_invertv_check bli_invertv_ex bli_invertv_ex_qfp bli_ipow bli_iprintm bli_iprintm_ex bli_iprintv bli_iprintv_ex bli_isetsc bli_l0_xsc_check bli_l0_xx2sc_check bli_l0_xxsc_check bli_l1d_ax_check bli_l1d_axy_check bli_l1d_x_check bli_l1d_xy_check bli_l1m_ax_check bli_l1m_axy_check bli_l1m_xy_check bli_l1v_axby_check bli_l1v_ax_check bli_l1v_axy_check 
bli_l1v_dot_check bli_l1v_xby_check bli_l1v_x_check bli_l1v_xi_check bli_l1v_xy_check bli_l3_basic_check bli_l3_cntl_create_if bli_l3_cntl_free bli_l3_determine_kc bli_l3_direct bli_l3_ind_oper_enable_only bli_l3_ind_oper_find_avail bli_l3_ind_oper_get_enable bli_l3_ind_oper_get_func bli_l3_ind_oper_set_enable bli_l3_ind_oper_set_enable_all bli_l3_ind_set_enable_dt bli_l3_packm bli_l3_prune_unref_mparts_k bli_l3_prune_unref_mparts_m bli_l3_prune_unref_mparts_n bli_l3_thread_decorator bli_l3_thread_entry bli_l3_thrinfo_create_root bli_l3_thrinfo_free bli_l3_thrinfo_free_paths bli_l3_thrinfo_init_single bli_l3_thrinfo_print_gemm_paths bli_l3_thrinfo_print_trsm_paths bli_lcm bli_lsame bli_machval bli_malloc_intl bli_malloc_user bli_mbool_create bli_mbool_free bli_mbool_init bli_membrk_acquire_m bli_membrk_compute_pool_block_sizes bli_membrk_compute_pool_block_sizes_dt bli_membrk_finalize bli_membrk_finalize_pools bli_membrk_init bli_membrk_init_pools bli_membrk_pool_size bli_membrk_query bli_membrk_release bli_membrk_rntm_set_membrk bli_memsys_finalize bli_memsys_init bli_mkherm bli_mkherm_check bli_mkherm_ex bli_mkherm_ex_qfp bli_mksymm bli_mksymm_check bli_mksymm_ex bli_mksymm_ex_qfp bli_mktrim bli_mktrim_check bli_mktrim_ex bli_mktrim_ex_qfp bli_mulsc bli_mulsc_check bli_mulsc_qfp bli_next_prime_factor bli_norm1m bli_norm1m_check bli_norm1m_ex bli_norm1m_ex_qfp bli_norm1v bli_norm1v_check bli_norm1v_ex bli_norm1v_ex_qfp bli_normfm bli_normfm_check bli_normfm_ex bli_normfm_ex_qfp bli_normfsc bli_normfsc_check bli_normfsc_qfp bli_normfv bli_normfv_check bli_normfv_ex bli_normfv_ex_qfp bli_normim bli_normim_check bli_normim_ex bli_normim_ex_qfp bli_normiv bli_normiv_check bli_normiv_ex bli_normiv_ex_qfp bli_obj_alloc_buffer bli_obj_alloc_buffer_check bli_obj_attach_buffer bli_obj_attach_buffer_check bli_obj_create bli_obj_create_1x1 bli_obj_create_1x1_with_attached_buffer bli_obj_create_check bli_obj_create_conf_to bli_obj_create_const_check 
bli_obj_create_scalar_check bli_obj_create_with_attached_buffer bli_obj_create_without_buffer bli_obj_create_without_buffer_check bli_obj_equals bli_obj_free bli_obj_free_check bli_obj_imag_equals bli_obj_imag_is_zero bli_obj_print bli_obj_print_check bli_obj_scalar_apply_scalar bli_obj_scalar_attach bli_obj_scalar_cast_to bli_obj_scalar_detach bli_obj_scalar_equals bli_obj_scalar_has_nonzero_imag bli_obj_scalar_init_detached bli_obj_scalar_init_detached_copy_of bli_obj_scalar_reset bli_packm_acquire_mpart_l2r bli_packm_acquire_mpart_t2b bli_packm_acquire_mpart_tl2br bli_packm_blk_var1 bli_packm_blk_var1_md bli_packm_cntl_create_node bli_packm_init bli_packm_init_check bli_packm_init_pack bli_packm_int bli_packm_int_check bli_packm_offset_to_panel_for bli_packm_thrinfo_init bli_packm_thrinfo_init_single bli_packm_unb_var1 bli_param_map_blis_to_char_conj bli_param_map_blis_to_char_diag bli_param_map_blis_to_char_dt bli_param_map_blis_to_char_side bli_param_map_blis_to_char_trans bli_param_map_blis_to_char_uplo bli_param_map_blis_to_netlib_diag bli_param_map_blis_to_netlib_machval bli_param_map_blis_to_netlib_side bli_param_map_blis_to_netlib_trans bli_param_map_blis_to_netlib_uplo bli_param_map_char_to_blis_conj bli_param_map_char_to_blis_diag bli_param_map_char_to_blis_dt bli_param_map_char_to_blis_side bli_param_map_char_to_blis_trans bli_param_map_char_to_blis_uplo bli_param_map_netlib_to_blis_diag bli_param_map_netlib_to_blis_side bli_param_map_netlib_to_blis_trans bli_param_map_netlib_to_blis_uplo bli_partition_2x2 bli_pblk_print bli_pool_alloc_block bli_pool_checkin_block bli_pool_checkout_block bli_pool_finalize bli_pool_free_block bli_pool_grow bli_pool_init bli_pool_print bli_pool_reinit bli_pool_shrink bli_prime_factorization bli_printm bli_printm_ex bli_print_msg bli_printv bli_printv_ex bli_projm bli_projm_check bli_projv bli_projv_check bli_prune_unref_mparts bli_pthread_barrier_destroy bli_pthread_barrier_init bli_pthread_barrier_wait 
bli_pthread_cond_broadcast bli_pthread_cond_destroy bli_pthread_cond_init bli_pthread_cond_wait bli_pthread_create bli_pthread_join bli_pthread_mutex_destroy bli_pthread_mutex_init bli_pthread_mutex_lock bli_pthread_mutex_trylock bli_pthread_mutex_unlock bli_pthread_once bli_randm bli_randm_check bli_randm_ex bli_randm_ex_qfp bli_randnm bli_randnm_check bli_randnm_ex bli_randnm_ex_qfp bli_randnv bli_randnv_check bli_randnv_ex bli_randnv_ex_qfp bli_randv bli_randv_check bli_randv_ex bli_randv_ex_qfp bli_rntm_print bli_rntm_set_ways_for_op bli_rntm_set_ways_from_rntm bli_sabsqsc bli_saddd bli_saddd_ex bli_saddm bli_saddm_ex bli_saddm_unb_var1 bli_saddsc bli_saddv bli_saddv_ex bli_samaxv bli_samaxv_ex bli_sasumv bli_sasumv_ex bli_sasumv_unb_var1 bli_saxpbyv bli_saxpbyv_ex bli_saxpy2v bli_saxpy2v_ex bli_saxpyd bli_saxpyd_ex bli_saxpyf bli_saxpyf_ex bli_saxpym bli_saxpym_ex bli_saxpym_unb_var1 bli_saxpyv bli_saxpyv_ex bli_sba_acquire bli_sba_checkin_array bli_sba_checkout_array bli_sba_finalize bli_sba_init bli_sba_query bli_sba_release bli_sba_rntm_set_pool bli_scal2d bli_scal2d_check bli_scal2d_ex bli_scal2d_ex_qfp bli_scal2m bli_scal2m_check bli_scal2m_ex bli_scal2m_ex_qfp bli_scal2v bli_scal2v_check bli_scal2v_ex bli_scal2v_ex_qfp bli_scald bli_scald_check bli_scald_ex bli_scald_ex_qfp bli_scalm bli_scalm_check bli_scalm_ex bli_scalm_ex_qfp bli_scalv bli_scalv_check bli_scalv_ex bli_scalv_ex_qfp bli_sccastm bli_sccastnzm bli_sccastv bli_sccopysc bli_scgemm_ker_var2_md bli_scopyd bli_scopyd_ex bli_scopym bli_scopym_ex bli_scopym_unb_var1 bli_scopyv bli_scopyv_ex bli_scpackm_blk_var1_md bli_scpackm_cxk_1e_md bli_scpackm_cxk_1r_md bli_scpackm_struc_cxk_md bli_scxpbym_md bli_scxpbym_md_ex bli_scxpbym_md_unb_var1 bli_sdcastm bli_sdcastnzm bli_sdcastv bli_sdcopysc bli_sdgemm_ker_var2_md bli_sdivsc bli_sdotaxpyv bli_sdotaxpyv_ex bli_sdotv bli_sdotv_ex bli_sdotxaxpyf bli_sdotxaxpyf_ex bli_sdotxf bli_sdotxf_ex bli_sdotxv bli_sdotxv_ex bli_sdpackm_blk_var1_md 
bli_sdpackm_cxk_1e_md bli_sdpackm_cxk_1r_md bli_sdpackm_struc_cxk_md bli_sdxpbym_md bli_sdxpbym_md_ex bli_sdxpbym_md_unb_var1 bli_setd bli_setd_check bli_setd_ex bli_setd_ex_qfp bli_setid bli_setid_check bli_setid_ex bli_setid_ex_qfp bli_setijm bli_setim bli_setiv bli_setm bli_setm_check bli_setm_ex bli_setm_ex_qfp bli_setrm bli_setrv bli_setsc bli_setsc_check bli_setsc_qfp bli_setv bli_setv_check bli_setv_ex bli_setv_ex_qfp bli_sfprintm bli_sfprintv bli_sgemm bli_sgemm1m bli_sgemm3m1 bli_sgemm3mh bli_sgemm4m1 bli_sgemm4mb bli_sgemm4mb_ker_var2 bli_sgemm4mh bli_sgemm_ex bli_sgemm_ker_var2 bli_sgemmtrsm_l_ukernel bli_sgemmtrsm_u_ukernel bli_sgemm_ukernel bli_sgemv bli_sgemv_ex bli_sgemv_unb_var1 bli_sgemv_unb_var2 bli_sgemv_unf_var1 bli_sgemv_unf_var2 bli_sger bli_sger_ex bli_sger_unb_var1 bli_sger_unb_var2 bli_sgetijm bli_sgetsc bli_shemm bli_shemm1m bli_shemm3m1 bli_shemm3mh bli_shemm4m1 bli_shemm4mh bli_shemm_ex bli_shemv bli_shemv_ex bli_shemv_unb_var1 bli_shemv_unb_var2 bli_shemv_unb_var3 bli_shemv_unb_var4 bli_shemv_unf_var1 bli_shemv_unf_var1a bli_shemv_unf_var3 bli_shemv_unf_var3a bli_sher bli_sher2 bli_sher2_ex bli_sher2k bli_sher2k1m bli_sher2k3m1 bli_sher2k3mh bli_sher2k4m1 bli_sher2k4mh bli_sher2k_ex bli_sher2_unb_var1 bli_sher2_unb_var2 bli_sher2_unb_var3 bli_sher2_unb_var4 bli_sher2_unf_var1 bli_sher2_unf_var4 bli_sher_ex bli_sherk bli_sherk1m bli_sherk3m1 bli_sherk3mh bli_sherk4m1 bli_sherk4mh bli_sherk_ex bli_sherk_l_ker_var2 bli_sherk_u_ker_var2 bli_sher_unb_var1 bli_sher_unb_var2 bli_shiftd bli_shiftd_check bli_shiftd_ex bli_shiftd_ex_qfp bli_sinvertd bli_sinvertd_ex bli_sinvertsc bli_sinvertv bli_sinvertv_ex bli_slamch bli_sleep bli_smachval bli_smkherm bli_smkherm_ex bli_smkherm_unb_var1 bli_smksymm bli_smksymm_ex bli_smksymm_unb_var1 bli_smktrim bli_smktrim_ex bli_smktrim_unb_var1 bli_smulsc bli_snorm1m bli_snorm1m_ex bli_snorm1m_unb_var1 bli_snorm1v bli_snorm1v_ex bli_snorm1v_unb_var1 bli_snormfm bli_snormfm_ex bli_snormfm_unb_var1 bli_snormfsc 
bli_snormfv bli_snormfv_ex bli_snormfv_unb_var1 bli_snormim bli_snormim_ex bli_snormim_unb_var1 bli_snormiv bli_snormiv_ex bli_snormiv_unb_var1 bli_spackm_blk_var1 bli_spackm_cxk bli_spackm_herm_cxk bli_spackm_struc_cxk bli_spackm_tri_cxk bli_spackm_unb_var1 bli_sprintm bli_sprintm_ex bli_sprintv bli_sprintv_ex bli_sqrtsc bli_sqrtsc_check bli_sqrtsc_qfp bli_srandm bli_srandm_ex bli_srandm_unb_var1 bli_srandnm bli_srandnm_ex bli_srandnm_unb_var1 bli_srandnv bli_srandnv_ex bli_srandnv_unb_var1 bli_srandv bli_srandv_ex bli_srandv_unb_var1 bli_sscal2d bli_sscal2d_ex bli_sscal2m bli_sscal2m_ex bli_sscal2m_unb_var1 bli_sscal2v bli_sscal2v_ex bli_sscald bli_sscald_ex bli_sscalm bli_sscalm_ex bli_sscalm_unb_var1 bli_sscalv bli_sscalv_ex bli_sscastm bli_sscastnzm bli_sscastv bli_sscopysc bli_ssetd bli_ssetd_ex bli_ssetid bli_ssetid_ex bli_ssetijm bli_ssetm bli_ssetm_ex bli_ssetm_unb_var1 bli_ssetsc bli_ssetv bli_ssetv_ex bli_ssgemm_ker_var2_md bli_sshiftd bli_sshiftd_ex bli_sspackm_blk_var1_md bli_sspackm_cxk_1e_md bli_sspackm_cxk_1r_md bli_sspackm_struc_cxk_md bli_ssqrtsc bli_ssubd bli_ssubd_ex bli_ssubm bli_ssubm_ex bli_ssubm_unb_var1 bli_ssubsc bli_ssubv bli_ssubv_ex bli_ssumsqv bli_ssumsqv_ex bli_ssumsqv_unb_var1 bli_sswapv bli_sswapv_ex bli_ssxpbym_md bli_ssxpbym_md_ex bli_ssxpbym_md_unb_var1 bli_ssymm bli_ssymm1m bli_ssymm3m1 bli_ssymm3mh bli_ssymm4m1 bli_ssymm4mh bli_ssymm_ex bli_ssymv bli_ssymv_ex bli_ssyr bli_ssyr2 bli_ssyr2_ex bli_ssyr2k bli_ssyr2k1m bli_ssyr2k3m1 bli_ssyr2k3mh bli_ssyr2k4m1 bli_ssyr2k4mh bli_ssyr2k_ex bli_ssyr_ex bli_ssyrk bli_ssyrk1m bli_ssyrk3m1 bli_ssyrk3mh bli_ssyrk4m1 bli_ssyrk4mh bli_ssyrk_ex bli_string_mkupper bli_strmm bli_strmm1m bli_strmm3 bli_strmm31m bli_strmm33m1 bli_strmm33mh bli_strmm34m1 bli_strmm34mh bli_strmm3_ex bli_strmm3m1 bli_strmm4m1 bli_strmm_ex bli_strmm_ll_ker_var2 bli_strmm_lu_ker_var2 bli_strmm_rl_ker_var2 bli_strmm_ru_ker_var2 bli_strmv bli_strmv_ex bli_strmv_unb_var1 bli_strmv_unb_var2 bli_strmv_unf_var1 
bli_strmv_unf_var2 bli_strsm bli_strsm1m bli_strsm3m1 bli_strsm4m1 bli_strsm_ex bli_strsm_ll_ker_var2 bli_strsm_l_ukernel bli_strsm_lu_ker_var2 bli_strsm_rl_ker_var2 bli_strsm_ru_ker_var2 bli_strsm_u_ukernel bli_strsv bli_strsv_ex bli_strsv_unb_var1 bli_strsv_unb_var2 bli_strsv_unf_var1 bli_strsv_unf_var2 bli_subd bli_subd_check bli_subd_ex bli_subd_ex_qfp bli_subm bli_subm_check bli_subm_ex bli_subm_ex_qfp bli_subsc bli_subsc_check bli_subsc_qfp bli_subv bli_subv_check bli_subv_ex bli_subv_ex_qfp bli_sumsqv bli_sumsqv_check bli_sumsqv_ex bli_sumsqv_ex_qfp bli_sunpackm_blk_var1 bli_sunpackm_cxk bli_sunpackm_unb_var1 bli_sunzipsc bli_swapv bli_swapv_check bli_swapv_ex bli_swapv_ex_qfp bli_sxpbyd bli_sxpbyd_ex bli_sxpbym bli_sxpbym_ex bli_sxpbym_unb_var1 bli_sxpbyv bli_sxpbyv_ex bli_symm bli_symm1m bli_symm3m1 bli_symm3mh bli_symm4m1 bli_symm4mh bli_symm_check bli_symm_ex bli_symm_front bli_symmind bli_symmind_get_avail bli_symmnat bli_symv bli_symv_check bli_symv_ex bli_symv_ex_qfp bli_syr bli_syr2 bli_syr2_check bli_syr2_ex bli_syr2_ex_qfp bli_syr2k bli_syr2k1m bli_syr2k3m1 bli_syr2k3mh bli_syr2k4m1 bli_syr2k4mh bli_syr2k_check bli_syr2k_ex bli_syr2k_front bli_syr2kind bli_syr2kind_get_avail bli_syr2knat bli_syr_check bli_syr_ex bli_syr_ex_qfp bli_syrk bli_syrk1m bli_syrk3m1 bli_syrk3mh bli_syrk4m1 bli_syrk4mh bli_syrk_check bli_syrk_ex bli_syrk_front bli_syrkind bli_syrkind_get_avail bli_syrknat bli_szcastm bli_szcastnzm bli_szcastv bli_szcopysc bli_szgemm_ker_var2_md bli_szipsc bli_szpackm_blk_var1_md bli_szpackm_cxk_1e_md bli_szpackm_cxk_1r_md bli_szpackm_struc_cxk_md bli_szxpbym_md bli_szxpbym_md_ex bli_szxpbym_md_unb_var1 bli_thrcomm_barrier bli_thrcomm_barrier_atomic bli_thrcomm_bcast bli_thrcomm_cleanup bli_thrcomm_create bli_thrcomm_free bli_thrcomm_init bli_thread_finalize bli_thread_get_env bli_thread_get_ic_nt bli_thread_get_ir_nt bli_thread_get_jc_nt bli_thread_get_jr_nt bli_thread_get_num_threads bli_thread_get_pc_nt bli_thread_init 
bli_thread_init_rntm bli_thread_init_rntm_from_env bli_thread_range_b2t bli_thread_range_l2r bli_thread_range_mdim bli_thread_range_ndim bli_thread_range_r2l bli_thread_range_sub bli_thread_range_t2b bli_thread_range_weighted_b2t bli_thread_range_weighted_l2r bli_thread_range_weighted_r2l bli_thread_range_weighted_sub bli_thread_range_weighted_t2b bli_thread_range_width_l bli_thread_set_num_threads bli_thread_set_num_threads_ bli_thread_set_ways bli_thread_set_ways_ bli_thrinfo_create bli_thrinfo_create_for_cntl bli_thrinfo_create_for_cntl_prenode bli_thrinfo_free bli_thrinfo_grow bli_thrinfo_init bli_thrinfo_init_single bli_thrinfo_rgrow bli_thrinfo_rgrow_prenode bli_trmm bli_trmm1m bli_trmm3 bli_trmm31m bli_trmm33m1 bli_trmm33mh bli_trmm34m1 bli_trmm34mh bli_trmm3_ex bli_trmm3_front bli_trmm3ind bli_trmm3ind_get_avail bli_trmm3m1 bli_trmm3nat bli_trmm4m1 bli_trmm_check bli_trmm_determine_kc bli_trmm_determine_kc_b bli_trmm_determine_kc_f bli_trmm_direct bli_trmm_ex bli_trmm_front bli_trmmind bli_trmmind_get_avail bli_trmm_ll_ker_var2 bli_trmm_lu_ker_var2 bli_trmmnat bli_trmm_prune_unref_mparts_k bli_trmm_prune_unref_mparts_m bli_trmm_prune_unref_mparts_n bli_trmm_rl_ker_var2 bli_trmm_ru_ker_var2 bli_trmm_xx_ker_var2 bli_trmv bli_trmv_check bli_trmv_ex bli_trmv_ex_qfp bli_trmv_unb_var1 bli_trmv_unb_var1_qfp bli_trmv_unb_var2 bli_trmv_unb_var2_qfp bli_trmv_unf_var1 bli_trmv_unf_var1_qfp bli_trmv_unf_var2 bli_trmv_unf_var2_qfp bli_trsm bli_trsm1m bli_trsm3m1 bli_trsm4m1 bli_trsm_blk_var1 bli_trsm_blk_var2 bli_trsm_blk_var3 bli_trsm_check bli_trsm_cntl_create bli_trsm_cntl_create_node bli_trsm_cntl_free bli_trsm_determine_kc bli_trsm_determine_kc_b bli_trsm_determine_kc_f bli_trsm_direct bli_trsm_ex bli_trsm_front bli_trsmind bli_trsmind_get_avail bli_trsm_int bli_trsm_l_cntl_create bli_trsm_ll_ker_var2 bli_trsm_l_ukernel_qfp bli_trsm_lu_ker_var2 bli_trsmnat bli_trsm_packa bli_trsm_packb bli_trsm_prune_unref_mparts_k bli_trsm_prune_unref_mparts_m 
bli_trsm_prune_unref_mparts_n bli_trsm_r_cntl_create bli_trsm_rl_ker_var2 bli_trsm_ru_ker_var2 bli_trsm_ukernel bli_trsm_u_ukernel_qfp bli_trsm_xx_ker_var2 bli_trsv bli_trsv_check bli_trsv_ex bli_trsv_ex_qfp bli_trsv_unb_var1 bli_trsv_unb_var1_qfp bli_trsv_unb_var2 bli_trsv_unb_var2_qfp bli_trsv_unf_var1 bli_trsv_unf_var1_qfp bli_trsv_unf_var2 bli_trsv_unf_var2_qfp bli_unpackm_blk_var1 bli_unpackm_cntl_create_node bli_unpackm_int bli_unpackm_int_check bli_unpackm_unb_var1 bli_unzipsc bli_unzipsc_check bli_unzipsc_qfp bli_utilm_fprint_check bli_utilm_mkhst_check bli_utilm_norm_check bli_utilm_rand_check bli_utilv_norm_check bli_utilv_sumsqv_check bli_utilv_xa_check bli_xpbyd bli_xpbyd_check bli_xpbyd_ex bli_xpbyd_ex_qfp bli_xpbym bli_xpbym_check bli_xpbym_ex bli_xpbym_ex_qfp bli_xpbym_md bli_xpbym_md_ex bli_xpbym_md_ex_qfp2 bli_xpbyv bli_xpbyv_check bli_xpbyv_ex bli_xpbyv_ex_qfp bli_xxmv_check bli_xxr_check bli_zabsqsc bli_zaddd bli_zaddd_ex bli_zaddm bli_zaddm_ex bli_zaddm_unb_var1 bli_zaddsc bli_zaddv bli_zaddv_ex bli_zamaxv bli_zamaxv_ex bli_zasumv bli_zasumv_ex bli_zasumv_unb_var1 bli_zaxpbyv bli_zaxpbyv_ex bli_zaxpy2v bli_zaxpy2v_ex bli_zaxpyd bli_zaxpyd_ex bli_zaxpyf bli_zaxpyf_ex bli_zaxpym bli_zaxpym_ex bli_zaxpym_unb_var1 bli_zaxpyv bli_zaxpyv_ex bli_zccastm bli_zccastnzm bli_zccastv bli_zccopysc bli_zcgemm_ker_var2_md bli_zcopyd bli_zcopyd_ex bli_zcopym bli_zcopym_ex bli_zcopym_unb_var1 bli_zcopyv bli_zcopyv_ex bli_zcpackm_blk_var1_md bli_zcpackm_cxk_1e_md bli_zcpackm_cxk_1r_md bli_zcpackm_struc_cxk_md bli_zcxpbym_md bli_zcxpbym_md_ex bli_zcxpbym_md_unb_var1 bli_zdcastm bli_zdcastnzm bli_zdcastv bli_zdcopysc bli_zdgemm_ker_var2_md bli_zdivsc bli_zdotaxpyv bli_zdotaxpyv_ex bli_zdotv bli_zdotv_ex bli_zdotxaxpyf bli_zdotxaxpyf_ex bli_zdotxf bli_zdotxf_ex bli_zdotxv bli_zdotxv_ex bli_zdpackm_blk_var1_md bli_zdpackm_cxk_1e_md bli_zdpackm_cxk_1r_md bli_zdpackm_struc_cxk_md bli_zdxpbym_md bli_zdxpbym_md_ex bli_zdxpbym_md_unb_var1 bli_zfprintm bli_zfprintv 
bli_zgemm bli_zgemm1m bli_zgemm3m1 bli_zgemm3mh bli_zgemm4m1 bli_zgemm4mb bli_zgemm4mb_ker_var2 bli_zgemm4mh bli_zgemm_ex bli_zgemm_ker_var2 bli_zgemm_md_c2r_ref bli_zgemmtrsm_l_ukernel bli_zgemmtrsm_u_ukernel bli_zgemm_ukernel bli_zgemv bli_zgemv_ex bli_zgemv_unb_var1 bli_zgemv_unb_var2 bli_zgemv_unf_var1 bli_zgemv_unf_var2 bli_zger bli_zger_ex bli_zger_unb_var1 bli_zger_unb_var2 bli_zgetijm bli_zgetsc bli_zhemm bli_zhemm1m bli_zhemm3m1 bli_zhemm3mh bli_zhemm4m1 bli_zhemm4mh bli_zhemm_ex bli_zhemv bli_zhemv_ex bli_zhemv_unb_var1 bli_zhemv_unb_var2 bli_zhemv_unb_var3 bli_zhemv_unb_var4 bli_zhemv_unf_var1 bli_zhemv_unf_var1a bli_zhemv_unf_var3 bli_zhemv_unf_var3a bli_zher bli_zher2 bli_zher2_ex bli_zher2k bli_zher2k1m bli_zher2k3m1 bli_zher2k3mh bli_zher2k4m1 bli_zher2k4mh bli_zher2k_ex bli_zher2_unb_var1 bli_zher2_unb_var2 bli_zher2_unb_var3 bli_zher2_unb_var4 bli_zher2_unf_var1 bli_zher2_unf_var4 bli_zher_ex bli_zherk bli_zherk1m bli_zherk3m1 bli_zherk3mh bli_zherk4m1 bli_zherk4mh bli_zherk_ex bli_zherk_l_ker_var2 bli_zherk_u_ker_var2 bli_zher_unb_var1 bli_zher_unb_var2 bli_zinvertd bli_zinvertd_ex bli_zinvertsc bli_zinvertv bli_zinvertv_ex bli_zipsc bli_zipsc_check bli_zipsc_qfp bli_zmachval bli_zmkherm bli_zmkherm_ex bli_zmkherm_unb_var1 bli_zmksymm bli_zmksymm_ex bli_zmksymm_unb_var1 bli_zmktrim bli_zmktrim_ex bli_zmktrim_unb_var1 bli_zmulsc bli_znorm1m bli_znorm1m_ex bli_znorm1m_unb_var1 bli_znorm1v bli_znorm1v_ex bli_znorm1v_unb_var1 bli_znormfm bli_znormfm_ex bli_znormfm_unb_var1 bli_znormfsc bli_znormfv bli_znormfv_ex bli_znormfv_unb_var1 bli_znormim bli_znormim_ex bli_znormim_unb_var1 bli_znormiv bli_znormiv_ex bli_znormiv_unb_var1 bli_zpackm_blk_var1 bli_zpackm_cxk bli_zpackm_cxk_1er bli_zpackm_cxk_3mis bli_zpackm_cxk_4mi bli_zpackm_cxk_rih bli_zpackm_herm_cxk bli_zpackm_herm_cxk_1er bli_zpackm_herm_cxk_3mis bli_zpackm_herm_cxk_4mi bli_zpackm_herm_cxk_rih bli_zpackm_struc_cxk bli_zpackm_struc_cxk_1er bli_zpackm_struc_cxk_3mis bli_zpackm_struc_cxk_4mi 
bli_zpackm_struc_cxk_rih bli_zpackm_tri_cxk bli_zpackm_tri_cxk_1er bli_zpackm_tri_cxk_3mis bli_zpackm_tri_cxk_4mi bli_zpackm_tri_cxk_rih bli_zpackm_unb_var1 bli_zprintm bli_zprintm_ex bli_zprintv bli_zprintv_ex bli_zrandm bli_zrandm_ex bli_zrandm_unb_var1 bli_zrandnm bli_zrandnm_ex bli_zrandnm_unb_var1 bli_zrandnv bli_zrandnv_ex bli_zrandnv_unb_var1 bli_zrandv bli_zrandv_ex bli_zrandv_unb_var1 bli_zscal2d bli_zscal2d_ex bli_zscal2m bli_zscal2m_ex bli_zscal2m_unb_var1 bli_zscal2v bli_zscal2v_ex bli_zscald bli_zscald_ex bli_zscalm bli_zscalm_ex bli_zscalm_unb_var1 bli_zscalv bli_zscalv_ex bli_zscastm bli_zscastnzm bli_zscastv bli_zscopysc bli_zsetd bli_zsetd_ex bli_zsetid bli_zsetid_ex bli_zsetijm bli_zsetm bli_zsetm_ex bli_zsetm_unb_var1 bli_zsetsc bli_zsetv bli_zsetv_ex bli_zsgemm_ker_var2_md bli_zshiftd bli_zshiftd_ex bli_zspackm_blk_var1_md bli_zspackm_cxk_1e_md bli_zspackm_cxk_1r_md bli_zspackm_struc_cxk_md bli_zsqrtsc bli_zsubd bli_zsubd_ex bli_zsubm bli_zsubm_ex bli_zsubm_unb_var1 bli_zsubsc bli_zsubv bli_zsubv_ex bli_zsumsqv bli_zsumsqv_ex bli_zsumsqv_unb_var1 bli_zswapv bli_zswapv_ex bli_zsxpbym_md bli_zsxpbym_md_ex bli_zsxpbym_md_unb_var1 bli_zsymm bli_zsymm1m bli_zsymm3m1 bli_zsymm3mh bli_zsymm4m1 bli_zsymm4mh bli_zsymm_ex bli_zsymv bli_zsymv_ex bli_zsyr bli_zsyr2 bli_zsyr2_ex bli_zsyr2k bli_zsyr2k1m bli_zsyr2k3m1 bli_zsyr2k3mh bli_zsyr2k4m1 bli_zsyr2k4mh bli_zsyr2k_ex bli_zsyr_ex bli_zsyrk bli_zsyrk1m bli_zsyrk3m1 bli_zsyrk3mh bli_zsyrk4m1 bli_zsyrk4mh bli_zsyrk_ex bli_ztrmm bli_ztrmm1m bli_ztrmm3 bli_ztrmm31m bli_ztrmm33m1 bli_ztrmm33mh bli_ztrmm34m1 bli_ztrmm34mh bli_ztrmm3_ex bli_ztrmm3m1 bli_ztrmm4m1 bli_ztrmm_ex bli_ztrmm_ll_ker_var2 bli_ztrmm_lu_ker_var2 bli_ztrmm_rl_ker_var2 bli_ztrmm_ru_ker_var2 bli_ztrmv bli_ztrmv_ex bli_ztrmv_unb_var1 bli_ztrmv_unb_var2 bli_ztrmv_unf_var1 bli_ztrmv_unf_var2 bli_ztrsm bli_ztrsm1m bli_ztrsm3m1 bli_ztrsm4m1 bli_ztrsm_ex bli_ztrsm_ll_ker_var2 bli_ztrsm_l_ukernel bli_ztrsm_lu_ker_var2 bli_ztrsm_rl_ker_var2 
bli_ztrsm_ru_ker_var2 bli_ztrsm_u_ukernel bli_ztrsv bli_ztrsv_ex bli_ztrsv_unb_var1 bli_ztrsv_unb_var2 bli_ztrsv_unf_var1 bli_ztrsv_unf_var2 bli_zunpackm_blk_var1 bli_zunpackm_cxk bli_zunpackm_unb_var1 bli_zunzipsc bli_zxpbyd bli_zxpbyd_ex bli_zxpbym bli_zxpbym_ex bli_zxpbym_unb_var1 bli_zxpbyv bli_zxpbyv_ex bli_zzcastm bli_zzcastnzm bli_zzcastv bli_zzcopysc bli_zzgemm_ker_var2_md bli_zzipsc bli_zzpackm_blk_var1_md bli_zzpackm_cxk_1e_md bli_zzpackm_cxk_1r_md bli_zzpackm_struc_cxk_md bli_zzxpbym_md bli_zzxpbym_md_ex bli_zzxpbym_md_unb_var1 sasum_ sasumsub_ saxpy_ scabs1_ scasum_ scasumsub_ scnrm2_ scnrm2sub_ scopy_ sdot_ sdotsub_ sdsdot_ sdsdotsub_ sgbmv_ sgemm_ sgemv_ sger_ snrm2_ snrm2sub_ srot_ srotg_ srotm_ srotmg_ ssbmv_ sscal_ sspmv_ sspr_ sspr2_ sswap_ ssymm_ ssymv_ ssyr_ ssyr2_ ssyr2k_ ssyrk_ stbmv_ stbsv_ stpmv_ stpsv_ strmm_ strmv_ strsm_ strsv_ dasum_ dasumsub_ daxpy_ dcabs1_ dcopy_ ddot_ ddotsub_ dgbmv_ dgemm_ dgemv_ dger_ dnrm2_ dnrm2sub_ drot_ drotg_ drotm_ drotmg_ dsbmv_ dscal_ dsdot_ dsdotsub_ dspmv_ dspr_ dspr2_ dswap_ dsymm_ dsymv_ dsyr_ dsyr2_ dsyr2k_ dsyrk_ dtbmv_ dtbsv_ dtpmv_ dtpsv_ dtrmm_ dtrmv_ dtrsm_ dtrsv_ dzasum_ dzasumsub_ dznrm2_ dznrm2sub_ caxpy_ ccopy_ cdotc_ cdotcsub_ cdotu_ cdotusub_ cgbmv_ cgemm_ cgemv_ cgerc_ cgeru_ chbmv_ chemm_ chemv_ cher_ cher2_ cher2k_ cherk_ chpmv_ chpr_ chpr2_ crotg_ cscal_ csrot_ csscal_ cswap_ csymm_ csyr2k_ csyrk_ ctbmv_ ctbsv_ ctpmv_ ctpsv_ ctrmm_ ctrmv_ ctrsm_ ctrsv_ zaxpy_ zcopy_ zdotc_ zdotcsub_ zdotu_ zdotusub_ zdrot_ zdscal_ zgbmv_ zgemm_ zgemv_ zgerc_ zgeru_ zhbmv_ zhemm_ zhemv_ zher_ zher2_ zher2k_ zherk_ zhpmv_ zhpr_ zhpr2_ zrotg_ zscal_ zswap_ zsymm_ zsyr2k_ zsyrk_ ztbmv_ ztbsv_ ztpmv_ ztpsv_ ztrmm_ ztrmv_ ztrsm_ ztrsv_ icamax_ icamaxsub_ idamax_ idamaxsub_ isamax_ isamaxsub_ izamax_ izamaxsub_ cblas_caxpy cblas_ccopy cblas_cdotc_sub cblas_cdotu_sub cblas_cgbmv cblas_cgemm cblas_cgemv cblas_cgerc cblas_cgeru cblas_chbmv cblas_chemm cblas_chemv cblas_cher cblas_cher2 cblas_cher2k cblas_cherk 
cblas_chpmv cblas_chpr cblas_chpr2 cblas_cscal cblas_csscal cblas_cswap cblas_csymm cblas_csyr2k cblas_csyrk cblas_ctbmv cblas_ctbsv cblas_ctpmv cblas_ctpsv cblas_ctrmm cblas_ctrmv cblas_ctrsm cblas_ctrsv cblas_dasum cblas_daxpy cblas_dcopy cblas_ddot cblas_dgbmv cblas_dgemm cblas_dgemv cblas_dger cblas_dnrm2 cblas_drot cblas_drotg cblas_drotm cblas_drotmg cblas_dsbmv cblas_dscal cblas_dsdot cblas_dspmv cblas_dspr cblas_dspr2 cblas_dswap cblas_dsymm cblas_dsymv cblas_dsyr cblas_dsyr2 cblas_dsyr2k cblas_dsyrk cblas_dtbmv cblas_dtbsv cblas_dtpmv cblas_dtpsv cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_dzasum cblas_dznrm2 cblas_icamax cblas_idamax cblas_isamax cblas_izamax cblas_sasum cblas_saxpy cblas_scasum cblas_scnrm2 cblas_scopy cblas_sdot cblas_sdsdot cblas_sgbmv cblas_sgemm cblas_sgemv cblas_sger cblas_snrm2 cblas_srot cblas_srotg cblas_srotm cblas_srotmg cblas_ssbmv cblas_sscal cblas_sspmv cblas_sspr cblas_sspr2 cblas_sswap cblas_ssymm cblas_ssymv cblas_ssyr cblas_ssyr2 cblas_ssyr2k cblas_ssyrk cblas_stbmv cblas_stbsv cblas_stpmv cblas_stpsv cblas_strmm cblas_strmv cblas_strsm cblas_strsv cblas_xerbla cblas_zaxpy cblas_zcopy cblas_zdotc_sub cblas_zdotu_sub cblas_zdscal cblas_zgbmv cblas_zgemm cblas_zgemv cblas_zgerc cblas_zgeru cblas_zhbmv cblas_zhemm cblas_zhemv cblas_zher cblas_zher2 cblas_zher2k cblas_zherk cblas_zhpmv cblas_zhpr cblas_zhpr2 cblas_zscal cblas_zswap cblas_zsymm cblas_zsyr2k cblas_zsyrk cblas_ztbmv cblas_ztbsv cblas_ztpmv cblas_ztpsv cblas_ztrmm cblas_ztrmv cblas_ztrsm cblas_ztrsv blis-0.6.1/build/mirror-tree.sh000077500000000000000000000111001360743507500163570ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
#
# print_usage
#
# Print a description of the script and its command line to stdout, then
# terminate the script with exit status 1. This function never returns.
#
print_usage()
{
	local script_name

	# The name of the script, stripped of any leading path.
	script_name=${0##*/}

	# Echo usage info.
	echo " "
	echo " ${script_name}"
	echo " "
	echo " Field G. Van Zee"
	echo " "
	echo " Recursively descends through the directory given in argument 1 while"
	echo " creating a symmetric directory structure in the new directory specified"
	echo " by argument 2, ignoring regular files along the way."
	echo " "
	echo " Usage:"
	echo " ${script_name} [-v] existing_dir new_mirror_dir"
	echo " "
	echo " The following options are accepted:"
	echo " "
	echo " -v verbose"
	echo " Echo progress as directories are recursively created."
	echo " "

	# Exit with non-zero exit status.
	exit 1
}

#
# main [-v] existing_dir new_mirror_dir
#
# Parse the command line, create new_mirror_dir (and any missing parents)
# if it does not exist yet, and then mirror the directory structure found
# under existing_dir into it.
#
# Globals written: verbose_flag, e_dir, n_dir, cur_e_dir, cur_n_dir.
# Returns 0 on success and 1 if a directory could not be created. Usage
# errors terminate the script via print_usage().
#
main()
{
	# Start from a known state: verbose_flag must not be inherited from the
	# environment, and OPTIND must be reset in case getopts already ran in
	# this shell.
	verbose_flag=""
	OPTIND=1

	# Process our command line options. We only respond to the -v flag,
	# in which case we'll echo what we're doing as we go along.
	while getopts ":v" opt; do
		case "${opt}" in
			v  ) verbose_flag=1 ;;
			\? ) print_usage ;;
		esac
	done
	shift $((OPTIND - 1))

	# Exactly two arguments must remain after option processing.
	if [ "$#" -ne 2 ]; then
		print_usage
	fi

	# Extract arguments.
	e_dir=$1
	n_dir=$2

	# If the root new directory does not exist, then create it (along with
	# any missing parent directories).
	if [ ! -d "${n_dir}" ]; then

		# Be verbose, if -v was one of the command line options.
		if [ -n "${verbose_flag}" ]; then
			echo "Creating ${n_dir}"
		fi

		mkdir -p -- "${n_dir}" || return 1
	fi

	# Initialize the recursive variables. We keep a separate variable for
	# the existing and new directories because they have different roots,
	# but they always change in parallel.
	cur_e_dir=${e_dir}
	cur_n_dir=${n_dir}

	# Begin recursion, starting with the contents of the existing directory.
	mirror_tree || return 1

	# Exit peacefully.
	return 0
}

#
# mirror_tree
#
# For every sub-directory of ${cur_e_dir}, create a sub-directory with the
# same name in ${cur_n_dir} (unless it already exists) and recurse into it.
# Regular files and hidden (dot) entries are ignored.
#
# The directory is enumerated with a quoted glob rather than by parsing the
# output of 'ls', so names that contain whitespace or glob characters are
# handled correctly. Any arguments (older callers passed a listing of
# cur_e_dir) are accepted and ignored.
#
# Globals read: verbose_flag. Globals read and written: cur_e_dir,
# cur_n_dir (both are restored before returning successfully).
# Returns 0 on success and 1 as soon as a directory cannot be created.
#
mirror_tree()
{
	local entry thing

	for entry in "${cur_e_dir}"/*; do

		# An unmatched pattern is left behind as the literal string
		# ".../*" and regular files are of no interest; the -d test
		# filters out both.
		[ -d "${entry}" ] || continue

		thing=${entry##*/}

		# Descend: adjust the current existing and new directory paths
		# so that they include the current sub-directory.
		cur_e_dir="${cur_e_dir}/${thing}"
		cur_n_dir="${cur_n_dir}/${thing}"

		# Be verbose, if -v was one of the command line options.
		if [ -n "${verbose_flag}" ]; then
			echo "Creating ${cur_n_dir}"
		fi

		# Make the new sub-directory, but only if it doesn't already
		# exist.
		if [ ! -d "${cur_n_dir}" ]; then
			mkdir -- "${cur_n_dir}" || return 1
		fi

		# Continue recursively on the contents of cur_e_dir.
		mirror_tree || return 1

		# Ascend: strip the last path component again to prepare for
		# the next sub-directory.
		cur_e_dir=${cur_e_dir%/*}
		cur_n_dir=${cur_n_dir%/*}
	done

	return 0
}
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # -- Helper functions ---------------------------------------------------------- # print_usage() { # Echo usage info. echo " " echo " ${script_name}" echo " " echo " Field G. Van Zee" echo " " echo " Generate a monolithic header by recursively replacing all #include" echo " directives in a selected file with the contents of the header files" echo " they reference." echo " " echo " Usage:" echo " " echo " ${script_name} header header_out dir_list" echo " " echo " Arguments:" echo " " echo " header The filepath to the top-level header, which is file that" echo " will #include all other header files. NOTE: It is okay if" echo " this file resides somewhere in root_dir, described below." echo " " echo " header_out The filepath of the file into which the script will output" echo " the monolithic header." echo " " echo " dir_list The list of directory paths in which to search for the" echo " headers that are #included by 'header'. By default, these" echo " directories are scanned for .h files, but sub-directories" echo " within the various directories are not inspected. If the" echo " -r option is given, these directories are recursively" echo " scanned. In either case, the subset of directories scanned" echo " that actually contains .h files is then searched whenever" echo " a #include directive is encountered in 'header' (or any" echo " file subsequently #included). 
If a referenced header file" echo " is not found, the #include directive is left untouched and" echo " translated directly into 'header_out'." echo " " echo " The following options are accepted:" echo " " echo " -r recursive" echo " Scan the directories listed in 'dir_list' recursively when" echo " searching for .h header files. By default, the directories" echo " are not searched recursively." echo " " echo " -c strip C-style comments" echo " Strip comments enclosed in /* */ delimiters from the" echo " output, including multi-line comments. (This only applies" echo " to #included headers; C-style comments in the top-level" echo " 'header' are never stripped.) By default, C-style comments" echo " are not stripped." echo " " echo " -o SCRIPT output script name" echo " Use SCRIPT as a prefix when outputting messages instead" echo " the script's actual name. Useful when the current script" echo " is going to be called from within another, higher-level" echo " driver script and seeing the current script's name might" echo " unnecessarily confuse the user." echo " " echo " -q quiet" echo " Suppress informational output. By default, the script is" echo " verbose." echo " " echo " -h help" echo " Output this information and exit." echo " " # Exit with non-zero exit status exit 1 } canonicalize_ws() { local str="$1" # Remove leading and trailing whitespace. str=$(echo -e "${str}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//') # Remove duplicate spaces between words. str=$(echo -e "${str}" | tr -s " ") # Update the input argument. echo "${str}" } is_word_in_list() { word="$1" list="$2" rval="" for item in ${list}; do if [ "${item}" == "${word}" ]; then rval="${word}" break fi done echo "${rval}" } echoinfo() { if [ -z "${quiet_flag}" ]; then # Echo the argument string to stderr instead of stdout. echo "${output_name}: $1" 1>&2; fi } echoninfo() { if [ -z "${quiet_flag}" ]; then # Echo the argument string to stderr instead of stdout. 
echo -n "${output_name}: $1" 1>&2; fi } echon2info() { if [ -z "${quiet_flag}" ]; then # Echo the argument string to stderr instead of stdout. echo "$1" 1>&2; fi } find_header_dirs() { local cur_dirpath sub_items result cur_list item child_list # Extract the argument: the current directory, and the list of # directories found so far that contain headers. cur_dirpath="$1" echoninfo "scanning contents of ${cur_dirpath}" # Acquire a list of the directory's contents. sub_items=$(ls ${cur_dirpath}) # If there is at least one header present, add the current directory to # the list header of directories. Otherwise, the current directory does # not contribute to the list returned to the caller. result=$(echo ${sub_items} | grep "\.h") if [ -n "${result}" ]; then cur_list="${cur_dirpath}" echon2info " ...found headers" else cur_list="" echon2info "" fi # Iterate over the list of directory contents. for item in ${sub_items}; do # Check whether the current item is in the ignore_list. If so, we # ignore it. result=$(is_word_in_list "${item}" "${ignore_list}") if [ -n "${result}" ]; then echoinfo "ignoring directory '${item}'." continue fi # If the current item is a directory, recursively accumulate header # directories for that sub-directory. if [ -d "${cur_dirpath}/${item}" ]; then # Recursively find header directories within the sub-directory # ${item} and store the directory list to child_list. child_list=$(find_header_dirs "${cur_dirpath}/${item}") # Accumulate the sub-directory's header list with the running list # of header directories cur_list="${cur_list} ${child_list}" fi done # Return the list of header directories. echo "${cur_list}" } get_header_path() { local filename dirpaths filepath filename="$1" dirpaths="$2" filepath="" # Search each directory path for the filename given. for dirpath in ${dirpaths}; do if [ -f "${dirpath}/${filename}" ]; then filepath="${dirpath}/${filename}" break fi done # Return the filepath that was found. 
Note that if no filepath was found # in the loop above, the empty string gets returned. echo "${filepath}" } replace_pass() { local filename dirpaths result header headerlist filename="$1" dirpaths="$2" # This string is inserted after #include directives after having # determined that they are not present in the directory tree and should # be ignored when assessing whether there are still #include directives # that need to be expanded. Note that it is formatted as a comment and # thus will be ignored when the monolithic header is eventually read C # preprocessor and/or compiler. skipstr="\/\/skipped" #skipstr="\/\*skipped\*\/" # The way we (optionally) remove C-style comments results in a single # blank line in its place (regardless of how many lines the comment # spanned. When a comment is removed, it is replaced by this string # so that the line can be deleted with a subsequent sed command. commstr="DeLeTeDCsTyLeCoMmEnT" headerlist="" # Iterate through each line of the header file, accumulating the names of # header files referenced in #include directives. while read -r curline do # Check whether the line begins with a #include directive, but ignore # the line if it contains the skip string. result=$(echo ${curline} | grep '^[[:space:]]*#include ' | grep -v "${skipstr}") # If the #include directive was found... if [ -n "${result}" ]; then # Isolate the header filename. We must take care to include all # characters that might appear between the "" or <>. header=$(echo ${curline} | sed -e "s/#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g") # Add the header file to a list. headerlist=$(canonicalize_ws "${headerlist} ${header}") fi done < "${filename}" echoinfo " found references to: ${headerlist}" # Initialize the return value to null. result="" # Iterate over each header file found in the previous loop. for header in ${headerlist}; do # Find the path to the header. 
header_filepath=$(get_header_path ${header} "${dirpaths}") # If the header has a slash, escape it so that sed doesn't get confused # (since we use '/' as our search-and-replace delimiter). header_esc=$(echo "${header}" | sed -e 's/\//\\\//g') # If the header file was not found, get_header_path() returns an # empty string. This probably means that the header file is a # system header and thus we skip it since we don't want to inline # the contents of system headers anyway. if [ -z "${header_filepath}" ]; then echoinfo " could not locate file '${header}'; marking to skip." # Insert a comment after the #include so we know to ignore it # later. Notice that we mimic the quotes or angle brackets # around the header name, whichever pair was used in the input. cat ${filename} \ | sed -e "s/^[[:space:]]*#include \([\"<]\)\(${header_esc}\)\([\">]\).*/#include \1\2\3 ${skipstr}/" \ > "${filename}.tmp" # Overwrite the original file with the updated copy. mv "${filename}.tmp" ${filename} else echoinfo " located file '${header_filepath}'; inserting." # Strip C-style comments from the file, if requested. if [ -n "${strip_comments}" ]; then header_filename=${header_filepath##*/} # Make a temporary copy of ${header_filepath} stripped of its # C-style comments. This leaves behind a single blank line, # which is then deleted. cat ${header_filepath} \ | perl -0777 -pe "s/\/\*.*?\*\//${commstr}/gs" \ | sed -e "/${commstr}/d" \ > "${header_filename}.tmp" header_to_insert="${header_filename}.tmp" else header_to_insert="${header_filepath}" fi # Replace the #include directive for the current header file with the # contents of that header file, saving the result to a temporary file. # We also insert begin and end markers to allow for more readability. 
cat ${filename} \ | sed -e "/^[[:space:]]*#include \"${header_esc}\"/ {" \ -e "i // begin ${header}" \ -e "r ${header_to_insert}" \ -e "a // end ${header}" \ -e "d" \ -e "}" \ > "${filename}.tmp" # Overwrite the original header file with the updated copy. mv "${filename}.tmp" ${filename} # If C-style comments were stripped, remove the temporary file. if [ -n "${strip_comments}" ]; then rm "${header_filename}.tmp" fi fi done # works, but leaves blank line: #cat "test.h" | sed -e "/^#include \"foo.h\"/r foo.h" -e "s///" > "test.new.h" # works: #cat "test.h" | sed -e '/^#include \"foo.h\"/ {' -e 'r foo.h' -e 'd' -e '}' > "test.new.h" # works: #cat "test.h" | sed -e '/^#include \"foo.h\"/r foo.h' -e '/^#include \"foo.h\"/d' > "test.new.h" #cat zorn/header.h | sed -e '/^#include \"header1.h\"/ {' -e 'i // begin insertion' -e 'r alice/header1.h' -e 'a // end insertion' -e 'd' -e '}' # Search the updated file for #include directives, but ignore any # hits that also contain the skip string (indicating that the header # file referenced by that #include could not be found). result=$(cat ${filename} | grep '^[[:space:]]*#include ' | grep -v "${skipstr}") # Return the result so the caller knows if we need to proceed with # another pass. echo ${result} } # # -- main function ------------------------------------------------------------- # main() { # The name of the script, stripped of any preceeding path. script_name=${0##*/} # The script name to use in informational output. Defaults to ${script_name}. output_name=${script_name} # Whether or not we should strip C-style comments from the outout. (Default # is to not strip C-style comments.) strip_comments="" # Whether or not we search the directories in dir_list recursively. (Default # is to not search recursively.) recursive_flag="" # Whether or not we should suppress informational output. (Default is to # output messages.) 
quiet_flag="" # The list of directories to ignore ignore_list="old other temp test testsuite windows" # Process our command line options. while getopts ":o:rcqh" opt; do case $opt in o ) output_name=$OPTARG ;; r ) recursive_flag="1" ;; c ) strip_comments="1" ;; q ) quiet_flag="1" ;; h ) print_usage ;; \? ) print_usage esac done shift $(($OPTIND - 1)) # Print usage if we don't have exactly two arguments. if [ $# != "3" ]; then print_usage fi # Acquire the two required arguments: # - the input header file, # - the output header file, # - the list of directories in which to search for the headers inputfile="$1" outputfile="$2" dir_list="$3" # First, confirm that the directories in dir_list are valid. dir_list2="" for item in ${dir_list}; do # Strip a trailing slash from the path, if it has one. item=${item%/} echoninfo "checking ${item} " if [ -d ${item} ]; then echon2info " ...directory exists." dir_list2="${dir_list2} ${item}" else echon2info " ...invalid directory; omitting." fi done dir_list2=$(canonicalize_ws "${dir_list2}") # Overwrite the original dir_list with the updated copy that omits # invalid directories. dir_list="${dir_list2}" echoinfo "check summary:" echoinfo " accessible directories:" echoinfo " ${dir_list}" # Generate a list of directories (dirpaths) which will be searched whenever # a #include directive is encountered. The method by which dirpaths is # compiled will depend on whether the recursive flag was given. if [ -n "${recursive_flag}" ]; then # If the recursive flag was given, we need to recursively scan each # directory in dir_list for directories with headers via the # function find_header_dirs(). dirpaths="" for item in ${dir_list}; do item_dirpaths=$(find_header_dirs ${item}) dirpaths="${dirpaths} ${item_dirpaths}" done dirpaths=$(canonicalize_ws "${dirpaths}") else # If the recursive flag was not given, we can just use dir_list # as-is, though we opt to filter out the directories that don't # contain .h files. 
dirpaths="" for item in ${dir_list}; do echoninfo "scanning ${item}" # Acquire a list of the directory's contents. sub_items=$(ls ${item}) # If there is at least one header present, add the current directory to # the list header of directories. result=$(echo ${sub_items} | grep "\.h") if [ -n "${result}" ]; then dirpaths="${dirpaths} ${item}" echon2info " ...found headers." else echon2info " ...no headers found." fi done dirpaths=$(canonicalize_ws "${dirpaths}") fi echoinfo "scan summary:" echoinfo " headers found in:" echoinfo " ${dirpaths}" echoinfo "preparing to monolithify '${inputfile}'." # Make a copy of the inputfile. cp ${inputfile} ${outputfile} echoinfo "new header will be saved to '${outputfile}'." done_flag="0" while [ ${done_flag} == "0" ]; do echoinfo "starting new pass." # Perform a replacement pass. The return string is non-null if # additional passes are necessary and null otherwise. result=$(replace_pass ${outputfile} "${dirpaths}") if [ -n "${result}" ]; then echoinfo "pass finished; result: additional pass(es) needed." else echoinfo "pass finished; result: no further passes needed." fi #exit 1 # If the return value was null, then we're done. if [ -z "${result}" ]; then done_flag="1" fi done echoinfo "substitution complete." echoinfo "monolithic header saved as '${outputfile}'." # Exit peacefully. return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-0.6.1/build/regen-symbols.sh000077500000000000000000000063631360743507500167150ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name of copyright holder(s) nor the names # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # This script regenerates a list of symbols for use when building # Windows-compatible DLLs. We assume that this script will be run after # running configure as: # # ./configure --enable-cblas haswell # # and compiling BLIS normally. (Notice that we also prune out all # haswell/zen-related context initialization and reference kernels.) 
# The shared library to read symbols from, and the export definition file
# to regenerate. Both paths are relative to the top-level BLIS directory.
libblis='lib/haswell/libblis.so'
symfile='build/libblis-symbols.def'

#
# filter_symbols
#
# Read 'nm -g' output on stdin and write the export list to stdout: the
# line "EXPORTS" followed by the names of all global text (type T) symbols
# that belong to one of the groups below, group by group and in input order
# within each group:
#
#   1. bli_*            BLIS API
#   2. bla_*            BLAS compatibility layer helpers
#   3. s[acdgnrst]*     single real BLAS
#   4. d[acdgnrstz]*    double real BLAS
#   5. c[acdghrst]*     single complex BLAS
#   6. z[acdghrst]*     double complex BLAS
#   7. i[cdsz]*         index-returning BLAS (i?amax)
#   8. cblas_*          CBLAS
#
# Symbols whose name contains init_haswell, haswell_ref or zen_ref (context
# initialization and reference kernels of the configured sub-configuration)
# are pruned.
#
filter_symbols()
{
	awk '
		# Only global text symbols are exported.
		$2 != "T" { next }

		# Prune configuration-specific symbols.
		$3 ~ /init_haswell|haswell_ref|zen_ref/ { next }

		{
			if      ($3 ~ /^bli_/)          grp = 1
			else if ($3 ~ /^bla_/)          grp = 2
			else if ($3 ~ /^s[acdgnrst]/)   grp = 3
			else if ($3 ~ /^d[acdgnrstz]/)  grp = 4
			else if ($3 ~ /^c[acdghrst]/)   grp = 5
			else if ($3 ~ /^z[acdghrst]/)   grp = 6
			else if ($3 ~ /^i[cdsz]/)       grp = 7
			else if ($3 ~ /^cblas_/)        grp = 8
			else next

			names[grp] = names[grp] $3 "\n"
		}

		END {
			print "EXPORTS"
			for (grp = 1; grp <= 8; grp++)
				printf "%s", names[grp]
		}
	'
}

#
# regen_symbols library deffile
#
# Regenerate deffile from the symbols exported by library. The library is
# read with a single 'nm' invocation, and deffile is only written once that
# has succeeded; if the library is missing or nm fails, a message is
# printed to stderr, deffile is left untouched, and 1 is returned.
#
regen_symbols()
{
	rs_lib=$1
	rs_out=$2

	if [ ! -f "${rs_lib}" ]; then
		echo "regen-symbols: '${rs_lib}' not found; '${rs_out}' left untouched." >&2
		return 1
	fi

	# Capture the symbol table before touching the output file so that an
	# nm failure cannot leave a truncated definition file behind.
	if ! rs_syms=$(nm -g "${rs_lib}"); then
		echo "regen-symbols: nm failed on '${rs_lib}'; '${rs_out}' left untouched." >&2
		return 1
	fi

	printf '%s\n' "${rs_syms}" | filter_symbols > "${rs_out}"
}

regen_symbols "${libblis}" "${symfile}"
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ blis-0.6.1/build/templates/license.h000066400000000000000000000032101360743507500173450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ blis-0.6.1/build/templates/license.sh000066400000000000000000000032411360743507500175340ustar00rootroot00000000000000#!/usr/bin/env bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2019, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # blis-0.6.1/common.mk000066400000000000000000001123351360743507500143070ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Only include this block of code once ifndef COMMON_MK_INCLUDED COMMON_MK_INCLUDED := yes # # --- CFLAGS storage functions ------------------------------------------------- # # Define a function that stores the value of a variable to a different # variable containing a specified suffix (corresponding to a configuration). define store-var-for $(strip $(1)).$(strip $(2)) := $($(strip $(1))) endef # Define a function similar to store-var-for, except that it appends instead # of overwriting. define append-var-for $(strip $(1)).$(strip $(2)) += $($(strip $(1))) endef # Define a function that stores the value of all of the variables in a # make_defs.mk file to other variables with the configuration (the # argument $(1)) added as a suffix. This function is called once from # each make_defs.mk. Also, add the configuration to CONFIGS_INCL.
define store-make-defs $(eval $(call store-var-for,CC, $(1))) $(eval $(call store-var-for,CC_VENDOR, $(1))) $(eval $(call store-var-for,CPPROCFLAGS,$(1))) $(eval $(call store-var-for,CLANGFLAGS, $(1))) $(eval $(call store-var-for,CXXLANGFLAGS,$(1))) $(eval $(call store-var-for,CMISCFLAGS, $(1))) $(eval $(call store-var-for,CPICFLAGS, $(1))) $(eval $(call store-var-for,CWARNFLAGS, $(1))) $(eval $(call store-var-for,CDBGFLAGS, $(1))) $(eval $(call store-var-for,COPTFLAGS, $(1))) $(eval $(call store-var-for,CKOPTFLAGS, $(1))) $(eval $(call store-var-for,CKVECFLAGS, $(1))) $(eval $(call store-var-for,CROPTFLAGS, $(1))) $(eval $(call store-var-for,CRVECFLAGS, $(1))) CONFIGS_INCL += $(1) endef # Define a function that retrieves the value of a variable for a # given configuration. define load-var-for $($(strip $(1)).$(strip $(2))) endef # # --- CFLAGS query functions --------------------------------------------------- # # Define some functions that return the appropriate CFLAGS for a given # configuration. This assumes that the make_defs.mk files have already been # included, which results in those values having been stored to # configuration-qualified variables.
get-noopt-cflags-for = $(strip $(CFLAGS_PRESET) \ $(call load-var-for,CDBGFLAGS,$(1)) \ $(call load-var-for,CWARNFLAGS,$(1)) \ $(call load-var-for,CPICFLAGS,$(1)) \ $(call load-var-for,CMISCFLAGS,$(1)) \ $(call load-var-for,CLANGFLAGS,$(1)) \ $(call load-var-for,CPPROCFLAGS,$(1)) \ $(CTHREADFLAGS) \ $(CINCFLAGS) $(VERS_DEF) \ ) get-noopt-cxxflags-for = $(strip $(CFLAGS_PRESET) \ $(call load-var-for,CDBGFLAGS,$(1)) \ $(call load-var-for,CWARNFLAGS,$(1)) \ $(call load-var-for,CPICFLAGS,$(1)) \ $(call load-var-for,CMISCFLAGS,$(1)) \ $(call load-var-for,CXXLANGFLAGS,$(1)) \ $(call load-var-for,CPPROCFLAGS,$(1)) \ $(CTHREADFLAGS) \ $(CINCFLAGS) $(VERS_DEF) \ ) get-refinit-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ -DBLIS_CNAME=$(1) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-refkern-cflags-for = $(strip $(call load-var-for,CROPTFLAGS,$(1)) \ $(call load-var-for,CRVECFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(COMPSIMDFLAGS) \ -DBLIS_CNAME=$(1) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-config-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-frame-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-kernel-cflags-for = $(strip $(call load-var-for,CKOPTFLAGS,$(1)) \ $(call load-var-for,CKVECFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) # When compiling sandboxes, we use flags similar to those of general framework # source. This ensures that the same code can be linked and run across various # sub-configurations. (If we switch to using refkern/kernel flags, we should # prevent enabling sandboxes for umbrella families by verifying that # config_list == config_name if --enable-sandbox is given.) 
get-sandbox-c99flags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ $(CSBOXINCFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) get-sandbox-cxxflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cxxflags-for,$(1)) \ $(CSBOXINCFLAGS) \ $(BUILD_CPPFLAGS) \ $(BUILD_SYMFLAGS) \ ) # Define a separate function that will return appropriate flags for use by # applications that want to use the same basic flags as those used when BLIS # was compiled. (NOTE: This is the same as the $(get-frame-cflags-for ...) # function, except that it omits two variables that contain flags exclusively # for use when BLIS is being compiled/built: BUILD_CPPFLAGS, which contains a # cpp macro that confirms that BLIS is being built; and BUILD_SYMFLAGS, which # contains symbol export flags that are only needed when a shared library is # being compiled/linked.) get-user-cflags-for = $(strip $(call load-var-for,COPTFLAGS,$(1)) \ $(call get-noopt-cflags-for,$(1)) \ ) # Define functions that return messages appropriate for each non-verbose line # of compilation output. get-noopt-text = "(CFLAGS for no optimization)" get-refinit-text-for = "('$(1)' CFLAGS for ref. kernel init)" get-refkern-text-for = "('$(1)' CFLAGS for ref. kernels)" get-config-text-for = "('$(1)' CFLAGS for config code)" get-frame-text-for = "('$(1)' CFLAGS for framework code)" get-kernel-text-for = "('$(1)' CFLAGS for kernels)" get-sandbox-c99text-for = "('$(1)' CFLAGS for sandboxes)" get-sandbox-cxxtext-for = "('$(1)' CXXFLAGS for sandboxes)" # # --- Miscellaneous helper functions ------------------------------------------- # # Define functions that filter a list of filepaths $(1) that contain (or # omit) an arbitrary substring $(2).
files-that-contain = $(strip $(foreach f, $(1), $(if $(findstring $(2),$(f)),$(f),))) files-that-dont-contain = $(strip $(foreach f, $(1), $(if $(findstring $(2),$(f)),,$(f)))) # # --- Include makefile configuration file -------------------------------------- # # Use the current directory as the default path to the root directory for # makefile fragments (and the configuration family's make_defs.mk), but # allow the includer to override this value if it needs to point to an # installation directory. ifeq ($(strip $(SHARE_PATH)),) SHARE_PATH := . endif # Define the name of the configuration file. CONFIG_MK_FILE := config.mk # Identify the base path for the root directory for makefile fragments (and # the configuration family's make_defs.mk). We define this path in terms of # SHARE_PATH, which gets a default value above (which is what happens for the # top-level Makefile). If SHARE_PATH is specified by the Makefile prior to # including common.mk, that path is used instead. This allows Makefiles for # example code and test drivers to reference an installed prefix directory # for situations when the build directory no longer exists. BASE_SHARE_PATH := $(SHARE_PATH) # Include the configuration file. -include $(BASE_SHARE_PATH)/$(CONFIG_MK_FILE) # # --- Handle 'make clean' and friends without config.mk ------------------------ # # Detect whether we actually got the configuration file. If we didn't, then # it is likely that the user has not yet generated it (via configure). ifeq ($(strip $(CONFIG_MK_INCLUDED)),yes) CONFIG_MK_PRESENT := yes IS_CONFIGURED := yes else CONFIG_MK_PRESENT := no IS_CONFIGURED := no endif # If we didn't get config.mk, then we need to set some basic variables so # that make will function without error for things like 'make clean'. ifeq ($(IS_CONFIGURED),no) # If this makefile fragment is being run and there is no config.mk present, # then it's probably safe to assume that the user is currently located in the # source distribution. DIST_PATH := . 
# Even though they won't be used explicitly, it appears that setting these # INSTALL_* variables to something sane (that is, not allowing them to default # to the empty string) is necessary to prevent make from hanging, likely # because the statements that define UNINSTALL_LIBS and UNINSTALL_HEADERS, # when evaluated, result in running 'find' on the root directory--definitely # something we would like to avoid. INSTALL_LIBDIR := $(HOME)/blis/lib INSTALL_INCDIR := $(HOME)/blis/include INSTALL_SHAREDIR := $(HOME)/blis/share endif # # --- Primary makefile variable definitions ------------------------------------ # # Construct the architecture-version string, which will be used to name the # library upon installation. VERS_CONF := $(VERSION)-$(CONFIG_NAME) # All makefile fragments in the tree will have this name. FRAGMENT_MK := .fragment.mk # Locations of important files. BUILD_DIR := build CONFIG_DIR := config FRAME_DIR := frame REFKERN_DIR := ref_kernels KERNELS_DIR := kernels SANDBOX_DIR := sandbox OBJ_DIR := obj LIB_DIR := lib INCLUDE_DIR := include BLASTEST_DIR := blastest TESTSUITE_DIR := testsuite # The filename suffix for reference kernels. REFNM := ref # Source suffixes. CONFIG_SRC_SUFS := c KERNELS_SRC_SUFS := c s S FRAME_SRC_SUFS := c SANDBOX_C99_SUFS := c SANDBOX_CXX_SUFS := cc cpp cxx SANDBOX_SRC_SUFS := $(SANDBOX_C99_SUFS) $(SANDBOX_CXX_SUFS) # Header suffixes. FRAME_HDR_SUFS := h SANDBOX_H99_SUFS := h SANDBOX_HXX_SUFS := hh hpp hxx SANDBOX_HDR_SUFS := $(SANDBOX_H99_SUFS) $(SANDBOX_HXX_SUFS) # Combine all header suffixes and remove duplicates via sort(). ALL_HDR_SUFS := $(sort $(FRAME_HDR_SUFS) \ $(SANDBOX_HDR_SUFS) ) ALL_H99_SUFS := $(sort $(FRAME_HDR_SUFS) \ $(SANDBOX_H99_SUFS) ) # The names of scripts that check output from the BLAS test drivers and # BLIS test suite. BLASTEST_CHECK := check-blastest.sh TESTSUITE_CHECK := check-blistest.sh # The names of the testsuite input/configuration files.
TESTSUITE_CONF_GEN := input.general TESTSUITE_CONF_OPS := input.operations TESTSUITE_FAST_GEN := input.general.fast TESTSUITE_FAST_OPS := input.operations.fast TESTSUITE_MIXD_GEN := input.general.mixed TESTSUITE_MIXD_OPS := input.operations.mixed TESTSUITE_SALT_GEN := input.general.salt TESTSUITE_SALT_OPS := input.operations.salt TESTSUITE_OUT_FILE := output.testsuite # CHANGELOG file. CHANGELOG := CHANGELOG # Something for OS X so that echo -n works as expected. SHELL := bash # Construct paths to the four primary directories of source code: # the config directory, general framework code, reference kernel code, # and optimized kernel code. CONFIG_PATH := $(DIST_PATH)/$(CONFIG_DIR) FRAME_PATH := $(DIST_PATH)/$(FRAME_DIR) REFKERN_PATH := $(DIST_PATH)/$(REFKERN_DIR) KERNELS_PATH := $(DIST_PATH)/$(KERNELS_DIR) SANDBOX_PATH := $(DIST_PATH)/$(SANDBOX_DIR) # Construct paths to the makefile fragments for the four primary directories # of source code: the config directory, general framework code, reference # kernel code, and optimized kernel code. CONFIG_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(CONFIG_DIR) FRAME_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(FRAME_DIR) REFKERN_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(REFKERN_DIR) KERNELS_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(KERNELS_DIR) SANDBOX_FRAG_PATH := ./obj/$(CONFIG_NAME)/$(SANDBOX_DIR) # # --- Library name and local paths --------------------------------------------- # # Use lib/CONFIG_NAME as the default path to the local library files, but # allow the includer to override this value if it needs to point to an # installation directory. ifeq ($(strip $(LIB_PATH)),) LIB_PATH := $(LIB_DIR)/$(CONFIG_NAME) endif # Identify the base path for the intermediate library directory. We define # this path in terms of LIB_PATH, which gets a default value above (which is # what happens for the top-level Makefile). If LIB_PATH is specified by the # Makefile prior to including common.mk, that path is used instead.
This # allows Makefiles for example code and test drivers to reference an installed # prefix directory for situations when the build directory no longer exists. BASE_LIB_PATH := $(LIB_PATH) # The base name of the BLIS library that we will build. LIBBLIS := libblis # The shared (dynamic) library file suffix is different for Linux and OS X. ifeq ($(OS_NAME),Darwin) SHLIB_EXT := dylib else ifeq ($(IS_WIN),yes) ifeq ($(CC_VENDOR),gcc) SHLIB_EXT := dll.a else SHLIB_EXT := lib endif else SHLIB_EXT := so endif # Note: These names will be modified later to include the configuration and # version strings. LIBBLIS_A := $(LIBBLIS).a LIBBLIS_SO := $(LIBBLIS).$(SHLIB_EXT) # Append the base library path to the library names. LIBBLIS_A_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_A) LIBBLIS_SO_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_SO) # Create a filepath to a local symlink to the soname--that is, the same as # LIBBLIS_SO_PATH except with the .so major version number. Since the shared # library lists its soname as 'libblis.so.n', where n is the .so major version # number, a symlink in BASE_LIB_PATH is needed so that ld can find the local # shared library when the testsuite is run via 'make test' or 'make check'. ifeq ($(OS_NAME),Darwin) # OS X shared library extensions. LIBBLIS_SO_MAJ_EXT := $(SO_MAJOR).$(SHLIB_EXT) LIBBLIS_SO_MMB_EXT := $(SO_MMB).$(SHLIB_EXT) else ifeq ($(IS_WIN),yes) # Windows shared library extension. LIBBLIS_SO_MAJ_EXT := $(SO_MAJOR).dll LIBBLIS_SO_MMB_EXT := else # Linux shared library extensions. LIBBLIS_SO_MAJ_EXT := $(SHLIB_EXT).$(SO_MAJOR) LIBBLIS_SO_MMB_EXT := $(SHLIB_EXT).$(SO_MMB) endif LIBBLIS_SONAME := $(LIBBLIS).$(LIBBLIS_SO_MAJ_EXT) LIBBLIS_SO_MAJ_PATH := $(BASE_LIB_PATH)/$(LIBBLIS_SONAME) # Construct the output path when building a shared library. # NOTE: This code and the code immediately above is a little curious and # perhaps could be refactored (carefully). 
ifeq ($(IS_WIN),yes) LIBBLIS_SO_OUTPUT_NAME := $(LIBBLIS_SO_MAJ_PATH) else LIBBLIS_SO_OUTPUT_NAME := $(LIBBLIS_SO_PATH) endif # # --- Utility program definitions ---------------------------------------------- # SH := /bin/sh MV := mv MKDIR := mkdir -p RM_F := rm -f RM_RF := rm -rf SYMLINK := ln -sf FIND := find GREP := grep EGREP := grep -E XARGS := xargs INSTALL := install -c # Script for creating a monolithic header file. #FLATTEN_H := $(DIST_PATH)/build/flatten-headers.sh FLATTEN_H := $(PYTHON) $(DIST_PATH)/build/flatten-headers.py # Default archiver flags. ARFLAGS := cr # Used to refresh CHANGELOG. GIT := git GIT_LOG := $(GIT) log --decorate # # --- Default linker definitions ----------------------------------------------- # # NOTE: This section needs to reside before the inclusion of make_defs.mk # files (just below), as most configurations' make_defs.mk don't tinker # with things like LDFLAGS, but some do (or may), in which case they can # manually override whatever they need. # Define the external libraries we may potentially need at link-time. ifeq ($(IS_WIN),yes) LIBM := else LIBM := -lm endif LIBMEMKIND := -lmemkind # Default linker flags. # NOTE: -lpthread is needed unconditionally because BLIS uses pthread_once() # to initialize itself in a thread-safe manner. LDFLAGS := $(LDFLAGS_PRESET) $(LIBM) $(LIBPTHREAD) # Add libmemkind to the link-time flags, if it was enabled at configure-time. ifeq ($(MK_ENABLE_MEMKIND),yes) LDFLAGS += $(LIBMEMKIND) endif # Never use libm with Intel compilers. ifeq ($(CC_VENDOR),icc) LDFLAGS := $(filter-out $(LIBM),$(LDFLAGS)) endif # Never use libmemkind with Intel SDE. ifeq ($(DEBUG_TYPE),sde) LDFLAGS := $(filter-out $(LIBMEMKIND),$(LDFLAGS)) endif # Specify the shared library's 'soname' field. # NOTE: The flag for creating shared objects is different for Linux and OS X. ifeq ($(OS_NAME),Darwin) # OS X shared library link flags. 
SOFLAGS := -dynamiclib SOFLAGS += -Wl,-install_name,$(LIBBLIS_SONAME) else SOFLAGS := -shared ifeq ($(IS_WIN),yes) # Windows shared library link flags. ifeq ($(CC_VENDOR),clang) SOFLAGS += -Wl,-implib:$(BASE_LIB_PATH)/$(LIBBLIS).lib else SOFLAGS += -Wl,--out-implib,$(BASE_LIB_PATH)/$(LIBBLIS).dll.a endif else # Linux shared library link flags. SOFLAGS += -Wl,-soname,$(LIBBLIS_SONAME) endif endif # Decide which library to link to for things like the testsuite and BLIS test # drivers. We default to the static library, unless only the shared library was # enabled, in which case we use the shared library. LIBBLIS_L := $(LIBBLIS_A) LIBBLIS_LINK := $(LIBBLIS_A_PATH) ifeq ($(MK_ENABLE_SHARED),yes) ifeq ($(MK_ENABLE_STATIC),no) LIBBLIS_L := $(LIBBLIS_SO) LIBBLIS_LINK := $(LIBBLIS_SO_PATH) ifeq ($(IS_WIN),no) # For Linux and OS X: set rpath property of shared object. LDFLAGS += -Wl,-rpath,$(BASE_LIB_PATH) endif endif # On windows, use the shared library even if static is created. ifeq ($(IS_WIN),yes) LIBBLIS_L := $(LIBBLIS_SO) LIBBLIS_LINK := $(LIBBLIS_SO_PATH) endif endif # # --- Include makefile definitions file ---------------------------------------- # # Define the name of the file containing build and architecture-specific # makefile definitions. MAKE_DEFS_FILE := make_defs.mk # Assemble a list of all configuration family members, including the # configuration family name itself. Note that sort() will remove duplicates # for situations where CONFIG_NAME is present in CONFIG_LIST, such as would # be the case for singleton families. CONFIG_LIST_FAM := $(sort $(strip $(CONFIG_LIST) $(CONFIG_NAME))) # Construct the paths to the makefile definitions files, each of which # resides in a separate configuration sub-directory. We use CONFIG_LIST_FAM # since we might need the makefile definitions associated with the # configuration family (if it is an umbrella family). 
# NOTE: We use the prefix $(BASE_SHARE_PATH)/$(CONFIG_DIR)/ instead of # $(CONFIG_PATH) so that make_defs.mk can be found when it is installed, # provided the caller defined SHARE_PATH to that install directory. CONFIG_PATHS := $(addprefix $(BASE_SHARE_PATH)/$(CONFIG_DIR)/, \ $(CONFIG_LIST_FAM)) MAKE_DEFS_MK_PATHS := $(addsuffix /$(MAKE_DEFS_FILE), $(CONFIG_PATHS)) # Initialize the list of included (found) configurations to empty. CONFIGS_INCL := # Include the makefile definitions files implied by the list of configurations. -include $(MAKE_DEFS_MK_PATHS) # Detect whether we actually got all of the make definitions files. If # we didn't, then maybe a configuration is mislabeled or missing. The # check-env-make-defs target checks ALL_MAKE_DEFS_MK_PRESENT and outputs # an error message if it is set to 'no'. # NOTE: We use CONFIG_LIST_FAM as the expected list of configurations. # This combines CONFIG_NAME with CONFIG_LIST. The inclusion of CONFIG_NAME # is needed for situations where the configuration family is an umbrella # family (e.g. 'intel64'), since families have separate make_defs.mk files. CONFIGS_EXPECTED := $(CONFIG_LIST_FAM) ifeq ($(sort $(strip $(CONFIGS_INCL))), \ $(sort $(strip $(CONFIGS_EXPECTED)))) ALL_MAKE_DEFS_MK_PRESENT := yes else ALL_MAKE_DEFS_MK_PRESENT := no endif # # --- Configuration-agnostic flags --------------------------------------------- # # --- Linker program --- # Use whatever compiler was chosen. LINKER := $(CC) # --- Warning flags --- CWARNFLAGS := # Disable unused function warnings and stop compiling on first error for # all compilers that accept such options: gcc, clang, and icc. ifneq ($(CC_VENDOR),ibm) CWARNFLAGS += -Wall -Wno-unused-function -Wfatal-errors endif # Disable tautological comparison warnings in clang.
ifeq ($(CC_VENDOR),clang) CWARNFLAGS += -Wno-tautological-compare endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CWARNFLAGS,$(c)))) # --- Position-independent code flags (shared libraries only) --- # Emit position-independent code for dynamic linking. ifeq ($(IS_WIN),yes) # Note: Don't use any fPIC flags for Windows builds since all code is position- # independent. CPICFLAGS := else CPICFLAGS := -fPIC endif $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CPICFLAGS,$(c)))) # --- Symbol exporting flags (shared libraries only) --- # NOTE: These flags are only applied when building BLIS and not used by # applications that import BLIS compilation flags via the # $(get-user-cflags-for ...) function. # Determine default export behavior / visibility of symbols for gcc. ifeq ($(CC_VENDOR),gcc) ifeq ($(IS_WIN),yes) ifeq ($(EXPORT_SHARED),all) BUILD_SYMFLAGS := -Wl,--export-all-symbols, -Wl,--enable-auto-import else # ifeq ($(EXPORT_SHARED),public) BUILD_SYMFLAGS := -Wl,--exclude-all-symbols endif else # ifeq ($(IS_WIN),no) ifeq ($(EXPORT_SHARED),all) # Export all symbols by default. BUILD_SYMFLAGS := -fvisibility=default else # ifeq ($(EXPORT_SHARED),public) # Hide all symbols by default and export only those that have been annotated # as needing to be exported. BUILD_SYMFLAGS := -fvisibility=hidden endif endif endif # Determine default export behavior / visibility of symbols for icc. # NOTE: The Windows branches have been omitted since we currently make no # effort to support Windows builds via icc (only gcc/clang via AppVeyor). ifeq ($(CC_VENDOR),icc) ifeq ($(EXPORT_SHARED),all) # Export all symbols by default. BUILD_SYMFLAGS := -fvisibility=default else # ifeq ($(EXPORT_SHARED),public) # Hide all symbols by default and export only those that have been annotated # as needing to be exported. BUILD_SYMFLAGS := -fvisibility=hidden endif endif # Determine default export behavior / visibility of symbols for clang. 
ifeq ($(CC_VENDOR),clang) ifeq ($(IS_WIN),yes) ifeq ($(EXPORT_SHARED),all) # NOTE: clang on Windows does not appear to support exporting all symbols # by default, and therefore we ignore the value of EXPORT_SHARED. BUILD_SYMFLAGS := else # ifeq ($(EXPORT_SHARED),public) # NOTE: The default behavior of clang on Windows is to hide all symbols # and only export functions and other declarations that have been annotated # as needing to be exported. BUILD_SYMFLAGS := endif else # ifeq ($(IS_WIN),no) ifeq ($(EXPORT_SHARED),all) # Export all symbols by default. BUILD_SYMFLAGS := -fvisibility=default else # ifeq ($(EXPORT_SHARED),public) # Hide all symbols by default and export only those that have been annotated # as needing to be exported. BUILD_SYMFLAGS := -fvisibility=hidden endif endif endif # --- Language flags --- # Enable C99. CLANGFLAGS := -std=c99 $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CLANGFLAGS,$(c)))) # Enable C++11. CXXLANGFLAGS := -std=c++11 $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CXXLANGFLAGS,$(c)))) # --- C Preprocessor flags --- # Enable clock_gettime() in time.h.
CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L $(foreach c, $(CONFIG_LIST_FAM), $(eval $(call append-var-for,CPPROCFLAGS,$(c)))) # --- Threading flags --- ifeq ($(CC_VENDOR),gcc) ifeq ($(THREADING_MODEL),auto) THREADING_MODEL := openmp endif ifeq ($(THREADING_MODEL),openmp) CTHREADFLAGS := -fopenmp LDFLAGS += -fopenmp endif ifeq ($(THREADING_MODEL),pthreads) CTHREADFLAGS := -pthread LDFLAGS += $(LIBPTHREAD) endif endif ifeq ($(CC_VENDOR),icc) ifeq ($(THREADING_MODEL),auto) THREADING_MODEL := openmp endif ifeq ($(THREADING_MODEL),openmp) CTHREADFLAGS := -fopenmp LDFLAGS += -fopenmp endif ifeq ($(THREADING_MODEL),pthreads) CTHREADFLAGS := -pthread LDFLAGS += $(LIBPTHREAD) endif endif ifeq ($(CC_VENDOR),clang) ifeq ($(THREADING_MODEL),auto) THREADING_MODEL := pthreads endif ifeq ($(THREADING_MODEL),openmp) CTHREADFLAGS := -fopenmp LDFLAGS += -fopenmp endif ifeq ($(THREADING_MODEL),pthreads) CTHREADFLAGS := -pthread LDFLAGS += $(LIBPTHREAD) endif endif # --- #pragma omp simd flags (used for reference kernels only) --- ifeq ($(PRAGMA_OMP_SIMD),yes) ifeq ($(CC_VENDOR),gcc) COMPSIMDFLAGS := -fopenmp-simd else ifeq ($(CC_VENDOR),clang) COMPSIMDFLAGS := -fopenmp-simd else ifeq ($(CC_VENDOR),icc) COMPSIMDFLAGS := -qopenmp-simd endif endif endif else # ifeq ($(PRAGMA_OMP_SIMD),no) COMPSIMDFLAGS := endif # # --- Adjust verbosity level manually using make V=[0,1] ----------------------- # ifeq ($(V),1) ENABLE_VERBOSE := yes BLIS_ENABLE_TEST_OUTPUT := yes endif ifeq ($(V),0) ENABLE_VERBOSE := no BLIS_ENABLE_TEST_OUTPUT := no endif # # --- Append OS-specific libraries to LDFLAGS ---------------------------------- # ifeq ($(OS_NAME),Linux) LDFLAGS += -lrt endif # # --- LDFLAGS cleanup ---------------------------------------------------------- # # Remove duplicate flags/options in LDFLAGS (such as -lpthread) by sorting. 
LDFLAGS := $(sort $(LDFLAGS)) # # --- Include makefile fragments ----------------------------------------------- # # Initialize our list of directory paths to makefile fragments with the empty # list. This variable will accumulate all of the directory paths in which # makefile fragments reside. FRAGMENT_DIR_PATHS := # Initialize our makefile variables that source code files will be accumulated # into by the makefile fragments. This initialization is very important! These # variables will end up with weird contents if we don't initialize them to # empty prior to recursively including the makefile fragments. MK_CONFIG_SRC := MK_KERNELS_SRC := MK_REFKERN_SRC := MK_FRAME_SRC := MK_SANDBOX_SRC := # -- config -- # Construct paths to each of the sub-configurations specified in the # configuration list. Note that we use CONFIG_LIST_FAM, which already # has CONFIG_NAME included (with duplicates removed). CONFIG_PATHS := $(addprefix $(CONFIG_FRAG_PATH)/, $(CONFIG_LIST_FAM)) # This variable is used by the include statements as they recursively include # one another. For the 'config' directory, we initialize it to that directory # in preparation to include the fragments in the configuration sub-directory. PARENT_SRC_PATH := $(CONFIG_PATH) PARENT_PATH := $(CONFIG_FRAG_PATH) # Recursively include the makefile fragments in each of the sub-configuration # directories. -include $(addsuffix /$(FRAGMENT_MK), $(CONFIG_PATHS)) # -- kernels -- # Construct paths to each of the kernel sets required by the sub-configurations # in the configuration list. KERNEL_PATHS := $(addprefix $(KERNELS_FRAG_PATH)/, $(KERNEL_LIST)) # This variable is used by the include statements as they recursively include # one another. For the 'kernels' directory, we initialize it to that directory # in preparation to include the fragments in the kernels sub-directories.
PARENT_SRC_PATH := $(KERNELS_PATH) PARENT_PATH := $(KERNELS_FRAG_PATH) # Recursively include the makefile fragments in each of the kernels sub- # directories. -include $(addsuffix /$(FRAGMENT_MK), $(KERNEL_PATHS)) # -- ref_kernels -- # -- frame -- # This variable is used by the include statements as they recursively include # one another. For the framework and reference kernel source trees (ie: the # 'frame' and 'ref_kernels' directories), we initialize it to the top-level # directory since that is its parent. PARENT_SRC_PATH := $(DIST_PATH) PARENT_PATH := $(OBJ_DIR)/$(CONFIG_NAME) # Recursively include all the makefile fragments in the directories for the # reference kernels and portable framework. -include $(addsuffix /$(FRAGMENT_MK), $(REFKERN_FRAG_PATH)) -include $(addsuffix /$(FRAGMENT_MK), $(FRAME_FRAG_PATH)) # -- sandbox -- # Construct paths to each sandbox. (At present, there can be only one.) # NOTE: If $(SANDBOX) is empty (because no sandbox was enabled at configure- # time) then $(SANDBOX_PATHS) will also be empty, which will cause no # fragments to be included. SANDBOX_PATHS := $(addprefix $(SANDBOX_FRAG_PATH)/, $(SANDBOX)) # This variable is used by the include statements as they recursively include # one another. For the 'sandbox' directory, we initialize it to that directory # in preparation to include the fragments in the sandbox sub-directory. PARENT_SRC_PATH := $(SANDBOX_PATH) PARENT_PATH := $(SANDBOX_FRAG_PATH) # Recursively include the makefile fragments in the sandbox sub-directory. -include $(addsuffix /$(FRAGMENT_MK), $(SANDBOX_PATHS)) # Create a list of the makefile fragments using the variable into which each # of the above include statements accumulated their directory paths. MAKEFILE_FRAGMENTS := $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_DIR_PATHS)) # Detect whether we actually got any makefile fragments. If we didn't, then it # is likely that the user has not yet generated them (via configure).
ifeq ($(strip $(MAKEFILE_FRAGMENTS)),) MAKEFILE_FRAGMENTS_PRESENT := no else MAKEFILE_FRAGMENTS_PRESENT := yes endif # # --- Important sets of header files and paths --------------------------------- # # Define a function that will expand all of the directory paths given in $(1) # to actual filepaths using the list of suffixes provided $(2). get-filepaths = $(strip $(foreach path, $(1), \ $(foreach suf, $(2), \ $(wildcard $(path)/*.$(suf)) \ ) ) ) # Define a function that will expand all of the directory paths given in $(1) # to actual filepaths using the list of suffixes provided $(2), taking only # the first expansion from each directory with at least one file matching # the current suffix. Finally, strip the filenames from all resulting files, # returning only the directory paths. get-dirpaths = $(dir $(foreach path, $(1), \ $(firstword \ $(foreach suf, $(2), \ $(wildcard $(path)/*.$(suf)) \ ) ) ) ) # We'll use two directory lists. The first is a list of all of the directories # in which makefile fragments were generated (plus the current directory). The # second is the subset of the first that begins with the sandbox root path. ALLFRAG_DIR_PATHS := . $(FRAGMENT_DIR_PATHS) SANDBOX_DIR_PATHS := $(filter $(SANDBOX_PATH)/%,$(ALLFRAG_DIR_PATHS)) ALL_H99_FILES := $(call get-filepaths,$(ALLFRAG_DIR_PATHS),$(ALL_H99_SUFS)) FRAME_H99_FILES := $(filter-out $(SANDBOX_PATH)/%,$(ALL_H99_FILES)) ALL_H99_DIRPATHS := $(call get-dirpaths,$(ALLFRAG_DIR_PATHS),$(ALL_H99_SUFS)) SANDBOX_H99_FILES := $(call get-filepaths,$(SANDBOX_DIR_PATHS),$(SANDBOX_H99_SUFS)) SANDBOX_HXX_FILES := $(call get-filepaths,$(SANDBOX_DIR_PATHS),$(SANDBOX_HXX_SUFS)) SANDBOX_HDR_DIRPATHS := $(call get-dirpaths,$(SANDBOX_DIR_PATHS),$(ALL_HDR_SUFS)) # # --- blis.h header definitions ------------------------------------------------ # # Use include/CONFIG_NAME as the default path to the local header files, but # allow the includer to override this value if it needs to point to an # installation directory. 
ifeq ($(strip $(INC_PATH)),) INC_PATH := $(INCLUDE_DIR)/$(CONFIG_NAME) endif # Identify the base path for the intermediate include directory. We define # this path in terms of INC_PATH, which gets a default value above (which is # what happens for the top-level Makefile). If INC_PATH is specified by the # Makefile prior to including common.mk, that path is used instead. This # allows Makefiles for example code and test drivers to reference an installed # prefix directory for situations when the build directory no longer exists. BASE_INC_PATH := $(INC_PATH) # Isolate the path to blis.h by filtering the file from the list of framework # header files. BLIS_H := blis.h BLIS_H_SRC_PATH := $(filter %/$(BLIS_H), $(FRAME_H99_FILES)) # Construct the path to what will be the intermediate flattened/monolithic # blis.h file. BLIS_H_FLAT := $(BASE_INC_PATH)/$(BLIS_H) # # --- cblas.h header definitions ----------------------------------------------- # # Isolate the path to cblas.h by filtering the file from the list of framework # header files. CBLAS_H := cblas.h CBLAS_H_SRC_PATH := $(filter %/$(CBLAS_H), $(FRAME_H99_FILES)) # Construct the path to what will be the intermediate flattened/monolithic # cblas.h file. CBLAS_H_FLAT := $(BASE_INC_PATH)/$(CBLAS_H) # # --- Compiler include path definitions ---------------------------------------- # # Obtain a list of header files #included inside of the bli_cntx_ref.c file. # Paths to these files will be needed when compiling with the monolithic # header. ifeq ($(strip $(SHARE_PATH)),.) REF_KER_SRC := $(DIST_PATH)/$(REFKERN_DIR)/bli_cntx_ref.c REF_KER_HEADERS := $(shell $(GREP) "\#include" $(REF_KER_SRC) | sed -e "s/\#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g" | $(GREP) -v $(BLIS_H)) endif # Match each header found above with the path to that header, and then strip # leading, trailing, and internal whitespace. 
REF_KER_H_PATHS := $(strip $(foreach header, $(REF_KER_HEADERS), \ $(dir $(filter %/$(header), \ $(FRAME_H99_FILES))))) # Add -I to each header path so we can specify our include search paths to the # C compiler. Then add frame/include since it's needed for bli_oapi_w[o]_cntx.h. REF_KER_I_PATHS := $(strip $(patsubst %, -I%, $(REF_KER_H_PATHS))) REF_KER_I_PATHS += -I$(DIST_PATH)/frame/include # Prefix the paths above with the base include path. # NOTE: We no longer need every header path in the source tree since we # now #include the monolithic/flattened blis.h instead. CINCFLAGS := -I$(BASE_INC_PATH) $(REF_KER_I_PATHS) # Obtain a list of header paths in the configured sandbox. Then add -I to each # header path. CSBOXINCFLAGS := $(strip $(patsubst %, -I%, $(SANDBOX_HDR_DIRPATHS))) # # --- BLIS configuration header definitions ------------------------------------ # # This file was created by configure, but we need to define it here so we can # remove it as part of the clean targets. BLIS_CONFIG_H := ./bli_config.h # # --- Special preprocessor macro definitions ----------------------------------- # # Define a C preprocessor macro to communicate the current version so that it # can be embedded into the library and queried later. VERS_DEF := -DBLIS_VERSION_STRING=\"$(VERSION)\" # Define a C preprocessor flag that is *only* defined when BLIS is being # compiled. (In other words, an application that #includes blis.h will not # get this cpp macro.) 
BUILD_CPPFLAGS := -DBLIS_IS_BUILDING_LIBRARY # end of ifndef COMMON_MK_INCLUDED conditional block endif blis-0.6.1/config/000077500000000000000000000000001360743507500137265ustar00rootroot00000000000000blis-0.6.1/config/README.md000066400000000000000000000014671360743507500152150ustar00rootroot00000000000000 For more information on sub-configurations and configuration families in BLIS, please read the Configuration Guide, which can be viewed in markdown-rendered form [from the BLIS wiki page](https://github.com/flame/blis/wiki/). If you don't have time, or are impatient, take a look at the `config_registry` file in the top-level directory of the BLIS distribution. It contains a grammar-like mapping of configuration names, or families, to sub-configurations, which may be other families. Keep in mind that the `/` notation: ``` : / ``` means that the kernel set associated with `` should be made available to the configuration `` if `` is targeted at configure-time. (Some configurations borrow kernels from other configurations, and this is how we specify that requirement.) blis-0.6.1/config/amd64/000077500000000000000000000000001360743507500146415ustar00rootroot00000000000000blis-0.6.1/config/amd64/bli_family_amd64.h000066400000000000000000000034721360743507500201220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 //#endif blis-0.6.1/config/amd64/make_defs.mk000066400000000000000000000060261360743507500171140ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := amd64 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. 
CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/arm32/000077500000000000000000000000001360743507500146525ustar00rootroot00000000000000blis-0.6.1/config/arm32/bli_family_arm32.h000066400000000000000000000034721360743507500201440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 //#endif blis-0.6.1/config/arm32/make_defs.mk000066400000000000000000000056621360743507500171320ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := arm32 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := -mfloat-abi=hard -mfpu=neon CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv7-a else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. 
$(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/arm64/000077500000000000000000000000001360743507500146575ustar00rootroot00000000000000blis-0.6.1/config/arm64/bli_family_arm64.h000066400000000000000000000034721360743507500201560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 //#endif blis-0.6.1/config/arm64/make_defs.mk000066400000000000000000000056441360743507500171370ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := arm64 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv8-a else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/bgq/000077500000000000000000000000001360743507500144775ustar00rootroot00000000000000blis-0.6.1/config/bgq/bli_cntx_init_bgq.c000066400000000000000000000060311360743507500203210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_bgq( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_bgq_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 2, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_bgq_int_8x8, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_bgq_int_4x4, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. 
// s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 0, 8, 0, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 0, 8, 0, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 0, 1024, 0, 768 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 0, 2048, 0, 1536 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 0, 10240, 0, 10240 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/bgq/bli_family_bgq.h000066400000000000000000000062131360743507500176120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #undef restrict #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 1024 #define BLIS_DEFAULT_KC_S 2048 #define BLIS_DEFAULT_NC_S 8192 // 1 MPI RANK CASE: #define BLIS_DGEMM_UKERNEL bli_dgemm_int_8x8 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DEFAULT_MC_D 1024 #define BLIS_DEFAULT_KC_D 2048 #define BLIS_DEFAULT_NC_D 10240 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 1024 #define BLIS_DEFAULT_KC_C 2048 #define BLIS_DEFAULT_NC_C 8192 #define BLIS_ZGEMM_UKERNEL bli_zgemm_int_8x8 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 768 #define BLIS_DEFAULT_KC_Z 1536 #define BLIS_DEFAULT_NC_Z 10240 // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ #define BLIS_DEFAULT_AF_D 8 #define BLIS_DAXPYF_KERNEL bli_daxpyf_opt_var1 // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- #define BLIS_DAXPYV_KERNEL bli_daxpyv_opt_var1 #define BLIS_DDOTV_KERNEL bli_ddotv_opt_var1 #endif //#endif blis-0.6.1/config/bgq/make_defs.mk000066400000000000000000000071321360743507500167510ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := bgq #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # #ifeq ($(CC),) #CC := /bgsys/drivers/ppcfloor/comm/gcc.legacy/bin/mpixlc_r #CC_VENDOR := ibm #endif # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. 
You # may specify additional flags here as needed. CPPROCFLAGS := -I/bgsys/drivers/ppcfloor -I/bgsys/drivers/ppcfloor/spi/include/kernel/cnk ifeq ($(CC_VENDOR),ibm) CMISCFLAGS := -qthreaded -qsmp=omp -qasm=gcc -qkeyword=asm # -qreport -qsource -qlistopt -qlist else ifeq ($(CC_VENDOR),clang) CMISCFLAGS := -fopenmp else $(error xlc or bgclang is required for this configuration.) endif CPICFLAGS := CWARNFLAGS := -w ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),ibm) CKVECFLAGS := -qarch=qp -qtune=qp -qsimd=auto -qhot=level=1 -qprefetch -qunroll=yes -qnoipa endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Override the default value for LDFLAGS. ifeq ($(CC_VENDOR),ibm) LDFLAGS := -L/bgsys/drivers/ppcfloor/spi/lib -lSPI -lSPI_cnk -qthreaded -qsmp=omp else ifeq ($(CC_VENDOR),clang) LDFLAGS := -L/bgsys/drivers/ppcfloor/spi/lib -lSPI -lSPI_cnk -fopenmp endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/bulldozer/000077500000000000000000000000001360743507500157305ustar00rootroot00000000000000blis-0.6.1/config/bulldozer/bli_cntx_init_bulldozer.c000066400000000000000000000063171360743507500230120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_bulldozer( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_bulldozer_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. 
bli_cntx_set_l3_nat_ukrs ( 4, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_bulldozer_asm_8x8_fma4, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_bulldozer_asm_4x6_fma4, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_bulldozer_asm_8x4_fma4, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_bulldozer_asm_4x4_fma4, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 4, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 8, 6, 4, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 128, 1080, 96, 64 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 384, 120, 256, 192 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 8400, 4096, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/bulldozer/bli_family_bulldozer.h000066400000000000000000000054231360743507500222760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_8x8_fma4 #define BLIS_DEFAULT_MC_S 128 #define BLIS_DEFAULT_KC_S 384 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 8 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_4x6_fma4 #define BLIS_DEFAULT_MC_D 1080 #define BLIS_DEFAULT_KC_D 120 #define BLIS_DEFAULT_NC_D 8400 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 6 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_8x4_fma4 #define BLIS_DEFAULT_MC_C 96 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_4x4_fma4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 192 #define BLIS_DEFAULT_NC_Z 4096 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 4 #endif //#endif 
blis-0.6.1/config/bulldozer/make_defs.mk000066400000000000000000000061221360743507500202000ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. 
THIS_CONFIG := bulldozer #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpmath=sse -mavx -mfma4 -march=bdver1 -mno-tbm -mno-xop -mno-lwp else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mfpmath=sse -mavx -mfma4 -march=bdver1 -mno-tbm -mno-xop -mno-lwp else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/cortexa15/000077500000000000000000000000001360743507500155415ustar00rootroot00000000000000blis-0.6.1/config/cortexa15/bli_cntx_init_cortexa15.c000066400000000000000000000060531360743507500224310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_cortexa15( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_cortexa15_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 2, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv7a_int_4x4, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv7a_int_4x4, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. 
// s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 4, 4, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 336, 176, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 528, 368, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, 0, 0 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/cortexa15/bli_family_cortexa15.h000066400000000000000000000055721360743507500217250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_armv7a_int_4x4 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 336 #define BLIS_DEFAULT_KC_S 528 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DGEMM_UKERNEL bli_dgemm_armv7a_int_4x4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 176 #define BLIS_DEFAULT_KC_D 368 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif //#endif blis-0.6.1/config/cortexa15/make_defs.mk000066400000000000000000000056661360743507500200250ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := cortexa15 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. 
CPPROCFLAGS := CMISCFLAGS := -mfloat-abi=hard -mfpu=neon CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv7-a else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/cortexa53/000077500000000000000000000000001360743507500155435ustar00rootroot00000000000000blis-0.6.1/config/cortexa53/bli_cntx_init_cortexa53.c000066400000000000000000000060551360743507500224370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_cortexa53( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_cortexa53_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 2, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 120, 120, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 240, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. 
bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/cortexa53/bli_family_cortexa53.h000066400000000000000000000034101360743507500217160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 blis-0.6.1/config/cortexa53/make_defs.mk000066400000000000000000000057441360743507500200240ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. 
THIS_CONFIG := cortexa53 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -ftree-vectorize -mtune=cortex-a53 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv8-a+fp+simd -mcpu=cortex-a53 else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/cortexa57/000077500000000000000000000000001360743507500155475ustar00rootroot00000000000000blis-0.6.1/config/cortexa57/bli_cntx_init_cortexa57.c000066400000000000000000000060551360743507500224470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_cortexa57( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_cortexa57_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 2, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. 
// s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 120, 120, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 240, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/cortexa57/bli_family_cortexa57.h000066400000000000000000000060241360743507500217320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_opt_8x12 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 12 #define BLIS_DEFAULT_MC_S 120 //1536 //336 //416 // 1280 //160 // 160 // 160 //2048 //336 #define BLIS_DEFAULT_KC_S 640 //1536 //336 //704 //1280 //672 //528 // 856 //2048 //528 #define BLIS_DEFAULT_NC_S 3072 #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_6x8 #define BLIS_DEFAULT_MR_D 6 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DEFAULT_MC_D 120 //1536 //160 //80 //176 #define BLIS_DEFAULT_KC_D 240 //1536 //304 //336 //368 #define BLIS_DEFAULT_NC_D 3072 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif //#endif blis-0.6.1/config/cortexa57/make_defs.mk000066400000000000000000000057441360743507500200300ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := cortexa57 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. 
CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -ftree-vectorize -mtune=cortex-a57 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv8-a+fp+simd -mcpu=cortex-a57 else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/cortexa9/000077500000000000000000000000001360743507500154645ustar00rootroot00000000000000blis-0.6.1/config/cortexa9/bli_cntx_init_cortexa9.c000066400000000000000000000060511360743507500222750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_cortexa9( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_cortexa9_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 2, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv7a_int_4x4, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv7a_int_4x4, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 4, 4, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 432, 176, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 352, 368, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, 0, 0 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. 
bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/cortexa9/bli_family_cortexa9.h000066400000000000000000000055661360743507500215760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_armv7a_int_4x4 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 432 #define BLIS_DEFAULT_KC_S 352 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DGEMM_UKERNEL bli_dgemm_armv7a_int_4x4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 176 #define BLIS_DEFAULT_KC_D 368 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif //#endif blis-0.6.1/config/cortexa9/make_defs.mk000066400000000000000000000056651360743507500177470ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := cortexa9 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := -mfloat-abi=hard -mfpu=neon CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv7-a else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. 
$(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/excavator/000077500000000000000000000000001360743507500157225ustar00rootroot00000000000000blis-0.6.1/config/excavator/bli_cntx_init_excavator.c000066400000000000000000000063031360743507500227710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_cntx_init_excavator( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_excavator_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 4, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_piledriver_asm_16x3, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_piledriver_asm_8x3, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_piledriver_asm_4x2, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_piledriver_asm_2x2, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 4, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 3, 3, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 528, 264, 264, 100 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 320 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8400, 8400, 8400, 8400 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/excavator/bli_family_excavator.h000066400000000000000000000057351360743507500222700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x3 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 3 #define BLIS_DEFAULT_MC_S 528 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 8400 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x3 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 3 #define BLIS_DEFAULT_MC_D 264 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 8400 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_4x2 #define BLIS_DEFAULT_MR_C 4 #define BLIS_DEFAULT_NR_C 2 #define BLIS_DEFAULT_MC_C 264 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 8400 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_2x2 #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 2 #define BLIS_DEFAULT_MC_Z 100 #define BLIS_DEFAULT_KC_Z 320 #define BLIS_DEFAULT_NC_Z 8400 #endif //#endif blis-0.6.1/config/excavator/make_defs.mk000066400000000000000000000061441360743507500201760ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := excavator #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. 
CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/generic/000077500000000000000000000000001360743507500153425ustar00rootroot00000000000000blis-0.6.1/config/generic/bli_cntx_init_generic.c000066400000000000000000000034421360743507500220320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_generic( cntx_t* cntx ) { // Set default kernel blocksizes and functions. bli_cntx_init_generic_ref( cntx ); } blis-0.6.1/config/generic/bli_family_generic.h000066400000000000000000000033071360743507500213210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-0.6.1/config/generic/make_defs.mk000066400000000000000000000060041360743507500176110ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := generic #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. 
$(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/haswell/000077500000000000000000000000001360743507500153655ustar00rootroot00000000000000blis-0.6.1/config/haswell/bli_cntx_init_haswell.c000066400000000000000000000207531360743507500221040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" //GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) void bli_cntx_init_haswell( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; blksz_t thresh[ BLIS_NUM_THRESH ]; // Set default kernel blocksizes and functions. bli_cntx_init_haswell_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 8, // gemm #if 1 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, TRUE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, TRUE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, TRUE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, TRUE, #else BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_16x6, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_8x6, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_8x3, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_4x3, FALSE, #endif // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, TRUE, cntx ); // Update the context with optimized level-1f kernels. bli_cntx_set_l1f_kers ( 4, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, cntx ); // Update the context with optimized level-1v kernels. 
bli_cntx_set_l1v_kers ( 10, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv #if 0 BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int, #else BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, #endif // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv #if 0 BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int, #else BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, #endif cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z #if 1 bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1008, 1008, 1008, 1008 ); //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 168, 72, 72, 36 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 168, 72, 75, 192 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 6, 6, 3, 3 ); //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1024, 1024, 1024, 1024 ); //bli_blksz_init_easy( &blkszs[ BLIS_MC ], 112, 64, 56, 32 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 112, 72, 56, 44 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); #endif bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 8, 8 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, 8, 8 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. 
bli_cntx_set_blkszs ( BLIS_NAT, 7, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, cntx ); // ------------------------------------------------------------------------- // Initialize sup thresholds with architecture-appropriate values. // s d c z bli_blksz_init_easy( &thresh[ BLIS_MT ], -1, 201, -1, -1 ); bli_blksz_init_easy( &thresh[ BLIS_NT ], -1, 100, -1, -1 ); bli_blksz_init_easy( &thresh[ BLIS_KT ], -1, 120, -1, -1 ); // Initialize the context with the sup thresholds. bli_cntx_set_l3_sup_thresh ( 3, BLIS_MT, &thresh[ BLIS_MT ], BLIS_NT, &thresh[ BLIS_NT ], BLIS_KT, &thresh[ BLIS_KT ], cntx ); #if 0 // Initialize the context with the sup handlers. bli_cntx_set_l3_sup_handlers ( 1, BLIS_GEMM, bli_gemmsup_ref, cntx ); #endif // Update the context with optimized small/unpacked gemm kernels. bli_cntx_set_l3_sup_kers ( 8, //BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref, BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE, BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE, BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, cntx ); // Initialize level-3 sup blocksize objects with architecture-specific // values. 
// s d c z bli_blksz_init ( &blkszs[ BLIS_MR ], -1, 6, -1, -1, -1, 9, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 72, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 4080, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes for small/unpacked level-3 problems. bli_cntx_set_l3_sup_blkszs ( 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_KC, &blkszs[ BLIS_KC ], BLIS_MC, &blkszs[ BLIS_MC ], BLIS_NR, &blkszs[ BLIS_NR ], BLIS_MR, &blkszs[ BLIS_MR ], cntx ); } blis-0.6.1/config/haswell/bli_family_haswell.h000066400000000000000000000114211360743507500213630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H

// NOTE: Everything from the `#if 0` below down to its matching `#endif` at
// the end of this header is compiled out, so none of these macros is
// currently defined by this file. The nested `#if 1` / `#if 0` blocks pick
// among alternative definitions, but have no effect while the outer guard
// is 0.
#if 0

// -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS ---------------------------

// -- sgemm micro-kernel --

// Alternative: 4x24 micro-kernel (inner guard is 0).
#if 0
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_4x24
#define BLIS_DEFAULT_MC_S 256
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 4080
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 24

#define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif

// Alternative: 6x16 micro-kernel (inner guard is 1).
#if 1
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16
#define BLIS_DEFAULT_MC_S 144
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 4080
#define BLIS_DEFAULT_MR_S 6
#define BLIS_DEFAULT_NR_S 16

#define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif

// Alternative: 16x6 micro-kernel (inner guard is 0). Unlike the two
// alternatives above, no *_PREFERS_CONTIG_ROWS macro is defined here.
#if 0
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6
#define BLIS_DEFAULT_MC_S 144
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 4080
#define BLIS_DEFAULT_MR_S 16
#define BLIS_DEFAULT_NR_S 6
#endif

// -- dgemm micro-kernel --

// Alternative: 4x12 micro-kernel (inner guard is 0).
#if 0
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_4x12
#define BLIS_DEFAULT_MC_D 152
#define BLIS_DEFAULT_KC_D 160
#define BLIS_DEFAULT_NC_D 4080
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 12

#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif

// Alternative: 6x8 micro-kernel (inner guard is 1).
#if 1
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8
#define BLIS_DEFAULT_MC_D 72
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 4080
#define BLIS_DEFAULT_MR_D 6
#define BLIS_DEFAULT_NR_D 8

#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif

// Alternative: 8x6 micro-kernel (inner guard is 0).
#if 0
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6
#define BLIS_DEFAULT_MC_D 72
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 4080
#define BLIS_DEFAULT_MR_D 8
#define BLIS_DEFAULT_NR_D 6
#endif

// -- cgemm micro-kernel --

// Alternative: 3x8 micro-kernel (inner guard is 1).
#if 1
#define BLIS_CGEMM_UKERNEL bli_cgemm_asm_3x8
#define BLIS_DEFAULT_MC_C 144
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 4080
#define BLIS_DEFAULT_MR_C 3
#define BLIS_DEFAULT_NR_C 8

#define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif

// Alternative: 8x3 micro-kernel (inner guard is 0).
#if 0
#define BLIS_CGEMM_UKERNEL bli_cgemm_asm_8x3
#define BLIS_DEFAULT_MC_C 144
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 4080
#define BLIS_DEFAULT_MR_C 8
#define BLIS_DEFAULT_NR_C 3
#endif

// -- zgemm micro-kernel --

// Alternative: 3x4 micro-kernel (inner guard is 1).
#if 1
#define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4
#define BLIS_DEFAULT_MC_Z 72
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 4080
#define BLIS_DEFAULT_MR_Z 3
#define BLIS_DEFAULT_NR_Z 4

#define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif

// Alternative: 4x3 micro-kernel (inner guard is 0).
#if 0
#define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_4x3
#define BLIS_DEFAULT_MC_Z 72
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 4080
#define BLIS_DEFAULT_MR_Z 4
#define BLIS_DEFAULT_NR_Z 3
#endif

// Closes the outer `#if 0` opened at the top of this header.
#endif

//#endif
blis-0.6.1/config/haswell/make_defs.mk000066400000000000000000000064101360743507500176350ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # #

# Declare the name of the current configuration and add it to the
# running list of configurations included by common.mk.
THIS_CONFIG := haswell
#CONFIGS_INCL += $(THIS_CONFIG)

#
# --- Determine the C compiler and related flags ---
#

# NOTE: The build system will append these variables with various
# general-purpose/configuration-agnostic flags in common.mk. You
# may specify additional flags here as needed.
CPPROCFLAGS :=
CMISCFLAGS :=
CPICFLAGS :=
CWARNFLAGS :=

# Debug symbols are requested for every DEBUG_TYPE except "off"; in the
# "off" case CDBGFLAGS is simply not assigned by this file.
ifneq ($(DEBUG_TYPE),off)
CDBGFLAGS := -g
endif

# Only DEBUG_TYPE=noopt turns optimization off; any other value gets -O3.
ifeq ($(DEBUG_TYPE),noopt)
COPTFLAGS := -O0
else
COPTFLAGS := -O3
endif

# Flags specific to optimized kernels.
# CKVECFLAGS is selected per compiler vendor. A vendor other than gcc, icc,
# or clang reaches the $(error ...) line, which aborts processing.
CKOPTFLAGS := $(COPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell
ifeq ($(GCC_OT_4_9_0),yes)
# If gcc is older than 4.9.0, we must use a different label for -march.
CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=core-avx2
endif
else
ifeq ($(CC_VENDOR),icc)
CKVECFLAGS := -xCORE-AVX2
else
ifeq ($(CC_VENDOR),clang)
CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell
else
$(error gcc, icc, or clang is required for this configuration.)
endif
endif
endif

# Flags specific to reference kernels.
# gcc and clang get the kernel vector flags plus -funsafe-math-optimizations
# and -ffp-contract=fast; any other vendor reuses CKVECFLAGS unchanged.
CROPTFLAGS := $(CKOPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
ifeq ($(CC_VENDOR),clang)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
CRVECFLAGS := $(CKVECFLAGS)
endif
endif

# Store all of the variables here to new variables containing the
# configuration name.
$(eval $(call store-make-defs,$(THIS_CONFIG)))
blis-0.6.1/config/intel64/000077500000000000000000000000001360743507500152135ustar00rootroot00000000000000blis-0.6.1/config/intel64/bli_family_intel64.h000066400000000000000000000033061360743507500210420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-0.6.1/config/intel64/make_defs.mk000066400000000000000000000061201360743507500174610ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # #

# Declare the name of the current configuration and add it to the
# running list of configurations included by common.mk.
THIS_CONFIG := intel64
#CONFIGS_INCL += $(THIS_CONFIG)

#
# --- Determine the C compiler and related flags ---
#

# NOTE: The build system will append these variables with various
# general-purpose/configuration-agnostic flags in common.mk. You
# may specify additional flags here as needed.
CPPROCFLAGS :=
CMISCFLAGS :=
CPICFLAGS :=
CWARNFLAGS :=

# Debug symbols are requested for every DEBUG_TYPE except "off"; in the
# "off" case CDBGFLAGS is simply not assigned by this file.
ifneq ($(DEBUG_TYPE),off)
CDBGFLAGS := -g
endif

# Only DEBUG_TYPE=noopt turns optimization off; any other value gets -O3.
ifeq ($(DEBUG_TYPE),noopt)
COPTFLAGS := -O0
else
COPTFLAGS := -O3
endif

# Flags specific to optimized kernels.
# CKVECFLAGS is selected per compiler vendor. A vendor other than gcc, icc,
# or clang reaches the $(error ...) line, which aborts processing.
CKOPTFLAGS := $(COPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2
else
ifeq ($(CC_VENDOR),icc)
CKVECFLAGS := -xSSSE3
else
ifeq ($(CC_VENDOR),clang)
CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2
else
$(error gcc, icc, or clang is required for this configuration.)
endif
endif
endif

# Flags specific to reference kernels.
# gcc and clang get the kernel vector flags plus -funsafe-math-optimizations
# and -ffp-contract=fast; any other vendor reuses CKVECFLAGS unchanged.
CROPTFLAGS := $(CKOPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
ifeq ($(CC_VENDOR),clang)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
CRVECFLAGS := $(CKVECFLAGS)
endif
endif

# Store all of the variables here to new variables containing the
# configuration name.
$(eval $(call store-make-defs,$(THIS_CONFIG)))
blis-0.6.1/config/knc/000077500000000000000000000000001360743507500145015ustar00rootroot00000000000000blis-0.6.1/config/knc/bli_cntx_init_knc.c000066400000000000000000000061501360743507500203270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_knc( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_knc_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 1, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_knc_asm_30x8, TRUE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 0, 30, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 0, 8, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 0, 120, 0, 0, 0, 160, 0, 0 ); bli_blksz_init ( &blkszs[ BLIS_KC ], 0, 240, 0, 0, 0, 300, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 0, 14400, 0, 0 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/knc/bli_family_knc.h000066400000000000000000000067241360743507500176250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H

// -- THREADING PARAMETERS -----------------------------------------------------

// Both tree-barrier macros are defined unconditionally in this header.
#define BLIS_TREE_BARRIER
#define BLIS_TREE_BARRIER_ARITY 4

// -- MEMORY ALLOCATION --------------------------------------------------------

#define BLIS_SIMD_ALIGN_SIZE 64

#define BLIS_SIMD_SIZE 64
#define BLIS_SIMD_NUM_REGISTERS 32

// NOTE: Everything from this `#if 0` down to its matching `#endif` is
// compiled out, so none of the macros below is currently defined by this
// file.
#if 0
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------

#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_30x16
#define BLIS_DEFAULT_MR_S 30
#define BLIS_DEFAULT_NR_S 16
#define BLIS_DEFAULT_MC_S 240
#define BLIS_DEFAULT_KC_S 240
#define BLIS_DEFAULT_NC_S 14400

#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_30x8
#define BLIS_DEFAULT_MR_D 30
#define BLIS_DEFAULT_NR_D 8
#define BLIS_DEFAULT_MC_D 120
#define BLIS_DEFAULT_KC_D 240
#define BLIS_DEFAULT_NC_D 14400

// Maximum MC/KC are the default plus one quarter of the default; maximum
// NC equals the default.
#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4)
#define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4)
#define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + 0)

#define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4)
#define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4)
#define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + 0)

// The MR packing dimension is the default MR plus 2; the NR variants are
// left commented out.
#define BLIS_PACKDIM_MR_S (BLIS_DEFAULT_MR_S + 2)
//#define BLIS_PACKDIM_NR_S (BLIS_DEFAULT_NR_S + ...)

#define BLIS_PACKDIM_MR_D (BLIS_DEFAULT_MR_D + 2)
//#define BLIS_PACKDIM_NR_D (BLIS_DEFAULT_NR_D + ...)
#endif

//#endif
blis-0.6.1/config/knc/make_defs.mk000066400000000000000000000060751360743507500167580ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries.
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # #

# Declare the name of the current configuration and add it to the
# running list of configurations included by common.mk.
THIS_CONFIG := knc
#CONFIGS_INCL += $(THIS_CONFIG)

#
# --- Determine the C compiler and related flags ---
#

# NOTE: The build system will append these variables with various
# general-purpose/configuration-agnostic flags in common.mk. You
# may specify additional flags here as needed.
CPPROCFLAGS :=
CMISCFLAGS := -mmic -fasm-blocks
CPICFLAGS :=
CWARNFLAGS :=

# Debug symbols are requested for every DEBUG_TYPE except "off"; in the
# "off" case CDBGFLAGS is simply not assigned by this file.
ifneq ($(DEBUG_TYPE),off)
CDBGFLAGS := -g
endif

# Only DEBUG_TYPE=noopt turns optimization off; any other value gets -O3.
ifeq ($(DEBUG_TYPE),noopt)
COPTFLAGS := -O0
else
COPTFLAGS := -O3
endif

# Flags specific to optimized kernels.
# Only icc is accepted; any other vendor reaches the $(error ...) line,
# which aborts processing. For icc, CKVECFLAGS is intentionally empty.
CKOPTFLAGS := $(COPTFLAGS)
ifeq ($(CC_VENDOR),icc)
CKVECFLAGS :=
else
$(error icc is required for this configuration.)
endif

# Flags specific to reference kernels.
# NOTE(review): because a non-icc vendor already hit $(error) above, the
# gcc and clang branches below do not appear reachable in a successful
# run -- confirm before relying on them.
CROPTFLAGS := $(CKOPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
ifeq ($(CC_VENDOR),clang)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
CRVECFLAGS := $(CKVECFLAGS)
endif
endif

# Override the default value for LDFLAGS.
LDFLAGS := -mmic

# Never use libm with Intel compilers.
ifneq ($(CC_VENDOR),icc)
LDFLAGS += $(LIBM)
endif

# Store all of the variables here to new variables containing the
# configuration name.
$(eval $(call store-make-defs,$(THIS_CONFIG)))
blis-0.6.1/config/knl/000077500000000000000000000000001360743507500145125ustar00rootroot00000000000000blis-0.6.1/config/knl/bli_cntx_init_knl.c000066400000000000000000000116531360743507500203550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

// Populate the context `cntx` for the knl configuration.
//
// Reference defaults are installed first via bli_cntx_init_knl_ref(); each
// later call then overrides one group of entries: native gemm micro-kernels,
// packm kernels, level-1f kernels, level-1v kernels, and blocksizes.
// In every registration call below, the leading integer equals the number
// of entries listed after it; keep the two in step when editing a list.
void bli_cntx_init_knl( cntx_t* cntx )
{
	blksz_t blkszs[ BLIS_NUM_BLKSZS ];

	// Set default kernel blocksizes and functions.
	bli_cntx_init_knl_ref( cntx );

	// -------------------------------------------------------------------------

	// Update the context with optimized native gemm micro-kernels and
	// their storage preferences.
	bli_cntx_set_l3_nat_ukrs
	(
	  2,
	  BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_knl_asm_24x16, FALSE,
	  BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_knl_asm_24x8, FALSE,
	  cntx
	);

	// Update the context with optimized packm kernels.
	// Only double-precision packm kernels are registered here.
	bli_cntx_set_packm_kers
	(
	  2,
	  BLIS_PACKM_8XK_KER, BLIS_DOUBLE, bli_dpackm_knl_asm_8xk,
	  BLIS_PACKM_24XK_KER, BLIS_DOUBLE, bli_dpackm_knl_asm_24xk,
	  cntx
	);

	// Update the context with optimized level-1f kernels.
	// Note that the kernels registered here carry the `zen` suffix rather
	// than `knl`.
	bli_cntx_set_l1f_kers
	(
	  4,
	  // axpyf
	  BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8,
	  BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8,
	  // dotxf
	  BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8,
	  BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8,
	  cntx
	);

	// Update the context with optimized level-1v kernels.
	// The `#if 0` / `#else` pairs below select the `_int10` variants of the
	// axpyv and scalv kernels; either branch contributes exactly two
	// entries, so the count of 10 holds for both.
	bli_cntx_set_l1v_kers
	(
	  10,
	  // amaxv
	  BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int,
	  BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int,
	  // axpyv
#if 0
	  BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int,
	  BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int,
#else
	  BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10,
	  BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10,
#endif
	  // dotv
	  BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int,
	  BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int,
	  // dotxv
	  BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int,
	  BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int,
	  // scalv
#if 0
	  BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int,
	  BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int,
#else
	  BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10,
	  BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10,
#endif
	  cntx
	);

	// Initialize level-3 blocksize objects with architecture-specific values.
	// The c and z columns are set to -1 throughout.
	// bli_blksz_init() (used for MC and KC) takes two rows of four values,
	// while bli_blksz_init_easy() takes a single row.
	// NOTE(review): the second row is presumably the per-datatype maximum --
	// confirm against the declaration of bli_blksz_init().
	// s d c z
	bli_blksz_init_easy( &blkszs[ BLIS_MR ], 24, 24, -1, -1 );
	bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, -1, -1 );
	bli_blksz_init ( &blkszs[ BLIS_MC ], 240, 120, -1, -1,
	                 288, 144, -1, -1 );
	bli_blksz_init ( &blkszs[ BLIS_KC ], 336, 336, -1, -1,
	                 408, 408, -1, -1 );
	bli_blksz_init_easy( &blkszs[ BLIS_NC ], 14400, 14400, -1, -1 );
	bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 );
	bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );

	// Update the context with the current architecture's register and cache
	// blocksizes (and multiples) for native execution.
	bli_cntx_set_blkszs
	(
	  BLIS_NAT, 7,
	  // level-3
	  BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR,
	  BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR,
	  BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR,
	  BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR,
	  BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR,
	  // level-1f
	  BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF,
	  BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF,
	  cntx
	);
}
blis-0.6.1/config/knl/bli_family_knl.h000066400000000000000000000116171360743507500176440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H

// -- THREADING PARAMETERS -----------------------------------------------------

#define BLIS_THREAD_RATIO_M 4
#define BLIS_THREAD_RATIO_N 1

#define BLIS_THREAD_MAX_IR 1
#define BLIS_THREAD_MAX_JR 1

// -- MEMORY ALLOCATION --------------------------------------------------------

// The tree-barrier macros are left commented out in this configuration.
//#define BLIS_TREE_BARRIER
//#define BLIS_TREE_BARRIER_ARITY 4

#define BLIS_SIMD_ALIGN_SIZE 64

#define BLIS_SIMD_SIZE 64
#define BLIS_SIMD_NUM_REGISTERS 32

// The pool-allocator selection below is disabled by being wrapped in a
// block comment.
// NOTE(review): the #include lines inside it carry no header name in this
// copy of the file.
/*
#ifdef BLIS_NO_HBWMALLOC
#include
#define BLIS_MALLOC_POOL malloc
#define BLIS_FREE_POOL free
#else
#include
#define BLIS_MALLOC_POOL hbw_malloc
#define BLIS_FREE_POOL hbw_free
#endif
*/

//#define BLIS_MALLOC_INTL hbw_malloc
//#define BLIS_FREE_INTL hbw_free

// NOTE: Everything from this `#if 0` down to its matching `#endif` is
// compiled out, so none of the macros below is currently defined by this
// file.
#if 0
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------

#define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#define BLIS_SGEMM_UKERNEL bli_sgemm_opt_30x16_knc
#define BLIS_DEFAULT_MC_S 240
#define BLIS_DEFAULT_KC_S 240
#define BLIS_DEFAULT_NC_S 14400
#define BLIS_DEFAULT_MR_S 30
#define BLIS_DEFAULT_NR_S 16
#define BLIS_PACKDIM_MR_S 32
#define BLIS_PACKDIM_NR_S 16

// Three alternative dgemm settings; with `#if 0` and `#elif 0` both false,
// the final `#else` branch (24x8) is the one that would be chosen if the
// outer guard were enabled.
#if 0

#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_30x8_knc
#define BLIS_DEFAULT_MC_D 120
#define BLIS_DEFAULT_KC_D 240
#define BLIS_DEFAULT_NC_D 14400
#define BLIS_DEFAULT_MR_D 30
#define BLIS_DEFAULT_NR_D 8
#define BLIS_PACKDIM_MR_D 32
#define BLIS_PACKDIM_NR_D 8

#elif 0

#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_30x8
#define BLIS_DEFAULT_MC_D 120
#define BLIS_DEFAULT_KC_D 240
#define BLIS_DEFAULT_NC_D 14400
#define BLIS_DEFAULT_MR_D 30
#define BLIS_DEFAULT_NR_D 8
#define BLIS_PACKDIM_MR_D 32
#define BLIS_PACKDIM_NR_D 8

#define BLIS_DPACKM_8XK_KERNEL bli_dpackm_8xk_opt
#define BLIS_DPACKM_30XK_KERNEL bli_dpackm_30xk_opt

#else

#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_24x8
#define BLIS_DEFAULT_MR_D 24
#define BLIS_DEFAULT_NR_D 8
#define BLIS_PACKDIM_MR_D 24
#define BLIS_PACKDIM_NR_D 8
#define BLIS_DEFAULT_MC_D 120
#define BLIS_DEFAULT_KC_D 336
#define BLIS_DEFAULT_NC_D 14400

#define BLIS_DPACKM_8XK_KERNEL bli_dpackm_8xk_opt
#define BLIS_DPACKM_24XK_KERNEL bli_dpackm_24xk_opt

#endif

// Maximum MC/KC are the default plus one quarter of the default; maximum
// NC equals the default.
#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4)
#define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4)
#define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + 0)

#define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4)
#define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4)
#define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + 0)

#endif

//#endif
blis-0.6.1/config/knl/make_defs.mk000066400000000000000000000077721360743507500167760ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := knl #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif ifeq ($(DEBUG_TYPE),sde) # Unconditionally disable use of libmemkind in Intel SDE. # Note: The BLIS_DISABLE_MEMKIND macro definition will override # (undefine) the BLIS_ENABLE_MEMKIND macro definition. CPPROCFLAGS += -DBLIS_DISABLE_MEMKIND # This value is normally set by configure and communicated to make via # config.mk, however, the make_defs.mk files (this file) get included # after config.mk, so this definition will override that earlier # definition. MK_ENABLE_MEMKIND := no endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx512f -mavx512pf -mfpmath=sse -march=knl else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xMIC-AVX512 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx512f -mavx512pf -mfpmath=sse -march=knl else $(error gcc, icc, or clang is required for this configuration.) 
endif endif endif # The assembler on OS X won't recognize AVX512 without help. ifneq ($(CC_VENDOR),icc) ifeq ($(OS_NAME),Darwin) CKVECFLAGS += -Wa,-march=knl endif endif # Flags specific to reference kernels. # Note: We use AVX2 for reference kernels instead of AVX-512. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),icc) CRVECFLAGS := -xMIC-AVX512 else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := -march=knl -mno-avx512f -mno-avx512pf -mno-avx512er -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/old/000077500000000000000000000000001360743507500145045ustar00rootroot00000000000000blis-0.6.1/config/old/armv7a/000077500000000000000000000000001360743507500157015ustar00rootroot00000000000000blis-0.6.1/config/old/armv7a/bli_cntx_init_armv7a.c000066400000000000000000000062431360743507500221540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_armv7a( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_armv7a_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv7a_asm_4x4, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv7a_asm_4x4, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_armv7a_asm_2x2, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_armv7a_asm_2x2, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. 
// s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 4, 4, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 432, 192, 64, 64 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 352, 256, 128, 128 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, 4096, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/old/armv7a/bli_family_armv7a.h000066400000000000000000000057701360743507500214470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_FAMILY_H #define BLIS_FAMILY_H // -- ARCHITECTURE-SPECIFIC PROTOTYPES ----------------------------------------- // Define the current architecture's name. #define archname armv7a // Include the context initialization function API template. #include "bli_cntx_init_arch.h" #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_opt_4x4 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 432 #define BLIS_DEFAULT_KC_S 352 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_4x4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 192 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_CGEMM_UKERNEL bli_cgemm_opt_4x4 #define BLIS_DEFAULT_MR_C 2 #define BLIS_DEFAULT_NR_C 2 #define BLIS_DEFAULT_MC_C 64 #define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_ZGEMM_UKERNEL bli_zgemm_opt_4x4 #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 2 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 128 #define BLIS_DEFAULT_NC_Z 4096 #endif #endif blis-0.6.1/config/old/armv7a/make_defs.mk000066400000000000000000000052131360743507500201510ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := armv7a #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # ifeq ($(CC),) CC := gcc CC_VENDOR := gcc endif # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). 
CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=c99 -mfloat-abi=hard CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpu=vfpv3 -marm -march=armv7-a else $(error gcc is required for this configuration.) endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/old/emscripten/000077500000000000000000000000001360743507500166555ustar00rootroot00000000000000blis-0.6.1/config/old/emscripten/bli_kernel.h000066400000000000000000000146161360743507500211440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H /* Use the same parameters as non-SIMD PNaCl */ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 // -- Cache blocksizes -- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // #define BLIS_DEFAULT_MC_S 252 #define BLIS_DEFAULT_KC_S 264 #define BLIS_DEFAULT_NC_S 8196 #define BLIS_DEFAULT_MC_D 1080 #define BLIS_DEFAULT_KC_D 120 #define BLIS_DEFAULT_NC_D 8400 #define BLIS_DEFAULT_MC_C 120 #define BLIS_DEFAULT_KC_C 264 #define BLIS_DEFAULT_NC_C 4092 #define BLIS_DEFAULT_MC_Z 60 #define BLIS_DEFAULT_KC_Z 264 #define BLIS_DEFAULT_NC_Z 2040 // -- Register blocksizes -- #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 3 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 3 #define BLIS_DEFAULT_MR_C 2 #define BLIS_DEFAULT_NR_C 3 #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 3 // NOTE: If the micro-kernel, which is typically unrolled to a factor // of f, handles leftover edge cases (ie: when k % f > 0) then these // register blocksizes in the k dimension can be defined to 1. 
//#define BLIS_DEFAULT_KR_S 1 //#define BLIS_DEFAULT_KR_D 1 //#define BLIS_DEFAULT_KR_C 1 //#define BLIS_DEFAULT_KR_Z 1 // -- Maximum cache blocksizes (for optimizing edge cases) -- // NOTE: These cache blocksize "extensions" have the same constraints as // the corresponding default blocksizes above. When these values are // larger than the default blocksizes, blocksizes used at edge cases are // enlarged if such an extension would encompass the remaining portion of // the matrix dimension. //#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) //#define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) //#define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + BLIS_DEFAULT_NC_S/4) //#define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) //#define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) //#define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + BLIS_DEFAULT_NC_D/4) //#define BLIS_MAXIMUM_MC_C (BLIS_DEFAULT_MC_C + BLIS_DEFAULT_MC_C/4) //#define BLIS_MAXIMUM_KC_C (BLIS_DEFAULT_KC_C + BLIS_DEFAULT_KC_C/4) //#define BLIS_MAXIMUM_NC_C (BLIS_DEFAULT_NC_C + BLIS_DEFAULT_NC_C/4) //#define BLIS_MAXIMUM_MC_Z (BLIS_DEFAULT_MC_Z + BLIS_DEFAULT_MC_Z/4) //#define BLIS_MAXIMUM_KC_Z (BLIS_DEFAULT_KC_Z + BLIS_DEFAULT_KC_Z/4) //#define BLIS_MAXIMUM_NC_Z (BLIS_DEFAULT_NC_Z + BLIS_DEFAULT_NC_Z/4) // -- Packing register blocksize (for packed micro-panels) -- // NOTE: These register blocksize "extensions" determine whether the // leading dimensions used within the packed micro-panels are equal to // or greater than their corresponding register blocksizes above. //#define BLIS_PACKDIM_MR_S (BLIS_DEFAULT_MR_S + ...) //#define BLIS_PACKDIM_NR_S (BLIS_DEFAULT_NR_S + ...) //#define BLIS_PACKDIM_MR_D (BLIS_DEFAULT_MR_D + ...) //#define BLIS_PACKDIM_NR_D (BLIS_DEFAULT_NR_D + ...) //#define BLIS_PACKDIM_MR_C (BLIS_DEFAULT_MR_C + ...) //#define BLIS_PACKDIM_NR_C (BLIS_DEFAULT_NR_C + ...) //#define BLIS_PACKDIM_MR_Z (BLIS_DEFAULT_MR_Z + ...) 
//#define BLIS_PACKDIM_NR_Z (BLIS_DEFAULT_NR_Z + ...) // -- LEVEL-2 KERNEL CONSTANTS ------------------------------------------------- // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ // -- LEVEL-3 KERNEL DEFINITIONS ----------------------------------------------- // -- gemm -- // -- trsm-related -- // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- // -- axpy2v -- // -- dotaxpyv -- // -- axpyf -- // -- dotxf -- // -- dotxaxpyf -- // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- // -- addv -- // -- axpyv -- // -- copyv -- // -- dotv -- // -- dotxv -- // -- invertv -- // -- scal2v -- // -- scalv -- // -- setv -- // -- subv -- // -- swapv -- #endif blis-0.6.1/config/old/emscripten/make_defs.mk000066400000000000000000000053051360743507500211270ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Only include this block of code once. ifndef MAKE_DEFS_MK_INCLUDED MAKE_DEFS_MK_INCLUDED := yes # # --- Development tools definitions -------------------------------------------- # # --- Determine the C compiler and related flags --- CC := emcc CC_VENDOR := emcc # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). 
CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=c99 CPICFLAGS := -fPIC CDBGFLAGS := #-g4 CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors COPTFLAGS := -O2 CKOPTFLAGS := -O3 CKVECFLAGS := # --- Determine the archiver and related flags --- AR := emar RANLIB := emranlib ARFLAGS := cr # --- Determine the linker and related flags --- LINKER := $(CC) SOFLAGS := -shared LDFLAGS := -O3 -s TOTAL_MEMORY=67108864 -s FORCE_ALIGNED_MEMORY=1 -s PRECISE_F32=2 -s GC_SUPPORT=0 # --- Determine JS interpreter --- JSINT := node # end of ifndef MAKE_DEFS_MK_INCLUDED conditional block endif blis-0.6.1/config/old/haswellbb/000077500000000000000000000000001360743507500164475ustar00rootroot00000000000000blis-0.6.1/config/old/haswellbb/bli_cntx_init_haswell.c000066400000000000000000000256641360743507500231740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Instantiate prototypes for packm kernels. PACKM_KER_PROT( float, s, packm_6xk_bb4_haswell_ref ) PACKM_KER_PROT( double, d, packm_6xk_bb2_haswell_ref ) // Instantiate prototypes for level-3 kernels. GEMM_UKR_PROT( float, s, gemmbb_haswell_ref ) GEMMTRSM_UKR_PROT( float, s, gemmtrsmbb_l_haswell_ref ) GEMMTRSM_UKR_PROT( float, s, gemmtrsmbb_u_haswell_ref ) TRSM_UKR_PROT( float, s, trsmbb_l_haswell_ref ) TRSM_UKR_PROT( float, s, trsmbb_u_haswell_ref ) GEMM_UKR_PROT( double, d, gemmbb_haswell_ref ) GEMMTRSM_UKR_PROT( double, d, gemmtrsmbb_l_haswell_ref ) GEMMTRSM_UKR_PROT( double, d, gemmtrsmbb_u_haswell_ref ) TRSM_UKR_PROT( double, d, trsmbb_l_haswell_ref ) TRSM_UKR_PROT( double, d, trsmbb_u_haswell_ref ) GEMM_UKR_PROT( scomplex, c, gemmbb_haswell_ref ) GEMMTRSM_UKR_PROT( scomplex, c, gemmtrsmbb_l_haswell_ref ) GEMMTRSM_UKR_PROT( scomplex, c, gemmtrsmbb_u_haswell_ref ) TRSM_UKR_PROT( scomplex, c, trsmbb_l_haswell_ref ) TRSM_UKR_PROT( scomplex, c, trsmbb_u_haswell_ref ) GEMM_UKR_PROT( dcomplex, z, gemmbb_haswell_ref ) GEMMTRSM_UKR_PROT( dcomplex, z, gemmtrsmbb_l_haswell_ref ) GEMMTRSM_UKR_PROT( dcomplex, z, gemmtrsmbb_u_haswell_ref ) TRSM_UKR_PROT( dcomplex, z, trsmbb_l_haswell_ref ) TRSM_UKR_PROT( dcomplex, z, trsmbb_u_haswell_ref ) void bli_cntx_init_haswell( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; blksz_t thresh[ BLIS_NUM_THRESH ]; // Set default kernel blocksizes and functions. 
bli_cntx_init_haswell_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( #if 0 8, // gemm BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, TRUE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, TRUE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, TRUE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, TRUE, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, TRUE, #else 12, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemmbb_haswell_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_FLOAT, bli_strsmbb_l_haswell_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_FLOAT, bli_strsmbb_u_haswell_ref, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemmbb_haswell_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_DOUBLE, bli_dtrsmbb_l_haswell_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_DOUBLE, bli_dtrsmbb_u_haswell_ref, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemmbb_haswell_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_SCOMPLEX, bli_ctrsmbb_l_haswell_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_SCOMPLEX, bli_ctrsmbb_u_haswell_ref, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemmbb_haswell_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_DCOMPLEX, bli_ztrsmbb_l_haswell_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_DCOMPLEX, bli_ztrsmbb_u_haswell_ref, FALSE, #endif cntx ); // Update the context with customized virtual [gemm]trsm micro-kernels. 
bli_cntx_set_l3_vir_ukrs ( 8, BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsmbb_l_haswell_ref, BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsmbb_u_haswell_ref, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsmbb_l_haswell_ref, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsmbb_u_haswell_ref, BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsmbb_l_haswell_ref, BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsmbb_u_haswell_ref, BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsmbb_l_haswell_ref, BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsmbb_u_haswell_ref, cntx ); // Update the context with optimized packm kernels. bli_cntx_set_packm_kers ( 2, BLIS_PACKM_6XK_KER, BLIS_FLOAT, bli_spackm_6xk_bb4_haswell_ref, BLIS_PACKM_6XK_KER, BLIS_DOUBLE, bli_dpackm_6xk_bb2_haswell_ref, cntx ); // Update the context with optimized level-1f kernels. bli_cntx_set_l1f_kers ( 4, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, cntx ); // Update the context with optimized level-1v kernels. 
bli_cntx_set_l1v_kers ( 10, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv #if 0 BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int, #else BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, #endif // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv #if 0 BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int, #else BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, #endif cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z #if 0 bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 168, 72, 75, 192 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MR ], 24, 12, 12, 6 ); bli_blksz_init ( &blkszs[ BLIS_NR ], 6, 6, 6, 6, 24, 12, 6, 6 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 2076 ); #endif bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 8, 8 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, 8, 8 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. 
bli_cntx_set_blkszs ( BLIS_NAT, 7, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, cntx ); // ------------------------------------------------------------------------- // Initialize sup thresholds with architecture-appropriate values. // s d c z bli_blksz_init_easy( &thresh[ BLIS_MT ], -1, 1, -1, -1 ); bli_blksz_init_easy( &thresh[ BLIS_NT ], -1, 1, -1, -1 ); bli_blksz_init_easy( &thresh[ BLIS_KT ], -1, 1, -1, -1 ); // Initialize the context with the sup thresholds. bli_cntx_set_l3_sup_thresh ( 3, BLIS_MT, &thresh[ BLIS_MT ], BLIS_NT, &thresh[ BLIS_NT ], BLIS_KT, &thresh[ BLIS_KT ], cntx ); // Update the context with optimized small/unpacked gemm kernels. bli_cntx_set_l3_sup_kers ( 8, //BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref, BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE, BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE, BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, cntx ); // Initialize level-3 sup blocksize objects with architecture-specific // values. 
// s d c z bli_blksz_init ( &blkszs[ BLIS_MR ], -1, 6, -1, -1, -1, 9, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 72, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 4080, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes for small/unpacked level-3 problems. bli_cntx_set_l3_sup_blkszs ( 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_KC, &blkszs[ BLIS_KC ], BLIS_MC, &blkszs[ BLIS_MC ], BLIS_NR, &blkszs[ BLIS_NR ], BLIS_MR, &blkszs[ BLIS_MR ], cntx ); } blis-0.6.1/config/old/haswellbb/bli_family_haswell.h000066400000000000000000000122601360743507500224470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096 #define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096 #define BLIS_POOL_ADDR_OFFSET_SIZE_A 32 #define BLIS_POOL_ADDR_OFFSET_SIZE_B 64 // Disable right-side hemm, symm, and trmm[3] to accommodate the broadcasting of // elements within the packed matrix B. #define BLIS_DISABLE_HEMM_RIGHT #define BLIS_DISABLE_SYMM_RIGHT #define BLIS_DISABLE_TRMM_RIGHT #define BLIS_DISABLE_TRMM3_RIGHT #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- // -- sgemm micro-kernel -- #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_4x24 #define BLIS_DEFAULT_MC_S 256 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 24 #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 1 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 6 #define BLIS_DEFAULT_NR_S 16 #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 6 #endif // -- dgemm micro-kernel -- #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_4x12 #define BLIS_DEFAULT_MC_D 152 #define BLIS_DEFAULT_KC_D 160 #define BLIS_DEFAULT_NC_D 4080 #define 
BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 12 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 1 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 6 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 6 #endif // -- cgemm micro-kernel -- #if 1 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_3x8 #define BLIS_DEFAULT_MC_C 144 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4080 #define BLIS_DEFAULT_MR_C 3 #define BLIS_DEFAULT_NR_C 8 #define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_8x3 #define BLIS_DEFAULT_MC_C 144 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4080 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 3 #endif // -- zgemm micro-kernel -- #if 1 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 3 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif #if 0 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_4x3 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 3 #endif #endif //#endif blis-0.6.1/config/old/haswellbb/make_defs.mk000066400000000000000000000064101360743507500207170ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := haswell #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. 
CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell ifeq ($(GCC_OT_4_9_0),yes) # If gcc is older than 4.9.0, we must use a different label for -march. CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=core-avx2 endif else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xCORE-AVX2 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=haswell else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/old/loongson3a/000077500000000000000000000000001360743507500165665ustar00rootroot00000000000000blis-0.6.1/config/old/loongson3a/bli_kernel.h000066400000000000000000000146231360743507500210530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 // -- Cache blocksizes -- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // #define BLIS_DEFAULT_MC_S 256 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 8192 #define BLIS_DEFAULT_MC_D 32 #define BLIS_DEFAULT_KC_D 128 #define BLIS_DEFAULT_NC_D 1024 #define BLIS_DEFAULT_MC_C 128 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 2048 // -- Register blocksizes -- #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_DEFAULT_MR_Z 8 #define BLIS_DEFAULT_NR_Z 4 // NOTE: If the micro-kernel, which is typically unrolled to a factor // of f, handles leftover edge cases (ie: when k % f > 0) then these // register blocksizes in the k dimension can be defined to 1. //#define BLIS_DEFAULT_KR_S 1 //#define BLIS_DEFAULT_KR_D 1 //#define BLIS_DEFAULT_KR_C 1 //#define BLIS_DEFAULT_KR_Z 1 // -- Maximum cache blocksizes (for optimizing edge cases) -- // NOTE: These cache blocksize "extensions" have the same constraints as // the corresponding default blocksizes above. When these values are // larger than the default blocksizes, blocksizes used at edge cases are // enlarged if such an extension would encompass the remaining portion of // the matrix dimension. 
//#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) //#define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) //#define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + BLIS_DEFAULT_NC_S/4) //#define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) //#define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) //#define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + BLIS_DEFAULT_NC_D/4) //#define BLIS_MAXIMUM_MC_C (BLIS_DEFAULT_MC_C + BLIS_DEFAULT_MC_C/4) //#define BLIS_MAXIMUM_KC_C (BLIS_DEFAULT_KC_C + BLIS_DEFAULT_KC_C/4) //#define BLIS_MAXIMUM_NC_C (BLIS_DEFAULT_NC_C + BLIS_DEFAULT_NC_C/4) //#define BLIS_MAXIMUM_MC_Z (BLIS_DEFAULT_MC_Z + BLIS_DEFAULT_MC_Z/4) //#define BLIS_MAXIMUM_KC_Z (BLIS_DEFAULT_KC_Z + BLIS_DEFAULT_KC_Z/4) //#define BLIS_MAXIMUM_NC_Z (BLIS_DEFAULT_NC_Z + BLIS_DEFAULT_NC_Z/4) // -- Packing register blocksize (for packed micro-panels) -- // NOTE: These register blocksize "extensions" determine whether the // leading dimensions used within the packed micro-panels are equal to // or greater than their corresponding register blocksizes above. //#define BLIS_PACKDIM_MR_S (BLIS_DEFAULT_MR_S + ...) //#define BLIS_PACKDIM_NR_S (BLIS_DEFAULT_NR_S + ...) //#define BLIS_PACKDIM_MR_D (BLIS_DEFAULT_MR_D + ...) //#define BLIS_PACKDIM_NR_D (BLIS_DEFAULT_NR_D + ...) //#define BLIS_PACKDIM_MR_C (BLIS_DEFAULT_MR_C + ...) //#define BLIS_PACKDIM_NR_C (BLIS_DEFAULT_NR_C + ...) //#define BLIS_PACKDIM_MR_Z (BLIS_DEFAULT_MR_Z + ...) //#define BLIS_PACKDIM_NR_Z (BLIS_DEFAULT_NR_Z + ...) 
// -- LEVEL-2 KERNEL CONSTANTS ------------------------------------------------- // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ // -- LEVEL-3 KERNEL DEFINITIONS ----------------------------------------------- // -- gemm -- #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_4x4 // -- trsm-related -- // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- // -- axpy2v -- // -- dotaxpyv -- // -- axpyf -- // -- dotxf -- // -- dotxaxpyf -- // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- // -- addv -- // -- axpyv -- // -- copyv -- // -- dotv -- // -- dotxv -- // -- invertv -- // -- scal2v -- // -- scalv -- // -- setv -- // -- subv -- // -- swapv -- #endif blis-0.6.1/config/old/loongson3a/make_defs.mk000066400000000000000000000052121360743507500210350ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := loongson3a #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # ifeq ($(CC),) CC := gcc CC_VENDOR := gcc endif # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L -mabi=64 CMISCFLAGS := -std=c99 CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -mtune=loongson3a endif CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=loongson3a else $(error gcc is required for this configuration.) endif # Store all of the variables here to new variables containing the # configuration name. 
$(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/old/newarch/000077500000000000000000000000001360743507500161335ustar00rootroot00000000000000blis-0.6.1/config/old/newarch/bli_kernel.h000066400000000000000000000033001360743507500204060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H #endif blis-0.6.1/config/old/newarch/make_defs.mk000066400000000000000000000053421360743507500204060ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. 
THIS_CONFIG := newarch #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # ifeq ($(CC),) CC := gcc CC_VENDOR := gcc endif # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=c99 CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O2 endif CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/old/pnacl/000077500000000000000000000000001360743507500156015ustar00rootroot00000000000000blis-0.6.1/config/old/pnacl/bli_kernel.h000066400000000000000000000172151360743507500200660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H /* * SIMD-enabled (SP only) PNaCl shipped in Chrome 36 and it is not backward-compatible. * Therefore, if compilation targets an older Chrome release, we use scalar kernels. * The target Chrome version is indicated by the PPAPI_RELEASE macro defined in the header included below.
*/ #include // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 // -- Cache blocksizes -- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // #if PPAPI_RELEASE >= 36 #define BLIS_DEFAULT_MC_S 256 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 8192 #else #define BLIS_DEFAULT_MC_S 252 #define BLIS_DEFAULT_KC_S 264 #define BLIS_DEFAULT_NC_S 8196 #endif #define BLIS_DEFAULT_MC_D 1080 #define BLIS_DEFAULT_KC_D 120 #define BLIS_DEFAULT_NC_D 8400 #if PPAPI_RELEASE >= 36 #define BLIS_DEFAULT_MC_C 128 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4096 #else #define BLIS_DEFAULT_MC_C 120 #define BLIS_DEFAULT_KC_C 264 #define BLIS_DEFAULT_NC_C 4092 #endif #define BLIS_DEFAULT_MC_Z 60 #define BLIS_DEFAULT_KC_Z 264 #define BLIS_DEFAULT_NC_Z 2040 // -- Register blocksizes -- #if PPAPI_RELEASE >= 36 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 4 #else #define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 3 #endif #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 3 #if PPAPI_RELEASE >= 36 #define BLIS_DEFAULT_MR_C 4 #define BLIS_DEFAULT_NR_C 4 #else #define BLIS_DEFAULT_MR_C 2 #define BLIS_DEFAULT_NR_C 3 #endif #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 3 // NOTE: If the micro-kernel, which is typically unrolled to a factor // of f, handles leftover edge cases (ie: when k % f > 0) then these // register blocksizes in the k dimension can be defined to 1. 
//#define BLIS_DEFAULT_KR_S 1 //#define BLIS_DEFAULT_KR_D 1 //#define BLIS_DEFAULT_KR_C 1 //#define BLIS_DEFAULT_KR_Z 1 // -- Maximum cache blocksizes (for optimizing edge cases) -- // NOTE: These cache blocksize "extensions" have the same constraints as // the corresponding default blocksizes above. When these values are // larger than the default blocksizes, blocksizes used at edge cases are // enlarged if such an extension would encompass the remaining portion of // the matrix dimension. //#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) //#define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) //#define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + BLIS_DEFAULT_NC_S/4) //#define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) //#define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) //#define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + BLIS_DEFAULT_NC_D/4) //#define BLIS_MAXIMUM_MC_C (BLIS_DEFAULT_MC_C + BLIS_DEFAULT_MC_C/4) //#define BLIS_MAXIMUM_KC_C (BLIS_DEFAULT_KC_C + BLIS_DEFAULT_KC_C/4) //#define BLIS_MAXIMUM_NC_C (BLIS_DEFAULT_NC_C + BLIS_DEFAULT_NC_C/4) //#define BLIS_MAXIMUM_MC_Z (BLIS_DEFAULT_MC_Z + BLIS_DEFAULT_MC_Z/4) //#define BLIS_MAXIMUM_KC_Z (BLIS_DEFAULT_KC_Z + BLIS_DEFAULT_KC_Z/4) //#define BLIS_MAXIMUM_NC_Z (BLIS_DEFAULT_NC_Z + BLIS_DEFAULT_NC_Z/4) // -- Packing register blocksize (for packed micro-panels) -- // NOTE: These register blocksize "extensions" determine whether the // leading dimensions used within the packed micro-panels are equal to // or greater than their corresponding register blocksizes above. //#define BLIS_PACKDIM_MR_S (BLIS_DEFAULT_MR_S + ...) //#define BLIS_PACKDIM_NR_S (BLIS_DEFAULT_NR_S + ...) //#define BLIS_PACKDIM_MR_D (BLIS_DEFAULT_MR_D + ...) //#define BLIS_PACKDIM_NR_D (BLIS_DEFAULT_NR_D + ...) //#define BLIS_PACKDIM_MR_C (BLIS_DEFAULT_MR_C + ...) //#define BLIS_PACKDIM_NR_C (BLIS_DEFAULT_NR_C + ...) //#define BLIS_PACKDIM_MR_Z (BLIS_DEFAULT_MR_Z + ...) 
//#define BLIS_PACKDIM_NR_Z (BLIS_DEFAULT_NR_Z + ...) // -- LEVEL-2 KERNEL CONSTANTS ------------------------------------------------- // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ // -- LEVEL-3 KERNEL DEFINITIONS ----------------------------------------------- // -- gemm -- #if PPAPI_RELEASE >= 36 #define BLIS_SGEMM_UKERNEL bli_sgemm_opt #define BLIS_CGEMM_UKERNEL bli_cgemm_opt #endif // -- trsm-related -- // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- // -- axpy2v -- // -- dotaxpyv -- // -- axpyf -- // -- dotxf -- // -- dotxaxpyf -- // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- // -- addv -- // -- axpyv -- #if PPAPI_RELEASE >= 36 #define BLIS_SAXPYV_KERNEL bli_saxpyv_opt #define BLIS_CAXPYV_KERNEL bli_caxpyv_opt #endif // -- copyv -- // -- dotv -- #define BLIS_SDOTV_KERNEL bli_sdotv_opt #define BLIS_DDOTV_KERNEL bli_ddotv_opt #define BLIS_CDOTV_KERNEL bli_cdotv_opt #define BLIS_ZDOTV_KERNEL bli_zdotv_opt // -- dotxv -- // -- invertv -- // -- scal2v -- // -- scalv -- // -- setv -- // -- subv -- // -- swapv -- #endif blis-0.6.1/config/old/pnacl/make_defs.mk000066400000000000000000000056411360743507500200560ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Only include this block of code once. ifndef MAKE_DEFS_MK_INCLUDED MAKE_DEFS_MK_INCLUDED := yes # # --- Development tools definitions -------------------------------------------- # # --- Determine the C compiler and related flags --- CC := pnacl-clang CC_VENDOR := pnacl-clang # Enable IEEE Standard 1003.1-2004 (POSIX.1d). # NOTE: This is needed to enable posix_memalign(). 
CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=gnu11 -I$(NACL_SDK_ROOT)/include CPICFLAGS := CDBGFLAGS := -g CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors COPTFLAGS := -O3 CKOPTFLAGS := $(COPTFLAGS) -ffast-math CKVECFLAGS := # --- Determine the archiver and related flags --- AR := pnacl-ar ARFLAGS := rcs # --- Determine the linker and related flags --- LINKER := $(CC) SOFLAGS := ifneq ($(CC_VENDOR),icc) LDFLAGS := -lm endif # --- Determine the finalizer and related flags --- FINALIZER := pnacl-finalize FINFLAGS := # --- Determine the translator and related flags --- TRANSLATOR := pnacl-translate TRNSFLAGS := -O3 TRNSAMD64FLAGS := -arch x86-64 TRNSX86FLAGS := -arch i686 TRNSARMFLAGS := -arch armv7 # end of ifndef MAKE_DEFS_MK_INCLUDED conditional block endif blis-0.6.1/config/penryn/000077500000000000000000000000001360743507500152415ustar00rootroot00000000000000blis-0.6.1/config/penryn/bli_cntx_init_penryn.c000066400000000000000000000065111360743507500216300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_penryn( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_penryn_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 4, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_penryn_asm_8x4, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_penryn_asm_4x4, FALSE, //BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_penryn_asm_8x4, FALSE, //BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_penryn_asm_4x4, FALSE, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_penryn_asm_4x4, FALSE, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_penryn_asm_4x4, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. 
// s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 4, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 4, 4, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 768, 384, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 384, 384, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, 0, 0 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/penryn/bli_family_penryn.h000066400000000000000000000061321360743507500211160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_8x4 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 4 #define BLIS_DEFAULT_MC_S 768 #define BLIS_DEFAULT_KC_S 384 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_4x4 #define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 384 #define BLIS_DEFAULT_KC_D 384 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_DGEMMTRSM_L_UKERNEL bli_dgemmtrsm_l_asm_4x4 #define BLIS_DGEMMTRSM_U_UKERNEL bli_dgemmtrsm_u_asm_4x4 // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- #define BLIS_DAXPY2V_KERNEL bli_daxpy2v_int_var1 #define BLIS_DDOTAXPYV_KERNEL bli_ddotaxpyv_int_var1 #define BLIS_DAXPYF_KERNEL bli_daxpyf_int_var1 #define BLIS_DDOTXF_KERNEL bli_ddotxf_int_var1 #define BLIS_DDOTXAXPYF_KERNEL bli_ddotxaxpyf_int_var1 // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- #define BLIS_DAXPYV_KERNEL bli_daxpyv_opt_var1 #define BLIS_DDOTV_KERNEL bli_ddotv_opt_var1 #endif //#endif blis-0.6.1/config/penryn/make_defs.mk000066400000000000000000000061171360743507500175150ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := penryn #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. 
CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xSSSE3 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/piledriver/000077500000000000000000000000001360743507500160735ustar00rootroot00000000000000blis-0.6.1/config/piledriver/bli_cntx_init_piledriver.c000066400000000000000000000063051360743507500233150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_piledriver( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_piledriver_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 4, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_piledriver_asm_16x3, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_piledriver_asm_8x3, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_piledriver_asm_4x2, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_piledriver_asm_2x2, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. 
// s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 4, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 3, 3, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 2016, 1008, 512, 400 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 128, 128, 256, 160 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8400, 8400, 8400, 8400 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/piledriver/bli_family_piledriver.h000066400000000000000000000057171360743507500226120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x3 #define BLIS_DEFAULT_MC_S 2016 #define BLIS_DEFAULT_KC_S 128 #define BLIS_DEFAULT_NC_S 8400 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 3 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x3 #define BLIS_DEFAULT_MC_D 1008 #define BLIS_DEFAULT_KC_D 128 #define BLIS_DEFAULT_NC_D 8400 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 3 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_4x2 #define BLIS_DEFAULT_MC_C 512 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 8400 #define BLIS_DEFAULT_MR_C 4 #define BLIS_DEFAULT_NR_C 2 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_2x2 #define BLIS_DEFAULT_MC_Z 400 #define BLIS_DEFAULT_KC_Z 160 #define BLIS_DEFAULT_NC_Z 8400 #define BLIS_DEFAULT_MR_Z 2 #define BLIS_DEFAULT_NR_Z 2 #endif //#endif blis-0.6.1/config/piledriver/make_defs.mk000066400000000000000000000061451360743507500203500ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := piledriver #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. 
CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/power7/000077500000000000000000000000001360743507500151515ustar00rootroot00000000000000blis-0.6.1/config/power7/bli_cntx_init_power7.c000066400000000000000000000057441360743507500214570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_power7( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_power7_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 1, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_power7_int_8x4, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 0, 8, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 0, 4, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 0, 64, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 0, 256, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 0, 4096, 0, 0 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. 
bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/power7/bli_family_power7.h000066400000000000000000000040621360743507500207360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_8x4 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 4 #define BLIS_DEFAULT_MC_D 64 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4096 #endif //#endif blis-0.6.1/config/power7/make_defs.mk000066400000000000000000000056511360743507500174270ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := power7 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := -mcpu=power7 CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -mtune=power7 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mvsx else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. 
$(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/power9/000077500000000000000000000000001360743507500151535ustar00rootroot00000000000000blis-0.6.1/config/power9/bli_cntx_init_power9.c000066400000000000000000000136501360743507500214560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Instantiate prototypes for packm kernels. 
PACKM_KER_PROT( float, s, packm_6xk_bb4_power9_ref ) PACKM_KER_PROT( double, d, packm_6xk_bb2_power9_ref ) // Instantiate prototypes for level-3 kernels. GEMM_UKR_PROT( float, s, gemmbb_power9_ref ) GEMMTRSM_UKR_PROT( float, s, gemmtrsmbb_l_power9_ref ) GEMMTRSM_UKR_PROT( float, s, gemmtrsmbb_u_power9_ref ) TRSM_UKR_PROT( float, s, trsmbb_l_power9_ref ) TRSM_UKR_PROT( float, s, trsmbb_u_power9_ref ) GEMM_UKR_PROT( double, d, gemmbb_power9_ref ) GEMMTRSM_UKR_PROT( double, d, gemmtrsmbb_l_power9_ref ) GEMMTRSM_UKR_PROT( double, d, gemmtrsmbb_u_power9_ref ) TRSM_UKR_PROT( double, d, trsmbb_l_power9_ref ) TRSM_UKR_PROT( double, d, trsmbb_u_power9_ref ) GEMM_UKR_PROT( scomplex, c, gemmbb_power9_ref ) GEMMTRSM_UKR_PROT( scomplex, c, gemmtrsmbb_l_power9_ref ) GEMMTRSM_UKR_PROT( scomplex, c, gemmtrsmbb_u_power9_ref ) TRSM_UKR_PROT( scomplex, c, trsmbb_l_power9_ref ) TRSM_UKR_PROT( scomplex, c, trsmbb_u_power9_ref ) GEMM_UKR_PROT( dcomplex, z, gemmbb_power9_ref ) GEMMTRSM_UKR_PROT( dcomplex, z, gemmtrsmbb_l_power9_ref ) GEMMTRSM_UKR_PROT( dcomplex, z, gemmtrsmbb_u_power9_ref ) TRSM_UKR_PROT( dcomplex, z, trsmbb_l_power9_ref ) TRSM_UKR_PROT( dcomplex, z, trsmbb_u_power9_ref ) void bli_cntx_init_power9( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_power9_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. 
bli_cntx_set_l3_nat_ukrs ( 12, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemmbb_power9_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_FLOAT, bli_strsmbb_l_power9_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_FLOAT, bli_strsmbb_u_power9_ref, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_power9_asm_12x6, FALSE, BLIS_TRSM_L_UKR, BLIS_DOUBLE, bli_dtrsmbb_l_power9_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_DOUBLE, bli_dtrsmbb_u_power9_ref, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemmbb_power9_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_SCOMPLEX, bli_ctrsmbb_l_power9_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_SCOMPLEX, bli_ctrsmbb_u_power9_ref, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemmbb_power9_ref, FALSE, BLIS_TRSM_L_UKR, BLIS_DCOMPLEX, bli_ztrsmbb_l_power9_ref, FALSE, BLIS_TRSM_U_UKR, BLIS_DCOMPLEX, bli_ztrsmbb_u_power9_ref, FALSE, cntx ); // Update the context with customized virtual [gemm]trsm micro-kernels. bli_cntx_set_l3_vir_ukrs ( 8, BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsmbb_l_power9_ref, BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsmbb_u_power9_ref, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsmbb_l_power9_ref, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsmbb_u_power9_ref, BLIS_GEMMTRSM_L_UKR, BLIS_SCOMPLEX, bli_cgemmtrsmbb_l_power9_ref, BLIS_GEMMTRSM_U_UKR, BLIS_SCOMPLEX, bli_cgemmtrsmbb_u_power9_ref, BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsmbb_l_power9_ref, BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsmbb_u_power9_ref, cntx ); // Update the context with optimized packm kernels. 
bli_cntx_set_packm_kers ( 2, BLIS_PACKM_6XK_KER, BLIS_FLOAT, bli_spackm_6xk_bb4_power9_ref, BLIS_PACKM_6XK_KER, BLIS_DOUBLE, bli_dpackm_6xk_bb2_power9_ref, cntx ); bli_blksz_init_easy( &blkszs[ BLIS_MR ], -1, 12, -1, -1 ); bli_blksz_init ( &blkszs[ BLIS_NR ], -1, 6, -1, -1, -1, 12, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 576, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 1408, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 8190, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/power9/bli_family_power9.h000066400000000000000000000040511360743507500207400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096 #define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096 #define BLIS_POOL_ADDR_OFFSET_SIZE_A 192 #define BLIS_POOL_ADDR_OFFSET_SIZE_B 152 // Disable right-side hemm, symm, and trmm[3] to accommodate the broadcasting of // elements within the packed matrix B. #define BLIS_DISABLE_HEMM_RIGHT #define BLIS_DISABLE_SYMM_RIGHT #define BLIS_DISABLE_TRMM_RIGHT #define BLIS_DISABLE_TRMM3_RIGHT blis-0.6.1/config/power9/make_defs.mk000066400000000000000000000054701360743507500174300ustar00rootroot00000000000000 # # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2019, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := power9 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mcpu=power9 -mtune=power9 -DXLC=0 else ifeq ($(CC_VENDOR),IBM) CKVECFLAGS := -qarch=pwr9 -qtune=pwr9 -DXLC=1 else $(info $(CC_VENDOR)) $(error gcc/xlc is required for this configuration.) endif endif # Flags specific to reference kernels. 
CROPTFLAGS := $(CKOPTFLAGS) CRVECFLAGS := $(CKVECFLAGS) # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/sandybridge/000077500000000000000000000000001360743507500162215ustar00rootroot00000000000000blis-0.6.1/config/sandybridge/bli_cntx_init_sandybridge.c000066400000000000000000000063071360743507500235730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_sandybridge( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_sandybridge_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 4, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_sandybridge_asm_8x8, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_sandybridge_asm_8x4, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_sandybridge_asm_8x4, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_sandybridge_asm_4x4, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 8, 4, 4, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 128, 96, 96, 64 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 384, 256, 256, 192 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, 4096, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. 
bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/sandybridge/bli_family_sandybridge.h000066400000000000000000000053741360743507500230650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_8x8 #define BLIS_DEFAULT_MC_S 128 #define BLIS_DEFAULT_KC_S 384 #define BLIS_DEFAULT_NC_S 4096 #define BLIS_DEFAULT_MR_S 8 #define BLIS_DEFAULT_NR_S 8 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x4 #define BLIS_DEFAULT_MC_D 96 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4096 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 4 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_8x4 #define BLIS_DEFAULT_MC_C 96 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MR_C 8 #define BLIS_DEFAULT_NR_C 4 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_4x4 #define BLIS_DEFAULT_MC_Z 64 #define BLIS_DEFAULT_KC_Z 192 #define BLIS_DEFAULT_NC_Z 4096 #define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 4 #endif //#endif blis-0.6.1/config/sandybridge/make_defs.mk000066400000000000000000000063721360743507500205000ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := sandybridge #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx -mfpmath=sse -march=sandybridge ifeq ($(GCC_OT_4_9_0),yes) # If gcc is older than 4.9.0, we must use a different label for -march. CKVECFLAGS := -mavx -mfpmath=sse -march=corei7-avx endif else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xAVX else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx -mfpmath=sse -march=sandybridge else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. 
CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/skx/000077500000000000000000000000001360743507500145335ustar00rootroot00000000000000blis-0.6.1/config/skx/bli_cntx_init_skx.c000066400000000000000000000112331360743507500204110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_skx( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_skx_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 2, // gemm BLIS_GEMM_UKR, BLIS_FLOAT , bli_sgemm_skx_asm_32x12_l2, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_skx_asm_16x14, FALSE, cntx ); // Update the context with optimized level-1f kernels. bli_cntx_set_l1f_kers ( 4, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, cntx ); // Update the context with optimized level-1v kernels. 
bli_cntx_set_l1v_kers ( 10, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv #if 0 BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int, #else BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, #endif // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv #if 0 BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int, #else BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, #endif cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 16, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 14, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 480, 240, -1, -1 ); bli_blksz_init ( &blkszs[ BLIS_KC ], 384, 384, -1, -1, 480, 480, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3752, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 7, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, cntx ); } blis-0.6.1/config/skx/bli_family_skx.h000066400000000000000000000113171360743507500177030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- THREADING PARAMETERS ----------------------------------------------------- #define BLIS_THREAD_RATIO_M 3 #define BLIS_THREAD_RATIO_N 2 #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 4 // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 64 #define BLIS_SIMD_SIZE 64 #define BLIS_SIMD_NUM_REGISTERS 32 //#include //#define BLIS_MALLOC_POOL malloc //#define BLIS_FREE_POOL free #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- // -- Cache and register blocksizes -- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // #define BLIS_DGEMM_UKERNEL bli_dgemm_opt_16x12_l2 #define BLIS_DEFAULT_MC_D 144 #define BLIS_DEFAULT_KC_D 336 #define BLIS_DEFAULT_NC_D 5760 #define BLIS_DEFAULT_MR_D 16 #define BLIS_DEFAULT_NR_D 12 #define BLIS_PACKDIM_MR_D 16 #define BLIS_PACKDIM_NR_D 12 // NOTE: If the micro-kernel, which is typically unrolled to a factor // of f, handles leftover edge cases (ie: when k % f > 0) then these // register blocksizes in the k dimension can be defined to 1. //#define BLIS_DEFAULT_KR_S 1 //#define BLIS_DEFAULT_KR_D 1 //#define BLIS_DEFAULT_KR_C 1 //#define BLIS_DEFAULT_KR_Z 1 // -- Maximum cache blocksizes (for optimizing edge cases) -- // NOTE: These cache blocksize "extensions" have the same constraints as // the corresponding default blocksizes above. When these values are // larger than the default blocksizes, blocksizes used at edge cases are // enlarged if such an extension would encompass the remaining portion of // the matrix dimension. 
#define BLIS_MAXIMUM_MC_S (BLIS_DEFAULT_MC_S + BLIS_DEFAULT_MC_S/4) #define BLIS_MAXIMUM_KC_S (BLIS_DEFAULT_KC_S + BLIS_DEFAULT_KC_S/4) #define BLIS_MAXIMUM_NC_S (BLIS_DEFAULT_NC_S + 0) #define BLIS_MAXIMUM_MC_D (BLIS_DEFAULT_MC_D + BLIS_DEFAULT_MC_D/4) #define BLIS_MAXIMUM_KC_D (BLIS_DEFAULT_KC_D + BLIS_DEFAULT_KC_D/4) #define BLIS_MAXIMUM_NC_D (BLIS_DEFAULT_NC_D + 0) //#define BLIS_MAXIMUM_MC_C (BLIS_DEFAULT_MC_C + BLIS_DEFAULT_MC_C/4) //#define BLIS_MAXIMUM_KC_C (BLIS_DEFAULT_KC_C + BLIS_DEFAULT_KC_C/4) //#define BLIS_MAXIMUM_NC_C (BLIS_DEFAULT_NC_C + BLIS_DEFAULT_NC_C/4) //#define BLIS_MAXIMUM_MC_Z (BLIS_DEFAULT_MC_Z + BLIS_DEFAULT_MC_Z/4) //#define BLIS_MAXIMUM_KC_Z (BLIS_DEFAULT_KC_Z + BLIS_DEFAULT_KC_Z/4) //#define BLIS_MAXIMUM_NC_Z (BLIS_DEFAULT_NC_Z + BLIS_DEFAULT_NC_Z/4) #endif //#endif blis-0.6.1/config/skx/make_defs.mk000066400000000000000000000074331360743507500170110ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := skx #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=skylake-avx512 else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xCORE-AVX512 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mavx512f -mavx512dq -mavx512bw -mavx512vl -mfpmath=sse -march=skylake-avx512 else $(error gcc, icc, or clang is required for this configuration.) 
endif endif endif # The assembler on OS X won't recognize AVX512 without help ifneq ($(CC_VENDOR),icc) ifeq ($(OS_NAME),Darwin) CKVECFLAGS += -Wa,-march=skylake-avx512 endif endif # Flags specific to reference kernels. # Note: We use AVX2 for reference kernels because, as Jeff Hammond says, # reference kernel code "is not going to achieve high enough SIMD utilization # to overcome the AVX-512 frequency drop". (Issue #187) CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),icc) CRVECFLAGS := -xCORE-AVX2 else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := -march=skylake-avx512 -mno-avx512f -mno-avx512vl -mno-avx512bw -mno-avx512dq -mno-avx512cd -funsafe-math-optimizations -ffp-contract=fast else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/steamroller/000077500000000000000000000000001360743507500162575ustar00rootroot00000000000000blis-0.6.1/config/steamroller/bli_cntx_init_steamroller.c000066400000000000000000000063071360743507500236670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_steamroller( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_steamroller_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 4, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_piledriver_asm_16x3, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_piledriver_asm_8x3, FALSE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_piledriver_asm_4x2, FALSE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_piledriver_asm_2x2, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. 
// s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 4, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 3, 3, 2, 2 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 2016, 1008, 512, 400 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 128, 128, 256, 160 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8400, 8400, 8400, 8400 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/steamroller/bli_family_steamroller.h000066400000000000000000000035051360743507500231530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 //#endif blis-0.6.1/config/steamroller/make_defs.mk000066400000000000000000000061461360743507500205350ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
# Name of this configuration. The store-make-defs call at the bottom of this
# file copies the variables defined here into variables whose names contain
# this configuration name.
THIS_CONFIG    := steamroller
#CONFIGS_INCL   += $(THIS_CONFIG)

#
# --- Determine the C compiler and related flags ---
#

# NOTE: common.mk appends general-purpose, configuration-agnostic flags to
# these variables; only configuration-specific additions belong here.

CPPROCFLAGS    :=
CMISCFLAGS     :=
CPICFLAGS      :=
CWARNFLAGS     :=

# Debug symbols are emitted for every debug type except "off".
ifneq ($(DEBUG_TYPE),off)
CDBGFLAGS      := -g
endif

# Optimization is disabled only for the "noopt" debug type.
ifneq ($(DEBUG_TYPE),noopt)
COPTFLAGS      := -O3
else
COPTFLAGS      := -O0
endif

# Flags specific to optimized kernels. Only gcc and clang are accepted; any
# other vendor aborts the build.
CKOPTFLAGS     := $(COPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CKVECFLAGS     := -mfpmath=sse -mavx -mfma -march=bdver3 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
else ifeq ($(CC_VENDOR),clang)
CKVECFLAGS     := -mfpmath=sse -mavx -mfma -march=bdver3 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
else
$(error gcc or clang are required for this configuration.)
endif

# Flags specific to reference kernels: the optimized-kernel vector flags,
# plus relaxed floating-point flags for gcc and clang.
CROPTFLAGS     := $(CKOPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CRVECFLAGS     := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else ifeq ($(CC_VENDOR),clang)
CRVECFLAGS     := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
CRVECFLAGS     := $(CKVECFLAGS)
endif

# Store all of the variables here to new variables containing the
# configuration name.
$(eval $(call store-make-defs,$(THIS_CONFIG)))
*/ #include "blis.h" void bli_cntx_init_template( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_template_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 5, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_template_noopt, FALSE, BLIS_GEMMTRSM_L_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_l_template_noopt, FALSE, BLIS_GEMMTRSM_U_UKR, BLIS_DCOMPLEX, bli_zgemmtrsm_u_template_noopt, FALSE, BLIS_TRSM_L_UKR, BLIS_DCOMPLEX, bli_ztrsm_l_template_noopt, FALSE, BLIS_TRSM_U_UKR, BLIS_DCOMPLEX, bli_ztrsm_u_template_noopt, FALSE, cntx ); // Update the context with optimized level-1f kernels. bli_cntx_set_l1f_kers ( BLIS_AXPY2V_KER, BLIS_DCOMPLEX, bli_zaxpy2v_template_noopt, BLIS_DOTAXPYV_KER, BLIS_DCOMPLEX, bli_zdotaxpyv_template_noopt, BLIS_AXPYF_KER, BLIS_DCOMPLEX, bli_zaxpyf_template_noopt, BLIS_DOTXF_KER, BLIS_DCOMPLEX, bli_zdotxf_template_noopt, BLIS_DOTXAXPYF_KER, BLIS_DCOMPLEX, bli_zdotxaxpyf_template_noopt, cntx ); // Update the context with optimized level-1v kernels. bli_cntx_set_l1v_kers ( BLIS_AXPYV_KER, BLIS_DCOMPLEX, bli_zaxpyv_template_noopt, BLIS_DOTV_KER, BLIS_DCOMPLEX, bli_zdotv_template_noopt, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 0, 0, 0, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 0, 0, 0, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 0, 0, 0, 128 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 0, 0, 0, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 0, 0, 0, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. 
bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/template/bli_family_template.h000066400000000000000000000033071360743507500217170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-0.6.1/config/template/kernels/000077500000000000000000000000001360743507500172045ustar00rootroot00000000000000blis-0.6.1/config/template/kernels/1/000077500000000000000000000000001360743507500173445ustar00rootroot00000000000000blis-0.6.1/config/template/kernels/1/bli_axpyv_template_noopt_var1.c000066400000000000000000000154101360743507500255510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zaxpyv_template_noopt ( conj_t conjx, dim_t n, dcomplex* restrict alpha, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, cntx_t* restrict cntx ) { /* Template axpyv kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel performs a vector scale and accumulate (axpy) operation: y := y + alpha * conjx( x ) where x and y are vectors of length n and alpha is a scalar. Parameters: - conjx: Compute with conjugated values of x? - n: The number of elements in vectors x and y. - alpha: The address of a scalar. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - y: The address of vector y. - incy: The vector increment of y. incy should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Either of the strides incx or incy is non-unit. 
- Vectors x and y are unaligned with different offsets. If the vectors are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* xp; dcomplex* yp; bool_t use_ref = FALSE; dim_t n_pre = 0; dim_t n_iter; dim_t n_left; dim_t off_x, off_y; dim_t i; if ( bli_zero_dim1( n ) ) return; if ( bli_zeq0( *alpha ) ) return; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( bli_has_nonunit_inc2( incx, incy ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and y are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); if ( off_x == off_y ) { use_ref = FALSE; n_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zaxpyv_ft f = bli_zaxpyv_template_ref; f ( conjx, n, alpha, x, incx, y, incy, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. 
n_iter = ( n - n_pre ) / n_elem_per_iter; n_left = ( n - n_pre ) % n_elem_per_iter; // Initialize pointers into x and y. xp = x; yp = y; // Iterate over elements of x and y to compute: // y += alpha * conjx( x ); if ( bli_is_noconj( conjx ) ) { // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpys( *alpha, *xp, *yp ); xp += 1; yp += 1; } // The bulk of the operation is executed here. The addresses xp and // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpys( *alpha, *xp, *yp ); xp += n_elem_per_iter; yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpys( *alpha, *xp, *yp ); xp += 1; yp += 1; } } else // if ( bli_is_conj( conjx ) ) { // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpyjs( *alpha, *xp, *yp ); xp += 1; yp += 1; } // The bulk of the operation is executed here. The addresses xp and // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpyjs( *alpha, *xp, *yp ); xp += n_elem_per_iter; yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpyjs( *alpha, *xp, *yp ); xp += 1; yp += 1; } } } blis-0.6.1/config/template/kernels/1/bli_dotv_template_noopt_var1.c000066400000000000000000000172501360743507500253620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zdotv_template_noopt ( conj_t conjx, conj_t conjy, dim_t n, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, dcomplex* restrict rho, cntx_t* restrict cntx ) { /* Template dotv kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) 
This kernel performs an inner (dot) product operation: rho := conjx( x^T ) * conjy( y ) where x and y are vectors of length n and rho is a scalar. Parameters: - conjx: Compute with conjugated values of x? - conjy: Compute with conjugated values of y? - n: The number of elements in vectors x and y. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - y: The address of vector y. - incy: The vector increment of y. incy should be unit unless the implementation makes special accomodation for non-unit values. - rho: The address of the output scalar. This template code calls the reference implementation if any of the following conditions are true: - Either of the strides incx or incy is non-unit. - Vectors x and y are unaligned with different offsets. If the vectors are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - While four combinations of possible values of conjx and conjy exist, we implement only conjugation on x explicitly; we induce the other two cases by toggling the effective conjugation on x and then conjugating the dot product result. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. 
-FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* xp; dcomplex* yp; dcomplex dotxy; bool_t use_ref = FALSE; dim_t n_pre = 0; dim_t n_iter; dim_t n_left; dim_t off_x, off_y; dim_t i; conj_t conjx_use; // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { bli_zset0s( *rho ); return; } // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( bli_has_nonunit_inc2( incx, incy ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and y are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); if ( off_x == off_y ) { use_ref = FALSE; n_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zdotv_ft f = bli_zdotv_template_ref; f ( conjx, conjy, n, x, incx, y, incy, rho, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. n_iter = ( n - n_pre ) / n_elem_per_iter; n_left = ( n - n_pre ) % n_elem_per_iter; // Initialize pointers into x and y. xp = x; yp = y; // Initialize accumulator to zero. bli_zset0s( dotxy ); conjx_use = conjx; // If y must be conjugated, we compute the result indirectly by first // toggling the effective conjugation of x and then conjugating the // resulting dot product. if ( bli_is_conj( conjy ) ) bli_toggle_conj( &conjx_use ); // Iterate over elements of x and y to compute: // rho = conjx( x^T ) * conjy( y ); if ( bli_is_noconj( conjx_use ) ) { // Compute front edge cases if x and y were unaligned. 
for ( i = 0; i < n_pre; ++i ) { bli_zdots( *xp, *yp, dotxy ); xp += 1; yp += 1; } // The bulk of the operation is executed here. The addresses xp and // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdots( *xp, *yp, dotxy ); xp += n_elem_per_iter; yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdots( *xp, *yp, dotxy ); xp += 1; yp += 1; } } else // if ( bli_is_conj( conjx_use ) ) { // Compute front edge cases if x and y were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); xp += 1; yp += 1; } // The bulk of the operation is executed here. The addresses xp and // yp are guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); xp += n_elem_per_iter; yp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); xp += 1; yp += 1; } } // If conjugation on y was requested, we induce it by conjugating // the contents of dotxy. if ( bli_is_conj( conjy ) ) bli_zconjs( dotxy ); bli_zcopys( dotxy, *rho ); } blis-0.6.1/config/template/kernels/1f/000077500000000000000000000000001360743507500175125ustar00rootroot00000000000000blis-0.6.1/config/template/kernels/1f/bli_axpy2v_template_noopt_var1.c000066400000000000000000000240661360743507500260100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zaxpy2v_template_noopt ( conj_t conjx, conj_t conjy, dim_t n, dcomplex* restrict alpha1, dcomplex* restrict alpha2, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, dcomplex* restrict z, inc_t incz, cntx_t* restrict cntx ) { /* Template axpy2v kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) 
This kernel fuses two axpyv operations: z := z + alpha1 * conjx( x ) z := z + alpha2 * conjy( y ) where x, y, and z are vectors of length n and alpha1 and alpha2 are scalars. Parameters: - conjx: Compute with conjugated values of x? - conjy: Compute with conjugated values of y? - n: The number of elements in vectors x, y, and z. - alpha1: The address of the scalar to be applied to x. - alpha2: The address of the scalar to be applied to y. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - y: The address of vector y. - incy: The vector increment of y. incy should be unit unless the implementation makes special accomodation for non-unit values. - z: The address of vector z. - incz: The vector increment of z. incz should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Any of the strides incx, incy, or incz is non-unit. - Vectors x, y, and z are unaligned with different offsets. If the vectors are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Here are a few additional things to consider: - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. 
-FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* xp; dcomplex* yp; dcomplex* zp; bool_t use_ref = FALSE; dim_t n_pre = 0; dim_t n_iter; dim_t n_left; dim_t off_x, off_y, off_z; dim_t i; // Return early if possible. if ( bli_zero_dim1( n ) ) return; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( bli_has_nonunit_inc3( incx, incy, incz ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( z, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and y are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); off_z = bli_offset_from_alignment( z, BLIS_SIMD_ALIGN_SIZE ); if ( off_x == off_y && off_x == off_z ) { use_ref = FALSE; n_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zaxpy2v_ft f = bli_zaxpy2v_template_ref; f ( conjx, conjy, n, alpha1, alpha2, x, incx, y, incy, z, incz, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. n_iter = ( n - n_pre ) / n_elem_per_iter; n_left = ( n - n_pre ) % n_elem_per_iter; // Initialize pointers into x, y, and z. xp = x; yp = y; zp = z; // Iterate over rows of x, y, and z to compute: // z += alpha1 * conjx( x ) + alpha2 * conjy( y ); if ( bli_is_noconj( conjx ) && bli_is_noconj( conjy ) ) { // Compute front edge cases if x, y, and z were unaligned. 
for ( i = 0; i < n_pre; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha1 and alpha2 should be loaded once prior to the n_iter // loop and the elements of z should be loaded and stored only once // each. The addresses xp, yp, and zp are guaranteed to be aligned // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } } else if ( bli_is_noconj( conjx ) && bli_is_conj( conjy ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha1 and alpha2 should be loaded once prior to the n_iter // loop and the elements of z should be loaded and stored only once // each. The addresses xp, yp, and zp are guaranteed to be aligned // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpys( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } } else if ( bli_is_conj( conjx ) && bli_is_noconj( conjy ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. 
For best performance, // alpha1 and alpha2 should be loaded once prior to the n_iter // loop and the elements of z should be loaded and stored only once // each. The addresses xp, yp, and zp are guaranteed to be aligned // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpys( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } } else // if ( bli_is_conj( conjx ) && bli_is_conj( conjy ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha1 and alpha2 should be loaded once prior to the n_iter // loop and the elements of z should be loaded and stored only once // each. The addresses xp, yp, and zp are guaranteed to be aligned // to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zaxpyjs( *alpha1, *xp, *zp ); bli_zaxpyjs( *alpha2, *yp, *zp ); xp += 1; yp += 1; zp += 1; } } } blis-0.6.1/config/template/kernels/1f/bli_axpyf_template_noopt_var1.c000066400000000000000000000223431360743507500257020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"


void bli_zaxpyf_template_noopt
     (
       conj_t             conja,
       conj_t             conjx,
       dim_t              m,
       dim_t              b_n,
       dcomplex* restrict alpha,
       dcomplex* restrict a, inc_t inca, inc_t lda,
       dcomplex* restrict x, inc_t incx,
       dcomplex* restrict y, inc_t incy,
       cntx_t*   restrict cntx
     )
{
/*
  Template axpyf kernel implementation

  This function contains a template implementation for a double-precision
  complex kernel, coded in C, which can serve as the starting point for one
  to write an optimized kernel on an arbitrary architecture. (We show a
  template implementation for only double-precision complex because the
  templates for the other three floating-point types would be similar, with
  the real instantiations being noticeably simpler due to the disappearance
  of conjugation in the real domain.)

  This kernel performs the following gemv-like operation:

    y := y + alpha * conja( A ) * conjx( x )

  where A is an m x b_n matrix, x is a vector of length b_n, y is a vector
  of length m, and alpha is a scalar. The operation is performed as a series
  of fused axpyv operations, and therefore A should be column-stored.

  Parameters:

  - conja:  Compute with conjugated values of A?
  - conjx:  Compute with conjugated values of x?
  - m:      The number of rows in matrix A.
  - b_n:    The number of columns in matrix A. Must be equal to or less than
            the fusing factor.
  - alpha:  The address of a scalar.
  - a:      The address of matrix A.
  - inca:   The row stride of A. inca should be unit unless the
            implementation makes special accommodation for non-unit values.
  - lda:    The column stride of A.
  - x:      The address of vector x.
  - incx:   The vector increment of x.
  - y:      The address of vector y.
  - incy:   The vector increment of y. incy should be unit unless the
            implementation makes special accommodation for non-unit values.
  - cntx:   The context; it is only forwarded to the reference
            implementation.

  This template code calls the reference implementation if any of the
  following conditions are true:

  - b_n differs from the fusing factor.
  - Any of the strides inca, incx, or incy is non-unit.
  - The address of A, the second column of A, and y are unaligned with
    different offsets.

  If the first/second columns of A and address of y are aligned, or unaligned
  by the same offset, then optimized code can be used for the bulk of the
  computation. This template shows how the front-edge case can be handled so
  that the remaining computation is aligned. (This template is intended to
  provide alignment in the main loops to BLIS_SIMD_ALIGN_SIZE.)

  Additional things to consider:

  - When optimizing, you should fully unroll the loops over b_n. This is the
    dimension across which we are fusing axpyv operations.
  - This template code chooses to call the reference implementation whenever
    b_n is not equal to the fusing factor, so as to avoid having to handle
    edge cases. One may choose to optimize this edge case, if desired.
  - Because conjugation disappears in the real domain, real instances of
    this kernel can safely ignore the values of any conjugation parameters,
    thereby simplifying the implementation.

  For more info, please refer to the BLIS website and/or contact the
  blis-devel mailing list.

  -FGVZ
*/
	const dim_t n_elem_per_reg  = 1;
	const dim_t n_iter_unroll   = 1;

	const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll;

	const siz_t type_size       = sizeof( *a );

	dcomplex*   ap[ bli_zaxpyf_fusefac ];
	dcomplex*   xp[ bli_zaxpyf_fusefac ];
	dcomplex*   yp;

	dcomplex    alpha_x[ bli_zaxpyf_fusefac ];

	bool_t      use_ref = FALSE;

	dim_t       m_pre = 0;
	dim_t       m_iter;
	dim_t       m_left;

	dim_t       off_a, off_a2, off_y;

	dim_t       i, j;

	// Return early if possible.
	if ( bli_zero_dim2( m, b_n ) ) return;

	// If there is anything that would interfere with our use of aligned
	// vector loads/stores, call the reference implementation.
	if ( b_n != bli_zaxpyf_fusefac )
	{
		// The local arrays ap[], xp[], and alpha_x[] hold exactly
		// bli_zaxpyf_fusefac entries and are indexed by j < b_n below, so
		// a b_n larger than the fusing factor must never reach the
		// optimized path. Smaller values are sent to the reference code to
		// avoid handling edge cases here.
		use_ref = TRUE;
	}
	else if ( bli_has_nonunit_inc3( inca, incx, incy ) )
	{
		use_ref = TRUE;
	}
	else if ( bli_is_unaligned_to( a,     BLIS_SIMD_ALIGN_SIZE ) ||
	          bli_is_unaligned_to( a+lda, BLIS_SIMD_ALIGN_SIZE ) ||
	          bli_is_unaligned_to( y,     BLIS_SIMD_ALIGN_SIZE ) )
	{
		use_ref = TRUE;

		// If a, the second column of a, and y are unaligned by the same
		// offset, then we can still use an implementation that depends on
		// alignment for most of the operation.
		off_a  = bli_offset_from_alignment( a,     BLIS_SIMD_ALIGN_SIZE );
		off_a2 = bli_offset_from_alignment( a+lda, BLIS_SIMD_ALIGN_SIZE );
		off_y  = bli_offset_from_alignment( y,     BLIS_SIMD_ALIGN_SIZE );

		if ( off_a == off_y && off_a == off_a2 )
		{
			use_ref = FALSE;

			// NOTE(review): this uses the offset itself as the number of
			// bytes to peel off. If bli_offset_from_alignment() returns
			// the distance *past* the previous boundary, the distance to
			// the next boundary would instead be
			// BLIS_SIMD_ALIGN_SIZE - off_a. Confirm against the macro
			// definition before relying on alignment in the main loop.
			m_pre = off_a / type_size;
		}
	}

	// Call the reference implementation if needed.
	if ( use_ref == TRUE )
	{
		zaxpyf_ft f = bli_zaxpyf_template_ref;

		f
		(
		  conja,
		  conjx,
		  m,
		  b_n,
		  alpha,
		  a, inca, lda,
		  x, incx,
		  y, incy,
		  cntx
		);
		return;
	}

	// Never peel off more front-edge rows than the problem actually has.
	// Without this clamp, a short problem (m < m_pre) would make the
	// front-edge loop below read and write past the end of A and y.
	if ( m_pre > m ) m_pre = m;

	// Compute the number of unrolled and leftover (edge) iterations.
	m_iter = ( m - m_pre ) / n_elem_per_iter;
	m_left = ( m - m_pre ) % n_elem_per_iter;

	// Initialize pointers into the columns of A and elements of x.
	for ( j = 0; j < b_n; ++j )
	{
		ap[ j ] = a + (j  )*lda;
		xp[ j ] = x + (j  )*incx;
	}
	yp = y;

	// Load elements of x or conj(x) into alpha_x and scale by alpha.
	if ( bli_is_noconj( conjx ) )
	{
		for ( j = 0; j < b_n; ++j )
		{
			bli_zcopys( *xp[ j ], alpha_x[ j ] );
			bli_zscals( *alpha, alpha_x[ j ] );
		}
	}
	else // if ( bli_is_conj( conjx ) )
	{
		for ( j = 0; j < b_n; ++j )
		{
			bli_zcopyjs( *xp[ j ], alpha_x[ j ] );
			bli_zscals( *alpha, alpha_x[ j ] );
		}
	}

	// Iterate over rows of A and y to compute:
	//   y += conja( A ) * ( alpha * conjx( x ) );
	if ( bli_is_noconj( conja ) )
	{
		// Compute front edge cases if a and y were unaligned.
		for ( i = 0; i < m_pre; ++i )
		{
			for ( j = 0; j < b_n; ++j )
			{
				bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp );

				ap[ j ] += 1;
			}

			yp += 1;
		}

		// The bulk of the operation is executed here. For best performance,
		// the elements of alpha_x should be loaded once prior to the m_iter
		// loop, and the b_n loop should be fully unrolled. The addresses in
		// ap[] and yp are intended to be aligned to BLIS_SIMD_ALIGN_SIZE.
		for ( i = 0; i < m_iter; ++i )
		{
			for ( j = 0; j < b_n; ++j )
			{
				bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp );

				ap[ j ] += n_elem_per_iter;
			}

			yp += n_elem_per_iter;
		}

		// Compute tail edge cases, if applicable.
		for ( i = 0; i < m_left; ++i )
		{
			for ( j = 0; j < b_n; ++j )
			{
				bli_zaxpys( alpha_x[ j ], *ap[ j ], *yp );

				ap[ j ] += 1;
			}

			yp += 1;
		}
	}
	else // if ( bli_is_conj( conja ) )
	{
		// Compute front edge cases if a and y were unaligned.
		for ( i = 0; i < m_pre; ++i )
		{
			for ( j = 0; j < b_n; ++j )
			{
				bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp );

				ap[ j ] += 1;
			}

			yp += 1;
		}

		// The bulk of the operation is executed here. For best performance,
		// the elements of alpha_x should be loaded once prior to the m_iter
		// loop, and the b_n loop should be fully unrolled. The addresses in
		// ap[] and yp are intended to be aligned to BLIS_SIMD_ALIGN_SIZE.
		for ( i = 0; i < m_iter; ++i )
		{
			for ( j = 0; j < b_n; ++j )
			{
				bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp );

				ap[ j ] += n_elem_per_iter;
			}

			yp += n_elem_per_iter;
		}

		// Compute tail edge cases, if applicable.
		for ( i = 0; i < m_left; ++i )
		{
			for ( j = 0; j < b_n; ++j )
			{
				bli_zaxpyjs( alpha_x[ j ], *ap[ j ], *yp );

				ap[ j ] += 1;
			}

			yp += 1;
		}
	}
}

blis-0.6.1/config/template/kernels/1f/bli_dotaxpyv_template_noopt_var1.c000066400000000000000000000260401360743507500264270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zdotaxpyv_template_noopt ( conj_t conjxt, conj_t conjx, conj_t conjy, dim_t n, dcomplex* restrict alpha, dcomplex* restrict x, inc_t incx, dcomplex* restrict y, inc_t incy, dcomplex* restrict rho, dcomplex* restrict z, inc_t incz, cntx_t* restrict cntx ) { /* Template dotaxpyv kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel fuses a dotv and axpyv operation: rho := conjxt( x^T ) * conjy( y ) z := z + alpha * conjx( x ) where x, y, and z are vectors of length n and alpha1 and alpha2 are scalars. Parameters: - conjxt: Compute with conjugated values of x^T? - conjx: Compute with conjugated values of x? - conjy: Compute with conjugated values of y? - n: The number of elements in vectors x, y, and z. - alpha: The address of the scalar to be applied to x. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - y: The address of vector y. - incy: The vector increment of y. 
incy should be unit unless the implementation makes special accomodation for non-unit values. - rho: The address of the output scalar of the dotv subproblem. - z: The address of vector z. - incz: The vector increment of z. incz should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Any of the strides incx, incy, or incz is non-unit. - Vectors x, y, and z are unaligned with different offsets. If the vectors are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Here are a few additional things to consider: - While four combinations of possible values of conjx and conjy exist, we implement only conjugation on x explicitly; we induce the other two cases by toggling the effective conjugation on x and then conjugating the dot product result. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* xp; dcomplex* yp; dcomplex* zp; dcomplex dotxy; bool_t use_ref = FALSE; dim_t n_pre = 0; dim_t n_iter; dim_t n_left; dim_t off_x, off_y, off_z; dim_t i; conj_t conjxt_use; // If the vector lengths are zero, set rho to zero and return. 
if ( bli_zero_dim1( n ) ) { bli_zset0s( *rho ); return; } // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( bli_has_nonunit_inc3( incx, incy, incz ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( y, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( z, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If x, y, and z are unaligned by the same offset, then we can // still use an implementation that depends on alignment for most // of the operation. off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); off_y = bli_offset_from_alignment( y, BLIS_SIMD_ALIGN_SIZE ); off_z = bli_offset_from_alignment( z, BLIS_SIMD_ALIGN_SIZE ); if ( off_x == off_y && off_x == off_z ) { use_ref = FALSE; n_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zdotaxpyv_ft f = bli_zdotaxpyv_template_ref; f ( conjxt, conjx, conjy, n, alpha, x, incx, y, incy, rho, z, incz, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. n_iter = ( n - n_pre ) / n_elem_per_iter; n_left = ( n - n_pre ) % n_elem_per_iter; // Initialize pointers into x, y, and z. xp = x; yp = y; zp = z; // Initialize accumulator to zero. bli_zset0s( dotxy ); conjxt_use = conjxt; // If y must be conjugated, we compute the result indirectly by first // toggling the effective conjugation of xt and then conjugating the // resulting dot product. if ( bli_is_conj( conjy ) ) bli_toggle_conj( &conjxt_use ); // Iterate over elements of x, y, and z to compute: // r = conjxt( x^T ) * conjy( y ); // z += alpha * conjx( x ); if ( bli_is_noconj( conjx ) && bli_is_noconj( conjxt_use ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. 
For best performance, // alpha should be loaded once prior to the n_iter loop, dotxy // should be and kept in registers, and each element of x should be // loaded only once each. The addresses xp, yp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } } else if ( bli_is_noconj( conjx ) && bli_is_conj( conjxt_use ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha should be loaded once prior to the n_iter loop, dotxy // should be and kept in registers, and each element of x should be // loaded only once each. The addresses xp, yp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpys( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } } else if ( bli_is_conj( conjx ) && bli_is_noconj( conjxt_use ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha should be loaded once prior to the n_iter loop, dotxy // should be and kept in registers, and each element of x should be // loaded only once each. 
The addresses xp, yp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdots( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } } else // if ( bli_is_conj( conjx ) && bli_is_conj( conjxt_use ) ) { // Compute front edge cases if x, y, and z were unaligned. for ( i = 0; i < n_pre; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // alpha should be loaded once prior to the n_iter loop, dotxy // should be and kept in registers, and each element of x should be // loaded only once each. The addresses xp, yp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < n_iter; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += n_elem_per_iter; yp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < n_left; ++i ) { bli_zdotjs( *xp, *yp, dotxy ); bli_zaxpyjs( *alpha, *xp, *zp ); xp += 1; yp += 1; zp += 1; } } // If conjugation on y was requested, we induce it by conjugating // the contents of rho. if ( bli_is_conj( conjy ) ) bli_zconjs( dotxy ); bli_zcopys( dotxy, *rho ); } blis-0.6.1/config/template/kernels/1f/bli_dotxaxpyf_template_noopt_var1.c000066400000000000000000000340001360743507500265720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_zdotxaxpyf_template_noopt ( conj_t conjat, conj_t conja, conj_t conjw, conj_t conjx, dim_t m, dim_t b_n, dcomplex* restrict alpha, dcomplex* restrict a, inc_t inca, inc_t lda, dcomplex* restrict w, inc_t incw, dcomplex* restrict x, inc_t incx, dcomplex* restrict beta, dcomplex* restrict y, inc_t incy, dcomplex* restrict z, inc_t incz, cntx_t* restrict cntx ) { /* Template dotxaxpyf kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel performs the following two gemv-like operations: y := beta * y + alpha * conjat( A^T ) * conjw( w ) z := z + alpha * conja( A ) * conjx( x ) where A is an m x b_n matrix, x and y are vector of length b_n, w and z are vectors of length m, and alpha and beta are scalars. The operation fuses a dotxf and an axpyf operation, and therefore A should be column- stored. Parameters: - conjat: Compute with conjugated values of A^T? - conja: Compute with conjugated values of A? - conjw: Compute with conjugated values of w? - conjx: Compute with conjugated values of x? - m: The number of rows in matrix A. - b_n: The number of columns in matrix A. Must be equal to or less than the fusing factor. - alpha: The address of the scalar to be applied to A^T*w and A*x. - a: The address of matrix A. - inca: The row stride of A. inca should be unit unless the implementation makes special accomodation for non-unit values. - lda: The column stride of A. - w: The address of vector w. - incw: The vector increment of w. 
incw should be unit unless the implementation makes special accomodation for non-unit values. - x: The address of vector x. - incx: The vector increment of x. - beta: The address of the scalar to be applied to y. - y: The address of vector y. - incy: The vector increment of y. - z: The address of vector z. - incz: The vector increment of z. incz should be unit unless the implementation makes special accomodation for non-unit values. This template code calls the reference implementation if any of the following conditions are true: - Any of the strides inca, incw, or incz is non-unit. - The address of A, the second column of A, w, and z are unaligned with different offsets. If the first/second rows of A and addresses of w and z are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - When optimizing, you should fully unroll the loops over b_n. This is the dimension across which we are fusing dotxv operations. - This template code chooses to call the reference implementation whenever b_n is less than the fusing factor, so as to avoid having to handle edge cases. One may choose to optimize this edge case, if desired. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. 
-FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *a ); dcomplex* ap[ bli_zdotxaxpyf_fusefac ]; dcomplex* xp[ bli_zdotxaxpyf_fusefac ]; dcomplex* yp[ bli_zdotxaxpyf_fusefac ]; dcomplex* wp; dcomplex* zp; dcomplex At_w[ bli_zdotxaxpyf_fusefac ]; dcomplex alpha_x[ bli_zdotxaxpyf_fusefac ]; bool_t use_ref = FALSE; dim_t m_pre = 0; dim_t m_iter; dim_t m_left; dim_t off_a, off_a2, off_w, off_z; dim_t i, j; conj_t conjat_use; // Return early if possible. if ( bli_zero_dim2( m, b_n ) ) return; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( b_n < bli_zdotxaxpyf_fusefac ) { use_ref = TRUE; } else if ( bli_has_nonunit_inc3( inca, incw, incz ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( a, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( a+lda, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( w, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( z, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, w, and z are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_a = bli_offset_from_alignment( a, BLIS_SIMD_ALIGN_SIZE ); off_a2 = bli_offset_from_alignment( a+lda, BLIS_SIMD_ALIGN_SIZE ); off_w = bli_offset_from_alignment( w, BLIS_SIMD_ALIGN_SIZE ); off_z = bli_offset_from_alignment( z, BLIS_SIMD_ALIGN_SIZE ); if ( off_a == off_a2 && off_a == off_w && off_a == off_z ) { use_ref = FALSE; m_pre = off_a / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zdotxaxpyf_ft f = bli_zdotxaxpyf_template_ref; f ( conjat, conja, conjw, conjx, m, b_n, alpha, a, inca, lda, w, incw, x, incx, beta, y, incy, z, incz, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. 
m_iter = ( m - m_pre ) / n_elem_per_iter; m_left = ( m - m_pre ) % n_elem_per_iter; // Initialize pointers into the columns of A and elements of x. for ( j = 0; j < b_n; ++j ) { ap[ j ] = a + (j )*lda; xp[ j ] = x + (j )*incx; yp[ j ] = y + (j )*incy; } wp = w; zp = z; // Load elements of x or conj(x) into alpha_x and scale by alpha. if ( bli_is_noconj( conjx ) ) { for ( j = 0; j < b_n; ++j ) { bli_zcopys( *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } else // if ( bli_is_conj( conjx ) ) { for ( j = 0; j < b_n; ++j ) { bli_zcopyjs( *xp[ j ], alpha_x[ j ] ); bli_zscals( *alpha, alpha_x[ j ] ); } } // Initialize our accumulators to zero. for ( j = 0; j < b_n; ++j ) { bli_zset0s( At_w[ j ] ); } conjat_use = conjat; // If w must be conjugated, we compute the result indirectly by first // toggling the effective conjugation of At and then conjugating the // resulting dot products. if ( bli_is_conj( conjw ) ) bli_toggle_conj( &conjat_use ); // Iterate over the columns of A and elements of w and z to compute: // y = beta * y + alpha * conjat( A^T ) * conjw( w ); // z = z + alpha * conja( A ) * conjx( x ); // where A is m x b_n. if ( bli_is_noconj( conja ) && bli_is_noconj( conjat_use ) ) { // Compute front edge cases if A, w, and z were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, At_w should be kept in registers, and the b_n loop should // be fully unrolled. The addresses in ap[], wp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. 
for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } wp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } } else if ( bli_is_noconj( conja ) && bli_is_conj( conjat_use ) ) { // Compute front edge cases if A, w, and z were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, At_w should be kept in registers, and the b_n loop should // be fully unrolled. The addresses in ap[], wp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } wp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdots( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } } else if ( bli_is_conj( conja ) && bli_is_noconj( conjat_use ) ) { // Compute front edge cases if A, w, and z were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } // The bulk of the operation is executed here. 
For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, At_w should be kept in registers, and the b_n loop should // be fully unrolled. The addresses in ap[], wp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } wp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdots( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } } else if ( bli_is_conj( conja ) && bli_is_conj( conjat_use ) ) { // Compute front edge cases if A, w, and z were unaligned. for ( i = 0; i < m_pre; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of alpha_x should be loaded once prior to the m_iter // loop, At_w should be kept in registers, and the b_n loop should // be fully unrolled. The addresses in ap[], wp, and zp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( i = 0; i < m_iter; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += n_elem_per_iter; } wp += n_elem_per_iter; zp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( i = 0; i < m_left; ++i ) { for ( j = 0; j < b_n; ++j ) { bli_zdotjs( *ap[ j ], *wp, At_w[ j ] ); bli_zdotjs( *ap[ j ], alpha_x[ j ], *zp ); ap[ j ] += 1; } wp += 1; zp += 1; } } // If conjugation on w was requested, we induce it by conjugating // the contents of At_w. 
if ( bli_is_conj( conjw ) ) { for ( j = 0; j < b_n; ++j ) { bli_zconjs( At_w[ j ] ); } } // Scale the At_w product by alpha and accumulate into y after // scaling by beta. for ( j = 0; j < b_n; ++j ) { bli_zscals( *beta, *yp[ j ] ); bli_zaxpys( *alpha, At_w[ j ], *yp[ j ] ); } } blis-0.6.1/config/template/kernels/1f/bli_dotxf_template_noopt_var1.c000066400000000000000000000236251360743507500257030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_zdotxf_template_noopt ( conj_t conjat, conj_t conjx, dim_t m, dim_t b_n, dcomplex* restrict alpha, dcomplex* restrict a, inc_t inca, inc_t lda, dcomplex* restrict x, inc_t incx, dcomplex* restrict beta, dcomplex* restrict y, inc_t incy, cntx_t* restrict cntx ) { /* Template dotxf kernel implementation This function contains a template implementation for a double-precision complex kernel, coded in C, which can serve as the starting point for one to write an optimized kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be similar, with the real instantiations being noticeably simpler due to the disappearance of conjugation in the real domain.) This kernel performs the following gemv-like operation: y := beta * y + alpha * conjat( A^T ) * conjx( x ) where A is an m x b_n matrix, x is a vector of length m, y is a vector of length b_n, and alpha and beta are scalars. The operation is performed as a series of fused dotxv operations, and therefore A should be column- stored. Parameters: - conjat: Compute with conjugated values of A^T? - conjx: Compute with conjugated values of x? - m: The number of rows in matrix A. - b_n: The number of columns in matrix A. Must be equal to or less than the fusing factor. - alpha: The address of the scalar to be applied to A*x. - a: The address of matrix A. - inca: The row stride of A. inca should be unit unless the implementation makes special accomodation for non-unit values. - lda: The column stride of A. - x: The address of vector x. - incx: The vector increment of x. incx should be unit unless the implementation makes special accomodation for non-unit values. - beta: The address of the scalar to be applied to y. - y: The address of vector y. - incy: The vector increment of y. 
This template code calls the reference implementation if any of the following conditions are true: - Either of the strides inca or incx is non-unit. - The address of A, the second column of A, and x are unaligned with different offsets. If the first/second columns of A and address of x are aligned, or unaligned by the same offset, then optimized code can be used for the bulk of the computation. This template shows how the front-edge case can be handled so that the remaining computation is aligned. (This template guarantees alignment in the main loops to be BLIS_SIMD_ALIGN_SIZE.) Additional things to consider: - When optimizing, you should fully unroll the loops over b_n. This is the dimension across which we are fusing dotxv operations. - This template code chooses to call the reference implementation whenever b_n is less than the fusing factor, so as to avoid having to handle edge cases. One may choose to optimize this edge case, if desired. - Because conjugation disappears in the real domain, real instances of this kernel can safely ignore the values of any conjugation parameters, thereby simplifying the implementation. For more info, please refer to the BLIS website and/or contact the blis-devel mailing list. -FGVZ */ const dim_t n_elem_per_reg = 1; const dim_t n_iter_unroll = 1; const dim_t n_elem_per_iter = n_elem_per_reg * n_iter_unroll; const siz_t type_size = sizeof( *x ); dcomplex* ap[ bli_zdotxf_fusefac ]; dcomplex* xp; dcomplex* yp[ bli_zdotxf_fusefac ]; dcomplex Atx[ bli_zdotxf_fusefac ]; bool_t use_ref = FALSE; dim_t m_pre = 0; dim_t m_iter; dim_t m_left; dim_t off_a, off_a2, off_x; dim_t i, j; conj_t conjat_use; // Return early if possible. if ( bli_zero_dim1( b_n ) ) return; // If the vector lengths are zero, scale r by beta and return. 
if ( bli_zero_dim1( m ) ) { bli_zscalv_ex ( BLIS_NO_CONJUGATE, b_n, beta, y, incy, cntx ); return; } // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( b_n < bli_zdotxf_fusefac ) { use_ref = TRUE; } else if ( bli_has_nonunit_inc2( inca, incx ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( a, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( a+lda, BLIS_SIMD_ALIGN_SIZE ) || bli_is_unaligned_to( x, BLIS_SIMD_ALIGN_SIZE ) ) { use_ref = TRUE; // If a, the second column of a, and x are unaligned by the same // offset, then we can still use an implementation that depends on // alignment for most of the operation. off_a = bli_offset_from_alignment( a, BLIS_SIMD_ALIGN_SIZE ); off_a2 = bli_offset_from_alignment( a+lda, BLIS_SIMD_ALIGN_SIZE ); off_x = bli_offset_from_alignment( x, BLIS_SIMD_ALIGN_SIZE ); if ( off_a == off_a2 && off_a == off_x ) { use_ref = FALSE; m_pre = off_x / type_size; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { zdotxf_ft f = bli_zdotxf_template_ref; f ( conjat, conjx, m, b_n, alpha, a, inca, lda, x, incx, beta, y, incy, cntx ); return; } // Compute the number of unrolled and leftover (edge) iterations. m_iter = ( m - m_pre ) / n_elem_per_iter; m_left = ( m - m_pre ) % n_elem_per_iter; // Initialize pointers into the rows of A and elements of y. for ( i = 0; i < b_n; ++i ) { ap[ i ] = a + (i )*lda; yp[ i ] = y + (i )*incy; } xp = x; // Initialize our accumulators to zero. for ( i = 0; i < b_n; ++i ) { bli_zset0s( Atx[ i ] ); } conjat_use = conjat; // If x must be conjugated, we compute the result indirectly by first // toggling the effective conjugation of A and then conjugating the // resulting product A^T*x. 
if ( bli_is_conj( conjx ) ) bli_toggle_conj( &conjat_use ); // Iterate over columns of A and rows of x to compute: // Atx = conjat_use( A^T ) * x; if ( bli_is_noconj( conjat_use ) ) { // Compute front edge cases if A and y were unaligned. for ( j = 0; j < m_pre; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } xp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of Atx should be kept in registers, and the b_n loop // should be fully unrolled. The addresses in ap[] and xp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( j = 0; j < m_iter; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += n_elem_per_iter; } xp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( j = 0; j < m_left; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdots( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } xp += 1; } } else // if ( bli_is_conj( conjat_use ) ) { // Compute front edge cases if A and y were unaligned. for ( j = 0; j < m_pre; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } xp += 1; } // The bulk of the operation is executed here. For best performance, // the elements of Atx should be kept in registers, and the b_n loop // should be fully unrolled. The addresses in ap[] and xp are // guaranteed to be aligned to BLIS_SIMD_ALIGN_SIZE. for ( j = 0; j < m_iter; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += n_elem_per_iter; } xp += n_elem_per_iter; } // Compute tail edge cases, if applicable. for ( j = 0; j < m_left; ++j ) { for ( i = 0; i < b_n; ++i ) { bli_zzzdotjs( *ap[ i ], *xp, Atx[ i ] ); ap[ i ] += 1; } xp += 1; } } // If conjugation on y was requested, we induce it by conjugating // the contents of Atx. 
if ( bli_is_conj( conjx ) ) { for ( i = 0; i < b_n; ++i ) { bli_zconjs( Atx[ i ] ); } } // Scale the Atx product by alpha and accumulate into y after // scaling by beta. for ( i = 0; i < b_n; ++i ) { bli_zzscals( *beta, *yp[ i ] ); bli_zzzaxpys( *alpha, Atx[ i ], *yp[ i ] ); } } blis-0.6.1/config/template/kernels/3/000077500000000000000000000000001360743507500173465ustar00rootroot00000000000000blis-0.6.1/config/template/kernels/3/bli_gemm_template_noopt_mxn.c000066400000000000000000000110401360743507500252550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zgemm_template_noopt ( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a1, dcomplex* restrict b1, dcomplex* restrict beta, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { /* Template gemm micro-kernel implementation This function contains a template implementation for a double-precision complex micro-kernel, coded in C, which can serve as the starting point for one to write an optimized micro-kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be nearly identical.) This micro-kernel performs a matrix-matrix multiplication of the form: C11 := beta * C11 + alpha * A1 * B1 where A1 is MR x k, B1 is k x NR, C11 is MR x NR, and alpha and beta are scalars. For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. 
-FGVZ */ const num_t dt = BLIS_DCOMPLEX; const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const inc_t cs_a = packmr; const inc_t rs_b = packnr; const inc_t rs_ab = 1; const inc_t cs_ab = mr; dim_t l, j, i; dcomplex ab[ bli_zmr * bli_znr ]; dcomplex* abij; dcomplex ai, bj; /* Initialize the accumulator elements in ab to zero. */ for ( i = 0; i < mr * nr; ++i ) { bli_zset0s( *(ab + i) ); } /* Perform a series of k rank-1 updates into ab. */ for ( l = 0; l < k; ++l ) { abij = ab; /* In an optimized implementation, these two loops over MR and NR are typically fully unrolled. */ for ( j = 0; j < nr; ++j ) { bj = *(b1 + j); for ( i = 0; i < mr; ++i ) { ai = *(a1 + i); bli_zdots( ai, bj, *abij ); abij += rs_ab; } } a1 += cs_a; b1 += rs_b; } /* Scale each element of ab by alpha. */ for ( i = 0; i < mr * nr; ++i ) { bli_zscals( *alpha, *(ab + i) ); } /* If beta is zero, overwrite c11 with the scaled result in ab. Otherwise, scale c11 by beta and then add the scaled result in ab. */ if ( bli_zeq0( *beta ) ) { /* c11 := ab */ bli_zcopys_mxn( mr, nr, ab, rs_ab, cs_ab, c11, rs_c, cs_c ); } else { /* c11 := beta * c11 + ab */ bli_zxpbys_mxn( mr, nr, ab, rs_ab, cs_ab, beta, c11, rs_c, cs_c ); } } blis-0.6.1/config/template/kernels/3/bli_gemmtrsm_l_template_noopt_mxn.c000066400000000000000000000063201360743507500265030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zgemmtrsm_l_template_noopt ( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a10, dcomplex* restrict a11, dcomplex* restrict b01, dcomplex* restrict b11, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { /* Template gemmtrsm_l micro-kernel implementation This function contains a template implementation for a double-precision complex micro-kernel that fuses a gemm with a trsm_l subproblem. This micro-kernel performs the following compound operation: B11 := alpha * B11 - A10 * B01 (gemm) B11 := inv(A11) * B11 (trsm) C11 := B11 where A11 is MR x MR and lower triangular, A10 is MR x k, B01 is k x NR, B11 is MR x NR, and alpha is a scalar. Here, inv() denotes matrix inverse. 
For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. -FGVZ */ const num_t dt = BLIS_DCOMPLEX; const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const inc_t rs_b = packnr; const inc_t cs_b = 1; dcomplex* restrict minus_one = bli_zm1; /* b11 = alpha * b11 - a10 * b01; */ bli_zgemm_template_noopt ( k, minus_one, a10, b01, alpha, b11, rs_b, cs_b, data ); /* b11 = inv(a11) * b11; c11 = b11; */ bli_ztrsm_l_template_noopt ( a11, b11, c11, rs_c, cs_c, data ); } blis-0.6.1/config/template/kernels/3/bli_gemmtrsm_u_template_noopt_mxn.c000066400000000000000000000063171360743507500265220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_zgemmtrsm_u_template_noopt ( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a10, dcomplex* restrict a11, dcomplex* restrict b01, dcomplex* restrict b11, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { /* Template gemmtrsm_u micro-kernel implementation This function contains a template implementation for a double-precision complex micro-kernel that fuses a gemm with a trsm_u subproblem. This micro-kernel performs the following compound operation: B11 := alpha * B11 - A12 * B21 (gemm) B11 := inv(A11) * B11 (trsm) C11 := B11 where A11 is MR x MR and upper triangular, A12 is MR x k, B21 is k x NR, B11 is MR x NR, and alpha is a scalar. Here, inv() denotes matrix inverse. For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. 
-FGVZ */ const num_t dt = BLIS_DCOMPLEX; const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const inc_t rs_b = packnr; const inc_t cs_b = 1; dcomplex* restrict minus_one = bli_zm1; /* b11 = alpha * b11 - a12 * b21; */ bli_zgemm_template_noopt ( k, minus_one, a12, b21, alpha, b11, rs_b, cs_b, data ); /* b11 = inv(a11) * b11; c11 = b11; */ bli_ztrsm_u_template_noopt ( a11, b11, c11, rs_c, cs_c, data ); } blis-0.6.1/config/template/kernels/3/bli_trsm_l_template_noopt_mxn.c000066400000000000000000000111321360743507500256320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_ztrsm_l_template_noopt ( dcomplex* restrict a11, dcomplex* restrict b11, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { /* Template trsm_l micro-kernel implementation This function contains a template implementation for a double-precision complex trsm micro-kernel, coded in C, which can serve as the starting point for one to write an optimized micro-kernel on an arbitrary architecture. (We show a template implementation for only double-precision complex because the templates for the other three floating-point types would be nearly identical.) This micro-kernel performs the following operation: C11 := inv(A11) * B11 where A11 is MR x MR and lower triangular, B11 is MR x NR, and C11 is MR x NR. For more info, please refer to the BLIS website's wiki on kernels: https://github.com/flame/blis/wiki/KernelsHowTo and/or contact the blis-devel mailing list. 
-FGVZ */ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); const dim_t m = mr; const dim_t n = nr; const inc_t rs_a = 1; const inc_t cs_a = packmr; const inc_t rs_b = packnr; const inc_t cs_b = 1; dim_t iter, i, j, l; dim_t n_behind; dcomplex* restrict alpha11; dcomplex* restrict a10t; dcomplex* restrict alpha10; dcomplex* restrict X0; dcomplex* restrict x1; dcomplex* restrict x01; dcomplex* restrict chi01; dcomplex* restrict chi11; dcomplex* restrict gamma11; dcomplex rho11; for ( iter = 0; iter < m; ++iter ) { i = iter; n_behind = i; alpha11 = a11 + (i )*rs_a + (i )*cs_a; a10t = a11 + (i )*rs_a + (0 )*cs_a; X0 = b11 + (0 )*rs_b + (0 )*cs_b; x1 = b11 + (i )*rs_b + (0 )*cs_b; /* x1 = x1 - a10t * X0; */ /* x1 = x1 / alpha11; */ for ( j = 0; j < n; ++j ) { x01 = X0 + (0 )*rs_b + (j )*cs_b; chi11 = x1 + (0 )*rs_b + (j )*cs_b; gamma11 = c11 + (i )*rs_c + (j )*cs_c; /* chi11 = chi11 - a10t * x01; */ bli_zset0s( rho11 ); for ( l = 0; l < n_behind; ++l ) { alpha10 = a10t + (l )*cs_a; chi01 = x01 + (l )*rs_b; bli_zaxpys( *alpha10, *chi01, rho11 ); } bli_zsubs( rho11, *chi11 ); /* chi11 = chi11 / alpha11; */ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ bli_zscals( *alpha11, *chi11 ); /* Output final result to matrix C. */ bli_zcopys( *chi11, *gamma11 ); } } } blis-0.6.1/config/template/kernels/3/bli_trsm_u_template_noopt_mxn.c000066400000000000000000000111451360743507500256470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "blis.h"



void bli_ztrsm_u_template_noopt
     (
       dcomplex*  restrict a11,
       dcomplex*  restrict b11,
       dcomplex*  restrict c11, inc_t rs_c, inc_t cs_c,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
/*
  Template trsm_u micro-kernel implementation

  This function contains a template implementation for a double-precision
  complex trsm micro-kernel, coded in C, which can serve as the starting
  point for one to write an optimized micro-kernel on an arbitrary
  architecture. (We show a template implementation for only double-precision
  complex because the templates for the other three floating-point types
  would be nearly identical.)

  This micro-kernel performs the following operation:

    C11 := inv(A11) * B11

  where A11 is MR x MR and upper triangular, B11 is MR x NR, and C11 is
  MR x NR. B11 is overwritten with the solution as well, one row at a time
  from bottom to top, and each solved element is also copied to C11.

  Parameters:

  - a11:  The address of A11, accessed with unit row stride and column
          stride PACKMR. Its diagonal is expected to hold the inverses of
          the true diagonal elements (see the NOTE in the loop body).
  - b11:  The address of B11, accessed with row stride PACKNR and unit
          column stride.
  - c11:  The address of C11, with row/column strides rs_c/cs_c.
  - data: Auxiliary kernel information; not read by this implementation.
  - cntx: The context from which MR, NR, PACKMR and PACKNR are queried.

  For more info, please refer to the BLIS website's wiki on kernels:

    https://github.com/flame/blis/wiki/KernelsHowTo

  and/or contact the blis-devel mailing list.

  -FGVZ
*/
	/* The datatype for which blocksizes are queried from the context. */
	const num_t        dt     = BLIS_DCOMPLEX;

	const dim_t        mr     = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx );
	const dim_t        nr     = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx );

	const inc_t        packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx );
	const inc_t        packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx );

	const dim_t        m      = mr;
	const dim_t        n      = nr;

	const inc_t        rs_a   = 1;
	const inc_t        cs_a   = packmr;

	const inc_t        rs_b   = packnr;
	const inc_t        cs_b   = 1;

	dim_t              iter, i, j, l;
	dim_t              n_behind;

	dcomplex* restrict alpha11;
	dcomplex* restrict a12t;
	dcomplex* restrict alpha12;
	dcomplex* restrict X2;
	dcomplex* restrict x1;
	dcomplex* restrict x21;
	dcomplex* restrict chi21;
	dcomplex* restrict chi11;
	dcomplex* restrict gamma11;
	dcomplex           rho11;

	/* Backward substitution: solve one row of B11 per iteration, bottom to
	   top, using the n_behind rows below it that have already been
	   solved. */
	for ( iter = 0; iter < m; ++iter )
	{
		i        = m - iter - 1;
		n_behind = iter;
		alpha11  = a11 + (i  )*rs_a + (i  )*cs_a;
		a12t     = a11 + (i  )*rs_a + (i+1)*cs_a;
		x1       = b11 + (i  )*rs_b + (0  )*cs_b;
		X2       = b11 + (i+1)*rs_b + (0  )*cs_b;

		/* x1 = x1 - a12t * X2; */
		/* x1 = x1 / alpha11; */
		for ( j = 0; j < n; ++j )
		{
			chi11   = x1  + (0  )*rs_b + (j  )*cs_b;
			x21     = X2  + (0  )*rs_b + (j  )*cs_b;
			gamma11 = c11 + (i  )*rs_c + (j  )*cs_c;

			/* chi11 = chi11 - a12t * x21; */
			bli_zset0s( rho11 );
			for ( l = 0; l < n_behind; ++l )
			{
				alpha12 = a12t + (l  )*cs_a;
				chi21   = x21  + (l  )*rs_b;

				bli_zaxpys( *alpha12, *chi21, rho11 );
			}
			bli_zsubs( rho11, *chi11 );

			/* chi11 = chi11 / alpha11; */
			/* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead
			   of alpha11, so we can multiply rather than divide. We store
			   the inverse of alpha11 intentionally to avoid expensive
			   division instructions within the micro-kernel. */
			bli_zscals( *alpha11, *chi11 );

			/* Output final result to matrix C. */
			bli_zcopys( *chi11, *gamma11 );
		}
	}
}
 blis-0.6.1/config/template/make_defs.mk000066400000000000000000000051201360743507500200060ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

# Declare the name of the current configuration and add it to the
# running list of configurations included by common.mk.
# NOTE: The CONFIGS_INCL append below is currently commented out, so only
# THIS_CONFIG is actually defined here.
THIS_CONFIG    := template
#CONFIGS_INCL   += $(THIS_CONFIG)

#
# --- Determine the C compiler and related flags ---
#

# NOTE: The build system will append these variables with various
# general-purpose/configuration-agnostic flags in common.mk. You
# may specify additional flags here as needed.
# The template configuration leaves all four of these empty.
CPPROCFLAGS    :=
CMISCFLAGS     :=
CPICFLAGS      :=
CWARNFLAGS     :=

# Emit debugging symbols unless DEBUG_TYPE is "off".
ifneq ($(DEBUG_TYPE),off)
CDBGFLAGS      := -g
endif

# Disable optimization only when DEBUG_TYPE is "noopt"; every other value
# (including "off") builds with -O3.
ifeq ($(DEBUG_TYPE),noopt)
COPTFLAGS      := -O0
else
COPTFLAGS      := -O3
endif

# Flags specific to optimized kernels.
# (Same optimization level as the rest of the library; no vector flags.)
CKOPTFLAGS     := $(COPTFLAGS)
CKVECFLAGS     :=

# Flags specific to reference kernels.
# (These simply mirror the optimized-kernel flags above.)
CROPTFLAGS     := $(CKOPTFLAGS)
CRVECFLAGS     := $(CKVECFLAGS)

# Store all of the variables here to new variables containing the
# configuration name.
# (store-make-defs is defined outside this fragment.)
$(eval $(call store-make-defs,$(THIS_CONFIG)))

 blis-0.6.1/config/thunderx2/000077500000000000000000000000001360743507500156515ustar00rootroot00000000000000blis-0.6.1/config/thunderx2/bli_cntx_init_thunderx2.c000066400000000000000000000060551360743507500226530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_thunderx2( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_thunderx2_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 2, BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, FALSE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, FALSE, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 120, 120, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 240, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 3072, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. 
bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } blis-0.6.1/config/thunderx2/bli_family_thunderx2.h000066400000000000000000000034701360743507500221400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 blis-0.6.1/config/thunderx2/make_defs.mk000066400000000000000000000057461360743507500201340ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := thunderx2 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := -D_GNU_SOURCE CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -ftree-vectorize -mtune=thunderx2t99 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -march=armv8.1-a+fp+simd -mcpu=thunderx2t99 else $(error gcc is required for this configuration.) endif # Flags specific to reference kernels. CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/x86_64/000077500000000000000000000000001360743507500146645ustar00rootroot00000000000000blis-0.6.1/config/x86_64/bli_family_x86_64.h000066400000000000000000000033061360743507500201640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H //#endif blis-0.6.1/config/x86_64/make_defs.mk000066400000000000000000000061161360743507500171370ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := x86_64 #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else ifeq ($(CC_VENDOR),icc) CKVECFLAGS := -xSSE3 else ifeq ($(CC_VENDOR),clang) CKVECFLAGS := -mssse3 -mfpmath=sse -march=core2 else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Flags specific to reference kernels. 
CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/zen/000077500000000000000000000000001360743507500145225ustar00rootroot00000000000000blis-0.6.1/config/zen/amd_config.mk000066400000000000000000000057751360743507500171570ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # All the common flags for AMD architectures will be added here # NOTE: The build system will append these variables with various # general-purpose/configuration-agnostic flags in common.mk. You # may specify additional flags here as needed. CPPROCFLAGS := CMISCFLAGS := CPICFLAGS := CWARNFLAGS := ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 -fomit-frame-pointer endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CKVECFLAGS := -mavx2 -mfpmath=sse -mfma else ifeq ($(CC_VENDOR),clang) #CKVECFLAGS := -mavx2 -mfpmath=sse -mfma -march=znver1 -mno-fma4 -mno-tbm -mno-xop -mno-lwp CKVECFLAGS := -mavx2 -mfpmath=sse -mfma # When compiling with AOCC, add these flags to the default flags set above. ifeq ($(strip $(shell clang -v |& head -1 | grep -c 'AOCC.LLVM.2.0.0')),1) CKVECFLAGS += -mllvm -disable-licm-vrp endif else $(error gcc or clang are required for this configuration.) endif endif # Flags specific to reference kernels. 
CROPTFLAGS := $(CKOPTFLAGS) ifeq ($(CC_VENDOR),gcc) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else ifeq ($(CC_VENDOR),clang) CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast else CRVECFLAGS := $(CKVECFLAGS) endif endif blis-0.6.1/config/zen/bli_cntx_init_zen.c000066400000000000000000000210671360743507500203750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" //GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) void bli_cntx_init_zen( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; blksz_t thresh[ BLIS_NUM_THRESH ]; // Set default kernel blocksizes and functions. bli_cntx_init_zen_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 8, // gemm BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, TRUE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, TRUE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, TRUE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, TRUE, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, TRUE, cntx ); // Update the context with optimized level-1f kernels. bli_cntx_set_l1f_kers ( 4, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, cntx ); // Update the context with optimized level-1v kernels. 
bli_cntx_set_l1v_kers ( 10, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv #if 0 BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int, #else BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, #endif // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv #if 0 BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int, #else BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, #endif cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); /* Multi Instance performance improvement of DGEMM when binded to a CCX In Multi instance each thread runs a sequential DGEMM. a) If BLIS is run in a multi-instance mode with CPU freq 2.6/2.2 Ghz DDR4 clock frequency 2400Mhz mc = 240, kc = 512, and nc = 2040 has better performance on EPYC server, over the default block sizes. 
b) If BLIS is run in Single Instance mode mc = 510, kc = 1024 and nc = 4080 */ #ifdef BLIS_ENABLE_ZEN_BLOCK_SIZES // Zen optmized level 3 cache block sizes #if BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES bli_blksz_init_easy( &blkszs[ BLIS_MC ], 1020, 510, 510, 255 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 1024, 1024, 1024, 1024 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 2040, 1528 ); #endif #else bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 3056 ); #endif bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 7, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, cntx ); // ------------------------------------------------------------------------- // Initialize sup thresholds with architecture-appropriate values. // s d c z bli_blksz_init_easy( &thresh[ BLIS_MT ], -1, 256, -1, -1 ); bli_blksz_init_easy( &thresh[ BLIS_NT ], -1, 100, -1, -1 ); bli_blksz_init_easy( &thresh[ BLIS_KT ], -1, 120, -1, -1 ); // Initialize the context with the sup thresholds. bli_cntx_set_l3_sup_thresh ( 3, BLIS_MT, &thresh[ BLIS_MT ], BLIS_NT, &thresh[ BLIS_NT ], BLIS_KT, &thresh[ BLIS_KT ], cntx ); // Initialize the context with the sup handlers. 
bli_cntx_set_l3_sup_handlers ( 1, BLIS_GEMM, bli_gemmsup_ref, cntx ); // Update the context with optimized small/unpacked gemm kernels. bli_cntx_set_l3_sup_kers ( 8, //BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_r_haswell_ref, BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8m, TRUE, BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8m, TRUE, BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_haswell_asm_6x8n, TRUE, BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_haswell_asm_6x8n, TRUE, cntx ); // Initialize level-3 sup blocksize objects with architecture-specific // values. // s d c z bli_blksz_init ( &blkszs[ BLIS_MR ], -1, 6, -1, -1, -1, 9, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 72, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 256, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 4080, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes for small/unpacked level-3 problems. bli_cntx_set_l3_sup_blkszs ( 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_KC, &blkszs[ BLIS_KC ], BLIS_MC, &blkszs[ BLIS_MC ], BLIS_NR, &blkszs[ BLIS_NR ], BLIS_MR, &blkszs[ BLIS_MR ], cntx ); } blis-0.6.1/config/zen/bli_family_zen.h000066400000000000000000000054201360743507500176570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H // By default, it is effective to parallelize the outer loops. // Setting these macros to 1 will force JR and IR inner loops // to be not paralleized. #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 #define BLIS_ENABLE_ZEN_BLOCK_SIZES #define BLIS_ENABLE_SMALL_MATRIX #define BLIS_ENABLE_SMALL_MATRIX_TRSM // This will select the threshold below which small matrix code will be called. 
#define BLIS_SMALL_MATRIX_THRES 700 #define BLIS_SMALL_M_RECT_MATRIX_THRES 160 #define BLIS_SMALL_K_RECT_MATRIX_THRES 128 #define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n) #define BLIS_SMALL_MATRIX_A_THRES_TRSM 128 #define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96 #define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128 //This macro will enable BLIS DGEMM to choose block sizes for a single instance mode #define BLIS_ENABLE_SINGLE_INSTANCE_BLOCK_SIZES 0 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES 250 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES 90 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22 //#endif blis-0.6.1/config/zen/make_defs.mk000066400000000000000000000066251360743507500170020ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # FLAGS that are specific to the 'zen' architecture are added here. # FLAGS that are common for all the AMD architectures are present in # amd_config.mk. # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := zen #CONFIGS_INCL += $(THIS_CONFIG) # # --- Determine the C compiler and related flags --- # # Include the file containing common flags for all AMD architectures. AMD_CONFIG_FILE := amd_config.mk AMD_CONFIG_PATH := $(BASE_SHARE_PATH)/config/zen -include $(AMD_CONFIG_PATH)/$(AMD_CONFIG_FILE) ifeq ($(CC_VENDOR),gcc) # If gcc is older than 6.1.0, we must use -march=bdver4 and then remove the # Bulldozer instruction sets that were omitted from Zen. # Additionally, if gcc is 4.9 (clang 3.5?) or newer, we may want to add # Zen-specific instructions back into the mix: # -mclzero -madx -mrdseed -mmwaitx -msha -mxsavec -mxsaves -mclflushopt -mpopcnt ifeq ($(GCC_OT_6_1_0),yes) CRVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp CKVECFLAGS += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp else # If gcc is at least 6.1.0, then we can specify the microarchitecture using # the preferred option. CRVECFLAGS += -march=znver1 CKVECFLAGS += -march=znver1 endif else ifeq ($(CC_VENDOR),clang) # I couldn't find which versions of clang added support for -march=znver1, # so we don't even bother attempting the differentiation that appears in the # gcc branch above. 
CRVECFLAGS += -march=znver1 CKVECFLAGS += -march=znver1 else $(error gcc or clang are required for this configuration.) endif endif # Store all of the variables here to new variables containing the # configuration name. $(eval $(call store-make-defs,$(THIS_CONFIG))) blis-0.6.1/config/zen/old/000077500000000000000000000000001360743507500153005ustar00rootroot00000000000000blis-0.6.1/config/zen/old/bli_kernel.h000066400000000000000000000165661360743507500175750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_H #define BLIS_KERNEL_H // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- // // Constraints: // // (1) MC must be a multiple of: // (a) MR (for zero-padding purposes) // (b) NR (for zero-padding purposes when MR and NR are "swapped") // (2) NC must be a multiple of // (a) NR (for zero-padding purposes) // (b) MR (for zero-padding purposes when MR and NR are "swapped") // // threading related // By default it is effective to paralleize the // outerloops. Setting these macros to 1 will force // JR and NR inner loops to be not paralleized. 
#define BLIS_DEFAULT_MR_THREAD_MAX 1 #define BLIS_DEFAULT_NR_THREAD_MAX 1 // sgemm micro-kernel #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_24x4 #define BLIS_DEFAULT_MC_S 264 #define BLIS_DEFAULT_KC_S 128 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 24 #define BLIS_DEFAULT_NR_S 4 #endif #if 0 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 6 #endif #if 1 #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 6 #define BLIS_DEFAULT_NR_S 16 #define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // dgemm micro-kernel #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_12x4 #define BLIS_DEFAULT_MC_D 96 #define BLIS_DEFAULT_KC_D 192 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 12 #define BLIS_DEFAULT_NR_D 4 #endif #if 0 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 6 #endif #if 1 #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8 #define BLIS_DEFAULT_MC_D 510 // 72 /* Improves performance for large Matrices */ #define BLIS_DEFAULT_KC_D 1024 // 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 6 #define BLIS_DEFAULT_NR_D 8 #define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // cgemm micro-kernel #if 1 #define BLIS_CGEMM_UKERNEL bli_cgemm_asm_3x8 #define BLIS_DEFAULT_MC_C 144 #define BLIS_DEFAULT_KC_C 256 #define BLIS_DEFAULT_NC_C 4080 #define BLIS_DEFAULT_MR_C 3 #define BLIS_DEFAULT_NR_C 8 #define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // zgemm micro-kernel #if 1 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 3 #define BLIS_DEFAULT_NR_Z 
4 #define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // zgemm micro-kernel #if 1 #define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4 #define BLIS_DEFAULT_MC_Z 72 #define BLIS_DEFAULT_KC_Z 256 #define BLIS_DEFAULT_NC_Z 4080 #define BLIS_DEFAULT_MR_Z 3 #define BLIS_DEFAULT_NR_Z 4 #define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif // -- trsm-related -- #define BLIS_STRSM_L_UKERNEL bli_strsm_l_int_6x16 #define BLIS_DTRSM_L_UKERNEL bli_dtrsm_l_int_6x8 // --gemmtrsm-related -- #define BLIS_SGEMMTRSM_L_UKERNEL bli_sgemmtrsm_l_6x16 #define BLIS_DGEMMTRSM_L_UKERNEL bli_dgemmtrsm_l_6x8 #define BLIS_SMALL_MATRIX_ENABLE //This will select the threshold below which small matrix code will be called. #define BLIS_SMALL_MATRIX_THRES 700 #define BLIS_SMALL_M_RECT_MATRIX_THRES 160 #define BLIS_SMALL_K_RECT_MATRIX_THRES 128 gint_t bli_gemm_small_matrix ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); // -- LEVEL-2 KERNEL CONSTANTS ------------------------------------------------- // -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------ // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- #define BLIS_DEFAULT_1F_S 8 #define BLIS_DEFAULT_1F_D 4 // -- LEVEL-1F KERNEL DEFINITIONS ---------------------------------------------- // -- axpy2v -- // -- dotaxpyv -- // -- axpyf -- #define BLIS_SAXPYF_KERNEL bli_saxpyf_int_var1 #define BLIS_DAXPYF_KERNEL bli_daxpyf_int_var1 // -- dotxf -- #define BLIS_SDOTXF_KERNEL bli_sdotxf_int_var1 #define BLIS_DDOTXF_KERNEL bli_ddotxf_int_var1 // -- dotxaxpyf -- // -- LEVEL-1M KERNEL DEFINITIONS ---------------------------------------------- // -- packm -- // -- unpackm -- // -- LEVEL-1V KERNEL DEFINITIONS ---------------------------------------------- // -- amax -- #define BLIS_SAMAXV_KERNEL bli_samaxv_opt_var1 #define BLIS_DAMAXV_KERNEL bli_damaxv_opt_var1 // -- addv -- // -- axpyv -- #define BLIS_DAXPYV_KERNEL bli_daxpyv_opt_var10 
#define BLIS_SAXPYV_KERNEL bli_saxpyv_opt_var10 // -- copyv -- // -- dotv -- #define BLIS_DDOTV_KERNEL bli_ddotv_opt_var1 #define BLIS_SDOTV_KERNEL bli_sdotv_opt_var1 // -- dotxv -- #define BLIS_SDOTXV_KERNEL bli_sdotxv_unb_var1 #define BLIS_DDOTXV_KERNEL bli_ddotxv_unb_var1 // -- invertv -- // -- scal2v -- // -- scalv -- #define BLIS_SSCALV_KERNEL bli_sscalv_opt_var2 #define BLIS_DSCALV_KERNEL bli_dscalv_opt_var2 // -- setv -- // -- subv -- // -- swapv -- #endif blis-0.6.1/config/zen2/000077500000000000000000000000001360743507500146045ustar00rootroot00000000000000blis-0.6.1/config/zen2/bli_cntx_init_zen2.c000066400000000000000000000122311360743507500205320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_init_zen2( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_zen2_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm micro-kernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 8, // gemm BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, TRUE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_haswell_asm_6x8, TRUE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, TRUE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, TRUE, // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_haswell_asm_6x8, TRUE, // gemmtrsm_u BLIS_GEMMTRSM_U_UKR, BLIS_FLOAT, bli_sgemmtrsm_u_haswell_asm_6x16, TRUE, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_haswell_asm_6x8, TRUE, cntx ); // Update the context with optimized level-1f kernels. bli_cntx_set_l1f_kers ( 4, // axpyf BLIS_AXPYF_KER, BLIS_FLOAT, bli_saxpyf_zen_int_8, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_zen_int_8, // dotxf BLIS_DOTXF_KER, BLIS_FLOAT, bli_sdotxf_zen_int_8, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_zen_int_8, cntx ); // Update the context with optimized level-1v kernels. 
bli_cntx_set_l1v_kers ( 10, // amaxv BLIS_AMAXV_KER, BLIS_FLOAT, bli_samaxv_zen_int, BLIS_AMAXV_KER, BLIS_DOUBLE, bli_damaxv_zen_int, // axpyv BLIS_AXPYV_KER, BLIS_FLOAT, bli_saxpyv_zen_int10, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_zen_int10, // dotv BLIS_DOTV_KER, BLIS_FLOAT, bli_sdotv_zen_int, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_zen_int, // dotxv BLIS_DOTXV_KER, BLIS_FLOAT, bli_sdotxv_zen_int, BLIS_DOTXV_KER, BLIS_DOUBLE, bli_ddotxv_zen_int, // scalv BLIS_SCALV_KER, BLIS_FLOAT, bli_sscalv_zen_int10, BLIS_SCALV_KER, BLIS_DOUBLE, bli_dscalv_zen_int10, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); #if AOCL_BLIS_MULTIINSTANCE bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 240, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 512, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 2040, 4080, 4080 ); #else bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); #endif bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 7, // level-3 BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, // level-1f BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF, BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF, cntx ); } blis-0.6.1/config/zen2/bli_family_zen2.h000066400000000000000000000056161360743507500200320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLI_FAMILY_ZEN2_ #define BLI_FAMILY_ZEN2_ // By default, it is effective to parallelize the outer loops. // Setting these macros to 1 will force JR and IR inner loops // to be not paralleized. #define BLIS_THREAD_MAX_IR 1 #define BLIS_THREAD_MAX_JR 1 #define BLIS_ENABLE_SMALL_MATRIX #define BLIS_ENABLE_SMALL_MATRIX_TRSM // This will select the threshold below which small matrix code will be called. 
#define BLIS_SMALL_MATRIX_THRES 700 #define BLIS_SMALL_M_RECT_MATRIX_THRES 160 #define BLIS_SMALL_K_RECT_MATRIX_THRES 128 #define BLIS_SMALL_MATRIX_THRES_TRSM 32768 //128(128+128) => m*(m+n) #define BLIS_SMALL_MATRIX_A_THRES_TRSM 128 #define BLIS_SMALL_MATRIX_A_THRES_M_SYRK 96 #define BLIS_SMALL_MATRIX_A_THRES_N_SYRK 128 #define BLIS_ENABLE_SMALL_MATRIX_ROME #define BLIS_SMALL_MATRIX_THRES_ROME 400 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME 120 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME 60 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME 150 #define D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO 22 // When running HPL with pure MPI without DGEMM threading (Single-threaded // BLIS), defining this macro as 1 yields better performance. #define AOCL_BLIS_MULTIINSTANCE 0 #endif blis-0.6.1/config/zen2/make_defs.mk000066400000000000000000000067621360743507500170660ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

# FLAGS that are specific to the 'zen2' architecture are added here.
# FLAGS that are common for all the AMD architectures are present in
# config/zen/amd_config.mk.

# Declare the name of the current configuration and add it to the
# running list of configurations included by common.mk.
THIS_CONFIG    := zen2
#CONFIGS_INCL   += $(THIS_CONFIG)

#
# --- Determine the C compiler and related flags ---
#

# Include file containing common flags for all AMD architectures.
# The leading '-' makes the include optional: make does not fail if the
# file is missing.
AMD_CONFIG_FILE := amd_config.mk
AMD_CONFIG_PATH := $(BASE_SHARE_PATH)/config/zen
-include $(AMD_CONFIG_PATH)/$(AMD_CONFIG_FILE)

# Choose -march for the reference (CRVECFLAGS) and kernel (CKVECFLAGS)
# flag sets based on the compiler vendor and, for gcc, its version.
ifeq ($(CC_VENDOR),gcc)
ifeq ($(GCC_OT_9_1_0),yes)
ifeq ($(GCC_OT_6_1_0),yes)
# If gcc is older than 6.1.0, we must use -march=bdver4 and then remove the
# Bulldozer instruction sets that were omitted from Zen.
CRVECFLAGS     += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
CKVECFLAGS     += -march=bdver4 -mno-fma4 -mno-tbm -mno-xop -mno-lwp
else
# If gcc is older than 9.1.0 but at least 6.1.0, then we can use -march=znver1
# as the fallback option.
CRVECFLAGS     += -march=znver1 -mno-avx256-split-unaligned-store
CKVECFLAGS     += -march=znver1 -mno-avx256-split-unaligned-store
endif
else
# If gcc is at least 9.1.0, then we can specify the microarchitecture using
# the preferred option.
CRVECFLAGS     += -march=znver2
CKVECFLAGS     += -march=znver2
endif
else
ifeq ($(CC_VENDOR),clang)
# I couldn't find which versions of clang added support for -march=znver1
# or -march=znver2, so we don't even bother attempting the differentiation
# that appears in the gcc branch above.
CRVECFLAGS     += -march=znver1
CKVECFLAGS     += -march=znver1
else
$(error gcc or clang are required for this configuration.)
endif
endif

# Store all of the variables here to new variables containing the
# configuration name.
$(eval $(call store-make-defs,$(THIS_CONFIG)))

blis-0.6.1/config_registry000066400000000000000000000023001360743507500155740ustar00rootroot00000000000000
#
# config_registry
#
# Please refer to the BLIS wiki on configurations for information on the
# syntax and semantics of this file [1].
#
# [1] https://github.com/flame/blis/blob/master/docs/ConfigurationHowTo.md
#

# Processor families.
x86_64:      intel64 amd64
intel64:     skx knl haswell sandybridge penryn generic
amd64:       zen2 zen excavator steamroller piledriver bulldozer generic

# NOTE: ARM families will remain disabled until runtime hardware detection
# logic is added to BLIS.
#arm64:       cortexa57 generic
#arm32:       cortexa15 cortexa9 generic

# Intel architectures.
skx:         skx/skx/haswell/zen
knl:         knl/knl/haswell/zen
haswell:     haswell/haswell/zen
sandybridge: sandybridge
penryn:      penryn

# AMD architectures.
zen2:        zen2/zen2/zen/haswell
zen:         zen/zen/haswell
excavator:   excavator/piledriver
steamroller: steamroller/piledriver
piledriver:  piledriver
bulldozer:   bulldozer

# ARM architectures.
thunderx2:   thunderx2/armv8a
cortexa57:   cortexa57/armv8a
cortexa53:   cortexa53/armv8a
cortexa15:   cortexa15/armv7a
cortexa9:    cortexa9/armv7a

# IBM architectures.
power9:      power9
bgq:         bgq

# Generic architectures.
generic: generic blis-0.6.1/configure000077500000000000000000003375511360743507500144060ustar00rootroot00000000000000#!/usr/bin/env bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
#

#
# -- Helper functions ----------------------------------------------------------
#

#
# print_usage
#
# Print the configure help text to stdout and terminate the script.
#
# Globals read:
#   version           version string; if empty it is read from the file
#                     named by version_filepath
#   version_filepath  path of the 'version' file (only used when version
#                     is empty)
#   script_name       name printed in the banner and the usage line
#   prefix_def        default installation prefix shown for --prefix
# Outputs: the help text on stdout.
# Returns: never returns; always exits with status 1.
#
print_usage()
{
	# Use the version string in the 'version' file since we don't have
	# the patched version string yet.
	if [ -z "${version}" ]; then
		version=$(cat "${version_filepath}")
	fi

	# Echo usage info.
	echo " "
	echo " ${script_name} (BLIS ${version})"
	#echo " "
	#echo " BLIS ${version}"
	echo " "
	echo " Configure BLIS's build system for compilation using a specified"
	echo " configuration directory."
	echo " "
	echo " Usage:"
	echo " "
	echo " ${script_name} [options] [env. vars.] confname"
	echo " "
	echo " Arguments:"
	echo " "
	echo " confname The name of the sub-directory inside of the 'config'"
	echo " directory containing the desired BLIS configuration."
	echo " Note that confname MUST be specified; if it is not,"
	echo " configure will complain. To build a completely generic"
	echo " implementation, use the 'generic' configuration."
	echo " "
	echo " Options:"
	echo " "
	echo " -p PREFIX, --prefix=PREFIX"
	echo " "
	echo " The common installation prefix for all files. If given,"
	echo " this option effectively implies:"
	echo " --libdir=EXECPREFIX/lib"
	echo " --includedir=PREFIX/include"
	echo " --sharedir=PREFIX/share"
	echo " where EXECPREFIX defaults to PREFIX. If this option is"
	echo " not given, PREFIX defaults to '${prefix_def}'. If PREFIX"
	echo " refers to a directory that does not exist, it will be"
	echo " created."
	echo " "
	echo " --exec-prefix=EXECPREFIX"
	echo " "
	echo " The installation prefix for libraries. Specifically, if"
	echo " given, this option effectively implies:"
	echo " --libdir=EXECPREFIX/lib"
	echo " If not given, EXECPREFIX defaults to PREFIX, which may be"
	echo " modified by the --prefix option. If EXECPREFIX refers to"
	echo " a directory that does not exist, it will be created."
	echo " "
	echo " --libdir=LIBDIR"
	echo " "
	echo " The path to which make will install libraries. If not"
	echo " given, LIBDIR defaults to PREFIX/lib. If LIBDIR refers to"
	echo " a directory that does not exist, it will be created."
	echo " "
	echo " --includedir=INCDIR"
	echo " "
	echo " The path to which make will install development header"
	echo " files. If not given, INCDIR defaults to PREFIX/include."
	echo " If INCDIR refers to a directory that does not exist, it"
	echo " will be created."
	echo " "
	echo " --sharedir=SHAREDIR"
	echo " "
	echo " The path to which make will install makefile fragments containing"
	echo " make variables determined by configure (e.g. CC, CFLAGS,"
	echo " and LDFLAGS). These files allow certain BLIS makefiles,"
	echo " such as those in the examples or testsuite directories, to"
	echo " operate on an installed copy of BLIS rather than a local"
	echo " (and possibly uninstalled) copy. If not given, SHAREDIR"
	echo " defaults to PREFIX/share. If SHAREDIR refers to a"
	echo " directory that does not exist, it will be created."
	echo " "
	echo " --enable-verbose-make, --disable-verbose-make"
	echo " "
	echo " Enable (disabled by default) verbose compilation output"
	echo " during make."
	echo " "
	echo " --enable-arg-max-hack, --disable-arg-max-hack"
	echo " "
	echo " Enable (disabled by default) build system logic that"
	echo " will allow archiving/linking the static/shared library"
	echo " even if the command plus command line arguments exceeds"
	echo " the operating system limit (ARG_MAX)."
	echo " "
	echo " -d DEBUG, --enable-debug[=DEBUG]"
	echo " "
	echo " Enable debugging symbols in the library. If argument"
	echo " DEBUG is given as 'opt', then optimization flags are"
	echo " kept in the framework, otherwise optimization is"
	echo " turned off."
	echo " "
	echo " --disable-static, --enable-static"
	echo " "
	echo " Disable (enabled by default) building BLIS as a static"
	echo " library. If the static library build is disabled, the"
	echo " shared library build must remain enabled."
	echo " "
	echo " --disable-shared, --enable-shared"
	echo " "
	echo " Disable (enabled by default) building BLIS as a shared"
	echo " library. If the shared library build is disabled, the"
	echo " static library build must remain enabled."
	echo " "
	echo " -e SYMBOLS, --export-shared[=SYMBOLS]"
	echo " "
	echo " Specify the subset of library symbols that are exported"
	echo " within a shared library. Valid values for SYMBOLS are:"
	echo " 'public' (the default) and 'all'. By default, only"
	echo " functions and variables that belong to public APIs are"
	echo " exported in shared libraries. However, the user may"
	echo " instead export all symbols in BLIS, even those that were"
	echo " intended for internal use only. Note that the public APIs"
	echo " encompass all functions that almost any user would ever"
	echo " want to call, including the BLAS/CBLAS compatibility APIs"
	echo " as well as the basic and expert interfaces to the typed"
	echo " and object APIs that are unique to BLIS. Also note that"
	echo " changing this option to 'all' will have no effect in some"
	echo " environments, such as when compiling with clang on"
	echo " Windows."
	echo " "
	echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
	echo " "
	echo " Enable threading in the library, using threading model"
	echo " MODEL={openmp,pthreads,no}. If MODEL=no or "
	echo " --disable-threading is specified, threading will be"
	echo " disabled. The default is 'no'."
	echo " "
	echo " --disable-pba-pools, --enable-pba-pools"
	echo " --disable-sba-pools, --enable-sba-pools"
	echo " "
	echo " Disable (enabled by default) use of internal memory pools"
	echo " within the packing block allocator (pba) and/or the small"
	echo " block allocator (sba). The former is used to allocate"
	echo " memory used to pack submatrices while the latter is used"
	echo " to allocate control/thread tree nodes and thread"
	echo " communicators. Both allocations take place in the context"
	echo " of level-3 operations. When the pba is disabled, the"
	echo " malloc()-like function specified by BLIS_MALLOC_POOL is"
	echo " called on-demand whenever a packing block is needed, and"
	echo " when the sba is disabled, the malloc()-like function"
	echo " specified by BLIS_MALLOC_INTL is called whenever a small"
	echo " block is needed, with the two allocators calling free()-"
	echo " like functions BLIS_FREE_POOL and BLIS_FREE_INTL,"
	echo " respectively when blocks are released. When enabled,"
	echo " either or both pools are populated via the same functions"
	echo " mentioned previously, and henceforth blocks are checked"
	echo " out and in. The library quickly reaches a state in which"
	echo " it no longer needs to call malloc() or free(), even"
	echo " across many separate level-3 operation invocations."
	echo " "
	echo " --enable-mem-tracing, --disable-mem-tracing"
	echo " "
	echo " Enable (disabled by default) output to stdout that traces"
	echo " the allocation and freeing of memory, including the names"
	echo " of the functions that triggered the allocation/freeing."
	echo " Enabling this option WILL NEGATIVELY IMPACT PERFORMANCE."
	echo " Please use only for informational/debugging purposes."
	echo " "
	echo " -i SIZE, --int-size=SIZE"
	echo " "
	echo " Set the size (in bits) of internal BLIS integers and"
	echo " integer types used in native BLIS interfaces. The"
	echo " default integer type size is architecture dependent."
	echo " (Hint: You can always find this value printed at the"
	echo " beginning of the testsuite output.)"
	echo " "
	echo " -b SIZE, --blas-int-size=SIZE"
	echo " "
	echo " Set the size (in bits) of integer types in external"
	echo " BLAS and CBLAS interfaces, if enabled. The default"
	echo " integer type size used in BLAS/CBLAS is 32 bits."
	echo " "
	echo " --disable-blas, --enable-blas"
	echo " "
	echo " Disable (enabled by default) building the BLAS"
	echo " compatibility layer."
	echo " "
	echo " --enable-cblas, --disable-cblas"
	echo " "
	echo " Enable (disabled by default) building the CBLAS"
	echo " compatibility layer. This automatically enables the"
	echo " BLAS compatibility layer as well."
	echo " "
	echo " --disable-mixed-dt, --enable-mixed-dt"
	echo " "
	echo " Disable (enabled by default) support for mixing the"
	echo " storage domain and/or storage precision of matrix"
	echo " operands for the gemm operation, as well as support"
	echo " for computing in a precision different from one or"
	echo " both of matrices A and B."
	echo " "
	echo " --disable-mixed-dt-extra-mem, --enable-mixed-dt-extra-mem"
	echo " "
	echo " Disable (enabled by default) support for additional"
	echo " mixed datatype optimizations that require temporarily"
	echo " allocating extra memory--specifically, a single m x n"
	echo " matrix (per application thread) whose storage datatype"
	echo " is equal to the computation datatype. This option may"
	echo " only be enabled when mixed domain/precision support is"
	echo " enabled."
	echo " "
	echo " --disable-sup-handling, --enable-sup-handling"
	echo " "
	echo " Disable (enabled by default) handling of small/skinny"
	echo " matrix problems via separate code branches. When disabled,"
	echo " these small/skinny level-3 operations will be performed by"
	echo " the conventional implementation, which is optimized for"
	echo " medium and large problems. Note that what qualifies as"
	echo " \"small\" depends on thresholds that may vary by sub-"
	echo " configuration."
	echo " "
	echo " -s NAME, --enable-sandbox=NAME"
	echo " "
	echo " Enable a separate sandbox implementation of gemm. This"
	echo " option disables BLIS's conventional gemm implementation"
	echo " (which shares common infrastructure with other level-3"
	echo " operations) and instead compiles and uses the code in"
	echo " the NAME directory, which is expected to be a sub-"
	echo " directory of 'sandbox'. By default, no sandboxes are"
	echo " enabled."
	echo " "
	echo " --with-memkind, --without-memkind"
	echo " "
	echo " Forcibly enable or disable the use of libmemkind's"
	echo " hbw_malloc() and hbw_free() as substitutes for malloc()"
	echo " and free(), respectively, when allocating memory for"
	echo " BLIS's memory pools, which are used to manage buffers"
	echo " into which matrices are packed. The default behavior"
	echo " for this option is environment-dependent; if configure"
	echo " detects the presence of libmemkind, libmemkind is used"
	echo " by default, and otherwise it is not used by default."
	echo " "
	echo " -r METHOD, --thread-part-jrir=METHOD"
	echo " "
	echo " Request a method of assigning micropanels to threads in"
	echo " the JR and IR loops. Valid values for METHOD are 'slab'"
	echo " and 'rr'. Using 'slab' assigns (as much as possible)"
	echo " contiguous regions of micropanels to each thread while"
	echo " using 'rr' assigns micropanels to threads in a round-"
	echo " robin fashion. The chosen method also applies during"
	echo " the packing of A and B. The default method is 'slab'."
	echo " NOTE: Specifying this option constitutes a request,"
	echo " which may be ignored in select situations if the"
	echo " implementation has a good reason to do so."
	echo " "
	echo " --force-version=STRING"
	echo " "
	echo " Force configure to use an arbitrary version string"
	echo " STRING. This option may be useful when repackaging"
	echo " custom versions of BLIS by outside organizations."
	echo " "
	echo " -c, --show-config-lists"
	echo " "
	echo " Print the config and kernel lists, and kernel-to-config"
	echo " map after they are read from file. This can be useful"
	echo " when debugging certain configuration issues, and/or as"
	echo " a sanity check to make sure these lists are constituted"
	echo " as expected."
	echo " "
	echo " -q, --quiet Suppress informational output. By default, configure"
	echo " is verbose. (NOTE: -q is not yet implemented)"
	echo " "
	echo " -h, --help Output this information and quit."
	echo " "
	echo " Environment Variables:"
	echo " "
	echo " CC Specifies the C compiler to use."
	echo " CXX Specifies the C++ compiler to use (sandbox only)."
	echo " RANLIB Specifies the ranlib executable to use."
	echo " AR Specifies the archiver to use."
	echo " CFLAGS Specifies additional compiler flags to use (prepended)."
	echo " LDFLAGS Specifies additional linker flags to use (prepended)."
	echo " LIBPTHREAD Pthreads library to use."
	echo " PYTHON Specifies the python interpreter to use."
	echo " "
	echo " Environment variables may also be specified as command line"
	echo " options, e.g.:"
	echo " "
	echo " ./configure [options] CC=gcc haswell"
	echo " "
	echo " Note that not all compilers are compatible with a given"
	echo " configuration."
	echo " "

	# Exit with non-zero exit status
	exit 1
}

#
# query_array ARR KEY
#
# Emulate an associative-array lookup with plain variables: print the
# value of the variable named "ARR_KEY" followed by a newline. An unset
# variable yields an empty line.
#
query_array()
{
	local arr key var_name

	arr="$1"
	key="$2"

	var_name="${arr}_${key}"

	# printf rather than echo, so that stored values such as "-n" or "-e"
	# are printed verbatim instead of being parsed as echo options.
	printf '%s\n' "${!var_name}"
}

#
# assign_key_value ARR KEY VAL
#
# Counterpart of query_array: store VAL in the variable named "ARR_KEY".
# "ARR_KEY" must form a valid shell identifier.
#
assign_key_value()
{
	local arr key val

	arr="$1"
	key="$2"
	val="$3"

	# printf -v assigns to the computed variable name without eval.
	printf -v "${arr}_${key}" '%s' "${val}"
}

#
# FGVZ: This commented-out function is being kept as an example of how
# to effectively "pass by reference" in bash. That is, pass the name of
# a variable, instead of its contents, and then let the function use the
# variable by prepending a $, at which time it can evaluate the string
# as if it were a literal variable occurrence.
#
#filteradd_to_list()
#{
#	local dlist ditem list_c item_c is_blacklisted
#
#	# Add $1 to the list identified by $2, but only if $1 is not
#	# found in a blacklist.
#
#	# Note: $2 can actually be a list of items.
#	dlist=\$"$1"
#	ditem=\$"$2"
#
#	# Acquire the contents of $list and $item and store them in list_c
#	# and item_c, respectively.
#	list_c=$(eval "expr \"$dlist\" ")
#	item_c=$(eval "expr \"$ditem\" ")
#
#	# Iterate over $item_c in case it is actually multiple items.
#
# pass_config_kernel_registries FILENAME PASSNUM
#
# Parse the configuration registry file FILENAME, whose non-comment lines
# have the form
#
#   <name>: <item> [<item> ...]
#
# where each <item> is either a configuration name or a slash-separated
# compound word '<config>/<kernelset>[/<kernelset>...]'.
#
# PASSNUM selects what is done:
#   0 - build the indirect blacklist (indirect_blist) ONLY. This pass is
#       currently disabled: it resets indirect_blist and returns early
#       (see below).
#   1 - populate the config and kernel registries (via assign_key_value),
#       omitting blacklisted configurations.
#
# Globals read:    config_blist, indirect_blist
# Globals written: indirect_blist, config_registry_*, kernel_registry_*
#
pass_config_kernel_registries()
{
	local filename passnum
	local all_blist
	local line curline list item config kernels
	local cname clist klist

	filename="$1"
	passnum="$2"

	# Initialize the list of indirectly blacklisted configurations. These
	# are configurations invalidated by the removal of blacklisted
	# configurations. For example, if haswell is registered as needing
	# the 'haswell' and 'zen' kernel sets:
	#
	#   haswell: haswell/haswell/zen
	#
	# and 'zen' was blacklisted because of the compiler version, then
	# 'haswell' would be omitted from the registry.
	if [ "${passnum}" == "0" ]; then
		indirect_blist=""
	fi

	# For convenience, merge the original and indirect blacklists.
	# NOTE: During pass 0, all_blist is equal to config_blist, since
	# indirect_blist is still empty.
	all_blist="${config_blist} ${indirect_blist}"

	# Disable support for indirect blacklisting by returning early during
	# pass 0. See issue #214 for details [1]. If a subconfiguration such
	# as haswell is defined to require the zen kernel set, it implies that
	# the zen kernels can be compiled with haswell compiler flags; the zen
	# subconfig being blacklisted does not prevent that.
	#
	# [1] https://github.com/flame/blis/issues/214
	#
	if [ "${passnum}" == "0" ]; then
		return
	fi

	# 'read' (with the default IFS) strips leading and trailing whitespace
	# from each line. The '|| [ -n ... ]' clause makes sure a final line
	# that lacks a terminating newline is still processed.
	while read -r line || [ -n "${line}" ]; do

		# Remove everything after comment character '#'.
		curline=${line%%#*}

		# Skip lines that are empty once the comment is removed.
		if [ -z "${curline}" ]; then
			continue
		fi

		# Read the config name and config list for the current line.
		cname=${curline%%:*}
		list=${curline##*:}

		# If we encounter a slash, it means the name of the configuration
		# and the kernel set(s) needed by that configuration differ.
		if [[ "${list}" == */* ]]; then

			clist=""
			klist=""

			# Word splitting is intentional: visit each item separately.
			for item in ${list}; do

				# The sub-configuration name is always the first sub-word
				# in the slash-separated compound word.
				config=${item%%/*}

				# Delete the sub-configuration name from the front of the
				# item, leaving the slash-separated kernel names. (An item
				# without a slash is left unchanged and so names its own
				# kernel set.)
				kernels=${item#*/}

				# Replace the slashes with spaces to get a space-separated
				# list of kernel names.
				kernels=${kernels//\// }

				clist="${clist} ${config}"
				klist="${klist} ${kernels}"
			done
		else
			clist=${list}
			klist=${list}
		fi

		# Normalize whitespace in the config name and config/kernel lists.
		cname=$(canonicalize_ws "${cname}")
		clist=$(canonicalize_ws "${clist}")
		klist=$(canonicalize_ws "${klist}")

		# Handle singleton and umbrella configuration entries separately.
		if [ "$(is_singleton_family "${cname}" "${clist}")" == "true" ]; then

			# Singleton configurations/families.
			# Note: for singleton families, clist contains one item, which
			# always equals cname, but klist could contain more than one
			# item.

			# Only consider updating the indirect blacklist (pass 0) or
			# committing clist to the registry (pass 1) if the
			# configuration name (cname) is not blacklisted.
			if [ "$(is_in_list "${cname}" "${all_blist}")" == "false" ]; then

				if [ "${passnum}" == "0" ]; then

					# Even if cname isn't blacklisted, one of the requisite
					# kernel sets might be; if so, the whole entry is added
					# to the indirect blacklist.
					for item in ${klist}; do
						if [ "$(is_in_list "${item}" "${config_blist}")" == "true" ]; then
							indirect_blist="${indirect_blist} ${cname}"
							break
						fi
					done
				fi

				if [ "${passnum}" == "1" ]; then

					# Store the clist to the cname key of the config
					# registry.
					assign_key_value "config_registry" "${cname}" "${clist}"
				fi
			fi

			if [ "${passnum}" == "1" ]; then

				# Store the klist to the cname key of the kernel registry.
				# (This is done whether or not cname is blacklisted.)
				assign_key_value "kernel_registry" "${cname}" "${klist}"
			fi

		else

			# Umbrella configurations/families.

			# First we check cname, which should generally not be
			# blacklisted for umbrella families, but we check anyway.
			if [ "$(is_in_list "${cname}" "${all_blist}")" == "false" ]; then

				if [ "${passnum}" == "1" ]; then

					# Omit any blacklisted sub-config from clist and klist.
					for item in ${clist}; do
						if [ "$(is_in_list "${item}" "${all_blist}")" == "true" ]; then
							clist=$(remove_from_list "${item}" "${clist}")
							klist=$(remove_from_list "${item}" "${klist}")
						fi
					done

					# Store the config and kernel lists to the entries
					# that correspond to the config name.
					assign_key_value "config_registry" "${cname}" "${clist}"
					assign_key_value "kernel_registry" "${cname}" "${klist}"
				fi
			fi
		fi

	done < "${filename}"

	if [ "${passnum}" == "0" ]; then

		# Assign the final indirect blacklist (with whitespace normalized).
		indirect_blist="$(canonicalize_ws "${indirect_blist}")"
	fi
}
#
# _registry_substitute_word WORD REPLACEMENT LIST
#
# Print LIST with every item that is exactly equal to WORD replaced by
# REPLACEMENT (which may itself be several words). Matching is done on
# whole words, so substituting 'amd64' leaves 'amd64_legacy' untouched.
#
_registry_substitute_word()
{
	local word repl list item out

	word="$1"
	repl="$2"
	list="$3"

	out=""
	for item in ${list}; do
		if [ "${item}" == "${word}" ]; then
			out="${out} ${repl}"
		else
			out="${out} ${item}"
		fi
	done

	printf '%s\n' "${out}"
}

#
# read_registry_file FILENAME
#
# Read the configuration registry file FILENAME into the config and kernel
# registries (config_registry_* / kernel_registry_* variables) and then
# expand every family entry until it lists only the members / kernel sets
# of singleton configurations.
#
read_registry_file()
{
	local filename
	local clist klist klisttmp
	local iterate_again config
	local cr_var mem mems_mem newclist
	local kr_var ker kers_ker newklist

	filename="$1"

	# Execute an initial pass through the registry file so that we can
	# accumulate a list of indirectly blacklisted configurations, if any.
	pass_config_kernel_registries "${filename}" "0"

	# Make a second pass through the registry file, this time creating
	# the actual config and kernel registry data structures.
	pass_config_kernel_registries "${filename}" "1"

	# Go back through the config registry and substitute any configuration
	# families with their constituents' members. Each time a substitution
	# introduces names that may themselves need expanding, we set a flag
	# that causes one more pass. The process stops when a full pass does
	# not request another.
	iterate_again="1"
	while [ "${iterate_again}" == "1" ]; do

		iterate_again="0"

		# Visit every variable whose name begins with 'config_registry_'.
		for cr_var in ${!config_registry_*}; do

			config=${cr_var##config_registry_}

			clist=$(query_array "config_registry" "${config}")

			# Entries that define singleton families never need any
			# substitution.
			if [ "$(is_singleton_family "${config}" "${clist}")" == "true" ]; then
				continue
			fi

			# NOTE: the word list of this loop is expanded once, up front;
			# the registry entry itself is re-read before each substitution.
			for mem in ${clist}; do

				mems_mem=$(query_array "config_registry" "${mem}")

				# If mems_mem is empty, then mem was not found as a key in
				# the config registry; skip it here.
				if [ -z "${mems_mem}" ]; then
					continue
				fi

				if [ "${mem}" != "${mems_mem}" ]; then

					clist=$(query_array "config_registry" "${config}")

					# Replace the current member with its constituent
					# config set (whole-word match), canonicalize
					# whitespace, remove duplicate names, and update the
					# config registry with the new list.
					newclist=$(_registry_substitute_word "${mem}" "${mems_mem}" "${clist}")
					newclist=$(canonicalize_ws "${newclist}")
					newclist=$(rm_duplicate_words "${newclist}")

					assign_key_value "config_registry" "${config}" "${newclist}"

					# Request another round, but only if mem is NOT itself
					# present in the list that replaced it.
					if [ "$(is_in_list "${mem}" "${mems_mem}")" == "false" ]; then
						iterate_again="1"
					fi
				fi
			done
		done
	done

	# Similarly, iterate through the kernel registry and substitute any
	# configuration families in the kernel list (right side of ':') with
	# the members of that family's kernel set, until all families have
	# been replaced with singleton configurations' kernel sets.
	iterate_again="1"
	while [ "${iterate_again}" == "1" ]; do

		iterate_again="0"

		for kr_var in ${!kernel_registry_*}; do

			config=${kr_var##kernel_registry_}

			klist=$(query_array "kernel_registry" "${config}")

			# In the kernel registry, an entry is a singleton entry when
			# the config name occurs somewhere in its own kernel list.
			# (This differs from the config registry test, which requires
			# a one-word list equal to the name.)
			if [ "$(is_in_list "${config}" "${klist}")" == "true" ]; then
				continue
			fi

			for ker in ${klist}; do

				kers_ker=$(query_array "kernel_registry" "${ker}")

				# If kers_ker is empty, then ker was not found as a key in
				# the kernel registry. This can happen when ker identifies
				# a kernel set that does not correspond to any
				# configuration. (Example: armv7a and armv8a kernel sets
				# are used by cortexa* configurations, but do not
				# correspond to their own configurations.)
				if [ -z "${kers_ker}" ]; then
					continue
				fi

				# If ker differs from its own kernel registry entry, then
				# that entry was specified to use a different kernel set,
				# which must be substituted for ker.
				if [ "${ker}" != "${kers_ker}" ]; then

					klisttmp=$(query_array "kernel_registry" "${config}")

					newklist=$(_registry_substitute_word "${ker}" "${kers_ker}" "${klisttmp}")
					newklist=$(canonicalize_ws "${newklist}")
					newklist=$(rm_duplicate_words "${newklist}")

					assign_key_value "kernel_registry" "${config}" "${newklist}"

					# Request another round, unless we just substituted
					# using a definition that contains ker itself.
					if [ "$(is_in_list "${ker}" "${kers_ker}")" == "false" ]; then
						iterate_again="1"
					fi
				fi
			done
		done
	done
}
#
# build_kconfig_registry FAMILYNAME
#
# For every sub-configuration registered under FAMILYNAME in the config
# registry, look up its kernel sets in the kernel registry and append the
# sub-configuration to the kernel-to-config registry entry
# (kconfig_registry_<kernel>) of each of those kernel sets.
#
build_kconfig_registry()
{
	local familyname clist config kernels kernel cur_configs newvalue

	familyname="$1"

	clist=$(query_array "config_registry" "${familyname}")

	for config in ${clist}; do

		# Look up the kernels for the current sub-configuration.
		kernels=$(query_array "kernel_registry" "${config}")

		for kernel in ${kernels}; do

			# Query the sub-configs currently associated with ${kernel}.
			cur_configs=$(query_array "kconfig_registry" "${kernel}")

			# Add the current sub-configuration to that list.
			newvalue=$(canonicalize_ws "${cur_configs} ${config}")

			# Update the registry.
			assign_key_value "kconfig_registry" "${kernel}" "${newvalue}"
		done
	done
}

#
# is_in_list WORD LIST
#
# Print "true" if WORD is one of the whitespace-separated items of LIST,
# and "false" otherwise.
#
is_in_list()
{
	local word list rval item

	word="$1"
	list="$2"
	rval="false"

	for item in ${list}; do
		if [ "${item}" == "${word}" ]; then
			rval="true"
			break
		fi
	done

	echo "${rval}"
}

#
# is_singleton LIST
#
# Print "true" if LIST contains exactly one word, and "false" otherwise
# (including when LIST is empty).
#
is_singleton()
{
	local list rval

	list="$1"
	rval="false"

	# Let the shell split the list into words and count them. (This only
	# replaces the function's own positional parameters.)
	set -- ${list}

	if [ "$#" -eq 1 ]; then
		rval="true"
	fi

	echo "${rval}"
}

#
# is_singleton_family FAMILYNAME MEMBERLIST
#
# Print "true" if MEMBERLIST consists of exactly one word and that word is
# FAMILYNAME; print "false" otherwise.
#
is_singleton_family()
{
	local familyname memberlist rval

	familyname="$1"
	memberlist="$2"
	rval="false"

	# NOTE: is_singleton prints "true" or "false"; its output must be
	# compared explicitly, since either string is non-empty and would make
	# a bare '[ ... ]' test succeed.
	if [ "$(is_singleton "${memberlist}")" == "true" ]; then
		if [ "${memberlist}" == "${familyname}" ]; then
			rval="true"
		fi
	fi

	echo "${rval}"
}

#
# remove_from_list STRIKE_WORDS LIST
#
# Print LIST with every item that appears in STRIKE_WORDS removed and with
# whitespace canonicalized.
#
remove_from_list()
{
	local strike_words list flist item

	strike_words="$1"
	list="$2"
	flist=""

	for item in ${list}; do

		# Filter out any list item that matches any of the strike words.
		if [ "$(is_in_list "${item}" "${strike_words}")" == "false" ]; then
			flist="${flist} ${item}"
		fi
	done

	flist=$(canonicalize_ws "${flist}")

	# Return the filtered list.
	echo "${flist}"
}

#
# canonicalize_ws STRING
#
# Print STRING with leading and trailing whitespace removed and with runs
# of spaces between words squeezed to a single space.
#
canonicalize_ws()
{
	local str

	str="$1"

	# printf (rather than 'echo -e') passes backslashes through untouched.
	str=$(printf '%s\n' "${str}" \
	      | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
	      | tr -s " ")

	printf '%s\n' "${str}"
}

#
# rm_duplicate_words STRING
#
# Print STRING with duplicate words removed, keeping only the LAST
# occurrence of each word.
#
rm_duplicate_words()
{
	local str revstr revres res

	str="$1"

	# We reverse the initial string, THEN remove duplicates (keeping first
	# occurrences), then reverse the de-duplicated result, so that only the
	# last instance of each word survives. For example, this keeps only
	# the last instance of the 'generic' configuration in a configuration
	# family that includes it twice or more.
	revstr=$(echo "${str}" | awk '{ for (i=NF; i>1; i--) printf("%s ",$i); print $1; }')
	revres=$(echo "${revstr}" | awk '{for (i=1;i<=NF;i++) if (!a[$i]++) printf("%s%s",$i,FS)}{printf("\n")}')
	res=$(echo "${revres}" | awk '{ for (i=NF; i>1; i--) printf("%s ",$i); print $1; }')

	echo "${res}"
}
#
# get_cc_search_list
#
# Print the space-separated list of C compilers to search for, in order
# of priority. The order depends on the global os_name.
#
get_cc_search_list()
{
	local list

	case "${os_name}" in
		OpenBSD|FreeBSD)
			# For OpenBSD and FreeBSD, prioritize cc and clang over gcc.
			list="cc clang gcc"
			;;
		*)
			# For Linux, Darwin (OS X), and generic OSes, prioritize gcc.
			list="gcc clang cc"
			;;
	esac

	echo "${list}"
}

#
# get_cxx_search_list
#
# Print the space-separated list of C++ compilers to search for, in order
# of priority. The order depends on the global os_name.
#
get_cxx_search_list()
{
	local list

	case "${os_name}" in
		OpenBSD|FreeBSD)
			# For OpenBSD and FreeBSD, prioritize c++ and clang++ over g++.
			list="c++ clang++ g++"
			;;
		*)
			# For Linux, Darwin (OS X), and generic OSes, prioritize g++.
			list="g++ clang++ c++"
			;;
	esac

	echo "${list}"
}

#
# select_tool SEARCH_LIST ENV_VALUE
#
# Print the first compiler/tool that responds successfully to '--version'.
# SEARCH_LIST is the default list of candidates, in priority order.
# ENV_VALUE is the contents of the environment variable associated with
# the tool type (e.g. CC, CXX, PYTHON); if non-empty, it is tried before
# the defaults. An empty line is printed if no candidate works.
#
select_tool()
{
	local search_list CC_env the_cc cc

	search_list=$1
	CC_env=$2

	# If CC_env contains something, add it to the beginning of our default
	# search list.
	if [ -n "${CC_env}" ]; then
		search_list="${CC_env} ${search_list}"
	fi

	# Initialize our selected compiler/tool to empty.
	the_cc=""

	# Try each compiler/tool in the list and select the first one we find
	# that works and/or is present.
	for cc in ${search_list}; do
		if ${cc} --version > /dev/null 2>&1; then
			the_cc=${cc}
			break
		fi
	done

	# Return the selected compiler/tool.
	echo "${the_cc}"
}
#
# auto_detect
#
# Build and run a small program (config_detect.c plus bli_arch.c and
# bli_cpuid.c from the framework) with the compiler in found_cc, and print
# whatever that program prints, which is taken as the detected
# sub-configuration name. An empty line is printed if the program could
# not be compiled.
#
# Globals read: found_cc, dist_path, is_win, cc_vendor, LIBPTHREAD
#
# NOTE(review): the bli_*, main_c*, autodetect_x and ldflags variables are
# left as globals, as in the original; it is not visible from here whether
# other parts of the script read them.
#
auto_detect()
{
	local cc cflags config_defines detected_config

	# Use the same compiler that was found earlier.
	cc="${found_cc}"

	# Tweak the flags we use based on the compiler. This is mostly just
	# an opportunity to turn off annoying warnings that some compilers
	# may throw off.
	if [ "${cc}" == "clang" ]; then
		cflags="-Wno-tautological-compare"
	else
		cflags=
	fi

	# Locate our source files.
	bli_arch_c="bli_arch.c"
	bli_cpuid_c="bli_cpuid.c"
	main_c="config_detect.c"

	bli_arch_c_filepath=$(find "${dist_path}/frame" -name "${bli_arch_c}")
	bli_cpuid_c_filepath=$(find "${dist_path}/frame" -name "${bli_cpuid_c}")
	main_c_filepath=$(find "${dist_path}/build" -name "${main_c}")

	# Locate headers needed directly by the above files.
	bli_arch_h="bli_arch.h"
	bli_cpuid_h="bli_cpuid.h"
	bli_typed_h="bli_type_defs.h"

	bli_arch_h_filepath=$(find "${dist_path}/frame" -name "${bli_arch_h}")
	bli_cpuid_h_filepath=$(find "${dist_path}/frame" -name "${bli_cpuid_h}")
	bli_typed_h_filepath=$(find "${dist_path}/frame" -name "${bli_typed_h}")

	bli_arch_h_path=${bli_arch_h_filepath%/${bli_arch_h}}
	bli_cpuid_h_path=${bli_cpuid_h_filepath%/${bli_cpuid_h}}
	bli_typed_h_path=${bli_typed_h_filepath%/${bli_typed_h}}

	# Locate other headers needed by bli_type_defs.h.
	bli_pthread_h="bli_pthread.h"
	bli_pthread_h_filepath=$(find "${dist_path}/frame" -name "${bli_pthread_h}")
	bli_pthread_h_path=${bli_pthread_h_filepath%/${bli_pthread_h}}

	bli_malloc_h="bli_malloc.h"
	bli_malloc_h_filepath=$(find "${dist_path}/frame" -name "${bli_malloc_h}")
	bli_malloc_h_path=${bli_malloc_h_filepath%/${bli_malloc_h}}

	# Define the executable name.
	autodetect_x="auto-detect.x"

	# Create #defines for all of the BLIS_CONFIG_ macros in bli_cpuid.c.
	# stdin is tied to /dev/null so that grep cannot block waiting for
	# input if bli_cpuid.c was not found (empty file path).
	config_defines=$(grep BLIS_CONFIG_ ${bli_cpuid_c_filepath} < /dev/null \
	                 | sed -e 's/#ifdef /-D/g')

	# Set the linker flags. We need pthreads because it is needed for
	# parts of bli_arch.c unrelated to bli_arch_string(), which is called
	# by the main() function in ${main_c}.
	if [[ $is_win == no || "$cc_vendor" != "clang" ]]; then
		ldflags="${LIBPTHREAD--lpthread}"
	fi

	# Compile the auto-detect program using source code inside the
	# framework, and run it only if compilation succeeded.
	# NOTE: -D_GNU_SOURCE is needed to enable POSIX extensions to
	# pthreads (i.e., barriers).
	# NOTE: the linker flags come after all of the source files, since a
	# library is only searched for symbols referenced by inputs that
	# precede it on the command line.
	detected_config=""
	if ${cc} ${config_defines} \
	         -DBLIS_CONFIGURETIME_CPUID \
	         -I${bli_cpuid_h_path} \
	         -I${bli_arch_h_path} \
	         -I${bli_typed_h_path} \
	         -I${bli_pthread_h_path} \
	         -I${bli_malloc_h_path} \
	         -std=c99 -D_GNU_SOURCE \
	         ${cflags} \
	         ${bli_arch_c_filepath} \
	         ${bli_cpuid_c_filepath} \
	         ${main_c_filepath} \
	         ${ldflags} \
	         -o ${autodetect_x}
	then
		# Run the auto-detect program.
		detected_config=$(./${autodetect_x})
	fi

	# Remove the executable file.
	rm -f ./${autodetect_x}

	# Return the detected sub-configuration name.
	echo "${detected_config}"
}

#
# has_libmemkind
#
# Print 'yes' if a simple program that calls hbw_malloc() can be compiled
# and linked against libmemkind using found_cc, and 'no' otherwise.
#
has_libmemkind()
{
	local main_c main_c_filepath LDFLAGS_mk binname rval

	# Path to libmemkind detection source file.
	main_c="libmemkind_detect.c"
	main_c_filepath=$(find "${dist_path}/build" -name "${main_c}")

	# Add libmemkind to LDFLAGS.
	LDFLAGS_mk="${LDFLAGS} -lmemkind"

	# Binary executable filename.
	binname="libmemkind-detect.x"

	# Attempt to compile a simple main() program that contains a call
	# to hbw_malloc() and that links to libmemkind. The result depends
	# only on whether that compile/link step succeeds.
	if ${found_cc} -o ${binname} ${main_c_filepath} ${LDFLAGS_mk} 2> /dev/null; then
		rval='yes'
	else
		rval='no'
	fi

	# Remove the executable generated above.
	rm -f ./${binname}

	echo "${rval}"
}

#
# has_pragma_omp_simd
#
# Print 'yes' if found_cc can compile a simple program containing a
# '#pragma omp simd' with -fopenmp-simd, and 'no' otherwise.
#
has_pragma_omp_simd()
{
	local main_c main_c_filepath binname rval

	# Path to omp-simd detection source file.
	main_c="omp_simd_detect.c"
	main_c_filepath=$(find "${dist_path}/build" -name "${main_c}")

	# Binary executable filename.
	binname="omp_simd-detect.x"

	# Attempt to compile a simple main() program that contains a
	# #pragma omp simd. The result depends only on whether that compile
	# step succeeds.
	if ${found_cc} -std=c99 -O3 -march=native -fopenmp-simd \
	               -o ${binname} ${main_c_filepath} 2> /dev/null; then
		rval='yes'
	else
		rval='no'
	fi

	# Remove the executable generated above.
	rm -f ./${binname}

	echo "${rval}"
}
#
# echoerr MESSAGE...
#
# Print an error message prefixed with the script name.
# NOTE: the message is written to stdout; redirection to stderr was
# commented out in the original and is left disabled here.
#
echoerr()
{
	# script_name is passed as an argument, not embedded in the format
	# string, so that a '%' in the name cannot be taken as a conversion.
	printf '%s: error: %s\n' "${script_name}" "$*" #>&2;
}

#
# echowarn MESSAGE...
#
# Print a warning message prefixed with the script name (to stdout; see
# the note for echoerr).
#
echowarn()
{
	printf '%s: warning: %s\n' "${script_name}" "$*" #>&2;
}

#
# blacklistcc_add CONFIG
#
# Add sub-configuration CONFIG to the blacklist (config_blist) because the
# compiler does not support it, printing a warning the first time.
#
blacklistcc_add()
{
	# Check whether we've already blacklisted the given sub-config so
	# we don't output redundant messages.
	if [ "$(is_in_list "$1" "${config_blist}")" == "false" ]; then

		echowarn "${cc_vendor} ${cc_version} does not support '$1'; adding to blacklist."
		config_blist="${config_blist} $1"
	fi
}

#
# blacklistbu_add CONFIG
#
# Add sub-configuration CONFIG to the blacklist (config_blist) because the
# assembler does not support it, printing a warning the first time.
#
blacklistbu_add()
{
	# Check whether we've already blacklisted the given sub-config so
	# we don't output redundant messages.
	if [ "$(is_in_list "$1" "${config_blist}")" == "false" ]; then

		echowarn "assembler ('as' ${bu_version}) does not support '$1'; adding to blacklist."
		config_blist="${config_blist} $1"
	fi
}

# Reset the blacklist (config_blist) to empty.
blacklist_init()
{
	config_blist=""
}

# Remove duplicates and superfluous whitespace from the blacklist.
blacklist_cleanup()
{
	config_blist=$(rm_duplicate_words "${config_blist}")
	config_blist=$(canonicalize_ws "${config_blist}")
}

# Report an unsupported compiler version and exit with status 1.
echoerr_unsupportedcc()
{
	echoerr "${script_name}: *** Unsupported compiler version: ${cc_vendor} ${cc_version}."
	exit 1
}

# Report an unsupported python version and exit with status 1.
echoerr_unsupportedpython()
{
	echoerr "${script_name}: *** Unsupported python version: ${python_version}."
	exit 1
}

#
# get_binutils_version
#
# Query the version of the assembler (${AS}, or 'as' if AS is unset or
# empty) and report it.
#
# Globals written: binutil, bu_string, bu_version, bu_major, bu_minor,
#                  bu_revision
#
get_binutils_version()
{
	binutil=${AS:-as}

	# Query the full binutils version string output. This includes the
	# version string along with (potentially) a bunch of other textual
	# clutter.
	if [ "$(uname -s)" == "Darwin" ]; then

		# The default OS X assembler responds only to '-v', hangs
		# indefinitely if not given an argument, outputs the result to
		# stderr, and creates an 'a.out' by default; hence the arguments
		# and redirection used here.
		bu_string=$(${binutil} -v /dev/null -o /dev/null 2>&1)
	else
		bu_string=$(${binutil} --version 2>/dev/null)
	fi

	# Query the binutils version number.
	# The last stage ({ read first rest ; echo $first ; }) keeps only the
	# first match reported by grep.
	bu_version=$(echo "${bu_string}" \
	             | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*' \
	             | { read -r first rest ; echo "${first}" ; })

	# Parse the version number into its major, minor, and revision
	# components.
	bu_major=$(echo "${bu_version}" | cut -d. -f1)
	bu_minor=$(echo "${bu_version}" | cut -d. -f2)
	bu_revision=$(echo "${bu_version}" | cut -d. -f3)

	echo "${script_name}: found assembler ('as') version ${bu_version} (maj: ${bu_major}, min: ${bu_minor}, rev: ${bu_revision})."
}
#
# get_python_search_list
#
# Print the space-separated list of python interpreters to search for, in
# order of priority.
#
get_python_search_list()
{
	local list

	# For Linux, Darwin (OS X), and generic OSes, prioritize 'python'.
	list="python python3 python2"

	echo "${list}"
}

#
# get_python_version
#
# Query the version of the interpreter in found_python and report it.
#
# Globals read:    found_python, script_name
# Globals written: python_version, python_major, python_minor,
#                  python_revision
#
get_python_version()
{
	local python vendor_string

	python="${found_python}"

	# Query the python version. This includes the version number along
	# with other text, such as "Python ".
	# NOTE: Python may echo its version info to stderr rather than
	# stdout, and thus we redirect stderr to stdout and capture that.
	vendor_string="$(${python} --version 2>&1)"

	# Drop any preceding text and save only the first numbers and what
	# comes after.
	python_version=$(echo "${vendor_string}" \
	                 | sed -e "s/[a-zA-Z_ ]* \([0-9]*\..*\)/\1/g")

	# Parse the version number into its major, minor, and revision
	# components.
	python_major=$(echo "${python_version}" | cut -d. -f1)
	python_minor=$(echo "${python_version}" | cut -d. -f2)
	python_revision=$(echo "${python_version}" | cut -d. -f3)

	echo "${script_name}: found python version ${python_version} (maj: ${python_major}, min: ${python_minor}, rev: ${python_revision})."
}
#
# check_python
#
# Verify that the python version previously parsed into python_major and
# python_minor is supported; otherwise report the problem and exit via
# echoerr_unsupportedpython.
#
# Python requirements:
#
#   python1: no versions supported
#   python2: 2.7+
#   python3: 3.4+
#
# NOTE: It's actually unclear whether python 3.0 through 3.3.x would work.
# Python 3.5 is the oldest python3 that I have available to test with, and
# I only know that 3.4 will work thanks to feedback from Dave Love. So it's
# quite possible that some of those "unsupported" python3 versions are
# sufficient. -FGVZ
#
check_python()
{
	# Python 1.x is unsupported.
	if [ "${python_major}" -eq 1 ]; then
		echoerr_unsupportedpython
	fi

	# Python 2.6.x or older is unsupported.
	if [ "${python_major}" -eq 2 ]; then
		if [ "${python_minor}" -lt 7 ]; then
			echoerr_unsupportedpython
		fi
	fi

	# Python 3.3.x or older is unsupported.
	if [ "${python_major}" -eq 3 ]; then
		if [ "${python_minor}" -lt 4 ]; then
			echoerr_unsupportedpython
		fi
	fi

	echo "${script_name}: python ${python_version} appears to be supported."
}

#
# get_compiler_version
#
# Query the vendor and version of the compiler in found_cc and report
# them.
#
# Globals read:    found_cc, script_name
# Globals written: cc_vendor, cc_version, cc_major, cc_minor, cc_revision
#
get_compiler_version()
{
	local cc vendor_string

	cc="${found_cc}"

	# Query the full vendor version string output. This includes the
	# version number along with (potentially) a bunch of other textual
	# clutter.
	# NOTE: This maybe should use merged stdout/stderr rather than only
	# stdout. But it works for now.
	vendor_string="$(${cc} --version 2>/dev/null)"

	# Query the compiler "vendor" (ie: the compiler's simple name).
	# The last stage ({ read first rest ; echo $first ; }) keeps only the
	# first match reported by grep.
	cc_vendor=$(echo "${vendor_string}" \
	            | grep -E -o 'icc|gcc|clang|emcc|pnacl|IBM' \
	            | { read -r first rest ; echo "${first}" ; })

	# Isolate the version number.
	if [ "${cc_vendor}" = "icc" ] || [ "${cc_vendor}" = "gcc" ]; then
		cc_version=$(${cc} -dumpversion)
	else
		cc_version=$(echo "${vendor_string}" \
		             | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*' \
		             | { read -r first rest ; echo "${first}" ; })
	fi

	# Parse the version number into its major, minor, and revision
	# components.
	cc_major=$(echo "${cc_version}" | cut -d. -f1)
	cc_minor=$(echo "${cc_version}" | cut -d. -f2)
	cc_revision=$(echo "${cc_version}" | cut -d. -f3)

	# gcc 7 introduced new behavior to -dumpversion whereby only the major
	# version component is output. However, as part of this change, gcc 7
	# also introduced a new option, -dumpfullversion, which is guaranteed
	# to always output the major, minor, and revision numbers. Thus, if
	# we're using gcc and its version is 7 or later, we re-query and
	# re-parse the version string.
	# NOTE: the two conditions are tested separately (rather than with
	# '-a') and an empty cc_major is treated as 0, so that an unparsable
	# version cannot produce a malformed test expression.
	if [ "${cc_vendor}" = "gcc" ] && [ "${cc_major:-0}" -ge 7 ]; then

		# Re-query the version number using -dumpfullversion.
		cc_version=$(${cc} -dumpfullversion)

		# And parse the result.
		cc_major=$(echo "${cc_version}" | cut -d. -f1)
		cc_minor=$(echo "${cc_version}" | cut -d. -f2)
		cc_revision=$(echo "${cc_version}" | cut -d. -f3)
	fi

	echo "${script_name}: found ${cc_vendor} version ${cc_version} (maj: ${cc_major}, min: ${cc_minor}, rev: ${cc_revision})."
}
#
# check_compiler
#
# Compare the previously detected compiler vendor/version (cc_vendor,
# cc_major, cc_minor, cc_revision) against the known compiler
# requirements. Unsupported compilers cause an exit via
# echoerr_unsupportedcc; compilers that lack support for particular
# sub-configurations cause those sub-configurations to be blacklisted via
# blacklistcc_add.
#
# Compiler requirements
#
# General:
#
#   icc 15+, gcc 4.7+, clang 3.3+
#
# Specific:
#
#   skx:         icc 15.0.1+, gcc 6.0+, clang 3.9+
#   knl:         icc 14.0.1+, gcc 5.0+, clang 3.9+
#   haswell:     any
#   sandybridge: any
#   penryn:      any
#
#   zen:         gcc 6.0+[1], clang 4.0+
#   excavator:   gcc 4.9+, clang 3.5+
#   steamroller: any
#   piledriver:  any
#   bulldozer:   any
#
#   cortexa57:   any
#   cortexa15:   any
#   cortexa9:    any
#
#   generic:     any
#
# Note: These compiler requirements were originally modeled after similar
# requirements encoded into TBLIS's configure.ac [2].
#
# [1] While gcc 6.0 or newer is needed for zen support (-march=znver1),
#     we relax this compiler version constraint a bit by targeting bdver4
#     and then disabling the instruction sets that were removed in the
#     transition from bdver4 to znver1. (See config/zen/make_defs.mk for
#     the specific compiler flags used.)
# [2] https://github.com/devinamatthews/tblis/
#
check_compiler()
{
	local cc cc_vendor_string

	cc="${found_cc}"

	echo "${script_name}: checking for blacklisted configurations due to ${cc} ${cc_version}."

	# gcc
	if [ "x${cc_vendor}" = "xgcc" ]; then

		if [ "${cc_major}" -lt 4 ]; then
			echoerr_unsupportedcc
		fi
		if [ "${cc_major}" -eq 4 ]; then
			blacklistcc_add "knl"
			if [ "${cc_minor}" -lt 7 ]; then
				echoerr_unsupportedcc
			fi
			if [ "${cc_minor}" -lt 9 ]; then
				blacklistcc_add "excavator"
				blacklistcc_add "zen"
			fi
		fi
		if [ "${cc_major}" -lt 5 ]; then
			blacklistcc_add "knl"
		fi
		if [ "${cc_major}" -lt 6 ]; then
			# Normally, zen would be blacklisted for gcc prior to 6.0.
			# However, we have a workaround in place in the zen
			# configuration's make_defs.mk file that starts with bdver4
			# and disables the instructions that were removed in znver1.
			# Thus, zen is only blacklisted above (gcc older than 4.9).
			blacklistcc_add "skx"
			# gcc 5.x may support POWER9 but it is unverified.
			blacklistcc_add "power9"
		fi
	fi

	# icc
	if [ "x${cc_vendor}" = "xicc" ]; then

		if [ "${cc_major}" -lt 15 ]; then
			echoerr_unsupportedcc
		fi
		if [ "${cc_major}" -eq 15 ]; then
			if [ "${cc_revision}" -lt 1 ]; then
				blacklistcc_add "skx"
			fi
		fi
		if [ "${cc_major}" -eq 18 ]; then
			echo "${script_name}: ${cc} ${cc_version} is known to cause erroneous results. See https://github.com/flame/blis/issues/371 for details."
			blacklistcc_add "knl"
			blacklistcc_add "skx"
		fi
		if [ "${cc_major}" -ge 19 ]; then
			echo "${script_name}: ${cc} ${cc_version} is known to cause erroneous results. See https://github.com/flame/blis/issues/371 for details."
			echoerr_unsupportedcc
		fi
	fi

	# clang
	if [ "x${cc_vendor}" = "xclang" ]; then

		# Apple's clang uses its own version numbering, so we need the
		# compiler's full version text to tell the two apart. Use a global
		# vendor_string if one happens to be set; otherwise query the
		# compiler here. (get_compiler_version keeps its vendor_string in a
		# local variable, so it is not visible in this function.)
		cc_vendor_string="${vendor_string}"
		if [ -z "${cc_vendor_string}" ]; then
			cc_vendor_string="$(${cc} --version 2>/dev/null)"
		fi

		if echo "${cc_vendor_string}" | grep -q Apple; then

			if [ "${cc_major}" -lt 5 ]; then
				echoerr_unsupportedcc
			fi
			# See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
			if [ "${cc_major}" -eq 5 ]; then
				# Apple clang 5.0 is clang 3.4svn
				blacklistcc_add "excavator"
				blacklistcc_add "zen"
			fi
			if [ "${cc_major}" -lt 7 ]; then
				blacklistcc_add "knl"
				blacklistcc_add "skx"
			fi
		else
			if [ "${cc_major}" -lt 3 ]; then
				echoerr_unsupportedcc
			fi
			if [ "${cc_major}" -eq 3 ]; then
				if [ "${cc_minor}" -lt 3 ]; then
					echoerr_unsupportedcc
				fi
				if [ "${cc_minor}" -lt 5 ]; then
					blacklistcc_add "excavator"
					blacklistcc_add "zen"
				fi
				if [ "${cc_minor}" -lt 9 ]; then
					blacklistcc_add "knl"
					blacklistcc_add "skx"
				fi
			fi
			# clang older than 4.0 lacks native zen support, but zen is
			# not blacklisted for it; see the comment in the gcc section
			# regarding zen support.
		fi
	fi
}
#
# check_compiler_version_ranges
#
# Check for various compiler version ranges that may cause us issues in
# properly supporting those compiler versions within the BLIS build
# system, and record the results in the global 'yes'/'no' variables
# described below.
#
# range:    gcc < 4.9.0 (ie: 4.8.5 or older)
# variable: gcc_older_than_4_9_0
# comments:
#   These older versions of gcc may support microarchitectures such as
#   sandybridge, but the '-march=' flag uses a different label syntax.
#   In newer versions, '-march=sandybridge' is the preferred syntax [1].
#   However, in older versions, the syntax for the same compiler option
#   is '-march=corei7-avx' [2].
#
#   [1] https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
#   [2] https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
#
# range:    gcc < 6.1 (ie: 5.5 or older)
# variable: gcc_older_than_6_1_0
# comments:
#   These older versions of gcc do not explicitly support the Zen (Zen1)
#   microarchitecture; the newest microarchitectural value understood by
#   these versions is '-march=bdver4' [3]. However, basic support for
#   these older versions can be attained in a roundabout way by starting
#   with the instruction sets enabled by '-march=bdver4' and then
#   disabling the instruction sets that were removed in the transition
#   from Excavator to Zen, namely: FMA4, TBM, XOP, and LWP. Newer versions
#   of gcc support Zen via the '-march=znver1' option [4].
#
#   [3] https://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/x86-Options.html#x86-Options
#   [4] https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/x86-Options.html#x86-Options
#
# range:    gcc < 9.1 (ie: 8.3 or older)
# variable: gcc_older_than_9_1_0
# comments:
#   These older versions of gcc do not explicitly support the Zen2
#   microarchitecture; the newest microarchitectural value understood by
#   these versions is either '-march=znver1' (if !gcc_older_than_6_1_0)
#   [5] or '-march=bdver4' (if gcc_older_than_6_1_0) [3]. If gcc is 6.1 or
#   newer, '-march=znver1' may be used (since the instruction sets it
#   enables are a subset of those enabled by '-march=znver2'); otherwise,
#   '-march=bdver4' must be used in conjunction with disabling the
#   instruction sets that were removed in the transition from Excavator
#   to Zen, as described in the section above for gcc_older_than_6_1_0.
#   Newer versions of gcc support Zen2 via the '-march=znver2' option [6].
#
#   [5] https://gcc.gnu.org/onlinedocs/gcc-8.3.0/gcc/x86-Options.html#x86-Options
#   [6] https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/x86-Options.html#x86-Options
#
check_compiler_version_ranges()
{
	local cc

	cc="${found_cc}"

	gcc_older_than_4_9_0='no'
	gcc_older_than_6_1_0='no'
	gcc_older_than_9_1_0='no'

	echo "${script_name}: checking ${cc} ${cc_version} against known consequential version ranges."

	# gcc
	if [ "x${cc_vendor}" = "xgcc" ]; then

		# Check for gcc < 4.9.0 (ie: 4.8.5 or older). Major versions
		# below 4 are included so that the flag is accurate for them too.
		if [ "${cc_major}" -lt 4 ] ||
		   { [ "${cc_major}" -eq 4 ] && [ "${cc_minor}" -lt 9 ]; }; then
			echo "${script_name}: note: found ${cc} version older than 4.9.0."
			gcc_older_than_4_9_0='yes'
		fi

		# Check for gcc < 6.1.0 (ie: 5.5 or older).
		if [ "${cc_major}" -lt 6 ]; then
			echo "${script_name}: note: found ${cc} version older than 6.1."
			gcc_older_than_6_1_0='yes'
		fi

		# Check for gcc < 9.1.0 (ie: 8.3 or older).
		if [ "${cc_major}" -lt 9 ]; then
			echo "${script_name}: note: found ${cc} version older than 9.1."
			gcc_older_than_9_1_0='yes'
		fi
	fi

	# icc: no consequential version ranges are currently checked.
	if [ "x${cc_vendor}" = "xicc" ]; then
		:
	fi

	# clang: no consequential version ranges are currently checked.
	if [ "x${cc_vendor}" = "xclang" ]; then
		:
	fi
}
# asm_fp=$(find ${asm_dir} -name "fma3.s") knows_fma3=$(try_assemble "${cc}" "${cflags}" "${asm_fp}") if [ "x${knows_fma3}" == "xno" ]; then blacklistbu_add "haswell" blacklistbu_add "piledriver" blacklistbu_add "steamroller" blacklistbu_add "excavator" blacklistbu_add "skx" fi # # Check support for AVX-512f (knl, skx). # # The assembler on OS X won't recognize AVX-512 without help. if [ "${cc_vendor}" == "clang" ]; then cflags="-march=knl" fi asm_fp=$(find ${asm_dir} -name "avx512f.s") knows_avx512f=$(try_assemble "${cc}" "${cflags}" "${asm_fp}") if [ "x${knows_avx512f}" == "xno" ]; then blacklistbu_add "knl" blacklistbu_add "skx" fi # # Check support for AVX-512dq (skx). # # The assembler on OS X won't recognize AVX-512 without help. if [ "${cc_vendor}" == "clang" ]; then cflags="-march=skylake-avx512" fi asm_fp=$(find ${asm_dir} -name "avx512dq.s") knows_avx512dq=$(try_assemble "${cc}" "${cflags}" "${asm_fp}") if [ "x${knows_avx512dq}" == "xno" ]; then blacklistbu_add "skx" fi } try_assemble() { local cc cflags asm_src asm_base asm_bin rval cc="$1" cflags="$2" asm_src="$3" # Construct the filename to the .o file corresponding to asm_src. # (Strip the filepath, then the file extension, and then add ".o".) asm_base=${asm_src##*/} asm_base=${asm_base%.*} asm_bin="${asm_base}.o" # Try to assemble the file. ${cc} ${cflags} -c ${asm_src} -o ${asm_bin} > /dev/null 2>&1 if [ "$?" == 0 ]; then rval='yes' else rval='no' fi # Remove the object file. rm -f "${asm_bin}" # Return the result. echo "${rval}" } set_default_version() { local gitdir version_file gd_stderr git_describe_str git_error new_version_str gitdir='.git' # The path to the version file. version_file=$1 echo "${script_name}: determining default version string." # Check if the .git dir exists; if it does not, we do nothing. if [ -d "${dist_path}/${gitdir}" ]; then echo "${script_name}: found '${gitdir}' directory; assuming git clone." echo "${script_name}: executing: git describe --tags." 
gd_stderr="git_describe_stderr.txt" # Query git for the version string, which is simply the current tag, # followed by a number signifying how many commits have transpired # since the tag, followed by a 'g' and a shortened hash tab. Capture # stderr to a file. git_describe_str=$(git -C ${dist_path} describe --tags 2> ${gd_stderr}) # Pull in whatever error message was generated, if any, and delete # the file. git_error=$(cat ${gd_stderr}) # Remove the stderr file. rm -f ${gd_stderr} # If git returned an error, don't do anything. if [ -n "${git_error}" ]; then echo "${script_name}: git returned an error: '${git_error}'." echo "${script_name}: using string from unmodified version file." # Use what's in the version file as-is. version=$(cat "${version_file}") else echo "${script_name}: got back ${git_describe_str}." # Strip off the commit hash label. new_version_str=$(echo ${git_describe_str} | cut -d- -f-2) echo "${script_name}: truncating to ${new_version_str}." # Write the new version string to the version file. #echo "${new_version_str}" > ${version_file} # Set the version variable. version="${new_version_str}" fi else echo "${script_name}: could not find '${gitdir}' directory; using unmodified version file." # Use what's in the version file as-is. version=$(cat "${version_file}") fi } # # -- main function ------------------------------------------------------------- # main() { #declare -A config_registry #declare -A kernel_registry #declare -A kconfig_registry # -- Basic names and paths -- # The name of the script, stripped of any preceeding path. script_name=${0##*/} # The path to the script. We need this to find the top-level directory # of the source distribution in the event that the user has chosen to # build elsewhere. dist_path=${0%/${script_name}} # The path to the directory in which we are building. We do this to # make explicit that we distinguish between the top-level directory # of the distribution and the directory in which we are building. 
cur_dirpath="." # The file in which the version string is kept. version_file="version" version_filepath="${dist_path}/${version_file}" # The name of and path to the directory named "build" in the top-level # directory of the source distribution. build_dir='build' build_dirpath="${dist_path}/${build_dir}" # The name/path to the registry (master list) of supported configurations. registry_file="config_registry" registry_filepath=${dist_path}/${registry_file} # The names/paths for the template config.mk.in and its instantiated # counterpart. config_mk_in='config.mk.in' config_mk_out='config.mk' config_mk_in_path="${build_dirpath}/${config_mk_in}" config_mk_out_path="${cur_dirpath}/${config_mk_out}" # The names/paths for the template bli_config.h.in and its instantiated # counterpart. bli_config_h_in='bli_config.h.in' bli_config_h_out='bli_config.h' bli_config_h_in_path="${build_dirpath}/${bli_config_h_in}" bli_config_h_out_path="${cur_dirpath}/${bli_config_h_out}" # Path to 'mirror-tree.sh' script. mirror_tree_sh="${build_dirpath}/mirror-tree.sh" # Path to 'gen-make-frags.sh' script and directory. gen_make_frags_dirpath="${build_dirpath}/gen-make-frags" gen_make_frags_sh="${gen_make_frags_dirpath}/gen-make-frag.sh" # The name of the (top-level) configuration directory. config_dir='config' config_dirpath="${dist_path}/${config_dir}" # The name of the (top-level) kernels directory. kernels_dir='kernels' kernels_dirpath="${dist_path}/${kernels_dir}" # The name of the (top-level) reference kernels directory. refkern_dir='ref_kernels' refkern_dirpath="${dist_path}/${refkern_dir}" # The root directory of the BLIS framework. frame_dir='frame' frame_dirpath="${dist_path}/${frame_dir}" # The name of the sandbox directory. sandbox_dir='sandbox' sandbox_dirpath="${dist_path}/${sandbox_dir}" # The name of the directory in which object files will be kept. obj_dir='obj' obj_dirpath="${cur_dirpath}/${obj_dir}" # The name of the directory in which libraries will be kept. 
lib_dir='lib' lib_dirpath="${cur_dirpath}/${lib_dir}" # The name of the directory in which headers will be kept. include_dir='include' include_dirpath="${cur_dirpath}/${include_dir}" # The name of the directory in which the BLAS test suite is kept. blastest_dir='blastest' # The name of the directory in which the BLIS test suite is kept. testsuite_dir='testsuite' # -- Version-related -- # The shared library (.so) version file. so_version_file='so_version' so_version_filepath="${dist_path}/${so_version_file}" # The major and minor/build .so version numbers. so_version_major='' so_version_minorbuild='' # -- configure options -- # Define the default prefix so that the print_usage() function can # output it in the --help text. prefix_def='/usr/local' # The installation prefix, assigned its default value, and a flag to # track whether or not it was given by the user. prefix=${prefix_def} prefix_flag='' # The installation exec_prefix, assigned its default value, and a flag to # track whether or not it was given by the user. exec_prefix='${prefix}' exec_prefix_flag='' # The installation libdir, assigned its default value, and a flag to # track whether or not it was given by the user. libdir='${exec_prefix}/lib' libdir_flag='' # The installation includedir, assigned its default value, and a flag to # track whether or not it was given by the user. includedir='${prefix}/include' includedir_flag='' # The installation sharedir, assigned its default value, and a flag to # track whether or not it was given by the user. sharedir='${prefix}/share' sharedir_flag='' # The preset value of CFLAGS and LDFLAGS (ie: compiler and linker flags # to use in addition to those determined by the build system). cflags_preset='' ldflags_preset='' # The user-given debug type and a flag indicating it was given. debug_type='' debug_flag='' # The threading flag. threading_model='off' # The method of assigning micropanels to threads in the JR and JR loops. thread_part_jrir='slab' # Option variables. 
quiet_flag='' show_config_list='' # Additional flags. enable_verbose='no' enable_arg_max_hack='no' enable_static='yes' enable_shared='yes' export_shared='public' enable_pba_pools='yes' enable_sba_pools='yes' enable_mem_tracing='no' int_type_size=0 blas_int_type_size=32 enable_blas='yes' enable_cblas='no' enable_mixed_dt='yes' enable_mixed_dt_extra_mem='yes' enable_sup_handling='yes' enable_memkind='' # The default memkind value is determined later on. force_version='no' # The sandbox flag and name. sandbox_flag='' sandbox='' # -- Configuration registry -- # The name of the chosen configuration (the configuration "family"). config_name='' # The list of sub-configurations associated with config_name. config_list='' # The list of kernel sets that will be needed by the sub-configurations # in config_list.. kernel_list='' # The list of kernel:sub-configuration pairs for all kernels contained # in kernel_list. kconfig_map='' # -- Out-of-tree -- # Whether we are building out-of-tree. configured_oot="no" # Dummy file. Used to check whether the cwd is the same as the top-level # source distribution directory. dummy_file='_blis_dir_detect.tmp' # -- Command line option/argument parsing ---------------------------------- found=true while $found = true; do # Process our command line options. 
unset OPTIND while getopts ":hp:d:e:s:t:r:qci:b:-:" opt; do case $opt in -) case "$OPTARG" in help) print_usage ;; quiet) quiet_flag=1 ;; prefix=*) prefix_flag=1 prefix=${OPTARG#*=} ;; exec-prefix=*) exec_prefix_flag=1 exec_prefix=${OPTARG#*=} ;; libdir=*) libdir_flag=1 libdir=${OPTARG#*=} ;; includedir=*) includedir_flag=1 includedir=${OPTARG#*=} ;; sharedir=*) sharedir_flag=1 sharedir=${OPTARG#*=} ;; enable-debug) debug_flag=1 debug_type=noopt ;; enable-debug=*) debug_flag=1 debug_type=${OPTARG#*=} ;; disable-debug) debug_flag=0 ;; enable-verbose-make) enable_verbose='yes' ;; disable-verbose-make) enable_verbose='no' ;; enable-arg-max-hack) enable_arg_max_hack='yes' ;; disable-arg-max-hack) enable_arg_max_hack='no' ;; enable-static) enable_static='yes' ;; disable-static) enable_static='no' ;; enable-shared) enable_shared='yes' ;; disable-shared) enable_shared='no' ;; export-shared=*) export_shared=${OPTARG#*=} ;; enable-threading=*) threading_model=${OPTARG#*=} ;; disable-threading) threading_model='off' ;; thread-part-jrir=*) thread_part_jrir=${OPTARG#*=} ;; enable-pba-pools) enable_pba_pools='yes' ;; disable-pba-pools) enable_pba_pools='no' ;; enable-sba-pools) enable_sba_pools='yes' ;; disable-sba-pools) enable_sba_pools='no' ;; enable-mem-tracing) enable_mem_tracing='yes' ;; disable-mem-tracing) enable_mem_tracing='no' ;; enable-sandbox=*) sandbox_flag=1 sandbox=${OPTARG#*=} ;; disable-sandbox) sandbox_flag=0 ;; int-size=*) int_type_size=${OPTARG#*=} ;; blas-int-size=*) blas_int_type_size=${OPTARG#*=} ;; enable-blas) enable_blas='yes' ;; disable-blas) enable_blas='no' ;; enable-cblas) enable_cblas='yes' ;; disable-cblas) enable_cblas='no' ;; enable-mixed-dt) enable_mixed_dt='yes' ;; disable-mixed-dt) enable_mixed_dt='no' ;; enable-mixed-dt-extra-mem) enable_mixed_dt_extra_mem='yes' ;; disable-mixed-dt-extra-mem) enable_mixed_dt_extra_mem='no' ;; enable-sup-handling) enable_sup_handling='yes' ;; disable-sup-handling) enable_sup_handling='no' ;; with-memkind) 
enable_memkind='yes' ;; without-memkind) enable_memkind='no' ;; force-version=*) force_version=${OPTARG#*=} ;; show-config-list) show_config_list=1 ;; *) print_usage ;; esac;; h) print_usage ;; p) prefix_flag=1 prefix=$OPTARG ;; d) debug_flag=1 debug_type=$OPTARG ;; e) export_shared=$OPTARG ;; s) sandbox_flag=1 sandbox=$OPTARG ;; q) quiet_flag=1 ;; t) threading_model=$OPTARG ;; r) thread_part_jrir=$OPTARG ;; i) int_type_size=$OPTARG ;; b) blas_int_type_size=$OPTARG ;; c) show_config_list=1 ;; \?) print_usage ;; esac done shift $(($OPTIND - 1)) # Parse environment variables found=false while [ $# -gt 0 ]; do case $1 in *=*) var=`expr "$1" : '\([^=]*\)='` value=`expr "$1" : '[^=]*=\(.*\)'` eval $var=\$value export $var shift found=true ;; *) break ;; esac done done # -- Check the operating system -------------------------------------------- os_name=$(uname -s) os_vers=$(uname -r) echo "${script_name}: detected ${os_name} kernel version ${os_vers}." # Define a single variable off of which we can branch to tell if we are # building for Windows. is_win=no if [[ $os_name == MSYS* ]] || \ [[ $os_name == MINGW* ]] || \ [[ $os_name == CYGWIN* ]] ; then is_win=yes fi # -- Find a python interpreter --------------------------------------------- # Acquire the python search order. This may vary based on the os found # above. python_search_list=$(get_python_search_list) echo "${script_name}: python interpeter search list is: ${python_search_list}." # Find a working python interpreter. found_python=$(select_tool "${python_search_list}" "${PYTHON}") # If we didn't find any working python interpreters, we print an error # message. if [ -z "${found_python}" ]; then echo "${script_name}: *** Could not find working python interperter! Cannot continue." exit 1 fi echo "${script_name}: using '${found_python}' python interpreter." # -- Check the python version ---------------------------------------------- # Check the python interpreter's version. 
get_python_version check_python # -- Find a C compiler ----------------------------------------------------- # Acquire the compiler search order. This will vary based on the os found # above. cc_search_list=$(get_cc_search_list) echo "${script_name}: C compiler search list is: ${cc_search_list}." # Find a working C compiler. found_cc=$(select_tool "${cc_search_list}" "${CC}") # If we didn't find any working C compilers, we print an error message. if [ -z "${found_cc}" ]; then echo "${script_name}: *** Could not find working C compiler! Cannot continue." exit 1 fi echo "${script_name}: using '${found_cc}' C compiler." # -- Find a C++ compiler --------------------------------------------------- # Acquire the compiler search order. This will vary based on the os # found above. cxx_search_list=$(get_cxx_search_list) echo "${script_name}: C++ compiler search list is: ${cxx_search_list}." # Find a working C++ compiler. NOTE: We can reuse the select_tool() # function since it is written in a way that is general-purpose. found_cxx=$(select_tool "${cxx_search_list}" "${CXX}") # If we didn't find any working C++ compilers, we print an error message. if [ -z "${found_cxx}" ]; then echo "${script_name}: Could not find working C++ compiler! C++ will not be available in sandbox." found_cxx="c++notfound" fi echo "${script_name}: using '${found_cxx}' C++ compiler (for sandbox only)." # -- Check the compiler version -------------------------------------------- # Initialize the blacklist to empty. blacklist_init # Check the compiler's version. Certain versions of certain compilers # will preclude building certain sub-configurations, which are added # to a blacklist. We also make note of certain version ranges that # will be useful to know about later. get_compiler_version check_compiler check_compiler_version_ranges # Now check the assembler's ability to assemble code. Older versions # of binutils may not be aware of certain instruction sets. 
Those # sub-configurations employing kernels that use such instruction sets # will also be blacklisted. get_binutils_version check_assembler # Remove duplicates and whitespace from the blacklist. blacklist_cleanup if [ -n "${config_blist}" ]; then echo "${script_name}: configuration blacklist:" echo "${script_name}: ${config_blist}" fi # -- Read the configuration registry --------------------------------------- # Make sure the config registry file exists and can be opened. if [ ! -f "${registry_filepath}" ]; then echo "${script_name}: could not open '${registry_file}' file; cannot continue." echo "${script_name}: BLIS distribution appears to be incomplete." echo "${script_name}: *** Please verify source distribution." exit 1 fi # Read the registered configuration names and lists into associative # arrays. echo -n "${script_name}: reading configuration registry..." read_registry_file ${registry_filepath} echo "done." # Report if additional configurations needed to be blacklisted. # NOTE: This branch should never execute so long as indirect blacklisting # is disabled. See comment regarding issue #214 in the definition of # pass_config_kernel_registries(). if [ -n "${indirect_blist}" ]; then echo "${script_name}: needed to indirectly blacklist additional configurations:" echo "${script_name}: ${indirect_blist}" fi # -- Acquire the BLIS version ---------------------------------------------- # Set the 'version' variable to the default value (the 'git describe' # augmented instance of whatever is in the 'version' file if this is a git # clone, or whatever is in the 'version' file unmodified if it is a bare # source release). set_default_version "${version_filepath}" # Initial message. echo "${script_name}: starting configuration of BLIS ${version}." # Check if the user requested a custom version string. if [ "x${force_version}" = "xno" ]; then echo "${script_name}: configuring with official version string." 
else echo "${script_name}: configuring with custom version string '${force_version}'." version="${force_version}" fi # -- Acquire the shared library (.so) versions ----------------------------- # The first line of the 'so_version' file contains the .so major version. so_version_major=$(cat ${so_version_filepath} | sed -n "1p") # The second line contains the minor and build .so version numbers # (separated by a '.'). so_version_minorbuild=$(cat ${so_version_filepath} | sed -n "2p") echo "${script_name}: found shared library .so version '${so_version_major}.${so_version_minorbuild}'." echo "${script_name}: .so major version: ${so_version_major}" echo "${script_name}: .so minor.build version: ${so_version_minorbuild}" # -- Various pre-configuration checks -------------------------------------- # Set config_name based on the number of arguments leftover (after command # line option processing). if [ $# = "0" ]; then #configs_avail="auto "$(ls ${config_dirpath}) echo "${script_name}: " echo "${script_name}: *** No configuration given! ***" echo "${script_name}: " echo "${script_name}: Default configuration behavior is not implemented (for your" echo "${script_name}: own safety). Please re-run '${script_name}' and specify one" echo "${script_name}: of the existing configurations in the source distribution's" echo "${script_name} '${registry_file}' file:" echo "${script_name}: " #for k in "${!config_registry[@]}"; do for cr_var in ${!config_registry_*}; do #v=${config_registry[$k]} k=${cr_var##config_registry_}; v=${!cr_var} echo "${script_name}: $k (${v})" done echo "${script_name}: " exit 1 elif [ $# != "1" ]; then # more than one configuration argument given. print_usage fi if [ $1 = "auto" ]; then echo "${script_name}: automatic configuration requested." # Call the auto_detect() function and save the returned string in # config_name. config_name=$(auto_detect) echo "${script_name}: hardware detection driver returned '${config_name}'." 
else # Use the command line argument as the configuration name. config_name=$1 echo "${script_name}: manual configuration requested; configuring with '${config_name}'." fi # Use the selected config name to look up the list of configurations # and kernels associated with that name. #config_list=${config_registry[${config_name}]} #kernel_list=${kernel_registry[${config_name}]} config_list=$(query_array "config_registry" ${config_name}) kernel_list=$(query_array "kernel_registry" ${config_name}) # Use the config_registry and kernel_registry to build a kconfig_registry # for the selected config_name. build_kconfig_registry "${config_name}" # Print the configuration list and kernel list, if requested. if [ "${show_config_list}" == "1" ]; then echo "${script_name}: configuration list:" #for k in "${!config_registry[@]}"; do for cr_var in ${!config_registry_*}; do #v=${config_registry[$k]} k=${cr_var##config_registry_}; v=${!cr_var} echo "${script_name}: $k: ${v}" done echo "${script_name}: kernel list:" #for k in "${!kernel_registry[@]}"; do for kr_var in ${!kernel_registry_*}; do #v=${kernel_registry[$k]} k=${kr_var##kernel_registry_}; v=${!kr_var} echo "${script_name}: $k: ${v}" done echo "${script_name}: kernel-to-config map for '${config_name}':" #for k in "${!kconfig_registry[@]}"; do for kc_var in ${!kconfig_registry_*}; do #v=${kconfig_registry[$k]} k=${kc_var##kconfig_registry_}; v=${!kc_var} echo "${script_name}: $k: ${v}" done fi # For each kernel in the kernel list, reduce the list of associated # sub-configurations (in the kconfig_registry) to a singleton using # the following rules: # 1. If the list is a singleton, use that name. # 2. If the list contains a sub-configuration name that matches the # kernel name, use that name. # 3. Otherwise, use the first name in the list. # We use the chosen singleton to ceate a "kernel:subconfig" pair, which # we accumulate into a list. This list is the kernel-to-config map, or # kconfig_map. 
# We use a sorted version of kernel_list so that it ends up matching the # display order of the kconfig_registry above. kernel_list_sort=$(echo ${kernel_list} | xargs -n1 | sort -u) kconfig_map="" for kernel in ${kernel_list_sort}; do #configs="${kconfig_registry[$kernel]}" configs=$(query_array "kconfig_registry" ${kernel}) has_one_kernel=$(is_singleton "${configs}") contains_kernel=$(is_in_list "${kernel}" "${configs}") # Check if the list is a singleton. if [ "${has_one_kernel}" == "true" ]; then reducedclist="${configs}" # Check if the list contains a sub-config name that matches the kernel. elif [ "${contains_kernel}" == "true" ]; then reducedclist="${kernel}" # Otherwise, use the first name. else first_config=${configs%% *} reducedclist="${first_config}" fi # Create a new "kernel:subconfig" pair and add it to the kconfig_map # list, removing whitespace. new_pair="${kernel}:${reducedclist}" kconfig_map=$(canonicalize_ws "${kconfig_map} ${new_pair}") done if [ "${show_config_list}" == "1" ]; then echo "${script_name}: kernel-to-config map for '${config_name}' (chosen pairs):" for k in ${kconfig_map}; do echo "${script_name}: $k" done fi echo "${script_name}: checking configuration against contents of '${registry_file}'." # First, ensure that the config name is registered (ie: it is present # in the config_registry file). if [ -z "${config_list}" ]; then # NOTE: This branch should never execute when using auto-detection, # but we have it here just in case. if [ $1 = "auto" ]; then echo "${script_name}: 'auto-detected configuration '${config_name}' is NOT registered!" echo "${script_name}: " echo "${script_name}: *** Cannot continue with unregistered configuration '${config_name}'. ***" echo "${script_name}: " exit 1; else # At this point, we know: (a) config_list is empty; and (b) the user # requested manual configuration. 
If the config_name given by the # user is present in the configuration blacklist (config_blist), # then we can deduce why the config_list is empty: because the only # subconfig implied by config_name is blacklisted. Thus, we cannot # proceed. if [ $(is_in_list "${config_name}" "${config_blist}") == "true" ]; then echo "${script_name}: 'user-specified configuration '${config_name}' is blacklisted!" echo "${script_name}: " echo "${script_name}: *** Cannot continue with blacklisted configuration '${config_name}'. ***" echo "${script_name}: *** Try updating your compiler and/or assembler (binutils) versions. ***" echo "${script_name}: " exit 1; else # If config_name is NOT present in config_blist, then we know # that config_list is empty simply because config_name is # unregistered. echo "${script_name}: 'user-specified configuration '${config_name}' is NOT registered!" echo "${script_name}: " echo "${script_name}: *** Cannot continue with unregistered configuration '${config_name}'. ***" echo "${script_name}: " exit 1; fi fi else # This branch executes when the configuration is found to be present # (i.e. registered) in the config_registry file. echo "${script_name}: configuration '${config_name}' is registered." echo "${script_name}: '${config_name}' is defined as having the following sub-configurations:" echo "${script_name}: ${config_list}" echo "${script_name}: which collectively require the following kernels:" echo "${script_name}: ${kernel_list}" fi echo "${script_name}: checking sub-configurations:" # Now, verify that the constituent configurations associated with the # config name are all valid. for conf in ${config_list}; do # First confirm that the current configuration is registered. #this_clist=${config_registry[${conf}]} this_clist=$(query_array "config_registry" ${conf}) # If the config_list associated with conf is empty, then it was # never entered into the config_registry to begin with. Thus, # conf must be unregistered. 
if [ -z "${this_clist}" ]; then echo "${script_name}: '${conf}' is NOT registered!" echo "${script_name}: " echo "${script_name}: *** Cannot continue with unregistered configuration '${conf}'. ***" echo "${script_name}: " exit 1; else echo -n "${script_name}: '${conf}' is registered." fi # Then confirm that the current sub-configuration directory exists. if [ ! -d "${config_dirpath}/${conf}" ]; then echo "..but does NOT exist!" echo "${script_name}: " echo "${script_name}: *** Cannot continue with nonexistent configuration '${conf}'. ***" echo "${script_name}: " exit 1; else echo "..and exists." fi done echo "${script_name}: checking sub-configurations' requisite kernels:" # Also, let's verify that the requisite kernel sets associated with # the config name all correspond to directories that exist. for kernel in ${kernel_list}; do echo -n "${script_name}: '${kernel}' kernels..." # Confirm that the current kernel sub-directory exists. if [ ! -d "${kernels_dirpath}/${kernel}" ]; then echo "do NOT exist!" echo "${script_name}: " echo "${script_name}: *** Cannot continue with nonexistent kernel '${kernel}'. ***" echo "${script_name}: " exit 1; else echo "exist." fi done # In order to determine the default behavior of the --with[out]-memkind # option, we try to detect whether libmemkind is available. If it is, # the default implied option will be --with-memkind; otherwise, will be # --without-memkind. has_memkind=$(has_libmemkind) # Try to determine whether the chosen compiler supports #pragma omp simd. pragma_omp_simd=$(has_pragma_omp_simd) # -- Prepare variables for subsitution into template files ----------------- # Parse the status of the prefix option and echo feedback. if [ -n "${prefix_flag}" ]; then echo "${script_name}: detected --prefix='${prefix}'." else echo "${script_name}: no install prefix option given; defaulting to '${prefix}'." fi # Parse the status of the exec_prefix option and echo feedback. 
if [ -n "${exec_prefix_flag}" ]; then echo "${script_name}: detected --exec-prefix='${exec_prefix}'." else echo "${script_name}: no install exec_prefix option given; defaulting to PREFIX." fi # Parse the status of the libdir option and echo feedback. if [ -n "${libdir_flag}" ]; then echo "${script_name}: detected --libdir='${libdir}'." else echo "${script_name}: no install libdir option given; defaulting to EXECPREFIX/lib." fi # Parse the status of the includedir option and echo feedback. if [ -n "${includedir_flag}" ]; then echo "${script_name}: detected --includedir='${includedir}'." else echo "${script_name}: no install includedir option given; defaulting to PREFIX/include." fi # Parse the status of the sharedir option and echo feedback. if [ -n "${sharedir_flag}" ]; then echo "${script_name}: detected --sharedir='${sharedir}'." else echo "${script_name}: no install sharedir option given; defaulting to PREFIX/share." fi # Echo the installation directories that we settled on. echo "${script_name}: final installation directories:" echo "${script_name}: prefix: "${prefix} echo "${script_name}: exec_prefix: "${exec_prefix} echo "${script_name}: libdir: "${libdir} echo "${script_name}: includedir: "${includedir} echo "${script_name}: sharedir: "${sharedir} echo "${script_name}: NOTE: the variables above can be overridden when running make." # Check if CFLAGS is non-empty. if [ -n "${CFLAGS}" ]; then cflags_preset="${CFLAGS}" echo "${script_name}: detected preset CFLAGS; prepending:" echo "${script_name}: ${cflags_preset}" else cflags_preset='' echo "${script_name}: no preset CFLAGS detected." fi # Check if LDFLAGS is non-empty. if [ -n "${LDFLAGS}" ]; then ldflags_preset="${LDFLAGS}" echo "${script_name}: detected preset LDFLAGS; prepending:" echo "${script_name}: ${ldflags_preset}" else ldflags_preset='' echo "${script_name}: no preset LDFLAGS detected." fi # Check if the debug flag was specified. 
if [ -n "${debug_flag}" ]; then if [ "x${debug_type}" = "xopt" ]; then echo "${script_name}: enabling debug symbols with optimizations." elif [ "x${debug_type}" = "xsde" ]; then debug_type='sde' echo "${script_name}: enabling SDE processor emulation." else debug_type='noopt' echo "${script_name}: enabling debug symbols; optimizations disabled." fi else debug_type='off' echo "${script_name}: debug symbols disabled." fi # Check if the verbose make flag was specified. if [ "x${enable_verbose}" = "xyes" ]; then echo "${script_name}: enabling verbose make output. (disable with 'make V=0'.)" else echo "${script_name}: disabling verbose make output. (enable with 'make V=1'.)" fi # Check if the ARG_MAX hack was requested. if [ "x${enable_arg_max_hack}" = "xyes" ]; then echo "${script_name}: enabling ARG_MAX hack." else echo "${script_name}: disabling ARG_MAX hack." fi enable_shared_01=1 # Check if the static lib flag was specified. if [ "x${enable_static}" = "xyes" -a "x${enable_shared}" = "xyes" ]; then echo "${script_name}: building BLIS as both static and shared libraries." elif [ "x${enable_static}" = "xyes" -a "x${enable_shared}" = "xno" ]; then echo "${script_name}: building BLIS as a static library (shared library disabled)." enable_shared_01=0 elif [ "x${enable_static}" = "xno" -a "x${enable_shared}" = "xyes" ]; then echo "${script_name}: building BLIS as a shared library (static library disabled)." else echo "${script_name}: Both static and shared libraries were disabled." echo "${script_name}: *** Please enable one (or both) to continue." exit 1 fi # Check if the "export shared" flag was specified. if [ "x${export_shared}" = "xall" ]; then if [ "x${enable_shared}" = "xyes" ]; then echo "${script_name}: exporting all symbols within shared library." else echo "${script_name}: ignoring request to export all symbols within shared library." 
fi elif [ "x${export_shared}" = "xpublic" ]; then if [ "x${enable_shared}" = "xyes" ]; then echo "${script_name}: exporting only public symbols within shared library." fi else echo "${script_name}: *** Invalid argument '${export_shared}' to --export-shared option given." echo "${script_name}: *** Please use 'public' or 'all'." exit 1 fi # Check the threading model flag and standardize its value, if needed. # NOTE: 'omp' is deprecated but still supported; 'openmp' is preferred. enable_openmp='no' enable_openmp_01=0 enable_pthreads='no' enable_pthreads_01=0 if [ "x${threading_model}" = "xauto" ]; then echo "${script_name}: determining the threading model automatically." elif [ "x${threading_model}" = "xopenmp" ] || [ "x${threading_model}" = "xomp" ]; then echo "${script_name}: using OpenMP for threading." enable_openmp='yes' enable_openmp_01=1 threading_model="openmp" # Standardize the value. elif [ "x${threading_model}" = "xpthreads" ] || [ "x${threading_model}" = "xpthread" ] || [ "x${threading_model}" = "xposix" ]; then echo "${script_name}: using POSIX threads for threading." enable_pthreads='yes' enable_pthreads_01=1 threading_model="pthreads" # Standardize the value. elif [ "x${threading_model}" = "xoff" ] || [ "x${threading_model}" = "xno" ] || [ "x${threading_model}" = "xnone" ]; then echo "${script_name}: threading is disabled." threading_model="off" else echo "${script_name}: *** Unsupported threading model: ${threading_model}." exit 1 fi # Check the method of assigning micropanels to threads in the JR and IR # loops. enable_jrir_slab_01=0 enable_jrir_rr_01=0 if [ "x${thread_part_jrir}" = "xslab" ]; then echo "${script_name}: requesting slab threading in jr and ir loops." enable_jrir_slab_01=1 elif [ "x${thread_part_jrir}" = "xrr" ]; then echo "${script_name}: requesting round-robin threading in jr and ir loops." enable_jrir_rr_01=1 else echo "${script_name}: *** Unsupported method of thread partitioning in jr and ir loops: ${threading_model}." 
exit 1 fi # Convert 'yes' and 'no' flags to booleans. if [ "x${enable_pba_pools}" = "xyes" ]; then echo "${script_name}: internal memory pools for packing blocks are enabled." enable_pba_pools_01=1 else echo "${script_name}: internal memory pools for packing blocks are disabled." enable_pba_pools_01=0 fi if [ "x${enable_sba_pools}" = "xyes" ]; then echo "${script_name}: internal memory pools for small blocks are enabled." enable_sba_pools_01=1 else echo "${script_name}: internal memory pools for small blocks are disabled." enable_sba_pools_01=0 fi if [ "x${enable_mem_tracing}" = "xyes" ]; then echo "${script_name}: memory tracing output is enabled." enable_mem_tracing_01=1 else echo "${script_name}: memory tracing output is disabled." enable_mem_tracing_01=0 fi if [ "x${has_memkind}" = "xyes" ]; then if [ "x${enable_memkind}" = "x" ]; then # If no explicit option was given for libmemkind one way or the other, # we use the value returned previously by has_libmemkind(), in this # case "yes", to determine the default. echo "${script_name}: libmemkind found; default is to enable use." enable_memkind="yes" enable_memkind_01=1 else if [ "x${enable_memkind}" = "xyes" ]; then echo "${script_name}: received explicit request to enable libmemkind." enable_memkind="yes" enable_memkind_01=1 else echo "${script_name}: received explicit request to disable libmemkind." enable_memkind="no" enable_memkind_01=0 fi fi else echo "${script_name}: libmemkind not found; disabling." if [ "x${enable_memkind}" = "xyes" ]; then echo "${script_name}: cannot honor explicit request to enable libmemkind." fi enable_memkind="no" enable_memkind_01=0 fi if [ "x${pragma_omp_simd}" = "xyes" ]; then echo "${script_name}: compiler appears to support #pragma omp simd." enable_pragma_omp_simd_01=1 else echo "${script_name}: compiler appears to not support #pragma omp simd." 
enable_pragma_omp_simd_01=0 fi if [ "x${enable_blas}" = "xyes" ]; then echo "${script_name}: the BLAS compatibility layer is enabled." enable_blas_01=1 else echo "${script_name}: the BLAS compatibility layer is disabled." enable_blas_01=0 fi if [ "x${enable_cblas}" = "xyes" ]; then echo "${script_name}: the CBLAS compatibility layer is enabled." enable_cblas_01=1 # Force BLAS layer when CBLAS is enabled enable_blas='yes' else echo "${script_name}: the CBLAS compatibility layer is disabled." enable_cblas_01=0 fi if [ "x${enable_mixed_dt}" = "xyes" ]; then echo "${script_name}: mixed datatype support is enabled." if [ "x${enable_mixed_dt_extra_mem}" = "xyes" ]; then echo "${script_name}: mixed datatype optimizations requiring extra memory are enabled." enable_mixed_dt_extra_mem_01=1 else echo "${script_name}: mixed datatype optimizations requiring extra memory are disabled." enable_mixed_dt_extra_mem_01=0 fi enable_mixed_dt_01=1 else echo "${script_name}: mixed datatype support is disabled." enable_mixed_dt_extra_mem_01=0 enable_mixed_dt_01=0 fi if [ "x${enable_sup_handling}" = "xyes" ]; then echo "${script_name}: small matrix handling is enabled." enable_sup_handling_01=1 else echo "${script_name}: small matrix handling is disabled." enable_sup_handling_01=0 fi # Report integer sizes. if [ "x${int_type_size}" = "x32" ]; then echo "${script_name}: the BLIS API integer size is 32-bit." elif [ "x${int_type_size}" = "x64" ]; then echo "${script_name}: the BLIS API integer size is 64-bit." else echo "${script_name}: the BLIS API integer size is automatically determined." fi if [ "x${blas_int_type_size}" = "x32" ]; then echo "${script_name}: the BLAS/CBLAS API integer size is 32-bit." elif [ "x${blas_int_type_size}" = "x64" ]; then echo "${script_name}: the BLAS/CBLAS API integer size is 64-bit." else echo "${script_name}: the BLAS/CBLAS API integer size is automatically determined." 
fi # Disallow the simultaneous use of 64-bit integers in the BLAS and # 32-bit integers in BLIS. if [ "x${blas_int_type_size}" = "x64" -a "x${int_type_size}" = "x32" ]; then echo "${script_name}: *** To avoid the possibility of truncation, we do not allow use of 64-bit integers in the BLAS API with 32-bit integers in BLIS. Please use a different configuration of integers." exit 1 fi # Check if a sandbox was given. if [ -n "${sandbox_flag}" ]; then #sandbox_relpath="${sandbox_dir}/${sandbox}" echo "${script_name}: configuring for alternate gemm implementation:" echo "${script_name}: ${sandbox_dir}/${sandbox}" sandbox_fullpath="${sandbox_dirpath}/${sandbox}" if [ ! -d "${sandbox_fullpath}" ]; then echo "${script_name}: requested sandbox sub-directory does not exist! Cannot continue." echo "${script_name}: *** Please verify sandbox existence and name." exit 1 fi enable_sandbox_01=1 else echo "${script_name}: configuring for conventional gemm implementation." enable_sandbox_01=0 fi # Variables that may contain forward slashes, such as paths, need extra # escaping when used in sed commands. We insert those extra escape # characters here so that the sed commands below do the right thing. os_name_esc=$(echo "${os_name}" | sed 's/\//\\\//g') prefix_esc=$(echo "${prefix}" | sed 's/\//\\\//g') exec_prefix_esc=$(echo "${exec_prefix}" | sed 's/\//\\\//g') libdir_esc=$(echo "${libdir}" | sed 's/\//\\\//g') includedir_esc=$(echo "${includedir}" | sed 's/\//\\\//g') sharedir_esc=$(echo "${sharedir}" | sed 's/\//\\\//g') dist_path_esc=$(echo "${dist_path}" | sed 's/\//\\\//g') cc_esc=$(echo "${found_cc}" | sed 's/\//\\\//g') cxx_esc=$(echo "${found_cxx}" | sed 's/\//\\\//g') #sandbox_relpath_esc=$(echo "${sandbox_relpath}" | sed 's/\//\\\//g') # For RANLIB, if the variable is not set, we use a default value of # 'ranlib'. ranlib_esc=$(echo "${RANLIB:-ranlib}" | sed 's/\//\\\//g') # For AR, if the variable is not set, we use a default value of 'ar'. 
ar_esc=$(echo "${AR:-ar}" | sed 's/\//\\\//g') libpthread_esc=$(echo "${LIBPTHREAD--lpthread}" | sed 's/\//\\\//g') cflags_preset_esc=$(echo "${cflags_preset}" | sed 's/\//\\\//g') ldflags_preset_esc=$(echo "${ldflags_preset}" | sed 's/\//\\\//g') # For Windows builds, clear the libpthread_esc variable so that # no pthreads library is substituted into config.mk. (Windows builds # employ an implementation of pthreads that is internal to BLIS.) if [[ $is_win == yes && "$cc_vendor" == "clang" ]]; then libpthread_esc= fi # Typically, there are no slashes in the version variable. However, # downstream maintainers (such as those for Debian) may create custom # tags in their local clones such as "upstream/0.4.1", which obviously # contain slashes. This line, and subsequent use of the escaped variable # for the version string, accommodates those use cases. version_esc=$(echo "${version}" | sed 's/\//\\\//g') # Create a #define for the configuration family (config_name). uconf=$(echo ${config_name} | tr '[:lower:]' '[:upper:]') config_name_define="#define BLIS_FAMILY_${uconf}\n" # Create a list of #defines, one for each configuration in config_list. config_list_defines="" for conf in ${config_list}; do # Convert the current config name to uppercase. uconf=$(echo ${conf} | tr '[:lower:]' '[:upper:]') # Create a #define and add it to the running list. config_define="BLIS_CONFIG_${uconf}" config_list_defines="${config_list_defines}#define ${config_define}\n" done # Create a list of #defines, one for each kernel set in kernel_list. kernel_list_defines="" for kern in ${kernel_list}; do # Convert the current config name to uppercase. uconf=$(echo ${kern} | tr '[:lower:]' '[:upper:]') # Create a #define and add it to the running list. 
kernel_define="BLIS_KERNELS_${uconf}" kernel_list_defines="${kernel_list_defines}#define ${kernel_define}\n" done # -- Determine whether we are performing an out-of-tree build -------------- if [ "${dist_path}" != "./" ]; then # At this point, we know the user did not run "./configure". But we # have not yet ruled out "/configure" or some # equivalent # that uses relative paths. To further rule out these possibilities, # we create a dummy file in the current build directory. touch "./${dummy_file}" # If the dummy file we just created in the current directory does not # appear in the source distribution path, then we are in a different # directory and thus we must create a symbolic link. if [ ! -f "${dist_path}/${dummy_file}" ]; then configured_oot="yes" #echo "${script_name}: detected out-of-tree build directory." else configured_oot="no" #echo "${script_name}: detected in-tree build directory." fi # Remove the dummy file. rm -f "./${dummy_file}" fi # -- Instantiate config.mk, bli_config.h files from templates -------------- # Begin substituting information into the config_mk_in file, outputting # to config_mk_out. 
echo "${script_name}: creating ${config_mk_out_path} from ${config_mk_in_path}" cat "${config_mk_in_path}" \ | sed -e "s/@version@/${version_esc}/g" \ | sed -e "s/@so_version_major@/${so_version_major}/g" \ | sed -e "s/@so_version_minorbuild@/${so_version_minorbuild}/g" \ | sed -e "s/@config_name@/${config_name}/g" \ | sed -e "s/@config_list@/${config_list}/g" \ | sed -e "s/@kernel_list@/${kernel_list}/g" \ | sed -e "s/@kconfig_map@/${kconfig_map}/g" \ | sed -e "s/@os_name@/${os_name_esc}/g" \ | sed -e "s/@is_win@/${is_win}/g" \ | sed -e "s/@dist_path@/${dist_path_esc}/g" \ | sed -e "s/@CC_VENDOR@/${cc_vendor}/g" \ | sed -e "s/@gcc_older_than_4_9_0@/${gcc_older_than_4_9_0}/g" \ | sed -e "s/@gcc_older_than_6_1_0@/${gcc_older_than_6_1_0}/g" \ | sed -e "s/@gcc_older_than_9_1_0@/${gcc_older_than_9_1_0}/g" \ | sed -e "s/@CC@/${cc_esc}/g" \ | sed -e "s/@CXX@/${cxx_esc}/g" \ | sed -e "s/@RANLIB@/${ranlib_esc}/g" \ | sed -e "s/@AR@/${ar_esc}/g" \ | sed -e "s/@libpthread@/${libpthread_esc}/g" \ | sed -e "s/@cflags_preset@/${cflags_preset_esc}/g" \ | sed -e "s/@ldflags_preset@/${ldflags_preset_esc}/g" \ | sed -e "s/@debug_type@/${debug_type}/g" \ | sed -e "s/@threading_model@/${threading_model}/g" \ | sed -e "s/@prefix@/${prefix_esc}/g" \ | sed -e "s/@exec_prefix@/${exec_prefix_esc}/g" \ | sed -e "s/@libdir@/${libdir_esc}/g" \ | sed -e "s/@includedir@/${includedir_esc}/g" \ | sed -e "s/@sharedir@/${sharedir_esc}/g" \ | sed -e "s/@enable_verbose@/${enable_verbose}/g" \ | sed -e "s/@configured_oot@/${configured_oot}/g" \ | sed -e "s/@enable_arg_max_hack@/${enable_arg_max_hack}/g" \ | sed -e "s/@enable_static@/${enable_static}/g" \ | sed -e "s/@enable_shared@/${enable_shared}/g" \ | sed -e "s/@export_shared@/${export_shared}/g" \ | sed -e "s/@enable_blas@/${enable_blas}/g" \ | sed -e "s/@enable_cblas@/${enable_cblas}/g" \ | sed -e "s/@enable_memkind@/${enable_memkind}/g" \ | sed -e "s/@pragma_omp_simd@/${pragma_omp_simd}/g" \ | sed -e "s/@sandbox@/${sandbox}/g" \ > 
"${config_mk_out_path}" # Begin substituting information into the bli_config_h_in file, outputting # to bli_config_h_out. NOTE: We use perl instead of sed because the version # of sed used on OS X is old and does not handle the '\n' character # intuitively, which was used when constructing ${config_name_define}, # ${config_list_defines}, and ${kernel_list_defines}. echo "${script_name}: creating ${bli_config_h_out_path} from ${bli_config_h_in_path}" cat "${bli_config_h_in_path}" \ | perl -pe "s/\@config_name_define\@/${config_name_define}/g" \ | perl -pe "s/\@config_list_defines\@/${config_list_defines}/g" \ | perl -pe "s/\@kernel_list_defines\@/${kernel_list_defines}/g" \ | sed -e "s/@enable_openmp@/${enable_openmp_01}/g" \ | sed -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \ | sed -e "s/@enable_jrir_slab@/${enable_jrir_slab_01}/g" \ | sed -e "s/@enable_jrir_rr@/${enable_jrir_rr_01}/g" \ | sed -e "s/@enable_pba_pools@/${enable_pba_pools_01}/g" \ | sed -e "s/@enable_sba_pools@/${enable_sba_pools_01}/g" \ | sed -e "s/@enable_mem_tracing@/${enable_mem_tracing_01}/g" \ | sed -e "s/@int_type_size@/${int_type_size}/g" \ | sed -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \ | sed -e "s/@enable_blas@/${enable_blas_01}/g" \ | sed -e "s/@enable_cblas@/${enable_cblas_01}/g" \ | sed -e "s/@enable_mixed_dt@/${enable_mixed_dt_01}/g" \ | sed -e "s/@enable_mixed_dt_extra_mem@/${enable_mixed_dt_extra_mem_01}/g" \ | sed -e "s/@enable_sup_handling@/${enable_sup_handling_01}/g" \ | sed -e "s/@enable_memkind@/${enable_memkind_01}/g" \ | sed -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \ | sed -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \ | sed -e "s/@enable_shared@/${enable_shared_01}/g" \ > "${bli_config_h_out_path}" # -- Create top-level object directories ----------------------------------- # Create obj sub-directories (if they do not already exist). 
base_obj_dirpath="${obj_dirpath}/${config_name}" echo "${script_name}: creating ${base_obj_dirpath}" mkdir -p ${base_obj_dirpath} obj_config_dirpath="${base_obj_dirpath}/${config_dir}" #echo "${script_name}: creating ${obj_config_dirpath}" mkdir -p ${obj_config_dirpath} for conf in ${config_list}; do echo "${script_name}: creating ${obj_config_dirpath}/${conf}" mkdir -p ${obj_config_dirpath}/${conf} done obj_kernels_dirpath="${base_obj_dirpath}/${kernels_dir}" #echo "${script_name}: creating ${obj_kernels_dirpath}" mkdir -p ${obj_kernels_dirpath} for kern in ${kernel_list}; do echo "${script_name}: creating ${obj_kernels_dirpath}/${kern}" mkdir -p ${obj_kernels_dirpath}/${kern} done obj_refkern_dirpath="${base_obj_dirpath}/${refkern_dir}" #echo "${script_name}: creating ${obj_refkern_dirpath}" mkdir -p ${obj_refkern_dirpath} for conf in ${config_list}; do echo "${script_name}: creating ${obj_refkern_dirpath}/${conf}" mkdir -p ${obj_refkern_dirpath}/${conf} done obj_frame_dirpath="${base_obj_dirpath}/${frame_dir}" echo "${script_name}: creating ${obj_frame_dirpath}" mkdir -p ${obj_frame_dirpath} if [ -n "${sandbox_flag}" ]; then obj_sandbox_dirpath="${base_obj_dirpath}/${sandbox_dir}" echo "${script_name}: creating ${obj_sandbox_dirpath}/${sandbox}" mkdir -p ${obj_sandbox_dirpath}/${sandbox} fi obj_blastest_dirpath="${base_obj_dirpath}/${blastest_dir}" echo "${script_name}: creating ${obj_blastest_dirpath}" mkdir -p ${obj_blastest_dirpath} obj_testsuite_dirpath="${base_obj_dirpath}/${testsuite_dir}" echo "${script_name}: creating ${obj_testsuite_dirpath}" mkdir -p ${obj_testsuite_dirpath} # Create lib directory (if it does not already exist). base_lib_dirpath="${lib_dirpath}/${config_name}" echo "${script_name}: creating ${base_lib_dirpath}" mkdir -p ${base_lib_dirpath} # Create include directory (if it does not already exist). 
base_include_dirpath="${include_dirpath}/${config_name}" echo "${script_name}: creating ${base_include_dirpath}" mkdir -p ${base_include_dirpath} # -- Mirror source directory hierarchies to object directories ------------- # Combine the config_list with the config_name and then remove duplicates. config_list_plus_name=$(rm_duplicate_words "${config_list} ${config_name}") # Mirror each of the sub-configuration directories to the object directory. for conf in ${config_list_plus_name}; do echo "${script_name}: mirroring ${config_dirpath}/${conf} to ${obj_config_dirpath}/${conf}" ${mirror_tree_sh} "${config_dirpath}/${conf}" "${obj_config_dirpath}/${conf}" done # Mirror optimized kernels source tree to its object sub-directory. # We perform the mirroring on each configuration/kernel sub-directory # within 'kernels'. for kern in ${kernel_list}; do # Only mirror the optimized kernels source directory if it exists. # There are occasions where one of the sub-configurations in the # config_list does not correspond to a kernels sub-directory, such # as when architecture B is so close to architecture A that B can # use A's kernel source code unmodified (though perhaps with # different blocksizes). #if [ -d "${kernels_dirpath}/${conf}" ]; then echo "${script_name}: mirroring ${kernels_dirpath}/${kern} to ${obj_kernels_dirpath}/${kern}" ${mirror_tree_sh} "${kernels_dirpath}/${kern}" "${obj_kernels_dirpath}/${kern}" #else # echo "${script_name}: mirroring ${kernels_dirpath}/${conf} skipped... directory does not exist" #fi done # Mirror reference kernel source tree to its object sub-directory. echo "${script_name}: mirroring ${refkern_dirpath} to ${obj_refkern_dirpath}" ${mirror_tree_sh} ${refkern_dirpath} ${obj_refkern_dirpath} # Mirror reference kernels source tree to its object sub-directory. 
for conf in ${config_list}; do echo "${script_name}: mirroring ${refkern_dirpath} to ${obj_refkern_dirpath}/${conf}" ${mirror_tree_sh} "${refkern_dirpath}" "${obj_refkern_dirpath}/${conf}" done # Mirror framework source tree to its object sub-directory. echo "${script_name}: mirroring ${frame_dirpath} to ${obj_frame_dirpath}" ${mirror_tree_sh} ${frame_dirpath} ${obj_frame_dirpath} # Mirror the chosen sandbox source tree to its object sub-directory. if [ -n "${sandbox_flag}" ]; then echo "${script_name}: mirroring ${sandbox_dirpath}/${sandbox} to ${obj_sandbox_dirpath}/${sandbox}" ${mirror_tree_sh} "${sandbox_dirpath}/${sandbox}" "${obj_sandbox_dirpath}/${sandbox}" fi # -- Generate makefile fragements ------------------------------------------ clist_contains_cname=$(is_in_list "${config_name}" "${config_list}") # If the config_list does not already contain the config_name (i.e., # if config_name is an umbrella family), generate makefiles in that # directory. (In the next step, we will loop over the actual sub- # configurations and create fragments there as well.) if [ "${clist_contains_cname}" == "false" ]; then echo "${script_name}: creating makefile fragments in ${obj_config_dirpath}/${config_name}" ${gen_make_frags_sh} \ -h -r -v0 \ -o ${script_name} \ -p 'CONFIG' \ ${config_dirpath}/${config_name} \ ${obj_config_dirpath}/${config_name} \ ${gen_make_frags_dirpath}/fragment.mk \ ${gen_make_frags_dirpath}/suffix_list \ ${gen_make_frags_dirpath}/ignore_list fi # Generate makefile fragments for each of the sub-configurations present # in the configuration list. 
for conf in ${config_list}; do echo "${script_name}: creating makefile fragments in ${obj_config_dirpath}/${conf}" ${gen_make_frags_sh} \ -h -r -v0 \ -o ${script_name} \ -p 'CONFIG' \ ${config_dirpath}/${conf} \ ${obj_config_dirpath}/${conf} \ ${gen_make_frags_dirpath}/fragment.mk \ ${gen_make_frags_dirpath}/suffix_list \ ${gen_make_frags_dirpath}/ignore_list done # Generate makefile fragments for each of the kernel sets required by # the configuration list (in the kernel list). for kern in ${kernel_list}; do echo "${script_name}: creating makefile fragments in ${obj_kernels_dirpath}/${kern}" ${gen_make_frags_sh} \ -h -r -v0 \ -o ${script_name} \ -p 'KERNELS' \ ${kernels_dirpath}/${kern} \ ${obj_kernels_dirpath}/${kern} \ ${gen_make_frags_dirpath}/fragment.mk \ ${gen_make_frags_dirpath}/suffix_list \ ${gen_make_frags_dirpath}/ignore_list done # Generate makefile fragments in the reference kernels directory. echo "${script_name}: creating makefile fragments in ${obj_refkern_dirpath}" ${gen_make_frags_sh} \ -h -r -v0 \ -o ${script_name} \ -p 'REFKERN' \ ${refkern_dirpath} \ ${obj_refkern_dirpath} \ ${gen_make_frags_dirpath}/fragment.mk \ ${gen_make_frags_dirpath}/suffix_list \ ${gen_make_frags_dirpath}/ignore_list # Generate makefile fragments in the framework directory. echo "${script_name}: creating makefile fragments in ${obj_frame_dirpath}" ${gen_make_frags_sh} \ -h -r -v0 \ -o ${script_name} \ -p 'FRAME' \ ${frame_dirpath} \ ${obj_frame_dirpath} \ ${gen_make_frags_dirpath}/fragment.mk \ ${gen_make_frags_dirpath}/suffix_list \ ${gen_make_frags_dirpath}/ignore_list # Generate makefile fragments in the sandbox sub-directory. 
if [ -n "${sandbox_flag}" ]; then echo "${script_name}: creating makefile fragments in ${obj_sandbox_dirpath}/${sandbox}" ${gen_make_frags_sh} \ -h -r -v0 \ -o ${script_name} \ -p 'SANDBOX' \ ${sandbox_dirpath}/${sandbox} \ ${obj_sandbox_dirpath}/${sandbox} \ ${gen_make_frags_dirpath}/fragment.mk \ ${gen_make_frags_dirpath}/suffix_list \ ${gen_make_frags_dirpath}/ignore_list fi # -- Handle out-of-tree builds --------------------------------------------- # Under some circumstances, we need to create some symbolic links to # properly handle out-of-tree builds. if [ "${configured_oot}" = "yes" ]; then # If 'Makefile' symlink does not already exist in the current # directory, create a symbolic link to it. If one does exist, we # use -f to force creation of a new link. if [ ! -e "./Makefile" ]; then echo "${script_name}: creating symbolic link to Makefile." ln -s "${dist_path}/Makefile" elif [ -h "./Makefile" ]; then echo "${script_name}: symbolic link to Makefile already exists; forcing creation of new link." ln -sf "${dist_path}/Makefile" else echo "${script_name}: Non-symbolic link file or directory 'Makefile' blocks creation of symlink." echo "${script_name}: *** Please remove this entity and re-run configure." exit 1 fi # If 'common.mk' symlink does not already exist in the current # directory, create a symbolic link to it. If one does exist, we # use -f to force creation of a new link. if [ ! -e "./common.mk" ]; then echo "${script_name}: creating symbolic link to common.mk." ln -s "${dist_path}/common.mk" elif [ -h "./common.mk" ]; then echo "${script_name}: symbolic link to common.mk already exists; forcing creation of new link." ln -sf "${dist_path}/common.mk" else echo "${script_name}: Non-symbolic link file or directory 'common.mk' blocks creation of symlink." echo "${script_name}: *** Please remove this entity and re-run configure." exit 1 fi # If 'config' symlink does not already exist in the current # directory, create a symbolic link to it. 
If one does exist, we # use -f to force creation of a new link. if [ ! -e "./config" ]; then echo "${script_name}: creating symbolic link to 'config' directory." ln -s "${dist_path}/config" elif [ -h "./config" ]; then echo "${script_name}: symbolic link to 'config' directory already exists; forcing creation of new link." ln -sf "${dist_path}/config" else echo "${script_name}: Non-symbolic link file or directory 'config' blocks creation of symlink." echo "${script_name}: *** Please remove this entity and re-run configure." exit 1 fi echo "${script_name}: configured to build outside of source distribution." else echo "${script_name}: configured to build within top-level directory of source distribution." fi # Exit peacefully. return 0 } # The script's main entry point, passing all parameters given. main "$@" blis-0.6.1/docs/000077500000000000000000000000001360743507500134115ustar00rootroot00000000000000blis-0.6.1/docs/BLISObjectAPI.md000066400000000000000000002240521360743507500161520ustar00rootroot00000000000000# Contents * **[Contents](BLISObjectAPI.md#contents)** * **[Introduction](BLISObjectAPI.md#introduction)** * [BLIS types](BLISObjectAPI.md#blis-types) * [Integer-based types](BLISObjectAPI.md#integer-based-types) * [Floating-point types](BLISObjectAPI.md#floating-point-types) * [Enumerated parameter types](BLISObjectAPI.md#enumerated-parameter-types) * [Global scalar constants](BLISObjectAPI.md#global-scalar-constants) * [Basic vs expert interfaces](BLISObjectAPI.md#basic-vs-expert-interfaces) * [Context type](BLISObjectAPI.md#context-type) * [Runtime type](BLISObjectAPI.md#runtime-type) * [BLIS header file](BLISObjectAPI.md#blis-header-file) * [Initialization and cleanup](BLISObjectAPI.md#initialization-and-cleanup) * **[Object management](BLISObjectAPI.md#object-management)** * [Object creation function reference](BLISObjectAPI.md#object-creation-function-reference) * [Object accessor function reference](BLISObjectAPI.md#object-accessor-function-reference) 
* **[Computational function reference](BLISObjectAPI.md#computational-function-reference)** * [Operation index](BLISObjectAPI.md#operation-index) * [Level-1v operations](BLISObjectAPI.md#level-1v-operations) * [Level-1d operations](BLISObjectAPI.md#level-1d-operations) * [Level-1m operations](BLISObjectAPI.md#level-1m-operations) * [Level-1f operations](BLISObjectAPI.md#level-1f-operations) * [Level-2 operations](BLISObjectAPI.md#level-2-operations) * [Level-3 operations](BLISObjectAPI.md#level-3-operations) * [Utility operations](BLISObjectAPI.md#utility-operations) * [Level-3 microkernels](BLISObjectAPI.md#level-3-microkernels) * **[Query function reference](BLISObjectAPI.md#query-function-reference)** * [General library information](BLISObjectAPI.md#general-library-information) * [Specific configuration](BLISObjectAPI.md#specific-configuration) * [General configuration](BLISObjectAPI.md#general-configuration) * [Kernel information](BLISObjectAPI.md#kernel-information) * **[Example code](BLISObjectAPI.md#example-code)** # Introduction This document summarizes one of the primary native APIs in BLIS--the object API. Here, we also discuss BLIS-specific type definitions, header files, and prototypes to auxiliary functions. There are many functions that BLIS implements that are not listed here, either because they are lower-level functions, or they are considered for use primarily by developers and experts. The object API was given its name (a) because it abstracts the floating-point types of its operands (along with many other properties) within a `typedef struct {...}` data structure, and (b) to contrast it with the other native API in BLIS, the typed API, which is [documented here](BLISTypedAPI.md). (The third API supported by BLIS is the BLAS compatibility layer, which mimics conventional Fortran-77 BLAS.) ## BLIS types The following tables list various types used throughout the BLIS object API. 
### Integer-based types | BLIS integer type | Type definition | Used to represent... | |:------------------|:-------------------------|:---------------------------------------------------------------------| | `gint_t` | `int32_t` or `int64_t` | general-purpose signed integer; used to define signed integer types. | | `guint_t` | `uint32_t` or `uint64_t` | general-purpose unsigned integer; used to define unsigned integer types. | | `dim_t` | `gint_t` | matrix and vector dimensions. | | `inc_t` | `gint_t` | matrix row/column strides and vector increments. | | `doff_t` | `gint_t` | matrix diagonal offset: if _k_ < 0, diagonal begins at element (-_k_,0); otherwise diagonal begins at element (0,_k_). | | `bool_t` | `gint_t` | boolean values: `TRUE` or `FALSE`. | | `siz_t` | `guint_t` | a byte size or byte offset. | ### Floating-point types | BLIS fp type | Type definition | Used to represent... | |:------------------|:---------------------------------------|:-------------------------------------| | `float` | _N/A_ | single-precision real numbers | | `double` | _N/A_ | double-precision real numbers | | `scomplex` | `struct { float real; float imag; }` | single-precision complex numbers | | `dcomplex` | `struct { double real; double imag; }` | double-precision complex numbers | ### Enumerated parameter types | `num_t` | Semantic meaning: Matrix/vector operand... | |:----------------|:--------------------------------------------------------| | `BLIS_FLOAT` | contains single-precision real elements. | | `BLIS_DOUBLE` | contains double-precision real elements. | | `BLIS_SCOMPLEX` | contains single-precision complex elements. | | `BLIS_DCOMPLEX` | contains double-precision complex elements. | | `BLIS_INT` | contains integer elements of type `gint_t`. | | `BLIS_CONSTANT` | contains polymorphic representation of a constant value | | `dom_t` | Semantic meaning: Matrix/vector operand... 
| |:----------------|:--------------------------------------------| | `BLIS_REAL` | contains real domain elements. | | `BLIS_COMPLEX` | contains complex domain elements. | | `prec_t` | Semantic meaning: Matrix/vector operand... | |:-------------------|:--------------------------------------------| | `BLIS_SINGLE_PREC` | contains single-precision elements. | | `BLIS_DOUBLE_PREC` | contains double-precision elements. | | `trans_t` | Semantic meaning: Matrix operand ... | |:-------------------------|:--------------------------------------------------| | `BLIS_NO_TRANSPOSE` | will be used as given. | | `BLIS_TRANSPOSE` | will be implicitly transposed. | | `BLIS_CONJ_NO_TRANSPOSE` | will be implicitly conjugated. | | `BLIS_CONJ_TRANSPOSE` | will be implicitly transposed _and_ conjugated. | | `conj_t` | Semantic meaning: Matrix/vector operand... | |:---------------------|:---------------------------------------------------------| | `BLIS_NO_CONJUGATE` | will be used as given. | | `BLIS_CONJUGATE` | will be implicitly conjugated. | | `side_t` | Semantic meaning: Matrix operand... | |:-------------|:---------------------------------------------------| | `BLIS_LEFT` | appears on the left. | | `BLIS_RIGHT` | appears on the right. | | `struc_t` | Semantic meaning: Matrix operand... | |:------------------|:------------------------------------------------------------------| | `BLIS_GENERAL` | has no structure. | | `BLIS_HERMITIAN` | has Hermitian structure. | | `BLIS_SYMMETRIC` | has symmetric structure. | | `BLIS_TRIANGULAR` | has triangular structure. | | `uplo_t` | Semantic meaning: Matrix operand... | |:-------------|:------------------------------------------------------------------| | `BLIS_LOWER` | is stored in (and will be accessed only from) the lower triangle. | | `BLIS_UPPER` | is stored in (and will be accessed only from) the upper triangle. | | `BLIS_DENSE` | is stored as a full matrix (i.e., in both triangles). | | `diag_t` | Semantic meaning: Matrix operand ... 
| |:--------------------|:---------------------------------------------------------------------------| | `BLIS_NONUNIT_DIAG` | has a non-unit diagonal that should be explicitly read from. | | `BLIS_UNIT_DIAG` | has a unit diagonal that should be implicitly assumed (and not read from). | ## Global scalar constants BLIS defines a handful of scalar objects that conveniently represent various constant values for all defined numerical type values (`num_t`). The following table lists the constants defined by BLIS. | BLIS constant `obj_t` name | Numerical values | |:---------------------------|:-----------------| | `BLIS_MINUS_TWO` | `-2.0` | | `BLIS_MINUS_ONE` | `-1.0` | | `BLIS_ZERO` | ` 0.0` | | `BLIS_ONE` | ` 1.0` | | `BLIS_TWO` | ` 2.0` | These objects are polymorphic; each one contains a `float`, `double`, `scomplex`, `dcomplex`, and `gint_t` representation of the constant value in question. They can be used in place of any `obj_t*` operand in any object API function provided that the following criteria are met: * The object parameter requires unit dimensions (1x1). (In other words, the function expects a scalar for the operand in question.) * The object parameter is input-only. (In other words, the function is not trying to update the scalar.) The correct representation is chosen by context, usually by inspecting the datatype of one of the other operands involved in an operation. For example, if we create and initialize objects `x` and `y` of `num_t` type `BLIS_DOUBLE`, the following call to `bli_axpyv()` ```c bli_axpyv( &BLIS_TWO, &x, &y ); ``` will use the `BLIS_DOUBLE` representation of `BLIS_TWO`. ## Basic vs expert interfaces The functions listed in this document belong to the "basic" interface subset of the BLIS object API. There is a companion "expert" interface that mirrors the basic interface, except that it also contains two additional parameters that are only of interest to experts and library developers. 
The expert interfaces use the same names as the basic functions, except for an additional `_ex` suffix. For example, the basic interface for `gemm` is ```c void bli_gemm ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); ``` while the expert interface is: ```c void bli_gemm_ex ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); ``` The expert interface contains two additional parameters: a `cntx_t*` and `rntm_t*`. Note that calling a function from the expert interface with the `cntx_t*` and `rntm_t*` arguments each set to `NULL` is equivalent to calling the corresponding basic interface. Specifically, a `NULL` value passed in for the `cntx_t*` results in a valid context being queried from BLIS, and a `NULL` value passed in for the `rntm_t*` results in the current global settings for multithreading being used. ## Context type In general, it is permissible to pass in `NULL` for a `cntx_t*` parameter when calling an expert interface such as `bli_gemm_ex()`. However, there are cases where `NULL` values are not accepted and may result in a segmentation fault. Specifically, the `cntx_t*` argument appears in the interfaces to the `gemm`, `trsm`, and `gemmtrsm` [level-3 microkernels](KernelsHowTo.md#level-3) along with all [level-1v](KernelsHowTo.md#level-1v) and [level-1f](KernelsHowTo.md#level-1f) kernels. There, as a general rule, a valid pointer must be passed in. Whenever a valid context is needed, the developer may query a default context from the global kernel structure (if a context is not already available in the current scope): ```c cntx_t* bli_gks_query_cntx( void ); ``` When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to select and return the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g.
`haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally return a pointer to the context appropriate for the targeted configuration. ## Runtime type When calling one of the expert interfaces, a `rntm_t` (runtime) object can be used to convey a thread-local request for parallelism to the underlying implementation. Runtime objects are thread-safe by nature when they are declared statically as a stack variable (or allocated via `malloc()`), initialized, and then passed into the expert interface of interest. Notice that runtime objects have no analogue in most BLAS libraries, where you are forced to specify parallelism at a global level (usually via environment variables). For more information on using `rntm_t` objects, please read the [Multithreading](Multithreading.md) documentation, paying close attention to the section on [local setting of parallelism](Multithreading.md#locally-at-runtime). ## BLIS header file All BLIS definitions and prototypes may be included in your C source file by including a single header file: ```c #include "blis.h" ``` ## Initialization and Cleanup As of [9804adf](https://github.com/flame/blis/commit/9804adfd405056ec332bb8e13d68c7b52bd3a6c1), BLIS no longer requires explicit initialization and finalization at runtime. In other words, users do not need to call `bli_init()` before the application can make use of the library (and `bli_finalize()` after the application is finished with the library). Instead, all computational operations (and some non-computational functions) in BLIS will initialize the library on behalf of the user if it has not already been initialized. This change was made to simplify the user experience. Application developers should keep in mind, however, that this new self-initialization regime implies the following: unless the library is *explicitly* finalized via `bli_finalize()`, it will, once initialized, remain initialized for the life of the application. 
This is likely not a problem in the vast majority of cases. However, a memory-constrained application that performs all of its dense linear algebra (DLA) up-front, for example, may wish to explicitly finalize the library after BLIS is no longer needed in order to free up memory for other purposes. Similarly, an expert user may call `bli_init()` manually in order to control when the overhead of library initialization is incurred, even though the library would have self-initialized. The interfaces to `bli_init()` and `bli_finalize()` are quite simple; they require no arguments and return no values: ```c void bli_init( void ); void bli_finalize( void ); ``` # Object management ## Introduction Before using the object API, you must first create some objects to encapsulate your vector or matrix data. We provide example code for creating matrix objects in the [examples/oapi](https://github.com/flame/blis/tree/master/examples/oapi) directory of the BLIS source distribution. However, we will provide API documentation for the most common functions for creating and freeing objects in the next section. Generally speaking, an object is created when an `obj_t` structure is initialized with valid properties describing the object as well as a valid data buffer (to hold the elements of the vector or matrix). The valid data buffer can be allocated automatically on your behalf at the same time that the other object fields are initialized, or "attached" in a second step after the object is initialized with preliminary values. The former is useful when using the object API at the setup stage of an application (and if `malloc()` is an acceptable method of allocating memory). Similarly, the latter is useful when interfacing BLIS into the middle of an application after the allocation has already taken place, or when some function other than `malloc()` is desired for allocating the buffer. Only objects that were created with automatic allocation must be freed via the BLIS object API.
Objects that were initialized with attached buffers can be freed in whatever manner is appropriate, based on how the application originally allocated the memory in question. ## Object creation function reference ```c void bli_obj_create ( num_t dt, dim_t m, dim_t n, inc_t rs, inc_t cs, obj_t* obj ); ``` Initialize an _m x n_ object `obj` and allocate sufficient storage to hold _mn_ elements whose storage type is specified by `dt` and with row and column strides `rs` and `cs`, respectively. This function allocates enough space to enforce alignment of leading dimensions, where the alignment factor is specific to the configuration being used, though the alignment factor is almost always equal to the size of the hardware's SIMD registers. The address `obj` must reference valid memory--typically an `obj_t` declared statically or allocated dynamically via `malloc()`. After an object created via `bli_obj_create()` is no longer needed, it should be deallocated via `bli_obj_free()`. --- ```c void bli_obj_free ( obj_t* obj ); ``` Deallocate (release) an object `obj` that was previously created, typically via `bli_obj_create()`. --- ```c void bli_obj_create_without_buffer ( num_t dt, dim_t m, dim_t n, obj_t* obj ); ``` Partially initialize an _m x n_ object `obj` that will eventually contain elements whose storage type is specified by `dt`. This function does not result in any memory allocation. Before `obj` can be used, the object must be fully initialized by attaching a buffer via `bli_obj_attach_buffer()`. This function is useful when the user wishes to encapsulate existing buffers into one or more `obj_t` objects. An object (partially) initialized via this function should generally not be passed to `bli_obj_free()` even after a buffer is attached to it via `bli_obj_attach_buffer()`, unless the user wishes to pass that buffer into `free()`. 
--- ```c void bli_obj_attach_buffer ( void* p, inc_t rs, inc_t cs, inc_t is, obj_t* obj ); ``` Given a partially initialized object (i.e., one that has already been passed to `bli_obj_create_without_buffer()`), attach the buffer pointed to by `p` to the object referenced by `obj` and initialize `obj` as containing elements with row and column strides `rs` and `cs`, respectively. The function also initializes the imaginary stride as `is`, which is experimental and not consistently used by all parts of BLIS. --- ```c void bli_obj_create_with_attached_buffer ( num_t dt, dim_t m, dim_t n, void* p, inc_t rs, inc_t cs, obj_t* obj ); ``` Initialize an _m x n_ object `obj` as containing _mn_ elements whose storage type is specified by `dt` and with row and column strides `rs` and `cs`, respectively. The function does not allocate any memory and instead attaches the buffer pointed to by `p`. Note that calling this function is effectively equivalent to calling ```c bli_obj_create_without_buffer( dt, m, n, obj ); bli_obj_attach_buffer( p, rs, cs, 1, obj ); ``` Objects initialized via this function should generally not be passed to `bli_obj_free()`, unless the user wishes to pass `p` into `free()`. --- ```c void bli_obj_alloc_buffer ( inc_t rs, inc_t cs, inc_t is, obj_t* obj ); ``` Given a partially initialized _m x n_ object, allocate and attach a buffer large enough to contain _mn_ elements with the row and column strides `rs` and `cs`, respectively. This function allocates enough space to enforce alignment of leading dimensions, where the alignment factor is specific to the configuration being used, though the alignment factor is almost always equal to the size of the hardware's SIMD registers. Note that calling `bli_obj_create()` is effectively equivalent to calling ```c bli_obj_create_without_buffer( dt, m, n, obj ); bli_obj_alloc_buffer( rs, cs, 1, obj ); ``` Very few users will likely have a need to call this function. 
We provide documentation for it mostly so that others can manually access the alignment features of `bli_obj_create()` without also needing to initialize an `obj_t`. --- ```c void bli_obj_create_1x1 ( num_t dt, obj_t* obj ); ``` Initialize a _1 x 1_ object `obj` and allocate sufficient storage to hold one element whose storage type is specified by `dt`. The address `obj` must reference valid memory--typically an `obj_t` declared statically or allocated dynamically via `malloc()`. This function is useful any time the user wishes to create a scalar object with an allocated buffer. Note that calling `bli_obj_create_1x1()` is effectively equivalent to calling ```c bli_obj_create_without_buffer( dt, 1, 1, obj ); bli_obj_alloc_buffer( 1, 1, 1, obj ); ``` After an object created via `bli_obj_create_1x1()` is no longer needed, it should be deallocated via `bli_obj_free()`. --- ```c void bli_obj_create_1x1_with_attached_buffer ( num_t dt, void* p, obj_t* obj ); ``` Initialize a _1 x 1_ object `obj` as containing one element whose storage type is specified by `dt`. The function does not allocate any memory and instead attaches the buffer pointed to by `p`. Note that calling this function is effectively equivalent to calling ```c bli_obj_create_without_buffer( dt, 1, 1, obj ); bli_obj_attach_buffer( p, 1, 1, 1, obj ); ``` Objects initialized via this function should generally not be passed to `bli_obj_free()`, unless the user wishes to pass `p` into `free()`. --- ```c void bli_obj_create_conf_to ( obj_t* s, obj_t* d ); ``` Initialize an object `d` with dimensions conformal to those of an existing object `s`. Object `d` is initialized with the same row and column strides as those of `s`. However, the structure, uplo, conjugation, and transposition properties of `s` are **not** inherited by `d`. On entry, object `s` must be fully initialized and the address `d` must reference valid memory--typically an `obj_t` declared statically or allocated dynamically via `malloc()`. 
Note that calling this function is effectively equivalent to calling ```c num_t dt = bli_obj_dt( s ); dim_t m = bli_obj_length( s ); dim_t n = bli_obj_width( s ); inc_t rs = bli_obj_row_stride( s ); inc_t cs = bli_obj_col_stride( s ); bli_obj_create( dt, m, n, rs, cs, d ); ``` After an object created via `bli_obj_create_conf_to()` is no longer needed, it should be deallocated via `bli_obj_free()`. --- ```c void bli_obj_scalar_init_detached ( num_t dt, obj_t* obj ); ``` Initialize a _1 x 1_ object `obj` using internal storage sufficient to hold one element whose storage type is specified by `dt`. (Internal storage is present within every `obj_t` and is capable of holding one element of any supported type.) This function is similar to `bli_obj_create_1x1()`, except that the object does not trigger any dynamic memory allocation. Objects initialized via this function should **never** be passed to `bli_obj_free()`. ## Object accessor function reference Notes for interpreting function descriptions: * Object accessor functions allow the caller to query certain properties of objects. * These functions are only guaranteed to return meaningful values when called upon objects that have been fully initialized/created. * Many specialized functions are omitted from this section for brevity. For a full list of accessor functions, please see [frame/include/bli_obj_macro_defs.h](https://github.com/flame/blis/tree/master/frame/include/bli_obj_macro_defs.h). **Note**: For now, we mostly omit documentation for the corresponding functions used to modify object properties because those functions can easily invalidate the state of an `obj_t` and should be used only in specific instances. If you think you need to manually set the fields of an `obj_t`, please contact BLIS developers so we can give you personalized guidance. --- ```c num_t bli_obj_dt( obj_t* obj ); ``` Return the storage datatype property of `obj`.
--- ```c dom_t bli_obj_dom( obj_t* obj ); ``` Return the domain component of the storage datatype property of `obj`. --- ```c prec_t bli_obj_prec( obj_t* obj ); ``` Return the precision component of the storage datatype property of `obj`. --- ```c trans_t bli_obj_conjtrans_status( obj_t* obj ); ``` Return the `trans_t` property of `obj`, which may indicate transposition, conjugation, both, or neither. --- ```c trans_t bli_obj_onlytrans_status( obj_t* obj ); ``` Return the transposition component of the `trans_t` property of `obj`, which may indicate transposition or no transposition. Thus, possible return values are `BLIS_NO_TRANSPOSE` or `BLIS_TRANSPOSE`. --- ```c conj_t bli_obj_conj_status( obj_t* obj ); ``` Return the conjugation component of the `trans_t` property of `obj`, which may indicate conjugation or no conjugation. Thus, possible return values are `BLIS_NO_CONJUGATE` or `BLIS_CONJUGATE`. --- ```c uplo_t bli_obj_uplo( obj_t* obj ); ``` Return the `uplo_t` property of `obj`. --- ```c struc_t bli_obj_struc( obj_t* obj ); ``` Return the `struc_t` property of `obj`. --- ```c diag_t bli_obj_diag( obj_t* obj ); ``` Return the `diag_t` property of `obj`. --- ```c dim_t bli_obj_length( obj_t* obj ); ``` Return the number of rows (or _m_ dimension) of `obj`. This value is the _m_ dimension **before** taking into account the transposition property as indicated by `bli_obj_onlytrans_status()` or `bli_obj_conjtrans_status()`. --- ```c dim_t bli_obj_width( obj_t* obj ); ``` Return the number of columns (or _n_ dimension) of `obj`. This value is the _n_ dimension **before** taking into account the transposition property as indicated by `bli_obj_onlytrans_status()` or `bli_obj_conjtrans_status()`. --- ```c dim_t bli_obj_length_after_trans( obj_t* obj ); ``` Return the number of rows (or _m_ dimension) of `obj` after taking into account the transposition property as indicated by `bli_obj_onlytrans_status()` or `bli_obj_conjtrans_status()`. 
--- ```c dim_t bli_obj_width_after_trans( obj_t* obj ); ``` Return the number of columns (or _n_ dimension) of `obj` after taking into account the transposition property as indicated by `bli_obj_onlytrans_status()` or `bli_obj_conjtrans_status()`. --- ```c doff_t bli_obj_diag_offset( obj_t* obj ); ``` Return the diagonal offset of `obj`. Note that the diagonal offset will be negative, `-i`, if the diagonal begins at element `(-i,0)` and positive `j` if the diagonal begins at element `(0,j)`. --- ```c inc_t bli_obj_row_stride( obj_t* obj ); ``` Return the row stride property of `obj`. When storing by columns, the row stride is 1. When storing by rows, the row stride is also sometimes called the _leading dimension_. --- ```c inc_t bli_obj_col_stride( obj_t* obj ); ``` Return the column stride property of `obj`. When storing by rows, the column stride is 1. When storing by columns, the column stride is also sometimes called the _leading dimension_. --- ```c dim_t bli_obj_vector_dim( obj_t* obj ); ``` Return the number of elements in a vector object `obj`. This function assumes that at least one dimension of `obj` is unit, and that it therefore represents a vector. --- ```c inc_t bli_obj_vector_inc( obj_t* obj ); ``` Return the storage increment of a vector object `obj`. This function assumes that at least one dimension of `obj` is unit, and that it therefore represents a vector. --- ```c void* bli_obj_buffer( obj_t* obj ); ``` Return the address to the data buffer associated with object `obj`. **Note**: The address returned by this function will not take into account any subpartitioning. However, this will not be a problem for most casual users. --- ```c siz_t bli_obj_elem_size( obj_t* obj ); ``` Return the size, in bytes, of the storage datatype as indicated by `bli_obj_dt()`. --- ```c void bli_obj_alias_to( obj_t* a, obj_t* b ); ``` Initialize `b` to be a shallow copy, or alias, of `a`.
For most people's purposes, this is equivalent to ``` b = a; ``` However, there is at least one field (one that only developers should be concerned with) that is not copied. --- ```c void bli_obj_real_part( obj_t* c, obj_t* r ); ``` Initialize `r` to be a modified shallow copy of `c` that refers only to the real part of `c`. --- ```c void bli_obj_imag_part( obj_t* c, obj_t* i ); ``` Initialize `i` to be a modified shallow copy of `c` that refers only to the imaginary part of `c`. --- ```c void bli_obj_induce_trans( obj_t* obj ); ``` Modify the properties of `obj` to induce a logical transposition. This function operates without regard to whether the transposition property is already set. Therefore, depending on the circumstance, the caller may or may not wish to clear the transposition property after calling this function. (If needed, the user may call `bli_obj_toggle_trans( obj )` to toggle the transposition status.) # Computational function reference Notes for interpreting function descriptions: * `conj?(X)` and `trans?(X)` should be interpreted as predicates that capture the operand `X` with that object's `conj_t` or `trans_t` property applied. For example: * `conj?(x)` refers to a vector `x` that is either conjugated or used as given. * `trans?(A)` refers to a matrix `A` that is either transposed, conjugated _and_ transposed, conjugated only, or used as given. * Any operand marked with `conj()` is unconditionally conjugated. * Any operand marked with `^T` is unconditionally transposed. Similarly, any operand that is marked with `^H` is unconditionally conjugate-transposed. * All occurrences of `alpha`, `beta`, and `rho` parameters are scalars. * In general, unless otherwise noted, all object parameters must be stored using the same `num_t` datatype. In a few cases, one of the object parameters must be stored in the real projection of one of the other objects' types. (The real projection of a `num_t` datatype is the equivalent datatype in the real domain.
So `BLIS_DOUBLE` is the real projection of `BLIS_DCOMPLEX`. `BLIS_DOUBLE` is also the real projection of itself.) * Many object API entries list the object properties that are honored/observed by the operation. For example, for `bli_gemv()`, the observed object properties are `trans?(A)` and `conj?(x)`. The former means that matrix `A` may be (optionally) marked for conjugation and/or transposition while the latter means that vector `x` may be (optionally) marked for conjugation. A function may also list `diagoff(A)` as an observed property, which means that it will accept general diagonal offsets. Similarly, `diag(A)` refers to recognizing the unit/non-unit structure of the diagonal and `uplo(A)` refers to reading/updating only the stored triangle/trapezoid/region of `A`. --- ## Operation index * **[Level-1v](BLISObjectAPI.md#level-1v-operations)**: Operations on vectors: * [addv](BLISObjectAPI.md#addv), [amaxv](BLISObjectAPI.md#amaxv), [axpyv](BLISObjectAPI.md#axpyv), [axpbyv](BLISObjectAPI.md#axpbyv), [copyv](BLISObjectAPI.md#copyv), [dotv](BLISObjectAPI.md#dotv), [dotxv](BLISObjectAPI.md#dotxv), [invertv](BLISObjectAPI.md#invertv), [scal2v](BLISObjectAPI.md#scal2v), [scalv](BLISObjectAPI.md#scalv), [setv](BLISObjectAPI.md#setv), [setrv](BLISObjectAPI.md#setrv), [setiv](BLISObjectAPI.md#setiv), [subv](BLISObjectAPI.md#subv), [swapv](BLISObjectAPI.md#swapv), [xpbyv](BLISObjectAPI.md#xpbyv) * **[Level-1d](BLISObjectAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals: * [addd](BLISObjectAPI.md#addd), [axpyd](BLISObjectAPI.md#axpyd), [copyd](BLISObjectAPI.md#copyd), [invertd](BLISObjectAPI.md#invertd), [scald](BLISObjectAPI.md#scald), [scal2d](BLISObjectAPI.md#scal2d), [setd](BLISObjectAPI.md#setd), [setid](BLISObjectAPI.md#setid), [shiftd](BLISObjectAPI.md#shiftd), [subd](BLISObjectAPI.md#subd), [xpbyd](BLISObjectAPI.md#xpbyd) * **[Level-1m](BLISObjectAPI.md#level-1m-operations)**: Element-wise operations on matrices: *
[addm](BLISObjectAPI.md#addm), [axpym](BLISObjectAPI.md#axpym), [copym](BLISObjectAPI.md#copym), [scalm](BLISObjectAPI.md#scalm), [scal2m](BLISObjectAPI.md#scal2m), [setm](BLISObjectAPI.md#setm), [setrm](BLISObjectAPI.md#setrm), [setim](BLISObjectAPI.md#setim), [subm](BLISObjectAPI.md#subm) * **[Level-1f](BLISObjectAPI.md#level-1f-operations)**: Fused operations on multiple vectors: * [axpy2v](BLISObjectAPI.md#axpy2v), [dotaxpyv](BLISObjectAPI.md#dotaxpyv), [axpyf](BLISObjectAPI.md#axpyf), [dotxf](BLISObjectAPI.md#dotxf), [dotxaxpyf](BLISObjectAPI.md#dotxaxpyf) * **[Level-2](BLISObjectAPI.md#level-2-operations)**: Operations with one matrix and (at least) one vector operand: * [gemv](BLISObjectAPI.md#gemv), [ger](BLISObjectAPI.md#ger), [hemv](BLISObjectAPI.md#hemv), [her](BLISObjectAPI.md#her), [her2](BLISObjectAPI.md#her2), [symv](BLISObjectAPI.md#symv), [syr](BLISObjectAPI.md#syr), [syr2](BLISObjectAPI.md#syr2), [trmv](BLISObjectAPI.md#trmv), [trsv](BLISObjectAPI.md#trsv) * **[Level-3](BLISObjectAPI.md#level-3-operations)**: Operations with matrices that are multiplication-like: * [gemm](BLISObjectAPI.md#gemm), [hemm](BLISObjectAPI.md#hemm), [herk](BLISObjectAPI.md#herk), [her2k](BLISObjectAPI.md#her2k), [symm](BLISObjectAPI.md#symm), [syrk](BLISObjectAPI.md#syrk), [syr2k](BLISObjectAPI.md#syr2k), [trmm](BLISObjectAPI.md#trmm), [trmm3](BLISObjectAPI.md#trmm3), [trsm](BLISObjectAPI.md#trsm) * **[Utility](BLISObjectAPI.md#utility-operations)**: Miscellaneous operations on matrices and vectors: * [asumv](BLISObjectAPI.md#asumv), [norm1v](BLISObjectAPI.md#norm1v), [normfv](BLISObjectAPI.md#normfv), [normiv](BLISObjectAPI.md#normiv), [norm1m](BLISObjectAPI.md#norm1m), [normfm](BLISObjectAPI.md#normfm), [normim](BLISObjectAPI.md#normim), [mkherm](BLISObjectAPI.md#mkherm), [mksymm](BLISObjectAPI.md#mksymm), [mktrim](BLISObjectAPI.md#mktrim), [fprintv](BLISObjectAPI.md#fprintv), [fprintm](BLISObjectAPI.md#fprintm), [printv](BLISObjectAPI.md#printv),
[printm](BLISObjectAPI.md#printm), [randv](BLISObjectAPI.md#randv), [randm](BLISObjectAPI.md#randm), [sumsqv](BLISObjectAPI.md#sumsqv), [getijm](BLISObjectAPI.md#getijm), [setijm](BLISObjectAPI.md#setijm) --- ## Level-1v operations Level-1v operations perform various level-1 BLAS-like operations on vectors (hence the _v_). **Note**: Most level-1v operations have a corresponding level-1v kernel through which they are primarily implemented. --- #### addv ```c void bli_addv ( obj_t* x, obj_t* y ); ``` Perform ``` y := y + conj?(x) ``` where `x` and `y` are vectors of length _n_. Observed object properties: `conj?(x)`. --- #### amaxv ```c void bli_amaxv ( obj_t* x, obj_t* index ); ``` Given a vector of length _n_, return the zero-based index of the element of vector `x` that contains the largest absolute value (or, in the complex domain, the largest complex modulus). The object `index` must be created of type `BLIS_INT`. If `NaN` is encountered, it is treated as if it were a valid value that was smaller than any other value in the vector. If more than one element contains the same maximum value, the index of the latter element is returned via `index`. Observed object properties: none. **Note:** This function attempts to mimic the algorithm for finding the element with the maximum absolute value in the netlib BLAS routines `i?amax()`. --- #### axpyv ```c void bli_axpyv ( obj_t* alpha, obj_t* x, obj_t* y ); ``` Perform ``` y := y + conj?(alpha) * conj?(x) ``` where `x` and `y` are vectors of length _n_, and `alpha` is a scalar. Observed object properties: `conj?(alpha)`, `conj?(x)`. --- #### axpbyv ```c void bli_axpbyv ( obj_t* alpha, obj_t* x, obj_t* beta, obj_t* y ); ``` Perform ``` y := conj?(beta) * y + conj?(alpha) * conj?(x) ``` where `x` and `y` are vectors of length _n_, and `alpha` and `beta` are scalars. Observed object properties: `conj?(alpha)`, `conj?(beta)`, `conj?(x)`.
--- #### copyv ```c void bli_copyv ( obj_t* x, obj_t* y ); ``` Perform ``` y := conj?(x) ``` where `x` and `y` are vectors of length _n_. --- #### dotv ```c void bli_dotv ( obj_t* x, obj_t* y, obj_t* rho ); ``` Perform ``` rho := conj?(x)^T * conj?(y) ``` where `x` and `y` are vectors of length _n_, and `rho` is a scalar. --- #### dotxv ```c void bli_dotxv ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* beta, obj_t* rho ); ``` Perform ``` rho := conj?(beta) * rho + conj?(alpha) * conj?(x)^T * conj?(y) ``` where `x` and `y` are vectors of length _n_, and `alpha`, `beta`, and `rho` are scalars. --- #### invertv ```c void bli_invertv ( obj_t* x ); ``` Invert all elements of an _n_-length vector `x`. --- #### scalv ```c void bli_scalv ( obj_t* alpha, obj_t* x ); ``` Perform ``` x := conj?(alpha) * x ``` where `x` is a vector of length _n_, and `alpha` is a scalar. Observed object properties: `conj?(alpha)`. --- #### scal2v ```c void bli_scal2v ( obj_t* alpha, obj_t* x, obj_t* y ); ``` Perform ``` y := conj?(alpha) * conj?(x) ``` where `x` and `y` are vectors of length _n_, and `alpha` is a scalar. Observed object properties: `conj?(alpha)`, `conj?(x)`. --- #### setv ```c void bli_setv ( obj_t* alpha, obj_t* x ); ``` Perform ``` x := conj?(alpha) ``` That is, set all elements of an _n_-length vector `x` to scalar `conj?(alpha)`. Observed object properties: `conj?(alpha)`. --- #### setrv ```c void bli_setrv ( obj_t* alpha, obj_t* x ); ``` Perform ``` real(x) := real(alpha) ``` That is, given an _n_-length vector `x`, set all elements' real components to the real component of scalar `alpha`. (If `alpha` is complex, the imaginary component is ignored.) If `x` is real, this operation is equivalent to performing `setv` on `x` with the real component of scalar `alpha`. **Note**: This operation is provided for convenience as an object wrapper to `setv`, and thus it has no analogue in the [BLIS typed API](BLISTypedAPI). 
--- #### setiv ```c void bli_setiv ( obj_t* alpha, obj_t* x ); ``` Perform ``` imag(x) := real(alpha) ``` That is, given an _n_-length vector `x`, set all elements' imaginary components to the real component of scalar `alpha`. (If `alpha` is complex, the imaginary component is ignored.) If `x` is real, this operation is equivalent to a no-op. **Note**: This operation is provided for convenience as an object wrapper to `setv`, and thus it has no analogue in the [BLIS typed API](BLISTypedAPI). --- #### subv ```c void bli_subv ( obj_t* x, obj_t* y ); ``` Perform ``` y := y - conj?(x) ``` where `x` and `y` are vectors of length _n_. Observed object properties: `conj?(x)`. --- #### swapv ```c void bli_swapv ( obj_t* x, obj_t* y ); ``` Swap corresponding elements of two _n_-length vectors `x` and `y`. --- #### xpbyv ```c void bli_xpbyv ( obj_t* x, obj_t* beta, obj_t* y ); ``` Perform ``` y := conj?(beta) * y + conj?(x) ``` where `x` and `y` are vectors of length _n_, and `beta` is a scalar. Observed object properties: `conj?(beta)`, `conj?(x)`. --- ## Level-1d operations Level-1d operations perform various level-1 BLAS-like operations on matrix diagonals (hence the _d_). These operations are similar to their level-1m counterparts, except they only read and update matrix diagonals and therefore ignore the `uplo` property of their applicable input operands. Please see the descriptions for the corresponding level-1m operation for a description of the arguments. --- #### addd ```c void bli_addd ( obj_t* a, obj_t* b ); ``` Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`. --- #### axpyd ```c void bli_axpyd ( obj_t* alpha, obj_t* a, obj_t* b ); ``` Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `trans?(A)`. --- #### copyd ```c void bli_copyd ( obj_t* a, obj_t* b ); ``` Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`. --- #### invertd ```c void bli_invertd ( obj_t* a ); ``` Observed object properties: `diagoff(A)`.
--- #### scald ```c void bli_scald ( obj_t* alpha, obj_t* a ); ``` Observed object properties: `conj?(alpha)`, `diagoff(A)`. --- #### scal2d ```c void bli_scal2d ( obj_t* alpha, obj_t* a, obj_t* b ); ``` Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `trans?(A)`. --- #### setd ```c void bli_setd ( obj_t* alpha, obj_t* a ); ``` Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`. --- #### setid ```c void bli_setid ( obj_t* alpha, obj_t* a ); ``` Set the imaginary components of every element along the diagonal of `a` to a scalar `alpha`. Note that the datatype of `alpha` must be the real projection of the datatype of `a`. Observed object properties: `diagoff(A)`. --- #### shiftd ```c void bli_shiftd ( obj_t* alpha, obj_t* a ); ``` Add a constant value `alpha` to every element along the diagonal of `a`. Observed object properties: `diagoff(A)`. --- #### subd ```c void bli_subd ( obj_t* a, obj_t* b ); ``` Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`. --- #### xpbyd ```c void bli_xpbyd ( obj_t* a, obj_t* beta, obj_t* b ); ``` Observed object properties: `conj?(beta)`, `diagoff(A)`, `diag(A)`, `trans?(A)`. --- ## Level-1m operations Level-1m operations perform various level-1 BLAS-like operations on matrices (hence the _m_). --- #### addm ```c void bli_addm ( obj_t* a, obj_t* b ); ``` Perform ``` B := B + trans?(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal. If `uplo(A)` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`, `trans?(A)`. 
--- #### axpym ```c void bli_axpym ( obj_t* alpha, obj_t* a, obj_t* b ); ``` Perform ``` B := B + conj?(alpha) * trans?(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal. If `uplo(A)` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `uplo(A)`, `trans?(A)`. --- #### copym ```c void bli_copym ( obj_t* a, obj_t* b ); ``` Perform ``` B := trans?(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal. If `uplo(A)` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`, `trans?(A)`. --- #### scalm ```c void bli_scalm ( obj_t* alpha, obj_t* a ); ``` Perform ``` A := conj?(alpha) * A ``` where `A` is an _m x n_ matrix stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset. If `uplo(A)` indicates lower or upper storage, only that part of matrix `A` will be updated. Observed object properties: `conj?(alpha)`, `diagoff(A)`, `uplo(A)`. --- #### scal2m ```c void bli_scal2m ( obj_t* alpha, obj_t* a, obj_t* b ); ``` Perform ``` B := conj?(alpha) * trans?(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal. If `uplo(A)` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `uplo(A)`, `trans?(A)`.
--- #### setm ```c void bli_setm ( obj_t* alpha, obj_t* a ); ``` Perform ``` A := conj?(alpha) ``` That is, set all elements of `A` to scalar `conj?(alpha)`, where `A` is an _m x n_ matrix stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset. If `uplo(A)` indicates lower or upper storage, only that part of matrix `A` will be updated. Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `uplo(A)`. --- #### setrm ```c void bli_setrm ( obj_t* alpha, obj_t* a ); ``` Perform ``` real(A) := real(alpha) ``` That is, given an _m x n_ matrix `A`, set all elements' real components to the real component of scalar `alpha`. (If `alpha` is complex, the imaginary component is ignored.) If `A` is real, this operation is equivalent to performing `setm` on `A` with the real component of scalar `alpha`. **Note**: This operation is provided for convenience as an object wrapper to `setm`, and thus it has no analogue in the [BLIS typed API](BLISTypedAPI). Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`. --- #### setim ```c void bli_setim ( obj_t* alpha, obj_t* a ); ``` Perform ``` imag(A) := real(alpha) ``` That is, given an _m x n_ matrix `A`, set all elements' imaginary components to the real component of scalar `alpha`. (If `alpha` is complex, the imaginary component is ignored.) If `A` is real, this operation is equivalent to a no-op. **Note**: This operation is provided for convenience as an object wrapper to `setm`, and thus it has no analogue in the [BLIS typed API](BLISTypedAPI). Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`. --- #### subm ```c void bli_subm ( obj_t* a, obj_t* b ); ``` Perform ``` B := B - trans?(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset and unit or non-unit diagonal. 
If `uplo(A)` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`, `trans?(A)`. --- ## Level-1f operations Level-1f operations implement various fused combinations of level-1 operations (hence the _f_). **Note**: Each level-1f operation has a corresponding level-1f kernel through which it is primarily implemented. Level-1f kernels are employed when optimizing level-2 operations. --- #### axpy2v ```c void bli_axpy2v ( obj_t* alphax, obj_t* alphay, obj_t* x, obj_t* y, obj_t* z ); ``` Perform ``` z := z + conj?(alphax) * conj?(x) + conj?(alphay) * conj?(y) ``` where `x`, `y`, and `z` are vectors of length _m_. The kernel, if optimized, is implemented as a fused pair of calls to [axpyv](BLISObjectAPI.md#axpyv). Observed object properties: `conj?(alphax)`, `conj?(x)`, `conj?(alphay)`, `conj?(y)`. --- #### dotaxpyv ```c void bli_dotaxpyv ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* rho, obj_t* z ); ``` Perform ``` rho := conj?(x)^T * conj?(y) z := z + conj?(alpha) * conj?(x) ``` where `x`, `y`, and `z` are vectors of length _m_ and `alpha` and `rho` are scalars. The kernel, if optimized, is implemented as a fusion of calls to [dotv](BLISObjectAPI.md#dotv) and [axpyv](BLISObjectAPI.md#axpyv). Observed object properties: `conj?(x)`, `conj?(y)`, `conj?(alpha)`. --- #### axpyf ```c void bli_axpyf ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* y ); ``` Perform ``` y := y + conj?(alpha) * conj?(A) * conj?(x) ``` where `A` is an _m x b_ matrix, and `x` and `y` are vectors. The kernel, if optimized, is implemented as a fused series of calls to [axpyv](BLISObjectAPI.md#axpyv) where _b_ is less than or equal to an implementation-dependent fusing factor specific to `axpyf`. Observed object properties: `conj?(alpha)`, `conj?(A)`, `conj?(x)`. 
--- #### dotxf ```c void bli_dotxf ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ); ``` Perform ``` y := conj?(beta) * y + conj?(alpha) * conj?(A)^T * conj?(x) ``` where `A` is an _m x b_ matrix, and `x` and `y` are vectors. The kernel, if optimized, is implemented as a fused series of calls to [dotxv](BLISObjectAPI.md#dotxv) where _b_ is less than or equal to an implementation-dependent fusing factor specific to `dotxf`. Observed object properties: `conj?(alpha)`, `conj?(beta)`, `conj?(A)`, `conj?(x)`. --- #### dotxaxpyf ```c void bli_dotxaxpyf ( obj_t* alpha, obj_t* a, obj_t* w, obj_t* x, obj_t* beta, obj_t* y, obj_t* z ); ``` Perform ``` y := conj?(beta) * y + conj?(alpha) * conj?(A)^T * conj?(w) z := z + conj?(alpha) * conj?(A) * conj?(x) ``` where `A` is an _m x b_ matrix, `w` and `z` are vectors of length _m_, `x` and `y` are vectors of length `b`, and `alpha` and `beta` are scalars. The kernel, if optimized, is implemented as a fusion of calls to [dotxf](BLISObjectAPI.md#dotxf) and [axpyf](BLISObjectAPI.md#axpyf). Observed object properties: `conj?(alpha)`, `conj?(beta)`, `conj?(A)`, `conj?(w)`, `conj?(x)`. ## Level-2 operations Level-2 operations perform various level-2 BLAS-like operations. --- #### gemv ```c void bli_gemv ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ); ``` Perform ``` y := conj?(beta) * y + conj?(alpha) * trans?(A) * conj?(x) ``` where `trans?(A)` is an _m x n_ matrix, and `x` and `y` are vectors. Observed object properties: `conj?(alpha)`, `conj?(beta)`, `trans?(A)`, `conj?(x)`. --- #### ger ```c void bli_ger ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ); ``` Perform ``` A := A + conj?(alpha) * conj?(x) * conj?(y)^T ``` where `A` is an _m x n_ matrix, and `x` and `y` are vectors of length _m_ and _n_, respectively. Observed object properties: `conj?(alpha)`, `conj?(x)`, `conj?(y)`. 
--- #### hemv ```c void bli_hemv ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ); ``` Perform ``` y := conj?(beta) * y + conj?(alpha) * conj?(A) * conj?(x) ``` where `A` is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uplo(A)`, and `x` and `y` are vectors of length _m_. Observed object properties: `conj?(alpha)`, `conj?(beta)`, `conj?(A)`, `uplo(A)`, `conj?(x)`. --- #### her ```c void bli_her ( obj_t* alpha, obj_t* x, obj_t* a ); ``` Perform ``` A := A + conj?(alpha) * conj?(x) * conj?(x)^H ``` where `A` is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uplo(A)`, and `x` is a vector of length _m_. Observed object properties: `conj?(alpha)`, `uplo(A)`, `conj?(x)`. **Note:** The floating-point (`num_t`) type of `alpha` is always the real projection of the floating-point types of `x` and `A`. --- #### her2 ```c void bli_her2 ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ); ``` Perform ``` A := A + alpha * conj?(x) * conj?(y)^H + conj(alpha) * conj?(y) * conj?(x)^H ``` where `A` is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uplo(A)`, and `x` and `y` are vectors of length _m_. Observed object properties: `uplo(A)`, `conj?(x)`, `conj?(y)`. --- #### symv ```c void bli_symv ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ); ``` Perform ``` y := conj?(beta) * y + conj?(alpha) * conj?(A) * conj?(x) ``` where `A` is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uplo(A)`, and `x` and `y` are vectors of length _m_. Observed object properties: `conj?(alpha)`, `conj?(beta)`, `conj?(A)`, `uplo(A)`, `conj?(x)`. --- #### syr ```c void bli_syr ( obj_t* alpha, obj_t* x, obj_t* a ); ``` Perform ``` A := A + conj?(alpha) * conj?(x) * conj?(x)^T ``` where `A` is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uplo(A)`, and `x` is a vector of length _m_. 
Observed object properties: `conj?(alpha)`, `uplo(A)`, `conj?(x)`. --- #### syr2 ```c void bli_syr2 ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ); ``` Perform ``` A := A + alpha * conj?(x) * conj?(y)^T + conj(alpha) * conj?(y) * conj?(x)^T ``` where `A` is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uplo(A)`, and `x` and `y` are vectors of length _m_. Observed object properties: `uplo(A)`, `conj?(x)`, `conj?(y)`. --- #### trmv ```c void bli_trmv ( obj_t* alpha, obj_t* a, obj_t* x ); ``` Perform ``` x := conj?(alpha) * trans?(A) * x ``` where `A` is an _m x m_ triangular matrix stored in the lower or upper triangle as specified by `uplo(A)` with unit/non-unit nature specified by `diag(A)`, and `x` is a vector of length _m_. Observed object properties: `conj?(alpha)`, `uplo(A)`, `trans?(A)`, `diag(A)`. --- #### trsv ```c void bli_trsv ( obj_t* alpha, obj_t* a, obj_t* y ); ``` Solve the linear system ``` trans?(A) * x = conj?(alpha) * y ``` where `A` is an _m x m_ triangular matrix stored in the lower or upper triangle as specified by `uplo(A)` with unit/non-unit nature specified by `diag(A)`, and `x` and `y` are vectors of length _m_. The right-hand side vector operand `y` is overwritten with the solution vector `x`. Observed object properties: `conj?(alpha)`, `uplo(A)`, `trans?(A)`, `diag(A)`. --- ## Level-3 operations Level-3 operations perform various level-3 BLAS-like operations. **Note**: All level-3 operations are implemented through a handful of level-3 microkernels. Please see the [Kernels Guide](KernelsHowTo.md) for more details. --- #### gemm ```c void bli_gemm ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); ``` Perform ``` C := beta * C + alpha * trans?(A) * trans?(B) ``` where `C` is an _m x n_ matrix, `trans?(A)` is an _m x k_ matrix, and `trans?(B)` is a _k x n_ matrix. Observed object properties: `trans?(A)`, `trans?(B)`. 
--- #### hemm ```c void bli_hemm ( side_t sidea, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); ``` Perform ``` C := beta * C + alpha * conj?(A) * trans?(B) ``` if `sidea` is `BLIS_LEFT`, or ``` C := beta * C + alpha * trans?(B) * conj?(A) ``` if `sidea` is `BLIS_RIGHT`, where `C` and `B` are _m x n_ matrices and `A` is a Hermitian matrix stored in the lower or upper triangle as specified by `uplo(A)`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. Observed object properties: `uplo(A)`, `conj?(A)`, `trans?(B)`. --- #### herk ```c void bli_herk ( obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c ); ``` Perform ``` C := beta * C + alpha * trans?(A) * trans?(A)^H ``` where `C` is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uplo(C)` and `trans?(A)` is an _m x k_ matrix. Observed object properties: `trans?(A)`, `uplo(C)`. **Note:** The floating-point (`num_t`) types of `alpha` and `beta` are always the real projection of the floating-point types of `A` and `C`. --- #### her2k ```c void bli_her2k ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); ``` Perform ``` C := beta * C + alpha * trans?(A) * trans?(B)^H + conj(alpha) * trans?(B) * trans?(A)^H ``` where `C` is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uplo(C)` and `trans?(A)` and `trans?(B)` are _m x k_ matrices. Observed object properties: `trans?(A)`, `trans?(B)`, `uplo(C)`. **Note:** The floating-point (`num_t`) type of `beta` is always the real projection of the floating-point types of `A` and `C`. 
--- #### symm ```c void bli_symm ( side_t sidea, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); ``` Perform ``` C := beta * C + alpha * conj?(A) * trans?(B) ``` if `sidea` is `BLIS_LEFT`, or ``` C := beta * C + alpha * trans?(B) * conj?(A) ``` if `sidea` is `BLIS_RIGHT`, where `C` and `B` are _m x n_ matrices and `A` is a symmetric matrix stored in the lower or upper triangle as specified by `uplo(A)`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. Observed object properties: `uplo(A)`, `conj?(A)`, `trans?(B)`. --- #### syrk ```c void bli_syrk ( obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c ); ``` Perform ``` C := beta * C + alpha * trans?(A) * trans?(A)^T ``` where `C` is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uplo(C)` and `trans?(A)` is an _m x k_ matrix. Observed object properties: `trans?(A)`, `uplo(C)`. --- #### syr2k ```c void bli_syr2k ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); ``` Perform ``` C := beta * C + alpha * trans?(A) * trans?(B)^T + alpha * trans?(B) * trans?(A)^T ``` where `C` is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uplo(C)` and `trans?(A)` and `trans?(B)` are _m x k_ matrices. Observed object properties: `trans?(A)`, `trans?(B)`, `uplo(C)`. --- #### trmm ```c void bli_trmm ( side_t sidea, obj_t* alpha, obj_t* a, obj_t* b ); ``` Perform ``` B := alpha * trans?(A) * B ``` if `sidea` is `BLIS_LEFT`, or ``` B := alpha * B * trans?(A) ``` if `sidea` is `BLIS_RIGHT`, where `B` is an _m x n_ matrix and `A` is a triangular matrix stored in the lower or upper triangle as specified by `uplo(A)` with unit/non-unit nature specified by `diag(A)`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. Observed object properties: `uplo(A)`, `trans?(A)`, `diag(A)`. 
--- #### trmm3 ```c void bli_trmm3 ( side_t sidea, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); ``` Perform ``` C := beta * C + alpha * trans?(A) * trans?(B) ``` if `sidea` is `BLIS_LEFT`, or ``` C := beta * C + alpha * trans?(B) * trans?(A) ``` if `sidea` is `BLIS_RIGHT`, where `C` and `trans?(B)` are _m x n_ matrices and `A` is a triangular matrix stored in the lower or upper triangle as specified by `uplo(A)` with unit/non-unit nature specified by `diag(A)`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. Observed object properties: `uplo(A)`, `trans?(A)`, `diag(A)`, `trans?(B)`. --- #### trsm ```c void bli_trsm ( side_t sidea, obj_t* alpha, obj_t* a, obj_t* b ); ``` Solve the linear system with multiple right-hand sides ``` trans?(A) * X = alpha * B ``` if `sidea` is `BLIS_LEFT`, or ``` X * trans?(A) = alpha * B ``` if `sidea` is `BLIS_RIGHT`, where `X` and `B` are _m x n_ matrices and `A` is a triangular matrix stored in the lower or upper triangle as specified by `uplo(A)` with unit/non-unit nature specified by `diag(A)`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. The right-hand side matrix operand `B` is overwritten with the solution matrix `X`. Observed object properties: `uplo(A)`, `trans?(A)`, `diag(A)`. --- ## Utility operations --- #### asumv ```c void bli_asumv ( obj_t* x, obj_t* asum ); ``` Compute the sum of the absolute values of the fundamental elements of vector `x`. The resulting sum is stored to `asum`. Observed object properties: none. **Note:** The floating-point type of `asum` is always the real projection of the floating-point type of `x`. **Note:** This function attempts to mimic the algorithm for computing the absolute vector sum in the netlib BLAS routines `*asum()`. 
--- #### norm1m #### normfm #### normim ```c void bli_norm[1fi]m ( obj_t* a, obj_t* norm ); ``` Compute the one-norm (`bli_norm1m()`), Frobenius norm (`bli_normfm()`), or infinity norm (`bli_normim()`) of the elements in an _m x n_ matrix `A`. If `uplo(A)` is `BLIS_LOWER` or `BLIS_UPPER` then `A` is assumed to be lower or upper triangular, respectively, with the main diagonal located at offset `diagoff(A)`. The resulting norm is stored to `norm`. Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`. **Note:** The floating-point (`num_t`) type of `norm` is always the real projection of the floating-point type of `A`. --- #### norm1v #### normfv #### normiv ```c void bli_norm[1fi]v ( obj_t* x, obj_t* norm ); ``` Compute the one-norm (`bli_norm1v()`), Frobenius norm (`bli_normfv()`), or infinity norm (`bli_normiv()`) of the elements in a vector `x` of length _n_. The resulting norm is stored to `norm`. Observed object properties: none. **Note:** The floating-point (`num_t`) type of `norm` is always the real projection of the floating-point type of `x`. --- #### mkherm ```c void bli_mkherm ( obj_t* a ); ``` Make an _m x m_ matrix `A` explicitly Hermitian by copying the conjugate of the triangle specified by `uplo(A)` to the opposite triangle. Imaginary components of diagonal elements are explicitly set to zero. It is assumed that the diagonal offset of `A` is zero. Observed object properties: `uplo(A)`. --- #### mksymm ```c void bli_mksymm ( obj_t* a ); ``` Make an _m x m_ matrix `A` explicitly symmetric by copying the triangle specified by `uplo(A)` to the opposite triangle. It is assumed that the diagonal offset of `A` is zero. Observed object properties: `uplo(A)`. --- #### mktrim ```c void bli_mktrim ( obj_t* a ); ``` Make an _m x m_ matrix `A` explicitly triangular by preserving the triangle specified by `uplo(A)` and zeroing the elements in the opposite triangle. It is assumed that the diagonal offset of `A` is zero. 
Observed object properties: `uplo(A)`. --- #### fprintv ```c void bli_fprintv ( FILE* file, char* s1, obj_t* x, char* format, char* s2 ); ``` Print a vector `x` of length _m_ to file stream `file`, where `file` is a file pointer returned by the standard C library function `fopen()`. The caller may also pass in a global file pointer such as `stdout` or `stderr`. The strings `s1` and `s2` are printed immediately before and after the output (respectively), and the format specifier `format` is used to format the individual elements. For valid format specifiers, please see documentation for the standard C library function `printf()`. **Note:** For complex datatypes, the format specifier is applied to both the real and imaginary components **individually**. Therefore, you should use format specifiers such as `"%5.2f"`, but **not** `"%5.2f + %5.2f"`. --- #### fprintm ```c void bli_fprintm ( FILE* file, char* s1, obj_t* a, char* format, char* s2 ); ``` Print an _m x n_ matrix `A` to file stream `file`, where `file` is a file pointer returned by the standard C library function `fopen()`. The caller may also pass in a global file pointer such as `stdout` or `stderr`. The strings `s1` and `s2` are printed immediately before and after the output (respectively), and the format specifier `format` is used to format the individual elements. For valid format specifiers, please see documentation for the standard C library function `printf()`. **Note:** For complex datatypes, the format specifier is applied to both the real and imaginary components **individually**. Therefore, you should use format specifiers such as `"%5.2f"`, but **not** `"%5.2f + %5.2f"`. --- #### printv ```c void bli_printv ( char* s1, obj_t* x, char* format, char* s2 ); ``` Print a vector `x` of length _m_ to standard output. This function call is equivalent to calling `bli_fprintv()` with `stdout` as the file pointer. 
--- #### printm ```c void bli_printm ( char* s1, obj_t* a, char* format, char* s2 ); ``` Print an _m x n_ matrix `a` to standard output. This function call is equivalent to calling `bli_fprintm()` with `stdout` as the file pointer. --- #### randv ```c void bli_randv ( obj_t* x ); ``` Set the elements of a vector `x` of length _n_ to random values on the interval `[-1,1)`. **Note:** For complex datatypes, the real and imaginary components of each element are randomized individually and independently of one another. --- #### randm ```c void bli_randm ( obj_t* a ); ``` Set the elements of an _m x n_ matrix `A` to random values on the interval `[-1,1)`. Off-diagonal elements (in the triangle specified by `uplo(A)`) are scaled by `1.0/max(m,n)`. Observed object properties: `diagoff(A)`, `uplo(A)`. **Note:** For complex datatypes, the real and imaginary components of each off-diagonal element are randomized individually and independently of one another. **Note:** If `uplo(A)` is `BLIS_LOWER` or `BLIS_UPPER` and you plan to use this matrix to test `trsv` or `trsm`, additional scaling of the diagonal is recommended to ensure that the matrix is invertible. In this case, try using the [addd](BLISObjectAPI.md#addd) operation to increase the magnitude to the diagonal elements. --- #### sumsqv ```c void bli_sumsqv ( obj_t* x, obj_t* scale, obj_t* sumsq ); ``` Compute the sum of the squares of the elements in a vector `x` of length _n_. The result is computed in scaled form, and in such a way that it may be used repeatedly to accumulate the sum of the squares of several vectors. The function computes scale\_new and sumsq\_new such that ``` scale_new^2 * sumsq_new = x[0]^2 + x[1]^2 + ... x[m-1]^2 + scale_old^2 * sumsq_old ``` where, on entry, `scale` and `sumsq` contain `scale_old` and `sumsq_old`, respectively, and on exit, `scale` and `sumsq` contain `scale_new` and `sumsq_new`, respectively. 
**Note:** This function attempts to mimic the algorithm for computing the Frobenius norm in the netlib LAPACK routine `?lassq()`. **Note:** The floating-point (`num_t`) types of `scale` and `sumsq` are always the real projection of the floating-point type of `x`. --- #### getijm ```c err_t bli_getijm ( dim_t i, dim_t j, obj_t* b, double* ar, double* ai ); ``` Copy the real and imaginary values at the (`i`,`j`) element of object `b` to `ar` and `ai`. If elements of `b` are stored as real types, then only `ar` is overwritten and `ai` is left unchanged. (If `b` contains elements stored in single precision, the corresponding elements are typecast/promoted during the copy.) If either the row offset `i` is beyond the _m_ dimension of `b`, or column offset `j` is beyond the _n_ dimension of `b`, the function does not perform any copy and returns `BLIS_FAILURE`. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, `BLIS_FAILURE` is returned. #### setijm ```c err_t bli_setijm ( double ar, double ai, dim_t i, dim_t j, obj_t* b ); ``` Copy real and imaginary values `ar` and `ai` to the (`i`,`j`) element of object `b`. If elements of `b` are stored as real types, then only `ar` is copied and `ai` is ignored. (If `b` contains elements stored in single precision, the corresponding elements are typecast/demoted during the copy.) If either the row offset `i` is beyond the _m_ dimension of `b`, or column offset `j` is beyond the _n_ dimension of `b`, the function does not perform any copy and returns `BLIS_FAILURE`. Similarly, if `b` is a global scalar constant such as `BLIS_ONE`, `BLIS_FAILURE` is returned. # Query function reference BLIS allows applications to query information about how BLIS was configured. The `bli_info_` API provides several categories of query routines. Most values are returned as a `gint_t`, which is a signed integer. 
The size of this integer can be queried through a special routine that returns the size in a character string: ```c char* bli_info_get_int_type_size_str( void ); ``` **Note:** All of the `bli_info_` functions are **always** thread-safe, no matter how BLIS was configured. ## General library information The following routine returns the address of the full BLIS version string: ```c char* bli_info_get_version_str( void ); ``` ## Specific configuration The following routine returns a unique ID of type `arch_t` that identifies the current active configuration: ```c arch_t bli_arch_query_id( void ); ``` This is most useful when BLIS is configured with multiple configurations. (When linking to multi-configuration builds of BLIS, you don't know for sure which configuration will be used until runtime since the configuration-specific parameters are not loaded until after calling a heuristic to detect the hardware--usually based on the `CPUID` instruction.) Once the configuration's ID is known, it can be used to query a string that contains the name of the configuration: ```c char* bli_arch_string( arch_t id ); ``` ## General configuration The following routines return various general-purpose constants that affect the entire framework. All of these settings default to sane values, which can then be overridden by the configuration in [bli\_config.h](ConfigurationHowTo#bli_configh). If they are absent from a particular configuration's `bli_config.h` header file, then the default value is used, as specified in [frame/include/bli_config_macro_defs.h](https://github.com/flame/blis/blob/master/frame/include/bli_config_macro_defs.h). 
```c gint_t bli_info_get_int_type_size( void ); gint_t bli_info_get_num_fp_types( void ); gint_t bli_info_get_max_type_size( void ); gint_t bli_info_get_page_size( void ); gint_t bli_info_get_simd_num_registers( void ); gint_t bli_info_get_simd_size( void ); gint_t bli_info_get_simd_align_size( void ); gint_t bli_info_get_stack_buf_max_size( void ); gint_t bli_info_get_stack_buf_align_size( void ); gint_t bli_info_get_heap_addr_align_size( void ); gint_t bli_info_get_heap_stride_align_size( void ); gint_t bli_info_get_pool_addr_align_size( void ); gint_t bli_info_get_enable_stay_auto_init( void ); gint_t bli_info_get_enable_blas( void ); gint_t bli_info_get_blas_int_type_size( void ); ``` ## Kernel information ### Micro-kernel implementation type query The following routines allow the caller to obtain a string that identifies the implementation type of each microkernel that is currently active (ie: part of the current active configuration, as identified by `bli_arch_query_id()`). ```c char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) ``` Possible implementation (ie: the `ind_t method` argument) types are: * `BLIS_3MH`: Implementation based on the 3m method applied at the highest level, outside the 5th loop around the microkernel. * `BLIS_3M1`: Implementation based on the 3m method applied within the 1st loop around the microkernel. * `BLIS_4MH`: Implementation based on the 4m method applied at the highest level, outside the 5th loop around the microkernel. * `BLIS_4M1B`: Implementation based on the 4m method applied within the 1st loop around the microkernel. 
Computation is ordered such that the 1st loop is fissured into two loops, the first of which multiplies the real part of the current micropanel of packed matrix B (against all real and imaginary parts of packed matrix A), and the second of which multiplies the imaginary part of the current micropanel of packed matrix B. * `BLIS_4M1A`: Implementation based on the 4m method applied within the 1st loop around the microkernel. Computation is ordered such that real and imaginary components of the current micropanels are completely used before proceeding to the next virtual microkernel invocation. * `BLIS_1M`: Implementation based on the 1m method. (This is the default induced method when real domain kernels are present but complex kernels are missing.) * `BLIS_NAT`: Implementation based on "native" execution (ie: NOT an induced method). **NOTE**: `BLIS_3M3` and `BLIS_3M2` have been deprecated from the `typedef enum` of `ind_t`, and `BLIS_4M1B` is also effectively no longer available, though the `typedef enum` value still exists. Possible microkernel types (ie: the return values for `bli_info_get_*_ukr_impl_string()`) are: * `BLIS_REFERENCE_UKERNEL` (`"refrnce"`): This value is returned when the queried microkernel is provided by the reference implementation. * `BLIS_VIRTUAL_UKERNEL` (`"virtual"`): This value is returned when the queried microkernel is driven by the "virtual" microkernel provided by an induced method. This happens for any `method` value that is not `BLIS_NAT` (ie: native), but only applies to the complex domain. * `BLIS_OPTIMIZED_UKERNEL` (`"optimzd"`): This value is returned when the queried microkernel is provided by an implementation that is neither reference nor virtual, and thus we assume the kernel author would deem it to be "optimized". Such a microkernel may not be optimal in the literal sense of the word, but nonetheless is _intended_ to be optimized, at least relative to the reference microkernels. 
* `BLIS_NOTAPPLIC_UKERNEL` (`"notappl"`): This value is returned usually when performing a `gemmtrsm` or `trsm` microkernel type query for any `method` value that is not `BLIS_NAT` (ie: native). That is, induced methods cannot be (purely) used on `trsm`-based microkernels because these microkernels perform more a triangular inversion, which is not matrix multiplication. # Example code BLIS provides lots of example code in the [examples/oapi](https://github.com/flame/blis/tree/master/examples/oapi) directory of the BLIS source distribution. The example code in this directory is set up like a tutorial, and so we recommend starting from the beginning. Topics include creating and managing objects, printing vectors and matrices, setting and querying object properties, and calling a representative subset of the computational level-1v, -1m, -2, -3, and utility operations documented above. blis-0.6.1/docs/BLISTypedAPI.md000066400000000000000000001712471360743507500160400ustar00rootroot00000000000000# Contents * **[Contents](BLISTypedAPI.md#contents)** * **[Introduction](BLISTypedAPI.md#introduction)** * [BLIS types](BLISTypedAPI.md#blis-types) * [Integer-based types](BLISTypedAPI.md#integer-based-types) * [Floating-point types](BLISTypedAPI.md#floating-point-types) * [Enumerated parameter types](BLISTypedAPI.md#enumerated-parameter-types) * [Basic vs expert interfaces](BLISTypedAPI.md#basic-vs-expert-interfaces) * [Context type](BLISTypedAPI.md#context-type) * [Runtime type](BLISTypedAPI.md#runtime-type) * [BLIS header file](BLISTypedAPI.md#blis-header-file) * [Initialization and cleanup](BLISTypedAPI.md#initialization-and-cleanup) * **[Computational function reference](BLISTypedAPI.md#computational-function-reference)** * [Operation index](BLISTypedAPI.md#operation-index) * [Level-1v operations](BLISTypedAPI.md#level-1v-operations) * [Level-1d operations](BLISTypedAPI.md#level-1d-operations) * [Level-1m operations](BLISTypedAPI.md#level-1m-operations) * [Level-1f 
operations](BLISTypedAPI.md#level-1f-operations) * [Level-2 operations](BLISTypedAPI.md#level-2-operations) * [Level-3 operations](BLISTypedAPI.md#level-3-operations) * [Utility operations](BLISTypedAPI.md#utility-operations) * [Level-3 microkernels](BLISTypedAPI.md#level-3-microkernels) * **[Query function reference](BLISTypedAPI.md#query-function-reference)** * [General library information](BLISTypedAPI.md#general-library-information) * [Specific configuration](BLISTypedAPI.md#specific-configuration) * [General configuration](BLISTypedAPI.md#general-configuration) * [Kernel information](BLISTypedAPI.md#kernel-information) * **[Example code](BLISTypedAPI.md#example-code)** # Introduction This document summarizes one of the primary native APIs in BLIS--the "typed" API. Here, we also discuss BLIS-specific type definitions, header files, and prototypes to auxiliary functions. This document also includes APIs to key kernels which are used to accelerate and optimize various level-2 and level-3 operations, though the [Kernels Guide](KernelsHowTo.md) goes into more detail, especially for level-3 microkernels. There are many functions that BLIS implements that are not listed here, either because they are lower-level functions, or they are considered for use primarily by developers and experts. For curious readers, the typed API was given its name (a) because it exposes the floating-point types in the names of its functions, and (b) to contrast it with the other native API in BLIS, the object API, which is [documented here](BLISObjectAPI.md). (The third API supported by BLIS is the BLAS compatibility layer, which mimics conventional Fortran-77 BLAS.) ## BLIS types The following tables list various types used throughout the BLIS typed API. ### Integer-based types | BLIS integer type | Type definition | Used to represent... 
| |:------------------|:-----------------------|:---------------------------------------------------------------------| | `gint_t` | `int32_t` or `int64_t` | general-purpose signed integer; used to define signed integer types. | | `dim_t` | `gint_t` | matrix and vector dimensions. | | `inc_t` | `gint_t` | matrix row/column strides and vector increments. | | `doff_t` | `gint_t` | matrix diagonal offset: if _k_ < 0, diagonal begins at element (-_k_,0); otherwise diagonal begins at element (0,_k_). | ### Floating-point types | BLIS type | BLIS char | Type definition | Used to represent... | |:-----------|:----------|:---------------------------------------|:-------------------------------------| | `float` | `s` | _N/A_ | single-precision real numbers | | `double` | `d` | _N/A_ | double-precision real numbers | | `scomplex` | `c` | `struct { float real; float imag; }` | single-precision complex numbers | | `dcomplex` | `z` | `struct { double real; double imag; }` | double-precision complex numbers | ### Enumerated parameter types | `trans_t` | Semantic meaning: Corresponding matrix operand... | |:-------------------------|:--------------------------------------------------| | `BLIS_NO_TRANSPOSE` | will be used as given. | | `BLIS_TRANSPOSE` | will be implicitly transposed. | | `BLIS_CONJ_NO_TRANSPOSE` | will be implicitly conjugated. | | `BLIS_CONJ_TRANSPOSE` | will be implicitly transposed _and_ conjugated. | | `conj_t` | Semantic meaning: Corresponding matrix/vector operand... | |:---------------------|:---------------------------------------------------------| | `BLIS_NO_CONJUGATE` | will be used as given. | | `BLIS_CONJUGATE` | will be implicitly conjugated. | | `side_t` | Semantic meaning: Corresponding matrix operand... | |:-------------|:---------------------------------------------------| | `BLIS_LEFT` | appears on the left. | | `BLIS_RIGHT` | appears on the right. | | `uplo_t` | Semantic meaning: Corresponding matrix operand... 
| |:-------------|:--------------------------------------------------| | `BLIS_LOWER` | is stored in (and will be accessed only from) the lower triangle. | | `BLIS_UPPER` | is stored in (and will be accessed only from) the upper triangle. | | `BLIS_DENSE` | is stored as a full matrix (ie: in both triangles). | | `diag_t` | Semantic meaning: Corresponding matrix operand... | |:--------------------|:--------------------------------------------------| | `BLIS_NONUNIT_DIAG` | has a non-unit diagonal that should be explicitly read from. | | `BLIS_UNIT_DIAG` | has a unit diagonal that should be implicitly assumed (and not read from). | ### Basic vs expert interfaces The functions listed in this document belong to the "basic" interface subset of the BLIS typed API. There is a companion "expert" interface that mirrors the basic interface, except that it also contains at least one additional parameter that is only of interest to experts and library developers. The expert interfaces use the same name as the basic function names, except for an additional "_ex" suffix. For example, the basic interface for `gemm` is ```c void bli_?gemm ( trans_t transa, trans_t transb, dim_t m, dim_t n, dim_t k, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb, ctype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` while the expert interface is: ```c void bli_?gemm_ex ( trans_t transa, trans_t transb, dim_t m, dim_t n, dim_t k, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb, ctype* beta, ctype* c, inc_t rsc, inc_t csc, cntx_t* cntx, rntm_t* rntm ); ``` The expert interface contains two additional parameters: a `cntx_t*` and `rntm_t*`. Note that calling a function from the expert interface with the `cntx_t*` and `rntm_t*` arguments each set to `NULL` is equivalent to calling the corresponding basic interface. 
Specifically, a `NULL` value passed in for the `cntx_t*` results in a valid context being queried from BLIS, and a `NULL` value passed in for the `rntm_t*` results in the current global settings for multithreading being used. ## Context type In general, it is permissible to pass in `NULL` for a `cntx_t*` parameter when calling an expert interface such as `bli_dgemm_ex()`. However, there are cases where `NULL` values are not accepted and may result in a segmentation fault. Specifically, the `cntx_t*` argument appears in the interfaces to the `gemm`, `trsm`, and `gemmtrsm` [level-3 microkernels](KernelsHowTo.md#level-3) along with all [level-1v](KernelsHowTo.md#level-1v) and [level-1f](KernelsHowTo.md#level-1f) kernels. There, as a general rule, a valid pointer must be passed in. Whenever a valid context is needed, the developer may query a default context from the global kernel structure (if a context is not already available in the current scope): ```c cntx_t* bli_gks_query_cntx( void ); ``` When BLIS is configured to target a configuration family (e.g. `intel64`, `x86_64`), `bli_gks_query_cntx()` will use `cpuid` or an equivalent heuristic to select and return the appropriate context. When BLIS is configured to target a singleton sub-configuration (e.g. `haswell`, `skx`), `bli_gks_query_cntx()` will unconditionally return a pointer to the context appropriate for the targeted configuration. ## Runtime type When calling one of the expert interfaces, a `rntm_t` (runtime) object can be used to convey a thread-local request for parallelism to the underlying implementation. Runtime objects are thread-safe by nature when they are declared statically as a stack variable (or allocated via `malloc()`), initialized, and then passed into the expert interface of interest. Notice that runtime objects have no analogue in most BLAS libraries, where you are forced to specify parallelism at a global level (usually via environment variables).
For more information on using `rntm_t` objects, please read the [Multithreading](Multithreading.md) documentation, paying close attention to the section on [local setting of parallelism](Multithreading.md#locally-at-runtime). ## BLIS header file All BLIS definitions and prototypes may be included in your C source file by including a single header file: ```c #include "blis.h" ``` ## Initialization and Cleanup As of [9804adf](https://github.com/flame/blis/commit/9804adfd405056ec332bb8e13d68c7b52bd3a6c1), BLIS no longer requires explicit initialization and finalization at runtime. In other words, users do not need to call `bli_init()` before the application can make use of the library (and `bli_finalize()` after the application is finished with the library). Instead, all computational operations (and some non-computational functions) in BLIS will initialize the library on behalf of the user if it has not already been initialized. This change was made to simplify the user experience. Application developers should keep in mind, however, that this new self-initialization regime implies the following: unless the library is *explicitly* finalized via `bli_finalize()`, it will, once initialized, remain initialized for the life of the application. This is likely not a problem in the vast majority of cases. However, a memory-constrained application that performs all of its DLA up-front, for example, may wish to explicitly finalize the library after BLIS is no longer needed in order to free up memory for other purposes. Similarly, an expert user may call `bli_init()` manually in order to control when the overhead of library initialization is incurred, even though the library would have self-initialized. 
The interfaces to `bli_init()` and `bli_finalize()` are quite simple; they require no arguments and return no values: ```c void bli_init( void ); void bli_finalize( void ); ``` # Computational function reference Notes for interpreting the following prototypes: * Any occurrence of `?` should be replaced with `s`, `d`, `c`, or `z` to form an actual function name. * Any occurrence of `ctype` should be replaced with the actual C type corresponding to the datatype instance in question, while `rtype` should be replaced by the real projection of `ctype`. For example: * If we consider the prototype for `bli_zaxpyv()` below, `ctype` refers to `dcomplex`. * If we consider the prototype for `bli_znormfv()` below, `ctype` refers to `dcomplex` while `rtype` refers to `double`. * Any occurrence of `itype` should be replaced with the general-purpose signed integer type, `gint_t`. * All vector arguments have associated increments that follow them, typically listed as `incX` for a given vector `x`. The semantic meaning of a vector increment is "the distance, in units of elements, between any two adjacent elements in the vector." * All matrix arguments have associated row and column stride arguments that follow them, typically listed as `rsX` and `csX` for a given matrix `X`. Row strides are always listed first, and column strides are always listed second. The semantic meaning of a row stride is "the distance, in units of elements, to the next row (within a column)," and the meaning of a column stride is "the distance, in units of elements, to the next column (within a row)." Thus, unit row stride implies column-major storage and unit column stride implies row-major storage. Notes for interpreting function descriptions: * `conjX()` and `transX()` should be interpreted as predicates that capture the operand X with any value of `conj_t` or `trans_t` applied. For example: * `conjx(x)` refers to a vector `x` that is either conjugated or used as given.
* `transa(A)` refers to a matrix `A` that is either transposed, conjugated _and_ transposed, conjugated only, or used as given. * Any operand marked with `conj()` is unconditionally conjugated. * Any operand marked with `^T` is unconditionally transposed. Similarly, any operand that is marked with `^H` is unconditionally conjugate-transposed. * All occurrences of `alpha`, `beta`, and `rho` parameters are scalars. --- ## Operation index * **[Level-1v](BLISTypedAPI.md#level-1v-operations)**: Operations on vectors: * [addv](BLISTypedAPI.md#addv), [amaxv](BLISTypedAPI.md#amaxv), [axpyv](BLISTypedAPI.md#axpyv), [axpbyv](BLISTypedAPI.md#axpbyv), [copyv](BLISTypedAPI.md#copyv), [dotv](BLISTypedAPI.md#dotv), [dotxv](BLISTypedAPI.md#dotxv), [invertv](BLISTypedAPI.md#invertv), [scal2v](BLISTypedAPI.md#scal2v), [scalv](BLISTypedAPI.md#scalv), [setv](BLISTypedAPI.md#setv), [subv](BLISTypedAPI.md#subv), [swapv](BLISTypedAPI.md#swapv), [xpbyv](BLISTypedAPI.md#xpbyv) * **[Level-1d](BLISTypedAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals: * [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [shiftd](BLISTypedAPI.md#shiftd), [subd](BLISTypedAPI.md#subd), [xpbyd](BLISTypedAPI.md#xpbyd) * **[Level-1m](BLISTypedAPI.md#level-1m-operations)**: Element-wise operations on matrices: * [addm](BLISTypedAPI.md#addm), [axpym](BLISTypedAPI.md#axpym), [copym](BLISTypedAPI.md#copym), [scalm](BLISTypedAPI.md#scalm), [scal2m](BLISTypedAPI.md#scal2m), [setm](BLISTypedAPI.md#setm), [subm](BLISTypedAPI.md#subm) * **[Level-1f](BLISTypedAPI.md#level-1f-operations)**: Fused operations on multiple vectors: * [axpy2v](BLISTypedAPI.md#axpy2v), [dotaxpyv](BLISTypedAPI.md#dotaxpyv), [axpyf](BLISTypedAPI.md#axpyf), [dotxf](BLISTypedAPI.md#dotxf),
[dotxaxpyf](BLISTypedAPI.md#dotxaxpyf) * **[Level-2](BLISTypedAPI.md#level-2-operations)**: Operations with one matrix and (at least) one vector operand: * [gemv](BLISTypedAPI.md#gemv), [ger](BLISTypedAPI.md#ger), [hemv](BLISTypedAPI.md#hemv), [her](BLISTypedAPI.md#her), [her2](BLISTypedAPI.md#her2), [symv](BLISTypedAPI.md#symv), [syr](BLISTypedAPI.md#syr), [syr2](BLISTypedAPI.md#syr2), [trmv](BLISTypedAPI.md#trmv), [trsv](BLISTypedAPI.md#trsv) * **[Level-3](BLISTypedAPI.md#level-3-operations)**: Operations with matrices that are multiplication-like: * [gemm](BLISTypedAPI.md#gemm), [hemm](BLISTypedAPI.md#hemm), [herk](BLISTypedAPI.md#herk), [her2k](BLISTypedAPI.md#her2k), [symm](BLISTypedAPI.md#symm), [syrk](BLISTypedAPI.md#syrk), [syr2k](BLISTypedAPI.md#syr2k), [trmm](BLISTypedAPI.md#trmm), [trmm3](BLISTypedAPI.md#trmm3), [trsm](BLISTypedAPI.md#trsm) * **[Utility](BLISTypedAPI.md#utility-operations)**: Miscellaneous operations on matrices and vectors: * [asumv](BLISTypedAPI.md#asumv), [norm1v](BLISTypedAPI.md#norm1v), [normfv](BLISTypedAPI.md#normfv), [normiv](BLISTypedAPI.md#normiv), [norm1m](BLISTypedAPI.md#norm1m), [normfm](BLISTypedAPI.md#normfm), [normim](BLISTypedAPI.md#normim), [mkherm](BLISTypedAPI.md#mkherm), [mksymm](BLISTypedAPI.md#mksymm), [mktrim](BLISTypedAPI.md#mktrim), [fprintv](BLISTypedAPI.md#fprintv), [fprintm](BLISTypedAPI.md#fprintm), [printv](BLISTypedAPI.md#printv), [printm](BLISTypedAPI.md#printm), [randv](BLISTypedAPI.md#randv), [randm](BLISTypedAPI.md#randm), [sumsqv](BLISTypedAPI.md#sumsqv) --- ## Level-1v operations Level-1v operations perform various level-1 BLAS-like operations on vectors (hence the _v_). **Note**: Most level-1v operations have a corresponding level-1v kernel through which they are primarily implemented. --- #### addv ```c void bli_?addv ( conj_t conjx, dim_t n, ctype* x, inc_t incx, ctype* y, inc_t incy ); ``` Perform ``` y := y + conjx(x) ``` where `x` and `y` are vectors of length _n_.
--- #### amaxv ```c void bli_?amaxv ( dim_t n, ctype* x, inc_t incx, dim_t* index ); ``` Given a vector of length _n_, return the zero-based index `index` of the element of vector `x` that contains the largest absolute value (or, in the complex domain, the largest complex modulus). If `NaN` is encountered, it is treated as if it were a valid value that was smaller than any other value in the vector. If more than one element contains the same maximum value, the index of the latter element is returned via `index`. **Note:** This function attempts to mimic the algorithm for finding the element with the maximum absolute value in the netlib BLAS routines `i?amax()`. --- #### axpyv ```c void bli_?axpyv ( conj_t conjx, dim_t n, ctype* alpha, ctype* x, inc_t incx, ctype* y, inc_t incy ); ``` Perform ``` y := y + alpha * conjx(x) ``` where `x` and `y` are vectors of length _n_, and `alpha` is a scalar. --- #### axpbyv ```c void bli_?axpbyv ( conj_t conjx, dim_t n, ctype* alpha, ctype* x, inc_t incx, ctype* beta, ctype* y, inc_t incy ) ``` Perform ``` y := beta * y + alpha * conjx(x) ``` where `x` and `y` are vectors of length _n_, and `alpha` and `beta` are scalars. --- #### copyv ```c void bli_?copyv ( conj_t conjx, dim_t n, ctype* x, inc_t incx, ctype* y, inc_t incy ); ``` Perform ``` y := conjx(x) ``` where `x` and `y` are vectors of length _n_. --- #### dotv ```c void bli_?dotv ( conj_t conjx, conj_t conjy, dim_t n, ctype* x, inc_t incx, ctype* y, inc_t incy, ctype* rho ); ``` Perform ``` rho := conjx(x)^T * conjy(y) ``` where `x` and `y` are vectors of length _n_, and `rho` is a scalar. --- #### dotxv ```c void bli_?dotxv ( conj_t conjx, conj_t conjy, dim_t n, ctype* alpha, ctype* x, inc_t incx, ctype* y, inc_t incy, ctype* beta, ctype* rho ); ``` Perform ``` rho := beta * rho + alpha * conjx(x)^T * conjy(y) ``` where `x` and `y` are vectors of length _n_, and `alpha`, `beta`, and `rho` are scalars. 
--- #### invertv ```c void bli_?invertv ( dim_t n, ctype* x, inc_t incx ); ``` Invert all elements of an _n_-length vector `x`. --- #### scalv ```c void bli_?scalv ( conj_t conjalpha, dim_t n, ctype* alpha, ctype* x, inc_t incx ); ``` Perform ``` x := conjalpha(alpha) * x ``` where `x` is a vector of length _n_, and `alpha` is a scalar. --- #### scal2v ```c void bli_?scal2v ( conj_t conjx, dim_t n, ctype* alpha, ctype* x, inc_t incx, ctype* y, inc_t incy ); ``` Perform ``` y := alpha * conjx(x) ``` where `x` and `y` are vectors of length _n_, and `alpha` is a scalar. --- #### setv ```c void bli_?setv ( conj_t conjalpha, dim_t n, ctype* alpha, ctype* x, inc_t incx ); ``` Perform ``` x := conjalpha(alpha) ``` That is, set all elements of an _n_-length vector `x` to scalar `conjalpha(alpha)`. --- #### subv ```c void bli_?subv ( conj_t conjx, dim_t n, ctype* x, inc_t incx, ctype* y, inc_t incy ); ``` Perform ``` y := y - conjx(x) ``` where `x` and `y` are vectors of length _n_. --- #### swapv ```c void bli_?swapv ( dim_t n, ctype* x, inc_t incx, ctype* y, inc_t incy ); ``` Swap corresponding elements of two _n_-length vectors `x` and `y`. --- #### xpbyv ```c void bli_?xpbyv ( conj_t conjx, dim_t n, ctype* x, inc_t incx, ctype* beta, ctype* y, inc_t incy ) ``` Perform ``` y := beta * y + conjx(x) ``` where `x` and `y` are vectors of length _n_, and `beta` is a scalar. --- ## Level-1d operations Level-1d operations perform various level-1 BLAS-like operations on matrix diagonals (hence the _d_). Most of these operations are similar to level-1m counterparts, except they only read and update matrix diagonals and therefore do not take any `uplo` arguments. Please see the descriptions for the corresponding level-1m operation for a description of the arguments. 
--- #### addd ```c void bli_?addd ( doff_t diagoffa, diag_t diaga, trans_t transa, dim_t m, dim_t n, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` --- #### axpyd ```c void bli_?axpyd ( doff_t diagoffa, diag_t diaga, trans_t transa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` --- #### copyd ```c void bli_?copyd ( doff_t diagoffa, diag_t diaga, trans_t transa, dim_t m, dim_t n, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` --- #### invertd ```c void bli_?invertd ( doff_t diagoffa, dim_t m, dim_t n, ctype* a, inc_t rsa, inc_t csa ); ``` --- #### scald ```c void bli_?scald ( conj_t conjalpha, doff_t diagoffa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa ); ``` --- #### scal2d ```c void bli_?scal2d ( doff_t diagoffa, diag_t diaga, trans_t transa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` --- #### setd ```c void bli_?setd ( conj_t conjalpha, doff_t diagoffa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa ); ``` --- #### setid ```c void bli_?setid ( doff_t diagoffa, dim_t m, dim_t n, rtype* alpha, ctype* a, inc_t rsa, inc_t csa ); ``` Set the imaginary components of every element along the diagonal of `a`, as specified by `diagoffa`, to a scalar `alpha`. Note that the datatype of `alpha` must be the real projection of the datatype of `a`. --- #### shiftd ```c void bli_?shiftd ( doff_t diagoffa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa ); ``` Add a constant value `alpha` to every element along the diagonal of `a`, as specified by `diagoffa`.
--- #### subd ```c void bli_?subd ( doff_t diagoffa, diag_t diaga, trans_t transa, dim_t m, dim_t n, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` --- #### xpbyd ```c void bli_?xpbyd ( doff_t diagoffa, diag_t diaga, trans_t transa, dim_t m, dim_t n, ctype* a, inc_t rsa, inc_t csa, ctype* beta, ctype* b, inc_t rsb, inc_t csb ); ``` --- ## Level-1m operations Level-1m operations perform various level-1 BLAS-like operations on matrices (hence the _m_). --- #### addm ```c void bli_?addm ( doff_t diagoffa, diag_t diaga, uplo_t uploa, trans_t transa, dim_t m, dim_t n, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` Perform ``` B := B + transa(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix, as specified by `uploa`, with the diagonal offset of `A` specified by `diagoffa` and unit/non-unit nature of the diagonal specified by `diaga`. If `uploa` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. --- #### axpym ```c void bli_?axpym ( doff_t diagoffa, diag_t diaga, uplo_t uploa, trans_t transa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` Perform ``` B := B + alpha * transa(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix, as specified by `uploa`, with the diagonal offset of `A` specified by `diagoffa` and unit/non-unit nature of the diagonal specified by `diaga`. If `uploa` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. 
--- #### copym ```c void bli_?copym ( doff_t diagoffa, diag_t diaga, uplo_t uploa, trans_t transa, dim_t m, dim_t n, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` Perform ``` B := transa(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix, as specified by `uploa`, with the diagonal offset of `A` specified by `diagoffa` and unit/non-unit nature of the diagonal specified by `diaga`. If `uploa` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. --- #### scalm ```c void bli_?scalm ( conj_t conjalpha, doff_t diagoffa, uplo_t uploa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa ); ``` Perform ``` A := conjalpha(alpha) * A ``` where `A` is an _m x n_ matrix stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix, as specified by `uploa`, with the diagonal offset of `A` specified by `diagoffa`. If `uploa` indicates lower or upper storage, only that part of matrix `A` will be updated. --- #### scal2m ```c void bli_?scal2m ( doff_t diagoffa, diag_t diaga, uplo_t uploa, trans_t transa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` Perform ``` B := alpha * transa(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix, as specified by `uploa`, with the diagonal offset of `A` specified by `diagoffa` and unit/non-unit nature of the diagonal specified by `diaga`. If `uploa` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. 
--- #### setm ```c void bli_?setm ( conj_t conjalpha, doff_t diagoffa, diag_t diaga, uplo_t uploa, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa ); ``` Set all elements of an _m x n_ matrix `A` to `conjalpha(alpha)`, where `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix, as specified by `uploa`, with the diagonal offset of `A` specified by `diagoffa` and unit/non-unit nature of the diagonal specified by `diaga`. If `uploa` indicates lower or upper storage, only that part of matrix `A` will be updated. --- #### subm ```c void bli_?subm ( doff_t diagoffa, diag_t diaga, uplo_t uploa, trans_t transa, dim_t m, dim_t n, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` Perform ``` B := B - transa(A) ``` where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix, as specified by `uploa`, with the diagonal offset of `A` specified by `diagoffa` and unit/non-unit nature of the diagonal specified by `diaga`. If `uploa` indicates lower or upper storage, only that part of matrix `A` will be referenced and used to update `B`. --- ## Level-1f operations Level-1f operations implement various fused combinations of level-1 operations (hence the _f_). **Note**: Each level-1f operation has a corresponding level-1f kernel through which it is primarily implemented. Level-1f kernels are employed when optimizing level-2 operations. --- #### axpy2v ```c void bli_?axpy2v ( conj_t conjx, conj_t conjy, dim_t m, ctype* alphax, ctype* alphay, ctype* x, inc_t incx, ctype* y, inc_t incy, ctype* z, inc_t incz ); ``` Perform ``` z := z + alphax * conjx(x) + alphay * conjy(y) ``` where `x`, `y`, and `z` are vectors of length _m_. The kernel, if optimized, is implemented as a fused pair of calls to [axpyv](BLISTypedAPI.md#axpyv).
--- #### dotaxpyv ```c void bli_?dotaxpyv ( conj_t conjxt, conj_t conjx, conj_t conjy, dim_t m, ctype* alpha, ctype* x, inc_t incx, ctype* y, inc_t incy, ctype* rho, ctype* z, inc_t incz ); ``` Perform ``` rho := conjxt(x^T) * conjy(y) z := z + alpha * conjx(x) ``` where `x`, `y`, and `z` are vectors of length _m_ and `alpha` and `rho` are scalars. The kernel, if optimized, is implemented as a fusion of calls to [dotv](BLISTypedAPI.md#dotv) and [axpyv](BLISTypedAPI.md#axpyv). --- #### axpyf ```c void bli_?axpyf ( conj_t conja, conj_t conjx, dim_t m, dim_t b, ctype* alpha, ctype* a, inc_t inca, inc_t lda, ctype* x, inc_t incx, ctype* y, inc_t incy ); ``` Perform ``` y := y + alpha * conja(A) * conjx(x) ``` where `A` is an _m x b_ matrix, and `y` and `x` are vectors. The kernel, if optimized, is implemented as a fused series of calls to [axpyv](BLISTypedAPI.md#axpyv) where _b_ is less than or equal to an implementation-dependent fusing factor specific to `axpyf`. --- #### dotxf ```c void bli_?dotxf ( conj_t conjat, conj_t conjx, dim_t m, dim_t b, ctype* alpha, ctype* a, inc_t inca, inc_t lda, ctype* x, inc_t incx, ctype* beta, ctype* y, inc_t incy ); ``` Perform ``` y := beta * y + alpha * conjat(A^T) * conjx(x) ``` where `A` is an _m x b_ matrix, and `y` and `x` are vectors. The kernel, if optimized, is implemented as a fused series of calls to [dotxv](BLISTypedAPI.md#dotxv) where _b_ is less than or equal to an implementation-dependent fusing factor specific to `dotxf`.
--- #### dotxaxpyf ```c void bli_?dotxaxpyf ( conj_t conjat, conj_t conja, conj_t conjw, conj_t conjx, dim_t m, dim_t b, ctype* alpha, ctype* a, inc_t inca, inc_t lda, ctype* w, inc_t incw, ctype* x, inc_t incx, ctype* beta, ctype* y, inc_t incy, ctype* z, inc_t incz ); ``` Perform ``` y := beta * y + alpha * conjat(A^T) * conjw(w) z := z + alpha * conja(A) * conjx(x) ``` where `A` is an _m x b_ matrix, `w` and `z` are vectors of length _m_, `x` and `y` are vectors of length `b`, and `alpha` and `beta` are scalars. The kernel, if optimized, is implemented as a fusion of calls to [dotxf](BLISTypedAPI.md#dotxf) and [axpyf](BLISTypedAPI.md#axpyf). ## Level-2 operations Level-2 operations perform various level-2 BLAS-like operations. --- #### gemv ```c void bli_?gemv ( trans_t transa, conj_t conjx, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* x, inc_t incx, ctype* beta, ctype* y, inc_t incy ); ``` Perform ``` y := beta * y + alpha * transa(A) * conjx(x) ``` where `transa(A)` is an _m x n_ matrix, and `y` and `x` are vectors. --- #### ger ```c void bli_?ger ( conj_t conjx, conj_t conjy, dim_t m, dim_t n, ctype* alpha, ctype* x, inc_t incx, ctype* y, inc_t incy, ctype* a, inc_t rsa, inc_t csa ); ``` Perform ``` A := A + alpha * conjx(x) * conjy(y)^T ``` where `A` is an _m x n_ matrix, and `x` and `y` are vectors of length _m_ and _n_, respectively. --- #### hemv ```c void bli_?hemv ( uplo_t uploa, conj_t conja, conj_t conjx, dim_t m, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* x, inc_t incx, ctype* beta, ctype* y, inc_t incy ); ``` Perform ``` y := beta * y + alpha * conja(A) * conjx(x) ``` where `A` is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uploa`, and `y` and `x` are vectors of length _m_. 
--- #### her ```c void bli_?her ( uplo_t uploa, conj_t conjx, dim_t m, rtype* alpha, ctype* x, inc_t incx, ctype* a, inc_t rsa, inc_t csa ); ``` Perform ``` A := A + alpha * conjx(x) * conjx(x)^H ``` where `A` is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uploa`, and `x` is a vector of length _m_. **Note:** The floating-point type of `alpha` is always the real projection of the floating-point types of `x` and `A`. --- #### her2 ```c void bli_?her2 ( uplo_t uploa, conj_t conjx, conj_t conjy, dim_t m, ctype* alpha, ctype* x, inc_t incx, ctype* y, inc_t incy, ctype* a, inc_t rsa, inc_t csa ); ``` Perform ``` A := A + alpha * conjx(x) * conjy(y)^H + conj(alpha) * conjy(y) * conjx(x)^H ``` where `A` is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uploa`, and `x` and `y` are vectors of length _m_. --- #### symv ```c void bli_?symv ( uplo_t uploa, conj_t conja, conj_t conjx, dim_t m, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* x, inc_t incx, ctype* beta, ctype* y, inc_t incy ); ``` Perform ``` y := beta * y + alpha * conja(A) * conjx(x) ``` where `A` is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uploa`, and `y` and `x` are vectors of length _m_. --- #### syr ```c void bli_?syr ( uplo_t uploa, conj_t conjx, dim_t m, ctype* alpha, ctype* x, inc_t incx, ctype* a, inc_t rsa, inc_t csa ); ``` Perform ``` A := A + alpha * conjx(x) * conjx(x)^T ``` where `A` is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uploa`, and `x` is a vector of length _m_.
--- #### syr2 ```c void bli_?syr2 ( uplo_t uploa, conj_t conjx, conj_t conjy, dim_t m, ctype* alpha, ctype* x, inc_t incx, ctype* y, inc_t incy, ctype* a, inc_t rsa, inc_t csa ); ``` Perform ``` A := A + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T ``` where `A` is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uploa`, and `x` and `y` are vectors of length _m_. --- #### trmv ```c void bli_?trmv ( uplo_t uploa, trans_t transa, diag_t diaga, dim_t m, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* x, inc_t incx ); ``` Perform ``` x := alpha * transa(A) * x ``` where `A` is an _m x m_ triangular matrix stored in the lower or upper triangle as specified by `uploa` with unit/non-unit nature specified by `diaga`, and `x` is a vector of length _m_. --- #### trsv ```c void bli_?trsv ( uplo_t uploa, trans_t transa, diag_t diaga, dim_t m, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* y, inc_t incy ); ``` Solve the linear system ``` transa(A) * x = alpha * y ``` where `A` is an _m x m_ triangular matrix stored in the lower or upper triangle as specified by `uploa` with unit/non-unit nature specified by `diaga`, and `x` and `y` are vectors of length _m_. The right-hand side vector operand `y` is overwritten with the solution vector `x`. --- ## Level-3 operations Level-3 operations perform various level-3 BLAS-like operations. **Note**: All level-3 operations are implemented through a handful of level-3 microkernels. Please see the [Kernels Guide](KernelsHowTo.md) for more details. --- #### gemm ```c void bli_?gemm ( trans_t transa, trans_t transb, dim_t m, dim_t n, dim_t k, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb, ctype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` Perform ``` C := beta * C + alpha * transa(A) * transb(B) ``` where C is an _m x n_ matrix, `transa(A)` is an _m x k_ matrix, and `transb(B)` is a _k x n_ matrix.
--- #### hemm ```c void bli_?hemm ( side_t sidea, uplo_t uploa, conj_t conja, trans_t transb, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb, ctype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` Perform ``` C := beta * C + alpha * conja(A) * transb(B) ``` if `sidea` is `BLIS_LEFT`, or ``` C := beta * C + alpha * transb(B) * conja(A) ``` if `sidea` is `BLIS_RIGHT`, where `C` and `B` are _m x n_ matrices and `A` is a Hermitian matrix stored in the lower or upper triangle as specified by `uploa`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. --- #### herk ```c void bli_?herk ( uplo_t uploc, trans_t transa, dim_t m, dim_t k, rtype* alpha, ctype* a, inc_t rsa, inc_t csa, rtype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` Perform ``` C := beta * C + alpha * transa(A) * transa(A)^H ``` where C is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uploc` and `transa(A)` is an _m x k_ matrix. **Note:** The floating-point types of `alpha` and `beta` are always the real projection of the floating-point types of `A` and `C`. --- #### her2k ```c void bli_?her2k ( uplo_t uploc, trans_t transab, dim_t m, dim_t k, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb, rtype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` Perform ``` C := beta * C + alpha * transab(A) * transab(B)^H + conj(alpha) * transab(B) * transab(A)^H ``` where C is an _m x m_ Hermitian matrix stored in the lower or upper triangle as specified by `uploc` and `transab(A)` and `transab(B)` are _m x k_ matrices. **Note:** The floating-point type of `beta` is always the real projection of the floating-point types of `A` and `C`. 
--- #### symm ```c void bli_?symm ( side_t sidea, uplo_t uploa, conj_t conja, trans_t transb, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb, ctype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` Perform ``` C := beta * C + alpha * conja(A) * transb(B) ``` if `sidea` is `BLIS_LEFT`, or ``` C := beta * C + alpha * transb(B) * conja(A) ``` if `sidea` is `BLIS_RIGHT`, where `C` and `B` are _m x n_ matrices and `A` is a symmetric matrix stored in the lower or upper triangle as specified by `uploa`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. --- #### syrk ```c void bli_?syrk ( uplo_t uploc, trans_t transa, dim_t m, dim_t k, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` Perform ``` C := beta * C + alpha * transa(A) * transa(A)^T ``` where C is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uploc` and `transa(A)` is an _m x k_ matrix. --- #### syr2k ```c void bli_?syr2k ( uplo_t uploc, trans_t transab, dim_t m, dim_t k, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb, ctype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` Perform ``` C := beta * C + alpha * transab(A) * transab(B)^T + alpha * transab(B) * transab(A)^T ``` where C is an _m x m_ symmetric matrix stored in the lower or upper triangle as specified by `uploc` and `transab(A)` and `transab(B)` are _m x k_ matrices.
--- #### trmm ```c void bli_?trmm ( side_t sidea, uplo_t uploa, trans_t transa, diag_t diaga, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` Perform ``` B := alpha * transa(A) * B ``` if `sidea` is `BLIS_LEFT`, or ``` B := alpha * B * transa(A) ``` if `sidea` is `BLIS_RIGHT`, where `B` is an _m x n_ matrix and `A` is a triangular matrix stored in the lower or upper triangle as specified by `uploa` with unit/non-unit nature specified by `diaga`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. --- #### trmm3 ```c void bli_?trmm3 ( side_t sidea, uplo_t uploa, trans_t transa, diag_t diaga, trans_t transb, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb, ctype* beta, ctype* c, inc_t rsc, inc_t csc ); ``` Perform ``` C := beta * C + alpha * transa(A) * transb(B) ``` if `sidea` is `BLIS_LEFT`, or ``` C := beta * C + alpha * transb(B) * transa(A) ``` if `sidea` is `BLIS_RIGHT`, where `C` and `transb(B)` are _m x n_ matrices and `A` is a triangular matrix stored in the lower or upper triangle as specified by `uploa` with unit/non-unit nature specified by `diaga`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_. --- #### trsm ```c void bli_?trsm ( side_t sidea, uplo_t uploa, trans_t transa, diag_t diaga, dim_t m, dim_t n, ctype* alpha, ctype* a, inc_t rsa, inc_t csa, ctype* b, inc_t rsb, inc_t csb ); ``` Solve the linear system with multiple right-hand sides ``` transa(A) * X = alpha * B ``` if `sidea` is `BLIS_LEFT`, or ``` X * transa(A) = alpha * B ``` if `sidea` is `BLIS_RIGHT`, where `X` and `B` are _m x n_ matrices and `A` is a triangular matrix stored in the lower or upper triangle as specified by `uploa` with unit/non-unit nature specified by `diaga`. When `sidea` is `BLIS_LEFT`, `A` is _m x m_, and when `sidea` is `BLIS_RIGHT`, `A` is _n x n_.
The right-hand side matrix operand `B` is overwritten with the solution matrix `X`. --- ## Utility operations --- #### asumv ```c void bli_?asumv ( dim_t n, ctype* x, inc_t incx, rtype* asum ); ``` Compute the sum of the absolute values of the fundamental elements of vector `x`. The resulting sum is stored to `asum`. **Note:** The floating-point type of `asum` is always the real projection of the floating-point type of `x`. **Note:** This function attempts to mimic the algorithm for computing the absolute vector sum in the netlib BLAS routines `*asum()`. --- #### norm1m #### normfm #### normim ```c void bli_?norm[1fi]m ( doff_t diagoffa, diag_t diaga, uplo_t uploa, dim_t m, dim_t n, ctype* a, inc_t rs_a, inc_t cs_a, rtype* norm ); ``` Compute the one-norm (`bli_?norm1m()`), Frobenius norm (`bli_?normfm()`), or infinity norm (`bli_?normim()`) of the elements in an _m x n_ matrix `A`. If `uploa` is `BLIS_LOWER` or `BLIS_UPPER` then `A` is assumed to be lower or upper triangular, respectively, with the main diagonal located at offset `diagoffa`. The resulting norm is stored to `norm`. **Note:** The floating-point type of `norm` is always the real projection of the floating-point type of `A`. --- #### norm1v #### normfv #### normiv ```c void bli_?norm[1fi]v ( dim_t n, ctype* x, inc_t incx, rtype* norm ); ``` Compute the one-norm (`bli_?norm1v()`), Frobenius norm (`bli_?normfv()`), or infinity norm (`bli_?normiv()`) of the elements in a vector `x` of length _n_. The resulting norm is stored to `norm`. **Note:** The floating-point type of `norm` is always the real projection of the floating-point type of `x`. --- #### mkherm ```c void bli_?mkherm ( uplo_t uploa, dim_t m, ctype* a, inc_t rs_a, inc_t cs_a ); ``` Make an _m x m_ matrix `A` explicitly Hermitian by copying the conjugate of the triangle specified by `uploa` to the opposite triangle. Imaginary components of diagonal elements are explicitly set to zero. It is assumed that the diagonal offset of `A` is zero.
--- #### mksymm ```c void bli_?mksymm ( uplo_t uploa, dim_t m, ctype* a, inc_t rs_a, inc_t cs_a ); ``` Make an _m x m_ matrix `A` explicitly symmetric by copying the triangle specified by `uploa` to the opposite triangle. It is assumed that the diagonal offset of `A` is zero. --- #### mktrim ```c void bli_?mktrim ( uplo_t uploa, dim_t m, ctype* a, inc_t rs_a, inc_t cs_a ); ``` Make an _m x m_ matrix `A` explicitly triangular by preserving the triangle specified by `uploa` and zeroing the elements in the opposite triangle. It is assumed that the diagonal offset of `A` is zero. --- #### fprintv ```c void bli_?fprintv ( FILE* file, char* s1, dim_t m, ctype* x, inc_t incx, char* format, char* s2 ); ``` Print a vector `x` of length _m_ to file stream `file`, where `file` is a file pointer returned by the standard C library function `fopen()`. The caller may also pass in a global file pointer such as `stdout` or `stderr`. The strings `s1` and `s2` are printed immediately before and after the output (respectively), and the format specifier `format` is used to format the individual elements. For valid format specifiers, please see documentation for the standard C library function `printf()`. **Note:** For complex datatypes, the format specifier is applied to both the real and imaginary components **individually**. Therefore, you should use format specifiers such as `"%5.2f"`, but **not** `"%5.2f + %5.2f"`. --- #### fprintm ```c void bli_?fprintm ( FILE* file, char* s1, dim_t m, dim_t n, ctype* a, inc_t rs_a, inc_t cs_a, char* format, char* s2 ); ``` Print an _m x n_ matrix `A` to file stream `file`, where `file` is a file pointer returned by the standard C library function `fopen()`. The caller may also pass in a global file pointer such as `stdout` or `stderr`. The strings `s1` and `s2` are printed immediately before and after the output (respectively), and the format specifier `format` is used to format the individual elements. 
For valid format specifiers, please see documentation for the standard C library function `printf()`. **Note:** For complex datatypes, the format specifier is applied to both the real and imaginary components **individually**. Therefore, you should use format specifiers such as `"%5.2f"`, but **not** `"%5.2f + %5.2f"`. --- #### printv ```c void bli_?printv ( char* s1, dim_t m, ctype* x, inc_t incx, char* format, char* s2 ); ``` Print a vector `x` of length _m_ to standard output. This function call is equivalent to calling `bli_?fprintv()` with `stdout` as the file pointer. --- #### printm ```c void bli_?printm ( char* s1, dim_t m, dim_t n, ctype* a, inc_t rs_a, inc_t cs_a, char* format, char* s2 ); ``` Print an _m x n_ matrix `a` to standard output. This function call is equivalent to calling `bli_?fprintm()` with `stdout` as the file pointer. --- #### randv ```c void bli_?randv ( dim_t n, ctype* x, inc_t incx ); ``` Set the elements of a vector `x` of length _n_ to random values on the interval `[-1,1)`. **Note:** For complex datatypes, the real and imaginary components of each element are randomized individually and independently of one another. --- #### randm ```c void bli_?randm ( doff_t diagoffa, uplo_t uploa, dim_t m, dim_t n, ctype* a, inc_t rs_a, inc_t cs_a ); ``` Set the elements of an _m x n_ matrix `A` to random values on the interval `[-1,1)`. If `uploa` is `BLIS_LOWER` or `BLIS_UPPER`, then additional scaling occurs so that the resulting matrix is diagonally dominant. Specifically, the diagonal elements (identified by diagonal offset `diagoffa`) are shifted so that they lie on the interval `[1,2)` and the off-diagonal elements (in the triangle specified by `uploa`) are scaled by `1.0/max(m,n)`. **Note:** For complex datatypes, the real and imaginary components of each off-diagonal element are randomized individually and independently of one another. 
--- #### sumsqv ```c void bli_?sumsqv ( dim_t n, ctype* x, inc_t incx, rtype* scale, rtype* sumsq ); ``` Compute the sum of the squares of the elements in a vector `x` of length _n_. The result is computed in scaled form, and in such a way that it may be used repeatedly to accumulate the sum of the squares of several vectors. The function computes scale\_new and sumsq\_new such that ``` scale_new^2 * sumsq_new = x[0]^2 + x[1]^2 + ... x[n-1]^2 + scale_old^2 * sumsq_old ``` where, on entry, `scale` and `sumsq` contain `scale_old` and `sumsq_old`, respectively, and on exit, `scale` and `sumsq` contain `scale_new` and `sumsq_new`, respectively. **Note:** This function attempts to mimic the algorithm for computing the Frobenius norm in the netlib LAPACK routine `?lassq()`. --- ## Level-3 microkernels **Note:** The `*` in level-3 microkernel function names shown below reflect that there is no exact naming convention required for the microkernels, except that they must begin with `bli_?`. We strongly recommend, however, that the microkernel function names include the name of the microkernel itself. For example, the `gemm` microkernel should be named with the prefix `bli_?gemm_` and the `trsm` microkernels should be named with the prefixes `bli_?trsm_l_` (lower triangular) and `bli_?trsm_u_` (upper triangular). --- #### gemm microkernel ```c void bli_?gemm_* ( dim_t k, ctype* restrict alpha, ctype* restrict a1, ctype* restrict b1, ctype* restrict beta, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` Perform ``` C11 := beta * C11 + alpha * A1 * B1 ``` where `C11` is an _MR x NR_ matrix, `A1` is an _MR x k_ "micropanel" matrix stored in packed (column-stored) format, `B1` is a _k x NR_ "micropanel" matrix in packed (row-stored) format, and alpha and beta are scalars. The storage of `C11` is specified by its row and column strides, `rsc` and `csc`.
Please see the [Kernel Guide](KernelsHowTo.md) for more information on the `gemm` microkernel. --- #### trsm microkernels ```c void bli_?trsm_l_* ( ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); void bli_?trsm_u_* ( ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` Perform ``` B11 := inv(A11) * B11 C11 := B11 ``` where `A11` is an _MR x MR_ lower or upper triangular matrix stored in packed (column-stored) format, `B11` is an _MR x NR_ matrix stored in packed (row-stored) format, and `C11` is an _MR x NR_ matrix stored according to row and column strides `rsc` and `csc`. Please see the [Kernel Guide](KernelsHowTo.md) for more information on the `trsm` microkernel. --- #### gemmtrsm microkernels ```c void bli_?gemmtrsm_l_* ( dim_t k, ctype* restrict alpha, ctype* restrict a10, ctype* restrict a11, ctype* restrict b01, ctype* restrict b11, ctype* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ); void bli_?gemmtrsm_u_* ( dim_t k, ctype* restrict alpha, ctype* restrict a12, ctype* restrict a11, ctype* restrict b21, ctype* restrict b11, ctype* restrict c11, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` Perform ``` B11 := alpha * B11 - A10 * B01 B11 := inv(A11) * B11 C11 := B11 ``` if `A11` is lower triangular, or ``` B11 := alpha * B11 - A12 * B21 B11 := inv(A11) * B11 C11 := B11 ``` if `A11` is upper triangular. Please see the [Kernel Guide](KernelsHowTo.md) for more information on the `gemmtrsm` microkernel. # Query function reference BLIS allows applications to query information about how BLIS was configured. The `bli_info_` API provides several categories of query routines. Most values are returned as a `gint_t`, which is a signed integer.
The size of this integer can be queried through a special routine that returns the size in a character string: ```c char* bli_info_get_int_type_size_str( void ); ``` **Note:** All of the `bli_info_` functions are **always** thread-safe, no matter how BLIS was configured. ## General library information The following routine returns the address of the full BLIS version string: ```c char* bli_info_get_version_str( void ); ``` ## Specific configuration The following routine returns a unique ID of type `arch_t` that identifies the current active configuration: ```c arch_t bli_arch_query_id( void ); ``` This is most useful when BLIS is configured with multiple configurations. (When linking to multi-configuration builds of BLIS, you don't know for sure which configuration will be used until runtime since the configuration-specific parameters are not loaded until after calling a heuristic to detect the hardware--usually based on the `CPUID` instruction.) Once the configuration's ID is known, it can be used to query a string that contains the name of the configuration: ```c char* bli_arch_string( arch_t id ); ``` ## General configuration The following routines return various general-purpose constants that affect the entire framework. All of these settings default to sane values, which can then be overridden by the configuration in [bli\_config.h](ConfigurationHowTo#bli_configh). If they are absent from a particular configuration's `bli_config.h` header file, then the default value is used, as specified in [frame/include/bli_config_macro_defs.h](https://github.com/flame/blis/blob/master/frame/include/bli_config_macro_defs.h).
```c gint_t bli_info_get_int_type_size( void ); gint_t bli_info_get_num_fp_types( void ); gint_t bli_info_get_max_type_size( void ); gint_t bli_info_get_page_size( void ); gint_t bli_info_get_simd_num_registers( void ); gint_t bli_info_get_simd_size( void ); gint_t bli_info_get_simd_align_size( void ); gint_t bli_info_get_stack_buf_max_size( void ); gint_t bli_info_get_stack_buf_align_size( void ); gint_t bli_info_get_heap_addr_align_size( void ); gint_t bli_info_get_heap_stride_align_size( void ); gint_t bli_info_get_pool_addr_align_size( void ); gint_t bli_info_get_enable_stay_auto_init( void ); gint_t bli_info_get_enable_blas( void ); gint_t bli_info_get_blas_int_type_size( void ); ``` ## Kernel information ### Micro-kernel implementation type query The following routines allow the caller to obtain a string that identifies the implementation type of each microkernel that is currently active (ie: part of the current active configuration, as identified by `bli_arch_query_id()`). ```c char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) ``` Possible implementation (ie: the `ind_t method` argument) types are: * `BLIS_3MH`: Implementation based on the 3m method applied at the highest level, outside the 5th loop around the microkernel. * `BLIS_3M1`: Implementation based on the 3m method applied within the 1st loop around the microkernel. * `BLIS_4MH`: Implementation based on the 4m method applied at the highest level, outside the 5th loop around the microkernel. * `BLIS_4M1B`: Implementation based on the 4m method applied within the 1st loop around the microkernel.
Computation is ordered such that the 1st loop is fissured into two loops, the first of which multiplies the real part of the current micropanel of packed matrix B (against all real and imaginary parts of packed matrix A), and the second of which multiplies the imaginary part of the current micropanel of packed matrix B. * `BLIS_4M1A`: Implementation based on the 4m method applied within the 1st loop around the microkernel. Computation is ordered such that real and imaginary components of the current micropanels are completely used before proceeding to the next virtual microkernel invocation. * `BLIS_1M`: Implementation based on the 1m method. (This is the default induced method when real domain kernels are present but complex kernels are missing.) * `BLIS_NAT`: Implementation based on "native" execution (ie: NOT an induced method). **NOTE**: `BLIS_3M3` and `BLIS_3M2` have been deprecated from the `typedef enum` of `ind_t`, and `BLIS_4M1B` is also effectively no longer available, though the `typedef enum` value still exists. Possible microkernel types (ie: the return values for `bli_info_get_*_ukr_impl_string()`) are: * `BLIS_REFERENCE_UKERNEL` (`"refrnce"`): This value is returned when the queried microkernel is provided by the reference implementation. * `BLIS_VIRTUAL_UKERNEL` (`"virtual"`): This value is returned when the queried microkernel is driven by the "virtual" microkernel provided by an induced method. This happens for any `method` value that is not `BLIS_NAT` (ie: native), but only applies to the complex domain. * `BLIS_OPTIMIZED_UKERNEL` (`"optimzd"`): This value is returned when the queried microkernel is provided by an implementation that is neither reference nor virtual, and thus we assume the kernel author would deem it to be "optimized". Such a microkernel may not be optimal in the literal sense of the word, but nonetheless is _intended_ to be optimized, at least relative to the reference microkernels.
* `BLIS_NOTAPPLIC_UKERNEL` (`"notappl"`): This value is returned usually when performing a `gemmtrsm` or `trsm` microkernel type query for any `method` value that is not `BLIS_NAT` (ie: native). That is, induced methods cannot be (purely) used on `trsm`-based microkernels because these microkernels perform more of a triangular inversion, which is not matrix multiplication. ### Operation implementation type query The following routines allow the caller to obtain a string that identifies the implementation (`ind_t`) that is currently active (ie: implemented and enabled) for each level-3 operation. Possible implementation types are listed in the section above covering [microkernel implementation query](BLISTypedAPI.md#micro-kernel-implementation-type-query). ```c char* bli_info_get_gemm_impl_string( num_t dt ); char* bli_info_get_hemm_impl_string( num_t dt ); char* bli_info_get_herk_impl_string( num_t dt ); char* bli_info_get_her2k_impl_string( num_t dt ); char* bli_info_get_symm_impl_string( num_t dt ); char* bli_info_get_syrk_impl_string( num_t dt ); char* bli_info_get_syr2k_impl_string( num_t dt ); char* bli_info_get_trmm_impl_string( num_t dt ); char* bli_info_get_trmm3_impl_string( num_t dt ); char* bli_info_get_trsm_impl_string( num_t dt ); ``` # Example code BLIS provides lots of example code in the [examples/tapi](https://github.com/flame/blis/tree/master/examples/tapi) directory of the BLIS source distribution. The example code in this directory is set up like a tutorial, and so we recommend starting from the beginning. Topics include printing vectors and matrices and calling a representative subset of the computational level-1v, -1m, -2, -3, and utility operations documented above.
blis-0.6.1/docs/BuildSystem.md000066400000000000000000000660421360743507500162070ustar00rootroot00000000000000## Contents * **[Contents](BuildSystem.md#contents)** * **[Introduction](BuildSystem.md#introduction)** * **[Obtaining BLIS](BuildSystem.md#obtaining-blis)** * **[Step 1: Choose a framework configuration](BuildSystem.md#step-1-choose-a-framework-configuration)** * **[Step 2: Running `configure`](BuildSystem.md#step-2-running-configure)** * **[Step 3: Compilation](BuildSystem.md#step-3-compilation)** * **[Step 3b: Testing (optional)](BuildSystem.md#step-3b-testing-optional)** * **[Step 4: Installation](BuildSystem.md#step-4-installation)** * **[Cleaning out build products](BuildSystem.md#cleaning-out-build-products)** * **[Compiling with BLIS](BuildSystem.md#compiling-with-blis)** * [Disabling BLAS prototypes](BuildSystem.md#disabling-blas-prototypes) * [CBLAS](BuildSystem.md#cblas) * **[Linking against BLIS](BuildSystem.md#linking-against-blis)** * **[Uninstalling](BuildSystem.md#uninstalling)** * **[make targets](BuildSystem.md#make-targets)** * **[Conclusion](BuildSystem.md#conclusion)** ## Introduction This document describes how to configure, compile, and install a BLIS library on your local system. The BLIS build system was designed for use with GNU/Linux (or some other sane UNIX). Other requirements are: * Python (2.7 or later for python2; 3.4 or later for python3) * GNU `bash` (3.2 or later) * GNU `make` (3.81 or later) * a working C99 compiler BLIS also requires a POSIX threads library at link-time (`-lpthread` or `libpthread.so`). This requirement holds even when configuring BLIS with multithreading disabled (the default) or with multithreading via OpenMP (`--enable-threading=openmp`). (Note: BLIS implements basic pthreads functionality automatically for Windows builds via [AppVeyor](https://ci.appveyor.com/project/shpc/blis/).)
Finally, we also require various other shell utilities that are so ubiquitous that they are not worth mentioning (such as `mv`, `mkdir`, `find`, and so forth). If you are missing these utilities, then you have much bigger problems than not being able to build BLIS. ## Obtaining BLIS Before starting, you must obtain a copy of BLIS. If you are an end-user (i.e., not a developer), you can download a tarball or zip file of the latest tagged version by returning to the main [BLIS homepage](https://github.com/flame/blis) and clicking on the [releases](https://github.com/flame/blis/releases) link. **However**, we highly recommend that you instead clone a copy using the command: ``` $ git clone https://github.com/flame/blis.git ``` Cloning a repository allows users and developers alike to quickly and easily pull in new commits as they are available, including commits that occur **between** tagged releases. Once you download the BLIS distribution, the top-level directory should look something like: ``` $ ls CHANGELOG Makefile common.mk configure mpi_test testsuite CREDITS README.md config frame obj version INSTALL bli_config.h config.mk kernels ref_kernels windows LICENSE build config_registry lib test ``` ## Step 1: Choose a framework configuration The first step is to choose how to configure BLIS. Specifically, a user must decide which configuration to use, or whether to allow `configure` to automatically guess the best configuration for your hardware. (Note: This automatic configuration selection only applies to x86_64 systems.) Configurations are described in detail in the [Configuration Guide](ConfigurationHowTo.md). Generally speaking, a configuration consists of several files that reside in a sub-directory of the `config` directory. To see a list of the available configurations, you may inspect this directory, or run `configure` with no arguments. 
Here are the current (as of this writing) contents of the `config` directory: ``` $ ls config amd64 cortexa15 excavator intel64 old power7 template bgq cortexa57 generic knc penryn sandybridge zen bulldozer cortexa9 haswell knl piledriver steamroller ``` There is one additional configuration available that is not present in the `config` directory, and that is `auto`. By targeting the `auto` configuration (i.e., `./configure auto`), the user is requesting that `configure` select a configuration automatically based on the detected features of the processor. Another special configuration (one that, unlike `auto`, _is_ present in `config`) is the `generic` configuration. This configuration, like its name suggests, is architecture-agnostic and may be targeted in virtually any environment that supports the minimum build requirements of BLIS. The `generic` configuration uses a set of built-in, portable reference kernels (written in C99) that should work without modification on most, if not all, architectures. These reference kernels, however, should be expected to yield relatively low performance since they do not employ any architecture-specific optimizations beyond those the compiler provides automatically. (Historical note: The `generic` configuration corresponds to the `reference` configuration of previous releases of BLIS.) If you are a BLIS developer and wish to create your own configuration, either from scratch or using an existing configuration as a starting point, please read the BLIS [Configuration Guide](ConfigurationHowTo.md). ### Multithreading Multithreading in BLIS is disabled by default. For more information on enabling multithreading, please read the section of the [Multithreading](Multithreading.md) document titled ["Enabling Multithreading"](Multithreading.md#enabling-multithreading). **IMPORTANT**: Even when multithreading is enabled at configure-time, BLIS will default to single-threaded execution at runtime. 
For more information on the various ways of specifying multithreading at runtime, please read the section titled ["Specifying Multithreading"](Multithreading.md#specifying-multithreading). ## Step 2: Running `configure` This step should be somewhat familiar to many people who use open source software. To configure the build system, simply run: ``` $ ./configure <configname> ``` where `<configname>` is the configuration sub-directory name you chose in [Step 1](BuildSystem.md#step-1-choose-a-framework-configuration) above. If `<configname>` is not given, a helpful message is printed reminding you to explicitly specify a configuration name along with a list of valid configuration families and their implied sub-configurations. For more information on sub-configurations and families, please see the BLIS [Configuration Guide](ConfigurationHowTo.md). Alternatively, `configure` can automatically select a configuration based on your hardware: ``` $ ./configure auto ``` However, as of this writing, BLIS lacks support for automatically detecting some architectures. If the `configure` script is not able to detect your architecture, the `generic` configuration will be used. Upon running configure, you will get output similar to the following. The exact output will depend on whether you cloned BLIS from a `git` repository or whether you obtained BLIS via a downloadable tarball from the [releases](https://github.com/flame/blis/releases) page. ``` $ ./configure --prefix=$HOME/blis haswell configure: using 'gcc' compiler. configure: found gcc version 5.4.0 (maj: 5, min: 4, rev: 0). configure: checking for blacklisted configurations due to gcc 5.4.0. configure: warning: gcc 5.4.0 does not support 'skx'; adding to blacklist. configure: found assembler ('as') version 2.26.1 (maj: 2, min: 26, rev: 1). configure: checking for blacklisted configurations due to as 2.26.1. configure: configuration blacklist: configure: skx configure: reading configuration registry...done. configure: determining default version string.
configure: found '.git' directory; assuming git clone. configure: executing: git describe --tags. configure: got back 0.3.2-16-gb699bb1f. configure: truncating to 0.3.2-16. configure: starting configuration of BLIS 0.3.2-16. configure: configuring with official version string. configure: found shared library .so version '0.0.0'. configure: .so major version: 0 configure: .so minor.build version: 0.0 configure: manual configuration requested; configuring with 'haswell'. configure: checking configuration against contents of 'config_registry'. configure: configuration 'haswell' is registered. configure: 'haswell' is defined as having the following sub-configurations: configure: haswell configure: which collectively require the following kernels: configure: haswell zen configure: checking sub-configurations: configure: 'haswell' is registered...and exists. configure: checking sub-configurations' requisite kernels: configure: 'haswell' kernels...exist. configure: 'zen' kernels...exist. configure: no install prefix option given; defaulting to '/u/field/blis'. configure: no install libdir option given; defaulting to PREFIX/lib. configure: no install includedir option given; defaulting to PREFIX/include. configure: final installation directories: configure: libdir: /u/field/blis/lib configure: includedir: /u/field/blis/include configure: debug symbols disabled. configure: disabling verbose make output. (enable with 'make V=1'.) configure: building BLIS as a static library. configure: threading is disabled. configure: internal memory pools for packing buffers are enabled. configure: libmemkind not found; disabling. configure: the BLAS compatibility layer is enabled. configure: the CBLAS compatibility layer is disabled. configure: the internal integer size is automatically determined. configure: the BLAS/CBLAS interface integer size is 32-bit. 
configure: creating ./config.mk from ./build/config.mk.in configure: creating ./bli_config.h from ./build/bli_config.h.in configure: creating ./obj/haswell configure: creating ./obj/haswell/config configure: creating ./obj/haswell/config/haswell configure: creating ./obj/haswell/kernels configure: creating ./obj/haswell/kernels/haswell configure: creating ./obj/haswell/kernels/zen configure: creating ./obj/haswell/ref_kernels configure: creating ./obj/haswell/ref_kernels/haswell configure: creating ./obj/haswell/frame configure: creating ./obj/haswell/blastest configure: creating ./obj/haswell/testsuite configure: creating ./lib/haswell configure: creating ./include/haswell configure: mirroring ./config/haswell to ./obj/haswell/config/haswell configure: mirroring ./kernels/haswell to ./obj/haswell/kernels/haswell configure: mirroring ./kernels/zen to ./obj/haswell/kernels/zen configure: mirroring ./ref_kernels to ./obj/haswell/ref_kernels/haswell configure: mirroring ./frame to ./obj/haswell/frame configure: creating makefile fragments in ./config/haswell configure: creating makefile fragments in ./kernels/haswell configure: creating makefile fragments in ./kernels/zen configure: creating makefile fragments in ./ref_kernels configure: creating makefile fragments in ./frame configure: configured to build within top-level directory of source distribution. ``` The installation prefix can be specified via the `--prefix=PREFIX` option: ``` $ ./configure --prefix=/usr ``` This will cause libraries to eventually be installed (via `make install`) to `PREFIX/lib` and development headers to be installed to `PREFIX/include`. (The default value of `PREFIX` is `/usr/local`.) 
You can also specify the library install directory separately from the development header install directory with the `--libdir=LIBDIR` and `--includedir=INCDIR` options, respectively: ``` $ ./configure --libdir=/usr/lib --includedir=/usr/include ``` The `--libdir=LIBDIR` and `--includedir=INCDIR` options will override any path implied by `PREFIX`, whether it was specified explicitly via `--prefix` or implicitly (via the default). That is, `LIBDIR` defaults to `EXECPREFIX/lib` (where `EXECPREFIX`, set via `--exec-prefix=EXECPREFIX`, defaults to `PREFIX`) and `INCDIR` defaults to `PREFIX/include`, but `LIBDIR` and `INCDIR` will each be overridden by their respective `--libdir`/`--includedir` options. There is a third related option, `--sharedir=SHAREDIR`, where `SHAREDIR` defaults to `PREFIX/share`. This option specifies the installation directory for certain makefile fragments that contain variables determined by `configure` (e.g. `CC`, `CFLAGS`, `LDFLAGS`, etc.). These files allow certain BLIS makefiles, such as those in the `examples` or `testsuite` directories, to operate on an installed copy of BLIS rather than a local (and possibly uninstalled) copy. For a complete list of supported `configure` options and arguments, run `configure` with the `-h` option: ``` $ ./configure -h ``` The output from this invocation of `configure` should give you an up-to-date list of options and their descriptions. ## Step 3: Compilation Once `configure` is finished, you are ready to instantiate (compile) BLIS into a library by running `make`. Running `make` will result in output similar to: ``` $ make Generating monolithic blis.h......................................................... ..................................................................................... ..................................................................................... .....................................................................................
..................................................................................... .......................................... Generated include/haswell/blis.h Compiling obj/haswell/config/haswell/bli_cntx_init_haswell.o ('haswell' CFLAGS for config code) Compiling obj/haswell/kernels/zen/1/bli_amaxv_zen_int.o ('haswell' CFLAGS for kernels) Compiling obj/haswell/kernels/zen/1/bli_axpyv_zen_int.o ('haswell' CFLAGS for kernels) Compiling obj/haswell/kernels/zen/1/bli_axpyv_zen_int10.o ('haswell' CFLAGS for kernels) Compiling obj/haswell/kernels/zen/1/bli_dotv_zen_int.o ('haswell' CFLAGS for kernels) Compiling obj/haswell/kernels/zen/1/bli_dotv_zen_int10.o ('haswell' CFLAGS for kernels) ``` If you want to see the individual command line invocations of the compiler, you can run `make` as follows: ``` $ make V=1 ``` Also, if you are compiling on a multicore system, you can get parallelism via: ``` $ make -j<n> ``` where `<n>` is the number of jobs `make` is allowed to run simultaneously. Generally, you should limit `<n>` to p+1, where p is the number of processor cores on your system. ### Running into the ARG_MAX limit On some systems, you may observe an error message when the build system attempts to archive BLIS object files into the static library (or perhaps when the linker attempts to generate the shared library): ``` Archiving lib/x86_64/libblis.a bash: ar: Argument list too long Makefile:584: recipe for target 'lib/x86_64/libblis.a' failed make: *** [lib/x86_64/libblis.a] Error 126 ``` This error message results when the user attempts to execute a program with too many arguments (or more specifically, a program-argument string that occupies too many bytes)--that is, when the command exceeds the [ARG_MAX limit](https://www.in-ulm.de/~mascheck/various/argmax/). This doesn't occur very often, but if it does, don't worry--we have a workaround. Simply rerun `configure` as you did previously, except this time include an additional option: `--enable-arg-max-hack`.
You will see confirmation that the option was accepted as configure runs: ``` configure: enabling ARG_MAX hack. ``` The archiver and/or linker should no longer choke when creating the libraries. ## Step 3b: Testing (optional) If you would like to run some ready-made tests that exercise BLIS in a number of ways, including through its BLAS compatibility layer, run `make check`: ``` $ make check ``` Watch the output near the end. You should see the following messages, though not necessarily in immediate succession: ``` All BLIS tests passed! All BLAS tests passed! ``` Please see the [Testsuite](Testsuite.md) document for more details on running either the BLIS testsuite or the BLAS test drivers. If you have any trouble, please report your problem to BLIS developers by opening a [new issue](https://github.com/flame/blis/issues/). ## Step 4: Installation Toward the end of compilation, you should get output similar to: ``` Compiling obj/haswell/frame/thread/bli_thread.o ('haswell' CFLAGS for framework code) Compiling obj/haswell/frame/thread/bli_thrinfo.o ('haswell' CFLAGS for framework code) Compiling obj/haswell/frame/util/bli_util_check.o ('haswell' CFLAGS for framework code) Compiling obj/haswell/frame/util/bli_util_oapi.o ('haswell' CFLAGS for framework code) Compiling obj/haswell/frame/util/bli_util_oapi_wc.o ('haswell' CFLAGS for framework code) Compiling obj/haswell/frame/util/bli_util_oapi_woc.o ('haswell' CFLAGS for framework code) Compiling obj/haswell/frame/util/bli_util_tapi.o ('haswell' CFLAGS for framework code) Compiling obj/haswell/frame/util/bli_util_unb_var1.o ('haswell' CFLAGS for framework code) Archiving lib/haswell/libblis.a Dynamically linking lib/haswell/libblis.so ``` Now you have a BLIS library (in static and shared forms) residing in the `lib/<configname>/` directory.
To install the libraries and the header files associated with it, simply execute: ``` $ make install ``` This installs copies of the libraries and header files, and also creates conventional symbolic links of shared libraries: ``` Installing libblis.a into /u/field/blis/lib/ Installing libblis.so.0.0.0 into /u/field/blis/lib/ Installing symlink libblis.so into /u/field/blis/lib/ Installing symlink libblis.so.0 into /u/field/blis/lib/ Installing blis.h into /u/field/blis/include/blis/ ``` This results in your `PREFIX` directory looking like: ``` # Check the contents of 'PREFIX'. $ ls -l $HOME/blis drwxr-xr-x 3 field dept 4096 May 10 17:36 include drwxr-xr-x 2 field dept 4096 May 10 17:42 lib # Check the contents of 'PREFIX/include'. $ ls -l $HOME/blis/include drwxr-xr-x 2 field dept 4096 May 10 17:42 blis $ ls -l $HOME/blis/include/blis -rw-r--r-- 1 field dept 915324 May 10 17:42 blis.h # Check the contents of 'PREFIX/lib'. $ ls -l $HOME/blis/lib -rw-r--r-- 1 field dept 2979052 May 10 17:42 libblis.a lrwxrwxrwx 1 field dept 16 May 10 17:42 libblis.so -> libblis.so.0.0.0 lrwxrwxrwx 1 field dept 16 May 10 17:42 libblis.so.0 -> libblis.so.0.0.0 -rw-r--r-- 1 field dept 2185976 May 10 17:42 libblis.so.0.0.0 ``` ## Cleaning out build products If you want to remove various build products, you can use one of the `make` targets already defined for you in the BLIS Makefile: ``` $ make clean Removing flattened header files from ./include/haswell. Removing object files from ./obj/haswell. Removing libraries from ./lib/haswell. ``` Executing the `clean` target will remove all binary object files and library builds from the `obj` and `lib` directories, as well as any flattened header files. Any other configurations' build products are left untouched. ``` $ make cleanmk Removing makefile fragments from ./config. Removing makefile fragments from ./frame. Removing makefile fragments from ./ref_kernels. Removing makefile fragments from ./kernels. 
``` The `cleanmk` target results in removal of all makefile fragments from the framework source tree. (Makefile fragments are named `.fragment.mk` and are generated at configure-time.) ``` $ make distclean Removing makefile fragments from ./config. Removing makefile fragments from ./frame. Removing makefile fragments from ./ref_kernels. Removing makefile fragments from ./kernels. Removing flattened header files from ./include/haswell. Removing object files from ./obj/haswell. Removing libraries from ./lib/haswell. Removing object files from ./obj/haswell/blastest. Removing libf2c.a from ./obj/haswell/blastest. Removing binaries from ./obj/haswell/blastest. Removing driver output files 'out.*'. Removing object files from ./blastest/obj. Removing libf2c.a from ./blastest. Removing binaries from ./blastest. Removing driver output files 'out.*' from ./blastest. Removing object files from ./obj/haswell/testsuite. Removing binary test_libblis.x. Removing output.testsuite. Removing object files from testsuite/obj. Removing binary testsuite/test_libblis.x. Removing ./bli_config.h. Removing config.mk. Removing obj. Removing lib. Removing include. ``` Running the `distclean` target is like saying, "Remove anything ever created by the build system." ## Compiling with BLIS All BLIS definitions and prototypes may be included in your C source file by including a single header file, `blis.h`: ```c #include "stdio.h" #include "stdlib.h" #include "otherstuff.h" #include "blis.h" ``` If the BLAS compatibility layer was enabled at configure-time (as it is by default), then `blis.h` will also provide BLAS prototypes to your source code. ### Disabling BLAS prototypes Some applications already `#include` a header that contains BLAS prototypes. This can cause problems if those applications also try to `#include` the BLIS header file, as shown above. Suppose for a moment that `otherstuff.h` in the example above already provides BLAS prototypes. 
``` $ gcc -I/path/to/blis -I/path/to/otherstuff -c main.c -o main.o In file included from main.c:41:0: /path/to/blis/blis.h:36900:111: error: conflicting declaration of C function ‘int xerbla_(const bla_character*, const bla_integer*, ftnlen)’ TEF770(xerbla)(const bla_character *srname, const bla_integer *info, ftnlen srname_len); ``` If your application is already declaring (prototyping) BLAS functions, then you may disable those prototypes from being defined within `blis.h`. This prevents `blis.h` from re-declaring those prototypes, or allows your other header to declare those functions for the first time, depending on the order that you `#include` the headers. ```c #include "stdio.h" #include "stdlib.h" #include "otherstuff.h" #define BLIS_DISABLE_BLAS_DEFS // disable BLAS prototypes within BLIS. #include "blis.h" ``` By `#defining` the `BLIS_DISABLE_BLAS_DEFS` macro, we signal to `blis.h` that it should skip over the BLAS prototypes, but otherwise `#include` everything else as it normally would. Note that `BLIS_DISABLE_BLAS_DEFS` must be `#defined` *prior* to the `#include "blis.h"` directive in order for it to have any effect. ### CBLAS If you build BLIS with CBLAS enabled and you wish to access CBLAS function prototypes from within your application, you will have to `#include` the `cblas.h` header separately from `blis.h`. ``` #include "blis.h" #include "cblas.h" ``` ## Linking against BLIS Once you have instantiated (configured and compiled, and perhaps installed) a BLIS library, you can link to it in your application's makefile as you would any other library. The following is an abbreviated makefile for a small hypothetical application that has just two external dependencies: BLIS and the standard C math library. We also link against libpthread since that library has been a runtime dependency of BLIS since 70640a3 (December 2017).
```make BLIS_PREFIX = $(HOME)/blis BLIS_INC = $(BLIS_PREFIX)/include/blis BLIS_LIB = $(BLIS_PREFIX)/lib/libblis.a OTHER_LIBS = -L/usr/lib -lm -lpthread CC = gcc CFLAGS = -O2 -g -I$(BLIS_INC) LINKER = $(CC) OBJS = main.o util.o other.o %.o: %.c $(CC) $(CFLAGS) -c $< -o $@ all: $(OBJS) $(LINKER) $(OBJS) $(BLIS_LIB) $(OTHER_LIBS) -o my_program.x ``` The above example assumes you will want to include BLIS definitions and function prototypes into your application via `#include "blis.h"`. (If you are only using BLIS via the BLAS compatibility layer, including `blis.h` is not necessary.) Since BLIS headers are installed into a `blis` subdirectory of `PREFIX/include`, you must make sure that the compiler knows where to find the `blis.h` header file. This is typically accomplished by inserting `#include "blis.h"` into your application's source code files and compiling the code with `-I PREFIX/include/blis`. The makefile shown above is a very simple example. If you need help linking your application to your BLIS library, please [open an issue](https://github.com/flame/blis/issues). ## Uninstalling If you decide that you want to uninstall BLIS, simply run `make uninstall` ``` $ make uninstall Uninstalling libraries libblis.a libblis.so.0.0.0 from /u/field/blis/lib/. Uninstalling symlinks libblis.so libblis.so.0 from /u/field/blis/lib/. Uninstalling directory 'blis' from /u/field/blis/include/. ``` This removes the libraries, symlinks, and header directory that were installed by `make install`. Before running `make uninstall`, however, make sure that BLIS is configured with the same `LIBDIR` and `INCDIR` paths used during installation. ## `make` targets The BLIS `Makefile` implements many `make` targets. The table below lists most of the interesting ones that typical users and developers may wish to use. | `make` target | Description | |:----------------|:---------------------------------------------------| | `all` | Execute `libs` target.
| | `libs` | Compile BLIS as a static and/or shared library (depending on `configure` options). | | `test` | Execute `checkblis` and `checkblas` targets. | | `check` | Execute `checkblis-fast` and `checkblas` targets. | | `checkblis` | Execute `testblis` and characterize the results to `stdout`. | | `checkblis-fast`| Execute `testblis-fast` and characterize the results to `stdout`. | | `checkblis-md` | Execute `testblis-md` and characterize the results to `stdout`. | | `checkblis-salt`| Execute `testblis-salt` and characterize the results to `stdout`. | | `checkblas` | Execute `testblas` and characterize the results to `stdout`. | | `testblis` | Run the BLIS testsuite with default parameters (runs for 2-8 minutes). | | `testblis-fast` | Run the BLIS testsuite with "fast" parameters (runs for a few seconds). | | `testblis-md` | Run the BLIS testsuite for `gemm` with full mixing of datatypes (runs for 10-30 seconds). | | `testblis-salt` | Run the BLIS testsuite while simulating application-level threading (runs for a few seconds). | | `testsuite` | Same as `testblis`. | | `testblas` | Run the BLAS test drivers with default parameters (runs for a few seconds). | | `showconfig` | Show a summary of currently selected `configure` options. | | `clean` | Execute `cleanh` and `cleanlib`. | | `cleanmk` | Remove `.fragment.mk` makefile fragments generated by `configure`. | | `cleanh` | Remove the flattened header file(s) in `include/<configname>/`. | | `cleanlib` | Remove the libraries in `lib/<configname>/`. | | `cleantest` | Remove build products produced by `testblis`/`testblis-fast` and `testblas`. | | `install` | Install libraries and header files to installation directories. | | `uninstall` | Uninstall libraries and header files that reside within installation directories. | | `uninstall-old` | Uninstall older libraries and header files that reside within installation directories.
| For more details on `configure` options, such as enabling/disabling static or shared library generation, or specifying installation directories for libraries and/or headers, please review the output of `./configure --help`. ## Conclusion If you have feedback, please consider keeping in touch with the project maintainers, contributors, and other users by joining and posting to the [BLIS mailing lists](https://github.com/flame/blis#discussion). Thanks for using BLIS! blis-0.6.1/docs/CodingConventions.md000066400000000000000000000233121360743507500173650ustar00rootroot00000000000000## Contents * **[Contents](CodingConventions.md#contents)** * **[Introduction](CodingConventions.md#introduction)** * **[C99](CodingConventions.md#c99)** * [Placement of braces](CodingConventions.md#placement-of-braces) * [Indentation](CodingConventions.md#indentation) * [Comments](CodingConventions.md#comments) * [Blank lines](CodingConventions.md#blank-lines) * [Condensing short code to single lines](CodingConventions.md#condensing-short-code-to-single-lines) * [Whitespace in function calls](CodingConventions.md#whitespace-in-function-calls) * [Whitespace in function definitions](CodingConventions.md#whitespace-in-function-definitions) * [Whitespace in expressions](CodingConventions.md#whitespace-in-expressions) * [Trailing whitespace](CodingConventions.md#trailing-whitespace) ## Introduction This wiki describes the coding conventions used in BLIS. Please try to adhere to these conventions when submitting pull requests and/or (if you have permission) committing directly to the repository. There is some support for these conventions for Emacs editing in the `.dir-locals.el` file, which will affect editing with CC mode in the blis directory. ## C99 Most of the code in BLIS is written in C, and specifically in ISO C99. This section describes the C coding standards used within BLIS. 
### Placement of braces Please either use braces to denote the indentation limits of scope, or to enclose multiple statements on a single line. But do not place the open brace on the same line as a conditional if the conditional will be more than one line. ```c { // This is fine. if ( bli_obj_is_real( x ) ) { foo = 1; } // This is also fine. (Ideal for short conditional bodies.) if ( bli_obj_is_real( x ) ) { foo = 1; return; } // This is bad. Please use one of the two forms above. if ( bli_obj_is_real( x ) ) { foo = 1; } // This is (much) worse. Please no. if ( bli_obj_is_real( x ) ) { foo = 1; } } ``` ### Indentation If at all possible, **please use tabs to denote changing levels of scope!** If you can't use tabs or doing so would be very inconvenient given your editor and setup, please set your indentation to use exactly four spaces per level of indentation. Below is what it would look like if you used tabs (with a tab width set to occupy four spaces), or four actual spaces per indentation level. ```c bool_t bli_obj_is_real( obj_t* x ) { bool_t r_val; if ( bli_obj_is_real( x ) ) r_val = TRUE; else r_val = FALSE; } ``` Ideally, tabs should be used to indicate changes in levels of scope, but then spaces should be used for multi-line statements within the same scope. In the example below, I've marked the characters that should be spaces with `.` (with tabs used for the first level of indentation): ```c bool_t bli_obj_is_complex( obj_t* x ) { bool_t r_val; if ( bli_obj_is_scomplex( x ) || .....bli_obj_is_dcomplex( x ) ) r_val = TRUE; else............................r_val = FALSE; return r_val; } ``` ### Comments Please use C++-style comments, and line-break your comments somewhere between character (column) 72 and 80. ```c { // This is a comment. This comment can span multiple lines, but it should // not extend beyond column 80. (For these purposes, you can count a tab // as anywhere from one to four spaces.) 
} ``` If you are inserting comments in a macro definition, you must use C-style comments: ```c #define bli_some_macro( x ) \ \ /* This is a comment in a macro definition. It, too, should not spill beyond column 80. Please place the ending comment marker on the last line containing words, unless the comment marker would cause you to go beyond column 80, in which case you can place it on the next line aligned with the first comment marker. */ ``` ### Blank lines Please use blank lines to separate lines of code from the next line of code. However, if adjacent lines of code are meaningfully related, please skip the blank line. ```c { // Set the matrix datatype. bli_obj_set_dt( BLIS_DOUBLE, x ); // Set the matrix dimensions. bli_obj_set_length( 10, x ); bli_obj_set_width( 5, x ); // Set the matrix structure. bli_obj_set_struc( BLIS_GENERAL, x ); bli_obj_set_uplo( BLIS_DENSE, x ); } ``` ### Condensing short code to single lines Sometimes, to more efficiently display code on the screen, it's helpful to skip certain newlines, such as those in conditional statements. This is fine, just try to line things up in a way that is visually appealing. ```c { bool_t r_val; dim_t foo; // This is fine. if ( bli_obj_is_real( x ) ) r_val = TRUE; else r_val = FALSE; // This is okay. (Notice the spaces after '{' and before '}'.) // However, the next example is preferred over this style. if ( bli_obj_is_real( x ) ) { r_val = TRUE; foo = 1; } else { r_val = FALSE; foo = 0; } // Similar to above, but with some extra alignment. This is better // than above. if ( bli_obj_is_real( x ) ) { r_val = TRUE; foo = 1; } else { r_val = FALSE; foo = 0; } } ``` ### Whitespace in function calls For single-line function calls, **please avoid** a space between the last character in the function/macro name and the open parentheses. Also, please do not insert any spaces before commas that separate arguments to a function/macro invocation.
But please **do** insert at least one space after each comma. (I say "at least one" because sometimes it looks nicer to align the commas with those of function calls on lines above or below the function call in question.) Also, please include one space between the opening parentheses and the first argument, and also between the last argument and closing parentheses: ```c { obj_t x; // Good. bli_obj_create( BLIS_DOUBLE, 3, 4, 0, 0, &x ); bli_obj_set_length( 10, x ); // Bad. Please avoid these. bli_obj_set_dt ( BLIS_FLOAT, x ); bli_obj_set_dt( BLIS_FLOAT , x ); bli_obj_set_dt(BLIS_FLOAT, x); bli_obj_set_dt(BLIS_FLOAT,x); // Good. bli_obj_set_dt( BLIS_FLOAT, x ); } ``` For multi-line function calls, please use the following template: ```c { bli_dgemm ( BLIS_NO_TRANSPOSE, BLIS_TRANSPOSE, m, n, k, &BLIS_ONE, a, rs_a, cs_a, b, rs_b, cs_b, &BLIS_ZERO, c, rs_c, cs_c ); } ``` Notice that here, the parentheses are formatted similar to braces. However, notice that the arguments do not constitute a new level of "scope." Instead, you should use exactly two additional spaces before each line of arguments. ### Whitespace in function definitions When defining a function with few arguments, insert a single space after commas and types, and after the first parentheses and before the last parentheses: ```c // Please write "short" function signatures like this. void bli_obj_set_length( dim_t m, obj_t* a ) { // Body of function } ``` As with single-line function calls, please do not place a space between the last character of the function name and the open parentheses to the argument list! ```c // Please avoid this. void bli_obj_set_length ( dim_t m, obj_t* a ) { // Body of function } ``` When defining a function with many arguments, especially those that would not comfortably fit in a single 80-character line, you can split the type signature into multiple lines: ```c // Please write "long" function signatures like this.
void bli_gemm ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx ) { // Body of function } ``` If you are going to use this style of function definition, please indent the parentheses exactly five spaces (don't use tabs here). Then, indent the arguments with an additional two spaces. Thus, parentheses should be in column 6 (counting from 1) and argument types should begin in column 8. Also notice that the number of spaces after each argument's type specifier varies so that the argument names are aligned. If you insert qualifiers such as `restrict`, please right-justify them: ```c // Please align 'restrict' keywords and variables, as appropriate. void bli_gemm ( obj_t* restrict alpha, obj_t* restrict a, obj_t* restrict b, obj_t* restrict beta, obj_t* restrict c, cntx_t* restrict cntx ) { // Body of function } ``` ### Whitespace in expressions Please insert whitespace into conditional expressions. ```c { // Good. if ( m == 10 && n > 0 ) return; // Bad. if ( m==10 && n>0 ) return; // Worse! if (m==10&&n>0) return; // Okay, now you're just messing with me. if(m==10&&n>0)return; } ``` Unlike with the parentheses that surround the argument list of a function call, there should be exactly one space after conditional keywords and the open parentheses for its associated conditional statement: `if (...)`, `else if (...)`, and `while (...)`. ```c { // Good. if ( ... ) return 0; else if ( ... ) return 1; // Good. while ( ... ) { // loop body. } // Good. do { // loop body. } while ( ... ); } ``` Sometimes, extra spaces for alignment are desired: ```c { // This is okay. if ( m == 0 ) return 0; else if ( n == 0 ) return 1; // This is sometimes preferred because it allows your eyes to more easily // see the differences between the 'if' conditional expression and the // 'else if' conditional expression. if ( m == 0 ) return 0; else if ( n == 0 ) return 1; } ``` ### Trailing whitespace Please try to avoid inserting any trailing whitespace. 
This also means that "blank" lines should not contain any tabs or spaces. blis-0.6.1/docs/ConfigurationHowTo.md000066400000000000000000002045151360743507500175320ustar00rootroot00000000000000## Contents * **[Contents](ConfigurationHowTo.md#contents)** * **[Introduction](ConfigurationHowTo.md#introduction)** * **[Sub-configurations](ConfigurationHowTo.md#sub-configurations)** * [`bli_cntx_init_*.c`](ConfigurationHowTo.md#bli_cntx_init_c) * [`bli_family_*.h`](ConfigurationHowTo.md#bli_family_h) * [`make_defs.mk`](ConfigurationHowTo.md#make_defsmk) * **[Configuration families](ConfigurationHowTo.md#configuration-families)** * **[Configuration registry](ConfigurationHowTo.md#configuration-registry)** * [Walkthrough](ConfigurationHowTo.md#walkthrough) * [Printing the configuration registry lists](ConfigurationHowTo.md#printing-the-configuration-registry-lists) * **[Adding a new kernel set](ConfigurationHowTo.md#adding-a-new-kernel-set)** * **[Adding a new configuration family](ConfigurationHowTo.md#adding-a-new-configuration-family)** * **[Adding a new sub-configuration](ConfigurationHowTo.md#adding-a-new-sub-configuration)** * **[Further development topics](ConfigurationHowTo.md#further-development-topics)** * [Querying the current configuration](ConfigurationHowTo.md#querying-the-current-configuration) * [Header dependencies](ConfigurationHowTo.md#header-dependencies) * [Still have questions?](ConfigurationHowTo.md#still-have-questions) ## Introduction This document describes how to manage, edit, and create BLIS framework configurations. **The target audience is primarily BLIS developers** who wish to add support for new types of hardware, and developers who write (or tinker with) BLIS kernels. The BLIS [Build System](BuildSystem.md) guide introduces the concept of a BLIS [configuration](BuildSystem.md#Step_1:_Choose_a_framework_configuration). There are actually two types of configurations: sub-configuration and configuration families. 
A _sub-configuration_ encapsulates all of the information needed to build BLIS for a particular microarchitecture. For example, the `haswell` configuration allows a user or developer to build a BLIS library that targets hardware based on Intel Haswell (or Broadwell or Skylake/Kabylake desktop) microprocessors. Such a sub-configuration typically includes optimized kernels as well as the corresponding cache and register blocksizes that allow those kernels to work well on the target hardware. A _configuration family_ simply specifies a collection of other registered sub-configurations. For example, the `intel64` configuration allows a user or developer to build a BLIS library that includes several Intel x86_64 configurations, and hence supports multiple microarchitectures simultaneously. The appropriate configuration information (e.g. kernels and blocksizes) will be selected via some hardware detection heuristic (e.g. the `CPUID` instruction) at runtime. (**Note:** Prior to 290dd4a, configuration families could only be defined in terms of sub-configurations. Starting with 290dd4a, configuration families may be defined in terms of other families.) Both of these configuration types are organized as directories of files and then "registered" into a configuration registry file named `config_registry`, which resides in the top-level directory. ## Sub-configurations A sub-configuration is represented by a sub-directory of the `config` directory in the top-level of the BLIS distribution: ``` $ ls config amd64 cortexa15 excavator intel64 old power7 template bgq cortexa57 generic knc penryn sandybridge zen bulldozer cortexa9 haswell knl piledriver steamroller ``` Let's inspect the `haswell` configuration as an example: ``` $ ls config/haswell bli_cntx_init_haswell.c bli_family_haswell.h make_defs.mk ``` A sub-configuration (`haswell`, in this case) usually contains just three files: * `bli_cntx_init_haswell.c`. 
This file contains the initialization function for a context targeting the hardware in question, in this case, Intel Haswell. A context, or `cntx_t` object, in BLIS encapsulates all of the hardware-specific information--including kernel function pointers and cache and register blocksizes--necessary to support all of the main computational operations in BLIS. The initialization function inside this file should be named the same as the filename (excluding `.c` suffix), which should begin with prefix `bli_cntx_init_` and end with the (lowercase) name of the sub-configuration. The context initialization function (in this case, `bli_cntx_init_haswell()`) is used internally by BLIS when setting up the global kernel structure--a mechanism for managing and supporting multiple microarchitectures simultaneously, so that the choice of which context to use can be deferred until the computation is ready to execute. * `bli_family_haswell.h`. This header file is `#included` when the configuration in question, in this case `haswell`, was the target to `./configure`. This is where you would specify certain global parameters and settings. For example, if you wanted to specify custom implementations of `malloc()` and `free()`, this is where you would specify them. The file is oftentimes empty. (In the case of configuration families, the definitions in this file apply to the _entire_ build, and not any specific sub-configuration, but for consistency we support them for all configuration targets, whether they be singleton sub-configurations or configuration families.) * `make_defs.mk`. This makefile fragment defines the compiler and compiler flags to use during compilation. Specifically, the values defined in this file are used whenever compiling source code specific to the sub-configuration (i.e., reference kernels and optimized kernels). If the sub-configuration is the target of `configure`, then these flags are also used to compile general framework code. 
Providing these three components constitutes a complete sub-configuration. A more detailed description of each file will follow. ### bli_cntx_init_*.c As mentioned above, the kernels used by a sub-configuration are specified in the `bli_cntx_init_` function. This function is flexible in that the context is typically initialized with a set of "reference" kernels. Then, the kernel developer overwrites the fields in the context that correspond to kernel operations that have optimized counterparts that should be used instead. Let's use the following hypothetical function definition to guide our walkthrough. ```c #include "blis.h" void bli_cntx_init_fooarch( cntx_t* cntx ) { blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. bli_cntx_init_fooarch_ref( cntx ); // ------------------------------------------------------------------------- // Update the context with optimized native gemm microkernels and // their storage preferences. bli_cntx_set_l3_nat_ukrs ( 5, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_bararch_asm, FALSE, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_bararch_asm, FALSE, BLIS_GEMMTRSM_U_UKR, BLIS_DOUBLE, bli_dgemmtrsm_u_bararch_asm, FALSE, BLIS_TRSM_L_UKR, BLIS_DOUBLE, bli_dtrsm_l_bararch_asm, FALSE, BLIS_TRSM_U_UKR, BLIS_DOUBLE, bli_dtrsm_u_bararch_asm, FALSE, cntx ); // Update the context with optimized packm kernels. bli_cntx_set_packm_kers ( 2, BLIS_PACKM_4XK_KER, BLIS_DOUBLE, bli_dpackm_bararch_asm_4xk, BLIS_PACKM_8XK_KER, BLIS_DOUBLE, bli_dpackm_bararch_asm_8xk, cntx ); // Update the context with optimized level-1f kernels. bli_cntx_set_l1f_kers ( 5, BLIS_AXPY2V_KER, BLIS_DOUBLE, bli_daxpy2v_fooarch_asm, BLIS_DOTAXPYV_KER, BLIS_DOUBLE, bli_ddotaxpyv_fooarch_asm, BLIS_AXPYF_KER, BLIS_DOUBLE, bli_daxpyf_fooarch_asm, BLIS_DOTXF_KER, BLIS_DOUBLE, bli_ddotxf_fooarch_asm, BLIS_DOTXAXPYF_KER, BLIS_DOUBLE, bli_ddotxaxpyf_fooarch_asm, cntx ); // Update the context with optimized level-1v kernels. 
bli_cntx_set_l1v_kers ( 2, BLIS_AXPYV_KER, BLIS_DOUBLE, bli_daxpyv_fooarch_asm, BLIS_DOTV_KER, BLIS_DOUBLE, bli_ddotv_fooarch_asm, cntx ); // Initialize level-3 blocksize objects with architecture-specific values. // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 8, 8, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 8, 4, 4, 4 ); bli_blksz_init_easy( &blkszs[ BLIS_MC ], 128, 128, 128, 128 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4096, 4096, 4096, 4096 ); // Update the context with the current architecture's register and cache // blocksizes (and multiples) for native execution. bli_cntx_set_blkszs ( BLIS_NAT, 5, BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR, BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR, BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR, BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR, BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR, cntx ); } ``` _**Function name/signature.**_ This function always takes one argument, a pointer to a `cntx_t` object. As with the name of the file, it should be named with the prefix `bli_cntx_init_` followed by the lowercase name of the configuration--in this case, `fooarch`. _**Blocksize object array.**_ The `blkszs` array declaration is needed later in the function and should generally be consistent (and unchanged) across all configurations. _**Reference initialization.**_ The first function call, `bli_cntx_init_fooarch_ref()`, initializes the context `cntx` with function pointers to reference implementations of all of the kernels supported by BLIS (as well as cache and register blocksizes, and other fields). This function is automatically generated by BLIS for every sub-configuration enabled at configure-time. The function prototype is generated by a preprocessor macro in `frame/include/bli_arch_config.h`. 
_**Level-3 microkernels.**_ The second function call is to a variable argument function, `bli_cntx_set_l3_nat_ukrs()`, which updates `cntx` with five optimized double-precision real level-3 microkernels. The first argument encodes the number of individual kernels being registered into the context. Every subsequent line, except for the last line, is associated with the registration of a single kernel, and each of these lines is independent of one another and can occur in any order, provided that the kernel parameters of each line occur in the same order--kernel ID, followed by datatype, followed by function name, followed by storage preference boolean (i.e., whether the microkernel prefers row storage). The last argument of the function call is the address of the context being updated, `cntx`. Notice that we are registering microkernels written for another type of hardware, `bararch`, because in our hypothetical universe `bararch` is very similar to `fooarch` and so we recycle the code between the two configurations. After the function returns, the context contains pointers to optimized double-precision real level-3 microkernels. Note that the context will still contain reference microkernels for single-precision real and complex, and double-precision complex computation, as those kernels were not updated. _Note:_ Currently, BLIS only allows the kernel developer to signal a preference (row or column) for `gemm` microkernels. The preferences of the `gemmtrsm` and `trsm` microkernels can (and must) be set, but are ignored by the framework during execution. _**Level-1m (packm) kernels.**_ The third function call is to another variable argument function, `bli_cntx_set_packm_kers()`. This function works very similarly to `bli_cntx_set_l3_nat_ukrs()`, except that it expects a different set of kernel IDs (because now we are registering level-1m kernels) and it does not take a storage preference boolean.
After this function returns, `cntx` contains function pointers to optimized double-precision real `packm` kernels. These kernels, like the level-3 kernels previously, are also borrowed from the `bararch` kernel set. Unregistered `packm` kernels will continue to point to reference code. _**Level-1f kernels.**_ The fourth function call is to yet another variable argument function, `bli_cntx_set_l1f_kers()`. This function has the same signature as `bli_cntx_set_packm_kers()`, except that it expects a different set of kernel IDs (because now we are registering level-1f kernels). After this function returns, `cntx` contains function pointers to optimized double-precision real level-1f kernels. These kernels are written for `fooarch` specifically. The unregistered level-1f kernels will continue to point to reference code. _**Level-1v kernels.**_ The fifth function call is to `bli_cntx_set_l1v_kers()`, which operates similarly to `bli_cntx_set_l1f_kers()`, except here we are registering level-1v kernels. After the function returns, most kernels will continue to point to reference code, except double-precision real instances of `axpyv` and `dotv`. For a complete list of kernel IDs, please see the definitions of `l3ukr_t`, `l1mkr_t`, `l1fkr_t`, `l1vkr_t` in [frame/include/bli_type_defs.h](https://github.com/flame/blis/blob/master/frame/include/bli_type_defs.h). _**Setting blocksizes.**_ The next block of code initializes the `blkszs` array with register and cache blocksize values for each datatype. The values here are used by the level-3 operations that employ the level-3 microkernels we registered previously. We use `bli_blksz_init_easy()` when initializing only the primary value.
If the auxiliary value needs to be set to a different value than the primary, `bli_blksz_init()` should be used instead, as in: ```c // s d c z bli_blksz_init_easy( &blkszs[ BLIS_MR ], 0, 8, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 0, 4, 0, 0 ); bli_blksz_init ( &blkszs[ BLIS_MC ], 0, 128, 0, 0, 0, 160, 0, 0 ); bli_blksz_init ( &blkszs[ BLIS_KC ], 0, 256, 0, 0, 0, 288, 0, 0 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 0, 4096, 0, 0 ); ``` Here, we use `bli_blksz_init()` to set different auxiliary (maximum) cache blocksizes for _MC_ and _KC_. The same function could be used to set auxiliary (packing) register blocksizes for _MR_ and _NR_, which correspond to the _PACKMR_ and _PACKNR_ parameters. Other blocksizes, particularly those corresponding to level-1f operations, may be set. For a complete list of blocksize IDs, please see the definitions of `bszid_t` in [frame/include/bli_type_defs.h](https://github.com/flame/blis/blob/master/frame/include/bli_type_defs.h). For more information on interpretations of the auxiliary blocksize value, see the digressions below. Note that we set level-3 blocksizes even for datatypes that retain reference code kernels; however, by passing in `0` for those blocksizes, we indicate to `bli_blksz_init()` and `bli_blksz_init_easy()` that the current value should be left untouched. In the example above, this leaves the blocksizes associated with the reference kernels (set by `bli_cntx_init_fooarch_ref()`) intact for the single real, single complex, and double complex datatypes. _Digression:_ Auxiliary blocksize values for register blocksizes are interpreted as the "packing" register blocksizes. _PACKMR_ and _PACKNR_ serve as "leading dimensions" of the packed micropanels that are passed into the microkernel. Oftentimes, _PACKMR = MR_ and _PACKNR = NR_, and thus the developer does not typically need to set these values manually.
(See the [implementation notes for gemm](KernelsHowTo.md#Implementation_Notes_for_gemm) in the BLIS Kernel guide for more details on these topics.) _Digression:_ Auxiliary blocksize values for cache blocksizes are interpreted as the maximum cache blocksizes. The maximum cache blocksizes are a convenient and portable way of smoothing performance of the level-3 operations when computing with a matrix operand that is just slightly larger than a multiple of the preferred cache blocksize in that dimension. In these "edge cases," iterations run with highly sub-optimal blocking. We can address this problem by merging the "edge case" iteration with the second-to-last iteration, such that the cache blocksizes are slightly larger--rather than significantly smaller--than optimal. The maximum cache blocksizes allow the developer to specify the _maximum_ size of this merged iteration; if the edge case causes the merged iteration to exceed this maximum, then the edge case is _not_ merged and instead it is computed upon in a separate (final) iteration. _**Committing blocksizes.**_ Finally, we commit the values in `blkszs` to the context by calling the variable argument function `bli_cntx_set_blkszs()`. This function call generally should be considered boilerplate and thus should not be changed unless you are altering the matrix multiplication _algorithm_ as specified in the control tree. If this is your goal, please get in contact with BLIS developers via the [blis-devel](http://groups.google.com/group/blis-devel) mailing list for guidance, if you have not done so already. _**Availability of kernels.**_ Note that any kernel made available to the `fooarch` configuration within `config_registry` may be referenced inside `bli_cntx_init_fooarch()`. In this example, we referenced `fooarch` kernels as well as kernels native to another configuration, `bararch`.
Thus, the `config_registry` would contain a line such as: ``` fooarch: fooarch/fooarch/bararch ``` Interpreting the line left-to-right: the `fooarch` configuration family contains only itself, `fooarch`, but must be able to refer to kernels from its own kernel set (`fooarch`) as well as kernels belonging to the `bararch` kernel set. The configuration registry is described more completely [in a later section](ConfigurationHowTo.md#configuration-registry). ### bli_family_*.h This file is conditionally `#included` only for the configuration family targeted at configure-time. For example, if you run `./configure haswell`, `bli_family_haswell.h` will be `#included`, and if you run `./configure intel64`, `bli_family_intel64.h` will be `#included`. The header file is `#included` by [frame/include/bli_arch_config.h](https://github.com/flame/blis/blob/master/frame/include/bli_arch_config.h). This header file is oftentimes empty. This is because the parameters specified here usually work fine with their default values, which are defined in [frame/include/bli_kernel_macro_defs.h](https://github.com/flame/blis/blob/master/frame/include/bli_kernel_macro_defs.h). However, there may be some configurations for which a kernel developer will wish to adjust some of these parameters. Furthermore, when creating a configuration family, the parameters set in the corresponding `bli_family_*.h` file must work for **all** sub-configurations in the family. A description of the parameters that may be set in `bli_family_*.h` follows. _**Memory allocation functions.**_ BLIS allows the developer to customize the functions called for memory allocation for three different categories of memory: user, pool, and internal. The functions for user allocation are called any time the creation of a BLIS matrix or vector `obj_t` requires that a matrix buffer be allocated, such as via `bli_obj_create()`. 
The functions for pool allocation are called only when allocating blocks to the memory pools used to manage packed matrix buffers. The functions for internal allocation are called by BLIS when allocating internal data structures, such as control trees. By default, the three pairs of parameters are defined via preprocessor macros to call the implementation of `malloc()` and `free()` provided by `stdlib.h`: ```c #define BLIS_MALLOC_USER malloc #define BLIS_FREE_USER free #define BLIS_MALLOC_POOL malloc #define BLIS_FREE_POOL free #define BLIS_MALLOC_INTL malloc #define BLIS_FREE_INTL free ``` Any substitute for `malloc()` and `free()` defined by customizing these parameters must use the same function prototypes as the original functions. Namely: ```c void* malloc( size_t size ); void free( void* p ); ``` Furthermore, if a header file needs to be included, such as `my_malloc.h`, it should be `#included` within the `bli_family_*.h` file (before `#defining` any of the `BLIS_MALLOC_` and `BLIS_FREE_` macros). _**SIMD register file.**_ BLIS allows you to specify the _maximum_ number of SIMD registers available for use by your kernels, as well as the _maximum_ size (in bytes) of those registers. These values default to: ```c #define BLIS_SIMD_NUM_REGISTERS 32 #define BLIS_SIMD_SIZE 64 ``` These macros are used in computing the maximum amount of temporary storage (typically allocated statically, on the function stack) that will be needed to hold a single micro-tile of any datatype (and for any induced method): ```c #define BLIS_STACK_BUF_MAX_SIZE ( BLIS_SIMD_NUM_REGISTERS * BLIS_SIMD_SIZE * 2 ) ``` These temporary buffers are used when handling edge cases (m % _MR_ != 0 || n % _NR_ != 0) within the level-3 macrokernels, and also in the virtual microkernels of various implementations of induced methods for complex matrix multiplication.
It is **very important** that these values be set correctly; otherwise, you may experience undefined behavior as stack data is overwritten at run-time. A kernel developer may set `BLIS_SIMD_NUM_REGISTERS` and `BLIS_SIMD_SIZE`, which will indirectly affect `BLIS_STACK_BUF_MAX_SIZE`, or he may set `BLIS_STACK_BUF_MAX_SIZE` directly. Notice that the default values are already set to work with modern x86_64 systems. _**Memory alignment.**_ BLIS implements memory alignment internally, rather than relying on a function such as `posix_memalign()`, and thus it can provide aligned memory even with functions that adhere to the `malloc()` and `free()` API in the standard C library. ```c #define BLIS_SIMD_ALIGN_SIZE BLIS_SIMD_SIZE #define BLIS_PAGE_SIZE 4096 #define BLIS_STACK_BUF_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE #define BLIS_HEAP_ADDR_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE #define BLIS_HEAP_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE #define BLIS_POOL_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE ``` The value `BLIS_STACK_BUF_ALIGN_SIZE` defines the alignment of stack memory used as temporary internal buffers, such as for output matrices to the microkernel when computing edge cases. (See [implementation notes](KernelsHowTo#implementation-notes-for-gemm) for the `gemm` microkernel for details.) This value defaults to `BLIS_SIMD_ALIGN_SIZE`, which defaults to `BLIS_SIMD_SIZE`. The value `BLIS_HEAP_ADDR_ALIGN_SIZE` defines the alignment used when allocating memory via the `malloc()` function defined by `BLIS_MALLOC_USER`. Setting this value to `BLIS_SIMD_ALIGN_SIZE` may speed up certain level-1v and -1f kernels. The value `BLIS_HEAP_STRIDE_ALIGN_SIZE` defines the alignment used for so-called "leading dimensions" (i.e. column strides for column-stored matrices, and row strides for row-stored matrices) when creating BLIS matrices via the object-based API (e.g. `bli_obj_create()`). 
While setting `BLIS_HEAP_ADDR_ALIGN_SIZE` guarantees alignment for the first column (or row), creating a matrix with certain dimension values (_m_ and _n_) may cause subsequent columns (or rows) to be misaligned. Setting this value to `BLIS_SIMD_ALIGN_SIZE` is usually desirable. Additional alignment may or may not be beneficial. The value `BLIS_POOL_ADDR_ALIGN_SIZE` defines the alignment used when allocating blocks to the memory pools used to manage internal packing buffers. Any block of memory returned by the memory allocator is guaranteed to be aligned to this value. Aligning these blocks to the virtual memory page size (usually 4096 bytes) is standard practice. ### make_defs.mk The `make_defs.mk` file primarily contains compiler and compiler flag definitions used by `make` when building a BLIS library. The format of the file is mostly self-explanatory. However, we will expound on the contents here, using the `make_defs.mk` file for the `haswell` configuration as an example: ```make # Declare the name of the current configuration and add it to the # running list of configurations included by common.mk. THIS_CONFIG := haswell ifeq ($(CC),) CC := gcc CC_VENDOR := gcc endif CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L CMISCFLAGS := -std=c99 -m64 CPICFLAGS := -fPIC CWARNFLAGS := -Wall -Wno-unused-function -Wfatal-errors ifneq ($(DEBUG_TYPE),off) CDBGFLAGS := -g endif ifeq ($(DEBUG_TYPE),noopt) COPTFLAGS := -O0 else COPTFLAGS := -O3 endif CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) CVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=core-avx2 else ifeq ($(CC_VENDOR),icc) CVECFLAGS := -xCORE-AVX2 else ifeq ($(CC_VENDOR),clang) CVECFLAGS := -mavx2 -mfma -mfpmath=sse -march=core-avx2 else $(error gcc, icc, or clang is required for this configuration.) endif endif endif # Store all of the variables here to new variables containing the # configuration name. 
$(eval $(call store-make-defs,$(THIS_CONFIG))) ``` _**Configuration name.**_ The first statement reaffirms the name of the configuration. The `THIS_CONFIG` variable is used later to attach the configuration name as a suffix to the remaining variables so that they can co-exist with variables read from other `make_defs.mk` files during multi-configuration builds. Note that if the configuration name defined here does not match the name of the directory in which `make_defs.mk` is stored, `make` will output an error when executing the top-level `Makefile`. _**Compiler definitions.**_ Next, we set the values of `CC` and `CC_VENDOR`. The former is the name (or path) to the actual compiler executable to use during compilation. The latter is the compiler family. Currently, BLIS generally supports three compiler families: `gcc`, `clang`, and `icc`. `CC_VENDOR` is used when conditionally setting various variables based on the type of flags available--flags that might not vary across different versions or installations of the same compiler (e.g. `gcc-4.9` vs `gcc-5.0`, or `gcc` vs `/usr/local/bin/gcc`), but may vary across compiler families (e.g. `gcc` vs. `icc`). If the compiler you wish to use is in your `PATH` environment variable, `CC` and `CC_VENDOR` will usually contain the same value. _**Basic compiler flags.**_ The variables `CPPROCFLAGS` and `CWARNFLAGS` should be assigned to C preprocessor flags and compiler warning flags, respectively, while `CPICFLAGS` should be assigned flags to enable position independent code (shared library) flags. Finally, `CMISCFLAGS` may be assigned any miscellaneous flags that do not neatly fit into any other category, such as language flags and 32-/64-bit flags. These four categories of flags are usually recognized across compiler families. _**Debugging flags.**_ The `CDBGFLAGS` variable should be assigned to contain flags that insert debugging symbols into the object code emitted by the compiler. 
Typically, this amounts to no more than the `-g` flag, but some compilers or situations may call for different (or additional) flags. This variable is conditionally set only if `$(DEBUG_TYPE)`, which is set by the `configure` script, is not equal to `off`. _**Optimization flags.**_ The `COPTFLAGS` variable should be assigned any flags relating to general compiler optimization. Usually this takes the form of `-O2` or `-O3`, but more specific optimization flags may be included as well, such as `-fomit-frame-pointer`. Note that, as with `CDBGFLAGS`, `COPTFLAGS` is conditionally assigned based on the value of `$(DEBUG_TYPE)`. A separate `CKOPTFLAGS` variable tracks optimization flags used when compiling kernels. For most configurations, `CKOPTFLAGS` is assigned as a copy of `COPTFLAGS`, but if the kernel developer needs different optimization flags to be applied when compiling kernel source code, `CKOPTFLAGS` should be set accordingly. _**Vectorization flags.**_ The second-to-last block sets the `CVECFLAGS`, which should be assigned any flags that must be given to the compiler in order to enable use of a vector instruction set needed or assumed by the kernel source code. Also, if you wish to enable automatic use of certain instruction sets (e.g. `-mfpmath=sse` for many Intel architectures), this is where you should set those flags. These flags often differ among compiler families, especially between `icc` and `gcc`/`clang`. _**Variable storage/renaming.**_ Finally, the last statement commits the variables defined in the file to "storage". That is, they are copied to variable names that contain `THIS_CONFIG` as a suffix. This allows the variables for one configuration to co-exist with variables of another configuration. ## Configuration families A configuration family is represented similarly to that of a sub-configuration: a sub-directory of the `config` directory. Additionally, there are two types of families: singleton families and umbrella families.
A _singleton_ family simply refers to a sub-configuration. The `configure` script only targets configuration families. But since every sub-configuration is also a valid configuration family, every sub-configuration is a valid configuration target. An _umbrella_ family is the more interesting type of configuration family. These families are defined as collections of architecturally related sub-configurations. (**Important:** an umbrella family should always be named something different than any of its constituent sub-configurations.) BLIS provides a mechanism to define umbrella families so that users and developers can build a single instance of BLIS that supports multiple configurations, where some heuristic is used at runtime to choose among the configurations. For example, you may wish to deploy a BLIS library on a storage device that is shared among several computers, each of which is based on a different x86_64 microarchitecture. Throughout the remainder of this document, we will sometimes refer to "umbrella families" as simply "families". Similarly, we will refer to "singleton families" and "sub-configurations" interchangeably. To the extent that any ambiguity may remain, context should clarify which type of family is germane to the discussion. Let's inspect the `amd64` configuration family as an example: ``` $ ls config/amd64 bli_family_amd64.h make_defs.mk ``` A configuration family contains a subset of the files contained within a sub-configuration: A `bli_family_*.h` header file and a `make_defs.mk` makefile fragment: * `bli_family_amd64.h`. This header file is `#included` only when the configuration family in question, in this case `amd64`, was the target to `./configure`. The file serves a similar purpose as with sub-configurations--a place to define various parameters, such as those relating to memory allocation and alignment. However, in the context of configuration families, the uniqueness of this file makes a bit more sense. 
Importantly, the definitions in this file will affect **all** sub-configurations within the family. Thus, it is useful to think of these as "global" parameters. For example, if custom implementations of `malloc()` and `free()` are specified in the `bli_family_amd64.h` file, these implementations will be used for every sub-configuration member of the `amd64` family. (The configuration registry, described in [the next section](ConfigurationHowTo.md#configuration-registry), specifies each configuration family's membership.) As with sub-configurations, this file may be empty, in which case reasonable defaults are selected by the framework. * `make_defs.mk`. This makefile fragment defines the compiler and compiler flags in a manner identical to that of sub-configurations. However, these configuration flags are used when compiling source code that is not specific to any one particular sub-configuration. (The build system compiles a set of reference kernels and optimized kernels for each sub-configuration, during which it uses flags read from the individual sub-configurations' `make_defs.mk` files. By contrast, the general framework code is compiled once--using the flags read from the family's `make_defs.mk` file--and executed by all sub-configurations.) For a more detailed walkthrough of these files' expected/allowed contents, please see the descriptions provided in the section on [sub-configurations](ConfigurationHowTo.md#sub-configurations): * [bli_family_*.h](ConfigurationHowTo.md#bli_family_h) * [make_defs.mk](ConfigurationHowTo.md#make_defsmk) With these two files defined and present, the configuration family is properly constituted and ready to be registered within the configuration registry. ## Configuration registry The configuration registry is the official place for declaring a sub-configuration or configuration family.
Unless a configuration (singleton or family) is declared within the registry, `configure` will not accept it as a valid configuration target at configure-time. Before describing the syntax and semantics of the registry, we'll first briefly describe three types of information we wish to encode into the registry: _**Configuration list.**_ First and foremost, the registry needs to enumerate the registered sub-configurations. That is, it needs to list the sub-configurations (or, singleton families) that are available to be targeted by `configure`. The registry also needs to specify configuration family membership--that is, the (umbrella) families to which those sub-configurations belong. _**Kernel list.**_ Next, the registry needs to specify the list of kernel sets that will be needed by each sub-configuration, and by proxy, each configuration family. It's easy to think of different configurations as corresponding to different microarchitectures, and that generally holds true. However, sometimes we use the same configuration for multiple microarchitectures (e.g. `haswell` is used for Intel Haswell, Broadwell, and non-server Skylake variants). It might also be tempting to think of each microarchitecture as having its own set of kernels. However, in practice, we find that some microarchitectures' kernels are identical to those of a previous microarchitectural revision, or to those of another vendor's microarchitecture. Thus, sometimes a sub-configuration will wish to use a kernel set that is "native" to a different configuration. In these cases, there is not a one-to-one mapping of sub-configuration names to kernel set names, and therefore the configuration registry must separately specify the kernel sets needed by any sub-configuration (and by proxy, any configuration family). 
_**Kernel-to-configuration map.**_ Lastly, and most subtly, for each kernel set in the kernel list, the registry needs to specify the sub-configuration(s) that depend on that particular kernel set. Notice that the kernel list can be obtained by mapping sub-configurations to kernel sets they require. By contrast, the kernel-to-configuration map tracks the reverse dependency and helps us answer: for any given kernel set, which sub-configurations caused the kernel set to be pulled into the build? This mapping is needed when determining which sub-configuration's compiler flags (as defined in its `make_defs.mk` file) to use when compiling that kernel set. The most obvious solution to this problem would have been to associate compiler flags with the individual kernel sets. However, given the desire to share kernel sets among sub-configurations, we needed the flexibility of applying different compiler flags to any given kernel set based on the sub-configuration that would be utilizing that kernel set. In the case that multiple sub-configurations pull in the same kernel set, a set of heuristics is used to choose between the sub-configurations so that a single set of compiler flags can be chosen for use when compiling that kernel set. ### Walkthrough The configuration registry exists as a human-readable file, `config_registry`, located at the top-level of the BLIS distribution. What follows is an example of a `config_registry` file that is based on actual contents in a BLIS commit recent as of this writing. Note that lines containing only whitespace are ignored. Furthermore, any characters that appear after (and including) a `#` are treated as comments and also ignored. ``` # # config_registry # # Processor families. x86_64: intel64 amd64 intel64: haswell sandybridge penryn generic amd64: zen excavator steamroller piledriver bulldozer generic arm64: cortexa57 generic arm32: cortexa15 cortexa9 generic # Intel architectures. 
haswell: haswell sandybridge: sandybridge penryn: penryn knl: knl # AMD architectures. zen: zen/haswell/sandybridge excavator: excavator/piledriver steamroller: steamroller/piledriver piledriver: piledriver bulldozer: bulldozer # ARM architectures. cortexa57: cortexa57/armv8a cortexa15: cortexa15/armv7a cortexa9: cortexa9/armv7a # Generic architectures. generic: generic ``` Generally speaking, the registry can be thought of as defining a very simple grammar. (However, as you'll soon see, there are nuances that are un-grammar-like.) The registry can contain two kinds of lines. The first type defines a singleton configuration family. For example, the line ``` haswell: haswell ``` defines a configuration family `haswell` (the left side of the `:`) as containing only itself: the sub-configuration by the same name, `haswell` (the right side of the `:`). When singleton families are defined in this way, it implicitly pulls in the kernel set by the same name as the sub-configuration (in this case, `haswell`). More specifically, the `haswell` sub-configuration depends on the kernels residing in the `kernels/haswell` sub-directory. The second type of line defines an umbrella configuration family. For example, the line ``` intel64: haswell sandybridge penryn generic ``` defines the configuration family `intel64` as containing the `haswell`, `sandybridge`, `penryn`, and `generic` sub-configurations as members (technically speaking, it is more accurate to think of the family as containing singleton families rather than their corresponding sub-configurations). Thus, if the user runs `./configure intel64`, the library will be built to support all sub-configurations defined within the `intel64` family. **Note:** `generic` is a somewhat special sub-configuration that uses only reference kernels and reference blocksizes. 
It is included in every umbrella family so that when those families are instantiated into BLIS libraries and linked to an application, the application will be able to run even if none of the other sub-configurations (`haswell`, `sandybridge`, `penryn`) are chosen at runtime by the hardware detection heuristic. Some sub-configurations, for various reasons, do not rely on their own set of kernels and instead use the kernel set that is native to another sub-configuration. For example, the `excavator` and `steamroller` configurations each correspond to hardware that is very similar to the hardware targeted by the `piledriver` configuration. In fact, the former two configurations rely exclusively on kernels written for the latter configuration. (Presently, there are no `excavator` or `steamroller` kernel sets in BLIS.) We denote this kernel dependency with a `/` character: ``` excavator: excavator/piledriver steamroller: steamroller/piledriver ``` Here, the first line (reading from left-to-right) defines the `excavator` singleton family as containing only itself, the `excavator` sub-configuration, and also specifies that this sub-configuration must have access to the `piledriver` kernel set. The second line defines the `steamroller` singleton family in a similar manner. **Note:** Specifying non-native kernel sets via the `/` character is only allowed when defining singleton configuration families. They may NOT appear in the definitions of umbrella families! When an umbrella family includes a singleton family that is defined to require non-native kernels, this will be accounted for during the parsing of the `config_registry` file. Sometimes, a sub-configuration may need access to more than one kernel set. 
If additional kernel sets are needed, they should be listed with additional `/` characters: ``` zen: zen/haswell/sandybridge ``` The line above defines the `zen` singleton family as containing only itself, the `zen` sub-configuration, and also specifies that this sub-configuration must have access to the `haswell` kernel set as well as the `sandybridge` kernel set. What if there exists a `zen` kernel set as well, which the `zen` sub-configuration must access in addition to those of `haswell` and `sandybridge`? In this case, it would need to be annotated explicitly as: ``` zen: zen/zen/haswell/sandybridge ``` This line (which is hypothetical and does not appear in the `config_registry` example above) defines the `zen` singleton family in terms of only the `zen` sub-configuration, and provides that sub-configuration access to `zen`, `haswell`, and `sandybridge` kernel sets. (Also: the kernel sets may appear in any order.) Notice that while kernel sets usually correspond to a sub-configuration, they do not always. For example, while the `armv7a` and `armv8a` kernel sets are referenced in the example `config_registry` file, there do not exist any registered sub-configurations by those names. However, the kernel directories exist and the kernel sets appear in the definitions of a few `cortex` singleton families. One last thing to point out: take a look at the `x86_64` configuration family: ``` x86_64: intel64 amd64 ``` Unlike most of the registered families, which are defined in terms of sub-configurations, `x86_64` is defined in terms of *other* families--specifically, `intel64` and `amd64`: ``` intel64: haswell sandybridge penryn generic amd64: zen excavator steamroller piledriver bulldozer generic ``` This multi-level style of specifying sub-configurations became available starting in 290dd4a.
The behavior of `configure` in this situation is as you would expect; that is, including `intel64` and `amd64` in the definition of `x86_64` is equivalent to: ``` x86_64: haswell sandybridge penryn zen excavator steamroller piledriver bulldozer generic ``` Any duplicates that may result are removed automatically. ### Printing the configuration registry lists The configuration list, kernel list, and kernel-to-configuration map are constructed internally by `configure`, but these structures can be inspected by running `configure` with the `-c` (which is the short form of `--show-config-lists`) option. This can be useful as a sanity check to make sure `configure` is properly parsing and interpreting the `config_registry` file. The first thing printed is the configuration list: ``` $ ./configure -c amd64 configure: reading configuration registry...done. ... configure: configuration list: configure: amd64: zen excavator steamroller piledriver bulldozer generic configure: arm32: cortexa15 cortexa9 generic configure: arm64: cortexa57 generic configure: bulldozer: bulldozer configure: cortexa15: cortexa15 configure: cortexa57: cortexa57 configure: cortexa9: cortexa9 configure: excavator: excavator configure: generic: generic configure: haswell: haswell configure: intel64: haswell sandybridge penryn generic configure: knl: knl configure: penryn: penryn configure: piledriver: piledriver configure: sandybridge: sandybridge configure: skx: skx configure: steamroller: steamroller configure: x86_64: haswell sandybridge penryn zen excavator steamroller piledriver bulldozer generic ``` This simply lists the sub-configurations associated with each defined configuration family (singleton or umbrella). Note that they are sorted alphabetically. 
Next, the kernel list (actually, all kernel lists) is printed: ``` configure: kernel list: configure: amd64: zen piledriver bulldozer generic configure: arm32: armv7a generic configure: arm64: armv8a generic configure: bulldozer: bulldozer configure: cortexa15: armv7a configure: cortexa57: armv8a configure: cortexa9: armv7a configure: excavator: piledriver configure: generic: generic configure: haswell: haswell zen configure: intel64: haswell zen sandybridge penryn generic configure: knl: knl configure: penryn: penryn configure: piledriver: piledriver configure: sandybridge: sandybridge configure: skx: skx configure: steamroller: piledriver configure: x86_64: haswell sandybridge penryn zen piledriver bulldozer generic configure: zen: zen ``` This shows the kernel sets that are pulled in by each configuration family. For singleton families, this is specified in a straightforward manner via the `/` character described [in the previous section](ConfigurationHowTo.md#Walkthrough). For umbrella families, this is determined indirectly by looking up the definitions of the singleton families that are members of the umbrella family. Next, the full kernel-to-configuration map is printed: ``` configure: kernel-to-config map for 'amd64': configure: bulldozer: bulldozer configure: generic: generic configure: piledriver: excavator steamroller piledriver configure: zen: zen ``` For each of the kernel sets required of the selected configuration family above, the kernel-to-configuration map shows the sub-configurations that required that kernel set. Notice that sometimes a single kernel set may be pulled in by more than one sub-configuration, as with the `piledriver` kernel set. 
Lastly, we print a version of the kernel-to-configuration map in which we've used a set of heuristics to select a single sub-configuration for each kernel set in the map: ``` configure: kernel-to-config map for 'amd64' (chosen pairs): configure: bulldozer:bulldozer configure: generic:generic configure: piledriver:piledriver configure: zen:zen ``` This variant of the kernel-to-config map is formatted as a series of "sub-configuration:kernel-set" pairs. These pairs are used during the processing of the top-level `Makefile` to determine which sub-configuration's compiler flags should be used when compiling the source code within each kernel set. ## Adding a new kernel set Adding support for a new set of kernels in BLIS is easy and can be done via the following steps. 1. _**Create and populate the kernel set directory.**_ First, we must create a directory in `kernels` that corresponds to the new kernel set. Suppose we wanted to add kernels for Intel's Knight's Landing microarchitecture. In BLIS, this corresponds to the `knl` configuration, and so we should name the directory `knl`. This is because we want the `knl` kernel set to be pulled by default into builds that include the `knl` sub-configuration. ``` $ mkdir kernels/knl $ ls kernels armv7a bgq generic knc old piledriver sandybridge armv8a bulldozer haswell knl penryn power7 ``` Next, we must write the `knl` kernels and locate them inside `kernels/knl`. (For more information on writing BLIS kernels, please see the [Kernels Guide](KernelsHowTo.md).) We recommend separating level-1v, level-1f, and level-3 kernels into separate `1`, `1f`, and `3` sub-directories, respectively. The kernel files and functions therein do not need to follow any particular naming convention, though we strongly recommend using the conventions already used by other kernel sets. Take a look at other kernel files, such as those for `haswell`, [for examples](https://github.com/flame/blis/tree/master/kernels). 
Finally, for the `knl` kernel set, you should insert a file named `bli_kernels_knl.h` into `kernels/knl` that prototypes all of your new kernel set's kernel functions. You are welcome to write your own prototypes, but to make the prototyping of kernels easier we recommend using the prototype-generating macros for level-1v, level-1f, level-1m, and level-3 functions defined in [frame/1/bli_l1v_ker_prot.h](https://github.com/flame/blis/blob/master/frame/1/bli_l1v_ker_prot.h), [frame/1f/bli_l1f_ker_prot.h](https://github.com/flame/blis/blob/master/frame/1f/bli_l1f_ker_prot.h), [frame/1m/bli_l1m_ker_prot.h](https://github.com/flame/blis/blob/master/frame/1m/bli_l1m_ker_prot.h), and [frame/3/bli_l3_ukr_prot.h](https://github.com/flame/blis/blob/master/frame/3/bli_l3_ukr_prot.h), respectively. The following example illustrates how a select subset of these macros can be used to generate kernel function prototypes. ```c GEMM_UKR_PROT( double, d, gemm_knl_asm_24x8 ) PACKM_KER_PROT( double, d, packm_knl_asm_24xk ) PACKM_KER_PROT( double, d, packm_knl_asm_8xk ) AXPYF_KER_PROT( dcomplex, z, axpyf_knl_asm ) DOTXF_KER_PROT( dcomplex, z, dotxf_knl_asm ) AXPYV_KER_PROT( float, s, axpyv_knl_asm ) DOTXV_KER_PROT( float, s, dotxv_knl_asm ) ``` The first line generates a function prototype for a double-precision real `gemm` microkernel named `bli_dgemm_knl_asm_24x8()`. Notice how the macro takes three arguments: the C language datatype, the single character corresponding to the datatype, and the base name of the function, which includes the operation (`gemm`), the kernel set name (`knl`), and a substring specifying its implementation (`asm_24x8`). The second and third lines generate prototypes for double-precision real `packm` kernels to go along with the `gemm` microkernel above. The fourth and fifth lines generate prototypes for double-precision complex instances of the level-1f kernels `axpyf` and `dotxf`. 
The last two lines generate prototypes for single-precision real instances of the level-1v kernels `axpyv` and `dotxv`. 2. _**Add support within the framework source code.**_ We also need to make a minor update to the framework to support the new kernels--specifically, to pull in the kernels' function prototypes. **`frame/include/bli_arch_config.h`**. When adding support for the `knl` kernel set to the framework, we must modify this file to `#include` the `bli_kernels_knl.h` header file: ```c #ifdef BLIS_KERNELS_KNL #include "bli_kernels_knl.h" #endif ``` The `BLIS_KERNELS_KNL` macro, which guards the `#include` directive, is automatically defined by the build system when the `knl` kernel set is required by _any_ sub-configuration. ## Adding a new configuration family Adding support for a new umbrella configuration family in BLIS is fairly straightforward and can be done via the following steps. The hypothetical examples used in these steps assume you are trying to create a new configuration family `intelavx` that supports only Intel microarchitectures that support the Intel AVX instruction set. 1. _**Create and populate the family directory.**_ First, we must create a directory in `config` that corresponds to the new family. Since we are adding a new family named `intelavx`, we would name our directory `intelavx`. ``` $ mkdir config/intelavx $ ls config amd64 cortexa15 excavator intel64 knl piledriver steamroller bgq cortexa57 generic intelavx old power7 template bulldozer cortexa9 haswell knc penryn sandybridge zen ``` We also need to create `bli_family_intelavx.h` and `make_defs.mk` files inside our new sub-directory. 
Since they will be very similar to those of the `intel64` family's files, we can copy those files over and then modify them accordingly: ``` $ cp config/intel64/bli_family_intel64.h config/intelavx/bli_family_intelavx.h $ cp config/intel64/make_defs.mk config/intelavx/ ``` First, we update the configuration name inside of `make_defs.mk`: ``` THIS_CONFIG := intelavx ``` and while we're editing the file, we can make any other changes to compiler flags we wish (if any). Similarly, the `bli_family_intelavx.h` header file should be updated, though in our case it does not need any changes; the original file is empty and thus the copied file can remain empty as well. Note that other configuration families may have different needs. Remember that all of the parameters set in this file, either explicitly or implicitly (via their defaults), must work for **all** sub-configurations in the family. When creating or modifying a family, it's worth reviewing the parameters' defaults, which are set in [frame/include/bli_kernel_macro_defs.h](https://github.com/flame/blis/blob/master/frame/include/bli_kernel_macro_defs.h) and convincing yourself that each parameter default (or overriding definition in `bli_family_*.h`) will work for each sub-configuration. 2. _**Add support within the framework source code.**_ Next, we need to update the BLIS framework source code so that the new configuration family is recognized and supported. Configuration families require updates to two files. * **`frame/include/bli_arch_config.h`**. This file must be updated to `#include` the `bli_family_intelavx.h` header file. Notice that the preprocessor directive should be guarded as follows: ```c #ifdef BLIS_FAMILY_INTELAVX #include "bli_family_intelavx.h" #endif ``` The `BLIS_FAMILY_INTELAVX` macro will automatically be defined by the build system whenever the family targeted by `configure` is `intelavx`. 
(In general, if the user runs `./configure foobar`, the C preprocessor macro `BLIS_FAMILY_FOOBAR` will be defined.) * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` returns the correct `arch_t` microarchitecture ID value to the caller. This function is called when the framework is trying to choose which sub-configuration to use at runtime. For x86_64 architectures, this is supported via the `CPUID` instruction, as implemented via `bli_cpuid_query_id()`. Thus, you can simply mimic what is done for the `intel64` family by inserting lines such as: ```c #ifdef BLIS_FAMILY_INTELAVX id = bli_cpuid_query_id(); #endif ``` This results in `bli_cpuid_query_id()` being called, which will return the `arch_t` ID value corresponding to the hardware detected by `CPUID`. (If your configuration family does not consist of x86_64 architectures, then you'll need some other heuristic to determine how to choose the correct sub-configuration at runtime. When in doubt, please [open an issue](https://github.com/flame/blis/issues) to begin a dialogue with developers.) 3. _**Update the configuration registry.**_ The last step is to update the `config_registry` file so that it defines the new family. Since we want the family to include only Intel sub-configurations that support AVX, we would add the following line: ``` intelavx: haswell sandybridge ``` Notice that we left out the Core2-based `penryn` sub-configuration since it targets hardware that only supports SSE vector instructions. ## Adding a new sub-configuration Adding support for a new sub-configuration to BLIS is similar to adding support for a family, though there are a few additional steps. Throughout this section, we will use the `knl` (Knight's Landing) configuration as an example to illustrate the typical changes necessary to various files in BLIS. 1. _**Create and populate the family directory.**_ First, we must create a directory in `config` that corresponds to the new sub-configuration. 
``` $ mkdir config/knl $ ls config amd64 cortexa15 excavator intel64 old power7 template bgq cortexa57 generic knc penryn sandybridge zen bulldozer cortexa9 haswell knl piledriver steamroller ``` We also need to create `bli_cntx_init_knl.c`, `bli_family_knl.h`, and `make_defs.mk` files inside our new sub-directory. Since they will be very similar to those of the `haswell` sub-configuration's files, we can copy those files over and then modify them accordingly: ``` $ cp config/haswell/bli_cntx_init_haswell.c config/knl/bli_cntx_init_knl.c $ cp config/haswell/bli_family_haswell.h config/knl/bli_family_knl.h $ cp config/haswell/make_defs.mk config/knl/ ``` First, we update the configuration name inside of `make_defs.mk`: ``` THIS_CONFIG := knl ``` and while we're editing the file, we can make any other changes to compiler flags we wish (if any). Similarly, the `bli_family_knl.h` header file should be updated as needed. Since the number of vector registers and the vector register size on `knl` differ from the defaults, we must explicitly set them. (The role of these parameters was explained in a [previous section](ConfigurationHowTo.md#bli_family_h).) Furthermore, provided that a macro `BLIS_NO_HBWMALLOC` is not set, we use a different implementation of `malloc()` and `free()` and `#include` that implementation's header file. ```c #define BLIS_SIMD_NUM_REGISTERS 32 #define BLIS_SIMD_SIZE 64 #ifdef BLIS_NO_HBWMALLOC #include <stdlib.h> #define BLIS_MALLOC_POOL malloc #define BLIS_FREE_POOL free #else #include <hbwmalloc.h> #define BLIS_MALLOC_POOL hbw_malloc #define BLIS_FREE_POOL hbw_free #endif ``` Finally, we update `bli_cntx_init_knl.c` to initialize the context with the appropriate kernel function pointers and blocksize values. The functions used to perform this initialization are explained in [an earlier section](ConfigurationHowTo.md#bli_cntx_init_c). 2. 
_**Add support within the framework source code.**_ Next, we need to update the BLIS framework source code so that the new sub-configuration is recognized and supported. Sub-configurations require updates to four files--six if hardware detection logic is added. * **`frame/include/bli_type_defs.h`**. First, we need to define an ID to associate with the microarchitecture for which we are adding support. All microarchitecture type IDs are defined in [bli_type_defs.h](https://github.com/flame/blis/blob/master/frame/include/bli_type_defs.h) as an enumerated type that we `typedef` to `arch_t`. To support `knl`, we add a new enumerated type value `BLIS_ARCH_KNL`: ```c typedef enum { BLIS_ARCH_KNL, BLIS_ARCH_KNC, BLIS_ARCH_HASWELL, BLIS_ARCH_SANDYBRIDGE, BLIS_ARCH_PENRYN, BLIS_ARCH_ZEN, BLIS_ARCH_EXCAVATOR, BLIS_ARCH_STEAMROLLER, BLIS_ARCH_PILEDRIVER, BLIS_ARCH_BULLDOZER, BLIS_ARCH_CORTEXA57, BLIS_ARCH_CORTEXA15, BLIS_ARCH_CORTEXA9, BLIS_ARCH_POWER7, BLIS_ARCH_BGQ, BLIS_ARCH_GENERIC } arch_t; ``` Additionally, you'll need to update the definition of `BLIS_NUM_ARCHS` to reflect the new total number of enumerated `arch_t` values: ```c #define BLIS_NUM_ARCHS 16 ``` * **`frame/base/bli_gks.c`**. We must also update the global kernel structure, or gks, to register the new sub-configuration during library initialization. Sub-configuration registration occurs in `bli_gks_init()`. For `knl`, updating this function amounts to inserting the following lines ```c #ifdef BLIS_CONFIG_KNL bli_gks_register_cntx( BLIS_ARCH_KNL, bli_cntx_init_knl, bli_cntx_init_knl_ref, bli_cntx_init_knl_ind ); #endif ``` This function submits pointers to various context initialization functions to the global kernel structure, which are then stored and called at the appropriate time. The functions **must** be named strictly according to the format shown in the example above, with `knl` replaced with the sub-configuration name. Also, note the call to `bli_gks_register_cntx` is guarded by `BLIS_CONFIG_KNL`. 
This macro is automatically `#defined` by the build system if and when the `knl` sub-configuration is enabled at configure-time, either directly as a singleton family or indirectly via an umbrella family. * **`frame/include/bli_arch_config.h`**. This file must be updated in two places. First, we must modify it to generate prototypes for the `bli_cntx_init_*()` functions, including the developer-provided function `bli_cntx_init_knl()` (defined in `config/knl/bli_cntx_init_knl.c`), by inserting: ```c #ifdef BLIS_CONFIG_KNL CNTX_INIT_PROTS( knl ) #endif ``` Here, the `CNTX_INIT_PROTS` macro generates the appropriate prototypes based on the name of the sub-configuration. Next, we must `#include` the `bli_family_knl.h` header file, just as we would if we were adding support for an umbrella family: ```c #ifdef BLIS_FAMILY_KNL #include "bli_family_knl.h" #endif ``` As before with umbrella families, the `BLIS_FAMILY_KNL` macro is automatically defined by the build system for whatever family was targeted by `configure`. (That is, if the user runs `./configure foobar`, the C preprocessor macro `BLIS_FAMILY_FOOBAR` will be defined.) * **`frame/base/bli_arch.c`**. This file must be updated so that `bli_arch_query_id()` returns the correct `arch_t` architecture ID value to the caller. `bli_arch_query_id()` is called when the framework is trying to choose which sub-configuration to use at runtime. When adding support for a sub-configuration as a singleton family, this amounts to adding a block of code such as: ```c #ifdef BLIS_FAMILY_KNL id = BLIS_ARCH_KNL; #endif ``` The `BLIS_FAMILY_KNL` macro is automatically `#defined` by the build system if the `knl` sub-configuration was targeted directly (as a singleton family) at configure-time. Other ID values are returned only if their respective family macros are defined. (Recall that only one family is ever enabled at a time.) 
If, however, the `knl` sub-configuration was enabled indirectly via an umbrella family, `bli_arch_query_id()` will return the `arch_t` ID value via the lines similar to the following: ```c #ifdef BLIS_FAMILY_INTEL64 id = bli_cpuid_query_id(); #endif #ifdef BLIS_FAMILY_AMD64 id = bli_cpuid_query_id(); #endif ``` Supporting runtime detection of `knl` microarchitectures requires adding `knl` support to `bli_cpuid_query_id()`, which is addressed in the next step (`bli_cpuid.c`). Before we finish editing the `bli_arch.c` file, we need to add a string label to the static array `config_name`: ```c static char* config_name[ BLIS_NUM_ARCHS ] = { "knl", "knc", "haswell", "sandybridge", "penryn", "zen", "excavator", "steamroller", "piledriver", "bulldozer", "cortexa57", "cortexa15", "cortexa9", "power7", "bgq", "generic" }; ``` This array is used by `bli_arch_string()` when mapping `arch_t` values to the strings associated with that architecture ID. Because the `arch_t` value is used as the index of each string, **the relative order of the strings in this array is important**. Be sure to insert the new string (in our case, `"knl"`) at the **same relative location** as the `arch_t` value inserted in `bli_type_defs.h`. This will ensure that each `arch_t` value will map to its corresponding string in the `config_name` array. * **`frame/base/bli_cpuid.c`**. To support the aforementioned runtime microarchitecture detection, the function `bli_cpuid_query_id()`, defined in [bli_cpuid.c](https://github.com/flame/blis/blob/master/frame/base/bli_cpuid.c), will need to be updated. Specifically, we need to insert logic that will detect the presence of the new hardware based on the results of the `CPUID` instruction (assuming the new microarchitecture belongs to the x86_64 architecture family). 
For example, when support for `knl` was added, this entailed adding the following code block to `bli_cpuid_query_id()`: ```c #ifdef BLIS_CONFIG_KNL if ( bli_cpuid_is_knl( family, model, features ) ) return BLIS_ARCH_KNL; #endif ``` Additionally, we had to define the function `bli_cpuid_is_knl()`, which checks for various processor features known to be present on `knl` systems and returns a boolean `TRUE` if all relevant feature checks are satisfied by the hardware. Note that the order in which we check for the sub-configurations is important. We must check for microarchitectural matches from most recent to most dated. This prevents an older sub-configuration from being selected on newer hardware when a newer sub-configuration would have also matched. * **`frame/base/bli_cpuid.h`**. After defining the function `bli_cpuid_is_knl()`, we must also update [bli_cpuid.h](https://github.com/flame/blis/blob/master/frame/base/bli_cpuid.h) to contain a prototype for the function. 3. _**Update the configuration registry.**_ Lastly, we update the `config_registry` file so that it defines the new sub-configuration. 
For example, if we want to define a sub-configuration called `knl` that used only `knl` kernels, we would add the following line: ``` knl: knl ``` If, when defining `bli_cntx_init_knl()`, we referenced kernels from a non-native kernel set--say, those of `haswell`--in addition to `knl`-specific kernels, we would need to explicitly pull in both `knl` and `haswell` kernel sets: ``` knl: knl/knl/haswell ``` ## Further Development Topics ### Querying the current configuration If you are ever unsure which configuration is "active", or the configuration parameters that were specified (or implied by default) at configure-time, simply run: ``` $ make showconfig configuration family: intel64 sub-configurations: haswell sandybridge penryn requisite kernels: haswell sandybridge penryn kernel-to-config map: haswell:haswell penryn:penryn sandybridge:sandybridge ----------------------- BLIS version string: 0.2.2-73 install prefix: /home/field/blis debugging status: off multithreading status: no enable BLAS API? yes enable CBLAS API? no build static library? yes build shared library? no ``` This will tell you the current configuration name, the [configuration registry lists](ConfigurationHowTo.md#printing-the-configuration-registry-lists), as well as other information stored by `configure` in the `config.mk` file. ### Header dependencies Due to the way the BLIS framework handles header files, **any** change to **any** header file will result in the entire library being rebuilt. This policy is in place mostly out of an abundance of caution. If two or more files use definitions in a header that is modified, and one or more of those files somehow does not get recompiled to reflect the updated definitions, you could end up sinking hours of time trying to track down a bug that didn't ever need to be an issue to begin with. 
Thus, to prevent developers (including the framework developer(s)) from shooting themselves in the foot with this problem, the BLIS build system recompiles **all** object files if any header file is touched. We apologize for the inconvenience this may cause. ### Still have questions? If you have further questions about BLIS configurations, please do not hesitate to contact the BLIS developer community. To do so, simply join and post to the [blis-devel](http://groups.google.com/group/blis-devel) mailing list. *** blis-0.6.1/docs/FAQ.md000066400000000000000000000555611360743507500143560ustar00rootroot00000000000000## Introduction Here we attempt to provide some frequently-asked questions about the BLIS framework project, as well as those we think a new user or developer might ask. If you do not see the answer to your question here, please join and post your question to one of the [BLIS mailing lists](https://github.com/flame/blis#discussion). ## Contents * [Why did you create BLIS?](FAQ.md#why-did-you-create-blis) * [Why should I use BLIS instead of GotoBLAS / OpenBLAS / ATLAS / MKL / ESSL / ACML / Accelerate?](FAQ.md#why-should-i-use-blis-instead-of-gotoblas--openblas--atlas--mkl--essl--acml--accelerate) * [How is BLIS related to FLAME / libflame?](FAQ.md#how-is-blis-related-to-flame--libflame) * [Does BLIS automatically detect my hardware?](FAQ.md#does-blis-automatically-detect-my-hardware) * [I understand that BLIS is mostly a tool for developers?](FAQ.md#i-understand-that-blis-is-mostly-a-tool-for-developers) * [How do I link against BLIS?](FAQ.md#how-do-i-link-against-blis) * [Must I use git? Can I download a tarball?](FAQ.md#must-i-use-git-can-i-download-a-tarball) * [What is a microkernel?](FAQ.md#what-is-a-microkernel) * [What is a macrokernel?](FAQ.md#what-is-a-macrokernel) * [What is a context?](FAQ.md#what-is-a-context) * [I am used to thinking in terms of column-major/row-major storage and leading dimensions. 
What is a "row stride" / "column stride"?](FAQ.md#im-used-to-thinking-in-terms-of-column-majorrow-major-storage-and-leading-dimensions-what-is-a-row-stride--column-stride) * [What does it mean when a matrix with general stride is column-tilted or row-tilted?](FAQ.md#what-does-it-mean-when-a-matrix-with-general-stride-is-column-tilted-or-row-tilted) * [I am not really interested in all of these newfangled features in BLIS. Can I just use BLIS as a BLAS library?](FAQ.md#im-not-really-interested-in-all-of-these-newfangled-features-in-blis-can-i-just-use-blis-as-a-blas-library) * [What about CBLAS?](FAQ.md#what-about-cblas) * [Can I call the native BLIS API from Fortran-77/90/95/2000/C++/Python?](FAQ.md#can-i-call-the-native-blis-api-from-fortran-7790952000cpython) * [Do I need to call initialization/finalization functions before being able to use BLIS from my application?](FAQ.md#do-i-need-to-call-initializationfinalization-functions-before-being-able-to-use-blis-from-my-application) * [Does BLIS support multithreading?](FAQ.md#does-blis-support-multithreading) * [Does BLIS support NUMA environments?](FAQ.md#does-blis-support-numa-environments) * [Does BLIS work with GPUs?](FAQ.md#does-blis-work-with-gpus) * [Does BLIS work on (some architecture)?](FAQ.md#does-blis-work-on-some-architecture) * [What about distributed-memory parallelism?](FAQ.md#what-about-distributed-memory-parallelism) * [Can I build BLIS on Windows / Mac OS X?](FAQ.md#can-i-build-blis-on-windows--mac-os-x) * [Can I build BLIS as a shared library?](FAQ.md#can-i-build-blis-as-a-shared-library) * [Can I use the mixed domain / mixed precision support in BLIS?](FAQ.md#can-i-use-the-mixed-domain--mixed-precision-support-in-blis) * [Who is involved in the project?](FAQ.md#who-is-involved-in-the-project) * [Who funded the development of BLIS?](FAQ.md#who-funded-the-development-of-blis) * [I found a bug. 
How do I report it?](FAQ.md#i-found-a-bug-how-do-i-report-it) * [How do I request a new feature?](FAQ.md#how-do-i-request-a-new-feature) * [What is the difference between this version of BLIS and the one that AMD maintains?](FAQ.md#what-is-the-difference-between-this-version-of-blis-and-the-one-that-amd-maintains) * [Who do I contact if I have a question about the AMD version of BLIS?](FAQ.md#who-do-i-contact-if-i-have-a-question-about-the-amd-version-of-blis) * [Where did you get the photo for the BLIS logo / mascot?](FAQ.md#where-did-you-get-the-photo-for-the-blis-logo--mascot) ### Why did you create BLIS? Initially, BLIS was conceived as simply "BLAS with a more flexible interface". The original BLIS was written as a wrapper layer around BLAS that allowed generalized matrix storage (i.e., separate row and column strides). We also took the opportunity to implement some complex domain features that were missing from the BLAS (mostly related to conjugating input operands). This "proto-BLIS" was deployed in [libflame](http://shpc.ices.utexas.edu/libFLAME.html) to facilitate cleaner implementations of some LAPACK-level operations. Over time, we wanted more than just a more flexible interface; we wanted an entire framework from which we could build operations in the BLAS as well as those not present within the BLAS. After this new BLIS framework was created, it turned out that the interface improvements were much less interesting (albeit still of consequence) than some of the framework's other features, and the fact that it allowed developers to rapidly instantiate new BLAS libraries by optimizing only a small amount of code. ### Why should I use BLIS instead of GotoBLAS / OpenBLAS / ATLAS / MKL / ESSL / ACML / Accelerate? BLIS has numerous advantages to existing BLAS implementations. Many of these advantages are summarized on the [BLIS homepage](https://github.com/flame/blis#key-features). 
But here are a few reasons one might choose BLIS over some other implementation of BLAS: * BLIS facilitates high performance while remaining very portable. BLIS isolates performance-sensitive code to a microkernel which contains only one loop and which, when optimized, accelerates virtually all level-3 operations. Thus, BLIS serves as a powerful tool for quickly instantiating BLAS on new or experimental hardware architectures, as well as a flexible "laboratory" in which to conduct research and experiments. * BLIS provides robust multithreading support, allowing symmetric multicore/many-core parallelism via either OpenMP or POSIX threads. It also computes proper load balance for structured matrix subpartitions, regardless of the location of the diagonal, or whether the subpartition is lower- or upper-stored. * BLIS supports a superset of BLAS functionality, providing operations omitted from the BLAS as well as some complex domain support that is missing in BLAS operations. BLIS is especially useful to researchers who need to develop and prototype new BLAS-like operations that do not exist in the BLAS. * BLIS is backwards compatible with BLAS. BLIS contains a BLAS compatibility layer that allows an application to treat BLIS as if it were a traditional BLAS library. * BLIS supports generalized matrix storage, which can be used to express column-major, row-major, and general stride storage. * BLIS supports mixed-datatype computation for general matrix multiplication `gemm`, and does so while holding the impact on performance to a relative minimum. * BLIS is free software, available under a [new/modified/3-clause BSD license](http://opensource.org/licenses/BSD-3-Clause). ### How is BLIS related to FLAME / `libflame`? As explained [above](FAQ.md#why-did-you-create-blis), BLIS was initially a layer within `libflame` that allowed more convenient interfacing to the BLAS. So in some ways, BLIS is a spin-off project. 
Prior to developing BLIS, [its author](http://www.cs.utexas.edu/users/field/) worked as the primary maintainer of `libflame`. If you look closely, you can also see that the design of BLIS was influenced by some of the more useful and innovative aspects of `libflame`, such as internal object abstractions and control trees. Also, various members of the [SHPC research group](http://shpc.ices.utexas.edu/people.html) and its [collaborators](http://shpc.ices.utexas.edu/collaborators.html) routinely provide insight, feedback, and also contribute code (especially kernels) to the BLIS project. ### Does BLIS automatically detect my hardware? On certain architectures (most notably x86_64), yes. In order to use auto-detection, you must specify `auto` as your configuration when running `configure`. (Please see the BLIS [Build System](BuildSystem.md) guide for more info.) A runtime detection option is also available. (Please see the [Configuration Guide](ConfigurationHowTo.md) for a comprehensive walkthrough.) If automatic hardware detection is requested at configure-time and the build process does not recognize your architecture, the `generic` configuration is selected. ### I understand that BLIS is mostly a tool for developers? Yes. In order to achieve high performance, BLIS requires that hand-coded kernels and microkernels be written and referenced in a valid [BLIS configuration](ConfigurationHowTo.md). These components are usually written by developers and then included within BLIS for use by others. The good news, however, is that end-users can use BLIS too. Once the aforementioned kernels are integrated into BLIS, they can be used without any developer-level knowledge, and many kernels have already been added! Usually, `./configure auto; make; make install` is sufficient for the typical users with typical hardware. ### How do I link against BLIS? Linking against BLIS is easy! Most people can link to it as if it were a generic BLAS library. 
Please see the [Linking against BLIS](BuildSystem.md#linking-against-blis) section of the [Build System](BuildSystem.md) guide. ### Must I use git? Can I download a tarball? We **strongly encourage** you to obtain the BLIS source code by cloning a `git` repository (via the [git clone](BuildSystem.md#obtaining-blis) command). The reason for this is that it will allow you to easily update your local copy of BLIS by executing `git pull`. Tarballs and zip files may be obtained from the [releases](https://github.com/flame/blis/releases) page. ### What is a microkernel? The microkernel (usually short for "`gemm` microkernel") is the basic unit of level-3 (matrix-matrix) computation within BLIS. It consists of one loop, where each iteration performs a very small outer product to update a very small matrix. The microkernel is typically the only piece of code that must be carefully optimized (via vector intrinsics or assembly code) to enable high performance in most of the level-3 operations such as `gemm`, `hemm`, `herk`, and `trmm`. For a more thorough explanation of the microkernel and its role in the overall level-3 computations, please read our [ACM TOMS papers](https://github.com/flame/blis#citations). For API and technical reference, please see the [gemm microkernel section](KernelsHowTo.md#gemm-microkernel) of the BLIS [Kernels Guide](KernelsHowTo.md). ### What is a macrokernel? The macrokernels are portable codes within the BLIS framework that implement relatively small subproblems within an overall level-3 operation. The overall problem (say, general matrix-matrix multiplication, or `gemm`) is partitioned down, according to cache blocksizes, such that its operands are (1) a suitable size and (2) stored in a special packed format. At that time, the macrokernel is called. The macrokernel is implemented as two loops around the microkernel. 
The macrokernels in BLIS correspond to the so-called "inner kernels" (or simply "kernels") that formed the fundamental unit of computation in Kazushige Goto's GotoBLAS (and now in the successor library, OpenBLAS). For more information on macrokernels, please read our [ACM TOMS papers](https://github.com/flame/blis#citations). ### What is a context? As of 0.2.0, BLIS contains a new infrastructure for communicating runtime information (such as kernel addresses and blocksizes) from the highest levels of code all the way down the function stack, even into the kernels themselves. This new data structure is called a *context* (defined in code as a `cntx_t` type), and together with its API it helped us clean up some hacks and other awkwardness that existed in BLIS prior to 0.2.0. Contexts also lay the groundwork for managing kernels and related kernel information at runtime. If you are a kernel developer, you can usually ignore the `cntx_t*` argument that is passed into each kernel, since the kernels already inherently "know" this information (such as register blocksizes). And if you are a user, and the function you want to call takes a `cntx_t*` argument, you can safely pass in `NULL` and BLIS will automatically build a suitable context for you at runtime. ### I'm used to thinking in terms of column-major/row-major storage and leading dimensions. What is a "row stride" / "column stride"? Traditional BLAS assumes that matrices are stored in column-major order (or, as we often say, matrices that are "column-stored"), where a leading dimension measures the distance from one element to the next element in the same row. But column-major order is really just a special case of BLIS's more generalized storage scheme. In generalized storage, we have a row stride and a column stride. The row stride measures the distance in memory between rows (within a single column) while the column stride measures the distance between columns (within a single row). 
Column-major storage corresponds to the situation where the row stride equals 1. Since the row stride is unit, you only have to track the column stride (i.e., the leading dimension). Similarly, in row-major order, the column stride is equal to 1 and only the row stride must be tracked. BLIS also supports situations where both the row stride and column stride are non-unit. We call this situation "general stride". ### What does it mean when a matrix with general stride is column-tilted or row-tilted? When a matrix is stored with general stride, both the row stride and column stride (let's call them `rs` and `cs`) are non-unit. When `rs` < `cs`, we call the general stride matrix "column-tilted" because it is "closer" to being column-stored (than row-stored). Similarly, when `rs` > `cs`, the matrix is "row-tilted" because it is closer to being row-stored. ### I'm not really interested in all of these newfangled features in BLIS. Can I just use BLIS as a BLAS library? Absolutely. Just link your application to BLIS the same way you would link to a BLAS library. For a simple linking example, see the [Linking against BLIS](BuildSystem.md#linking-against-blis) section of the BLIS [Build System](BuildSystem.md) guide. ### What about CBLAS? BLIS also contains an optional CBLAS compatibility layer, which leverages the BLAS compatibility layer to help map CBLAS function calls to the corresponding functionality in BLIS. Once BLIS is built with CBLAS support, your application can access CBLAS prototypes via either `cblas.h` or `blis.h`. At the time of this writing, CBLAS support is disabled by default, so be sure to enable it at configure-time. Please see `./configure --help` for the syntax for enabling CBLAS. ### Can I call the native BLIS API from Fortran-77/90/95/2000/C++/Python? In principle, BLIS's native (and BLAS-like) [typed API](BLISTypedAPI.md) can be called from Fortran.
However, you must ensure that the size of the integer in BLIS is equal to the size of integer used by your Fortran program/compiler/environment. The size of BLIS integers is determined at configure-time. Please see `./configure --help` for the syntax for options related to integer sizes. As for bindings to other languages, please contact the [blis-devel](http://groups.google.com/group/blis-devel) mailing list. ### Do I need to call initialization/finalization functions before being able to use BLIS from my application? Originally, BLIS did indeed require the application to explicitly set up (initialize) various internal data structures via `bli_init()`. Likewise, calling `bli_finalize()` was recommended to clean up (finalize) the library. However, since commit 9804adf (circa December 2017), BLIS has implemented self-initialization. These explicit calls to `bli_init()` and `bli_finalize()` are no longer necessary, though experts may still use them in special cases to control the allocation and freeing of resources. This topic is discussed in the BLIS [typed API reference](BLISTypedAPI.md#initialization-and-cleanup). ### Does BLIS support multithreading? Yes! BLIS supports multithreading (via OpenMP or POSIX threads) for all of its level-3 operations. For more information on enabling and controlling multithreading, please see the [Multithreading](Multithreading.md) guide. BLIS is also thread-safe so that you can call BLIS from threads within a multithreaded library or application. BLIS derives its thread-safety via unconditional use of features present in POSIX threads (pthreads). These pthreads features are employed for thread-safety regardless of whether BLIS is configured for OpenMP multithreading, pthreads multithreading, or single-threaded execution. ### Does BLIS support NUMA environments? We have integrated some early foundational support for NUMA *development*, but currently BLIS will execute sub-optimally on NUMA systems.
If you are interested in adapting BLIS to a NUMA architecture, please contact us via the [blis-devel](http://groups.google.com/group/blis-devel) mailing list. ### Does BLIS work with GPUs? BLIS does not currently support graphics processing units (GPUs). However, others have applied the BLIS approach towards frameworks that provide BLAS-like functionality on GPUs. To see how NVIDIA's implementation compares to an analogous approach based on the principles that underlie BLIS, please see a paper by some of our collaborators, ["Implementing Strassen’s Algorithm with CUTLASS on NVIDIA Volta GPUs"](https://apps.cs.utexas.edu/apps/sites/default/files/tech_reports/GPUStrassen.pdf). ### Does BLIS work on _(some architecture)_? Please see the BLIS [Hardware Support](HardwareSupport.md) guide for a full list of supported architectures. If your favorite hardware is not listed and you have the expertise, please consider developing your own kernels and sharing them with the project! We will, of course, gratefully credit your contribution. ### What about distributed-memory parallelism? No. BLIS is a framework for sequential and shared-memory/multicore implementations of BLAS-like operations. If you need distributed-memory dense linear algebra implementations, we recommend the [Elemental](http://libelemental.org/) library. ### Can I build BLIS on Windows / Mac OS X? BLIS was designed for use in a GNU/Linux environment. However, we've gone to great lengths to keep BLIS compatible with other UNIX-like systems as well, such as BSD and OS X. System software requirements for UNIX-like systems are discussed in the BLIS [Build System](BuildSystem.md) guide. Building in Windows is not directly supported. However, Windows 10 now provides a Linux-like environment. We suspect this is the best route for those trying to build BLIS in Windows. If all you need is a Windows DLL of BLIS, you may be in luck!
BLIS uses [AppVeyor](https://ci.appveyor.com/) to automatically produce dynamically-linked libraries, which are preserved on the site as "artifacts". To try it out, just visit the [BLIS AppVeyor page](https://ci.appveyor.com/project/shpc/blis/), click on the `LIB_TYPE=shared` link for the most recent build, and then click on "Artifacts". And if you'd like to share your experiences, please join the [blis-devel](http://groups.google.com/group/blis-devel) mailing list and send us a message! ### Can I build BLIS as a shared library? Yes. By default, most configurations output only a static library archive (e.g. `.a` file). However, you can also request a shared object (e.g. `.so` file), sometimes also called a "dynamically-linked" library. For information on enabling shared library output, simply run `./configure --help`. ### Can I use the mixed domain / mixed precision support in BLIS? Yes! As of 5fec95b (circa October 2018), BLIS supports mixed-datatype (mixed domain and/or mixed precision) computation via the `gemm` operation. Documentation on utilizing this new functionality is provided via the [MixedDatatypes.md](MixedDatatypes.md) document in the source distribution. If this feature is important or useful to your work, we would love to hear from you. Please contact us via the [blis-devel](http://groups.google.com/group/blis-devel) mailing list and tell us about your application and why you need/want support for BLAS-like operations with mixed-domain/mixed-precision operands. ### Who is involved in the project? Lots of people! For a full list of those involved, see the [CREDITS](https://github.com/flame/blis/blob/master/CREDITS) file within the BLIS framework source distribution. ### Who funded the development of BLIS?
BLIS was primarily funded by grants from [Microsoft](https://www.microsoft.com/), [Intel](https://www.intel.com/), [Texas Instruments](https://www.ti.com/), [AMD](https://www.amd.com/), [Huawei](https://www.huawei.com/us/), [Oracle](https://www.oracle.com/), and [Facebook](https://www.facebook.com/) as well as grants from the [National Science Foundation](http://www.nsf.gov/) (Awards CCF-0917167, ACI-1148125/1340293, and CCF-1320112). Reminder: _Any opinions, findings and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the National Science Foundation (NSF)._ ### I found a bug. How do I report it? If you think you've found a bug, we request that you [open an issue](http://github.com/flame/blis/issues). Don't be shy! Really, it's the best and most convenient way for us to track your issues/bugs/concerns. Other discussions that are not primarily bug-reports should take place via the [blis-devel](http://groups.google.com/group/blis-devel) mailing list. ### How do I request a new feature? Feature requests should also be submitted by [opening a new issue](http://github.com/flame/blis/issues). ### What is the difference between this version of BLIS and the one that AMD maintains? AMD has chosen BLIS as the open-source foundation for the BLAS component of their [AMD Optimizing CPU Libraries (AOCL)](https://developer.amd.com/amd-aocl/) toolkit. Our group enjoys a great collaboration and partnership with AMD, and we are pleased to have their enthusiastic support for our project. At a technical level, AMD's fork of BLIS is considered to be a downstream variant. AMD uses their fork to develop optimizations specific to AMD hardware. Occasionally, AMD will submit pull requests to merge their features, enhancements, and fixes back into our "plain vanilla" upstream repository.
So our upstream BLIS will eventually contain most of the modifications originally developed by AMD in their fork, but with a lag. Similarly, features introduced into the upstream BLIS may not be immediately available in AMD's fork, but eventually their team will perform a merge and synchronize with our latest code. AMD also uses a different versioning system for AOCL which is independent of the versions used by the [upstream BLIS](http://github.com/flame/blis) project. ### Who do I contact if I have a question about the AMD version of BLIS? For questions or support regarding [AMD's fork of BLIS](https://github.com/amd/blis), please contact the [AMD Optimizing CPU Libraries](https://developer.amd.com/amd-aocl/) group at aoclsupport@amd.com. ### Where did you get the photo for the BLIS logo / mascot? The sleeping ["BLIS cat"](https://github.com/flame/blis/blob/master/README.md) photo was taken by Petar Mitchev and is used with his permission. blis-0.6.1/docs/HardwareSupport.md000066400000000000000000000134571360743507500170770ustar00rootroot00000000000000## Introduction This wiki is intended to track the support for various hardware types within the BLIS framework source distribution. We apologize if this wiki falls out of date. For the latest support, we recommend peeking inside of the relevant sub-configuration (specifically, in the `bli_cntx_init_.c` file) and looking at which kernels are registered. You may also contact the [blis-devel](http://groups.google.com/group/blis-devel) mailing list. ## Level-3 microkernels The following table lists architectures for which there exist optimized level-3 microkernels, which microkernels are optimized, the name of the author or maintainer, and the current status of the microkernels. 
A few remarks / reminders: * Optimizing only the [gemm microkernel](KernelsHowTo.md#gemm-microkernel) will result in optimal performance for all [level-3 operations](BLISTypedAPI.md#level-3-operations) except `trsm` (which will typically achieve 60 - 80% of attainable peak performance). * The [trsm](BLISTypedAPI.md#trsm) operation needs the [gemmtrsm microkernel(s)](KernelsHowTo.md#gemmtrsm-microkernels), in addition to the aforementioned [gemm microkernel](KernelsHowTo.md#gemm-microkernel), in order to reach optimal performance. * Induced complex (1m) implementations are employed in all situations where the real domain [gemm microkernel](KernelsHowTo.md#gemm-microkernel) of the corresponding precision is available, but the "native" complex domain gemm microkernel is unavailable. Note that the table below lists native kernels, so if a microarchitecture lists only `sd`, support for both `c` and `z` datatypes will be provided via the 1m method. (Note: most people cannot tell the difference between native and 1m-based performance.) Please see our [ACM TOMS article on the 1m method](https://github.com/flame/blis#citations) for more info on this topic. * Some microarchitectures use the same sub-configuration. *This is not a typo.* For example, Haswell and Broadwell systems as well as "desktop" (non-server) versions of Skylake, Kaby Lake, and Coffee Lake all use the `haswell` sub-configuration and the kernels registered therein. Microkernels can be recycled in this manner because the key detail that determines level-3 performance outcomes is actually the vector ISA, not the microarchitecture. In the previous example, all of the microarchitectures listed support AVX2 (but not AVX-512), and therefore they can reuse the same microkernels. * Remember that you (usually) don't have to choose your sub-configuration manually! Instead, you can always request configure-time hardware detection via `./configure auto`.
This will defer to internal logic (based on CPUID for x86_64 systems) that will attempt to choose the appropriate sub-configuration automatically. * There is a difficulty in automatically choosing the ideal sub-configuration for use on Skylake-X systems, which may have one or two FMA units. The `skx` sub-configuration is only beneficial when used on hardware with two FMA units. Otherwise the hardware is treated as a "desktop" Skylake system, which uses the `haswell` sub-configuration. Furthermore, the number of units can't be queried directly; instead, we rely on a manually-maintained list of CPU models (via logic in `frame/base/bli_cpuid.c`), which may be incorrect for new processors, particularly Gold models. In that case, you can either fix the code (and please raise an issue!) or manually target the `skx` at configure-time (i.e., `./configure [options] skx`). If your performance seems low, you can set `export BLIS_ARCH_DEBUG=1`, which will cause BLIS to output some basic debugging info to `stderr` that will reveal whether your system was detected as having one or two VPUs (FMA units). 
| Vendor/Microarchitecture | BLIS sub-configuration | `gemm` | `gemmtrsm` | |:-------------------------------------|:-----------------------|:-------|:-----------| | AMD Bulldozer (AVX/FMA4) | `bulldozer` | `sdcz` | | | AMD Piledriver (AVX/FMA3) | `piledriver` | `sdcz` | | | AMD Steamroller (AVX/FMA3) | `steamroller` | `sdcz` | | | AMD Excavator (AVX/FMA3) | `excavator` | `sdcz` | | | AMD Zen (AVX/FMA3) | `zen` | `sdcz` | `sd` | | Intel Core2 (SSE3) | `penryn` | `sd` | `d` | | Intel Sandy/Ivy Bridge (AVX/FMA3) | `sandybridge` | `sdcz` | | | Intel Haswell, Broadwell (AVX/FMA3) | `haswell` | `sdcz` | `sd` | | Intel Sky/Kaby/CoffeeLake (AVX/FMA3) | `haswell` | `sdcz` | `sd` | | Intel Knights Landing (AVX-512/FMA3) | `knl` | `sd` | | | Intel SkylakeX (AVX-512/2×FMA3) | `skx` | `sd` | | | Intel SkylakeX (AVX-512/1×FMA3) | `haswell` | `sdcz` | `sd` | | ARMv7 Cortex-A9 (NEON) | `cortex-a9` | `sd` | | | ARMv7 Cortex-A15 (NEON) | `cortex-a15` | `sd` | | | ARMv8 Cortex-A53 (NEON) | `cortex-a53` | `sd` | | | ARMv8 Cortex-A57 (NEON) | `cortex-a57` | `sd` | | | IBM Blue Gene/Q (QPX int) | `bgq` | `d` | | | IBM Power7 (QPX int) | `power7` | `d` | | | template (C99) | `template` | `sdcz` | `sdcz` | ## Level-1f kernels Not yet written. Please see the relevant sub-configuration (`bli_cntx_init_.c`) to determine which kernels are implemented/registered. ## Level-1v kernels Not yet written. Please see the relevant sub-configuration (`bli_cntx_init_.c`) to determine which kernels are implemented/registered. 
blis-0.6.1/docs/KernelsHowTo.md000066400000000000000000001672631360743507500163360ustar00rootroot00000000000000## Contents * **[Contents](KernelsHowTo.md#contents)** * **[Introduction](KernelsHowTo.md#introduction)** * **[BLIS kernels summary](KernelsHowTo.md#blis-kernels-summary)** * [Level-3](KernelsHowTo.md#level-3) * [Level-1f](KernelsHowTo.md#level-1f) * [Level-1v](KernelsHowTo.md#level-1v) * [Level-1v/-1f Dependencies for Level-2 operations](KernelsHowTo.md#level-1v-1f-dependencies-for-level-2-operations) * **[Calling kernels](KernelsHowTo.md#calling-kernels)** * **[BLIS kernels reference](KernelsHowTo.md#blis-kernels-reference)** * [Level-3 microkernels](KernelsHowTo.md#level-3-microkernels) * [Level-1f kernels](KernelsHowTo.md#level-1f-kernels) * [Level-1v kernels](KernelsHowTo.md#level-1v-kernels) ## Introduction This wiki describes the computational kernels used by the BLIS framework. One of the primary features of BLIS is that it provides a large set of dense linear algebra functionality while simultaneously minimizing the amount of kernel code that must be optimized for a given architecture. BLIS does this by isolating a handful of kernels which, when implemented, facilitate functionality and performance of several of the higher-level operations. 
Presently, BLIS supports several groups of operations: * **[Level-1v](BLISTypedAPI.md#level-1v-operations)**: Operations on vectors: * [addv](BLISTypedAPI.md#addv), [amaxv](BLISTypedAPI.md#amaxv), [axpyv](BLISTypedAPI.md#axpyv), [copyv](BLISTypedAPI.md#copyv), [dotv](BLISTypedAPI.md#dotv), [dotxv](BLISTypedAPI.md#dotxv), [invertv](BLISTypedAPI.md#invertv), [scal2v](BLISTypedAPI.md#scal2v), [scalv](BLISTypedAPI.md#scalv), [setv](BLISTypedAPI.md#setv), [subv](BLISTypedAPI.md#subv), [swapv](BLISTypedAPI.md#swapv) * **[Level-1d](BLISTypedAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals: * [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [subd](BLISTypedAPI.md#subd) * **[Level-1m](BLISTypedAPI.md#level-1m-operations)**: Element-wise operations on matrices: * [addm](BLISTypedAPI.md#addm), [axpym](BLISTypedAPI.md#axpym), [copym](BLISTypedAPI.md#copym), [scalm](BLISTypedAPI.md#scalm), [scal2m](BLISTypedAPI.md#scal2m), [setm](BLISTypedAPI.md#setm), [subm](BLISTypedAPI.md#subm) * **[Level-1f](BLISTypedAPI.md#level-1f-operations)**: Fused operations on multiple vectors: * [axpy2v](BLISTypedAPI.md#axpy2v), [dotaxpyv](BLISTypedAPI.md#dotaxpyv), [axpyf](BLISTypedAPI.md#axpyf), [dotxf](BLISTypedAPI.md#dotxf), [dotxaxpyf](BLISTypedAPI.md#dotxaxpyf) * **[Level-2](BLISTypedAPI.md#level-2-operations)**: Operations with one matrix and (at least) one vector operand: * [gemv](BLISTypedAPI.md#gemv), [ger](BLISTypedAPI.md#ger), [hemv](BLISTypedAPI.md#hemv), [her](BLISTypedAPI.md#her), [her2](BLISTypedAPI.md#her2), [symv](BLISTypedAPI.md#symv), [syr](BLISTypedAPI.md#syr), [syr2](BLISTypedAPI.md#syr2), [trmv](BLISTypedAPI.md#trmv), [trsv](BLISTypedAPI.md#trsv) * **[Level-3](BLISTypedAPI.md#level-3-operations)**: Operations with matrices that are 
multiplication-like: * [gemm](BLISTypedAPI.md#gemm), [hemm](BLISTypedAPI.md#hemm), [herk](BLISTypedAPI.md#herk), [her2k](BLISTypedAPI.md#her2k), [symm](BLISTypedAPI.md#symm), [syrk](BLISTypedAPI.md#syrk), [syr2k](BLISTypedAPI.md#syr2k), [trmm](BLISTypedAPI.md#trmm), [trmm3](BLISTypedAPI.md#trmm3), [trsm](BLISTypedAPI.md#trsm) * **[Utility](BLISTypedAPI.md#Utility-operations)**: Miscellaneous operations on matrices and vectors: * [asumv](BLISTypedAPI.md#asumv), [norm1v](BLISTypedAPI.md#norm1v), [normfv](BLISTypedAPI.md#normfv), [normiv](BLISTypedAPI.md#normiv), [norm1m](BLISTypedAPI.md#norm1m), [normfm](BLISTypedAPI.md#normfm), [normim](BLISTypedAPI.md#normim), [mkherm](BLISTypedAPI.md#mkherm), [mksymm](BLISTypedAPI.md#mksymm), [mktrim](BLISTypedAPI.md#mktrim), [fprintv](BLISTypedAPI.md#fprintv), [fprintm](BLISTypedAPI.md#fprintm),[printv](BLISTypedAPI.md#printv), [printm](BLISTypedAPI.md#printm), [randv](BLISTypedAPI.md#randv), [randm](BLISTypedAPI.md#randm), [sumsqv](BLISTypedAPI.md#sumsqv) Most of the interest with BLAS libraries centers around level-3 operations because they exhibit favorable ratios of floating-point operations (flops) to memory operations (memops), which allows high performance. Some applications also require level-2 computation; however, these operations are at an inherent disadvantage on modern architectures due to their less favorable flop-to-memop ratio. The BLIS framework allows developers to quickly and easily build high performance level-3 operations, as well as relatively well-performing level-2 operations, simply by optimizing a small set of kernels. These kernels, and their relationship to the other higher-level operations supported by BLIS, are the subject of this wiki. Some level-1v, level-1m, and level-1d operations may also be accelerated, but since they are memory-bound, optimization typically yields minor performance improvement. 
--- ## BLIS kernels summary This section lists and briefly describes each of the main computational kernels supported by the BLIS framework. (Other kernels are supported, but they are not of interest to most developers.) ### Level-3 BLIS supports the following three level-3 microkernels. These microkernels are used to implement optimized level-3 operations. * **gemm**: The `gemm` microkernel performs a small matrix multiplication and is used by every level-3 operation. * **trsm**: The `trsm` microkernel performs a small triangular solve with multiple right-hand sides. It is not required for optimal performance and in fact is only needed when the developer opts to not implement the fused `gemmtrsm` kernel. * **gemmtrsm**: The `gemmtrsm` microkernel implements a fused operation whereby a `gemm` and a `trsm` subproblem are fused together in a single routine. This avoids redundant memory operations that would otherwise be incurred if the operations were executed separately. The following shows the steps one would take to optimize, to varying degrees, the level-3 operations supported by BLIS: 1. By implementing and optimizing the `gemm` microkernel, **all** level-3 operations **except** `trsm` are fully optimized. In this scenario, the `trsm` operation may achieve 60-90% of attainable peak performance, depending on the architecture and problem size. 1. If one goes further and implements and optimizes the `trsm` microkernel, this kernel, when paired with an optimized `gemm` microkernel, results in a `trsm` implementation that is accelerated (but not optimized). 1. Alternatively, if one implements and optimizes the fused `gemmtrsm` microkernel, this kernel, when paired with an optimized `gemm` microkernel, enables a fully optimized `trsm` implementation. ### Level-1f BLIS supports the following five level-1f (fused) kernels. 
These kernels are used to implement optimized level-2 operations (as well as self-similar level-1f operations; that is, the `axpyf` kernel can be invoked indirectly via the `axpyf` operation). * **axpy2v**: Performs and fuses two [axpyv](BLISTypedAPI.md#axpyv) operations, accumulating to the same output vector. * **dotaxpyv**: Performs and fuses a [dotv](BLISTypedAPI.md#dotv) followed by an [axpyv](BLISTypedAPI.md#axpyv) operation with x. * **axpyf**: Performs and fuses some implementation-dependent number of [axpyv](BLISTypedAPI.md#axpyv) operations, accumulating to the same output vector. Can also be expressed as a [gemv](BLISTypedAPI.md#gemv) operation where matrix A is _m x nf_, where nf is the number of fused operations (fusing factor). * **dotxf**: Performs and fuses some implementation-dependent number of [dotxv](BLISTypedAPI.md#dotxv) operations, reusing the `y` vector for each [dotxv](BLISTypedAPI.md#dotxv). * **dotxaxpyf**: Performs and fuses a [dotxf](BLISTypedAPI.md#dotxf) and [axpyf](BLISTypedAPI.md#axpyf) in which the matrix operand is reused. ### Level-1v BLIS supports the following 14 level-1v kernels. These kernels are used primarily to implement their self-similar operations. However, they are occasionally used to handle special cases of level-1f kernels or in situations where level-2 operations are partially optimized. * **addv**: Performs a [vector addition](BLISTypedAPI.md#addv) operation. * **amaxv**: Performs a [search for the index of the element with the largest absolute value (or complex modulus)](BLISTypedAPI.md#amaxv). * **axpyv**: Performs a [vector scale-and-accumulate](BLISTypedAPI.md#axpyv) operation. * **axpbyv**: Performs an [extended vector scale-and-accumulate](BLISTypedAPI.md#axpbyv) operation similar to axpyv except that the output vector is scaled by a second scalar. 
* **copyv**: Performs a [vector copy](BLISTypedAPI.md#copyv) operation. * **dotv**: Performs a [dot product](BLISTypedAPI.md#dotv) where the output scalar is overwritten. * **dotxv**: Performs an [extended dot product](BLISTypedAPI.md#dotxv) operation where the dot product is first scaled and then accumulated into a scaled output scalar. * **invertv**: Performs an [element-wise vector inversion](BLISTypedAPI.md#invertv) operation. * **scalv**: Performs an [in-place (destructive) vector scaling](BLISTypedAPI.md#scalv) operation. * **scal2v**: Performs an [out-of-place (non-destructive) vector scaling](BLISTypedAPI.md#scal2v) operation. * **setv**: Performs a [vector broadcast](BLISTypedAPI.md#setv) operation. * **subv**: Performs a [vector subtraction](BLISTypedAPI.md#subv) operation. * **swapv**: Performs a [vector swap](BLISTypedAPI.md#swapv) operation. * **xpbyv**: Performs an [alternate vector scale-and-accumulate](BLISTypedAPI.md#xpbyv) operation. ### Level-1v/-1f Dependencies for Level-2 operations The table below shows dependencies between level-2 operations and each of the level-1v and level-1f kernels. Kernels marked with a "1" for a given level-2 operation are preferred for optimization because they facilitate an optimal implementation on most architectures. Kernels marked with a "2", "3", or "4" denote those which need to be optimized for alternative implementations that would typically be second, third, or fourth choices, respectively, if the preferred kernels are not optimized.
| operation / kernel | effective storage | `axpyv` | `dotxv` | `axpy2v` | `dotaxpyv` | `axpyf` | `dotxf` | `dotxaxpyf` | |:-------------------|:--------------------|:--------|:--------|:---------|:-----------|:--------|:--------|:------------| | `gemv, trmv, trsv` | row-wise | | 2 | | | | 1 | | | | column-wise | 2 | | | | 1 | | | | `hemv, symv` | row- or column-wise | 4 | 4 | | 3 | 2 | 2 | 1 | | `ger, her, syr` | row- or column-wise | 1 | | | | | | | | `her2, syr2` | row- or column-wise | 2 | | 1 | | | | | **Note:** The "effective storage" column reflects the orientation of the matrix operand **after** transposition via the corresponding `trans_t` parameter (if applicable). For example, calling `gemv` with a column-stored matrix `A` and the `transa` parameter equal to `BLIS_TRANSPOSE` would be effectively equivalent to row-wise storage. --- ## Calling kernels Note that all kernels, whether they be reference implementations or based on fully optimized assembly code, use names that are architecture- and implementation-specific. (This appears as a `<suffix>` in the [kernel reference](KernelsHowTo.md#blis-kernels-reference) below.) Therefore, the easiest way to call the kernel is by querying a pointer from a valid context. The first step is to obtain a valid context. Contexts store all of the information specific to a particular sub-configuration (usually loosely specific to a microarchitecture or group of closely-related microarchitectures).
If a context is not already available in your current scope, a default context for the hardware for which BLIS was configured (or, in the case of multi-configuration builds, the hardware on which BLIS is currently running) may be queried via: ```c cntx_t* bli_gks_query_cntx( void ); ``` Once this `cntx_t*` pointer is obtained, you may call one of three functions to query any of the computation kernels described in this document: ```c void* bli_cntx_get_l3_nat_ukr_dt ( num_t dt, l3ukr_t ker_id, cntx_t* cntx ); void* bli_cntx_get_l1f_ker_dt ( num_t dt, l1fkr_t ker_id, cntx_t* cntx ); void* bli_cntx_get_l1v_ker_dt ( num_t dt, l1vkr_t ker_id, cntx_t* cntx ); ``` The `dt` and `ker_id` parameters specify the floating-point datatype and the kernel operation you wish to query, respectively. Valid values for `dt` are `BLIS_FLOAT`, `BLIS_DOUBLE`, `BLIS_SCOMPLEX`, and `BLIS_DCOMPLEX` for single- and double-precision real, and single- and double-precision complex, respectively. Valid values for `ker_id` are given in the tables below. Also, note that the return values of `bli_cntx_get_l1v_ker_dt()`, `bli_cntx_get_l1f_ker_dt()`, and `bli_cntx_get_l3_nat_ukr_dt()` will be `void*` and must be typecast to typed function pointers before being called. As a convenience, BLIS defines function pointer types appropriate for usage in these situations. The function pointer type for each operation is given in the third column of each table, with the `?` taking the place of one of the supported datatype characters.
| kernel operation | l3ukr_t | function pointer type | |:-----------------|:----------------------|:----------------------| | gemm | `BLIS_GEMM_UKR` | `?gemm_ukr_ft` | | trsm_l | `BLIS_TRSM_L_UKR` | `?trsm_ukr_ft` | | trsm_u | `BLIS_TRSM_U_UKR` | `?trsm_ukr_ft` | | gemmtrsm_l | `BLIS_GEMMTRSM_L_UKR` | `?gemmtrsm_ukr_ft` | | gemmtrsm_u | `BLIS_GEMMTRSM_U_UKR` | `?gemmtrsm_ukr_ft` | | kernel operation | l1fkr_t | function pointer type | |:-----------------|:----------------------|:----------------------| | axpy2v | `BLIS_AXPY2V_KER` | `?axpy2v_ft` | | dotaxpyv | `BLIS_DOTAXPYV_KER` | `?dotaxpyv_ft` | | axpyf | `BLIS_AXPYF_KER` | `?axpyf_ft` | | dotxf | `BLIS_DOTXF_KER` | `?dotxf_ft` | | dotxaxpyf | `BLIS_DOTXAXPYF_KER` | `?dotxaxpyf_ft` | | kernel operation | l1vkr_t | function pointer type | |:-----------------|:----------------------|:----------------------| | addv | `BLIS_ADDV_KER` | `?addv_ft` | | amaxv | `BLIS_AMAXV_KER` | `?amaxv_ft` | | axpyv | `BLIS_AXPYV_KER` | `?axpyv_ft` | | axpbyv | `BLIS_AXPBYV_KER` | `?axpbyv_ft` | | copyv | `BLIS_COPYV_KER` | `?copyv_ft` | | dotv | `BLIS_DOTV_KER` | `?dotv_ft` | | dotxv | `BLIS_DOTXV_KER` | `?dotxv_ft` | | invertv | `BLIS_INVERTV_KER` | `?invertv_ft` | | scalv | `BLIS_SCALV_KER` | `?scalv_ft` | | scal2v | `BLIS_SCAL2V_KER` | `?scal2v_ft` | | setv | `BLIS_SETV_KER` | `?setv_ft` | | subv | `BLIS_SUBV_KER` | `?subv_ft` | | swapv | `BLIS_SWAPV_KER` | `?swapv_ft` | | xpbyv | `BLIS_XPBYV_KER` | `?xpbyv_ft` | The specific information behind a queried function pointer is not typically available. However, it is guaranteed that the function pointer will always be valid (usually either an optimized assembly implementation or a reference implementation).
--- ## BLIS kernels reference This section seeks to provide developers with a complete reference for each of the following BLIS kernels, including function prototypes, parameter descriptions, implementation notes, and diagrams: * [Level-3 microkernels](KernelsHowTo.md#level-3-microkernels) * [gemm](KernelsHowTo.md#gemm-microkernel) * [trsm](KernelsHowTo.md#trsm-microkernels) * [gemmtrsm](KernelsHowTo.md#gemmtrsm-microkernels) * [Level-1f kernels](KernelsHowTo.md#level-1f-kernels) * [axpy2v](KernelsHowTo.md#axpy2v-kernel) * [dotaxpyv](KernelsHowTo.md#dotaxpyv-kernel) * [axpyf](KernelsHowTo.md#axpyf-kernel) * [dotxf](KernelsHowTo.md#dotxf-kernel) * [dotxaxpyf](KernelsHowTo.md#dotxaxpyf-kernel) * [Level-1v kernels](KernelsHowTo.md#level-1v-kernels) * [addv](KernelsHowTo.md#addv-kernel) * [amaxv](KernelsHowTo.md#amaxv-kernel) * [axpyv](KernelsHowTo.md#axpyv-kernel) * [axpbyv](KernelsHowTo.md#axpbyv-kernel) * [copyv](KernelsHowTo.md#copyv-kernel) * [dotv](KernelsHowTo.md#dotv-kernel) * [dotxv](KernelsHowTo.md#dotxv-kernel) * [invertv](KernelsHowTo.md#invertv-kernel) * [scalv](KernelsHowTo.md#scalv-kernel) * [scal2v](KernelsHowTo.md#scal2v-kernel) * [setv](KernelsHowTo.md#setv-kernel) * [subv](KernelsHowTo.md#subv-kernel) * [swapv](KernelsHowTo.md#swapv-kernel) * [xpbyv](KernelsHowTo.md#xpbyv-kernel) The function prototypes in this section follow the same guidelines as those listed in the [BLIS typed API reference](BLISTypedAPI.md#Notes_for_using_this_reference). Namely: * Any occurrence of `?` should be replaced with `s`, `d`, `c`, or `z` to form an actual function name. * Any occurrence of `ctype` should be replaced with the actual C type corresponding to the datatype instance in question. * Some matrix arguments have associated row and column stride arguments that follow them, typically listed as `rsX` and `csX` for a given matrix `X`. Row strides are always listed first, and column strides are always listed second.
The semantic meaning of a row stride is "the distance, in units of elements, from any given element to the corresponding element (within the same column) of the next row," and the meaning of a column stride is "the distance, in units of elements, from any given element to the corresponding element (within the same row) of the next column." Thus, unit row stride implies column-major storage and unit column stride implies row-major storage. * All occurrences of `alpha` and `beta` parameters are scalars. ### Level-3 microkernels This section describes in detail the various level-3 microkernels supported by BLIS: * [gemm](KernelsHowTo.md#gemm-microkernel) * [trsm](KernelsHowTo.md#trsm-microkernels) * [gemmtrsm](KernelsHowTo.md#gemmtrsm-microkernels) #### gemm microkernel ```c void bli_?gemm_<suffix> ( dim_t k, ctype* restrict alpha, ctype* restrict a1, ctype* restrict b1, ctype* restrict beta, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` where `<suffix>` is implementation-dependent. (Recall that the precise `<suffix>` associated with the microkernel along with the rest of the function name doesn't matter if you are querying the function address from the context. See section on [calling kernels](KernelsHowTo.md#calling-kernels) for details.)
The following (more portable) wrapper is also defined: ```c void bli_?gemm_ukernel ( dim_t k, ctype* restrict alpha, ctype* restrict a1, ctype* restrict b1, ctype* restrict beta, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` The `gemm` microkernel, sometimes simply referred to as "the BLIS microkernel" or "the microkernel", performs the following operation: ``` C11 := beta * C11 + alpha * A1 * B1 ``` where `A1` is an _MR x k_ "micropanel" matrix stored in packed (column-wise) format, `B1` is a _k x NR_ "micropanel" matrix stored in packed (row-wise) format, `C11` is an _MR x NR_ general matrix stored according to its row and column strides `rsc` and `csc`, and `alpha` and `beta` are scalars. _MR_ and _NR_ are the register blocksizes associated with the microkernel. They are chosen by the developer when the microkernel is written and then encoded into a BLIS configuration, which will reference the microkernel when the BLIS framework is instantiated into a library. For more information on setting register blocksizes and related constants, please see the [BLIS developer configuration guide](ConfigurationHowTo.md). Parameters: * `k`: The number of columns of `A1` and rows of `B1`. * `alpha`: The address of a scalar to be applied to the `A1 * B1` product. * `a1`: The address of a micropanel of matrix `A` of dimension _MR x k_, stored by columns with leading dimension _PACKMR_, where typically _PACKMR_ = _MR_. (See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for a discussion of _PACKMR_.) * `b1`: The address of a micropanel of matrix `B` of dimension _k x NR_, stored by rows with leading dimension _PACKNR_, where typically _PACKNR_ = _NR_. (See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for a discussion of _PACKNR_.) * `beta`: The address of a scalar to be applied to the input value of matrix `C11`.
* `c11`: The address of a matrix `C11` of dimension _MR x NR_, stored according to `rsc` and `csc`. * `rsc`: The row stride of matrix `C11` (ie: the distance to the next row, in units of matrix elements). * `csc`: The column stride of matrix `C11` (ie: the distance to the next column, in units of matrix elements). * `data`: The address of an `auxinfo_t` object that contains auxiliary information that may be useful when optimizing the `gemm` microkernel implementation. (See [Using the auxinfo\_t object](KernelsHowTo.md#Using_the_auxinfo_t_object) for a discussion of the kinds of values available via `auxinfo_t`.) * `cntx`: The address of the runtime context. The context can be queried for implementation-specific values such as cache and register blocksizes. However, most microkernels intrinsically "know" these values already, and thus the `cntx` argument usually can be safely ignored. #### Diagram for gemm The diagram below shows the packed micropanel operands and how elements of each would be stored when _MR_ = _NR_ = 4. The hex digits indicate the layout and order (but NOT the numeric contents) of the elements in memory. Note that the storage of `C11` is not shown since it is determined by the row and column strides of `C11`. ``` c11: a1: b1: _______ ______________________ _______ | | |0 4 8 C | |0 1 2 3| MR | | |1 5 9 D . . . | |4 5 6 7| | | += |2 6 A E | |8 9 A B| |_______| |3_7_B_F_______________| |C D E F| | . | NR k | . | k | . | | | | | |_______| NR ``` #### Implementation Notes for gemm * **Register blocksizes.** The register blocksizes `MR` and `NR`, corresponding to the number of *logical* rows in `a1` and columns in `b1`, respectively, are defined in the context and may be queried via `bli_cntx_get_blksz_def_dt()`. However, you shouldn't need to query these values since the implementation inherently "knows" them already. 
* **Leading dimensions of `a1` and `b1`: _PACKMR_ and _PACKNR_.** The packed micropanels `a1` and `b1` are simply stored in column-major and row-major order, respectively. Usually, the width of either micropanel (ie: the number of logical rows of `a1`, or _MR_, and the number of columns of `b1`, or _NR_) is equal to that micropanel's so-called "leading dimension", or number of *physical* rows. Sometimes, it may be beneficial to specify a leading dimension that is larger than the panel width. This may be desirable because it allows each column of `a1` or row of `b1` to maintain a certain alignment in memory that would not otherwise be maintained by _MR_ and/or _NR_. In this case, you should index through `a1` and `b1` using the values _PACKMR_ and _PACKNR_, respectively (which are stored in the context as the blocksize "maximums" associated with the `bszid_t` values `BLIS_MR` and `BLIS_NR`). These values are defined in the context and may be queried via `bli_cntx_get_blksz_max_dt()`. However, you shouldn't need to query these values since the implementation inherently "knows" them already. * **Storage preference of `c11`.** Usually, an optimized `gemm` microkernel will have a "preferred" storage format for `C11`--typically either contiguous row-storage (i.e. `cs_c` = 1) or contiguous column-storage (i.e. `rs_c` = 1). This preference comes from how the microkernel is most efficiently able to load/store elements of `C11` from/to memory. Most microkernels use vector instructions to access contiguous columns (or column segments) of `C11`. However, the developer may decide that accessing contiguous rows (or row segments) is more desirable. If this is the case, this preference should be indicated via the `bool_t` argument when registering microkernels via `bli_cntx_set_l3_nat_ukrs()`--`TRUE` indicating a row preference and `FALSE` indicating a column preference. 
Properly setting this property allows the framework to perform a runtime optimization that will ensure the microkernel preference is honored, if at all possible. * **Edge cases in _MR_, _NR_ dimensions.** Sometimes the microkernel will be called with micropanels `a1` and `b1` that correspond to edge cases, where only partial results are needed. Zero-padding is handled automatically by the packing function to facilitate reuse of the same microkernel. Similarly, the logic for computing to temporary storage and then saving only the elements that correspond to elements of `C11` that exist (at the edges) is handled automatically within the macrokernel. * **Alignment of `a1` and `b1`.** By default, the addresses `a1` and `b1` are aligned only to `sizeof(type)`. If `BLIS_POOL_ADDR_ALIGN_SIZE` is set to some larger multiple of `sizeof(type)`, such as the page size, then the *first* `a1` and `b1` micropanels will be aligned to that value, but subsequent micropanels will only be aligned to `sizeof(type)`, or, if `BLIS_POOL_ADDR_ALIGN_SIZE` is a multiple of `PACKMR` and `PACKNR`, then subsequent micropanels `a1` and `b1` will be aligned to `PACKMR * sizeof(type)` and `PACKNR * sizeof(type)`, respectively. * **Unrolling loops.** As a general rule of thumb, the loop over _k_ is sometimes moderately unrolled; for example, in our experience, an unrolling factor of _u_ = 4 is fairly common. If unrolling is applied in the _k_ dimension, edge cases must be handled to support values of _k_ that are not multiples of _u_. It is nearly universally true that there should be no loops in the _MR_ or _NR_ directions; in other words, iteration over these dimensions should always be fully unrolled (within the loop over _k_). * **Zero `beta`.** If `beta` = 0.0 (or 0.0 + 0.0i for complex datatypes), then the microkernel should NOT use it explicitly, as `C11` may contain uninitialized memory (including elements containing `NaN` or `Inf`).
This case should be detected and handled separately by overwriting `C11` with the `alpha * A1 * B1` product. #### Using the auxinfo\_t object Each microkernel ([gemm](KernelsHowTo.md#gemm-microkernel), [trsm](KernelsHowTo.md#trsm-microkernels), and [gemmtrsm](KernelsHowTo.md#gemmtrsm-microkernels)) takes as its second-to-last argument (`data`) a pointer to an `auxinfo_t` object. This BLIS-defined type is defined as a `struct` whose fields contain auxiliary values that may be useful to some microkernel authors, particularly when implementing certain optimization techniques. BLIS provides kernel authors access to the fields of the `auxinfo_t` object via the following function-like preprocessor macros. Each macro takes a single argument, the `auxinfo_t` pointer, and returns one of the values stored within the object. * `bli_auxinfo_next_a()`. Returns the address (`void*`) of the micropanel of `A` that will be used the next time the microkernel will be called. * `bli_auxinfo_next_b()`. Returns the address (`void*`) of the micropanel of `B` that will be used the next time the microkernel will be called. * `bli_auxinfo_ps_a()`. Returns the panel stride (`inc_t`) of the current micropanel of `A`. * `bli_auxinfo_ps_b()`. Returns the panel stride (`inc_t`) of the current micropanel of `B`. The addresses of the next micropanels of `A` and `B` may be used by the microkernel to perform prefetching, if prefetching is supported by the architecture. Similarly, it may be useful to know the precise distance in memory to the next micropanel. (Note that sometimes the next micropanel to be used is **not** the same as the next micropanel in memory.) Any and all of these values may be safely ignored; they are completely optional. However, BLIS guarantees that all values accessed via the macros listed above will **always** be initialized and meaningful, for every invocation of each microkernel (`gemm`, `trsm`, and `gemmtrsm`).
#### Example code for gemm An example implementation of the `gemm` microkernel may be found in the `template` configuration directory in: * [config/template/kernels/3/bli\_gemm_opt\_mxn.c](https://github.com/flame/blis/tree/master/config/template/kernels/3/bli_gemm_opt_mxn.c) Note that this implementation is coded in C99 and lacks several kinds of optimization that are typical of real-world optimized microkernels, such as vector instructions (or intrinsics) and loop unrolling in _MR_ or _NR_. It is meant to serve only as a starting point for a microkernel developer. --- #### trsm microkernels ```c void bli_?trsm_l_<suffix> ( ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); void bli_?trsm_u_<suffix> ( ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` where `<suffix>` is implementation-dependent. (Recall that the precise `<suffix>` associated with the microkernel along with the rest of the function name doesn't matter if you are querying the function address from the context. See section on [calling kernels](KernelsHowTo.md#calling-kernels) for details.) The following (more portable) wrappers are also defined: ```c void bli_?trsm_l_ukernel ( ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); void bli_?trsm_u_ukernel ( ctype* restrict a11, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` The `trsm_l` and `trsm_u` microkernels perform the following operation: ``` C11 := inv(A11) * B11 ``` where `A11` is _MR x MR_ and lower (`trsm_l`) or upper (`trsm_u`) triangular, `B11` is _MR x NR_, and `C11` is _MR x NR_. _MR_ and _NR_ are the register blocksizes associated with the microkernel.
They are chosen by the developer when the microkernel is written and then encoded into a BLIS configuration, which will reference the microkernel when the BLIS framework is instantiated into a library. For more information on setting register blocksizes and related constants, please see the [BLIS developer configuration guide](ConfigurationHowTo.md). Parameters: * `a11`: The address of `A11`, which is the _MR x MR_ lower (`trsm_l`) or upper (`trsm_u`) triangular submatrix within the packed micropanel of matrix `A`. `A11` is stored by columns with leading dimension _PACKMR_, where typically _PACKMR_ = _MR_. (See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for a discussion of _PACKMR_.) Note that `A11` contains elements in both triangles, though elements in the unstored triangle are not guaranteed to be zero and thus should not be referenced. * `b11`: The address of `B11`, which is an _MR x NR_ submatrix of the packed micropanel of `B`. `B11` is stored by rows with leading dimension _PACKNR_, where typically _PACKNR_ = _NR_. (See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for a discussion of _PACKNR_.) * `c11`: The address of `C11`, which is an _MR x NR_ submatrix of matrix `C`, stored according to `rsc` and `csc`. `C11` is the submatrix within `C` that corresponds to the elements which were packed into `B11`. Thus, `C` is the original input matrix `B` to the overall `trsm` operation. * `rsc`: The row stride of matrix `C11` (ie: the distance to the next row, in units of matrix elements). * `csc`: The column stride of matrix `C11` (ie: the distance to the next column, in units of matrix elements). * `data`: The address of an `auxinfo_t` object that contains auxiliary information that may be useful when optimizing the `trsm` microkernel implementation. 
(See [Using the auxinfo\_t object](KernelsHowTo.md#Using_the_auxinfo_t_object) for a discussion of the kinds of values available via `auxinfo_t`, and also [Implementation Notes for trsm](KernelsHowTo.md#implementation-notes-for-trsm) for caveats.) * `cntx`: The address of the runtime context. The context can be queried for implementation-specific values such as cache and register blocksizes. However, most microkernels intrinsically "know" these values already, and thus the `cntx` argument usually can be safely ignored. #### Diagrams for trsm Please see the diagram for [gemmtrsm\_l](KernelsHowTo.md#diagram-for-gemmtrsm-l) and [gemmtrsm\_u](KernelsHowTo.md#diagram-for-gemmtrsm-u) to see depictions of the `trsm_l` and `trsm_u` microkernel operations and where they fit in with their preceding `gemm` subproblems. #### Implementation Notes for trsm * **Register blocksizes.** See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm). * **Leading dimensions of `a11` and `b11`: _PACKMR_ and _PACKNR_.** See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm). * **Edge cases in _MR_, _NR_ dimensions.** See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm). * **Alignment of `a11` and `b11`.** The addresses `a11` and `b11` are aligned according to `PACKMR * sizeof(type)` and `PACKNR * sizeof(type)`, respectively. * **Unrolling loops.** Most optimized implementations should unroll all three loops within the `trsm` microkernel. * **Prefetching next micropanels of `A` and `B`.** We advise against using the `bli_auxinfo_next_a()` and `bli_auxinfo_next_b()` macros from within the `trsm_l` and `trsm_u` microkernels, since the values returned usually only make sense in the context of the overall `gemmtrsm` subproblem. * **Diagonal elements of `A11`.** At the time this microkernel is called, the diagonal entries of triangular matrix `A11` contain the **_inverse_** of the original elements. 
This inversion is done during packing so that we can avoid expensive division instructions within the microkernel itself. If the `diag` parameter to the higher level `trsm` operation was equal to `BLIS_UNIT_DIAG`, the diagonal elements will be explicitly unit. * **Zero elements of `A11`.** Since `A11` is lower triangular (for `trsm_l`), the strictly upper triangle implicitly contains zeros. Similarly, the strictly lower triangle of `A11` implicitly contains zeros when `A11` is upper triangular (for `trsm_u`). However, the packing function may or may not actually write zeros to this region. Thus, the implementation should not reference these elements. * **Output.** This microkernel must write its result to two places: the submatrix `B11` of the current packed micropanel of `B` _and_ the submatrix `C11` of the output matrix `C`. #### Example code for trsm Example implementations of the `trsm` microkernels may be found in the `template` configuration directory in: * [config/template/kernels/3/bli\_trsm\_l\_opt\_mxn.c](https://github.com/flame/blis/tree/master/config/template/kernels/3/bli_trsm_l_opt_mxn.c) * [config/template/kernels/3/bli\_trsm\_u\_opt\_mxn.c](https://github.com/flame/blis/tree/master/config/template/kernels/3/bli_trsm_u_opt_mxn.c) Note that these implementations are coded in C99 and lack several kinds of optimization that are typical of real-world optimized microkernels, such as vector instructions (or intrinsics) and loop unrolling in _MR_ or _NR_. They are meant to serve only as a starting point for a microkernel developer. 
--- #### gemmtrsm microkernels ```c void bli_?gemmtrsm_l_<suffix> ( dim_t k, ctype* restrict alpha, ctype* restrict a10, ctype* restrict a11, ctype* restrict b01, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); void bli_?gemmtrsm_u_<suffix> ( dim_t k, ctype* restrict alpha, ctype* restrict a12, ctype* restrict a11, ctype* restrict b21, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` where `<suffix>` is implementation-dependent. (Recall that the precise `<suffix>` associated with the microkernel along with the rest of the function name doesn't matter if you are querying the function address from the context. See section on [calling kernels](KernelsHowTo.md#calling-kernels) for details.) The following (more portable) wrappers are also defined: ```c void bli_?gemmtrsm_l_ukernel ( dim_t k, ctype* restrict alpha, ctype* restrict a10, ctype* restrict a11, ctype* restrict b01, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); void bli_?gemmtrsm_u_ukernel ( dim_t k, ctype* restrict alpha, ctype* restrict a12, ctype* restrict a11, ctype* restrict b21, ctype* restrict b11, ctype* restrict c11, inc_t rsc, inc_t csc, auxinfo_t* restrict data, cntx_t* restrict cntx ); ``` The `gemmtrsm_l` microkernel performs the following compound operation: ``` B11 := alpha * B11 - A10 * B01 B11 := inv(A11) * B11 C11 := B11 ``` where `A11` is _MR_ x _MR_ and lower triangular, `A10` is _MR_ x _k_, and `B01` is _k_ x _NR_. The `gemmtrsm_u` microkernel performs: ``` B11 := alpha * B11 - A12 * B21 B11 := inv(A11) * B11 C11 := B11 ``` where `A11` is _MR_ x _MR_ and upper triangular, `A12` is _MR_ x _k_, and `B21` is _k_ x _NR_. In both cases, `B11` is _MR_ x _NR_ and `alpha` is a scalar. Here, `inv()` denotes matrix inverse. _MR_ and _NR_ are the register blocksizes associated with the microkernel.
They are chosen by the developer when the microkernel is written and then encoded into a BLIS configuration, which will reference the microkernel when the BLIS framework is instantiated into a library. For more information on setting register blocksizes and related constants, please see the [BLIS developer configuration guide](ConfigurationHowTo.md). Parameters: * `k`: The number of columns of `A10` and rows of `B01` (`trsm_l`); the number of columns of `A12` and rows of `B21` (`trsm_u`). * `alpha`: The address of a scalar to be applied to `B11`. * `a10`, `a12`: The address of `A10` or `A12`, which is the _MR x k_ submatrix of the packed micropanel of `A` that is situated to the left (`trsm_l`) or right (`trsm_u`) of the _MR x MR_ triangular submatrix `A11`. `A10` and `A12` are stored by columns with leading dimension _PACKMR_, where typically _PACKMR_ = _MR_. (See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for a discussion of _PACKMR_.) * `a11`: The address of `A11`, which is the _MR x MR_ lower (`trsm_l`) or upper (`trsm_u`) triangular submatrix within the packed micropanel of matrix `A` that is situated to the right of `A10` (`trsm_l`) or the left of `A12` (`trsm_u`). `A11` is stored by columns with leading dimension _PACKMR_, where typically _PACKMR_ = _MR_. (See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for a discussion of _PACKMR_.) Note that `A11` contains elements in both triangles, though elements in the unstored triangle are not guaranteed to be zero and thus should not be referenced. * `b01`, `b21`: The address of `B01` and `B21`, which is the _k x NR_ submatrix of the packed micropanel of `B` that is situated above (`trsm_l`) or below (`trsm_u`) the _MR x NR_ block `B11`. `B01` and `B21` are stored by rows with leading dimension _PACKNR_, where typically _PACKNR_ = _NR_. 
(See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for a discussion of _PACKNR_.) * `b11`: The address of `B11`, which is the _MR x NR_ submatrix of the packed micropanel of `B`, situated below `B01` (`trsm_l`) or above `B21` (`trsm_u`). `B11` is stored by rows with leading dimension _PACKNR_, where typically _PACKNR_ = _NR_. (See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for a discussion of _PACKNR_.) * `c11`: The address of `C11`, which is an _MR x NR_ submatrix of matrix `C`, stored according to `rsc` and `csc`. `C11` is the submatrix within `C` that corresponds to the elements which were packed into `B11`. Thus, `C` is the original input matrix `B` to the overall `trsm` operation. * `rsc`: The row stride of matrix `C11` (ie: the distance to the next row, in units of matrix elements). * `csc`: The column stride of matrix `C11` (ie: the distance to the next column, in units of matrix elements). * `data`: The address of an `auxinfo_t` object that contains auxiliary information that may be useful when optimizing the `gemmtrsm` microkernel implementation. (See [Using the auxinfo\_t object](KernelsHowTo.md#Using_the_auxinfo_t_object) for a discussion of the kinds of values available via `auxinfo_t`, and also [Implementation Notes for gemmtrsm](KernelsHowTo.md#implementation-notes-for-gemmtrsm) for caveats.) * `cntx`: The address of the runtime context. The context can be queried for implementation-specific values such as cache and register blocksizes. However, most microkernels intrinsically "know" these values already, and thus the `cntx` argument usually can be safely ignored. #### Diagram for gemmtrsm\_l The diagram below shows the packed micropanel operands for `trsm_l` and how elements of each would be stored when _MR_ = _NR_ = 4. The hex digits indicate the layout and order (but NOT the numeric contents) of the elements in memory. Here, matrix `A11` (referenced by `a11`) is **lower triangular**.
Matrix `A11` **does contain** elements corresponding to the strictly upper triangle; however, they are not guaranteed to contain zeros and thus these elements should not be referenced. ``` NR _______ b01:|0 1 2 3| |4 5 6 7| |8 9 A B| |C D E F| k | . | | . | a10: a11: | . | ___________________ _______ |_______| |0 4 8 C |`. | b11:| | MR |1 5 9 D . . . | `. | | | |2 6 A E | `. | MR | | |3_7_B_F____________|______`.| |_______| k MR ``` #### Diagram for gemmtrsm\_u The diagram below shows the packed micropanel operands for `trsm_u` and how elements of each would be stored when _MR_ = _NR_ = 4. The hex digits indicate the layout and order (but NOT the numeric contents) of the elements in memory. Here, matrix `A11` (referenced by `a11`) is **upper triangular**. Matrix `A11` **does contain** elements corresponding to the strictly lower triangle; however, they are not guaranteed to contain zeros and thus these elements should not be referenced. ``` a11: a12: NR ________ ___________________ _______ |`. |0 4 8 | b11:|0 1 2 3| MR | `. |1 5 9 . . . | |4 5 6 7| | `. |2 6 A | MR |8 9 A B| |______`.|3_7_B______________| |___.___| b21:| . | MR k | . | | | | | NOTE: Storage digits are shown k | | starting with a12 to avoid | | obscuring triangular structure | | of a11. |_______| ``` #### Implementation Notes for gemmtrsm * **Register blocksizes.** See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm). * **Leading dimensions of `a1` and `b1`: _PACKMR_ and _PACKNR_.** See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm). * **Edge cases in _MR_, _NR_ dimensions.** See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm). * **Alignment of `a1` and `b1`.** See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm). * **Unrolling loops.** Most optimized implementations should unroll all three loops within the `trsm` subproblem of `gemmtrsm`.
See [Implementation Notes for gemm](KernelsHowTo.md#implementation-notes-for-gemm) for remarks on unrolling the `gemm` subproblem. * **Prefetching next micropanels of `A` and `B`.** When invoked from within a `gemmtrsm_l` microkernel, the addresses accessible via `bli_auxinfo_next_a()` and `bli_auxinfo_next_b()` refer to the next invocation's `a10` and `b01`, respectively, while in `gemmtrsm_u`, the `_next_a()` and `_next_b()` macros return the addresses of the next invocation's `a11` and `b11` (since those submatrices precede `a12` and `b21`). * **Zero `alpha`.** The microkernel can safely assume that `alpha` is non-zero; "alpha equals zero" handling is performed at a much higher level, which means that, in such a scenario, the microkernel will never get called. * **Diagonal elements of `A11`.** See [Implementation Notes for trsm](KernelsHowTo.md#implementation-notes-for-trsm). * **Zero elements of `A11`.** See [Implementation Notes for trsm](KernelsHowTo.md#implementation-notes-for-trsm). * **Output.** See [Implementation Notes for trsm](KernelsHowTo.md#implementation-notes-for-trsm). * **Optimization.** Let's assume that the [gemm microkernel](KernelsHowTo.md#gemm-microkernel) has already been optimized. You have two options with regard to optimizing the fused `gemmtrsm` microkernels: 1. Optimize only the [trsm microkernels](KernelsHowTo.md#trsm-microkernels). This will result in the `gemm` and `trsm_l` microkernels being called in sequence. (Likewise for `gemm` and `trsm_u`.) 1. Fuse the implementation of the `gemm` microkernel with that of the `trsm` microkernels by inlining both into the `gemmtrsm_l` and `gemmtrsm_u` microkernel definitions. This option is more labor-intensive, but also more likely to yield higher performance because it avoids redundant memory operations on the packed _MR x NR_ submatrix `B11`. 
#### Example code for gemmtrsm Example implementations of the `gemmtrsm` microkernels may be found in the `template` configuration directory in: * [config/template/kernels/3/bli\_gemmtrsm\_l\_opt\_mxn.c](https://github.com/flame/blis/tree/master/config/template/kernels/3/bli_gemmtrsm_l_opt_mxn.c) * [config/template/kernels/3/bli\_gemmtrsm\_u\_opt\_mxn.c](https://github.com/flame/blis/tree/master/config/template/kernels/3/bli_gemmtrsm_u_opt_mxn.c) Note that these implementations are coded in C99 and lack several kinds of optimization that are typical of real-world optimized microkernels, such as vector instructions (or intrinsics) and loop unrolling in _MR_ or _NR_. They are meant to serve only as a starting point for a microkernel developer. ### Level-1f kernels --- #### axpy2v kernel ```c void bli_?axpy2v_<suffix> ( conj_t conjx, conj_t conjy, dim_t n, ctype* restrict alphax, ctype* restrict alphay, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, ctype* restrict z, inc_t incz, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` z := z + alphax * conjx(x) + alphay * conjy(y) ``` where `x`, `y`, and `z` are vectors of length _n_ stored with strides `incx`, `incy`, and `incz`, respectively. This kernel is typically implemented as the fusion of two `axpyv` operations on different input vectors `x` and `y` and with different scalars `alphax` and `alphay` to update the same output vector `z`. --- #### dotaxpyv kernel ```c void bli_?dotaxpyv_<suffix> ( conj_t conjxt, conj_t conjx, conj_t conjy, dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, ctype* restrict rho, ctype* restrict z, inc_t incz, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` rho := conjxt(x)^T * conjy(y) z := z + alpha * conjx(x) ``` where `x`, `y`, and `z` are vectors of length _n_ stored with strides `incx`, `incy`, and `incz`, respectively, and `rho` is a scalar.
This kernel is typically implemented as a `dotv` operation fused with an `axpyv` operation. --- #### axpyf kernel ```c void bli_?axpyf_<suffix> ( conj_t conja, conj_t conjx, dim_t m, dim_t b, ctype* restrict alpha, ctype* restrict a, inc_t inca, inc_t lda, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := y + alpha * conja(a) * conjx(x) ``` where `a` is an _m_ x _b_ matrix, `x` is a vector of length _b_, and `y` is a vector of length _m_. Vectors `x` and `y` are stored with strides `incx` and `incy`, respectively. Matrix `a` is stored with row stride `inca` and column stride `lda`, though `inca` is most often (in practice) unit. This kernel is typically implemented as a fused series of _b_ `axpyv` operations updating the same vector `y` (with the elements of `x` serving as the scalars and the columns of `a` serving as the vectors to be scaled). --- #### dotxf kernel ```c void bli_?dotxf_<suffix> ( conj_t conjat, conj_t conjx, dim_t m, dim_t b, ctype* restrict alpha, ctype* restrict a, inc_t inca, inc_t lda, ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := beta * y + alpha * conjat(a)^T conjx(x) ``` where `a` is an _m_ x _b_ matrix, `x` is a vector of length _m_, `y` is a vector of length _b_, and `alpha` and `beta` are scalars. Vectors `x` and `y` are stored with strides `incx` and `incy`, respectively. Matrix `a` is stored with row stride `inca` and column stride `lda`, though `inca` is most often (in practice) unit. This kernel is typically implemented as a series of _b_ `dotxv` operations with the same right-hand operand vector `x` (contracted with the rows of `a^T` and accumulating to the corresponding elements of vector `y`).
--- #### dotxaxpyf kernel ```c void bli_?dotxaxpyf_ ( conj_t conjat, conj_t conja, conj_t conjw, conj_t conjx, dim_t m, dim_t b, ctype* restrict alpha, ctype* restrict a, inc_t inca, inc_t lda, ctype* restrict w, inc_t incw, ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, ctype* restrict z, inc_t incz, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := beta * y + alpha * conjat(a)^T conjw(w) z := z + alpha * conja(a) conjx(x) ``` where `a` is an _m_ x _b_ matrix, `w` and `z` are vectors of length _m_, `x` and `y` are vectors of length _b_, and `alpha` and `beta` are scalars. Vectors `w`, `z`, `x` and `y` are stored with strides `incw`, `incz`, `incx`, and `incy`, respectively. Matrix `a` is stored with row stride `inca` and column stride `lda`, though `inca` is most often (in practice) unit. This kernel is typically implemented as a series of _b_ `dotxv` operations with the same right-hand operand vector `w` fused with a series of _b_ `axpyv` operations updating the same vector `z`. --- ### Level-1v kernels --- #### addv kernel ```c void bli_?addv_ ( conj_t conjx, dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := y + conjx(x) ``` where `x` and `y` are vectors of length _n_ stored with strides `incx` and `incy`, respectively. --- #### amaxv kernel ```c void bli_?amaxv_ ( dim_t n, ctype* restrict x, inc_t incx, dim_t* restrict index, cntx_t* restrict cntx ) ``` Given a vector of length _n_, this kernel returns the zero-based index `index` of the element of vector `x` that contains the largest absolute value (or, in the complex domain, the largest complex modulus). If `NaN` is encountered, it is treated as if it were a valid value that was smaller than any other value in the vector. If more than one element contains the same maximum value, the index of the latter element is returned via `index`. 
--- #### axpyv kernel ```c void bli_?axpyv_ ( conj_t conjx, dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := y + alpha * conjx(x) ``` where `x` and `y` are vectors of length _n_ stored with strides `incx` and `incy`, respectively, and `alpha` is a scalar. --- #### axpbyv kernel ```c void bli_?axpbyv_ ( conj_t conjx, dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := beta * y + alpha * conjx(x) ``` where `x` and `y` are vectors of length _n_ stored with strides `incx` and `incy`, respectively, and `alpha` and `beta` are scalars. --- #### copyv kernel ```c void bli_?copyv_ ( conj_t conjx, dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := conjx(x) ``` where `x` and `y` are vectors of length _n_ stored with strides `incx` and `incy`, respectively. --- #### dotv kernel ```c void bli_?dotv_ ( conj_t conjx, conj_t conjy, dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, ctype* restrict rho, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` rho := conjx(x)^T * conjy(y) ``` where `x` and `y` are vectors of length _n_ stored with strides `incx` and `incy`, respectively, and `rho` is a scalar. 
--- #### dotxv kernel ```c void bli_?dotxv_ ( conj_t conjx, conj_t conjy, dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, ctype* restrict beta, ctype* restrict rho, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` rho := beta * rho + alpha * conjx(x)^T * conjy(y) ``` where `x` and `y` are vectors of length _n_ stored with strides `incx` and `incy`, respectively, and `alpha`, `beta`, and `rho` are scalars. --- #### invertv kernel ```c void bli_?invertv_ ( dim_t n, ctype* restrict x, inc_t incx, cntx_t* restrict cntx ) ``` This kernel inverts all elements of an _n_-length vector `x`. --- #### scalv kernel ```c void bli_?scalv_ ( conj_t conjalpha, dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` x := conjalpha(alpha) * x ``` where `x` is a vector of length _n_ stored with stride `incx` and `alpha` is a scalar. --- #### scal2v kernel ```c void bli_?scal2v_ ( conj_t conjx, dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := alpha * conjx(x) ``` where `x` and `y` are vectors of length _n_ stored with strides `incx` and `incy`, respectively, and `alpha` is a scalar. --- #### setv kernel ```c void bli_?setv_ ( conj_t conjalpha, dim_t n, ctype* restrict alpha, ctype* restrict x, inc_t incx, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` x := conjalpha(alpha) ``` where `x` is a vector of length _n_ stored with stride `incx` and `alpha` is a scalar. Note that here, the `:=` operator represents a broadcast of `conjalpha(alpha)` to every element in `x`. 
--- #### subv kernel ```c void bli_?subv_ ( conj_t conjx, dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := y - conjx(x) ``` where `x` and `y` are vectors of length _n_. --- #### swapv kernel ```c void bli_?swapv_ ( dim_t n, ctype* restrict x, inc_t incx, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel swaps corresponding elements of two _n_-length vectors `x` and `y` stored with strides `incx` and `incy`, respectively. --- #### xpbyv kernel ```c void bli_?xpbyv_ ( conj_t conjx, dim_t n, ctype* restrict x, inc_t incx, ctype* restrict beta, ctype* restrict y, inc_t incy, cntx_t* restrict cntx ) ``` This kernel performs the following operation: ``` y := beta * y + conjx(x) ``` where `x` and `y` are vectors of length _n_ stored with strides `incx` and `incy`, respectively, and `beta` is a scalar. blis-0.6.1/docs/MixedDatatypes.md000066400000000000000000000267211360743507500166700ustar00rootroot00000000000000## Contents * **[Contents](MixedDatatypes.md#contents)** * **[Introduction](MixedDatatypes.md#introduction)** * **[Categories of mixed datatypes](MixedDatatypes.md#categories-of-mixed-datatypes)** * **[Computation precision](MixedDatatypes.md#computation-precision)** * **[Computation domain](MixedDatatypes.md#computation-domain)** * **[Performing gemm with mixed datatypes](MixedDatatypes.md#performing-gemm-with-mixed-datatypes)** * **[Running the testsuite for gemm with mixed datatypes](MixedDatatypes.md#running-the-testsuite-for-gemm-with-mixed-datatypes)** * **[Known issues](MixedDatatypes.md#known-issues)** * **[Conclusion](MixedDatatypes.md#conclusion)** ## Introduction This document serves as a guide to users interested in taking advantage of BLIS's support for performing the `gemm` operation on operands of differing datatypes (domain and/or precision). 
For further details on the implementation present in BLIS, please see the latest draft of our paper "Supporting Mixed-domain Mixed-precision Matrix Multiplication within the BLIS Framework" available in the [Citations section](https://github.com/flame/blis/#citations) of the main [BLIS webpage](https://github.com/flame/blis). ## Categories of mixed datatypes Before going any further, we find it useful to categorize mixed datatype support into four categories: 1. **Fully identical datatypes.** This is what people generally think of when they think about the `gemm` operation: all operands are stored in the same datatype (precision and domain), and the matrix product computation is performed in the arithmetic represented by that datatype. (This category doesn't actually involve mixing datatypes, but it's still worthwhile to define.) Example: matrix C updated by the product of matrix A and matrix B (all matrices double-precision real). 2. **Mixed domain with identical precisions.** This category includes all combinations of datatypes where the domain (real or complex) of each operand may vary while the precisions (single or double precision) are held constant across all operands. Example: complex matrix C updated by the product of real matrix A and complex matrix B (all matrices single-precision). 3. **Mixed precision within a single domain.** Here, all operands are stored in the same domain (real or complex), however, the precision of each operand may vary. Example: double-precision real matrix C updated by the product of single-precision real matrix A and single-precision real matrix B. 4. **Mixed precision and mixed domain.** This category allows both domains and precision of each matrix operand to vary. Example: double-precision complex matrix C updated by the product of single-precision complex matrix A and single-precision real matrix B. BLIS's implementation of mixed-datatype `gemm` supports all combinations within all four categories. 
### Computation precision Because categories 3 and 4 involve mixing precisions, they come with an added parameter: the *computation precision*. This parameter specifies the precision in which the matrix multiplication (product) takes place. This precision can be different than the storage precision of matrices A or B, and/or the storage precision of matrix C. When the computation precision differs from the storage precision of matrix A, it implies that a typecast must occur when BLIS packs matrix A to contiguous storage. Similarly, B may also need to be typecast during packing. When the computation precision differs from the storage precision of C, it means the result of the matrix product A*B must be typecast just before it is accumulated back into matrix C. ### Computation domain In addition to the computation precision, we also track a computation domain. (Together, they form the computation datatype.) However, for now we do not allow the user to explicitly specify the computation domain. Instead, the computation domain is implied by the domains of A, B, and C. The following table enumerates the six cases where there is at least one operand of each domain, along with the corresponding same-domain cases from category 1 for reference. We also list the total number of floating-point operations performed in each case. In the table, an 'R' denotes a real domain matrix operand while a 'C' denotes a matrix in the complex domain. The R's and C's appear in the following format of C += A * B, where A, B, and C are the matrix operands of `gemm`. 
| Case # | Mixed domain case | Implied computation domain | flops performed | |--------|:-----------------:|:--------------------------:|:---------------:| | 1 | R += R * R | real | 2mnk | | 2 | R += R * C | real | 2mnk | | 3 | R += C * R | real | 2mnk | | 4 | R += C * C | complex | 4mnk | | 5 | C += R * R | real | 2mnk | | 6 | C += R * C | complex | 4mnk | | 7 | C += C * R | complex | 4mnk | | 8 | C += C * C | complex | 8mnk | The computation domain is implied in cases 1 and 8 in the same way that it would be if mixed datatype support were absent entirely. These cases execute 2mnk and 8mnk flops, respectively, as any traditional implementation would. In cases 2 and 3, we assume the computation domain is real because only B or A, respectively, is complex. Thus, in these cases, the imaginary components of the complex matrix are ignored, allowing us to perform only 2mnk flops. In case 5, we take the computation domain to be real because A and B are both real, and thus it makes no sense to compute in the complex domain. This means that we need only update the real components of C, leaving the imaginary components untouched. This also results in 2mnk flops being performed. In case 4, we have complex A and B, allowing us to compute a complex product. However, we can only save the real part of that complex product since the output matrix C is real. Since we cannot update the imaginary component of C (since it is not stored), we avoid computing that half of the update entirely, reducing the flops performed to 4mnk. (Alternatively, one may wish to request real domain computation, in which case the imaginary components of A and B were ignored *prior* to computing the matrix product. This approach would result in only 2mnk flops being performed.) In case 6, we wish for both the real and imaginary parts of B to participate in the multiplication by A, with the result updating the corresponding real and imaginary parts of C. 
Granted, the imaginary part of A is zero, and this is taken advantage of in the computation to optimize performance, as indicated by the 4mnk flop count. But fundamentally this computation executes in the complex domain because both the real and imaginary parts of C are updated. A similar story can be told about case 7. ## Performing gemm with mixed datatypes In BLIS, performing a mixed-datatype `gemm` operation is easy. However, it will require that the user call `gemm` through BLIS's object API. For a basic series of examples for using the object-based API, please see the example codes in the `examples/oapi` directory of the BLIS source distribution. The first step is to ensure that BLIS is configured with mixed datatype support. Please consult with your current distribution's `configure` script for the current semantics: ``` $ ./configure --help ``` As of this writing, mixed datatype support is enabled by default, and thus no additional options are needed. With mixed datatype support enabled in BLIS, using the functionality is simply a matter of creating and initializing matrices of different precisions and/or domains. ```c dim_t m = 5, n = 4, k = 2; obj_t a, b, c; obj_t* alpha; obj_t* beta; bli_obj_create( BLIS_DOUBLE, m, k, 0, 0, &a ); bli_obj_create( BLIS_FLOAT, k, n, 0, 0, &b ); bli_obj_create( BLIS_SCOMPLEX, m, n, 0, 0, &c ); alpha = &BLIS_ONE; beta = &BLIS_ONE; bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); ``` Then, you specify the computation precision by setting the computation precision property of matrix C. ```c bli_obj_set_comp_prec( BLIS_DOUBLE_PREC, &c ); ``` If you do not explicitly specify the computation precision, it will default to the *storage* precision of C. 
With the objects created and the computation precision specified, call `bli_gemm()` just as you would if the datatypes were identical: ```c bli_gemm( alpha, &a, &b, beta, &c ); ``` For more examples of using BLIS's object-based API, including methods of initializing a matrix object with arbitrary values, please review the example code found in the `examples/oapi` directory of the BLIS source distribution. ## Running the testsuite for gemm with mixed datatypes The BLIS testsuite has been retrofitted to test all combinations of datatypes for each matrix operand. For more information on enabling mixed-datatype tests for the `gemm` operation, please see the explanations of the relevant options in the [Testsuite](Testsuite.md) documentation. ## Known issues There may be odd behavior in the current implementation of mixed-datatype `gemm` that does not conform to the reader's expectations. Below is a list of issues that BLIS developers are aware of. If any of these issues poses a problem for your application, please contact us by [opening an issue](https://github.com/flame/blis/issues). * **alpha with non-zero imaginary components.** Currently, there are many cases of mixed-datatype `gemm` that do not yet support computing with `alpha` scalars that have non-zero imaginary components--in other words, values of `alpha` that are not in the real domain. (By contrast, non-real values for `beta` are fully supported.) In order to support these use cases, additional code complexity and logic would be required. Thus, we have chosen, for now, to not implement them. If mixed-datatype `gemm` is invoked with a non-real valued `alpha` scalar, a runtime error message will be printed and the linked program will abort. 
* **Manually specifying the computation domain.** As mentioned in the section discussing the [computation domain](MixedDatatypes.md#computation-domain), the computation domain of any case of mixed domain `gemm` is implied by the operands and thus fixed; the user may not specify a different computation domain, even if the mixed-domain case would reasonably allow for computing in either domain. * **Sandboxes should be used with caution.** When building a `gemm` sandbox in BLIS, please consider either (a) disabling mixed datatype support, or (b) consciously **never** running the testsuite with mixed domain or precision computation enabled. Even the reference `ref99` sandbox implementation in BLIS does not support mixing datatypes. If you do choose to enable a sandbox while also keeping mixed datatype support enabled in BLIS, make sure that the mixing of datatypes is disabled in the testsuite's `input.general` file (unless, of course, you decide to implement all mixed datatype cases within your sandbox). This issue is also discussed in the documentation for [Sandboxes](Sandboxes.md#known-issues). ## Conclusion For more information and documentation on BLIS, please visit the [BLIS github page](https://github.com/flame/blis/). If you found a bug or wish to request a feature, please [open an issue](https://github.com/flame/blis/issues). For general discussion or questions, please join and post a message to the [blis-devel mailing list](http://groups.google.com/group/blis-devel). Thanks for your interest in BLIS! 
blis-0.6.1/docs/Multithreading.md000066400000000000000000000657451360743507500167340ustar00rootroot00000000000000# Contents * **[Contents](Multithreading.md#contents)** * **[Introduction](Multithreading.md#introduction)** * **[Enabling multithreading](Multithreading.md#enabling-multithreading)** * [Choosing OpenMP vs pthreads](Multithreading.md#choosing-openmp-vs-pthreads) * [Specifying thread-to-core affinity](Multithreading.md#specifying-thread-to-core-affinity) * **[Specifying multithreading](Multithreading.md#specifying-multithreading)** * [Globally via environment variables](Multithreading.md#globally-via-environment-variables) * [The automatic way](Multithreading.md#environment-variables-the-automatic-way) * [The manual way](Multithreading.md#environment-variables-the-manual-way) * [Globally at runtime](Multithreading.md#globally-at-runtime) * [The automatic way](Multithreading.md#globally-at-runtime-the-automatic-way) * [The manual way](Multithreading.md#globally-at-runtime-the-manual-way) * [Locally at runtime](Multithreading.md#locally-at-runtime) * [Initializing a rntm_t](Multithreading.md#initializing-a-rntm-t) * [The automatic way](Multithreading.md#locally-at-runtime-the-automatic-way) * [The manual way](Multithreading.md#locally-at-runtime-the-manual-way) * [Using the expert interface](Multithreading.md#locally-at-runtime-using-the-expert-interface) * **[Known issues](Multithreading.md#known-issues)** * **[Conclusion](Multithreading.md#conclusion)** # Introduction Our paper [Anatomy of High-Performance Many-Threaded Matrix Multiplication](https://github.com/flame/blis#citations), presented at IPDPS'14, identified five loops around the microkernel as opportunities for parallelization within level-3 operations such as `gemm`. Within BLIS, we have enabled parallelism for four of those loops, with the fifth planned for future work. 
This software architecture extends naturally to all level-3 operations except for `trsm`, where its application is necessarily limited to three of the five loops due to inter-iteration dependencies. **IMPORTANT**: Multithreading in BLIS is disabled by default. Furthermore, even when multithreading is enabled, BLIS will default to single-threaded execution at runtime. In order to both *allow* and *invoke* parallelism from within BLIS operations, you must both *enable* multithreading at configure-time and *specify* multithreading at runtime. To summarize: In order to observe multithreaded parallelism within a BLIS operation, you must do *both* of the following: 1. Enable multithreading at configure-time. This is discussed in the [next section](Multithreading.md#enabling-multithreading). 2. Specify multithreading at runtime. This is also discussed [later on](Multithreading.md#specifying-multithreading). # Enabling multithreading BLIS disables multithreading by default. In order to allow multithreaded parallelism from BLIS, you must first enable multithreading explicitly at configure-time. As of this writing, BLIS optionally supports multithreading via either OpenMP or POSIX threads. To enable multithreading via OpenMP, you must provide the `--enable-threading` option to the `configure` script: ``` $ ./configure --enable-threading=openmp auto ``` In this example, we target the `auto` configuration, which is like asking `configure` to choose the most appropriate configuration based on some detection heuristic (e.g. `cpuid` on x86_64). 
Similarly, to enable multithreading via POSIX threads (pthreads), specify the threading model as `pthreads` instead of `openmp`: ``` $ ./configure --enable-threading=pthreads auto ``` You can also use the shorthand option for `--enable-threading`, which is `-t`: ``` $ ./configure -t pthreads auto ``` For more complete and up-to-date information on the `--enable-threading` option, simply run `configure` with the `--help` (or `-h`) option: ``` $ ./configure --help ``` ## Choosing OpenMP vs pthreads While we provide the ability to implement multithreading in BLIS in terms of either OpenMP or pthreads, we typically encourage users to opt for OpenMP: ``` $ ./configure -t openmp auto ``` The reason mostly comes down to the fact that most OpenMP implementations (most notably GNU) allow the user to conveniently bind threads to cores via an environment variable(s) set prior to running the application. This is important because when the operating system causes a thread to migrate from one core to another, the thread will typically leave behind the data it was using in the L1 and L2 caches. That data may not be present in the caches of the destination core. Once the thread resumes execution from the new core, it will experience a period of frequent cache misses as the data it was previously using is transmitted once again through the cache hierarchy. If migration happens frequently enough, it can pose a significant (and unnecessary) drag on performance. Note that binding threads to cores is possible in pthreads, but it requires a runtime call to the operating system, such as `sched_setaffinity()`, to convey the thread binding information, and BLIS does not yet implement this behavior for pthreads. ## Specifying thread-to-core affinity The solution to thread migration is setting *processor affinity*. In this context, affinity refers to the tendency for a thread to remain bound to a particular compute core. There are at least two ways to set affinity in OpenMP. 
The first way offers more control, but requires you to understand a bit about the processor topology and how core IDs are mapped to physical cores, while the second way is simpler but less powerful. Let's start with an example. Suppose I have a two-socket system with a total of eight cores, four cores per socket. By setting `GOMP_CPU_AFFINITY` as follows ``` $ export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7" ``` I am communicating to OpenMP that the first thread to be created should be spawned on core 0, from which it should not migrate. The second thread to be created should be spawned on core 1, from which it should not migrate, and so forth. If socket 0 has cores 0-3 and socket 1 has 4-7, this would result in the first four threads on socket 0 and the second four threads on socket 1. (And if more than eight threads are spawned, the mapping wraps back around, starting from the beginning.) So with `GOMP_CPU_AFFINITY`, you are doing more than just preventing threads from migrating once they are spawned--you are specifying the cores on which they will be spawned in the first place. Another example: Suppose the hardware numbers the cores alternatingly between sockets, such that socket 0 gets even-numbered cores and socket 1 gets odd-numbered cores. In such a scenario, you might want to use `GOMP_CPU_AFFINITY` as follows ``` $ export GOMP_CPU_AFFINITY="0 2 4 6 1 3 5 7" ``` Because the first four entries are `0 2 4 6`, threads 0-3 would be spawned on the first socket, since that is where cores 0, 2, 4, and 6 are located. Similarly, the subsequent `1 3 5 7` would cause threads 4-7 to be spawned on the second socket, since that is where cores 1, 3, 5, and 7 reside. Of course, setting `GOMP_CPU_AFFINITY` in this way implies that BLIS benefits from this kind of grouping of threads--which, generally, it does. As a general rule, you should try to fill up a socket with one thread per core before moving to the next socket. 
A second method of specifying affinity is via `OMP_PROC_BIND`, which is much simpler to set: ``` $ export OMP_PROC_BIND=close ``` This binds the threads close to the master thread, in contiguous "place" partitions. (There are other valid values aside from `close`.) Places are specified by another variable, `OMP_PLACES`: ``` $ export OMP_PLACES=cores ``` The `cores` value is most appropriate for BLIS since we usually want to ignore hardware threads (simultaneous multithreading, or "hyperthreading" on Intel systems) and instead map threads to physical cores. Setting these two variables is often enough. However, it obviously does not offer the level of control that `GOMP_CPU_AFFINITY` does. Sometimes, it takes some experimentation to determine whether a particular mapping is performing as expected. If multithreaded performance on eight cores is only twice what is observed for single-threaded performance, the affinity mapping may be to blame. But if performance is six or seven times higher than sequential execution, then the mapping you chose is probably working fine. Unfortunately, the topic of thread-to-core affinity is well beyond the scope of this document. (A web search will uncover many [great resources](http://www.nersc.gov/users/software/programming-models/openmp/process-and-thread-affinity/) discussing the use of [GOMP_CPU_AFFINITY](https://gcc.gnu.org/onlinedocs/libgomp/GOMP_005fCPU_005fAFFINITY.html) and [OMP_PROC_BIND](https://gcc.gnu.org/onlinedocs/libgomp/OMP_005fPROC_005fBIND.html#OMP_005fPROC_005fBIND).) It's up to the user to determine an appropriate affinity mapping, and then choose your preferred method of expressing that mapping to the OpenMP implementation. 
# Specifying multithreading There are three broad methods of specifying multithreading in BLIS: * [Globally via environment variables](Multithreading.md#globally-via-environment-variables) * [Globally at runtime](Multithreading.md#globally-at-runtime) * [Locally at runtime](Multithreading.md#locally-at-runtime) (that is, on a per-call, thread-safe basis) Within these three broad methods there are two specific ways of expressing a request for parallelism. First, the user may express a single number--the total number of threads, or ways of parallelism, to use within a single operation such as `gemm`. We call this the "automatic" way. Alternatively, the user may express the number of ways of parallelism to obtain within *each loop* of the level-3 operation. We call this the "manual" way. The latter way is actually what BLIS eventually needs before it can perform its multithreading; the former is viable only because we have a heuristic of determining a reasonable instance of the latter when given the former. This pattern--automatic or manual--holds regardless of which of the three methods is used. Regardless of which method is employed, and which specific way within each method, after setting the number of threads, the application may call the desired level-3 operation (via either the [typed API](BLISTypedAPI.md) or the [object API](BLISObjectAPI.md)) and the operation will execute in a multithreaded manner. (When calling BLIS via the BLAS API, only the first two (global) methods are available.) **Note**: Please be aware of what happens if you try to specify both the automatic and manual ways, as it could otherwise confuse new users. 
Here are the important points: * Regardless of which broad method is used, **if multithreading is specified via both the automatic and manual ways, the values set via the manual way will always take precedence.** * Specifying parallelism for even *one* loop counts as specifying the manual way (in which case the ways of parallelism for the remaining loops will be assumed to be 1). * If you have specified multithreading via *both* the automatic and manual ways, BLIS will **not** complain if the values are inconsistent with one another. (For example, you may request 8 total threads be used while also specifying 4 ways of parallelism within each of two matrix multiplication loops, for a total of 16 ways.) Furthermore, you will be able to query these inconsistent values via the runtime API both before and after multithreading executes. * If multithreading is disabled, you **may still** specify multithreading values via either the manual or automatic ways. However, BLIS will silently ignore **all** of these values. A BLIS library that is built with multithreading disabled at configure-time will always run sequentially (from the perspective of a single application thread). ## Globally via environment variables The most common method of specifying multithreading in BLIS is globally via environment variables. With this method, the user sets one or more environment variables in the shell before launching the BLIS-linked executable. Regardless of whether you end up using the automatic or manual way of expressing a request for multithreading, note that the environment variables are read (via `getenv()`) by BLIS **only once**, when the library is initialized. Subsequent to library initialization, the global settings for parallelization may only be changed via the [global runtime API](Multithreading.md#globally-at-runtime). If this constraint is not a problem, then environment variables may work fine for you. 
Otherwise, please consider [local settings](Multithreading.md#locally-at-runtime). (Local settings may be used at any time, regardless of whether global settings were explicitly specified, and local settings always override global settings.) **Note**: Regardless of which way ([automatic](Multithreading.md#environment-variables-the-automatic-way) or [manual](Multithreading.md#environment-variables-the-manual-way)) environment variables are used to specify multithreading, that specification will affect operation of BLIS through **both** the BLAS compatibility layer as well as the native [typed](BLISTypedAPI.md) and [object](BLISObjectAPI.md) APIs that are unique to BLIS. ### Environment variables: the automatic way The automatic way of specifying parallelism entails simply setting the total number of threads you wish BLIS to employ in its parallelization. This total number of threads is captured by the `BLIS_NUM_THREADS` environment variable. You can set this variable prior to executing your BLIS-linked executable: ``` $ export GOMP_CPU_AFFINITY="..." # optional step when using GNU libgomp. $ export BLIS_NUM_THREADS=16 $ ./my_blis_program ``` This causes BLIS to automatically determine a reasonable threading strategy based on what is known about the operation and problem size. If `BLIS_NUM_THREADS` is not set, BLIS will attempt to query the value of `OMP_NUM_THREADS`. If neither variable is set, the default number of threads is 1. **Note**: We *highly* discourage use of the `OMP_NUM_THREADS` environment variable and may remove support for it in the future. If you wish to set parallelism globally via environment variables, please use `BLIS_NUM_THREADS`. ### Environment variables: the manual way The manual way of specifying parallelism involves communicating which loops within the matrix multiplication algorithm to parallelize and the degree of parallelism to be obtained from each of those loops. 
The below chart describes the five loops used in BLIS's matrix multiplication operations. | Loop around microkernel | Environment variable | Direction | Notes | |:-------------------------|:---------------------|:----------|:------------| | 5th loop | `BLIS_JC_NT` | `n` | | | 4th loop | _N/A_ | `k` | Not enabled | | 3rd loop | `BLIS_IC_NT` | `m` | | | 2nd loop | `BLIS_JR_NT` | `n` | | | 1st loop | `BLIS_IR_NT` | `m` | | **Note**: Parallelization of the 4th loop is not currently enabled because each iteration of the loop updates the same part of the output matrix C. Thus, to safely parallelize it requires either a reduction or mutex locks when updating C. Parallelization in BLIS is hierarchical. So if we parallelize multiple loops, the total number of threads will be the product of the amount of parallelism for each loop. Thus the total number of threads used is the product of all the values: `BLIS_JC_NT * BLIS_IC_NT * BLIS_JR_NT * BLIS_IR_NT`. Note that if you set at least one of these loop-specific variables, any others that are unset will default to 1. In general, the way to choose how to set these environment variables is as follows: The amount of parallelism from the M and N dimensions should be roughly the same. Thus `BLIS_IR_NT * BLIS_IC_NT` should be roughly equal to `BLIS_JR_NT * BLIS_JC_NT`. Next, which combinations of loops to parallelize depends on which caches are shared. Here are some of the more common scenarios: * When compute resources have private L3 caches (example: multi-socket systems), try parallelizing the `JC` loop. This means threads (or thread groups) will pack and compute with different row panels from matrix B. * For compute resources that have private L2 caches but that share an L3 cache (example: cores on a socket), try parallelizing the `IC` loop. In this situation, threads will share the same packed row panel from matrix B, but pack and compute with different blocks of matrix A. 
* If compute resources share an L2 cache but have private L1 caches (example: pairs of cores), try parallelizing the `JR` loop. Here, threads share the same packed block of matrix A but read different packed micropanels of B into their private L1 caches. In some situations, parallelizing the `IR` loop may also be effective. ![The primary algorithm for level-3 operations in BLIS](http://www.cs.utexas.edu/users/field/mm_algorithm_color.png) ## Globally at runtime If you still wish to set the parallelization scheme globally, but you want to do so at runtime, BLIS provides a thread-safe API for specifying multithreading. Think of these functions as a way to modify the same internal data structure into which the environment variables are read. (Recall that the environment variables are only read once, when BLIS is initialized). **Note**: Regardless of which way ([automatic](Multithreading.md#globally-at-runtime-the-automatic-way) or [manual](Multithreading.md#globally-at-runtime-the-manual-way)) the global runtime API is used to specify multithreading, that specification will affect operation of BLIS through **both** the BLAS compatibility layer as well as the native [typed](docs/BLISTypedAPI.md) and [object](docs/BLISObjectAPI.md) APIs that are unique to BLIS. ### Globally at runtime: the automatic way If you simply want to specify an overall number of threads and let BLIS choose a thread factorization automatically, use the following function: ```c void bli_thread_set_num_threads( dim_t n_threads ); ``` This function takes one integer--the total number of threads for BLIS to utilize in any one operation. So, for example, if we call ```c bli_thread_set_num_threads( 4 ); ``` we are requesting that the global number of threads be set to 4. 
You may also query the global number of threads at any time via ```c dim_t bli_thread_get_num_threads( void ); ``` Which may be called in the usual way: ```c dim_t nt = bli_thread_get_num_threads(); ``` ### Globally at runtime: the manual way If you want to specify the number of ways of parallelism to obtain for each loop, use the following function: ```c void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ); ``` This function takes one integer for each loop in the level-3 operations. (**Note**: even though the function takes a `pc` argument, it will be ignored until parallelism is supported in the `KC` loop.) So, for example, if we call ```c bli_thread_set_ways( 2, 1, 4, 1, 1 ); ``` we are requesting two ways of parallelism in the `JC` loop and 4 ways of parallelism in the `IC` loop. Unlike environment variables, which only allow the user to set the parallelization strategy prior to running the executable, `bli_thread_set_ways()` may be called any time during the normal course of the BLIS-linked application's execution. ## Locally at runtime In addition to the global methods based on environment variables and runtime function calls, BLIS also offers a local, *per-call* method of requesting parallelism at runtime. This method has the benefit of being thread-safe and flexible; your application can spawn two threads at the application level, with each thread requesting different degrees of parallelism from their respective calls to level-3 BLIS operations. As with environment variables and the global runtime API, there are two ways to specify parallelism: the automatic way and the manual way. Both ways involve allocating a BLIS-specific object, initializing the object and encoding the desired parallelization, and then passing a pointer to the object into one of the expert interfaces of either the [typed](docs/BLISTypedAPI.md) or [object](docs/BLISObjectAPI.md) APIs. We provide examples of utilizing this threading object below.
**Note**: Neither way ([automatic](Multithreading.md#locally-at-runtime-the-automatic-way) nor [manual](Multithreading.md#locally-at-runtime-the-manual-way)) of specifying multithreading via the local runtime API can be used via the BLAS interfaces. The local runtime API may *only* be used via the native [typed](docs/BLISTypedAPI.md) and [object](docs/BLISObjectAPI.md) APIs, which are unique to BLIS. (Furthermore, the expert interfaces of each API must be used. This is demonstrated later on in this section.) ### Initializing a rntm_t Before specifying the parallelism (automatically or manually), you must first allocate a special BLIS object called a `rntm_t` (runtime). The object is quite small (about 64 bytes), and so we recommend allocating it statically on the function stack: ```c rntm_t rntm; ``` We **strongly recommend** initializing the `rntm_t`. This can be done in either of two ways. If you want to initialize it as part of the declaration, you may do so via the default `BLIS_RNTM_INITIALIZER` macro: ```c rntm_t rntm = BLIS_RNTM_INITIALIZER; ``` Alternatively, you can perform the same initialization by passing the address of the `rntm_t` to an initialization function: ```c bli_rntm_init( &rntm ); ``` As of this writing, BLIS treats a default-initialized `rntm_t` as a request for single-threaded execution. **Note**: If you choose to **not** initialize the `rntm_t` object, you **must** set its parallelism via either the automatic way or the manual way, described below. 
Passing a completely uninitialized `rntm_t` to a level-3 operation **will almost surely result in undefined behavior!** ### Locally at runtime: the automatic way Once your `rntm_t` is initialized, you may request automatic parallelization by encoding only the total number of threads into the `rntm_t` via the following function: ```c void bli_rntm_set_num_threads( dim_t n_threads, rntm_t* rntm ); ``` As with `bli_thread_set_num_threads()` [discussed previously](Multithreading.md#globally-at-runtime-the-automatic-way), this function takes a single integer. It also takes the address of the `rntm_t` to modify. So, for example, if (after declaring and initializing a `rntm_t` as discussed above) we call ```c bli_rntm_set_num_threads( 6, &rntm ); ``` the `rntm_t` object will be encoded to use a total of 6 threads. ### Locally at runtime: the manual way Once your `rntm_t` is initialized, you may manually encode the ways of parallelism for each loop into the `rntm_t` by using the following function: ```c void bli_rntm_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir, rntm_t* rntm ); ``` As with `bli_thread_set_ways()` [discussed previously](Multithreading.md#globally-at-runtime-the-manual-way), this function takes one integer for each loop in the level-3 operations. It also takes the address of the `rntm_t` to modify. (**Note**: even though the function takes a `pc` argument, it will be ignored until parallelism is supported in the `KC` loop.) So, for example, if we call ```c bli_rntm_set_ways( 1, 1, 2, 3, 1, &rntm ); ``` we are requesting two ways of parallelism in the `IC` loop and three ways of parallelism in the `JR` loop. ### Locally at runtime: using the expert interfaces Regardless of whether you specified parallelism into your `rntm_t` object via the automatic or manual method, eventually you must use the data structure when calling a BLIS operation. Let's assume you wish to call `gemm`.
To do so, simply use the expert interface, which takes two additional arguments: a `cntx_t` (context) and a `rntm_t`. For the context, you may simply pass in `NULL` and BLIS will select a default context (which is exactly what happens when you call the basic/non-expert interfaces). Here is an example of such a call: ```c bli_gemm_ex( &alpha, &a, &b, &beta, &c, NULL, &rntm ); ``` This will cause `gemm` to execute and parallelize in the manner encoded by `rntm`. To summarize, using a `rntm_t` involves three steps: ```c // Declare and initialize a rntm_t object. rntm_t rntm = BLIS_RNTM_INITIALIZER; // Call ONE (not both) of the following to encode your parallelization into // the rntm_t. (These are examples only--use numbers that make sense for your // application!) bli_rntm_set_num_threads( 6, &rntm ); bli_rntm_set_ways( 1, 1, 2, 3, 1, &rntm ); // Finally, call BLIS via an expert interface and pass in your rntm_t. bli_gemm_ex( &alpha, &a, &b, &beta, &c, NULL, &rntm ); ``` Note that `rntm_t` objects may be reused over and over again once they are initialized; there is no need to reinitialize them and re-encode their threading values! Also, you may pass in `NULL` for the `rntm_t*` parameter of an expert interface. This causes the current global settings to be used. # Known issues * **Internal transposition and manual parallelism.** BLIS supports both row- and column-stored matrices (and tensor-like general storage). However, typically the `gemm` microkernel prefers to read and write microtiles of matrix C by rows, or by columns. If the storage of the user-provided matrix C does not match that of the microkernel preference, BLIS logically transposes the entire operation so that by the time the microkernel sees matrix C, it will appear to be stored according to its storage preference.
If the caller is employing the automatic style of parallelism, whereby only the total number of threads is specified, this transposition happens *before* the total number of threads is factored into the various loop-specific ways of parallelism and everything works as expected. However, if the caller employs the manual style of parallelism, the transposition must (by definition) happen *after* the thread factorization is done since, in this situation, the caller has taken responsibility for providing that factorization explicitly. This situation could lead to unexpectedly low multithreaded performance. Suppose the user calls `gemm` on a problem with a large m dimension and small k and n dimensions, and explicitly requests parallelism only in the IC loop, but also suppose that the storage of C does not match that of the microkernel's preference. After BLIS transposes the operation internally, the *effective* m dimension will no longer be large; instead, it will be small (because the original m and n dimension will have been swapped). The multithreaded implementation will then proceed to parallelize this small m dimension. There are currently no good *and* easy solutions to this problem. Eventually, though, we plan to add support for two microkernels per datatype per configuration--one for use with matrices C that are row-stored, and one for those that are column-stored. This will obviate the logic within BLIS that sometimes induces the operation transposition, and the problem will go away. # Conclusion Please send us feedback if you have any concerns or questions, or [open an issue](http://github.com/flame/blis/issues) if you observe any reproducible behavior that you think is erroneous. (You are welcome to use the issue feature to start any non-trivial dialogue; we don't restrict them only to bug reports!) Thanks for your interest in BLIS.
blis-0.6.1/docs/Performance.md000066400000000000000000000607611360743507500162060ustar00rootroot00000000000000# Contents * **[Contents](Performance.md#contents)** * **[Introduction](Performance.md#introduction)** * **[General information](Performance.md#general-information)** * **[Interpretation](Performance.md#interpretation)** * **[Reproduction](Performance.md#reproduction)** * **[Level-3 performance](Performance.md#level-3-performance)** * **[ThunderX2](Performance.md#thunderx2)** * **[Experiment details](Performance.md#thunderx2-experiment-details)** * **[Results](Performance.md#thunderx2-results)** * **[SkylakeX](Performance.md#skylakex)** * **[Experiment details](Performance.md#skylakex-experiment-details)** * **[Results](Performance.md#skylakex-results)** * **[Haswell](Performance.md#haswell)** * **[Experiment details](Performance.md#haswell-experiment-details)** * **[Results](Performance.md#haswell-results)** * **[Epyc](Performance.md#epyc)** * **[Experiment details](Performance.md#epyc-experiment-details)** * **[Results](Performance.md#epyc-results)** * **[Feedback](Performance.md#feedback)** # Introduction This document showcases performance results for a representative sample of level-3 operations on large matrices with BLIS and BLAS for several hardware architectures. # General information Generally speaking, for level-3 operations on large matrices, we publish three "panels" for each type of hardware, each of which reports one of: single-threaded performance, multithreaded performance on a single socket, or multithreaded performance on two sockets. Each panel will consist of a 4x5 grid of graphs, with each row representing a different datatype (single real, double real, single complex, and double complex) and each column representing a different operation (`gemm`, `hemm`/`symm`, `herk`/`syrk`, `trmm`, and `trsm`). 
Each of the 20 graphs within a panel will contain an x-axis that reports problem size, with all matrix dimensions equal to the problem size (e.g. _m_ = _n_ = _k_), resulting in square matrices. The y-axis will report in units of GFLOPS (billions of floating-point operations per second) in the case of single-threaded performance, or GFLOPS/core in the case of single- or dual-socket multithreaded performance, where GFLOPS/core is simply the total GFLOPS observed divided by the number of threads utilized. This normalization is done intentionally in order to facilitate a visual assessment of the drop in efficiency of multithreaded performance relative to their single-threaded baselines. It's also worth pointing out that the top of each graph (e.g. the maximum y-axis value depicted) _always_ corresponds to the theoretical peak performance under the conditions associated with that graph. Theoretical peak performance, in units of GFLOPS/core, is calculated as the product of: 1. the maximum sustainable clock rate in GHz; and 2. the maximum number of floating-point operations (flops) that can be executed per cycle (per core). Note that the maximum sustainable clock rate may change depending on the conditions. For example, on some systems the maximum clock rate is higher when only one core is active (e.g. single-threaded performance) versus when all cores are active (e.g. multithreaded performance). The maximum number of flops executable per cycle (per core) is generally computed as the product of: 1. the maximum number of fused multiply-add (FMA) vector instructions that can be issued per cycle (per core); 2. the maximum number of elements that can be stored within a single vector register (for the datatype in question); and 3. 2.0, since an FMA instruction fuses two operations (a multiply and an add). The problem size range, represented on the x-axis, is usually sampled with 50 equally-spaced problem sizes.
For example, for single-threaded execution, we might choose to execute with problem sizes of 48 to 2400 in increments of 48, or 56 to 2800 in increments of 56. These values are almost never chosen for any particular (read: sneaky) reason; rather, we start with a "good" maximum problem size, such as 2400 or 2800, and then divide it by 50 to obtain the appropriate starting point and increment. Finally, each point along each curve represents the best of three trials. # Interpretation In general, the curves associated with higher-performing implementations will appear higher in the graphs than lower-performing implementations. Ideally, an implementation will climb in performance (as a function of problem size) as quickly as possible and asymptotically approach some high fraction of peak performance. Occasionally, we may publish graphs with incomplete curves--for example, only the first 25 data points in a typical 50-point series--usually because the implementation being tested was slow enough that it was not practical to allow it to finish. Where along the x-axis you focus your attention will depend on the segment of the problem size range that you care about most. Some people's applications depend heavily on smaller problems, where "small" can mean anything from 10 to 1000 or even higher. Some people consider 1000 to be quite large, while others insist that 5000 is merely "medium." What each of us considers to be small, medium, or large (naturally) depends heavily on the kinds of dense linear algebra problems we tend to encounter. No one is "right" or "wrong" about their characterization of matrix smallness or bigness since each person's relative frame of reference can vary greatly.
That said, the [Science of High-Performance Computing](http://shpc.ices.utexas.edu/) group at [The University of Texas at Austin](https://www.utexas.edu/) tends to target matrices that it classifies as "medium-to-large", and so most of the graphs presented in this document will reflect that targeting in their x-axis range. When corresponding with us, via email or when opening an [issue](https://github.com/flame/blis/issues) on github, we kindly ask that you specify as closely as possible (though a range is fine) your problem size of interest so that we can better assist you. # Reproduction In general, we do not offer any step-by-step guide for how to reproduce the performance graphs shown below. That said, if you are keenly interested in running your own performance benchmarks, either in an attempt to reproduce the results shown here or to measure performance of different hardware, of different implementations (or versions), and/or for different problem sizes, you should begin by studying the source code, `Makefile`, and scripts in the [test/3](https://github.com/flame/blis/tree/master/test/3) directory of the BLIS source distribution. Then, you'll need to take time to build and/or install some (or all) of the implementations shown (e.g. [OpenBLAS](https://github.com/xianyi/OpenBLAS), [MKL](https://software.intel.com/en-us/mkl/), and [Eigen](http://eigen.tuxfamily.org)), including BLIS. Be sure to consult the detailed notes provided below; they should be *very* helpful in successfully building the libraries. The `runme.sh` script in `test/3` will help you run some (or all) of the test drivers produced by the `Makefile`, and the Matlab/Octave function `plot_panel_4x5()` defined in the `matlab` directory will help you turn the output of those test drivers into a PDF file of graphs. The `runthese.m` file will contain example invocations of the function.
# Level-3 performance ## ThunderX2 ### ThunderX2 experiment details * Location: Unknown * Processor model: Marvell ThunderX2 CN9975 * Core topology: two sockets, 28 cores per socket, 56 cores total * SMT status: disabled at boot-time * Max clock rate: 2.2GHz (single-core and multicore) * Max vector register length: 128 bits (NEON) * Max FMA vector IPC: 2 * Peak performance: * single-core: 17.6 GFLOPS (double-precision), 35.2 GFLOPS (single-precision) * multicore: 17.6 GFLOPS/core (double-precision), 35.2 GFLOPS/core (single-precision) * Operating system: Ubuntu 16.04 (Linux kernel 4.15.0) * Page size: unknown * Compiler: gcc 7.3.0 * Results gathered: 14 February 2019 * Implementations tested: * BLIS 075143df (0.5.1-39) * configured with `./configure -t openmp thunderx2` (single- and multithreaded) * sub-configuration exercised: `thunderx2` * Single-threaded (1 core) execution requested via no change in environment variables * Multithreaded (28 core) execution requested via `export BLIS_JC_NT=4 BLIS_IC_NT=7` * Multithreaded (56 core) execution requested via `export BLIS_JC_NT=8 BLIS_IC_NT=7` * OpenBLAS 52d3f7a * configured `Makefile.rule` with `BINARY=64 NO_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=0` (single-threaded) * configured `Makefile.rule` with `BINARY=64 NO_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=1 NUM_THREADS=56` (multithreaded, 56 cores) * Single-threaded (1 core) execution requested via `export OPENBLAS_NUM_THREADS=1` * Multithreaded (28 core) execution requested via `export OPENBLAS_NUM_THREADS=28` * Multithreaded (56 core) execution requested via `export OPENBLAS_NUM_THREADS=56` * ARMPL 18.4 * Single-threaded (1 core) execution requested via `export OMP_NUM_THREADS=1` * Multithreaded (28 core) execution requested via `export OMP_NUM_THREADS=28` * Multithreaded (56 core) execution requested via `export OMP_NUM_THREADS=56` * Affinity: * Thread affinity for BLIS was specified manually via `GOMP_CPU_AFFINITY="0 1 2 3 ... 55"`. 
However, multithreaded OpenBLAS appears to revert to single-threaded execution if `GOMP_CPU_AFFINITY` is set. Therefore, when measuring OpenBLAS performance, the `GOMP_CPU_AFFINITY` environment variable was unset. * Frequency throttling (via `cpupower`): * No changes made. * Comments: * ARMPL performance is remarkably uneven across datatypes and operations, though it would appear their "base" consists of OpenBLAS, which they then optimize for select, targeted routines. Unfortunately, we were unable to test the absolute latest versions of OpenBLAS and ARMPL on this hardware before we lost access. We will rerun these experiments once we gain access to a similar system. ### ThunderX2 results #### pdf * [ThunderX2 single-threaded](graphs/large/l3_perf_tx2_nt1.pdf) * [ThunderX2 multithreaded (28 cores)](graphs/large/l3_perf_tx2_jc4ic7_nt28.pdf) * [ThunderX2 multithreaded (56 cores)](graphs/large/l3_perf_tx2_jc8ic7_nt56.pdf) #### png (inline) * **ThunderX2 single-threaded** ![single-threaded](graphs/large/l3_perf_tx2_nt1.png) * **ThunderX2 multithreaded (28 cores)** ![multithreaded (28 cores)](graphs/large/l3_perf_tx2_jc4ic7_nt28.png) * **ThunderX2 multithreaded (56 cores)** ![multithreaded (56 cores)](graphs/large/l3_perf_tx2_jc8ic7_nt56.png) --- ## SkylakeX ### SkylakeX experiment details * Location: Oracle cloud * Processor model: Intel Xeon Platinum 8167M (SkylakeX/AVX-512) * Core topology: two sockets, 26 cores per socket, 52 cores total * SMT status: enabled, but not utilized * Max clock rate: 2.0GHz (single-core and multicore) * Max vector register length: 512 bits (AVX-512) * Max FMA vector IPC: 2 * Peak performance: * single-core: 64 GFLOPS (double-precision), 128 GFLOPS (single-precision) * multicore: 64 GFLOPS/core (double-precision), 128 GFLOPS/core (single-precision) * Operating system: Ubuntu 18.04 (Linux kernel 4.15.0) * Page size: 4096 bytes * Compiler: gcc 7.3.0 * Results gathered: 6 March 2019, 27 March 2019 * Implementations tested: * BLIS 9f1dbe5 
(0.5.1-54) * configured with `./configure -t openmp auto` (single- and multithreaded) * sub-configuration exercised: `skx` * Single-threaded (1 core) execution requested via no change in environment variables * Multithreaded (26 core) execution requested via `export BLIS_JC_NT=2 BLIS_IC_NT=13` * Multithreaded (52 core) execution requested via `export BLIS_JC_NT=4 BLIS_IC_NT=13` * OpenBLAS 0.3.5 * configured `Makefile.rule` with `BINARY=64 NO_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=0` (single-threaded) * configured `Makefile.rule` with `BINARY=64 NO_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=1 NUM_THREADS=52` (multithreaded, 52 cores) * Single-threaded (1 core) execution requested via `export OPENBLAS_NUM_THREADS=1` * Multithreaded (26 core) execution requested via `export OPENBLAS_NUM_THREADS=26` * Multithreaded (52 core) execution requested via `export OPENBLAS_NUM_THREADS=52` * Eigen 3.3.90 * Obtained via the [Eigen git mirror](https://github.com/eigenteam/eigen-git-mirror) (March 27, 2019) * Prior to compilation, modified top-level `CMakeLists.txt` to ensure that `-march=native` was added to `CXX_FLAGS` variable (h/t Sameer Agarwal): ``` # These lines added after line 67. check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE) if(COMPILER_SUPPORTS_MARCH_NATIVE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() ``` * configured and built BLAS library via `mkdir build; cd build; cmake ..; make blas` * The `gemm` implementation was pulled in at compile-time via Eigen headers; other operations were linked to Eigen's BLAS library. 
* Single-threaded (1 core) execution requested via `export OMP_NUM_THREADS=1` * Multithreaded (26 core) execution requested via `export OMP_NUM_THREADS=26` * Multithreaded (52 core) execution requested via `export OMP_NUM_THREADS=52` * **NOTE**: This version of Eigen does not provide multithreaded implementations of `symm`/`hemm`, `syrk`/`herk`, `trmm`, or `trsm`, and therefore those curves are omitted from the multithreaded graphs. * MKL 2019 update 1 * Single-threaded (1 core) execution requested via `export MKL_NUM_THREADS=1` * Multithreaded (26 core) execution requested via `export MKL_NUM_THREADS=26` * Multithreaded (52 core) execution requested via `export MKL_NUM_THREADS=52` * Affinity: * Thread affinity for BLIS was specified manually via `GOMP_CPU_AFFINITY="0 1 2 3 ... 51"`. However, multithreaded OpenBLAS appears to revert to single-threaded execution if `GOMP_CPU_AFFINITY` is set. Therefore, when measuring OpenBLAS performance, the `GOMP_CPU_AFFINITY` environment variable was unset. * Frequency throttling (via `cpupower`): * Driver: acpi-cpufreq * Governor: performance * Hardware limits: 1.0GHz - 2.0GHz * Adjusted minimum: 2.0GHz * Comments: * MKL yields superb performance for most operations, though BLIS is not far behind except for `trsm`. (We understand the `trsm` underperformance and hope to address it in the future.) OpenBLAS lags far behind MKL and BLIS due to lack of full support for AVX-512, and possibly other reasons related to software architecture and register/cache blocksizes. 
### SkylakeX results #### pdf * [SkylakeX single-threaded](graphs/large/l3_perf_skx_nt1.pdf) * [SkylakeX multithreaded (26 cores)](graphs/large/l3_perf_skx_jc2ic13_nt26.pdf) * [SkylakeX multithreaded (52 cores)](graphs/large/l3_perf_skx_jc4ic13_nt52.pdf) #### png (inline) * **SkylakeX single-threaded** ![single-threaded](graphs/large/l3_perf_skx_nt1.png) * **SkylakeX multithreaded (26 cores)** ![multithreaded (26 cores)](graphs/large/l3_perf_skx_jc2ic13_nt26.png) * **SkylakeX multithreaded (52 cores)** ![multithreaded (52 cores)](graphs/large/l3_perf_skx_jc4ic13_nt52.png) --- ## Haswell ### Haswell experiment details * Location: TACC (Lonestar5) * Processor model: Intel Xeon E5-2690 v3 (Haswell) * Core topology: two sockets, 12 cores per socket, 24 cores total * SMT status: enabled, but not utilized * Max clock rate: 3.5GHz (single-core), 3.1GHz (multicore) * Max vector register length: 256 bits (AVX2) * Max FMA vector IPC: 2 * Peak performance: * single-core: 56 GFLOPS (double-precision), 112 GFLOPS (single-precision) * multicore: 49.6 GFLOPS/core (double-precision), 99.2 GFLOPS/core (single-precision) * Operating system: Cray Linux Environment 6 (Linux kernel 4.4.103) * Page size: 4096 bytes * Compiler: gcc 6.3.0 * Results gathered: 25-26 February 2019, 27 March 2019 * Implementations tested: * BLIS 075143df (0.5.1-39) * configured with `./configure -t openmp auto` (single- and multithreaded) * sub-configuration exercised: `haswell` * Single-threaded (1 core) execution requested via no change in environment variables * Multithreaded (12 core) execution requested via `export BLIS_JC_NT=2 BLIS_IC_NT=3 BLIS_JR_NT=2` * Multithreaded (24 core) execution requested via `export BLIS_JC_NT=4 BLIS_IC_NT=3 BLIS_JR_NT=2` * OpenBLAS 0.3.5 * configured `Makefile.rule` with `BINARY=64 NO_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=0` (single-threaded) * configured `Makefile.rule` with `BINARY=64 NO_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=1 NUM_THREADS=24` (multithreaded, 24 
cores) * Single-threaded (1 core) execution requested via `export OPENBLAS_NUM_THREADS=1` * Multithreaded (12 core) execution requested via `export OPENBLAS_NUM_THREADS=12` * Multithreaded (24 core) execution requested via `export OPENBLAS_NUM_THREADS=24` * Eigen 3.3.90 * Obtained via the [Eigen git mirror](https://github.com/eigenteam/eigen-git-mirror) (March 27, 2019) * Prior to compilation, modified top-level `CMakeLists.txt` to ensure that `-march=native` was added to `CXX_FLAGS` variable (h/t Sameer Agarwal): ``` # These lines added after line 67. check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE) if(COMPILER_SUPPORTS_MARCH_NATIVE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() ``` * configured and built BLAS library via `mkdir build; cd build; cmake ..; make blas` * The `gemm` implementation was pulled in at compile-time via Eigen headers; other operations were linked to Eigen's BLAS library. * Single-threaded (1 core) execution requested via `export OMP_NUM_THREADS=1` * Multithreaded (12 core) execution requested via `export OMP_NUM_THREADS=12` * Multithreaded (24 core) execution requested via `export OMP_NUM_THREADS=24` * **NOTE**: This version of Eigen does not provide multithreaded implementations of `symm`/`hemm`, `syrk`/`herk`, `trmm`, or `trsm`, and therefore those curves are omitted from the multithreaded graphs. * MKL 2018 update 2 * Single-threaded (1 core) execution requested via `export MKL_NUM_THREADS=1` * Multithreaded (12 core) execution requested via `export MKL_NUM_THREADS=12` * Multithreaded (24 core) execution requested via `export MKL_NUM_THREADS=24` * Affinity: * Thread affinity for BLIS was specified manually via `GOMP_CPU_AFFINITY="0 1 2 3 ... 23"`. However, multithreaded OpenBLAS appears to revert to single-threaded execution if `GOMP_CPU_AFFINITY` is set. Therefore, when measuring OpenBLAS performance, the `GOMP_CPU_AFFINITY` environment variable was unset. 
* Frequency throttling (via `cpupower`): * No changes made. * Comments: * We were pleasantly surprised by how competitively BLIS performs relative to MKL on this multicore Haswell system, which is a _very_ common microarchitecture, and _very_ similar to the more recent Broadwells, Skylakes (desktop), Kaby Lakes, and Coffee Lakes that succeeded it. ### Haswell results #### pdf * [Haswell single-threaded](graphs/large/l3_perf_has_nt1.pdf) * [Haswell multithreaded (12 cores)](graphs/large/l3_perf_has_jc2ic3jr2_nt12.pdf) * [Haswell multithreaded (24 cores)](graphs/large/l3_perf_has_jc4ic3jr2_nt24.pdf) #### png (inline) * **Haswell single-threaded** ![single-threaded](graphs/large/l3_perf_has_nt1.png) * **Haswell multithreaded (12 cores)** ![multithreaded (12 cores)](graphs/large/l3_perf_has_jc2ic3jr2_nt12.png) * **Haswell multithreaded (24 cores)** ![multithreaded (24 cores)](graphs/large/l3_perf_has_jc4ic3jr2_nt24.png) --- ## Epyc ### Epyc experiment details * Location: Oracle cloud * Processor model: AMD Epyc 7551 (Zen1) * Core topology: two sockets, 4 dies per socket, 2 core complexes (CCX) per die, 4 cores per CCX, 64 cores total * SMT status: enabled, but not utilized * Max clock rate: 3.0GHz (single-core), 2.55GHz (multicore) * Max vector register length: 256 bits (AVX2) * Max FMA vector IPC: 1 * Alternatively, FMA vector IPC is 2 when vectors are limited to 128 bits each.
* Peak performance: * single-core: 24 GFLOPS (double-precision), 48 GFLOPS (single-precision) * multicore: 20.4 GFLOPS/core (double-precision), 40.8 GFLOPS/core (single-precision) * Operating system: Ubuntu 18.04 (Linux kernel 4.15.0) * Page size: 4096 bytes * Compiler: gcc 7.3.0 * Results gathered: 6 March 2019, 19 March 2019, 27 March 2019 * Implementations tested: * BLIS 9f1dbe5 (0.5.1-54) * configured with `./configure -t openmp auto` (single- and multithreaded) * sub-configuration exercised: `zen` * Single-threaded (1 core) execution requested via no change in environment variables * Multithreaded (32 core) execution requested via `export BLIS_JC_NT=1 BLIS_IC_NT=8 BLIS_JR_NT=4` * Multithreaded (64 core) execution requested via `export BLIS_JC_NT=2 BLIS_IC_NT=8 BLIS_JR_NT=4` * OpenBLAS 0.3.5 * configured `Makefile.rule` with `BINARY=64 NO_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=0` (single-threaded) * configured `Makefile.rule` with `BINARY=64 NO_CBLAS=1 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=1 NUM_THREADS=64` (multithreaded, 64 cores) * Single-threaded (1 core) execution requested via `export OPENBLAS_NUM_THREADS=1` * Multithreaded (32 core) execution requested via `export OPENBLAS_NUM_THREADS=32` * Multithreaded (64 core) execution requested via `export OPENBLAS_NUM_THREADS=64` * Eigen 3.3.90 * Obtained via the [Eigen git mirror](https://github.com/eigenteam/eigen-git-mirror) (March 27, 2019) * Prior to compilation, modified top-level `CMakeLists.txt` to ensure that `-march=native` was added to `CXX_FLAGS` variable (h/t Sameer Agarwal): ``` # These lines added after line 67. check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE) if(COMPILER_SUPPORTS_MARCH_NATIVE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() ``` * configured and built BLAS library via `mkdir build; cd build; cmake ..; make blas` * The `gemm` implementation was pulled in at compile-time via Eigen headers; other operations were linked to Eigen's BLAS library. 
* Single-threaded (1 core) execution requested via `export OMP_NUM_THREADS=1` * Multithreaded (32 core) execution requested via `export OMP_NUM_THREADS=32` * Multithreaded (64 core) execution requested via `export OMP_NUM_THREADS=64` * **NOTE**: This version of Eigen does not provide multithreaded implementations of `symm`/`hemm`, `syrk`/`herk`, `trmm`, or `trsm`, and therefore those curves are omitted from the multithreaded graphs. * MKL 2019 update 1 * Single-threaded (1 core) execution requested via `export MKL_NUM_THREADS=1` * Multithreaded (32 core) execution requested via `export MKL_NUM_THREADS=32` * Multithreaded (64 core) execution requested via `export MKL_NUM_THREADS=64` * Affinity: * Thread affinity for BLIS was specified manually via `GOMP_CPU_AFFINITY="0 1 2 3 ... 63"`. However, multithreaded OpenBLAS appears to revert to single-threaded execution if `GOMP_CPU_AFFINITY` is set. Therefore, when measuring OpenBLAS performance, the `GOMP_CPU_AFFINITY` environment variable was unset. * Frequency throttling (via `cpupower`): * Driver: acpi-cpufreq * Governor: performance * Hardware limits: 1.2GHz - 2.0GHz * Adjusted minimum: 2.0GHz * Comments: * MKL performance is dismal, despite being linked in the same manner as on the Xeon Platinum. It's not clear what is causing the slowdown. It could be that MKL's runtime kernel/blocksize selection logic is falling back to some older, more basic implementation because CPUID is not returning Intel as the hardware vendor. Alternatively, it's possible that MKL is trying to use kernels for the closest Intel architectures--say, Haswell/Broadwell--but its implementations use Haswell-specific optimizations that, due to microarchitectural differences, degrade performance on Zen. 
### Epyc results #### pdf * [Epyc single-threaded](graphs/large/l3_perf_epyc_nt1.pdf) * [Epyc multithreaded (32 cores)](graphs/large/l3_perf_epyc_jc1ic8jr4_nt32.pdf) * [Epyc multithreaded (64 cores)](graphs/large/l3_perf_epyc_jc2ic8jr4_nt64.pdf) #### png (inline) * **Epyc single-threaded** ![single-threaded](graphs/large/l3_perf_epyc_nt1.png) * **Epyc multithreaded (32 cores)** ![multithreaded (32 cores)](graphs/large/l3_perf_epyc_jc1ic8jr4_nt32.png) * **Epyc multithreaded (64 cores)** ![multithreaded (64 cores)](graphs/large/l3_perf_epyc_jc2ic8jr4_nt64.png) --- # Feedback Please let us know what you think of these performance results! Similarly, if you have any questions or concerns, or are interested in reproducing these performance experiments on your own hardware, we invite you to [open an issue](https://github.com/flame/blis/issues) and start a conversation with BLIS developers. Thanks for your interest in BLIS! blis-0.6.1/docs/PerformanceSmall.md000066400000000000000000000404611360743507500171720ustar00rootroot00000000000000# Contents * **[Contents](PerformanceSmall.md#contents)** * **[Introduction](PerformanceSmall.md#introduction)** * **[General information](PerformanceSmall.md#general-information)** * **[Interpretation](PerformanceSmall.md#interpretation)** * **[Reproduction](PerformanceSmall.md#reproduction)** * **[Level-3 performance](PerformanceSmall.md#level-3-performance)** * **[Kaby Lake](PerformanceSmall.md#kaby-lake)** * **[Experiment details](PerformanceSmall.md#kaby-lake-experiment-details)** * **[Results](PerformanceSmall.md#kaby-lake-results)** * **[Haswell](PerformanceSmall.md#haswell)** * **[Experiment details](PerformanceSmall.md#haswell-experiment-details)** * **[Results](PerformanceSmall.md#haswell-results)** * **[Epyc](PerformanceSmall.md#epyc)** * **[Experiment details](PerformanceSmall.md#epyc-experiment-details)** * **[Results](PerformanceSmall.md#epyc-results)** * **[Feedback](PerformanceSmall.md#feedback)** # Introduction This 
document showcases performance results for the level-3 `gemm` operation on small matrices with BLIS and BLAS for select hardware architectures. # General information Generally speaking, for level-3 operations on small matrices, we publish two "panels" for each type of hardware, one that reflects performance on row-stored matrices and another for column-stored matrices. Each panel will consist of a 4x7 grid of graphs, with each row representing a different transposition case (`nn`, `nt`, `tn`, `tt`) and each column representing a different shape scenario, usually with one or two matrix dimensions bound to a fixed size for all problem sizes tested. Each of the 28 graphs within a panel will contain an x-axis that reports problem size, with one, two, or all three matrix dimensions equal to the problem size (e.g. _m_ = 6; _n_ = _k_, also encoded as `m6npkp`). The y-axis will report in units of GFLOPS (billions of floating-point operations per second) on a single core. It's also worth pointing out that the top of each graph (e.g. the maximum y-axis value depicted) _always_ corresponds to the theoretical peak performance under the conditions associated with that graph. Theoretical peak performance, in units of GFLOPS, is calculated as the product of: 1. the maximum sustainable clock rate in GHz; and 2. the maximum number of floating-point operations (flops) that can be executed per cycle. Note that the maximum sustainable clock rate may change depending on the conditions. For example, on some systems the maximum clock rate is higher when only one core is active (e.g. single-threaded performance) versus when all cores are active (e.g. multithreaded performance). The maximum number of flops executable per cycle (per core) is generally computed as the product of: 1. the maximum number of fused multiply-add (FMA) vector instructions that can be issued per cycle (per core); 2.
the maximum number of elements that can be stored within a single vector register (for the datatype in question); and 3. 2.0, since an FMA instruction fuses two operations (a multiply and an add). The problem size range, represented on the x-axis, is sampled in increments of 4 up to 800 for the cases where one or two dimensions are small (and constant) and up to 400 in the case where all dimensions (e.g. _m_, _n_, and _k_) are bound to the problem size (i.e., square matrices). Note that the constant small matrix dimensions were chosen to be _very_ small--in the neighborhood of 8--intentionally to showcase what happens when at least one of the matrices is abnormally "skinny." Typically, organizations and individuals only publish performance with square matrices, which can miss the problem sizes of interest to many applications. Here, in addition to square matrices (shown in the seventh column), we also show six other scenarios where one or two `gemm` dimensions (of _m_, _n_, and _k_) are small. The legend in each graph contains two entries for BLIS, corresponding to the two black lines, one solid and one dotted. The dotted line, **"BLIS conv"**, represents the conventional implementation that targets large matrices. This was the only implementation available in BLIS prior to the addition of the small/skinny matrix support. The solid line, **"BLIS sup"**, makes use of the new small/skinny matrix implementation for certain small problems. Whenever these results differ by any significant amount (beyond noise), it denotes a problem size for which BLIS employed the new small/skinny implementation. Put another way, **the delta between these two lines represents the performance improvement between BLIS's previous status quo and the new regime.** Finally, each point along each curve represents the best of three trials.
# Interpretation In general, the curves associated with higher-performing implementations will appear higher in the graphs than lower-performing implementations. Ideally, an implementation will climb in performance (as a function of problem size) as quickly as possible and asymptotically approach some high fraction of peak performance. When corresponding with us, via email or when opening an [issue](https://github.com/flame/blis/issues) on github, we kindly ask that you specify as closely as possible (though a range is fine) your problem size of interest so that we can better assist you. # Reproduction In general, we do not offer any step-by-step guide for how to reproduce the performance graphs shown below. That said, if you are keenly interested in running your own performance benchmarks, either in an attempt to reproduce the results shown here or to measure performance of different hardware, of different implementations (or versions), and/or for different problem sizes, you should begin by studying the source code, `Makefile`, and scripts in the [test/sup](https://github.com/flame/blis/tree/master/test/sup) directory of the BLIS source distribution. Then, you'll need to take time to build and/or install some (or all) of the implementations shown (e.g. [OpenBLAS](https://github.com/xianyi/OpenBLAS), [MKL](https://software.intel.com/en-us/mkl/), [Eigen](http://eigen.tuxfamily.org), [BLASFEO](https://github.com/giaf/blasfeo), and [libxsmm](https://github.com/hfp/libxsmm)), including BLIS. Be sure to consult the detailed notes provided below; they should be *very* helpful in successfully building the libraries. The `runme.sh` script in `test/sup` will help you run some (or all) of the test drivers produced by the `Makefile`, and the Matlab/Octave function `plot_panel_trxsh()` defined in the `octave` directory will help you turn the output of those test drivers into a PDF file of graphs. The `runthese.m` file will contain example invocations of the function.
# Level-3 performance ## Kaby Lake ### Kaby Lake experiment details * Location: undisclosed * Processor model: Intel Core i5-7500 (Kaby Lake) * Core topology: one socket, 4 cores total * SMT status: unavailable * Max clock rate: 3.8GHz (single-core) * Max vector register length: 256 bits (AVX2) * Max FMA vector IPC: 2 * Peak performance: * single-core: 57.6 GFLOPS (double-precision), 115.2 GFLOPS (single-precision) * Operating system: Gentoo Linux (Linux kernel 5.2.4) * Page size: 4096 bytes * Compiler: gcc 8.3.0 * Results gathered: 23-28 August 2019 * Implementations tested: * BLIS 4a0a6e8 (0.6.0-28) * configured with `./configure --enable-cblas auto` * sub-configuration exercised: `haswell` * OpenBLAS 0.3.7 * configured `Makefile.rule` with `BINARY=64 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=0` (single-threaded) * BLASFEO 01f6b7f * configured `Makefile.rule` with: `BLAS_API=1 FORTRAN_BLAS_API=1 CBLAS_API=1`. * Eigen 3.3.90 * Obtained via the [Eigen git mirror](https://github.com/eigenteam/eigen-git-mirror) (28 August 2019) * Prior to compilation, modified top-level `CMakeLists.txt` to ensure that `-march=native` was added to `CXX_FLAGS` variable (h/t Sameer Agarwal): ``` # These lines added after line 67. check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE) if(COMPILER_SUPPORTS_MARCH_NATIVE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() ``` * configured and built BLAS library via `mkdir build; cd build; CC=gcc cmake ..; make blas` * installed headers via `cmake . -DCMAKE_INSTALL_PREFIX=$HOME/flame/eigen; make install` * The `gemm` implementation was pulled in at compile-time via Eigen headers; other operations were linked to Eigen's BLAS library. 
* Requested threading via `export OMP_NUM_THREADS=1` (single-threaded) * MKL 2019 update 4 * Requested threading via `export MKL_NUM_THREADS=1` (single-threaded) * libxsmm 77a295c (1.6.5-6679) * compiled with `make AVX=2`; linked with [netlib BLAS](http://www.netlib.org/blas/) 3.6.0 as the fallback library to better show where libxsmm stops handling the computation internally. * Affinity: * N/A. * Frequency throttling (via `cpupower`): * Driver: intel_pstate * Governor: performance * Hardware limits: 800MHz - 3.8GHz * Adjusted minimum: 3.7GHz * Comments: * libxsmm is highly competitive for very small problems, but quickly gives up once the "large" dimension exceeds about 180-240 (or 64 in the case where all operands are square). Also, libxsmm's `gemm` cannot handle a transposition on matrix A and similarly dispatches the fallback implementation for those cases. libxsmm also does not export CBLAS interfaces, and therefore only appears on the graphs for column-stored matrices. ### Kaby Lake results #### pdf * [Kaby Lake row-stored](graphs/sup/dgemm_rrr_kbl_nt1.pdf) * [Kaby Lake column-stored](graphs/sup/dgemm_ccc_kbl_nt1.pdf) #### png (inline) * **Kaby Lake row-stored** ![row-stored](graphs/sup/dgemm_rrr_kbl_nt1.png) * **Kaby Lake column-stored** ![column-stored](graphs/sup/dgemm_ccc_kbl_nt1.png) --- ## Haswell ### Haswell experiment details * Location: TACC (Lonestar5) * Processor model: Intel Xeon E5-2690 v3 (Haswell) * Core topology: two sockets, 12 cores per socket, 24 cores total * SMT status: enabled, but not utilized * Max clock rate: 3.5GHz (single-core), 3.1GHz (multicore) * Max vector register length: 256 bits (AVX2) * Max FMA vector IPC: 2 * Peak performance: * single-core: 56 GFLOPS (double-precision), 112 GFLOPS (single-precision) * Operating system: Cray Linux Environment 6 (Linux kernel 4.4.103) * Page size: 4096 bytes * Compiler: gcc 7.3.0 * Results gathered: 23-28 August 2019 * Implementations tested: * BLIS 4a0a6e8 (0.6.0-28) * configured with 
`./configure --enable-cblas auto` * sub-configuration exercised: `haswell` * OpenBLAS 0.3.7 * configured `Makefile.rule` with `BINARY=64 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=0` (single-threaded) * BLASFEO 01f6b7f * configured `Makefile.rule` with: `BLAS_API=1 FORTRAN_BLAS_API=1 CBLAS_API=1`. * Eigen 3.3.90 * Obtained via the [Eigen git mirror](https://github.com/eigenteam/eigen-git-mirror) (28 August 2019) * Prior to compilation, modified top-level `CMakeLists.txt` to ensure that `-march=native` was added to `CXX_FLAGS` variable (h/t Sameer Agarwal): ``` # These lines added after line 67. check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE) if(COMPILER_SUPPORTS_MARCH_NATIVE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() ``` * configured and built BLAS library via `mkdir build; cd build; CC=gcc cmake ..; make blas` * installed headers via `cmake . -DCMAKE_INSTALL_PREFIX=$HOME/flame/eigen; make install` * The `gemm` implementation was pulled in at compile-time via Eigen headers; other operations were linked to Eigen's BLAS library. * Requested threading via `export OMP_NUM_THREADS=1` (single-threaded) * MKL 2019 update 4 * Requested threading via `export MKL_NUM_THREADS=1` (single-threaded) * libxsmm 77a295c (1.6.5-6679) * compiled with `make AVX=2`; linked with [netlib BLAS](http://www.netlib.org/blas/) 3.6.0 as the fallback library to better show where libxsmm stops handling the computation internally. * Affinity: * N/A. * Frequency throttling (via `cpupower`): * No changes made. * Comments: * libxsmm is highly competitive for very small problems, but quickly gives up once the "large" dimension exceeds about 180-240 (or 64 in the case where all operands are square). Also, libxsmm's `gemm` cannot handle a transposition on matrix A and similarly dispatches the fallback implementation for those cases. libxsmm also does not export CBLAS interfaces, and therefore only appears on the graphs for column-stored matrices. 
### Haswell results #### pdf * [Haswell row-stored](graphs/sup/dgemm_rrr_has_nt1.pdf) * [Haswell column-stored](graphs/sup/dgemm_ccc_has_nt1.pdf) #### png (inline) * **Haswell row-stored** ![row-stored](graphs/sup/dgemm_rrr_has_nt1.png) * **Haswell column-stored** ![column-stored](graphs/sup/dgemm_ccc_has_nt1.png) --- ## Epyc ### Epyc experiment details * Location: Oracle cloud * Processor model: AMD Epyc 7551 (Zen1) * Core topology: two sockets, 4 dies per socket, 2 core complexes (CCX) per die, 4 cores per CCX, 64 cores total * SMT status: enabled, but not utilized * Max clock rate: 3.0GHz (single-core), 2.55GHz (multicore) * Max vector register length: 256 bits (AVX2) * Max FMA vector IPC: 1 * Alternatively, FMA vector IPC is 2 when vectors are limited to 128 bits each. * Peak performance: * single-core: 24 GFLOPS (double-precision), 48 GFLOPS (single-precision) * Operating system: Ubuntu 18.04 (Linux kernel 4.15.0) * Page size: 4096 bytes * Compiler: gcc 7.4.0 * Results gathered: 23-28 August 2019 * Implementations tested: * BLIS 4a0a6e8 (0.6.0-28) * configured with `./configure --enable-cblas auto` * sub-configuration exercised: `zen` * OpenBLAS 0.3.7 * configured `Makefile.rule` with `BINARY=64 NO_LAPACK=1 NO_LAPACKE=1 USE_THREAD=0` (single-threaded) * BLASFEO 01f6b7f * configured `Makefile.rule` with: `BLAS_API=1 FORTRAN_BLAS_API=1 CBLAS_API=1`. * Eigen 3.3.90 * Obtained via the [Eigen git mirror](https://github.com/eigenteam/eigen-git-mirror) (28 August 2019) * Prior to compilation, modified top-level `CMakeLists.txt` to ensure that `-march=native` was added to `CXX_FLAGS` variable (h/t Sameer Agarwal): ``` # These lines added after line 67. check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE) if(COMPILER_SUPPORTS_MARCH_NATIVE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() ``` * configured and built BLAS library via `mkdir build; cd build; CC=gcc cmake ..; make blas` * installed headers via `cmake . 
-DCMAKE_INSTALL_PREFIX=$HOME/flame/eigen; make install` * The `gemm` implementation was pulled in at compile-time via Eigen headers; other operations were linked to Eigen's BLAS library. * Requested threading via `export OMP_NUM_THREADS=1` (single-threaded) * MKL 2019 update 4 * Requested threading via `export MKL_NUM_THREADS=1` (single-threaded) * libxsmm 77a295c (1.6.5-6679) * compiled with `make AVX=2`; linked with [netlib BLAS](http://www.netlib.org/blas/) 3.6.0 as the fallback library to better show where libxsmm stops handling the computation internally. * Affinity: * N/A. * Frequency throttling (via `cpupower`): * Driver: acpi-cpufreq * Governor: performance * Hardware limits: 1.2GHz - 2.0GHz * Adjusted minimum: 2.0GHz * Comments: * libxsmm is highly competitive for very small problems, but quickly gives up once the "large" dimension exceeds about 180-240 (or 64 in the case where all operands are square). Also, libxsmm's `gemm` cannot handle a transposition on matrix A and similarly dispatches the fallback implementation for those cases. libxsmm also does not export CBLAS interfaces, and therefore only appears on the graphs for column-stored matrices. ### Epyc results #### pdf * [Epyc row-stored](graphs/sup/dgemm_rrr_epyc_nt1.pdf) * [Epyc column-stored](graphs/sup/dgemm_ccc_epyc_nt1.pdf) #### png (inline) * **Epyc row-stored** ![row-stored](graphs/sup/dgemm_rrr_epyc_nt1.png) * **Epyc column-stored** ![column-stored](graphs/sup/dgemm_ccc_epyc_nt1.png) --- # Feedback Please let us know what you think of these performance results! Similarly, if you have any questions or concerns, or are interested in reproducing these performance experiments on your own hardware, we invite you to [open an issue](https://github.com/flame/blis/issues) and start a conversation with BLIS developers. Thanks for your interest in BLIS! 
blis-0.6.1/docs/ReleaseNotes.md000066400000000000000000001524631360743507500163370ustar00rootroot00000000000000# Release Notes *Note: For some releases, credit for individuals' contributions are shown in parentheses.* ## Contents * [Changes in 0.6.1](ReleaseNotes.md#changes-in-061) * [Changes in 0.6.0](ReleaseNotes.md#changes-in-060) * [Changes in 0.5.2](ReleaseNotes.md#changes-in-052) * [Changes in 0.5.1](ReleaseNotes.md#changes-in-051) * [Changes in 0.5.0](ReleaseNotes.md#changes-in-050) * [Changes in 0.4.1](ReleaseNotes.md#changes-in-041) * [Changes in 0.4.0](ReleaseNotes.md#changes-in-040) * [Changes in 0.3.2](ReleaseNotes.md#changes-in-032) * [Changes in 0.3.1](ReleaseNotes.md#changes-in-031) * [Changes in 0.3.0](ReleaseNotes.md#changes-in-030) * [Changes in 0.2.2](ReleaseNotes.md#changes-in-022) * [Changes in 0.2.1](ReleaseNotes.md#changes-in-021) * [Changes in 0.2.0](ReleaseNotes.md#changes-in-020) * [Changes in 0.1.8](ReleaseNotes.md#changes-in-018) * [Changes in 0.1.7](ReleaseNotes.md#changes-in-017) * [Changes in 0.1.6](ReleaseNotes.md#changes-in-016) * [Changes in 0.1.5](ReleaseNotes.md#changes-in-015) * [Changes in 0.1.4](ReleaseNotes.md#changes-in-014) * [Changes in 0.1.3](ReleaseNotes.md#changes-in-013) * [Changes in 0.1.2](ReleaseNotes.md#changes-in-012) * [Changes in 0.1.1](ReleaseNotes.md#changes-in-011) * [Changes in 0.1.0](ReleaseNotes.md#changes-in-010) * [Changes in 0.0.9](ReleaseNotes.md#changes-in-009) * [Changes in 0.0.8](ReleaseNotes.md#changes-in-008) * [Changes in 0.0.7](ReleaseNotes.md#changes-in-007) * [Changes in 0.0.6](ReleaseNotes.md#changes-in-006) * [Changes in 0.0.5](ReleaseNotes.md#changes-in-005) * [Changes in 0.0.4](ReleaseNotes.md#changes-in-004) * [Changes in 0.0.3](ReleaseNotes.md#changes-in-003) * [Changes in 0.0.2](ReleaseNotes.md#changes-in-002) * [Changes in 0.0.1](ReleaseNotes.md#changes-in-001) ## Changes in 0.6.1 January 14, 2020 Improvements present in 0.6.1: Framework: - Added support for pre-broadcast when packing 
B. This causes elements of B to be repeated (broadcast) in the packed copy of B so that subsequent vector loads will result in the element already being pre-broadcast into the vector register. - Added support for selective packing to `gemmsup` (controlled via environment variables and/or the `rntm_t` object). (AMD) - Fixed a bug in `sdsdot_sub()` that redundantly added the "alpha" scalar and a separate bug in the order of typecasting intermediate products in `sdsdot_()`. (Simon Lukas Märtens, Devin Matthews) - Fixed an obscure bug in `bli_acquire_mpart_mdim()`/`bli_acquire_mpart_ndim()`. (Minh Quan Ho) - Fixed a subtle and complicated bug that only manifested via the BLAS test drivers in the `generic` subconfiguration, and possibly any other subconfiguration that did not register complex-domain `gemm` ukernels, or registered ONLY real-domain ukernels as row-preferential. (Dave Love) - Always use `sumsqv` to compute `normfv` instead of the "dot product trick" that was previously employed for performance reasons. (Roman Yurchak, Devin Matthews, and Isuru Fernando) - Fixed bug in `thrinfo_t` debugging/printing code. Kernels: - Implemented and registered an optimized `dgemm` microkernel for the `power9` kernel set. (Nicholai Tukanov) - Pacify a `restrict` warning in the `gemmtrsm4m1` reference ukernel. (Dave Love, Devin Matthews) Build system: - Fixed parsing in `vpu_count()` on some SkylakeX workstations. (Dave Love) - Reimplemented `bli_cpuid_query()` for ARM to use `stdio`-based functions instead of `popen()`. (Dave Love) - Use `-march=znver1` for clang on `zen2` subconfig. - Updated `-march` flags for `sandybridge`, `haswell` subconfigurations to use newer syntax (e.g. `haswell` instead of `core-avx2` and `sandybridge` instead of `corei7-avx`). - Correctly use `-qopenmp-simd` for reference kernels when compiling with icc. (Victor Eijkhout) - Added `-march` support for select gcc version ranges where flag syntax changes or new flags are added.
The ranges we identify are: versions older than 4.9.0; versions older than 6.1.0 (but newer than 4.9.0); versions older than 9.1.0 (but newer than 6.1.0). - Use `-funsafe-math-optimizations` and `-ffp-contract=fast` for all reference kernels when using gcc or clang. - Updated MC cache blocksizes used by `haswell` subconfig. - Updated NC cache blocksizes used by `zen` subconfig. - Fixed a typo in the context registration of the `cortexa53` subconfiguration in `bli_gks.c`. (Francisco Igual) - Output a more informative error when the user manually targets a subconfiguration that configure places in the configuration blacklist. (Tze Meng Low) - Set execute bits of shared library at install-time. (Adam J. Stewart) - Added missing thread-related symbols for export to shared libraries. (Kyungmin Lee) - Removed (finally) the `attic/windows` directory since we offer Windows DLL support via AppVeyor's build artifacts, and thus that directory was only likely confusing people. Testing: - Fixed latent testsuite microkernel module bug for `power9` subconfig. (Jeff Hammond) - Added `test/1m4m` driver directory for test drivers related to the 1m paper. - Added libxsmm support to `test/sup` drivers. (Robert van de Geijn) - Updated `.travis.yml` and `do_sde.sh` to automatically accept SDE license and download SDE directly from Intel. (Devin Matthews, Jeff Hammond) - Updated standalone test drivers to iterate backwards through the specified problem space. This often helps avoid the situation whereby the CPU doesn't immediately throttle up to its maximum clock frequency, which can produce strange discontinuities (sharply rising "cliffs") in performance graphs. - Pacify an unused variable warning in `blastest/f2c/lread.c`. (Jeff Hammond) - Various other minor fixes/tweaks to test drivers. Documentation: - Added libxsmm results to `docs/PerformanceSmall.md`. - Added BLASFEO results to `docs/PerformanceSmall.md`.
- Added the page size and location of the performance drivers to `docs/Performance.md` and `docs/PerformanceSmall.md`. (Dave Love) - Added notes to `docs/Multithreading.md` regarding the nuances of setting multithreading parameters the manual way vs. the automatic way. (Jérémie du Boisberranger) - Added a section on reproduction to `docs/Performance.md` and `docs/PerformanceSmall.md`. (Dave Love) - Documented Eigen `-march=native` hack in `docs/Performance.md` and `docs/PerformanceSmall.md`. (Sameer Agarwal) - Inserted multithreading links and disclaimers to `BuildSystem.md`. (Jeff Diamond) - Fixed typo in description for `bli_?axpy2v()` in `docs/BLISTypedAPI.md`. (Shmuel Levine) - Added "How to Download BLIS" section to `README.md`. (Jeff Diamond) - Various other minor documentation fixes. ## Changes in 0.6.0 June 3, 2019 Improvements present in 0.6.0: Framework: - Implemented small/skinny/unpacked (sup) framework for accelerated level-3 performance when at least one matrix dimension is small (or very small). For now, only `dgemm` is optimized, and this new implementation currently only targets Intel Haswell through Coffee Lake, and AMD Zen-based Ryzen/Epyc. (The existing kernels should extend without significant modification to Zen2-based Ryzen/Epyc once they are available.) Also, multithreaded parallelism is not yet implemented, though application-level threading should be fine. (AMD) - Changed function pointer usages of `void*` to new, typedef'ed type `void_fp`. - Allow compile-time disabling of BLAS prototypes in BLIS, in case the application already has access to prototypes. - In `bli_system.h`, define `_POSIX_C_SOURCE` to `200809L` if the macro is not already defined. This ensures that things such as pthreads are properly defined by an application that has `#include "blis.h"` but omits the definition of `_POSIX_C_SOURCE` from the command-line compiler options. (Christos Psarras) Kernels: - None. 
Build system: - Updated the way configure and the top-level Makefile handle installation prefixes (`prefix`, `exec_prefix`, `libdir`, `includedir`, `sharedir`) to better conform with GNU conventions. - Improved clang version detection. (Isuru Fernando) - Use pthreads on MinGW and Cygwin. (Isuru Fernando) Testing: - Added Eigen support to test drivers in `test/3`. - Fix inadvertently hidden `xerbla_()` in blastest drivers when building only shared libraries. (Isuru Fernando, M. Zhou) Documentation: - Added `docs/PerformanceSmall.md` to showcase new BLIS small/skinny `dgemm` performance on Kaby Lake and Epyc. - Added Eigen results (3.3.90) to performance graphs showcased in `docs/Performance.md`. - Added BLIS thread factorization info to `docs/Performance.md`. ## Changes in 0.5.2 March 19, 2019 Improvements present in 0.5.2: Framework: - Added support for IC loop parallelism to the `trsm` operation. - Implemented a pool-based small block allocator and a corresponding `configure` option (enabled by default), which minimizes the number of calls to `malloc()` and `free()` for the purposes of allocating small blocks (on the order of 100 bytes). These small blocks are used by internal data structures, and the repeated allocation and freeing of these structures could, perhaps, cause memory fragmentation issues in certain application circumstances. This was never reproduced and observed, however, and remains entirely theoretical. Still, the sba should be no slower, and perhaps a little faster, than repeatedly calling `malloc()` and `free()` for these internal data structures. Also, the sba was designed to be thread-safe. (AMD) - Refined and extended the output enabled by `--enable-mem-tracing`, which allows a developer to follow memory allocation and release performed by BLIS. - Initialize error messages at compile-time rather than at runtime. 
(Minh Quan Ho) - Fixed a potential situation whereby the multithreading parameters in a `rntm_t` object that is passed into an expert interface are ignored. - Prevent a redefinition of `ftnlen` in the `f2c_types.h` in blastest. (Jeff Diamond) Kernels: - Adjusted the cache blocksizes in the `zen` sub-configuration for `float`, `scomplex`, and `dcomplex` datatypes. The previous values, taken directly from the `haswell` subconfig, were merely meant to be reasonable placeholders until more suitable values were determined, as had already taken place for the `double` datatype. (AMD) - Rewrote reference kernels in terms of simplified indexing annotated by the `#pragma omp simd` directive, which a compiler can use to vectorize certain constant-bounded loops. The `#pragma` is disabled via a preprocessor macro layer if the compiler is found by `configure` to not support `-fopenmp-simd`. (Devin Matthews, Jeff Hammond) Build system: - Added symbol-export annotation macros to all of the function prototypes and global variable declarations for public symbols, and created a new `configure` option, `--export-shared=[public|all]`, that controls which symbols--only those that are meant to be public, or all symbols--are exported to the shared library. (Isuru Fernando) - Standardized to using `-O3` in various subconfigs, and also `-funsafe-math-optimizations` for reference kernels. (Dave Love, Jeff Hammond) - Disabled TBM, XOP, LWP instructions in all AMD subconfigs. (Devin Matthews) - Fixed issues that prevented using BLIS on GNU Hurd. (M. Zhou) - Relaxed python3 requirements to allow python 3.4 or later. Previously, python 3.5 or later was required if python3 was being used. (Dave Love) - Added `thunderx2` sub-configuration. (Devangi Parikh) - Added `power9` sub-configuration. For now, this subconfig only uses reference kernels.
(Nicholai Tukanov) - Fixed an issue with `configure` failing on OSes--including certain flavors of BSD--that contain a slash '/' character in the output of `uname -s`. (Isuru Fernando, M. Zhou) Testing: - Renamed `test/3m4m` directory to `test/3`. - Lots of updates and improvements to Makefiles, shell scripts, and matlab scripts in `test/3`. Documentation: - Added a new `docs/Performance.md` document that showcases single-threaded, single-socket, and dual-socket performance results of `single`, `double`, `scomplex`, and `dcomplex` level-3 operations in BLIS, OpenBLAS, and MKL/ARMPL for Haswell, SkylakeX, ThunderX2, and Epyc hardware architectures. (Note: Other implementations such as Eigen and ATLAS may be added to these graphs in the future.) - Updated `README.md` to include new language on external packages. (Dave Love) - Updated `docs/Multithreading.md` to be more explicit about the fact that multithreading is disabled by default at configure-time, and the fact that BLIS will execute single-threaded at runtime by default if no multithreaded specification is given. (M. Zhou) ## Changes in 0.5.1 December 18, 2018 Improvements present in 0.5.1: Framework: - Added mixed-precision support to the 1m method implementation. - Track internal scalar datatypes in the `obj_t` info bitfield. This allows slightly better handling of scalars during mixed-datatype `gemm` computation. - Fixed a bug that allowed execution of 1m with mixed-precision `gemm`, despite such usage not yet being officially supported. (Devangi Parikh) - Added missing internal calls to `bli_init_once()` in `bli_thread_set_num_threads()` and `bli_thread_set_ways()`. (Ali Emre Gülcü) Kernels: - Redefined `packm` kernels to handle edge cases and zero-filling, and updated their APIs accordingly. This was needed in order to fully support the use of non-default/non-reference packm kernels. 
(Devin Matthews) Build system: - Disallow explicit requests to use 64-bit integers in the BLAS API while simultaneously using 32-bit integers in the BLIS API. (Jeff Hammond, Devin Matthews) - Fixed an msys2/Windows build failure. (Isuru Fernando, Costas Yamin) - Fixed a MinGW build failure. (Isuru Fernando) - Disabled `arm32`, `arm64` configuration families since we don't yet have logic to choose the correct context at runtime. Testing: - Make sure the testsuite fails for `NaN`, `Inf` in input operands. (Devin Matthews) - Added `hemm` driver to `test/3m4m`. - Minor updates to `test/mixeddt` drivers, matlab scripts. - Added additional matlab plotting scripts to `test/3m4m`. Documentation: - Updated `docs/Multithreading.md` to include discussion of setting affinity via OpenMP. - Updated `docs/Testsuite.md` to include discussion of mixed-datatype settings. - Updated `docs/MixedDatatypes.md` to include a brief section on running the testsuite to exercise mixed-datatype functionality, and other minor updates. - Fixed broken links in `docs/KernelsHowTo.md`. (Richard Goldschmidt) - Spelling fixes in FAQ. (Rhys Ulerich) - Updated 3-clause license comment blocks to refer generically to copyright holders rather than just the original copyright holder, UT-Austin. ## Changes in 0.5.0 October 25, 2018 Improvements present in 0.5.0: Framework: - Implemented support for matrix operands of mixed datatypes (domains and precisions) within the `gemm` operation. - Added configure-time option to use slab or round-robin partitioning within JR and IR loops of most level-3 operations' macrokernels. - Allow parallelism in the JC loop for `trsm_l`, which previously was unnecessarily disabled. (Field Van Zee, Devangi Parikh) - Added Fortran-77/90-compatible APIs for some thread-related functions. (Kay Dewhurst) - Defined a new level-1d operation `shiftd`, which adds a scalar value to every element along an arbitrary diagonal of a matrix. 
- Patched an issue (#267) that may arise when linking against OpenMP-configured BLIS from which parallelism is requested at runtime and a level-3 operation (e.g. `gemm`) is called from within an OpenMP parallel region of an application where OpenMP nested parallelism is disabled. (Devin Matthews) Kernels: - Imported SkylakeX `dgemm` microkernel from `skx-redux` branch, which contains optimizations (mostly better prefetching on C) over the previous implementation. (Devin Matthews) - Renamed/relocated level-3 `zen` microkernels to the `haswell` kernel set. Please see a recent message to blis-devel for more information on this rename [1]. - BG/Q kernel fixes. (Ye Luo) Build system: - Added support for building Windows DLLs via AppVeyor [2], complete with a built-in implementation of pthreads for Windows, as well as an implementation of the `pthread_barrier_*()` APIs for use on OS X. (Isuru Fernando, Devin Matthews, Mathieu Poumeyrol, Matthew Honnibal) - Defined a `cortexa53` sub-configuration, which is similar to `cortexa57` except that it uses slightly different compiler flags. (Mathieu Poumeyrol) - Added python version checking to `configure` script. - Added a script to automate the regeneration of the symbols list file (now located in `build/libblis-symbols.def`). - Various tweaks in preparation for BLIS's inclusion within Debian. (M. Zhou) - Various fixes and cleanups. Testing: - Added tests for `cortexa15` and `cortexa57` in Travis CI. (Mathieu Poumeyrol) - Added tests for mixed-datatype `gemm` and the simulation of application-level threading (salt) in Travis CI. - Add statistics-collecting `irun.py` script. - Include various threading parameters in the initial comment block of testsuite output. - Various fixes and cleanups. Documentation: - Added `MixedDatatypes.md` documentation for mixed-datatype `gemm`. - Added example code demonstrating use of mixed-datatype `gemm` (object API only). 
- Added description of `shiftd` to `BLISTypedAPI.md` and `BLISObjectAPI.md`. - Added "Known issues" sections to `Multithreading.md` and `Sandboxes.md`. - Updated `FAQ.md`. - Various other documentation updates. [1] https://groups.google.com/forum/?fromgroups#!topic/blis-devel/pytWRjIzxVY [2] https://ci.appveyor.com/project/shpc/blis/ ## Changes in 0.4.1 August 30, 2018 Improvements present in 0.4.1: Framework: - Improved thread safety by homogenizing all critical sections to unconditionally use pthread mutexes. (AMD) - Fixed `bli_finalize()`, which had become uncallable due to sharing `pthread_once_t` objects between the initialization and finalization steps. This manifested as a rather large memory leak (many megabytes) if/when the application manually finalized BLIS in the middle of its execution. (Devangi Parikh, Field Van Zee) - Fixed a minor memory leak in the global kernel structure. (Devangi Parikh, Field Van Zee) - Replaced extensive use of function "chooser" macros in object API functions with use of a new set of functions using the suffix `_qfp()` ("query function pointer"). These functions can be used to query function pointers for most families of typed functions. - Fixed an obscure integer size bug due to improper use of integer literal constants with `va_arg()`. This oddly manifested as LP64 systems using the general stride output case of microkernels even when the output matrix storage matched that of the microkernel output preference. (Devangi Parikh, Field Van Zee) Kernels: - Fixed compilation of `armv7a` kernels. (Mathieu Poumeyrol) Build system: - Generate makefile fragments within the `obj` directory rather than in `config`, `kernels`, `ref_kernels`, and `frame`. This allows a user to perform an out-of-tree build even if the BLIS source distribution is read-only. 
(Devin Matthews) - Allow a dependent sub-project such as example code or the testsuite to compile and link against an installation of BLIS rather than implicitly searching for a local (uninstalled) copy. (Victor Eijkhout, Field Van Zee) - Fixed a link error that manifested after building only a shared library (e.g. `--disable-static`) and then trying to build a dependent sub-project such as example code or the testsuite. (Sajid Ali) - Changed `test` make target of top-level `Makefile` to behave more like `check` by printing a color-coded characterization of the test results. - Fixed the `-p` option to `configure`, which had likely been broken since May 7, 2018. The `--prefix` option was unaffected. (Dave Love) - Running `configure` no longer requires a C++ compiler given that a C++ compiler was only ever envisioned for *optional* use in the sandbox. (Devangi Parikh, Field Van Zee) Testing: - Added the ability to "simulate" multiple application-level threads in the testsuite by executing the individual experiments with multiple threads. This should make it easier to test for thread-safety in the future. (AMD) - Removed borderline useless wall clock time from test drivers' output. Documentation: - Updated typed and object API documents to include language on `rntm_t` parameters in the expert interfaces. - Updates to `README.md`, including language on sandboxes. - Added table of make targets to `BuildSystem.md`. - Added missing language to `ConfigurationHowTo.md` on updating the architecture string array in `bli_arch.c`. (Devangi Parikh, Field Van Zee) ## Changes in 0.4.0 July 27, 2018 Framework: - Added support for "sandboxes" for employing alternative `gemm` implementations. A ready-to-use reference C99 sandbox provides developers with a starting point for experimentation. - Separated expert, non-expert typed APIs (levels 1v, 1d, 1f, 1m, 2, and 3, and utility functions). 
- Defined new `rntm_t` structure and API to provide a uniform way of storing user-level threading information (equivalent of `BLIS_NUM_THREADS` and `BLIS_*_NT` environment variables), and also conveying that information to expert APIs. (Matthew Honnibal, Nathaniel Smith) - Renamed various `obj_t` accessor macros, converted to static functions, and inserted explicit typecasting to facilitate #including blis.h from a C++ application. (Jacob Gorm Hansen) - Cache and reuse `arch_t` architecture query result at runtime. (Devin Matthews) - Implemented object-based functions `bli_projm()`/`_projv()`, which project objects from one domain to another (within the same precision), and `bli_castm()`/`_castv()`, which typecast objects from one datatype to another. - Implemented object-based functions `bli_setrm()`/`_setrv()`, `bli_setim()`/`_setiv()`, which allow the caller to broadcast a scalar to all real elements or all imaginary elements within an object. - Enforce consistent datatypes in most object APIs. - For native execution, initialize a context's virtual microkernel slots to the function pointers of native microkernels. This simplifies query routines and paves the way for more generalized use of virtual microkernels beyond those for induced methods. - Various bugfixes. (Devangi Parikh) Kernels: - Re-expressed x86_64 microkernels in terms of assembly language macros, which support lower- and upper-case, AT&T and Intel syntax. (Devin Matthews) - Various bugfixes. (Robin Christ, Francisco Igual, Devangi Parikh, qnerd) Build system: - Added support for `--libdir`, `--includedir` configure options. (Nico Schlömer) - Adopted Linux-like shared library versioning and enabled building shared libraries by default. - Improved shared library handling on OS X. (Alex Arslan) - Added configure support for preset `CFLAGS`, `LDFLAGS`. (Dave Love) - Improvements to version file handling. - Implemented configure option hack for circumventing small/limited values of `ARG_MAX`. 
- Reorganized `cc`, `cc_vendor` detection responsibilities from `Makefile` to `configure`. (Alex Arslan) - Cross-compilation fixes. - Preliminary Windows ABI support using `clang`, appveyor. (Isuru Fernando) - Better support for typical development environment on OpenBSD, FreeBSD. (Alex Arslan) - Bumped shared library `soname` version number to 1.0.0. - Various build system fixes and cleanups. (Mathieu Poumeyrol, Nico Schlömer, Tony Skjellum) Testing: - Rewrote Travis CI testing config file and supporting logic to use Intel's SDE emulator. This allows multiple x86_64 microarchitectures to be tested regardless of what hardware Travis happens to be using at the time. (Devin Matthews) - Added `docs/studies` hardware-specific test driver directory to track individual performance studies. (Devangi Parikh) - Streamlined `testsuite/input.operations` file format. Documentation: - Relocated all wiki documents to a `docs` directory and adjusted all links, and `README.md`, accordingly. - Added a `CONTRIBUTING.md` file to top-level directory. - Added `docs/CodingConventions.md`. - Added `docs/Sandboxes.md`. - Added `docs/BLISObjectAPI.md`. - Renamed and updated `docs/BLISTypedAPI.md`. - Updated `docs/KernelsHowTo.md`. - Updated `docs/BuildSystem.md`. (Stefanos Mavros) - Updated `docs/Multithreading.md`. - Updated indentation in `docs/ConfigurationHowTo.md` for easier reading. - Added example code for the BLIS typed API in `examples/tapi`. - Expanded existing example code for the object API in `examples/oapi`. - Added links to RHEL/Fedora and Debian packages to `README.md`. - Various cleanups. (Tony Skjellum, Dave Love, Nico Schlömer) ## Changes in 0.3.2 April 28, 2018 - Added `setijm`, `getijm` operations for updating and querying individual matrix elements via the object API. - Added `examples/oapi` directory containing a code-based tutorial on using the object-based API in BLIS. - Track separate reference kernel `CFLAGS` for each sub-configuration. 
- Added support for blacklisting sub-configurations based on the assembler/binutils. - Added 64-bit support to BLAS test drivers. - Various bugfixes. ## Changes in 0.3.1 April 4, 2018 - Enable use of new zen kernels in haswell sub-configuration. - Added row-storage optimizations to zen `dotxf` kernels (now also used by haswell). - Integrated an `f2c`ed version of the BLAS test drivers from netlib LAPACK into BLIS build system (e.g. `make testblas`, `make checkblas`). See the [Testsuite](Testsuite.md) document for more info. Also scheduled these BLAS drivers to execute regularly via Travis CI. - Added a new `make check` target that executes a fast version of the BLIS testsuite as well as the BLAS test drivers (primarily targeting package maintainers). - Allow individual operation overriding in the BLIS testsuite. (This makes it easy to quickly test one or two operations of interest.) - Added build system support for libmemkind. If present, `hbw_malloc()` is used as the default value for `BLIS_MALLOC_POOL` instead of `malloc()`. It can be disabled via `--disable-memkind`. - Tweaks and fixes to BLAS compatibility layer, courtesy of the new BLAS test drivers. - Output the active sub-configuration in testsuite output header. - Allow arbitrary nesting of "umbrella" configuration families in `config_registry`, allowing us to define x86_64 in terms of amd64 and intel64. - Added skx and knl to intel64 (and by proxy, x86_64) configuration families. - Implemented basic support for ARM hardware detection (via `/proc/cpuinfo`). - Various bugfixes. ## Changes in 0.3.0 February 23, 2018 This version contains significant improvements from 0.2.2. Major changes include: - Real and complex domain (s,d,c,z) assembly-based gemm microkernels for AMD's Zen microarchitecture. (AMD, Field Van Zee) - Real domain (s,d) assembly-based `gemmtrsm_l` and `gemmtrsm_u` microkernels for Zen. 
(AMD, Field Van Zee) - Real domain (s,d) intrinsics-based `amaxv`, `axpyv`, `dotv`, `dotxv`, `scalv`, `axpyf`, and `dotxf` kernels for Zen. (AMD, Field Van Zee) - Generalized the configuration system to allow multi-configuration builds targeting configuration "families". A single sub-configuration is chosen at runtime via some heuristic, such as querying CPUID (e.g. runtime hardware detection). This change was extensive and required a reorganization of the build system, configuration semantics, reference kernels, a new naming scheme for native kernels, and a rewrite of the global kernel structure (gks). Please see the rewritten [Configuration Guide](ConfigurationHowTo.md) for details. - Implemented runtime hardware detection for x86_64 hardware. - Reimplemented configure-time hardware detection in terms of new runtime hardware detection code, which queries for CPU features rather than individual models. - Implemented library self-initialization by rewriting `bli_init()` in terms of `pthread_once()` and inserting invocations to `bli_init()` in key places throughout BLIS. The expectation is that through normal use of any BLIS API (BLAS, typed BLIS, or object-based BLIS), the user no longer needs to explicitly initialize the library, and that `bli_finalize()` should never be called by the user unless he is absolutely sure he no longer needs BLIS functionality. Related to this: global scalar constants (`BLIS_ONE`, `BLIS_ZERO`, etc.) are now statically initialized and thus ready to use immediately. Collectively, these changes provide improved thread safety at the application level. - Compile with and install a single monolithic (flattened) `blis.h` header to (1) speed up compilation and (2) reduce the number of build product files. - Added a sub-API for setting multithreading environment variables at runtime. For a few examples, please see the [Multithreading](Multithreading.md) guide. - Reimplemented OpenMP/pthread barriers in terms of GNU atomic built-ins. 
- Other small changes and fixes. ## Changes in 0.2.2 May 2, 2017 - Implemented the 1m method for inducing complex matrix multiplication. (Please see ACM TOMS publication ["Implementing high-performance complex matrix multiplication via the 1m method"](https://github.com/flame/blis#citations) for more details.) - Switched to simpler `trsm_r` implementation. - Relaxed constraints that `MC % NR = 0` and `NC % MR = 0`, as this was only needed for the more sophisticated `trsm_r` implementation. - Automatic loop thread assignment. (Devin Matthews) - Updates to `.travis.yml` configuration file. (Devin Matthews) - Updates to non-default haswell microkernels. - Match storage format of the temporary micro-tiles in macrokernels to that of the microkernel storage preference for edge cases. - Added support for Intel's Knight's Landing. (Devin Matthews) - Added more flexible options to specify multithreading via the configure script. (Devin Matthews) - OS X compatibility fixes. (Devin Matthews) - Other small changes and fixes. Also, thanks to Elmar Peise, Krzysztof Drewniak, and Francisco Igual for their contributions in reporting/fixing certain bugs that were addressed in this version. ## Changes in 0.2.1 October 5, 2016 - Implemented distributed `thrinfo_t` structure management. (Ricardo Magana) - Redesigned BLIS's level-3 algorithmic control tree structure. (suggested by Tyler Smith) - Consolidated `gemm`, `herk`, and `trmm` blocked variants into one set of three bidirectional variants. - Integrated a new "memory broker" (`membrk_t`) abstraction in place of the previous memory allocator, which allows one set of pools per broker (or, in other words, per memory space). (Ricardo Magana) - Reorganized multithreading APIs, including more consistent namespace prefixes: `bli_thrinfo_*()`, `bli_thrcomm_*()`, etc. - Added `randnm`, `randnv` operations, which produce random powers of two in a narrow range, and integrated a corresponding option into the testsuite. 
(suggested by AMD) - Reclassified `amaxv` as a level-1v operation and kernel. - Added complex `gemm` microkernels for haswell, which have register allocations consistent with the existing 6x16 `sgemm` and 6x8 `dgemm` microkernels. - Adjusted existing microkernels to work properly when BLIS is configured to use 32-bit integers. (Devin Matthews) - Relaxed alignment constraints in sandybridge and haswell microkernels. (Devin Matthews) - Define CBLAS API with `f77_int` instead of `int`, which means the BLAS compatibility integer size is inherited by the CBLAS compatibility layer. (Devin Matthews) - Added an alignment switch to the testsuite to globally enable/disable starting address and leading dimension alignment. (suggested by Devin Matthews) - Various enhancements to configure script. (Devin Matthews) - Avoid compiling BLAS/CBLAS compatibility layer when it is disabled via configure. (suggested by Devin Matthews) - Disabled compilation of object-based blocked partitioning code for level-2 operations, as it was already functionally disabled. - Fixes and tweaks to POSIX thread support. (Tyler Smith, Jeff Hammond) - Other small changes and fixes. ## Changes in 0.2.0 April 11, 2016 Most of BLIS 0.2.0's changes are contained within a single commit, 537a1f4 (aka "the big commit"). An executive summary of the most consequential of these changes follows: - BLIS has been retrofitted with a new data structure, known as a "context," affecting virtually every internal API for every computational operation, as well as many supporting, non-computational functions that must access information within the context. - In addition to appearing within these internal APIs, the context--specifically, a pointer to a `cntx_t`--is now present within all user-level datatype-aware APIs, e.g. `bli_zgemm()`, appearing as the last argument. - User-level object APIs, e.g. `bli_gemm()`, were unaffected and continue to be "context-free." 
However, these APIs were duplicated so that corresponding "context-aware" APIs now also exist, differentiated with an `_ex` suffix (for "expert"). - Contexts are initialized very soon after a computational function is called (if one was not passed in by the caller) and are passed all the way down the function stack, even into the kernels, and thus allow the code at any level to query information about the runtime instantiation of the current operation being executed, such as kernel addresses, microkernel storage preferences, and cache/register blocksizes. - Contexts are thread-friendly. For example, consider the situation where a developer wishes two or more threads to execute simultaneously with somewhat different runtime parameters. Contexts also inherently promote thread-safety, such as in the event that the original source of the information stored in the context changes at run-time (see next two bullets). - BLIS now consolidates virtually all kernel/hardware information in a new "global kernel structure" (gks) API. This new API will allow the caller to initialize a context in a thread-safe manner according to the currently active kernel configuration. For now, the currently active configuration cannot be changed once the library is built. However, in the future, this API will be expanded to allow run-time management of kernels and related parameters. - The most obvious application of this new infrastructure is the run-time detection of hardware (and the implied selection of appropriate kernels). With contexts, kernels may even be "hot swapped" within the gks, and once execution begins on a level-3 operation, the memory allocator will be reinitialized on-the-fly, if necessary, to accommodate the new kernels' blocksizes. 
If a different application thread is executing with another (previously loaded) kernel, it will finish in a deterministic fashion because its kernel info was loaded into its context before computation began, and also because the blocks it checked out from the memory pools will be unaffected by the newer threads' reinitialization of the allocator. This version contains other changes that were committed prior to 537a1f4: - Inline assembly FMA4 microkernels for AMD bulldozer. (Etienne Sauvage) - A more feature-rich configure script and build system. Certain long-style options are now accepted, including convenient command-line switches for things like enabling debugging symbols. Important definitions were also consolidated into a new makefile fragment, `common.mk`, which can be included by the BLIS build system as well as quasi-independent build systems, such as the BLIS test suite. (Devin Matthews) - Updated and improved armv8 microkernels. (Francisco Igual) - Define `bli_clock()` in terms of `clock_gettime()` instead of `gettimeofday()`, which has been languishing on my to-do list for years, literally. (Devin Matthews) - Minor but extensive modifications to parts of the BLAS compatibility layer to avoid potential namespace conflicts with external user code when `blis.h` is included. (Devin Matthews) - Fixed a missing BLIS integer type definition (`BLIS_BLAS2BLIS_INT_TYPE_SIZE`) when CBLAS was enabled. Thanks to Tony Kelman for reporting this bug. - Merged `packm_blk_var2()` into `packm_blk_var1()`. The former's functionality is used by induced methods for complex level-3 operations. (Field Van Zee) - Subtle changes to treatment of row and column strides in `bli_obj.c` that pertain to somewhat unusual use cases, in an effort to support certain situations that arise in the context of tensor computations. (Devin Matthews) - Fixed an unimplemented `beta == 0` case in the penryn (formerly "dunnington") `sgemm` microkernel. 
(Field Van Zee) - Enhancements to the internal memory allocator in anticipation of the context retrofit. (Field Van Zee) - Implemented so-called "quadratic" matrix partitioning for thread-level parallelism, whereby threads compute thread index ranges to produce partitions of roughly equal area (and thus computation), subject to the (register) blocksize multiple, even when given a structured rectangular subpartition with an arbitrary diagonal offset. Thanks to Devangi Parikh for reporting bugs related to this feature. (Field Van Zee) - Enabled use of Travis CI for automatic testing of github commits and pull requests. (Xianyi Zhang) - New `README.md`, written in github markdown. (Field Van Zee) - Many other minor bug fixes. Special thanks go to Lee Killough for suggesting the use of a "context" data structure in discussions that transpired years ago, during the early planning stages of BLIS, and also for suggesting such a perfectly appropriate name. ## Changes in 0.1.8 July 29, 2015 This release contains only two commits, but they are non-trivial: we now have configuration support for AMD Excavator (Carrizo) and microkernels for Intel Haswell/Broadwell. ## Changes in 0.1.7 June 19, 2015 - Replaced the static memory allocator used to manage internal packing buffers with one that dynamically allocates memory, on-demand, and then recycles the allocated blocks in a software cache, or "pool". This significantly simplifies the memory-related configuration parameter set, and it completely eliminates the need to specify a maximum number of threads. - Implemented default values for all macro constants previously found in `bli_config.h`. The default values are now set in `frame/include/bli_config_macro_defs.h`. Any value #defined in `bli_config.h` will override these defaults. - Initial support for configure-time detection of hardware. By specifying the `auto` configuration at configure-time, the configure script chooses a configuration for you. 
If an optimized configuration does not exist, the reference implementation serves as a fallback. - Completely reorganized implementations for complex induced methods and added support for new algorithms. - Added optimized microkernels for AMD Piledriver family of hardware. - Several bugfixes to multithreaded execution. - Various other minor tweaks, code reorganizations, and bugfixes. ## Changes in 0.1.6 October 23, 2014 - New complex domain AVX microkernels are now available and used by default by the sandybridge configuration. - Added new high-level 4m and 3m implementations presently known as "4mh" and "3mh". - Cleaned up 4m/3m front-end layering and added routines to enable, disable, and query which implementation will be called for a given level-3 operation. The test suite now prints this information in its pre-test summary. 4m (not 4mh) is still the default when complex microkernels are not present. - Consolidated control tree code and usage so that all level-3 multiplication operations use the same gemm_t structure, leaving only `trsm` to have a custom tree structure and associated code. - Re-implemented micropanel alignment, which was removed in commit c2b2ab6 earlier this year. - Relaxed the long-standing constraint that `KC` be a multiple of `MR` and `NR` by allowing the developer to specify target values and then adjusting them up to the next multiple of `MR` or `NR`, as needed by the affected operations (`hemm`, `symm`, `trmm`, `trsm`). - Added a new "row preference" flag that the developer can use to signal to the framework that a microkernel prefers to output micro-tiles of C that are row-stored (rather than column-stored). Column storage preference is still the default. - Changed semantics of blocksize extensions to instead be "maximum" blocksizes (and thus emphasizing the "extended" values rather than the difference). - Various other minor tweaks, code reorganizations, and bugfixes. 
Thanks go to those whose contributions, feedback, and bug reports led to these improvements--in particular, Tony Kelman, Kevin Locke, Devin Matthews, Tyler Smith, and perhaps others whose feedback I've lost track of. ## Changes in 0.1.5 August 4, 2014 - Added a CBLAS compatibility layer, which can be enabled at configure-time via `BLIS_ENABLE_CBLAS` in `bli_config.h`. Enabling the CBLAS layer implicitly forces the BLAS compatibility layer to also be enabled. Once enabled, the application may access CBLAS prototypes via `blis.h` or `cblas.h`. - Fixed a packing bug for cases when `MR` or `NR` (or both) are 1. - Redefined bit field macros in `bli_type_defs.h` with bitshift operator to ease future rearranging, expanding, or adding of info bits. ## Changes in 0.1.4 July 27, 2014 - Added shared library support to build system. - Preliminary parallelization of `trsm` (Tyler Smith). - Added generic `_void()` microkernel wrappers so that users (or developers) can call the microkernel without knowing the implementation/developer-specific function names, which are specified at configure-time. - Added `bli_info_*()` API for querying general information about BLIS, including blocksizes. - Reimplemented initialization/finalization for thread safety. - Fixed a possible `Inf`/`NaN` issue in several level-3 operations when beta is zero. - Minor fixes to BLAS compatibility layer. - Added initial support for Emscripten (Marat Dukhan). ## Changes in 0.1.3 June 23, 2014 This is a relatively minor release. The changes can be summarized as: - Added experimental support for PNaCL (Marat Dukhan). - Fixed aligned memory allocation on Windows (Tony Kelman). - Fixed missing version string in build products when downloading tarballs/zip files (Field Van Zee). Thanks to Victor Eijkhout for pointing out this bug. 
## Changes in 0.1.2 June 2, 2014 Tyler has been hard at work developing and refining extensions to BLIS that provide multithreading support (currently via OpenMP, though POSIX threads may be supported in the future). These extensions enable multithreading within all level-3 operations except for `trsm`. We are pleased to announce that these code changes are now part of BLIS. ## Changes in 0.1.1 February 25, 2014 I. I am excited to announce that BLIS now provides high-performance complex domain support to ALL level-3 operations when ONLY the same-precision real domain equivalent gemm microkernel is present and optimized. In other words, BLIS's productivity lever just got twice as strong: optimize the `dgemm` microkernel, and you will get double-precision complex versions of all level-3 operations, for free. Same for `sgemm` microkernel and single-precision complex. II. We also now offer complex domain support based on the 3m method, but this support is ONLY accessible via separate interfaces. This separation is a safety feature, since the 3m method's numerical properties are inherently less robust. Furthermore, we think the 3m method, as implemented, is somewhat performance-limited on systems with L1 caches that have less than 8-way associativity. We plan on writing a paper on (I) and (II), so if you are curious how exactly we accomplish this, please be patient and wait for the paper. :) III. The second, user-oriented change facilitates a much more developer-friendly configuration system. This "change" actually represents a family of smaller changes. What follows is a list of those changes taken from the git log: - We now have standard names for reference kernels (levels-1v, -1f and 3) in the form of macro constants. 
Examples: `BLIS_SAXPYV_KERNEL_REF` `BLIS_DDOTXF_KERNEL_REF` `BLIS_ZGEMM_UKERNEL_REF` - Developers no longer have to name all datatype instances of a kernel with a common base name; [sdcz] datatype flavors of each kernel or microkernel (level-1v, -1f, or 3) may now be named independently. This means you can now, if you wish, encode the datatype-specific register blocksizes in the name of the microkernel functions. - Any datatype instances of any kernel (1v, 1f, or 3) that is left undefined in `bli_kernel.h` will default to the corresponding reference implementation. For example, if `BLIS_DGEMM_UKERNEL` is left undefined, it will be defined to be `BLIS_DGEMM_UKERNEL_REF`. - Developers no longer need to name level-1v/-1f kernels with multiple datatype chars to match the number of types the kernel WOULD take in a mixed type environment, as in `bli_dddaxpyv_opt()`. Now, one char is sufficient, as in `bli_daxpyv_opt()`. - There is no longer a need to define an obj_t wrapper to go along with your level-1v/-1f kernels. The framework now provides a `_kernel()` function, as in `bli_axpyv_kernel()`, which serves as the `obj_t` wrapper for whatever kernels are specified (or defaulted to) via `bli_kernel.h`. - Developers no longer need to prototype their kernels, and thus no longer need to include any prototyping headers from within `bli_kernel.h`. The framework now generates kernel prototypes, with the proper type signature, based on the kernel names defined (or defaulted to) via `bli_kernel.h`. - If the complex datatype x (of [cz]) implementation of the gemm microkernel is left undefined by `bli_kernel.h`, but its same-precision real domain equivalent IS defined, BLIS will enable the automatic complex domain feature described above in (1a) for the datatype x implementations of all level-3 operations, using only the corresponding real domain gemm microkernel. 
If the complex gemm microkernel for x IS defined, then all complex level-3 operations will be defined in terms of that microkernel. The net effect of (III) is that your `bli_kernel.h` files can be MUCH simpler and less cluttered. (Extreme example: the reference configuration's `bli_kernel.h` is now completely empty!) I have updated all configurations and kernels that are currently part of BLIS by stripping out unnecessary/outdated definitions and migrating existing definitions to their new names. (If you ever need to reference the complete list of options and macros, please refer to the `bli_kernel.h` inside the template configuration.) Please set aside some time to test and, if necessary, tweak the configurations which you originally developed and submitted. I may have broken some of them. If so, please accept my apologies and contact me for assistance. I will work with you to get them functional again. The changes mentioned in (I), (II), and (III), along with all other changes since 0.1.0, are included in BLIS 0.1.1 (fde5f1fd). I know these changes may be a little disruptive to some, but I think that most developers will find the new complex functionality very useful, and the new configuration system much easier to use. ## Changes in 0.1.0 November 9, 2013 - Added `sgemm` microkernel for dunnington. - Added `dgemm` microkernels and configurations for sandybridge, bgq, mic, power7, piledriver, loongson3a, which were used to gather performance data in our second ACM TOMS paper. Many thanks to Francisco Igual, Tyler Smith, Mike Kistler, and Xianyi Zhang for developing, testing, and contributing these kernels. - Migrated to signed integer for `dim_t`, `inc_t` (to facilitate calling BLIS from Fortran). - Added "template" configuration and kernel set for developers to use as a starting point when developing new kernels from scratch. - Improvements to test suite, including section overrides and standalone level-1f/level-3 kernel modules.
- Improvements to Windows build system (though it may still not yet be functional out-of-the-box). Thanks to Martin Schatz for his help here. - Removed support for element "duplication" in level-3 macrokernels. - Several bug fixes to BLAS compatibility layer. Thanks to Vladimir Sukharev for his numerous bug reports wrt the LAPACK test suite. - Various other minor bugfixes. ## Changes in 0.0.9 July 18, 2013 - A few algorithmic optimizations and bug fixes to `trmm` and `trsm`. - Parameter checking in the compatibility layer that mimics netlib BLAS. - Default use of `stdint.h` types (`int64_t`, `uint64_t` by default). - Optional (and very much untested) C99 built-in complex type/arithmetic support. Note that `bli_config.h` has changed since 0.0.8. Added configuration macros are: ``` #define BLIS_ENABLE_C99_COMPLEX #define BLIS_ENABLE_BLAS2BLIS_INT64 #define PASTEF770(name) // ... ``` The first macro enables C99 built-in complex types. The second causes a Fortran integer to be defined as an int64_t (rather than `int32_t`). The third is a macro to name-mangle a full routine name for Fortran (ie: add an underscore) and should be obtained from `config/reference/bli_config.h`. ## Changes in 0.0.8 June 12, 2013 This version includes several kernel optimizations and bug fixes. While neither `bli_config.h` nor `bli_kernel.h` has changed formats since 0.0.7, `make_defs.mk` **has** changed, so please update your copy of this file when you git-pull. Specifically, we now define a new `CFLAGS_KERNELS` variable that allows one to use different compiler flags when compiling kernels. It works like this: At compile time, make will use `CFLAGS_KERNELS` to compile any source code that resides in any directory that begins with the name `kernels`. My recommendation is to simply apply this naming convention to the symbolic link to your kernels directory that resides in your configuration directory. Thanks to Tyler for suggesting this change. 
## Changes in 0.0.7 April 30, 2013 This version incorporates many small fixes and feature enhancements made during our SC13 collaboration. ## Changes in 0.0.6 April 13, 2013 Several changes regarding memory alignment were made since 0.0.5, including modifications to `bli_config.h`. Also, this update fixes a few bugs. ## Changes in 0.0.5 March 24, 2013 The most obvious change in this version is the migration to the `bli` function (and source code filename) prefix, from the old `bl2` prefix, as well as a rename of the main BLIS header (`blis2.h` -> `blis.h`). The test suite seems to indicate that the change was successful. A few other much more minor changes were made, one pertaining to a renamed constant in the `_config.h` file. ## Changes in 0.0.4 March 15, 2013 The changes included in 0.0.4 mostly relate to the contiguous (static) memory allocator. The previous implementation was intended as a temporary solution that would work for benchmarking purposes, until enough other priorities had been tended to that I could go back and do it right. I began with the assumption that the benefit of packing matrices into contiguous memory is non-negligible and worth the effort. Furthermore, we assume that: - the only portable way to acquire contiguous memory is to reserve a region of static memory and manage it ourselves; - the cache blocksizes used for one level-3 operation will be the same as those used for another level-3 operation, since all of them boil down to some form of matrix-matrix multiplication; - only three types of contiguous memory will ever be needed (for level-3 operations): a block of matrix A, a panel of matrix B, or a panel of matrix C--and the last case is not commonly used; - when a block or panel is to be acquired from the allocator, the caller knows which of the three types of memory is needed. 
Given these assumptions, I was able to come up with an implementation that is simple, easy to understand, and thread-safe (provided you add OpenMP directives to protect the critical sections, which are clearly marked with comments). It can also both allocate and release in O(1) time. And of course, page-alignment is taken care of behind the scenes. So while it is not a generalized solution by any means, I think it will work very well for our purposes. Also, note that based on the level of the overall matrix multiplication algorithm at which you parallelize, the minimum number of blocks/panels of each type of contiguous memory will vary. For example, if you want all of your threads to work on different iterations of a single rank-k update (via block-panel multiply), the threads share the packed panel of B, but each one needs memory to hold its own packed block of A. Thus, the memory allocator needs to be initialized so that it contains enough memory for at least one panel of B and at least t blocks of A, where t is the number of threads. All of this can be adjusted at configure-time in `bl2_config.h`. ## Changes in 0.0.3 February 22, 2013 The biggest change in this version is that the BLAS-to-BLIS compatibility layer is now available. Virtually every BLAS interface is included, even those corresponding to functionality that BLIS does not implement (such as banded and packed level-2 operations). If the application code attempts to call one of these unimplemented routines, the code aborts with a generic not-yet-implemented error message. The compatibility layer is enabled via a configuration option in `bl2_config.h`. For now, it is enabled by default (provided you have an up-to-date copy of `bl2_config.h`). ## Changes in 0.0.2 February 11, 2013 Most notably, this version contains the new test suite I've been working on for the last month. What is the test suite? 
It is a highly configurable test driver that allows one to test an arbitrary set of BLIS operations, with an arbitrary set of parameter combinations, and matrix/vector storage formats, as well as whichever datatypes you are interested in. (For now, only homogeneous datatyping is supported, which is what most people want.) You can also specify an arbitrary problem size range with arbitrary increments, and arbitrary ratios between dimensions (or anchor a dimension to a single value), and you can output directly to files which store the output in matlab syntax, which makes it easy to generate performance graphs. BLIS developers: note that 0.0.2 makes small changes to the configuration files. This new version also contains many bug fixes. (Most of these fixes address bugs which were found using the test suite.) ## Changes in 0.0.1 December 10, 2012 - Added auto-detection of string version (via `git`). - Wrote basic INSTALL, CHANGELOG, AUTHORS, and CREDITS files. - Updates to standalone `test` directory `Makefile`. - Added initial build system - Various code reorganizations. blis-0.6.1/docs/Sandboxes.md000066400000000000000000000246111360743507500156650ustar00rootroot00000000000000## Contents * **[Introduction](Sandboxes.md#introduction)** * **[Enabling a sandbox](Sandboxes.md#enabling-a-sandbox)** * **[Sandbox rules](Sandboxes.md#sandbox-rules)** * **[Caveats](Sandboxes.md#caveats)** * **[Known issues](Sandboxes.md#known-issues)** * **[Conclusion](Sandboxes.md#conclusion)** ## Introduction This file briefly describes the requirements for building a custom BLIS *sandbox*. Simply put, a sandbox in BLIS provides an alternative implementation to the `gemm` operation. To get a little more specific, a sandbox provides an alternative implementation to the function `bli_gemmnat()`, which is the object-based API call for computing the `gemm` operation via native execution. **Note**: Native execution simply means that an induced method will not be used. 
It's what you probably already think of when you think of implementing the `gemm` operation: a series of loops around an optimized (usually assembly-based) microkernel with some packing functions thrown in at various levels. Why sandboxes? Sometimes you want to experiment with tweaks or changes to the `gemm` operation, but you want to do so in a simple environment rather than the highly macroized and refactored (and somewhat obfuscated) code of the core framework. By building a BLIS sandbox, you can experiment (within limits) and still benefit from BLIS's existing build system, testsuite, and toolbox of utility functions. ## Enabling a sandbox To enable a sandbox at configure-time, you simply specify it as an option to `configure`. Either of the following usages are accepted: ``` $ ./configure --enable-sandbox=ref99 auto $ ./configure -s ref99 auto ``` Here, we tell `configure` that we want to use the `ref99` sandbox, which corresponds to a sub-directory of `sandbox` named `ref99`. (Reminder: the `auto` argument is the configuration target and thus unrelated to sandboxes.) As `configure` runs, you should get output that includes lines similar to: ``` configure: configuring for alternate gemm implementation: configure: sandbox/ref99 ``` And when you build BLIS, the last files to be compiled will be the source code in the specified sandbox: ``` Compiling obj/haswell/sandbox/ref99/blx_gemm_front.o ('haswell' CFLAGS for sandboxes) Compiling obj/haswell/sandbox/ref99/blx_gemm_int.o ('haswell' CFLAGS for sandboxes) Compiling obj/haswell/sandbox/ref99/base/blx_blksz.o ('haswell' CFLAGS for sandboxes) Compiling obj/haswell/sandbox/ref99/cntl/blx_gemm_cntl.o ('haswell' CFLAGS for sandboxes) ... ``` That's it! After the BLIS library is built, it will contain your chosen sandbox's implementation of `bli_gemmnat()` instead of the default implementation. ## Sandbox rules Like any civilized sandbox, there are rules for playing here. 
Please follow these guidelines for the best sandbox developer experience. 1. Don't bother worrying about makefiles. We've already taken care of the boring/annoying/headache-inducing build system stuff for you. :) By configuring BLIS with a sandbox enabled, `make` will scan your sandbox directory and compile all of its source code using similar compilation rules as were used for the rest of the framework. In addition, the compilation command line will automatically contain one `-I` option for every subdirectory in your sandbox, so it doesn't matter where in your sandbox you place your header files. They will be found! 2. Your sandbox must be written in C99 or C++11. If you write your sandbox in C++11, you must use one of the BLIS-approved file extensions for your source files (`.cc`, `.cpp`, `.cxx`) and your header files (`.hh`, `.hpp`, `.hxx`). Note that `blis.h` already contains all of its definitions inside of an `extern "C"` block, so you should be able to `#include "blis.h"` from your C++11 source code without any issues. 3. All of your code to replace BLIS's default implementation of `bli_gemmnat()` should reside in the named sandbox directory, or some directory therein. (Obviously.) For example, the "reference" sandbox is located in `sandbox/ref99`. All of the code associated with this sandbox will be contained within `sandbox/ref99`. 4. The *only* header file that is required of your sandbox is `bli_sandbox.h`. It must be named `bli_sandbox.h` because `blis.h` will `#include` this file when the sandbox is enabled at configure-time. That said, you will probably want to keep the file empty. Why require a file that is supposed to be empty? Well, it doesn't *have* to be empty. Anything placed in this file will be folded into the flattened (monolithic) `blis.h` at compile-time. Therefore, you should only place things (e.g. 
prototypes or type definitions) in `bli_sandbox.h` if those things would be needed at compile-time by: (a) the BLIS framework itself, or (b) an *application* that calls your sandbox-enabled BLIS library. Usually, neither of these situations will require any of your local definitions since those local definitions are only needed to define your sandbox implementation of `bli_gemmnat()`, and this function is already prototyped by BLIS. 5. Your definition of `bli_gemmnat()` should be the **only function you define** in your sandbox that begins with `bli_`. If you define other functions that begin with `bli_`, you risk a namespace collision with existing framework functions. To guarantee safety, please prefix your locally-defined sandbox functions with another prefix. Here, in the `ref99` sandbox, we use the prefix `blx_`. (The `x` is for sandbox. Or experimental.) Also, please avoid the prefix `bla_` since that prefix is also used in BLIS for BLAS compatibility functions. If you follow these rules, you will be much more likely to have a pleasant experience integrating your BLIS sandbox into the larger framework. ## Caveats Notice that the BLIS sandbox is not all-powerful. You are more-or-less stuck working with the existing BLIS infrastructure. For example, with a BLIS sandbox you **can** do the following kinds of things: - use a different `gemm` algorithmic partitioning path than the default Goto-like algorithm; - experiment with different implementations of `packm` (not just `packm` kernels, which can already be customized within each sub-configuration); - try inlining your functions manually; - pivot away from using `obj_t` objects at higher algorithmic level (such as immediately after calling `bli_gemmnat()`) to try to avoid some overhead; - create experimental implementations of new BLAS-like operations (provided that you also provide an implementation of `bli_gemmnat()`). 
You **cannot**, however, use a sandbox to do the following kinds of things: - define new datatypes (half-precision, quad-precision, short integer, etc.) and expect the rest of BLIS to "know" how to handle them; - use a sandbox to replace the default implementation of a different level-3 operation, such as Hermitian rank-k update; - change the existing BLIS APIs (typed or object); - remove support for one or more BLIS datatypes (to cut down on library size, for example). Another important limitation is the fact that the build system currently uses "framework `CFLAGS`" when compiling the sandbox source files. These are the same `CFLAGS` used when compiling general framework source code, ``` # Example framework CFLAGS used by 'haswell' sub-configuration -O3 -Wall -Wno-unused-function -Wfatal-errors -fPIC -std=c99 -D_POSIX_C_SOURCE=200112L -I./include/haswell -I./frame/3/ -I./frame/ind/ukernels/ -I./frame/1m/ -I./frame/1f/ -I./frame/1/ -I./frame/include -DBLIS_VERSION_STRING=\"0.3.2-51\" ``` which are likely more general-purpose than the `CFLAGS` used for, say, optimized kernels or even reference kernels. ``` # Example optimized kernel CFLAGS used by 'haswell' sub-configuration -O3 -mavx2 -mfma -mfpmath=sse -march=core-avx2 -Wall -Wno-unused-function -Wfatal-errors -fPIC -std=c99 -D_POSIX_C_SOURCE=200112L -I./include/haswell -I./frame/3/ -I./frame/ind/ukernels/ -I./frame/1m/ -I./frame/1f/ -I./frame/1/ -I./frame/include -DBLIS_VERSION_STRING=\"0.3.2-51\" ``` (To see precisely which flags are being employed for any given file, enable verbosity at compile-time via `make V=1`.) Compiling sandboxes with these more versatile `CFLAGS` compiler options means that we only need to compile one instance of each sandbox source file, even when targeting multiple configurations (for example, via `./configure x86_64`). 
However, it also means that sandboxes are not ideal for microkernels, as they sometimes need additional compiler flags not included in the set used for framework `CFLAGS` in order to yield the highest performance. If you have a new microkernel you would like to use within a sandbox, you can always develop it within a sandbox. However, once it is stable and ready for use by others, it's best to formally register the kernel(s) along with a new configuration, which will allow you to specify kernel-specific compiler flags to be used when compiling your microkernel. Please see the [Configuration Guide](ConfigurationHowTo) for more details, and when in doubt, please don't be shy about seeking guidance from BLIS developers by opening a [new issue](https://github.com/flame/blis/issues) or sending a message to the [blis-devel](http://groups.google.com/d/forum/blis-devel) mailing list. Notwithstanding these limitations, hopefully you still find BLIS sandboxes useful! ## Known issues * **Mixed datatype support.** Unless you *really* know what you are doing, you should probably disable mixed datatype support when using a sandbox. (Mixed datatype support can be disabled by configuring with `--disable-mixed-dt`.) The BLIS testsuite is smart enough to verify that you've configured BLIS with mixed datatype support before allowing you to test with mixed domains/precisions enabled in `input.general`. However, if those options *are* enabled and BLIS was built with mixed datatype support, then BLIS assumes that the implementation of `gemm` will support mixing of datatypes. BLIS *must* assume this, because there's no way for it to confirm at runtime that an implementation was written to support mixing datatypes. Note that even the `ref99` sandbox included with BLIS does not support mixed-datatype computation. 
## Conclusion If you encounter any problems, or are really bummed-out that `gemm` is the only operation for which you can provide a sandbox implementation, please open a new [issue on GitHub](https://github.com/flame/blis/issues). If you are unsure about how something works, you can still open an issue. Or, you can send a message to [blis-devel](https://groups.google.com/d/forum/blis-devel) mailing list. Happy sandboxing! blis-0.6.1/docs/Testsuite.md000066400000000000000000000743531360743507500157400ustar00rootroot00000000000000# Contents * **[Contents](Testsuite.md#contents)** * **[BLIS testsuite](Testsuite.md#blis-testsuite)** * **[Introduction](Testsuite.md#introduction)** * **[Compiling](Testsuite.md#compiling)** * **[Setting test parameters](Testsuite.md#setting-test-parameters)** * [`input.general`](Testsuite.md#inputgeneral) * [`input.operations`](Testsuite.md#inputoperations) * **[Running tests](Testsuite.md#running-tests)** * **[Interpreting the results](Testsuite.md#interpreting-the-results)** * **[BLAS test drivers](Testsuite.md#blas-test-drivers)** # BLIS testsuite ## Introduction This wiki explains how to use the test suite included with the BLIS framework. The test suite exists in the `testsuite` directory within the top-level source distribution: ``` $ ls CHANGELOG Makefile common.mk examples sandbox version CONTRIBUTING.md README.md config frame so_version windows CREDITS RELEASING config_registry kernels test INSTALL blastest configure mpi_test testsuite LICENSE build docs ref_kernels travis ``` There, you will find a `Makefile`, a script, several input files, and two directories: ``` $ cd testsuite $ ls Makefile input.general.mixed input.operations.mixed check-blistest.sh input.general.salt input.operations.salt input.general input.operations obj input.general.fast input.operations.fast src ``` As you would expect, the test suite's source code lives in `src` and the object files, upon being built, are placed in `obj`. 
The `input.*` files control how the test suite runs, while the `Makefile` controls how the test suite executable is compiled and linked. However, only two input files are used at any given time: one `input.general` and one `input.operations`. (We have several pairs so that Travis CI can run multiple variations of tests automatically when new commits are made to github.) You can focus your attention on the general-purpose input files `input.general` and `input.operations`. ## Compiling Before running the test suite, you must first configure and compile BLIS. (Installing BLIS is not necessary to run the test suite, though it is supported.) For directions on how to build and install a BLIS library, please see the [Build System](BuildSystem.md) guide. Once BLIS is built, you are ready to compile the test suite. When you are ready to compile, simply run `make` from within the `testsuite` directory. Running `make` will result in output similar to: ``` $ make Compiling src/test_addm.c Compiling src/test_addv.c Compiling src/test_amaxv.c Compiling src/test_axpbyv.c Compiling src/test_axpy2v.c Compiling src/test_axpyf.c Compiling src/test_axpym.c Compiling src/test_axpyv.c Compiling src/test_copym.c Compiling src/test_copyv.c ``` As with compiling a BLIS library, if you are working in a multicore environment, you may use the `-j<n>` option to compile source code in parallel with `<n>` parallel jobs: ``` $ make -j4 ``` After `make` is complete, an executable named `test_libblis.x` is created: ``` $ ls Makefile input.general.mixed input.operations.mixed test_libblis.x check-blistest.sh input.general.salt input.operations.salt input.general input.operations obj input.general.fast input.operations.fast src ``` ### Compiling/linking against an installed copy of BLIS By default, the `Makefile` in the `testsuite` directory is programmed to look in `../include/<configname>/` for `blis.h` and `../lib/<configname>/` for the BLIS library.
However, some users may wish to run the testsuite after installing BLIS and deleting the entire source tree. In this situation, it is necessary to point `make` to the location of your BLIS installation (i.e., the installation prefix). If you would like to compile with an installed header and link against an installed library, you have two options: 1. First, you may set the environment variable `BLIS_INSTALL_PATH` to the install prefix used when BLIS was installed, and then run `make`. In this example, we assume that BLIS was installed after running the `configure` script with the `--prefix=/usr/local` option. ``` $ export BLIS_INSTALL_PATH=/usr/local $ make ``` 2. Alternatively, you may set the `make` variable `BLIS_INSTALL_PATH` on the command line as you execute `make`: ``` $ make BLIS_INSTALL_PATH=/usr/local ``` Both options result in the same outcome: `make` looks for the BLIS installation in `BLIS_INSTALL_PATH` when building the test suite. ## Setting test parameters The BLIS test suite reads two input files, `input.general` and `input.operations`, to determine which tests to run and how those tests are run. Each file contains comments and thus you may find them intuitive to use without formal instructions. However, for completeness and as a reference-of-last-resort, we describe each file and its contents in detail. ### `input.general` The `input.general` input file, as its name suggests, contains parameters that control the general behavior of the test suite. These parameters (more or less) apply to all operations that get tested. Below is a representative example of the default contents of `input.general`. ``` # ---------------------------------------------------------------------- # # input.general # BLIS test suite # # This file contains input values that control how BLIS operations are # tested. Comments explain the purpose of each parameter as well as # accepted values.
# 1 # Number of repeats per experiment (best result is reported) c # Matrix storage scheme(s) to test: # 'c' = col-major storage; 'g' = general stride storage; # 'r' = row-major storage c # Vector storage scheme(s) to test: # 'c' = colvec / unit stride; 'j' = colvec / non-unit stride; # 'r' = rowvec / unit stride; 'i' = rowvec / non-unit stride 0 # Test all combinations of storage schemes? 1 # Perform all tests with alignment? # '0' = do NOT align buffers/ldims; '1' = align buffers/ldims 0 # Randomize vectors and matrices using: # '0' = real values on [-1,1]; # '1' = powers of 2 in narrow precision range 32 # General stride spacing (for cases when testing general stride) sdcz # Datatype(s) to test: # 's' = single real; 'c' = single complex; # 'd' = double real; 'z' = double complex 0 # Test gemm with mixed-domain operands? 0 # Test gemm with mixed-precision operands? 100 # Problem size: first to test 300 # Problem size: maximum to test 100 # Problem size: increment between experiments # Complex level-3 implementations to test 1 # 3mh ('1' = enable; '0' = disable) 1 # 3m1 ('1' = enable; '0' = disable) 1 # 4mh ('1' = enable; '0' = disable) 1 # 4m1b ('1' = enable; '0' = disable) 1 # 4m1a ('1' = enable; '0' = disable) 1 # 1m ('1' = enable; '0' = disable) 1 # native ('1' = enable; '0' = disable) 1 # Simulate application-level threading: # '1' = disable / use one testsuite thread; # 'n' = enable and use n testsuite threads 1 # Error-checking level: # '0' = disable error checking; '1' = full error checking i # Reaction to test failure: # 'i' = ignore; 's' = sleep() and continue; 'a' = abort 0 # Output results in matlab/octave format? ('1' = yes; '0' = no) 0 # Output results to stdout AND files? ('1' = yes; '0' = no) ``` The remainder of this section explains each parameter switch in detail. _**Number of repeats.**_ This is the number of times an operation is run for each result that is reported. The result with the best performance is reported. 
_**Matrix storage scheme.**_ This string encodes all of the matrix storage schemes that are tested (for operations that contain matrix operands). There are three valid values: `'c'` for column storage, `'r'` for row storage, and `'g'` for general stride storage. You may choose one storage scheme, or combine more than one. The order of the characters determines the order in which the corresponding storage schemes are tested. _**Vector storage scheme.**_ Similar to the matrix storage scheme string, this string determines which vector storage schemes are tested (for operations that contain vector operands). There are four valid values: `'c'` for column vectors with unit stride, `'r'` for row vectors with unit stride, `'j'` for column vectors with non-unit stride, and `'i'` for row vectors with non-unit stride. You may choose any one storage scheme, or combine more than one. The ordering behaves similarly to that of the matrix storage scheme string. Using `cj` will test both unit and non-unit vector strides, and since row and column vectors are logically equivalent, this should provide complete test coverage for operations with vector operands. _**Test all combinations of storage schemes?**_ Enabling this option causes all combinations of storage schemes to be tested. For example, if the option is disabled, a matrix storage scheme string of `cr` would cause the `gemm` test module to test execution where all matrix operands are column-stored, and then where all matrix operands are row-stored. Enabling this option with the same matrix storage string (`cr`) would cause the test suite to test `gemm` under all eight scenarios where the three `gemm` matrix operands are either column-stored or row-stored. 
_**Perform all tests with alignment?**_ Disabling this option causes the leading dimension (row or column stride) of test matrices to **not** be aligned according to `BLIS_HEAP_STRIDE_ALIGN_SIZE`, which defaults to `BLIS_SIMD_ALIGN_SIZE`, which defaults to `BLIS_SIMD_SIZE`, which defaults to 64 (bytes). (If any of these values is set to a non-default value, it would be in `bli_family_<arch>.h` where `<arch>` is the configuration family.) Sometimes it's useful to disable leading dimension alignment in order to test certain aspects of BLIS that need to handle computing with unaligned user data, such as level-1v and level-1f kernels. _**Randomize vectors and matrices.**_ The default randomization method uses real values on the interval [-1,1]. However, we offer an alternate randomization using powers of two in a narrow precision range, which is more likely to result in test residuals exactly equal to zero. This method is somewhat niche/experimental and most people should use random values on the [-1,1] interval. _**General stride spacing.**_ This value determines the simulated "inner" stride when testing general stride storage. For simplicity, the test suite always generates and tests general stride storage that is ["column-tilted"](FAQ.md#What_does_it_mean_when_a_matrix_with_general_stride_is_column-ti). If general stride storage is not being tested, then this value is ignored. _**Datatype(s) to test.**_ This string determines which floating-point datatypes are tested. There are four valid values: `'s'` for single-precision real, `'d'` for double-precision real, `'c'` for single-precision complex, and `'z'` for double-precision complex. You may choose one datatype, or combine more than one. The order of the datatype characters determines the order in which they are tested. _**Test gemm with mixed-domain operands?**_ This boolean determines whether `gemm` tests are performed that exercise the mixed-domain functionality within BLIS.
(In other words, with precision held constant, all combinations of real and complex matrix operands will be tested.) If this option is set to 1 and the mixed-precision option is set to 0, then domain combinations will be varied for the precisions represented by the "Datatype(s) to test" option. If this option and the mixed-precision option are both set to 1, then _all_ datatype combinations will be tested, regardless of the datatypes indicated by the "Datatype(s) to test" option. _**Test gemm with mixed-precision operands?**_ This boolean determines whether `gemm` tests are performed that exercise the mixed-precision functionality within BLIS. (In other words, with domain held constant, all combinations of supported precisions will be tested.) If this option is set to 1 and the mixed-domain option is set to 0, then precision combinations will be varied for the domains represented by the "Datatype(s) to test" option. If this option and the mixed-domain option are both set to 1, then _all_ datatype combinations will be tested, regardless of the datatypes indicated by the "Datatype(s) to test" option. _**Problem size.**_ These values determine the first problem size to test, the maximum problem size to test, and the increment between problem sizes. Note that the maximum problem size only bounds the range of problem sizes; it is not guaranteed to be tested. Example: If the initial problem size is 128, the maximum is 1000, and the increment is 64, then the last problem size to be tested will be 960. _**Complex level-3 implementations to test.**_ With the exception of the switch marked `native`, these switches control whether experimental complex domain implementations are tested (when applicable). These implementations employ induced methods for complex matrix multiplication and apply to some (though not all) of the level-3 operations. If you don't know what these are, you can ignore them.
The `native` switch corresponds to native execution of complex domain level-3 operations, which we test by default. We also test the `1m` method, since it is the induced method of choice when complex microkernels are not available. Note that all of these induced method tests (including `native`) are automatically disabled if the `c` and `z` datatypes are disabled. _**Simulate application-level threading.**_ This setting specifies the number of threads the testsuite will spawn, and is meant to allow the user to exercise BLIS as a multithreaded application might if it were to make multiple concurrent calls to BLIS operations. (Note that the threading controlled by this option is orthogonal to, and has no effect on, whatever multithreading may be employed _within_ BLIS, as specified by the environment variables described in the [Multithreading](Multithreading.md) documentation.) When this option is set to 1, the testsuite is run with only one thread. When set to n > 1 threads, the spawned threads will parallelize (in round-robin fashion) the total set of tests specified by the testsuite input files, executing them in roughly the same order as that of a sequential execution. _**Error-checking level.**_ BLIS supports various "levels" of error checking prior to executing most operations. For now, only two error-checking levels are implemented: fully disabled (`'0'`) and fully enabled (`'1'`). Disabling error-checking may improve performance on some systems for small problem sizes, but generally speaking the cost is negligible. _**Reaction to test failure.**_ If the test suite executes a test that results in a numerical result that is considered a "failure", this character determines how the test suite should proceed. 
There are three valid values: `'i'` will cause the test suite to ignore the failure and immediately continue with all remaining tests, `'s'` will cause the test suite to sleep for some short period of time before continuing, and `'a'` will cause the test suite to abort all remaining tests. The user must specify only **one** option via its character encoding. _**Output results in Matlab/Octave format?**_ When this option is disabled, the test suite outputs results in a simple human-readable format of one experiment per line. When this option is enabled, the test suite similarly outputs results for one experiment per line, but in a format that may be read into Matlab or Octave. This is useful if the user intends to use the results of the test suite to plot performance data using one of these tools. _**Output results to `stdout` AND files?**_ When this option is disabled, the test suite outputs only to standard output. When enabled, the test suite also writes its output to files, one for each operation tested. As with the Matlab/Octave option above, this option may be useful to some users who wish to gather and retain performance data for later use. ### `input.operations` The `input.operations` input file determines **which** operations are tested, which parameter combinations are tested, and the relative sizes of the operation's dimensions. The file itself contains comments that explain various sections. However, we reproduce this information here for your convenience. _**Enabling/disabling entire sections.**_ The values in the "Section overrides" section allow you to disable all operations in a given "level". Enabling a level here by itself does not enable every operation in that level; it simply means that the individual switches for each operation (in that level) determine whether or not the tests are executed. Use 1 to enable a section, or 0 to disable. 
_**Enabling/disabling individual operation tests.**_ Given that an operation's section override switch is set to 1 (enabled), whether or not that operation will get tested is determined by its local switch. For example, if the level-1v section override is set to 1, and there is a 1 on the line marked `addv`, then the `addv` operation will be tested. NOTE: You may ignore the lines marked "test sequential front-end." These lines are for future use, to distinguish tests of the sequential implementation from tests of the multithreaded implementation. For now, BLIS does not contain separate APIs for multithreaded execution, even though multithreading is supported. So, these should be left set to 1. _**Enabling only select operations.**_ If you would like to enable just a few (or even just one) operation without adjusting any section overrides (or individual operation switches), change the desired operation switch(es) to 2. This will cause any operation that is not set to 2 to be disabled, regardless of section override values. For example, setting the `axpyv` and `gemv` operation switches to 2 will cause the test suite to test ONLY `axpyv` and `gemv`, even if all other sections and operations are set to 1. NOTE: As long as there is at least one operation switch set to 2, no other operations will be tested. When you are done testing your select operations, you should revert the operation switch(es) back to 1. _**Changing the problem size/shapes tested.**_ The problem sizes tested by an operation are determined by the dimension specifiers on the line marked `dimensions: <spec_labels>`. If, for example, `<spec_labels>` contains two dimension labels (e.g. `m n`), then the line should begin with two dimension specifiers. Dimension specifiers of `-1` cause the corresponding dimension to be bound to the problem size, which is determined by values set in `input.general`. Positive values cause the corresponding dimension to be fixed to that value and held constant.
Examples of dimension specifiers (where the dimensions are _m_ and _n_): * `-1 -1 ` ...Dimensions m and n grow with problem size (resulting in square matrices). * `-1 150 ` ...Dimension m grows with problem size and n is fixed at 150. * `-1 -2 ` ...Dimension m grows with problem size and n grows proportional to half the problem size. _**Changing parameter combinations tested.**_ The parameter combinations tested by an operation are determined by the parameter specifier characters on the line marked `parameters: <param_labels>`. If, for example, `<param_labels>` contains two parameter labels (e.g. `transa conjx`), then the line should contain two parameter specifier characters. The `'?'` specifier character serves as a wildcard--it causes all possible values of that parameter to be tested. A character such as `'n'` or `'t'` causes only that value to be tested. Examples of parameter specifiers (where the parameters are `transa` and `conjx`): * `??` ...All combinations of the `transa` and `conjx` parameters are tested: `nn, nc, tn, tc, cn, cc, hn, hc`. * `?n` ...`conjx` is fixed to "no conjugate" but `transa` is allowed to vary: `nn, tn, cn, hn`. * `hc` ...Only the case where `transa` is "Hermitian-transpose" and `conjx` is "conjugate" is tested. Here is a full list of the parameter types used by the various BLIS operations along with their possible character encodings: * `side`: `l` = left, `r` = right * `uplo`: `l` = lower-stored, `u` = upper-stored * `trans`: `n` = no transpose, `t` = transpose, `c` = conjugate, `h` = Hermitian-transpose (conjugate-transpose) * `conj`: `n` = no conjugate, `c` = conjugate * `diag`: `n` = non-unit diagonal, `u` = unit diagonal ## Running tests Running the test suite is easy. Once `input.general` and `input.operations` have been tailored to your liking, simply run the test suite executable: ``` $ ./test_libblis.x ``` For sanity-checking purposes, the test suite begins by echoing the parameters it found in `input.general` to standard output.
This is useful when troubleshooting the test suite if/when it exhibits strange behavior (such as seemingly skipped tests). ## Interpreting the results The output of the test suite is more-or-less intuitive. Here is a snippet of output for the `gemm` test module when problem sizes of 100 to 300 in increments of 100 are tested. ``` % --- gemm --- % % test gemm seq front-end? 1 % gemm m n k -1 -1 -2 % gemm operand params ?? % % blis_<dt><op>_<params>_<stor>
m n k gflops resid result blis_sgemm_nn_ccc 100 100 50 1.447 1.14e-07 PASS blis_sgemm_nn_ccc 200 200 100 1.537 1.18e-07 PASS blis_sgemm_nn_ccc 300 300 150 1.532 1.38e-07 PASS blis_sgemm_nc_ccc 100 100 50 1.449 7.79e-08 PASS blis_sgemm_nc_ccc 200 200 100 1.540 1.23e-07 PASS blis_sgemm_nc_ccc 300 300 150 1.537 1.54e-07 PASS blis_sgemm_nt_ccc 100 100 50 1.479 7.40e-08 PASS blis_sgemm_nt_ccc 200 200 100 1.549 1.33e-07 PASS blis_sgemm_nt_ccc 300 300 150 1.534 1.44e-07 PASS blis_sgemm_nh_ccc 100 100 50 1.477 9.23e-08 PASS blis_sgemm_nh_ccc 200 200 100 1.547 1.13e-07 PASS blis_sgemm_nh_ccc 300 300 150 1.535 1.51e-07 PASS blis_sgemm_cn_ccc 100 100 50 1.477 9.62e-08 PASS blis_sgemm_cn_ccc 200 200 100 1.548 1.36e-07 PASS blis_sgemm_cn_ccc 300 300 150 1.539 1.51e-07 PASS blis_sgemm_cc_ccc 100 100 50 1.481 8.66e-08 PASS blis_sgemm_cc_ccc 200 200 100 1.549 1.41e-07 PASS blis_sgemm_cc_ccc 300 300 150 1.539 1.63e-07 PASS blis_sgemm_ct_ccc 100 100 50 1.484 7.09e-08 PASS blis_sgemm_ct_ccc 200 200 100 1.549 1.08e-07 PASS blis_sgemm_ct_ccc 300 300 150 1.539 1.33e-07 PASS blis_sgemm_ch_ccc 100 100 50 1.471 8.06e-08 PASS blis_sgemm_ch_ccc 200 200 100 1.546 1.24e-07 PASS blis_sgemm_ch_ccc 300 300 150 1.539 1.66e-07 PASS ``` Before each operation is tested, the test suite echoes information it obtained from the `input.operations` file, such as the dimension specifier string (in this case, `"-1 -1 -2"`) and parameter specifier string (`"??"`). Each line of output contains several sections. We will cover them now, from left to right. _**Test identifier.**_ The left-most labels are strings which identify the specific test being performed. This string is generally a concatenation of substrings, joined by underscores, which identify the operation being run, the parameter combination tested, and the storage scheme of each operand. When output in Matlab/Octave format is enabled, these identifiers serve as the names of the arrays in which the data are stored.
_**Dimensions.**_ The values near the middle of the output show the size of each dimension. Different operations have different dimension sets. For example, `gemv` only has two dimensions, _m_ and _n_, while `gemm` has an additional _k_ dimension. In the snippet above, you can see that the dimension specifier string, `"-1 -1 -2"`, explains the relative sizes of the dimensions for each test: _m_ and _n_ are bound to the problem size, while _k_ is always equal to half the problem size. _**Performance.**_ The next value output is raw performance, reported in GFLOPS (billions of floating-point operations per second). _**Residual.**_ The next value, which we loosely refer to as a "residual", reports the result of the numerical correctness test for the operation. The actual method of computing the residual (and hence its exact meaning) depends on the operation being tested. However, these residuals are always computed such that the result should be no more than 2-3 orders of magnitude away from machine precision for the datatype being tested. Thus, "good" results are typically in the neighborhood of `5e-06` for single precision and `1e-16` for double precision (preferably less). _**Test result.**_ The BLIS test suite compares the residual to internally-defined accuracy thresholds to categorize the test as either `PASS`, `MARGINAL`, or `FAIL`. The vast majority of tests should result in a `PASS` result, with perhaps a handful resulting in `MARGINAL`. Usually, a `MARGINAL` result is no cause for concern, especially when similar tests result in `PASS`. Note that the various sections of output, which line up nicely as columns, are labeled on a line beginning with `%` immediately before the results: ``` % blis_<dt><op>_<params>_<stor>
m n k gflops resid result blis_sgemm_nn_ccc 100 100 50 1.447 1.14e-07 PASS ``` These labels are useful as concise reminders of the meaning of each column. They are especially useful in differentiating the various dimensions from each other for operations that contain two or three dimensions. If you simply want to run the BLIS testsuite and know if there were any failures, you can do so via the `make check` and `make check-fast` targets. The former uses the `input.general` and `input.operations` files, while the latter uses the `input.general.fast` and `input.operations.fast`. (We generally recommend using the "fast" target since it usually finishes in much less time while still being relatively comprehensive.) A one-line characterization of the test results is output after the tests finish: ``` $ make check-fast Running test_libblis.x (fast) with output redirected to 'output.testsuite' check-blistest.sh: All BLIS tests passed! ``` # BLAS test drivers In addition to the monolithic testsuite located in the `testsuite` directory, which exercises BLIS functionality in general (and via one of its native/preferred APIs), we also provide a C port of the netlib BLAS test drivers included in netlib LAPACK. These BLAS drivers are located in `blastest`, along with other files needed in order to build the drivers, such as a subset of `libf2c`. After configuring and compiling BLIS, the BLAS test drivers may be run from within `blastest`: ``` $ ./configure haswell # Lots of configure output... $ make -j4 # Lots of compilation output... $ cd blastest $ ls Makefile f2c input obj src ``` Simply run `make`: ``` $ make Compiling obj/abs.o Compiling obj/acos.o Compiling obj/asin.o Compiling obj/atan.o ...
Compiling obj/wsfe.o Compiling obj/wsle.o Archiving libf2c.a Compiling obj/cblat1.o Linking cblat1.x against 'libf2c.a ../lib/haswell/libblis.a -lm -lpthread -lrt' Compiling obj/cblat2.o Linking cblat2.x against 'libf2c.a ../lib/haswell/libblis.a -lm -lpthread -lrt' Compiling obj/cblat3.o Linking cblat3.x against 'libf2c.a ../lib/haswell/libblis.a -lm -lpthread -lrt' ... ``` And then `make run`: ``` Running cblat1.x > 'out.cblat1' Running cblat2.x < 'input/cblat2.in' (output to 'out.cblat2') Running cblat3.x < 'input/cblat3.in' (output to 'out.cblat3') Running dblat1.x > 'out.dblat1' Running dblat2.x < 'input/dblat2.in' (output to 'out.dblat2') Running dblat3.x < 'input/dblat3.in' (output to 'out.dblat3') Running sblat1.x > 'out.sblat1' Running sblat2.x < 'input/sblat2.in' (output to 'out.sblat2') Running sblat3.x < 'input/sblat3.in' (output to 'out.sblat3') Running zblat1.x > 'out.zblat1' Running zblat2.x < 'input/zblat2.in' (output to 'out.zblat2') Running zblat3.x < 'input/zblat3.in' (output to 'out.zblat3') ``` The results can quickly be checked via a script in the top-level `build` directory: ``` $ ../build/check-blastest.sh All BLAS tests passed! ``` This is the message we expect when everything works as expected. You can also combine the `make`, `make run`, and script execution into one command: `make check`. Alternatively, you can execute all of the steps described above (`make ; make run; ../build/check-blastest.sh`, or `make check`) from the top-level directory. After running `configure` and `make`, simply run `make checkblas`: ``` $ ./configure haswell # Lots of configure output... $ make -j4 # Lots of compilation output... $ make checkblas ``` This will build all of the necessary BLAS test driver object files, link them, and run the drivers.
Output will go to the current directory (either the top-level directory of the source distribution, or the out-of-tree build directory from which you ran `configure`), with each output file (prefixed with `out.`) named according to the BLAS driver that generated its contents: ``` $ ls CHANGELOG blastest docs out.cblat1 out.sblat3 testsuite CONTRIBUTING.md bli_config.h examples out.cblat2 out.zblat1 travis CREDITS build frame out.cblat3 out.zblat2 version INSTALL common.mk include out.dblat1 out.zblat3 windows LICENSE config kernels out.dblat2 ref_kernels Makefile config.mk lib out.dblat3 sandbox README.md config_registry mpi_test out.sblat1 so_version RELEASING configure obj out.sblat2 test ``` If any of the tests fail, you'll instead see the message: ``` $ make check At least one BLAS test failed. Please see out.* files for details. ``` As the message suggests, you should inspect the `out.*` files for more details about what went wrong. blis-0.6.1/docs/graphs/000077500000000000000000000000001360743507500146755ustar00rootroot00000000000000blis-0.6.1/docs/graphs/large/000077500000000000000000000000001360743507500157675ustar00rootroot00000000000000blis-0.6.1/docs/graphs/large/l3_perf_epyc_jc1ic8jr4_nt32.pdf000066400000000000000000001047331360743507500234730ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190328152847-05'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½K¯fKrž7ï_QC{Лy¿ <°‘°L´IÀA#ªÙ²¥CZM†ýëý¾k­ŒÈÜ眮j®>ÕU_Ö÷­Ì\‘OfÆõ¿ý&~ ø¿åÿô™¾ýÃO¿ 1Uùtý ÿ·ß´8ñAêC>ã_ë(µ‹¥|ËM¾‹ÏúMÌeâÿUÿCŒøsX`ÓþÍÿñ›ÂÓ¿ÿ•|¶þñ~÷íó¿ýqOü×û…Øë·”+úŸK’?ýWùSo åÿù7ôcúÐví÷¿ùÿÿè?ý&}ûwxÞÿ…¦VêÈùÛÿó›¾ýûOŸýw/=Ãøi 짤ŒOð__ÃÚ>Ê9‡âÈoua4<"ç’k¼»à?z¿ vÈ2éþ“Ž?Ï9Gì_1Ú÷ÉÑ¿øËø-¶ràïÿÝÑgýþºU{úˆ±¶ž:¾ZÓø¹ç‘ŠäßüýÂËoSü¨øõoÿÓ·ÿð?$Ìüÿøí?~ûû÷›û÷2ô_Öd×ZŠaÌò=Ï*dzÞZ»$Þx›7^^»v4m È»Hг}„4Kóx²k{± µ„:FJ-]°m/v!Ž€§äüÉ,ض÷†‰åÜñyj{lÓû>rÂs[}SôøÀMôÜ“¿@ô¤ ^ô|Þ=é‚=ß…÷EOˆæDÏõà3Ñûõý"N?pè)Îù )î!rHwHùmøò_êß~»hzbÿE 
8‹•QRDg¾q¥æšs:§´áªöä¯þò¯ÿæoÿî/þáŸÿð»£KöÜô‹3Ðk”CX­|+çÐØ{kõœ‚ÌcÌo‹>û_~ÿ»Ÿ~úþsŒ_€ágÙ?Ð'LCËEÎÆ˜£ Ö v?ZG[jã[J‰çæ¯rqTüKÌhÁ”¦0p>k=àX&møÉÆén˜‰Ó'V Û&~ròq“›â.ÊfmÃo²+XþxÑ /¦áïEÛð›ƒÏméc⥵N,¥?:»‚•ñWeÎXš´á'+~²×†UŠŸì¡¶:¤ ¿Èžd< ÇÆÐk…ìKSÄ/òðXy«LõçÜšJ“_ŒøÅq]À¦ì#‚WÒéŒÏ¾Ý>jh˜Í,ßÃ#:{RpÇ"þH“‰¡KqÏW†8|a ~‘=Ix¯x·¡å\K‘§AH:ßÎ@§c® ó\GÓ6ü"{R#æ$㨎¹lA~2ã'ùvØ"~JPâdž%B¨•**o üäš°Ä·S!5]Ú¸,ù¸že×Èœp¼¶Aè_OÏÜmÐMÌVÖÑUü$»’1µœÌN(3ÈOÖÁ“<ˆ‡s*\žø°á·naåoÕÚ2z!_hø±[XcÀ«ÆÇ&*Üðcâ˜c¦´V¼Š&_ëøIüe@žÐT?&fâ$’ÕëÇ%XüZ׈1GiÃOòÍŒZÈ”\ˆ t:d¬›ñkàмyiÃoÞÒJød¼ãy} ?ù+¾Ö@(ü–| ëfÜ“Á͵4–Mâ¤`—“@Fj–kBª©ýÞÍÑÉ[‹¹sà˜èHÄðþ]€ìÇÒ„_ä{™_Ãþ!ÿ ´gm£)—Þ{¡ËX6óV»…VÀiÃOÞÂ: ä‹hˆV(C^Ú#«øI®¶>±j&§„Ël —¾1äiX4I~°Ïo³sŽ;¶m¿DGð¥ú '¹‚ùMSœ±f&Þ äJ¾„õ™„Hš*Ηœ¢ ïï·fiÂïÝrŠßÃ¥·uéRa88%AjêwÝç•öÿûEiÀ¸-R§Ô'¨I@¬Ü’Ç jÒˆÞçƒÔ86Ðݼ“Z¨‡ßàûNêÄʼnM'¾“:Q@€¶¢ô¤ÆDb¿ë¤t ó†;<`©Èr¨fÇðI‰ù`5úU°¶»BСzŽ+Hý@uÅÙ)ÌVæ…q‹êš?&6£ô7ªkA&Ÿº‡ Õx\¢@ë=ųzžØNK•£³guÇÀ1«(;X=2N_w|°š‡áPS˜ú8Çj|-u¼æ¤`u¬†p óp²:øâÅqÇêÂß,øk9a1tt‹¤-XãC¬'íh¬ù…PñÒj<` ˜XªAW¯‡5k L„>ÈÑ:ËqS™ÚAk,‘p§ähÍ Î8mh_­±éÍQF˜cì´– /© $=­³;¢.kt`Äõ€56ô\°kÍ­ñ ÑÁªšskL:úˆ•öÑÓ_+¥å2_ŽÖ¥LÜPZWr9Zî矬Gk>Žj>îÀ5ÆMµ;zÜw\£¯ÒŠ÷Sv^ó`ÂޭóÀŽ ð „R˜[dã¤òÑ9ž"g8Çl¬!Pëtä´C›‡8ÜϰÐEäâ‹· Õ¾ÛñÁöø–¿¥où;ÈÑå0¹µäÆš<Þ“Ü9$¬—žãIn µbÒçgäÎØÿ*~´glb¨ÀBˆå3rcFñt‘xrcQ¶ ]'iGn¼õ‰—¦§^ c÷1ÛAîŽË\ÇÞ^äÝyr7–A_ÈS=ÈÕ5pÂÊAnÌ&ðpô¸“¢BÝ J:È­àËvîÉ=¸(Û³˜=¹Ï€hŠŸ wVF¼ºéÐÝæ/GxÈyÌ&±`±Lå¥{t¯è'zuè«KÃYjê9Ö¡ÿ(Ä"ѱ+º nXa¸ä•ÏÐ]°­†¨[™G7¯ñ@7$‡‚\SšºÑ†õX¨Å?ÐÍ‹i´ƒÝ7ùÜ1>=ûƒv€@`×÷ºx™¿·ÇîJIÏç´³{?~¯4ăÝ«{ö+v±yyŽ<{ã)¥ª[ Öù¼†µ}ôº¯L ‡»ÎöÑë]pC–I÷Ÿ¼î®ã¬o=ü¢»ÎanK“×ï‰ãÝ/ûçdb~pOâŽøçãž³Öê…WÞ^K ïy™ÑP}mIï"h÷‘°Oþ ‰y¸çø.|Ä<Üs|¾ÀGbîî9®ï»çX ªIóEÑã7ÑsOþÑ“.xÑó]x_ô¤ ^ô|Þ=!š=׃rÏÁ…&á¢Ç;J{Í=Ç?ä-÷ÿ”·ÜsÜSÞrÏqù!÷œ_ñ~É¢X¸Y¥ñ+Î/Á:¿üËÿûžóKž»óK‹ ÷i9§™ÏtòI³Ë=¿Pkrß•©&™4P¥Ñ¥ ¿ùÜ•'îé•¡&ªBµÉsWæàÄdW¨5y®Ê¸ÿâBš.‡šó ú¥÷o‚§$«ÜÉ¢À#‹Z¡$«ÜÁÂÅÅ.R!mF¹# Ä8°¥Èã¨5y®Ê‰ë½åÚÅGÒð\•ñ¦Âˆ…v6iå¹*ÓCTk})øÍçªLCî÷%Šf¤£Ü¡6'A¸»Ü Kqþ/m“š@AÝü_JE'‡¡KµÊ4?FŒaѰCh뺮ÊiŠûLb) ¿ù\•é#þ;â|R¨oùÂlÜN¼¯Nn¼Ä„U"Ú'ÏmLr™½]Z+Çmê·AíÌÕÛ‰Î^2TFÛÁMuzÎŒMå÷ìâ¶Ò nì}$|O¹àÀ-^-=MŸ7¹“ˆšÞpóM…Ž÷3nãìZ°àüšnWžÇ°xFk·i×¢ ½+Ù,·ñ Üíwà¹] â.3šç6Nfx#WY<žÛ8ˆ^ûõï¹Í•”#eŸã6¾‡Ï³)wnZMËí{ãÁMåuÔc;Á-N4ãvžðàî̤¯§nò nšu î¤'n|»%ºjŠûI 
†z©zpÓ‹aô†ÕÜÉÄy)¸±õãËáÁ™C'[Uè9nC0 XY Ò°T#Á=eþ=·yEˆ8Øt±G:nWºàòdSOnWž•FhaƃÛ4hcN Šùà6·âN«ÃÕf¹ YëáÞŸnÚ§$\^·ç6ýað›i”ƒÛ… ÉÎI9¹Mw„ú(å=·éÔg¥?íÁíLLÑÀ4ËÁmѦ6©ÜÆX±øqK¬7Ú°…pCN¸Ée¸q‰óòyÑ º¡›ÖÁ©þ¶ºNXIQH9tãPÚ1©QÀžÜ‰Þ ØßûqâNQβ@›ØEü‘'õ†—ÑÕÍÛ‘'UF.„ŽÜƒ¯‡&{цxrÓ‡"Š=·ï䞸anè_<ÈM÷t’æþÝRÔjÑm›á<Dݲ¸GûàᘞÆ;¸iÒÂf€y;Á=ä€ y·±aA û}!ò܆TâÜCn<¸ÍÛKÅšNâ»¶q¿YÑa5…{nctrC9°-n˜‰^2óÄ6§oc¦z`m&àt¦…m0•¶?z¯ØFNü@‚¾‡m¾Ü…¸Ø¼ A^Õ'Âcエ@2ÅÐê±=x†ÇŒ =L8lÓä†ÏmzÈÎ>.˧çöä÷Ç©ôrܦÛ@rŽ;·'áÈ‘ønyn³-ã)Cl·žÛŒð€˜1(zlC‚BÆ™õXxåýd ðÞj5£ÑHwÙ“Þ…ÐfÑrOþ‹–tÁ[´|Þ·hI¼EËwá}‹–H¯³h¹¼oLµþ%ÆT>p=÷ä/=é‚=ß…÷EOºàEÏwá}Ñ¢9Ñs=ø!c*NŸ;„Ü_3¦ú‡¼eLõOy˘êžò–1Õ=ä c*c±èŸBÃ…ì­©)ykêþË[ÖÔF Î!¸c5ð¬ÐÃ9*²Ó¤M+ßb*]=œ[R_ùÈJ­<ÆÑqѯҶiåCÅÝ«ŠN»e§•/Œöi1ˆ‚¶eõ•ïTü±MÿÜ’ZvZyÜÌ&ÄlŠºQmi½ër7Çe·?¡k•ò¸7Lš:%7ªMôªNÐXÇ¡‰} U«Úi ïĬ5‰hU]å•@¥}P}^5B«ê*¯c¡A¸´®~έ9­î¸çÆ ê¾ê°Í`&p抰½°Íéå±¥Û ÐÀ×3nwŒ ½ %ØîYÖwÒèYí–@Ç ¤çy`[‚˜0™ê@â±ÍÓdéƒi„l ”ð/‡èÝ·¨1ŠYw Çm|­wBO"‡<·Å<Ûâ m²/Ûí<¨ ‘ŸtkÚG‡í"G6Ð%ÔÆ™?3͵¹{Oy'µ ðÛªÆ nÔÆÓ%„jÓÆ: dI۵цW²êø=µy Àîžf< ]$ '2ÐÆY! \üò8¡ÍPÓÎg•=í"Sfèâ\ŽèKªß©cýnCêÁí)£#ÜÎÌŠRrC‹ç6}gÊchñÜο9™œ«ÜNÌø…ݸå“ÛL³5θ%ܦ%V7î¨uç67á…ò×q;NZL›ncâŃÛ¬¤Övêâ¸M÷ŸÆF=¬:nӳޗ¸.ÊÏmzÏaÜ#¦¼s›® è@¸Ü·Å0MËbš;·ÝŠ%’ZI·¹ 2Â4å:vn³-50ãqÞæã°Awn³­ktíŽmŽnÒõLŸf©Íµãídu.sØæ/ªç¬(ÖÛ?œÒÔé°›žSæevvØNLÞ… 'ª/‘ã6û‡3§ë\f¹Íça¼]¥Û|³CµÏ:nóy³í(ïÜæ 13cèAÃr›]¡™Òqp[Ú*h©‹mÊzƒ?˽ˑ›_ÃÉ%2³ÆNnŽ.plž ¹9,ÛêNnI:¹hö GnyA»ª]±äæ„ñÅż·ù0¼ÕV5@Ö[<èò•É›m‘~Y·@ î$ÙÎè"\pä–ðuõi<Èèʇuݪ¹™yfº¾VKnÑ$Œüô“:Y¦`¨¦ÍúnSêIßÂmJˆªeŸØü¢1õÓ§¿¡Ä¼Æñ“Ú+¡ïƒ7§n½nËœ¥îæÔí£×»à†,³î?yÝœê,ðŸüˆ9•¹4±s°dƒ¨þß4§2Ë#öÄÎä5ßeº}ÛœºÖë†W2{‡gù¼¼^ÍxêWT‡8d9«–{òXµ¤ Þªå»ð¾UKºà­Z¾ ï[µD~UËõà}ƒª‘q è+,ªòÄMøÜ£¿¢rôÁKŸïÔ.>xñó}ø‚Ú‚5'® ?dUÅGt0µ2×ì[fÕí)oÙU·Ç¼eXõy˲êŸò†iÂÅ\P¸+Î_IÒžºµ¬þ÷?¼§:×é,«Ì9••¶%ì–Õ‚[› f _Wg…˜à7vmëÞ²Š3Þ/¬£oY–Øä–8j±–ÕÀ|O¥iØåsœe5Vf{â}u´à «“‹)KfÀÑŠ5¬2*6ÜAr´ «•%èf)…=B•á,˜â*mÅZVÇG§v¥Ê97NcYeª,…¤Y©pÑ·–UÚ†1™AnºckYmâÌß5Q;cž˜Oª2œ\ýèãýh=™ûíÉý;˜ {™VûÇ­öYµ­}™xy¼gK7'/«KëÉ4âL1./o†bM«å£2²¾¼º5­Ò‡¼…^%¾gRùð\ž)qwîxµkZ-2Ö2D1cÓ*#$iÝÉ›)XÓ*Ë ¤r%埴>:z±ƒâÛU”Q“‹êQÒ3jx5hR!¹l«‰jG}f»1$±íà ×"÷SâÏn%=3ÐÍTkSÎÄêYJzæð£Ç½¼Ò‰©{tôôJ‘6mêFG3^Æ"Êã y2¦UF40†8h[1ZzÑM²&˜(è0×FK) ë[ñM†n?Kw5< F‰ ˜b=¸–Ý$¹Äé|¶n´ô‘ìT¸«õ`´ôÔ2Ï`Ö¹ÄÚY¶UÚB ò Ä0uË+v’B 
aP-=-A‚S“}w2/ß#¯“™ª-]ÝF©RyÌàDÑüO’â‘× ©¸s‚ |Û†©RgZÇÔì«´QÓ3ñ}¦JÞ‚žZÄÙrlùû®Dßi\ÝÀM…ÁÀy¨ÔÜLˆ¼’ãnàÔ1'šGuã63RÆõœÜf,îŒÜ)ËÉmV™@_T‰í¸M}Û,™_͸™ê½‚NMsÇ:pÓFð#©ÌƒÛ“Rê+à¹ÍøŠqMÊÎm¾át©Âs›‰2ƒD•ØC´÷WÉm†y5FÉñØf|E™+;¶X"`â±=%:¬È_vl3LJs ŸØÆ©’МážÚô<ÊÌBhS”Ó8æ¡Íˆ¦Ô3sÐfêâBócŽ ÚCc–‡6C§˜÷[íÚܤsd¿x@›!)“¾ý„6 ·ð,®!öÚ8©W¬Þdö=´Ù–€ì*ÚhíŽu:c'Óï¨ÝYÖ"±6ÁíÎt­¯LŽžÚ´±Ô 0k/µ;­M|I"®žÚL»˜J¼úAí.¦'ü±×ƒÚޱF5ÿxlwJs2ÆÛ…Œ:ÃÁ6h·ù¡RrþðÐf¦mì~CÏ4ÚT»'yžÌnô•Ñ(ôƒÙœ¼&ó ̦{mýE ØžÙ˜œñÂ%ׇgv£KãÃæƒìÆ mäl¼£z·+ý´ú8©lIOmêYLF6Û¸·`ó Ê'm¶ôg {hs­3ÝöíÈ Ø].®YhG)i1ù€6NÀyÃb¯;µ%s| ‹šœ¾µiêL_Nj‹m¥qŸP{Ò@ Ö++-¶¹¹4ÜOX1àÄ6~ä*£pbr!ß;Åv’4аvž¶ù5Œi=NlóRöÔfþn¼»¢ëÇc›ù0èµ£Ç=Çmf Æo^K· Z1Ô|¶™ï5ë{ýŠR-8©†®+Õs›·ˆ¤×ÃÛxm™‘Óã<â’èÁͼyÞ~“ÜCSâw]ܤÙÀå·¤Ü,äRèÖsµYps çqÆô༟à绘r=¸ÙO¬%Ü•æŽnºpážQËyÜ&ªq¯E_ž#77r%ÊuÃrèFOh¾Žš[Ý£m²ø“JžC7¶ºŽ=ijån¦Ê÷¨/Ý¡{HXý¼bR•ÝŒ2Çi"‡G)+¥óxzíDé<¦õ×Ì6Ü?+Ï9ø<ÐunÓÊ„o—Bæßüõÿòw¯yº':”ˆhËÓfâL —Àµˆ3fÆ6=ÊÇ?~œâ™ÁwA‹Ÿçßüß¿û§ó×ÿó÷õ{ýDí˜yÝÊ¡íƒvm?0j\ „L_å3êûþþwÿôCCþN/+7jºUñ pÙGmÛ~`ÔƒHï,ãP~Ôÿþýëé?mÌgèÎâuqc¶m?à2Iy¼äð£>üc×›z}Õiâóç¿c¥¨·ÛÄ3ºWž#yßð_bHÄ5´í³×½¢”­aÙ7ŸNøÏ¾ vÔ:õþ£×'ü˜¯.¸Ž.õdÍÒ}3ý‹›î›Î=WÖrÍ4/´?'g »Žd¼òFoJ¼¾ŽÝˆª${.wð¹µý³¿Äª]ŸŠ­_aÖ®‡WÅÖ‰¯°k×ݯÂ÷á +¬´Ç)UÙk ¯ ¡&ÆõBèžýB(Ø„Ðwâ „ð1d!ôø!Èy!t}ø!ïŠÖ2íM—ãR_ó®Øžò–wÅö˜·¼+ücÞò®ðOyû¢SVÊ,¥´ø+îÅ:WüËkÎÀj—”„R.‰ê^œÝdnÿ!ˆ½˜V:t7õi@£Sø²ÞWè#&uýn»ƒÅ q¨þ<¦EÅN¼‚.¨ .@%•$&¤¾]c[Är»¬—½ûœ²©IÁoýæ0^Œ¸.¬°*ö?´1ZÐbŒ/j]ÑnµÁŒBï!kr€HKš‰k  -]½é]*À]+v8º¾Ô¡ßbߺ~bCKOF©8¸ü/ÄÛ&÷ÑuêÆæÁ¤t§Fñ‚º…X¢‚˜žPËûJjÁ'SøüÉIA9­VX" êU0Ò!k>BL®0¼ŽƒŽ\ù‘ajé&ƒÅ—Š‰Ï­F1áKQø´¼0* Ðb%ÍKù‘b jf¸NÖÆþÑ)Lûñ>FºõOË)fë3ÆcÉÑÑ*+m6ÆGª<pØ?a<1/j'ñŒ×Àt©œq0ž6³žsÆÁx:r$ÖóÆÏ‰FÚÆb;ÏB À O98ÁΪË'ãeGǼ– Žñ<éeúB¤t2žî‚ŒYV§‡ñ“ MWõ€¼dö¬·;ÚùYYØ“›xB^Ù“?dƒ=cÁ¥vEúô zŸ,?\>ýø I½Vˆýñvùžl ŸU*æb.-è§8ÍŽ¯Ì‘ž_ÁÆÐ®o8ÐóY,Ã2æ'çyú.ªgì'çy©$ˆùÈJOúÁ:Â Š®'è!F8Ͷy½ ÏyæÓXýäü¤"xØ/ÑpœçžËòNZàbýd ¼Ã\>= SHûýÐ3‹ öÞ¦¹±6Ð3'=ŽW`ö' ç_FõštËyæ‘oývAß8Ïí<ë9Ñs^ŽL ¥ðœgЇÀÊRŸqž.F‰I…ó'¤"ñX“%Ÿ¤§3s‡õòÉ‘žµÊqøÂ­#ýdI—'û×÷[…M¶ü$Å¢µ¨ÿtMZõO/Ù„?yú+ªÕk?™±½ñœ”5ÿv_Û>ª¢Î•ê/ua˜bÆWüGïwÁY§}ûh¬‚ª_0 WüGŸtáW5¶ô²–vrm¢’€¥w° û©KÿSí¿ø¸‰cà6óÏzÝþûˆö¢Ã+…x/"¼½fíxÚrxF8óö‘Yåx²m{¯ ™%XÚÀ‘3ä½ ®íÅ.°JSe‘É>.ض» GàžxÐ=º`ÛÞò°¸üžðɽðù'¿/|ìÂ&|® _ |Ò/|¾ ï ŸtÁ 
ŸïÂgÂ÷뻆¦T©‚ÍŒ0¨8’Üãþ½þ!MêËv–>;üfÍCêŸô<"£°¨Þ¿¢AÙ?EC‰™¦ù— ½ñO ])#Á<çëçŒÖâªFKQô®j¿ ô 祭û­1-¯®ØC¦iÔD[Ãâ]8•ÙOóyÃÿTíÊ_ýå_ÿÍßþÝ_üÃ?ÿáwGŸ¬ûÜ/Îóñø•ï‹»nbµÉÉÌãÑÙ&QÿO¿ÿÝ{¡þ¢@½+Ž•ÊR£ô·Ë²!“Ö‘eáZ7Å랇:Æ0°Bp•ºäl•ˆ<Þh™Å¹Hb²U%#Ã(Yä«á*Œû<^âÒÌLÍê8¦ŸYç.õR.›ç4Y3ƒ0hÒl’°í.8–Kµy7iÌx¢2kÊå^~Sô;WÁ1ˆcéBÔUZO:‘Ì#š++)¦z£Ù…ÁGâLJb¡µ+¦!S >ÒÍ“ÑM³†¡¹O™p²³+§ï\!6Ì‚©Zdfd¼+äef›dÀèèú5“ÔQºÙj»Ê‚2EòS"/3é,1iÆ”MVG¶UùPÓ9\C€xºSã.ÐDû‘˜±”}Èç’Æô»pb¿g®²«–ŠæsœÚ¥ú™j:âµ2ðìX5Åñ2I`%àAMژБ_H¸øCæ(;©™âx™C/évJÍ$tÌ©³ _Ÿš)­âx™¹Î3'9[S7 3Ñ1ך²8u“Ï13‘(å»^m¦6ž|-OÈ…èŸÓ0ùs‚å%hª×4Lm<¶Å¸Du˜„ŽlTˆˆ1ÍUÇ>BGÎ"Ôó­!:)ÝL“€¼.3èú.åÈdªeùæ`Ò9f¦æ§v+‹A#3»ã-§¬ƒºN9šlŽüÅÚž_dfÌGL©îÌ’‘µJ›IçÈ—£¹#Äš™¢õ®äÈïij Ìɤs¤DÆÂ je¦[•™¸Òx9ðqúæ]Ê‘0°€\¬#¾Î»zúÈRõ‘V¬^\èS’ð'9ÈPÑÎ3ÛwyÇ}¯aÙã:ñ¨®+~ÇuÉDÃ=lëL%÷#°ž×R3±'êF³U¿áãX]Ûϳ ”¶šÞŘãa]™€þ™FëÊD÷8hŽéÖ,ʽXgi]°'²~nÚžÖ•W‹È­´¸Æ÷b{ÖµÇ5볯=Øãšö¢u¶ó¸få™5û×[ÖÛ‰êñåq]•¹ò\·Ñz\3…;Cû•1‚kt!¨Cuú^\§}Ø›3v¿…ƒØ´ãÿÌ›«¹?PðÄÎL#oðÄΙ°*SSŒxb'÷ö6bÙˆk“ì*±-0±ñf¯Î}Ìý„ÍÄ+©Ü/ïÀ¶=Aùö`!‰{¯õ'ì*‰'ÎÃufíÝ8Õ]Þ®ÿݳݸÃ5HkW߈ݘbëٱ韭ߟ±çÉ€5ÍUáÏØtMîØ>ÔÞéÏØ´‚-Q߈ͪ"ëÜèˆ ‰J8~ßÇ[Ol)ÉñlÃJìž~þŒÝ èZÖ9ÚSŸJ_¤·ÄÆü§¶íˆÍÜh*¬'±ó² _žØÌ鵰片»L9pì.ŽÜt;Jφܒ“emVŽÜœ²¥ ô䯮ˆ_|åÈM—Ž^h·dÅÀA›pù¡Ômz¬'yh9+_g ÚÕ¾¸ ÚÉ‘¶36},žÃŸ‡6^ÎLÏ$ogì&•ï[ævÆ.ÖÍÚC[|\ž›µ@»1¯Þ3So:õxhs±_'¹C+‚mu­ýí˜=ívµA›5ò Ê Ú¸C—yŸ¬h›‹‡6® ̾õÀ×B3ݧº…6Sâ­Sƒ;fóBfÁl ]RvKÄB»Ð}ϾUþ¬Ý˜êA›?kó&Š>;jÛv¹µY‚i¬{‚wµ¢¹{°Ñó›þ¨›DZŠ wÔ—„çŽäÚ,bN¤Ü¹îVÙü4+OŽC6o@\~÷%B-7„µ¢²ÙÆTR×}Ý!›m8Ç>·‹ì"/}ét,²ÙftÙr3·‹ì"yðêÒod³Í¬‡l¶9Ôdó4Æ|¶ƒl^r†™G‹lj!/×Vk‘Í!TœZOG‡l~Ïh’·ÙfÞ›å¶V݉]$?Ͻ%6Ûzê…8bó¶h.9ŽØïhÛ ±‹ä/\GwKì"ÈŸ›ßÃs—æÃ›“\ÓºFXb³-ñÆ¥IWÄÎ&=üønß:L©q°P› ÿ¤¶pñ |Ñ7ç³§¿b›·“ÈÛÏ)U]PdÐm½îSÃᛳ}ôzÜuÚ·^÷ÍqC¾Þ|ø“}sód¡ ûøxÙ5'ôƒA@ôÛýóñÌY+v±á•Ø’!Ë@fºNbtÙKK3ÅLyåë¦^š)f*×aÓ—&93/]'ó&¬Ý¡ôÍû¥-•XéÎûeÚ›zéêý¢šòË×§ çý’¨»¹­keXͼ¸à<¦Š2œ÷KÁ» ,y/R9÷Kc¤ð}W/Ó¹¿8ÓH™›û‹yq58÷—D½Î=‘58÷—ø—Q Îý%[ÃzÖÿ¥Z‡­fžùR+m¦œ)}‚ŒÕª&SÎTÜ9°\‚v$Yå|r¶æ {â±²j.ŸždçÃþëqyÕ3eS^šÌš7˜±Ôpµ˜z¦ô0™K W‹UÎÓƒg­ÇZ6@êVÕjê™rËO§V«›N^k5åLùµ¶ümä¸ .›Ï[þ/·%ÖÎmI‹ò(Ã7nOÙJ/Íç6%}™G<·©ƒ^ }ÇmþCK¶è¬©ÅꋸE3™¥™·ÄÓ? 
[n1B<ÞÜ4k-Ùóàî ìÒÑypwÚ·Jn:L,Á´à=èrrpàæÔÚÝÓƒ{Z‡n¾Êx+dvpOçiåÁ=¦ÉNâ¹M_ŒZô9n·rYæ<²X¾<;<²צªÈfšêÚn¯IlþòrMòÈnžÿÙ•«ÇeñĦgÌÏãK¦ŽØx{u?<±1æ²L€žØh kÝxb‹ë°Á²õ‚ÉÖºï‰ÍšPË<ã‘MwÝo§lÖ ZnjžÙâ$ûØÀ=³!It¹¼.OžÙ5Ђõl:–Ù,‡eö#Çlve>~žÙ8¸cÏlM‡Çjí™Ã‰ÙȲi9\§9ÏìÒ@²GÆ=³%Fû± ³éO³|a_´§žÇí[4lcí±'ylguÚzÚ,¶½¿²Çv.Ö×ÇcÛÛS=¶éF¼lùÛôbýÖa[¦éÆžÊMuùñ8lÓhäÁa»D.¡Ç¬ç°M³¯ñbpØæÎÓÍn¶a{.ÿ=mnJåq%wØæÑ÷õ9S;ßÅŽYyXe©Í$ó5Km¶]ëõÀ62-0‡mŽÎÎJwVÕä¾×7«ªñQvS,ârqì‹ëòäwì–‹Þx‘c7Û¢‘J oÎ1^ù³ÅMgUí6ÀÁ›Ö9cuðæ÷X(û´ƒ7Çg¬¸Þ|ö”kà-Þ^ó‘ oÙùãc9uðcs}<ϼi¯7n‘Þj}îÍÞ|^\žÞb©]ÞlÞ|œÝ²³­Ž¦Õ¹Þl³W†âl«Ã¼y½ŸË>ïè-¶m³ÇTg[mn–Þ³6•pã^òݶÕoÛB·u“Rk[ß´­~öô7T™÷8~2c{å9]Mˆº Û>zݰ‰£´\,I€{nýG¯wÁ Y§}ûèuÛªòõæýG?b[Å•‘åéÀ—ßÎ{P@ÀË\ús²®®5»èðÊ+ɰáÅ5kÆÓV ùwa´¸ü“ß7p± ›Ëuá \Òoàò]xßÀ%]ð.ß…÷­«Vȿĺ*ôÂçŸü¾ð± ›ð¹.|ðI¼ðù.¼/|Ò/|¾ ?d]Í,ãÇDá¬ö–uÕ?ä%ëªÈ[ÖUÿ”—¬«î!¯XW™õ‘‰K`þŠyW6c\ýÃy­\µ(w}H9uÞæ`69m/¨OTuKu%Ĥ֯x·ä”ôÉjmZvJúnµ=-×UÑ”º‹ jËVÛªUñ5ªQ®‚Të¶ü\¹1X££§ÒaY^Õ(áÊ He»Ñè´ê¬«¸”ÆG·Üj]Måy+æ®Õ±*š2ˆ‡þ¾×å²5g]mÖ¢ÖZ]Y0­ËÝš5¯öhó)4üõÎI#ðxò[·ÆÕælU­UДjV£)nÃêè1Tf„¸l‰m8%}úhK! Åg•|ŒÏó¦ÕÔã•-EÃB‹Kñ)ìoMD›¢©/ÙNq§å–X‰ \»PVSÏø±ñ¨œ:µ(·Ä2É‚ 3ìѪê)jKÒãö´Œ;Õ(·ÄæÜ[p‹^OVUŸ]8d§å–XV•˜®'«©§‘z¹×wjQ‰-$Ô³ûõìTõ«e /[U=~s® å^œªž•Ó¥t/NUÏÜO4X/VUŸ¹BÒ(€ÙS–ª>G7†jTõ™µ»Ÿ5Þ«SÕG›ñ¢7«ªÇLs —±­7ûÔ­©7§ª/àÆc0éÝéê“=Lônuõ“‘²nuõ¹Ø€žY²Ôæ¼fñ-ûê†î¶’›ìèÆë0Vn&(Oè–G7^ÑõztÓ´a8äЩ4nºO,é³è–€¢ðÄ98tSÕhBƒºU“úX¿ºÕ€ðÄR9ts÷ÉOÞGnêéÍštäíøRý9rK,ÕJâÈ-I–Ø’›ZOâÐͶ9o!‡nÙ—Õá[‘V  C·$ó1ûµE7s¹xr3ÓþRƒ{rÓʼ<%÷,ÖüëÉ6³‘yr3ïn~tñžÜà ¥'÷¸by<³ñ ³yfa_˜g6Ú8¹—\9fj}±<³™1w<³G´ <³Y0kQzf³ÞòIkžg å­™ûs- Ok,É­yZÁFŽÖÌ‘´–µ§5ÉæéiͲžÖmÚ£Ž§uköèiÝ™¥ûѱ{Z÷`­§žÖ|øŠeZs6êc÷}ѲzûñÙ9€]¬U»:ó¢v¤¯hTlœ¢ªmsÀæ!ö±7y`çösgmûMpµ6ö€´ =°™žl9½x`Ó[n¾{`c¬˜„Ï]m¶ìÔ¬H{`'¹‘=$tÀf©•¤Ë›;Üxü&=°1+&wжd´{NØò¶ò:‡[`3!ñ:ëy`çøq!¥ìÄf[hÏ•À»H§çL£À–i\ÁèØìƒ¥¶üØò®uÀæŽoòL8`K^‹å–ç°M~6KÜb[^ÛòàrØæošðX‹m¦ØX{›£6…d„ö)µ‹Ô}[,µÔ¦s}š¥¶änZÜŽÝòFŸô_ÝrDZñæÝ”Is¥tè–IIñ³ƒ6›–Š#7`.ÅŽÜE”F‹À–Ür?_þSŽÜ<½¼Ó¹y"3}Gn¶F9rëd>«Q4¤²šiýn«êI¤ÂmK?ªƒíoÚU?}þZÌ{ ?Ùѽò 1wËêöÑëfÍYênYÝ>z½ nÈ:ïÛG¯[Vݵ ÛG?bY­­2A¾4Ã|ٲʺXð½bù.+îÛ–ÕµjÞ©òü¬¡·W­Qý Ûª<Л·ü“ß7o qóuæ-×…/0oI¼yËwá}ó–tÁ›·|Þ·­1ÇÑâ+Œ«úD/~þÑ_PÒ€}ØäÏõá+jH¼ú>|AQ郗@߇2°ÒÒÉœ³›ìù÷ÅÃ1›‰W&Ïl*KV6ÏlÚ^ ncv±VYÇìä 5mÌnZ³éÀû\.<²YÝ Ô!;6·¦²îº*ÒxdKýñ…‡lF“® 
\ŠlöýçÎÙRwe¥×öÈf|¬é G6“;>ëË#;f›ƒÛ#›{ßÒqmÈîÖ5Ç!›mÉ Ú"›Q«†?ٜǕ…Ä![CEM›A6·a‹I‹lα¹º:dKØê2:;dË–Ò!›mÆñË!›2´"¦-±%¼s]°±åõºÃïŒæ¿´WËhù••‘‘ZŽæè]|˜ªí˜¥µœŒwáK)mÜñ÷—çõ±hŠ©E•¼*tó¼Z¢ûÓ¼£¹¬Qu ð•'Å ÆC¾È{pÛg¯Û4±ÌÔ|ÈäkO'üg_Ð ;êkò·Ï^7­úQßðŸýˆqµîQ©Õ‘zÙ¸Ú¥ZEÏ̹ÐþœŒ«vý.V¼òo<¼¿~ݘªˆF¹³¿ùÈ­j·öWظêacõø#W=¬¬['¾ÂÊU;ëÖ‰/0´Z‹(-¼*†w—Cÿì/C-0çÅÐuâ+Äð²bè;ñb(ØÄÐw⇬­ ÷ñÍ롼WÇ{{ÊKÖÖí)oY[·Ç¼dmõOyÅÚÚ#+£ÜK‹¿fnuÆÖy¯Nv Ž…Y’ëÐséãÜNî6–7¡f–*Mu_øØ©î›uŽF£3µº€É¤úo{QÍÃx2†¢±H³<ÃuÉ_:T4Zk«z‰aXs«»É£ ¿Š«¤h ¨a5A“h´W±,ÅËÖä é6‘Žh,’Çì¢b4ùÑÖ%ÙµqW›Âgpgt¥÷÷*gƒµºŽd­µhÄÏ¢?¢£IÖXSbŒÖîÚíímÎðÚm94vIgÖ0"Ñd3vŒÔ»è¤g)ßi¼çÑhm¯b³}¢=2e¯Ì%ømc¤ê*-iÙÌn;Ä(ALPƬhãRáQ­°Ýžj—[ˆ™þÏ¡‘ºˆKˆ%årµßt†Ød×c¬Ö[øÞžÜÄhdU’K”IiôÐhì’ÚLDYì´+" «×§†íP3ÖX±Žšþp·¸E™fÎ¥Ê@'œ9ÖEa£ÑÙc]$uR5êÚZ³ÕuÅHEÌ#Ê|3ËZ‹Fg’-®&gV½/£O°,„?ûˆ2Õ°Kë‹Fo–51êh³~Ñß>jtS*H©$çhC•ÐH;Ï-ÉìŽa~Àšg}ÁÒ–ЧQ ©¼Þ’ ±6iaÑØ¥Œ”Jr6ËÛ•š¹°ÃòkÕµÈ?å(OÈ3³íbΆz—ÚsC=Äj,ýÙ†ú\l‚È õ4½'ÛØ·l•+ZΡžH_FÉ õ©8 9ÔKlÈ2‹yԋ꿚aZÔK¸«QZÔK½'tÄ“^’,›ýÌ‘^âd°:ÒÓ iBIé5ÃáêI/ºð”éI/ÅûSp#½™»Ý$<è¥ÜʽéA/¦`ÛhAÏoö`¶ z©Æ·ò¿Þ ÇK³eÛ襠۲5zÐK•¸•ÔƒžÇSuу^¬¾FèiöѰʂžÕ ¹9c@»*»;ÄKFèÎìOcvŽŸ#žC0¥Œ7ÄÏ¢>Éü7Ä7gÃÙ7?óaGxÆÄ­<á§+þ°~6“ìÙ~2%v\/Ñž¦ØåÞ±~fë´že™lxÂû Åá'N]}mržðµY¶ðÓe¤WÂÓæ½Fðfm×ó$ÅÌŒgéÊY»1^’'?¦Ûñ\aÙÊã‡-Þ°1ž™4VxûÆxn¦Ô·c|6™óÆx&°0‹À3^ì{ëÌáÏjà Ó3¾Ù|ä%ëâú¦§| 6ÿÊNùá~ÖSž9†£%¹5òV[3x£<]¿Î“< ÃÆçÅó]61{âr€¶TËÆwF+¶;¬xÃ;v8ãu´á=V|Ñ»°)®°Ñ9¥—9y£»¨Üè.Ƶ¼=Ý™Gi%tßèŽí´šÓ§;S¯üëÞY h¹xÆ'çñ°13jqã!7T[ÈsJ§½ŽYÈK†l»×ZÈKÖ]s+w”s7§¥¼48ÊËÔÚýxz³o_y«=楄y¤Ã<;kœà<æÅ{Îì:óbôþ¹ƒ¼XË´:ÌK"C+‡yñ [~ÁO s,ø~kp¯ßZ'ªY^–ÒB³à)VýÓK–àOžþŠRõÇOflo<'eÍÍÛ×À¶ÚŠšy© ÃÔ=½ºà?z¿ vÈ:íþ#Ý«Ô ä fáê‚ûè³.üªªT–CxëÔã‚Hh7Ü»OúŸjüÅÇÜð@Ž!{êû¨×m¿`/6¼R®óâÁÛ+ÖŽGóÕÎýø]5jÉÒR„¸'Û¶»€çõ½Ú»`Û^r‹âãYÛ({\Û‹]H\ϽõzŠ€k{_ÈÃòËyOøä^øü“ß>é‚>ß…÷…Oˆâ…Ïuá „Oºà…Ïwá3áûõMƒ§â6ûÎ;ͼ1öÓÌ÷ã&^÷†ù ×ì(æê_ÑöêŸÒ+Žâ¸˜·ñ™Åòç¢ß÷”%u^Âõà—Æ’ÿ´§0¸ŒŠ;É/¥¸§üÅ_Òn‹ëpüö÷ÿhžù[,ù/õo¿56åÕû/bd<¸¡7I.~ÁèPlgOþ§jWþê/ÿúoþöïþâþù¿;úÄôéêÓ/γ ñø…{ŠTs¤F‚> §/›+Zû¿ÿÝ{¿±›*ZíŠÕ‘ìÃäzkT7Ì’ ƒrgšë-à\`EñÒ‹Y5Åc0tGkBU&N LÆ¢ŠðEk…ƒöœ%[Ì´ìÉÝsj®· €@wÄÜ/MÈ4©Þjíxa¡•BD§`ŠhIeœ”F⮟‚Éõƶ´”.—ÇV-H"óíáJ î鉵¾xŽÄçhc¨Nn%]mšê ÷÷,mèHÄý¹H›æzÓݳP™Y2NÝœ±¤¥=qËåá»0¾ãÌ—Ò*¢U ƒØ(:.Œe#°ÎP¬xïY”Q)›"ZlK¥Hy–6“ëmq*CÈšë-à2¢ßü•Ù,QtUX'xAâ_€ ½h9K}=3†&–¯ 
µÉЦz›Og[¤Á¥éK¨&Õ«(á`ÏC2¼jŠhÕœ™-ñ–Ì÷ûP讟.!šV-ù1æeT%Vj&׿—/{º´™"Z•Qìo½ˆ "u“ë­Šl)úJ»ÉõÆïõˆ%2$pꦈ–|=C,Íi˜\oü^ÄÚI)I?‡)¢U%Ô6•<´ŸÃd{c[+9_Qi®"Z5÷†ï`òd§IöÆùÇBB'õ'™3ï.û&SVZ Ú†¥)º)ñÖááb8¯6“íMÞÍ£™ÌŒ¸–ñ½¦kRr4¹Þä%”ÒZ•——£˜?oe°g*"¡l3ÙÞä{¡Q†8a,;ï²oÒ6Ê€ÐJ/“ÉöÆ :BÕV¤Íd{«Ì‰Ý™Üžïœ6êyW}ã×JÆÔ5Q‚K=æ%°‰Á«6¥mˆ:êX<°à‰ùÖ1*Ìó›¿¶'—"^¡¶kƒØg¼5Õä:lS„ËÔfb±M,ÎijŲØf.O¼\ªØ– ³—x†,·Nt¯òÇmæŶƒŸ;¶ó—Š%žvl³ Lk¹ö²c[Fh—ªøµØnTÆ|%Ñk:l3ã(¸ŒW8ÒŽm:óa¡£¢¶ØnL瘟³ÃvcòŠ6'±†8l·0ÑgLsËÃvã\í±ÍE±†fn;¶y¨P¹ë¸]¡_©UÏ·'.e̤GnÏm‰ÉéØÿÛm©]ˆ6-v}a{Ðì˜C)åÄö`}õÖ1]ãÀ6SaD쨲¢<µ%çcj˜ÊqP»ÓAn âUâ©£DÕYj7 OmŒÇ C§ÊQ›©o!ËiˆöÌS»RÞÇñ 6ý?’ìÑå v¥‡"PÕUµñ8¦MÔ@žÚøžž¡b>¨ÍâE~¿ìØflŽP±Ô¦Ñš.ä9jãÄÓ¨ÇOm–´Åk‰Q1ê¨ÉÄp[qðЦÙ3l²R=´ ù…cÒÝØS»p LEÌ;µYï K™kà 6×f S¿©]2ކC6£ïÅöwšŠ¹!ÂdÃÊι;ÝK ¶1Ÿn¦£ÆûéŽ7vEÞŒc rcAœÆìº8rWVw–ƒóqâf>ʼn…ŒóT9È­‘ÖøV9Ñie˜2ÖsÜÑÍ*£Ð1!žèffìô, ‡îÄsH ÷ãºi•£Ü ¶èŒ¡ô(kÜ“9Ÿ Q·ø÷˜x{8”êÞãÈÍ3<–Y¼¶3GnÑ"=Ù¹uäò®|Gn9eá~ÒÛqâæ¬$ìL÷ÉÙ’›»OÀÚºV¬#7g,Ëõ墺A7–®õTtsΖèPßÑݘ9I™âÝœbüC\(®ïvã”úùC£8<»§.xHÌ8Ø=y‡Öë¤nÙ-iôã >Ïî!6`\¯õ bÙÍ3JÇ ­•ËnNVÞÏut^ìæ.DõëìØÍãKÄ/-»Y‡œ´FßÙÝÜZ1¡5ìn¬q› ±Õ³›3ni‡w£g˜Ûëo^Ô±KÆú9à>G,¥Ö?9r3´}Tˆj?à=™ä'àÔveå¡:¶ƒÝƒ ¢\½‹{vVp,ëÑÁ±›²Yq;]ö]Q¥ÒcºDòM7Çî*ŽÞ8&¶yžºé„ L+'»™›wÏÑÔÁϱ›‰²p¼¡ÆÁn3¡±äÆi¶v|_o÷žÜâˆp‚ý 7½˜—Ç…'·d⮚ž},¹q.„jh‡®„_y[ê¹EÚû2Or³žr~.äŽÜ<ÿ§òuD³‡nÖ1ÉT$ª’&~¸øˆ_«·„3‚µGnú’à©3”ãÈ ±cÚo.êÜ‘Çø{Î'¸ ŽðxÓlàr¬NŸ¨Jø›x·¼pÄÜ—çõ|³ÀàÙŠ=¸±p¨†£àFðkýpóNÐUÕ3Xps"q£ ¯ïàn"±à,9vpó7é>ÔT€,·I6^ÃÔýÅq›nªó¯<ñÜ–â:õÙŒ-·±õ ü!…yb»âõbÙ6½ÏíØQNlg*°óu@Ù°]èÂŽ½S|Í=¶qÉÅæx).µéàDójGV»…`Ù©-² ø–^ÛNm^-±_‰êlÇvSd\_ÂÁmÊV ô!Liç6‡00|¼´¼s›ï ³Ž|‡ ÏmìJ¸×DI,~q›¯¬‹Öõ‡²8P£¡¦Czõ«ÅRK9[ßôÛùìé¯˜Éæí@²ÆöÆs€sqOwNØöÑëN35~;ÛG¯wÁ Y§Ýô¾ßŽòõæÃŸì·CKÛ°wÝv2Q?¾ Þ¼.^;k½.2¼òúZ2\xÑ;ÈŒ§­4ï‚hwœpOþljyxíø.|ãÄ<¼v\¾Âqb^;¾ ï{íX!7U¦ß>y >ÿä÷…Oºà…Ïwá}á¢xás]øá“.xáó]ø!¯D=u¯ðš×Ž{Èk^;þ)oyíø§¼åµãŸòC^;¿âƒ;îÛ1¤žÆ÷¸Äüç]bÀç\bðò*î„ÔÐ3dйÄ$&€,ňÁ°®ÍhÃz ¹&i3åéƒ/ъ»ã:ž U2ùÅ£è´.¬±Uá²%His>14·Õ’äæÏ$çƒ{´hœ¤Í¹Äàx† o{Òf\bpîÅm¿ •²©ªheÄ{t,Òø˜ªDõñØV%»ôcª*¸7ãæ8Šþ˜ªô ßwÜìku'ë¸/—Œ àÊš%A!ÃäŒK @å²·Kxõˆ©ubJł°<ã3©ßªEoþ…E—ƒT Îc[3ÕéZAg1ŽVÚLõCq‰éMŒíÒFuÅãa …¨—wÞKÌÖ¶¹ÄÄÇìZúí3x UzÎ!†v€4Z«JÎ!†/¾^ví2œ?ŒdXÍC´Ðe:wªAZ¹üLÊtî0™‘åa4Ö¹¹ÃT¬è2DXÃæÓëí#Qƒó†Áx!+øÅ&mÎ&ðûtza[ôÞ0=1tJTs5:o˜bk¡×hJrN@ 
ˆ8ÔdJJ/KåÕOšœ3 ͱ#FM›Q“)}Hi¬"[ò¸ìÜað#ŒâO¢°”¿^AZò ÒL¹ÊÞM4Þ0Ôë5I–"—Ó S¾ÇÖݶyÃ@¨R«b<¥ÕxÃdêºÆ¥×¿¹;>KÜZ°ŸŒ"š™Z7w˜Ù"+Kk›©}(ß›Œq)2†ž5¾/qÖwúÃlÌžx‹I”³YzM¸@§ƒÙR+¼c7ý•c6kÔÒè0ÏÙ¢G/ê °3›ÊòNÂ^ Ò2›úÝ”{‚¨×ÚÌÍJÅ|Òç9hS"Ä_mn-Ë!ÆC»I-¼UÝ,´›Ô>è"š;´©Ó‡t…©¸±Ìæ0€ÌèÂÙ8Fuòȳ±ët¦^R£ž‡6Í*øœNK´ë¬}ÀA[ÂEãĉ¢Ðf ãRn'íÊô8àÆ¬'´ñ=ì©è®(B=´ùð6Úe(ôÐÆ€bx\/j3_pH²žlS}ƒ–&‹ðÔ®âÆ4£º˜yl3c•B é¹Í¨s, Rñà6s¦ãÒâ'Ün ^àØæAßûÁmæ'Ê8é–‘nÓë@ž–n«ÓkëöƒÛx\˜ŠJ‰ç6“(WÖöÜn¬”TÙö=¸éUŒ/5ïxpãõ‘®­Ös›¹Ø!%SÝC·™^ {'&"Ü®|šÚn³j8ƒZÕRæ¹wP¹Š®í¥l1s)ü=·ivÌŒOLÏí"æ]œ:e©zncx¥%:pÇ›ÛÌÕ¤†©ïKxøÝFUî.^¦\T'º¥Ø‘Ø\ÊnÉ€…ÑõOÐ]èÕŽCºª'<ºKuÇ@‡îÂDgüÕØtÓeG·”:Ç>”ÜtªJĹx7!›Ðt¹ó9pÓÕ€ã4¤07cÍ^cֽǃ›gÕu?qäfžU7Ä’›F±~yûì䦙ªÒOãj³ä–L‡Xý:øWgVeš!USRYr7IÿÛå`ëÈÍçµe*t䦫Æ,‰^xy'·”°‡ü§)ÎØŽÜœ²,¾éâÁæÈ;àÄË ‹Ü4@C’nH9rËÀÆõ¢6tóAØlgº¶@‹n¶1];ºùnèÔž³NÈtfU¬KÀ¦%ݦ3«zi…©§6‹nΣ&e(ºé–[É¥‘wr³—ØÂ±ïδ‘›35 ™³ˆÁÛ‘›d®Å¦"Gn±5ã0ž¯MÄ’[Žij¿väf]èå´“[< n˜mè‡d,ók[tË袜•tó')uqèéߢ›óŒ Á7ÅåÐ͡ό9º1gW‡^ó}¬6è¦14èI#ïè–sbÇ·5¿C7×x /¦¾¼¦ ×ó;]Õ¬ŠtévY»\™É?©)ƒ­ošU?{úzÌ{?™±½òœVAmØöÑë6ÍžÚ*¨}Í­ÿèõ.¸!ë´ûÞ7«º!_oÞ}ôCfU:\2z ÜË/§C(.Í8¡1ùꟑau­ØÅ†W^àH† /®X3ž¶Dâ]m¶-ÿä÷m[ÒoÛò]xß¶%Âãm[® _`Û’.xÛ–ïÂû†U+ä_bX•záóO~_ø¤ ^ø|Þ>!Š>×…/>é‚>ß…2¬fVûY®„¯VÝC^3¬ú§¼eXõOy˰êŸò†a•a®¬‘‰›¢ØnÁ²Šk´³¬þῼeY•[ù]I®õÄä’£Ó£¤MÎ>ò6ÔiLã¥#Õ•$SÌ[ƒ/Ú–çæ$gÎtb£Ò~©zpç-=ELH–¶ºj2ˆýÈ%‹†²ekYe6SQÛêˆZ”+¹ ƒLB/ýÊFGeÒRÒÉžq“ÇQ‹®Ô‚µÏàp”¨"Žvié! 
¹·V4D‹‘OíCæKȵÜ@ŒØzj²­ª}‚çWFz-Ój“"‰=«¬±4ø£õl kýÒ'Rɶôôø‡8m†´Ÿ=>i1%Q´½rwnݪéõòzÇ#`óZµ%wµÔÒ†SÓÓ¾‡­FM´áÔôŒ\Á[WÓ–ýJŒIõåìµò¤/Ó©é¹¶fšój««ø!uœTø^¾÷mªšž*­ûÕt*Qn‰eSñî{VM_¨¸òõ½±rbJÈ$~äþÍhÕô´zc®/·ü­š¾0ƒÁ)Ò©’x$¶0žª¤Ëà ¹6zzQæ•Û׺§ú¤Ã”¯uÄ”&«¦gáЊաº½N-Ê#±ÌÎ/¡.¢aéÙêé‹fH-EdÏVOOÝæ.jÈD/VO_˜a¹ÅË÷ «§gUø>SÓK›3°fûEgˬ*KQϘ¼ü„‹õjõë¿c”Ÿ¬.p’¨Ð)ƒmÍ*êñ“+犠ïÍE®F–lˆCisŠz®ê|‡võnõ¬ÃŠÃÄØÝ»3°2ê#Š _Úœu|hÕ×ò¨eäVx;ß3°nèn|ⱓ»Ñ2®’r»JòÎ0ŠèÆ<¹ÕI…)]òAnÆù‚Ex§c'7qœÕø:wr_a˜lÛÈÝ$!G¼UŽÜŒÁì8ò^–DGn‰L¬'®C°ä6¬¡¾žÜØDÐozåƒÜS\ €“Úr3©r·FÔ‘›¡ „KêŠâÈÍP«yZhKLŽFÐ×Ú2KIQœW´ÛU‹;©íÈA[‚¢2öÎ>çmj–šÆLÐÆ~u•sª´€Ä(™Ûí&»ƒù/hóE{`)ðÔ¦¢I²¹ˆxjs1c ^]žÚR«E3Ôn™%ÚïƒÚÕsçíÝå©ïAÈÛ•¢ÚS»GëÝ⨮ÌÐî*-žÚ=s#ˆ—¢ÚS›ŽØÜgVÂ:jcZ°àÒCGm¶­’žÚÁ†³\a¢žÚ\üåÉå©Ý˜ô»àT§¿é¨Ý°.%Z¦ÇQ?9ݤ‡v£%43È/Ц¡ˆA·9ŽÚ|Ä®+2<´ÙKœ ¯ÐSm±ÊNÉ¿t@[Šú¶ .ÅÚuREx³FïšÌ[TîÃÚ‹ÖÕÛ4êv¾Ët€›ž#8N$M-åÁMÑL˜é ²܉0:]Ën¼“íƒAÙ Â,¸i7—$Ïí7Ë™cƒ/jôàf9s¬á¦é'<¸X¹”ÜÜ<×áå?nþˆúy)Š,¸ùð*ò–p³Ÿ5ô;7…w6…w•S ­¡û‘+‹¡Ïx}á8rÓ­ è½Ü?=¼Ñ„…!ìðì.jäº,¯ŽÝ~³INûÝì Þâ*±›“R–¥×±»I ûÜ/ß1e·üÎÍ! л©ò¸äoqg7ÛÐÓØçn ÌÄ+ YWœE7³/PŽñ>7À0ªù ß [Rµuê´è¦`ŠäÕMƒn&‰HëŽáÐÍ6œm°ZõœnÑÍCÈ O\°C·¬Lו/Á¡»I9‡D°”Ý:¨.9wè–ia‰œÊŽî&~­OܪC7×ûXŽÝM*hÑó2Ý4[B‹ˆáÆnw‰ûVíØMƒ hô »ˆe7ýpôDoÓÁnÚ¸±Ñ%&>ÚÙÍï%v¨ …c7§bU|±[ÒwDt'‡ï|Aуâ.€®_Qõ@úà%Ð÷á‡L­,ò›Å¨•&É·l­þ)¯[·Ç¼emÝó–¹u{ÌöVXd( (ö+¹ÝqzŒ­ÿýï…±b{c+­†™„pqw6!QãCr»i„É(¦"¡zÏÎŒCP—6Sÿû¡5îDO>¨™Yiû$µðí8<¨™y” ƒ~âyÒ?[ÚL5BIý6x™”ˆ›ÑV1BQ c55M>Z±QT…§„ܤ͔"¤¯â¾elHTä•q0Á¨´£²¯Ì(UÒåpŒë§QÙ³Mº%Xc˜2„´bÆ¡aºÒfªÒYfXƒõµûËtÕ$>ˆ9#Ù6W B6±W]cLÆ4%%d2Aº<Õ—åŠeÜi¸’<ù“Å—å K-¦ivg05ÙT[/%ýš©A(F­ùÔàœ1­=m„e5·ÐÅaií™{ÂpU½›±hBîžlö“U­=$cºmE“EW +‹$>y­f2õi!Ãeã®0s°¡¬ôͦ¾_nîSãBïPÖÁ¥V® ÚÉ{ÿ#°Ìêá3K°‘¬® ò,«ò DÏñçd•ÄÉ*¥YMÝAÈj›ÒVl(+«Lv¬sCÞV$+ƒ s¸ȧ(¤ŸHÖÁ2~Xä"ʳÊ:rÎf*Êã° bMpéL‰MBY9)c¨²eöb"Y¥jôUmpönYUe¯†ð9‚cåte†KH™¿Ó]ÓÖÝ49>´q¬,ðNÚh™æ 6Ž•:æ'ñ3^„5³R×—â­2%½õ#­XoX=¬Øø(g(-Mtüßu0ùN;ëAíZd¦ Û©Ýš©Oì¡Í:âK]ï MÝsjØ¡Í8ÖÑÚ²ÏA[ò¢æ‡%Út’ÄzWòS m†X0¥õ•ÚA[¬‘š¡y;´™‚¯òv™pЦ•?x¹‰8hÓð¯g}‡6óšÚS¬ŸãŠVwÐÖôè§Ö¹tÐf¸GÅóèæ³A›CÇ^p*³%â©ä;µ³‡öÈ©H•Ýy@›C]Y~µ9›õŠDÚ©-Iû®H—Úb¿y^i<µ;óâJ¢Fù‹ÚCó"¦z0{rcÇÿ?E}î™-5G‹X˜fO)Ý~UñÈ–bÚCnyG6ßg®å¶Ó{dS1ŒïÜÄsÌžñLš©Ó!›Éê‚t 
›Élq†d½ÕÙøÅ2Ð)¶lF-ÆÔTñç‘ͺÇëÔâ‘Íœõ­b‹ˆGöä=-Üùö=²'S·‡Ûî‘=h™¾ãÞ²+7ˆ&vƒƒÙƒ¥„ÙU¼&©•r0»O>òÔÊ8žÙhã‘e¨Ï€g6˜‡ƒ#+ ǃٽ|\>Où`63ݯz(ÊìNg³ÇiìE3ë†mÍQÜ.5“Ãvg¾Îc½Òî+·y•+ Ë_+8r7É·\°óëé×’[‚9qªšqÛ‘[v~,ùû`fÑÍAÇ^ð½è–¦~ÕÛqè–ì³…Írrè–c‘¬3,»9ý˜úûÕ8v‹ãèÇi[vià ×µã´-ñ»­âGuk´ìæè2 }e2rì–CA–¼Â}g·Ç ¼µÝ<@€èWwtKšè’ï\ð–Ý\sº|»y(˜ÏÁ̱›S™€-ºùìì–²:m0‹ÍqÞ–4äx^UßÙ-µl°ËÔp ›M_¹RÊ8tKòoôñª ¢zJt!%Íý}Çí«¨w¨™S$NͬrÕÓºÞü¯öþ´ï(4ëci]|åI1¨E1á`vnûìuCgÄLlŠgÉ»þ³/è„õ5ùþ³÷í­~Ôw'Üg?dqÅ14`‚üÔû»מyôì¹à.ÔþŒ,®võ.R¼òo8¼¿zݘªdŒ-wá7¹ÕúöÏþ ÃW= ¯['¾ÂòUÓ«ïÄ—˜¾êa|Ý:ñÖW+ðQr’‰ø›b¨Þ}^ ý³¿@ UiáÅÐwâ ÄP]Ô½ºN|…J'61ôø!lãU‘æ‰Ê{Õ¿ýS^3ÁnyË»=æ-ìö˜7L°˜œ,i‡JiñWl°ÅÚ`ÿå½êÚú¦O®ØŸyWÁË}¸ÅQ×%J\HSÒFÌ=kc^©øÒ¬‡h±z!ܤS‘’Dzwb´‹Üþ¢ÞÇX9éʞŊ.’ÞŒÅ?¥´Ý”pŒKaÖv~Ä©›ÔËÒ0†XÑì=j´S}ˆ¾zTõ!<¨ØÑGkŠŒ¬Âã¯òƒaZ[lgÇÆä›Q‹¤9S0*¶ŠX‡IÍKz6ÎÉÀŒšôŒc¸Õì7–Xì˜^ñàEcñÁ¯ƒº2m¡‘Y ©âeoYs3?̈VþÝ[𺚶Y“,ÓM2ƒ³ Gc—lgê &àt«7b¤Fg]6·{htvÙþÑY4Pë‰Óöc ³¸P31´†SĘÅs~Jü Á¸ËßÕ-™_WRžI"* ÑÄøYRYEËÏÊí?¥æd)Ø9ðѸ¦)ooA–¨U\Šª*ÐhUý¬³Y(œz 5øXXŽ€F–,¹dY ÖqŒKá‚Æ.™ÏT– ^òMM¦¸9-|fÑãh£QøK渋Ԣ­Ss«¢L 'ܯì¹è„Uù³ð{ÅÚÒˆQ4Z?ûªÞû*íG”1W qY‘Z™G”%œ‘E*uQÆaõþŒDM èÊW£³Õ2¹ØãÀJ_óeI §Æy5‹—^“ºÆ£Í*ÿ%ˆu„©†jkúR*Êø&µëWù/®tþe÷Ð5u˜)8£íä.s'k¥–Ç[m)Wõx4Rev‹2ssϬá—hêRbJ™)½§Ô6Ž®‡fýYùÍôÃ'ç1IL€žNÎCĘ5–yeOÎ3qì2¸m¨gœuìCœ˜ÔÓü_Ônr ž´ƒßˆ%½ìË]ŠŸÆô ôZºˆì@ÏÒi+˪½››ýεàAOhÂmÿJgçAOõ"MWZjzêÊÓ Rñ §ŸUÓá9/¦€Ë’yp^úSŸÜÇŽóT âLÔ¯"âžó¢]½ìªç¥0Úú½CXÎ3¸#ZÛƒóRæ.îãà<Ñ&.KçiË-¹b“Ôé8ÏF,+ZLçâ¼TÅB»ªÜy΋…ûµÚz=楪^M|:0/5H±Çõ«Z¥Ã| ™·†ñÄós²¤`` ø‰ùYmÑÃó“èÄXG:8ß膠è<9/š²Žçqdy°up‰“réå‰ã<ó†DM¸r~&l š¦_Ý8OS3Ç”®#‹ãüÀ&Ð4Êöä<ÿe/ã^óC«Ðæxí‘óƒ±Y¹q$'æ#CñõÛ0ßéOÒïpº ó’–ë<”ïÕ4ãþNÊ‹}£ßU 7ÎwFœÇ;ÀU6ƒl©Vñ}—Öï¶ôî¨g`Lý$6;ê©–M7Ÿ¨g¬tØ=¯S´C}ÅV–¸2ú'¨gtìxRmo¨çÁ´ÐoŽ“õ,/ FM·]ÇúÂ`à0¦Îàû"µgoÇ öô ó6wm°Ï4Ðvæ=ï'ì<8Ä~žêÅi>YwØ3Ÿ/w¡Ÿ´gnãðDWï´—"wŒ§§}–Å£Y8OõY¹Ê×n´O bÅN¡ñæí¥·„ž§zÎî}5:pÃHp©¹ž´OôéÍñA·£}"×± ådNõÜÐ{•ÄJŸÐža«xÿ˜ÂvâžµhÁ»gëu¸gb[¬å¤^:îY”}Œð¸—"«å.Bái/Õe±Æ¯Â­žö4_bs¹KM8ÚóeàÿÆåêi/Ç„™ÄÌzОVÇŽ‹vKŸœêÕ×ëQŒyÚs¾ã¨w2O{6VÐà2¯{Ús p ¿*xÚKclñxàNw9»9ÜK*åîq/Foüíªàìq¯õk¹ÜË7ï´ã^’ƒkXr9xσ ~><Ô¶¼§Kst=š8ÜKâg §ŽøëEàÐÔTþýÖa„ñÒY9¯$ù“–¤í…b«þé%Ëð'OEÍzã'3¶7ž“²¦òík`î£!þ_5Ǧa*¤^]°}EìuÚ·âÛö`7ä« þ£Oºð«ª[æîÂÎWq•¡zG*ètzµuk0>¦c<À1fùŽGíÖà¿øËøååÑß¿ÿÇ?êÁ %Ï¬Ú §É 
¾ûÔ3ìOfÑüº>ùÿûéÛÿôíŸð?8ñG¨±øôJyÑ‹IoSÃŽG“ìâRÛ_Æ¡ø8bƒÚþd×öbä00´vÞÞÛöâ¦0è9pdÏqï‚k{s_’x=Àï]°mï y³£TSyOøøÀMøÜ“¿@ø¤ ^ø|Þ>!Š>×…/>…š>ß…Sød7a^ˆôÇî&<¦·‘xY ŠŠIáϽäÇ­Ðþ¹ÍºA3òíRÿ¤‡D:[ãüûFý×´t»§ðšñA›} ñ—ò' Ej)dzï–ϬÃ?cMÿ¹SÅoqA‘ÿRÇé`™¼WWì¿`:]Fu¥Ø¤Ž]¨gâÒ|5ð?U»òWù×ó·÷ÿðÏøÝÑ'+›¿|Ê©QNƒyàò[xëÊ»Oœí\¹Ýÿï÷/–ÛÝÔÿ¢s?C­¨Åå¶0L†ºÁê’×Ë+*oh†º€kã·A5öÊXÇ`ÂJ­gÕñ^å7Ñ6£èÒ&£Gû 1.<öÈ©ê&pð ÷q:•+f5N“¡®wºãƒDüàVý/&žÂb¿û’‚IQÇ„U(¯V°Îè†ÕðˆÄè8žkYZQÚž`¿5Aú•Ñ$wù§IóÓé>Ú˜CLLbò‹âä¶Â«@£ÙçñwJiÕÿêTÂh²~ͤ§c¢®ó­IÙÔÿb[&:œ)›ôtì‰FÑ+ƒ!¬’NÔåt•p=á°ãsÑSlˆñÎy™¤ªð3Uú”x…h¤¢¹éfíYzg‹AµX©šÜtüMš€f¥Hª¦òÛpÉy¦ŸõCíw3E¿øŒº$#5“–ŽmF3™š)úÅ~c‘Ô+’0u“—Žmx¸'ŠôÔM^:¶…9BJEÛLÑ/‹gÖ‡IKÇŒk]bc$œ- Sô‹#«Z¡Rº2L^:iëênÁ¶¹ª~Qî1IlL“–މÚĕ鉥mˆzLwUÎ<£Ô¤‰혔©c[YbšƒIKÇÑ™u±^žeÍwÓâ=ºMVº.IJŸdFû­ H¤&ÈˬMvW©ÓÉ|h9™¬t’ÚnäÛo/'“•Žm6+þòT•³Ø µJÛV oÎ&)›êr̬M¶Du0ïä3)Eÿyîs^³s;VcC$r°º³Þ\,àH9XÍä wV²ÕŒkRÕ³¬f¬œa§c5^ HRê‡ëX=“ÖÒ£ïXÍ6*ŠŸ6Ãj¶E#φÕàJkKq¬ÌÿgS¢ø¬9¼šÔ­êÔ,­Ó9õ›†Ö#PÙÍRñÔò´ž`èŒ÷L;Z£Kö-8\PÝoZf3ÏIÁ¯\GtÏìÎú¼b=˜=Rg„ßÃLËìÁJŽí)Fé˜=$ЊV±w:fZÖÞ'È4ÉÏZtÜŒ»Îáy£–Ûƒfеö·‡èª×«ñÜfá¾5Ç–ÛàüJ~·qÛƒ½g8póÝLFýëMÌ{:—<ÕÌ=¸niöL îÙ@\ÈP '¸gqÓïÀ=™Ôö9èxp3‹ñÏãQ¬¸GûZ ¸;ÓßÖûm{p㬶޶7Ž%Ìg nž‚BMYâÁÝ’Å÷ mÜ›FõAƒ'wovÑyró°¶“œ¸…Íù<§ ·QX Ó0™Û¸´An£Üb¶uî³:fnc“ÞâŸÛÉÜî Ö¬¹÷nÛœ³Ã\ ·»{5îòC+¸ñvGÛîÏÝp÷Ó[?;27ÞO0¸ñ Ù´™ãdpc!UË|öÜXIñ¸} +¥"»Ä¸ùÙAí´ ¸aè9"HWÝ•“˜–U÷ÉwSÁ]Ñþf®SÜxEž3*¬àÆ^;Í_AÁŒxg¸\Á]l=ÕTî kpp@À}Áå=÷¥+¸i£¸€û’¡¸½Âü\B ·ík¼ŸenÃÆ~Å_nþLLnã5»:¥÷u{TŸ ×E·,”ÞO³ÒÛ]êA¥7L:m^ƒÒ»²ðÞ6̹þл£FǤ¢Ð¶õTz£>=í¿„Þ¨kO! 
¡7làuÌ¿)ô†ž-ôFñvúM…Þ€ÁÔ ½í¿Ã…ޗÞö5*·Ð;ÉP„Þî×OÏji¡÷0ÄšUèß›0Ìô†å>î´ÒÛ‹ÿÏÕ‹ÒÛ+÷¥gŸ¢ôöõñÃx‰Wĉw¡ÒÛk¶Í©§ôFŸ…x/+½1N^Z3½½¹HÄg”Þù댅ÑMo{ŽôËy®ƒ¶®[@DÏõ[7ÄÝz9Žîtýèì[IJñXYâÚvœ'—Û(ÇÇçÂä£ì;åøæ ’^‚\ò}Û—¶;ˆä’?¿üñ?;ˆŠÿØ1[—ô½¢ ´7LØR£¼éºN”Ñ;½Pÿkú‡‚A§-S¨&bÓFŸ]OõþzÜ ÃÕÂÁg~ÃÂ1¾ù‡t/X8Æ7ÿ á ÇøæÒ!ì÷ñ$§&Ýû&N¸L>9ó “χ “O‡°ò9QtòÉ^˜|7ÔdòéþÿPB1KôCïµMþ!=Ç&ÿžd—HβË?¤'ù-ÿиslKæ`SKýWÌ9ÿØhιÆbΙ[›bÎ9ÑúóÙîe„tæFÞþ!)¢ù ö‘0çØ:éÙäç“ÚGâi¦xOFLgÚ˜g!ú|Š9§ð9'1ç¼³Îi1ç=#'6ç4T À’/jéª[ºwFÆFtÊgh„S%#¨òYçˆpÎÔ>Ò ?¡æLöœªWžÅž“qûq0±ç@å‹;]ÄŸƒÎÌ3|“Ñå2ô³Ìq¢\©$´5 æJ $}œ•ŽAiô3Wgf¨576é@ň bnlÒAP1æÜܤSVus§Îõ!û6{ä.F{ÚBHÍ]Œ: ÿ3góÁQŽ©ýçñͨ3CVy°Q§Œ/;ñ#b”ƒ:ÞwþÜå`§šÿžS¨/ ¾ö½3Bëå$ÁWÅ rŠSçàÈf9©¤;nÂeQ5ôKð«ó«¤Å©C¾§’¨$¾—PøåVÎÊÅŠ¯;"²V®h 鯠ˆl–k±êÐS€:³´©[n"çNت3BüC%±êx°îQ 5ız62›Å«CV ÓžµMñôdºeõcí sÒ.¯Ž¢{h÷qËÿ+ºQ+=$>E7rãÒÝŸÅt#¸yÐÄèFDq„ƒDÐHi¬Šî’¾ÓS¤èF›Y[ú>``t£*}š“OÑ=¼®Ò|^Ýxk…±RÈ Q´ÑK„Éí ò9miLî%ª.ävsë=ö"‚AþÏ?O±@j#©›íŽÊ¡Š ´¡ãñßh7qd ´¡ñ…ÿB™Ý³¼9›˜tNe½ëp¿ŒépfC£#kª@ÇÈm#І~—®éàhwïsU ~ÓA)̆‡Ê)³ñ÷{O˜=$à¬ÌŠQ?ïZa6ÌñJZ˜}±?U™Ò>y×Þhi†õ\ö*®»˜éרaM“¯ÐºÖÄ”Ö yæs(­+Ò̧åBiÝà(¹×’ÒMr£(¶íÇÉálCZ%ÓŒ`?­>Û.­Æj‡±í—vYÁv·IãµðîÝŒ`¿wêÄgb7þ&½ …Ý8FZ…Ýþ».ž×³çëQ]á»B?·ÀW~E‘D7ŽÙ%>¿ÀÛî˜oüB¤µ ½ýÑ Xèí>ƒXy ½ýV‡ØîáÊ,«ý_VyQ'çó¢óûÝÅݪŽîTytö!Íç:þI×¶å<íˆÍÄ}aòÑ kK5ÙŸ{˽0¹äû¶/mWyå’?¿¼~ô;*/Ú’¡æïÚ\'Âã¯èž†½Û›:oÆËµ)¹þ5eÞ€FàiËê‰à´t=5få^.J›œù¥Í‡ J›a¿Òæ“G•6 JÛ=EiÓ!ì—yy’¿"óâ„Ëä“3¿0ù|:ùtû'ŸE'Ÿ á…ÉwCM&ŸáOyQ8eÌ’÷*Ü#óê96ɼz’]2¯œe—Ì«'Ù!ófì³íï%Ûx×ÿ®ó¦KuÞýÆÊáÿ§óz‡¼§`ïHôø“Ý$bÁñ•ËLãôþðOHì‡uÅ&#ø„Rš±µFÕË>!à¦z•è_‰,¹Í½¿d¤Û ÁPQÁô©fj¡í{ãkY´‚“ãËè–µõè_‰X+m­kg·¥¯¯ÚE2¨_è¸ñ)¸P{ú¥ Ù{V¨ƒ5ƒvqÒS…‚ªYc¢×yQW£Ì$φ¨Î3eñ…Šh¼Ó¸¬ó¢‰väc4už{K÷Ç£[´“u^;É íWÒý¢:ÏŒuY?‚ø-‰ÎÛYÂjˆê<3¶"²17-‰ÌëgA,gNT#9%+µK¤‚ÆÁœv‰¼[¾úŒßc£r2™#ŒØ2ËM2Zf¹ VV¼PcF 1cßþ½è”†é5BÝm¤sÉõ‚»èý›UÉçÕ¿YY/@îm(øyI00è§©üº„Òk<_—Õ]JªòE ßYˆAymÇÌÑ\yîÍ‘¤¼F/âÐו׈Ð3í^y_1T1å5¨†å5r´B–a^#R´„×=%&óYûi~t_áõ] øÉ¼îpDá5.Rœ…×í3w™×ÃSнð—~Ñ;€x%õ|^{nWŸjð鑤¡¯B=b® ¯‘÷r§àÚE†°¯(®Q”~L‰Wpí*5ïN ³ç&´;Áµ'íFHYyÂãñ ^÷Ó¥ç~¯1M(SVxï‘FxtQJ8^{á‡o3¯‘JEú…×ýôþ/ î†úã³Â€õ"(sMÀ u{ö˜ÜPÓQ–ÉÅUú¹GâW¾’{ Ü£G‡r#Hj¿’Û¦o ¯Ÿ’Ûÿd ¥.J/ÕPrÛ\#ÃŒ’J»›ŸÚCßWí!Ê‘CÛ›Lj£Êû ÜSÔ[Á=.®—¢à¶G„i)àöî±äp7TИ¾pËZ ·›Lgáv;Ø(§Ü¶% ùƒ”ÛXkÐBT¸]½®ý¼rá6Þ®apTn£*Wd*·a2À/üY× 
·«$T*·uU&ÜöõaüÂí:¸˜—rÔ©”ÛW@E¹]û7^cùµ¥„׸SµÆ[Lx]ÙñòÁu•%¾âé¼á&\ãH©ÉŠk´’4H¶Šë"U`×%qaŵK)ø)¸.™£c‚k\B8½×X/ÇK]píZ>aPpí›ÖEÅ5.X'¸v·çÔo×eÈíd\ãm×·ílþÜŒkÜ~¡2®±fùÙBÛmQåNp )™Ù×8FKNÁ5äiÚ~0²ñµ+*”²qå”Gí!Jܨ Ë/K»¶fyÊOÇ£'øÞZŽïwxþ‘ÌçBþÉW·åD}¬ò®|ô‚¶:rYå]ùè…!È%ß÷}ùh»¼+—|aùèwäÝRQ ¨œ—-Æ^y­HŒÈ­èàó¢¼[P1(tmgòMã n¡ö´ ŸOñnnЕ7^œpÙäÌ/ˆl>ÙtûE6Ÿ>*²É^Ùî,"›a¿ÂKÓÜÖ6oH¼~ÆeúÉ©ßèCácÐù§cx¡…SE' ŒáN7Ùdêþ™· Ù¹íaŠ7nݤó.'Ù$ô.gÙ¥ôêivI½ËYvh½·PЀöõöSc©÷ßÿÚ—Òë…™XêúŒÑ×îAD  §®«/I½(-?µ8ß»qeEÚ¡cgJRïÁU¨|k:…34L ѧ”^$q…¬àˆ|„³^¸Þb"!õ"€Åà{m,õ&Ží",C)½Þ†êyì{Ë,õVNÄA=¾zíÈoŒ„ézQ:=’.Ã#©÷’± ½fHhÝÓ¼Žâ½ïìGêE)¶@ÝÖ“Sn"}­£³f(½—ôHr ­³bkýÃMã N•Ð#)‰p ’ jQÆù`ÍÞ÷}æGôX¼?Œàfš¡ôVÎè¥)½ˆJ ã¸÷Ò‰zTâ{Ô•a\§ôŽt Z‘H½”53PW3¤Þú >|°Ê‹XíÌŸ9“Ê+•¤2>B彨ž?ÊÁroæ4ùQ²f†Õ(„?о!äÞΡ¦üéÐ{Ń?jæô0©j=jc½¨§p?Ð@3&ëÅ"ëh™ôÞ›âÏPZc½›)û°,ô&ËšfÆ4MÌû…ÞÄOŇ ½ô–YèMœÀ6Ð13¦ibéâ^7à3›’†×_L.ø%©Wy=RTÎ\y JÅ/¸öI<(Å5äÎö\#[)6‚k$QÁ=Áµ‹'žqÓõz×~,R*מû53Í„ÖþµYÃS`Øô„oÖw6Ó,Y!°vQ6 ë;mgú$Öøž+ÑŒiÈ£äºL{ÚT¨–Âi¤™ÓžxÑRá4oHNß3fV!N{!Ë;”ÓÈ%% 8§15ÉÙ$œ¾«bÎd6á4j<ÒëN8>TG@8$i¦ sÚûD¢½pÏ”Q$°ÆùZÔ&`X#‰úèÓI%°vsAÈlkcçŒ ¬at¨QÓD` ù—Vk7HD ^`äæòXcœüîXÛ̥⫠k$m‘Š(°öN tyÌlïx ¶2Û+OM™=7sQf£'@&ž³!ÐÓó-ÌöúÑñ ˜ím¬}oýËBï‚íÜVj÷ÎåUÛp† G± ù1ž/Ŷol¦Â­Øö–Ó–¡Ø†ºZŠmÒžÛÍ^âO ¶KåñÊí'ýBŠïò_ð-eõ|6#,ø–LfÅ·O«¹|{ýâø‰>øÎ²QS|£ÙN5R|ç“»_(¾ û¼ž|£þE˜®ߨ¨† Å7’>i,‚o¢=ŒàÛNG`Å7~›¨k¢ø¶?I "Å7 xOå\éáØ*½1»Î™…­ôÆŸŒÔ{¥7Š!G»-¡·gÓKéüçð‡(½í:Ût©)½¯!/<¦÷R ]èíåªi"1½ýòè$ôFÉsÚ“ ½Q´=ÊÝK䑇=â×Û¾ÛøM“î#úú6ïîüްµõûG°'´Y¦î¸åLçqë›x×>'Ÿ½ »Úc~+œvi‚?{e|ÕŸ›¿|¶]ýÕ«~¡ŸýŽþkÏ¡á Õb+æ¶Wÿm*xµËøuÕ7õßv"ÿ£m¤~ý5õ_&HÐjËDzµŸ rMÅ+ùæ§¾óÆS®ÝàåܯÈpå› ¼ â ®|‚u¯q囼 â-˜'ü9¼'x±5ÏÎièkßeʹߘ†>ˆeê ^˜†OÅWž†2ˆ7¦áÍ9†:ˆ?A¶ HkGÞÖ~9É&Ax9Ë.AXO³K^βC¶í¾mÍò@ýóáÌzðÿíë¿~ „²Ö¶ÚA[,0e]m[Y.´¡òR}O–¥ãX•m¯s”·µƒ¬Bó׈ÙNøðcÈ!»árš½êÜ5Žê‘D ¢ÚÁ¦™¿=D¹óè¤úÕ#:aÇ ‹Øt‡6ÒH쇪‰¨Ùºa$ G·7;˜½ö܇L…ýñv°yñ¹ùö¤SÛˆ(<¾ì:fßâóU¸yã¼OdÀÚŸµùU»œÜ=丙õy"*tbe†‡dm <½Ì^†îÊç0KUåìà­kÃСg{ŽsXì nÔgã(hY"FñÓH€µË94¸ñ}+hió™Â¢ŽÔ?;ؼÏdOް—íÅͦL8;Èbqc×»Õ}f²g<‡µÁF!rqç,L˜8X/®ÜsÑ6ïfrŽdÕ:—=yÜÏÀÙJÄäQBÛä9•+êõÍÕ‚´“Ì©Œ3F=R;(ú±t µ÷¿ÈWX·ƒôçTNÒ7Í›}¦2òˆcêÝæ‚Ä•"·µm_F²ó7Ä£Ð#µTÆ#£+2;Æ{_¿ó'ŒG !IQoOeE½Gßcr0ê½ÅhÁ]A½‡Êgdxa½=÷dWQÖ÷{RâqÅ$F<’N£ˆ÷HzD¶ñЋÉó¡ˆ_B®Âxo\õÈ•ñÐB£4®"ÞûÆ3#„G¸™Úh+á!µQÖ·~0Z+á½%{$p+áûѹ6ü‡ðÐ2¨{ 
Þ/!ÊÀ*á¡»‘:¯„Ç7yÝ „_Ôì…ðXrFþ˜ã7c…âbuV9JÍN•ð.(d*7èHtÛš¨ƒ ’*á1XªHª„÷h=V?Ä „w!†1!¼/èÅA„wÇ7-d†ˆ•óß•ðÞh•¦•š8&UÂ÷Cª¼(á=Wèê…÷U²bÞãG.ôS1ïái¸øÅ,¼GÿjfðöÃHLUÞ{ÙnBºðÞwðöCxï=ÆzÜpá=ra£2ÕÂ{\ 퇕÷v+e.3ïñge«(¼/]oó·–l§Êû7\‰ý°ßf$u_€yÕã5«ÀGöèÌ ðÑÔ&òñàÛ= Õ’ò¾ ñ ï½múA5ÝÓ‘Lµàýßÿö±rX‚ endstream endobj 3 0 obj 34154 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1080 792] /CropBox [0 0 1080 792] /BleedBox [0 0 1080 792] /TrimBox [0 0 1080 792] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000034373 00000 n 0000034394 00000 n 0000034417 00000 n 0000034847 00000 n 0000034716 00000 n 0000034611 00000 n 0000034774 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<8F68A573F4AADBEDE04259C4443355D4> <8F68A573F4AADBEDE04259C4443355D4>] /Size 10 >> startxref 34928 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_epyc_jc1ic8jr4_nt32.png000066400000000000000000003275531360743507500235150ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  
¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$9·-ç vpAg’Zó!%Õ€IDATxÚìý”+ù]ß ¿¯ùa36MjlLlÝ™Mª ŒÀgƒ§ä«À!±Jâ37¤÷”’/Ì}’])îMO~=¸*¹ylÜ»*ÎÉs;4Qe=á†%]ÐÇãŠ3 0º2dkèzÂÌ•MŒÝ÷Å^²ÑóGõ§T*•¤*©JU¥þ¼Îéso«¤ÒWêO}ëóûsm4À0 Ã0 Ã0 Ã0ãuy/€a†a†a†a¢`ƒ•a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†)1®ë²¬¼—Á0 Ã0™À+Ã0 ÔÛ¶Ñh4ò^sÅ1 Žãä½ †É–õõÃ+Ã0 Ã0 ³¦i²Ï\ XÖ×Ï}ï÷~ï÷潈«Žëº¸{÷.~ê§~ ¶mãë¿þëñe_öeþqÃ0ü‹C˜¦ I’üã–eÁ0 X–Aðö·¿ i$IòÏíº.¾þë¿Þ?ŸmÛøæoþfÿÃÄažŒ/:–·Uä•å›Y7YÊ~»ÝŽôò»®‹F£×u'› ³*‹ô•0a9å=›) «Êú¢×³l/ÁˆÉ•^¯7 þÎÏÏG²,ÎÎÎFggg#£³³3ÿ¸(Š#Y–G£ÑÈ?~zzêïv»#AF£Ñh`Ôívýc’$Z­–ÿ»ªªþ¹–y>ÃÄažŒ'‘ÿÓÓÓ€Ñùùùh4bùfŠO–²¯ªêÔûœŸŸOÉ5äÅ<™F#Y–G½^Ï?–SÞ³™²°ª¬/z=Ëvr8š3”òÕn·ý”Þ^¯Qaš&dYžH SÅÿ¿eY~ÚeY¿“—&øZA B¬õÄ}>Ã,bžŒ/:&I’ŸQ`š&Z­Ö„L²|3E&KÙ—eyêýÈC¼O0LZÌ“ÙY„å”÷l¦ ¬*ëq^ϲ 6XsFEôz=@³Ùĵk×Ðn·x) ó ´]×'~¢†É‹y2>ïà9h‚J;Ë6S&Ö-û’$AUÕ‰ó0LZ,’Y†ÙV•u¾VÒ‡ Öœ¡FJÝnççç8==…iš0 ‚ Ì5Z%Iò½6ôs|| UU¹v‰) ód|Þ1ÀSÚmÛ†®ëp]—#GL©X·ìw»]¨ª \ÿĤÎ"™e˜MaUYçk%}Ø`Íj¤D†©(Š~Z),T´íºî„°Ë²<õ~3LQ˜'ãóŽÑ@×u´Z­¼? 
Ã$"ÙNº®óØ&UÉ,°83ŒaÊÀª²çõL2Ø`ͪKÚÞÞF£ÑÀöö¶¯¨ˆ¢ˆn·‹F£F£Z­6‘FЉ¦i¨Õj~‡àããã¼?ÃøÌ“ñyÇrÌpt•)yɾ¢(P…—Lª,’YA iG‘˜Ò³ª¬ÇÙß™d\F£¼ÁÀ¢ ‚à§óRó$Qá8DQôSÉ‚Fi°É¥ 3Lш’ñ8Ç Ã€®ë8;;Ëû#0ÌR°ì3›Ä,™ ê,óšÓ0LYXUÖçíïL2Ø`-0Žã`{{§§§$ Žã Ñh@UUNd6J¥©ÕjhµZ~mÃl:,û Ã0 3¦)ÁƒÁ`æãÃá0ïååF0%øÚµk¨Õj~*SX¾—öm<üðÃE‘e¾à°Œ§ Ë~q`Ùf6–q¦ äa=88À`0Àáá¡ÿØp8„ªªþE$Ë2t]Ï÷›b˜%`ùf6–qfSaÙf6–q¦,äaí÷û8::šz|•J¶mã¹çžÃ`0ÀÁÁAžKe˜Ä°|3›Ë8³©°l3›Ë8S&¾8¯7¾¸¸€¦ie<ð‡è÷û¸wï`kk ;;;899ÁÞÞÞÌóýÌÏü ~ø‡o}ë[óúHKóéOo|ãñÆ7¾1ï¥$æÕW_Å#<’÷2óùÏ[[[™mÂiË7ܾ}=ôPÞ_ÝR”YN>ÿùÏ—r_yõÕWñ¯ÿõ¿Îìü¼‡á=|ý|þóŸÇ£>Šøÿaêçæý{’2ËH™÷ïïÿþïGµZÍäü¼áý{ý,³çf°îïïcggÀdþüýû÷`â"­V« óèßüæ7ãÝï~÷›F988À7P¯×ó^Jbvww'RIÊB¿ßÇ /¼ÙùÓ–ox衇Jù]å—“2î+»»»™žŸ÷ð1¼‡¯Ÿ,÷pÞ¿')»Œ”qOÙÝÝÍÌXxÿÂû÷úYfÿÎ%%øääÃá0R°ç]y,7snܸëׯ罌¥Î…e ïßå`íëp8ÄÑÑîܹy|ÞL[óÊ+¯àÙgŸÍ<ªõz•J%ïe,yèÊÄÉÉ îÞ½ëÏ®M“¬äðR?vwwqrr²Þ/,Ê('P©TJëu}饗297ïáÓð¾^p÷î]¼üòË©ž—÷ïhÊ(#@ù÷ïYÝ{W÷ïixÿ^/¤ƒ'ݿמ|rr‚­­-X–˲0 pÿþ}`gggn ÄÞívá8Ç \»Ê0 Ã0pJ0Sh¨¦+M ÃÀ;wòþh Ã0Eq¢Ž•a†a˜I8ÂÊÃ0 Ë2LÓ„ªªsk»\×…išpÇÈ@۶ẮoüùC±†a†a†)&l°2kǶíXÑÓ4Ñív!LÓœê¼éº®?¶m?JA¹®ë?W?’ÑjµÐï÷óþ†a†a†Y¬ÌZqF­VkbDù Š"Z­šÍ¦o°Ú¶v» À«“eyb „¢(yL†a†a†aR€kX™¥Ðu?ü0šÍæD$sš¦¡ÓéÀ²¬¹µ©Áˆª(ŠE–eÁq4›Mt»]œžž¢ÓéÌ7PµËŸøKd†a†a¦ ,e°àÖ­[$ 899ÁÁÁAÞŸ…Y¯}ä54›Mض³³3H’„Z­Ó4¾Ö²,¸®‹V«…^¯]×aÛöÔóÇeY†¨¢(0 ÍfN'^ƒ€ÏXµ?a7²½½v» Ó4}ù»víZ,y_ÕT'i*æ8NÞ_ Àu]´Ûm<üðøvíš¿7¦‰mÛh4?š¦ùÇèÿQPFK”¼š¦‰Z­†k×®a{{;õæv Ã0 “‰ V2Pëõ:*• `kk '''ØÝÝÍûó0áº.¾ïo~>û¡Ïú³A€ªªèõzÐ4m¡¢Ön·ý4`Ap||Œf³9¡ˆÕj5ß( ¢( lÛF«ÕŠ—òë0t¨¸'×u¡i4MÃññ1ÎÎΠ˲ÝWUgggÐ4-Vfã8SÏ#÷V«Áqèº{mÛÛÛ‰2˜li4çççFhµZh4©­ô÷îõzèõzèv»~Ý>ÕñÏÂ0 ¿!]ÇqÐn·q||ŒÑh„ÓÓÓ™D†a†I“_þå_Æg?ûÙD¯ITÃ:qtt„ÃÃCÔëu¿Ëª,˨T*¸u냪ÕjÞß“ŽãÀ0 ˜¦‰ïzâ»ð•»_‰–<ÙüHEt»]hš9ôžŒEQ&"£’$áììlâ½æÍ[ >w!mxƪpùÃ0 °,Ë—ÓÓÓSÿqEQ¦œ$­VËWøg¡ë: Ãð»[‹¢è7ët:¾¬Û¶½PöøFG¸.²,çý^)LÓ„ Î5úÛêºY–}ƒ’2KHŽ(bÀܶm_¨1]T?•HÄq\˜¦‰^¯ç;GHÆèÿô» èv»ì a˜1 ÃÏ¢yñÅó^äÙ?÷s?‡'žx"Ñk¬÷ïßÔëõ©cÕjÕjyÌŠPª")È­V §§§žâ­Á‹\†ô(?£ë:TUà)Ò†aøé½ôø,)ì±±hQ3~g˜”Öéº.ŽcÉ¡ªªh6›~t4h˜Ê² Û¶!ÂøºgK’45ž‰:[/z_˲Ðív§¢`†aÀ¶í #{#qïg’„Ç_YÖü׈¢÷Ķ(ã-pþYÝÎE¦iE†a ×ëA4›M¿[yй×h4|T×uCUUÔj5ßÀug*õ—"ÿ³ EŠ¢øãÁèµ°½½ EQüæu ÃdM2<½Çq˜¦é_‹ð‰O|"ïe2L,lÛö³Ð€q3T×uýÇIoq­V å¯ü¼ð ‰Þ'‘Áºµµ¸¸¸ðÿO\\\ð\Ëá8A˜PžIq·mŠ¢L*ï”b+cl†èt:¨ÕjeÙW¢UUèà»txiÀþƒWÃÊú¶m¿õ"‡J˜n·‹f³ 
I’Ðëõ|£Ã4MH’4•eÁº(Õ6úZ­æ?F7AbEiKãÄ3>“¬²m°FÇQç!‚©lµZ¾QÛjµ|S–剛»išeyb~ô¢(ªµàs‘’| V8==õgMÓà8z½^¼þ Ã,„ÿ‚ øóკ®ëúûvй '''y/ŸaB倲,CQ?«ˆœ¶’$ùã$ƒúÉ2£%¬EÝÝÝÅ;wüLJÃ!îÞ½‹J¥}eŠG»Ý†ã88>>öfªLsŒÑc†jZ:Îú UB¤qÊ)ÁLRÔMÓô¯¤‚0•/ÂÔ¼àEȲ¼°Ž•ŒÀ3pÉoEñ¡y£¢J,{?IYæ;‰ñ7¤ï0Iv’Á)ÂÂè¦(Š[J)ž'k†a@’$_¶Çñ•ã áLkÕ4 †aä·o3̆@N ¹GJ<9ñ9›ÙÈAv¶(Š’‰.’xëáá!4Mí[·xVòÑѪÕjìæ!L¾P#˜ããc´Ûmß`«¸›ŽáEZçÐjµ+쩳Á:;³<”VI©œ’$Mm´y@ïïºî̵X–å+9” J+¥œÖjµÍ6X ¥þ£¨TŸÓjµ&Ò ïo(Š¢¥¿Õ¼.¿³X”Bnš&DQœp¨P·kUUý²°âœ÷µÀ0e†š™‘33xræ³ihšUU×vßHl°nmmáððƒÁÀ¯WÝÚÚâFK…Rsƒ^sŠÎP*#Eff @¼ü¡11Eľ\›<ãse faT3jYLÓœA3T÷:ËóNõ«ÀØP¢H)EäxbÅh=RÚl6'h‘ÓŽ:IS'aÇqü}¶V«¡Ýnû¯¡ÙÔ³°, ×®]ó§NíT;>Fu«AdYö EQüÑM´v×u#›æ1 ³Ã0 ëúâÙð S(‚J÷” T›ºÎŒœDk¿ßÇîî.ž{î96PK€eY~3˜`„Æ0 ¿A ¥ñÎ…jW±ÑZ4\MxÃÈðêZ™+uçmµZ~„K„d]¦× ¥iF¬Q†¬,Ëc¢€ñ¬b{ò?ñ8<<Ìû£m4Ôéœ:‡›jQíŽã8Cª¥sÐsƒÏ £Ñ(òýeYžy,L¸Ûu·ÛõÓ„©ÆŽa˜äÐ<í"dì0̪P@ n5‰4_ªQ]wùHâÖ­­-X–…•ß¼ßïãúõëþ<×0ƒÁ[[[3—‰uG>lÛöÇnðQ'ªSéLxM–H.EEt z—ë+WI¾ó¶mèºî7§q]7²EÑ!”0MŽãøõªá½#˜LPä•¢¯I[Æ/˸Ǽ½=8>&îkÖŬµ J³VUu­keÙfâ@÷ *Í(,ãLª¿VUÕ×£TU…®ëØÞÞàÝ»TU]û=,q—ཽ=`8F>goooáyNNNpppà§×ëut:¿óðp8„ªªs^Ë\KÍ,ÎÏÏ'¸È¤!eY¾±J¥ë:,˚ř‰ÉŽ»s‰ê&¼ï±…ùÆj²°+qÕä;/4Mƒišèv»~×UŠr• mŒ½õd” ‚0•bF–ÂP+ù^¯7Ñ/mXÆ“{-ÿ†@5FcâžöàÁ¼öÚk©¿Ë65J£ò’Cj^YtÇO–q& EMÃãøøóÆsï‘1JÈã?>÷gŸûÜçFïyÏ{F?ù“?éÿþ|`ôáØÎ?øÁч>ô!ÿøO<1º{÷îÜó>ÿüó Ÿ“½^o$IÒH’¤éƒÇ£ÑHMï½ÎÏÏGªªŽ$IžžN;;;‰¢8Eqõ7:]ý©!ŽF£³ä/ËJ^²’ozãÑjµF­Vkt~~ž÷RRÿL’$¥ò¹²’—«¶‡3ù¡ªê¨ÕjF£Ñèôôt$ŠâHUÕ‘¢(£J¥2úîïþîTß÷ïÍ£ÛíŽDQɲ<’eÙ—¡¸^E¯;>>^˺yÿf¢PUuÔív';??ŸÒùç¡(ÊH–呪ª¾l+в6Ù–“—×-c…ÏûY5k¢”â­­-Ôëu?b;Ñï÷qûömÿøÎÎN)›APÇ8ò¾M}?)FW©¹àÕG…£L¢(&?«Vµÿ™bÁ«OãØŒ1–' ®’|çyº»ÝnéR°æAM“ŠžZÆ2Î$a{{{áÌØ( Øh6ìê­ª*~ò'<òHªkeÙÞLÓD£Ñ€eYèõzþõïh4~VKŽã@×õ ½j*±Œ—˲ü‘|ívÛïö^«ÕÐh4æÎí&h>j§Óñm‚ÓÓS^¶w ^•z½>e¸Ý¿_þå_îÿÀDS§jµ:3¹¨;ÆQ/L.¼&@)â²mÛŸ}:/ -Q8ß×Äè4âØÓkç¢'X‹µÔÞ^ùÎj°ib$T XÆÇ2„:°SÇà« 5ý£N“q ž dh 6 \fðü"X¶Ë ™Tú5ó”ÒÉ ¥NëtÍ’C”œüeK÷]Ëx9q]w¢ÔjJeYö§}˜¦9÷žCÆnÙz|K¬'''°,Ë¿aÔëuȲœ¸Óîî®qÜ»wæ^~Ž}˜Ï|æ3°m±êh³‚ UEQ&æpMt5Ò{?Û¶Ñl6ÑëõÖî%P³Á"Œz$‡}\£?`ßôû}|ô£ÅïýÞïeºÄ4å>ýéOãàà7nÜ@½^ÏtíEÆ0ŒBަ)xõÕW3ŸMÜÓ`YVd—Þy3u‹9VÓX?5£¡hÝiìTx†5 $¯ÿ,NNNðË¿üË™~¼›`c:úݲ,¨ª«{)uÍfRpaÝÝOÃÐþ=3mxtÕ÷ï2AvÉ»ªª>E®ës Vêì›÷½‰tð¤$N >88Àþþ>*• nß¾Û·o£R©`ûûû‰Îõä“OúiO?ý4øEàQêõz‘´?þøèþýû‰‹o{½žß°‰Îuî´ 
xÓ¦ÓéÌ,æ?;;ɲzp4É‹Ï; I’¦Š¯Sáx4Íëù"–jt”*q{&t»£‘ ŒFm}â3­S^ÒïÑ(ߦNgê1UUG‚ dÚøˆÞƒÞ¿Õj­µ1@™Y§¼lÊž~ã ú9;;õz½‘ªª~Ó½óóóQ¯×õz=¿ ÞÙÙ™ÿÿÓÓÓQ¯×óÿ~~îÿœŸŸû÷I’Fgg«mòôùèÜÔðïøøx¤(Êh4ßEQL|/[—¼lÂþ½IPó­Mj¸ïßWEQF­VË¿¯Ä¹t: ]örI’FªªêšYF^¥Sê@”…^¯×Q­Vqÿþý¹)  íƒV6yƒ©Y§E,yø‚ Rhpt~ ë6€3zÀ’QJ›J½FÊ Ö…tù¼u¹¨?‚ˆxiɶ è:pz H=àÛ~øŽ7eºÄM‘ï Žã@Ó´©Q*T/‘ÕiJñ:;;ó£/Á&,L>ä&ãÆ¥aäË^É…ˆqVż %í#Æåkÿ‡Y4>+¼dËB«Õò=Û” C!jú"˲_çIõt&{¤Œã8h4þ5íºîDµa‰ú¸® Û¶á8lÛö? 4“®ë÷IQ ÕÌe÷ïM"XÇ—wZcYa/>”’T×QÍfªªÂ4M†Qø&ŽIHœ D§ \\\Ä*ʾqãúý¾?ß ðnêt1T*ÔëuœœœLjn‘”ú+I’Ÿ²E È<ÁØùÌÎtÍeÂzPÚ°]×Íf&RÔÜÕ"àh`vçâ©ç»@» t»€(=|äç3_æ&Èwš9Gi‹€—º(I:N&©Á4Œ6[RP®z›"›Œ‹¦á2"…Ë߃Ž@yÎ!Íø=ü±½S +ý„ÓÕƒ¿ïÁ”¯u§jQ9I§ÓA¯×C§ÓA·Ûõ¯¯V«Ó4cËdüZ–Çq (ÊÔý‰œ[áºÕ"±‰û÷&Ñn·'êø˜ä°Œj¶Œcžî5¦iBÓ´sì$аÖëuT*hš†;wîLx_îÞ½ë?gÑ9vvvpëÖ-ÔëuÜ¿<˜ðöìííawwý~ß/ò~òÉ'óþ®|t]‡,Ëèt:h6›¾×x‘§øÛ|†ï¢‚åö1ôû}¼ð °T›öJ¥27'ž†p½ž èõz‹S§  ÿ'ûøMû7'…ÑžýJÛ¢Ùs™ÝìãÞ\×ë¸û×Î÷/Ð˲LÓ´àu.N™S,Ê*ßa‚‘Tª³#¥;¸!v:4 †±TÚnPÖSŸfš^Ä}]Z×4 Ȳ6϶fÓsQ6Ášk{7EÆWa1Ò† Õããcü97àôôÔ7"ÉØ ++Ië;ɱI)º®ëNÔÄã9”‹ nܵZ ­Vk*²µ‰³ –í|¡¨½¢(~*9§§ Ëx± žóÆyÅA„J²ÔÖápˆ­­-ÖR¿ß¿REÙQƒ¨ ˆ ¼ù½/<ÿÂäãç³_Bu²™¦BÚˆŸþ&ÀÙÙ¸ù‰¢x׬ V@ñUÇ™í0×9ÀDCÑUÀSPkµš½ s||ŒF£á7X‰ Ò6|IùC¬÷K[‡g“EdD8q‹»™uÓl6!Ë2,ËšR¸ÓTÀÇ™ÈþIƒN§³Ñ SLH÷±mÛÛÛ~`€a6jlÉ2>ŸÄëÉÉ ö÷÷Q¯×ýà§Ÿ~ý~wîÜÁÎÎNÞŸ©x¸TàM¼ ÎÅS,iŽ_¦lÂ3<“fü†%R¶Š²/ 8Žþßn{õ,ã^BjuiW ÄxŽI’ ëzdj¥R)Ž’kÛ6ÚívvÆ*±NƒÎ¶³?Û«ÀzŒd&1¢(úV Û”X*×I{ý ³NH÷!Ç ];ì4a6Ê.ëv»¼ß. 
Q—à‹‹ àöíÛõª‡‡‡ØÛÛÃþþ~¬NÁeeéú"@Ëó¦O¥Ï赡ëº?!3\ŒG?,‹,g£œ;ð ê8ú ¹_‰æž=‰ š»  ’Â=+2DÑ×`s–0¶m£Ñh`{{š¦ùÛ3C½4Ýu¡iÙ‹jd]Oýë½¾ÏÌÌDÅ•ë’AJ>wÑfÊŽaŽJMg˜Möm¿ä¢ õýy“(Â: pqqá§¹}û6NNNÎa-;K¥pÒn°q‘­H 22Ÿ7Iº ¥õFÇ”NüÜ6å­ý(¾úÅó^éF“fô©ÝnCQ6V vÄ‚´d<<ŽéJ’±n’¨†5Uµ, ý~GGG¨V«~ôu“q'™o#²Ó­ªª0 c¢)ÇÒž]÷%kû€qÇzIòä?|/³m ÝÆoîïã =”÷j7rZš+ÖZÒœàÌçÊ—rÀÈ2¬9t.&²†-ÆÃã˜.¡’¥ŒHd°öûý©ß)5øÆ~Ú0³EQ ë:>þãÇ·ÜXídtC§úÓ(¥6‹}qVŽ"­³ÒlÛ3 ÌîÜë\ŽÜi·½=¨×®Î$QM£¶CtÏÎðG‡ÃÙO"™Íšd8_8V„ðjTppp01º¦R©$NÞÝÝE½^‡mÛxî¹çP¯×'êö÷÷Q©TüãƒÁ 3^c ª Ï`´íx¸´Ó²K&$Ìî‚Ü„g,_Ac(¯|ã´àX^Ĥ#Y¢®…¬7ÿàùã¤_Æô¨¥¸¬«yT(³ŒÏcQJ0à)úÁHS0K!˜v†Ò† ®gëD*8›"ÛŽã Õjáøø8vº®$IÐu=òaB¾MÓ,‡<…eºègDÙd|Q©Å¬×„ïä”Ê‚‰a-]Ã%Y^„U3Ëš‹e°RíêÑÑŽŽŽüzÖeè÷û¸¸¸ÀÞÞ/z{ûöm ‡C?ý ßïãöíÛþñr Ÿvüçï)¾ºî¥ÌóN-«F´0®#²åTuÔÍÜ×’P6ùv¿¦Ñ6qÆð6¦$Þõ¼7zËZì@2Œè¹“q¸"ã Ê&ãI Qó_'AO~TP¢h)†EgÄ&É69Q$IòÇ2-‚/Í’SJ &/U´)o¢22ŒFÍ¢¬2žÔh : ’o×u'"¬²,ÇŽ°>C&H»½>ù ö(ÉÈáË`=::ÂÎÎŽŸ.¸³³ƒ£££©q7q¨V«8<<œ‰sÿþ}Þ…Aÿ§ÔúÞyô‰jXÇ-/ |ì LŸ§/+O¦96ÈXÌ žñ]„nÀ9QFù&E†FÛ¬M É:"Þ¨aþæMpUŸ¦5ëWdÎje<q¢«„ªª~Š\X‰™Õœ¦U†¨¥®od=j\6E¶c;CPs¥YJ9uÊ6 #^Ù3&ªÇAÿ¦Èø99ÁÝ»wµ2KVò ¯¾ú*vwwqrr’É÷2¯~Ég•ïL' AÚLm{r\FZ„ÇÌ,º~ Ë’R{ö(¥Ï¶½®¨F¡S€wwwñÒK/erî²îá‹HRŸŒ²†ëYÃ×EW 2 ï-AáààwïÞÅË/¿œêy˼Y%uQUÕ¹¯¥´àR8`ŠÄ¬”àˆ{*íßËèØ‹(ÛþMzZƒr^$4j/¦ý|–e¡Ýn—+ضÇYpY“Q#ýæðñú!¼õÏý¹ÄûwìÖ`jÐ3³,888ÀÍ›7Q¯×ý¦Nó.ˆÌ<öè£â}ï{W^Û,b§s=ò‡Ó^êy›»„墬Á ,Ȧ{¹î²étœ;;;xê©§2Ý|Ò–oxä‘GpxxˆÔÖÕ-/ò{Ѵũ틈º~!ŸNvQt»ã´ÞNgÚûMó&OO½çÕj¹4̈Ãáá!{ì±LߣŒ{ø<5\ £ª*Úíö”ò£(Ê„3¬pµ~¶íl¡ì‚°··‡§žz _ó5_“Éù˲Ïb•h‘¢(suV«ÅÆjR¢.÷[Ú¿ƒQδ)ËþM#–’ÂNÄ ’$EÊwÔ5cÛ6¶··ašfdWíBCM E1{çzBýí[ONð…ïû¾Äûw¬.Ái3 °¿¿­­-Ü»wo*õ`¥˜ñjøÆ¯J6ÎeÙk€.:^“§uCAÇ9½A)“|‡k9"£@”îqzZ®-³"¤Ëàº^=H·ë)ôSà(k–”IÆãbÛv"ÃR–eTHº†OA*TtµÑð¨aå«Lу ÙÙβ›©$IåŠ4E —¢:ÌgHÙd<‰,“~^vœóѲãããrÊ;ÉœãxÿÏ2sÆq&Ïoc)Ìe)ãçÞõ.à…½M.«ªª ׯ_à¥)ÐÅü^ÄöîHȾC/!^ݨàtÕ“-A‰œM뤌òMD*#¦9N•]…°‚¼¦²ˆ°:ΤW[–'#Äí¶÷œããÅŸ­Ùô”üà¦\Æ›XJ”YÆg§áR˜ãããÈ×':Ï?¤wR'à"ÒaSd»PNf¶ÁCÔ®L2'[Àu]?;/º x×EÔµ!\×õ÷óF£±0=¾ð‚§§ä‘ fšÓ:’ax÷ž^øÝßM|ÊØkT^úÁÁž~úé‰Ç¥X–…ápY–'Æã@½^G¥RA½^ÇÉɉ_+kY–ï­Î‹L…VƒWšÄâºÀ¾É{-Ž…¡lòN nµZÓ ¸izÑÕ´¡ôÛ¬"•²<[!wï§Ûgˆ° h^”MÆã¤áRÒ4÷X=‡ Ëõ›"ÛË6]b2b^tkÍQÙdœöæyA#]×ý‰áyÃq‘$ µZÍ¿Ȳ\îÔwÛ—xd}Ÿ ¾½_pÒ4O”e/@ ŠÙ¬×¯_Ÿh¸D,›[OEäQ0 
äÞÞvwwýöÛ[[[xòÉ'Sý~3cÀYÄã”V™Ö`ÛÀoý/À?Àú›-13)›|‡S1ªnS’²IW¢¹€Y¥BE) tÓ£Ke38 @Ùd<¥¿*²ì]šæ]ßÜñÀæÈv¸Ôƒ)Ž3Vê34bË(ã4Žf¶mãøøx¥=¼Óé ÓéøÓPJm¬ž<‘n•uºyTJ;¬)–”-4X+•ŠïeIƒ½½½…ç«V«xæ™gü +ØÑ,b70p|á§¼?ê$ Fj ™áù‘*ð•ÃRtå½J”M¾¦@Ò tS TMÓÎÎV;×¥l2‡¤ — K’N¿Ž×H-‹ì‰’²)²½q#:®í¶g¨6›ãûT”UÆçe P:plLö:ïgáï>X~%Iñ3ÙK k­·ódÞ í  xÿÇi°JÒì«"’uÙÕ¼å-ÀÝ/äýÕ0KR$ù^HšÑ—­ý3"KQÊœ2ÉxÒ†K…Å0â+»¢ŒFy¯¸”]¶Ù`-!”ž¯(@³‰‡Þóž\—S§5³tð+——é úžt0¤ô÷Il°öû}¼èìtãÆBuÖ,¼ èþ0˰gð&5X%òë/Rfy¨æc.i7„h4€óóéÇ×™~cšëÝ̙³LÃ¥ÂA BÔ½Êq¸fõ Á |H:ßør¼ÔSÿïç½òÂÀk Éà~{«eY¸yó&vwwqttäÿìîîâæÍ›™ 8.:€úæÿ‘ÒŠ")H66'c,˳˜ò±öž¢«Ž3Ù¹.‹N‰Íæôcó”zæJ²lÃ¥ÂAݼéÿš6>¦iœUpEH2¯’ÉÛö²ê #¹òÞjáÇÞýî¼?AሒmŠÀ2sдt;;Îäý%м Ö~¿UUqýúuèºî%Û¶ ]×qýúuܺukªãئ«ãž à‘çç˯ªµ/ß§`Ð~Ì÷I&6aƒ5 ¢R8ºÊL`YÖf¬¶=6JeÙ3Z]×Ëj„õf10¹áºîfÈsq]O‘¯ÕÆ%`4»;!¯|åWæýi éß’$E¦º³ÁQL®oéúì2F]_ìô—¤Ô‚Xëþþ>dYÆááá”`ßßßOuqEÁqœù‘(^Ôó“¿çdÓÉX<šÆ—vœ““|–ÍNÓA¨WÀº¡F—LrrñÄ ‚·q¦õÞ®;[ð¢®YAàè*3ã8åWð]wrî° xFëö¶gIJ“æÊÀõ«9Ñhx†*à¥Þ{£>ʾ·ä̼R –õFüRÇñ ÐZÍûi4¼×vrZ–÷šEÐN'uÙ_h°öû} ‡C<õÔSsŸwûöm ‡ÃM žk°êðºõºîây[ËÎGëš×8«Ýžíœ¡ùó‚àéEëÿFï‡àgøÁ|¿ñ¬w±cîèƒ8)Ë@×Ñ¢k%.íöfu1fÖÎFxèƒéÀ„ªz†*GV¯\Ó—1í¶g„éõ¼ÎóNöcD®Aƒ5ìd߈½; LsÒHÅè A³9Ö󎽎ñ½ž÷–czž®OÎ\]# ÖÁ`€J¥‚J¥2÷y4—õââ"—’¼©ˆÕ¬µy¶EWsÀu=C/Jç ëtÖ¼2 ÏVEïúl·§ŸcÛcÇgðó-ßrޝýÚW×ûe–‰uG_Ék—M›êH#b2+ó¥ ¬#»š)8Qã§D‘ëV¯(l°f)íìZ;²,OET-ËbY"¬K‰¢§°‡9>g,l,뎕윜 »ommáÁƒ¹,®(Ì­a5P¶Õ"¯Ã¼Æ2óìƒöå{vk2(Â}<‚YQÑÍuö´ÑõÉNð¦995Ŷ=R·;}}=öØçhv}%™›N3kü̪PJpË{â`šÞk¢fHÒ¦³‘|ejBs¹ÙÑœœ©_•eN=dÄì¹Á,ƲöîaýH–ÇúU­æÕâ5ãæ|9Õ„PfïÝÑ„ÓÛƒìÆw¦”C`< AQ¢ƒA²|%êß6]"Èh¥Ñ5ƒÁÀO®T*¸¸¸@¿ßG½^Ïû3¥†ui\FzqTLF> ðE†¤¦Mí[ дqæ‰ýq ùk þ[ŠhgÖíQÓ&³‚Šï²:õ'ˆ[)IÑåˆÝ®w¢4ãf“Çš¥ÉÚëWç!Ó#ýÑ©ÈT½Ÿëõ‚ý4ʪ4´ZãyÈlÇÄÇ0Œå<ôôegÕ˜,ÝJžÊŬÛ¶¯ŽÁJuCtßèõàÛ^éÒ‚à]G9_K4]üN–åÙΑkf&î×–e¡[ææ‹„±Mq©óÐ-çªêÀ± V‚FÜ„=ƒÁ»»»sCøeC×u¨³”ã°b5V ŠˆÜsí­?ŒÖï{X»}92©ÿ¿á¤Ó4/$)ÅT<)ä© ž*­L3EñløUî b è,I¼¡¯´f¤&!¼†çzvÒÊ€ Í!Šå3ú(ÎöL24MƒmÛ8^¦ã¡ëŽ7¸¬&Aà†JLl–Î(#sôDäw¹Äõ_Ñ462XiïÎq¼e©p]÷jÉy˺ºÆ*3%ø*BÑÕ©”À¬l°8P¨ªý™ÿÖïý)ˆ_æ â© Hoûœ~å·C:Ú$ î?úáÉÜØ” VMËNßR”xƯmsÿ<Ðu}ö†OEÄ+BÒ).;չ=XC×,Ë_¶’T¾(+§“%§ÑhÀu]ôz½å¢Q޳T­ôLh.Q¬Á2 ¸R —B÷¯¬G¤¶Û³/Sº×E•·ÓØ4z\Q¦ŒªÊÍó¢wqE¶moÎ8² ®»°–ŽÆ×mo§÷¶eŠ1²ÁëºÐ4-:ºªˆÒåãþÕC›¬ôÁ8~æõÞ/P?õ]¾õ Ëp] ñáw/B1Œñ&Y«MÖª¶ÛÙ–BQoÊž›…i–+¢µ 
†Ó4ÑÉ0,gžŒQæä¦é{šOÙЫGÞ/³:áe@°œÒ³Ê­Ÿ?ñ 4à•RÉeœ^¸M›ÝÐ…ièº÷³½Í´˜•بF4s×õ.—ímï ;FÓ¸ŒÂ͸eÙ{Ÿ¨s»®— $“÷¾vۻăk”/˜"®eíTŸ%Aƒ•ŒQ¿ê8ž°×j ½ÏÅOë£[–÷¶eq”$N Þt\×E£Ñ@«ÕŠŽ®Z¢t$7‰K¡¤ôCQ ÀËg9àŒ 5)=)†w^*}==4$)³²Ø ,Ë3J;ï=-kÒ(0MžÃ½NlÛ†aËG—b"Š^_%ÈHc‘$À4¡vßY¡ª@³YøÿÊ»ÙÓ¸ÊÜxU’Ø`ƒ®ëè­:Kq– Ðü;*ˆÖuo“r|jÚxc nÄ.gVÀ4ÍÙeLEƒ"Jtƒ îq„®Oö, F’&.-Ò'‚þ#jØ¸ŠŽ®ò¢Q¬ÆXw£d‹àó:Ïþ fÞq}cÎØ·eYÀ‹/¾/Ÿ¿KÁq]·¸õ«?ìý!)’'¡Š©èSY\Üîºã>QŒëzúeEµš·Î4U9ް«‘gÍDMS¿Ì;œj4Ù×uXÀÔ_8éu©iãÆwÁý?xÚVk=c_hîv¸ü–6øð=ŠÉ×uÑl6Ñív31Vu}l4Ѭ(ÓQKç?? ÷í_?±‰Ò¨"íÙ?;3 rf´v–)a, áfÎ\Ç:a“zí“m{¡P §ôzÓuv®ëi¼®ëyêÂ5§›0KbdY.~Ó%Óô,3 íÐÆ“v:pÿé¿Bû¿¿ð"@ã4R’î1á½O–£³e4mÅ4 ï² F5 c|év§/OAð.[òIÍúª½Ë?©ÁLºž$_ù•¯¬÷ïR@¬ÐQEX–UÌúUšoYóÇ$¹î¸£5y;2rÐÏ›/OýÊVÕ4ͳӃ¾V2„ÓþX #¬ƒÁ Otqq‘îÊr@Ó4(Šm¬Î‹®ºîÌ¿ yâ¨L©% ptÚWýÐûÅ7ŽŸ˜ÒõGÓŠµô#ÊØøÜ…Ã4M(Š2?eŒn ÿ8äÉ ÛšÔ7ˆö‹7¡>Þ›š %Š€üoƒaŽ”A!EçicTÕÕ³Â_E¸kv‘ 7ˆÚ”LÀ¬˜ÛDo¨;\\C¡ÕZOz s¥0M³˜Q'‚Ràeî·ÿeÁµFl^–- ýû?ÖéÿAÿ-86LKBK\ì×Q”錓à}_†”¶Kóãã$`dÄàúõ—²}“ÎrEº®{YXEÃ0<ëO=áÛÞ7s ¬aÄwašž±ÙëyòÐn{/ êBótlj–5,¬Ú•² h­Š2¾ÅQ…W¤aÝÚÚ*õHÇq`YÖl…†¢«Qî ÓŒTVjµq½ƒß¾üÃïF³ú«è¼1‘3½V‹÷ ˜&µå70Šƒ,{÷2\£º© Âxt«mn¦¸8ŽSÌúU²&OЂ©…áçŨ?"ƒ’ ‚ ")6J]Ÿe,†3–TýÐjM:Â&’ ¤W®´0ÂJóW³bÞìÖÁ`€­­-T*•ÌÞŸXè}—À·å JÁŒÑÁümÚ-ëKà8_²”Îâºó7SR¶‹ìd½jE¾©«^”BCuÆÝ–½P€¨‘•™R U\™3  µ÷ú™\Òç cœöõÔ W/*› œ #ÓÍ-k܌Ȫדeyk¥Ï”áyl¶=ùyž}öHÿƒDPŸGj^ù¸ó¶™  ² xÑÕ…ÎÈY8ÎxÈyZÎÊ7 l¸T¶­ªÞ>V¸ÁÓñ§nQÝ.àºþøI¤Û´Zã,ÍE#BW¢Ê¸á¦ì°B¬Qå+8,Ë«Ym·½k'8Ö‘ê§g¡i“MUãŒ\¢¸[ܯ–²ÝhÛQUï}ÃNùNgì„RÕ3+G9rÿþýÑã?ùø>ðÑã?>züñÇGúЇžëùçŸݽ{w©uœžžŽ$IZüDQÎÎF£ÓÓçKÿ»RÕÑèøx4êõF#EÎÏÇïÕëŸwv–þ{o"«ÈK\Ò”ïÑh4úà?¸ôZEõ‚‚BUG£‘,O SˆããѨÕòþUïr¤±,ÎC½ã<ÿü<Þ9iÝQ2~>ùx¯ç}ÎFg*;iËKEÞ¿{½ÞHUÕ©Ç£[;1ì€U8?ºÝñïQo%˳_+Šãk~¢ªc]!øtk;;óþÔ•Z­ÑHÆkY .NA:˜$yÿ.#/ S‚ûýþTÍÛÅÅúýþÒFòp8œ›~»¿¿J¥Û¶ñÜsÏÅ®£]MÓføph»êDq¦«‚æJfÑš\’ŽœÊò¸;Ê£NiÈ”Ú2Ë‹é8ãt3Ó\œêKA¹n7Ú ™×õ®ùp„B’Æçµ¬lk Š&ãQ†Z­QqšVQW5SjÊ Û¶m£Ùl²,ض½\í*Ím¡æ/«Ö,8θvéôt"íF–gg¯˜f¹fAneñY¬4Ž:mmo{ÊBÒ99‰<6Ñï÷qûöm^mìÎÎÎê£f@µ3S tx ‘ cn.Ÿ®÷ê,RrÅK 8>žn¡nDÉäK‘äð÷Y7&×½lh§i0¾á0ò8 D“$ãåøØÛL³Cô<Ì4އŒYjèJK¶=ÝšÍqýÕ­Ó¾`ýMi7Ä BfƒSœg¬‚0ùØöö¿Ïì».šŒG¡ë:NOOÓm´´¨.ƒ)=emÃ0ÐétüŸ¥R$Iƒ$ TQf+ð4‚&ŠàÀQU:´5Á×åÃÊwª!eÖGd<Z-O‘>;óþoYñæ_“ü2ÇÇ“yº+`“zFœ9ÂÔ©šh·ÇMÃÂ×5#•>Éò¸ 
·×‹î˜‡ß6—±6;;;8<<ÄÞÞÞÔ±û÷ïðjg‰jµŠáp˜ÉZÖ®ªmÜN+„뎻³ŸžÎn<“¬'•ƒ"É75›U¿gÛ€üÿ|PX¿ò–)C‰˜ÂQtÙ0wOŸÂq¼¹Q£6‚ׇ,ÏN)™•nrm2^»…mœùZ­ª¦¦Ë3P_iÿÖ´ùº(ìßë³¢Ò#ÉU–â}…²¹È±7ã |¹v:ã&Åa¨[ðª*ófaÓ¥u3¸¸ÀÖÖVjïeDQœï1wN …ßÙCÈÄaò xéî­VkæÆoý“— ~ÑoszXº(¦'Ûelj¡ªã()yït|^ƒàãaÇ5uÝ#('Ž3=îfݬ[Æ£0M3ýÆ]½òA¶ ÃHÖ@¬ÝžNÍŠ"8 =ÍЈÐ,¹Íê@zhò0g|•—"È8!-ãá&Ïò,yC`š¡&¡§FK:Îdj.0ޤ’Ú|HÔ\©Ýž¾ÿÏ‚>ž¦M¿O:F#ËJ.ÖyÌ›çúàÁƒ™Ç^yå<û쳉R•çEWÍhïôöi÷ôÌÿ‹“0’\Ÿž²±Z6NNNp÷î]Ø9Î,+ßðꫯbwwwf*O˲àºîÜî‘Η>©ë'/œiz›à¼´Õ«ÕjPDuÝ$±»TÕ»Wïîî⥗ò™ã·Î=|ój¶W8i¹ïöÄÁÁîÞ½‹—_~y­ï»Îý{‰¢«º~9à:ùµà8€ýÜÅÔ EÊŽÜÞ¾ì.ßÍ®»úU…öïÁ`°ö÷.Âþ½¶íÉj­¶\;þpã "¦±JåSÁ,¬Zm²gEЙLÏ¡K«Û·È‰kÓÀ8×^¨¬Š÷½ï}‘)Q†Y–gÎ-Óÿ Ðzr\;FQÊ Z8Þ¤ËÇÎÎ*• ^xá…µ¿÷²ò <òH¢Sš¦-lÆî!£ªãÇÒ¿Rv’´zÏI¢”ãÃ܇uíáó°m{9ïü<8ÂZöööpãÆµïáëÜ¿£pŽãÄ“mÇñ,Ë%ŽµÛ€àü&Ži†Yà´”Éþ›l8<ôöïyò–EØ¿o嘡\WrÌäd•QcÊà’[­É,ã N58i»…²Ë–ÕÁc¬Aå‡<0Q Ѫòõë×xi tqÿŸÔ# ÇðÐúÇÓÇX‰gVa]òM™yŠMTzHY7@Æ£#B×%ã³°,+›9}eϧ*)”‘Z„{oÞ²mšfüèj»¸ÞÎ0¼öZÝCÏÑ/ë & €"ì1Lvä-ãÀ¸~u¡cƶ=c5AËZ]÷ŒÊfÓ³m½eURš/õ­ðÖé=N¥BáÛƒ d»o]5ÿé”àëׯãöíÛ¨V«þO½^ŸzŒ~V¥R© ^¯O¤ÍX–…F£‘Ú‡¦ú¦Èº>Û†U?¼~ÇsX‡|žÁ• Üh΋Ÿ®[žÈ!SÖ%ã³°,+ýè*¿¸ˆI1¥ëÑ]±ó OÙv]wæ¾>EœÔ08Î¥2ÿÇ~yBív'º‹ºn¹›¶0‹É{ÿ€v»o|$ysFö…1 ïÒèvçOµ1ÍÉßn×{« Qê83û±2°0ÂZ©TR ñÇeoo»»»è÷û~‘÷“O>™Úùgzà/g|ˆý,俸ÖÌ\!²–oçR£§»Síu»é¢÷?˜U5q ÃÄ!kŸ‡eY›1‚áŠáºž‚Is©9‰,Ö b8|=^|ñ«×¾Î¼d»ÝnÏm 7A cÕ0¼ïšF±êúe@ö;Þ |Ç»ýç™æ¸¶ŽJbÙo³Ùä¹Ï-×KÐQjJƒÝªMs|Y,Êoÿ ?'(û|¬—\kXëõzdã›jµŠgžyÆ/8¯×멾¯eYѵ}†È2dó+€¿žç7ÃlyÉ÷¬´1˺¬Qm}Îûþs7Y¦üä%ã³ g ³)Át_š/ÕxDQ¼ × ƒ­RùÞùÎßÊlmE’mMÓ ¸I$uÆÆ…ëA'| MZ×=e^=Å~–K3ƒãZ™Í H2Nèº>ípÔ´±µ3Ç6hœ ;é/Õ~_¢=ˆ³ÏŠGlƒu0àèèÈOž•r•VçÕ­­­L.’¹ã, Îßø8ÿ`Ye²$+ù<?>>?Ðn¢ËRÑ•M(ͼ³ímž•ÇdG–2…eYËׯڶ§ÍPxºê1™@†ªe{¤D5!!ÂQ=×^zé¹­²mšædÖ€ãx Eq¶-@Ê>t&+&FMš¦·Y¨jâæaT^ IcÙŠ¸ÞcÁZÖ8³Ö™|ˆaÝß߇,ËЃý™¿ÓÎÎTUÅÓO?½özפÌL6M@–!~§¼0å%2XQ *€ú!0G¾b^”™\ ³ †aL*ôóÐ4ÏkV€H+/‘‡j:‹®Q–½(é²ëuO ¥yÑïÿ9挌Ü À$IcÙÖuï \QIÑõd#4dyl'3LPö£(ŠžGË0–ž4NEÑ Î†K Ú› ÃÛŸXÿ/. 
#¬TtýÔSOÍ}ÞSO=…££# ‡Ã¼?ÓL¥ã›n¡õñb+ 3Ó4##L¶ oçæ.KÌA)d±ŒUÓç –Ø…nÀö¶g{Ï‹zÑćfs<À~\wlï»®·Ëšý|ŠjP0{UãZÆ3 mxì±Ïg󈉈à}¡+zg~wÔ(…Ó$™lñ;`›æJÆ*àíKAÃS¼Æb³²9(u˜“hŠÍÂë`0@¥R™šÁDiÀ¿ÿþZç5%!˜lc-à…ÿ7ðÛï@»~1%Åqˆ¢8¥¼Û¶§èËZ¦ìИXÉz›W¬Ê(^·'Þ0ÆÍoH—k·Çcƒø#J.ÙK’÷Üv{y{‡ŒEÒ!½ó‘ض½ï…F"¦/2+µoÓˆì–:cvü<¨Ä5ØTf™Dž½Êd…uéù’(½}ÅY1Ô¸-Éó9PU|¬[[[xðàÁÔ㇇‡y¯=1”L^b«çmä7ò^ì„iš“ÍÐ.5“<”k†É]×ãEWm{l©-é±wݱžšw$UŠAMˆTu¬<MQc`»Ýq³ÝÏÅiU·ë·®›L9£Ia” grú’aœ%­–÷9¯Â~Ù-u 4Í“)ê)F2Æ0EÁ0Œq&Á’–#e{ÌkÞ6Î (> ÖjµŠ‹‹ ôûý¹Ãúý>LD]‹„mÛ¾2?6RèxgJŒmÛãMÿRÓu%¹Ley  ×uaš&NçEœ\׳ö(rÉ Á4½ÓÈòdcÖ4²ŠƒK¤ udU”xµ†¤ßIÒâèiP‘«Õ&_4f%ilˆÚöleŽê¿Ö…㌳º/UŽ„²ÁÒH\§ÒlIòzØ-eòÇ4Mªˆã8ËuwAÙ%®ø`æË`­V«888@µZ4H/..ppp€z½Žjµš÷gŠÄ²,|úÓ Âíñü2[Ò™ÆÃ0¹tÊØ»ÿÚùW ö¦3›‡¦iPe~t•B)Ìn:>žT‚Há§·ˆ î•¤›QT 8±¡Õò"¹Š’,?éû°­¯ªÞÚèG‹Wö9ÞDÚí6ìFÃ+@^" 8H0pÕëyF?Ãä‰eYÐu­V ïÿØÇð·nß^ê<”ûÙŠ^¯åíäŽ3΃jh©äÞ0LNøÑUMƒsñ¸oþj6V™Ãqhš6?8xKYŽî¸Jõ¨d<´ZãT5'ÅÌÂu]ü?øƒøî/ú"à—~)ö먾:h®£¦˜a–Á²,t[­ÄTžA}™‚™)ÌæË`¼.ÀwîÜÁÞÞƒÿø¬4á"ñÜs¿‚w½ë«=Á¶T´8X`JŽmÛøs_ú¥ÀÏü Œoþ´Þ›÷Š&}Úí6:Nt*0iìKzj( –F Ä©ƒ’¤i£ÀuÇ3+ T&>š¦á_¾éMxÝÝ»sŸçºÞ(Ž}ðqe–aòÄ0 |×Ox›äñql‹Ó¶=Yg9¿ºÄ6X‰­­­¹Í—І¦¿ò+À{ß+y $]æfÁ‹®r:³ضïýú¯T¦öœ~OÞ+b˜tq.‹ï”(ƒÔ²Æ³ûbž×^’¼Ô^š¬—p+]á & ¶mãú/ý’—©6'/Ûu½ UGšŠžÆÍ0„eY¸ûÐCãN`  ý4NS9f³Il°– ê(øŽw< Izÿ¸:ð ÖN™a6ÇqðÇýçÿg( §É0›‡eY“c›꼑À[ïÏshçš.Õ¤ëNÙXe’bš&þÞ§>|ô£sŸ'ž±Ê2Æ” ×uñ†_øüÑ·¿8:Zøüà(1Öi˜6XuÝ«Òõáèès´¯f•Ó ˜ €F @U!‰[8˜Ù@lÛžŽ®jÚ¸óF‚‚=ïÑí®^ç'Ë^)‘´[/ÃÀÖÿ8¾ô˜é9 6ïbc•)#ÿâ~w¿ð…Ø¡R’u6Vx]Þ È Š®Êr {ª¢x‘ÕæJ§f˜BaÛ6DQ„+°†Ù\|Ç à¹Ýk5ïÿ§§±¬NJÝ<ße¯—NSº½¸®wß ¾ÃÄÁu]œ¼å-xÃG>2ó9†1ƒÄ0eä™gžÁïä#37^Ëò¶õvÛûz 0 °Ák»í9q¦ÒÈ4\ïÁl¶mC–ehÚxÌÃlc› cÜé=˜53²o/çÔ§Þ=U–Ç)Æ<ç’IŠišcgÌ ƒ£÷Lyq_ôExÛw~gäqªìèv¹V•‰f#S‚I)‘e Ý¶Ñ îò|!0†?Ò¬Ð0›É„ãq !—¤lG HÒ¸Ë0a’bÛ!=%„—-ÆÝQ™òbšftÃï¥0LfLÔ¯.À¶=HQ<%h¬0þ•¹âضÿùoœyÜqÆÍ–¦ŒüÇ_ÄŸãÇe4Í“mÎc±‘)Á„¯äü¿Þü¿ŒGØ0‡išý§_„õ7~7ï¥0L&8ŽÅó@.´ i¦j©e]e–á§~ôGñ?]\Lö”zŽB1ë$ ùÖ4 ÿšÛÀÙYÞŸŠa<Òoøìÿú¿âµ÷¾®íª­Öúz 0›Ea#¬ûûû¨T*°mÏ=÷ƒb¿þùû1|ûG?ê¹qØ=ÉŒUåû;¾ã%ü¥kÿò¿ø.–o¦¬*ãÐn·ñ·y$²ã wNeòdUùþ·ý(þúÏý~óýÿÀéÁ0E!ýû·ïÜÁ›_|¯Ý>@³É¥ÌjÒ`‡è÷û¸}û6`kk ;;;èõâEJÿã‹/âŸ}îsxÝ?ûg|…0…cUù€×ÿÒ/¡ýmÿ[G2…$ ×4 ÿûÖÞxýºo™ZÖxÎv§Ã+“+Ë·ëâÍé/áãÛš?¿Çó'™B‘ÆþýÚG>‚ÿèáß}øyüÕ¿ùv–qfe i°Þ¿P­VýǪÕjì<ú/º¸Àü¿QeþääÄO»(I=nÌêò 
7¿ñüÉ“ž÷G‰MYåd0àää$ïe”ŽUeü?¾ø"v~è‡ð'ÿ䟄)w¡iÀö¶U-"¼‡_-V•ïñ§þ'|àM¿„ϼãf)š„•UFxÿ^Ž•uÇÁ/ü½ÿ ¥ú«ø‰CáUÞ¿ËA!kXç]ØÚÚšzü³Ÿý,~õWO?ý4{ì1àýïG¿ßÏû£Äâ—ù—ñàÁ\„ZÛ—çŸ7nÜÈ{‰øÌg>ƒ_|Ÿýìgsyÿeä›ÖýôÓOã-oy ¾úïþÝÒÈ7PN9€—^z /½ô*•JÞKI¼îO~ò“¹½ÿª{ø¾ø9ÜûŠã·¬wâ]Ÿû4Þõ®ÏáÇ~ìð_þ P4Ñç=|½¼òÊ+øßø üþïç3_}ùîß?ôpø«#¼ûÝ}¼\‚že” üû÷p8Ìeí«îß¿û»_ŽÎC?…ïý«€w½«_È=;ïßë…tð¤ûw! ÖyBóàÁƒÈ‹åu¯{ÎÏÏñÜsÏ•Rè†Ãa)×ýðÃã…^È{‰øä'?‰ßþí߯W|ÅWäòþËÈ7|Ù—}ž{î9¼ímoÃÛßþö\Ö¾,e”Àû{(ÝÚmÛÆý¯ÿ5·÷_u¯V¿ 7þÒoã¿{ä—ýãEÿð¾>^~ùe|ö³ŸÅ×}Ý×åòþËÈwpÿþËÙÛ¿Ëòµ—QF€òïß¿ó;¿“‹Áºêþ-I¾ç{†Ê#㼯ÒÁ“îß…4Xƒiaf]¼ßöm߆o Ì2c˜¢²Œ|ÀÓO?÷Ò&¼‡3›Ì2òÍû7SxÿfŠH!kX¯_¿`2-!¯Ô†I–ofÓag6–of“aùfŠH! ÖJ¥‚z½>Q,oYFÞKc˜•aùf6–qf“aùf6–o¦ˆ\F£¼Å`0Àîî.*•Š_ä}xx8³¾aÊË7³é°Œ3› Ë7³É°|3E£°+à~S«éz½ž÷r&UX¾™M‡eœÙdX¾™M†å›)…6X†a†a†a˜«K!kXËÊ`0˜;|x0Ìo5ïø¢×¦ùÒ\÷:×ÎdK–òçxZŸ!íµ±|o«Èø¦Ê÷ºÖÎd ïß,ß›ïßù­}-Œ˜•ùÄ'>1zâ‰'F?þøèñÇ=ñÄ£O|âþñû÷ï>ðøÇ?ô¡M¼~ÞñE¯M“»wïŽ>øÁ¦²îu¯ÉŽ,å;Îñ´Ë÷ªkcùÞV‘ñM•ïu®ÉÞ¿ó_;“-¼_ ùæk ìîî¢^¯Ã¶m<÷Üs¨×ëPUÕ?¾¿¿J¥â 888ˆu|ÑkÓ¢ßïãèèhâ±Uֽε3Ù’¥|Ç9žQò½êÚX¾7‡Ud|Så{]kg²…÷o–ïM‡÷ï+"ßy[ÌeçùçŸ=þøã£Ï}îsþc÷ïß=þøã£O|âÿ'~äG~dôÄOL=7||ÑkÓâsŸûÜè=ïyÏèCúïáYeÝqŽ3å KùŽs< ¢ä{Õµ±|o«Èø¦Ê÷ºÖÎd ïß,ß›ïßWG¾9º"ÕjuªÕ÷ýû÷[[[þÿ«ÕêÄk(Ÿ|ÞñE¯M‹ýý}ìììøÃ¢­+ãL9ÈR¾ãOƒ(ù^um,ß›Ã*2¾©ò½®µ3ÙÂû7Ë÷¦Ãû÷Õ‘o6XWdkkk¢Ý÷p8ÄÁÁvvvP©Tæ ÇÅÅÅÜãgggs_›'''‡ØÛÛ›x|•uÇ9Δƒ,å{r2K¾é³,»¶u\›ÌzXEÆÉAYå›÷ðÍ€÷ïüÖάÞ¿óY{°Áš888ÀÍ›7Q¯×qçÎÿñYxÿfùÞtxÿ^ïÚó‚ Ö±, Ãá²,£ßïOüž'£^¯ãäädâ5FcáñE¯]•z½Ž½½=ÿ§Z­âúõëØÛÛƒ$IK¯;Îq¦d)ßqޝÂ<ù®T*…¾6™õ±ŠŒoª|g½vf=ðþÍò½éðþ}uäûÚh4彈2spp9? lÛ0NW¨T*¸¸¸ÀÖÖÖDW³yǽ6íÏ2 pxx¸òºãgŠOÖòçxšŸ%(ß«®å{3XUÆ7U¾×¹v&xÿ.ÆÚ™ìàý;ÿµ¯ 6X×ÄÅŃ€è¼ùyǽ¶¨ëÎ{íÌú(³œ”õÚdÖË*rPVùÎ{íÌz(³Œ”yíÌúàý»üòÍ+Ã0 Ã0 Ã0 SH¸†•a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`e†a†a†a ¬ Ã0 Ã0 Ã0L!aƒ•a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`e†a†a†a ¬ Ã0 Ã0 Ã0L!aƒµ`¸® ˲ò^ÃdË8s•ÉBþùšbò€å޹*°¬ç¬öm4¼—Á0™Á2Î\e²¾¦˜<Ëap'ïe1Lê°¬ç¬ Ã0 Ã0ÌJ˜¦ÉJÃ,b‘Œ÷z½Qp:??ɲ<:;;›’ÿÓÓÓ€Ñùùùh4bùfŠOÖò¯ªêÄyÎÏϧäšaÒ`‘,F£‘,Ë£^¯çÿ”Qú÷l¦è¤!ëóövz>Ëv28š#Žã@–eˆ¢è?Lõ5Ms긢(þÿ-Ë‚(Š~÷²àïä¥ ¾V?aIŸÏ0óX$ãôx»Ý†eY½^¢(BEH’äg˜¦9‘R|=Àòͬå?˜. 
À÷Ðï “‹dyaå=›):iÈú¼½=ü€e;l°æÕ¥ÎÂuÝ…¯§Z¥àOøÁ0y±HÆEQD¯×4›M\»v ívÛ?®(Ê„Âβ͔‰uË¿$IPUuâ “‹d™a6…4d}ÑÞÎ$‡ ÖEÑ„A#U„¹F«$I¾×†~Ž¡ª*×.1…`‘ŒSsn·‹óósœžžÂ4M¿[Qض ]×áº.GŽ˜R±nùïv»~£®bÒd‘,3̦†¬/ÚÛ™ä°Áš#Š¢À²,߃NÑÒàqÛ¶ý¢mê8F»TøÍ0E`‘ŒS#1ºˆ¢8•ò«( t]•’Ã0E"ùÁïVÉc˜´X$˱LÙICÖííLrØ`ÍJßj6›¨ÕjØÞÞžÊïv»h4h4¨ÕjÇI1Ñ4 µZÍï|||œ÷Gc‹eœjò¶··Ñh4°½½í+é9f8ºÊ”¼ä_Q(ŠÂÎK&5É2àé$š¦q‰)5iÈzœ½IƵÑh4Ê{WÇq"g=Qó$QýœzšÑ4JƒM–(M˜aŠÄ,'(‹ ê¸aÐugggy †Y –fS˜'ËA…ë]™²“†¬ÏÛÛ™d°ÁZ`ÇÁöö6NOO!IÇA£Ñ€ªªœÉl<”JS«ÕÐjµxˆ6s¥`ùg†a\S‚ƒƒÁÜãÃá0Ï%æJ0%øÚµk¨ÕjP…Õ’Àò½¶mãᇆ(Š,ó…e<;Xþó…e›ÙtXÆ™2‘K„u0@UUÿB¨T*ÐuÕj0¡ªª!ɲYðÌ0E„å›ÙtXÆ™M…e›ÙtXÆ™2’‹ÁúÞ÷¾²,ãÎ;¸¸¸ÀÁÁúý>>ö±vwwñå_þåÐu¸uëöööfžs8âW~åWð–·¼%¿osI>ó™ÏࡇÂC=”÷RóÊ+¯àÑGÍ{‰yíµ×ðº×½ï~÷»S?wò ?ó3?ƒ7¿ùÍyuKQf9yíµ×J¹¯¼òÊ+™5xà=|ÞÃ×Ïk¯½†­­­ÔëÂxÿž¦Ì2Ræýû[¿õ[±µµ•ú¹yÿž„÷ïõ³ÌþýÅë^d¿ßÇÅÅ…/ø[[[¸}û6NNN0 °µµ…~¿{÷îùÇwvvprr2÷bùÿá?À4ÍR5Û¶·½ímxûÛßž÷Róì³Ïâ}ï{_ÞËHÌ'?ùI ïáëåààwïÞÅË/¿œêyyÿަŒ2”ÿÎÂéÈû÷4¼¯ÒÁ“îß¹u ¦¼ù›7o¢^¯ãÎ;þã³xðàÁÌc>ú(Þ÷¾÷áðð0¯Ä”„<õÔS™¦®¤-ßðÈ#àðð°”7`f½â±ÇËô=xgòbooO=õ¾æk¾&“óóþÍä íßYQ¼3yA:xÒý{í)Á€—ê°¿¿­­-Ü»wo*õ`Y¤G0LÚ°|3›Ë8³©°l3›Ë8SFr‰°ªªêçч/ŽëׯÀDZÂp8ä …) ,ß̦Ã2Îl*,Û̦Ã2Δ‘µGX-ËÂp8„,Ëè÷ûÇêõ:*• êõúDG2˲Ðh4òþ®f!,ß̦Ã2Îl*,Û̦Ã2Δ•µ¬TDU˜MMpööö°»»ë·ßÞÚÚ“O>™óWÅ0‹aùf6–qfSaÙf6–q¦¬¬Ý`ÝÛÛ[ØÕªZ­â™gžñ/¬`G3†)2,ß̦Ã2Îl*,Û̦Ã2Δ•\š.Å!Üz›a6 –ofÓag6–mfÓagŠFncm†a†a†afl°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`e†a†a†a ¬ Ã0 Ã0 Ã0L!aƒ•I†ÀÎ{ ÃŽãÀ¶ù¢d†af3aƒ•IÄK/½„|ÏGÐh4píÚ54 hš˲ò^Z$®ëÂ4ͩLJÃ!^}õÕ¼—Ç0‘8Ž×uç>Çu]hš†F£v»v» ×uáº. 
ÃÀ¿ý·ÿ6ïÁ0 Ã0 ³2_œ÷˜ò`^ß=þÌßú3ø›ú›<¥Ù²,´Ûmœåº>×uaÛ6Çñ£N®ëB8ŽUUýçþèþ(Þð†7äº^¦˜˜¦ ˲|'L§Ó¢(+ŸW×uH’Y–ç>Ïu]4 ‚€^¯A&ŽY–Û¶aš&Eñ¯;]×Q«ÕŠ¢àï|gÞ_%Ã0 Ã0ÌÊ,e° ßïc0àöíÛ¨T*‡ØÛÛËûó0a>ó¿|÷þ.®®ù ‚EQ`Y–¯@ç…®ë¾*IZ­DQ„뺨ÕjhµZ–ea8â;¿ó;óþZ™áº.šÍ¦/ÓÝnŽã ÝnÃ4Mt»Ý ã1 ý¤ó©ª QÀ—WB×u(ŠQÑh4ÐëõüÇé“$ ªªN¬GUÕ §ÌîînÞ_)Ã0 Ã0ÌÊ$6Xprr‚\\\¶¶¶ppp€Á`€ÃÃü?³$†aÀ²,?­Ðq_‘v]¿ð¿€ko¸Dd*ʲ ˲b¬†a ÕjMEeTô»ëº’t½8Žƒf³ Y–'2*É ‰D·ÛE¿ßOÜø4‘ÁÚï÷±»»‹çž{Ž ÔU±±tжm?j'‘6jÓ4 ‘w€Wk¼Ì Z)Š×uF¶ÂZ—½ÐÅéZÄúD¤ÍfÖ5×êv»så–ä`{{’$-|þ²P+ý›UMìMÓ|gæ,Âòæaš¦_޲ ”ŠÖjµ2¹Ž™Í"<:l¤@Sf ÍiF„¨A$)ãI{0LVPö"Éd0ÒO“:–5X Àišþuìù!z½ž?僲Ü(˜`¢OM˜´öðÄ5¬[[[°, ;;;©, ßï_ͺW^åŒôÚY¸®‹f³9wÇ,–övSÇÝÄ­¥ÆK’$ÁuÝ¥Ó&<ªT^kŽúΕ•ï cuQ—]"ØT++dYö=™Ýn—ëUÁ2^f‚žñE]°Æ\¥„0 ’$ùQ€¤ÎHR‚dYF»Ýžj–F%0Ï?ÿ<þÄŸø™~?,ÛÅǶmèºÛyHi€'KÁûż̰M…e¼<ÌKy§=7JŸNê ñ‘Ácº®ûY³tA¦šëΠIÜ%xoo‡‘ÏÙÛÛ‹}¾ápˆÝÝÝ©åÁÁŽŽŽ&«×ë8<<\ë—“)¼.¶V²—5È3kY+q ÀhµSRj%]d+^g਄\ŒÖ+!jtp€IDAT-ß+’ÔX%²nªEò:U7}Ea//Ô †ŒPª-Š’kJ›Õ%;u…·m{bôX4MƒeY~†9héý)=SE|Ë·| F£oDs`Ù.TJœ¿Nüeœ‡W}g/º®û3ÙgAHƒ>j Ùív!Š¢ßÇ&HYzp$®aÝß߀)a&â¬ÃáƒÁ`æ9îß¿‰?LÒnR…g Ç¥§Õõ46aÇe {|Pñ_¦CðÆÆj²ÇX¾—ƒ”UÀSH’«ëb™ì†Mƒe¼ÜDeêPmRÔ^LF¨¦is3bÈ  &Ô5~á2‚RÑ(j¼î–©ŠËvqi6›àgÒèºÎûñ°Œ— ª³^”EN ¦Ô÷`éàÚí‡Il°.JŠC¿ßŸ²ðƒãÒ« ¯nu‘±š0õzX¾“Cé*e¨ eåˆe¼LÐŒ``œ6•©C)¸Q¯§¨§ã80 cfÚ¤®ëó¶%Iš;»˜¼ýtþY÷u*Y,ÛÅ„œÝn–eÁ0 6V—„e¼ø‘jYÇA§ÓYèÀ¦obSÈ×åñ¦;;;8<<œí÷û¼´„ƒƒÿ÷ÁA¬®µ¶m£V«ùÍ0Öj¬^ÔruÿD$4¯ Xn¤Ï]tÏZc îÊËwBHIææ 1 â2°ŒêMMh,ËB­VàE¥šÍ&¶··#3u¨L#Œa¾Á©(Š?â  Áó¶Z-¿Ù_øœµZÍOñ=;;+Lz&ËvþPmu°³¨®ë¾£„êíØX]–ñbìÊÛívqvv»^”&u›§S-e°žœœ`ww×OÑÙÝÝÅÉÉIª £T…‹‹‹Xçå•Wðì³Ïbwww=ßÜ*è‚3lµ`mF§ÓÉ'¿\Âìô߯Ä'héÏFkˆcŒj[ONNp÷îÝT²–%©|À«¯¾šÉµ–RJÓ‚¼€›¶±f‚ëáX„1»»»x饗r]îFïáÄ4M4›MH’ä7Ù :ðN§ƒV«…ÓÓÓ¹ÎOY–§®oÃ0|#”F‰÷N2’i BznÐÈ¥úôÓÓÓXMœ¢888ÀÝ»wñòË/çò]—uÿ. ”@N kV†:»4 ý{0ä¶Þ¿ó!8RfÝ¿ÕjA–åBëT¤ƒ'Þ¿G ¹{÷îèñÇ}øÃݽ{wt÷îÝч?üaÿ±$<ÿüó£Ç|â±û÷ïîÞ½;úÜç>ç?ö#?ò#£÷¼ç= Ïu÷îݤ':ÿ Þ?ggg£ÓÓÓÑh4žžŽDQô/Êh4ê­~Y–GÇÇÇ#UU—;8Îb4Mƒ,ËÅ X˜ÄsX©µøÍ›7'¯T*8<êõºQÝÝÝÅÝ»wqçΉBí²CáúV«åw5ít:ÐuÝO_Ij°>½÷ôdó ñe? 
<‹gŸS)êõœfÓS¶£¢CÍ&°u<ó6@zfÆZB)À‚ྺxÉðŒqf!š¦Á²,_îÉ`%c5Ztžn·[åW=#4 ƒÕ¶=' yN›MÏÑ2Ëqïðd6x^Iò®E”á;ff¢ëúDz$Ô×u¡išßI:\ŸÇõzféGL) ò¾Y3¥ƒöÕo«ªZ½¨`$2X/..ppp€Û·oc/Pÿwxxˆ££#ßݯŒmÛ0MÓ׫ªŠZ­æ§±R˜?öX–KCðÞOß›ldá µ11+#ˆ¡ùü™§ÛÛ¶§¤»®§¼‡¡”ápT•Ð4ψè-)KáH„Œµ4’*;ív€×”ê´iÃ¥ú:аÎC×u‚0Þ¬)RX0/²,{‘ù$Ï·Žã–åÉîññ؈”¤qÆÑhL:uÂÿ2WN c6 ×uQ«Õ ªj>¤ÕóÇ^/GÚJ EVã6N-}£ÉœId°\\\L«ÄíÛ·qrr²òX›¢`šæÄì/A (ÊDz‹$I‰F||ê‰OÁý·˜^t2œã8}øéÍ3¡è¦ Œ£GÁcÆü¨ÓKÿ øÇÿ8ïoåJAMHÉEqÂ!ãº.AXè§ZŽ œj>‹H»íy¸cD}Â7ÓôŒR×Îe/âOFüÈ)³±¸® ]×aš&Ï#f6ÊÈe„‘ëzCÞ[™5@‘ÕBêôÈRcmfñ §Q!YÕ¸Óé@UÕ‰Çã¦GB~à× ˜sè $ÏhE8š$ËÞcD³éuë÷]|é½?’îçLò¯–eùN‚Æa0W橳iiR^Ç“Qê¶» TG•Ú>¯eÍ—}fã1 µZ €—ÍÀ ³ и¿ÜÆ!Ѽê²Ö•˜Ò@%ƒ5]‰ VJ÷Ýßߟ˜Åzqqýý}ب9MQ‘¤pÓ‹XiÁ6kܬiŠLp0ŒÍÔ³àxx*ì¦éý¾Haá5wJ“‚øòÆq¿¹Ø"#“®‰YQÖv» A¦›ÃP®"Bëšg°jÚü”aQô2f}Ás¯½›/S4Mó3‚Y< S6ÈÑIЉ\)ÓÞJºR£‘÷J˜P¹Ï©^/‰#¬º®£ßïã½ï}/nݺ…[·ná½ï}/,ËÂáá!¶¶ 6¦eIbEM1…ŠD~ú§Š¢L+'ò7X;ˆ— —p·Õ ²®(ñê³Øxo‰Ä0Œ©Ì"X¯Œ°;Ýã.§ÀŒÎw”^4\w¬àÌ2Xgq§ÞEP+s¥i·Ûp]·øcž&4¶¬ÑhÀ4MF0Ç;÷‹êÐe&0MÓÕǬ—ÄkµZÅÇ>ö1躎z½Žz½Ž;wîà™gžÙ¨èj\e"V„µ|ä×>½‘ç™ âÈb´,Oz9%i?‚ùŒµ±¸–­O»ÎÜ…g@g¹/§on4secƒ5ìp‘$É7`iVëBçM’MÞu';ëf5\šEÑU‚SåJ‹mÛ‰ï €WãGs»s¯ícL:WÁ0Œb6œ±íò”_нGÊeh_A¨Ï«K`¤-Zh°V*躎{÷î¡Z­âää·nÝÂþþ~ü¤%ÃqœDB9Ó`5Hž²3scwàëF°Šþ–­=r4.דµñÎ÷„ ¬”¬_¼+)ð¦i.ö¬kZ|%‚j —Œh%"a >nšÞZÒ24Ù`--4Ò)É3`œy°hì˲üîÔ«@{áäZç; ‹J’²)fíPGàÜ3ÊŠ®§’a–¨†U–eܹs÷îÝC½^G¿ßßxã5î÷ùùmÀ•Ü)e‚4 ָʔo†ê*×^ð­LÓ3H–ŒÕ²O–.ßÓЄ§”ÂN#¬¶m/>OEßu½x8…Ø0RóÔùÐ8›0ŽÃÝj&“$:å8,Ëb%‡) ”Ön¬¸.rtæF«U®.ÁLá¡}œ3d–IJÆúÔŠ,=‡5h¼îìì`0àÖ­[‘2œÔ‹NÆè„2ãclì"Ò‹*6›‹½¼Èïª×Ù§4¨{Ùf d–% eóJ… ¬” ûZ1Œxu©‚0iÜRzîÙ7.bÖ«IÓMÓdƒ•) ddR¿e³â ›L”¡Ôq&g¯Šâ¤ƒ–)º®CÅb;gŠNptàŠÄ2XƒnÞ¼‰ÝÝ]ÿ1˲°»»‹~¿^¯‡ÝÝ]œœœäýÕä†,Ë“‘Y €³6½uV-ÞÔBá5LR”qÔjod€yi|,s1Û)}î$ˆ9¼gA±,+v¶,Ë3»\†ß`Åx²¦(“«(z]¦ VfíÐ옽º.Ø+Ï… EUÕÄ]¯ Ã@³ÙD£ÑÈ|Í<Êœ¿J°s«Pض Ã0Š-ëe€F¦õË`UUÕj÷îÝó;::òûØÇ>†½½=äýÕ¤‚mÛ‰)ƒÕ,7¦Q°üx¿Àû9³7jÛö"[Á÷‘¤±P«%˜“ªÛÛžwp•¿6Öí\µnwÈë!'ŽŠ°Æ­ƒMÍÐ\ÅSGéëi×Á2eY~T*ŽÁjš&t]çš§epœù{…iz#¯j5¼ýG¯íµ¼W\ gr´õ݈ÛùºÝnûu|…_3‹ut–OËâZÛãº.Úí6ŽÙ防¼ž¦Ký~ÃáwîÜñÓ}/..0 &æ¯îììàââb*m¸¬,ÓºZűBs ˜NLe> f ƒm{µ­•›ÑÑEUõ"ZqÓƒZ-/5Sê­ãxQÝ(T¬ßxsxÏ‚’¤yŒ 3•ïØQ¤´6}Q\ndeyŽú»í)7EOcr'¨èÏ»/´Ûmlooò¬«ÙXÓÆ×Xà˜Ûöžu¿²mÏÚlFŸŸF]žúó’ßðû¿Ÿ÷§.QõÔq¢¬®ë¢V«A„rÌŸ$gdÑkXSL“dÒƒ,QWàÂË{šÍ|Æ=¹njúßBƒu0 
^¯û†)¿ö!hŒ—eç”ÇÛØ¶»F0•Hcpžd£1îÊÕlz5¦ï¯Îžq*IñyÕM–샼JØ> QÒ%Qi1²,Ç¯í  kUQU“§|QdõøØ»Ž/-š–ž!Íl,Žãø÷ºY—LÓ„ëº8;;C·Û½z5OTg.I“ ®ë݃ û/Qƒ¾à}Ä0Æ÷ªãcï>Õu^×½c¢ˆ">ù߉ϽùÍyòÂCÍ–Â29sºÁ%Žã Ñh Õj•'-’ÆÚ½»û¢ùßÌZ±, ÛÛÛ~&(Šå‘ù¸¸n>ŽÊþL!8ðÅ‹ž°µµ…‹‹‹‰ÇƒªÕêF©A–5XEA»ÝÆÍÊMüìßøY?{ï…iQÁ5˲ws·,O –n®(Ë¥±‘,bÙ¾ü7Ï{‰ ¼ãö;pþç9.$VMuI”=@u©qR¶æu^Æa"^T&H§Ã#MÓ IR¤¡L›aêþàº.4MÃiXÆ®í¶g$ˆâdô´ÝöœLóêóLÓÛè^%IÞë‚X–÷s•¿ã%p]×7:Ãû=ÍØŽÂ4Mhš†N§sõœ/ë ê{7Œ±ü3kCÓ4X–UŽ ‚U  Öº¯g ¤ðÝ.Œ°V«U \ÍÅÅ,ËB½^Ÿx¥_¿~}½_F,›ÊE7€{ë¾ë‰ïŠ/üi8Ú\wì±SO @Çb£X–g{@¢ ø$3Mmx³V|UZ7€—^Âo½ó·r^L~Ä­]Zˆiz‘‘°‚&IC‰U=qq?« ¼T^Û¶#Ó#ÙQ—Úív¤Ape (E?é>dšÞ½cѵ|ìõSÞ/Ã2Û^~tÚ%h¬ÎêVeÕ4 º®£×ë­ÇX}øátz= ˜pœè{ã”ç3lº®Ãu]œžžn¶± xFc½è¾Øë­|ª…ÖjµŠz½ŽÝÝ]ììì ßïãââ®\\\à…^Àþþ>êõ:*•Êú¿Y6ºJ´Z-ü©¯ýSø£ïþ£ë]øq š+ŠÀùeôÐ…7wu‘œªêìM´ÑðêVƒÄÑÍ\xƲ ÏP-BÌ¥Îó_¾ü¿ä½’\YftÓ4¢F–½M0­ô&ËZ-MWÓ8ÃÌÄu]èºîwýÕ/#þ½^ÛÛÛSϧ‘6„ ÎJ¾Ò –ÚíÉûO8©™‚°øš¿Êßï’4›Í¹Æ*=šÌ4Mœ…ï÷YâºÞýdU‡u#-:³dÒ8N^ Ó4!Šb,Ô²,t‹ž>žqÆ^œX]‚;dYFïÒB6`zúé§ý.ÂËä|ÏkÒ4 0×ú…DÍ›L‚ªªx÷w¼;žAGdåØqáE6U,^ϬÏlË+ ”e|†b«9P4ù^ÇñºD;Žç1ëvÓ¿Á.{ýY–·.6V×J™dÜ0 ¸® Û¶Ñn·!‚¯°(Š2e GXiÄ¡iÚÕQxf1«®\QJ¯ •I¶ƒPÔtÑàðtƒØ]ß‹JZ½²&ê;–¤\"¬e•ñYhš†f³«£{Ø!¹±ØöF¤š/Œ°^ë;w"íìì`ggg©Èêp8Äîîî”` ‡C¨ªê§!˲ì{Â×ÁÊ©]üH^,²Èt²áÕŒ¶°8º: ×­àuÝÍ|½¢U¾Äž'9…izÕe‹ÒB⤆5㺈 ºÎ‘˜5SdÂ0 ôz½H%…¢MÁ4HÇq¦Ò")ÂJ³V¯„Â3 Y6Ùbšf¬¨¸Ž5mÍʨIÛPÓ´TR3¡@2Ëx$¿­V F½^of¤uí²ž'yÕFS£§”"¬€ç…988ÀÁÁÁ„G¦R©$6V‡Ã¡?Ó+Šýý}T*ضçž{ƒÁ`m3^W®ís'Z@ž!¼Š.A)ŸQH1Î}EÕ"Ë7±t½†aD+¨®;Û«mÛñgŸÎ›'Lˆâô{ëè˜Ì)ƒŒ‡!e–©(ÊT=_T7mQýÔâ+— lY³ÇÎle”í 4š#®2Œ²Æžl&­ÖÕªaEÔ}-#Ê.ã³  I’ÐétМ³W])ƒ5/‚ºŸë®|Ç2X°»»‹££#awww%áí÷û899‰<6Ñï÷qûöm^twggÇOGÎÇqV+¾v<Å·`•ñH–5¹YÛX~Ω®{ÞIMó¢iË(dÆéÀW"Ë7°bv·;»ÞlV§iJÕË¢ A˜>ß²²Ê,EÑe< Ã0*ãÁ&4ív;òù’$ùsú®\tU×76¢J”Q¶ƒ$u¤Ð¨¦Ò§KÒ¤l=-8Ì÷’²Ëø,‚—V«Ù$/øÜRËû2ÐÈËu1a—$\Ì –Áztt„ض Û¶±³³ƒ£££©q7qÙÙÙÁáá!ööö¦ŽÝ¿üYúÿ:óèWJ ¶°8U6̪‘z˚ܜe,Y•¤q”ªÓ™o<Ìr^¸²5«@ñå{¥:íYü<9 v°^¼¸Å«,OnºÔ®ýªÝ|r¤è2Æqœ‰yª³ ƒÕ0 8Ž3³/ƒaW3º lüuVÙÖ"„4suQíjаæ¢ÀSJpÆåå|^¹Õƒ®ÌšÖ\OB”Ã…Ê<ÂÓþÊuv—¤õ¬áFh+Ê÷Bƒ•ÒÉÛÀrÊoO“yÅ<ù•W^Á³Ï>‹ÝÝÝ•Þå”` ɵί TòÍÎxʈ,{Æê¢Ô Y²w¼âçɘ““ܽ{wù:ÎXV¾àÕW_ÅîîîLÏh2Ù¨ãlFq¢­‹2ÂÖ+õIÊîî.^zé¥\Þ;ï=< Ã0b)ñ²,Ã4M躎ãoEªªW#ºjY㌚©Zp÷î]¼üòËk}ߢìß¶mC×õ)}E×õDÆ*0Õ”K:°·€h‹EQ¦Ëû2íßYèØ‹(âþ‡(ù 7 >÷JEWƒ2·nGÝ'ÒÁ“îß±š.˜¨SÝÚÚÊì³Í» 
™Ùû¥ÂãŸþ¢²\ó˵ùÍ-àWžÉá3wÁFêä-ß®ëÎv̘f¼†[Q¬ê±L2dú*ÝpJHÞ2„#¬s &0–å]Sþ¢È¶ eÙÙÒÆVÌסœ;Ž—‰nÐF݃ד®Ç»oÙ6 iøæ7½i=ß[E‘qïë°¡iE‰4J5ÔEÑÏH°m;v?ƒ!Ü©w]‚Ã]ö)¨±¤3i¡ÁZ©T2+¢®×ë‘)¸ÕjÏ<óŒß!­^¯gòþaÈK™˜ýp~:ýÇÉÀßûj`÷ÿ„õ¾7ÀÆêŠ&ßÄÜ9¬«x¦[­h£5ÜHiÖû›¦·™®û:b–¦¨2d¥Fz›Œax?ªêÍWf&(ƒl^:¯išS gJ®§×y:ȤiŽÇ ÓöeÊ[Ð0²¬xMeÙ{Þ_ø ™/¿ 2n†ot† Ö¸ãÊ(u^Ó´r^'«ÔËÖÙé=ìHP”•I±»ý~¢³Ó72æ­­­\.’¥RjÞöëÀO¾e½ uàÍA}ú÷?ýÞõ¾7³2yÉ7°À`]e3‹síH’§¤Dá8ÅKãb–&ObÛöÕJ‹«½ש.AÞ²mÛ¶UjµZåtÈ8ŽgÄ©ªgðQvϲä‘mšÀééä{#©dF²Æe‚€_øº¯Ã_^ÿ'òÉ[Æq÷_UU#GÑÄã$I4Mó³®a‡Dž™+~÷± V˲p÷îÝ©MGGG¨T*Ðu=—Üiâ.ã¡s°~%Û…g¬vÜ,n!dYÆáááT8=øøþþ~ÞŸg%¬¼fIá—lo'{cÉêA%°±Ê,Eì¬i6‚ï¯i^WbºùóMI™®º®§”·ÛžRrvæyñ)²åÑEïq6VKÍF8aŸa•Ïn,x÷”e L…E„ V×õË+’¶a†I’ü¾2aƒ5NíjÜîyÏÞ]u<Ùe0lo{?q® `œí“" Ö~¿ápˆ§žzjîónß¾ápèi—‘¨¢î¹ð"a–QºãÈ€…Éñ5Eˆ–1›ƒe-“'­ïi³›Õu»|£gR‡Fl,’ä]?’òÅÄc#œ0áTÅU>“(NG;—9Ÿëz÷0Ëò”s˰š6VÀ£ÒÁëÌΠT è*ìöKÇKqmö®mÏî#Æu½ìIò¢ggñ'48ÎìÌËÂëCå¥qXh°T*•…Cƒ©~õââbÅo3Ïês˜$;Úã´6Ž`b2u¸Ýf£•IÄ\Ï|©+‹¼‹W©½<“ ¥isíZü=ëâÆ#­¹«Žã9Niþ¸iŽõ,I¨‚ÝY»H†IÉ ß“$i*ÊZн=ÎìÝu"ñu=Jq©áÏì‹ iÞO³é];QŸûòýÿx§ƒ·¾új¢¥/lº´µµ…¬ý;]7‰Ó-xi¹¶=­`,3k(Ns¼ðþè8œBÉ$báˆU7ÖMHUc6†REX-Ë3@Ã륪¤”—åó0™°éÀÀ´ÞDŠtšÙ®;}Os]ïš²íi§5+¼ëlVCŸðŒU&uÂÆ©(Š~§àÒDWi®<¥‹çá` _qïŽã­9NP!hSIʬ¿Ïe“—ú}|:0q& #¬ÕjSÝÃPÃ¥­­­¤_g!H¬ØXdDâXFéŸwí9˜nìT´4¦ü¤!Sq^ßh°aˬR¬Q×O»í)ŽÃ5ÌTä©´„S‚Ó¾N£:»®×3ìMø7¨ŒGQýI¥1XI§„øµŸiâ8^´3j]Ô?äᇽç„ï-šoÌ”,{Ï£ŸyÆêŠÄ2X«Õ*TU™î{qqƒƒÔëõÒŽ¶IÜŒF‚g°’—.LÒ”`}Á±°~?K¹a˜9ض=;&ʽ*Q ¶$å3ä¹rض]ƒ5*+G×½ÇZ-OàZUfSèõ&_µ9L˜¨sQŠc§3nPÆލŒÇ`§à\šG.÷AòMŽŽ Òíz÷zÀùyt#>U_¯º&bu ¾sç†Ã!nݺ…““?Ú:qrr‚[·na8boo/ïϳ4‰/ŠìGÕè-3SlV_ ^„5œI0Kf3å<-#2(û³ÒÖ©™ÃdHâÞy¾V¨Ù 7#clLJp˜UÆoÆâû ¥Ölj1¹2kü=¾ÔÊU¡ò¿$z’¢ŒÓ€%)ÿ7ÁuÑø%Z[Ø`- }Ë`­V«8<<Äõë×±¿¿›7oB’$ܼyûûû¸~ý:K]MŒ¯éÒ,–Qf9ÎuDw"N»Öƒ¹¸³6[ªµX•¨èiÔyãv©c˜pËÉ·¬Io¶eyiZN±v0¹³°ÁU$*]>|ßÑ´Ù5©L¡˜@E¦iæ]m6=ÇH’I Á¬eY«2«ñQIYØt‰ £•F× ?]xQᢓ8ÞgcÏî‚•†òßð™ñ-˜Ôš–qÁ\)œyè¨9vYA +X†™ )Ut5\âAc: –’Å0…ͧÚ7 ï¾Ã†~)˜5cU’¤|éQ-ª,/?úO’V˜ËòÖ½ANšXÖ •J²,coo²,ûÆêÅÅ…ßx©l$®s’.fì¤Õ»µ ‡À[îLk¨~5h5^u^“qÑõÉ^?ýè[ñê«oÍ{Ykþô‰¹fS¤á‰£ŽxDTšÕä1L†”ª~5<–C9˜‰¤4Yó?Äê%!ÔÅ”˜w­+ÊF)î›Î¬,QaÆú3 Hç–ååÓze9y„uÞ5B3ƒ£Î麞2¾a2ŸØ`Å`0ÀîînÞŸg)ׯªþ ³3%ï87˜8–‚A¼$–5¶G û^èšêõ¼åjÚòçOê$xº_¯7þs}Çw|<òé¼—¸6lÛ†mÛ³ Vêô¶*­ÖdT(ª‘FŽò»)Ðdf6®ë–cNà]eY+“+¥i83{Æüúðc®ë)ÛÛÓu^m·× 
__%bV69 ×n°âIRöý7lÛëàÛnÏî.Ün{ki6§å¿Ý^­&¼ ÄN Þdf¥DbÃËήÒýû¿¸8‚g¬¶Ô,o€u˜5G ËP’íAeU’œžNÞdÙ{<ï©;ÔÍ›í"MÓÐáfƒ¦yΣðõnjɥ1ìÒ8bf1ËÀ$Eð68š]5 RÿEèüÈ¥-lY°ßñ$,=?G ¥Á'‹â´Z^OZض- îSª:¹‡lèõ™ ]®øš–^ftVÆ8²JPfgLbYÔ¢z™5ͼ¨’†™Cé V]÷~‚*IŠö&_‘MÌ4={FU½m›ëDáJ9Â=c(…pQÀ¢>WШã1§tß²ôgH‘+o°êº_©Ñ1=u¦9Þ˜ÐþÞ·AÞyü_ÿjßøÕèÞ{#dË‚ûumا1Þ+#,kù4øVköÎEPy ]Û¦é«Áu”!V4Mƒ,Ë‹£«iæp5µÚ¤uU ¨ô©×;9Óž¶³ì5užvÛû?9V‰N‡ÓßÃR©øP«ÿ¬7šÛÖD-Ëûá9³™53MOf;åÇæ‘&ˆªfîu§ ­ãx—vÙ]®ëÝÈ6 ï»ó uDm?ä$¥ó ÂØYZ$;-/ÖVÊA Nè5ë=£"¥qêÞÒºù‡)¨>µ¬ƒÁ Otqq‘÷gIŒaE1^ª¤ oþjcí±Û-Qó(#Ð4@xì« v¿ °þ#dýÂq½´ùC#k»Œzп½í}$j ´,áû“¦yç¥TS ¦Át|®Ó[Ó4aYNãlpÁ“UÑuïô:(gLZze'A<ïy¶=΂“eïÇqƽ(‹¨Ó‰¾?R)3Ʋ¬è}æèeÕ¤ÑP‚0»Kc0m…aŠ)Áòä-zºdä¦*œå÷! cì#¥¦«Ú ¶í]¦Š’|o¥ò$Ú¯éë&TÒW›–½)¬-ÂjÞ‰<³ödE™¦¨Ô^ªY¥?öŒ€fMjÖ­­-Ôëõ¼?Ol\×…®ëèÅÝtxÝu},è)¤¢”PY†ÔnC’Z€2V¨²l`ÔhL‘i:ý)“‚¢WÍæt zà~ÁÔÕpš¦Å“oËJws¥î†‘»%E† (Ž?"]O³®)òÖ/kÓ˜¦§´´Zc˜løà=βƞuÀû7\*C¿ÏsY9tËÈLƒU×=™Lû‹²mo3¥óº®÷ǧ?šãxÇI¡çfPÌX–•®2OʺeMÖÙмÉyi¤ Ѽà˜Véôq"ŠTDî6Íñm†Î•¤„‰ú:)ʤ£pVÏ›ZÍ;x¶=ÎênL†õ1‰š .ÿ&“c)É»%8Q{uÔýƒ芦ëÞWHã_ÃN›4üX ÖjµŠÃÃü¿‡ÔÑuŠ¢ÄO¯ùçÀ›Þ¼ï]ñB~‚0Ù {S×y|4›É Izù¢%¶ÛÙŽÄ´,oó§ þôÔ{ÏFcü¹)¢Äe\éB©î±äÛ¶Ó„^¬Ößÿ?aéoJýoK …¢DË.%@Ðýˆ²?©™Þ¢àuŠ_´n .Ð(H• žÜS°›:ÑG)?À¤ä{™—Åp•°, ݰmÓ^`õyu¤UÓÜ, ›‡çª*Š÷8‡Ã™H%%Ø4Ç©é­Öô†¨(c/[˜ §-Aú9Ô‚#¹æ)±ÁÞ´§‡/ÒÂ{í<¨i]§3­žÑ½BU£ÏÓjM7ˆ%_Ô¬Ë:î^NŠ>ûw÷ÑXßë¦a¯c^9ZEYì YÄ27íœ fr­bg“ß‹ô2I𼆂ì*,4Xûý>vww'„èââƒÁ ÓˆêÁÁŽŽŽ&«×ë©Ï®ëÂ4MœÅ±i8¯ü­ÀþOÄÿ‹Rûêh ŒÎ¡êOˆØÁiVq\§u†§¨f°ie-¸®÷“e*J„ín×ܲõH›,嘡°GAÃ)ÕEØ6àîBþKoè¾ Î-_†Æ+‘ƒ#83ž ^6Île¨Û]¬ÔÄÙ(`þ=Jqiµ&Ÿþ= Z­ñløÄ'¾9Ý7HHÖ2> Çqþÿìý}”ëøyß ~[‘,GTÐV$[¼’mTì\&;9»åË$vv$”'Nº-‡œŒusúÆã%*.e¼ÙqrO29™SsÈO2ÓLjClæ¸Ú¹'ä²»x²"’žÄyiÞbïÄiÞh]°­.J½\¸y§;’³6÷Ô‚  @¬çsN{‹/à¬?<ïϪRoYáÿM\À¢¸\dTË^¹"/ùÞ˜V˖Ѱ¨hXcÚ`cl`ÎrYC”w‰©S”u¯(«ÝôûËÍWéx­Ö %ܧ±®´ÝkÏíT®I²EŽÐw¾ófîß[«_uë<¤,S)eÈì(n“²›I'òšÃê±é«T{; ò_¹›oB¢”à_|§§§™zB®®®ppp°”P©TR9¶¦iÑf9‘±Ún§¶0ÿ醟è¬7‚ã¬ä4¥k±×[Qævmc_ˆšaqÓÈR¾}v/tí’ ;F„ÕÛ±yñ¹ôÜ· ‹ý?Hv©ÞG£ˆ¼K!¶wC ªç 36“‰îæ†îŸþu¯þÏ8ûMøGÑ"¶²¼ðâ­ÃÝ@ XÝØ¹V¢d-ß+^JoÚ#YŠ”ÆJéò¾šuyå-¨ y“G£…÷œR«¼)^Ae‡ ¦Ji:E[-û˜tüu™œE°YÜ{Äááã\×’¥Œ‡á»¿{=ên„” öÑÆzÏs#ÈC¾-ËÚÚýZ¼õÖ;bGa Öñx Y–ÅwïÞMå±, ¦i®*ôÔ$Æ­Ôp%1<ŽÎ#I°þãïB§e?8ú?uüq4þç;þ_ªb¤`Rô)LŸ"ω,³.U²’oÀöÂ/e(Êê®ss¦ùŸgg«Þ=÷9?ù¤q1Þ{„» 
‘»‰6W÷æVošÕý­Ù´Ï9‹=7,e< ß±Íføµ×³µ¿ u’¸¼\hÔi…j˜Ls‘)äVÌŠVú›—|oÃZÿßAï×þ4Ä;þ3å© <v’TšáG熲ŽÝzMX]ÁVøÓ6oB“ï<ä[×õì VJåÊ Î4my§Ìj¿®Ñ¶e-·? cQÒŽßµeB!°xïþþ¢j…®‡0H¶©Ç Ýæè}’”|/Å…Aüæ›·ñ쳿ëý¿#ÙÇnÊŸŸÍf8<<ÄùùùÆÇôM6ŒE÷H/1KûE nÿÿø“ÿÈ[8³ÿ??ƒÞ¯þç¸xãÛ Å^75í#,kñ9š¶hâ²Ãi÷;Gò ø¤Dn8ƃ|9´ÏEJƒh·—•OM³ÿïgˆ’Je$Qî]îf¬iB%^l¬&'+wcE®ë0 Ã?µNþ©øXU»wóº[³W03:ûÞÖíÚ_5}ÝîÒ–¢° ùvc%iþ@¡ÌPªk«œ}êw yÑ…bú«ª~\\¬¿L¨‰ýmÓv¸‹bq"–edÛò­ëz´r½$xòÒIíO22IÖ(UÖ¯ÏAéònýÅ0ìÑ’ë2ÿÝÍ’è}†ý^÷ì`À6@ã:Xt=|¢GR¨‹öhœžþ2>øÁ/Ä;À| ¯¾úêüÎ;K½ð +¥ÉÕÕÕü…^˜¿ñÆÎc?þã?>ÿð‡?øž³³³ùŸøbþñ<ôØ’$Í///oÝ——óy·ëóÄ“˜ 8öphÿÿâÂ^òÙÙÆ‡½‘üôOÿôüðçÏ?ÿ|.ŸŸD¾çó¹#ß?ýÓ?øšËË˹,Ë‹Î΄2Ãá|.I«"|q1Ÿ·Ûóùh4Ÿ»?.ˆ‹ ûí¶ý“Â%Á„ðñ|þ‘|$·ÏÏr÷"Ëò¼ÛíÎÛíö\„ù6ʤt»óy³¹ü; lfŒFö‡^xaþƒ?øƒóù‘ÉeÍIä;Êþ½Y–çO¢Ê¢,/~Úíù“'¶(_^®Šs»mÿ šMû5nu¨Û]èóyð=! ¤~‰¢½.Ɔöï×^{-—ÏßæþM\^^Î¥¸AøCŸn·çó^/ùáIŸ ûø Ó!½iÕ̹ƒL˜"C:xÜý;²ÁúñÜùùØÇ>¶òýdÅo¼1¿sçÎüÕW_ \ç /¼zŒ• âÉû¯î§àHóùœ7Ð%мl“uò=ŸÏ#]_ÃápÞuï‚dU&`bŠF±Ér?NB{¸—áp8o‡i Láéõ’í!eÛÓ^ÝnwÞëõV‘aøxeÙÞÓ{½xŠú“'‹¿OšÆêâüߢvš›°»éõzóÞ&¤›v{EþÉaã'kqäùÉ; Šþ¦ƒ›fsõó./ýËÅÅÂ(ÞÔG›Iäem ë­[·pÿþý•dzÌeǘN§888p{úô)€Íº”-¥‡u6í7Ñ¢f§¸;2LYÉ7àS¿ªë‰j6LÓN- ëþF#ŠXæ/YÊ8aYTUÅÙM((Ûa¢–±ìï§6}kc¶!߀Ýñ]Ó4t»]'í=Þa¡XÔ°Y–ý]FýÞi¤…¢,júÓläÊÓžŠÇ¶äÛMäQ|~P;]*Šn6—JüÜݪýÈá׃®/:芢ÂVŸMÐxW5GMPÓ¸eÑÈïn÷æ´SXk°V«Uů¹Ü„J¥‚ÃÃCÔj5Ôj5v.½û÷$,]!ãj¬åó€Ð‰~ƒ¤2'VÞ™ud%߀GÆi§Lõ&SLŠÔÑ)YÊ8Ñï÷!Ërx“Ž Ù720ŠbäùgümÖÓ.â° ùlïv»h·ÛhG­G]«®ïTöêîJ $É®{‹2wš:íéïÀdö䛰, –e%k¸D]¯¨„»ç·{i·ƒ8ÔŒšÅ­¯n6íÏ^ŒüK·F;­q1e¡]‚kµîß¿{÷î¡^¯ãêê €½yoÂÒ|Ê·DÿE ù<€wF'sƒu"&[‘o úð^Ö Pg˜0²’q²,hš†‹uÖu½¸aÂlY‹.­ÛüLÓŒ®QÄ"ªÂU¤.ÁYË7`ïçQ'Ó´»SQÈÆÝr]U—´ojrçVÈiôp%<åQôLÙ†|»Ñ4-Ù¸&÷l¼O uð rÊÇ«.u:‹Ù¸›™‹î×Ü«o3 i°ÀÑÑœ eÓä•ΩAwIпý×ö¯ë®Ï~¹ƒ*ÃD!mù|æ¯F€M]I¡¡™` ³ YÈ8Aåk‡ÌSzX¨ª½ÿqÂM¿¿˜“øÏMŽuoø Q&——áQ8Êj jÌ_²”oPUÕ^¼ýäbŽ˜·¥¨'õE×W£GÍæbf;gÔ3~d-ßn|ggG!BZc§³>èäNÝ%D1½©d3›Ù`L&8==Åýû÷Q«ÕVçÜ]c¬ëǃjµŠjµšÊ±V”ù 4àâÀõ —n²$Ъº˜»VçÇ0A¤)߀ϦA‹¦ñ$ç$Ó “iË8YÁ1ŒÄÞDw9”÷üy¢^(ºê>w:0¡¸aPê“'ËŸI Àv„‹^½^¼½Æ4íõÉÀÊJ¾×f¸‹I½‘)ÏïAÎ Š²rä” "+ùöâ;;Û‹{`/±f!‡Ûºý™84Š[’xDd‰d°’rP­V— ®ŽŽP©T ë:&“ ^z饼Ï'Ã0¢Õ€´áÛiÉk°¦‘*À0iiÓwAƪ;›¦Ù,–òÍ0~D’õ$3+¯ÑuÛà"[—”Íæb–!þºTZ(ÊByŠâ¥èªûµggÉæ˜’Q³®º ß÷â¹ÿD›FSý‚‰»J¿ßÎ Ð4û‹ a5ÂÝîF—ÃlŒišèt:Á³WÉkhö&tv)¢J3Dilv¨gëôÅ%’Áz|| 
Y–Wò×kµêõ:Ðívñâ‹/n½ASTœ‚î;/ÕÔø9ä½)¬Ô3E"N H£±¨5ã&LÙˆ,놑¨v•9vÛ>”Ÿ®DF]ÝNUÕ6*¨KeåË4WïaTêØ°_Ã4Ó\t„uŸ› ,š–}ž_3ò,ʆE1[ç@X]Õ´ÈaÑ0Gƒ÷oÍ0ÛDQhš†áp¸¼Ÿ[–½ác†ÚòFÜ`“–çݰ6¥äw¬{Áx<Æl6Ã'>ñ‰Ð×}âŸÀéé)¦ÓiÞç´‚a¶±J³:ü°£Ï-Ö™rb¾ïÿa»ÝÝšPÊÙ™]gFc¦L†Í`ÕõØ–a, C¯§=èPTJ˜ÔÙ’¤ÔCÊ 5<Ö_ײ¼zl45¹„¢,^O_§~ÑÕ¬ æ(»Lht ª)ã™gì¿YqšZ1̶Ðu¦iâòòry/×4[‡욆³³ÈMh² ³»¬5X'“‰o{½^_J¦ç©@»H8õ«Ô¨À…s£Wa“x¦œhš†þÚ¯‡°P@YÆ™2)ÂJa¿ù]–eX7ÎÉ‹ „¼ü ¬¨©˜4RXÔËzßk¶ŽUY“eû˜düR £Û]¬•ê`‰nwñ;}µd Ëòöê ižá®­] «Ï6Î.ÙªºØ×WÅQ#¦˜¨ªê_¢g¶`÷zè«Å–ï°æ§†aïMŠ²Û™LƒµR©8ƒÝœœœd2) ¼OAuùEGW™bš&$IÂ×ýó¨9fÔža¶ ÉúÚîÀ†{pž¦%›µGøE»HáÚÛ[<¶¿oÆî×t:v”ÓýC‚¦—ìïÛÏ“¡BÿâDÒz½Åû›Mûwªß¢óp77’åEª®/¬ÛŒàIÒbLÅ.¢ªêÚîׂ°(壑nYqôœ1Æ Ó4ƒ³dh#¢Ÿ9©¼û«ªÚûª¢ØÏsÆØî³¶†µV«a6›a<‡¶µǰu- †a@¢Ö„.‰&Oñèÿ•÷ &9š¦á;ÿïþþßt169{€Ù4M‹ÖX,ÀŠR”E§÷zØ4JØïÛ-OÓìû‹·Ñû;émî>:–å…ívé·d0&¹®Eq‘ìæu»¶1Ýnîyv•„EïµÊ±Shš†ÑšBeï?.Ë õ©ñ¨; S£«XôW¢æv„(ÚN:z̲¥U½9¬°Öj5Ôj5 Ìf3ß×Ìf3 ÔëõÂE]n’>éaý>Ð~/ üTÞ«d˜ä躎ýÎߘÿeY¬¼0»®ëÁ%Cß·ˆrJ’­ü’O€7Åíá3‚…¢îë“ü«alª¨õzö=Ð]òNûDPš)“”ê¾]½i›æb4÷[¨‹5+òLÑ4-Ð`ítüuä`sGY»]–ñ›F¤.Á<Àáá!îÝ»‡ƒƒƒ%£ôÑ£G8??w^W4œ†Kžîºÿ¾à'ó^%Ã$ò,˜¦‰÷ ‚oçŠ(iv!Ã$d]ôÓR¨€Ó',¨i¶aæîú+‹žLš–ÎõáþèV«øFƒ ,ÏM%z½d#p˜ÍXÕdš OÊuq؈†)+NJC¹Néʆ ðµp‰d°Öj5<|ø§§§ +Ïàþýû[0Ã0l¼{¨l¦÷;ü ybJŠ#ߤØ\cY¶7^–ÙXevMÓü£«Ô-É#è–µPl¼os;ø7êG„q…ÅÖÌlšwðå¹Ûµóǯµô.ÿM˜’£iþÜþ íI×´•ô’°}‡ŒU±Å{ÔÍ#’Á Ø]€ìÎÀ SbLÓÄúS øƒÀBQ÷è; Szt]ÇÐ=>Ä3Cið¬ä3eÀ0 ´¨}´Ï潿ŸŸó…aÒàå_Äÿöo¯2¥¹¼\ êÍÜLb¬eâÇ~ì×ñ­ßúKìTåF㺫˜ÛXåÍŸ)9¦iâ?ý“Òù]UÙXev]×—Ç D¢ Ãþ•뛘2¡ë:þø›o¯ˆOUW›r1LÙøêÇøÍïÿþAV”¥j=† d§ VUð?ðïxê˜8]‹ÙLOwÞø™]D×õåq6ªº’¬ªöwl ÃÃ0Ð\rš5Kc>x_gÊŒeYøgŸûÚžð¨ª.æ­2Ì:vÖ`Õuà½ïýuü©?ugù GŸ˜Àé€Ð&© Sz4MÃY¢–µ2àÔ0ìfl¬2eÄÛýºÓ±÷ò°y¸ S4MÃøó~©¸Ÿf©rŠ/•µsXËŠ¦ïz×CÔÞzk1ØÉкþ—aJŽišxîº3·¢øÏ/c˜²Cʼ3 Á²–ºn¸³ƒ9m’)+éîXÌRec•ÙüægS·_nPÇDe'#¬¦i;àO¡:ø×@µjïü€6¸v•Ù ÃÀ_ú‡ÿ€îÎi5Ì.B³ûDqIËQVî™òâΔ!¸›Ù‚æg·Ûl¬2ñØÙë_ù+ŸÇw~Ó7ÙÞxÒdDpg`fgøúù/ñ¾ó;ðøfw œ¿ Û1iY¬à3ååêŸþS<=*Ð4íÈÃì ÞýÛ²ì6V™¸ì¤Á*ŠÀ7}Ó¿Åù[¿µÐd”¼WÅ0éò¾ÿõÅ/}臠ªy¯„a²šŠ‰ÚeqC¦Ü|ðÕWñµÛXeEžÙ%.‡C|ü—~Éù½Õ¢ H†‰ÇN¬0Êÿ£‚;ÿß/Ù¡§¾ýÃìßýÛ¿þ/~”fgYòÎûxfšMVð™ò¢ë:þØÛßî8Ö»]Î`vÓ4ñû?÷9T,²aXÆ™$ì¦Á*OßøE¼ë+ÿ P@À^xf‡ø'?û³ø÷_ómÀÛßε«ÌÎâ4ëд%·¼aص« Sf^ùÿG|s¥K9M’Ù9~î'~ßÿ›¿ 4›0M»)—/1I)¼Á:™L0Nc¿ï‹_þ$¾æo¾jGV‡àFKL!I*ß¿ñ©O᯿ó¿fO%Sx’ʸeY°, ¢ 
¬L—ïtØKσ¤òmš&¾ï¥—ð[ñ¯¡Ñà4I¦˜$•oøÝÃ!ÐlBUÙÁÈlNa»O§St»]L®›ȲŒ~Ôn–eZ½ÿýÀý¼Ï„aVÙH¾¼õÚWpñ•ïÂϰ·’)(›Ê¸“ÜéØm€]3kxÞ*“7›Ê÷ëûÞq÷.”ùýevÀ0ÅbSù6 ÿÙÓ§8»ýסªÀh”÷1e§°ÖããcT«U†W^y“ɃÁ Ò{ýOÿLέa Ì&ò ÿËkß…¿ðÿ­¼OƒaÙTÆu]ÇÿõÉû—f–ÅT™â°‘|~ï/þ"†ø9¶?†aŠÄ¦û÷¯?x€¿ù-ÿ-Ô‡ïÂhÄ3²™Í)¤Á:N1qÿþ}@¥RÁÁÁF]4ïýÞïÅ×üµ¿–÷i0Œ/›Ê7üó÷߯óoÞ§Â0¾l*ã–eá¿üËxæSŸ†C¨*ÐhØM;&o6•ïÉþ(šïÿYüÎw¿›»\3…# åÿè7ð÷û‡ÙXeR£ëÕÕ vÝêþ5^ÅåAóg<'®È›óóó¼—P:6•oøå_ÿÏJu(«œÐ›‰Ç¦2®iz–½£¡Ñ`ÀÙYq#Q¼‡ß,6•ïƒ/þ|óù#¥1VË*#¼'cSùþ…?÷ßá‡ß¦áïüÝw•BOáý»Ò` œÙl–÷òRçÑ£GÎQ6tî››&ß@yåäêê =Ê{¥cS—óî¿ó ]ÞÁph÷[*r•ïá7‹Måûoÿí;øÉŸ¬ä}‘)«ŒðþŒMåûwýðà/ý_$å}&Ñàý»²éRØñôéST*«ý¿ù7ÿ?õS?…Ÿù™ŸÁíÛ·ó>…X|á _ÀÅÅ^|ñż—›×_‡‡‡y/#_øÂðÅ/~ïyÏ{ptt´õÏO"߀ÝUò»¿û»ñÌ3Ïà}ï{ßÖ×½ e”xóÍ7ñæ›o:'ÊÂãÇñ•¯|%·ÏßtÿÖo½ƒ÷ç«øš¯ù~ìÇr;Èð¾ý5Ïf3|Ë·|K.ŸŸD¾½û÷OýT.KODe(ÿþýó?ÿóø¾ïû¾­~Z:xYD†÷ïíB:xÜý»«; ÁKµZõ}ü‡ø‡ñÃ?üÃy/aÖ’D¾àþØ÷Ò&¼‡3»Lùæý›) ¼3E¤)Á·nݰœ–0NC•y†) ,ßÌ®Ã2Îì2,ßÌ.ÃòÍ‘B¬Õjõz}©˜X×u4¼—Æ0ÃòÍì:,ãÌ.ÃòÍì2,ßLyÛ|>Ÿç½?&“ Q­V1›ÍP©TprrXßÇ0e‚å›ÙuXÆ™]†å›ÙeX¾™¢QXƒ° ¿©X¾^¯ç½†I–of×agv–of—aùfŠD¡ V†a†a†aææRÈV†a†a†a†aƒ5E&“I輯Éd:9ìùuïMóÒ\÷6×ÎdK–òåù´Î!íµ±|ï›Èø®Ê÷¶ÖÎd ïß,ß»ïßù­}+Ì™yíµ×æÏ>ûìüÎ;ó;wîÌŸ}öÙùk¯½æ<uu5ÿØÇ>æ<ÿÉO~réýaϯ{oš¼ð óü㩬{Ûkg²#KùŽò|Zxå{Óµ±|ï›Èø®Ê÷6×Îdïßù¯ÉÞ¿o†|s„5Q¯×a^yåÔëut»]çùããcT«UçùÉd‚Á`éùuïM‹ñxŒÓÓÓ¥Ç6Y÷6×ÎdK–òåù4ð“ïMׯò½;l"ã»*ßÛZ;“-¼³|ï:¼ßùÎÛb.;¯¾úêüÎ;ó7ÞxÃyìêêj~çÎùk¯½¶ôâÇüÇçÏ>ûìÊk½Ï¯{oZ¼ñÆóøÃóO~ò“އg“uGyž)YÊw”çÓÀO¾7]Ë÷î°‰Œïª|okíL¶ðþÍò½ëðþ}sä›#¬R«ÕVfS]]]*•ŠóÿZ­¶ôÊ'{~Ý{Óâøø¸uëÖÊ9$Yw”ç™r¥|Gy> üä{Óµ±|ï›Èø®Ê÷¶ÖÎd ïß,ß»ïß7G¾Ù`ÝJ¥²4Ÿj:b0àààÕj5T8f³Yèó———¡ïMƒóósL§S-=¾Éº£<Ï”ƒ,å{r$ßt.I×¶k“Ù›Èø:9(«|ó¾ðþßÚ™íÀûw>kÏ6XSb6›a0à¹çžC½^ǃœÇƒxúôièó_úÒ—Bß»)Óé§§§ÎZ½ç“tÝQžgÊEòµœ„É÷¦kÏúÚd¶O_'e•oÞÃw Þ¿·»vfûðþ½½µç¬)0™LpxxˆÉd‚‡.yKÜáx/Õj5ôy·×Èï½›r~~ŽJ¥]×1 0™Lpuu…Á`€÷½ï}‰×åy¦Ï{ef0øÎOÃ0,ÒªÕ*f³*•ÊRW³°ç×½7ís™L&899ÙxÝQžgŠOÖòåù4ÏÅ-ß›®å{7ØTÆwU¾·¹v&xÿ.ÆÚ™ìàý;ÿµo 6X·Äl6Ãd2àŸ7öüº÷uÝy¯Ùe–“²^›ÌvÙDÊ*ßy¯Ùe–‘2¯Ù¼—_¾Ù`e†a†a†a ×°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`e†a†a†a ¬ Ã0 Ã0 Ã0L!aƒ•a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`e†a†a†a ¬ Ã0 Ã0 Ã0L!aƒ•a˜ÜÐu–eå½ †Ù:i˾eYÐu=ïÓb†a˜Ôaƒ•a˜Üh40 #ïe0ÌÖI[ö Ã@£ÑÈû´˜Šªª0M3ïe0ÌV`yß>l°2 Ã0 
Ã0‰Ñ4xæÆÀò¾}¾6ï06noM»Ý†(Š+ω¢Y–¡ë:Úí¶ó¼®ëN*X³Ù„$IEQÐív¡ª*,Ë‚$Ih6›ÎñA@·ÛuŽ÷õ •0ù€~¿˲|e,H¾–q¦ød)û½^ý~éq²,ôûý•÷1Ì&øÉ3=¦i@–åù¤Çx¿fÊÄ&ònY–#»‚  ÝnC,ÛIàkèt:è÷ûÓ4±¿¿ï\ Š¢8Ïiš†V«å\$€­ì´Z-¶‚²¿¿UU—ž³, –e¡Õj-¥¡õû}(вr¬¨¯g˜(„É7=Oµ|A2 ¬Ê·ûy–q¦ˆd-ûNÇ×ÓoY†£1L¬“g7~òÉû5S&6‘w˲°··ç85M[*Ù`ÙNÀœÉ•‹‹‹9€ùåå¥ó˜,Ëón·;¿¼¼\yNŹ,Ëóù|î<qqá·eŽþ?ŸÛòU¾éý,ãLÙ†ì»ß?ææOžûl)å(·œÌf³R~ï?Æ?þÇÿ8³ãó¾€÷ðíóÆo@üÍ¿ù7S?6ïßË”YFʼÿ­¿õ·P«Õ29>ïß xÿÞ>IöïÜ ÖããcXΟ¿ºº€¥‹´V«­Í£ÿЇ>„~ô£koEd0àîÝ»¨×ëy/%6‡‡‡K©$ea<ãÑ£G™?mù€~ðƒ¥ü®òËI÷•ÃÃÃLÏ{øÞ÷O–{8ïßË”]Fʸ§ff¬¼»áý{û$Ù¿sI >??Çt:õì°‹b6›>÷¥/} †a”2máè訔 €Ò^(?û³?‹Ï|æ3™? ù€/|á  ÇÛýÂR Œrõz½´7à×_=³ãó¾ ïáÛåüü?û³?‹Ï}îs™›÷ïeÊ(#@ù÷ï¬ñþ½ ïßÛ…tð¸û÷Ö ÖétŠÓÓSDQD³ÙÜxý†aÀ4Í•c)ŠÃ0œs 5Ó¹ýèþ(æóyÞ_?“’;·,¦qL]×cK’´ô¼$I‘>®“v»½ñšEªª$ ívÍfséZକ®YÀ–óõ¯þUjß Ã0 Ã0L^°Áºƒ†UUC’ŒN7¦iBUUhšæ(Á–eÁ4MH’ä¼G×uÇHm·Û+J<ÑétÐï÷ÑívC×ÖétØ µ¢(KŸå^#)ådL˲¼d0X–UU×ʲìPDŸnêTgL²A7mMÓR‰úojFš‹¹k¦£¬‹Î€S_Í0Lt4MsÒü‡Ã!>ô¡å½$&c¨¤‰Ê'Â$)4sFUU4 œ9e!Íf­V ’$9÷Ê £þT†BNFMӜƑ’$­í¡À0Yãî?³É1t]ßX‡¢r,w‰Ÿ·—Ž®ëx饗ð=ßó=±ŽÍkôû}§žÎ+䭢׉²ju»ÝH£šÊH,ƒu:âôô'''¨×ëN‡'Y–Q­VqïÞ=L&Ôjµ¼ÏkkP>8oØÐïÛÍ’LÓþX³]·_cšö{DÑþ¿ûï"NJ0|ßT¨nÁÛD‰ÉEY–éÞõÕ¯æ½Z&#úý¾Ó\#Ngö8sªyDØÍ†Æ·¸e‹¢ú–eA–eœí¤bÌ0Q¡@ªÏö‹’Òì^EQœZî¬öÖ´gŸ‰XëÕÕ ^¯¯`aˆº¡ÚTÀ6h% Ð4Û€å®Có´XÆ €e-œ8a,5.>ôÅ/æ½b&!Ôi8®(î&6 “¦iBQ”•z7ÀVÐi$ ÃÜ4,ËB«ÕZªý *¯pÓëõ°¿¿ïŒŸaâË`­T*€ÙlæüŸ˜Íf±gê”ò>²àeˆÛàìõlÃôìl¹žUQì×¹•õv›Õ0 ƒe¼H†m¨ªê"S°¯ð9.Cöñ­[y¯˜I¥Z6›M´Z­•1ý~Ÿ3z˜Ì …<¨)Ö.Gp& ê@i·q¡,5¾~’ñ;⼘¢¨‡‡‡KÆét:Åññ1ªÕªoôu×PÅQäYðR²쨨kñUºî@èpv¶Yb6‡êVý"Ö²,¨ªÊ³'ÝèújÄ”jZ‡CÛ0Ñu»Ì .,/ƒ|@§ÓÉN¾MÓv^DŒmöë»Ýð¾ªÆX‹¥é†9¿è¼-kõ; ùˆSóš!e‘ñm¢ëºS:Bu§ô{–s÷˜t¹I²Ýétœì¦HÒb¯Ï›$ãAèºî;²¯Ûíboouÿ$±šÑ~»éÒññ1f³NOO}Ö1›Í0 pttÃ0ðÊ+¯8¹?£Z­:ÏO&“¥ç·yĹ£×5–eÿx¿2žyÆþ÷ââF«e‘oEQ²­ÇŽÓPHU7éýýES/U]ŽÎZ–mÔ¬ýþ¢6ºßßüÆ·‹o¦ÍЖåEC%7–U˜Œ„²Èø6Ñ4Í/Ë2Úí6F£TUÍÖ1ĤʮË65ü¢®§¢(¢['ÛQéÊÌÈ‚]—ñ¨PtÕ‹(Џ¸¸H/Óì&!Ëé6eõÛ`5 #ôgÔ¬éàà€µ­×ëNÄv:b<ãþýûÎó+³è²Ä݈†¹FÓ‚½&Ã!ðä ×Ï¡<ò­iZvò7m·Ý¶ÓÄ{=ûßVË6½ÑTAXÃëF–íQ2TuÝßø‹ m¾ªºxŒ"Áqi6£yƒ6ü5R*ƒŒo]×W¢T¢(b4áâ₟iaY™*D»,Û¦i:ãøF£F£wN÷ý! 
2Þ+vYÆ£BeA™œŸŒSÚcw ìt]×1Øé²,;@õz}Ű½ººÂ»ßýnçÿ–š:Õjµ­æÑ¾Ø:ã°»/ÔÍ— ¥ òMÍ_2“ï°èª_º«I²RUµS˽¯ »‘HÒr r³i¿‘åd¨É@Þß_|n«e?æ&jwß(Ñí…>£”AƳ„œ³îú&]×WRË6Vct-Y–½¯³(£ôê]–mªU-¬^SFhâAe*º¾0V3tÊ첌G%(ºÊ›Øë`0Àéé)Ìl6Ãññ1&“ ýéOc2™àää$ñ¢ë: ÃT> ®o?Ú²®þ®DœŸŸã§ú§ñö·¿=ÓÏIS¾àõ×_Çááadç4÷1Ótš êÅ4mr]]«$­ÖJ'A#`ÜFf‡­4ßôìlõÚ£(¬iÚ†ú¦Ê‹$-GTDWñøñãͿßS´= ÃpJF4Mã@7ëœRA†}ù|"ÇÓÅÐïÃhµðÂûÞ‡g¨%Џ'EQ”À¦_Ìø•I%E×1e½^¿üeL&“L'rÜ”ý›š‹v¦×mgˆ(®ÕYH»ÇJ žN§8==E¿ßǃptt„££#÷¡}ýèG7¾PJ3˜}Û^Ò26º àààŸøÄ'2OûHS¾àƒü NNN6Rvúý~öΘ Y¡qF4¾%íT*?ºÝÕÌA°£5­Ör½ªú§û’íg¬öñG#ûÇ4í.ÙŠ’¼‰¾#Â0b+E'''¸}ûvæ_o÷ð,1M¦i¢×ë¡s ÷K¾ÑìïÇ¿¶5m‘½à—epqa— ÐíB2MüßþìŸÅ·û·gvEÜ¿“@% ¥ÐkÊFšõ¦”]ÐnCÑ4ܾ};óñ‘7eÿ¦£Ñ———|-d‰(®½.H»Çа’'Æïæ\¯×Q«Õpuu…jµéxõz€#ßívqttzF=nR4M+Ç›´sÄUÕ²¢ŸwÉ(¢|K’”oŠb³i+§i¥ÐFýL7’d—ªjGnÚm[öûýàTÃ(׆(ÚJu·»PÀ-+~ÔµÝ^þnL³°×fÑdÏÆ»½éM’Ièf[TÏŠ®¢(¢×ëå3þÀ²lCq³NØIÑõÅ*¯±I4›¶á9ÚÎÏNgU)—ådÝ忚ÖÈ)»&ÛªªB’$n´‡8rm*‹¦¹µ=~×d|ªªî<+A$(]ŠKì¦KÔ]ë¹çž[z¼Z­FÊ]¯×ë888À½{÷P¯×quu…§OŸ.½÷è臇‡ÇN‘÷óÏ?ŸéQˆèST¨yMmÆÅV,¼ŠDÆh” –oØ2¦„rýHt ¨hUƳIJ,hš† OÊxn÷ŒNÇN5w×{nK)SUûg8Œv ü~ß~_³˜vM¶¹v5!qö]AØ<:ê5’3ìï°k2†eYÅo¢šüä åÈÞ†Ó%¶ÁZ«ÕðòË/c<ãÑ£GlͺȪ›àþýûNM¬÷½µZ /½ô’ãŠsì¤hšVž÷4•XðS©Ñ E[ÙXMLå[UÕe;2ž“ȃ"Êx–ôû}4›Íì#UýþBù½n0´‚®Û¯!c9Ìq™´co†a;¤./ã·À†ª›²É6eÏx£H¦i²¬bdÕ?ç*éDQ š½M˧ cùzÒ4àÛ¾-³Ó.›Œ'e§;¸o!¢‹-¬%ÑÖétŠJ¥‚££#Àx<Žã^­VC_OÃŒ·AérÜÛm{CKð$¯ˆÛ`ÍclÎŽQ$ùÖu}ó´°°Ùªn¼7^fg)’Œg‰®ëÐu={‡7RJµÞÞÈ©wL“вœ.FuE~Ê›×3nYþi‘4vÆo>òŽQ&ÙÖ4 ªªB×ug¾¶eYP¥Š§¢(Ù§”Q Üp¸0,{½Õ}¸ß·[§ôzvm)5þ’$ÛøÝÛ³ ^wÍÛ¸µ,»Üį6Ê0Ö7Cb¶Ž®ëFF­V ûûû$鿬š–ÌhÅUY§‘kq2…6½çy¯iAÀ»¾úÕ쾯¦Ñh ñ>;;+O *.d$nc<à:¼éÉ–•I}m¬ël6Ã`0Àýû÷è*`Ïü;==Åññ±Ó˜©Lhš†ËË˼—±}ÜZ÷ì¤-6`²EÓ´tj8X˜FÓ4 ‡C4¥¡ò4c5Ó´JUµoîQ"IÒj¯ÇÀ8½ (“†h·m£õâÂ6f›MÿϹ)ÆO‰°, –e9 Àôk%±ðxj––c\’’é-~õ§¤ YVtçŒ l–ñæ}Ÿ$áCŸþt:ßÍ ‚ä¿4å}›"ŠÉ³ ÒD×—¯=ï,ù”ˆaL&˜ÍfKÆ*qÿþ}T«U''¾ X–…V«UÎb~£hââ ã7›¶ðýfÇD‚ÒÂnÌæÍ0   Y–qyy A ë:4MƒišÙ:„í¥Ù’ñêþ,Y¶ŒýýàºY¦C… 9.<†‘~¯ƒ¤Vï:¨‹}œ 6QŒ÷zjœ¤Ã%Õ³d”f4eƒdÓtô4ð¦ÿJR&½LÕ°ñ4ƒ¹hYaš¦c¬–RÀÓð`˜æ²‡³×ãèêÑjµÐív“§Ä¸›¿D….1%ÃÛA{ë÷ƒ"8L»];êUÆ{á F×õâ8ÜÅŽôD¹ß¤9C˜æ¦Æ5ÔMÓÿÞÖnÛÇ[gPTI„¨kè÷m®ß·ßçmü$IøÐ¿˜Î÷sC0Mº®ïfG`?šÍ…ìåw„L éDsX1›ÍœÇg³™3‡µV«åðmŧÑh ×ë•ÏX[[†i®n°¢ÈÖ@UUˆ¢˜¬†É4måµÕZžñÅA'Šar†Æ”"*•%¢ÈÆj 
)Œìš¦m|ÅURÓ0Z½éˆQQÕà®öQ"¬n£–¢²Q¿+2RG#ÿ1o‚€÷ºtlf=4róÆ@º]c•²@m'VŠó²c7]ê÷ûÇøÈG>‚{÷îáÞ½{øÈG>]×qrr‚J¥’÷×´ªù(ÄFª­ ÏÞ&p$ugñs ?§¦,ÔÚŸ6Å8Fh<~ T’1LÐtƒB iñ÷ýf3½(L³ß‘oá†éºãy{€DÕ'#Î"þR té¢`Yt]¿9MÆÜd”~›Êº€UYß  ñV]×Me¹Æ*`oô¥ŸM&Š›ZߔԉˆišÑ24®¢Ý¶¯lbpòz¦D¨ªšOw¿¿¨e˜xëWs…Ê‹È\Gš£Fh¼G’(ôó#ʵ÷^iY¶qaDÖ Ï>‹“ô¾¥¦ßïßLc•(b„5LlµðÁïüN<úŽïˆuȵÖétê¦ndYÆÑÑŽŽŽpppPc°½1¥¥h™BbÆñ\kšm¬v»en Ƶ—7—(UÒ4F†¹¦Pëp¸èB½mhfpÜϦ(Ûq¯(‹¨/uþ]÷þ8¨êjÓ3f-zH9’iš7«Ù’—¼#¬ý~¼Ï—$Çaó{ñc}T¤ëéé)¦Ó)êõ:dY.Mj¥°²¢ @À üÅRÂÛíõ7PŽœ2;‚išè÷ûù7è`¥•Iˆiš0M³8)Á€-Ïy­GìÁqðÓÝu΀$:$«±±, FOž<àóÝݸÚUÀv|PgÞ<åɲ"§¸/!xýÏý9üÊ£G±Þ¶6ÂZ­VÑï÷ñðáCÔj5œŸŸãÞ½{8>>õz˲|¿¤áÁ,b¾{XTèü½-t]'ßEé4Ç0£ª*$IÂååe~ª"Œ"`JK«ÕB¯hNDIÊg$é3q HïH?ï±û\‚Žé§CybZ–=*Ê ßkc£]7~4|¾sÓ4aÆÍK¦^6€-£yõa Œ-«†Õ=ãK×uŒÇcœžž¢V«9Ñ×2`šfy#¬nƒUQla‰s.ªjÿD¨¡(5d¬¶a«&€9£³&7Ïû&ÃÓ&CEa¸¸¸ÈßaY¤ÈS*:dY.o³0ËZ4ޤN»›Iî7ôùIå× ŸÆäÐãN1FV•j¦äWæÔétŠç¸ÙÔX–¹MÐ4;½7Žm°e["v—`B–eÄÁÁ&“ îÝ»ç[ïZ4LÓÌ_aIŠw“q%cu“†&lc°èè°Õ67ÊXÕu=?ƒ5Åæ “N–ea4å¿÷Ewf ªªÂ4Íâ(éIÆìÑìQ`1¿t’(ëî5'Éã÷ZI²¿E±›<*jC,Ë‚išh6›++E\Ë(K´u< ~õûÑ^¯ªvÚüïe±"¬ãñõz}éw]×Q©Tp÷î]mm᛻² I=)nc5éù°£–g°À"ãvl}­ºÁjY‹æaÜ”ts¦4PÚØEQ²J‚¢; ³†~¿_9’7¤÷¤Ñ0&‰ñî®%%£™j£àg$шœ¨5°ÌZTUE³Ù„(Š+)Á…j:¶m²*)éõì4ö(}M4më½M"EX'“ ž{î9:麎ÃÃCŒÇcŒF#âüü|«‹OB©£«À²€¬óN6‹Á½›«€€!€"g|6`GW½k¿A%c±›Š=óÌúטf´è)§3CUÕbÕ8qý*“Ú× ¥¿x˜ªºþ=Þ¬0QLçš0 [牂»–T–kÝ(®ê‚`+ú²ÌÆjJМlQW¦{”º´oSÖ9‰úýdýnÁÎ ètÂ_×鄄ʈHk·ÛE­VÃÇǨvõáÇxùå—qtt„Á`°ÕÅ'a§„|Ýy¸ ÔMçk6taG. 
t¿\€mLË>G¸î ±›ŠE¹våšan¤ôŠ¢­‡)<º®Ow¡¨„¦­W”½Fn³ÍÐ]GØw³nô°ñ× ˜íC)ÀAÙb†aÜÌ«ßH4E±eUQìç-+Zª½a¬^í¶ýþFÃŽ¶zWº&rè°¿Ö`ǘN§xðà3Îf6›a2™,Í_=88Àl6Ãx<ÞúIÄ!Ö|Ê] ÛÝâÕVR xÂï‚Õõô=‚ngŠˆbE,)[¡@%jyÛ!¥‘±n4Mn&Ã䊪ªP\Æ¥®ë›G¥:Ü<Ê 4X×ýÀuŽK¿{D»,ʪªËkð‹ˆjÚ²RNéŽQÖMµïŽÝëAU·Þhæ&A#l.//qvv¶d”Š¢èËn´ÁÚí®Ž¢:p÷¨ 8=ß²ì4ù’u±^k°Öj5L&g\Íl6ƒ®ëKÝ‚8©À·nÝÊûœB)L„Õ@üñ0^ƒÕ>¡Õפu~ V jÅ1û°¿Ã(ºdÄ^ e'Ó”™(‘ª¢8ƒ˜‰¦i0MFªªnžLNòXs“…4XUãS’¯‘ š"œIp_ߢ¸ú^ƒÕoV¸,û¯[mu4ž<±ÿ½©†Òk鎰š¦y3ëWƒ 9uËfP”ղ쌡f³t²É`­×ë8<<Ä`0Àáá!f³™#,dÀ*Š‚z½ŽjµùÃ݆pÐóÓé4Õ.L—` všjgßü¾ËËåßu=†”úë'ÏE(#Pa‡|ä!ß–eÅÛÈÓTÀ%i³ðLéÈCÆÃ°, gggh6›èt:É”j^¡(vd…"*T#´)i—l0™P$Ù.dƒ?ã3Šn¡Líñ»yg¬ ÅœšÒD=Ÿ§H2„§½Ë²ìô ‰= á&à½~¼u¬šfg4öuZÂLHsX{½ƒF£*•ÊR¦_|§§§¨×둇YO&t»]çB¨V«è÷ûÎ1§Ó)ºÝ®s!ɲŒ~Ôa¶k(„±J$ÑíÃ.RÚpÓ8Gö¼U?ždûÕ„B]Gˆ×±˜ÇÉD€IDATºgLžò¨†5­_8ÂzCÈSƃp§‰µÛmH’”L©Q”E 0 Sì£t^ _¤­8°c§°Q¶ Y¿Úlú;ÇÜ iׯy¿oT‰êúܯ÷Kñ¿a£hŠ(ã–e­•qÊ +Lài›Ä-¹’e;í×ýþá°tQU7‘ÆÚ‘úòË/ãáÇ888pž;88ÀË/¿Œ“““È5­‡‡‡¨×ë0 ¯¼ò êõ:º.ïÀññ1ªÕªóüd2IedN¡„œlû4{yèz:›.Ýo‚ä:oÇ– ;²çO)!~ vBò’oÀÞôc)é²](Z‡J&7ò”ñ ¼i“‰=ð Ö}Œ4:†ZV!›Y0 Š&Ûº®/º„,o/Ré÷9Þ&2ÞŒ3ú‰•ö4(šŒÑ¢¦’$7=>K:ø™9‚°,çÝnéå>’Á Ø5ªƒÁƒÁ`itMµZ•<1›ÍpttÀ6†ïß¿étꤌÇcÜ¿ßyþàà£Ñhã“-Lýª;«,Žþ¦,õû‹–Ö›`¢øõžM$3š·pÍS¾c›d9ú&ÆQ!ùËx©íïANMIÚ\çØBSDÙ.‚žÆwÑj­wùÕ¢z Vož ”>Ê´)E”qÀ6X×”$I‚¦iÅË6Èj"˜$…÷ì,þ{ L¤”àÁ`€ÓÓSç÷ÓÓSÜ¿ßø8Ôjµ•hìÕÕû ÿSjý?<úBÌ`µ`GUÉéG×Ý´aó(˜qM[J¯u°`Ò=Ø3aã"^¯9ã ažò%¥†a6%OC×u ³î曆ÁZ&ãã†Q4Ùî÷ûÐ4-sc¡pt»Ë£¤Ls‘õGÇñs²ÞpÇkÑdœˆÚCÓ4œí˜!ˆªÚYy¯¤Dаžžžâàà†aÀ0 àôôteÜM*•ÊR‡áétŠÁ`€ƒƒT«ÕЋ"Éç¹)D„ÕÛÕ6­åt»›^ t÷u÷ƒ-¦×:˜°k~“fFmÉŽËS¾soõÖЂÙò”ñ ¶æ¬¡ñ›P”²f…¢È¶aØß߇iš¸¸¸È^¶5-¾3&I²F#ZZ½$Ù‘Zú¹¼´UM³Ó#í/‰¯¥EƽDI –e9~ÙSYQUûç¦9«BXk°Rú/¥p"«aÝÅÖ1›Í0 ðÜsÏ¡^¯ãÁƒÎãA<}ú4ð¹Ï~ö³øô§?ÃÃÃÀ×b«ˆe£+,-Ý­ÍŸ(ª(Ñk#È ”R·‹“'''‰‡(䵇‘ʘM‹–²»i„•¯‘9::ÂÝ»w3ÙÃóڿ݆fÖMî,ËŽV ‚rG13nM3¸c|eY;j¤º¡ý;LÞ6¡hûwTÝ\Åò¥«ª}5›‹”ö°kÍ4íî³³4Vä:xä¦KiÒívÏk(5ŸšÆ.Á¡‘?#¤ÞÕWQ¼sÑäóèè6¾ïûþ]ê_i^ò Y©¸Jz˜g’Až2NX–…ýý}H’I’ÐÞ´|Bד“aõz†Á# JFd{+Ñ&?EYUík êutïê¤ôcR¥2îÅ4Íâ§ú®ëÑA}gè5Ý.g¤ÌZƒµZ­&*¢Þoëí4È-ÂjÁŽf94io×õ%GUmC«ÝÎHîÏ®³o÷í … úý…ÎÕïÛz˜,²¹|’Î(Šöëb¬€u;jú)Ë×÷g€ˆð'ÿäð+¿ò{2øR£‘…|g^ó$Ëö6xÙ~>S*²q‚šÒôÒòPÓFõµ¤”4öÿýR};ö ï(YÊvæÙ`A2IFf”}Ü0Âk¹½)Á”iõœd&5²”q/™§ÀoŠ®/ºö¤`ºË>HY7Íì²vJ‚iÚ?”`”Fà-rJ01—:;ݽ{wkB½ 
¹æÈS:°MØ-uìs]dôÅÑ¥"aÂŽ`Š€¡-jÁý²x€ec¶Û]¨^•:Í’3ØÖÌoÔY:ú®ÛQ^]·÷绣óð=ßóï~÷¯¤ø¥æK¢ ?íQIl°¦möA×=õjèõnæ=1u'hóóCÓì?Œ¢Ø¢ŸÜ{2e&*©t¼&ÈpŒ²IÈr¼9ÚAú”$­:6wxÍ&­s“ËMÓ,~:°_ªc«e_dÝÔ? –eûré–J·ÌM·µÈ«®ëxá…V† Ÿžž¢Z­¢ßïgÖ‚;-r»(Âô úZ°½×kTÕEd0‰BjÂahÁI»%²,[Ç’¤Å}((ÛM’Iº!ýCÈh]+ &ÿ"`ulÁ^§7R£5'r à™g#%É>†¯3·à޼¤$Ù”¦‘Yâh’®Ûò_”±g¤ÈÝ)»NÓ¶×L¼H¤>¢,¬ë©dú}ùŠbÿ¸Á“T"¬¤5v»Ñœ1QºÀa{}h¿o¬,£-úìjwv©e¥3í§¬¨ªZìt`J!ô®1§ý=’^3ÞIY”`±ƒu<£Ûí¢^¯ãŸøÄ’p麎óósÜ»w'''…¶ne®Yaû·uý# K`Mj¬6¶À,mÞlcõ €¸l ‚mà‘W„Òz“*ð’´F@¯ Uˆ°,ÁßÀÔ´åhm¯g¯Í[öäIg²Û`ßA¬muü-‚5—24AªÝŽ6†3k(›ˆ²¼_9)779`‘zÚdgK³¹o{¡Ô°¢x>˜Òaº›tï¥yºÝXߨ¯ ?‰-~_‹,}Ê<ÉËW¤ªöýD’aýÖÛjÙ™k7½ïŽeYÐ4 Eªó§k€Ò5mÑÑ7gÅ^ÎpXÜ€®ª®–16›áÕdQ‰4Öæøø²,ãäädÅâ~üøø8ïï*”ÌküüPh$Y |Pþ¸ý;Ͷ¬ecÓoTSš¶0ì–hÀî|ý5׃e{£—eûZõ=ÆÒ ?Õl†d©×k¹Þö÷—ϯ߷ÏyyÂ,‡A½O/ˆþ–DMÅz½ø»^ŒÛ²¢oRdœmŠd ‚½Éæm¬öz,˾†4ŸQL4}E‹MȃԛtÄ9Í®ô£Ý¾¹yÚL*$Ѥi¶Ç°7ËËüÒ/¶h1RYR¯gÿkYùÜK܉ggþ~2V½·QËŠ—½+ôû}4›ÍüGNº1 [§?ˆ+ó1ÈØ2ÞíWVTÕ_gÅtÊ×׬ãñÓéŸøÄ'B_wÿþ}L§S§«XÑê j«cREÃ"í×eëànÇ~Œæ {‘?R[áéÀ®£m/>߯l‹2z½5ŽY¡ÑÊ FЭÇþ^(uƽ_‘2>ÙBNv•,o¨ ‹5àï~¿÷ï+E«¡ba¨ª=C»ß¦d†ýÚVËc zOPà€¼nÈi䆊ßY¶¯½v{y„§û|ܵ!71¬ðM:&!‰”wEY¤DmROG#i‚ˆªˆlÉóç·Ërô{C%x¯%£ÂkÛ¸÷o·±êýz¨$릡iZòl7Þy×qÞç•÷fÓ¾ŽÈ:ÌÙ‹Ýj­êÇEÈ ŠÙ£×¬“ÉÕjÕj5ôuT¿:›Í¶û E$(ººnÞŸLJÙm·Þt±¿(ÏRã 5Pq3E0>;Ú[4Lê÷7L)ˆpO]é}ºýo'E¼ñ‚±¼ºþxäjµlçM» ûÃÿòiè!¨Y(]8;„ï0\D‡È®Ø8[(b”ioh|@^†?i¯ëâ§Î6k®ëEÞøŽ7¶ø¡Û!Ë”wð7¦@}…ukR‚Ý\^g¿v:²çϰõ@Ij³ÀH²Äx5åjKZüÍÕ· Î=Àýwõ‹FRWG¯'ÜïþE,¼Pê¬w-´V·1®* ªê•®7ÃX])Dä¢÷Åõ ¶Z‹‘”ÕçþîÚíå5ŠâòÍ!è& ªÀlöÞx‹) –e¥aít ïegn¹§º·Û°º=ìï_×õ×~úýRªMÄȆ»œ)(V§ª« }§ã Î ‡«¶Œ·y ÕË}5t/ô;–—›d°Òè¦ØŽGšÿ;… ¯»_?¯ W˜ï\ÓÁ,ÒÓƒˆÒG-JI–7C‹úgøáÖ×!~†¦ l^ÚÉ`­Õjèv»é¾³Ù ƒÁõz½£mÂ:¨Ò4¶¾BI=hÚõµ m¤k‡:Â-éZº|{´éBQ<*NäÇÀRc£ÔiÉVâj8%ü,œúY&}yåcXWŽ'[Ó ûg}7y£ò§1åòÚV/ËòW6ünê~i»„»\ËZ+ù8FÜ÷F]÷ïTO¸•‡f3ø!Ë‹kn¥nÜs¾Þ›{Îj¯çß¡Xe î>Þ5øE÷öþ5v‘ÔgjÇiÃ0¹™XÆÝ.г:ßýEÿåI*Æ*e¾´Zþ¥ƒn:…I½Ï/U׋ ,ï‰Ô#ÁÏèvÇ£Wí¶½¿Ó:hzY”í…¶´4º©ijW›Íhá¼°.V×Þ ³;t:Lç 5§¤t’•0™‰2â;ŠÁêu¦˜¦¿ü…•_%e:}Þzë±Þ©Kðƒ0NqïÞ=œŸŸ;ÑÖétꌴ™N§8::J÷ŒR"l7)­ŽMjÝë%àMý> ¼€ ¹š>¸½ÿ½_³ö$iQá…”áá¼_“ $ô¬fN"¯<¥D`ooùwùΓY³þýWÑù·}ëxüš`„e°y7L÷<¯0D1ÜCM5´¦uLº™…}½Þ›Æp¸P޼¯½ÇñFK©Ó¥4[8è9÷yÓç¾ão­ÿÒJJªkÞQ-†¹&R kX$hê¶ÎÓüɃÔ|9­Öb$  ºô¨ô‚eûwJ•\·&w¦'u. 
ŠÆÒ¾JÆôº³î(kœ©AQèõnF…B˜Nž 4WqõƒËåe°’ì“®·ÝO×Ñ´Ez/eRÒã^¹òóyQb¯c]ÓÒñéšæâ~ðàÛðå/¿'Öû#…öjµNNN0 |G×Ôëu2ºJÝÚí‘”*Ǧ×ǦtúîW\ob¡dÊIŒà¡£è ÕºQªI»m ˜­±`×Ífm(Z°ÒžÏãìñ9ÌVHT÷ÔéDÊÏðÖD€øMÿ€KôMý—nCîT|å]–—=Ø$¿Ahš-×tü¨Îýu)W”ÆB]´Ã ¨p¿oÞ)9ÀööÓO¼†)áOë #è;k·íÏ£ãÑ߬ ýï6Æ0 ´Óª;²¬›;ˆ)–e­ßË)“ýþõöë·"¢]ªj+ÀNoTêÞKI÷r?¨.•^O{äºý›îTŸ·.£yxÝã#Šs”’ëüJZ6%Jäl0MÍ´¿<7n¥ƒ¸î|¥þé CçzþJ÷,Þ ý`š¶|­˜¦-Ã~[eP  ÈOãýúýÅšÈÉ”¤5›À¿øËxôè ±Þ- £•F×L&']x]á¼ Spür aûK‘V{}ç>Ò€owà%(wðâÂ6X ¬52ÛmûÇ4—•Ý¥”Å}ØÆb†{€¥1 ì1>—ñÇ$ÇŠë’U”k­d½&B]§—0 Ȳ ]¿–»~Ú×þ5\lºä×!‡Ëºf²¼èó‘bé€x7†‹‹ÕÆQ?#èst}1–&¢Ï nï,íi¯¼’Þ÷V$R­a5 Nf ÃÚlòŠgädQÕk£Ò¥/‘Q鷻裑­„{U-M³÷$¯*åµW(úiŠcãPÅÙÙú{HœqIê÷˜e2°úµÒ5M ×ƒ¡³oˆ´"ˆTÂD2ç'{¦¹yÿ2÷gPê Þ4ôZÒ?€`y'8Uʤ=±ƒœV_übü÷FJ vS­V!Ë2ŽŽŽ Ë²c¬Îf3§ñR Rp(,¾”@Æê:‰rµvî#ìy§ëþÀäÊÛPas—bå”hÖjÖÆ*A½ÛPÖêLê†]§üŽaC*YR$ °¬EÝäõ8›Ë/†7[£×û5?òB)WmêLIâù ƒRûi¿H ÷÷´ò7c‚átàÂAÊ×3Ï\w˜o¬¯YÜÖîãqS>bâ§ÄjZxÚ¤,/jïÈÙè†~Pàu w$i¹&)‰ŽA5qAxºX™‚]Wã-[Š:ÚR×ÃKލþ™œ;AÍ"ii›ÞÿÉÐu»ÔÉÛ Ñï3©¡S”Œж‰Øk“ɇ‡‡yŸ/ëÆÚ×…öîjþ5ƪiý_ù˜¿ú6Àïìͧw·i®i(tCOYÁE‚Ý\ªuý¹½΃IDä@§y‚oêìõûhŒùóaý¥¿¾öXôú(Êyõ2ÔËrƒÒÈÒ6XûïE…]e£9•þäpIÎPdº‰ ‚}í?yrÝaþl}-ù. 
ëz¸lg]ü÷çk¥/^¥Ö=¦ŒSŽMÓÞ³h.}ÚŽ¶¼žÌ*‰›æy[è¯ÃÕíQÓ‚Ç 4v7Ž«¡ªd:и— c,¥E¿o¯‰>7-ÖõøÈ‹Ô Ö"tqPpýÇn431| ÅÀuÄøÔÿ÷7°7jQ„Ý”|>¨ÕZv»wqŸq/±PawÞöˆ4ê†|;²ËäBä¹”A³dB^¶ öz@ç¥ïƒqëû×+nmNZc‹Ý ÓöüS£¨]Ž($žS|ÀÝ´’ Š‹ „ûOL)˜iÍ,2¡Š|†ÑUÃVÆýº¤ÓŽÕõ/ö¡(Qq2†›ÍâEs˜lˆ•Fx>DÁµi(Êf—Îp¸•çe%‡‹wßò}QÇSÆ9Å‹‹Ý¾ç»Ùyƒ5L™w÷ÛèõÚ¦“6`ÚThÿWïBïWÿs×ÊxçDÓ Û6A½þÉ#ªÒ„m¬²¾—;‘”ø°ÁuLÓ–uß{ÄõŽK5 ŠOVÞË´š#•Dó†Ãå›s·/ Q•·(Q²yÚBíjØ¡½—®‡ïaëœn..v3›†ñ'Rc1/ºŸ¾ötE¹tüœ/î©5rÓé,w¥öB×9pÒ¼÷ß´[×άa :¼7KÁ¯£p €èîöKEeAž|v´2 €ò1oØÅRT"+ñ2HuDnD1@‰ |]ø{&K,ËŠ6§2*»n ýýÝOõC ÁšaŽM»¼‘!ê~D¨®ÄÜh5\r57ƒÕí-‰…½ÎG¢!n$iÑôÔýØ:ç ÍöÝ´ÑÒMgç Ö0eÞÝRÚŸ{Ñuÿpÿ "üëGI²ÓN;#ãXG8o8›¦H*Š­8’Bb!éwQ Q&R¯ae¶‚¦-ÆU¶Ñ·±ZÚétE#T‘jÓ›Q2©0lv*ÃD!ö~ÝØ1‰ÁÚïGKLð›ïLôzË×À:Ÿ'Ùl¬nÎÚ±6“ɃÁ`ífúæ‘-Ò7~#»Ó±w#V{“îõ"\+AD”¡2âÕ€Rp^‘U¦0„ŽAÐõµ&Ý z½ÅHMë=&+¥“1©´s·e-ÏëeÌh -¥`Κ-MÝx5-Ú˜Q´ßgYœUÃ$Ç4Íxõ«îöÑ11ŒÅäÈu¸—ä;Á]+Ž«wM7e¶î6H-ÂZ©TP¯×ó>Ÿè˜&Úz Ò}[ ¼ÞÛn׾Ɇ ›eÿÅ,x,M3&d°±ÊiBC“Ü×@é2Ô¡Ñ0ìÿóË‘µs*£b»®Ë€°N›^hˆ½eÙÑåå²b7ÚŠdûl×÷¥Ôf ¯:¶öß(Š-@VŽ1›»i5މ¡CÓ-B’ì}'êCêR«µþÖ iË=p˜í°6ÂZ«Õprr’ÙÆãq ¡;™LP©TœY¯I0 ]¿pym\ÒLÙñ<Éÿ+À§?wüŸÅhR°~$ÛX勤4d-ß¾=‚F(Š‹”Žª2IØ–ŒÇöÎ/Þhô”åÅÜV‹ íbrq±(¶­X–­±ÎŠû§ÌC9Ü–l6‡L¹Ù’$-Dž¢¥Q-Iö˜!f÷ئŽ{¤_‹êh GÜÌ `ÑLÉ­¥´,&%ÖFXÇãñŠGd6›a<oüáÓéÔwvët:Ž{÷pïÞ=<÷Üsþg VnŠÈ2ê²ÄQAµªÆðüþÄ7ïøïƒŸê’\×7Ç@òÆLL.lC¾}k´;ÿ¢ ¸ç ³ ÛÚà FÚôûöõÐíÚZI£ÁÆja5ì4C8 rø†õLJ„¤‘ÔØ¦lû6‡´¬õí{#BóÝĘpÆì(Û”q ÁS@M3y€(rAÑI”üâ‹/ú yT¦Ó)t]¼ŽQ­Va^yå•Èu´~¬(ôt7öùlQ´>lÐõtSÿŸOü]D; -Ø£k8 ¸ðlS¾Wj´¯1ërçÒ`ÍÜ,¶)ãD¢ó–µœç>Ú¡"6VQÕÕ”¸½=ûßv;Ü`Õõõ#O6ê˲¤0²MÑÕ ÑuÛOÃ4 ‘‡ŒÌÑöÐl&¿dz½åYÐLñÈ¥Kðx<Æùù¹ïsÓéãñ÷ïß`ׯ`Db²tqP÷uôÉ{HI² ÕÈ:Q@öèNÝj­-‹BÐMØm¬²ŽU ¶)ßKP¥š 5Ð`˜$ä!ãa#Ê„ÝNwŠb• à™gÖ¿O–—4o°ou²|‹£äެ ZÊ,Ékÿö5X7L¡ùñn˜Bn:Ê:(&Ô½zS<éÿLqÉÅ`=88ÀÉÉ ŽŽŽVž»ºº`×εZ Óétó¦iÖžµ£kÜ)ÖòNgqƒIâAòz…,Ø)Àl¬–ŽmÊ÷RíuPŠ˜ Ìw&)yìá¦i¦;ƒu‡étlU×m#s8´3‡¨1Ìèk6mßÍ„åÈÃp¸0UuÑäd“Û_T$ÉþÛ—ðúëïKý3òmÃ0– V]·Ovƒt`ÓäRmÆŸ÷¹¥r]×—ëW Ãé­¯ëö¯‰•vy’©Íjš]!ÈÉtNN™étŠÇãË_þr.ŸD˜|À›o¾‰ñxŒ[·n-5AXº DTè¹»Ýn3ñÆoäòÙiîánRë¼!–UÜ‘¦¹0PÃPUÛˆñ*yÔ5–ÎÍ0‚ËŠ70 $ɾ…õ«ÌòA:øÓ§Oc½/—kaó\ÃNî­·ÞÂç?ÿyò.Yìß@ÀˆI˲…6fŸÓd=¦Ì$ÕÁ׬·nÝrНý>0mnݺÀÓÅáþVêB<$ÚÔ³ê¬*ÁŽª²±ºÓ¤)ßbuÜSöJ2Ù“¶Œ>åA±š‘ÆNÑÕ<ºIR÷^·n×ë-¢¬I:óºGèúJ7ëñ…lû:c„F)Í›a6! 
R‚iV„MÆ0ìý2;˜›ÇZƒµZ­zL² Z­¢^¯ãüüÜù\]×Ñ 1I4h>Œ6²3*Y1ØyÒ”o+FN»ªÚ é m˜Êl‘´÷pÀg|“4$C­]Ó¢ÕwfAаúMnqÍæbò_¿6vn-“…l/5Îb«$‡žÊ ¸ÁÒͤpM—;âððãñØ)ò~þùçcgI¡÷´ˆ§Ñ|Ñvýom0å"-ùv2LÓ–ï·£ªÚ?ÛèJÏ0@z2NDJ VÕÌCŸ†±°'Z-àòrócÒ\Qj…ÐïÛH³iŸÝ«²0Žé˜l˜F'mÙŽäŒ €äƒU&MÒ–qÃ0V÷oÓ\Ûwƒ&õ‘|³s³‰l°N&œžžâþýû¨ÕjÊCœšºz½îûúZ­†—^zÉ©ÁJš~¼¤äÐH‘=Ó€ìFKÜÄ€‰HÖò \7ëÐõÐ×PslNícÒf2øD¡¼P±eŠ54aÁý±nå‰F»HÒb wÄ©RcÁÉP¥¯Q–d»½A¿f#¶)Ûí!#˲ÕV♤lKÆu]_°JÒZ/œ¢°3†YÉ`ÕuÝnÕju©¥õÑÑ*• t]Çd2ÁK/½”ÚÂ*•J*u²‚ ,zý»ˆµÉ·`§³±Ê¤DòíŒùðI %—SŘ¼Hk_]Mظ# ðÉÑh1ÂÆ›L†*ÍívmB¢7év×G79úY<Ò’m ]2äƒ/âx%¦üä-㺾¨Ye âX›ããcȲŒ—_~y©ðºV«9ˆïÞ½‹_|1ïóYÂñmRl¤Á®-åœy¦`8Šñ‰Oàôôt냼Ãp”—ÁjY¶Â¹_‚ôGØ0LJ|ó—¾äëŒáÑÌ®°6¸ÝN­m¤¢,ŒUº~z=û~ašË&•»SA°_ßjÙÇêtìÑ3Q÷ÊìKS#æ~Ó,mΜaJ‰¦…>mY‹T`vè1nÖ¬4 ÊÛÒº^¯/¥Óó~C±sÇ•¬i1êŒú°Ó€9å†) ¦iâë>ÿyßœ0Ïì –eÅß´ú~[ sRš&°¿oÿÿâbÙÙCF(E\Ý´ÛvDÕ ÍE•eûÿîòv"1n–²(÷| š–éä&†I•¥ñ’в(Ö@V÷`†"Ô°V*<}útåñ“““¼×ÊÒÜ>W„5Vo]e ‹eYøO~ó7—vvM³õnûÎì ¾ç#bYv„S’ìÝÍÇ¼× ™qZG˜(,e¬i÷Kj Ï dÊÄ’ÁÒk Ó±å›u&ˆµÖZ­†Ùl†ñxú:zÞuÍç"¨êÚL„U†àÙ¨L±±¬¥tj–Ê0»ÂÊÀyÓ´}íûì¨i»mÛí¶m´ÒÜQÛ Ø-¾n˜m5{€R̦l˜¦iׯR¦£Oz#udgc• #’ÁZ«Õ0 0›Í|_3›Í0 P¯×Q«Õò>'®¬ºõ]ÿÕ’r²ãú‡a ŒI»üµ`“Œs×HfWðí.I®ø5ˆ¢Q%%¨Ý^DD%É~ޝ&Oœ«®‡ ãpÈãk˜râDXCšŸŠ"g0ë‰Ô%øÁƒ˜N§¸wïNOO1ŸÁ`€ïÿþïÇt:ÅÑÑQÞçã@‰jÊPߺoeöìU†)0¢g—7M®]ev‹•óŠbk7!‚n‹2)¯ b3a†II’|Gï¶“_’aʈiškÄd†qk­VÃÇqzzŠÁ`°òüÁÁîß¿¿Ò˜)O~ã7žÁpø§°·‡xÆ*tpzS2x̳k˜î¼æ­šæ¢Ë/ÃG¾½-¨¯á¬¦ì82>ó⹄‰‰C$ƒ°»?xðGGG˜L&ÎãµZ­Pu«€½÷ÿäOþþüŸŸâ¯þÕg‣TLÁYj*†ETöÄ3»„SÿDš·JiÀ¬à3Eg%ÝÝ']R×y| S^t]_ȸ'‹@×¹A$È+Q©TP¯×ó^w(¢ܽû_àG4†›ÝÐpcÔ·²à g˜RcڤѨjhè”2ÎØXeÊÀRº»OŸ¦ñžÎ”§ažgd“eÙ­¸.›‰ClƒµL–m¶«m°±Ê”˲ðŸ<~ìx-%‰ç–1»ÇRý“ îåTÅ×S–ÒÝ} ™ñ SVœ AX²Nû};Y†‹L"5]*#¦iÚù5CŠ—ŒUNM`J‚aøðg>Ã9ÀÌN#¸‡¦4ZRU;½Œ»L2eb)ÝÝ^b Sf–æ _cšìŒa’±³«(ж³.§¦»f•U¦d<ó¿á¬ûûy¯†aÒeià|·ë[ì¤ë¶ÁÊ©eLÙX™/ì"Ä?Ã0¥Áoΰ ØuÙìkgⲓ«eYø]ÿñ?Ú¿„]ýëÙÓÔŒ¯þûÿðMßÀVÚÙÏìN:0ໆ]ÅÊSFe^Q]ó®‘$Ž@1»‰kt<ÃÄb' VÃ0ðÇÞþöõW…€;ð1%äwþ›ƒwýÑ? 
Àsî$Éì+3X]XÖb| ;k˜2âÔ°êºãq¡¶’ÄN¦Ü82×9À–Å™`Ìfì¤Á ¿ïÿß×k2mØF+ÔŒo›ÍðÎw¾3ïe0LfX–˜2I=<ØXeʈ3ÒÆYmµlû•aÊÎRÃ<Ãà®×ÌÆì¤Á*~ï¯ýšÿÕaØ`Æ<(È7¯®ùn4ò^ äišIáñ;L2eʼn>õûÎ>N£É¸v•Ùœ ™ëîaí67Æc6c' VIQ­Výsjtðø¦ôüÙ?øQtg“1ÌNÔé½Õ²•{†)+¦iâ_÷uKíRE‘ëV™Ý"¬ÖO}ÚÿöÎ{9̰›sX #8÷€½—ÌPùÒ—QD§ÁR™ÝÃI'Óõ%7Íh£µ¦È˜¦‰û¿ök@» Z¼3;†ištÊ7þ àêý¬z3³“VÈ2ðGŽ–k0aŠÇå%…‡o3»‰išø®?ðì_\)¢ÈŠ=S~¾æ×ßüèÐí¢ÓáÚ>f÷¨üÖoÁü è_ù.NfR¡ðëd2Át:÷&À_­ ìšÕìæJÜ ƒ)‰äv”‰‡o3e ‰Œ†?ôúëKš<7£aŠHùþ#¿ð øº¿òWп­Çû8ST’îßþ+_òöÿ½¿ö5yŸ³#6%x:¢Ûíb2™dYFŸv÷uˆ.`¬{dgyŸÃ,ØH¾aîëv¹~•).›Èx·ÛÅWïßw Vðe~4b™gŠÁ&òýÕ矇.¶¡)œ1À“Mäûû—ÿÿîsÂG>ÄÙLj6Âz||Œjµ Ã0ðÊ+¯`2™`0Ä;H¶áÊ3*™‚±©|KÐnç} ̦2þŽ_ø@–™«Ã!«LqØD¾¿ùàŽ=G˜eš)"›È÷·<³‡¿û»?‰Þ_ÿyŸ³CÒ`N§Ǹÿ> R©ààà£$®Hð]¸úº×ð½ß«â—~ (ºèð¾]^ýu¼ñÆxÿûß_š=œ÷ïíSöýûçþçñ}ß÷}[ÿ|ÖÁËC¯MÒÁãîß…4Xg³YàsOŸ>õ½Xžþy Z­æ½üØL§S¼ûÝï4TŠÌd2Yò•…Ùl†§OŸæöÙAÉ7üí¿ý·Kù]å—“2î+y*iéíá?˜Û9Ä÷ðíS¶=œ÷ïíSöý;¯ïœuðòPök3…4Xþü ‹¡R©”RØÂΩ ”ñBò•—$ò½î}E§¬k/ó¾’çwÎ{xyàk3>I们ßs™×^æ=…÷ïíÁû÷öI"/…¬a½uë€å´„étZj¡b‚å›ÙuXÆ™]†å›ÙeX¾™"RHƒµZ­¢^¯ãüüÜyL×u4¼—Æ0ÃòÍì:,ãÌ.ÃòÍì2,ßLyÛ|>Ÿç½?&“ Q­V"ï“““Ò¦0Œ–of×agv–of—aùfŠFa VKÝÝêõzÞËa˜Taùfv–qf—aùfv–o¦HÚ`e†a†a†an.…¬a-+“É$t”Äd2 oöüº÷¦yi®{›kg²%KùŽò|ZçöÚX¾w‡Md|Wå{[kg²…÷o–ï]‡÷ïüÖ¾æÌƼöÚkógŸ}v~çÎù;wæÏ>ûìüµ×^sž¿ººšìcsžÿä'?¹ôþ°ç×½7M^xá…ùÇ?þñTÖ½íµ3Ù‘¥|Gy>-¼ò½éÚX¾w‡Md|Wå{›kg²ƒ÷ïü×Îd ïß7C¾9š‡‡‡¨×ë0 ¯¼ò êõ:ºÝ®óüññ1ªÕªóüd2Á`0ˆôüº÷¦Åx<ÆéééÒc›¬{›kg²%KùŽò|øÉ÷¦kcùÞ6‘ñ]•ïm­ÉÞ¿Y¾wÞ¿oˆ|çm1—W_}u~çÎùo¼á }þK_úRè{7e:âôôÔY«÷|’®;ÊóL¹ÈB¾³–“0ùÞtíY_›ÌöI"ãëä ¬òÍ{ønÁû÷v×ÎlÞ¿··ö¼`ƒ5&“ 1™LððáÃ%o‰;ï¥Z­†>6¨¹Z­n¼îóósT*躎Á`€Éd‚««+ ¼ï}ïK¼î(Ï3å!+ùÎZNÂä{:úÚd¶KR_'e•oÞÃwÞ¿·¿vf»ðþ½Ýµç¬)Ðív¿¼¼œ˜_^^:¯Eq.ËòÒóÎóÃáp.Â|>ŸÏ̇áóœ$Ióv»íüÞívc%y=ÃÄÁ+ßó¹-sî߇Ãadù¦÷³Œ3E'+Ùw¿4Í8Ÿå–k†I?™–ey>œß½2Êû5SF’È:íÇîcȲìèó,Ûñákèt:E½^€í‘‘ey)}¬Ùl:ÿ×u¢(:Ý!Ý¿“—Æý^Aœt„ ⾞a¢â•oB–eçÿnù‹"ßÞ÷°Œ3E$+Ùw¿Ÿ /½û^Á0i$Ó^¼2Êû5S6’È:Ém§Ó®ë£ÑhIžY¶ãÁ5¬AQ˜¦‰Ñhä<¶nä¥PÊ0á§Ä0LžøÉ÷:X¾™]`Û²OõSN———yŸ>³ƒ$‘i†)#Ie]EŒF#§”ϲ,´Ûm ‡Ã¼O©´°ÁZTU…ªªFKABVI’¯ Ax®[bŠB|¯ƒå›);yÈþp8„eYÐ4ÍiÂ0i‘T¦¦ll"ëÔ8i8b8:cÇ$IZjšÊD‡S‚sÆ0 t:œ­("ÍfÓ)öàt#Ü]È*üf˜"&ßë`ùfÊLž²/‚Ó±’G/0iE¦×e†1LØTÖÝ‘UÀޏ²ƒg3Ø`ÍêøÛh4ð¶·½ÍùQ¢(b8¢Ñh 
Ñh`)%Œ”EQ°¿¿ït‘<;;Ëû´@¸|¯ƒå›)3yË~³ÙD³Ùd“ëdZ(Šâ;„aÊĦ²N#löööÐh4°··Q¹·À¼m>ŸÏó^㥀‰¢èÌa¥MnÅÅ݈ƒRÉfW`ùfn*,ûL™pë,î†2 ³kD•uÊK™6$Wƒu2™jµZàó•JÕj5¯%æŠišØÛÛÃÅÅ$I‚išh4èv»œ_X¾™]‡eœÙUX¶™]‡eœ)¹4]šL&èv»˜N§€jµŠ~¿ï\4ÓéÝn×¹˜dY^é–xp§[–AÐn·ÙX-8,ßÌ®Ã2Îì*,ÛÌ®Ã2Δ‘\jXQ¯×a^yåÔëut»]çùããcT«UçùÉd‚Á`÷w• ívOž<Á|>Ç“'O¸ãc `ùfv–qfWaÙfv–q¦Œl=Â:1›Íptt¨T*¸ÿ>ÎÏÏôƒñxŒ‡:ÏàüüÜyÿäŸü|êSŸÂ·û·çôU&ç3Ÿù ¾á¾ßð ß÷RbSÖ#_þò—ñ[¿õ[øoþ›ÿ&Õãf%ßð#?ò#ø}¿ï÷åýÕ%¢Ìròå/¹”ûŠa8==Mý¸¼‡¯Â{øöùò—¿Œw¿ûÝø á/¤vLÞ¿ý)³Œ”yÿ&Ã1Mxÿ^…÷ïí“dÿÞºÁZ«Õprr‚J¥â¥ùÜç>‡oýÖoÅíÛ·ó^Jl^}õÕR~ç?Ưþꯦ~ܬäž>}ZÊï(·œ|õ«_-åÚ_}õÕLŽË{ø*¼‡oŸÇGÚ7ãÀû·?e–‘2ïßï~÷»S?.ïß«ðþ½}’ìß[7X+• êõºóût:Å`0ÀÁÁªÕ*Æãqà{g³ÙÒEææ]ïz>ð,»Lܺu«”…í?ð?PÚï|6›¥~̬ä¾þ뿾´ßuYåäÖ­[¸}ûv)×þž÷¼'“ãòîïáÛ'í=œ÷oÊ*#eß¿Ãä))¼ûÃû÷ö‰»ç6‡u6›a0à¹çžC½^ǃÖžÀÓ§OŸûìg?‹OúÓ8<<Ìë”S¯×Ky¡ÀÁÁAÞKˆÍùù9^xág\D¤-ßðúë¯ãððçççù|qPF9ìfe¼âñãÇ™~ïá xß.ƒÁ/¼ð>ó™Ïdr|Þ¿—)£Œåß¿©éQðþ½€÷ïíB:xÜý;·.ÁÇÇǨT*xøðáJêAaõ¡}ýèG×Ö‘0 y=z”Éñ³oøà?ˆ“““|¾4¦Tœœœdª8ðÎäÉÑÑîÞ½›ÉÎû7“7´‡ÉÛ&ðþÍäIR<—k·Ûuòè½Ç­[·`)·y:–ÖûÁÜ@’¤%CP„µ ³Ût¿Þ²,hšAÐl6W·>^Ûï÷¡ª*A€(ŠÎÚdY†išKÊ»išÎšÝk}ç;ßé¼—a²Æ²,¨ªê\—†a82*ËòÒµ¤ëúÒk[ŽAXrr¹å_Q\^^.}Þ§>õ)œœœä}ê Ã0 Ã0Áëé÷ûh·Ûèv»hµZPUív;Ò{;t]wŒ1úE†a@UUèºî<îU€÷÷÷qvvæk¤†±¤ëºÓ4È ŽáØëõ t]w"<€9v+Ì^,’†a@E˜¦‰f³ MÓ Ëò’RÞjµ ˲c꺎f³‰ËËË"Ôãñ=ÊôïÍdÉ [Þ£¾ŒB’Y–ëÇë8é÷û°, ívÛ¹†LÓt®úݲ,çwº>-Ë‚®ëh·ÛÎ:é_Ó4¡i:Žãp€n·ëœÈ ¦ý¤ßïãÖ­[yÿy†a†a6† Ö-AÞÅÅ`8¢Ñh,)¹²,¯DB-ËB£Ñ€,˸¼¼t”_2R)¢Øl6#¶²,CÓ44%EÜm4º££tº‡¼3yaY–S¸îuî>3‚ 8^t}Qi”»ÔP’$ܽ{o¾ùf¬u±Áš"º®CQgSŒ«(H=-²,;‘0‚—Ò £@õ½‘E‘ñÌ3yŸ>ãÁÝ˺K7fÀŽ\’gšnÚ­V ÝnÍfÓ‰˜FRŠ¢¸”ÅRŒií´án•Fh·u×€e­ÐlB¸.)ànÁL\ȹšFs>/nï:A=hO ¾d¸3Úí6†ÃáR$K–eüÐý÷!(9dTFuú5Ÿs?G÷wóE·ã›J³ÜePNǹoŒÑý‡tÒm6Ù_Ýz!7•?1Ì6ñfCº8äð§kŒ®Owö™¦iεHוZ¶³îZIÒG&‘Á: 01™Lpÿþ}T«UL§Såø'Èò¸Ñ4JÝ®BžKtJg^¹¹(Š­ˆûÜtÜÐÈ‘,I²#Rë¾w]·_±Ñ·§Í4M§þØÝ‹6Ãýý}þ›š,Ëh4N®¨MÉÒ¦Ûí¢Óé@„ð5´Z¶,'5 Ã>ÆÅÅòõÐnÛÏ­£Ó±?ÿ:Š×÷Å”Ã0Ðjµœ ?%ZUUÇÛn·Ñl6WöuºîÉ8¥è¨$I0 ÃqD)ŠK=,ËÂp8 ½wÒgãñ8﯎I€Û‘IPßr^ȲìÈ d„Z–µ¤g†á”BP H޼:¦éìt:Žœš¦¹ä„OsªÃÚì1†IÚ[ÝQN2,½û7íÛ~6 9‚hÊI^ŽñØë`0Àùù90›Í•JƒÁ“ÉäFŒQpw!$I ¬Ó¼)Pª–;‚æ]UU@–íb{ù£¬šø™Ì*”ξUUÑï÷—ùmzTÓæU£Ñh©†4ÜÊPàw iöÏ&ë4 ;JÚïîï¥ÛööÂß«iö{»]ûšbvw£/7îÈc±Ji÷ûûûKݦé^'Š¢³—«ªŠýýý•®ðî®Û€}-SI 
tSívÛÙüF™1»‰®ëŽaêWêä6N½ãóÜ2¢ë:†SB¥i†ÃaâR³³3ìïïÃ0Œ%y‡Žs…#¡LžPäÒ}ÍPʹߨHÊ6 ¨ªªŽ.wPœÌÏXët:Åéé)NNNP¯×1™LØ^«jµŠ{÷îa2™ V«å}^™@7]Ó4ýÑwò¤¨q°,;"ðý­õÜÆrDKCç ë@³ow D7oò¸ÑßÑ]OÀy]ÚµÖEˆöz½¥qO+†-{†‘ÜbšÀÙ™uà•aY—k·Ó‡j¹9x'è÷ûÐ4 –e9Ê2—t½…Aõß”¶åí^ ,Ò+Ý5â£ÑûûûŽÂ#IÒÊ½Ž® ŠJº®fQ]u³Ç™ì Q|ëöv2FMÓt"5’$9ïít:°, £Ñ(4úE§’eggghµZh6›ß{A@¯×C§ÓYq°¡ÊÄ…öFŠnX먃®-Ê.pÓ}-¹³U¬ÍX)#± Ö««+@½^_y®V«¡V«9Q×]ƒ„†êivËÚHù¦º?À6l|#¥¤€ob´Z€ûÆ"INJd îìÞð÷þ^–ßd© zSr2¸Ó=TUu:W»ÇÁpš$Áó–‘R^šMûº ¢ß·å3ìš’$Û‘$×Aïu+i½^´MLá!cñòòÒ‰xJ’äDB©ë|D£Ð(åž(`yd™dÀRÓ™0ãÂMCJÒ®)H7EQœ2o"`QEÑP·¼Ìu:g]šNHI’Bç¾Ç¥($7ßõÚkÑ2ǘܡîh÷Üõ½½½D+5‹Òxò&Ë`­T*€ÙlæüŸ˜ÍfNÄu×pwÛé²aØÍ^æóøïUU×3 Ãf²°ñ¤ß£ß.Ië›Ó¸ÓE_ûôi¶ße‰èt:$É×SMòV«…v»½ä„¸ñÔ:¬ÖtM6“,I¶ÇýZY¶kT£°ËûÒ ÃmŒŽF#4 §nÒê÷÷÷fFÔ½X”eÐ& ~ÆB”qLîZT޳‹Ù H§òŒýý}ôz=Ç€%=ˆ¢¨^cd®™2…į ÄÃËKÞ× ŒÛÑO{fP@«Ùl.•Z„A½¨$ËÏYtS‰e°Rõððîqbdx’c$íQ{7ÈÖT)îéQ¯‡f³é¤×{ñ›ßn·ÙPõ!vÓ¥““(Š‚{÷î°;õžž¢V«­4"*;d¬îl pa)Ž~¸ŒP*ð/Òl®7XÅ~(®¾6ižû}’„¯ÿ¹Ÿ¾ã;¶ðeªÉ.¤3æmoKéß&dL†•‚ÿzrã×ÈÍ4ícrŠPi0MÓ10Ý#5ÈKoDQtÒ'Ýøñ$É©u?·Í(§$IK]€9e­ÜPT•"ôî¿'É“öïw}õ«y¯’¹Æ[6%Ër,»€öHww^j.F)ÄIÆ`Þ4b¬•J'''˜L&K]‚w©Ñ)òÀ 3V7‰~^{ÿ©2Ð3D›´eÙ?Þ”ª£¥×z£îHiœóò4³yÏ_þË7Þ`¥þìÅKÉ­uµ¨Q>êÚ{ËZº~?ú‹¿˜÷7Ä`±êõzE­VËñž7 Gù1MsÅP#o±Ùlb?r³¦¸ìïï;Žn(¹"8ÞÙ`ͪ±¦Næ’$-éünCuÓ_ívÛéüN ôºÝîͲ16äwÄyñx<†$I˜Íf¨Õj¨×ë¨×ë«E›¥fš&†ÓÕ°0¸ ¹,¡1qp½>r74Ëò¯Ï£œºžú}VІ¶n÷ x‹ZÑä›6`š…ZØ£¢GkÜÎuÊ]PS0W `,4mÕ‘CÇs}Îíë&yYS4/–e¡Óé8tÚ[­†3âEEȲœ»êÛï÷£¯;©ÌçÄ®É6EPÝШ™³³36V·É=äKž>1Y±k2w¹ßh4r2ÍèqÃ0°¿¿ïteïõz9÷izeÛœWÿ*(± ÖZ­†J¥²²á%e:âððpåñÁ`à1ÓßëÒÆ0 4 t»Ýâµ37ŒíÜ쓬‚°¾S¯› ú<]·Ÿ“e[1÷~AŠ‘Û¦è-!Ë+ þßñøà¿ûw™~•E“oÓ4±¿¿ï4p)tŠWÑ£¾¢¸© ˆ?նʲ¿óÅ+ßQ ¦c~ß ,]S/<ûlæ_CÑd< Œƒ¦iBQ'rå®ï$ãµ°MiL3ò^î½½¿²!M+¤!»+²M]£÷÷÷±dÔ´‹çŒæDAF튌Ç%¨Ü~o4ŽQ™V•oP¶M™œ“E!v—ࣣ# L§Sß×­=Ît:Åd2Áéé©ïóWWW888XòôU¶àmrP/$YwŒó¤Æ~oÜ´G2ZÝçešv#AX}žæ^úáVÖû}û÷Ôá¯V«ø`Fµ›E•oê>ZšJIÓh·(†_ngŒŸs&®Â¢(öq,+|.kJÎÄuUÆ7…œ–ùŒTÏ)ËràèB7(¢†0®½f³¹T—µö¸4·Ø-û Ð|ðoý-<ùÚØUMkÙ5Ù¦¸[ž À¢(°,«˜= v‰FÃÖE¼×SØ5cYøìïù=¸Ñ’vMÆ£bš¦3Ó4(ƒr8:ØÓ.—j·ÛKMõ˜xÄÞí PУ¬ãñ84JûôéS'Ýx[¨ªZü”¬¬£N¦iGâFX©®t]Ó-]·Ü£<¼F¨ûwJ¥¤(„,‡¢h×÷é:°æ&üä{¾ÿöï@í"Ê··ûhá)òu¯Rî5\œ/~´Ûv·aËòoÄ,Œ-PDߪ+:;;s:{³S‡HQW €~¿ápXì{H´/"²y¿‡P,˾¿øÕ^'àŸûfwî¤~ú»$Û4Ó×k”J’„QÐ>¤WæG#Û 
)ËþŽ!AÀÏþ¡?„f´œ]’ñ¨(ŠMÓ0צ½g•_ý+K6È.ˆ•l–ú…ƒƒœœœ·”S? 0 2ϱ·,Ë)†.,ÛPD›M{óL"L²¼^ñð>ï­SõF[½©”A©îãu:@„Úãÿß»ß7¾á2ù‹&߀Sª´¯v»ÜFëºMÙ4ãg#ŒF‹îÙA¯Ù’ÁZDß÷(Y–Ñëõ–æŒRÝ·išèv»N‡G÷{wÂXÍ*÷ðbš@‚ò„wL§øÂ?˜ú2wI¶ißçÆz9bÁzË&ä7`—d|ÔG€² ¸F;GL3| àb¬p~~ŽÃÃÃ¥ÜöóóóTÏ‹"¸³Ù,“㻡ùbb‘‡4ocŒE»½\›•¨žZoTÉ›ÂèUòƒjÿ‚e;z[¥q›ò­(ÊÖf3¦EÉˤdykðÖ¥T&ñ2 Bè yö1 R¸MßêØH×¥ºªªêt’¤ÆÔI’¢­ý~¿\××:,+~'ö¨¬eºÖQÙ¦ÛÕÉ™ }a‹Æ$”AÆ×AõÛÔD•79C% ‰< pzzŠƒƒÜ¿€-ÐÇÇǘL&xðàÁFç3Nqÿþ}<ÿüóNÎ|µZÅ`0ÀÁÁAàû>ûÙÏâÓŸþ4&“ NNN"¥y•¢Ž£èÍœ\—¶ë~½ ØJ»í¯ÄûÕ<”^ãáüü?ýÓ?·¿ýí[ÿŠ’Ê7¼þúë8<<„,Ëk_K”J¾ o¤½ˆ4ËŽ¯âá§„[–-ëªjËs×óõqñøñã\¾šmïᛢiAXi€4±··ç;z€ š­Zªëkq£ÿQ I–àZÿ©O~ßòÖ[øÌg>³Õ¯fÛûwR¨¡§ýæLXyUPãIÀÙ¿'“ÉÖGF–mÿ‚ŒÕÂ6·»‰\gþÌ3ÏÄ{ÿ<WWWó;wîÌG£ÑÊs¯¾úêüÎ;ó«««ÈÇ£÷¬ã7ޘ߹sgþꫯ†ë…^ˆs:óù|>o·Ûóápû}[§ÛÏ%)ïU¬_£l8Èò|þäÉòcOžÌ碸ú8qyüÜ$•—¸Ÿ‘–|ÏçóùÇ?þñØkèv»ón·›éy¦Î:9*²¼ü»(.þ?Íçí¶ÿûz½LäÙ$ò—"ìá›ðäÉ“¹(ŠóËËËDïFó‹‹‹­®¹ôx÷£$ßßÙÙ|úgþL¦òR„ý; Ãáp.IËeüåýÉÿ爳³ù_úØÇ2_^Ù÷ï ...æ¢(–C·"L>ÊŠK·K"/±R‚¯®çúùå€Ó<Ö« gÿÇã•Ôƒ§OŸH¿K™®ëΰöÂÓë­Œ­($aëó‹¾ ‚Ý&(Ú´®nµdlS¾©áF©jWÃ0 ;:™7~2îŽiZpF@·»SòìÇ6e|SúýþJs¥8äÚ¨O׋?ðÛ@ß_œkÛ0ðæí¬z¨STÙ¦ÔôV«MÓ0Š¢®(ÉÆz•÷ŒlM[Ôµ¥Þ;›å²Ô¢ÊxTÜÍóJ¡Û‡ŸLÞ+Hÿ|6ÈžKTÃ:ó¹f³Yਛ8T*'½˜8==E­VK=5BQ”b7Zò’B²1–µ¨}‹SgY‹5%™ázÃØ¦|÷ûýrÉ·¿¹Ãº^Œ Ü4W R÷bAبN£ìlSÆ7A×uèº^Þk$KƒÕ;Ë:M‚õu]æÝ˜&¾Z­f³¾Š(ÛªªB½6ö»Ý.F£×ê šn@2O%NëšòåDe<…O•Íô)‚’JSÁß²–ñ¤PmwéUM?jþÆä¦ôû‹9ßîÒ–½=»‡°ˆ°–` KQd[UÕòŽ­)Â^¾ qF“Qÿotµh.Š"ãQQív»\z}yöo¡½8MÜ¥R²Œ¯ÿ;'¶Áš¨†u:¢R©àèèÈ<œFýê¶Ðu½xžõut:ÙÖˆúåˆú¾²ßxv Ó4Ëçqw³îÚÌÓicÁM•ÊêLºat:òÏ䳬læ8’ §}lwV‚[rïSYöhØQt]/c™²A²7Ã2 ¢èžEèÕP" Ã(w??Úíü‚£ÑT.òb#ù¦á潞m¬åýwÛöuªiËJH f-ªªbTj‡ÖÉe­dáÀLzÝ™¦íèÙß_^uöž§_„Õ›VßïÛï§ñ\»¢¼% ßïc4AÅݨ[-óßR¢õZp¿FWçsSƒ³¼ïµ%¦”A¨(ä9F¨ÕJ¿ÄÉã zóömüÊlë±"¬4‡õøø3×Íf3@á —Ö“u ÍÙÙ²ÁEr·©v¯Ó²ì÷1™­¨~Õ²l%..‚o š–ý ˜¦íð–e0í´r˲kUÝñ5mU¡`J 9pJQ"IÑ»þRÏ´:n¥¢ÙŒ×|¯Ù´÷ÿ^où}A#Í¢Œ:ëví×QÖG§S¼,£- ª*dY.‡G¥lƒ,î.¿˲°··øœaåÈž‰KžkÚÙ)eÅÄnºÔï÷1ñ‘|÷îÝý{÷ð‘|º®ãää•J%ÝMêžZêîEÂݦš$;ªZF§À¨~UÓ¢¥›ëúÂ˜Ì ÓôW’-+]§ß·eW׆øpÈ–ƒýR@3JóÄ­¨Äɲ¤ÅþÑl.¿O×ý®Qaòôw»7öÞ¢iÚnu@-3ÆÓ Q”t6Ú×ÒétÃÔKiPQ ÙÈ«Lšºž_–MÖ¦O§SÌf3'rJsXu]Çd2@–åB«@ÝSèµaƒ´ÝJDVÝ!™ QJ°7u)ˆnז鼣вY”Å4m#õòrùÚÊÛX`RG×u )´,D1äÚíl²n²0Š‚2m¢~Emo(º®C„݉®º’IoåIœõ&ù›¥¤ÜÁ²,´Ûm˜¦¹¢ÛìÜ(/YÌCõâµ)èóh|_öFTs 
‘jXOOO1NQ¯×!Ë2jµdY.gcë©qj44ÍÞ¼ýnÌ’´ØÀ%ß‚|M³7a6š GâZŽ¨Ê¯(ÚÑ ª»Ù&ÞÎÓažut:‹š8V vŠ~¿Ó4!Ë2,Ë*_ª˜,GKËÊxÉ´ּ_%š†í 4ÒHì½¼Lz¤ÛÀVUû\‚œ)QÓ*ý&-”é;É˲ ( F£ ÃXÑß)êZºž4ÑNÞþwúI§c—‚¹?›¾Ó´RƒM3•ž kS‚«Õ*úý>>|ˆZ­†óósÜ»wÇÇÇÐK–{OŠL!ðÖ¶Ûöc~F. Yâõâø¥X–m0·Z<ª¦€†‘,Ý=ÎŒßn7Û´`·üëú¢¾g“9ÁnhíEÙ˜Ô°,ËIÖuý~¿|k\¨«7³³ì̘2/Ehìw4jݪ(ãod¹ÜÝ’3DQ´Ûmˆ¢Qazîù;;ÊXîÖž%Û²áRŠ°Æªa•e<ÀÇQ¯×1Ke¼šÛîJ´kÚªÒOiOîAÓÛ‚ Qï½Fi¿o{IÎÎl£uo7ÛaYVöŠMœ9‘IðFNi_i·—7¼M®å²¥ˆ2‘PÝnÍfÃá———åò¾Ó©8×— d×m•š­#h4“ ;“ LО^†î¸Š²œñà¾6×9N“¦±³NåDO)ÝW’¤ý}g£«„(n'›1 ±ÓÙŠÜÆnº´8Ç…ñzpp€Éd‚{÷î9u­E$·ß½á‘à§4S—Űn¬Yt•óóæHÒ²RÝ_¯g?GÝd‹žRi(Ø8Qf ~ש; ,ËÉä2zõ™µ˜¦ ]×w#u2Î9x÷é4¡;ëPUV²3B×õÝ3Xòd¸øÕ÷Ñ߃Rš7Å{/Û¤ÜeGˆÒLiç VÀ–ƒmG‘I¦ãØ‚ìÜL1í?±Áê¦V«áèèÈI."ƶSX½mýUÕ6\ Ûè Úz½p¯vVٺ㊢m¤š,ØZ†HUöšo@`kx¢)ëdr • óƒÙEQÐ+{c’é(k£±ú¾MQÕUÜ{­Ó\Tïkv]qÌ Ã0vÓ`- ~£—Ü4äþòrõ±þ7÷«ÛEqI§©Ìøu²¦Tö8²M÷^¿ Qwù ‰e°ŽÇã•ß1 Vž+–em×㾉·Û¶òl¶Ñæ1¡4·RGš0ÃÒï°;jY.4q# VÉjX[AFôc~Ʊ7Âj¶²MõÕ{{ö5e¸;Szt]/gƒ%/IAi6ÆðË»Ÿˆ¢½_Ðõ¶¿_žhY ÙºÞÂ,°¬UeÞaìÿ³S4U¨‡×u×±î|t5ʘÁ´ðÊx»lO§V†=N"¬“ÉÏ=÷Çt]Çáá!Æã1F£q~~žÙB7ÅÊ3e‰RhÏ΢¥Ðzk#¼ŠLÚçâ}ö®Á‚­”¯ÿ_d:rœ¹œ‰j´“È’ŸG8a©&ÍæÂÈ kHAçj¶‘Jžðn×^ßhdÿ¿è)ëÌÆ¨ªº£ ¢*`Þk"­(ƒWV¯ŸfÓ¾¶(mq4ºÑ#g²f'sÚ»)󬍸”¤åkDüïQÔ 9éçÞ`4Móu>J’äDX9ó %Èá’–Ó…fÛgD$ƒµÛí¢V«ááÇÎc§§§Îc/¿ü2ŽŽŽ0 2[è¦l}ã÷sq…½6­º 7î6ÖîÏ1ÍÅg5´¯Šl˜×ësë¯"€àM´û)©Ù/48šäá䯦š¦ÙŽ ^/~ SzLÓ,V÷÷màwÍ®»'DUJü Vo6„(Ú×[V³`Ó4“eÍêøîžÇZDüÒ%½Î™ 9™†‘\q¿á÷0MÓ|{¸#¬¥Øó÷÷Ó9N–]à©§‡Ÿ¬JR´úY÷ú2ž»Ö`ǘN§xðàSŸ:›Í0™Lppp€J¥888Àl6+tjðV7~IJ>gÎ’w @»Íü:¿ï†„Ø„VÛ†a-ò>!ð:ü‹¾æH<ÒÈGéÜô3Iq &`̤ßïïF£%w”FU×+»îë'Jã%EYßÉÑOÑÅbGÁv˜­Ñ£9òY8ßÓļ·çÿIòOŸŒãö¦â—½´aTU ,ípw Öu½ø™ieBf}ÉiÔ’*ïúdÙe;liàÃZƒu2™ ^¯;†)g„ÛÃá~¾ˆl¤Ðç[h¼‘Û¬eàÉ“ë,"–ŠYÚ䈱ó«eYÉ”›<<Ü­VteÅ0Ší…grò¬Ýé ìîÔîWKƺñ tý4›áµã~ʉ,Û ÌÖ1M³øJù&ýÜÜÆhPV(®¬–egþD5<½ÍÎva?KȺ٪îÒ¾Âëó›èéÛväx¦›¬U’FªßxÌ Xk°V*Ìf³¥Ç&“ jµZáT/;½ùo‚»nРåyÌ„]#Z$´ëŸ°2¶;rÓ ñŒá$>ã*Ô^(=Ê1¨^ޝ]ÆÃÎDW[¹¡sY7NÆÏ£¦©ª}ì^Ï?5XQì”{?ÓìscgGÚ¿Žòî½%Nªc¿?]¾È‘æ-²®tOEôûýòèòIÿ®î4Û,FYºñö« «ô~7t¯üË 7`­ÁZ«Õ0™Lœùª³Ù º®£^¯/½ŽRoݺ•ÚâÒ$±BŸ%êõÏÖ>ﺳ£×PªTaG'—^{ýS Ø‘Õ3„×ÖfXP6òÆÇõTú¿ˆ¾ÐÅgúmÂÞZղܘ˜­TëTJÜ‘šuF ÕFÅí9;[½¦šM»Ã#GR ÅÎGXâÌïÕ´ðkJ–mÝi2Oµ«qšÀ…5¼AD™½*IR19–Þ2ŽS~Yž«ª®êi:ORœÁ D4Xëõ:1 pxxˆÙlæ°Š¢ ^¯£Z­ÆZ@XÍëd2Át:MåD·.઺>•±àôZ ˧%<–µHÛßæ±ébµå;|†Uú 
lK¾„)2I9›¬’Ï-¢³‰qئŒä‰/|ZX¬‹¨z¹¸X(%¢¸ª x;Ÿ2yÈ6±³«ûÞQ„½Ým¬éE… Ûo4Í®Û5›œ·9EYó”q7†a¬5XEQ,Fýj¿\^¡( ¹.²#Â4÷uèþn×<Ôõðk"åóÔ%¸×ëA–eŒ®‹åÝ ˜^|ñE§‹pÜÁíÓétiTŽûñ{÷îáÞ½{xî¹ç6YËHM WÖEÝݘ÷"nµÂßë—²Ii*½žíIËS·PüZ·±eïm‰Û’o`ƒ6ï²ß¶‰G˜Þ'ÅŠS‚ Ë6eÜMâšm?žyÆvâ)-/L‘k°ÞD£>ò’íÇÛUt¨jptT×÷À ì¡( h(ó!îýT’l½¬Õ² Þ-69+’ŒG‰°ÒžŸ{‡à°×4i“޹^ÇI6Œ[¯Úät/Ê`E$ƒµR©àÁƒxùå—ñðáC8Ïàå—_ÆÉÉIäšÖét ]×/‚ããcT«U†W^y“Éd£‘9…œÙÔ„© ’CQ\¤x¿§°‹À²ìÏýð7k:Ž$…§u>fÓekD5V·Ì¶å›H$ãÛN©¤Í\Ý"ÝÏÁJw¡ÈKÆ ]×Ó‹®>y²ÜÙ0M,+þX‚$¡,GÜ0ò–m  :KäQ¥FH~èú"e^–í×ù]OggáÆ¨»v/.²lß·4f®2î&jÆŒ,ËÅp5árGë7¹?¹ål]Oƒ4[kØg‡]ϪšÁ ØiƒÁƒÁ`)… Z­&J>??÷}n:b<ãþýûlcùààÀ‰î&¥©c‚SYú¨†‹¢¬zùú}»F) äÜ+Àžà‹ ÛÈ[K«`k³Xóo£HÑ¡uɯ_>)»¨¼•˜ç[@nYÖöú°?”ŽEt0H—‰;T›j4ü"©Q7ßì4åu¶½„üŒÆM,]Óé?Æ—¿üå­/?©|À›o¾‰ñx¼rŒ­GW7Ù„’(9ŠÍx<Æo¼‘Ëgg±‡ûË`U”EmX¿ï_'–E„•j™Ö­^GN˜‚â×C–må¡Ñ°•¦V+úÈ2™Lðøñc<}út«Ÿ›ÅþíG!Ëì^ƒÕ4— ÷ër6žhÿÎBÇ^Ƕöo‚zÌ”.£Àk°šf:ŽŽ ïwÿ^k°’ðQzÇ+C£nÒ$ì‚;¹·Þz ŸÿüçñèÑ£•çriºÆ¥ëÿAõ t³ÒueÇ[£ÚïÛaý¤‘d¶¡ZDGC²4`72#ûêê*7ƒ5©|¶ÂóèÑ#ÇJ$vÈÜl·"nYË™~rÎ-ÿñèÑ£\” ›=ÜKì1eÔÄnoo¹žŸˆbT&AlŽrìM•vÛþiµüÇ×ìy¬YìßA²ŒiSòì L×­AýS’›Íxci2€öïmË7°ýÛM¿ßG³ŒÎµ^o9‚ªëé칂°ùó»%iùœ2šK:x\ùþÚ¨/tשFm®”wBؼ¼÷½ï…$I¾)…næÈ£™^Þ”Hwº°a,”Mnp&¢¥»_¿ hVmŠOš!wcMƒ¤ò ï{ßû|å;±C&l–Ø:ÒHQqË7`ËxQ¯Ó’qtt”‰C1 Yìá^byã-ËÞ‡ÃE:°w¯ÌÚ8Ø–¢NFëŽspp€jµºõ=<‹ýÛËNGW½Q(·ñ˜Õç©êâ~Õn/ÿ^ШíßqûäÁ6öoÂ4Mhš†ËËËÈïÉMóÏ3Œt²ZÒª}]‡;%˜tAïØ3IÊÄ`MªƒGnº´-nݺ`9-a:n|ánÕ[4›ÈÄjD5ÌHŒrÓ2Ít¼éQRua;k¶±:б¶ DbTOòMöb³Iä2 …ÊÛö]–ýËÖR‘ÕNÄO©ðñY5óh·×+ Q Ú"Ì­¼Ád-ÛÀެÀòµ iÙŽqÑ´e]¬Œ‘¼-³ ':N1š(Å!H^‡ÃÕTò~?ß,Ò„Ýß»aøŸ—(†§ÇoqP@ƒµZ­¢^¯/u0ÓuulCˆ>¶)AEÒq#…á“·¤ÙÜÌxб~&¬É“%Ô˜* cH^÷š2YÈ7Ð!³¥ú!‹ŽÞ†) YÉ8aYV¶‘ü¼ï߃ݜãÀLx>Ç·G91x zLpm, ËØqb£µ(Œ[§jÉõ§?2§B× hoˆ,PhUˆè ޱ²¬M·@#¶Bv#A4íÕƒœö4.’¢ÇMì´èMœõ—k¯ ;ûÇç…ó>Crfô|aWâËü8|æ™çù=o+'i7“©‘››ÁFwØ?S)‘ÇÚÆ ±HÙbdÌdeërsv˜EË8 ¢«GGi2>bÖɪ*ô±×ýȾi…Eë|I ßÏd_‡ummÍÒp‰ðÊsJ­VsìpZ©TpóæÍY å;G!5ó)U§Ô.w«ì¸ '.Šíׯb¢¬:D7c›nN,C¾‰Hü²SmŒpê†ÍFE&Y¦Œ›É„Ã÷†Î©ð+e²kg5*ª*F6ù?·Ý6²¢d6\0V¥¿5MC¹\Ξ¬“ƒJP}§[“$I _ÚïÝå‰emRnnŠ „ åˆKþþ|ÖR©ªˆ:) …B"ÉJFÚ8ÑG4'lÑFKÌ1‰1€H—.ÇX“-„k,µ@’’o FÊ{…Ój…oÿï´ÎzݘY­rºVŽHRÆÍ¨ª yÅ]<æú²Gaid‚=]n 0˜ð,R¶3±“N³,íT«â>Ónûßêu¾‡$Ä¢dÈpÚ;5&"¨)Ñ`\}ôªÊ>(pâï%t ëéé)ºÝîì'ìÌ¥e³ôúU7DsXµþÂ×®šIÒñ@4XZ9´÷b8QwÎiPu½.Œõ“64˜@„®a 
êäQt&)̺»Ý×8Ã횦19L.QUÒEú~ƒÞ»dÙMåEŠmÆ tã¼´@eM9¬né´A6dì¸ÙüIf‹*Š(0'ÜêgÝšË.À5¬Š¢à•W^™*|xxˆR©„N§“HšpÒ¬d'§Ý¶ÖÞi¼J U¸7cZĈª³!”¤®‘¦?‹–ý”DW“$ֆ̲7sÒ²yÄ\, àÍ7—\{„5‰l™,D–™È¨ªŠ^/‡7*¢ÕÍ$ÃBc© ±±IÎëÑ7!Ë º®§Ûau‹rÚ#¬Å¢ø›[t5j©–Ýg1²L‚0ïåe·-Ù· a===…,ËX[[C§Óªª³ŸN§ƒµµ5\½z5µÑÖ•à.p³3$x×°&uÒ!"«@|ç­óõô:„£Ê}D"kC&îì³ ¨®‹ÝGînÊ$@èè*tÊ-­•jµÃÀ<ŠÈ󴳄SWÑ Ÿ»Z5"­’$jZùž’I4MKw&A§ã<²EÓæ3\üæÇGÑã‹öYœ&D¸Ý£Ví?™ä°îííA’$Ì ™ùï{{{«þ\úÂ’"ñº ÛüžIª*ä ýþiÔR£5'XJEK‰¸QÙn‹kƒü"ó$“ò%±fjÖn‹ÿÓõ "²'p¯çâT:@ƒúÌ3†.i6­¥¾4¶ °e9T§çc¸ôk—ð¶o¼-¡“¼:"e4É~ï‘ö´&s„®a côJR²©½ÿo.1¤z¤,‹mÞdÔœyô“TUq ™"<Ì-‡^ã–5M|ôN‚Ó 4ÍH®¸H·ÊÔv¶wÎ¥ª–t€„ €'ž†ªý½ƒØs^;v:« Îé¼nÍ£¨^¼×3.„áë°V*ŒÇã¹îÀv¨áR¡PXÙ‡q"²Ãº±áì9ÝÙPà;6¦Õ¶TùøÝì°öû (G Ðÿð̸ÛJªjÈŸªŠMú¡zvær )B†c"Äþ¾¸Í]KûŽ”½3ãèæ`>Göó Ë¢_P¯'þo^5tmDX wºwðàbžà B_vÜFµêýšfì0 ¥Ó¹8§:R³Ž´¤Yº9ζaΩõzìMµte&î;€îT·}t*‚CÎgåu:ÂÎhµ¼'Yiš±”r9ÜÀ† S¬hÝ››±Nk¦H­œÓt3²,¾œ¤n°º.„ªß†u±8ïgi´*IÁGr&̱,;×âú y¹lø+Âw¬M¥RA¥R,Ë888ptHÇã1ºÝ.jµZêFÛDº0Hp¾˜ Jô=S4:qJ¿/Þª^‡è"lzÛrÙØ\¤4”(Cû= üDöm«%dÔn»™72«Ug9vD b»m¼¶Xœ¾§ qÎÄß)…·Z¿&)Â|r2?æÊ¼æØSè¸élh½Xh1ð—ìA±è­X%tÊÈæf2K»hÈr¢åe©&t «ª†»Á.2Í©Z5"¾æð À3ˆ™ôCòU…þÏÿ ÿï'ÿBÜ>:SÆuZít,Ϊ¢ˆËÄ/Øjž"HKtº”é²#_AQ„Ýo÷Sȯ°÷¡q»Lu]ØXf[ªÑ0Žc_r‚Íï×n_ 5ÊÁöQ”D³)¾´8†'MJ ]_¯ïõ —$÷ˆÚt¯‘åyÃ?mç? 
º_»v £ÑW¯^Åñññ,Ú:p||Œ«W¯b4awwwÕŸ'>rtÐ ã ¾üËP?<±1_ãIt:&Yõ°‘t=Úî[«ôÿ7Éð½Þçä$¢VÅ,Ý™]û5¡ü[@ýyqƒ ÝLY6>·y§•jMíŽy³É}¼Ô¨ƒ¶Š;ùÅ-,”R ÒQÆGÒ2‰´ MÖ ®Y'ôdØÆ‹´ÉúÕ4¡ŒigÒnÕ2MÓÒÛpÉ ¥I†¥\Fûoýê?û}³Ë‘aÏh6g e{u:ÞûMN„í},Ìhš¸ éTK’s6ù/N¶‡“¾î÷­j¦^ë¶æÈI·ÕT.y’-R×!ØÏæOböµ$ Ù>;KæýÜ6tÝØùxóMqÌFD“&ÃZ©Tppp€µµ5ìííáÊ•+¨V«¸rå ööö°¶¶†ƒƒƒÔEW#5¤i·…s»y”ËÞZ¥ J%4þ³·zT‹âa ÓÆ£Ó{PúûƆmI- |ßj{5›âš¡t_Rè‘„"DdØ Jmþ‡@ûº¸I4›ÖÔbrbésxrÆ›ªß °ÍMCa.Ë+¤Yyé÷ÅÓ½œEJ=•ˆ„¹7v:Fö²p2¸Ì¨ªø”EdÆœvÕŽÌ‘jŸVÕÜ£ßw߉(—EMC³™Ž]f天 õÖh4¦"¬ª†á²{LÝøeú‹M ”"´Ñã›D’œ/ºeÕëâ–EÏsrL;yûŸt¡“™ææ„ÚËtÝ}ïÊiÒ”y#Þü7Àvî•ÔLŸ>c» |ö³ÿ8ú—˜RRÙ!ØÏæ u*Múýnúý¾ð”è’¤ ä°†ÓzëÖ-t:loo£ÓéàÖ­[©tV]÷¡‰ªUoC»\ö´ } s߈\özÆÛ‘³Ød×'2ÒlÎѤ؊E!Ÿ³Ç§N ü_çßg_¬+±M»s1mhD§ŽÓx§fÓV;eb0 `ÎkJh—-P£HMÊeG#ÃíéTïïå,Ò?ˆCÅ?¯×ŦJœÌ·°ç’2àÌçè÷Å×wr"ŒjBŽ+eûÐÚóî°f& ¸[Ì ã@Zº½Ó-£\º¥Z…ðàÌ!IÛÍšz$Õë@ÿîÌÍÜÖ4Ã'0ã4ÊÛí–U­ ÍüüvÛ°#ìÔëó|~÷sƦW ²ù½ÍΨ½^ð~$Ô<’ÔÆýØo„ÿSNš6f@ùÙ_Gûþ?˦êä°Êr2Ü”Øa%J¥$IÂîî.$Iš»dzÆK™ƒŠýëuÿ”0/ ¸tÆÀÉw~ÚÑ‘‡,yØ6æÚ óNÝ(D½4nú²ê ¶ÿoB¤ <~¯g5º™ðø¦×Èr¢9ÕdPp‡* :ßþ´ZÁ"œý¾aœ¸]~ªjÔF÷ûÞï«ëâ¹t_ôJµ2GúéšuJ¯¥ˆCÈ@ósÖíéaÔoÄl‘M›_˜æSËòžœÊÚ'7t=›F³V6/Þé¤fsš½þ? 
óàç|_wô«cÈjÚùwAû/Ÿ·ìP·Zî™ùö(+¥;í¯Þ½k$¸ÑÆ¿›íÐlZoOŠâ½¡G‚´&/ÈaÝÜœíͺR,Šumx\,í!üQ°eˆD:GI)¦TYÛûh¿ñ‡À‡>´êÓiUÆâ÷SíLIÚauc8bggg±g $;S[Ù †½Wø¥ œºGþ)3`þˆEP ª›AÚk› ü±%¦ÖÖ!íDTõ ‘Çç0É¢i€ŽbbÎj§c4ÿ𽥋06:U¨“uœœøß¨u]¼Ù sBU Í+rê”ÒKŽ“:PÕy'Ù­¤5šk¯vÌéžHÆ”y#¹Ñ°¾'u‡4\¦®Ùïa®e{Çì¼±”±I̘PÕ‹5§‚‰ÍB²BFmIÏ™uRñþú÷·\Ÿ¯ë@U ú3ïªU4÷ËèüÊ÷š>—©¹¤ö„5¿d sº°ë$Ì—R•ËþϺqN òe™K™Â»C°¦-žÃx\ÔœÂ\T¬iè¼å±ÿKoXŒnßéúâ³È:;¬i$PCš°¸Ì?Òu@/Púá»¶¸~ëuQCíÒ©È;€üÉd?®'DTµÑ™÷ÒqSþöÑ[q°Þ²ÜØmQþã=è÷¾‰£Ï¾;Ðœê› 2šì˜wÝé#ÛïW´‹n¿FèµäHšÓš û}`0¿ÓqIІ-Í"6Úß·¾G³i=U^élQ)—óm<…®ò+v"®ÃJƒ¢½¬cÆÂ``Ü2©Ùe3ƒ’÷±M¡Ê—‚b+|§:S7(ºYTÎsÅ;¿£öçŸÿÃ×Ñ®+ž;­[ ^„½®sO‹ ÷+jš¤ÂŬsËådËÅŽò½9¸(bgÉÐ.o¹,CP¯K™Î`4yjjQBõ'¿gÕ§$Ó—UB”ô5’k‡Àbòå´eçý}ºl«Q ò+K>tbfÎhÅž54-¸.ßÜô¶ë)e×,[ŠÇÂþÁç.AîY-¯‘+N)áö襛ÿ°±aýŒÔ¼Ë JqkµÄçõSÖæ59uªìõ CŒÖGõÚN× $YŸv®_P(U-õ¬¡wæ5-ü“f]„…<+Uõ\Í8aHç)3£\¿kZ¾ƒÖ‰wN%Åh’eêÄï¶3++—-óÕ¦³ ¥_yòùî\Ê k ²É@ÑWwçG˜~fˤƒØrN:ÂÛAk`(EË42IU…؆ÝÓÌ:”õ£°aš#¯IDAT°Èk„©†È½Ãꊹó]@¨~OoÚ,lÀ2ÚQ¼Æ8ùäf#f–Nzêç/®:¦8ˆxw:þ“š(Âh¦Õôo~÷Üs{=@úÄ%ËߪUçµ8¥ÕÒ¥c~¾Ûö^ϰÇÚmÿæx46I–ƒ9‹ƒ¡x½|%ºõ§ü¢qh6ÇCeEQ–Óp)ê]–BæGGùs' u÷“×<×gÇN“t¢ßJÆdçPóÃzÝù\ÎÄ–ògÏÎfòìdÔ{%˜ûøA›|ÙäUUãÌžPµìFJž© ò}#¸©$è^•w-rN¹[)VTrí°ºÖ°Z:LJjP¾„}FaZXÍ+!|:­­#/sñpL#›¦4RT U†E¯çžYBÓiì"]¯Êÿùë– ÁíxN6¼W §^N÷9r)e9HvL˜Tí¶XGÖÆ˜%þ–Bõ”[¾¬Ž”GG<0ÚD»,Âæ4–Ä ·(y ´!Oõ^¥GÔD²^Ç ­ZT²c@¿åCÿÊÙ25Ý0gÊ·Ûþ÷j¨”¼é+fžDz8 ±õc0p Ë“SåmôÊK š¼l‚.ëEFYiúGÒÇx«ß†Ã!ºÝ®ïÇãÅ|òØ[holLwòܦ7{ ªâä“ñÚj‰m«%6"¡*F;¼°FR”ì1"ªÊ6cÆ´µíWFgžë–få¢ëEC‹WõÎϪ EÚG¿¼¢W Rrdÿû½ç¢Æ¨Q#6¦VOh£¾ÕŠ^$œ³ÜSE±Ë€ô†Wš~Ðè*°¼QS« tš$)ìv[œ<'MJS–ÑùÛѦÀ`´Z¾õQ >ó =Õ‡òÁÏAþ„÷!)*$ã„aœH¤Ë{”šM%cîðìÌÊ‹ ë¤ tP[- 4Ù éûMbÖB¡€Z­¶¸3sY½¡ÇLÑ4kh²ƒ67Þè(âæu—]‡ˆ˜A>¿ vV+ÓB¯¾&Íl/Ì#(9ÀŽÙq4S¯ê£Ê,ÌiïxkǼKI݆ÝÖ'Ió ý‚~Ž$i6Ù0K ¡.Qä)ªq$‡)ù©ý¾Ø¤¥±NF°yIW’¬›Hð&C¶Ã|E1¦8¤OC^Uç?49¬GGîÝR§–¢¢Qþ¡·=Fƒ±ÏÎPÿÚ§¡>õè'gØÿ¥·ù–’dŒ¼áºP& ‰¤GÁÅa%ÝC%ÛNØ›B:®pi¯‹H?¦}«´Ü–åäûFX+• VýùA–ýç_A1dt•:ÓÍ^¢ëèé-ôŽŽ€¥>ÆŒ 1ÇÔ¯±¤  îÈËp¨ë››RmF¿ï½ËeÖádPÚí¦³3÷ã—+cPÝG½èoÄP„„ª×óiÞ*÷­aˆÀç©e³—àÆ[ˆp,óÒ‰}ž%e5‹Ý]7o^™GAQ”—~w›ÍéF^/»FXUÕûË¢Îm­–ësf›ºn„»§'îAŒËjUL)`˜¨ÝD!ü0™1ý~¤/æ©n—`»m\n·ŸFÃØ 
óƒÊŒÂÞžhÏÊÍ&¢Ò-êåáuªÈŸDÐíäÄûØ”:ô+¡sIº}c#¹Û±o„õôôtÎPÇ8==Mf.t»]T«UËOØ9¯æ¦4º.†`oüÊÏ„vÿ%É&4×¢@Fü\«2ü›‘³z„@cp˜t“„|¶.ØS‹p_ˆ¤¦9ïæQÆ€õ}‹°[óçuÊ:È=;«Ù') Xœüp³õBaAEÿßÜ4:[¥ºÎìÐFТýlêÈm†Ñuß—¤ä[×õù«ÙYõR̲ŒÁçÐ~ÿgçŠK鿳·¶ jSÏÏ\<’Ôß¾ÑUšgEˆ ÓÕjèТÙ6òаR) ["=Ôv2;–a(—½³cèýü¢¬öÆùA¢½F¸õöûÖóiž‡ß«ׯ_Çáá¡{—Ò8??ÇÖÖ–E‘ …Pïa¾@@û÷¯Aºü(¥PWœgŸJÂq”H1ãßexwù5;«Ü•7$!ßsöœâVÂçÖ‹ÆQU¡ ½"1Óž ¦)‹Œì0é")Õ¬#‰YwïŠÉX2oÁŸ¥¦©Sglgåµ)äwÍRÍkÔ´®En6mn¦§sRòía¥.¿,áÎ[~úÿýš?òV”M'Æmqˆ@‹é8L:IÒFñÔáŠbÚvåQ.}9èš°Cé[Ôú–<¦jÕucÑܿퟕ.F9‚]wRVZÛ9‡ûûá҂͵¶æÿ›O© ·u߃u­~‘Oú|tzƒê ³Ú"‡5‰ûB$‡u<|øµZ-±ºXM¤?ÿOÐñ×\ëòìÐÛ‘{ÿø' à=É|`ÎΨ vVsFÒòmÖd´1ïvp3DIÖIÁ$¹û^­²³zÑHJÆ=ëWÍB›4~s’VL½. /óHâ³r2´ƒÐmfx5YJ²MÓ‘”|;ʶª:‘ª Ÿ| š/~7:Ÿûqô¦Í“ȸÌZ‡s&=$i£xÖi“#êôx½.rJý6,ëuÑ(ƒÒ!}®{ã0·KЀ¿ñ§À·ÿ(¹ì6e§ vVsH\ùl»–º>«_5ïÞõCŒ=¢ñ0§‹1ñIBÆŸt2*ÊY)uV©”ºÖSs²0Yqdü8AÁ¸ ºcÚç Àræ’]«ªÀë¯/=z|±ô ùvœ/ì27Ò "5›ÖÑ3Åbª³Ö™ ”þ÷Ñ£GøÒ—¾4»¨ìM:tþÝGf;~6Žg‘sÀ‡~+Ù>ûn¡ö3°³š0£ÑwîÜÁW¾ò••®#Œ|À׿þuœžžb4Íþ6sX§Ö«]yÙ•a«å^O@Š%gS=.$§§§xðàÁª—K‡®Vjm}ÁvVÌ×°¹c}‡µ\vOÛß7 3ºn”ò¶Û"ðÑhX›l,ËIúÍß¼‡ßþí¿Ä_üÅ_.ç€.„‘o'ý 8DXÍ!“ÐËONŒ ÊEo0‹…ô÷ªÇE&¡¿†K¦1|®È2êõëPüæ?ÌWu²Èo¶®ÙÛ,—­YcÅ¢QMB˜ur³9ï°žœøß¾ÌÇpêb«›ECé3<óŒá\:©•^Ïðõ›M÷zÕrYœ'³MÙh¿+ÓÉžN¸,‹{1ððáà;ß¹è;#R™<°½½^xa–3_*•Ðív±µµåøšGáÞ½{¸}û¶c ƒÙ§§è©ù¤º†¿%øqñm%Õ¿Ù|­µ œâŒÌÊççç¸sç¾öµ¯­äøQäÏíÛ·gÏw¼+è„¢¸•´‘ÓhXÓ ™ìqûöí•;Iêp]×wçƒ;1¡yÛ«o¤iFÍ¡½fj?üˆÂ^—DÇ©Vè)*âØô¸,[_»L}ñÜs¿o~óõåÐFùvÒߎõ« ˆíiÝÔ<¦ÓáTà¬CúûáÇñ{[D Iýí›!ãWÃP,¢z$‹ôÙ˜Ÿk0pn"o×eÓÃZLêít/‰ÒsëucÉIGÒßœêóí)¼”QC×·SН9JNµ¢zÛŽùõäXÒ,ûz67sR­ŠóHM™(mÙÉo6諪ŸúÔ_óÏÞ ÿN|xíµ×&ëëë“_|qöóñ|îoô³(ŽBXN8Éùd2™LªÕÉäÍ7]c¯'~èmONÄýáèhuçM–ÅG]ÄšMëûV«†¾°ŸÚ³³Õ'²¦Ã®GY–''f…=™x#²l|Nâÿæ›m&õäE÷z=‹-n{Ðó˜d³¼ù¦÷8œ…°õxóMöº{7˜DöW¯ço{õzÆ5}t$^ëôóúïÞµÚ{Ip÷n2çÛz]|Qô·o„umm ÛÛÛsO¬YŒ”6cÞÉyøð!€à]Ê̵!N!ss15¥ÒøÒfY 4-I¨ñæÎ5IÈ7àܬÃÞŽ"%år°F*q'‚0 œŒp®†×点+n¹,~§È£$‰û¥¾.j : Ž¡ëÖ€C¿mn_PhGvÅ%ɸÒ©%ý‘Ò’Þ•¤|ÏábŒØ»7;Éwþe’ Iùvj¸DwïzÛЃuÚANµÎüØ,ŸmCl[ãüÈ;æˆ+Ý~ü (kµêŸúKvÛOˆÿ»5 §õӌؤÐÑû/êžC)ÎQJ¢}ÖR©„Ý0mœ P(`gg•J•J€È¥7ÿ2tªº‚jQ…SŽm¨›ð"ësªÍœ¸QBîIJ¾g5!ý¾Ð2’äXXO˜Üj̬:õ‘ÉIɸæVP=kY胮 C†n¢’ä“0qä˜îZRN!ŒÖåfE_.N+Ã,›¸2îØaÒcç¥ÑðŸAÙn‹]ù^/üuA‡¦¬… ©^Šbu8¨VÖ«.Ô­£ã¢Øßç ­($¢Ã šGãðÅœ–Í,Ÿ¸ò 
Ødœž‹(ÂêÌQÉ„W4´ß7:•7áSh½×iŒt¹ˆÑË,hëÞÞ$I­[·,Â[©T°µµ…ƒƒ\¾|ׯ__õç±P,MÃþæ‹¶S¬hƒëJ™ôbÚ‘w›Ãè4´™a²B1 ò6Ïž³Ï‚3Ï®×E$1Î&ަ‰÷™Þb°±a´íï÷EµÑÏušOZ¯§¯‰QÚÖ“w,Ù$8á]çÚU&›(Šb8¬º ÐÛÙøH³³I#`¼äœRãi´K’Y¤ÿ8"š^|#¬§§§Çx饗<Ÿ÷ÒK/áÊ•+ØÚÚŠ½#“(qêW;‘UnD䌙±c¼jî¦g†¢;l2Y#LS]7ÃÍM£|§#n´kžÄu`Ïbh6…Ó ©¿t‰r#3Æ Kö O5eËP'Ò¤ s†Y&³ Çi# õî÷Yt0•KÉ"0_I_UM?¾ëp8tL ¨Õj–ô`züüü<«¢(íó_Eëõ—qV5 €€ùï ³LTU5R‚§¡"7Øu0Y&HSÀjpôzbþìl9ƒhC¨\æHUÖº©€˜êй”ƒÉ2–^Óæ’­¹ 9pÛp7C·ÞÀ¹˜ø¦ …Ùü%3¡Û[/šû¤½þTâíáß`ѹ—&¥”ËeK_ø¨M†&­Xv ŽBN£WÌT«Ë¯ùKh,,sA˜ó³)¸ï“ª4 Þ¥¡uöåT\Æ_‡µR©`<ãÔgÊ+={hv¨o­¡xémá_(ƒ-1éÇÒ±í “u,ÑUUK#P”dG0Ì2ùàÓOÏ p§#Da²Ž¦ibSÆ4W¯Õ²>G’¸l‚ F ‡µR© Ûíb<;>g<£Ûí¢V«¥"êJ»òº¨?¸®fI‡Ñ˜aRʬ†uº3wÔ0ä K:™øÃœg*IœòÎd]×QøÂædšR$&ëèº."¬&‡•7c˜¨ksíÚ5ìììàêÕ«ØÚÚ²8¥·o߯ñññìyià÷~ï4mšÍ´ps&åXÆ6Á2݆arÃ,ÂJ©Á¦¦4Ô©—a²ˆ¦ixÎÖu00Æv0LÖ™•tLgy) 7d¢Èa­T*¸qãÑívçßÚÚÂööv*š-À¥KßÀOüÄçqxáʨƒS™L`Ž>™z/1L.PÍ[ñ¶è*Íãc˜¬¢;ä²;$0Lf™m8š¢«œ=ÀD%à ˆ.À×®]Ãîî.†Ãáìï•J%uu«ðÔS£p/Ð!,±³Êdް2yC×õYó<¨êÌèÑuñ+§3YF· ÇÖuá°²\3y@uÈýå¬&¾5¬v …jµÚì'Îj¹\†Mn÷C°1ʆa2€¢("Ì45z\ð1Lf±ô¦Ð¥M2L–±k2•ù1Læ™Õ¯šPUÖÝLtB;¬Y \.CªVÉí^èu«=œªÀd„Éd",œiX•‡^3yCÓ4±ñ¨i–<²~Ÿçð1ÙFUUkC1cžåšÉ š¦á‰¯~Õ²©Î0qȥà À2ŸÒ“D0ïú0YÃtà]K&oXf°N‡ô©ªØ£á:(&Ë躎ÇbÓqJ¯ÇrÍäMÓÐøêWgóöÊååÏÆfòE¾V¿¢>šAÏ‹™¬aŠ:iÏ`eòÇ,U.Ïvd…Ó&™ì£iÖþüÏg6 GŸ˜¼¡ë:žøüçy7IŒü:¬~E}€>D*0Ãd “|<یɺ®Ï¥L"ÐÊi“LÖÑ4 ?øïÌŒùÍÍU¯ˆa’åËôG¢ÇÍT³Œ3qɯÃêWÝ­B8«ÜY•É"¦ ‚j•£NL¾PUu®aG– “ ÷ï°4Àf˜Ü°vçŽ%3†›B2q <Ö&wð ‚É2¦~œqÃ\67£#®óc²ÏøÕWñWï{aȳ1ÏäÇ› ¶$±ÂÄ'ŸV¯éĵ« “UL2Îõ«LÞPEŒýhµU…¦ ƒ‡U&\úÆ7ð=Ï=]çúU&躎€6×~3)"®|+ŠhÌ3)™´[‡+ :ßü—Ül‰I%qä[Q„L³\3i%®þn·¹;0“,©­aÝÛÛC©T‚ªªxõÕW1ÑívÿÑ>¸É“:âÊw§#v.&­Ä’q]‡ú¨åsoãfKL*‰#ßý>o62é&Ž|S*0—q0I’J‡u4áôôÛÛÛ€B¡€­­-œœœ¬zi ›$äûä„wç™ô[Æ-}ŸU&•Ä•oYæÈ“^âÊw±(lNf’$•ëùù9 R©ÌþV©T0V½´…p||»êŠB¡€'Ÿ|O>ù䪗šo}ë[¸|ùòª—Šû÷ïãÒ¥Kxì±ÇVrü(ò øÀðÞ÷¾7“²’E9„¬Ü¿?“:å]ïz×ÊŽÏ:<;dñÚ|ê©§pïÞ=¼óï\Éñ£È7ëïå“uýýÔSO­äø¬¿³C¯M²ÁÃêïT:¬æ4;n­µ×××±¾¾¾ê¥G¢V«­z rí«"Š|À'?ùÉU/=2,'ËeÕç›uxvÈâÚW½æ(òÍú› ʪÏ7ëïì嵇%•5¬kkk¬i £Ñ(Ò¬J†I,ßLÞagò Ë7“gX¾™4’J‡µT*¡V«YŠåEÁæææª—Æ0±aùfòË8“gX¾™<Ãòͤ‘Ç&“ÉdÕ‹pb8bgg¥RiVä}ppàZßÇ0Y‚å›É;,ãLžaùfò 
Ë7“6Rë°¢ð›ZM_¤<=ç[y=î÷Ú$?C’ë^æÚ™Å²HùòxRŸ!éµ±|ç‡82žWù^ÖÚ™ÅÂú›å;ï°þ^Ýڗ„‰Ío¼1yþùç'ëëë“õõõÉóÏ??yã7fŸŸŸO>þñÏùå—-¯÷zÜïµIòÊ+¯L^|ñÅDÖ½ìµ3‹c‘òäñ¤°Ëwܵ±|ç‡82žWù^æÚ™ÅÁú{õkg ëï‹!ßaM€Ôj5¨ªŠW_}µZ ²,ÏßÛÛC©Tš=>Ñív=î÷Ú¤8==Åáá¡åoq̵ֽ3‹e‘òäñ$p’ï¸kcùÎqd<¯ò½¬µ3‹…õ7ËwÞaý}Aä{ÕsÖyíµ×&ëëë“Ìþv~~>Y__Ÿ¼ñÆ–ÿŸþô§'Ï?ÿüÜsíû½6)KÔµ-ãÚd–C÷“ƒ¬Ê7ëð|Àú{ukg–ëïÕ¬}°ÃšãñÝnW®\A­Võk×fwãáÇžß¿ßóµqF8<<œ­Õþy¢®;ÈãL¶X„|/ZN¼ä;îÚ}m2Ë'ŠŒûÉAVå›ux¾`ý½Üµ3ˇõ÷òÖ¾*ØaM€ápˆ ‡Cܸqò[bÇÛ)•Jž{ j.•J±×}||ŒB¡EQÐív1q~~Žn·‹ïÿþï¼î 3ÙaQò½h9ñ’ïÑh”êk“Y.QeÜO²*߬Ãóëïå¯Y.¬¿—»öUÁkȲ<Ë£· 奛Ãó£Ñh&0^û½6.—/_v½`ßýîwG^wÇ™ì°(ùòx¼ä;îÚX¾óETÏ«|/zíÌò`ýÍòwX/wí«‚Ö˜(Š‚ÑhI’pzzjùÄNF­VÃññ±å5›››¾û½6.µZ »»»³ŸJ¥‚µµ5ìîî¢Z­F^wÇ™l°Hùòx¼ä»T*¥úÚd–GÏ«|/zíÌr`ýÍòwX_ù~l2™LV½ˆ,Óívç'€ªªŒt…R©„ñxŒB¡`éjæõ¸ßk“þ,Ãá±×äq&ý,Z¾ƒ<žäg1Ëwܵ±|烸2žWù^æÚ™ÅÀú;kgëïÕ¯}Y°Ãº$Æã1†Ã!ç¼y¯Çý^›Öu¯zíÌòȲœdõÚd–K9Ȫ|¯zíÌrȲŒdyíÌò`ý}ùf‡•a†a†a†I%\ÃÊ0 Ã0 Ã0 ävX†a†a†a˜TÂ+Ã0 Ã0 Ã0 “JØae†a†a†aR ;¬ Ã0 Ã0 Ã0L*a‡•a†a†a†I%ì°2‰rzzŠñx¼êe0ÌÂ`gò Ë7“gX¾™<“gùf‡•I”Ùb†É#,ãLžaùfò Ë7“gò,ßì°2 Ã0 Ã0 Ã0©„Ö%B¡úÑh„ããc(Š2{l8âððpa;#tìñx EQ (ÊRÒF£Q®S+«’ñUÉ7À2~‘`ÎäÖßLžaýmÞºê\$vvv°µµEQP©TpzzŠZ­†R©„ÓÓS t»]loocww7ñcïîîâúõë¨T*³‹òÆ(•J ù¼Ãá;;;$ µZmáç—Y=«’ñUÈ7À2~Ñ`ÎäÖßLžaým8ºdNOOqóæM`ww§§§€[·náÆ$ia;<ׯ_ÇÁÁpóæM¼ãïÀñññBŽe¾P®]»¶°óɤUÉø2å`¿¨°gò ëo&ϰþÎ.a]2›››( €J¥$iöøÚÚšçÅB© nlmm¹îÖlmmÍŽY(°¶¶¶ã ‡C\¿~—/_ÎÍ…Â'ŽŒgA¾–ñ‹Ìªä›gÎ,ÖßLža<»p„•Iœn·‹w¼ã¸}ûv.òæÆË8“gX¾™<ÃòÍ䙼Ê7GX3F©TJ<·>éãlmmawwýèGÑívsµÃÃ,–,È7À2ÎDcYò÷X,ßLX3y' 2žWùf‡5c ‡Ct»]×Çwwwg)«:Ž$I( ¸vídYÎMÁ7³x² ßË8eÉwÜc±|3Q`ýÍä,Èx^å›ÖŒQ*•ð /x>ž–ãÐE²··‡[·n-ÿd1™#Kò °Œ3áX–|'u,–o& ¬¿™¼“%Ï›|³Ãš1 …ÂRvJ’:εk×påÊt»Ý¥¥Â1Ù%kò °Œ3ÁY–|'y,–o&(¬¿™¼“5Ï“|?6™L&«^Ã0 Ã0 Ã0 ÃØá.Á Ã0 Ã0 Ã0L*a‡•a†a†a†I%ì°2 Ã0 Ã0 Ã0©„V†a†a†a&•°ÃÊ0 Ã0 Ã0 ävX†a†a†a˜TÂ+Ã0 Ã0 Ã0 “Jþ?wcÒóù#ól%tEXtdate:create2019-03-28T17:36:48-05:00òÉ6ô%tEXtdate:modify2019-03-28T17:36:48-05:00ƒ”ŽH-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1080x792+0+0_Ýx+tEXtpdf:VersionPDF-1.4 
G:xIEND®B`‚blis-0.6.1/docs/graphs/large/l3_perf_epyc_jc2ic8jr4_nt64.pdf000066400000000000000000001057641360743507500235060ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190328152855-05'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½KÏ%É‘%¶¯_‘KÍ¢.Ãßî -¦!¡ÝÐb0+›-õT·¦Ø€0úõ:Ç,"ÜÌýV%¿,ƲѨ¼éá0?nnÇÿã»ðåÀÿ}Ïÿ´¿üá‡ïŽWˆEþvþ ý?¾«¯0ð±uù;þ,=—ö%äü%UyÿÃw!å?üwýC8BÀŸù6ýÓwÿçwÿ‚Þÿ„ß¿‘¿›ÿøÇ?~ùÇïþ¿¬Ç¿ÞB+_b*ÊQþôßåO­r&h”?üÓwñ‹±|‡ö§ïþËÅ?úoßÅ/ÿýýßhª¹ô”¾ü¿ß…ãËzÛ÷ïêÓøaNì‰^bj¯¯Í_é•Òq¡§‡:ÆÛ_w¿÷Ç»½f(+{ÿÀ꣇öìlõ{žÞ:ýկ×P_ÜÛ¿ÿG Aßÿ§ŸJ ?jΡãÑû«§–zÌ‚÷û ßÇð*xàËïøò_þ®ñ¿ûò_¿üþ?~÷~/Óýj_ñ8âkÄTs å#}Õ­¯§öã½ñù‚çfx?ÚÙÔ) Ï DèuÄ)[Ï®íÁ!”ö:Jï1Ö´ Á¶=8„Ð_zIéÍ*ضñ~¼úÍ%Öu¶éy ?^)¢ßZž=v¸ˆžëùDO†àEÏáyÑ“!xÑóCx^ôќ蹼½¯ŸMa ›1rl­<">%ϋ޶ãJàyV|ôPòÄr¼JíG ãÍ™¯NÂ/ëå8¸1¦#§Ÿé%þ¢^Z…vPpÖÖ~¦“ô‹:Á÷|áXo1Œñ3d׉()ý-À+)߯CþÛ—ïg‡f$ö_„#¯Üs Ì@Ü+—TRŠû’Vü§èH~óëßþý?üîWø×ÿ¸ ÉêM?»-W=ˆªq|Égh­Ö²/A¢ó}Ö¾ÿü§?þðÃÇõ¿ŸÄþÅcª)‹Œÿö2^µ¡)Öþ%Æöj¹`[ŽÌ›N/ØUXÑŒ%ÅW|¥ž–¡H^IÝë¨mØÎ¸lµ6:ÛÞ9Ø %åÕ¡pC‰ŒÒ†wr(Øþ_(-¥ŽÞ¤/ìì¬w¬âñj˜y ‰Å#¼‚}ñ…“iµP)Ò†¼°•Š6üè­ô’‹´áHB_Ò˜K‰CâðÊŒW^?oË1kwo9ï ¡¦×H­¡¿,mÎpÛ2‡>±xPL‡’K¶ÚÑYÏÒ_„*}.W(K$‰²\ºnæP"¾k(á…‹É2;Iãçé‚ñŽˆã¶á'‡R°²!÷W ¸ö¦¤mx'?OÅF _¼„†o-SÈáÅÙa:qBŽ S`÷’¥ÖdD™Aæ¶dw-IS]½HwuçjXÀ€qqù¯äPR­ÚÐO¼Q°­S•O@Qæ\ñ²KZ1£WÂ:´KpŠk8Ú+H!VHÚð2¬GÇd€Öõ•q̤*âÞ‰OÓÚV >±œ8Bs Ÿ‹¯ôÅø“´áü6(´áÛÔ– *ò¶N¿%rmPc®2lxKl<(Úý*â…­Sn‰ …ž ÷4š°súµú˜]®o|i¹*8çðT…PÈ·JöNi÷ž Ù“Z9JìK|D ¢¶ö* oÆa+müåhMúlŒe {çÚß|%À“•ÅLØ;ãX´áÚ&b(mxå-°!¾BÁeY0|tÈè%°"3pœÊþÀÇ . 
RÚ0sˆ²¬JÂÞ‰òJ|ç¬Á¿‰bÈ6¼cÁc<< /ø=w6:8àXÞYZkG‘ ’ ˆé–X̵ã8¬G‘±`óÔ[bÑÖ€!¶|šä¯BÍ8£?¤µ)ôÿç²ÌuÂv\`û-j7l9LeEmL3âÀmM¾€G혰½±àãÈ;js‹,@ØQ_nP^Ye‡Ú„NºÎiîˆ[|jÐF·n9†Á#¥¸ñ±‰–Üyn".Ð ¥Èf›HM<‚½âvÄÆÂT0ƒ²â6ßQBõÐÍjq€BÕh`…Ò‚ÛlÂêÄŽÁ¬¸Íc×@5aÁm.f‹=qEmªÄ‘;VÔŽGæ9]°7ûŠÚJb9ZÈ!ì¨Ý1ÊU‹»Â6G (1¤²®ÈMýÈùaCn(7X¾N¡X‘ ñ¬÷Må–ëD:a¹7è>äª( ›ÊÝÎêwÐÿâˆ+t¨‰ÐÌüNåΣ*rFzè>°ÒØ i„º©Üø\C४ƒ¼±b¸[½GÝTn dkwðˆB;ÃGW´òà-Uœhª:»Co8a Æì:7>C®9•R¦Î=¨AbU‡ñàÝ îA$Ñ{ÚÑ;w«ê½;%¨ÀE¯Nå.Ø LÐOêTîÁ´çT7ôÆçNPz¢ž#¼ñé¡2ˆ8oàÝ©jàĪŠÂ¼+ƒv‚°Ån D‡¾û®q£ëi‰C¯ˆ»K…zŒµ;ä ôØÍ½‰‘@u vca1¹Œ«+vS¡ÈV½_yìæqÐSãp6ì.bÍÃg}Ãn Ê6ÐM¦ç±»q(PQô²á±›ß(6òر›²$ιoØwoI1Ý[ò…±• »I]aœMnKjB@(l‡¬Æ_n£ü…ЙÐ9¨¹±¿´›!šàb(©ÄÜ ßJpÇÂ(©” ¸5¢„.v•m? Ü© %±)d¬ÀðB›7àær ‡IÝTnœÖØÈøæan| Ú ƒ¢‰×¹Ð‡üyéö:w¤Né´‡XàŽ´Öa›CÙÝtnî䆅Ým%œ]2ÞèÜPÇšÎÜkÝ™ GdzÜøp—žYNëæmc@»c½Ãm=\gòaÃm,C‚Y’ÞñÒ•†ä‘¨œÒ¡àJO¤Gä©t ¯u-nSº ®èÐf+‘+ÎTˆB\›ƒ(ÐìpMí&Œ0è%ÄiÝ<ÂiïÀ†]q[æE;UÓ ƒnï +ƒSµ­À-¼ˆGNpã⌦9.È-WRœoøÚrs(Kˆó}¬È-³‹È&¦*‡Ü2”4Ðu³•ðhÁS#/ÈÍ¡p‹ãþµiÝ”ó­KmCnŒ«7¨t]¤Ë!·Lƒ wY‘[àÕ¢c›r^i}niSº)”_cCm¾/Ws+js©&šWÐæja!+öú…ÚÒ=Ö2%YÀ(Üê…CBJ CÚÍ”'äŸÔ–4õÂy×÷#ÔØ8½6î‰=Ñ 4^8ó×ãî0%/œûÇãÝ^3”•½<î…sNP¿gü /œ¿Ä`ÀiR{Øï›3¿xFàŽö!Ÿ§ýnæ¼·ú#߬F³ÑŸsï1³©8¯3æQ`YlÏŸáü06¿?„Op~›ßÂ'8?ŒÕïÆày¿+ᇠohÇxPôØá"z®çO=‚=?„çEO†àEÏáyÑDs¢çFðM~7¸¡Dhè¼sÔÇün|'OùÝø^žò»q½<åwã:ù&¿›¯¸µ$¡d{¸ð•¯¸µÖ­åÏÿó9·–4‚w–Í–•vïœ1K™9Í ·Ù’vhÚiòA[Æ;­ÙO¥Út(Ù{¶ä€ë°Þ‹s¶fö ±LXÖ &ê\¬gKI¼xg2 Òæ<[*nÐ #– HÀ+ßfKLwp2þlèÛm¶Ìr«ÅÖ"7Ór›-1W #>¶ø åÚÉœfË|¼ŽAc´Ør[\[* “jÒ¶˜Ù ¶DÈb£ËM][ ´é¨Øðò@w®-xR‡;¹HW·®-x.@ÊP‰í‹k ®Ôô4“Åε%¿úÀ¼‚\Ãóp®-¸½·v™‡òp®-‡˜ø0g.V9œk ÎëBØÅ>_khÙB±Ü•ÃÚkè.@Å=Fž ÆÒÈâàËE12”à|[|Ä(6ÒBú¿ß‡&6rÍX9· !A\í‚xÒ´ŒíÚ…Ò,ÑZÚiíÀ6¦5ZÚðNeÑwеyˆ}¨$kj'œI\$ ½—0–8’ø½ðø¬‡^JZ|[®|ÜLl£$b(ä&mX•š8;•ì|[ðe6P»LÉη%³ Û_X*Hõkp'cÓ¨OLÇÈRÒ6çÛBó>˜ÎK!ˇ™§Å…lÍÈ"œ¢±þ.”˜ŸrmÙ»]þb+r“m!ªï‡îˆÇÈ)Å ¶±Œuà»`7l°M÷ŽJ/ݶiÞÂ<±äbûu°MHN Y~Ûc¼èšƒ¯šVÜ&_Ö;EO4zÇ–‘E0âŠÛbœ/8ÔÀá6íjR¢ÞX·Õo ”†Ûb£†¤÷žØÇ–^èWØ–3‡R:?AqFv®N»s¡‹3²‹ÖRN÷5Û\Œˆä}^a›<Äy?ËÛˆI“ªc±°Íqb $Òé+l“>†TÏC°-ž-˜[©A¿kS#;-Í7ûæQ»Xœ’S.mEm!G4Q6Ô·— Lz çÛÉìvœUuCíI”~Ÿ µ‡HÉ gá†ÚƒúIV¿ƒµyÅw©ml = ¡bëÐníF/9š²Ê†ÙPë‘°!l˜;ÂcãÄ ³0›Þˆ õ²ñJžÁª°A6=[OÀÚ7È”rHBí »Óµ =¤P6Èt\kx£â¤ƒl¶åȳe‡ìžy£J„ô ²ñ\Ŷ…âY7ÈÆç)Äë Þ(²ÛA| J‡yÌTÌpvâêxa6.r8…ãc6„³£»Âõ˜n¨ ­Ú 
]sÆÛãUß<¼îB"À†P’ p *ôÑð¸±÷?wEKÜìuà-wnÁ!HN¸”jÜ*i«êbç€[üZ nÇcn²¸ègLÜ€Ävˆo~Ô ¸yLFE¾Wà&$ UªZ§En((ôdÆR ¹ÀÐÒqºˆ…aCn\Br [€`”eBn:¢Am>µK§nw|ƒ†Kn‡Ûây”QÙq›îÊeízx:Ü&ƒynÚ¶p¸µMMürjIGÌêåÊâ¶P}XÝÔ¦mó´MX4Œ6n¸ij¦vuìöÚ6£]*¥.nÚ¶(!oHú±‡cGq‘`ªçYÜ–=0°Ë¢\¡n“v„,G("iî(而¤ª±nub R;pË Þ"nliS¶Å!K×F +p‹$4©eCnŽ lºsrëJãS‡cCn¡;()”äÏ®Žo®w@‡Ü"æøBаêŠÜ\”#ñ¦ÑÚŠÜrñ§[wÐkM^ØQèsM½ÅpË÷áNps5ùU£ÚpŸ¯¤Ç~>[žq"©cÀ‡ R|ÒÓ²ÛúERòOÂb°ñA‚ô]ßOØ0Ïiü0'öH/ͤó×ãL%îÜ“ ½<Þí5CYÙûÇãé9AýžùÛ Rœ¡ôÅå;ü0A È¡»f¦'Û‡ÈØ§ Ò¹ï­þÈ7ëÑlôçv ™†¥ë9ó(°,,•ëùX*‚g©üžg©dž¥òCxž¥éu,•Áó©•ðO!HÙá"z®çO=‚=?„çEO†àEÏáyÑDs¢çFðM)ÔÞŒB.v¤¾“§RßËS©ëå)‚ÔuòAšqcÆ%+ÓDñ‚”Æ– ýñŸŸ"H+ !¸Óàš† bʯÄðË„)d£3³/ ÞijTwöÀk&cR‹½ ñâ¯ÑÚk¨ð๨NÖ59{ îIüz-«IÝÙ±ÁÒé®Û.ú­&ËÒÆ„õÃe‹·ÕJC¾S;ÔfÆ Ü;e˜Ùò£¸Š…Æb¨4„è½2H︩ WP‹%H[gT®„r‘­EÝÙ=òPÔ½ug?ÕñÆMTº«–å+#ƒÄŠëZ.!%¸ãæ*óZ-AJŸlæ.ÐXðÚH"–K¼³,Ì{ M–­ô‚OD­.mêÍ®–K¼2Öp$‰4ªÝÚkj¢5åž\·vö’É—Óµ[¬«7».ÅVD×}ý>ÃÒ£…1kø²š1 2°ç6\fÜîi¹â^‡lr{ÎX…¤ÑrÉ«d4è˜E•‹v;,=šók†ÎÈò7B.e6Ü!0‡>¬Á† JÒEe›5´g†tdÆ¿kÞy lHLêÁô²X@chOØ¡_qGØh ¹VÌå¢2[tÿŒu…ª£\Fã?½6ò³ñŽ¡Ë’¬©A)‰|«XüZ²¦vü8-UÅÀݲ5µ3Æ‚~Mtø–­­ "Ú`ðŸ,Y¶¶vL=2SECu+ÖÖH‰jJËþ½¡G ^72½bmíøBØ[4‹K[µ¶v¼²1v•ý0=º¡ö Õð 6Ô†~5jIÑס6¹­º ÿãQmX‚4²xìyÔ²(-¸¢v r«ÊêÁåQ;1D,eÅíH¿H¢ºxÜF>\Ììíqc†B—Ou¸&’[ã`UÛ‘¸R™(m¸ɺà$8zÙ€›g2Žý¢>v¸¹³Ëɋׅnöw‹´îÈÌP^”ôÀ6\M˜ÚdS·¥7&.Ðc¸ñ\ÂñÑ•ôÀÍ·ÆÛQ<ÛP¯ ÜtöS¯‘¸7C ùM»KŠÜ\EÈjÐ jfŸØü Cú¶÷'¬˜ç<~0S{¤:Ý\åüõ8Y9ʘéýãñn¯ÊÒÞ?çHÏ J¯×Ÿ¿…#-U‚à#­ÏñaŽ´Ô±hç`>Œ¿!ŽtîÁ{³?’[ûÚàïA3Ÿò,);\¨*×ó'PU2OUù!š_²lSæ—H:æh†1¥ÊëðÈ_1˜uö6f¦0‹%EY–Á|Œ“2mü íPö`HÔÝeÌ̃þä‘,-Û$›æeÌÌõ… = [ÆI7…I™–$gKÕ\²ƒiºð@DG%C¸åeñ°”i¤}ûVlLèÑF”B)¯¤Ü²¶5K™¶$¡FÍ(ˆ;¿±¾' D‹T¿›1Mt︢‹ p¤fSà hò²Á|ñ3s#ótâ_f‘t|÷É˜Š›FâÇÖ¦fãII´b¯i@ï(‡eLq?c,͵-[Æ”œ{ÃhÅâ=ø¥n‰$‹¹ÄBŒzXÆ4Ð2ZšFæ¼%62´§&fE—¶fJÅÕ¿Ö,†EÌËXßñ\¬­kôáÀž¸ï’\=‡ O5OZ—²š¨ U6ž ì<3kBŒlî—˜ïÅôÜ>Ë!➤®ïýëq—ˆkŽg¿í'œ">\ßWñcª‰WÓg]$³¶0CR>ê‡Ü1¯ooöæ |Åkç?¾7ÝŒäâ{ä+÷ƒ]®eî]ߟÂU—ÍSbÄgÕeó•XñluY½%ü>Á]ÂJ»$ɤõñ¨j1-/„®ïÏBÄ"„~Ÿ „2ˆEý >A伺1|“ÏD­ ÂÁ˜ü\áû¥—§|&–nžò™ðÝ<å3á{yÂg¢ING1WƯ¸Ldë0ñçç Ë‹ŬãE’hÐ|Ëx«¬g”@—x¯ vØBßÍvŠFǽá*bÓ´ÑhsÜ[|r©DŽv¼´Üt8s¡ÏJ“º[Ì`*iïÄ´b³øqÒ:ÅG³&\2©e)½ÆÆnÜ&†ÔÑ’âhÚÆ÷`]rO²8e<øZm´Ž,cGôÉá<'h,Ç'…Æ,¹º(„¸Õ½‘ÑF¯#“‘R Î×Bg!ÖyB¨ÓÏm´Þôº8²&ý“F¼–Ì{7ˆŽxV ÁºO4_'Ö6ë?/ >,ÂÒÌRé’WrÆ•×À\Ñb4 Œ;>]ãØ9Ý#è[£ó 
`4€,sÕFëBQÈýÓ,]t´˜b<¹ŠæÎ"mé.HÍb& L i”3Í#èë@à,/õµÉ:wšK/9fäyì‰fád]N9ÖlëÔ JBØoGŠŒÎò8t$ÅyRŒ—†¥ž}¼öd‰"§Ö–âOr ²„´×†¥=T7ƒ*ðëÐ¥©Æ‚Ž¬Û²~)ÚMoIN,ê ±Ñ°èÀÏêBÐk/^ÝÚè<*耣õȂ仿%9JÒFì´¤Ó¤õô–ä( Û+ÀG¿FwNä@ãJušÝzU0&ž;š[/hØK’™à¿l`]¼aý*˜@‘ù„OaÖ±BÜ?_%‘W´›Je5•d)=Ø ¡kcf–ÈS’yIczÝèd#¦k=ÀÓÌŠkU8àé[1¨ƒ‡Æ 1Àß’Ì.ëÁP$µÐ‚¾%™±AèŸJN€1qþ‡Ô˜V¢ß`È0èYöæ±.)˜'<É5GåŠó sÏ|ûŽó¬cŸkêõ„d‡óô²H#žœÂôlŒ´þ â€^bœðŠ&]õ@/Nø½ŸqCèéùÀ§°uú(1p±ÊËô’õ;°JÅyÖY 'K@¦ï Wô@/%ûXC÷‚ˆÃ±vÄ‹,zÒå‰ÎEZÞ!½µ<?-Œ éÕ=7zÛ^¨Í½#½x:6@a yƒzÉnÞ™£ùÜËꙦÜÌ’‡zy-£õÎ ë ^˜K Z±O¨¯‹!Ž ê£¦©À@O<²P/œ ðfäÔfo1õ¼!½3šœûŽôÛ[-ÒóI&žÅ†ÎÒ“ ¢§gç`«çñX”™\7¤‚ ÿ RÔ7¤§pdœÖ=ŸÇRsTÙ.,Ïñé…qì*ÄØ^ê`Væ.iCzaÊèYÎÑvç‰Á2«G%†lH/y÷Z½á}1PŽ êe@ó/»B½Ô×=Ä£$mP/±¤¤žLëÙ' +gõ(óXÏFàÀ!— ë%+/.ƒ±œGˆÅzÉSÝŠE7ÖæýOÇYäôÉ*ÈÞc›·ƒ5`w¼g“8Nªïg p‰êâ¶à=ö5=ÇZy‹÷ «Žê]µ¢}£—õzÊ´§Û+êqG{ì‚”Rkzž/hÉŸþc;ØCI¨i(®x°ç1‘´Ôð°,.tôÒã°g#ˆë¥^¢vq4Åôé釭siIéñNì+(;ç®sHO9`D~ƒó$@©˜êq¶à¼@ ù=Î7ÜrqŽfÅyh™kÞUz9ÎÎM¶ã<=1pLötì*½Y-¤®©6nœoL¾\Z}‹ó•7(|‰üæééS˜™»l0/é@ðW)œ£Üp>bÝFMopžH…˰֣yƒóA€â Ì'*L0v˜ot×ýT˜¼`ÄVO‰r0/Ã!ozŽÕÃ<Ý>XåùT<ÌW| f¿ºÜÁ>iûñ9ŽéÌð²§[~"ÌZ¥DÊ1«bˆVýÓCTï›Þ±®žóøÁÌí‰~bÒäØ:§ù«ˆ÷ü¨Ô =$?ö¹–×Ç»½f¨k;õY²ôÑùžßôúñ¦Û¯b¹ŒQ¨@=\P#½Á‚ñXmZðÂëeªrÿ@_Óº÷>œ;þ‘Ïvîò§÷¡OÌÿ³“¥’ENGP×׳m{n‰…Mj¯)i‚k{pL¶Ï¬:ж!ض‡<€–GÊM\ÛóB~L~Nø¤C/|¾çç…CX„Ï á„O†à…Ïáyá“!xáóCx'|_?¡Ä³ºG&áûP föTû_‘ÃõÐ×QÝâ‚ÿÓ”_ÔIêuöpNUžØ÷"Å[/Ü?Ç߆_4M/V¬ÃEf_¯Ÿâ¢Å÷à=о?lÁ›Xûò½aŒçPì¿£1¿PÅúŠX¸@åÑvV¼â?E‡ò›_ÿöïÿáw¿úÿþøÇmLÖ+îg× e&:âϰø"©ZSçÙ¦1ÿoúãsaùä)ï²s¸»1ÏzÃ}NŽ‚nò*fÆ*Gú««å°k^E’F_X¥­G B‘ë]gNàCÓ%f2^?ÔLÃ<ã¬En8Ó,ɨWõ¡yqo_p•S5%^ÚL^ÅÄÜGfØÁI"Ö®:^‰7èÖ.à’L¥WÐEb²Aõµ®m³î\jòÇÑÄ„)q4R±Fsˆ4\*q$tiÓÄŠâÊWÆžŽ®Æ-¹"‡[‡h,G­Ë‰¶(±¬5ðÎ!ízgœuçp?}QÃnMŒb®»b.R%W{Œë±dêÎñ¹Æ©kFS|i„J6}Lµ˜4³âQ1[¬2“æ]§MÌALPcrø%«f¡œ"ËãÝK†k9«ý:fM­8 öRb–ót¢±˜ÔЉû Ñì¤ýSz.I .øš6¦V„`½2“E'a¯c5¥çR¦5m~›jR+&É ©°e€Õ”žKmì’d“mͤVĤ^ô_*j΋ͤVDO/†EÍœ›)=Ç6Ú’Î+v“Z1IÌPÏ]ÓÆÆnJϱ¿Pqìiaì&»"IæåOX9ynÌÒs<¢™‹ýú6Ã$WL‰©÷™r]l*¼ì•fŽv,x9wVNLNräú‡RjB¯¥Ã$WäsX‹kê‰Ñ™—Èl-¦24“[1 pÌÇh³¹E–2 ÄéZ54“\118kîÈÄyW±ÄÄ,ç$£Ôlš¢I®˜p¯½ëÑ$WLL{¤ë«§$e̵X"ß9éj‰ñé¾E6±Ji¿€Š«[o‘%øÑ%lÕ«˜(—ù0û‡4È2Ǻ³È‰@Û Ýø¨äuÎ=â¡;3f óÐôº#J;nÛ‘›Q¡í=rç8˜ #ž8ä›m¤öÏç;KöÑto‹ÜY+e\f‘›å(>À†Ü¬2{`ÞJƒCn¶ÙÃß"w–̼äó·È-•kçÆ!·¼“U©5fÜ!7«è6àîÕŸEîÒó9ƒÜ 0òV6 
rg2PdÎÃÇ"7—¥åJrs˜E¸[Ù¬¹©K‘µ9• ‡Ü&=mÎ¥¶È…׿\‡Üù` ä-é¹óÁÄñ-Ÿâì‘{´×!u„(ôÈùt3L‹Ü¬sŒ×¥s5rgÚ9Õ³ÝäWÅØYdólè^ÇÔ=z+ÂzÐî?­o g|Ë–Gm*RL®ØåQ;Ó¿4Ôwú6±Êã™:Àƒ¶hî«é£éÜtjË«^£¿pÕc5‰pœjÕnÕÜL&§ÔÕ‡QÖ,T)¦æ±(ÙŒ“ŸÓuPÍüëmîxÕGVæ[ÿsPÍ !@›’-c1z£S²;Þ©®<¦#ÿþ$'W˜fL~-·¾ììNfx5 ÓÄn¨ê×Qèl:ì2Íyq0Íìðéþö¥]T®=éõk:à³^ÈïPkq´pMN¿k_A½~-Àó{z”ÎLƒpMN¿–Ì2VýšC‰íŽèXP'vÈJ¨®ú5k(Hù”\6”¦g@×¼~=˜Hý¸Nz§_7§«l(ml¯_÷8ý”æPÒTÓ6”>¨­èÄë×tBfõ*½˜/(Ý"ŸÎ3Ï£t—dZ×…Þë׬¦n¶qGéÀ0ŒÈ»Ì…Ò¸ãç RûIo‡ÒؘåZ•¥Y5+Þò·èØ‘ Ãy©òpMEz^̸&1_/õÎÃ5o\ó–³Àu`U@tÛÑšw¸{1¸æ!î­`q›#1 ‹Ãmªító<•ÂUÇŽI½ŽÍ ÝÑY€;¾XÁãB§cw{Ny›©ÄÈ÷Ÿ*žnL!bÀç%ÛëØœÏœž×±€L$3ïÀ]_ÝÜJœŽÍëdÕ+øÞôVoS¥tàèr4mT^Çf¥£:ë_V§™ÜëØtíiaêѼ;7çy»óàűj»W±Ëë¬/V6ðNuÓ¬y3Öi$rš5ÝÎÓ-óy‹¿4¢U³¦ÃˆQGf“TŸ·,‡Ù‰‰´æª;Íš ÆÜ¢³¹Ç{›H½ûvšµäŒº5ÙŽ3·Û®Y'š`nDð˜ÍzÙÓç0›ïdj7fŠß4k:—N…ƒl~„0.+žƒl¾2N‹¢W¬¥ÖmÜô%È'_ÇNVà¡¿÷ .ö¼¡?”Ò„<9•䟔o©=oÞõþ%6./9·'úÁƒÆófþzܦDãysÿx¼Ûk†º¶ó×ãž7çÏoç ÷!“¦a+¦ö°ç “½½xLÐ:ó·äy3÷áÜñ|¶Í~ÐÃÇ̧N¡x`Vç×ó'8?ŒÍóÆ á3œÆæyã‡ð Îcó¼ñCxÞóÆ ¹©ÌþœðI‡^ø|ÏÏ ‡°ŸÂ'Ÿ Á ŸÂóÂ'CðÂç‡ðMž7¸ªD¦ïc\Àcž7¾“‡{@ “x^Ã_K 7‘Á+ÝôkI–¡ÊÉ ¥3‰aæyÏ=n#&³ L«›X]n'­í™æ¦;¸ »ÝšRh2³~-@—ëòJ¢Â™Ü{¼Í¾ä<¦_Kq^¦_ ÔÔ•YÖô¶a–$%O¯[MÁP૵™äj †&f`˜n¹JþÊÓ†YhöM§e-7ëÕRhjíw“sjaYàÙ$>-âóqS·¹[“{©´"ÜbЭO s,x²¹;—í^‹áJ´…8¬þ)—–µ3vÔMŠ.¨=ð‰ËDf‹Ú9YW+ÚÚ®OàP›†7ÌøZf‡Újy¥¹Z>Cm¡§Í¡6­Îør§ãP›? 
vÐæ¹cØ%Ú|c˜a´i4àÜàkA[ÜV¦)Þ¶˜Û§{“í,‰LâåËä@[}n›žmŽYJ*Àl­íŒï›;Ù¶z­Üœ›m’džzu -Î$½\d–my®Ü.NµÉñöñ¼Â6‰5ÈÓ8Ý·Åö<˜n³ zŸ‹·IÖvïEܴζ4o‘›RØ“$t‹+r烹믬ý¸Ù”§sn®6(k››¬¤c¶9³{µÇ¾n6NÜ´ƒèàÆ{ÜÜ·†buÀͽ§–îáìà¸épÜòí-ê€[ú3l€›È8í9àf›=²,pSJB9.%Ê·He ýuG•VNiúñnÓ•'ó•Åù³Dë“ãqj§á¹åº‘Ç©s=È”.ÐÍu†rX|§p÷ry.ÈMåñvóðÈMÂ.÷[!uÈ-ëílà‘›¬ÙŹÅKöö{X›ŽSvÈšåLzäf‹[™Z›Þb÷'÷ÈMFúæþvä¦Ò§R´ wÁ(o7ÝÌ7nòÕC7ãYS½ÕtÝx§ñÄöМ'އîDô»½·[f¿?¸Í|ê,Ðþ,À,Œ•ïùyÆŠCX+7„O`¬dž±òCxž±’!xÆÊáyºÔ ù§Ð¥Ò¡>ßóóÂÇ!,Âç†ð Â'CðÂç‡ð¼ðɼðù!|]š†Ôc–kÞct©ïä!ºÔwò]ê{yˆ.uZt†Ý;v„[` öž9--Y”wc½,‘-Y”9h')Ú²%L1ƒÉ*´lÍî¥Y:7×&ó¼UV,_Z" ÷^$F+†/-Œº¿#›Z±|)ÛòÍC¶jùÒBcQ¼xÖV-_J*¾ÎîªåK ÏϨ¯l–.ͬtɰ¢‡-dèRŸ ÓiÍšÝsuÏQ[ âLºð)ºtÁí:SM‡|·™u5Å36ÀÃ6k²—¶™˜B³6{È&ßÞî8pÙ,¤WoöÕA6ív3RÅA6_i¼5d/iifKD„‡Ùl›Ñ¦³%äg’˳Ųl€Òb6mÿFfó9‰g1["jŽ;pÃa¶ÐÓ–ê0›–Ï:c fÓv;f0ˆÃl‰Î€\œVQ‡ÙìkrÅÈ8Ì–@¯É%9Ìæs±ßƒŠÙUï˜W‡Ùl«ýŽïq˜ÍØQ#&³eÒÇ­î;Ìf›1Û:Ì& \ÒKç0[XÉÛ:Ì–q¼ø:[ fóþã„®è¸ÒnÞh3ð²Ns¯maþËíúã@›\µa!h“4;Õ£v£ŸÁ½#jg‰JškQ[xâÉÓ[ÔæcÙ>¶Dÿ›dµS`ÝrÚP¾¢9UëWZ›fÇ–òzg"p°MÍ̈¥‡mŽe: Émÿ|L^æA¶tCî~%RÚ¨>n׌¹™´þr Y;^aã jC™hñ <óÈM´wÆÜÌÛÍàÐSýµÈé½pÇõzä–R@SûuÈY™;½S¶I—™@Â7“,q è®óÀM‹ñE;¯À™ùûæˆ=pó,›~`¸Å›Èh͸¹ËÂâé›LìŒ÷÷Àe˜¾1·IMúØãvv±†·³ã‹jÒ•?*—Áö'ùÒ·ý?aɼ&òƒÝ#jÓùëqêr”1ÓûÇãÝ^3ÔÅ¿gLÏ)j¿÷oaLK•`ùÈ’§ñaÆ´=2r€É‚;Œé܉sÏ?SEùÜçÏïD3£òœi?=a må{~ž¶âÚÊ áh+‚§­üž§­dž¶òCxž35b>XÛõyÒT{ôâç»þ„ÚÃ"n ŸQ\@ÆàÐ᪠ȼú1|qJ“ù”JÊ÷sºôòuºôòwºtóyê{y„=-@.1zòæû5ö´Û`Óûñ¹`ÓθÇŸ^‘€\&Ñü)“íÜ·çž³åOYâüæA{n–?6{Ng åOÓ4ÙvÒ>“? 
ÖZÚ‹)çǶp'nìõpô©¹µu–¼/ÈÌÑ™ïTÜ|“>ehëL/ÙÛaùS?sV¼Ù(¦sŸÙœ»Dº^‡T'w{X÷n ù¥6ljÜÞM%?áHÕÖYopš›^µYá«i2}dŸ?hu‘Ûµo£&éÇp“iƒIío£fÖ¨IEwr§•þ踫Í`Ír§Ãr®ƒo.Jnæw®*z,îtX*t°öœ0zqFÄ Ú‘'wY2ùZûÁè²kZ_y¦±yîÔ¤ÒÉ”î“`ÞÖ+^"ƒ`¨áäN™×Ë<×,wš,m?òa£M«$œ>ßGΆ;UkáÉyš*'yÊh‚z™áF9êaÈSšÒm¹ÕTìc[Éã2bj ö‘0œ›q°¸à-±´rÍÐËÑfµ>]”Û AÓ ïmšƒen‰+×ü¬+xK,aÈHl7•úŸk"éÆ8,{§›mÙ³§ašï‹ NöÔÅÑ«ÎÀ€é€ú}ºÀvž„lãÐ)wòá¶»ZÛ2ï[RÒS–25?'/PèeznŸÅšÞ“ݘx³°Ýo´åª•ÃK£5ò0’uÆÜ„cXNuøÊ€n…L(¦€À^“ @c“”bªC0µ¡)ßK«öjsB¡1û¸T“ mÆ511°©Ò }Þ°«h3{h³ô*Vò°]†&‰Å*¦¤é†M=Ù@›Š®º¾Ö¾ Ñ2¬ø´Æ8BKº¡X[bÞnÙ!Ь‚åµÈYŸV2«–ìb)‡®„ïŒA£°¬,¼4©˜ß!ùÅD–…fÍw%4ò§,Kê̈ƅh5¡{!Ë´ªùñ †@£dnUYfcì7KÆ&iÆD–…üœño¸yY²•Ѩ“$G£e[I-Ü‘»hk3*k`²McŽoeÂà;áÊÀ”;—Em¥:“Ê2=Úc›âAKË%ˤNk¾k…¡Ñq®,Ö«íèÖf‹ÄCñÂ[Ši« (-ãŠKq¿k—£m¡\g%h:;Hu&b’¿ÓnŒÆL3ì)Äå $A£#]³`<­q8öëš±«o3%ìáâm…]46©Ñ¤BŒÆ2Êo5§Í‚s®Þaþ"Óv˜gD–GóÍ,-0Ï0bÓÁ<­ø³¾áóh„8›b1äCäϬ~ås² Ê“=n—åÉÙ;iºGyúÅ;ã·yÉ8ŽäÉÖOÎÁƒ<{¾ƒ‚<È+±kö¿y•ýÎî0^Hß™cØc<_zL]Ýc¼Æ=Í2ˆã%å^½"Ò=Ä +Xðž kËý5=Þ3´Òáõx/çèt4óxÏÔÏÃèç'ÞÓqòv]YðžïK·ÃÂ÷4 L_¿ï‡?˜=ÞsQ®ëðžß±µ[‹ôxF“`ÁûÌÌÍwäç‚÷Ã&‘~øvÁàSHÅ`º|úbL¿Ø𥌯|JäÌ˽>ÕsE÷€/Àfn|ðéše' øŒ©«kãøº™b`|:n™Ë’|VW=Œ îŸáêñfÀÀç3×ÓðSÏø˜&3ÄŸ.·“x¤†„ç|q‰‚ÖË*uØÎü“–u-AæŒVýÓC|ï›Þ1³žóøÁÌí‰~bÒŒº:§ù«ÎИ‡ª’R÷\ËëÇãÝ^3Ôµ½ôï`´Gç{~ÓóÇ»n¿j 50-^¥"ªcb=ÜvCù/¥x.¸·ãhÉ-”tõ8Ã{ï¹ßùhçzÚùh*Y©Kÿ,¼T&Ç¿â-ÖžmÛƒCÀ>¸íëlÛƒ [Ãëè8§kÏë\ÛƒC`Ò'tÜÊ.®íy!?¦ŸÍsÂ'záó=?/|2/|~Ï Ÿ Š>7„O>‚>?„wÂ÷õó‰šní‘c§¶WqKK¸Ÿ¶Íûv"×u­šb(Öê¯È°ú^Z¡ó=NÞþŽ˜üiêóc½@˧ Båÿ¹¹¤_Ö ãcqi(¸þÜ\²ëåW¿f&b\r×ßÿ£éó{&²äÿbûò½aŽçPì¿ á.Ž[ù6ñéÅ€BÝGRñŸ¢Cùͯû÷ÿð»_ýá_üã6&Ö“‹ç˜~v çhnè$%¢\Oè©°{¬¹ú°øÓŸ Ù Í”«ª´áÒ•pÕçYÐM¶ªôRNQ³]°Ð˾¤Ï£´¨‘‡¡ãõ,¿TYû+Ò«X­¨#ˆI ŠÎ¦È‹tÒUSÄÐ,lðƒ´iéÂ0YØ ³¦L»R+mš†¥„Ð–è’}Ô&dG š†í<ÁÛ+TL_Šm´­p(¹„/,|Óñvü[m›« c0.ÆEìP1š,l…iðRÌÇ{}L¦`ÛFë55m2YØŠ“ÔŽòʤYØ0jŽDb$ä‡m tãHÖ–m´ªÔµ­°@Ϲb¤:ðñ²Ü½cÖ4l¬ -9¸HµV¥­˜4l¬¹õS±S¯ªH¶2|»”e5 C×Yv¨µ£­Sb5õªØV?CX¡XM6>7rø¢² Õ¬b™#ÈD±@[3iØ C­ã8“ºÆf²°q|r!“îš)XUÈlõB»ª´u“…m-å]ÿn VñG¨µFm2YؤP$_@f>fÁ*6á ás‡$M& †4 LlëòSOU<§TŒ"ébnGßB»*¤£»Æa’°Q‚0ËÈê˜ÒÆŠ±§¼Ò3Ð rÐL6%àóˆ)‡mRçð”W `ˆ=«á^7-M&ö0FÚÍvX+¸7Ó +Ù“¢IÂÆ…ÆÊ_œMŠ& Kbá»Õ˜…”gñÚqXc›þY>+ÆÞql‹'g-m]lL*°üQ”`lÑh„€Y@•y”:ö°^0¶¦cl°]ù#A;Q¢ƒmîRü=¿¾Á¶8 
ôTƒŸ-l3#fÅviy‡mfÒÄ¡zÍßÁvÕЩLVh…mfíèy”¾¢v•ª¬¹Ó‚Úَ̈€E—Ù9Ô®À'Šuê'2Ô®„XHZ:wEm¶ebFŽjWæè/ÔÅCƒÚ\0©$á^jsÁ$½½žµ«d³ ‡¼Áv¥í³§24JÆÁv•ÔŠ½T,ã Û¼Ã$ ¥äºÁ63®FÌ€VØæs{ç8áÍÂ6ËÌ`¡+³-®° ÁúԕVØ®g)A¬i^a»Jl¨%‡¶™§ß h¦“¶°]#30­ da›Œ9áÜ–¶À¸2qIq¸ÍÁù‡5ÑÁ[ܮIJ€=BÒaÁíÊ8·ŽO¤÷Ü•åt;&¬˜nÚϰ“2ä°®ÀÍî2Ë<ŽVà®w£¡£´¸!Ť΀ '¨à®Ø.<$0¯´weHx>ƒ¸™`·•GqY›» Þ%[ns:*Yh‡ÛœÁȇÞ6Ü®RÂ÷¨UoÝ·Ù‰Xé¾â6§ GHê+nsæ¸Ëô8³¸Mß[ìÈÊü9'ró¯°+4³=„sÁ+x3Ô”¬x Ä»¥„zðF[ÇÞ Øbx7Òœh-cï2˜M•ËY7ðfðlLÌh½éÜ•ž€¸€ÊI硽c82ä»+ —N²nt‡Ýdâð–\vì®,ßÑn5×a7S»8zN%Þa7k¡¢¿|Ä´aw oÅ»úªq3é2äŸà„|‹ÝÌÐKŠŠë»%´›2Y¶`÷!¥½awÅ^Æñ)¼ÙŠÝ…–—Â+iÙ°›ãøø1×»¡ý‡$nÌcÃnªÿØU¿¹Çîƒî?X°0‘zmî¸Ä¹±Á!l§Û“Gn¦™µ=r3\ Ÿ &Q¢ä¦óA„° ¹qgÆ!W®û•Cn ^ðºJ‰Ó¸N¼ø6ä ©~ì‘›9˜ôãTWäÆPÊ ;F.r'^õ2”õ16ä>X]Y*܇ ¹bMïl¸é±šùl37&—é_Úò†ÛRt¶_×tÛ<ú¡bCøvÜÙdÃ^›Y·¢žï;p‹³?ëÜäu!åºáÜš@–Ê9§o÷Á:ÏÊçØp[ŠWú­ä ·qøáh¸T÷'}xn«-’v¥›™ Bgín«DÃÎ(ï”î™õÅö$ݧ—¼Ávf¥šq0ôrƒmÆh êαmÀ-ÉÒI·ÍTÂçJƒsÞXà¦qCîp3 ö#Žùº7Xh¥Cïã¸#­î¸uÝ?¸%ýV¡ E߀›H† ·]çf‰.¨‚Q=1tC¹û½<Þí5C]Ûû×ó®8çÏo+Nd–Hì”Ú³®8‰¡^<,èÁ÷7äŠ3wáÜï|´ÍnÐåÇ̧ΠÏÂËê ázþoˆ±¹âø!|‚7ÄØ\qÜ>Ãbl®8~Ï»âX!7UŸ>éÐ Ÿïùyá“!xáóCx^øQ¼ð¹!|‚ðɼðù!|“+î+Q8$\@sÅq<æŠã{yÊÇ÷ò”+Žïå›\q¾âè‚›Mxá>;®Áqtù§]˜À9ºà+£TÿŽÅÑ¥µ|yÌ’\cXrñ¾ªŒ%Џ–v¹„2­ü-³Òì‚›w=Ä"Ó,rû ô„Y1¢\ýõ3]p§.¸Wâî+c‰ÎÑ…ƒ¦-X”d©:] ¡´[t}Ìû¹hêe2å0Ìýx1”¶"1W§C /…¸ÜÅ¡m]B®Î㑉Hy1Ó3CHR¥ÀQ>/½6|Y1ËxUHGT¢•™,ŒŸ id|&ÖF½9? ñ2ûJÛo@l$1Ž˜õ¹.ÁVê7ÀPzàÒɶjê Ò'Õ£¢¹šrƒEvûÌ!³(âí6PxÅÆL ˆÉÍù¹ˆŒ•V«tלŸKgÎy’w6õsa¹Ý™ÔÝ­Ÿ }Yj8NÛYîÎÏ¥0·;dOŽÉÜŸ ÍK5Õ&:kÖÏ…>0±×¬_t,~.´lª LÎÍ…†bÆs‰ñ¼«› Ä1¡ÇËáÜ\¢I [çæ’^Êʼn1¢ëæ_šVì‰%87zÝ„o ¦Ö xÎ\q]h‹¦Ö ÛÂÍ=—è¼\HÆãë5'DSj®,‰&¦&‰DJrn.ÕI[É’µ¡¤YjÞ*œZTg q¹¥•acX°$v[ó•7Sÿ]½l³\uãäœ853Ó¿´-N.™Ž?JýS@ïP*eDÝÒæœ\ØîwSiPÆÂŒ©Â<‹²ÀœÛ÷Àãø íæC^. 
fSéö³2€ì#«ùØc6ý@>³R—³I¶aŽ=„²a6CP+T%Üf“îÁ²—kËçåR2SV’P;ÌW¬ÛÑ…»t˜M£4ôÌVÕhA›MôÊj­s -öjÆF… ÚÂX`v,umÚÇ{a=Naêhs˜Ì´p ŸmqÕ ï°‚6]RèÖŠ8ã9Ц=5L÷ÚÂÍT ´õøxJž9ÐæJ)ëµ9lßzeµe¥ñÒ:ʆÚü²¦ä€Cmz‘à*lÀQÔ&sWUœeEmª {(žG€Em©`œR9èî¼\(€…CŒç´5S) Ò8Ô¦Á7•ÂWØæ0¡Mf|í¶Âv•Ì81K*+lWÖ.ÏQ”€I‡ÛÐ^x/î&=,¸ÍÇÔ‘X¤Ëáv•ØÔqm‡ÛÂpOöÛá6Wóˆü?…/ Üâw2ýþpÓG„¥’F ps,%ÞŠ™n ôëL7Và¦K NjÜÐõ,°È-+¦áÏaEnÒž#\t¯îÊôe§;­n¾rè7+p“]†tÆëè±ÀÍM‡=X2[.»‘Q÷¸Í¦¦iü0[º€7ô9Zéó‚7‰Æ;+åæÁ»fê6—÷’oê/¬†ÞÄÙÃ7uÄzÓà x‹à\Þ¼YqåôþÛÁ›©¢°èY¨:Þdu¡DZA74x|€˜„òÐ N£ÅÖtKàü‘kþiî‘¢Ê×óÝÖ‚“wóÈM'±"\؆ÜLÔ šqÚ[ê 7UwäÆyÆ"ê°¶ ·$è%¿¼#73…‰²¹©{Šºt>ç{ˆOS¶Ô#7SRå[óÈ-Õºé;WûDnQö°Ý‹^…r31î5èQæ[RpÜѺ)?êvèN<ÓlÐMg(hð)é…ÀA73É·*òÝtÐo(õóÐw•"V7èf±…&¾D«ÊM})9(ÂݼBA(ùÒ ºÉ´j¯´A7"$AFlt“*†vYÎa:è,¦q(ùì» ŠWUð-psÞ€šª@šVº4¦6ôtq°Mö„“ ¶²À4êLäq›ËEïE/ÛE’KEÒØp›ž‹‚%ruô¸Íç$ž\í/Ü>}.¾)q”²Ó¸ËBájÓ埔Ë`ë“lé»ÞŸ0d^óøÁÌí‘~šeKç¯ÇiË– [zÿx¼Ûk†º¶÷¯çÙÒsŠç7Í¿€-Í’ž8·*Q\²¥8gp¼µL§³O³¥sÎýþÈGëÑìöw¡™O"ñ,¼,„•ïùyÂJ†à +?„ç +OX¹!|a%Cð„•Âól©òOaK¥C/|¾çç…O†à…ÏáyáDñÂç†ð Â'CðÂç‡ðMlibM-ܤ¢‘{†-u<Æ–ú^žbK}/O±¥¾—'ØÒœY“Œ·–¯°¥¬!gÙÒÿù)¶”µwmA†i¬d¤K|)šZÖhn1]^ùýäj¤´`6k¼aÆÞ¤ÐÐYš.§ñ†ñ©=m+³¶ Ø`ã‘“Öü{ìŒÖK³vÆŠTÚEN ˆÈ”Z ½4Y²tDf ˆ –¶>“TP«¼´k¾afÜM™QÚʬ.ˆw3$7Ó„ m}Vdö‚Â'r'[Ê à•ŸL¬äŒžºÓS)¹ù.i³ticì£|"y®…;9% ß8¯Þ4ÄM¶´2È—ŽûBgãìšKeD^ +-¯[¶”m gžvµ[º´EÖÎh'5„]?óR²-ãp8-.uXº”Nç…z²\Ûë0ñ(õ¢ÁÙ2½!t)Ó:hÙz h4‹\ËD1ævF¥´ÃÒ¥x®hâ…(m}f¤,Ì#YÏä‚h Ž.mdÎËéÂÜ‚åK‹äѾ¢`í"—Ä^æG¸‚ø[´„i1ƒf[¹“QÒK#5|¦ª-–/%£ugal4‹ÜòZ˜¸\„tK–/-Ìî‘ÆI´dùRa3OgË–/•&Ô+rË–0eÔ;cê[¶„)Í"yˆÀ¶b Snf(UŸvüûI˜J^‰$¹¤É¦bÒ ÔóäT˘⹂ñªS~«–0¥uM²¹ Ha3´ÐÑ?\Q¢­YÂ4K.ÙËÝ£5K˜fI–ÏøËÖ,aJïzòxJL‰ÂEÌúé?ñcºà¶¤¦aÝɶ§“J‘îa›‘M44%(Û…3ã×Vu°M“œ˜A lS.¡˜õäa›Î )ô±°MœÆÆW‚Ü¡6¹MÀH¶±Cmö¨E™"Úlƒzxr¢µÅÒŠŽ w¨½E:ØfMÝgðœƒm‰Þ8î”¶ÅÝ%ƒjX`›Vðx‡!9Ôæѹªk` Cm~ƒiùƒØ‚j mÐ¥NçXQ[ ëédÅœêP[(Q`FÚßpt)n]8O·‡Ú€p´7jK”uº³¤8ÔfpvÔ83Ã8Ô®š~÷:j O²4 GéP[R´„ÞÄòïP[8Ñ¢¦G^Q[(p¬1p¨¬¨Í9@zÊogQ›¤ažIxlCo|YMÀá6Ÿ+Çõ¹l3aÀ!ëÆ Û´7€/þ]^a[rà#ž0êP›)É!®“ËÎäžlCm鎅e“È«EmrÁ*ÀrnÙ«'ÞlÐ]%f¸D ¬uÐ-t/w|RY©Kt)†ˆC$ìÐÍÎ'{î¡›¤ØN×ÔvW– š3áÂn:¹·ÛYèÃ|i§­·w\œ‚üQù ¶?ɘ¾íÿ kæ5‘ììéhTÙÎ_“—£ŒÉ™Þ?ïöš¡.îýëyÎôœ¢ö{ýø&δT —Ç&J->Ë™â!8‘¤Âäß g:÷áÜñÏX>wùóûÐ̨|k*zâÊ÷ü¸’!xâÊáyÖÔˆ9”…Ï MµG/~¾ëO¨6 cðòçÇð 
åU¼º1|F½ƒ—@?†o¢NY0WÒ^XžâN}/‘§K7O±§K7OѧK7Oð§Ø%þý¼÷~…?í“=ý·Ÿ‹5Å>öì)Þ€wÉÉÇb`6l)ãJ‹~¹åzΞ< ¸×5ûõÜ,yJKîíZÝåºu5Ž›YkÚ–-yÊ¢Õ©œ%½˜" iˆ£^iÀqû¼Í9’n.öËC·ób{sQI¶µ*©´™ ¥» e½™L²]ù¹‘v#¼É¨¦n³=ŠWJCž²|6&Ô†Äõônªû‘ê< ËsÝ÷#½˜ö«\•'{ÊÜ]š#•mc–ö#(JÐb”@>²eOV¨Ã­ZW“U'{J‡|…½!´±¦øC¯jI‡)ëÇpYü±ô*zú8LU?%'#sÑÊ;ÃaƒM 9×C’Óga’§DHR×,bƒik…Ãò©Èõx°îàMž {Â’~ÚD]ä) Kîà&ñ#6Kž2XBYW¶%SÍ$(@}œ´=þkƒM£[®À·!Öü‹<%7™Rûˇ6dçbÔ‘g%? —-¸<¨ e0:cF›6&UÆÖ÷(¦ŽŸ;Ãóä¹’m¸)#Ã$hLºãטᦅ»Ø#ÄȆ›2”¹Œi’¶lãM™á ¡Ê¦µÙxÓÌ"ò§ý~9g¸i¦MíÊ)6ë÷IS•ª…ÚÔlRõÄRUmý0ìifÅrÌà”V´ñ¦X†~Ò§øKc‰ÏLôv›÷Æ8,}*q*W@3zµôid‰m¦f“é±Ìà¤O#Ùt|ä”o«‹ßSüé‚ÜPŽO.d‡nä¦AZqÏAweu[¿ktã>ø=ªòÀº‹ð$=U‘LÝÄêrÛk=t³ðƒ÷M+tó± Ñëu…n}L‚ã Ý´¶¥‰º…ËÅ “6tÓò½•Wè–*!G¸bÑtK.Á (vü£¶9ä® û»StzäfÞqˆÊ¡q‰¹©^âPKiô ¹Å#Kª¿Cn`£lJ·$õ %¨&¸wÇ×)º¿Ö£7 •x%ÙöðÍì¬3/ÏßéÅ Õ‡™òð-îL€0ŠCovט®èT¬-zº23ìè-„*ÝZwô–Ô6W¦–¼qŽ ó"¸€waéç«òÃÞƒQãt‘zÞ̲ƒ?½58ð†tá 9µ Ý úí·ÃœÃnIôTÐ:S»™¾.Z»Êc7ëá#§ónã°›ÙšÍQè±»¿N`vKõ±#† »¿ÞÕ7aÇnÆ;ã£kòª»=Þ°õƒ.‹g­ì£Ü¦:šhuïx±ò­—ývÏX6ËM£Î >ÒS–G5?g4ó†I¿>¡ã`¹Ôùóy2õšåÕsût*Ô»Fe@ÁÚõOÒ©™€Ž8_êG˜ÛÇkg›9wÿ#îÚðÏïH7§"9[ó•É÷É.—Ú¾ïÏ`µÊƪ.ƒø Z«l¼ªħðZecV—A|µjWFR– Š¡Fky1ô}‚j‚M/†~Ÿ †1‹ºA|†Ê 1ôƒø&~µjÒ±Œ>?WTÛ÷ò¿ºtó¿ºtó¿ºtó¿ÚX‡¨Ia±øz5[zõÏÏÕ¬=*Iàe~·/V7“¢ÕGe¥qMÀv&†l±$¹A²x—¥X z¥C2ÐhÍ=b´ÍíÌÈÖÆ–[]Ð0,ˆTbÓÆ,ùÅ×$qÜ!ù&‹6Z‹Ýò¡†v^º1ù„Æ 8Æ5m£1fÂñÄNӳΤ;£¥ ધ··a­>D¿  ¢YÒŒ) t–^« ×>¥ZhP‰×¡5Xb°M3Âa¹V:¡‡Ü%ÑhÉVìÝŽÕTãÚðÒt–òfºã @:¹4šPUQÃPõ[…àÈV.*ÝÜ›¾kÅTcR»‡ä.Éð¦VšhYÑ×X¼bhÑY´9ºU‚ òeIl÷2ú3´|*m+á eèlžÉçИ%ÝkÎ Ã[{½ŒbhlgÀ*><^©%u +—³q(…¨ïËxß%Å|_a*8-Æ…FG»ö—}hÈWÅñ®G_KM³õ¢1¿Â%ÅgÔêY5 mM’މK`goµ§ KZ-óÊ ‡ˆ/œ†Ž§ê•a˜Gª‡ÂBYëSˆ•>¾£„h7´±«‡ØýÎêg¡YòU죟Û,qM"X‹7žòF+Ë-Ĥtñµšf:C£å_ñÚÃåÛFGÀn¿tšò!ŠL’w‰±¼ö.‡‹FËÁ¶ÃsO;Ã:, Ë*I!Ž*F9ˆê!%›T5¥áe¡E#£I.A¦©/¶BÒFÇÃâIŒì¬Ô΢n–ˆe{.g\#Yfý’d²Øµ$}-vV½%™¶X&_V&çN]¤æòÇ î}´ÄõŽòU‡v”oL{“Ð ÊWf­1ivÓåñcœCw”Çkúà  %V=Ê×ÄœpWq¶å¹›qècUÓ†ò¸ä3èmGyFG ñîÒ¡<73d÷,ù¼À<ý…䣜‡™…yZþS‘\ˆqƒy¡Ï¦zìq^â(€7]½<ΫåóŽ(÷@¯¦bIÔ>V¤Ë4æxh¼‡z1µÖÎ$ƒê•ðm¹ô²!=S¨“(óP/D+ÙjïÔ‹õ½¨¥yƒzÑ4?nÞ ^èXÀTjcL¨f±H|{Ú ^iÇÐÏø]õ$1Œ€‡zv†¯Y=‰røÌÀyV_õP/•%¡%Œ|ž‚ë…ßiáâ:=ÖKÂç;ñ¼ƒzÉ É,Çõœz2'LnØG¨Ô+Í4ñ;Ô30ǪúÜ õŒ”döõí¡žOF{ Y¨gm;4IÆõ’5U¬òç b¡^> ÞuV‹tP/±Ø@l:ÇmPOŽå`Þ¹º=ŸcBÕ¦Á­è¥|;öÐŒåè9(ãLK잯Ç]½Ö½P>ú]ÇôB$ÏÌä'GD èù 
ãz²²ê‚õ€Ñ0»ò;¬]*wä륮‡DÆ7XÏIJ©*Í‚õÊÛŸÛ+ÖÓOóò\¡žŽIQìPÏk‚ºÃ¼zVoS}ð ÔG\»B=žwÌ3*ÍC= Á0/êIí Wx¤7PŸXò'Ljo >ñ:žR8AÐA½ÀÝuÄC=Ñ*Ý™_c‡zS¦az&ÁÆÃÌ;Ö'I3sæ+X¡ž'>ä«åé™AZdÔš÷+Ò3MtèglâŠô¬WI¶ì„Ez¦ µÇÇŽôšw¤¥|ª—é Ùq|–#îHÏ'q;I—ú]·æqšzAz©îSÃYNfEz|SàÝ#==ðÔ%­ê)(ø_¬;ÒI~vùx,HáÊÚu¶8¨ç%£Þ1¤ Ôû’Ú Ôó"qnûêéˆÊͪQë Ô‹raÌù Ô“ùÖ+ø¨’û·°±C½T¿+Á/`/E}Õm{º£Æ˜…'ßÀ¾ˆ‹ñ­ ,`?õzŸ¢ì™çw¤Ž:+Ø××™aàÖêEê8ÎÕ{çãl/ôr|uÖ­ÃõÒ2¯Ð¬ÙZõO1½ozÄÄzÎã3·'ú‰ISìꜮ_]Rê?Jµ&13]k©?>¡Ûk†º¶óWxšß=§x~ÓëÇ›n¿jŠ Ô0Ëò@¹§e‰=¸ÇÔúWgwéÌ,L• ‰>ÐÕÊîþê×á «°ã0øý?þEsl8ÁX;ÐÒV˜ö’xÓ©â®=CgBÏ]{þÂÿýðåýò/øÿoÈZø-H01çÁ9qæi$°óÑ ·£ã˜âıç„-_מ]ÛƒC ä2 F*q‚m{è;­& ŠJa‚k{ò¬IL…©!TëlÛóB~(ÅIž>v¸Ÿëù„O†à…ÏáyáDñÂç†ð § æ„Ïa>9M$ãà_zšP÷ÆÕ‰¬8ùq†€ð¶¶Ÿ%ßÎ*û>Y¤^î;}9;)¿¨“é×ãàáüWd®]/¼;ЄŠKOø¹N~ÑT²˜¾X#.¿£{‚ÿ)­â{Þ­ø¿Ø L {Åþ‹pàfž{fÝÌ|†«mQ›ªÿÊo~ýÛ¿ÿ‡ßýêÿúã·1YÙüŠ–3^•¦N?=&£Ý£¾qžsõkÿ¿?=X¿64SN‹®ôM‹5ЧI79â:˪Æ;R&tÍwàùE!æ® I¶ÎúP߬¥pyä“õçI3*nØWKÀšV±Â$$¾aÐFOršÅÙ: “#®óphÔ¾ã$fñÍ7ÍÄìŠz]-dÏŸ+m9Ö¬>è`6¢¶­dfËe˃ßÏÌìN{f¶f4ï9{0·ú7r…=jÁ 8ß¶ëùwõ\i7ÌcêÿîÜ<]î$·|që²Ñ6h]ôGtD÷ʬ†¿ODfuþˆ†àõÊa)¯º›¬®öÏjçÇ@ÈËZb!–/ÿµ´q}3èW£¿"ŒÍ-â×öÆ8ò‹!hÿô…TxظÏ_v_ặþÓë@Ï%>Ï4ýFPšú­“tÎýÝ Y­Ézu¬ÿ$Ùè·3€ò}5MèLòý{f9 œ9¯Lœ–q^Ì4 ×ÓÔR@¿nï"îLˆ## c~dñ)|! c~dÑ)|# c~dñ)¼Ÿ'yèqýÞäÀÇ䣑¿0ùôxòñ)¼?ù”(<ùè¾0ùÔhòñ)ü@©jµ‚L¼•Äc¼”ă¼•D£¼•ăü©  “_#;ªõñSv{õ'ù5ÿõb~Mž6j¶á—Åè£FMMjŸ\A@ƶá9Ç¿å ò—\°/wèÀˆc³Xè´ "cÛp9vy`¾Ü1p$a›'^e×”˜ÒRe׌º+)f×Àu4m¥¦äбϕ݂F‘–«€ìšj¶ËU€çuk[s+¡c5†]áZæÙ5}Ý»Çg)1»f”èZ*e×̸-•²kFT’ EZ®‚éyîlžÒBFÅ­<'©´ÐY2!Ѥ Q¤å* ƒ&™EhéGvÌý‡+=f× ŽYü®ô•]sÇä†2bvM—©?,¾PFÌ®äà2Žìšî‘ù2cv Æ{”=FÙ5ò7g¶©7)»fjwÙçáÔ+f× ›ç6Q¨^”]õ×¢zõ¢ìš‰`êýÌ¡z‡ì„]®Šú¹ª‡Žìš4¯…Œ8&ÿ+W¨ö5…Œ.Ø×Ókƒî­¾×z0êåuùÈ­)Þú·f￈G=‡½U5©5áÉÁßÜCqŠyEšaÒài5°;ñЯ¶0i5Ú]Q¿Ø0¿°b2½‹)×G­ÐÞížU3c˜VÝM¶q¨fã„ÚVN÷D›·²jØÊ˜¹³j˜ØsáÍqB6Ê|s5 ²Bd&à0²[FãåHÅÈFµ°§Ú-#+¨õÔ×èMÚúV( ×8ÒK"®!´<-’Z–ÇÈ?ß$‘ñàu›Îµ†Ào“{ÔC_ê{¿Ôª–Ë-Føá >üþFP#bb¢ꡆږÄJ ÆýM™&Pã÷æR ÒÐéÊmiU€o‹f¤ñ÷‚ BÆs‰×­¨ÍMþÝ”N“Ð+c‹…iÍÛ–úBÆ@! 
­‰#–oBŒÆ5ã+ÿ„‰ÑH= Ï… ùZ–0;O…ìzDŒFŒ[{¬g=V{‡­£q*hÝúà‚ ¡7è¤ÄhM@ò<@b4’Q¦‹GÄè¡&*NëÝ ÜœÆ¯µi9Äé¡Íìå NÕ€~ëØÄê¡=#,׆X;V[küMyMw¬Ÿ`Û Öïʲ„XËÝçB÷N–~ó¢.{ð%úu¥á¼Fv‚§Å1¯±(|L¹>y=…ev)Ìk-÷Å$óþ7i'·3¯±`ôÉy0†+6™ÙhÓ}¼Èlì@]dpÃåÈ«ÜX غã“Ü>§™Ü(ë÷¼~"·,yåWöGŸÉ u\Nr/k‰ÜX,ÜÍ6,Dnt‡©–Áä–Ë“UÄžLnøxn5Ó¹6éÚ¹eLïCB_½5¹ÝgÑ[þf^|†Óë]dez#Ó4lˆÞPÓ`!ù|ò‰ÞêoÉíLoÎög|÷õvÆ7>ž¤Ãø†pëîcŒï>£û ã[OU#|#§wXâ ãMº=O‡ñÖwÃ’(ß²j+(Æ·<ÄÛç ã»Qׯ·¬ÊBê á[7–¡ÀøÆÎ/üáÕ4—ÉÙŒoÌCO;e|cêù"ñ;]·w ÓYOÚ®iÝ1¢wï1uYéݯ¸|ý±d û­ÝEÏôÔÅ-1Gß”l5ú‘Ì}ÿ×öÊ8=J¶û§/h§rI.Ù>?|aØ}…ëÞúO¯K¶Ï%>Ï´ü†d w.ôHiÚ›ëUɶtØÓõ"ïlú‰:üÛ’m)Z«…†‡íï©Ø:9¯Ì›‘p^A¸žæ³ò]¢üÑLOE3>…÷E3<,šÑ)|A4[ó—D3>…÷Û8É¿¢ØbÀcòÑÈ_˜|z <ùøÞŸ|Jž|t _˜| j4ùøþÅ6£ÍÙȺó|K±å1^Rly·[å-Å–yC±-UùËÆxÿR±M™Ûþ‰•ÃÿO±…¬u|Ôbªv,ÐMP{\»¥êŽ¡º[ž¶«W{~Û£¬Ðã®0ý÷€òÊSÚ04¶Ž MØeÃ\ÖJ2xH †aëv EE…ì/·,«f”¶9—­ÓÛ´ÜõX,BêNu?´Ý9:¾{œöÒÖòÇ.¯luxËG7äûÚ;w˜œz@éJ¸[æn­ºM¨ümXSjßz=F¢íˆ•¦òF›I¨š,˜¨Ž¾Ë.ÙN˜ZÝsëÃ;>Ê?wïF96H²…4²ÝäÛˆŠí¼Ñ\o6†Ûƒâ·BÑZ›Q°•(œx¶ímVo÷;»ܦêµ~³}?—Ž Íž®]N5yt·_䆠ÎÉõ‰Zuiöt…(îWÕï(×b8O[ïw”kûˆ5óAš=[õÜ“ÐzŠrí@©=²Ž Í3[!³‡²ÿž¢Z+[Áž­@¶#F³g+NeIV=G¥u^nÞsTjQZæñìô\©EoØ7­Rjk”V{!¥V8•mQÒ+©µ2­ÔMUß_ù÷®Öâ×d:>Õ½’Z‹0’…Šz‹j-ÊÒd–>Bto¤ØR”L (\±Åif{p$[(Xõ®Ö%[üMO XÏOV9—ÚMêÕUC¿caß[’íAìáä=‰r[Og`bã}õ.ªLlYî #‡Q9[ Š,0ÍÄÖººôkbÃØ£X؈=ÐÖî$æ0—©”Ll­³å(€ó*]&vG5±ÉLl9â¢LìzÅWœ‰ Ü“ˆˆØ¸-Ããü‘ØZVdÍ:ˆØú[a´Hlh‹Í«‰Ø§j—䥰²Ucž¦°³q,ËÔÜߣIú-êšû®»#h#´›³É± ÚPݦœÐÆFfx"Aù楸mÖ7³ jãXصUôâR¢¶Ö²ºHÔ†”2Ü‘)R2ót[¢6–;A!jCK®nÖBèÆ3 Î>„nèÿÍemB7*”›;“ºqé²mÙÊ„nœg¨>#t«ÎïoOD÷X µ·DèVÓƒ“ÙZeœý;™­ná£ÙÈ ¡Ñ‡82³xy21âs¨@$f비i‰ýnFnÙ=~¼­þ±r{`{ú¢øÛòç®%flÏDìblOâaÙ;^3ÈØúHö#elË^«yý;c[î 2ÔÖZãÛO+Œòmýº8LÛØÚ]¾®'l²-8°=ÿ˜Õ ƒÛpykÕ¾<'¶«ÏfÆöÀÌ0ÿÂö rIæöh´Ý!nܘÛò7Cž sÉ‘îÚÂÜ–/ÖåŠ/sÈ@æ6’#«Ÿçâ¶Ü«éuÌí~ –ÙÉÜ3ºÐ1·Y0·¡»Å swdz™ÛròòDíBÜÆr­íˆÛr?z ¸ÜX®Zr!sÓT|æ6V!^´ÍÜf‡æö@›º²Ëš™Ûø†{ ¹-Ï{${}˜Û(¾÷lâv'çæ6îXÿ$÷€¥ñ0¹Q»<¶…ƒ;¨Òv–/ƒõñX·v˜+ö1 pc›ä¹Ö n5S³{ûcÑóGh|V`Wÿs)8þ¦lûËñ߈gî ùG¼ºWš-·û§/(¨³NnŸ¾0ì¾Âusý§×…Ûç׸ößnkS‡€$ ëé]á¶]HÑÖe*#~O¸­²Ø“^仑ÿ¦µ¶Î§Î;¶ҼςpEõÚíÐÌ`’Ïhä/Ègz ,Ÿñ)¼/ŸéôaùŒNá òÙšÁ$Ÿñ)¼¯Ý†i.ë•oˆ·:â1ýhèo´{ÐsàùÇçð…~Jž€tßhø°ÈF3Ïá/pkZ%_¡æ[ î1ÈKî1Ê[.ó–ˆ{Œò†Š+뉩†TØ‚ÿ·G÷þù^Ý-"i!¸T6\)øUoPSqu½ÍQþáÁ¥–ÿнØÊÄE=FPq¯¨æh:¼íÔ3EmÔ ×UÜ¢1ADSƒ|›ÿÐý¦bÑ„Ûn÷æuÕìÍy¢ õssá6íb0M öBÛ‡½gÔ½iaVni$é>Ý´° 
qÊ6æÊrSðôì”—Éô–Á`´Yû‹Ç”°!µÀ!ÃÒ§;!…‚­G`d³y&#ïëmΦ„meñ4ÏfÂö [¥a[å:ϵ!lã÷‚ <±[ËVÒe¶Ènuݬ¦¥Fv¯z$ û»q,¸"»q,”u»UðÆÄn„}Ãw›Ø=›–õAìÆ¥ßͺ»Õó.ö7•ݸ¡€‰Ø­'èõÕÄîUölÉ;ÄnMóB+b÷È3JÄn­²öW•Øw"è¬Än-@sƒŠo­(òl‚7N3£åñš²oH¢òYÝ">Á[%pïN@ðÖB+—* Þêp á­UQžŸAðÆû¯!Â…ƒ¡ÝI„÷ê'`oÁ¤¸ÃÒ Â[ME=³†à­ªõêç{ŸðVï|7< xk…V2C‚7ˆ–\°#xk“ˆî-”Ö¾f'¢½¨àøžÐòï;êõ¾§¼_¾¼$|«éŠ-Þ>ð™IøF®[ÄþïqÛ;ËøNQÑ;ð ßms’e|ãõrúÀwAU©­Ô ß0®õu+ã[^õˆZÆwŠJìotœß鄽s|½˜ÞZ“íJ߃ÞW\íô†%„™¹0½›v·¥Ó;Å×Ao2b8è}‡þì¼)uå·®¥œ6 oÙ ¯ÕåÉíP ÎÜFNGk˜ÛºÒõMÈÉíiëŒmâ4¤±¿1 õ$ŽiÈ'ñ…i¸¢é< é$¾1 çxòIüê®\@Rû¯~•×zªƒ¼¤î£¼¥îò0o©»Ç(o¨»¨döÈÎ+ýq·Di÷¿ßkY®Ë:øHW¤½N¹³Õ[(^ ^©×Us_Mÿj±hœ$u‰âÖg[’¼+»C3Sº¯®ý!UïÕê“àÞ&‹Ú¾e$ò¡)Zh%cì >‡·µÛ»¯qq®oå˜üÕöÄïµö×ÛnÊÁ~‚ѳ‡˜àØKuÑáϬxå`Qó·EɻȖӺÊAí ö|Éá}[Øñ¾¯‚B vwB¾},×MÑŸXv·äìzé”ÍE€áßçƒr,*À˜ˆþ]‘ƒ]=àš\’*Î('ßwïF¤gÝõ¬¿™<^&c( ¦}½Õ¢ eì0Ä}#âsc…¦*©—Gœä`Q#¸\î±Jj›i rP…`¨ÑÞÎP®ïR+8ʱí‰ëATp>S+ûË•?øG-X÷ Öáù®‡çùŽ{.Cü ¶Ãr°«œÎåuÐ,;å´£Œ¦nî)&ƒ",'{{.º“¿º¯¬£ sZλœiÂ-ZzÂ?•«y»W¼ÉAíËý|çPâî´÷­ÞŒ{-¦ö´¶ƒ¤ £òÜjˆä`†¡z¸XfãåÅá¸ïÃ{sÉÁ #_ß½åXç¢ÞàÜ,³õÒÎ]k.#zÝÍ—PT­=s]ºª•°ÉA’‡™wéŽú°þÙ¶ Óå`JóÌeõw6yEvíêµæ2RºÙæ>µßåψ ?íq~°>XÈ~²¾ Ç6D`½4…! 
öxÄÙÒ ØãÕš¦“°G <[ã¶öˆò{ô€=Šj"\#ìUQæI°4ƒ˜õë*[7zFÖã`pS`ÖkT?ž+±^õëÊ«^ìåðLyܶ`ãÈ”×êOüÊ]ç™ò8Øgy ¼6½lÇÀ”WoÑb¹4Lyüfs]›)¯v“Ýßn¢¼Ž¹’¸˜ðиB ž¯ÃÙ/º¯ó°2Z¦»Öp{æÓ}ÅÙý5$º/Îð…‹tWñVƒ„cØ5˜žÃtÇRŽF¤»ê®î%Jt׊gÖ3Ý¡9?÷†À¾4J?Oûòw7U™Á®EÔÞW‘ÁŽ7PX½Õ};V¹ñE`×Ö"a‰·¶Õ²péß°n>á3>àŽ[ïnÃÜ¡._Öšã„»\o¶RÑîdÂpÂ=ÓÚï€{©) whò9|Oîãö—åî2$¸r&?àŽ®½Þ‹ù€{CåØp—?["¢ îjƒlÅ•Üõôf¸Þ#1ÝqCÜØü »ìÖû€;Òí<ƒâ€ûP3)_çⱌsc¥ñ…Œx9Ÿxº?à@`¹„'â^]ßâñÍkç <®/~¼ˆðr}Áté ¼¦¹† œ¶UDy,îÜê†)‹ S2é‘[1‘ç^»ƒô-öÙ9H¯Mq¬jÿ =îAí~ƒ˜ôäp ûm'ÐË_½â÷?‚^ïU¸=z8y»GÈA{$šTÓ¢Úã7=qô =çÒ´ÇnÅ`NÚËgÒR´zÂÎ{us(¡5`ÿŸÿñ¿ýÿ°Ç endstream endobj 3 0 obj 34691 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1080 792] /CropBox [0 0 1080 792] /BleedBox [0 0 1080 792] /TrimBox [0 0 1080 792] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000034910 00000 n 0000034931 00000 n 0000034954 00000 n 0000035384 00000 n 0000035253 00000 n 0000035148 00000 n 0000035311 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<8B8D6CDBD5BC7A8365A3E5CD763CFD41> <8B8D6CDBD5BC7A8365A3E5CD763CFD41>] /Size 10 >> startxref 35465 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_epyc_jc2ic8jr4_nt64.png000066400000000000000000003465771360743507500235320ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ 
ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$9·-ç vpAg’Zó!%Õ€IDATxÚìý}¼äø]߉~Úlqaµ kSÝÑáa ŒÊ],ë€U!ñ˜×IT°—ÞLo²/ÕõÉžÉÃîEÚíd/Ih¶”åÞÝt…¥½30ìæU Ϙ%›)… `j(³&TÛp˜é2ޱ˜jfððT÷¯J¥’TÏGªªïûõ:¯îS*I?é|õÓ÷ûû>]‡C0 Ã0 Ã0 Ã0LÎx]Ö`†a†a†a˜8Ø`e†a†a†ar ¬ Ã0 Ã0 Ã0L.aƒ•a†a6Ïó`ÛvÖÃ`†a˜µÀ+Ã0 Ãl0Žã R©d= f‡1M®ëf= †¹XÞ/6X†a†a˜…±,‹xfg`y¿|^ÿ½ßû½ß›õ vÏóp÷î]üôOÿ4ÇÁW|ÅWàMozS°Ý4Íàá–eA’$€mÛ0M¶mC|á~a°Ÿ®ë$)8¶çyøŠ¯øŠàxŽãà¿ñþ>ÃÌJšŒ'm3MžçAÅà8–eáñ!IË7³¬KöeY†aøÌg>øñÿqÐëÜó<üƒð&Þ ³ iòúÓ?ýÓøÌg>ƒ7½éMEqBFé3ž³™May!qÞg¹^ ö°fŒçyØÛÛ ò,Ë íÒu†aÛªÕ*,ˆjµ§T*Á4Í`_Úîy<ÏCµZE¥Rã8Áv]×þ>ÃÌBšŒOÛ•·eä•囹lÖ)ûµZ-v•ßó˾,×óÃkƈ¢ˆv» ¨V«¸rå jµ?$! 
×uáy Ãû‰{A0L–¤ÉxÚ6À_  +í,ßÌ&qÙ²/I4M;ì‚iòÊ0ÛÄ2òÎÏÊz`ƒ5c¨R³ÙÄùù9ºÝ.,Ë‚iš!Ñh•$)Xµ¡ŸV«MÓ8o‰Éi2ž¶ ð•vÇq`<ÏcϳQ\¶ì7›MhšœŬ”iòÊ0ÛÄ2òÎÏÊz`ƒ5c¨¦¢(¡¤°Pâ¶çyÀ˲<ö;0Júf˜<‘&ãiÛèwEQ`TUÍúRf.²}AP¯×a·]`VÆ4yÒ£Âf“XFÞgÙ—™6X3†ò’öööP©T°··(*¢(¢Ùl¢R© R© T*aa¤”躎R©TnµZY_ÃŒ‘&ãiÛZœaï*³id%ûŠ¢@Q^ÀdVÆ4yº®³‰Ù –‘÷Yævf~® ‡ÃaÖƒ`xQABz©€’(Šp]¢(¡dd˜†‹,Q˜0Ãä‘8Ÿe›iš0 gggY_Ã,Ë>³-$ÉkX_I+LÃ0›Ä2òž6·3óÃkŽq]{{{èv»$ ®ë¢R©@Ó4d¶ §)•JPU5ÈÍc˜m‡eŸa†aFä"$¸×ë%~Þï÷³^f„C‚¯\¹‚R©„’1›Ë÷b8Žƒ«W¯BE–ùœÃ2¾ZXöóË6³í°Œ3›@æÖF£^¯‡ãããà³~¿MÓ‚‡H–e†‘íb˜`ùf¶–qf[aÙf¶–qfSÈÔÃÚétprr2ñù;wP,á8ž{î9ôz=4,‡Ê0sÃòÍl;,ã̶²Íl;,ãÌ&ñYYx0@×uȲŒŸ÷û}t:<õÔS€B¡€ƒƒœžžâèè(ñx?ó3?ƒù‘Á|ÁduI+祗^Âõë׳ÆÊxå•WðÊ+¯äâoôÊ+¯ P(¬m^µ|À­[·ðÐCe}ëVFžäaUäé™}饗ð¡}hmÇç9|:y’‡U§gö•W^Á»Þõ.ü£ôV~lž¿§“'YXyz^_zé%üãü±¿¿¿–ãóü=<ÉÃ*ÈÓ3»Èü™ÁzçΟ¿ÿ>Œ=¤ûûûSãèßö¶·á=ïyÏÔ—Æ&qxx8¦±ét:¼ð ¹øÑXÖŪåzè!–‡œ“§göððp­Çç9|:y’‡U§gvs8ÏßÓÉ“,¬Š<=¯‡‡‡k3Vž¿g!Oò° òôÌ.2g|zzŠ~¿{ÓÒŠÁ`Åp3ƒz®n ×®]Ã7²ÆÚaùžm”‡m{f“`Ÿm“‡m|f£°lÏÆ6ʶ=¯I°ŒÏƶÉæ?³—n°öû}œœœàöíÛ±ÛÓˆpØB”_|Ï>ûìÚ½ — ­~m Åbår9ëaàôôwïÞ ú×®’uÉ7à‡§âôôôroؚȋ<¬’¼<³‡‡‡¸wïÞZŽÍsøìäEVE^žÙF£»wïâcûØJËó÷ìäEVI^žWš¿“ª÷.Ïß³“yXyyfIŸwþ¾ôàÓÓS ض Û¶Ñëõpÿþ}4 ¤†@‹ÅÄmïz×»ðè£æÂÕÍ䛃ƒ‹Åµ„“­K¾àúõë[žÂ¬ãããµ)<‡3Ystt„7n¬|çù›É4¯#$˜ço&kÕÁ/Ý`掾v툎ðÿ&ϰ|3ÛË8³­°l3ÛË8³©\ºÁZ.—Ç\ÒT¥5¼*S.—Ç*’Ù¶J¥’õ½b˜©°|3ÛË8³­°l3ÛË8³©dV%8££#¢Óé`0 P(àñÇÏzX ³X¾™mgdÜqèº>ö™$I¨×ëp–e¡^¯OìW©TÐn·–eÁ0 8ŽQ¡iTUÍúÒ˜vA¶™Ý†eœÉ#™¬qñîûûûxú駃„ó<$ 3Ì"°|3ÛήʸçyŸ®ë¢R©@Qxž—XÔͶíàûµZ Ýn¢(Âó<”J%H’I’²¾<»+ÛÌîÀ2Îl ™¬I ~H˜­…å›ÙvvMÆEQ ÏYp]7ØA@³Ùœy&;vM¶™ÝƒeœÉ¹5X†a&ϸ®;,ËràEMƒzüíííAQȲ¼u}ÿ†af°ÁÊ0 ÃäÓ4aYV¦c¨×ëSÃug1V‰n·ä»êº×uÑn·9$˜a†aB°ÁÊ0 ÃäUUsWHűÂJ–eÁ4Í™ÆI†­,˪ë:LÓD³ÙÌúÒ†a&7¼.ë0 Ã0Ì6@Õ~gÁó<†1ñ¹ Y_Ã0 Ãä ö°2 Ã0Ìض+W®¿Ë²ŒV«Çqb·QEaP¶mcoo/0r=ÏûÃ0 Ã0l°2 Ã0ÌÜȲŒáp8÷¶ðçÍf®ëÂu]‚À¹« Ã0 ¬ Ã0 “áÖ6 Ã0 ÃLÂ9¬ Ã0 Ã0 Ã0L.aƒ•a†a†a†É%l°2 Ã0 Ã0 Ã0¹„ V†a†a†a&—,d°6 ܼy’$¡ÑhàôôF#ëka†a˜KÃó<Ôj5\½zW®\AµZ…ã8+=‡ã8¨T*c?º®Ûèÿq¸®‹J¥˲&¶Y–…R©„+W®`oo¦if};†a&–¹ V2PËå2ŠÅ" P(àôô‡‡‡Y_Ã0 Ã\ •J‚ àüüÃ᪪¢R©¬Ôhõ<Ðn·Ñn·Ñl6aYÇçy©ç2MžçM¬®ë¢V«¡Õja8¢ÛíÂ0Œ•Û Ã0 ³ æjkÓï÷qrr‚ããc”Ëeôz=~Ϲb±ˆ›7o¢×ëa?ëëb†a˜µaYA@½^>“eõz†a@–åÀ ô<ªªBQ”`_òhÒçŽãÀq¸® Çq IÒØ± jƒC†ì´1¶Ûm”J%¸®´Ï¡ÿÓï‚  ÙlÎtL†a†¹læ2Xïß¿(—ËÛö÷÷±¿¿Á`õ51;Œçy!ëa0ÌZØÛÛC·ÛÝMw]ÿ' 
I¢÷ƶÓ÷Eÿ'ŒãqÆ[èødTFQº®CE˜¦‰v» AP­V!DQ„®ëh·Û|/- †a ÕjAÓ4”J¥ÀÀu]w"ôW–eØ)׿8Np>EQ`Y4M ö|yR²,Ÿ1 Ã0LÞ˜Ë`- €Á`üŸ Ç•a² V«šÍfÖCa˜•cÛv`ÔÄyÞ¶×Íøœ×`•åxƒ5Î8Ž;~AO¥ªªQ«ªj``ʲ ÷âø²,ò,Ȳ QƒïOó¢ÚS®Ë²,H’Û¶!˜Á ÝnŽãÀ²,èº×uÑn·cp†a†É’¹ Vò¢âöíÛÁçý~wïÞE±XŒõ¾2ùÃ¶í±°Uáy\×Yé ‡©-{=äQ°m›½ÌÖaÛ64MƒišÐ4-ð²RØéÖ˼,û?ó²ˆq¯ªS¿"ŠblΧmÛ±ó_Ø+îy^`p ‚0õo'ŠâØ"…«)ã4M’$Á0 þ\KómØp¦±êºÓ4yÁa†És¬p|| ]×qóæM@§ÓÁÉÉ ö÷÷ƒ#“LÓ„,Ë© Ï¢Ç5MgggS¿ëº.öööpvv¶´Ñªëz ÐQþA9daîÝ»‡W_}u¥×Î0ëĶm´Ûí1/«çyAH©®ëcrÿüóÏg=ä­†BÃ^TÏó‚Ï(• EBòÀÒ|•Vå7 ÇqRçL˲ Šbv ø(äeõ%þÿééiÖ—ºõH’„³³³ ŠC’¤±¿'E±¸®;¶hFù£t únø;aãq8Æž_–åÄmQE3b›Íf&,ç®2 Ã0¹eîÖB¡Û¶qpp°ôÉ;®]»ôsÒëõP(·3‹#ŠâÊVÓ4Ñjµ *•JêwéÜ”›U«Õ&V÷© }7^G.µ~ˆ*‰ä­È2‹å›‰B2ÝívašfÐ Óu]˜¦9æe£)€/ûacƒ¼dY,ã>i‡¤ZYÿí񮮡l3ÛË8³IÌ]%øèèFý~?ö;GGGSszzŠF£„—ËeÔëõ òp¿ß‡¦ic}^9?vu„Û¬Šh'ªN™TLIJ¬@‡î ‚ô#$%Ž¡ªêÌcβŠ*Ë7“…dRho­V Â2E‹(•JÁ3dÛöØâKÖ1–ñ鬺>s9°l3ÛË8³‰¼nÞîܹƒÁ`€“““ØŸi 4 Áq<÷ÜsÁgás‹Å`{¯×ÛÎ,‡çyq9­5¬D+VR«À7÷öö‚œ-`Ü`T?¥ñhš†³³3Ôëõ Tn<,ß …YÆ}nÛöØsÒl6Ñn·Ñn·' œf³‰Z­–Xu6+XÆgƒ½—›Ëöv±ŽÔ§uAí¥*• öööðì³Ï®å<,ã̦2·Áö~ÅýLƒŠ5QHq¡P@¹\<¶ý~N·nÝ ¶p1ˆB§¤¶ óBý!ÃÞT*êá8ªÕ*Z­VPÝ”r¦ÂÊœ,Ëh·Û¨×ëÐ4-W ú<°|o>‹(9ŽãÞÒZ­†J¥2ölQõØ8Ï’Q#IEA­VËUË–qf¨–À&À²½=†‘K¯`ø]Q*•påÊìíí…*[­ÎÎÎð裮åü,ãLxðàÁÜ:殼,åryÂHºÿ>Þò–·ÿ0VÔi?1™™Ÿ°q¹ «®ë±¹¢²,…e$IB³ÙD¥RmÛ,·–ï͆zQ’ÌκeYh6›Ás@ 5ô’¯V«eyn¹§ê±y2XYÆGÄåêSn1U ÞuLÓœ©×l`ÙÞ.{¡„ZúÅÒ£Úu¦iZfõ5XÆ™¬±m·nÝš{Qf!ƒõôô¶m£ÓéðY–ç.Ätxx<O=õ¤>ƒÁ ˆ±ò©O} Žã¡L2äA 7•Ÿ•hõRêy§Ükš6Ö£Ú@ÔjµL ÖN§ƒ~ðƒøƒ?øƒµžg•ò Ÿüä'Ñh4pãÆ ”Ëå îÜvã8ÎXåêV«+׺®/|×uƒÜÓ°’"I4MCµZ zn.*óá¼ÖYi4x饗Ö~Ïv}·m;¶J¯çyÜÓô i_¥ÁzzzŠ_ù•_Yë¸yþÞ\ÂzÊe,ø¹® Ã0 ª**• šÍfPÇÃ4Í G½^Ÿi!”æï~¿¿Ö‚G»>3— EY>ýôÓxßûÞ‡·¾õ­sí?·ÁÚh4prr‚ƒƒƒ d`0àÎ;èõz¸}ûöÌÇzüñÇÑï÷qrr‚'Ÿ|GGGAx/•JÁâ‰ã83{8WMؤBzq…e2ng1@Ép Š©ËBϰªªk+Hò½¹~&.$=­M˜MòœÎËønašf=øsx»ÝN ]§ÅUú×¶íÄ® D½^R¬ÖõÜÌm°îïïã™gžA§ÓÁ /¼ •i/‹©1ñÔ̘Y-®ëN(®ë+ü”s•¨ˆ˜ÿ^NáÊ…å;?8ŽÈ7Uce–‡e±-mÊQmµZAd åÿ~»"òV’" ËòØÜ{žœ°á©ª*ªÕjªÁJ•§…øÚ¶=f°.L«üš¦Á4M|ä#Áw~çw®åž°loTY;n1RQÔjµ@)ït[ ÒY`ß~,Ë BtÃ*…ðƽ_hž%=‰" fyVÖ­[-Ô‡µßï£P(½–:'eoqa\­V+ø\’¤t¡s°²p9†Y7®ëƶ§a˜Ë¢Z­B–å ç-LêØ¶¨¾›VP=©%OÔÒ† 0èEŒ„pä,ËPU§§§øÍßüͬo“ÈóE/¸®x—vÙXe¶Ïó ëz¢'•æïhD}6Í›šs¬§§§¸sçÊårüä“O¢ÓéàöíÛ888Èúš˜â<¬s ¦ 6X™Àó¼ ï‚U& DQ <¬r'‡´PVÞEAµZ 
LEQ¥†rR©òê´c‡¯wZEú0õCãs]w,Ôð=DëìÃÊl®ë¶íÔT¼={ ³º®CUÕÄp^òªªšú<4›M”J%¨ª Ïó`\×XÍ!s¬ƒÁF·nÝ ¼«p||Œ“““ÀeOk~Yuw†É+TÑŽC€™,!¯e¡V:aE žŠP ƒ5ìyvMŽãŒ­ÞÏZx‰ÂØâ |0Lq¹« ³mèºÔˆël@‹ô³xHAj)~í…¼×ö˜«­M¯×Ã`03V‰[·n¡X,IÜL>ÉëÊ Ã,‚ã8¨T*á‰Ô;o–^‘ ³«Lóx†‹tÐ÷I!5/5\d ð=¬ÓLu]‡ah·Ûl¬2S¡Vžçñ\Ïl5ÔJ©ÕjŶi¢©š¦Í5IFêÙÙÙFõÔSxæ™gptt„F£‘õõlŽãàêÕ«AX¤ëº0Ms¶Uïâg•„CŒWÙ7õ¢ª1³{¸®‹J¥A–ʯc˜]#œ¿š’$A–eŽ„`‚Ò›ºÝ.Úí6Úívæ2Í0—«ó3Õ`ít:è÷ûc¹©ƒÁ½^AïÕƒƒ t:¬¯i#Ñu=0L ªŒjYjµZÐgo&¥ÞÄjJ‚ /­Úàç°ZˈY¥Ri¢mÅ*±,+è3Æ /ÃÌŽçy+-Þ´(´ÐÄ0‹f˜ËÆó<6V`ªÁÚëõP.—ÃÀX~ ÞÎÌ•o×u=¹Öu²,CQ´Ûí±ÖSq°£rm7àâ_ë1²™¹¡‚-ˬÕjuâ3𬩰+, 3;á¶œ3Êl Ô~/ŒmÛ<ÿ3[®ë¸zõj û4sõëù™šÃZ(0 Æ>£>¬l¤®òœŠ¢8VŒ#œE9O3³‡Õº `£V9ësè1s`YDQ ZÊ,‚išAp%S]×!Š"{Uf žçÁ0 xž,`Òbæ®4‹g6jSøŽŠs]—•vfk¡xív;pH…ûr3ó1Õú¿¿^¯‡^¯À¶m{¬Z0€ øÚµkY_ÓFaÛ6<Ï š­·Z­ ¯jîòù6ÅcË, µËð¼ÅÿèTí1¼ªN9Kl¬2L2Tˆ¬T*ð•|Û¶qõêUÜæ‰ÙÂmjE W˲ØXe¶šZ­xRÉÅÆêâÌd°–Ëe¢ÑhàððƒÁ ðº«ë:ÊårP5˜™ Ã0Ƽ§’$áüü|¹‰¼¿ˆ‘¾âÁRôY#è*pBÇf£8sȨT%QŸÇqÆúSõˆd&ž°¡ÚívQ¯×¡( šÍ&ÎÏϹõ³18Ž3Ö¦FÓ4X–Ïó‚þÀ ³˜¦9‘ÆW¯×ÙX]‚™ÚÚÔëu4 ´Ûm …±LO>ù$NNNP.—ç^õ%¯-+n{¡PØZ#øŸ|ï?Á·ßXý¤-a=Uw×Õ^o° Œ5#6U¾)¯HÅ…sXMÓ &jUUƒÕt˲¦öªd6‡M•ñ<âyJ¥EA·ÛÍ_„ÍŽÁ²½<¦iBÓ´@–I7 c¢‡0sù°Œ¯Çq‚ڬ묞™ V2Rã888ÀÁÁÁ\îë Åb†aM¿ß‡¦iÁÃ$ËrF²-X–…·6ߊÚwÖVp‹021½×ªsñ³ª…":Î"ú™ ß8ÏÑ¢Õ¦Ëwxå{‘¢.´zNÊåÝéºEQXß6]ÆóâàølaÙ^ Ñ÷¡iööö8Ò&CXÆWµ©Q²,-Ô0«a¦>¬€Ÿ£Úh4Ðh4ÆZ׋ŹW_Q.—á8ž{î9”Ëå±°Ø;wî X,Û{½ÞVõxuº®ãýï?ÞøÆ7®þÙMj+ã`bìaµÅ‘†õ¼^\wÅcY›.ß ø+âÎ;+q­ (lîâaL.ÙtϵZm3 )¹.P©d=Š•Â²½ÂQ6aAÂÜ™l`_-á65õz}¬°³:f2X)wõää'''A>ë"t: ð½··nÝB¿ßG¯×C¿ßG§ÓÁ­[·‚ícs7×uÁ~ë[ߺÚ\PbÑ÷@šG6œ[*aµÍermmŒò_“Xƒ;‰M—oÇqƼª¢(Î]x)Z𽬭V‹'ñ-`Óe<†J¥ô×öÿÁ ÿ‡cê÷÷÷ƒÐ…M§V«¡^¯û‚½®C‹“ižÓ°‘*`º‘8+Ô3–XÄxMÓ—lø^ãÈw®ÿàu\ÿèõ]ĈM—o˲Æ I’æ*¼D9¯ÑPâp&f³ÙtÏ–e¡Õj!ò›Ë'ŠÀ)i,Û«Þ#¼@™?XÆW‹®ëaó¢c6©+…ÿÒj €`e†âÛç¡P(ŒµÄé÷ûh4AlÚC‘f ¿øâ‹xöÙgqxxxùwqF(´2…‚»(a£sƒ8)Ä6j¤šs×@¼a)a¼g켋õ¦¬Æîóéé)ÎÛçøðï|x”κä^zé%âôôtåã&¢}ñA˜«ðW~̇‡‡¸wïÞZ޽Ësø* d´ˆ³¹N g[”F£»wïâcûØJ»éówVض½½=ìíí¡V«q5ø%¡ù{{<¯ Þè蘌8==]hþž9‡5œ§^™Y”Á`€F£Ç{ år9(ê”ö@>^zlëB×õqÁ®cõù—FÆä¼Êlâ½§6Æ+Ïkhۘ̀^$‡5maËŸCß988Àõë×ñ¶Gß6çÉfgÕò ׯ_Çññ1Ö6î¨Á*Ëò\!ÁTa˜É–ããc<üðÃk=Ç.Îá« .Ç{£‘¤K7XŽŽðÄOàK¿ôK×rüM¿³€ êµZ-œAQ” ð ³4'Uï]</Žã8¨T*eÍfsó/™ƒƒƒ…æï™ª¯š^¯‡;wî P(à©§žš=HbÓJkS/JQaš&$IšŒq_õÂ̲aÆIïËåÛº ÇÐ/>K:¶y±-)YDºÁêb<äø‚7~| 
Å®.Ø6ùžÇÃê8çqìÛ&ã—õ8Þ:e~Î<÷<ò=;”M}V‘sëä{Ë`_r>±œ_.™¬š¦­è„¹ví?LŽðÿ7˲`ÆXHåDO¦«ÎW|b£ö0:æ3ˆeø^4¯t̪Ϙç¦ócJºŽöÅv“mqÂrø^xÀg¾ð3+¾À›*ßÑ‚K^ ¯"Rx»(ŠÁ>[ç9bÙT¿ ¨Ÿj\/Õ¸‚d®ûy¬Žlɳϲ=¶mömî5¹°Œ/é>l¬^>3¬qqéFO>ùäØgÓÂlÛF¿ß‡,Ëcíq \.£X,¢\.ãôô4È•µm• *OmkÚívzK ³õ=2ÍëHÆÞ¼ù òű-L7XçK2ÌŒW5v#×5‹d#Þ¨¥üUú蜯_›ófc“åÛuÝXy¥ÂK²,ò,èºÔ æØÔþ†'ñíg“eü2¨Õj!¶JªišÛ¥à[Ðín‡•e{vjµZ­VÖÃ`æ„e|9 Ãàö|1Õ`½víÚXÁ%bÑØzJ"3€©éÑуòÛ…B?þxÖ÷j&Âý˜RUÀ÷V0¿ÁJ…‘¦Uê7ÜØ¹8fœ1ºÎ¥¨qÖ}ÈˆŽƒÂ‰“ƾ~£{&¯<üÊZ.e“å;)œW„`[t!FET«U´Ûm8޳¹ÕN™™Ùd_7¶mÃó<´Z-T*•1ÅÆ¶í ØÒÖ £Ÿ-€e{6t]‡¢(œþ±°ŒÏGX/bïj¶L5X‹Åb°Ê² ŽŽŽ¦oO?ýtð`…+šå]סªêz'rªz;í™™wUIö†9ßžxâ‰Ôïݺu ý~?¨*¶éèºÓŒOž´,+¨Lqîs«aæ5Xmø†ZÒ"g8*mž…аáwÙÑKSCŸ¹ðD3á¸bäw‚ Ý(æÅç´ŸŽÉ¼]ÉhÒB¿yqÿfɉÝ1–_†ÙA(<²^Ÿ·wØQ¯<¬¢èab¶Ïó`Y+ðÌÖî!_¯×ÑívÙ»šC¦¬½^ÅbÅb1õ{”¿: ²¾¦•àºn¬Áê8AÂ'eYŠj,¤ð'y&Ó_ÀhÂy¨&¾4½,-÷5ì •1n ÖŸûj\|bÌx¢Ö°Që ÞXw᫜Ã:‡o8µÚòÇ`f‚òV9<2Ï[‹aPU•eÙZlÛ‹„áÅû|2Õ`- xðàAÖãÌê£Ʋ¬1Á¦ïÌ•¿ ø9—|ãjß´0ÙJèw㡃Q¥Ü44L÷Ά DÒUâÆ–æ MB½8¦ú=®køÚÂáÏI4…1óÂÙl¬n8ìݺ4Hg¥&„ .¯nä9­ÕjØÛÛƒ®OæÑ¸®ËÞUfëpCsUØ»Ê䛩ëþþ>ƒÁDuà(Tp©P(d}MKã8DQ úè4Á‡ VEQ`Ûöü9€F¡½óØ dp˜4>£á·Qƒ5ÎsÞ7ì ÇÆdNh¸ênûbŸ¤V!f >Ïð f 3N"z.#lÔ˜%ÈÃË ÅcL«Ì䜫W}—Ü_;´8É |Iš]þXNsa>ÓívaYÖDÝ]×w;ìÙ:LÓD©T ŒV6X7‡™ Öýý}hš–î; Ðh4P.—·¢µ …Gʲ Çq‚L•Jš¦M„ÆÈ²<ÿj;ÙÁ᪸³@…š<ÄŸQc,š3š4L ã9©³TóéfbdæI C‹ïÍf²,C(ŠÃ0‚ïËEØmÂqœ Ý=l°n3U ¾}û6úý>nÞ¼‰ÓÓÓÀÛÚï÷qzzŠ›7o¢ßïãèè(ëëY á4š¦Á4MT«U¨ª[RUÕù'õ9ÞÙッoxÖá{"ç9O’繆)§é!&F¡Áqß‹.Ά½ I†ãû{ÀÙùd8s˜¸}+˜‚¼ãQkqyÙTõšÙ@Ç7V Ö% c´T*í ¢†EQxqðeÍŠT›Å묪~øú,†¨i¥Ëõ‚ضX8’d9¼ø®iZ`¤º® ]×c{ 3Ì&CÅòTUE©T‚$IœŸ½!|Ö,_ÚßßÇññ1Flëšr¹Œ£££­ð®¾Ç‰xEQ°··EQÛ$zW+ðÃeãÐfü^=å»qÆXØqìazÒè³›f膷Åé+Ñ[£„öK╟ŒˆK5¬ß˜ˆ¯®Lä4È9všZ­D£\VÂ瀌Ä<à8¾±*IœÇºÔ4¾ÛíÂ0Œ ϔ뺰mÝn7ë¡æ—´ö>¶=ZXQUÀ0ü*ÃIxžÿn×ß'ü9+—3Cù§a…œ¼JQY{YÇA½^gEžÙ*ÂῤϳŒo3¬ÀÈh¥Ö5½^/.‹ èt:(—ËY_ÓÒ„óQA@»Ý^¬À†}ñgì-šEÃpà{6Ãï|“FédÌ%-l§Í’ÖUŒ \5f¿è¾|£5ÉÃêyÀ[ß:ù¹_é‘eÿi<ÿÄB®Ã!0Ñœlfj5 ÕÊGeTÏóŸ Iò|f!Ç ø¸b{®ërØXÛž]þ=ÏfÎ.VJUÕ÷œjZ²ñiY€¢Ln/•&XfêZ@ó}x±=λJhš,Ðó{Ù6¢ùªÓzh3ùbfƒ• 7Ñ—w¯×Ãááal(Õ¦³–jáÐÕy#d Æµz¡ÏãÎãøãçùƒ˜ˆÓucŽáD¶G+„¾þ^¸Gl\ËëbŸ¤âG¶ ¼ãþ÷$ÏWPøÅ÷·Oýíõz|¡*¾škDÄâyTU…eYAÞªišÛå5ºz8¿7zŒUÀ÷°ª*Wi]*²Gˆ¢8ñãÂd1¤ÝÓÉ£ëús8!þï¦é/´¸î¤wÖ0€vÌ ªªúÆ/ç§.Y©TÅÜqœXï*!B×Ê0ÛW¼ÞlfÊaÝ%¢ÊËRhˆ÷hÚð«ü‹¼⌽¸¾¦”Ã*øºÆç‰÷®F/?n¡Uº8_8EÆÄ¸¡+Åì«øWÿ<ÙÃê8ÀÞÕ ÃW)2ªÜ9ö 
V'¡€ùZí¤tS^6åjoMHŒçÍW¤e‘Å5 mÌ ®;2Ø#²Ñ¢¢(N]ò< Ö0I‹#¶ ìíùÏ–,~¢J"å²Úöä±è‹»ßšæ^*áúþ Þðê«Y߉\ây$I Ò?,Ë‚eY3õNò¾2Ì&C‘“,Û›ËÜÖmg¥SÓ<}‹œÂPÅȸœ§€¢ _‰øÊ»ó÷FMº¾'”Ç%Œ{T…˜óȪ‡ÀÉ7Bqò˜Ž4ÕÑý!E‡ ï¿÷o€ÃÃQeÔè=eïj*¢(B’$”J%ÈïJ:…Ï ³î[«ùÞúyÎ㺫 Gô<ÿ˜‹ÁṊÛO,DšÇ‰ààŠÆuýPÞv{z‚(Æ{Pÿi¡zõºÿ^ø[ _ðÒKY߉\n½DQ)½‰^˜]IJ,î½á°‡5B¸BðRØUÏ]Ùààe†áÿD Ö´ÊÃ:€þ;€ýýÉq™XÌ3¹‡qÃ;Ü 5i<ü9à[?ýb»³0*´%."P¼ØOuGð Ìyì·´ðam´-ЩÚpÒ~¿ú«Àÿõø1$)à4 £ÂN³@VtÌ$õæ{÷v.œ,¬°ˆ¢ˆz^CH=o~Cš…$Oþ¬û­¢ŒçùÏr«•|¬ZÍ?×<÷ò|cüÃ^n|;€eY±½&EQ *J®ì°-¬Ó Åü4Û <Ï æ÷•ÖáØ0\ן2Û³¶ d6]׃¼ìJ¥ÌÙ•J%è·Êl+ó° …­hi³2D¤çT¦éÕâóSÆ®(úÇOJ8jsl'f<•‹ÏgìÙ*Š1/tÌèx<(¼¼íuñ…¦!9L‚ï•M*ÔT©Œ<©–T«þÿÃʺŽ?ŸWcmM„½«y%ˆè!¯ã¼ãgŸEóFI\…ÁjÛ~H¯$M¥FòJjfñÂêúbù¿;HZâ°‡•+3yÆ4MTé}‡ÝηvÝÅjé1›‰®ëe9(0V¯×Q©TP­VÑjµØXÝ2¦zX©ÿêºHëÝÚëõP(P,ç<êb¬teÒ„ïeÓ‰%Œˆ:|ŽÞ1*|ƒ/Ö¨ÃÈ0 Û½ø‰îC-j< ¨_(ÙÝG\Sô÷i™mÔËÝq9l°Fç òü80ìxE=‰¤¹‡~ʤPϸ¢ ýÿ~ýäŸ|á…ÙϽ y‘ïMð™f¨nкǚ¥0 Zº'Ø„l;} ¦9’aAðçåà¶v‰V^d|VÇiš°m;1Ê@–e}mw²Bð¢[Æ&ȶã8p'ˆpggu×?“d×ÙÃu]ضvÈ®( Ç¢(¹×w˜ùÉ4‡µßïãðð0öó›7oâæÍ›xì±Ç.­oÒJ Vj[—æ¦`Ò@Œ¶ƒIšt_?åÐýÐoŽBkMć٪7Ž% xÝo'î\uŒÂp“hûǶ,ÿÇ çÂÆåÅR«ÀÕ½ùs=$·’e¿1ýÙ™ïU[a–$`8¼´jªy’oÇqrïaUÕ ‘ <ÑÅ/6Qk EñÏáºþ³´²<ßr~t\q•®Ã˜æÈ •¤QEa"®ÈDZ”ÂÈ“ŒOömT*èºI’pvv–Z„ƒŠ.íœÇÊ0&e«VÛ¹Z›"ÛŽã ÛíEgv9WÏu9z6EÆã°,+¶S½^gcuK™j°v:‰?þ`0@§ÓYø¤ý~¶m'>wîÜA±X„ã8xî¹çfΣ]–•*&ô¾°0iFßûuøù €ïm3öè{úÂ(þöç>{dPzHn•ö¼ Pø¯GÛœ)ûEm]ŸMEßÁé†ç¸ù‚¼ž*€?ÿÖ‘Á̦ø+X¬gí%“Gù¦¾|yF–/Ä¡^Ÿß` ÷`µ¬Dy2Œ‹MdŠââÅ“Dq¶°`ÏóþZÍ q&æk;#ïjø>EoYÉ„ª.æ9ž“<Êx¥R –e¡^¯£ÝnÏä’$ žçí^…`׌|¡0ý`“d›rW©]™®ë;k¬þ«#îÑæ}ÀfÉx–eíl$Á®²‡õÉ'ŸŒ]•™•N§ƒÓÓÓØmý~N·nÝàçÆŒ¹ý×ÅÊZÚ„ T É SKÙ—B“žC Aà­­‘a˜BlÃ÷ö† Ï/~Ûèÿ€¸Iò®†I}ö"«ïc¸î7~º·á°OÛö•ïYòó6ÀXò)ßy/ÄaÛ¾BáºÉÉÕ«³ÀqÆ=¥ FFlWš {Ndy4ð´qU*þóÚíŽòQ“W©Œ…¢/cEU–e¿XSܘ.áïœGOÂ4MH’„f³9×ü.ILÓÌõs³l{Üó¿c9Ñ›$Û¶m2­i ÃÈýÂ亡ˆ¯0³–Ø6IÆã (‚¼G1«%“àƒƒãèèhbÛýû÷ø¹³Äþþ>úýþÚǵ²Â4a¯eÜáâÂÉÉÓF¼‘ ø}OaT¨å­ç³qNÈ‹ôÚãÀ?þÕÑïqcôb>7Måu:ôE§,øÄ'€ŸüÿöEñ—4”,ûÇtÝK Ó½ ò(ßyÏÃ#ΘÝ8âö®ÚIb?¦…“pÈ®eM±Q‡*šÆ¬¦éŸµš/Û$ߪšœ‡*Š~Ž,ýD¯_üÏ›Í鹬k&2ž„a …´ ‚0fìq¹«QvËÙ$Ù/²‹¢MÓvK^#T«£za¨~ã³I2‡i𻕦ÁÈaÖ´‡b­s¨`ÁÊ{q!ÀaÍó|òÛÝ)ïŒÇAŠÍ"‹5’$ížÁgœR5k&W² Lê-õz=5/{Û¡õqSòŽMà “7ƒòW™Íåó~åWð•¿ôKsí“;ƒ5íxðàAâ¶_|Ï>ûì¡Ê+ 
™´12X%Lï{Jß±áªIû…#2Dxùeÿûi+à{bÃ×ö]¯ÞñL¼7<¦°aíº£·@$Ñó8Œÿö“þç÷¾Ê÷Ć=KI4›‹WY]ÓÓSܽ{7(ªr™,*ßðÒK/áðð01”'‰¼{Wß”$@úò?}(3…êš&à=ÿÑñ–31kðçþ­ß?®,ûáÄá>®$ëº>ŠUžçjZ¦9‡‡‡¸wï^&çÎjcQï*0jw“÷gg ×]<'Æ#hÑ&g¡wFwïÞÅÇ>ö±K=oó÷48,rD»í‹jø‘u]ÿ³Ö J„æï^¯wéçÎÓüE°Üo.§§§øø?ù'ø¿ÿûsí—;ƒ5†%­´ö»Þõ.<úè£ ·àqguV#ïdÜ3õð½RCŸÕ_¸(Ìë£ó¥MÆú¿W8¾êpÞMï%©W$à+ÿÕ*”¿s ¤äüÛœppp€'žx"ÏÉ¢ò ׯ_Çññ1æ:çÊr³×)â+Máêäùœ‚®ÎÇß9®¥Äº´úÞ~Ý·Ž?‚à/´Då»ÙUä®V7*,òøø?üp&çÎj²ŒwªyvÆ š‹FTDóU©wÎ8::ÂOª+³(áv»€mãåw¿{îݦzX¯]»†[·na?ø)—ËŸÑϲ‹E”Ëå±°ꟷ.Rû–Å_™ÆXÆ 8ñF©‹ñv66Æ« [þà¥ñ¥CIþäOâ{½†¡ÊÂa~é›üã[°·”J¾'iNãÆq¼éM€ L´‰\†±]Õý²’ïÔI]ÏtéỷJnP µxRío}n4&>!,¶ ýSÿÝ\©±Ì|d!ãq¬¢åFyWßXP”ùr¿ÃD•&Qdƒ5D^dXqTØ* Šnß^ëT@}­éÞOlŒ‡ÕugŸý¯—?ÐäIÆ‘œ7›M´Z-hš–¯p`ËZ|¾%jµÝª æy ¬S=¬Åb1¶’Ø:9::Âáá!:ƒ …üñµ/Ñ»ªë‹•ô¯/Ùy]Ó¼¨acV‚ߎ&¼í?:þýz}Ô~&aqÆó÷ï‡TœT„‚,I~a*ªT­†JþbT•Éǧ[G7VYð×uýcnÓªèeËwbõk*,Dm’2BGÍÞþ¶ú{O™ÄE¨ÿå óÞkãÂù¨=Z«ºˆú×| xã;'Rõ˜ÕrÙ2ŶíÝ-ÊA¡ôq •ÌÒd-Û€?§¯bAf!HA‹^ÈpR £ôßû!ˆÿɶ16ë…§2;dœGH’´ÚEDÛöçÈeŽéºË-ÔPF#!…zÒ¯cnˆÖúXEÁy§¼ðÂ\»Í¼Êårlá›ýý}<ýôÓAÂy¹\^ë8b Vò:-z•¹€³‹ÿÛˆÏ7maÜ£*`Tt‰ Þ¤‡bJ$Aõ×þ{t!LFÿŠ_’Ô4ÿÇq.*Ùxñ/¥ð0c ¬’°í¼iäA¾]×—oÏóËå*Jæ«a{YüÚÏ_ HZý¸Hzu\’(Býï"‚JÛLÞ¯½ù¯ÃþL/ykȃŒÇ±òê¾4'.b¼áJíÓP”ÅÂù”m×ualÛF=«VpŽ3¹˜ƒÖGÑÛdÀßû‡Ðˆ÷~.É×T©çç™^!ËÀ×}Ý<ºÖóäQÆ£¸®»¾ÅGòŒ.*»Ž3Ò™—ƒ¢¬Ny6Mî_uØ4õº÷¼ÌÚPÎ\t©×ëAÓ´@€i¥#ú³* …Êåò¥<(± ½eÅ+'áÜ$¢»Eí¸Å‰¸|Òpx°„ɘÌk¥‰—+ͯú§“çÓlèJ’í­ÖÔ•”p yY^mdé&V÷›•Ë’o]×ãË òÝ’B¹«Õå ŽÎ„ëÂþàøç·m_öãsaœÂM2^*¡vë5@áŠr²ì) Ðn£ýéG€vÂ/É2ríÒ8;{$Óó_æe¥i¨å‘mgò85e#¬4%…›¦ŸþQ©W®W¯úÿŸå½Æd%Û•J’$áìì,»‚p¿j‚^Ö~4Âí±­Ö0h¡'¾}¼³(®.ÿh“*<%…Œ»®;Ñ‚o­E#5m¹p^Iò·eÞ ®‹êýÿ¯ßé`PWƒUËŸ$ggþ3’Ñ{p&ƒÕ¶mܼy½^…B!øüèè·oßF¹\F¡PÀsÏ=—ÉE,ËDheÁùš¿ÿ÷&á6Ÿ;êa²*éÜÆ 2¾—5Ì›ÿxâÉÑ~qBRÇȰ¥Þ ŸÂieǘ|—ÈHo…³ « ¹Eßf&Ç3¶mÃó¼xÅ&”äO‘ïÁç†ÃðõÓ\³ÑZ©Àø_ñŸµ‹ÆÔwÓEEj´Z@»|×4gïHÒÆë3ñüóß‘õ2ž•ä9™¦oÐQ¿Ý¬&"×õÏ´²mYãj´ª»mûµ 'xv0ú¥v;³sfv¨:ªšµ×œÎÃnÜbú%VáÂÿWÿöC¾|Ë2ÄÏÿžP‘~Ï·eùsF¥2®³ÍQ£ú—^Á£¿ökÙÜÈ 1 zhñlíaï«Zdiµß·Ù„ôþkÀþèjÆB-(×Q¨Cüg)Ü£þ™É`½sçdYÆ3Ï<3V)l8>>Æ7ðä“OfrËðOÿéÏàÚµ[ã¶ eÁzË__t–¤‹•vø!¼QÅ×Áxî)à×ÃdoÓ06&+ÿÇ_>ùÇ£bLq-Ú74Ðná á®;æx]äxßžu`ËcרÕjñUR©:…(Âóü¿_ ‰"¼Þ'`šþ¼Ônûr´–ÜÃÊóŸørs‘ÑX»ƒÂ˜5Í4QDóä |}#íÝCĪWQwaÕ¼ç=ÿ{ÖCÈ„•yWiµ†æTU½¼•Çñ•*Ã7@ÃÛèûÑ7¡ÉXüë VMÌÆ‘›66­–/[á —òï2ò,FßOáÎÁ»MU!?^Lÿ‚/‡ 
RIQ|O”¦ùï+z–ãžQÛö¿Štð¾ö[àýÛWßð†K¿Yã8ìмv)-ùH§_öK i€ðçߺšë¡çsÞk"9öü’džÅ×õßa{{Ky@¦¬”týÄO¤~ï‰'žÀÉÉ úýþƒ¹L\¸uë5üÝ¿ûUØß?B­æßKëäÀqP?yG|­!¾ç4ªÈÇÍu&ÒÛÙàâXá‰Ôó€ßzø¶ÿb”ÿ7‘†/d–å+$¡Ù˜BãÑÇ«Vª£6´ ¬Î¸©V/JÒ§×}bbÐuТĝNšfpC-ktok5À† ûÿú“ ›… ø©4W«£w00*´•2øÏ=χ¦Š‹°­ Á¶ý—JþÌ~±C´ÈFš.=æ2:;=nÑˤN:yáÚµ{Y!Vªä„ƒÕååè åÅAÉû¦é¿˜ÆV•0ÊQ%E€Bü“¸Xàa6—ܣśð3FÕ¤/<ý—]Ü(\¸~''SøS”)ª.¬PëzÄ) (¾ñ@FS¬\7xâüÜß.ËpþîO@úÀ üÛ/ÿò˽‰ã8DQ„,˰.^°c°.5"YY$Û¢•âuÝ—Oê’F³éøÊ•ôóxÞøƒÑn/UWaªÁÚëõP,'z0Q0AÛïß¿¿ð`. ×õÿ&/¼ðCø‰Ÿx?üÃoE»í/Š)ôûþVáàçªF¿×¶F€o°–"Ÿ‡µýhˆ®aãO[o}7ÓÒÃÜnM†–5¹JžüU*Õíöøï«\W ÞÆ<ÖubY´ÈM£÷#€@ÆÃí¿4í–ýª{PäxEºÕòHÇ—5J‹›»(ù?i¡EQàA)7¡~c»PˆËÙÙØÊ…aŒŸ/mÞ ·<&åv¢5BöÿZ¢Á²€ÁàíY#Væ‰ZE5D‚Vzöö|Á%a¤'ª Ð*…ïÆM´”[ LÌÿÌv‘U Z4žÊ…µh_© òÃÕêlΚU-þŒÔ«èøS¯'MѾpÿÝ ”åÈb¦$ùÆh\¤å<…;2È2ìÿÎ\[ ‡¼_ŠÁJ‚@s鼌)T‹CSœ¬.ÅÙÙ|߯×ýŸn׿¶½=_¶u}r…^Gi%Ñy‰r©^EÓ–^4Z%¸P(àÁƒŸ¯ðÎ^.þê›ïùQ”¿=¾1bý[–ÿ„¨KðK¯_Ÿpp“Å“$ø^Ö¨•rñ&Žq§Ùí:ü0b×;HXñ{Wƒa\ä‡W«‹ë7¤ø'í»Êôš°ŽF·$ Ìy‡&þhÍ954¡¹£®4‡Ðûº”z³Ã‡ ÿ½mÛŸ| ãBÖ,cÂë`””Üíoø}MJ**ÐGèzza¼U(T›ƒ;Ïó¯·ÛÙô,1 à«¿úÕl‘ 篚¦/´ó*G¤aD“©£óz½/´ú‘vÞ$aZuH&·Ðœ~™Ðòïÿ þ¾áG!(F}ÑûRzý‡!_ù×AwJË {fÉKJ¿·Z#çM˜¸º’¸¿õgÃýëÒŸ†z?ƽ0êõÉg²ÛšMx­6ìŠã¿ïoþ=ÿ…§(ëVËxÊ’|Û ¦´Ð¨( t]_801¯7­’Ð5|ã_Cõ¯þÙ\¶¯ý~ò·¾þâšû§,–°l?Ø8huh¥†öYVŠa&ƒuFƒA|IðÁ`€F£r¹ŒýýýÕÞ°%1Íñ‚V¦ üõ¿þÉÙ+í¹€ö†HÄÖç}-ð¦_ôÿïs³l¬†'°‹ŠòÿôÞñ‰—*9zIEµ Èêü“_8Ì3Žz}±Ê»–5^ùÝuãüK˜—htE´HàOâ‹ñËSná:ñ[­ùÃV¨«)ûq+”ï=¯½S«/ P"z˜fÓ¿µó(ôT gWª ›¦9é]µ,Xsâï8±Î"“¶âašcñ]c1d9P.HQI’ï´¿YœìÉòì²î<+”¾MI¡õ³æoQáãEsµÃág4½ìªÁªë:dYžn°RÒ5…,¢Å­ç¡‚+³u¬Ë»J‹~•Êä\¿$ h©6Úµ9š[¨ÂbTéM[𠇺$„Ä„/UUºæAôFÇ„‘×—Šïs¹mƒVBÝÿEˆoù½É1ˆ"ÜoükÐ…fv¤ëñ^äðýPU=ŠœGª:*Šž·© ^“‹ûQC7¼`¿‹‘þ–eé-T|imQq+ÞIßKZÕ‰¬_þNˆÿþCA²©Ø6œ×ß“wå½ç°þÅ•ÔÝÌïÿ¼¯ý–1aê›~ƽ÷Oœ#(ÌJá ÀÈHذ»™ Öýý}<õÔS(—Ëh48<< ~NNN Ë2žzê©ÜyWQ;: QU_ÿõoðWnhÆH›!êT@{0¾/tЋÒú#@|ýE§›7ý"`\ÌþѰ°;/ŒzúI GªœÂ}QɱZuî ÂÊ{©4ªµ±,ó–4¦f3݉Q“=wѨŒè1âZ@¥ÙMTôŒÂºÃŽqE:¦ו Z´™e‘Ä0ücÒŸLUg™Õ´qûiCʨwßÔ4rë/Y­a.Ã0ÖV8lxEÖØú(ѰyÊõŒZyik´šõDÁ‚²_«Uõ#¯¯¢„æ:Êó¸(Œéý؇ —'믠݆ð¡ƒ[©¡*vÕFmihq üT(9©–Œ,TK è {ºTŸ§“lÿ]€êD½©ív{6ƒÕqæ÷HÌò2§Ð§^¦¶ ˜½oÛ ´áPÞÛÇþÿΆÃá7¼:l~åÿ1Tßñ³þIg@ýÏÏýaÝîp¨ª“ßWUÈáßëõÉï…5 ’4ž]\¢6 
Âè÷©·D‹]ÝŠvÛ¿^M‹ÿ®$ù×F–“ÿ4ák§ûGçð}Åäc¼ûÝ¿?ü¯þ«Ÿ™ïfeÌ>ðÔíÝnw¨(Êp8ôe¥ûøÝ@ÆIÞ–"ü‡œõ¹Y!$—ççɧÊÙÙÙ¤láçÉ¿þ¾IßO“§(ççþ÷Óž¥³³øcv»ãÓH»Þ¤?ý­§ÉKÞˆ›Ãg¡Ýn%IžOû㜟'OBÌÆ±¨¼dżϣªªCEQ¦Ëu¢89É]}_6›þ;7áë>š6©ßHÒ¤ÂÒíúÊVQ«ÙL~&ëõqå Ýö øƒ]õzüÝn§¿ ¡Ûõ‡=ëësWæoMÓ†õ$ErÂÊqXÏÏý§“‹bðŽ¦¯[­ñ?¸$%?W”‹]/ö©ÕSCõ›~còï¬(þ,Ï¿èk‡­ú¤ðuëÂ'ËÀDåFÓüK=ÿ©3¯ÿјÙ¯¸VËŽÒdúüÜoøY[D^æ6XóJÜÅkÚøÄ\¯‡"i‘Ó´Éáp8 +ùgÃáÙ7 ‡gw†ÃáżÝlý|ö‹Ã¡|6”æPhçVÕÉH”óóñ}ÈP ?‹IÆn­ÖèZè~Eß)çç“ïz`’ìsŸ¦ÆgL7›ãcV”‘]”v/ÂeÔ€Ñ4ÿèˆ~¯×‡Ã÷¾÷Ó¥ì ‡Ó_`š¦ [‚$ÑoÏÿÛÁ™u"šmdyµoð5A²@ BÒ»ðâ½1”e_^Å—GÚ¿ÙÝ·³³ùu¤VËß'lh‡ÇBç‹rv6ZH¢ñ…Ÿ‘¤¿e·ëŸæ’]QxDQž-%àÌ&²Í«¦iCuÞ—y²@c^¤ÑwîÌÄ­"«j¼†}Oœ&Ä0Ñûè>,Œæ…]™¿gZlL#,㤔žŸ¥‡~cxö=?<”¾æOÆÅñÂmµFï_M•ÿì¥áÙ·ÿ7þñÈB A6äðìl8TÕ¡ôŽûŽˆ°ªú?Ýn ‡ú޲‹÷öÙÙ°ù?üöL‹!aÝ îõÖ펫ð¥EÁÏÎÒõ¿v{\/¡ãN#nüq:VtmŠ ÖÈÅGç»V«å¿¢îÊ8ZÃá0:©KÃá°;/ ¦ÖûŸò%õÛÿ`8¼PŒÉ^ð™ö,Ò8§­QÈ‹< +¢^7PëõÉwR³é{cÓÊ­ÖôÕžèØÛ¡Å¥y ߸{NïVQÛíîF);Ãáô˜Z.?ûÅáðü|bÑpnüUžéÏËBú--r dyô2#»Ÿi2NûEŸ/ I"üîÞ…§Ùl.§Ø3˶¬çç磹|QhA^Ó&&®VkN}BšâÆ4«QIÊ@ÜËH’æ ÛxþžéãJæÅ‹µý%Ö:Ãá0&ÚNQbW±[­ óý¯ [ÿÿöXeüÐggà |ðÜõ†ªêëÁiºA·;¾NFæ4½,i-háÈGZ·"=¢^÷§ÇQ'£5iyPÛíqý<.@•î5ݤűnw8,?3ü›ó§æº¶­6X£(Š2ü·?öcãïÙp8Œ6y8F±úÅ‘ qáH‚hä1…F¡?ô¼´ZþϪ"§=,ò;ïyÉ;šfœ·Û‹­O[I:;󣉺ÝÍSv†ÃôX°3Ûßù¿ Õ¯Ÿ¬&<Ó2+"Òd5)ìyZ8Yø˜Û®ðbŸ¸:?m¢Ù!V±ÞN¹Ìqм=÷¦Íá³>Ífs¨-×Ò»?ûr œND鞟ë0I«r´¿ƒ‹–Y²íó÷p¸‚蘸¨¯‡jZ8:22ÊÂVit»›¥"Qdý¬ªÙ,ô3í¾„£+ÓÒ¨~ò'mX¯Ï7§|Ö%¤Éfµ– ºžçÁq|ƒ(Ž„1˜ðÛÑPÎwЂ#rP@ íž°Üj’ð©’\¥âÿNY’ö›—uùª×Ó+®Ríy„ÑO³4cÚ>¢ ‡þÿ§´Þ¨Ja¸•ý¯þÒÿû+ø"ºH‹±.L³rHöÓž¤mÓj ­« @1 Š¢L¶û Ê&²¼qU ª#2ë;ªŒÆÉ‡çùÇ¢9{‘î(ÔÃyù¢1JÒ|µ>DÑ/^³+²mÚíöÂûÛ?ðaXÒÿ {¤DqT(PŒl{T)•ZGËÝ‚ÀïfåβPëσ÷ôÏÃí¿#èÕJó‹(5'Ä•ê ‘ø§Í?q-’ÒØ´‡‹\ß<×xÑ섚¡$¾oŠÅ×Ðï¿6ר·Ú` \AõÔð„ì°´.þ%!7Cÿ#˜á=}þêuÿ!I’¼ ½ ¬oL³´îØådYLÓ/(÷Ê+ŸƒfÓWÎR êcYŒaև뺰, Ýp…w²ÌlÛSæmRª(*IþåL›/u}´0zÑârâxÍæ¨Ÿå"¶)-Täu£— Ïyp]×.`Û6$IZªÿ¤ýåO@>( ùÄlt.£h†¹d À6o7 èîQ:{â—~3¼‹†´Þ’4¿qױ˃ZO­š­5Xm{Üè1Ms\¹|ïª ßsJ¸ð׿ÔSÌ?,̺e¿ð £>GžÇ?³ÝÔj5Ôëõ‘wÕ²FMi7¼‘¡aø—¡i~û‹4Ï&©õúdë&BG?•Êbý‡ÉHuœÅŒ^Óô¯ƒy4GyÞ¨3J\K§ju´ö`Yþw)úçÞ½7góZ#mÉÂÐò¼Q;Ë‚û#ÿ濼Iòïqýƒ'Ÿ`^÷ ìjC&SótêYŲüù\‚^¶ŠªªŠ®·ØÂ³¹ÌÔ‡u ÷¸¢Ðƒ‰Ð1ãžT ^±±Ê0ëÂ4q~þHðÙÑd “{ À 
£Þ”•Š¯Ø´ÛÓc¦/e|ñý)Ì·Zõûé%õÒö<ÿ2azÏkÓy`)ÕÂqFm0)y*ÓÚ^Æ%|-ªß÷šúœÓu†±m;]‡ãŒzi×j#û+zÏ¢áÃá–µÐïoWm×uá8ÎdïIjÀH—8Oõ༺t»°?z=5ÕˆaòD­VƒçyhµZþCN“(Ž7h§F¶‡iúsÍ3l¬î[ëauœÑ‹Î²¬øÆÛQž  ‹ÉÜU†É!.gÛ6Þÿþ7á€<‘3ÛŠã8°,k<ǯ^¿ÔðFjÎv„Q^={a›£Vm§\BʧF^Rêåö”Òq(íPQF—êyþ~q9êÕª¯ë™æ¸×RUýÏ¢6Í%a<Ï»¦®‹¼¢áïSÓxzÅ’]%I#3&Ë#£>¼_øOhšþwÂfJS)•üßûÞs¼ðÂ¥ýÙ×Nlˆ¤wáF ý‘Çÿû¶Z8½”Ù jÆi“&¹ oiºX5Å\¬E2³•k8$Òó<ض¦ëŽ¿¥]ŒŠ,*ü¼VVø™ À4ϳ¡( ¤Ïùœö§pœ¯‰ ­c˜m`"X‹±jÛ#I×}Ã°Õ GíøŸQn¨iúÛa2çTQâÃuGï¨ñßã ç}§Vó Á¸uX2Š+ÿØaƒV–ý})ïUýÿÛöx±@×õ¢8}RQF!¾’4©H¶Z£Ðá°±FÓFF5Aá”—r®Œí§ëþ9?ö±•ÿÙ3öm8Ž3RäC7Å–48aâ”ß–E†Ù,Ë‚ëºS Š‘ŒSÎ;/À3ÄV¬aïªaøþoùàå—G_0à¦Ñ›ò¿&+øÄ'xúéÿb­ãw~ÞoòJ$³Ôj5ü?¿ø‹!¯QS·mß³ŒŒÌz}ÜUÿǶGa½ív²b5-ep–*Œ”ƒJÔë¾a—ä]£"Lqt»þÜAãÎ{{£ë»ªW¯ŽÆžtMiÈ2p~>ù¹aøãNºŸ¢¸žbYcþ?ïï\í¡?¶(ŽÂ¨Ç¿7l¬2›„çyÐuÝ7V½øäSÊQ5ŒTÇ+³ÃäÞ`íõz( (‹3ï3 ™ò`YþÁ7}Ó¸ô»˜4V&‘oâßÿû?Ä7~ã ^¸hÔÿ®ÀŠ “;–‘qÂ0 üç¿üËøîÏùœD…g‹š”;I¹—š6i¨ÅŽR¬² Γ%iájZK²n7½p› øÆæ:<›¸Ø¶¬|›¦‰o.ðžïû>ÿs!˜aÛUÓf«®Ï0«fU󷪪g®¡Vóå}ƒ ¼3k&·k¿ß‡¦ièõzüjb-}O^Ⱥn ùÎÿoø?ŸþÇК‹*1³Œ|¾wç­oíú%/\-*Ý`rIJ2NX–…/ùÀ_þ†o~ôGg¶”ÂEUEÑ_³ ·h óÀ¡ôLë“MßÙuV!ßžçáµøñÿzÝ뀟ú©@(ÉÓÏô˜¬XÕümÛ6lÛFWQüÉ÷B¨)j€RXÖ™iä¶Jð;wP,á8ž{î9ôz=4™÷éç^Âûî¾òŸ€w?êT*Áf˜ŒYV¾ÿÅ¿ø}üÞïý_aT…arIJ2~î9ìÿ—ÿ%¾å¯ü|Î?˜j)yÞHÑßÛuJåÑãA^Ѱ' R²±ÊÌÃ*äÛ)•ðMÅ">ûßý;¸‚Ȭ$¥‡™3̺Y…|{ž‡Æ­[øùW_õc~»Ý`¢¥v\<ï2³’Kƒµßï£ÓéàÖ­[€B¡€ƒƒƒ©ÉÚðÆ¿ô9xøú¿®ÿ ðÿ©_ý· .¨ÄdÎÒò àç~î%üÓoüL¨\$Ãä‡Uȸçy¸÷ß·×wá­?üÃ1Ûý{ éuÝQþi·ë‡–Qx/+þÌ*Y…|¸\|M§ÃP©Œät/7ì‹UÍßO}ÕWáŸ}Ögáµïÿ!èB¥Š´Åª×¹%03¹4Xïß¿Øßß>ÛßßG¿ßŸiÿ_ï?ƒÁënàmý-£Òȼ«Yy§×ëáôô4ëadβò Ÿÿ‰OàÀs|Í|C>¶Q¶í™]”eeÜó<<÷¥_Нþê¯Æ;NNŒúË“ÂC¡¾”ÛG^©¤Ê´Y°mò°Ïì",+ß¿ý]ß…_þÍ?Åý¿ÿ£RñÃ#»ÝÍ ”ÙFYضçuQ–ÖQ<N©„ÏýœoÇ|Ç=”þÛ÷ðççM’ñm“‡Mfs™ÃšöP  …‰Ï?ýéOã#ùž|òI<üðÃè:>õÈ#@§“õå,ÌóÏ?7nd=Œ•qïÞ=Ü»wo©äýUð©O} ¿ök¿†OúÓ™œù¦q?ùä“xûÛߎãÿõSøÕ‡îíyX%yyfïÝ»‡üã™Ù9üµŸýmü<êxÝýEÜ»ñ2>þñ7â ¿ð3xøáWðÞ÷ž£ÓyðÕ_ïG /ò°*òò̾øâ‹øèG?Š?üÃ?Ìäü‹Èwxþþþî_ÅË_ø xäg_ÆûßÿÞýî—s-ÇqäEVI^žWš¿ûý~&÷wÙùûÕ{„xÿçðußþ9xøM¿‹fóoyËŸlœŒçEVE^žYÒÁç¿si°ƒÄm<ˆ}X^÷º×áüüÏ=÷Ühÿ ï*~õêU¼°á׿Áƒù5}üãÇïüÎïàó>ïó29ÿ"ò ozÓ›ðÜsÏáï|'Ü/ü—ï¼ÈÃ*ÉË3ë8þìÏþ,³ó/;‡ÑŸÿ|Îû>=ôþÂ_x ׯ2tìÍý¼ÈêÈË3û±} Ÿþô§ñå_þ噜ùÏßßõ·¾À¿ ‡›#ÏÑë²—…U’—ç•æïO|â™ËÎß’$áøŸ ¶]ÔmÚ8ò"«"/Ï,éàóÎß¹4XÃaQ’Þoû¶o÷}Û·e=t†™Ê"ò O>ùdÖCg˜™à9œÙf‘ož¿™Mço&ä2‡õÚµkÆÃ² 
`˜UÃòÍl;,ãÌ6ÃòÍl3,ßLÉ¥ÁZ,Q.—Ç’ƒmÛF¥RÉzh ³4,ß̶Ã2Îl3,ßÌ6ÃòÍä‘+Ãáp˜õ âèõz8<õ©ÄmÊ€k×®½é÷û(‹©Û¦í».Ò …ÂÒcÎ⚘ÅYF¾W±}¤Éø}ÑmÜõ0Ë‘&ãÛ&ß<‡ï»6O“ï<^³<çÿšæ… Ö5qãÆ t:À¶müb±ˆr¹ŒÓÓÓ±í•J%uÛ´}×ÅþþþD íF£¬Ì,;æ,®‰Yœeä{Û×AšŒ¿ç=ïÙ¸ëa–#MÆ·M¾yß-vmþž&ßy¼f9xþÎÿ5ÍË•áp8ÌzÛÊ;wpzzŠr¹Œû÷ï%H~Lúáá!ŠÅbÔ|||ŒB¡ºmÚ¾ë‚ÎI«Tó\Ï*¶3ùbù^Åöu&ã›x=Ìr¤Éø&ÊÏá ±kó÷´1åñz˜åàù;ÿ×4l°®™pÉô¸<‰Á`¬E·§m›eû:XvLy¼&fq–‘ïUl_ë|&Y¾74ßDyà9œ!vmþž¶=×Ã,Ïßù¿¦Yaƒ•a†a†a†É%œÃÊ0 Ã0 Ã0 Ãä6X†a†a†a˜\Â+Ã0 Ã0 Ã0 “KØ`e†a†a†ar ¬ Ã0 Ã0 Ã0L.aƒ•a†a†a†É%l°2 Ã0 Ã0 Ã0¹„ V†a†a†a&—°ÁÊ0 Ã0 Ã0 Ãä6X†a†a†a˜\Â+Ã0 Ã0 Ã0 “KØ`e†a†a†ar ¬ Ã0 Ã0 Ã0L.aƒ•a†a†a†É%l°2 Ã0 Ã0 Ã0¹„ V†a†a†a&—°Áš3<σmÛYƒaÖË8³Ë¬Cþù™b²€åŽÙXÖ³‡ Öœá8*•JÖÃ`˜µÁ2Îì2ë~¦˜,Ëišp]7ë!1ÌZˆÎ±,ï—¬ Ã0 Ã0ÌÂX–Å <³3°¼_>Ÿ•õ˜ÑJ $)q»(Še¶mCUU€mÛA˜‚¢(cûëºMÓ`š&<σ$IPeì|š¦-ü}†™•4÷³, Ëò„ŒÒgSeH–³³³!€±í¢(eY¶u»Ý‰ã†Íf3ø]’¤¡ªªÁ eY^øû 3i2Þn·‡áièüü<ï¨üw»Ý!€áùùùp8dùfòOšü/+ûš¦ûÒ±ÎÏÏ'äšaVÁ´¹\–åa»Ý¶Ee”>ã9›É;Ód}8L—÷´¹¾Ër=?ìaÍ×u!Ë2DQ >£P_À_•‰nW€¿â#ŠbP¹,ü;­ÒÛW„ $!‰y¿Ï0iL“qú¼V«Á¶m‚€v» Q!Š"$I " ,Ë « ï°|3ù#Mþ—•ýp¸0A«ôôž`˜U1m.#NFyÎfòÎ"²Œä=mn'X®ç‡ Ö ¡¼Ô$<ÏKÝ—ò”Â?q/†ÉŠi2.Š"Úí6 Z­âÊ•+¨ÕjÁvEQÆ”v–of“H“ÿuȾ$IÐ4mì8 ³ ¦Íå ³-,+ëÓævf1Ø`ÍQǼ¡À¸‘*B¢Ñ*IR°jC?­V š¦qÞ“¦É8h6›8??G·Û…eYA.¶¢(p†aÀó<ö1Ešü¯Cö›ÍfP¬ƒs ˜U2m.g˜maYYŸ6·3‹Ák†(ŠÛ¶ƒUtò˜†·;Ž$nSEIÀ=ÿŒ’¾&/L“q*$F/Q'B~Ea3…ä0LžH“ÿuɾ AµJn»À¬Šis9}Æ0›Î,²NŸÇ1mngƒ Ö ¡ð­jµŠR©„½½½±°/QÑl6Q©TP©TP*•‚í¤”èºŽR©TnµZY_ÃL“qÊËÛÛÛC¥RÁÞÞ^ ¨´8ÃÞUfÓH“ÿuʾ¢(P…0™•1m.º®³‰Ùx¦É:.ï³ÌíÌü\‡Ã¬±ë¸®›Ø£Ïqˆ¢ÄÔSŸ&2LÃE–(L˜aòF’ŒE·Ý4M†³³³¬/ƒa"MþYö™Mb}…s]™m mÞžEÞÓævf~Ø`Í1®ëbooÝn’$Áu]T*hšÆá‘ÌÖCá4¥R ªªr#mfg`Ùg†a˜™†÷z=ôz½Ôíý~?Ë!fJ8$øÊ•+(•JP…Õ å{9ÇÁÕ«W!Š"Ë|Na_,ûÙòÍl;,ãÌ&‘‰‡µ×ëAÓ´àA(‹0 ûûû€~¿MÓ‚I–娄g†É#,ß̶Ã2Îl+,Û̶Ã2Îl"™¬ßüÍß Y–qûöm 4 t:<óÌ3€ÃÃC¼å-oa ¸yó&*• ŽŽŽÙï÷ñáoûÛ³»›+æÅ_Ä»Þõ®¬‡±2^}õU¼ú꫹ø½úê«xÝë^‡÷¼ç=+?ö:ä~æg~o{ÛÛ²¾u+#Oò°*òô̾øâ‹k+òÀsøläIVAžžÙW_}…Baå¹a<ÏFždaUäéy}ñÅñ­ßú­( +?6Ïß³‘'yXyzf™¿?ë²Ùét0 Á/ ¸uëNOOÑëõP(ÐétðÔSOÛpzzšú°ü»÷ï`YÖV%6?ûì³xôÑG³ÆÊøøÇ?ŽßýÝßÍÅßèãÿ8»ƒ•çðÙÉ“<¬‚<=³ÿøÇ`¥cáù{vò$ «"OÏë³Ï>‹¯ÿú¯_¹ÁÊó÷ìäIVAžžÙEæïK7X÷÷÷q||<öÞ¿€ÿ`Ðÿ)4þ?-Žþmo{$IšúÒØ4¶éz(_âàà ë¡ Óéà…^Xùq×%ßð_ð,@^þFi¹IËÀsø|lÓõäé™]ÇÎó÷ìäIVI^þF½^ÅbqåÇåù{>¶ézòôÌ.2_zÑ¥B¡€r¹üÞï÷Ñh4ppp€b±˜úP 
ƒÄm/¾ø"ž}öY^ö%­mzPÒËÃrzzŠ»wïí€Vɺä^zé%âôô4»›·Bò"«$/Ïìáá!îÝ»·–có>;y‘‡U‘—g¶ÑhàîÝ»øØÇ>¶Òãòü=;y‘…U’—ç•æïu,:òü=;y‘‡U‘—g–tðyçï̪SÜüc=†r¹ŒÛ·oŸ'ñàÁƒÄmïz×»ðè£âøø8«Kb6„ƒƒ<ñÄk ‹Xµ|Àõë×q||œ‹ ‡É7ÇÇÇxøá‡×zžÃ™¬8::ÂOdYF§ÓÛV.—Q,Q.—Ç*’Ù¶J¥’õ½b˜©°|3ÛË8³­°l3ÛË8³©\ºÁJIäq‰ÙTçè臇‡AùíB¡€Ç<ã[Å0Óaùf¶–qf[aÙf¶–qfS¹tƒõèèhjå­ýý}<ýôÓÁƒ®hÆ0y†å›ÙvXÆ™m…e›ÙvXÆ™M%“¢K³-½Í0ÛË7³í°Œ3Û Ë6³í°Œ3y#³¶6 Ã0 Ã0 Ã0 “¬ Ã0 Ã0 Ã0L.aƒ•a†a†a†É%l°2 Ã0 Ã0 Ã0¹„ V†a†a†a&—°ÁÊ0 Ã0 Ã0 Ãä6X†a†a†a˜\Â+Ã0 Ã0 Ã0 “KØ`e†a†a†ar ¬ Ã0 Ã0 Ã0L.aƒ•a†a†a†É%ŸµÈNFN½^·nÝB±XD¿ßÇÑÑQÖ×Ã0 Ã0 Ã0 Ãl s{XNOOQ.—Q,…B§§§8<<Ìúz†a†a†a˜-a.k¿ßÇÉÉ ŽQ.—Ñëõ²,£X,âæÍ›èõzØßßÏúº†a†a†a˜ g.ëýû÷årybÛþþ>ö÷÷1 ²¾&†a†a†af ˜Ë`- k”ƒÀãÊ0 Ã0 Ã0 Ã0Ë2—ÁJ^ÔÃÃÃ1ã´ßïãÎ;(‹±ÞW†aærù½ßû½¬‡À0 Ã0 ³4s]:>>F¡PÀÍ›7Ñétprr‚Ç{ ý~†ad}= Ã0;mÛø¹Ÿû¹¬‡Á0 Ã0 ³4s·µ) 8>>F¯× Bƒ …Zb6×uá8>ô¡!î sÙèºI’ Ë2AXËñßóž÷d}™ Ã0 Ã0K3—ÁÚétpxxˆçž{Ž T&3LÓ„mÛhµZ‰ßq]®ë¶m¸® Ïóàº.DQ„$Ixøá‡ñGôGY_ ³ƒ˜¦ Çq‚ÿ{ž‡v»½2ÃU×uȲÌð†a†Ù æÎa- °m{eèt:YßfCp]•J¶mCEèº>¶ÝqÔj5ìíí¡V«ßÓ4 ívgggh·Û¨×ëxï{ß‹·½ímk3Ë7Æó<†f³‰z½Žv» UUQ©Tày^ð=Zd™Çq`Y4M»´kbg¶–mfÛag6…¹<¬…BGGGh4è÷û±ß9::šùxý~‡‡‡·h4899û¬\.ãøø8ëû• H‘]G(aÞ œI4Mƒ¢(ð<¥R š¦Axž‡jµŠz½Žf³™õ°°|3“†EQ Šb𙪪p]º®CÓ4†˲ Ërjã8A´AøxÍfóÒæ–qf[aÙf¶–qf“˜;‡õÎ;0!ÌÄ,k¿ßG¯×K<Æýû÷qppY–ƒÏ8ßp·ª×ëYe¥„=÷¤ŒSž_4dRhš]×Ñl6Q©Tc6kX¾™8\×…eYèv»Ûêõ:jµZ ÇÍfÕj†aÞRÇq ë:Ç$IŠ¢d²@Ã2Îl+,Û̶Ã2Îl"s¬Ñ•˜Eèt:©aÅ<@¹\æ9)¸®›õVyGEQ ŒRAÐn·Çûlì÷m,RŒå2ÇF¡’$^#*–¥Z­¢Ùl.ì•eyîÈÓÓSܽ{w%Ñ‹2¯|ÀK/½´–g™×uQ«ÕP­Váº.EÁÙÙšÍæX{½^+®4/’$¡ÝnÏ]Eøðð÷îÝËôñά‹F£»wïâcûX&ççù›Y'4÷z½ÌÆÀó7³.NOOñýßÿýøÈG>2×~s{X)ûàà·nÝà ô;wÐëõpûöí¥.¤ßïãÖ­[xüñǃ˜ùb±ˆF£ƒƒƒÄýÞõ®wáÑG«èÓ¦âº.$I rÙ.Ê+;/yKIy'4Mƒ(ŠA!²,(Šré×ppp€b±ˆ^xáRÏ ,.ßpýúu.z°B¨ õ楰òiòHE‘TUºX² Ù^äÇÇÇ™)<‡3ëæèè7nܸô9œçofQ\×…iš3é<4gÑ>’çofÝü¹?÷çà8¾ã;¾c®ýæò°öû}œœœÀ0 ܾ}GGG8::ÂíÛ·q||ŒÓÓÓÄêÁ³R,qtt4–à}pp€Á`Àå·/E¢(^ª‡0ÜR¦Z­NœÛ²¬ Jo\K EQ&<¬¶mç"ï2aùÎá¼iMÓpvvI’`öööRó{LÓ\Øcº °Œ3Û Ë63/žçA×uT*‚€jµ ]×s)Ç2άÓ4Q­VqçÎ|Ù—}Ù\ûÎe°Þ¿ƼdD¹\Æþþ~ðEét:¡<ÀUÊß+$dY^k«çy°mº®“­ªªhµZhµZÑjÛ6*•J ÄÇÉA^a¶í\Tõ½LX¾óµ— {TÃòm*•ÊÄ3æºîXq0f–qf[aÙf’¨Õj(•Jc†¨çy¨T*€³³3hšT‰öÞÎ ,ãLŽã R©Ì½Øbš&*• ®^½8½ºÝ.~øá¹Ç°PÑ¥Á`0!¸ƒÁ`iï*à? 
Aá'''c¿ï2žç­ÕÃjÛvÐ÷” %‰¢UUƒ¢I’$Å`$I‚¦i©†*!Ë2,Ë ×YöÙ6X¾WO­VC½^ŸÙˆôšõ0˜ „ZÉlcqÈ…ÚÚܹsGGGA/V À}šÖˆëº/tQS+în‚›ŸÙ](ŒŒÑhž¨ëºÁêø4ÈËJ ñªä^–å`Å’a. iœ•p˜ü<•RMÓ Þ‚  ÕjÁ0 Ôjµ ½dC’ªE*Šœ›£ÈX¥”º.êãÍ0‹B2–wêªëú˜nDú² {g˜4h±t"c‹³¤ÒQdÙ6²p[›ÓÓÓÀ8íõz( 8>>ŒXfõÄ•‘eyç+î2›‰eY‚ èã6XçñjJ’Ó4aÛ6ÎÎÎV6NQÇ^ ³nÇA­V›«Ú¨®ëA«½½½™*7š¦ Y–ÇÞ”G ð€Nó~š¦ ×u'Úw•aÖå=Ç)èTÇ#ü Ђ />2y¤R©@Q” ý€@÷ :Íf3Qß'a[å{¡¶6Ï<ó lÛF¯×@–e6VWLt2Ž+À«ÓÌ&`YjµZPñ“òØÂ .N C¨gA–åDåeYEaÅ›¹4Ç z<΢|PH=­ÀkšÓ4§z’ ÃHí'ËrP&Éh¥– ëîÁÇìT«@’¤ÔÐtÃ0‚Eû(I¥·U™g6ŠhIÒéeY†aAT¥@‘NŒÒ-¶•©k¿ßÇ`0˜õåòòë‡øš¦ UUáy^ÖCb˜¹!ã´Ùl¢Z­BÓ´ @FXy  9Ì<ý½DQD³Ù\Ë„Í CÌeB‹“áB2iDsøEA©T š¿ÇAÞÕiÏW¸r©¢(Áâ …bš¦¹uÅ=˜l¡b\®ë¢V«9Ïa,Ë‚ã8¼PÂl$$¿À¨¿û´FMÓ‚t'ª€^øßæp``Æ*Á'''¸yó&FàUeÖ …´Ûí@‰Ïcv†™F­V š£SK%ÏóbÀpØû"ÅÂòZù‘aæ" féǨëúD‘$jV«Õ‚BLÑϨ‘›†$IA¡3Ó4ƒÞ~Àø ?ÃÌ‚ëº0MÕjW¯^«LíºnúØl6qvvD›¦i‹  ³iضDP€Ye™R”é–e‹ûÛA0ÕÃZ,ƒ“mÛ8==E¯×Ãþþ>Êå2PkB×õ Ø…,ËAŽÃl–e§”C”ynÂÿ2Ì®AÕ°§µ¨¡‚Fq^&MÓà8<σã8¨T*A/îY½«aDQä!fiÇ ¼õš¦¡ÙlEí(­£^¯yì):‡ ÒP¡0öê3›F8ÕoúM½^r_·Ý›+‡5lÛ6:NNNØx]1áj€¯xPÜ:Ãl qyªÓE10TmÛf™gvŽp<=qïÖ°â÷œ‚ìGÆ/}Zî*ìÓ4aÆ„²N)¥sÄ¥uaKí—øÝÀl"”µªÅxQƒùýüü<ëË[+3…Ç!Ë2nß¾§žz èõz¸yó&‡ σ @ŸüX×õ±P-Zyä šÙ$(x/Ž,ËA$çyìaevŽpdA¸iô;•Jõz}æg„òYK¥ÒÜÞU†!Ç™êùƒò»Ýî„ÌRž´iš‰¡‘ÃÆ*³)ТýÕ«WQ©T‚׫ŽT¡H…m.æò°v:”Ëå±ßmÛF¡PÀ7ptt”õõl1¾µZ ‚ L45Yg˜M  <+äa|ňs”˜]#l°*Š‚J¥2¶]×õ ÇoÞ¨¦f³‰Z­ÆEĘ…¡œé½½=hš6³òM½“”jI’¶ÞCÄl7”‚A=á©òïºå:ÎfØFfò°öz=<öØc8<< >³m‡‡‡èt:h·Û8<<ÄééiÖ׳YD V**§¤S &‚ ²ôQ˜B«Š‹T«E‘sµ™&l°’rOi"{{{€n·»p N³Ùdï*³´hÞl6Ñn·ƒ*¥³îÇ3̶âº.ªÕ*,ËB¥R Š‚M«üËÌÎLVMÓ°¿¿?B¹«ÇÇÇ( 899A£ÑÀÁÁAÖ×”k<Ï î£â)xýï¼ÿ\ÿçAYkö(Í ¿ÿ.ÏóÆÊ°'É©çyA¨â¢á)¢(²,Vª™$ÚÊI–eT«U‚°²B ³áʼ¢(¢Õj¡T*M-gYÖNx€˜ÝÅ4͹"˜ù™êaít:è÷û¸}ûvЋu0 ×ëáàà…Bppp€Á`€N§“õ5åRæI‘ÿ¥×~ ŸûÑÏ»¬5Â\þL:žç¡T*Áu]¨ªôÇ‹ûU«[&@’$6X™A×õÀ×wXUU¨ªÊíc˜L1M’$MÈ`½^k9ãº.t]ÞÀb̶B ú,ãëeª‡µ×ë¡\.†)0 ï‡$…·ï†aÀó¼T×?)ó¤|ðÃYûÀË_œõ%l..üûȬ ê×HrKé ,±ò½ ¢(rSfg°m–e¡ÛíÆzª¸ “5ŽãÀ4ÍØ4ê¡ëzÐr†r¯ëõ:Çáœif«!™ßö¢GY3Õ`-  cŸQÖ]6RÃPÓkªÂG¬2¯`ù^~µef-ÐÊa¸wªªªØÛÛ ¼A«4VQž{X™]AUÕ@ág/*“,ËÓo’ædMÓP*• IRÐUUUÔj58ŽÃÅ”˜­†ª_3ëeªÁº¿¿^¯©ƒÁ¶mOäªR(ðµkײ¾¦K…VÄ)ï.Na·m’$Å+óó×¥aˆ`£†»r¨iLÓ ÂÁVe¬¾Á*¬ÌÖã8Azm»®ËaeL.÷Ñž6 ‚€³³³±ÏDQD»Ýê0Ì6`ÛvUIQ¦Üvòr˜É`-—Ë8<<ÄÁÁ:ƒAð‡ xá…pçΔËe‹Å¬¯éR±,+‰!/S 
©œ€çñå¡V¾ñʬŒ8ï*A^VÏóÖRRWä™MÆ0 ¨ª:U‰ ÷n6›¨T*ìaerišs÷ÑŽƒå™É3žç‹*i•×)uÃqœ ÿ5ýÎ!ï—ÃLmmêõ:dYrʘž|òÉ Šð"å›ÓŠ4õz=ôûý¬ïQ*äUE²,ò¬±í®ëŽ­ÄŒ!(e}Ž Ü«›,ßIÞUBQî•Êl´Œ¯Ã0`*• <ÏKý®mÛA Šâ„—ŠÉ–]–m."³첌뺎J¥Ã0`šæD‹&JwÚÛÛƒeYAÈ;yT©X*Gƒ]3µµ) ¸}ûv춃ƒ,äYí÷û8<<œé÷ûÐ4 ½^€¿ên©“È»JPRx’§•öX8‚`yrcU‡oL#Ã*ìH‹¥qÏ3^Ú'éV:…÷FuÆ:|ƒ3ް•—rÜ7ß{3>ùW>¹òÛ7 YÊ·mÛ¨ÕjP…«à1k#sø<èºI’Rç{Ã0àylÛ†$I +û’$Áqœ‰7I» `šÀŽ´€Ø4Ù6V7~±„Y›,ãi8Žè?Ìæ1Õ`-‹kK¢.—˱•Z÷÷÷ñôÓOÒÊåò¥ÞÛ¶áºîå„KjXO»—Ep0=§v‘E)º¾°=¯ßèt0n H6èÕÈ÷¢Øz"/ëÅ÷Þøñ7ⵇ^[Ï}¼ oòMùج¬0«"o2¾T5­™išAð²$)ukVø% °·¯Ôý¦Ëv¥R(ŠÃÆ*eÓe| ÃZSrdÁf2wÑ¥N§3VÙéÆkæB¡ÙC¢ëúå5 Þ–å2‘^µ˜Š'Íû®sáœÄV¢íÀx‹£à¸ù%j‡ R¸¿·TxIö¿÷™/üÌ¥ÜÒ$²ojÏÊ sd9‡ÏƒišP%5$˜U<;dìòs¸¹l‚lSQ¯R©„z½Î‹•Ì\l‚ŒÏ -JÎSÑÉ3¬¶mãîÝ»=šNNNP,aF&%¸WišK…{ÍŒ?¤ÕŸÁ6 bû‹Ž¡OÙxIžPé^Ò¤÷ ym©R°³¯ºáýh-æzÜÈg"FáÊâóWÞÞ‚ªE²Ã0>žçMDÔD=¬FÆ¡d)¯Ý^î8•Êrû3œþÁì´È¨ª* c2„/œÏÍl6S Öýý} ƒ‰êÀQ¨àR¡PÈúšâÒ+¨.c4Õ1ªŽG@óç±ê÷ˆ†àW5&óJ«SŽé„Ž5´Û7<ÃE¨ÂžÙè|mÁãEΣ—FùUUÎKßÛK²,c˜TH)¼ˆ#IÒ˜‡•Ã*/oÇ&æ5Á :³KP¸¯¢(°m{ÂËÊÏÃö0“Áº¿¿MÓÃ}ƒÊåòF¶¶ñ<ïò“²„ÿO,Ò[ȬbŽŽŽ²¾ž…°,ëò:lpÍóÎwà‡Íšˆ ¦j¸áª¸„‡äPb#±’pÜðØÃi]Ý‹ãŽ)FÏ0ÂÅýWý¥k®V}'.¹>ì¥ýת:©Ñqàm?û³¸þÑÎqã7Ïóàº.+Þ»‚¾cñî ç]&«³â?óÎ+W¯ú2êy€iôÞE?'–Y.0ÃìáHUU'¼¬ìaÝf2X÷÷÷q||Œk×®áÎ;xì±Ç I{ì1ܹs×®]ÃñññFzWŒó  ö—}¨^]Ã/¼ |õ[üßÏÎÆKŪ<¯<ü0^{è¡þ›¬⺾7iÙ‚4³â8£"8L,Tõ7©Y¸R0+9S¨‡&vÇñ=¤õ”Rì¢èoE@QÆCŠç ÞÛ›œÛì]ev‹¸cªª¢T*ñ<¾…Ìäa S,!Ë2ŽŽŽ Ër`¬ƒ ðÒ&‘‰B.,ÌŸJG!e{Üq¤·vBÿ†Ç§`Ò uBÿÒÂx\rºîDJÊÅ•RÎø†ìÉ;|CÀóü “®ëï÷‰"ðŸ_K¾¾:ËÂËï~÷œ7}3ኑK@Jø¢Ø¶¯ˆ_VT:¸.žøÐ‡.缂뺩ÞUÀWn¨ ={W§~oÎbp ¸m$гå_s¾k"–e±‚Îì Ió4U æy|»˜Û`M¢×ëáðð0ëë™›pÂöÊ)Íø½yúƒº¡ãté° 㦠ˆ‘Ó §QØ3‚5ªµsà¡oºP^Ž-ÄŒ=.5O€fs´jOãI³Ï4¶O~ÇwÌp‘› OÖKbšþÏ¢PdZ~^­æ‡G.¢€»î¸‚ïº#ƒUqïÚµù¹Å†‘j¬~á%Ïó¸àÒ¼¬" }VCT–'+À3°mŽãpß`fgHÒqEeY)º@–tqÍÓæ§n»@&ÅÄVEÖX<ÏÃ2ž2X%)þzj5ÿßnw2”—‰é‡ö÷¼ð©ñ}Ò¼Ò+ ‡õº°íx¨7¹bÞ{ p'/Öu/¢I—$_1?;çˆzt£è˜Ì}M ÇÜr6:xÖj·iW×]®·£$Mï1I¿$ÉëÕ´‘g8z aãÒ0Ò=IIÈò¤‘Kù|’42h™ =M½^ŸÙX|%g뽫¦¹š91š·*ÉÇ]vʲÆ÷W|Þ¯üÊúïUÎq]¦i¦*î ³-P2X·za‘™`j[›^¯‡F£1õ@ƒÁ ëk™›µ+&2â½IŸÔÖebÀ ŒúFa ¾Ñ.žDßùâŸ>ú+ÀWñ…ÁúöÑ~"&{¨Ò(g68‡å+ê²ê‡1‡ÇÖ_^ú9à[Ÿ›lsðuÿQ»Q÷yU "*UUÂy84!ÉðÏçÀûCÁ¤÷X–aþÀª`ùFÄŽ¼Ø]×Ýlƒ5Í[à8¾ÒMFiô¥Eʱ,/Zœæý$(l˜Æ³ãŒQ/ëªþ6²ìWQ¸ÿj˲®M IÒv+BÝ®°êޤ{æyÓ˜æÈ{K G¶=þ| ^+ñyŸþôåÞ»œQ«ÕP¯×Û41̶`š&dYfYßaVæa- (—ËY_Ï̬=¿O†oÌ© 
ÛÂïo¾!þÝK9.}nm$‡èÞÿyà¾øO¯ÿñ?N¸mMئ§p2M~û·'sféz?ú<ð5…Éãþ÷ß髯ŠgŒE4Æ)7Fxšw9ú'•$ÿ¦ì{‡B¦6ÚKD†hòÅyl±pG 9—µTò=¤•Šz>o˜dT‰F¿×ëã$¬ž·ZCA–w2‚`ËÓEqsŸÙ.Ðÿw™ïè3Œ*bG™7\]–ýøVËÿ¿ëŽBäC¼xxˆÏ¼éMÝÄì¡ðHî³Íì³Tyg¶›©Öýý}g=Ε³Öv6À(\6\E—ˆË %ݼˆ6|a)²_ÔþR#Û©š0<ó@•€ÒUà{À{FßÓ/¾ÖÈ)„ÆOaŽÍ&ðÁŸª¯žé'¿+hñÆ:õm/kY”t§“ùwVâë~cT@gGp]7¿+‘÷÷‡ÒV*¾¬E¿çº¾ò*ñmgÂÞM2è¢JiúÛfñ¸“½Ÿi9£s U@]õœCãšfäïžçÁó¼…½¤;Qi•BÉ—YÄ‹3X ÓôCßIîç97œÇÃkÅ"^ë÷3ºÙà8NÐvɲ,´ÛíåÊ09‡¼«[ùÂLeª‡µÓéLvƒÁNg­k4$iìg•}^×ÞÎæ¢cEl™´(E Áeñ;ªH ßñüö»}EàÏ¿(6¦¯®Ó±Âº[8è;Þüñu ÿjüu)Çç¾6›€®Ãyþá8Sê#I˜ ]ž‚mîÕL1_¡Àë”o×uó=±§åÐ…=7I}Éc“æa ZÅÃó|ezÖÕÚhû˜E¸}_ 9ÍÓ^÷žÄFço_в\á¥i…ìTÕ¯? i©Fç&“…|S!1QÑjµò=Ï3MVów–e±w•™îaãÉ'ŸÄÉÉ œ5¶ ¹ÿ>ÆB¡°Ä'Y«ŠìÁ¸÷Iš“ÅodÆÙÒQO(…Ç ÿ+HäÀÏ'ý–_ð þg”÷F÷–ÎƼCô…üéKÆ·»¡m„mB8¯ˆ@‘—N~^¯ ×}G²£è*€9‹¾y ÿ¥7,íLX5ë”ïÜ÷$i$cdlÆ-,ѳKß¡ëzeyÔÇ— ¢G³>ÿ’”レ¶\x皸Œ9<Û¶wC¹¡(yŒzV–Yäˆ@JƒÎµ…\¶|ÓB Xb.ƒ¬æï(öÅ<Å‹3ÌBëeðàÁ”ËåµäÅ®=8LœG0î}î—šäøu"ÛLW¶ýÜ8ƒU ýKà±ÐÃþ3Àÿ2R:¢Õ~é\öÅç4Þè‹2nœá¶;Á±.6 ãT#çÓ4Ôí ìgÂÑ%ßË NÒÏòd¬ë—ï\Oîikø%I“žÍh>\¸J.0Y7Î(uœôpÞ(Ëoº â•sÀ:e<Ü/ج ]÷çäY'7×õ÷¡0Ò¤g€r­“žÑU´Ú.[¾×ÚÕ`,²~’{tÝŸ[/k¬RñŸËÌçYÍßQ,Ëâ–M €·µ¡ãF£F£±Òäµç÷Åå­¦®æëÂ7öö¾m[óî/öß¿ñ™øv/€Ÿy3ðeqôÙõÏÚCÿÿI}VLVž À?þUà×^}FFyâæžfò?y_úbü ËT 2oúëºå;× û¬ÞÀp+"Zì…¢ˆ¸Õ‰èwÚíùA{G{ûÎË:e<‰µöÖÎTxl9Œ.öÄÝ'Çñ+)9‚°X[¦-ã²å;ï¡îÔ^z« ¢_—ÕKÈYÌßQ(_› ‹åÏ»´g$·+œœœðsfqzzº’㮽凃‘Q‡ï© Ík OòjÌgÁÀ#¿«¾¹à+Ï~:Þð¬Ãï{ú¹×GŸýŸoœ …ÄFrˆ²‚™ Ö@¿¡ŠÇ¿Êm•$_ IÛj†(ÊÒýì£4›£º;y³1Ö)ß¹UÚÃç,^ËhjTŽ¢Fm\››z}ù Iò+ W*þ¥"ï¬KÆ“0Ms7”ªP=Oxè´P|2V[­ôÉ’CR.K¾©.An éa¾ •á2«°ç°pÞeÏßQ Ã`ïjÞq V[þ83Ëà~¿[·náñÇbæ‹Å"b÷yñÅñì³Ï¢×ëM­jìüÿÙûû8Éñ«¾ÿ,k{íµ·×Z?»fÍZf H £fš¢ZîµãÅ·‰ÊäzÌNˆS•íÐ ¾$«Â“ æa’Ò/ÈtèP"ŽgÍB‰mì%6±Kñâ¹Óã’íÄfkbÒ¼S¶ã‡Sߪ£’TR•êYUuÞ¯WÏt—ž¾RïWç|ÏùžcY³_ã|¯¤q(Q²¿`ߌ†ê&eª üÂ_Ä{u/ÿÛÉF)Õ5*áÏø8ðÎcÀþ]Ï ¤ð‹0>qŠr+D Àñ)àÍ‘}¨ž^Ü£ D4#ÖÒ`Ûé’¯ZV/yìÁÁ~øa<ýéOO¡)2Ž|Àã?ŽȲ“”ôž³,|àþC\}îsÑjµÏçç~Ë‹¿ã0M“3agYö Öúé࣎ߩ=¬;;;þ…?£ŸiËå°»»Zཽ½=0;ñË^ö2ÜsÏ=©:ÊTB‚mxkH“¶‘m&¢WÓ”¶ :'m—"ûF^Ã~ò¿zYym¸ósñ™LMOÞþÌ ´½Þý`s³§´ À{/7ôp˜ÛS–Qý›¿¼ï}Àïÿ ÄÜd­–¨üHRÏÕ ªë^s“šHNŠe ¥Os|wbÿž¯ùÜ}÷Ý 1VÅŒßQt]‡$I™Ž*X *•áQŠ2R$é࣎ßC Ö'NàìÙ³ÈçóþÏÖÖVßgô3 ûBnܸ`ò,e“Ôç aHJZBrho\‚#(Â3"ÍȾ„9N×½p`»»­pÒ,M3,8BÌõœÈu$ h6=Åß0<£ÀЀ³'¼ß‡„dèOžó{ÿâÇß|÷7¥˜ÅT³/ã|5ªŸÄ5X&gÞKQ1KùμÁJ‹ŠÞ 
gÛ½Y‡hÛxæ™ì2Kcí¼«£“£Ãýn ó–o˲²»Ì½¡Û¶Ñ3î2˜¹|䛼K£Œ‹âdåϨvõ¤[Ÿzja·>oùŽâ8Næ& 5-aS¥²:“3QlÛ»ñaUÒNêLÈÐ`ši™'ØÙÙ Á/^œŠQ<µßDØs:ˆà¸5<ƒç«Îó­¨ß—ÛmÝ;X{!*ßó-À?‡ç•Ÿ üíjo`%ÅMB²"Þ:ÓJw»o,•ÂÆiÚ䓟½ò¥=ÝÄ’£NˆãÑ»!­ÞY­z}ªXìß&Šá(ά¬³”Ϟ”¨çF–= ÅÆQH.™¥b–2GÖ”›™"Iლæõ`¿ ¨Žã…lM2.8Ž…st´è»Ïó”oÓ4³=¦Ã{·*J×h½òoÒt²Ijc±Ø‹:Hc°Ré¾q&”è!Ò$‘,ãÔåË£ŸcJÌ{üâ8 …TUÍÔä;•oï[-Û¸h(Üa‹Ê)ÆØ»5®|à ÈäÖ|>³gÏâÌ™3ØÚÚµk×xJɤŒUò#.“n ýÉ”ˆzkRÉ8$’ÊÖ4¼kè]'©øPz=ï“,÷н›fO€(«0ˆJØ­€øõ«d¼MÁ ¥Qšæuêqì 2rƒÉXÓB_g4gA´ÚÍ/z œ¥|[–•í$’]$­ÕÝ"fÌRÆ£¬•w5AgvOÙn6{ƒ ªNffÜ+—ý­Ýs¢W™ÃŸ´± ’×Ò‚ê¬N¨A8 Á±>xïi°¬Þ{&nÍ7yòˆ¬å™¶|½>»Pè%ÜŠB~]ŸZLuµ:ÙRŸU²¼Ç»ªÌb bÛ6ÇɆÊ4ãÀry47M¯?‹é×Î)Š×‹Åé†ÑŸ<éµ¥TâRN1ÌZ¾);ð"ÏP¤c_ÄSĘ *ð´ZiVƔ޶íÝ`Ðm½ÇacŠãxƒw°v„ùkŸ­õá9yrv2%³–ï(†adÖX­TzÁ,>Ô"Š%ÉèÜ—¶’…LñùÑ¥Aà ^òq×À/ÿòxüñŽt{© VÓ4qæÌ´Z­Ð‚ëÝÝ]œ;w[[[ØØØÀ£>:Æž/ö¨J÷ï? üùG€ø›át´ï]×'D¨Ý‚P订:­Ë”kèúŒÓ¿eôy‚mÛËF?Ê 'Z,œô¦´D'2%)½®Ú‰¾_¢I1G1†—‘±.ÑÚ¶F£op*ÿÀ`¿æ‡{k›§TŽš8¯òvY&ZF– š‘Ÿ+¶Ýÿâ¯T’ë 7ñ/‰¸óÞJ ¬¦õ&”è'N`TÕ Íj4¦fOk3ªD®:¦i.¬®09jú^)‘°¿`pŽ,!*ä¥LÁÔr»èzÏ+ i$†¬d¬Fó~t‚ö/þzâ¨Ìwû·OáF—‡ENÎ ƒ†ê>‡GBmk²çþn§ptú ¶sˆ@R ]膃÷F ˜‚× ŽMqJxÒ…|øÃ·ãyÏ»>Òí¥2XÏŸ?Y–ñÈ# —ËùŸçóylooc§OŸÆC=4Å'?}FVèÐ àµO?ñFO$Ï3©À3øº³.¾,Kƕ޾ÁmäQ—exO¡·A‘l˜ÿÛ;ŸãxÛL3f\o ~Mì0Tô­m%=iC"j¦ÍO|ÄiÆ']ëƒQÏ,ÐÿÞˆûª+•Õõî,ß´è8&Üɶýàù0Þð¼+Ë“k å2ô·¶üIŽUH¬wÇ“݇ Ì·6ý*aÛ6LÓœïŒ<)ª•J/ò Xô>o6ãC8¡g´V*Þ$Q¡àýÄ)굚×çÅ;ŽkY/«Ý$(ÊRÕ^%ÇYèúUÊGŠü#c5°NN–Ã"âwhÈ`†‘:a‹(öæc&òägÆ£}j؉)ê(ÎXEü´ßã1}D²ì] ®Ñ½ïÂöÅÛ”¨PhÕ´…$ÆSÀë_ª:0ÄŠŒøç‰.¡,Øôîg½ª®{ÉûºÜzëh™°‡¬‡‡‡èt:xàî÷ÀàâÅ‹h·Ûc<åù`Ûvú°eRLøéï{1xwk¬†fR‚µTC‡gè.Oz€îúTôfØ6Žþ³7ïêU«I»æ}˜mý³?†1\7‰F·•Jác‚•JÒ¶3*óÑIrN¨jo26)¯A¥’¸Þo¯míö-X5RÉw¥âYY…‚÷…'xcÈ©b¶ºTÓ°ªlf;ïi“z†±øª ”`¬Xÿ^*•Õ1ÞçÍB2 ‚·¨<™4í>Ì«IF+%ϫ׽ó¤1FÈ"X„ÁJ†33w ÃX˜w5¸&5†t3Å Z¶ ý­-ïø´ñ»)_†Ñ_¶{l/fÒs5ÍäŒÚår/âaP¹?I‚úÚ÷^™¶Ý ­'åDÓ¼-ÅYôËlÁضéÒM–ÕË_`¹ÆÀwHûÇêÁšæÉ%Q&qa¹þžb2À²MŠ3FÞO7£ÍÙöhs3•ªiÞÎÍ&L¸ûî'F¾½¡k«ÕB.— yVøaÀm¿¶à…âƒHÕ!hæ£Xô¾à§r°À~½5Ÿfïwý›½ÿe¹;xÆ•­)Á3Tkð Öà !´ÍÜÑ€Ùê]=EUÓƒš–Ü,Ë»½ ñKçT”ÁúÉIž,§Õ©ªÕþ~$I½çCQ:§®÷î-É™Aï…¸íTê&—[\³Y‘j}vµ {”²ˆ¢ÀÅð@UNôµç×þÚ×ÿ»ÞÚ Ûö¿·a¡]IÑ•Þ}öÖ“,Aðg½>žÑŒº™…Áú[¿U™ü$…¼O3‘·¬Á‚Hîž´F3ÍÔÌÛèd–]׿qrœžÒbðPÑuØ­§¼MÁù háë} c²h®TPFíAI–šÍJ’aŽZíµO{™VOžì)^ä#ëfY¤¬§!¨O&6³RLÓ×O€^’¦Ð>–åé_¥Ò|²O’wuÕjŠË#–œi!$É“yUõŒóÍÍÞ} Âì Ö 
¿`pýýý™×cš6Á ª Ù‡$õfJ%Øÿ ë*ßGoÝj×0µm@{Äû(¤tF¿S½p_:…3‰G3{Ô#,@ú%@ú>ïOÊ¥1ÌËJ“7I†w{AãWze›l;Þ(¦)â%Žœt"FYïMï Š˜ ØU¾E_UUíöëöy}6zåCaíè% M>”JþóЙmÛFùGžñïmy'Ò4Øw¿6ô%%è  ‡$(“z\”À LÓÓÒ(9iÔ©,f¹<Ú³'ƒ7ñNã}Z, ØØøÂh Z"fî}"{™`YAXXJš°Œþî¬÷Qè]à8¿÷¯=½vÈÀfÛ@åÿø˜§‹½êõfš(æx™ºÁ]û¤Vóì!Š+!g öÇÇÞË‚JpÈ=€l‡áHÀÄH÷î EpMw½±ñL3~Íô¬ Ä}Ág39DŽ3y^« È]#ËžO3ûÝû¶,àÛ¾m´õ«@ ƒ5ŸÏ£Óéàððpà~´=èuÍ–eùVËê…b;ýÓÚ±qs÷x=ƒõ×þèN’×Ê쀇Õ@ßÚPûC€u¡z¬¦ (7ôâÁ)®;:xYe"¦¸øAÞS*â4îRtÕN¼{¡ŽE+A(BèEÐçqÐ2 79P.÷ä ÝdT1‘vôÞHz—Sÿ:隉ÑI³]hRð唯èÒ*Ë5L2,Â÷†ò3»»Z‡ùý{} ¶¢rL¹’ÞáÔFYöړư¤({MóÆÌANh]÷ŒZËêÝoœRDÛ‚÷2(´—È·kªBc==;Më…ÕË^{£ýÈ0€'®8KÊÔ“ÑÐL Å–•¼&•af̬&d‚Þ‚ÞÍÁ±4:Žù Y‚ëÖºxhì²mÈžòvIˆ‰$@ùü‚§o|ö¾Ög4¿ áë]éê\’PE©i&ë^e²œl‰æLñ#Ý£²Ýµö¢‰H}t½7Ù3‹Æéy¥R¿MkÆ)‰R¡£ C{ÍïBªF6XS-ñŠ(]¶=^”c*ƒ5ŸÏcooN'vŸN§ƒ½½=lmmeÖëZ.—Që~qT~«T6·žë{n?È„[º™ÏôB‚¿ú-€ey_”  ªRû'ðŒÕ®KzžD’¬’ŒZ þM“ GGýKwîÚí6Μ9ƒ‹/âððÐÿÙÛÛÃ÷ÿ÷£Ýncwww¼V̘J¥Y–ýõ}ä+Iê/´aü÷[â%U€Òww…óÙ€sžÑúÒÏÀ¼ôYoP–à²òŸÁ÷†RXãÑÐ0:ño›?ø¦™m©÷Dò7¡4XI#â¼™GGñ‚UƃQActÎØh„ßÔWƒJ8×¢Ÿ‰ëW#SÓš–<Æ—J½IÀ ®DáÂÑñÖqç3_†ôЀ¢øãçæf¸l#-ÕHR:ª›K×–¤^Ø/yìƒaVÑšæ)Á~,·¤»Ü$tlp­4!нPù ¢˜._[–éî£Ú¸woìÚôBôh÷5JHvË-O¦kà’aFúäqk¢PB$6P™ @Ñ2yœ¢5»sQ4-5£DÖ–Õïͤ\cò>ß7r QøßÛÓn(¿¬wÌ¢8Ûî¶!9±^*_ø'pýúv6M˜¿ó%È0ûfæ†æ­¡0”ÍÍ„=i¾Þ0B¥Ž].HpX×{ùÖ<Ç’®ëey¼ÐwRVæ€ ÓAûúQ\I›föŸÝ Y+ørê‹ë °@ÀŒ¸p$a)ýkí ê/¼(ý y?Ý>a¶r<ˆâHkÃNã¹èònJ®]»æþÜÏýœ{êÔ©¾ŸŸû¹Ÿs¯]»–öT3áòåËî… ú>o4®$Iñ©ªëžù¯®«º®[wÝzÝuŽz›Ê½¿UÕuë/r]·áºîÿÄUĦÛlº®{亮Ü;¦VóŽE×=>\뛿ìºF·M®w씩×]·TêýÝlº®¢„·ÇqtikàñȲëJRø¼®ëºÕªwîàóJÃñ±ë ÂèÇM›$yÉ2÷ßâ6Y–Ý£ÀCm4\ïadÿø¸÷Q½î}‡ÕªÚ.Šýç>:òäZU]ïŸZ­·Q’»ÉßmœŒ‘\5›Þµ›´dQQz¿ÑæÍfïÞK%ï'®}Ñcˆ¤¾}¦4Di4ºÏ3@t?Yö‡Œ‘(•¼ö ’—,’¶O–J%·žôDQÕþ½èÁ‡™ Ë6†§íÕjÕ­Å Z£ ªn³Ú:¦MÄñ±Û8õ`ïAíèȇå»þØ­—1YõÅêÿù{néôG{Û›Mוe·ñÆ_uKßrØëË²ì –]¤Ztéãco¿®ârüÎw»†×¼h×o6½Ïé‚ï:úÅðØ?Hw‹ë£ÃO­z-'²ªãwQCzˈÇ|O£#×mœz°÷Å+ŠÛüµÇ¯/Š®÷Åþ/ͦ'ÕjO k5OÞ‰ãco?RZ\O~i—z}¸åø8|‰££ðßÃŽUõnayIm°ׯ_w/_¾ìÿ\¿~}ôÏ€¤›WÅmF˜f3ð%Éû£îº®ê}ÙÁ/*¨ìÖ뮫Þåíwü£®+Þ0ÐrK2ì÷ÁãcO°$ÉuUÕ¿ä´ ëÐEÓ¤>=ë³1¼çÁ²);®;ø&F¾hQt]õû>êÿèÛCŸ¹nψê›Tq=™øþh4¼Iie@ŽAQ&—¡R)üŽšT׋ƒŒùf³"()jô¨$©ßè¤÷M´Áç| ¥¥^Ûg«ªðHƒ´½¨ðF¿8Yž€¬1ãL\ÆA“ii¿žeÃÓöGI’ÆWâ»ÝùjW]Wú–¿r›¿öØHJèHû¢?«Ù£ñÿ~ÈUîþþë"Dôƒãc·ùžxÇG¬ÎP掽1³Vsý÷’üê¯ø¯ª ­+G ÁwÙܤûŹÁci8¡c¢ûIDçÑŽŽz“ 
4y{ò¤5ƒ/lvŒÚUUuÕqõ‡¨w†çÅ9¬oü·úu¿º†¼q˜¨ø·t|œ<³M¶P‡‰ ͦçñ \;8±7?›æQN¨Æ¹®Ö}ACÆ\ Ö¬’tóQEÞWëu×½ïÇ<é±ë{I}¥±ûò¤ÁêèÈuk?æºnÍukç]W}ñÅž É®ç¥}×§Ýú÷_꟞ ¼«ÕééK¤¹n¼ÇÆu½>-Ë£÷›4Ly"k®,›²ãºÉ ÏÑÑ‘+éãcï»Qï~·[Ýõ€ GÑu“å%ã¸ý˜¥`ÚŽ4R$¢rõzxüR«y“=qÛè=,Š£?樇| Öf³é*IoEš!ˆûœ^þS”Ý,rt4åc’tÇqÎS­¦ó>¹îòáiúãÑÑÑà ™úÆòîŒÚÑ‘ë6?xÝŸþ©ô4›FúK½Þ³Ú† ¨ªÚß¿ŽŽúg$É9@hHue §æÎ“f3lÜŽò>£ÇBΆhÛWqü&†+Š¢{<®@aDQ"aIU6 Gßõf÷è]¾|î?¹7þjªãIFâÇ# ‰ŒÚçI62é„qÎqûM\÷ÅA6Îøj ë²â8N_l¼_–Å4¿úa/«¯ +…Ä;ÿÀg{!碔~@ 0?(ß× ¤R xÕ³ üÈ‹z ©þV`þ zÓ£B‰…§W[>Š,÷²O›y”b†M@C¥äÔ/ÿ4ô÷zõ‘%)ü}%ÉK*Ò.ÞLÁ¬– šf ÙÁ” ò5±%©"(Jr6eÊ`·þ˜iÙ*C%cÑ´äA°Vó¾)Êî,±ítËoûŸÏ€’m3 PðúE\6÷QQ”^µ4Ù¿W‘q’-ÑyÂzè€$yºÎ÷l ùÚŸ„üÍŸ ëq ?+ï‡ôJãnšýi„ã ]A¢uð€^öÇ}1š= Õêü+HR¯–ü¨å•é±Ðú¿u¨j`Û6ŠÅ"4MC½^v07ÉSwáeÅ”ý|¦9 $œiöÉ´e¡—`¢›5Uü¦gA|ów…öþÎwÁ|îßMÕTJP—;IJÂzJ¹ìý=´j÷¹ézX¯£¼Ád_”_iPU…¸®HÕzß÷N*¼\!q$é•Êtô±§M~ŠìWê£ZEO¸åZ¯/o7ÑKååÿŠžÀ•n†ßê‹{YdT5Âs—ÏW!¦(¬[y—uÄ4ÍÐÄŒe’m@¸çÛ £—áyrÊq9‹óOvŽ`Íã(\’o0†a ÙlöoÐõÁ™°Æ0Tu=>cú¸P’RÛþ=›O„æD3rÏŠ`•Ê¡3êó*—½ãÈ *•<åfK‚X–…êƒ)”r Ù£õP~·¿ð¦×¢jþ`*=Ý'.ÅzÜuGUÒ|ùq™KcàŠR«G¡P€ªªc×\µ,ÀøÙ«D¾d ‚?¸Ú?ôvàz¢¬XˆÏï øÝÿ/Ô»~òø™?ì•E߸SOWn¾Õ=ỼÙMóTü ê¿x®Ÿ|,8±œï R«yû’ÁhY½òŒÑýlÛ5­gŒKàIÊãD6w¹ÜKÝÆë¤á†&ç%Ék·¦yíÖu)Ñ%•à,—½c¦¡­´Á§È‹" X–7¥U„gh¡l¼ŠhOÒë¢çŒŸ”C¯$ÐWw•aæ‰mÛ¡Yùb8’½X{Õ ÖÉ#ž4À3Ë‹¦iñõúhêxŠ3•J/2EÓÂ5©îú(Þz‰7›^ fsBÊÍVW*éJ89Ž÷Ž›•—½\öÚA K4ë7eµuÒÅqÂÆ)Í;ЄA¹ì]«ÞŸÇI.O øÜu=üìJo¼áý@ü`ïCzˆô §ág˜ÑuŠ¢Œm¬Þ8,7aÞ"£r’¼Úäü.¤¿øÄ7BÃOµ @ã£×¡ÿëD¹ýc@7*±Yóþ§Ò@7[ue›ÆOAü#º·OTwoœ#ï%ENªœÑhx×–Q—ªgËS •¢¯' ¢8¸›“·7X./Š$õî…²‰§™¢’„'Oz§1rÓ²Òk0$˜Ò¬aøUšD R¼‡ÏU¡™lŒ$°¬î Ó­DL7 amÛÐu=Ù»êÇKxÁ—©ãôŒS:yü‚¥ò§WÃ8 ¤`žRD¡´qM&CP”î$ÔCÔ/2L³Wq!h°•³jÕSR‚Ï. 
qó ¤ Rä¨$…=«ˆ¦i‰áÀŽãÉL£áÙžT&Ø4Ê©x×_ÿî_öŸ`Ò°†™]×QcRÑ4½¾O¥ÛJ¿˜$ÑßfÛ€ùçgïz#’ì"ñ[oGõâí}Ÿ“ñ¯(´œüQƒÆ²hã Û ªi ¿¸èHŠD™tònÔIÅQö§wm{¿O3²o¥ V˲ (5lnz/½®c¢ß3Z†çû7Ñó¼ʼnëN3ÌÔ ÎÈG•H`µ•0œ™BÔÈyY)̓mǯ튮¿LóœUÕSnk5à•¯|W®Œÿ½eÃ0à8ŽL ÿó[ ÿù?Aí׿ 2k°¶Ûm¨ªŠV«eÚ(àìpå7íùÞßÔITpè/³P&•o˲pÓ—¾?|øº‘3¤2Ì<˜TÆCÞUÓô\†’ä[Q–å©d°-«!4Ža-Iûý,Ãq3/ëwǤòýÐ/ý~ø¯þ Oÿí߆íкõfÙ»ÊdIåÛq¼¼VÃóî»(•`YÞ0ÎU ˜qÉl–àóçÏ#—ËÁ²,<úè£hµZØÛÛKu¬eYøGîßþã6ðîgxj^˜¼NªÄ,œIäôõ{ø÷È‹9ÌŠ+‰aL*ã~fÉB¡W+%P…¼\æxfqL*ß/øgÿ ÿýÌ/¢PüÈ6V™¬0©|_zë[¡xæOý”¿`•&¬˜ù“IƒµÝnãððgÏžlll`{{ÛKL‚gýÏ¿Äßÿÿ7ðï:À÷lxžÕ&ØPe2Á¤ò _øÀí¸ç;žàØ&“L*ãŽãÀ²,HºÞK™*IpœÞšÊf“ f1L*ßï}ë[ñä³ÿJÿù‡ýõ¼£dLf˜Y2 E6 <ã ~ò6VW˜IȤÁzíÚ5@>Ÿ÷?Ëçóh·Û©ŽÿÆo!nþõOgòá K6{ypp°è&L×Iå>öŒ7áÿ6Š‹¾•‰XEyXµ>;.“ʸaønÍZ –åEonö×Í2«&«ØgÇaRùþô»þ?þ´ƒ¥VâWQV­¿ŽË¤òý{?}ïxñOâ•?þØöòN.®š<,{ŸÍ¤Á:¨St:á'Å•ˆ3—I3KÁµk×pe çMÈÄò àèÓÏZúHàU”‡Uë³ã2©Œ+/ü|ìEÿ/ʨasÓ‹¦ú¦Ë4´¯š<¬bŸ‡Iåûè~÷¹ÏáóŸÿö1ïg™ÈŠ“È÷4¶Ï‚A2¾Œ÷ÃLÆ _Fyà1œ!ÖmüÖ¦,Þ3<~gÿžFC‚gD§ÓÁÞÞvwwaY}ôQÿ3âüùóÈårþöV«åo´-ÍöY°··‡v»G}–e!—ËáâÅ‹©Û”Å{bÆcRùžÆöY0HÆ—ñ~˜ñ&ãË(<†3ÀzŽßÃÚ”ÅûaƇÇï帧‘X´Å¼ª\¾|Ù=uêTè³ .¸÷ß¿ëºÞLÆ©S§B³#ï|ç;Ý׿þõ· ;vV\¿~½ïš×®]s/\¸ªMY¼'f|&‘ïilŸƒd|ŒA2¾ŒòÀc8C¬Ûø=¬MY¼f2xüÎþ=ÊÓm0¯*[[[°,+ôÙµk×pÛm·ù¿@>Ÿ÷·çóy´ÛíÛ†;+(µy>ŸG«ÕB§ÓA>ŸÇîînª6eñž˜ñ™D¾§±} ’ñÃÃÃ¥»f2ÉøªÉ÷4ÚÌ2¾<¬Ûø=¬MY¼f2xüÎþ= ¬s`ggdžK—.À@!8::JÜÖétKž§Í7úî寨ßß*ÔiÚ¼ˆ{b¦Ã¨òUy$ãËx?ÌôˆÊ¸iš‰ûfUx gâX‡ñ{˜|gQçb¦ßÙ¼§Qá5¬sà¾ûîÃÙ³g=ôO’øÂ¾¸íÆ%¡ž6ÁÙGy<ò¶··±³³3ô~Ò´y÷ÄL‡Qå;«ò0HÆ—ñ~˜é•ñe”Ù8ÖaüvOYÔ¹˜éÁãw6ïiTØ`[[[ØÞÞÆ<à/ºÝãöO"—Ë <6—ËÍäNŸ> ~øà N‡‡‡CÛ4év&»Œ*ßY•‡A2~ÓM7-Ýý0Ó#*ã«&ß<†¯/ë0~“ï,ê\Ìôàñ;›÷4*l°Îˆ½½=惠õ!pâÄ áЛv»\.7pÛ°cgÅ p€‰Û¼ˆ{bÆgùžÆöY0HÆïºë®¥»f2ÉøªÉ7áëźßÃä;‹÷ÃLßÙ¿§QaƒuFœ>}‡‡‡¾LÓô¿ü\.‡­­-„¶ …Û†;+òù|_ í½½=ffÒ6/➘ñ™D¾§±} ’ñW¿úÕKw?Ìd ’ñU“oÃ׋u¿‡Éw ¿³O£r“ëºî¢±ªœ?ØÚÚµk×B ¤/&}gg¹\Î_Ô¼¿¿Û†;+èš4K5ÊýLc;“-&‘ïilŸƒd|ŒA2¾ŒòÀc8C¬Ûø=¬MY¼f2xüÎþ=¬3&˜2=nD§Óñg€¢ÛmK³}LÚ¦,Þ3>“È÷4¶Ï‚YöI–ïåcŒ/£<ðÎë6~ÛžÅûa&ƒÇïìßSZØ`e†a†a†a2 ¯ae†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã4M8޳èf0ÌÜ™¦ì;ŽÓ4}K Ã0 3Ø`efa X–µèf0ÌÜ™¦ì[–…B¡°è[bÖ]×aÛö¢›Á0så}þ°ÁÊ0 Ã0 
ÃŒa¬À3kËûüyÚ¢ÀxgkJ¥DQìÛ&Š"dY†iš(•J¼°2 S’$ùÇU*¨ª ]×á8$I‚¢(þùA€ªªcïÏ0i$ß ilj•±iÉ8Ë7³f)ûÕjš¦á¦›n ç84Më;†a&%Nžé3Ã0²,‡äS’$ÿ3¯™eb\y—$É—]AP*• –íqak(—ËÐ4 `Û6677ýR©Tüm†a X,úDÓ4‹Ež‚²¹¹ ]×ýóÒvÇqà8ŠÅb( MÓ4T*•±÷g˜4 ’oÚNkù’d˜\ÆY¾™y3kÙ/—Ë0 ŸùÌgüÏÇA¡Pð!†™Ãä9HP>i¯™eb\yÿÄ'>“'Oú“†a„–l°l‰Ë,”f³épŽŽüÏdYvUUuŽŽú¶‰¢èʲìok6›þ¶Z­æ ‚àÿ À­Õjþß’$¹¥RÉÿ[UUW–å±÷g˜a ’o×õdŽ~w]O†IƦ-ã,ßÌ<™‡ìÓñFÃà÷É5ÃLƒaò,˲Ûh4ümQù¦Ïx¼f–IäÆcâøøØ×Ûi_–íÑáàcdY…‰5 Þ,Kt›¢(°, ¦iBÅPvHúÛ²,f=x¬ ~HB£îÏ0ƒ$ß„,ËþïÁýf!ã,ß̼˜‡ìz‰œªÕê¢oŸY1ÒÈs”¨|<^3ËÁ$òNÇ”Ëe(ŠY–ûŽeÙ6X3Ì ’¶mû딂Ľ fagÖ•qeŸÖ –Ëe-ú6†aÖQÑh4üe|Žã T*¡V«-ºiK ¬ j˜š¦é/¼N2Z%I‚ ¡Y›èì;Ãd$ù¦ÄaI°Œ3Ëμe¿V«Áq†á'a˜i1®<3Ì22®¼“þ^«ÕP«Õü²c’$q_™Nº´`J¥’â x„qÃi%Ýe9ô7Ð[ôÍ0Ya|ƒeœYf%û‚ øÙ*¹ì3-ÒÈsÙf˜e`yzVÏãÊ!¿“ÃÖ#Š"jµ …dYögf(vt)2¤”T*_©±m{hŒ=ÃÌ“Aò= –qf™Y¤ì+ŠEQP.—¹¿0Sa˜< ‚€J¥â‡?2Ì23‰¼—J%†“'OB’$?2FQ”EßÖRs“ëºî¢ÁxÊ͆Ó:% EѯÃJušêõzh FÆ0Y#N¾ÓÂ2Î,3,ûÌ*‘$ÏA}%Zk˜a–•Iä¼³‚ ð2¦)Àk†±m'OžD³Ù„$I°m…Bªªò &Ã0 Ã0 Ã0+O&Ö°¶Z­ÄÏÛíö¢›·0‚! 7Ýt677¡( «KË7³ê°Œ3« Ë6³ê°Œ3ËÀÂ=¬{{{hµZØßß÷?k·ÛPUÕïD²,÷¥÷g˜e€å›YuXÆ™U…e›YuXÆ™ea¡ÖÃÃC\¼x±ïóóçÏ#—ËÁ²,<úè£hµZØÛÛ[dSfdX¾™U‡eœYUX¶™U‡eœY&–%¸Óé R©@–eܸqÃÿ¼Ýnãðð—.]lll`{{ØÝÝM<Ÿax×»Þ…;ï¼sQ·4u®^½Š»ï¾{Ñ͘ׯ_G§ÓÉÄwtýúu<ýéO¬§Á´å^ÿú×gâÙM‹,ÉôÈRŸ½zõ*>øÁÎìü<†'Kò0 ²Ôg¯_¿AðoþÍ¿™ú¹yüN–daZd©¿^½z¿üË¿Œ|>?“óóø=œ,ÉÃ4ÈRŸgü^˜ÁzþüylooÇÏ_»v B4ŸÏ£ÙË^†{î¹gèKc™ØÙÙ …i,;‡‡‡¸råJ&¾#jˬ˜¶|ÀwÞÉòq²Ôgwvvfz~Ç“%y˜Yê³³ÃyüN–daZd©¿îììÌÌXxüNC–äad©ÏŽ3~/$$øààív;ö¡ êN'qÛ¾ðX–µRa «ÔQ`kk+3åÝï~7>ùÉOÎäü³oøÜç>‡½½=Î÷͈¬ÈÃ4ÉJŸÝÛÛÃã?>³ó󞎬ÈôÈJŸ=88À»ßýn|úÓŸžÉ¹yüNVdašd¥¿Òø=«„G<~§#+ò0-²ÒgIuüž»ÁÚn·qñâEœ;w.vû  [ˆrë­·â%/y NŸ>=ï[b–Œ'Nàî»ïÆóž÷¼©Ÿ{Vò Ï~ö³qúôiœ8qb¾ŒY:NŸ>™œ›ÇpfÑäóyÜ}÷ݸí¶Û¦z^¿™,@ã÷´åàñ›Y<¤ƒ*ßs >88ÀÆÆLÓ„išhµZ¸víööö°½½=0"—Ë%n»õÖ[ñÒ—¾[[[ó¾%fÉÈår¸ûΈìäð–o& [[[¸ýöÛgrnÙE“ÏçÑét¦>†óøÍd¿g1éÈã7³hÆÕÁçn°›}¡ÙÇv»íwŽàï “eX¾™U‡eœYUX¶™U‡eœYVæn°nmm…f`(Þ=W½µµÊHfš& …¢Ÿà …å›YuXÆ™U…e›YuXÆ™eeaY‚±»»‹¢Óé`cc÷Ýwߢ›Å0Så›YuXÆ™U…e›YuXÆ™,²pƒ5.cU>ŸÇ{Þó?Õ6ÇÄ3Ë Ë7³ê°Œ3« Ë6³ê°Œ3Ë Ö$666¸“0+ Ë7³ê°Œ3« Ë6³ê°Œ3Yc!uX†a†a†afl°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X™LclÛ^t3†a†a†Yl°2™E×uT*”Ëå¾m–e-ºy 31Žã@×õ±Ž3MÓÿ‰›Ôùüç?¿èÛc†a†™˜Ì–µaÖ˲ išÍ&Êå24Mƒªª€r¹ Ã0P¯×!ËrèÃ0úÎcY$IH’˲ÐjµpÏ=÷,ú6™5Æq ‚Ã0P«Õ 
Š"lÛ†®ëPÅ—[Ú¿\.ò,ˆ¢ÚfYlÛ†$Ip¶mãöÛo_ô-2 Ã0 ÃL ¬Ì\1 †a@UÕÂMJzp¿z½AP«Õ°¹¹ EQ`ÇA£Ñ@¹\F³ÙÐSæUU… þy‚[–Çq ª*>ùÉOâÊ•+‹~Ìá8Ž/‹d¬–J%”J%†á¯ ( ŠÅ"šÍ¦ÿY±X„¢(¨×ë‰×°, ‚ @Eììì,ú–†a†a&† Vfj8Žã{4éDQ„ ¾ˆ”ñz½î{<‹Å"TU…ã8pÇß‚  Z­ú }£Ñ€ $É÷¼–Ëe(ŠEQÛ4fÜ ‚’E’q¾UUU”J%ž*Ë2Ç(Šþ~…BÍf•J¢(úû'Á2Î0 ³XÇ€Ðäù¨<õÔS‹¾ †Él°f 'fÐuš¦ùaŠ‚ ôyPMÓô·ž2N#ò¦R¶EmÛ(•Jþ9ªÕ*677ý—… 3̤8Žã'üІæÒv õm;’+Š",Ë‚iš¨V«0 š¦ù2N!»Ñs ‚ê÷Š¢À², ?ª€a†É.š¦A×ubR’$H’Y–uŠþ<]É0 ÜtÓM‹¾†Él°fÍÍM4 ßÓ²H‚Š;€Ñ)*• A….Æ\kJç©×ëþ:Õ4zÔ ¥R º®û¡Á 3 Žã@Ó4†EQ Š"4Mó×R@p ©,Ë(•J}ýµR©`ssÓ  í†jµŠJ¥š¨a†a²%ÁÓu½O¢ÉËà{„°mÛÿ›Þ ¢(¢Ùlâܹs‹¾-†Éc¬{{{8<KeYPe"ƒUÓ4ضr¹œÊ $oR´pttä6®aÊ0óF×u†á‡è’w4ÆKÑä5UµZ-ö|ãœYˆ’`†a’!hÇqü0^AXÿa˜.¶mûÉSƒP™= W¹€&s(z ðô¢\.‡ç>÷¹#]w$ƒµÝnãâÅ‹ØßßÇÖÖZ­ÏÉår8sæ Z­òùü¢Ÿgf †ÆAk¥Rûü†aÄ–ƒ‰£\.C×u¸®Û·C™eÄ0 躎z½îG+Ðì¸(бkRfUÐu}ìÉN†YELÓô Р^CÑhŽã„–]Ð$'Ã0d€êºÇq I677Q¯×!Š¢‘I•9†éY‡‡‡#WêÉ`½ví`kk«o[>ŸG>Ÿ÷½®L<¦i¢R©„<—Ñí”°(X?4-š¦AQ”P9˜¸ÅþŽã X,ú ¼iš¡DÓ4Ù`e–˲P©TüATEžgÖÃ0ü5ÏË­÷Kz]½zO>ù䢛Éd˜ çT’$?Û:Ðs³µ3̪C^Ðh ÈèvвŒ®³†½Ëñɲ<ó\2#¬€N§ãÿNt:ßãÊ$C3}qÆhpœ$I~fÝ´½«tžz½Žb±è'†Á—¤:É›Ë^(&‹PVjÀ‹î r04ÈË8Í0«ŠišEq©½¬Áü @o ½i¦ÿå/9^õªW-º¹Ì‚pÇ×eèoò ’ŽE HÁ®V«°m{â¼ ³lèºî'“$ …BÁŸØ¼$‘†aø®¸D’Q(¿M\ÕƒY0’ÁJ^ÔP³v» . 
—ËÅz_™¦i¢V«A×õ¾õr–eù«,Ë}k*â „‚ øáÄÁX’$ù3ïúÍÌ›”P€a²) 𦅲îR¢$QûJ)1̺aš¦Ÿ}}«®ëþ û¬£‚ºUUõß–eÁ0 ¿ ¤HRƬŽã P(@Q”ÒM“–²,'æ#àP_fU¡1”b²,ûSJrJò¯ë: …TU…®ëe91òs£T=˜”‘“.íïï£R©àÌ™3¼8ä‹/"ŸÏ§2°Ö"Z£­·JŠ7àšÃŒHš1¡Çq“,)ŠÒ—X‰ ðãh[—u†žY¢5~K¥RHŽÉ5ìÁÙñàDf”`ˆd°LYêw´‰JsPmái-%Lå,«ë‡a!„&*ˆr¹ EQúòsPÄ Ã¬¶mCÓ4?YkµZõÇlh4}ã|©TòÃä«ÕêRô›‘ Ö ìïï£Õj…²s¢¥áBxжa¡A8j$RXpœ •Ëeض+ˆ£’t<ÏD2‹‚’aH]§—aÖ™à»"îýB™ãÉH¥õ}¶mû¡”¶mûª`í`EQà8677ÇJZF%Ñ(“B|ÉŽ3T™Õ†–0Ñ2)‚ä1˜ I×uX–…Z­Ã0üµ§ ³Î Õ`T nIŸ$Ih4‹¾ÔŒd°bgg>ú(¨c@áZ€7»Q.—ûŠ ‘5Xƒk6H¡˜Qo.‡3³†”Y–C^RRŒK¥+% “Rèþ÷ - ©V«± ”I{1̉Ƹ¤Œ’ÁòRTÏ;x½¤ÐMf5¡‰ZsJK9(J,NþdYF¹\F¹\†išK¥d3Ì4¡É óþ¾jŒ¼†ucc¦ib{{{â‹âĉÈår±Û[­666·/4ƒH)Õ…$#5¸~•e…BÁÈéŒCŸqëey¦{rÖI¾“ˆfú$#• U I!…ŠÅ"gn\XƳõ3"XwXÓ48Žò˜FIFaù”‰˜Œ‹ Jï+±ù–!ô `Ùžš¦A×õ؉“a:M­Vó']8úk:°Œ/Ta„–®‹‘dä,Á»»»ØÛÛC»ÝŽÝgwwwèy°··ç‡omm¡Z­ú™‡Ûí6TU Õy]öõ±Áp`BUU‹E?<*:³(Š"jµÚ\5l°ŽÏ:ÊwÁõ§ä±§P¯à`+‚ﵡ²l¬f–ñl÷~QÅOª1ÍHUU¡išNL!œ¤H%­‹]X¶§½ ¨ôŸò1íõÓë Ëx6©T*ÐuÝŸä'݉òÔjµõ^ÏïŽÈ©S§þ ãúõëîk^ó÷á‡öÿ~Ó›ÞäþÜÏýœ¿Ïý÷ßï>øàƒþö׿þõî… ž÷òåËC÷Y$Š¢¸Íf3v[£ÑpUUuŽŽÖ>Qýߛͦ+ËòÂÚ2f%/³’o:nY¨×ë®$IîññqªýUUueYéf0³’—uóÊññ±+Šb_¿9>>^è;eÖÌB^xüž.GGG®(Šn©TZiYœ<~¯ÍfÓ•$ÉUUÕÿìèèÈm4+«#/_3ªKÉ’~†AÉš(¤xcc[[[¾Ç¶ÝnãððgÏžõ·ooo/ýš…¸º«„,Ë s ^;i 3œu•ï –e¡R© ^¯§žI¯V«þ â2{gÖ–ñì`Y677c‹ÀÓ =“–íñ°m•J¥O,‹¨Õj¨Õj,‹e|ö8Žƒb±˜hѸ½¹¹‰B¡MÓP«ÕB´æŸõ¡#g ¼pÓ4qxxÀ 'e9ÕºÖ­­­¾/ñÚµk¸í¶Ûüß„’:åóùÄäe€JÏdhÒ'ftÖQ¾ƒX–…b±8Ö:뤒LL¶XwÏ Á¾ÆŒÓe{8ôÖ.;ŽEQBòX©T¸ÌLaŸ=T/¾\.û‰Å‚¯É@å¾1#¬{{{¸xñ"¶··ý˜N§ƒóçÏ£Õjáܹs©Ïµ³³ãwŽK—.ÀÀNÑétüû(ŸúÔ§ð|­V ûûû { q†‘ùµ”¥OE˜¦¹Ò³:xøá‡ñô§?}¦×™¦|Àã?ŽÔ“Có†èì°³³ƒ«W¯Îå:ë0†/‚J¥â—™!TÂlÝûÚÞÞ._¾Œ;î¸cf×X·ñ{¦iúJƒ “TUõoY–Q,¡( ,Ëb¯Ü˜ÐøÝjµfZ‘ƒÇïÙ`†¿V»R© X,úIð¸„WOuüÉ`m·Û¸xñ¢¿x>ˆ,ËØÙÙÁÙ³gSg»ï¾ûüs>ôÐCØÝÝõÇqãÆÄÎò²—½ ÷ÜsOª¤OódPªö,³lí…íímär9\¹re¦×™¦|ÀwÞ™ÙÁº+ÐYc;;;3¿Î:Œá‹@×u¿Ö¤$IÐu…BF¶m¯}_ÛÝÝÅéÓ§g:†¯Óø= ZæQ­Vz…$IB½^÷—„0ãAã÷¬ËGòø=9ÑJQIgÝYµÆÕÁG2Xi&&nÀÚÚÚB>ŸÇµk×R¬[[[¼yUU±»»;°ƒ.Cjí8á]–Œ§q¥u˜ñYFù&oÎ(Ἆã \.¯µ½®,£Œ/š¦…ÊÐÐ;¤P(Àqîks€e»‡išP%•~ I{V—–ññpº®û}ø2¿ •ËÊÈI—ÄÎÀt:T1î{{{}3ÿ;'NœKh·Û™é(•J%q›®ë(‹¾RxÂW¨=kȲì·9ZÓIϲËw±X9}}¹\öËY0«Ï²ËxÖ¡úè\*• ª*ªÕ*÷µÁ²Of¯,ã“S©Tà8šÍ¦?±X.—ýµÁ<>φ‘ Ö­­-är9T*•>a>þ¼¿Ï NŸ>ÃÃC¿¾àÍÞQgÈårØÚÚÂÁÁAh{¡PXô³‚išÐ4 ¦iöm£úIÍf¥R 
…BÁO:°Œ±êl°ŽÇ²Ë·$I~8,ËB¹\öû·Ó¬óÈd›e–ñe@Ó´Äþ¤(ÊRL€.+,Ûñ ªrÀ,,ã“á8LÓô+ðëÊ ŸgÈÈI—èezï½÷†>Ïår©Öglmma{{gΜÁÖÖ®]»†7n„ŽÝÝÝÅÎÎýEÞ÷ÝwߢŸLÓD©Tê[Ümzá[årÙÿ,눢èÏQ@ft–Y¾)|ݲ,hšÖÖBÙíTU…®ë}rϬË,ãóÀ², ¨ª:òDŽiš±ÞUf>°l÷ÃÞÕÕ‚e|24M‹5J©tÓ²,\FF6Xóù<yäú fOŸ>=Ô³äܹs8{ö¬¿&6zl>ŸÇ{Þóh”sÏÊüU,}Ų,†£££Ð¾¥Ri©7¨ -£G8K,£|S6RY–!IR_mÇr¹ Û¶ýìv¥RÉŸØ`yY?–QÆç­ç®Õj0 år94?Œ¸‰"f¾¬»l“BN:aì]]1Ö]ÆÓ—4•ì€88Òl¶ŒU‡µÝncccÃÏvxx8rŒ{.—¸?3Î TòE¨ª Ã0 ˲¯œ¬l€Le“o]×ýÁV(Š]×ýÚz²,÷%Ò`/Ðz³l2>h…îjš†ÍÍM¿¯H’”h–Ëeˆ¢ÈÆAXGÙv•J–eÁ¶m_¯±,k©&ß™t¬£Œ§essÓ׃mÛ†ªª(•J}€™ù2rÒ¥ƒƒÜ{ï½ØÛÛó?{衇pï½÷†bÞW 2P/A‘iš~QàU —¡¯ 4³D“ƒ©ª MÓüÐFöú0Ì`t]‡mÛ¡¾¢ª*ŽŽŽÐh4Ðh4à8N()_ôØU™üd– ’KAÐl6aš¦?M“õ ³èºîgºn4h6›~F`Ã0xòfŒäaít:ØÛÛÃÙ³gCµ–ö÷÷qñâEœ?ÞOÌ´jX–Õ§ˆhš¶Rë÷AXÊš±ÌdT*(Šš5µZg&¶m§zÔj5"ˆÞ'´¬„K0ó„äŽ<ªäE<9-—ËPUue&ä& Ѳ4‚  Ñh X,à À‹d$ƒµÕj¡ÓéÄ>{ö,FªÃº,8ŽÓWêeÙÖ¨¦…=¬ë…®ë0M3VYælw “ŽQÖªªª QCÙæëõ:O 1sÁqhšæ+æÁ\„,ËÐ4 𦭤žÃ0qPTAÔ(%£•Y,c­aMâÆ‹¾Ÿ™@E³W*iÂ3ªëišÐuFƒ•e†MÓüußiY™ò4Ž‹+sKmÛ~éh¢È(ªª¢P( ^¯/ºÙ 3’23Ù`¬:¬çÏŸG§Óñ?ït:~Ö|>¿è{š:ëdÄqHðz`Û¶Ÿ0ŒU†Û¶¡ëúú®=µ,Ïhe–J®—&',Ë~Vx†Yu¨¾*Gd—±ë°øÆi«ÕÂÆÆö÷÷±±±±è{š*¼ I°+³Úâ“ 3:d¨RXåÚ*õ’Äë’à8ÎÀ’qðûY¨*“]Æ®Ãjš¦_£i{{²,¯œ± xúºxWi.g\mÆQ\†ñ0 •Jªª²J~_,¤¯µ¼2L´<ŠÉ.C Öv»N§Óê»Jå\’XWÅž ÖÕ†JذâÂ0£S©TÐh4xœ â8žñÊd]××N—a˜4P¥Ó³M*ëÅ‹Ñn·±µµY–Wrj´{]{ÿY4Mã™D†ªÇÍŠMQôÖ²®øö²AOEQ„®ë\¢ŒabXWÇÔ22Ô`ÍårÐ4 €—|èàà­V ù|Þ7`W‘ubAØh]q(+Ü 3:º®§JX³V( ‡gB¡QaÛ6ÇY+]†aÒ²nŽ©ef¤5¬Á0`Ó4qxxˆ‹/®œñjYÖÚ®÷à—Új£ë:gÁc˜”PJAüº©<© Rñ<«l°f Š d‘¶mó$%ÃDXGÇÔ23RY› ²,ãܹs¸té¶··ÑjµpæÌ?Ó2bNž< MÓ Š"TU]t“fªØ¶½2K 3kt]Çææ&*•ŠŸ!Ÿ `Yl¬fÊS@°±Ê¬  ŸÓ×Í1µ¬Œd°öý}þüy˜¦‰Ó§OãÒ¥KK½¾Õ4MÔj5Ôëubfå°,‹†I …ÏÓì»(Š<Ù)™Â¶mX–Ųʬ%år†aô}®ëzßgÜO–‹Tk«Õ½÷Þ‹ÿ3Ó4±³³ƒÃÃC4 ìììààà`Ñ÷3¤ 0Ì*Â+ä‡ú‹ ¨V«kQ‹›Y~¨Æ6ì#–eùË7Ã0P.—ájF;ŽÃ¡òKF*ƒUUUäóy\ºtÉÿŒÖ®^ºt <òvww±···èû\fÕáp`†ImÛ¼^u–å•´©Tݦ‹iš¡p`†YhÌŽ¬äŒ z^¹Ÿ,C ÖÃÃC´Ûmœ;wÎ÷ít:hµZØÞÞÆÆÆ`{{N§/lxY0M“•f¥a+ä‡ÃÅR Š^ýUËZtKxÞUI’x9³–4NÇeY¨V«!CÖ²,Öù—Œ¡k«ÕÂÖÖ–o˜ð¿ôàË<¸}aåd…),ºÙ€£&Žã°ÒŸzFP;f18ŽÃ‰Á˜µ†¢ÈdYöíʘ-I¬ÀÄ/\>†¬èt:¡Ï¨ë²©AXxW»û³Æ8ŽÃÆ*ä„gßS4RÙ`]8Åbªªò8Ϭ-4n+Šâ§ÁÐ_ò¼Ú¶ AxRrÉj°æóy´Z-¿\M§ÓišØÚÚ íG¡À'NœXô= {žV±û³Æ°Î0é1M“ß Ã°m€Æ~V ¥R©@E®±Í¬5dˆ^VwÓ4Cºy^y àr’Ê`ÝÚÚÂÎÎööö°³³ƒN§ã{#É€­T*ØÚÚB.—K}ñ !œ´½ÝnÏü!°2?&fã­tÌbÔ'þ³"ß+àÌlÈ’Œ§Á²,Üqǡа88áR $ 
¨V{OÃËZ©1%)Á²È6eE­¿ †IÁ²Èx¢Q’²,£\.‡+‘çÕ²,N¸´„¤Ê\­V!Ë2„0=ôÐC~á´&•É9sæ Μ9ƒ{ï½7ÔiÚívhÛ¬×dðúÕÐ1ƒÕ0‹D“<#{Ž:PÖäà`fºdQÆÓ ë:J¥ŠÅbl]>‚'1GD'O¼T.¦¹ðNË&Û†a T*qx#“še“ñ4D“J*ŠÛ¶û Sò¼òø¾|¤2X766pîÜ9<òÈ#¸té¶··ýmÛÛÛxä‘G°¿¿ŸzMëÎζ¶¶`Y}ôQlmm…:Äùóç‘Ëåüí­Vk¦%sXxG`VK³ Û”0×5¬Y“o€KÚ0Ó%‹2> Û¶}OT³Ù„a¨Ä”cáÉ1˜ÔX¢ï¡^_¸Áºl²ÍÑ̨,›Œ§!ÚA@£Ñè딀‰Y>R¬€·Fuoo{{{¡Ò5¹\n¤0àÃÃCt:ìîîðŒá³gÏ¢Ýnûᇇ‡8{ö¬¿}{{Û÷îN*$̳“#`N~Š>,³CD5sš Ìš|¶½æY§˜©‘U†®ë¾R&êõz_½>€½«cAåm†¡iÉÛªUï< «–Q¶Y^™QXFOC\âÔ¸IúR©ÄáóKÊÓÒì´··‡‹/ú_¼xgÏžõ~òù|Ÿ7öÚµk¼ŽA¿SÈ1ý>«8zMÓØó4 u,t]èÈØ„îÏÈš|¬Ð0Ó%‹2> Çq`ŽŽŽüÏA€$I~˜0Áë½Sbšž)Š@šd?ŽãyR¥?ISPEoßL"/›ls43*Ë&ãiµpŸYNRyX/^¼ˆíím±òöö6.^¼ØWî& ¡ Ãív{{{ØÞÞF.—’üv€IDATØ)Æ¹Þ H‰Y†øüÌ `6¡»€ò Ϋwf±>6†,É7ÁY°™i’E†¦i±I6J¥’_`žàg)1ÍѼ¡ŽȲwÜ …«ÀòÉ6OF2£²l2žîëÁPƒ•Â)<€ïY”]lN{{{¸÷Þ{±µµ…sçÎùŸ'qãÆÄmŸúÔ§ð|;;;©Û@J ‡@³I`4«°]€ŒP⃃\¸pah¦ÐI˜¶|Àã?ŽŒÔÌ×\½zu¦×Èž–¤‰II’à8Ž2_©T Ë2Oð¤Á²F3,EPÕ©„üîííáÂ… øä'?9“[ËÒø=ο±šÐø=‰Ž=Œe¿ƒ”Ëýž ŽŠY.HuüN ´N5mr¥$Z­Ο? \ºt©/ô M¢¼ìe/Ã=÷Ü“:L™¼«Ífs¢{Y+lx†Ÿ`Ág±¬ Ʀ™Ä+W®Ìà‚³‘o¸óÎ;±¿¿?R[Hι>ßz±¿¿?SÅ! cxZ( ib²T*A×u(ŠÂï„QÕX’¤^‚¥ ØÝÝÅéÓ§g2†giü†eY<¶¯ 4~’·IX¦ñ;ÕUƒ Öåa\š9ÙÝÝÅÎÎŽŸ~{cc÷ÝwßÔnš2Š­ìLºƒÐšÍ©4&§“¤ñ#™ÝŸ¤pb!òÿŒÉ‚|ÓµªÕ*âÌÔÉŠŒ§%Ín.s0Ñ÷§3 }¼®{ÿ—J^–`ÚײâÊ5Í[ï:ç „e’mÎMÀŒÃ2Éx\û’â±®³ú 5XOœ8J¸DŒ[¿»»;4Æ=ŸÏã=ïyß±‚ͦÁÊô³šœ6áyX5³Xæ%t¯1Š1l¸_ s3T‰,È7àMʬ´Œ3 #+2žöB¥Ä¶=#±VKLÔ`ôœm»çI¥qɶ“Ådcv†,“l¯¼ÃÌ„e’ñ8AàLîkÊPƒ5—ËÍtuÑÔÛÌÈð ËY0‹š¦V÷GÂè!ÁUw$lsÐ+Á“±š×³–o^ç±F8Ž÷“1Ã, c8gQM‰iz^Ðju²’2IõS-Ëó˜F?KúnÒ”¿Y Ym˲x¹33² ãAh,‹Œd}g=y ëáá!öööüŠ{_&Ö"öåsAGrâ#ã'EràåÇñÊ E¨•˃ÃÕVVҗǯ䇮g.qMVBÆ ãq#±™$¯hq†lÒúÕà¹h2†éöíÕ]ÖÄ0­Se}g=Hm°š¦‰{ï½;;;¸xñ¢ÿ³³³ƒ{ï½w¦õ¢fÁJ¬:f k¡ç±uBë×ÿ}Vi±èe–tÐ N«¯;ŽWA¬ÇÇÀ¿ùÀæfXq·¼øIO¡ÿ™ÏÎ.T:c¬Å„Ì*¢i½µ~£à8Þº@¦ö°¦Ä4=è$žŠ$ãÉ4ã=©IŸžÁjÛÞdc0ûpÀx}Ú¸åÉ'ýäB¥R˜HŒaV Û¶}«™áè fv¤2X¡ª*Nœ8MÓüÔÒ–eAÓ4œ8qgΜYo«eY«=3ic°GsDx¡»£NzÃÿ¾ë÷{ ‡aô+ÚiÏ[.{Æ©((÷|ð#ïêõ°‡@ðÔóö»ü¹µ1XÇYmù^&L3½×Ô0Æ3°Öoà5~)¡prYö’"Mz®(–Õ®N}bÐ8%ŠÞö`›67½>U©à›ÏžÅí_ü⢟Þܱ,kõ«0LŠ–‰Žé¼>¤2XÏŸ?Y–±¿¿ß^üüüù󋾟ÔdB‘©`¼Ì¸Ã ÷û€~¬ëcèÆ€BøÐ‡Fo“"¯|¥w¼m{ÞQJðAÉ‘¤[kZ«GGž"³ySÏ›,Šá0µÁöö»ã·¦ŸÙ8£pŠ÷`Ûã…&êz:¯)ãxMl{²u‡+ {WS"Þ¸)“­ƒŽ vo"&ú+Š@sHæ¾F£? 
T­æEæøÄÅ‹øÜw.úéÍr¹ŒZ­Æ“’ N0¹N 5XÑn·ñÀ ÜïìÙ³h·ÛKl³Nl™è:%^¢Éë‘øº?õþÿ_žlÞ·\îýî0l|‹g<ž<éýOÊÑ—¾4^’(;á÷ ¥’êvõ}½pæ5€ Ö)S©Œ—ƲÒGJý¨®A™V×Î$9CâÞ¡ŠÒ/¿››ÞçÓR(eÙ[R­â¯n»mÑOaîT*ȲÌrͬÑÉÇ•×á™X†¬­V ¹\¹\nà~Tæ¦Óé,úž†’)e~ïžq6à6e9AÏ-'ƒ/þC/‹äù¯Þû3Éû•Ëak˜&Ýx Íñq8cdó¸ç½Jº¾iö{¸lx†n=¯r`ë·îyFs—g_}6îüŸ«9;Ï¡3À²Æó~ÒÌmŽh0Œñ½«ô]×ú­1Žã X,BEî“Ò ¿íC×û'bâ<´GGýÙ™±°m›C™µ$5&Ë2œ€.ÈÆëú0Ô`ÝØØÀ7ÝΩ‘©x÷i× ¥{£õ«;årÌ$¹ཟõ”¨’bÀÆ÷xÊqÐ;Å0¼W«áσ÷uí¾áw{ÊOÒØ5‚í|¾Äk_ë=—.wü—;pË“·LþÌ3gœ2“¬m4¼Ù!Ã@¥’†OFç8ß™iöú¢e­½Ñê8 …DQDm”š¢LRJzÄÞ¼L ë:TUå±Y;(áÒ¨Û˜Tf±.q6 5Xóù<:ÚíÁ! ”picccÑ÷4ÌÅ»OS¿ì®íϘpîØ¥uÿé3ÀÓ?ä)(Q%åáëÀ—ÔûÝàÄÌòRæÞ¨¢hÂóö&ÍüÀí=%©³=)ì‘<¬ÂkuÝkËqà³ÈáÏüô3Ñy~ö£Æa-š8Ϭ”Í4 ² XVò2XQô†{nê«’4Y–×@Ó4(Š‚jµ:ùÉÖÓìïG´œ£XôÆò8ƒuÍålÞ8ŽÓ4Qš4)Ã,!ÑåQ¯êZè<³À²–ª$^*ƒ5ŸÏCUÕÄpßN§ƒ½½=lmmù¡ÁY%SáUL7s-u”ÉwÈ­ö=Š|Ä;PŒ1Xÿù[€·>{ð Ëådï+µ©¯Ú¯ë)MqûÄ%í¼0`=£5xc¦9°#ÞܹŸ»ós>ðì±6ÞUØŸ71v;’4šAzòdú}ƒ±ý²£X–Åå>F!®ɲ·ö4nâÒq¼Äy´34Mãu«ÌZbÛvß2¾µÐq¦ ÕµB/I­ëTY‚Ï;‡v»3gÎàààÀ÷¶¶ÛmàÌ™3h·ÛØÝÝ]ôý Ŷíì ü´þrXVw­\÷ïýÙ¶=]£OFïiÀS¹xCWDo½­Üßî;žóï„qëkl$g¶|öVïwÇñ¼±Áë\`^ Üg°=’ä…6ÿ|d‚%`Ô>óÓϜ҃Ïk3©9 ¦Ù“¿qê¤vQÕ”ïªA9*‚ௗ]Wâ”&Ç Ë6Aý*ÎKMëT×|bdž8ŽÃkW™µ@Ó4hGƒa}:{Ð`åª)‰[“dšÞ˜¾$3© Ö|>ýý}œ8qçϟǽ÷Þ I’pï½÷âüùó8qâö÷÷3ï]Z¬½ptŒ—èWœ)l–Âd¨ëÿéŸoV²'4áÑ9ïþœ'žÑ ìïèVBŸ C˜¼CQƒÕpå[â n–eàáÏ?Þÿ¦x7o_~é—Ç|ðÙ&Sk´ƒ˜fÏ33 Dqòš‘iˆ†£§ Ÿ¡rJ¥”y•$i|£³Tòžó†õ²ÂÉÆF¤R‰HE\rFU¹ŒÒ¡0wö*eœyFý¬(¦iB¼»’¢f‚º<¬) Èà ¤GeÉ.@*ƒè­<ò4MÃÙ³g¡iyä‘¥1VŒy  „¥FÓúJSqö÷¾øó?ï~6ø4V4ŒV´=ã1Gθ¼õœW{“ôIƒ†ÒÕ¿bÚDkpý]PWw ßWðÝ|Ž‚Üþ½À‹^Þcû† Ï(#pÝÕHJŽ—Qô2½3sÁ4Mö®.åÆŒ…ã8¾~NïŽãÄzPEQ íä n¼TòÞ3^F3-?Jjƒ•Èåre»»»eÙ/wÓétüÄKYÆqœÅÌTF=žÑ5j3Œ~£ãxŠÄ—þxÖzŸ%èn$·¡1À¶ÂMÞïitåÈ>õÄnŸ¾Š~ƒ•<·äa}ªÕ3XM'þ?ൠÆepY`ô9¾êõÀ?øÚðþ÷\ïf|:§W3áRf×°Ž«ÇA²>!,Ü’”Θ fïíb½ï£¸xihZü}¤õ°ê:`Y0ŒˆÝ+Š@­†ÿã¯ß?Ÿg”25™e’’ã1™ƒrndrLg¬ñRŒi`š&E,Ë0ºžjú,J°?°Áš’jua¡¿…Âtü#¬I´Z-ììì,äad*D€—4hPöÜ8È“õÂPœïß=ÜòûÞg “&²ìM¬„q² ÈÕär8q%pºPsbÇkÇ»?ÛŒ1mÒº÷/žgèuOý£œø p~;þF‚ºiô9Þ~ð’—„÷Wï™Y&·<ùä~9È\lb’Ò0Qf¹~Õ²¼ì¨I£k7ãï¨m”e ñ¡gÀùã/ô²X›f|XsZ/®a‚›Kœ¿xálžQF±,+›Ñ“â8ÓU†Ëe/¬—Ãè2O4;*“a‡=¬@K:J¥RÈ`M’2T98% óK¥éØÊS3X—…yWIÙj•TzfÔæØ6ðÒoêO®Bá(Ïû6àýW¼Ï–ø‘Ùçðª Ù€vÐKº„ÊĈ"€÷',›³ÝeußàšÛèwóÕ¯~öÕñaÓQÃ:z^+Ò^jô«^ü|W®à™_úÒˆ?ûdv «mOïe®ª½–Je¸ÇÓ²Ò×+—=ÞÜŒaɘ¤r 
I†Dœ.hüé×÷Bú­ÁÅÈxÑ¿&6¸¾6®·Ü²z2I¬ô,»®O¯Vžãx²ÉåQ–Îz½DØö|Ë­­A㔼¬Ik0$˜FÕLcÎu­ Ö…eó"Ã1^ VËþôÀŸþ£ðù¨lƯÜå6:»×Ô¨1JÆákùÇ7›ÂÃÏñÝ8Ã7úŒ¢áÔÑ6'—~V……¼CQz†]Ú²0i!¡*€;îðä>ÎÐ,•‡jU*Ó)×!^I 2íÒÔç®V½UõâJ¥.̲€Ç{Õðr°ÒVÊâ8j¨¡eeÛ#À $®þä²AzðZ8c•& qãwµZM ‡aþ!ÁYOMsp^¸’‚AG kk+Š7÷>¡ìiÃvhµZØÛÛz¢Ng9²®Žd°:Ž÷E˲g´6# Häh‘ê1î> ž'Q@ÌZËP‡±ˆIF°ì­ÅKðºøuá«·æÂé*â Öè¹Οês¼cN¾8útd¿à|¿÷« æïn×B|&c³Û£{]ò°Ç.ýI—èÄ•÷1κá%!Ó3òÔohm渆!i?ÁûT3’ l¿º P­ÎMÙ˜fþ):Ÿ®{ÝžnÁ4Nçùs¹Ÿ,`ÛöjzXÇQ‚uÝû!oO½>»$eḬ̀, ¥%_klYkNYá%iú“²k€iš¨EÖWZ»MzÎ\VZÿŸEl;]iŠ$£q…ô°Ä¬©“C¯¯i eSó°nll`kkk&7<-F.ùa½ð¿jupöÐ$iÈî†Öðîü›@¡{lœ¢^@Ï(ˆ  îýèÿü ž!N…ãŽÂ ‚áëACíàâ-ñ›­kÞ³E¯ˆyQ*‘ëÇ|eÚF>>G#þ˜Nb´ì%"Ó+1iÈ”¿t T5y-ëŒêŠCÇ„1P”þˆU¾õ[?0Ó{Ê +k¬F‰›øŒƒÖIÓEM;Ÿ™ŽãÀqœ¥—kÛö&Ò²ø32ƒÖ‘W«ž®M†É e½e®uçm»·d(k š–ØÅÞÒ’à1‡%C¯¹d ÎçóØßßOý3*ƒj·¶Z-´Ûí©=¸‘ëô•J½iEñ~Rư[V¯6»O’Âö‹EàŸývÏÈ£sǹû›MÀÃIŽû2ðÎÁø¹ÿ ”J°ª hV·³Ça*·C˜èO–Ô5~a…?ïî߉Êîß‚Ðõ=+p-ñ¥wJÝÏÉ«YÃj¨Ä%ø¥}’ô:znsÔæ-ßc)8”Ew’%mÜ— ¿…á'mW‰£ã’2ÏYê4LE‡¶ye ž§ŒÇ¡iÚÒ‡NŽEÒº%Eé œ$yFk¡ÀŠô,J¶çRÎfÆ“¤s»øèúôÖdÏ Jx7Œ%›dXôø=n2Ôy‡Ûϸ†”ÁdA(î™D#Úè=\ª˜F/2™¦‘@{¨ÁzxxØgäu:Bž–v»[»µÝnãÌ™38sæ î½÷^¨S˜Õ˜JÙòФº^Œ—=èÝ >ÒŠ÷¥¦Ò'è6‚I†|°¿aºsÃpÏ·Šâ…è’Ùèy(m ÷° ý6ˆ€ôŒýIò&e¹k°~ÂY€ã(Å\+`ØZ]ïj_ÿ"»=iN‚úìœÞ'ó”ïI¼LVëV˜_üVï…|ò¤§Üž<é­ M«äF¼òõôËòpöXLo¡)û1)çþÌõßç"½Ýi¾ȳJ *‰y冘§ŒÇQîÖ·žÕù'†¾ˆi‹–Õ‹IóEK’7™Õg”Q)Ûs‰–1Œx}fJƒ•Œwœ˜We¾^ãfÜV”ôm¡¿ÇôsbÑã·wÿöX“3’$ ×ë§(cæ÷ïÍ'Ú»PM>ƒžÒJ%^áŽ%¤'FÃf4Þ¸“FZŒüÐCÅ yZÚí6LÓLìçÏŸG.—ƒeYxôÑGS¯£DªKÇ.()dH Ü 8Ö ÇR NÔHÉïŽ>hÝg=“0XÓ´{^ßðTà%ßüÓ7@¼û¡Ã¼{>,„kŸÊè_Ã*(æGá%½Cã´…¾Ð\ëuÝv9ˆ/qC÷QŽ|0bí'"ªÝ¶zWÌa‚lò=QXÍ‹óЯ¿ÑóÄy2~tä¹íÒ¾x“^æÑYéHHp_ed°N€üº[ ¿á9c(S{}h܈*=³Ö!ãQ4Mƒã8}kŸ2})¦9úº6]÷"âpÏøl6{µ‡Ñ`É‚lÏÅ`Ë :Åuz4·» ¶ǺÎT8•ËãÍHŸD­6R_[„íž'&‘õ¡Q“››S,œKŠ$6ƒ ùSŒ£VQôÞQAâ¥`Sâ¢ÀFe!Y‚qpp»­ÝnãððgÏžà­ÝÞÞFc•ûC;%XŠûÂîüžò‰ Pz?`?z#ü­“&ÑG¨ãt'üSó1v?ÊÈûÞ¤ûÞ#†{!»õUq ÿV÷°öGú¯|?TöÐóÎ&ÍÄ•ã1{mTE϶ê»íR÷ØA/´982)ß]â\YŽaV®ž…ùÞ§Ò5 imj4„7vP.ômªkW£EÓBµGÇ%u›"”ï ðþŸfÊ?å8a›hÖ Ð"d<ˆmÛÐu=»Æ*àɲ¢¤WtmÛ“q YOZ³Já*€·õe ±Ì(‹–m`Në²kµþñ”¼1cxY£cEVy÷º9ïºÝg¦iéE—D}lÈí;Îä µ˜£F¾(´„Ôcð ùÙY‘'FÎ-3ʱªšÞÊ2©X*—¬}dF]O|ÄѨ4ZgÕgPC8šsÒÓ/Ä`ÝÞÞÆþþ>vwwû¶]»v €·v–ÈçóÇÑô°Z–§$DÆî3Úõô_ Õïz¨7ÐÉrwÖ@ o^3ôýdY=«…î:ÒÀLÉ 6:^{Ìw“áS¨Âa¹q’v÷úÜûHøF@øjäšB¸ÿ(ßïþ% 
6£cѽ¿îuE1&2ÔFræá9²ùNc°š¦ék¾ô‡B"95£c¤øÚ»aÞòºôˆSÌ-«ÿ‹ê º®Ê÷u ]{×ÿ4-YI§²4LÑѤþ¼ë’“ è•ê›þ ƒh‰ç÷EÈx˲ (J6Ë8¦Ù“ýAJyS‹Å^½Ý8c"AðöÕ4Î<%-ÛÀœjKÒ` Í„‘µrÇé áñ÷°!Š4Ÿ™V?§±tlÅ×0&«¯‘ôŽ‹XÜ£,Ñ•åù{Y³ ãÀd3’$ÁfŒŽâ5©Bó,ÖÏýîÔŸC¨ ã¬Akô™tÿöÓ”øeB"Ž*Ÿ6E¢MIJM’–ÌÕaÔ)•ÎyòÉ'ñéO:vm-ŹÇ*7¶í=Åz=ÞõÑÍ€;RÖ\Ư©óhO@hfƒJÙtCrÌŸô䤯¿×*‹Óm˜è¥R'¢cqÐ U¼¾··Ù?4iòÌA¼!,0óm{ü-€ú}ïšGG½J§ò–&Õ½ïšöã@á¸÷xBèHµ>µÝnãêÕ«øâ¿8|ç)3®|ÀO<ÃÃÃØs¤øíŸ?€ðÌ/ÁúÑw…Ò‚ã:̓TS†P›& }á„O’ô2éÎÔê: Þþ+PþÆU¯ÿ5›¢„ä>šÓôZW5 “$ ˆ*XÓÌsœ( æR¸~ý:®_¿>½ À,Æð(™ÏˆìX@r˜×É“ÞFò?N-`Y\wé’½LB«ÕÂÕ«WqãÆ¹^wVãw±êÅK4«-¹DF KàøØÛ(mAP9 éÑ~Q(¤Ï÷BÆm©4AåòŒ é^qÆDÌ{βÒÛ´_ðqâúõë )9ñ›9j„¡ý$íË>8¹‡®£\îú´þê7Ænï‡ÑSPÆ=Ž“Ï®¼ûçsÌduîÒeIuüΜÁ:¨C º¹'Ÿ|ŸùÌgpåÊ•¾m½«ºî) IƆçm±OÙŸòŽÕ/ÿcØNW¡Ø=ò`V*@±ùö¯BêNjZ<Ñq¼xIr½RQW2,ÉC¬È«¢ß`¥vweX>ˆ3Å Ç%õc^xt2†ŒÖ“€óí6줢?ñR÷Üö_R·m±‰ªR|O×®][˜Á:®|žÂsåÊ”H5KiY(ýñ?Eã}™ºü<¸,"|­˜üJ…dÐ?ü­°ÿ PˆWÓ L:¿ø+”T˜¶˜¸Ÿ6aaöàšªQg³Çp:ø(Jxx™väf°”Ýçí·ÿ¯…ÕÆžÅ%ó¥l¢ŠqRÜ}³9×À«Â¢ ÖYŒßQ& ‘™èÂ22&S*µÁ¥‚à‰¼¦õ"oƒøâ_*²ìO¶QàY¨zÌXó–”•ž­m§³|ƒëmã<¬­œašâíÐüïO‡øåÇüØzïgQ4K‰c;½Ð!ÙˆÓõéE©—/ú¾Q”ôÞæ(~ â.Ó\×3Ås’H¸óÎ;§w¡˜Å%óÖèŒ}Ò,I–Cš3Ìöö6Þð†7à¥/}é\¯;‹ñ;ʸYSG‚2åF#´È¨KiEV*a¦B ²/CŠkwE ¦x nýœÞUc/«ˆ†Sò™aÊ{p)J0!9Þ4é¿üÿREÓ;):<ìîîâÎ;ï*S³`ã71‰‡uèq£¼äiéR±+÷ⓟðåYïü œÇÿb¬6‡ØÜ +>i"r"‹q•9âß5T¯@ºä±››žÞ˜v6‰úÓÉ“žÒhÛ}æ é^[[[(ø[ÿûôȆ¬'NœÀÙ³g‘ÏçýŸ­­­¾ÏègRNœ8 –Ðn·'급Vî SæÝ A7ä 4M@Ü [P6ÿ}x|EàǾ ¼íƒ€ªÂVT8ÏóîM–»vÚŸßÈ1ÓˆV¤Q#sP|8-sýBëg½„LA‚žÙ¤7Nÿ"ùë®SKDh¿#{¬átï/p¡Tм¸â ÝŒ1 ùª¼W*€,£lx_d0J&8™@cXð½.å}0tog;0©R­zcYùþ¯Aé»[º²¤(°¿ c÷¿¢øÄ;ßäézsõzoütRÖ Äf­@ÈË;Ñ÷Î4—‰gñi-Ø´;Ê,d<ˆã8Ùö®ÒLsô3ɬ9“ f-ÛÀøu)G"jiáy)ÃPT5<Ö‚÷Yt¼Œæœð«uwìÓÏcjÄÿ¤´‘ˆ Ž]‹!×MôÙ®ª€zªû£×‡v÷`RI¬s½%` b2NL2ñ(Br$‚ãô„0Í"izq&%- ôG"Ì‹ƒ£$Rܸ×Y¢™ƒÖýOüý‡`ÿ~$„;¦ïÒr¨‘¢›Mï\åro:%ûêU<þ ß0Òcj°ær9ìîî¦þ™”\.‡­­­P3Ó4QHáÕŒcàúÕaÞUB€—ô‡ï8€ð”÷‹ôí_ ¿h°û¯ÿ(•P.Î7oúk:­÷Ã3ÏÄ$»‰ÖGz¥âä›2îvsÙ”¡õ£†X âRZ»›ôbäkiüíî/]og4ÒB’û}Ý`½¶#ãÄ¢J¹Ë´åÛ{C}ÃTÕVÁñ,8^kOûIçî~ðä“þ‹• $JL'<õY(?ò"ÿ¶¢bóì7CûíW ñÁ›Q*yãE© ñŠF´FË·Frô–¦ 7@ƒïœQC‚)|ÙOZ0"ÑvO3$8øÜhõÀ„‘Ó3 ’)ïj_]&x¿Ùìß—B˜¥eÖ² Ì)Ü=Nk *Ì)4Ûhx¢o"ÙN½ßýÌÀ]KµO?'ƒ5ð¡™h·¬MÔõþH¸¸±TÞ'‡%û‹k„(ŸøÄЈãSíÅm˜µ»/ÖÅE^ÌCƉIBßEQD)I>Øjuø÷GrY`Û(^û_•ïÿ 
Ä›ÿl²×õ^’×`Ô%µC‡Ïl—˨~÷{!Ý™HP”¾c©ïiÂ¥•?¢É’qs[}çrþê¶Þ.?øõ^2Մ˦¼ùžŒC£‚$ãÝð‚̓·õzßRXÿ>"§’åäɤÛëÔ†×æ” ”R¾£çu•¤PBãqxZÚ[­.^¼è‡' š5‚…°µµ»>ŸÇ{Þó´Z-¿q±,+V¨)s'% „!ÎÖèC0ûX*—»¿üÁÐÿudfã…/~ñ €î lÂ7ˆ% p €`¢ßë) \î%̨ðB€»É£êux •ºžSUœ3è/1#`xéhFflô®G·$¨öw$I´¯ÃX¡»Öƒ€™Ø"gw°ÿض׎z}âJ(1ù:ß·µÞ@A²M¬$I½Êþó(Ç5ljoìl"¡2Áó$1¨©4M¹Æ‚}†Â …pž±˜ÉÄD‚ÃÀ(v½KhÉû¨DŸÇ´ôQJÒFн¸ÒŽLƒyÈx”‘C‚I»®Õ¦M³*õ:¯C]Q!ÛsÉ {¢7beÚwlzóÖÝ-ô.§2,qAƒ(•”˨85T«‚×m$)yP•e¯mÝÔh×í{Lä¶ífÔ´”åú[[p^'õÚò» B÷Yª*rÓº’@É„¢i·7 Wç»fu2Þ»ÿ†¾Ûvú±ÞqzÊA\dãÀ~¢EA€þ±Ó±éRHªÕˆM7Ëé9²Œ¡IŸºÌÏÒe}÷Aüž¯õO Nò<õ°„ÈÓÄïÝ Zþ•†TVÓ4qæÌ´Z-llløŸïîîâܹsØÚÚÂÆÆ}ôÑ4§KÅÆÆ¶¶¶&î(ÁNA³_'O†Ë—RTH¬ Œ‚ ãkHBQ„_ò~^õt8Ÿ‹ìðô§B °´ Ï“ @z`!¾öiÜ::+°-n®@@X¯ß@L@<‘p#cxiÄö#ð=¨q5‹Çõþ˜Öß§ã&ÄÈ™2(SßOÿxûx ™Ó’ïAI ‚"À£ªÑçIÕ5‚H§oFõÁãþAŽj†MšxдÞ$^Eñ¶iš÷bˆ:Íhb=Ž`míQ˜Vâ¾Y|ÏR[IIËÓ’ñ(I“±ÐzÒQµëa‹ƒ³Ë3+ͬd{‚µ_¿åOìbs³« }öÞÐKÙ¶½Ï—?H’ØlúŽ’±æiJ%ÀüƒgÂУ(X×aþÚgús¨* i‰Þ\Óʧí þ–å5°›˜t¼XŸ) ì?é"‰tlÛ›=•$ÿ}íˆÌâF×ôÚ‚SP< æ('@éõŸé…5›ÕoFÌJÆ Çqf·´#ªxBU&ȳÿàÓ}z’yó÷ÅzS޼¯±X÷Ba¾êí^)@ «W¤PèyÖ /Þ MÚ›HÊm¥2Ÿ¬¸ü5ĸe©R¬çÏŸ‡,Ëxä‘GB ¯óù¼_€øôéÓx衇fÿF„fãu½;öÜýŽŽ¼78©2¨$Ï#MJ3ÚM£ý2ú„£{h_8«ˆí®RÜ5´B_xTY–Ñ3* ćÙZÝãä@ò¡×û ?wÏ€¢œÓúë/œÏ"9QF÷0Ñúù8‡^ÜWíûq!˜ï~÷ ñÑ~Ýh É I…³77ÃKx¢^h2ø¢ï‹Ø÷‡$AÝü/áÏhÔK“»?%¤ ’ ‹‹º¤DPq]3úýëzºDJQ£>iù:ÕŒnKJ ; ×Â| [^dÂ¥LAõ—H¨¦aÉÓBl*É0Sd‚…ÏýO(oü¼µ'/üh(GM£?´Séšqì çOþå?×âí2]‡øÍ·ö—Ýg¡ëĦ²Yùû·Àþ¡·‡¬hºkOžìé¤OlJëUËùs¿  üwK*asÓû3M=îà{)häÒïôŽE@þ ­y7Œží½êÌ4mûú©ã`´¬‘$M¶ H¯zVhé­«ÏH$UL–¦ùç°Ç0.>Ñ~Y†ö®—„"Å‚Ä}÷F©K­û1½Á|±Árvqøå2»„²Ç?º‘ØÜŒ?&²½ï¿üË£W1j°¢Óéà¸ß<€‹/-н‚z‹òÞ0pÄé³üã²?ÒŒÒU.w–$¿„ž(âs"}EŠ)×ÑõŠÙÝ·ä3UŽŽˆÕ²€›nêÞf\àÀYá2`Ä h·ŒLè#§çÞ{ùu€µÑ[Û÷Ò‹ ÇÔ4àŽ;ÂL–Gé±£ïòPAò.ÑzÑyó›? 
Y^þ·Â '8QpT”BCâÜØÕª7È.(2Zß‹šY¯÷¯úÓ8¹‰#úÜ’²Í—ËžŒnn†¯Ì”N¬ü뮿õöØjö?û÷Ÿó¹ø›©V¡þËçCûƒïêíÉÏ@‘9–åÉ'9 BÏÀäÝRE`X¢ùxÝ>éë1Œø÷ å¢wï–ïÄk+ÊTç€3‹ã8³‹&°,Tžñ (¥v’]T*Þ»;ôÎDPš?ô®Ø\]ÁSêwýsh…®G¨P€ K¨½â_¡²Óó­ß2,Håþïº\ö^1Áq” ï`RLšœ&yY£“îqAäУ6å˜&yâ Š¢Ø­QîMÌD—œ©Õ€7¼ás•¡k«ÕB.—ëKiMaÀmV{žP‡åîl›Véϯ!š„È4»GQ”pr˜Nú;nT¥W¢g$ €øÜH_iÄdX·¼}å·t÷uékQ"BkV ò¨JðKÈ’ä Æ’Ô=Fî ‘@¥_d Ü“ð ô%PÒ´åÙd úp¯&=jÓì‡^$AcƒÊÔ’'-IG¤0Í`¢µhg¤¨£QgŽ–$k³¤CþêûýA,úìFÒ¹£…¨3 ­ô²òžQ¿sö¼¾‘4‡Õ]:.)½ZõÎ|TC†ÊÝS*ym¾ ’^qI©ýª’J¡·¬ÞúÒq”ÿ8Řf3;1×$X·™q ֘ĦþÒ +Ñe;ˆ1¿áéÿà]É 6‹Þ•º u q“HÕj/ÜR–!¾ç¡þÞÿÙ¿Ÿ¢ôM*Q„Ðæ&Pøá;¡üÄ×%F4”J½á…B|äŒ$õæXc—œ"&‚<–Õ+­Fç z´ÈH%nÜX¯(ÞyšÍžÁKºcȹ4S*• 677QU´m£ôêÿéGR`=™ïu„îd¦ý÷ÎùÆ\h9QÔD|d_­Ö3òœû߆ÒOç¼/¯þ-þÆyTz ðÁ†,SË ª¾Ñ½t³®HŒp¯ODå¬Tê-÷Ž7iÔ)hB…¾ŽF#üŠ3Mïšqý*XŸ¾ZíµÏ¶Ãý&Ž\î©Ñ¿[w?ü°ûš×¼fØn®ëºî©S§ÜË—/§ÚwÚ\¾|Ù½páBè³F£áþè¾½÷$ =O£áº²Üý£æºrÞûL’\÷ø8¼ïÑ‘ëV«Ýß_û#®ø¢¿ðþ]×¥}]W½Ë;Gð8QLnCµêºnÃuÝ’÷·ªºn½äº®êý^«¹n©8g©»÷ÜAšMo?QìµÕ»ÑîqÍÁm >—èã«V½öÄ!ËÞ1µZüöz½÷œ%©w/Ífò1ƒÚFÇW«‘ûŒik©äº?üÑ>yÉ:÷ßßg²/¬TÕuEÑ=þÈŸ¤ùt(Ê¢o$j5O¾ÉÒÑ‘w[¢èýŒ*w„(ö÷;×õd.:fÐuƒ×ŠŽ/µZïqW«Þ¶¸¶ Bÿg²Ü“ó8yÉ2qcxUUÝFp0ãøØH¡(ýƒ þq}Š™)Íf|DyÉÃúãññqüx>#‚”JÞ˜Ñlz¢Nc]ì€äº^_Hz™G!¥bQ”ÞX[¯'?Ö$޽㱊ã·$I®ªªîñˆLQÒ=ãfÓu~ïS¡Á¤Ñp]éë¯{')•¼ÎR¯‡öi6½ïS’Òw×Mßmêu¯Iú@´Û“N÷K'é<Ñí²Ü]Qì½êDqøëòøØ»OIê§gÂ8ã÷Pƒõ±ÇKeˆ^¾|Ù=uê”ûØcMvS¼ùZ­æ>÷¹_ôþ¨×=aM$u¿tÕuËuKÛê®g˜ÒFYö…÷èÎðùûñ]uÝê[¾ìèÖ=ÿÑ‘ëŠÏöÎC/¦££î9ë®ë ΨòLŠ{Ÿ@J½ó“BLJ1Cêñq¿P×ëÞ½&½·H÷Ô™k5¯­£ ÃP”øÎw|Ü{v¢¸|ÊŽëÆ¿ÀÄàlCw&Dýößs›¼îºnï{×[e‚v ÉGPvT5¾Ÿ‡å>Iñ¨ÕRÍ‘ùçLC³ÙÛ7N·]uƒU–å‘•XŽ{Ê ½i–m4Õ„eâø8<ÆøR HÕV[¶1|Xl4®:ÆË0ª¤›åèht«‹™)«6~5!Ólz“³QõtÐ IC¸ªºîÑ7ßëÿâÅŽ”èuK¥ÁiÇÇ鯹Z­Û†„sŽ È15)qcÙãvù££Ñ'£Œ3~ ÎçóÈçóØÛÛC§Ó‰Ý§Óé`oo[[[Èçó£»ygÄûoOáû¾¯Ûær9“{ Ÿê}fþn/qœöËðBq%Tµ»näéáóÉ÷.múûc¢Ê*ðÊÏ¡µ <(ìôÂ8ýPC~øq\wÊð×çš—á¯Ï¥0_jc©vçwKy†Î1,ÓEF zäTj‘!ÁR6qa‚ÐË IÖ·ÔˆÑòÕ*¬ç¼â·xáú²<¿¬pËÉý^«yaZÁìvqQh´VBd’"DK¥ää´FF×I»”‡’r†‘¤ù¥¬Ÿ7¶mOoÝ}á”y†~æôð¨Ǫ\–ô‡%¨¡q‰¢BW9œ}ã”´¡ZÌ´jƒ"×ýwjÚJI"fF†1VB1ªTÕa(™öWü&©@µ ˆœ{ïë••`Ô…!]‰¼`ÉÕ¤kÅ3nªËê­™Ž»&ÙãvyQœ^y¾QHU‡õܹsØÙÙÁ™3g°½½2J¯\¹‚ƒƒ¿,ñä“¿R)à®øBÒ ”Jž2©}  ©Þã ¢ê_øÑŸözQW+Ô~þcä„:`Qý¡:P‹" ýY~Eôê¥~Pþº_°+7ò?ôboD*FbœÁ¬Å7ÔjžÒ=h9×<…Z{kl“®I åWe šišaå½+ãv¥÷ÝѽrBÓáHR/%0ŠC'ϹC F}aÛ·¬8c•¦Š®ë(Å 
r¡àÝø‚~<™n2ŒŒŒq&‘ºU62×Ÿãæƒ»IÎP†~ÀëSô~Ö/ØÝ½o~ó•EßúÔ0 G#Φßg”œ¥ÙìþQ¬°ÊdÓ4QãÅi@óÍÿ%ñ3€ÞS*k à\z/ ¿½ùGî†„á «FudmœM ½#¢ëe—Tk>ŸÇ¥K—pñâEìííõmßÞÞÆÙ³gû3-Çq¼hÊ4€ðáÏ”w¢H"¼ú¬¿ôS}Ç)ß šwÊ ?Ôþ} PpJ†„ꥊ€ð­ñUkð-Þ P´ÄŬ!j–’ÐÂô$&-ZœE^úÒoòÄÛ6üúsÑ2,«4`Íšyõ¡i\g3œ‹Ä4M4¢iD)}â ;6%CÕuÏ c(áJšW Uƒ¢£Ø#Ì»NÛOdk—,÷¤‘wš$ÚÜìEÊPrE +†Ü}÷ÃOº$ ô>QjÙ˜d0æû¾õû>˜ÈJ’çì%Ÿø}ï-«ÆÍ¬”xÔ?#î½Þ+Uó[öo7 nýÄ£IBíƒ_rÙSWÅ1 V±ë§2X/ ð¹sç°»»‹V«åžÏçCÙ‚³Ä‡>ôï½_F&DÀÍ‘)Ûe ¶á…ñ_Ô”¹7ˆ«ðÂw%ô¦PŠù\€W_ÕDÈ#d‘•¬ë¨ÀßtÓw£PŽ~è1]ƒ5kß ÃLŠ®ë$)Q@éÅS‡•Œ…îV«ž37îr”ÑœJ R‚%\Gé¯IYH§ UŠÎ èz|ùÙAFú0/kÔ+MKZŠÅ^)2Vibðþå+Ç•q°š¦Ùo°ÒL ÌÖ´¾/Å>r!}ìÀ£ Š( ðjËÉ¿¸è[bÃ0 ŒaMö²ù{žUA’ þÊizû ]T³ü¤6X‰ lmm-ºÝC±,àŽ;þÀ㹘"edxëLxÆ*½[Ìç’»ç3hxBNø¬ Ϙe˜þâ/¾Á[ûõ’Ÿ‚ªfeý wfõ1 £¿ B¹<³ZÀŽÓ3Bƒ…]F›Bë¬hü ÈÀuœæ]+ïøYyÈøŒÎó–Jž!m7ÙSq ò²&-Ï$Ï.‹.)¡ù‰Uó¢8Ž˲Âá’TšIQNÈ4?~ Â3æ “=,ËË`B©¢pÙz04éÒ²ò+¿ò¿ðµ_û…á;ð Ñ8¢Æeœ^]r— Åç]µ¿Jð#­Ø š™–eáå/.ªÕ^bö°2«Õ–¢Yß’2lLB!¾kµÚ3œˆ ‡qX¾a„“xŒRÞ˜jwKRòqår¯h{ÁA§—HŒB–‰“'Ãõ'ƒ4‰¤( A†nܤ›ªzí£ðìU!6¸Rñ4óU^”ά ±C°¬^S†VØ`ýä'oC>ß¾£‰øÅ¡BÂ瀄‚ϱ‰“/t¸’°mâ efíqo{Ûó¡(žÂ˜¦X3Ã,š¦…gçmÛ³üf8­Þh$Ÿ>ÚÇâBu“ *AwIÆ#eÀŒ»†¢Ä·2ÄRôh’ä)‚ѤOÁcd¹—*hRhnUÕ‘”¹š ð´PfKš«XLÓ Ë7Í6to²\öî9:Qpòä¢[Î0Ãf¬R4Kä r R§³6¬°ÁúWøÎï¼eøŽ4mÄ|Å–»Ÿ H ½r4â£s¤î1ì)efÄ ”é ³Ì˜¦Î\.{?a(0•Â"hí&0øÔ‚Ð+Û,Ë=÷æf¿‘u SI±Þ½zÞݨÁ)IÃísÊrMÉŠ‚ç-{^aJüW‰DQé…àßšæ>W”þç´dÁë•Ëã…öªj¯Ú*@áÀ¡è@jOMó¾KQôä‚äuC£™ÕÄ4Ͱ|pO®+o¼ÜüÖ¯¢ò*TÍM”ÞöüE7ŸÉ#¯a]lxæ3¿„oxá ½?éMo£—)²K‰•¢/Fa£Tìî[Â`Oèñ¢Ÿ ³Jضg>óQø›ÿ_ø8 Ë0MOáå(2fUˆÍ,I!@uteÙóT‘!–¦¦¡ëÞ»&nMUµÚ«™«ª=ã–Bz ²ÃƒùuªÕžQKFu´O“A}µÑ£"OhµÚ«Ó|d”@JÓz÷÷˜É³GÛÁk5t®4XÖxÇEïoQY’gAl2šî—Jß}³é=÷R)l>õ‡}u©f2ëam·ÛPU­V Ë2´hnÿ$øîŸîû%âJÖ0Ì™H¾Ø7^ù-w-ú6&‘Idܶmœ}ö³ïøŽØ‰GÇñ’wpfQL"ß–eág®^EùÖ_¾ôL-ún&Ì$òM‚  ò†«0ÿìÑøè³x™3™õ°ž?¹\–eáÑGE«ÕÂÞÞ^ºƒ ¸åu@ÀIxá¿•ÀÃ,˜‰ä€õüïãÁŸÉ4“ȸišxÃ_þeÈ»ê8žGÕ¶=¶Vã|Ìâ˜D¾¿íúu¼çóÿì[òÆÎd’IäûýÿößâO<òíÿNóÍÞÌú 31™4XÛí6qöìYÀÆÆ¶··ÑV4Žá…ö6Á+QCÕ 3Ì,˜X¾á)íü`²Ê¤2~Ï·}¾éË_öã|uÝËM°‘Ê,žIåû®¿øvè(ù%—&KL*ß?ø¶·áß|ãï?øƒ¨]{++ÌTȤÁzíÚ5@>Ÿ÷?ËçócÅÑð Xž»DÊÎÎÎ΢›0UGò"®*ÓïU![EyXµ>;.“Êø]þ0þ´X¦'OzY€ëu/x™ ÖU“‡Uì³ã0©|›ßüc¨ýÚ­K%ËQVQV­¿ŽË¤òmÛ€ðõÏ_úèU“‡eï³™\Ã:¨St:lllô}~åÊüÆoü~û·wß}÷¢oa*\½zu¥:Ìõë×Ñétü5‹âsŸû>÷¹ÏáÖ[oÅîîîܯ?Ž|ÀýÑá{¿÷{qÇwà…/|áÜÛ=m²"Ó$+}öêÕ«xâ‰'výIÇðç<ç ¸|ù 
8yòÝxÍkþ7ßü$~þçv;c“y˜Yé³?þ8®_¿Ž½èEK3†GÇïw¼xÇ;æÞô©‘Y˜&Yé¯4~¿ï}ïÃk_ûÚ¹_Z:xåDdE¦EVú,é࣎ߙ4X;Nâ¶7nÄv–ûî»ÛÛÛÈår‹nþÔhµZ¡®e§ÓéàÆ™øŽ¨-‹ºvIò ¿ò+¿Âòq²ÔgùRšÞþ2oXØ}LJ–äad©Ï.ÛÎãwöÉR]d[X÷È’öØcSkó"Iä{ÛgÁ _Æûa&cŒ/£<ðÎë6~kS ¿³O£É,Á«@§ÓÁÞÞvwwaY}ôQÿ3âüùóÈårþöV«åo´-ÍöY°··‡v»G}–e!—ËáâÅ‹©Û”Å{bÆcRùžÆöY0HÆ—ñ~˜ñ&ãË(<†3ÀzŽßÃÚ”ÅûaƇÇï帧‘X´Å¼ª\¾|Ù=uêTè³ .¸÷ß¿ëºÞLÆ©S§B³#ï|ç;Ý׿þõ· ;vV\¿~½ïš×®]s/\¸ªMY¼'f|&‘ïilŸƒd|ŒA2¾ŒòÀc8C¬Ûø=¬MY¼f2xüÎþ=ÊÓm0¯*[[[°,+ôÙµk×pÛm·ù¿@>Ÿ÷·çóy´ÛíÛ†;+(S\>ŸG«ÕB§ÓA>ŸÇîînª6eñž˜ñ™D¾§±} ’ñÃÃÃ¥»f2ÉøªÉ÷4ÚÌ2¾<¬Ûø=¬MY¼f2xüÎþ= ¬s`ggdžK—.À@!8::JÜÖétKõ²¦Í7úî寨ßß*ÔiÚ¼ˆ{b¦Ã¨òUy$ãËx?ÌôˆÊ¸iš‰ûfUx gâX‡ñ{˜|gQçb¦ßÙ¼§Qá5¬sà¾ûîÃÙ³g=ôO’øÂ¾¸íÆ%¡ž6ÁÙGy<ò¶··±³³3ô~Ò´y÷ÄL‡Qå;«ò0HÆ—ñ~˜é•ñe”Ù8ÖaüvOYÔ¹˜éÁãw6ïiTØ`[[[ØÞÞÆ<à/ºÝãöO"—Ë <6—ËÍäNŸ> ~øà N‡‡‡CÛ4év&»Œ*ßY•‡A2~ÓM7-Ýý0Ó#*ã«&ß<†¯/ë0~“ï,ê\Ìôàñ;›÷4*l°Îˆ½½=惠õ!pâÄ áЛv»\.7pÛ°cgÅ p€‰Û¼ˆ{bÆgùžÆöY0HÆïºë®¥»f2ÉøªÉ7áëźßÃä;‹÷ÃLßÙ¿§QaƒuFœ>}‡‡‡¾LÓô¿ü\.‡­­-„¶ …Û†;+òù|_ í½½=ffÒ6/➘ñ™D¾§±} ’ñW¿úÕKw?Ìd ’ñU“oÃ׋u¿‡Éw ¿³O£r“ëºî¢±ªœ?ØÚÚµk×B ¤/&}gg¹\Î_Ô¼¿¿Û†;+èš4K5ÊýLc;“-&‘ïilŸƒd|ŒA2¾ŒòÀc8C¬Ûø=¬MY¼f2xüÎþ=¬3&˜2=nD§Óñg€¢ÛmK³}LÚ¦,Þ3>“È÷4¶Ï‚YöI–ïåcŒ/£<ðÎë6~ÛžÅûa&ƒÇïìßSZØ`e†a†a†a2 ¯ae†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`efa˜¦ ÇqÝ †™;Ó”}Çq`šæ¢o‰a†af¬ Ã,ŒB¡Ë²Ý †™;Ó”}˲P(}KÌ£ë:lÛ^t3f.°¼Ï6X†a†a˜±1 ƒxfm`yŸ?O[tÖÛ¶¡ëzßç¥R ¢(èÍ䈢Y–aš&J¥/¬ŒBÁE$Iþ9*• TU…®ëp’$AQÿ|‚ @UÕ±÷g˜a¤‘oÐ4 ŽãÄÊØ´dœå›™'óýjµ MÓpÓM7…Žsš¦õÃ0ã2HžMÓ„mÛ0  ËrH>%Iò?c„Y&•wI’|¹¥R ‚ `¹ö°.Çq`Y–ÿcš¦¯Àž`kšÀ›Ñ)‹~'Ñ4 ÅbÑ?Ïææf¨ƒÑvÇqà8ŠÅb( MÓ4T*•±÷g˜a “o(—ËþßI2IçšDÆY¾™y2Ù/—Ë0 ŸùÌgB×- ¾"Ä0Ó < Ê'y¢X'a–…IäýŸøNž<éO6†Z²Ár=&.“Ž]I’\UU]×uÝ££#€{ttäï#Š¢+˲¿­ÙlúÛjµš+‚ÿ7·V«ùK’ä–J%ÿoUU]Y–ÇÞŸaF!*ß®ëÉ\ðïZ­æËØ´eœå›Y³’}:¾Ñh¸üëåša¦Mœ<˲ì6 ÿï¨|Óg¬“0ËÆ¨òNãqðxÒÛi_–ëÑak†(—ËEÕj€7+#Ër(|LQ^¨˜(Š~vÈàßÁDÁcAðC’u†IKT¾ Y–ý߃ò7 gùfÁ¬d?x<ÐKäDï †™Iò%*Ÿë$Ìò1ª¼“Ì–Ëe˜¦ AÐh4B²Ìr=:¼†5#T*ضF£á6¨ämÛþ:¥ q/†Y4qò= –qf˜§ìÓZÁr¹Œ£££Eß:³‚Œ#Ï ³¬Œ#ï¢(¢ÑhøËøÇA©TB­V[ôí,5l°f]סë:Fh–E„D£U’$Ö† 
Ùw^·Äd‰$ùË8³ìÌ[ökµÇa~†™ãÊ3Ã,#ãÊ;%NªÕj¨Õj~Ù1I’ü„©ÌèpHð‚±, årõz½OQÅ_ì x %Ýe9ô7Ð[ôÍ0Ya|ƒeœYf%û‚ øÙ*¹ì3-ÒÈó ¨0†Y&&‘÷ gð<®<Á39l°.Êø[(pÓM7ù?•J¢(¢V«¡P( P(`ssÓ #¥¤R©`ssÓÏ Y¯×}K ã3H¾‡Á2Î,3‹”}EQ ( Oî0Sc˜< ‚€J¥[ „a–IäJØœô!üÖoý^ñŠW,èQNŸU+ßñÅ/~_üâ3ñ}ñ‹_ÄW¿úUüìÏþìTÏ;+ù€ù‘Á7|Ã7,úÑM,ÉôÈRŸµ, /^œúyy OO–äad©Ï~ñ‹_Äm·Ý†ÿñŸÚ9yüNO–daZd©¿Z–åŽÓ„ÇïôdI¦A–úì8ã÷Ü Ö|>ýý}llløŸ]»v €×1è÷`L}>Ÿ÷C’øë¿þk<ç9ÏÁéÓ§ç}K3ãòåË+u?W¯^ÅSO=•‰{ºzõ*þäOþdêç•|À72ñì¦E–äaZd©Ï^¾|y&çå1<=Y’‡i¥>{õêÕTãæ(ðøž,É´ÈR½|ù2n»í¶©Ÿ—ÇïôdI¦A–úì8ã÷Ü Ö lmmù·Ûmìíía{{¹\‡‡‡‰Çv:P' rë­·â¥/}ièÜËÎüÀ¬Ôýœ8qwß}wfî©ÓéLýœ³’oxö³Ÿ™g7 ²&Ó K}ööÛoŸÉyy OO–äad­ÏN{ çñ;=Y“…i¥þzûí·”§qáñ;=Y’‡iµ>;êø½°:¬N{{{¸÷Þ{±µµ…sçÎ ½7n$nûÔ§>…|àØÙÙYÔ-MíííE7aªär¹Lt”ƒƒ\¸pÁ/1 ¦-ßðøãcgg‹ypS&+ò0M²ÒgwvvpõêÕ™^ƒÇðádE¦EVúìÞÞ.\¸€O~ò“39?ßÃÉŠ,L“¬ôW¿)éÑ,àñ{8Y‘‡i‘•>K:ø¨ã÷²Ÿ?¸téR_èAƒbù_ö²—áž{„ah&ñÊ•+39ÿ,äî¼óNìïï/æ¡1KÅþþþLÙE²»»‹Ó§OÏd çñ›Y44~’·Iàñ›Y$ãêà ñ°ªªêÇÑG;lj' ÛÜn·¹³4°|3«Ë8³ª°l3«Ë8³ŒÌÝÃjš&Úí6dYî‹•ßÚÚò]ÖÁŒd¦i¢P(,úY1ÌPX¾™U‡eœYUX¶™U‡eœYVæn°RL~\¸­)ÜÝÝÅÎÎŽŸ~{cc÷Ýwß‚à ‡å›YuXÆ™U…e›YuXÆ™eeîëîîîÐ÷|>÷¼ç=~ÇÊÂ"a†IË7³ê°Œ3« Ë6³ê°Œ3ËÊB’.¥!šz›aV –ofÕagV–mfÕag²ÆÂÊÚ0 Ã0 Ã0 Ã0Ì Ø`e†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬Dz¬E7a†a†af!°Áša,ËB¡PXt3†a†a†aÂÓÆ9hoo‡‡‡hµZ8{ö,r¹Úí6vww}?+…ap¶mCÅE7‡a†a†af®Œl°îííáààÛÛÛèt:€ ìíí¡ÕjaÑ÷´2†UUaYV¦ V˲P©T $I‚$Iþ6Û¶ašfèlÛÆÑÑÑ\Úæ8t]‡¢(þõÇÁ¹sçð‚¼€'Y˜•åóŸÿü¢›À0 Ã0 31#…·Ûm\¼xÕj»»»8qâ@–eìïïû^Wfr,Ëò¿qÖ±–Ë剮ï8*• …Nž<‰“'O¢P( X,†Úã8Êå2TU…ªªÓ4aš&4MƒmÛPUGGGh4h4Eq.ks ÃÀææ&ÇA¡P@¹\†a( ¸ûî»ñßù3o³ض Û¶CŸiš†;î¸Ãï•J%kÎMÓD¡PÀc=¶è¦0 Ã0 ÃLÌHÖk×®¶¶¶ú¶åóyäóyßëÊLy%I‚®ë#kš&*• ªÕêX×·, Žã V«…¼»¦i¢X,BUU”J%T*Ȳ Y– ä]MB’$8Ž3V› ÀeY‰^Z2–MÓ„ ¾\­V¡ë: Ã@½^Çç?ÿy\¹re¢ïˆÉ.Žã@Ó4H’Y–!BìvÛ¶}Y¤ "EQBû QQ!I À,Ë8::‚ °, –eùç¬V«~Ÿ§Ýº®C’$ÔëõPÛMÓ íK².Š"LÓ„$Iªªâàà`Ñ_Ã0 Ã0ÌÄŒd°nll:Žÿ;ÑétØ»:ELÓD­V€>ÏÎ04MC£Ñ@±X„išg]סiZŸñgÛ6Aè E–eÍf…B!dÔŽJZƒÕq†2>J¥ÊårßÚ^MÓ`YdYF½^ïk{©TB©TÀá’«DÜo Q'#ðd—"LÓD©Tò'…Ï#O=‚ @?B€dœ&MjµZhr†ŒÝR©˲P,ûúÔææ¦6¼.aš&Êå2EÁÑÑ‘!P¯×ýûˆß²,'ö?6X™UæÝï~7n¹å–E7ƒaf;&ÌH+yQwvvpîÜ9ÿóv» . 
—ËÅz_™Ñ ï !ŠbêÄKº®C–eˆ¢ˆZ­†b±ˆf³Ùça"Åœö¥d¶íDo)y.u]÷ ÀQ ëE=Yä"Ú,I’ï)%$IòŽà=E=ÂÌêS(P*•ütZ74ƒÞHI’Ðl6ûΣ(JH&㈮ÑNÚG–åPÿ ©ªª~”BÔ 5 ÕjÕoC©T‚$I(‹e¹¯0Ì<±,+vsžÐ{Â4MÜyçøöoÿöE?fÉ ˆ”yÉq¥RA©TJ¼ébFÃ×yt]Ç>ðE?*f±m›››~tج¡eƒÕjuâ>7rÒ¥ýý}T*œ9spxxˆ‹/"ŸÏûž f2¢^Ñ8ã, ò®Òq¶õÂèºî{‰*•J¬Çs0SØá8„¼µªª¦ +–e†a„>ãlÊËG4¤|T(™MòH’MÓúdS„Té´PUÕ7¤Iɦ‰£R©Û¶aF¨=Á¨ B’¤¹%(c˜$(j€ÆnI’úŠ> °t ·ö›²ÝÓ¸Oç‘eÙ6 ÈÚ—¢e臖t(Š‚F£O~ò“¼¬ƒ€¾±4n;-'’eŽãø¿ÇéF´¬cRHf-Ëòõ²(d@ 4 ض MÓðú׿~Ñ•Y1ÇñÇqÃ0Fv8iš†R©4’¡KùtÊårbHËÈëÆÆö÷÷ÑjµBY‚óùüÄ“ñ8˲Bƒ¨,Ë¡µkI¡T"HqŽâ8ŽoRèdÐh°,klƒ4 Qƒ•BÓ«ôL‚‰¥†…>3ÙÃ4M†á+®ãžƒ2@—ËeÔjµ¾þ³DQô•lÛ¶¡(Jh§I(R²x²…É*d¬ÖëõPø|¡P@­Vƒ,˨T*þ¤*M>ðÇtAbû8´äƒ<_t ÁårÙ_Ž2¯“hb:É %%üøø86’LÓ4‚=ª_‹Å¾H1ÚRhG³Ùô>ÆÝ-;±, ›››€z½Žw¼ã‹}ðÌB¡±Eq¬ˆÆ ´äH–eßøL Õ”&¾BNÌz½Žr¹ì;ÇAµZÅ3ŸùÌÑ*u¸#pùòe÷Ô©Sîõë×G9lè9§už .L­]‹ Ùlº¢(ºÇÇÇ¡Ï]Y–ûö¯Õj®(Š®(Š®,Ë®(Šn£ÑèÛOžÏAðo4®ªª¡í’$Íô^£÷£ªª[¯×G>ÇÑÑ‘|­VK}ì¼äeZòíº®{ÿý÷ϼ½óDÅD™WUÕUUÕ•eÙÿ‘$)VNƒ2 BlXGGG®(Š®$I}÷G÷OT«U·Z­Nõúó’ÃWêŸÍf3v›$I®$In©TŠ•ñY2yáñ{±Ôj5_ÆâdöÐ7î×ëuW’¤ï£ƒç*•JõŸZ­æÊ²ãñ{}9>>vEQtëõºÛh4ÜF£á–J%WÅTznT憫(Š[*•üÏ’äqP{ŽŽŽÜz½î·­V«¹Š¢„ô4EQÜjµê÷?ºõ zo¼ùÍoY^F^ú±±Ó4±½½=‘¥xk_wvvúJAìííáâÅ‹¡Ï¶¶¶V®Æ+…¥Ðï•J¥/+(?ù A3Þi×µÅmÎ4FÏOí™õ½GÿuV3*mšæÄáÓfÝå{•JÅT%žB^ÉR©ä‡ù’lP™¢ Ì’|W«ÕDOÎ"EÑ¿Ç8Ù¦0Hú– ³‚e|18Žã'äJ•2êùi­hÒ5(·À |Ë Ëö|¡ðYò´S.’±Aù8(7@\”Ø ÷A0¦T*ùKªHö£ç+—Ë~Ž‚¸¥Vä£céxòŽiš60ÔxÞQ6,ãÙ¤\.ûI! Y–aÛ¶_µ )ŠŒ’7R®Ò‰(a)Õ»Ò´‡"`DQD¥RñórDóß'–t2À³3ªÕ* …ªÕ*^ùÊW޼¤cä,Á»»»ØÛÛC»ÝŽÝ'{·Ýn£ÕjõuâÚµkØÞÞ ѬÄË À’$ùŠ÷ å#X5ž5 ­¢ýiÍDðÜQrÞç8J…JÃϲË÷`(˜•J%‹Eà$¥!Nƒ/ý¸«¬}ƒ”šp¡1`™”~–ñÅB™¬IÁÃq8Ž3t,7MÓ_ŠôÖ—ÒZÑa¹ –InÓÀ²=?hb„Æòjµê'È£äŽ$cqù8hÒ¿Z­öÕž'Cv¤D;Žã/©¢±9øSâúìäÉ“!½Š*(Š2©ª*Nž< UUý>Eãÿ¼aÏ´ä/XÆ2NvEQD½^G±XŒÍÕA碲“ƒPehXp0¯@ðZ4Y‡(ЉýN–eðòÊÈkXÏŸ?‰‚žÆ`=<<¸&óÆØÚÚZÙŒÃä! 
f-’¦øñQ9Qû Ò¨œa‹´³ ®.æ¨'ÍrÒŒgVXwùNB×uèºîºô}“¬ÑŒztvR©ä{YIQYVEñgì—Még_´î»Ñh`ss3±P™¯A‘'”ß ê‰g<^X¶gOÔPzNãÞç”#8QÌaŒe›Ê‘û¬óýŽã¼`Å娯ޯàÔ`ñFiòrqÕÈÄô®–LÖ —•HŸÛærØt™sIÎîHÀÅ}·¹]wYz`Î%Û"M2à½KºÀyaƒ5tq³—uä¼tåÅ-;—G;¶CV÷ÒSõ«RU©$•T¥Òó9§ÇUR½éù=õ{~Ï[µêju#F´ƒ­Ø…k¨àX””$:.]3yug ËxòSHŽ¢––– ÖQŠXú…W*H’äÛú/i¾aœ/íïïcccÃö lllÄÞ¤ž<¸Ýnw*ÇO R°£NP©wä8m9ü&õÞm¤üé§=aÏ?Éj>埤É`BVåÛêÖ#—ŒPZ-£\.mc0/üfá^¼,’ŒÏÓ4íüqbímïÀ«V«íæšÍfêBèç–í`¨ÏôÍ7ߌµµ5¬­­¡R©ØQ1+++¨V«®ü¶¨P.(…ÿŠ‹1¢WtÔ(1?O•èµ zßìììØÑQîÅÛŽO )N,ãÓƒÚù3òø{=åÞz4aÇŸg¨~HZ<ª^Fö°R2öêê*Ö××X}á´Ûmœ?~¢ êt:X__Ç}÷ÝgÇÌçóylmm…zú⿈}èCh·Û©N ·…Ç$¹B´âHèºî«œEf¡Di`M¢´)x”A¿¿¿÷¿ÿý¸þúë§~^Æ•oxì±Ç°±±EQb)z6 ¨i´_11j;ufüÎô‰{hcc>úh"÷”už4É'Î¥R kkk®qC¡õ4éð+ C“œyC[[[xøá‡qóÍ7Ïô¼‹¦¿G¼ÿ”ª!†56 WË–IæL­Žqòî7ÇokÐÂÿ°ðMI’\ž` »ôBú»Ýnϼe$ëïéB…¹âð`z£ FK“Ó‘"ÖÈs;mh>²þ¥¤ðññqïôéÓ¾e©åÍññqäãÑw†qõêÕÞéÓ§CËoÏKIíaåЧ…X:ݯd{½^·[†”J¥Àòq±··gŸOü÷8PøQ˜UK„¸ä»×›Ï¶Š¢ŒÔnhG~£0 yYDžårÙÕ’@DlíÔëYú[lvpp0ÐFLQ”Ô´—iË ëoÊårO’$—ŒQ«‹0]ÿÔSOŦ뎎Ž®Alÿç×ÊoT¨ÝßÎÎN¬-Çè˜GGG¾í Ößó‡ßÜ™ÆKµZí mµ4 ‡‡‡½R©dÿÿ¤séY2޼Œ||| À?!~yy…BÁþ̸´Z­Ðƒk×®ÈF•² µ¤WeLÓœº‡U|“†ÅÌ“— ëòMPøbÍ®³Î<ÉoEƧ µÐXYY±=SAaZÖE.Ó4Ú!N9E¶Ì[EXtÙVU¦iâð𪪢R©`mmÍö…ézò.Æ,Ë888pÉ9µ:‹«¦yX㮑Q*• ëzjÃ]ÆÇ…*·¯­­ÙF†aàèè²,Ûy«qÉ“7×:­2cå°v»]ßmA­nF!—ËÙáÅÄîî. 
…ÂÌC#â&Ia"ed0SëºÎY\xž4ñÓ «ò½¶¶†¥¥%{R¼²²‚R©”Ú\fzdUÆgå*Š‚½½=†Ž#ʃ&ÃÖ/¤‹Bòiœf1gz,²l뺎F£a§cØ}´‡¥|L¿¹BŠcžE÷÷¼ á´‹,ããÒl6íÞ¨Ôv¦V«Ù]=(Gúèè(öÎb¿|Yb¤ÖååeäóyÔj5œ?ù|€óþÀØŸ™„B¡€õõuœ;wËË˶Ç6¨pDÚ ƒÐOy'YÕTì÷ÔÞïßÓ¼ñ¹,ŠnÞå|†]×qxxhç*Õëõ…ùM7Yñ¨„éûq 6£äùQR¹\üŽ˜#žõIÍ4Y$Ù&ÈkÙh4 Ó´éxY–¡ªjl ¥Åbq*†8KKãÂÑ"Êø$†aWn¬ßvooÏî'?í…Z)•Js×Ï}TF.ºDÕÏž=ëÚžÏçGN´^^^öõæmnnbuuÕ(óTN{mm ¥RÉ·x̸—â‚’ªÃöRalRè\QûFÍY”oUUí~u$Kªª¢\.C’$Ôëõ¹îÊŒFe|†-÷Ã0MÓÕ†ƒÐuªªÚ¡‡ãLrF™ gQ×NƒE—mê/˲= Oûd¸X,ºÚÓLJµZªÁšôó\tƒZ­f·E"h¼Ìo«¤,3²ÁZ(pùòe´Z-\¹rÀtz4åóyÛƒ;/†a÷Uô3Xu]O¬UÐ ëûE^ØY)QàΰHÌ£|ŽqÚh4ìIzÔtÌb1¯2> Ôx=ŠÁº²²À #“ªª–ËeŸŸ#².Ûd¬Îƒ‘*¢(J¬ãhZ‹;”wžf².ãqašf¢‘ÅbªªÎ}_ú(Œ•ÃÚétËå°¹¹‰ÍÍM{Û¢#6àõóT&Yp‰rXÃŒCI’|{úM›¬²y¥R©¸V_©ßm½^G³Ù„aö*ñ¢ä 3 AooE?¨%Íáá!ŠÅ"–––°´´d÷×N[h%“]†Õ¨˜Wc•˜‡bb”̤ï1 ÃN×öÙYC9¬l°ú°¿¿³gÏbkkËÞvñâEœ={6ó…MÓÄ©S§” ÖR©4 ØI'Ø‹E˜¦–L}Áféa­Õj™dó…+Þ©ªjGT«U4 ÛãÊ0‹†¦ivîh˜ÁJ}÷ısppÀ†*3sTUÅÊÊ VVV\‹ê”£º²²2ׯ*ÃL ¤EÅ¥¥%¬­­Á4M¨ª:ðù4ŒY–í÷Q–É`ív»ØÚÚÂúúº+_u{{›››¸páB¦=­ÍfŠ¢ R© x*iuC’$_ƒ5 Â$¶4ðƒ„~VÞ2:{çÒ‡ªªvó÷µµ54 (ŠbÿVårÙöØ'-× “Ô°rˆü¢WLÓD¥RÁÎÎŽKÏÑ»‚afM¸Q­VíŠÑKKKh6›$ ;;;8::bÎ,~Q‡”îtxxˆÃÃC»R{½^÷÷§Y–S‘=mFÊam·Ûèv»v°Èúú:ö÷÷q||œÙ¸wUU±··MÓ°²²â*ŽAÆ,à„¸ŠÕ ÃH<)z˜'“öÏRèÉ0bÒ-l¬ûûû¶qÚn·‘Ëå°½½m±Yƒš¹‹(Šb·‹ñ¾$IB¹\†išØÙÙI…QV*•††pÎ2LˆžIž cá kdfjE#B=ˆ©¸]Bņ è-„–a¢B)~€eì­­­EZ`£%ÃjÂ0ñ3Ô`ít:èv»¶qJ}X5MC»Ý¬®®BQ”̫Ԏ&Ȱ ZaIÛêzÚ&P\x$]˜¦ MÓ8·‰aB0M34•"n&i¨è·Pa]׿ô^‡”ä "Òäa]"yXwwwÑét°¼¼ EQP(lïâ" ªj⓲ ¿HÓC­VKÝ" ä ]×íâ5 “vÖÖÖP­V9Љa¼)~Q‘$É•·š–ÖEa¨ÁšÏçÑh4X¡%ûûûh·Û( ¶›ušÍ&{¦{"Òç®2L0¦ibmm ì^Û “f†šÄ0ŒC³Ùèzï|•篳e¤VÑ«ªiZ­vww3m¼ªªêê?É0YdXu<†YdÈ«Êc„™4M‹æÈ0‹ÍéÇ…òVÙ»:{F.ºDˆÆk»Ý†¦iØÝÝÅùóç3U)XLÌf˜,ÂÞUf–‹JPKqVä&)¨öÃ0¦iN<§—eÙ~w°‡u¶Œm°Š  lnn&}?±¢išÝd›a² {W™E¤ÑhÀ0Œ¡“Ì<ÂÆ*ø¡H™IæôÅbº®³‡5F2X[­–——]ÿ¯ir¹Μ9ãÚ—x¢ÂdR¼ì]e MÓì•ò É=G0óH˜L3Ì"bšflõh Ãà Á ð Q>Ôn·qöìYlllØÛ4MÃÆÆZ­°±±ýýý¤ï'6(N’É2µZ õz=éË`˜™¢ª*ŠÅ"J¥šÍfàçx|0óˆ®ë®È0F¥Riâ…EQ`÷`M€HÖjµŠB¡`W `[ÚÞÞF.—Ãîî.¶¶¶°ººšô=ÅB­VãJL¦¡~b¼(Ã,ÍfÓöšV*ßJªäå÷3o˜¦™ô%0Ljˆ³$IìaMˆ¡ÖV«…N§ã*¦ÔívÑn·±ººŠ\.X]]E·ÛE«ÕJúž&† s.ÏdygÊ=¢ú¦iú®–³w•™WØÃÊ0ªªÆ6×)‹ï fº 5XÛí6–——mÃð÷̈ûç]×]«ï “EšÍ&$IâBfáPUÕµéÜh4P,yÒÏÌ-œÃÊ0Nîjœ(Ã0¸k 5Xs¹ºÝ®k[»ÝF¡PÈŒ‘J˜¦‰J¥‚VöLf1MµZe˜…Ã4Mhšæ óõ¬Ôú€½«Ì¼¢ë:O¨NîjœÈ²Ì6B Ía- 
h·Û¶‘Úív¡iÚ@®*…Ÿ˜¹† Vfщ³2°¬É0ÔÃZ(°¼¼Œ lmmaccÝn×%$¶V«ayyù|~¤ Ëym·Ûèt:S}µZ š¦¡Z­âèèˆóV™XIZ¾½4›M.$ÃÄJÚd< ¯w•¨V«v#ør¹Ì¹Ý €ù’mίc¢2¯2î‡aX[[C­VC³ÙD³ÙŒ¥2°I’Ø`M€HU‚ëõ:¶¶¶ppp€\.ç*ÀtñâEìîîbyyyäªN§ƒ¼NÕjív€•++V(Ž UU¡ë:¦òp™Å&IùÖu†aØŠ•ú­jšÆòÎÄFÒ:|È{êçyâ|UÆË<ɶö®2Q˜g1MFÍfÓ¶C4M›Ú|‡4“!’ÁJFª«««X]]ɳÚétÐn·±»»ë»ÿÂ… Èçó¸téºÝ.Î;‡­­-lnnÆv㺮£ÑhÄ*À0iïF£aOZ¨bd©Tâ¼U&Ò ã£B«í Æ<ʶˆ¦il°2¡Ì»ŒNt$y:‹Å"íÿŸ¦®gïj2D2X+làÊ•+€3gÎ`yyF¦cQ¥a/N­V —.]`Ë«««Øßßu°pq%fZ¤A¾u]ÇÞÞ^Ò‚É(iñQQU•Ç3”y”m/<¯a˜w½§,ë‹ÃÐVvîêîî.vwwí|ÖqY]]Åöö¶¯ð€rLÿŽ3ŽÞ4MnéÁL¤åà0fº¤AÆGrúx\0Ø7Ùö"zÆy–qÃ0P©T°··Çr¾`D2Xwww±ººjçÁ­®®bwww ÝM„ аó=ûì³xüñÇCÈ nª½Øt:<úè£øò—¿œÈ¹ƒ6žžyæ´Z­¡/MÓX¾œV«…«W¯&rîYèð èå…óE»ÝÆ£>Šk×®Íô¼³ÐߣBÞ&Öÿó éïi̱‡‘¤þUª×ë¼ø8ÇÐ|Tý=Ô`%á[__··Ñª %dÇIØ€»¹gŸ}O<ñ„¶†®ë¼2³Hxæ¯ÇÇlj¬ãÊ7`Mx®\¹b¯€Þ.Ë÷ÂsåÊ•D&;ÀltxÍf•J¦ilgƒ5;$e°ÎBB£Ñ@­VC¥R±·±þŸHÏZ¾dõwÍfKKKX[[CµZe]>çÐ|TùŽœÃ*æªær¹©Ýˆ†v ^n¹å‹ÅH1ö¦ir8ð"Qp ÿ§üëi(ÖaŒ+ßpÛm·E’oÃ0X¡/8›››SYPŒÂ,txº®CQ;d °ê‹E^‘ÏTèqÖ:|ú; š¦AUUÀÁÁ*•Š+rŒ=¬ó éïqjÄLJ’ú;ˆZ­†ƒƒÖáaÜ9x¤àYròäIî°„N§ëÀÕu‘Ð1àeMŠYÉ7/È0I1 Â0 ÔëuH’„Z­†¥¥%H’ÄÕ±™XHR¶`mm §NB³Ù„¢(vŸ¢(¶Ë=X™IHZƽP+2ž³3©3Xóù<–——±¿¿ooÓ4 +++±#¨“ALXžÕ”DHÍB¾&I’’qÓ4m½^¯×¡iÊåòÈýÁ&ˆ$õ7S:::ÂÎÎÊå²½¯\.Ûy¬<·a&!msÃ08b€0BHðÆÆÆÀ¶­­-\¼xѵm{{{â‹ÚÜÜÄÆÆZ­ºÝ.r¹î»ï¾Ønz!ò;Tå‰2H @ @ÜúCÐ÷ÜÒPÂõNÀ4å› .1i`Ú:Ü1$R’$î±ÍL…$d€íU ¢T*¡V«-Æü†™*IɸÜW˜!†¬'Ožt\"ÂâÜ£²¼¼ì[ѱP(àƒü ƒEñÎq°0áÀMLÇ`U0o¥‰é„푘±ÊòÍd$d<¿ë`˜qI“l›¦ ]×CCÛK¥VVV\žW† #M2„9Ã,6C Ö|>ŸH³à\.7•A²0áÀÓª¹3­ÔHsòC¢õÿR8Mùæ‚KL˜–ŒÁÅô˜Y1kÙæ]À…ŘX™µŒ1l¡†Y"‡­VËUÙéÌ™3©ê¨,ŒJÃt<¬+vÄýK°ªùÆM­ÿ_2ˆ À4qçããS”VîÁ0I²þ‚ökÀÞ]×Ù»ÄdUUíª×aT«ÕŘß0 3¿ŒY.rÑ%MÓpöìYlll`ww×þÛØØÀÙ³gk¡0* ^`ÂÊÝÇkÙ¯4ˆ+?6]ÖÖs„“›&ðs¿¼â³Ã¯m”ãÒ³(Á2²uÝšä+ ºgÎ๗¾tŒÄ03 V ‹š6¶ÒÏ* =Ã,Tõ7Šl—J%®aÀd†…q0-šfÍoÆ ’ÁÚjµP­VqòäI4 èºnÿ5 œó¾{êŸÔk–¾²â„IúgŽÁšQ Ãà ‘ÃgÑcŠEÿ…ša‘ ’<^k/NØp%I&«èºÎu ˜… Ùl¢)¼ŸÂÁ´h˜&0æo:Ô`mµZèt:¸ÿþûC?·¾¾ŽN§“êÐà…î£wÇø’$²-ÔÐ4­?ï¤Úo"]­oøðà—‚wàÕÿÜñÞÜn­Êx¯Mðo;Öu˲uM¢1@­Ã¾úUëº2lбr€®‡‚q£(Á‘a×qx<›Í±~VáÅš £i¾‘?/ºv-é+› ,ÛÌ"@mÉE±Vö°f ]Ÿh>Ô`m·ÛÈçóÈçó¡Ÿ£67Ýî8–Òlûô¥‚€1¢‡ß(°öF .-ogÐDZƒÕÒF‡c˜z'ÕT,ÆK;üý§ÃÏÿìfÀ$Ý0 ~âŒ5‘ùäs@çcÖu–Õö Ÿ{éK-Ï/ÃŒeÑlN–3ê ï•eÿh‘bÑ,öó°‡ïþ?ù“xž×À‹5F×­ñâá¶|ÅachÎIÝœ…a¦UÂVZ\s]‚ŒašE† 5Xs¹®ed%3uáÀ¦ÒÎÅü ½jÌcûyX 
ÃÙF}M)‡NœP†ïÄpâfà…Gkš–!\pϧ€W=xiMS´Þ÷à箺dEéh‚“¿*Á:O†áÉMDY]Æë…rqü¼¬A¡Â"å²ÿxóWOòÿ®¦á‹·ÞÿóK)¼Ÿa Ã/žñ÷òGÅcßömI_Ý”o'ìÌbÐl6Q*•P*• iË~µhª‡¡k¡P@·ÛE§Ó ý\ÊårI?’@Ò¶ ¯~P/Û7ñIè×€â[1^ßÑbÑ áu]¨ÚO”†c°–ËÖßÁpâ¼:À§¾æÞV«õ‹4Á2(_Ý ð •ËØùÜÛy«ªÀW_b…‹‰®TêWQ¶-€Þ3M“CdžQ*9ÞøQ¿7Š¡KùÔÀ 7µR±ŒØaÞ!EŒVðŽ± ~]_(ƒ•'7sŒ7¯Ü›c®ëÖXð/{ù£âïî¼3髟*¼É,$ç4)•Jh4,ûY# Z&*‘ ÖB¡€jµîÛív±µµ…ååe;48¤mÞ¼0¾ÚÿŸJeøvÆu$AÅ ÑIUeX†e–Ðy'Ó²ìªHmv^øWn£\L¾.ø¸ý—ƒ/MNiÖF–ªÈ…RþÅç›ÿ7ßôß¡}>¶G›Z̘<2M½>^þgPxnbØ®èM«ÿRεˆi†ëYvWØ ~ôäÉé?Ï”&½ÎŒˆ7Ä=¨å¸8cxæî»“¾ò©Ã«nç•I(˜(—ËPU•à³È4 V8þ<:Î;‡ýý}ÛÛÚét°¿¿sçΡÓé`ss3éGJÚVá‹{Vø®MLÅ`ÌZ_¯ÿÒˆ_TÕÁ°Yqrý+O¹¡¢¿1 Ãª.|÷‹Ý/{ò˾ëÀƒþ­fºå¬­ïý}à–ëú×éùB½|êï`>üÌ«ßÜ̇§I¶3‰Ÿy…·Q)zH‹ÅAã”=Ir1?ƒµÏÞpCÒOl&hšÆ›yÁ+Ûôð¦t—UÓÜã…d]ÓÂ`MÛœ%q4môÔ &õP80!Ë2EáÅš¬Q.O7‡°¼¬ÛÛÛ8yò$.\¸€³gÏ¢X,âìÙ³¸páNž<‰íííT{W¤nR£|0(ÒZÓb ¬¨^ùx„/P;o]"Ó´Â!‹EàÂÖ¶F3þŠ°Â‰?ü}ްŠù®”kòó˜¦5§±ç;;;Ào²¾'c°ÀT©ÜrÒï¾Õ*`~Oÿ2Jj'íc6ˆN]·EDü Ì ¼^# ³÷¬^cSÓF[ :FŠR¦ {¡æðډcÈOVÅœï ƒU×qõõ¯Oúަü¸ØX`–Þ™™à &R•—yüº~Ä_dæD2XÇh½|ù2Ö××Ñh4pùòå¹0V5MKݤf倛ûÿ£(ñ„ºh@I³Œòoð=²u¸½•²lM¶Ëeà=XÛrO_ùŠpÎ!“x ƒ9¤THpçšúÀùÑ¿…, ã©X´zÍÖœŸz`»XôG‘ù<Ö©NnÆ WoL£ ö”!cÕ›Ë5,Ø/—›Âx½¡Â~}ŒÅïÖjÃC„½zcÁz´zWç™)Gº õÁ=db1BÌïÇÂyX ÃHÝœ%ŒSƒ€I-Þp`&!tÝZPLñ¢Pdƒ•ÈçóP›››PÅnwÓívíÂKi$µ½Ì¾µÿ߃ƒx±',ûÓåvuøÄOú„%¸"² ¼û´õïîoßø°³o˜M…e”®xÎG¿‡hg}÷×Üžæ€õ ô+_žƒS«¿S«Â|>ÑDú4•ŒiÊChÒ™¨ªŽlÝ®[ùä;;ƒF_TƒUUr¹M·÷T–ÝEËèüÞ¢J”·´ å5ZÈÃJ•ßÙ5E*•ÁBHã ªÖ¸ÒuW.êÀ8£÷Õ8 ý$çaÕè3„¦i,×^‚tà8ï'&hšÆk ÅÃGÅl°Ñn·±±±‘ôý’¶ ÁЀƒD?êjª†àÜ"P9²æµµ°cø4c¿é&Ëø»ãŸÖ6Â9½•¾ŠEË#”[Ò„îKó}±“8gÿ‚k×€›nt&Õú×å÷ØLk?_6ŒþudÓ4§'ßãzWÈ8LK1¨fÓ™ {å»V³ZÄ4–gÕïYúó> ­/ûÄŸ‹áAÏË;A¥¼ÙF#øwðÒ¥ÒDa7óD³ÙD¹œáXÿ¤¡ÉËÎÎdÇ¡…IŽŽ,¹n6ƒÇå±zÛbÑ ÐS›=¬>éµ8‹1‰Þ〺!ŒÈÃßß5¤*É 3)¡T\DO±¬i'm‚XžƒHóAÃIDATE îØëþA“ EÀz§—Ë€ù¹s{Ûjè:ð¥|¿Ç* ̈ë#w §(,÷®Ë`-¸ô s>¿Ï¿ä«ÀƒÏ;ÿ_Ð}Æ¿Ë>•sÍp¼³Þ÷z?´™lÓ Dž½ºþjÜù©ì´D˜j…`ú­F}‰›¦õðã|ùO‚8¡W?ÈÈ<<´&Âaá¥b˜°¦Y+ûÞÉ Éý°P]UÜOãTÓü`úLÐ5fUU9xZ˜¦e4Šò\©Œ1ÑlÊ©X˜Ì‹_ïbv^È,Èpn¶~s’“¸ Öf3ÞÂNÍfø¢m€||œíw€½«©bÚ©Dr< c°¦­€ù ö ò/í‚?gÿ¾’ä„eQ‘"?j@]œ¿…Ù1¢gÆ0€¯\oå…Ò¾µ5!g–qhšÖ ÏqDéÀy„ÿùßXçª×–4¢çó§¿|µŸ+kÀ 6 —ÓË~/Õ-=¿¶†Áž³EXÆ.„ÈKa¾ÿòG_ŽÇ¾í±É~Ä1Õ‚ êÏÃ(—­ÉnZr޼Q¢ÁZ­Ž> ®T,ÁW÷ÅF?‚žã©SÖ1üŒ/I 7B%ÜÈÎ(AÅ:RGŠC¬B¡œS¿…«(=Äëuoi˜Áê·ÐåwœŒ’¦ùJjØÛ\È 
Þñq-ŒÚ!Y1+¨ÝÑlúÖz¸çžÿ+¦‡–^Ø`M†1Ý…À0!" a°š¦™:å¯#`~7 }€O=äûWá`Yv¼žAa°€e ö¿£?Ol£aý‰íÄçcÂ)Éëʧ°´¼ùUîã) PÜñvNî¨ àåß?XpIüÚé§€Þ‘³ï‰ç€§›öÇ]vEq°Ë‡M¿Ð“ë½Cï³”D¨ÆÉT=¬¥’qŸ´Ðl÷úx½2âÄ{œÞ¬†õ\ÅòÌFÑ/õzð áèÈ?o°¶/È$}æ¢X‡ªzúq͘IôB£1(w¥’Uê••ÑÏ5,týèh¶Ï*Epþj~éqêÓŒ/eXÏçýuë­_ŒïžRŠ®ëé×á‹Â´õM cj! Ö‰Ck¦´@.I@ý}€ò៵6hšk5žlTÛN ¨ (?ÕVþMÿãw}ÆpV·)òÅw[Fž‚¾AìYý¦k¤¾á,>`rÀ„Z¾+xÅ+Ü/›*ÜÆwQùjÿy8ñ9à¾ÙŽÐQ”þ§¢LáϨÀ×Û´>Cc°Vƒå­Õ­¿«¯¿:5«P¥èïç>¬(Œ_Á£q¨iZ†È¬Bo9,З™V6Œñ<¥”*‘T÷ÒÒxߣÂ]~9ÕÞÉu¹lýc´2¾¤2…)-ø=—¨ñ¢°³ßm”0KZLΧëÙ fOÞH¸Z-ÞÐø 4Àj°R1¥a[[[Sy†ñ<'}²°1ªr;"F°Î3›¾Ëð»ãMÖFÏd@–­Ehûä"ƒ-ˆëOø>÷dD–Ÿü%Çp,ÂÚ/&_›°ŒYŠÍõ¬fÀ5yç//ºÕýÿÜVÀ#o¾k7?íDC›}{·ÿg‡Å{£ÚÿëÜ¢®æwô©ÏçŸGV0M3ñ°H±ï̉2YñT­fMÐÓŠšah²33¹—åñ^ÞÔÇ:©JÙA¹ ^¼}ÀU58Ì\ŒÌ!Èh]Zrž“a¤§BøœÁž§!xu·$…‹¡@‡Èˆ)#Q¯gÍÇ‹à5Xý^’4Ñ.°ÙžþeS~ˆÉ’ÆV“±rêTÒW0ÞßÂ#“‘¨ÕÂI§m°F%—Ëayy9®ÃÅÊÄÕö‚Œ±!ÔjÁ:Oÿ çý×¾õè­ž•x]·dƾtR¢eÑRÚ#ÎТŠ×˜Œ c”á™ai"Þâc•ŠÿØ‚É0ü'ó;;¾ã톞ÁCMMÓ²]0oÞ´iœ¢Ã©ò»_4R  C ÖB¡€íííÈidâ‚4cÌM)u!ÐŽ¸ß ˜O>í™»|?R«Yß—šª#ºî*4À7òcÖÖPý@ñ¬g¿wÅ/¨¸…—°Ç§ÃªÀ4>ÅíôæÐ4ÿ…€"€ïü¸ú\­ßû ”ÔÚMaÒ0ã9wj¯óðaç·ŠNdûZÍlyX­Ç:%å8BH¡Ë%zWTÕõro6‡è.r›Spb˜æ09ö;Ö¸F'O$'‘¿Qg½bÁ•YÇËkšS™w˜—“ò±½EÉ‚ðó°²lMÀ÷ö¬ÜnöŽ ¢  Ñ°Æ”÷]áåZÓô)p42å²ÓÊtB©$„€a°ÈÌ@Ž—€8Y5d³œÃjšfê ¡ÆŽ'Ì{.)•œð9ï½xÛb’×bgÇÞˆ$?§Áˆ 5X[­ÖÀJ_·ÛE«ÕöÕ‰ØÚÚB±XtýÛçÕ˜ThúÒ¨ø¥ÙÜÞÿLþ?Áüú‹í‚SñÝfŽ’¾ãMNõ\¿¹Ó‹CLS݆¦@õäòQ…ÑœM+,ÖäõºŠÑ7,#ÚoŽ£{¶¯ÀYMÔ¾S._¹Ízî’ó®—ÀèWR–å~*¥è/qÞúe8ÞÚþqEO^³ 4_ÜÿGÿ-ã&NùÖ4m:«we-`’J)8.ý$æ+x*‹M”$ëGæ¢<;À„0cbooPYr!£©§Œ{I¤Þ¨¹¨â˜õÄELû;7EBŒRÉ[’†£²<ÿ†!LK¾3*9 dô‰‹0äL×…¡ö^‰ZŒ¡X´Æ€ªZw?¦é¼Ü$i°_pÄ\[ÓLÞÃ:Mýyï*0zXy)•¬ñR«ùq›ØWûàÀY`"bˆ¶yÑ8_ºxñ"vww¡O1?åøø«««®•Æ\.7òq&^Å¡[ [ÔfKœÿjZ?T5 ç¾ô, hO× ? ¹,£^·þ©^yJÊkÿ›ßÉõAû± I÷¥;¡½ñס}¨ïöwÖ|ö.àÿôL4Å2€iÎRôŸ*û=Fq»×8¥ýÞ÷ )wÐ^ìã¼­(–QÙoÿ×xŸv˺…ˆ7I´?…ã­Õ”FÅ=Ó? ”îŠô«O¸ä›˜ÊÇÛ'QU­É‚ç\´

Ÿy‚à¾Áa°A55ùÖ4 !ù˜ Í7ű$¾L¨º¯g¬Q‘aÀ½ˆäç­ *6æåÍŠ•€ÃŽ 8ëkÓÔ߉G¬­ .&Ä ­â‡õšN§NùWeÇw.FÆ,ɲ¸_’çSýׯÀ§>uçH—™Ú*Á×®]C¡PÀòò²ýW(F>ÎĹ b•ÛH.½Èr°'©Øîë3]‡ñü‹&Ë@I‹PT@‰äû.@þÎîÚÏÃnYc;0¾oXûÅü_IpìbMI—|1DAù¡¹ü>æz‹+5ª2m>ôñr»r¾%iøj´xÑ(6M§%ÇÚ˜dˆSƽLMæƒX[Ýè SGTŒtn1ú`eÅúÓõð\U&”iÈ7µ'Kºˆ^*ç-bÿx»EžèŽ,[b.Ë€ñgŸq·ðF刭®Ã0†8É$iÐø KOñîó†MF ­;k¦©¿÷°N[×jó¥TönÛÛóÀ!YŽzž‚+WF_üH­ÁJ!Ç[[[ØÚÚ;ybƒUeè­„ÒvÜç —×Æç…ï_wÂùŸþ—ì7š†ò}ÁÙO¡·á·u¡Guñw~å¯þGgÀ7=3º6G„Õ:†Záâ Ó?mÙcE‚e÷ŸÝÀ3|`lº»ó˜b>l\x#>§úÇNs .ù¶žïj¥aX/RÊóîóP¯[ÏZ×…ÝâÊ·°3¬²¹ë=–+çõN5VÊÉ£¾§qä,1#§Œ‹$e%è{€%‹£¼‡ì\1ð†õx«=V«ÖŠQ½>žž´2 ùž‹ÞÂIàZÜ‚IC·TÌOÿ½óÞ¨VýóðH·4Ð~÷ ƒÅÁ‡½O VïÜK‹Ç|`PMÜòCFh80àïÄ)9¨Õ‡4¿Üÿw½|“§u$92¤ëÐox£ÿ‰éغÞ_-„Û,ËÀÚßô“¦ûû^ÿ­ƒù©ܽfuÀøA‹,ôTîÿ †å@»¿¢J*l/§ñ­!ã¤;”Ø0E˜»¿0^æž“À1ª)¯U¸OÛ¨=:Ïvðè£âË_þ2’dù€gžy­V Nǵ},oű‹hpâ„%Іa­ˆyóCÎEu^ Z¥ýË0œ÷¹w1¼x÷3Ðÿüig…œˆPTZÁ!£@¬â»³cc^•Êxm?‰V«…«W“¯„‡÷2ó M>$i´\Ïa•FÃh6qè½–a-¯Áê½î Dív>ú(®]»–èuŒ"ßAú[dæíl&Q2q¡ëÃ'½Þ•΢ð’¢ èÐå2Pì\v§ªx¡Ä ‰,£ôúÏ® [H¢\Ð=‰Þ÷«®C–ïùKw»£Mèãfú;ñmU~Ä‹OêÔX×¹²2Ù1¢àF™1 ÖN§ƒoláÅ/>é{©4X;Ö××qéÒ%lnnâüùóØÜÜ íõúì³Ïâ‰'žÀ•+W\Û'Îu"ƒ, ¥„Ò*÷Ü¢^·¾'ßäÿ½ªÞ[,âàs‚¼ôãWìs†ÆýagÁî,…ùÊÏ;Æ ÀP{Î}žSÿ÷ÿn]X–G´_@ÊU©Õkƒ˜@ó}Â⟟B^Wò²–¡(~¶‰ÁPbÀ.Šd|»SÄÒµÐég ±?a1ßÔ«þâ9Úü8>>NÔ`G¾kÂsåÊ»ùHÞ&ð” Ÿ’Q ×s¼/~ ×g²L 6vDߊ]¿Âj©/ýðã?üÙŸYHסþBúoÿ%ô>ãþU^$¨¹ñÎNpÑ$qEP’ÆËÑK±+Ð8\¹r%ÑÉNœ:ÜËÌÃŰÃYM´TÕZ…óŽíµ5+·»V 6\ý¢2`¤Š$m°Ž#ßAú[d¦k£1zŸÅ¸¡jòQ”]Ð궸ÝÇ«å²É½áòbÇ(6o/@Û=v;nÅ|Õ0 îÁJ…N|ɇ?ü_ÑívçJ¾£êïDÃ)¡yšÞOñwä}1«±éMýò°²r)’Ô¯0Ù¦h=ÇÇÇxßûr#ËwdƒuccÃþ£qýÅA>ŸÇææ¦+Á{uu5´:ñ-·Ü‚b±ˆÍÍMÏóšpr#d>Ž+XÛ %@¾ÃÿÇ.ÞrNCˆ¨Ô4Èoø¦þa‰º|ö¯“Q¿ è¸%]½Þ2>LX†  @³ Dûá}D ÐüB¬™ðÏM5a£âx”€æ®Ð/Y wZvöéGÎBÔ@(¼ðÎÖuX 2(~}ð™Öj€q ¶÷ØoшôÓòò2î½÷^¼öµ¯ ñ3Ž|Àm·Ý†ÍÍMW¿c3jX‹ÖÿÑWVœ~‰£FøxX©Î oô{øýœbn¿Å/þ1ô¯¿Î®@g<üwP¾í1ÏçÝߣ°^ñÿG ¯ô­åææ›G{Ó„Æþ$ÝP677qç£4ˆ“8uøàó™qÁ¥$*üÊò ‘Y«YÛ(|i)xÒ•†$¸)²ººŠ{ッzÕ«9ÿ8òí§¿EfÞO›Šè%}B‘>ÃÆXÐ5z=¬ž<,og™•§=†±.ïyoé/|ô/Üâne<,§…(—ƒãzK%k¿ß{²X„ú3£xb ÿôÚ5äóùáçšÓÒ߉·³¡‹iç—Òý‘#BóúY_‹àa U^§‚÷9И0Æ———ñå/ÿÀÈú{¨ÁzòäI¬¯¯£P(ØËËËÛè/Z­Ö@èYâ£V)›x`Ðï9C‹ïbç]ÿ€H>N<ãMØ®þ0 þ‚ðAÑöCuÝ:ž¢\ê—Œ^²VÜib£õ_rŸK¢zN,#PÔç­CØE½hݲ àú9Ò­g ÊaÇc(ÆhÉÿ»‚,¶þ£b®ª ÜüPûßœ»Æ¼H+îögöaÿ ìüU?'_Z /Æ)ß¡á‘TrÿÔ)ëߊb=hÏ‹´ÑÁ›ç™XˆL»î‘¢ 
*êrÆmßg-¶ÕëÖçñ7Y_>8@ý¯îò¾ŸimçN–28ÃdÔÎl^ÛÔÅ)ã^f>±=¬“5tJU'»ä‘¡œzÝË~×5Íê— €éÈ÷LC%i’:¬íÑ,®ƒ&Pa×áš°x¶{Âxʼn˞Õ4˜ßðJ÷÷)Á•Æ—¨W^ûZ(ßðîp¯„¢Žz®ú…3—ËÀóÏC6’õ|OKë]½ùæ˜ìKzæÓ\¨ⱌ„X}wÚ y¿U«Cž½ß|Ï»_U]ã{ÜG?´­ ­´Ì’\.‡ —¼»»;²Q<’wU×a<;OvõY÷9Üé@_€(¯<•!Ú¶ ·² ù&«»M 0hò\.CÑ…)Á)³J³lì&PôeÒõ€ö€bçÈZÇ£û=©Ù”¾NÒŠ?êÞO! {fÿØïìæzÇžl…£ÀòØš@ÙGÆËeëoiÉ© _.÷Ÿ¿]üOåÀý3yÇ‹_bñóÎ xì±Û¢JG¬Ä%ßÀ§Z!$–ê(‰ïxZüФ}<7¢ntìÈò@ÅE?é½¶Y@Ã^Óf[ìBUýç`b¤3½(æÍa§Œ{™ù ý8e;IØE%Dº:ìX´²î c¯T¢¢óºÊ1GLC¾ Ø]¨¤8×I²Í Uª¦ë’]ïvqÇû9a6ìZ¸Vþ½Ï±),˜ IÚ|ï7¿øG€ÜpÞ[t½#@kÅ·æ÷Ò-Qÿ+¸¿±ñyüÔŒ~/ÓÒßãÖ ñ˜8°†žù´j&ÕÁb˾i3¤èÒĶ=åL Óî¾û™‘•ÊÖB¡€õõuœ;w8{ö,Z­Ο??Òq È68N²ÜL~¹b!ùö ß+ò¯¨Ê—} ­þé%OÛBRü ü+ 䱥L–·S PÊŽ°ù„‹Ë¯ð7Î÷ 0>a]c­ÖD‡U–r?¬wí}¨\­öçN þ޾¾ò3€Ñ—ÉæOµqó×ÂõE¹ìô¶?×ω•åá­~©&bTÂ>p[bk\ò ,•\ÂG¹Â^å³³^ØpàÞ…³¡©!ŠGÎè7¡<#ߪÑCð«M3Ú3t*ÇEÐõ4›Ö_µmŽ8’÷;EÄ)ã^fÞòC\•ô«æ‡w`άË4­cøÔð«Ì X 7éÊšS‚j¨ÍÓï¸{S†¶d¡IjÒVš@…„Ö¦¸ú¹}<ljA.ÔrÙõa]‡ý2²[Jm">x{µšõޏ܀‹Kòç ¦¥¿Çõ°R­“‰½ÚÓZ¤‰6-ZÆ ï5-¼.Ä;êêa<.ôœ=sÑqêa%Úí6vwwíPà CPÉ;¹¹‰ÕÕU»@APÞG‘óûH™¦»äoÿGRÕÁ@Àà™ëG%y- ²×ŸìÏï øËE;7– PÚH@]H?³sš‹ôX’×W´“ŽÞu=‡ÏÁÍÕ4 ÔsΫýÉ`Ô®¢À®æ[’¬ð_Yr‡ÿÖ1w~ XúYÇ£)÷ïCùP{¨8'´ÏZÏ%h¬”J–—•Þ?õ:Óg`U«Îu†õÏçŸÇ«^õß¼5šŒÄLò XkÔ <Í…½5YÄ<â¡ó%ϸ÷ί«Õà÷¹÷Ø’äèf‘Qtu¿fÓ@-Q(—}Æà†¥F¼‹*Ôvo/ø|T£€ÆE”"|”Gœ6ÇZ\2.9˜r¶ã0ôÄ<êb1z›$¿ë¤ëòD>؆ÿÊIFUÀ_ÌqÊ÷4Z5Ñ;ѷ潰ÊMoŸâEGý¹]Ñ7AãÐSÛäÐhQ["¶Eê|¯VhU•,¦Ça£Ñ/!Ò_£ŠRÿ¯RIGúÒ4ô÷8ÄÚ!Fcãæ—£^wõòE³9Ú¹üB½––¬¨Jª&Z,FòÀ’±4¯3  ¦•±°XOe™HÒ€@›¦ÕÖ¦×íP‘<¬š¦áܹsh·Û®øuª¶¼¼Œ\.‡‡zh‚»$ŸÏÛ ‹Çaäð±r`ƒ}o¶öy xXeÙ-‡´X¹²'çµÜ_=®Õõ•¢{²ªj?¬+l!ÌV|§QOVÀ2æ^ëSëæ€þØ^Òr¨þcëXå2PýyØyŸÖ3ë y¿Êqý>ç˜Þ¹: ½ÕzFü¬Y×"]ÔÍš¸¯¬XNl{•tox*E³ÖjÂ=…ôPõ¤®øÎùİœ{ïý»èr1%&•oÀšèD•qêö"þ†” <î ¦m(š&°´Z áäPHï< RAd ÇC<.´¸VYzÔãQ_Zª™#>¯“Í»êÕ1~ˆê´‡Œ»ïuH80Í kµÁþ£ãâme‚ï7¹¥ë¡Fì~÷´=¥˜æäØ«“扸ä;îVMTËh ˜¡sÂØåŒÕ"#®2xziÂ<Ðá,luQxxiTû܎Тpe+ï¼}äg¦(Î{—:²ÙPñ4ÏyÓ4üãÔßš¦µ(£ªÑÕ®=oW…çY<\o° åÔ¢ŒzȋȲ5™¦ÅN]·&CœrºèOÙ=~½-þ†©JÑ›T5é’{‚6îY$ƒõÂ… P—/_vU-+ X]]Åöö6Μ9ƒ‹/NvWS€¼Oô€ô‡º0ÞüÏ‚¿ *Î" }Gßzþí]¨ ¾ : ©vx‹0½Å;M@¥Ï@M4OGŠrÀÜ« §‚±àôà5•Wú»¬Ë±~Ýúž$ÁiOÓÇnÏÙoƒ~¨3MJ\‚+DVôz}ÁWú÷^²¶X;;ÂĦ^D¨V=¡šu „';¿³»Z­ŸCÆ‚ª×®E.H5 }ëÅ/””<©¢² ¯{ûh›¦Uè`ØD–á¬Ò”Ë0‹Š¯~xY»îÁ-W¤‡A÷A=vÇ ½EþÈÔjÖu”ËŽ 꺵@êׯ“äQ¼w¿…S¿çAï1ÒTäøøîÉn&… ØS„ÌáatA•a“š%y'ÕT{ 
CÞR{sB’tô¥q'ó|•_³ÙïtçÓÊZÓ€¥ýtS0#Œšø½[¨N˜ªŽ0äü¿Ï9ëu ü—ÿlð]D-qB°õmß5ÏöÌ ¡Zųy¨²[‡. xnig'xÎ#¶œ·zQ·öE®ª6)˜ÅÖOSX¤‰xãþÛWV¢¯ÎW«N…øRÉZô¤¼9/”nrꤿý(ŒO}ÍRO|=… K$ñZ‚· ½¢¯ýè}⇬­V Ýn÷ßèçî¿ÿ~ìîî†6Åž5>F‹†˜—?†•GþmðäV\‘¬‚B;;€þØàG{ú> ÇüõÏcçùßuÅ‚7xŸ˜Wáö Þ»E‹u/~9pƺ_ÛÅ2€’:éú.€ÚÜÂ*Ë}¥JÛë>mê0ªȉ,µ&­¬ºîeˆ2{‡Q>l£Üxã?Œv1)Ä×»Ú_’ö* GÜùªb!q{£á„£ú½Œiò"=sl Hµj¯,x½}‚=;…{‰ïaQ;”FÃ-ãx½²ú¢ªÑ boˆt£aÙPÔÇKµêž… {ótÉLÏÍ;15Mà#IªdÇô0M38žÜ1ô ©dܸŧÕjNEîYµ#HôÆÌnXtƒu¢þ«†%{ÂÊéP1¥@×(úwüÖÖúÕÊw†‡Í«ª»#•ü ÊekØííäk лC;èÙçÕ4`I«»ZÒ}È0 ¿ô‰AÝ,Ë0>ùuF8§½øN7ßl,ÌáWÃ[ç Ù´žI³é´G¦gB"¢n_ZrÏ)dm-Ûë82N²”JB ý•6ãi¸[¸`ðûºT~¬é¥p ÏïþFà7܉ê¥3¥Tä—Â2PûSy\]UtÅù·ªö÷{Âl}õw¬ÁÊQmx. ”ÖpÎOm–"ëšþ}„1¾—Ô'h`‘§k–-§‰o>_¿éÁû9ˆ A«Ìä©$H¿Ó" ®•ÚVV`üð*?ÖAñ‹l)ǾåHõ3ÄózìÙËı@Ç”¤å×}ð¾‚æm®^|P5Hql—ËÖ¢hÌQ1šsÝdØŠ×#æÈz£_ àÛ¿ý/Ç{°)fh‘1ï¬vyJ~³^Šo§p’4ÅõMª3J¿ß÷ƒªƒSÄ©Sƒüßý»;ññgÒ &ª|­i–’±'Žó¦;Ø‹“zÃÕηVsô‹iZ¿'=o:ìá¡%æTtÑÛÎ[­:ÀùýDoަÍ¥–Öd;”X6K…gÐxÍ¿·.¤Ÿ³7 ›K%4þÓw¹ê˜‰¤ñžg™Ò4—€‰ÍfÓÎd8†5QUë9ô;³¹‚'jµð5z–Dµjm;<Œ·ø_š·ýN¡EÄ|PŠ&ÊõÓÐ/‡8Ñ¢®rSÃ8}÷ÈEé½) !¤JŒSÊ‹ ~PÉ„ Y#ðÆ­Qå·ø3ö­ö†ðþ÷¿¿÷–·¼eØÇz½^¯wúôéÞÃ?é³qóðÃ÷xà×6Y–Ý:<ìõJ¥^¯×ëU«½^½>ä Õ^¯ú#\DµÚz½êÒ×z½׿ÞÁð9eð«Êͽ^ýž^oç\Þu~bçéõëd¹×+{½ßrkà<½^¯wä>N¯×ëõžêï¨×;øý^¯\îõvv<Ïã©^¯wØ?Ä‘uÞHõ\÷î}üår¯÷ÔS½ž¢D<Þ”ËÖßÑQðgë:üä%í¼ë]ïrýÿÞÞ^¯Z~Ü£#K0„çA¿[µjýÆô±ÃCKVÊåáçUë;¥Rt9ØÛ³äTQ¿³³c]›Y¶äd‚eàðpøqIFÅÇúÔSÖ5…ÉÖ°ýã°ã=Oñ™ííõz?ó3¿8þÅ$À°1¹³³Ó+ ò=ü”j½n)’I~ìŒ@z|g'þÇQ­Z2M:«'òùçzõúÎxO¯þ&ŽŽŽzʸ/ľί×{½Ã¿¸ÚëÕë½jÕ­#èù…ý>;;‚î)•z;ç>j¿öö‚uæÎŽÿ»üà`|ýÝ+—{OÝs¯u?ÖxëßPµj½âüî¥\vdä©§z½§^÷ÖžëKý ªVÝ:þàÀýn«×‡¿#χ& H^ÒÊ0ý=0€=W,•z{õ#ß¹Bßÿ¯z½jµ'ßôåà‰ï“0 š¨xñ{_øNÌ{=I²ZŒÐiŸzÊ_^ŽÂçáQç‡Ã ù#ëðÐÚ6Î|¨‡µP( Ûí¢Õj…~ŽöOÚ>Nî¸ãMî¡!{”ê›Úó€ùaCØbLI²úóÏ/z‘;×ôi14Å?tW  ­b”àòè– ûá6{{}ï 9½ûÞVß^_2<ÃvQ'Ù y¦¼×ÊžPàh¤üþ%•áT¢©b÷B¶ˆ':Ðç|¾¾wµÙt É‘WP¬ O¿§_J‡_ ÊÍ¥àj©d^<8ÿ…’ùm÷óÚ4–G Ê]¡^¾÷z"½x£t(Ä­\|6j}êÔpÙưÖ:Õªu-AzK .•€nxvü‹Iš¦AUUÌ:ÔÖÇe¯Ρù _/jäñkGå×áÇïØj6-BEk OÄÑë_/{ÙóI?‚‰Ñ4mì‚Kæïÿ V®þXûÙÌrÕvT8%JqÔj€$¡œÐNá k”ê3Q?íbÒ‡ÿÅ·æ¬ 9"Õªó>òBu2¨Htæ5ÖKÀ0\á(õºõYº>Eq¿Û‚"-=—ÈŒÈ8ÞU]Š×}0M/ýðÜs‘¾§ýä¿GóïßÔë(®ÜìÑ£j*1'Î÷<ÞŠ ^ŠEà©§bo³Có;Iò¿-o·6¯Žö¶·¾‡˜òEaÜ[d° lmm¡Ûíú~¦Ûíbkk ËËË7… MÓÐíþóÀ)á¸ÿ¤ÄVA¨þ²õoÃj¿íêÔ7à ©[áìì/y ”à ‰-~\d 
¨=5x®ê«½»ùlðõPhe±(äwê€zÞ*Ôäk°!r©ÕÆ´9:Ú§xl¢ô𜨽õ/åaÞý†XO=H§y Ó«S‚úõ‰á¦^Ä.r^¹;x[+4›À;ßùxÒ ÆÍí€æÃw¢øƒ7Ù¡¸§Nù§ Po0jÁQ™É2 K>cp’„äU»aíÇ\ïêC>â䯝%3ãÓu_ø°³ùG ¨¿ä½áî{_´\bÙRN¡­†©ú.`~åˆýr}|0?÷4–´ú@E\Ûs0iEÇv‚ŠÒ0¨qý>j(/E>NÝ(¿tܨD*•zþüylllàܹsX]]u¥W®\Áþþ¾ý¹4ñØcÿÈy0>³óaM2IðJ5_i9+›M $8ßjKÎ0úP‚«8ô¤óoã£@ñ}Nvè³­†þ ó¿« "E@~€“#>áúF-†4)#åÃŽÀ¢9<&:}KUœk”Ëþ=:©Ø’w¥x’šq³³ãTו$g±=*4VVVÜÊp ù­”ÓbõУïɲ“ÿKÿõšâx&APjå¼¶ ¢V«aooÏß EÃ~„Z-Új`M*ÄjV>Õ~ãÈ1SÕxÚÌïÆÛÏ’€ÄqÖúðí•ì]kˆ2ŽÄ^àùüóHQDZÑuÕˆJdiÉzN´èVþÏ«ö¾rÙw}Å‚É$ÉùQDhÕDt=º/ÒQ.bãçbÑú®xQt¦rÙòðU«Î˜£ÏÕªÿ¢YPåÈ•ÿ1Oã®^†d)Í £Læ÷öœªÊAm|ûuªfÓ €¸a ¬´=H É •CÚF+/Þãíí9=_(çˆ^8 T”Œ±7Š@Ô7 ¾é"”ý«PþîSƒú\øã”#b^ýJ"Y¯c¸ËîM¡¾†ó ?ÔwP’€Ú)‘†$0¾ÁJÇöÚ£1ï‚¢·<éX¿–~^u@·ö3’ƒŽëýVÀ½÷~ÍÙP¯ûÎ H§ºôm±½iÀüœê`å2$Оè¿,þòð/^ ä^ç> U×è—öÉo¨À2tÅ1¸˨Ôúÿöc\Ã/Á÷Ulf&C–e{â¬&‡b©d¨¤0iî5b¼“Í´R*MžÚá÷ýjÕRÐAÆyY½ W|¤qÑ%Êûwž­žJžša(Љg¿%ä!Ç#£zNŽc°Š­¤±ËÁ$PÅÓ¸-(ÊŽÞÖJÞñCaó46L3xüjZ°Ç5¬¯e–"4MóÈÓsS:(2©^·¢©Ïª¯ ö½>v9q1bÀïásÁø)»aÂOJ4æÜL÷V*N§ rÔj°«‘ͦeŒ³à84‡ÕK.—Ãòò²ý—FcÕ4Ï|æ[ðö·¿àl ‘’Ú;=?D±­uòíÿàj–¥_*U ü6ï}©å¥ô¶ïuQz@ù,¬pa?ª>û$Xy²aï…5>Ça^< iFÓ4H’dçè×e@¡Pˈy0Ng‰$Y)+a²–Ê•Fc5‹¤¡äã(#5®Õ`¾åÇQ¹uK/océ{¿ÚªHÓ€'ü´qºˆÇõÞ’¸ú> d,R;˜ ÏDíEI†Ðo '' ÇÓtÙ)BÞË0½K‘"ôÿ•г=jïhº4¿BbÔ°_' UuÏÃj5ç8ͦÕ×Wì}ì‡$9σ"©‚&¼á¿ÀðTуT’"ö'ÝŒjzˆúˆŠ^zçž“.8fÒ`-«WßíÃM x HßëÔh6ÃÿᯠŸ–Ý“¢"P¬62Ä ,Ã’P0h”*ýÏŒú2¦Ö2)Ì•c’…ªî‘¢¤pºÀ¢ 3‡ ´nò8†P,Bƒý3¹¯9yþD‘ÂÆÄVCbµéþ!‡VÕÖõÁ‰‘ßd‚V·éQŒJ/Þ0´çÿUÕ1‚­Ïú‡Í¦Ó:ɯ%•a8FpPô…K ÁI6õúðŸÍÏs›Ut]Xi6-ªÜl ¬W€•×ú-7tÝzˆ‡‡¬ç™¹ÀW‡Nì(Y†ãä÷Ãü£?·d< ?âðÐÑ/¢&Ýxõk¹ì,Ò>qñÍ0¬Ë8u*¸˜o¹l-ÕjNM±ZÍÿóÞµTïÂÞÚ{tdéÚ¥%·1+"¦ͦõ¯¡éçÅ-•¬g¶´Cßk4ü¯A|'”JÁ­ƒR“¨.ɸDÎa7ì"VÇó¢Ãe\’wêà@ímƒ†§ËðlÂñ°ûÇQúÿ51h`a¹:FßÍP&^dY¶•)¿I Á0LÚð-HãkäúŽõ_Yvç†Öjþ¹Ôc˜ ”Zâ÷z¤÷JPôŽX×$èý“tjY& VC\:®T‚ËiipŒÒ5 ´'üÐ2üKVî*!þ»+Õ¿Pa†]×ñ?ñ¿ZŠ­±‚ÒÞ«#ÃÌ;¡+ó ŒjãQ‘ 1uoiÉz„{OcnCÎ4­•i¿P/ £ò+ò±¶æ´€\YqVу(•¬×•˜ç5U—BâN(ÄÇG‹£V˜¦þÆ^¯÷y3>ƒ¼Y*FP~¶$ÅséèiÏ0“`š¦Z‡0/'ëmÝ8L_Q¼v^ýO‘it^:žW÷nÕKèm¥?}”cDiwãç©õ’Ö…Ã̬EjÖöËèp L Ù%öKR`ž~‡Ô„Ïø1œa¼†Ï~öÐüI² 2²ÁÊd…ÀBž7x­æ¬*—Ëî ‡8‰jÌù­èS/?ïëį] àTB¤?­Ž‡MüZ•H’eÄ!§µß…RÉé7‹äL ’וּ¥bÓ4Ñl6ñÞçž lM`Öâž×X$tÔ Ú}ôÞðc’âL‹¤ËfE& VI’¬Á°¦AbHp–ÑY†ºë·Òëç%]ƒeðrqfFìííA’ù}ÿø!KÆ‹Ežì0Ù!Jï>Ê ëI‡Ç.hòD“ ÊER'÷U„æiÔ¶2o˜,y*)”^iä¹?ÎäJ왿 
“Ì膭¦ôcôtSëÝ`˜Qi6›øõ×¼/yä_c•r%½Æê¬àŽKóEF‹.­žOa…9 ¸sT)°z¢á7¨dá 3ŠÅ"dÉDñ¿ëÒô‹NÇdŸÀ`¯G5)Èy6þ¨ðѨÔë–aypôzî"P•ÊdEŠÆÕì=˜»k?~Üì^·P§˜ìóüê¯âmŸü¤k%ŽªËŽ^ã6qL2i°,Ã]ÐHåa5`¥£¼ÈËR0ib‹Æ?ÌK„Lfñ-¸Ô‡*3¦ÉU­†¿rÆ5®½aÄ”ç)IÖýïqa¾¹Ã4M+ Œ*¾ìíÁü–×ñÄÉ š¦áo¼×ÿÔO¹VÆVVxa쬣¶=ÂóW‡}/áêYÌâÑ|øNn²ÊdÛûD˜¦U= Nw†=óŠ=P«¡öš÷Å"d™Õ9“TUÅÛž{n@¨Ù£ÊŒK6 Vj°¶ÜíÓ#ÎÎce˜”£Ÿß‡œž—*™L2PFUm}ž–0`†MÓpæºë þÙ·Àü–×%}9 +†aà5=„Ü[ßj·¬©Õ¬}]×ñÐC¡Ýnckk+Ú—M5X«+ü·ÒßÖLúÎfBù`<{;¯X2©fRè:Ôó_€öoaO“*&‘ï„æ3?Â9«Lj™H—Ë0k 6V™XI¥ÁÚétÐjµ°¾¾ÈårX]]ÅAÔæo€X!¾uG°ú§šðf˜2±|Êzç—“Vâqè:jÊ!Ô[Ïãà£7°'ŠI “ÊwãîßCé§®g™fRIú{¥tªUž§0ñ‘JƒõøøP(ìm…Ba¬\?Vë™9òJìH9ívûûûI_FâÄ!ßY胘EyÈÚ˜—‰e\×QùÏÀüŒƒÿò²¹ØgM²8fÇ!>ïÞÕ,ÊBÖÆë¸L*߆aÍQæ½pÖäaÞÇl*sXÃE·ÛE.—ØþÙÏ~>ø`¤fóó‡>ô¡¤/!Vüq<ñÄ“-<ÄtŸùÌgpâÄ lnnÎüüãÈ7a}}wÜqòùWaÞuiZä!NÒ2fu]Çç>÷¹ÄÎ?©—®ÿGè|×IüÐ[þ—.%v“yˆ‹´ŒÙOúÓxâ‰'póÍ7'rþqäÛ«¿çY®ôÈBœ¤e¼’þNj>Ç<ŸÇÜÏQÒ"q‘–1KsðQõw* Ön·¸ïÚµk¾ƒ¥P(àÍo~3î¼óNÜ}÷ÝIßB,¼ð 8sæLÒ—O>ù$ž|òÉÄŸ'Ÿ|wÜqNœ8‘ÈùÇ‘oxã߈»îº ·Ür n¹å–D®=NÒ"q’–1›Ëåpë­·&vþÉuø]ý­É?ËIH‹<ÄEZÆìí·ßŽ'žx¯|å+9ÿ8òÍú;ý¤e¼’þ¾ýöÛ9?ÏÁ-Ò"q‘–1KsðQõw* V1 ÁK>Ÿ÷Ý~úôiœ>}:éK•ååå¤/™ãÈ7¼ç=ïIúÒ™!¤eÌ&}¬Ã-’þ²JÒÏuùfý~’–«´\ëo‹¤ÆM*sXOž< À–ÐétB'ó 3/°|3Y‡eœÉ2,ßL–aùfÒH* Ö|>ååeWr°¦iXYYIúÒfbX¾™¬Ã2Îd–o&˰|3iäD¯×ë%}~´Ûmlll ŸÏÛIÞÛÛÛù} 3O°|3Y‡eœÉ2,ßL–aùfÒFj VÀJün·Û8–œÉ,ßLÖag² Ë7“eX¾™4‘jƒ•a†a†a†Y\®{ï{ßûÞ¤/"ë´Z-œ8q"0”¢Ýnã…^ðݶ/ÊþiÐívaF`ÛŒI¯9‰{bÆgùŽcÿ4“ñy¼f2Âd|åu8C,šþvMi¼f2X§ÿž"Ñc¦ÆûßÿþÞ[Þò–ÞéÓ§{§OŸî½ë]ïê]½zÕÞ||Ü{Ç;Þaï÷»ßi_”ýÓàêÕ«½w¿ûÝö9ßñŽwô>ùÉOÆvÍIÜ3>“Èwû§A˜ŒÏãý0“&ãó(¬ÃbÑô÷°kJãý0“Áú;ý÷4 ©¬œºÝ.¶¶¶°¹¹ ]×ñÐCÙÛˆ . 
ŸÏÛûÛí¶½?l_”ýÓ`kk N=ôt]G>ŸÇîînäkJã=1ã1©|DZ„Éø<Þ3>Ãd|åu8,¦þvMi¼f|XÏÇ=DÒsVyøá‡{§OŸvm{àzïz×»z½žµ’qúôi×êÈïýÞïõÞö¶·…îöÝiqõêÕs÷xàH×”Æ{bÆgùŽcÿ4“ñy¼f2Âd|åu8C,šþvMi¼f2X§ÿžFåEIÌYeyyº®»¶ãÆo´ÿ …BÁÞ_(ÐétB÷ ûî´ Jq…BívÝn…B›››‘®)÷ÄŒÏ$òÇþi&ã­VkîŒ0Ïš|ÇqÍ,ãóâéïa×”Æûa&ƒõwúïiTØ`¶0\ºt B…àèè(p_·Û ý.õËŠ›k×® Ü˵k×°½½=T¨£\s÷ÄÄèòVy“ñy¼&>¼2®iZàgÓ*¬Ã?A“ï4ι˜ø`ýÎ{Îa÷ÝwÖ××/^` @O>ùdà¾k×®…~—„:nÄÕË—/ãòåËX]]ÅÆÆÆÐû‰rÍIÜ£ÊwZå!LÆçñ~˜øðÊø<ÊëpÆEÐßÃî)s.&>X§óžF… Ö°¼¼ŒÕÕUÜÿýv‚´èv÷û|ù|>ô»ù|~*÷pæÌ°ÃK t»]´Z­¡×4é~&½Œ*ßi•‡0?qâÄÜÝ^Ïš|³_\A“ï4ι˜ø`ýÎ{6X§ÄÖÖ–½òAP~œååeìïï»ö¯¬¬„îöÝiQ(JhommÙ+3“^s÷ÄŒÏ$òÇþi&ã÷ÜsÏÜÝ3a2ž5ùf¾X,šþ&ßi¼f2X§ÿžFåD¯×ë%}YåÂ… ØßßÇòò2Ž] Ò€“¾±±|>o'5ooo#—Ë…îöÝiAç¤UªQî'ŽýLº˜D¾ãØ? Âd|Œ0ŸGy`΋¦¿‡]S Ößé¿§Q`ƒuʈ%Óýò$ºÝ®½äݶ/Êþi0é5¥ñž˜ñ™D¾ãØ? ¦9&Y¾ç0ŸGy`΋¦¿‡íOãý0“Áú;ý÷6X†a†a†a˜TÂ9¬ Ã0 Ã0 Ã0L*aƒ•a†a†a†I%l°2 Ã0 Ã0 Ã0©„ V†a†a†a&•°ÁÊ0 Ã0 Ã0 ä6X†a†a†a˜TÂ++­V Ýn7éË`˜©Á2Îd–o&˰|3Y&ËòÍ++vÓa†É",ãL–aùf² Ë7“e²,ßl°2 Ã0 Ã0 Ã0©„ ÖB®úN§ƒýý}hšfïk·ÛØÝÝÚÊ»ÛíBÓ4hš6“°N§“éÆMR2ž”|,ã‹ëp&˰þf² ëïùæEI_À"±±±ÕÕUhš†B¡€V«…ååeäóy´Z-är9lmma}}›››±Ÿ{ss/^D¡P°å¥K—Ïç§r¿ívPËËËS¾Lò$%ãIÈ7À2¾h°g² ëo&˰þžoØÃ:cZ­>øÁb{{›››hµZ€Ë—/ãÒ¥KPej+</^Äöö6¶··ñÁ~7Þx#ö÷÷§r.q œ?~jÏ“IIÉø,å`_TX‡3Y†õ7“eXÏ/ìa1+++Èår€B¡PÅÞòäÉÐÁB¡ A¬®®®Ö¬®®ÚçÌår8yòäTÎÓn·qñâEœ9s&3…‰Î$2>ò °Œ/2IÉ7ígÎLÖßL–á9øüÂV&v¶¶¶pã7âÊ•+™ˆ›g/,ãL–aùf² Ë7“e²*ßìa3òù|ì±õqŸguu›››xûÛߎ­­­L­ð0Óeä`gÆcVò=é¹X¾™q`ýÍdyñ¬Ê7¬sF»ÝÆÖÖVàþÍÍM;ä ©ó(Š‚\.‡óçÏ£Z­f&á›™>ó ßË83³’ïIÏÅòÍŒëo&ë̃ŒgU¾Ù`3òù<î»ï¾Ðýi9 ’ .àòå˳XÌÜ1Oò °Œ3£1+ùŽë\,ßÌ(°þf²Î<ÉxÖä› Ö9#—ËÍd¥$®óœ?gÏžÅÖÖÖÌBá˜ùeÞä`g¢3+ùŽó\,ßLTX3YgÞd> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½M.9’¥·Ï_q—Ò"ßq~“ -ÔÀTC£tkª-³š©®–º²¡©j@ýzÙ!¤óˆÈŠ›Å@– é›/ÃN>n4;nüï?¸o—ü?âÿ”æ¿ýן~¸^Χþëþ—üüßÈ/×ä_jÿ ÿ™jL囋ñ[ÈýZùý§\ˆMþñ§ñw9'ÿ¾ö?PôÏ?üŸ?ü«ÔþGùï¿ï¿í?þó¾ýÓÿÇ_Wãßo>$iˆ¾ÿëOý_%£'RØÿñÏ?üÕ7–á“ö iüá?ÿù£ÿöƒÿö¤¾ÿ[ŠrL5„oÿïîúö߬ûw‡ê‘nü´;v¢ã ÿí«ŸÝ2?…W×u¹5!¸ør9ç”åŠ{dù§ãM .÷Aç_Ê+çÖZuå+Æ`v’üÝï7^~ôî•ä‚o¿ÿéÛþŸœŒüÿüí¿|ûýøáßÿ¾wýgëòWm¯«&W“<ÎOÔåuZ» Gžæ †ÃkW÷&ï rIþr¯Zž•Þ?Ÿ«ØåúzÖ{ÿz°Ú^Ù?ë½>Xñå_Ïjûçgíõ 
^jÌéätê®éÄ•N¨xM'ª÷ètêÕ®éÄõN½â{:qµ4~þâšÜºúŠ—¢ü­]\¯åu1ìn¦öåÁ•ø*/¹šCº\z¾;ü·õ’úEµ\UÞB±¶ìÞzC­Zü/ª¥ÊBÎ¥âŸÃµë¿¨Ž\Üë*Q,¥V?¨$R%Ý–¨¯XÛ?^¯«ÿOüã®PµDÿ…»ÂõŠ5zYCþ›ðêåí/Tx4$d™u?¦Ñ–¿ÿÍoÿá÷h¶m>ì~Éî•‹Ï>J—c[Ñ•âZx>ÈSãÇ8ªýËÿðÓOŸ·5xÁ]ïò»ŠÉ*cC”®HwÃ% <¯h¯”ù\åá‰iÔr­}»%ö’¬(ÎèÇÚÌM BßÕT¹eÆXg÷­•—XÅN_EQ“;6ÔÖÒ7Y”9»Zdáô"¹#"Kþ›Ì“xµPäö½HnXQY­ßäæ¹J3ü…"з N¦L-¯1º¹õ"¹a’–”¿ÉÚL9×ârìErC4Cº)%U^X>Ž‹œÜ/Êý’Ì+s?'÷«·q.zŸ.ïúý0'Ýýâ“"_¥í)` ¼X­ˆÉõ«RÊ^¶”½(½Ü=LRâ’ô«´ÚKä~h…Øöh…<\ç[íE25 žI-},d·¼ü^mH®DMò{ƒäïð@dÍ¡H¶ %úQS”·Ú®Ô‹Z”‡3)Ê ß¥ˆ5ˆºJèu5iDö½®$7Ä)¡7#à —Øo˜ä†hFÈÙÞP,þÔ òç5cBxY-uÎL)’IXbêÂg¹Óœ˜2[‘1l£Dn$£PÛåPR¯„ û“àTyÝÒ—¢„¾ÆU%½îIÔ¯ŠWI}F¡¨Ž¿¬)¢H°Ûh„¬Žºfe|‡'1î'è׬Œ²ˆ¤éíê.Ï,­Y±ó|´²8êo¾HÖ†ÇPÈK EÑ…2g¥¬Tî—¤]N&ìx¶AÖF“VÈlWE,œÖKä~x­ô‹daF‘¬¯9)ƒ hÐ!/„W[“2H« cY~MJiDÅ¢é³Hžç+¯i)WÉÿMchƒ,†¡p1£È-cY¾ß¯4É„MhJä~Ò ¹&õöù%˜y5y ²m”‹¼Ü[š>&LÅÖ¬ô2Ìø÷h»,޼f¥¿mØ!ò²‚02í3ÖΠøºé+Í›ÀÞøàÐú²W‘Øa˜ŒCì³Å8`…ç{91˜K. pƒ R˜,`Ë á†cU3Û&%p“ÎaÈÆ´%3û4ëëjR•ŒäÖ1öCÝXßÌ`Ô./y¬ñ^–ÀÞãmäüƒÀg™sÇ,så¦a°®Ë0ØeéÖXs–Á²:d(üÖ‚™©ÁòÆ.÷êY–«Ò5FÎ8döM8"°,é¥ël± ç¶,€c‘ÊÇd¶–éQbév8Ê^K–ÀÝdC`*ÒfØ… %†iMh£ƒ2_ï#›"&p1ÜïqK`…t‹`Y]²Ôz ÁRâä~Í?e˜å™û”ß °¾H8ÊüLy €ù˯Ãß&–ò=ý-€sc(-€…V2µÚxŠ à€î9É–—&ÙD0½K>…`·\¿…oþ[ø…åø,“SV°`QìÕ{ª‚±›o àA`yùʲe„`ÙÅûµÁk5ß„cË›¹ÊU÷Œ&ËÓªËàfë)˜ÞtL`Ùv 0ã*"0™L`y›ËÕóA–«Üz‹e2 ©°÷°–­R…õ>A&ë…eÞ®ñcËZµos’Ì7$˺ŠÒô0Ö=XVÅ‹jìˆÀL "pB³Ûœ›ÀlI‚eð#^y£i„`6 ‰Á L¾û•G–º ¶é a!a1Æ‹ LC˜g‹1ƒ#î0¶Æ ÞV«µ‚'(,€ሿ´f˜¿lÍ2Õ팬֙åo †Áj `©DFxøc8É ÇD¶Ö-7ŽUÆû X.ªqî®™¿Ò ì(ÇŒ¸­à ÞŽIÿ×ûü~!‚K~µ|/Ckg±i6ÈØ–õtÇb g/v¡t.Œ2aY’¡8m<â0Êd;(;ÜÕ#»$ûí01 F‘Löe_i£Ì¹e^k»$&*Öÿ ´f± äÅ^n¶kã2L±»ãDc\°b'sŽQ&t÷ðŠÇ(Êï&_,‘{SÀÂûe¢‘Ü{\Í2`–ɶ)Ê(»€ˆ›”šÊ(ó°O†¥AXFˆÝÂÃ7aŠ4–mmšËèAJXÁÅ-0÷–‰¸ð«ÈŒ²ŒÅv¿ð4šûuXnÉ’¹7ïÂ.&4ÛÚ4›ík8÷¦1·OAÓÙ¶RãeêW²|î÷¬À°oÐv¤4¢e Ó¦™j”y8×rˆÔ¦™Õ( 0:ðà ¬{Ï^-YZ›Ñ$^Û2 l”• ïM_DlÛ=lóˆÙfȈÚýºÙ0ƒm[½}hŸ°‡p%´²•.Þ¡5ü«ÇˆQxP¸òVÝG¢MíÖP¬Ž¨%æ!ЈAx;ºe~:®IaH4°‰›#Ë?ou¹:ÿr\¸BO~y£?Ñ bxt„ˆÝˆ8*U òºÆ+JÞ5ÿš¤*{µ.,y~Ù+(œSĨÞd1:ï—ÒY)mUzV[ДT…ê=«-hZªÂõžÕ´-UájJUô¬½:c]¹ÚÁéÔ+\Ó‰+=:PñšNTïÑéÔ«]Ó‰ë=:zÅ÷tâj?'U‘ÜoØKäcR®ä”T…k9%U¡ZIU¨Žï’ªüŒž#Ùx7Ùd¹ø3bK‹Aþò?ΉAB# pD2æ”(ì×Ý-Ý΃Sµ\ûVce¸Š¯Åú® <6vïXYlìW€Ø´qsÌÀÞtøvȦ‹=à;¾‘.í§€XrÊ.%ð1Œ‘HN{À)(’dU¤z¿¤èB¬}ļ Ù¦˜ÈLòìWj ì/FûR3ná´ÜË`J3FR†ô奾…G ûÁ·o:ɺhÒïZ1º 
™0Ê .EV ŸŠ¼Y”œ«’…Ѱ:¯Ø{uy?% )!bo#îàî”™ÆëW=ar{?"<§Ô L`20§;¬–IÓu½I`)8Á‡7ˆŒXd€S÷¾Î ¸bÜ ##xR25‚¡Õ‹~ôÁíåÚý0™Án—çƒIàÇ &aC ªîöæ2„ šé/1…)hK–w&âZ¢p~U„&Fd›)L"¦0WE&…s¸¼"x7f“8¿¤ ñ޽[gˆ.G˜‰I,æbw]…õ ²-™1‰›ÄÜ×Ê‚˜¥þAF±¬„€‡;F¼M^¾Þj, )VØxÍ7–„„ Owܰq4r¿ÅÝdk] £XÞ£²ÞüˆŠé Ë(¦æ1б.1q\| 8¾¤ÓÌ(Žp•‡[SÉ(¦`)£˜†B“˜bÊLâGeS—9lŠ8©‚û–ÃúM–ÞÕƒ0‡õë¹c˜ÞÊ!Lb]Æ­6$n9·¤©bÙËÉÔ™š)ÃaaÖb£æ°“ÛË*X¬ WyKÀ¶°,­âf$AL’*±Ìgä€@̘¦ƒ˜Ô• b20Ä:Ðo¬a²“5‡i%0‡I7Ä&É’5‡u]Æ–-¹”ä'‡ec0 eÍ`¶Õ5¬c­aý 'ƒ•òÎ2¸‡`‡hÀ2¸ä4ÜL`Æ<X ù,€Õî‡øK[&°X@Ømj€eÑÈžý$ãæmm4š·­La¹ÜˆÏf Mû5¡ûp8<üf¸ˆÑæi¤mS4¥mS4¦{}îÞF´æ´m§µeBuŸÇ°ç†…­QÝg>hjbµí†µyà„k³J Øv45³ÍLjÂ§Õ Ùͨh©S‘õ0, ªAÞªûD@çîÆO»cGj©Cõ 3eæ1?—bˆ}bÕ æ§ãM .÷Aç_Ž«A¨Ãã©§_ª¥&ïX©¥u‰ÛI5ˆ¼mèäµêMj½ZŽ<¿êέVÕ›‘Ü¢¿“ÎBh‡ï¹Ò£á{T¼Â÷TïÑð}¯v…ï¹Þ£áû^ñ¾çjªAô¬ý5H¯pM'®ôètBÅk:Q½G§S¯vM'®÷ètêßÓ‰«ýœ$ÈÆGHÞwjÇÔ \É)5×rJ BµRƒP'Ô Äêsìœ|¨ñžÕ þ—Sj džìSdÏš…dê#´%ÄôÚ“2GL~öラ’¾øºöÎ.{퀑¾d8ÙÆ-í€AR9ÔÜFÑøGVäzíå÷çŠ9hÿË.K+’áØÝ[¹J÷ª×µå“®nïKkÒ¨é÷ÌðjŒ}¢ƒ3òjë{Þœ”ï¥åWC$m}rãô5ßðé÷r,!]œ»Yøïáœ•ã¥a ·Oä,7œ{YÖÍä¬ü.Üv™›åÞÈš¦åsAûÂúZ1—ñNßÇ6¸W,FÜv¹H‘X¸ó µ\•Ç¥!ž¶‚㹎Opú&7 3ÔŸ›r¸4$îY3¾zÎòŸnî`q?Ä”G|!Ãñ­É“­n¦H)xšsr6…Æ»¬\ÊÓÒð¡[ßÅ81æ¼ÄC8v¸­‹S~ä¥A”1½ŠSná˳áÕ)paÌYÙà X(¯ýàNŠW(p`ÜsRJ.„<ǯÝà²Sî´ïÅš”²¯ñ*+A;Á¯æÏ¤——†r‚SΆµ¼½Œñ!W‰ÚÎ%Ú.“.¶ùèJÒ.pÙˆ Î7<’ò×Û.mpK•P’öS¦äIÚ>pÊWS²öSnƒ’µœ‹Šv‚YØåõ)ÓVyÁ)eC)Ú.ˆ ˜c*Uo‚§!a×÷öµYg|â”§Ó‡!ÅŒ€|8DÂè¶=Ý at KrÃvíwp“)ìä:(öF(ìd’îÐaØ]E91‰Ãªï ÄH~ð±ò@Œ1Š<쿉ñ>¸âÒ…Š¡’&Ì™@,6_󌻺fŽ‚Áqƒúk á¸A®uMÚŽåßm…$ÇÈ»´’1ŽeÍ„=Æ„ã"KkI˜ÇüÖ"ËšÙ©gnËŠ(KÆ<²Ç¥žb$#¤%Ãp¯>Bò^Œ‘Þ0³¶0‘!ûÝ$""3ý‰È$g""óýˆÈß(æ7‰d~ø©…a(ËR€m40AP–¢€ åxLeó6ÑTn˜§åŽ$•ñA3,’a[•¥H¦Ð44•¥Ä!IÖXDe¼gdõŒ Œ—¢o@/!|õ|ÝW)(K^¹CIL–’ •ûm¬™,EÎÍÅMH6¨ÞœŠC˜Ê¹É{·‹h,“CFjÁ!¯a"Ã'ç­N6@Æ<ˆãÛó³4d¥Mc cgP–œÑ˜Ålûn,‘UL1u6‹³,Õ]™ÅÛc›ù`æ‡òl1ªü²±I,›¿‘~-âðвoæ“A …ëúìžìÇ×»ãÎöì,› íkÊËìÕÆ£q·~š Œ0p›‘{&0M[&0â–ym)4Ùxº ÌÖ#˜VXŠ®60DàŠ¬ešÝ„àž&»›ÿ–ÀP/B_›nn+ËEoÊñ(ˆÀzÖ9!5€Q&d¹¯ÑÚ<r€‘?ñþ `Tîøµ5ŠÅšÂ^ñFÅ2÷aÜÜCKV1ec³˜2j±Y\d‚-ÀÌb­°`«˜Ä—lS>K¶Š¹}Æ*¾ð½{EV±ê‚Í=­bR2±UÌuÝVñ’T‡çD|ÝÚƒáæÌnM¦° N}YÂÆ+Éy†Êƒ1Ü…ÈK|ÍîÏsó‡0ÜÅqÛQ¢íâ ÍXî YºBKì¡»|‚ ã‚¤Û $ø`«³±e\º€h&Ca˸à#lÆ=É2.Ö- [Æß,Æ“a\ Ð‡ZxT§¨ì ò°Ô)µ",÷²¼ß…šË(Ã5wÆ3$Ax?Ý‘sB3Ê"æóHçBlÆ«C~kmÎ(Ëq?{yý 
>ä?šÊ¾CÿU…Ç÷.×5õýŸ]z⃠°7k?Žûñ“êÚ‰jÚ¬Ìüt\€%/«3?o‚îruþ希:<@¿üò£¬,8[ýÜñRß¡ËÝÞô6Ÿ:6ë´"l¯Þ…‰#']k&Ÿ]½º?é+4a½Â%âáJŠxjW²ß"ª÷¨ˆ§W»D<\ïQO¯øñpµ5ajÞ6óÏ‹ÂFkBq­gOGëR 9£¨â³Ç£õz×”âŠÏžv]nÍ)®÷sÂ0ù 1ÉšR­í˜2ÌÔrJfª9¥ ãj‰Ã¸’ê0ùá(t­bcþ±:¬huØ¿ýù\® ±-T¬!NJ.ag¼T¬;cVÖ⣃d ˜¿‹rÀ:ÿŠË1^Ó¥?Fü©L}AMQ«Ã\[óü¥Š¬°+ì OÚüþ±â š£xVÍQEÁ;¦ÍðFÍEEÁàî„:¢Çj÷9Î(ÅokwšÎ@:Rfp¾Š•¹#a”J£ÖK} Çä2T|à¬Ä8xÝÍ€ˆeÅÂ.é×ýyEÅ3F¦Ú¢Š„qóº(`G}Ëðªbd4*æÖaD¹Û)®0?#oWQ¡0÷[îäÍ]*&7„;|P,ëÉC¡tƒ‰XìdbÃ…>æ<³XV´KK#K,†ØËãyÞ Q4†ØË¹%˜$CÑuá<†Ž'C}•ÊÒ 2e©»0e W¤šñ'rS}RWò+¿R´YÌ@ÎøŽ<ð(™´Ldœ)Tçòc"G|Ý~‹¤È8‰c=1²ØЉ¦'IÆÄ<Öß0Žå5ÝÖ7ŽIþÀ8v¯­R$K ÷›–hÜ׿ÊO4f,јßrLcyâä½ Ö‹²Vèjk=,Ã*šéc#v›î'Î0&-؄ƺݩô,Œâ[[Ç0æ"‚1eÎ20V‡\2ŒIoÁ0æÁ˜N_cË¿—ÜŸYL‡¥1‹ùE0Ö÷,Ö¿Ô†}‚Æîuk;Ó“ÆIH½^iÆ0–žlÉž¡14€w"c}‰aqÒ‹0‹ucXL¹ä ‹³>á„Y\»ZãžÂÌbĆ¥¶»ÛÄâŒøèÔ-YO•q½Öd âòºvS"qé¹µ¡LbˆdVòD&1TFaÚçŒbS¤QŒ¸îú’…Yœ^+o!³˜oH0Fz·•ùñ†1¤ëuJÇÆ4@Lc®Ÿh ™Ó<~‡aÌ%šÅ¤&›ö‹ù*Åb–Œñ”ÊJ3E0æöiC—,o·ùÆÒ0æÍ!Á·\÷Ì##U¦ÚM:b ìµ2\ŠMM Å TÛÚXhË]f³å@(Fò²’ÊyÇK,Æ6oÔûݨYÜú¨L1±uáe;ÞÃÆ$ì<(#÷oØâ†ÆÂß™ìä< 㞸l©‚Æ¥î…ia\ð¨'˜ƈ3m9,ø ‰@ž‚‚qAޏðH0F¤’ô€1|«m ÉÁØõì8iêãˆÆ]Q„³w35Ž!Õ‰x`#õ ñ¸+ŠöŽ–˜Œ24ä–_”ûÉ^û¼WMe£ð!,wYÍÌuFTF‰ÛÙÉË]p³%Äe#Ž!0w1ÎVy™eQkÄHŒ3ØÜ¼–OŠàŒ2ᛌll–Î(‹35¬Á3Ê”º-=íúÉT~žÛEˆîe×­¢4ŒFÙ:Í@Zè"¹>\Ô”îE;!aeu§0#NÛë4©×­CÎÏ5šÕ(Êp€6Ñeµ)%$\÷²}ê"ñº—afa¦6Š"FsçˆØ½Øb ¡!Ûô@#»÷`§^#fÛÞihÛÞijÛÞil›ÞݱêÖ§u$bi.)æÜ-$Á?Ëx9¤£J’·ë?øISK²zw¤žž‹¿"™Õ5“KØßŽK9 †ê 1<ëjÿöнCÏ?W”pŸï&ÐOß“W¨ŒÏ ñ"®‡E$pÈÀ]ÀܯGD¢îbÄ‘G8±p|áRR?Ž(ÎÓ§NV¹ÿ¦ÚÑÿ¤´$\óáÐÒjSóáØÚzSñIA‰žÁ®ÉË^ÌŽ|X½Ê=±¸Ú³k~ÞðFÍg'V¯xO,®ùìÄAð{bqÅŸS•È–ªŸ,ÖsLÇT%¦–SªSÍ)U WsHU•œP•áyÙ¿Öúsª’¨5%9¦)F–žó9§þé6 ÷»(2³Êö*-[I/ò}˜÷•É{.ÛÇ6A É}.&a¸ò}D»dG—ûÎ Ge_YXµ>F–ÂØ?‡vå¾¶<{Ãý%…¬-QQDTR—l¯£” #’t£Âˆhr™!)cyINi·³iº×š)‹=wBGÇѤ¬ôÄÏý½Ãê×ó\,Ÿ%i(R)mO:}4ìLûÉôŒ=WÁUÜ»ß!æ«>¸N) LOé´¼–”LÁ!¦´ÏYzòmøv¦<À!ú|r0© @yÓáºÊ3i‡”iw:´ƒSDÚevu7©K3˜å\?Vž¹¬7“)BʺG]fžõ=½\Š”>kùÌ)“›ÍI‹²íÙs˜ÃÛ©î±?šÊE‰®¶’ŸHY|¹9c)„ë§>ÏùÊ禹žNXå$òePí°¯WI‰v„Öu_Þš®¤Êqð9nß:}îð¡¨ÎL„/MÃ5*“u“ÖlÕI!œƒ/DMV}UÕþuõ@ÅAɉԧòÎ4c®"P p»ïÙ¢NO”0 fßZ¡üDêw×És¶"¨‰ƒ¡†¦!ÖW˜³•O‚Ç"žê‹py‹_ô5¶L’;»!)É=Yù£p)+ýð>Yù(¡H¢ÓN)PÜ,éÚJSdÀ]ý“×ðûâ{ꑟÈðç¬ï”n†× ¹ø÷üa^ËÚ‹×:õy- Ô)¢Ø=oH2œŒläçðº=šÙ`NÚI…Ú€UÊëĦv?ÐkgÆal.Væ689Ã#Gƒ` jipËÚ–U¤VƒB7`íw²†·ÉïÄôFÚïs—ߨâJ+âÈü–¡Äá 
SöÀwý3è±b‚£[*×;#Ü(n†÷5&νXâˆûê§K›)Žñ]d°ïƒï—H’9Ž`z„–aö^ƒ¼ŸÚÖ–Ž“QÞÍú¼D‡Ìrʸ5&ÄrNüÂ0ïÇ ]ëÜ1¦9fXv+Ã8G¡sB tAlZaCôöºsÜ“"€qŽ´ˆÐ“Üë‚pÎ9 眽†qŽ?Ô¹#šÊ8g™ã†^šC5Ä8gmãœT_Œsù2Î9 â£Cg¦úç ŠÅšâi¿SD縄A{l}ÌnY¡A{?@YeŠ{¢]­Î5hHºM)F;gIb´Ü am í2úuk• Úå±å­y2h"º}©A;rÞÅ¥j1h¿@ï•làýÊý íÅBû…ù±”w†ígúÍ}vH(GT©>ÑÐÙж7È.eÓ R"û…Ðe¸7c]„Xå4—É.oˆ>OûÄØå”ÏØå”,uÚå85Ç/[˜ÌrS¤­r:RÍXåbøµ­ée«œª2V¹i:™åú+c•ó&’¬rÓ2Ë!Ô›çø«œòý«ÜГ(N)b Å•êÅp¯á¥w6—µ­^z‹ã”°ÐrÜC—17ŠÄqR;=8¾• Œ«·ãPÉ!~½q$?ñó]j1Þò:ö×`Ü´r¨¡VÄæÔ¿XŒcä~îåÓ.ãÚð7)^º~t³˜(Žìn˜c(ŽL?c«œž/HK¸>“1ßiõ,Ñ”¦"›áýN!†gìöž‹.W¶-v5 ×2Cpdj Øß"8ÇÄ Á¥0ÄõÁ€!xÆÈÁ›ü“àR]žÇš1Â34Ü×T$„gä¹[ë“.Exü~¶†.…ž£û92Äù`3c›Ë•©Ö½å&ÛÜÞ–lsÜèºö¶Í‘q vòP›Û<#òJèelsœÛ†§Ý•‘mÎÇÛbì¹ç[‘ls>>ÉØæ|L“±Íùœ&6ÎeÚ:WÇvÓœË2¦9. Û/JP7£1ÕÍq` ö~¦œOD“Ý¥&­[j6F; µGU³ýYF*:PŽéÞÛs¥í=Ñ|··%À£°àÉIMxæxçÖf£(••1œß ¡IºgVÏÈo}¯TF=J~ú!ÈÿúSÿ—oýÙIéø×!¡Ìµ eÝýøIõíD=òhº»ú»cæ§ÔƒhýèÄ3Mn0•²\q-ÿt¼ Ôå1ìæ§º“|Á(ÜOžz£ ?.sH`èK‘ÕŒ¬s—d–7¢r¿P#㯞 5¹þÞøD]Ç52kÍn:y„7N¯YÝŸ¼uSga}ªîÉhkÖeçšm“ÿÉ{ÜÙ&PÙÁ&<‡=öðÁ°‡óî§ôµ)|nªõ ¹Ï\ó×ôÙL5jÂLµÞ„ç°Ÿ›j½Â†ýÍ©öóoX¼¹úÚß¡øô"‰±YŘþŠh¸’Œ‰lƒl¨?¨$ý¢J¢ì`ÒfÙ3þ-…:\‹lÿeœe#Ü>”ý²¾\Ñu mv×#Eç»j žð ù'|û»üOLû•hg7Eÿ…“á+Öèe796N–e%û“GÖÌCF—4ó÷¿ùí?üãï Òè>$ŸÍã Y|á„]z‘¿-µ=ëÕÇŠý·?þá\âø’Ö¹î¡ìƒÎ…úU%îéÂ>§Ž|Ý}Ë*fÖúkɵÓà€­\ß÷#³ÈÈ!¯–K¶§àXƒ¦>e‘²þ“JÒ?_ßÄüé¨×î(‹ÇC×^¤’t£h{%üµûœ{]H*xu‹Öãœyx¤Šeë‹$”Ý]%ÿ()ºÇ+QÊ›=ΙO÷§hÉ%ï÷ùí£·Þ«ÄÜÂ>8²VzYPǶ‡8…t=÷"•˜»©A #1÷…Á‚»þè¶wEjüp´ü%sÕÈÊÝ}ЍL]•TRn\¥›ŸÔ¡í(KR&³¶!'·©%«#Û{‘Ò¬RrKÛõ’Ÿö‘í(+¸°À/à‹ÊÈm/+*#7ÊTKŠ:±Eê%ä«JÈ2õLª:³E˜gé·¬*!·-kûÌví•ã›JÇéÞs‹2_”8hoõ:\®;’œŒ^/Úk4øv#õ«ö”ˆ}­¹Õx`~·=Gƒ~2 €ßs”/S™¸QTsÏæ‰"œ)áp±GÿIåßî}݃.ûáí32• ›9¾8»Z3C‚ʾm¯ªÝ_tOM¢8þX= cB‚oìß¼Žt«–¾òÊÚ‹‡)ì/â)c¸¼n~¹‡]Öd;šŸ ä.Ø\ë˜x,Ej1ƒÈAOkæq|b8hú0…½ša ad/ÛWiÓ›Šaì‚ù–d?Ñ˦6[¿zNë7“aLÖ/iãWWeHL–‘A±Þáã—V›µ~umÖ VÏÊ 8= L{&ƒàüD¯ÞjL³wOÄ“BMÞ(Š5Ç yãÅL$ò¶FF±okàB_¥mZMÞ!¹"à6âñ¶{ˆ·N) Xñ–K·¦HÓ¶ëf÷‚$Ür ·­¼gîr]„[ Æm{ß÷Ð>ð=ô`c¶™ˆ²1B”5µnù–„[ˆAÛö¤l{Úµ¹Etm eÂkWº­ÉAt5E®\¤á/cÃj¸šË4[¹H³µÑkšk.ÒŒåñ`ÈÚ2MÙö.eùq3f[&«0kË4gMq–[Bœ5E³æŽÄ[[¦yK3¹ÛèuÍþ´8®ú/KyÊCð¯ îéŠSÞªýH´¨M•ÄîÛ‰zbŒþ3?W†¤÷É7÷ØòOÇ›@]Ãn~:.N¡.ßOžúqJ§6"Èõ)ÁÈwˆS‚¼ñ6’w2ÞýzÄ){Ín:y„Ù+6Á¨þäžØ£¿™ÎÂÈ*¨æ/P 
´‡8…šðŠ6Å)\óÑa·Šªùü°ë)}ídÎMµ^!÷™kþš>›©FMø‚©Ö›ðösS­WøÁ°Ÿ8E¶'^6j]?LœÂ•§p%§Ä)\Ë)q ×ò]┟Qà›©¾ÊI É?V8­þøËÿ8§þÀž_©?v¸FŒ½K;À=E"|;ÊCÄx©cÛ9L:µ}¨"¼ÛÅXõ&»ícTŽÓèÉ^úOäøÖ~Žèµã»jŸZ êˆvvSG$¿}Œäæpsl'#yibT'´³'6Fö|ëEr}g}U"Ï7‘ç›Ü·§Èk/£.ËJÍÂŽÀ˜ÕñìÝE«|OO¬ÜÊc‹Q‚èIQÈ®ÝݱLøöåÅJ.p-͈ÕAÔk'V#Qî¬ØŒ\u¸‘ œ4±ø¾,]ä×ê‚t‘\« ÒEp*rÚ®… ɈêXrêTv)ÃO^ƹÆíäþcOwd—\…Û'{¯ä¡$È~V)ì“ØmӂѨg•¢:‰eêŽÑ¸ÀÕ˜GÖ৤Ž_· Hä§Hêôu{Yý}.™O)@À±½ à@Qn°¿Îàò.€-C±£¸ƒ˜äÌa’†0·v4æÌa’0‡Ç’%ü:¯«%úŽj5uYÆÇÔ…àcã?½‡.2zU=A—dÌÜžkŸY‹øïYB­s LZÖ32i¹þò”y0iÍ­ªÑy¨ÙRÌCÏ–ftª¶fdê!7–yÐz$Ò"¡Í†ÖeåËˤ¥viY‰Â¨e£¶Ë4µÔ#F-É-µ\ŒîCuH“vÔ¬Úc ñÕi¾ŽE#òР$¾Rƒ’Ñxè7ó•"ä Æƒö*¶€uI/>櫎32^½ù2^½~q;WGÓŒ™«#ÆÊ¥À½1wù2mîRà̘»Ç2ö.½8Œ¹û´r«þk²rß0n+YmlÝš22oõœ1Ö-ïOغmï[·ôdÙ¸¥h¨1nI“nŒÛF¤Æm{߸­TDÆ-)£ŒqËelÜ6â*·Üp6nqÐ2WÉŒyÛÈ(&ó¶½kÞ6 $6o¹lÞRûÛèÁ†n}¸í#êêZÈÀ}îmOs¶=ÍØª'¨á-—1pë»Àe¨ÞnÎ~†¯ŸWr_Í QÃWü‡ZÄצßÌצ5…Ì×F+‡øj®"YGÕu_ïð‰¯š¯R¢èBxmìÎ ¼6æñÕ\G åê4hY¾ÁÄå±"àšÊ¸\×ÔFÀíjm+ôî"Òr-Úq'â+?‰r+:èî•¥d·’Ä£¼kÒŽŠ «üׄUSDÂ]DPÅŠÐÖ*é:ž®î#ÁÔ‘¢ƒê'˜r±tÔO5•Ké™0KÍeš¥\?1Õ\Epµe¤ã0e$äà2¢,7…(KK‡)kî˜ ÿ´#»Æ,uJ)ð¯8EéI!Ç[µŸˆ¬Ì~ü¤úv¤ž:ô ÝÃ::f~:®¢(é!ä0?ouy »ù鸃º|?ùô‹…8G\ÞšROCç¨CÞ)ù˜R+=ÉÆ¯Fȱ×ì¦Ã‘GX½bÃÁ5«ú“÷¤8 #]çšÏG×Ñ]§&|At½7á9ìç¢ë½Â†ý „zJ‰£WÈ}æš¿¦ÏfªQ¾`ªõ&<‡ýÜTë~0ìß'ä8N´†¾k;&äàJ 9¸’SB®å”ƒk9!äˆØCVŸG:Ì…²S2Ž?ÿË)G†‹cT«=-UOIH9x¯ã\HV·RF0uïesÖ¾¯5gðî‰Jr¦¨bï2ï®à ‰*r¡h"é4r©ûüd„LÕ U 'Ãr¥¸¢ŽØÊ²Þ©bQ¤[Ò(°è•"·´O-]Émõ—ô®Ž9%ûGêûa”‹‹t,ý`‰9)ù‹ØâLdQM½âLhQ9µ ÜkZrœªxRqhÇaÿcÎÏîG-ž½Ûø ÞŽ5Ù]ùµµ#¡rk“®Drk›2vk+O¿Ì íÞÖ’Ƚ­£Oò×Ê»­¿(+‰¼ÜE·>“·›>Ó-™ÜÞ}*™ÜÞ\VÈí­US¥Pœ‘‹ÈíM=«ÞJ”N©8 •ø´wœªF&pÒ2:&pLðF­U`‡x}»Åä ZÏÁä „""o ‚2xwÉ!nÀKŸm3y½Ž|2x½Î.DöZúÂü•"Õ1¯×/Ưz.3~=I©˜¿R¦¯#þz-kaþzÏ—uþzÊÂö:úÂüõð×kEŠÁ/¡’é« _Ç€$ø:Ši}íe¤ì¨º!„cÇì$;b'áØÑTd;Š{0ŽÅ†ÙS‘qìH!ÊŒa¶\Ù&Ë•má ë`[˜­S¶…=ÛÉd ó¶ÀÃD\²†u ¦i ?Lö°ÕÒ³&°^±Ö&3ךÀ{ÉZ X[¹&™’F1˜nh,a2mE¬‹ŒÐCÝ |=í`º›Áo˜¿üÂ`û·’iLêÍ2¶iƒg `˜ÞNù}Ù‡±¹ýlÓF‚ `z©UoõT… l§Szq£×£LÀl±iàBª¡ZÅ[Á•&‘Æmtå=ÜFÊ^ªqïìw“š»ñ鄈š·‘’ºn#)R »‘Õ]„ÝH뀰ibv#©4~#ëÜ¿‘¿k!þÆ.$îFNDÜœˆ¸)}a7Rþ*¢n¤l;ƒºi3WÑ7R®!¢¯V-úFúö„à‹¢÷àéC‚o¼SúÌݲ'EHyÇý9&Ñ7ÒÇDßHß—}#ŽAøEÙ{þˆÈßEˆãõ0|m5Ä‘>ÈÐ F‰zXĶ~ â^¦`«8é›â0ŠÔ i £H·ŠÂ¦ñšÂñòö:þzOˆOËB*´ui»~Í`hÿçÄ¢ü¤0äÍúO„jfG~Ò½;QQÃ|`iˆùé¸.C&‹•†˜ŸÎ7AwyŒ»ùé¸4„º|7úiHϤòZøÜ¡0ß! 
É8Ïù çœ}ê°›ÓÒ½j7Žr­ |xÕꥯ‡ô 9tÌ5ŸØ×.b¥ˆ=5á "ö½ Ïa?±ï~0ì_ Q“Z^å_¡5r¯¹ê/êµ™mÔ†¯8ð¨·á9òO<ê5~0òß§‘ŸpJjM©ÖvL"bj9¤1µœ‰˜jN©DL5'd"òÿ£àd³XÂÏÉDªÎ÷ño>—¢Üâm;°ñ¢Ó^Ôv©Æ¨…"zoVcÑB½[®éÒBŠÖW™?Ê'Þ÷0©h_¸v.×|)_8>Qq.òvŠS4 N9Åu4­–Ë8Åw³q$òvDjO¶lè´oœ¤µ^Ú7N!¿Z£vŽsp òöŽë]jm—ö’÷_";Çõö„“Ë;N.ð¦CÒÃ5 3UyÅ)Ø®¢½â¤Áhø\¹·Î^m§8ù©dÞ¿’ù˜­á´ã5MQÔßAR¯yu†ðø°¯…Kþ¨?Fi!êÏI©ÐpºñvŠWÝ•xqzk}YŒJbŠŠöŠëp`K—öŠóe)j·8yÑÎ6ÞBú`·õÛA^úOQ;Æ)²ØrÑrítk8ÑX'ŸQ#X¢rë`»TÁ®u³q–ñvk·nÃIÆÛ?NR(ù¿”ášf^»´ƒ\G𛬠åמ¾†,É{RRmã½O>ÃS ^6牼YO_&o"W-£7iñ£7V*ÒäºïLàx1g‚ƒŽJ3Ý´ÃŒ»Z¸ÀæœT `>”ìõºdþzê™æ¯Ïú h±'O0ƒ˜4#Ìa¯„y̲æ±×®DƱgê=Js™%#Lä›¶šÄ$€d³xƒ‰L*M&²£Ùé8™ï¨ìˆþÄã®Ãd;½4ÃÎ Zc˜ÕŒa§…rŒcÒe0ƒpì˜Ô ÇNkvÇ|ј¯";šåLc—vïC˜•LaÒ' ³.á 0„A\·-û°€„@õ:`¶ Ù¦5Â$öôÖg{Ò,ŠYåÌ,ö:ù³˜×=ÃØëÏ’ÆÌCcHSúCc÷¾9lʈÇN¿MÇDAƒc¯—ãXfp¬_®†Æz´4Ö &+£Ø€Ù0ø]«ØS+†±×y˜Å^§qbóArÌb¾#±˜§ Áx¼BÂæC4f%"ã˜sœ1ŽùµD4ödÅ“¤ƒyìõþ×àXGÙÇí]ëØ_dk{Þ_ùi1ŽiëEXæ‡ÅX櫈Êõi{T–» cëõˆË‘r|–#k]ˆË¬1 ,G:•¨)ù A]ò5l(³h˜l"ëeÙ&(G:˜‡ Ì1~b²)ÒHŽœæ’lz­‰y¥‰Ì2"r¤<“d#l "í!9^lW-'IÅÜ6B1+<’m£4’yL ÉfL ɦÉ‘R}’#ëJD6 ‰ß÷W˜j2›j0›«4—Íph,›~i,Û¢H&vÑb’dilZ­hlHÓØ>_cÓ4MãÇeE‹H⻾ 3FšÇFDD@æ®èÝéÓ2w¥¥ãèÓkèH¼Ÿ1Ûþ'…$o·àLL'-)Éîà‘šÙ?”xÍÊíoÇ•ÂÚ!šð©ÕÕþí ¡{}¾ùí¸ „{=Á¿}¤¤È ìQ"dÍ:«()H¡äÄ,­ á©_¢D/ߊ#OqÒáüò¥>¥~”HœÇÉœ¬’ãͦî¯õ§‡°„ñ%±þ4¥%¦î³ƒ>ü¯P—èéÏåÌ)_G']¯Òô›ëþ¢~ÛIGøŠI×ñÆàœt3Y»ƒÿ}“œ´ÆQ ILL-‡$&¦–SSÍ)‰‰©æ„ĤxÈä«ìtkýY‰ LþrL`"$-=lN]öNŸÂ»KfN–ýY 8öÞ×d—Ð6J Ë{ɶÝ%ûÀÜwtnøa·‹AÊbO+4É&$e$4_/óµ¥ü„ó ¤­õþ¢QQI IZ¢?;pW#mIæ e÷‡$°cÍÓá RVzØñ¢ï“ŒO%/¹#Y‰öìI™ÜK®n„¹úoNëIîŸHPrÿVzž×,m5Ÿ‹1½î¡ sìœ'% E¤¤$ô¼ òÇ1ûfóžHaìI_Ct#9ûöHIYwœóǩҕ«§}í3O2¹Ûšx÷]"Å/½r°9—(~IB)Œ/·&.¼Vn),=ûë=ót¸ÁI8§¶naÖALíö—"¹åž‰ŽÆ°P8“bRù,.,ýtß{&^tWxDöL¼xœ+9ÑÍ]ká“ hààY“’>I“²Hßaêak…?ÄÔJËú¿c¶’èHÊdÕ®Ùú¸¤&ú3My3_œ‚›Ê°›³–TCRVú1¿cÒšë¼³IåO‰MÂòÚÙ•> œ© ãDYœxñ1Œ“öÑG:õÊÐ8ÒÄ'*‡ÊEšÎ”»ÊÀ9Їò†Î”ÅÄÀ9xz£œŸJÏtæ0¯Á´§n0¦=#_ãÚ—'®GÃ×,E1¼ö‘GܦÓ* ¶=½ßÛÞà°í=uh`›#ÝÛvìˆÛžN0÷L°d(z€7¾øÝx8³¡èá$€ëOÛ-À+]VŒE4ó›gã»D¿ UX$…¸_8g‰^©Lo¬0¾9Ɉá·ÓGÌ|;-/4ø&ÕŠ¡·1²߯"a~SÎ ÃoÎÒ¶ÎÌÚè LÅ ¼mñò“àù‚3O‰à1<ÁHÄiÈøfnF›ävÜžÎè3äöÁ˜ÎV¥¢£Õ„n–Ïv{c”0»-\F·1u˜Ý]Ȧ6-jcrs™Þ<›Þí”sSØoXà\“üÒ†w5&ûҋ„ 
oî-à\Æüfd2¾«a»á7=뿚ßd~›w¼ÑëÎcÏgc˜ùÝ>²¿ù¡1ÀÍû’Άñ»¾o~»ú¶Ùfl×°MÐ0Ô®éDmšc†ÚæÂ[eºWÄA‹ñ~¨Ó÷,µYÂÔæTLmÎHÀôFÞ5,o““€éÝO"Ñ…U‹YÝTãî݉Jj6‘‰Þä²ôFỆwã5KìnÙÝ× ±»5¶e4¼ïÖØ’%z·7¨ÝêûÔn¼‰Ú¶"Âv«Üzâvko8NLU„ïfÙ®ùm/Ôü¦sR ¿›®Äo[¦ñ͇$1¾1£iP4¿j€Ói5†ßÍÒðƯ68Ÿ‡dþ(ÔoõInÛ~"wc{^“Ûir›F0ÂÛèæ³l »m!Á››Áì6ŒáÝÊÓäæß>/z)é[.H…¢ïÿúSÿWNT)ÿ:$xy£ö#¦»?©¾¨GVIWsøêgÇÌOy6}¦ ý›Ì9ƒî±åŸŽ7º<†êšÞ£2êòýäé§·šð³Q+‡X1B ÏzǯÑ‹ü_þQïüù`Å1½žÕöÏOÛk ÏM§^ášN\éÑéÔ+žÓ‰ë=9: Öt¢zÏN§^ñ=¸ZšN?ÿµš«¯xFüòRKõå©Jø~I WryC`ë…¬”C­Õ"]Ãâªbîå ÇçjA®Ô«ˆ],”j ¿¬ä8Oå-±Èª%R-ÿî7îü9²üý?©:¼^WÿŸ/ß~T˜Ýýî’ík¬Qzˆ géŸ'_àŸ·-‘É“¾ý˜Fcþþ7¿ý‡üÝ£A6ø»ArEæâ³ Í û!±t°IyTëTB–ÿúÇ?œKÈâŠ:>ë»&iO8ŸÞU•5g™¶ReÀcßpÕ‘6÷ÃéÎNõûÐØ7\Uî9ý28qõ“Ðù±¥j®;ŒÄÒ’—Bœ]ìÒFÎÜ~¤ÄÇW*±ïQ¦’æ¦þÍ}-yØÖþRç'£,c ºš{™J››ä‘»,jî:^¯ò£(Jó"z)E`+¼H©±MVœl[e#.¢ßR¤JSà}@ÙH ;^€ˆ98X–W/C°M‰²F™¼­Jq£)~¡œd7׿dô½HeÐÅa¸Õ‡®>Æ>¨3”QÖj ³™AåÐEY–ÉâGæsÌ:|ütåâFY’gêÇhF×}E²>R/ >·«‡i<z^#fŠF*݆dŒã["”É-÷„E´8@{™J°kºÐ?Ož§~÷Q‘ñéíB™Ê°Û»×£Å}ô Ó[dÛ‰Åóøo;,A%Ù•Ùø*—,:L”Õî+º§,ß3ަ;y´©ô‰pL^C̆V^Ú¥…·-5å{yd‹½^…ô×Ä–é,1įxúžÜò"½rË€¹àpTÜô"nËÓðB¯9Õ™ÛRy’wôh s[.ÕâÍ_Ím[¦¹ íÌüÆÍrÕùk­râ¶¼¬®„”®ãžšÛx_{y…ÜÔ#nãSšÎ¼ðÀ¶©°à¿sv1¶ñ2kYªoÆvPTw?ÃmÄ„n±¯:wÀâiøÒ-¸ñ†ªÉØ>Á›Ú\É7¸MãÜÜ·\wµ\ïÙeÀÍo~7ß“Á-× ½ºa°-W5±3î¹ÅØÖW´%ÁÐæ6´ù¹0´Åp&¹Éz m Ð{½Álº¥a6âÀò„‡÷™­ºÆÄF]Ajkã%ÀÄ–Á’÷QsÕ»IgdRy» Ì£˜IãÝÁÀn;âð6ÔÄ%ù¾j ¯å2ô亙L¼–²œkëÞðš&Éà5ÒI9 !»™xn¼q,³áG %Í1[nÒ¼¼øÓ“Ù¥+¹Ãmp2³KÏÂ&¼¸y®™-Õ%ÙìÝ(afKÙš£–ٲĪLùû!0³¥LÀ;žˆavéaæì§=­™]ÇCÏè93»x±èúûÊ=˜ëdþ–ú†­ƒ&„i·¥ª‘mj#dä‘Éwㄘ²+.[˜]ª®Òía¯³ vÕ~Z.Ìl[¦™-e2QÖ«…˜mzN̶ešÙ¦ƒÙø±ì—1¸ÒŒh0³m™f6:Ö'^z2Û 1» h½v€LíÒ?þY¯v¢v6]ÈÇô"n›GÜ6mÑÜFÑ•rº7•Äm4³¬W;a[ç°Ü¦ËÛö2…m(-J|ÛÔ.=AIJРܸîªË²'póäbrÛ2MnK&7%£›Ÿ£Û–itó|JêòIMFwîÙ:î®ZtCÿáe¶ß&µ&wË‚Yq¹Ï"F·”µkÙ£Œn) qDŒnùRÇ2~ [Êbod&·åºÞÈLns™7æ½ØáÓ‹@àn²•Wî Yâ6\ÅD¹wéÄm”É/é¶*‰Û8hgxÖ Ú¨)õ1~BÛÜŽ Ý°AvŽmC[ʤïµÝöA[ÊRªn2‘ -e²Òc¹žÌÆ-ë'Å2—µkºÃ˜Ù¨N^‹ËšÙ(“É=7BƒÙ¦}Äl)“i\拃˜mË4³Í=‰Ù¦ÄlsObvO9’ýä+1ÛÜ“˜ëds1&ÛÔ¦‰m*#b£¬É O]#[WÆÀÆÒºÖΉݔ”Æ›ÂÄæ†0±1E¼¬Ðö±ùé0±m™&6Æ« wÞ"6%Û´“ˆm¯ÓÄ6eÑÛ[}ZG!Óü îxþ5"×(=©£y«ö#A°6»o'ê‰yÈEºÂktÌüt\Ä’BÜÇìÜcË?ouy ;ÿt^GC]¾Ÿ<ýô]:šp!áŸìo]nŸ·|‡Ž&È ï)yE =Ù¯FG³Wìfј½"ÃA½ŽêOîÉBúËé,Š”ð*=+|hJGÃõ>4­£¡z ÚÖÑpµu4zÚ^›³ç¦S¯pM'®ôètêÏéÄõžœNk:Q½g§S¯øžN\íçt4²ÿð2<]šLGÕœÒÑP-Çt4\Ë) 
×ò]:šŸª¤Ìƒ·E6œñ3B•>(TŸ$,:rV÷ïƒTı:#)øéZ[鯀LÏÓ»ö´p´ênA¾™Z™y—>¥ŸÈ¼v³=yêò¨Egô);‚3£•<…ô"ÑkÇ >_½ÖÖ-ª¯YQt•µ‹ŒAîl¤$R›íGBão·R„_cGŽÊK6á£kQî ùFLa–°2ŧîÀJŒ¤LɯËËC¾[‘Œ2eéI”"Ë<ÜΚˆÃ§wÈ(a[Êð•Ŭ¾ 3J‹˜Õ Ïýž²Ýžã‹Ä;fÄÍ/F–²DO(3²%ž‰eÊR”¼$V#KYS eF–â}™.ùX,åòuƹc3²yõöÄfd)òûôÒÆfd)ª,]F–’ ’.#K¡²÷e)ɱ,¥ä0¿ÉYŠ— ÍÈ.—œ:ëÙ(O’Wg=÷²¹ºµ;J•²e"X&+˜hä%ˆ³+Y N=‘)4\’Xþóë¥Çeï«RRTÇ>ÛA‰V•²Õ%d¨ 'Né’WôÐ7¥¤N~NãÜ–ÐKɨRª¼_nÅZJêìg£‚é¶wë”*…„ƒñØ= ­¾ {@ç÷„é06¤–áºnšXLçzåkŒ•µ¹tMÚ…'LkHfŒÖÒZʼL˜[¦G´fÓJM1S¦5þðþxК%0Ìk–J±¥(øTdbã²¶‚'Llô@hSJ+0¶¥L¦Ì c0¶!«‘‚6ÄFŒm¼1›»—?SÛ¼L‰Ú¨._s}0µQ] Î Á S×ÝŽÜÙ,,adãiʬº#îŒlV¾0²MëÙ¬)ed›)BÈF[ZÊ)>‰Íâ ClR²0±ùy±y3±YæÂÈæ0²Í= ÙÜ;"6? &¶-SÄæ;#JÑcBÀæGÎÀ¶eØfPضÌÈRt[Ø$ð鼿'vP•blj±»díµž˜”‰ É~G3±K“I*í¿­"6ö¹Ê|"h|V›o‹œ ¢p²¾…lÖ«0²¥lÝxÝÉmj—ˆ×Âñ`Æ'^K;d„Ób¹æµ”ë„[ʤíÖ:2°¥¬ çç="Ø(’~O¢°¥lJ¶˜Öæ~Dk\$¯·Id¢u÷ÒÞGJXZyÊêšÆ2áZ®ƒ5vÏrƵyn„k[Fr/ïêmŸÜÄ6#b›Ž±MLjئDl󰉨(k=ëD{[ÊÜ ú3±KÄg&ã=ᅥ0¯¹ãÄk\æÅ¸í>âµ”5Áà[6rÜ!Ö®¥¬Åµûe\ã!¹07¬Ìë‚|Õy~Á¼–å&Ës¾4‰×¥ê­ ÕÉ„º÷„ lD𣼯Se`›¦°¥)B«õÒ$`—¢÷$ÄëåŸcTubKNăÕZž2Çö  …Xf–@ÿÄÀ:Ëà‰Þ" †5Ò—(l¬‘ªC-{‚uOK1utLkäS ‡ÚiÝÊ+,¥ÃZŠÜ–31¯[lÓUóZŠ.ù›{Gȼ6·$^£,. ”pÝdº–m¸®Í-5®!éqÛÄ#\Ã‰ï” MZ’û0±»ó››Dl4Óms“ˆÍò"&¶i ÷ŒÓP&%J†ö¶«˜ÖRËÞü Z›Á Zã^¥Îo(˜Öh\ܶ0ÑÚ4œhmA´6÷$Zã:·4Â5×ÇÀ¦ñ`^C¢5ïaxÍ­d^£,. 
?›U_ lsO¶”ÉýíئŒ€mFE››šÍF¼6= ^›êˆ×fÄئDmnJööNŸV¢ˆ‰v‡ÎJZükÄ~QzR‰òVí'ÂH³?©¾©§ÁEOó9:f~:.)é¡D1?ouy ;ÿt^‰B]¾Ÿ|úÅJ¨„áCÛÿuR‰"¯ÙYùb"ÿŠ”({Ån6y€Õ+2\±ª?yO‰³(ÚÒ®ô¨t W<¥\ïIé@Ÿ&K:@õž•ôŠoéW{P‰¢§í—(Qz…k:q¥G§S¯xN'®÷ätê4XÓ‰ê=;zÅ÷tâj?§D 81µ†¾e;¦DáJN)Q¨–cJ®å”…k9¡D‰Ø-Ê ÅþŎJœò¨•(þ—SJ” Ç<Ž7;sWœ4=¹ÍéÃìÓα‹-Š—]ØýHöÚ›Ü$ÌψsО˜ˆ ™+â—CÚ§EË6.u»és X'ü1ÜŸ¹ä¸ã IYãJd աβی²º3ìöëê 5çD±N¤÷]1çœÒ>0©äYÌt‘ñ/îð‘Xp2”·«>g v"Iïòçœvz]\'ÛFw{Gr¦hgx!xzo3¾ Uñ#EÌÅ;w|O^GûÔhÚäJÊB“« vªpb®uçØ5XçF­*Êø8ŠÂG;,˜ÛP¨ WÏ5?¸9æ„E™TØnõJ¹H¡Rå?Rº3‹ø9Ö„õp®/:Š#…Jy´)£ˆ*” §ô£°æ|EŽá˜ç-ž*E;V üs¾J‘«nzÏ‹'}ŠÌW—ýí@(ps¬ùŠó°ãZ%@Þ5©nxqJ …ŠE•Hú*!uŠ´Ñ¥2(‘Ô)ùÕdÝ ‡DI$N!-’ü¹§ÀÀ¸c¢X'—e§À­Õ½Z}3‰S²,õ4…²î´8…r •Bâ¾®P¬3ã?ò-Á+…Ä)ÒN ‡Z‡úª¤2ÝǧÄ)ÄlxuóüDõÁì¢õ3†ÙÒµp™ \ý0ÌFŠ07Ó:fË0ï/Û™ÙH‚'Oêöª2³ñÙÒ™fÃãÖkÇ2Ûß_V`Ë4ñ~†¨À–¾ÏÌ ØPÖ-Å¥6ß“cÎ…‘ó:v‚©Ø+|g¢HçÆ1Àæ÷;lZbH¦òD”!62ú¬X.Û”±¥¾\¤wÙH*´B7²ñâïÈf}#[ÊjYQF¶”%—FO,²q:ä=¨ÙR…¯B„l4ÅÅEB6ä˜ïÇFÈ–¢PWˆ‘ …V SïÉÈÆu‚ó[@ÀÈžâ\†µ©‰pÍZ06g÷a`c1וV€‰ëÄй£–Dl[¤‰¤«3‡es‘2¯¥¤º68æ5ÊÒJøÀ¼&uãš5pŒkÜß[nu\ÓJS °Ûž/ØÈe¿S=1°±\·š]“ZÌëtN#æu‘-[é ™×¹"náѺ4©+άYLkœuQä¥RÞ µ”]2™')ØŸ+o€°K÷O[mÊØ(Ãt³’újš:3°¡ti}K ØÐ%È+îIЍàõ67lVÃ0°Q&oÌ;ÍÛ^§ ÑŽ<¤ax1¯!ãh~ÚÌk[6ô)^ÞÛïð¤•$y-eø«ÛÌg^Cgâ×KƒymËÞ×§0°M›…Jl*æ5®¡JujÁ×(ÛFC›UE msO‚6«°Ú,–ah+9›7N¶Š÷F‹‰Í3„™ÍÃEÈf­#Û–‘<…²(1³I4<ò×u™7%*ì‰b˜Ô;‘•a¶ìV_Ý>ÛglÀÖ_‚¼›Ù²ž´X‰˜û—in;ˆÙRÞíS!DÌF›Û•æk@A%r·i´³ëß…,††vG5oø*hg¨Êš”íÜóCÔ™à• ²°ßÄíìàTX‚¶l^qç<#h£ÌÏU ‘ q’¹¤éøPÈΗ<‚kf=!b£(vËüvn(bç ‰mÛÄFYÞÛy"6ʼzqjd#OÙ›^Bv/ÛÍìž»ggÐ%dçžg}tDÈÎ>lY)c Ù(K[ÐFÈFYØyZ Ù(sû“B¶Øù¯º· „l”µýŠF6Š’ äýæ#f£,î/MÚ(ó²ý™o m”][nKÐΗ¼‹›*‚6ÊêVÜ´Q& ™I÷Ú¶LCeñª3ß‘æ6ŠdœgN#â6ʼ»ç¨á6Êœ»n¿aE—s~NÍm(ìš“žß#­¹²â–ÉIÜ~”)n£L†vîy{¤? 
‡èîÓ*• à8wãºft¶ÿsD†Q~R§òfý'¢L³#?éÞ¨¨a…±RÅüt\&Ò`i°RÅüt¾ ºËcÜù§óJêòÝúé»”* x pÔOôJ•ì2ĶÒ¡昣ÓJ•½f7Žœs­é{xÍꥯЪô —¸€+=*.èOq×{R\Pïs#†¸€ê=+.èßâ®ö VEM܆=Ï‹UFkBq­g³ê5ÏÅ=ϪaM)ªøðV½æ{Nq½Ÿ¬ÈO8Á¶¦$;ƒcŠSË)É WsL³bª9%Z1ÕœP­Èÿ CßÉ6°„ŸQ­Ô­Yù·?ŸËžR¡Ç؛݂T»¥Ç3žñRîôœ_Ý¡Ö7ø5Fþ z3.ûu-YÁ‘Ñ+fTӥß8a…òkŠÊŽ“ƒÆ>Ýõ²¢%+ˆ®çÇþYfœ â”<ÓYVœ^½ãI²}¾ÖÇu5þÄ~öÎ|RË¥ãŸ:V»ÏL…”TØH¶‡:üIá“Z/þ¼ð±ÕüV©Ö¨ÃŸNo¥•®8¹Zï¬ÎʨmŸmrß×uNMgºöŠc«·€Nhúøk;✠(S‡B㺫\3•{»Š¬ˆµ\VšÒæ.-X)/™;áNuØv§Ã‹‡LJיּò )œŸt6iÅJÑQ†t[±‚ïºVˆ¼ù¢+|]¸X±rÅ6Ýé-D-YÑúŒ•“V¬¨¤:-^Z±’u§Å¨+²¨jš'£4œ[½+ß<…{–·tiÅJÖßV·Y±’<¶i8·Z+V”¤áˆ­YªNÁAËQ«VH»ÑrѪÒƒ4©Üˆ8Z‰J¶BŠ©EËVè|§†#«÷Œ•ëf¢m”E-[¡<ò£ƒ’® µKËV(CPëùyU *+Z¶BgM +i©ÛÒÓÒ­n+oœå6‡ç ¸… ×=ß,¸“ß Íà–×2¸áß©öÜ1èoÍÜðîLÜ nÈ}›á(n8²«›àct㘣5 »‘áyUfÐ-ÿQWRƒnR®tgùMbȵ•6ž/®„¼>¦frcÎæêæË€È 2…#erCðr­X'“[ÞtéZa3&7Þ‚8ŽqÔÇäæCœ˜ÜµD7Åu7¹QQ\q=&7 P˜ÜhDŒÓGÃä6×¹Qv§©°àf “›åLnj¹YÁäFüiKx™Ü,arsB&7Êjžñ&7l†ºNV`rC3’Ö)Lnäº[ö 7ç$"r³iÃäæD@Ln>ü‰Éͦ!7–ž‡§1¹m™&·©ÈM‰‡¸ùø¯ƒ–‡Í}¹~Ø_~°»Šu–V–fw•ÆÈ»µàÌn¢äfF7NŸØKÑ]P›F7>çÞ¯dB·©—<£[&‡ÊËÇètûÓ rK‘:ÉÉmË4º‘F¢Å[…ÄäÆeÛÎ䆶sçbctË-ÕûœÐmnIèæ–¹©ˆÁÍ'21¸—‡ƒ™þïC™ÙÜ@f6N/YguÞÈæÃ¢ÙXæû„EF¶½N#é|Â574ŒìRtBAf6†bŽ‹e6Ú²ÏÇdf£¾m˜G™Ýb/1Ût˜Ýãqë4Tf6Of¶ÎdÄÄ6-!b—ªS`2±éŽÄk.Q´6ãO´FUʶ%ZóÊeZ›1&Z£kû¬T¦5æãN<É´`v i-eHv —o? 
´//ÁAõ ã»)4¸6RµŒ¹Ìß·}$†Tej^wñGUþÅk<ü~ù¯¡Õ;ÎÇ¡мÆ§D÷¦ºŠåѰî²½¦-­h „¼¦Þr‘‰Á—Ý5iPã’¸Ô%P£ÌïÉL îª MãK Wèrñ‚@mDjì«ÂNXD¨6ª BuW+ìﴈ׸.ï£`‰×]qùe=i^ãžbNû¹­ïÀ6R ¶m vo`¼Sf®»˜bç=#\÷;ÊÞ`úb4®{óv‚eÂ5צaÝå em§4¬Q´f˜5Ë3ˆÕh£ÛÇ$«ícѬ¶ešÖvL4­Š„hÝǤÄùÅñÚÈ:4±Qtm!.eyçÂ"b£ÌWK°Ø(’Mò<ËŠ€Ýs§É"`Ûžk`m »÷N.¼wó#`„æÅ6Ý‚Ÿ–­ ö ÷7t+ÝVèQâþ'…+o·àL¼)-éÊîà‘šÒ?`ºÍΙߎ+GÆC¤!t««üÛ4B÷ú|þí¼€…{=A¿}—„¥È„ #ÓØQK^…,°éÎ_‹‚E/Þ Š#Ïp²áüâ¥>¥~>LœÇ¬r©Lµ‡eI YLÍguIKY¸æÓƒ´Å,¦â“j=…eç"½n‘ŸœX½Ê=±¸Ú³kn´ÞªùèÄŽŽ5±¨æÃkÄqî‰ÅNÒ’3"ä².WLÇ$-¦–S’®æ˜¤ÅTsJÒbª9!i)þÂ7w²­õç$-QKZþrLÒ"˜,=#nNË÷±ï3&_LËó,cG"åȽ±~žîì¤d;Úq¾²óû.)Óžv¸àÚJ"…¥'ÇÍÒ›^èÂJˆ=½îññá—\ijuœ*–åÇÄÿÕR|ƒµü·ÎÁM" ŠÙÐòóìG?³Sž¾ H-»´m?üðb׵峷€ÇÉ›ðжeÆS2$ y|X;?5Œçƒ--ãÙªò)ds\‰ƒÊCza§XvSÖhŒwD}ÂD1é‘c`Kñô¹Ty~¢Ï Ï=AÞžúŒSe›vƒ^Ì}©r)€ ôȘ[ÓŠù1èñéÝY¦ôp?Ìuf9ïô€-çäub6Ýߊ3çyMû`Næ|ÿÊ|‹a˜ó& Ĩçï’‰ôrs,w-×HFš2ñˆô­Š=º„v†ôÒ+™ÿÛ°"Òã@¸r8Ò·åõ^±Dú†ô u~mHßp ¦ÛïÁAz ~[ÂvCú†\++xnH?¼ûS'iHß<ÂøS’eHßðQüRzÒ7§®†ô QÄ0rÔ7J²EÍ„ú†ÞŠÍÔ7DŸ—šÇ ^®”G±–+£žôj†ô“ceÔ0¨— URƒz) jÕ1ê+2/Õe3ê¥6dõ(Ûyí êk}Á[toÓ™ôþ‡1_ëó2>˜„á³á­_Šyì!fž‰æÊ¯ðÀ<²¦¨'siLÄ(ÉËfל‡ê²´0õŸ„yÎ-b0¿D+Lx@R^I×tá‘HCÖéÚwá9{…E¼‡zf5´ˆ§€ºE|oÜæ#þÒ­ZÄ_ï™ò|H›ò:Û[€—·ø•|“ɲtð8(på¾±€—B™ÄÓv2€¯:¦|}•6µâ–ïöÂÁw1€ƒr 0ß±ÔâLºiù.‹ôZsËwY‡WØ^&æ{!Çð½ ]oX<Îÿ^®4Æ»iá6Áï]7…¦ï¶ðnÞÒÄw:€Ëâõ²x—Âk;hï¨2n§ ã]nÅlç#¸ ÛÑÐkHkànl †;$fÏGtÀóñYðB x>ƒé­°>¯À‘“'.ŒÑ÷ý©ÿK6¨BJÇ¿©oÞ¨ýHdìîÇOªo'êñqœâã©wÇè§ÚÏõ;*y nœâ„αÕ?}A¨ËcØÍOî´æ†º|?yþé&ülìÍ!O›/Âà ‚o¹‘ ,æ’óß\rã/±I¯Š/7ðÖø««²’›÷÷Íá›_÷í÷ÿôWõQ,6˜Q ˆȆ';fy/>k–½¼Ô\GÍß𿟾ý/ßþUþÿï8á{¨±ùtdÝL:M ÝŸq ì+ËaÖŒªîJ¦f*;Ø„«uê©ÿï8—‘Çužw )ô¬ÁxT•+Y6¢{¿+Û¯:’%_Ñ%ëä*éIwÀ‰ˆ}3Χ.b‰¡ìºÆð7T',õGÑÈ•ÜÏØBÑúÚe*WréAe(Ô±…õ—:λô0M±e×ËT²d”©[úkç]‰ÞÌñPaÂe–â(Ûôn¤J¾œì´‘3V׿FªäñÆ޳ÆÑ€˜d(b¡Zü>ÅEËG‡"• ej}P§x?ÊTŽd±ÉY‘Ñö¾—$ÉW.®—­oˤ h4%\©—©§é¡½Ú•õ`Ä‘#¹!¿ÔýSR©‘Ñ:õ ’:¾E¶<òÄ{2#ÛÞdu|·íMV™‘K÷ÖÉ¿ö©Ý¶ƒE%Dîe÷,G‘ʇlúWÔ™Ý(ÒÓ§ªtÈ(S‹ÃWuf·Ϊò!£LBÛ‡v÷–¨ÑT:d{K¤•ž'Ì#Ç1Æ ß_Áa‚Áóˆys]¸T:d”)S#\8ØgNΤ{œÊ†ÜËä2aQ/’;î¹™Ô0§’!÷±\#à`ñy¾¼m‡WÉÍ#-ûvlÙ¾Av5Ï—·×• Ù<‚€ö$Mz}­–îÔ1 CùÚ3áAe§G‹©œŠn=S9QÏ •£ê¢¡r|Â8¨ ``uã Œ“e0ô¨ê¯‰Á)0h5„S …B†pIWÓXZ§–º†1ßjyŒû0ƒÑ€}#Bp š4D`Ó%"ð¨„Àkÿ\“—k!ãé(0‚¹ÝƒÀ¦"ðh7 &°{"°¹ŽlFŸ<š@èå^yy\¼(R¯ 
^Ó6¯½Lƒ—˼¶Lƒ—ZIÜ¥¥ÅØ57$îòÀ3xM‘—“—ç“×´…ÈË#ðšê¼Üóø˜È'EAš½õR/vÃÞz%‹Üz=ìß •Êê ¶â#IoMZ”éË4q¤DÜÆ†·±¢ÑÛôKœÉkŠ4x›~O0wå*2g5wù2Í],µm.vq‘ZxÄ_(dv‰¿ß@`){À6;Lb®Ž@l.#Û2MâŸ$æjĈ!îq" ãNjà È|ñØŒñØ\¦qÌm$,›"åÙÖ\6ešËß Äåf˜­¹Líg,Cß l?²-S\æÑb07z2˜m™scˆ˜¹ç æ<æÁ`Û2 d[¦lÊ¢¿k>)Û!‡ø®\ƒö0}aûX“§kƒF3N­QøÕd6EŠÌ¨M¡M“Wi€i2÷;¾Mf‘q«ÐŒÚÈi¡Øü¸NÁe³ŠÍ(ÒPp¶µi:÷;ªU«éŒ2½ÅÕx–2KeTCäUTF[QùQ¦°Œ2í:ÐXîõmNi*£å8Ëö^šË2fÛ/ f{&ó£L‘¹O7íãPh¶õi6Û{j8Û{j8ÛëœmuÎö–ΦŒèü(St6õi:ÛË4yœm‘b3ÏB³¹J“Ù¶C#ÚŒ$!ÚÜRÚLr"´½eôö§O p ‰¹”§ÿáf”žà¼Uû‘xT›JÝ·õĨw!D×+i½ 1ešÒg¨9N”n…i«( uŒ&„¦4$0zY;£!ƒ—D0åAç–ߥsËïÓy܉ Ü´v˜™Ü ÓUC¹Úh(·ü>”YÜÄäFæ3¹QÔ›™ÜèuJHn•q­ÙÜ*ÏšÍíi ·ò.’1Wôc ;ëmh"ãïõØi$Ëb£QÐL®ŽŸ‡f2æ•é°ê¥­;BrÅÜÝŒÔD®ŽÔDdD’‰Ö:¨´ÀJY€aS Ô~"2¤7 Æ­ÈvÖ¡ÔîPV$Æ™B cÄ• H âÊû31îøˆ«±ð5ˆ«Ù¢hWÒó‡ëýEÁ4þ5ˆ+o²ˆÈ¶ >ºø©V?¯wa‡ü>ˆqNä{ ô ש5FÆÁ†Ê—¡1HbN†„F/?a\§ yÜ[¢–´âq Ž«pŒ"ý"Ñ\F¨L­Ô-Ñ`–ë mŸ" ]Ú»F2ÊÈ&¡K#Ž+£H7\Ù6Eùq²½®à¥ä‰Ì¦{e(^´Ü”ÉìM "3w™ÀlšN`öô .û¦gqçƒ+˜©ý†Ë-l»0Ï5˜yÌ<ô fsK"4õ›mª#@›2"4ßRÚ”Ö¥½Ïg[Fjº%áÙ\F|6í'>ó4a>›{foG÷Ój—ìf¤µÔ©7Á¿Fl¥'Õ.oÕ~"ø3ûñ“êÛ‘zêuô4:F?}Ô¤{XíB?}A¨ËcØÍOÇÕ.Ôåûɧ_¬vA"0yÁJ=ÈivTí"oÙKYú%ú¯T»D¼SS»¢üã×)vÙÐØx:2‡ªWp: ÕŸ¼gåYÕü „ÞV pÎ+zžÃ~NÐgëÃþb=¥¿Dì‚ ÍT£š¿`ªõ&ðTã&œŸj½ Ïa?7Õ:°>ö¿…Ø%àäÜú~󔨅ë8$váJN‰]¨–cb®å„Ø%b¿[½$ø,èC± ŽõÔb—?‡Yð׉]Àx^¯¦H8÷kG\‹ÖWà,ñ•ç¸dŠ #Iývækÿ„” ¹V}ËöÙá=ˆ»÷r9PÄ•²3d¸gî ±(Ú[~驸’;GõÁ.S IpMý§´ ‡9¬b H¤uk{1r¦8«–ÿdœÂµXä Ë™â¬äËÅ­¤Æ(R›á\(ÌJß\ê>7ª!ýL*…Y/ºg¥x+%Äɵî|Æœc'7 ·^zt[Ú§†KQÿiY©æŸÌœ˜Ð+)/I¹Hû¢G>™51#íú‹#íËø‰$/U=¡WÌš‘üÅ“â… ¾˜9cå"R¼`8 <0kF½ J ¥ ÅSJ ¥ }Ü„šRºÐ7=q(¥ ß3’Ò…– ¯Rºhñòpl¡ …­°À•Ò…¾o”Ò…Â?Š’¼pK2i^tpª’¼p)^(ÐS )^Œ4>\kþ‘JU‘ÊŸ¿?—‘%|)"R© í¶¨HE¿sã·c"R'!~è%y /h¡8ãNpÿ´ˆZhàI†Û«†FÁhe‘Ø(B0-J†³ÌE+Uc¢ÈÐêÂtzRQ'\h­'U+e*ÂÆÖêTä˸µíV¨Ò¢R¸ U´]_î„*­oÕ_’µIŠœ]_D¶2«,£õE o_lRwÛ”$=-š¥"„~yuU„­Ît-*éQüT¬4½É= ƒŸŠ„gúZU²‚LÏ‹JV0œºÆ¦Ðäãéá{<¥+ìq[ ]™‡6(VôÚ…´D+¸ûr']éûF銨 zTWéŠ,î>HWtâeSéŠ&½í¥R¹¢w*ꀫrå|`{Ý \‘ù×JáŠvßO*\‘fQ\u+jª”­ÈÝê‹ÉVäúGêxÕ­ÈSÛã7Óª[‘I¾¾nt˜§t+8¿/°³¹%ðp®ê1$› VÙ\6ðWÑ\TmG4U"ˆåHäYš¡¹_‰¬Â2DNd(Q Ȉ…5e¿)Šg"ØNN Ì$,$ðëß+ó²Ü9-Â*+z‘â†äµf@/½nªÌÌr³3öÑÀ^·)|m8À—ýެ21‹ w°—ÿÿU0iÙ?2”X`/Ìá';1û³:˜¯gðL@hÿ(aÎ|d¤ÈÕÆïßÂóØQxÍǺ÷ö™„û‘IèY¿.¾{\ó~O‚Çþ"¦¦¨2ÛZ‰e bjdŸJƒ¡ã–õŸÄÔÔÊx,µ¯-ÿ?1 
V¬£7ŸžÎiŸµj¶w½¢‡4¡Çþ¥Â~ÑÅØ$~Bª°¿•16öï¦ï.þOˆcty§>6%cwT–GÝÜÔÚycìŸXtom¹.:NâÝáf¹^üÝúæâÿ ™±_ïäml2¶ý)…Œ òBÆFyJ!ÃaSÈØ0O(dꮡ6>¿[û;…̦ú˜?ÓÇ ªÖ™ ¸ìóGòó’@îø>-ã‰Ûs=œžòÔ£†a~¬9Œˆ­¨c½ŒOÔ2?5Syä‹m·™8÷%"¨¯cˆÁN£i4V#‰ÃîÑ1ý¤²q†Mñš´t Ì–šQ¯½~˜°2zŽœõó–‚Û;Åù ÎÔO‹gÇ…×!S8sÆ|¶ôJu .Š4¬Ÿe¥ñ” U¥Å°Õ™¸Œòó Òòëë’9 ÔÍ™VÄhõ×}2ÑŒŽ;æõûbÄYuÜù-ù¯ï‡í¤Õ[˜Â§ó^»ÁÆÑÝ{íâ¨q ÚÌÞvFhc£“ÏRE¥…a«3-ð±R}M©,·%4†Qc³n½~ÖìÎG.UDg¹òRExV}§ÃVg%ìcÉî‰CÆWÌgÉî‰C6hîJC€Ö–l¸xÎ%k½v Ñ.˜jGŒòб(—YûµdÕ©9lÛ¯ù\±pÊ#â´êzïxÄi1›u> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000031791 00000 n 0000031812 00000 n 0000031835 00000 n 0000032265 00000 n 0000032134 00000 n 0000032029 00000 n 0000032192 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [ ] /Size 10 >> startxref 32346 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_epyc_nt1.png000066400000000000000000002327351360743507500216450ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l 
¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$9·-ç vpAg’Zó!%Õ€IDATxÚìý{œ#å}'ú3†á"¨ÁÍ`\ 2¶“LuF»Žcœl ß9«µšœxr˜Ý8ÒÏ4{6ç¼,%s6o2ÙÖ&¿“,´·o<“dã–iâà„® ‰×Þ»°c4€§À̈ SŒz`Öù£ô”J¥ûµJ¥Ï›W¿˜VéRÕúêÑó}®[*•JDDDDDDD>s×'@DDDDDDÔ V""""""ò%&¬DDDDDDäKLX‰ˆˆ&œišÐ4ÍëÓ "":&¬DDDN×uÄb1¯Oƒ¦\.—ƒa^ŸÑÈ1ÖÇ‹ + ,ŸÏ³OS±>^oúÍßüÍßôú$ÈÎuÏ=÷àK_út]Ç-·Ü‚mÛ¶ÙÇs¹œýá$ ù|Š¢ØÇ5MC.—ƒ¦i$ ×]w},“É@QûùMÓÄ-·Üb?§®ëxï{ßÛ÷ý‰ºÑ.ÆÛËår0M²,ÛÏ•Ïçñµ¯} Š¢ô¯ŒmòJ?ñßM쫪Šl6‹gŸ}š¦A|¥›¦‰O}êS ßDƒj¯_úÒ—pþüylÛ¶ ²,×ÅèùóçíÛXfÓ$4ÖÛ•ûŒíÞ°‡ÕLÓÄÌÌŒ=ÿ(ŸÏ× íÊd2Èf³ö±¹¹9äóyûx6›ÅÜÜœý\³³³Èår ÇMÓ„iš˜››C,ƒ®ëöñL&Ó÷ý‰:iãâß4͆xë7^Ûä…~ã¿›ØO¥RÈçó8uêTÝëÅb1˜¦Y×°I4¨Nåµ›3FEoËlšƒÆz§Ç3¶{T!Ï­¯¯WœoÅéÓ§+ªªVŽ?^9~üx@åøñãöqY–+ªªV*•Š}|ccÃ>¾²²R‘$Éþ@eeeÅþ]Q”J2™´O§Óöóõs¢NÚÅx»c•J¥á3°±±QP9}út¥Ré-^Ûä…~ã¿›ØO§Óu¯qúô醸&–N嵪ª•õõuû¸3F·±Ì&¿4Ö;=ž±Ýö°ú€î•J¥ì!½ëëëeù|ªªÖ K$ö¿5M³‡hšV÷»h¥q¾H’I’º:§nïOÔN»owLŸG2™¬‹É^╱MãÖoüwûªªÖ½–h¡w~O K§òºwŒ:Ÿ`™Mþ4h¬wóxÆv÷˜°ú€,ËX__ÌÍÍaË–-H¥R¬! 
íˆaÙl¶î§Ù‘WÚÅx»cB"‘¨«´3¾i’ ÿ½Æ¾¢(H§Ó Ÿ!¢a覼& ‚AcŸ•ábÂêb!¥••œ>}ÈçóÈår$©mÒª(ŠÝj#~VWW‘N§9w‰|£]Œ·;&$ 躎l6 Ó4Ù{Deøï5öWVVN§€óŸhèº)¯‰‚`ÐXçge¸˜°ú€XHI$¦²,ÛÃDeELÚ6M³.ØUUm¸MLü&ò‹v1Þî˜ Ë2‰²Ù,’ɤחCÔ“A⿟ؗ$ ‹‹‹Èf³Üv†ª›òºÓÈ0¢I0h¬wóxêVs’fff‹Å033cWRdYÆÊÊ b±b±fgg놄‰ŠI&“Áì쬽Bðêêª×—EdkãíŽ9‰Æö®Ò¤4þû‰ýD"D"ÁÆKªNñ*I2™ {‘hâ ëÝÖm¨;[*•JÅë“ ‹èE•$ÉÎ+O’e†a@–e{™3)u.²$† ùM³ïæ`íÇšÍfqüøq¯/ƒ¨/ýÆ?cŸü¦U¼:ë,í§!šƒÆz§º u‡ «Ï†™™lll@Q†X,†t:Í¡‘4ÄpšÙÙY$“I{~QÐ1ö‰ˆˆ8$Ø÷œC‚·lÙ‚ÙÙY{Ñ4ÐuÛ·o‡,ËŒ{š*Œ}"""ö°‘O±‡•ˆˆˆˆˆˆ|éB¯O`XxàüÉŸü ®¾új¯OeäNœ8]»vy}#÷üóÏãÒK/Å¥—^:ôç~ùå— …°´´äõevmß¾}¸ä’K¼>‘{ùå—ñòË/ó³<„çþË¿üK¯/±k,ÃgÔeøõ×_ßú­ßòú2»Âò;xF]~ÿÇÿø‰D¼¾Ì®°ü¿•ßIX¯ºê*Üzë­XXXðúTFn~~ËËË^ŸÆÈ---aÏž=ˆF£CîB¡€£Gz}‰=¹ä’K¦â}ï ?˃?÷$a<,ÃkX~ϨËïIIV–ßAä·ò;0 ë4qîÃd{öìÁÎ;½> ³izϧå³Lõ¦å}g>}¦éýž–Ï1Õ›–÷Ýoå7Ö ǽ>…±E«ù_8F8öú4ÆbZ>ËToZÞw–áÓ‡å7Ý´¼ï~+¿¹èù{XaЫÿ–«?ä[†aÀ0 @¹\öútˆ†N×u˜¦ øÞ÷¾çõé Œ k7L¥úûlõÿRõG Uÿ½^=–•ÌŠßõêó¸ÿ ‹^_`€¾¯;þ&½p_¿˜² ]×ëî&Ë2dÙjøöý߯e~ ¯¿þºp~嚯àñËÿ~°e7—÷àñËÇ¡' ë:¾öƒßÁspîÂ(Þôƒ _A¹\ƯÏü:Þÿþ÷{ý&€|õǨÞ&i’è>ŒêÔÃcÆDÓ´†Ûœ `+î¸mÇ4M˜¦iÇ5`5¤ˆX—$ º®×5®õŸq®2€ß¸öZ\|ñÅ8µmÞ¾§¶m³Ÿ‡ +ÖV2Ò°*Ös°*×¢‚½Ñâ1†ãßjõñBÞu_µÉc2Õÿ'a%¹êñfõáÅêùˆÄ8íxNçùè¨%™"Á0ª“QK–3hîÛ%àÿ,â›oz®ÿ¿_þ8¾¨}ð¡¿ÿn|êFÀ¹‹ÏÕ=ìË·~÷—îÇGJÁ¿|ì_âw~ôwðù—>ßÛü ÞsöçqúÍà²×_Ç–W_Å¥܈ +WâÝ¡¿Eùòÿ†/ßú«øØ}Ãú•ëHݘÂ?;u>ùèo¢|ùáÕËžÇÖk¿·¾õrlݺ7–Ëxýõ×qîS¿ŒðW¢–@ÀÝ%Džÿ ðÒKÀùSÀ3 „>\`â<ó*¾8Ìx™t"‘tÆHõOµ¸D« o*¬Ïˆ3ÑÐ!çø¿H̜ɚÐëÂ΄Ó|š¦Y—ŠÛLÓÄÞò\þúëxrf¦.)tç"XEQ I ‘H@Q牺¸]]Te ™¬?–ËáôÊ àÒï†u_§õõúß ÈçE©½¦ãä(""Ï™¦Õ ¯iE,ËP¥¡aÒù½õo|ÃëÓ&ò&¬­h¨%së]>ÆY×u×µ»éEMªØKŽç0$к7jõI¯+Á=¿JõßpöŸœÅ㶞݊ozGß8 d€Ÿz ÿì^‚×_z ¡×nEèõ¿ÆÉmÛpîÜ9œ• œø¥ïã«ïx.½ ’$AU¬‹{×ú¯×Y/~x 6oÚxíò>öpÇ?ˆ·ø?ÙêÐ4çp n¬^cíu9nÅÕøˆEÑï4ïÄÒöñ+ñ» —rþò3®ƒ÷8~øQ²ûf¡€ç'hK„¡3daÅ›HTÔ6$؉§¦iö©ž±J‘dÚrÀŽs;ð‘ÒGðÒK/ágþ’$á¦òMx"ô„¿†a@2%+Ù‹7•o¿}äßâ+×|Ÿ¹ñ3$ ¿|â—‡~ᮼòJÜrþÀ?^üøÃˆ›Êe¨ßü9¼¼óRü©\k ’$É>'I’ê’Èí?Œ7¡Ü<ûWÀÖ³À³a`ûÏ; àæk€EׇÔ0€\ÎJ«‰´­â¶õu+atúÌS®>^½ý37OT#uÀk¯Y‰¾¡8þQlÿ…½À]—XûLõýj±êï¢Ñ@“-mÝQµž/»à’W¿êu”Ñ´3a5|©ènD…sôø°i°âÚ1’CÓ4<óÕgðüÀë+$`%¥š¦Ù®âÿâ»R|麎|>oÜQ\ߢqôÑGõú’ˆ†N4ú;v ¯¼òJOeÂÚŒ²èÏç튻¨+ŠEQpÛ7âŠ+®°zNºz~£®'ÄNÚ\ó` 8qö[¸üáÇÿnUºŸ}öY\úÜs€‹ï¾7}â€\mÁÓt¼å÷~?tÑ? 
l<‹'^aóaë-žyé%¿òJHW^‰|"a½\u˜áÖ³gaî~W~ô£¸F–ükÜä:å;›]‡k(äå®ÃÛ›=¦—^3©û»R4XÃÔÅœ®ëвšÝòkTUµ[…“ÉdË^HV¯¢®#¥'¬›&Hw|ßïĸӼ͊©Ï= lnâÖ7ŸÂ©·_Œÿú¦5ìþèn\ŠK±®®™ ðú³ÀéÝøð>wßù8ðÜN`ËvàÍ׳ۭkIáGe/o>õàÖ]µOðrõïáŒ9I²Î[ü´cVd7|ܺMo¶‘$©qäMÖÖÖ¼¾,¢ŽZlsO• 4bdÚ™3gðḧך®„5‹úaº­hÀù[ÎãOsjãPUªªbEQÃÀK/½„³_ù .YYÁc?ø^{í5ÜxÁøîM7aí¨{ºw;†è?üÂÏ>‹Òµ×þñâ‹qþüy\uÁˆ¼ò ¾ú3?ƒo\u•=tÑ0 ä_z áPÈ~ÌSW\×nº 7}øÃxêÌ<ô¥/áßû=µäSúÑ…ò‹¿h`%ÖWHv‹ß½~È{ì¡ç¢!&ŸÏCUUȲŒD"¿ê*\ñâ‹V¯¸hdɪ{H«®©”õo‘Ü©ª•°ærV’¹ºZߨ£ëÖí"±•e«·òF©Úkyv¨*>é>÷ÅEG2wgýyµ$£á‰œÃüs°z0×Q«˜‹„µYÔ†O7K4;-Êæ<—AÞÓéØŽ:ÑQŸlº‰8Nbz‰hÄÒPõ3[½]4Ê(¨5°ˆQ‰êè%uÍSÏf³øò}[x+¤'$ÈoȈÜÁÕۮƿõÇqÉ]—àb\ ]×ñžÃæ0të»Ð4M(?­`ãÓLëZø^åFT?ä¶aôƒsäxŒ¨séºn7Ì:ëO¢çÔùœ—½ö.üqÜüæ7ãª3gð¡›oÆoÞ~;.j ùâûÌ)“ih课ˆõ}#Ë@uj‘Ÿär9hšf'žîµ7Ñ`#:7DëžâU(p´ÇQŽÓ“°f`}‹a-˜¦‰'>ý>WúÜðá7#±w/Vœˆ¦²Œ+\ùK¿( öT麎²¦AuR¦iBðÈÍ7×½a—x_uCÑ4ñj(ËrÃ{c“óÜ `Éë¿+M$]ÒíHô nllXO.g%Ÿâ W–k a“…‰X÷Ûh1¹;‘hžT*Jã¼Ì~t;²¡‘»“Öniè~êQ'Î…Í:ÝOŒ¬„1‰Zã…⸟è1Eõ¸XLpµõ œ£ DËjý˜¯\ô ÂJØþ̆C3ìÊŒ¨ü‹ï2Y–‘Ëå ( ¾ð_¨«Ü¸Iê¾ÿ‰D³;Y¸¦ØÐ‰Î=¥Â«s±ä$0D¬‰óɦ3~¶:…m§NÙÿ.>ô4ÇÚ²,ã'‹ED¬'Àk¯½üùŸãÜŽxýòÚ˜±•ãÇá: ë{Òz’æßAÍn[œ”6iÚ4[tÒî<«vf,..¶a×D¯k‘´3 «úçžïé’Íf‘Ëåð¿Îüwüîåÿ ¼ç×{XÚüñEilQC‹/]çc0Ü7•¨Ó4ñÇ¿öÇxö ‡ÍIkÄ€»E7™lìAúÓa$•£–DãJØÝe ‡°S¯ÄOçibNyºúç1µúïŒãñ 4 íoèÑs›WÑ0À4LèùÆD¥Yï˜KfCK©HÜ ƒ½±¸¸Øø—q¬ôgšµi2âßÉdc?Ÿ·{¨nÚØÀS»wƒºgr"ÑBý*è¢!A,Ô,)t® îë|\»ÆATŒEeع ž3ÎÄk©’„í[¶`ëÙ³¸üñÇ .À @óï©LÆŠ±³—ó¢‹ùÅ_DÜ]3 «wS’°µzÓVt ËL>}LÄ–3¾ vlUGN‰{gñ»¸~1•Q×u»QHô’¶JF×××{NTG!ø « 닺Eƒ¡aH¥RPüêoá’ÿë,ð7Ÿoš|M*]×177‡Ï\óoñS¯¿øì?cŒ»u3eÀM‡âR½V1!Vß®öZÚÃgU´^\Ow=Ÿ‚žzó À¡T+o¹Úp.÷HUUëX¬†ÔºÞ61t±þ¬Þ&M³Žg³ö}Uª$Y£,ÜÜEEi|n7Y¶ïóüÎx¾RêÛD¦i"ŸÏ#—ËÙs+Eå³ãš."6ìEøª•`Ó4‘ï;êO¡YÅ8þÖ·âÂíÛñ¶ÅÅúdWLI†x®ñ/ìä²éwW¢ºvB:Ý9–TÄi0bضsÑ€"R1’L’$†aOƒJ¥RH$H§Ó]5¸ŒŠsÅh÷¿E*‘ÄNÎǺ’œTUE:®Kä»Z“Äg‚Ÿ°Š/zñþ‰†‡j\jš†T*…•dªiŸù °ú_%Üûky©Í0B1zàKŸú"Ÿ|øì'å^Ÿ±?õÚ˪¡¿D—&›ˆÑËiÀj=¤b"ñy\DãþÃ"qm§Ã}Dëy³ý{Ý+”M’PçK9·H¬DTôxŠdquµþAιÞÝ$µéýoî8ç3—_LóJïmˆ$U¬F›H$°ºº:påÔYùïÖŸþ4núîw­¸ëärÖ{éŒ+§~§‹°vªˆbÁ+UUë§ðµ ’5 Ö؋Š( ’ÉäЇËrÙ=ìÖÙà Ô'šÎ Ã꽫íF}úYðV±„…UaHXúÛÿê_áÑü‰=ÿ¶‡–9"?IÁª»ê“óc?†Ÿ:…_½ùflýwÿøËU&«í˜°’.ïïÜc–&“˜š€•VW³­[ý°’Ñ4jÃÇ5Ç1CsÅ>Øâ;HBý÷Q—œI©sx—3!½gvåF Ÿ­®Ü];'½–dº‡TŠ^2gâÙ©²ÔËâd4Rî üÊÊÊèzPœ‹ñ‰8k2Åä¦| 6ì– 
ewtà¶oÜæõYø–ˆqÓ4‘L&qÜ=Ǹ’$a±:¼;ŸÏ#›ÍZÛï¹õDëâN&E/o³dTL›&­‡ÓkÁOXÝ[UˆU ¤R)|âÀE·ÝV²”Aw{¦ù‰ØŠÉ¹B'¬žUý¦›°*6!pjM¬xÚ ‘ÀÐd‰¤¨{¸·  9ëÛb¡"¸nsj±ú³¨µ‚æ\p&ŸÏ×%¥ñ·¾ÿfçN\sò¤•èºõýåNt½¶ Œ;¡I«çåM$Ã0Íf¡iÒétm½aËf­† Ö¨!>ZU¼[-Jä5çpü~i°ÊQVˆÅ=ÅŸ^ìiîÜBn®z›˜£®¡¶h`5”)°8#›è 1‹‹‹Cï M$vï£s.¨sñ/‘Ȋň2™Œ=ÄÖ9ßë!Æžs6€ºùâýðEÂZ,‰DšÞ …÷9Ÿó1€½[]rçÞF¬­U§VŸGGmÛ1¡šÚç¢ÃJ$¨-ΙEóE:ÅH ñ<κw³ý³5Ô�Z½Íù8çñêžÇ_ÿ*~??ØûÙ%¿Ç¸hŒI$£kŒqñz•ì‘ûc+è¼ ½ó1"Ö“¾ñpä5ছêïwìà/þØv øú‹Àe¿¬|xòÿØ|;põ‡€°kÇ.<&?ÖÓ©{ž°.--¡X,byyÙ¾­T*!N£X,°ºÝú»f¢eëvCE^Xíø¬D]i| ι«IX_ÖI`nn+++õ…»†új-Æé9XC…MX.QÁ™bc‰ña‰*€³¿|þüŸ#7kÕ¶Åb¢•\Q{[–n8÷?b˜èý×·Ü‚_ýª=w陵ü÷¾…kÞüf|êïÄE¥RãÜ=Ó´æNsk½GüÛº®#•JAUU¬w3ßS$a¬2L°ïYàÁ+€®¯þO`í-À[ö×¼xÓ͵„¨&„Šõ¸ËP«ìº÷nõý¢¢>I§,’\õ ‚â5£ädÇk‰µI¤&¯ã¬Ú)°’@çÈ¢4Fç¨R1ʦSïk7ߥj‡Û<MïçkÌ$“Éæõõ égj‘h¼QÏáA'>_bQ?‘°ÊŽçÐ\÷7_vný2ðP8½xõ0ß œÛ\8üWÂúÕŸÞùà?_é¸ñ#²µ}Á«çPþ_eœù|O—ëiÂZ(pðàAD£ÑºÛ8€p8ŒÃ‡£\.cïÞ½XZZÂÂÂBo/ÐbXßß~àoñÅúÅêž“¨µ´YI˜¨W#o!…Úð¢jÉdçWˆí3|8:Ë—Ü­ù2j= VÇè暑bl1>(1/Y¾òá¯àÏžû3hÿ—Ör11)•JÙ«2:ïãLjÃvo¹î:¤ffÙ±Û‰Æ9¢†aõ\©*`š¸R×qkuoï¦Cw&ªñslçr9är¹æÛ5#FÞ¤QK²bÕ.Îÿ£Zé¹#¬tñ\î-“Ü k+­îÓîñîú˜ûµ»ÕO½n ¦eû9Æ3™ 4MÃêêªÿ{;Eý@Bm¿kV‚s2·êvCªÝiæÜºÌYÏp®S"yD¬æÛ¼ŽÝÀ¤ª h:7—¶Wÿà}ï«5©r‡ªóÒU !柼Ãb[õ§ çW¡œyý ^=újOfÏÖr¹lWª777íÛK¥ …> …BˆÇãX[[ʇ%ŸÏã;7|øçXoÌ*¸‡" ÕØâ[ÌÁq|¹šIÛoücÜý ¿P»1« ícáÅ©µë @ÌUlVFLqòïUÞ—,ðJäì3÷ǬÅ.Ú Ÿl·?ŸXô¨tÿýxêßÀ­Ï>‹_¹æ\*ž¹øâÖ Ì8çó‰ù\°ÈwüÛ"Y]__o)Ö2P[ éÞ3À·>w ëìg^ ?Çx*•ÿ÷ªæaÕ­Dâ)¡¶‡¶(þ»Ý†Ì=ºSE})8{CÝÓ‘ZöŠÅ€ŒcEnñ$¾{äu™¤ÿæ:y–°8pñxìapòäI¨O‰DP*•z{ÖœG ˆù_ÿèGoç¯Ý5ÕN‘Ç· op8òÏÿ9’¯ÿ+\|÷'¬Ä"ë`ÃL/8×·­±Åx¿­Ïšª!“É ™LÚÛt¤iÖö†a}±Wç„Ú{û‰Rú§­äÔï½Ô5¿Æ¶èujš¬Šy˜2€DÎZ )‘žø$ð;oöòÏI>ä×ÉêÀó±‡É9\W$¨bQ¡uÔçÃÊ)Ô.n+À;Wì^\ll HãÔ^¼èÚÚJ¥RÓÖšvŠr¹ÜòØ /¼]×±´´dÝÐdØJ‡&†c[,Œ“H8ºuR¨ÍŸ€[w·ºVmû 3a%¿Æx2™ýjÀbWEt¿Ò òy«Õ=%™ôåÂF~6ö„5ÖµÀˆñîαôÑh´nE2MӋź÷^^¾÷××äGÆ}¹4eÆßNŽåη:…Ÿø™ŸñúO@7öï‘®ëxä÷Á§>ö©æw¨Î“ BK5 —ßbÛ0 äóy?~¼¶VXh¯Óܹ$˜¬R¿Å¸Ø»zdóVuÔVïU` ûÕ¼S1œW ïÕ4®?DžîÃÚÊÂÂæççQ(P.— …p×]wuÿ͆Áè:®®®ˆFä¥ãÛÉQðþ?o;pñò¡ÆxLÓD*•ÂçÿÙçqñÏ\ì>Èd•6ÎØÎf³H‹¡êYT·¸Ð­ÅVœÃ›áWõiÜ1>²d5 «WU >Õg"›µ†øÊrýðÞ žgêGž'¬ÍV)‹D"¸ï¾û쥶{ßdÌøÁðñ,‡‡Ñx$¾ÑrXõ7/¼€e¦1iŒ÷(›ÍBUUÜ ßPߊnšÖ^tŽíiˆ:ñ2¶ À¦iVe>êºy`.¬®¶OV‰ºäeŒkšY–!;–5XÃæ60œ5=L³6¼×½ÞA2É5ÆÀ󄵕P(4´‰aÃÿ@ `hñ]iÂ'¿fÞ ±ˆÈÆÆ0ƒZªiVÏ*ç 
ÑŒ#¶3™ ­iŸ?üR¶sÏ*ÑŒ#Ƈ޻ª¡¶Ͱ¶œÉç­ï]·<M–fGÁXø6a&]×G?™›hÜ# Ã`ŒÓÔ2M™L««Õ±_¬–p±]ÍÊ ÷J¥‰¡iLÓ´ZŠÁZ$æëŸ³*ÌLV)FÒ»ªÁšß=è”QÓ´†øærµ9¨Þë9&¬DP<|ò{Þãõiy"•J!‘HX•VËz>oä~ª4aòù<’ɤ5X‚5´q6g & €¡õ®f`5Ü'0œ=‡5 H¥¬žTŽfð•©HX;r¿vç^ŸÑpÍÁÞ?lîóŸÇÉ&sQˆ‚NôFÙ‹ÓH°VHU9ü—&“=wuV%Ü0¬a‡¬ ¢á˴뺵‡ÜE€&”®ëPÅ6«°â9—k>wŽhiš6œúŠ `ƒ-ªdõ¿K“UŸ |ªë:~øê«9‰‚G 1 lþÐy}6Dcgšf}°†QM¨|>÷DÞS·Ç6… †Q`hšfÍÏî‡`ÖÔ~éº5ìwfÆL¡§„µP(4ü~àÀ,--5ó ]×qËùóLX)°žº÷^œ¹ñF¯Oƒhìr¹œ5×Ïéß¿êõiõ-ŸÏãΧî´öØ»Jaš&LÓìo8°k²$zßSU,Â73cý_UãÇÙ4AºJX‹Å"î¸ãÌÏÏÛ·išfo,¼¾¾Žùùy¬­­y}= Ã@øÕWÙÅOÁ¢Ã*¸@×qQ$âõ]CªiÀ–Ï{}ZD} D^|ñÅÖúD“ÏçëGÄtËDmNw/9¦èMµ~__·/c#ÐÄé*aM§ÓˆD"8|ø°}ÛÁƒíÛî¿ÿ~,,,`iiÉëëiðܱc8·c‡×§A4\&ìÆÐ / |Ç^ŸÑX‰yP’hŒ“ÏÞìõ©õEÓ4«2¯ä|nð'$ò™†)Ýšƒ•¨öšgF­75æÂe¬cÂZ(P*•°ÿ~Dª½8årÅbñx¡PÇQ.—}74øê'°=ü‰ˆüÄ1ã© .à‚K4uø|>_›¥ëÀÜð™{‰ŸšLù|ñâµ!‹Dc7Êô"kñ±~Fï&ìM ˆŽ k±XD4µSÀ 8uAç<î'7•ËLÁ#–r×4<þúëµ^&¢ ËЭyPö†a%«««Àßü¨µÕÑ„1 {Ê{ð–.µn`OŒòÞ3±p'¹\mÿm œŽû°†B!”ËåºÛŠÅ""‘ˆo“T§­gÏrÁ% žê ’ß|Ó›ðoÄÏy}>Dã4űÊd.g õ2+Y]÷ú$‰z§ë:ö]ºxËða.CÁÓ÷üÕn:HS)«ñruÕëˤéØÃ‰DP,Q,XÃ5MC4­»Ÿ ¼sçN¯¯©ÎÚ›ÞÄ„•‚§ÚÃúÂo` ‡¼Ó4ÐQUàV¦iÀµ2°ö­ä`š@Oþ÷'qË©[€G~ƒC)t]ïm;›9tÞ¦Ì4­•$ÉZP‰£Í«ck$A4Åüü<âñ8 …Êå²]Y(—Ë8zô(8€h4Šp8ìõ5Õy%aSð ²Ñ$ÊÀÞ$þ±/?u¥š°¾þMØJV9Š’&Ôu^‡ò<ŽK¯PXg¡@2 £ûéKbâvù­i±˜µ5 ·§ ¼®V ^\\„ªªX_·ÆZ9`:t轊ðââ¢××C4N{}Dc¦R¤„_ºò—j·_÷—ýíËGä#[OlÅIçbKH=ï½*Ãj„lE×­žU&«S£c+`ÍcÝ¿ÓcñxñxÜw=«€5ü€(Èã45ªûRþÙ÷ÿ }óGk·¿ëRîYIoçs;Çþ-ðé{½>¢¡ëiÁ¥<¬ÊV±¦Y[hSþ¦FW=¬í„ÃᾓU±eN+Åb±íñNÎ?ú(·û ÏŒ:¾‘ÍâÝ_þr‹ ÁÈc\0`ÍÛ°º±ZßòÎ=4c‹mX½Oö®?~ýLc3Î×u½»áÀ&¬é,.2Y2]õ°V`=z°gÏž†E—z±¶¶†¥¥%{õáh4ŠÅÅE{ÕáR©„t:m/ô¤ª*²Ùlϯó®ßú-üe<>¾¿&Æß0 ”®½7x}Á4uÆãB@0dkb“,Ëömœ·JÃ4ö؆U™ýTˆ‹-ÑXxã¦iv׸ž5b¦]n+Iü¬L¡®zX—––0??ƒâàÁƒ˜ŸŸÇÒÒR_/X.—±´´„……躎‡zȾM8pàÂá°}¼X,öýzDã4Öø6 |熼¾dš2ž”áÕ±:°@3ÉôúÏAâUýäÅϽˆŸºÑë˧)àUŒëºÞy««¬ç”Tj¢«„õàÁƒˆÇãÐuº®#ãàÁƒ û³v£X,¢\.#^íù …BˆF£ö°ƒR©„B¡€}ûöÙÇãñ¸½àS×4 Ç.½´ûɈ†`lñ †Ñý0¢!kŒÖ±j=GÓªû¯Ê¤ê¾{DC2öØ®ºöÛ×b÷îÝ^_>M¯bÜ0ŒÎ kœâA-uLXÅþª"x`aaìὈF£ ÅœD"‘ºŸ}ûö!6ÜÞÉž={P(êlÒ4Íþ0„ÃaD£Q¬­­ÕÅbÝ_•¢p~yb,ñ °ÂNž[Œ ÕiÚ†aà­O‰à¾ûî³[€z}~Ae/+y`\ñÍ9Úä•qÅ8 Ø q˜¦ $9l’Fkl± :pîÜ3¸ò'Òë˦)2ÖZ6HÕ'.°DMt°jš†{î¹§aÉëƒ"#›Íö´¢Y8n;&^lfL4‰ßtc‰qÅúéjÓy¢![ùmg^;‚·ýÄOx}É4eÆãm\ÊÀÚʆíîÔ…®¶µ) H§Óعs'²Ù,t]·²Ù,vî܉½{÷¢P(x}=6®žJD M@$Æ5pè#VËáÀ&Z¯LÔDW ë 
ª*–——†Ù:o?pà€××cëj£b¢ ¦ëº×§@4Zšë\ñ‚Æ€O¿ýinÑDÕ²±1úù«¦iýµÐ1a- (•J¸ûî»ÛÞoß¾}(•Ju+y 2Ó49O›‚­Z1Mw<û¬µÕQ@œúÚ)¼÷–÷z}D#Ó²‡5{Ë2ÖBKšæõé’uLX‹ÅbDZîìù«årÙëk""¢ 0(VëŽsçØOrÃçn@øÙöp%ò9Ó4ë{X5s°Z ÃJV‰Þ_€¦FÇE—B¡677½>Ïžq¸$QHV+ý•çúQ ”o*£T) þDD>ÖÐÃê^h)›åveÔQÇÖH$‚r¹Ü°:°›Xp) y}M8\’‚óú(ðªí޲,[­ðŒw §ŸþpãSßðú4ˆFÏ€5 XE}²jšÖTnYFt•°F"¤Óé–Ã}Ëå2–––F{ÚÚ†ˆÕS)ÐGà “U ˜z"„È{ÞãõiŒ&æ¥ê° ëd³;—¨ ]­¼ÿ~”J%ìÝ»kkkvok©TÂÚÚöîÝ‹R©„……¯¯‡ˆˆ‚À°þgš&f·ogÂJóvƒ½JlöHG õ«Ö¨™|X\ôú4itœÃ X½¬ËËËXZZjºuM4Å‚¯zWu]G’C (À¸×0š `Ð2~rÛ6Î_¥`1 ~ðœÞ½Ûë3!½f¨œ»J=è*ajI«Øº¦X,ÚÃ…Ãá0Êå2 …¢Ñ¨××dã>¬dLXi˜¦ ù’K¼> ¢áÒ¼ö$éG¼>¢‘¨«£è¨Ÿ»*°c‰ºÔuÂ*ˆ-nÜ ‹EÌÏÏsu^дÖSĺ%½Œ®sâÝtÝzÑñ!¦*ˆ×Öõú](œ[l%“Ö^éšfýˆß Ãú¥ñ\ÅùHRãk7ã>Ÿ tÔ°A†-{…à·Ýr ç9Qàl^¸Éu(°ìõ4X .­¸î°²ÒdzҴê9a£JœÅûdÒJ˜2ë¶ÅE+áÊd€\®–9ÿ¯ªÖcD’¦ªÖ¦Yœ š,[ŸiÓææ¬çÃýc1çõÖ^G–ku»LÆ:¶¸hÓ4ëü›I$¬ëÒuëqâwÓlêzû„Õ™|:Lç5:“R±—ø·3áçcÖH’D¢÷„Õ0¬¿ùœ{ÙÅð^Ÿ Ñp}õœ×g@4r²,[½«¬wÑ€›°£÷)—³!Ó¬ï™tîm¼¸Xëí¿‹„RôF-±“¤ZB)NQUÚsšfc¯¢$ëëõ·¹oÆ=—=™ì<CQêŸ[–»›ßìµ:qwšˆ$¾—óköÚÍ8Ïç{ßë|?3œ-# z±íÔbõm€9úX'òıçð—üO܆íõ™–€#}Å=šÑÉÝÉ#:Ûš=ÆYWTUëx.Wÿ{*ÕøgÎÜŒ÷¿ÿ©žÎ9° k³ý)uݪ<‹Cšfý!76¬Ûææ¬ûˆß¥öÓN«ã²Ü¼²ÞêöÚ¹{ý×#¿3 c(2bÄ€(ˆœ½Øù¼õãµ#zÜE¯¾»`rŽH¥jÏ«ªµaÙ²\k@#D#D&Së1wö¢Fí1¹\íwY¶~wŽ CÎósp¤Î‘bظ¸n1úA\“µÐŒs$ƒs¨»¦YçØlÚ±s$ƒ{¨»säE³×÷qŽò›G}ïÀñá9 Ük˜‚if ¾ó7/â6¯ÏƒhDìÆF õÛõÊL!wÇ€øînÆ]×õM³F6ëu1Q÷q×Íœù“;amšÖúüÄkKR}‡œ$Õ¯«%ê7…Â1=ú|O/OÖb±-W.‹…B‡Ã¿–aX ©xsÄÿ××kÿ^]­ §–Ð FßÝVäóy«sùÉS"QK ÅS:6ÀzŒøü8sœ‰¡$Õ6a}½–à:‡Œ;O½Uo¹Hš› ÷CßÅó¸G8#ÎO–­Ï¸xŒ,w7JÁÝËœHXp»?¿Eá|-çù9“RAüd¹v¬Sã–ø{‰Æ8q¾†?þtç‹ÐHc<`‘ +ycäõ“oÅŽnÁ¼×JSkÔ1n"×Fꇋ/õ š¿*ÚÝ=Œ¢1\$žâûºU¯"PŸ4ÎÍÕOáÏÙL«ºN¿£»©û¸GIv3jR$¬­~DÇ„µX,bii©ã•Ëå®_´X,"NÛû¹†Ãad³YûCS*•N§í“ªªÈf³]?¿¦i •Q9•åZà1!¥Qu|·¢ëÖw(45Í*EB*8“Ñn;i8~¼ý}š%aâv÷9tC–[¯xßé³Û¬€ì7ïq?W7å†;Ñì&ñl6ô¾Ûç$^kmmt ëXb<`¸ìµ×8¶œÆÆ«ò›h\Æã»Oï®_¸Ýª c$z EêA%ßÅÔ@ñõÓj!PqѺ²Òù+Ë4rÁäö.Ö…B¡®·´™ŸŸG4…®ëx衇F‘vÔL8€p8lï6ivj6\ÒÙÄd•FeñÝlQ±L¦Öº§iíc¼›Ѝ•qÄ8«ñqw¥bÕˆÆ`,± àµë_óúR˽CøNk’èzíw‘°¸§ºÖwj³gq”ÍÖOÝp>·óGL½j m:˜t¿Æ(§tŒ+ÆàµË^Dcµøã÷ÚzÝ'ñ·tþ¾}»5 I¼bAT/‹‹V")êOɤ52K¬O#¦79{6Ec³3ádýjø:ö°ŠýW‡¥P( \.caa€•èîÛ·kkköðƒB¡€Ã‡ÛÇãñ8ÖÖÖìÇôjf¦6/•h”ÆߦiÖm‡ ¾øØBG£6–w »zß÷¿ÏÂ;àÜ 
P_¦9·@ÅRé"¼øâC=qÖOÎ]|.0[Ú42éž:"Ž9‡MºÛ¡ÄÎîÝZíŽà&z¬òùú^2‘°:“Q1¥DÌ·k6%£ÓðLqçóºwGprNõpÏós¿Ö¨¦tŒ3ÆMÓÄiõt­‡5ŸYïªafµé}î—r.ŠJ“gìsXE …ìÛNž< Àú`ˆ;ÇÔG"{èB7$I²{X5Íja}‡Æañ-8‡½›fëá³DÃ4–7P?Œ, {¿s®†/V·o¶À˜úæ^ÈLüî$4k–tˆ£U%ß9OÝ]É…ÞÀ¶mç‡zýã,¿Kùw.êùƒ{¨$Pû·HÚ½"Q â=I£x[ÍmsþÆîÝÌók6%£›Üʽ0g7Æîklö·Õ”ŽqƸaØó¦=µ4­ïŠŠiÖò]çß.—³z¹UÕzœOÇ4DÖB¡€ùùùº!ˆårÅb±ë!ÀNî¡Ã¥R KKKˆÇã‡Ã( -[.—ë>d­(Šb·\vSP Ë8â»™1°!kŒëºŽñì³_Õ†è‰t¯.í\èkfÆ:.Vèv®Ší\ÛÝ+%¾öÝÓhœ ™9çj5[Ü£™~æn¹+ù—_þ:.¹äÕ¡þ-ÇÛ×|å+í» ‡HĈ‘©(ÖwF³ÕCÅ}Ýsõœõ¨nÞ¿N[Ö s1êÞ¸ë(¡NWq<=«ì‹˜Ìdj ¬XŸ†‚­¯ÖC‡áàÁƒMçÑu«\.Ûϳoß>{¨A»Å›677[~Xž~úi<øàƒ(‹öf1Ĉ½«½ÑuW3¹¢(X\\„®ëÈçóXl2®"‹a½úM–Ïç‘Íf¡ë:dYF:FÒ'ãU×ÖÖpï½÷bëÖ­#{aÇ7œ8qóóóPUº®C­~Ûçr LõæççqìØ±‘¾ÆHËðÐ2 YCʮ޺u*j#"™ûÞ9‡Áº{4Do•{(£ó{¯Ó‚i@½R¢7vÜ–––päÈlß¾}$Ï?êò;þJ•“0ºy¬Î­¾œ«¹;ã¤ß=Íi´Dù],[®à;¨qÔÁÔö_íqcw÷è wÇl:]›.@“EÔÁ{-¿=ÙÖ¦X,âÀ…B8|øpÃЃVÚ-­}ýõ×ã¶Ûn«cŸÍ6n'A™ÕL_$Ÿ†a ‹!‘HÀ4Í– ZµiÖ0 ¤R)lll@–e˜¦‰ÙÙÙºžo/‰–Ä£GŽäùGß°k×.û‹àÓŸþ4TUµ·~aŒ“Óòò2æçG·aÆÈËp€ @‹¯Šy~¢ç*›­Í«sÎms&îa°Ýö`½çjaa{öìI>Žò¿ñ^¹ðíPÕŸ¯˜**øªZ«Ìó{a2‰ò{TÉê¸êຮ¢¼r.µÛ†aXe£íÑn1;£&S¿upOÖt:h4Šýû÷7Û¹s'k˜‚øp8ÿÝ MëîËÚ“eÙN<»a†ýÀšk¹²²Òõã'ݸâ»öz^_1M›‘Ǹ˜§øÄ8·cú(?>¢áȽ2©iÖ¾ƒÄ ¥")=öóeÂx¥ü>úÎ]ü €þV±×£HP»mÌ W¥®ƒ¢Ãb2box1Íu‚¶i¥1{ªiJ¥TUm+F‡FëV$Ó4 ±çzˆVG¶ÀôÇ0Œ†aÁªªÚ½¨íˆ¡ª333H$PUÕ¾-èÆß"ùg…–Æm\1o<ñ.zÏ{¼¾ä&ƒÚ>Ç@m`³y{‚»a‰Ÿ[ÿ[lŸ9ƒ×¾Òóù9WBÕu«R?%_­4$ã,¿wŸÞ d,¢i—ÍÖíD/.²ÞNÍ=a7®&†š.,,`~~Þ^~; á®»îêéuF¸zöH¸“Ãqë&©ì&Y666ìù®™L†a`}}ÝC‚Gi\ñ ÔVÍã°/§qÆø¡®üä'=½^Ñêoµ¤SÓê+UbõTšlc‹í—¶ãÉ7¾‡zxH6[+ï .œMýgù½ûôn{´Œ[*ÕzQ/¢fºNXÁ-&e7 øN{¶.,,tÜË)‰à¾ûî³?Xý¬FœÏOÖð¯{ eWë—,Ëu +åóyär¹®N‰­ªªv‚šÉdËå°ðqãŠïºÈ‰Æe\1§¶m{Wd.WÛÖC|¾Ä– ¦ ¦±Åöó¯á¯Þõ(¾ v¿Ç½¢tBNÔɸb\Ó4\|ñÅ-Ö€Wi:&¬;wîľ}ûnï·Ò-÷ÒÛ½pî%7)¼NX;«ývÃ4Mär¹†k’&é ±Aâ[{ ùÑ0b|•èû%VéØêOÍ #¶Ïv±J½iZ (é4c‘Æk1.C¶ÏsôPÂÔ«Ž k8îØãG\ˆf0š¦aË–-ö着buuº®7=¶îèÎN$Ð4 333v’kšfÝ}hpšÆVJ ¶qL!Èd¬¤`cêS±÷”Fî?mƒù!œ›c¢J“kÛùm€ –VV`J2R©ÉýHþáÉ*Á£Æ…h£ª**•JÏÇœ·¯¬¬À0 †I’?wuÜØ›hªi𕤮®ÖT&©4V{#0îî<Ÿcuu²FŠ ¦ib÷K»­_ª=¬s1«¬eýœúÑuÂZ,í †#‘HËÄ‹!\äOέmh¸&mQ1¢^=ñéOããßùÎОϽ¿‹&òR«:”aX=þ\-•&™®ëPflןù¼u;§1Q¿.èæNš¦aïÞ½(‹…j;â-,,`ÿþýˆF£…Bx衇¼¾¢© iLX)ض<ü0Bo}ëÀÏcšVj,fJØØàg‡ü‹Ã€)(ŒEê¬( 2Na¢Át•°8pªªâþûï¯Û<8‰ cyy{öìÁ¡C‡¼¾¢©À" º 77QºöÚŸgvÖúÿÆ·€"0Z,ïžJYÉ*V 
Ã@æw"‘`…ÓqH°Ø‡éî»ïn{¿»ï¾wÜqâñx]RKDÃÇ ÝÖo} ¥Ûnë뱺nUŽ$ 8~Üë+!ªgFÃt±¤¡ ¸öÛ×â†ož2å.„JëØÃZ,‡’P1 XÇOž<éõ5Ñ„Ûþ䓨ñŽwôü8M³†Uša%òŒs›7]·ÖÕU¯ÏŠh8.{ì2+/0 ÎǦ¡è˜°†B!lnn6ܾ¼¼ŒH$âõùÓˆ˜¦‰T*…íÛ·cË–-˜››ú‚Zº®#‹Õýd2™ºãÎßÝ Ã@,C^Ìæ¯Êçó˜Å–-[033ƒ\.çõŸ“ˆzTºöÚ¾m“$«—ŠÃÏhR¤¬]?X©§ÀøÜ ŸÃ¶ÿã²üš×§BÑ1aD"(—Ë( mï'Ž;{]irÅb1H’„Ó§O£R© ™L"‹ 5i5«] ëëëX__ÇÊÊ òù¼ý¦i¶}½\.Ó4ëVÃ0J¥°ººŠJ¥‚ d³Y®^M4I4 §¯¼²ë»†U鬅•˜¬’Ÿ¹¿66¸M¹e;´ís^ŸDÇ9¬‘H‘HKKKˆD"MÒr¹Œ¥¥%D£Qöº@>Ÿ‡$IXtlN¨ª*‘Íf¡ªªLš¦‰d2‰Du­ò|>o÷hŠÛu]‡®ë0 ÃZê\Qêž[Ûà˜]ŽåËçóX__Çìì¬='Hü_ôÌH’„•••®Ÿ“ˆ|À0ðäK/áÆ.ºœr9k»î¥J“Â4M(ŠÓ´†®³…‚æg¾ú30+W"}Àë3¡ èjÖýû÷c~~{÷îE<¯KJ=еµ5û~4ùDRé–H$Éd Ë2r¹Ö××!Iæææ IdYF&“ÁzuÕˆX,f' Ùl«««H§Ó˜µ\Ã0†ýª]¬(¤ëºýš‰Dù|étÚ~ìÌÌ ‰TUíêùˆÈGdãO;t;årÖÏÆ‡SÒd‘$ ¹œ·\½š‚æÃoþ0d™14<]%¬‘H‡ÆÁƒ±´´Ôp<cß¾}\x±XûãŠÒØ…Ë®ù› ÜK꺵)¡["Ññ[S’$»§2™LÚIm2™„¦i¬dS,Ù¯ª*òùL&Ã0°¾¾Þr£v"òUÅ©l¶í]D²º¾Îd•&¿–(ˆ.{í2äóVÕ’hºJXkàýû÷caaÅbѾ½Õ0aêQ?kÙ'“½7Í*JÇ×’e¹éœOMÓš&}ÎÕMÓ´“WI’:&ž²,× CŠ;=.—ËAQd«•ZÃ0ìÀJ–Źf2är9¬p×j¢‰ÑnÞ¹XU•É*M"]סª*·ú Àzìâç‘Íî`ÂJCÓqÑ%·P(„h4jÿ0Y 1ÄÖYa4M™LÉj‚ì<&z9eY¶ç¾.ö9¡L×õŽ+ƒæóyȲl/Ö´¾¾nŸ³~ì&±VK4QÚˆ$®ªJ“MQTTÛW‰åè§â(.‚úâç¼> ®{XizH’„õõuÌÍÍÙÉ£®ëH&“H&“Èd20MÓ^IØ0 ,..B’$ÌÎÎ"U]®S×u,..Ú=®Íhš†-[¶Ø¿«ªŠUÇftÍŽ‹y«Nªª"“ÉàøñãÐ4 333ö¹›¦iÏ«%¢ÉÇyQ4éÄnkìe¥ ¹póB»àB|èG^÷úT(@Ö"Oñxœs}Ľ—~Æø¦ ó"Æ;íàE4 ãˆmMcÂJÞuŒä#Á¾m_òú2)`ŸoØÿ+èßtÈqU66˜¬’¿°ü¦ cŒ“_ùnH0` ÷™ŸŸG¡P@¹\F(Â]wÝåõiV»²@ÌßÊÐ,Pä´Úê! 
Y½¦Éï­îßMÓL&!ITU…R³Ëå ªªØªªŠ|>UU!˲}?Y–aš¦—uOLe|ÓTaŒSP1¶)èãäGž&¬Ñh´i[$Á}÷݇b±hß/ðºYTĽJf“LÅõÜîßÛ½\“e9E*8÷cuÎm5MÓ:,ÚiÃø¦ cŒSP1¶)èã4I|Ùà X›óCâo’$AÓ4¨ª Ó4í‚OôœŠ„wçªvÂø¦ cŒSP1¶)èãä7¾›ÃJþ±eË–ºwâ™L&¡iR©æææìÖd2 ]בJ¥J¥ì¤–ˆˆˆˆˆ¨¾ía%o5ÛÒFɧišX]]…aP±XÌ>¶±±a÷¸Š9«ªªÖ%®ëÜ\‘ˆˆˆˆˆÚ`ÂJ™››ƒªªÐ4ÍNL…Gј°RßdY¶{XpØ/ Vˆ,Ëu+ ]"""""""_bÂJ t]G,³÷Q4MC,ƒ®ëÐu½aÕàL&ƒ\.×ôQ¯˜°R±§j>Ÿ¯»=ŸÏC×u˜¦Y·ï*¤R)˜¦‰d2ÙpŒˆˆˆˆˆ¨LX©)EQ’N]×›®ü›J¥+++^Ÿ6VjIUU»—5ŸÏ7]8•J!ŸÏcqqÑëÓ%""""¢€á*Á>áš.ZG䉺˜& (€$†aý4#ËÖiZsÿÞêþN‰DÙl‰Dš¦!™LÖõºŠÛ’É$R©VWW½þ3Q€0aõ‰nVðK‘°¶zœªÖTMkü½Õýİ`1'Õ=XQ{ðìì,òù<‰„×J""""" &¬>Ñ͈Úd²þwU­%³­Èrýs»ï$‘H •J5,I’ýï••Äb±¦÷#"""""êç°R[‰D¢«žSEQN§177À.¼eËû'‹y})DDDDD4aØÃJ TUµ{JEA¥R±­¯¯×ÝÏ)N#N@ÝcˆˆˆˆˆˆúáûÖb±ˆR©äõi㛂Ž1NAÆø¦ c|“_ø¶‡µT*!N£X,°zó²Ù¬×§E4Œo :Æ8㛂ŒñM~ãÛÖ C×u<ôÐC(‹XZZòú´ˆ†‚ñMAǧ c|S1¾Éo|™°–J% ìÛ· …ÇëæOM*Æ7cœ‚ŒñMAÆø&?òeÂzòäI@$±o‹D"G_5-­\kkköp” a|·W,±¶¶æõiŒEP?ËŒñö‚ú¾»± Ÿ>,¿'㻽 ¾ïn~+¿}9‡µÝ‡¢\.# 5Üþâ‹/â‘GÁ¡C‡póÍ7{} #uäÈìÙ³ÇëÓ¹¯ýëØÜÜD¹\êó¾ð øÆ7¾_|Ñ“ëê'¾Åy:toyË[𖷼œs‡cÇŽáØ±c‡Ã^ŸÊÈê³|ìØ1<óÌ3ž]ËðöX†æé§ŸÆc=†sçÎyr]ýÄ7Ëïàuù]*•<ù;²ünå÷`D¼×òÛ— k»?ÎææfÓË\€Ó§O㡇ú×o¶oߎ£Gz}cQ*•†þ~>óÌ3øîw¿‹+®¸Â“kê'¾`Û¶mx衇°cÇ\wÝužœû8lnnÀTÄø¨>˺®ã?øg×Å2¼=–áƒyüñÇñâ‹/âïx‡'×ÔO|³üžQ—ßÏ>û¬' +ËïöX~FÔÁ{-¿}™°:‡!¸µúð~ðƒÄ?øA¯O¨£~â:äõ©u…e8Y?ñÍò›&Ëoò#_Îaݹs'€úa ^ 6Æ7cœ‚ŒñMAÆø&?òe‡Fë&îkš†X,æõ© ŒñMAǧ c|S1¾É¶T*•Š×'ÑL±XÄüü<Âá°=É{yy¹åü>¢IÂø¦ cŒS1¾)Èßä7¾MXkâ·XR9z}:DCÅø¦ cŒS1¾)Èßä'¾NX‰ˆˆˆˆˆhzùrë4) Mo/‹m÷Âôø8‹Å¶›r-~ºNjÔ*¾à¼ï£Œo¿]+ÕU|ws|œ‰ñIºNª7 ñÍò{zMC|‹s Dù]!Ïœö±UvïÞ]Ù½{w哟üäPÓ£>Z¹ýöÛís¹ýöÛ+>úèP®ÅO×IÍ5‹oq{Þ÷QÆ·ß®•"¾»9>NƒÄø$]'5 z|³üžnAïJ%xå7{X=P*• iÒétñ C×u<ôÐC(‹XZZÚñqšŸŸG4µÏ%Ö]ó ×â§ë¤zíâÎû>ÊøöÛµRÍ(ã»›ãã4HŒOÒuRÍ´Ä7Ëïé4-ñ °üö,õŸb÷Þ{oåŸø„Ý:!ˆg Èg?ûÙÊí·ß>”ããtäÈ‘ÊîÝ»+gΜiz}ƒ\‹Ÿ®“µŠïJe°öÓû>Êøîæ8ygTñÝÍñq$Æ'é:©Þ4Ä7Ëïé5 ñ]©³üf«âñ8–——±°°PwûÉ“'‘Hľ-‰ØãÃ=>N‘H¤a tq~¡Ph kñÓuR£Vñ Ã~zßGßÝ'ïŒ*¾»9>NƒÄø$]'Õ›†øfù=½¦!¾Åë­üfÂê#íÞìr¹<ðñq …BuË —J%,--!#t-Ç÷ÍuRo‚ò¾2¾ýöY¦îé}$Æ;}VýtÔ½ Ä7Ëoj&Hï{Ëo&¬>ÒîÞÜÜø¸W×´´´„;î¸Ñhû÷ïøZ_xáß]'u'hïû(âÛ¯Ÿeê,ˆï{?1Þé³êÇë¤Î‚ß,¿É)ˆï{Êï Çð÷¢.9»×ÝÂáðÀÇÇ­X,âÀ…B8|øpÃð~¯%âСC¾¹Nê^Þ÷QÅ·?ËÔ ½ïýÆx§Ïªß®“º¤øfùMnA{߃V~³‡ÕGvîÜ  ~XB©T²`Ðãã–N§íqôîäZüvÔ½ ½ï£ŠïnŽ“?í}ï7Æ'í:©;AŠo–ßä´÷=hå7V 
‡ÃˆF£X[[³oÓ4 ±Xl(ÇÇIÓ4”J%¨ªŠB¡P÷3èµøé:©7AyßGßÝ' Òû>HŒOÒuR÷‚ß,¿©™ ½ïA,¿·T*•ÊØÿ’( ˜ŸŸ‡®ëömÅbóóó‡Ã(—Ë…Bu+} z|\–––pðàÁ¦ÇÄõr-~¹Nj­Y|ÁxßGßÝ'o"¾»9>.ƒÆø¤\'5äøfùMAŽo ˜å7V*—Ë(‹P·Ê×°ŽûÉ ×2I×Iõ¦å}Ÿ¦Ï2ÕLÓû>Ègu’®“j¦%¾§å:©Þ4½ï“T~3a%""""""_âV""""""ò%&¬DDDDDDäKLX‰ˆˆˆˆˆÈ—˜°‘/1a%""""""_bÂJDDDDDD¾Ä„•ˆˆˆˆˆˆ|‰ +ùV""""""ò%&¬DDDDDDäKLX‰ˆˆˆˆˆÈ—˜°‘/1a%""""""_bÂJDDDDDD¾Ä„•ˆˆˆˆˆˆ|‰ +ùV2Mš¦y}D#Å8§i5ªØçgŠÆ1GÓ„ñî&¬>¤ë:b±˜×§A4RŒsšV£Š}~¦hÜÜ1—Ëå`†×§E4Îxg¬V""""X>Ÿg%ž¦c}¼.ôúÈ"Zj$I‚¢(-˲ UU¡i’ɤ}\Ó4{˜B"‘¨{ŽL&ƒt:\.Ó4¡( ‰DÝk¦Óé¾ïOÔ­Vqnš¦o’$!™LB’$är9;æ…|>Ó4íøï%^Ûä•ve|¿ñŸÉd°¸¸ˆl6Ûô{Ã4Md³Ù†ï¢A´Šeq{>Ÿ¨ªÚ£â6–Ù4)šÅ{·±ÞªlÛ½b«ÌÎÎ"“ɰϹ¹¹ºã™LÙl€UY™››³?$Ífíǘ¦‰ÙÙYär¹†ã¦iÂ4MÌÍÍ!‹A×uû¸xý~îOÔVqnš&fffì—|>o¹1M³!ÖÜ¿÷¯ŒmòB»2~øÏf³H¥RM[úMÓD,³+BDÃЩ¾âÖ,FYfÓ¤è%Þݱޮl÷gl÷ BžZYY©H’T9~ü¸}["‘¨ˆ·æøñãuÇeY®¨ªZw|cc£á9•••ûwEQ*ÉdÒþ=NÛÏ×Ïý‰:içëëëgQtúô銪ª•ãÇ7ÄÿÆÆF@åôéÓöý{‰WÆ6[§2~øPI§ÓöcÅs>}º!¶‰Õ)–+•JEUÕÊúúºý»;FÅm,³Éï:Å{§XoW¶‹û3¶»ÇV†UU!˲}›s¨o>Ÿo8žH$ìkšY–í•Ëœ¿‹Vu—$É’ÐJ¯÷'j§]œ‹ÛR©4Mƒ$IX__‡,ËeŠ¢Ø# òù|Ý¡—xelÓ8u*ãçpaA´Ò;¿+ˆÕ)–[i£,³Éïú‰wg¬·+ÛÝ÷Û0aõ˜˜—ÚŠiš/æ)9š}Ay¥]œË²ŒõõuÀÜܶlÙ‚T*eO$uvÆ6M’Neü(â_Q¤Óéºç!T§X& ’Aã½SÙN½aÂê1Y–ëzBú$U’¤¶I«¢(v«øY]]E:æ¼%òvq.XYYÁéÓ§±±±|>oÏÃN$ÐuÙl¦i²×ˆ&J§2~ñ¿²²R·h Ñ0tŠe¢ 4Þ;•íÔ&¬K$Ð4ÍnA½¥Î㺮ۓ¶ÅŠc‚s2ALü&ò‹vq._²,× ‹‘e‰DÙl¶«ágD~Ò©ŒUüK’d¯XÉ­h:ŲÀ$–‚ ›xoëÊvê V‰¡[sss˜ÅÌÌLÃø••Äb1Äb1ÌÎÎÖ•’L&ƒÙÙY{…àÕÕU¯/ÈÖ.ÎÅœ¼™™Äb1ÌÌÌØ•tA4̰w•&M§2~”ñŸH$H$Ø€ICÑ)–«N’ÉdØ‹D¯S¼wŠõnÊvêÞ–J¥Rñú$È:ÐjJ]×!˲=ž^ìÑäLJ‹,‰aÂD~Ó*ÎØ£šËårÈf³8~ü¸×—@Ô—v±0þir´‹eg…ó])º©Ÿ·‹õve;u «Ï†™™lll@Q†X,†t:Íá‘xb(Íìì,’É$7Ѧ©Âø'""â`ßs Þ²e fgg‘H$˜¬ÒTÐuÛ·o‡,ËŒyš:Œ"""ö°‘O]èõ K©TÂ7¿ùM¼å-oñúTFîé§ŸÆõ×_ïõiŒÜ /¼€K.¹—\rÉПû•W^Á\€[o½ÕëËìÚ<€«®ºÊëÓ¹W^y¯¼ò ?ËCxîIZÜexðŒº …B3'ŒåwðŒºüþÀ>€P(äõev…åwðø­üLÂúÿðÈçóóå5ˆ|·Ýv›×§1rº®cÇŽ¸îºë†þÜÏ<ó 677'*aý“?ù“‰:ß~=óÌ38uê?ËCxîIJXY†ϨËp/,¿ƒgÔå÷{Þóž‰IXY~ßÊïÀ$¬W]uEÁ‚ק2ÓpkkkˆD"ˆD"CîB¡€£Gz}‰=¹úê«§â}/‹(‹ˆÇã^ŸÊXŒê=-‹^_ZOX†Ëð–ßÁ4Êò;{}y]cù<~+¿“°N“iø ˜š/<ª7ªÒ¦å³Lõ¦å}g>}X~SÐMËûî·ò›«‘/1a%""""""_â`ò|>]בÏ窪BUÕ¡.cš&t]ÇÑ£GñýïßëK¦ e À®ë0M³é}$I²÷3M†aØ÷Q¥î~€÷@-Vï–išøú׿îõŸ‰ˆˆˆh`LX©)MÓì+ŠbW´ÅO'y ;íÄ¥—^Ь‘…¢(0 £®b®ë:E®ëø™wü >öòǰï­û°˜X$à¾kîÃÚÈår0 
²,ÛÝqn>Rú¶žÝŠk^¾Û¶mÃÙ­gñøåã+×|§¶j8'ñ˜+¯¼?ûÜÏâoüo½þ3¬8ÉŸ,ËP¥§•㜱êÔ.VE"²,C’$û<œqÖŽ8G‘t6{ qn’$Ù‰¨ø¿8?MÓìû†l6Ûðâ<ʉërŸ«óü_ýõ!¾KDD4†aà{ßûž×§Aä+LX[1ähÕ€ @°Xý=W=– Uÿí®«ýŸ‚³"»íü6l;¿ §¯ šâ{ÂÝ*Ênç1w£|³FPñ|íÊþ_|ÑëË&ò&¬­ä`UÚW`%Y@cB*Ê%µ„ÕÙÙcÂJ˜Ò°’5TkÖm&L<ú¥GQü«"N:…‡·<Œ³[ÏZ5ÍZ…]Wð¶Õ·áË·~•·Updzw`ëØŠ³¿roÞòfœÝzß}ÓwQ¼¤ˆG¯{[b[pãS7âý…÷ã±ÿí1Ü Þ€íoGø¯Â=d`Û-Û¬ë2`%„ÉÚu¼ïĸ³þïa;r;pkîV`£zÿdõoð $‡p#n´’O á±vr/V µÖs:©°D®£ÔnWÛµ8IÕç•kïå«…W§6aýû/þ=Âÿ& :>sãg ë:n¸âüóçþ9Þ}ß»1ûïf~5ÜØ@3 ëoºnýzZ9 Àïz÷^p¯»€•P& ¤ÓéžzKÿ ð“ÞQ%wΞO"êŸ%!*éâ;ÌÍ=T¾×áîâµ®ºê*|øÃöú²É#¢QÓ­Õˆ'«ù|¾iƒ§s‹H8Ý#jÄèÀŠéD"aß_¤t~~Þë?1‘¯0aup~qÊI¹±×Æ‘ð¨õH *{T YؽY?ü0.øæø/{ÿ 8ö~íÇ ³/ÍâÚ·]‹k¿}míy´ês‹Dwø>V{Þ߯3ì‘÷`¾§áoĵçü¤ã¼D™® Ö›Ö®\•`%Þé&·w*%×ãdÔ’ NœçÖk^!zp§œøb~æ«Ï`×c»ðBü(¿¦@UUÜ)ßÙpߥüòù<Š•<ê±Z+ñÎKwâÍ©7CÓ4¨ªŠŸú?…÷~ë½Øûõ½ÖßZ^¾ùe\ºïRë ç`5:¬‹@×ï‰,ËH§ÓVï¯ëýCn¢±(ÙÝswM4²tKsYû°R#ç(2Á—îdO×õ†SÄ¥øŽrŽ3M³á9ÄmÉd+++¶n§”ù•ȃœŸ1úMt¤I’Ó4íq͈†KAŒˆ3 Åb?ÿó?ßÓyM}ªë:r¹œýFüò‰_Æ×®û²ålÝ}œ-y‰DÉdw5cS2‘•²ÐRš]ÐÊ?%cŸºKÊR“Àú‚Ãg;¨kaì%¡kv_ÓÜ'ËÖO÷Pqbõ·†õÓî¶d²ñµR)@’¬÷sj ªÖS&Óxn¿ýÛ=ü±&W.—C.—C2™Ä¿ÙùopÍÉkÚ6ˆÂ(N#—ËA–e{ø,`U0$IjþE_­X^ª_Z»mÕu V+zoÃñêïsÕûmTï·^½O¾zL¢àºó×áš­×àcw~ÌŠÖwÞÁúÝQ6܈q+ªk<‰úÞMç4™VÜí¹ÝŒ¨q`êe$yÊ0 är¹†ÛDO|?ÓCˆ¨‘H8Ýy‡{ŒØuC$Šî5?Ä1ñ"ñ\]]É(w‚Ü©LXMÓD,ƒªªØØØ¨½éɶ²YÈºŽ•Dæâ¢=¿Î¹ª§ ‚â¾ßø „/¹ÄªÔŠÞDè%OÍzçæ¬$ËY¸;ÿ-Ë ¡iZ·9“¾f‰£“¢ëëèY:Ý<¹l§Ùuvû8çÿû1…s’U Ösߊ®¹œõÞ.v¨)ŠDRô’;04­šDºž#—³~€Z¬þ´ ȻۿŽh¨pÇ7Ðú½M§­câ<»)Pˆ}øóÇ€;¯”ÿÔ9Î¥ýg¬çywŸ¬ìM.1dÝ@m˜·xÛSÕÛ×­JÇU¿{¶ÿ÷íxþ²çQº¶„“לķN| ïxæX[ZÃE‘‹ðsåŸÃíÏÜŽwâÝØó‰=u/eO©²OøÇ~¦fˆÇ‘¯‰E}Úõ| J =2b§ÀžR¢Á‰Ï—¦iÈçóögºYÞá¤( ’Éd >ƒS—°Šd5™L¶Ökš•UQ‘Ïå e³HŠJ±èlÖë™ÉÔnoVánL««èÙ8“²}Òp躎l6[ß´žiÖÐkÀ–­ëÀÌ †ý6Köœ…¥ª6Îí&î× Yîœ<·z-ÀŠÕ^>?õ£ÖQ·œóªER*Â\Ì¿–§®z çvœÃ™?:ƒW_}A–el{ï6<üêÃøƒ™?¨-îò‹µÑ ×à¤ä”57wÕ½ô5¸Æë«'™¦i·œs93™LÓÑ]ÎÕÑš-€%zIÅk¨ªZ7pee…ó6i¬œñ;)‹&ŠÆ1òÀ½KP[“¦n÷Žjç—$Iuº)3U «a˜››kž¬êþëyàâß´zCWVê+º’dõÚÖ1ðz‹Dï’$5&šýö`õI×uÌÍÍauuµ1YÝõûlcÒ(bDãI"aýd³Ö±D¢1í'‰d¥†¼"±«¹wZ,Nƒ5\VlÇ%†ÏŠùÖîÅ÷«3 ë»×ñy|Þš¯'«.’€R½¯áXô=26~º+#Ô=Ó4‘Édì­QÄp[wüär9ÌÎÎbuuµnË0÷âDÎÞØf À¤$L"nEÉhØ‹?.öS1çç|m1•z7U k*•jLVMÓJ<ÿàZ࢒µèÆFç^Ä~{~ˆ†É±:®hY]]­ ñ3W"V‹{ô@³9ÑŠÒ_O?‘߈ÄV:ê·Ùrî³-ØR`Í· 1„7¦óÀEO— †fÀÌ[½Rb%Rö@ 
=—€ib×_þ%›Â¿k>ŸG&“A:î¸Ê­˜J¥ìÅô‚4DÐ÷œÓ¿œš­bšµic‰¦˜r'’RY–‘H$êb^4ܤR),..Ž´ÁÏ™, Î}xI5{¾©+ÿåtíÕpŸ›ðÔûßßÓS{š°‹E@$iy< !åõD€='šf3sÀŸnãüš±Åw@ÒjM·+$†Q‹ïdø“_víb|ÓP» ïZxîÒçðÈÍju>,F ýÂ`@ã–Xm¶ÕÒ4 ©T ‰D®À c‰‰¥iÍÁœ«Â;ÿíšêé!Ä ¨9޽ áü¶m¿Å¶išH¥R0M³§¡Š¢`cc£«ûR•¨t;+ß²Ü8:)—«-º'î뮜7‹éV=lÎÛǰ«ßb\£ìQ[;ˆ…PEf"‘ZòšÏçíÅÄöI€õyóHÅ|í‰oÒ´Úš!¢séÙb•ÎÏ€ Ëõ"îx—e|çw~ÏWã¯[ž$¬Åbét¥’µša8F6›µ?4¥R étÚþ0©ªÚq‚q7ê*âùÖ×HV¥Jë4\coÑkd8dtÝZ¸+®ÍC%"¯ÊðŽ ) øj¿¼ùËØókA"17(Ne8–sF]×ëG5L"‘@6Û~¬ÝÖabzgåD, Ø,aÍçk÷k:tZ°Å´šg ¼:‚}XýÛbí ±Ú.U5[k¨íJЬQDQšÇt*U»¿sçznÉdïë28ŸÓ#~Œq 6JLì¬Ñ­t:t:mï9?;;k÷pŠ$¶Ùke³Y;ÝqnvœÛ‡·?l oˆý¥+×0QÛCÝ@mšJ¯_!b Š;wq6¶8cßÙ‹/Öà©{>׎² ˜rm*ÌWÏY#–€ÚŽÉ$ $ÛïïP P%¼©ü\ñâ=]®' ëüü|år{÷îÅÒÒ†wÎV‚<:·¸ui¬ñ-ö)Ô`ÏMB&c çuV³°†8 /Êð&Îßs”À_¼õ/pï}÷Öµ²‹Ý3™LÝRþétº«Öx‘¤ŠÖvÑ’ßiXæX‰í¿œ-ãíz05Í*/œ=¢ÎÅ[ígÝÍ"kný®?f~‹í®Š "QávW¾CmWVêcѹإˆ7¯¢÷¨Yì:?Ç&j+z;÷sÙó°’„n¶xÄcÓh>>S}^HŽvm¿Å8P©²¸¸ˆDŸC¢ÅB`‹‹‹0 ù|ù|ÞNLÅÜW±Õ‹˜²ÀZ ィ­G vˆi$@cr(¶Ëœë~"VœûHWëköb}bZW³¢6@Ê¿÷PZ>ô-kƒ‡·ÿá`ûÃÀËo.}_mo{€l_úPx?ðγóDX¿¸ür`ëV«N¨9Î7 ëûa¦z^Žk2ªÏ›ØVûlè¨åMε$Çu6‹óêßïÐ8¿í|oopeÌŽ9RÙ½{wåÌ™3öm'Ož¬ìÞ½»òè£Öý[øìg?[¹ýöÛ;>ï=÷ÜÓòøúúz%‘H4?˜®T*«ãþK—:ÅË Ï;Šø®T*•O|â7Ê•Je½Rù^ü{­ãût¥RQGý%¿i/CàUÞÉéÓ§+ëÒzåž»ºŽt:]Q¥²±±Ñplee¥¢ªjEUÕ €Š¢(•t:]9~üøHþ®[\¬T‰J%®TVW+•õuëGœ¯_Ï»O£(ÃÇ^~wAQ”ÊÊÊʈþŠ>·²bÅs:]©üéW+•?}Æú>«T*•ã•Je½bÕÝ+•ÊF“ÇŸnr›xÜz¥RY©Xõ?µRÿ)WŸSÜ_­T*JõöÇíâùÓÕcâÖ«·-VÏ/Q}|ºú¸Jõ˜ìøÝyÎéJå»?ôÝ‘üIýX~¯¬¬´,‡ûv¼R©H•ººýéÓ§+÷…¿«œºëTí}oçt¥ö^6‹q»°Qý=Y½ßJ¥y º¥Ó•ŠªV*Éd“ë8^©llXñÿÉÿlÝç_ÿf¥òû­Û×»^©ÅàñJ}¼¹­;~†øgïV?ñ2öÖH$‚ååe„B!û¶“'OB¡ýoç˜úH$b]DË9E&ZÎO"êÅØã;@.¼óB(Ÿl1žD ?!/ËðVÄP²û®½êÁî óÅÅEhš†¹¹9»U_–eär9o·ìCsÅœ"]¯õ|ŠÞ%÷¹NCE§uNmüÛ™Lªª¶îYÕaõàˆŽãâÕÛ×QëéqŽôÑsâì Z„Õ;¢˜C­·È9»ÝGLG­Æùš ¥×•<¼|Њík¨¼ øÙŸ­Ýß %UÔz¥4?úž)![}mçâibµoñ»^ýÛ8¿*U4özwü[®þÝ«ƒ;ÿ½ˆú=Eo—³Îý7+»{¡$ë¹Ë‹XÆ2†Ío1žÍf‘Ïç›n¿Ô5ѳéü˜ˆ÷Íñ>Iš„[3·Ö/¬×ŽˆùVñî^wUAïÃzE9¿¾Þ|ع(·À þܽ¿öÙžÀœgì k(B4µ/•JXZZB<G8F¡PhùØr¹\÷!ëI>}ë[ÍùhDM¶±Çwµ@Z}Ûjë9Íæ8õɳ2¼Ã0‹Å°¸¸ˆp¦÷ÅATUÅÆÆ†½Â£aX]]O¢*¶pÏáÌçk·§ÓÜûzLüÛbUÒ¦sùD2)†&Ñ>RÇÝóËDh%Q_áWPKÜÄþÂâÿb¯aÖÐIVróeàÇ~\ye}büG_¶þ¿VLspåOÿï?µ*ä"ÉusÖÍ܉C«DB ßXsžg¯z)Äwn»‘­My÷SŒ‹ÕÕûÞ_4ƒú-ËfPû<8“G³z_ IìHärÖ:9Ýì8’ËYCÖYÆwäÙ*Áår‡ÂÁƒ±oß>{l|¹\nù˜ÍÍÍ––§Ÿ~>ø ŠÅ"–—[¥BGâŸú©Ú ¬9ÑšHScmm 
÷Þ{/¶nÝ:²×v|À‰'ì¹'ñx¼îØŸ?ÿÛHJ×ø V«9{X§Êüü<Ž;6Ò×wÞJ*•ÂÊÊŠÕ`ÓgE¤Õþ“C‘Ï7_øBô”6[tˆ ë´µ´´„#GŽ`ûöí#yþq—ßͤR©æó«MXɪèy솻G±WÍcš€©ÉêœéX5¦Ø­6.F”ü€u>¿A >'Êïb±ØrßAù¡ünãÝ’`% âáiÔæ»‰9ǃó¨Å‚^"ÞÝëˆØŸ›k¹hœ-Ÿ·Û)"êབߞ­|àÀ„B!>|¸aèA+í–Ö¾þúëqÛm·5Ÿnš?û,Šþ°ã6Ôï³GSC´$Á “ÀhâvíÚUÿE°覎O^¼ø/ß8ŸÂDý Á,//c~„Û"Œ½ oA«.¢b'›^–ç¦iý¸{fÅJºÎ¤Ô¹°õlaa{öìI>¶ò»|>I’Z/>ãuC{,VÛÂE¬*: jM Q~*YõCùÏçašfo ‰i°ê4°’Tw»ž„æóÍîÛ-±"º˜–¡(­’I+±Íf[7@æóÖçfʾ ú­ƒ{’°¦ÓiD£Qìß¿¿áØÎ;XÃćÃùﮘ¨mù<6®¼;%©6«ÓˆŒ<¾…j,kY ÿ¿^øYà@õXÖ‚0a¥¡[ŒwJ¥jC&ÅPÅQÇ»®×ö½¥¢2Ólõ\ö–N?Äv6›ÅêêjóƒÆ[q®È+ô°åùb<“Éô´u «~ÍWŸ”è1m¶n¿VV€ÙYë9›%·šf%¾Ô•±'¬š¦¡T*AUÕ†±òÑhápÑhkkkvK¦iˆ97ïD,D°b¿(þ&²¬³ÏvšJc‰o nSÓðE”7Y7ˆy¢’hˆÆãd³Y$‰Ú\S±%Ä(Ö™™ZåýO#ML’dm187g%¯Íö=õÓ¶h>7ö„UlDÜl¸šØÜ}aaóóó( ö$ï»îº«ûQa ì1çG%Éš÷‘“U™±Ä7P·°DäÛ߆ô©OYzÖè‚éšAc4¶oùX‡mX Õ¢ÒÞlßÐãÇû{Nš~ˆmQ™ojµ·çêJ>_›g-c: w¤‰å‡Ïçóí÷¯6aMyêfñª^8W]S8Ôê|ëQ5<*Š•´¦RÖë8“b~Ÿôdì ëÂÂBÇ1î‘H÷ÝwŸýÁr®hÖ5{Iññ¹c²J¾1p|W÷v;õµ¯ášk®©¾LVÉ'Žñ6òù|}ï*`ÍÛî§Œ7 kÈÖâ"‡gQWFÛù|¾¾w5«LåˆÜaƒ¤@UŒ·A0¬dÕ0¬H÷üS/ç‹JÒÔ-®4lx}#“€5/†!IxßsïãBK=ñJ×^ëõÙM.—ƒªª[!dûx2Ó¬Í/ââä±\.W[55S½qXÉj&cÍÁ&òˆAÐveà4úOVEyžJqÄ@ù¶‡u`¢.#)xáoŽbÛ[·a‡´Ãë³"Žê\¦7ŽÅÉk®ñúlˆÆ¦åü§~Fd³ÍçªYÝbKyX îƒ,°$’«X'S·ß#ùKÃ'ƒ­€ËYåùâ"*¸ +`Ïù8·ãþîֿûð.¯Ïˆh¨þâG [iJˆ}Wæ?éè¼"vuîT]Ë;—!ŸÐ4­¶ŸpV²ÚÏBQqWU+¾Ý{y$—Ë5ß®)«¦×EÅÄ©bÿë » °`'¬Õ²ÿ˹/Czƒ˜‚IbMS"ŸÏ#Ýl‹ ç¾ÛÍèº5TÌ]YbEž|Â0 «÷)«îÒψ±ÍúzãÞ©D>Ðt±%µm({!Vøe¬O…àÎaòÀžoîa/’ÉUçhŠÔõB9µêaÍç­ùL"Yå÷ù”®ëVlèÞª$YC"Y'ŸÑ4­u=|ý&PUÆú vÂ:3˜À±Ky}&D#ach*èºÞ:ÖMØû[¿›Àìlm›šãÇ™¬ÒdXD÷½«b˜;‘φÑ8Ì€5_»“\Î*Ïiªõ”° …†ß8€¥¥¥†cžcÚ“ÀCå‡X©§à1 \öÚkLÁV]-Õ0ŒÖ{÷)¨¯äg2V¢ÊÕihš†Ÿ|çOZû•ËY£8ʆ&€a£c²°ÛI¥¬‘2냬@FAÐUÂZ,qÇw`~~Þ¾MÓ4ÌÏÏ£P(`}}óóóX[[óúzjt½®E•z œ\—?þ¸×gA4ZÖ:Kí{X¨%¬ù¼U‰wïÁGäSº®ãƒ›´zœº¡iVº¾ÎEfh"4”߬²½Ý6©”õÆ9¡Ë„5N#‰àðáÃöm´o»ÿþû±°°€¥¥%¯¯§FÓì±í†Ñí·ÑÑuniCÁ· Ó!auŽŒL$¬žU¢ a$UênÿIÓ´*ò++¬ÄÓÄhœCû¹Ú"YeYNUÖB¡€R©„ýû÷#‰Êå2ŠÅ"âñ8B¡ £\.ûgh°aØ=¬2'eSÞô–·x}D£¥Ð: θ~gEž&ˆ®ëØó‰=ÝÍ]Íf¹M”†ÆFÖÜÕV 4šfÕᙬ’CÇmmŠÅ"¢Ñ¨˜µ½ðœãÑÇ}¡š°rU $ÃÀk»vy}D£¥[?o¼ñFëdÕ@ý‚KD&üjØ©v¸£®[•ù ¯O™¨kº®×—ßyXÓ8ZI¥· £©×1a …B(—Ëu·‹ED"ÿ%©NÕö† QJ×^ ùŠ+¼>¢Ñ1À³ágqçew6¿O> ȺOIä/š¦aþÂù΋Ï8‡M†—òh½ïªaé4GPƒŽC‚#‘ŠÅ"ŠÅ"k8°¦iˆF£u÷CwîÜéõ5ÕÍ_¸à¦áÜŽŒm 6 €|ëì·ðî×ÞÝx\°ô?­‰&®ëø‘ïýHçÞÕ\ŽyšH Gëh=ü]–$©QÇÖH$‚h4ŠùùyÄãq 
”Ëe»µ¤\.ãèÑ£8pà¢Ñ(Âá°××d|u+û”QJŠÂ- (øà±ÿñ¯ºÆ™>XÞåæñ4±Êß,ãÍ×¼èÔö˜Nwõ|D~Óvý¢.uµJðââ"TUÅzu$çL‡²W^ôË6²l·¸›¦Ù¸÷Ѥ3 h¦É/ 6€~ö0ÂEWch À•«Àïþœ×gIÔ·kÿǵØ>·ÝëÓ ‰†duÆë3¢IÕU …°ÿ~Üÿý8|ø0âñ¸},ãþûïÇòòr_sZ}³ª0ÑŒ,¾«CLXÉk#-ÃMÔTrîŸðÜsÀ Ÿcï*Ì8ê'‘W"͇GÐÐÈ:ÆëVÖÑë¦ ÌÎZ еÑUÂÚN8î{p©TÂüü|ÃíKKKP¥î§ÙýºÁE—È+#oŽ y®;ÊpQŒ›°¶±¹ì·9L’FfõÓ4qËù[W¹6M ³’V¢WÜNXÔ/¶$â<™äÜlê¨ãV¡P(àèÑ£€={ö4,ºÔ‹R©„b±ˆƒ6=~òäIÄãñ¡m›Ã„•Æi\ñ­ë:’\œ€<0¶2|Ý‘°V·¸ ö°ñ¿uÉë?Ì8ë'º®ã臎â×ñëõR).°D#3ÎoX!ØinÎZo†õêBW ëÒÒR]`M«Å9!)HtàÊ+¯´†º³2OúÅ€K³A†ÖUÂzàÀ¨ªŠååå†EŒœ·8pÀëë±qQ :6ÈP`ɰê°öñ38˜‚Eþþê¿·V¾fYNuÍW¾ÂŠŽ k¡P@©TÂÝwßÝö~ûöíC©TB±XôúšX=P¬ÐM õC‚M“• ]Òqùë&«h7üù¯²ì¦¡è8‡µX,v5ùZÌ_-—Ë^_“C&‰ˆ&—=RFQX±§@yãèx÷®]9@¥ë:Þôýòû½> €Ž=¬¡P››“·ô:·ý  [__÷úˆFŽkPPÝø™±ó¹çØCõ#§ß†-Wu5󨣎‘‰DP.—Vv .…B!¯¯ÉÆ„•ˆh²q¤ Ñk¯½†mçÏŒo ¨Ë^¿xÿõ^ŸDW k$A:n9Ü·\.cii ÑhÔ7[Ûp["""ò£ßÿðïãê³g¹UÖŽ§ßüØ ^ŸDW}õû÷ïG©TÂÞ½{±¶¶f÷¶–J%¬­­aïÞ½(•JXXXðúzlì]%"šlº®[ .år^Ÿ ÑÐ=ÿ«¿êõ)ÌÅ_ pÄ; IÇE—«—uyyKKKM·®‰F£XXXðMïjÝÞ}DD4‘E±¶ýàˆ  øÐßåßzÍë3!™w}ÿ]«â4$]%¬@-i[׋E{¸p8F¹\F¡P@4õú𰇕ˆ(tóü(pΟ?7áM^ŸÑȤw§±®pHŽ®VAlq£ºæ]‹EÌÏÏ[C¸<&ËrÃùÑä°×!0MÎó£`Ñ€â«EÜ¥Üåõ™Äßñ‹È>ü°×§AÈõ¦™°M6{K]çÖ8§¶âÔ% ¬Ë?tñx} LX‰ˆ( 8$˜‚Æ>þÄœ›Muíß]T®ôú4(@˜°‘±w•‚ÆÊ›_¸ÖÔË[ÆÿÜÍù«4<ž'¬…B¡å±b±ho¡C4‰ßt£ŠqMÓðc/¾ÈJ=yf”åw(òúòˆFã[ýΫ,»ix:.ºT,±´´Ôñ‰ÊårÏ/^*•š.ÔT*•N§Q,ªª"›Ízý·"ê 㛂nÔ1þîË..ºÈëˤ)4ÒØ^Ôqê#Ûp“×ISm”1~}é=xîª÷ú)@†Öà …ºÞÒ¦T*AÓ4¤Óé¦Ç8€p8 ]×ñÐCu4ù㛂n\1^úÈG€ÅE¯/—¦È8bûì‰8µm›×—JSjä1nxýÂy¯/“¦c«Øu˜ …4Mkz¬T*¡P(àðáìD8cmm ^ÿ½ˆ:b|SÐ#ÆMÓôú2i #¶_ø‹¿Àk»vy}©4¥Fã†×·„qn§<Ñðx2‡5cyy¹iðŸ9Óòò²gwÃ*ÃÏþýß¹]¢ سgÏØËða•ß™£G9\’Zåw»x•¡”ßyàÙgŸû¹Ódè·îù>¬n;wîP?,¡T*uü2 šŒo ºaÅøÇo¸{°’¯ ³üæpIò£¡Ä¸<öîǼ¾ ß%¬ápÑh´nØ£¦iˆÅb^ŸÑÀßtÊñË^{ +ù Ëo º¡Ä¸Ì!Á4|¾ XÃ}æççQ(P.— …p×]wy}ZDCÁø¦ 4Æ ÃÀŽsçUõúRˆê £üÖu*c›|jÐç–d4 ]'¬ÎùPbRv³9R½Ì±‹F£M¾‰D"¸ï¾ûì çÑhÔë¿ÓÈé: IV‡‚iÖ~Ó\Zl™ V§Óuë±Îß Ãú¿[2i½–¦Y?ªjý†5mÌMQ€D¢þ¶lÖz='Ó´žc}Ýú=—³~w¿žûšÅu¨ªu{&c½^2Yû½E©Mq €Î×÷ñ·øíßý{Êø¦ UŒ†}ýu¯/¦Ø¨Ëo&¬äµQŸ®ë9:††¬cºsçNìÛ·¯áöQW²C¡ÐX+ò"!2Œú„1‘he2É H××­ä+—òùÆäË0jɨ®[ÿIiZI,wNX%©1Auÿ®(Öýܬ×PÕZÒè|>§TÊzgäL¨[q¿Pÿ:ÎÄWQjãf¿›¦õã.÷Ëç­E±~Òië\F´+Sׯ߈˜—eë§YÃ…x¿ï»øÜˆÏˆh ß_ ¢Ñ¢Y#Pû *JcCŠ,[s¾¶¸¿“ó3 Ô^K|Ý$ XY©ÿœèzí¾â3*®ee¥öüºÞøzÎçs^o.g]—»AÆýyr_¯a««õ¯åþ¼K’?Ö)$Æ·ž=‹—¶oÇ[¼¾¢&üV~ Û 
1Î9Ú½ë¦cÊIä€õÝ/êÿêLÍ8ëpšfÕ•¥VgjV×uñZîN'QïV”Z‡‘ó|Nœ¸º§¿OÇ„5få]ðþXº^KÀDÅSC»$ÌuS)L&koPŸ|‰d·Y%ÕýÜݾV»ß› ŠÐ*a]_¯UæÅß±›çovmƒ6¼IRcÞêµ§q!FQpÖ{™Ï[…‡³0?é´õ£ëµžvÑÀàŒñœî†w¬ˆ¤TÜ×4kï•xŒh´hÖˆáænHq¾ïÎXjõ¹M$êÏÁýyš%±¢nõ™?—âõçãœ×+ zñwY_oþeå¼^÷9$µ÷MÜ×= bR]þøã(]{-nôúDˆˆ(4­V7õÑ"êssµN' ¾cÊYgÇΤ.™´¾ƒ5ÍÔêË‚»óÊ9J°—„5—«ÕQšiVgêfP‡¨Ã9ÚÅß©Ùk8_ËY§Y]µ®G×­óõqç9ìÚõ|OïŸ/ç°“¦Y$ztD«ÁÆFýý¼3IÉ”D]·>ÐÇ{}Fä”ËÕQ(‹˜N$¬Ÿ|¾¾…ËYˆŠdVh—¤95KX;q7ZtÓˆÑì|úi0iE$zÎäV4fõ¢Ÿë:ý½›ý­'© éÅs<€K¼> ¢Yw·~È(ö¸£‡$©±1¨kÖË×,!ÔõZR™H4&hB¿S‹‹í¿ŸÝ#»y­|¾–„Š‘]­:ošÕ™ºMX»¹­Ók‰úè0Ñ»NX‹Å"<ˆ}ûö!‰´ÜClÚ/Ñ‘N÷Wù¤FÎ!‰ä=Ñ€ ªVœ­¹aA”ÍZ_jî-¯3·Ý†ÿôŸz}DDԣĀ Ó¬%g²\û^vNÛi–j8§­õ“Ãé¼êfº\¿—TʪƒOcNÓUªiÒé4Âá0B¡}ûÂÂB¡4MC±XÄ}÷ÝçõõØD²Ú®‚zÇ„Ç?LÓJVŰ\:Í÷ƒŸûõ_÷úȧĜ÷HäR¯OÅ3bqC–ûäG½,¸$†ÓŠxvNguw1uIp›ÝŒ* ²®Ö@UUdEÿyU$A4E<G:Æ¡C‡|1ßUŒ™f²JA67WâNÃ1ª–Q¢I!æ«‹¡ëb¨XàÃÙ³!æc‰UØsŸÚmÛ(z>Ä|11|ν(šs ˜&^ã•W¼þKygv¶6OÌI$óâýkÕ£äœ÷î,Ĺ9×FÜIƒ¦MVÚî|ÛszMžœqÝ깿÷½ë½þóŒ„iZ=¦’dÕ_œÃiÝ;GP£iNZ;&¬b¦»ï¾»íýî¾ûnÜqLjÇã‡Ãž^T³9ªDA’Ét¿ðM.÷ Õ"‘¤Zá\-RpÏårN¬ä0›­UEÒèœï\µ^<§sþ»3ç ε›išî„«Õ¢hN"‘.^ÆÑ£^¿;ã!ŠðÎ5$Äû.V‰½s‚X̺M #tn-Ô(U­õre³V¼ˆ…*Å|@çëf³s33µÇ;nQÕÚcfg­×v.r#bÐ,º6ĵ6û>s[íÔÐŒ¸iÖ´q6Àˆów/$(V±@Ïêjýk7ã¼±à¡hüÉf_Üí]Àˆ9gY$LJCy£cÂZ,‡’Ðh4Z7][§÷J$¹\-Öºi€p?o³ÅE£‹àî\èg‘›tºÖ£ŸLZ±½W»Z‘¤ÆÆ”~®¿›Ñ0$Êq~óó_p[çõ9±è©iZÓiÎKƒë˜°†B!lnn6ܾ¼¼ìõ¹M-&«Dþ%zZÜ[õÛš9Wóvïs ÔövþÞÏ\.çðC‘àpÊŒ?9.Ä{뎧Q-‚8ªÑ;£2,zhc1ëo7I (ÃØêϯDoöÊ “T\Ç„5‰ \.£P(´ÝD¸P(@]¯+QPˆ¹œb¸¢Øw7kÜ×Û½u”›;!×p8÷¶ ä=1ìZ IåTÞ‰¤Uì AÞ+Ú2Y¥a¹ Ó"‘"‘–––P.—›Þ§\.cii Ñh‘HÄëk"""ê‹sÈ¢˜ÿ·}»5/›­%«Îùlîmœ›¨û “Uÿ™m¾MõF,äCÞóæ™¬Ò°tµJðþýû1??½{÷"×%¥GÅÚÚš}?""¢IašÖ,[½\™ŒÕÓ%Vàîfîš_“Sš ÍæM*1 ¥›²“¨[]%¬‘H‡ÆÁƒ±´´Ôp<cß¾}\l‰ˆˆ|M,T¤ëµUMÅð]Uå ó4>šÖ¸XѤ붡¨]%¬€µ ðþýû±°°€b±h߉D8o•ˆˆ|Í4k‹!‰9¦ÍVU%—TŠ $DDÝè:aB¡PÛÅ—ˆˆˆüBìqÔ/ŒDä%ö®u¯ã¢KDDD“HlsX‰W_%¿Èç¹=Q·˜°Q ˆ­Ä*¿ëëìQ%ÛØQg= —¥¥%áõiÒñ[¼ « ?~Üë+!?˜´2ÜoŸGò7¿Å ëà4LýÄ‹/‡ …†¡›››¸JM>Æ7Ý(bÜ0¬áÀì]%¯± § c|“ù2a …B8pàŠÅ¢}ÛÁƒ‰D<A4LŒo ºaǸ®±˜•´ye8ã›üÈ—sX#‘öíÛ‡½{÷"âäÉ“€l6ëõ© ŒñMA7Ì×4kŸÕÕU®®JþÀ2œ‚ŒñM~äË„Çí —Ó¦ a|SÐ #Æ3+ae²J~Ã2œ‚ŒñM~ãÛ„°V% ‡Ã^ŸÑH0¾)èúqÓææ¬ùªëë€$y}%DX†S1¾ÉO|°Ñô1M ™ ¯Ï„ˆˆˆ¼Æ„•ˆˆ|E–¹0Y|¹J0V""""""ò%&¬DDDDDDäKLX‰ˆˆˆˆˆÈ—˜°‘/1a%""""""_bÂJDDDDDD¾Ä„•ˆˆˆˆˆˆ|‰ 
+ùV""""""ò%&¬DDDDDDäKLX‰ˆˆˆˆˆÈ—˜°‘/ù>a-‹(•J^ŸÑH0¾)èãdŒo 2Æ7ùÅ…^Ÿ@+¥R étÅb ª*²Ù¬×§E4Œo :Æ8㛂ŒñM~ãÛÖ C×u<ôÐC(‹XZZòú´ˆ†‚ñMAǧ c|S1¾Éo|™°–J% ìÛ· …DZ¾¾îõ© ŒñMAǧ c|S1¾É|™°ž…±8zô¨]p 㻽“'OâèÑ£^ŸÆXõ³Ìo/¨ï»ËðéÃò{ò1¾Û êûîæ·òÛ—‹.µû@lnn" 5ÜþÈ#àsŸû¾ð…/àæ›oöúFêĉ˜ŸŸ÷ú4FîùçŸÇÆÆ:4ôçýÞ÷¾‡+®¸ c¿®~â ÃÀOÿôOcûöí¸úê«Ç~ÞãòòË/ãå—_¶{²Q}–;†óçÏ{v],ÃÛc>˜'N \.ã†nðäºú‰o–ßÁ3êòûÀ‡?üá±_ËïöX~þ¼ßûÞ÷z.¿}™°:‡!¸…ÃᦷüãÇÇ?þq¯O¨£~âþú¯ÿÚëS'ê Ëp ²~â›å7M –ßäG¾¼sçNõÃJ¥RÛÊ<Ѥ`|SÐ1Æ)ÈßdŒoò#_&¬ápÑh´nb³¦iˆÅb^ŸÑÀßtŒq 2Æ7ã›ühK¥R©x}Í‹EÌÏÏ#£\.# ayy¹åü>¢IÂø¦ cŒS1¾)Èßä7¾MXkâ·˜¸F½>¢¡b|SÐ1Æ)ÈßdŒoò_'¬DDDDDD4½|9‡•ˆˆˆˆˆˆˆ «Ç …BÓÛ‹ÅbÛÍ›=>NÅb±ížlƒ\‹Ÿ®“µŠo 8ïû(ãÛo×JõFßݧAb|’®“êMC|³üž^Óßâ\Q~WÈ3'Ož¬ìÞ½»á¶}ìc•Ý»wWvïÞ]ùä'?9Ôããôè£Vn¿ývû\n¿ýöÊ£>:”kñÓuRsÍâ[Ü„÷}”ñí·k¥F£ˆïnŽÓ 1>I×I‚ß,¿§[Ðã»R ^ùÍV”J%hš†t:ÝpìÀ‡ÃÐu=ôŠÅ"–––†v|œæççFís‰F£u×<ȵøé:©^»ø‚ó¾2¾ýv­T3Êøîæø8 ã“tT3-ñÍò{:MK|,¿=Ký§Ø½÷Þ[ùÄ'>a·N¢ÅÇÙòÙÏ~¶rûí·åø89r¤²{÷îÊ™3gš^ß ×â§ë¤F­â»R,†ýô¾2¾»9NÞU|ws|œ‰ñIºNª7 ñÍò{zMC|W*Á,¿ÙÃêx<Žååe,,,ÔÝ~òäI@$±o‹D"öøðAS$iسKœ_(èZütÔ¨U|ƒÅ°ŸÞ÷QÆw7ÇÉ;£ŠïnŽÓ 1>I×Iõ¦!¾Y~O¯iˆoñºA+¿™°úH»7»\.||œB¡Pݾ]¥R KKKˆÇã‡Ã]ËñãÇ}sÔ› ¼ï£Œo¿}–©{Az߉ñNŸU?]'u/(ñÍò›š ÒûÄò› «´{£777>îÕ5---áŽ;î@4Åþýû¾Ö^xÁw×IÝ Úû>Šøöëg™: âûÞOŒwú¬úñ:©³ Å7Ëor âû¤òûÂ1ü½¨KÎîu·p8<ðñq+‹8pàB¡>Ü0| ßk‰F£8tèo®“º¤÷}TñíÇÏ2u'hï{¿1Þé³ê·ë¤î)¾Y~“[ÐÞ÷ •ßìaõ‘;w¨–P*•ìôø¸¥Ói{½;À¹¿]'u/Hïû¨â»›ãäOA{ßûñI»NêNâ›å7¹í}ZùÍ„ÕGÂá0¢Ñ(ÖÖÖìÛ4MC,ÊñqÒ4 ¥R ªª¢P(Ôý z-~ºNêMPÞ÷QÆw7ÇÉŸ‚ô¾ã“tÔ½ Ä7Ëoj&Hï{Ëï-•J¥2ö¿$ …æçç¡ëº}[±XÄüü<Âá0Êå2B¡PÝJ_ƒ—¥¥%)×IÍ9¾Y~Sãfù̈́ՇÊå2ŠÅ"Ô­ò5¬ã~2ȵLÒuR½iyß§é³L5Óô¾òY¤ë¤ši‰ïi¹Nª7Mïû$•ßLX‰ˆˆˆˆˆÈ—8‡•ˆˆˆˆˆˆ|‰ +ùV""""""ò%&¬DDDDDDäKLX‰ˆˆˆˆˆÈ—˜°‘/1a%""""""_bÂJDDDDDD¾Ä„•ˆˆˆˆˆˆ|‰ +ùV""""""ò%&¬DDDDDDäKLX‰ˆˆˆˆˆÈ—˜°‘/1a%""""""_bÂJDžÓ4 ¦iz}Dc7ÌØ7Mš¦y}IDDDCÅ„•ˆ<‹Å ëº×§A4vÃŒ}]׋ż¾$šb¹\†ax}D#ÇX/&¬DDDD4°|>ÏJŸ¯›²ÁØîQ…<·±±QP9~ü¸}›ªª•t:]9~üxÃ1Y–+ªªV*•Š}|ccÃ>¾²²R‘$Éþ@eeeÅþ]Q”J2™´O§Óöóõs¢NÚÅx¥bÅœøw¥bŰªªŒošxýÆ~¥Ò¹|w?v}}½ rúô醸&†Nñ¬ªje}}Ý>æŽQ–×4)uQ §OŸ¶ë5âþŒíîqH°äóy¨ªZ7Dl}}€ÕÂâ>–H$ìMÓ ËrÝêâw]×í–uçã%I²‡$´Òëý‰Úiオªö¿Åýß4éú} sü»+ˆVúÅÅE¯/Ÿ¦›xvsÇ(ËkšƒÆºx\*•B"‘€ªª glw «ÏuÚî@ ;C„f•¢IÃø¦iÖoü‹ùS©T Ç÷ú2ˆˆ¦Ž,ËX__·§ò™¦‰d2‰••¯Om"1aõ wbªiš=éº]Òª( $Iªkµq÷>ùA«w.æÆø¦ 
è'öþãee¦i"ŸÏÛ  K¿ñL4i‰uQ‡_YYÁÊÊŠ½í˜¢(ü¬ô‹.ù@2™„®ëö/Ó4íE8Äð_ç1ç‚3ÎUÈ1ñ›È/ÚÅx;ŒoštýÆ>0XüK’d¯XÉ­hXº‰çnb›ÈïugÏ*`õ¸rÈoÿØÃê²,cee±X ªªÚ­2bùk÷1Q‰j•’L&cWj Ãè8ΞhœÚÅx;ŒoštýÆ>0xü' $ ¤R)~fh(:ų$IÈd2öðG¢I5h¬'“IäóyÌÌÌ@Q{dL"‘ðúÒ&Ò–J¥Rñú$Èb†Ý.æ(‰á_²,Ûû°Š=šVWWíÇ:áÃȈü¦YŒwƒñM“®ßØÿä?­âÙYgqï5L4‰uÑC+I§2 € «Ï†™™lll@Q†X,†t:ÍÖK""""" 4 ö9çÓ4íáLV‰ˆˆˆˆ(èØÃJDDDDDD¾ÄU‚‰ˆˆˆˆˆÈ—3$8ŸÏãOÿôO±k×.¯OeäŽ;†›o¾ÙëÓ¹'N  áŠ+®úsŸ9s[·nÅÁƒ½¾Ì®Ý~ûíSßgΜA¹\žŠkågùرcøÛ¿ý[¯/±k,ÃgÔe¸$IøÃ?üC¯/³+,¿ƒgÔå÷ýÑ!‰x}™]aù<~+¿“°^ýõ¸í¶Û°°°àõ©ŒÜüü<–——½>‘[ZZž={F‡þÜ…BGõú{²k×®©xßÅ{ÃÏòàÏ=IX†Ëð–ßÁ3êò{R’U€åwù­üLÂ:M¦áƒ`* >jFGR@úÑ´|–©Þ´¼ï,çËo ºiyßýV~s+ù{X©+º®Ã4Í®ïïÜèÞÉ0 ˜¦ Y–!IRÓcÍ8ï/6i›1·;gÃ0 ˲ý¼â9Μ9ƒ÷¿ÿý^ÿYÉ'zo±‘x»˜unîü,èºEQìÿ7»¿¢(0MÓÞ¬\<^Ó4¨ª I’ Ë2t]‡®ë$ɾ °>/¾ø¢×V"""¢1aõP³J²¨7»¯“,ËPU‰D¾M$p­ž£ÓsjšEQIÁY¡nGì+K7I’ZVòUUmú|â\Å¿ À$ImÏ)™L6¼¾¸ÆÇ|¢æ?M"g:D"&Þ»f ²,CQ¨ªj'w☻¡Â™Ø¹o#Ýè6¾g¬º?7îÏ`"‘hûüÎëI¨ó5ܱ,îŸN§ë^WÜ®( öïß?Œ·‘¨g"[}8µjÜÐòûHÓ4=zßÿþ÷½¾TšîÕ4­.¶ÝßIªªÖ}¸ËoÓ4177×ð:⻞|òI¯/›ÈW˜°Ž‘išÐ4 º®#ŸÏCQ”¦_æÍ¾¤Ý^>Ÿ‡¦iÈf³uw/d;Í:?’$©.9p&é½j–Sÿ4M³cÚˆµžñv ÍâP×õºø{ ÝŸ?½·Ý|þœœ×ÖÍu´ú[öštÓtsW´ŸSççºYB)%ñªiZÝw›ûqîF$ç(wì¶Jbëóuîܹ‰øþ¢Î:–êD$zf÷œ£Sº‰#÷¨0gBêü܈ï.çyåóy¤Óiû±â3²¸¸h?Fœ·hÜœ´EóˆF ë†a'”‚(ŒD‘(¨ú•øÿÚûû ÇÎû>ðýÒ"MQÒ€IQ¤Ð´ŒhN²–“íØ^ÛAÛ÷Z&íB‚Éæz¼ÓYû¢-Ø 7ñ]µSYgíªÆn­w—(·Ý(W©gwÊÙjD‘çnÔgCÛëd‰ÖÙ¾Y£—>”ÅÆŒLqx4h’%S}ÿ8ý/çßOU× ^Ï9Àïí Ë_’Uu•*îÝ?lÔÕ°FoyýujC¢Ó~¤Óƒu¾zÝê(ˆÇñw>ð\ý7ÿøàñö¿ù7ø;—/[ñ­6°ªÛJ§ñÄÑ‘÷Ÿ!-5áÌf³}£{ìW=6Ç~ŸŒ^˜¤auš)KŸ°V*4  üËÿþ¿Çc¿ÿûVAvýºcå0#þK¿ä\ÐŒ"?ØRÉÕ›ÔÛØèmÌ3ù¤HÒu;ù~õ¾ûðß~÷ÄãÀ½÷ùÈèFIXÕç¨.Ãâš {Ödrðunzƒ§é]÷¾Ã>ÇY*m³ö‚e³øÒÞÞlïA®¬¯¯Ã0 ìïï»þ±žºBoïIz  €óˆµ²­&«R1Ñuàä¤ÿ5šf½.ïUò¥KÞÔè{å^~ïœâZ 3øA9ßoš¸·Ýæ4¤QDþµ÷æO“ˆN±ý·‡•¥ê(0‰I<%fíõy¾ú~j#½4’ªÔXNû7MªPèþF½À§¿ö5T««XÿÿÇá¯;ý ýd±ˆŸ×÷@‘§®% iZßüqûZ24]F۩댺RHЖ6a5M•J?Üja3· 1ù1WˆOS™Ìç­?'œ_IÐ4 ëëëx¶^LJ~ø‡gŸK,qÊ¡^´êõúÀº–h¶»¯6Ô˜¦Õ»¢öÚ+¼j#¥üß>tÜ)!Õ8"=˜v››½ÆR·e@6;8êÂi*öN=Zã$“xù—w–h”Ì8².ºB­zy¹¹÷æ;õÔË4&yÌsºî<¢^ïM­P{äÓéáñ¡$„®uåµ|>ÕÕվŖˆœŒºäå°^P§«„xÞ å³¥LXMÓÄêê*òù<.Eì ¥%&ó: Ô5­»áßàœ5¢‰T*hšÖ]ñV*÷ݺ…¯=ú(¥—T*ØÉ¤uþ5V¥|\¦ö>ÉsǽfÔã£Îs–‘dêõ:Fw- OV¿Õu+^ £¿ÓÞ¨’LN7ÊÆžDF°QT¦ðêËK]Û@z@eÔƒjØ¥–²ÙìÈKútֿɤót]·FúØ©£ƒt}ªud–+aÕ4tÞñ¬þÿ … Ó E$ 
«ÓU”Ÿýã?F½^Ÿhˆ"Ïîìàÿ÷±áÇ:lþäOvçU§e­‚Fê«• i(’¹lC¦“ô °§‡Ÿ çÓ4 år‡Ó¬ ¡rZ8Qs“ñÒi«!†üScïêr‘¹¡24×¾jvàW`„S]/@M@ÖÖ1ŒþõFÜ&Ïj#T¡€_ùÊÄëÈ,UÂúæÓOã—þóÿœÉ*E¦áÿ.•ðÿÑ?b²JtJ. £¶nôÂüƒãcü_?ò#xí½ïí¶vÿäããçãq<ñS?ÕÿC, k9%£Éäà¢@DkŽ6 ”Ëåé‡ü™fo¨$«öžvQE¢%¥.h¦iZ÷—årÙû^Q™Ö(£!€þ醅Âà´D™#®Îç·@ä4S8äuª)VÁ^ž„U×ñï¿üe\ÞÝårëIßüìgñ3ÿèaww—×x£¥f*• t]ÇráÖ¾ö5|àÖ-ÄN˜¿ñè£øìOþ$ÎýÈt_³¹¹9¼‘‡¿0Y“ P(LÞ£*•Yù?`U^Ëe'rAmøtš_Ç»ÃugZÐLmLrZ=Š/£ÒéáW;p²`w¡HX[­R©”ãý±X ‰Dbæm¼¶»‹/>ø ~‘ò™ñFÿ>‘@ùþC^g‘|çKŒ»Ôh4P©Tð[ÿäŸ`W®]-—>€lßàožþ†Ø–Ë&íîîŽ/ßMÓy€Ô}&©ÐÒRCŒ‡‘ô–Ê%ËF]ªl$v+‹‰©—´tº®ºaLÖ˜”L.ÅPýÀÖZ­†V«…­­­î}ívår­V €5Ѹj¿Fã„:{{x;— úpiÉøßÐuü‹—^ÂorAò™o1î‚z‰™d2 |ãKñCNÞClËà‘Ó<¤W¦Ñ°þo_á™W" !Âãa¡ë:Fw¥Ýd2Ù]|o(¹´˜=Á¬×{+`Ë<ðq FÓ¹]ß䯛Í&vvvîߨØ@"‘€®ëxþùçÑjµP«ÕfÚÖ=/¿ŒÕõõ —–ŒŸñýÍßÿ}¼þýßÏy«ä+?c|œõÓò½›¬ 7ä‰Â# ±­iÚèdUÓ¬ËÈœ?oU˜··§»-¥0ÄxPdá²ÕÕU¬®®â@µZí&¨ûûûØÞÞv^u[×­Dt}8{¸xÑùZÒÙ¬Õƒº¿o ãÍçÙp4ƒÀÖN§ƒJ¥20Ÿ´Ýn£Ùlbmm ‹ÅËåú¯s7¡7vvÐ^{í5ìîî¢P(87U*Ö9·ºj­’kVzxh%¤N—r™äÚ×4V`C‚766;¢+ÃàèèúÆÓ§R)´Ûí©·õùÿõÅ×þößêPi ù߈ÇñOî¹ÿ5‡“|ñdW*TaˆíõõuçÕEåZ¨öË+M 1î5Ã0Ðh4 iLÓD:F2™¾j¯ ­ÇWÔÝÜä¥ÈHëÞÞÚí6J¥ÒÀc£NŠN§3ô±/}éKøä'?‰b±8ðØ_üÅ_à¯ÿê¯q¨B{{{xæ™g ËÐ=xÿyÇ7¼üòË(‹ØÛÛë»ß0M´Þõ.Ž  ®b±ˆ7nxöþ~—áØ¦‰J¥‚ÝÝ]Äåz¨œúyµZ Ï<ó >÷¹ÏÍý½ý.¿ÔO¯©íxù=Y`…å}dIù­&’ó–ò{ž4MC½^G¥RÁêê*Ξ=Û&²½½ëׯw‡ø$«†5¬W†ÖóÜò”ÔÁ'-¿}ïam·ÛØÙÙ:{Ô q||ŒX,æøØw|Çwà'~â'N@Ó4ñ¿ñ þØù}¨R¹\‰D^´Ø ¯â{ì±¾…D£Ñ@ž½«¤ØÚÚò¬âàw>J¥RÁoýÀ ¹ºjU2œ.tN‘S*•páÂ…¹—áA”ßNªÕ*G ,1)¿VîU˜ÊïY©×%Îf³ˆÇãÈf³( £ðëu+1¬d5›µÖ;àbG¾˜¶î{º··‡X,MÓ iZ­ŽŽŽP«ÕËåFž Ó,­­i+óä¿ã°V¶Ûd%|DŒ;Ñ4 Ÿ?8ÀÏt:Is†Ø–Kht+ÜëëÖðDV¦iÂãóP©TºõcWñµK§{¿¼ÌÓÂð=a½páÂÈÇWVVX­@rr¨ÿŸ”®ë¼.%ùÆïøþêK/A×u&ßøãÃT*ü«ïý^à‘GXá ¹Cl7^dµjÍ«c²Js†ŸE½^GµZu—¨ÖëÖê½»»ý÷3'XH¾'¬™L™L¦{[–ÊV‡d2ìííuïÓ4­»¢×¤t]wžBä¿ãûÛ~ò'ñŸŒù"š'¿cÜI£ÑÀO>þ8ù?ÿOk•F¢9:¶e]…t:m-¬ÔhX£ˆæ$蟖¬W`šfÿ¥ËŸh7ÕªÕÐÃÑg‘Ø*Á£”J%‹E4›Mt:Äb1\¾|yò7Òuüâ¾ÀÞ' •¹Å·iÞþÎï úˆúÌ-Ƈh4øyÄZ|†ÈG^Æv½^·ØMÓZfw——Å ßy]~OJ×u¬¯¯#ŸÏ£ìtùÓ´zRå/Ÿ·®GÌs'ROX&h§R)<ûì³ÝÒÔÖ ‰4xá•Wð÷ƒ>HZZ^Ç÷‹øÀds7ˆæÌÓw`t]G‚=«ä1?cÛ0 hšf]Kx}Ýš[Ç¡‹ä1¿ËïI5 T*loo\3€5áâE+IÍçÙˆa'¬ÃÄb±ÙO]Çíïû¾ …hÀ¼âûSü çhS(Í%Æt{¡ˆâEl7 +® Ãê%bƒ È«ò{õzõz}ôàtšçÊ’ä:¬¾0M¼ÕjáÖ}÷½'DÞÐ4´y$è½ òU·bO!º®[=Hõ:à4ì‘h‰È*À#“UZ*ÑMX5 
·~è‡ØûDѤë@6ËU°i©´ú§»×Û#Š’nYžLZC‰–T½^‡¦iØßßï/ëu8>èÝ£€D7aÕu|áþûƒÞ "o4ÝùM¬¼Ó²èüÙŸ9Ïc"Z`} …‹¡¥¥ë:ªÕ*öí«cË\UÎQ]ZÑMX hñ8{Ÿ(šNWÂ“Ë Ež¦á¥¯~yö>QÄhšÆº -=Ó4±¾¾ŽÝÝÝþ†xuÕlž'K+š «at/$ÏÞ'ФÍM€ 2´DþÃÿ1ÞñøãAïÑÜuç¯-±J¥‚|>ß_¯‘aÀå2“Õ%7Ñ*ÁÍf³oÕ°f³ MÓ‹ÅpáÂ…ÀWë2M«÷‰‹sPTe³0 #è½ òÍkûûxÇ… AïÑÜq-Zvš¦Á0 ë²N¢R±¦?±g•ಇµÕjá©§žB±XìÞ§iZ÷ÂÂûûû(‹ØÛÛ úx,é´5àêbY†a°’CKã®—^Bê# z7ˆæª›¬®®Z£Ãˆ–P¥RÁææfïŽõu«óéúu&«ÀeÂZ.—‘J¥píÚµî};;;Ýûž{î9”J%Ôjµ §{ ˆˆ¢á¯~‰§ž z7ˆæª;U™ÊD´L*• ²Ùl|¹l-°Äi}tjlÂÚl6Ñn·qåʤR)@§ÓA«ÕB.—C,är9t:4›Í ©‹½«eš¦1Æi)hš†3gνDs§ë:žJ$Ø‹DKÉ0 4 ”í×f݆lÆ&¬­V ™L¦›˜Vå@ß"êãa ë:\¢ÈcÂJË uíî~ßû‚Þ ¢¹Óu©v› +-¥z½Žr¹Ìú:56aÅbèt:}÷µZ-¤R©Ð%©*Ó4Y™§HãwZϵÛxë_ý« wƒh®ºõ]¸J0-Ó4ÑÅQ ú\ÑcÖT*…V«…V«À¬iÚÀŠÀ2xee%ècÀÊ_.…[±¿›¶çȶë§ÛO*û,’Êg¡+·Ó×9QöçþÛ÷Oñe…VZÝUT‰"Æ4M|è‹_dï*-F£l6‹d<T«Àþ~лD!ç*a•$ÕI.—C.—›ªgµÙlbeeeèk[­b±ØTï­ëúàªc^ÄL. VÂ$IÚ*¬dJ~ ô’S9?uôSIXÕ„Ó°ý_’²Êéûl*Ûƾ-IþLåý$•Ä/®ÜV÷Q?ݶú(Ï•Ä;k{ÜMãYýÉxáôýtÛg"ï' «¦ÜŽØ–²?_¿ïë.vhz^Æ7À—(x^Ç8`•åµù‘ •–Œ_±}!~á‚>\ZB~Äø0õzÝêäª×|ž£ h,W +`öÁÁàÂ… Ý9¬ÓòÞÞjµZw1'é•EœÚí6ÊårwÞl6›EµZx;ž¯:¦ÁJÜò¶ûÔͺi4*Øng1>¹SÞY¶Upñš$5÷wÚ¶“ì˜ÛÃŽËé¾Q ÐIô’v`tªxë]o¹Ø¡Éùß¾¯‚]E/ê°>ciœz öQö^p'&¬sHpñôÿ›Ê¶œ~Çd;Àਹ¯“‰¹­žËò>Ãzò—˜_e8|ó•WðþgŸ~åW‚>ìù‘Xr:/dtIVüçOB•œ‡ö‘,êm§†>Mù?N·c TͰõOä÷IF² Û–zŽBÙNC9&u4Œý¼RGùÈÏØ€Øý+ëä+¿cÜÎ0 ˜¦iœ¹x‘½«äÊØE— V«¡X,bgg;;;(‹¨ÕjSm°Óé V«¡T*A×u<ÿüóÝûÄÆÆ‰D÷ñV«5ñö<]tI°«b°þdnÓv›–Š_ñ=Ó*Øõ Ÿ¿Žþ^þ8¬ÊsV¯þêéÿ«Êó¤Â«m¯*÷«" àý†Ãó°[!{¡Ùކ^"«Ž(cÒœ=}ï³ÊþÊ1˜¶Û:€ó§Ï]=}]Ŷò+ÆÅ£7oú3dR þ$Ned‹oÑ꟨ØnWÑ›Ž!ÓäO’¼4¬ß—‚r»|ú·úWFod‰°'‰ösg½©PÞ[ݶ}ŸÔ¿¤²µqÕ´mëüég#ÇŠÓc)+¯‘÷Hcð\WÏÏlŸß*ðá¿ûa<üòÃóŒßc›ÈoaˆñF£a-¶¤iVÙÎrÁUëÎÎr¹\wXðÆÆvvvpùòå‰/mÓjµÐétËåXÃ3™L·%§Ýn£ÙlâÚµkÝÇs¹öööP*•\og.Ã%í½DÒ”†õû=û&(ZüŠï‰ç¯ê°ÏëÊÿ·Ç<_zoìÏË£¿'Ò‰½çÞm¯¼›iñöç¸5`Å?=Néñrê¡·÷ä§a}žêg¦&Ëòܳ§ÏÝuyÌga•;'§·%9²'&ñ+ÆÅoß¶äpCšœ¦lvš'¯>8Ou(c|Oàæ˜Û£¶©²ïŸÓví ¦n¶e7£ì#Yâ¶mmÂ:>5D}QeÊk¶ÛûÀŸ5ÿ ¯¼ââàÜó;¶u]ï­’Jä¿c܉®ëÖpàjÕLäÂØ„U.W³¶¶Ö½¯T*aoo­Vkàò6ãd2èzÿ¨££#œ9s¦ûÝEäÿívÛõ6ú†KÊ*§úŽ É’JªÌ/U‡`IåU~À¯côbG´ÔüˆoÀŠq× Ñ˜°Z$aÚF¯÷B’§ÎS¬Šo$_¹NîhØÐõCÛí³§ÛÈŸþkÂê=Ú…õy:¼oV/®lG]ÍG~ŸøÎXÌ]«ôðMÚ0²ŽÞw1i’2luaŒ›Î2~Ç6€è,(¦þvPhã*Ó4{—sÊf™°’k®ç°ªsU'íU¦X,vOiÍuRt:WÛî—4aUÆ è 
Ñ“dSæÒÈo…TÕß{OUš€Wñ-âñ¸UIˆcxƒŒ+Öí½@òµ.ó×È[‡°Êžz=‚£ì•~ù^+pßkë¯cÜ4M<Þé¸KXenä$1,•-C6^Ç6üý/~¨T€Y¯° 5ÒÈ®N}8„UÎKƒ×&¬òD’¡6`&ÑßhßÀà¼e•:ÿƒsýuX r2§Y]Í?«ì“çпˆ¤t(È9­¡d‹Ó¾ív\yyº-9n ÿwS~#e;ò[šµí«4l”Û"m{®ºè&”ý“¹âêgU¶§<ç¡ÎC³Å„ ~ĸ¦i½QLVi®V/\¾|ív;;;¸zõ*J¥Rw¸“ããã¡'Ë—¾ô%|ò“ŸD«ÕÂ/üÂ/X k½B@ænÂ*Lä–ÎÞÞ>þñãž{îñt;óŒoxùå—Q,‘Íf{CÉ*°~ð°~p¥â½ŠÞ¢%û^‰Ÿ¥G‘¦'aR™š¤“Å©×JzmO¿ÿb±ˆ7nx~^•á[[[¬‘þ«¿r7$¸Ž^¿ŽÞ ã£ÄÑ?¬›B­Và /¼€xÀ³mxY~ËpÌ7ŽŽ¦»öäyX±+‰¬‡#Ó•œÊ{{N¬&KÃ^#ϱ‰WÙÇ’+'Hr—FoH·$ÓöKÕ©ÏnFJ jï´Ð˜¤v½ÄV½ì§ýÊÛÊçTöÏTžoïe¶CÚö¸ÌÛ¶MCyæÅgðç_ýs´Z­¾žÎyóºüvÒ—°ÒR’:ø¤åw  « 'ŽÅb(—Ë(•J#OÎQ+Çw|~â'~¢;Æ>N[•8©Œ¸™wGKA.Ã$«^{ežñ =öX÷‡àw~çwާ{ÉNÖ±\ˆ‹î-†y­„zˆ¾JÚVl ϼçÏwßË2îyýu|õ0¶¯AzL¤2˜‡•Ànï®-S÷PÓtJ¥.\¸àiîeù->ôàƒîwH½^º½‘ešÚiç)˯ÕdÎNÊ:¯IG½¿Sâž´½Î))Öff_|l7e}xºð4noxš¬Þ—ßN4MÃö6‡´,³iëà®Öb±8p_­VÃÕ«WûîÕ²"¯±·ÀÈØyXYY` K“Cý¿k£æ®yįø6 £7Ü]Ì8ªŒœZaJO|ò O6ã[à®OíGÁ‡Æ=QGÿ¹ V 7Ñ»,‘¬Ì›GØÐ?cýÚ×Ü÷°Êh¢øãªîÚ‡ÓÄÆ^ÖfeekkkH¥R}kkkÈd2÷sáÂ4›ÍîŠd€Õâ"'C"‘@&“ÁÞÞ^ßã«««cß»O¬ïüŠïíííÞœ="»<ð‰ïÿ„'oí[àùN7~äG\ïÐsAÎ(säZ°ŠÂÍÏØÖuÝJXݨbô*ÖD.ùãvF—  >éµõˆ\ô°&‰¹-e XCr¹.]º„L&ƒ££#÷µö”J%‹E4›Íî$ïË—/»ßˆ:÷€ÈG¾Ä7€¬‘¼¼‘üŠqøÚ;߉³ãz ¤q’çÍÈÏØ6MÜ¿‹'ÂMùÖ4~Ƹ¦iøÍ• Ì¡4¹@æ°^¹rkkkÝÕÉì—ÆI¥RxöÙg»-@“^:Çõ|"x߀ջÊ!À_büTâV¼;M{ZUš+¿bûž×_Ç×ÞùÎñO¬€ 24W~–ߪÄ[oáÛßxcº…ÆhéMœ°6›Í¾‰².\˜*˜‰ÄÈ1ñr1c¢Eäy|»Y•ÈC~”Ắ#¿™·zQ«èÍá“Õ8¹vyÀØþòü ÎÏ–Kž¨ ‘²ò,ãœæÌï:¸¦iøÿ¼ý6ç®ÒÔ\'¬š¦á™gž¸FÓÎΉªÕªç+šÑ)–ù´þö+ÿÒZÚª°Ÿ‡ÕHS=ý—# h½òÍoâ?úþï·nÔOÿdq0éMÕÀ8§H0 1M ÀÅe–Ú饭þ?Æý÷º˜¡p•°6›M”Ëed2<ýôÓÈ*Ýùš¦aoo—.]ÂÖÖ{E‰ˆhvš†Ÿélï’»°®/,×Ò&Z`íGA;›EJƒÕsƒÃ~™¬RD|Û¿ý·@šÃÂ#—~›tjýZñ•ÓuÛóÔOO_·¯¼F}~¸÷潯3ä*aÝØØ@6›EµZx,›Í"›Í¢\.cccÏ=÷Ü|>\""Z^†õÍ_ïN/©–ðZÐ;E4†aàž×ï~ VcŒZ‘¬ÀŠwޤ¡ˆ8Ól"þ«¿ôn,±¡¡w)Ĭ$Ô8}Îi¯'ôÓ?iÖa•Erýr ÿZÅi8'Àöë›;4¼½Ü|wîLt(cÖf³‰v»ßùßù¼µµ5\ºt ­V‹Cƒ‰ˆh6†›ßó=Aï‘'LÓÄüžÿØJJíOÃ*‚D ê«_ý*ÞõÔSAïÆâ°ÊµŒd3yúW‡µ0a½^Kýô9eô’Ô‚í=4åý³§ÏUË4z½¤"Àõ²Æ&¬­VkìälÝ$µÓéw4DD r?üÊ?z/ˆ¼3ìò{ìY¥ÑuÿÇê*Ö“¼ÖäXÓ?VÂh*mžÞ'‹±Iù!ɨ$ ÆéíQ÷^Mel‹Åp||ô~ÑùÆ‹÷áí÷¿'èÝ òÄß»~zaUÖá)ât]Gr™“Ué픡·çwn£W”1ú*ÃÖpX°tRcÖT*…N§ƒv»=²—µÙl°ÜeaÖŸiZÉdïòRºnÝg¿-¯I§øi×»ü_Ó¬¿lÖúÓu Ré=G•Lö[«T¬÷ÞÞîÝÖuç}ÞßïÎæ¦õÞõ:Ðh8¿&Ÿ·¶%û“N[¯€ÕÕáŸ}[ÛÛÖ~ 
]·^¿»k¯ìC¡`mSÓ¬ûLsþûôÛ¿ýy䋞ÅÍæ[ßþxý»_z7ˆ<ñ}ßú–ua^²†"Î0Œ¾ÅZ#IÂ+‡zþô_Ò›Æàð^ûüNŽ®ÊUšJ¥P.—±µµå˜v:Ôj5d2™…›¿jšVÒ# ¥$zn"5‘4ŒþU½mOÕ„2™ì%¯Ùl/©K&rŽâÊ8s{9Ï÷î‹ÇfÛ´$…ÂøÕÆÓé^Â'ì·l:L¸6MàâEëõ²ö}°W2ÙŸðβO¿üË/ãàà•ñO$Bï|u:—¤ÑÊþ˜¦ >WbÚ4ûÏa²Ñ4Üõö‡ñ…>ôžÍŸ®ãî»ï\i“(‚t]GyXev‘Õa%ªrhúGL\z£ÅÕ*ÁW®\A±XÄ¥K—°¶¶†L&ƒD"v»f³‰ãÊ•+AÏHÒÓ'=x†Õƒ'=œÙ¬õ˜šM›Ù“¿MËÓÛ4Ù§I_æ•Ã+k++·¯Qo” š½WßÞ€£’ç†uÞ½¸–‘jÃ40Ø{Úí·UòÚë×û{þí HöçV<Ù•FmKÌkÔÄò°-ù  ZÿYûþ û,Ta.\1 œ§ðÎGßôžÍÝ­_Äçc1œcï*-¿wý:â‹ÞB«¡ލ«7Têúqr*µ)õ-©©u#a™©Þ®V{u>û뤞R¯[Û’<Émi}ÝÊgä}VW;wžÀþè':VW k*•ÂÖÖjµ666Ïd2(•J¡ï]µ'ù¼õGþÉf­žU7=º4Ó´’Q)|âñÞÐsµ±@M¶ «@rj,‘†yLmØ‘ç–ËÃ{í矛F{œ85 9±?ÇͶ‚uàÅgQ,ŽÏÐzûÃølÒ@zá3o¢A_ýô§ñÈ#Œž§F·þ—ÿ+ï~wл195)¬…Ò€q`< Ð?µÏñ-Œ^=Im\–JÃõî®õ¾ö†ly?ðž“N[ÿ—NIXÕÑ]ê(KûÈLõ¶úS;¬—N÷u[GºnëiÞßšÍrt•°½¤µÝn£Õju/_“J¥H$ÐétÐl6‘Éd&Ú¯I €›J$y/›íLóc/¸$ù”dRÓ¬ÂLMVíI©›F§d–=á4W ¸ùðÿ÷àž ÷„hîî»u ïø«ÿ¬wi ¢ˆºóñã­¿ñ7‚Þ GjO¤iø2`¼iý³¤sƒk«˜: }0V¯¤jÔTBµŽ$£Õì¦iȶ¿W29݈N7u¸ ëí®V!—¸±O nµZ(‹ÐõðLÊ0M+ ®sy¨ôQÕhôãµ'ŸL*ia”éC†AÑó­Û·ñÄáßa²ÒHLÎîþó?Ç;í×|ݦ¦õ••ºÐꪕL¾öšu[×ãÓþH¿ÄˆÛ¦ª08²i\Öiôcdv'¬‹¤ZµmчÎ £ë½!¾Q\Ó€–—išÑ_Y2âdhš:,MVæEÙ©s¼¥—CÏý{¿7ù¨°øÖ¾€»ÞÃK6ùE®Ð0ic¹Äª ýÔõÞœ=éÅjœö´©óóìÛœ×H².„,Ö_ýê/ý±ÍÅ»ÿò/ñþøOÞ[]¼°Rü¾d«è&‘: (h°M’ë˜R¨E6a5 « 9< zOˆ¼S¯s6ÍÆ~¶QI¤š4V«Vù³¹9|0™B0,aœç‚©åš}®T¡|ï÷N>*,îýÆßÄ_}è$èݘ™Ú»(‹õIÒ•LZßk>ßûn ë5…B/†4­÷\I u½7uEâls³ÿrw’ØÉÎlÖzM6Û[PS†hÊh;õ’‚ö+BhZoQHIFe ý”XVcsÛ¶p–Óô3u!B`p?Ù/5¡-ÀVÐ_ñÌ^¿çž¹­E Îá¼x±ÿ»’ïßUUÖ"IêµO)ô"›°®¯$DQ"sVçEaš^Fê*²B¤ôÔØ‡ÁIò)•a©øIåÚ~¶a+Ô«‹­M» šÓâfËæSs çÞûµ w@ÿ>IÄœ$Öä;^_·þµ_ºO.«'+ÐËåëd[ù|ÿ¥ì$a“DUa—»Æ_aÀ¾ f2i-RôLìÉM6;8}ÌÍ\@7ìç“ý}#;D×ñù·ßÆãS¾\ê8²x¤4Ž%“ƒßÕÈϰ ë4»°: `5_š]  k«Õ€¡« ·Z-Äb1$‰‰ÞW ÖÈ´¼ŠoQ­r0ËË_¶‚eΕ}ø¡ýºÞ¿êc2ÙK$¥b¬Vħ¹ GmxÛßüì7qïoÞëËq˜f¯gSíåK&­ÆNYUÔiÕw•ý>{C©SæfQ?™è6æf­×±^Øãu¦‰¯œ93ñ˪Õþ7—@Üöé¿qô.?Ãé mlÂÚjµP«ÕƾQ§Óq½ÑV«…r¹Œv» ÀZÈ©Z­vOšv»r¹Ü=™²Ù,ªöå¸FpÛÒKä¯ãèU2Ø»JAð#Æ£D¶ô†4JÏ–iö†EJâišýCeÝ\BÀíµ»i8?bûñÎãxô§û¾Û{Øåˆö^K•Óª¢Œ¡hó­üÖ4˜.ÆèÊ"Ijù¶»ë<ßw,Voª  +Yå°ßHø¶y½Q,s}I›b±ˆL&]×ñüóÏ#“É ¬tmll ‘Htw›4…ñÍÞU ’×1¾¨Ã5Íê¨Tú‡U^¼ØõtÚJ ¶·­¡m‡‡V#« wzså¸h ¿ü(¿ïC¿7—Þð~ 
Ô\B/,×@”¹’D€up-›Å¿sIi|W˹ryÊdµ`V‚z½k¨R$Œía•ë¯ÎK³ÙD§ÓA©T`%ºkkkØÛÛë?h6›¸víZ÷ñ\.‡½½½îkˆÂʯøVçùÉ7M3´C‚í+Ž[rÔ°ÜÚÒó«üÞjN_§’!¾õºu[¯)s™‰†ñ»ž•JoºÃÌs…°’Õ<¬D•|‘äûVI€c±X÷¾££#Ö‰!ÿWÇÔ§R©îТ0ó+¾9˜‚²Œe¸,RdšÖ"FêÂ1²x{B_Øc{}ÝjÉçg2IKÍÏ7œ–‡•¬Ê4ˆ¹ÐìƒC#nìàf³9ÐÒÝétÐl6§Ú }èp»ÝF­VC.—C"‘yRŒš'ûæ›oâæÍ›Sï-v»7nàöíÛso¯âÞxã 4›Í…®ø“?šÍ&îܹãÉ{ûQ†9$X†¨U«V‚pöl¯7+·†XªÃñy­oÿµZ-ܸqÇÇÇs}ß0–ßrOÀî{xØ?tœ¢GÊïIÖ†qËÏ:¸aùƒ\ovæF÷:¬žUÀZõ—çÃÂ:ø¤å÷TsX¯^½Šb±8Ów:Ôj5<õÔSÈd2¸råJ÷þaFÜ›o¾‰[·náàà`¦ý¢è;::ò,aóŽoÀªðt[@‰†988𤲣ò² ÇãȰòËú:pþ|oˆo>ßK(<¼JXEXÊo]·âQ:ª8 d9HùíU|>ÔÁ ‰/q¥5O®¡+—šš+aåâ` Iêà“Æw —µiµZØØØ@,õk׆ 3jií‡zétšó\i,i]ôªqËø€‡~˜ñM®”J¥î ^ˆJ.×ø“„´PàpûE ½A^”áa*¿ ƒC—‘”ßS_Nf _Êo]ÇÃôGHþWÿk¤ÀúºÏ3HÉš«J iÚ:øÜV žD¹\·Ÿ+++Ð7,¡Ýn{vâÍ㛢n‘c\bÙhô/€ÄÅ( ±-#âÕ¹ÒDóâKŒë:>wæLwÑ¥jÕjœ:žMça]¶†–’ï=¬š¦¡Ýn#›ÍÌ7Íd2H$Èd2}+’iš†ÕÕÕ ?+¢±ßu‹㕊•¤Z·Ù›Jª0Ķ®[—A’%š'ßbÜ4ñùW_íÞœiZ…à"¬ëªr´ÁÒò=a•ajNs`e¡R©„b±Ø]~;‹áòåËTDã1¾)ê1ÆMÓJ’IkÑ$"'AǶ ›dC yÅ·7 Üúëýô}gèY•duG,5× «Ü2)Û)àÇ]³µT*Ç‘J¥ðì³ÏvO,uE3¢0c|SÔ-JŒ›foEJ]·Vö-ùÈhAÛÕªµpëÑ’ð-ÆO¯V©X7§JX™¬’blº²²‚µµµû½®€Ø—Þ&ŠÆ7E]Ð1.sUËeΤùò"¶e0Ž 0˜5Æ_{ï{‘L&‘LN¹ºµ&«ÔglšH$¸2)-YH)™ì¿^*Q˜U* L¡iøÚ£"O?ª% `œ³J]¬LDD4o†5¬ræË&ùH×­aÀ @‘`øÊ_ý¾üåDߪ쮘§ÿg²J ×sX[­vvv°¶¶†T*…ô’U&mùIæÿ1a¥E’N3Y¥ÉfñÉOÿáS?Óœ°<^‡“#W «¦i(—ËH$ˆÅbÝûK¥b±4MC«Õ³Ï>ôñÑ’‘vRVúiÑ4ld¡ˆI&qóâÖ­ûœ´—´€‹Ž‘WC‚766ÍfñÜsÏõ]<8•J!—Ëakk .\ÀÕ«Wƒ>""Z"šf]²†hј&º«¨EÉ'?y?žzê-÷/¡ÃLViˆ± «\‡éé§Ÿù¼§Ÿ~;;;h·ÛAEœTö+`w—½«´˜67Ù»JÑÓé<Ž¿÷÷ÞëîÉ&€UX+ 16amµZH$}=«€uYux°<~ttô1Q„™&pþ¼õÿë×™¬ÒbŠÇ§¼äQÈݺõ×Ý_O¸ «g•‹,ÑcÖX,†ãããû·¶¶J¥‚Þ""Z2Õ*P(X½SD‹ª^zˆæOÓ€GýŒË'а,§1Æ&¬©T NÍfsäóäqµ×•ˆˆhÞâqL}?¢Ðu«bO5>xïÿ»{r¯—M.¸JXS©jµ:Žãs:jµ2™ {]‰ˆÈSå2çýÑb“Õ‰¢æöíOáüù¯Ž¢4Øð< \]ÖæÊ•+(‹¸tér¹\_Rzpp€½½½î󈈈¼R¯³w•Ÿ¦ûûAï‘7ânZë`ï*¹æ*aM¥R¸vívvvP«ÕÏårX[[X˜‰ˆˆh^êõÞ5W‰•aX#8J€¢Hw[Ho‚ -‘k®VÀZøÊ•+(•JhµZÝûS©ç­‘çÒi£¤ÅÇáÀeå²ËnS&«4× «ˆÅbÈd2Aï7-^¾†¢@×¹Â5-1 @ÀvÐ;B‹dì¢KDDDAª×õõ ÷‚h>Êe ÉÞ%ZV pî*M,ð„uÔårZ­ÚívлH45Æ7E×1®ëÖuWÙ#E~ó*¶9R€Â":Ê68˜&hÂÚn·Q,ï¿té.]º„§žzÊýxx¢a|SÔyãº\¼ìîròËoŠº@bœ×¦)’°¶Ûmhš6ô$ØØØ@"‘€®ëxþùçÑjµW'& #Æ7E1®&«ì‘"¿°ü¦¨ ,Æë°†M!„µÙlv¯Ýj×n·Ñl6±¶¶ÀZä)—ËaŸ,£Áø¦¨ó:ÆMÓš³º¹Éd•üÅò›¢.°¯‚sWij$¬¹\[[[(•J°.—#R©çúÑÂ`|SÔyã†a-L“Ï}¤´lX~SÔã:€48w•¦ø¢Kv£NŠN§3ô±W_}º®shÕl6ñ‰O|ŸûÜç|ßö´ñ ¯¼ò 
jµÚÈEˆ V«áå—_dÛó(ÃÓi&«4ÜÞÞ>ñ‰OàæÍ›¾n—å7ùAÊï B<«ƒ×°L'ôêà“–ß¡KXGÇÇÇC{×»Þ…G}.\ú(äVVVðÄOàÁô}ÛÓÆ7¼ûÝïÆ… °²²âû~Ób¹páb±X ÛfN^K¥Rxâ‰'pæÌ_·Ëò›ü å·ßñ xX~k`ÂJzuðIãûî wÜN†`—H$†>ö®w½ øÀÉd‚> ¹D"'žxbl‹¸¦oÀªð0¾ÉL&ƒûï¿?m³ '¯¥R)t:ßËp–ßä)¿ƒhtô¤üfï*)¦­ƒ‡®‡UZÕa ív{ìÑ"`|SÔ1Æ)ªÛužÄx\l‰fº„5‘H “Éô­`¦iVWWƒÞ5¢™1¾)êãUŒmŠ:Ob< €×Ѧ…nH0”J%‹E4›Mt:Äb1\¾|9èÝ"š Æ7Ecœ¢Š±MQ7ׯÂZ8ôQÑ¢ 4aÍd2Ðu}àþT*…gŸ}­V«û<¢EÃø¦¨cŒST1¶)ê|‰ñ€BÐGJQÊVÀºX1(ªßuŒqŠ*Æ6EÝÜb|Lsº9¬DDDDD´àÒAïEV"""""šŸ #è ¨`ÂJDDDDDóÓ€µB0Ñ0a%""""¢ùÐÁ•i®˜°Ñ|4Àù«4WLX‰ˆˆˆˆh>4°‡•æŠ +ÍÎ<ý—óWiŽ˜°Ñì`ï*ÍÝÝAïE@!è (b+…V"""""" %&¬DDDDDDJLX‰ˆˆˆˆˆ(”˜°Q(1a%"""""¢P }ÂÚjµÐn·ƒÞ "O0¾)êãeŒoŠ2Æ7…Eh¯ÃÚn·Q.—ÑjµÙlÕj5èÝ"š Æ7Ecœ¢ŒñMQÆø¦° mëÆÆ‰t]ÇóÏ?V«…Z­ônÍ㛢Ž1NQÆø¦(c|SØ„2am·Ûh6›X[[Äb1är9ìïï½kD3c|SÔ1Æ)ÊßeŒo £P&¬GGG€T*Õ½/•Jqý©b±ô.ø¢V«¡Ùl½sÇø­Ùl.MKnTÏeÆøhQýÞíX†/–ß‹ñ=ZT¿w»°•ß¡œÃ:ê¤èt:ˆÅb÷àŸÿóŽù/ÿ%žx≠ÁS7nÜXŠæå—_Æ /¼€ûï¿®ïûÊ+¯à•W^Á»Þõ.”J%ßkšø€Ï|æ3øñÿq<ðÀxøá‡}ßo¿Ü¹sN§;w&ʼ:—oܸ7Þx#°ãb>ËðÙß÷Î;xÿûß¿0e8Ëïèñºüþ×ÿú_ã§~ê§|?.–ߣ±üžÔÁ'-¿C™°v:¡;ž,—/_F.—C"‘z÷=×jµúZ¾¢ªÝnãÌ™3C¸Yt:r\ÓÄ7üîïþîR|ïòÝð\žý½ƒÂ2|4–á³[´2œåwôx]~/,¿Gcù=»iÊïP&¬£aØÉ‹Å<ùPÃhNž|AÆË4ñ=îuQÂs9üï=˶Y†/ϹÌ2ÜÝk¢„çqøß{–m³ü^žs9låw(ç°®¬¬è–Ðn·—¢å†¢ñMQǧ(c|S”1¾)ŒB™°& d2ìííuïÓ4 «««AïÑÌßuŒqŠ2Æ7Eã›Â讓“““ wÂI«ÕB±XD"‘èNòÞÚÚZš!mŒoŠ:Æ8E㛢ŒñMaÚ„@ßJs™L&èÝ!š+Æ7Ecœ¢ŒñMQÆø¦0 uÂJDDDDDDË+”sX—ɰ‹ò¶Z­‘ךõq?µZ­‘—ؘåXÂtœ4hÔE§£ò½{ßa;VêçU|»yÜO³Äø"'õ[†øfù½¼–!¾e_"Q~ŸP`ŽŽŽNÎ;7pßÏýÜÏœ;wîäܹs'¿þë¿>×Çýôâ‹/ž<ùä“Ý}yòÉ'O^|ñŹK˜Ž“œ9Å·Ü…ïÝËøÛ±Ò /âÛÍã~š%Æé8iPÔã›å÷r‹z|ŸœD¯üfkÚí64MC¹\xlcc‰Dº®ãùçŸG«ÕB­V›Ûã~*‹Èd2Ý}Éd2}Ç<˱„é8©ß¨ø¢ó½{ßa;Vêñ2¾Ý<î§Yb|‘Ž“z–%¾Y~/§e‰o ‚åw`©ÿûøÇ?~òÑ~´Û:!¤ÅGmùØÇ>vòä“OÎåq?½ð 'çÎ;¹sçŽãñÍr,a:N4,¾ONf‹á0}ï^Æ·›Ç)8^Å·›Çý4KŒ/ÒqR¿eˆo–ßËkâûä$šå7{XËå°µµ…R©ÔwÿÑÑ •JuïK¥RÝñá³>î§T*5°ºì_,›éXÂtœ4hX|³Åp˜¾w/ãÛÍã¯âÛÍã~š%Æé8©ß2Ä7Ëïåµ ñ-ÛZùÍ„5DF}ÙNgæÇý‹Åú–Ao·Û¨ÕjÈårH$3ËááahŽ“&•ïÝËøÛ¹LîEé{Ÿ%ÆÇ«a:Nr/*ñÍò›œDé{bùÍ„5DF}ÑÇÇÇ3?Ô1Õj5<õÔSÈd2¸råÊÌÇúꫯ†î8ɨ}ï^ÄwXÏe/Šßû41>î\ ãqÒxQ‹o–ߤŠâ÷¥òûn>/rIí^·K$3?î·V«… 
Äb1\»vm`øÀ´Ç’ÉdpõêÕÐ'¹¥ïÝ«øã¹LîDí{Ÿ6ÆÇ«a;Nr'JñÍò›ì¢ö½G­üfkˆ¬¬¬è–Ðn·»0ëã~+—ËÝqôöŸåXÂvœä^”¾w¯âÛÍãNQûÞ§ñE;Nr'JñÍò›ì¢ö½G­üfÂ"‰D™L{{{Ýû4MÃêêê\÷“¦ih·ÛÈf³h6›}³K˜Ž“&•ïÝËøvó8…S”¾÷Yb|‘ޓ܋J|³ü&'QúÞ£X~ßurrrâû'I€f³‰b±]×»÷µZ-‹E$ t:Äb±¾•¾f}Ü/µZ ;;;ŽÉñÎr,a9NÎ)¾h|ï^Ç·›Ç)X^Ä·›Çý2kŒ/Êq’³(Ç7ËoŠr|Ñ,¿™°†P§ÓA«Õ€¾U¾æõx˜Ìr,‹tœÔoY¾÷e:—©g™¾÷YÎÕE:NêY–ø^–ã¤~Ëô½/RùÍ„•ˆˆˆˆˆˆB‰sX‰ˆˆˆˆˆ(”˜°Q(1a%"""""¢PbÂJDDDDDD¡Ä„•ˆˆˆˆˆˆB‰ +…V"""""" %&¬DDDDDDJLX‰ˆˆˆˆˆ(”˜°Q(1a%"""""¢PbÂJDDDDDD¡Ä„•ˆˆˆˆˆˆB‰ +…V"""""" %&¬DDDDDDJLX‰(pš¦Á4Í wƒÈwóŒ}Ó4¡iZЇDDD4WLX‰(p«««Ðu=èÝ òÝp¡P@2™ÐkÉI&“Èf³Ð4 …B¡û\MÓºCÁòù<Òét÷±J¥‚r¹Œz½Ó4‘N§‘Ïç»ïÇQ.—§~>Ñ8nbªÕ*LÓˆ1Æ7-ªYcÿ•J›››¨V«}ç„0MÕjuàœ!šÖ¨xÖ4 †a Ñh²Ùì@ŒÊ}nË`–×”yĺišÝØÇã( ˆÇãÛ“bk˜¦ ]×»š¦u+/€ÔÕj€Õ¢sñâÅîIX‹/vßëüùó}'™) ÝÛår¹û~Ó<Ÿhö?9±bN½½½½}’Ífß)“ÄþÉÉøòÝþÚýýýÝí¨qM4oNñœÍfOö÷÷»·í1*÷¹-ƒY^SLëR«ï!õy>cÛ=ö°†Ìúú:’É$677X-2Ùl¶oèX>Ÿïþ_Ó4$“Éîêêmu!õõñx¼;$a˜IŸOä–=ÆE6›íþ_âñMQ2Iìîâ_}­Vzõ·‚hކųSŒNR³¼¦ Më·ëëëÐ4 ñxûûû}ñÌØvsXC¤R©À0 ìïïwïw¹v C†…ÓQМb|Æ7EŤ±Lÿ2j}}‡‡‡A:EÐ4ñL´ˆ¦õd2‰ýýýîT>Ó4Q(°½½ô!-$&¬!Q¯×Q¯×±¿¿ß×ÂÇG&­étºÛj#¤õó–(L†Åø(ŒoŠ‚ib˜>þ···aš&Fw!¢y™6ž‰Í,±. 'mooc{{»{Ù±t:Ý·h*¹Ã!Á! 
ë:Ö××±»»;P ÉçóÝÉÞº+Ž u2!¿‰ÂbTŒÂø¦E7mì³Å<ï®XÉK/м¸‰çq#ÈÁ¬±®ö¬V+x¦Ç„5dÅßÕÕUÜu×]Ý¿J¥‚d2‰íím¬®®buuçÏŸï&•’J¥‚óçÏwWÜÝÝ ú°ˆºFÅø(ŒoZtÓÆ>0{üçóyäóy6ðÐÜŒ‹çx<ŽJ¥âx9¢E2k¬Ë%lΞ=‹ÕÕUœ={Éd’k L鮓“““ w‚†“á_Éd²{V¹F“ZiQáadDQÁø¦eÆø§E¡ÖYÔeˆ¢Æm¬ËÉx<ΩL3`Âr†aàìÙ³¸~ý:Òé4 ÃÀêê*Êå2ÇÀQ¤qÑ¥S‡›¦‰x<ŽB¡Àd•ˆˆˆˆˆ"=¬DDDDDDJ‘éaý£?ú#ü‹ñ/ð]ßõ]AïŠç–å’ŸûÜçðàƒâÁœû{ß¾}o¿ý6þÙ?ûgA¦k¿ò+¿‚ïþîïz7LWX~GOØÊïÈ$¬ßúÖ·ðž÷¼.\zW<÷ /,ÅqÞ¼yí¯ý5<ñÄsï7nॗ^ ú'r||¼ßû7ðÖ[o-űzy.¿ð AÞDX†G×ex»Ýú]cù=^—ßgΜ ú]cù=a+¿#“°¾ë]ïÂ>ðd2™ wÅs÷ïþÝ¥8NXYYñ¬…±Óé}xy÷»ß½ßûÊÊ žx≥8V/Ïåûï¿?èÛËðhbnaù=^—ß±X,èCtåw4…©üŽLºLr¹\лà‹e)¨_"‘X˜aP³Z–s™ú-Ë÷Î2|ù°ü¦¨[–ï=lå7V""èºÓ4»·yM6"""¢ñ˜°.]סë: Ãè^ Øë‹rË…Žåÿöʵa0 càurQ{µrnšf÷¹ê{Äãñîsu]ïÞ§w2™ìÛ¶zÜö`Ã0ºÏ¿sç~ôGÔ³ÏúÉw%ña¿ßé»,—Ë}Ï–Ë@ì:%ò|‰IU<ï;ïä9êûÉ>Kì†Ñ]¤A^«žrÜÙlñx¼{Ûiû㨯_´9ÚDDDDN˜°ú@­ K䜞/É™T’³Ù,4Mƒah4Èf³(—ËÝ÷• î°Š®iš0M³¯ÂmF_%[¨s©˶åùöäC‘Íf»ÿO§ÓÇ«iLÓD>ŸÇöööÐÏOÞW’võ=&í¥’Ï´ÙlâààÀÛ/>Âä;Q¿i°Ç¯H§Ó¨V«ï%q¤Æ‹a8þ<òù< …4MënOÐ}MÓú^ï´ •ì›®ëÝ÷–çHLåóù×ÙëB¡àØp¤ž nbÔ4M4 †Ñ=§í»ùNäØŠÅâ¿qZfãyÔòy˜t:t:Ý÷ûoÐûݰÿnÊo¡øìg?‹Ç<è†"Êmƒ¸üÖ©LÓìÆê°Æ};9¯äœøÓ?ýÓ ?¢PaÂ:g†a@Ó´n%X 2õÞžÈIBè¦7I^»¹¹‰z½ŽJ¥Òí¹’÷R+ºNIª—òùüÐÇì½S£ ûë²Ù¬ëçÓü¨?ÚÒ`!q-1¦6\Ìó;Êçó¨V«X__ï&óŽIßÏmiOÀÝ<¿P(Ìt,“n“¢ÉM£ž®ëh4}œ¦iv{ù¥1GÎ}õ=ÔS©²Ùl·QuØ>麎z½ ÿCmÀ·7ˆû± ÙãûÏþìÏ‚Þ%¢™É9no8ýÔ§>…7ß|s¢÷bÂ:„|ÈêŸÚ³¤‚^Ï'%š…¦iÝÞ”aBuî£4¸„éÇœ(Ììo{r©—añ2ü;›Ív_ÔÞu¾³¼Ö~þzQéf/=©ÔÆyµG^M@Óé´ãôymXb*,ûA$µq@·ÁIFcªëÄH²i½J¥ìÔN<ù“÷zì±Çð½ßû½í7Vô U©d4!~ö,ÍC½^G£Ñ`õÆš'LDã©?ìê‚[êï…P“Éaçž4$ɰWþÆP$¦ÕžJ‰kµñD*³ÃæîÛ1A$òž}úÐÿ[¥ršs­6<©kÈC!#„ÔóÔ‚š£~ϦYGféÖF£z½Þ—œn €añ80IBÓz¯‹Ç·…µ®Té´õZ·LÓÚ¦}? £wÿ¼ZàuÝz¯yö&«Ç.’ÉáÛ!=š6øù;ÖÓySê{Þãîp8Â"0MõzõzÙlÛÛÛHš¦õ7½˜˜g\Œ¢i“oKbwÔùg½xL§­Ûö8rÚ®ÄúÞrŸ´ž.d6°?£ØcÑ4{ïg[¥¸+Ÿ<¾z}ô¶’IÀ>  ^·¾[Û{ýç@ÍLJ©ÈËèûö´òùüÈyÿDó¤öüË¢‡š¦uãZ†íÍ#¶#K-Ïê*Nu•Ó üV~ÇW¾ô‘R@ÔEìd}Y˜ÑÞ»iFߤ4NcssÓõ”bÔï‘Óë‡%¥^4¾2am4°•Ëáñ¿ø‹^¡¡V‚uÝúÿîîà‹% ¨×{‰œZA¶¹ºT*ý·¥À“BÏþ°ºÚ+*á’€¨ ÃÚ7uUVMëßÖþ¾óþ©I·}? 
…ÁºRéUΤӀ}.¢T²åqû~Ökœ v§ÿKÂ’ÏVèãqë=•D vó&Þ±äF£J¥‚|>ýýý^…£Ñèÿ~4ÍŠ ð¾K§ïG.ùÜÔ$Jâc{{ðûY]µ^kšÎNßÄÚ‘N÷ï³=VÞãã꾘æðŠúZ!ç•Sn½×I‘}{N «ŸöÇå3´K§G79U„ ësµÓ—A“Q×(•؇Íõ\HšÖ+ϳYw IöóXC’SÃŽÚp[±jßfµ:øÛf½I×ñØoÿ6>±2Ü­aë¨ÃùÔaæj©ºº4jBâeT‚7I}CÊÔBaøoŒú\‰91¬>äDêiÙ¬sÂ*çÓï…üæÚßO^{Š ëâ’E휮¤ôÿ’^PMÓú†ÕªÓå9r¾NÛ˜…ÑK°J¡ÿx¡<þø`¥]8ކU˜åóV2ë&€’I빓V|œ*ëãd³î{xE:=ݶ¦YÇéGÅ«måóÖŸâåfw"¶`GµZíOTG—Z1´ß/7ù¼ûXò+~’Éé^7Ík¦9—¦Ý¿i[%’í/ííM÷^KÄ0ŒîÐyõ2^Ùlׯ_w—¤Ú{Ð園ʬÓoøF¾x|°‘tXZKuݹ‘ouU¶—¤êz¯7?ŸŒÕ‹ûUÔ†TiÄujärM Ç/I‚ý3•Æ-ûçao̵}oÇbøú}÷¹ø–£AQ¤rëÔƒaObeñ¡™zLí ¥Ów"ñ,!NêöxQ‡Âqº/›u2Ê´eñ4¿gÓÔk~/þ¤XÄÿkòw¢H.!ç¨$œ2\ÖéJ2d–ÓµÜ[¾„U~laõHåÝTÆ ÆQIÀ0Qh¡§P’JLR*£ṅõ*N›p¬Ñhô RW&•Vfµç©P( &§öQò7j˜¹*ïõf:*pê9gXZ¶?ì=§©t;(r³Ó4¸LÙày³ÙÄ[ktt¢iZw¡®¹Vn~'4ÍJ>NáÔ«èWC ¼ŽÈ'2·Z­v‡æ½¹¢r[†îºnè\$ÃFA8ý• œ¦5¹´\ «´no§]íeûP#¢ôÛÿ̓ß=9±zS&m}&ZpÒó$ótì ɵå_ÇKé:°¾ný_’/©ËúN½GÓŽ˜Và#«R©@Ó4looÏ6'Lz3eê…Lù±7fd³ÀõëA6Q ¤·T~3¦Y…•}ÚŸ¶6†ŒÈ7müí3 ë5ÃFF +¯fHæ—'a•ŠÈi-]ø\•‘ÝÿýÜsøoÿôOñþÿò¿œëEa¦ibuuµ{]ƾÞ'Y“ Ñ@6™ì-8¦® J&­ß‡aÉ|b.^¼Øš>•zÝú“!×é´Õ˜Éá‡D0MÕjµoÕ\™ë-Cé~S¼¦ödjšó0zÉeä¶ýwÌi¤Í°‘t£:7üœ¶çR  k«Õ¤R©¡Çb1$‰Ù6$_°xÝáÀDñ%¾ üüÏãó¿ð xœÉ*ùÌ·2Ü$«…Bû«aX•õ|ÞjÄQç_jZoa059Ú0.šI±}ñâEç¸fXÅuÊ¡w´‚Œñ ÕëuT«Uäóù¾slî«cKêÔã(=˜ö¡ùòÿa=—åòä ¨ÉuIX[­Êå2Úí6 ‘H Z­vOšv»r¹Ü=™²ÙlwÇÄd<µ­•¤Ñhp¢3yÂÏø¾õïþžû¶oÃúoýVЇMKÄ×2ÜÁÈdè-pg7ílZ"¾—á6•ÓQ3ÝJ½ ý%šQ±mšæðº‰iZ•åtÚª/q/ Í&èòÛ+š¦auuÉdÛÛÛ(—ËØÜÜÄáá¡ûëh›¦•˜ž?oý­¯÷VaÏf­äÓé·FV¼æ´Où>$8•Jakk ±X¬{ßÑÑëÄÿ«cêS©Tw褮%¸z:t,›Íb}}»»»Ñ[všBÁïø¾úƒ?È…ÃÈW~ǸJ†vu+OrÝGŽ–¡9:¶‡Î¡‹Ç­^¢ãÓ0 F¦iÂ0ŒîedäR~º—›ÙÝÝí»o*£ߣ@ùÞËÅQ†0¶ÛmÔj5är9$‰‘'E§ÓúØ›o¾‰›7o¢ÙlöÝõÞ{±½½ MÓðÀ P(°‚¿äÚí6nܸ۷oÏý½½Šoxã7Ðl6ûÞÃ4M6¾Ð€f³‰;wîxòÞ~—áªjµŠýýý^̳wu)µZ-ܸqÇÇÇs}_¿Ëo•$¬DR~‹©iY~O¢R©àìÙ³Ý52dûûû899Áááaw˜ïõëׇ'©¦i å­T¬!õ««Ö_½Þÿ¼xܾË:•§¤>iùØ*ÁNW¯^ÅÎÎÖÖÖºÃFÇÇÇ}­Bª7ß|·nÝÂÁÁAßÉ(KVooos‘%`µ&Þ¸q¯¿þºgÛ˜w|V…çàຫöÉìDªƒƒO*;*¿ÊpÑh4Íf{ɪiö®«MKEV¯øU~ ¹Ä^_lËe˜héHù=.¦fáwùí–ÌCÇã³-ŒdšÖÜSuH¯\Ú‰çU ¤>©ÀV ÞØØ@,õk׆ 3jií‡zétº{Ò軾‘ÂT*óæE|ÀÃ?Ü߀ãìa%»R©Ô]áÑ ~•á*MÓUçî5œË·¤¤7È‹2ÜÏò[HcL×úz¯‚MKGÊo¯.'Dù톮ëX__G>Ÿï[j*ñ8‡÷†Ô´uð@Ör¹ŒL&ƒ+W® <¶²²Àê2–“Cýÿ$¤Õ’ÈO~Å7 ßéQë+Ñ(¾Å¸B×õþQ2õºó¥kˆfDl7 \¿~]nX½CL "ÆÇ1M/^Äöövã»÷_JFðŠßç°jš†v»l6‹f³Ù÷X-8™L{{{}¯Y]]x[ÿÖo1a%_ùßo”Jø[ßÈ•©h‰ùãB×õþÑ2õºÕÅ44GAÄvßPwÆÈaîä‘ 
b|¹®v¹\ž,Y5ŒÞª¾N—›¡Hñ½‡U†©‹ÅÇt]` ‡(‹Ýå·c±._¾<Ù†t¼ôÞà ùÈ·øðöç?»r¹ ™–ŒŸ1.†L²åœ<Dl÷ u¯VÙCž "ÆÇ¹xñ" …‚ûEÇ4Íj´4 ë·àúuû]¾'¬¥Riì÷T*…gŸ}¶{bM5Û0ðé»îÂeö°’|‹oßøÆ7‚>\ZB~Ƹè2Iä‘ b»;ÔÝ0¬Š8ãœ<DŒR©TL&Ý%«šÖ›ß](p ƒ%Ø*ÁãØ—Þž˜®£õ®w}DŽæß·î»oò¹D>™9ÆOÉJØ\\ŒÂb^±­ëz/®«U`Ö…fˆæd^1>Šišî#uÝþ»»ËÅÈ–Td'À™rAy¢(2 Ü|ç;ƒÞ "Ï5 k-‚JŪ´E„išVl›¦Õ{Äk Ó©T*(—Ëî#Óikô“Õ¥Ù„õíÏžMÑ¥ë8xûmö°Räiš†|>o-ªÁ2"Ä4ÍÞ ®xMKDÓ4èºî~Þ*-½È&¬o½õ+ó]†ë¯½ô^ù"išLV)rº«_ÇãŒoZ*Õj›››Î꺵ò/‘"´sXg¢iø³w¼) ¦¨2 ¼9âßDQa†Õ»ÊHŠ $ë)´d4MC2™tîTÒuàâEŽ8 ÑìaÇñïßz‹?Y·~õWƒÞ"_$“Ik~VŠ˜¾!ÁDKbà2eÂ4{É*GM$V3™ÄÁ‡?ônyæ3>j-ÖAa¦iâ=ßü¦uƒ 1†a€Í0´LLÓì­K`wñ¢µR6ë6ä`¢!ÁÍf³o™ëf³ MÓ‹ÅpáÂÏ—Àv«;/„(¢ Ãzˆ<§ë:~þï¾ÿûƒÞ¢¹;÷ÚkîNK¥^¯;'«Õª5—›‹0Ñ®zX[­žzê)‹Åî}š¦¡X,¢ÙlbÅb{{{AkYžËÃS„†ÁEÅh)<þÁË}P$½òÙÏZ•t¢%Q¯×W6  ^¶·ƒÞ= 1W k¹\F*•µk׺÷íììtï{î¹çP*•P«Õ‚>>™›("8÷‰–¦i¸ù=ßÃ!bIÿ÷½CÝiihš†t:=8²Z67ÙxC#MX›Í&Úí6®\¹‚Ô骤N­V ¹\±X ËåÐétÐl6ƒ>&¢Èc+-‹w|ô£AïÑÜu§u0a¥%Ñh4œG?–ËECcMX[­2™L71¬V}fõq"òç°Ò2àHŠ*Ã0˜°ÒÒºØÏral‹ÅÐétúîkµZH¥RLR‰²¿¿ô.yÎ0 ®†M‘uß׿ÎÊ:-–å4«± k*•B«ÕB«Õ` Ö4m`E` ¼²²ô1EWÁ¦eç¼&Š MÓðŽw¼#èÝ ò…¦iýõ–Ó‘šDn¹JX3™ ŠÅ"jµŠÅ":Nw8°$°•J™L‰D"èc""¢xûàÀZ=’(bîûú×ñíïлA䋾u74ͺœÑ\­¼¹¹‰l6Û†¨.ÀtõêÕî*›››AEÄS‰„uÉ¢úÂ/þbл@ä Y!¯=LS¹ÛÍ“b±®\¹âøX.—C.—›ºgµÙl /¦bœþ “`Ð$Oÿä¶* ~ú^2ÍA;}®ÌѯŸ>îTÎ¥O_¯þeOÿŒÓ×Ù<|×ø÷½÷zþñ0¾)꼊qÃ0ðè׾Ɗ ÆËòûO>ó<ý¿ô!Ò’ó³ŽÇÓ´zXyÍUš«„°‚úààpáÂ…n€Ï2¸Ýn£X,B×û³—Z­†¾û2™ ¶¶¶‚þ¼†Óa%JP>ýWƒ•DIV9½<}¾‰^²·¯ýlL‡×èÊkÔ„è%¬öûU’ð&OßC¦JÄ1˜àêÀ½Ÿ¹ï¼÷ðR¤â›È—1nyä‘ ‘–”å7çgSüª£ ô®ò64WÃMf3*-IDAT «=xwvv°¶¶†R©4ÕFÛí6Z­ÖÀ !ŽŽŽËåÂsÙé¥ËÃJštÑKŠ€^R˜U^£>G­ìékÕÞFéöQÕiåo˜ÂéŸú7‹Èî¹íÄÞ(&=™£$ìÚn»=>ì³·-û÷bß¿,ðróeÜ9¸ãb'&·pñ=O XñF¯¡Á€Õ€’EœÒÂò+Æ_þrøWQ•߇qe¡ ª¼F+§·w•ÛN#P Xeâ¨FJ'“4Rªå«ð@ ûNá'åw§Ó læU7 £7š@×™°R·>iùíÍ$dmm­o¸q"‘@­VC.—úº7ß|·nÝÂÁÁ»– ii—y¥2ôH†;IÏEÎÑÑnܸ×_Ý÷mO߀Uá‘…ÏF.v¦& X/êjÍ@oѬE›SZF/A•}ßÄðáŒv¬d$â=´UvæU††3gÎ8?Y惺‰ß*úçR /ȬØtFËß“¹‘„Õoó*¿ï»u ÷Üsÿ-)¿}_ßbÞuðnï*ì»™·@Q'uðI¹îaõK"‘@©Tê;Is¹:ÎÈÞÓ‡zétzøBP&zócd®ž´„ËüŸ×` ‰d²i™L?û³?‹ïÖ{ã¡iã~øa”J¥á 2êªÓ¢+É“E®£·ºô¢Ú†•HOk½ÄE.…4LÃWÃŽR©„Ç{,mϳ ¿çÿÐùÉÜ ×•žX¿¦Lå1¸ª;пú9Í,—Ëágögñ|À×íΫü>úã?F;Œ+`Ëô · €²âVÙ*Ãîž£Žˆ¨ W¯Âj„¬ ÿÜ©ŸÞ¯¾F}o£/©çvÿCHÊïY®Â1­y×ÁûÖE¢Æö(2UIbZwñ|‰}{L/ ©ƒOZ~»îauç^«ÕpõêÕ¾ûf½4‡ ™Q[r¤Ûxê–&VpH¥ÖíDsæI|½FûŠÑ€óœºE‡5×Õ©—T~ 
¤Ù® «RTÁ|†BÓ€yŸýR }$a×c.±âYXH]lHÃàjð´æÛ÷¼ü2¾ó;¿ÓÝ“×a•YNåÙ°U£e‘.CyLæmožþ¿+67a5¨H¢ª.À'×M—ß)_e;êeíd±½‹Êm)cå€òNßC¶gœþ+¿WöyÐIôÕ¨aÓZÔEöóO>Ÿ:z£uò§û IlÞáù8}¾L'p» áòyЇ:Mö‚9™wE×u !Ê%±¡Ž²Q“Mu1¸¸í9@ÿ*ë²(«ü+—”¼®<èå2’3ú§Žx³_Þ²‚þ†y(ïe «ê ÒIgØ^+SƆ0RÏuñÍü~MXWVVú\©TÊ“ŠÅb(‹H¥RÝmìììôÝžˆ«uªÈùlîñ X? UXøe™'7ìÇ_z“/žþß~ÎËP«¾@Íd^1>´UÞ¾ª¯ÓyßùüdÑŸ`ÈáüàÓlæÛÞ¹ƒ×~üÇñA7OÞÆøÑöú4ÊÉȹ„’\LòÜÔ‘ìýöõœ~œàÊ:¼¯z)4`°l·j#Ð[PPM(ä·R~äßúéóåõjBº~ú¹Ég˜>ý|5ôVðú/ßWE¯1@.%½ÇêªâÃÊ…Uà'nþ‚àEÅó±¥—R?ì ì µAGýŽ€Þú£vÕ·N_yô÷ÜÛ×í— , ÿ¼T÷E”a­Gb?ߤñHHO¯44ÉZ!öÆ5Y—Û~%õ’²Zþ¡r{Xϰڈf?^åòkOÜy_üÑ/bcVà—T*…µµ5\ºtÉ:st¨VÇõËÛ°R‡õe³bJ!0·ø–8î.³´L²°>óè/,Õyë»èýp…ý¹šGŒ¼F¥º€‘T6^%èÍÙ–KÔø©à,zófc‘1¯òû®—^£>:úI똾¡cXråÍ z—jÀ:ÿí£+œ>—øé놕ô7 »¼: DޏòöíVa5œ^Goà.º+ñÿ~ñ÷ñCø!ß?ƹÕQN†È%t²s@ ½ßñ$z#$Ù”ÞËQ›rºÚ›]s»ûñ!ÿwû¾öÛqX1¸Žþ+9ØaV,™þ›3¬ñhÔ±ÙÏ#7ÓqòèuÖùqÝ+AÜhÞÀ+¯¸ü@-/ºÔl6» ³-wÉd‡}•J%är¹î‰2ñû°Nþ²Ë–ÈžÅ70¾%pÙI«ºSoœ<¾‰ÞðšŠW1nžJ$Ó´®Û§R[Œ¥—Bz”$Il`øedüp¨üßï„™æÂËòû=ßü&ýùŸþÉ ØéÞ&z•õ æ7íËíô„¯Q¯É¬!ßtOë(§’É$ÐhÌ–¬ªCÛ¥üÖ ö F‰ô–®ŸÞöÛ6颒^}VqôƒV’Õi¹NX5MÃ3Ï<3pI$ T«Õ¹N$ÓO8O¢¿Â@23Å7À$Ë Š%?fyôäuá©YcüÿùGäü@zÈ}ê(~·ä¡Ycû‹ëoáܨ'°‘crR.ÈZ‹ð©^¶-df®£Àº4«‡u{ŠÖÃz7²hä27âlÂJü-!o`ædp™°6›M”Ëed2<ýôÓÈ*-%š¦aoo—.]ÂÖÖÖL-1DDsSFÿ\’a…¥z‰ MÓðKwß=Ø»ªëV-Bå”èTîÿ߃ޅè*ƒåAH躎‹Å·«KOª¬Kà4sÙù¹ˆà¼ÌiKW—µÙØØ@6›ÅÖÖV_²  ïþ ?""‹ z?„Nd B¡€ï|õÕÁœ.›A%U /«h<&«¡ò}·o»OXeR4“U:56a•%®Ÿ~úé‘Ï[[[C»ÝF«Õ ú˜ˆˆ,2—qÔ5ûä2Édw??øÀ¨åø‰¢@]TŒh麎•¿üËÑóWMôæeʪ¹LRÉÁØ!Á­VËÕXv™¿Úét‚>&""‹ !•Ë8YÀkšGÞ°U‚C8׋hnÔK¤-8Ó4ño¿=¼‡UVGæ”ralk,ë^4˜ˆh¡Èáí×è³3л(8Ï0ú+9j+o5 ¬ƒŠ&µl2ÀÞ&Š]×à¥äh ®®ÃšJ¥°µµ…Z­æxéšL&ƒR©ÄÞU" ·=qXIkV¯(i-Æ‘/OÑ¥¡7g5 ö®R¤¤‡-¶ÄHš‚«„è%­réšV«Õ.&"¢éH/ë¸EšÈ[q°"OѦƒ1N‘¥ë:.¿õÖàgÁQ34WsXÝhµZ(‹AÑl6Ád•ˆ¼ÅKØP„=ðéOã»ì—Ä4` {g²JS˜[ÂJD¼¼ yEFp°âNõ•fߺÿþþ;“vƒÞ3ZTLX‰ˆì*°†ìÍ{W)âÞyó&¾ý§~ªw‡ ®ÀO3aÂJDd—V"òç¯RÄ%¾üe|íÑG{w¬ÃZdŒhJ'¬Ífsèc­V«{Í×e¥»¬4›¦õç–aLö|·LÓý>€6A¦i“=? 
ß *^'Î%Æ8E•g±/aC¡àUŒ¿û/ÿßóÓ?mÝÐ`õ®æƒ>ZZdcW nµZ¨ÕjcߨÓéL¼ñv»b±8pqáv»r¹ŒV«Èf³¨V«3¨aÉ$P©ñ8P>½l…®÷'nšfÝgÀþ¾õyN:m½VMœä¹ñÓ¹(ÛÛÖóä9Ù¬õ§ëÖ¶e?ä_ÈçBÁºo}Ýzýæ¦õX¥ÒÛÿx¼·]^{­÷]·^“N÷n;½Nž£i@£ÑÛ¶ìŸuVW­÷÷û·ídw×Ún½nmK¶-·…úÕÚ÷Gݶ¦YÇã öýQo«Ï—ïðÎ'ð£?úÅ™bj?ã›< åç™ Å§¨ò4¶Yq§ð2ÆoÝwÎIų ^*Žf6ñem†‰Åb®/i#×rÝÙÙq||cc‰D×®]C§ÓÁ¥K—P«ÕP*•¦Ú7].^­DµZí%Œ’pŠdÒJ˜’Ê*¡’\%“½ÄOŸr¹wŸšø&“V¢*ï“Lö8ašÖ¶åÚÊÉd/ÑöÛN$¡vÛI¡`ý‰tÚݶìÏ™f[öÛNœöÇͶœ> §×5›7ppðÊø7œ‚ßñ= ðb??eÅÉ4­„xÜŠséYwêaO§{Û‘F{ƒŒÓû—ËÖ9¡iV#G¹Üßd'Ç#?õzï|z-²Oê¶¶·{Ïé6’4€ú5@Oõ—B` k[ÃdT£dÔý³³7ȸÙÖ{ßûÓ}¹.,BŒMcÙcÛ4ûë)£ž'e½Ô‡ÆÑõþ²×‰aXÙ †LËïA2Ù_wS·+£Ðì¿YñxoŸÔßµ3Cý‘ûeÿtÝzŸ>4]Þ#™ì%«rÛ©‚$£äµÙlÿs$é“ ‡¦Yï£îÏö¶’œ€ôoÉÿ7ÝòÁœ68©QÓ6ÈLÓ8U(ô¾ûg[,~Éý<¡°Å8ѼD)¶e$U½n5ØKD×{ ð’L© ¦i=¾½Ýk4¬V{ n†u[Ê[ixŒÇG7ÊU«½†z{ýHêh•J¯Á_nÅ% wr¬Ò0ê4bLž'ïkš½FM§„U}®=‘UjI„Õçª ´|Æ2âNîSGUêþxÅó7 ¼Sæ¯6l»{ÑH'c¼ð 'çÎë»ïÎ;'/¼ð¸—ŽåôÞnïsz¯gžy¦ï¾ë×ON’É““ýý™w•"Æ)^¼ØÆ¼âûääää£ýèÀ}¯½vrR.[q~ýºu_¹|r²¹i=vrrr²»kÝW(œœd³''ÛÛýï±¹iÝ/çÉõëÖóóyëþdòä$¶î“m,í“““ÂÇONNprr²ôN:sŠ—yóº 'Æëxñ£üžF¹Ü+Ã¥\—²~{Û*³³Ù““xÜzlwwðõÙìÉÉá¡u{¿÷žò>''ÖãrûðÐzž¼†¼·Èå÷áááI6›µHúø¡‘§ö÷{åE¹Ü«3:ßVë‘ÙlÙõ /œüÆo\hûS ¾zõ*vvvƽÏè ÞN±XÌñ±7ß|7oÞD³ÙD&“éëYe/&©Úí6nܸ۷o²íaFÅ7¼ñÆh6›XYYA"‘`ÍyN&ë×û{øÞð§|~ôÐßr¹¿7RZ·9ÖÂKgÑ»n¢ª`¡œÖl6qçÎ@¶=¯2œæÇ¾–Jz~€ÁáŽêsì† wT‡ŠÚ§¨C+eĆ”1õúàÐû|Þ¹ úä'¿€gžy7žxâ¾~Žó.¿í¤wQ>£JÅ*Ëeú’½<ßÜ´>7u¨ë¸ÑN#ÆœF©= Ã†Ú’7¤üS^˜Gù}||Œ´œ¸¼îê«×{#*äw$î• RöØo YÇGÊ)©ƒ¿ýöduð¹Ía—Q‹7¬©àúéÿM¸Ö뇃ƒƒ©›‡y”á~&¬šæ\!—ødȺº€ž¦ žcFïÜÑuë¶}ŽŸ:ÐiÈäÙ³ýCÞ%ászŽ !¿x±·jò ôϳÖõÞ°KÀº-s¶Õ„UM>Õá˜ößRûpGI´d¸½ý¹ê²¯êûª Àzõw\×­JÓî.ðå/·pæÌ+xûíW}‹`~åw­–pLÆ …Þ°Y©ª‘NÔ•Ie4Hù=.¦¼0ò;•Jõð¨>¬ÜvË>÷XÊWi8“Ûj9®.تî‡}ÕõõÞú4’Ü ÃÏc·ë^\¼hý¿\ž}¡Ty_9]·ÞW’ÓrÙz\“EhØëŒNuHµ¬k6­:ø¤B—°öºÍ°VIx衇N§»cìG}¸´Ü¤2,•?MßððÃwã[*oׯƒ¼æÔ˪Áú!– ùWG(.WQ*•º+<úm^e¸休9ÑõzïÇ;™´nK/—:OZM¦äyöŠK¥Ò«È(u5zù“riXR+1‡‡½ymò\û"€‡‡ýÛÝuу!‹ Ó´*Uûûý {yîöºdrºòœ—¥g1¶öWGûOÿÓ'ñÝßÝÄÁÁ›ã74Gó*¿ï¾»??Þú¿Ì“6ûlwWŒ¤EÄøŽ mXÃGÙ„µS¡\Axž‚ŽñõõÙ[è§•Í.бÂ> 8,‚Žm"¯yãÆõÿ ¸þ™ ’¢blšH$¹ v,ãEã{AÅa-Â4JÖ°áuX—¾ÙÆRö¶zã²²­,HÔÂ4ù|ÿM´ûlÐÇCD.iô~¸åBßeô®Ý X«¯+Ï/€?ö .LCD´@ ôÿÆÍÈÕà d³Y<÷Üs}×bJ¥RÈårØÚÚÂ… põêÕ ‡ˆ(¼œÕ*¬Eš6\‡µ ±&"¢pŠ£?iÕ`%­’¨½aÄò|½„µà<€³°æÆž…•ìª «n=öôÿ6ºÌ^†a-²DDD ÂGÑÜÜjµH$úzVk™kux°<~tt4ð\""r0n.¦:œ°† 
›°‘H V«!—Ë9¾æÍ7ßÄ­[·pppÀ! 4ÒÑÑnܸ×_=íO߀Uá988è>Ÿh˜ƒƒƒ@+;,ÃÉK’°ešøfùMnIù}||ȼQ–ßä%©ƒOêÿôŸþÓôÎÛÅb1d2Ü{ï½Ýû’É$êõ:>üáceeeà5¦iâää¥R)èݧ[YYÁ™3gðõ¯=‚ušø€?üÃ?Äoþæo}œHd2üÉŸü >ò‘²}–áä¥T*…3gΠÓé,LÎò›Ü’òûüƒÈöY~“—¤>iùÊ!ÁÍfs`èts•2ZtŒoŠ:Æ8E㛢ŒñMaÊ„5‹acc­V«{ßÎÎR©R©TлG4Æ7Ecœ¢ŒñMQÆø¦0 åÖT*…µµ5\ºt ™LGGG€jµô®ÍŒñMQǧ(c|S”1¾)ŒB™°@©TB.—ëž(œÄMQÂø¦¨cŒS”1¾)Êß6¡MXkU2®¦GQÅø¦¨cŒS”1¾)Êß&¡œÃJDDDDDDÄ„•ˆˆˆˆˆˆB‰ +…V"""""" %&¬DDDDDDJLX‰ˆˆˆˆˆ(”˜°Q(1a%"""""¢PbÂJDDDDDD¡Ä„•ˆˆˆˆˆˆB‰ +…V"""""" %&¬DDDDDDJLX‰ˆˆˆˆˆ(”˜°Q(…>amµZh·ÛAï‘'ßuŒqŠ2Æ7Eã›Ââî w`˜v»r¹ŒV«Èf³¨V«AïÑ\0¾)êãeŒoŠ2Æ7…Mh{X766H$ ë:žþy´Z-Ôjµ w‹h.ßuŒqŠ2Æ7Eã›Â&” k»ÝF³ÙÄÚÚ ‹!—Ëa?è]#š㛢Ž1NQÆø¦(c|S…2a=::¤R©î}©TŠãèO-K+×ÞÞ^w8J”0¾GkµZØÛÛ z7|Õs™1>ZT¿w;–áˇå÷âc|ÕïÝ.låw(ç°Ž:):b±ØÀý_øÂððÐuét:èCðÔ'?ùÉ wÁº®ãSŸú>ðÌõ}oÞ¼‰Ïþó¸ë®»P*•|?®iâ±¶¶†G}tîŸI˜Ü¼y·nÝZŠG¯Îe]×ñÒK/v\,ÃGc>›Ï}îs¸uëxà@Žkšøfù=^—ßA•…,¿Gcù=©ƒOZ~‡2aít:C;>>vî—Z­†ÇÇäxg9–°' çß@4¾w¯ãÛÍã,/âÛÍã~™5Æå8ÉY”ã›å7E9¾h–ßLXC¨Óé Õj@ß*_óz> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½Ë®m¹•%Ö¯¸M»[|?n8’ಠ™. p£P-•RigF¦+”€Qþz1ÉEr’;nèܪ¬8±¸×âkrp¾çÿÎ~1øß÷üW®îË~øÎ¼¬‹òtþ…Çÿý»ô²\.òŒÿKˆù‹ á‹Oò.žÿðõ¡âiXc-þ6ó6ýÓwÿçwÿŠÞÿ„ÿþ<›?þñ_þñ»ÿã/ëñ¯÷›ãç#Æïƒ“¿þEþʉ3A£üñOßýÅÆòa<ÚŸ¾û/ÿ?úoß¹/ÿýýßhJ!ï¿ü¿ßYóå?½íûw—úÁ4~˜»Ñ‹óx‚òœÖöÈ¿¼7ÆØâo ¡$tá}ðÑ>CÐîa²,º~’ñw­µØükРžøÕ¯í›^Äßÿ#†ÓúúÓW†³{YSv¯FW^Åg_\$ù»ßOxùÞÙWÄ׿üþ‡/ÿårXùÿùËýòûÿøÝø½LýçûªZrÖ”>ÒW8úºuvHÜØÍ.ŸÝu6iÈ]HrƾJ>;íïulSyýö§»õñ•ÜÙo|±cã^g·òð>Õš—wè1Å›ä$rÒ^%'v<ÈIõ{•œ¤ÛANºß«ä$wrÒÝ*rúy\·Ÿ.® ¿u/ãˆë%°Æ®cú‡/Õ ¯Á\’ÆÆóîp_Æ%õ‹z1·P(5Ùw7Ôè%ü¢^ ²·¼‡Ïåš}¤_Ôø•—ÉœR-_館N„—(¯¾@ñß›—‘0àïg‡ËHÖ_X–)”àp†ÜàÕ+D½wþH¿bÉo~ýÛ¿ÿ‡ßýêÿöã!­ìÍWW G+¼’/)| ì¢ÍÙVî¥'·ñ}h}ÿùOüᇳúÌ™Ÿ„ð‚1a’_œÃ\²C»±„t[Êhs©ê°l4n²£ôR"N–4pMÁøyp¡¾Ø"møfâz'²p4›£üÚ*¾YÙ_åÙL/ã±þ Â6|“cÁÁÔø—«)–hä›ß,쯷ôª Ð rBQ8s,¤cm~› ÍJ¾ñÍ“´9œc}–6|“cñèÎFLÏG¬ce“Å'ÉæEÊ ºÂ62‡ŸšÒ}Ëí«Z00iÚçÄ€ÅÄÐ3%2s(¬2ß %&0ºAÚâË>+fÃË,°½NšðIÅao9Ì+VÉËì@(™;T@ ü¤ËأؚðEŽ$Z'¯î0`'³óø$7(‘ÇF[Î8¶}2àJãH¼‰í½H¾¹®˜½åeG“ýe/mEßV„_¸AÙ;™ÞÇ‚zy/â› m+îC“¶B®Ûâ¤SbûV·‚•ßWÐÖDÚð­‡`­u€d,TŒQÚð-¬w_ÆàMaXà_¨H ïYìfJFv-c𶬴E“¢ÉEÖ#“¶Ñˆ±É0öÑ•F%8àô`·²wjƒw(8WÝŽÚüdÀ¯ÉjÔö˜–ÃË Ñ¨¶ ˜ Ÿ¶ñML÷cNlã=[úlS…w<û€m|\jÀ÷lsê{ê9`m€ |&ž°û‡ fŸü„m 
"ð^Œ&¸ÍŽ@%©Á¼‚m,1‰,–†y ¶9/‡F é€m|’TÇËà€m|ÓeãËÛø&v7H`Û~#ø!+3×°íd^°®)æ¶Ñ–D<ÃÛø&¸!°rú5nã?i²ú·9N ®A{à¶ð,ØjÜÆ7#/x/Ä¢qïYà@Ú?¸¡Gl&¹‚ⶸ]¾ø/î‹ÿt“‚\Rºòf…nœ l ö×ÙÉró¨bz1žÐeˆ@ÝЮ ÝǸ@’·t; ŽugÇvc÷À2ù~,v;\ؾÁV+ìΰp{6^dÅnt‡n=΂;Á»â`@¾Ã6Ÿ,wÔeü¿êNðÆi—Vš:Hƒ7ú#¿ä“=ÁôG·áÞüè9út²Ü:Ø ÛÀT7ÞÃG’‰ö xã Oé/àÍqa¹PàEçðzòÜÎÿ,8om¼=Þ‹ €ò»9„ì&`âw.Ô»)$à‰kȧÁ›Ë¨þoœ‚’rŽ7ðv/ ÝàœŸàí(\¢É7®A·|§¼o·BoÁ©jj²Bo™_J´w 7¾™°Ð8çÓ©;ÊÀ>Ÿèmã0p[»½Áƒ£ ´L7¶ ‡Ò‚,Ê;ô8]Þ›ƒéæn¾¹u'z£oÙ]gOô†IV1—x¢·ºÆ7ôÆÎºS¡7V;@ioЫ‰åò>¤½!§ ·ìB8ÑlQû`âÉus×yù6 íAo<ÀØåþåÚÉ_†Þ˜4ÀÀ“·óz'"trrÂ5và,`É´›wÃnH´ø\hlŒÆn²FdÎŒ9Õ%À`|<@òˆv‹Ö ?5þÄn"+ð£d¹ö6u ™È|å`»qkíAøo´%€nô¶é tã6ðÑù`N¾;p¡5кÐI¡±¡Ñ Üü ¿/pSmƒÍÇ1Ìpƒô2r8L>YÓvÓ)ÜvrÌð¬Æ·­ð7µóLnó„àjµm˜ZW‚aBRÇe~2ÝèÏr2]§£ÛbçŠ7A,¸) &Êop›z Æ£GYq¯ùŠ{ê‰Ûüdtµ6…ÛäA¼¬NÜv¢G¶±)â4n{âvN½?Û r€ ¤AwrÝÀmOýEçÖ•®‹Š?“oT¢”%¸B t‚ n£Í€Â1×|à¶!âÜ4›Ç¦,¡&Á•·-¢La¶—R–€–nâ&olÊ’Ê 䶤¾Ø=Çÿ8´%ø&ï$\Ϧ-)´»GkK´v&´¡ƒiÉ&>Èu7_ÛrL— ÿj ¬zÕÃæ]ßWÌbµ;{Œ‰ÝèT'ž$‘¢[›Ööèº{ è~÷°Ù]‚š²,º~rÝÃFM¸íºùª‡Í_`zƒŒØf îÖ».5ž0¯ƒ¿ˆK5ó¬P¸²{É-pÏqg™(@Ú•t‚Õé]ˆºxÔ¨~ïº@ÔÕ£F÷{×¢NÝíEš•j ¬Í¦^$'ép“îô*9±ãANªß«ä$ÝrÒý^%'鸓“îöc5Ôay(K¤k5º“[5º—[5ª—K5ªoò¨ù‡O¡#€T²+?ã¯bV•?ÿ{þ*¾nþ* PQøU³ª_j¦H ñ˜2Q ^cH´SAVE卨m̯b@¢G Tl ½§·C}u bc ´•ª`umm›·J¤®W$¢à”ö%a££zSÚ”ö%qE nU¿x›Ôöˆ^04ÃQ—g-o ï“§p ‹GžÅ{X-ü¤ …ª!ϲ;ÈêT™²-Pë=äÙ@¿™:IÛꬎ4ÒëD¤ü6g“)·×¢rV 8ÞÑaϲ´)o°«X¬"_bÿ©êò,ÚÈw+RHøæ”gý«fçjÓj8†R íèê•mÍ.£P›jë’ vÕØµ /qӨ͈Äå,jY Û úAšââÛô“ Úv"Wئ(Ðó´] ¶i‘òôKõ€íÜtx¡Š EÃ65ÞdÆš÷˜†mÇ…6)øv»¬°í=¦ãM‚h~À6Í9„C' | ÛžfÃýòló6©»¦ Ö°MM@M¡qœÑ°íɺÄÇ.«aÛe^Õ8BͶåäFú0”¶½£ª1XסY)Î3‘‚f7a[¼… ²ö€m^àÀ#ì±;`?½ „öÀmO 0õ6îÀm¬G`šªWã¶£š­À¡·=Ç…äK8p›ëŸÀ„dQªkÜæžVljpõÀm:mnhÇn“±£RÝ5ð²›ÁTŽ;UnpÛJH*°íÄKÕˆ¯ŠmqÊ »i:0[†›,¤ ¶mÙlãzÕ”w̦[I®Ž^Yf³7`rj¦(Ùx/˜“X·4dÓöH·‘Ε­3`ž ûvC(ĦïA 2bTֈ͑€cóÅЈíè œc¿ŠSûµqà"rú˜ÔøaK§mÞàØ 4¬vv¤B@”žæµ£Çl°mÞԃמ=âîÉá`¶±Wžz‘Ê7f›fC0â6ž¼6v‹Ž,¹U ü )Ôîcœüª…˜íèöïD"O¾yœgjPp® [eÈß"ß$¿ªab1לD£˜¨Þ€ô– ç_i3£RFÞ «&Pêq¢v£IŠ=¤)†öÈ £RCò†˜Ý”)6óŽ6 Òn‹Ê”¶æoÞ/6câqžÒªƒñ4â%6X&è$ÿH´twÅH@2”¤Tçž–º\(H¤ù‘h1”jÑY*­IÅyR4–+7󮊬6K¨}âE)ΙQÃcE–ʦ8Çèé@/«RšÇyWF&^¹f8KU©`áõ›*>9u‘‘‘h“Är§ú¨`(ˆ‡fÓȼÎj½@ ¤ Ò¦T0Ž‚†Ã DŠ¥jã¡Vé¨FÚ^ä=»yœg“l7}e«T0Ì,@wíØÚðÍA­TÁ8n0—1;¥ÁáÀÖØd[ótj¥ß~ÈAæÏ&¥€±âÚ³¨25ƒ\©œ Œ{k2ºWše}êŽã¸U´†ºËвIä 
çô÷Ç2{Ùš”Ãy%fP¥Ì<( ŒÅÖ3ÈFÎ0“,&õÀ¹ŠrñûECÝS5µ¿æ¨40t’wÑ;Ñc ÛUS¨­¤»¤40UBJI­Mi`*úf`[kËJÎ02lI¼2rV@ È翵)í¹£Æôt  ›êèº. ›[ϵMñ†;·£6c…§a@£6­/ÙÓòçwÔvŒ¦5ÙRµ£¶³Ø‚.šnV¡6Û°>ÎEÑ£*Ôv†AÆžAKn‡íT_TË¢v©ÃçºFWÃ68ZÁư- OÄ¿ò‰ÛÉ¿¦IGÃvÊ/!<_ìÛ 6ì#ÓÛ (Ï FØ^ ÛÉa 51,c‡íÒ3L1"vA ÛWå0ÛéѸ+†ÉÝu¼ÆíÈhS¬xÌÔ¸¡Øqöиè¾ãl·¥hàXMCsœ ŠîpÃîÜ›êÏDjð¥€;Ð’[0“7ÀÍœ=÷pÂÓÀŽ2ÃúÛV¸¦pÆó¯q;xL²tƒ‡†mPZ*5tW-ÛAv —XÚnÙÁÜâ-5l£)0‡CK«a;0?-ƒÓ ÛLHæ³uù¶1oŸåkØ¢ ”tÀ6Ú<ͤîDm§&ù\ÔFNXƒÖÝŠÚ¾23Gàþ¨í™L&FlG=PÛ“Íà:‹¾]£6¦š½!Jŵɗ9 OݸFmfš°ÖÔ~y*Ô¦5ÌËuÔötŠ3Þ ­ “™®¹”ø1U䇞¸q·€°`6íÀ ±€w ~&Ô§€›l³ƒTëÁnãAíBcšÝ&Ðuô!œì6ÁZú±¨Ùm’{ ¶+^7 "ì ¿MÿÀoöáà )©Åxm ·¿·Ð1QsÜ牮åä¸`VËÝî:ݸÏC®[ÔèÆq U^…tã= +èf5ìelAÞº1¦/·âåô®dV™ºiå‹"O<¡›ù ®O]³Ü ¢Ä¥Ü¯rÍsKâ( W˜Åç¦åi^`ÏmÑkbÆ…ºÉÌ¢,ezÇs\Ù¹“å[Pã*ÂÜÁr3“\'ËOâ£åùd¹ÉÌ‚ï‰ÑÕ<7;Àºr°Ü‘zHˆ /5ËMÖ`Ûm YnË6 …}Ãr“uvƒçÁrSðÇÞ‡“åsÁ,Ö¶*šåN4ðZRð¶L'Å$Bîd¹ùzè™ ¼]è™&6–;’ÍÍLëq²ÜÔ‹¦BSÍrÓigD¨ÞXnlf*¹¶Ay3zb-™°%,7®;\ÊÙ5iC7ÇÒ,Hƒãfî çE{ö FÏBï“–>ؘGé/ŠÈæ‹fÏ·½ßP]öyü°LíJ7¥î†ÏíÑu«c q7|n®AMYV]?¹nøT–è'ßbøŒ¸{psÐõœ¸vÕðI7zÜŠ™a2²Þ6|Îó:€áJæk3ŽÏåóºÌ'~†éS:¶*ÝéU[;¶*ÕïU[•t;lUºß«¶*é¸Ûªt·MŸ Ý^>ÃöÙz¥{½›_,^E©Žï¦ã—~IéŽïæão6¾FSºßÙ?)Ò¿hdŠŒ+ºeÝz¹eݺ¹eÕÝ\²êNnAAO¢(M4#~ÝšWè¿ÿx/è¬Å¦MÊ˜× ÐÌj¥¨*ÊSR{ aµB˜3¹@š³Ò–¥L¡{zTU%šÕÊdªö«Ä°èdÓ:CàmNÉ%æÕêèhk)é±-™E'“_Á9Óu%>Ä,«EìT%åU%IÕLNt…”0%\&Ÿò™:[i ‹JÆUH×ÑÛ¦h‡X¸ªd¨Í5l£L¼˜U%ƒ6ƒÍ÷’6¬”°¨dDÍàJË» XX52ª= FœÔK5S#Ñb(7Êk5h?tš[«5íµ¼jdÜ‹Îùµ9Cæ_52L3Å §âÛ^MX52hÃ’7=UePàªaþ2´qâèUgÝ’hŒ{@·ƒ©¡•=‚ï7èjó«™®*œ£Ì¸2ØÐÇ -°æ@‘àŽÊ¨ y>gZîÄCÃvñÔ{›# Q›È%bµS Ò+‚bC±'h3ï]5-ƒ·ÆìÄhP4ÄhÌÆn²Úo…ÙLÖ‘f^pFcv¢9'®é™5fG¦÷N-³ƒù~'h.—ÉѶ-XAëã|zRkjÐŽ¸pëÔF]´c¢Þ/ûvfÓÒPX¸á„l¦Û¢-‹ Dc6i¢¡çJ>0ï‘b®ò³i‚KØŸØÎNmPîßL>ðmÚ8qªOîmÚM«+¶•¨Ð ŽL0ô³h‹°$ç; ¯ g…áõþh&5 \¥´™%Ûæ›YL¡6> ¢£òº¨íYô†E,Ä”©Q;0ÜŽiX¢PÛó 'PhHj㛧ô aÚ&…ˆ–—A£v vÕ’šÓ—Fm1QÛa5l{jœhR1e‡mzZѸXk9`›¯Ñv’Ú¥¤`›¶L#YYÝÛb“)5lË0sÌÍNÃ6¶’9ðks©Ð°-$Pˆå€mš MÆ¿„’ºHÉ8¤Âºr]ù¸tCnÜñ^h.È£[ ˆ¬œü6Ñ9I»ŸäÎâ²ç6lì¶ã€\b8ùmgâs¾4r"í(÷  Ç™±q¶ 4» ™3PJk½)ìô9Ä^Õv½(즧€Çìšw”Ænòé8–Åô¶»™èºÆÊ,ïømWˆ}!ØMGš9[$æÆp3Ý;PÑÛ îL--8ÙêO†;¼pô˜Š|2ÜèPr°'Ã͹Ò]Ãèm ÏŒ ¹qÕšã¦Õ«ß 7¹OÐwÏf¯ÑÛI=ŽÄÒ ŠåÆmÌT6åËx­ökB¡7-zt‹‘ßXnæ•7L9²Ü:`wc¹Y@É<)]7–»²ÐP|Ý妇`nœºb¹U4âÆr“ù÷äÞqܸ§miÉ<6Ž»ò>xD¯ƒãfær²K'ÇM::·÷4Çž,pnÜ¿æ¸qç‚™ ýzÕ,78 
Üã”è–›åbõý&Ô,wÂZ‚Ê›=ùd¹YKÀØ»™æÖ{þÆr3CËc¤“ã¶ìŽâãŽ;Râ«®tXW·#¾ðnʺ-ƒ¬°N¸“Q—1Íù/(¬ŠÓ¦%pÐtâó¸Vy¡njó¥kdþî·ÿÛï®9¥'ÜöÌ­/Jl€9VÂà¿X5Ž~8ÿ@&õ^Û†úñSeúp+ù"ðŸšêßÿ?ü׿ûíÿúñé~Ô«SMÄâ%K¸Ê}ÚkÛ·LÛËQ¦=Ðu Ç´ÿÃÿõ§?þë7Íùƒ>Që´ËhÕžTf›¶jû–i3—ƒeé;{è3ǬÿÓÿþÛo ë_6g‰í·LFlµjû÷ËÝ’(λͿ¶2ñl¿Y'þmÿw¬ñqq³»Ò$gpx€gj۳떾È<¿€.7¡Ÿ} ÖY·¥×®;:è9÷!¨G¿¼f<Ów½“½9QÜt|È>–¿ ¢Ãß’ãÃzŽd\ÙÑ%®Ÿc5£(¹Ã“úúf—³t¼îö²µ:.þºçËæê¸z@l=_¶WÇé±u|Ó b¥`+Žï•m7 «¥"„¥»½KX"£ÂR=ß%¬–Çk–îù.aI×aéŽ?æ ‘ësÖA9Ü+&¿õrËbëæ–'„îæ’'„îä†'Dn!•Qög\!Âêñç{ÕÚ UžLø%‡öÏÉ÷Hü¤7 ¦1…¥cfº[)wJI¹BPN³O¹c4*_ˆúŠ…Ù.Ec S‰ FäyêxJÄÓЃäåcÐín´B£ò‡ð/fhí ¾Õ!`@YטPóÉÈÚ"šO>Mm¤Ñ¶yD`U³maÚ`m•KD}¡c–Èk–Ï$9_CG·xZqúg©ýtÏÝÃXž˜íP>kÒÒÒKžé Ån¥ÞókkKb”•m¬wV+Sw7êj+Žhi¶ŠZÆC°@‹H‘V ÝSñ…aB½VJ®éÈ@,)ö~ÐRi!­/kKÞµi,;Üâm­¨R×€[ë¼sÍLb%g*¿Xt›Õ—] c<ÙæDq‰Eê9±m«ækZfIÎ5oA–VÍr”­²(Õçå!c)œfèøÜȆš|ûб(W CÁ}oT[š&ÜS¶ÓÒ¸xIÄC›]KàkYùÔ:¶FŠÑ2íakÌ’®¯Ñ1ßôL-ÛjX‹Áujm‹m«´ðN½-ƒk±jѵõ¦Ft2µ}n¤ù´ÌŸ°¨nW” ¿}5+Ý-™h¬ìÓä!”̙Бcë“jÑAÉ–‰ŠIݶM³(õ-“cb’-w•þ€Z@:-O ‹ï½ê¤dÉÒü”ùµ4õL .Kd3E°i_­YW*cM1†W‰–'«NRf‘¼š”°Kã’Ÿ¤lÖØW4®Z\†2e L;XôXreð£Ú‚c,]Äü dø"²hkÌÓß)Ù°¶xr-£fç)* ‚¬ÊÍðñç¥6z’ò¡ÐKehÆÛ5šÓ@ÏtÍT¨µüèY=Áø¶k;ÎKyl‰žOœ¯-yq‹®Ý`tby »†G æñÃZÐ*'ÌçJ_›â#¼á|¦ÎÇÜ5¨Ò8Ÿ¹ÁŒ¶2ùÄùDNªd—Š=q>ÓFŽ+©÷©qžñ⬼Þg©q>ñ¤ ®Ã£Âù„K@²Ó¶[Iá|J’7´”Î3¾»D—R?­ èÛžAò-î}zÂÀ¥Úúèceyÿt©p>Òo¤Ý"G7œÇ‹>Ó-½ž0ÏX–u~p¹Á<£×Mz Öo8éÅGÅOœ,CœjK¼Á|$_.j‡\ó ®¤Ú æ+#âb áDù(у¸û-¯P>$ ·gbÏåéÂí­1ž(Ï€õšp÷¤Qž^63-ȆòXŽR3γyƒòAÊW¤ÜY+ò¬'ë¥o”ByƘƒÓÁöÇåYÀ4 ±o(˜[ø2w <¾Ê”çÆw¢<“"N· åƒC×̹ÊÓÆÑ\Ú¨J£<A>6[ýóhdDr0ý~P0ïqRbe wÂ|`æwPCK Ó¥VúÎ×§ ÃÍrÂéék\äµs"½aZœRç<ÒSVaz܉ôânáŒÏ-­ÌÎÒGïÍ`ò[–’(¾ÞάÆz¦{ˆ$[¬Ç!¿šmpþ O/–o“|‚=9t©¤ì=ÓHЬÓ¦ž. 
€Vãe{¦fb‘âØO´fê01Š›ë ö,jN_UŸË ötÚð¬{÷•©/¬2ž“ïH½”yÇ™z öd¿YÐÂõC«Àž9Ιû¡/íÆÕgÖÆÖg¢Ðž® pîÞ\qõ¢Ê޲ ÜÓÿ×]nžî W ë^ó;W_¤z­}ä$ÍÕW$ZŸùi®^Û7\=ï$ñÇ/o¸z–NØ—GP\=}•Àµ¸~¡ï\=Nw¶‹QŠ«gÊrVàì_Õ\= šöË;¼gvhˆtX´ú†«,‰Á$÷ù WOç–Žè·½Â{ÖÑ5ìLGmÍÕ;BäóæŠ÷àw33rt†G3õt¡HÆ.)¦¾y̦Ü?ªàc%=Ko™zHÄž&N¸7Ir£ÔÜ¡WÁ=á½Òg¸7j®^Ž:+h¿{žA&poe]6¸7’?g¶ØÆÕGPwæ÷>T=~Øp±œlùAj·Œkü«×~Gkûë’÷MïW”ª}?,s»Ñ…Z(óœØö(Š:·¤kS¥vÔÜíCÐîar[öíQ™!£Ÿ° }úÑ›!ü¬â–š`V^¡Û¿A@`µ1àß_ÝZ‹Ç„gǘ‘𑾮[kiOt¸RO¶#Âí3»Î'Mƒþ]0 LZó”æÞ{^Ûî ÁÓÑë9ÛTÛÅ!œËî¯.»ÿʲûû˾’´™(|Ô¤C=gÝóçÌy#55„O 5¹ì÷HM:üʲ¿%µŸ¿$Ýqaˆp—þþ\b¡wó_Ïœ«;Iô§Î.mf?m›´¿¬—X!ù”¯Y@?l3Ö½¸@}aªÔÅ}¥ÿËz1TÉ”B…Íáôû“†iq£c¦Õncß›—‘ f}¿ØçPÖ_0TWReÛ$š÷,%ŠBÀ#IøWlCùͯû÷ÿð»_ýáß~üã1¦Õ•í«kÀJýãîªK}ÚòTЫ£úÖ6 Ày–K‹±ƒJ©F2T°xÉÞFÕª“¦–Q<:9Ž–„“õNÔ¯f–ï{æëXq‹CÀdtÔµ™A]àSøÍãp6-ùšÁkÎH|&Fá7è”{o~ɀȦh8-ºo ™M `rOfwG%1ÂÈEÏ€“j{~7ÇâtcèÊMGè¾ò¡å?„ˆîå“,$^ÄBçâ’þ_´0\ÐeQn¹²”{ã0xôú-îÊ’úoÙ:ɺÎrožõMßÿº$>ôÜŸ0öŸÙ#Ÿò„Òû0‰+@oRÈLҔ§ö@fO²×”fŸ_fY,…Ìl3$ 33tGfú)0*­™¨52s‚óPÈÌØÖe23Õð‚¾+0ÓñaAD…Ìž&x,m ö€fæ7òó¾TØ,ÙøÇý¡À›%Ü<ØÌÞ–lÅf· Í,CXf@v±çòÐÀL²£…¸-”Ff)uœŸ+L!3s1hd–:Í`Úú5 ™™w'G¤¡Y +ðؤœd£}ΊFgÆSƒ3£˜'¦+p¦£Ê¼ž5³<¤_YŸ‘6¶~4ËS×àEéð91V6=cÔœ³]ÁTÁ3Œ<–ø`œÛ¹UÀŒÜ ¶evÌó0n8…Ì‘Ìà-v–yëÆ2§åDo3x÷8;kÀÌ_Ì5Ð3ÏOxNsÌÌ<’d¯°LÎw¸ÍiX¦¨ò[~™%3O‘FeʾËÝ¡P9²ÒÆØ hXŽêŠU°Œ×æ@VPŽ ³§1™òÆ”T52ã-ï&«ïÅq¶40S<›T«€™IõÓ©pù¡1ÈqE4 ãÜZ;¡™¯œd³4ƒùb9žÿ ƒhN(ŽLܺ@¦‘8–•“WHL´ Ò¡s}¯rÓ½F1¹×.È@ i8–·¬²åÏKK#q`f›qj(¦åòÓ²"±µ«¡˜2Å`â£/k‡,¬p˜Î6nœÔMNt³éj Í'Ó3 ÷c ¹äHO½®Ë٘丞·]±âú¦¿X$ÀCa‡âhS_„j‡u¯6Õ…ïŸÄ±ûƒ €k¡uL Áfc‘Óz·h¹Œ«ýàAl¡}}2Èee·4‹L;îRÍ Ó§÷]ã¡dæÛT¡dE0š? 
¿{ÌÒ}s•4{¬ÕP ˆ™Ôrª5{¬7Eâ½iÅb°Mv!]…ÅÈ8\Š!„‰P»ö¢Ôç®SHìÓzEk,f:©±V›ò‚©.ËsiiD¦¤66F2špà•‚d®¢“Ö€ÌM:% ËâÁÉhX¡lÈÞ—éâ5hTp™}DÆS„oò„¡Y ç=Ôlrü«Yž%Šö¢'̻ޯ˜¦êã’1çv£ŸÍ4qµ‰m®»¡DsxÂl®AM¹-ûöèº'Œšrßyó‹=aÓH1EpaNŠ«Ž0ž_|ÎL'û7ä3OìĆ+˜Ü‚ ým–ù$‰f—Ké.íÎ ªçOpN¨‡ŒÂg8'ÔÇF÷|uÙwçÕóýe_Iz©t~Ô¤C=gÝóçÌy#55„O 5¹ì÷HM:üʲ› d÷bx8c%nùÁèNnùÁè^nùÁè^nùÁè^¾Éæg¼L¼äB§)Æ•Ÿs2±«“ÉŸÿÇ='_•EÓMx=£¼LÌ¢d¢BÇ I¸ºU?ÌRtSÔ;Ó–A_•‘p–ÆÉ©< ÔƒL¥d]nLN·¸™09¬{,LÁm*s7Åäà”I3¬úÙàV]_ÕBÁ/%7›&ä±B‹…vê%36Ô.4솩—¤RáQ]„°ÜdoSùª¬!K?4–!(e[õú!*m͓ƆBÒ—/¦ÂÀŠ S5I±i©´) ᡵ i)´)C1Xtê'-m¿Ö%Ñ „¼9šøòhÁCÞüL¸CCnZsÆÇnÞeó3qÃʪ¬‘¨ÉG£ʦ5C€Uy™,†ÜêæfR楪ò3©«Ù/åhç™h”§ÉjF9šÔUíêh˜_ßíÁÑ*G‚Ê Ýh— ›Íý#<ÆÂè– ›â®až]ŽnÓÖDºr‚‰ÖF“oõnû¹Œ^ik0ü2ÆèguMczšE¯&ÆçÞ×,³[Ù°e¶KEìcv+ÌÅÍÈéÓcݘf·:@k˜N'<Ó3e‚£æ–óâ¶°1Ëq,óÁ,3aFÐàL¿9\Í+‡Åe`ã•óz|483 9©fôܸ崰¯›i?Ÿ³Õ¼²”¶é×ìÆ*»Åùlc• ò=ãØxe.ýäè¯l߯+Cx°±Êx+t“¼†esÑð¬Âå˜VÎgc•ƒ‚zÌAj˜‹eEfÚòen̲[ÜÃ7f°S†¡zãš-ñü‘Ì6®Ù¬âùÆ53ÕÒê¡™¼þˆnÐÀL¾ÃåOíÇKÕ¬[6“}4û,jß´¬ü«ÙØzÓ²ù®÷ÊÆg?,s»ÒO6³Üz›ØöèºY1»4 ®÷µÕ®AM¹-ûöèºeSM¹ï¼~ô-–Íàj" Wæu¼lÚ IÜ©pê)ÿ Ù6ç™èpe ‹[°áâ™]æ“&QÜ£Íà¤{¾opâ6ƒ“Â'œdç²ß38I‡_YöO°m®$ý)¶MéPÏY÷ü9sÞHM áHM†p.û=R“¿²ìßfÛdâ>È"Ó]³mêNnÙ6u/·l›º—[¶MÝË Ûf œJfÈô3ÆMb‹ióǾV*ê§4!ýÖ† °êV5:=yÅ|–\œ‰©' ~q&§tèæŸ8äWM­«¾9ù8ËhzúíN¯õäU(óFšÇ’(àö4x>Õ«„9®a@YjÖwíM¢öÃôx¢`2ð•⪢aIå©ôL1Î"ší½¡cM±Ì"š ;ŠæQ(¦´êh’7b 3£—Ú…ŸÒª£¡í`ªTRË<Ð$a‰kœW Ë‘NõSÊeVДȊG’OE©hÜ+ +Z*JU£¬—8å3w£—‚¦OhbªJW“—D ©ÆY<“Ãê÷T›YÓöÊ™*‡4ùý0#c³Q‘@i]UÖ?Ù©òñÑÅd«B⑚­²i2û‘Ï3u ƒ:9¾¡çÏn Zbm3• u†e½³Sa@™¯®¾ÍÞÎüŒbŒª©ì•’f ¤Ë~UÒà­©ÏaÕÒ0øiªLsPZ³¤‘ÈaUÒ0ÄÐ3 H]£#–qÄUEÃø°G¯˜£RѤÅ=>§UEƒí Óü““вkÌaNJE³†㈆êÔ©ïÍY1óì3¼jhðÖb N€k>ú¸e×ÜÙÍ€öãªÂÓ LKþÔIkPökÂÊèeF¤)H¦¥4N•´‚ä`œxðwÛ„Âäj׌•ÓH®A¹¤%m‰ÆäÇYA2ü§åS#r k˜‘†d¶™q£(DÎE¨‚dQ–t É…v»G͸ar:Ø’ :cä6P¶‹‘]ƒrTY94(¯!T&Ó´52%l ¬L°,«<6Ï@ü šóbéÝ9.É_d^"ª7h¶‹«ÑÍ~ñR8 yzQid.˜ú¡‚f&ä ­9°6ÁRÍ:TtÃf·Ü’6›ÅH|@ó„K…ÌbÉCD}®¡™Qw j¯®&Ê,»as]c¨48‹Rü!^͘ʴklÐÌb³#vMcsXS54³ièñ56Óˆ2‚™4»Åî¢US£³w#¬igÖ'™¢Ð™¨ÈM{˜@§<•YIóÌà}›qR³Ë`S§)LsËIùivÙ*&[3Ì,…>X@Í1/v? 
Í £é£¹eúNPAs\=‹42Ó+fá±4ó´ÄÉc+ƒ¦yÍ’Î ˜éFÍcÏÓÀLve¹"4³,¥¼žó¢‘™že?Ì!/z˜™;hù¢‚æ5׿ Ø·Ìt}šÑ—˜ãš}B³¤J{6R3ÞfvÌâÕ3ä%Ìt´®™Ñ4%AÌÁ.—½FæÕ!rCf³„ùjh¦%rù6®Ù¬’ †æ Š13…ÍŒpŽb8'q-yEÏLl3 ÈžyPÍðÐÕ¬sT7ˆÂg_+±†gŽJó_%q Ï~õšÑèÌ!Ž0xÎ ]è@Á³¬Ö° >óÑH\óaã&‹\÷ô½Æ<*}ù³™¤öEóæÛþo(Ÿ‰ü°ÎîJG¥îÎíÑuëb q7pn®AM¹­ûöèºSM¹ a{ô-ΘˆüÑ‚›3õ²“¥èf˜«2¦Þ6pÎS;ñáN™àq†nŸÚeFñ3LœÒ¡6€èžïÛ8„Íð v'¹ì÷ìNÒáW–ýLœ Qƒ‰ø gëQÏZwýI³Þ¨Má3’æËΕ¿˜5_züÊÊ›hÎÂo· [/·,[7·L[7·l[77ŒÌ€D 0ëç²…S²›ÖÎÿñ^ g¢¬n$>„Ьc†êT —°”²£/{š‰=KȫœZ•GÎ)TRL‹§¡°ùHL…Ù\§Ås¡*1¯O»¨§K2‹',6ªÂ\‡Sã$ÍTËáÉ[,žnIŸ[²Y ž”£» µ°ÎÞ4wšÕ×2åªÁQ1~¥˜U…S×XÍRªÂÉ«…¥°ÈÞÔá$ÉÈÛm ¥šE‰£r‚–V%NšæœÂòzS…“Öœá•Õõ¦ '­YžÈÀ.ÚuZKäi^õêvM [YSxjÖÝêtM§‚E³îÖÔ¿•…ª[Zí±!•¥ô¦þÆ¿ìˆjª,¤7ëÌå6”ˆ ÖU³Î“'£öfÕª¯¤Ää.J©¾8T)€¸Oå¸äRû¿dZ‡%Ir¡ô'9û04"3?ä½Ü 9/~‘YªqNhDNåefà Fäì–hH ÈŒÒ +‚ÂcêÞ§cƆÇ*nJ#2 3·§FdFŒ ÏÈuQ o¨%çÊER`?Ø¡AÙ.ö“ŽÉÌÓ8·”Y;aFþm¨Ì¥~Ü!4(•VX#3sgN–BC3K+§'Ï®FfÚ'çÈ52ƒà†íA!³ÖîoÈì»…ie‡}@©@™,ËÈ5(3ýà©Ñ¨ŒyÌX} ËTÄO|R°Ì<ˆó¾Q¨¬Õþ –=óú³†e&Ïð¦QYg“Ô°Œá/ªPÙûÕŽ Q™0·ÜŠ –%xž…Ë>.™!.{·:®]4yjl¦ ña¦Ṳ̀¹Ëì%küƒ+B3NtVJØXf»D¶jŽYö~¤ýÖœD¥÷$«;ϼ°Ó¥ wr¦yÆÄmL3×k\> ¢™ÙxœÏi6KFLЪHÇÆ2Û5ê[#4=ÌfgŠc ¹´S<¸hziM,Û9æ% §hº ó«h23y2á ¡åµÇ ¨Z-ÈÃ4‡5:hü~”ÒøÌì'õ1‘møLkð€ Ïi-×°Ása¶<»Å7î€ç:Ýo4>»uˆ;hæ9+™J£tQ“Ó0½æ8ÞPº¬¦ñ ¦ëºÝšw6KÎù ¤ë’ßiÃhz¯ƒt^£[?^¸òX„em†OþÙk7óW‹7¿Á%d¶Ï9Á+=±@¹ø*ÅZžÉmÏ®›A{ÍÊ—ƒqcúÙ' bu_üíÙu ¨žõ3ýì[l L$2p—m ™p&T‹ùÿ Ù@×ó;±âÊ6>ðpÿüª9EÉjžÌ¶7»Ü ë¾?Ã8S¨ħX§âc Ýú¾»ø{UgÝ÷',þJÞ¶ 2C0¹JtMK¥ç­ûþ¤yïD§ñD×tÔçâ_$:éòk‹ÿmFÑ”¼8“‚ù÷*l¨Ý¢ÍÛP;ÇÕSbƒmÌw†Ïo°û¥Ø¡nÝ`;ÅUƒ»ávJ¯~·iñÞ¶æ¿»ÙRgÈËÛu)%»Ãv”ÑCwØN‹qq‡í  ]Cw\“cöv?Vä¿×Ž|3MfZ¦­ð›@¶L[ãw^¢)7øKèë‰ÞKté†Þt¾†Ó ½ÃY¯Ñ[Jië_£8C­f¤Ï†â’ ù)±»a¸Ñדq³F]o(îU€ç†âŒê)46÷åDo±ÿ-ȾZVÓÊeið¦ÃòÔÙoΨ¬‘¿qCpÖ|°‚«Jr€ûðZnߪ ߆ßÞ­¾~=…²_£#oVÕ(ÎÚwve4ŠÛ^ug»íš ~g»Œf§½BxR€²±ÝK Ðéö‹ÿÈÎt›•ëÔèMhsËu¡™î²”‰Ü™îÅZ|ÀwP>|Ól7‡ªÑ›me˜Ë6ø&Ÿ3¼†6®Û¬N>|KyÔyT|¬Ãt/)|äiÉr»!wH«ánƒnZN‡¯é†Ü’æÈpºI“#ê{‡î¨Q#7Sƒ<†ý¹Ñ‰HC·_ Ï;vû5ÅÊŽÝtõ›r€ÂnæW-SêQØ F:,¯)ìvK¹ŒºWRÐØmWž »wÞ {qõÐÈ-ªËSºþ€î…CݼŠýz\xŠj±5„ç÷ü·_ò"ìàß`vžiEvÈ.«kÎÚÜúð–ÙQ[Q^í´×ý¸µgÂï~R£ü«íZÛ_—,­oz¿¢Øìóøa™Û~œo²kžÛ¥`rie©Ù‡ ÝÂ:å¶ìú‘3·í«jÊ}êÑ»!ü¬²ÀK‘+‚Åæ$—k‚0øFóû «,èŒp?ÐÕuÛê ì‰ WŠ9v<¸}b×ù´|ªRRý.¥ÐÒ 
î†'Ûൎ­h÷~m×;^ƒ^fJpG¿Ï㋃y8»•‡÷ÉÖLO–{ä$rÒ^%'éø!'ÝïMr4ä¤ú½KNÒq''Ý­"§Ÿvr©©¸á¶ì$ ÅåÓöí¦P݉aÑH ^Á¼ñÌùv¥êSH,?^ßÍÞz˜?Ö N™l°ã¾ÒKúe½°vNÛ•w6ÊÑKQ½üê×´n‚ë·_~ÿKŸß3²‡ÿ¸üåûÅò:‡²þ°J¡Ê£F²xÉzðéI¿bÊo~ýÛ¿ÿ‡ßýêÿöã11S…ëcúê0WY$_’8Ÿ‰tTè~µ éþôÇ{Ñ«6/%•íôyf°`½,ÙÇ’e² ×MãXZö1Þé d:rÓ<(¢ZÁ7Ÿ¨ÀÖŠJ oš£iµ¢/³…ûƒ>`[­Ï­­¥ «ô­…|LxÍÞ5éº.9ÈX÷Äñ‘%48³ÔUb¹4D“|k[’E†€bã›3³¬ߊì ‹Å£‰K^/†*o¹±±µµ,dÆB6å{®i€Š´µ4dí"d¹Q_xb6±øÈ³Ö ^¡õQÚ¦RFÞ2§MuÒ´¤!‹’ÿ‡ .ÏÍÞ¡‘ÄÁuSfÍ>§Ü!広+„­7\7sgfÝmË©Õ%iW|jGhªKpú€ÂæäºYˆ!gÈ MÕ Õ%¢_Îï©Õ%ÔÅ‘e ùàº%ñFz·þ´ºÄ³ºMK¶«ƒA ÙL«K‹/ù’è–×"þDbÝ´%Ü›ðè´¶„É6¨Øx§-áEÁ‹º)ã´¶„Ej’€ØÁu3Ë<¾×µq›¶„Å`CÂ5¾c7 €ÏÍeÓ–x"d09ØÅ‹4—PeÁ6m‰#nà\¶+FkK˜ Òqn ­-¥Sn‘µ›¶„ÓÅò?v¾;ˆßØ¥zòÝÌß"çÊÖƒïV’×S«ûðÝôý2&rùx?6tpj&A–¼n–@þÕlÚl½éaó®÷+æ±ú¸z̹Ýè'¬Ñ!mbÛ£ëî-8¼»‡ÍöèúÔ”Û²ëG÷=lÔ”ûΛ_ìaã˜gm¸,Ë]O°/à¡Äÿ­ø×Ìó:‘áÊö%·àÂE?že>I‚×›Xqˆ‡Õé]‡ˆºø×è~¯:DÔÕ¿Fõ{Ù!¢NÿÝíEÿš•l—šÄ÷ÈI:ä¤;½JNÒñCNºß›ä$h0ÈIõ{—œ¤ãNNºÛù×@}½õ¯ù×èNnùר^®ù×è^nù×è^¾É¿æg¼W §@°·Ô>–8¯üÓEçæy\l ø¤ ±e‚a3«&ù§jr®)GõC¸…x ù(_¢´-¥ó¨ÆÁ+TžÈ{v)%dò¤-J¬S·)È>”äÛ{J“˜ŠtˬEÃäÜÎbe,Néch]„Å|ÂtòÓ#S†*¶´×üRBO^3öqƒ,ó7mJž3±VK[‘P§nSbJçdýЖ*z$6œ”Äe“6í¾‚3T±r2󰹯4°à+‹Í.î+ ÆÏX!Q0Q™r_q%»îîhØ™6%I•éº*7¤¥ŽžØôAZ¹Å…´Ò•нèKÆŒÃSº¥]µ`Í„·ys_ÉŒÐiÆIV_Wî+Îö‚l{ô1vš¨BÙô1 ;çu¥Ž¡6¶Ð˜ÜÚ”:†Ÿ4žÑ6l«›÷J™uÞBݼWpdmMý=¥ŽÉ˜³u¹=šÍ{¥š€} \4»÷ ^.Ѷ6¥A[ÀäZà´«>†VP‹M§žhwï•ê€ö¢v©¨×ÜB<Î$“è–ŠzâZèî]Ñ)+¨y‘(ºñ:º¥¢ž¸wÈ ÚXüæ¾B5úNIÛ¬¨'„íAÎcôJÃè(@e=.Õ#¹¤¬&v€9m¤M™AY0»>FHé}õ^‰òŸ(Åå0=1OÍáŠ*EA£ÒÇÐ…YÉÅN-&Ó'Á¤¸¯Dç²Æ$šÉRŽ«{úA÷• ºi¹Ç ¨¾Þ¡›õ‰4ÍL¨ ;1¿§‹ Ôó;t‹}Áäîq¥‘»:±6=ç@!wª’¡'Ûv\íæ½â°pƹx"7vÇÕ½è¡5rSg,3®Bn`Rìzè¹%5zm𥑛Ô6ÑY!w’ª Ö·¸Ð ¹Ã ×Rä}p 7Ña¸dlÈ_tdÄ®…¹Y©‡Ó•†8 ¹ëŽ;¢Fn¹?±ŸÅ4rW&ºõ…¹é‚5°Íœ¦‘›ß,ÌÎØ.4…܉¶á’zèŸFn.Ùˆ+íÀ-Ùƒ˜Üܼpk¹éÔÚ÷ô@îHŸD±z¹¹É•‹ß„X+4r³ò-føðuó_ñâÐ,Y34r3/6Ù·KÜt]uù1&kä&RiQÈD!wd†¤Öv 7#g{@îܤfn#·6…ÜQ Þ&ÓJwâŽÆp·$ôÁÏÒàffo[cÊàæ™3ÞQ²kà&g“Çm­›¬ nkìŒ=›©;*^œM4pK4þpÞ€;Ðö@ ©n™ž¾çnŽ…IÞó)ÀÍý4- ûCB䇭 »meÞn ·Ë;vÇ\Wûf»YKÖ&l‚lžf»Åe1¹RÍ Þ¬MÏœ¨³V\wÆïb÷d8˜nÐÉãɼA7nt›“íÆt³¦@´Þ6 RL7“Dƒ,K»˜4t3K´É ?@7ñƒËƺºíê?¡¡ï5Ÿ÷†{AAÁ/E2´tÓõMò™˜“éNLm—¹ ºÝ̸²·{1ó mppó-&%1 ðàÁös [ûÑkà¦K™mâÔÜôlÏþñuêÈÝÃ<‰6äö/†ÞwëÛÆs+7 ƒçŽÓJ#7sX-7§æ¹3W»Ü¤DÍsÓSDQÓ[ž»ûNÚ“çVÞäÏ-q¡Û®5Ï rþ1Ì<·/s"·dQ÷8B ÏANA7È Ò¤ÜåณM¸q­=9nÖ 
`ø&l(à–¼k©¶ƒãNâþdS}Ãq'úØnŒÕÀͼ–ºKÂÆqGÒugàÖÖØã–ãnx"OŽ›FHÁΧÁqGÂÖ)|0·W³€2N¡)mY»éjùW³:°õ¦ô]ï7˜Ï<~Xæv¥Ÿlf±æ6±íÑuócvikîk«]‚šr[výè¾TM¹ï¼zôMÐÑ–®¿€=9Ç®:†dáÔ3›Ëߎ tžØ‰ W6°¸.žØe>i’Ä](šF+ÝéU£•tü­t¿7VB&Ãh¥ú½k´’Ž»ÑJw{Ѻ’í§Ø@¥ÃANºÓ«ä$?ä¤û½IN‚ƒœT¿wÉI:î䤻ý˜ Ô³”Tñ"µ]³êNnÙ@U/×l º—[6PÝË h ì ÙÑ™D3ë׌ Îk#èÿ|ËÊ QKštêîÁÝÐÊ¥žštW™3›^ò”3’‹3Û# !¶µ­Ú˜’˜ÁÏÆ˜)²$¿jcÀІPs*">%g==FþãO0@â@ž¼R¥û—e%ïþ=u^Ltþgäè9RXÕ1YÜqcjºÕDE‡é©óøÖá‘ä 5õ1,¼—ƒ7âìbœõhâµÓ—‘´£¢^LTt4~ŒÕZÔ1Š™ÖŠV"¥8³=2¶´à¤ÆêÚ{JÃ%aƒ)Û‘ì‘ʬGév½”• ”ÄÑ'zPÜF³ª+Î<1$©(} c)‡wv*Jã˜M•îÍ­­Ìt¢es$QF§ªô1•¡Ù%µ™T㬫'Êè–SVº6}Œ¤nö />ÌÔs<+ZyÚo–ÈÁ‹>†s [/*ãL=ÇC°Ý7m’Ù*} 5K"¯Y¥Hg„» ¡‹ÉÔs zežÏd”r> ô¯Št&...6SÑñÐk™Ç\ŸO:¥GÇHzhJïLEÇ WR!ÞsM½R¤Ó0ÏŒ®½§éôôÈ64aJ‘NEˆÝŠžÃ¦IÇ‹k±½9lšt<ÔE‘£Ò¤;Ž…ñYÚV… èÜÇD­’4)Mº}T*^Ø–”&Ý¡í±Íå¤éŒçðO€eU¤{Z@K-ÉY)Ò¶r‹HÏY)Òi{ÜrVztVëÄŸNN£pz¾·  ¶9ãZ‚ÿ¶Ñæ°Ý–¬a›èaxvÚ1]a;Iv\WšéG¡6ãmñ€ µ%þ «Zž…ÚÉp‘=/Àµ+s$tRÛQ›Áô M7§Q»z±ôáðœ¨Í2‰¸¯zH£†m \€,m|o[a»´4Ä=i§†mì>¾”*Q ¶y»çòD{kØfгômù!lgGЩ†mÞ&=aF¶ñ^NâLrÀ6óË;€ƒoŸT°™Äf„–hØÆ{ØŽq(Ø–‹Îâûí=Û¼° ãØ– •Z~Ñàn°Í.æ((Ø&Á²»PÃ6.pããÛYª.­?…Û©¾pÙîq¤qý­gXáv¦Ñ$awÜÛ`Aܯ¸ZÒânKÓ¸-Y²óĦq›7x5ïP›¹!¨”/­7…ÚÉ3 »$N&µY´tO×¢Q›©/¦ƒFm¦·à’·ã¯P;³VÍHb£Q‹’È;Ƕ µ3“ò&ï£0f¶ÉK€‘Ànå¶%©DÀÆŠ}MÃ6ßã­DU®a›†t ø¹°nó›©¶°ùŽÛ ĮĤvÑþ©¡›e»ÑmÊo ›!µ(Ýáyn:¡‹—rEÍqó¢KôãzËq¡ZrÜ<•%'nûÁq£f nåÜ9n-ÜØÁWØ]ÆÕÙ+=l ·çmài›?n:2Üä«^¡›ÖÊÈ€Pû†ãf}cð}æi3€cw¸Ú8nV½¢¿¢w'Ç-Áì¹¶ØlÍqC¬vèf~ÙhOŽïYÈ(¹1ñ »£„>vaÝänìðýÚXnKýúc¨ÝXn ˆÖÉr‡% üÃqK ×ÐÍBNO‹ßf‘s÷xÂnü¶c¢£ñê ¿-õ&z-ŠßfÎpÏ,þä·%…¼ï)F6~›†ðæ·zàvbñгMiÜfÆ ™ºË¸M?8–ÈYU¸Më®±Õ¿Ízô­lÓÜ’›,‚$älü6¸ ©… ž¢¹ ìÁú·]¹™1‹Ôã„î4¿M’g††À ¹ù} ÁnÓK›Ëa¡D”ê‰Ü|/¶Û)àfIª„kU7ɾH ƒ¸YÏò^JõÏ›Ç!øÁÒêÿ4iˆæµÅÊØüIQ§§6æQûËŸÍäÀö›зýßP_>ùaÝ•ŽJÝM Û£ëöÇânÝ]‚šr[wýè¾ TM¹ A?ú&(®tH¸L)À*&WM ¬î€›‘e»êG¬­·M óÌNt¸Sfxœ Ûgv™Qü #¨t8¬VºÓ«V+éø±Zé~oZ­„P†ÕJõ{×j%w«•îö¢t!\0 Ÿam=‚Ò½ÞÍÜ/=?¥;¾šº_a”êørî~é¹Ó”î÷c–P<¢Î¦0m½f Ýz¹e ÕÝ\3†nÝܲ†nÝÜ0‡²lÔ‹F<Ê_·†–i ý÷ï„©+øhf¨Ä«”ãڽȼ43ª(ЕTRÃRFKå;Ég8cJXÊß{y1 §\-k±¸(Y¾Rè)^ M SGI]Dû #%æU3S!ZCp¯5%™E3ã)ž3Pk «f†>É.MYkfœcµPÑ-•lVÍL€4†ñ„/,Ñ753,îGûŠ(ñ!®šKK¢pM@)FkfèÇì£()KY*ßI’Zˆ‰ôÛ•¶¬“âz†"ïÕYøN"eFàJ©a5…25ixdÈÂ}Ó²”¥È¢iÉ»+ëóMÅL¢ÂRé&maŲ0“jòª˜IŒNKÝô ù{Ṳ— 
#c+=ÅLd2]g¢ÐJµùÕlX”Ù3.WÖæ›š–·d¢&QíUÖæ›º™@…Ëc¡«.¯ºV8ƒ¼]ĦQ½Yu3ŒÚ®Éоé_Ý ‹Ë¹d‚øÜWV曺0cŠ&k0Z7ÃÊy=[ a±…2ê›ÖÂ:+#9§-”™a3‹ÆÉÌ£Ym¡Ç+Õ¬~5†ÕJ…Kt=N©2Æhêf¤Üweh¶Qc¼1ilO,USжÐR.)é@îDœ¹ŽT ¹…Í!¨„¹éJTªõ-ŒQ#7‡ÒT¢þ@n–ª%ƒÙ¢¾;rss5r§Å´#wfùQIû¹YÔÛÓÊä¹É¾`}{6I ÝxÏÏœåºñhäÑÔkè–îa,È Ý¤PÖ,î먠mySkèfˆoæ‰ìð¼BwRÙ¦5tÓz„óŸZ(¬†nfy¯#©ƒ†îDßXn®KÛ6ÏÍrÚðã‡Ö5ËÍrf36Ο,7Cyq;ºÆÌ*–›Rÿ4'p“Ïu -¤ƒçÆbC’?yn‡Œš+¸Y~€©Õ ¬€W¯÷R"×Àu®¸éºñuc¹åž0ÌÖs²Ü‰OŒ£f¹+yÙ'Ümã¹rÙ±9²m<7¤†(‹ø“çÆ0q§õ,ÏÏ ŠÆ¯cóØxnf,7ƒ¥Õ<7  ¥ƒ­æ¹éìg¼ŽÜ’G?û^mi㹕§ÝÆssqí¶ÚxnÃœÇ ¬ƒ;ynpÖ3Cu<™nJt¾‡‘jà†€ÎÉ…·¥hÔ~p;nKù 0º ¡lKngz¨þÆq3³¶9ç·m¡”î€mÚö$ÿ‰Ú’B¦GãÉp3:– £žÂmNÏ;“«`›öÕÀ°iwÀ¶˜Äs´-=­†mj± oæhÔ&´ûÜkØfù£é!©a[ì6×Yj³ÛBåÞn\ÛÚNªa›‰gÜÈÚ;´r§Í6öüã§MæHam›9TH¯Õœæ®~;‚;jÌ8,¢s‚Wz²¦Yþ÷ÉmÏ®$qÊ›í¸ãÆ ô³OÄ:ë¾øúÙ}»¨žõ3õì›,£(†A0š=ç»–ÑìyïƒÙ†t’þ†,£ëéHqep¸zÕœDÇÆð’xš–:ÔºÛËæ¬¸H·žïÚ³âj"Õ=ß6hÅi$Ý:¾i%]IØV)ZLÅÚMÂ’.'aénïVS•=„¥{¾JXMÔ„¥z¾LX-‡K',ÝñÇL¥)y¦Æ ”ËîU¦Þz¹e*ÕÝ\3•nÝÜ2•nÝÜ0•f‘´*„ ËÝVV[éŸïU~6’—¬VIÞCÕy¢IÇ×vO2z¢–Ôø#®ÄR]·ÒuÑÝ8:¤1tµÆUënü%i “ˆˆ”f›ŽÜ¶ŠÒÍÜ@½H`ÝPáµ8¦»z¢ јu&Æ<3­¢÷E“Ý ƒ ª)èø‹„øxZbŠë•ªMQ*œJyŒµìÚLªÒáùV¼máÕª€šØc:Ë!µ‰P=âž ëQl¤%^¾j²œ¦WÏ–k[ãªÇ¡Š4Wg[qY4â³¾—\tã™LJú´vÑä$I#ÊT»µµmªˆ½L,Õ_Ì’ÛKÊŒ‰H"5%‘¨µÔ•´Eo1—”-z¬&•6§¼ZíØút› M¶WìµÖ‹ëw•J©´œüÓµûh ’àËs âòÌ|Ž^ ˜,Ë&VT^†ná’âKH¹U¯5ñ½‡”›‚è©v‡6¥ÓñÌ«‹i’²›!Ub_Zô¥¬n§dÎ’tÚÆC•É d†-ëÓ³àISY$•G¤d´iµ¦RÇ×L˜­ _”LEk«XܳÒîÐÃsdEcÐÁ¥†%¥[|³Ó‹C~n%ôT2æ\íãQ@Ï‹YÂës>ŸµÚ9 ô yÅÊb=Òô,^@[ä†8è/O–¼î ®€>‹ªÜÛš¤ž,B»ãÜ ô4]æVÞòzZC (ÇÇp=»É±Ïè©. âŠäO ç±Ï¡kÏ6œg ¡õµ§RÝp^{—m8OfǹÇ£q>ÉßxµÆù$±Z9´z¹Î'÷"ä4Î'FRÖÇ6µá<+¡ƒÕI©œ0Os+0Í· ®ÎK´akù‰óLªê%ȹœ[ñSÞúfRô¼^z$Ö‰ôŽ¡·à.›Æ`ƒz,"Ž@ìYêi‘½3@=™úBSÝ(ñ·1õQêp<:Sϼ"ti(õÄz&Æe„XG$ǫ̃ úxø¤©—PHû°»SÁb<ÿB=ïpG¤–AzƒzI£ÛòêžX[ŸZ ÖÓ¨[ ÆŸ ëÉ*ZA æÔXÙh¦ª“©÷¯ò²ÏxÖ“ ð‰‰¸3õŠŸÑP/a‘Øô“Ô³JFñÔwPÏBÕíõ+–Þ0Ì4ʦO¤g×<"ü7¨—Œ·•¥¶ß@½TÔ;‘ž!ŒØ§! 
)¤ç‹/‹í]*¤çg¼$ÀyÃÒ÷b¥Ôk¾!} ËšÀïtagEzVD­ÁÕ¥¹!=fEÌaúéÙ°è1Ùé™ :¤IŽ é¥8èÈw¾!½ÄãýðéiföÍeó GïèÈêMéòžBz4²²ŠÏ'CÏl·+ã¡€ÞÖµ~œV6 ÷ F\.®ùlo@ïù;©ÉÎe¿GjX_Yö7¤&7“'¸¿ô¦ žŠ+¾)Ø‚éO”ØÎ{âÛÍ˺ÖóLêkf_ûËz/þcŸXó¯iÃV½„Z™ 5E&‘úJ/þ—õ’%•n 5`ŸÿRCùOñ ß3®„ÿ¸Œ»Z³çPÖ_0S"U:L1IŸ9†¶YfÝxȳ2ðŽ¯‘ïíi4m9ܘ¸ mþÅ<=ç—³-‡[»71ŸbiA0s¬µÅ¡PlÏÌx6•XÎÍ"V|UÄzª9ç–nì.He3JÕÎ/5¬d$L'×ÌýÎ/)ÜØf–Ùù–ÂÍ@î‘‘˜8Ú+:1ÃH|/a1k38jWæŠY èáBKáVcö²šm½Ä ‹K · ÑŒqÆ¥ŠŽÑ«úò„c-I 1eÞ·(g—–"V¹ˆÏêá.-IÜøZñs i)bű{¼÷ìh^’¸q| ëÑ»ËK7iÂ'[¼¯ËK +™1–Œ+¶²äp“•²vŒ¤,%¬8JÆ*v3³+K7îLôcâuV°"|N-—«K·†¬á9oŽiðžŠkü`gØãÜÔ§â—²µ‡Ö¶dp“óö”@á|=‡›/a#X%Tšì’¾M:m’P¿g6¼A«TœÏÄ´Þ.ùÛ¸Z)d×’{Ùz ®IwvpÞ-éÛH:bUôrj0Å©öâ¾Õ46À3ÞSpMކxÈAÄë3fA†Rüs4<ëlMRµ,Ýõž06ü”)O$ß5#¶lZžèÁ²# +ÙDCv›BxL ²™Ÿ6m°FlV,Oz"ö  ±3˜lmiÔ5b`Z)=o£ì’®€Mòòš`ã‹À1¦8ïhÀ&Ü §^3'ïD4מ¹Þ]ô’¿Xá5¨rù¢‚ëBG7ÞRx¬—‘Âkê„–yk¼¦ÿ Njû¤‚k‚É2oרýj¬)´.,æ`bÕhͰv,³BëB§ÊÌ(ì­ÍÊ'lpíèsô`Œ†kfl3u›²‚ëBÿŸw ­™!ÿ„˜ªáš¯97ƧðºËÞi\Ã5#È—åÐphM+¬áÚ.µs5\Û%3é×fe4\3Ã-ÚšB^£5]êSAƒ5#Ù‡_¡u,m> K£µÃzààµd­™Ÿ»Œ»M£u±3‰ªk,wî=ÀªÀšÛ”ÝÀø«Ù”¦ºo¹0JcÂè¦Y‡v”±NiNfNôC ýA´ÆiZÍúõ´ãtv«áEÁ´Ôý Î:³¦…´¹ð)œ–c7NšÂiL dõP£‚飿ðL+L—À /Ï(LK¸ñ LsÃT¶Á4Ûìä´V”.Á+Öy…iE×ɯ(Í©qsŸ£» 4ßZÚ Óœn¨1þ¦e•‡}[¡4{‹8žýU(]h¨Í{V˜fœø:ʦ9qú =òÄŠÓ%Ø5b^á4Û†‰tÃi~3L.­á4úEpYqº²?§FÁ4ûq~ŠW+L,€›8¡pšï¥E†Zqºx©d9%/…ÔúD­H]ÄŒ=˜x…Ô$äð|µ|qa‚W¤.Rwð" «‹¸ L]±ºˆÇêsJV X¬ú ØE<@cçR^óƒ •!4¬xMrî9Û¢ž¤£ÂD²›.C ¯éTòœµ ¯y…,2¿ìî!°â´0s-3ÈÓ"Ùq¤7~šÆúEs²i@ÊÊ+ ilð!ZBy~Ê׊Ÿ6¯v(:â®ü´X&ð(~ZÒðŽf¨±¢ýÃh†šEÊ¥W~šys&k†šá¡erŠ¡Î_Q€H9‡‘dE3Ôh  °‡…TuÖ,¤â¨Éƒ÷ì«f©ñÞ¢)P5mµÞ‰_qԀ̩ù‚_îBÅQg:ŸLՂ樉lS 8êÌwC²Õ,5óÅ!Æj–:[µVŠ¥–¶I\𧦔=éUñÔ™ õÊ`ÅW–š™Ö!o婳U²Žbª3ëw}ÅÆR[ Ç+OͼF“¶¦:Ô÷Lµ[èXóÔÂ×!n<µgzƒÑ™â©ù^™ƒÔ\µ[R3ÕâRõ°#WMÉkØ’4oͶ45š·v ¯µ±Ö¹Ÿ‰ kW6êÃ>?tÃjF@Vo¶?þÕ,Ül½éóó®÷+&°ú8ŸÌ¹Ýè'ÄæÚB>1õèn¢9|~Ô£O‚šr[öíÑuŸ5å¾óæûü8fÓA›-L5qÓåÇå!d'ÞòŸæñã­a¯ÈàÆò·éñ31c¢ÓJnÁ¦‹žEË|’¤›ñ.înkÏŸá†Q=„OpèÇîùîô•eÿŸ•¤— Ô÷Hn¤¦zþR“!hRÓC¸Oj2„sÙÖW–ý¯áñãZ)X‰’¸åñ£û¸åñ£{¹åñ£z¹æñ£{ù&ŸŸñ§´õÿ7w.I“ãÈÞë0¿$žÇÐ-´™ÍH÷7…ƒ$Â=²¦FÝj¦õªª•I?Ãã¢ÞéèÇø#á4ÿõb8Í9%œ¦l7€ž)ÄÓ´í…Ä~JÛzd J¢Žˆ##‚h¿¤–Lû”˜”ïHŸp¬ä·[²8“—¿'ågÒùÓŽí¼*Gð&‘\ §9¶×¼œÔî©ê*kA߯ýz¾â?¶{¡ÀM³_Ï¡ƒ µB\ªÿ’Já(šBªr)Dƒ¤¥ý¥*A4ºL5ˆ²^í½ [ã~+Ÿ‰chJ£6ˆP£r{J£6ˆW`Âö÷”¥ölÉ 
§ò¸J—šƒ”ÆÒ%‚f@¿¤ØÂRl"Å"Bg;aËÈ™!WaHäLbAªL‰œ98d«LÑb‡n”)Zìø9\©‰GpžŽGí®I´X >©I´Øù³4òk 5³[ÖãẌ5³ßÈ&w¸FT3u?Ä×ÚØ‹Rê~ˆXÛz&pHìÌùcÔ{°zP÷CLœ¬ž"ÈûÏgÿÖÓ›âh³ïÀ¤Š‘;Ùφ̬]ñß«é‚ü3‰žq™Y†<“ÃðÜWgõ;elD¡ðr!QTbgH BU”]ïsÍ€®\»¢Â(Æî­Ø¥3꺻&«xn•fÊçZȹ«x¶·ã)%p†Ža {ÔZ…3Úémh`sãˆ8eóHìðS6C”ð£ Ì~žÇÁ!:Ϩb²ïyå3J±íxå3êZo‡êÍi42§'¢pºOYuá4’[ie…Ó}•ð}œºÊiÈ2›€Šép8Á4:Ž-M+¦»3N'ö«+§ñ›®_§»¨ë jИ;²MA uÎ)5.W@mßc( ¨! ÄÔºdBjì'?$Âm(©Ñ⡵Jê5½­)ªQ$dî)©±ú 쎠îP:wà•‚—ïÌjÄÏlâE9VQ½rêï‹P ³×j!õ²½<~YP"žÔ’"%X7RZ.¬SZp ´ž?Wƒ'õ@‘x[j»´¿´¢ÉUH=I#  ÆÂm=3€ú$K9½<ÔûÔ#§ÉöQN£++Ö‰)´ÆèÚØ­=x)кp¤€ÒºO¶}>i½˜Z7ç̃ëΑ3¸.dæF\».hÝX÷UZã=ÀC>p½ÉðëÎûô×…Mg‘cÑNí|ªÐ\0õ¹g"®·æ`]Š­Wÿ´©{ápEu?YUTc§º*PmKéAÏÕ66ž ÏHêLg¢¤nôÈþµ£çƒÓ§ïýÀé&ORµý$½K(ª»šâ‚j»×Ú¶¥.TÏÕYæŽ4øÃz,ZÇ].Z´'¿<³øÛ¥?`ôM=öWGÃAùÌã4·WŽÓ“;:¯‰ÉG_CûÑü¡q¯-ô…S)_Ë>z]•)ßW^?ú3zl9\†ï¡2Ó«‚l1à‘Í"8¿©È¢¥œ‘ïÌíï)È:4O¯ì¡qœ^„ͧù®|—†A%“#A%[§ *™žÂû*Ù:…ÏeO%[»õ7ËþA–·ôWY0l59ò¶Ú:Ýjz ïoµu ŸËþÞV[ÀúͲÿ‚ì‰^dã\¯o ²zŒ·Y=Ê[‚¬å5AVò† [ðö\†½}5h¾¿SdSÙþ »àÿ¦È6xhž6w£“@b=$_v²Ï¿Õ뀮Ïcj;ýÍzôŒåv²×?K:O;«÷e\¹´þ†k¯ã”ˆ•èmº­Ò„WÝDû"pV:øÅ·ÁIó$¡ÐBi;Ûµ­·'i¤ŒâÏ«o«Õ{2â{WUÔV‡÷c uZEvp4zÏï’ŸøÞáIÇ­‰(;ø}¹!¼{ ^èê¹½¶hŠL• :;´ZÞÚ vÂs=û‘fc7j¢ÊNNncx±OžmŠ,[QØüv4ûWvÅ }=·C±ÍåHš‰+(ô”½#ŽCRdOìHÂùztxhž}ºê¸²gÑg+«4=KiÛ¨õvÒt8ižmºÊ/l·•írgE¿éðÑÜût¹vN?Ø4%¸Ÿ”µ¶Ôо½ý”4ÙήÐ~² ¹0žO„×7røW9^eö oy/ìD²•*žnв-"ÌÚi>yT½’ êkÚÅz•²’¨Ü»‘YàÞÉÞD™Å…Ù‰}½±ÉVŒ\÷+[boT$n—-ŸÀ 'ÚlrEf9@Y›¥l¥e-Ø)œÇ>½·´Yå4ʸ<÷nä´F(§±Ì®)§KuqV1}fN£QLÈ×yÔ^¥ô!µf„Óùäz Ôú,QR/Î>—TI+_R%u.œ ­¸Î¹Ú…Ö'"b õáÿø õ¤è%…uçdN5~Ñ“î…Õx®eOÐVãktþŒê±Êsû`Tc)=G>ú!!õÁ‘#©Oª·@]Yã  î\4B@½ä;¾C8½–)oï|àtZ²á­ëROñÞÔRTÛóŒRAõ”ÔáÀk´ìÝPå5ªÂÔ'ÅSpΪ¸†xïºÄ®3=P×6FW”ר‡ãŠQà5úK:И×frµ&ð:ÛÌg²DÒ4ÎtS^C›ðòFר®õ„\Ÿ?Óc÷×6sœ×(ù±µüEk\i¿¥_ThØuîdþÈëc Ó„×¶ñ=ZqmwÝYìeÊxx‹{=1Ûöbãî¦ÜR&öŠftkLˆ½î:"/* ›ñLì%ÊlC€½¶ºV °—=àõÔ¾£&ľЉì#%6bY<ÈNüÏmWûÚf@±¨j_«N«ö5ÂÈÔ!ê‘ìd1¯ñ“»J2¥<Ü>€Sôý¡ÀFÚø.b ¼¶!³ïr¹šÔ+°¡Oop)¯1äy°Êkäýz ž ØöJõ1ÿ°LkÙ§LsJ6°þzéS¨ýåñßð\>ùÏî•¥Zùè :é,5JµòÑNA¦|­{øèu©V¦|BøèÏHµµ­®€ÙÌÓ4ß•jѲ,¨öüø#ªðÿ[ª­x7-†ÜtžÓäYç†êžØû.~›4£ú µv¬#ÑräÈ_ÐÖ)¨„¦§ð¾„¶NásÙß“ÐÖ~ýͲA­¥Mm†Ì7äÚuİÙäÐßèΰÎAw›žÃÚ3¬sø\ùû3,jýfåÿ ÉÖ>BEØQW»¾—4Ûp·DÛp˜·T[=Ìk²m8̺-íx¹6€ý›ºôGgÙöþù^"íU„p˶ÃkRmKâLZyÝZ÷\¶=؉:w”J[¸&ñªˆ¼{ëÏ;&|Õ·tÙ6›ñ¸Ý¥£v–m)Á£%ò. 
;ÉüÜ¢H &ï’DcÛŸä]Bíù•0zâðÿÎ )ï·®‡5.Ú>q䙊ÓyÇH¬ÜVNûƒš5/7ª™’PÙë4f"1 “çš ·°ã·o @´ûWáÝØ^Љž’®‡V%f*ìXª?Sgá6“¿}æt[$§×Pa5 1Ygî?5æÇNt‘ÜŽ%x©·Z4ÑDÒÓj+yó öŒpÚ^ͦ¯ãœI Ò#9ðÞèû#²-%Ltt?hæÔÁY’*·Å «ÍRHH ?Q’å[Êâ5±Yµ˜µhQzJ™h!éÛ5³º8[ÒÄZª6:[Qù–î¨Ù:ë·‰3á'úGŠ~ëÊÿìE2kÇ΂½³p¸'kâmÊÅÛÉ#%”¤ßyæsP#Æ+ÓøÕçLšS;Ý4WUõĽ6Î&îfn©ýe4àÿ»®ù–r«¨Fšñ#FTöµ)©Õû¨¤¶½DùJj³r¼ˆp uãì¯@êÊB€¢ºQµq!õ¥D>þßêA^cõ —)»*k€õ B Õ0ñv\C`ug÷i`õà˜‡Àj„el'º²º³×õÖTé!Àº±rûkr7Àz¸ÐúkÏú°Î¤¥´F”ˆ'‘ß´—üVXk[¥õj§âXZ¯ø‰ýì ´¶í_·X§´•2ÍÖ|ž:êÕ2Ø.&õà­¯ ÆEwiFA­ÊÓ'¨wzrzH7åôÒf÷×ÓCë(¦1i‰ ˜¶Ã•gAÒPJijBiär»ô8}P7õX•Ô5HI¯Ìª!¾z¹eEõh^\ ¢ZSO/T£!ÁÞ¦/ª¶ k»TO}ϳºpo…µAw?óÕ°ŽÝ`T‡¸5a5`G¥‚…ÕXš}o©µM†Õ«¨Èk¢¨†DïÑjXðd?}•Õ0Ò¼à¯Â[ÈU`…5Æ<âAaÞNH…5*'ÐsƒamC °F”‡*¬W»˜mž «1ä éÊjÜWþ“ÊjDÔnùOYmC”¦¯¬Vaùf5Ò0w! e5Dj$ª‘K„ï*Ø’ôª¤F†æçÔ+qßO\H~RX½*2œn;¬‘·ï/kHÇdà ¬ÑÍ#È‹VAàǗп¹K)¬‘ÄëV®ÂÚ^5Ž»J€¢ÝÞøDQ ñÓ#"ÖXcž«!a¯7[Åt—B9ÝG«)§»ÜdÊi,=“…Óç½®r%Žw Â…iÍŽþã}ÍíHv«µkÓ]­Íñ^ímþË3xÇoY·`ë|åH†¦õž~T»,÷ää³/è¥ö°¼¤I›ç±O‚?ûÊIð¬ïÅŸ½.Û꬟“ÐÏþŒpk6"CZ…aþ®pÛíÉ`oJf«ä³}S¸íyÅ©ÏcœOá– â´ze#=€zŸ 2§ºJá–§ò‹‡ŒíÎåØ_QÔê‡~Nâ’Z}Üpì—ŸM¿[üoˆ¸¼½ó\m°kK¯nºå3 ó–ccÓ]9ºéô$¾°é.çsñ_Üt‹a¿[ü¿BɵW8xÆ‹Yåµèá o)¹á0o)¹z˜×”Üp˜7”Üž‘ª²Êxµüo¤ÜÂBî¿×`<Á™ƒ*й@mC7ø”ê‰fyµ± fƒìk*’%iƒìn:&I,9¡”Ìz¿ÌW –§FØXYEÜìí¼-}×…Zi@’Iìû‰Òo ;»ñÛ?6•TG½êµQê“ vNìÊ” `yvò¥ •UËí‘ÞœãyDÁiF½ÕsJšˆëé96&‡ÙÛÔ@33s¹ÚÙRþ*E«ÛXÑT\ªåhƒ}Õtk6¡K”v¿þ×Ͻà×Ïúüó!*A×)"$VHr^Ua.— NyF6XVa·³äq O9Dz„‚ÃE›[Z…ÝÖ¶½’¤èüá}öïÒÝIcƒAڥ엜kÒ”\ê0mƒhêroà%Öìl뫺ÛÚÀ+'ÓËÚÛ»¡ Z{²— ÍöÚUþl«{¯˜í3µSy·reaÙ`JG¼ ?س…C&UÎx¥ñ-œØwdƒ"ñr¡R뚢»Â¶í7÷¾«ïîÉ oe/‘uMÑM[!°šVß­{ÿ&—#l¨üœ¾{‹â6(Bïɹžö´©÷”wØítúþM”Pbc}õÞº·oæ’ª·w½x…Ï×z–£Àܸ"?> ¾Jêmõ5@¥¨šf„ú!c õÊÞûO¬“¢­Xǯ’ô¨X_5¼®¶rƒÓ•väì2‚ìCN6€}pý…öÁk{ãdÚˆöÊ:ED»YJ®FG´Kîˆv›íî—Ôp?Øçá^¸ ã'Üy¸w „ÀõUè þ‚é‰Ò‡´àT¤ÃcD4P¦£ný–TÓÇÐ'vRðáaué+Õx é£ñË[d:ºšz×j…ú"–ë™Õbå:Dò´-¥zþÉ^‚åë1ð vZ:åúöÉë£s"và:ì+n `•æñ”"¦Boö¿U¶7¤:Þ—âƒíŠÉFT¶gÉ4 l‡=»#hí«Ã‡—|W´w~ÖD´¯×ëíkIû/ѾÂÜî¢ç窯RÈ|$¦:®Õ³ ƺ™rˆƒ±Ž"¿ôΡÖú@XßNJuˆŸ™­nV‰¥«¨ÛD(÷=@Ù»úuÔlço Ôµ'A€:ô»&˜~©éþ¦C<½‰yá|å²9Î8_ °lÈ3Î!š&-š£'±ë} óŽ|p¢’ª‡U¬#‹t×XçÚÈ õ¥ÒÒB Ô».ª#–‰_4Ç—bPÔíWOž…@}ýª¯0u±O²I„éèfËæ¿0}™ý=\Ž‚Òôþ*LG‹b L·ÁKX8_±þXRž£¯‡#žc ô(TžëçöE{ÙxÊݱöFà²ôÿüÿälØl endstream endobj 3 0 obj 33249 endobj 4 0 obj [2 0 R] 
endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1080 792] /CropBox [0 0 1080 792] /BleedBox [0 0 1080 792] /TrimBox [0 0 1080 792] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000033468 00000 n 0000033489 00000 n 0000033512 00000 n 0000033942 00000 n 0000033811 00000 n 0000033706 00000 n 0000033869 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<7030C6040E75E4BF5D8333CC4C29B67F> <7030C6040E75E4BF5D8333CC4C29B67F>] /Size 10 >> startxref 34023 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_has_jc2ic3jr2_nt12.png000066400000000000000000002767771360743507500233330ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ 
œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$9·-ç vpAg’Zó!%Õ€IDATxÚì½{œ#Wyçýclƒp ÆhlL5,˜™R¦±MJ&x`;[‚72½oØRÜ» ä³àRñ ðÛ¹ã8xÿûßßp? ˆaÐIf?ûÙÏâÉ'ŸÄÙgŸ Q[äT×uZ³‰™aYE±ãÚO²Ý?äa0Žã`aaÁË?2 £!´+N#›Ízç’É$ ÃðÎg³Y$“IïZñxº®7œsŽã ™L"‘HÀ²,ï|:n¹V¯Ï'ˆ^è$ãÝÎ5ËÛ òJòMŒ›QÊ~*• Ýåw‰DŽã4llÄ tÓWši–SZ³‰YaPYïöz’í]à¥X,ºÁ?CµZueYv···Ýíím€»½½íEÑ•eÙu]×;_*•¼óù|ÞÁu]×àæóyïœ$I®ªªÞy×ÚÍó ¢:Éx?ò_*•\nµZu]—䛘~F)û𦵼OµZm‘k‚dÖu]W–e·X,zç›å”ÖlbVTÖ»½žd»ÈÃ:axÈW*•òBz‹Å"DQ„ae¹!,LQïgÓ4½°Ó4~ç»4Á× ‚AzO¯Ï'ˆnt’ñnç$Iò" Àªª 2IòML3£”}Y–[ÞïÐï1,:Él;šå”ÖlbTÖ{y=ÉvÁ:aDQD±X$“IìÛ·©T Iè;Èf³ 0E† &E'ït`4A¥d›˜%Æ-û’$AÓ´†ëİè&³1/ *ë4W†¬†RÊçó¨V«(•J0 º®C„ŽF«$IÞ®  hšF¹KÄÔÐIÆ;˜ÒnY²Ù,Ç!Ï1SŒ[öóù<4MÊ"†N7™%ˆyaPY§¹2|È`0¼7LEQô¸“¶ÇivY–[ŽñÄo‚˜:Éx§süwEQÍf¡ªê¤? 
AôÅ$d_d2d³Yj»@ •n2 t #ˆY`PYïåõDÁ:ax^Ò‰AAøPHð Þ·oâñ¸JFóŽeYØ¿?DQ$™'ö$ûAáCV‚ ‚ ‚ b*!+AAA1•üÔ¤0,>ÿùÏã/þâ/ðÜç>wÒCé›ï}ï{8÷Üsqî¹çNz(}sß}÷ᢋ.šô0úæ±ÇC$A.—›ôPzfyyçœsΤ‡±+fYN{ì±™\Wî»ï>üÍßüͤ‡Ñ3´†O†Yž›_|1>øÁNz(=Aë÷ø™õõû¿ÿ÷ÿŽX,6é¡ô­ß“a–çf¿ë÷ܬÏyÎspÅW`uuuÒCé›\.‡C‡aqqqÒC雕•¬¯¯Oz}³µµ…S§NMz}qÎ9çÌäw ̾œÌ⺲²²2é!ô­á“aÖçæ¬@ë÷ø™õõ{VŒU€ÖïI1ës³æÆ`e:„Lz»"Ø– Ú1«r2«ó’/´†ó̬ÊȬÎIb¼Ðú=Á:Ìâ®giiiÒC f€Y•“h4Šh4:éaS­áÄ<3«2Bë7Ñ ´~ÏTt‰ ‚ ‚ ‚˜JÈ`%‚ ‚ ‚ ¦2X ‚ ‚ ‚ ˆ©„ V‚ ‚ ‚ b*!ƒ• ‚ ‚ ‚˜JÈ`%‚ ‚ ‚ ¦2X ‚ ‚ ‚ ˆ©„ V‚ ‚ ‚ b*!ƒ• ‚ ‚ ‚˜JÈ`%‚ ‚ ‚ ¦2X ‚ ‚ ‚ ˆ©„ V‚ ‚ ‚ b*!ƒ• ‚ ‚ ‚˜J&j°–ËeT*•]Ÿ'ˆi†ä›˜wHƉy…d›˜wHƉYâ§&ñ¦årš¦yaqq™L‘HP©T iÊå2@–ed³ÙIWÑ$ßļC2NÌ+$ÛļC2NÌ"ñ°jš†D"˲pòäI@.—óί­­!zçËårÃy‚˜fH¾‰y‡dœ˜WH¶‰y‡dœ˜E&b°V*:t‰D‹Å¼žJ¥‚­­-,//{ç—––P,'ý]DO|óÉ81¯lóÉ81‹ìÊ`Íår8rä$IB.—Ãææf_»/‘HÄ 5€D£QïgˆÅbÞùàd"ˆi‡ä›˜wHƉy…d›˜wHƉY¤oƒ•¨‹‹‹ž€G"lnnbee¥§kd2œ8q‡ÆUW]…r¹ŒÕÕUè8)jµZÛs=ô,Ë¢°¢+[[[øÌg>ƒo}ë[#¹þ(ä¾÷½ï!—Ëakkkr_1är9Üwß}#»>­áÄ$ÙÜÜÄg>óÜÿýC¿6­ßĤáë÷¨ŒDZ¿‰IÂuð~×ï¾ ÖJ¥‚ d2¬®®âÀXBöúú:¶¶¶vm¨ÕjÈår8tèŽ;†cÇŽ!‰xBÞiBœ>}ºí¹sÎ9^x¡æ@˲~?pà.½ôRœ{î¹C¯QÉ7œ{î¹8tè7ò h ZÉI‹Åp饗âYÏzÖP¯Kë71 ðõ{Øò ÐúML®ƒ÷+ß}U æ¡‹‹‹-çb±b±X×]ÆÍÍMÔj5ÜxãÞ±h4Š#GŽ`yy¹! 
¡îÑ ãœsÎÁóŸÿüб{]בN§¡( òù<&GŸùÌgð“Ÿüdèï7*ù˜ÂCòM4cYÇ,ËÞ±ÅÅEüð‡?ÉûÑNL®ktÓ7ú…ÖobX\\ijŸýì‘l:ÒúMLšh4Šhô2<øàúz]_V>yÂnµZ­«w`;4Í“0;Ïwƒa •J¥ëÍ€˜ H¥Ûn>î N#N#‘H Nü^º®C×ulooR©TÃÿAX|–eA×õ†cŽãÀ4MØÍ‚À0 †Çqúz¯d2‰l6‹D"Ó4¡ë:p×]wäó‘Œó É61ïŒÓÀ©SÜwßsûzM_+÷¢®¬¬4§•JÅ+ƒÝmwE–e”Ëå†<Ž'N ‰xy±‹‹‹ØÜÜôΛ¦‰D"1Ñ/—è ÇÒi€ëëŽã§–ÄãìgÓô_cš&âñ8A€,Ë( €x<ÞÎË•~]בÍf{ÇqDZ°°€D"T*]×Q,!òù<,Ëòähq$ß{]בL&aÛ6J¥L&Çaš&’É$‰„çùO§ÓH&“XXX€iš°, ñxÉd†a4\Û¶m¤ÓiϨåÆj¡P@±X„¦iÐu–e¡X,âŠ+®Ég$'æ’mbÞ!'Æe1ý¿nüÒ/Uñ’—ôWg£¯`X__G:Æ‘#G°äÙ Äb±ž Çb1;v +++^XÏéÓ§±¾¾î=guu+++ØÚÚB­VC$ÁÑ£GÇù]}bš€,3E@’ØqÇ’I@˜áúþ÷—ñõ¯Ÿ€mº.¶m˜¦‰B¡‰¿¬(WðEQ„eY$ ¢(B@"‘€¦iEÉdš¦AUUO™Ïd2Þs X,B×uhš6²Â$ß{ 3°óbš&LÓD©T‚ ž ‚Ð Û¶mC×uˆ¢Øðàd2Ï8ÕuªªÂ²,†MÓ`Y²Ù,lÛF±Xô®-ËòH¢š!'æ’mbÞ!'Âp_ï]g†i&ÃlA0á8{ìuøàÏ… Å"ÓÿæÌê#€¬…¾ ÖH$‚õõu”Ëe/4˜÷qê•¥¥%,..6”φ(Äb1Ü|óÍž—bâ§nÞrKÅâA¼á é3 Ë EÑèa<ôЧqË-çC–eˆ¢ˆßÿýþÝ¿; Å¢A°°À ]QT•)à<œ`“CUÙϦ ¨ªT*˲°±q¾üåËêÏ 2Òiv=U H§ˆ¢Ó.¼ð"ˆâ7Gò½|Ï.Ü‹oY–glòãŽãx›&–Åä[–eï9‚  T*5\/Ì€E™L¦ã8DQD¡Pð" $Ij˜ “†dœ˜WH¶‰ià?è/¿¯HÆ÷.¶n”&“ì\©ä˜A¸¾0½F×™N^,<òR©*øß8xð<|ò“@ =Ã…F,ö6œqF ¯}í Èç—`þæoþ¦ïí¾ Ö­­-¬¬¬àäÉ“}¨a°¤Ûö1ñ<< BȽ>ýâ8²ÙlK¥]Û¶!Š"$IÂ×¾v^õª³”sUU=¯‘mÛ°mïyÏë…?ÿó?Ù¸HƧÛ¶aY®¸€iš8uê!üýß_…g>ó™øÜçÎE:ÍtbIbúõYgUðš×<7¾ñYøâ/ðôv¾oÛ6^ýêwà_þåßá›ß|ÏxÆ7ðìg_dò9uýþ=xõ«ß QÞó@?uÃöQ(°È²DâO Š".½ôRœqÆ}}޾ V¾ cš&–––Æø§#vWÀ5MC"‘ðò9ÛÁOI’ lÛF6›…¢(( m_›H¿ó;€(¾(ô|sÈ#G–jµñ˜$ùdÒ7ZUÕ7^ ^ð†ÁB øïÍ× C’€ýèQœ:5ºLbrÃtæù4 à =wÉdÒ»†I’ ˲WIºR;¡"‚ ö<&“É´ÝÀä9{Á[‹io|ãñÂ.ãõ¯Áq¤R)ÈrßøÆ8qâ?ãꫯžôÇ#&H2™„ã8žc¨Ç²Yæ¨ SÍyÑQA^†›o~7®¸âoñØcؿߋ_üf?þn¼ô¥×ࡇ–ðÚ×~‚ `{û(þý¿×qÁ·ye,ÕND*\w3táBw„Ž=8U厤ÆÈ²­­-œ:uª¯ï¤/ƒ5‰`uu¹\®msaÞ|˜˜,\X¹‘ÊÄL&Ã0 ë:dY†¢(e†axí_¸7Õq‹ÅŽž$^TiØÎ&IbÆj*ÅBÚÙ CØ|"ÚÍf¡ªjËF÷®O ¾cšfèB.BÃøxþs0\·T*y‹ùn£¦Ûf7ŠÂæ!¿‘Ýu׫&=4‚•ÊYxüñ³&= bΈÇÙÚùæ7¿þëgâðaæ•ä:–ªªH$PUš¦`¯7M¶&çóÀ•W>…7¾±€Ûn[õîY’ô‹øÈGþ¯xÅ%€_ûµã­o}ÏzÖ_ceåzììÜ6鯂˜ét—]v=´„ååÜvÛF¨Žò¯ÿú‰$)ë¥à=õÔ3qÏ=?Áë_ÿ< …z–?ªi׸¶é*iL,…îݨ‹rã3K˜(}ç°®­­666BÏïƒÕ¶™RØÅ!Ó÷ôðbAÓí)ò¼ rMӼХТ T*yÅ[R©DQìjœ†ÁåQ I“Ÿ {•T*Û¶aFæ1Ïd2Pëñ×AÏ|»ðJËe“çƒòëòðY–Bq¹<ókض EQ:îh÷‚ žr1‹XS¦D‘­EÁ#Çaç~ú§ïô0 ¢-–Õ¸iYì~â8ìgN&ÞgLÆùÆÌ?þãspþùϘôÇ FŒeY°,˻ߌÛ¶ñ‚|?~üUüÎïü£§WqÆ H¥N#—»’$Á4MïÞ¨( R©ðÔSᡇþçžûÇÐõSEßýîÿÁ¿ü‹‰ÍÍïࡇ"-:“m_ýê 
åãÅ/®á†V±²Bë^„ë<‚P‚$–µŽ••ßÇ_ÿõ~ë·Á—¿ü D1‹;ïdЇþ0.¸à¿"Ÿ9¶N^ýO ËOƒ(úþ^ÕQôŸ›N³µ8Ÿ¾Sj7ôm°6ç2Î;–ÅBS›ó#MSÇm·ý÷ î¸ã\pÁmP¿÷{2Þö¶³ð±½¯yÍù^aHöºW7«üº¼{ Šk  LSÀk^Ãd>H"ÁƛɊrä¿nbLèºÚ—Ý0 ¼ó‡ÍM&™L‰Ä),,,à¼óþ#®¼ò¥8zô¿B’>€ÉZ",/³ùÀêµ<Í;—ÍîÞñÓ¥6äØéÛ`ÝKpcõïüWüå_F Šwbgç1üöo?ˆdò,.–ñ®wƒ¢œï-ŽÜÓ,”‹½CFTUõ¼­|€+û¼5 _ì¸'*¨¬«™òÅr·§AŒI¢°ÜY‡‡ˆs/§¦i^®ßÕ^XX@>Ÿ÷~/‹ž·•Ëž¢(H&“Èçó0 £ÅËߌ(Š-…2™ 4MC6›H¦gî-åÓ2öC~§¨P0AxèºßÎL˜Ì~~oo¦ªLŽ›¡æßY4GÖÛ¨buz÷óA:ö6ÖS©ŠÅ"¶>¦R¬Bi3–ü‡ÿðÞøÆ¿Â-·TðƒDqôhÙ;ÓM/B$ò¯Ø¿ÿH’„GÙ¿û»ÿ‚ßÿý{qäÈ»½ç}ö³@<ÅC5Ŷ-¶UïOÉ0 &Dzìo¶7p%<½)“ñ#ˆù#QÆç›ûaQªª"•Öºb˜ ×á]5Ø|hÞ¼¶•™uve°nnnÂ4M¯—åââ"dYž«BLÜX-€dò—ëàæ›ozÓ›ðÑ2/T¹¼‰xüÈç™UQ;¶é…ýf³ÙÐ>£ó†v«Æ»`‚5ïF>Œ bƒ0†(bÄðßvªª†.¬Í­Y$IB¡Pðòy:-²à=L÷"¦‰†ê}œv¥æ bRóôýÌ=^ªÊŒØD‚×uvïR¿(OhÞ”²mÛ‹æXñ]‹Óeõg˜ñÈ1^c@×ud³Yhš†/¹ŒsÏý’ÉÛðÄâì³ÀSO=?øÁ;pç—ââ‹ß‰hô2|úÓ¿ŽÿüŸ/ª2¹L$€×¼†E–}}êw„ÖÎaž)Iòe›#ËLWì¶¿v‹íT ’˜,þð'qóÍ·áÇ?>Çq<#“;•²Ù, ÃðÖ0žÞ$I’çtEŽ£`{»Q´ñoÿv7þðU(ŠâEX–¿Fò^¨ýD.jšÒÌ<íù÷m°ær9lll`ii ËËË€Z­†µµ5”Ëe;vlÒŸi`XÏN¶Ù¶Y–§FØÚZÂe>ü|¼÷½E¨ªÃ\7………ºâ^òv ƒ÷Û`_Ñnw©yä ßÕÅɸ2Ob¯ôâ÷ÊÂBcË¡ ’$¡Ú\ú™èYnU’ö¨íNL1ŽÃöW¿º Û¾ªªBQü›¿'Ù6»Ï±{–………¤—øÆh0Ò‚o ñÍX¾ñ›J¥Â.‰é¤SF]oo°ž<ùUÜ}÷ñâ×°hµL&ãmRäóyÄãq¯ÝG&“,¿ïxÇ“ø§z€n8¼Î ¨V¯¯-Õkq0yâ^ö xÒé§C×Ù\ÙÞfº”,3}^Óú¯ìM¾ ÖJ¥‚ /g-ˆ,ËXYYÁòòrÇÞN³€,û¡wºnâ7/ÿMï\ØB|ç1oWi £‚”B6›ònì’ÄþÏçýœ¦”Ô¶pÃ6¨¸f³LØU•M„xÜ¿ö8à“s—N4bJ0 #ÔKÏÿ¾AåëB-¬\ÖlÛÏe¦Í Þ#¸¿›Ži²×íæFEÌÁBZã",…¥—q~ðƒ?Ä|ÿôOÇ Ë2âñ¸Eä8Žçu`›½¬öE2™ 4 6¤ÚãXÓ4$“I¯*+üÏÿù?ûnò ˜æ3êáè/j;® ‘ÉΦhįQ¬¸‘àAå^SÓ4ñÿý€}èÕxó›}GK}Ê£V«á¬³Î‚(žHltàX–&î{ÑÏóÆÐ¼±C!áÝéË`ÝÙـЛÛââ"b±vvvfÖ`åÅ•+ZÈ[yàµÚ(‚Ø¡³ê‚'û»Ð|W+¥<–ð*ÎÂO ˜ÐΨ Æ·Ë2»v2éǬÌüݲü ŒügN/†'÷è þ‚OÊlÃC׃4Ëm3Á&^<%¸!3Ì"\Íùœ“†1àcâ›Hé´ŸKÂC û)H ÷'ö|cˆQ”Ö9ÇóAwãiçUw¹2”Nû÷®˜Y–åÕXàm¬tÝÏ›V¿ÿû'ñçþb<ñÄ3ðãÿ+Î>ûKxç;#Ð4f‘ðüuî ã­Ò¸—Œ÷o׆ªƒ™{ØDQD:ÆK_úR¼ä%/™ôŸÀ×EI.»ìA\yåwpæ™ïF¹üÄbmÛH$RøÔ§þ¯xÅ%Þ}FQ‚‹ß‹\ÇñÃ#yJÔ?ø†PÅ~·÷*¹wàÕw›» X–…D"EQ¼ üR©„lÖß„çÅÞ­‹KK‘– ç†ÑXc" éÎý³«ÖZ­Ö²³Y«ÕÚöfšcÃ`áÀȰÐÖ`m¼€ºÂ‘J5*ýÁjž’@ì/üç ?äÿ0êÊzGEÖ 5z…9Í¿óòÖ¼xE'# ›eÏ—$ö|¾ôíÚˆˆȳq7|o¦ÿÝ£‡{6DQòŒN^q®¹*v;‚J´ª²EZQü|Štº5ŒžË’$ù9›|žjÓdsˆ+@šÆÆÇw&¹"Ãs£lÛÏMâ9K€ÍÉ4n µóh%“¨‡õ³ßy8?ßpjÙq6¾àP>Ï¾ÇæÜÓd’3èyà»ü¤<Í7\éáëfpc¨Y± ¦‹ðyÜÔpo}ë)lm]‰³BCЂs‡Ãç¸mÛÐuÝ g»å– 
R)àÿð=X^>²,C’L†…l–)wº¾XïAùb¯kx?Þç˜çlqxïN9ò½ÂsÛya·‹/¾¸ïÆóÄàðnð²—]ƒ/}éK¨VK^ë2I’°°`ãå/ÿE|ãY¾ GŽDɼ“í~”­‹Š6‚À+¦NúÓ³H°Òts/É =òuJ×ýû4ÿŸG“5GBñÐò T q¸}rÍ5׸×]w»³³ãÛÙÙq¯¿þz÷Ê+¯ì÷rCãöÛow?>Ôkªªê~å¿âº‚ëºù^°íº®\ÿ¿d×-Ý캊¸ÄÇ]wû"×uµß=>·2×­Vë—Þf?—J®›Ï»n¡0œ÷p‹®ë*!Çe—}Ͻ|w¥>>ó¶ëºU×u›Æ_*–†./£æºë®êõòù¼«iš+I®«¾Ïí^å7„jÕuUÕu‹Eö{¡Ð(ßÕªëŠ"“+þ^\æøï²Ìž#Ë­c)ÙXùõ··Ùõù±R‰ý,ŠLnýÏê¿£,w~/N¡Àž¯õ9Ï4=‚×埛‰žÙÞöÞpØò2jF±†O ¥’ëJ[c]—É $5ÊhUeíímW–e÷’K^á¾àÿ×-]÷æ›ïtEQt5Ms/ºè ÷ãÿg×u™ e2¾üs™*‹®ªª®¦inµ> dYvs¹Ïycãs²Z­ºš¦¹ªªºù|ÞÝd11³&/³6Ã(•J®(Šî{ßû÷ç~î îyç=ìÆb;n4ú¤«ik¹ë2ù„Öã¢Èä¿ù8á3kò2­óQUUWUÕ¾^ÃïùÍhZëšÝ¬?½±yéÛÃÊ«´>|¸áx4Åúúú¤íïÁàù õÝj˲p¹r9óú5{› :€`(­ æ5ÌPüë´Å¤`?èßàÂú5š±Ðè•LÈôð>="ÌT,ú^§†z§þž½|¶võׇ}辶æ8Àko>¹ÄžÛ )Kÿ |õÅìûË8YÃ¥«—âžkîÎ7£ðp`Þz‚3H¨Š 4î4ro+'›m¬€Ûü^<„½]8mXÏÑfïT0T—ýU¼ú·ªöž/ª(€âðj!½WçÈdXn¹aøs*èd ީ峆U4ë”èê8~|vý‹:ç?èëoG Ÿl–ý%©Ñ«Î#gÚy—òyv¯M$ôzq¿ýÛ7âWu ?ùɳñ©O݌Ç/ƒ¢°pÞûïWP­ž‡¿ýÛgâž{îÁ¡C_E6û}¤RvC¿íx<îµIÛØxvvü4&›{·B7¯ ÷¢F"/ÀùçŸáõÂN§Óxÿû?‹÷¾7U züý¥'H»´ž‚DEŸ‰acYLÓô*ö²Õ<z÷zòHª ©TkØ/ÀÖj ï}¬±X ·Ür ¶¶¶¼pœC‡aqqqÒŸe €m®<Èp` á¡ÀVýĬ?W3^9zýw¾Ái²÷H–oyå¬S!o<›`äóÞÚÝ\0Z¯¿w @»‰¯×¿þùåÀú¸óõóÜ8Mx°nn¸@ ÜÜ «€¬<üÁcÀÛ¤%à‹÷ßüÛÖA†ßg¡ôBŸžz&pû?ý<÷½ÿ~|ÏýÞð¾¼cyù¸í¶ P(Œ>9”W¸vö§é¥‰õ®Ãc»$Ž Bã†LOðšóÍrfY~E5^æ¯é½‹¯z?{_û±i\ºÎöÁòM×M&}ùåñÍ*9ñF­Ü ¯'|½õÖ[wù%Àç1‡¥tp‘qº®{-x/dÖwTôBv`cco|ã÷ñìgÿW]uQý:ŠÅ"²Ù,öï>ö±8$éÚÐñ(ŠMÓ ñ<«iÉ'Ƈ®ë^hïO\ˆJåëxÑ‹žI’ IÞô¦ßÁüÏÆM7±Ê¨Žã X,BE¼þõÆ&+ŠÔû{“‚OŒ^ÜMQ//^–e¯ÏËç´KÇÉdZe”·ŽiÞd¡µsŒìÆ•»³³ãÞu×] ®Ý`ˆð¬¸—9¥RÝý_tYhªËB¦JÁø½æP_©~¬ÔÈ®ëfÜÆ0VÍõÂŒyøt¹ën§ø š®ÑÚ*×ÇËŸZf;þÏÕú˜Ã"#”Ÿ#ï²P]µ~ ·>†æP­þ9µÀ¸ø¸ƒ×âßA&ãºo~”=wéïXŒeÓ´ÆñðÏ­Õ?“$±/,ò×®«žXª¿w>ïÇncŽJþßlZÃW:1Œ¡jÕu/¿üGîóž·áæó½Ä¹Ue¡a=…¾V«~ìd;Âbs2ö&}†ÿ„ …l³+IìxµÊþ Ÿ¬VÙ8UÕ=æqÈ,æ³ó8xÜð.bŽ(¤lr‹C···]UU]QÝL&ã‹E·X,Nuî´1kò2Éùؠ˸,D’$·Z­ziÍ¢\b¹ŽRêEß!†­ßýQ­Vëëév‹,󔇰ãüXµJ¡½ãd7òò´~ ÜÍÍM>|¹\Î;vâÄ >|›››“¶¿w…×¢CàøÍ%Ib!¬<Ô7X±7S?f÷ðø^QŽ/¬VQ˜³¦ð›€x^ý|sA¢˜çV©£9,7xí€c›À«`O~Œ!ÌûÛìÑå!É*€*˜W”ï,)ô,°¿_ÂRP €žô+Ðw©LøáÀ‚œzŽço2·[&Ãâ-_eãáãäŸ;à…ç1™ _ø@üÏ‘û{eU@È’Ú¸&yt÷ðŽØÁƒ?ãdñþ÷ÿÀó²ìîaäa:ôiÒ4@{Û&A3 „W&jw=Ód^E ð>òxT*üµÉ$ §H§[ß7x­DÂ÷¬†uñN§}*w/„¹ Á÷|no3Ù-ÙϽÄ©*{>…iN„tš…uLTƒ"Çi}^ð¨]G^•R’$looCÓ4/lW$×1dt]G2™ÄÂÂÒé4²Ù,t]G±X„ 
^𝆠øÑ0ÜÅ‹/’7iÄd³ì‹'ú&™Lbuõ=ÈfEd)uï‡xwz.Åáa²à×êÞþ¾<ä¶9ĸPóŒÄÆ2¤lšÖðqÂC}›«FóÐ_I╪5WE/ì>ŸÏ»²,»²,{•z‰Á•¼LëúÝ/Á°ßv¨jã²Ç—ß°abÄ( »ßhšëf2´~w¡T*¹ªªº’$¹ÅbÑ m¯VÛË.×÷y7brŒ%$¸§OŸîéy¹\•J'Ož„eYˆF£ØØØðί­­!²,œ J%ÀqXsúR©äyQUUE±Xô<[Äô2²Ýö˱¬ÆÓlõ2Fû b„ˆ"»ßh ë#­ò>Ë2®ë:âñ8Òé´×ÿY–e˜¦ÐNvy?wOÌ}¬‹‹‹ˆF£X[[C­VóŽ×j5¬­­`U„;Q«Õ°¹¹‰cÇŽ!aÕ`ßö¶·áÀ€J¥‚­­-,//"‘–––P,1ld™)\÷ÔE½5H[Õ àGÿxá/­Â‹™ÁŒÐtšåur$?þ1°pGàZ)àÿü„–6˜V”JµÏ…ýð¯ßü÷ÀO}åÝ™&Ú"øã¿ðF#BÓ|EŸk`XíëÏb¿ÇãlK6 †´Î°íÝ•“ä9©Å"{í¡œ&ùnæu¯ûC¯)v¶Íþ®ñx£F³[xî'Ï5î•N²ÅKôõ{Í^P”Ñ\wN™fß ßþö#8yrßýî& ¶$³ªÖ–—û—Íf‘J¥N§‘Éøm9t]‡Õ4_R©2™ ¦3Ȭ˶ã8^øyØæˆ®ûj@>ïËq ÿ\?@Qðª»îÉÛ̲ŒsÙ. (‹ iŠÒ¨b:°o_«:ê³Søï,Ò·‡5›Íbkk W]uŽ9‚#GŽàª«®‚išX__÷&@;x¼|,C¹\ÆÖÖžõ¬gauu°³³ãçÄb±¡ÆÑ;ŽŸ{ä­çPþr©…”ïé4ÁŒÑTŠ­àGb@<¼ór¿( ïƒúäû€ÿù·þÝÀ0€g-¿äúo,À›n¨J5æÛ5Qâ¹|ß»¸ù3üš ¿X3dØxö_ hM[KªêWuâ¤øûe€í+•b†isþ oŽ9ˆWil×Nƒ|‡ÁúäYá+Ïÿäy›í …½. Þtr˜ ;Ï•Ý#r4ÍL«Œï†O|âKe ¿ù›á/ø{¼úÕ¿ Q~ã7¾„d2 UU±½½í)HÜ 0 ñxÜk£ÀÖt: EQÂç1õ̲l;ŽƒD"QQ(B7L¸ÿ¶œ6§!&€¦áŸ_úÒ‘\zeÜqÄãq‚€|>ßRœ.¬F£ 4:¤‚4÷n'fƒ®}X+• jµš'¼¼«išžà/--A–宯*à‡ ¯¬¬xãôéÓX__ï:)jµZÛ÷¸÷Þ{që­·¢\.7„ Ã0û¯€Ì‚Ko¾ßs°±b®¿ò¨æqå}uÝ78ÿùÀü—ºÇTbÞÐÛ +&ÃŒËkÎîwg ßâ¡”¼ð‹…ð×`ˆ.Ÿ‘–>;Û…6rø–j§mÕ9ºƒmnn⦛n™gž9ôkJ¾à¾ûîÃÊÊ dYÆÒÒR_ãŠÇAp«òIjïi öåLÁŽ+ÎÊÊ î¾ûî‘\{ÖðaJYØÜü7üÝß<3NC×  …BƒáÉ•¥t: Ã0Ïç!I’g´f2†R/†‰pxõo{Ýæ¾“Ëåpûí·cÿþýC}ûi]¿»ÁUUU;ó"Ïé˜ÉfýB”}°rìîþö·Q.—»F-öË,®ß|ó°l§ÓL¶›¿fÚ7œN¸ÞïúÝÕ`€ T*,..B–eÄb1¯ ¿ww¸Pçr9¬¬¬àäÉ“ ¡ÆÍœ>}ºíd¹øâ‹qõÕW{»Dp@ù^ã±Gîx¶h³ÐZ®‡›hÌÑ”êÇ4Æp—{2Æ ¦AòùFC·UmŒWh7á$É7¤ƒÇˆ®,--!âÔ©SC¿ö¨ä.ºè¢]Ý, xõ«Äã·Ùtnœ´Û˜˜£ ‹½Àúú:VVVFríiXÃŶmlnn¢PÐ Iþx2™ E(ŠmCz3M© ’$¡P( ‘HP(ð ð6Q¼ÀO:ã=Wê¹æ«««8tèÐÐ×ði\¿»4VEQmÙWä„<'„·XSÕö÷Èæ¾W¶Íþ@<²)äÅ×ïa«Àl®ßŽãt܈áûíÄl°[¼kHp4E6›Å7ÞˆX,†ÍÍM9rkkk^¿Ò~8tè4õÑ£GQ«Õ°µµÕq‚F£Ñ¡|YšH—¡¡Òw¾óM“̓æ…B’˜Ñ¼[¤M9³&ã¶m{º ÏU媌ãŒ%ð‚Ã`s&›eÀzýç~æ3øÁ9çôu©¾<¬[[[X\\løÝ4MD":t¨çØõl6‹••oBð„oÎêê*VVV°µµå%y=zt(ßeù€`ÕzÁü=íEþ“°\ÕŸú—֦хIÊwtº‡zéôîÛ ¥ÓTxi2-2Þ‰tšéœ…‚ïDªªâî»Ï¥]yÛf_ Wâm›)áù|9ë<Þ”çä_Û­àß2 ²ÍqQ/‚^§*ž ;ªÓ‡dn2™N7–A˜|;»Oñö|Åbk?ï°/ˆÏ…9d–dܲ,hšæm¼7döLäÀ¨±m? 
¡ÛÊN®\wø½$èás‹7À­/Hw_|1í3‡µ'ƒµ\.CÓ4T*¯|¿išÐ4Í ØØØÀ±cÇzª~‹ÅpóÍ7{ÞØ ÜËùÝbYõ5OóˆÖ VÓ4qEü \€ Ø^\éÌwò|.XÄ蘔|±m¦C‹L¾Û¶³Ùí† …ïi¦AÆ»!Ëþ^Œ(2Ç!` P( ™¤4^¥Eé=¦Ž/,|íàÓ`q¤gd›cš¦ç} Ú\A'£mO¡Ýeš½WÊI§Ùó‚жýŽ \Á6ŒÖb—Á×ðÜèx|Æ-øÁ™%·m‚ ´§ñ8õ \äÂøühéÿ 6‡¸kl~ï@ûÜì`7’­­¾‡Ù“Áªib±²Ðž»Ê{¯nll —Ëõ\®=‰tœÝÎïèª[–å/3AËcýÞSsq&ÆÏ$ä;H6ëGÑu4Xw+ß–E…Áö8“–ñn 6Òi‚ zÇæÓôJÞœ0,œŸý¸' ÃoÓÁÃ&ç”i—mï­mšLWä2í8¾ÑºÛš‘Â{}wë×Íe¹Ùâæ=ìMÓ÷ø´kÉDQÚ7 ÝcÌ‚ŒÛ¶íå¯6W¿æÝ#÷|¤Ì0àmÄT•ïìúïÂæÕ„v º¬[[[¨T*¸ñƽdìZ­†r¹ŒcÇŽyÇ–––ËåZ†§ ˪ÇI°j¿^ûÒ¦¤n̨½üw&=d‚ØÁ{¼mÛá—xyý^i®n@7}bJáο xëºY–Û¶ý˜ ¸•›ò‚5úp`ÓlÍ/%ˆÃËkX–5ýiíàÕÞ ?Z“‡ù~!ÓdÇùçlWdŒ6€ôÌsW766°±±áå³Î )6{ض Apö5g{ýX b–I§C!MÓ®R³Çw÷ˆÙ XO,›ÍΆÒC»çDè:ðÙÏ~i´rÍ+ÈO{=1—$w˜‘Æf‡E' }¯y\…åw¬ ͱžéÉ`ÝØØÀÒÒ,Ë‚eYXZZÂÆÆFK»›iÅ0-ët&Sæ%IbÆjóæE°44AÌÁ.5N}‡f¨y{”·JL9AcÕq˜¦ u‹R˜fã}† V¢GÁÄë_ÿ¼Ñæd7÷ᡊº,,°¯ œLNú+!æV7mV[e³¾Ç4¬9²¦1E{[)b­gº¬¼]Íòò²wluu¼V7ÓNCj@êP=ðw?b†ÖliÛΆ³›M™ ÁJJ 1…óWu]Ÿn覆‹ ¦à̪‚FŒÇÑG¿ ÃóWý7e ¶e±pÆím_^In‰!òÇüo°m{ô!ïýâ8½éM†Ñ}NäóLŸ !ºÒ“‡hÌUD"“w_4l^«èn°ÄŒÓuÁÔऊuÄ”À?.ö†aLλÚì=mžo²ÌæŸG~IôÀ¿x¾þõ³Æ*ôÉ2ËÅËçýã’Ärëf1l“˜ZŽ;c:£bt­ã‰3Jyþiósz-T¦iUÓ'=]še‚abìw»}8M0®’ f„æˆB˲:‡Ô ºPÒ!¦„TÊߨæN!?‘*“¶Í”?6¹1—‰£ªÌËJ»ìD¤Ó6b±#ý¨m³HУÖÖ”kbÄ|å+ßÁÓŸþ§PÕµI¥]gÑŽãïŠb£.ä8{ª/긙{ƒÕqšÖÙ`kJÂS/1óLR¶Ã°, ÞQ¼¬õdX•_¾££it]ÓÌ´Éøç?ÿC¼ìeO~!¾‘ØMG±,_ß—e¿Í ß줜թ£g«iš8|ø0VVV°±±á=VVVpøðáïÆææ&*•JhxA§IQ«Õúþ€Íß<òHgežòˆˆ§|þZ ô˜¿ªª»—sIb 9í>îiÆ-ãÍðTê0ºzXe™ að}ºí¹{ï½·ÞzkK5ãííÀ/2°qéÆäªGSÁææ&Ž?Ëê%F¼?F%ßpß}÷aee›››-ç¸jÛvwù&ùŸ{VVVp÷ÝwäÚã^Ûéä0ê)^QZÃ{ý ø+Ïùã›3²ìïÈëúÔnÚX–…D"ÑðH× ˲¼Ÿ›IB¥ Ã@<Ǿ}û°°°÷œr¹Ž?Žo}ë[C½î¤Öïn†Ð¾MÏU æS›&yˆf¾~÷ãê•I¯ßípà-où…Á>\0r&8š¡ù1Q¸ÞïúÝSHðÚÚZÛ*a²,C–ehš†µµ5ÜrË-]‰D`š&LÓD¹\ÆÎÎr¹–––:VîTZûâ‹/ÆÕW_Ý5)¼k¸1÷,--!6„¶‹QÉ7\tÑE¡m£Òi?"±c˦aA}#§žõõõ]+ݘôÞ©7{[ïj2É hl懵×bKŠ2µañNÝ¢ç9g¶m#‘H@Q8ŽÓv£Î¬'¶m#•J¡T*AE8Žƒx<I’¦f³wuu‡ú>‰õ»Ž|ík¿Iº(ü ™ {’®3¹äae´FÏ,|ýV7Ž “^¿ÛñÓ?ýi~^Ódë·ª¶_ã§÷už »ÕÁ»zX·¶¶P©Tð¶·½­ãó–——Q©TºîuËy=pà€Æ°„J¥²«>P¦ÉÖp8§|Nç–61ã”oNp#±'ë HÒ`fˆ™g2DÛ¬º®C Û9o^Û%)ÜËÚëfÌ JEÑ3<{§ðû¤ Èçó=¿~–™´l‡ñÅ/~W_}Sû'ð°_^_€zF˜F7M¯}íw¿OÓ»I$¬…X2ÙØÂ†˜ºzXËå2¢ÑhWaå»2ÝbÜ&K.—€†]™ÅÅElnnzÇLÓlWê^[ÃÃè°CdÛ,îʾ0NùZëõ9™ Û¢ÝÉ=˸e¼™v›ã¦iB–åÖ É° Yö‹s4?wrþlÛn 
ý•eÙó¢v‚‡ž.,,@Q/Šj/0iÙæðTj&º‹Ã‡{ðæ«ªïe‘ÍbüL‹Œ›¦¿ÔÞrKe8‘aÁ>ÃÍn?ót5X#‘H×ÜŒa³ººŠ••lmm¡V«!‰àèÑ£}_‡§vp¾ø_¾ˆÿíß̋Ĉ–|­2ÞuÑ7Mö¤ª)¯ŽJm¦Œ7“L6Õ&¨£ëzxº‡m·¦²^˜c—ÑíòBÇE/Fe/Æ*§T*Á²,†t: Û¶Q,§&$x’ŒR¶9ºÎ6#H§Mx8{'T•*¸CaÔ2nÛÌᩪ̾üÄ'®aÜ1ø…{™'ÄÌÒÕ`Åb¨Õj]CxÁ¥H$Ò×ÂâÝc±n¾ùf/¼x7ms8ÁµûöoÞÞ^¡ï´3C»dÔò ø›écÉ_(tžh`2αíðhGÛ¶aÛvïž@Q ß Üåü™´²yÞ‹¢ˆL`SÊ0Œö}ܰ•eÙ3PÓé4t]G~Ý#Ç)ÛAüŽl¥§u]ȳJôÍ$dܶYT® 0uâ¼óФ«¿0m¨Í5=¬±X š¦a}}=Ô ­ÕjÈårX\\Z’x$x’4´&Kûöáà<ØúÄ^úòĆ|!˲zó°*ç”ÃJôÀ°d<ˆ(†t4Æxsøfšó˜˜“6X»ÑÓÚPÇqèºÞò™2†×œ›¦( DQÄ‚×Ç4MïõÄèñ;stëæ|‚˜‚‘ë#¯»AÌ =µµ‰ÅbX__G.—ÃÚÚZËùÅÅE¬®®Ž¤÷ Ø6@@°vø.±eQN1“¨ª¯c÷Ô²iÐp^ò®$‘hÍ_åJ}[`¯2¯(sQÇ@–e¸®Û÷¹àñ|>ï…Y ‚@ å±,¶¦sE¾­\7„Äìà8þRÛ¶Y3é4{QØí8”¿½èÉ`|£•·®)—Ë^¸p4E­VÃÖÖÖHÃdúe{@€@å¯sGP¤y;ŠŽÌITbïÑN1M³³wUz—ù9ñ°ƒ`kb|H‹òM§Î¡æ¤·3J0Š=ýÑVŒ=‚ë´m³ß5 Ö9§gƒ•Ã[Ü4/¤år+++m›’O €Ú¡ M°¶6A̺îŒE¹l·ÃI#†÷„o=nv.Ô©"¶i²”Rü‰)Ã0 ”J¥ð“ÔZŒ˜z –$jM³Çé)‡u±,¶¦Ã ³ü¾ÐÐI¢&ÂÄÌbì˲z+ŠB!½ÄŒÖµ§¨‚NÈ23Z bJH§o,C–åð5]×Ùƒ6YˆÄ4™ŒlSfÚ ÖÓÃܬ¼ð/ê‘bmwqˆ™…‹tÏŠ;å<3J˜Á:…G–ÙÎÏ´E{˾þõárÍÕb‘Â׉™„çh³Ÿ­Îë·ã°Â96X%©*Y—lëa%ˆ†G;öܶ¢Sx$AL1aǺ*<½ ªlË_×'ý ù|— v2V‰FÓü FÓ4»çiSè;Qgn VÏqZß4ï«©qâVVV&=ö¶4Gú¶õ°Ä Ã×ôž .ÑâOÌ íŒ¯ö`±=b4ZÇA:öB¨ƒç¸—SE/÷5³KÏF/íµ Ã€(Š(‹Þƒ™‡7³Wîß]ô”ÿýe³L»§°HbNà!Á]uA˜«ºÄp˜ï` €ÊnÖ-“ƒÊe3OkÊf{Èå3MÚ¥'f’D¢Õ©Hù«>²€b±ˆd2é?üûRUétŽã ‘H@ضL&AÇ‘J¥¼×d2Ïã†išØ·oŸ÷»,Ë( Ïó¼Õ ²,#Nc{{¦ibaaÁ»ã8^^툢(ž›Ïç½ð`A&–»:Ù"ŠÀÏýÜgØß‰ò÷ˆ10nçKGÏë7A˜ˆÁZ.—¡i*• "›Íz“¦R©@Ó4o2ɲ&ÔQ¬ëæÉ¦ ªÙ¬f`Û´ƒCŒ„QË7§¯ªïäa%†È¸dh•ñž‹Œ{^éïô=ÛÇôúšqÑnlã`œ²D€G¹‰„â7Ø&ˆ0)E2V‰Ý³«ÖAYYYÁââ",ËÂÉ“'±¸¸ØÐimm ÑhÔ;_.—‘Ëåú£`Õl¶±èŒ(RH01Æ%ß©”ߢ'HÞ‰!1.çU%uÏ“ô ™o‡áŒM? 
à8¬ë˜eY{ª1&!ãõLƒÎŽ–µ'ë½Ñ³Áº²²â=¶¶¶ZŽñG7¶¶¶P«Õ°ºº ˆD"X^^F¥RA¹\F¥RÁÖÖ–——½óKKK»Îa±mÑlݱTUò8Cgœò-I}ô` Óü bŒSÆ'ÜÃJJý÷rÚ·~ÂiénD¹«Äˆ˜„Œ;Žï/êh°u3 ÚÒ5$øÀžàÙmK›X,†õõuD"ïØÎÎ61øÏÁ˜úX,æ….ôE°Ó6nøò—õuR؉‘3NùÎd]ïq7r-!ÆÃ8eÜ4à ֽÐâ„?cÕOÈ2ðµ¯}ŽCÄh™„Œ ‚ï/êXpɲ¨:0Ñ–®k4õvb†A$i0v+• r¹–––F=ïmµZ­a’õÂË?ÿyTD„vä‰10nùnÛc˜ FĸeP¨žwÞ°¾c‚艱È7üÈ™® ¨ãPå=b¨ŒKÆ“ÉÆß{nic@½¯'ácÛ6LÓô~×u‰DététÚÛp6 ‰D‰DFÝhYt]G:ö^3ŒK¶9¬ë9 Ëb+AŒqʸaøáÀ] VÚ¨!:Г‡umm-´S,Ãââ"––– iNœ8Ñ5ßÕ4MT*ȲÜ+¿¸¸ˆh4ŠÅÅElnnz×2M34Ä©#ðÕ“'ñÅ—¼'ý-{†qÉ7ß±´{1DuýOžXbŒKÆÃ‚uzîáG^ªƒRUU8Žã­¦iB×u‹EX–…D"Y–aÛ6Òé´W4‘H@E8Žƒl6‹B¡MÓǽ0ãyalú‰÷~LL½È°*c1DÆ-ãÁ:J¶mCi·‘hYÔ†ŒèHWƒ•—À~ÛÛÞÖñyo{ÛÛpøðaÏÕÛÞˆ8,\ïî®®®zísx’÷Ñ£Gûûdià!ùÇø·_üű©ÄÞe\òmÛlmïIywœ=¯¼Ãc\2.I@¡Ðxíž#Óllc6j:ér¼@w,U%@ ƒÕZC ÖŸŸ®?Ÿçó&:<¿LÓ„ªª²,{ß­®ëžá ²,Ã0 Ȳ Q½çq#vž›~R‡×˜±,‹)òºN‘ÄH™„Œwõ°îáô ¢wº¬årÑh´ÅåaÀ~~gg§£ÁºººÚÕ ‹ÅpóÍ7{kW-tLÀ¬¹È×!f‡±É7Ë2{“oª¼G ‘qÊxžóW'±SßKר¦RPÑÝÈ”B®Ýc‡ªLsÅ*´Fd7¼‚k‰ã8^è07h÷ã–mÛf{‰©”ÉZ5¥Räe"FʤÖ¢8Þ Fb&éj°F"œ>}ºåxXñ€aÒ\z»ovÓÝ+7Zb¶T¾y¸dW«i6nqĘTƳY¦Ìwç{*0fš$ï= LÓ„,ËpÇó®pÏ)7xç5WuÖOêXð+¿r[LJ¼LÄÔ0 ÖQêºáH²Ot¡kÑ¥X,†Z­Ö±7ïün{4 ‘í&“‡•˜GªÕ*r[–_Ož f Ól è9$Ø4)¬À¾}ûa9­¦i"•J!™Lz÷JUUaYR©R©”gÔÃ'Ÿ¾ùÍ¿er¬NCs€$ù­ÉlÛ¦u„ˆ®ÖX,†X,†\.‡X,jÖj5är9,..ެ w_Øžz éø &AÄá8®}úÓýûb±QÙqVbµP %ˆ˜I‚iM=[X…=¾[Ö҆ÕFÇqP(X;IòŠ.@©Tò6Äø&,Ë '/ÊDìQ (ò©[Ç bŽàKq_5"„žª;v +++8rä–––ŒÒS§NassÓ{ÞT`x^ßúʨB01·8Žƒh©Äôæ¼½xœ§1£½«ÃÉx^&׌h¿g’É$dYý~I¹-–ŻؘÈH“Û=¾ÑBÌé4SC§Ã†£ãP8<Ñ=¬±X 7Þx#666ËåZÎ/--ayyy ^dCçÁ±oÿþI‚ FÆ_ý*öïßÞ®¦T¢BKÄÌÒ\'¬c8™®3eGí³LîGEÏà €ÂõÆŒmwÝõ$ûEQ(Œ˜;L“…†ÙÞ`Õu¶ØÓúMt¡'ƒ`U€;†ÕÕU¯r€¶aÂEþïsn·܋'=‚²m튟‘±JÌ0ÍuÂLÓl_pɲHÙÙ%¢(öjM Iž|ÒÂ3žAÄ|ÂóW½¶MaŒ»1³ôl°r†Uo¤ÀßùGœõ³?;é‘Äh°m,ž>M»òÄ\bYþž ïõÙ¶€õð#fQtýohÀ˜[xІiÖÛ6…Aë7Ñ#]«Ï,¶ ÷…/œô(b$d:¶ \tÑSxËOÂNÌ%<Ò·k„ ­ßḐÁ*Šxàì³ÉÃJÌ-¦iÒ† 1÷ðüU )ëY–=/«a¡Jc*•‚aÈðê(ÄX°màg}½à“ AŒ^8Ï4Íö+å¯}ÐwÑ¥™@ñÄ;&=‚!´!CÌ;í -SÕ»¯)]´>|ËbC–$îlÛ~ ÙfD‘=Ç/@ŧ{Ø{ñçQÙlŠ¢À4M¨ªÚàuåÇTUE*•B¡P˜ô׸g0MàÙßùGcÒà ˆ‘ ŠÀ+_Y@ÅO‰á1—VI’È`%æÊ7#æŽÅ:ˆ¶È²ì}o’$Áu]ï\ÐÛÑüÝjšæ…__C ŸÿðØbóŠ+&= ‚ ’XÖç(Å€*Sïa-—˨T*}½F2X‰™`7ò PŽ61;ìVÆ-Ë Wxl¨·b!ˆI³ùÞúÑpÖ+_9é¡DWv»~‡n8:H°ÿ ¢O¦ÖÃZ©T iÊå2¶œÍf'=,‚ $ßļ3ˆŒ[–åo:òʶ¬G3V© 1a‘ïOF"ÐhÓ‘˜bÕQZê8L²Ä} &vÁÔzX×ÖÖFaYNž<‰r¹Œ\.7éaÄP ù&æAdü±›oÆÇn»íƧÓì‘Hº 
Ô˜8ƒÈ·(Š%CL5ƒÈwKýÃâqf¬¶«úN]˜JƒµR©`kk ËËË€H$‚¥¥¥öÕÆb† ù&æAeü?ðœûàƒ@±Øø(üþ.1!•ï ESÌ òÍ‹¸Á¶™WU×ÙúMÆ*1Si°îììb±˜w,‹í*Ž~ØÜÜôÂ.f ò öÏ^“o`vå¤\.csssÒØ9öšŒÓ¾·Økò=«2Bë÷îT¾EA€˜Í²¨EaÆêו¡õ{6˜ÊÖN“¢V«!‰´øá‡qçwâĉ¸ôÒK'ýúâÿüŸÿƒÓ§O£V«Mz(}sûí·ãСC“F_<ôÐCøÊW¾‚‡~x"ï¿ùæã>qâÎ?ÿ|œþùûn™E9€»ï¾wß}7¢Ñ複Ò÷¸ï¿ÿþ‰½?­á³Ã,ÎÍ{ï½ßüæ7ñÄOLäýw#ß´~ŸY_¿+•ÊDÆ>èú}Y4Šýû÷ã{õWìäÖÖØ?C?Ðú=^¸Þïú=•k'¡9}útèdyÚÓž†jµŠ“'OΤÐU*•™÷þýûqêÔ©I£/î¿ÿ~|÷»ßųŸý쉼ÿnäÎ>ûlœ‚˜%H¾‰y‡dœ˜gH¾‰y†ä›˜6¦Ö`Xâ7/5½¸¸8éáÄP!ù&æ’qbž!ù&æ’obš˜jƒ• ‚ ‚ ‚Ø»Leë¬R.—;6.—Ëû[u:ßíµÃü Ã÷8ÇNŒ–QÊw/ç‡õ†=6’ïùaŸWùר‰ÑBë7É÷¼Cë÷äÆ>\b`îºë.÷šk®q<è}ºãù‡z¨ãk¥R©`ccÃkóçÙí¸{9OÌ£ïQËI'ùt죞›ÄøÙŒw“ƒY•oZÃç Z¿Ç;vbüÐú=¾±O 2X‡@¹\ÆÊÊ Êå2n¼ñƆݒ ;¾™h4Úñ|§FÍÑhtàqonn"‰À4Mär9”Ëeììì —Ëá¹Ï}î®ÇÝËybv•|ZN:Éw¥R™ê¹IŒ—ÝÊx79˜Uù¦5|~ õ{üc'Æ ­ßãû¤ ƒuhšæÅÑ7 Kºç+•Š'0Îw{í :t¨í„½à‚ v=î^γè仗óƒÐI¾É÷|±[ŸWùõ؉ñAë7É÷¼Cë÷xÇ>)È`Ó4Q©T Ë2¶¶¶ÛÉX\\ÄææfÃk‰D×óÝ^;(‹‹‹X]]õ±X Àêê*$IÚõ¸{9OÌ£”ï^ÎB'ùŽF£S=7‰ñ1ˆŒÏ«|zìÄx õ›ä{Þ¡õ{ïÈ÷>×uÝIb–Éår¡ý“À²,~¸B4E­VC$i¨jÖé|·×û³”Ëe¬¯¯<î^ÎÓϨ廗óÃü,Aùtl$ßóÁ 2>¯ò=α£Öïé;1:hýžüØÇ¬c¢V«¡\.›ït¾Ûk§uÜ“;1>fYNfunãe9˜Uùžô؉ñ0Ë22Ëc'Æ­ß³/ßd°AAAS å°AAAS ¬AAAÄTB+AAA1•ÁJAAAL%d°AAAS ¬AAAÄTB+AAA1•ÁJAAAL%d°AAAS ¬AAAÄTB+AAA1•ÁJAAAL%d°AAAS ¬AAAÄTB+AAA1•ÁJAAAL%d°NŽãÀ4ÍIƒ FÉ8±—…üÓœ"&ɱW YŸAôJ'wÇ“9A ª*A€®ëžÌs Àã8PU•䛘F%ÿ™LÙlûöík¹f6›m¸Ä0h'Ëü¸aY–dT’$˜¦Ik613 "ë²,·]Û’íÝ@Ö Ç‘N§0Ã3™L6œO§ÓÈf³˜²’L&½IÙlÖ{ã8ˆÇãÐu½áœã8pÉd‰D–eyçù{ïæùÑ dÜq,,,x.†axa7Žã´ÈÛ òJòML‚QÊ*•‚axà®™H$<%ˆ †E7}¥™ ŒÚ¶Mk613 *ëÖvþ|’í>q‰‰‘Ïç]AÜíím(.ÿ³loo»΋¢èʲÜp¾T*µ\Óu]€›Ïç½s’$¹ªªz¿kšæ]k7Ï'ˆnt“ñb±è—¡jµêʲìnoo·È©Tr¸ÕjÕu]’obúµükšÖpjµÚ"×1 ºÉ²ëº®,Ën±Xô~Ê(ÿÖlbÚ†¬wZÛùóI¶ûƒ<¬ĶmȲ Q½cÁP_Ã0ZÎ+Šâýlš&DQôª—ç»4Á× ‚à…#´£ßçD'ºÉ8?žJ¥`š&A@±X„(ŠE’$y†a4„Ô_|ÓǨå?. ÀÛ¡Þ'bt“åv4Ë(­ÙÄ´3 Yï´¶7? 
Ùî2X'ÏKm‡ã8]_Ïs•‚æALŠn2.Š"ŠÅ" ™Lbß¾}H¥RÞyEQv’mb–·üK’MÓ®Aà›,ļ0 Yï¶¶ýCëEÑó„r‚Fª VI’¼]þ( Ð4r—ˆ© ›Œóâù|Õj¥R †axyØŠ¢À²,d³Y8ŽCž#b¦·üçóy¯På?ä›,ļ0 Yï¶¶ýCëQ¦iz;èÜ[|š¦Mz˜±+H¾‰y‡dœ˜WH¶‰y‡dœ˜&j°nmmacc£åøÚÚ¢Ñ(,ËÂÉ“'Q.—[&AL;$ßļC2NÌ+$ÛļC2NÌ3XkµÒé4dYn8^©T°µµ…ååe@$ÁÒÒŠÅâD¿(‚è’obÞ!'æ’mbÞ!'f‰¬kkkXZZÂŽïììb±˜w,‹Q=±{,kìoIòMÌ;$ãļB²MÌ;$ãĬ±+ƒ5—ËáÈ‘#$ ¹\›››}… lnn¢R©`uuµå\§IQ«Õ&ý}“F×û{¾i‰{8ÎX†HòMÌ;$ãļB²MÌ;$ãÄ,Ò·ÁÊ ÔÅÅED£Q,d`ss+++]__©T°±±cÇŽ…žï4!NŸ>Ýöܽ÷Þ‹[o½µ§1SJ*ØvççØ6w'ŠE@QØë, ›››8~ü8¬x^G%ßpß}÷aee›››C71D, 0Œþ_—Í2yÝ-é4ÛÐqüÕ•WâÕŸûÜH>­áĤÉår8~ü8¾õ­o õº´~ÓÀÊÊ î¾ûî¶Õ{ÖobÒp¼ßõû§úy2ôõõu,..z“I–eD£Q9rår¹!” l ‘H¦iÂ4M”Ëeììì —Ëaii©ãk¹ÆÅ_Œ«¯¾:tLj˜2R)@€L¦ñ¸,3Oh¡HRøk3f ¤Rþ1ÛD‘½FUÙÏœR‰ý/Iì‘LbIQð‚ÿôŸðåÜ F%ßpÑEa}}}èc&†L2ÉäÛq˜<6“NšÆž°çµ›ÛfËb×ö»®û¯Ñ46²Yüº¢ óÓ?=’Gk81iVWWqèÐ!œ:uj¨×¥õ›˜Ö××±²²ÒQÞv ­ßĤYZZB4í{ýîË`å±í‹‹‹-çb±b±X×C‡u<Ïãé+•Š79‚?374›ý0/¨(²ç( SÀ¦Ðsåž?OQ_Ë•ùNH°½ d³¸lyßxë[‡þñH¾÷8Ù¬/»‰;fÛ)¢È¼ýüwn؆·€yÀ7q ö3¿.Gü ÷Žh§›dœ˜WH¶‰y‡dœ˜Uú2X#‘2ÀæÔjµžÂ ^žûÜ•Y\\Äææ¦wÌ4M$¸òG̎Ôk®Œsc5ŸoÿIb!¼Ù,Sê5y¤¶·;¿—(6zV;¡iøê•WâÑ!ïÎ$ß{ÓdÿÛ63Pù† ÷ä “cn\e3hœòÐöN²6_t½GóÆÍ 'æ’mbÞ!'f•¾ VîE]YYiˆ¯T*8~ü8¢Ñh¨÷µ_VWW±²²‚­­-Ï8>zô褿+"ˆi2¥YÓ|ÏO:Í~æJ´ °ðÞlØ¿Ÿïd¬rxh¤i²×¶ “œQH¾gÛfòhš,B€©A F4y5Ñ3$ãļB²MÌ;$ãÄ´Ñ—Áºµµ…••œqö¿ 0ÛBØL“ M’ØÐv)³}…×§Eß'fÇaá¹A™ÇÙ†Ið›J1!k>΋!5G AĨpÖò,“i¼'¥ÓL9 Þ«R)œóƒLzÄ10á?ÑSåè i^Û £³ÁšNûÆ./æÆ+O½í¥Ï¥—>†ZBn&¿i§RÝV.„í„-“™»H±—Fë„ѼÑIC‡{ºÚàÊVµÊ^cìÁïiüZ‰„o¬L‰ SR$ i¹ÄŠÑÛ“þˆ= ÷b…xÒj€¯f%tö³$)€Hvˆ“¨®³I³_mø£?: ‘H¤¯¡õm°À¶™dðÖ0|7Ã0€èg¡ÜÆŒUÞ}ƒG›(Н4Ó.•‚‡Î‹oÏUÀ÷ìóÜé~UÀW>¡ñþN‡ÛÁc™Lw‡˜òyf†ýáyîO;w=Ac#›ezúövë¹°:gAøô¶,?Iý3¾ñÈë$“ì^‘Éø§œG½W^ùIļ‘ÍúŠP>ï± *E¼G<ß­—e8’Ìtšú¦Šã–#AظÒáhã{´ÙuQUÄ0I§»|ÂG „¡(6¯ñ)ÁSy„£®ûç¸c+/IÀù/÷áÔ©ûúú »2X777aš&¶¶¶°M²,S!¦Q!Š0~ûËp6¿õrÓ¿³—JÈ7 UP(¸ð=ª@c7 ªÊd8¸AÈŸfÒU$(ô<’ ¸+ÃC„ùó‰V;ì33L>ÏþÐÁ?<àç­Î@˜¯mû!óí†kš&²Ù,E¢(ÐNlÛÏA—¤Æn=wÝõªIÄŒÁõp9Ãñµž×„Vo§a0Ù/}48¸R0Y-}Y +†Çwã9¼^gkënœ:EiÄ. 
´,°,&÷² (/ýï{»õcY›¨¼Z¡Ð Ôóy£(¾ÁÊä; é0{ÎÂ;t^ñ9Éøµ7" ¾þ½ËJp¿Ÿ??mHãã/üFËâ›N– h 7]¿N´5š ®Ùƒò´~_Ëå°¶¶†h4Šååe,//#bmmÍËoUxTÇTÀ=¨|l;KKHX X„£e`;Š.%oF×7S²Ùðhž:1®Ý=^ IוÃ0°o߇®ûÏ£ê¾{„B¡5, !®}òp}…w×±,àÈ‘;ñïÿý_â—~é'¤öëH:F:†ªª°mñxéæÎï™H°ÿŠx=ŸŸúÔÇ~ð9È2ðÓ?}錄*bÊð•hF2Éä‰ÿÏï Á4q^³ðƒd¹ušr½ž+EÍsÀqZ§±,²ä@²êy}Édë ù(‰¼R½r)H³m þ‰·³µ=Ÿ÷[Ȳ'žº!ÀÎ[ÓdòÏwQlÍRÙÞöƒ„t½†¿A´ƒ—çH¥üMéT žŽÜB]±¶i$.ùW(glzâ 0ÕÿæBoç$½’a&FƒÊ7Æ'M_ÖJ¥‚ d³YÈMæ´,ËXYYñ ØY„çGZV{ão$pK9˜€É·ªëð];înç»Ü½ô3åù¬€¯¬ âíüQ,˜¦ ­‡'@p§(NC’$8ŽÓ”¼zÄìeÔ ß¾|%€ïR4ïBðxó ‹ÞqýضýsUþò/?JågðŽw°u®PÈã _ø!¢ÑWãŒ3d¨êME¡éš&Òé4dYF±X„ P™L©T Édù|–%àŒ3j8uêO`Û¾ùÍ+pæ™g®ç…4σà43 ÛÛE8N¯|åãd°îQ ÿ¥ðu•ßJ²ÙÆ*ê\ž:¥V„­½ÁÄ}÷¥pàÀ24í¨¶ámÛF:F©T‚ã8H$8xðि^"@[z—F£×2nÜ÷ Ûtm7÷û^ÅVjsˆpó{F›–Ǽ¯r§EžÇ'“l’:^øœçàk¯{Ýp¾Lb¦á!êÙß~™Ê[`Ÿ©Yó£g‘HC· áÁs Õûë¥R€ð3…LòdPUýö|€¯7ñ=Öf96 2B‰öðÍC.ƒ¼Û†(®%I¾¼;a´KÉ ¢¾òëÐß[üÈï@øßB&¯¢q©_[¹ã«¹µ}pÜüÿ~ì Ü.pêN A Š"î¿ÿ~¼ìe/ëëZ»Êa­Õj-ÕjµZÛÞ¬³ÏF Ð“ ^é¨yµão¬iLŠS©éàÉÌÜËÊ_Þ¯AÝÍ»ªë:R©¶··[ À^H¥RPš¦!•J!•Ju4ZÁ`CDrýïE±nÈ|ö³ïÀ…Vðò—çðÊWþdY† žáiYìúŒ/ žñ`Y’ÉdÃg$ ‚ À4Mˆ¢ˆ‹/¾x(ÚY‚ËÑÈ.Î{Ì5ÿÍy–`B2?>‚†‚axÑ‹~?|IèÞP·Ð,]סišÒkYVêªEögßÁyç}çW…¦ieÙk‰ÌoFÁMÓ :l{šöG^^k&“eY0  eš¦µÌÏT*…L&A  …^óš×Œåû%zƒ+*¶FzùŸ¼W°¹×2l9曬îÍJ{˜!ÞDÁ‹~´<©¹¯r;xk=©öî­-<Úg?b¾°mßX̨õâ”…DÇAṈ̃稆¯ÿüùŸßˆµµ@•¤º·óy@ZñòY¿ž§¹~FðgžW<–ªÇWvÛ´àm¨›¦ Û¶=Åþᇞô×K ,ãivÉ$Sëy /EñU%ÙÉá†bKí£°Pâeç ó{gòmÇc†§Cs¸Œ·[‚óy–w=HÄ)˜äŽ"³>?ó™Ïà‰'žèïbnŸ\sÍ5îu×]çîììxÇvvvÜ믿޽òÊ+û½ÜиýöÛÝãÇïúõ’ÔzLU]·XÂàTÕu ×ÝÞfoT(ôõrMsÝ|~ðaT«lá窮(Šn&“qUUíûÚš¦µ¼NÓ4W–eW–eWE7øüý …‚+Š¢[*•Ü|>ßö½K¥’«i?B+‡€IDATš«iš›ÉdÜb±èn×?L©Tò®ü9ŒíímWÅåe\wÝu½>ø÷×4×móõO©äºŠÂ~–e×ÍdØ›µ£PÒÄê4$&/¥’ëŠ"›vW\ñC÷üóop#‘oµO¥RÉ•eÙ•$ÉÁUøçr}™­vúluòyö‘$v¬XdKÂn¾ÿ|>ïJ’Tÿl%7“Éx¿73¨¼Œ›Yœ“ý Šìo)o>noûS¦Zõå§P`¿óÿ§•b±UÞƒs ZeKCè—Òî¦Ô…Y“—Y›ã"l­äÈr£nÖ|Ûåº:U­ö¤[q=E’$·Øç=¨P(x:G’Zõªb±èé=ù.Š[p=Ïçó zˬÉˬÍÇ^)•˜|JRû{öö6»§‹"“Iþ>Ÿ¯ß¥Rê_XÅ¿¸¦õ}3ØÞÞvAhѯ‹EׄîêÚö¶¯guúR%&“É4èPAv#/}¬wÝu—{Í5׸lx\sÍ5î]wÝÕï冯(&K©Ä„p`D±ñ÷>…MQ†h\´ƒ®ëº’$¹¿÷q×mÒ#ªÕª«(JË"ÌÝNŠ|µZu%Iò^«(Š›Éd\×mœÛ»T^ø5z™Dš¦¹oyË[fnqæ Œrõ?Á`loû+pµÊ¶ÏM™ \Îz1 Û!IR}¦è)Gw/¿|Ë}ùË?ª¬pƒ4(?ªªzó"8GúùJøï¡ {NJ[Ø8ùfP&“i;gHᙂ{:®ëO“^ÿæÍ›–L¯p¸bÔë€ê/ 
Š^³?3ߤi˜?üà.™5y™µùØ™ŒëþÜÏ}Á½ãŽo÷ýÚf[³ZmR‘¸µ°½íŠÂDÇ›S|£´|³ºZ­zúB7}#“á›»ìù’$yz Z³MÊá°û ‡_OUÕ¶÷¹Y“—ižÕ*[nø£W}š‹^>ß(“ªÚhÀf2Ãq*yƒÕ4÷ÑóÏwÿîòËÙ…4dYv …‚+Ërß5®Ë6aše™Ëp;ù-‹킱¬®Ë<ª…BÁ=~ü¸{üøqwcc£Áã: ¦v²4k*» ÙÞþ™àq4Ms>ïa×-º®[W²¹Àw]·7c•ÃVUU]¹I{Ïçó=íØÞÞîÉà­V«n4m¸ñ̃ÜÀ¶·[uQ®4˲¿ïr¿`¨(Šâ*вkyÐ4Í}Ó›þÔ½ãŽo{ êöö¶'§|<ì}›7c¸Ì …žå<Œ|¾UŸ *f£Ø”"…gzSlûQp¶·}…~{{ˆ÷„LÆ·BܸÅb±í`ËgªV[„XŒ>éºÅ¢›yïãîW|Þ‹âq]wàH‹™ZÃgm>vƒ®ëºŸþôiP<8ü|>þgVU_~¸L‹MŽÒàQdòU(¸¥|´1§þÜ £y}çÑ]Ü[f\rã†+ú|³ë@Ü3ÊiŽ+•J®$I zI·(0άÉË4¯ß|/Ž?‚Ëv:ªÉaÕ/Õª»³ºê¾â’K\UUwñ¤P(x^N.íž§ªj‹ÉdÚÊ+×ÿîÕMÆÇb°ÞtÓMîÁƒ&Óu×]çê^yå•¡oò±}Ì=xð`_žÖÛo¿½ãóïºë®ž¯×χo 9áë&‚ùE]=!E·%”–½°ç¯¥nL°Ÿ Þ5ßõ( ^.›¢(®,ËÞΟ,Ë® Þ^Ä‘ñB~f¸Q|ø²ÃîÃç=̶àºnàf’ÉdØñªëºý̧¢Ë^3AƱ¸S¾]w°X¿¼b‘齆vÊî•fe6rÂBMÓ<™ÿô§¿àæóy÷íoŸ›Je]MÓ\AêóÁ¿.ßXiü|Eo®ˆ¢Ø5y{{»ï ¢à@<7¦PÍÍq ϤÖði€+Ýüo8ê žím¦ÌöéV«U÷³ïx‡ûðyç¹k/x+B[¹¾é†Ü»¢Ñð02nd”Jue'üýø8眻ܭõu×uÝÐZ|£‰ß»:mVñuaccc¤ò2Më÷(ááŒ<Ì–ëG–;îøä(ŠâÕ¡è'2&ŸÏû›A ˜Ó,èAe,@sÊ—!®<óñuƒËV¡Pp‹Å¢g`v[ûùý©Ðæ†Éõ-AzÞ„¥õ{÷ ýmÀðìþgÉZÎ\õ® x{=êé'|Ó#˜†äº¾lóÜQ®· /zŒË*_?¹-Ð\;†_¯9µyÎæóyÏíí£•<]J–åž^·yé«Jp¹\F­VÃêêj˹ååelnnöÔÖfss¹\µZ k‰“Éd¼ÊÕJš¦¡\.`mt²CìZkY~“çxCõ¦*ºÁꇲ\ï¹%°[Ÿ‹SŸ}&ð‘g4wêÏï±ø®mÛ0 ýè%xÚÓ¾Ã0 IŠÅ¢W!TQÜ|äfÜ&܆¿xÀ«Âõýû¾€We¥½ Àiš^‹ þ½nooûŸ–{m6›ÅÞü‡Ø¯ï(õGýóxía{xQ< €T?&ÐêÏK׿@¡þŽV}[B»*Ü3Á¤å; Ëê¯u… Û²(ìªë€ñ2¬‡^áÂg@yÓÓ*Çñ>ìšæWÚåÕŸóœïàá‡ïi¸¾ã80MÓ«ÚÌ«=óJ·«ì¬( ‰Ç¢($ –eá=ïy:î¿ÿðŸþÓ I24í Ö×OyÏ ~ÖB¡àU^ô>Ÿ,{ïY*•*ç…±›ªÙ¼o2ïéÚ¼Þ𠪩Ôèú"Ši”ñqPïÇîɾ,û_Þ›÷YäÓ¶í\zĶmض]ÿ™Ìô$ŽºîÝèLÛ†®ë°, ‰àž×½ùͨe³Èf³È„”þðßÿ=¾ø3?ƒ¿ÿ}¼ë}ïc-™x‰K(ñ–·|gUÁ«_}ÀÑ–k<ïyã]ïú'üìÏþ‡®»«¤ÇašfÃ:Ïç½¶O–e!•JA’¤†5!NÃ0  üèG?©T Þ ²ÍhšßŽ…·r‰Çýï²ì÷ .‘Áöh¼Ê­$IEñxÜÓ=²uùRUµAaàÔ'? 
ý»ßÅöö6Œdår±zëD».¯ügÞ¢.¸N[²ª¶m{ÕÛv1 –e¡P(ô¼~K’„B¡£^¦Õ²¬ùk‡ ÛŽ ‚€L&:Ï&Á¼Ëx¿ë­,³5œ7;Ømë1^—Ï ØÿÈ#¸rk —îìàžw½ UMóÚ,Ú_ÿº×îÎq†Û¶äD‹Åyç×çs„wÈEÑ›'R…O_ïoƒÚ­]BLç’F/ßýX··ß~»{ðàÁ¶ç¯¼òJ÷öÛoïx î¥åáÃ>ú¨{íµ×ºúЇ¼ç\wÝuîõ×_ï¿æškºZâýXëšæºù׆p½œÍ–ãõ—jµž?¤º­þé|Þu#íºç~®Ñ5UtÝí”Ûàt]×ý翺ßýü½Û +á;#E¡è¾ì°›ÉdÜ›oÞa»><Ÿ´yãB³æ¶z<«®ëJ®ëŠkäëjýxÑe^Ô^¼=ü» îD©õG¾þ^ÕúC­?/_ëºn©þœÀwëç¡ÅúÏÅúçÑê!xhGµ8*ùæ¯Û ÝàŠ¢ëŠgßïmG6W!ŽÅvÜóÎû¨‰|Ë}ÞóvÜ+®ø¼ûö·¿Ï ÑýØÇ>ÓÓ.hxÑŠÆß¹§‹WTxú:÷ÀµcT9ê£Ú¡Ÿ†5|Rð†Ó7{¢v¯jêºLŽše¿Z­¶ÎßBÁì#wÜኢضÐXXR&“ñwÝóy×Ú¿ß}ôÅ/ö ]æÅºä’uï¸ã‘–°3îy½ä’W¸ï}ïGZ®_,=ÏZ¡Ãäà!jÁ¼A¾{? y™Æõ{Øðfí¼HÍÇóùÆ\SêëŸoôªr]†Gimoo7„ͪªê^ð »ñ¼ç¹?ºür/7ëGzž!bÈs¨yív^¼§³­ßýV+bÔT«U/â‘{GƒÝ,¬lÖ½ëïh˜™¦0à½ÈXrX¯¹æ÷Cúûè£zÇ}ôQ÷Cú{å•W6o7Èf£÷øñãÞäÜÙÙq<ØPqøcû˜{Í5× íëªëßÔæd“ñW­V]®û¾ã+\f8©®B¬i®{ðz×=ú¤ë^ôÆUýhôI÷²g>àÞõê»Üíím÷ã¿ßÅnsŸñS÷gÎxÊ}ÞóvÜ_ù•/¸ÿø?³Äþ·»n~©)œPuYXqX´â6¨Ü8)ÖæÆjÞõÃm×7*]—†býó÷\rÛm5U×u7<,º™jýó´3> õϦÖ?_Ñm5jwɨ×QÉ·ëîþÖÉ`â (}Ò4Íý§?í>¶,¼ÃÅbžÂ !i.ÆÅÓ5‚ŠO¡ë‡N¡:__õBÖÛ)8ÁÐöR©äÏÏúdá R;šó»ÃÚ7‹E÷ºC‡~g$»ŸærŸór›SMÚDqöõ„UÃ…¼Lãú=- #i–Íjµê ‚Ð"sA£Ó“­¦*K¤ÈûÐúÝ?Š(íMÓ\ !´ÁPZª«i[ŸúêY³·yH0d³Yhš†ÍÍMÄb1,T8‰`}}Ý )hÇââbƒkvvvð¬g=Ëû€wmþs¥RšWÙ¶¹Øî¤ÿ£ã8øµ×ü>÷ŒÏáô DÞ PÀ-|ï¿}øðœwÝ€g=ëY8ûÏ>ŽKž}P/‚ƒt:3~ô3xÏ_ü.ü­^ý†£øö·Sxû›ÎÆZôù€ ØR†Å…ÖCì Ý— èÅ×ÁBxE°0Ü8X¸.ÿõóX8o@ÐË/(~—ÀBt³h snGØóò÷ï†PW;‚!ȽÀC“'^9 òÝð•8,ü+,R£¹¹y©TB6›ÅÃý(¾ó¢5„ç†t: Y–!Ë2¾tÍ5øµ÷¾Û/|¡®hš&âñ8dY†eY(‹^(• ´Ž¡TB_Xk¾],†‡ìäó,|­‡^íc£[³mQôC¨g…i“ñQÀÃÃl›ÍêÞoótAè/¿N)‚—„7eÏd28zÙeø›«¯ÆegŸ³n¾E˜¦ ˲¼9†¦iH¥RPUŽã •JAQ”†0HY–a‰„Æ_(`Y,BØu_‡ÛnÛÀÂÂEi³ÏfYˆÞnÃÞAðÓOF̼Ëö áé¢4GÃ9‚  Z­úOªç£‚àÝSžÊw2y1œBa°]I¢gº¬•JµZ­a·€çi„£G¢R©`cc'NœÀêꪗÆéÓ§ÛN–{ï½·Þz+Êå2Ö××Û^öñ<ø‚¸œ9õGýÆšH$ ª*–Ô%@ 1üÙâ¨Ê2½àïpÕK¯‚ ÿÜŸ;üY˜:ÿ ý sO_&åëïeÕ6áTÙúÏvý<À<‹Fýx° nX*õׇÙ3¨,ô÷ «`Åœd´zl¹±ø^677qÓM7áÌ3Ïéð†)ßpß}÷aee²,cii©ã{sÏj˜¡”Ífá8Sd ƒ-¼üEºÎþY€{ñttU`B$¶î‡•s’eæYí×Ã5i&åí]YYÁÝwß=ò÷™Ä>lx¡$€é×c×?²YÏõeÖ‹W‚€g…ÿ÷G?‚`Û€$y^NEQ¼¢Dž1Z´(€›á­QUµmñ š¦!‘H ŸÏ÷UpC’|oWècße.—Ãí·ßŽýû÷ÿâu&¹~‚l¶»BoÛ6ÒétC% ñoË哲’$5†"px²ímf)'“l!Ÿ×Ý»!Â×ïr¹Ü¢{“yX¿&f|C;ü<+bÊ7^lÛ†eYPųcú²gL“)#ý†‰|¼ïõ»[ÌðÎÎŽ{ýõ×»×^{­{üøñ†¸öaQ,½˜ú°øúnÅžú‰‡.•\7Äõ þ4SpÝO¾õ“ež«U–¨óæX~E½5Øö¶ëV¯p]íí®»}‡ëç¿ò"Iný=Âòƒ¹m’Û˜[Êÿ—šŽáð"Oa4Òªÿ]Æ™o1 
ùvÝþrZÂ:¸nkniCq%Þ›qÌI ¼8Q²<ºÏygœ9sã\Ç ÏËë7—z(ðÞQšæºÛÛ^ñ³`‘¡û?þq¯jS©TrEq¿}Çî+.¹dè¹~³–;8.y™Äú=l¼b‘mÏwnsוz?I^ô+˜¼œÉLh¢Í&´~÷:v¿ßöö¶WSƒ?x Þ·X,víÚPÝn é÷#ÉaF£^9kÓ4±¹¹éíú,..öíeÍår-;0,‡´!2Çf:P,Â$“Iˆ¢ˆb‘-d<ìþ·¼øÆ7€x’,ã—ŠEœyË-øÈõ×÷ä9í‡a_o™Ù#›õ×QJÎà ¹7U’¤žZ|…b÷Qd“+x-ÊO f]Ƈ-Ÿ¼.†$9P”,Òi¢(z²Ì帡R/è:{pŤ¾NÏd^Üñ´~ž,Ë2Ž;†o¼‹‹‹ØÚÚ‘#G°¶¶æõêÆ¡C‡°µµåå¿lå“!bqq››› ç‰Äp?9729: ø­Oý.úàE¾€ï)àÉ»€^ï‡ôïë*üß <›h æ¤ëç‚×àkºiŒJýXˆ2O汆ќÓ[¾¾ñuÜóŠ{0l¦E¾ÃtOÃ0ðgžÉd›‡p Ò rˆðþÆAD‘Òž¦‘i‘ñn˜&‹B »5e³²5l]ÄqXÁ½t: Û¶ÇñÈ·¿Í4­D½îuЩlñxŠ¢4䈪ªêç«f2Lù—eÄVWñOïy~ñëw¹W˜Ùî‡tø_ÿë'xâ‰?B<‡a0Å^Ó4‹ET«U‹ÅÞ뤛]g÷‘àk{nLŒ›yqIbË¢aˆÇãXÑH.Ç<ÔW–åþŒUËb7‡b‘mä—JlG“dy*è»è'˜ÃZ.—aš&666pìØ±Ž1÷‹‹‹XZZ‘#G°¸¸ˆœ>}ºa·guu+++ØÚÚò’¼=ÚuL½HE®Ì Vø\ùs8ó¹gâuÿõuþñ_ÿ`×Ý¡y0²Ù°ê× êþ\I “sA¡Þ^Ì%¥}f˜´|LŸÐ´Ö|ÏʉøE¾“šH°'¦£ÊoÜ­(Ì–Îf§«Ú/á3 2Þ <ø‡ë†áoš‹âðuÞ Þ²,˜¦éU˘Çô5¯y Ö^ð¼[€?þcȲÜb¨¶¥>à_°nÑ™Y‘m |¿‘ç.ð*Á~ç;o°Ž'Ÿ¼º¡rû®ß´žè¹¹t)øÄL0K2ÎI&“øÊW®Æ܆³Ï~N[eÅbÑ˵Ça;œÍ‘ÄÔЗÁºµµ…ÅÅņßMÓD$Á¡C‡°ººÚÓuŽ;†ååe¯:Yðš«D|óÍ7{;@ÍçÁᑸg”c÷ô¿ƒö¥ÜH4ÍÆP–`±$N²þ³ç5‹VãV­¿¾y>dBž 4¶²!f‚IÊ7wœ†y&ï¸ã¼ã7~ƒý¢iLј’CEñÂÓi žv&)ãÝà‘îa­T¸“ _ýšW’E¢(z&¹ À;f„òâG¦i¢ (ÃQ°ˆ‘0ͲÍÑu¶©—N³=Yf-Ž ÃÀ[Þò6œ<ùf¼ï}g㪫xœ·öÿ&¶ÍÞ(x31MRië#FÞÔ™cdb¤Ó¢¹wµ§*ÓÓd;÷MùÕ-ÕQƒ¹ªSöÉ»JŒÓô뜆ѿwÕ¶™,½§d¬Î ]û°Æb1”Ëe¯ZX­Vƒiš-a¿[[[ü¦ÃÓFC¸:—]€|ⳟ@"ÕÔcêæ;;›ô° ¢g …p]Ù²,(ŠBðˆ¹¦]„a$Éïk: x/J2V‰"I¾Ù·wð«òÃâ¹w•Ë®ã°G³,Ó½ƒ1¢èË·išýo´7RÈX#ºzXc±±²²‚¥¥%¯/¤Z­†S§Namm ‹‹‹«ŽM’`5ö ×ô¡?„CµCƒ˜dg~ ˆFœ;é¡Ä@ô½ O3+ÒÑz\×õÑ«€ßlž Æ€ã8ý¯ë<¬+++-Çr¹Nœ8Ñpl}}½ç7ç9°±X¬íùH$2Pëó <~{@¿}žbŒC¾ög¨ 1jÆ%ãaèzxŸx]×ûoeÃÛ{tÚÔÉfÙÒÆÏž`’²ÝL*Å”ùL¦ÁjÛ~ny`‰™wàŸþéë=@t¤«ÁzàÀ†‚KœvÞ årš¦¡R©`ù±ÙlÖ»f¥R¦iÞd’eÙlvWïåTñ`Iä]%FÇ8åÛ¶ÃŽÙ¡q§2쉱KÆ)ãa8NxïUžêÑw+›l–Í‘vùºÎÞzºÎ=“–í0¾ùͧpß}$)ÄÓ¤ëL~5ŒU¢'¦MÆ- øÿãûø•_!ù%ÚÓ5$8buuµçG/¬¬¬`qq–eáäÉ“X\\l($°¶¶†h4ê/—ËÈår»û„ç€õYå(=ô1 ëB=2NùVÕÖh™¶¡c¤ÌCb¬kx¶Ý¾ÐØ®Â&3™Î…ÉT•ŒÕ=¤e;Œ_ÿõ…˵e1cµT¢Ê©DÏL£Œ»n‘IDGúÎaÝÚÚB.—ó¼íM?¯¯Õjžq‰D°¼¼ŒJ¥‚r¹ŒJ¥‚­­-Ï«‰D°´´„â.š[Û6 \Vÿ…ë"jJ͈wK‰ùeœòÝŽÐ ©ª?I}2 2.Iáâš»Ý ¢ØTì€Ø‹Lƒl‡ª³Ø¶ß^‰ÂÔ‰™F—e`gg£1$ضÃû{–žsXMÓÄñãǽÎÆÆFK8A'b±Ö××Zãììì`ƒÿ¼V,kyß^ðÚ© l"ðõ^âäiñ'vÉ8å`ë9ï øÕû‰$ˆ·Œ7Ó.Ý4´`G3ŽÃ&¢4VúÅÖ°yÓdÇÈsµg˜´l‡qòd 
'O¾…B“Þ’L²ÈŠœ!ú`eü–[*­)L¼";AÔéÉúµµMÓpàÀd³YX–å=²Ù,8€#GŽôämD"X\\ô~¯T*ÈårXZZB4í8)jµZ_βáOh`E—,àáß}¸û<)ûÄ.§|­Ýjº†»Ä€Œ[ƃðM÷°%º¥•S3Ž$íÛÒ4+G¦I÷‚=Æ$e»Ù¬—½ìU-‹É&EÍÿ?{ÿæÈYÞùÂ_8áÆÈÈøŒÆÄÑ`m²o`Ԍ؅¬R"ç@Æpš¨ÃyŽ{7¬´îл{i3'a7й¤Mö$×(鬔=‡vâÝ%­¸ÁNÈÙ  CB¬Ið†Äš]ÜÂØ]Œ8,©÷§ïRI*ýþQ*õ÷s]º¦GU*=’¾Ï]Ï}?÷s?#²ˆÿÉŸ|¾²Ý•ŠÚs5“Q¨oâb(‡ukk š¦a{{»k0ì~~kkkè7n6›(‹¸ë®»H$páÂçù^\»v­ç±/}éKøÈG>ÒVÍ8›²g\'é@í³µþéÀL [zöööpñâE3ü­§­oxôÑG±±±½½=繡ׯ’ÅÆÆ®\¹2Ó÷˜‡ ïDíGé}¬ïVe†¬¬¨ÙÒ^…•ÂávûošŒð/(Åb/^Äg?ûÙ™\^ö{>÷¹'ð?ñ²î L.-b¿¥èÑ,ðÃ~÷âúë¿¢l·a(_­ªÁ †K‰ŒÁGµßS‚kµ~ã7~£ïyëëë8þ<êõúÀÔàz½Ž­­-„B!Üÿý]©½èWZû¥/})^ÿú×w~’´û$ðxäq|ë¾Õÿ[Sl–‰$^¾|y&ן…¾àÖ[oíÚ6Ê-Õ¾éÀå2SOÛÛÛc †e®6ÜE8Ü{œ®ëz[á6d­_?Û¶^â½`aÙÜÜÄÙ³ggbÃçi¿ñ…/| >úr¼õ­·¶ˆÇ©Í%Fì÷$»qôÃ/ûí…®_ÿú_"ÿa5ÃJ–žqÇàgXëõ:"‘È@c,"&e ›Í:yôãÔ©SЖ–Ðh4ÆÚJ² q૵¯âÌ™3#_‡Q˜—¾ö‚Öº®÷N‰ä€L‘yj\­&½°ŽMÏ`®+/wпÓæVP'?´Ý‹|þñ¸É‰&2UIã¿ÿûßÄóžwEÙnÓdæéÉ@‡5 Lu]×Ñh4 ijµZÛPœD"Ñ–6£ë:’ÉäÈïe|€ èóÀßýÝßáŽ;îÔ@¦‘±™§¾M³Ý5M³·ÃÊÁ7™óÔ¸Óì½óLßêÀº>Ül”¦µKL>‘ø¥í^|àß·½í)¿¿²D,žÆ¿ƒ·¾õx¢‹Y¤S‚c±šÍæÀ‹ˆÝ]yÌ ÉÉ÷JW“5…›››ØØØpÊo‡B!Ü}÷Ý#¸Ò&TuàcBß áú7_?ø…¨1™§¾*ØÎÿÍÞƒv†èÉ”˜§ÆÝô‹%ö-¸T©Œ¾‡jgç"'¿´í…z;ï|ç[ýþZȱH¯T€oüdËv{m®MÈ1C9¬±X Ùl¶«¶ ‹·‰ÄÀœûÍÍÍ9î±X >ø Ó±ÜÍF!þ½þVý­ë:ÿÁÇqgû¿ˆ†LÀ<õ݉Ùo•+™~iܲz;¬}ׯîï®V>‘øi¿;1Í ÞúÖK<ÖB¡wñ0Bú°H‡fó—‘J=è÷×BÀPû°^¸p8þ<Ö×בH$œò×µZ ;;;¸víšSelt–Þ]¢U ú õÃ0€7ÎöË$dX&Õ7Ð>¦¸¥!sfw3hySϽ‡Çq<£Qΰ’žL[Û˜&ðgöHJ…+™)³Ö8XVo|ã‹ýþ¨$ å°Êâìb±è¹uM"‘ÀæææÌ*šƒ®ÚßÑ·¨ÿ[–Åý)ÉÒ!cjÓ4Û7ÞvíšÈÐ+`ª{—˪S±¢6ñ‘røÀžÀ¯þªÇe–ÅÙxtøð‡ÿo|#Çåd8†rX–ÓÚh4P¯×íkb±ØÌª‡M‚aéÓ\3PYF$É’Òw†•Ûs€Ó¯Rß‚Kã¼ÀþB|åñÇÇ«_ýuA}’ÀS.þç&vv~E=¡ëê_N,‘ ¬ÜI$¦iØÜÜ„¦i޳Úl6ÂK‹€eÑ´þošfï”17œ"!m â™L–™pXí'ï…®ëH¥`0ÏâdÆÌLßǘ¦Éôâ+³ÖxçøÜsýª®3[†LYkÛÍßøÐàÌÙÉ”™—Æww[ÛðuY’I¿¿|uX†çZØF£óçÏãüùó¸ë®»ÍfG¿¸üÕ³ÿªÿ9¬žJfÈ,õ]­ª¢KÔ2ñ™ÚptìY“ PàÌ™*³Ö¶`Y*Þ2p ¾r™Û5‘©2/jn¨gTN‘!ÚaÝØØp‘q?'ah4Ðu½g'ØÚÚB$a¸téêõ:ŠÅâhŸL¿îñÁ—ØQÈ”™‹¾qRÞ 5 !dÌKã™b•J¥w:V2æi¿eºu]i;ÕÏ!-—Y£€L…yjܲZÉ/žAf !è°ž:u ëëëˆÅbÎ#‘Ht='a¨ÕjØÛÛó<Öh4P«Õ°¾¾…BX]]EU¦”†Åž:|jðyL§$Sf.ú>ÆI­1 ØÉܘ—Æ×ÖÚÿï¹~µRáÌ™ó´ß€’ïOüÄû/_ÒuU}Œõ Ș§Æ+•Öߺ®wëœKóÈ|× "‘677§ú¦«««X]]E­Vëš•=<<€6ç7‹žGŸ>ýýŸFü5fX±$Sf.ú>ÆqX½Ê©2#æ¥ñN?´g„ž+™ó´ßº®Æêñé_HŒ³«dŠÌ[ã2‘ë™lœ8"Cá{Ñ¥NúuŠf³9ü…âÀ7žóÁi8È'sdjú†+ZIg•,ÓÔx>ßú»ç?.í sbšÚTºd4Ú£˜›|ž4d.L[ãî TÏ 
Ë¢ý&C±pk¿qíÚµžÇ¾ô¥/á#ùH+Z”W£¯ÃÊNr"ÙÛÛÃÅ‹aøPJ}\}À£>жTžp8Ü ÓrÌÆÆ®\¹âË{Oˆw.mòÜ•ÎꉤX,ââÅ‹øìg?;×÷¶ýù,¸Ä€ä‰Bìw½^Ÿû{Om ÞÕ«0$mø‰CÆà£Úï)ÁB½^ÇÎÎŽ³vµ—qÔ è·6‰ô<öÒ—¾¯ýë»Ò—ûÞÜá{rbX]]E$ÁåË—'¿ØˆŒ«o¸õÖ[±½½íüßék¦Éè;ic{{{è"xÓfZ6¼\n¯lFw‘kN$›››8{öìÜmø4í7 L÷ü@û “6Ä~[fšLs nš­lßžYî“ȉ`Ü1øP3¬º®ãüùó¨×ë…BÎó›››¸pá‰B¡.]º4ñ9uê€ö´„F£1ðfÐEÒUA•ajú†ŠXjšÆA;Y(¦¥qÃhŸXòœ…ŠÇ¹~•ÌiÚo@cþê¯îëÖµeµW«!dNLSãéÀžH»»~d†rX·¶¶ izè¡6ÑÆb1¬®®b{{gÏžÅ}÷Ý7qƒ"‘‰D[ÚŒ®ëHޱ±p”i4dÁ˜¦¾Û`„’,ÓÒ¸¬ïú v4é’dnLÛ~§Ó=ÖõårÜîƒøÂ45·j…õœae° ÉÀ”àZ­†f³‰{ï½·ïy÷Þ{/îºë.gªw677ý^›Í&B¡î¾ûîá/`ßþö·gÍçK$d&Ö÷1†a —É´Jð² LKã‚çúUB|`ZÚ6M”骜jY*kæàÀïJN(ÓÒx8ÜKX•> tXëõ:"‘H—*iÀ‚?<<ÚaM$žk^c±|ðAgÁy"‘íSÀ‘}Ô†µ\V‘‚ÈŒ˜™¾]¼à{¾øžïñû£’ʬ4e`†Ì‘YÛoÓþÓúbw ¦P`š;™ ³Öx.§JÅ ¬‚MÈ tXC¡ge°ÎâÓ& M4ÿÆ-¶´áV ÄG&Õ70y3BfÉ$ï4Ï«¨2G¦a¿à–[þÏ~vÇ8E×jÕïHN8ÓÐx¥ÒrX¹DLÊÀ5¬±X ÍfµZ­ïyrÜ=ëê'?þx‡•¥´IÀáž,+î­ùz:«++~7“±Ñ4àÙÏþxûÌ“d~1u’,ROÉs6Üx£ßM$bà k,C,C±XD,ótH›Í&ŠÅ"‰„/e¸½øÖ·¾Õ@oY¼)À Ød™‘Bׯ’åÄ0”o ¬—ˬšJ–1Û¦izϰҮ“jÖ .`ccçÏŸÇêêj›Szùòe§šØ… üþ< ø/•ÿ‚z¬Q•ün%!ccš&~â…/l-!dIáþ«dY[Âa³ŒÑuDgê$YtÝùË{vUשu2C9¬±X ÷ß?vvvP,»Ž¯®®b}}}âêÀÓÄ4M屮eÉ$+«’@cY¾÷oÿxÝëün !3Å3%¸³*!B¶lꊛ3øH–]Wiï=3dL“YŽd$†rXUøÂ… ØÜÜt*‡è™&¼t­ag5nÏ9#$`†Í«WÕ%¥kËÁ4YI•Ã"‘ǸÆ(´åd‰0ŒÖ>Ãi¯ñ¶iRód$†vX…iUÇ›5žkü*:«di¸ñk_ã,YjzFç (•ün!caÀ³žõu®Í&K‹Tzï¹~U%#³”+!ËÎ×_ÏY&²ÔôÝ»Á`òyàÉÏ^ý‡+Y2¢QS칤ƒ6œŒÁR®aÕ4­=ªc*톲$Üö–·°$²äôL'#$à†±m¦¶“¥E×uÚo2UN†Ãªë¼1BH€`…`²¬üõŸþ)nýÊW¸¬ƒ,-†a´/é°, \ö»Y$À,§ÃjšÀéÓÊQ5M5»ÊVB žédå2«ª’ÀsÓ§?¯±Ê;YbLÓl H&•ÓJȘ,§Ã»»@.¬­q!„ˆžéd†Áà# óüÎïü>øÁâöÛo÷û#ŒÄW¾òìïïã¾ûîó»)#óè£bccÃïfŒÄW¾ò|õ«_Å 7Ü€ÍÍ͹¿ÿ8úÔ¾f¯{Ýëpã7âæ›ož{»'!ˆ:€§žz O=õ”Sx"(\¹rßüæ7}{Úðàľù裢Ùlâ¶ÛnóåýÇÑ7í÷ü ºýþƒ?ø¼á o˜ûûÓ~‡ öMƒj¿Òau§!t‰D<ŸÇ;Þw¼ã~7Œ£oøoÿí¿ùÝtB†‚6œ,3ãè›ö›Úo²ˆ,dJð©S§´§%4¾ƒyB‚õM–jœ,3Ô7Yf¨o²ˆ,¤Ã‰DH$Ú뺎d2éwÓ™ê›,;Ô8Yf¨o²ÌPßdy†mÛ¶ßð¢^¯ccc‘HÍf¡PÛÛÛ=×÷$¨o²ìPãd™¡¾É2C}“EcaV@-ü–Åò‰DÂïæ2U¨o²ìPãd™¡¾É2C}“Eb¡VB!„B!'—…\ÃJ!„B!„Ða"õz½ï~_õz½ï†ÌýŽzí4?Ã4Û=϶“Ù2K}s|ZŸaÚm£¾—‡I4¾¬úžWÛÉl¡ý¦¾—ÚoÿÚ>l21<òˆ}îÜ9ûÌ™3ö™3gìsçÎÙ<òˆsüððÐ~ÛÛÞæ÷»ßÝöú~ǽvš\¼xѾçž{¦Òîy·ÌŽYê{˜ãÓ¢Sß“¶ú^&Ñø²ê{žm'³ƒöÛÿ¶“ÙBû}2ôÍÖ)°±±D"Ã0péÒ%$ d³YçøÖÖ"‘ˆs¼^¯£X,u|Ðk§E­VÃÎÎNÛs“´{žm'³e–úæø4ðÒ÷¤m£¾—‡I4¾¬úžWÛÉl¡ý¦¾—Úï¢o¿=æ óðÃÛgΜ±¯^½ê ýô=iÛ¨ïåb\/«¾gÝv2?h¿©ïe‡ö{¾m÷ 
:¬¢ë:4MC­Vk{*’‘H$°··×öšd29ðø ×NJ"‘Àææ¦óˆÅb8uê677ÇÇn÷0ÇI0˜¥¾‡9> ýô‰Dºo’ù1‰Æ—Uß³n;™´ßÔ÷²Cû}rôý Û¶m¿dŠÅ¢çþI`€VºB$A³ÙD(j«jÖïø ×Nû³ÔëulooOÜîaŽ“ÅgÖúæø4?‹[ß“¶ú^&Õø²ê{žm'³ö{1ÚNfí·ÿmŸtXçD³ÙD½^à7ßïø ×.j»ýn;™AÖIPû&™/“è ¨úö»íd>Y#An;™´ßÁ×7VB!„B! ×°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B|C×uX–åw3™;ÓÖ¾eYÐuÝïE!„L:¬„ßH&“0 Ãïf2w¦­}Ã0L&ýþXä„R.—aš¦ßÍ d.Pïó‡+!„B›J¥Â<91Pïóç»ünQ¸£5étÑh´ëX4…¦iÐuétÚ9®ëº“ –J¥Ç¹\Ùlår–e!#•J9× ‡ÃÈf³ÎuF=Ÿaé§o( °,ËSc½ô Pãdñ™¥öóù< …BÛó‚eY( ]¯#d¼ô,ÏU*€¦i]ú”çh¯I˜Dï–e9Ú ‡ÃH§Ó‡Ã¨íqà ëÉdP(¦ibeeÅé ¹\Î9V©T°¶¶æt@ vÖÖÖ¨ÊÊÊ ÊårÛ1˲`YÖÖÖÚÒÐ …r¹\×µ†=Ÿaè§o9.kùzièÖ·û85N‘Yk?“ÉxFú-ËB2™tB„LƒAzvã¥OÚk$&Ñ»eY8}ú´p¬T*mK6¨í1°‰¯ìïïÛ샃ç9MÓìl6kt‹F£¶¦i¶mÛÎñýý}çx©T²Ãá°mÛ¶ À.•Jαxæ.ºá®B&ÈÂoB~úõM‚ŒŸÚ‡ÃNÅJn½@¦Á0zFÛ„IõîžYÔŒ+S~'ƒ3¬>FQ*•L&¡iš•‘ò×Çd ´%¹\Îؘ¦90ÏžyÑO߃ ¾Iñ[û©T ©T ™L†}†LÌ =‡Ãaär9'õ‘ 3©ÞÓé4*• NŸ>x<îdǤR)¿?Z`y†mÛ¶ß j0"‘pY§$)`ÑhÔÙ‡UöhÚÝÝu^ë.²$©d„,^úê›jŸ,½ôì¯tî5LHP™Tï2C‡¹”iBè°.0¦iâôéÓØßßG<‡išH&“Èf³Œ`B!„B–¦/0î”˲œt:«„B!„“gX !„B!„,$¬L!„B!d!Yš”àJ¥‚ßú­ß­·ÞêwSFæÑGE( 7ÜàwSFæÊ•+¸ýöÛýnÆÈ\½zÏzÖ³°³³ãwS†æÜ¹sÔ7l4›Í@~ïW®\ÁG?úQ¿›14´áþ侇ñk¿ök~7e(h¿çOÐí÷¯ÿú¯#‹ùÝ”¡ ýö‡ ÷ÍQí÷Ò8¬/}éKñú׿›››~7edŠÅ"Ξ=‹D"áwSFfccÛÛÛ~7cdjµ._¾ìw3FâÖ[o äw _'A´+~7a$hÃý!è}3(Ð~ÏŸ Ûï 8«í·_½oŽÂÒ8¬A&ˆ\bG!ó'¨:I$¼‰‘ùBN–™ j„ö› íw0àVB!„B! VB!„B! VB!„B! VB!„B! VB!„B! VB!„B! VB!„B! VB!„B! VB!„B! VB!„B! VB!„B! VB!„B! VB!„B! 
VB!„B!ž†zø…¯k½^G£Ñû8!‹ õM–jœ,+Ô6Yv¨q2,¦ ¬­…°²¢ɤzèú|Úð]~|ðz½Žl6ët„D"|>P(h4Èf³¨×ëMÓP(üh*™2–„ó¹¶iѨŠår@<äóêX2©þÇÕy_üâí8{ö±™´ƒú&Ë5N–j›,;Ô8•h88è~Þ0Zcz]25îN¥¼¯cšÊé½ñÆ›ñ’—ŒÖ_fX³Ù,’É$ ÃÀ¥K—ÅbÑ9¾µµ…H$â¯×ëmÇÉâ‘ɧO·ÒÊåVô%—SÍdTTf’”»®•Šº¶¼w¥¢Î‰Çjµå¬­ÿkNÅâ¼üåÎä» ¾É²C“e…Ú&Ë5~ò(Ô ©Œ¿M³ý¸iªq»®«‡e wÝx\9³€_ïï«çu™‰]YQãôLFó›¿2ògðÅam48{ö,  !‹9‘žF£Z­†õõuçøêê*ªÕªM=±”Ëêщ®“RIE_D¨©”rK%%bËRÿÊ9…B»ãjY-G´3½À²Zm ‡Õÿu]½>Uïupd³ý?S<®Ú i³ýî¨o²ìPãdY¡¶É²C‹µ5åô>ÝžŠ›Lv;ž^d2ê¼l¶5F—I%@“kÉÜ}íLÆÛð"îv`«UõïÁú;ï{+%¸X,¢V«¡^¯c}}‘HF›››C½> ¡^¯#‘H‰Dœ¿ ‹9ç»;Œe©ÙÆqEaÊyÜßo]O¦ü÷÷Õ¬æÚšrF“I`w·%PAÎFÕ£ÓIŒÇU'ØßWï·¶¦O‰ìd2ªs¥Óêo¹~8Ü>sºˆPßdÙ¡ÆÉ²Bm“e‡»»ƒÏY[Scg‰+”Ëjܜ˩ñwç¸9ŸoMy¥ûV*j|¿¿¯×RÉïoa ‡µX,boo«««h6›”ø‹Å"êõ:¶··^#ŸÏ#—Ëaoo×®]ÃóŸÿ|ÜÿýзS4›M'Ǿ“/}éKøÈG>2t–ÓTNœ{]¨L¹÷Cf4MSu™íŒFÕëK¥ÖlæÊŠr&óyõ\©¤:ÁéÓÊ¡ìtV‡AÓZ0“Qw¶YR¤-Óboo<ðžõ¬gM÷Ç8fú€G}Ð4 «««3i;YL,KõQ è¸û«ô£µµVÊûÆÆ>üášY{hÉŸ‹E<üðøñƧ~mÚo2²–oœñP'çÏÿ<.]z67ëmŽã´ ý£hªÓ© ‡Õx!î=yÕ/Û0•Rcòp¸åO ƒj¿GrXvvv°½½D"Ѷ ;‰àüùó¨×ûw°f³‰b±ˆ³gÏbuu×®]ÃÎΊÅ".\¸à8Á^\»v­ggyéK_Š×¿þõCÏòrY T-«)‘™I ÝáK¥Ôò”Cêå¼J'p§Ñ®­µRlEȽÄ*‹ª9Æ^躎x<ŽhTy¡2“ëÕF÷¿Óbuu‘H—/_žî…1;}À­·ÞÊÁ‚P.w§âH«VÕñQ3ú [[k­‰F½£Ùl+bº½½óç~&Ÿ6œÌŠ\NEöå~&}¢“ÍÍMœ={vê6œö›ŒK.§l°{VË yÞ0ZK¥ÜT*À'>ñ ˆÇ/"û±©·“öÛt½ý·Îå”ÄÆÉ¿©Tkì ŒÆ ‚ˆ0 Ó¾x1î|$‡UR$ÀM,C,ë+v@yÖÍfÓ‰æpœÝõõõ¾Î®¤,œD C‰OE‡Ãj랥”i©e©sö÷[Nß§>u¿û»„Ï}î‡Ã‡5¤R)ÄãÀøŒ8‚tú…ÂÇ×° ë˜®Ñy8†¦iˆ»¼UÃ0P©TP©Tu©Ü²,ÄãqhšæœÇaš&vwwÛÎÓ4k¤Fèmº®C×u„Ãad]^¸eYÈd2ÇŸ5Œx<ÞÖîiC}Is7 àíooàßøS¼øÅ/F2y#þÉ?ù‡NdSn²fZ¤õÉO~ <òûøÔ§À-·Ü‚;î¸gÎ|Âá=œ>]ÁOüW¼êU/Ç+_yÃñû)>úèOáÛß¾¥’ÕÑT_•5ÞA"ù¿®ëÐzŒ„¢Q ñ¸á8­ßúV}&ß5Nú!KRâñ Ò%rošêaY­,wêšדÀ«¼®P~þçŸ7õ¶SÛÁÄ4[5-ƹ½‹ý•áˆ;X8,îúÑhï­>Ü«i¶þŸÉ´2gªUà—ùÊL¾+j|~HFT*Õ**4Ñhw†¢ØB õºY×\ #9¬YñJ h6›ÎŒk?¼"4îÜùS§NP³¹ò¼ûa´ ÊquϪþæo~ÛÛÿ…°\å½Âá("‘s¸ñÆO"úLSÃÿü??÷sY˜¦ ]×Q(‡Fñ†‘L*KFaR©TÛ Ù0  †á8¤âÄîïï#Ü1%Z©TgRŽW*$“Iäóy˜¦é´%BÓ4”ËeT*”J%X–…B¡ýø fg£N[LÓthÃ0L&±»» Ã0ÉdÏç‡aY Ã@¹\F½^Çë_ÿú©ÿNÔ÷l­Éohšf›Î¥Wù÷÷_€Ïþø®ïº†Pès°í<ã¿‹—½ì…Øßÿq<ïyÿ››¡R©àðGñÊW¾ŸùÌSø½ßû2~û·ÁßýÝüÃøw¨T>øüçßç>÷1˜fýXÇ:Òé4þäO>èY>ýé"c¿ð Ž·½­ˆ?û³‹€d2‰T*…HäfØöo#—ûS4oÄææ?Æ=÷œÅÚš…Xì>$“v>c<G:F*•B¥RA¡P@4u4J¥œ ô w?=ðªA?¨ñåD×[¤ÐkÐ.ËF¼(ZN«;ðbY­Ì÷^Ž­àNg“”xÉbøêWŸÂ´“d¨íà"Î^<ÞÚ^Ãí 
HF™×¬Q<ÞØÏdÚ5.Y/¢g©áá긳ՆɮéóG,™—¿ü§ðë¿~=ªUÕžg<ãkH§SÎñAK>LÓt2kR©t]o ÌèºÃ0º‚E“Bû=…Bûò‰QT^‰[–Jíkú*•–É”-7Üï#hk”…J¥‚tŸÆÈõ/]jâ§úËxÝë~ÏyÎ7œÀ¸èKÑ^2•­ÿd„TªuŸtÚïé0LàË\®}Œ±(¸56êñr¹ìdTjš†ü±Á¯T*(—˨V«¨T*ÈårÐ4 ÙlÖÉ´\[[C:vú˜dxÉ„™û=%ã±ÓVK¦\:ÆK_úÒ‘õ2²Ã*Ôëõ¶*Á£V2k4må³û¥šµÆï,½¢Ï“âžE’µœ©pÏÔô"—½³‘þÌÚ¸N[ßÀìn`bHÒCLR·ÝçJÊ6ÐJ×–4U™÷rLÇAR½d&H‚„kk­ Ö½ µÌ›J¥5‹Ã@ÚÈ´/Öóz¤Atæy!û?É¢ÀJ¥mû¬È‹bÃÂÚš²á€Ú¥ îYÈr¹åT«­û“dÃËL§{ýÞ éHAw€Ý=SåFjHº˜¤ŒY–…oû½xõ«_|øíß~ ÿæßü |æ3;N J œª(•”L_øÂÿ„Ûnû<âñ¸“9$©ïœ¼ØqFÅÖÈ2‘jµŠB¡€b±ˆ7¿ùÍøµ_ûµ©ÿVA²ß“`--Ž:Æp¯µ+Zé¶²NY´ÉAÈ^‘–å]ˆÑ²€&~øz¼ìeÏt G¶ŠPN6ŠišÈçó=³Ád‰…d¨Jë2ï—Y–ådËÈkR©”Ó?€••G¿îå*“òZMÓÚ‚¹î{¤®ëø¥_ú%üáþáÌ~ûe·ß2;.c|)XèÖg?mJ°d>±—òû÷rDÅÙtëFô´2-E·b[Ãá°c»ÝNèÊÊŠ³äheeÅÉàê|?YjçÎV”÷Ÿd\7Ž^FrXkµ666péÒ¥ÑÄy3ngÉåZ[ø£ÒnAаr|'ÑÈlYJ¥ÒЩµkkíþžX$‡'žØŠ,šq†i xÜÑ0 u“×4 º®#›Íz¦{ÈZgI—s P( “Lâå¯~5>q劳îyxXt½U);Ÿ÷î Uø|Á=Õ •î†oϺ½åqtNêÃU*­R¿RYmw·÷ñ€d†É4[Sxš¶ä~±ONw°EÒpEårË™M&ÕóãÌÎþâ/>?ù“C¼âp5î»j‡‰L&ά“;bnYΜù<÷¹ŠßÿýëÉ$Á°¶¶†nx ^õªŸÀŸÿùãø«¿ú1ì÷( ïvR%»Èkà–Édœ4MÓP¯×£—EìbS%Þ%³ìÃèJ¶Ïë4ó’5âCIÑÈl¶µ]žª|žsŠ,–J%˜f´Í†K`Ã]Œþæo¾—¿üÿ‹ïÿþÇðš×ü'm\Õº®#—ËyNX–…h4Š|>ï™ö(³úîÙüL&ã,ýÐuårP*•œÇp8ìÌXõC‚¹R€Rœ ™­’¾ö—ù—øÄ'>á·D†f‘ì÷±éòœ•Û«»ÀQçZûY;©ê}”Ö$Ht/‰r/Ëý¹5Û¹”°“Î:3ï/iÞ¤ñB–ßMš1ÐÉXz±GàêÕ«öwÞi?ðÀ£¼l.<üðÃöÅ‹GzÍÁmÇã¶}t¤þŸÍfít:í‹Fm;›µíýý}ûHN:fß®V«öÁÁóÜîî®Ífíh4jg³Ù¶c“0¥Ë,6Õªm—JýÏ9:R?ˆü0î/¦TRÏ É8zñ›{î¹g¢×W«U;Ûš¦ÙÕjÕÞßß·£Ñ¨½¿¿oÛ¶mØÙlÖŽÇãmzÏçóv<·«ÕªóÜÁÁN§ít:­tÚ¶¦ÍäsïïÛv*¥$2sŽŽZZJ§ÕãøûêµñxÛù»»¶]=ónõǨìîªïÕëµÑèÀvMª—yÄ>9K:í~6«GG½åT*•ìp8lïzœP­Vít:mï÷Þc¿úÕsî_¥RÉ^]ݲ5M³£Ñ¨Çíh4êyÎë}÷woÙÿè=jk}ÿèèȱ/öcM/‹Ô{)ZfoÐë£ÑáßOÌêÑ‘ºw¼éMuû¦›~ÖÎf³öÑÑ‘]­Víx˜ÏçíT*ÕuŽÜS©”3nœ&‹¤—aX”þ(·îaqëÝã>bOÓé´­iš£Ýjµêøš¦9iëvœöfG7O“qô2r•àÍÍM‹Åž› /BteX …ÖdDxãñ82™ J¥€w½ëßamí·pÓM? 
˾û»?ç¤EIn·;*æÎ Ÿ‹Þ83¤ü ~Ù„O°î¨N8¬Â¿Ù¬ ãºëЋ·Ø`Á( Ø=^,ù¾÷ý:>õ©«øèGwS>€x\EËå2’É$ªÕ*r¹LÓÄÇìÇð\WO"âÇVásÉ/“ð¼a¨i¢ýý‰Òâñ ×xzá•ïcš­°²> ³²‚²¸JòâÜE¾¬2pú @ª€‘‘ŽeM€» 3ªLfÃÚÚèûéuž+™?á°÷ X¹\vÖ!It:íl7&[~e³š³ €ãs€jõgGúLš¦á­oÕQ(¼Ý4£T*!™L:3¢d1*·fMöxv§ûÝ3Når+±Ã½¥»>¤ººÓ¾ …VÈŸû¹TÛš·jµê¤F³Tj–z’ì IöÊ Èf³NáÈÇ ¬{åöX*©þP.·2†®ÈzNÉmK†ˆ#;¬îRÅAĽTLìŒÌ®J”¬T*!—˵­Ã‹ÇÕx4™ Oo­œ¬þîs±JeÌN2Ù{ý[?z}8IüwÏ>-‡¥Ÿ•p‡q—ôºßëxý]›ƒÑ^›øI5 ÕViû 3 ¦ ¼å-Oà†þ/üôO{LDW*¨Fu ›†e×]÷C¸îºÇ`šÏFæíGøûîGü·"õ‰Õ>ú.—[{È ¸ñ‚e€ôX.0i.³£²`ÊÝŽdR=/{ˆcênKçÆÆýçVÓÔkúuÐ¥N ^X–å‘A|¥›žÈL©ÉðôÈÚs·C°»»Û6°%r>nL¤ßú)¶ÅÂ]¨¨î Íÿèý$þæo¾ç<øƒ?XÇO|7Ö׿ˆÍÍ’§C:­{‹µ$ÑiœÉn÷P@28:–5ËîàD4]¸Óegd‡P%±u]w¶¶I$Ð4 «««~žž¸·†é¼¿J °›^7ûlVùASqX×ÖÔ¿½fŸ, é¯ý&©w~íTjt'¡PP_’euÞ¥$Ÿ®· »¤Ó­ºònÜ%0ås޲ÏÈ4nvîç¿KÇéÎåpëk^ƒË¯zÕäï~ìÇøÁüïø“?y–÷ 89Ö‹Jå€g«ê¡¯ýMXoû-µeF¸„|Ô•*ÖV-{Ýà%]8—ó¾›Hµ)4”J©ªÕ>:p—ú“ݽevÔ òƒ¤”åéÓÞA‹QFìÑèX3Æ$¸HªÙ0Ñûd2‰T*˲°²²âÌ@Jº˜{€ãv<¥o:vfDe?nwl;йÇô(x­¸ ËaôJa#©jëÞû\¢IÀh»B2 2•á›{NGl°e¹uØBv )Ú%…°d¿P™ e¿v2;FvX‹Å"vvv°ººŠõõuªüõÖÖêõ:.\¸à÷gòD×[þ—û/‚ì›ÖáÚ¤NÓZOä´ærjжzhÆ¿¼ñ—€yŒ\oX§UîfûûÝ~ùÒ:S*Åsïl{<®œT±6ÑèxyÓÄåÀS¿ø‹þ¶gŽ|ë[ÿ;;ÕÞ'†rØ*•.½Dõ2¢ÿûK€T»'š©Tù|«h­T‚´¬öª“ñ|æÿváõ‰ðίt¿: ”J°>õEäþÅ×ÿÄ{~J•ÜG*å]ŠÒ­iw€ÅëÆ"³¿Òç&7®ƒi¶ÛûdR м$P©¿ø‹ˆç<ç?À4ï@8 œ;÷oQ©|©TÁ©¼›N§‘ÍfgµŸãÇ«§ª¯Ó§O#wmA0*ÃìÁê+²o”L>¡^¶eµìn¯x™Ä’;¥!öùßý»:ÖÖÞÓ–æ*f²?n>Ÿï©ÇúÕŽh«Õ±/ñÒ¯~ÕïO1wd¥—›J¥U®Dnß++ÝëT%‰j¤â³¬÷ïœ1å û”‘:'ºŽï¹õV\~ÝëFzùHk£ÑÀÎÎŽ³íx…õõuD"¿¿ÏïÉk‚Ï]Rº'šÖÚü1z–Õ4ÊÏ\Få3·#üÝ!ÇvåÞõMT¶ w=cm 0;Æßæß|ñ¯Ç°{°ß¾àHêÁKx\f‹¼n0òWVZž„a´7Ü0Ôu2™V ñ ¨½¬«ëUŒFf r9ï4ÑàèG~¸|ÙïfÌ”J¸é¦O;Âz"é±î ‡ä0êºg°!o¿aÈrU™Ä”›‹ìÍÆêrw?†ü³u¤Jš+k;Í’š[·©BhÙ÷÷ÿpã†ROüÞP¤º®LšÈ}e¥ç¹I„hÅãd…ÄC}ßùÎe¼÷½?çºêsðš×<©”²Ùl¹\+++°,kèYÒÖ¶¦SØfZô]å3Éþnã"´|áòH}ƒQÖ™/2ÖËf½ãЕJkµ†¤GÊ×išÊY}Ï{~ ù|¾mö(•J±hд‰ÇÕ!™?Ã`-#dšxí7¾á÷§ðx\uýNyºÇò§O÷ÞÚ¼¦i"—ËÁ²,d³Y®C™à¦F‡ØóJ¥µQxµŠ/|ö³#ÁGrXe“a¯7‘H ‹áððp!Ö^E/$²2Rɉ˜¹‹‹–ËêÚù¸ÉäÌtkðùÏCûv »¿÷rD_yüœe!õ×dÿ} ß  G6bæÇ#¡ŽJ¦âXHê£a(g³3Vsåfß¹½DG)& :Õ“‚Ì ½öµ6Æîß1néJÖŸ9“#±‰^?{*¤´çºå¼•HVlšWÝ"BfìÁ-öW׺S÷½Y â½ð…W‘ÉüsX–…?þãݾ¾TΕýFG%VZš»V]ß2 ²TÀ½‰f§§>m¢ÑV÷t¡Tˆ?¡ÈW.&Z–âëzÙÙƒ4•RßÕßþí§ñ3?s/yÉsqï½· Ÿ¿ ÉäµH–~ÞYlÏ‹ÎM9e铦M¾î«TRc.1,’ïª*¶Ÿ+KU$‹ ÇÙØÀkýþçÌÚÚèëéG-˜_(P.—‘Ïç9ƒ:î•î]>:ˆ\®µƒÌhLx¿k 
k³ÙD(êz®×V7~ãÞiìj iêBÇeÈÜÙ±š ®òÅòÙ0Šªô'´îÑ{¥‚8 à]=Fõâ,˜fïY$ù0Ñh˸®­µÖv~XwUÝNÄQåÌSàqÉ ‡•á_[û=¼ç=#¦%¹5;mï±ÇžÓ2 î݋܃£Q'™Òi•¡óã?žsÒ|‡eܵ§ÓBvk’ï@k{*wŸ—l™BA ¾UãÕóRxlÒÙ¹ÎY(¹¯yq‚sRÝf3ŸW1è·¼å;0ÍþÙ?ûwH&“ØÝÝuf>úÑÝãm7þ/ll¨­WüÖÝĈh½>‡ˆZ¦¢Öt3àÔW%ó[YjŒq×阄Ý]ðÔø«W_á6e°¬îêåã ëP;Çõ–e9³ª'¢X’Üè$h“JõŸ -”þ{'Á]o-Áê7µÝoöbLFrX‰"‘r¹.\¸à̤6 \¼xÑ9gÑ蕹ªëúà‹ì# ´ªJÄÌ0wÂðávãT*yÀAYl2ÊÚ™5wamÐob€ÀXVËaµ,³oI~BNRwK&9& ®ëºŽB¡0•õ¤~àÞÖIJÍ,÷^7Ó+uB"ìÝ_N+r6èÞ"Q62Î"Yj¦õ¾øÅ—áÿã·âüùcmm­K“²µP ‘}®®zkk­`Ê êí‚WU{@§¤(ƒûK—õÔç÷º& ô Í í·A*¬w.·Ðu¹\n9úB?¤X8«r£s©tŸ+“\²×{*Õz¾3ÝMþq·‡i2ò k¡P@6›Å]wÝÕö|$Áöö¶/b¼ü8÷†ÖC·¯íë4˜ÑhëÇ÷M¥ÛžÞ‡$'÷ú;@E—>í‹dû2©×å. &ãG÷öœÍÞ;‘ ’²æOsYû$³=’Š+›rO!r-·(ÓDË‹õ¾Õ«†A<®f`íç3µ=â–ŸÎB3jö´r<[q,ãL]&£DêދЋ u!“ JêUð dÒ3 Ã@¡PÀþþ>LÓÄÚÚÒé´Sõ:ŸÏ/ÿxHœÍÎ>¼ì´LÍdTß&•ò.!?Ì~X3fd‡5‹á¡‡B­VÃåã³gÏž]È™U¡×w\©Tú§q¹K wÒKø‹´“å¡3BY©TPt?}šéHdi9»YÔà%ÙUàH*öÊö1kkkmëšdÕ²,'åwîÙ îz¹W%“Sqôœ ’_0½LIéÕu9è—Qäø%žt- …R©ÔbgÐô*Ò‰TŒêe2êßi:£“°ìô]KßÓ4aš¦“î+{QÇãqìïï#—Ë£¨’ÌpŽ’‚îug7‘^Y5 ÈXkXB¡677µZ F#PÅ–díj_cot@I ètV‡Ò÷°ƒ B@¯”²L&ƒT*…L&ƒêñ>ÃRé^ îÉßÉd†a8³©sìH‘ w¿í|ÿR©ÿÞ&òeovßVªÛ…”KÈ¿`FUw¥œí %0Ëœ¢7%d©% fVËå2t]Çþ ìy")»’FÛo—€J¥5¥&ûowê@œUêãDÐkEB/r¹t]wìsg@R Ü-¦çúÁ3G}ÁÞÞîºë.‹Eç¹ûî»wÝuöööüþ<]”ËÞKdÜBo;Y*^êŽÎõ$t&x¦Ëæ¨Â4² ˆ¯×þ\ápØI+ NŠogvM8vŠqd³YT«Õù:«²¿õ ¤z·ÔKèüVV”ó E12™VQ>×ÛÑ(`=úõžû€OŒ¬‰G¦„‡Æ4Ålßia´]@iMv$ètVM³UDÒkóyïòðîý¤ÈÒ3JüܰÉçó}÷^xŽ ¹¢TRý ós$“jì&ûi®¬œHgq†µÙl¢X,b}}Ý™]€íímììì`kkË)Ì4ìõb±˜çñz½ŽP(4ñÌ­×oªëzw:°ëf~| &›yéè^¿ªëzwtQöy“Ù:¬dBæ©ñAT*íK&%MLÒâóù<’É$’ɤ³¥£Tû*™Œºß 3éWШ³àŒ¦©~ß±.)ÖÞö,d_õk˜Ù'Îf[û€wŽF|ýê"iÛ4W¿úëxßûtÿשv®o“½ý$m·Wÿqo‡1ÌŒþkcYX$÷K( ˆF£H¥RmkU̦‰Ö½–Åè/‹yê[8ÞºÍÁs»& ÓËߣæàrŒï‡Ä^Âa¥}]×Q©Tœ5ªB©TB¥RY¼ý÷:.Ãàuêõ¹<®ÖÓÏFü'_1»Ï%³l…BwðwA‹¦m@éûû¿ÿÿ7ßËn¼±µu‘¤ùí¿Û í2Mc½„a5ÞËÿÊd2Δ¹cýÉZÕÀ!³©B¥2xoéÎ>‚'FN îǵk׆:¯X,¢ÑhàÒ¥K0 ‘H;;;Îñ­­-D"†K—.¡^¯·¥ OJϽWÝzÎ>‘1ñCßn©ö¬~íNq_[ãV26~ÛðN*à¯xÉd™L¦i"›ÍvÍ–F£QÿfPû!³«s$´;¾ íí·Ìöz¥vÊ$ Æ¢iP_Ó_ÿõïÍ7í±PPÁY‹,i¾¿å$»"_X4K}¶N2Çë˜wwwQ*•ppp€ýýýà¥ÿZVkÌÕù<3 †f$‡UÒ}·¶¶Ðl6ç%Z gjûܽ½=\¸p¡Ppï½÷âÔ©STd§V«a}} …°ºº:¸ÚiNŸî~Îs@ï5x§ÈˆÌ[߀¬·/Míã°ÊÀbÒ(ɉÅâÊ•+øÍß|=Òé4ªÕêl·0è¬u0 ²mWÊ쌉FëëÏšë{¶áó ^,¢¶å#޼ ß4H§[3£»» ù›‘ÑXDKÝ-7år¦iv-mšËÌj§céE¿=¤e{™\N~’I ë 
ö,Ò:ô0ö>¬{{{Žs*yîÛÛÛNè…¤Äb1'Å8‹9iƇ‡‡Îq!‹¡ÑhŒõ½´ÝÓau?Gg•ŒÁ¼õ tï a†wÊ£;š7h?9Bzà‡Æû¡ë:sø£?ú]ïÌ™i"Åez ÜM³µ ¬¦©Ù)I¥ì¬Ö+çÅãƒÓÂfývMÛnzf†Mƒr¹} % [A›Õ"ž,¢ÆÓéö±ºeYþ­S'³Zm#IÑJ©"ûM{Uô•ý²Ãau®{ŸS26cïêëº#üÕÕUhš6ÐYZiÃNǸví¶··vŠf³9Ô{¸ñrXMÓìлëÆ2&óÖ7 Œ½{,áYp ˜|gnBàÆûñÞ÷~¥Ri¶Îªe©(¹¦õÞ2—k­IÚßWg2­ŠhíëµÞa̪EÓ6 |É~ô‹ÓÍm4¢Q¥Ñx¼U˜‹u –šEÔøBì1lš*mWÓZöÛÝçr¹Vÿè·|£3ÐC#;5:¬FÉÀ¸Ñ4m,êŽîlooPùô¸téR[ªq'×®]ëÙY¾ô¥/á#ùêõºs]Ëò¾ŽišÝ3¬á0#ˆ'„½½=<ðÀxÖ³¦Ÿ7+}À£>Š hš†ÕÕUçy·l-Ëò¸[õ}‚ØØØÀ•+WfríyÚðA|èC_Ç_üÅëfŸ*)û‰öÌçó탕pH—•b±ˆ‡~7ÞxãT¯ë‡ý„ÊNÔ§¯q€Ëò Êtδ’¹#ö»^¯\f7*‹d¿…r¹e*-ËB¥R™Ïìj¡ 5²$Ð=êîo•°ÉDÈ|Tû=ÔÖœ?ÅbÑû¸œ={Ú* ß}÷Ýh6›¨Õj};g¿êÃ/}éKñú׿¾­£Œ´Í[g™U²´¬®®âÞ{ïÉ wVú€[o½ÛÛÛmƒÓl_F¡ë=6ÜóðD±½½Ûo¿}&מ§ Äÿñcmí“3ùœ™ŒŠ’Óù\677qï½÷âû¾ïû¦zÝyÛïaÈç¿ÿûL~¿r¯Ë‹ÇÕ,ÒÁAk€{ï‡JæŽØïi;«ÀbÙo@ÉÒ]ƒc®³«©”ZGzp ´ï¨1MåØr½éÔ1ø¨ö{à k$qÊY뺎½½='ê“H$Fžeí …BÎñF£át÷ߣҩùžzB¦À¼õ-«tÞ…9´"' óÖx?ûf[õWŠ+q±ç‰`‘´íf*— …îõ#0ÓléY4—Ë­ñË\gWáÆDá°rVY ÛwFª¬i.\¸€û￉DµZ çÏŸoÛÏi±X¬«„v±XD$qŽ% ìíí9Çu]G2™œÊ6M3˜û7‘@0o}w³ë9° ‡é°’©à· ~õWŸž}åT¯ÊŽdiYm ™ ð¿ñ¹éÌ®ê:Q²p×õVòJ¹\žÍìj.§¶ qOå ƒ8 ÃÈE—÷Öz½]×±³³Ó¶ùp/ …666œ! ¾…ÍÍMlll V«9‹¼ï¾ûî1ÚØ½ÞÙ4Íùn¾MNóÒ7Ð]Üz¦•$ 9fž÷Â4•ÃúÖ·Ny­eµŠÑ\Ëwñ[Û‚ø˜ÑèÞäk¹ì[%j²x,ŠÆËe5~ÿÔ0 ä'Íf‘J¾bà eÇ÷÷9KpFrXkµ‰DÛÿu]G(ÂÙ³gÛrâû‹Åðàƒ:ëaÝ׿ø$†ÑB&‚9Ð'S`Þúv{cÈ<ðÓ†j sÓM{ÞÛ7Mza€ƒûŒßÚ¤išH»~Ú²Ô€½RQvB°8—ÝcÃ0& ¸Ë¾Öù|kW®Ë^†rXëõ:²Ù,Œc躎l6ëäµËìê°B¡PßN0èø0Hô¦s*ܽC1(dªÌC߆Ñ;•Ʋ]$SÅ ¿ó;¼àˆÇ§\I¶ !'?µ-ˆ¹²Òc›²a.P(´¶é ý'.üÖ¸ÔÞ_²R©L')U¯™³t µ†5›Í"‹áþûïwžÛÙÙqž{衇°¹¹‰b±è÷çiC¶znùÑ™WIH@pg¾ôLw·,µ¿!K€a/}éWñ£?zv6ç½€øŒ¢1 }¼¬ÓT3M»»jOg•,šÖ^`*õ¸%ÓR3Ða­Õjh4mkS›Í&êõ:VWWŠb«««NYìE¡s§š™è dΤRí«ç +·´!KD¹ „Ãúô sÐY% ‚i*Û>Ñ®¥í> S›a%KË@‡µ^¯#‘H´•–ŠÀîÈ_¿RÙ~á^Ì-ívfXOŸn¼xÆ3hÔI`q/m²,Ë{pÃt`²Dè:ð¢ýÉôƒîý¡ñ‘hT_t]oRËda©TZ;†*ØÎÉ$2ˆk(B³Ùl{Nöa]D'ÕÍñö±m)ÁÑ(ptØ67&Eê Êè÷<‰7²$̨ÀV² ¤Ó€e){ÎmøÈ²Ñ9$©T*“9¬¦Ù½ÇY::¬±X õzÝ©Öl6¡ëz×blI>uê”ߟÉA > mkü¸ð‚³«dAÐue²MSïÞ‚¯²}M8Ì*×$pŒµM¡ ´N»}bªJp>Ÿ‡¦i¨o˜ä.Àtß}÷9U„'Þðw†ô"$ ÈV“é0dÉ1 5Ÿx¯¾rYEäÝhúd!°,©¬¡P.\ð<¶ººŠÕÕÕ…šYT”ÒÉ ÁdÙ°,õˆFû-Ðuf¥ Wår9s¼õ«²©¼ey/ a@“,é´ZÓ7´Æeùô”÷$&dtnZ ë#d…õ~j†PkT‹Å"ŠÅbÛÖ5‘HdáœUÁÝ)8E– ]o­Ó¶,Ë»8G4Ê}ÉÈR1Ö «e©|âpXy½tNÉ‚bš#ÓuÚxÜ—LÓ}2I-ð¦æO C9¬ÅbØÙÙÁÎÎ666P,ýnûH´Í@årJð„w¤²­ 
˜no@–)0fšæh«8«é4‹í‘…GÕ!`Q=Ük¥RAzÔ™Òp˜59N(C9¬;;;X]]…a0 «««ØÙÙéÚîf‘è§›¦Éòðdép;¬„,3•ÊÎ* L¥ÓL!# ®·–x ¥sÎ4‘€á.Æ>pýj¥¢ì·eµž ‡„?¡ tX%ýw}}Ýynssœ­n‘N‡Õ²¬éïÛGÈ‚À‚bd™‘¨üÈÌçrêf@g•Ãn¹åÃÛsV·&Âí¬J§Ö 8}Zý›Í2ƒ€a «{j(ò»Ý1Íö LÛ ÷k"K€TMÕu+Yj$ý}d­ÇãL&Á²€ë®«¯ñh”EõH`0Í–\Ëå²÷$’̪îî*ÛM}“c†vXƒF¡Ðî—vÝ•$K@<Þ§àR¡ÐªÊDH€1Me²GžaM¥'Á0€§Ÿþøð+ƒ1$@hZK²º®w¯_­TÔ¸eŸŽ*ébiÖx¼•zÀ(²Œˆ=ïY5ÕCH€;%˜ašÀ7¿ù5N–šžk´+UP‰AFâÁPû°ÀÆÆF×sÅb÷Ýw_ÛsÛÛÛ#7¢^¯#‹y> …ÆÚ6Ǹéšrç 2cf¡o =¸Þs«é‘90+ ¦É1 ñ‡Yk[°,•`ÆèûR2óÒ¸dúö <ºs† é` ÃzêÔ©¶‚K‚—¸Ç¡X,¢^¯·9ºFÙlÖ)ê¤i …ÂH×u—Îî*ÏŠªƒ±˜æe; 4UaÆñóq÷o@ôø±ÀÌJßn8À!~2ïïYéÝ0Ô¾ ûû~-d ˜‡¶…pX!o¼Ñè¯sËbu`25æ¥qw]Ãè¡qØI¦G"lnný…Z­†®ç·¶¶‰D`.]º„z½>ò¾¯Édëï®Ö“Z1Ò<~t² Òñ\ÀÚ×͘d©d-ç8 åŒÊó)(Ç5Óñë¸}ÔÏzöÓÏž Ñƒ™¥¾–Æ=òºÎH%™9³Ö¸™\.«ª’„Lȼ´íf Æ+`e…Ybd*ÌSãѨš]Ô˜ÜSç³>Œ¼†µV«¡X,:ÙöfTšÍ&r¹\W•°F£Z­æÌê†B!¬®®¢Z­Žtýö U³ý}Nb¡ @òøÑIÊét;³YC^ׄr4ÍãÿJ­Ô4”“ ¥ã¿V\íOØGk8w|ìôñ¿9('÷´«.§üÖ_¿·þÍ­3ûºg­o]o¥Hz¦Óa%3fÖr95q:Ô^Üy"S`^Úv“Lög}Ö¯Z–ê ÕêÉ º“©á‡ÆeÌÒ³î!}z «®ë¸xñ"FÛó;;;ˆD"( #¥ ommauu@û~®‡‡‡ÚSŽc±X×ûŽÂP“@ Êi+C9†€š¡ŒÈC9x»PÎÛ–d D¯ÂÊa´ÊÊñõÜÿ·\ïe¿wþø_ëøõ§ö–ޝÝëùäL¾êYèžzê)Ôjµ¶kôL­‘MZɉ¤V«áêÕ«3»þ¬m¸IˆxÔu:¬'„z½Ž+W®àÚµkS¿ö<í·›juÀ@žéî'±ßƒ45.ó´ß‚®÷‰µ¸¸’¥GÆà£Úï¡Ö­­-hš†ííí®·ûù­­­¡º³³ƒ .xï×!ú}¸§Ÿ~=ö._¾ ËjeCêº>¸PÇ<ðš뵦tšXP©´b ¢PŽœ¬Ÿwßÿ²ÇÎl£8”³WBË -ÁÛÑ,¿Ç¾ëùú÷ÑJ÷•™Ù# ç„N‹¨Š ÎÊa•¾5à¹|ù2Q.Øb5gJð æòåË3ìÌÆ»[–Õ†•éÀ'†Y9¬ó²ß½0 Ã[ãŒY„± ™9b¿g™·ýîijæF.Çb¨'ƒÆÌ®nrOIDATOÝa•ˆà½÷ÞÛ÷¼õõu4¶Ô/ööö … ëºSìððÅbF£oZq¿ÒÚ7ÝtâñxWĨ+bišÊøÏŠº éh9ˆn2P3•“–L´f@“P3™Ò÷+P¨Û>ÈLhçóatWãuEË ö9'‹ù:¡#’H$ðæ7¿ß÷}ß7õkÏJßpóÍ7css‰DÂÙbÕê•<‰ë´‰Ãææ&n½u6k´çiÃM³˜é;ÃÊtàÅêê*Þüæ7ã%/yÉT¯;/ûÝI.7`XR©0s‚û=­ídÜÌ{ ´—Ôð,,æÞÖƒ,=2Õ~\ÃZ¯×‰Dvù ¨þÙ³gû?uê’÷tÿ= áp+õ kqw¥¢R&Üxmû"i¶q´få4vÞkäæ4¬_!E‘v;žÏ@ÍÞJq¢ðñµÅaôªÍFûÚN25æ¡o7,^@æÍ<5nš­À{_3˜LyÛoÁ0€ýÑO÷Ö8÷Ô&S»÷ÑQ.Óv“¡è°†B¡©¦%$‰¶£”ÊvGe‰öööœçt]G2™ú=:³!Ûnî|áN$ŲÓÉË¡µÆÓ}nª’®8‡2›évN-(‡²„öâE¹ãçz•¤’ 5cªy´Ë<~ïÎ-Ù÷}cúÚ³gè°’y2/­eM—vd³,FC&fžÚvcšÀO|®·-ç¾ÂdJø¡qÓlù¤mK;,K9¬»»c_›œ:¬±X Ífs`„EZ‡B¡‰µ¹¹‰ Ôj54›M„B!Ü}÷Ýc]Ë0 ¤;KÀ÷øhmÑâ& 区K©B[q¯C¥Ãz(gµ‚Ö̬û¼•ãódÆ5‡Ö,®lï"HuÝ(†Ÿ¡% Ã4ô-I³Ìn'd\¦eÃÝ™b}3tVÉœ˜æøhiÜ0ŒÁû 2f¡q©fR’Þ^(¨ í7‚¡ÖX,†l6‹íímO‡´Ùl¢X,"‘HŒ´µ Ï|÷X,†|ÐYëµæc†žò*‚Tr+hweÆ3yü¯Ì¤Êýf 
ÊÑ,£•Æ«¡5»Ú¹VT¶“úmy•<~f-<³Ö·çº>µi%×<‘¹0k‡Ã*ÂÏL2oæ1>1 5^÷ ®2cæ¡ñ|¾}ž(«1J¥Âì24CU ¾páΟ?ï”ÃT^ûÞÞΟ?ß³Dö¸„B¡®Ô…qè*J#ùó7zœ‡r(Ý)ö²TfS)<”BkÆÓídF¡R†e&¯EÇùÃW«è_‰,<“ê[dìY9Õ²€~û³2¦aÃe00õ,ÓŸ­tIÓ4©q²0LSãîÄ'Ȟ˩iWV¾&C2p†PÑ–íím‹EÏ­k‰677Gž]žUTe€o¢ÝqôJ¯M£åH–ÑÚ?4îñ÷µÄ¡œ…߯BWá%³§ÚožåáÉ1pöI*zpàCˆ¤KöÝ•3¯$ èºŠ¡§R»vÄãÔ5‰¡V å´ÊÖ5õzÝIžEéíiÑó&†š=•þ"cüÎÓÅ1Õ Š&UŽî5âºS†Ã®×2eÜ{Svaš,O–ÉjïÚžÌT™d¥I@ªÕ>)ﺮØ“€âÞÒ¦mv–i‚d4†vXÙâ¦3 ±Ùl¢^¯O%}`ZôÜ£Ò‚r$Ýkd;š(T¤}×qù¨Nnçõ™3=5ÎÙ&² 5N7 ®×&¦ïödL&&n GLf‘ ÙaíE½^ÇÆÆF÷K>ây¨VUÑ# ÝaZ³«ñãc:ÚÑ^!:«Ä'<9¼)%À4U*Y8lößÒ`F $º®â-žµÖ" Ç=D1MÓ[ç„ ÁPE—‚Œç@GŠ%¹ûM­ôßZ3°ì[$hÈž7„)"iš&·û K‰eI¼[ÚЖ“€S.·þ6Í!‚„ô`©ÖžéRH©ß¦Yh)›Y ÕSûÂ~@J*¥¶üè9·8!Á¦âÚ]ò,˜Y[ó»Y$€,½ÃÚ72oA[Ò¡ *¹I‚³«d¡qŒ'À“%@ê( L#ãì+ 0}·³‘=o †Ñ^Nà >2CÆ`©V #%Ø²Ô yüj=«ÉöE%d‘8:ò»„LŒ{¯aB–‘\n@`Ú'ƲZkX´w]g!12‹.Õëu‹Åj6›~–.º:†q\åà8Xî^U~wAÈBÃ{ ï2rN‚‹eµVl8G¦¹“18ËŰ½½íw;Ç&Üž@|íÆî“L øB‡•,+2Ðé[U2ez ,j©OµMŒ{2Õ0 ¤Óiΰ’±8ÃZ«Õºf)›Í&jµšßmHWdÞ4›_Ý>›ª£{ý*!AÅ4™&IO«6‡ÅmÈR"˜A@H@Ñ´öV'%˜…òÈŒ•|ß}÷accÃï¶Äs sýõíÿ×Týn)!£ã9Ð)—Õ:mBŒŒi˜öN–ÓNŸþõM–Y­Ñ·6!C²ÔU‚=ÖOÞÈU²X–åJÆ 8îX gXÉ2bYÀ×¾öµÞ‚9óJŽ»A<çúU2Kë°^{Qš&ðŽÏ¬ÑA! ¦);Þ×a-—¹ï0 $ñ8ðÚ×>Òû„\ŽÙ2$°P(¨¿uý¸v4 äó~7_Öz½Žz½Þ÷x£ÑëÚ–euG.-‹Ñ27f©oÀµ­{Àn\BæÆ¬4žN+SíiÇÝT*Ü«’Ì„YÛïh8<ü“Þ©’¯3K‡Ã-ùöÝk˜!X%xÔëud³Y§#D" Äb1@£Ñ@6›u:’¦i(H¨fR) €2Cæ¥o˲T>“Z˜BIfÌ\5>êÉ™—¶ËeõoO‡•)ÁdFÌCãÑhk «atXÉÄ =ú±±á<¤B°û9y {­D"Ã0péÒ%$ d³YçøÖÖ"‘ˆs¼^¯£X,ŽôÁÐaeäžL‘yj; …1†Am“™0/[–*§Q(oM&AuBÆdîE—B¡‰„óÿF£b±ˆÕÕUD"‘¾¢Ùlö<öÄOÀ0 'mÁsJ<'žZ­†}èCøìg?;õkÏJßð•¯|ÅbÑIÉÇãjvU‹XVkÑ9Ñ‹E<úè£3¹ö¼l80D*4'’½½=|èC—¿üå©^w^öÛ4UŒ±¯¾©í‹Øï©MÔ¸˜ßÇÿ\ªTTpœxd >ªýÚa•EÚ²;{>†¥Ùl¢X,â®»îB"‘À… œç{qíÚµžÇ®»î:ÜrË-8{ö,”à\øÃÏ)§•œhN:…Ûo¿/|á göÓÖ7<ïyÏÃÙ³gqêԩ”*>ÃpöìÙ¶ú,˜µ ×u½ÿ–6\¯}b‰Åb¸ýöÛñüç?&ןµý6Mà÷ÿ›½OVÞìì¿H²ˆýž•¾ÙÛo‘¯išx•Œµ¨i‚Ö|T}U%X×ud³YD"‘¶AÐææ&B¡t]G½^ǃ>8Ô›Öëulmm! 
áþûïïJ=èE$éyìºë®ÃK^ò’¶ÈQõ¹AC$Áí·ß>0">.³Ð7 <¢ïøÃ>|s$($ Üpà 3»þ¼lxß(É9#'ŽX,†f³9>ûýç<þøã½õ­iÜŸ`Ä~Ï*è8kûí^dš&nøÈGL'ãŽÁ‡šaÝÚÚ‚¦ix衇ڋŰººŠíímœ={÷ÝwßPošÍf<úÎÎ!³Gî´„F£1ðfÐIW•`Ãšß ð@fÌ<ômš&+ïßðņw")ñ„L‘ùh¸å–oôÎ  ÃJfȬ5nY-ù:ëWé°’ è°JE±{ï½·ïy÷Þ{/vvvæÜ뺎F£MÓP«ÕÚ€ò¼‰öööÚ^“L&Gú`Nz²uü0Mà9Ï¡ÃJfʼô +_ûÓ"ÉÜ™—Æ{Vz'dFÌÓ~?öØŸõOy'dÌC㦩VÃ0pË7¾¡žd†LÈÀ”àz½ŽH$Ò]I$mé rüðð°o$FÖÀzíÙ*õÍÍMg¿×f³‰P(„»ï¾{¼O˜†ê,Û“3O}ßö;¿üøûý‘É cî6œ91/m‰|c¤º„Lƒyh\j?꺅ÿí»¾‹³«d* tXC¡çBëííí±Þpsssà69±X >ø Ó±ú®Kõ ­XG@ø8µÌ·µ!3eúÔå¹=ƨ%™;óÔ¸{3{Bfͼ´ O?ýqƒ÷®'dšÌSã†aà€+™ V)nP«ÕúŠVÒ ¦µH¼³ôö¨8ëûòôãâÜñƒ,“êžûýßï÷Ç ¤'ÓÐxß”I]Wvi•dÎLªíT ¨Tz¤¼†zp‹2â#Ó°ß–eáëÿÇÿÁÀ:™ ×°Æb1Äb1‹Åž¤¨ëªj !Ç0 ®Ó&Sc¨mm.\¸€ œ?«««mNéåË—ÅÙ²“ßhš¦fX×T¡œU®!KÄw}éKÀ…> CUÁfQ&²lXuM–®Ó&Ób(‡5‹áþûïÇÎΊÅb×ñÕÕU¬¯¯\Ú}–De:5 ¦Ö¥ãõ/{5diadž,;=õÍt`²$ Ì’!d†rXUøÂ… ØÜÜtbÊ™ÕæÆñ'í&¿[AÈL¸ãúë™L–˲ϰr0DŠ®ë½õíÞÄ’€bY~ñïÿÞïf%bh‡U˜ÆBì™cÀöãÀƒoô»%„ÌÙèŒ%d¨ÈûüçûÝ ²dÐa%„ ,KU&dÙ0M \ö»„LÌç~ø‡ñù·¼Åïf%cùS‚ !„,• ·þ ËI¥¢2„˲˜E@¦VB!‹‰{F5W³.Y6Êeå°V«~·„‰IÑF“°ð)ÁõzFÃïf2¨o²ìŒ¥qÃP[’­­©}´+õ·iªýµ Y&¶áå²zT«Üª‰,£EaagX²Ù,êõ:@Ó4 ¿›EÈT ¾É²3‘ÆãqààÀï@HO&¶á– * Cg•,£EcagX·¶¶‰D`.]º„z½Žb±èw³™ Ô7Yv¨q²ÌL¤oÃ’Iõ÷þ>U²pÐ~“Ec!ÖF£Z­†õõu@(Âêê*ª\ßA–ê›,;Ô8Yf&Öw¥äóêAÈ‚AûM‘…tX±XÌy.‹-m}±XD­Vó»c±±±áwÇIÓ7\Ôj5F•Çà¤iœ6üd1±¾óù@­ÅªFh¿Çƒö;8µoŽÃB®aí×)šÍ&B¡P×ó—/_Æý¯ÿüàqûí·ûýFâÑGÅÃ?Œn¸Á歷̕+W×a¾ò•¯à+_ù ®»î:lnnÎýýÇÑ7üõ_ÿ5^÷º×áÆoÄÍ7ß<÷vOBuW¯^E³ÙtÖñ…+W®à©§žòíýiÃCûæ£>Š«W¯âE/zQ`l8í÷ü ºýþƒ?ø¼á o˜ûûÓ~‡ öMƒj¿Òam6›=]»vͳ³Ü}÷ÝX]]E$ñ»ù#Óh4ðüç?¿§£²ÈÔëõ¶(\Ph6›¸víšoïÝ‹^ú€ÿøÿc ¿k ø: ¢]ñsF‚Þ7ýzï^ôÒ7í÷ü ºýöë;§ýA°k¿/¿Wg…B[¿Ï‚ØQõ2޾½nÑ jÛƒlWüüÎiÃûæèŒ£ï ~ÏAn{m í÷ü ýž?ãèe!×°ž:u @{ZB£Ñ´¨¨o²ìPãd™¡¾É2C}“Ed!ÖH$‚D"½½=ç9]ב”2ð„ê›,;Ô8Yf¨o²ÌPßdy†mÛ¶ßð¢^¯ccc‘HÄYä½½½Ø”BÜPßdÙ¡ÆÉ2C}“e†ú&‹ÆÂ:¬Úª»% ¿›CÈT¡¾É²C“e†ú&Ë õM‰…vX !„B!„œ\r kP©×ë}·’¨×ë}÷·êw|Ðk§ù¦Ùîy¶Ì–Yê{˜ãÓú Ónõ½èµÓäâÅ‹ö=÷Ü3•vÏ»ídvÌRßßúž´mÔ÷ò0‰Æ—Ußól;™´ßþ·ÌÚï“¡oΰN $ †K—.!‘H ›Í:Ç·¶¶‰DœãõzÅbq¨ãƒ^;-jµvvvÚž›¤Ýól;™-³Ô÷0ǧ—¾'mõ½sæŒ}õêUç¹ÃÃCûÌ™3ö#<Òö·ðþ÷¿ß>wî\׹ǽvZ\½zÕ¾óÎ;íw¿ûÝN„g’vsœƒYê{˜ãÓÀKß“¶ú^&Ñø²ê{^m'³…ö›ú^vh¿O޾9Ã:!±X¬«Ô÷áá!  
9Çb±¶×H>y¿ãƒ^;-¶¶¶°ººêl=¨]Ó8N‚Á,õ=Ìñià¥ïIÛF}/“h|Yõ=¯¶“ÙBûM}/;´ß'GßtX'$ µ•ûn4(‹X]]E$é+Žf³Ù÷øÁÁAß×Nƒ½½=4 lnn¶=?I»‡9N‚Á,õ=ôÒ·|–qÛ6¾IæÃ$¤ƒ ê›6|9 ýö¯íd>Ð~ûÓv? Ã:%šÍ&ŠÅ"îºë.$ \¸pÁy¾×®]ë{ü‰'žèûÚIi4ØÙÙqÚÚùyÆm÷0ÇI°˜…¾g­“~úž´í³î›dþŒ£ñA:ª¾i× Úïù¶ÌÚïùµÝ/è°Nz½Ž ÔëuÜÿýmÑ÷t|'‘H¤ïñ~5G"‘‰Û½··‡P(]×Q,Q¯×qxxˆb±ˆ›o¾yìvsœ‡Yé{Ö:é§ïF£±Ð}“Ì—q5>HAÕ7møò@û=ÿ¶“ùBû=ß¶ûÖ)Íf<úN‘H^º{z¾Ñh8‚éw|Ðk'åìÙ³=;ì‹^ô¢±Û=Ìqf¥ïaŽOB?}OÚ6ê{¹Wã˪ïY·ÌÚoê{Ù¡ýžoÛý‚ë„躎F£MÓP«ÕÚ€Šd$ ìííµ½&™L<>赓’H$°¹¹éèµÓþ,õzÛÛÛ·{˜ãdñ™µ¾‡9>ÍÏâÖ÷¤m£¾—ƒI5¾¬úžgÛÉl ý^Œ¶“ÙAûíÛçÖ9Ñl6Q¯×xçÍ÷;>赋Ún¿ÛNæGuÔ¾IæË$:ª¾ýn;™AÖHÛNæíwðõM‡•B!„BÈBÂ5¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„B!„B:¬„ßÐu–eùÝ BæÎ´µoYt]÷ûcB!S‡+!Ä7’É$ Ãð»„Ìikß0 $“I¿?9Á”Ëe˜¦éw3™9Ôúü¡ÃJ!„B&¢R©pONÔúüù.¿pÒ1Mår¹ëùt:h4  ɉF£Ð4 º®#N;çêºî¤‚¥R)Äãq@.—C6›E¹\†eYˆÇãH¥RÎõÂá0²Ù¬sQÏ'dÃè …,ËòÔX/}Ô8Y\æ¡ý|>B¡Ðö¼`Y …B×ë—~šÖu¦i¢R©4MëÒ¨®ë´×$LªuMÓ`Y–£Ýp8Œt:p8 €ÚΰúŒeY0 Ãyèºî `%êB¡@EtÖÖÖœN¨ÁÎÚÚšs­••§“É1˲`YÖÖÖÚÒÐ …r¹\×µ†=ŸA Ò7d2çÿ½4)×rëÛ}œ'‹Æ<´ŸÉd<#ý–e!™L:!B¦Á0švÓ©QÚk&ÕºeY8}ú´p¬T*mK6¨í1°ÉÂpttdÇãq;›ÍÚ¶mÛ6ûààÀ9'Úš¦µßßßwŽ—J%;Û¶mÛìR©ä‹Çãv:vþŸÍfks>!£Ð©oÛVšsÿ¿T* ­oy=5NYißýújµjpÞË­kB¦—¦5M³«ÕªóÿNÒ^“ 2ŽÖÅ»¯¡iš3ž§¶G‡3¬ D&“A4E>Ÿ "2𦵥¥R)ço]×FêîÿK”ÆýÚp8ì¤#ôbÔó –N} š¦9»õ7Œ¾;_C“EdVÚw¿^(½û^AÈ´é¥éN:5J{M‚Æ8ZÝf2躎p8ŒjµÚ¦gj{4¸†uAÈår0MÕjÕynЖ’v )Ã‚× †?ñÒ÷ ¨o² Ì[û²~*“ÉàààÀïO–q4MHWëÑhÕjÕYÊgYÒé4J¥’ß)°Ða]Êå2Êå2ªÕj[„%÷uZãñ¸µ$ÏuKdQè¥ïAPß$èø¡ýR©˲P©TœB „L‹q5MHИDëR8©T*¡T*9ÛŽÅãñ¶¢©dx˜ì3†a “É`ww·k ’J¥œÅÞœŠc‚» ™ ¿ Yúé{Ô7 2~j?;+¹õ™ÃhzPf!A`R­»gV5ãÊÏdÐaõ©ø›L&ñŒg<Ãyär9D£Q”J%$“I$“I¬¬¬´¥„É $—ËaeeÅ©"¹»»ë÷Ç"@}‚ú&AÆoí§R)¤R)xÈÔ¤ép8Œ\.ç¹!AbR­Ë6§OŸF2™ÄéÓ§FY[`žaÛ¶íw#ˆ7’F}Xe&÷ÀÅ]ˆCRÉY¨orR¡öIpYÜeY6†ÕºdH†Ãa.eš:¬ Œiš8}ú4ö÷÷Çaš&’É$²Ù,sà !„B!K‹.-0î”`˲‡‘N§é¬B!„BNœa%„B!„²,Í ëÿñãw÷wñ}ß÷}~7ed>ûÙÏâ…/|!^øÂúÝ”‘ ê#O>ù$¾óïà½ï}¯ßMšw¾óxùË_îw3Æ"È:yòÉ'iW ÃÀÎÎŽßÍÚprß|þóŸý¯ÿµßM Úïùtû½µµ…H$âwS†‚öÛ‚Ü7GµßKã°þýßÿ=®¿þzœ={Ö歷̗¿üe|Ï÷|n¿ýv¿›22?üp ¿ó+W®à _ø‚ß͉k×®ò»‚­“o}ë[lûÃ?ìwF‚6Ü‚Ü7†ßÍÚïùtûýüç?ßïf í·?¹oŽj¿—Æa½îºëð’—¼‰DÂ歷ũS§Isó–·¼%°ßy³Ùô» #ñ¼ç=/°ßuPurêÔ)Ü~ûílû 7ÜàwF‚6Ü‚Ú7`ÙpÚïùtû …ünÆÐÐ~ûCPû&0ºý^‡5ÈUl°ººêwHªN"‘H 
obd¾Ð†“e&¨¡ý&Ã@û žéw!„B!„/è°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHè°B!„BYHÂa­×ë=Ÿo4~7‰ ¾É²C“e…Ú&Ë5N‚€ïk±XD±Xl{®Ñhàüùó8þ<îºë.d³Y¿›IÈXPßdÙ¡ÆÉ²Bm“e‡'AÁW‡µV«agg§ëù­­-D"†K—.¡^¯wu(Bê›,;Ô8YV¨m²ìPã$Høæ°6›Mär9hšÖö|£Ñ@­VÃúú:  auuÕjÕ×/ŠQ ¾É²C“e…Ú&Ë5N‚†oëÖÖVWWqêÔ©¶ç±XÌy.‹1ž ê›,;Ô8YV¨m²ìPã$hŒå°‹Eœ?ñxÅb{{{#¥ ìíí¡Ñh`ss³ëX¿NÑl6{{úé§ñå/µZmžß FW®\Á“O>9“ëÏBßðÔSO¡V«ñÆAR«ÕpõêÕ™]Ÿ6œøI½^Ç•+WpíÚµ©_›ö›øØïAšÚoâ'2Õ~ì°ŠƒšH$‰D¨”½½=lll ÕÐ\¸pÁóx¿ÑïÃ=ýôÓxì±Çpùòåé|£di9<<œ™Ã:+}jÀsùòe'JH/._¾<³Ám8ñ›Y9¬´ßdû=‹€ í7ñƒªïïådúöö6‰„S [Ó4D"œ?õz½-• “½½=„B!èº]×Q¯×qxxˆb±ˆÕÕÕ¾¯Ù‹›nº ñxÜ3bDˆ›D"31¬³Ò7Ü|óÍÔ7ŠÍÍÍž[L m8ñ›ÕÕUD"‘©ÛpÚo²ˆý¤©q ý&~3î|$‡U"ƒòfnb±b±ØÀ¨þÙ³gû—|úF£át÷ß$8˜&`Y@<Þþ|2 ”J@4êw §õM–jœ,+Ô6Yv¨qTFrXC¡•2  Ífs¨ˆ~"‘hsxeí«;*“H$°··ç<§ë:’ɤßßÕÒaY€aºd³@8ì}PÎå°¦®•Šú7WŽk8 ìîªS)u|·ö¢¾É²C“e…Ú&Ë5N‚ÊH«Ì¢nll´å¿7 \¼x‘HÄsöuT677±±±Z­æ8Çwß}·ßßÕBbšÊ14Må\Z°¿¯Ž­¬(Qª‘—ËꯎåóÊÍdZí‘å@ ÕË!õ¨»s¬¬¨ØhT=/Np4ÚrŠ£Qu¾ÌöêzëûÉåÔëòyuN¹¬^#ŸKÓZmž''Uß'™^—e…'“ ÷>±ëîÌ7Ò¯<§ µM–j<øˆ/N{g.†÷ÿçr­1uµê=a”Ë©ãB<®š¦®-ã9GÆáñxk¢IÓÔßëë7Œü9GvXC¡¶··Q¯×õª¡P¨ïBíq…BKÓILS9z€K:ÝrLe†Ó=›Ù™~+瘦åî®ú¿üÝ/"âu~çÚvï㽜ÑqY[SNu.h¯ý&¢ø2€h×ût:Å^Ÿ§óxç5 å§R‹‘мLú& ]Wý\CÒOçB.§:Ä N>G¨ñÅG~^È`Ã|‘û”×€F(Ô£TRöÖý¼\hÝ åššÖÙÓɢțÚ&£bšªOiZ+CÍM¿¾èÔø|0 uû‡•6¼ÆØ•Š:Glf§íÇÕXZE]WçËP<Þ²Ån‡S ´jµ]ƒ™LkB,Vcgñ7ÄNK†§sû#é´z­iv·»V»ŠQkæä°Öj5lllàÒ¥KSwP—™µ5õcºoÞcK÷lf/âñÖ,$ÐÐ0 iÏÛBºrR©(’IÕŒRö˪ÓÁÁd×î±86›UF¢L„ BfúÝÿ7 Õ%ý½RiÝD4­%ßÓ§g0‘Wˆ4Vw—BA‰}QFödꈙ“GÉ:‘h4ÚÒ¦;ã¤PP2I¥Zu$š«ãå²øÈu%ûÇíTº—z¸‘ãÙl» ÖuumYžÒÙ':ï}ƒî…Ò þ×ÿÕï_‚ËRú³¬V&š™L«ÞF*Õû¼•u- ~‡ÃíÃ)Éö’ 1Y&%ýSœ  µ|+“iÝ[¢Q5füüçßì÷WG† 3óÑ w*¬û<±ËÜ–ŒH™äŠÇ•-=–ËJî÷/3œ€ÒÙþ~ÿl¯••vÛŸNwÛf©7#«{Ø1¨Ÿ£Ô¿ÄÈkX¥öêêêtZ0Üë<9…ã S$È P¢ëg$G¥S0c#£•^£iә׫iÈf£°¬ãß$|œ-À°ŸAzÐÊ–‘Vá0Õ `%ƒ›“Ià%/™Ïg'ÓÇ4aýÖïŸþ4Âî(»B“–þÍ׿ùQ¤þáXϺá7ß1ôå%¢éN#QF:nQ °ÚCæbèÝÑÒÓ§[¯'·ìñ÷ˆÿ¿Ÿ@üÿ;âo¾æÿó7H™DêAùíG¡ñÝÈÿ§›çò¢HU«-ï RÁMO=å÷¯D†D~6÷àGjÈE&DÉç[÷6˜¸µêÖ—T*•V@g¥v‰îË5:c"&ue¥5à––W¼±óž8)ѨºæÙ³ÏžÿE|AR¥nEçØ0Tÿ霡’¬ÑŸÌpŠciYÊé”þ ¯qÏxêÚ2¬™,ÉÚÒ´–³)ˤ:qÏLyÑ9 ÛÝR©Ox½ß_=€e)íN·Ç[3™áp{À"UÚ‘ô[ -¸õèÆm×eéD?]õ;–N·fmMÓûý,i 
°Gä°ï¼óNûâÅ‹ž¿xøá‡=ß?›µíx\ý»¿?ý÷M§Õµ…£#õ°mÛ.•l[Ó|ûJÚöÁÁìß'Ÿ·íTjðyûûÃa¥’úaå˪Uõ¼Nw¿öàÀn¼ýíöüÓ:ûÏ>Eî¹ç¿›0UŽŽÔϳ»;Üù¶}ôž_µíxÜ.}÷–­½ì önþÀ¶Žì|䢽›®Ú¶­¤ÚvþŸ³2ÛÎfíhè«öÁ­w¨ƒ.ªÕÖßѨm§þÉã¶Ï·õã¶Æv~€RIMkÓãþ~»äKïzÄN¿¢Öe„òyÛŽÞò´]Ýþl÷÷cZönºjgoû€¿ãì#Óêù½ˆs°ÿƒ0ïŸt"zÙð “Ï+i„ÿÑ1©”’–[‹ó0˓⾧¦Ó]]k¦O/Ëf¿§ÁÁAk µ»ÛnsŽÚï ûûª?ÅãªOåóí&y¿¥ÇýýVŸë5î;8hïsnö÷[ãýýö!Eç5òùÙ|7AÓKÐúã0”J¶ xSDÂþ~k( 6¿ßØfQlüînï~0KÆÑËÈë™3gú>ü¢×‡×´Ù C|¦~Ç;ý©…BÛ£Ò5šð^Že/¼~°l¶ýÎ$Qˆ^×”ÑS6ÛóóѸíÖI§q—|4Úû5GG­s4Ͷ÷7w<;ôÁ'¿fk±Cåt¦¼ã¥­¯*mŸ”þ¡GlMk?÷àc_jéK,ð y‡ÖݧGoýíƒâ‡[Žm6«ÚoýÙ‰GûâO§Rê³f³¶}Çÿyf¿å,˜wŸ<:š^Ó=ÀµíVÜ,lú†‰å-2GGý"h6<èö{d/Èà^Ìêînûíú訿ɕ×Ïr ½»ëï„CÐô´þhÛJCý4*-V«­¡ƒm·lÞ°ÁvÒÍ8z¹è’áU¾oÁq§¸ó¶'AªnIÚ–ó¬fس‘’úëU"7n•æ ‘ÜÉe”_-ynA¤R­ŠB>¯Þ{m­•WÑïGÌç[{úHÎélš;ÃôpÙÂMúK&£~&MÏßûVär¯ð”“oIÈ»´~ô•7 úÈ =K¦kí&ê3fÞòð­CTÿ¤ã:wÜ Ü‘WßÇÚZëÅÃ,—ŸL¥Òi•S.Úÿò,Dßùàoh-Â;ΛÆòŽpXý|²^*¾4­Ÿo))—Õ¿’fÛ¯ ¸ çÉšQ·}7Íöúº~¬ï!Ò©æZœkÊHj¤icñ7àSŸz™ß_9FVãÈmY¶ÙóéFÒÚÝCwÊx8Ü¿œEgq—Y ÅfÈr"ö\~ãrYi6VÚÊåZiÞ^©±uiÈìÙa€½½=躎Z­@•¼Ö4m!×µº·uZ‹Þ'šì5´|,K9lîª++­EKî·8ˆƒœO©º!×XYi½Fc¯kŒH§Õµ¥xŒP*µžt§’ÑûIÙWdR còq\­Â´ÂÈåŽûÝw¾ƒý³@>jµ]N!šFö_½¹‡þý–Vtúú"ô¿ù§ØÿÔÿ0t3ýärSÂM7¡ôÀM½Ot/VEGâÅ×rɶUæ¦ÂÚ˜¸¨Ošk?ÜkÔd}›òBb~RôHÖ¯y \zý¼R|È÷`攨TÔ÷6I­¼qyÕ«®Â¶?ï÷W°ÔHµæÎ‚+@«f‡8£²^T5ò·32Sg!Çw@k›E±×R hÄYu(ÓéÖüÐ2JÖ«’ùòÌQ_P,±µµ…H$‚õõu¬¯¯#‰`kk [[[~ž.:,·&Å08úV.«Þ·»ÛÚ˜iwW…Œ:÷ÊÍf[f:q{ûRhõÖr¹U¶ÌëK•棒JyoÄ7Œ³:é{/–¥ r&Ó^å¶'“î ÀúWïEfÅ@2©$·¿ì¿õ—•†’IïZê"ü¡÷£tׇ•Lª+dþh ¿ü ‡²L…Mù>†šLw3ÕtÚ1<•J«ò$Y,:‹hI"‡—TÝ{@K¥\‰Åõ’¶ñx»iK&»ƒªA"žNÆ’C¯ýlÈÈLª«L¦ÄÉåT2ÖÚZë'’¬!ŸWZ(—Õ¹RèëÄ Í’IàÆÇëÜ£“ŽeGGJs¢ÃÓ§{›rY -$ùÎk ÀYõ ‘rÈCð¼+Wpó£ŽvýQò‡í3gÎØU…?ü°}æÌûðð00ùГжîn˜…P£/hÍçç³zß;‘_VK¾{íàÁA«ªÁB/Ôõ&ˆë-†]Ó+éìîªuR¤¢ç2É~‹Hm»µPC*ctrtd§ÂU;ÿ²ßl׫¬u´Øû˜ýß~ÄNßýMç-Ô:¥¬‡4o×Îf³v6›µSsXX*•ì#Í{=W­úW\k zS­z›-¯eïìC—….Êç²Ø1m­õîÑ‚fÃçÙ;×€Žcƒ¼Ê?ìïw-?Š¸Ì„a?°m·ª×ˆ±ØÝ=®þw\trÐu:*xÒ~Îþ~K{²ÎyžÅà|ÿð~“J WÐ`w×þf$bàg~f¤Ë4ÃzxxЭ¾”d²{ÀèõÅËôœ{Qîþ¾zÏνx:ß[î›AÉ•ž¹œz”Ë­¯K¶zIĽ£¡,ùXúÔ]]oe§ùYcHfW5Í;cS8Þ|¶~ñ"¾rë­#½ÅÈ)ÁÐl6=Ÿk4þ}YôrXÝÙ¬Ñõö· ,«U­^/Žì0Z”qœÛk2MÕf™xŠ„3,z4 …–ïcšíë°A`Y¾±tó)’£ŸË©@‡l_­v9Ïlö= Áø. 
…Ör(©kãuÎqµÚoE"£öQg|Ï;gßsÏ=m©¿‡‡‡ö»ßýnûÎ;ïôm&Ú+¡ß>[²ÿW4: 4µ»vúqëì«?f|òkÞùcýRd¥NvçëäùTêxïŽ}×!×4¿lØç•ZÒ¹‰™óô®Fí’+/âèèÈÎçóí׃|>ï\çààÀÖ4ÍÖ4Í>pµ¯T*Ùñx¼í¹a9::ê™~9)‹¾2*ƒR„FÉÎv~ŽlÖ>úàÇÚŠ½êúK®™K;m)7GGSŸ4MkÓäþþ¾Fí££#[Ó4gɦiŽFåxûg8°£éÌîëõþüêµÑhÔN§Óv©Trþ–G'’v¬>sÉ9'›ÍÚù|Þ>::ø¾ó„)eÞŒ»›×´=zeâO“t:m‡Ãa;;ýOÓ4;{.ñ©V«v:¶£Ñ¨£yIõ›îÕGœýªØ€l6k°¿ô±µíëspp`çóy;•JuÝ·: š ŸfôJi—í¬d+Ž^È _35­­vÚÜ££#ç9O{Ú9¸;þ{ßN¥RÝãjÕîÜÃìcü ì~ög‡kó8˺z‘Ï«%UšÖszRí·l ¶ ·ÑÅC´/Îcnd9–ØÑq÷ãf£mw*ðÁA÷رcÛ¿¹ìÃúÈ#ØçÎëÚõܹsö#<2ê妯¸E¾W¯{¯mÛÝ=çèÈŽ¾àIuWðº3t®×Ëçí/|ò“v8V†·×ew·KDù|Þ0¶c)h|Äãqg Ífíh4:öµ³Ù¬N§Á…¦iönNP­V÷¬XÜ"–x<î ŠâñøÔ€  vl{ð lœµ¯y»½í;N %›=–æî®Ú1»ã¢GG¶½¿]sŒQ©Ô§ïx Z<::²ãñ¸3ø­ºàˆ»úQ6›í¼Š£é…¬gí(qëUÖÀºÏí@I;ݸÛï~߬‡7$}Ñí w²¿¿ß³ÃIð "ö¹ì+<zvl»ÕßÜHßÙÝÝuú©³¡m;ý3z÷¯jÕþo½ÕþÂOü„ý«ïyÍfÛì·ô…|>o?pö¬¼£išÍfíýý}ç½{Ý3‚fçÙS©n ©¯²Ÿ¶Åp}åóGÖtjšýõ÷¿¿g€Ð¶•‡½ÿ‹®$øØo¬!çŠþG Ç Å€÷<‰öûà +>Sö÷÷'ž¸™G½ *„Óâ¤æóíÎë Üùa¦È\ÖÎ7»xñ¢ýðÃOõƒLëÃó4à=vD?8°ûÏ$e³­JÇŽZ<WÙ!f Ô{¨Áîî®­ 9²éìxƒçƒƒƒ¶s¼f¢ö÷÷íjµjW«Õžƒ}¯Áû0ŸGf¥¼eiK*•²óù¼ç,­´³T*Ù©Tj"C´ÁŽmOé–JµwŠý}gcl‰Ð;}Áãû•xKöÿÍ>zϯŽT“¶í¶A® H;<ш۩“ȹÐo.¸gMÝz¶OI;$pÒ©·jµjh¼ôšeu÷Åjµj§R©¶ÙÉ„ðú>¼8::ê«ÿ££#ûŽ;îþ‡YæÑ'%AÀo:ê«ÌîÃf³öϾõ­í’ƒWdª=ð³»»ÛeÛÓé´]=ްK&m·gtòæ;î°?öêWÛÖ7ÚŸ¸ãûž³gc¥RÉÑø‡Ÿ÷<ûðÁmÛVÚÎëÉ}ÁKëA³áÓt@ÆÕðÁÏÎêqöŽešv!“±¹îº¾³è¶­ôâÎ èå,ÊØA½Í¾PöÜAN±;ë@‚ÿ½^㸔̶ÝÝ]ç~tppàŒ·Ü×:88°üÇܯ_i,¦ÕçY¤PîÓÐùÛþyºÆNd©Zm¯žv|þ°È„Ò‡Þÿ~5ö;¶ámqøiš:GÚÑ‘íÙ6£±»;Ò lZö{,‡õðð°m6õá‡ö­:p¿ïÎæ–6'wÄ¥ãjYO¿ç=Îv'Ï=Ô9#sttÔÕAdp.ÑoÐêHò:÷ AýËI:hgTßíì…DýÅ‘¾Üt䦿5«&7yØvð;¶ÝÀÓ##Üë‹ì«i×DJ2Ø?:²íìOÃÖžû§#åíôà¶¿w_ÝÉ zÐ@Çý¾¢÷ î4Ø÷tê³mŸSúŒ×yñxÜÉ(6Éóî˜dtöSù[f «Õª“=ñÊW¾r*Ÿq^Ì£Oß³}g”`êØÕ¯óyû±»ï¶ßì¸ÈÓq`Rf™¼fMÝÙ™^ýjµÚ W ßFí'_ðÛ>Ö¦¼æêM793°½î+Ò.MÓœ¬ž£££ÀÙði9¬’R>oÆ ‹Ýr~œ‹,6k¨qD>ï¤Á}æÁ=Ç 20w#KL:ï92Vðz}6›u^㬦Óé®,²|>ïÙ™1–kI°^²Ìäyy¸Ÿ ‡ÃŽÖOšýžw5}wÖÓ$ãÙ¶eìïï·ÝŸ;yçÞàèë¸aíÓÌâ Ú­À¶¼g¿qŒØÍÝÝݶ@K×RÙýAR5­µµDûfÛѨýÛn³?öêWÛï|Ñ‹ ÷SÉxD&¤äþ1‡õ°Ïœ9Óf|ï¹çûÌ™3ö<0Îï=¼>¼¦Þ drÔùOGŠ^Ÿ*ûíÙv*eÿÛŸþiGÌ^i^t_·ó)FR„hÛí3"è~)ºHú•×ónƒž]Ú§Ó­-aŽîº+GGGÓ†Ýn¢5ï-ÈZ÷*w³ß=C bŸ;gìvwwíp8ÜwyJ/N’ýö#óEƈ^6lXJ¥’ý+o|£}Ç­·¶~_qúlײ¼|ÞþÜm·ÙD"ö΋_ìèï/îçì?~Ñ‹lÛV6ö/îçºfTѱè¾W™0hÜÑoò 
¶½mæ8óÅ{y_çõ:Óðe|îÎ$“L¹t:mW«ÕÙ:¬W¯^µï¼óNÏ7yÿûß?ò>¬ƒffyä‘¡¯×ËaG+‹ôev¾ÿkÛ£(nCêNctGíd°/3ˆ^Uö£”Á¨¤fÉk:ÓIÝü‡ÅÝ:":Ò‘¥N¹‰õjïÑÑ‘ËåìŸq¨Q™¦¾m»÷ ¬Gæzbl¸é&ûgoº©-ÊëŽôöоç=O;7g1†î{fPלÑ}P(•JÎ`Åæ1à™µ ÉtFÛ³âñÙÏHíïïÛ&û­; Uظí6ûé[niŸµô 3-íèèÈ~ò/n9I6kÿ÷W¼Âþýw½«ç)ÒÏ;9iýý²(ä;rúp>oÛ.»1 GGGö›ßüfû§~ê§Æÿ¾0/û=*éô|÷9•ÉA™Po BˆFvwwgh•J­Þ²¯’e3lrwww¬€R©T*8*c®IïqnN’ýö(á2sÜAAË‹zñ_ðû›?ðö×_ô"û÷#‘Ö޻ǚ‘Œ‡£#ûS?ú£ö#×]g_ûàíÃg?Ûq ìWÞv›g4ª_²Î 2Ûö^^áõù{]OúVgFA¿õÞ¼‘ñd8îé‡d³YûöÛoyÒh$‡õᇶϜ9Óóø¹sç†ZÏúÀØwÞy§S°éž{^½ê?<<´ßö¶·9ÇßýîwÕ¶ÎÎ2ª8{Ö~äºëmëºwÙ¯~õÇì7½é~ûÌ™'‚n޳)J¢ ©TÊYóæ%št:ÝUQ¢zƒ £×`}Xƒ: Ó.#T«U;ûâ„Ì2lú¶íÞ7°a3×c# è¿÷Δi)BäŽ.Ë€Ut ë0;#Ç2°žfp#ÈÈ Ñf9à™— wÅ÷RÉ{uÖ| àŒ’ÙÒÅî®ýØÝw;ÿ•µúïáfÿ8½Rî;}û-·ØµW¼bxíIžiŸ¢7âtŽ”^çª4Î=`V6|Þö{Tæ9Õé`ºÓ å^‘N§í»"ûC·ßnn¼Ñ~çÞ`o­®ÚD"ÎuÜUÙ‡¯ ,6Ó+3lY8)öÛ:kbÈ2³QøÓßú-û¿½âÎ çWb1ûÉ3gÝvÚo7¿ÿ®wÙ_xæ3íRG@Ñ« c¿¢dn$°4¬–Ý ÜˆS*52d镸-Óìo[[[#OMÕa½óÎ;:¬2K+éÃW¯^µßö¶·Ùï{ßûœsî¹ç§ƒ\½zÕ>wîÜÀŽàÕYF ÚØ~Þóœ0ÏÑÑ‘½½]³ßõ®Gì7½©nkZ+DœMq\Å»…&é‹^ë$êHücVÆuVú–×yᑹîqNÖ¾x÷Ý­Á¨Œð=p;ÀGGGö»ßýíx|ö2]f5à™§ †¶Ba>à.âÕY;@ÒX‡ FÞsö¬}>s²:îYÖO}ô£NÊxçZçªјÖlO©dÛÇ[—õ?­4Ú Ó„ž×,l¸ö{Ç_Ûÿ‡aØ?w^Ó4€ ÀYOéUÇB²fj¯x…}$[òç*_={Ö~ú9ϱ¯~ô£]¯›Eà{Y9 öûx‰ûÜét %? 
Dqðd=ò §Qª°»‘û‡Ì–ë¬v^c˜`gáJ/§´WaÌi0—5¬çγß÷¾÷µEc®^½j¿ï}ï³ï¼óζç{5²Óé½xñ¢Ó9í3gδuzÿûßoŸ;wnä?JJp:¶¯ÞtSßsdðÑɰ!‹Ã¬ÖYéÛ¶{ßÀ­_Ãtíƒly¢2¨õ<¿=¥Òo‡€ŒÇ¬<ó´áÃà*xë k6¥8–{ 1Ùú«3¥J–/tÎ^öJ×4Í~åm·Ù?ïyvýMoêݨind8‹T)M>³°á~Øï~H±Ð¾ãÅ[î8K@]ô'…W:{:¶²W¨`ñº ¨”³©þDqì÷€3Ã+X–6ÉR%Û[ßýÝví¯hËBóZn!»lL:!%µ-f½œ(•J9Õ¯'­–<*ãèå»0"…BÙl{{{ˆÅb€z½ŽP(„íím„B¡¾¯O$0 £í¹ÃÃC<ÿùÏwþà\[þn4#µÓ4x|ØsM<ýñ#ôÃ?Ü÷¼p8 MÓºžF££~dI™—¾ËRÿ†Ã½Ï) Èçó¸þúë[Ojšzx¥Rû©Ãö%²üÌ[ãƒÈçgÿ™s¹Êå2J¥R©À²,är9¤R©¶{@6›hš†xGÇÑukkkH§Ó‡Ã( H¥RØßßGØÕ‰ÃápÛÿ…ÝÝ]uÀ‹2 PŽßÏa£0 †Ñn¦A8ÜÓþøÉ"iÛ²€LØÝmÙ_]W»Ú§~åWðË—±ûÅ/âó·Ý†J¥‚R©M4Y*A×uÄãq„+ RÁ7^óì¾àøV4Š’ü¶…P©¨ß¥RR)àà õf¦ÙÝÈü IE㦩d¶¿?ûÏlš&*• ÀaH§Ó]çüÅÿ8žùïÿ={îsñÏz^ýâ#“LâGGøöõ×㿼ùͨëºs~¶Óî]ö~\R©”sŸ™%©T kkkH¥R-;°À tXšÍ¦#ÞX,†‡zº®£^¯VWW¡iÚ@gµ“ §sÜÿýÎûõ¢Ùlö|§Ÿ~_þò—Q«ÕH$?Éd2ø×¼xÙËfüu“E ÑhàÊ•+xòÉ'gú>ÓÔ7<õÔS¨Õj8uê"‘ˆó¼‡ÝlC×ueŒ …±>Ç$Á£V«áêÕ«3ŸYÚðaqõ¬°, kkkˆF£¨V«Èår¨T*ˆÇã(—Ëø¿o¾+™Lë¦ X–ç`PNìþþ>r¹ Z­ŽðlsbK%`eH§»Ói >*•é9½S¦^¯ãÊ•+¸víÚÌÞc^ö»…‚Ò¶Œ- X[S>¤ú¿…ßü7ÿç>ö1üÙ¹s8÷È#øÌ;Þl6«t¢ëÊãM¥ ¥ÓêoËòyO#0Ä~3? ~ÙïidÆÜ÷«¿Š½_þe¼øo„¦iÝA@ÃP)™loäy€ã}w >ÒV÷¬ª®ë¨ÕjØÙÙÉy-‹¨×ëØÞÞvž“Üy8uê5e,Ãý÷( £GÃ0ð³÷ÜÓZ÷CÈÌSß@kœÚkLÙk½Æ 4­5&'Äͼ5Þ‹aá¦i¢P(@×uD£Q'-WœÓx<>ÚÚ#Y7Ú«_e³*ßíôéVMSý]­z¿Æ0Æïlù¼rTR©é;&¥Ò¦Ï‚EѶ—´ …ÒétïµmÑè|’@³Ÿtv5—Ëá‡î¿zê)ù,77•a|P§ Í$Õ ñÓì=~1 £{ýÞ ²ª„,8•Š·Ãªë:LÓ쟟ɨww[g¸Tn{Fæ‚»&AÛìê46®$Äg$^؉TtïÚKÕ²úê 'Šk(B³Ùl{®^¯#‹-”“ÚÉ qˆ3 ·,¦c‘@Òo ]©Tº ŒšþHÈ‚qpà_tf¢z!Îj瀨†¡¦»r9:¬d.èzkìÒ¦iÎ2‘%À0¼‡Û’Óµ„‰âb Ã‹ÅP¯×jaÍfº®w-Æ–T`ÙtxÑ1 £5ÃJH1 ïÀŒc¦¶•! 
L¥RA4í]Ahm0,…‚ZãšÍª‚HLÛ%s@ô¦i¶Í‹Ç9ËD–Ã0`†wÀ±\æfðÄa`•àX,†D" ¬®®:û2Éà ÙlâòåËØÚÚB"‘è[ul^hZÛnYLÓDز8!%õÖy×þ«„,å²Ò|§´àc?­U-Zû¢J Úþ>—‹¹bšJã¹\¹}?­­”ñ Ù6µ“žã•qª¸“¥f¨*Áù|š¦¡z\zß]€é¾ûîsªç¡€Å1ý‹D–UAš~‹ YpÒioùz¦{-!$@ôZ‚=•M*¥<áTJ9ª¥U2WÄYzÔ $ÀôËó´ßÑ(·û"m µk(Â… <­®®buuu!fV‡¥\.«ÍˆËeJÈRaG%=·³áˆ˜|¾Û\Kµ÷‰ÓߣQFò‰¯XV{Á%nIF– /3Ýœ)—UàP´Ïñ9q1”à ¨5ª—/_œ={ÖYÃ$GhEãñ$ ¬¬¨É 7ÜΆ,+#¥“0d™*U²Œ¸ Š –eµkkVI†rX‹Å"vvvœÿïìì`}}›››~·dÚô\BL¯|i”â2„ÃhmŸÚþü€ÔIÉ¢a‡Ó4Û5}ú´*MH€ñZ1¨ëzKë¬ÄN0ÔÖ¬®®:Õ¼VWW±³³ÓµÝÍ¢#ûš1"O–ÎåRLÌ3:ÏjØ$ÀôÚ u`FA¥Â,’Ie¦u]o·áœm%KJ[À±×ž7„3Ða•íjÖ××çdfU¶º år¹ÿÆò„]×{Þ57XL³[¾žÁËRNªû…ð“€v̰rÖ‰, ^ÛÀ·9¬^9ĸj†h_« …ün÷XèºN‡•,º®²ÝôMôÚ„€àUa²-ÌM¹ d2Êq¥æI@Œ™®Y'âÉàŽ# mAGΰ’ í°. N±%YEH1Íî,_ë ËŠ×D©çvápkäŸÉpDG— g¥T²tˆì °3† àÄ8¬†a´ß*¦ÛÀâµ…pW±B–€^㘾õJ%õ ÃJ€a¨”É®õ«¬=@–„θKÛ˜œÎ*‚¡·µÙØØèz®X,â¾ûîk{n{{{è7—5°±X¬çñP(4•­sºfŸL“•#ÉL™µ¾x'~3Þ+830›€öLÀ<Ç';ï ¬{‰Ÿ)1/†z¸W䙦٠8ê:uN2Ða=uêT[Á%¡—À‡¡^¯#›Í¢ÑhPëc …‚sÍF£l6ët&MÓP(&ú ]éî]º ™"óÐw§ñïèt’Ëyו'd æiÃãñ!Ö¯V*ªSPãdBüŸ¨¤išÝ…ó™$SfÞ7Œîd]ב•}ÊX[† ÁÀ”àH$‚ÍÍÍ¡ð±±D"Ã0péÒ%$‰–plmm!‰8Çëõ:ŠÅâDÔs†•0/}·ËyÀŒ“aøýµ%bž6<bý*#ôdJø1>‘øy—®£Q:¬dêÌ[ãUޭ㔂0µMF`ä5¬µZ ÅbÑyȶ7£¼¾Ùl:Îm(Âúú:êõ:jµš3« …°ººŠj禓#Ò¶¾ùòdFÌKßþçÀVB¦Ä¼mxg5l ‡ÃÊ`2!þO”_juÖÕH§ˆ!SÅwʺoýBz0ôV]×qñâE'…@ØÙÙéJ'èG,ÃöövÛÖ8‡‡‡TÇ¿Ý׊Åb]ï;\Bfļô}tÔþ˲ú;¬Ì( SbÞ6<—þ^©ÐY%SÁ¯ñ‰Ð,šGfŽïrÓa%ã0Ô k­VC6›Å©S§P(`†ó( 8uêΟ??Ôlk(B"‘pþßh4P,±ººŠH$Ò·S4›ÍžÇž~úi|ùË_ölC×z'Y0BN$FW®\Á“O>9õkÏJßðÔSO¡V«y^£­€9ñÔj5\½zu&מ· ï ì{ÚsôOõzW®\Áµkצz]ÿìwŸ}…ɉCì÷ MƒcpËjrëºÞZ«Í;N2Õ~å°nmmAÓ4loow ŠÝÏomm ýÆÍfÅbwÝu‰.\¸à<ß‹~îé§ŸÆc=†Ë—/w3M³=Wž—N4‡‡‡3sX…iëPžË—/;P7æ T¦ Ÿ(._¾<“ÁŽ›yÙðNSÝ•Ì çO³rX…yÛïÝÝû “‰ØïYé˜ïÜMWÔ\ŽH' ƒªï)Áüßø¾ç­¯¯ãüùó¨×ëSƒëõ:¶¶¶ …pÿý÷w¥ô¢_ií›nº ñxܳðS×ì“kq99yHtqa—Yèn¾ùæžúæúUâfssÓ©ð8 æeÃM³»`‡gÅwxN24 >oû ´ .uͰ–ˬ zû=-“¼˜÷¼TjýÝ–IP.3Øxw >p†U:Í Ž#"&ªŸÍf<úÎÎqêÔ)hKKh4uÜ®5Oá08dfÌ[ßmÅ™óÒ¸a¨zJ­ÿw êY˜L™ùÛoàçþOºgW C¥K2eæ­ñÎõ«N:°®3 C†f Ã …¦š– ë:4MC­Vk{*‚“H$°··×öšd29ö{.Å ”  <á5*¸T`¦ø¡o]×oiÃA=™óÔ¸a´g³w­óûÿ·ww¡q¥÷Ǧ-}¢ÄÙ—Œ–¤J 9wEÚx`h £«b´¥D`]Ùd`z‘ƒ.r%è]Dì›È «lÈEâKz•qrJ)ÅgÙ')ÕYCp:xL¶lhª^œ9ó®y;ï¾+Ï3£óŒö?Îÿy]YaÃ%D&öûäDzôè|¸óØ$Á4µjµ:VætÏê¨ÕjªV«½í· 
…‚nÞ¼¹ô›Ì}²*IŽ$[Ò²Q-I’ÊÝïƒ_É=Iw$mIº;ÇÏqº¯e€z¢4â{æÁüQ@d’ŽñÁÐvgè¼@âQJ£ýÞÚ’¾ÿýÒÖÖ¿¦ýöq $ã÷îõÛð“““~Ç Óݱ ¹V˲T¯×Ƕ‹·K¥ÒÌõ«µZíÂuƒ×|ðàAïƒ5¸£Ù¢ÆFŸ‚Ïò6%x«ûµ¬ùÉÍî÷·ä'¯Oå'­·5œ´Þ‘?*û¾¤î¿íî÷ó$·—PÒñ-1%ÉJ2Ægxuâ:? "i´ßRK_ûÚ_Ž?lÛù»OAæ%ãår?augxwà÷ßOû×™k—àÝÝ]yž§íímžžöæ¶{ž§ÓÓSmooË󼙂E[o‡ýcà8ÎðÁ÷îùwAy28÷D~Ò<~[~"9-d4èÌz4ð3÷å'±ûÝçÜVêñ¾üdvcàß?ï¾ö`ä·»¯tÒý˜¨â[š° 6QÅxŸÎœI$$Êöû;ßùøâN:g’¨b|0aí¿cC{ŽÌa•ú 7‰G×”J%Õjµ™£«ih·ÛÃkC‚Ö< Ⱥüé¸ÁHè;ê”Jþhé¦üÄrp4ö ûX`¥û³4òØ£îµfäÞí^gCþcu_s¢~r{»[Ÿv÷‹™±à&&ì[dt&úÑþZÿüÏ•v5€XËT‡îUZ-öÀÂæJX¥~Òêyž\×í_cYVl[oGÁqÝœ'ßjEßkiüwKýÄ-’7¡~¹ÑýzGý©º5ùIgp“gË}-«ŸXN3)‘½èyAÒº/?¼Æmõàv÷yR r«[ïœõdÑ\7ñ¶í÷d’Ø"gö:ÚÆf˼óŽDI,rìÿþïZ[û›ñ‚Ñùð@½óŽôôéȆyì«%̰‚#nFw´ët:r]7’)2Qºp‡àÍ—œÍâÈY\øº-?Y[&!»'?ñ ªÜV££À´õ£kÏ-˟‡ Iÿ}AY0ÍXê'®GSÈ–Ÿ´Žº:ž¶Œ¹8óLo·íáy9@N ö5ŽÍ–a‡IäÜñ±«Bá÷?øFÎ ®_e† ˜k ë<\׸ëXÚÖ†·˜ô§·4œ@Í2é¹wä'XÁDûòà[òGXוNË)îHz«û}[ýÝ|' ·ÙRV&ü;xkêʪû^ß’Ÿ¨ÛòGŽ×ìëo1ÑØM<`ˆvÛŸ»á¡9÷ÑGÿ«¿ø‹óÉ…¥]= ”`r—DŠð"KX³hl‡àà\³`êì¨{š|VéÆÏ@\¯9ª®þèí¦úÝëþû@þ(ìWºu 6? FPmå3a]D0 |Wþç²ü6è$X‘ŸÄv8ßôé’r‡5¬0’mû{ä†6cUàßÿýôçþ›´«Ä¢Ýî7ÓÎvædtÂ:¶{ê`w­ñ$ôžüdut×Ý÷5ž4Îêü >—ûê¯?½%?‘mwÞS OŸ{Ý2“;¥FßÛ-ù¿ÛµòŸ<ï-ùÉj[~¿ÈH¹æÚt)o»bòov‚É㜛äÜþgG–õÇiWˆ…ãøÍôP²Új ÷DsZx kžŒOÙnûŸž 1I­ËO$¿"?Y FQ'͸œ÷^i4!›w$ixàËdôw;˜Ð®ÉÝRƒ§²üÿòÿß;*·ÔßùØäÄ”mû_’ÿ‚iÃÈ™Áõ«L'ƒ‰^{íLÿ÷…ð?Ȩµµ‘öÛq†×zs2>aÝÜe¯\ö¿‚ã`F§•O½«þ±,A‚äÈO|öåÀNÛøÉF_[ê'¹u OëF©ï¨?²Ý–ÞüäMýêõ_¥ýB™:¸x%S'‘{c;sÃ|éK?ÒÄ5?ív¿ƒÈ©µ5ÿVäà`$a¥óK˜™°º®«F£1óu:´ß˘±Ö ymË‘»=ðd[ývËò“ž[êîm¨?˜ûÿlý›>ø÷?Ñ®«?Úº!uV;zñÛi×<”©ÓWVUE®mnö÷™xž67óȱv[ú·ûݽ;¡v†Ìà¹N@γd†Ž—ä V,)²5¬…B!sGÚ\(èœßPÚ“Ç%uô3ŒÊòY˧`‡â-éÅ×^è³?û,í…2¶©`™;ÓKsé׿þÛÉ…Ì €‚í3ÆÖ°Òvc 3GX-ËÒááaÚõ\ØØ²@[ý¹5õ¦±5{šï­ç)›kÃ% §C{l½[—qq?LÒnK¿þõO%ýÃx!7õȹVKºsGÚßgÿDcæk³Ù ¶N§£f³™vݧj·Û~½ïÜÞ%u0á,ËULb§Y‘¿c0cSÞïÜa÷=ap€‰Ž˜hkKzûíÿ˜\ÈÞȹ`—÷¡½6‚mƒ%,5%øþýûªV«i×}ªÞtÉÑÞƒ3m6äOïeÔ946êtrÂÚap€‰‚a¢Ÿüä¿..dgwäÜÆ†T¯û÷)½ýHX‚±ç°özåGׂðHý3Nƒ —€Yž3éÿ! Wh9váò Çþñ¯^ÜÃVbh„•Ž„`lÂÚëÕ;l^ûüž¨Þ*c.œð‡Æh·Ofjߨ,þCŒh·¥7ÞøŸÉ…ì€ ܾÝ? 
¾w¯²¿Ï} ––zÂ:m-¬ëºòùä“áäûûÃ76¶$:x.=Ïóôá‡ê7¿ùM*׾ȴø–¤ßþö·j6›ò£FoæV˲dY–Æ…Û]w:5 •J¥©;Œ%oMº“v€°9.«;4êÈ7ÇqÆVÛfý*Œ1´¤ƒŽD`®cmvwwU­Vµ½½­J¥2””>~ü¸·8{ww7í÷Ó×jùGÙ0c&bý* 64lT°™ccS‚WVHXaö @”æJX-ËÒññ±ŽŽŽÔh4ÆÊ+•Švvv²s4G0ú´!¦ÃLLƒÁ.<ÒFâ¦fÚßO»@dZ­Öä°%Í•°Jþ.À»»»cÒ[–ÙºÕÈ8ŽŸ°²~¦r΢„±VVV´Å†b0Ô£GÒ®«v»íÏ"h·ýÙ@Hs'¬H6‹IŸ~Qº-éçiWˆÁÖ`¬ w¢d–ã8~ÇúÁ›.! '¬¹P.K*K’vE€˜p¦.£`ö ³666¤“–p 23w Î-[÷5`fÀ$o½ÅÌ¥Õjùߨ6gÅ#2æ&¬Ž$:v@7ötÂÀ ­VK›_ùŠÿb17am‰V0ÉÚSÌ`Žƒ–wÀ8ív[ù«_1ºŠH™™°²;0˜‡„¦h·ý5Ù$¬0ÌÆÆ†þî÷¿§­F¤ÌÜtIb:0Ȧƒnèa¤µµ5º;gÅ#Bf&¬b„dÓɉôsÎ݃Ø103aÈ¢{÷üz6¤‰66]EäÌ\à 0Ï;i×϶¥z=íZñX[ã¼lDބޓv €ðö÷¹¡€d>au]Wžç¥] Ä7LYŒÛvÿÜJ #–Šo’Uä÷(ÈŠÌ®aõÑǬ+W®¨V«%~ýeâ[’ž>}ª½ñƹ‹•<ƉäÇʳgÏr÷‡Úqýò—¿Líú´áù‘ÇÏæG}¤gÏžésŸû\*×_&¾i¿“—÷ö;­¶ö;?òøÙ îÁm¿3™°v: Ë^¾|9ñÃbY–¾þõ¯ëÍ7ßÔW¿úÕ´ßÂB …‚®^½ª«W¯¦]•…ýîw¿Óµk×Ò®ÆBž?®7ÞxCW®\IåúËÄ·$½ýöÛúò—¿œËXÉcœH~¬<þ<—mʾð…Ô®Ožyül¾þúëzöì™>ÿùϧrýeâ›ö;yyo¿_ýõT®Oûyül÷à‹¶ß™LX§!Œ*‹___×úúzÚU_J©TJ» —²îiY&¾%é»ßýnÚU_q’¬´ß´áù‘Ǻ§]çeâ›öóJû÷Mûy®û¢2¹†uuuUÒð´Ïó¦ÞÌyA|ÃtÄ8LF|ÃdÄ7²(“ k±XT©TZ,oÛ¶677Ó®ñ Óã0ñ “ßÈ¢+ççççiWb×uU­VU,{‹¼/\ßä ñ Óã0ñ “ßÈšÌ&¬’¿ð;Øjú2ÍÓÆå@|ÃtÄ8LF|ÃdÄ7²$Ó +àòÊäÖ¼r]wêáîëN=ßjZù¬×Fù¢¬w’uG¼âŒïyÊ£zQ×ø6G˜75¾“ª;âEûM|›Žö;½º'â¡=yòäüúõëçëëëçëëëçׯ_?òäI¯üìììü[ßúV¯ü½÷Þzý´òY¯Ò÷¾÷½óoûÛ‘Ô;éº#>qÆ÷<åQï°u#¾Í&ÆMï$ëŽøÐ~§_wÄ‹öûrÄ7#¬¨V«*•JrG|ðJ¥’êõz¯|ooOÅb±WÆ\å³^•f³©£££¡ÇÂÔ;ɺ#^qÆ÷<åQ˜ßaëF|›#LŒ›ßIÕñ¢ý&¾MGû}Iâ;íŒ9ï~úÓŸž¯¯¯Ÿ¿xñ¢÷ØÙÙÙùúúúù“'O†¾üà?8¿~ýúØsGËg½6*/^¼8ÿÆ7¾qþÞ{ïõzxÂÔ{žräCœñ=Oy&Åwغßæã¦ÆwRuG¼h¿‰oÓÑ~_žøf„5$˲ƶú>;;“$ …Þ÷–e ½&˜O>­|Ök£²··§J¥Ò;,zV½¢(G>Äßó”GaR|‡­ñmŽ01nj|'UwÄ‹ö›ø6í÷å‰oÖ …ÂÐvßžç©Ñh¨R©¨X,N ŽN§3µüéÓ§S_…ÓÓSyž§Z­6ôx˜zÏSŽ|ˆ3¾“ˆ“‹â;x/ËÖ-‰Ï&’&ÆgÅA^ã›6Ü ´ßéÕÉ ýN§îi aH§ÓQ£ÑÐ7T*•´»»Û{ü"/_¾œZþüùó©¯ Ëó<õê:ú~–­÷<åÈ—8â;î8™ßaë÷gÉ[&ÆgÅA^ã›6Ü,´ßÉÖÉ£ýN®îi!a€ëºªV«r]WÇÇÇC½%ƒÃñ£ŠÅâÔòi5‹ÅÐõ>==U¡PmÛj4r]Wgggj4zõÕW—®÷<åȸâ;î8™ßžçeú³‰d-ã³â ¯ñMnÚïäëŽdÑ~'[÷´°F ^¯÷æÑI0/}pxÞó¼^ÀL+ŸõÚ°®]»váöµ×^[ºÞó”#?âŠïyÊØßaëF|›eÙ75¾ã®;’CûM|›Žö;Ùº§…„5$Û¶åyžÊå²šÍæÐ—ä÷d”J%žž½fsssfù¬×†U*•T«Õz_–eiuuUµZMK×{žräCœñ=OyÓâ»X,fú³‰ä„‰qSã;îº#´ßÄ·éh¿/O|_9???O»yÖh4&žŸ$IŽãHêOW(‹êt:* C»šM+Ÿõڨߋëº:<< ]ïyÊ‘}qÇ÷<åQ¾—Áø[7âÛ acÜÔøN²îˆíw6êŽøÐ~§_÷¤°&¤ÓéÈu]I“çÍO+ŸõÚ¬Ö;íº#9yŽ“¼~6‘¬0q×øN»îHFžc$ÏuGrh¿óß$¬€Lb + 
“HX™D È$V@&‘°2‰„I$¬ˆT³ÙT§ÓI»@lˆq˜Œø†Éˆo˜Ìäø&aE¤ªÕjïbÀDÄ8LF|ÃdÄ7Lfr|“°2‰„5AÁP½çy:==•mÛ½2×uutt[ÏHpíN§#Û¶eÛv"Ó<Ï3zІ¥ãiÅ·DŒ_&´á0í7LFûo˜v.“jµªJ¥"Û¶eY–šÍ¦J¥’ŠÅ¢šÍ¦ …‚†vvvT«Õ"¿v­VÓýû÷eYVïCy||¬b±Ëûu]WÕjUårY¥R)öß/Ò—VŒ§ß1~ÙІÃd´ß0íw¾1š°f³©èððPµZMÍfS’ôðáC«\.ÇÖÃsÿþ}êððP<Ð+¯¼¢ÓÓÓX®5øAÙÝÝí÷‰ìI+Æ“Œo‰¿¬hÃa2Úo˜Œö;¿aMØææ¦ …‚$ɲ,IR¹\®®Ný°S.R©T.ì­©T*½k ­®®Ær×uuÿþ}]»v͘ æ&Æóß1~™¥ßA9m8âDû “qž_Œ°"rFC¯¼òŠ?~lļy`1“ß0ñ “™ߌ°æL±XŒ|n}ÔשT*ªÕjúæ7¿©F£aT╇ø–ˆq,'©ø{-âË ý†éòã¦Æ7 kθ®«F£qay­VëM9Hë:årY…BA»»»ª×ëÆ,øFüòß1Žå$ßa¯E|c´ß0]bÜÔø&aÍ™b±¨›7oN-ÏÊu‚ÉÞÞž>|˜ü/ ¹“§ø–ˆq,&©øŽêZÄ7Aû Óå)ÆM‹oÖœ) ‰ô”DuÝÝ]ݸqCF#±©pȯ¼Å·DŒc~IÅw”×"¾1/Úo˜.o1nR|_9???O»Œb—`@&‘°2‰„I$¬€L"ad + “HX™ôÿû4²/¨$JP%tEXtdate:create2019-03-28T17:36:49-05:00T¾=@%tEXtdate:modify2019-03-28T17:36:49-05:00%ã…ü-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1080x792+0+0_Ýx+tEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/docs/graphs/large/l3_perf_has_jc4ic3jr2_nt24.pdf000066400000000000000000001045671360743507500233100ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190328153323-05'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½KÏ%É‘¸¯_‘ËÑ¢.ýýXÌB ˆ„4Ø#˜… UÍžiU·Tl` ýzcæááæ~™Y_#ÁêGÞûùð—ùq{ÛÿøÎrøŸïùOíáÓüν|Èò×ûþü?¾+/ßñ‡P›ü_sK¹~ò)}ŠEžÅßüÎÇÔñá¿éï¼Çgw`Ó?~÷ÿ|÷ÏèýOøþùÛýãŸ~øôßýß?¯Ç¿Þ|ÍŸBÌLA>ý7ùT g‚Fùðßýìcù0íOßýçÿ‚ý×ï§ÿ€þþ?4•”[ŒŸþÿï¼ûôßöýû‡úÁ4~¼'öD/!ÖW ׌îoñ£sηøPÇxûkö;¿<Þí5CYÙùÿ”Þ{óõÙÙê~ŽÏG§¿úµÿäË‹gûÿ€!èûÿô™¡Ÿñ¥¤äÍ¡½Z¬±…$èðw¸!ãûà_|úßþóÿÁ5þ7Ÿþ˧?ü‡ïþÝdº_ì+8^=Ä’ªÏé«}=uçÁdÇaø<®³)7< 3ÁùW«g§ãÏÏuìK{ýŽ¿>ØmÌrî÷~ÇŸìgèìVþø<ÕºW è±ä'ÉI:œäd;}”œØñ$'Óï£ä$ÝNr²ý>JNÒñ 'Û­!§/ß!¾ãÕ-´†¡â·áåq½ÕÖÁ¬ Lÿèåa; —\+1;Ü$ÇÝ®NÂ/ëÅ5ÜB©õâßÝP³—ô‹zi8ÈÑû\ù\wåõQª¹Š+>öö™NšéDx‰öÂemy‰ïÝËÉð÷w‡ËHÖ_xÝ+µp†Â'àÕ+å˜c ñHÁ?YGò›_ÿöwÿû_ýñ_~úáÒÊÞ|vjê¯â‘¡JŒ¡¯Õ÷xîe$·ñ}Ò¾ÿü§~üñãì†=sî/BxŠ1•˜€¡Xš˜KŽ¥ÒAn¥¢-”ö)xÿ 
®ä¦xç[Ʃ’&®)¨Si­t‘VÞIÉ.O¶ƒ·ª©±­ãýu²\îÕ#zv1JÞɱààj@†Ø"´É;;ÞÙØ_k8nõ•žlMáÊ¡xPŽéå †’„߯ÌxeÍ…§ô•c#;Þ¤ ¯äPb—ÇpsåÐCb&[^™)Ò¹F°¼ ªkƒ£÷3¨àY›×ç@Ÿþºñ\ ÍûÉ|­JÊXÏeÇS9É;˜ÞkÁ|Åû{jÙëcx%‡°µ8–/ôÔp6¥;ÐIå5‚!‚(šoÝK^É¡d¤ÍÕÐKÍÒ]Ä;¹A8‡h‹¯’}ì E¶%\iJtYÚÀ‰gì–¬XÂ;ç’a,Õ‘–’Œ%ñh²¿å9,sÂ;¥-ãÜ¡9–ðat~ïäXb)Ò–zr˜`¶F®›',b@sücÁË.Šõ>dJ…Éà ^vQ,_LÆ›½¶áeXÖ'í±à©cØ8jØš$ÐÇ2Få¢ 5¿qéc$”\‘yáðdîMËŠ{U@4Þ!ïÄái“`]Ǿ¶RÔ6¼sR,žK¸?°EÚðÎI±è¯dW[î&àð´¹üµÒb‘EÄÉ \\t¼&^¸†"èOŽNN®×MЧ ·Zé5âèt ÄǤÏe¬WèN”8:ƒ_ë;È:Ä mí58› 2µ$‹ýê“^j-ØÐØ¥ ¯œôŠ5)#ÏN^‰Q&½òý1„e"ŽNçšøT䜢Ðvé䕵ëX\Å ýàôî0½Tx{âì€ÈR•cqv:öÃàsç–[Òµ-ãä^ôŠþŒ%w°ˆ³S&½ò­×ÓP×ð× Ìœu§6›¤ØÿŸfšËÛaÃí¶ߘp@r>a'?y<ëãÛ§4äØlãbÌ 8ô:`»aŽK\K9`[È7‚Ôó Û)á„T¢?p;Ù€O5° Ô)íô€¬°OFØFo ø‘¾°yDðÿ±¯l'àa‰ó–0°çbm¸{rØaÝápÐF8`;ñLZ(l§ðŠNn¥¶c²Äj¶¶#n¬ê-í@mðÎx M¾¿a‚ÊfXÐÆZªà üÚ‘d^cjºú´9_A€6Ú°ƒ1W%ÚXSÀ= çp‡M”” hã9àlÁ©k;hc±‚ ÒØÆìj&» ;j`;ò4†Rñl“*œÞ¨lsêbr‡mv‡ë¯Ýa«)g=µpÀ66¬;†îÒÛx'®unD9`Cé.#K>`ÏQ>¨øõÛ|'_n‚ ¶yŒµ±Ž¶¶1pœþÚø]ÂýîÞ.IGŽF¬EÿŒ°í'l·OñSø?‚ÜxÀí…7 7ÚÀ±áž«r;²–¼Kß‘;@XÆ`dQ6ä/ù”â ܹ=ÓüÜ¡±ÈdýëÜàïвg ƒÜ\uHZ)ÂŽÜ8¡¼°ÈþÈÉ­–Žñ•¹#Þ:@8È °5Ë”Q7ÈM0«$ܶ7(*Ü“^ù=Ü€-ЇAoÁ ¸! 
` Úĸq#»N¼(¬àÎ$5Ý» ¹I™Ø*—ó‰ÜØHEs 7V ìP<ǹq1ežÈ¨S·È_¹`JiAn¼ Dˆ³ ¬žEnÜx+8A݃Ü1`ΉÛÝäÎ(lb: `_³þ ºÑNó¯tc’ #ÁKû Ý8uØ Î ÝŽ”(ÐÐݰ¹ã”È«‚Fª*èYävœ®ƒz7£+®ö¸q÷dL¿)³j;ð9pf9ïÀ ¢ d•UÎ;€$‰KPäµ ¸kYÜÆ(ñZ‡OÜÆ;0mð<íÄm*VJÃ5XÜÆ¶V068BåÄíü¢Êº¸—t0C”9àÞÞ)ÀØŽ^çç«%!pCx©88 ìƒåæYŸÍs€³Vœ Ö2Ü =œePè©&Á’x^de@ó ÛŽqŒ«Rƒ…m´,%àð„mkxÊÈlp3q´­z^<°Ýئ¬$ȹ1°Ë¬€h›“EÙô$ ïŒôœØ`Ñä”ú¬žC)µ 9[ÔÆCoÃFµµÑVÀKø$lϦ%ÁD=ÕŠ|VKB;dǪ“³ZRät)Ú\¬NQ®gS’t –ÊÚÔ9á Ï1½Q’t^Õü ÚØêDåjÕõµ:HÙ]µl‡Ž’;Nf­hCJNÒí$'Ûï£ä$r²Ý~Ì=Â-œ”Êcî1¶“§Ücl/O¹Ç˜^r1}|•{̼O¢Ø’[l›ò¼OÜê}òçÿùœ÷Iì›2ÂZ§€ÞÌ­ºpÚg]¯$r\¢¶bŠ©bÉ­ÍOÔVL1rs* â°ˆ†‰êŠ)¦&Ê/ J‘(µ·Ure].ÉßÌ—v­Üå•Á(UhÒö`—Ä= ã|Bm/TkKÛ¢TÁc„ëTØOÔWL)Õ×W--ç$ —$Ö—iÌOTBõ¦æÁD}ÅSéER UŸKxç-¦znÔ¦öÛ”V¥J¢ƒƒÃÕ•¥É8Ÿ$Q”GÕW¤lœOÒË7W][ÊF­B}X¤ .ÃÌxç”S±#¹æ\¼¨óSÁ;§œJG ˆÙ‹:?Qo1åTHÚ8Q¥©U{üJSN¥ƒ €1'¹R5Î'žN2€N1;à«^…ú+Ì.‹dŸª*VDréà›Q¬Ä½AÆ–iF±ñu^Û6çîSò¢³KÝ(V0±ŒµU{}êÆûÄaß|]Èžv£Xdç*dgg+8Ð5`Ûš¶mÞ'%•R³îÙÅJxa­²KBÎø…U¬`àØhyÌoþ'E§ªÑ­äºª8”¨}çrfê,¼8Ä©#IƯ‚:åä°ùŸtGÈè”(‹è(ópú¨S'™ãæ€RA˜Žè¨ÌÆXB¢ÎˆÕବJÜT+•¯$d™i¿ÇP‚ïUÕ.@õ>Ã^YÕ ¦„  ‚3½ªV€a!ÓJ.m8>'Ù¥ªê“6rÞ+ø…’ àðДM ¾´yR2Ðh(@D×’°Ó^÷”ûɆÛ4ZávÇ>í¸M&nžÜÛT¡·’bLBy·ÈëC>a'¢F^”¢I³¸ÝÝ‹ÆMìö‰Û-¡;]º·[' äDóhq[|éÿsâv«¢X­øß·›*éZ-n7:ÐC|#,n7*ÃqŠ‚¬Ám<“6ê¶ãvÃ}žjÅ n7pxx½¸ç2•z¥Ô·Ñæ°­‰ÔâvshpÜn4‡ÕBO’·+p«!Ðâ68«P¡vøÀm€1†Í-lWZ20Ju°ÍÓŒû´)¬YØŽ¸6kLN-lÇ‚õh´0·¶¥ $©ËÑ7÷“NÖª·µ)àfÄ%WÔŽÂׄ’Ä eQ›‘\`eIOjczôÀ’ÙQ›ž:¸nƒZ* j£ ûÁˆÇµÑÈðdq±¨M·zg)YÔÆj’­÷ºÛµ#SâÁÔ¦oG÷êþgQ›ÏáåÕ×µÑ.Ü4¬¾µÅ¬VD-»µñ\É‘ìD=P›ïu¥NÔÆP NÒ‚ÛÔF„à@=pm¼¬¢ÚÞ,n {Iš—C %ø§L—­ð1Íç‡Í˜º=ŽŽî'}Çn¢[#«Ñm Ïsë2=”õ<ž›¢iƒpѪ?ynáJ}ì'vË‚Õb‰vÓ»Ëѧ¥÷»!­DÂnüøÎs†èõc7„ ‚Äq8±;ð…@Vä^µØ¶HHZ±Ø—xÊ 9…»ÑŸ¸®©‘Ð`7_éxeçz`w pÃ7Ôv`·2€†‚òê»1U<VV¯&ƒÝx»ì¼Új,vÓú^ºHêvª°(ø¶»9N¼,Z:°;0ìbJIùoÎ=Ò~ðêÞxÀµ(·ËÞÄŠfêr`Ám!ʆŸàM+W‰à!ßðÜÜ› >ªŠ»›Eoй¯ÒÄËÝ¢7CêNä/‹ÞŒ'¸›ÛÞVKj ™½Ñ_“«)èÍ&ˆz¡Ÿ,wèX”·±Ø"]Zð]¹qƒÝî³¥ú.·ÿ†ÝáEïߦ’ Ån 嵐ùHV¥ #Y°*c²qïŒ]÷^Íøý­TASöet(ySªTÆóªB²£T‘\M£µj1:'QÎQmk1JG>(9 !Än,JÄ\©á‘¶U©B„Ò\Žº*UÄm¢ðص¡ý=KOõ!9öƒvÌ´_ ¼µl¶àÂY³%ì­¦ Y> fKäIDkÒ3˜ÍkÂh­æ†Ù¼ô#Ál¶Ñ 5¢9 fwZ´ã¥´˜ÍÎÊØIî`1»1Ü!´J:@W-3&¸Ñ´bv%4€1îîÄìÒ!Ü*ñ´µ‹¯º×3 ]Ê‹A‚^Ãe,h%;Ƽ MÂŽ¸#šc ÚÅ1úÛ—ú´3íZØ+U©ZÐδ•ÑJãNÐf8#úëšäÁ‚v¦½Ø¹¦Ñ(´y©2¶SS}XÐÎ Åbz½p´Så‡õk—íDEkêUÃl,j'æ‚ñœ¶?P;I¢€ÅÍj3þ4RŬ'Ø 
6C&.…¶¼†3šâ“íÄ(4—ýÀѶ1€¨F¾mºg%:Ÿ+vØFwŒF‹YØNŒ„ª=ºµÑDëíA·¨$¾Ó jó9‰rÌù@íDzº,¿µ±b¸x„µÑ7Ž.N\?@;ÑQËœõ¾2 Í=ðœ{ 6FrÆÙOÔf€g ÈNÔf ,a̾¨Í0·D6V™ƒÚITá•2Ê…Ú)Ü¡{Z1-p“ŸðÈæw䯢aq4¹‘AnºS€•Ó©(k¸íÂ|ed*±ÌvaÒ*\ŪٵÌv&¥”Ö•Ö-³íÉ0ì°ã6vŽQ€N=6^;“ÈLô“×&÷¤AίMóEˆ mãµøÆ\Pz+Y^…­$<œÜ6¸æ’˜w À-™Ã:Oo:¹m„ÁkD½CVàcQÙK˜ÚÆmCàáñ*ÔXn[$@oʓێ´¼'&“9¹mrÍàE”ï·Ì6Ú3,ùäµ x6‰]ñàµé±BÖì†m04H~ ¯ ªe\üblܘmz ‚)Hz¤,³6HÕ¹ö¶Ñ†“)íÞñÚ‰–ºÔËÉl’.wxÇl¯\³a¶FRKˆoxm¹—ó‘Ksãµ1qì åö“×fl:°2ªH°óÚ]ÃÝýÉk»5ŸŠEm2f™¬¶bžåµ=mДyÚÚ¤;·@ç²£6Y{\VAƒ­7^|†£w{ÇjG ¤zƒÚlË…Î`oym`ÄĹñÚL \ðî ¯‹âÐßòÚ„û®+Ýôç5CR.é«,˜M2J<5>ÊàËGárØü  ómïOè,Ç<~\¦öH7 šÖÄûÛãæDpä·s~y¼Ûk†²´óËãVÌ1Aéõúü5VÌ\$¦œ>Ù5jxj# œêZl¿ž¤ãax²Ý>hÇ\è¶SEó¼!S{œe{}6¾sþ¦(Óñ³9ð¥ßIR¶ãg“à«ÁNiÊöû1c&þD~¯å vý1kæÖËSæÌ­›§ì™¶›‡ š¶“',š¹É”)Ž~Á¢YW‹æ¿þô\pfÓ”Ÿ·r¼;¦‚–³Mô Ó¢iídÊQr‰-¥Õ¤ÉŒj°€]Ku5iª8ÅØ¶e·hYZzÑãSD2¶¥Õ¤)ùg#Ä/mu5izILª¨ Zq‹š…I˜g:Hx«š¥Bœbæ7A¨†5 µ?öœØ-‰áŽš ÿwi¾‰2µÕ´êY-3=j"0ˆ{VÏÒ‚£ &ciκŠÚU»Ü°ƪ‰õËISsV_qVΩ9ªz³u·èYxÊcOE$ëiÕ³à±P!íŠtÚz]õ,Ôÿä‚W² ²üªg¡pïq&d‡@«ž%¼h®ê-ésuQ´PeÐd}c÷n ÏìTr@Å  ü*¹25WÓh«Îf*êrôB *†©hñG:²ÎÄðwt&sV»”Џ wª©hñ™Òsìj éÑ­Ñ™•Êý“hZðæ5:“Ñ`®3šBÚê¢igm@S‘(¾žÜ‰3 `qªçêÌ#|Q,Ó½×¼"]‚3©Žo¤ic*ýI±ž19!´&ŠË¢jaŽªÄÀ`¡®..SÕ’(Êtý2uj:'Åz¨ 5ãÔÚ¼WÄ•4ª¶—º¨Zlv+ÌËf¾ª”.ÚÒŽÃs[5é™BsJ”Íîxb²GÈ@d ªZT-Ì/VµÒ2;¢ÈOÌ4^À…(V<üqQµà¹x§‹ÅpWU ´3†©j[ZU-€ZTï Ìy ϳfÂoâ¥á»Bö]- O™57äfEˆŒÌSº#wdµ â‰ÜÔl:ÚÍDfOH¹oâÄb;HIŸs “An/VUr¸ÊJ 8üynT†0øšå´à–L¸ø¡×À¸™ òùRÜÌoâöÀÝy 278À-C0ÊÔÛÜÜ}ÜÁÇt7¸¹ÚSñâ®aq»g|‰ wà6nHPz[©ÅíîÖdË·9Œ9±à[ÜÆH˜dÚkx¼ÅíΰNVº¹Åíî‰ayœ,Û¡¼Ôǵ&ÇÀô¢KQ»Qy™—ØfÔƒ\lj2°Ýè•qà$’ÈÂvn8‰DÛLÁtwêvaa;Kú_Ð’0(¶³¤‰±×µs§å*ybP_"ïq­°¨MmrMhr¢vŽ/°RQ3SZÐÆï€’9%½O hg†³vчXÐæPÐ’Õ9Á‚v¦±Áó€ø´ÑƼ—Aó [ÐÎUlÚHµnP›á7ç!êP jÓ¾ÝAxzö-j3õ%ÈB=P›z{¾´¤v 6‡É¯®KfPÜ^ÇNaå@mt±\’ eGm1eã¢neê±¹üL†îÚDzƒÜ®i‘›¦:føšäÆA¢ûš÷'r‹Ñ™”-rËÀ(ßiYn¦ñ§3`=9nÆ{2£µJËq—Ìk•)œÛÎq–Î`E„¸é_ÁRµH¼§å¸™&—°g€›ùÆÎÞúÉq'n¢ -sc[R¯6ß5Žß7Ýt±Š1Å7 °¼ûàÆWä¦{FH’ørGnFh빋ŒóMcÞܱ¿hÉËêCe‘;1à³\qùñNnIvb¸µÈÍš 3-©EnˆuŒ…oj³ÐÍpV‰ëvþ†nÆ ­Ìßýößÿþ17óB6™£¢&»F@/XãÔèùBæÁÒUÛ6u˜*ów³z‹cžé¿4Õßý÷þùï~ûo?>Ý:jšiƒVÀÆ\¤LöÚö5ÓŽŒnb9ÈþÒ´ÿÝÿû§þù«æüA7§uÚL‚Vè%ã±Û¦ík¦Í| 
@î˜ý¡Ôœ³þÿ×o¿‚¬ÙœŽ>¼µ÷9¯m_á·àq6.s¢®ê¸ÀZtíOV]Ûÿ3&Š|ù.ÌÙ=Ò÷«óÂòõq7út½–žë·ò_˜“Ôõß÷`¸æ8ú­Á‡áÃةס$](„?ìÑP™…išcAþ†<Ö³9aà‘]¼NþãgÓÌH*ꑳÌOÃÎRˆÝvû°:/Ž ¶ç‡íÐyumØz~Øo熭ã'½V f\Øÿ\Ü£„%]Þ„e»}–°Ôü",Óó³„5sÉ…7=?KXZÕe–íøc.IE ±6=Wš}ëå)‡­›§\l7¹8ØNžpq æUC tjø¼‡CZýþü\ésat˜p;K¢‰ÞÃ1L× ~–«w.ÇêµÂ.5MÂ)XÜs1”±†’ó5x?ƒè5cqâ½îñz)ì%Õ¯™™³2pPXhL’/Š'>Ž%ºÜh\•®©KnN_µË¶º9$fÇËETµhÂ;Á.å–G"ÇÌÀùë¹UëÊ*œµ¥`ÇÈ·ÕÑ!ºs]u+=IRÐë†Rem yÒ±d¦féy§Õx?°D¨»-£Ñ¸?0B…I´³³ÇN:– WµÍé™É¶OB–'A¶Z@Ñ‹‡ÒíAÚÑ—N³<$}µ6R×9)Ù1Vˆq´Úe_T²L«IÝ¥O:Ö¾êdù\fRY1ñbBNÊ)!SOôÂlÚ˜^q2ëùȲŸ£qUË2™6Ë5p׿êeñËÆ«§èF©w2ž”ú_QêxzZ¨Ê$d&¿Î…ñôýR²Èűþ˜þæCªÃæ£àqbÑ´æç×]a¬ÌS8e¸­êÜ-ÌÓéUT» Y˜ç2…VÕc‡y.p‘énàõ ó|?@…¸ã<¤Á[ê ’ˆœ£Ñm4@Oe!ÓºÆq@ Ћ‹FfšÌ '‘óÚHËŠ{«KÔI-K}´Þ<ê{[ËÞnPO—Ú Ê„é.r»\mH¯>$åz"=M¤ÀM0…ñDzØDI7¤ÏÚpzYæòDzšùRxÐ ³ *hì зÆBÝxK‹ ÐwM|™´Bìôâ«ÁJ–åÄùF«¶Pëëm8Ͼ˜y ŽëÈà|¦Í” ÜO˜ç—;ò†ó…K!¨[ßô`ÕEq §¡¹ìcì'ÐÓJÌÀ¹Þ=D\zs¹qÓœguà’t©œg‰ãNs£‹óÌÚ#†šã‰ó™9HÀ±tWOœÏù&…çé6ѲNkúļ4\tÃyæ ßÌÒÕ'ÎÓq‚É S}ƒó䎫†pn8á¤î½æ8Ý`>»—2ãAóôhöÖÞÀ<謱¸g½Q/c~½Ÿ¬Ïkqh]Yª„‰íœ§›©ÔG-‹ó\}80×à1p"p´ì6_Ц­äü†¥Çk `E-»èÅ»¡Š‡ü ô¬ý ù¢»w@O OÉí G/éðyEÄzâ°üô\Þ€ž"-ÓŽµÒO ç,ñÚáö°½æôÆâ©`ËR#X£ø)èœÑªŸ2Ǿéýmê˜ÇËÜžè'DÍ­sº¿eÑÞ¶òØ£“¤Ðc-¯/w{ÍP×öþÖî ÏGç;öôúò¦Û/jaòÒ€¨.…~ÑóDÕ7ÊÞ_hz¼`aÄÍÁ$Ê?¿¯ÇM¯óÞ'þ‘m§üés¸Î§ÜÖùg&Ñ»ŽBX¦£çµí¹!Dzb‘Û‹’–É Á´=8„sÙã£Ë?³ìñùe_IÚÝxû©I‡vζço3çÔ̾©ÉÎeŽÔ¤ÃÏ,û[RûòíC>¶´Ð/0:äg° Oà×Ûfm'…ÏÃeÚß¿lhô¿¬—Ñ&Qiú@E°í%0øÀ—î)[üå^â/ë’¬d§*Ìûôs­ÌâF«·>`ßÓûœÿAû~1ßCYáœJÔnR‘ÎO8W¢#ØGRðOÖ¡üæ×¿ýÝßÿþWü—Ÿ~8Æ´ú¥}v jb ©. 
õ,’Ì´³:ÑÑu\S~ÿ×?ýð\€<ÓnÏRj‰ä¬L%l s ÎX¢`>éM:¦ôJ.¾ â«—;µ²‘¼#^#K/ö4<[©æ-Ò (+ÖÈB¯—йköAÑåF¦3eŽ8…:jÕ§†)2Ò§ÅQQ“ÚøYAÅ ñP°4Ì(ic[W´í® é`Dÿr-Êh¯£ê)'¶Ñ°‹W*Ñ•œA:â†3íºeÄá°ß0.gŠ£¬Ê&zÙ uŸF:ɘ™g·§Z'F§^ÔØÔ0”k$aI>™€õ¯D{*ž«~ZÔ¨³¢•ÜB\rJgZ?M5÷ +ÕËJrÓd×drÑak@¤žC”úh%ÞîÅò¤ß¹ŽI3öŒ³ÃÇXñk,H^2²‰QܵëÖä¥tZ$Áõ–‡ê/dfŒŒÀóÔ‘ÚBY*§ñ÷ x]¿²ä”W¹4t´øË]7-2[l¸êІºdd_çA¤ýP—¬ƒ|ŠIͯõ«KÕ4væ*£Lå±¶$ŒX²œ¢+N›–šilªÈ'¢Ð–¤ƒì ø46¤ßÓ"3©¶4w±/ù¾égìÓ6^%þØÔç~DçEe$þøFüh,otKºA;æô!† êL8å @TôKªA¾®ò(4‰°ŠZkzP'H{QNڶ䌒¦7Jœ.Û¨ÿ¹ªûɤG ¯´-ÉùœTÓ¨ú–dƒ&äÎË ™ºñªîÇwl§KYÛ–dƒò΀×hú‚È¢n7‰Vf¾È@ئõÅbvÿ!G×Ú|-2·—rÉŠìÐÌhÏ’ž/‹ÎXû4xNL¾ˆc©“0ðŒ¤9¿(…xN>¾îDƒÎ, Éȥ뜯ðÌš‘†~=+:'W1DwaÑÎòBzpæS’©GzÅf€€dy@°b3ê¥ÐÃͼÈXŒcÔaX±™eC¯kqƒfV½“æD-Š¢7¿C3›°}¸þêÌ*ûžF_š;} &G¼a3U6@ 4Óh&fh–r‚-2w!áp Ì´rbF׆dƨÛ0`¡#˜<†ÌÑÊb3nxÝ™›+ÓëLnÛ`s)@šÉçXpΠ^V*(8WI(6pÅbs¡´Ò릙y7X¸g°ô›+y_=Ypf}ì[ì0ØÌ¼¬ò2ÆmЙ5¼Ñ;Ó/ͦ1õE$´èÌuÏ“XÑ™î:gñp¸6Ùâsaõt   ‹rº©—  M¿¤+3Me‰Um°Y¹ É€t^Ô†ÑRnì£åŸ Ëɧwª "“»€Õ"4P"Ù`¼.ņ{@Þ£éÇs ¤ÑtWFØøg\©hJš;pSm$Ûè@-èTåX:¾D#tqåFËCK©‚Q&jc¡#$¡ò–…Ž+Í[Àrßg–…öâöó¢¹Ä%_Á¡Ýp©÷É^¯<4ˆ—‚öÆC«`8ˆÍB4=Ÿ(‰œê îg½ˆÞ"t$w‹ÐÌ&3f:J& 4Ø'‹Ð‘®Òi0“ ÙdùëB3M¯¬©v|–GËi>ìýB³ïXV^mvü¤–iq°~Ðûå]úå)qÏí‰~ðàâýr{Ü %‡Åûe~y¼Ûk†º¶÷·Ç½_ÆÇž†_àý˜s(0t çîaï€SŒ±¼Xý§ÍÓÞ/÷9¼Oü#ÛVÂrÞô²YæS$ ]îšgfwI0=—„~x¿˜!| —„~y¿Øž]öÝ%Áôüü²¯$½”ŽÔ¤C;gÛó·™óFjf߀Ôdç²?GjÒág–ýë¼_X¹KƒQ{yÌûÅvò”÷‹íå)ïÛËSÞ/¶—¯ò~ù‚oIÔ|â¥K6Ø/ø–øÕ·äÏÿó9߬ &UQôõ`îÜî[2­ƒ´¶¹)àv‰QÙ…ÁÅ+£†â•r>ù¥¼%U:’‹Zm½‰ê)Þæ¶Ø_˜2nÑÀ4ðS¹ åT FcúŒ ¦¼¢yuVLº-ê).Õ-ÙÂø Õ-%Vàœò-ÍryÌ…mM—Æí—™ñø2U¥´·Kßm?b^*ã^2‹8Óbl½K …-Í‹&¼â­)§ °(a<Kˆ¸ã±&K*áŠ1ð2Ð¥²T¶¤ ©ß©,•-ÙÛmwN¬¿yÛˆoÄa\Nus0!áû!UcƤ ùž\½Lª›aç©“FÙyØ=SÛ"7|«wh5IŽ3¶!4–T£´vt¯—N~hÂmAå @KÄòðL° bÃ8ÔÛih/È2ÌŸB3Bö²ˆlW{Øиvæ¬w„^ bCèŠòá3zç: Y>Ú£ú Ób4ó› ‡£ ¢EOq™ø-]Iß7Y:1 z"íÊB3 zúwXš®aᲞo4hç¾ Âæ¡íç5½ñÏi!FË>3 øÊErðÏ$å · ÿœ%‡¿^Ÿû|ûÉmÈÌšYàÊh[¹g|Ö|ŒëÜ^wòæsfjÀ†Ë¬Æ¢3#n×g‹ÎÌMs¯¿…gŠ· …ç,Iï® ßÂ3%Ê›7²ðŒçèŸ4ùÖÐ}u¸P€n·?ý‡Í˜¬p§ZXÖ–Wå+?©‰¡øg͘ïzByÍãÇenôSW3æýíq{bM‹s~y¼Ûk†º¶÷·Ç͘cŠcOÓ/0câd ¿ŒÕÓÃfL¦~b¦VpüáC&ӧ͘÷9¼Oü#ÛÖÂrÞ<‡Ë|ÊMÏÌf[²=?o[â6Û’Â7°-ÉÎeζ$~fÙ¿s%éobÆ”íœmÏßfΩ™!|R“!œËþ©I‡ŸYö¯3cF–pkQ„¶Ç̘¶“§Ì˜¶—§Ì˜¶—§Ì˜¶—'̘ ’Á‹ 7°þ_2cB¼ZŒ˜?ýÓcÅʨ߸ª&ZØ’Ê}@Õ`¢0Ýb8*!ßÙ©ì(·ÿ| 
«’œE¢ãŒ@/qÕÁtZ6¦Çg‰ù®fÉX”@£—êH!ü/Z˜Ú_Lî8Œe…ZŽ‘¾Žþ÷©\uIFGNË×ô-Tt¸‘¼.JUˆ+à¤äMIî¦*¬ä|—²Œ’7ï L-¹Ý•,EDŸ RV= v=Mk`Ç{Q”R3n££!‹Î‡Éÿ¯¼‹¢©Ÿ±O¥®ŠQâý{» Xòï·Ú©4£…é¢ ¼ö«-L¹V3~'[dÓ¢y,ÝhaÚ¢%(ØôY¾’1çP;”®JÈù~š€«ówùJ¶Õ© ¯Îè` S_Ê–!›IE-6u­Õ›8Læ÷›FêM¬÷~^i• ŽI ¢3»k0 r†p_ú¾JýÆEžÌ¹p£Õ°ª`ìSToLò¤zéî+n¡>¬[©Êç &Rõq¥ˆ­É¨`âê PÓªƒ¡Qü6Ödt0h‡½´f£ƒ¡yð2Ô¼ê`˜µñÊ Pó¦‚)wÌ\-FÃô·í ü¬Å¨`ÂÏP‹ÑÀPOwYP0«€YR^Ôj0X­;¼8±(`Ä`èΕ ñ+=eÁܰ9ݾó;8Ó¢ä.„g.þ Ó¶Ø,ñÎW$Œf*©o+¶Aæ-lÝ 3cÓY‡wh¿VdfSò—Ø ³„¦3­n‹AfZDsž),4wf2žW’ænbê 6Ó6ëÊe³Ø ¸»5×››_4ë›ÓF_–-ƒÍ„Ö«Fª…fÜoeR©Eç– (xÆ—%äÌÂ33™Ý!nŸ%мŠª]AêQ?h»|ÛÿšÇk"?®³{¤£^ëåýíq3bÏý¶^Î/w{ÍP÷þö¸õrLQû_¾ÆzI? ÀV`YÎð°õ²87ZG„…°þv¬—÷I¼Ïü3{Ç9þ$.3ÊßÂ~)Zë†íùy£‡°•̾QI†p.ûsF%éð3Ëþ ì— Q÷Þ¿…S{´³¶]£YoÔfÆð-RÞËΕ0ç½ôø™•ÿ:#&þDiâ‹S=eÅÜzyÊŒ¹uó”sëæ)CæÖÍ–Lu Jë™"ì—,™m Èüןž Èl±Z[æÐS`“[³2ØîŠh)­¦Ì°d›ji)+GvlõòhmæWKæâIÚrZ-™EÜ.é¨åºZ2ƒd,ÚíVÜ¢qâç?TŸjË[Ãdr3’goÑÇ„%uZ«Îªcêt¾n¬|w«cœäj‹V몎iKØbkÎjcZ¾ôí¥USÐY¹Ü¹‹Þ­¦¢:ƒKZ¿‹È©£÷½È=­nåqu)n¬wwkdÒ«MMkgµ»[#ÃÇf,9XãWî™Çôj««NÆ/!&mÀÏ¥¢ ÛÀ¢’ ë‚9ŠE“¬™W¬³ÀÝmÑtKö°Îòv·EÓ-ÙÃz¨V#³díÑm?SíÒcZ š«"§3»ÓmϤ6á¢üžœ ø‘2k¢™í)-öL¦¼œzÍÎÂv·=“1]3UUÏnÕÉÐ\<•=§U'Ó#xgº³[%³Rbçep«dúbÔé%­?”µ0HµƒöRWL_òAu´»2}Ýäš–€ŸÊøƒ¡vcFœ²ê ïð­ÎRvw¼Ö{VC\g%»[%S—4ÁøÛª’a>Ñ õîVuLc¸Ë_¼÷dC}î,òìnWò*y¶‡vPyƒ´Ú矲enØY€å pg÷. 
:SxçF·ð1È™°Ð sŠ«aÇ€s e:lØÌ¦Û²¾B3YÕ¯{ƒÌ)0aÝLAi™mqJdN¾®anšéAsgƒÌLd>óó`NR±~ÎkæÄ‚îíÒ'\–®úŒ4_™O-¹± 0'×Wƒ‹féíž™Af>7w\f„Wž65ËÌ<“mTföó;`Û¢2íwìÓ@åÊÌþ³€„EåÂøŠë†²¨\˜2g‚A岪®-(Öh¸Æf0™vÌûðLf.Ä~y[XLæŒîh)ÊL‡{¯«Ádêˆgt·…dd–Å@2Ûú4Ð[L¦ýç¶-ZP¦ÃP]Ú’ôY@Ô€2³IÞ{‹ÊÔ€O°2¨Ì »w¨­…et–—\ÆlîÔÌ—‰ýf 0ÓHuN-8ó•Ó¸k±Y’%}¸…f)ã’.uþA5w~óM™žÓB\óÌ쯗MÒÂ3àè¾w6æ¹®i§7æyc óL3Ô´HXæ¹­Æ ïL g<øæ&lØfÉœ–¹SäÆ3óù } ÇL/Ä™ûÄr̺7À,ž ÓZjæÖ̆c¦«Öz¸AXŽYâ+¯Ð=Ë03ió͆Y¬¶“ÏÞfÖ¥¸þ_^¥ª_΋WæÆ/ó˜NÞr°Ëß~¥ Ù¸e“Øwc—ãêÿ²ñË}}¥AfŽàrjÜ¸å ™b/^~g—ã,°±ËaµòY~™@zeX·ÐÌ C³²ƒ…fZ€±þøqËmÉŒj‘™Nawˆî†Ì~Éül™i&ñnÜòšfÃåÕuÕrË…Y&ûmÙ岊{¿\V‘ô`˜;—x¼ÓrÌý60n°ìÀoL÷É3ßÞÁÓœE|é^Óœ1ûãe”]ž†DaQÕÉ£’2ðh)å·#xF™§-óžà#=y¿3—¯›=`èµô\¿•=sNr¬ðýõq‹æ5Ë«çú lšÕñjwÀ‰Œƒû¬M³Rކ[¥|È~úxYååLÞçÿ‘­»ŽüógÒÌ)K¶Ñteœ}²Ë­Ì¯íû[›òaÚ´ƒø&Ö¦|7·¾Ÿ]ü½Æ²íû,þJÞ¾ CÒ|”èÔmÞÎÛöýæ½Ä· :åÏÅè¤ËÏ-þ×9KaJ¢ž<ūnjœ[/O9·nž2rn݇¶ñK­‰¶EŸC«ã ‡6Zë.ÊehÁ=ÎjìŸmͶŒÆM¥s B[•Ò@J¹|0ÎJžL?*Å”r© J3”áËPÛf½=륚}ŸÄËTSwFfÖ·[t;â> h2ººd’y:)¤´K3$õñZ) I*G)ñRq|+~иÙC—‘âh¬6Ѻ:Ä£‘‚ÝE»²ä“¤ ìI ¥Ý´šúfuì;Üý±Èœ3µc>‘\RÕ-Øi ÜTÆÝ<Iô;¬‘Ê™mÐÍŒÃÉ™Ú6LÏ‹ä‰éŒn•¼Ar©ìygµHNûçRñÌB¹Go¢…ršG{Xž\Áœ¹k]Ƴ‚9M‹=Ö:Í1‡ ÎXIýZÔâ9šK„ºÁs>¸$y³€N‹§¿¯Vƒçl»lâ;ž‹ùdsABØl¥µ\ ~-šKñéå\0§ëû´¤m`޹”Ì+Xè6•®š—º­npÎ8È,¸Áy1uð6nÜ9ýtãΙ2uæAÙtœÝk¶±ç ·^Y#Þã²Hí5{ÞÓs ÝŒ¡eÏq–.ËáÆœ‹£E5‘wæLc÷d™ó²Dçí¼yZBðvÖÜd<ßyó°&3Ì9!;Æ÷Ìy\’ÊìÌyX3çîÜ98·4Zö¼,và;_‹OŸÜù¦y²çî.æ¾ñçmÉÐs²çáv'=ô;EíΟW#@§luG†î€î^‹aà¼Zšß¸óò†+ïk¨æÆ•§¥–ÄΕçõÒܸò²¸íL9Ð1L+îΔÓuefqÙ¹ò~%!Ú9òÅfgÈI~“4•!€¡ËQàãWiÁï~”2 ü¤AÇE«~zÈØú¦÷Gô c?.s{¢Ÿ5XYçt+wTÉC%ìœd}ky}y¼Ûk†º¶óšgM¬cŠcOÇ—wÝ~Q_ê!âKº5Š[`æqBÚz£üý…öU€ ]HÀŸWÉús»zܼ:Oá}ÞÙ´qÆŸ>…ë|4ÿ©T1^JšJ Óéøóƒ³|ÃÙ¯þõA8e6ƒpô{ýùÁŽYêèVþø<ÙºÛmå9r’'9ÙN%'éø"'Ûï“ä$h0ÉÉôû,9Iǃœl·†œ¾|‡--´Äœ]D)Õ ãý­¡¶ÇrŽ«’{ãœóõVJÓ ¦b:œÏ™\ëáÇzaJ(W=+~„ÏôR~Y/ԇꮼ³SÎ^šéåW¿öŸ<ùOø‡¥Ïï)[ñ¿P?}¿_¿ðŽ‘6-Q¬”ñ)‰ÅPYNiIÁ?Y‡ò›_ÿöwÿû_ýñ_~úáH¬‡1¦Ï®AeÞ#Êû-HHsÃR×@Qù³Bÿø§ž‹Gõu©nT¨‚ÀªB&cm[òƒ•HÓ‚JÆMóƒ9ðNŸŠ£eÛA†Uq®áWò)ÂF¬Ø‹j.ºU˜-Ü…–¼\ð"{uMÖ)Àe©ÎÒ!·yQ£ô%GX.Ò¸ê@5CpK‘#y®a N›–a™YëfåE#Ü]å(3æ¡4ŒªRœ ÄXª‰rbÈÕk”ÁkŽ0Z†Ðɱ‚|Ä&¼æÓ‹Q•ªýÈ–bâPRƪÐ àˆU$ûîJG¬TÛÀ˜6(›–,a|Ì嚀܄—rGlkу tæqÉ–©âð +Ò*InöޱFYü¸©ÔeAH^ô@8'¼ààC¢Rmxç½dx§ S6=$MÖ3Võf4_m9ä%YX ^ZDgòRñˆmT(Q‘"mŒË–â5 ÁDx e©xĶ šÐËeI–Å*X[õã¹¥äQ†`ÏãT|uÉ&ïÌÕ7l•´-ù²è 
š‹L¬.5rª¢,è R¶µ%cX–ä_¸”ÚRóˆyàuÚ][r†eq>OØ9t¡ß5²”  ‰2*ô%e¯L\GÓ§d®ë”fÍD»{‹8:ýªÌ%„BÃSQtnÉ–ÅTI$^ÚÚL/ÇÇj-®ÑnE¿¤ Ó mAë¶E¦^›ôÊ"wØy†¤mÉ&£ä5‘å€G–fºêsÉÖÝÑ\1,‰Ãd™ý`DËÃ’9,³¤hpµ5ññŠLÀvUè’mÍ.fY“¸$ãHpJ\ó£­‰ hPk¡ï¦ šPeaH9y÷QÐÇŒ²µ!¼:,ˆhwÔ.ýņ*ëoA›ªhL2%9:´ õ¥ØÈDÜA»HʼÓ;hcÝ%f¤SwÐÆÚ¿h}”´ c\©Xômí¼”n|± ] BŸª"É Ú|¬ø– î«´Afø2ÔÈhó9PJñ¥î˜ÍǪï×ÄWÌæÜp)õìÄX`0›8ÅbåÄb0›Ï1 °Ç–Ì&ù ƒ/Ù1›Ùò@§X)`6-žæfÁì²–½³Í<ºwœ*;-d3Ô+r¡¬ìB=hÁ”j¿![Õ>b¬'dãeþ²Üì]Xþ;e¢öÙ,ë1/Òž…ì"¹ ‹KâVa![Ú’÷>Ʋiî^ö€lšq_¯÷›ÁlFéå|k1›¹‚CÅ2ËÙ1˜]HA8Y¯oÚ\ÿ ²¡ Í-ŸXäÐF[¢…-+¬ÐfÂÔ‚mеî Mg·ž ¹“´+Ù2|Mñíº„?í ÍÙáôe¯,h3b‘q¹·´«úûâ솴+3\\1l³%õcöä,Ôæc F 'j3ò1^™êUàå¯AÀJÞí^wÜÖ3Ú‰ÛÒq#•t7gZ1 8ÜYÞÄŠ^'pZùÅ:s7‹cß…¸yQ~†]ÃwŒ/¥¶Räö] ¤€›Ý‘;².µÀe:‘w8Cl éžÈÍäÌ©¹.Än‘›ÃÔÓ;ËBw ¿VÆöì6%ÞvÒË Ý˜]Ž–nsë¦sœÁmâ=ÆJÉ;l—ØðZ'q‡m¹B°«ã:6°Íic„ÍEe§WÜ.’’YÔþVÜæs ‹ep›m ÷|Sc»âv‰àhj¬‘V± ·YÀŠf³Eûdp›Š{¼ üa9Xm.0K`_lñŠÛòVïÂØ·q¯3Fµô¢ÒŠÛ$-±f&íoÅí".SLBn;nñŠJô,=xí"µA=ò†Û| .؉·Ù†»´àòŽÛ\c&þnzR nó¹°²QYÕ·µ t¬èµÀ67oC*6°Í§œ.lØq›‹"GÈË( nóU3-À;nËH z5È nËb¦) Ü–jg˜xWÖÌ·®fkÓàæ8AF8[Âcàæ;A*½ÇÜü9(û1½ä/îȨåŒ;÷Àml=îÇŠ-ÊnSÛ€«ã¤Z- ŽvŒ07VK‚cÛ|¤ËWÜq›Úì=0¹”·ùN|V¡ÆêH¤Ì=¤æ6!«’„~b£Wï«$Á !ë8œU’0;WÊt²;jË !”]Èl•$¸ ZçÐf•$äÜ„¤Ó†ÚddYÃöbœ’„ù H@Mü¬’„{jLC—`nf0-züÉpKzðé@n¶¡¯æÒ~›jžDo–z2ÜLY¾cl‚e¸éó¦µèj†[òpï3‡Ž$a@1&¿Mm€¸"ÞðÛŒ2‘OÊÁo3|Ü 8‡ƒßç÷]лѪH 1¶/YhW‘ày\ßoømêŒSͽӑÐåHÝæ~›: NëÎnSP19õ¨Út$IÔ’X¾7:’Ä$_2¿Õ‘0ƒƒá`·©aHrŠÛÎnƒ÷bÁŒA›Ž$O.cÓz0Hõ®Ü4$& ºr€¶´ÝWÒ¦! 
¼|î¥Ì6ÓÉ£³<ôVCI6­ª3Ϧ#‘0PŠh7 &ŽQå±&IÊÄrÖÛ]~2ôSR# ËL«íŸÔŠÍÖ'ýdÞõþˆA¬_>÷Üžè'åÕOæþö¸Ã }Df¿óËãÝ^3Եߞ÷“S{~ŸL`‚’€Û7Y}ÖO&Òƒ—¸µ¸ä<í'sŸÂû¼?²i%,§ýAœe>EâÐUHx^ÇÓ鳎 }ñ“±ý>êØÐW?ÓïÃŽ ýö“±Ý>è'³’íR ø9r’'9ÙN%'éø"'Ûï“ä$h0ÉÉôû,9Iǃœl·ó“ 43“g¦–ú)?ÛÉS~2¦—Çüdl/OùÉØ^¾ÊOæ ^(=¼Èg’êG¼PþñA/”ØJz©V²X4·êŃ$=¥•"  ÝT4:æÐ­)©~ˆ9f@M-в˜¯9“;f›L‹î¦ÀJ•„¬á8‘¼Ñ¯Ôµ9*÷§`œP ÍÞ/E–eõéÅ E 樊Wæ9¸, ´‡,¦^( áH⡦’qYn ¬L•×} ^T¥LŸ¦ÀÊ ÏÝ¢ßf ÀD1ž*†^*¹‹§BÊ/Ñæ »õ+Ì~ ª,â’òæƒB±¯åÊ» ÎMð¢—MLÙ>%V‰¦¦ET©,åêØG¿CŸÊR®ŽÏ-öÓÄŠz·ÄÊàpªñ‘êæƒÒK.`¼eªñA©LWØzV«ª_aJ_,zÅxjFÁÂÌÈyQ¥ÉèWÊ«CÎÍQi¤mú†û5/&žÔ ã•z¡©PÚŒ~%¿h§¥[…´ýJY Ùm.(1׊?tiÛõ+ ¥R#Hv›~¥âzÙîì­~+ òýDö› NgŠÐì—²u21äŠ ‡¥nu(8m—‡S›Šk´m‰(‡¥nèzz®>‹•7ÇÍ ŸâÐúçx¯“[ÇmøvåhT,t£a)[iJKí:ñ†©ù2{‰gõA:%ÉÁÉ6£aÁ!Æø“ªåÜ^AHB­Ñ-ýåÍ ›ÒiÄ–¶¥|´5Ž–¦&$IÆR×ÕJð”ʆÚôƒ¤ýPkƒÚZC"…~‚6ì¼ÃYïÚ4tf¶!‰g”mêà]ÆEÚâMR««¢¸4˜M+›¿«ÞÐæcØÒKi@[¼PJ¼[VЦ-0ò 8Ñ¡Ð6sêQ‚ h‹JhÙ lXA›ÝAäÆiÖ€¶Tt¦û ´ùJðrr%ã…R˜e@B ÔnŒy•f8P›u<€Ñt¥9P»D&+Å};P[žcÅl±úYÔÆ¿à†Em¾ƒ¢ÀÆ…µé­+"d½ êæ†¢¡þm†þÆ úÖ1ÐÆ€ñ@½… j3µ@%O,jÓ›äΓcQ›áÃUlzí@mŽ%¹2>-jKÎ.šÃÄ3Ê¢6 ÁúÀ¦x 6Þ™b½4ûµé2B íÙï°&qÍQ©ÌÂ6=}pNËÕ¶Âv•zà—š×Â6ýÌ\-]}F,lW¦Ým*_fa›NN,àWK:`¨Q˜–§m‡íJ“ 8M–ga›>D©„k5 nËP긪-l³˜+®{á-ls”˜'ÝLØ&¶a¯pÀ6žÃ+‹r -lS•Æjʳ©@é_#ã·üP>nδÈM˜Åœ|!Ú w QÍ$ºñ>Ì.Ä1Üßfå󱑿MBºóéà·Ëzȃññ›×7ÆÙ‡KŸe¸éIXñB¡(ìÍH Â+Ã]Ä};fñ\µü6ߨ @·´yç‡ã§å·YÜH­/»Í þ‚[ċ׋a·i(ì¹Óûàà·‹˜•Êcº1µ¤»rB7FÙÃåô½A7/˜Ôh¶!7‹„‹W<‘›&_.ÒŸü6ÛÚ´ímÈí_šæÛÈ;Ïd̺(ŠÜü[›Þ͹i1v4þ §g‘›bÄ‘Rr5È-Å ð˜æe°ÈM¿È$%õDn¦‰O‚(rK±{¼Rm¤¹%Ù­JZ~Rs[Ÿ4h¾ëý Íå5—¹=ÒO] š÷·Ç-‹5-Íùåñn¯êÚÎoÏ4ÇÇž¦_`ÐL,ÁqÅJz4é‹‹'›Ñ€;ÍûÞçý‘Mka9ížÂe>å&‰gáå¶@ÙNµ@IÇ—Êöû¤JÈdZ L¿ÏZ ¤ãa²Ý>hÐ\Éö›4¥ÃIN¶ÓGÉI:¾ÈÉöû$9 Lr2ý>KNÒñ 'ÛíÇ šTR99ì1ƒ¦íä)ƒ¦éå1ƒ¦íå)ƒ¦íå ƒf‚PðbJpð_0h²(æjÐü韞2hŠ’öªV¨òžµåˆ™aÕŒ‹‹c½ÃpfvEêª 3%Q5Òôy«Æß%U4Ûâª_i²ƒÅôBõÎ,V—Y‚LÍ(Wâª_©¥:³Vi+ÉÏüŠÔæTˆMetF€.Ú•Æ‚—Ci¡ÄåFŠº\4)^ט–’~%A¾ Ñ·•œï²u õ£kuUî’Û]¸Žm˜w/q_ŠQ°ä×(Ù+ã,ùN¯H‡óNœöÙ´)X2vzX4¹]WnEqÒv ²^×&£_q¯ðnµ²à.ºk׉Ω©;3ÛšQ°0½‰F¦K›1hv‹b{Dä.­Ý¹ÕÞÅÖ”Vúª`ásMë@J[¾ë×IFwi¤JWK•B¯Ã0Y©¸¸È•–æ»4`uF¿’hê¼ÔÕz‹‹\ùX†¨­ê»8£æ&,oô«vMÌx‰жv°“ w|u F»BϬ\©·¸¨•9  g5‚Ö`”+K¶Q´Qo1©•…û4%k”6£^aj?Ýyi3ê•Ĭ8\±k2ê•HçûÞšè°kÚÔ+ÔþfݨɨWâ’ݸf£^É/œ _–¶E½Â™g-*M«z…YóÁï{ >©eÕ®0[挢–U»Â°jÜ 
²(eÕ®°T`öqy­«v%‹Úê2ÔºjW89@FÌÂUÔºªW$²Äæ´­©ã@b3žLt¾A6sT_¨Ý1»Êú”¡î±˜»È§†-ÄÅlú $°&J ³ ‹:ÞË`6¿`×&0¯˜Í #Ì»à5Ðzmš3±Í¾t1ÉЖ1“ªv·‚6í™ ( [ÛA›m8µ¨ÅÏ€6Ûšï,Ð[wЦ='‹þym±uFfI~mî ³« ?ƒÚŒEZ6ÔfÜí±¹‹!΢víÑWÔnR ¢·(*A‹ÚgöÌØÔndžB†j‹ÚLiŒEw‹íFƒ¦I9@»J®ÑNíåÚø£‰´RNÔ®’À|XÄ,j£‰€1t¼µ+‹>.»‚—mVË“w¼ÁmYF1¤·™Ž™/Ô±[ÜæRIIvܦW;çRżkq»2±v\<·!5hŒ«¾Òà6³K‹8°MsfǦi> Û<âÙ]ñ¶і"8eµòZØÆj†ŠUý¶ÅíJß—GvƒÛD`%ˆ¡¸Mä,ôú ns”€—‘çÅâ6%ß'Üâv£õÔe_õ1¸M£x½2†bp›ˆÓ´é[Ü&¸µ+U¿J“€&ɵ?ä„òqk¦EnÞª%µˆÁí6¯åÀlýn;IJ”%{ô†Û´bÈÀÜXO^MÂ-­öë×n¼ À¶ ‘½”q}úÈ|ؘÙ$³‰ÆSå£Zù¨Æ¶?iÎ|ÛÿŠËk"?®³{¤£^ƒæýíqËbÏý6hÎ/w{ÍPw~{Þ 9¦¨ý^_¾Ê ™‹ÄšSf­áYƒ&çxG„ U±þV š÷9¼Oü3Å|Ç)þ.3Êߤ)N”íôQ”t|Ù l¿OÚ „P¦ Êôû¬ ª4g·š4ÂÅåÿ-lšÚã$(Ûë³Éñ¥ç‹¢lÇfÇD˜$e:~8=¾s~Ò”í÷cvMü‰…®äŠÖ3ln½ÚÜ<íl±Ö±ÀÝpy€er]B{,p7ÉJÝ£ŽÁ·XoÄJ‘àn4‘cYÕSÄ7}+†dK pWroØÀªøe€»±BG©×c¸i->XÜ&Ï“ÉA÷ràvcÅ–>¯3Ü~|fzé|7Þ‰7‚û*nó•q–¸´À]yÄû•PØw¥w’c!‘·¹’8Ÿ¸ÇZftq,nWVˆè­k–_‹Û´[×Ì­|à6Mè´ÿkÚ] Üb€ž%n,p3lG%?›ÓJ3ºèA §Åî¢Y݉`'Ï]¤rÒp‹ÝL¶<ü Nž»1„©¸ÞÊÁs3´d† ‡ƒçf‚[³‰–ç&4äNŽoç¹›¸l^»·ñÜ’7b>yî*¡ª—Ç„å¹+oráÓÉsÓ} ` o˜nÚQ°9AÝñ,Ó]Š’WEùxËt3 põ'r ‡/.f*ܬ,7c_ G[mXnVð]kuXŽ»H$Þõ˜å¸é½ ·¹y%¹c/ù丙ª˜¦d=A–ãn/ÅfE|Ër£í®™p±Ü<ŽÞ¥qt,ËÝéËv JË­Þ #¦wc¹ñN#¥‡En‚™K2™“åfÌ2ø§ «e¹s;B,rgæÐ¥uMo ÃrÇ®þ6¼Ù,rS:¨4r§Xäf VÇÐJ Ý"m0ñEltó‡àXq[ĺñN° WÖ ÝœøUñϰÈͪÏGGf˜åàPB/ê¹e›•« UIù`¹5¹Â¬nTÁ×tÕfn,7¡ÔO‘È@·øC3 ú t3••Å¡èÆbB¢NyÈ/ºi W§zóÜÅUBI_Y«ÙåifþVÍœB~Z®™?x´^óÛ<£ÊÌÓÒyO𑞼_MË×ÇŽ ›ÅØyûûÕÜy}ÞÞyÍòê¹þ‹'.&z”CÖâY™8ÁAÂ!*ý Y<×yŸþG6î:ðÏŸH3§,ÙKÓ•¬öÉ.ïζۇÍTy1|n=?k§Ê«éÓöü´¡*ßÆÏ­ã'­Ÿ+ û.õ~E?÷ aI—7aÙnŸ%,-hs–íùQºTƒ°LÏÖ•¬ðMÇ3F9Ôž<83n½÷‰e™EÛƒÆúuï4 Æš£5­„‹A”>Õ3o&Ú6‹( 匚7h¬’O‹A:¢WH-_Á¼§D—\Ãò*Ë£© Æ()R3™hÓÆÓ*ê‡ͱ1ÆŠéíT¤W ˆRˆI²j‰MŠBoÄfvMxLs³¨i3(²ô²>BýÇEÈ"'«u6jcbx’²Ú8ƒtÁÓª©a$ÉUõµyUÕ$©•G!ñÚIÈ´‡EUòjc•ìZJÈâ@0« A2r¶\:pé"²¨k¨aèèHKÒ¦Aé dšÛ$2AK$újl¤¬Ž%ñ^—­#) !fÝTi¬R4hÜi ìjÖ’†ÞS2i™^Ý.\9BјlZ[ÜiWl)«Ík›è¯AQ F¼vÒr’º¬~XÜиKËk \ÇÓ«Mm ø ¼&+‹qJù ¥eæ½M³Žq'-óê§ìG£1˜â„ûÀj6¿m(Xªq~Wœ´ÌĸÁ]é8Ñ(áoƒ–™/øË›ŒEs8ÿÑ=i4Ý¡žê€T%êþ€zÉðy/›…úÂ2H³˜ìõ$åŒc×[8 žU;ÃrÏB½Ø@»ôzB=m®@ÝK7o ž…¯”;ÔóJ×܆C“ÕŒõ4Ü»¶C=/4)`ëk8 ^ì Nc½°§"|Ì,ØóÉÞï%0`/É~‡Áó{J›FÂÄìíU.¶O¬o’.þŠ{Ú°¾IÑàžµâî†õ–Ö,Pß$F'ÊÇÔKàV ëÂZ¨g`ê]~ƒz†BI¥Eר[ꕜsCzR‚zÛ„éiG)™%¾Ó‰ôæ”o©ôÀÐ(ß é*1‡õ Ò“e¹kùnHO‹ª¦6ðÒ³Þbî—_Á†ôÁtÐLBÝŒª85#¥ÅôLBrÊ6 
çúÜ•Ð6 ¯â2’n,7†UŽ‚Ç}f€žªu Í05m@Oˤk=Ä ÌWãj¤I#Ö>.Bô@,mMZ=}zî—0JUèñÚ.yŽ•ª,Ðóµ˜rzè,гÑÓEO¹A ôŒn2LÒ ôUÊôâ¨ëqUá•IŸÊúÀ“5>-Ö±‹+ ˜O¬§m—ÕБnXÏš©7ššS6¬Ç‹°Wô>·l}“ô˜ PoÙzŒî"«tûº"Ê,WÏŒ#1q[Ϫã˜bÓË[_5: Æ[/~3MéÎÖÓ‰~%ßûþ¢û3ùŸŒ=Ӗ髵1öE|[\LöÎ×»&If¤—Çœ¨ß™úÌÃ{U]ß™ú¾ÖXÜ€^’ù‚ ÛÅñ¦^2ª_…Þw¦¾éë¸{7žžH,©é OOrHmd°<=“„9²¼áé3eɼð–§ÏL‚ÞÆEfž&HÈ,Õ  }&#q¦*ßa‘>‹˜xuœHÏFõƉoxúNsûUç~ãéiÅ{‡¬¸ñôÜ ôæqí¤çÈÅÝ+¶7<=)W]hn ^ê£Ä«õõÙ¯åîwž¾ÑëÖ¥±Ç–¥g‡ ëcžšòÐùdé’™Þø Ò£‘4ƒÖûÝYzÖdÅ––öéI8æ]S ì,=݇ãåv¼³ôÌý4*õœ,=S€çk¿ŽÀ–FnäÁÑ“à =M>X´b˜dÁ‘`ÏYf '’Ÿ´ÜhMüÄVýô9öMïhUÇ<~\æöD?àE7¬sº¾5Éÿ¨=oÍ~Ç—oÐí5C]Ûû›Ú;¦8öôúò¦Û/êb=¼|o⛄'tfò(,Qó×6Á²Ê7Éé|þ@W» öW¿öŸX —ÑþágͱJ} `¥…€ ¸iÔ î=ÓMoמ?ñ¿?ýŸŸþÿ÷iÿ¾ nÌy„pÎ<ë|4Û«nâZaWEœ}öžMÛƒCˆôùŠzÛîCXÛ¹ìíÙ›å3Ë^Ÿ_ö•¤˜ ¥ÜÆs¤Æ7R3=R“!XR³CxžÔdç²?GjXŸYö7¤&7+Œ…Ÿ{S­.-´¤n ÅJÊaç=ñõ&cÛ‹ãT°)%ÖÏ™rý/ëì5¤+pØÔ±þíÒ¦ˆçáEVœ)X?ÓKüe½àåÌ• œûŸmüþK<Ã÷ŽUC)ôUÜý·…úÊú _î…®›Ç<øžJ££åd$ðOÖ¡üæ×¿ýÝßÿþWü—Ÿ~8Æ´Rçx˜þ*Tj4ºÊG­“‰žþk¦ëÿúÓƒX}]*BÑw>Ç”—@[r¨5ލ€Ðè<ß4‡šKXÒÆ"'•ÅDnt×}'·ÜÜ(¹ÖŠA-¹?P)¤/ê°¹?´Ø¹ñ@r¤/„+- gÿG"7û?â•qf÷Ç+ß;iv÷‡m¤•äp¸@¤O>qjæþ/ÿºÜ1žç§®Ú˜Èè=qjô‡höD™S£UZ$ÌùˆS£FE÷{!ÿGÕJ:y‘EâÔ¸ÏkAÉäÔ˜g~êˆOË¿2qcw‰P|oG"ÔUƒLH¨µq¬½nL¨åÙŠj~‘H©$&oÓÂÛȨñ³«™ FðB´ŠQ‹L©1æGµÈ¨ñLPÄož«ˆR£á{/¿Sjä踃†)uÕD/ãȩћ ßË]EœZî$¼2Ì©aŽ\Lªõ’îV R§Œì1©Ö½L÷º"Õ•N›LªÑZÄI“j<º^9&N­©@v, JfpkêÉ©ÔiÍ?~9rÑTCöC7í¡öῆ¦Ñ“‘;?šýˆèÕW”‡¯íÄ<)ÇÈõ×BhP8Àæ|`ڵ±·þ×ñȹÄùLŸ?¹ó ® ¢"³ ÿÙÈáp¡ðRý• ¡?¹óÞW‘@}˜ö׌Üq$pÌ9b8å ˆs0B(¬§hZ¿~ðÎBÜNgþD8Eÿ¹Ã·ðpо"wxæ³_–Ÿlû"w¢I‡ÎçL n¦F3ÀÔôØÔøΛšÞÂ÷m?gj X?Ùö?#rçѬç¬9 §"wxŽS‘;<˩ȚåXäÏò»"w~#.FÎP7ªö÷AŽÿ?.æãbÞNq1täºp\×¼¢_Þvüt¾í0©ËÜ£w ÚcºBsÁv¡YñÒÞÓz ÂÝ”‘A2ÜM¨­q¹•q·Ÿ‘¨D+={xL¸äCþ!´?nKQKû‡^´X.…ô†Ö‚pô„p„ˆ®d¡8ˆùw\/®d!_ õHà–L)tT-Ý…'”ûrÑR­¹RÚâcJ2ÇXÊ[€Lp~¥¼Ȥ„¤kü®i ØÔ²nYÝÔÛR }5¬Æ]9©„¾‚X@p•'´?t% ÍàŸ%£§º…ÇÈs[è™ê#ç멤:$WdZ™×:µ->¦¿KLH$W„¤¬ãzj[t Ú®Uõ=:&ûÞw’\dº^ê[t ºÀO÷D¾öè˜noM¾HtEER©cŒTWôe×:°0ò|GÕõ¡én’]Ÿà®Ïwh+¨‘.ùŸÐUPeêo~¶ø˜  ç'4Ô±²"ÛòKªëÝœùõ¦‚Xuñ¸¦ü’êšbmüœBSAõẠ¤ÕÌVñˆ«‰iZµßm ©Å¶+‡®‚ î6Ób­n«Ð0ìÝeUfÅM ÓéS;VÒ€JË×t*<†ÑZVÝämR°ÛÑZ=š¯Ù+¡uAo’š§{šáZ},â5ÂHà²_øñZÕBÓï¯ñ³&“F?wÂë†2 d’\å ÿ<+p"âµØ‡Ø|]ñ…„×ú*æx%¼F)é×DxÝ.d-Zx¶ìtPî°ñŽÝËÒ7ÀÎH1]ño`W4`ŸÀ›·™ ‰op-ï•Ǻl€-Ü¥© °Q_ùZÑ[`#,¡W»6¸†DvµeD ×)-¼®ú3QkÃk@ß ZÙນ"õ ®Å>ä‹14Æ 
­…‰»ì±¡µðÝñÉûÖH¹5M~ëî¹ØßÀγu‹±q ÖPžžõ>m`-Û»¾½VWAV[ÛÖ¨$cÒȆÖÆW©„ ­Kd\£du]á® ×H&~×åÝcd<yCëò%¯×i­;é¤Z×(-lh]¿f¦ ©KŽÜ€ý, iu‰ÞeWëÜLóÙ±ZN€A™"¬Öýp1”°ZMíÅefy‹åݘ5ìÉlÔðbavµf†ÉÔZ -ñ†©µ Ïc¦µQ뤵oÌ:—6ã46býÄgº1kT3J‘<¢í7î1þ}i9…”²ñ ¦¯vÎ0€'YÈìRÀ0ê:“À0 X|-•aZÆâYƒ€íLÍbœnÏÌ1˜Ý@ÏË:–B·+œ¡µÓ†…2D£±‡‡t2F#Nýƒ{D}7FøÜNÂgôiI&W2>ÃßîIŒÏ¸ÅjÁu„Ϩò88á³l˜<²õAe„Ƙ00Bë˜í#4êçôõeb€Öf-¦œ2@k7™¶H tíá2@£‡×—c€n¤›nt• —ŠÎø,7™/á3ŠJÉÿf£Æh«.è3H‹†XniX©Ç-ü²âZî¥  ã÷ðÒ⿆Ñ“Šëf?á¬\ëøgXÛ‘yjT\×_>eI®¸Î?>0íZáØ[ÿë¸â:—8ŸiúŠ« ˆ†°ÉÁ åQN*®rÀA¢€ÆwøAÅ5áû˜Ñ%E>¹IÁÕÀ!çˆÝ´'ÎA ë)n•gnSÁhæ¨`z ¬‚ñ-œWÁô¾oû9L­õ'ÛþÁ5šôGWL¸™ÍüSÓ[`Sã[8ojz ß·ýœ©)`ýdÛÿ ÁõE¯öê ò”àÊsœ\y–S‚+ÍrLpåYN®)U-LŽô¯ß\Ÿ—×ý^ðÿ ®—Õ:.ü¾¤9Ù'º…JþÊždƒR¿V‚³iÅè•·ƒúÁîÁGH¹ûpP²Ø=ø¨¹ïI…åÍÞéþïöš+iÝ…å˜fSàr™e Qø t«´]Rôà_²xË;Eh‹Š—õF ¢dr ¡˜Ÿ%W‹åy£C­¦P—Ò\ró>‡›œ\ ù…Z\ø9f:Uø_–µj–«eÚÔ‹|øè ¸|°Þ—e«#ç}%,Õ›|ø)ª£õ&>ZæeÞ—e«êU—K¬›|ȉŸ‚‡¥Âù2MUeŠq}ȃ7}9|ž—µgU¬õÝÜ÷¯©¦õ%ÿ=6j©a8Á÷}Šn¼šÈ}OÚDMä¾O±FÍ侇Øg Ëì½+¾Æ¯6±µ»F[ ¹ïsLsôîû'ÎVÈ{ÿjÊøtÂ! xïѵҴ Äæ“ÿ¾W×]øÈ‚¿ dªíW›RŒ½&§¤VÆi¸á—C}Çi¼ìVÑ„aº—È0Ý7cd”F>^$Bi$º>Ë((M‚P¹Ï5=í Ò)ÁÉoC¤¡Ü>&ðHc,” ÆX 7QZËŒŒBhü{+š²!4dÛFI1­>2?¡Û]caBè6Ú‚ÚˆÐú±[² ásƒúâahŒÙܺ]ýkt÷º¡Ž³Ï ñý |4¾œ·gk@ãcœ]$„Ƙç°BËMÊf˜:LÝðø-Bè&ÿ¿ðÖF€n÷=õÐXòÕf‚:#t/!4~åáAѺ#áA ±àú'”ãyWi<°!4>ãY=î3ßð¹…‹1:÷`xn±>ËÏ Ýz××™àÏRì½¶0³Žyœá3Œô’kNÕ‚ðY×ìȲás±7@£Ú¹}oÇùÑ·â ¾Ê]/Ôx´#´<¨ÿ…ÐO‚­P<†èû‰ª8côUb€ÓƤ¨+ž Zm! 
f€h …º „ÑÌi£Uñt¾¶iˆ[ö9Þˆ4JA™4ÇLàµØÐÆ£Ñ%©ü¥¡Ù„@+æÑMPÿ¶ÜÄ£âÁ,Ü…y4Úf¸6ËDFè©’L¤eÌk0‘F°U­it-1’it9æÌ¢!²yÏdѲŠäÔšY4Vh¡Ì¢ëç3¦Ñ¸¤U.`­wgÛÄ4z¼ÇFɈFW°^Ëòe¼JK¸'F~ds<#]é„@D9”CðeŸÈ¾Ï“ÑèúD!•y4úµyµ&æÑšdmº'ã4ãj™”Œ»ïwqí'¼ã4| ‚Jkƒ‰FëMúˆF#U¹[82óh{îu!+zq(FiÙIyB“É1Hã‚Á°Ú¸oùà/–ùË«ê*› ÎpÌêýã'EÖÎÂC¹òϸº#ÍrcUë¯è=w—Yç˜v­pl®ÿu\fKóÚ¿GfÍE3ôÑ7°>geÖ"/6ÒåŸGÛŒ|NfÍàHI@ñ’ð_Sgu,pÔ9ÓKz"Íy,+ÊŸPZ1á&ÑÌ¿ôXþâ[8/é-|ßösò—ÚëO¶ýJk0já&ŸZuÆÍØhêOt@Ð{`kã{ø@ ½‡ï;°‚¢ÖOvþÏ[åŸÐ­å,LJSzë6É)Áu›æ”âÊÓ“\·iNh®Âº]Ï8Wÿ†æZ£æúŸKrmoškñ°^ÛtEÍeÜ-оqw½ºwë´¬žxw½1'U+FWQórÄZ;Ñ]E9Ö Gu¹7úIËk!ƨ”é¾"Ôdµ2h ÝÝWTBæ,êd_Q¥á1£Püü¬³/êãOQŽÉV ‡kW³pø5´µ+úŠHh-EÍõÎ͆®Œ1wгZ¿‚äŠô„¼ò‘ZOQr}b}:6As•£¶Kµý£š•V-Ý~¥(¹Þh8·«~Õ(¹Þ±Fy¿¯(¹>Á€ ‡ ¸>bæ5”ÿϬN¬ÓÖÑ„Ñ×ûK±‡´Ûу1f¹zÖfj”\ïX|¬¿WÔ\Ÿø8eÏ8Ë5TGíè¿èšëýu¹@ÓÓ5WÙ*Ïë)—>%wt9Œi#òН°‚ž¯¨»Þ¡flÏ) ¯äÓï¨oêÆ '­eøõrEéõŽ™§½¤(½R z/5ºöq‘%t´^Œ¶ê®öš‚oÿŽ&Ž HÁ·G!©ãHåÆúĬƒŽ®‹”“íañò¬½6wèö~Eí×4·m‡*#BDGð…Û+z..Ò Ðýî)õ•[án¤8쀭’¹•©cÀ†Cú¶. ØE»£,KgÀ†ŸÜ³~°ÑRÔ³D°GzÐôÒG¼Öz¨“Ó·Ox­ÕŠÅ<‡Ë”ðZ«Îú#%ÀÆØÕ %#`£€|PÚ °G¥†åþ$¼†Ü‘­”4Á5”¾)Bp­cÞo"â5ô2ùù*«Hx±×«©^CŠªžnDx­:k±ò„Ø*êzYBlý ;'ÄÆ.Oå"ÈFÚ˜ űq­7›ÒAˆ­ê±G b«0T×bë%‹uS!ÄÆÏBEBl½d²HBl\34Î&ÄÆï¬\7#ö˜n%`c¨yÑj즻í-"¶ÎfÍ>°1$OÖ>G°‡¿‚Н±¶'˜AÄkŒ¹TLx+Ƽֵy¨ áµ>´Ò ®õŠ*RÃl@ç&F Æ/B™ jüL5ž²@@­‘vÅDXjü.ˆhó`IŒè Ë`Î÷ä¦;ZžG6"#õó¸b¼ë‹½2NË;|yåÞXÃYjÐF¬ï¨¡0±Î±<ÈwbLjY·/m20žÛF­Ÿ ¶Që+¾3µ~b-ŒZËXئÖW¬ÒÀÔmšçþG^-'¢êqgÌ«[‰9ñÌ«q¹lEJ˜W·GmkÊb̬1æi¡Ì¬‘`èm¥˜Y7\Ä’¡™Z#vMnÔº¡®µbj­o±ç˜ZCw6ÞÃÔº¡,ˆÉfL­‘aëá7L­åÙ6o½ÄÔ²'G2µÖ`x«/AÔZ~äQ¦ÖÈAµÈ3fÖPÑ=#›©5îÒ› 1µ®=Dó2³†t¾êFíÌ@êGcfÖ¸K/œÀÔº]±o Qk\ÒÛ_1µ®•¾åD­ÑîÓ‚T™YËØàÊ`gf–éá°GÌb¾¾PLªqá“M¤ºŽ泎ó`Õ²·77ùõÞàòÉX¦œj¬ÞhŽÿÃÑþà?¼ƒ3>Ìl‚¬/ðÈLèšèЬýùmô,ú 3×O‰²¶È¹ÃþçqYv­rÍ\ÿ€0[/|ñåpñ ¸œf+ªL\¯œ&…Ú|R˜­·|›s¾„ ´÷¯)ÌFTp:b< tΣ­)kÚ´JœroNsD1ËßôÙí&>!™å¥ÐnsþÞüló?!ÒFó¾»¶’Îå:jtzÆÚÖMsÂèô&6£ã›ø€Ñ OË÷Í?htŠa?Ûü?C©-åESaÄBøO)µÛ$§”ÚmšSJ-OsL©Ý¦9¡ÔVôiªO‘ÕóBmŠ2í¿ÏõèVÒ†ÌiMˆTGX nAݲëÊ/Е&g0sqË`ô&ƒ°µªºJt(=ý è=ÏŽ÷U/mV•o4Þ»_ S÷Á¤…Ó^„Ü©Wú6Ï® ’O©ÈÏœC÷Õ‚÷_,>”ˆDTºQe%M•wH•±èÿ‡lz­œiÉ«ÔbgkLZ=m`Cßv^›g}­0ègÓû¾È³”C–Œ‘g iB檕A¹ªÜOº§¶|yÃL97DÝ–»É ·)jLò´ŽZ‘ ±É§à¡úš;>óJ­è´Œ‘vû†Æª@´¨Ý¾_u¢¼o8pnðægŠÁ®sÈ`ÒJj¯ÅÈ{ô¶¥28Ô[Ĩ›z+K»´¢ššðž-…[µmô°á: cºhdŒÜ'N»ï¼)¸!¹J¡àMVEÒ5r¬Z\MM»ÑŒMVå[(Ö³© 
±zûx¬¿ŒÁ2MWkU»gGf'ýV®éÑ2˜¸Lq¨S)ƒU{YMã}ä½ ;Šs/Õ •Á´5ó^ìd¨r­âð*ÝZ<ÚL7…¶”PðXÄõ†2F*®\Óõ¹§KûYMÓ•ÁÛÜ·2ˆla3Ý'ä§Èé¸)¶]–> ¹oH—1Mï]Æ›¼–€Vmj57EO›eXUòXKoFumã;Ä7T×îîË?·:¬ÀC 6P.ô7Üèårç†çè¿ú}3žãž]c<ÏT¥qÃóü†NОãE C€ØópÏ›úo½.á9+Á 炳ëŒçCf oaÄsh¢ù±&Ç„ç㇖MÂx®u”=N„Wõ’áŒçøaõ„tÍ\DKÎþ Ï1¿Ð1_ É èšÃçQ2б8/~Èx®k3i”á·ñxŸlÆs Ê[6“·Îq!x†á¼)lØ;Êp.“s,¤ðÊ:"t•(ñö(0¬«Â.¯ï²§J"o‰Õ•çX£AuÝ€>hÔ5¯º‡/k#¥Wh`ñ­‹¨Ž­ó´> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000034273 00000 n 0000034294 00000 n 0000034317 00000 n 0000034747 00000 n 0000034616 00000 n 0000034511 00000 n 0000034674 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [ ] /Size 10 >> startxref 34828 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_has_jc4ic3jr2_nt24.png000066400000000000000000003004061360743507500233110ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF 
k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$9·-ç vpAg’Zó!%Õ€IDATxÚìý{˜#gyç‡CŒíX¦Æc4vL5 !âêL'!@È–L–ƒÉö‰¼0¬{— ­{Ó¹Þ,R蜓ÎJl6i¢Êž`òUèÅæ°Á*2á¸îI!á ‰I`Œá‚.¬Á&†€~<}W•JU:µ¤*©ïÏuéšiÕAUÒý<õÜçcív» †a†a†a†‰‰ú†a†a†a&VX†a†a†a˜X +Ã0 Ã0 Ã0 KXae†a˜ƶm†õe0 Ã0ÌD`…•a†afÓ4‘N§£¾ æ£i,ËŠú2f*°¼OVX†a†a˜‘ÑuðÌ‘å}ú<ö7ó73ê‹8êØ¶Ó§Oã½ï}/LÓÄþàâÒK/u¶kšæ I’ ë:E†MÓ`$IÂSŸúTç¸b±EQœsÛ¶üÁtÎgš&~ò'räýfPzÉxØ6MÓ`Û6dYvΣë:>ö±AQ–of&˜”쫪Šr¹Œý×¼ýío=ÎmÛÆoýÖou=æ0ô’×÷¾÷½ø×ýW\zé¥e¹KFé=ž³™Yá0ò.IRè¼Ïr=ìaÛ¶±°°àäéºÞÚU,Q.—mÙlº®Êå2²Ù¬sžÅÅEhšæKÛmÛ†mÛÈf³H§Ó0MÓÙ^,GÞŸa¡—Œ÷Ûæ—·ÃÈ+Ë73m&)ûù|>ÐÊoÛ6Òé4lÛv › sXú­UüÉ(ÏÙ̬pyÿô§?ÝóX–ëi3‘R¯×ÛÞŸa¿­ªj{oo¯½··×ÐÞÛÛs¶Ë²ìlÐÞÝÝu¶U«Õ¶$IÎßÚÕjÕù[Q”v.—sþ. 
mUUGÞŸa¡—Œ#ÿ»»»míýýýv»ÍòÍÄŸIÊ~¡Pèúœýýý.¹f˜qÐK^Ûív[UÕv½^w¶ûe”Þã9›™#ïýŽe¹ ö°F …|åóy'¬·^¯C–eèºUU;ÂÂ2™  ,˲SÒû7Yi¼çI’ IÒ@×3èþ Ó^2Þo›¢(ND®ëÈår2ÉòÍÄ™Iʾªª]ŸGVzzN0̸è%¯aÉ(ÏÙÌ,pyäX–ëáa…5bdYF½^d³Y;v ù|€Iò,ضr¹Üñ z@0L”ô’ñ^Ûa ñ.ÚY¾™YbÚ²¯( …BÇyfô“W†™'#ïüðøþúëñ;¿ó;Q_Ê@ðüâ4^ï¿ÿ~¼ñoD*•ŠúR‚çïø§1;Êü=7 ëUW]…ç?ÿùX[[‹úRÆÆêê*677£¾Œ±±³³ƒsçÎÅâ7¢k™%.»ì2–‡˜§1»ººõ% Ïáñ'NcvÖæpž¿ãOœÆëêêêÌ(«Ïß³@œÆì(ó÷Ü(¬óõ\Nœ8õ%01båaÞÆ,s8æMæqÌ2£1²0oã•9ó&³>fYa1ËËËQ_ÂXI&“H&“Q_æQæmÌ2‡cÞäaÇ,3ó( ó6^™Ã1oò0ëc–‹.1 Ã0 Ã0 Ã0±„V†a†a†a&–°ÂÊ0 Ã0 Ã0 ÃÄVX†a†a†a˜X +Ã0 Ã0 Ã0 KXae†a†a†ab +¬ Ã0 Ã0 Ã0L,a…•a†a†a†‰%¬°2 Ã0 Ã0 Ã0±„V†a†a†a&–°ÂÊ0 Ã0 Ã0 ÃÄVX†a†a†a˜X +Ã0 Ã0 Ã0 K"UXšÍæÈÛ&ΰ|3óË83¯°l3óË83K<.Šm4( Î@XZZB©TB"‘4›M 4 €ªª(—ËQW 3,ß̼Ã2ÎÌ+,Û̼Ã2ÎÌ"‘xX …Òé4LÓÄÙ³g•JÅÙ¾±±d2élo4Û&ΰ|3óË83¯°l3óË83‹D¢°6›Mœõ©×Æ0^’É$n¼ñƾ²: “’o@,xX¾™AXZZ•W^9‘sóÎD ­5Æ=‡óüÍÄš¿'atäù›‰šQ×àCyXið}H«Õêë]„…Æ?½±ód}ô†%4›Í¾†)‹Èf³(‹Ðu–eul×4 ù|¾ëýqÂòÍLÃ0`Ûv×û–eÁ0 ‹E†1Ñk`gæ–mfÞag¢Ä4Í‘Ja%/êêêj‡rÚl62Øý¬+ªª¢Ñhtäqœ9s‰DÂÉ‹]ZZÂöö¶³Ý0 ¤Ó驱ÌáÐ4`ص³mÛ0 £C©Ô4 ŸwÏõ{¿÷N¼ä%oÆk_ûWxûÛ¿„·¾õ¹\Š¢À4Mäóy,,,àùÏÿ\yå;ñþ÷_Žÿñ!NãþèðÈ#ŒýžY¾™A›´ Ã|‹E,..BÓ4d³Y,,,àøñãX\\D:F>ŸwÕr¹Œt:=‘€eœ™_X¶™y‡eœ'd,ëXXX@>ŸG:ÆßÿýßýYC…Àææ&ŠÅ"N:@$Ïnmm!•J ÔX8•Ja}}«««NXÏÅ‹±¹¹éì³¶¶†ÕÕUììì Õj!‘HàÖ[oè—ÎŒ—}è~¬¯/~ñ›¡(¿Ë’¶ üÕ_ý®ºêœE»eY°, ²,;/1Aæñ?þÇë‘Ëï~÷7pîÜ{‘ϯãäÉŸÃ÷~ïkñ¹Ï=Ãø0Z­ïÇÓŸþ·PÕTU|~&“e²L^)¦ ¼å-@"ñÜsχñ•¯˜(Æ{ß,ßG Ã0 ’ÐyÞó¾oÛ6LÓt¼£²,ömH’„B¡EQP.—a†#ÿ~dYF½^‡$I_×ý¯ÿu"÷Ì2ÎÌ+,Û̼Ã2Î Š®ëκ$“É8k]×5$IeårÙq4I’I’`Û¶³ÖQõzÝYß¿á oÀ³žõ¬¡®gh…5‘H`ssFñàS§AY^^ÆÒÒRGùloˆB*•ÂwÞéxq9&~úX–ðlærBé\ï' ŸiJew;A!¹¿ñ<üðSN§ñ}ß÷7øÂƒxžøÄ›ðÃ?¼„ZíÊиXØ·ñ±½ OyÊ#°mªª:/¸ÀÞW]$ PÀ4lØÝ…£8d2töÇâ-o¹?<™¢4,ßómÛÈf³Î„\*• ª*ŠÅ",Ë‚ªªŽ‚Jã¥T*uœÃ0 ”Ëe˜¦‰\.7”BÚUUqóÍ7OìþYÆ™y…e›‰ MÓ`Û6EÁ£>:±Ïa?±Ü­Iü†Ó4¯©ªªPU–e!›Í:çÌd2P…Ba¤5‹ªª¸âŠ+pîܹ¡ŽJaÝÙÙÁêê*Ξ=;”‚D2™ìOá Ìä1 Š¢tÞË^ö(’Éžóœ'á‰OÜDZcoÄe—5å¯\.ãܹ×à{¾çÇqýõWàÕ¯~>ó™÷à}ï{Nž|!vwI_EQÏ/A’$¼÷½UÈò!ÞV¯¬kšx¯PÊe ù<ÉüºcÁé…ª^G—mµZçù½üè>„sç&÷@`ùޤTzeȶm‹Å®ÉÖ²¬ŽQY–Q(:Ž5 ù|…B¹\–e¡X,"ŸÏ£T*¡Z­t]ô0˜UXÆ™y…e›ZìK’„ܶ̕m”ËeG!EQ ( TUu¶«ªêx >ô¡Mô:YÆçR2ƒ¢¶H‰%…–Ö!´o&“éZÇÆŽ8C)¬d…1 ËËËQ_ûL¢ëB1«×'û9ä٠ò€w¾óø³?[wr>3™ 
r¹^õªûqáÂy¼øÅŸÇ/ÿ²ŠO|â8>þñ?ûßý=÷£c{û“ø•_y2žýìg#›ÕðÌg>¡CQTU{{{Ç ¥¨VÅw¢ªÂšÏé´¸¾Ý]@’ŒÂ ëG òRf2d2™P™¥0[šhMÓD¡Pè;‰jš†r¹ I’ ª* …‚3‘S¾3_UÕ.Ë#åŒÊ² I’ãN­Vsö“eµZ-ꯒa†™¦i†ÏÉJá“ôl(‹e–e!—Ëaoo¯ãGÏ5Û¶}Ñcýûú2óCP:E4Ò €³F·mÛ Ëà8œTUuÖóÙl¥RÉ1šÌC)¬‰Dkkk¨T*¡Í…©ù0L&#5ʯì‡išŽE.ÈOBë]È“ÀÖëõ®ý-Ë›ßüüñÿ4.½TÅßýÝáG~äÇ—Ïçñïþ]ïÞQ:½ÊŸi ¥rw—î'3Ò€ÈdÄK×Å÷¡ªÂZ«¹¡½cŠŽdbJ>Ÿ‡$I(•JÐ4Í)ê@‹ʇ )EQœPÛ¶‘N§!Ir¹œsNïäN µ-—ËNq.¯Â ÔÓ™ËåËåœñ5¨÷”a†™-,ËrrñÈHIŠƒW™ í^ƒg.—ëz>”J%gß f=ʆ †Œë½ 꺮;ë÷L&ã„âÒÚ‡äKUUT«UGI¥uàmÛ†eY0MÓ‘]Û¶»Ö8óÀÐ9¬€­­­Àí¬°ö‡µ^Î!²ØY–å)£4òù­V䤢p1ñvï­¨;ˆ—ȶ…ò}É%oÂîî{–˜‘1 Åb±#¥T*uõµÄ9Ã0Œ—|>ïD†MÓ`ªÕ*Ï1s y:)'¢h(¯úB…B¡£Â){<™qáõ†zÓ“HÞü5‚ ÜÔãÇ;E·Ž"¦)‡eh…µR©`kk ËËËXYY ª™mll Ñh`}}=êïfd £3?Ó«ãÑÿ%I(´å2à‹ˆ Å›kêZ;÷¥Í…2Lвm»?´W‘ö¢ib{0È2åɲ²Ê ⢅¢ß*È#æèà ïæê¡=(º®;ÏKËEE­&òÞÓé4^óšÿ·ÝöýQÌx•Lon(=C¨š»·R»¿µ›7¦Õ9‚ Z÷ŒËJíÓȲDçÝÝNêþè:|ûÛ`˜²GC)¬Íf[[[ÎÄâEUU¬®®bee¥go§83hgŠBXX ¤÷0 :\*‰m¶m#ŸÏã²Ë’°íj‡RIÏ|šÇéë¤0ä0òy¡œærb’3 ¡<çrâºt]œCQzub‚Š$ùsJÓé´êÂ0L4PíÃÿqFÆjš8§e‰ç…¢¸˲xÝuW¿ø‹Å—¿|W]u¿ök?UuŸ}þ"!Þ Þ€¡e OD¡H’…7¼áoñÅ/ºŠçÂÂqȲJeozÓ»ˆ{¦û/ H&oA.÷llüžøÄëñ¢}!ꟇéA>ŸïhCï(ÏT×u”ËeȲ

å?S¯ÜL&Óâ;*£yŒ+š†Ž¹ȹç=Æ4¿ÿû+±²ò9/ø\C)¬.\ï¿´´„T*… .̬Â:Œ!¥PVE¾ÿû¿ˆï|ç­¸çžÆ•W~šöhšxp‹–…МW¯Â*Iý•ÌRI„úpòQÉ3{P‹‰-uÌÀP©tå÷5Mƒ¢(]…•˜ñbÛbBW”ÎVNŠ"¶e³âß ›e‰UÏåÄ|R,râY„RA¼Ï!Ë¿g&#øºÞ©ÀzWçe–UÄ?ÿó·qà ?ç>÷2¼æ5ÏöÿVV…$YÈfCU¯q>ײ„Lf2€eñ—yVV*ø¥_ú!üüÏÿÞúÖŠÅâƒü^ûÚ—|žŒ—¾ôqþü·ðÎwþ´ãU ŠÅ"Tµ„W¿ú7ð‘ÔP©ü6^óš ÜtÓ€o|ãøð‡/À4¿…lv/{ÙåÐUsáÔ©N^Øÿù?ÿ'yä$€WDýÓY(|— ï(-ØàÙËÎJIc¦yQUUuZÂ(Š26Eu Úþ²×ñãjI‘šæ¥sÓ³C×Åÿ]cfgg‘rY¬UDÊâçÑj=:Ô5Œ”ÃÚjµH$ºÞ ëÍ: ‹ƒ‡øîƒô­oý66~ozÓ©Ž>]‹Ær¹·":¬> IÝXê_:®8qæèP.—0«r¹ìxYËårh;f|”Ënè?{Ëd\¶Zíœüi*)æo>=ý ««QßÓ¯’jšâ¹Q«¹¿g¹ ¼æ5_…i~ñ ²¶xß0 ¼éMwâÁ/Ã÷ÿ÷ãŠ+®ÀÅ‹‘L~ùÈ]e¥R ²¬à¶Û¾Óü>ð,¾ó¾þu _ùÊ·‘LVð?‘À‡?láMo½EN UÞ1øÇüßÎsí]ïzòù<¾þõ¯ã?ü‡âŽ;JPUº®cuõÿâ±=‹÷½¯3ŒÎ²pp-ŠÅd`’aÉd2¸þúëqîܹ¨Â# u3ðæz«óR1&ïŸmÃÓÚÊØ¶Ý³QÔxCiÃ.‘":ÔZÃ_+JEÎ0Iê4–RD' >ËÞî׫h{.ìì<Œa§ï¡Ö¥¥%$“I‹E¬¯¯;žÔf³‰Ó§O;û̶Ým©öÖÉ4MüþïgqçÝqî$l@°wd°²Ê …dQ!€……X–åR:ŠáYƒBžQJ XXèT,s¹îH±Ø™F`YbÁÒ«ƒ÷' TRf™Ù‚~ï\Nü~¦)”Šâ÷&Zð¾÷=ŠK.Y¿¨:="iŒ %Pvþ¾ùf’ôblm½©ãsßùÎÜ`¶mãOþäóxùËŸYÞtö¡Š«Ô†¡WÿÉR©„cÇþ…ÂïàiO{"¡@>ýéáÊ+Ÿ Ã!ËîÂ&Ÿ ˜\.çTüefRV¹â©‹aôž›)}Ëk’¤N¯ª¦‰}ØV<~¨p›i ã\uZÜ!/f¯z9Š"ÖÞtFÛrDQW ‡óÔR½ïñªÅå½Nº# gýžaÚÃJ™[|<“É$677‡=]lË_Õ4Í©ê ì7a“G• ŒL\ñzW‘#FE–Ø»*ÔÂÊŸ·Q,º9~€P:½………ÎÉÛ²Äî´åá&ê;[˜¦ë5§Bz‹‹âoò¦& Ã}ŠË² M“ñá_À£¾Õ1ˆZ–å´ìð†YÓ4^’$¼þõRàûÆ[.—– açøã?~}×û?ò#W¸Ò )–e1Nêuw\°r3[”ËåŽ>’œI¬¬ºPZy ²Y·Ž7 ú£¡å¶AÏæðк¦T*ÍŒÌjš“ÅEWvüPj¢w{:Ý©w è§ÌÒZgØš8¦Ùý™‡ OZaM¥R¸ë®»°³³ã„ãœ/þ¦¼ÐƒèH§ZwqÖoôÑ 0ò¬¹Ìü@²K!Ü$’$äú§-hÚ‡iš §išøò—ŸÛV±³ãŽEY–'žSÞKQõã­P_. 
™Wq â~÷ö8`V)‹Ž‘DUU(ŠâÜŒC¤Ãæ÷bq_(Þ˜ÏwFÏ”Jî(z$N¹©×'æKo] åÄZƒZiù¼ÙLFÈšw;MˆLFÈœ(v'ÞóŽtšŠÚuÏÕ$Ÿ£è2“(ú:Rk³ÙD"‘ÀÚA=â4›Í™-¶D‹ÉÎ÷„²J1îµZ éƒßb±8Yëb€ îFÃL MÓºZð¤'ý>>ñ‰Çw)ŒÞ ®W¾7u]M–jÃp«mA…Ç‚&ÙA>ƒB1½÷X.»UX)üm1ã¿]w(–%î‘óóÉ-…ƒ“"G‘;´h²=ñP?ÿó:r¹\ìCÓzA9xþg(-šX¾gêwJëMÓ`šf,U"s³øx{Öû¡HYÇ–Jn½‘Ñ¿+ñ™þèï×äÝV*¹²ÌQx“&ŸÏO½Gsæ vvv°¾¾Žååå¨ïi(¼– /år¹Ã"CÖíl6‹z½>9eÕ>xY`…•™TØÛÄš”¨Zí ]:ÝÙnITx VJ³ÙÞ-Âz S¸Tµ*>׿ dÙͧsQ!"ª šÍvæ’RŽYÕ½C˜ ‡]Dx´ ¢÷2®9Oض+‡ÞÔÊ—¢W.—ëð¤N{5n2·å¬1Ñi˜0MÅb±£ MÜŠ'‘A•òõèòòy7’d“Âò½9uþÅ>AŠ>¦¨¯gŠ 1TÁݲ†Kí _® <¼½Ÿ Ãpºu ðEXýäóîs¹Ðo0ñS*¹2 t§0’$ÌX“˹Ï(Š‚ñOJmØ5Ä¡ÖV«…J¥‚••Ç» ›››ØÚÚrÙA=­TY8•Jno4H$õÜÒä…ŠÎø™Lív{b×0!Âg#Ì>zH¹WúìS^ÆA¾ƒ Ðv/䥇w­ÖÞä}¨“2I o± oØJ±èæï‘‚é}‹®‚I•r½ùnõº› Oí[r97,póƒ€Î‡ƒªûûþûv McýE úæ‘¸Êø$1MÓ)€äUVÅX0ï.+ 3“75 ´p§pàyã(É6åøY–I’P­VcãIÕõî|8ÓsªªºíÉcINªHJÞÔ^Ñ8T‰æjUíôÆJ’¸oѤZMìã넸Tž7§t=ê©jšæÐm•ÈpNk“°}Dõóàídè –sax{k“œÓšÊ¿Fé%cd¤ C–ÝâŽa÷ä7ÞÄ‘¡ÖF£V«Õ¡¬+++ØÞÞ¨k«ÕÂÆÆŒ“W*•Âúúº3hšÍ& …8Â7 ,«ûaK½™&yPÄ<«³`xï—k[>¸Ã(z‡Gç!¾³°9©÷ûžRhCœä;ˆ –5þÉÎï}ôS«¹J+µ_ñ/(ÜŠÚu%Ú 7#¬]“·]K’Ùo¢ö2íÈKonß¼-èã.ã“Â0€••ã'~ÂD:]Ä·¿ý;ÈåIÒ±°P„¢(3æ;ì÷Q.O¯þ48j²­ë:t]Ÿj‹Ãèìߨ wôæˆzŸTyÔß>p˜y—ö뵿¢tËyLtú¡™GWåjE2EF‘ r9áa#—ºE¿ˆ3ot€· &ÌcÆy²‹/´_¥RA³ÙÄÙ³gaš&’É$¶¶¶œíH&“0MgÏžE£Ñ@¥R™È@ÉïõËLÊ%bB(Yaädƒnd¬ƒ×¨Ø÷aöØG†P6syzíÞë Ú®C|§½®sÌÄI¾ýø[Ö˜fï–NaH’ð€V«^Q?T VëÎÓÊÝcT8–ôqgŸ¶ ¼úÕMüäO^ŠZ­U•ðoü8t=íäû‰÷çÌ:ÑIêÎ×›uŽ’lSøo­V›Š²ê-DD!µý ¯©¦‰¿m»ó8Êñ;,ýÎÁ2_g›%ò¬ê!ëN]k¦ Î"é´ºëm_E_‘Ü‘Ç[¨éd(…•Â}766Ðjµœ÷ÉZ 4´À»ïöö6Ö×בH$·ß~;Nœ8@Xvvvv°²²H$X^^žX› 8ÇD½«À`Ѝ áY £Øgû h}®Åè³]ƒðXöš2è‡KÞæ^÷YB¸²)¨A(ÅFÈö݃kôãøã'îú‰C~‰ÝÄM¾ýhšÖaŒ¡âE£0¨…ü(3uk©7YkBÄ]ÆÇ‰¦¹ý´mÛB2ù2¼õ­ÿ?"µT*¡P(Ä*„ršyf™£$Û–e!ŸÏOMvËeW Ê£ ‚ÒJ(L’R@” ¦?ó&ã¶m;r|e•<¢€»¶ ªoCQetŒ÷x »ØVÂJu@üQcT±ú>Jbä>¬ÛÛÛŽrJqî›››ÎƒB R©”bœJ¥œ0ã .8Û‰T*…f³9•/DÓ4Ôš²tE-l¡šîqü"„’ÕKgÖ!”±^¡¶½Âum*q]Ç÷»†ÚÇëÿÍ#Ń÷ƒ®Ó8¸ÎBÈuz¿gúœ «ý2: Y]ù÷Wâ?ò (cN޳|Û¶ ˲: 2¹Üœ…«R%‹YŽ«ñÆPnV¿§X±è&]•ËøÙ¯}m"—g'Ôʈ¢{ƒªM%oêQ`Þe›<ª–e9ùÕÃʰ¿XØ>~ÝAÓ:ý¼Hd{óö,¥¨Tö@μÉ8µZ:ì\¬i2t:ÊY&ÙN§ÝÜР¾êArJmò¼µ8YöÜ2‚¾ k³Ùtpû°†áþòò2TUí«¬nØðêêª30.^¼ˆÍÍ;ƒ¢Õj…~Æ}÷݇»ï¾Fé^<ÞT#ëÀr(kcB1 ³Êèíu¤µP/e´ŽÞJ¯qpAç0MÀPÄ5öònz•@ÿÚŸ,F2\/hÐ÷@Ÿo [a5àK*#8¿”Îë?Öò?ÈÚ?xŸ&ŒäƒÀêÇ?~ÊSpù‡ÿçÊÿƒWà'“’o¸ÿþû±ºº 
UUGªÆ­ë:¾÷{ÇñªRe¸¹*3•ËâI0H=vê/0‰¦¨TB2Èêë-oéU®5Í]íQ¥*2ýÊrp‰a*—œÉ8ýDVWWqþ;ßÁÍøšã6‡O¯²ªë:òy Š¢L.U„ŠJ¥‚{î¹Çëyã<òD•J¥‘÷Ô¿º×¢šž/Þi” ;ââ¢gþiÒ¯ b_Oqû¹euuçÏŸG£Ñèµ8,ó4ëºî´[:,~PÍ oou@üß+ÛAòí­,Mçð·Ô;jlooã]ïz×Ðó÷@!Á[[[8uê*•JGöÚÚÖÖÖ°¼¼<² tZwîºë.Üu×]X^^Æêê*t„ûé•#{ýõ×ãæ›oz x…F×õ×o'ïeZŸí4î¼^E¢G9|ýÝdžxZ¼¬|écÀ/½¯{Ÿbø¿ç…‚ØKa¥JÅAÞM¯73(4GólWœ+KJªêù ÛvïA2ÝïÉ x’X$dh x ïdcA(°êAüÐÂðù?IµžÕ~9.yñ%©Ü9)ù€ë®»›››#/vΞý".^\šË"@Ħ^f|ÛŽŽ¯…W~¼PÜO6ëV‘$± NöÆÿPsÎ…qnz?îöì º‡jÕu}S_žÝ]±b ŠQ¢„bO¶ÍÍMÜxãùšã6‡RVs9‹‹‹0 Õj¥yLFžQÖÖÖpûí·ã~àÆzÞ8Ï߇%›ÍvµVªˆM‹í°PGªŒJá¼@ïj©Õª˜~É{J¶< »æAL£RÇÍßãVVù™¿-ËB±Xœh;Š& üý¦©…^ØãÛw·gŽËËË#Íß}Öd2‰r¹Œ;T ÛÛÛ8uêTG…±a8yò$tT¾õÖ[Ñjµ°³³Ós€Ž»´6•”&LÓ<œc-@XTš¦Pu„{_ Ûžû1àó_‹î þãZWOz§"X,º}Bþèz §w}¤;I±Z.œ$[ošbAí]ÿæ7”F+¬¸ÞÍ KëœæÐrxÏ7}_<€VË­þ›3ÜżaŸü¹nSÕ¿ë6ׄ} èšn–;µ·€ß~¸äÍâóm[<å>ôkÀO¤Äy/¾ñáÑçÄI¾ýÜ}÷•ÈfϯÂJPéàý} Ý¯½½`“)ußÝcƒ¼•‹‹Bs šãÒi1fÊe±}qQŒOï8„‹`wW|v½î¾ö÷ÅûA]¾{–øE•o¦Dœeü°¸-’td³Y”J¥#›Ÿz™WÙÎçóeydƒ<)«Þgå²;^*CSM‘@øEyЦ)¦ÒÅÅðB5< ϬË8å^§ÓiÔjµ±ÌÍÔÊÆ®‹åœz{ò®½›Z÷‘ˉ%È,g)Å…¡rX½qâ†a`gg[[[H¥RXZZÈr×Ë›H$œíÍfÓÞÿMë´ú ¤°öÊ ýì—€½:°ò~àSwºï—ËBÚÿ÷߹營» ¡ˆ•óÀ½Ø{"ð4ˆ§Äâ¢ØçŸž ¼ÿ—r¢#'ÓY:ü&`ÿ¢ø|ºQIv“A.Y^þZà¯=ñ 6€·_*¼“Û«À#/Œ7‹íÞFf4ø¼_—m«¿üÝ)àyºÊn߬¤{­E—ÿ!°°åÆJP¶¹‰Åßs^À{<©…*°´,'ÜØ YJ0>ò`÷u€ôºÎï—Öú°ÿ—ûÀ¹±‰’CœäÛ‹iš¸ôÒ—ÎWð¸ñ7Ç£|P?~3¿e¹]»@J\e|ärÀ™3gP.WP¯×YQ=bÌ£lçBã‰ÒõîÖb¥Rç"ß²Ä~²K­=(C£T1Þ0æÜ˜fYÆ-ËÂââ¢cHßyƒå.“q'A­÷€ÎžõÌd¹­ªªX__ÇwÜååe4 œ:uÊ 7#•Ju•ЮT*H&“ζ¥¥%loo;Û Ã@:Ƹ¡æ¾œ"=1Ñ»rîüð’gW¾Ó}ϲ„f\«ô»"TV‡PÚüüŸo?ÿÓBú_ðl÷}I3þëÿø‘”Z­vøÔ=a†jqC>• /©ßëÊLŽ¡<¬;;;XZZêøÛ0 $ œƒ< ?½<ö@æõîg¾å[À5¦Pl³rwµ^Ê…Íx^^d¸† Á9¾Ô7Ö°÷}ÔjkU–Ãò\ÈàP°ƒ‰ùöòѾo{Û/Mô3"#Ÿ?•8bDe|,Ë‚®ë•R÷ñõ¯G&ãVœœFOÊ#CPÉØa £¯iºîUÅ%/|áD.wÖe›(¤Ã•Éôþ K¥ð=Û}âÇ,ʸ~P9Ü•Ù-YÁDÏ@ k£Ñ@¡P@³Ù„yëh …‚&°µµ…õõõ ¤R)ÜyçŽ7Ö«²}PóišLMÓì_Ò„«8ù'árø¹œP¶χH’k^üzøÂ×ùö1 àü—(¹Z´‚no.)}@pµaÊ ÿãoýB}PeÏߦ︎ó)ÀCp²ó3>õ=À»е€ë¢¶6AßàzCËÛÃ~†4zWO¶¶‡­5ÃúµŽ™8È·ª~ý‚ VmæàÑÔ‰›Œƒm/|áßâ§úxÅ+¾]ÿT«Uض ]×±;OE£„½¡LAP%ì°…¨$¹ñ¨4Ö5 ©ÛoÇ?¿ò•c¿ìY–mÂ0 †16YÄ¡Å6žÙae¼X,Ž¥°íÏaè³Ã@ k¡P@*•BÙ“°@¹«Ô{ukk •Jeà x‰D¢ç è·ý°PO i1M³¿%Ò†ðÞÉ'³, î{(ûƒÞ?{)ðÑßè܇.À@·2å·üp«ãÝÊ¥×# t;!ýûÛ!ÂuÏñ~eq˜òÁ¹$„+¤6„ÂæS!¤ö“‚ÞëÁ)õÙ>èýŒ¨åÛ‹axøá·MçÆ§ ›G##N2Þ *ÖL¢Òh<ˆ+®x~åWžY~:Êå2,Ë‚aÈd2ì]ªJB±£Šâ–Ô 
Óv(Á‘VŒù¼XEær1¤¤°zÉåðÉþa|åÜ `vd;Œr¹É¾´è¥‡¯ÁÞ;]³;… Köäy…¿ý?ÔXõ}Vw2ëûÛ¯pú{­úZÝ ¯àz«ïæÜ ‡B~…↑P6{ÍG{è݈ äïø2žùÌk£¾ŒÉ`šìaezR, }§^j5—\ò|üãÏsĦP( \.£\.=7jêP[0ªdÍvœi—É”bQ(–ûûnµB!XY¥ëËçÅJqoOvé_vw MÓ ËòP¡“ºî6 (•„âªiBœØ&ÈD‰išÐu…Aúª÷€šLPS¢Wp/úzX–––:*‹Q;ï„8hÖicYžT¢3$ÀèžÜM3XŠ?ðUà#—¯»Ì}¯T YÁ!±€Øî}?H1ôœ÷šet·ñ*§fÀ95¸^Izèøî’ïÿþíÖÁ}Uî!Ha— Â{é ýÖ :½ÇÌX°m÷Ý÷¿ñ¶·ýZÔ—29Ø#Æô€ê¿b1ï÷¢ªªêä®Îd¡%j~IõÈ=&ËB)ôºÈÈÝLUDdÙ5Ϊªx`z¿ª~M„%3z#‰¥Âc³øýÇÛ¶Q.—‡ÔuaС¯¿^?½¿20ÃLÛ¶BKƒF¾P&ž—bQè^·m±Ì?"µ£…žM‡´ ôUX‰DW#áF£T*[%ÕK&#Š×úŸ‘^«Jh;ÿ—«ø—Ÿ~ ð:_{o`Å/è=z>÷ʽ$z)›~ï)]/­5l„熆ФŒîüWÿ>‚{²2‘¢ë:^ñŠ›çwÈæ&ËênQ«iZàb¾Z­Æ7˜V[t}þ매,ïj̽±Î#ËER\MSh$»»Ý«qâDÔ_O¤˜š¾[ïßõ_DõÜ¿8¾{›WaÍ¡ÛÛi¡S¡$%Ï{¼Ðx«ˆ‡Evy«ûJJtPDEmiÐÙ®ÆK Áù­ÞÏa+l,yÿûÿ/|ðuQ_Æäà`&„l¶ÓžA•àƒó²,ÇSa%ïi½î¾º/¾Ó´ðz`ëuÂ[*¯Ør9±MUÅ+“šM6{¸ðafì†ÅÅE(ŠÒ3w•l–Å?ãX0Íþa÷ù|ðmšB³",KÄe{ß;ÂPÍœaCij£%}©$ìvþ)ŽºÑ1‡Ä4»Ó' oújUX±^Ha]ZZÂêê**• VWWÑjµœZR`‹Å"–––bÑ\Ø å/y¿O#ëÀ¬þÞŽÿðx]÷½ÀåÏëžTÂÚÝAU}mϱZÈ1ÔŠ%0wÖsœ²)´a×Gù¶þå óôÒéÙàK>õ©kð”§<%ê˘,¬°2øÛèêºÞ¿|Ü(—ÝF€q@QÄ¢ƒ#beYÈçó(‹¨Õj}÷”›ª(bQÈ´ÀÙç0Ê!Eb¡F¡à*¶d-(—Åß^G‰,‹Å4…ÜxáBÔßN¤†1tK&²ïy#kzMªŸ©5–PÜt”\.w%ªàæý %éP9}V(•JPUÕ ¥ò`:sæŒSEx”ªt“&“/¯ÂJi`ˆpàÍ¿~©!³npg~ ·òÎWº=¢þ‚B¤ŒÒþAÏÿ*Äç©öŽRõ]áÞQòÒæC¶Ó­—®tÁ é ¢ë:žüäÿ0ßÑ{~­„að>ŽlÛvªÏ ´úŠ›VÁñ¢± N#ŸÏCQìîî%fYÍ Àm¹T.÷V6-KlÏçÅþÞ…¤$ ³Zï‡-ìeÙ £÷BÛÝíþ1ÈíW¯y…Õ²¬¡£^L³·~Å i ™ûB5MŒZ­Ó¥MPÒð-µµI$X__ܶ¼¼ŒåååØyV Ræ©p¡,wV̧0±PÒi1|èCÀ¯®¹ôRàRIüXTnÌÞ«B*òâ/Š$£3œ·ßo´&§ÏÑÑ¿ê=Îè±]Ûÿ4 ^›Ì$†aࡇÞ4ßðæúæ˜Q ÖÖÞyžŠ-Íqó®2±Á0 Ȳ<´Ê4ç$ RÓÄbn”ùŸ”UjŸ-`BQ¥pû u"K«U±o½n(7>É2Þýc?†›£ùv#'°ê@DZýº’õaû1SD@PÕvÃpCܽ ©_æ'PÍj + rT+• *•JGëšd2[ep4Oøi¦iv ï„£ªbrLœ^}ÐÄ„[²ŸF‡xS(GÔ‹ W ç5 <£= €° ¼·½ "áJ§tp?l•š;Îû.»ì2^ï2G Më4 Û¶ MÓf¯e7ˆa<˜¦9›­ÇsónÕÑaÑuWYÍ庫²š&ÐZMìG±£aSÊó^\ìN'c†fTù>rýÜÉÔúWMs‹ëùu#òª c÷žÂ@ÖJ¥‚­­-çï­­-¬¬¬`mmmª; Å¢[§"hŽ –e7ä‰Ê Ÿz“»½@Qݳ_þ'îá´ <¤aë½ ù{©†}F/úõ?ef]×qõÕ?w´&o†x¦zkåóyär¹x-ð‹Eaý&ïaˆ‡“w0k 635,Ë:b€Dl¦ñVªf±ìÍ7­Õz»áÒi±ÿ°åcK%•¦i†æc÷êr¤VÓºI¡Ð[î ±Ÿ×Áfš_”i ªŠ‰"LöÉÐyX·¶¶°¼¼ Ó4aš&–——±µµÕÕî&îøû˜T˜ÂNÎþœû^PUÿoë5hx«õzICx6ÃêWP%á^‘ä! 
#(<ÙOPÿUf¦1M—_þ¼£3y3 ÜE9=—‹Å"$I:tÓù±Beþ©çB:-X«Ÿe‘a‘aØv°Ceæ ªSN]¡Ð¹Ø¦ñÔ*ÛïK¨×¹×IÄ„­Í-«³UôÜTÎfÃ=öT¤Ë0Ü/‚ 3Þg‰$¹!ëš&”WÿóFQ\å(ƲßWa¥ðß••ç=ò¬R«›Y¡»°1X;›Z ¸äÏèôZúóSé½~zÃŽ%$E·×%–ÐÛ;ÖΆ™kLÓÄoüÆæ?Ås˜Pf®¡…:=w5M©ÒäD!y¥Ü z]XǃŠV0̈ù;®PtàLB7S*u{x¥»mL±Ø.)IIíLl ìÚqÕ¤¢Íºîê]sAà'— oCº^wCu…œû¿?U_Že¹E’üûP³3p«7O5‘HD}ÝÑ/• ËJ©A(£~üT¿Â t+䵞ƒZ‚PX{=Dúå-‡å¿z·sdّömüÈÜWCÙx°,. È8P 3U‹žr¹ìT¶ ªÚ]õÆ Ì0=ègh¯Vź“Ö§´ ÏåftAOÊjPNݘiºaöT±w&o–éùèBÉd\碿 êLBE’ Ñí}CÕªñoØ—@!ë3f1°Â:«ôš³º†~ðò ˆß ª¢SAíÕ¦—•”Z^«0c¦Õ:9ÿá2G*a…éE’8ŸÏ£T* ÝalP¯E?3ëâbâB¯‚4~û·¥ß̶Î5Mq#½æzòQx}œ¢*˜¡è¥°zu9 S¾G-+‚âö …Îß ¨ÕØ +£ƒ0÷ ka%Ò¯°ú~…5 ½ 2ièí]e˜°, <òbvÚ0GJÓ€r¹ I’¢iccYn^*ÃL˲ôšÖ=H zØáì‰ACs9‘ׯ¨kÂò³M³[!•$aÇ0Í9*Îì_¸QX0åŸᨲªÀêêj×{•JgΜéxosssè‹h4H¥Rï'‰‘Ûæ„U,BÂjèO²è:½¤öÁK†PJËèÝUᆇ­›dpuÞ#À¤ä; ˲ð’—|™Ì‹£¾õÉr„'î¸1m÷R,ºëY˲ iv‡í;7® ÑuagÏÜ¥lašf`^¶atwA’e±:R²GæFÇGœdܲ¬ÀȘ°UUÓ®®{{SýÚÆa„WçÕõÁІÍ1}Ö'Nt\"‚„{*• F‡¢Ûl6Q(œ¢Nªª¢\îWê¶E Ÿ»LÓì¶ÀkpCx½úKó`ßšg{½B‚Ù»:÷LR¾Ã°, O|â~Ô·>yl{b€fŸ(dœð‡½kš†B¡0ýP`Ê››ùã%JÙÛWJ3áøS¡l&>ÄIÆ{…F°0—s»»ÌA÷_(ÌÑ ŽN_…5™LN¬ßêÎζ¶¶°´´ÔñþÆÆ’É$î¸ã´Z-œ:u •Je¬×ÑU=Ò@¸ré/Zäm£„ãxk'òÕ13@Tòý?þÇOãOþäèZá˜éå{…W75M¹i÷‰£òÄq+ðÄЍe;ˆ°‚Kº^k%“ N§f˜¸Éx¯‚bÞv6~ªÕ9q¬Ïl¢ùtˆ,‡µÕj¡X,vŪ7›Mììì8^ÝD"åå呪=‹½Zù$ß[ xw VréàÅ #É4ä;Mó˜/DWlfšpßÊH‰JÆ ÛvC‰^-&õÒ9¡ZóFÔ²\²íεa„+¬TefáÔ‰G·m;Pa5ŒÞÁTs¡¬bMÃQc¡ ­°îìì R©8/êÓ:,X^^Ɖ':Þ¿pá€ÎãT*…f³9¶›6 £{QcB„ö*~àg:·ÕÏ 9t±Çéà¨G”¨ä[Ó€k¯ý‹Áú Ï:sQpv‰r7MQÛ¨ó=sòrošÝ hך;¢”í^PÁ¥tÚ-°D­kærÊ·¬®oâ(ãþ9<ŸÊê‘ÉþakOVX ÃÀ-·Ü‚ÕÕUlmm9¯ÕÕUÜrË-N¬û loo£Ùl†ô­Vk¨›+•‚…<,©2D¥àÄçúŸ|‡Ã¼T-c†bZòí‡*¢ö³ïú+˜3í6˜m¢’q"¨ú¯v‡†ÎÙl÷6ö¬ÎQËv/¨‚êî®›Ï7óÔ^Ì­&-q•ñù—¯!›•œÚTk(“™ƒ«ƒp$´òÑHaÝÙÙA¡PÀ‰'P.—aš¦ó*—Ë8qâN:5·µÙlbkk ëëëÛ{ ˆ‹/†n»ï¾ûp÷ÝwV3öcYVwÙlÊ_ 3 ‹„èÕ²¸Pd,ÙÞÞÆéÓ§aN ÔhRò ÷ß?VWW±½½¸]×Å|w$¼«LOVWWqþüù‰œ;sxP‹ƒÀyý0ض0ñ/,ˆ%IUŠùø2MétºãU<ðR™¦éüßOÚÓŠG×u,..âØ±cXXX€³¦Î•J§OŸÆ½÷Þ;ÖóF9÷ömXV·f*Is¼Î=¢i4ã”8ÌßAX– UŠ* ¦Ú#Up}n-OÐ|Øù{ ¶6¡UÂTU…ªª( ØØØÀ]wÝÕ÷B‰ Àah4¸pá*• –——{VîUZûúë¯ÇÍ7ßÜa1 K- ¬L9¬† (Äê+诰Æ{]sdY^^F2™Ä¹sçÆ~îIÉ7\wÝu=ÛFÙ6ðõ¯bú9|LìØÜÜyáÐiÎáa˜f÷BÆ4MÆškÛ¢—j&3s•탰2Ê9³, ét™L¶m‡ꌃ¤H˲Ïç±»»{/icqqТį¶¶¶†“'OŽ}rþîÇ;ßù9\sÍ5ãý"ãÎõ°Òü=®n^â0aš&žð„gίñ…qu Þ×ú³³ƒf³‰Ûo¿½ç~+++h6›}-B'OžìªHæ…âé½a 
Ífs¤>PAac@Hb76ü-lŠ®ôKû½¹”éÁ4åÛ$O{ÚÞÑ(¸ÄDF”2Þ‹ÐTQH§…µsrSeYvÏA ÂUdø’$ Õjuàãg™¸Ê6Üyç|ß÷}'ê¯hºØvp_Jfdâ*ã¦i¶å£hŸ`¤¯‡µÑh ™LöV²Êô‹q_ZZê,•J:¬2KKKØÞÞvÞ3 £#\i"U'¨MMØsÚpâꙉR¾K% XœBá™8`YÂ"u$[âEÔsxPù±\šñ> –eu…þªªêxQ{AaÕ Èd2NÕQ jÙîÅ¿øXÜ~ûåQEÓåˆzX'I\eܲ¬£ga†¢¯ÂšH$úæfŒ›µµ5¬®®bgg­V ‰D·ÞzëXÎX˜Ã:x=“Mt÷a á}[¦“”oÛ¶†‡•ÖX3YïÎÙ{Á¥!Éň+™¢T¢¬»»»0Mº®£X,²,Ôëõ£a ëÃ$e»_üâÕxá ¿?êÛŸ.ìa„(dü£}^úÒDÔ·š&ž;<džÒWaM¥RhµZ}C¨àR"1œÀÅ»§R)ÜyçNxq¯ð…^åë۶ݽ°1à*¬–ü—뀿{¼»=h¤€‹*1}™¤|{1 ñ¢¶ 3-¦%ãD"°.Á0P)ʉÚé¦É²Œ’§€‡®ëÐ4 ¹\®ï¹H±UUÕ™KŠÅ"4MCõHU@™¾l‡!±e.HÍŒ¸È¸,qû³eqÅù> ¤°¦R) lnn*¤­V •JKKKcKO$$=Xe¸ÞUMîý  kX‘º Ða†qË·aˆ¹N×ǘÇg ƒ-ï1gRsx¦iN™*gZaíÇ0hÛ¶¡iZ×=‰ye@¦)Ûð¡ý®¹fòy²±c 3¦)ã†aàðÒ£]pÉ4ç¢fÂ$¨­Íúú:šÍ&N:åôoD"ööö6N:ÚÓ)J .u=¸U¸^TÓ®û¶ç$QßÃô§T¢ŠØs"°½ÚXç6Q¨xïX1͹“%Ã0pìØ1çå­ ìßæÏMËd2e N[Ã0ÆW™š{îù?ø•_ùÛ¨/cú°Âz$0M?ü¢¨/#zØ(Ø“ÚÚ¤R)lnn¢R©`cc£kûÒÒÖÖÖ&R‚û0µ4êÙƒ•H$ܼV–fF0Msö½ ¶ d³Bð`ËšŠª ïj.7wJÓŸ ÝÒ0ŒÃ…Á‡•“ŸQTUE»Ýz›÷ýjµ ˲œÊËœf-–e.äabŒmÛøÛ¿½9êˈ–#ÚsxRXWi¥Ö5Fà N&“hµZØÙÙ™j˜L?‚ž±VP¤ì{„›Óz”C˜™!›r9{ö<¬º.”TYÿ‹",ÆoY—e ^û±<:ÆEU'Pp‰‹wâmmÃDK½þïQ(°Ñ€™OLÓÄë_ß>ü‰fžkû2°ÂJP‹¿—²Ñh`uu5´)ù´±,×C†ÞUÓäÜTf¦°,¡Ç†1y«m‡‡ã[4@ÓÄKUݪ¿µZ¸ç”'t&€C{ŸL“ Ll±mßýîOs@ 3·X–…׽/#:b¢7ÅrXgZÿz ìÕg¢3ì—Å&ÜÖ5 c¼a’Ý+Eh®¦uoËçÅvRjËåð}±O½.BK%ñ^•1=X\ì~/0̓aæÓ4ñÿãßD}Ó'›ú ˜)`Y®½ö9°í¨¯$Bl›×>0· «¢tGy†[è,¬$I@ @¢ÝÍQDÌL@)xÏaÍçÅ¿{{"”—þ&êuñïâ¢[§T¤i”J\d€˜°TSë0¹?†Á &Ö†¿ø)Q_Æô᜾#išøÚ×þûÑþ¹y4C‡Ï ’ÔëÔ·à’ßÄS‡3±Ç²„qFׇô°ÂKÊ)…NÖëÁžSò–zápKf )¬Q3ÃÀ +st8qâyQ_ÃLÓ4aÛ/:ÚÓ°¢ðshæÖÃJQ‰^‹sx=¬¶-Vþ€"XYefJ):<› vÿÉü†›bQX€üŠ'·`¦ˆeM àR©4——lÛF>ŸÇñãÇqìØ1d³Ù±×—0MÓi{C¯b±Ø±Ýû·˲N§¡ëzÇûº®cqqǎô°”‚#€eYxä‘n¼ñš¨/e¼‹â†¦qÍ‚#‚iš¸ñÆï‰ú2˜ ¯‡µÑh R©ô=Q«ÕŠú^:Ð4±îö:{‹.ÉpSYvá”ÃÊkrfFÚÓT­ ¥uwW(¤š&òNëõΕ þP 3e £»¯úXòWç0+NCUUìï‹ò÷†a N£^¯-ÏÝ>0lÕRHÍd2P¶m÷T’5MƒmÛÐuÝ)šeYòùÓì6š2s‰a¸æšWÍ_4¬m‹Xþ £ )«¹œp’é:+¬ÒWa¥þ«³ˆw¾£èØþäƒÁ!a8$˜™)F ÌåÄB,Ú¼H`bHXªéÄ Í ay½™LÅb²,CÓ4ÔëuH’„l6 I’ Ë2ŠÅ¢ã1M§ÓŽZ.—Q«ÕP(°¸¸Øáõ‡ýâñ¦ßM–ed2躎B¡à»°°€L&UUlhÃ0ðìg¿}ìœdnÙ¶ˆè©Õ‚Ÿ3ù¼X¸s̓#ƒaxìcsó¥¯yÒ^Ûhû\Ýüd™Û¢K~,Ëê~ˆî¼xýs»0:-ÃDŠ®S¾ö€ÖlV„øz•[ùebŽªÑ9LTÓÄ¢y\ù«T; Eégš&p/J¥nM=è³2™¾9å’$9žÊ\.ç|w¹\ÆA.»ªªNÕeUU¡ë:TU…,ËÎþý¼¨†aôU2u]‡¢(Nd)¬°»» Ó4¡ë:ŠÅ",Ëk(ó¬ ë:®¾:7û6DòªRLØïÈ!ÀG˲ð¬g]6?€ÈÍVÕîù˜R® 
®ÿ1"}Ö¬®®v䢴Z-4X…÷#°õAóàŠ@¾µ{› fbÏP=XËeñï\=˜£ÊÈ‚'‘#7ʹ¼Vö1~–,˹£†a~_^C—mÛŽò*IR_ÅS–åŽð` )îwœ¦iPåƒ9ɲ,çe™®µX,BÓ4TçB¶E‰§¦Xì¬ùãKñ ½ÊûŒ¡jiGè÷cÃ4MìíÍQË&ê¦à7NÚ¶êÎ믑)ÖäÌ™3X]]úÚ{â×O‹s\v°œ>·´af€\N¬úVK¥°_^403HSq$…Õ0æ>üBl½J«mÛ(‹È(Þmäå”eÙÉ}-u1HÕf]×!Ë2êõºó¢k¦ðc?G-ì›<Û4¿O•|¾¿ç¿PãÈ4Åb¼XtÇ•ÿ÷Ïdæ²7s8,˵×>s‘žn¢×|P7À}Ÿ•ÕC1·!ÁªÚyèa5Íð‰4ŽVM†ñáÿz.óyaõ;b ?fö [И¦é(`£is?$IB½^G6›uæú®r¹ŠÅ"lÛF:†$I°, ¥R ’$aqqù/iš(•JŽÇ5Ã0pìØ1çoUUQ«Õzn§¼U/ªª¢X,boo†a`aaÁ¹vÛ¶¼Ú£‚išÕ–'ròð°ÜrY 8*~T*¹¡Œ{{î~´Ïfݦ÷Gì7b‡aøÚ×þ»Ó–/VÆ`–"ÓììQ»™/"UXQØ)l{"‘@2™úÜþuŒmÛÝÖ3k@õYQ~Ì3Iù&4ê&Yá;•Ëb"e+73f¦!ã’¼¦Š²C=†cc9^EÁÞÞžS XQ”/¥ðÞåº"(”ÎAûz÷ñ*ív;ôTUí¹ÝK&“q”Øjµê„K’Yîê4d; a`¨B×Çþ(‚AÈçãÇ]í™rPƒ _^CÅ‚¾!Á'Nœp×˨-mR©677‘H¸Õy/\¸@ ú¿7¦>•J9¡ ƒâ[]h G³0cfZò ¸ÑZ==¬ŠÂ“-3V¦)ãAPuà¡*DzG‘€(eÛ4Å‚~aáxÅ+žîÔ'˜’äz8úæH¥Œ““ÿ™ÏL Õ’'ð¢ëÂ0´î!åt_ ¸A‹+1± ¯ÂšL&KÌ8H$Ên³ÙD¥RÁòò2’ɤ㽠¢Õju ²AqôÜÞª¸m 3v¢oì1a¦Æ4eܲº×Óƒôùd˜QˆjþܦOxÂob}ý/FSVu]¸i31pzUmbcæ‘$*×u·ðô3žñ ¸ùæ?pÉxoÎ4Å@’¤Îmj½¸mgXþgŽHrX!ø•J·Ür –––°¾¾î¼ÆÅ‹áE’î»ï>Ü}÷ÝNX²¿oŸ,Ë匛&ð¬G£ºu&b¶··qúôéŽ^„ãfÜò ÷ß?VWW±½½ @ey°Þ‡ÌÑbuuçÏŸŸègLz‚#Ú ÃàÌ>¤Óé®—¦i0Mš¦E}y‡¦R©àôéÓ¸÷Þ{'rþiÌß~$IûÁüÒhýW©º©e‰b1‹‹ýû©2±„æo*z4 ¦1{±,7"ý߸oxÃÕã¿©|^(¤þ‚bIÀ}èc­Á‡¿®Üh4°µµ…••¤R©ÐJ‚ƒ(FH$¸ãŽ;ºBÂèUZûúë¯ÇÍ7ßè vúõQ S>3t}M¯z] ˜R©…_&¶ÐüÝKÞô×à€[B`böbQXy‚ž™ W´Ž£®Áò°†S§N9=™ˆµµ5¬¯¯cii ‰DgÏžèC …‚Gï'Nœ€Žxùf³9T("·mÛb€xòsðÐfâɤå›eñ íÁj\Pƒ™Ó’qÛî\sëº>¸wUׅljq°, †ç©iÒé4ŠÅ"ŠÅ¢cpÖuÝñÌê^:ò΋Eç˜ydZ²í… èQ~v(Aó9µ«!e•`e• ! 
' Ãë,Ãó=Ö›kò°nllöaJ¥RXZZÂòò2 …Μ9Ó7ßÕ0 4›M¨ªÚ+¿´´„d2‰¥¥%loo;ç2 iŒoºÆƒ@`<‘3cZò À)Êj­Ô4žÀ™±3MÏå:×à}ôTõ‘¬ìG8̯Pær9ضí¸ À¦i¨×ë0MétªªÂ²,‹E§*h:†,˰mårµZ …B‹‹‹Èd2±èÛ:.¦)Û^Êe7½£§AfaA¸b½Þ"Yæ>ÛÌÀD%ãé´°©X–…̸¼¦)ªýR5_vDÍ5}V*}ûí·÷ÜïöÛoÇ-·Üâ¸zà˜ü p5²î®­­9ís(ÉûÖ[oé-Ë!PEˆ`ƒVfrLS¾5 å‚9$Œ™Ó”q¯h÷UVaeßÛ›~p¯µU¬/0” þiÂR 3AæÁqÊÁqaŸEûåK’UUï–ŠZ‘b«ªªŠ-˲³)±óÄ´×'µË¶,«ÿ|^,Š`¨;sx¢’qÂ0 TÇeH$c +ªG‚¾ k£Ñ@2™ìRB) ˜ í.\è©°®­­õõ¦R)ÜyçÎÀµ… Š¢ˆ… hr‹fbLS¾3AÆ+«Ì„˜¦Œ‹n¿öBÉ¢ÊY¤E¡?/1‡þJ¦pîÛ!–!ýé^åÈ;ضí„“B{˜öú„°,à±ýdoù6 1é˲§à G2N"ô¼Q°m±®ñÎE¬¨)ú*¬‰D"°2X¯âãÀ_z{Tœ´ì¼Á–I&rÆ!ß…P,Zî"‡r™T•# ˜È9¬Œ[–XŸ¸÷ %ó/f˜žH’Ã0 ª*lÛv¼+ä9%…w^sUøÖ'„eçÎ} ·Âª(b.šowZsfÌŒSÆ©=ÙÈù«úË)NGš¾E—R©Z­VÏÞLœí‡éC6.TÕÇ-ËBöxV(¬Ü$˜™l[<:<¬š&6PQ®ŠÇÌ0þ.¤\…â¯ÐtÄ9vìXÇ+(§Õ0 äóyd³YgÉår0Mù|ù|¾ÿ÷Î o8pÏżªºò]*ñüÎÌ –%Äw ´?¤¬ÖjyÄéëaM¥RH¥R¨T*H¥R )õsZZZšXîQ±, ×McG†PX%~ 0óm?öc߀i²L3óI¹ì ŒJÆôD–eÇà €Ã~#"“tý,Z-–of>©×{¤t†ÛG˜(Da±B•UÆa …U€×××±¶¶æT& ã å‡½3sȽ÷ÞË‹xfnñ¦¤JÆ …,ËìÙ‹¦iö6‹Ôt;êKe˜‘’!ˤ7úQ’„g•a<ô-ºä‡*‡Ñ+®Ê* Â^ýµï5É̦ |Ï÷|Jü¡ëâÅ0s„·ÅgÏþ”€XÐ{K 3ÌŒ°°0@È»®³²ÊÌ$¦)¨F*ÉÑÌ ­°Î_úìg¡6’¢A;ÃÌ>Ø^Ì0sUN Öu;À4M¤Ói§*aÒé4LÓ„iš]Uƒ‹Å"4M ÜÆLŽë®û6÷Ó¸3Óˆâ!FÇr™[Õ03× ë•wßïþ»çl¼aæ I€«ûX·Ì$ÃÌ ^‘îë}òj·ŒÓSU÷E]èºÓ4aÛvGßUÈçó°m¹\®k3Y~õW?Ô;˜çwf†Éd€“'?Ù=‡³w•’¹VXà¾ûpùæ³YaeæŽZM(¬ÏþÀ¸73wx‹_÷íOÉíʺP¥Ké óTçóy¼¥ 3VœZa°ÂÊÌ0² œ;÷î¹Ç[Ua`nÖ¯þóP¾“€(Ì0óÇ÷~å+"’Cj˜9C–];LhuI‚Ö@TUu¼¬º®~‡ù|º®£Ä Ç©cÛÀâbŸ‚K¦ÉáîÌÌbšB/í’qÓä^ÂÌÐ \%xÖøÒéÓxìñ"®Ñðzž™3,ËÂÿîwYYeæZÛ ”¿j@ÄÞA_ºhȽ¸•EH§'Ù õŸµíîµ]ÐgÑþ^2™ Êå22™ Ã@.—ëðºÒ{¹\ù|5®Ê9U CȆ®‡³m ›á4òÎÌ T^£kW®Ì ÍÜ*¬Ç>øA\õoï2Q_ ÃŒŸ¯~àxöw¾ÃáÀÌ\ætˆI¸ä  +ÕÏ!…5ì8UuVÃÿ殺Òþ^(,˜rRýŠ¿¢(Nðââ"t]G&ÃÌiaÀ5×|8\¾¹%3ãè:P«qmf<̧ÂjÛxÊ‹ï>ýrŽfæ’'ÿÁà£/y ^õ…0Ì1 £·ç/&áÀƒDÔúmKªÚ¿Þˆ,wŸ{˜èÝL&ƒ|>¨y«ÒV«U¤ÓéÞÆf¬ë;¿sö@13Œ®‹ùm (†€ùÌaµ,|îñ¯Ä^ý„¨¯„a&Â{žó\õ VW™ùÅ>hÄÚ³ m‹2”L ™Lf Ï©¢(( Èf³„¡àرcÎ+NG}+s-æ Ã`¯63—†˜š{æh3Ì̧‡UQpé7.Áe·^õ•0ÌD¸«ÙÄ‹9¯‰™cú†\e2UUïMQ´Ûmg[½^ïØÏK¡P@á 'Þ{ 3~L¸üòÿÛùäó‘çb3Ì8 T†BÁê¬òn"‚S™˜ˆ½‡µÑh ÙluÌCÿðöŸ¸HCÆ0Sgù„÷If…•™F•q¶Ì3³À¨òýðÃ[®|ûzæ2L\E¾É»Z.—#Q2˜Ãƒ™‰­‡µÙl¢P( ÑhÖàr¹<бŸþô§qïOÝ‹çáyQßÃrùÀm(˜ØsX7 ƒ{ƒ2±å0ò]* ùÖ4ö®2±â0òÉŠb!›5…ŒÛ6N‹8xVX™‰­‡uccÉd¦iâìÙ³h4¨T*ûÌ—<7üü QßÄrùÀž'&öFƹPw#ßáîTö™#f˜qØ5J¹\F.—s•Õ\ŽS8˜CK…µÙlbgg+++€D"åååŽü›^H’Ä 
z&¶V¾&îVÆ?÷à·¿õ- Xt_Ù¬Xø,,ˆ9Œ’‰ˆÃÊ7õÀ ¼«ÜO›‰‡•o˲ð÷wßœe‹‹BYå¼UæÄRa½pá •J9ï¥R©‘òDf™a¬Y³@£ÑÀöövÔ—9,ß‚y”‡y³£rXÿ¹Ûnÿð nø˜ªŠE}½ìí‰g ºê¼ÉÃ<ŽÙQ8¬|¿àû¾Ši #Œm÷ïoCæQæm¼ŽÊaåû’O÷øÇ‹?vwgVY7y˜õ1ËÖ^ƒ¢Õj!‘Ht½ÿµ¯} ŸúÔ§pæÌÜxãQßÂX¸çž{pòäɨ/clœ?çÏŸG2™Œô:¾úÕ¯âþáðµ¯}-’ÏE¾éºÏœ9ƒ«¯¾W_}u$×>Nâ"ã$.cöüùóxà"ûü±ÌáW\!^×_/vø·vv"»§Qˆ‹<Œ‹¸ŒÙûî»ÿüÏÿŒo~ó›‘|þ(òí¿Ÿwþ<°ÿâãáoœ9¹â# ã$.ã•æïf³É÷;–ùûíoî½wê×?.â"ã".c–ÖàÃÎß±TX[­Vè¶‹/–Ç<æ1ØßßÇÙ³g{?K?~ç΋ú2ÆÆÅ‹ ò{zàðÅ/~W^ye$Ÿ?Š|À¥—^гgÏâÚk¯ÅSŸúÔH®}œÄEÆI\Ƭišøîw¿Ùçó.ˆ‹<Œ‹¸ŒÙ{ï½_ûÚ×ðÌg>3’ÏE¾½ó·õ£?J'fT>â" ã$.ã•æï|0å‚çoA\äa\ÄeÌÒ|Øù;– «7 ÁOØàý™ŸùüÌÏüLÔ—Î0}E¾àÌ™3Q_:à ÏáÌ<3Š|óüÍÌ <3q$–9¬'NœÐ–UhÃŒ–ofÞagæ–ofžaùfâH,Öd2‰¥¥¥Žä`Ã0N§£¾4†94,ß̼Ã2ÎÌ3,ßÌ<ÃòÍÄ‘cív»õEÑh4°ººŠd2é$yonn†æ÷1Ì,ÁòÍÌ;,ãÌ<ÃòÍÌ3,ßL܈­Â ˆÄïF£XZZŠúrf¬°|3óË83ϰ|3ó Ë7'b­°2 Ã0 Ã0 Ã0G—ÇþæoþæoF}óÎÎÎŽ;JÑh4ð­o}+p{¯mƒlŸ­V –eáIOzÒH×Ç{bFç0ò=Ží“ —ŒÏâý0‡£—ŒÏ¢<ðÎGmþîwMq¼æpðüÿ{ˆ631Þõ®wµê§~ª}ÓM7µoºé¦öm·ÝÖ~衇œí.\h¿êU¯r¶¿þõ¯hÛ Û'ÁC=Ô~ýë_ï|æ«^õªög?ûÙ±]s÷ÄŒÎaä{Û'A/ŸÅûaG/ŸEyà9œ!ŽÚüÝïšâx?Ìáàù;þ÷4 ±¬<´Z-T*¬­­Á4M§™r¥RqöÙØØ@2™t¶7 g{¯mƒlŸ•JÍfgÏž…išH&“ØÚÚøšâxOÌhV¾Ç±}ô’ñY¼ftúÉø,ÊÏá p4çï~×ÇûaF‡çïÙ¸§¡ˆZcžWî¹çžöM7ÝÔñÞéÓ§Û·Ýv[»Ý–Œ›nº©Ã:ò¶·½­ýÒ—¾´ç¶~ÇNЇz¨ë3/\¸Ð>}úô@×Ç{bFç0ò=Ží“ —ŒÏâý0‡£—ŒÏ¢<ðÎGmþîwMq¼æpðüÿ{–ÇE­0Ï+KKK0M³ã½ .àŠ+®pþ©TÊÙžJ¥Ðl6{nëw줠Jq©T F­V ©T kkk]Sï‰ÃÈ÷8¶O‚^2¾³³3s÷ÃŽ^2>oò=ŽkfŸŽÚüÝïšâx?Ìáàù;þ÷4,¬°NÕÕUGî¸ãè){{{¡ÛZ­VÏc©_Ö¸¹xñb×½\¼x›››}…zkŽâž˜ñ0¬|ÇUzÉø,Þ3>ü2nFè¾q•žÃ™ ŽÂüÝO¾ã¸æbÆÏßñ¼§aáÖ)pë­·beepæÌBÂøêW¿ºíâÅ‹=%¡7^ëÎ]wÝ…»îº ËËËX]]í{?ƒ\s÷ÄŒ‡aå;®òÐKÆgñ~˜ñá—ñY”žÃ™ ŽÂüÝïžâ¸æbÆÏßñ¼§aa…u ,--ayy·ß~»“ íu»íF2™ìyl2™œÈ=œ zÉø 7Ü0s÷ÃŽ^2>oòÍsøÑâ¨Íßýä;Ž÷Þ¿ãOàë„8yò$vvv7>†áüøÉdKKKØÞÞîØžN§{nëwì¤H¥R]%´+•Šc™9ì5GqOÌèF¾Ç±}ô’ñç?ÿù3w?Ìáè%ãó&ß<‡-ŽÚüÝO¾ãx?Ìáàù;þ÷4,ÇÚív;ꋘW666°½½¥¥%\¸p¡#A1é«««H&“NRóææ&‰DÏmýŽô™d¥æ~Ʊ‰‡‘ïqlŸ½d|ï‡9½d|åçp†8jów¿kŠãý0‡ƒçïøßÓ0°Â:a¼%Óƒò$Z­–còoïµmí“à°×Ç{bFç0ò=Ží“`’c’å{öè%ã³(<‡3ÄQ›¿ûmãý0‡ƒçïøßÓ °ÂÊ0 Ã0 Ã0 ÃÄÎae†a†a†ab +¬ Ã0 Ã0 Ã0L,a…•a†a†a†‰%¬°2 Ã0 Ã0 Ã0±„V†a†a†a&–°ÂÊ0 Ã0 Ã0 ÃÄVX†a†a†a˜X +Ã0 Ã0 Ã0 KXae†a†a†ab +¬ Ã0 Ã0 Ã0L,a…•a†a†a†‰%¬°2 Ã0 Ã0 Ã0±„V†a†a†a&–°ÂÊ0 Ã0 Ã0 ÃÄVX†a†a†a˜X +Ã0 Ã0 Ã0 
KXa¶mÃ0Œ¨/ƒa&Ë8s”™„üó˜b¢€åŽ9*°¬G+¬1Ã4M¤Óé¨/ƒa&Ë8s”™„üó˜b¢À+wš¦Á²¬¨/‰a&‚ŽeyŸ>¬°2 Ã0 Ã0#£ë:/à™#Ëûôy\ÔÀ¸–I’ (JèvY–¡ª* Ã@.—†á„)d2™Žã‹Å" …4MƒmÛP™L¦ãó …ÂÈû3Ì ô’qÛ¶™“$ ¹\’$AÓ4Gæ ]×aÛ6r¹Ë733„Éÿad¿T*¡\.>3lÛF¹\îz&0Ìa ’ezO×u€ªª]2JïñœÍÌ aóö ò®(Jàܰ\ {X#fqqÅb€P>³ÙlÇöb±ˆr¹ @,X²Ù¬3HÊå²³¿mÛX\\„¦iα´Ý¶mضl6‹t: Ó4íôÙ£ìÏ0ƒÐKÆmÛÆÂ‚ctÑuÝ »±m»KÞ#¯,ßL„Éÿae?ŸÏZùmÛF:vB 3.ú­W¼É(ÏÙ̬0Œ¬òþéO:tn§}Y®G ÍDFµZmK’ÔÞÛÛsÞËd2múYöööÚ:¶Ë²ÜVUÕÙ¶»»Ûu>@»Z­:+ŠÒÎårÎß…B¡­ªêÈû3L?úÉx½^o{§¡ýý}G¾ýò¿»»ÛÐÞßßo·Û,ßLüé%ÿ‡•ýB¡àKçÚßßï’k†ýærUUÛõzÝÙæ—Qzçl&îô“õv»·¼÷šÛi_–ëáak„X–UU!˲ó…úÂ*ãßžÉd‹,ËNå2ïßd¥Ðq¬$INHBÃîÏ0½è'ãô~>Ÿ‡a$ õz²,C–e(ŠâDèºÞVã=`ùfâG/ù?¬ì{Ã… ²ÒÓs‚aÆE¿¹<ˆ å9›‰;£È:àÊ{¯¹`¹VX#„òRðm»ç±”§ä}= &*úɸ,˨×ë€l6‹cÇŽ!ŸÏ;Û3™LǢ囙%zÉÿ$d_Q …Žó0Ì8è7—3̼pXYï7·3£Á k„ȲÜá :•TI’B•VEQ« ½jµ …ç-1±¡ŸŒSjµŠýý}ìîîB×u';“ÉÀ4M”Ëeض͞#f¦è%ÿ“ýjµêëà(fœô›Ëf^8¬¬÷›Û™Ñ`…5B2™ Ãp¬èä1õn7MÓIܦВ€=ðþ ¸Iß úÉ8£‡,Ë]!¿™Låry †‰½äR²/I’S­’Û.0ã¢ß\Nï1̬3ˆ¬ÓûAô›Û™Ñ`…5B(|+›Íbqq a_²,£Z­"N#NcqqÑÙN‹’b±ˆÅÅE§Bp­V‹ú¶Æ¡ŸŒS^ÞÂÂÒé4œ…:AÆö®2³F/ùŸ¤ìg2d26`2c£ß\.IŠÅ"{‘˜™§Ÿ¬½å}¹žcív»õEu,Ë íÑgš&dYvbê©O)¦Þ"K&Ì0q#LÆ Š"Ú®iÊå2ööö¢¾ †‰^òϲÏ̃¬W8ו™zÍÛƒÈ{¯¹VXcŒeYXXXÀîî.EeYH§Ó( ÉÌ=N³¸¸ˆ\.Ç´™#Ë>Ã0 øpHpŒñ†;v ‹‹‹Èd2¬¬2GÓ4qüøqȲÌ2Ï)Xö†aÆ…=¬ Ã0 Ã0 Ã0L,y\Ô0.šÍ&>ùÉOâꫯŽúRÆÆ}÷݇믿>êË<òyä‘XüF<òó˜ÇàùÏ~Ô—20ïÿûqÕUWE}c#Nò0.â4fï»ï¾™*òÀsxü‰Ó˜}ä‘GH$f&7Œçïø§ñzß}÷á…/|!‰DÔ—2<ÇŸ8ÙQæï¹QXÿñÿº®ÏÌÃkî¾ûnÜ|óÍQ_ÆØxàð¥/})¿Ñ<€‹/ΔÂúçþç3u½ýˆ“<Œ‹8Ù»ï¾{¦VžÃãOœÆì<±¸–Aàù;þÄi¼Þ}÷Ýø‰Ÿø‰™QXyþŽ?q³£Ìßs£°^uÕUPkkkQ_ÊX™§ûi4h4X^^ŽúR°³³ƒsçÎE}Cñä'?™åaˆËoÔh4¢¾„¡à9<þÄiÌÎÚÎó÷l—ߨÑh ™LF}Ãówü‰Ó˜eþž…u™§©T ©T*êË`bÂ<ÊüYæpÌ›<Ìã˜eFceaÞÆ+s8æMf}Ìr•`†a†a†a&–°ÂÊ0 Ã0 Ã0 ÃÄVX†a†a†a˜X +Ã0 Ã0 Ã0 KXae†a†a†ab +¬ Ã0 Ã0 Ã0L,a…•a†a†a†‰%¬°2 Ã0 Ã0 Ã0±„V†a†a†a&–°ÂÊ0 Ã0 Ã0 ÃÄVX†a†a†a˜X +Ã0 Ã0 Ã0 KXae†a†a†abI,ÖF£ú~³ÙŒúòæP°|3óË83¯°l3óË83 D®°V*T*•Ž÷šÍ&N:…S§Ná–[nA¡Pˆú2f$X¾™y‡eœ™WX¶™y‡eœ™"UXwvv°µµÕõþÆÆ’É$LÓÄÙ³gÑh4ºÃÄ–ofÞagæ–mfÞagf‰ÈÖV«…b±UU;Þo6›ØÙÙÁÊÊ ‘H`yyõz=Ò/Ša†å›™wXÆ™y…e›™wXÆ™Y#2…uccËËË8qâDÇû.\¤R)ç½T*ÅqôÌLÁòÍÌ;,ã̼²ÍÌ;,ã̬1’ÂZ©TpêÔ)(Š‚J¥‚ííí¡Â¶··Ñl6±¶¶Öµ­× hµZQ_ Ó–ofÞagæ–mfÞagf‘¡VRP—––L&ˆíím¬®®ö=¾Ùlbkk ëëëÛ{ ˆ‹/†n»ï¾ûp÷Ýwt 
ÌÑf{{§OŸ†išc?÷¤äî¿ÿ~¬®®b{{{º_3s¬®®âüùó97ÏáLÔT*œ>}÷Þ{ïXÏËó7hþ«Þ{xþf¢†ÖàÃÎßfgôÍÍM,--9ƒIUU$“Iœ:u F£#” èB‰ Àah4¸pá*• –——{K r×_=n¾ùæ@‹ÃxY^^F2™Ä¹sçÆ~îIÉ7\wÝuØÜÜŒúëcf€ÍÍ͉-xg¢fmm 'OžûÎó7hþî%o£Âó75£®Á‡RX)¶}ii©k[*•B*•ê2pòäÉžÛ)ž¾Ùl:ƒÃû†‰3,ß̼Ã2ÎÌ+,Û̼Ã2ÎÌ*C)¬‰D€ ÿ­Vk ð…¥¥¥…—r_½V™¥¥%loo;ï†t:õwÅ0}aùfæ–qf^aÙfæ–qfVJa%/êêêjGü{³ÙÄéÓ§‘L&½¯Ã²¶¶†ÕÕUììì8Êñ­·ÞõwÅ0cå›™wXÆ™y…e›™wXÆ™82” ˆØúb±ˆS§Np§R)”Ëå¡/ (Þ=•JáÎ;ït<¶ãP‚& X¾™y‡eœ™WX¶™y‡eœ™†VX‰677Ñh4œ|ÕD"1öäðD"Áƒ„™[X¾™y‡eœ™WX¶™y‡eœ‰C)¬;;;X]]ÅÙ³g'R½Œa†a†a†aˆ¡ú°¦R)§6Ã0 Ã0 Ã0 ÃL’¡«¯­­¡R© ÙlîÃ=˜˜Y Ù¼$êK`†a†a¦Cç°nll¶¶¶·³ÂÊLÛ/Yaº˜& IÀµ×>?ú£Q_=Ã0 Ã0 Ã0½Za5M3êkfm–(JÔW2†!^¦)îC–Å«ZÛËeqoªêþ­ëBI-ÜývvîǹsQß Ã0 Ã0 Ã0½ZaefÛ²Yño&#”:]Û2™¨¯.üšÓi`wWüN åTQ€\.Ø«jÛB¡%…UUÅýõóÀ2 Ã0³eüàq|ë[WF}) Ã0ÌIaÝÞÞ†aØÙÙ z4©ªŠååå¨ï‡ @×b(•:•SEJ,)qC×]ÅêõþÇ”JÇñ¾†aŽ*¦)æeÃ0™Œ0@=Ëd¸öÚËÙÉ0 sDZa­T*ØÚÚÂòò2VVV­V h4X__úž¦)õz·—Q–]ï帱m±° Å‰¢ˆ÷LS„æ¢Hºˆ…Ç]¼ˆ'ßÿä>€a†A:íwçUÏ rYìã…”XË4Í RñÌ’$Jëx(êÛc†a¦ÀP k³ÙÄÖÖÊå2T¯ë €ªªX]]ÅÊÊ ’ÉdÔ÷Å`áá³^4mx±\ç )¶m±¨ØÝuóN)””W@¼GŸ_.‹kÈåÄþçÕu ŸýnÔø·+®ÀCW]5¹/ža†¥X‚_¯»ƒab‚iŠiØ0D-€AŒŽšÖùìÙßïÜî­?„$‰}¨î{T†aŽ&Cõa½pát)«€ N¥RÎ>L<ÈdËQ%¥ÒÿÞâ¢XtøÉçÅÆ{Œ¦‰57 %UlTU¡c:žUÓtB»è8MëtÛÜüZbÇrÙu/.ŠŒ˲†* öèe—Mù×`æ Ó®¡q¢£”Ë ù¦sZ– dáa˜)¢iBÄ]e­6xª…¦‰yT$I ‰¸ÖY`†a†Ã4Gk-9Rk«ÕB"‘èz/¬7+ƒZ¤K%±0ÙÛ‹ U¯ZÍ݇ªòæóâoï6Àµ€[–Ù0ĉ¼1Ç'lÈ) ŠOý Øöðñ?ˆg=«Œtú.(Š‚BAF[¥ªîyJ%|ãÝï†ùOÿ„ç{n®²²‚ŸýË¿ÄeO~2>¶¿k®¹W=ýéTÕ­0U(°‰ž¦)À ÷ß>! 
¦iB–eHAÞSp€›\àÛ×]‡Ï<á ¸ócÃ7Ÿð€$IH;†g>ðžðæ7w]“½» Ã0ðÙÏ~6êo‰™aÊe1çW«ÝÓ¦e‰©Üû ðW §¨žr†afËóûajÂPKIYÿÿÓ?}^ô¢êC)¬KKKH&“(‹X__wB›Í&NŸ>íìÃÄrÎ bá–e±^ÎfÅqdÑö.6Þÿþ/ãõ¯¿×\s¯xÅŸ¢XE$I°m†aÀ0 œ~ôQ|iF.‡g꺳@ÿò[Þ‚ýÝ]œyÊSð³gñ‹Çíw߀“Iär9lm½ ¦iÂ0 ”Ëeüâ{ß‹/?ïy8Å"LÓÊðÁ¢ÿ¿ü¿ÿ/dY†mÛPUßûå/㺃kÑu–®ãø߈¾þz<|Í5HXpà…­×ë¸þúë£þ‰˜‚äê“ögøíóçñÿýÇÿˆ”,ãñÿëáù¿û»ØßÛÃaš&lÛÆ-—]†ä-·àÜç>í LÆ /¦iB’$Èqö¶mÃ4M¨?öceY¼°ß{x_úë¿Æµo;^þÝïâ‡ø‡ñ¿>ô!dHÙ,Eé2*2G Mó?å¦ÓbÁQ(¸¹¡”2R(ˆ EÓ ½Ÿ´èð“Ï‹ã2™ÁŸ= ü‘núÐ+£¾æˆA©|~èyašBOÈå†S^)¢’ œ¹ðÃ?|~èC{XËå2 …n¹å–Ž÷“É$677'ÿ2c½# Ãpתª¢PP‘ÏÿëÝlö¿À4M¼öiOéGÁç ¼é«_ůþêžùÌkñ½ß«v…પ JG²K%ˆ ’'²,d2”dçWVpÉc Ã0`YÞü½ß‹kÿîï8vÌQF[¿ÿûHœ9ƒg<ãøØ£â’G'Êdð‰cÇðÍk¯Å7¯½Ö9¿7ÕÂ4M(Š‚j@‚Ÿ³ß›ß h,M¾ñ  ZÅ®ÇË»ººõ×ÏDˆmwz@k5á5]X‹LFxOéA‹•AÃo÷ö:ÿ–$W9¶,7וaæŠ2óbš80Þ»å5Ä~…Bg§ƒ0l[{öööñŸÿó·qã×àÂ…»üdÔ·Ì C8¬êuWM§Åî ÛÚz¹Ü£¸xñqøÒ—.Ň?ü~äG®ÄÛßþ1\{í7 Ã ïMù3 Ùl²,㡇ÂOýÔO uC+¬©T o}ë[ññ>ø ‘H@UÕ™,¶Dy—Tµ0—›íz'¶m£\.CÓ4¼ð…·á…/< Ëú!˜¦é¼ÈKI/EQ ë: #L&ƒÿò_t”J%d2|õ÷~Ÿüâ¡nocïoþ¦·Y…b…«UH2™L‡‚Ð…¢ qÐ)ˆ'>íiBñì<@¼™ªª]y×;;;8wîÜÔf<*˲óÛ¦iï¿´]–e躎r¹ I’`Y^þÄ'¢`Y¸qm ?´µô!h=‹ÄæfפpòàÕU 2 Æ$9¦Ê›LS¬nBÆ“:È*蜰|cmoï¦~&j¨2»ß»)IBäü5ëH|dyôÊìå²wg %f˜YŒ„aX–Û^’€ÏþëøÃ?üCüÅ_ü! 
… ž·»kãMoú^ûÚ§¢Õjá†þ ŸÿüŸ€c”WUÕyV>ý‹¸üò·`yù«øð‡m¼éM&®¼òJ¿õ×Â̦é†ìÒ2ãôéE­ÖÆóž÷e¼õ­W§"y°,1‡¿üåßÅ+_ùnœ?9®ºê¤+¡iºãà’e‹‹²-öú׋z2ŸúÔ¯A’®ÁË^ö”ËeضG9…G½†q ¥R–eaww’$´ZaÝÞÞÆÆÆ–––êêê*NŸ>õõõ™ëÅêM³¤ã(ÒIáUãR€u]‡eY“" &YÝS¡H×uÜþêWc¿P€©ªÐuïxÇ[œI²P(t îA•£L.‡/üõ_ã¯î¾Û(¸ú o€ ˆÅp±Ü •ú×ô+ùÈDN¹<[azš¦¡X,"“É@Q',—,x>À §•$ Š¢¸ùwšÉˆÕº®‹Á)I€¦áxÐ"å ;‘ɈŠ3~wÒ øOYK•ëUòje2âÖ.¹ä‘Iÿ<Œêu Ku©4ÝüMêkJí_¦A:-<·þâJÜÿš‰”ƒ˜Ž!S–e|êS7âž{þõz²,w¤yáwÞù*<ÿùÿªúOC¹ü d³Àóž÷tìíí¡X,baa™LÆQBUUÅߨàéO¿üÇe?^F©H’Ã0ðÞ÷~ Ï|¦¸ŽBá ¤ çólÛîk¬gކá†ã*Š˜oýׯ ^ð{øêWÏ¡Ù|1¾ùÍk‘L¾—^ú%|ò“'ñ¤'= ÙìC8sf ?ó3ëøêW?à¤ÐQä£×záÂ/àÉOþ'|ûÛ»h6Uœ<ùlû,²YÓÑ”>»ˆ@(9åj>ò‘GðÜç®C–ߌB¡€zþ0C)¬­V •J+++X[[sÞßÜÜÄÖÖ–£ÈêiÝÙÙÁ‰'B÷o4H$õÜz[¾”JB,KL*ƒxïüضX4躮ÅEqÞQªÒâœÚÙ¶t:­µ5H–…ß~ðAÇ*Hp¡P€´¸;ñ}0Ï=¥wõY§Óâ- 7¼üå(ˆµ{?U7Ô˦PpGãùöâ ]Šú§¢I“î4ŠÐt±òÕuš¦aooÏÙW’$Ôëõ¾ÖÂ(1ϲ:KoÒ4òy·j 4|š—W!‘¤ÎöOþjÞ'NœŸøuÅMÆ£‚„ªºSkÐÜoYn Uus„($—DÓ0Üßšì‚ôÌ/Å”,Ëb»m‚8.dY ™y¬â˲=} «‚,Ër<<œÈ0øú×ã _x:~ù—Nd ¥‚|æ3ßľð \¼øøÎw>@ÁýЮ»®Œóç/þ?øÄ'^ÛñLñ^Çç>—Àe—%ñïÿýÓ`š:¾ûÝed³2Eš`Δ€IDAT»ß-Ž©V«N[Ðó©Zuço±®Ìá=ï^ò’àñ*Ižô¤'MüûfŸ>^…Ñÿ+ ûÅ?ÿó·ñ•¯¼—^ú^¼éM)ض‚'<á=xÑ‹>…åågCUÙ#o¯è:þ%/ÊåMôfüÜÏ}Ïþ3qÛm¿6ò½y—TÂ?p$éMØÚzÓX¾»¡ÖF£V«Õ¡¬+++ØÞÞÆ… ú ÷öö6*•Š“C¸´´„R©ä i6›( h4Ä"–¼(ãÆoÄ¢¿Óé|Çâ™8Hµ µ g³ÍÍ-K¬}e¹ó˲ išcÑóaÑu]\QUÔëuGyÖuï½õV<ç›ßĽî:Üðt/¼wwa~èñ°þó;‚“.ˆbу¦›0¥’ÐÀý‰N´êbÄK¾½rsØ)$³ýÐ4Íñ†f2H’ä(¡$ó^C äóyd³Yär9‹EÔj5H’ÖÅÅΆ¦)h&Ó)Ãx4e¹û¼„ \²ì>߃ø½È±ëVàvo“Þ§mãvÜö"®2ŠÒ\’˹Eì¨+‘e¹6úi¥)”:%‘’jšâ çõBá½¶½PU]ÛÎ81 š¦9–}¢X,:óÇcó˜¡s e{|Ð:‡"düP ú7¨áõðÐBÿþwÝõt¼ô¥ƒ®ëxík_Ç<æ!Ï3'ƒÿôŸh‰óýc嚃ñõ|b|úƒÎdYu–Mª Üvˆ§B!8RÉ[#ï#ÓSb#XÆÇ S,ËêP:½òàOÇ#9÷T KËø¼óYŠòBHÒÏ tmªêæ‘f2b¾ö.û‰w¾ó†±uÏ›”J0R[›0.^¼ØwòÒ®­­ayy­V «««¨T*X__lll ™LâŽ;î@«Õ©S§œcÆ…ÏM¦i¢Ñ¸çÎ=©cRÊçÅdFÖôz½[|ã¿€npãÅ%Ɇ¢”‘ÍŠ°]o(c¡P@&“ ìVp%“Éàëü ¾ùœçàÚ'<ÁíEãE’`ÜÈ+Ï+¦jUH«÷|ÔHÕÛv†VÍaPUYNò0q‘ï ¨ʨaŠ–e!›Í¶mÔjµŽÉ¸X,v(²º®<ÜEÞå4ø 0~jµª…žñ¢aç¥/ÅÕ$k"ì¡SöªU!ßµ`š°ÿÇÃüÙßügg!«ª3ù%®TâtJ1dÌ:(`íLò¹\çpSˆeœe<*ü¢ ˆßuw·ÓKJx‹XÐñô^¡à>cɰ÷âÀ¶m§8]/„¢QÆe—=ˆbñ)]EÍüj{VEÞÐKz6Ò>¹\º®#N£P( X,BUUìïï;ç¦ïö °l]×Q,‘ËåP.—<-ê)uC’¤Â ½ã§ Ë>ó@’T*ÊeW™¤ÂbÇuï>šæ©[BPÎ÷4ƒã†e¼7¦i¢\.wÔ¿ 4/Òúž<ñd|§ùï}ïKâÔ©G:æ:o:žm»ÝðÂænÒ5¼7Àõ-<Ðs„ê„Kâ¾´©­ÍÆÆÖÖÖk @eêyi)×5‘H`iiɱä4›MìììàŽ;îp¶///c{{{¬ƒ…r}‚Ð4 
¿þë ÞúV oyËûñ ¿ð¢ÐÞ£P(ìãî»ÿ­ÖÿãĈSR=…óîîîÁ0YX¯2JÖÀAy⟈'^{­ôãLÈUSÀ%ª›˜ëÅ = ÜT¯'q‘o?äá‘åΉpÈ^.—Q*• Ë2²Ù¬£´æ‡7çÆŸ+=L>Nþë_Þò–ƒ?ò®ûˆV! ……<úYÈ?~L¼ÊOüg(—<êk^ n¯HoõƾHÒÔ´CªâtmqRPýÄUÆ£bq±ÓîçgeÒ¯ÐN Ã0ºªSÓxÜqKÞ@( ^£kµš;(Úg8ùæ4þ©7I+ ]ŸG¡pÞ´úÜL&ã¤Å—\vÙecý>X¶Ã¡ßÒÛY sÐ~‹~’ ò€z“ô^//ªiº @¸ÁF×Åüî_Öôkdž® "—s¹0o°ŒCKu]ÜÚ1ÞNŠ C‹mK=JɱE©^¨ÎÁ ÕØÓéñyáé:è¹BÁ•ÞgŒ·Ã4xêÛߎ§?2\‘ÛÚloo;Ê)Źonnöíý·´´Ôe½p᮸â çÿ@§â›J¥Ð÷cPÕ;qù凪> [[âAF}äYlïþÔßý©ŽxJý-_†ñÎ@×Nô‡¬å­~nHÕ%žx~E ®¨A\¯¯q•ñi`šâw£t)Ú7z\“ÈïÎårŠm'%Šª®ëeù|¾#d_<ûX–]×aÛ62™LG~ mÛFWªO‹¾ÅÅEn®¡_Q%ñªV›7 |õ«ï¼á vwwâa´@wÓ3ž‚g>óGñOÿôx<å)?ä|Žˆ"¾Ër#ÔÐßð(ÚwbWþ%@×±PÌ@Mý2¿øT”~IÈ2¥“zs;‡mZ ¸‘ÃÓÀ-®¾È™õ€…8Éø$ k9Uü-—ÅÿIIi4þb4¤4Rޏh5°èD=Pî UD¥}È; ¢~voÕLðŒñæ–{é¥dJJŠ‚’رÓ@×~BO®-zùn±>·±û'ØK{ÞeÛ…êÚ¶ MÓ IªÕjhhø0Qa¶-yDöö„ ×jBØ3ñå‘ð÷ú"÷÷Å>Š"&£R)xÐîî¶OŸ>Ý~å+_Ùþ…_ø…‰ÝJœæïI±¿¿ß. mY–Ûªª¶UUmçr¹v&´ñþtûûâ'%q,ÄüH¢°»+¶Q­ºÇѹf’wzɲû hÁW«u> ëõvãĉö_^ýÄÖÞÄ<Ïßõz½]¯×ÛÕjµ](Ú¹\®-Ër»P(´wñ.†Xµ…üÒ4­hLcÚÞÛhùÎþ¾¸èRÉнgõz‡üÓ|Øù{ …õõ¯}ûU¯zUûôéÓ@õzÝ 40¼½ççž{îq\ÞI2ˆÝÝÝv&“ 8ÎÌeY>Ôd^¯w>ƒIQ»Xš^ {R&ÕÛ÷ÎÑO± RržÏ÷Ÿ£t°=ÓŽ”Aäe\ŒC¾ÛíöÈ‚Ý]1a¶Ûív©Tj …vµZm+ŠÒ~ÝëÞÛÎå‚çR6ûB+ßl[­Š1Gs™#÷´aŠ«ZC𫉽½î[ÙÝm·%)ÚÖ4Ӛçw?ÝëËq±¿¿ï,¦Hé¬V«Ž¢©(Š£˜û¼¡sO…ýýþV RI „°‡œ÷\C®Ö¦%/QÏß“À«¨–J¥ÃÇa!=ÌkÄóÿ¬^±)ú¯sj5ñȈÂÓqS¤€ á„Q7¨¥”´ˆ †çïá©V«Îz=“É8óhµZx^¤)+l½0n¬×]#÷´¦îv»ÓHTß¼·]ÿùwºà ‹ûþ¾XHÒZ¯P_–_©Qä¥oHp2™tÊY†íím4 ¤R),-- ]±ªR© Ñh`ssÓybçàĉDXµÇñþT(§³WÄ…7ØË^/ŸÿPhŒ×Ý–‹`¼,ôÎ sï›y¥ ÂÃ})ŒW9Øßû• b¿ ‹Î°_ãàÚêžsxKeÛǧþóªdÕ|÷¶©×£ÈIËår°,레ɋC8:‰â† !ÜË!§Z•ö<û..ºÉ¥S ¢ùQûEZ–›NÕÅ龂Æ)z¢¯cwŽBãã(ããf”V-þ–Ô®À†fšfGIow*êAEÎlÛvÂv…Z@M ÿ2Œþ±™ƒTÁ¡\¨{}àhȶ¦i(—Ë]ùzãB–;Cƒ~~¯È ŠHiNS˜õö£¢Ißû ’{JTô~ƒÆvÊrçbp{{"·52nš¢ÞŒ,Ë=»ô‚êJâgÈfÝv”Þ6d㞢¨vªªŽ¹-Z‚ò -K¬ÓÓyÀ²`|Ï@ýIôn#Açñ—¼Ÿ"Cå°z{!†lmm ¥¼žxÜþ*¾þc¤€sû×0ÊÁq´¸SîùõzT3Šyôk”±•|÷‚¢ LÓtʵL¹,–Åb°æ&I±t'r":OêÑ3q”ñÃ`YbAB¿UP·0Û¶Q,jÚ¶m;EŽLÓ„¦iQƹh1M oYö÷Å¢<î C*dšnóÀÃ(—ä±¢/9Š*1O²íõêè(´5(ä¤Q1.híë÷¢Rõ왩zN7CF›ZÍô¯¿äë Ä\YæKÆmÛF:îèÆF±ØiŸÈçÅ4DA~Ùõg¥VG±EÓ\íZ–û÷’—$Ô=U-+új÷1ḻÿïßýÝßmŸ>}ºk[?.\¸Ð¾çž{B{衇zn6Z–{_¯×;Š+ Ú½óG ív[î³] 9Ç~»Ý~Ö¿äP´Eñ"?™¶È+Ýmwç‰î¶E)Qó}N.àœÞ÷2íîBJ{íð\X‚Š8ùWÚÁ9°S`Òùã–ïv{´¨zÝM§‘û ‚~T«îɼ•†f„a /M£0Ò¤™tÔ´çðIÒ+…¯V«9¹~^vww|©q掕½=77´T 
Ï%õ'f×ë_Êînp^Ó(Ôj£WOó0Iy‰Ëü=*ûûû#×ÛØÛs‹Ç©jxí+÷³Æ'c…d¾” «(ã+t0xþîO.—k˜Khéâ‡IaœUŠžM’‰ä°ƒZ(Ðl6k5 §0ò®R3â~$“Éž1ñÔÌx\ô³üQ¯±CAù™½(Ax1ðЙ#êEðƒ_nPÄvÞLÊm•=çò¢ 3Y>8§ó%Àõ¾UϹMt‡2ËŸcC„É›jûŽÏ ;„xN˜¶|‡a-dBò³‡‚ŠÂ’œÍFX4:aé”’—ˉíÅ¢xoXãúQ#.2> ÙJ½îH„)¿O’¤OT÷TQ”‰äŽ Ãn„R©¿«ßßtÒß úBà ŸÉL¿Ký̲lprT‡ ‘$ï) ~þ~} -K8v( %VË“ú Ñ>ä¢\€#¬Ë8µ–$jLÓºå9ÖSÂÛWpÌP×°\Î;d§B¡€T*åôiàä®ÞqǸ뮻°¶¶†J¥õý„ÒO8-ËLaí¥lJÊW¿¼²v~Á9®&€›öÅÿýÅŒ€îüTÿö Öôlëõ…e5,äR>¸ÿñ9¸ÊêâÁËÿ™Ç!¬™‘0M1ßÅ #IáR¹œÛCnšt]D„ysMg ‰9$”FI¿¹išX\\tz‘îîî¢Z­¢P(Dêëm¸=(Å¢xÕjãY„KY%FÑgúBE¾ C$ÆÙ¶[ ¯^¬Ï)mWÕ)Guëº[Í. 2Ô ºÐ÷6*gb)«A½S5­SóFÊÆ ªP, yö6lõî“Ív[,zþ¤ž¯@ ¿›ú*¬;;;h6›¹©­V FËËËNáååe´Z-ìììD}O]Õ°¦iö_ÐÛÕmÃÐ{¢ RØêõ`–CŽ'…•Šù·{×&äý¤kRüdϱa·Ÿ8·õàžz-òKèÎWõ~»póz©pTùà= Bg††òW Ã8¼‡ÕO¡0siï\Ï‹‡-м·5#NcfD¼Åk5MC6›E©TBµZu”SQá:Ú"é´ØA)úÁƒhww̽Ûㆮ÷žTRV¡ örkÊ[˜‡Õ€ëm4Ð]œÈ«4è•-@<]$©³X’¯ +p=§Vý©ØR˜Ð’ç8ì+êUh‰f@І؂x¦'†Aµ%”oÀ•Œaá¤q@s·|=3ŸäóÝž¢7¿ùüÝßýo¼êUßI’ ë:,˺ÍÄ(—Ý 7„$‰Ê TI–Ý8fZÐxñ†îÇU÷«8Òù@×uì1[VŒôİýC¨ò¯iŠybŸ;¨{&“ öv)—Åüoš1JÒ0T­ºJ.ÂÓ Ñô»ñB_…5‘H Õju¼Gå°ã¬¤zé×OoàpÉ 61^¨¥M®ÇvZG)µ^•rT½—•ƒð<ÒµúZÿM +ý?èÉS+!\é²á¡É SÁõzo‹žì~­'u¨p`Êe£ÙÞ²ÜÐÂ9 Vs½¬±yˆ1c‡Z(zu9Ë~ë·þ/{Ù.Ló¢oi=.‹Û|Þ ÿ êuG­_¼­ ¨ÅÌ0áq@U§Þ‹yÞ±m¿ò+‚t:j|ÉçÝbÐ4ÿE¶Xíå%E.d9Ø€ÃÌ4Ô¶†æjI ÿ})WëYŽÐöNÞS/£ÎyZ¬”Ën‡±Y˜Šû*¬©T FÃQR[­ Ãè*®D¡ÀÔt8Nô[«[–5Xß;ÝE‰¼Ûh…ys½ h?ý<±AÇ:Žž=_òjæzGJèü~óp¿ûA<½G UÒim¸Ey¡ B±r9±Â™óÛL"º‘‰^'#ÕRÉdöqüø±µõ¦¨/¯rùbòHÊåºcàfˆcÁ²,”Ëe†ÿñ?Àg>ó’Ð}IQ]Xp£Ë#Óï(š€ž/¶=ºsþé\bš&²Ù, …r¹ à óO–Õ™«Z«EXLȶŪZí\?Ålž6Í÷î›&}sXS©–––°ººŠJ¥‚ÕÕU´Z-'˜Øb±ˆ¥¥¥žUÇ¢`Üä=P9„+DA ¥Ÿ~ÞÙ¬o_ozmóŽ@ ®§v½Yy_‚[åw©Bxš©âñ¢ï^òß“=Ã÷8"¥ i"„f¨¢1Ô¬Œ [Ä=¼a<øÓ"5 øÙŸýþùŸïA­CY>¬ËŽ JÌà†|>t:íT¬¾ãŽ—áþᱡǷ‰Ò"ŠÅî:/SÁ¶Å3¦|ë£iƒaŽ ¤¬Öj5Ç¡¤ëbã… é‘Z¢ò»årDl0d98˜'® T%˜šò’—Æ[€éÌ™3NáAÊKOÓìWšÝróT©T Èå§Ò>z퀨~Ûk»ÿšiM’˹¦%¿6ˆôW¦‹¯ð; (ßOõàßŃ{ö†hè4Ì9¶ ¼ýíƒaƒK 1\9ã¸Yf†ÐõîÅL­üã?Þ‡Ÿÿù¿?|¥ìqcâÅãŒÓ4|¾½½=är¹ŽbÒÞ  „×ÖM û ¨Å©ÍŠ…å2S ÃÀ ®ÕjΜM2]*¢Äª+E ÔjBÖcÚ7†TŸ¸=èk"‘Àúúzà¶ååe,//Çγ:(º®»Å£ú,\€PŠ‚öó†{sG½Û½Bá÷Ôúä :cšË½’å=&,NŸ<µ½²€ùÉÍÁmù£À ÎÀUZeàÆ¿½_:ù¥¨¯vbüÖoý+ÞñŽ÷ãoþfˆ…0åμˆffŒ {¶má1y!Þüæ˜õLµmáUâqÆ ‰®ëÈår…/ C( $ÿÞ¼†Ñ™ßˆm‘­£ÉëT(ˆødÎ=eŸÏÇ2ú€XÌÌŠy›‰ †atÕ%ðʾ$‰ú.Å¢PTîÈñHÅΉ;¨x: tÌT >ÂþJצÙip!½·zlÈåÜ ŠqʆhyõU Î@ 
k¥RÁÖÖ–ó÷ÖÖVVV°¶¶õõ÷¥×¤l,Ðà:z÷Yíµæ9ŒLÚÞTÿùéœye+h@RÎf/çðbQGƒçâ£;ͨ/fr<þñ¿†ÿôŸ†üÑýV†™!‚B¨ÛØ{IŠ¥Õ‰7”Æä¯lšQµ²,øu[Q‹ñ‚ž™>Åb¹\®K¾ýu#½µçbÇŒD Œ±CÎT(‡ukk ËËË0M¦ibyy[[[]ínf ]×»2ƒ„Èøó\mt‡ûqû‹•áæ¹†Y8¨õR ¢_«³Ïvf® ªo¹ÙÃ+3%à NТ¼\.£À¹qÌÑ‘Æäª¤ú¡®G±zÉ3Œ˲`FלãºE3Ï,ù(ú*¬Ô®feeÅy<«F#êëïI¿Ç.…µˆp…µ WQ *¨¤ùOîÛî¾PÿS ¼(’vðònK§»ÏÓKàL¿*ùx 1Þü&Ó4ãé]-—ݤB†Z·H’ÜQ {¦l1­˜ÊDeY]u7%fLd ¬°Î"’>‰wyWÉÛY9>ºÀ°BL½æe¯‡5,ä7sð>Û4» ªŠËyLSXÙmÛÍÃÊ03Šw®÷¶D˜È80ÍÁ|çoÈ0C@ëMë¿™QXûõdŽ,Æ£0 ¥g±°ØcÂ-ÌŒ¹VX©Ø@] «¿íŒïv y§AŸ¡ÀUHƒ 9Éžc{…ò˜¾í<Ñ3!•Ý4ÍÑúMòBš™A¨;Œø¿t:B¡0™ž«¶-úêI’ø7›‹“~^#MûÖjÅÀŒ ­[tÝ-°dÛ3Vlšú0Œ¼–åv„™I8Rmb ÜÖfuuµë½J¥‚3gÎt¼·¹¹9ð‡Sl*• ÝžH$Fnc½…>ÐúN9£þã¼ÖLÿaE„÷A5’k¡Q$Ö!f–IË·šè-Ë­*jÜÂ'™Ø3m›ßDýûr“2Ëg³bœärâ_ªâ_ ”ËbáBîL3f™~ÄA¶½ØFÛ–!Ë®ízqQØAffÏÊjlˆ“ŒŸ=ûI|ç;Ïž©ªµpÁÄè«°ž8q¢£à&àƒÐh4P(Ðlо"ÉdårÙ9g³ÙD¡Pp“ªª({6ÄۣɋaÝʪ¡XRè¯ò÷Ÿ‡Ø0…Tòœ3ìA"è§#x·sŸÌ™`Zòí…”Uñk2Þ%†9 CU]=0¨?åØ(Å ó*ÃaãLU]E•šb23AœdÛ [òG•×jœÊ GeüÏÿü"žñŒ‡0m-Xa}C‚“É$ÖÖÖ~ Âêê*–––`š&Ξ=‹¥¥¥ŽâH&“ÎöF£J¥2ôÍ…)¬¡9~ D/T¿sŠO"7L7,”¸pp.H¯—a×0Ü's&˜–|{¡ž{ 3 ¢ñ0dYè„ä©àØ SýCQ„VQ*qäÂŒ'Ùöbš&~ìÇþL³SaUvZ2ÃGßÝ}"þÓº4ê¯æð°‡ub ú³³ƒJ¥â¼¨íÍ0Ç·Z-G¹M$XYYA³ÙD£Ñ@³ÙÄÎÎŽãÕM$X^^ÉjÖ»)0ǯWñ"­adtæ öëƒv^ Ý=]ûÁ!e±fšòíG’„lsÁ%f’D)ã~4Í­SÖŸrld2<ÿÎ9q’m?†aàºëžÏ¥˜CW¿æš_Ç _xuÔ_Ïxà5ØDXa5 ·Ür VWW±µµå¼VWWqË-· Ü“5•Jass³£5Î… ˆAÿ÷†§R)'taŠ!Ê eYÝ‹z‹j¢¿iÃí»j#\aM£w!§a›!+ /˜bÎ4åÛ Éz ls†éCT2„¦¹kÓ4¹ç*s(â$Û^h^W”9pØó³&Râ*ã÷ßÿOóalçÔ½‰1º³³ƒB¡€'N \.Ã4MçU.—qâÄ œ:uj ok"‘ÀÒÒ’ów³ÙD¥RÁòò2’ÉdÏAÑjµ†º¹0¹ Ìñ£ð|o»Bó½' BÂC~ ôVX3aÃ’ÉpHpÌ™¦|{¡Â3#W+§Í0>¢’q?4¿ÓÇ0 VX™CÙö£ë:ÿø_\UsXK=3Nâ(ãÙ,pÝuÿ.ê¯f<̃ÒSRX766 ª*677»Þ÷766þàV«…J¥‚[n¹KKKX__wÞãâÅ‹¡Ûî»ï>Ü}÷ÝÕŒ‡rF’ÂtŒð~®ÇþDý‹*1Sg{{§OŸ†9Áç¸åî¿ÿ~¬®®b{{»k寎ìa‹•C¬®®âüùóýŒiÌá½Ðu7— -*• NŸ>{ï½w"çŸöüÝ;ïü|ík?2ûVì}š¿\…¨ço²€O~²…g<ãɻשaYÜph >ìüÝ·JðÎΚÍ&ÞúÖ·öÜoee§NB£Ñè[A¸Ñh`cc‰DwÜqGWèA½Jk_ýõ¸ùæ›; ?Mî¦iåð†çûçÔ*ºÃ~I&{ºac,!Kâ¹sç&rþIÈ7\wÝu¡m£h­>ò­‚sÅæææÐ ‡a˜ÖÞ ]w 5º®OVaM§E%VŠcÁÚÚNž<9‘9<Šù»_úÒÛñ‰OüÒØïuêpáÈ ùû0Ý8z‡ù›eà—ù°¬Y·Æ »Š<Ȩkð¾ÖF£d2Ùw2&!$d P(8qôþÁqâÄ èKh6›C÷ 3äÙ¶¼°ñ¾å7AkyŽ dB˜†|{1 WÞíQ<¥†Á +3Ó–q?þpà©ä¯òBûHµlû1M?ú£OŸ}ï*â&ã–eq:Ó—¾ k"‘èê2 †a ÙlBUUìììt¼aÁYZZê›1 étz¨Ï 3äö`µÐ©€z &ÜK^ÊòpC‰§+ˆ=Ó’o/Ômäp`Ëâb^ÌÀD!ã~,«³•ÇȹÛÃ| 
3÷ÄA¶ý¨êÓæg1Ïù«‘7Ïç¹»3}C‚S©Z­V_ »·òX“®F9…kkkX]]uÊo' Üzë­Cݘ$ у•VÕ÷` Z«(ïÁ:)x²=Ó’o/–%dÝ0F .—)´e`æƒ(dܪº ëØ½«¦ÙmíäÅÔ‘ ²Ýù™À±cŸ˜…Õ¶¹ilÄÄQÆm{Dc;s¤HaM¥R( ]¥° JÞ^ZZês¿¶¶Ö7Æ=•JáÎ;ït–·¢Ù (J°‡50ôÀEY‚ëeíUXç©2LK¾ RVa•†õ”jšX@ðÂiËx?£fC±(z‡¸ñÄ®‰q“íZm—]Ö€,óBƒq’qÞõ¬GÑlÎÉú#ŸªÕ¨¯bn¨Jðúú:šÍ&N:…íím'¶½Ùlb{{§NB³Ù:ÙºTz{ÜÅ í2Ñ™§ªÀUXýÛˆ ö7“¶…bÁÌ ã’oèûÐVËšƒÆ~L™Ôˆu1ö ÁŠÒ©¤Ú6ç¯2LR¶½Ü}÷¿ “™“Å<3SLCÆ ¸ä’OÏG…wËbãæ„éëaÜFÕJ%°uÍÒÒÖÖÖ&VÑlŠÅΰ1 OŽŸ÷mÊgUá†í¯c²hš«¨ªª¨RÉ0!˜¦‰Ì°%ÕY¦˜C„¹ª`È™ ª}‰9•‰ˆÏ|æZüíß~Ô—1>øyÃx°,à²Ë>> «·Ï3RXWim6›h4NûšT*5Ñêa£j‰·þ6!Ö0㦠·µÍa±íî .Åûµ‡k2¡ø zs1ñ3L¥sÝkªPš¦èX_*u.4Å­d&>€ósöì'qüøc!I×E}) 3ÄúåÍØÚÚ‹úR·Ï3VX jqã·h·Z-4‰‡È JP”chVž*…è­°¶Ý’m ÅÔ0€=߀eK$3™ŒkëyáÎ03†·ÍHFŠ^©ÕºÃ}%©Ó«Ê9ÞLœ9ó<ç9ñXO1̸±,àê«[ø¾ï›#»¿Ï3Êa„F£1ÑFõã ´«ßS*C(° €I¥÷Ù¶hF/ËÝÊ*à ˆ,» ëÀ…gL“ٙś¿:t;Ë^UձÔ]YvÇ+¬L|ä#oÆ»Þu2êË`˜‰ ËÀòò[磶ap8ð›Â7‚ZF…Zãƒ<¥&„—uœë ó%e5—ã‚7Ì¡ gs˜dÿ|žVffñŠú@ù«¶-µ²Y1着ð¬öª¨í/¼Ä0SD×õùXÈûñæ†3Gž¹‘sÃà´‘)0· kÐzܲ¬àà uIÀ°…ym[(ù¼X ù‘e`qÑUVs‡)fŽ:$f¡Ñ~Êe±çÉ•™A £Ó):‡•ª7ær"šey—V&BLÓÄwþAÔ—1^ ƒ»0¯{Ý¿"2,®Øvpïnfì Ã:+øÇ€mÛÁÆPNë¾÷­ƒ÷Õ)MS(ª™ŒøpÃïysQs9¡(˜&‡0cÄh +µGÚÝú²f$ü kÏÊïDXSî^d20ÛëßrY¬sªU7’Á08´žqxÇ;¾…×¼f çº.dšŒþ¶Í5g¦Ä\*¬¶Ýý^h8°‘«êG0ˆ 꺘˜u]LÎ$Äa‹YæI›ä,²m»h •]ï É01Ʋܩ5PYµmú[¯ò LÓõÎ2Ì”øÒ—>†~ôò¨/ct ¡´..б£ëbmÄ)PÌ'O®#“Y‰ú2Dzܴ¾jÕ}Ÿ×ôS£¯ÂÚh4P©Túž¨ÕjE}/¦Ù-?¡‚+¬Ÿý`ü9LªèëTú0E“2 -3eHüì +ªŠÊ03Š7ò*ÐY.7Üݶ9ß›™*¡Ñ`³F¡ Æ¢® JçÏ¿ŠÒ_·ˆnêæQ(päM„Œ-‡5‘HĦ¥ Ð=7†ö` Z‹Ø6pà @ëj·$e6VF=üæáÃ̦)æQ`€â3dÅa9efËêß@…uÜéÆÈL™÷¾÷³ø—ykÔ—N6+<§^#©iT¢¦É¬¬2h°¾þÅx÷Œ§Ý ⣠+«‘Ò×ÚJ¥°¹¹õu˲ ÊªÈYõÎytç¯Jp«¨€–«ªÿöþ>Ì‘ô¬ïG¿kXl¯½ÚÕzY¿hÖ,Û¬rbV͈€96¤Ú>±ñB:G ÁC¶sp¤³zù‘\±ôc~óÒ‰\¼ŒH 'ô%n¼û;vÀ]aÀ¼êta‡kl3e`[»ØìNy4Þõ[Ö:<}—J¥Ò{IU¥þ~®KWw«¤ÒSêûyê~{î›ia$TXVׇ2²k­ÆÅ–Dšd²7ÁÅ4MdÝ2í¶jgA*ºÓ`% ä?þÇ6î»ïæ ‡1˜BA)ô««J‰—VQÌÞ!cP.?ò#ÁsÏ…x]Õ4¦ !##¬F£ÏÒn·Ñh4‚û@4­´išÀ*wñÞAz~*U¸Zí> )ñQìxœ+‰<##¬~—†¡=Œ‘ò'òuø®ïúRpåü”-P¹œ2ZÅX sÄŒ„‚ZMéçW®|(˜k½®döŽ;Töd­¦ÒÔX½:L•|éÒ%lnn=ö±1MoºóMý xï_uCO !º®téíšœ°Š‰8ëëÝß=÷ùù¾+ë>q² T±wß÷}ß2ïéGÚò­®ª¬²|¾[í× é#_*qޱ¨Õ”ÈŒUÝÝO CÉ´®+y½vMɯi*%ŠÙ“‘`)«»1Mßòòo¾ ½mjL2g¤Ú/{!Òé!û³ YL³wËœ®ëý2/ʇŸ°ª)Y õ:‹˜““<ŸWJûÑQï\)Õ‡ Ýl2щ†Í)fí I&ÒYÜYÓ´;#r* V]×ñ=ŸÿeœšP{XãèFX¥÷`2V…`B"L<Þ»+C×õþ"c¦é6 
˜dìï·ñÚ×~b>'¯Õ”×çêÕÞçËeõÓmIJ1ñ‘yøûnšÖ+»4L—ߪO˰½°Íf­VkæÏ°, _ý•_­ Ö)fgØ¿ „üèˆÅ5ˆïÌ[¾“É!=† Y‹XÃãñÞåY×õþ‚K¬‚M|f²-¨za&î½÷¥þ_H½® V¯Z’Ö˽ڧ’Eɸe)Ò3;ÆêuU½:™¤º¤j°¶Z-Ͻ°­V çÏŸÇùóçqÿý÷£0cZ–aH|!¡²lÒP†ªe†ÄžbA%2æ-ߥRW?¸‡µ\fA27µ†Kû&`Èþ'öÂ&>²(Ù’I൯ýõáíɦA ËìíÑ(%=,RÆ¥V×X57&Á²TƒZMU­.xXRÆ6X777í‡xdœÏÉcZ­t]8 ¶··‘H$`._¾Œf³‰JeÆÃRøN ÖÕU dÒX%¾³hù6 cpà·‘øÎ"eܲz[<Öëuÿ•zBND?Zª}Í–‘"Jõ:3ÈHAȸŸÙ3~ CEUÓie¬RÆ—š‘ë™3g°±±T*e?2™Lßsò‡F£ýý}Ïc­V F€X,†µµ5Œun7¶2/ÛS“þâˬ FæÆ¢ä{uUýô¬˜*Ð`%s`‘k¸®÷nM5 ƒ+™‹”m'Å"ðñ´Ñ')#pÀÈ*é!Ïç•Ñ:ÔÁ> ΖJ,Žw*Yt)‘H`kkË×][[ÃÚÚF_TöøøzŒßT*5u½>æÔÙ_uøTEíg¥ÍJ|fQò-kþÀ¢KjSTÐ_YB¹†»}.¬ŠMæÉ"eÛÉW}Õ3xÕ«^2Ý›% Á¹¯›E“È‚ñtº[!؃Õ'‰]r3lR´Ûí‰ÏgÞüUoîZ-Ûÿ5¼›Ñ8Ð`­×™öxЍT*¸xñ">þñ/ôsý^¿ ]Ë!Æj.Ç-NK„¬ßÍfsáŸí÷úíd*ÔúžÏ/ü» óAtðI×ï±û°6›MìîîÚ{Wyg5†íƒM$½ò•¯ÄßøÆ¾ôe˲ð²ÜËÔÞU! «§˜µµ5$ .ü³§•o¸ûî»±³³@­ß¦©¨÷<é:÷vœRvvvÆ.‚ç7~®á–ÕßÎÆs/ÓßO[[[8wîÜÂ×p¿Öo7å2ðž÷¼ ¿ó;ß>þ` Cíá+h¬.²~[ÆOüÖÁ¥ŠèºjÛ4ñíbQ)úhЦR)¬­­aggçÎÃ¥K—fP"‘@&“éK{\•’¨’N§ºëIgS?BˆŸò-ë·§’cšTdH ø)ãñxwöÀt`©†JÈœñ[?þà>‡×¼æÆx/N§UTõêUÖ( ¾3/O&'¨\,ªàRI=¸¾Œ‘Üh4Ðn·ñÐC }ÝC=„ûï¿ßõÎÂÖÖ–ÝïµÝn#‹á˜ø<º®ãÜçT5`)–gY,ÎAÅù–ú†a çe˜&“4XI`øµ†×jjv29Ä`uç 2Gü’m'ñ_ÆÆÆ×~!«þ’à·Œ†ò±Œ¬lÊPÕ4U%ÄÁHƒµÙl"‘Hô¡’,Èñããã± ÖL&ã¹ç5•Já‘G±7œg2™©.β,|mçk{÷«•²æ)ß²uhVBæÌ¼×ðz½ëwèœa¯a2æ-Û‚e/|á“lÕD΢dÜ0Ó4F˸e©öœ ă‘k,ó¬ 6¨x€_Äb1&‰·}åÛg6e1½€ŽòéÁJHÀø!ã΀ÒPï<×t²@ümÁ0€/}écÞʼi*… èaâ#•BŽHIr9¯~õjÝ^_öö¨£¥!—Òéºwáryº“’SOèÛÚ4›MÄb1$‰ñßdÄ·¨î=|B†2•|(¹÷c—ËTxH(™VÆ™E@¢À4òmgäó¢Ý}„x2ËúÝc°Z3 ÈL„Ö`mµZ( h6›MÓP×3G·à!!d&ù>y}ºÎ ³È8Uvf•ïŸ{ÙËÔÌ !dV¥^¯wë–¬®*Yg-2%¡M ÞÞÞF"‘€a¸|ù2šÍ&*•ÊxoN`ïmbf’o7µšòÐ3ÂJBÄ,2n÷>‘P3‹|Ï«_ÄûÞÇÖ$´Ìº~ÛGÃPÆj.§„LI( ÖV«…F£ @,ÃÚÚA"K€ïò].ÓKOBŬ2žËf‘T;²ÕUõ({lBbVù^ûÄ'¸o•„?t”|ë[UÊ{>¯ZJÒX%3JƒõøøJ¥ìçR©Z­VÐC[(ûûûAÁWd<íø"ߦ©”ö³g#Û²iåaÙæì´Ì,ã–¥<óé´ŠBU«ªì¤óeÙäaçì4Ì,ß{{‘ß·ºŒ²°lóuZf•ï4€oùÕ_U2~t¤Öë²lòõ9Jƒuؤh·ÛAoaèºô|åøø‡‡‡A#p|‘ïZM)ìGGÊ{A–Q–mÎNËÌ2žL*¹gL2IƒuÙäaçì4PGYNYX¶ù:-3Ëw:­jjD<ªºlòõ9Ê¢KÃ&Ä7‹ÅúžÿÓ?ýSüÆoü~ë·~ ÷Þ{oЗà ?þ8677ƒ†o<óÌ3xæ™gìMüAñéOû·‹Ûn» [[[ ÿüiäLÓÄ·û·ãŽ;îÀ]wÝ´ÛÀ… ¿_„Eü$,söÊ•+øüç?Øçs W„Eü",söñÇG»ÝÆ=÷ÜÈçO#ß}ëwÄ ‹,øIX櫬ßïÿûñæ7¿yáŸÏõ[yð‹°ÌYÑÁ']¿Ci°:ÓÜ *­ýö·¿oûÛƒ:!#™F¾à·û·ƒ:!cÁ5œ,3ÓÈ7×o¸~“0Ê”à3gÎèMKhµZ÷"$ŒP¾É²C'Ë å›,3”oFBi°& d2™ž Ϻ®cuu5è¡23”o²ìPÆÉ2Cù&Ë 
å›„‘›:N'èAxÑl6±¹¹‰D"v»X,†ûû‰”o²ìPÆÉ2Cù&Ë å›„Ð¬€Úø-›ƒ3™LÐÃ!ÄW(ßdÙ¡Œ“e†òM–Ê7 ¡6X !„B!„œ^B¹‡•B!„BùŠw¾óï zËN£ÑÀM7Ý40÷¿Ùlâ‹_ü¢çñaÇÆ9>Úí6LÓÄWõWO5¦0^™žYäÛãó`˜ŒGñzÈl “ñ(Ê×p"œ¶õ{Ô˜Âx=d6¸~‡ÿšÆ¢CæÆ{ÞóžÎÞð†Î}÷Ý×¹ï¾û:>ø`çúõëöñãããÎÛÞö6ûø;ÞñޱŽs|\¿~½óŽw¼ÃþÌ·½ím~ô£¾9ˆk"Ó3‹|ûq| “ñ(^™a2EyàN„Ó¶~S¯‡Ì×ïð_Ó$0%xN´ÛmT*lmmÁ0 \¾|Ù~NØÞÞF"‘°7›Mûø°c㟕J­V —/_†aH$ØÝÝ{La¼&2³Ê·ÇçÁ0âõé%ãQ”®á8ë÷¨1…ñzÈôpýŽÆ5MDÐó²òØcuî»ï¾žç.^¼ØyðÁ;ŽòdÜwß}=Þ‘_ù•_é¼õ­ozlÔ{çÅõë×û>óøø¸sñâűÆÆk"Ó3‹|ûq| “ñ(^™a2EyàN„Ó¶~S¯‡Ì×ïð_Ó¤|eÐó²’Éd`FÏsÇÇǸõÖ[íß •JÙÇS©Z­ÖÐc£Þ;/¤´y*•B³ÙD»ÝF*•ÂÖÖÖXc ã5‘é™E¾ý8>†Éx£ÑˆÜõÙ&ãË&ß~Œ™2NÛú=jLa¼2\¿ÃM“BƒulnnÚÂððÃÀP!¸zõêÀcív{è{¥Á³ßܸq£ïZnܸ‘B=Θƒ¸&â“ÊwXåa˜ŒGñzˆ¸e\×õ¯ «

†Éø=÷ܹë!³1LÆ—M¾¹†Ÿ.NÛú=J¾Ãx=d6¸~‡ÿš&…ëœ8wî†Æ]×í~"‘@&“Áþþ~ÏñÕÕÕ¡ÇF½w^¤R©¾Ú•JÅöÌÌ:æ ®‰LÏ,òíÇñy0LÆ_ÿú×GîzÈl “ñe“o®á§‹Ó¶~’ï0^™ ®ßῦI¹©Óét‚IJ²½½ýý}d2÷lTNúææ&‰„½©ygg±Xlè±Qïò™â¥šäzü8NÂÅ,òíÇñy0LÆ£x=d6†Éxåk8NÛú=jLa¼2\¿ÃM“@ƒuÎ8K¦{í“h·Û¶È}|رqŽÏƒYÇÆk"Ó3‹|ûq|ÌsNR¾£Ç0¢

_<G¡P˜úõ„ŒË0ù€r¹ ˲r¹ 0M+++ö)‹ö±z½Žõõu{’”Ëe¬¯¯P ÊÊÊ jµš}^9nY,ËÂúúzOZ¹\F±Xœúõ„ŒÃ0ù–ã²—oL³Ë8å›,šyË~>ŸG½^Ç“O>i?oYVWWmEˆ¿%ÏNœò)¯ázM¢Ä´òþgög8{ö¬íl¬×ë=[6(ÛSÒ!rttÔйzõªýœ¦iB¡Ð¹zõjß±d2ÙÑ4Í>vttd«V«x¬Ò§ioo¯ç5@7Œ°á%ßãB'Q†²O–‰AòìÔWܽ† ‰*³È»Dgãñ8·1ù Öcš&Ξ=‹££#¤Ói˜¦‰ÕÕU z0 !„B!KS‚CŒ3%AÒ€s¹UB!„BÈ©€VB!„B!¡„U‚ !„B!„„’¥I ®×ëøÕ_ýUÜ}÷ÝAÅ7®\¹‚{ï½7èaøÆõë×Ñn·Cñ?º~ý:n¾ùfìîî=”±yë[ߊïÎ/Â$~¦9{åÊüîïþnÐîáá'LsöúõëˆÇãøÅ_üÅ ‡2\¿ÃO˜æë•+WðþÃ@*• z(cÁõ;ü„iÎN³~/ÁúÊW¾o|ã±µµôP|css;;;AÃ7Cñ?’±D‰»ï¾›òrÂ4g777ƒÂDp ?aš³Q[ù~‡Ÿ0Í×ÍÍÍÈ«×ï(¦9;Íú½4ë2²L2™ 2™LÐà !aåaÙæ,™e“‡eœ³d:–Q–m¾’ÙX6yˆúœåVB!„B!¡„+!„B!„PBƒ•B!„BH(¡ÁJ!„B!$”Ð`%„B!„Jh°B!„B %4X !„B!„„¬„B!„BB VB!„B!¡„+!„B!„PBƒ•B!„BH(¡ÁJ!„B!$”Ð`%„B!„Jh°B!„B %¬Íf­Vkêã„„Ê7Yv(ãdY¡l“e‡2N¢ÄWñ¡Íf…BÁž™L¥R ±X ÐjµP(Ðl6š¦¡\.ý]2”o²ìPÆÉ²BÙ&ËeœD‘@"¬…B«««0 —/_T*ûøöö6‰„}¼Ùlö'§‹|XYLSým@­‹Àú:°ºª^S,v_$”o²ìPÆÉ²BÙ&ËeœD‘@ ÖV«…sçÎb±R©”íéiµZh4ØØØ°¯­­áàà èïŠÌ‘b8{Ðuõ·e©ç Zöö€dRýmÊ0Õ4 PPDzY VS†¬œ¯VSFïw(#÷½ï½ O?}Û\®…òM–Ê8YV(ÛdÙ¡Œ“q1M¥G—ËJ'·¬àÆ2•ÁZ©Tpþüy¤ÓiT*ìïïOä}‰Åbvª#‘HØ¿@*•²;'‰bp®®vgÏ*cè¨òw.§ Ïb±û>1PÞßs9 TRk: Äãêw˪A×kökÔóò÷Ñpíš2r[­çã©§bs¹vÊ÷éÅ0 äóyÅÛ²¤PÆÉ²BÙ&ËeœŒËúº2XÅPõRmŠÅnpÈ g6¹a¨×omÝ‹}ìî‰Æ2ñV1P×ÖÖÐn·(á¯T*h6›ØÙÙyŽR©„b±ˆýý}ܸq·Þz+~øa:)Úí¶cïæ¯ÿú¯ñ|`ì1œt]yHr9%x–¥ 9@ D+÷öz#”ƒ0 ñxI§é¢\6ñ ¿ðr¼æ5¿‡ïýÞÇ‘ÍfÇaYÊå2VWkÈf³Ðõ2n¿ýö“qÖ`YŽŽ (UÄ4—=Ó4Q«Õ ë:4MƒeYXYYA©T‚&Öª‹¿ú«}<÷Ü{ðÙÏÞ à»|ÿÎç!ßðøãcssš¦ammÍ÷q/#"€JƒŠÇã”—Ëe$“Id³Y¤Ç~õzº®£^¯Ûï5Métš¦Á4M¬®®boo¯ç3ÝŸS«ÕP¯×étÉd9—àëŽ;a°.Nç59ÙÜÜÄ•+Wæö½r 'AòÎwþ*êõ8VV®ù~n®ß$hdýn6›=†£_pý>œ¨Èf§{¿aH@H‡eY0 ò'ÿ‚—½¬…?øƒ'ðÂ>‰vûÕøå_~;®]»Š{îù÷xÕ«^@éD_õU?‹¼àå°¬}À£¾ _þòÓxî¹Cܸñ2L¢ƒOd°¶Z-ìîîbgg™L¦gCv"‘ÀùóçGN°v»J¥‚sçÎamm 7nÜÀîî.*• .\¸`Á^ܸqcàdyå+_‰7¾ñØÚÚšî?³¤èz×-•z êõ:~í×>…ïû¾ÿ>úQåa³, ÅbÑVƉ¯Çg>óüéŸ~ét¦iâoþ欬ܗ¾ôƒøÀ¾õWoÂÙ³¿‰{îù ¼èEg°¾~þÎßy¹­¸§Ói†MÓpõêUÔj5hZÙlõzš¦!ceeÕjÕ~}­VëSÈMÓ´'=†A±XD±XD6›…¦i0 º®C×u <ôÐC8<<ôýûž—|ÀÝwßÍÁ†a”Ëå`š&VVVìß Ã@¡P°("S^ŽÓ4ÇÇa’ɤ-wÕjuà8jµVVVlYN&“0M…BÙlÅb¦ibooÏþ|]×±²²b;[t]G:î™ò·Ì-MÓ iZÏëvvv°¹¹9—ï—k8™”Z­·¾€dÖê¾”Nw½ôÕªúûÄÏÔã¸4M•móº×ý3<õÔã¸ûî'}'×odýž‡±Êõ{y°¬nÐɲTö z^é=ôÍøüç_ˆ÷½ï×ð²—)G„áX|Å~åʧð‡ø 
|ÅW´±±ñ"NNT¯—ašuÔëJ?WzÏ—¤ð¾÷i¸víkðÑÞbŸ¯\¾µZ¹\×H~ôÑ?C³ù>¤ÓJ§‘8Ò•+/Å3Ï<3ÑõNd°Jª@&“é;–J¥J¥† ;ìïï£ÝnÛÞ¶±»±±1t‚JÊñFq‰ÂÄãqÔë|Ó7ý týK” ×j†ýÚl6‹ù =dâ%/ù5ü‹q õzwßýk8:ª"™¾ç{þGG/Ç[Þò|d³J‘8sæ‹ØØø^üâ,ªU‰d¾ýäÑŠbˆÒ^«Õppp`?¯ë:Ö×רÉ$ÑY7ƒ¢§ò¾ƒƒ˜¦‰z½Žb±ˆt:B¡€jµŠr¹Œ ¼õ­oõý@ùö1>Å Sò[³Q§‘iYLÓ´#¦{{{=h¡P°£ªNC37N( r¹œõwFaËå2òùŸWÞ~Ùrë­Oàðð ¾^+e›X–5Ðy9LÓ„éXàåïÇ{l.ŸGn¹H&“=ºÎƒ^ÇÁÁíxÍkþàÓŸþ~é—žÆK^òÓ”.qñ"ðÉOžÅ»ßý~ê§>Œ¿øKÈår¶óÜ4M¼ë]ÏáãÿV|ÿ÷߂߸/£Óùe¬®®Â², \½zÕs|–üèö>W(¨u;ŸWÑÝj¸ÿþ¯Çý÷}ßûo½õÖ‰ƒF¬âYñJ h·Û=9ñƒðòÐ8sçÏœ9@Esåyçï§]×me\ ¸Z­f+à‚¤&“IX–…K—žÅcý·žsItÈiôýáÿï:ÿ„ïû¾?ÓO¾ÐV(~ý×ï9ù<õH§B᫼~ìñ{˜Éd%—–¢iŽŽŽÀÓP„d2‰B¡`{‹„R©„T*…?ÿó?÷ýÿDùîÅ4Í‘7~q,H41›Í"™L¢\.£^¯#—ËA×uÛ(Ëf³=õq‰Çã}ò6oÜ×.Ʋª†¡”Ãä¾P.Ça”JJA7ŒÞý!¦©Þ#©üʤRés9uÓ0MàÁß=—k¢ŒG ÓTʃÓàtÔqñØ;š¦Ž‰([–’ÇAçs’Ï+¥eŸP<®¶¯¬®vÇrrk˜”íhãÜ>!Î?¹·8‡€w¤IÞ/,Ùg¨ÜŸ$k¦^¯Ûç¹óNUÄè©§ímr:<ÜÁ¿ø~¼âmýKê0M³O?:sæÿ‰3g>:—ïˆ2.œ†©ü.r(r!ò'¯ùò—ÿ^ö²—akë¼îuðÙÏތ׿þ»·Y)*ÒµkÀûo¯‡3!Lì§èiÚóP.çqt”G­6¼ÀÒ µ?™Të¶dÈøÉD«DQ777qáÂûùV«…‹/"‘HxF_hš†ÝÝ]4 ûµ—.]B,³ÿÎd2Øßß·S t]Ǫ¸Z—/Ož¤º ²O„KÒk%MÑt£ëÀý÷c,%=þÓºétƒì€\n:ÅcRf5TÇáÞ{ï™ 0 §Q¾-ËÂêê*âñ8 …‚‚],m—ÔlÁ0ºQ~@¡¥RÉÞ[ªë:r¹Ü@ï^Ô…ß)ÚºÞUè7 ÷tÕ´Þã^d³ý7/|á̃Ó(ãQÃ4•¬ÅãJÎâq%k@¯œH © Æh<®^[»ãqå(E±̺”‹ÑZ«©y!)üðÄOø£l‡s›è⨔Œ1Á­Ð‹c4™LöÔË`gW‰*ÈsNË]Ài'“I{<†aàî»ÿxÁ þ-J¥’I ¿üËŸÄÏÿ| ¹ðÎw¾†¡æÜÑ‘šWµÚ¢PøAÛÐ5MÓÎÂqR¯«¹óþÁ|"¬”ñ`þ}ïkâe/{z)"¯†G<ÞÝQ¯«µ°wMîYVWî„RI9þŠEYGUM›½½Þ5ZÓº £_™„yØ 7u:Î$oh·Û(‹h4=ϧR)\¸pa¬œûýý}looÛ)Ä7nÜÀÎÎŽýÞf³‰ÍÍM$ ;𻳳3tH£ÑÀááa(óçëõ:jµš-ˆìè‘„æEÉú£‘Âwž/M‹”p&“‹12£Æ<åeò ¨B Aï’½â!–ª¸"£¢\X–…jµÚãåv* ÝýÚ £ ^UóÒé®a`YêfáL—CBöë̓yÊËi[ãD¹¬d«PèutFWI1ŒùÜ$B«ª¶÷¬bˆaáÜÂ"N,9žN§{î}†a`}}¹\étW®\Á3Ï<Ó—A3+˼~‡gTI²Ç¤Æ…èA’%&NÐyP.«y0cE"PNñcAÖw¯b–âJ§aµñx×i$é÷.pýŽΈ¾èI·ÝöOðÆ7>Ï®£ßú­?†ßû½ÿ>üá¯x®³g•:©Oβ”ÃÒK¯5¹\V?½î–¥ŒãyÛ ÓÈËÄ«Ðl6{ªOº9¼Õjõ”Ï–b<*j;íÅϹùÊ[,¡ë:ööölÃUŒM§7O×õžu?X]U ñ‚¶bDŠyË‹ßò „Cá‘ýÅá—¢AîT«I«îF Ëê.ø’~kšJÑHªDªºÊ‹°ºÞ5(æ™L0oyYÖ5|™1 u_ÈfÕ½A©ÓÎW¹Ç˜¦R¼%uØ™zV¯×íÔIgD è“1ÈýP ˜b±ho_)‹=kÍ<åeY×ïE㬂î,^t£J¢9ePŠÐùuY]U ¼3bäì’0*‘GÖ|YÇGùHÊåÁ)óâ8ÊfÕùdO¶®ßáÆYÈSþ–uLÒÆ76þø‡ŸÇ[Þr ßñßr"Ïq¬¬tù<ìí?B­¦ädHMÇÈ3¼L”Üh4°¹¹‰Ë—/Ï\½,‘H ͉w¦'DIA‘j ù|޾˞L¯ý”Â<<ˆìóË ßîý§ÅbñxÜÞ錆8Y6cUö™J±ÉØ—©,Jº{ KES Wq'Å7 
,ƒŒ/ÃZ8[8];é#Uªu]ï©~-8÷MyíýÓuwÞùø÷ÞÿwÝõû¸çžß€iþ! …*Í®6/Q²IÖqÞJa¾Z­Öç›'”íÑ8uÙ#*Îúxöi<þøŸàùÏÿfXV _ÿõi©‰*öuî¸ã>|ñ‹ŸÂýмøÅÊP4M÷\–>Ô¹\n®Õسô”FŠb±hGÏ…|>ß—5æ'’ÁR¯wÎx¼[]ºPPF@¹¬ô*§²ŸÏ«àÂ4‘Êa®vö)»íJL¸~ý/ñK¿ôïa†Í!º|.§õ’tßtzòmw£l€Ri¶Ê¼ËÎÄ«³åJT‘q§—-™4ñ‘¬âóŸ¿ ¿ó;€iÞ‰R ¶Ç¥V«Ù%È‹Å"ÞùÎ?@¹ïi€÷9Òõî^ `±Þºj•)¤ÙS&ýN5Mëñf³ÙH*šR™@Oÿ1IÍ‘y8(RjšÊ@h2Ic• ƲÔÃk-—ýnîc’Áã|^æâîîñïÿýÓ¸í¶CÜvÛ7àñÇoF,à _ø³ØØx~ï÷¼Çѫؼôä¡á:m(2©ò®ij'›Œ(Q¯w{„IZŽ(C@×x•–Nš¦æž{?”üLöî)‘–„8ç£8 K%µæKQ®½=¥€çr–]ÄHŠqÔëÀîîñÅ/¾úЋðÁþÎ;‡ƒƒ,+Žõ¯€O}êýø¹Ÿ»…Bv¬ÞÙ„L‹´Þ0 Ãΰ‘~Ú¦i¢^¯÷ìožg»˜|^­µAõm§Å+šËu+QW«4ÈdHK,Ù¦L&ñÜs¯Å?ÿçwã/x5Ló[t·Ý~Ób/ˆÜ²UdpLl°V*ìîîbmm Tùëíím4›M\¸p!èk‰;ú(¥ó%º´··‡õõuÄãqüôOÿ0Þõ. PˆcooÏî™%Ñ…‚R`¼¼íN£1—S}•¦M ®ÕÔ{ÅKïTœd?‡»ú!ì*›?öcÿ?»º·yâ=”6,âõž9‡eM¿ ×ëƒ3¤·c­ÖuüŒ›–ã~xR÷ö¸G‰ Æ4»^vg1I ”< ݈)»Ÿ¤*’^Ûê>“Ëå®ÓðÏI_*u£³ÔqȸÈÞTé ß¼»WÚ­·ˆ~Q.«×1e7H&2X[­vwwíbCN4MÃææ&666†–Ê’Z­¿I´¤8½‰étÚ®pÇðmßÖ}½×â;n4SŒ[¯œxiB-Ê‹¤’¢( “ŒÝÝFƒU2ˆr¹ ]7 i·Û†›¦ueì¤s…½'è¦ÙJZ­D2MÏéoç|Ï ¤_©D¦d¿’xÕí80 5>IÛ¹Îx\ö§ÐX%ÃI&a·œ½µãñ$ªÕn%^?*£2¤Pä°ö@ñxüÄXí:ôœHÛ©¢+:• Û/æÕ®…Æ™g/g 7C ènåËç½[ÉHV€è$8&2X¥É°——"“É •Jáøø8´«¤©8ÑuÝsßÞ<*üŠ:ÉM@ŒQÎÆ±ûþâ,MÈ ¤™õí·ßÞ³¿zTÚ¸8Qd¯†W$Spg w§Báœoõz·O¤<çuα’z½ëÉ)ò!ΦõAH ¹[¥PY©TÂêê*J¥’Uzª=YN$ÅWÒ|½*†Ñ]Ÿ¥¶‡8Ë%»K¢ªRÁÔ)îR; ŸWF.}6$ Ôëu˜¦‰túÀv®xEèE–I};x¦ÚÃÚn·‹ÅúžÔê& Diç±Hjµne<©R,ôÓ俌ӼŒœ êõúIë¥É¢ðRxhÔk†«4x1K¥¢¼²2™K^6„‹V5&²— §žôR¯×aY–ÝsX¢DNt]G­V³£¨X__·›Çd?ª§Î¬1/ŠEo§8 t”••á…ëêuUÏcàOÈpË=ÔjuhZ7z:èÞOvø™È`Íd2H$(‹¸pá‚ImµZ¸xñ¢ýš02Èî“¢‹$—›`rH•qïª×5r@)ÙO?}Èb<¶œ‹%)2ëÎ3s"yhGGý9:ÒÁ[ª{xpŠZÉ’1¹|ù#Èå®áöÛÿ!ÞùÎÄë^÷yhZÒvnÝ>¨* XÉ–³W1!aA”viÑ1 YF‡© ÝŒ/ªUö«&Á³¾¾Ž;ï|7ÖÖ^k«µZ]M&ް–Ëe Üÿý=Ï' ììì}=‰ìF}Õ[Ç;¹~}½·‚”sã+ ŒçÆÄAHOl¶ÿŽ$õç#Øîä´cš&ÚísxãŸôP¼×uá Ê…_(¨»‹aô¬â„‘ÊbuW‘&Èî¦}„ àòå6¾ã;ž·¾õMøõ_¿À¯Â4M˜–J¥÷§’Ðaš&jµ4M;ér°>¶± t‹( Cú¢V¤˜X6ûÚžZå²·úL¢ÇÄk*•£>ŠF£ÃÃCÀ¹sçBY¼l.éGjÄà”»Êúzwöɦ‘l¶k¬:7Èq}J‰=çfF@½§^W¯•*8@·Ù ÕH¢ë:^ô¢pä£R*‰årJ– £ÿîâ¼i²êFú9I)Iiê7O¤ë=ãÈ`YÊå2î½÷à#yÞýîÏáû¿ÿ/>`¿FÚÑDQ—65ÃŒUÓT·y‰¦JewFGIT1 Ãî,Ù’¢ÖKVoÏËÁT{X[­b±¶¶¶F­V+´Å–ï«ôÊ =¥’Š@¹#Oª—‚ºã¸«4ÊÐ#Uú˜¸{ú8_;®ajÝêPbp¤ÓݘŒÙOäN›NÓ€]×ñ·û@ø¾®z]É«sîI”ÕK>GYÜâN™“¬ID­ÖÍ,pžGŠbQitš6Xæ4Í?ã¸Xìn ¥—!ú”ËeX–…û±¿ à¼ýíWðó?ÿoƒ!S!¿¤o¯Wå^'î¶a^…Å 3RÿÅ0 
O²¯²Ù,r¹œÝV/›íîbtuy˜Ø`ÝßßÇöö62™Œ|éÒ%4 \¸pkkkA_“'^‹r—ÆBz¹³H>Ž×hÔݦPPѬRÉ¿]åb”ÊùÄ€†GÀ £Ûû¤P=vËêF~¥j®wo¯ÆYÄÆ0’xË[žô0¼æmM{gI§û Æjµ·GŽ—ÇªVëßÔ*iö±™óz­äȉ3É}~iÚ,wOë wo©¤^+Ê5­;g€Á}x܆6ñDö¡ ZxUÐC"d&òù<ë’$b 2Z¥‡T–ŠÀ„DÃ>ñ‰§ðÿãz_‹&©V ¨Û¬»%‰6¬ív•Jvtvvv°»»k²ãFZ¥²p*•ò<Þl6‹Å|‰ÜzÙ¥A\ Ù%îd¢JM®÷¹£Y³âÛ8Ñ §q*ÝÇeߢ4ŸuÑÙ¤Ó}L¢Êno£äƒ)Æ3)ßnêõ:€l8íyw¦À¼pÊÁúz·ƒ§<¤ÓJ¥Ð™È×(§–¤!W«JÞäsK%%çΜò»Tp™é´2Ü%Úêl¸ì–]ÓTcº‘`iˆ[.ãÎv{î_q2> g¼ýíï³Ï>Â}¨d,Â,Û€Ê}«‚Té]_÷î÷îì“-ÅÖ¹7õôvwS,ozSßñ‹ÝÝRß–>]§ŒoÿöCÜwß¾ó; ozÓuüÓú3øéŸn‡áž-ŠEåˆY]UŠËÙ³ê碾++]ÅéêUïl Ÿ»l†ÕÕÕ¾ªÕN='›U>4'AûHx»ŒJ~ËŒišxñ‹ÿ1r¹vwSpÛÏÎt`ââ \_W¿zÍÙ³½úÕœ˜È`•tßíím´éfâ­00µÀùÚýý}\¸p±“ÀC=„3gÎPžF£ @,ÃÚÚZÏ>I‘í`Nt]oÁ¥Ób°.š|¯AÈ·W¯~m8+`Õ_*ïõZIÚ…ŸçCþu6» ‚0Èø Þõ®C|üã·â‘GÞ‚ÝÝ-|ìc·bmíkÆnóAN0M¥5)ËÁúݲºû¶½Ò³òªÝÔžaÔë]çæœkN„]¶u]ÇúºÚ»ç®¿Q«u—4iàü™& V~º·2qÒüèžG.—³“òœË VŸ‘Ú{{jÝ—ŠnNLS9¥î‡õ2^݈)˜ºëþþ¾mœJžûÎÎŽ=!)©TÊN1N¥Rvšñññ±}\H¥RhµZS_¤—žÚ‚K$Ò!ßn h6ïŸýDó (ƒ5îÕÞ¤e™˜0Èø ,ËÂOüÄ/âg~æGìèÓ×~íí(Duc“l<Ôu¥H®tº¿Ø4 ›µw?'Ù^c]_W¿;·cÈ^m¯=Ûã°ÀÐ`˜ePE–Ü…f±·×û÷‚íð"Ý Æ‘)éL0Ë=K”twyf]ï "ì2îüº>üáëø¡ú#<þø ™/Ð~÷wÚt`g•Q2ö÷÷ñž÷¼7ß|³ïçž—|Àã?ŽÍÍMhš6´}TËY¹Ý«*û¼Èå”'˜¡ëBÉÛc=†;î¸Ã×á„aý„®ë¶“ݽÏYý×M½®Ä¢Óñõ« ‘_éÓ3©Ê.E××½çU±¨”Äz½k¨x©’õxw7p#Ž&é >¨óãÚþø»¿/ºõV4›Í‘Ûì&%Ìë7ü‡ÿð%¼ä%o›ÞtGGG¨ÕâX_üov;o"M¹Üí"YaîjiÃÇùeˆÜ‹ÌKç éVàå ×Y9é}â$3Htð;ž~ÿl’÷wFp||ÜyÇ;ÞÑyÛÛÞÖ¹xñbç£ý訷 å±ÇëÜwß}=Ï]¿~½sß}÷u{ì1Ïã^Ïy÷âÅ‹}Ï ÎÁAïs\.7Óũ5h2WÉ‹ç‡|w:΃>8ò5ÎoýÖïu ”¡~’É G°PÆ‘—iXô>.;;ï¼ô¥2—kž˜ díÚ5µÈãèÈûuGGÞÇöö:Eß³YuÏ4ÖÌc zýF:î\»v­s횯k׺Çr¹~=ljóµK…¦ ¿ðNG}9𦾴A2V­v:€zíÕ«ÃÏW(¨‡œ7VB¡Ó)•†ëo£ÆêxÝï|Ã7Ìå+ ëúÝét:ÿÇÿñÑN,ö_;{{{ös×®©¯y˜ §Ójé ×®)¹r?稬ٚ¦djÔd=8Pk¦¦)™Íf•Ü•Jê‹ôþ££ñåoL#/##¬‰DÂ.g­ë:ö÷÷m¯O&“™8Ê:Ì‹Åìã­VËnãü}R¼:ÄX–Þ^|’þE"É¢åÛM¹ |Û·=ô×NØÇÁ‚–ñAü¯ÿõ[xè¡Wø{AEÞk¸at=æÎ½¨RíÙ‚YãzÆ}N6»ø`Õj7Ó0Ôø …@îƒa•miÃÇíL<ÞÍè•5VuÇÙ?gêB‡ÉB©¤¢GGG]eÏ-ÛÕj·§õ ¹Ÿ¤™W¨Ï4Õ?Á²†ÏŸqõeMÃ{¿é›ðÆ9|½a•qxï{oÆ/þâÝÈf»Û™ÆÙiàêî4L³?R ‰¼Æã½½Ô-Kmðj\.«çÇY{“ÉÞüggƒƒƒÁ“} 27'ª¬i.\¸€‡~™LFçÏŸïéç4ŠT*ÕWB»R© ‘HØÇ2™ ö÷÷í㺮cuuuª ´¬þ5"´ûWÞV$r,Z¾ÈýºÝþHx[6 ¿_RƇqt´‡|SÐ_ÏàJˆ†¡&i¡ Ú²\»¦ Qpüî¿âqõìí©ï%VFHßGXe»^¯Ûkº³…s.§lý9w±òqܸÅ]”úk×”,¬®ªTu/$­q}½[ðË‹yWOO&Õ˜#°‘2¬2nšÀµk|Çwü ¿¢áÕÌu½·Í—T@—´[÷žÎb±×Xu¬É¤ºŒë(t;^Òiõ9ÕjÄ.^k£yZS¯ÈÂX”|;1MÉ, 
¡|‡©ïi±HƒÕ‚q/j5૾êü³öÖ ¿E癈°È6л…)’v™ôçe4T„IÆàÞ{ÿßôM_ì—"©ê]Y™=fÚþÓ¤±ö° ¤R)<üðÃös»»»ös>ú(¶¶¶P©T‚¾ž><*⇷¥  òÜ©ð Ñug[·Ê÷°¾ ‹Dn@dixÿû¿ˆx\ÜÏÚ·‘ÒÎFê¼DÙ¸ºÚÝ»Ç9AP«¿ù›üžLvÓ€¥ý»‡ Œ‘k£Ñ@«ÕêÙ›Ún·Ñl6±¶¶fW[[[C»ÝF£Ñúš†¢ë!Rlöñ‘Ð!5B]Ââ]çüZ, xì±/áþûç_Ár"Â"ë$òÈþÕõõˆ¬†¡ª£Já—J¿„  \®]; ^7w=M§½£^$F¬Íf™L¦§¶Tvî“V*;HÜ…ÍXp‰,#R8Î4ÍpVÀÖõp(ññx8ÆA|¡Vî½÷ƒÁ+:NJ%V¤&¾a’Édt:Þ9Û}Œ³Çœj,K5Y[†Œb¤Á‹ÅÐn·{ž“>¬a5R¯Ì¬P·´Q z$bH¡:õ{ˆ]ða˜wAô™$sÃ0€—¿üÂe°â¦iÚ²Í6Òd·u4!# ÖT*…f³iW k·ÛÐu½o3¶¤Ÿ9s&èk²I&û‹„>Â¥žD é „Ø!–Teî¥Z*öö€§ž: §Ì2#Îý«‘Nw2µZ7‹ P(·¡gd•àT*…L&ƒÍÍM¬­­Ù}™$tßn·qxxˆíímd2»jpXaÚY6’ÉÞT±P:d4nTâ+RÛ0ŒpÊa>ó™îb K‰´fE )l÷ÒÇX}Xc±.\¸àylmm kkk¡Œ¬ºõ˲§Ü8 KaLS‰Øƒ¡Oy'ÄLSmE6 =|ò^,'Î]BfÁ4M|èC/Š–Á©üe$ÒíÎ0 äèÔ&#Ë`ÔÕÃÃCÀ¹sçì=¬a4TwfJ(òä‡!½IÓìÕB]t‰Ÿúõº®5ÑUâ’-c+¸Ä”`2&†¡õºì:®ë)Á}ºË`­T*ØÝݵÿÞÝÝÅÆÆ¶¶¶‚ÿD˜¦>o¼÷fDB&À²B¦¼2gBç„t¶ô dt]ÇË^ö– ‡19!ÛF‹a¥RHô–0ÛÀ˜{Xwww±¶¶Ã0`ÖÖÖ°»»Û×î&ì„"OžqFWC§¼wÖí»Cˆœ=«~ÒIC–Ó4ñ¹Ï}Kôü‘0 ’PÔÝ`fZ$i°J»š û9‰¬J«›°âÞJú+!âî5Jù®×ƒY"œ2ošfðÊŽÖ! >a¾û»_­ý«„Œ‰e…‚Ê$Üé(ÕŸH¨+ ôîUÅbA{,ÜÙY¡/ºDȈ~Š…a‰ºÞ[;tNÞcÈŒH-MãÒI–“x\ çáÈ| Û}„ô1¶ÁEzeÐ {õ:ÕA9èQáÎd ¥Ájš¼ß0ŒnM: É2b^÷º·²à.YZd—i\p PÅNy/ =Km°º |RŒB×™KO&Â݃5”2ÎB4ÄG$ªë!lictΙÑu?þ¢·›B×U['BFP.‡dÿ* Ò¸n‡ž±ÛÚlnnö=W©TpéÒ¥žçvvv&D³ÙD*•ò|>‹MÝ:guµÛψBä’ŠýR2/ùú» …Ò`e˜`é™§Œ;1ÍnŠd( .e³\׌Eɶ˲ðÊWÞÊî0d!!ã…Bˆì$”Œ4XÏœ9ÓSpIðîi¨T*h6›=†n«ÕB¡P°‹:iš†r¹<ñ¹ÁÊÈì_¥—g©˜§|Ê¡-JMh2¼!-5ó–q'¦ »M(•öÑ^*)ÛNLÓÄÁ?:ÖBaѱÈ”Œçr@±ÈÎd|F¦' lmmý˜„F£ÑÓßUØÞÞF"‘€a¸|ù2šÍ&*•ÊÄçl¶úý«d阷|½ÙW¡tÈpÞ-5‹q'šÖk†Î`%KâeÛÉ_üÅÓÑ]:Ãx"ž!㦩Ò•úJ½…„’‰÷°6 T*û!mo&¥Ýn£X,öyWZ­†ÕÅbX[[ÃäöN€sD¢kX#ddb!ß@7ºÊý|€Jsû¼#S±(Dhö?‘¥#HÙV‚ ÑÛ¿J"EP2^«õá×[,Kí$¡gì=¬º®ãâÅ‹hµZ=Ïïîî"‘H \.O”&¼½½µµ5½ý\ô¦§R©¾Ï…iªì”HehQùZæ-ßB©Ôý=”Ñ&gU(²T,JÆËÖ×»u ØW›Ì‹E˶“ßÿý?Ág>³-ÝE0 °ql4BÆ-Kµe¿z5DÑUÃà¶¥ˆ0V„µÑh P(àÌ™3(—Ë0 Ã~”Ëeœ9sçÏŸ;Úº¿¿V«å™Bʃ8/ƒu^ò (…çððÐö€:;D¦¨Y‡‡‡sSv‚ZÃÓénFA­V Ÿ¼[Öò¬ã!g^ë"×ïAü¯ÿõÕ(æôÅ‘H ë÷<2A­ß€ÒYD¶MÓ ÇÙÍâÑEtpß Vñ>ôÐCC_·±±V«Õ“ZàÅþþ>b±t]·«“£R© Õj M+VZûÎ;ïD:XøÉ »"wsÝÂLñäe5×ñ2Ù{“F¿AZà Œ$OÞ#ˆ¡yõäýp·  Õ€‚ë\‚qò¹u(Ãw=Fm&“Áwãwã5¯yï_ϼäîºë.lmm!“Éè]g™Iœlmmáî»ïžË¹ƒZÃ躎lØRÃ~Y"ÖÖÖð]ßõ]xÅ+^áëy¹~{Q«/xÁàî»_¼Ø/Ô/Â`€,²~Ï£Lë·ivƒ™¡Ézt§î¹“Éd¦Z¿Gîam6›H$#'Žù(¯þ¹sç†?sæ å’Ïtþ>.n§IèÒÇŠeèiè7M(ÃOìêT¤3ç8^‡26 
àèä§×ï”Ñi9>;`Ïãs‹J'?5¢«¦ïµNÆlœCŽéPFkõä½yà®ÏÝ|£ÿ_ߢäè]gCa­Õ"¶‘œŒb‘2îäìYµ÷ɲ¬ð(;N¸*ò%ÛB­Ü~{éôïýUL‡³¨ %Aʸ,¦i†K'§³?Œ4Xc±˜¯i ™L¦ÇÃ(¥²^™L&ƒýý}û9]×±:a/§2¯ëz¸&GPPÑJIÛu“8ynʈŒCzºÆaùä˜ ©‡2*áz Ð5F ®Ï”×XPÆh]CS¨žŒÍ:y­×=Ñ‚2ž ŒR'”‘ì0jÃãÀàÌ•©Y”|Û_Ã[:,Kå,Ó`]*-ã@ïz^«Õ “L9çï3‘&Ùv²·¬¯Ï'•Ÿ XwÌ Ã‘)Á‘a¤ÁšJ¥Ðn·GzXd£u,›yP[[[ØÜÜD£Ñ@»ÝF,Ã<0ñyBU:ÛOÄp„¤äf=ž×Јº)B‡ò¾$”¨¡käzu(±ÐÝ/ê•¥'{NÓŽÃñþAÚäÇK'Ÿ•ò~@ÍÓucò¿äÛ™šÅß9¸ÕUzÜO)~ɸ ë][°^¯coooêsÍ…õuuÃa §¥ÇoÙî…[;HðÌWÆCDbë½È0–ÁšJ¥P(°³³ãi¶ÛmT*d2™‰ZÛðÌwO¥Rxä‘Gìý°Ãö| BÓº ÎB+¨Ö¡ŒÉqä_<óét7¹?樂z¾*½Uƒ2.½è7M×s’âsÁ¤;uVIDAT›E4PFÞÞÉõ¬£7ºê5¦«ŽËy†­OY 6fÇ%d[Ú€ùÉ7Ð:™aðJõ'MSƒZ]U‘Õ0EÂÈ\˜§Œ ¦©DKÖò@•ñÉ=%ŽŽT…à°9ŽÈL,B¶…Z xî¹§Âç|$KÍ¢d\×Õ:žË)%458ß"ÃX}X/\¸€ÍÍMœ?Èd2H$vƒáÝÝ]ܸqc`Õ±iˆÅb3M§ÃIJ¬¾êÆsa÷àÿû õgÀ‡ß4øu¦©”§¡«™¬§ï=ž€2” (Cw±ëŽ>ÆÑÝêN÷ÒèÀƒ†¦A¥âÓ!¹&ô0«|]ÿ†a,(½Æ0”ajš*wÍù™¹œ:^¯«hS©Dcõ”㇌ †¡ªK–Ëõ`É(zûMÆãÌ&8Eø)Û‚aׯÿ ¾á"auZ$$òø-ãºÞUBUƒ€ëvdË`M¥RØÙÙA¥Rñl]“Éd°µµ5qtužFwrL’tF÷¤ªÃ;’gYÀ¿2€4ÿÈçªG´\V®ÔB¡{\×ÕsɤJ×C‰Î6 ŒËx†uô¦úš'çqÞûÜëC*J;Ê MÃÛHv¿†,ÉŠœ{„Õ²”Ü꺚ñ¸2JFk2©Ù¬·Ü2’€R¯×qtt4û §AŒÕ\®×X%Är9à?ÿçÒéo z(ÓáÌÛ'ÄE.§Öp˲“L"ÅX+Ð5Z¥uM³Ù´Ó…çQz{VŠÅîï‰#¬bx«?ø9 ýÂ~£ñ‡~øŠª')Óùÿ¡”úd²×ƒ“-z¡×¸Ô´þzwŠm ê=ÎÏ•ÔaIíužB?yÞé<ò²#4×ÏADÔé{0Msº ‚rY)áÃ<Œº®œ/¹œJ{ööØÆƒ,ITµ÷…îñÓõîO]gš;™é4P,þþí¿}hö“ 2 ]Qt•Dб VAZܸäv»f³é{šÌ´8çÃĨrÙÛƒþïÏ| øç¿ ¼÷z«:}óÏ/;ùÝ€Š2y'‰ÞŠ·‚e”J$SCo”5ŽþbIÆÉ¹jÇ’èé=J– rNÝc8—VVT´?W–eõNËêOÿ¸ïƒ,ŒdR-§¾÷ҮՔ̆Z§Ý†èÊŠ’sYç«UÊ=™JYt‰,'Òá.T—H¤xž_'j6›ØÜÜ úzlœA£‰&‡e)Æ­¼è:ð… ðï¾xá[»Ef§ÑXƒ2@ …^c½EŽœ”Ñ M£[íðÞ—Z…2dÝéÀ@×àŠ®ó‘H#¶¥ú}Â=¬õAˆÇ•¢.‘ÖõõþïÙ,•t |Û÷T¯«¦®†¡B·ƒö[)#µTòÞBˆÔëë_,ª{F±¨RåÕ=*RÊeõ3t-mt}öó…0q„5jLä•×<ü°»b“èbñ$œu Ð|mï>=Ññe­Ž;žs¾¦UØÈYWŽYèM÷ÍBµ‚‘qye|&ÑmU3Ê);ª‰ÉdoO³±½òµZ7¢(§ÊÊŠ’ól–{óHèX]U²>s„UוÖ$“‡Ê5 †\»v gφð&mšJÿÑ´^ÇN©¤ŒTËRÏs>‘!8{ƇÂ`µ,¥lk–Ö`%g¢ÉQðÿ üù/ô>ŸÏügÀJw+ê:‰£7ª¡?‚š„ª¤ (cÔٺƫ“Ü·¤2ð –1·s‘´à4·À!‘g¢²r¹kéª7+£Õ4ÕOBB†ý Ø>%Eà *'$tXp÷ÝŸ>Â*E!•QL*£´PðÎB ƒ“ŒIh .I‘SqÜSï‰ Kk° yåoypß}½{SãqàÚ5e4 îè©»ïªTù…ÇëÙË*QR¯*¼’:<ÌØVÿC>#Î,â/âØÎf­É¢«ÎÅ‹ÈsÕᬛZÙ)¨˜ÐbšÀ§>õ)¼íms¾Y‹¢îÞ®$Ç,K¥Â«~iêo©q@È”H陜ŽãR¯w-N,Ke †Ê ã2rø¶‡5L8·áM4Ažÿ?€£á}Ì™–ëÖ{j®¿ÓP‘O¡ìñš,ºE“ïV5Ù“Ïœv^Iª° Uu˜, †!:Ř’þB¥DÃ0X†,%¦ ¼øÅ›Ÿ|ëºJ;“~²çÔI.×m[¨×iUâ 
¢¯ÌÕ`­ÕºÛ>Üϯ¬¨çŽh¬F”‘Öf³‰J¥2òDív;èk±1Í^yk‚˜þî÷~Uÿ1÷þRg@JÇèj¼^T9‡ï‚J8ùÌYæ¶ÌI}ÆóP2‘Q.{GW 1†Ñ[/{Ÿ™×®]…¦ùœí"û¶Þtx¦ò’bœèȦi";/Ù«ÕÔùåIpï¿&‘Ä·”àX,š–6@ïï±&ˆÀ|9ðuè/r¤£7­ÖB׈Ã{©æxàm0Z˜º®¬KˆdºÆ½‹,KyÓ}T ‰¦Ù»‡u"d’0BDBN2 üíß6ü=©i*cÕkß6çY ’¡»¾n Zõ± ŠE5 õóàÀ[¶é¨_ F¬©T ;;;As&ÆòÊ›èKrïQuöC•×–¡¢¦ƒN=¬ ’P<ùÌy:;‹˜=RKB‡iª›€®[}=‘û~ÀTRHÄ0Œ®®1QtÕ²TÚc¡Àh =Ù¬…zýæéO`YêÑ›Zæm"dÁˆŠâ[/mÃè¶–L§Õƒ{­—ž‘{XFŸ¢Ðn·Ñhøì ô‘x¼»n›î¾’ƒ0 Œ: ý«;âšF׬ 8Ÿurƒ ÒæßjFú¹2eé{mN§{!’Éë¸xÙÃPû•r9«$üú¯bú½}✩³Ñ: 'ª~—‚ÓiåŒ98Pº ò§‚©Š.]ºt ›››A} ét7]½g‚ Ë(ûØ—º)¾nݨ„~òe:§ UYX*{ó7$½ÆN"dGŽUˆ†J;‰(R\lh³yÃP­ÇΞUŠ{>¯zfsω•Jbú‚KëëJÖYP„Uœw†þ«RÌ€œj–²J°Ð—~ðBÀ3zRAý¦—Qi¡?òêDæeжÂ0ƒ™Dg¦ Ñe&‘J–LÕªêS­ªÉÁ9A"Ä+_Ù½µÃ‹bQÉ?3$ÄŒ\ÃÝ83j,K9eÈ©g© Ö¾Úƒæ‹àËÿ³«ä8³'ëè¸ÝV5Ãî1èo³h’'ã K…ˆêÀ›ÓÃÈà,¸40,™ìn’J&™F"G,öëÃ#¬õºs2¨”÷z]¥Âs» 9¥ÒVi.oš*SFöeK-:!O=¬ÍfÍfsèñV«5ÓgôÜEïlI‡Uj¢kÜ–áR«AEQ©‘,B¾¬R€92oßs´›{ÃyB,bý”NþÄ/žò^.÷÷GÍçUF4dJ££t÷°uÊHû¥dRmc*—»éîÌ ð±­Í$4›M {"$ ”Ëe¤R)@«ÕB¡P°'’¦i(‹0O@Oé—ZDßÓçÕ›ºKäTƒŠzͱ4«s“åaQò Pâ¥9¾¶¶†ƒYû‰‰n¿çzÞð‘W÷F¤²PÆí ý«„ `‘ò=ÐcÉ+™#‹’q]ï¬CÛÚ¬®ý•%!ý¤Ùü^ýêW}éä”°h×uå?¸¥Cy:ÙÉF¦' [°ý •Jagg±XÌ~îøø€šò»ÓøM¥RSåцœ¤äà]tÉð¢_Òé5e°`™ˆEÊ÷@ ƒi4dn,JÆ5MíFì_·×6!#bý~â‰ÏáÍo~Úû aP‘'¾„Œ?÷Ü',—Ù’‰ŒÅ‹.Åb1d2ûïV«…J¥‚µµ5$‰¡“¢Ýn<öÔSOÁ0Œ¾´…E'µ‡Uw¾@\ï?áYÇ{ÈRÑh4ðÞ÷¾ÿøÇ}?÷¼ä>ýéO£R©Ø)ù{°2%øÔS©TðøãÏåÜ‹ZÓI•(`Ž2H)ë§Žýý}¼÷½ïÅO<áëy¹~ Ÿýì̓Óݹ–ŸJdýöÕ‘}¢upµ|[Þ2.ÑU¦¼Ÿ*DŸtýÛ`•MÚ² ;N{>Æ¥Ýn£R©àþûïG&“Á… ìçqãÆÇn¹å¼üå/ǹsçìçzgÑT§þs¹ííÝIbxËYΜ9ƒ{ï½/yÉKæö~Ë7¼èE/¹sçpæÌjvßœ³,*8çÎëñ Ïƒy¯áµšz~h±ŽIzû‘¥!•JáÞ{ïÅ­·Þ:—ó/býž}öÙÁÜÞq*‘õ{^ò ,F”ÁÚn_öv®3ºz*|Rù«J°®ë( H$=JÐÖÖb±t]G³ÙÄ#<2Ö‡6›Mloo#‹áá‡îK=D"‘xì–[nÁ+^ñŠÏQÏM@ kè°¾=\õ¨¬” Ö%%‘HàÞ{ï韖yÈ7 §|{B‡Èd2¸í¶ÛævþE¬áõºÊlÚpž)“§’T*…v»=—5|Ñë÷-·ü’É¿‘ò}êõ{^NÇEéà€RGú   šÆèê)dZ|¬ëöö64Mã>Ú#°©T kkkØÙÙÁ¹sçpéÒ¥±>´P(ØyôîÉ!ÞGgZB«Õy3pÓ—F&[TãèFXu 6JK ÁJ¦bò ¨”à>Ï|:Íý«dî,f W?G6œgJâ#‹Z¿…DbHõÕR‰Hâ;‹”ñjuH…`FWÉŒ4X¥¢ØC=4ôu=ôvwwGæÜ뺎V«MÓÐh4z€²¼3™ ö÷÷{Þ³:a%Ⱦ4²´ãçŸ={R‚lMC|eQò-ô¬ñ8=òd®,JÆ{Ê 2JYp‰øÈ¢×ïu™‹–q`Àö%B&ddJp³ÙD"‘èó®d2™žt9~||<Ô#{`½z¶†ÊÛÝÚÚ²û½¶ÛmÄb1<ðÀ”™Q˜PÆ©ž,K=LÇs„øÀ"å{d1BæÀ"dÜ0ºÓžjïnL“(â‹ÖOr¹¯Å7~#Û2‘űH/UƯœ—Yi°Æb1ÏÖ;;;S}àÖÖÖÈ69©T <òˆ=±FîÛó@×õ®çÒDo¡¥/~ ¸óMj_+ƒQÄG%ßè™'°wÖ3MspQÕà/评, 
‹\¿ÕÒæk¿¶ôe“SÄ"e\ü‰Œ®?i°JqƒF£1Th%À¯MâîÒÛÓ`OwÍŽÛ³@ì*ÓI`ø!ß}«§¡!KBÀ,2îtÈíÁÊýÚ$üX¿à*H§ËÐu:dH`ø!ã{{*xÔ·†¯¬GGA_"‰#÷°¦R)¤R)T*•¤ïtw]W›ÿØÎ‰DÓTâÛçX×4FXÉÔ,e„UÓ´nK›äɃ%¢/í=¦ÁJ–Š‘VàRO"ÉÁpö¬éÝÒ&›eµwi¤¬ÀÙ³F¿ÁJÈ”,„5 µ‡µÈ>6dy¸zõjïôÈ“%ò,¦“¥…Ùd™a† ñ›¥Œ°ö`ÐkJ¹!dIˆÓ8%KŽišÝLB–϶d„,}ûW ™‘å°®øÉÿ8û@uÖ×ÙXž,5æ(ùfJ0‰(–eõ;¹ž“%¢Çá¨ë ‘™Ynƒµ ÑþÑ×±‚*Y.4iîdi+lo)Á$’†Ñ_=•ë9Y"t]W«eù<‹df–Û`ý/_þòAFWÉò‘Ë©®«h+{R’%¢¯º¤T€HD1MWÁ¥b‘z YzúÄ—Ë,"F|ay÷°¾÷³ÀŸ<”¾—ÑU²œ”JÊXi"K÷ø‘eƲ¬nºd­Æ*ïd©0 £]eOaâ¡7X›Í&b±‰ÄdoüÇ7þe·¾6!!djù”‚“Í2ºJBÍ42^*•‚6!c1“|[–Š@}„x2­Ž¢iZ7ºÊlâ¡5X[­ …šÍ&%üåry¼7?úgÀÍ/~ýíA_!žÌ$ßN ë ¡Ä7t]E£,K=èµ'2‹|§Ói%ë«j g6 ³È·]-¹NßíÖíím$ †Ë—/£Ùl¢R©Œ÷æoýzà¿¿*èK d 3É·“xœ©d$”ø&ã¢øÈ>¿ƒ*A$pf’o1Vs9f‘P2óú],R¶‰¯„Ò`mµZh4ØØØÄb1¬­­á`Ü´™8:,IH™Y¾ 93˸iªÊ’««ÀÊŠzîèˆÎ f–oQæ©Ð“âËú­ë”oâ+¡4X©TÊ~.•J¡Õj=´…²¹¹ô|¥ÑhLaY2(ߊe”‡e›³Ó2³ŒÇãjïÓÁpõª*0A–M–qÎNÃÌò]­F^™_FYX¶ù:-3Ë·e)8Ë&QŸ³¡ÜÃ:lR´ÛmÄb±¾çñ_ÿëÅoýÖoáÞ{ï ú|áÊ•+K5a®_¿Žv»mOúÓøô§?[n¹[[[ ÿüiäþüÏÿßþíߎ;î¸wÝu×ÂÇí7a‘? Ëœ½rå žyæ™À>ß·5|?°kðƒ°Èƒ_„eÎ>þøã¸~ý:^úÒ—Ff çú~Â2_eý~ÿûß7¿ùÍ ÿ|®ßŠ°Èƒ_„eΊ>éúJƒµÝn®_¿ÞyÇ;ÞaæÛÞö¶ÎG?úQ߯Ä5‘é™E¾ý8>†Éx¯‡ÌÆ0¢

*• Z­._¾ Ã0H$°»»;ö˜ÂxMd:f•o?ŽÏƒa2Åë!Ó3JÆ£(\à p:×ïQc ãõéáúkšˆ -æeå±ÇëÜwß}=Ï]¼x±óàƒv:åɸï¾ûz¼#¿ò+¿Òyë[ß:ôب÷΋ëׯ÷}æññqçâÅ‹c)Œ×D¦gùöãø<&ãQ¼2Ãd<ŠòÀ5œ§mý5¦0^™ ®ßῦIùÊ  æe%“ÉÀ0ŒžçŽqë­·Ú¿@*•²§R)´Z­¡ÇF½w^H¥¸T*…f³‰v»T*…­­­±ÆÆk"Ó3‹|ûq| “ñF£¹ë!³1LÆ—M¾ý3e<:œ¶õ{Ô˜Âx=d6¸~‡ÿš&…ëØÜÜ´…áᇀ¡BpõêÕÇÚíöÐ÷J¿,¿¹qãFߵܸq;;;#…zœ1qMÄ&•ï°ÊÃ0âõÿp˸®ë_VyàN¼8 ë÷(ù£ÎEüƒëw8¯iR¸‡u<ðÀØØØ\ºt €€A<õÔSݸqcè{E¨ýÆéÝyôÑGñè£bmm ›››#¯gœ1qMÄ&•ï°ÊÃ0âõÿpËxåk8ñâ4¬ß£®)Œ:ñ®ßἦI¡Áº2™ ÖÖÖðÐCÙ¤aw¯×"‘H }o"‘˜Ë5œ;wìô@-ívFcä˜f=NÂˤòVy&ã7ÝtS䮇ø‡[Æ—M¾¹†Ÿ^NÃú=J¾Ã¨sÿàúÎkš¬s¢R©ØžAö‡À™3gô¦Þ´Z-$‰¡ÇF½w^ KˆÅb39ˆk"Ó3‹|ûq| “ñ{î¹'r×Cfc˜Œ/›|s ?]œ¶õ{”|‡ñzÈlpýÿ5M Ö9qîÜ94 ;Œº®ÛÿüD"L&ƒýýýžã«««Czï¼H¥R}%´+•Ší™™uÌA\™žYäÛãó`˜Œ¿þõ¯ÜõÙ&ãË&ß\ÃO§mý%ßa¼2\¿ÃM“rS§Óé=ˆee{{ûûûÈd28>>îÙ ¨œôÍÍM$ {SóÎÎb±ØÐc£Þ;/ä3ÅK5Éõøqœ„‹YäÛãó`˜ŒGñzÈl “ñ(Ê×p"œ¶õ{Ô˜Âx=d6¸~‡ÿš&ëœq–L÷Ú'Ñn·mûø°c㟳Ž)Œ×D¦gùöãø<˜çœ¤|Ga2EyàN„Ó¶~:Æë!³Áõ;ü×4.4X !„B!„„îa%„B!„Jh°B!„B %4X !„B!„„¬„B!„BB VB!„B!¡„+!„B!„PBƒ•B!„BH(¡ÁJ!„B!$”Ð`%„B!„Jh°B!„B %4X !„B!„„¬„B!„BB VB!„B!¡„+!„B!„PBƒ•B!„BH(¡ÁJ!„B!$”Ð`%„†®ë°,+èa²pü”}˲ ëzЗD!„̬„ÀX]]…aAƒ…ã§ì†ÕÕÕ /‰œbjµLÓ z„,Êûâ¡ÁJ!„B¦¦^¯S'§ÊûâùÊ pÚ1MµZ­ïù\.‡d2  ëÉI&“Ð4 º®#—ËPie’ –Íf‘N§ís‹E Ôj5X–…t:l6kŸ/£P(LýzBF1Ž|@¹\†eYž2æ—ŒS¾É"Y„ì—J%”ËeÜtÓM=ï³, år¹ï=„LË0yÖu¦i¢^¯4Më‘Ït:m?G„DYå=NÛrÇ‘ËåÇP®§…Ö€±, †aØ]×mP‚].—(Îúúº=IÊå2Ö××í󬬬ôL09nY,ËÂúúzOZ¹\F±Xœúõ„Œb”|@>Ÿ·ÿ$“r®YdœòMÉ"d?ŸÏ£^¯ãÉ'ŸìùÜÕÕU["ÄÆ‘g'Nù”Huf‘÷?û³?ÃÙ³gmgc½^ïÙ²A¹ž’ ×®]ë¤ÓéN¡Pèt:ÎÕ«W;:W¯^µ_“L&;š¦ÙÇŽŽŽìcÕjµÇí¿tªÕªýw:îär9ûïB¡ÐÑ4mê×2 nùît”Ì9ÿ®V«¶Œù-ã”oó’}yÿÁÁA€ý9N¹&Äo¼äYÓ´ÎÁÁý·[¾å9ê$$jL*ï²;ß/z»¼–r=9Œ°†ˆ|>d2‰R©@ye4MëIËf³TªX2™´«C:ÿvòp¾7Û) ƒ˜ôõ„Œ‹[¾MÓìßò7§|“ ˜—ì;ßt 9É}‚y0HžÝ¸å NB¢Ç¤ò.2›Ïç¡ë:âñ8zd™r=9ÜÊÅ"LÓÄÁÁýܰ–¦iÚû”œxÝ  /ùeœ,‹”}Ù+˜ÏçqõêÕ /,!ÓÈ3!QeyO&“888°·ñY–…\.‡jµôåD¬! 
V«¡V«áàà ÇËÇ­étÚöÚâ}ç¾%&É÷((ã$ê,Zö«Õ*,ËB½^·‹€âÓÊ3!QdZy—ÂIÕjÕjÕn;–N§í‚©dr˜0†a ŸÏcoo¯OÉf³öfo@),RtCÓ´ž¿î¦oBÂÂ0ùeœD™ d?ÛÕ*ÙvøÅ8ò<,+Œ(1‹¼;#«€Š¸ÒÁ3;4XF*þ®®®â¦›n²ÅbÉdÕj«««X]]ÅÊÊŠ&JI±XÄÊÊŠ]Aroo/èK"Äf˜|‚2N¢L²ŸÍf‘ÍféÜ!¾1Jžãñ8ŠÅ¢g+B¢Æ,ò.-lΞ=‹ÕÕUœ={Éd’µfä¦N§Ó zÄIK&“vVéÓ$Š‹³‡¤‘²LPÆÉi…²O¢‚S_q”!dGÞ%;2s“Ð` 1¦iâìÙ³8::B:†išX]]E¡P`û,žxâ 4E~$‚´Z-\¹rO?ýô\Î?ù€gžyFƒ72’F£ëׯÏíü\ÃI4›M\¹r7nÜðýÜ\¿IÐÈú=J¦¦…ë7 ÑÁ']¿'6XÅ@Íd2H$TÊÀþþ>677Çèîî..\¸ày|Ø„vqÏ>û,ž|òIúó’¥åøøxnë¼äP Ïáá¡í%d‡‡‡sSv¸†“ ™—ÁÊõ›„Y¿çááúM‚FtðIåû+'y±úÎÎ2™Œ] [Ó4$ œ?Íf³'•ÀÍþþ>b±t]‡®ëh6›8>>F¥RÁÚÚÚÐ÷ŠìÅwÞ‰t:íé1"ÄI&“€¹,¬ó’o¸ë®»(ßd,¶¶¶¶*˜®á$hÖÖÖH$|_ù~“0 ë÷(™š®ß$h¦ÕÁ'2XÅ3(æ$•J!•JôêŸ;wnèqɧoµZöäpþNH˜¡|“e‡2N–Ê6Yv(ã$ªLd°Æb1*e@~ÚíöXýL&ÓcðÊÞW§W&“É`ß~N×u¬®®ý]‘ 1Mõð"âñ Gè?”o²ìPÆÉ²BÙ&ËeœD•‰ V‰¢nnnöä¿·Z-\¼x‰DÂ3ú:)[[[ØÜÜD£Ñ°ãx èï*ò˜&P,É$P*–Ôë@6;ã±VëþnYêóãqõùñ¸2Z‹Eu¼Pðƒ®šÖ5|“É ¿ÅÙ¡|“e‡2N–Ê6Yv(ã$ŒLd°ÀÎΊÅ"Ο? Ûx8•J¡\.O<¯|÷T*…GyÄŽØúaGËÊee¸ ÊØ†i*cT R1s9e:_·²ìí>§ÃPã’h©iªçâqõ¥Òès ʰµ¬~ƒUŒY1Xóy ZíØ¡|“e‡2¾üÔjêþtŽÕêhG§e©uÛ²ÔZ/k¹83å±²â}®lVݳÓTŸoÀÖÖ½xÃþr®×MÙ&ËeœD…‰ ÖX,†4›M{¿j,ºQ{b±Ø©œ$b”V«Ýç CÝôÅðÃreE½.Vï‘÷ÊM]Ó”!*QI1$…x\) Ù,°¾iÕõ®ò’Nwë$†o<®® è*2¥’ú ]¤Wµ¦?¾A8¥'ž¸ÉäÇæóOƒÓ*ßäô@º®Ö=·³Ò0”CÓI.§ÖsY5­ûpfâ”Ëêž´·7øs- X]íÿ\¹9×ò££þ÷‹ÓU~—sÉ=°R¹‚ÃÃOòR¶É²C'ƒòºA*§ÞoYʦç³Y²#'2X677qùòeß TÒM›¯³Ü¬‹EïÈbµª^#Îlvºèc:­ù k”ЇÛ0Ôq‰„з܉¤O‹ôÊJ÷úœJü.Ê‹ËîsˆG^ÒŸ˘Ÿx¿ÿ!„Ì‹ZM=Ĩ[YQëš‹ù¼Z¯™4ºÞUd½–µ²\VçK§Õzè\Ce‡§s¯³…‚ú\1„-K=’Iµ>;dg„T>cœûT2Ùuf&“Êy¹ [BÈ鯲ºº–sÞ²$ëPr'å $SÓ0ºr*ve©µ>¡ŒU±ÜNÑY˜x«”Ã^[[[äw¼Tèzï¯d²7² ¨›þ t]§Œ« ¢PPF (é´ú\ËRÇeló^XK¥®€ŠÔ&“ÀÕ«Ý”d'N7V¯œÑ€¿ÿ÷Çááõù^ !„Œl§ †ŸdšÊpu®k…‚Z7Ëee̦Ój]ç¡{-º†à ’ÉÑk½Ü«ÜÑOy®P®M •z²œŽn‰$ Ctx\ÍÕz½ß•¬9MSç—9)YÎy" ätR«uí`¼ízbPšf7ðU(tQ"SάÉ̬׻¯w“Ïwƒh^cp©ŠEu>ç˜ç•¨«ÛΗ‰«omm¡R© Õjy¾†=˜†#‘KùgÊÂèµ(f³þÞðG±·×MõÃÏùsQU}ǽæQ“xÑß!„¸1Œ®2t ÓìF E9–5V”`I»u¯½ÎµOR c± o2Ù»]cÖLB!óFÓºiëƒ 6z¿”2'™ â½AF’ïÖ9$[Atwê¼àt¬;•u™;bœHš~>Ü}÷ë‚þºÉœ9vw˨׻2-—õõÁFk½ÞkzÉh6«äéà ?ëÅ]­×{3sâñ^gé(ä܃œ<{{½»I™xëöö6`ww×ó8 ÖáHê–;M* ÈžVBÈpt]-îâ©ϺËòN‹¤˜¤·[ìI4ÅØ©œ¸#˜Î¿-«[„hœ{NfÍ´™–elQFÂ…DòÅà£ue¥›~è^;Ýû·%¥iÝÈd&¨¹æ4(s9¥¯‰aôn‡]Î9¯½6?œsÖ9w77ÿÀ÷ýµpAºF¡8;wE2eëŸÔa‘{Ã8A™tº»dTf лdR{`Ýe–ûÓÄ«áÌE"ã\Ü!þ £³Š¨sÑÆ/¼ ë]ƒR¼ðbL 3Jctzß C)KGGÝ 
Û~Ònß9¿/œôàN ”âqš6¹sR¼Ú"o„œ$j ¨5ÑY°Ëk³dH0ÉR¹ô:}¼Òmåƒô2é®Ëu3äÊeî©>­È}Û]`’î,‚ tï–¥î^¥×Ö¿I;zŒKس'6X`º®£ÑhP%¯5Mã¾Ö1àGˆ?Èf1L½<ìÎB“ {=œÊ³ö´‹úÊŠÚ-…$ÝÓySš…Z­{s|úi¦”͉ÚÈÿL¢íδÀY Mg±8B¢€3]w¤õÐÝ‘V~år7Òéµ–Ë w,ãœ{ÉéD¦©ÒuNùY«)ÝÃYfй¥kŒý *ƒ&,Ll°V*ìîîbmm €v»íím4›M\¸p!èk -†ÑÍO'„LO±Ø-Væ.©îDž/'K·•Â~ ÎóIôÌkÜ++ÃÛ|Hšq2Ù,‹BçüŒÍÍ÷xã<¾þSìi¯¸×NBNÒ2Nê|^­ÑµñšgÏöFu܆g6;Ûž·yÁ,¹ÓG±Ø¨:«åJ‘$ +ûâÌ× íu¿'ýLd°¶Z-ìîî¢\.Cs™ùš¦assH$A_—'RÁ*WÊÆ¬}='Å«ª-!d4¦© Ù?4éÞŠx\ÍýaÎ"I'“*vóJ»†Gh5­·]ˆs¬´®’uËo£šŒÆÝn‹Ò¿&W«]'›8EÑ—B.W¯Ž>ïiŽ(‘ÅbYÝŠ»N‡r¹ÜméèDÒ‚ÝÌSw8ÍLd°@Ÿ± ¨´àT*…ãããЬõzwãu‘Êž»Y:!¤1NÅ“)½¾¼zÿŽË8ûKÅ ç‘qÅOöBjZÿÞ\à é"Žx¯µÙÙKWªcS™'ad}½›Ò®iêïlVÝY{&o\E+(Â<¶ÓÈ´¼‰*õº2N¥‚y:­²ÔŽŽÔZ-{Piˆ’¨Q,ö·s"żÂP5W×uèaÜ̽`&ް–ËeãþûïG:F:Æý÷ßf³‰ ¯gbÖ×ýo'á…³qNdÔ)Ç^¼öµ1|êS1 ÇöË=D> õZ ¦i¢X,â@*•_±,õH&»Ùe Éիʹ±ºÊ¾‹$úŒ³´\î:ô‰ëëÝ=Ó++½…“¤Ç¯W¦e˜)‹ÈårH&“Èçó§Z˜ªk«ÕB,ÃÖÖ Ñh Õj…¶Ø’i®ÎëT”ùìÀdm0F!UǼp+ØbPZ–e¢Œ;‰Çã}Ïéºn{bÄh5MÓ~6›í1ØDù¥=£P(ôœ×ià†r¹ Ó4íó†aPÆCYÜ\ŽkŒÇã¨V«}Æô<1 ëëëÈž¸ˆ-ËB±X´[2-+RíNúÚÉ"½²¢pç¢-ýó„IìyÓT²müCäμI‡½êe8Z–…ýÑ{qûí×pîÜP¯B×ãH&“¶<‹\麎z½n϶»II™GË ‡Ê«_-¯ï>çœGÎÔ‘s÷ØÒ>y·t]·?coo¯ï¼–e¡\.ãèèëëëÐuΟ‘4ñl¶·Ú£³È+1“eBR!ÝŠ»®+c5™ìVñÃ0P—âèu&zá\se7 £gÍÍf³özoš&êõú@‡½—“Ñ}J§ÓH&“#uç¸ Ã@.—C¡Pè»÷Ƚ.#—Ëõƒóó4à\·ŽÔÚ.º<='Œ™ª'“Õ0 ‹E [7Ò4m®zèîa`bƒuÛÛÛÈd2v ð¥K—Ðh4pᬭ­}M}$“ã¥rI3`@)2ÅâxïsG?ÂcšÀ¿ù7‡øÃ?¼ùü?ëy{±t.蚦áæÏ~ç>ò¼p‚»Ç¤‚;ް;vy­Ü4’ɤíý…®ëX]]…¦ižÀ=–z½Ž|>t:R©4•¡ÏçQ­VûŒôþáÆÙ³gm§Ë²±¾® ÓB¡×›~tÔM‰±,eÀ&“Ý׈ܻxùÞ>ñ‰~ÿ÷ïÂÓOßÓTï={ö3¸÷yFë¹OÀ<ÑþÅØÌºrÉ®\ù{¸é¦;ð?ñ)߆oùƒ›ñ—¯zZ/{™­è‹E{!Îf³}çð¯{ÐyÄ©S.—mçŒ33Á9ÿ‡)BíJ”ŸõõuõÌ‹r¹l+jÕj«««¸êê!éB¦iâàà 47–°#²›N÷*1c& ‘¹%ë;2þãìC}pÐUâe½Ôs¥R×a)ë”Û0ô2¾Äh“ÿ£dn‰Âít&ŠQ)¯wfy¹³·äž°¾¾˲lå9›ÍbooÏs^8Ï+úšÊñx|躩iš­ã”Ëe¬¬¬ —ËÙç}&NÃ0 ¬¬¬Ø÷¬z½n?_*•zç†aPÖ}BÖsçm7_ìŽz½nëÄNýGíIÕ4 ù|{Ž‹( X]]µu«t:r¹lÿ-Žk@„N榦iv ÀéÌ}^(—˨ßñ ˆîãèš7u:θ/n·ÛøÎïüN¬­­õ)ú»»»¨T*xôÑGÇŽ´6 œ9sfàë›Í&b±ØXç“e? 
ÕÕn› §WÏùO‘Ož3 Íæ-xË[^†Ûo¿—.¥ð5_ó~ê§¾¯ýÝã¸e©j“îüv—Ež¯T3àž¨²xË ,›ÍÚæjµj§DV- ·üØ!uþüXŸS<¹—<4ÐyÈ‹×gø%ß°¹¹9Ö>qIiœv±¶,¥È¤Ó.]z¿ù›ÿŸÿüG‘L&ñøãÿ/~ñÇðñKøîï~#à‰_ýUüŸ¯znüÓŠOþñã=ׯ÷yÅå¼++ŽÔJËR.ýZ­·l·®«çy·™PÆ©ó@×HõJÝw"k…;;â}?ôCøÂÃãÍo{^ðó?Ó4û TqÞÈ E²Ä£_«ÕzŒÖqåe¾†{ašË‘â+ë¨Sùuß‹L¤D¸³"Ɖ 9³k&E”-§#T2F9Aç-/A­ß~S¯«åÔí÷–ín…5۲喩A뛬ã`Îl®y+¸óÀ4MÔj5ÛÀöB”|Y«MÓD>ŸG2™ì™—¢ïhšÖ³Õ%NãÃþ0~çw~g®×ÅõÛMŸ^á#N]Õ)û¢ŸÆãqÛ1#Æ ;º>ˆqÛÃdÌI­V³×ÑAY`–e¡^¯ÛN(g惌Ùhk­V«¶ƒÉk«•Èn±X´ù"Ëét…BÁs]¸téþæoþ¯F¶˜È`m4ØÜܸ?ìþûïÇ… FîgÝßßG¥R±Ûãd2”J%Äb1*å¸P( ÙlÚ_ˆ;ÅÔklƒ&K±8™×ܲ,Ôj5>‡FãïãùϨ'>øÁ—âÝï~^óš¯Ä÷~ïÍÈåºÞÊtÈf/ÈÄHóÉtZiW“V¤¹é&•ó3k.›ÎÒ[d\œùE¥Ò豟ì†oŸ;‡wÚ©5NÞ÷C?„•_þe\|á ñÜ÷?à…Ÿÿ<^øùÏãÚí·Û“B&ŸWÔJ˜çâ:ùÆSxdñW\$j8(íã%/ùnüÑ=€ÿëÿº¹G”dñ€\6 ÖkªU×ç:'Åâ€FÛ"c'çe)Ùu/æ²ñUŒKÉk›Å™R¯+95M%ߣ檌չ¬Tƒºa¨kpçãÞ÷ðÃvŸw¿êUøÞù/ÕÍêä;#Ö™êïL%®Õj¨Õj¶ƒæâÅ‹xôÑG§ÿn†°è5Ü/äß7p>ãQ÷§GÜ­ø8#ò»3RådнYÎ/F‚SùrŸÃs*hnåʽíÄ©išfGÇDis{èÝ ‘ YîÌ7ó’— ×o¿ÙÝ}ÿæß¼/|áøº¯»ÙVNkµZSѹ%ÀXé²drÄPïÖ9ÿ¿aøÙŸýY|èCšË8¢º~;3éÙ>ŽÝ#k’ÛÉ&ë¦;òí^cën<·×°eJù™ôÒÁG!²œÍfmÃUäÆéô2 _ûµ_‹×½îuxç;ß9þt&à±ÇëÜwß}¿á oè<öØcCÏqýúõÎÞð†Î{Þóûï·½ímŸüÉŸ´_óàƒvÞñŽwØÇßúÖ·v.^¼8rlƒ^“LŽw}×®]ë …N2™ì …ÎÁÁÁ€×©s^½:É·çâè¨Ó)Ôcã¾ÎI©Ôé¤ÓÎÞ^÷¥’÷k‡]ȵkêÙl§S­Žþ܃ƒNGÓÔãà ;÷g\½ªŽïíu__(¨ÏI&_ïµkÿýÚ×vŽßò–ÎõW¿ºsýÕ¯î<ûò—wr¹N¡Pè …ަiMÓþÿ:áò2 ó’oyß8_ÿ SµZí$“ÉN2™´¿£QßÓ$ êómr9õ¿Ìf• ãÚ5õºA2*räF}ß…‚:§üÌåFö¥îÙóàõ¯ýDÿ¿q b ÷¯Ÿç¾Ö9888—œs¯P(دwÞkä!÷ MÓ쟅B¡³··×¹:ÓM§Ó¹zõjçàà stt4—ï@äOÓ´N6›íìÉýÇõíííuªCþGGGt:m¯Q…B¡S­V{®òôú=ýw¯–ÏJåo:Õjµ“Íf;Éd²óÚ×þvç;¿³ÙétÔÿ¾Z­vJ¥RçÚ˜ë ŽyÉKT×oç­}oOý-êãè÷ªõ7›ÍvªÕjçèè¨gÍÇzHº8¿où®§‘—‰ö°JÖíímlmmÙÞ˜v»J¥@UF³ÙD»Ý¶÷ºÆb1d2Û“ÓjµÐh4ððÃÛÇ×ÖÖ°¿¿?•çfÜÌXgqwTN ׈C¦\ž¡ “iv7¦Óêï•q¥)TX@>XÆ–ÏŽ^JauµÛùØËU«uë€KÉV'ñ¸:¦©qf³ÞŸ'U9|隦Þãþ²LS˲z_?Šx_ñá#¡ëÝM –Ͳ 9?ÃÙtð'7‹–o7ƒ¾Æb±]×mÙ–”?S²z²仿zÆC—€¿·ôù”¤ô¿y؆)[ypÐûÿó “årê³óyõ»lÐ*¼Ûî8¿0MSŸ5(£XT¯qÎMë¯äLßS¢°TêÎ ÷5;Ç"Ñ4Ó´çi6›E6™ ›'Þs¿ ZƧEŠ+y9ÅS>,ªä¬Pí.’'ÑCÓ4팉šÈ~lI×.—ËX=ÙîáUlKÓ4Ïm ~0ÏÈ™ì-”ÂÇÇÞ³—N§qt2¯œ{_þò—û~-Q“mÓ>øÁgðÅ/þþê¯>‚Ÿù™3xðÁ¯@¡P€i¦Q.ï}¯z­{9DMÆ÷mWú¶;ÿÜBû¤˜5~é™]*—Ë( Øßß·SÉsßÙÙ±ØAd2™¾´¥ããcÜzë­öï@¯á›J¥Ðjµ¦ºÀqš½ËÆb/åPº ´Å‘LÝ©«™¦šmN…D×»õ¸½n(ñ¸z¾Vë=.郔Z©6¥ëýƪ³ƒt/—U'ð½½~ãYÆ %Óé~ëháéõдÙöØ:ß÷_¿»šJ¹¬ îR 
ÐuÜý¹Ïáð¿qúÏÀ¢åÛ‰d»¿Šü‰ñèÜß8×…ûdÿªUXÿ£P-ZN÷?Ïåçûx]GµªätuUý>nîg©¤dDRÙï£pCÂ]ýaFY,/ùÓT úó]ƒ”ñI29$]g% ŒÅB¡Ð×âKRje»Rè$ÌË Óì:ª £›¶/÷’)½¼^û#MÓÄûßÿ~<óÌ3¾^BTd[øñÿ~ï÷~ßó=mìì|¿#í®[€'Q’qËRª±{×ìPS»ƒ,‹e»à‘S‡Çã,F¸DŒ4X[­Úí¶-¼Ò‡U×uÛ#³¶¶MÓF«n677íÉ!Þœa“¢ÝnüŒgŸ}O<ñFÏZË®#KEÒA{®í«ûðL}œÒéþ›¶4ü†Wo)klƒ/R"¹nÖ×»%Åàua鵬þ±„½ÚÓµ®Ýw^ð<ýÉOÎõ#ý”oxæ™g†I(—ûƒÈRú|Q½»òy PÈ!y"b㥵€Ôt'æfS*M§­U«ê‹tÏ™°•õŸch4¸~ýú܇±ˆ5|ÜU#öÅXge­V³‹µ‰‘4nôÈ Î÷¼R•Ö¦?o­Ö– nÀlÙ-âÐ=ß¾ðÜzë­ö¼y°èõ{RÞõ®Oà×ý ¸xñNär?ÜslÖÖ4$Xdý%S³Öõ[×ÕÒQ,öw:âqÀ4 ¬¯ç‘Ífû*è“ðÒjµpåÊܸqc¢÷aÝÝÝE«ÕB&“¦iH¥R¾ôþyàÐjµ°»»‹K—.akkkè èÆC'Ë“O>‰ÃÃÞÉbƒƒ µZ º®å‘Ã3G¶½…Yf°bÚq”ï‚' H?ÑhàÊ{ß‹Ï~ö³sý8?åP Ïáá!x*<îÌk©®|4Ïæcõº’¿c.™T7)Bæˆ5éôb«ωÃÃù*óÂ"Öðip÷™”{@¹\¶+»'“IÔëõžÌš\.ž´I‰XеíWú¾¤ ¹ïCår÷^"IîU¥RÃóx|ö@ ^Çõ7›M\¹rÅŸïc‹^¿'ûJ üëýyüÜÏÝy%¾!ë÷(™š•°­ßù|79CnÅ^Ëž³ [µZdµéÓÌñññTë÷Hƒ5‘HØUžt]Çþþ>šÍ&R©”mÀN‹u,C¡PÀÖÖÖÐ=°Ãù;ï¼étÚ3Ç~À—Ëå¡‘ÕH iÊBp"49‰=¹åaÞŸã‡|À]wÝ5t‰[oY__G©Tš¯|»¢2² TRç£ì߈2[[[vÌ ^Di¸žL&±··g§ùN]Å}ˆÕm]c2—ó® 0.bPz½_5KÕÔ—Y*Ÿb>îZ#[P„a[Dêõ¾ckkkH$s]ý~ËÏÿü³(_†ïú®/áÁï™Ûõ“àõÛHü0¶~ŸÄ"1R‰|®Îx27¦ÕÁ'ÚÃꌪ꺎F£ÝÝ݉Œ×J¥‚f³ÙSÞ]rçàÌ™3TÈX&‡ó÷I”Å'i`‘6V·WÛ0Æ«ñM|gÑò-H=!ÑCËå²Ý¶`®ˆ}B:­ž’.1dùJÆÇ%ŸïM¯Õj(—Ë=mRfÍZ’לÍv Õ9÷‰Mf¼ŽS…0›í¨Ò´vÒhë(Ü‘×|~p/®z}†¢“vÙV{S ll|¿ôKïXÈg’9 iãfY½éH¦‰WþíßÎå£Ã,ãÃ’,ËêÙ¾ANÏ›öš¦áÂ… xøá‡±¶¶†f³‰óçÏôêŸ;wF£çuº®Û“!‘H “É`¿ç¸TUœ/¹vVuŒöØc}ÿäOþdçâÅ‹}ÇFq||Üyì±Ç¾ïúõëC{Í« ­WSaiBãt9ž”RiþŸ±d̻ɵßòÝé o$žNw¯V«B¡0·k³98ètñ9d*æÕx^XÔ>н=%ÿò÷^'™L·Æ_»ÖÐÕ«ªÃ}:­~‡I^ëE¡ÐOpŽËoŽŽ:lvöóìíõŸgÈw1Ï5|Ñë÷0J¥RçÎ;¸óÚ×6:×®]›ËõúƵkÞ²ç$—SsBÈfÕ£Zíÿ__½ªÎ9 ¹É0ˆ££N'™TãK§{ÇàæêUõºlVéSš¦>C^ãÌåÔër9õ𣣙¾ÊÓ²~Ë×çüºŽŽŽ:Éd²S(:{{{£¿Ë…st¤dÁ-'jç I&g_¯ŽFËú0äÞåüþ¯]S2žÍö^ߠϹvM]·óW¯v>¾½=±¼Œam6›( hµZvÿ&]×Q(ì4‚ÝÝ]\¸pÁnF<ŠD"14'^šÏŠ—3YºÂ<¢ŸÎ‹ôJ& gQò tû¯vÿÖ—LN-‹”ñaF7½\Výømoš*z"žñ\N nuupÙKç{Ùúø ªŸ°|®ó‹šG´Ò£(ÒTd³½÷G?¾‹) ‹l›¦‰Z­†ü@<ÜÛ,$j鎂Iø,—ëf8+ë «Ê>Îÿ~ÜžîÒŸPõ^u’ôûqq—%a‘q ÷ßìn5jµÞ(þ°±åóýë¯iª(d:ÝLjšwßörYÉŸ×6‰AxlÏB:­nŽ’Î-­0Ý}ß¼L¯Ú^óãàÀ;…Ù«R|2‰kÿðó(ºT(J¥ìjÁìbK;;;ˆÅbØÝÝE¥RÛ`]^ë˜išÁígš÷”trjp¬ KyŸ5]…,$˲P«Õ‚¯üî•Î+•}½Æ%ýKãq5™g-”w÷„:S´Ü Å´ˆ2&çÒuÿZA¹SÊ¢žþ:#år…B!¼Å!©¼’nîÕ£}uµ›¶ä=#_жeË„3ãÔ²,äóù`ZÕF7ÝýÙNãmœ J§ûçA2Ù}¿ÌIm7ÍÞ¹#ë«8{œó_æ›`Y]#ôà ßÇ»Fk­Öí 
ÞîaYƒÛŸÅãÞιïÌ‘‘k£Ñ@«ÕÂÃ?l÷_j·Ûh6›¸pá‚ýÜÚÚ*•Š/MƒýB¶FôË÷x’åÁ4»ëD½^_ÜB¿Àb(„¸qÔ' ”ûl6^å~м4M•z~(Ô^{BýʺÈåT¤ÀéÑŸW4¬ÿË`š& ÀiVí@LhX]íþÿ¥Oð £ìá^_W³ q uf¤°R.—›o@I SM뮉å²2ÝÆj>¯ÖÒAÆ›ó"¤;ǰ‚{³»Í—ŒA²œŸÛýÂÔC¬ýake<>Þ¼SeÈÕwQ(„.ø5Ò`m6›Èd2=Í‚õ“°¯S¨æÙÜxZd¼ó;·,+¸4ƒEÊR䃜œûá ÃX\öÀ¼[æ2ÉìÔº®ëz4 Ò”Jê!Q*¿,Q´D óë¾ ifd®äóy|ë·þg<ûìUIÙÄ#äÀ0åÝ‹tº;Y©ŸœEÀWW×SXIÖ¯rY9 e^½ÚÿÚx\9þF¥çrꆴ²2Y=ççxE;›‡îeYê²ÙÁ-Æf¤Á‹ÅÐn·{žk6›H¥R¡4RGaFp«W¸×Ÿ‹êöÃdÿÕS‡s}¯×ë(SÕ“ˆãÜ&Z,‘ËåÂ]‡i+BÒŒ¥Ç)‰ º®ãK_z1þÇÿHÍ?ƒÕ4•ò.Î’qdpšyvš*¼’± ñ|>d2¹¸*Àb ŽÒ•J¥ñ÷HW«½[%æÅµkóûNBîˆi°¦R)4›MÛHm·ÛÐu½o¯j£ÑÐm:¼š†aD[©„”T'§ É I&vƲ`ÄÉ,mªa(zR,ö·f M£±At]Çí·ÿ{|Ó7-@”VW•£; ó‡œ*T®]·°ֽœd¬Qÿš+Ïõ‚T*…L&ƒÍÍMT*lnn¢ÝnÛi‡bÀ‹Ed2™¡UÇW@3Дà}+gFúŠ‘S‡at{3ëºNƒ•œ œýÈëõzxúóÕëá1VIdùƒ?xõW¯^LÂÔÕ«tv“…#ÑU]׃+‚J"ÅHƒJ¥4M³÷]¸p©T péÒ%»ŠpSÝÁÔ@£PÙìün ,$u*‰Ç»úñÂ1ÅbЗON)Îú Ý·= ]gÕlâ ú§oƃ~E·‘â Òekáë·ÓÛ¤X¯­M,Ã… <­­­amm-T‘Õa,mJp·êÓNÎmo†a,¦ÿ* ”sFõI¸íÂФÂûÕ6†œjLÓĽ÷~¹Ü÷=Bæ†Ä·ŠEs±=³ëõhe8õ¬S®ße°jêáI“×sçÎÙ­kÂl¨¦Ó^}t—øŸ-‰C¾qšÌÓ4ëŒYFÇ =N»Ð4¬ì CÚ2†aàÛ¾-z- ™”P­ßaF ÕõuïjƧ„± ÖJ¥‚ÝÝ]ûïÝÝ]lll`kk+èñÅ]ùY×õåé4Ó‚O!Å¢´µ±+Û¦ô¥“SŠ®w}r¡Ú·mštâ™0Màßý»¯Áÿð>luUZZVˆ„–ZMÚ‹°~GMOvÖ¨9åsu¬=¬»»»X[[ƒa0 kkkØÝÝíkw6Ür¹ð”›yçÎÆ©èÓ†¬c O‹”þ|„,‘wYÆMÓ ÇþU€k/™™Z xÞóÞ³¸µœ2KÀ4•Èúç¸Äãþ÷èŽ(# ViW³±±a?'‘Õf³ôø‡â® ¸rã¬â7š&.«à®,Yø—:{€ÜûWCSaR×¹ö’™)•€O~ò—³–3K†„´7 Mý0#VfðŒaz÷ªÆbÑÜ_aF°Jý<…MÓN½÷å´« Á •mFóI¤ÓÝzV˜Š~ÅãÌ8 3³ÐJï§\ù%Á³ð¬Ç(:%Âʬ¶ñ‹.Ewí¡…+õnæ=Q,ëÔ ôi%ì¬dÁ8E.4ÑU ·\7!SP,wÜñI¬d©1M•úžÍ]šÞâì¥êÆs`ì+™‘0EÈÒ û´ÍE§w1Œ€s›ÓÉÈ2¡‚( ÚÚÅHY Dg UÁ¼°++jƶόaÝÜÜì{®R©àÒ¥K=ÏíììŒýá²6•J <‹Å¦jcYª´DYˆJz`BÅ<å[д€JßâÒê¤Ë"d\0ŒÞBz†a ÇõŽÌ‰EÊ6 [ÔŽN/¨5Ò)W~ÉâeèvhÑuYw+yãÕë2 ìí©ªÞ§œ‘ë™3gz . 
ƒ|šÍ& …Z­µ?¶\.ÛçlµZ( ödÒ4 å F¯q¡{C1oƒ¹TŠæd\2!ßÂÞP¯3ÒDË"e\H&U=üÈ<B¶ ¢˜‹YË™ñuª JÆå˜Éf•ñê\ÐA<ÍÌËRúý)gdJp"‘ÀÖÖÖØqØÜÜD&“a¸|ù22™ ކëÛÛÛH$öñf³‰J¥2ñŹ÷;ªÜ,âA£%,J¾¦F’E³h”ž!Kx¨ö¯óoYFF²½ðv,vª BƉ°’ ˆÇù¥aŠ=¬F•JÅ~HÛ›IÞßn·mã6‹acc­V Íf­V FÃŽêÆb1¬­­áÀ]Ai 5XYUõT°HùÖuÕ)‰‘&²H‚ZÃu½»u:ðžÚîE­=ñ$(Ù6MàyÏûËÅ95ë)%Hܲ”Ìý«dÆÞêë:.^¼h§»»»}éÃH¥RØÙÙéis|| @M ùÝy®T*Õ÷¹£p4 Ãèñ -†óO‹’oAµçZP™ VäSÉ¢e\(—»)Á÷ÔvÂþ×KCP²mÀm·=' YZ‚’q ] ¼Å$‰$cEX …Μ9ƒr¹ Ã0ìG¹\Æ™3gpþüù±¢­±X ™LÆþ»Õj¡R©`mm ‰Dbè¤h·Û=ûì³xâ‰'ì1¸š¡òÈ“@iµZ¸rå ž~úißÏ=/ù€gžyFÃ>‡¦ä©ÕX%8ä4 \¿~}.ç^ÔîF¼ó@ÈÒà™ã¶pšÍ&®\¹‚7nøzÞE®ßNJ% ¯S‰'ºë÷(™š† Öo@é,…B€5eVVÿ™¤ÑÁ']¿Ç2X···¡ivvvú¼ÚÎç···Çþàv»J¥‚ûï¿™L.\°Ÿİ‹{öÙgñä“OâððÐ~ιöóF@„ãã㹬‚ßò (…çððÐö€ ª~mš\è#Æááá\”'‹XÃ*¤*ïô(Nó2X…E­ßn¨§ »~ÏK¾àÖïx<@‡#‹…ÑÁ'•ï‘)Áâ|×»Þ5ôu8þ<šÍæÈÔàf³‰íímÄb1<üðÃ}©ƒVZûÎ;ïD:¶óò‘§À .‘P!ÞÅqãi˜‡|À]wÝÕSجVž{î¹ù/üù|ï„bû¤Ð³µµeWxœ‹XÃ8mÂPEW¥…%=ù” Ñ y¬á‹Z¿¬¬Éä0ëëªÄ< %²~ûÙNÆÉ¢×o!ŸWÛ:s8ÒÓêà##¬2iFMòq¼ú…BÁΣwOŽ3gÎ@OZB«Õšiâ†ÂÏ¢§†EÉ·i7nŒ½ }0µP,z«×½+Jrá?Õ,~ ïf݆nÿ×ö¥bѲ-[ š&ÉHÓ©fÑ2.¨Ây‰¤ÌGž‘k,ó5-A×u´Z-hš†F£Ñó”'“É`¿ç=«6Íuêß /Jã›þž %߀RâŸ|òÉÙ].+ÃÔe©cî²Ü¿zªY¤Œ N‘ ]Á¥ ï-Ä7‚íx88Pr½¨¸Ÿj‚qao/À vêˆ<#Ã3©T ív{¤‡E„ÝYyÌ ISÛÜÜì;fœxª·¶¶°¹¹i—ߎÅbxà&º0§ÓÛ4Md³Ù}¥è:SÆN ‹’o@é¯xÅÑl‹­Ö•ÍZ­7Õ·\VÇz«—qÑ?å,RÆ…d²+¦W|ï½`¬KD²-,, ŒŠû©&HO§z=À-AgZ’™Ë`M¥R( }¥°Ù¼ÉdFî_ÝÚÚšã.ŸùÈ#ØËYÑl\œA¡À*’©W‰û>ô°"ágQò-¼àŸ›MÑÉå¤7Ž2PÅ`5Mu½zµ÷õ^‘XrªX´Œ½þ¾PlñpŒÎÈ¥!Ù®Õ”"¿P™Ëü! 
'TÜ&™ 0€ÄÌ‚È3Ö¸ .`ssçÏŸÇÆÆ2™Œ]þºÑh`ww7nܰ«Œù»ôö¤8íÓ…¥Úx‘Ï+#€Mâ`VùÔúkù±ÇãÒ­AuZr9*èd,üqA™íd¯ ûjŸjü”ízxá ÿp±VBFà§ŒÝDÃÀ¶è1³ òŒe°ÊæìJ¥âÙº&“É`kkkdtu‘Hvc N­&Ö„ú;,ékd©PQÿB¡»¨§Ó€—TŒ[BÈê*píÚ÷?I†at×qÓTs‚ë9ñË^þòÏ-NOaUk7poo"‘cì£b´¶Z-4›M»}M*•š[éíY¨×»ë½9é´r'Y–ªáMÈØÛSŸ‘žyÓ¯8Œón b#úf°ŠAšLö;f¤Y6«æA2ÙÍg£ÂCæ@ Y`„,ÓLSn{4‘gâžÒâÆ]¥±Ýn£ÙlúšBà†a,~¿S:Í(™;éô˜)Á–¥úîu÷.­¯+g ÷2‘à4XsÓºé¥êµªƒ<î…B•Ê™Ò_x¡•¯M Y²54õHä𡉣¢ÙlbssÓ®2$Î!X–5ù@ %iÚèÃPJм›p[ÊŽ¿µ“GÏXN^qn›Ù<9ž;ùÝ qø2 JLS‚øc¬’Þ+-jŠEu÷àƒD >‰Â3Qê¤ì9µ,5a²Ù^Ç !âì/¼0æ­«âÂ0pÌ¥cdÖ(âÌ~œªÉ|>¯´£ZM=Óî¸C)>B±¸¸Äü$”‘š†2<˜Öè'<€ºÇû÷d¡ Öü€ÏÉŸû‹p|¶×ׇ2Zã'×儨BEU³'çpžGƒ2t×"PÆpÊÈ%¾bšÀg?{óð€4>#$ˆŽ=°a(…äà€+‰õºÔhCO‘âyÕêxNGÉ®±,5G¤uÛ2‘S«ÉäÒ-«ÛVRŠïìØyF¬Íf•Jeä‰ÚívÐ×b#щ*›&ðæŸŒ¸·á(Ñ8ydãÞÊ‘nQ#'«P†[*ŠêÄmâä5y(C´ïâGItèa—©qlÐk$RkCÊ0uß­“s¡ p¯ñÉ~[n§œ˜Z ¸ãŽ[‡¿È0X­šDÙû¨õ¼à¥lK–Æ*‰µðS?u}¼¯¯«É ß½Fzi; ë%“,6F£\ …D÷ëõþ>Ã…å~Ið-‹ÅB×Òf¢"É$ð©d7ªéù(ClP¬®aêÔ«êPiÖõZ1l½†(FlÃ#”Y×yçÁ0›'ç¸f Ê@­:¾Cã伌RÙÏ›?yóäûáZãÉÿñãž{†¼@×éM'‘Æés¸ÿItB"F.<ýôÿyR!*%*ÕÃk;ëR(øX!xXæØ¢  ’@i°¦R)ììì=Ω˜¸·ÁÆÑ:T¤1ÁE“Ö¡ ¯"zLýFe½‘X/£³½ 3É“ïD?«{)A¥,K gTUŒíÕ“÷:ךòÉëdϬWÚñ)Æ4ç=ƒo’CÏ"‰0aé|¤ÁJ"H6Û[o NE¼TVVº (û$äärÀêê =´MS9m¤‡vù¤°J©Ä,²SÄÈ¢KF£Ïû×n·Ñh4‚ûH†ztŠE%ôîØ^‘Àq÷jK˘´ã=¼£¶²Oµ†ÁÌ8¢±ÿSÖ =ôBÝÂMeôç{'Ï—Ñ^ R°5t#×Î}¾ò½*B;èTƒÚƒ»dXpÏ=1øõ:÷¯’È“Í*e‡íÈ2R¯º®O^9U*¾Ó!IBŽ´à›ª- tõÕU%ëGGªV[ùþ!QZÙó[u½WŒRëäub¨:òŠ/ºò"à– ¿”éQ¾kp®³*‰¶‡y³Òjggvwfv~óÛïLbÖ3?û;?Ío~ÿZáoå Ãkú!t/ªü,9¯ æÑ¿—tý‡üÆõLR·t÷Ö^äÀo”[þ\ü79·¿íáßâû׾ϻD#‹Yµ=QAíÐÈ!Á7²^uŽ(³³/ÖïÜáÞ‘Tx­ð _Ä7X{'t³±JçäÞ`š kÛ6\×Mïd ô_Œ&˜ç8îÒô6.álþ$Ð…ÛߢøÃ}‹$Zܯçú6¢{q3U¾?ú(fH0â 1ɲ=í»B0Q†²ÌöùŽÕžl7þ3 §uPƲ~wœÖ X]õ³Î—!׫뺡sa]×ÅÒÒ–––pýúõð½÷b„ö®®øÇÿèý!lŸø Ì‹½©fÌâlj¿6Û ã×o‹ œd™o|€§ÜeqÀo¬Ž´XQYg;Ø)ôÅ£ãp‹Ê\Öwœ³UÞû>¯†ÿ+l›&¢sžÃz>Ôžçõ|-0È8®ë¶mlnn†___‡ªªØÞÞ†çyXZZB­VCµZ¸`–eõ>ä´+Àß_x‹s¾ ø=±7à7€¢zä‚^Ô>{xƒ[¢M¤qä`ƒ•ò3ŽŒ/݇+ÓØŒ«þü‡ùЗ1–Å=´)3ãʸãøÓ:,«ÏŽ­–_Ñsÿ`@lëÌÌ –——¡iZç—®ë=_ ~ Â4Mìîî†s]¦ibyy ( Êå20'¯ë>èEýÍoâèp¿•Má[´ÐÄG¾û.ÔÑ}ã^’Á82¬9¹íGðÀC”’q=Ÿ”JÀ³g_„g›[ÖP†Æ÷ 1Šðçi߸Á)L4ØVUUGzs¥\.£\.Ã4Íž^Úf³ ]_MÓ†GoF÷›ËKð{BÃî‹Utï¿ÌQµþøýaRþüÖ~½«4±Æ‘ïð-oa%£Œ#ãÁÛùF#b…`Ëêž H”Ð8² ø VÃø.¼çlE|¦„Þù®T®ëâÉ“'øá‡r9w?Qù€Ÿ~ú ¦ivþŒž7ó;;\´€øoÐONNr9wZuxÐu§ƒ`OÔ²mOž<ÁéééXÏ›fýmY!/Öƒl°N´ 
þŽËTÒ|\piu•[ïM¨à|Øú{à«mÛXYYmÛü®Ã~%uCDîÙ³g8::ÂÁÁ€æ œm{ržÞE•Jð«¡ÿ–4óàÕ‚j6›¹5XGÍ7à?ð Ùl† ŽÚ®‰&ÊÁÁA.;@:ux°X³6M¤¼¬iÕ߀¿‹2‡•Ã'^P;ß@zÏà€ŸñžgñÀÓ§|13¡‚gðaó=ТK†a`eeªªBQ”Î׫Õ*Ea°m{{{‰ 5VUÕ¾Ç._¾Œùùù®áË] èùøn˜ý]w#Õ@ÿFi”K8[Y˜ E×uèªXÇeÔ|À+¯¼ÒÉ÷'Ÿ|ÒýC ÕòßÌsq‚_7/Ç-:Ü0΢¹B°aðá~•Ëe¨ª:ö:<­úðgnX–Õ»+§tL¼ þŽËTÒ|¿qÃ_o£Äí™èœQŸÁêa]__G©TÂþþ~W`5MC¹\F½^Ƕ¶¶dff@÷°×u‡¾q{ÞÊÿþ¿_ýè¯ú{þÞ Û¶ð‡úrÏnJYZù.¼ÙÙრ!Œ†ÿò=tÈäE|ICc’fý½±Áý…I̳ÁJ ]!˜‹ERr7X ÿ‡ÀjÞB”>àÄB껿!ïK$I«Õê}sã_@’4B§t°w•»Áº³¸Wºç¯É€U’\lï*WT¥‚ }3?pUl’DìDC’{«ã×UàzÞB”2Î_%‰qK’‡»“¬BG%$o+{ Hfœ¿JhÁ%nBOõðáü/(3=½«FÞ—D·Áô@­àÎ$#¾!I Ô`劓TP=Ùæöd$Ç9·mS«|ðAÞ—DwH°ewÿø¸­ Éç£ò¾¢Ìܾ};þ›66Ø`¥âsàή JÒ調wvüň’·‡Õ²€ýKàN…""’ G vv8½ƒäÕh°ÁJ©³ÁjYÀŸÿµ?˜½«DDD$"®G@²j4ü‹|¹H)¾ÁjÛ6\×þƒ?Þnƒ½«$´‘óMTÌ8É,q¾¹@$ lä|sW76ò.IBØ9¬®ëbee¶mJ¥>tâö¥yÀ…ß`%P¢|@foµÎæEqëÊI*ù¾s‡Ã%IH‰óýÁ¬£)UÂö°®¯¯CUUX–…ÇömÔjµÁ>| W'%Ê7Q¤šqÃð‡—­®¿þ5WU¥Ü%Î÷;þÙEJ”oÇñ_*2Û”"!¬®ëÂ4M,//EA¹\Æ£AWÑ»€SBHP‰óM$¸Ä·,`qxñEÿ¿;;þCP©<}ê?ñÍ=å$q¾WWýÿr?VP*Ï(Ì6¥LÈk³ÙhšÖùš¦i7J¶7Û¶±»»›÷eäŽùöɘÙîÙQ%Îøì¬¿uÓ?úÛ}<|è7R ¶8lyñžEâ|ÏϾ÷IÆ,Èv¿Ž*•ú»`uuÙòPô{VÈ9¬Q7…çyP¥çëß|ó >ýôSX–…yI0øì³Ïò¾„Tâèè(÷†Ùáá!¾þúkLMM¡Z­Žýü£äž>}Šååe\½z¯¾úêØ¯;m¢ä!M¢Ü³–eáÛo¿Ííü¬Ã}¢ä!-¢Ü³_}õŽŽŽðâ‹/ærþQòÝSooçríi% iå~ êï¼êBÖß>QòQîÙà|Øú[È«çy}žž†Þ,š¦áÍ7ßÄk¯½†×_=ï"¤âùóçXXXÈû2Rs||ŒãããÜÿ}ŽqõêULMMårþQò o¼ñ®]»†Ë—/ãòå˹\{šDÉCšD¹gEÁË/¿œÛùY‡ûDÉCZD¹g¯\¹‚££#¼ôÒK¹œ”|³þŸ(÷kP_¹r%—ó³þö‰’‡´ˆrÏÏàÃÖßB6XÏC¸HUÕЯÏÍÍann.ïKO•®ëy_e`”|À{ï½—÷¥S QîÙ¼¯ƒu¸/ïYåý÷:J¾Y‹/ï\‰r¬¿}yÿ;P7!ç°ÎÌÌè–àºnäÛ•“““žs6›Íöƒº&ËD£K’ï4Žg!*ãE,%•ñ"æu8&­þŽ»&ËCɰþ¿LÃúEÞ fYéº˲º¾Öl61==ÝùÐ4­s\Ó4¸®y,î³Y VŠÓ4 ¶mÃ󛕨ኢ$¾æ<ÊD£K’ï4Žg!*ã×®]+\y(™¨ŒË–oÖá“eÒêï¸|‹XJ†õ·øe¬YXX€išn|0 £ó¯ª*t]Çîîn×ñÅÅÅÈcqŸÍЦi=Kh×jµÎ›™¤×œG™htIòÆñ,Deü­·Þ*\y(™¨ŒË–oÖá“eÒêï¸|‹XJ†õ·øeÖT»Ýnç}²Z__Çîî.t]G³Ùìš øcÒ+• TUíLj®×ëP%òXÜg³œ3xK5LyÒ8NbI’ï4Žg!*ãE,%•ñ"æu8&­þŽ»&ËCɰþ¿LÃ`ƒ5cç—L›'áy^ç ÐÅãQÇ9ž…¤×$b™htIòÆñ,dyO2ßÅ•ñ"æu8&­þŽ;.by(Ößâ—iPl°‘8‡•ˆˆˆˆˆˆ„Ä+ ‰ V""""""¬DDDDDD$$6X‰ˆˆˆˆˆHHl°‘Ø`¥T™¦ Ïóò¾ ¢Ì0ã$3æ›dÆ|“ÌdÎ7¬”ªJ¥ÒÙt˜HFÌ8ÉŒù&™1ß$3™óÍ+ ‰ Ö1 ºê]×Åîî. 
Ãè³m›››™½ Îíy Àac6ຮÔC¨[^Ï+ß3>IX‡“ÌX“ÌXÛ/ò¾€IR©TP.—a4MƒišÐuªªÂ4M(Š‚Z­†ååeT«ÕÔÏ]­V±µµMÓ:7åöö6TUͤ¼¶m£R© T*A×õÌÿ~)ye<|Ìø¤aN2cýM2cý]lìa3Ó4±··‡z½Žjµ Ó4ûûûØÞÞF©TÊì ÏÖÖêõ:êõ:ööö0==ÝÝÝLÎuþFY[[Ëìï“Ä“WÆÇ™o€ŸT¬ÃIf¬¿If¬¿‹‹=¬c¶¸¸EQš¦J¥RçøÌÌLäÍ eè§\.÷}[S.—;çT333™œÇ¶mlmmaaaAš…—$ãEÈ7ÀŒO²¼ògNYbýM2ã3xq±‡•RW«Õ0==ƒƒ)ÆÍ]ÄŒ“̘o’óM2“5ßìa-UUS[ŸöyÊå2ªÕ*Þ}÷]Ôj5©ÞðP¶Šo€§ÑŒ+ßIÏÅ|Ó(X“ìŠqYóÍkÁضZ­Ö÷xµZí 9Èë<¥R Š¢`mm +++ÒLø¦ì!ß3N£W¾“ž‹ù¦Q°þ&Ù!ã²æ› Ö‚QU·nÝŠ<.Êy‚›d}}ûûûãÿË¢Â)R¾fœ†3®|§u.曆Áú›dW¤ŒË–o6X FQ”±¼)Ië$%tEXtdate:create2019-03-28T17:36:49-05:00T¾=@%tEXtdate:modify2019-03-28T17:36:49-05:00%ã…ü-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1080x792+0+0_Ýx+tEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/docs/graphs/large/l3_perf_has_nt1.pdf000066400000000000000000001066671360743507500214510ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190328152433-05'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½K¯mIv×Ï_q›v£Žâýh¸a*Âr ¤ÅÜÔ’ŠE›LÂ*0ä_ï1fÄÚ{ŽX'3ëÞ亨²$ŸÜq׊Xñøb¾bÆÿ!~ øÿ~Ãÿ_ŸéËýñ‡ðSµ_ßáçÿþCûˆ?¤>ì7þg¥ö/±”/¹Ù³øýÇb.üÓú#†ñwxÿÁ¢øáÿüáŸQûñßm¿½ÿñŸþðåïø?þ¼ÿíÞ{ý’rEûsIö×?Ù_½ñKPhüÃö‹Ñ}h›öÇþóÁ?úo?¤/ÿõýß(j¥Žœ¿ü¿?Äðå?~Z÷ß=T>ãÇ÷‡=QK*í£öXRËsÖñSþÈ9„G~¨ 9üã4ì}»gõ§Ç› Ÿl®¿ôÖÑ€:¾G¬A—_ÎúÿÝoã—Ø>Hßÿ=Z³ªúãÏ´ ÏÔÑBša|‰5‘{©Gþê÷o¸ü&ÅŠ¾üþÇ/ÿùŠ)„ÿùËùòûÿðÿÿ½}ù/Ö•bÌ=¤‘bûªºR9ëzjå¾ñÈ`n,<¼rý×´Öæœ#ö‡ëø¨µ·™Ë8k–²çš0ÒGŒ56ÛÙ_ôü؆œPo«Ov:+<:]jþÎ&h§K >ëô_æQœ¨f¤1 µàߦȈÑoˆ€ˆ°ùðµ ÒJ°}|ä9XîJW%õWÕRKùè»Æý;СSsóñQzT˜ÿ&|û_ê_~ó®Ï5Äÿ‹ÙÒG¡ä†f`=|”škÎ)ß‚íËoêjË_ÿöwó·wkŽß[~öã{)†1gÌ_ÊÄN{3ßQŸ‰úß”Uí¿üñ?þøõ¬× ~’i õl¹|‰³Ô˜Ú¬9’#b/EY²½ ï-ü Ž£b¢;KÚ «`K* ËðÎÆ¾nèžÌ 97l§\th÷4Q Êš-È€N°2¼“mÁzùSAgU̲Á!C}mIù£Œ:Q'ß™BüèlKÄÔŒH@³Ö­ ï¬xg¯ eõcÎÒRÜÏÙ,G}¯'–éevV‡}ù£Š&z|ëÌèžfexåØ2V 飔‰¹”º•arÆ ò(k-‡>²5xgSÐ…\b)áf·¦¤ú¯CDþ”cµWB&(lJÂØb®¶×?‹0O:hà%,šòD(V„²!. 
k y”ÖW^ÈáiXEx UR9•øÁ/ øo>Õ{è­'k}Á ¯ÞÂ`ô¢aÎYW$ëB«ð^ŒnLVTñBMÇ,G]üˆ:ÖÅ/d307øfß 0›¬ LÌÒ»½§á=×4å?ÎX;£[ÃÞsÍR<…IP¦”áMè„1ñŸ(J±°c9SÇ 1+ŸE ³>£uVT?ölЬ #‹î_Ð9ÑQ6jáSè§Ö×b­Œk~Îöù†f –Jº¦§|èà~´ç& bÁ÷æ5ÿ°LÆÕÙ\”a4ÒJð6ö6/ø{¢{Aû&´¨ö½Õà©Q0tÙú/cNM´ŸŸÙ¼Zк1»á…‡Ùí),tÌÆÒ¬³g%г![e¬ùš” cœˆ´Šð¾×¤lœ»Xÿ¦ŽPPo¯I‰Nš1´:Ù‹c²/"jF(r öÖF²÷¡kPÔK+¥XÓ±4ZglºÖŠš°Y„¥11X×öVv(aµK#¿ædûÀ¤G׬bi47'{­àLÍ[eFwT ½ }ü°¸þŸ6Ýöfr:˜|Cr#gKÇŒ7$C´xr«&Ï(’Ï2ä2Ñõ-bÌ’:ÀB/Ì’Q–±`ú2Er®1q wÅ’+pÝZC¿å’•BÊ”ØN$a¹öžÖ:$“—˜ú¯2dîÿ<-H ’#9Û!;-\ ’¡ŠA À,_ðHŽ\™%ÕPO$ó1,ÍØÐ˜“É,KÄGܼvPFûÐ.{¯kOe–µ†‡ÖÜ,Û;1u[²Y(\fY&éÇú<f¾sÌ©Xodæs`F½Ÿdæcør,ý’_p¶öasˆ{oðt¶6€ÿ`d;ñÌ~Ć_!„Õ“Ï,K“£±èÍ2Lûm½Ÿ„6 r‰MßÑ|‹¾¶²vni>ÇÇÂÞÄ<¥ù\îÜ3ûÉikJ@Wí}ÖúxʃÚ* XsqI%žÔ6ï Z±¤A5˰€5#¼°ÚÞ‰©5ûFüÖ,œF}'«ÙÆÉM8Û6(°¶ö£°ÇIkûìYYwâše¥¢SkºñÚÊ8/êúOlñŠ9TÖÆá‘Í…šÐhø<™me‘BWŸ'´mc ¶1/C'«™R­ _…íøÂöø’¿¤/ù+È¡Þ`òr ŸàŽ„2—ϸËÒ)`ÍH¨=ÝÁÍ^/K‚9$iH±bΕxÃ6¤l4ãc›ºb¬ÄS=6“²Iš}޽1Ý©õßÛÀç-zj£(pç15_¡ÃÇÄa9™½v¦ŒÎ\LóÈfs1I “Øx*M¨“s‰tغã« • á.BC¸ ÕêPE„ÆžÒgÅTÍ7mÇ ƒúM„fÏO|ÊB¡ˆÐZ—ˆÐl–=¯ÎZä)¡! BŠïaõÓ¢E„Q!SžÂÁV2Dˆ†Ì1rc?å…èCöB´œ§í»ü¢;iº¹(R´ê"Eã ü |Ì'r46ΙÓO9z@*gWôž9‘*ñœob4„|ÌÙ­ú©æ_uI•‚怙Å5ôED/G£1’Tnr´hUŠf¼°@«msËØ^†bƒ‘Ç}'3CïBãÝôõ’4ÍØÿâjÈ!J»žWQz`2b?ªfÁRQZÖ°ŠÒ+ cÜߢ4ˆŽe_Ãäð+™iÅÂ*ø”Ê)sÙ•-PÆÜè„n3D¡Œç Ïä˜S¾aƒZc›í†eîÄÜO[ºKÓa~$p­¯Q±ŒÿÈxäâyrù`¥Ã2ULõ1óM–¶F•«ŠeÌ‘ÐZŽåfܘ|CÇÇ/©W°Ñ‘ÐÒIµ¡Ålvî%²¿,;â‚/T"eÇÙh ”È✤èóŒÙhEÝ[ÇGº”{y«£¶°o«#&bŽ{ ¯ßŠÃº ¯ßR¼ìrþáõ[4:ñîòéõÛö1hˆŽ¦´”éõ[zÑ Ùì$ezý¶q ¯(}N¿­\h˜‡f«¬Áë·h!v˯Xƒ×oÅ3 ï­ßеFõÞ‡ á,¯V`¡Ô±‘Î!„~·–e¥öȾÀ¤³öÑ“´VBMÞyß7‚WÏõ¤l^5O1iÐÖdÍÞêX©;[¸«á…ƒˆ¶öNq˜ç°wÞÇÙª·ªè—h4 £Q¬ŠÚ·kjQß=”hªóʼÅQ‚0,“ •^y>…Ç{`EÞâ)7´ž®"¼îÃÍÊŸBÅŠ4…ž¨vÁSÎ{Å2–^Æë̹}b=Ó0À­/„ –KÆ—  K¤P,Wð}®®7,÷üQB¢à_oXî˰°ïú ËØ"0~XÆ­Þ°\¡ìà…eب)–kø€ÊÐÛš!‚eÌ)¹/Ÿ¿b9ͼö²n*–S o/¥½C–­cÜ?Öb,ÇIÛdÚFBÁ2Š0C+ÞSO,G3iB0iýÆenHØX.†€Ùüf¥,SèAfÛÈò»Ç<šY‡q;!„Í,ãšêqËÙïÌeeᾋóžÆ²ØÅ{Qïé2.>óÌ'Ø YF@…¶v£!ÞûDÓZ½$‰!Þ{”¡(èãd´y 3zª¬ú¦xïñ[cËIi =hÅö«ÓæfWûŠTN[t´ów' ¶è :·Ò‚ZôîûÄñË.¤>[âQÍ2T10™û k–a¦3Êÿ„µÕ:6¡‘OZ[¨FŠ ›ñj^sfœ(7^ó¹0°EŒ×h\ž ÿL_g’û•¼nôpÓA֯׹h©š_øäuFE(|½§¯S⧆©¼Ž >Â$já.kûð;¥5ä± qy{'Z'º¨Û%&)­¹«€?wI›Êy§=b=åa͸(¹—ÐèY=4Y¯ØAE5ŠÀ~hÕåD5‹h÷²ý^@¦Cb¼š. 
&é"ƒÖÚ Ô\¾õ¥>¨±2"ÖÓÒúÔèŠL­~ е:5Ôê£PÓù‹ieƳƒÓôðRþ_ñª‚éÁÈ¥‚ÿ“Æ‹Ò" ŒfËrHÃBC…ÐÃLwy,ñFí]Çck¯ê×ÀëÅuÏçãsÄîAm òB:ñL‡5„®Ð6‚úõ! ÌD$pÖ^»G`„`»ð«n}ÈsèÃ… 1{ðä çM”ÖQ.«×\íòwÜʲP™~ýFÉ|YK–Ù%F³TfU52Zæf÷ÐÈ5|@.Âæ\ãÚ¦ŠºõÑImŸ¿QÃç!ž*sžHfK Œòž'‘ùBô_êKuhéX_í×ç.‡ %Ìågá_ö6>è×ÿ¬î',û3~|Ø#µŒåÁŽT×g?=îVïõôëë/7@>غ\yÚ¯ï?wxýu~}žÙÂŽ Í¡óÅ£~ý‚7/m<Ûùò¿¿þ{¾€ðÈàäpðÜ:u_³NÌÛô,~³Ôü\Ìl‚º˜¥ÏûõýØ~¿>+<:]jþÎ&h§K ¾É¯Ÿ§w1iþ1¿¾Vò”__jyÀ¯5û†¤C¦ûy¿>Aï×ÿÓ?>å×oìL°À¾ø…ÁØ‚ôëˆtK^©m…‡'4g+Ë^©m™s>Ðne+^½Ó¾ÂS£t#lYºe¯Öhš‘¾2uFˆÌ=tsEž ¶’ñ±þðgÚ‡iPJã’Í©•‡Fþ¦` S«‡û( úsæ*[ëkÑ&êHP ÖQ“VWÄú6ÆC4îè’„Üš¸êGMárû6žº}[ã O¶Lh©ÖaMüGTQR½êƒîØ_Öxñ´îÝGúv(·¦ô·¾­ñ<%¡- {x÷QÀþßzh®2ï> ÔÅò¥÷µ±B×·yóaŒ²¸´)–HF&@…[©M2¾­;x ™Ôæ²Cr*ñˆ£i|*ä5[ͤŒîŒ+ؽï; JU Ë’Ò©C¾&kàÁÏÁÓµöX;$Sµ­I×£wƇC û¹A qOV¼³¤˜ÊRzzC$B¿K6:UÉk²ºÐ¡s-“hOb‡äYÞÑËŠ‚èÔ%_Ó¯Œfñ2cPÏbˆL<¿G5“OÏb‰ä€êÚLÃïE,‘ù£ðTÏš#½ˆ%’ 0 8®3ǽˆ%Ý2Ñ›fžíU ‘ôÍWÚõ­ºêM‘‰ù̓V$¦Hý‚&¦È„ò”£Ñ¤·Ã™Ñ½såèML‘ôûáëÒêÍ.ÆÈ„UÄ iÙ[d§G}›Ãjö¶ÈÌ3áqŸ•µýf3h¾û0åSNe6{’$šqcv³Q¸¼ÔÊlný k4ا)³Ñ…ÍN™ÝÛÍd;‰Š2›ý[\&Evë ª‰;/‹ ›9r*Û¹­ÈÆcÃÁÓð7hWÏ­ƒ- íÊÓN¹—«ÌC»Gîs½S ]w9LY³9*´Ñ ¥ñXÉ¢¨@;Ó¶Na¹A›î´ð² ´Ñë˜_q } íÔQ7Vj4K¦B›AFeØze6ŠhrÛ‡0•Ù(K »m_Û‡0“£¥ñgƒÙC;18/«7Ú èk+LþEmý÷JíÄí (YÛ¢R›mÀRÛqMŠm|A¼Ñ[³­(½él˜­+ªWéÍø$Lw“Nz3Þ¿Ä•áhãÛØŠŸBÉ7|3 ˸7|GznãµA*¾#yƒ9´œÙŠo†-¡%uÍ!Å·W)¾#ƒ0ƒ¦„ÞŒâá)õO„nÖFÅ,ÙÐ(½ùu&¯ÍBèÍ2î¹+h@éͯc4ó¦°Ð›­Lé:º¬ôf“…lùRèÍ.bðòJë¥ôÖ06¥÷QŸ§·ªJo”…Ú.Ç©Òûè2¡7žcÚ”í:Uz3º/Aè7v37ÇuÒùd75Y¾oÉ`Ên<×éXßÙÍW1®y¥ïz0@ÙÝ;Ïèg0¼ÞØ]h7§…~ÞåîÒ>&“Z…vgwa,d×ð‰Ü E)ÙvBewj »A§/ÙMÙͽ¢Œ0–ïÙM ¾ ¯ä<»##N15ûÝZ‚T¡WX¿²;28(]Nde7`‘òŸ—Ê!ìf{£”v·–0Ž`ÛNv3àˆ’Ãê0!7ON@„I+y‰[#Ç„Ü$à@KK»[C„ÛÓ²r®D¶™Ê…UíÍÌS›ç:§éz¡‡6Ãä¹ÀW ­0g†ÔþF6Ú-xbóÑÂP§ s­ÅDJùJÀ$¼fÜÇÄ-w\3²Û}KwZK0¾°š9öð¥cÔÕú¥žÔ<P™“±Ü¬#ìƒ01;ãÓGó<¦5]£Pšç[ nÑA M‡Û`ÂFÏheN©t°Q4!tä²)¬G @S¼nŽГ)á˜aSünð̬°öVgZ’s¿bŒ…Γ6 <ßòÉf” ~0,œQ„© [“Í“+òÎ:Å$hfÛ™”hÛ†ýcÌŸ+ãÜ+9zâѤ?'í€Rm³nDnÍ ζ˜ â0˜‡/ÛVþW¿ûßþî)Sy¢‘¸ïL <û01l5YV1ÀøÏŸüÔ¿ùþðÏõ»ÿõë?÷+­MúÙ+™H®­ÍÛgû²oùìL`,!åþägÿûÿëøçoúæ¯Ûìå³S@Ë&t±œo£-eßòÙŒØÃöã&?ùÕÿñÿÝ7Lë_÷ÍL4Ô©ÅÛPKÙ7ù‹W‚|:Û–×Õþ4!’ņù|Zû×ý?ºO{¢è9g ÏñÓãq6“@ ôÑ_žo€ÿ`ësýåé@ÿ¹«zÿï¾$„ILÁFHqT⟠ûÈ‘y€ªÎT¿*Äèé°Ÿ÷º}â‘›,.(<½ný÷Ôïø3ì ‘Ä HÍß!…MÐiÁó?nt'Ú<ùc5Ý.UÛYØíwiÂ7Åþà§À£OP"Æ|,øç¨å©è­æð& aÚÏ¥÷îã  ïþó¯z.­#<]øýS 
Ö12”à,Z´ƒ¼J tÏÛ¢Õ ns—•uoÑšµÐ¦›58‹VAÃëèÛü1jq-f¯*ùÊø;jw­\¨?_öóÑ‚x#" Ž+—ìhÅ[´ÆG 3›V6è~[´\C§ÂÛ¢ÅacBsŒŒ^œE‹Ê,Ãâ«iùx»÷Fp­\vƒ1‚÷F0­a¸ÒOúéß¶Ý@.3NÛçîmZ4S@ŠZfíaé/›O ò´ôzå,>þ§}ÔC]™Bê¼#*˜ˆÃ–==yŽíQ!ñ"“a'ïˆ y™¡ûøŸLËJ*6y(üQÁ\m¬3=Œ¸sá?£1c· LëCëLæLÁÅw×°¬ø ž†Ô´N©O.©wOûðÄ‘Yëæ¥]'Ý 3 ÙGñdJ¹r!ÌÜ}S¯–VމÅã"€"Æ‚G6f).ݘzÁfÎÒ}oÍ—%kÖà#€üqÃÉÛ^F.¹b3ÓÇÿ UP³·õ|¶à¬\rŸËÄÊy[¹x@%ö¼Ò&C2¼ùv5O*¾ÈÿÈÙL¨•o+×ü๕@’‚²‹ýÑ.Á™¹$ÍE’ ûÆcɼí\è6Æ Ö …u§iüu§ÂšÁ<Ü ëŠê˜£eèWX×ô‘Ê+Ÿ„ºT"aŒfb Âº0síëØ¬ÂZóŽ(¬y X_)PÖÀJËѲ?ß` e™QøŠrƒ5éy ž²šKKÃÿÉåÆêL×e[þ4%5ã–J¸†NQ2ìWv`eµæTÙ¬Æi-L7V'æk§Ä3n¨f–ÉXé'ÕŒXʱíc¯ŠêÄIô;®w ªSeÄbßþ E5ÊhÁÞ©ëÕx'º¢ï¤A‚j¾²0/›íõŠj51s8Õ NêXùë¼ °Ú"‚xmÎfšÀÚú(_qªkKNcÜŠYZ³Œ1±;+àÚžc~¨å/S`£l2 À…åâ¢~hîËW¨£›ÁB%}Ï‹Û4Ûy/ȶयÌÂl–A›¸’¥ ´Ïç<µí9žŒZYaÛz¸z)e &a‚ðúµg¿Ö²z ³%1êʼnîÖ˜Ú+ìTwŠnè‰üÐíÊUvW&Àç­èöCЦý‰áU©ÞØ]ýésA{rxN;Ù]¸‹¼·a7áR€ùwvcËnÝîìfœd†X–í‚ke74¥¸¯ÒSvcÍò¤úN¯¦ìf„VkWvs¡`VÞ:E·N\A7žÊ}̼R8)º01;—t«èftOW¯×”ÝzŒ@ÙÍȹÎËV–l#ôNå\>¥7ÛBl.—ôAoIsÑ[zSéŽèâuÉ’â›RœgÝ"‚ßšdñÀ7¶ öµ½¾#S¾fºâõ5Þ1²‚=ß̉^ì°:Ëã[c(¾5ÖoŠ{¦Ö^ŠoVÇÈñµû+¾#ϯ´K4z ½5&CéM‡zÁ„ù”ÞÌâÒÂ>ô-ôf°ð2/ô&{£=ãÞÇÀ*½AaA›7xó1R;uüoþÓ¹¬ýNàÍ^é?7#½¸ˆ»™3]yZ ûQvc°2Õˆ+7v“¥õ•QCÙͰÝ{¹±»0ž¦Žuš¢«RÌØR Ó~X„Nº¡›Á㼞§lù9HÔ[?ŠîÈ«$¯ü¬‡‰DÒ¾)¹yµs¥/qDÉ ˜1MÞ›ˆ›‰e 'Ã2Ÿ¹5ÙŽ›8…‘3ã&t3ãè`’¡unÞ„ÃCPsížÛšR°Í @^B‘§6cPhkÚüòОËz¸¿Y˜Í½C´CÕÙ\ üä-ðybkˆÇ6#Bf¹B7…×üç¾Úp3Œ ˆI¿.qÍÓšqŒF^Ý*°–8A5#Br¾ÒÑ ©Ù &Í[ñRjÞµxSÉâ§4BcOºD˜õ…iű ¥ùU99Ê Ò|*§+ñ0š_Õx Ç6lxsCçy’eíba$ë¼Ú©9„'—âä*€žŒÓ¿T0o a™Ñ rq5BûHgˆÀñÓÓ1úÉ»òÓ7$é+¼Ÿ›aGtf/ç©¿ È¿n_ˆxd/*<¾n勪͋rÝò`•‡ŸZëþ.Žêzh¾C„€c;þƒí˜öÇ»Þdý£ë¥îïÑõ–ÍM»^ÚðMAМ!JÑIÿ± £–§‚´š‚:ï^´ë:y¼ú烊ø—ÇB°üºå½l•Q¡Ð±3áÚÚ„ÐÜ ÊÖÌ{Ä-qẋƒ‡Õñ”Sènló*pc8::vy6b€èÜL æ-ê•ùçx49.!ú%owaÝè>© öu1ÔŸ(„rûë<ôT§CU‰‡õË_©aò¨Ù¨ë2øIe}%A¡W…-å϶U8ÅŠ)Kƒ¹V1c F¼n¸Da·D˜‹®ÔêR.mY½£yP}þnwˆ/R­v!<¬Ã¸kS"ið²Zh^q%ø`Ùu¤0Úª—iˆõ©ÂšQ2®ä×/_,u‘Vó.ê–³áƒLáçY±¼î®´ëì>ÏQÀC%ûò⸜¯°Æ,À!vIâx‡“Éî¦REsJKûôÐàù´õVF w ÉŽpÀüº9ìWì/gö†¹ê¢zMdË[1ز ‹¥H^Y“ÊFZÐ]ø@â}µak”ô˜úøãµ•¯‰¬Ù+obÉ2×DÖ´Ñä¾Cx&›F¨wǾ¿î|5ïèk3òêcÌû]bкˆ¯qêEóÓv¹s€Gw«¯iì3 Gúqçkó¹/;dd ¹§Øf[Øß1¼u“ObJ‡uP)F»ªóšÄš<:Ò…â \\€IÔOì2ÕÖÄ›’¯YÌëfé3YÇŸPf§yÖ$fq^[¿Á‘ÂRðŽ6Àù“11ÙYß=ƒ™ò›¾Ö•¯†7þØý˜{3igJP´_*¯ÞÔóTTÁÉw¢#ׯë…O¾óòßÄ 
?Öµ¾ßñyeÆüãÀ;­^säS¼ãæ7³xÇqÇ;ÞÚ¯~oíŽwž*{_‹¡xgòYnuW)xÄûÌŒ‰ºã½3>…×õŒxÇ;w1Ôx]Û¬xo´Ðú³˜©xo4DÑMõ ÝÍ>K?p«wºÓí—hr^óFéÎÓc•BÙ2•Ý+)®¬ËÝy£pcæ<î|GaÈ çêtÅ;s¥påç=ð^™à:ÝñN£÷ˆ#…6îx/”^ùžšÁ §Kr|ǯå{äà»z(¾^ ©Ñî|çYÞÏÞ÷× ß '¦GîíÎw:Ly§wÊåÎ÷Ì[ÉA¢¼&|σá5=z'<ó–×õóáÑ´Êϸß*„§ˆƒ-3öÝmBøŒmº³ö'„׿ãy%6cæü„ñ,¼íºå;ã1¨É¢òÞ„ñ(äR¦‰þd¼ù:oÞÞý#Gae–¦hŽc…< ytÄXo”g` F9Ô´©ë1ÏÂ:^–cå¼=™+ÞbÙíô,ÄWaÄËôŒÓá”Èk,/ÓLÅë8ÿƒqì™ß ‡ú]˜ßyMf…=s6¡Áa¬¤´¯q@éZÊJ{‚Ô¼vÝC˜§Þ†E¹”Ü…ù’1ŒWƒD˜Ÿ<’þÊSažù=ˆ‚ô‰0Ÿ™ˆ„˹Î;í3«è%ô­?홼 šZ]Zí™…iVÊ„÷¼—þ™æ÷Ìš‚€`»ãžÑ6hòNB§¸§G”—€_ Zp¯ŽÞ÷âÍ=pOÇ25¡KÜ'&,W³÷ô€_·DÞpO/$oŠ»±‚{&rY‰qŠÃ=Ít—¿óÀ= -cÉ:Š}àþ¬LpÏ'y9Ç r>p¾VpOÇ:i¶îF:p™°}Å÷Œ‹¨q­¡½šg(Ù%ë{Úó9ÆÕÔ èžöšâæ }4 2ó ='ïòÖÈ*ïùèaig¶€­µì# ‹5¿Ã^ü×ëÙTF}/ðÁz½^ã`=»c”·f¦¬g¦¬°u:…=íPßÛ|KõG|ă {ÞXÒ)›Ò¤qƒ}á%YW¾Ìƒõ…·Š1+ìštÊz€Ó6ìøÞƒõŒì3\ÓUY¯ýp°žJ\þyëžõ‘ÎÞ²ÑêõðÄz¨/òðõRA}°Ã/å’0õœd™…[™ð¨gÀ}dÌr'=´ˆfWü“‚ž×IZš¾rë™4¤Ïú2¡8Ìs¡ª­ìWzʳ͌û\£B~òÖ5¬ñKŠô§<ðžÔõyÂxññ àéNfŠ›ù‰µf2?E´ã=7¼“³_—nºOæ¥y%WT¸O`cã*¡ÞØÎK]é0¶½Á£÷{TL4¨dg#FºUYûJd0wbIåúäMU½ïûxë¼4„GiÂÝL3-p„i.Çê, âËÖï<Ôù ¼Õ$m™Ø3]¯/Q¤£¬pI]ÈŸ­8†½x £¬p¬ä2Ês¶3Qõ’ÏÏyó ­·[þû z³Ç·ÑÓœe…IC>3Ï@ð@Ÿìc' s½3å0ÏhSÔ<™Ôvt’œ—ï@ Ú÷v*ÈùNz‹ãVö½7‰…ý«2¯PÌ–üøC.Éþú'ûk (]=èðIíø”öwüè¾í‰zRY·z@™ûÃŽŸªù±ì2ªgšãºÙÃÞ·ûVz¼ òɫ۟,ÄrÝ»õza¼þôI~Ñï\­vità;ˆ0‘¡©ÿæA¼£ ªöUu=äðZ³o:<2„›O¯Yÿ=ëî³ã> £l÷r/9ï¬YÊl‚Ù²èɳÄ:Ú_öüð†7‘žëvVxt»ÔüºÝš Ý®Mø¬Û™J”€ àúl£¥¡µ—AÃÞ¿]dƒVRèavÅög·†\•¤_W‹9Nsí‘þÓµ©ÅrÚ0¹aÔœ6¿¡ÇÿAºý ¥x7Åÿ‹xBhЦb7ôðvÄœ Ô¹[J°ßäÆ4 u5æ¯û»¿ùÛ¿»5È'ÙùÙ€fÄ›XͶ dHÜ6!åy¯×ßÎòßþø‡çÒ30íÂëÊÑÂDcK(äÆpGó`œ:µ§¥Ž•qÔT (’ë”äzÇ`*î`i²}ÀŒ¦½Ï?S}Þ‡®·wΕq”—"¢Œ ©<2xºŒ£¹òì.4Æu #Z^—Žæåuª;ÛНÃЃ¡ÈG¦|lVö¾t4—Ší¿ÐåD™>ñ,ÕúZX–,«(ÜV¶2ŽZ$)yá5$ÍtìWÆÑ…ZÝ FM€O¼•M¡õlåÕNé}ã(~ùˆL;½\{)¹L£lA`¶ûd›)»;GùÜŽ#±–g—j4ãýy5a=·ršÏ%'žä} æ_H%š¶xLeyT—k3ñbÔWO%»¦'ïÃÑ©¬t£vÆše¯¸}”U—o43M,_¹üÏ©º«GùSn×i—$†¢ÝØÜí£çÍå…Ì0޶½çøé})Ÿ£‡l®{'RwYG­³ð`–I$u—vôìÈîî e}ôU탑i¸Ô£|Îêp·²,õëFù4\òQùI<ßײèu@„E.û(Ë ³òP˜}·^÷ãfZÊޯ̼Þùº ×zóÝc9¸¤ü:^˜Àx+¯¥Í¬Ãï(¥]R>æF;3‡ëkÆò†w˜¥÷öž±2x™g¯{rYÖ6g¬Ì¥!Í<4¹»ˆE.©õßYŒ2™©\¯›rϯË.©5%]'3oM}Ï×ÊË×9tÛüSñ x,–ÂóMa€™Y€n¼nk.ðꉯÛô3]yM×Úš°ùÆë*üT^Ówæöá5½Än¯É¹÷sÊkŽ ƒÍò¯I¨tÙá×Ó`‰ò ×¼µÁlráŽk;ßôæàšÇuyÇuÉãä6Ͼfžb›¯xÏ<Å65;öÉ:z`›vNžr2£Õíì©¡Ôæ™'»}:ß©?ÞçµhÂêô3¬ÖÆmVë6(¬¶§¼e—yVÇé%óƒÕ´þ 
¼vµYYÍÛ“mª7Vã;^%ßX}”)«ƒlzÂêó9kzð(©„‘OX³ÈM|u|“ôÆj;ÌHìxcµ®Üן¬¶ë)Þ@ö¬>Š„Õô“¼77e5¿ž§E>(«£]Rú:#¦¬¦sòÝ™Êjž½~K ë8½ž¤°flëkê+«£MaMç›k~Æ%è>¡¼ÎoYø^»W^@•×c­¼¶®9Q^wùLå58ïß©òuñ«¼f…“®T¾N~J{\g™ì‚ë,bÐz—Ö„Ö9ú‰yÐZô¥õüi);ÉÜ;pý ¥‡È}JéÉ Ý¶ïÁQLgYÝŠifËy‹8Êé,‘›™t~ØÚ–l*ÁnÛ`³Ì©W*\óRÆÐËrÉ)°F(°§l…ì£-l^‰öcØì¬÷ôV`£Ìëlì£Ì;gÙ«ØvYFgZýr#v¾ƒ:G멱Rn(¨³J¹ÔY$ µ¶@AÍp1\uòN5¿Øéê,j§‚ZFAÍopÄRó:ÑYHÍï{Ë{˦–ü\|2–AHÜöu’ºŠ.¨¤®Òc‡d- ¾’ÝðgÔ:x‡`­b Ö:]Ô˜NBö Žå'åêX Bê¨ !uÔ†©£Š¬žÔQ&Š’:³'5ϱ8*²Êà6Av,j×ðÈŽYè%ÈŽ^MTbó>-g5b£Ìï_BìãÓ±£,ðCÄΞ5‡ˆÿBì(«K‰} ©ŠØÚ@±½&®À¶+ºÞ²ƒJØjÆR [›"¶Ž¶JØQaî­“ò±³ŠÑ"bK©ˆ¼5ó±e‹;DlQ¨[ôªCÄ~Â"vñëê±e{?Dìàˆ’›©aß–ÜCÆf‚F4¦®î,ëŸ;Iá«ÃG²¼HÜa–óˆ-w¡T~0|á³ÚñÍ+|áýmOÔSÚòÒÛI†õaÇOÇÔ<^yAw×Ê/7@>xuúñÓãÁ þ‹÷°Ë/ߺ¾;ŒøpèHZ±Õ^ÿ7ÿ’BÞëõM†G°%Ç…C$Ü÷´÷¤xD§Ý×ü=|èóº Mx>tÁox_^ð\·³Â£Û¥æïÐíÖívmÂ7….@ ¥¼j¶….h%O….h-ߺð ±Ùn“†0˜©ñýBl@ô±ÿò?ž‹ Èóˆ xyG°q‰ N*Ô4_F·c î2ÒÌDáo…¸Dw)][î•Ô5_ Fm^àåfÎr)b}Ib¹ïhI^!ÎÓ+4%yÓ¥8GKvw‘FÂ+SßFüáEðB]óípÞ”WŠ»‹”:½sÀ•â—C¾¼ˆñrxɽT1^v¯‡RÔrVÌ&Má•©o‡ëøË C¾4w©ÕwùYæ.$=|Í…—¦¾=®b?.ý°b:Se gŇBéËí$:F" 2ÄŠ©9Äí4e ÍÊé ËeJˆ€˜ƒË”é£Kj·“¸–j8¬˜nPké;¹FoŲF‰˜Þù^£»¢Ôìífµ±ãf5¹;JÍúü2 Õ$!Óiü5¹;JÏÚ²„H‡Õü¾¥ÔLä¼ÕÙ̪‘]€Àô«¸wM©÷ß6•Z$@@|!µˆ-S´×ZÝU¥çT‰°®îŽÒÃvn{kÿàS‘BëÜÅä¤uK>æAqÝŤ®¸®¤¸F—qç•Ey ¶:còš‘ïS^“>o¼òZCO„×⣼NpvðZB^ç¾R\'±Y+®ÅèsàZbÃ×Q6Škut¸Ž~±¸ŽòœâZ¶o¥uøiZó40Õhö µ¼²_1Î殺¦Wûž ¬VïôÁê Ì=CÜf2ßSópf®ƒÕâžSVó¹÷üQVG‘„ÕQöXeu”½RYÿÂjªPXÛgKW“n´>š™ ·â„Öñù#àÄ ÅµD@(­ùåoÒš§¾ß+Ni­JkÓPl[\±bÛŽI¿|+Æ(¹›3>Ÿä®YVœ’[" A[¼,‡ -Ñ9Jî"Jîâƒ1ÜY›©‚¶¸ôUÐŽ6>Ì}r´}dr[½Ç·eMÞälgZp£jƹ¥•cIÁ­²‚‚;ë"à>¾NÀ­~V÷ñN·ú|•Ü\¼o­ý³UKr*ÍB7~tAGŠî,×Êî³L¢$ÆLÙu·vÓ#üxQvg ”Qv3ð[v?äìä^”Ýìä·à5XÀ¿QÐÍ;–Þ]|ˆÙbP9Äl‘PÝÌBã´Aw–E·5¨ ]¥™‚î,NOEwöNOEw–@ØCÐÖWª -á׊n6Ùí­*qg/--k–wb?3pû­ÝÀ­ú¦€›ó–)‘{þ´…DÏ"wõâ”JÜE˜¡wòR› ÜŠ=ÏmÔìá ÜŽE,î˜ä•n–y#ˆ·Æ ¸ã!Œ1g‘¸z*qG¿êÜQù&àV³‚;¦Ÿ”¸Qäí*Âmõƒ+·Q׈ú3à«"·V4Ž˜¿ƒ#fÀ7B¸}ôÇl–Û–½ %ÈyžZ®ïû |ðvL¶ìÕà–¼i³QÁ܉Ԏø|¢Sƒ«X̰©1»€3D´êõà"gR[­ïû ésš[«ã}!µuçÉmM ˜²Î[«ïÌfšzy¦DL7(˜Ý¹]¥Hì—ÁëaP¹ÞWš½Ô}Á8ì—~8‡Ø/ÅEÓÆx§þ;NBµ)öËä­ À‹os(wm^öK§Ãw*˜×<=l=§œi£SÁ|ÍSõôöxÄ 8Í­G‰˜…nn†kª½º§ã´“Sù:5ÌkÆ&Ñ›z:ÂüçQÁ|ÍØ$*XÏGœ€ KèY ˜2Ozñаž6ëåp˜^$R@¼½½‘ÎcÝ«S„%ö€’±‹sŸ˜>R@¢z“Hme;"\Ì}ï^NÓMz×H…~„ øé|ÕS!ÊëèðßxÝ}—¯ñ¼,8áõk¶‚›[¯/óà+€íª7rw9Ú äæÕæŽ 
Bî&®%w•cAJî*v$%wéÞ§£ä.b¥Tr9D¢ä.r´IÑ]įè–I{ ÛûYr‹]ç ·Àè ·x¦rKô×An92~»Ê—or‹7á ·œzRrg9zu»ø«äÎr,KÉÅ`­äÖÓž¹eKrg9¨äVǦ’[ÏßÈíl|7r¿ Ï7p;{õ ÜÎÓu÷[D¹ÛcVÀ-îõ¹}G+¹»ìYJî.r'q³äî^]9È=”øžÜ©Ëvmä–)F ¼ Úk‰ìνû™yÈÚS„ge·¤_RvO9+yÝ’ðëº}•¢[EOºe†èöæƒÜ͇X)¹k“ê„ÜU|¢Jnš\íÁÍÉí¾[À]ÅE&à®â(Vp£ÌQȲ·‹OÁm—S¿œg î"^Vw‘¼L ®€»HJª î")¬ÜZ‘‚»ÔŸw‘‘Qpë‡)¸µ î"C£à.ñ(à.€¨à.’LÁMo¥.ÜÅgÂPn«þ¨àÖ¡Qpñ¿*¸5^Á}Œ‚÷ñîcÜGSÜlÊO»H®-·ö˜pû|Ìs›ƒ÷ÖâÛE„c”Û¼=à’.Op,²™€»K@„‚»7¿{*¸iqË_À]ûOƒHéÙ‘»ø„T î,c àβ¸uËUp3Ï›ûwÒ=DÀ­¬JnŽÈK(Rpk €;ª˜+à>óàŽ ¢ä¶ÛWœ8.1Ú+Bn<ç¼ÈJî(C”ܱ‹r³ÈÍsÆ !wRñXÈE`Próö“÷BpG9€rØJ¢7q)¸£$$Up³?ÜFáÁQn¼Ò…K(¸£¬*w\¹›6½ÜÑGâ(·£ª+·£øÔ•Û’¢C±‹[TBí£DBtËOÛI4)Æ!mK¾…¶æ QhGa‰R;.æ¥6oЖ<ÆìXü|u¸Àà±¶•œfúå±±?—“ŽåO |ZÿîšëC~ô_÷DE<8BŽŸ÷Øcõ!úËó ð¼zýøéñÿÅ»òË·„ Ô^Q ö³Èû¡Ÿ š,N´ÎT¿*<áé÷Š}³á‘$þ_±þ‹ê÷`…‡÷ZjþÞkk‚z¯µ Ï ¸Ævú=¢¬Æ£ã¥êïq7…µA{^ÛðMqø‰w·ZyUêSG-OEÕ<:ÀL ó\·þ|èÀðÙþõOÏe`˜˜ pö,OfÒéSRõân´Lo5o”®éüs5x½XLô£Õ‹Æ0j?ôbW_ N/Ìà…‡o QöÎUÎXF §Ñƒ?%NÙÁÛߦýâ# ¢û092FðaÉûÚÆ(>Œ@l¹ƒÖ Gð–ÇÇ Fà4º1‹O7 îáÁkßΨà¿`†àϯŠG";¿*Žªº†øw2ךO7à!;oÍXüÊ“w›ž!Óò…¿|QÞ±2™ýÞ'pæë™Üýæty+î3 "pö¹éÇ÷Hn0yÁ¡"pÆ´Y‚&p–×YŠ "4—cº¤UÔ ‚·9pÖ A¾ºZÄe?u§ÿ+F\P~ [ñ.(¹>b¶®.(gp¼Þл œ–:{”ÿÞ½ JŒ “·úàg_Ÿ¼ÝP\PïÕ‹ÕåôÕ9ƒº ö}ÍöÜ,Þ%œÉë ßóT‡Ú›ÄˆôTð€òÚŸó9xÍÓNnÎ ¯ñ¹Î<"¸æa'8$¸æ!)7׳+æ=®‡,EÁõ#€Šk=[«¸nbOR\×îE*ÅuõÇ •ÖUN¦*­‹Lu¥u‘”ÞJkZí߆;Áu‘㺊kF1¸ê×%*’=®‹Dv)®³8š\+Ÿ×â„V\gÉFsñZÒÞ¼n~×S`gq*°³¿wByåbåu/“ò:‹ãPy­1~ÂkŒP^kHÑ×'¯ehNp;Ùün?4 nݪNp;“ñ Ü®§n ÒOn¿­ò·»L®“Ûoõmáé ÛŽ 7lûN±-Óos[¼†(ºƒóœ¢ö‡¨-U‡¨ÝýtWQ»ËAewSº »õB0aw•)¦ì®ÕožÊnÍä}°;Ë®¥ì«øo‰íRxkôðo‘}nðv3Pá­R¿Â[ü}øÀÁn±Ï+»‹„U)»‹(Êî"©Þ”ÝEzs³8—½²[Ý·ÊnõË+»2·^_¤ð.²ý+¼íZûW‰Â»(<¼Õá­ðÖm\á­nj…7gÉ[µRxÝGÞEi oŽPxYY ﳬHü€yzÑo•ÞEb±•ÞE™ßEâ­ßU®üQ|Wï Vz«€¶è]%çÚƒBo p¦T¡w¡Héݧ å½½åBè]½^|³“xcÚI´Lì$â~Txg‰Txë!€ÃN"ãªðN2ÝÞêÞVx§¨6ï$7Ÿ(¼“Ïe'ìÖ«œ”Ýš7IÙ­)¼£„2*¼£PQxGÙ±ÞQÔ…wlþ’® ïø3‚7ÊÐE#œÈ'ìf=NÅv/»Š ;Ê ¡ŠìãU‚ì£åÙÇ+ÙQr„*²£ˆuŠl 8PdkÀ";þ ²£\©ÈŽºí²£¤dGõ¨ÄŽUäflf$y+ð ì(Jº;Jøœ;Êy(vT…HˆåžÇEìø­76®ðêËoF‰?À?—¿ÎþÁ“Ÿ·àÏM}…¼?ð‘šðª3†àüíq>0¼üå)¥þj„þö‘n‘ço‡èW_Ðß¾%œ G¦|™c!ö?NЙ’dDF¥¯º áép¿~߬xd/<<¿~囪MrÝfð`•‡k[ëþ.¾íz‹*8ñ ü03o*U[x´ó×yí|©û{tþ²±jçk#¾)² 5&ÿ›Y©Eµ<YpTóDdAç t9u†übdÄüËcqX„Ý’ôµÊLÉ ÿ\¡äŸë]'Àõ¨2²`½é8°| ØÁu6ýsp½Èî|p=ù˜–»Ìî…–à¥NÀ»`‹ðâŸ>/éyNÂW±”ðå§ 
¯1á¹§¸O _$Lù ¼†¢„/>Pù|Qž ßK<àïù^‚2bñ½ÄŸá{ ÞUvð½½Ä-݈ïEÂ$¼— »†ð]côÐó„·Ó…ôü*¿M éKÞЗO¤õ"YnÀŸ½,€/ªÉ*à‹D €Ï?'®³[Þg·•ð§ð"„/þ†´ðGsðñ¾H*öðE|àá‹äÐ?¯òÛ s>=Á „g¸Ákó»þ4‚á«ðõgL2 9pC¬|ϺQ+ß³düR¾g{•ïÇ&®|O >å{RÛò]C¾Çq Üó=Šð=ö;ÖÕÿ­XG™¬Áºzü¬ÓËíõqÁ:öþŸÆz¬jt®þ^{,Š{̲cn°ÇcO°GÉ'v€=f•Q…ìQ¬ÒÙcÖ²ÇüÓ‚{Ì»%Âá0œy°ãAÙñìzlÿ {””CâϯÄG•ŠñgâÙíÞV'ˆç} BqøxhZññ° âåÞŽñzQÅx…8u…+âõ²ŠñQ®]<%%ÝëŽ7ö¿>äkÆ‹¹K²¿þÉþÚ‹Òõ×CáŸÔþˆ7iÇîÛž¨KÍ|ù©a¯;~j]¹×Uö¾Ý·úÓãMO^Ý®?YVŸøôza¼üôY~ÑßzB’ X ãKI<~:«x÷þÚ‡ ÊFŠíkªz<Àáµbßlxd7ž^±þ{Ö-s`3~E-òoöÔìˤabr+ E¬g¤ìùá ï°«çºÝ*ÔnךŸïv›ÈÚíÒ„O»ý—™D9¨4(m•ÄÃl¨s¤~÷Ï{<ƒT’{cLõ¨PâÏÄÔ_WKò^îP¯ò­ôêÅVåÝ¿û-¯N€¼¿üþï]¥¿ Áþ—ú—߸Šw[ü¿˜L- a-7û^‹†¾ÇKÔo-iL§PWcþú·¿û›¿ý»[{0&3íöülôJ„Ñ©&š¾§6îáÔL_Ñÿõx.‹CìîFÄð-aòÞ5Rc¸ä† ”•ÐëR\ÇJn°w}©7ä¡41{à•—*_¡Ö'½.§ËŒ¦Öc³¥j…Nû,+a®ä†“®ÒÊÓ#Ðó÷§Knuçƒ'N0ö”ŠÃòº‘·§ (×%˜ª‚KoXy5SŒë¹÷ˆ| Ë*£©ö鷺f ŒW)¶”«ÙR\é ééÿ‹Ü0 ÐL͸œâJo¸@ˆd—4ñæF6¥ÔhMIë¥Ö”ô¾‘1^´ÓÛRré «]ÍZi¦³¤ìîD¼•¹ü†, £q †•­ü†¡õUß •EìéTV´Ÿje¥ã놩$‰«^]–yÞóy™ÈRYù gíyµêQ©fALÕå7´úÐ÷XxVäîD´Ç†‘!˘ÌÄ~ĸoƒZjîND–AÜ…êgÊVj.½aÍ<és™ë»š»ÑžƒŽVƒuqwé YC¬ÍÔÛÔ]vC¾±¥Žn´pƒÔ݈,ƒØ‰Õaö4\vC–eHæ{ @H}_‰hïó×>n¸ì†öXÆzìfÝIó}#"‹ 9b*™Ï/M—Ü×ÖÑ1îͦ$sD^Wxò¹)ÓÚTÁMw·P`–‘Gл•¹ô†|çØf<+{ûÔì–ÃÜtóWSaïéZ?rÏàm±"=xÍV&p*e«ívµø{¶â âë$òÌëO–…”°š¡('—àЪc€r5›yþ.Ýz Šw3›–ÿ\7xZuà0MVæòVÞçÒ(›_ëxÓk²Ò§R´Œr©Õ¼«kz.À<Gá™]±«b’ ŒW¿C›é8—­èd6þÓÈ2›³íÚQaF…Ù“3“f¹ÙvÐÄêhwd·òQ ííC‘]éÅ´_ë[ˆÍ Ô¡…ÆIlÞóÐÑúnTbóÜ.0êçØ<ëS±ÐVX‚›'çòîË“Ø<ï;ZÉÝ®±b[ÈXC½›§+Z×~$Ä®ô!¡Ç6„Ø,c¬ýÈwb3ù.¾{`‰ßˆOÀž“&ÇØ5x뜛a†K)¯w ±ñÕ°Ë-î ²+«ºFù…lö±U“oÄf=àØ[oÄæcÕ®I"ÄF?fì²sy–Ù•Wµc$ç’„Ùxgíè¶Åe6­·-¾¶a6ŸÃô³¬^'³yœ´ƒ¾  í£=´í\&¤ßQ…ö14mV{‡m}§B›+×–$Ðæ±KÙûÙv®C:¶È+…6[‚ÏkkQh㕹†’BUhó¹–M*»A›­Ä œ}Õ'Ðf3;¿Í¶9¥6ŸÃ6×W˜ŒR›“rD4tÞ¨-SÓ Í‹ k)>œÆAem^\<(3×›°Ýâò›|&l7žÄà äQÆ n+à éuÜ„íF¾w9!7˰¸!-‡µ'w[™4Ò…ÜV,HÔ@_;ÑÍ2À¸¥VÚneü›ó Ý™‚zo1å“ÜhZÉÚ%O{ró1LZh­ÜÈ=éÄKKŽòäž¾ªÑàNn<–ÚæÜ|é´ÞÈçBÆ«ÌY¥àÆcæb¹ßÀÍÇ g³Å#+¸ù\­/1WÀ=í¤6Å*7¯iØ ê'²6{­¼”¨n~W†ÒÕ–ì(ä>(äÆs½Ák3r³ñhÅg²öÑî£ín>í"`ªßÀ=펌~º ÛxjÇkcp³>üN™ú÷ñån~9úb« ÂmaÐ.D¹}–yn³ÃÂ>{p›u ß,wae Ö¥(¸êÜì0°/I\ÀM÷4zgë” n>7eÿ|·‚‚›‡ë’ênàÖžVr³¬Ak&ºe©=!wõ ð`ÿÌ7‰»a ÍaèêMäÆò›Ìjh7rWzP d `/q—:iÜ‹Ü4?`´¾B‚Dä^7Ë\’ˆÜÙ<Îì,7™;[pCn‹À^äf–ªn¬¹‹ÜºQ¸Íã%âDYÆ7G :s¯kpó:FÀ 
¸q›Qe`*oCˆç6K4v´;·Œ7¶ÒëÛ<»AR/'·Ánó±Êù_îàf°1>e„ø‰•$Ðþ€Ue†B7?¡5,«%æzp3Dy…>”¸™T­E¾µÞÀÍ2Àüè7p[d3¤‰j{‚›¡Ír‹Ÿ˜IcŽ ‘y7SµaHfXPpó´ v‰-]*¸]ÊjØ n.NÈtŒT¹ÀÍW¡ú-Ä~u\cM–ƒ‰!x˯Ŀ–'‘¥OÆ5|Vû#.¥yÅ5¼¿í‰zJ[îû¸¼õaÇO,£÷ºï`w­üòxäƒW§ëOÏG5ø/ÞÃîù¦˜hv{tÏÚí£1 ´Î`£ äœA1 ïÕúæÂ#Ã×’£Âƒ±î{Ú;“dz:ëRówp®Ï[Lƒ4á;Ä4øá o=×íV¡v»Öü|·ÛDÖn—&|[L¤Ù!ßÂq‹iJ‹iÐZ¾-¦á¢r-¡R¢‹ìØŸ5ðF ä)–Ltî• Ûfðúðh4瓹ÿé¦/;Äl)äuÒ•9Â_§¨GC)éÛaÍÜ⯔†µÑ`Q¡RYP,œ…·IŸN¹t9?K”°ùÁ$;¼¾$ àÍ}Ðé×Ir^gñVˆ‹]¨71U­-ɇ 䳆³ö—ìnEäcµ.k†•U;°ý°ó#›éw½’ªæ[àf2ÊV„Jq·"2jWH±¬26À{Yrë–¾‰ÀÑå5˜¡T‰ ÓdIÔ@íõ²€ÞÝøöÃB×:³´¼ÒÜ¥ˆGÐ@iîRD ÈV3ÙÞÝøöâ)PÚ0=¨tQˆÂСÕ)]¢ìòƶõ Ò—B,Ñ eQ.Ì  ‰èÐz¡ú™ª  Ó óX¦ PŸ Û…Y¦ ðô{ÏÖ)ú0•ôm¿ªAb˜Äúu´‰à!AšÂMï 3Àó$é2 Õèc°l*¦LµFÖ(1ÙP£»цÿz©µ$¹+H2@[÷¼l»Ôø^Áþ,[–]Ûºh;r1h O9»RРv¨¿E±”p™}ÍÀõž«ÜFB©mµ²¸K-‚§†ù¹Ö2¬Õ‰àÑyY*éqz…ùÛç•qYbk• Åi§-ŽZÝ­ˆg;Ûúçø ìMó«²Â}eÔ€0›.~:; ý'³yi$Æ>-û€0]ú‘ `]©ÌëûZÜwfÏÄYsY…”ÙCò®+³ TóéPfW0À‹Ã¬BÊlŒÆq´u M™=?VÌYþ„Ù´k•˦̦ݲ-•Êl”…hӨߙÍÄ£Ðá–ƒF˜Í¢h‡Næ Ú…Ô¦™Úø‡‘kuNQh³l`ÿÜh³‚Œµ´Ž+µ‹]ª›JZ£.ÔfÀ„ÁÍ7j¦yèuëæJí£>¡6Û‰½e[š6µ¹ü"ĵYïÔ¶¥Y.šRÛ «í2…)¶ · ïYÏ´ñ;½‰<vƒ6義5sJ+µ$Ryü*µun)¶Y0“%`ló•ÅÎ%ζ5&E±ÍJ¬#¦u¹a[ƒ+•Ûô£q@ó¼q»ðJ‰É<úÛ”®lJÆ·™%Ü:È,ÖÊmÆÇ0AŠ;*¶ùF€†9šnØ>jl³•ÅNü·¶YÖ µ¶Á615£´¶²vÏãi»~TtŸy‘r›‡»IÛÇ?ã‘)à¦ó¼XÔ€ùuÜ|%þh˜Hí75Ð8È%Fyn·¹¿´¼pï±Í¢1éeYyl³,c+Àמ¢6‹Öñ›)ŠíIÎŒ±­äŠíÉKØ@Vd€beÔ…«ØfšÔ6"U;±ÍêÐ-µµù¦èµµ9Œàã%’ µùJÌØ͹©Ô6úÔ0«Ðæ+-¬w‹ÓÚÇ+Ú(›<û¹À¡Ðæ'`ÙïŒE m–¡ý³4í£÷ÚG#Úv{ùIk>@gΞB룅ØGO±õ9%¶˜[ÆYÍWòfÝ5Ø(¶.mK­]¥Àæ+{ÇÂXRª{R52 ãÆk™ŠkíÅ5߈žÝÞaÁµ~¶ÐšßV `µq®ùXÐWLV\³—G¾tZÅõñœàÚÈÇÞó×Ǩ ®Y†1Ür„ášÜñ×…m ¨ Ý™Ü;&ÜmFcCæZÑ6j©¼P~öTÌ¡¦‚6ı„©2WV`´É0HÓaé*hó æåö§Æfƒb¤ðRÚEÐÎ4ìG,Ë\n‚6Ë µ='‚6ç„ó¶R(±™Ç*ô+dS‰MÇsƒ ›WS„ØtéRFÝ2¿[ñBlúÌ¡¨– ­Èfƪ.™M‘MÇ3Ä9ôÌ]Ðæs¹®É"[}ôÊlúܱîÂlýÆl&fÄ7Vº e6?'ý·ùJ˜Íú*ÈÜ–ŽÑx”ú%¯nf³Ñ5su• [£5Ù­¡äfÛ¡?l¡RÉ}´AÈM׸½ÿr3~¢CQ+én!¡¿`dÖ^¤ä>Ë$`€Ië ùnÞD èX‚+ÔQÉ͘ü?qŽt#÷ºù¬\,rKG+¹ùJ`ã3ró?j½'%7Ë ƒ÷X)ä^W‚,ÈyC7½ðxAX¹"ÝtÇ÷K?ì#ŒÄfãÜ À¡ãÜ(Ãt${îäæ½n‹<óFn @Xj”%º~u°$æí¹aúˆå°á_ËEÇÒ'ƒ>«ý _Íõ?ºo{¤žÑÞ¬;~zÜWEåÐ_o€|ðêtýéù`ÿÅ{Øë¯ (yZç䓟 (ÃN+R¤èõ/(Xà½Zß\xdøFrTxpµºïiï)ñ,†¯µÖü¼×ÚFN½ÖÒ„ï,à‡÷» X…ÚíZóóÝnY»]šðmÁ™·—As¤ðX°€TòX°€ÖòH°Þ÷‰?G?,² üéŸ hÔ3¯ëäZæÙŸµ`0y 
ÄJo¾Ì´<¥´¬-Õwê@;æš0—ڒׇ©cÇ×nײׇ{Šò:’Ûr}_€h—÷©÷;%Ômg,¹^õÌtÇU_×é±HÜNÉ"äÔmeã4eÐsG]ÞÄV¿SD%ÛÙj}ßx8_Z泌Ž.šy…¬·&~'žÔ ¬ÍuÖZ}ç ¬LùŠf%Sk[¿Sù@£®eÚ fö—?A5ƒú¿äöÖÅíÄN‡¦¶nÍûþC>‡Î‚æfÞ”6Äí”𒆥c¯âuÒ¢ñNhÚKNÎÚ§S‚Bý¨„UVßÃe-™ËóleËé$Íë!¾o?´ÏêTÃLYìAœN<¹hõd+ïD,kh|6-¦Gñ9EžÝO;õ^âwâáKèÿëðE§–ùš­LŸ:KÆ´g÷$~'ÎO<µ6uª™×tµ¬«9î°=‰Û‰G0[VjäN5ó5]™Šv¶±ç²„ ÐLá^ž™ž%\€ÇJÛž{‘pzmZÙ¦…^¼6̶ŒEѦk//@XœÛeÛ«Ä Ð S9¬¾êã&SŤíU£€ìâxžx\ðÞ$^€nÙvüoâxÒ.k/@÷˜ýµúºÄ ð–,ÔeÔê]â˜Æ2˶+ƒ*×¾¶9>c9*nØÆsèä:V šbÏ­ˆ¬vÇ6;¥Œë»b›Ï¤[ÀRl£¾Ð0£–ùS±ÍÁÃö¿¡¶ñÏ@|òׇy}ê@7Ãþ˜I ·;ºÑc¯£ž¹Ñ„¹°¢”ÜÌÍäGÿuOT¥üŒ8~zÜ}%tÄè/Ï7ÀðêuýéùøÿÅ»þ—oŠ¿·ßÊÀØgãÀ&Ë`T'öÉ¿ ø÷z}“á‘$ÿ _¯þ‹ê÷ˆ ° Õ•­5?ïʶ±SW¶4á;D¸Ævú=BVÚñZõw¸»Âæ²ö¼´áÛ‚èf ¬Z6×§¢´–ÇÂŽj‰# ‹ XÙÚþ\ÁxGü럞K9À€1EPèm º'}œï(‚öQj¸n¨¥ø(¦*˜—Z0J×(‚Y_ÑñÃ2ë¾¢*S±_iF->Š }ŒÐ™´¸XY×0ç–-HóB Þ“èÃÖy,SFë>Œ€ßú „=ø0ê¦õÊ‚:xMâ;ŒŸðçj 6{‰" ¥wÇ÷|A`nª¸}ù(>Š  ³âÎã:xŽ×‡^ù±ŽO\A¦Ñ6í³ÀØ}AbÖØ5’,ëFà!Î|õ‡}ž™eÅÇP-€f1ÍE1C×@‚JgÏ:þ=cðŽÀ]”>S÷qtvb×ÐÌ| /[Eg­Tӓʺ$pÞôÉ“ìH­s§¶ž%ø@»óv¶b“y–". `Ú±fH@mm5h z¥ì<‡X H€ô6Ã*ëHà‚ &¯qþ‰@‚ÙŠWޙӕ-Ñl] ÞËxòFDG€E\÷yÙÙ‹ÓÇÇ÷5°½k&Èu"jò6DGð¾ébò.Ä·j|<Ö5ŒtÙ‡%æ F€®}9÷&ÖŽSy‚»\†ìI#­#x§ëXÛ,OÌxa{*Ž@¹ÍSƒæ&K£Üæ )%ž9¹Ýû.¶TÍ·"ÜnŒds¤ñܦÉt†—c_¸M/V`~÷açp„ÛŒ1(ãuAˆp{:kÄÉíQIÐ׎ð)·;ï z%¯Pnw¾­ íÎ+ùÊPnÔÆ°ÀwJR…vãx’Rhc»šô>¬È6…vÃF€7n‡ @߉: §;´!é ü?Ûã¬ÐFÚ_· H¡Ý›ö²0oÐæBѼûííiê,9Ú]¡ mfÑâ %i Ìlþ›ÙòV¼îöQfW¨\!;Êlæž`äàšÊìʵݮever»k'ߘ]yhñålRf¯Óe•v7fÛù™×Ja6ŠBy9†”Ù™}>ç±ÍçJªWè­`›e˜m}O0á6(üË'r›eñú ƒÛ,£0´£T<·ÏÇ<·í141ûÛleêËbÛ’4@0‹›7Û–beúˆ'¶­7Ãkwl³ ›|¹–7Ë01ƺºiq›¿ËØ0<·92!¿r° ·ù03öMÂífw<§+QŒpÛnÆÃ c £“Û|'û4¯ÛM„ÛÖ¦SÚæ.Üæ;Q[Z+ÙcÛ’n@WØéóÛ¦ÞÆ|b›e¶¬Ê<µ-Ц¿â…Ú‡Æ,ÔfÙÌÓB^Nj[‡½³8 µmÐó†­Çc›Ïè;<Âc›Ea¹ín¢6yŸ l3…I…ÛÚ%<¶ín÷YÁ¶¥>yÁjÛ׽Š´ù”EM-_«@Û0ÄkË×.¾­j )ˆeß@ò`ü€@›‰µ˜€ßvÜÚfíÈsì¬ðj%¡' ^YÝ# ‘Q±~ÌM¨F^oY˧6‚:¾înP Ãý8ßîܶœTøs‡ô¨¬]Ø;z^n²¶PY›ZR˜Wx‘ŠÛLGCÚ 3Uq›÷bbâÛoܶ¤nåu«ÊÛô½¯=<ßäíÌ ˜–åÿ&ogfÉA;W0†ÊÛVär™Ójõj„þö‘nÁÇoÏGèW_ß¾)¢ÐK´³ð$Íx6¢ S³1Äé/'¢À¯Þ7)Ä ϯ^ù¦jyüËu¯Á“Uªwû¨û{¸·ë-°@ñ=" ü0lj›4Ôµ';©ÉÚùZ÷wèü•lJ;_ñmÁ­Yðg¡kµ>\ µ<\pTóHpAçUt±AÍ_ .(>¸à_ .À"ì–³vô/Íî.Om}B¨nr¡F³ƒ2{wXÖ;бs@‡*²âRý[×$«¡/]6†ìŽ?æ°µ´Ú©Žë6 ‹¥íË34;Ô ±8Çl* ½öÌí"º¸lÄéÏ}PÅHaÅUSÊþ°D£.×Rß·¯B¯@·É» /;k Ókм㰯¤`«°Xò¾µ†x¢4E^]½úÀ}\tã½táµÊc â´ÿ ½MßÈx¶A!fgYž…ž^Çm 
ô8=ºÒ\./ʼ"ÍÓ}èØíÔBa·~¼5Ñ ©„­8w"ácwú:¢µäŠlF¡W¥+d–b}µÚš¼2]ñ]/e•wÁBê`BÕ´î4n¼qª›é…vö8ä×åËm]±^›û¾j»m°ýJMõšÈ–íZóv¯£ö{*[>óœ®;üP(qƒÊã•îT š5ÛeâF!^{Me¦«ÀØ—u9ʺ¥ó[3¹ðLÑ>‹ÈÂ&¡<´®äÅ(ô±Ì”Y/ %Êx¨éšÉ…—´mGE$ø ñªô¹Óí£ÐkÖ(ÄA6/ûÉn—®‰Ìôÿvùx%„ݸ&ráž=.ó3 ½r]x "Aûm»Ðk×<3-îúê­¯‰lAþ<¹VzœN¿.<RÚXyLºvY´id u[¦økw®‰L:`Ÿð@!“\3™§p^þ¢Ì«ØfÙàÙä5\øÛgÒ¦á̲ÚÇUhçHöD.´PV×!7ÆÙ}€k"Z½ZØGw¼ T—›ø©`„ƒóÅòš;àÒ£EëÔF¼“«¶ˆ<s'=On¼Ï¤g&ÌwRs%=Ã@÷zÝû-¤çåÞ€ÃåËÒ›»æåï?@Ï{Ñ–}úúÁ+2ûu9èzN\lËûž”ôÜx€ò}KÄú‘)ÓòbœO@¯ÙÐwf }åì8@¿BÔ/¯‚ªvÉ£_Îó~RT9WÒÁyŽ@<Uï ‡ÈIpmg zl¡fGŽcÜAÏQ‡²[ÃÆ«€¾1| í¬÷çÏ4w>é9ÏÃ3{x•ñ.þZ⺂è`<æ¥]_Ö™x0þ|Ð3×ÉlF~Âx¦I7r¾CžoÁ‚J×N&§@‚iÇ; îo‰Ó´u¥üyÊØ4Íï )…< ±Ä·ý€<_‹öÇk+È3&ÔÒq¬U%o” ÏPŒ;ä5'ÕyˆVæÆZɈÈóÉÖë¶£”—Œ#ä)°5¼¢nT ä驱‹J¼C¾Ñ仺òyJxvV?|uÄÙWÞ×­œ·µÃ4µ¡Þ9ÏŽÃÜ€8û çyB‹tlwÎCZƒ¸ò’Ö”ó ·m/7íÁùB«Ö°DTwÎ3Ëg™ôóôîF|…»@ßì.ë:.™D8ÏÝ *º£íär庼sž¡1¹ôt9Ì3ÁFÍm¦K÷0Ìó×\sÙ—n+ê©_ÎîöFzBè¼î²PÔ³°½CJõ,,;(í†zòò¯í@¶ÂµÛóºÞѶ××’KÁåh<‹Þ—;â{?ÞÖËÄð4@0·"‹Â|)ß)®<‰‚²h99÷%†)®‹·t9¥C7y(ñ J6„1êèàaׂ›õ!¥÷ÝŽ|ŒÈâQ<+rùÏWfw·#SÕ¡³ Ã[||Ê.A£¥±»Î °l%h ­¯¦X”ºf2•h¦†àX}ùà’xå«Ç,õ&ϤõÜJЈ®ÍVÖÑÏÃZýû„³æ ¼³üÿÍK¶ã8’D罘<ü¸ŒÚEOjRÕû?sJ‚]W*«_dR'gÂã¼4¸gXõ·jK;ƥχZUžQ¿½f|Œ[ÚQmÛÅ÷«Í 4êöºkòB‰6[Ú1ö¹´g•Õ­YFµ-W ü«ÍîÜU p>ífK;FÛ¸®Ç2[· ùöw[Ú1nÕxne_·n%Õ¶D1‰h:çÊŽ±U½²¼ÑdÛQ~m(í¦*—ϵHãÊÇf»Î>Þ›ó¹©öYçáöÅ*4ªmû|„µv‘àÕ_‡¶/Õ¾ZFµ }ýLÓ슺ÎþZd{Î0ÖPâ˜ý—°«ìÁs-Ò8Þ¾<§œï›UhT[Ÿëmï›UhÔ%”v-'©6M_y®EÊþºïV!.!–ÿˆ·W•‰Ù_µvù¹ ¸µç_‰ÝköÇ«¢ÿÄøf/öP3³W­^•JÛ³•b™ÌŽu>µJîQ!³õŸm<•+`Ofwï ‰ÙÅ¿„vÃm&µ[ÚΰÝ6¿ÝävÛý“Û¸wl·ÝË×ÛmÛ*m1o4±ÝäÙûq9¨ˆí¶yw&¶ÛöFë†{OZ«èèxž¨>i}í Ö=0¸ƒÒéj@iáÒ¾]¦e¿œ}‰˜V›#Ü1Ý6<[`Z·-RQ‘§$¦ÓvÀtº¥À´Ú戀˜OÞt 1n0ÍÎä˜N·˜¾žè¬+TMç˾A:ófÎì– 3ûé¬]N}E:k»ùþ“Î줳JFÏé¬}NÅ@:kŸS½Îi;à9oçxNçYÞþüNË]ì#Ÿ -ÛÅë«K@W뫵ÆÚFa(q>ÇáÆÕ-×tnð9®`êNdzšœŽçå´÷ñÌ—7U·sNç›éœŽ¶9Bq^çæ¼V›+açu>•àulðê"Àu4ê×qÉ’¹×”fàZm®ì×ñÐì 踎N¢ [/":®»²{öYq\«ÍžšÓZMÞGœÖjóK0\ÇÑl\à¸m××O txÐ@é$©œÒ°9œã®ÓÎÏ?7&ë)ã? 
Ó€ÉomÆdµ¹èw&«ÍÆæ`²Ú>(f5 ìÙ 9Ú ×åús8Ýi­p${93¡ É­C‚ɪÞnc@0yôÀdU™?ögE(j抔šY5“l0Í<>Þ]¡™ë–¥rÅמR¹RA*WŠWhe-5¨Ê1¥ÄÚL*«BÆìW”Êéš •c"ÊÄ´+åJ‰ É\!Ï ˜G“£Š9íÒ¹B¨Q:×Wé\ìó’Κ%36x,@é|y2_ÁHçJZA:WÈ0JgÙ'ÆñÖk!6Jçºà“é\ÿ@:ëfY7…t® :¤óhƒ@6í\‰~hgM2}ÍÃQDóÙPD«€…Å#\DÃYp“"z´9@!¢G›ëyˆhmg±ˆèr:c(¢µìÚ¸eçµ\&Etå×"Z·ì“ˆ––4Ñ]WWÖîtÀÿØ!×Ê•&ªÇÓ¡ ]Ù@µÞéø½£ß’:Ÿþˆymw§—  HÊ]†Ÿ¾`NP ¢çR [k¿|ápÁ×MO?ÝîŽð+~zò(Èn»µë¢ÏäQ@ÈÄ…váB›Æ í K+…v9}üI¡]ð‰ƒÐ®ÃPh—3q¡¡M[…v]Þôõèšö()° C,®° t vaè »`O…ËüÎh6-TØénÔdM€2w…öylÙ©ñck¸#,Í æ#9£]É8µÞiMø½£ß‘—y^Ç?íÚn9N?æ² ×…á§/8ä­£5ÁùÂ à‚¯›ž~ºÝšàWüxìõÏZTÍ`|ÛZEï[­ E‹€©”î®?_´&}OÇg·Œü= M·ô ¾˜n†]Ï1{å½$LIrù IòxrH’ã îw&øÃýŠ3AL7GþÂMnŒ›Ž3ø œ »ÖŽë{ Dîr&ðw9p”[œ cxµÿ¦%‘Ç_ÿØ™ ÅÑÝ™ð¯_àëÿÏ™Kö>—òÓܨ—˜o膜b”ªöúªÂ(¯Å>T`Ö< «;y9/LÊW-Ü×â“íl>°8ö”óša«C£ÝGÕºH·LoðQóÚ=ª«:ò¯P›&ßZ°HE‚-ë…웪 /žúµÀQû\{2âàv²^›½z‡fÀÏTf0¨ rñ\‚ îŽHœÌd‚…ކ¤—g'ŽÖçÊ“iFëÑ‘óÂ,Ü£#çµúäè}\T›¬Ž9¯ÝG3ã›KO¦¼Óq>§ÜÎØYÓP÷Ù]Ó%·¹/„6š†ºÏW÷ikN}Í›ßÖ”ú²çÙ4Ô}õVÎim[J}Ù›Ó4Ô}vWÌ©m[J|YÒ¥iÄ;{+.`O‰/‹šG뉯ݺ1ò^~+ çÝÆOHwy@3B³‹"†Òªç»`;’<·|×â#nÍ#²|Â$W} ë£Ö/oŒõQKWÇËã}ÔooC¾ ¡¥…‘ï²ðÖõ…]¾áN ©ws±dRïäH½#zJRò¤^1 ¤î+‚, u_¦vTËÔ`Yz¢úDfœ¨îý3ªG›E·‰êÞ‚ uÇD\’ºó4ê^üÉÕj3˜9ª£(ð+/OTÃͰ1QÝ=zJRwôf’º#IRwÌJ¤>ÑUHjAž¤F€+¡;K¨>]!$TÃ{”X"‰Õ© ¬F'I¬Æ…‘Õ˜À›`ÍÓ$¬»w„Dë´hÝ<ËŸp¤DÂuÃ€ÜÆ<ØnŸëJn|MÜö1DÂ6ê%l7/B‘°‚X Û‡çG¶S°} ?ô·?¿Ñ¦4¶Y(3¹+2$wAÑ’{ÇÇ“ä.0‹ÜZå|Þ’{G…({+žQ¹û¶ùçäV›oWáW€ äV›+bG·Ú¦¹û† ¸ wß8*9`XÀ. 
ÜÚÊ¿ vœ@îÜÖaWØñ|Ýjƒ7të\Œ¶@·Úüóé莛i߬ó²+`\tóv€Üj2ùêàÖ©;iÜ:šCÖÁ­6·Ú,M p÷ Pwºûwêv·.Á®À¹­£YÊ ÜŽ+˜ÝÜOÈiéØŽ3™ï°½uö`;ž.ŒÛj2Ÿ¸­3)ÀW7/ܹ­Í¬3¨£À'àØV› )pÈåØŽ¶9œÚ†÷nb{µHÂv;š Û!mÄîØN_c`;ÊzÙø¡úÆay;ÄF̲§à®¨ºBÁ]!R(¸kÃP‚»¢Œ#w=<‰JÅ]wÙPq6ÿ¼@qWÌxâN»„â®P7”Ü•!©¹ëáA(H‘”ÜZ8Ú¾<ÜÞBJîŠÒqÉ­¥PM™Arsg”Ü#J…”Ü£ÍGÆÜ¹Í%7/š’»”ã&¹ëN¨£¦D%w:Hî´OHîŠï-%wE±FJîÚˆ|—Üu£xvÉMÿ47jŽPr­>†J* ÆQsWxþ©¹ëîï157‹•PsW8®¨¹ÓyöëÏí=ü±qAÝçQï\aú+yÿ¼²uj¿Óºð»Ç¿#só¼úÕÝq s`!™ðÓ¼çµ>¨™ü—oœ€_ðu×ÓO·›üŠg€_~żPå±Ò¢”ã µÝk^lÚE²:DýO|Ú¼P5-¸ËÐÀO÷ÂdƤÓ-ë"<‰t;3üŠê7ü =\óH¥ãÈ_H¥Ç³C*gp¿Áïø ÃÀGL·‡þÆbÑ“qßq …aü¤µÕz­½ŸwyÒAî21ð0·¸T»A«ë–XÌì] Í] ÿ÷¯ûê+È;ÃKÅ÷´,aÝ T5*ÃöbKIÊ©`#ºîes1̼L×Ðjš¼Îi¯¶Šäè ®Žû±˜‰×®/g¦#©~4š,´×Ûâ&†âïj×j—>!Ò/\ „&†Þ71 V×{qf+t-u‰ñ4ôs‹ÁYý,îbÀܨ®u.=ElƒÈsYÜÆ€™9cHå6Le:—æ6Ÿu*“é3"m¸$ëШ˜}|®íQpÁnÇ©•-gn óS [ÎÜRÑçÖ<7†ª‚ç¾xnŒ×¬•>gh “àN-j9sc‹Gϲxn ³ŸÎR,7† ý©u-gÁÞºxn =á¬Å+.œ–:µ¨¥»n,²4F¿ž[<´qÅSc¼c^¸C‡³Ññ)¯Â,¸ þÙŠ\€s_ªÝ . hâ)q: .`ÞÄ©å,_VEC¦weü˜ .Ìwõ</¸ÀíÆËcR[cÁ;Ïë›ËŠw™¸Í€Á]|‘r›3ŽÈíêï0±][·£˜ôÄÀ-»ƒ…QÜQ«ÇÎÒÁ­énööÜ'JÜ'÷‰˜ Á­5‚ç[NpÑšà>Ý+FnŸdºs»Ã;BnwÔò$·ûö™Û}{Ãu‡M‰¸îp>‘×¹ÍyÝ …¯ -ò…A¯I òš8 ¯Q+%ñS¹¯½úGÂ5É\£ž qݰfSÂ5æ¢×"ߌ&^£BRâ52‰×ð‡’×PþÄõh2i\7,;E\7è“Ö´$Ö 9ÒšÒº!I@Z·ú™Ö Y#$¨Â½ ¶×C~6zm"6j!Øk>%dïÞÿˆìÈ¿bðDv¬ØþJ¦Ù¥à4ì±™½ @v>G¶VŸð}:²u<›dÇiNJÙŠ6ø¹8²£¬}vœÙQvÂ>ÆìhšJ ÌÖ.Í•fëTŒõ`vxgbðV×±}Þr›¸>vx‡ÅÄ¥Ã[m®Þy‡wT ™n,À[ñ Ç¢Ã;ÝÀ»ïý£ØŽã9ÞIûWÉé­wÉ?ŒFo½yùÊC§·¶3b:¼ãLæ x«moÁîÌwìÖøÈÊÙgiÌwx«íƒÖÖ.} äðÖfÐÌF︗öuzÇáæ(ÃáWðÞ±Kë¯ïÀàü&;»/B¾Dà#†‰Þ7ÈîÕ–Êì^ R”6%Ì»Ãj„IøÁfœDËUYpÂã$,éÀ8ɱRлܮ<ÈíŠ ‡”Û2;|Š“È˜`1 Èm™¦c™r[Vùô©·ëxŽëíÊK‡Þ®ðjSo3O½]ÿ¢ðæ>)¼eú0•á]}ØFÝ]ñå}ènfÙ©»+–Y¤îͦvƒìfTw…G›ª»î TwÅG•²»bENÈn-2ª”Ý\H„²{´é…ë[,wBÙ]aö§ì®øˆSvË–`ÈîJõ ÙÍêÔÝ\:º»b‰8ên–÷ ð®pÅQxÓ{DáMÏ…7=ÞºûHBxóò.á]a±û±¡a]êËQ §ôp4\@.sC½×ÒðûgpO.§¾L óo9ÒØUv5ð·/¸ ¯ þ¦µÏ“ðß¾rÛ›·!ÿv»¹Wý< þö+‡6®d|ŠÆpã^ƒCS¹–!™úvl?Y£âOÚª2ãË0DÕþ÷488A&­néHO@ÝO\S%ÊsE‰™Òí<öWòí5ûx_0:øC^5mlÈ„c¹õÖ_#uÞzû·þ%áÖãþ¯Ãq슷—ñÅ)õ.¯C:È]^æ¯CÓB„û6„æzþ7¯Cq§Ã¿os:Œ·±E Å£j2ÄP·zC‡²?†Ò®{»¢ˆ–È>zß1…f4úð‹qm½ÄÊŽcP{Mú2óh,Q@q?à<}euˆué6„?*mŠIŽ+é1½cêÑèƒøŽqÖºœ>Šïð¾Æ…¯÷¨c>ÐhlQIñâ[Ç´ªu]| ßHp<`ëh»Õ˜f-WôÃæ;ååž,d0Ú`z@ßÑØ¢žâ1.)MÙ·iùíqÓ¯|ŒÅæF#l˜F!¸ï1Õ!ÇnW©ŽíÊvø hcfUÜËÚ# á]oݯ\Bãò–(«=ør“¿Ò›£qìïÙ•Õè+”®é4„ Öµ"Ÿ†I 
£qìöÙ•Õh G¯3?{rød&e×qmžR+>øžSC4}´éÁ={rÂã$UK{mH«íR-||vv©ÃWO–+ȬaÄhDjÍ‹“Œ®8ö:;2Âq£Ñ“kp÷‡Ä·ìÒà±ä£#cH>ǘ|öd,X?‘aã |[‘bƒ‘j4ŽÿÏžŒ‰£±ÅzžÌ›÷œ’e)Ÿ»,™ð6Óð õ %ÔW¤ ê+êÝ$ÔLÜN¨ßÓ1úli¨×7±oG½"Î~¿zóœ¬@½–N²„Q¯€´EÀêOX\êOÞ=¢žå)êOÞ þL/,P¦×¨?Wn ÖŸ(ñXß‘I¬æ·K¬ï'ßu°~~¾o¼w }ßÞ ßW>X'|‡4¾ÃÌ—ß‘ÏL„ktÂw¤Úá;{> Ïb`™ðH»½rTSOÝè¢ÈŒG1ŸÌxxn2ã‘éÎŒç]MŒç‹OÆÔÃIŒ/ã güÞÿñ{Þп^€ñ;E'ÏÌ<+rÌÄ6¯FK ñjÉœñ}'VVº*\\®pU`¢8 ¯FÃðQ¨ðUÀdDÀ«Ñ_J>ªAùëì€W#$â~y+`þ àÃ1³<$¼?^3gNÀç®À‡©Ä‡9NxuGêuwXЄÂk¯` > õúµ%“¾þ 6‹t>Nx”ôʈO«ÕñaØÉY">\>rÖkKåÎzµM#(QŸl&D}˜Iì#ÔçsêãÖúä–!꓉‚¨W£KÖ+Ú‡ßnt]$Ö¯Bxc=K¢‘õaÚ5”õ1›Äô`}ìõiµ&²>­÷ÈÐÍ”n ÝT÷¥ÈM…«3En*¬ )r#‚´ çS#å|%°¡æY- ©ùZWWóompap„E5_ £(Póu'b¡æëÎ` Ô|e¡š§‹ ©yÎãªy!Hb>·¹˜—IÀ¯bžé÷$æ™Ob¾& 5_½QóòrLÇ6Å<×ËHb¾&ñÜ’-ñB1_“І˜çúIÍc‘”$æ¹(Kó\î$©úº2”檞˞$U_½dQõÕnIÓWTßJš¾®ŸA¯F|? éëÊ/4}]yÌmÍôÿøŸÿŽÀ endstream endobj 3 0 obj 35142 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1080 792] /CropBox [0 0 1080 792] /BleedBox [0 0 1080 792] /TrimBox [0 0 1080 792] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000035361 00000 n 0000035382 00000 n 0000035405 00000 n 0000035835 00000 n 0000035704 00000 n 0000035599 00000 n 0000035762 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<56BB4CA6AC6EDBC523D4E54DEED618B2> <56BB4CA6AC6EDBC523D4E54DEED618B2>] /Size 10 >> startxref 35916 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_has_nt1.png000066400000000000000000002420061360743507500214500ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ 
&Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$9·-ç vpAg’Zó!%Õ€IDATxÚìý{œ,w]ç¿N8rB&©$B’9\¬¯‰ ›Πˆ"V ‚8_zâw=kÎ*vËüÔÕ¥{9«ëeNãþ~bæ»gò²s֣”žM¢a•. 
ÙeS€ K‡À)9B §’>¹aùýñéwUuõ½§/Õ5¯çãÑ™®ª®®O÷»>ý~Þ·»»»» „B!„BÆ%“¾B!„B!¤4X !„B!„$¬„B!„B VB!dÊñ}ŽãLú2!„¡Cƒ•B™r\×E&“™ôe}ŽeYð^þò—çt]ßýÝßÝ÷±„ôC'o·Ï²,ø¾]׃óض~ô£0 £oy¥|“I0*Ù7M¥R O?ý4àþà ?é¾ïã—ù—›~Ù+döÏÿüÏñôÓOã²Ë.ƒ®ëMrjYçl25ìEÖu]ï8÷S¶ûƒÖàû>æææ‚ü#Û¶B»ŠÅ"J¥R°oyy¶mûK¥–——ƒs-,,À²¬¦ý¾ïÃ÷},//#“ÉÀuÝ`±XìûXBz¥“ŒwÛ—·èó~å•òMÆÍ(e?ŸÏ·\å÷}™L¾ï7,l²Wºé+qârÊ9›L {•õn¯§l÷É.™8åry7úU\¸pa×4ÍÝsçÎíž;wnÀî¹sç‚ýº®ï𦹻»»ìßÙÙ öollìjš<°»±±<7 c7—ËÏ …Bp¾~Ž%¤W:Éx?ò¿³³³ `÷Â… »»»ýË+囌›QÊ~¡PhzŸ .4É5!â“Ìîîî¹[.—ƒýq9åœM¦…½Êz·×S¶ûƒÖ !_ù|>é-—ËÐu¶mÃ4͆°°l6üï8Nvà8NÃsY¥‰¾hšMÓº^O/ÇÒ d¼Û>Ã0‚ˆÛ¶‘Ëåd²_y¥|“q2JÙ7M³éýd…>ú;AȰè$³íˆË)çl2 ìUÖ{y=e»wh°&]×Q.—ËËË8pàòù<’Ð ;(•J VŠ !“¢“ŒwÚ¨š¨ÒNÙ&Óĸeß0  …†ó2,ºÉ,!ia¯²Î{e¸Ð`MRHicc.\ÀÎÎlÛ†eYÐ4­£ÑjF°j#­­- æ.‘ÄÐIÆ;í”Òîº.J¥|ß§çˆLã–ý  `þ:Ýd–´°WYç½2\h°&)¤$†©®ëAX€(,’´íû~ƒ°›¦Ù´M¿ I d¼Ó>yžÍfQ*•Ëå&=Búb²¯iÖÖÖP*•Øv •n2 t #dØ«¬÷òzÒ;4X€ä%ÍÍÍ!“É`nn.PTt]ÇÆÆ2™ 2™ ÂÂD1)‹XXX*ommMzX„t’ñNûY˜¡w•L“’ýl6‹l6ËÅK2TºÉ¬¦i(‹ô"‘©g¯²ÞËüNzçÀîîîî¤/‚(Ä‹ªiZÎ+Å“t]‡çyÐu=%‹¥Ñ"K&LHÒh%ã½ì³, ¥R çΛôÊ>Iíd6ª³t*NCÈ´°WYï4¿“Þ¡Ášp<ÏÃÜÜvvv`<ÏC&“A¡P`x$I=J³°°€\.äæ’v(û„Bˆ‚!Á '|àÀ,,,¡d„¤×uqõÕWC×uÊ<ÙWPö !„=¬„B!„B =¬„B!„BÉÁQž|{{‹‹‹MÛ+• fff0;;ÛòuÝö·âCú~ÿ÷ÏþóGü‘%ƒ'žxO<ñÇ»‡óÍÌÌ`}}}às´“o`42~ìØ1:th(ãŸxà¼ð…/œôeLíxxàüÙŸýÙÀ¯·|sO7£˜Ã_ô¢áWõWz=çïÑÂù{ïç{ßûÞ‡ùùùÏA|tpþÞûùúž¿wGÄùóçwo¾ùæ¦m?ò#?²{óÍ7ïÞ|óÍ»ïyÏ{úÚ߉}ìc»·ÝvÛ¨†“88ÞÉž¯•|ËöQÉø»Þõ®¡àx'w¾IÈ7ç´t“¤9œó÷èáx'{>êࣅãÿù†\­Vá8NËŠ†'NœÀìì,\×ÅÝwßJ¥Òàáê¶Ÿ„>|GŽ™ôeì»ñv’o€2>L¢ý†÷I/å{|$eNÛOã¥|$Ìgûq¼ÔÁÇCæ³ý6Þ¡¬ÛÛÛ8sæLÓöjµŠíím;v 033ƒ¥¥%”Ëåžö“FfggÛ†3¥‘¤Œ·|”ña³´´4éKØwã¥|¤Ìiûi¼”ïñ‘„ùl?Ž—:øxHÂ|¶ßÆ;tƒuii 'OžÄêêjÃöóçÏ@C<þüü<ªÕjOû Iíä Œ“é‡òMÒ å›¤êà$­Œ­Jp'¡¯Õj]÷wãk_û\×eøéÊöö6n¿ývÜwß}C=ï¨eüá‡Æúú:¶··Ç÷a‘©d}}<ðÀPÏÉ9œ$…3gÎàöÛoǃ>8´srþ&IAæïa‹œ¿IR¼ßù{lk'¿xñb×ýÝ8t访þú‰ÇX“äsøðaÜxã¸æšk†zÞQËøå—_Ž#GŽàðáÃãû°ÈTräÈÌÌÌ õœœÃIR˜ŸŸÇ7Þˆ+®¸bhçäüM’‚ÌßÓ$ßœ¿I¯ˆÞ¯|´­M”N¥¹ggg»îïÆ¡C‡pà 7L<Æš$ŸÙÙYÜxã=­öèeüòË/§|“žX\\Ä•W^9Ôsr'Ia~~µZm¨s8ço’dþæ¢#ço’ÕÁÇæa•UÅhØAµZ n„nû I:”q’f(ß$ÍP¾Iš¡|“igl«T˜ŠV/s™L¦§ý„$Ê8I3”o’f(ß$ÍP¾É´3¶`X]]ÅÊÊ ¶··Q«Õ033ƒ[o½µçý„ ‚ë†1ž÷¢Œ“4Cù&i†òMÒ å›L3#3XáºnöùùyÜqǨT*Á1ýìO+¾¯Œ*]WV,/«ã4M£iÊ‹öªv]Àq€\Ní·,µ¿Ý9“€ã„c‘ÏAÆä8ê¯<—ñù>àyj\¦Ùø™8Žz˜¦zضúlÛ†a0"Ú°çyð<fkÔó<ض 
×u‘Ë厗s†­®!9Ž×u¡ë:4Mƒišp]~]C×uzýƒs]®ëÇi],¯þ+«Ç>øóçÏOúc& )R<,þõË}¨ëa~äaah—,š¶R°EAÃWî_QÚº¥­Dó™äöYXP狆»e³áïb«²^ù¸A,¯ÙÞþ:"-#É„‰×݈Öꢇår(«[[íâÆ^«È—¸7¨ƒP¼™Q¸à¸‰ÊœÔŠ…Aöï‡Å÷iëø~(àR ɶÃð)ØÒ€@àןG÷ù<Ùú£éÍ#ÇÅ‹êrõ×gꯗ‰Ü`×_gÖ÷ °2ïy4MëªD[–…R©MÓ°¶¶(ôžçÁ²¬@A7 #Pü=Ï ¶ëºÞPáN”QæEùƒ#®ØË1žçÁu]†B¡MÓà8ÞûÞ÷⦛n¶h¤×U«á²2¾>úÅc=†7ÿÔ›»kY<Ï ¾»èvÈf³Ð4 žç¡T*Á¶íŠcš&lÛ†ã8Èf³ †dTþòù|`\Ê~@ºbІl6¼¯išpŽãÀ4M”J¥9•sù¾ÜQ#U OÃ0‚í–eÁ÷ý@Îå:åx¹f¹¦l6|W^yå¤Å†´@ xIQ 3Œæ@Å+[G¸á'…8€Æ*Ñb¸ ñvŠbFo1*ÚMïâme|FCÞz)üA¦Y䯩‹²p"ôš&1 œ$x^XcemM=ÏdTØ-0’ Ö>Å=— WŽ£ÉÌðë·€òoC‡”ahB˜%(£²~,¬úÿPÇ}þÅŸÇþîx³ófYC½&|ñ_Äf>€·Ì¿7ýíMxúé§q÷§îÆß÷ǸþúëqË-·àfãf€ã9p²LÍ„ °€§þÛS¸sþN<|èa¼ê^…WüÒ+ðÔõOá ïü®}ãµÐ=0”ßÁC¿ò¾çûñ…—|Ÿ}ëgñèÕâ3§>ƒ#µ#8uÝ)Ê£óÜŸ‹~ù£ÐuÏ|î\úm—Â÷}½î(>sýgpÉý—àöŸ¸™—fð¼gž‡WÝÿ*¼åµoÁ]¸ ®ë¢T*Á0 ¼÷Ú÷âÅÞ‹ñé|‡#SÓ4ø¾(èbx†(îrž¥¯-á±kß<ú'rÿÖ—¿?ýòŸÆ|ô°°°Ã0àû>~ôG»»»“§©Àq”Ì‹²ÜJ®5(Y—°?(‹0-¯xò¸çïÁ÷~è{ (•J°, š¦ß»†a`aaÙlº®£T*![׌2™L`° lÔ>¢ÞKÁu]8ŽÃ0‚ãÂ1:M^UÏóc±Ùl–eÁ¶md³Ù¦sç;pãZÓ4d³Ù¦ëu'ø Ëå2þÓúOC¿Ò?) |<41šûÔùiE¼WÀz)/æQ*…ØÐ³Û-$ŽÆêt/”É4z¥da%*c㪻AȠ躚Ϣ‹uŒ„œh°öe…¹zqA&òÒó€~À:JiGýÿ¨Ò{^,a[6ÖÖÖP´Š0Š~ò–ŸÄÖÂ6>°l6‹_9ý+ç1›Íâ·þûo©°ÄbÆ9#P¼ ÀíÚÈÏåOŽñR她ǽ·¿úv€{ÆÎÏ{æy€ |ñƒ_Ä«Þð*˜«&®Ï^GöQÜò»·.}õ¥¸ü{.ÇjaPý*®}ìZ7ýíM€ ¼Ö|m8pÊß¾Ãÿ|‡ùÊè/pÖˆžå¬|æÊ–Íf•äªÏºd”Âs:êÞ³ó¼G¾påðÒµ—bû«Û8Ë2Ám‘R8bg§Ç•p@@xtùQ¬×0ûÐ,þÅ=ÿ¿sõïàÑ…Gaœ3 AÃg^ÿ|å…_ÁG¾å#ЮÑð‰?ù®üçWâ3w~üö#(ì„^Uñdº®‹r¹Üà=/c”VFf§0ßVûô>–þs¹r#Œ2M³k˜r/ÇÑ#ÓhŽiT4â_Q/‹@“È™Cza!ô¼z‹çí¤‡»ÈæÖ!Z!‹éÑô ÷ÅNYpÂh!yýg?ûÙI/HHIeÚ5eöB¯Q•í^ë8NàlÊFVµî½÷^<òÈ#}k‹jÕ1šÑ íQ ð—½¦m°m–eÁ©W Ð4 ¹\;;;¥X,âæï¿¹\ç:,™¦‰8Žƒµµµ®‚ÕR;ÿ›óÀo¶Þ7ûç³6 ¼´>”1.è€Cm{[d{©~\Êp¾Æ©o_ƒ2Pu;PFé2Âpf½þ¾q[ÁPFcس¼ÔziÏc߯H¸—(ÙMv›õý >g©š(!çeÀõ],›ËÈår˜7çqÿÑû±à/4LXoüäñ_û^üµã;Ÿý@}îºé¦›€Öß'ˆbP¡åê¡d£ßdXJä‚,n´w‰vˆ‡ÞK%ÃÔH¤õ@«iTZH ³iGŠÌHØq;//™n|_é8Òß9þ='ÍXõ}?H-Êvqí:±Ê^’"Æ¢(Òº®é"ñèAÒ ÃhH)ÖRJz†(øbÐÊùâ†m4-D¢…äØK/½tÒóÔ"•Äeá%^wc? 
u:€ÆÅ×uaÛvͦ>//[¹|ßG±X îIm’ÈÆV úÑ{-šögÊ™V,‘ÍfaÛ6¾õ[¿7ß|s_c¢ÁÚÝŒUÈýš”] ç“ü7É7+ (wˆÓZ[[ •ôH„w¥ŸIAƒ2âÆ*ÐlhÈo’ e¸:hmˆtºaNïW'ýA%é™j]S#ò ¨¾¿Š/|ó x¦ð \Û…eYØÚÚêºÂüÃïýáæ²!¸Pß·„ØëPß½è”w߀ú~µÈ~ñøŠ¬˜‘ÿ£2f!4¼}„ùàQ¯}¡þZ«þÏ$:7˜h-Ÿ^} ^l»†pa¦$‡]Gc>¼†æ{‰ èÂ% þ·,eÔ™fz½­ÂŽ÷‚„·‹òßx…ïû8vì¾ó;¿sÒÍÔã8j¡¥—þð£DŒP‘1þ¢‹úRó"Zƒ@”`9‡x7}ßêˆìH~¿®ë0M¹\.H¥ˆ§µº>Çq`ÛvÊ$9w/^ÕND è­°‘vßø~Xy:›nCUæÈ\.×Ò %¶„ܲø¢>?¸G`aa!¸ŸdÑ%úZ1Z,¯4ÍJŽÏf³ɸÔ×ô­ûï(G¬=Ò«Ýh{6\;,$+pÑ¢@í Âì;rhöŒFé´8Û<-H1 â5² Œ3¡1æ~Á‡å(¥B÷ô@Ó´ _5úC¾gäv2¡ŒÅøiµúv3ò”âfƒ†»û¾|>\‡®ëA¹¦i …Éd…?ZdL¼²âŸËå‚¢hËË˸á†&ýO5²Ó*ÍiTX–““T ‰8“ÂubxŠœˆ‡¦U ñb q/f7z•mñ>µóèC§£^¸7¤²ú4åS‹ŽÁæ]Ã0Édš ‘ÊžÔj‘AÓ´†{dmm­¡KA÷ÏÑÎ#躾紧½Ú>4X»P*…y|Ýxô'Ei§xEÇiÊ·#$iHoÄ On¡q„ÆTAG’•d™766F+ãÝæÈ¨:ÈþV´òê]Žo…Ž0tZž¯uy½Ÿ>ÂmòìE¶Åß'z^xç‡ßÙç I+l[Ý#ÑB{¸ U)>×Kd¾¾ %Ñ<îQ©ê.‹«RÉZ” L&(+Ѽ;!Z­zcc#8‡¼N?ºÞÔÅb1˜ƒ£¹¥¬G3yh°¶A ´CŠZd³YüÆ{¯ü·¯œô%Ò;. rt`1T¥H'k’V\Wy †Z&ù¢™L&¨**¿kkkÈçóAxˆ¯mÛM7ZUl$¤W¢íÊzÅó¼`1>š—ÚK«2BFI½€nϵezEjèºÔ鈶¡$“‡k 'ìãÔŽR©¤ª!¦jAy&S„eÚa ÛF–~\‰_U” 9š¦¬MKn’HÄXFzv±Xlmm•v3™ Êår è—Ëå–ù§íòGSc¨zhÝÒI¶G1#û¢Ó EÕ |!>§nÒ£J4MíÊÚÉd ¨ü¨P_楒$á8Ê™´³³·óHIéu*óp6›mˆ` ÊdAƒµÅbgååžÛïÁ-÷ßÓ2ÕgÝ[oôƒ´  âMF€ëÖ@ù×Û#–5YKÌf|[©>Ã5› µ´¸¶æyê—O \@ÝkÑV¹\çš¾ßx¼œƒÊÝTaYê1hÞªmÛ -^¤•ær¶ÊçhÅÞBhع9ab9Fž{õmÒ×7~)Ný!­’äyp¡‘÷2êÇ¹Š±óeÚ\³ÕÇP…Ý „Æh ÊšXy^¬¿·(¨‚OOœ}øÇþ?Æý‚«Ýb$?šõH’‰ö ägVz:KÓl6›<‡t„kŒR©CøL]Aø½ÿô{øÅ×ý"ð“û+vÃu›Oêy*QVšH¹®ú_×Ãmñ¼%Ï c"À¶ÕtÔ¬>xA~µ´Š.²’Tt}ð¹ZRør¹Ó›¦¬DÏmnàNÁÁ—/û2^ú;/íý„¶ÝÚ ƒS<3ݼ:€:.ª°öª¼nm)…×q”âmšw~+Å?“Q×íʽöl¶Ùøt5f1lãcnµàºêšŸ<FV’zù¡Ñ4U‘¡]U†V²Þ ]W2Øïëâ^ Ûï@Éjü²¬0/!î=Ö4eÇ“-Å–{Æ÷ñö|8Ÿ}J±mõõô«ù¾ßú+´¬”*žÍ8b¤:õcÌú#*.>”S¼&Ž£B<Ò'*®kPFñBý¸54ÏzäýúõAÖPzJ¢nÙ¾¯~[Má¥R º®£T*¡T*y© û%IgМU)xÇüëé‡kŒµµˆ#ÎK € x†‡R¾„B·òdRbض•uÐnYhÅ9n°öóºhSAÛn¼¦V†@7£Åóš·éº2ú™ #|/Qèãcm÷ùÅ‘fsÅ¢ºŽø,W,®‹o}ì1ÜäHÿŸã”"yz~íµ>NŠÃ$jR"ÆÑl­ÃðlöÒØ3—ëÿ×YäÀnÖ³ð†É|RSA?ÓT´Ê¯®ë0 £9ÔWÂ[ÅcY„ú ‘it®¾}§þ·[ É(:…dGt^’ŠÅÖ뻞çÁqœ  Œã8ìIO>¯äyP1•êë”óé'•«8±]²„ ½6îSÞÔì0ªG‰ä^G©¯oµ°%™U§¥ŸjâÒ›ö‚‹ÆBuÔ=#©'&ÂB{ÂZËhŸšbDίVEþ¤žÛ.EÿìÈ9¤&Bô·ÖÁ`)$V* T«Õ Ú¡ã8( ˜lnnâøñãXZZçW30¢ßZÓëjm• ‰8lpPxêx˲ò”:âûÃoEÈð°cÕ‚š0t9¦¨Z4íÅ(Œ†qÇ—C%t!n!d³­ÃÊ•?-áå4VÉ‘Úv]ñoÙÃêÿZÅìÛfÕý"?Ô€ú]ÉCÊ‚Æ*™Ø*‹Å"|߇¦ip§eµêÄ!…Æ P:› ¥‹ùP•¨7"ÇÉ~ù߀úm+"ŒnжB’c€@¿ 
B÷¥¨™xƒ$Bb­~l©¾-ª ºh¬Êý™¥=º> í›Zy£,pAcY™£ûM¯¶m¥R ¦iŽ¿-“%R8ÞG(³"+qfÔ“)ˆìŠ­ÈnÔ—ú"OñBzÑZz¸ö¸šÕÊ”‰·˵xM7ȆºçþsÿïÁB¡€ùùy”"Å=6771??“'Obff›››X__ŸƒPzn&äÊP_ì (†®ÇVq|º2ÜûêÃ$…Y™>Ù”ãº-O‘#CdŸ(ü¢0ÛLñ4EC]„÷n¶~¼\ÃV}»„Mê‘kCýµ ‘k3 ®JõsJ•né9¬E®[ޕ׹hl µÏƒóJ¥°n 8žúÉP’‚x{ê/25$£í+Ń-xmë%ÕÜãooĶɢF½FNò›7£ÑvÄ ñ “a”6}t»ÿ—¬V«8}ú4fffµZ •JǶ---a}}½)l8Ɉ°Û.]ƒšдB u‘aê«¥R–I E4s1Œ6á4Y÷€$èI(‰Ãb1’`,+Ñåå5ð¢Þøèg?Šý¥E6›Eá¿:¯ÐSô{É4ww´q«jÙRÕ^Ž‹¶TÓ4Ì<÷¹À«^5é‘%‚¸nnÛ6²ÙìhTQ죈’/J³‡Ð ºVß¿ŒPAw î!ÝÃM´6è²m¶µûY‹/Vµ3²mþ7ìÆŽÇHÔЈõÌö ^‹/ã¯Doøäþ*™'uçΩÿE7ïÑg6VÅÃ/ß…ü'˜ÑŸ…^Ö×ã26Iãàââb`˜­{0F÷O…·ùŸŽ½Äwëa›FÓlqS¸ÀùBÓç@Hb‘~Œïj[;2pŽªª‘}5Œï§(! @Ú×4é0Òç"ôª¸~¸ãwষÝ4éKOa! “U5>³ÙæH£¨Á*Õß õ¿¢qä¸6íÞØÞöIá¼v8N(ßQ¢ÞÕðšbüyPÞAÊè’>½QD™¶bŠ^†æâcb´Nc¤BtáEÚFÉçÕ6éÓ-è: k€‰Ê“Ïì¶_>ô¼ü‰'¼}Ò#¾zS¥Ëc¯WQ}X™–Ûv‹"QZyDÉX8X«Õ6T*ÌÏÏO­‘E¯Úå½®ØõÕ›ÆùÅ÷}X–×uáºîÞ&|BƉ…påÊ“—o¡"’SùÙŒ iFtÄÀÞñz7¤HEdš·`Á{·‡µ·Mñò·(Ññ¼òøâ«$‰Å•i‰Ése EW¾t]Up“‰E¼¥ÑŠ…­òÒ;Å`·[YcäFW,«Y±÷<¾ï÷ïe’¾À’‡Í¢¹š&†£¸OJùwbֶȽçEú¿EŽÍçC¿®‡¯—ûJöE‰.îêºzDûË·úŽÞÿ€÷¿¹²‚“úh&Á^3êÄ»Ú5gUr”¥®„q“Äs°R©Fj­Vƒã8M¹ªÛÛ*ØøðáÓ¾Þþ([Q}Üà†Mµãr-y¼…B¡¿žMÒ;”.™:‚¼Ñ[ÎÛõªnŽãÀ¶í '!iÄó"9PT8bm[Ì”J¥ÉW•´mõ#%7p6z1]Wm+Ô–¥^'Fèñ‘¾àqƒÕuU˜‘¥©°C„èÄÒêw2I=œSL´|@˲ú¯,¡»ñ|Ï(íZ[LÇ Á8r¯Ä÷eê ‡­ LÓl-Ó¦¶E‰èD«½v×»‰:©¡«wU gI…Z¹=$„L±²²‚¥¥%loo£V«a°µZ gϞʼn'°¸¸T žFÄËZrÂßÿ8Žã \.CËk9Ýð(B&…¬Lõ4Ö6³ÿå,¶··q§{'#Hêi˜–u¨Õô6J‘eY0Msðü§~ñXi Ïó°¼¼ Ó4[Û(Rˆ’d©à ©­XZZÂÒÒÒtzV4žjþ+_†–“´ã8a¥~lóV¡V„Œ É9ªã~°N- VÛ¶q˵·àú×\ßÿûØ6g’hGE¸F Vóß8‚®Õ|¥ØàÀÞUÉùlN( •ðh(`«-„ôˆŒÔ4ƾ±±×uá8r­}(j4_/IŽBÏS^…ø}aùÑ$• ¢Zxž‡L&ƒµµµÆxi9«O@¦ŸƒvN¥¡*Hå¹:f´Œº‹¦ÉÚq%ôú›ÈL&‰†úÖ]€ÙÆyêº.þÍ×ÿMÿyH²ÒM¥$˜R)Œ®Õu ¼†®s¹ïû(•J°m[[}äD ‰AÚÊEÀ°[2¤mŸã80 #x4«ÒvFòS£­SÆM´—ã( %^¨Ÿê®$5´ ml0V¥÷.¤’ƒ€*ªt¶^þÈ‘#SÓkµ#Z mïiÐÞÃBcYön0˜L’X‰uÇ6Z„¿ø¾k?u-.}õ¥ý¿Ç ¿(„Œ Ë » d\hÿY¡Ïf³ØÙÙé^]RŠE«ƒJÕ\Q!cDtô޽âEy×ÐXÝw\HÏ)Œdõ•¤2£ H@©Ô¿zÑÐ’/Òù K5×××±¹¹lØÜÜıcǰºÚgŽ[ÒháEm‡çya(˜Þ'v†“Iéaˆ£40È¿mÛxÉë_üÄïaÛÍUI I¾¯”ž¦Z*]æië”ëU[’B.½T%dÜ}÷§ð—ùJ¬­…aìm{ÅO²ìÈÂB˜˜È}Ò†A·‚–|6Tž* ÕdíKÜn[›¾Ú½ppssKKKAë‰'°¹¹‰[o½uz{±F+áÅq¡V#ú·mÛá¤ï¢?ï* V2IêýW]¨N[[Pò ‰wGMðýþ8têGHBK "êu뺽«çz2qÞüæ{pèЇ¡ëWÁ¶íÖÞÕiaóéÕO+ýGŠˆ‘ÑíOìyEãm}?Œ@ò¼ðG9Z¿a‹½àرcÁ†ÕÕUœ9s•JezCƒ=´WÌ 4)3®ëª‰ßG }.Ǽ>2Yê2›_PÆj Ž‘–Œ¾ïãûËßcm 
¥L†“Dãy-ÒG;ý¯óÆ×† ç:xûÛƉ?˲ ëz³wÕ«?†m¬Jh¯ã„¡½R ‰:Çé?x«T*á7^ö8tú2Vì²7ÄßǶöé–yFîùv« ºÞ¾§â84WšZ¯j}MØ®ëÂ0 å•êGáDM&M}‘»aAÛ·À§îDÙÁV!m›«å$Ñ òûØPž“Ï«pwËrð}ß§C×õömu(%~˜ÌÍ)•á½dˆÔ;Iö¼åº.ªwVñö§Þ|4V{ÅuÇä”GiU‰Û0WŸáàÞO1…hÀ÷<€?úØÁ¶í°Ê˜ †©Âÿg€ïµ˜St>à¸ÖO¯Ã8eàj\Ýÿ0_L#]¥ºjoçrCš–ÖöêÂ.mü†=]ŸcK2|úɨ³, ¥R †aàgßô³¸äG.¡±Ú ×UúZü·JÂøûYtJàïÝþ4X àÝ?ðn¼úß¿[[[*,̪ïcò6™<À½8V³—écÏ| ó×Σ¶PC¡PÀMo»i°÷`80I0ù¼ú nR|L4(4Åb±Á+ÕWþj±Ø¢¢!£Ç¶Cý²k{MmÎI*†Ñ[¢çy°, ŸüÝObæûR: ¢m¢¢Et½uQ£”èq`ee¥iÇúú:N:Õ°íäÉ““¾Þ¡ðÅg}ÇÃ~0ÜØÏ÷ÉB4$ x€÷-­Åð.ýl}m KÚÒ¤¯’‘áymVé#Û\×…eYÐ4-(RÓsþªÌõŒ2 c&ªƒ6…°G[yÈsÀ^#v‹Å=Uñ$¤Wz±R©„ßœÿMÌüÛU,u¿ˆ¦m‡ùâQ2õWöéú¾ Õ?-¸$ÌÏÏOúº†G>¯¾ÐH5½/>닸ꪫ’ØýV+•Â8B&…ä~-óµ}ßïÞW’)§­ã3R)^ª©Z–…\.×u{Ï_•Ä„ŒË E¯)„=ƒÆEv­ûÎ÷ŠïËËJ§áï!ž§D­—v¼A¿Õµ à6¤? Xz}KùäV¿Sû8ÚçàÔ÷[í†ç5ÅK~è?ãßÔ'ÿAJb;ξYÑ FGKÙí+?4’GP|ƶmœ;w¾ï£T¯zØóýÁÄdBø~É×Âõ4¹Pë *‰ï+¯M.—šÐA’\,«÷Ò¥RIEŤuÍPÚÀÈÃ4•çtg‡ G-hÈaÝÞÞÆÙ³gƒçGŽ™ÞÖ6‚ç5}ñžç©BKƒèô®Ë~|$¸@æç€òG7Å`ÍdÔ¢ £HB™›ëP¦>åG{l d2ø¾r/«ÔŽÓº€!CB›kkÍù|Ñuß÷¡kºŠ[Ch¬úP‹3Àà¡’Å¢Òih¬’"öX¯UÝ}ßGÖΦ»šû‚ £`îž8(÷¶ÛnCµZmع¹¹‰ÙÙY”J¥é nÿýܧž\¡¤Ó1!£ x_¬ÿïºâÑWA™VHch*ê$¡H±¥¶Ô•ýhiÈårAËžÞ„ÞU2B¤¨§¤«yžšÆ76Âuö í^Í‹ìT$ßßûX–šï©,“!!À-‹ãu஼ ¯¾òÕÓí]G2Œæ-Vàî‹K¶··Q(pøða”J%¸®Œ£Gb{{{Ò×Ú?®Û¼Í4qS¥2ø9i°’¤`Ú5PеhðŽÓ{A™v8óöHbÉ×=JÝVê}߇ëºaÛ2¹\½(çÅbëÞt„ ×U"&²,y«Ñ 0Çqðæ'Þ¬ŒÒV] 4 æ]u]UƒÆ*!Å¢ .ôØÃ²,ü«oÿWø¾Ï®ýëk'}ùýã8ÊBŸ›S?VÒvÞ9xâÄ ˜¦äöD1M¦i¢P(àĉ¸óÎ;'}½ýáûM ǯ~5~èßì|b¬2¶œ$¿hå JÎwvgÿéŸ`~êS{;q? 
Ò#¥’Z°îÑ먇­Û-ÃÉzŠ®1 Þdäø~£˜Åa|ßÇ_~à/ñ3_ýàùÍÅX¥>CFˆ´ÿì…b±ˆ«½§ž:…Kþüà['}õ}P*©'ÃPáõüý:—T«U¼ûÝïîxбcÇP­VQÙ‹grH¼M„Ï=ó :¶+èVŒ$ Âuýà—BwÏ|ñ‹{Ï_¥‡•$¹95mwÒõ‡z–e5xWû"îæ"d¸nûÌ ß÷‘Édð»Ïú]”üU*ù$AÌÍÕÿq,@õXmC©TÂW¾òü»ßùwé®.IRA»<ìR©„l6‹Wž|åàíjI]§áW9“®{Ëuý…“‚m7æ¥ök?¡p°V«¡Z­vô²JÁ¥™™™I_o´PÜ=ÏÃk66€ßú­I_!ƒã¹%¹#á¦HEÔ±¬}ݘš$@¤ë«ððÐ6<Òqü”þS¸üy—÷ÿFÒáðd øõʾ­œ2•»*8uç©éÊá#$†aô°þmÈNqú˰ʾ®F \ÌŸ0—ÌÏÏ£P(´ ÷­ÕjX__ÇâââØ[ÛT*•¦V;{Å“ÜUÃPIȄس|Ç”ö ÿjà’€­-¶³!CcsxPÄÔA×ÐHÏópù?^ ¢÷”J­C½iÁ^eÛq”ÈÅñ}kÞ®ºêªá_´ôÍ!¤F¡ƒ7Q_Ê‚û°pœ°ZÔÎŽú¢±:q?~+++8zô(Ž;†ÅÅEÌÎ΢Z­b{{›››¸xñ"Ž?>´7]__ÇææföÅÅEœ}µZ GÅúú:VWW{ßo2HƒÜ@w/Òò²z='wÒ'c‘oÞ×"ÿ×#4MS²ÍâId„ŒKƃ5C¡×ÔEË ’A„Á 9¥+ã“ív:ñÿ<ÿ?aüøænßW¿ 컺Òiñ0’:1ïªç©¹ß4™ 2Tà“'O­k*• æçç1??ÙÙYÔj5loocqqq(ozñâE,..¶<ŸxvOŸ> @ÝDKKK8sæL7‹x™"ÞQ×u{_Áq]U–’>‹|8—+® lÛ[x˜&ÃÝÉH—Œ;N݆tzW—´˜š=ÏCn•qÏS¹Ûô®ŒO¶[â¨üÕBaeeQ†ì{Ʀ£tR¡KêSnƒ3i\¸®rL ¼/¦„K¢Ofggaš&VWWašfPˆ©R©`eeeho*EœÖ××±¾¾<ÔÊ€îüü|ÿqô-°_ûÚ×àº.Ö××Õ†HH°ïûp¶m÷f°Ú6û­¦˜íímÜ~ûí¸ï¾ûFöÖoxøá‡ƒ×¬O4-ò³ûUvŠÅ°d%IëëëxàFú#ŸÃ )‹–0 <®‚ú~ŸTR÷ WاŠ3gÎàöÛoǃ>8’ózþöýYIpÿKïÇü[F”»Ê÷©AæïQ<õümYÖÁ cÏ_-ÃztLMÑÁû¿ŽûB«Õ*Ž;†[o½5ˆ™ŸÅúú:–––:Þ/^lÛZçСC¸þúëqäH½Í‡ç†R©Û¶aš& …BãM!Iäñð×åªKŠ9|ø0n¼ñFüÃ?üÃÐÏ=*ù€Ë/¿GŽÁáÇQùøãðž÷ϨÕÉl\á/k·…ÇQÆ*å=U9rûØÇFrî±ÍáP¢iÆu Ž3 C³½¹f(ðÔ2??‹/]¡×üÝ2ˆËîš½k4¡‘ŽÃveS„ÌßW\qÅÐÏ=®ùÛó:¨‚Z¥Ri<ÞUßW^U]ç½0aDïwþ»‡uvv««« B/7ÉöövÇâNzÅ:t7ÜpC“ïy€¦Áqlll`mm­9¿É0š³Â}­—?IZ˜Å7Þˆk®¹f$ç…|Já‘*ÞξíÅê=Z†ÓHá¥Nø¾ ‡dŽÔ±¸¸ˆ+¯¼r$çÛŽúôœG£GÕ@“'Õu]uhèÝ`Å…^§©c~~7ÞxãÐúqÍßMÅh<>pê3§†¯¼[–Òu(çSƒÌß@e\ówÛ̺Èb£mÛpkãèÄQ,ª(ê;Gtð~çﱬÛÛÛAèpñâE*¹ûðáÃêÕj׃&"gÛËVÅièm"{`lòýÐC0^Vƒïû­ d³ÝÃ]$LŒ‹3¤Æ&ã¨ëØ:«· þÂÇ¿£d¨Bý@Ù'Æ%ÛMÞ' xêž0‚ÐÈl–­ùHÀ¸dÜuÛ¬‘6T‘¼b±ˆ­­­ñ |cƒºý”s°—bJÝr3úaff+++Ab@ÅÒGŸ/..6T$s™L¦÷7‰x–ün¹yºÞØG×)Ôd`Æ"ß¼¯\í–o =Kqt½³2îyj±†á¤OÆ%ã@ñŒyQÏþ—³ø“ÇþD)B#(®Jöã’í¦¬#øð/|x4¡‘ô¬’ã’ñnëåËËËX[[ koÒ…žrXgff†ÖÒf~~ÇŽÃÑ£G±¸¸T$‹6%^]]ÅÊÊ ¶··Q«Õ033ƒ[o½µ÷7‰täîj°†úõ`‘%2Æ"ßü‡ŸñÃ/ƒÓO«¦(¥fÈ@ŒKÆ;²‹Eض۟¸Þ3éO†L;ã”í=ýðwÅ¿k²ï‡ŒG4bp'«jdGYìβ¨ã¤Œƒ'Ožû›®®®bii)¸QâÆðüü<î¸ãT*•–û»¢ëÚuõF×{+NCHŒ\¾x_Ÿm¨Ü¾¯dž¹d@Æ!ã® ¸g€ÜçÄ¢Æî¹ý|åÞ¯àÓ«ŸÆ¡¿=ÔŸ±*i œóI Æ!Û­j¾¸®;XaBúdÔ2^/!ÓbA!Ô‘aÛlã”BÆ^%X˜í¿'¯n=²«wPJK>d2*σ+œdŒT¾lÝó*¤ïUyMSýÇÙ£–qÛ´GкˆR x× ï¡ӇšŒÙ,K¨®«~²Yubæô‘ŒZ¶È(«¹|¨á‘\˜!¥Œ·µë)NQC ž§ 
,1Ý)u\Wv¥RXp]·7k¹¬4VɔՎ!Á­ºÓÇO@HÉå€\}êwû]|Çw|‡R€ÚMñù¼R^ àÜ9e¤ŠáJù'IÀ ¡uá¼½"=† I#÷®æój®§ì§Ž¦*Á§NB/…˜¦…±5#N®ë"“É4<ŠÅb°OþMÊ·m 8pàæææ`YÖ¤‡•Jº†KŽ6!S†®ÚÓhè· ¨º•C\öK—R 66£f Cmc(’zh°’Ô)œîû>^v÷ݸî3Ÿ¡Ç‰¤†§® ï¸p¡}EëlVyY;aÛÝ!dRøÀ©Ïœ¾w•µ ö—œ8q¦iâÎ;ïÄììl°c~~KKK8yò$Ž9‚S§NMúZûÂ÷}*ó$Õ0$˜¤šH@ÏÙw¾«ÏyNc(0!ÓL øÇ{ÿŸñ‹Û{Šr9åIêxž«b“äRV¤¡zXÅ»J§Ô¾â’Z­†w¿ûÝz÷»ßÍÍMT«ÕI_/!¤Î4µm"¤o¤à’ë⦻îÂÓögTPHzp€áãøæë_ß^®u]y_Ûõ@÷¼ð8BJOý³{…ÞÕ}Ë%³³³ žUA° ûÏŸ??éëí¶ü ûÊ8I-uý¦¶²‚õ×¾/ùçÿ|ÒWDÈðXžzîSÝùl¶½—Õ²ØÆ†$¨þ@u¸ÞUÛ¦wuŸrÉÅ‹›6ž«¯¾ÀòòòЋn¹®‹L&Óð(‹ û£Ïãxž‡L&;–¿cÛ6pàÀÌÍÍÁ²¬Iœ#EÐHjqÔõ‘oÜwt× iÃP¿a]e¿Õ|/Ê;!IľðȆ§‹û¾Z¤¡wu_rI­VÃöövǃdÔëJÒM&“¦i¸pávww‘ËåÉd†j$If¹\F¹\ÆÆÆlÛÞÃ÷ýŽïgY|ßo0X=ÏC>ŸÇÖÖvww±³³ƒR©”J㎠2$ÕÔOÿù…/DŽ^$’6,à¹>Ø›÷)—SŠzÛVÆ,=M$©xÀC=4<]Åq(óû˜ƒóóóX__Çüü|Kƒ´V«a}}‹‹‹Såuu]—J΀ض MÓ°©Æiš&ÖÖÖP*•`šf`Lú¾\.‡l}•×¶íÀ£)Û]×…ëºðñrêºä¾¶ÊQí×u»~w¶mC×õ XS¹\®YÂã¤ÍKÓËçDÈ´s3«A’4rï?â©gÆó¿íÛ&}%„ŒŒþÒG÷¶àèûÀòò¤‡AÂÁI_À(è©Tx_ýÎg§n!•({Ž+•TºRl°2Œlo†sçΕ€ Ãhøq5M¹\žç5„ïJþ¨œCŽ5wwwÛ^ƒiš÷GÉf³»±±„kš–JY`H0I=®‹G®¼’+Ißò \è*CÈÔc¿÷¬ßÃOk?ns] Ÿ¶¶Ú«¥’ê~±µÕÞûJö%©4X à’3B¸0yZUq&$Uxîaè;I#¦ ·TjªÅ@HjЀ/_öåP—J Ñ|^W.«ÊÀ„DH¥ÁJHÚIc)B<Ðu¶ð#©† ë$µXÀ#$|.”:Éååå`Å8—ËÁu]äóyäóy8ŽC+!¤/¨ÐBÈtâyžò®R÷#C‚!Á¤%­ZÚb|ú¾­­­ €„]€ÀC"Чi𠆫e"„(ô®’4Ãül’v^sýõÀ 7Lú2HŠ ÁJöÄòò2LÓ„ã8MzH!ƒàû>ç’j˜ŸMÒŠïûøž¯|xë['})$EÐ`%£ëzàaÀ°_BÈP`ŸJ’f¸CÒŒëºXØØØ˜ôeB!„BR VÒÓ4/«mÛ-«çóyضÍ"„B!„¡Ã¢K !–.ڀ؉® ø¾Š¶Ð4ÀóÔ£º®¾¯^Þîø(Ùl¥R ÙlŽã —Ë5x]e[.—C>ŸÇÖÖÖ¤?FB!„BHŠ Ášz5X=O–b°¶{i†ªã4?ow| –œÔx8°aAðÂÂlÛfuOB!„BÈРÁšz‰¨Í埛f÷¼v]o†#GŽLú2¦r¼÷Þ{/|ðÁ‰…sxw8‡Η¾ô%|îsŸÃSO=5‘± "ßœ¿ÓÍ(æïjµ:‘ùóww8ŽèàýÎ߉4XkµZÛ}/^ly³\rÉ%¸páî¾ûO /^œ={vÒ—2uã}ðÁñÅ/~W^yåDÆ2ˆ|Àe—]†»ï¾×_=n¸á†‰\û8¹úê«÷|{¼®ëâ›ßüæÄÆÂ9¼;œÃç¾ûîÃ#<‚oÿöoŸÈX‘oÎßéfó÷C=4ƒˆóww8ŽèàýÎ߉4X£aqÚݼozÓ›ð¦7½iÒ—NHW‘o8uêÔ¤/žàNÒÌ òÍù›L œ¿IIdëáÇ4†%L*4‚aCù&i‡2NÒ å›¤Ê7I"‰4Xggg±¸¸Øàë82™Ì¤/=Cù&i‡2NÒ å›¤Ê7I"vwww'}­¨T*XYYÁììlä}òäɶù}„L”o’v(ã$ÍP¾Iš¡|“¤‘XƒP‰ß•J°¸¸8éË!d¨P¾IÚ¡Œ“4Cù&i†òM’D¢ VB!„B!û—Dæ°î7¶··Ûî«T*Á W»ýzfuÛ?nº'z\ÆKídüüùó” Èÿóóó ¯‘¸ñnûÇM·ñ'NœÀÒÒRд:~ì´Œ—(ÚÉxÚä»—1”ï´Á9œsxšT¾Ÿxâ‰à˜èñIþ®÷"ß”íé„ó÷ôÍß4XÈÌÌLC ñjµŠõõu,--avv¶£Ôjµ®û“68sæ 
ªÕjËÉcÚÆK:“6ùîeL”ïýEÚdœs8‰ÒIž~ú鶯Kêw½ù¦l§Îß$e¼4XL­VÃúú:Þö¶·aqqǶ·ãâÅ‹]÷'m<Õj›››ÁóV¯›Æñ’ΤM¾Û‰ò½I›Œs'QZÉÃ4׃È÷4—t†ówøº$Œ÷à„>7Ò…J¥‚'N`ff§OŸnrµ·cvv¶ëþ¤çÌ™3˜™™ã8p•JçÏŸV€¦q¼¤3i“ïNc¢|ïOÒ&ãœÃI”vò0­ßõ ò=777•ã%áü¼ù›Ö„R(‚¸ó¸0H|yÔ _­VÁè¶?iã9räHC¸Bœi/éLÚä»Ó˜(ßû“´É8çp¥£–ÿB¡Ðpž .4É5!à›,ïîî¹[.—ƒçQ•眳IÒ†¬wšÛåxÊvïÐÃ:a<σišÐu=Ø õµm»i6› þwº®ÕË¢Ïe•&úZMÓ‚p„vô{â?„LŠn2®ë:Êå2`yy@>Ÿög³Ù…²M¦‰qË¿a( ç dt“eBÒÂ0d½ÛÜNúƒë„Ñu=ð„ Q#UÓ´ŽF«aÁª<¶¶¶P(˜»DA7—â¸pávvv`Ûv‡Ífáº.J¥|ß§çˆLã–ÿ PóŸÈ0é&Ë„¤…aÈz·¹ô Ö “Ífá8N°‚.ÞÒè~×uƒ¤m©8&D« ’øMHè&ãRHL~ t]o ùÍf³(•J=…ä’$&!ÿš¦Õ*Ùv ‹n²,Ј%ÓÎ0d½ÛÜNúƒë„‘ð­ååe,,,`nn®)~cc™L™L ûE1)‹XXX*ommMzh„è.ã’“777‡L&ƒ¹¹¹@Ida†ÞU2mLJþ³Ù,²Ù,/ÉÐè&Ë€ÒIŠÅ"½Hdª†¬÷2·“Þ9°»»»;é‹ *t U¯')ž¤ëzS/=š¢Fi´È’„ ’$Úɸ Q­ö[–…R©„sçÎMz„ 埤…N²ÕY˜ïJ¦aÈz§¹ô Ö„ãyæææ°³³Ã0ày2™ …Ã#Iê‘Pš……är96Ñ&û Ê?!„ÂàÄ >pàÍfi¬’}뺸úê«¡ë:ežì;(ÿ„B=¬„B!„BÊÁI_À°¨V«øÔ§>…k¯½vÒ—2ž|òI<ùä“ïÎwÉ%—à{¿÷{'=´žùЇ>„k®¹fÒ—16¾ô¥/áE/zѤ/cjÇû¥/}iªŠ;pO7£˜Ãgff¦&'ŒówºÅüýÆ7¾333“ZOpþN7I˜¿Sc°þýßÿ=lÛžš¯½òàƒâË_þ2Ç»‡ó]¼xqª ÖßÿýߟªëÝ+þð‡ñ†7¼aÒ—1µãýð‡?`Y€mOúJ!„BFû°’©ÂqÔ_Ïr¹æý¾¸nøWŽßÙQ]W=^ñŠI„ì'İ,ÔóbÐuõDn×ÖBù5ÍÆó”JJö CÉ¿ï«çrœçù¼úÿþá“6!„BÈž¡‡u‚ø¾zL3®;œs,,„^"Ë2¥œËþ¹9µÍ²”ÚÊ«T,ªcl[½Æ0€r94VµÏó&ý©í|_}_Q9ÙëçïºJ6,kt÷ãËËJ.‹ÅÎïåºê˜R©ý1™LãsÓTŸƒÈ²,¬D Ôb±ñs+Õk²Yõ”Á»¶¾NÓ€ %÷oxÃïŽæÃ!„B#ô°ßWJ«ë*¥Q×ÕsMS ¦¦)¥ÓqÔ±ºzKL3¢Î!n,a”ÏÚšzm«ûã‘GÞ>顈Gvåóáü'úºD\•Ëat˜<$Ò2®ŸGÓ|uL&.Þ‹!+†côX ´–—çR1rÅPÔ9£º~\Ïo¥çIT¨œsé4X@V,4-ªaKñÕ‰na®½ ëݽMÓ ÕÑ ¡¯Q¯#½Øãg÷?!q,+T^¢©#bJ(|TÑZ…´‹ò% ©­í¹\èYˆ†ëKªLü7°ÕŠýövÿ+ô„Q/–ÿ‹¿‹§ME÷G NÇQ2½GÄ‘ûKöe³º™a¨çüÇð†ILûš¨çS¼Ž‚ÌaQÇÐìäiE|þlåŠÓ*QÎÓI·ïe.”ø|>jh°ö‰ï«UÉ£¡4Y¢‹dxÈÊà áÚ„½Ïýé„ã„•¢eµ>®<ˆGsmM)È¢xŸ;æ™ÇsÍ£yÛ€zT¥–¢w¢d‰1+J{/Äçm1žGµA&‹x£þūϕ‹§iH4®«c,+4ôDvæÅ©¥ yu²ø*ÕÔ£²êya=Y¤ïT4å£ñr½¦ÙÚXˆß/büœ9ó䤿²T"Q‘³h«x*%íI<@óbF4/s¿3.G Ö>‘•3 é„‘åѨ  Ñå²+®˜ô‹ä.3g²јžaVKâK»½,aÆa^ôÕ¯NúSJ5ñ<©â,¹ÜÑöZ¢ÅÃÿÄË)J9ž#—S¢%UϣӜܣb0в ¨í@ãñr®èón¡ýñi5ú›—Ë)…?ŸW×0hz€ä{Kq³^¼dòˆ\‹/a<Œ;¾x×›DnåoUÔ/SšÜÙlsQÆè5͵âyu­¼ø­Ø«7J®©P~ª錄ي÷Q: H(k+GÇÖVãb £›’ Ö>™úI§Õò{+¢Ë–­â¿¤±¤ÄqEckä¨ÕÝ.%’e‰+øýµd)_Þ#žÜj,Ñ%4)ãwüÐMú›H$¾¯”БäLF+sµ«$°— Çî´+c'De<Þ/&ºÜ%&û¤4°T>ˆËäò²’a±bâÖ‰ï7'v‹ÌFÏ!ãK(®ÝËN¬ÃEªÀKaº¸Á*¼¤ÿ³xŽ¢bÞ*7(º¿U¡Žl¶1 
Pb(ŠX|z.”HŠ8:NóšÊ0VÄ76ÔûˆB>¦©ÎAc5¹ÈÔ}ĽžºÞü›Ñ‹±—›øtÛ*‚*>GIš~¯öNƇ, †vèQÉ…kHpϹ’ýÄsê‚£ GåWFʉŠ…¨A(вô*i—¸(=qúIâÍdÂ_:¹–n³C\£ë…V†lüWƒ}mÚ"•ÞÚ"ŒhåJÔI)»hï#Iô‘˜ÂøkK¥FcM4Ѻ£•W¢¯‰&—GûÞˆÕÐjy?ZöÃ_9éu§]©bYXiõYH|Q?´ª¤ÛlõyǪ·ýÍÊ þeÿïLêD×äÄ™mÖJâôú¥Ý4vhUÁ1Ц)‘ËçCoï¨Øk†¦©ùF>[2y¢SY.®ƒÉwÕJ¾YŽ$b1\D‰ÿìr~™~h°ö@©Ô½o"€Ð²Õ´ÖÉ<’/†¤ôM/ @’@$J{\kÊçÃ%uQÆ»…F=F’äÑA4•af{w‚Kd}S,†2Þq•Z*1‰a&ešEŽãK˜í´‰u´¬Öý•¢žQéû´° ž·2>ÛÅîär÷K|_ü}[yò{a‰w=MBdÈTÞïšÜ¨¯IZuóhÊz`4_0©$ýúö²0#9Ÿñ[þ¤’$ãyJ ‘<~’Nh°vAV»Úg¢á´óŠw$ªlËRy;ƒ²XTJ»xl%0›m³™Ëõƒ2åúžçAç,50¢¬ô¬”Ä+1 Í¡®B´ìhœhn”v½¡•Ó~4âN×Kö ¢¼'-ÄP Åôb<§µ*<¾öooxEHÒqœÐ$åHz¡ÁÚ©FבnÆ*ÐÚ;ÒMë(Ôrº(ѽ^éQÙv]š¦MÜÐsfäsp]NÝÏårÐêïûp#Ùð¾ïÃqœ†c  Íó•vF#<’dm0‰-œ¤2Ã/É^q]¥² ¬O¦É4*˜'¼_ ÁÚÛîâ]•ÒcC\Þ±, º®+CnÈÞOÏóÏçƒÿMÓ =Çq#Oˆ‰qŒº‘bš&4Mƒëì8 Ã@¶Åçâ8Jõšá¥R)0  Àað} Èf³±5l=ÏC6›ÅÆÆ|ßG>ŸG&“iš°m;¸.Aöe³Yض»îº oûÛ‡úÙ¦é-9Ì’JÈ>`y¹¹¢n’HšM¦×Ur^.s‘L/ºÀ#û¬´·¶7D>¯”û^JõA©T‚®ë(•JÈf³gP J×u¡ëzðWŒNñFÚ¶ ˲`4Mƒ¦iÇÑu]¬­­F¤eYX^^ŒE3æõ{&ÃÏÆ…ïûð<Žã4¶bÈÊä=e b¨ …¦÷‹R(`Û64MÃF‡%4MÓ°µµÛ¶áy¶¶¶š®ymm –eŸéýØáìÙ³CûΦ•†öQcUv CMÞ“Å]×'îÝ'$Šd[$ÕX%dXlmÑXMlKÖѶ_dʇ؀º_" ÖJ¥‚ùùù–Ûgff0;;;‘ë uJF÷ÖVã½¶6ôåÉÇ,—Ëð<–e^O]בÍfƒP(‹XXX@.—ƒã8Ð4 kkkqày^à錿6—Ë!7Àoôøk—Ëå‚ë“1t3TMÓúº¶lM3:Öí1V Nª|*‚À0 ŒSÇ c÷ µËb† "Û†a¡àÑ…y]Ô£>¨aëy|ßïYNÉÞH²Œ÷Ê ½DÉžñ@RÙ^¶Çg3ÜøØc¸ÿu¯ù%¦A¶…¾jÝ«Žãàò}>ñ \wÝu¸ôñÇqðK_ÂÅ‹ñÒûïǯ½¾aàk?ÿóÁñð/î¹³=„ç?þ8.ý¶oÃeßýÝ€G}Z,Úm”L‹ŒKtÃØÇŒ”É—::B6 _×áû~xˆã#.¿²Ò°‡„ù‰¬ëëë¨T*8yòd°­Z­¢P( R©P¡â•ëç³Ý«c(ˆ†œ«÷´‰OZý«^ý¡×Q\¨}ª†[®¹€š$×z ^[[C¡P@©Tj2{1 GÍÚÚZbrfÇÍ(ä{˜¸.{ùGß²zJs]7¹~ðoÄ«^ú*¼åß½>iåóù&CQ×õ–žúb±ˆL&D†£iZƒ¡ ð}Åbš¦5,ÂÈñr}bGï QÄ8ƒxT÷‰îQ£\"Z- 9‘v>ŽãQ †a4÷ÑóËxã‹;_Ó }Òe¼–¥{mÑ’T:E5Ä—ú½¢òüÒûïÇâG>‚gžy&Ø?33ƒç_z)àûøâ³ž…ïýÇlù›fgqýÜ^÷ŠWàˆ´¡«J×ñôÓOÃ}úiO?ý4î»â \ñ¢áyÏ{þèïDíYÏ RMÓÄ÷}þó¨|ýë¸+ö¶F]ßÉf³p]õW…üÇìkÌ5X···±¹¹‰ÅÅņí'NœÀìì,NŸ>Z­†£Gb}}«««=W*ûªƒJÿ=MrºÓC¯^. 
Ê(5¡ Vf#£þ0ûw0ÿ–úJ— À‹œC¯Ÿ'rnÍ×BãVÎ+Q´Nì:<(#¤ÓL±¾m­~-òšNŸ¡œËеóþ[Ö•| YÓ¯ª§h˲ðÛ¿þÛ8õšSxñÙãW|~æQ\zã¥øÃÿ¿øÄ/6¿Ro„–ò±¶¶ÖÓ ¶ä]Û¶ ]×aÛvðr ðÈFCÂ%=ŸÏJ‚(6¦iFnôÇX”)9^Î/JM6›…ã8°mŽã`mm­ÉXŒîŽMm4MC©T‚çyÁ„l—ãåýu]‡eYANwôsÃ×÷}ÌÍÍûmÛÆsŸûÜ‘ËPÒe¼ù¼º†Ñq+ÑjiE´(\'ž’Þ iZ °È9£u¢Q ²Ï4Í@©.ù¾Èä¯}ó›XüÒ—p᪫ð-/âë³³8XÿŽ¿±½¯~ã8úìg¯7 7¼üåxææ›qó{Þ¼·Ü_²¸t!ò;ý¼~ÏuñÿȽ¢ipë×ø?ÿº®#ŸÏõ<ÏÃßþíßâ_þËÑužvÙnÅ^ŒU‘™h´LtŸÈ—ÈC±X.‚ˆ Ê›_$%\æÙL&ÓPÃ":‡‹á7¦äÞÓ4 ®ëâò'xñ_þ%pþ©§0{ð ^ré¥xãããÔ[Þ»þ ë:n¸ì2¼ñóŸÇ¯½þõ¸êª«‚¼°O|âxîWà;~ø‡ñMÃÏôñÙ™¦ Ó4±Úaüwseeex_|¦MÆÓã8,Ë ~ï¥n‹ëºëÆòûò†K.kÛØ9w7Þx#^úØcxæŸý3|üÿþ¿RõV«U¼¦®<øÔSø‡pÕUWáê…|ÏÊ >ÿùÏ7Ü×ò»õç·Þ,êDëËÈyÿ4¶PdÛ6>UO/ü™È}]D’q?÷¹ÏíÛ51ƒµV«¡X,Â4M\¼x1Ø^­V±½½Ó§OP+´KKK8sæLÏ7‹¦5{£{Á¶ÃVŒ’Ï„’Ý¿å+†¥ eÌ­A~ÑÓ˜Z„5|°ôAlý@} ) eœŠ.£!46ã‹]Ùú{ÉÿQýljk(ÄŽÉÖ¯SðÕfýX9ÏFý½¢´\›;‡´~ä\ƒL:výõY„†x·þˆ=JôzFÌ(å{XH;ƒ&ùöêŸUäsÊçóð<óÓƒC~€\«xÝû_‡òÏ”ÕT„ú¾Êê”êÏ£²Q_¤ÑM]}_6Ô÷—CS‚®ë(o”q×ú]xü‘ÇQØ©{­ºWQL¼æ×Š¢º~\]N:y”Äû$žP™p———ƒÐö d2xžä\—J¥@áÚè¡|aÔSЉ¢ˆþ˜IÞ¹àû~ðc°³³ƒãÇ]f¢LƒŒw¢®OÅX-‹¢-ß‹ã8(‹MŠ=ÐèYò}?ÈÓÿ€‹UlÛF6›ÅNÝ%7 ;É´ K/~16t]M­Ünõð­—Ô¯ï£ÿå¿ væ .yéKñ¼Ÿø ¼ìoĹ.2+‘íc;~}Rw¡2ÖB¡€o|ã#«C0í²E"ñz©ß(iHQϦ(™²ØÑªÖÐz.ÖÎÅ7º •k‘ó†EŽbèº*Žù“?%‰Èn¯ßÚÍ ˆV&Ó$ãÅb2òVÅ(™•{!:o[–ÔS‰Fɽ ¿27JT˜ünüø‹_Œ—½ìea˜á÷ù|ÿìsŸÃsÿìÏ‚î#OüÔOÁõ<üáÃãÂUWÁ¯§\‰þ½Ï4Mí±¹ûzßÓfÌñE×Vó¶lo7ßËç=×öövßó÷Ä Ö'N`ii ‚°8þ<4ÄÓÏÏÏ£Z­ö|nÃh ³î•bQÅÆkZ}r÷}õƒÞkÍlÀrý¯€}G"äá`";54•íˆËR/¯‰_g¶þ”!á£ÙÈáËPGÊ)Öÿƒ¢e,´B |€„|d×r=ùúcŠ;_¡X>»~>=2Jù2Xõ~oðè«Å÷Ì~6þw›û@fo‹ä¸¬¡ñ3–çÑ…J®¢Þy3ò¿]?^D¼E p šcdÁDd¨i õñD£ìú6JV´ú¾lø~1X_¼15æZóD½³³ƒb±ˆ¹¹9är9”Ëå¾V{>h÷CÝ_cóÍiñv¸®Z ÜK.”x¥[TѺçÔG±m™L¹\®Áó(+Û@¸@¯+MÌÍé,:Ôt(í­ `Ä¢~ø½ïÞûÞ‘~'êm;ß7ѨQÖ!˜fÙ\WEd³Ý=«®ë²,8ŽƒB¡ß÷‘Éd‚Ú^ëNÄé–rÑmN ù¼ú&mí$Œi‘qÇQq¥l´òØG³Ùl`˜Ù¶ÝÐuCöË⣨wÿÞïáÙ/xŒ×¼¦qþÎdBZBC øâÕ›5MÑg= øöoÞÿþ`ûåPg;£s,÷Ù˜ˆÁzæÌT«U”J%¬¯¯7ìëtSÔj5ÌÌÌ´Ü÷ä“OâÁÄöö6ûö°¶ì·jÛ½‡ûP–lЋ§e"ôzIåúg`Gž·ðªPFCʈ°¡ ËB#RĶåëïãEÞ#n„f#ï£Z­â^ç^<òÈ##ù¸F!ßðÄO`{{‡ZÓDèIÏB-D¾3Þ1ó|ä·>2„7‹=϶ù½åÖŠr-žw»÷â‹>_ϲqÍᣤ×nMRÜKrwŠÅ"²Ù,ÎÀ”êî‘j‚â!•jº-ÂÇÒŒ¬Ãfæï^(•ÔºE\°m;a@y@ã‘S‡TÙTJE³â³gÏ¢V«u•©A˜¦ùÛ²†?­¹õWÞ.a¾í"ezÂóÔźn8G†2.S ÃFtð~»ÁzæÌÌÌÌù`•JçÏŸÇúú:–––Z–Ö:Mò×^{- 
Ãbì5­EÈc¤z~ƒ\E*öÄ=¤€ÊT#Šu/DÃŽûe˜L¦£È•|ÀóŸÿü=çd2JÖ·¶†JiFËï¬T¯h˜†P’‘ýÌZ}DqÛ#ž;îA-ØH¤A¯ã¨Qí"4tµpß•¸½ò×xÞ1ôak%½L³ê¥ilÛ†ïûCñ¦ö„Uͯ¦Ç ZÃØ—}J–––0;;;ô9<éów/x^ëpwYp› ŠLFý*x’æy{j›‘VWWQ©TF²°1-ó·ï«…—a†‹5›Í6lÌf³Ýï ¹±Z•*Ž¶åšæ{kL ªƒÝ`=räHÇý‡ Väæˆþß+¹\ï]gZ†kZ÷˜1 Ž…QŠëº=i!Ée\ò=(AŠã„?ø-ô])PFE¢ˆµ::‡<÷Âô²¤Ëx'J¥ÞVì¥ÈÌDä_úìðÞ;Ó,ÛB>ß:„RtÎ'#ûŽqÉw¿Håk]‡J qµJèC="2,m^¦>TŒŒ„¤Êx/ôÒoU Ð d¬F•œl¶ûj»DϦ)娇žÈdøL³l Ùló¼„Aî$©Ù°ë†+HÙlïJÕ=‘d/ÃPöV!íýà8NÐÎÎ0 ¬­­õ–rgÛaYbylmÑsšú2XãÉÔÛÛÛp3338räÈÐâÖWWW±²²‚ííí ÉûÖ[oíë zÎÉbô®í;GRÍ0ä»_ŠÅzJ…µ Q\b…\×Åòò2¶¶¶h¬’™„Œ÷B/vàÀò¿°ÐØ2#“ Cv%Ť]A$ßWŠ’4…å½—X’*ÛB<VúXwÍRÖ5Š„Q6t@0¦² Ò~`2.S ˆé^Ôáb±Çq°µµÕ¿^ÝjÅ'íøPéI>T$XB‡ß“ÁZ©TP(P­Vƒ˜o).a›››8~üxP&»WZÅ»ÏÏÏãŽ;îJmb{žZ¼ëe>ì{áDú‰Æ¾Tiq%z³H‰w ¹¿˜ô(#ébTòÝR-}#ç¶nÈ‘cQÖ¹xBz% 2Þ+ݦX)H3üǽWkk¡ò.E8Ú¡iSŸw—F¦I¶-KÉ·¨’³ê8666F[‹ ^®ô†l0NûÉÑ"c%)2®iÃ)hžÉd‚êím%Õ¶Uxå0ôGë}¾ƒZâÓƒ¤8£Ž°S@¶~žh«A¡Ý!Å¥8c ª~…´ê“Î#è²hlçgD®OêÏÈ>aÏúµ6¯)E®k ôd° ÌÏÏ7c›››˜ŸŸÇÉ“'133ƒÍÍÍ i{ÌÌÌìé&éµ^’”~°,µÔÓ®¤»¬BD”|ß÷‘Ïçáû~C¿2éÝ è«'½šòù| ‰±ÊüÕýÃ^å»J¥º8K¯(yOÊ:!1Æ)ãݰ,¥£tr2I®SËP`éÉ-ùLÙlo½Dy'©"I²-D Ô”J%X–…B¡0½B×Õ ;RÃ8e\:tíÛ¶;W[÷}•ä-)½F³8[/ÊHU*õKW ¹Ý¢†¥`"ì(E‹ý/Ï£…¥7½Â[PÆb¼8£t>ˆ@u#ç5ê¯÷"¯“G Ô8F›í6”]$Èç$ç‹vFØ#] ÖíímT«Uœ>}:(g]«ÕP©Tpüøñ`ÛÒÒÖ××ÇÒC¯WzéàǪTj_ìBV."»]×E&“A¡Phê[ÖÉ£ªÞ_¯4È(qœzŽvÆiNÖ®·L‘׉Êq"dHÈBz'òù< …B¸*ïû*¬WrKÄ}U(Ð[DG¹ ø¾‡åå<4Mn ‚hȺ놅DŠz¤uðÚú_i»%a‡¼…H eiA&Ñ*Ø-±,¥ßçrC/£žI1:m4{E5tîWëel 7³Ãþ^»j´k¿ž~:z´;G+¿žñ‚“‹þß·NWƒµR©`qq±¡÷’SÏOˆaè4`z€IDAT£hn¼Wº­ÚèzL‰±m¥´sÍJû‰ú9%߯\.dt2˜Œš†<îøDî@MÄrœ˜·JÒ†,Lvšn-Ë‚¦imÅ–—•r3ÎP]iQ”4<¨¹hV6Ú)Hd¬è:°°°ŒB¡0¼öxÒ[Ķþ¿ÀuoVŒ›”>d# ”¿â‰Ñ¡ŽqëÏ£½¢%RMäÆCØ[ºåu Ñ3ÕI±'SE²´Ç@€b±ˆ\.׬W‹ü† Aˆê8”ŒÆCgÈÿšÒqõ‰ŸFâ¿ѵiåçƶûòþ¾NÝÕ`™™ij$\©T0??ŸH#5Š´™ìdK6ȶä_´ÂBèj‡RpJ¥óýH¢é؋ؠ«0ÓߘHo;Çi® ìy{_òï†Ý”·]€ú‘œ&YüîyPÒÏ·;§¬lëõc¢yQQ%_Ê…E„Uðs½d>Âð8ÙÖ)/*yy·~[‘c¢¡tÂþÂZ}ŸäVÉy‹À}#îÝý£û~¦ÉÔÒ4 †a g·meEÜñlàu¯Pо¯©ïà¦ú1khTÜ[yN⊽ÖÞù®õýÛeQžG CÙYÞˆ#÷ŠäØIE|yÿR}»<_FxÈ}#a”2 ÄÃå~£ 80¶½÷iÖqxž×:ü]úòjZ8ïá¢JüþÆ…Ž`Þw·]<|öá¾^ÞÕ`ŸŸG¥R ŒÔZ­ÇqšrU···„=œ’€®·ùД¿-Q§ 憘¦9ý ¸Iêé˜Ë]O¨/æ‹ì·JR‹ë¶/Iàº.òù|sUàVÅɆM¡SÑh|12e¿(ÈQ¥PŠ´{.ʶ GGcf´HG;¯F¯†Fü˜ø4ÒË´²û$ÔNò½ä}ÿ5à Îð˜ûØ@}špà}ï{ ïxGix)N}µâ°ü¼ºNêÎ^”þ¸Lç:Þÿ¿ÞÏ66$ÕÄ3=¢–‹EµÂ9jo« †¤‘¡ñÁ~ozÓÁþu“bQY»ÿþÿ 
<ü½¡×½S¥^Ò™Váëñî (ïµä¨·ÃÙë@*µ§ô—ÌOd°ú¾£¸u}]£añÓÔÓ³Áº½½³gÏŽ9’˜^«Ýhåm¹ˆÞæÎq²œ¢Ã¢4dêÐ4 «ÕEàëbžÏ燛óDHÂp]UAUjqD‹s4ôZÍçÕÍ2Ž”‰p dxž‡Çÿßøõ_ÿ‰Þ_$½…MSµ9[Ö,«£GGçòº÷õk+_›ô•Žßï3ÕP¾ùÁoâØóŽaõÇâÖÓ§GCCOëúú:677ƒç›››8vìVWW'}ý]ÙÚRùªº®dºç4¨}±Öº‚$!S€õþ'‘ý­÷@û«?iØ^,¡isWIª‰gy4ç”¶´¼¬Úë½à"l“!ÕF‹õç‘V, á‚„ì|>“'{Xt”¶0pþ3¿ ^©òú¸O&„m÷a°Ö{þ>õ¯žÂk¯y-~~i ?¼¾®ÙŽoßÐSÑ¥ÍÍM,--Áu]¸®‹¥¥%lnn6õgM"¶­VÙûJÓsäüÛ —˱Ï*I6 ­7[%Ú-o…¿¨B"ÇI\4B†M.ê2ŽãÀuÝF¹·¬ÞŒÕ =Bd›ô)Ãt­-L¨J¥k‘s´j@OÈäóy\{í‘Î=WsPr»à5¿<ÿ'^É"4dâhZ¶fjþÜ~ùé_F6›Å¿ë]ª70Õ}EW«ôW=vìX°muugΜA¥RI|h°iªGÓ"d±º]Å u¡n¸ë¦»àTl¶ú|GBÆÈZ+ŽÔfÃm o3b€¤›R½1¼Ø¡¾ï£X,6÷é³màÜ9õ¿‡Æ~¦ÒÇ´€Ð`¶¡ÑêÛüúös].JÎEÈð}™L¦iÂuOÀ÷#ºUäg¡œFsR_~ðÓ?Jc•LßïÑÖ¬×ÃF¸ØÎºû—žsX£Å•fff&}Ý=ÓR/w]õдÐýº¶¦Âf €«»øùÿÀ¼U’l¤Ú` ¯óþÏÀø®HX|åò¯ÀüN“$µ”J*Ì,ºNS*•š{ Û¶Ò˜êyQMÅôÈsÍy§4>ɘ¹ýöÇqë­ŸÇoüF¹\™LL¿q <ü2½ÇËrÐE&Ìòr˜ªÑSWjþ-•·¬æEG²¯Øs•à©Äq”Û5 æ¿úñ ?½P$ÑX€ó ?¦r8ŽzàÚ†>]x¬ |êYŸb¥k’jl[Õ’ k*ù°m»qU¾Y`-†ê'’H|_-Äœ:õ~臶‘Ë©Ú!ÁzºT§=JŒe©yù\·h@-" ŒUËÂ~û·‘½å.¶ïsö§Á꺭ó–tàÇþèǘ·J¦0Ïå—z]iÑuõ£àãºÆuøû/ý=Þ“}Ϥ¯š‘ -l¢ëŒRh©añÑ@hœ2T—$œ|xö³?ÙÙÍÍÈÂK4:` ,æEM©Ydé„§×Å×åW°|饸+‰Ü÷ôl°®¬¬4m[__Ç©S§¶ðœç|…B¡1Û‚Š Èu9Äa2A,+ì‡Ýób÷\z)f––˜ÊDô`°ÎÎÎNE¿Õ<¨ ½URw´*p˲ð©ÿô)üÍ›ÿfÒWNHÏØOÚçÏÅ3€v‰ŒðsÏüîø©;&}É„ŒŒ\Ðê ¼ã8ð<±ŠdËËl4O¦ßžyæ —‹¥r8è­‡ªm«ö„LžDÐÒÛ·Ø6.ììàçÀ[ð‘:}ç°nooãìÙ³Áó#GŽ$¯µŽ°Äû:Óp–eá#ïûžûðs'}å„ôÆà.…Úî增÷¡Âh¢É"®ëâ9ßõœI_1!#%W7V}ßG>Ÿo®ò.ýnh¬’)Áó¼ã±*5ó°Û!q˜ô°’iÀ‚Šð< XÄ¿>t››“¾*’ z6XÇÁm·Ý†jµÚ°}ss³³³(•JÉ ö}@·W^ð/€_ú(ðÞnyh©TÂÚÚžg>oÒWMH_¸ÿ&ŒKlÀXS Œ Àwšò¿]ÿ[ä´n±c„L/¥R=w{(™ªÐRC¥ë*žÞ&2Eœ>]Áw}Wl%‡°(M'úŠÃ$d4ÌÍõPØCئÏvñ•ŸýYŒ£Gb{{{ÒãidÃV¾°<Ãq`|<™:l0_ø¹Æb”b™ä}ßÇ}q¾û²ïžô%2¼Èô^{V Žã`-F–Ï+åÞ&2Eض‰l«‚IÝÄØóÔßžªÜ2bêH{$'²Yüæùó­åžìkzò°ž8q¦i¢Tj®ìbš&LÓD¡PÀ‰'pçwNzLJ)‘R©>€§W€¢¡&ñl6˜Ä-Ë +/@UÛ#d p]À|ôOÂ8È‚¿ûÕ_Å‹þø1)\*•ð·¿Ï}7ÃÝI:Ñ´° ý;_üN¬åbƪm·¬]@H’ñ<šæAÓ"‘kE¨hšnº¼m³20™8†Ñƒ“߇êrÑ¿›zg‚<¬ÛÛÛ¨V«x÷»ßÝñ¸cÇŽ¡Z­¢R©LzLÎ]¥îšˆÁíy|ß§w•L%Î]_GöÙw„+蚆—ýÞïá«_ý*× süįÿDXy”¡i*UÏó<¸®Û<§Sy'Sˆã8X]ýLl#‚vf¡Ì“i¡µS°, ¦i² i¢«ÁZ©T0;;‹ÙÙÙŽÇIþj­V›ô˜ÑPoªm. 
¾4” B)yo)bçèû›’ ßøÏ¸îèQ‹Ejâïµï…fQ¸I:q] žÙ¿~ï_ãO_ö§xžzpa’LŽã4/¾t깥禗„ŒŽ……ÊAuô¨‡±;Žƒ\Ž57H3] Ö™™\¼xqÒ×¹7b¼çy7…å‰%dZˆ­ »®‹ƒÏù.\·¹ Ó4Q,ñµ_û~ð+?ؽW!SŠe©{ð÷ù{Üxã8N˜BÈQ.?|ßP¨bŸ/f+2a\·Ëš‰Uÿk@Mâ™ <σçy,¶DZÒ5‡u~~µZ Õjµ£—U .ÍÌÌLzLÍXPFk]¿oÈ]T˜ PdšØØhø5p×½:t lé[xÓÁ7áYŸ|e›¤ßWöèÆ†šÓàúÀs_ËÕæJ=™BlÛÆå—Œk òûºUY%$a8N‡²TU`¡Tr9X–ÅbK¤-]=¬óó󘟟G¡Phî[«Õ°¾¾ŽÅÅÅä´¶‰¢£áæ°m»ù¦à‚™&b¿®ëâ¦[o@Ëh¸å–[0sß U’Z¢A¶mã¯xå¤Çqðo¼Ú{lLújé×í‰a ¬ ¶O9Û¶I„Dè©­ÍñãÇQ­VqôèQœ9s&èÅZ­VqæÌ=zÕj«««“Ok"UõZ&t» ÁJ¦Ïó`hõð± àªß¾ŠE Hj‘V6ÙlX@ïºêu4XÉÔãyîºë!|Ï·]®60ýšL!ž×ÁÃj ,iY€iâßþÚ¯acƒ«3¤==µµ™ŸŸÇÉ“'±¾¾Ž'N4í_\\Äêêj2½«€Rbê7ŽmÛÍ7EyÒHÈà8Ž£r>t°5I-®«Â€u]µU”B”Ï—T=\x$SO©TÂ[Þ² m a“½Ée*4d²x^‡–×E¨"Kr`©„S««Ðصƒt¡'ƒVi]S©T‚páÙÙYÔj5loocqqqÒcj TŸ£<Ã:«ç‘¬„¤€,ÄC×CÜ÷}8Ž£!¥k™ï«ò”ç˜øG¦ ñ®>ç)oDíÈ,‹•I"pÝ6ÞU‹0ù<žø…_À¯üꯢ̅Ò…ž VAZÜÄWB*• VVVàºý. Ž Ø>³ìGrW}„J‹ê‘)Åu]–'饠 ŒÕxé†ödç‚ìAI¦ŠLF‰¬ë–pë­·bÉŒWöyÛVÈ™0Ùl›)ØBص X ÿïW¾‚l6KGéJßë´òø·=xhTì}¨paêúdJñ}¾ïs²'éÅàÙó´mÛØÙ‰ÄÁ{ž*OÉÕz2HÔÀư±qÿðÇñ‰¿Úèϳ (—@+I.>TGYS1 À4a-,лJz¢§¢Kià?~ì?bñà¢Ês’fÆ:T¥2ê SŠëº &éÆŠÿµy³´@Ð4M…™¡Vís¹ T„$‡RIÙ˜º\}õïàä‹O†ý)ûÁ²ØÂ‰$AñS@6 §®¿pÁô¾0X}߇mÛ¸îºëÔM³5é+"d8ø¾ÏjÀ$½”ïO÷¾»J¥0bÆpÿý*•Š;™l»±í‡çyxáó^Øz’4$f,ËÂÖ{K’þ™H[›••,..Âu]Ü}÷ÝX\\lÈI:qâfggƒý½V*ŽÃUK2 Æ%ßìÁJ&Ÿdö§øÁúAåY}ð¹Àé[Ch2lIúY]]Å‘#GF6‡cþ†l›ílÈtáyÀåǯÏÖM„t «‡u~~µZ-(yÝÙõºN MÓh°’TÞfd?ðå?ø\vÙeTØIºØPn¾p¡³Á꺀eu7j Iž\üðØc1Ú‘ ž Öùùy¬¯¯£V«µ<¦V«a}}‹‹‹˜ŸŸŸô˜!„¤€§þÛÃ'öп•D¢Olnâ /yIãvÏSU@ªËËÀÖ[ÚéãÒçãã|œ+=U >~ü8VVVpôèQ,--5¥gÏž  ?~|Òã!„’<ÏùÌgpåm·MúJ:ºëâ©VÅ,(MvvÔ_B¦ Óþ‰Ý:ÈpéÉ`ŸŸÇéÓ§±¹¹‰õõõ¦ýKKK8vìXSa&B!d J%Øš†·q…ž¤ ßÇõ>ˆO½ìeÛuØØ˜ôÕ²7êEÅXp‰ “ž V@U>~ü8VWWƒf€2f“·J!$%ø>à8Xð3 ‡$iöñWW_7p1†¤€¡ .år¹I_ I =¬Â°šÁB!-qx¹ V˜$iĶñGÏ{na¸/I#õö¾ï³*]‹.B!c%›…Ãjï$­ll úœçLú* :VÉ¡ÁJ!$q0ÿ‰¤]‡Fï*I+:àú.eœ ¬„B‡ã84XI*q‡Ñ$Õø¾Ï–6d¨Ð`%„B#ô>‘4ÃE2lh°BIŽãpuž¤×e¸$I?4XÉ0¡ÁJ!$Q¸®Ke‡¤†K’´ãº.eœ •¾ÛÚB!£¤P(Lú¾ïOú)Œ" ÆVB!„1Á Ø$íлJ† VB!„BÈžaÁ%2 h°B!„Œ‰r¹<éK ddø¾Ïp`2t˜ÃJ!„BÙ3†a0$˜ ¬„B!„=Ãp`2 L!„B!$‘Ð`%„B!„’Hh°B!„BI$4X !„B!„$¬„B!„B VB!„B!‰„+!„B!„DBƒ•B!„BH"¡ÁJ!„B!$‘Ð`%„B!„’Hh°B!„BI$4X !„B!„$¬„B!„B VB!„B!‰„+!„B!„DBƒ•B!„BH"¡ÁJ!„B!$‘$Þ`­T*¨V«“¾ BF囤Ê8I3”o’f(ß$)œô´£Z­¢P( 
R©LÓD©Tšôe2(ß$íPÆIš¡|“4Cù&I#±Ö'N`vv®ëâî»ïF¥RÁúúú¤/‹¡@ù&i‡2NÒ å›¤Ê7I‰4X«Õ*¶··qìØ1ÀÌÌ –––P.—'}i„ìÊ7I;”q’f(ß$ÍP¾II¤ÁzþüyÀüü|°m~~žqôdBÙ/¤i¼”ïÞ8sæÌ¤/ãÊxwÒ4§í·ñR¾»“¦ùl¿—òÝ4ÍgÓ2ÞD¬nŠZ­6éËKçÏŸÇÙ³g'}ïP¾{ÃqœI_Ç; ”ñî¤iNÛoã¥|w'MóÙ~/å»;išÏ¦e¼‰,ºÔ醸xñ"fffš¶úÓŸÆ?øAü÷ÿþßqã7Nz#ç‰'žÀO<$ħaŽ÷á‡ÆW¿úU\yå•X]]ûX‘oð<¯ýëqõÕWãùÏþد{Ü<ðÀXYY™ôeLåxï½÷^<ýôÓ çðîpœxµZ /yÉK&2–Aä›ówºÅüý¡}o~ó›Ç>ÎßÝáü=8¢ƒ÷;'Ò`†!Ä™m¹ýï|'ÞùÎwNúÒ éÊ ò ñ1éK'¤'8‡“43ˆ|sþ&Óço’D|øðaa Õjµ£2OÈ´@ù&i‡2NÒ å›¤Ê7I"‰4Xggg±¸¸ØÄî82™Ì¤/=Cù&i‡2NÒ å›¤Ê7I"vwww'}­¨T*XYYÁìì,jµfffpòäɶù}„L”o’v(ã$ÍP¾Iš¡|“¤‘XƒP‰ß’໸¸8éË!d¨P¾IÚ¡Œ“4Cù&i†òM’D¢ VB!„B!û—Dæ°B!„B!4XÀöövÛ}•J¥cߣJ¥Ò±És·ýã¦Ûx¢Ç¥a¼DÑNÆÓ&ß½ŒIŽIËx çðvÇ¥a¼dpùžÆïz/ò=ã%œ¿Û—¸ñî’‰rþüùÝ›o¾¹iûg?ûÙÝ·¾õ­»7ß|óîÍ7ß¼ûÖ·¾u÷³ŸýlÃë~äG~$Øÿž÷¼§é¼ö›nã‰rÛm·í¾ë]ïêkÉßõ^䛲=pþž¾ù›k™™™ièyU­V±¾¾Ž¥¥%ÌÎÎv‚Z­ÖuÒÆgΜAµZm9yLÛxIgÒ&ß½Œ‰ò½¿H›Œs'Q:ÉÃÓO?ÝöuIý®÷"ß”íôÁù»‘¤Œ—k‚©ÕjX__ÇÛÞö6,..âøñãÁöv\¼x±ëþ¤§Z­bss3xÞêuÓ8^Ò™´Éw»1Q¾÷/i“qÎá$J+y˜æïzùžæñ’Îpþ_—„ñœÐçFºP©TpâÄ ÌÌÌàôéÓM®övÌÎÎvÝŸ´ñœ9s333pŽã R©àüùóÁ Ð4Ž—t&mòÝiL”ïýIÚdœs8‰ÒN¦õ»T¾çææ¦r¼¤3œ¿“7ÓÚP …Bw‰/ºá«Õj Ýö'm–^é$ß²_rùâ26LùäxBö¨e?ŸÏ·\é÷}™L&P„Ýä9J+ùä|M¦‰½È»ïû˜›› mÛnHÙ l÷É.™8;;;»vÏ;l3Ms·P(ìž;w®iŸ®ë»¦iîîîîûwvv‚ý»š¦ÏìnllÏ ÃØÍårÁóB¡œ¯Ÿc é…Nò½»«dNþßÝUò;*ùäxBe²}}¹\Þ°{áÂ…&¹&d¯t“gÓ4wËår°/.Ÿ²ó5™ö*ï2 .\Ø5M38e»?œlÛ†iš abår€Za‰ïËf³Á Œã8Ðu½¡:¤ÏóÐh4prr’ô¡eÖ”'¦¶mû“®§%­ÕjÅbq¤Õ&ÜO”´Iñ­/…ñMY—D쟜œÀ󼼸¸¸üЇ>tùÔSOÜöáøò§ú§/////_ýõ˧Ÿ~úò¹çž‹}û4Ÿüä'cß7x¾É<ß´ø¾¼\oŒøÃ^é{šv<ßÍ?_’ñÍ2-ßÒP†³üÞžo2ÏÇ:øfð|7ÿ|+Üëõp~~>vý`0@¯×Ãþþ> P(`ww×ßnÖí4ªV«áðð0éÃØºóßc|ÕŽ“>„­;_Æ÷椥LÛ¦óe|oNʳm<_ÖÁ7# åÙ¶ïÊÖÝÝ]ØÃ‡`dxC¥RñÇÉϺ( &Å7À§ìc|Sž1¾)ïX§¼Zë¢KºiA?gÞN”vŒqÊ3Æ7åã›òŒñMY·±„uZÀ?zôhæí³¼òÊ+øøÇ?ŽƒƒƒMeÔùù9ž{î¹±7–µîõÕWqpp0qH‘888ÀË/¿¼ÒçdNiqtt„çž{Ÿþô§Wöœ,¿)-¤ü–ÅVå7¥…ÔÁç-¿¯mê§­tV*•fÞ>Ë{Þó|àH|Œ5¥ßîî.J¥îÝ»·Òç]wŒ?ùä“©™'Cév||¼òŠËpJ‹ÃÃCܺuk¥e8ËoJ )¿]!8 ËoJ‹Eëàëa½~ý:€Ña ƒÁÀÿ"̺H8Žú s]À¶ÕŸi×·ÛêÏuƒûýÒ/};¾øÅ·¬ô¸ã´ Ï þoÛA¼F1MÓò‰qÓ,Kýÿ¥—Þ¿Òãc|Sž1¾)Ïß”uKXK¥jµÚÈpÛ¶±³³ëö,ñ¼ qŠªtÊí+‘:—¨¤OOè¤ò>în7¨ëI m«Ûä²ã¨Ûwv‚¿v{ô5å>ò:Ž3zù»yØÛ ulÝ®ºoÔûÞj†‹Áù oÄ—¿üÆ•¾§Ûã³HŒKò¤ÿéŸ]ø{0-IÓ£Ç^˜Þp¡Ç´\±¦'}€:6¹lšê>r|ãúyIŒê÷ÑÏGžãÆñ¸ǽ¾ÝmŸm«çc«VUL ‰q×UÏW.oû++ýLß”gŒoÊ3Æ7mŠãŒvIÝ϶ƒ:Ìÿþ¿¿eîN£ Ô0žƒƒôz= 
‡C <ûì³±oO#ÏSÅ¢ú“$¬\.»®ºlªb)ÐrYU< Ò+—àÿŽú×4ÕsµZê¹õ 8 n“ úɉz¼ô¸Ôë@£¡^coOÝ&#Ç/÷‘×–OUy½¢,÷)—GÏUžÃ0‚û[–:žj5xýœÔõûHòR­÷ïÇ£+GÙ0€'žx÷î}nåñÇ’¨Y–zOËeõ9Öëê³ïvÕ{Ûh1.ôxÒMy.‰õp©“Ï]bLžOb\'Ï'ñ%Ç#qP­ggê˜oÞT×Ëíõzp=>%ÆM3øî6Áí¢ÛU×KìK¬O÷Žúžw»*Q•ûÈ9ëåĸçŸúÔjV ßñMÄø¦M‘z—ç©ßýwжÏþ=+MÆwºéu&`´Þ1#ñẠ®“:OT\E½–ãupž!M®«žSžËuUªZ êEFpY¯çFÄêu·oþæ¯Ìõþ°ò÷å÷lzý[èõo=ôÏM:£¥¾¯×û%$ _‚Ž! âí¶ŠŸN'è 7Ò‹iÇ'± ŒÇý¤<#Ó Ê鸉—ö°j3âi_‚Y·'Mz(ÏÎÔår9^¯à¼ùp0E=Ç<µŒpÏMš¬+I_FÖc ÄNgz﵈jéËŠbqv/(òßTÈ¥²$•ÎAoŒ&ªVÇ+k޼öšº,ÉgTù,­ìÝn0’ÁóÔõá†_I*„Lïøú××ó¾0¾ç'qÔj©ËÒKÊBêK{{êúðh°0I[-uËR÷—Qb’$HüH¼éÿJXõé"úñé=@z=LF±HR, ¸áQqz}ÐóTco¹¬Ž?ê»P,ŽÇ|§ ‡¿`õ ë¶Ç·LÒ{%ö¤¾3i:_T<èõ¢8¹@Ôgç1úý$þ§‰*Kã¼Ö"õéMÕŸ6ž°fQ³9Z¸I78Q^5›ª"ä«×‘hÝ$”ÊMTã£T˜%é›Ö9©QNzsôÛ¤¢ÕéÉg·TŠ¥7Ƕƒ†špƒdTEìÁƒÑ×–ãÕùìlô1q+G’ëɯ.êØ ×[ÿg¹dšS¹<Ú«(‰§ EÔG•I<‰ð4œ¨ï€4ú‹F#Þô ù]’×¹ö9…c1N;ÜH¯ŸSÜž¤b1^C«¥<”˜•Æé=ÔËÍpìʼnpŒÓú1aA n½¤x¼«ÉˆÅã8p]×ßÕÐJòÕ¯œ{Õbຮÿý>†aø÷•û9Žãÿ•Ëe‹ET«U8Žƒ_ûµ_Ã÷|Ï÷$ý¥VœJѶ‘é Ò:/sŸ›Í`¸–Tæ%)•¤NÓhëÈ0iõ¢÷É.é 8; æÉ áÄ!jݳ³É£"âTÄ¢æL/Ò°•æÑ;ÛHFéq^ÿ!J8!œgh Ñ*íí©z»”‘zÙ*×Q61a¡X †¹l;T{ _®V«~"ªßæº.<σëº(—Ëp]†aøI¨$òyžr¹ìß_>¹¯Ü'üÚžçÁq4›Í‘×,—˨V«¨V«hµZ#IqµZÅ­[·F–oßf²z›ôäÄé¡ ñĘQã–ûI\ëñ«?Vb9Ü(#1åP†!÷—çÕïcÛ6þÍ¿ù7I¿E™§ÏI2õ©¥_ÛÈü==1dѵZzãi½^ŸÙ(õxÛ¶qqq§žz*éÓI=š^¯sô̪Eu<Ä%å¹þ»'¿OŽãp„Ì2"Lë¢íÁ„õŠiŽ.5-ó7tÒSX¯×G*‡R1vNg¬’iš&LÓ„a¸¸¸ˆõÝétÐjµ`šæH¯‹ôöH‚ªƒeYhµZc‰0‘Ѓi·ƒ}³âð<Ïz-ñgY Ã@½^INõF™r¹™dÉwCÉF£á'¨€ê9¯V«þýdsIõïѪ{íçQ.—ÑÙ@M0©§¼’ÞT"yr½²â6$rÒå¨ûËw»X,ú±òÔG££nôé7oÞôËI£ŽÂ#¤ìù‰Ÿø ‡Ã¤ßºTó¼õŽK’4þOªïØ&…Kƒ8FõÿG=—a#±¨ž‰C^S¿¿LY’×Ðáío{Òoq*É ŽØ>LX¡*-¦9ºšÛèf½ºÝ.lÛF£Ñ@·ÛE·Ûõ{MUa>‰X™IzMõØIÖr¹¼ðëÐv°,•°ÊÂ0ó,$fYÚí6δÚO§ÓmÛ0MÓO¨Nb>qÜï†ÜñM«´·ÌyÊÚp²pr'•pýz© ë$9”^JI2õ$R¿ï$r›üþÍó½×u:˜¦9ò<ºi Q²M& l¥¥ ;¼À¢žp†I¤>LÙ£Çz±X]nÔÔA_„1j=¨©!ëV¯×G¦7lôõÓJ¶.’ršÓ"¶×Ö'¬2—cÚ>“;;;0 ÷¯&ú5 ¿5ììì,±ž¢Yd YÁtÚt_Ïó°··‡b±èÿxÚ¶íϳ¼¸¸˜8•=”%íöê{S%”äKzSô™I½–ÒS#Éd¸÷ïmÔ+úúeý» dYšÍ&ªÕêÂI¦nßûuÌeßvž4N®2Yu–eMì 'ŽaÒ0" ¡þ}0q*Ó|çîm<Ù¤õqœ ‘¶ÛV'¬úp™Iå[»Ý†acCþXI§,­é3‘çyØÙÙñç[›¦éWl[­crÃ4W¿ÐX»Ý†mÛþH©0‡{&UÀ%#½R/â,Â7ŒÂáH…|Ó·¦YìñAÌÊbnÒØ! 
ËÆ¢XÇï “Õ|á*¿Ù¥/BþMûÕ_ýU¼ñoœëù¶:aµmU¨Ojô<–eù=«DY"eÏ _=Y•ÞŽ“““¥{_ˆÒ¨Xþ1MxvÇq`šæÈÂv2;êw"îü6Yp…h¦©)ç,=’”Ê"uVž¤Åáº*ž·qM$„l ïPu}ÄŒ Ò“Rº/ë“è;|æ3ŸÁw÷wÏuŒ[°ÎÚ ¥Ýn³¦Ìj6Ua?+|›Í¦¿ /QéC$cì‚@}/ÂC‹Å"†¿½mÛc í%!Îô&}a<õ˜`uõjµŠû÷ï³¾C‰±,•¤*ae²º8™+þ Óç†OÂ/s¼W¹«>G{‘5¶2aÕçõM"4{™(‹,KUZfUÎMÓ„çy«&¨/CL”;;jXYœdÕu]ìííôšÊ¼þÃÍi!”2 xÚô&ÇqÐl6ýäT³c‚JiÐlI*›L'Côõùß²’µÜ®ïÿ^{dÚv„i¶• «eM¿Ý4Mt»]&«”Y–5{“ã8èv»Ë yw]Õúã8AM©^_ʯÝV÷™ôKµüŸ<˜ü¸Fc<9–BÜuƒ?yÃà¯áг°ì§kY:ÎHkð´í3ˆ’fYÓ‡·ÛmX–…³³3Æ1¥ŽüdǪ‘W’€Ê(Û¶ýÆ%镦z½Ž“““L&ž‹ÚÊ„Õ0¢[!ÇA»ÝF¹\ž¸"êBd•ýùªÕøcÓÖI& pf{®”˳ó²f³‰“““èv×U cÔ$ï«ýP!óÙ{ä4Aâ¨sõÏ ^Sꡖ÷I>yOä6ŽŸ#m$³Šr©Ð·Z-©¾R.2›ßÖ%¬¦©ê‚áòÛ4MÿÉ™d¬e]YQ•äryvmI§÷òH¬÷bE›$©2 \V½b†$|aŽ£þ #èkµÔùt»êº¨DRÓÂoà¤s”d£ÑPÏ!Ž£>ŒðY*ã’\w»êrx%!ÇQïþÒËU¯Göþ}£Pˆ÷™d´UL[xcâê×z‚?)IÌû²¸ r8QŒÛÐUVÌzlTÃñ<¾’ßGe:IV9ýƒ²Æó¢BeJS½^g¯*¥^·›ÝŸ6é%•v%1•ÎùîqJÉjleÂzv6zçyèv»¸ˆ3€^zŸêõéÉè"-šQÙióþU±žÔ£4©Ò=i©ÌiKh.2ÜpÚ{0éu¦Ï$ÕêÜ“¾þÄxýmo›ÿœ2`ooúÛ1uõkIVYÉ¡ ÚÛ›ÚN˲üi€~ÕáPêx¤!1jÔÀ$2Z!ªHF ÐBd{&ý-l6›ð”W¶=“•‰éúlUÂ*£IÃ帴FÎ,ॗoZïÓª1øiNQ#º…¶‘=J¦9ýÁD)&ƒS&埮ë¢Ýnç³2oÛó7’ʰõðãÂ#V€`Ú‰ _O 9ïúˆIT«Uu|ÎèkÙv0O¾^ºÅÉ“ÚvvÔó9þb¿?ü[+éw>2ŒR/ªmÛ†ëºñÞ‰R@f­¥‘¾-Lxû'Î/MÆV%¬2T¸®ë·˜Ì,äåy“É*Ѧnìv»“{”,köâID)U­ŽžÑ5›Mt:üU4šÍ [w¹I§¾r¶>e"j~ŒŒX‘©q±ŠÅèÚ¦©×ÕýeBµ,\&ë+„˪“uì>õùÏãssîã—²Ö›^Ñ÷<ívgÓ¾D)“¶ù«úJñ2ßTö"åÐúäm]Âzÿ¾*ÜoÞ¼éäÙÙÙì`”Vå¼Uv(W¦ —eÑ#à/XE”!2ºtR1Þívã¯Q°I“¾w²ž¬G0ÉÕ\Ü¿½_›ž¬ÊóÄí‰g1¯y† ëÊåùÉô!RŸÿüü¯—q¶=o5ö(1Z}!=Û¶y“F˜¨FYL3¼ªýÕs¼ÿ[¿5é³Z+9ý4$¬¶mûûš6 ÎýN©­IXõ†âv»‹F£Ö<ƒçÓVÑ!а·LZìÔ²¬É«E–ËÜ2kooòâÕŽãÀ²¬ä‡JÊ–JQCià6Y#!¼¸œ¸y3øµ$|Q¿QÛ²_Ä…üÏÎÆ‡;Ž“||§ ybÛÁ¨t_ËâúŽÒ"UÐÈn ’¤Ê"“Òµ>H»­¾›2*B_p²ÕÂoܾ<x_dêèÃ}-ËBµZõ÷5¥ôÚš„U¾®ë²,na@¹µ»’Îqœùiˆ2@Ÿ*E†'Úb.Kº†» C†ýËŸMï­ŒZÝ›rK6ˆZçÑó<4›ÍüïÞ×¾^ÿné[……M=°èjì‹l¥þÌ&íO”S¶½ù—ÇA»Ý†çyœšA[“°Ê£Ùì²ÒN¹dšÓ[,ÃTåÁ´Q¥S÷Þ$ËR•ÑIIsšW¡ÄMjáÀ™)×eNux¯k} ¾¨Ìðj™qzÂø}J-i›ÛTغ®‹n· ÇqÐét’ÿ= ¼õWoùÚ׿zŒŸ°öz=Ôj5è—mÛF¡PÀ­[·FnË*Yd)v·¿¢Yù1 ­&s´£Ø¶ÍBšrgÚÔkîu=’Ò«³hˆ¬.O4§IŶ,¹’øÞÙ>êĦý>1ùÌÏS }ÆôòVÅq˜¦ Û¶Ñjµ8äwÕô`ý»­¥w¯†Œ|ËüÞøÎwÎõr×úý>Z­ƒœ«“·T*NOOqûömìîî&ýö,¤Ý–÷sÎÞÕn7˜0O”b2MfÚv6Ƥm"NN¸ØeR·«â>*|e¨äJ†[–ú’É6,Æè¹¨ýÒ„ }àwŒæ$‘QEw»ÝNW\æ_Ê÷²ÆT˜¶×;å^±¸™ –eÁ4M@£ÑH×÷$ ²â;0y<ý÷L!-—Ç?<×U?Âò¼z¢*]èáý¶ xð¯özs¯ò~­Õj¡R© +/ • 
V*£P(àôôGGG™MX]WÕ1LÓ™¯wÕu¹Íe¬i„!«Š²"M$ àFÑ2xeC%en›ü@ß¼9š¤F%¤W&nE‹’":œ°6›ÍÕÆ÷"d"™D^.SÃØüEJµv{òTßUñ<;;;þV4[;ªÌ¶ƒÕªÃ‹<8N¼9ßÕj°õØ$^ÔïÚ`0ÀÝ»wQ(Ãáý~·oßö¯ÛÝÝÅÑÑÑØ°á¬8;Sö¹ ÷´¬·Mô—\×E±Xïi’}…‰2¨Û®¯t(pØ<Û°T«Áþ¢l¢D…N·Û…çy›ë5’Þ’pϨaLÖCtE^^'ÇqÐl6Ñh4ÐÈKÝ]ïÝ”•«ÕÉjQs¾åw§ÑˆÿÛ“÷ìZ­VóS@%vÀè^úíY5÷>Ù(ÍlèvQŸ’xF6ÖȲ٬HSÉ”ÒI½«'''Éï£Çé$´¤p•Ų¬ÍlÑ$›dZVÐ;NXùÛA1­»#Îqìííáìì,; …Emó Cëõ¹¢²½R½>ùMÍ鈞kÃápäŠ~¿J¥’‹$nÜPî#‡DNwM”BNÑ€3ø„ÛÆÚí¶?'ÝuÝñmdÏ8¢ š4Æ4M”Ëåù‡‚¹nÐ s}\wò¦ÆDk&¡¨‡r»ÝÆÅÅÅúcZصN§ƒ££#\\\ P(àöíÛ¨T*€çŸ§§§¨Õjë-(×@VX/—ìº,´)Êeoø~#‹çy~ïêTÜ_˜2,ª$-ísëv9ìŠRÇq‚ú­ëºðžŒS ëËOe„m”qgñáa¦É=()ºÝèÎÛ¶qÂ!Š”qáý²çåyLÓ\|X¼NVËfÙŸY×ô—D¥RIú¸–gÛøïzWt«L»­÷µ×Æoc+eV€Û¶¿wÏäuìäM´fábÙ²,t© Ù6·$ TÐ÷‹¿Í]¼GI²`Öc(AÅâò!(½«K ‹·¬`²‹ Ž0˰kYßou"×Åç¿Ìñóç-ÁrÕ“’S4¥£Žãà¯=ñD¼Æ©°e„ãŒöB-µv»ÍÅ5(ºÝèå5lÛ^nqÓä‚K”¸eÛ]×…mÛ‹Í]õ¼ GÕ0TW/p2/¿‹.¹.îÿÙ?üÅ¢ú…TïԤŕg¾EšˆfYþïO?8;VÊju4Ç\¸BošêÉXq¡„yÞäÞÕ¥V™ìÍFwJ˜,°hu£Ýn£Ñh,Ö»*uý‹ Öwrd$aíõz¸wïžùÖ­[¨ÕjIãb¯Ý¸ïЯ3 Õòµû¼à*Á”!2tìMozS¼°A†2β,œ-2iÞã‰ÖdRï* F,¼ß¤e1Æ)dGÉEX–ÏóÐXt¤À²“g)•®ªEï¹çžÃ`0¹ñôô¥R Ýn7{óZ]~Lo]‰3ŒN⺮Y,Cݵôd±8Ïõ:d(3<ØÛS åÀÕ¾ÚÀbóû¸20¥HT8zžo‘Ý Ôƒ¹¥Æ¢+{ž‡v»½¾ml(³ïõzhµZ¸~ý:ºÝ.ÇñÿºÝ.®_¿Žgžy½^/écO[XfîŠ ÷c¢ ‘ üX%ŶÕ_øWƒs›(CÂõ﹆Kº.W}§Têt¢Û —ÌFJ‰EGâ6›M4xuwiÑ”E•(׿sç ÃÀñññXA©_çΤ5¾«ZŽã8I ÑZù=¬a÷ï«äTære¬—!lÛF=n¥¼ÙdÂJ™²Ô‚b¶Í„•Raѵe(p¬!ñ®«öT­VÙ¿% øÈG>2õNûûû è÷ûIo<å2`³ ~ÛŽÞµ›(#üácÕêhe¥XT…xT/+QFÍU¡w].¸A™²p+‡SŠ,ºÌ\ەɊ½5/•J(ÍØÃQ毇ä7žr^œ‚Û0T… |¢ ™Ãó''jÈ c›2ho/øÿÜ•y&«”!ÞÕh€…ç¯rý J‰pûy\±$m[Å<§ðm•Ç=z”ô1¬EìÀ/—UKüÞ{¢(s\×^Á1 .´D™dÛ£akÛvüÞUî7L³ÔüÕr™Ã)5Êåù;û-ËŠ?ÝcÚ2Û”[‡Ã±ÕÃdÁ¥B¡ôñ®^µ¬¨ÊŸ2f,Yõ¼ñ†—V‹•wÊÛ­ô8Ž3ß‚Kl¤¡ Yjþ*QÆÅ^Ÿ@FE²wuë<^©TÐjµ&÷‡8::B­VÛøÖ6ý~f2=‰mÛñ†Ö†ªÜÄÙò†ÖÏ`_ýå¼Ã{™ø½ýeqÅjµJ¥ƒz½NOOñèÑ#ܾ}{e/ztt„ÓÓÓ‘ëjµŽƒÁ­VË_äÉ0 tXí4VððàÁúÞa]€@¶—2¡’2ArV¾úk]ýkB%o«ËÖÕuez®â¨h\=—ÜGOÛÑî¯/œY½zn¸quŸû¡ãu´ã*_=^ޝ}uLgWÏe_]–ó‘ÇÔ¯ŽÇ¹:žÆÕk{W¯]ÕŽI«_ûêr]»Ý»zͰÆÕëJòºüägžÄÿQþ?Öò1o*¾ÇZä›MÕúÈÆZ³Mĸ¾fÒÜ•yö°Îæ!(cç}«äqÓ¸ˆnt4´çß­bÄý]¨2÷Y】M•ߺ¥æ¯Í)‰ŸÆ²¬øå{§Ãc[êZ¥RÁññ1ŽŽŽ"·®©Õj8<<\iïêDZ»»; úpã;wî T*áîÝ»‡xæ™gptt„ÃÃÃØ¯á¥q;ƒ:T¢'ª’SýwÊÖ.MN L®\„¿ïz…âAaR¹ÎÛÃk8Èã‹* ÐýÊWç9­Tðšv¹ºœI‡§o¥R!Š˜œpê‰Z0pÙ·#ÎãÃÇ»J—ãÏÕåןx_½÷Õµæ&â˜0çi]CÊ$ñ÷_ªrÙEP©Ö+“Àhïw8 Ósí¨y¬Þ¨«C¸²õØ0ùîDUžå¸ôÇÉñÖCÇ5"B 
¹¾º~ÒGãiÿ†ËЍD¿qþ>\½O¯|#=º•Je¡qô©^Ó†2K[aSñ „bÜ0Vßú(=¦áÄ,ªb®pNj8 ?6|Ÿ8yÉ"•ݨ¤Ñ˜ð]¸¹HCIœ†¦"KfÂÇwõÚ¯œ¯/a]wŒ‡sÓ¹æ¯ó7ÜHüÈ×)NBç3Í¢¸uúÏkަ”m²ü¶m£µÈ¼<Ûæ\mš[1>É\Ãi«=žÔ ËøùápˆƒƒœŸŸÀÔ/Å´}`_yå|üãÇÁÁUÁI½'†RáüüÏ=÷ÜÊ`¢¬:¾àÕW_y®±Vyïéë<!vÇƇ½Rêàå—_^ëk¬» ‘ÃÅ\WÍÙ^•"æŸÇI‰9::ÂsÏ=‡OúÓkyþM”ßb©ù«á¡” R~ËÒuØTù=‹išñVæb’¹!uðyËïksÝ{ƒö÷÷ñì³ÏúcæK¥ŽŽŽ°»»;õ ñèÑ£‰[ë¼ç=ïÁ>ðÈÂ\­ñ뤑¤TØÝÝE©T½{÷VþÜëŠoxòÉ'ýE€ «O#s…'MïÖçdʼË:¿t||ûlì×0 c³«ŽéËûK!+*+C[eñ ÄÜgØ„ªxˆ¿ò'Ñ ›Šq1¶egË®ÛP•J»<:UöµTÐ…‰èŠ7m¥MǶmÛ‹­·áBí»]¯uùJ<˜p};¦9w}šÛF“¸8u¡ð1@}§%O’$\:äŠÝ}AŸê’_™Ú"Âí\.‚$U¶/³¯/I¾lw%m T£@U{Žú”÷TöÏ–ç“$Yê‰r|‚íÉL ý‹mà+·éŸÄ4MÜ¿£ðµ,®„M€kQ‹¬Ûáá!vwwý/J8®T*xá…üÒV•,¯ÍTK¢U9ઓ[lñí÷°Jë¹¾§ õ#/ñ˜¡Ö_ʆM•áŽãŒ÷>MÚ¾ÉàÚê¶.‚!Œ;*ñ;P7e¨ûDíkK[mÓõ“¹Ö6‚Äìäª/ 2S_dcoŸ²ªRñö6FÆ:¢G·…óžFŒûDm5¦_çA•%ÒÓ+ŸÑF÷'—áÕò{­o£%Ëò{Ý:Ÿêàx+’¯ƒKý%ÖÈÇáp`À*ÁBö|d•½ºk%+Lê…„­·‰øþö7|»ú‘{‡M¨ŠBA¥œhM6ãžçWè;¡¤ã?r ¼ëoö‹ªçI†Cš*—áJl\TŒ"mª~Ù 3‰$>÷1š ²ó)YáëFèrœÞåâíï½BIÖÁçÚ{Õ¶““x÷¥\{<<NV Ë4óí5¹ÌëtÌÛ ÚÇqð—ÿð/pù‘”ž}&«”®ëâ­üÇÁ<¦(Õ*°ü`AݯÑPß ™Ó:©q¾6.R¢æZ Rzð8" ÝøÛ;“mÛñö^õ<ö®’ol•àçŸ~mû÷mTxºuh‚? ”Ïóðæ»om•eK;åŒëº8üØÇ¦'¬€JJÿëïS-ñÒ#{ö Rª¹®¿‡ÕÂÕ¢†¶ÖÆ·µÙéAÍ/]¶l· æ>™W—M¨%Ý=°BD‰ø/¿ó¿äv”{ßù‡ˆá{ß;{…Ȩ¹¨œŸJ)»‡U_Á¶ÝV ÑeÐ\É4ùLXU°Ÿ XtcRÒ*û•ÙÚå6TB*“ä«P­õÒ›Õ€šG•W)!ç-‡[&Qî½ë7~Åwü?UÃcª¬&Ê Çqâݱ 5åöÕ>Äqö®$J¡ØÃ)›ô­¢Õá'ÛkÉßð¾Ã÷ÍýÔ‰-º´1-¨äUV‡ ·¸{æ¼ÊâFè¾ìÅ¢´‘eø‰r¬ö¹¿€o*Vƒ-ãÃd+¢ ò{W=O œDÖ&ØksÊ,Ïó¢Ò£õ’­•d”†¾Õ :ç¤íL s¡F“Êeïê>²%“Þ XEðÛlat!Zyœìo|õ/?ñ2po¾ÓÈ ¨TÞà"‚%Æ[Po Þ ¬á´Ê;ÓÄÿ÷Í}ìK£ãMS0Û6eëºê?²ÿe«¥F„·– "LµÀXÜEšˆRf®áÀ¦9y ³m&  ¼mÉ&ä3‚Q 2QßúÐDtJ¤/Ø©'˜òºöÕíuLΛÂíi“ƒ,°¶ï5#‹, ‡C„¯IìÙº2zÅÆç6¥™mãÞw};öå´U¡wÁD•2Ïu]üÝ?ùµ êýó`4X£kcXþóÿüT¸?éÃ&Z˜mÛhµb.ü" kž8’F:îÑt´ûJR¨÷`î!ØÇPÉcG{Œ$¡úÛþ½Œ³ÕRx_‰ÿî^Ûßß»2ûŸ.ƒ•"¢ôr] ?õ)üÅÂ\ׂú1>§iPæ}áÞ=Üü£?Ì2ðõïUsS;e5’@ß} ÀüoUbwEa¢r'Þ"cív¶{W-½šÀhòy‚`ˆ?0Ú£i@}÷§vxíœê„ÿçеÃÃ䈈(`Ûxý»ÿ |ßËß7z=)'¾é_ÿk|ý{¿Wõb”€õ;jXpª—ÕÀUÔU«?xô!-̶íxÃ=OÅ{ÚFx憵¡zK;¾·g˜>’3<QŸ÷I‘ò»J0eS£÷«ßñ0é#!Z‹·½þ:¾üý߯*­õº ¨U}HÏãBK”y¶mÇë]ívÓ5šÀ††{ê;yj׸š[Ž`çI8eØ/­Ô5è÷û8==Åþþ>*•ÊĠн;Ѿù³ßŒon|sÒ‡A´ï~é%ü¹w|B](ÕbJŽ£þ•aEp‘%ÊÛ¶qq1c/È4ô®ÊÜR}a¡Æ ¸“Ȇ=nÛ6žyæôû} ÿ†ÃÃCܾ}µZ …B/¾øbÒÇJDD[¢ì–1|ï0éà Z=ÏÃ×¾ññMßôMÁu†ܼ©æï•ÁJ0å†ç© ¯8«×4‰ÞUªÇTúãdh®|ÿü¡ù}*mÌãwîÜaøØÇ>†R©äßP©T°»»‹ããcܺu 
Ï?ÿ|ÒÇJDDÛÀ^¹öJ¼!dDYcÛø“¯ÿ_Fç¬5Àå%ÐáDmÊ—XÛÙ8`Ûëjh¯ ëµ0Úƒ*+ð2!MLJÃ!>ò‘L½ÓG>òœžžb0$}¼DD”w6ðÛßôÛ³[䉲Èqðç.ltÛ¢œrgvÂZ­ggñžpQ&Ô‚fU¨Õz\ýËÅŽ2áñR©4Ò³ À,äö‡¹­™ü£7ý£¤‚h-†/<Ľû³ìÅ¡­{…àUocã@-˜$ ˜5 Vï­ƒCî3èÚ£GÆ®<>>Nú¸ˆˆh[•·}çÛ’> ¢µxí{ÿ[¸ü[ø‹øËI ÑZÅÞ{uUÜ«?ÖÛ{Psâñápˆ^¯7õNr»ÞëJDD´¶m£œÕMã‰fxôoáÕ¿òjÒ‡A´v#óWÃ;8ŽZx\M¨¹©²ÝLLVsäñJ¥‚££# ‡Ñ«1‡C¡V«¡R©$}¼DD´8•òêú›ÿ¾ûýßôa­?µÙT7n¨=‡ØÛ[>aÕÕ*Ô¼ÔFÒgMëpíöíÛ888À3Ï<ƒÝÝÝ‘¤ôÞ½{8??ܾ};éc%"¢-à8VÊ-×uß´\×E¹ÙTóSïß\Wm_ÓnËÍ[m#Xå÷$é3¥u»V©Tp÷î]œžžâèèhì»»»Øßß[˜‰ˆˆhªÕ*‡SnyžÇ-›h+”Ëeàä$HLåòɦ‡`~*wÚ×µ ðíÛ·qxxˆ~¿ïßX©T8o•ˆˆ6*ÖŠ’DU¯s?Ê?-‚U5>:PÃWlÓÜ*×ô …BµZ-éc""""Ê¥Fƒ“ì(ÿV>ô½5ô—ƒ¶ÒãIå‡ëºË–qܼúLV·V"""""Z™où•_Á;¿üåÅŸ  `\P‰„†-ãÇ<@å¯þÕùèA­ì¸5˜¶{X‰ˆˆˆˆhµæÃêAí©Zp&«äcÂJDDDDD+ñ{?÷søô{Þ3ÿÛP+·’>J &""""¢•øÆ¿ü—øÆŸûsó?óUiö°ÑJ\û·ÿoú#Þe0ÑLX‰ˆˆˆˆh%žøüçñŽïû¾xwn¨'}Ä”vLDDDDDËsüö7}ŒjÌMS9 ˜b`+-ÏóÐó›gßÏ`'}°”LX‰ˆˆˆˆhiN±ˆ_ÿþïŸ~'@@ÌNX"&¬DDDDD´4×uQ.—§ß©  î³J±q+-­Z­NOXÛÊàBK4&¬DDDDD´´©Éª5oõ"飤¬aÂJDDDDDëãBõ®^€CinœÃJDDDDDëS†JVËË>m#&¬DDDDD´ÞÕ¿LViALX‰ˆˆˆˆhõ<7“>Ê:&¬DDDDD´z€FÒAYÇE—ˆˆˆˆˆhõŒ«?¢%°‡•ˆˆˆˆˆVËJú(/˜°ÑêxºIåV"""""Z.8˜V†sX‰ˆˆˆˆhu,÷“>Ê ö°Ñj˜êŠIå{X‰ˆˆˆˆh5º.’>Êö°Ñò,¨¹«å¤„ò„=¬DDDDD´¼:¸Ø­{X‰ˆˆˆˆh58w•V,õ k¿ßÇ`0Hú0ˆÖ‚ñMyǧ‹ÝÝ]”J¥¤#†Ã!=zÄó]òù’:—I&Å7üÓúOGZ<ó®ßïó|—|¾¤° Ÿeøjž/©s™dR|³üηu”ßI½,¿gcù½šç›G*Öi_ÒIoV¡P˜XÑÏ#žoºžo‹Ä÷¬ÇåÏ7]Ï·ª×fÎóMãóÍc‘øfy–o,¿YžåYÊïTÎa½~ý:€Ña ƒÁ`kZ2(ßß”wŒqÊ3Æ7åã›Ò(• k©TB­VÃùù¹mÛØÙÙIúЈ–Æø¦¼cŒSž1¾)Ïß”F]^^^&}Qúý>P*•üIÞÇÇÇ[ÕOùÅø¦¼cŒSž1¾)Ïß”6©MXŒ¬HU«Õ’>¢•b|SÞ1Æ)Ïß”gŒoJ“T'¬DDDDDD´½R9‡uÛôz½‰·õûý©ÛSôûý©{fͺ}Óf~¿<œ/)“by9_b>é~y8_Z<¾³øY/ßY<_bù=é~©;ßKJÔÇ/Ÿzꩱë_zé¥Ë§Ÿ~úò©§žº|ê©§.Ÿ~úéË—^ziäqúЇüÛú§zìy§Ý¾i³ÎG÷ÜsÏ]~øÃžë|Òv¾ˆŠñ¼ÅwœsŒï|a>Ž1ž‹Äw?ëeâ;‹çK Ëïqi-¿ÙÚÁ`Û¶Ñjµ"o?88@­Vƒã8xñÅQ«ÕFî{çΔJ%ÿö~¿£££Ø·oÚ¬ó½^§§§c×gí|izŒç-¾ãœÀøÎ–á,Ãól™øÎâg½L|gñ|·Ëïì•ßLXÒëõF– ß6qxx@m°»¿¿Á`àw·÷z=ìïïû·ïîîâââfޞĹN;1Ñn·aÆÈã³v¾¤LŠñ¼ÅwœsßyÃ2œexž-ߟøÄ'2÷Y/ߌílbù½ò› kBvwwq||ì®R©Œ-þðáC*äÿ•Jeä12n|Öí›6ë|Ä;w°»»ëoZ¾oVΗ”I1ž·øŽsNã;oX†³ ϳEãûK_ú’ýþiþ¬—‰oÆv6±üÎ^ùÍ„5… …ÂÈâƒÁGGGØÝÝE©TšÃápæíi;8??Ç`0ˆ,<²v¾4]Þâ;Î91¾·KÞbœe8é¦ÅÃW¾ò•‰Këg½L|3¶ó‡å÷¨´œ/Ö‡8::Â?øAÔj5ܾ}Û¿~’Gͼ=mç3 pzzê_Žz\Ï—¦Ë[|O:'Æ÷öÊ[Œ³ ']Tççç( °m¶m£ßïãáÇ~ PÏ—¦Ë[|O;'Æ÷vÊ[Œ³ 
'ݤxÈêg½h|߸q#“çKÓ±üN_ùÍÖ”jµZþ¸óp0Èør½~0ø1ëö´Ï­[·F†+„eñ|iº¼Å÷´sb|o§¼Å8ËpÒMЇ¬~Ö‹Æ÷;ÞñŽLž/MÇò;}çËÖ²mƒÁ†aŒmh\«ÕP*•P«Õp~~î7·m;;;0óö´ü Y [KŸ¥ó¥éòßqΉñ½]òã,ÃI'²ôY/ßY;_šŽåw:Ëo&¬)$ËLŒÝæ8HþrÕ…BÏ>û¬¿Y·§í|fÉÒùÒty‹ï¸ç4MÖΗ¦Ë[Œ³ 'ݬxÈÚgÍò›t,¿Ç¥á|»¼¼¼L䤥 ‡C?£ºógÝž5Ûv¾ÛnÛ>ïm;_Ú¾Ï|ÛÎw›mÛg½mçKÛ÷™'}¾LX‰ˆˆˆˆˆ(•¸è¥V""""""J%&¬DDDDDD”JLX‰ˆˆˆˆˆ(•˜°Q*1a%"""""¢TbÂJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +¥V""""""J%&¬DDDDDD”JLX‰ˆˆˆˆˆ(•˜°Q*1a%"""""¢TbÂJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +%ʶmxž—ôamܪcßó<ضôi­V"JÔÎÎÇIú0ˆ6nÕ±ï8vvv’>-Úb¦iÂuݤƒhíë›Å„•ˆˆˆˆ–fY+ñ´ë›u-é Àu]˜¦9v}£Ñ@¹\´ä”Ëe†Û¶Ñh4üûÚ¶í«×ë¨V«þmív­V ¦iÂóÎaM‘v» ×uqqqá_7kËv C†ET%†(IQñ= ã›ò`Ó±/ó§šÍ&”øâ¿ˆ/~ñ‹<ß%žïßø~ög6éS‹í'ò'ñ]ßõ]IÆÆlÛÖ5«>_ÇqpzzšôiÅÆ2<ßÖQ†?ñÄø©Ÿú©¤O-–ßù¶ŽòûÎ;(•JIŸZ,,¿ó- åwnÖ?ýÓ?Å·~ë·âÖ­[IÊF¼üòËøêW¿Êó]âùþàþ éӚˣG¶æó€O~ò“<ß%Ÿ/KX†çÛ:ÊðÁ`ôiÅÆò;ßÖQ~?ñÄIŸVl,¿ó- åwnÖ7¿ùÍx÷»ßZ­–ô¡lÄõë×ñ¾÷½ç»„áp˜ôiÍå[¾å[¶æó€ýÑåù.á-oyKÒ§4–áù¶íe8Ëï|[Gù](’>­ØX~ç[ÊïÜ$¬Û¦T*ef¨Ï—±»»›ô!ð|im¶­LÛ¶óÝvÛVžmÛùn»m+ÏÒp¾'ý&EaÂJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +QŠx`Ûê_"""¢mÇE—haž‹“o·mõo¹¬þ¦=O· 8Np]£Ôëê:Ë Cý9Ðn«×=9}}ÏS·†ºì8AÅ¿ZU÷5 u]†V‹§¸®ú7w®ÜqŲFcPñ#ñlY€iª8m4ÔóJ¬ ÛVÕªºŸiªÇÉclØÙ‰>ŽrYŸ®Û YÏ=—‹ õ¯iªãþú×ßœôÇ@DDD´4&¬9Ôn«;à:©¨K麪BÜj©Š²ôêÔëÁs¸®º=j/lËR÷9;S·›¦ªLK‚ສr/I¢Tðm[UÔ«Uu½vTÚõÞ%y< wq¡··TÒ â¯'¬’@Èÿ»]õÙk>UG}Åb?«~þ¨X³mõšrÛÍ›£·K,‹*†›žÄG·«â âCîwÿþøkJ\¶ZÁuòý‘ïHµ:³’,Ê{d£1Ùh¨?aÀåeü÷Hä‰nµª>›ŸÿùÕ~>DDD”/Žtö”ËAC»tI]]HCy¸Ó(,ÜA ô:Ø<˜°Æ¤W¦¥ç¥ÑP®þ¡HoŠ$k˲mõÜRÉ•¤ALÛ®“Êy½>šðí퉛ÞÛ)•|@Uèà>Žzî½=u.’dŠjUUôåñR÷<õ•ÌHE_?¶rxð`ü¾ò¼Q Œ¼žô¶Jb~v6úºzb ¯-z½å?›<ÑÐv;è‚«ZUïu±ô.ê ’ ùÎHo¢|^ò<ðØc£I—$yõºú×óÔóV«A,„LÛïÅ—ÄUÈqLJ²ÃÇN,£„Ÿ«Zï]ÅçÒíïǬc’ã€óó/¯ö`ˆˆ¶˜Œâ’ß¹¬“ßÍz}rý%ŠÞÀ^,_ý*GÈl‰ A%ñK¦äÅ¢ª„ó©Ïé÷z}_+Br¹=œdº®ê<Ð뀎ÔvvF_CÇ„uM\7è¹ët‚‚ªX­dK!•ßr9ÚêºAÅ[†¶îíEW õž"ýy —PÏÕ¾®¼GÑ{c…a¨dRzO¥0•÷!Šþ¾DÑGyÕé¨ÏF’ÖU÷øm‹v[Å”$üQ±Öjö>ÆIð¤w\Õ»(4@0ô{š8…߬ûD[èå QôJù¬ï‘üVÈãôߢi Wúã-+¹Ðn«ç $pà¿ûï¾%é·†R.\Ò;$ödDšt@Hõ;(õ8iü”ŽTþ¥¾§?vgg¼ž"¿;Ž43a͉=†Â ø"\êSŠ$oÐË:i—XÒëÿÒ™¨ç»y3(£[­èú\¸î%\ÎG%¬r?=ÒŸwõ)&¬SxžJ*;ÑD+œJàè:ÑLy>}hkTÏbX8Øâ$«'I‚$3á šO£ OƒE[ßòjÖ åǤ²LŸæ ݲ¬ 1 Wž¥ò4‰$—ò½?9QÏcÛê6éÑ—^~h6ƒß»ð¨yÍE®„4ÖÊ1u:“óz½/áÞ½Í}6´Q½’BïI’‰é) âr]»Q¿³¾O@0"M-Ön«ÊÿÅ…zþfSì ¢™ÖÀ«?¤.9é8åuu…Â6÷¡l‰»¨˜Ð;OÂSê$ùŒš¥—UºEÊA}”ä$24W:¾ô‘eÒ% 
þ›èÀÑ_c¹ Ö)ÚíÉ?ÌqèfÜž+š>T”æ#COØ3M´y2¥C~öö¢G¨È0y è…‘ûHÒ*½9zFî#ý““à6ÓTú´!kH¹®XÉëYÖèð¯ð”Œ8ºÝÑ™Eki5ÂɤLy’ø“Xêt‚Æ ©ÌK2)÷Ô“LN$õž$=^xõ§fSÏýûñë\Qõ‡NG}?vvÔ9Å¥E’ ùN±1v½LS½×ú(Æz=è@Šš>Œ–òX¡'‡“lªC§\ž‹yŽ1&¬úœ¦¨ÞYIg½¬x-ç8«7wyïêuõžñ÷vsÂu}­ `t É–Ê¿(—ƒùõ“„<ŠÅxŸó"ñtr²ºu$¦gír0Mµôβ1fµ¤‘DÊX}7 `|cÜÎðˆIvˆ¤Ö –_I”V2W`ŒÓöÑ{w¤‡HŽW¤õ¡]á^¡ð‚áVz`zåy“É™^á'‹’÷e™­–êÅŠZI’VGQ\7h™µP]”på}™d. –í­’•äiµ\7XT†‘/ÚhFÙÁ„õŠÌ11éœ7Fy$=Hi—L´.ž7¾çm¹=,6ʤÊkxAŠI T¤Ešz døž ã¤Õ‘…`dy½>Ú€Âò3˜D­ŽÌÍ”r›¶ Ö+²uDZž!Z5ÙiÒP@¢<“U 94/]dnˤÕi6U4L2i¢¬“9©ŒçmÅ„õ çÎPÞÉ/¬Ò6‘ù›\ -ò>¬4 ,ç)YØòãñ¤ i¦ÉmQh;ÈÊŽDÛB¦zLÚFƒ(/¤ `²JùÃGbÂj²Å&‹Ö@ç2ik¢<ó>ÞT¨¥B»­VæÖM´ LScïj6DMu¢hì]¥°­MXg½Ã¥‚ l&)ìt:~åœ=2$,KUà§…Ÿiš#õz¦iâæÍ›~Òæyžc@ЛÞh4FK$¹œÕ`"#nÞ¼éÇë¤Q–e¡ÙlŽôþ›¦é/—Ë~Ï­ëº~²)Cëå²~{µZõ“`I(…žÐÊñœœœÀóög±¬$c\4›ËWÞÇñ{µ¥2=O9®W²' çÖï#ÿ—é«^ oÖ}¥¡%¼ø]œFP1!kL»¿Tê‹Å"nÞ¼ Ã0P¯×GF#è÷•cßÔjµ Ó4ñðáCüøÿøJß³(iˆí(Fóô®zž‡îÕ’«úç$ï½>2Fè庾@¢ž°Ù¶=VnÊã<ÏC³ÙôãH•Ôù|…$zzÜéÇ­»¾.‡Ü®RÑcß¶m4›M?ñl4èt:#‰âèû|O¥rQß¡¨× ?ÿÁÁÁB¯·ˆ´Å8UŠ’xÂztt„~¿?2tm0 Õjù‹â†1RX-˲ִbªëª'/ñï~ù—aœœ¨_Û–i 'íöøsxžº¿T2ÂT'xœ^ñ<õx zÓÍpÅÙqF_Ë0Ƴ©8Ë&Xår0¹ÀuÕåð9Ùöè:ä²³V™Ž<¾è÷UŽ»Õ/ÉÚmõüWÉŠEõ'?€Ž¼öÚØó}Ïá!þðé§WûùGH"¾uŽ3}Á%Ó4WÒ°²he;n‚{rr‚f³‰½½½‘Þ`£ZnÞT'jšãt]­ÖxŒîíñbÛ£q_.«ëñfš*îõû_÷¿ímÀÿ0ðþÂø1„¿wò=•÷,jL_µªî•ìÊw<ª¥=æ=JX“Žq9íVk¹}úÚí¶?L]zäÇA»ÝF½^G£Ñ†­^×óL€ÑÊ}³Ùô×+Ð{ìõûHEvooodƒ$€úÐty]y¬>Ô\^_O:dn¿kx¸¹Ð_gïj#Oyœž(Ê úô8–òZß™C×Û‰&¬½^§§§¨Õj#×ß¹s¥R wïÞÅp8Ä3Ï<ƒ££#.ýš’ç-U6I•úvgjå6•Úíè^ýUá°úØÒ㮫:Ëã¶Ê›¦é÷ȳa8;;›ø=½¤79(—˸¸¸€iš¸¸¸9–Y­Õó|‡ãÜ7Üš.ç¿îuÒ$ ±=ˤ˜—Eež¨Äy«Õš;Y¥üJ[ŒoÓºœQÓBø[Uêó¦'•×zë¾H£$œÒÓõû!Ó¤ô))ó4BÎbš&vvvüF_yniä8>ó™ÏÌ]÷L,a½sçvww`d?̇ÀÈxúJ¥‚Á`°’×ûKáyªgR>Ìz}f¥–+ÁQRñ­“Û“H¯RfÈðzŽJ…4ĸiÎW¦w»]?!œ´RsR¸š:FG •ˉ5^¦!¶£HudRÛ4È´Z­µl!GsˆšÎ!S£¤$Ü€ ÓNüŸ×¼èRZb¼ÝŽž–zR©O[žGÉô46_X–*ìçn`” ¯s|Àú¢”>çççøèG?Š7¼á k{þUÇ7¼úê«888€aþÍ4³†IF-¶”Zž§VÕÉÈðå¤àå—_^Ûó'Q†G™§ÓOh¤¢0k±/ÒÈú @Ð`5}Eo%sœàO†éãº* ³í`y¼M19::Âþý¿ÿø×þ°â^Ÿ´”ßQöö¦—éÝn—‰ê¢&M‘ø ¥÷oÔïÒ´5Cdm ¸_Ôy5íäàà/ãxrÂbHËJKù-oMÚÃU†ÝJä¤íê€`.¸a#‹kÍJ@3Õy°$©ƒ¿õ­oëqOXƒNOO'Nà‡ûèÑ£‰_–÷¼ç=øÀ>0qÈ‚i.¸‚¤ãÌåÆÙ¦‚’³»»‹R©´–;Ößðä“OÎõC0­ü“A#I3¾e©ÊãÉÉôžŽnW}ÁŠÅÅçyβ³³Ø¼è-u||¼¶U&“*ÃuRŒû/ó0ïç±wÞq‚dˆ^/!<–¦ÅµÒdÑ:øÆÖóós OÁ~¿‡âèè»»»S¿ ‹.­- µ 
åsf¹¼Ý’ˆïI¦ý.ëû<@*œ†¡†ÞF+¶mÕÛY¯sºïß]ÓŒ^9:|Yïyi4ÆÇvÊk˘¡ oEÑÒãÍæ|Sò»Ýnì=?ã8êû£/’µB¼žœÚvÐPT,½Dáó”ÕeÑ<`v’(½FüÏE¬ŠÅÔŒLCl‡ÉG×éÌ^@oÓ[é­LÔ’úõQcFå»"±)½ ò&5ã?€‹¬ã‘¢ø\…4Æø&É¢@úŠê25¤Z­N]Ï€Vડé-/¿Œ7¦}Ñ¥[·nM½ýúõëT+|9ôÿ/"ªþ<ðþpÓZL¤•“¶SñÅuUEgÒïóȰuËòW¸Fµªšñ¥ÀŽŠeIfe{ù>LZŒ Ty¾pE8j¸•¬J],r(pŠ$㦽Ð$ÒržÚÞU}DC§3½aFVhÔwCÿ®“+Ö‹.¸·e’Ží(Ž3ûã“¡‡©ŸâÕ˜"ÛôéÛÑéôa⺴ŸkJ¥)ÆeÀºÉ¼RÙÂK¦†lÕˆ„eMk “ΩOê®Fí>ó¼)í‹.Õjµ‘%´eü¼>Œ V«¬HfÛ6v&íÑ9ƒŒhZ´ls]{{{~Ë¥çyþÊjõzÝOdeõ+ÏóÒÿ£Ak³éøžfZ½wlØúÅÅ|CæÙG˜=<*êµ·hUÒ,I:Æ»Ýùò/éyJ¤bbYÁÐÚðöKú~Ú†o{&½·'G½>i‘tlG‰jË “­§ï;?édÂó=e›>Úˆ4Åø:毺® Û¶GVÚ•ý…×¹Q¦yžúÒG9Ȱzé5%KÊœðH¼i£r®:9^íõðú:‡÷z½‘@ïõz°m…B·nÝÛËiQ‡‡‡888@¯×ó'y?ûì³ =—e-×»Úl6G6tàï-Ôn·ýÞÔyƵÓv[e|K9M;mçÛ¶G+7YFF©µÊ×ÍÛ»ªcn~¯aâày“瀳¢žIëŠí(“¶GÛxŒËbZá0Ï0rJMŸ뮶ÝMßS{Ý+í¦–>¼^ŸŠåyACRx´‘ç…|o£Õ Ûp#Y¬„µßï£Õja0ø+^Ù¶V«å8==ÅíÛ·ç^ý.j‚v¥RÁ /¼à/µ½L"¼ÐÊÀ`Y¸÷…/ÀؼÙ~`ë·  ™Öߎ¬„r¢Ê˜v{úZ[§UZwŒëLs¾oe{ 뽡Ţjm?§ô@H·A«ÅÆ ŒÛdlG™µ—6ûŒµ6–»®š8.4©2›àVD´˜$c|Uƒ¨dä£ëº¸¸¸È’êyªÒçºão¢¾6Hø{:iô§ô ¦\¬„µÕj¡R©Œ¬*vzzŠJ¥‚ããc œžžú“¶W¡P(¬äK²ðg`Y¸ó›¿‰“ý¯Wr>DºUÅ·tÒ¸®Úò˜=üÛ.Ñ&¬*Æ…´ÆÏS1Msù…hnß¾í_·»»‹£££±aÃYõÚÅ*Í&‡øRª‹ÁZ2ÊcVÙŸ¿µ0QJÍÛ,ûæ-µ¦€ç©žÓV+h©ž4 ŠÉ*­Xœj×¾ Ø² Eð<5heÞ°j·Ûþ(±V«•Ñb2*•J*“T]Ô¾Ñq ž^¹œ½/mYÀmþüUYŽÉ*e€L¡›gÄ—išÓ‡ˆY–ªŒË<ŸpË&W㥔³,ký ëQû™-)jkÝY]ñ=ÊÕž¢#ÿŸ´=·æ[ÚÌ„µR© ßïûIêp8„mÛcsU{½€`§4X´Q|ð뿎o~ú餟h¦yG¸®«æ;ɪDà8ó…«ã8¨V«Ó§tt»ãû˜êøý „íìLn¤q]®ë®H$“UJ‰Tì§^€Lÿ~‹óoH±ÅJXkµ°»»ë/s-à ‡Ã!îÝ»‡;wî V«­uíMy×ïþ.Þûk¿–ôaMgÕñ0Y5ŽÃL)3æ Õ™ûRÊŽõ¬XPJÍÚ£r% Š…É>iõ:¿´Víö| îŽã¬ø{˜| õ×,—¹øXBs§N§3²×íÛ·Q©TÏ?ÿ¼¿Šp'EË"»n°ËÀ<þÿãÿˆÿðíßžž!D˜æ|C‚¥×éê{(·,Ëš¾rª¬zJ”RåòôQ„3c|^–ܼ9ÿdq¢99Ž ·yÌl„\åÁu»ê»°·o©nÚˆXÛÚ ܾ};ò¶ÝÝ]ìîguÑ…´>ñK¿„¿þ#?’ôáÍdšó {÷{WeSi¢ ˜wñ¼™©€°Á†2Ê4Íå{›l[}õ)ü̃I>Ñʹ®«*òœ¿JbÛóu†Ú¶=}¨¤irÕSJ½v[Å}¸ãyLÓÄÙ2‹¸ìì¨Êx¹¬~ Z-öªÒZÉèÚEFÓú‹E®ÃÍ›êKÖhp릔‹•°áôôÔ¿|zzŠýý}&}ü9Îüu’ 9 Z’4ÎÃqµ¢d·Ë…4(3dg™8bí½Új±åœRoR¦ÛíÂ0Œx{Ä»nôzœƒG¶Ì,¤µÖÍ“^ĉb‹5‡õôô»»»þH»»»8==Ûî&Mf-XE&u¥ãÌ_çv]W !«×ÙÃJ™1Ï>Núœæãy€aàŸYüØôÑÅS­¶ Çq°··‡û÷ïOÎî8ø‰J¿æ8ªþµ×’>z¢•r¯Vfš8Ø49—R­Û *öá ¾Lé˜:Ü]bœ=«”"ž·xH.4˜[ÓäÖÌ„õúõë# .‰J¥’ô±¯†mãµËKÎ{¢l©VÇwµšA·Û¸©ökÿÉ‚÷Þ¼© ræ”!7nq¶Åž:Xzž¸¿6¥˜aLž¯mšæìŠ;÷¦ÚÛS 
0‹.„:u‹2;ÏFš­03a-•J©Þouþ·¯}óû({ÊeüÖ/þ"Z­,ËŠ®´{^{í5Æ7eÒ<óW'—ìvÙ»J©W­Ú¸y³‹‹‹±Ñ23+îD)$Ël,Rý˜9ØqT6|vÆ}å·ÄÜsX{½îÝ»ç_¾uëjµZÒç1ÊóâïRìºø·ßøŒ8é£&šOµŠïüÔ§Pø±ÃÉÉ vvv`ÆheÇuñ»ozVʤ‹‹ÑËÍfNg$ÆeeàÈQ2œ×G)纪bïº6ªÕ*nÞ¼‰³³3¿Ì޵ØQ u»‹wüOìyLV·Pì„Õ¶m<÷Üs #ןžž¢T*¡Ûí¦g˜°ëªù†áo´mÛ6l[ý ´Z­à‹àyø\?à ڔ5†?ù…_@µZE±XD£Ñ@»Ýi‰w‹Eüݯ} X¨SÆ9Ž˲P,G†¿ïíí¡5©V$½«2F)eš²Ð’‹V«…F£½½=¿Æq\„[nˆRNzW©Z»®;}TÁÎŽÊ„Y¯Ù*±Ö^¯‡V«…Z­†|ä##­¶mãüüÏ<ó ŽÓÑÛZ­ª¤ÕuѾÚü©Õjáìì ¶m£Ûí¢Ýn«ó¨VÁÙ«”IÕ*~îßÀ_UÆ[­nܸ1²¥iš“+óD)fšª(—:‰mÛhµZ~ã£a~9>±н«”bú€n×ñ{Uïß¿Ïóâ­­Ñn«FVÞ)E–é]Zoi6UÌ=U·N¬„õÎ;0 ݈ ÀahµZ¸sç>ö±%}NJ½X\×i”ãu´kfnÄM”FÅ"Þð]ß5rU«ÕB·ÛÅÉɉ?ºà›¡ ²íÑ:¸´¸×ëu¿WÕ¶mÜŸ”zžZdŒ½«”RÒQŒ&§Åbqòªïa–µø¾!Dk°LïªÔ["ËuË ÊuÚ:3Ö^¯‡Á`€Ÿû¹Ÿ›z¿ýý}<óÌ3è÷ûé\¯{{p¯’Ò0Î飬sg¬RÓh4ü^VÎ}¢,sÝ aõT²Xä^|”ZzG‘m;‹ÕI‡=«”:Õêâ9¥,9Ö`ãyj4GÌl­™ k¿ßG©TB©Tšz?IR‡ÃaÒç¤”ËøêW¿Šx×»’>¢µ˜4dìääívŽãà·s¾Â7å—ÞÖhÛöH㋬CÀ†GÊ¢v[ý+£¶m/¶µže±Q†Rg™fºÝntC¤m«á1³µŸu‡B¡€G%}œ q ûߘ|µ4_Ò‡I´¿’ã8#×W‹Õëu¼ù—9éÃ$š›ëŽni# æé˜¬RV…{ \×],žm› +å†leÙxS¯sÞê–›™°V* ‡Ã±ÕÃz½•à¦ÅÙå%þÒïÿþä;ØöXeŸ(KÊå²j®·¬‘ëONNÔ¢ŒoÊ }800ÞÃ:Âó‚ISr™qO)å…V‚°HÏ,Q MÝʆ¶^¬„µR© ÕjMî;qtt„Z­–Žù«W~÷3ŸÁkÿÙ6½•à (£çjÞÓɉJZµ1”årYÍa/emE³ç“ÈÜ&IRMs¬‡(w8˜rfjÃ$m½™ +ܾ}ƒÁÏ<ó ÎÏÏýÞÖÁ`àoi3 p˜²ùr®ëâ;þù?ŸÜÉ (Ã\×UIi¹¬†Ê4›á;$}ˆD ét‚-¢†(—U£ÍÞ^°7‡ŽQFÌŒïIg¼«–(£&ö®²CWbmkS©Tp||Œ££#ܹsgìöZ­†ÃÃÃTõ®ð·­™rö°RfÌóhµÔ –TbÂã*‰2hêò¢ZUYîÎŽú?‡IRF8Ž³Ø‚KAC921aÝÛãÊÀ f Ië`0@¿ß÷·¯©T*3WNÂÌadDµ¥ Z­Ñ„•('bUè%R†¸®»Ø0Hî½J9bY–ZwCÇQ¤‰° Ùâ&Ü2Ñï÷Q«Õ’>'Ù³ÏgÛê¯Óaï*eZä–6†1:,ضYy§Ìs綉Ƹ³ªŸ÷ê9WQ/sèÓwËdt´¬KwÚ£ {õÉÚÚñêW¯ç¹ÚÚ1”¯Û¸:ææÕåÖÕ}eͬâ„÷ÅÞ{û½øìû>»‚7-Y®ër¡ÚjÒÁ4ÖÏyÚ¤™;a¤ßïãààNJVgŒìa­VU…¾ÓQ-7¬ÌSFMÜ·¯\mŒaŒSÆÍì]m6çߥނJŠ¢~®:PI’ ‘,#Hà,Éjýêz=¡sBÏa#HТ’\ic’­wB·;W3 º2*ql H»W×w´×Ñ_OÎGê{’oQ½:Ϊv›Þ`\=gñê}Ø“ ÀÙÕcn^=FO¬«c×?&W{ÍûÚeAb+ÿ/k÷½:¿ÏüÄgðêðÕy>õTš«A†(‡d;›1ÒÁD„&¬i9ÌFæ|HRÍVʰȊ|£$¬,è)ã&6ÌèâTø=¨d®•lIâÖÂôžRA²$=—÷¯ãB%SúãÃ_¹òÕëØÑ2‚žÏ2‚DU„/GK£=›Òó)Ç%|lgWÏ)íºÞÕ9v¯n+G·äI¥”e©dÕq€‹5õ&Ô{4i¡U‰W‰‘$Á•øoѹº=ï:}UR½QHßÞB^ jyÍy·ë L™Úø8kÁ¥UoAC´BsMí𮂙Ã)Ãf&«‚# )dfÂZ©Tp||œôq®Vµ:_ë¬.U¡Ÿ•È…O%yšöòáÅ> Ì®Œ#îç§%«óÛ»:wéÉ•7ÙCOzr‹¶ÞRyÏdûÉŸ$¡‘íø¼:å²Úê#Îi¬˜v×:F·¶F'–€Š+}ÕðÂ3úë{ÚóHò)?QIn8I–†}»ée×ÑCÐøbB5Ö„ãXÿ>Ë9EÝ'< `äý¿zm¹Ox ’8ɻޣ 
¿Nø}í"X<'fn“6«‡Õ2(µælÛìu¢L‹¬E˜™°öz½±ýU‡Ã!úýþÒ=ªý~€JŠ'Ý^(P*•VæqçØˆÞ«o[T1ÚÓ)ÄˡÛÃIK8¹_¤·4ã‹ïjðªÀ¿‹ñ¼úˆEE5– t$W³îŽ‹8qµ]Ç,ážwi\‘!åUŒ¯¼ZÆøöḎç.cvò.·éïiø²NÊ&;ôl°°îŸX©‘ÏiV…ßÁhc QLëŽmÇqæ[8Û|ЊmºŽÂd•–±Ðà矧§§³[¿'è÷ûhµZ €R©„n·ëiƒZ­–ÿe2 Ýnw¡×Zš¾÷ݶ ÍÿŒ^7)iÙB©ˆï[ïþ×ÿkЋEzŸ8òF™”T‹81õÕ÷™õZqGPÌ1n›ˆqÛ¶U…Þ4UE]¯äÈž¨“O‚ÉC݉"lªüž{KÇæ™ïº¨E)ëBýÖl{})’¨£¸®‹r¹<=Yu]õÇ‘!‘mmP«Õà8^|ñEÔjµ‘…îܹƒR©äßw᧵ЇÅŠø¾³ ü/ŸW•ˆ¨éÚ6ÔðV&«´€MÅø\%¬{{@[[EN*Õá:”ô8ËÿÙOsÚTlϽ¥a¬^_jšÃ€·btáÆ›W@°P¢ü‹ï/L—DÅqœÙ=«¦ÌÕÞ$©MziYu›G[¦ÀÆÖ^¯‡ápˆÃÃCj±¦ýý} ôû} ôz=ìïïû·ïîîâ"Îâ1ë`ƒ•Š-ñ½U¹ùKß, vª""=Se÷“~·(‹6ã¶mãoüÆo''Àý«`•V~ѽɒÄJå#'syi36]~Ï•°®³wÕƒú}ð ~.¼†Ñ©÷1ú›áBýÖì@Õ‘V±8ã¤cs—~º’TÅqœé‹¡«™§í`4±ÜÁèZmí:i„±1:mGîãhÏik×Gţ܎Ðcl0~Wdãû°Ê"N…BÁ¿îáÇÔCþ¯©¯T*þÐ…òÀá-4—Å·¬ì® ˜í5•9Ç@°ðÀ¸¦…m*ÆÿóO|_yÿûƒ¡À«Ú³Þsž¿\·°¡…m²~2÷–6ë$s½ã6ðÈïŠüÞèÓNd䎜š,p'‹åéß_}Fêû¬ÿ¦ÙP¿u@°ð£¯,òHsIªnÛöôx—žÕyFèÛñé»ýÐcj‘u1ô…'m¨¤U~”û†ø“uäÿ@ðÐãÝÒR?Æ÷ˆ'¬áíoƒŽŽŽ°»»‹R©„^¯7ñ±ÃápäK¶vRFsÚX|7 Z·ïc4Am`ò°–Eæ%…l$Æ]ï~é%üñßÿûã·É¨ÛÞöÀŸ~‡º|ãߢ9l²~2×–6ë¶h§VT‚ÛBd껩/ %œ ìA}‹èµ¯žÇºúÿTåž «Í%‰:¸w•ŒN÷Y½«ú0t‰™²§os¶ê:¼µSBxÅðZ qêWç¥oi©Oi‘ó‹êœðß7Œî®ãuIb'¬þÿ‡ÃáØu"î8ÃáÐ_¼ißž ÏåÑ£G¿,¯¼ò >þñ£ßï¯nYÝ“råüüýèGñ†7¼am¯±êø€W_}0 »»»ªb°ÕZ'?Ü ä¶°¢øðòË/¯õ5ÖZ†ÿí¿ÿþ;¿ÝüÁñ;Êp`ËþÊ€ŸúŽ ÷…±¿ŽŽŽðÉO~o}ë[×òü›(¿ç¼N«î­,bt¥öE†Tbª'Ò+e h˜’$fÒðÿîÕó´ ’*Ù-c´K¶Q‚ôÜÉCЛ¼Š•õgò»ßïO\ÁwY›¬ƒÛ¶={8pœU°ËmœHr·‰Uô€†O7j K *Ž[W÷—Q‹œâV’zê;c\=FE“-úôéazâ]Fð]j‡ŽÉÔî&÷‘D^ÿNélà7~ã7ðë¿þëøã<×Û53a½~ýº?–]·Ì–6ý~wîÜA¡PÀÝ»wdžL2mií÷¼ç=øÀ>àéV‚=R¹$-‰÷îÝ[Ëó¯#¾àÉ'Ÿý!ÞÔ·b|¯TÚjÇÇÇ‘ Š«²‰2üÿõ€Èu)å‡»í¨½†«P?à ¼5qëÖ­µ”á›(¿çÚÒÆ¶ËRs¹×A*Ãi"[ˆE Oy¹€ªXß@PÑv¡’ÝFGé{TËÜB÷êöºv_ AÂì@õø60ºøŽl}& ì›­?‡$2|T^¿¬ƒ$>’pxÀqá¿õÊo­-YÝt<Ö‚K¶=OÛ}>Û&\·“!ÅeírY»o”p"Nrõç¿lí±²¼;á5 Ý.s‚¥ÑÈ ^çýÅ÷£\*Ã~÷¤ é£ÍLXK¥Òj“@­V µZ ·oß»íúõëÔ0ùrèÿߨ6¶b¿PZ­Æw é«lPîm"Æ'VpŠ.pcGíÁZ.=«)é°¢lÛDl˱Øöz÷_•d*«d>­ô‚:í™Õ;ôíÁ¦í…­?¾ 54L_Yæ2ꉨ •@œh×ùûG#è “ÖÃÈÐÎÿùóÿ3žÂSkyË6]wÆ”lsÚüÕðÜÐm·Šß9ý{0­£#\·ŒêkD±Ê€²ªÀÛWµ»mlu§µØTùí8Î| ëÙY¼û."/òúBO› 'ùáÑxQ#žÂ•ý*FG…”1òY|ùÅ/¯å°“¨£Ìl ™´÷ª$ñÜK›0GÂÚï÷ý±î•Jebë·ã83Ÿˆžÿ*=<<ÄÁÁ¿üv¡PÀ³Ï>»ùw‡!šS¦â›h›ˆqÏó¢+86®*ƒõdöë£\ÛTùýÎßù|÷w÷ì;¾ø)àMÿ©I°8•dËlºŽkþjµ=Š€ñIšX «,G]*•F&\¢P(À¶môû}¼ð 
3ŸëððpæãJ¥‚^xÁÿb-3_vaá¥Ú‰bÈL|-h1>±‡Õ_-±¾Þa’´•6U~?zåüå~øGÿHÍÃŽ.i¸]þÚOªË2LVÜÕ‡Àâêú:Fç¤ÝÀèþ¨í«ÇÔ¯®“…XhklºŽbÛöìù«“„Þ¢­+a½sç Ã@·;ºF¥RA­VÃîî.Z­žþù•Íw /½=ÁÊpó£{~­ØRñM”ËÄx½^NXõzOZVY¥­³lùýŽŸüI|S£¡†Bîì¨áz<·¼ü5à›Ÿ>ú¯ÔuUŒîg* ¤žc¸‰Ñ9˜gW×W¡*CÍÇt F+äe80­Üªê(®ë¢^_ UDÃb1OWf&¬2$à#ùÈÔû}ä#Á?øAÖD•¬Ø§aG–F—áÜˈ(1cÃ=¨ 7W¦ð )—ÕʨÝîè*À€ÃŸÞ÷W‚ëÂ+ç†/ËÂ?]'u }lœÅSˆV Ö Á:Ù—Ô§åшÇgÝ¡ßï£T*%¡µZmdx°ÜþðáäÏI9ÁèæÕQ<¨–Ì›W—ïCøLV‰ˆ’ãatéü68t‘òÇPm¨…•Ü«€—Æö»GÑC…‰2dæâbí¶ú×Â.ûƒÊ–DDWfö° §@jÞ†yu¹õEá<("¢ôã\;Ê#ª!檞RP¼ª˜pñkÊ Ïó¢·´iø±×€_Ü>ÌnUŠ'Ö*Á·o߯ÁÁžyæìîîŽ$¥÷îÝÃùù¹¿Ô©bt({T‰ˆˆ().‚dÚ¿;`ƒ:å†ã8ø™¿þ×ÕÈYõÝÆÕê¿ÿ€s´i.±ÖJ¥‚»wïâôôGGGc·ïîîb?ùÕ‰ˆˆˆÒ,jˆ»l?C”®ë¢ð£?ª¶m:; Ö&øï‡Àß±€û÷“>DÊX + V¾}û6ýÍ„L&LDDDD1ÔÁ•°)7\×ÅáßÔë€lkãAÍÑþ¿ýÚΩÈ-9(¾Ø «XÕfÂDDDDD”/Žãà¿øìgUbê–Õ0ø2·­z\9˜æ4wÂJDDDDDåõ~_®TðV«<ºÒ;{ViA3·µ!"""""Š£ö¯þþãOü„|`~6“UZV"""""Zžëâ±?þc¼÷ÃNúH(G˜°Ñò<ÇïyІL´LX‰ˆˆˆˆhi¶çá?øƒj«&#飡¼`ÂJDDDDDKó<ßóÕïá¾Â´R\%˜ˆˆˆˆˆ–V¯×j¢aÂJDDDDD«a¸ŸôAPžpH0-Ï…Ìlh…˜°Ñò\p±%Z9 &""""¢å1Y¥5`+¥V""""""J%&¬DDDDDD”JLX‰ˆˆˆˆˆ(•˜°Q*1a%"""""¢TbÂJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +¥V""""""J%&¬DDDDDD”JLX‰ˆˆˆˆˆ(•˜°Q*1a%"""""¢TbÂJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +¥V""""""J%&¬DDDDDD”J©OXûý>ƒAÒ‡A´ŒoÊ;Æ8åã›òŒñMiq-é˜d0 Õj¡ßï Ã@·ÛMú°ˆV‚ñMyǧ4¢¥1¾)ïã”gŒoÊ3Æ7¥Q*Ö‡*•Š]¥Rá8zM¿ßÇùùyÒ‡Áó]ã;žmkÍÍÓù2ÆgËS™¶mçËøž-OåÙ¶/ã{¶<•gY9ßTÎaö¥‡( c×ÿþïÿ>~ù—Žã Z­&} k÷™Ï|Ÿýìg·¦Yåù~æ3ŸÁïýÞïá±ÇÃáááÆÏe‘ø€`ïz×»ðîw¿{ãǽiÿøÇ“>„Ìž¯ã8øƒ?øƒÄÎ…eøl,Ã÷éOŸýìgñÖ·¾5‘sY$¾Y~çÛ:Êï¤ÊB–ß³±ü^î¹~ï÷~oîò;• ëp8œxÛ£G"¿,•Jïÿûñä“Oâ}ï{_Ò§°v_øÂð…/|a+ÎuÕçû…/|ïz×»ðØc%r.‹Ä7|ÿ÷?¾ã;¾ßöm߆oû¶oKäØ7ék_ûnݺ•ôadò| …Þþö·'v.,Ãgc¾¸w¾óøìg?‹·½ím‰œË"ñÍò;ßÖQ~¿óïLä\X~ÏÆò{¹çz×»Þ5wùÊ„U†V*•"¯ê©§ðÔSO%}èD3-ßð÷þÞßKúÐ7ªV«%}™=ߤß;–á´NYŒo–ßùÆò›å7­W*ç°^¿~Àè°„Á`0µ2O”ŒoÊ;Æ8åã›òŒñMi”Ê„µT*¡V«Lðµm;;;IÑÒß”wŒqÊ3Æ7åã›Òè±ËËËˤ"J¿ßÇÁÁJ¥’?Éûøøxâü>¢,a|SÞ1Æ)Ïß”gŒoJ›Ô&¬€šøÝï÷$?¦ŸhÕß”wŒqÊ3Æ7åã›Ò$Õ +m¯TÎaÝ6½^oâmý~ßoáštû´}‘fݾi³ÎG¿_Η”I1ž·øŽsNrŸ¼œ/± Ÿt¿<œ/-ßYü¬—‰ï,ž/±üžt¿Ôï%%êáÇ—O=õÔØõ/½ôÒåÓO?}ùÔSO]>õÔS—O?ýôåK/½4ò¸}èCþí?ýÓ?=ö¼Ónß´Yç£{î¹ç.?üáÏu>i;_ DÅxÞâ;Î9 Æw¾° ÇÏEâ;‹Ÿõ2ñÅó%…å÷¸´–ßìaMÈ`0€mÛhµZ‘· V«Áq¼øâ‹¨Õj#÷½sçJ¥’{¿ßÇÑÑQìÛ7mÖùˆ^¯‡ÓÓÓ±ë³v¾4=ÆóßqÎ `|ç Ëp–áy¶L|gñ³^&¾³x¾ÛŽåwöÊo&¬ éõz#K†‡o‡8<< 
ìïïc0øÝí½^ûûûþí»»»¸¸¸€™·'q®ÓÎG ‡C´Ûm†1òø¬/)“bñ‰Ì}ÖËÄ7c;›X~g¯üfšÝÝ]û¤«T*cˇ?|ø€ ù¥RyŒŒŸuû¦Í:qçÎìîîú›V‡ï›•ó%eRŒç-¾ãœÀøÎ–á,ÃólÑøþÒ—¾äßG¿š?ëeâ›±M,¿³W~3aM¡B¡0²„ø`0ÀÑÑvwwQ*•¦Áp8œy{ÚÎÎÏÏ1 " ¬/M—·øŽsNŒïí’·gNºiñð•¯|eâãÒúY/ߌíüaù=*-çË„5ņÃ!ŽŽŽðÁ~µZ ·oßö¯ŸäÑ£G3oOÛù œžžú—£—Åó¥éòߓΉñ½½òã,ÃIYþ¬‰ï,Ÿ/MÇò;x\Î÷ZBïÍÐï÷qçÎ ܽ{w¬«}’R©4óö´Ïùù9 …lÛ†mÛè÷ûxøð¡ß”Åó¥éòßÓΉñ½òã,ÃI7)²úY/ß7nÜÈäùÒt,¿ÓW~³‡5¥Z­–?î< 2¾\ï† ~`̺=mçsëÖ­‘á aY<_š.oñ=íœßÛ)o1Î2œt“â!«Ÿõ¢ñýŽw¼#“çKÓ±üNßù²‡5…lÛÆ`0€ac×j5”J%Ôj5œŸŸûãÍmÛÆÎÎ̼=mç#B–ÂÖÇÒgé|iº¼Åwœsb|o—¼Å8ËpÒʼn‡,}ÖËÆwÖΗ¦cùÎò› k É2Óc·9Ž@ÒÁÁ¿\u¡PÀ³Ï>ëßoÖíi;ŸY²t¾4]Þâ;î9M“µó¥éòã,ÃI7+²öY³ü&Ëïqi8ßÇ.///yiiÃáÐĨîüY·gͶï¶Û¶Ï{ÛΗ¶ï3ß¶óÝfÛöYoÛùÒö}æIŸ/V""""""J%.ºDDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +¥V""""""J%&¬DDDDDD”JLX‰ˆˆˆˆˆ(•˜°ÒJɦÁDyŧö±áîÝ»0 cm-<Ï?ÿ<Žq||Œ^xO<ñÎÏÏ×òZúåöíÛk{?)}’ŠñMÆ7ÀßV,Ã)ÏX~Sž±üÎ.ö°nØÎÎ … R© Ãðo¿~ýúÔ/‹ e˜dwwwbkÍîî®ÿš…Bׯ__Ëëôû}<ÿüó¸uëVn¾(ß21ž…øãÛ,©ø–ÛY†Ó:±ü¦ö±Í¿Y”9YŠo€1NóÙT|¯êµß4–ß”wYŠñ¼Å7ÖŒ) i)YÕëܾ}üàqtt´±¡p”]Y‹o€1Nñm*¾WùZŒoŠ‹å7å]Öb> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½ËŽhËq%6çWœ¡=`1ßn )¸Í†ä=RS”ݺ’›`´¿ÞkEäÎÌÈ,ò²Îվࡄ[µ³öÎWäÊxÇÿ…ÿæð¿_ò?µ‡o÷Ã/܇Yž®Ÿðø¿ÿ¢|øŽ¡6yÆ_sK¹~ó)}‹EÞÅó~ácêøáõï¼ÇÏnýÀ¦øÅÿù‹Bï¿Çï%ÏÖÿáwßþþÿÇŸ×ã¿Ý|ÍßBÌLA~úGù©ÎòÃ?üâÏþ0–ãáÐ~ÿ‹ÿü_ðGÿõáÛ@ÿ7šJÊ-Æoÿï/¼ûö?íûo_êÓøaMì^BÄü«kZÇ£ø£sηøÖZA1¦˜ý3ûèý!ìS–E·OêG©@n?ÇhÿæÉÙÿ¯~í¿ùòAøíßc4ÚÕïÿĨr ÞçRCÅ«9´kl! Žü»ß.pùeð_ÿöÛ¾ýçÿ) ÛÿùÛùöÛÿð‹ÿ[™ù÷Õ9´¼k=}¥¯tõõÖÉñÆf>°ðòÉÝgS@«½÷æëË€äAF­»®~·–»÷î£6ÿIï³áý=u1 ×’ß\lvhÛôûúbK÷ÛbÛÞ¯ÅþqLð]´ÐÞÂ߆ˆ ­^–`àÁ—Çtâ]ý.–˜¿q'<äŸÔK á#¶˜£÷åêË9:ñؾÛGøþ¥ûpò/Ôo¿\=nCÙÿ¢öíR$à|¤ŒÄ¯ü'ëHþê׿ùë¿ùÛ_ýÝ?ÿáw׈ö åO®@Í^.§ØJú–:®g_k)¹ßK@|ÿeÒ¾ÿå÷¿ûᇯ¼¥X÷Gá¡aLX†“° %9\Bø@Eó¸@ÑJûBýp¥ùî“ÜbË E¬hÂ’âιª5fa$>Y¸Ú\ˆÃàrêØyiëødgw´Ö+ØïæzÍQÚðI'má#ô–z—¶ÆMCw­°óÇÊG¾œÿ¨Š<üé£Ö„rë‡of|³bh¾°Ÿ\È]ÚðMŽ%v^·ø9xÌ °É㓼W3Ùí‚s› è¥k¾Ò_åsü襂Ïê2’§À5Õžñkôò.íÊ¡¤LV"aY8BÈ~¬˜OVª­tœr6á“JÀÖâÏ?°=&m “Ê j Ìë#5—}’ÕÆb‘ЖÑ;ÈÃŒ-{fÄ7¹C…\Mìè›#&0…ä?896(/1óå: Ÿœ+†?¬ø\qQßãÁdw«q&Šç m ûÆ ª1È7SIÞÍËø&‡ 6sík¦û“¹Ì#:œ“^eC ¾õЫÇüD"7úÁþú‡^ù-ß± ­ËØ >†åü{y›–H^l«ø v¦ÐÈv #ŒLÚòÇ -/ßLd –6|“[Ó2ưÄX¬’¢Œg§M‚ Òyi±I8;a,Ç™ 朼´áO'Áò›tY+žö,âÅ#9g'pYp? 
-t0 iÃ'ës ᓹb§°Ì”þpv:†‚Eä{€™MêÒ†orkp²å›«â]hÒÖ>ž#î ë€}ãJÓ?ú$X=¾Ñ+®iÃ''Áâ“ ’Šîô=|r,†‰qû±š#ë\¬” ³öpò>䓵ó–Á|0Ð&‡ b•hŽ ,ý¥„Ó3_AÇö»ŒÓ’CrIÚ@t“bÎOx+È7qxÊ¢X쨳{¡$¹ÿù)ˆéMöúÏæ2üÿÓm`sYÀà¾q`‰3q¸n܆ÑÑæêÛçŽ1»äoÜ®ØTÜX$Œ·“ ûPR‹áÆíŠMm8%ÁŸ¸Í÷z]äX.Ü†à–±á¹×~â6‡‰¯PØ£ZCŸ®Å ¶qƒ Á4éA6° ¤Àv¦j½`»á› ½œØ lãæIÔÁT½ v؆´Aæ=*ŽØ&߉‹ÐÝ/ØÆPðK®þØ.ŠV ºlçúA¹ ].ÜF[B§¥´|áv鼘ëé‚mt0Èø“¶KÄ/É5¯˜b`dá"9È·s‘KºW/ÜæíÒoE×ÃàvÆå,ˆYhp»$lýÚRƒÛ`ë@Hä ú…ÛW\Á ^òÛ™ÄUqZs¸p\AÅŽ6ï/ØFæÝAøñ‚íÔɤº6î±¶±6`ÌÐwÔæ¦uPnÔ µ±Ü[èjjs$eJrâ,jc }KàjJ?Q›lF~®} Ú ÝíÍ€6F äĶD6F@J¼@ŒºÄÎÉ5nA›„†ofÒî Ú `rC/ÐN„¶VƒÞs´Á|UØê“ZÐÆX Ýdï„ÙÐÆÙ8)á˨í'j·oñ[ø¿Ü<µ˜pÄþ\À ”b€K¹ùmÐ> |YàÆnžçw7.q[Ź<›mØ;p)¥Ü wû-à(+wl€»§†»)“køíNQ2”àv~rW‡ †c÷ pã‹Ä] pw\<‘º*¿]„6‚Ê÷¸_8; V/àÕ*ŽÌ Ü8„?Øw#8Aî ¸±[`÷›twãBθ[oàæ±Ôtá6ˆÖ‘ª{`›Œ{"ÇRnàÆ…Œ—b7¿ å~‡®£À]Ëø_O%ÀÍpg àûûÍpõ ³ðõn€,ö”o†À}ð¼Æ/à.ÛñzêpWG8n€¾›á.$pD‚n·An'ØPܪ!ÜÈÍÕ)8cÅ_ÈÈ‘‚Àªÿ º¸ž8–Å@7ÖÓŽä[èÆ}†õ…*`èÆRƒ~0YN ÝJÌD«vc7è' Äë;vc‡°’ÀKÅ`‹Ý8LL,íÆî +Ln¿°w?$3ÀR¹ 3Èð) v ·è‚П~Ñ"7~)˜ó p;œþ a£Nnã†üà ²„¯·û©À .ÅáטóÜ`Wq20)×Näærl¥ÈÍíq$Óèë‰ÜÜe­*¶Y–›'°Ø€]u¸=‡ÂcÇm¼æ:6ènpïáÒÁn–|á¶w`R†ÃbÒî3@‚ibMiv)梪E!6ïân§Ò·WR6~"æ§æEUýÿHS€€˜ ™°uÕË¥B«ë#@@Î "A½h¢89ˆD$Ó$Ÿ„8™¦üËdU|CDÆT_€© %ïÕÃ/ïAöÖ)Tu ,ãXjFOI#~FŸIÛÇè]@R;ühÀ¨]ä¢Ô¿,IÄßɺñ {! W1^¦~øP*ʪ«4Y¿ì-N•Õ³3~8¢°ø/dwø€BZUmö»_N &ÑD,zNj¦(Ée0- QYéÔs8ü$âu(ôtQfý¦*†ÄNœãáà:¤Ú ¬ðG î¾ë0V«fÑUäxø‚Ä`iKT`ã5Ò(‡™ºÃ¥bN»Ä¨×)Ô‹zi3~é#CRŠHOç9¦n…S˜§…m‡_@vñѺgê$s€t³èÈänÅè¼w÷E÷K~j ©¢×T>Cm‚vi©\°Í‹47œrE=Ûd ÝW5êØ¾ò–¨~œý¶ƒhb€O]Td¶Aô´ùf’ÆÜ­@°ˆàt·\! 
Ð¨¦”¸ƒ#¡PoÒ.ÜÆ’°yNÅí‚{8¶áðda»Ñ†A´/Ø’9¬ju5ž°]é2b©¢ö´°‹§*µÄ„da›nŠØÙVD“da;œNÙÉzÁ6n*aZÿ¶!ÒòÅq›ئñ;†å¸q›&š Ô.º·s§þ·D±‘[ض.¶y¼\¦CW»`ãîàÔê ¶ÕÀtQ/ØæEŒõí^ïoÛTâDaòÛ™xr¨º£¶iî»Ý›¨,l'j •©þ‚íØa‚ÃÛ Û ¶&ÔrÁ6†‚#UÕÇ¢67’5¦•/ÔF[pè-ÊefQ;Qߎ{ Éa´¨-ÖqàŽÚÕ,j'…“{Õ¢vm 8±ÔÎ À>#(ŠÔÆb6Q´Š•È¢6ºk8[ôz¡6ØŒ‰Ckj'rfû(çÊ¢vä½A’¾\¨Å( ^ûÔ¦¹4W›N¯¨çd£ž2Ä—íKr{*‹{!Çs"·§} {œÃÅp-±Ó@PåRvä& L ä.†›S ¬B½nZ0inðíf¸+­ÿ¡ ®Í"7ÐÄM¿Ú‹å/Ÿ„iŠÖ°ÜMì=¤‡tAw#¿ VKmŠËÍ£Ú0»Yì¯X@b1);n°¢ %xíoÇnò« PÒÝ´nâ¤ÑCîÂnÐ&¸{'ÒÒ‰Ýx¯ç(8|Ærã:¾µOXnÙJ›• 7»Ñ©èÊþìN¼š°²c9-ÏMë` XÑp7ûã-2®»Ás7žNÀ·Š'–çæ9¨M-&»±üŸírþžk Žæ Þb©~eÝ6Þ‘®àè’^Õ¼Ù†QÌxGñé¦*ð&ÿŸôØ]àÉ÷ø:¼@ x3èÍuÜñîæ¹ñIœ´ÜUu{ðÜ\M,¿^ÿÏÝa“kþæ¹;¯cÕÞuðÜàRR+Êe,7=ø{©^vÔ²Ü:¶]Öòà¸ËENll»9O7ʃã¦w/DË$¼ÙÁqƒ«Æu —ÏÉo‡p"Ä zðÛ¸˜èñj¿ùíòAãج8îD¡,Ððô5†[-LàÞ‡§nèýø“,_´0}Ö÷J¿1ÖÄ^饪%¥5tZÇ£×Í;5¨1…Q#ÏÊÚG¯ÁLYÝ>yÛÂdæ«›nž|‡…)á²,A¤sñíðØT¨W"ƒ}üK²1­³:Aá•íkaƒ„÷Îê6 •ëè]2fÓïëfé~3{ØÞ_¶1í{ú³Ø˜Ø¡YlÓïë‹-Ýo‹m{ÿº)vº.Eaó_³1ÙNÞ²1™^^±1%²÷©A<£FùO™Ð½12ý´ed*&¿WZÝS…H~ 9hzËðQÿ” >•žlb ψ‹«FáýJص•ÔÑræI<œK4ÚÊN[Kðª‚.Q}*A”üf +Õˆü(qWVRb #¹A¬hd¯®ŠÒ±fj¼¤áa* Eê+”$•-Wm+.·T…^²ÑU2² âq 2£8Têin„”(^c%«C¥"åÌ€Õ*ç©”]àí`ö+ÝeI;¥‡R°”ôÉ+Y)FY)Érª¯"dBŒÀqâC¥–}j„V©Æž>šRˆhÞKUJ•u|Kg×vy·Z!üi RiFW? $×’E†+M]*U‚¨bi­ˆ­¨t£« y iŠ–£@žôS‚@„4ª½›´=ALJÑ´TÊ‘±Šï<祻V‰ab dŸ.¾ú•räC­âóí,zðê²2s v\úóÆžê<„Ita•‚äC­¢ê+3Þ«†CYééü_eK«Øk”\©,ÄÄüTÃ.îb˜ ãˆ]¨Éh+±Ý µr^ß3ÚJÊ»> + Cì¶²b6¦š­¶s \PiÚ%^9¸ÔqŠñ¦–]âAl*ÜZŒ¶²£ošau÷Š cŠÜõ–ªXkÝe^2¶‘$¢ü­u—y©'Ë“*!…ïÚJZpHbãË5ÓA3)U¼gdºPÛwNY\áÔŽTK%ÕŒXØŽr&^Ô‘¶i ¦‚ ÷|Áv”ý€< J ÛòPLq!Á§ ~Ð’ØžwئÇ3&µéÄŒQÔ¨/Ú˜àfäNÁþc:'p‹®Õ‹Ü·¯´(42ž'p ”æÞo¹ã¶‘z ‰ú/Ü&“èV˜†n†|‚J¼Ëbã´À°![ç.Ž›!R ñÖ4£‹n¦"Cã¥| îJ}7NþÃUŽÛ‰§[ÔÀh Üt0Üต ¸Á50h-øÎ;pƒáºéBnÚòy¼J¹XnÐ 6#󎹠›¯e,ÊXiÝdR°/]ô|¹ckÅ?¨\È Vc¤†4_È öŒw.&.äNFe\ÊíBîHÏœµ(&ŽÜ4ùJ Œ2C¹ñ(c•AXä&¯×1/µå[ä&‡‚‘fŸû…Ü1 Ìcp!7£Ù‹¤á†¹ñM´†£ž…nºÝ$:é8wì¦eªâÆíŠ¥» 1$©ÅÍb7ƒÔF›11eŒ ÂoU´1ØMG8¬ð¼C7f膑näXhϼt#7Ú°³`ýÝÜ¢P"ªyÐ 7õµàø! 
rÓ:¶@/ƒÜäAøAsKXäLÝ>.äÔÓxºbô ºCäÅšV¹Ç@·ØÝôй ›!…XŒ ÆnnzQáäþ²‘©‰­_BH{Ô¬ò£hxÙü¢™éÓÞßPüyü°Mí•np†¦ãÑëVžNùךŽG¯ÁLYVÝ>yÛÐdæ+ýÛ'ßahb¹øa§.z„7 M8íÌèW)|ɨõ¶¡iÖ ¯$ uóð¼|Z·ùäŸÃÔÄõÃôûºõCºß¬¶÷—MMÛ®âúü9lMÒ£YnÓñû‰o‡½f®·íþëÖ&HX»L«õéYšÒ.SS !Wô!§r“©+ ØA±Ìt&Ã]ò¯ûuHmi—«äÇA2_Eþá4&Õn÷à6ùlæQi+–¶dåßiº$qŽë=ôÈ¿œ3÷FцvÈ„Kþ¥u¬I™ŒßÄ_p  blçp—ø[pt€IN4!wñWäd »JSڤ߆MbZ61ÈtúÊ/ÿJñïˆßÞ¥ßJq·å :™Žƒc¤_&r]â¤:Ó".é7}vbñ·3Wц£ám:¸åLŸtÇ”¦2ªì&¹2r©z1¯Mþ¥°Úp&«ÈÍ@ˆ%ÿFÑñ§ M;¹àÝ%ØHœzöA.»ü+©pšzÇcQ7ñ7ò c;²ny«{î¥J¦2] Kµ{ÇG†(àËâD tÞ½ã™Ë5Úy¥­nâ/9™I=ú”RE ’0¾.§ð-ƒÓÛ‘î >¼`;0á¡Ëšýî€m‰Nâ-”/ØføäûRåè[Øÿ‚N"ólÓ6Ñ%Zqt‡m¿b{±Í¢ZÚa;xšî[ñ“¶™k ·$»8Øæ{…úôz‚¶88ú¿ŠYÙ€6¿ÈX»PE¹oP›¡ó1K 6¡†…ÄŠ'jó.ó…ÛPnÔ¦W fWz;Q;8fBÅ&h¢'‹ÚRb§¯ƒ®¥Am¦Œ%'MWiQ»‹CÆc޶°ÍD|Ø-\éÂmÆJ€M”.Üîžóy²íYÜæ8CV☸ÍL^Ìê$m™ÅíÚŽ@ Q)YØf2@^×´˜¶éÉÚbvá ¶o*5õ^¸Ý’õðʶ¸Ý¨þ˜Èn îŸ4ˆÍ7 p˜OìB¯¸É¤ÈÌFén0ùÅ1Ó›Ówóà FnH ÛÔJg𼪯µ°ÍÞ˜3ÞëbØ®t–ÉÀ6a†,ló@tæ Ôý1°-æEtØÄJm`»[Ètmñl׌ÅÓØ  ¶™†«R5BÏâ6>⨶ô!]¸]Œ%Ój·Á`5–QHýÆíJµy5§·K㪃©Ðüp\†K߇š¿&ÞÙät@wä/̲}sÜLæ¦ÖŽzC7.äe(=8n'¹6Á0…›ãÜàïsA·—µu ~¾ Ûñ¬á”úØ.Ž›i °œÔ Ã9бdÛµ 7^#Ÿ”¿7 7-\ ÙÞúÍo3Âtr†áŒ",öU Ý ŒÑk>Ëoã—Ø@œAÌ5¹Åë‡>eíd·™ý°UæÅn7»;Nq¨]ÀM£³¬øgÃn3üµboÜ.~ó¿³°]$¬ºÐnv»J¾ºÒÕÙ̶$Ы©iμÛ<ß Ý$×ov»Qd}2l[ܦî›ìTT°ì6Ãå=É']¸Í¼Lç¯~»2®ÏÅ,t`q›Þi íTä ·¸MÝ(« Þàv¡’“V˲ÅíTè8á’Ò¸ÅíL¥'dͬlq;û“‘Ù"7³qƆƒq6 wÅÕOâÒþ rg/Ém[Th³ 7˜jÏä(¾^ÈŠÜ/MÛ r'Þ‚À§›An ›…ìUUf0ÈM‰ Û›ÇÔ rg²m[’ÃÍq‰iònk‘;‰ãDÉýwpÜ´Þ‘WR™Á w"wÉ,Ÿri Ž›ÑǼfY3~ý3ªÐ$0¹œ]þ–s&'ÜšõRü¸oC#óï~ó¿ýíkÀ8U€ééÙ¿‡ê OMŒ}!ñBˆuÉÞ45vþ<Å`ͽ¨áÏó¯ÿŸßýÓ¿ûÍÿúõ¹~Õ}nŸ³wt[é´l”sÒ¦í;f]Éí1Æ›)åÿجÿýÿõûßýÓwMù‹¾'fÖt6Á¬2½{ÏYïmß1ëÆ{Ç­°¬Ð›õüßó$ýÓæÌ¨t°)û~Íyoû;²§þfmÔIRDtþ¨Å=ÙþfuÏOûG­Ÿ[òœÝ+ý0C)¦!÷öLíxöº)׋S[ŒÂ8ÎAØg?à öYëÒÛGo[”í”ÇÌ£Ÿ\é³âÿŽë¼ia¦[ÚG«€uªÊÿ‚,Ìû)ž€ñʆ>ñú)63ÊB%éÉ£ùb—¶à§éùg0|fkh>úÛÒ¼ï­ïRÿ23Ÿ÷‹K.â“YrÓóûK®IÏ·%·ýÝØ\J—r¦z¯ôçÑË[ÆfÛÍ+Æf–"üH Â/½âÿ´µ9í¶æy¯´¦“ZI½óR«ìЛTÎ$‹fAVͱªcuh”Šj®å¤ÇUÆpÁÓ&ˆtR5"{5¬^ÝzSÅ&“¤õ‰”'D§Ö\Ïõ‚Fc» tr£ä‡wm7^0W]+•K…wï0 Ð9c­Ú¸[/˜8?£QãW̹0ø—ù‰RAc’Ü> .°] “*¦ðZÓG-ZlT¼)¨ɫ.ˆöBwU}Ä›‹^3 ýã“ÖÑæ1 ¦E±Ö{=ÄËŒ‘Jh£|§g-„ÍŽ!ý'F‰ic•?L#¡3LÊ4X•E ?Æ¢G±V´Ì…MgŒ)LN‡Ü_ªÎ$T=Ãä[Í{}“J' (1ö‚JÄ\¬#F^/þÐX ÖÀ–¼©z /Viy%µDÑQÄjEÚCÊT`Cj‰nP+U-þ!e;˜£!h / 
Ñv“F­•žÌúÙ¼ëÆèkÜHÌé¥"ÍCÉ¢kg5¿Q7Eªß=”,®÷$õö’njÇjü(äW{Ðñ”ݬÁ›ðDìy‰ÍxHYÂ^˜g¶%­ÎÆÒdÈjLu¨»†Œ wªÈÜ +®×CÊ’i‹%”\ÓÅ£òi’2+êOã®$+Ìqƒó›œ¨íZ24‚¶~E‰Žú§IÊ…ÑÇ ô¨u¿7±‡Ô`GRŽªc¢~{‹©¡ÇsxRq2i¢T˜PR.¢”J¬yF?ÇIÊ¢Iy­¤qו•¼;ƒ{Qá/ýn`Oì›hÊi!–?¤ŒÆœ9‘ëénS&)£OuŸ ã›e?ç‡_Äkå8o¬¯û’ò§Xh3ˆ WŸ`= É}õTrô8RÑPO•&•‹ÿê¼# Ø'PEµ ÊÉ7ÔÇæ$ÙµO°ÞƒÌcÁ&~‚õDÄTY·$~‚õ^ô¾ÈŸ`½k|ºÜ¡^¼¤èÜu"Ô·½@Œ…zZ‚°8´ñÔÓB–3U­7Ô'fMp4+|õ¬Zʰe-®j¡žþ†­ÐÔÚo¨§mç¾j ¯êñY,^ Ï=°C=Óñ1pÙ=€½C}—Ò†­hÀÇõE€p3»Vw¨oÔËžÒãràÖµQÊÊ"=£¨W{Â|ÿà–Ä0.óLMqÏk9íçq²m|‡ÅùÆ` ®ß¸‘ ÎKléc!´0ßè„UºfR>P¯ ÅVö ÊÑÌêòŒët@aïâòDu½?Ayœ¢ZcaíåYG‰QN©}‚ò4WFM·x£|c±Oôþ\Ë;Ê3þ…ßOµÝ(‹—§Ün”¯’~”©êò¼ìpáFyÚ˜*jg<@¾²Þ2ë)ù[g¢JH!Uk'Èã¬:Ûùò#­ví‹Q8_Wèž8Y›\ìî7ÎÊÑ]cmœç¹ RÁûzOÇM†.:½”ÖG«-èéZƒêó`Y++ûj.Uó’6ŸæJu²0/‚óÚ“l`žtŸl¯9ù,Ì‹PŸØŒ ÌK#3¦o–¾‰0ÞâœgŸ58Î Þ,}ãUžZ²Îc €Jo¿Xz-ô裪Î-Î3 ”EoÝøªÁylp xõÁ=p>EúeæöG_$Ý|¦dwüØNcVÁ嫤ßÇå2Í <ð´$"DÜ@¾HÎØÈD7Ê»P¶ÖÁé”3Ef$¿^B8#ë[~ÂÏ㨀q,Ã}ìz¦ýlÓ ñzä¶ZËgü<‹+ã3ä¯. /-ÊÏæéé=‹û¡¤ºõ0xÓÿÇkþÛé5ó¤Ön¤Ïô1åº±Ì =Ýd)–xµÂX¤Ot óèsì¢AzM,âùôIòÒJ=Õ é á¼ÚÀé1X³ñôé#ó!EÍ©sà|¦Yy/‚óÌhËV­`~}jôü~’@/!® sTÚ²@Ÿ¤²'v§L[–$a­¹ô¯™¦• ËÅ–¤]’SÇŸÔŸ’­úÓK6²OzE9æñÃ6·7úa TÚ€êšØñ(‹ T²3¿4„¶ÕHC°ÞÂ>e]öãQ[Q^?Ã*Œ!ØGŸ áG•¶@djqìé÷èžEV&ðJþ·¶ˆáqÁä ëæúzÝ"6I{¡Ã+¥Î"¼}f÷ùh¢K©¿û.1äìнÙa¼g_a¼g_›áN+nÁÛ{{(º»ÃWgï¾·‡Òá=Ã}Ã$Þ®© @ :ÿB|cFòöoh°³drØ®û6òOØëÂOë%€ãgbOŽö÷’~R/2Û‡g*yf®ÿã½Ó‹¸y0·n¿tLâAæ¾~ûåf‡\CÙÿÂ;\Ø©1–E„ú*>×ôg¾üÇ~YðŸ¬Cù«_ÿæ¯ÿæoõwÿü‡ß]cÚ]Oþä0 /jÈr4Q½MóHmŸ”þÛ³²þ×ßÿî½(YSêž<e ²áG¥û½2 +«Ña¯tŸ˜D78$Rñ^é>1ü‰¥º²Ägú½Ò}dN'Uib¯tŸ¨þ‡Õ ÔTºgù”$9©D‘g*ÝGHstìqš”ÒTº´,e¬~™b¯tiO(Xƒ,šSé>JF±áN+¾ï•îSLÿóí¨rO·º¦w-/gªÜÇÜé]Þ†®d¯r³ða´oªÜGÖ r{'ª"Sç«ÄZ !ªºÔÔ¹Y"/fw{ûÈè œÊám ÝG)ÏsºgG1.mÛ ÝGz2»>wg/tÏoF•Iå½½Ð=€Ó9[k´K¡ûHɰO½’)tÏ=G5ã¤)tω1·‰vÀºçØAGuøú›B÷‘)F3“fé{{¡û(ÒvO#š8ì…îeεͽ٠ݳÍa•›B÷l‹k%×)t¯ïUÿ¬ãVêž³K$ä&&RSêžÃôJÈ2…½Ô=W30G—¦ 4¥îyB6¯A+x™R÷QŠÇøœ4dr/u/—(æ96ÈÔº,+é]ÖM0¥îY3>ƒ€bS„)ucÙ'nJÝó“‘µîKÒrö[ލ(ʉJý™¾·åˆâ7=–wœoSêžm´¢Œ€CSêž¿ÖÚ8ǦÔ=ßÓ£/(J>’Ámf õë1¸Í0ëMÊ1¸Í”¼¹ONÚàvÝX¡·™@¡¦¦Â-n³Æ“-«jq›9ª ›Ámºtlœ­Ámzt„ÅÓànâ¤P³&F>€;‰sâÃï[àf¹ÁWžÀÝý ¸Y¸B3~„ ¸«Ÿ‡ïÄm:”%^XÜfÁÅuÓ%ùÝû"bã°fújò>:›8E_f¥ƒØQ25ƒ‡Óå7ˆÁº‡Ô±z ¯<¼*Ä&•&‹gÕ"¥bõA\C*4j‘N'€8%F£)üòR,ìj¦õ-3BÚªEè§|6eìÆ\ÐZ5À6Û6™×vbµO5#Ì‘CûT3‚ýÙ„C3̵b4#gʶ`F3 
HeÖg±x?ÆÁƒXÀféî¾6ó^a?l ĸøÛàÃw^[ü'|}”;Êk³Øh7¾ÅkÜêŽgfÜm¯3#.B0˜oîš ƒÙx/+™‡ ³™!®ÌÍp ³ëÕ,ÆØ`v`Eáu?Ì]0ËÏhÚ!;H…Yÿ©b„ù›6qÒ`6ým—ðn1›9¡¦@l![\|(· ÉÙÔ‰âì§žýÙ´kbïÆ¼-d35ʺœ-dfìrÏ…d!;ÐéU|pãÙ!J:þ_ ds,›"Æ@6ÙµÅDXÌ&K–'îYÌF[Ú4#d5Æò®´E¨Œ8ª¶/þ¤¦Sñ~Ñ•ã³Þ_±Kõǧ`Íí~DüÀ¿Ì@Øñèu? Ș§+Çñèõ!˜)ë²^wå0S;ï~²+GèR£IBÁ^öäˆz–‹tåÿËqäX'vaÃ+X† /:Œló)Ü+÷Ò»P4œL‡ï9ôÇ‘Ãtø¢@9l‡¯Íp§•­ê{{(º»ÃWgï¾·‡Òá=Ã/9r€ƒ >Ä=ú5GÛÉ[޶—·9L/ßçÈñ#nà”©ˆvŒ,þ1/ ¿{IüËÿxÏK‚Ú%ë%¡‰.©À¥AƸIä%îP[èñŠ&·Ò%·Ó¢z° mÈàb8zÄ+ZŠZŸfÑ8,•uØÓF#ö­)^‰¹">V$*Q77 c$Ha×PÖrñ1Õ¤°»I0Ïí´€Ðz7C@baJD7‡BawÊW T˜ÊŸDA{ŠWÙ¨½¨k™áòÚ2«§´{J0àÔ>¤d<%°(uµe£€üKý•Z¹—KÀÄßù“¦­K¬ù5å«”w™?•­š–˜ã!×=O*[5-ŽÓ÷©_LE’“ ùŠÎ ìÒÀ¶zxJdðSµ :ož8!—êô”` CÕÙ¥f<%èô4=XR3ž¬Ó建ާf<%3_‡I(uã(‘vߋԣD@×kèÝ8Jñ‹F­ìŒ£DÚUÙG ºYMÅivÆQ"±ºèC”ÙïŽÌcýL.{ã'ÁÀ“©´È~+¥Åõñ5‡­Š½ 65rÆCg­OÕ óüÌø¾—˜UÕy9.Åãè©TW-ùâReäh<$¨ÊðujN[-j‹RŶ©"§ÃCÂ-#DNÆC‚Gt:Gå¼UÑâ7kžˆ³ñÚb15ç­Š–xdTÿ(úåÆŽŒiûšúö‹.X{ˆ%ñëkª%Óc×°`-Åâ¦ÞÙ‚u 'V{̨¬±O›^×€5¶lÒ¬q”©äcÙÁÄ#DR4cœkþºC/·ƒ5»[*EƒÕjˆKJÆ`5Yr§? »ƒuò,Ë—û8¬9Í?jë$â t“ñ‘ Z¶Ç~aÀšµÖ§=Á€5 VÌðZ5Ñ‚5 E¾…¡~6`M ˆyº§Y°îŒqÝÚ6°æ{®Oc¶ëΔ•ó„°VkkëŽ÷ÔS"îZQ ÖL-¼·ëv«¹Ekz.ÿhS-\3'fUÍÂuw»×†…ëÖv¶ÄÂ5Ù‹>­®ÕŽÉŒëÁÂ5=ò¼½ \7zb…ÇtfñZr^„š-\‹ŸÁ\M ÚÌy²îJ Ú¾úÚM”«eø´ZЦ/A›FYƒÚâE1½ï,j3à}™œ-jÓ̵-ŠAí&YqŸƒeQ»%©º]‹$æ°¨ÍäðRàx ³q•È»JØ¢6Ë3שJÔnaï}ÑðfÁÌZæìHš'xǸÛö-x;:$—‡§Ù±›'ˆ 샥;v‹ÿÒ†`†Ñf:›þGmœ÷LØü¦ÁnW$Îsü £]Š°Ñ® ÓVûùÍh³(F~öÇ2Ú %ÆÆŽ[ÜrÚÅøËœ6‰ª>.܆Ó.uÅÉi3/Ìò¨¶œ6]ªëºí §-îÑésN[Šu7œ-§MSåC§Í1OËi羇ÅnÊtP·Œ6ch—ãÓÀn<ŒË]Ôb7_X>„»É‰,´±Ø¥pø” vK= ´±ØM)(MëÍÁjó\Çß`7³|ÓGg0f–Õæ~ç:,W«MŽ)?.–Õ&©ÍƒuðÚ}÷2ùaŸÝ+µ~ÚߎG¯¿zʧýíxôúÌ”uÝG¯ÛßÌ”uǣﱿÇtf¦3XÙþÆTê¸éàÒ¿dë{Ûþ¶Ní‡wjOÎ3ôö©Ýf” œtèî_³Þ4ñº:|Ïz#Þ3|Ï·QKg±¤÷MpÚ£»{|wŽñžã‹ ©¥Ç{Ž_²Âáutký53ÜÑË[v¸£›· q¶›W,qØgùŒvÿ±tÁ–)î_ÿð^ \“„aSK …èT¿$In×íÒÔ3J[Ú*"ÑÜ6Mxl«{l…IMÙ¨†Ÿjß!-Í„W-§ÝÇນÖòV‰Zǰül[q›)ŽY§&­±pÓº î×­»²C—ó•V¦Õ­’äìZéë6M¡‹V®å[ݘ}j ],_9õ’­¹]KÀô33Ëjk[$Ê\[VÂÆšMË?× µ¾Š ‰fåTk}«D{ÊæŽÚX¯iÊ\Y :ù¨³\Ó²pˆäSl$O´¹ç¶]aÑÝVIl4+!X÷n÷Ïe£HøR€:ÒÊ„Ã!þ¹a×w–hšjÉw¼Ïü-»n[ÆÄê®&"¸ðUõèöP8P¨dl)ó/l¡p•qOÆ–ÎMSM@çvÿd±éÉíZü:Sû@O«ì‘&]š©c:3E-%†ÅïRÛ­g·+ ˜æ±8ôœv3 µG­Ò¥ïys…=$¯K¡ªÉœ0t†³ö’vÛUB½ÔÝ;—…í¦B¯Óµvyç&ÐëÔ†õš6AÚM”½Ö]Gw@g]¦]GŸh]¶¥]GÀ4²k(­ZõÉÝÚ»³:‚ 
¡Ú–vß½ó;k2-Hl%AÕÛ›¹•—{ý[F¹¸óJu{·ÛSwYà¶)(-p3îp%\µÀ-ù£gzr Ü‘A¢™³ ¸÷`ÇlIÆ»¼< fS[œW®!ƒÙlÛÝ`6#Ô¶€,ƒÙT%çÙNM*³i‰Ù2óЖ¸íE ´i´r+û¯m1Å,TÛA›v¦-þ΀6[ð”m¶m¶+ Ú`ñÚcÄ· MwYÁ-h³™ªT‰fAm~e[µ ¶¾2.Ð×–…ä´™Öµ>žþ³±61=iÞ-d÷é`ẛœˆ®ñN\®+®»I†eñºõ-·¡Ák´lî÷¯™(rÅêX¼–¬™{Ò6ó o•l& ^é-`3Ü 7þ`É,`7¿›D,`3O.÷FO¶l¼ëÓ¦n»3öaä,`óœyÖ}3xMoÂ6,^3ób˜¾X¯kÙãH,^£myBX¸®T/Ïï®1äµ­ŠÖÌ ¹|^´Ç€­óQÖ'`'fÊóO銃Óv{ê5 Ø>ïIÆ-`;ìDÈ‚>^€í™°aÆœ¶ß}% j‚¤’±žŸƒÓλ ãà´Y-c¦•°¨MóùŠ×µœ¶$’Íë½ÓfùE77ݲÚ< +¦eµI,Û8 «Ý$Lîa- «ÍXÒ•¨×²Ú¤±akYmpýnÕ2±¨M®¿Mo.‹ÚL +áé Uµ«T£תí"„çæ· îÒ  MÃÚž,hÓ³"F-jÓ³ü~,l3jˆ>¼%,t£¿¸²c[èf­àl¡»Ð¼”†ÁBw12ƒÝ%n5D,tËÌãhYmH·yÆ!¼6cLfj ÝÉb ,t3·ÊÆ[^›³SX0ÈM+ôJEl‘;§ÝÏ 7ºÞÜh-rSÃ9ò¢ZèÆ'[X…núlm¼nJÛc bê–²Þb·fI}nÝôõZ©2‡fÎo õõŠ¢.Os˜ºš‰9Ž?Ž¢¢üƒW«Š~:‚wôŠyZäÖ_éÉ;µë±øÇ³×írvÖÏ ì³ï±ÌáÖ ®õeË\®1)*I–¹ýü.¬xexxÿüš9eÉ¥˜žŒ•ové>éòEËN~ t¶Ë7M;ù1Ñ]¾7Ëj|À“Âç/îåHpwwùî,ã'³|q/¥ËOfù%;]Ï ®Kï•=zyËNwtó–ÎvóŠ.}!9@Œø1C1ÓýË{U=¥r&éÌù ©Ã&ᤈ Rn+AÂ7â3Se„F£ñµe*cøEL`55úéî 8es\°±»"±q[FK4Šß]ëÒ6‚ß&"m‰š`HT©B–eÞ’!¾¢Ñè~ý. ­2jFÌ:hL’QGh§w•Hr”ÐÃÅT\„™lìÚ®I`¦–•»*Øò;¢‘vOˆ7>©~bEóy4nº„¶û £mW&´¸Ç¼¡±J^‚iú©•›Æ{Šëºæª¢Øò¢q×'0”n•”Dã®Pàþ¤&BMøGÀxȤr˜_ö/4&É®“o¢ú œ<ükÑ(JJ›Ë{ós’_G(YbæéåéñûáJûéÊäƒÆ]¯@½GÒT5lË»^NÑËa—QXþ!dZleCUÍÆ*9v„ùU7Sì1–oÓ,Ô°ËúhÜT 5ì¹ÓÐV)q+!sû¹òu×.T¦òœÑ$hÜÕ ¥Kèåp8Ec•JÈxsE\xOÉ}Ò1k-·d4¦£0ÑÊ[†Æ]ÅÀðÀem•BÏ}Ò1‡>ëc qW20åÑÌ„¦]ÉÀ¥Z&UPª“JJÇtï3ä àï'Ó=8ÌĆhÜõ û\õáp'Ì*4Rlxè¸lª7i¬RîBéX·Ÿx•¡¢}æˆz­èó#moþæi–œêñæ·ÂZ'ÌwcÌ—Ý}Â|ÛN˜Ç6Δ\æƒQŸ0ojL0_·Çå}ÞSóY”Oªì|bä,Ê‹Qqe3´(/óX®åiæÛ2üZ”§F|î×ó´¶qü}²Y/ƒ›i -ÂkÍì©W=žæïaÛ:¾34ÏÈŒ•…ù@xZú¶í°ÏϺi'žI,Wøìð¸¥ZŸ~CÂK=?ltÉ7Àw¿{ÝÏܘۅj¾47¶ ÚÊ,ßÚžÖÌ"¼Ü¶3Žÿ@x¾¸â„sÛ6ƒð­ì¹ „§u%(> žVÃBr@<ÿrÅÕßè%áùñLx¹j±[ˆçL–[ÏñÍxBO#Ìʨx`<˜Š•Ôù€ø 3kòñ¬3· ëÄWš¼c!^¬&yÝÄx±(Íœpo£;`€SGÊ æs¾Ï$°Ìóà†i¯9`žÞÛFZ˜÷t´š~ÌKɪ°]ÕVÝ*kœb›y-ΓvËdqž@°9¡Y çÙÌR;/E_`Ùyz,›ÈÁÎcäy%#<Øy.Èrx:Ùù¸éü<=ü ÃÏã«Öõq/[~¾§šíÁêlYõ´Ç9hy}hfxŦ`_La¹ìÑå^ q€=¨Ó—m ìKؽg°g¹%l9ОRvÙpР=³g¬pÒîñ—gôî©hk«0º…ûÂéU²ÑÀ})ö.7p_üÊpÁ}f gÎû`Ð^†3 —Ú3YÆ.²´Ï h:½Ê´§M Gk8 hOOÁ•iÿ@{p‘›Ï¢=5Eû¥fÐ[7Ó¢=Ut2„E{ÚVûtE8à>µ=·Í÷DM·¬…{N;ý\ìîY l[Õ•³ïÌì×͇àæp4XrœRÑêÕøiÀZõ§—L‡ŸôþЦsÌã‡mnoô¢æ‹¬kbÇ£²¢!^BÛŠµ!ØGïaŸ².»}ÜÛC3å1óè³!ü¨Ú p ²<1™šPÅ?¼Gÿ‰Öö'Z ÉGâ’r4ŠPvW¯ 'a/lx¥ÚÀƒ·Oì>Íù(Հ߅"pÂ&¥>ùÙïjy 
qÉT©vrô¾ÞßT·Þ[lép_lÛïÛ‹-¤»-¶éý^ì/^Â`IC’Xþ½AÖ»M:ßoi30­XÖÔ/õÇMSù§õ’¤Fx¤ÿÖí õ½x‹m¿ú5 [ÌV÷í·¿uúKGÕ89Ùúí—›Ñmeÿ‹N÷=pf‘tцÜ3æë˽¨ÿÉ:”¿úõoþúoþöW÷ÏøÝ5$`‡1¤?¹U\¿ð‚(Ë!7„5”öÉì5åþî÷¿{/”Δ˜/”µDN?KÌ3— ƒ»æ61%æ™F' ³.GÛ”˜gŒ‹ ²Œ–‘ßJÌão>B l+¼ú^a¾€ûápû ý—˜/.`ÓŠƒäÅOšóYôh¹4ˆߎóYjtWª‘¥ööVb>‹Ø’R×r¦Ä|¦ÎªjœÄ·£Ä<„"jBÁªkw{‰yÈH¾!œˆc*ÌcÐ]é#QÌ^b>W–|*ÙQ‚™ó¹Ò-» ¦*Sa>ÓzY3ÕtU˜g[jµ‚ ©0æ†F>⢬É^a>C¶‡ø†ÕÔ¡ìæÙ†5qQÍT˜ÏC[`ºR±™ óØHò Õ¬Åç·:™N·XÊRDQ§æù­Ô| M4»¦Â|ÆQn,^ªW…y~¬D ÓüvT˜—9uË·£Â<ßó\üTeì{…ùLË (!1ný¨0ϱ [ :ν¼ŒÁ,„¸ö ó2?ü5¿æù^[žÆ{‰y6¡_·»JÌ˲Ԍžƒ¶m%æez•j -ú¾—˜ÏXÛJŒUÖßÔ˜ç7A å“¶­ó\z¢kù·£Æq›Ë‚e n«ŸÀÍþŠª€ýî"›Á²@úÂÜÅS[Ÿ“c€wq€/¢ºnN7DÄx7¹{œ±,qp‰l êœOàæ81ø,4zwñ”ÿu;Nà.Žf·„«BŽº K/ˆ¸ã6—*bAý ®·Ù[lj˘ƉÛ\1òóžÁíâèÞÒ€êÂaÜ.®~¨)AÁrÃm2œò*7¸ÁmrX8I¾FÅÑ·¹`XÌŠå 'ns(tÃq¾”·‹@ Ýéô n˦£¯õêÙq›ãÄ9n Äw"wq‘Úx Ø rs¸"p¢?¡[–Ó~Tµ*r#òáêKþKÒ©ÿºÍ‚7î\ìFnÜà „ ¬Vó’ofðk™ö¥¼ýÎXðÆåÉÈÌXã…ÝYÀúA ‹ÝöUŒÎ+Ûf°›)æ þ´øüv³ Mo!ØÍLuŽ7ÊLìšø*ˆñ‚n½3"»+f ›Á}z`ÂÝ„ÙðÔ.èÆH< ?ˆþ€nqØbÙ Ý¡Jþ>Ÿ%¶ÏBw?‡’ƒÞ0º7¹xZèæXƒ)º —+މŽÅ@7æƒÛ¬<ì¿î xØ!wdŒv¾ë½«È%Ò6°µ¬´Õ’Hf¡öèEKÂOEòMá»2ÓµDíJÌž¢æ$þ¤fC¶¾éÄðYï¯X“úcM_s{£¦Z¢­žÔ2&v”Õ4¨-vù ŽI'j‹‹0ÓkT¥Am±•aãX‹ÆÔ„‹ôéT%¾£6]pº©\M'js(É3h®]¨-ö×$$ëDmS]S[9Q›‹ ÖE8µÅZÖKÙ ¶Ø¡f§–4ƒÚ'3ù:U¨‹\&.˜q¨/ÇÚZà+GqÑ<‘›µçðW­ÔºƒL“xØ™—V½ïÒ‰ÝôwÝ<} v“ 3ÿ8NðƒRêW1äX¦»§*|[‹7Ó]$C9-óédº kmäádyn:Gâ2ägyn´á\¹ÒÍsÉø€?Œñâ¹™kãêöbxnúiÆ Gýâ¹ ‹Qàþìb¥>xîLPcÁÈOxnÖäÅ‹^Ì Ïi„¨Ã‰ãà¹#6qư,w–š¯7ÓÁr³0)–³…›ãfföéc9î,™Ù%öqqÜ™S±Àþæ¸9°€ÅxÞ9nòWä›]½9î,¾Jô ëÇyôpuA Ç„:äšrqÜòÍJg³zqÜ,±#M¤/Ëq3‰lëÀ|aÐ,ÇÍ(Òåtd8n 0Í89t‹-ÓÇÆ?Ç-Ã=)\ØÍ6ˆ£äûÅqK} c¾n/åmFĪe¸©É%ŽËÎ0Ü•1 ¹æ4šv†›Ùžq_·6àyg¸yt âËpÓ±;O/ËpKcO[ùÝÜtŒׯ¾gn&&ÆZxuz»î,oÅÉoØa<Áw…ÍÒµIÕ€ +WíR-/[ß´8}ÖûÊ¿g?ls{¥ŸêV™GØñèus¨•ykk½>3e]vûè}‹“™òØyóè»,Nì¬E’ ½6›èKG¥"XƒødsZ'vaÃ+ØÂ† /žØm>e‘Ä»PdÌ ¶ß·Í ²k›Äôþ¶ÍißÔŸÅæ$î‹mû}{±…t·Å6½‡Í)²f„à ¥7_²9™N^³9Ù^^±9%2û©÷gþ°?itbËntúÃ{Ëè$^³O*.Å¿KÁS3·ä_–«€l¬^ûÔ°Ílq}mõ»ü ®ÑU™šÈ%,‚å"3È‚Îül »ü[Y²3<2•‚å XY+­h¤M»øK¼¼°âY)X>$›%ÇSh‹ê¨Æ]þ­ü¦®é²Ä]þ•´€q„Ö´‹¿•² €&i8kÚÅ_Ö U“¶íò/ƒžý´â'l³­ ÆÔwØ–©ãÜŒô·e9NaðñÄmÚ]Ñóc“Q©¬3sGüZþî¯Û›àÌ[".àÎ,pž$‡÷Ü$S'ÜM¿€¼lÀÉžÀMÊYiñnPpã#ìÎ7aõy 
»3À¡¤P‡‹ÅmNGh]g°ã6Ùž-ŠÒà6¹3r–ÑÀà¶0+#©ÁmÉS™òg`³ññJŒ‹êÿep›N^I2"»d9nÉŠûíì¸-ö¹q°/†›¹¿b˜²ÆŽÛd$KÍí–ᮼ&¤BŸOÜ&»ÄÄ&ƒž-Ã-Æ;'a'pg1Z …‹á.R]+¯+Ög˜,ÖÌt1Ü”ØòÌcnÚØ"ؽÈ,ÃÍ )°¤IS[†›(¡u;ãÅp3Àz¥° ·Mþaâ ÃM»ÎnUb6 ·$'‘¤-ñb¸I2ø¡ç›ßæ(cy\C,¿û~-¿”ž 8Ëp£qJÃkÁ2ÜtÇrÊI] 7c½Í.†[ÒÝ¥ê@õÝËnIbò÷ÃÅ$‡¥n7ÃMkx(§¾–ᦕ5@,â\an&®Æ¦²çÅpg&ñÁ4²ÎrܬTI;jŸËqKvØÔ;« -Vóh¬IñÅ‚êjqjâ±%‰ {4­ò£jyÙþ¦ÍéÓþßÐþ=ùaŸÝ+µ~ZŽG¯›|zʧÕéxôúÌ”uÝí£÷­NfÊ:û軬N™ú>Ÿ}¤à»V'b®GVÞí_1p½muZgv¡Ã;Eç zûÌn3Ê?‡ÝI:ÜM!¶ß·M!²o›)Äôþ¶ÝiÛV\¤?‡áI{Ü—Ûvüzv\¡Þm½M÷ßazÂ#²,(Ôúk¶'ÛËkƧ£›W¬Oô3·˜æGŒOm™žþõïÅ;µXwQ˜ÉàFH?Ž#Í?ËôÔèñ¨Ï[J»íÉSGδþÚVwÛU],ž ¼wïK‰3}¤n«¹‰ÂÕNrJÓVÞ„j2­E“¥¶â6Û}¯âS6©1oäòA&á@°ÐìL,å¹Daœ„ì!–7ñ½ju+mBƒïè8ËÙhÌ»4ùåC%‘š”§yŒÅS‚—ÙÖ¶º&’e&lm«jB]7ä¹GŸÒø•¥ÉtÅoYCšZ_5M¨=ž_ÍR­o%M(ÌS‹4â¼˯,Q¸îÅĺs»(Ü -øX“8 ³¨ó…¥Me8iÛÊ™ð›,Æé½(:“ÖLÁ4ш…'¤¶¼Ö{Tù‰jG(Zçžùï4jÌ—D;Ò]ŠÂD]~™=¤M–ÁSÿü¶J&|χøøWö¸2½†È7cÚmO”8ì îïÕV¦(LE~*Íi&ŸžÜn{¢ŒRYÛVy ÅÞÓYoe™žA8ªŸéy«`"¹q Ä<½.pòvÓS²°þOÐ÷ê& s‰±šQ >~…KwƒQ2ŸUk]/[ùêgj¤ÿ~жºÛž¨»©sÅXieénȦΔh½&ã{)xJ±½ÖÝöäè¬Ê,3B ÌW¼|/qkIè‘심,×Kêƒ<Ž¿,JÛª–0ÊNM`:¾-ák[!:€óîy)Ù}é¶,#a•¥º)T×aÚšÞHîWzâÀàÜ|1ÛÇ—lOn3cÐB«'nÓI?E!ƒÛTµ`EF‹ÛMçŸ@‹Û™3ÐynSÐg2" ÜL¤ãÊcàN\¶øXø,p'jäªÃ[õn‰é Oàæ09õ®% ps(· Fs·ØV FÜ{µÒHä¦*7 ûÿ‰Ü%2È0»áÀ»#·¼ÆÈ²¨SØ‘[^“ê±âÀ`›Qƽæäk'rSãìWâiƒÜÔTw±u)ÊîÈ-ñ\ý 3À-M’/Ÿ¸Í&¬îã3¯¸-)LÂ%qj0¸-—{iË nÓ⃥‹*ئþhï±í„m¶Z5äÂÀ6?økõÈ6°-f0Œœ¶®¶å›šÿ6°M+ ®ÓÚhø n [ã°gE¡fÇm¾Fë‰Û JölKwâW¢ÏÛ|-,#¤m1²x†¨(ì°MãŒ[&]Ûb[óŒ³QÔÛ`›Ÿ,èmĨØ{OÎ…Æ”¶Å^‚€¿«'n‹Ÿ~ÀA 'pó=“ÆÞ·æÔì†'pË8ñ‘aÊ3À-©i˜­e*Ùæ‡Wü—‚T¿n~:°›—g¥Oxn–Ç–;^Ø a«‚kvGæ(ŸvbƒÝät·è>‹ÝLTÇú?Z$ÔbwdDSß¶cw1,‘Æ^ì¦û˜³>‚²-ÓÍš’ ®É)ÊÄ~.¦›©WõÁtljåÔ´®–é–R”ÓËrÝ…aXþÉtvpÝ’÷‘^27×ö`gËuW±Ó<¹Ú®›ß­|¾¸î"T÷ä×´\7+)ên®›ÙØ5Øëæºñ T=ùÎu3 ^ŒO ëàºi±’‰²§–ëfùéiBÓ,Z®[ò?¸'ÈØrÝ’ÅW3|_\w‘@V˜o®›ˆ–ª…R-×M# v?kÞóƒëö¬=÷X ×Mû"¶î ÛÍÚèm°l7mh¸X'´ïl7cÇ7ðÝE¤¥0Ò³[¶›L+`|¿ð[h¡“âÍvË9¿Õ”† ÛMËF Þ¢žl7aO.Y»3l7# Á“{e;ØnI0-iïn¾Û1hò‰·Œ73hö™&á`¼¹Ôm\Ë–íô¤áªÜl70RíŠa²Ý™õP›Ô¸üÛ=*º<-@ê^¥å ãï•?xµZà§#xG˜§jM𕞼Sc‹¤“;ž½n’ª¹…õ{ç 쳟aû¬ÇâÛg쬟A˜gßeŒª8é!0.Sг¿hŒª,² é²Y,Aƨýô.¤xepxÿôš9eIC—ž||ovi Úžß·‘dk“²ý¿n”Ú7×w©§'…‡_\ruJÛ—Üöüú’kTû¶ä¦ÿï°KªF*ØÂJ%Ô[v)ÛËkv©£›WìRä 1ŠGrñ?b˜J»aê_Þ«Ôç(}>EËyC÷'ï %YÀ§hy‘¢ìÓ¯Þwga½{ÈZ" 
£ñPrú•¨Ï;ñ4Á6¬üä"Xe-xÆ$™A¤h9)‡ƒçxÓè9齨âPÀ£÷MÑÉŒ|¥$ÚN4Š–S»È<èNsŠP¯¹©:+Ã$ß4«6˜g“;1‰Wµ+«NI‚=ÂA ûø¤Æ44Ö6H9=õ9°V»ºÓS‡9jjmÊ{”½‰A1~Ÿ§4úT-—p®0Î-úÞô¬:Ù+Zð~×w2pvÌ«4«$ ‘’å̉¹áq–öžB©%Ë™ø†&®QS»Ê“åA]M=äиë<¥úO~*@àe®¸Q²\Œ†5CxªZœžé…Ÿ’匣‚ ™£ Z™=Ö,Å#Y-ž˜Ÿ“l!BÈ™‚ %AÍ«ˆF|ï!d¾É º0*«û´KÏxsdVÑaæ]|î´C²<–V7ó™6ËAÈÔ?3!^?+Yß!gæ6R\Ü%èNQ SÆ¢–M„¦®‹Í¤-Ú†¿}™Ã ¡ ^w!ã¡¡}~µîR4kÈ#Ù «äª×»€qa¥ÓèÎI®z½šÑæñÉv„Æ]Žî¤¹òøÅ¢q¤éÝë“LŠ·ö‡Žù&3áyJã&J3ŸKma”ËCÛ.K3ñ&„ߤմ@¬NRÖ )Óþ+U$¢èzÑÈBYƒ”¥Òô(…Æ]œnYRFµŸ²BÙBU4µ)±–ßz(™ã‘¼ôZ/°˜ÉCÈ´9³ˆ †á§ØÛ˜ûûòE}èW‹û]8µÖºŸáüV ñÄyz¯Î2>'ÎWÆ/øQ’îÀyÉøƒUŠ‘ Î3Ûàz¤y=pGÐ…‚æ æqÐN4Š`0O}ü–Bj7Ì'ócc>`ž%%®”â,Ì'¦Ó-O„æEk’Ó”tÌë…ÿ¤—9`>²ðÓ ¢=`žEZ‘èÉ èùbÎŽiZ¤S¶é¡¤§#˜Sßñé#c{Dûëo¤ ߘ•m¤gcNiÄÙHϪ¾q›‰Az‰òZ w!=ø¨4Ž9æ é%rI ƒ¤ éi1 ™9›Òô´œ Ò)íBz6’—u~Ü9;ÒKJAŸa€çŽôb÷S‚|!½Dй&AÖ'ÒKäV mÞ<;ÒË‹$˜Ë…ôʲ<á`è5Æ,€Œž[`zéÒW±@]H/µ.µ q¹^Bœ2.¥0nóVm¸8¯Äü^Ò³±­ÚUéùÕ¨×Òôb£F~Іz±÷pŸ è(5 ôWU¸…ʳ —°žÞŸlée°®³(n¿ ^Soú>"KfÝ1°²ªô}³¦ÔõaáîõL|,ùHãõMÞœo¬—¢¬8“ƒß7PÏ ˆÍ—‘=ø€zG9ÝêÑè¥÷à÷w¬÷L½­xÖ“§«3§Ÿ…z^±%ͬwêÉBÑÕh$W³P/&)ž×ŸêÅñ cT÷ õt‹J‰aVšV00ó‹ÙÙ¡žÌÅSÉà<ÝËÔ%"• çi•b`ŠKŸpôj¡‰,ÚQ.œïO7d` a,Þ¦Œ!Œ (0öî/”gôo‰™-ÊÓ3çâç™Ï­åÇÅäàçĶçü<({üÔ;øy†Þ'f.W€áç)µârFšƒŸg ê<«°üÂêl½¸½Òm/Tñ_èLŸá¶ü›Äð˜nŽ&8ôçvuÄ~õkÿî?ïoÿþÏêR™ƒäøò4 -5ΞiÌÇ×µçoü÷÷ÿåÛ?áÿ¿#§Ö÷ ÆÂ§Wj6 Lz5öùh†>))ú.¶|uÈG/vxϰ½ ø÷ ëk3Üiʼn•Krx¿·‡ìP÷ÐtøÞJ‡÷ ßÛC9t÷ × ¶1†5ü¹ØF–‘Ù¤[Ü|eŒÙ¾ß|hûˆŽIiZ£Gß°†ŸÔ „%ˆÜà‹ÖŸ\³—ôÓz)’ÿGfRþ¬Ûç—ôïå¿Pq‹,›èÈþÞ1¯zf†Žo•XG24³Ö\Wþ“u(õëßüõßüí¯þîŸÿð»kL;ÕüéÛ0{á˜úKà¨èð‰_Š)föÿýþÅbf¦†7}S[dVrU·ì5¼eÑ QHk ˜ÞôHÆkŒ)d¯áÝ(Z;©Î,m{ ï&U1$ϘÖ÷ÞŠx7/1V°ò¶ˆw“R!eõí(âÝhpƒX4g£)â]%G¡ó#+Í^Ä»öˆuh¢ÉøvTñ®tPŽñÉÖnÊxc¥¸‹?´©ã]©d¸7¦`êxWæ~ÙÞÛêxWÚv°*#ÔÈÔñæ'Kžþ±¦Žw•-iØ(‘iMïÚ˜ñiZpM%oÉ.«FXSÅ›ïlUoLïÚ˜ÅSú—UÞ«x³Íaµž¶½Š7ûÂYÛâªâ‰|@žC¹ªU¼™U¤'fàÌZ©{+«À¶´·mYn8ø,U€´Nú^Æ›ƒðcߎBÞ&<ƒß y³-§¯BÞ•š@&ïñ’@ÑTò–‰ÍH¦·L!j ê·£w·[ÿÄÓí…¼keÄYv£n)ä}µm…¼™¿¥tÇÌM!ïJŒD«Hw¦7ß ƒÓúß«Ž7™By`ÁÔñ–Éá´Ÿ¦Ž7G{sUíØ¦Ž7GÒ±^#«¤)äÍ‘LåèQÈ›mÕµYÿ{Ëq#M<"úWSÇ[?Ÿscêxs˜‡Jkì˜:Þ¦_%,ÓWM\zÍj1›ÊRô Ù´hîj 7dã| ¿÷xAv•Hç§°…l–¦¡% ˜³Í@‘>Ï€…l$¤âlœGÙEBœ×€Ù ‘|£ÐOÈfÌÊ~ÄwÈn+¦ÛW?ÙyVG)Ckh ›é%À"4%tƒØìγøa>»¥Â`®:êLÀÖ€œIx°d£ŒÌØX¥'šìÉlÎý1 ÍìtÔx€~mN›Å:Sþm^á)‹ö6ž Ý$ygã"6QÚg,>?{' ÍA€µîªí ÝBg˜ƒkñÂì&Q‡=X&ƒÙ 
àûăْ$Š)°ž˜S³—R2˜Ía2Ge×lg³›†2¶gwÐn4K,Ö¸ÄvÐnH“¥ÙA›Ca<ÿs¯ï Í-¥IeUÚä èµÐÕ|j@»)zîXÍÑm6ʬ[Hû X³ Q—©k%*÷Ü̬›vÎ…2`Ýlé?ÖÜìë¼v´n’w4Î `Gë&öb)ÛN´nb,© Zs7®J$P ¤û°­_‚ë/Z5-bI%-ÁTdƒ\ÑV?a²ùKœ¬“El¢ù:±[Ø™e‹ØhóôUã¢ìÔqèuu.À¦¡ Ò‹UŽl$¶ÅÑì€Í;ÀÍ­³€Í{%‡‡û°€-Ñüô>S¦×6ëð•qˆÍªˆ~2Ë;d3©†É:ÈÆ >ù°€²YáSKõù ²%Ri†bZÈ.åC+u*,ÈÆõ‘@èa —NÀ~<Ðk ;³6[|X( Ù™…©ú\2ÙY2µÏ»Q!›¥\—C™…lÜS½¬ÌNôˆOàÅlлµ§³-^’Q5]˜Yau‰i³1é螸i ÙÌ›Øën ›¡©yòq²O»ø8ÙL§Ë‹…lÌn¸¼[¸f1¯0¹. ×dYezùYØf8cÅiW§Û‘Ö†žü@ßµ#Óæò[ÔNá‘ë,bƒ%ažŽqe[ÄÆ¤êbI,b'fäŒy b³œy\·‡Al’MV´ˆÍjmŠÝ±ÓDħ|§ ¶ø¶Í‚ªoú¡ÄfÖ?‘XOÄf fFg©”l![¼*&†Ȧ¡cn–7È®=ï÷Ÿ…ìP÷CuèEHCK`õ"Þ0q³1Nf Ú3˜í¤Šä ƒÙÞ²y³¸S>®­·^d±V/RéÕP>ÃìÊåuZ½ƒui/íÖ‹Ð'´ºÉ½ˆ8n¤Éö½ˆÔ›â«Õ³[S7º¸Ä¥:¸«¡—Äv;Ý¿Y$Ýk{æ)—¨j¤ˆE¢÷¡ñ£š¶?![ÞãyÚvÕˆ /í™QСB3䯲ka¶qqÿ 'd3I­a—wÕßÛt]F7"ñ½ëú6ºFƒOšÀ¼éF¸a«ák¸É²KýÄm¾Çxú¡å³ºÆmoú]5B/´òˆ…F5B‡·¸Q£‡˜I?V5ÂÒ£8ƒ^­j„ƒ,ù¹¦­j„I˜ûd“¬j„E%1ñÁ ZÝß[ºA«‘bçóø[݋ԧ‡WµªNgúuªFÏ®¹(¾Ë¹„þ>jcùJµ…ñ'5¥²õMç’ÏzÅNÕ/‡5·7úIY}(è 0&fý žÙ]Î%æÑÏ03e]öãÑëÎ%fÊcçÝOv.¡·$Ûè\þ®o ƒqíàng½ŸÍµ„GTR2ñDûËt-Y˜±Ðé*aæ]X¶ù‰w–»ñ]0n {‡/º%ôǵÄvø.Üß3|ϵd§•­¦ä{{ÈuM‡ïí¡txÏð½=”CwÏð'¸–©FžÅü-×ÛÇ[®%¦—×\Kl/ßáZò#ŽàÜ5çBcý¿xÑo#öÃo£öa ãŒßFÛ#OåùeT‘Z5f&¹­>‘Ö…K~+KÔèy.žø¢ÄL”÷§^ÎElSxÔ ÉµDØí3)lj‰Ú+„iHÁ¸kH7a÷Òp”g’·rDt›€\T3˜¢è-†hÇ8Õ•,QÊÿÿ›»¢lÙQøŸÅ܃ XF’Ÿ$û?Q #©è›LÞÌpÏü½×Ün—E•T²­]G.IZ"@)¡QSÂÆÄ­Rb’ê3Ë’=J¡$õÓR)ICžæúص¨”¤‘ô£®Õ/¯ÜÝQT¶Äœò„.Dȳ¸»Éyå ]ˆ4Aónt è×»R*R¿Õج´ÈA Å¡]ïžpCzFGíÓ’¡J›é0°Ã­|aéÄAÈq.˜ðé©÷HA4´•JíUuKDÓ_¶ÊegÀ(l¤—")ƒ²3ÚW wdPv†VmÔW%¨‰²3:îê¢'j¢ô ´Ú4Ú¾&JÏ@ÕRYôK½bzìZѯ¥g€µ¹—ú\¯Ð„cÙEz‡&Dr­"kSoJÏ@êÆ½X”z‡&Dz‡\%¨™4 <øofoB„«©Žs»^ó–ŸñÈz‰ ZB"ó “Z(?£Èm6 ²–-Aã~ÆýæC¡Y¹èo:§}Ž|ÁVˆ5ÃÆB"\²°ÊôE-=ÏZ“§46p–+¹ˆ×œÑŒÖi:gm\fô7ƒs£qŒÒ|Y²/5Æ( µ¬”…‘Òðõ+×b- ¤¡–‘@?)W³¾ë+¢µfLTK¬!´†a©†\ëYÊÌ:D¸†˜ ³YÌÁ54%\ûÍS4 Úr&¸Ö|aË™àZõ çÆ®U×ȳþlÇí©ç›0á6”ï[`Û°9à6´¨ÑìI&ÜÆ˜ ÌR·‘¢ñ¸*@À­é]–à¤&pãC®·JúòÜäy=:¥h4ºÛR4d¡»®DÀ­×ªøb”¢Q¢LKÀÌù¿ÉlÜ8Í+ÀBÚR4ç‡ ¸»%tŽ)•ÁyKÑçf½Š ¸‘\²g¸qšI÷R½Ü8Í`BÀ­™$ëmÀ)tS#nã,©&ØÆIV-,½>`[o9ŒÍ¦~B°­ëÕùh‚m]ýYk’`%—LéÍ ØÖ ܦlÏ´ ‹ºtC§§WÖj=¨û1tOÛàù¤îÐ ]씹e·ˆFnùÏðÊcFnù‘ÝDä–ðþk¶]©Ð:a_ß±C÷_'æ÷&p ÚzKåÛí!‡3h‹´%°+öÜq¤­¥ë—Ťi7y–óÒö8ÒnôáH{ äÞ~’#íG¬GÚí«¹ØÎ!7R›=k{ ¹:¦I-Ã¶ÜÆËåo†mÜqôE›<¡vB«ó¶ž,FmÙP5ˆÞp{ÀÕÀâX·±rÝr™ãí‘éü(Þè k™™o÷ÓÍ9Þî5fìp¼5+)ÞF 
{ÎAsŒ·ew40ë¹¥áx[&[}%S¼ëàŠ+ÇÛøIKoñö€™ÖÒà9Þî°x,¥xÍ`S€ûo#¹Ü7Â[¼ w"‹±(Þ–-|Í–šÇñö¸b:ÇÛxà$ ]á=ÅÛ¸Òo•ûG¼Ýµ)†}âml,¿Òoã Ð@PŸ*Ž·1õÇrug¼=â•úeÕOÖúËW¢…è¤)ñ¯Ércô¤ê÷ÝÑOPˆkÿs;rœ–¼¯åœ}ô’[»ïkù^ÛøÑœMy^öí£ãªMù½óüÑïQýʼБ÷~¸¤\^%°§‡eà•R÷+x· 0Á†ô¯)û9h8<YCýàt4Â|_•gÑpJFtÀs’‘ðs†ç$#]Ÿ3<'ûŵò#²8ï!ðÜ=Ô~ÎðÜ=Ô‡îs†@öËh¹Ò³î—NÉ~|ŒS²å˜ìÇGùóe¿‚ý˜lo‰ÉŸÿ­û¡î$ê~ÿüo˜ÿO÷Cÿ%k ¥Ôò›'/XrGz¢£Õêžà‡hÞx½GY[Ku„4~VwY/Ÿ¨Tç=ë …õä)Äp ujun²…‰ ôê[¾xÑç,ªë©ñt°«rÙP³Ó[ù‰±îÊ: :µŒ]£çôÊÂó¾P*a6+r}j÷¾PÈž–EºxRí|hDìRC™+œdÍO³¼9G\疘˙G§Åe‰‡¯…)4:?1¨ ùiÝûB©Òê•pOüÄÐÖq‹xz”ñ^ã^ÇëÝíð°ûLN-?ƒHŠ;R|ϨÞjò V"þ %)dK*ÏÌ*moéò¾P8ù°«n‰J¶a[l5 ·q-Y½ ^eÖ®HR íõ£G~b ܺy6lü×jmêßdÕ÷íŽü„çøÄ@ù(iÄ*o¥SjàvSr 昸æÏ+²Ñs“íÕø ÿæ»E=ðßÿ÷ú£¤˜±,×ØÙ‘QJPÉr {—',ÃïÓä‡QSTs,àµDE0Ë¢·tþÖ]®Þ±ìtÀT'f,{mÛx ',W×4ÇÓbÂ2¹í ôíòüz¬ sÊ­¶bÈŠ1W­ÑZd+d#5»§ „æÎTô¯î¦Ïíºœ© Õxô™Š.O¾9]‘"S7ÃÅÑ tÄ‹fãÁ} U—3èZmë|ƒƒÓÇí›7ÈFaÔ‹“¿ÞRÊúÙ¨hMÆÊ1fÃÕÓó¾³xÛ³a`éÆÈŒÙÚKlGkôsWmFëÚcÉ£5jg^+·­‘dRfsÛ­1”]?$´VÏHoO@h­a³ULp=½&Í”àZ5Âl<^„k\Ó.ð.-‚k5yôB‚kÕ³¡LDkh„©ÔÕ̎зwRôÔú± UÔ |ÔºZ,ɘIjuù¬V#M@­’žÑ„ÓêÂÝ­Èp«Yëß—LÄi|ï•$£ñ WH«U§çXDÆPðû'†ÖÒìDÆ÷Š3ßÒ ¾ÒzšþÜH÷Ù€ùeÅ ¤!Þn6J Ó ™Ò 濤çOš6J ­?éº3µªŽ^E@=OÅ_訧àêß‹@­‚«§BR«RfÞ ÔŠˆ¥{ÌÍãúÁApÃj\Ͷv¬†‘¤¿a«+…[|MÅÀŒÕXGÏc CX-¸z+0V_%¦bã©vÛ]FìK 0[jÀ_Ë\“¿±%.–ó Ù õ»&ç0d'2 dÈÆ˜ÇNv.ËD—bØcI+`#!!¯'ˆl¤ŠÝVÈ6böfúØ}DÇg°<ÌMöæ6&^ñ¼Ø6ºK{ä…`eÏ­[^¾›ÚìaÍ!öø©RÍ|k·øÙ•¶áø×´yäÊñõàØ›âëÓ·»Å{ÃvŽõ²[€Ýc]Ü`óë…lRB· Vƒ»Øx|\ß!Ûf‹­e‡ìÝ+¶ØúÂÀ6#1¶Æž)À(ÅÖ(Îó-ë_XµËñµ|/ßp|=b©,Ç×›uS?ìàFºØp ±d˜`CHw_”_o0›ª‰qXo¯¨Koö˜Åm2ûíœá«é>Á#GºÒÔ½àb½&GŸý€'@9•/ØlÛIÄÏ~ä$â¬ß‹¿}v\äY¯“àÏ~.([|¸¡%¹vÇ>¨ ¶,QMo¹ÈîõùI]°]~Tò²¸ÑUû¯¨ Fq´:²@GšSU#˲ìBòí;K‡<©+Õ%n‡<üøf–¸j®¡½R듎ÞËY\ôyȃ÷rú­|sȳ³lßÌò¨„ÈD—?/òN,ǺÐn9¥òaŽ©„Ûaþ|•P¶Ú½— õë7dÂEÂkòš@Àµv ¤OôãM©fô'’íöƲ<…ëƒÜ‚ê•hE—£!›lÔ’62©—vg‚;ß’Þd¬¨£Qéy»§(ƒ‘Âeç;‹+õÀaàúMÆäWe T{}ͪ¼I Fcz,¼Ûa ì"‹!ûæ`$(ƒEm&*ÈSÇÊ`S_£ ÿ׈-J%`ŒD†|3¸É`$Ÿeã/7xÙäÈ ü,.ä¥%Ž9²8—Œ:ý,zvžCÆ"ÿ,¿òšÉ@Sg£G¦£vCüœH_ïÏ: aÐØ2ðÏ2œ]€0…NY 'y VŽ‚° ÝjðE§^e°¨ÅQ.×lWô Tõ†ÓÆož˜[»2\‘d©¶k§;úÊ`ä¢QÝéôº\†HF'\ sì‚ÔúuÙ"N—Ãz¶ #«:ÍU ‚Ë›¢H y $ƒxéƒü#¨Ùr¶WÆäW×*Æ{üb‹ÔÆèQ—ÁÈm€vrP›¶8™oÔ—¹ãu!>^ËXµðÐŒùê…+ ƒS° 6.)¼osmÒNßc-c|,“X ™µn<˜ŒE¢C¯Åš´×‰.eeóÜßV 2æRV ¾xoÓ;E®Ǽ ’î+r ½¬AåW×RÖYy¶ÖDn”'¯¥ŒÁâ ¥ 9üX7Ø Ü¡Ý·YDû‰ò#f˜o(?/yzÖüå; 
ÑŒ¦ÞP^Y®%aD=|H­áí϶‘Pº® ÅóÐCpÅñ¼2ãþ\p> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000034138 00000 n 0000034159 00000 n 0000034182 00000 n 0000034612 00000 n 0000034481 00000 n 0000034376 00000 n 0000034539 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<04A2D892CAC4E9BFA6C9F6AA98A37E66> <04A2D892CAC4E9BFA6C9F6AA98A37E66>] /Size 10 >> startxref 34693 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_skx_jc2ic13_nt26.png000066400000000000000000003170071360743507500230130ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ 
œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$9·-ç vpAg’Zó!%Õ€IDATxÚìý{”#ù]ß¿gXgñ®ÝvÍ:6vÏSm`~°%¦lsJ6y`צ¡’L²ó{ ¥¸IoHàqÕ/ÃIÂe@•pÈ6i?ªÃ¥7ìóÀO…›µMœ«ÂæLU\b³í›Êfw´6κËÛƒÀúýñíOÝT’J·.Iýy£3#U©TRê[Ÿûç\·Ûí‚a†a†a†aæŒóEŸÃ0 Ã0 Ã0 ÃdÁ+Ã0 Ã0 Ã0 3—°ÁÊ0 Ã0 Ã0 ÃÌ%l°2 Ã0Ì\×-ú4†af&°ÁÊ0 Ã0 Œçy(—ËEŸsƱm¾ï} 3sXÖO6X†a†a˜‰p‡•xæLÀ²~ú|Ù¿ÿ÷ÿþß}g ðÐCá·û·áy^õªWá¹Ï}n¸Ý¶íðâ$ Žã@Q”p»ëº°m®ëB’$¼ìe/˜¦ EQÂcA€W½êUáñ<Ï÷û·‡Çu†ÉË ï·Í¶mAY–Ãã8Žƒ÷½ï}P…å›Yf%ûªªÂ²,|á _üÚ¯ýèv~â'~"q?`˜i0Hfû·_øÂðÜç>²,÷È©mÛ¼f3 Ã$².ËòÀµŸe{t8ÂZ0A`mm-¬?r'‘Úeš&,Ë ·U*8Žn·, •J%<ôñûÃL‹A2Û´œòšÍ,“Êzž÷³l¬#Ë2šÍ& R©àܹs¨V«DJ (íÀ²¬Ä#K‘a˜¢$ヶÂAWÚY¶™Eâ´e_Q†‘8ÃL‹a2Ë0ˤ²Î×Êôaƒµ`¨‘R½^ÇÑÑZ­ÇmÛ$i Ñª(J赡G£Ñ€a\»ÄÌ ƒd|Ð6@(ížçÁ²,AÀ‘#f¡8mÙ¯×ë0 ¸þ‰™:Ãd–a–…Ie¯•éÃkÁP#%2LeYÓHa¡¢í ®ªjÏkTøÍ0ó ´žkš˲ ëzÑ_…aF¢Ù—$ µZ –eñØfª “Y`xfÃ,“Êzž÷3£ÁkÁP]ÒÚÚÊå2ÖÖÖBEE–eÔëu”Ëe”Ëe”J¥DZ)&¦i¢T*…‚FÑ_‹aBÉø m9f8ºÊ,Eɾ¦iÐ4—ÌT&³’$Á4MŽ"1 Ϥ²žg}gFã\·Ûí} Â(ª$Ia:/5O’e¾ïC–å0•,n”Æ›,Qš0ÃÌY2žg›mÛ°, ív»è¯À0cÁ²Ï,ýd6®³ jNÃ0‹Â¤²>h}gFƒ Ö9Æ÷}¬­­¡ÕjAQø¾r¹ Ã08=’Yz(•¦T*A×õ°6a––}†a†‰à”à9&ž|îÜ9”J¥0•Œa–ÏópáÂȲÌ2Ïœ)Xö†a&‚#¬ Ã0 Ã0 Ã0Ì\ÂV†a†a†af.¹­è˜ï~÷»ñ+¿ò+xñ‹_\ô©ŒÌ§?ýiÜyç¸óÎ;‹>•‘yê©§p÷Ýw}#ó¹Ï}+++ØÙÙ)úTrsåÊÜqÇEŸÆX,²œ|îsŸ[Èuå©§žÂ»Þõ®¢O#7¼†Ã"_›/ùËñS?õSEŸJ.xý>}}ýþÿá?`}}½èSɯßŰÈ׿¨ë÷Ò¬wÝu^ûÚ×b{{»èS™\ºt EŸÊÈlmmaww·èÓ™ƒƒܸq£èÓ‰;î¸c!k`ñådו­­­¢Oa$x /†E¿6^¿OŸE_¿ÅXxý.ŠE¿6Gai ÖEæÒ¥K¸xñbѧ1ñ¹° ÓE•“E½.™Ó…×pf™YTYÔk’9]xý^ Ø`Ñ«Clnn} ̰¨r²ººŠÕÕÕ¢Oƒ™sx g–™E•^¿™<ðú½pÓ%†a†a†af.aƒ•a†a†a†™KØ`e†a†a†aæ6X†afD<ÏC¹\N×uÃý«Õ*Z­dYF(•JPŠ¢ýõ†afnà+Ã0 ÃLˆ,Ë¡á™ß÷Ã÷€$I¨×ë¹ßÏ0 Ã0gް2 
Ã0Ìø¾ß“ú«ªjEÍÏ[[[ƒ¦iPUõLÍÔc†a˜¼°ÁÊ0 ÃÌ=¶mÃqœBÏ¡V« M×Íc¬­V žçÁq˜¦ ß÷Ñl69%˜a†ab°ÁÊ0 ÃÌ=º®C×õ¢O#,˨ÕjásÇq`Ûv®ó$ÃVUÕÐ@5M¶m£^¯ýÕ†afnàV†a†™žç…5©Ã‚–eõ¼.IRÑ_ƒa†aæ Ž°2 Ã0̸®‹sçÎ…ÏUUE£Ñ€çy™Û¨£0hš×u±¶¶¹A$öa†a† V†a†UUÑívGÞ½^¯Ã÷}ø¾I’¸v•a†a2`ƒ•a†a ">Ú†a†a˜^¸†•a†a†a†™KØ`e†a†a†aæ6X†a†a†a˜¹„ V†a†a†af.é1Xwvvpùòe(Š‚ìïïcgg§èód†a˜¹"T«U\¸pçÎC¥RçySý ÏóP.—Ó4Ãmôÿ,|ßG¹\†ã8=ÛÇA©T¹sç°¶¶Û¶‹þ9†a&“„ÁJêÆÆVWW+++ØßßÇÖÖVÑçÊ0 Ã0sC¹\†$I8::B·Û…®ë(—ËS5Zƒ 4›M4›MÔëu8ŽÏóÁÀϲmAô¬¾ï£Z­¢Ñh Ûí¢ÕjÁ²¬©Û Ã0 3 ±6N{{{ØÝÝÅÆÆˆyr«««¸|ù2±¾¾^ô93 Ã0L¡8ŽI’P«ÕÂ×TUE­VƒeYPU54(ƒ €®ëÐ4-|/E4éuÏóày|߇çyP%ql‚Æà!;ì›Í&J¥|ßÇçÐÿé¹$I¨×빎É0 Ã0§Mh°Þ¼y°±±Ñ³Óúú:Ö××q||<ÒÁ2wxxˆ•••0Š;êv†™úÉ7À2Î,>s'ß¾/ƒP@’’¯¹îà÷ȲxÄñ< Ëx‹ŸŒÊ4š¦Á4MȲ Û¶Ñl6!I*• $I‚,Ë0MÍf€ˆÒ’jY Ã@©T \ß÷{RUU…;à»yž~ž¦ip†a„µ5hšUUÃ×Î s'ß 3eXg–‰Ð`]YY‡ÿ'ŽÃˆk^:¶¶¶)FN†a$¢·–eåÞÎ0óB–|Óë,ãÌ¢3—òíûùŒÏQ VUÍ6X³Œã¬ã§$)ŒT꺵º®‡¦ªªðOޝª*Ǫªe9ÜXÕò½Ç¢(p]’$% VhµZð<ŽãÀ4Mø¾f³™i„/s)ß 3EXg–Ð`¥(êÖÖ®^½îÐétðÐCauuµ¯72N§ÓÁáá!öööz¶]»v «««xä‘Gp||ŒË—/cggÛÛÛ¹¶3‹OÕªÐût½WOœwÉ7À2Î,6s-ߪ*£’‘V;]º‹,Ë™5Ÿ®ëf}RÌÐ ‚ 48%IÝ”e9‘L)Åú€ó´mŠ¢„ §ïûa*pÜp¦s5M¶m£^¯þ{-s-ß 3Xg–•DÓ¥ÝÝ]¬¬¬àòåË888ÀÞÞî¿ÿ~t:Ü^–ƒƒìïï÷¼Þétppp€+W®ÝÍÍÍ0-jØvf9ð}@Ó„¡Z.Gµµì <@;ˆ l¨T†Nè½.#yô“o€eœY|X¾óCi¶q£5˜¦’ñmé”e9¬}­cLŸWàís²,‡ššÍfx¾tžY÷tiHôxÑaùf–ÖÁ™eå¶ø“••ìîîâðð0¬W]YY©ÑÒææ&677qppè,L5²ñc­¯¯£ÓéäÚÎ ‡ª ˜¦(åÒ4a@šfTòEå`µZ2sÎ÷ÅsE@1\·7hR.‹ýÛmñÜq„*Ëâ3TU|¶çEïuÝÞR´ ·¾XðWu7¾ù›oŒü;ô“o€eœY|X¾ó#IšÍ&*•Jh#ËÂø¤hf¼ŒJ’ÄóV+2Fk5-Tê÷~:?ÒÅ.]º4TÁ^Ãû£( TUÍL©•e9³FUQ”ÂD:·¢ÏcTÖ××qÏ=÷àùÏþÔŽÉë73/Ðú½Hò½Èë7sº\¼xq¬õ;Ñ%x{{;;;}=-“xMEmWWW‡nÆwÜ—½ìeì¹ÛIŠPº®ˆˆzžˆœŽ£ËY ¥ªQ:q«««¸çž{FÓ4ŒYËøwÞÉòÍäbcc/xÁ ¦zL^ÃGCÏѸ‰³7Í5œ×of^ õ{šNG^¿™ya\hÔŒ¦E$)»#/3Yò °Œ3ËËw>\×ÍìÒÁÒÏ4]dX¾™e‡upfÙH¬;;;ØÛÛÃææf8<øøø×®]Ãáá!®^½:ñ®¬¬ ¼†mg’œÌO¤ðº.¨EÂ2Î,3,ßÃñ}?Œ°Ú¶ ÇqÂN¼u¶m š4M ËoèýŠ¢ FóĘSå›YfX¾™E%4X;ööö`YVO ~UU±µµ…+W®pöQ­ŠUYŽ"ª4ûô,Ô AÓ4Fßý|ßç(ÌBB†K¿‘)€è= IËxA˜¦™x®ë:‚ €ïûp]¶m£ÙlÂó<”Ëe¨ª ß÷ašf8kµ\.C–eA˲Ðh4`J¥Rhà2 Ã0ÌY%4XoÞ¼ ™óâ666°¾¾Ž›7o²Á:hžDõ©ÄŒFèN˲ ëúD)s¦iÂq†*…ñz± ÂÈFÐu†aýÕ™3@õ¤£Y}‚6Û–eÁ9IŸ h\–SƶmX–hµZá5ÞûÞ÷ýSL÷ä‘…zòðØd´˜ŽIÁKûä½éçiô“cç=e× ×ºø¼SÛ¶CÃ÷]Çq ª*dY÷##–a†aÎ2=5¬Th~­ßlV¦t½7Š:ï)ÀA R©ÇA«ÕJl÷<®ë†ésY ¿ïû¨T*PU5TÐUUE¹\†¦i$ žç¡R©@Ó44 H’„r¹ EQ 
ª*‚ @­V×ù—O4ª‰aâAFÐ(úOçt]7”o0 £§)O{ÂÈ-—Ë0 µZ-TþdY†¦i°,+<í#Ë2$IB½^GµZ ÓòžÿüçãÛ¾íÛŠþY˜9aØØ.ß÷FºlÛF©T‚®ë¨Õj¨Õj¡qJ2iYEA»ÝF«Õ‚mÛ‰tR’MI’BG Q¯×áº.ÞüæA’ŽpÇïÃ{ßû÷a-xž~žçypžçá[¾å[ŠþY§ ¬Y´ÁÏ U<¥ÏóôcĤI’B£4‚PÎh]"Yaz9)ïe†a˜äV˲póæMÜÿýPŠ¢àþûïÇáá!vww‹>×3ëŠTàÓÆqœ0*”Õc¥ú à8ªÕj¨ÌSD©V«…Š|©T‚ªªh6›aúEW«Õjh 8Žƒf³ -ÃZ¯Õj ƒ6¹¢æ%t?üÃ?|ú?$3—T«U”Ëå°N2$ËårŒ;Žƒµµ5xžÊ/€Ðè4M¥R Š¢$d—3A T*amm –e…Ž–8A jÕ›Í&^ò’mXÖoâ}ï{5þõ¿~-¦)¶3 Ã0g‹„Áº¿¿k×®acc#L~øá‡qpp€«W¯bss³èók¨{¯a V2=Oìk"•/Õ{(q,Y/ªJQ›ìóŒšÃô3-Ë UŠ~’BO5rŠ¢„Ê<s¨V«‰×fÚT«U¨ªšé¡Ñ ®ëfFæIF$ä›Òx-Ë‚,Ëa„4þ~Ê 0 £G¾G<%ŠA¤d+J2"*IB¹§µ`¡4Þff€,Ëa„À™Hû•eqO#Ù/—£ë!}»ñ}qÍ´ÛÙÇ"§î‰¯ÍfdÌÖj_…ÕÕOýu†a˜S ¬a=>>ÆÎή\¹’¨WÝÝÝÅöö6®]»Æ‚‡@¥KÊ=ˆ›w½.nÆŠÒ;ŽÆóÄ ZÓÄ~£ŽW¤šÑx§Ê8ÔüˆÆÁımkkk‚­V«§Ãe»ÝF³ÙÄÑÑšÍfÒnÚí6«ÌÌ TÜôüK jÌY,—ˉ†J´]UÕžÆFB'L­Vƒ®ë™©¾ic•®_M‹œK®+ž«i$I(ðõúüwøfÆG–åžTßeöÅ=+~ï“$!Ûµšoº6aÖAµ*öuÄ«ðÃ?üî¾ûÓEu†aæ#¬‡‡‡8>>ÎóqåÊìïïóÖP T³ ¬­eïb¿¸—™RâºLµÝäG%92M³gvcµZ…¢(ac"j$C ’¨ùѰzQ†)’ïF£6ä¢uÇq`YV"ª*Ër"%“ê©ó8TÈè¤((½&Iâ:Wq=;N”%®2̲C'Õ\g‘¾({¨VË_çš•eôüçÿ î¸ã‹Eÿ Ã0̈t:·ãþ{¤÷œÏ»ã­[·Šþ~saD†(¡iȨ“Þ×¶Å ~c•"Nõz=œéÆNÊ>5@¢º=Ïó IZ­êõ:«L!ض=ptL\¾é¤ êTNŽwçm·Û¹ŒU:… ×¢® µÙº]àè(RÂUU¼Î0ó†çeßs¨$e¨ko¥õU > £Bý…ëQ™ù#+ó,ŽïûïUÔù}Pçy†9K¤&TÞÿþà3ŸyÁHÇ #¬4‡õÚµkØÞÞÆÊÊ €(U]„™$A näªݼ³RœúÕêP£ ßéN²œm¬Ú¶ŽuÉÂqœ° a,ËJ̰#uŠÉríQ¡€sã"&AÀó¼°”‚ ¦t¤Æk¨ãN×uÛÆ?Q†¢iI4¾Ü{ž¸^ãxm[\Ãdžš(Q_îÖË,¾ï‡ÝßÓP9”eYÐ4 ­a'ÝIQ¸® ßoãüƒ¯C»eøAJ5ê®ë&æÈ×ëõp §+éoY5í¾ïÃu]|ô£+øƒ?xUÑ?s† zGGÓÕ†Ù6£pÛááahœÒV×uqxxØÜÜ„ªªl¬ö¡ßhÓŒº‚V«b¿~út¼e–QkÛ64Mƒ$I°m;áÑ#e¾_*¯apg~Õ V|ÛŽ¢ªª*”ÚrY(Æ’$iYî5`}_t½Ó>ó™W„Ûi,_­V ѬQ}’$ lº–ž5 ˆkúé§-ú§cΪeÎL3Ê꺽÷Ï®^ýj¼éM7F:Öm{{{èt:ØØØ€ªªX___úކӤßÈš·HºÌ …þ˜MOÃ4òú•J¥Ä‚H)¿ýþ^‰m>éä1 ÊjÆMï 8lÓïŸOàœ˜Ü¨jÒÈ“$a´Æ³²Rÿ,+Z÷ö>‡ÿôŸþ ï{ßCDäéßü›÷à‡~H†ªFé“ú€°­¢(ðýÈ1dÑy5Q$ ®ÇjUì³i‰t‡™$›™UUá84M £­i#¶Z­Âq´ûÍo™jú5È):Œ ˆJSâÇc˜yÇó<”Ëåp.v<ŠŸ/IRxzžUUû:B Jã¥qeý³äòéÌçÎõF°l[\sñk×ó€‹?TôOË5}œQÅHd°¦'£@¥é×4MØ7†!dßó€{îùÜÈÇ¿RM]×Åþþ>(âJ,“mV¨.똅,gµ”¦B‹p¼Û)-¼Í¼în€`ZÞq÷ä˜ã <ãÒÂt V Â7§ø™¾ô[ÒNœxlò&“¬?úè£øš¯ÙÂÑÑ ñU_|þó—ñ»¿ûÿÃÇ>¼ñÇqBg A×”çE™ Ž“=&žA‘ÕAM²¼ñÁÒçh’b—¥ÃÆe‚nô4Ã3 Z:©©PúyJO~¶®Ó®ëB×õ„ÁJ¯éº¦¶O‹ ˆºíNò»Ò5I]ÙXefÍWe¢l×nÜø üò/_E£Ñ€ëº(•J0 #éç8Nb&|³Ù ³ÔFi.éy“ûiIHG°jµè¾HŸáyÀßøäô|fa 
‰³€º´Ó%Ïz›¤OXµ*îIq•ÊÆ'’}]žÆÑ¬Q kÜCäº.°··ÇÆk4ž&Oɼk"5_J~NУ°“¡ª(JX·:.Dds‚zr¼q‘h†«sòÿI "¾&„ñjLv8f0ynâžçÁ0¾ ßýÝÄ×ý_ÀuµÐXt]7oþ|B¾äGîÀááï`}ýLSÁþè{ó“Ì®0·^ -yî!I½©’ IVNóÚZ&[Rò8²ÒUó,‘i/ö(^mJ ¦ZµtÄ&®˜—J¥0;êÇÒ{OH¾6IT5þ»R÷ß¼cff(âiFØœÌ0Æ»QÿÜÏý þôO¿ˆ'žh…i·º®Ã¶í02šŽ¸˜­gUº”…¢D4æl‡'5çÌ굤ª£¸yƒÌò¥KÕjbMž¶<Ð|xÛŽîs” ªªQ3ÊaØvv¹—,'Ï™î¿yt´ad޵QUW¯^Å#<‚ÍÍMâòåËa]ëY Zíïi0Mñ‡š¦nhYVO™išÐu=±àjšÏóP­V¡ëúh#h(º:ó&¡žÄP "«uLn¬ÂX„¡jÇΑ™:A‡äô«¿zùÈGð“?ùö÷¡ëÉÚë4š¦á'ò_áŸÿó·ãw~ç+Ÿ£iÉ@"uÕ^b;­ «ÅïpšÖ?üÈÌMÓP­V3»ñõ»^¯£Z­ß"*œQ‡^ š<  qO›§V~^͉Yâ鹺®£^¯Ã¶í±ýÈüÞóžWbcã&®/Y–Q«Õ ë:TUÍl²4 J‹_¢dPÈröȬc¤/qr¬Æ•|=_¾³jþ˜å¦TÊ6)ò>M¨ßH\Žãs¿±œqÈèM¿Fcþ²ÌºãÉa£¬‰ ¸ví\×Å¥K—ðÈ#œ©±6†‘m°’‡mL§`4³Ë÷}T*•DgI×u3½4cu,Ïä´¼ç2¦Ÿv;-ý„"­•)Ÿ’'ºjš&jµ~ã7~¿ñ?EiàÎ;¶m‡)”YõCT[ô ¿ðïzèY´\_8d¹÷‡È3Ÿ„™T¿:,rJã.¨ Œëº8wî\ø(—Ëáý…ê~hnj¹y©©ÖhÚòi9~*•ÊÄ3(+• vÂ,¤ßÄÓsi¤^\ï!ȸãúâÆœHûýìg/à§~êyøÀþÎÔÏ•®«¸ÒN—5•¢ òz÷£6qc–>C’¢mébÎ$ iƒµR‰zጠÙ+ih§ÏN¬Ã|…ªÚ›…äy½†ªi&í(ÓŒF ŽÅO<ѽï¾ûº÷Þ{o—h6›Ý{ï½·{ß}÷…ÛÞþö·wç™ëׯwzè¡©SQºÝv;ùš,w»­ÖtŽßjµº²,wëõz·ÛívkµZWÓ´n·ÛíªªÚm6›™ï;::êñ…NþÕ§ú3u»Z·Ûmù>ú-ÛÝnWžàZ'Ç‹cœ<2˜…¼Ìš·¼å-EŸBH­&ýh6›]UUÃçGGâÚ9::ê*ŠÒU%ǧ0“0Oò’‡E¼&§E»-®§ô²Þj‰ëF×»]ÃïØ³¦•º!Öëõ®,Ë]]ÿFsttÔ•$ià1M^íz•z½Þm·{ZóFß÷)Šê4´­VëêºÞ}á ÿ¯îúúÏweYîjš–ЋTµWG…´xEÇìw\M®ÊtÃèvã?Eúš§Ï]4yY´ëq^h6…­qb„ÆÔ¨‘QUqü8t‰Ë¡<†îM÷,B×£Ï:1cz®•Z­Û•$ñÚ8òrÞ0 ¬¯¯ã‘G Xª]}ä‘GðÎw¾ÛÛÛØÙÙßÌ_@*•Þ,;Ûîßùt¼ÏˆÒc„Sò¾÷«î7Ol(äÄôbÿQýªQƒ:ÎùÐo)C¤e NŽÇÀô"ÊL*¤ïGº6‰êGi Í4Ï, ¶=Z.Œã—d©4ý¼ fn¨T¢Úì4ŠEUO«[ä(P‡Vó$oß÷ý°«½›'w²Ô¨ÊuÝéÓÌ|à8LÓ å N¥R8'›’‰ì?Ññ·^¯ãMoº@G»ÝF£ÑHèE“F €äûã©¿ý¢@éèk:EãÜhÄíCŸ“®cM«r\¿:¿Ìb)’¤Þþ ÔøŸêAÇ…ÆjÆ!›†®’×8ô™A ÞŸý•¤dô4Þ\S’Äóôè@Ú>® u¾ÓéàêÕ«aºïññ1±¹¹Î^ÝÜÜÄññqOÚð2B€âYžžÒ`šffJd|àôÔ¡ûŤ͒€¤ai`¼Gé”âzμxè­Í•ÀëŒð<`mí³™Ê$)ªƒ.#Õ^ç.LÖ>϶“]¨PªÕjÔ…£Õ}…w<ø®wýË1C QL­Vÿ¦º^œ±ê8ÎÀZRÏó ë:‚ @¥RAµZE­V ¯ñô{+•|uÔ¼ŠR­™ù%˜¦‰oýÖ§ÐnUÂQQ­V‡Ž%"£µT*…Í“ˆNçv¼úÕw$ K2'5XëõäÒJ† |…Ô«!n Rê. 
RÓ]ÈÉèUÕèܨC* ^w4CÍϘùdPÇùqQ”ìæEt/˜DÆu=Y“šU«šUZB¥(Ô 6¾AÔü/nPÇýQi§NüõIfxŸßØØ S [ÙŒo?+Pkÿøâ4­æ.¾ïÃqœÌTªë˜‰BOJaàM‚ŽÙÌ:¥Ù¬ã¼¯ßùXƒ™©áû@£!Ȥ™¤óãTHw§™ljzÀ1"cµÙŒYîßµŠf«6£íú¾¸«:þŸ×¾vz¿)31Ô™‘°,ñ§ž§FGqÇAµZX‹êû>TUE½^‡¢(¡‘ ˆzݸñâyÞIMâðëˆF·éºÎëœC]¿þëïÄk^ó`eµm¾ïçv¾ëºŽF£‘˜3ˆ%->‚ƒ^£šÐqë=Ó†(‘6`ÓÙDqÀ0zp2z©Ž™^ÊûÍâ> óɰ&““7†iÄ ¿n: ª‰ñî½ñóék(ž ëIƒ5nàÆ¿l5MÜß²n“d¨ž?>>N¼@sXÏ¢‘‡þ¨”¹8ްog¦¸RwÔ±ÒzÇ%®»«˜Ü`•‘ŒhzFñ>ùè5"- øìgÅë{ïí|²"¬„dú_~a&ަEÁâQ”aÑÕSÁ²¦ã]2Mq¬f3r‰ç NËŠºÄU¢_¾e?(–f.4x振áóFµÝW¨‘Ìi«AÀuÝðaÛ6LÓìñô<¦i¢Ñh ìâêy^è\5 #al¨ªšx¯mÛá¬ÚAø¾–½È²Œ ¸cðœbY$I‚®ëPàŽ;V¡ª*LÓ„mÛ#—~¤ï”®(ËÉå“®#ê?Ô$&žºo¾N‘­¬¹Äq#5[Èjš¤ªÉýèÖ’Þ/ý=™ùšóOBº ªqÃ4aÍÓOѶsç’ÛŽÒté¸éhªaˆíi9¤È,ÌB QOéØû¦-ËçÃq5ÇÇÇp]‰(øâÅ‹ÓýôtÏô¼¤GÁ²F¯+ð}o»ñ6¸VòƒlÛg†*"QÂä5¬Vê9]yƒPiýƒå_Ø‘àùàʧF3¬û]Ô¾Üóo'üÂLœZM(«µZ – C¿ÌSÃuÅc9”š&r5éâ§ayyÎö£>ïY+w¿þñŽ3¾Áªªì¢ŸSZ­H $³À²¬0ºIN&UUáû~1ïèJûô#n°¦QU5aœº®‹z½>Ô`u]7Q*£ilÛF°m{èX fú¸®ÛÓÍ×÷}ضz½Φ÷<„3VÇqÊg¥×’x‘¢O©ŠÑ¹ ¯ó+—{ƒñ(m–Hï‘Õ •дh{µš=„|•Ü xq˜Öß+m|’lÄÓÓ)2 D2:ìÜt½·¶šê±éúH¬AO¥‰_ ñs#§NµÚ+ãõúôåûüÆÆ¶¶¶°³³ƒ­­-‡Æ°¦ibcc«««Óýô‚ð¼¤W-k{üǯÕF׫Õ*]Açp‘‚–eõ¤¼œ ’õ¡’üÂçÇ?^ÜKD¹mz Ù~Ð}æ5HRäÊÔüÄ>õ‡ÂÈÎÝö 5KS«‰«ò?1^ºñ"k~\@ŒRö<Ï £ ¤‚늕“ÂV¾?~/Ð{— wä04MœíßOiËÒˆç –]^²êÔˆÓü{žF@顪jON'1òŸµ îgÊMŠêXÇIÌÅLgeÄ›õxž—XCt]‡mÛáø7Y–Q.—qëÖ­ÓûÏ043[–åD H¼^Ù÷Ų'"CZ­ÖPÙèýœÈßHPÓ1 ºµÇÓÞ÷d‘ž­?E‰\7y­Òõ™'».~츮¨(ÙQ5UÞhDæt ub’Ôà,Õÿè¨÷µtæ Éï‹vi#³Vò”Õ@ ˆTôµäS‘èIÿÏjÂ9“+ݸš'9ñL?ü0¨‹p!FÖŒñèWš÷ £ÿèaZ¤«â5_þš0E³' i8£@ÔBÚ[*ðÅ :?ˈŒMÊ_ #OöÖï~øèïˆZCMë½’ïÿ{À3?4 ¢¶ƒŽiÛ€|âBí÷ǪՀŸ}«0¨¹©A_(d–üÒ/}"¡ÐZ–U\íªï CÕ²z»hX9=(ƒ, bÚùZYÝX“YÉ‚<?þ^àÿýõøêotpcó+‹ú Ì-´jšc]×3Ý›7ªúr¹Œ &ënM«í(j2d½+oÞžðdY1j‡î€$»Ô–Yhâ"5J ©R©@–å°ldÔ(U?ú9IeYîijDéœ9¤z¿£74åSUÕ0™Ž©i,Ë ÇÖ@«Õ ×~†u]×qûí·ããÿøT~&r6ÞR«Õ¿S–.3Ir5*Š‹šmGêE|9‹Õ¸RJr?cRɈL7RÊ2NóúéØT¿ol£(Ó…ÈÉ$?f†ãrHA3’qD D<Í\U“ו0Öjɤ­ø52 §)]Si5ë4Õ•ó€¨QÝÙÙÁÎÎNbtÍêêêÒ«AûEYÓ5YG8]KcÛvØ-Màü+Ï£Z®¢\.‡ʉ©bô®·&¢úÒz=r ®p JVÿº×÷½L¸-Ó¹Ò:wú qxå?þù?lœÐÕG?™ƒ¨Ï|¹,ÞÛlZðrÜj5à{_¼éÿÆþ)ÜþùÛ'ÿ[,!Â'Ò׳:„F€/|á‰P±$åz¬, ÓLXŒÚÓ_’„UÐ/g˜Kt\ËbZä­‹eæ† H6Çȯju¸-‹xJ}¥RØð(/®ëö5XÓÖ,ƒ1+ Kç:Ì VU¶m§fgFÇ£¬ êŽoYVî’‚{î¹wÝu×Ä¿ÓjIH’„z½ÞSŸJµšy¢ý c3nÄo´”º® £· è–Ö8É2?vúÜâµëÌb’®)Çh×Lg5W¢cÛvo6í¡÷ÉûOÜq’¾m{²èpœ$38‘ð<Õ®îííaoo/¬g]V'éÕè'xà J‡J{§{Ò" à­o}+ 
Ãè;8{$\ˆFD£.®zgœÊ2Pþl¾cA¤ØÕ½ÀW|Eï>¶-~àú7@’åX¯÷!Ÿ™®ñsüÄsÅKƲ$‰×óøèv½m<ýÀ§p÷‡ïùç_v¨±€ëº}GJ¤SöáyY¾«ÕäÔêx;»¼ »ƒzÂk¬öËLÈê<0 Öd Ç™9qÅ7íc!eaô‰D~èü1 F£§ÑMzÿQŽÛêÄ ô6<Šï“Õœ)q©ªjÏÚ@déÿô­VkèÌNætè7+^–¤œÇú¼Ê|Z¬©¢'¸MÏv´¼RJrÚHIXt ßïMìšV3Mã™e «R‡üñyÓ‚Óª9I¬´}ؽ#Ý)ÞÌŽ“>ƤÖ%)…Õ¬8¿··‡ÍÍMxžÏó°¹¹‰½½=¤ÇÝ, ñÙCýRGò4 ô<õz="O²,ËÂ@´!ÖuÓ«í3!ºè¶sîïû‘”¥ VEîmˆ4ÞöMíMgª Æ'”ÒG¸.ð3?ó•“§*RóŽx~É$ù6ýPÕìcNY5ÍÈmD­öâšQ^¸˜iaˆO(¢%(+p1z#–Š¢$ÒíÓT«Õ‡’çy(•J=ÇdXÆ#žýêRÓ3Ué¸yh6›=Ÿ¯(JfÍ;¥C3Å2hV|Üéí)Þåãíh¬Kœ¬?{üã)²5ìøñhgš~‘Ôi,Ãýº 3‹G<ñ+Mƒ5«¬ñm$%´mØØoT ÉqzŒ0^ÆOšv^»cFœ€+W®„/loo@8êfÙÈR0ÒÂ7lᢛ:Ý\ÓéM„±F?IV—늻ƒiІA½ÎÍÁT*Bú³ŒGRæäGC?œŽl‘Ü—¤\½åÀWÿÒd¹6éÕ>wòL9&U¢Fu×Ys?õ©ÃÉ:SWé¬z×A³ˆQ¢˜YwÇ™, ˜Üó–%†ŸU*ÑÀ>f)¡f/Y%÷造aŒ×7++íV×õ0­6 ÕÆq'ìÚ?î Fñ¦Jý Öt§à< —¡i Ã(v^3Ó—xªvš¬æëñÝòdd¥ÒS ê—n`DQÔAô: Cœë¨•'y÷ºgæ´ó!ËW?h>p|¿xÃ"2LÓÇÉsl ñÍ2p§PI2—œ¨S]YâaóYô6v§DÒŒ¸DtH¦©V0ÔC›$Ò…ˆ&e îžkšâËèzƒ•¤Þr¬82ú§àjšˆž•JÀ~øÚ¿Ÿ‘›€ü5¸ýŒŠ*¢n¿ãþ¾Lêcµ¶öÙPŽ©£')À¿ôKŸÀ—}ÙñøQ*„ègÜ Já¢Ñ5yç0fEm]WXã*Ü’$®©FC¤˜7›‘[“YJÒ5oqâ5­“A¦Gs-“çãe:“(*¼öëL(ŠFKûí›6X‡s’$-Õäe‚æôöKͦ”Úøóøí ÏŸµÑH.ËñåœnŽÓëëÖõ|ǧ´â¬Z¾¬N§ C\Æå3ÅÖŸ‘ ij²š ¥åRÚó IÙ÷%Ç)>uwVœ/úN“~EÇñÅ–”öAÄ Vêt˜9ÒƒIÃàôØ~Pë/²ÔDÒPÔÐ?2J)Š´ºZ %ˆ¨i\È)í1ó‡p,jgÖhþ*ðÛM»¦‘õÙ<9{JÇcB‡Î½÷6Š39eLÓÄ{ßû^\½ú]ãHµ¹¹³TÇŸ³:ŠÁÜ“Z^g…‰~CムÀ'>ñl˜á>‰²Ð¯ÖT̹L.r5¥kS|¶“9[u˜aI)Áy¢¦ÃR‡™ÅÇ4Í%L陋é1€yÑ´È µ¬Hï’$agÍvêÚ‡}‹Ì HUˆ« £úÖHîâÇŠ/ÃÔ?'kiÎ[Âï8BJ_#šVÌÀƒÓàL¬Ô).MüÛ/ 'í]Ö4-]MSC¾¦@ÉR>heÕÍVMQÓMjuˆ4[Q«ßU:È`¥$úøo‘ލÆgÂŽ‹„Èhå’§©@i,ŽããÒ¥7†¯ëºÓ4¾ökÿ.]úêñ>€Ú7Z‘ûˆeœ4Þz5f"ú)¹•J=ö¢=nýªxÐ×LwꥺÔx”•©ñ9§ƒ:Ç¡´àAûÖj5T*˜¦™û¸Ìbašf(Cý %š Ì´¾4¬ÎÔqÄRžn>“§AY yX[›üÌÙC–{U”´ Q?ƒ~T*ÑûÈxÍ3Is:Kiè·ÀÖÖVφ<üðÉ×vww‹>߉ȓNJ{?úuH †¥(zÞ7Žm'%Ñ’a †R'Eƒ2¡ˆO6†¸ÿ Šš‘Aëy^XcH5«ñY§ÔÕwX‡àøñǘ¦«ª*Z­,Ëâë’ž¹ÚZM,ËŽ#û´rO˶mgwÍ%#RÓ¢B:ƒFsLÒ‹ª¡fâv)2J#›ˆxc/Ê YŽ;Iè¾µTV*“51R”ùî×H÷@d ¥ïNŸùÌgF:æù+W®`}}=ñ¸rå 666z^_tú-~T* ä3X³nÔ=JA¼“.ky¡®£ép°„ÈuCøµ¬@¤Ü…¢q4?úÿ^õ\$ e3Ç£CŠ\bŒÎ´Òx©#131ž<úè{!˽ ES&jâ›w‚:¥üÒ(¥¼…K 3hôsÛ¶Q¯×Cƒu”:VÛ¶{ Ô¼u¦@2}XÓ4T*•Äè2póŽž‘e9—Jµ§Ýnw¦¿9sºAÐ3s5{¿È—8(‹jTK¥Þk§VÞ˜iØ´²<Ôë]eF‡ Qr¶ôkŽDñ$YNÖZdz(-¸š¡ÿkÚäCæ5ÉÅó<”Ëå°\²Z­¢\.‡÷0˲ðàƒâóŸÿüHǽºŸú¬ñ>/Ô@ ¿që¢ÙÌ®ŒïBÇ /²èûÑŒQ‚ôuGJ¦ 
Ë]ƒmºœÝÆÒ…0ûy-K¼‡Þ;¨‰QpòçB©C¬äËÜŽ|âÅK^Rʬý€vÃlo¶®k’_ËÊvá3Ì)’eˆú¾6IR’äÁqhÚ`‡§ïû¨V«‚¾ï‡MÏóFBUUEõDëI–º®cmm-¥qTƒšçÄQåä{X_AWšÍLÞã>ÖÍ‹}/þÛ¿-î|—˲2g®¦qñgªÕ"õÄ4{ý‰ä›¬ÕDjn¿¶¶¬(ÉÆN sZT«"òIëø°žé*¦tƒ& {ðÁ¢ûàÉ!JÎS]×ÃŒŸJ¥‚F£‘XOÇA¹\§«ìíí<æLÕ°ö#>©_c&‰ðv_\7Ûm¢BDówßdmKK ¢aR€l—˰‘5ÍfÒÐTúJóeÇ¥~r¾sê:Ëxð?þÇ{ñÏÿù+û7á%{²Ú<Ž‚a°±ÊNVja¼ÉE3Ó¥ûiÈë¬ë:F¢&5‚¡5¡²,#‚ž15²,£V«%”ªcÍÛÍWQ”¡Ñµ±ðÇž³†-ñ†‘œ;¸Rtj^#EÌr“î-J¾2¶{g¥fA©¿@oÝt­¶Ηx oü5š‚B%¦ibmm-tˆÒ}§T*¡Z­f«€Èj·Û¨Õjh4xþóŸ?ò9Þrpp€7n„Ï/]º„Â~ÀÃÃC¬¬¬$ÆîŒË°â{YŽÆôÓ‘i6å@,ëd“¥Á¢îÓBÿ(ç°^ë%-ˆ»u4 ží’?Ëk£cpd·ßçVÑÛ0j©¼g¨H|Ó”ïiðWõW¸ãŽC<ïyÏËÜîy€÷¶¿…~)gÆ4c¥ffÆ¼Éø¸$zÈÜØ)ÕVUUX–5´Çœ¡{H¼ïëºC£›ƒÒ|³ºº*Š2¼·Bjÿ©ï§?¶ãDŠñï‘Ö UU<‚@ÜW-+–ø¸iâ‹1}å´YÙî— Q#1Ò‰âêÕö [âãúT:@Î NÝO–EÆû‘NdLÛ äWËc°RS×´*?Ïý-Ë‚mÛ §&EL© `Tæ‡ãÔZ­VÏýŠÆ¯†1´¼e\nÄÍò¡‡B§ÓIlÜÛÛÃêê*,Ëšj ëÎÎööö¯mll„M: ÃÃŤL°ò¹xX¿ßoí8ô’Øt”VAcºƒ<Ü$$$”ßlŠ× £VGoôr\ï¥Ñì)~Z>Fku)B¾§Á'>ñ,¶·µp¾Wׂ§¿<ŸvašÑˆ#féXT‡tÊc:%7]_Úß÷ÏtÝè°y|üŒ‘£5dÞó™ Èž…\«õÎ;Éê§ï¶“‡¼ÍÚ¦ÄY’mê&UçLÉbé±´¤+JTM4 •ôx`Á»œú¾øf‘0CÎ’Œ"«3uÃKÝ0£SÓ¢ûÄ(KÛ¬!çjºÜ#‚pòC–ñ9.yú%LÒºû¶ƒƒ† <øàƒ‰t]ûûû¸|ù2vww§m½yó&677Ÿµ²²þÿÚµkX]]Å#<‚ããc\¾|;;;˜e½-)éýîïtãš^¥i«œµ~)iÁi¶¬ÁnÊô9©ªøŒx¯ 5Ñ›Òkc¼12zÆq\ŒÞ%øŒ1oòmYù”‚££.4Mƒ,g¯'ºàðQÀ½}pW j!É]}—–y“ñYBã°‰¬æDýtÃÎ2t©‰ŸçyCkME ›5åQ0t]ŸÁJ3IÒ‹JÚ€'ÍmÎ’l›¦Ù7\’ÄŸ8ž2Y.GM©!MVÈ4¶õÑ[*¦ëF¦õÅòü r–d|dƒË²øÉÓÑÔQ¢£ŠU5̦i†Y¡–e%ÊCÈy:“2A¸îp{gç¯]»UU±»»Ûs³¿~íÚµ©ó­[·°¾¾ŽðAÜN§ƒƒƒ\¹r€¸ˆ677ó5:€mgwzŒ¾«ø·Ÿ+˜àyÙQȬÔ\Û>h±KG5k5a ôx®‘]:AaøùkQã:8Â:„"ä{ž—]ô'<ç9O… ZÖ¢+I€Ôù`o[¼øÑ4ë~]6˜¥`Þd|Vô«ÔHŒd|ö#«F5ÍSïFÖ(\P—zÓì½Î©KN–lI®÷³"ÛŽã„Ͷ² Œl JÎjŽ”§;v˜xæÍwŠäȸ®ÐÅF1Vm[Xþ.dGH)lµZ¨Õjh6›h·Ûh6›áÿkEXÖy£'}8ßétðàƒÜéÊ•+èt:#wtêÇÁÁ‘–°³³>„ç@"y}}½']y-”ä0žØ`€¯ûàgžÉøˆæKÉ÷´@˜ÜHM|DW_6VsQ”|÷#ÞT¬Žãà[¿õæÀ}L‘Ö’> u/ "pn–´ÔÌ›ŒÏŠ¬ì¥¬Ž¾ªª&š(õ§7*K‘䎄ÆkЦ Z[ÿÒOyÛίÁ-(gG¶½!â_Í‘u-ä]êey~"OSeÔôFϋʿêõ¨+vœZ p]Ü;ÉÎ!,ƒŒûþpG|œôòLéî$ß@vÓ(Þt“²Óý=üp|Œa¨ÕjÓIõ%oÕä'˜¯ xçWWW‡T“àO~Ò'Pþüññ1¶¶¶°¿¿/ŠAŸÿÌ3ÏÀó<ìììdn×õá¿S¿µ'w:0ýQ/~ð?þ2{QŠ/]%ÃŽëbô:T ‘Ñ:Â|À³ÀÁÁ}ôQ|ä#™ÙgL[¾àÓŸþtÏÍež'Z”ª2×uqíÚW†Ï©¯WÇA´àdÝd›Íù+â8£ìììà©§žšégœö^ýî iã“¢Ÿýp]7óþAïËk„꺞kLÍÈT«â‹R‡^ºŽÓçÕjÍE$u>ú(ž~úé™ÖïIæqg ÁJ³#)Šš9ü@Í\tÝ9i®DYy÷­V³÷wœÑ•oE†*å êº0ZS]°ÿË÷|¾çþ`¦F⢯ߔ]šÒ…âø¾øóÑr“ûÙUõtdÜ÷}X–…r¹Œr¹ŒR©„r¹ 
EQÐn·óÕ’æ¥\ÎÿC»®Ø?ËyKc é࣮߷ݺukV¿i&NW®\Á<æÌ¯®®bgg›››/ˆ[·n%òìãÜqÇxéK_ŠK—.}ný¢Õ¹£«ÔàkîúÙB4Õ8éïž7‡|ƒU‚˜Këß<Ü$戋/âž{îÁÇ?þñ©{Vò wÞy'.]º„‹/æ:ÛŽ†_=ðÑ>IŠ%ÌlÉŽ$PO®ƒ¢ÜˆL..]º„ëׯÏäØó¼†Oßïõ¿ô‹¤RÄ4KYH7\"¨.5¯Á:UE„p]¡­-P÷›õõuܺukê ý<­ß“Bó~eY†¢(‰F]ä$¡Œç%ƒñÔHê‘eš“¥óÆSŒ Å4…¬ç‘wº‰–Ëâäãh§‘߬ëQÔ5æúÚ¿ÿ÷ñ£ý(Þ6Æèa,Ëúíûù ̬¿çE"@ciFqÀd1ËIJ à8l[Ì•¤Ô™dÜÄ$R ó”yU«Ùöhˉ½sñâE|ÃÝwããŸýìH§sþøøxè¢OÁA‹q^VWW±½½8]$»ŠßqÇxÙË^Ö·1T?£?N<—=ŽmÛù½Ú² |ýw¯xEöv  ¬Vó÷s¯¡7•x„‘[p>ý<²ººŠ{î¹wÍ`†ß¬ä ÏÆÆFîó†yüetض¿ówþìØ|Ýôšv‹¤UŸæ@1sÉÆÆ^ð‚ÌäØE­áEaíWóG]³ãe¾j_§Õ¥q,l{ẜ®¯¯ãž{îk–ß æiýžß÷ÑjµÂqqÙ¤f_qâëVívºcðBâ8BïÊc¬’SÖ0Dæ$Eag:VÜ`µ,ñšeEó†óÜ#ëõž&4´~OCçN³,ë÷°Iq$I<â)ÄqyV!ÿñk` 9¨Õj¥R ¾ï£Ñh„×u¦±jÛBþòBÃf³ °sºûZ¿ãd…˜=¯§Õòêê*6ægpñý¯‘~óëëë0 £¯Wåøø;;;‰¢ìI888SŠò®¬¬„ÞǸÝét¦²È³ kµ^‰išÐ4-ŸBAnIýçžJ#håtêü\p„õ)R¾ÓÄײ~â,Ë›Þô3y}?uýäí¶Á,ó$ãÓÂ÷³“Ò}FÍ5í7F&«áAµ¯SŸ: Æ’µnŸe‘íxš¹¢(Ð4-ŒÌ½õ«4—Ä;í¨!v.Rz'Áuó—¬Èrr¨Õ¢†dô<~c%«ˆ®©R)2dç„e‘ñ lkÅ÷£ŒV²¹Ò IJæ§MµZE¥R ³s¨QÒЈ*ÉZ^ý,"9M£ªQê:u©êG¿U n¤¶}úÍoÆ+ÿøGúMÎ_½zN—/_Æþþ~(¤N'iÓét¦ÖÎzee×®]K4pÚÛÛÃúú:Ö××±ººŠÄåºnØõê4ñ\¿>)–eAÓ4ü›ó¢„2BÝú‰Ì›-^st3ffÏ¼Èø4±¬ìþ'½sú¼]U³/ z$I‰ÔL¦X–E¶Ó2§ë:\× .qƒ•²õ4-Š@e)ï o¬Š/>ÚI_—ñL„´á«ªÂÀ¥7†!žgµž-e‘qгËGÈx¼VèM>:š} ÉuÝ„ãHœOÈ€‚¥R ’$…]~s—‚Pc#]Ï.6 Ñ›€ ×Aûõ«e5Í잀øá›ÍžkééòOà˜RÛúú:vww±³³“9ºfccÛÛÛS‰®"•çÊ•+¸|ù2666ÂŽdñ¡ÄÛÛÛØÚÚÂÁÁޱ²²‚x`¢Ï'D3Êr+£ÔõÑšiAD\w¢¯Îœ"EÉwœ¼MÝ<σëºhõ™—çžû|¼µ-VÌ™`d|Ú¸n4$¾ÑŸZm<%=‚Ä=£_Ã%Â0ŒÈ¸ Î4lÀ²Èv–“DÓ´p”MZÉ'O#@Ó5 ¹¼Ó Œ¾ë8M’ÒÈòhƧ,Ï]Wíe‘qÃÈg`’3ˆjUóVãMÏóP­V¡ª*ÖÖÖ`F¨ƒišß÷C£Õ0ŒñšëI’¸q©ªèø·Ôó¼Ï4ÅM0«DD׳ Vê*ßGœ*Ý7oÞì6›ÍîC=Ôm6›Ý›7ov»Ýn÷ÙgŸí^¿~½;MnÞ¼Ù½~ýzßãÒgæýÜëׯwz衾Ûe´ó«×ë]MÓF{“ªŠ›Ýn÷Ú¯w»F´­ÙìÝÿ¨Ûí*'û¢5Úi0Ã&/“2mùîv»Ý·¼å-¹ök4º]ÃH¾fÝn­–|MQ”nóD.It‰f³ÛÕõèy»Ýíýê£â?qÚíÞ73sA^y—Ó^ÃgEZ„[­Þë‡0 £[K_H©íÍÔZ¯iZ·Õʱˆ7Ý®,‹G½>xߣ#qSÓ´ä}fЗ<:êÿ|™¥¼¹~OUU»íÔZÝn·»Š¢tkµZ_–eño|í?U²ÎË0zï;ƒ8:ï¡ëƒÐ4qqG?HöçÍ ¼~O’én7ÒkêõÑDjRŽŽŽº²,‡÷£££n­VëÖ3Öù£i­Ë†1ü>’…¦ö¾f3ß=(Å8ò‚<;]¿~½{ï½÷ŽþÅO‘a_~T:kÁHZÃo6#­§^ïo1·ºÝ®ÜívOñâaæwqDÞ˜aô®qqÏ›]=&¯YâÙsͨjï*_¯÷×î™B9MyuMŽ"ÂYiòXõ®‘:˜טúqt$4«v[<4M\oYŠ«õºPÀu]¼·Ÿò]«‰ýe9:}Ö³hkøi^ýdNUÕ®¢( 
J\y×õ¤Mwª´ZÝ.Ð{y®‚®#ÃèuȤ/òø¾ékzEÊðú!ËÉ?g«%žr.Ò~鸓,ÏÎÇžå˜<::ê*Š’iœv»Ýñ‡õz·+IùeÕ0’×V¿÷ÑýeÆzÝ8òr~ö1ÜÅd”Ùx!a»sDMi¨õXº{¡@Ô˜VÝÑׇhœÄ09qÝÞÌ'êÈí“ì9,…8ÌÉj9³ fuÙÃf[¦/å¾P×xJ9l4DZVV:W¹,ö¥¼¶z]¤cù~o=9uÓi6ÅöEªX¿z#fáÔäëõ¯ÿA|ðƒÛ‰µß¶#Q òËB°íÞÚ»xoDiŽý œætC$ ·hQ’¢æHŽ#ê÷J%‘9oíbÏ8饖µa½³²Àeú2îû>Êå2ªÕ*ÖÖÖ`YV˜\.—£ùÙ®›lU Y‹—Wåé"U­Š/×jåoo¬ª"}—ê\û½‡®‹9svf Ö~öb¹ç®¦¡+Äø£û~d¬Ê#×è²ädž0€&´Ö +[ð}?¡´d-àt#pœ“›EÖan¦Á3Ìx¤g ×uQ*•`¦ZùA0°†¢(‰ÆKÔÔ,ãôö;PÕ^A–³AFoñ–$ Ã5ýYš&ö—¤¨ëi­6—Ê35ùúê¯þ|ñ‹ôm·!ËÂ9éž¶“<ćÖëÉÏ:•$!÷¥Re›ŒÞ¼Ð´rú4›‘ˆ™ hÞj\ýˆ—ò!fÊÍ;Õ(/¦i¢\.Ã0 ´Z-´Z-AÓ4¡( Z­VTjš½ãaâV5Nd´V*ÑZNÿæ1Z)jQ©D#hú!ËsÙGá¶¢O`kÌ@üOgà Cž?ü 5–í&'ƒf™›fädŽËx¿Ÿ.pà{%%ˆôÆ9\Ø&¾ŸlÊáº.TUEýDa-•J‰&JAŽnf²,#‚°û# ž'.BIÊÖ¢ò2ix`!;è0yÔä‹dݶŽ _ÛS_Ò)2 $›9N22J:•e‰h+)í@Ô6v’ëC’øú˜3dYøÖÖ¢×âËå ?w­Ö»}š a¶mÃó<´Ûíð5I’P«Õ²vîýp ÓGxK¥þ–µ®'e”ºðæ™Ö@ÕLótš$M™3a¥ÌܼÒ26´Þêúè+¿ „(=8G·W†!Ò©¿q<ô¤C¦gÆê ¤gK V­ãpt•Yh\7é¹O¯ýªªÂ1ܤ(J8¾ ‰Fss #Šx2Ì HgÏÄÑuqˆjL+ôãvÇN`YùZÕñ¼|úð~#hÈ‚©ÕD4аm66—ø¤"¾fm3MõÄqX–• eYy»@[VtYjé13éë(KÆ)û 4niu·Û¶¶¶†ît||\ôyN„ëæ_;©þ(1Ê&šš52D´Õ…Höp‰ 3E,þ¾ï媫Kì’eÕÆh£œfÎHgݺ®›ðŽSŠ/¥õöŒ8ËÈÁ§÷<ý_þ ~|wWª€PBØPefÌ  1ZÓIÄgÒ‚À÷…rîyùGºdİrM‹êU¨n•Y:,KüyÉ÷çyâ5-YÎÖï-KˆÕ¤v™çyaiˆ¢((•J0 #4Vs½¤èjZïrœìk„ÆÌضèY0¬¬pT´ïH®ëÊÊ 666Š>×±‰ û0B»‡hþé¨E“ÎWÖ!ŒU€#¬ÌH šíMžHÏóðÅ/>î;(*ÒÏãÃ[˜9!ïüá8q"KÑWU5¬Iu]·W9©Tz.:EQXþÅK^‚ç]¿.>„£ªÌ)0¨áR<ÓŒ‚4ýR‚'‚ò%i²æE®;¤ ýfN ±Š"lÇa¢_Ü3/9½2N5©ñî¿áX€>© ív;_ªÃL‰ à8dY†ï‹&5q‘M/áýzLDº‰Lžýû‘7Å—•ù3§qÛ¯_Ÿ­I²^MÓD=Æ*€ÞÞÞ1²œ-ËdÈæep>Ò>>>ÆÁÁAÑç5Uòf¥$RÂ<m£Q–‡ %+'Ìœ#ËÀÑÑQv˰qNÛœ3L¼¼‡jR‡Up½ÕEõ¯û#ƒ51҆ºœuÀœ"çÎõúÎ}ßç=J’]×!Ë"Èéô>“ÉFâFñ‘A BfœÎËôÁu“"S«e×]§”¦9~{ rüÔëõl§c¼£5 “–>•îfg‹žëÃ?Œ<˜ã÷X~>®/±_¦MâÃã§’R)ßhflÎDJpÇëºb A€¨éÑÏ<\ùâ`é7ð0½”`†X–…Ï|ìcX à׽蟃Éà¶ííí¢Ïa¦P¿˜,O¼mÛÐã 5¥ÄxcmÒ ô¦]AìXdæˆøˆŽ8Žãô®â ‹ƒÒ‚Ó++(ÌœŸ$6j Lé‹1N¸ë•¯äH³Ðø%]ŸB<š!Ò¯¯ëŠë"îâîØÌŒ D¯Iñ}Žã u¢·H’FZ´‡Ç÷vßãy(—Ëø¿¿á€où–¢ ¦·}³Æóĺ›å©I(ó”ú(ËÀþ5ðߟÓ[ÓÃ[1LôKÛrjöd°†#š8˜™OFò¼kšx¨K"'$ aÀ0s‚ãP÷T÷$=} ˲e% ¼Ód]dÜD†™¶ ô—JBGU·)ý7^Â!IjiyŽéòŠ¢@–e|LJ>üÖoýS0}8‡‡‡0 ‡‡‡¢éÇ¢ÒO™—$)Ræ%éD±©‰ºT±Sòj‘$nSÍÌ%©Îìr¤§I7^ât`f™ ¦åržJ”„0ÌœâyÀ=÷|nðÈQ ®6ñõÝ÷ŵAp£=æIë0Æèê‡ã8‚ ™9Ù×MdP¾ç?þGÜyå ;èç˜ó®ëâòåË8<<ÄÊÊJ¸a{{W¯^ÅÆÆVVVðøã}®cѯջmÛÉ…_>éö j¸”ž ªÔ¦OôÐfã•™âö‰‘ 
Öxz0uƒd¥…™CLsø>žç… 7¨† š&Z¦žÈ9¬Ì"ÀŸþ韎± $›'²Ì: SYe{£ói¾j=]Þ‘åÍz!¬¼þõ™L«1 !ìi÷uf˜À÷Åzü¹|º®‹THÏˮۈ7Õ`gŒ¼>ÏóD+Ž1 Ùž?ðŸÀí·wotÕ÷sçİJÓk}\ã·m1¢F–G7>u3o˜™âyB ©V£,Þ¡ø>Vßô&üÒñ1¤x$ªÃ`£uá9õêUt:\¾|{{{888;;;xÓ›Þ„N§ƒííí¢Ïudâúøg?þq|ñÕ¯Nî@¹ítµxN²„¡ª¬ì0 AŠà“Ï}nÔD†””tj»ªFÚﳌ3 EÞÔ1ß÷ñ’›7Y¾™…„’»>úÑ·á‹_| §Éo˜öHÓ ÒQÑZMè6ãÖ½2Ì q¡·äê{äy€mã³ßüÍø£r·à½²ž2Z9ƒ`!¹m}}<òööö°³³Ó³Ãææ&®\¹²pÍ–*ÝøÜïýnÿŽïèÝÁ0’–m¿¬ 3§ø>Ïÿý[<_ˆ Cxã¯Åç¬ìIg ×î‚ ª_e…Y0¨¤ëº¸páÐlf¬Ñž9ÛûE—Xö™9%¾4÷µ7+•pÓ^õ*¼ùÎ;ñ[¿ú«“ë,’ÄzÏ‚r º_½zÛÛÛ8<< 7®¯¯/\Ýjœø:þÜO~÷~ÿ÷G/X#l žrR»Šh¤Í´t3ÌŒ‘ed§÷ªªHy÷}¡éÓ¡iQ§¦<©4 3'äI ;;+&ÌÂáºÂWnYVÿÎÀ®Ëc9˜…%r,ͪ è:<σeYøG?þããub–†ÛâOVVV°±±Qô9Í„Ï|éKB‰ñ!Ò~Õ“èˆ"« ³@PÈO¼ç=²24MÔ2Å0š&ŠGúuf˜9Åó†ëé®ëâWV¸á³x°¶öGп‹õÉ8†Y4È!3 Ó÷ᬭA9™£­³sýÌsÛ䇘_.\ŽŽø><? (CDS9€Ê,ÔoàÿÑÃs77{wÐõdtŠ|äts2Ìü§\É÷}üÀ[ÞüÀ}º 32® ø¾1þÜU†™cò¬4'»ÍŽ&Æù¢O`–ÐEáößøF@ЫÌÒàyÂîüÙïüN¬•ËÙ;QcŽ8†ÞN 3ߤNAÀ¶m8±‘žçá_ÿzް2 ‡ï/|áðŒ`f)ñ¼á=lÛæˆ*ÓÃR¬”ñøù_ø<Þô¿Þ$ž°Ã,4ÍÀó¼Ñœxwl†™s|?9;T«U”Ëeø¾˲`ŸŒ*Y®™…ÆBm”Χ ³@ÆpÕƒ V&‹¥N ø€ìÊøÒÛ¾Tô™0ÌÔa'ó {k˜¥Ã÷…^þçûûxÕË^Vôé0Ì̠囌լt1EQÐjµðÃ_ûµÉüa†Y~á 'å²xœ¤»çDÌ0sJ|Yö*• jµt]‡¢(h6›¸þßþ^õÜçòúÍd²Ô+àÏþòWñì+_Yôé0ÌÔñ<Ñ û¶'ŸÄWý³Vôé0ÌÔ±íH?·mZžÚ½ È7èaæß÷ñ…/<ÃÈPÔE n6¹Y³Ð¯±Z­¢V«‰F‘Ž”Ë, ¿õ-ß‚².Ãôai Vù¥dàÏ[M¼â¯(útfªø~¤·üÝ[·8åYJl[Ô¯ú'…O¹:a“'‡a€ ^óšÛñc››Qêo²œo1Ã̦)ìRñŠ¢DÎGMrMk67cú0÷ëáá!:ÎHïq]@þkàc/ø^úW…¯¸|¹è¯Á0™Œ#ß@¤“¿ûŸ½p¡è¯Á0}WÆ©\O–˲òEWáÍasJŒ+ßÄ÷~ï_ãoþf?°¿ÏÑSfî˜T¾=O¬åšüÉããË~ù—{‹)J¯áÊ0)æv¬M§Óa8<< <ë–eå~¿ô$ðûG?Œï{Îsø`æŽIåÛó€{îþ4^þ£?†¿ûs?Wô×a˜&•qËÑÕ ày^8Âf ñÔ†™!“Ê7 "O_úÒÇð›¯|ðZ•ëS™¹aòbV|­|öãÇíßó=¸¬ëœÆŒÅÜFX¯]»†ÕÕUxž‡Ç‡‡‡ØÙÙÉõ^Uþí—} ßþéëX98`o;3wL"߀0X¿²¦á#¯{^ò/þEÑ_‡az˜DÆir‡ªŠèj®T`z#;(™S`Ò5ܶEäéÛ:߃K_öeœîËÌ“Ê7 \ë:PZû8>ùêWã™ÍM¬ÿüÏýÕ˜e. 
ÖN§ƒƒƒ\¹r°²²‚ÍÍM4›Í|xê)|á™BP«±±ÊÌË7€W?ó;ø‹ã?Áëý׋þ: ÓÃ4d¼V–\×)dž'BRå2P* m?M¼KÃ̈IåÛóDþ’Ëø—Ÿü$žû®wý•&dëwµ*–âJ92VÿÞ¯ýZÑ_Y`æÒ`½yó&`}}=|m}}=w½÷¼OãGJŸÅ¥·¼¥è¯’‹ýýý0íbÑÕãÆL.ßðwÎÿ¼èío_˜ÔšE•“ÃÃCìïï} Ǥ2®(ÀïüÌ÷á/~ññßþíJ%¡áSÇÔV+Û0m6 1Xy ?[L*ß?ýo?ó³oÀßûÓÿŽ—~ðƒsïX_Táõ{<&ÖÁO2d¾»óOpëk¿vîU^¿ƒ¹¬atQcee¥çõÏ|æ3øÀ>€‡~÷ÜsÞü¯ÿ5Šþ*¹xÿûß[·náøø¸èS™ëׯãÒ¥KEŸÆH<óÌ3øã?þc|æ3Ÿ)äóÇ‘o:ï‡~/zÑ‹ðu>ˆç?ÿù #ã‹('ð¡}úЇ°ººZô©Œ|ÞO?ýtaŸ?éþÜ'ž€êy¸øß§^üb×ëø›ç?_ìø?ÿ§x̼†Ÿ.O>ù$>üáã¯þê¯ ùüqä;¾~ÿ“þ+Üñ «8®½s(ÏiQF€Å_¿;N!ç>éú}×'>«¿òÓhÕWá‰ÿôŸðÂ{ïk]…×ïÓ…tðQ×ï¹4X Í­[·2/–óçÏãèè?þxøþI:›6Ng!/– .àÆEŸÆH<ýôÓøó?ÿs¼à/(äóÇ‘oxîsŸ‹Ç/}éKñ²—½l¡~÷E”@ü=,ܹ{ž‡/}éK…}þ¤k¸¢(ø‹ýQ„*Îx¿y ?=>ò‘à3Ÿù ¾îë¾®ÏG¾ë÷ö¶xqA~÷E”`ñ×ïO}êS…¬ÓX¿ÛW¯âü]wó7 !ç¼~Ÿ¤ƒº~Ï¥ÁOCHÓïâý®ïú.|×w}WѧÎ0CG¾àá‡.úÔ&¼†3ËÌ8òÍë7³(ðúÍÌ#sYÃzñâEÉiQ© 3mX¾™e‡eœYfX¾™e†å›™GæÒ`]]]ÅÆÆF¢XÞu]”Ëå¢Oa&†å›YvXÆ™e†å›YfX¾™yä\·Ûí}Ybkk «««a‘÷îînßú>†Y$X¾™e‡eœYfX¾™e†å›™7æÖ`Dá7µšÞØØ(útfª°|3ËË8³Ì°|3Ë Ë73O̵ÁÊ0 Ã0 Ã0 Ü]沆uQ9<<8|øððpà¨AÛ‡½wšßašç}šçÎÌ–YÊwžíÓúÓ>7–ïåa_Vù>­sgf ¯ß,ß˯ßÅû©Ðe&æ‰'žèÞwß}Ý{ï½·{ï½÷vï»ï¾îO<n¿yóf÷ÿãnë[ßšxÿ íÃÞ;Mzè¡î[Þò–©œ÷iŸ;3;f)ßy¶O‹´|Ozn,ßËÃ$2¾¬ò}šçÎÌ^¿‹?wf¶ðú}6ä›#¬S`kk ð<?þ8666`F¸ýÚµkX]] ·bgg'×öaïØÛÛK¼6ÉyŸæ¹3³e–ògû4È’ïIÏå{y˜DÆ—U¾OëÜ™ÙÂë7Ë÷²Ãë÷‘ï¢-æEçúõëÝ{ï½·ûì³Ï†¯Ý¼y³{ï½÷vŸxâ‰Äÿ‰_ýÕ_íÞwß}=û¦·{ï´xöÙg»¯{Ýëºo}ë[CÏ$çg;³ÌR¾ólŸYò=é¹±|/“Èø²Ê÷i;3[xýfù^vxý>;òÍÖ Y__ïiõ}óæMÀÊÊJøÿõõõÄ{(Ÿ|Ðöaï×®]Ãææf8,zØyMc;³ÌR¾ólŸYò=é¹±|/“Èø²Ê÷i;3[xýfù^vxý>;òÍ넬¬¬$Ú}w:ììì`ss«««…ãøøxàöv»=ð½Ó`NÛÛÛ‰×'9ï<Û™Å`–ò}rÒO¾é»Œ{n§qm2§Ã$2>LU¾y _xý.îÜ™Ó×ïbνØ`ÇÇÇØÙÙÁý÷ß \½z5|½·nݸý™gžøÞIét:ØÛÛ Ï5ý}Æ=ï<Û™Åbò=k9$ß“žû¬¯MæôGƇÉÁ¢Ê7¯á˯ߧ{îÌéÃë÷é{Q°Á:±µµ…ÃÃC<òÈ# oI<ŸfuuuàöAƒšWWW'>ïýý}¬¬¬Àu]ìììàðð7oÞÄÎÎ^üâ}Þy¶3‹Ã¬ä{Ör2H¾;Î\_›Ìé2®Œ“ƒE•o^×^¿OÿܙӅ×ïÓ=÷¢`ƒu †æÑ§…„òÒãáùN§ Ì íÃÞ;)—.]ê{Á¾ä%/û¼ólg‡YÉwží“0H¾'=7–ïåb\_Vùžõ¹3§¯ß,ß˯ߧ{îEÁ넸®‹N§UUqppx“±±±ýýýÄ{ÊåòÐíÃÞ;)ØÞÞëëë¸xñ"¶··¡(ÊØçg;³ÌR¾ólŸ„Aò½ºº:××&szL"ãË*ß³>wætàõ›å{ÙáõûìÈ÷¹n·Û-ú$™ÌùIày€(]auuÇÇÇXYYIt5´}Ø{§ý]±»»;ñyçÙÎÌ?³–ï<Û§ù]âò=é¹±|/“Êø²Ê÷iž;3xýžsgf¯ßÅŸûiÁë)q||ŒÃÃCÙyóƒ¶{>wÑçΜ‹,'‹zm2§Ë$r°¨ò]ô¹3§Ã"ËÈ"Ÿ;szðú½øòÍ+Ã0 Ã0 Ã0 3—p +Ã0 Ã0 Ã0 3—°ÁÊ0 Ã0 Ã0 ÃÌ%l°2 Ã0 Ã0 Ã0s ¬ Ã0 Ã0 Ã0Ì\Â+Ã0 Ã0 Ã0 3—°ÁÊ0 Ã0 Ã0 ÃÌ%l°2 Ã0 
Ã0 Ã0s ¬ Ã0 Ã0 Ã0Ì\Â+Ã0 Ã0 Ã0 3—°ÁÊ0 Ã0 Ã0 ÃÌ%l°2 Ã0 Ã0 Ã0s ¬ Ã0 Ã0 Ã0Ì\Â+Ã0 Ã0 Ã0 3—°ÁÊ0 Ã0 Ã0 ÃÌ%l°2 Ã0 Ã0 Ã0s ¬ Ã0 Ã0 Ã0Ì\Âëœ\×-ú4ff°Œ3g™YÈ?_SL°Ü1g–õâaƒuÎð<år¹èÓ`˜™Á2Μef!ÿ|M1E–;Û¶áû~ѧÅ0S‡e½xØ`e†a†a&ÂqVâ™3Ëúés[Ñ'ÀDžI’ (Jßí²,CUU¸® ]×Ãí®ë†© š¦…Ç0M†aÀ¶mAEQ iZâó Ã3êþ “—A2A(s’$A×uH’Û¶C™'ÇAÐuå›Yf%ÿµZ –eáܹs=Ç´,+q?`˜iÐO–éuÇqªª&dTQ¸®Ëk6³0L"몪ö]Û–íqàkÁ”J%˜¦ @ž•J%±Ý4MX–@(+•J%¼HÀ²¬ð=A T*Á¶íĶ *• Êå2<Ï ·Óg³?ÃäaŒA€µµµÐáâ8N˜vA¼M"¯,ßLÌRþ«Õ*ÇÁ'?ùÉÄ1Ëår¨1Ì´¦¯¤‰Ë¨ïû¼f3 ä²>hm§ýY¶G¤ËF½^ïJ’Ôm·Ûákš¦uéÏÒn·»ÛeYšØÞjµzŽÙív»ºõz=ܦ(JW×õð¹aá±ÆÙŸa†1LÆ›Íf7¾ uUUí¶ÛíùoµZ]Ý£££n·ËòÍÌ?³–Ã0Ç9::ê‘k†™Ãd¹ÛívUUí6›Íðy\Fé9¯Ù̼3 Y´¶Óþ,Û£ÁÖñ}ªªB–åðµxª¯ã8=Û5M ÿïº.dY»—ÅŸ“—&þ^I’Ât„~Œº?à b˜ŒÓëÕj®ëB’$4›MȲ Y–¡(J˜Qà8N"¥&þ~€å›™?f-ÿñta¡‡>~Ÿ`˜i0L–û‘–Q^³™yg²>hmOï°lç Ö¡ºÔ~A0ôýT«¤o SÃd\–e4›M@¥RÁ¹sçP­VÃ횦%v–mf‘8mùW†a$ŽÁ0Ó`˜,3̲0 Y¶¶3£ÃkȲFB‰¸‘*IÒ@£UQ”ÐkCF£Ã0¸v‰™ †É85¨×ë8::B«Õ‚ã8a¶¦ið<–e!Ž1 Åi˽^upý3M†É2Ã, Óõak;3:l°ˆ¦ip]7ô S´4¾Ýó¼°h›:Žñ.d~3Ì<0LÆ©‘Ý dYîIùÕ4 –eåJÉa˜y¢ù—$)ìVÉc˜i1L– 6b™Eg²>lmgF‡ Ö¡ô­J¥‚R©„µµµžøz½Žr¹Œr¹ŒR©”ØNЉiš(•Ja‡àF£QôWcÃeœjòÖÖÖP.—±¶¶*é9f8ºÊ,EÉ¿¦iÐ4—ÌÔ&Ë€ÐILÓä(³ÐLCÖó¬íÌhœëv»Ý¢Oâ¬ãû~æ¬'jž$Ër˜SO3šâFi¼É¥ 3Ì<ÑOÆ Ê"ÈÚnÛ6,ËB»Ý.úk0ÌX°ü3Ë YŽë,\ïÊ,:ÓõAk;3l°Î1¾ïcmm ­V Š¢À÷}”Ëe†Áé‘ÌÒC©4¥R º®ómæLÁòÏ0 Ã0N žcâ)ÁçÎC©T‚¦il¬2gÏópáÂȲÌ2Ïœ9Xþ†aFÀV†a†a†af.¹­è˜Nò'‚½èEEŸÊÈ<óÌ3¸ãŽ;pÇw}*#óä“Oâå/yѧ12Ÿÿüçqþüy¼öµ¯-úTróîw¿wÝuWѧ1‹,'Ÿÿüçr]yòÉ'ªÁ¯áŰÈ׿ÊÊÊÂÔ…ñú}ú,úúýÆ7¾+++EŸJ.xý.†E¾6G]¿—Æ`ýÓ?ýS8޳07¯8žçá¥/})^ö²—}*#óØcá oxCѧ12O?ý4nݺµPë¯üʯ,ÔùÆYd9ùä'?¹ëÊc=¶P+¯áŰÈ×&€…‘^¿OŸE_¿¿íÛ¾ma V^¿‹a‘¯M`´õ{i Ö»îº Š¢`{{»èS™ýý}¬¯¯c}}½èS‹EüÍpãÆ¢Oc$^üâ/äoM,â¹âðð›››EŸÊXç¾Hð^‹ø›/ÚÎë÷é³èë÷êêjѧ‘^¿‹cóqÖïB›.TªÑétŠ<ÅSass“/”%„å;bQåd}}}!•Ó‚e\ÀkøòÁ²±¨2Âë÷`XƼ~/…DXqíÚµp>Ñúú:®^½ L§Óaá…¤ª*,Ë*ú·b˜\°|3ËË8³¬°l3ËË8³ˆaÝÙÙA§ÓÁã?Ïó°ººŠ½½½pûµk×°ºº Ïóðøããðð;;;EÿV “ –ofÙag––mfÙag‘S7X±¿¿«W¯†Åä>ø .^¼@xvpåÊÀÊÊ 677Ñl6‹þ­f(,ß̲Ã2Î,+,Û̲Ã2Î,*§žL)ëëë8<<Äññ1Ö××Ã<ì›7o†Û‰õõõ3‘GÏ,>,ß̲Ã2Î,+,Û̲Ã2Î,*§n°Þºu °µµ^·nÝÂîîîÐ‹âøøxaZ|3g–ofÙag––mfÙag•SO Ž{wÞùÎwâï|'677±µµ@\ý  -‹'Ÿ|=öXx†éÇþþ>zè!xž7õcÏJ¾à©§žÂÖÖö÷÷ üõ˜E`kk úЇfrl^Ù¢ÙÙÙÁC=„|ä#S=.¯ßÌ<@ë÷,F“ñúÍ é࣮ߧn°^ºt @²ó<€ããc l-=h&ÕË_þr¼á 
oÀîîîi%fÁØÜÜă>8“׳’o¸ûî»±»»Ëmú™¡ìîîâž{î™É±y gŠf{{>ø ¾æk¾fªÇåõ›™hýžÅ¨^¿™¢!|ÔõûÔ ÖAé+++‰Âo¢Óé,ÔeæìÂòÍ,;,ã̲²Í,;,ãÌ¢rêëúúzO í¬®®†Û666i3®ë¢\.ý[1ÌPX¾™e‡eœYVX¶™e‡eœYTN½éX–•¨å ‚ob{{[[[888‹¼xà¢+†ÉË7³ì°Œ3Ë Ë6³ì°Œ3‹H!ëúú:ÞñŽw„Åß#mg˜y†å›YvXÆ™e…e›YvXÆ™E¤ƒ¹òƒ.‚aÛfžaùf––qfYaÙf––qfÑ8õV†a†a†a†É¬ Ã0 Ã0 Ã0Ì\Â+Ã0 Ã0 Ã0 3—°ÁÊ0 Ã0 Ã0 ÃÌ%l°2g’[·nÃç?{ѧÁ0 Ã0 Ã0Ì ëÌ0i‚p]ñPU@Óúïëy€¢do‹Ï·ö}@’¢ÿ‰ÿÿçÿ|7$éÓEe†a†a†a0–Áº³³ƒƒƒâÊ•+X]]E§ÓÁöövÑ߇Y@‚0Ma¨jš0DM³×`õ<ÀqÄ~’4›âuÓÛèy£!ž«jÿÏ4ÍãÆýÕ†a†a†ÀÈ)Á;;;ØßßÇÆÆVWWˆyMûûûØÚÚ*úû0 B¹ X–ø¿$ #µÝj5@×…±é8Ñþ¾T*b¿V+2N@–“Ï%i°±Ê0 Ã0 Ã0Ìb0R„µÓé`oo»»»ØØØÀáá!@UU¬®®âòåË8<<ÄúúzÑß‹9e(7ý½ß§VKî«ëÉçµZ”Ê ãÖ0²Ó„Óïe†a†af9É`½yó&`cc£gÛúú:Ö××q||\ôwbNJÉ¥H©e‰è(Õ˜V«" "Jû "n¬º®ˆ°ÖëES†a†a†aN“‘ Ö••Àññqøâøø8Œ¸2Ë‹²½æyâ¡i⑎–†xÄI#žض8>« Ã0 Ã0 sö©†•¢¨[[[ ã´ÓéàÚµkX]]ÍŒ¾2óGˆGÏõ¥¶-žK’06©{/¥ùÖjƒ»ø¦ÕXD–,ó~†a†a†a›‘»ïîîÂ4M\¾|ppp€½½=¬¯¯Ã¢.:ÌÜS©C”"¤•$¥ZSEž¾;KÚí¢)†a†a†aŠbdƒuee»»»8<< ëUWVV¸ÑÒœÂ%mµD½éÚš0Pëua´r4“a†a†a˜y`$ƒõàà[[[xüñÇÙ@]ªUa ¶Z‘!"Š*I"ºš®Aefñáx Ã0 Ã,#×°®¬¬À¥3S‚ šU:Ž#ŽQ«‰N¾„¦ñœÒZ­†?üÃ?,ú4f&ضÇ{¬èÓ`†a†™˜‘»ooocggN'sŸííí¢¿ÓRày": ã2žÒb;¡(É10„iŠÈª$ ãÕ÷£ôà¬ý—Ïó Ë2¤“/]=ùa¿íÛ¾­èSc˜AÛ¶¼“‹Þ0 ¨''ß÷Q©Tº®C×uض Û¶qß}÷}ú Ã0 Ã03r ëµk×{{{™ÛÙ`ÏM‘êõÞHhˆ.¾ñ×M3šo'Þ°¨Ùÿ–ˈ]4Hq—eÚI{â à8ÇrbÑK’EQ ª*\×…eYNÚ!+Š‚  ILÓÄ7ŠþZ @ÈrµZ…çyPUŠ¢À÷}8ŽMÓ ªj(÷ÕjªªB–eX–…Z­UUaYJ¥ ÙlâêÕ«E-†a†a˜‰Ù`õâ¡=fê0VdTÕ¶]õ¨º.ñm–%R=O쓞ƒ ˆèj­¶xÑUÏóB%Ýu]˜¦*ôªªÂˆ}YÏóà8N¸­V YÏóàº. 
ÃÀÁÁAÑ_‹9ÃÄË*<σmÛ0 F®ëÂu]ȲŒvF›ìV«˲àº.šÍ&ä“âôZ­:d†a†a–…‘ Vf¶X–¨1M’qÑ4)köiÜx5ÍþM”¡°ïû¨V«ð}²,CQ8Žƒz½¦ARz¤šQˆ«(ÎU%4^fÖ˜¦ Çqˆ4^]×áºnèL‘$ ¾ïC’$´Z­ÐÐTUu Óñ²`c•a†a˜ec,ƒu,Vîd€IDAT®ë†Qª ¨ªŠÍÍÍ¢¿Ï£ëÙм|)õwžñ ÃH(â’$ Uè¦(×¶mhš†V«… `Y.\¸EQQQ†aFÃuE&ÑŸýÙÝå}: 3žx≢Oa抑 ÖìííassW®\ Æ'\»v ‡‡‡\75¾/þë®ýôØe ÆÓ{©Þ”Òx$"M²,³bÏäÆó<˜¦‰F£qª‘F2RI†5MK¥’$¡^¯£^¯ý1ÌBS­Š²E.]:Æç>÷…¢O‰a¦ éH_úÒ—Š>†™+F2X;ööö`YVO„KUUlmmáÊ•+X]]-ú{Í=ÔXI×E½©ï‹tÞFcvŸéŸXÈEÔX¦^¯'Rsµ¬-† e”¹® MÓBý‡JD(ÃŒþ­×ëØßß/ú´f®É`½yó&€ì:Á ¬¯¯ãæÍ›l°Áq"ã”ì6ÛήOß÷Q.—¡iZ˜NKõtT/w¸® Ïó idYF¥R®ë\GÊL ¥“ÓZD²¥ë:*• ,Ëê[çI§i\Lº×0£“ºOK’”i¤ZV4ÿ8yÎâÚÏ›ÞÏ0g »¦ªâ_Ï÷GY÷G@tÎg˜y„J˜Ïóàû~Ø@ô˲ày$I‚$I}KDØ`e˜$cÕ°cee¥çµ~³Y™ÛšJLS‰5M†aÀ÷ýpÌÕÓY–Ö‰ö#˜¦vÔÕã]rBÍ“t]¥iÚXÇbÎ.A„²­R©@UUT«Õ°¦™d«^¯£\.‡ãaß÷CeAUÕDú9ضÝãè‰os¾ïCQ”Ä,Ô輄#*„’â·mñ¯¦‰ÿ—ËÂYÅý‘˜³ˆï‹ì"Iÿ×4q­P{ š5.IâZâÛ3OÄ»¼“QJNOB’$hš–Y Ax/a&#¬X]]…iš¸zõjIít:x衇Â}˜lÈXÍš™:.Žã$¼wŽã ‚Py'Õ¶†r¹¦ŸdÏ4M躎f³ ˲°¶¶UUCb<}%‚pΩ®ë0 #4(â}& ê’Û¯æÔ²¬0J2MòIT×u{…Z­†J¥^dø†Ñ·–”"´¦i¢\.£^¯‡™T‡Ú/úJJ¸¦%GJ¹ntÍ"BdÛbßEhÆ0“âûQÖA£! Ðx’éþñ[…¢,Oßfy ÝF’¤Ä½À0Œ‘ŒOêßÁ0L~FްRªÝý÷ߟx}uu»»»EŸ¹Æ÷§k¬ÆÓ À¦i0M͘&œ^%I‚a¨V«‰ý„ãdâ)*õz=4TŸTCH‹yžÑÌÙÆ¶í°4+—šqµÛm”J¥Ðð¤¢ßx£v» ß÷Ãô«AM\7R˜kµ\×E¥RÁ_ÿõÝøñÿG3hvrz>²8¤rˆ}(Å‘a–Y×A¿’—<· Êâ±MgrŽ!ä̧) Ü.#¬ëëëxÛÛÞ†?üÃ?ħ>õ)ÀÊÊ TU]ÈÚUJE𥳋jqFMûµm¦iB–ehšÖ‚GgÑY©TÂQÃjð4MƒmÛ¨V«Ð4-L­ÐcÄùºõR=ŸeY‚``Ê1ئ Ïó´\JÇGNMÓD½^e«R©ÀH •òÈ®iFiú´«ªª¸ï¾?Ãþþ—Áqžsr¬låÚu³ÕAHR”2Ì0ËÎ$ú=•”AÀ¶—ÏóP*•Ðl63žç…Y3ôœî³6`)8À#ɦ8Îú†ýý}ÜÿýxüñDZ½½íímàþûï_Ø"qÓœÍqƒ(•¢”§Áç`Â4ÍЃH d»ÝôjµŠµµ5T«ÕÄø*ܧڻ<Ðþ¶mãܹsC#Py1 ƒU¦ª‹®T*X[[ d ]×ÑjµÂÚgÛ¶óv©ntÚM»l[\ŸµšH[ŒÎx×»¾O>ùÔë©¿Ž+Q‹J5x£ÖØÉ²pd1Ì2CΠq ‚Žã Ùl†ã£˜å¤Z­¢V«¡Z­†zá8*• šÍ&šÍ&ŽŽŽ¥R ö SVâÁ6V¦?¦)KæáÑG_Œ§žzñhÐgŸ}¶ûº×½®ûÐCõlûÕ_ýÕî½÷ÞÛ½yóæ(‡ìv»ÝîO<Ñ÷õ¼Ç»~ýzæy Â0ºÝz½ÛUÕ‘O¹/š–<^«5ü=ív»+Ër×0Œ®,Ë]]׻ТtŽŽ2÷o6›Ýv»=½“>ƒŒ#/ã2 ùîv»Ý·¼å-§r¾“’–ÍV«Õ•e¹[«ÕrÉ­a]§"ãF·K—™,w»ô‘õºXÑçòÌùMöþ<œ¦¼µ†3ó¢t»ïyÏ÷¼^¯×»’$uÆÀ÷ëºÞ­×ëÝn·Û=::êʲ® §%/gmý> šÍf·Ùl†ÏkµZ×8YpëõzWÓ´n·+þæ†atEé{?HËÅ´8::êÖëõ®,ËÝVEnÊðúÍ,µšXóûqt$ô*Eévßüæ¿èþ»÷ðHÇ)Âzxxˆããcloo÷l£ù«4ú&/;;;ØÙÙI¼Öétpùòe\¾|÷ßî¨á¨8ÎôFÉQ[þxVmž`mÛad²ÕjA–åDô)ªªìé[Š”ï"ð<årkkk0M¶m‡žñ<#cQCÚív'’qšsÜÏÛG|M‹Òrk5qý"b:,Q`’,4Y^žtà³&ãL>|_Èù÷}ßw†%'€X#,Ë 
›úõ‹ŽQçUª§fjårårÛÛÛxê©§fúX¶gCµZ…eY¨T*p]Žã„¿#ý½+•J8å€ô¢,¨/ǨQVÏó`šfØ92GM÷J¥RØÓcY=¾?»l¾I`gÆÅóD¦e‹†Ðk²²Ç‚@ì É4?Ž»îzv¤Ï9%x·nÝiÿƒƒìííõ¼~íÚ5¬®®Âó<<þøã8<<ì¹ ¦A»ÝÿÇËžhÛ6.\¸¦¼P÷ÒøM™›K,EË÷iãû~hœ’‚Auª§Ù1Þ±×uµµhÜ ®÷R)2N M‹f@žÆ9æ)˜wΚŒ3ÉñMƒpàöÛ?¼·‘ÑZ­VQ¯×¡( šÍfXþ’üŒ süš¦ih·Ûh6›ØÙÙÁÝwß=³ïɲ=LÓ„¦iaY¥Çu’V«•«ÄH×õpB Hߢ²ªølÔR©N]¨×ëh·Û¨Õj|¨V#Å~кï8âÞtZ†-Ë8Dz„¾”w÷Õ 6Ç:f»-ö¬u?„QkÛâÞtm@XÆ™8–%l˜V+2(ã}=€ÈùB]ïmD©ëÂ)סd9ÒÉ&aä«eY888Àë_ÿú0eàõ¯=\×Åîî.VVVrçÚµkØÜÜÄÅ‹¯SJqÜð]__G§Ó™ì›Æp]™”xdÖ²†·ç§y‘µZ-4ZÇ82ƒYLŠ”ïYây.\¸P ( X×õSs¾˜¦ð’·Ú¶ ÄsIê5Fkµh¼Œ¦ vR5³O×U”é–#Á²Êø²àyâ:1Íée T«â>—å%§¦¦iÂ÷gžyoxÃ7'F¤éºÞ1K­Õjªªz_dÙžÕj²,£\.‡©àº®Ïĉ©ëzfZ05kj4É–ç‰ë`Øv:È”Á¿)㇔úÓªöbgr ×ëѺ—[ JÿÍ3ñDÓ„£&¢²¬iÈõÈëúú:ÞùÎw²,lll`ccW¯^Å;ÞñŽÜÑÕýý}t:ÌZØAE<ª; AüñÆuâ:N2j3,ÍŸk¡r?«Å›)Ž¢å{–P½5\¢Î¿ä„™Ã:‹Z–¸ÞÚíÈI¤ëÀÑQ¾™Žƒ¤ÙޏгÈë2Ëø2À?ü‡Ç¸yóPÕÑ0”-‡j¤²B÷?Û¶¡ª*êõ:\×ÅÎÎÅç>÷ó=µoýjáâF«øŒâ:ͳl燺¾Óè¡8½«Õjh4¨V«‰èê´¡fñó 1{YÆÍ¢ÉœÔ9>n”Ò ïFc¸þ8MXÆÇÎøQÒÄËåþ™0YXV²ŸtüÛv$§yÔ<Ã×È´³Õ†Îaít:8>>î1Fãi=£Ðét°··+k>aÐqëÖ­¾Ü'Ÿ|=ö±»»;ðh.*1ê½Ñq"/3ÝØi4¤ã8a£ˆ¸!J‹h<ʼn Ù‹aoûÛñœçô¡ÍäØó°†3”NX«E F¥Ȳ‹}èg ªo ®ß~¿£ú%ò¤û~¤¤§•EÇ¥&Jdx®­­åš G’$´( b;;;¸~ý:.\¸0ÕßrÙ×ïiB5£­V ŽãÀqX–6×s]7L#¥Üa5¦“¢i,Ë‚ëºð<Ad¦²’¦^ÝÉIºé~Š]3õúp‡gúVY«õ¾‡ÖïÃÃÃÜ ¼ðú½PÓ:UúP¹œßV‰‹<õÖé§¢Q´ßÒKFç(ÑI~×é࣮ßC VØÛÛC§ÓÁÆÆTUèÚßßÇÊÊ \×…ëº8<<ÄÍ›7±³³ƒÍÍÍÇ^]]í»íå/9Þð†7dzŒÒøþø‘êôFÜôò,¨ªŠR©EQ Ë2TU r¦x677±ººŠ7nLýس’o¸ûî» ½P…ˆGA¨N'Ï‚æyb]Oî÷6ÂKW.‹ý‚@,¼´°.CR‚mçÿÍFeww[[[39ïyXÙrÀ’‘£Çó~€h†&Ë2t]\_¤ʼn+Ü”šHÈrtM¦©Õ¢u!2N%üÖoý)¾áž7³ï¼½½K—.M} _æõ{šø¾Ó4Cg%e׸®vÝ¥9ñÍŒŸ%š¦%f{r˜P×í¯šfd˜JRò: {R³)F£dæX–8ž®g;-iýž¶± ðú½ضx®#IBF‡9¹M3[¯²¬¤±ëyâuEÿÔ‰¡ã\ºý®‰quð¡ëêêjè‰q]ûûû¡×‡ ØQ¸téÒÀí”Oßét‹#þÿiÉÊ4Åó<_¥RéÿÇuŠ¢ V«¡V«Áó5ÆnZ¥šuº§Ù¶ßüÍß ·[ Ëw㵯-ú—–ílÈ@¥¿9upN ”yGý:Š`ÐçRvÝcÒˆ8ñlŸìÏ÷¾rY¬á£Ä#4­·¡ÍiÁ2~º³QÑŒ£ªBû¬®+é(¬aDzérõ¼Hçv™Ô÷.“\Öè‹Ò€]× [bb¼RÝ+A­²ã^™ ìï¹®‹ò( ÙCH7 Ea$¥: ×uMgE)¬Ë!S ó ßÓ‚"3€P^Ŭ:%tÚÈrä l·Å¢à Êô½^Qô[BÒ©Áˬ2ä|x챚ɱ—IÆ Ó4C2î9¿páÚí6$IB£ضÞ¯³Rÿ¨JÓ„ÃèùÏÿu|åWþ>Êåÿ#Lå¥ö¶m#†v}¥ô_×uñƒ?x ßÿý¯»qÓq–ílÈqAÿWe`ƒ½ytΛfo45^Kš¾oQšý j5qÜQËÊd9Ÿsw°ŒŸñÚÿa)µ¾/ö×´l燦E£f²t¬~rH6 Eg)²Z8ŽvæÖ4 Š¢À¶müÒ/ý^ýêWt¬‘ Ö8qãõðð®ëbooW¯^8a{{[[[888Àññ1VVVðÀLíL¯³yÿxž7(\Ì÷ªµ 1 ìå{ZT«Õ°aÍ 
&£’®ª'¥EpÁJµY×›,Ÿ^wÄyA¤mÎF¹—eàû¿¿ ˜ÄE‘ñy…ÆPó Âó<üùŸYâZòNr©!íGÊ—ê T*•NŒO • ðƒ?ø ~ù—ÿ~ï÷„Åiš&<ÏC³Ù„,Ë‚Õj•J%Lõ$G,E^ÿüÏ#c À~I§ÂY“mÊË[_<úñÅ¿y|ûãd¤PÈÎСTËt¯„ôÐCS=‡4Íf·kÙÛjµZ×è·‘™;NC^1ª|w»Ýî[Þò–S=GI’ºÍf³+Ër÷/ø¦n»Ýžèx†Ñÿú9‹ ZOÆ¥ÕêvëuñÿÓ–—4ó¸†Ï#ïyÏwŽz_o6Åß³Ûív !+·ßþîw<Ñ­Õº]ºëõz×0Œ®,Ëá{ãÿ×u½Ûh4ºÝ®¸O©ªÚ•e9¼žu]ïÖIhP«Õº-:¡ŒsÕ´è|Oƒ"åeÖïi¡ªj·ÙlÎìø­V·+ËÃeGÓÄc”ãªj·«(ÑšØo¿QŽ{Z-/¼~O³)ä.-Ó­V·«ëÉ×êu±oÖ= tý {½>úú\¯×»²,weYî†Ñ=:ùv»n3 £Ûl6»ªªv5M ïEš¦ \7Æ‘—\ÖÃÃC†N§zk©žòÚ)º:­îw+++‰´…YBM`†y7â©Ä¾ï':â9ŽƒÆ2ºÚ˜™pšò=.T¯äº^óšÛxšÒ^]w9ºûN аN ßO6­*šEñ"¡fIÿò_¾ _ø‚Œƒƒ•„ç;í-¿páð?ðkøàºÞ =ÞM ‚ ì£o6£ªjèÕ¶m­V ®ë¢R© ÑhäÎ Ô4PU…—¿RA"5xY9+²í8$IšIŠoµ*¢šŠ’¯©¦‰Ç°ò¶û75ŠCgq¯T'x–8+2žšg=ìþJÙ1Y‘UzNÙ(ž7zÔ“jU©&•îùÃÞ?j0eÚPf…eYX[[ ³kEILˆ û 0ÝÉqr¬†a`}}=Q Cµ«»»»XYYÁÞÞ^ØelžÉ SQô0âï¡–îÔ|B’¤‘Zø3Ì<O#¬×_ˆúOó¥b¹n4:£TJ^kƒZ«ŸEdy|Å(=|žšXca î£Í&ðü[|Ë·|Lóµ¨ÕD—fS¤ZQ'nѤ¥‰õõ¯ÀW|Ed€âZ%ƒ“êMã5ÉŽ[Ó4 žç¡T*Mmæ©aL×Ã)ªf– \­ŠI§{‹ç‰)×ñº¾øZY­Fk9DÓ÷–Qª³(-˜F€0LߺM¨9d?}‰ä2>a½HQĹP{ž”ã¼PçhÇq iZb$5“„>ã‹g¨Ázpp€N§ƒGy$œ¿t||ŒÃÃC\½z5|mss;;;888˜k¯L|1$T5_Ƕ¸’WZ–eØ¶ÍÆê¸˜–¼æi‘pêÈ@Qø¾x-¯!¤ëѵDc˜þŒº¾Ó*¤ùêæÇ †fœzžEQð¼çý4‚àµ(—©¤fðÄ#©Ôøˆº°ÆçZªªŠjµ ß÷Sr¦:ŽƒvìB&åcZJFÚ¨`’­z½>RÃH×÷<ª'Í2(e92f5-šÊÞ‡ÐÄ÷¡Æ2}åÞCÎÔ˜> -9) ® en'F0}p¡ÄÓ½Ä4£¿' <ÛªªB–eø¾Îø¦(9‡ƒ ¤b_±øº®›ÈîÑu=Ó%…aú‘ˆÈE:EÇêADz?,h@Fk¿ñ´O£!ö‹ëÚª:Ú„qÎŒ ¥óú¾ßû½—ÌݼlÄd ÒºCN´ºŸ01¨™•œ”J½™;42@Q”Ð`ð}?œáíy^82-®Xжt„U’¤ÅÊøaæ†jµ:òX>*¡ûÄ rWÏÎÐ<™†’4\ùWî‰ÀÌUMÞƒ)⚆fœã8(—ËX[[CµZEµZÅÚÚJ¥,ôèÍ¡)ÁëëëØØØÀÖÖ677ùLt“<>>Æ7píÚ5lll„]ƒç‘A•ÒJ¡aD Ò zÞê4íl"š9®~dŸC;9¯ISƒý“㨘Nô÷ bÛBöƒ @³9¿×ò²@¶E ¬_¤€™,ÏŸÇ–é9D©>UQ¢Ž’šüÚ¯½ï~÷»!I×Â÷Pi‰ªªaSCêô+Þ§ÁqœÐˆ%dY†çy‰Ò†Ó4ÃHÿ(P pE×2f‘ r:a%Ý$©\×À4–aÓ4ᜤrªª UUGÊæ´m¶mgÖ /ýbh„u.ªª†£®^½ŠõõuÀÃ?vž÷zÏë/Tñ4êœc#<‰TG´x¢ ‘>;é±$3ó2¢zZc ç7Rå“çܵr(A„]!i!ö¸Ýç©R. 
ãµVãÚßE…ÆsÔjHtÔ'þìÏÞ…ßù·…Ïã „hp&°xÔTÓ4¸®Ûsß¡ {/b !˜¦ Û¶áylÛÆÚÚ€Ñë™)$=q¡šÊ–rœd<Ã,¦™}/–åHO¢}¦qϦFH­V ívš¦Á¶m”Ëeø¾^»kkkaµ\.‡F®eY°mÍf3ó¾°èÆ*s¬ÍÊÊ ®^½š¹mss›››sY%D÷ÓáûyžXŒ£Vë‚@9i¹î,n:0 ŒLÂhäk'Ç™¤¾1™•LÚ=?ý· TNŽ;©óŠeY‚õz=\ˆY>\Wdrè:SXThÖp²ÎÉ„¦i %ŒN6B%I kŽâ³îÈyÔo¶*Ãä…Æñù¾×u!IšÍæØÊlÚÆõ×É×L“ço3óƒç‰Ô°À%õȲ¢²Áqd›"¡´Ö˜ÊÏܤ«ã8¨T*‚º®':½S¹eÛ4›Í¹ìî;-r¬€¨Q½qãàÒ¥Ka ë"ªÄ aÓ¶-PQ’s½nÞüV¼ô¥ßƒ/û271›h¡ {’ŠÉ;Ç£™düNÃãC—£^snÆçSº±îBœ">|”dÊ(ðó %f¦5jàt¹ÅƒfDÚv²vjQ©>5ÚßG­V ›ö¥ VEQ2;üjš–yM҆ɋëº0 c CrmM¤8­~ÛÅX¦hû ™” ÄbYù~tfbLsð¦~s}ãкßlŽV·ê8N˜~7.õÅÑ4­oZ0ÍÔ^˜&°¦)<óc.¹R‚wvv°µµ…½½=ìííakk ;;;Eõ‘ ¤®ú~´R‡e9zíÊ•¼úÕw@Ó´Åõ`ÄËn§a°Òõ`¼æKåŒ×<Œ×€)@¶‘[;9¦3á÷]",K¤ RÖ"EpþÇÿøK( zêå˜Ù‘§¹3?¸®hžT*‰ÔÇßýÝÏâ—~é8¡´ÐõO«§ô_Jñ;Í×0 BR¯×Ù`erC¥ƒÖvÛÎŽ’&Ó¿y%¬”ÌÆjŒŒR°ý»iFÖ3H®%šú‘&Þ™¿º>ZÝj¨T*aÊ®Èf‹nK¹Ž[V²>Àu“©«cË`ÝÛÛÃææfØ•pss{{{=ãnæAë@z(°,‹×žóœ§BÁzà¿—©8,,&Ư•ˆ:F7ûñ4ŒgLK蟞\?ù®|@¥"2­–¸HQUùÈGp„•aú!Ë¢éF«%t̾°†7Þ–ØÇ÷}†‘0Xㆥy¥›`ÄëVãô›“ÇN%flÛÚAšæMja` G]±¹ BŠA?Š,Gi'Þ°—ÿÏÿYô/%µZäTICå€ÃÔ|Uí ‚ ¬=¥G©T‚¢(¥ßÏ=i‹~Äj5*nŸ°YíPƒ•ÆÕ\¹r%|m{{ÂQ7‹B,õ»‡x!u<Âú×ýáoù-«×ŠþÆ1\ˆzÎ8:Æ'£#‘CÆâ(£d|ô±C黣˜ê€ãÉ'Ǭä?ܲR«E]îÈ`•e²,ãé§ß]fC2 # y¹éäÄCÆi»g¬YŠK«ÕZÜ f®¡ÚéAPwëxÆYj×Izé¢R˜ ¾/ÂvÃ~”ZM(ªªŠo≢Ïzé 5š–mpV«ƒe{Ô©T*Á÷}Ôëõ0šÚjµ–+Ø•¦ZÍöd‘¼—JâGŸÐÉš+ $kUWVVŠþy¦Žï'=.¤K¼ím¾6òo=Íy D-æ¸Hèm²$Ÿ¼6~D”5¯‘™'ˆWð]’ÏU;lsÆ£¬q¹Ñˆ:’*Š‚§Ÿþ³“4ø3þ#1L Ꟶþû¾ßc°Rä4>[5n°RZ0GM™Ó‚²fúEw‚ ™"Y«õ¬A¿qåR7óýÁ9ÑD|] K(¯3JÓðÿ¼öµEÓ¥"ý'Kûn,kpä´qCNÇZ­^k²,/¯Ò¶…1 ô·ôëum™Üë¢CEÒýˆ¬qÙzÏ{¾ùdÒ*cz’aX–0Þ¨ÙóR Œg§£ÇeÍ{¬x l?nû»À÷løñÊÃw3Ý-ض“£šD—;74Xÿà¾ü¤GX §Z7žQ8ñl½8A„Ù ää¡×$ Ötúoºƒ0ÃÌ’aÑÕt¶ª¦õ¦¦Ù¿îo©!å‘ ÕrYüÚ¢X–è`U­ ¥rQã,!Ž“Ô}€(è ˆgx*°ëº0M•Jår.\@¥"R÷ÈP](ãÔó„<ªglû¾ˆ| ˘’ë̬4[54ì^ô¢}H’4úïMc_¦•@„Óîv;n”5K×¢GyÈ[&ùÛ«ÀñWÿjHúù(Ÿ¯9Æ«þ÷ÿŒWÿáŽþ{-0®›¼Ç>úè_â¥/}Møüüù€÷½ï“\Ã:)®›à8âæàæ(¯×E‘¤ï ÃuTMÑuñÞûÞ¢‰…Ä4{³›²tŠšö‹¤ÒlÕ¬F7õz}h=!ÃLŠïûáA+u+×a]W,EgRd%I|yj Ój ãsPæW¼Ø]–9?ú1Í^ûKU{ÿ²íçyÃ;þV«Uض Y–ašÍ&ŽŽŽÐl6çËPuѬdAÍÔó¼HÏäˆqa¨íSŸ{¬ÍÖÖVÏk;;;xøá‡¯íîîžÚÉ‚¢ þ]i„MšW¼âÿ Yþ!xÞˆ)Á¦3æ%}¼q_æÉ{³Þo@¤ßŽr3jŽøzâ»xÀû_ø? 
žÓB ôØ _¾ü£——1¼î•p]ñK/"žH¿Œ/¾àGðÁo{^?æÏ¹ˆFR†÷önâyÏ{søüÕ¯þ">ó™Ç8ò3 ”KGðh¡¡Ü¹xŒ¦ y´,qW%Ù”$qÓHGµ%I¼Ÿ”%מVæ8á]ø±oú&|{Ñ¿ÏBé¿årôçÌRÔ=σ$Iežç…£jHq¡NÁéñ6 sÐìFEQÐh4Ê`–-kÛbI’$±ü,ê4¿¨@·ŸBçûIe1½Ž‘Â_³³FvpøSÇ0„íEc*þ·LJ}³¾ï£T*A×u†I’P=qV4âѬyÅ4Å7ÍáÅæ­Vý÷¼H Aµé÷{žpÄTº2Ô`½xñb¢á±¾¾^È ‹$ Öóè÷'½“þNþð_‡“‘ÿF„¡(c4c°ñûÍ 4‹AA³iœŸèà#~¼/<8xðÚÿÖ+ð–%.†ÿh9‡X½ùµÀ×oÿ°Ô}à¿2ù>lj¼ 4ZU£º‘øÝXQ€;8nLøŒ´ü~ÅW¼¯~õ«Ãç?öc+°íß(ú4šŸ§iâ.™çfFE2ÔÅM–Å5cšâõ¬Œ, mѲĘžÇqœhš¹aªŠg2œŒÌ`(ŠdâÏbYýõMß÷ÃÔ^ûÄIA"^–e¸®ËÑTæT¡6íA&¥Ef-;ô-qKãs¡/MJ,‹µ7¢¹>ÃÖò¬EÁq¦R§ÇŒÝBUUüù*ñ¯iöw¶dU>™¦‰z½ÏóP.—Ãì˜ú"DÉéb­Õ¢ÔõA©èq=:ná÷£`ùj°®®®†]ÿ3lÑGà Àí·_†ãL0ÏYÅtf€êHÖ_ÒqóžW0dßQÊ+(ÝB?ùÁ,+º0èGþý¯þ×ßé}ï¸ÞÆk›À– üï×ßýÍèBó}àÿ|1ðÊïøbÒÃI5'ÃÂëgQ›š”ãv»‰ŸýÙ>ÿË¿ü8Ny¹»Ù9a€ìh=y(G%~cPUqœa©Ø†Ñÿú¡;õ¼¤%-(²ýÄÃpмUI’Â4zÏó×¢(°,k1”fiÈÓˆ|\Y_Íu³k¸–ø…íyÑ—ôýÐÙ7¼öý Éž"¿m©4Z6¶ã8‚š¦…ÍñlÛžÏõ›RH·%‡ ]¬‹ ‘Ü)ÁÄÁÁn܈BS—.]ÂÆÆFÑßc(q{ª§u^Ô¯xŵñèç4:§Ï{”èj^,ˆ”ÛaÇ¥†I¾/¼8šÖ;3È€oÍ~¿alâ¬QUà=@ýÁä!ËÀkåì߃êH˜°2~/ö}ŽÕgÿôO?SÞ_òb¥Ý´ÓŽšMòÛ³²4Fɤñ}¿'ý7þ°™›ú&æLàºîPÇ#5š¤‹÷‹¾. y¢Jy {3S(>’–IEéuªèúð?oa£<êöÛŒÝçUUÏ”4g-®ßRt5^j´dä6X]×ÅC=„N§“x}oo«««°,k®Ó„}øz’Ö?}ßÇ _øÙé À0Y‡ZÙž|µ²y¾ƒ=v}RšR³íøw-ADcõÔûG…nÀä ßèÃeúBÎÂ4Mhšljä^’E™ÓzZX–×tôò?) ­iÜ(ãŒáyÔl©Ã0Ž˜IwýU%s¶ªªªœ±Àœ:Y¾Ò˜æð„§S]³j@'%ž%3Ëûšï/¥0oXVïÏœ.9Ž3èð<•J²,‡ŽF]×gç´§ÍJwåÕjvY®ç“ÁµµÞÓ‚«KðÁÁ ÃÀÅ‹aYVØHÂó'/5$ bgz\Mâý!Ç뺨Õj៕²Q‡)ê MµHe£NŽí¶øw™v&r긮výí¿¯Ÿ¸NÈ`ͺv¸~•9M(¸RéßÈ–:ÆÏhÒ°cÒ'(‚4*¦)ò@Ëeñ¨TÄz?²—óœéÆ9®ÂŒ„a™¥¾Y€ø3:ØÀqT*4 4›M´Ûm4Ù9ƒ@œ(‘gŽ/½dײÄ1t=Û“$Ë܉ê‹'—ÁzíÚ5¨ªŠÝÝÝžèKüõk×®ý}ú’G =/YÃJ^Eãoì!‰Dƒ3/ú€×‡Í>%¢I³cé=ñ9MkkâßQ#¤2„Ñ DFë¤Ðº2ínÌKŽïû0M3ìvGú5e ,åk #Ib¡§±qò¸3K¦ª@’$xC”e×užxUU{¢® S®ëâ‹_|` £}XÑSǶ“á^MÆj5RÌl;z-ýÅ‚@¬ñ€Xß›Íäc_Öu“sQ–ÕÑ;GPóTê1H¬GYvÇeYh6› ãL×nJ×$N:={'m€Ñ‚RIè1YºK*•èX <xhJðÁÁ:Þö¶· ÜïÊ•+¸|ù2ç258nšIÖZÂÕpÀ@õS1ܨF?ƒÕ€H¹õÐ?Š*!߸â'ÿ+ð;¯~äq¡QkúqFÙJÔ! 
Ö€i5qpÀÑÕ!ÄËk*• êõzbq^Ú±«ÔÅ—:餻E3Ì A@³º½“TygÈþA‚£( ¬Láˆì€z84 EVçf $E:®hQQ"Y&ž—l0’>ùÿ?{æH~Ö÷Â_¯×6¶±ìZ;ë—ž5¦ÚàU<à­Îˆ;S2OLvqJ!0dç§Ó¿OÔi¬;r¤ªƒ{O†£VY€"©AªU¡ÂÎbw–J%T«Õõ:‹Ea å›,ž'B«^® …0ãý¦¦çr9ܺu+éu.̤ý„F'WPtun¢RÔçm6darTS…0"Ç=Ç 3MqE¥ B¸ÿï[vÄ…´¨Ã'Û×Lê¡>4)EÓÄŸt\#L:ÞÈCµø85̙ŶmüèÖýÒ"Q%jSEªcµ, º®£T*Áó<ÿž„Õ5iÏÚjñ<µZ ò'ß5èO*¥è©ÁqFÓ³Hªò«7ZmÔÉBãFã¿F ’$Å÷´4h8lÔ{’b¶ãY’D„7UžªÙ™aÍçóèõzèv»£¬Ôp)—Ë%ý™"÷w²m†aàääž7Çqó„Ƕx©°ó4ç 69ŠþÀýÏÞý@ýâèð¾ßr×ùã:¦Í¤4ââíÿ¯7ÎzK5–%ŒÐðØÎ(ƒ•ì¼Ì:ˆéN–Ik›I’ßú­á?þÇŠûo÷Šœzž‡âmKÖu]ضÓÓSȲŒ6N“À²„q3Kà¢X,B×u¨êóÉPÒpIˆe¥È6¤’f3»ŽEYæúÕCøYÕqÏó`š&\×…ëºðËuß4¦dŒ4BsWg½ì©±’ªªÐ4mù£ú þLÀ(ƒÔ¶Ås2:>&­ÄšÃzéÒ%ìííáÂ… ¸xñ" …¶¶¶ÐívÑn·qtt„[·náÒ¥KIžH,Kì)QJ¼ëºÐ4 ¦i¢P|ãÌ1›™y³FŒ6] z'‡Öù/ED6ˆ @þàmß2û{;üñç¾2‰@6\”0®‹©ãˆ½5G÷"ÞéZ-ã7“$ŸùÌ‹ñö·¿¶mC×õ‘ëƒêX-ËÂI@cb£uMÐfFYAá†$ ‘%æyt½ryØþºVnW‘=ØÕI ø}î&8eЊY:Ô\w–˾\.Ãu]4›ÍåתÒxUlŒª*póf2_ÚË`Íçó8<3¦9,ÇTB…V¥ÒÀh•åa%È4‡»¤1‰„ŒÏƒiš¨T*þ5ày^÷ºçà[¿õëñö·‹®4aƒUÓ4”J%ލ®j[>­ÏŠɊlS§ëqbJs+WÞl)2ãkÑåL;MÓ.ã³®ÉXm¬B™¡4÷p¹³Vb¬¸ ç„÷z=t: …©¯ÑétP©TÐívý׬ÕjþEÓívQ©Tü‹IUUÔjËœ…"°mÛWPt]G¹\†,˰m'êÉrëM ƒzP ¢“èšÐZMÜiJ6€—İÁ2owbf.’’oÊ ‹Ò¢Æp‰«4V)¸¦aÀ’¢ªëƒZ‘ph@’¸3c‚¤eŸ… ÓÆq<ûÙï "©ÔË ÜA’-eÃÃ2S)]×ê<Æe­&6®àµœ˜M5Ùvg¢’®ª"‹qåÛe8S†I-YqÈ/³¦iÂó<4W)ƒl¬&ÎÔ±6qét:ØÛÛ‹õܽ½= 8Žƒ«W¯¢P(  `kkË?ÞétP¯×ç^[±=½…æRð•Ã0Äæï`þh¢‚ÉMCˆ1åÐqÏÑ%jJð3/™oe š$1kcÝòdœÞ2­&»\Æz‰epó¦¸)ßj5zNÏWM”$e|VlÛ†$ICsµÇÁéé}à°‰ºF6ÂX„¢5¯Â•òX* ¿ÞÎÎ`LÚ´Iφ.ÝãRÓ¶V%ÙD„5PÑ4%¢p™%CdAÆgÉ0MÕy<2q‡ÐK«)`ik\Úí6z½ö÷÷ˆ18/^ôS©‘ÓÅ‹ýã»»» ç¤Gé¶á¨“®ëÐu](--ˆÑ4óD'%Ä7ˆzS‚Šºå¶ÇRš?BZÅäù­ÌÒIJ¾É{jÐ9¦\^óxCÏtØc2G’2>/𦠬ûسð·ÿöçˆèÁÂcÌÒD”wVU3Nâ`YbcØÙ‰ÞšMñC³³šMa€Ž{>uvÛÞÏ£ÙÉ)i’5Ùv/|áÅ©ºôééœ_spˆ+Q« eí^NfdEÆãÚŸÔÙ}æýÛq„—JÃJÍŽ’}&QfN ^ª… ŽÇ¹qãqaÐÿƒ9õù|ÞO]˜‡qʸã8C^¥‘Â.„Ñ:«^mãvgޘϗ ŒR@©8\£êÜ~½yÒyh–*§¯$ä›°mˆ ïÛÔ”cãš8® jýÇd’$e|ÈYãn§§-4ß `a]ë@ùE F2©ÆÛq Ë¢j uc;ó¡‘$š?5WQD*Ôü8* ¤ç¤È@ “EÙþ‹¿xÛj¶SˤlSó+Ú»Sæh`â“Ð\hï¦}Ò4…áêyƒý”#ª©cíÖ\.7TçÚívQ¯×±»»ëÊG¯×›ë=Çí«“¢N00˜›:+6FGËLÃðúO;¥P#,–Ò;K´—Y˜$ä›0ÍèàÊ´èÑZõ×MÙôzfV’”ñyp]Š¢@–eßhýèGÿ’4<Â&€k5¡8mo‹Ã½¨) ‡ŒÛ¶02©€ýä$Ú¥i⹓Ò)(jÖlÎîT’¤èyZdøÎ[çºF²&Û¿õ[ƒçɫѭ5MÈÒÉÉ`Þh«5Z¶ÁdŠ,ȸëÆO™Û`U”Á>I½3NOÅÞW­f{>ð†²vƒ•èõz¨×ëxàP(ü®“.ˆ[·n=ögögøà?YGµ·N³J 
—æ1Xg‰®úïg_ÿ>à¾{øqËëðËŽqùòå¡Áe³lù€'žx{{{8>>Ž<®ªã.M’õµviwž§¸öööpýúõ•¾Ç:÷ðE‘$Éo®Ÿÿü;‡2¾¨$Q( ­RŠÍ~ מJÒ@©¢2$¦ÕvëúøS2V30³^¯ãòåËxüñÇWòúIìßóð;¿ó:üƒðéÅ_(Ø@+ 5Àc%~mÐþMMVAš÷︫çycçË3é…tðY÷ï©)Áq‹­gñ¼t: —ËáÊ•+#©ã˜ÔZûÕ¯~5Þò–·øyùÄ8gòD!ÎQU!Ò‚gqàÌ£÷hÚà=‚#i˜¥CžÄk×®­äõW!ßpÏ=÷àððpìñ(}Ûqض=±‹¤$­Ñ~ÌRÚe†9<<\‰áG¬s_Çq išo°Ê²Œg?û«†¢RjÒé_®+ Ó`Êì¤NØó*hº.êLUu8åRy3’ù°¿¿óçϯdOjÿž‡}l?üà Œñë:Ç0 “”|;ΰ²nÛ6lÛÆÉÉÉü/º,l;VÄ̪HË™o6SS¢&/›)ɲŒ«W{økmí ò™ &ÙöŸÇÃ?ì_XóÆ„aŒ–ëLM† Öeʷ늚6Ý«ÞÀâéÅÌZY·| z(MÓLG*0Ý‘œÁÉ,—$eÊÆ*3Át`×uaY,ËB³Ù¼Ýå:¢žÏ0„s„r&Øö¨ÍIjKP}1Mªª Y·m¡ÔϺ«ªÐÇ£Ú0™bªÁºµµ5µvZ 6¢b¦{·&e¨·J3Z–üܽÀ£¯þîÿ|÷}âÊ£IbÅ…YÅ¢ð‡Ø¶½üq6ž'êDh8š¦qÓ&1ø™][[¿Eùùõ-ζ­/f\w ´›¦‰J¥EQP*•P­VáºnôÏÍí˜ V‡)K,˜oYªt€ŒÎY©TD”•Ë•2ÏF×°FEX'FŸ\øµOŽoº¤ à&5 xÛ·ˆÇ_ÿbqqpÓf ÐoÛö샴ãP. Cõ䄽”LâÄ "é:ðÉO~h=‹ªÕD]U¸µ+ÃÌH°>›2fEA¥RA©Tm>p#/&3D5 Bj³s[÷ûq4óé’$®qʘÌÚ9¬Ë€B„ëºSº›Ày9ð7hò ›æôÔM‹ž~Ì0KF–…(†=_³%jËu. þcï=“J¥ÙÊô¦–,Šë K’„C‡fANOÅ¿–e •0éºUU!sôžÉ0áqdUQ É4ÍáÓ"rÏî6‚Øk§ÓÁÑÑ.^¼ˆ|>?V1vÂaÍîfíNßèm8ý‡Àßþ4€—E?Ç4ÅOœzTVò™5âyÞ|ÊL­6è0)Ëbs§ÚT4Ï0)`V'ùJ VÃÕy=ÿ 3Ó4é·ñ÷wRnØxe2H”Ø*ŠP­Eè2ÿâç~÷°îÁˆ•lÛ6.\¸€N§ƒ\.ç?¾¿¿K—.¡P( —ËáêÕ«Iž!*•1uÈ6ñu_|ì·¢9ŽPî[-ö¤3©Á0„½éºîô”ਙe–%dúô”•o&ÕÌR†dÛöj V×5Q€ˆªòõÂ,‰ZM¨®ëÂó¼ñ: =‘a2†1fl$%rIð›ï{¾¸µ•ôR™”Ë`=88€ªªxä‘G°¢|>ÝÝ]âüùóx衇’þ=PÎ-+ÚûN½äƒ†eñh&˜æô¶¶mûe)î¢=Gd$˜¡.|<{˜YäŸ8žÌ²Xþ˜L"Ë“³dlxê©§ y^qáBÒËeRÆTƒµÓé`kkk(² ÀO&èø7’þL„® 2õ®^9RüŸÿÛ°+ßuÅ‹qM*“R>ó™ÿ6H#ï»a ®¶-ŒS*!xv“,kz@Ó4M”J%8Ž¿ Ä4uÜŽ#Üü†!2jt:ÌÚå£elF˜ã°ÁÊdIš¬†W«ÀK^RÅw<ïy¬—0#L5Xs¹nݺ5òøáá!òù|ÒëKØ“óìÏæŸXÁèU*q*0“Z¨æ €P¶GlöbÖÍð“)‘ž8ÍÃ&¸îô-Øq4›M”J¥é]በ£§VÔys}*³FhøÀÔñd–ÅéÀL&W¿äƒï¿Ø·YßfBL5Xóù ˲ð¯}-žýíßžôr™Ë`Íçó¨×ëèõz‘Ïéõz¨×ë( ©‰ºŠ®©âÿŽãàÕù—GÓ†›ÏXÖ`^B9âÅØÓä” ŒË²<ì}ùeI/‘a&ŽžnÛƒ9ÄŠ¢àæÍ›Ñ)Á4w˜aRž˜ÊÎõ«LF‰³‡×j5¼å¥/e§ I¬9¬—.]ÂÞÞ.\¸€ÝÝÝ!£ôÚµk8>>öŸ—LsPrêyòù—ÃA«‹âÿ¶-<ê¾N˜ !ËB”wv<ÈÀ ÍAQWN!c2L\ƒµ1nn_©$^@UÃþ¸6•I–eM®»¶m.ã`2 uÀG­Vƒ¢(xª²ÁÊDË`Íçó¸rå ŽŽŽP¯×GŽïîîââÅ‹#™’Äó‚¥I6¾þyÏ~‚$ ŒVŠ *`ƒ•É.Q£TUhûl°2fš;Ž39*EÔjâ_6V™aÛB_qG”vPÁv8»‹gÀ3ÅuÇìᮋgy¦iâää„å›K,ƒ]€/]º„ýý}t:ÿñ|>ŸšºÕ ÁŒ¯W|ä#øâ×|Íè“Èh¥t`_+L†0 @Ón§GÝTU\ˆ(ª I• ¢©µÛÿç±{LÆõ«¬ ³¨ê°ÍY«Õ† Ô*ÏLe2ŒãÐ)\³Ê0€i’®âà[ññ’}ˆUfff®aÍAƒD!· ‰a*Ðo?ÆL&ƒPgI×u3…fÃP”aÝÆ²n7¦a˜ À¶|¤+º\³±Êl 
µšp:†ÿñk¿ÆrÎÌÅF¬>.ð7~áëðà¿8ˆ¨2Ìáy¾û%/áÆÌFâ8B¡'LÓ„¦iÓ»Iº._L&Ð4à®»>0yþ*Ãd˜FC8Ö=Ïãl0fn6Û`5ßö¿ÁËŸûܤWÂ0K…¢«–eá­Ï}.{,™Ä4SÇ‘¬Ç™¡jYl°2™@–Çû+òÌÆ¢ªBWÑx<³m°þÅü |öÙǸ«XLz) ³ThnŸmÛxý_ýí`6ÛŒ6MªªÆk0fY|M0™À4¯ºz•#¬ÌFbÛ"á… VfQ6²éÀþèUÂ߸ñyV\˜Ãu¿õ·>‹÷ÿÌCîŽ;¸³qmNÏóP«Õprr2ýDš½ÊדrLhµ/})é¥0ÌJ¨Õ€yÕe\xæžfÀ,ÄæFXMàÏÿaôÿõ¿Nz% ³txÖSÀ¯~á b ÃlÁè*užZ» K—=ùL¨Õ€ïyêŸã¿õ[“^ Ã,ê?ðuüKHqJ9f©7X;ºÝîl'9À_>¯ éKŒW~ÿ÷'ýf,sÉ7€Æ?éàëßõ.ô¾ç{¾0)f÷<¡ì¨ªhÖ1Sg`6X™52ïnšB¾ßð‘GðòüÁ¤?ÃD2¯|Â!S)þ>>øW…|0éÂdœÔ¦w»]T*t:€ªª¨ÕjñNV€Ï<ùÿÁÇJ%œOúƒ0L É7å#áï?ç9øùŸú©¤? ÃD²ˆŒmNÃ0P©TâEW9˜Y‹îáµÐ*þ$~å9ÏA™#¬LÊXT¾MSlÃçéøé¯ÿz|Oœý›a&ÚëÁÁ¶¶¶à8®^½ŠN§ƒz½ïdÇÁ“·þüþÏ“þ ÉBò ÀR<ÿþû“þ 3–Ed\ÓD¦;)H~g`šsœuC¸.`]eÖÂ"òmš€úõŸÆ ~í2®çw&ýQf„Eu”Z ¨¼¤÷zxù÷}_Ò‡ÙRi°v»]´Ûm\¼xËå°»»‹V«ë|O–±ÿÜçr7“J•o@tæ1LZYTÆ% p]¦i¢ÑhCµXé8ƒU’Dz<§È3+fQùv~ï‹Øÿ/ßwårøß9K†I‹Ê·mÚÿ^öïÿþñ—}wf–B* Ö7nòù¼ÿX>ŸGïº.þÖ?úGIŒØ´Ûí¹k’æøø8é%dŽEå2ž¥›@Vå„nÜÌl,*ãOÿ÷ÿŽUÅ‡ÞøFHå2P.‹k«T«â'Œ$‰¢ÀRÏx?[,*ß·¾¿ñW‚õk¿/Õ=a²*#¼ÏÇ¢ò­*~øÿþ’„_úíßN½Œóþ Ri°Nœ^¯7õ|EQâ7èH×®]ó7ˆ¬aGE:˜‰,*ßfŠÆ¦¬ÊÉ7píÚµ¤—‘9•ñ=õÎ]¼ˆW\¸ "¦''©OÆ{øÙbQùþ7½¾íƒÌLXVe„÷ïùXT¾;¿þëø…¿üK´Û©7VÞ¿³B*›.Mº nݺ…\.7òøþáâýï?~ù—÷Þ{oÒa&ž|òIœœœà¡‡Jz)3óÄO`oo/éeÌÄ“O>‰O}êSxñ‹_Œýýýµ¿ÿ<ò ˆ¨ê›ßüfÜu×]¸ûî»×¾îEÈ¢œÀç>÷9|îsŸóOd…ëׯã _øBb↓=ü×É{œr/2ïáë_s¯×Ãk^óšDÞùÚ¿¿ê«ð³?û³øÙŸýÙDÖ?+Y” ûû÷oüÆoà­o}ëÚß)û÷àÒ¥Kk_û<ðþ½^HŸuÿN¥ÁLC³µµùøÛßþv¼ýíoOzé 3•yäþóþÏI/abÁ{8³ÉÌ#ß¼3Y÷o&¤2%øÜ¹s†ÓºÝîDeža²Ë7³é°Œ3› Ë7³É°|3i$•ëÖÖ …ÂP1±mÛ(‹I/a†å›ÙtXÆ™M†å›ÙdX¾™4ò¬~¿ßOzQt:ìííakk ½^¹\‡‡‡cëû&K°|3›Ë8³É°|3› Ë7“6Rk°¢ð›Šå …BÒËa˜¥ÂòÍl:,ãÌ&ÃòÍl2,ßLšHµÁÊ0 Ã0 Ã0 Ü]RYÃÊ0 Ã0 Ã0 Ã0l°.‘N§3qÞW§Ó™8yÒñiç.ó3,sÝë\;³ZV)ßqŽ/ë3,{m,ß›Ã"2¾©ò½®µ3«…÷o–ïM‡÷ïäÖ¾úÌÂ<öØcýûï¿¿ß}÷õï»ï¾þý÷ßßì±Çüã7nÜèß÷}ŸüÝï~÷Ðù“ŽO;w™\¾|¹ÿŽw¼c)ë^÷Ú™Õ±JùŽs|Y„å{ѵ±|o‹Èø¦Ê÷:×άÞ¿“_;³Zxÿ>òÍÖ%°··‡B¡ÇqpõêU T*ÿøÁÁ¶¶¶üãNõz=Öñiç.‹v»£££¡ÇY÷:×ά–UÊwœãË J¾]Ë÷æ°ˆŒoª|¯kíÌjáý›å{ÓáýûŒÈwÒsÖyôÑGû÷Ýw_ÿé§Ÿö»qãFÿ¾ûîë?öØcCÿ'~îç~®ÿý÷<7||Ú¹Ëâé§Ÿî¿éMoê¿ûÝïö=<‹¬;Îq&¬R¾ã_Qò½èÚX¾7‡Ed|Så{]kgV ïß,ß›ïßgG¾9º ù|~d6Õ7¹\Îÿ>Ÿ:‡òÉ'Ÿvî²888Àîî.Î;7òæYwœãL6X¥|Ç9¾ ¢ä{ѵ±|o‹Èø¦Ê÷ºÖάÞ¿Y¾7Þ¿ÏŽ|³Áº ¹\nh>U·ÛE½^Çîî.¶¶¶& 
G¯×›xüôôtâ¹ËàøøÝnûûûC/²î8Ç™l°Jù^‡œŒ“oú,ó®m×&³‘ñirUùæ=|3àý;¹µ3ë÷ïdÖžl°.‰^¯‡z½Žx…B—.]òÇ­[·&ÿô§?=ñÜEév»8::ò×þ<ó®;Îq&[¬B¾W-'“ä{ѵ¯úÚdÖÏ<2>M²*ß¼‡o¼¯wíÌúáý{}kO 6X—@§ÓÁÞÞ:®\¹2ä- †ãÃlmmM<ôE»(ÇÇÇÈår°mõzN7nÜ@½^ÇÝwß=÷ºãg²Ãªä{Õr2I¾»Ýnª¯Mf½Ì+ãÓä «òÍ{øæÀû÷ú×άÞ¿×»ö¤`ƒu T*?>,$”— Ïw»]_`&Ÿv?~ìûò—¿|îuÇ9Îd‡UÉwœã‹0I¾]Ë÷f1¯Œoª|¯zíÌúàý›å{Óáý{½kO 6XĶmt»]¨ªŠv»=ôOF¡PÀñññÐ9Åbqêñiç.J¡PÀþþ¾ÿ“ÏçqîÜ9ìïïCQ”¹×ç8“ V)ßqŽ/Â$ùÞÚÚJõµÉ¬Ed|Så{ÕkgÖïß,ß›ïßgG¾ŸÕï÷ûI/"ËÔëõÈùIà8€AºÂÖÖz½r¹ÜPW³Iǧ»ìÏÒétpxx¸ðºãgÒϪå;Îñe~– |/º6–ïÍ`QßTù^çÚ™ÕÀûw:ÖάÞ¿“_ûº`ƒuMôz=t:Ñyó“ŽO;7­ëNzíÌúȲœdõÚdÖË"rUùNzíÌzȲŒdyíÌúàý;ûòÍ+Ã0 Ã0 Ã0 “J¸†•a†a†a†I%l°2 Ã0 Ã0 Ã0©„ V†a†a†a&•°ÁÊ0 Ã0 Ã0 ä6X†a†a†a˜TÂ+Ã0 Ã0 Ã0 “JØ`e†a†a†aR ¬ Ã0 Ã0 Ã0L*aƒ•a†a†a†I%l°2 Ã0 Ã0 Ã0©„ V†a†a†a&•°ÁÊ0 Ã0 Ã0 ä6X†a†a†a˜TÂ+Ã0 Ã0 Ã0 “JØ`e†a†a†aR ¬ Ã$†mÛðÕO•Ëeœžž&ýQ†aβ,£Õjù¥|žçA×u4¤—–YØ`MaÃÔ¶m¿èz’Ѫ( $IòÚ„=ð “4ãä;Ø8, –o&ë$!ûFžçÁ²,¿Ã,ƒyå™a²È"òN:|£Ñ@£ÑðÇŽ)ŠÂ×ËœpÓ¥„ÑuŽãøi_žçù8(ý7x,Øt#Ø…Œ Âo†I“ä{,ßL–IRö%Iò;VòèfÄ‘ç8²Í0Y`QyFVqå”ßÅàkÂȲŒF£b±UU}¯ µ¿#E(%†aøŠëºSóìf]L’ïi°|3Y&iÙ×4 š¦¡\.ó5Ã,Ì4y–$ †aø© “e•w]×aY¶··¡(ŠŸ£iZÒ-³<«ßï÷“^#”ò„S¥€É²ìÏa¥MÍfÓ?7Ød‰RÉ&MDÉw\X¾™,òÏlãä9¨¯„g 3LVYTÞ)B+I—2-¬)Æu]looãä䊢Àu]‹ET*ö`2 Ã0 Ã0 ³ñpJpŠ ¦$xžç§#°±Ê0 Ã0 Ã0ÌY€#¬ Ã0 Ã0 Ã0L*á.Á Ã0 Ã0 Ã0L*Ù˜”`˲ðó?ÿó¸çž{’^ÊÌ<ñÄÈårxñ‹_œôRfæúõë¸÷Þ{“^ÆÌ<ýôÓxÎsžƒ£££¤—›ûï¿?“ò d[Nz½^&¿÷ëׯã·û·“^FlxO†,_›’$áßþÛ›ôRbÁû÷úÉúþýÞ÷¾ù|>é¥Ä‚÷ïdÈòµ9ëþ½1ë«_ýj¼å-oÁþþ~ÒK™™z½ŽóçÏ£P($½”™ÙÛÛÃááaÒ˘™v»k×®%½Œ™¸çž{2ù]Ù—“,î+{{{I/a&xO†¬_›Y÷ïõ“õý;+Æ*ÀûwRdýÚœ…1X³L/p"‹ ³~²*'…B!“71f½ðÎl2Y•Þ¿™8ðþ ¸†•a†a†a†I%l°2 Ã0 Ã0 Ã0©„ V†a†a†a&•°ÁÊ0 Ã0 Ã0 ä6X†a†a†a˜TÂ+Ã0 Ã0 Ã0 “JØ`e†a†a†aR ¬ Ã0 Ã0 Ã0L*aƒ•a†a†a†I%l°2 Ã0 Ã0 Ã0©„ V†a†a†a&•°ÁÊ0 Ã0 Ã0 ä6X†a†a†a˜TÂ+Ã0 Ã0 Ã0 “JîLz ³ <¨ÕËNOÅc†!þÕu@–Å1Ó=ýô½xÓ›þ4ée3 Ã0 Ã0 3ް2™Á0€bqð{¹ Ø6à8ƒÇÉXMT$ñ»¢Í&ÐjõúuÜsÏ“I$†a†a†a&ÀV&3( P© ~×4Uu] ÑÆiøùAd9éOÀ0 Ã0 Ã0Ì,°ÁÊLÄu†^­&"š², Çu€š6ü»ªŽ© Ã0 Ã0 Ãl#)Áõz.\€¢(¨×ë8>>F½^OzÌpQÇIPŠm±8¨í R*‰´[׿kšH§UqN¹,^ƒjEé5··EMiϯaã—Ö´½=üûÎŽxoz_È^Ã0 Ã0 Ã0›ÍP„• ÔÝÝ]ôz=@.—C½^G§ÓÁááaÒëexžø G:©‘m‹ßONÄ¿Ž#Œ@Eçèú ¾“êA)å–ê=ƒÐûèºøqœQôZ?´ÛüÈòà½)]WQÄúÈ8UaÛ¶0ZE¼}†a†a†aξÁÚívqtt„ÃÃC t:€ªªØÚÚÂ… ÐétÏç“^3ƒAÇ[ê€ëºÂ@´,ñS©à 
R@<7HÐÐm6…‘[,Žªã׈†ñ:Î ý–Òs©®ÓuÇNN†£§q©T€›7—W*Ë"Ú®=‹$‰5M3Œ“bœ|,ãLöaùf6–ofÓaœÙ$ük.—ôz=ÿÿD¯×ó#®qév»ØÛÛƒã8CU*•¡èmŠc?KXÖÀà¤T^`|¤’­—(ù¦ÇYÆ™¬ÃòÍl2,ß̦Ã:8³iøVŠ¢îíí §ÝnØÚÚŠŒ¾†év»°m•àü‘ÛÐë8Žƒ«W¯¢Óé 5tšv|“ð<¥½Ä¶Eó¡ aJõ ­–˜/zzÊFiÒL’o€eœÉ6,ßLÚq]q¿¼ukö!,ß̦Ã:8³© u ><B¡€B¡°´®À“„¾×ëM=>?û³?Ã?øÁ‘›PZ ™§º.ºðr$59Žqùòå‘ú¥EYµŒ?ñÄØÛÛ!`V i ŠM­&RùG(Ö§§Â fcU«Ã®×1¢iooׯ__êkžõ=œY-¦)F™•Jâ÷jupŸ´,q¥1jõz—/_Æã?¾´÷çý›I ´ÏÚ;f¼3itðY÷o?šÏç‘Ëå`Û6vww—¾ÀIëÖ­©Çà¼úÕ¯Æ[Þòìïï/}íË@–…¢:kç^fùìîîbkk ×®][êë®ZÆï¹çž…¼D\W(ÇáñO4Ê4Åï4w˜ Öàa`¾ëZ–£Upxx¸tÅá¬ïáÌ|XÖ ó€7ÔÑžfv—Jâx¸+<ý_–‡¯Óýý}œ?~©{8ïßLZ ý{™c$yÿfÒ¼:øP—àýý}Ôëõ±ž–EqÒ…·µµ5õxÖpq£V1Ö…g‰¦‹n·‹gžyf©¯yÖd<«8Ž0+q]Ú¶¸F]WDLé÷p&i 2¦wËTdy0£x¶m£0MŽã Z­Bº­Ñ¿ç=áÊ•`™ú1Ë73 ¦)®'JáT3mö÷:F›±|3› Ë7³jõ©W/u ,ßguŒÐõ¼\u­iìïß‹~ôž™ÞkdÙññ1lÛF»Ý ¨ªº”ºÖýý}ìíí¡Ýn£×ë!—ËáÁŒ}<-PØûäD¬4¢†jݘåâ8Žoxº®‹Û_¸®ëþ¬1ŠJ™¦ Ã0àÝö´Z-?B¥þ8$ßËfSdPM–gKÓ¥ýÞq„-3MÆ©^°AÞ¤ò Ó”‡Ôë×qíÚŸÌöÁú._¾Ü¿ï¾ûú?þã?Þ¿|ùrÿòåËýÿñ÷[O?ýtÿÑGí?úè£sÇ£>Ú¿|ùòRÖ8 Uí÷ON¿7›â‡™ÎÍ›7û•J¥ßjµ&>§Õjõ›Íf¿R©ôeY𜞞öoÞ¼9öø*åeU2þŽw¼c%ëM•Šø!NNúý°¨hZ¿_­ŽžK²5éïÅÉÉI_×õ~µZퟞžÆ>¯Ñhô%Iê7¡5hšÖW¥¯ªj_UÕ¾®ëýF£ÑoµZýjµÚW¥/Ër_Q”~£Ñ˜i½''Ñ{ÍÉI¿^úªäeöðMæô´ßo4Äu¢(ý¾,_SaNNÄsšÍ ‘HÆÍJ¥ß×õÁ{¯‹UÉ ïß›MPFçÑÛèº lû‘4›Ñ×,G_'Õêð½Ž÷o¦ß²¢(ñe•ös’ÏÓÓxû8KÏ—åñçœ;HÓ¢e™îAH·£×œG^|ƒõÆýûî»/ÒxôÑGû÷Ýw_ÿÆ‹}ó+d]Ë͛ь,pzz:“BN4›Í‰fÜ÷»yóf_Q”~¥Rñ•ùfà*$ƒC–å~¥RéW*•~³ÙœÙ‰C7×MWxZ-±1ÿÜ´1Òæ«ëâ'Ê!¡ëz_Ó´‰ŽÓÓSÿXPÞšÍf¿Z­öUUí+Š2u­dxžžžö5MëkšÖo6›}Y–‡ ØI4£Æø´çËò°³lY“—,^“i€¶XºVTU(¿³øøNNÄu<HŽUu6™^„¬ÉKÖ®Ç,¡iñ”rRæƒçÍ"¯äœ¡k-Š›7ÅëŽSæQ²ÑVæûýìÉK֮Ǭ0ÎI=î¹a¡R™î\‰ºW„ƒý¾g]zÚ¤ë&Ôw]Î#/~J0 ŽêDIóXoܸqæ °%i=MY–çy(•Jð<ªª¢R©ø©²AÇçy°mÛÿWUU¸® Ó4Q­V#Ï#\×…mÛ0i&I’ ª*,Ë‚®ë~j®mÛ°,Ëïr*I4M‹•É$ Õ-PúG±8{7lÛçKÒ %˜Zˆã­ÖàwMÅÃÎŽ¨«i6›Půmn6›C)亮û²dlÛ’-EQpÈ_¤QÃ0P½ãyŠÅ"<Ïóe_’$´Z-H’„f³‰Z­˲üõÌòŒKU¦:AEßëòh,fÓ?''£×Ê,PZï¼ Ër:*1›G°É%¥ÚNºÿ8Î ý˜µ„+ØH“îAuÈqDÃ1MßA^×E™ç‰Ÿà¸'†!,K쥊"~\WÜûÇÝÿƒòTA4mrÏ:/\ÆQ©½.ˆiÆ»WhÚ๖5ÿ}i„ µ{ß}÷õŸ~úé«öé§Ÿî¿éMoš9E`¬Ã»sóæl^ë¤8==Š^Rd“"?Fcèw‚¯T*ýjµ:Um4~ô3ÝjµZ}]×û²,÷UUíW*•¡ÈêÉÉI¿R©ÌœÖ»J²è LÊãJcо(Šø §îNJ; S©ˆ×ŠðË EßONNüˆ¦¢(C2ŒžRÔu–taEQüè~ðZ¡tõe}¯áÏßj ¾ÇVKx3%i¶ï–`}öG_¦yÖ›Íøé^›FÖä%k×㺈*g˜¥,Òõqz:>â|þ<*ÈÍ›Ó#Tý¾Ø¯e9^Ä–î“’y²&/Y»×ÉɉС*•x2HÙ.Á½¢ð“ÞcùnµÆŸ·H‰‡,Oï÷L 
î÷ûýûï¿¿ÿŽw¼c(õ÷Æýw¿ûÝý7½éMó¯| ¬ãb *“i%¨`kšæ§8†ÓqFì4…>œ¶•Ú›²¸¹&u£Ô¸vÚéé uŠŒ\UUH–&¥~ëºÞ—$©_½£HF*qrrÒ×4m¢3džTøÓÓÓ¡:Óu@Jײü:¬ðdp]CͦxœîAi¿­ƒ¬ÉKÖ®ÇY99™ÝyÒh  Š2[*zø½%Z9×ôE½Î¤->îº%þÞ}ó¦xͨïëôô´_­VûÛÛÛ³}™ “µëqVæqª„úÔ[`Ò9á`@`:;ÇitŽÓº&­#]?³^ÛÍæäëa¡”`þ…x`( »µµ…ÃeN Ï(¶½þ6ýŽãŒM1¤TEY–ýT]Ã0 ªªŸIÝIõPÎ ¥4‹El¥8NB’$T«UT*?Ex–H&{T*³¥¡Òu2)=в,Ôj5¨ª EQà8 Ãêülš&\×Åéé)Êå²?²¨ÈGQÍ`ŽWò­èâÛ€çyÐVØv—:ì‰÷\bê “)¨“c8°ÑÝ$Ë<2I¶-ö²Yö/פ J’ø½\i„ãx®®Þ“4M¼^ð¢n©ãº`âuLsЙÞóÄïô''ã?ƒa RzÇ=Êè>äº.,˺Ý=~XÇó<Ï/¡ú†oø†uþ ™ Ш=EN)ŸD¹®×~<Ïj–Åb‘3q253šE¡§?m0cd¤ôG©Q"Bs·§E…dyМƶÅÞJ ™¦Q©ˆ{ß,‘'ZoðÚuÇž’nµ.+ÓœÍ Oe2&§9e¨YÒ¸ç›nŸÇÔ4q‘á9Ëçh6'7mÊw9>>Æ<€z½î?öÐCáÀññqÒkÝh¨».¥çš¦é+ì•JÍfÓ÷ЩªŠV«5¤Ðó¸».³(”ÊÛjÍæ‰ ãº.Êå2vvvàº.Z­ÆÔxµZE¿ß?7zU]žc€I†¨ˆÐ8\ØÞ(ì­póæj"¨áníIAÝà‡¿±7,B·ÛÅ3Ï<“ôÇÛ8dyrWÑ µÚ 2 e=Î9“"˜”’Kÿ/…’g$cÓ4g»¦$I¼þ¼×¡išØÞÞ†išeÍf­VëLÜò†,Ïf´Qt5ι¶-ž? ’eÇ2NéÂq¨Vç‹zÎú¹Óˆaíõz¨×ë¸xñ¢]€ÃÃCáàà…BáÌFZguéÀ”Þ«iÇ ÒzƒÆ'Õ©2Ì*¹À_Ïóü1/Õju(E†Zij^“T"dyzÊ䤚P@\St}‘±:Kö‰,OÙ¿Ï¿–\×õ3Ýh  ¢(hµZ‰g30ÓÑõA=ê´?9`‚÷ç „özŠdNBUÇ¡™ÆYî_à¬N½^oÈX%.^¼ˆãããœÃœ…5 *ô_6d¬Î:¿‘aVAœûmT#0šÙK?‚ ]Âó€ýýÏámo{/J¥?âNY8dÀFu…CÏ –œB¯r]w¨…5”Ö«ªªOE8›0”Í,¥ùÀah´LðwVç×ÃP—`šÃZ«ÕP(P(péÒ%<üðÃ]¥ùaÓˆ[è\*•°³³3RˬUec•IŠbq¼ƒ&¼ñ†1$ÇäÐ"4Mcc•ÙHl{ ˜W«Ó³kTu èÂÛždº—ëº~dÓ \ð”ö®m¥ŒŸ8]…MÓ„ëºCÏ%#T–eÿµƒ‘Vñ©þŒe"ª!Ӥπ39– Í¥ Ê+¥MF¡ªÂãyvvv°½½íG.AÍŸišxÍk~_ñ¿‰oû¶¯D¥RÁéééÐß_–e4 躎ȚkÛ¶Q,Q.—¡( NOOý¹ñ45!*ê®( †ŸýS­VÙX=Ãи™ixÞ A^”Ld}TL–¸³Ó錣”Vqˆ«oÇi@³·†¿¹ÒÀhÇq¸V•IM?=œöNóäH¥f“¡¾³ê¶IFTG?ƒã§ÜÖ„”RÕ©Ùlú3S ˲†Ò.é±f³éײÓûÑs(jêºî¡iÚˆÁJk´,k(âEœç0³cšÑzи2(RÎËeïE¦()Ý\×E>¿¯ýÚïÇý¯ÏŸºª9-•J¾þI²ëºîBóL%IbÇ*ÇÞÓ©SoXƧõ«‘¤Á¬_Ó\Í m&š;ŽŽŽpáÂÔëut:¤×“†±œqÔñPQœœœøJ>yÙXe’F×Çg ÏÓ³ükÏó†”S†ÙDÈk®(ÂøÌ²Ï–.‘Ãtð鸣é~E‘SÇq`J6†0D<Ïó Ó4}§¡ªªß18ldܼys$Féša(’KPJ'ííéóT=ODž¢ì¹à˜ÜZ­†×¿þͤ›~-(?ÒY­Vqzzz{Öv •Ê)._þ'øéŸžn¬=¥¨{±XôÇÅœ• ³:ÂÆiTæL`«›H³)®-V‰Ö˵Z W®\A>ŸÇññ1.\¸€ƒƒƒØi:Y§XÿN2X ctã·, wÝu—³w]wäM^A6T™¤1ŒáNQ™A=28Ž¢ÑhàæÍ›]e6Ë ú2—IP iZÔ €ß'c“îñA#V×u¿6JX‚u©¦iúѪJ¥â¬áÔßYô2P‚Ʃ뺾±ìyžÿÃ{P|Z­éié–5Þ©i™Q,á8þÁ?ø|ã7þ¡_ò4n.¼$‰û‹,Ï—OæÃéà 3/ž]úÜï©Óo%^©³\üVUUqéÒ%\¹r…Bívû̯º>Yð¢æŠ‘··X,.Üa‘aVm{Ñ]w8¢ §¶„£Ü1‘Ùd #Ù™ñ×iŒÔ™ÒÈ‹Šn1Üðˆ:ñÒãÁî¼Àp­)Õ¼Rꮢ(þ¼TÅFû‚,Ëe¦iFî³DÅ(­”¨Õj¨V«~1u€eâ3®ËiqéÀ I¢¼¦ih6›øÁ|9~ã7¾§§§glKRüy« 
³¢Æ‹yÞ ‹Àq„ó†UøtsGÔƒAãuwwN.\ØÈ”áVkúÆÕÞݶm¿Û¯a\_äš=”ñJeÐT&Ì,c&&ëØ¶pX¦ÝX0¶aRІQ§à`P8¼Þu]‡çy¾–Æ|ìì쌌ÃÑuÝïFD×õ™ Lª«u]×ÏVÒuÝ/BŒ™éx°³#þ?­‡V£1*÷¦ib{{¿ÿûOáø}èº>2Guì[`Ò‚eEë:ÁfI4¦‰ýòéÆ7XÛíöÐv»íGWÏŸ?ï§ o"T@…çnèÁ<¥ÅhšÆQ(&•Ôj£^tZM:®m"®á?†Ùd&¥E&E°N0sAÇñSyé÷཈¢¦Á뚎{ž72~F×u´B¤¨ k8ESUU¼Høù³FD)Å8ÜœºÈòž`7Ôi†fð+uÅb¶m£ÕjỿûÝxÿû_ È7úaÖ‰ãŒf‰Eaã3(›MqœGÓ¬ÚcŠÅ".^¼ˆ~ô£3g§ÓA¥RA·ÛJªT*ØÚÚáÒ¥KØÝÝMúó.DT¤”„Õ¶£6ª‹X8=©g.Ã$À¤è‘¢ ŽÛöà9”=À0›Žç‰=>mânÛ¶ŸK£”ê9Éè¤Ù–¶mû3)ÃóOkµ$IŠRªªêϨŒC”aºLt]Çöö6€Ñ{j•sLccYƒ”\Y2ö¥çÓ¼Ò`ÖXðµ$)]°Ì»®T&GF)‹2 ׯ³â­mã yÈaÈ)J%‚¢¦ÕbùŽ‹eY~yF³ÙŒÜ«iÖ7uú¦^¶mO¬}ÃÝnW®\A.—ôz=t:\ºtÉlwwõzív…B!éïlnh&¦}R¦¥Ntãç›*“e¼ÁJ׃išøùŸ>ò‘þ—ÿåùÜÜ„93hÚúÒ)ý6œÚU3î8Žß=—®GškJ ¦iCçR@EQFNQu¦ÁzÖ´À]a#¬¼KÒ°ÁjYdY½­¤?8Q—9=ÍFm7sö Ý&Xòäy"k’|/ÓÔô`+3Šëº~4•šõQ„tä< Þ·,ËB«ÕZØnº³P(ø†)?̼‰gÓoœŽ«ÑÐuþö<Å÷`só&+LÓÿZ-À0¼æ5ûøéŸþÇCc+†Y–eÁ¶m4C“æiÆqð¾âº.Êa˜”ú ÀïU—•ÒO#ª‚ÐüLΦØ,k8}WQ g¨þyX6&+,LZ¡´÷àvZ« #6E>¸TCÎOúÜ+lÛ†eY¨T* eáȲ¼´ *wöz½¡:òùüÆ©AexîRðÿ“š ˜¦é{ Çáz&P}Ç$d·7ª@Q”T¥2Ì*¡Ô±UlçÛÛÛ8==z,ªa) 1¥çQUp6i°n•w]wÈÐÕ4m¨¼‡ ¦IaxÛf¡(Ã#U4^¢fV;;\³Çd׎z×£NƒšïQÊ.0NR°’$-œÂ»lîìt:¾‘Úëõ`ÛöH­*u>wî\Òë]ˆju¸°ë"®QžD×Þö¶?‡¦i¨V«~ã îVȤê|=MÆjƒ=’Ì™#ª¡Þ² ñ,Áè%Õ Óxi|‹eY¾#45 ŽŸ ÞwèñpÄLÓ´!#—à(êÙ ÊߨªEUÓ5e2a݆‚PÔ XÓ6'Å—™³f¼£¥Á߃*Í׎2FÓœAzg¡PÀÞÞvwwÑn·Ñëõ|‹»×ëáÚµk888@¡Pð»gEmóN¯¨¿‘ë/|áu_Q`o4“$)ÞlÛ¶ñÂ~çÔÔa†Ù4V•(CŠAØ`u]º®¬Žã@UÕ!—jUa¤RýP¸ó¯mÛ‘µªœ%q6瀩TèùóTYa²Ž,›\7}Þ¥\.ûϸ{ºiš0Ms¤/Á$5KÜQ­V¡ªªßõöÒ¥Kþ¼Õ‡zÔE8ëi°¶-ŒÕà†®ëƒß£vUþâ/þYªšR0L$)^‡ëºøñÿË•Ešf“1 ï'%¨Ëv8ý—²s‚Þojªìð4h© %ܬ‰FÒp¶ˆtÈR)úØÕ«=\¹ÒÁë^÷p}“}ÂæÈÉÉf5P2 cÈ.+—ËðBu‹Žã`{{ÛÛÛ(‹ØÙÙã8h6›¨V«C?Áñ2YæÎ\.‡K—.EÜÝÝÅîînæ#«€0XÃQ§ s𲍆ãŽÀLö =9ŽbÂ3W™³Ša,eJ¿¥zÔà1JVUÕo¤D õG ÚSSÀ¸zž7tR½¬ ô—qY2—.ý,¾ækþÞûÞ/çè*“y¢êW7 *õ ¹×F†a X,¢ÑhøNÏR©4Ô±7ªÛü¦q jTëõº?º†ØÚÚÚc•ÿ-ƒŽqÓÌø@€IDATtÓ#J¥tµüg˜8„>c §,2ÌY"¼ßÏC¸^ˆ£4_"x­‘JéÀ|%$8ƆPU5ÒiªëúÆ+(L<%:Õײ,ÜqÇÇ·~ë×àè*“}jµá†©Y‡êË Ã€a(—Ë#AŠ”–J%†1b¬8÷‚;ëõ:ŽŽŽüŽŽŽpñâEìïï'½¶•P.Ú^ç7éúèEðñ?› V&SxÞðܽI›»0ÌYc¾š¨k°•z¿©þ4œæK~£ê¢ Ö¬—é0Ë#ª~Õó<†V«×ec•Ù l;[õªžçÁ4M¿‰ œ–Š¢øŽKJÙUº®GTUÅÉÉ LÓœ: uS¹óèè»»»~ZðÁÁŽŽŽðàƒnÔhÇí NʈH«t ª»qß‚I”æ¿¡é"Ó0Íé³W‰à( †9KLcÿ5¼HÅ‚ŒØ`GààøªW¥&L„¢(þè‘ <ùlcBw§ŸÚ¶È+ñ†a@Ó´Û#’’þ ³8®›ÙÀžç¡V«Á²,hš†J¥â™ÔdoÖ²I’Îtó×;àâÅ‹þYít:I¯mé(ЍY"O£® 
¥}gg€0j‰«W?Ž—½ìÙI/™™€ ˆázư¬ÑŽ×†aŒìˆL?d˜³€ã,®ø1n°Ný0M¥‘4á”ü AË0Ä´z=Ï>îº.ŠÅ"$Iâ(<³QØöêF‘- Êl »âääÕjuÈ0¥~g1JºwªSݤ¨ê$(uÒó<¸®{;|püCú$Þð†/$½ÌÍçYbÖ\N¥@ ãL­¤„‡qÇqü¤ׯn®»YE= à8ÃŽÇI,Ú´#Ø”þ5î„–0§§§ }sLZ©T&;W‚ÎÛ¶Q,¡ë:«ÌÆ·Ü) ¨5l¨rÓÖåqGÒ X·»CÃóDŠãˆh¥e ·°pjµ~ï÷¶°·WHzÙ›@…0X_ ˆT`Àî§7éL­ã¢FÁ¡Ñ„eYÉI’Ðßc¡×)•Dq>ÓÄãqŽ3Þ`5 #²65 Õª,yžç+(d°»þã¼ëìDb˜æt' ‰išh4\êÁl$¶.ƒ•ºõnooò,È²Ì†ê ¹3é¬IöÒPª–®ë¸÷ÞÏ¢T*ãÏÿükñ½ß{7^ÿúç%½ÜÍFЂ0* e‹ÓW!<µšØÙ´ÿ _&Þã ìãJݨ–.¨L›¦éÏøbÖˆçÅ¢ø·ÕZüæ§Ç?ÏF¼4_UFÇÉ þç_û{¸ç£÷¬ä+_·|Q_}ÐëNݰ7®Î‰d2l¨QÄ1®+^£Xoð‘N]W|idÔêúô.»e ÞÀiê¤Þ†‘ôN~ƒÁx²hÙ/GKé\j’¼>Çñ:ŽãŒŽ†g«;G2Ù&iÙ§(¿‡ªŠŸÍ3´ÿ0‹‘&§ºTÊ"ˆRGK%ñ¼ed?zžç;=ÏC­Vƒ¦i3i‘ñ Z;®NÛ4ço„ÞßÉXm4¨V«¨V«¼ggˆ;nݺ•ÈFðÞÞŽ`âE1É`þ³?û3|ðƒÊß„PdìÕ)ó€O~rø±yÇ8:Žˆ„Nk7j-³ bŽlC8f)»»»‹­­-\»vmED³*ù€{‡‡K_óÜP/úY•%ª¥<ê* ‡Í¢Þo–ô]bÑyÃäððp&Åa’ØÃƒX–ØReYˆ^±8ÉYCJ‘é+*Ž#ÄŒêUǪª#£j¨S°$Ið<ªª¢Õj¡X,rD+aö÷÷qþüù¥ïáiÛ¿ƒÉQ"÷›¿ù›øŽïøŽôgÑ03Aû÷2JðÂ$½ÑõáYÃáHjЗ=+µZ Õjš¦ùÆ*×y§ƒyuð;òù<*•ÊX!íõz¨×ë( K¹x¶¶¶°¿¿?$ôt‘´Ûí‰ï1-uy‰8Tª¾ñÞáhQñ µjupMRq5Gݘ¨f’F1ÎÖUüØGçü2 Æ4<Ä‹J†d%âüeÝ_e$ÓÌj IÈ70èf•Qdÿ gÈ«ê`üJÔi\nPаe#)'¨0Pr&ùßøÆ/cgEUGŒ¦ù’Ájš¦? 
J’$´Z­³3ꌑ´lñ¼É>jÛ¶ñä“÷à'òï&ýµ1"M2 ßÒƒÛªço$9 ×uaÛ6t]÷;µ7›Mvìdœ;.]ºä·°>>>öÓºÝ.ŽqáÂt»Ý¥[·Ûí‘”JKÎår8wîœÿþÄ´ð4ÆÙp+´­``t2âG5M®“ÆcxžHö¼é³UeŒìVTàw ¢¡Ó¨ ^Ã$‚–e@¼‡6ãù“¯ÛðšHB¾!jÑý€V\Ÿ¡ªãSxÇAƒ«Õh' Õ±ÎúºÌZHJÆAýip««V';%_ýêÇȾA¼N‚µªá+ Ï.–$‰Ó“ÆuqÏ{ß‹»Ÿxb©/›¤l‡‰j[Q,ŠÛ=âxík_»É• ›KpTàšI“ŒS2aðk±¬Á×2o°É4MN“ß@îÈçó8<<Ĺsçppp€xŠ¢àÀÁÁÎ;‡ÃÃÃ¥¥&är9 åæ!ŸÏ#ŸÏckk …Ba育mEª]›‘83÷– ]}&ÆGŠÊ6 gQÌé<Çw/E‰wU«˜U FÝÄ5Zç¡z{–aMë–obž‰0s¬œ4Ñž˜µ.ZÓÄ9‹6cVBR2 2ÉgáoþÍ?‡¢¸~¶HCci‚óSÃ5¬Š¢ø#˜”˜—üÅ­-øà\ïGEÛÑÇ–ìÝò<±é4€¥ˆ(bÔ\Â0<`ßÞ§EHéëq|éÓÀ”ÄûĽ¢mLD*·×4͘܆˜ç:¾‡¬[¾‰¨ùe+Iß¼UuòMÖ4Åyo§$7ontS¤,“”ŒBŒ3õ(ˆmÛPW®<Ç[mp,,Ëp]wdT 93Ïœâ¼i†ÃÙAÈÑ:é9Ó°¬Aé œUQ߷늽Ƕýú“í6°äÖ$e;ЍÑÒ?þãâu¯ªÕ*'¡¬ê ¢iÂÀ ÷B îót\ÓÄm y®TFoÊ4†-Á{[šd<ìP”Éåq0 ãìíÙi†®úÃJàyxñõëxÞ3ÏÌôRw¡7ᆽ^N'rfÓ<ìïïcww׿P¯›ÏçñðÃû EÞW–ÇÛt£«.f–˃‚*rÊS×ÓàU¨@¤Õþ¿{À§x˜apV\|ðÊÊóã¯ÍÀ`,Ì$h.ÄÜÓ*†gœ.Òá÷Œ°Nù& cô~J5zKÃ4µÒ„$÷†ýyš±±šj’q ºés4Mƒë¾­Û²tVR”•¢®ANNN6?ý—º}ËòÀ¥†¤QV«ÑgðjÝì8Ã2|ó Fi¨ý'Õ´ûÃÆ SÖR$%Ûa¢‚zO=õûø£?ú~ù—EÃY7gÇ÷°qƒ› 2VI7‹r¸ϧfµš¸N¢ŒÕð9 ’_&žç¡T*A–e6XãbYBΩ?ɬ¢ Ò™Â÷?*.ë}dç„÷èà=ð“¹W¾_6ã½õÎ8Oêt:ØÛÛ[jÄfÚü×\.·”‹dî>-46%n`ˆ:ÐÆF׋ªFþPà] ¨m@~Ùô×vÙ=zžˆ¸!šMSà\Ì^ç)ß~íðŸ|Ñ¿g„uÉ7±òQbáxªc%å”j«e™5ª fÝ2>ï´-2DƒFj0šJk”az&fô‘‚mÛ³uáv¤§â3ò¤3@çq^Ub>;ë–í(\—ôF ¶mûY¯xÅc¤ç­ý;Y9"×Yu‹¦áû{¥™šWRýhPÖMS¼¿èÈ6{$49]„4Èø2üÒ®+Ê= Ã@¥Racu,Kìù”åbÛBöÉñþQ‚À ¯{ܽƒÚù‡x¢ÝÆÓ3fÈÄ2X³ 凿÷©3Xuˆ:Ô8PƒRÎ= "‘Q5~4óô—ª€ù²x£nÂT Õ8F«ŒÉ#h&GògbÐÝ—ƒ_©#ê^¹´”wêþ1îNÑ`¯ë©ñ&3›Ã<:!e Ò`½ª¢(cg«f*sœQ¯­çˆ-š‹7F)aC’ d(ómgG#s#¶Y20«Uq¯¡‘g€cš]E û(4Nf%IDzHî© 88>- ß³ÖJ¸~uVlÛF¹\†,ËPÍfól8'A¥®;\^A÷‚°ÜÓ˜Ÿ :‰¨ðG¼m¼ÁJ{_Ôw:QA±/ºJ…÷ÁH’ a°Òß68û)¨Ð«ªˆV–1Ù`õn?'©×ë%½Î…wQŒUP‚Îw"re´*JtXkRšÎ$¥h\'߸§†ÑŽ¾Ò”õ0™ÇóÆ;en@p†7G&]Ì3k8˜OÖ¨Úî…ÓÊh€`0•§_©%Ý0ÄïÕ*G‹˜l›4ÑÞiŠÊ¢Qª¥BÍZ¨y09g™7©´º™U3oÆ ÖJ–kæéâdµgùó­ˆXÖut [Áý’æ3Ñܽñ'aÐ W‡èÈ;‹NN ŽƒŒñé¶fOÅ¥±4*x Íf£É0gšy‡ÈÎWQ«Õ–WÛ ê‰hTc'IB §æ’—|\7@† à8NdY%w¥&#’êPUu¸ŸÇ¤ûß›Î4¦9ŸMæyÇI¾ç€ëÏ §lˆ !:ÉËÊò?3w&½†•ÜëMÓ„mÛhµZð ¨êÔ:m†ÉÁÉIq —~мե@ KTà ô(XYa&@™à†­§œž ûpåb•%ày£´“ÆZ0Lª:_—eYéh´îíœCMi¼ì\*w„½½^ív;éu-áÎý TU…AéXã—4”n?¦>ŽE Û¨Q:‹ÜtÌf2ÕiÛ¶ 3àñ›Û`ÝÙÙö“Ì&Q.Ïw^Ð`Ue¨CðBМá(¥$iï?“YÈ3„£XéMSú¡è‘iŠûCTm-«Ì Ì[­dÛöú V*ùB+ýPdUUEèxÒdf.î?ðÐC!N]kV :òªWo?0©™ G;£Õ‚(™Öåf‘Æ\µˆÇ昵Μ‚u ÁÒ\éÀ†1_± 
ì˜yD2a•eyÈ©‰ç :2–J¥}š£“a„ìD[C3„Ã,:Àx’íð5!I¢8=Êw©$ UÛiˆp1™f–f‘¶mÃ0 ‹Å±×ÄÊY« Ù猵ÄÙø.ÁD°n©Z­F <ˆj­p-À‹Ñ5už^B…¥Ãi¼LLÈSi*•Êä†;;Ã5FAT—ÍÑÒÄÙhƒ•:íU«Ã¿$IÑÝS)÷\«F§õš&°ÿS¢™QÑ£fˆn¿óFEë ±¾O0Sð<º®£T*AQ”èT2Ir%ÿ–Åžs&µÌ£§„~j¼‰ã%¦ÙdÅœY;T£ÜšÇ)çãÌü†³tmâk‚YË[¬®Ço¸dÛ6TU/ ˜‚P”®|\–‡£ýÒøI&QîXü%ÒK0‚«n©Z‚½1ƇòøaLNÊ*[Dç§1:„½Ö¯ŒÉ Å¢ø×ó<_Áq]7ZÙ©Vùfajµù Kf…Ì[¯¾$IÂ÷ó7 ¸(–¬VÌ™ ©H„mÛcëW—òf,çÌš1 ±ÅÎÒضíùS€%Iȹãk™¢Xº>ê¬ñ<±°FƒÕ”²ÑVMŒb5 ¡°ŸøÿøöácÁ¨Tðb w^4*A¼Û·ß§ KÃL%h¬jšÛ¶£ VÚ¼=oØ»hY\»Ê¤×½CpT:¥â8h¸.ð÷ÿ~´s†Ó¾˜ o¿+K6OËm†Yyæ®.4OžÎ‹åë!õÜ `¨ÉR¯'¢‹Q—²<'vgHMŒmˆ™0·¡&Qé35£UÃbÍ–‚T1húÄö3…V °íÁ\2MÓP«ÕÆoòš6¨_"æŠÆ0k œ½5ê_­VGç­ÞNùzþé)¦Lêpœá-x¬ÓqqçûZÖèŒ`†Y!ó6 ›-Æœ9î¼xñâȃ…B!éu-…bqÝŸiöޮ盔*ÙàA®;¦ŽÅ›&ñõÉÌ@0ƒ@–eȲ<ÞA£iÃ2M粇‘I)qì®ëú5OC{¾ãˆN§Q3S&Dդά¨Û¶ØÛ«ÕÉQ%ΦaÖŒ¢D÷3íå wîïï'½†•B{òÔŸj:l3ïááã“4& "*jC4\b}ˆY#"¨ä E“*•ʨÌ‹ƒ=š'LÞø…;x0Ìêg°ž‡V«…R©MÓÄ5áyÂXåfJL†˜+R×…1j"k¦ÙŒ¾pfµf Ìî‰Y¿J5ªô®CÝH6ºé15ºJ³–ˆo|ù°w° àÜÑ—Y¶=thš6l°†óqªUž+Æd†ííxÏsá¸Ñu¦iŠk‚²eØXeRŠçEù+R–Λ»62éÀ4gožç8Îô«i ‡¤r{ädµÊÆê†r't:áâÅ‹Èçóc=Î,“~S-w¤–)Œm”Öá™ A%KŽãŒŸ½JO nâœfÃdˆ¸&Úç+•Ê öI’XÞ™T¥ZMu´So YŽvÆPƒ=UƤêëÏó†šIA#]W¬<'õLp‡mÛ¸pá:r¹œ`—.]B¡P@.—ÃÕ«W“^ëÌÐ>uã¶  wýe˜ãºBÎ=Ï›<Ç0&N£ã8C×@£ÑÊŽ¢°ÇI5Š2씫¨–ììD[º„ªÒ$&af |N­_­Õ„±Ê} Î wDSÏçó( ØÝÝE¥RÁC=„,Õ»†g°N°;³ì3!ö¸×eO;³ÑLUò&¥„û:Ž3^–)¸ÕšlHuœ<Ï08=žI„I"GÍòlÛö÷q×u5Ü–5œ)C3³™3Ž^ï|ç;'>éï|'ŽŽŽÐív“^ol‚C¸'FŸÂ©’s¨g˜$ð<@–Ýéæ)Ë0ò¢ëû„#¬ “jµáú¾‰:K©?\vRZߘµãy"…ã8(‹p]•JÍfš¦áÿøûûþHÈ»ap– ƒ;·¶¶°µµ5ô ¥tüÆ#ÏM+ª:pÆLŒ°ëW1N†Ë˜Œàº12&Óðéµ- ×u'GXƒ›8w{g2D³³ 6oøL†!ñ$ë#û|¨Ì…aҊ늚½¸®‹jµ -l”ÒèŽyGÒØ6¬L"LRQ";—JƒS©°îÂøÜÑëõÐn·'>‰Ž£®iÇ4ý¸ ³©ˆ‘ªS"¬Ü%•É0Á‰LÛÛÛ(•Jð"F2yž7|„G91LJ‘ea°V«UT«ÕèýœzmÌ+Ó¶Í} ˜D˜T‘äws'(‹`ÖÄÌÆsG>ŸG½^G¯×‹|B¯×C½^G¡PÈTÔÕuEÞüÔî©•Êð  ˜ôÊ&–EsX§È8oþL†ñ<šÐá@Ó4(Š‚b±;4crdì;"™ @Ñ'š!<–Je~Ç#§3 âyÑ~–‘NÀŽ3Ú•ansç¥K—°··‡ .`wwwÈ(½v펗.]Jz­3¡ëƒù”oÁ‹….1Âq„ør +s°mŠ¢ R©@UU”J%œžžúÇ9ºÊdŠ>Y–»º.׆1*1Ã,ȸi#ú¹¢ˆ™ª Áù|W®\ÁÑÑêõúÈvwwqñâÅÌ4["‚‚㟀;e3œ2k´&Ã˯]×õëúEñGȲ Û¶‡•}Ñ>;éå3ÌTøÂ¾°Ú‘LšÆéÀLbŒì8*•JÒËc2€è|éÒ%ìïï£Óéøóù|¦êVƒ˜¦Pè§FX&£~ó'™MEQDc1@DXT1UUý“@¨Wm³ÁÊdM>üá6¾ìË"ôÊ^T–9ºÊ$È8ñcýœ™…;‚¿är9 ÿ'«Æ*0h9u+XJ¶Ìe˜$1Í‚‹Emb˜ CÃ䃨ªê×­ŽDX6X™L ËÀ£þ§hÇcx@+Ãd¨CÆ*ë(L îXü%Ò Ù¨‘-³àcOßö߀6€ 
€SœYÉdÏ£¦Köx%):œ.ÌdÃŒü˹¢(C›!…ŸG91#û¸|n–ÄdÏ‹ö¹ø{ºiŠža¦°ÑëØúUÀ7>xÅ+DÍjg%0‚¦1¹®;ì ÞL“ë–˜LcYÂî—:¦( lÛ†ã8Ã+›g2B­ïðþÍõ}LƩՄß%Œï¤a9gb²‘+xDWMS¤H>õð ¿üÓ'9¢Êd’±£Êå·Ò²xœ “i¨aä4ƒu$e˜ë¢˜Œ°¿ÿÌ@vK%±‡».ïßÌF ªÑÛ±mÛP%ID—عÈÄ`# VšÛ7RפëâêÑTàÆO±RÃdj?Ò!¸ÙÀΧ’1™‡D{d¸üm4Mó V†É"_ó5¿7pÆœœå}g‡³c˜ Ê`-—Ë¢ã;g13pgÒ X%Á1>ZpÀ*±W‡É,Ž|ÅW\­Ï–$1tÛ4Ù`e2e‘óq|'IrÖp§l&‹Xà8!Çz¥Â)’ÌFày£ÆÊå2 Z©ˆŒÇ@çw†™ÄFFXeYèê‘id&EnÞLz™ 37žôzWÇ+êºÎ&ÓPߤ‰Å º³ÁÊd Ï£¦b.ö`6’`ýªçyØÙÙ$Ib<;Õ™ÙXƒU–)d†1(lUp“q(ØoÔÁ]ö˜ ƒ Öi ½®ëÙ4†ÝåƒaR„e‰lHÇq ~Â0É8<ÿôý_¡ª*ªÕª8P«q63i°¯<Íÿ …G7Zb2Oµz»iª O%G˜˜ ÃuIòr>Y–G;eó('&åØ6ðMßÔÅžpàPg˜ &HàììàÛnÞÆ* <îë-ÌLl¬Áêûð]< ˆ‘6 “q†º¢eœa6ª{²,k´Aœ“9Å’I1ž'úßû½:¾û%/áÔHfã°,!ÖÿéMo\ø/ÿepP’Xgaffc Ö~«ím‘vðMûÀÎíìxg6¿>Û0ÄÆÏ%fƒ ï¼išÐgMãh“rÈÇøÁ÷¿¯{â Vޙò€·œÿ8þ_> …æ“1̤Þ`ít:èv»³ä¯ÿ“×ã%÷½ø«ß~h ¨Bü0LŠ˜K¾!Ò¿>—wî(ɤ˜ydÜq€gžéB’¤Ù*qt•Y#óÈ·mÏ~ö1~æ%/Á?ðI†˼û·¢Ç÷àO^ÿzv¨3K!µcmºÝ.*• :Ñ ²V«Å:×vmüá[Æ÷\»|à÷×€#«LªXD¾aýßžû\QÌÊ7&…,"ãš\¾üïfO¶m®‹bÖÂ"ò]©Vé]øÚW¾’ŽL*YD¾% ØûþO@þ./úèG“þ(̆ÚëÁÁ¶¶¶à8®^½ŠN§ƒz½ë\Y–ñÏ\u¿þÅl¬2©cù€·~îsxáÿüŸœJƤ–ÅöpàWõçgOöŸ÷Ëçóñóèh6“þ±©×ëh·ÛI/c.ööö’^BæXX¾ ÕÊ”²“U9i·Û3E¾Á¢2®«*~ôúu`{[ÔiÛö u°ªŠŸ¨Hêéi"³ýx?[,*ß·öö dÈá˜Uáý{>•oÛ²ðKoyËìý‚÷ïlÊÖIE¯×C.—yüÚµkø…_øüò/ÿ2î½÷Þ¤?ÂL<ñÄxôÑGñâ¿8é¥ÌÌõë×3wÁ<ùä“xòÉ'ñ‚¼ûûûkÿyäþèþo~ó›q×]wáî»ï^ûº!‹rO?ý4z½ž_Ç“®_¿ŽÏ}îs‰½ÿ¢{ø}_ù•xÙsžƒ?ûöo§“Ä¿ðâŽû|ax_/O<ñž~úi¼üå/ÏÌÞ¿-CßyeÈþþý¿ñxë[ߺö÷_ŠþêWgFfxÿ^/¤ƒÏº§Ò`í‘rÁ­[·"/–|»»»ØÚÚJzù3Óívñ¢½h¬¡’f:Î.+ôz=ܺu+±÷Ç8ù€Ÿù™ŸÉäw d_N²¸¯$©¤ñž²~m&õÞã'ß¼¯Ÿ¬ïßI}ç¼g‡¬_›³JƒuÒ—?îbÈår™¶IŸ) dñB’•—yä{Úyi'«kÏò¾’äwÎ{xvàksvæ‘ï¬~ÏY^{–÷Þ¿×ïßëgyIe ë¹sç §%t»ÝL Ã,ß̦Ã2Îl2,ßÌ&Ãòͤ‘T¬[[[( 8Ô(Ù¶b±˜ôÒfaX¾™M‡eœÙdX¾™M†å›I#Ïê÷ûý¤E§ÓÁÞÞ¶¶¶ü"ïÃÃÃ̦0L–ofÓag6–of“aùfÒFj VCÝÝ …BÒËa˜¥ÂòÍl:,ãÌ&ÃòÍl2,ßLšHµÁÊ0 Ã0 Ã0 Ü]RYÚU:ÎÄQNgâ|«Iǧ»ÌϰÌu¯síÌjY¥|Ç9¾¬Ï°ìµ±|o‹Èø¦Ê÷ºÖάÞ¿Y¾7Þ¿“[ûZè3 óØcõï¿ÿþþ}÷Ý׿ï¾ûú÷ßÿ±Çó߸q£ÿ}ß÷}þñw¿ûÝCçO:>íÜerùòåþ;ÞñŽ¥¬{ÝkgVÇ*å;Îñe–ïEׯò½9,"ã›*ßë\;³:xÿN~íÌjáýûlÈ7GX—ÀÞÞ …ÇÁÕ«WQ(P©TüãØÚÚòw:ÔëõXǧ»,Úí6ŽŽŽ†[dÝë\;³ZV)ßqŽ/ƒ(ù^tm,ß›Ã"2¾©ò½®µ3«…÷o–ïM‡÷ï3"ßI[ÌYçÑGíßwß}ý§Ÿ~ÚìÆýûÿØc ýŸø¹Ÿû¹þý÷ß?òÜðñiç.‹§Ÿ~ºÿ¦7½©ÿîw¿Û÷ð,²î8Ç™l°JùŽs|DÉ÷¢kcùÞ‘ñM•ïu­Y-¼³|o:¼Ÿùæë‚äóù‘Vß7nÜär9ÿÿù|~èÊ'Ÿt|Ú¹Ëâàà»»»þ°èiëZÆq&¬R¾ã_Qò½èÚX¾7‡Ed|Så{]kgV 
ïß,ß›ïßgG¾Ù`]\.7Ôî»Ûí¢^¯cww[[[…£×ëM<~zz:ñÜep||Œn·‹ýýý¡ÇYwœãL6X¥|¯CNÆÉ7}–y×¶Žk“Y‹Èø49Ȫ|ó¾ðþÜÚ™õÀûw2kO6X—D¯×C½^Ç<€B¡€K—.ùãÖ­[úÓŸžxî¢t»]ùk žy×ç8“-V!ß«–“Iò½èÚW}m2ëgŸ&Y•oÞÃ7 Þ¿×»vfýðþ½¾µ'¬K Óé`ooNW®\ò–Ãña¶¶¶&Ÿ4¨ykkkáu#—ËÁ¶mÔëut:ܸqõzwß}÷ÜëŽsœÉ«’ïUËÉ$ùîv»©¾6™õ2¯ŒO“ƒ¬Ê7ïá›ïßë_;³^xÿ^ïÚ“‚ Ö%P©Tü<ú°P^z0<ßív}™t|Ú¹‹rþüù±ìË_þò¹×ç8“V%ßqŽ/Â$ù^tm,ߛż2¾©ò½êµ3ëƒ÷o–ïM‡÷ïõ®=)Ø`]Û¶Ñív¡ª*ÚíöÐ <…BÇÇÇCç‹Å©Ç§»(…BûûûþO>ŸÇ¹sç°¿¿EQæ^wœãL6X¥|Ç9¾“ä{kk+Õ×&³>‘ñM•ïU¯Y¼³|o:¼Ÿù~V¿ßï'½ˆ,S¯×#ç'€ã8é [[[èõzÈårC]Í&Ÿvî²?K§ÓÁáááÂëŽsœI?«–ï8Ç—ùY‚ò½èÚX¾7ƒEe|Så{kgVïßéX;³:xÿN~íë‚ Ö5ÑëõÐétDçÍO:>íÜ´®;éµ3ë#Ër’Õk“Y/‹ÈAVå;éµ3ë!Ë2’åµ3ëƒ÷ïìË7¬ Ã0 Ã0 Ã0L*áV†a†a†a&•°ÁÊ0 Ã0 Ã0 ä6X†a†a†a˜TÂ+Ã0 Ã0 Ã0 “JØ`e†a†a†aR ¬ Ã0 Ã0 Ã0L*aƒ•a†a†a†I%l°2 Ã0 Ã0 Ã0©„ V†a†a†a&•°ÁÊ0 Ã0 Ã0 ä6X†a†a†a˜TÂ+Ã0 Ã0 Ã0 “JØ`e†a†a†aR ¬ Ã0 Ã0 Ã0L*aƒ•a†a†a†I%l°2 Ã0 Ã0 Ã0©„ V†aömxž—ô2fí,[ö=σmÛI,†a†Y:l°2 “ÅbŽã$½ †Y;Ë–}ÇqP,“þXÌÆ4M¸®›ô2få°¬¯6X†a†a˜…°,‹•xæLÀ²¾~îLzg×uašæÈ㺮C–eOŽ,ËPU¶mC×uÿ¹¶mû©`š¦AQ€a¨T*0MžçAQhšæ¿ž$I¨T*þëÌú|†™Fù€Z­Ïó"elœ|,ãLzY‡ìW«UÔjµ¡Ç ÏóP«ÕFÎc˜y™$Ó¶mÃu]X–PUuDFmÛæýšÉ‹Êºªªð<Ï—]I’ ë:$IÀ²=aMÏóà8ŽÿcÛ¶¯ÀB¨kµáÑ)•JþEe§T*ù¯µ³³ã_dtÌóÃLcš|@¹\ö'“ôZAùggÒÆ:d¿\.Gzú=ÏC±Xô!†Yqd:HXFy¿f²Â¢²îy¶··}‡£eYC%,ÛsÐgRÃÍ›7ûŠ¢ô+•J¿ßï÷OOOûú§§§þsdY:tüääÄ?Þh4ú’$õûý~@¿ÑhøÇEéëºîÿ^©Tüךçù 3 aùî÷…Ìo4±å›ÎggÒΪd?x~«Õêðß+(× ³l¢dZUÕ~«ÕòË(ï×L™GÖi?¾†ªª¾>ϲ=;aMår²,£Z­UU‡ÒÇ4MóÿoÛ6dYö»C'/Mð\I’üt„qÌú|†‰KX¾ UUýÿå/Ž|‡ÏagÒȪd?x>A^úགa–Í8™–QÞ¯™¬1¬“Ü–Ëeض I’Ðjµ†ä™e{6¸†5%†×uÑjµüǦ< ´J&¢”†I’(ùžË7³ ¬[ö©~ª\.ãôô4éÏl óÈ4Ãd‘ye]–e´Z-¿”Ïó<躎F£‘ôGÊ,l°¦Ó4aš&Z­Ö‡E’¤‰F«¢(¾×† <×-1iaœ|Oƒå›É:IÈ~£Ñ€çy°,ËoÂ0Ëb^™f˜¬±ˆ¬Sã¤F£F£áSe¨i*N NÇqP.—Ñl6GMÓübo~Ç1"Ø…Œ Âo†I“ä{,ßL–IRö%Iò;VòèfYÄ‘éi™a “•õ`dWvð,¬ C‹Å"žõ¬gù?†a@–e4 ‹E‹Eììì ¥„‘Rbvvvü.’Íf3éÅ0&Ë÷4X¾™,“´ìkšMÓØÁÃ,i2-I ÈÂ0YbQY§6ÛÛÛ(‹ØÞÞ†,ËÜ[`žÕï÷ûI/‚‰†RÀdYöç°ÒŒ¦ âlÄA©d ³)°|3g–}&Ku–`C†Ù4âÊ:eHJ’Ä¥L ÂkŠq]ÛÛÛ899¢(p]Åb•J…sà†a†a†Ùx¸éRŠ ¦{žI’ ë:« Ã0 Ã0 Ü 8ÂÊ0 Ã0 Ã0 ä’‰°þÎïü>ðà«¿ú«“^ÊÌ<þøãxéK_Š—¾ô¥I/ef²:bä3Ÿù 
¾ô¥/áÇ~ìÇ’^Jl~ð¯{Ýë’^Æ\dYN>ó™Ïdr_qGGGI/#6¼‡'C–¯Í½èEøgÿìŸ%½”Xðþ½~²¾`kk+é¥Ä‚÷ïdÈòµ9ëþ½1ë_ýÕ_áË¿üËqþüù¤—23ŸøÄ'ð•_ù•¸÷Þ{“^ÊÌ<ú裙üί_¿Ž}ìcI/c&nݺ•Éïȶœ|ñ‹_ÌäÚ}ôѤ—0¼‡'C–¯Ín·›ô2bÃû÷úÉúþý¢½(éeĆ÷ïdÈòµ9ëþ½1ë ^ð¼êU¯B¡PHz)sqîܹÌxÒ‚|×w}Wf¿ó^¯—ôfâ…/|af¿ë¬Êɹsçpï½÷frí/~ñ‹“^ÂLðž Y½6líἯŸ¬ïß¹\.éeĆ÷ïdÈêµ Ì¾':‡µ×ë¡ÓéŒ=Þét2åA—B¡É vww“^BjaùU9ÙÚÚÊìÍ`°Œ xߟG§ÓA¯×C>ŸÇþþ>àÆþq"ŸÏŸ‰ó™Ï,ýµW%ßð¹Ï}ív;³Ýì˜õÑn·ñôÓO¯äµyg’¦Óéàúõë¾,. Þ¿™4@û÷4™šÞ¿™¤!|Öý{í)ÁAïÎ#<‚Gy»»»ØÛÛ0¹Íñ¤÷Ì3Ïà“Ÿü$®]»¶îÄdŒ7n¬Ì`]•|Bá¹víš“a˜q\»vme#?xg’fU+ïßL ý{Ùò ðþÍ$éà©7XiÀ-¥Àƒ>ˆ^¯‡v»=”†f’Wòe/{Ez]†‰¢P(àmo{¾ú«¿z鯽*ù€»ï¾ûûûì½d¦²¿¿{î¹g%¯Í{8“4»»»xÛÛÞ†W½êUK}]Þ¿™4@û÷*"ñ¼3IC:ø¬û÷Ú ÖIé ¹\n¨ð›èv»œBÃd–ofÓag6–mfÓag²ÊÚ Ö|>?ÒB»^¯ckkË?V(p||ì·mÅb1éïŠa¦ÂòÍl:,ã̦²Íl:,ãLVY{Ó%¨ÕjØÛÛó/*ø&ö÷÷±··‡v»íy?øàƒIW  –ofÓag6–mfÓag²H"k>ŸÇÃ?ì‡k:¦g˜4ÃòÍl:,ã̦²Íl:,ãLIÄ`D®ü¤‹`Úq†I3,ß̦Ã2Îl*,Û̦Ã2Îdµ×°2 Ã0 Ã0 Ã0LØ`e†a†a†aRIb)ÁLv°mÀóM›ü<ÇÏ#dYü€ë¦)~×õáó<¨ÕÄù€xŸðsñ×Ïs]@UÅ,‹`g$ Õ¿†x_I¬Åq€×¾ö^¼éMšôWË0 Ã0 Ã0ÌØ`e"ñÉè3Ma,jš0ƒF$00>éueÑüýädØØ £ëƒÈ«¢ŒFx%ú³1 Ã0 Ã0 ³°ÁšBl[tAcÌóD„“sœAd’RjUu.KéµAÃ2ÈÎŽ02)mײ†#©Q†*±,#1lÄFŠŽ2 Ã0 Ã0 s6˜Ë`­×ëh·Ûèt:¸xñ"¶¶¶Ðív±¿¿ŸôçÙ c8jI鱊" JŠ€Š20t5m6 ƒ6ɤ4_JߥZP‚D†a†a†aÒÀÌk½^Çññ1vwwÑëõˆyMõzN‡‡‡I¦ÌbÛ˜l6‡›Q­æé©ø]–Ås2RÇ5Eò< \œ_«MOáe†a†a†Iš™ Ön·‹££#¢P( ÓéTUÅÖÖ.\¸€N§ƒ|>Ÿôçʦ)ŒÑJE¡†1ˆnÏry8b:+²,ÞÃó©¿ÁæH Ã0 Ã0 Ã0id&ƒõÆ€B¡0r,ŸÏ#ŸÏûQWf:”†[.‹4_MNÍ véTëC\†a†a†a˜´sÇ,OÎåri”öz=?âÊLƲD´“ÆÂ´Zˆ¤ñ-EBÃ/ò¾ŠÂÑU†a†a†a²ÁL+EQ÷öö†ŒÓn·‹ƒƒlmmEF_™¶=h DQF»òR­ê¸n½³nÆÄ0 Ã0YÁóDé Ã0 s¶˜¹éÒáá! ÃÀ… ívGGGÈçó¨EYb̲,ŒÆe¡ ä˲`Û6<σªªPUòS<σs{¦Õ)uYc˜ÎúŽ‹e Æ·…G»ÂXå !†a˜³ÇÌk.—Ãáá!:ÎP—`n´4êú{–o¶®ëÂ4M˜¦ EQ ( 4Mƒs°«mÛEQ ͨ yž‡Z­˲Ï}îsñmßömI%Lq]¶mû?dœº®;d¤Z–…R©Y–Ñh4¦Ê¬eY¾±Ixž×uýëEQÿýq-¨ªêË·mÛÐnwlû⿘ôWÅœ! 
Cd5ÂЬՄJ³ÂÑdÊ\¢î…T¶bÛ⹞7èá ªœ!Ä0 sV™É`m·ÛØÛÛÃÕ«WÙ@Ã7bj~t– ¨“mÛÐu§§§ð<¶mÃ0 ȲŒjµ©Ð MUU!Ijµ\×…$IcÛ¶}e^’$ÿ|Š:µÛm\»v-鯆ÉŽãÀqX–娕JÍ ÌÈS«ÕP,Q­V¡ŽéžflÛFuÌdŠœ’³G–eH’ÇqüÌ]×Ñhô{{{ImLÊqÉ$ã1µíáf¥ÒÀð”$qLÓÏ×õáÞÕª0LMs-¥×£sÈÈm6Gǵue†aÎ.3¬ù|¹\¶mcww7éµg׊AV½ÃîmmaRj#E}ÇñŸOSÔ)¨LK’]סë:jµvvvÐh4†úZ­Ó4¡iNNN&F¨(â$˲¿N2ŠÇA³Ù\kj&³z‚{UU¡iš]Œƒã8°m®ëúQL]×!Ë2LÓD­V’EQÐh4f–£J¥UUQ«Õ|ãÒ»­ùSdV’$´Z­‰2õÙfù¼ Ä4E”Ä™:Õ“ÑIÇifwµ*ž{{«…m;;ƒÈgÔe!I“›V«â}'%°ˆ3gÃ0ü,›Ï~ö³I/‡aRÅLk.—Ãþþ>êõ:ºÝnäsö÷÷“þL©Ã²’¿é’!)ËrìÜp­¦i~Èó«s'.¶ |êS¯^Ék3Ù‡jB¹ùÃÌO°ï€$I~ÏG}/}éèõ>ëâóŸÿfÜÿóý²Žb±MÓàyz½—á+¾âŽhšŒããã¤?à áyƒ‘œäô¤žô»¢ŒÚ;†!ŽÓs‹E X¼¯zÕlï?³Án ÂLÆó&fŶm˜¦9±a ™ôC ½,ËcQŠÎ/•J~£Šô„iI’P­Vý÷Xezíª”x&yÇA¹\†¢(~“dP×u?µ<œ^­VÇÖw‘e­V –eÁ4M?EœR}Wç Ï¡ªŠ:?YdxÞ`”Uµ*öƒR Xe_Ódƒ•†”†ÓS6T™ÍÁó<”Ëå‰ýfÁ4M¿OAX¢2#ú!G©$I°m¯xEþ¶ªÀ0Ä= RŒ3|ÆKÃt]ÇÅ‹uüÎï¼/}ékñ­ß*®QË^ô¢$ýÕ2'h`Fáºâ‡T-˲jÛâÑj î®+ôM2MÎùryЯZ¾·„û´ZÀ·û]xÓ›îžéóÌl°Àññ1lÛF»Ý ¨ªÊu­Øö|éÀÁÚ¼b±8TßLm vÛé¬V«ØÙÙñ¥Ri(º5 Žz2ób,Ë2F…ŠÔo2d—1Že™©à¤@xÞ žÏq#7l[lÈ”ÕK>eVP£ Þ*Šø ¾Æ,Ð,JËi›t³!£YÓÄã{{ÿÀ÷.å;`Ö‡m7’åAgÝIP¯’+I‘\’¢Á0›‚çy~‡eYsïùTRBÍeYF­Vó3Ôˆ?ù“Ïà+¾â7oþ(*•†m §¤êïí€H·7M±/+ЏwD57ÛßßÇ… ƒ}\×…qûþ÷¿%鯗É8ž'¢™Íæ@.ƒ²G²I²ç8ƒž¦)zйÔ/hüªªs×5ŠËeñ>Ôû ˆ$õúu\»öäLŸgfƒµ^¯ãèè»»»¸xñ" ×ëáààN—.]Jì“FHÙˆ‹çy0 ®ëúF*ÕÖ5 Ôj5xž71µqÕjåržçÍd¬2̼P}N¸&¹\¦¹ÄÒÒ<äË‚"¡ª*”ˆ`àß4Åñ““Q‡¥[²<š"C£;f½ôg`˜¶ZâÿA¯&—kg“°'œ"ô”¡cÛÀöö@é-•„’pr"ž[g÷ UJ„¢ÔLS†IÔ[cVÈX¥ò¡Z­Ë`u]þðø×ÿúÅøð‡_‡g?û*¾å[þ¾ó;¿Ö¿?‘Óˆ"IŽ#œ„wÜ|Ã7ˆkÓ4i޲,ž~û #3H¹<¸/D]—• ðñÿ 6Z™ÑŽí€I2Fú õ' #³Tÿº®cê]@x"¨§èºÍry`¨Ž»L£WÕå÷'˜É`ív»8::òkφ§boo/^ÄÖÖÖrW™afQ¨®OÓ4´’Diº¥RilC™Y ŽªQ)/ ³l‚Í$ÂPMƒx^2´£"ž7Øä£.‘E×I5³®³TïMÛ/m’”Ý.ägú»’':xk¥ñ1ä4! 
ìµ+ “¨=¦5Ÿ3MÓŸB@)¹tN°±¤x.E~L9K?ûÙç?ÿ“øÕ_ý4nÜøüؽ/|¡xRÔɈßðUu¸Ükµ¬Ñ8•fd€Ós!w†Ð›‚Íó\wø~ (£5œæ;ðu0 «pžÏd°Þ¸q@ta¡P@>ŸÇ7δÁJ£04Mɇçy(•J~ý]”·‘šÏ,‹y›Ë0gÏóü4uºéSÚmP^I–QOJJ¸l€°1åI\ÔµUF c ¸¬U¼w\dy8­ àZĬbYƒ”ñ¨¿kÁãÜY—É:d¬6›M?“l{{{Ä)OÝâ)å—ôÏ`_Ã^ö²_‚eY¨T*0Mà=ïy®ëâoÄÔqd/ß{¢öÕ¨Óç½?#ŠÙ¨Tg\ç<”ËT’åI•Š=Mè1” ÕêƒRÒ%)Û:ÃóœÔëõ"7êæ¬P.—ý͵T*áòå/Œ <·, ÛÛÛ0 Ãï軳³ã× ò¬P&mŒzž‡V«…““_É(‹þØ#JÑ¢ MÓ ëúí6ý¢.‡®‡Ri0Ï‘XçFjš"¦R ¦)ÖcuÕ>YÆÈþ…ç fUòö°Æ@¾æý»‹ÅØMkµš?«z,óµ˜Í†d”Rh]×…eYCý3dYFµZõæÑ}¥\.£X,B–eœžž¢Z­úŽ„CTìÕ*ðßx/ÞûÞÿËÆêç?ÿ†Ûi:$I†ëŠ´úq°É,‚a šk- 5r†#¢š6ê$¡ûÈÎÎ䨽,gÛXf4X …¶¶¶`ÆqÚívýq7…B!éÏ”Á´ÇV«EQð‹¿ø.|ÕWýÿªO¥Z½Óæ9ŒL¡ŒFchfuÞmµZø‰Ÿx^ùÊ.þú_ÄÏÝr…baÛÂ8¤4ÛAW¹ ²ðá¾Õê ‘ÙºÆ 2›9Ž£ ôóÓÓÑç†p”ììy%Ã6¸íno‹Çé}ŠÅñe>] ÒhD³›ÆÌM—jµ*• xà¡Ç·¶¶pxx˜ôçI㶤Ó+• ÅÆ»Þõv¿yÕ§Rgß8c:&I Ãð=ÚQH’Œg?û·ñu_wŒÝÝ/úŽMžFÑPi8å‘¢…aâFû<â†0®.c—d¾$E7ó`²K­6[Í©çyðŸÇ#<‚v»k×®Ο?f#«4ë$¤».àyBÑ7 •J¶m5Sb˜´AƒŸeÐà¢Õjt%5Ma`R­§®´Ò´¢öúK v7]ÁÆ5iÀ²F*JŸ®T3]™ÍÀ²fëžHiöy"\×4Béœ`'îZ­†V«Ïó†F!«( J¥,˂뺱:Çkš†¹ VŠúa®ëú†8ÍO¦(²ëºCõ‹®ëâÒ¥K¸÷Þ{×ô—cñ½Sz¹,ËP¶mã]ïú |ô£¿¾ðßàà@Ê+•F£BµšPâMSü?x=xÞ Ééï=¸ïпaã@Ó4œœœp3IféPV˜iŽ“Íæ°ÆqÏÞ¿'9©éßïÇ3×Ön·‹\.‡ýý}@»ÝF·Û=sÍ–(Å7ª¡ ÍÄ«V«(•J(‹¨T*CŠäò‚«ªP ¾é›~Žé;XŠ5 î¥Ë`U:¥ƒ…+¥ZmX!«ÕÄzÉXe²¥z麸Fh&/E”Ls¶(~¹\†¦i0M3ô>ƒÁq_A·mÛ¦ëº?Ë›¢`%¥çÒ½‹jã–¦H’t;Ýß›}1þ;²ýÏEuŠüÏ "Ç’$AQ¨ªŠZ­†Z­EQüÈW½êUkþ ŸM(rêyžß— ÈüÀS>ŒO}ꇮ?ÏÜ4Y"D¥2ãä8“Ž…»¢âœð<È l¬2‹ fêŽ:· q.‡å9Jܨ”‡FË0Ëafƒõøø( ~ ðC=„v»K—.awwwÖ—Ì,tÓÚ m{ ¨F¦ir*“Z‚i»W¯þ¾ã;îÀ}÷ý.Êåç#jæêÜétÆ>¾ªÎö=Èy ¹^ãÆÃ‡À“ÒÁœM’ï¸Ð i@È«ã8xûÛ¿ ¿û»_†ûïþÚ§e9ǃM Â3+Ó ¥§™¯YíT™f_%ÔmÞôFc¸>O–žzÓ4ñþÃ×øÑQê¾M=‚±G‘,ФSä‘'#‘š¡9Žã§Öƒh%¥ÙÎ CdØ ¥ÏE ¡‚]]%Iò›΃ªª¨V«kÍT:k²],º¸reð™É1¯ëº "Iƒº¾ ¢Ä×Ä$ÏY’qêä–cšÐl ¶(š_JT*ƒl(Nߌ 7á#çbð‡p¿õ[¿…Ï|æ33½×LÖN§ƒ^¯ç§¹xñ"ŽgžÃZ¯×Ñét† àn·‹J¥â_D”´,,k °š6¾À8(xR=jœù«Ìæ“„|Ï‚a 6Ýà\<Ñlýë ÖÐÎ µ„ÏJ¹¸ˆPdW±K»Œ¯ Ǥ4ƹDE© w8'XkJߟmÛþcd„ Ù`´S’¤¡š×àëX–5”¼”ÖKk·,Ëï„¿ ©™gM¶à~W¯þ:ßÅ{Þóý¾“f›RJ»ªùàÜɨÎïLº9k2N)é¦9ȼ¢,™““ѽ<œ~.Ë›é'ggê)àºîS’œ ’$ù=EñÇz’3QQ”!Ç"Õ¿¯|å+gã9W ë8nݺ5ÓóÛí6ŽŽŽF6`kk W®\A¯×Ã… P¯×# åy b}IBºêª8éË^uó&ý$%ß“ •tIŠ>†Õ¤pÝÅ•œr9}Í•&‘åJ4Êø*¡ñäkó‘B¾¶È¨Õj#k°‘Ÿ¢(~$–"´ÁcŽãøéÀ“Ð4 wÝu×Ò2~4Mó6ÑçlµZ1O|e›š]9ŽãGD(zZ«Õð»¿{‡‡9Ôjßf3Û¶o7÷Žþ òÞlŠë!Šry´á “>6QÆã ëƒ9ìÀ 
Zåx̲ß-ØÄ.H­V2Hƒý‚¨ªêgæ¶mûûGµZ[_ 6îËL+Ía=88Àþþ>r¹€Aª0 ºÇ¡×ëù#3‚†n·ÛE»ÝÆ•+W¹\»»»8>>^ÚÅü~«ÕAJWjµ$Iò~‡¡.wÌÙ%Iù‡ã §ÊV*B©H‹± šÕÌ ¥úsþêI£Œ¯×O GLƒ(Š2¤8aôNÿO©¾Áó©Deš¡Hʇ¶$KBQœ†ç>öÓN§ƒ\.‡ÃÃC߈ÆÁÁß )˜?ãÆ Ã†o>Ÿ_Z}¹<ê«£Óðõi£i³Á˜³GRò= ËÌ÷rœò‘c˜?*jƒÍq3V™å’F_Ô )ø»m |ÞT0Û¶'F5)µ¶R©ŒmdD#D¨á¡ª*vvvb§øžð2•M”ír¹ì+®5ÛÒu® |ó7žG ffÙ›7-erÙDŸF°;(£YK±mےʲì; Ã0ü1š’$¡V«a{{Š¢d:fjÓ¥n·;$Ì4‡µV«¡P( P(àÒ¥Kxøá‡cGWÑív#½5“.Š^¯7öØ3Ï<ƒO|âh·ÛcŸã8ÂCfÚæêº.ŠÅ¢Ÿî4­áÏNL7Ýnׯ_Ÿ¹à;.«oøÜç>皇àÌOEJ¹ac;]'Ÿ4³qŸÉóÄ5Üj‰Î|Ý‹—J»ÝÆÓO?½²×Oj_–%F<‘bS.ÇoÐGý ŠÅâH3¤I× ï ¬AƒVQȲ¼´¨ièt:¸~ýúÌeHqHëþ½”N>innXo)‡gB3ëƒöïi25/›¼Oƒ.êà>K†eÒ˜¦‰íímß¹yzz ]×Q,ýûÎöö6ᘤZÓjµŠ›7o¦ÆX%|Öý;V„õèèÝn…Bªª"ŸÏÏîv»8::[À=邸uëÖØî3Ï<ƒO~ò“¸víÚH>>1KÍA-Ýø‡ÿøy?B;Éf¥èUS Î7nÜÀõë×ñÙÏ~v鯽*ù„ÂC9ÿóÌ<7s]w¤V. P$xž'"«)[~*¸víÚÊ”$÷ðUž+¼³3PàƒòH]vÃÓZ­Ó4ýšÀr¹ìÏ7fLRª.ÍMrŠªªê7¶³‰i¹“ ƒuÙ¤yÿžŠŒ:ŽÇqüÌ0Ì砑K¶-î A•NÓiR ãž)hÿž&Só°©ûw$i8£’š<¦ª1k•†ÊE¨$ltRj/uˆOƒA: ÒÁgeªÁºµµå¶mãøøNù|Þ7`gáøø¹\ÎosÜétpãÆ ÔëuìîîNŒÒNÚä_ö²—AQ”©9öQËRI($QÅ0 ¼ð…¿Ž«WïõOkÒ4n6“h3]Å ¿UÉ7Ü}÷Ý Õ„Àr¹ŒF£±ÖqˆÊ‚Ç4çÑYe쨂EIz_Ô9žöx]ŠLx/÷<¥RidTÕžœœø×9;ÉÃ= j`Ôsóe®ë¦*")vww±µµµô=<Íû÷¬”ËÀ?þÇ_À¯þê`¢5 Yž'œìaŸ —5%íß«pllâþ$ÜX2ˆe æ©ÉÈ8eÜPiGø~AoÁQ04³{œ1JuªYa^|¦Ö`TÕ¶m¿ÃØ,Æëùóç'?wîᢋ#øÿE0Œèôßq‰_ù•ßã8øð‡ïz¼Tÿ8açèêÙ%Iùž¥—J%?B“dñü$â:Ãf=¤UÆg…uUJLx< =¤\.ûÍ+‚£hjµÚHdTÔŠ,†8F&)*“¢±”þˬ†M‘mÚïMÓÃ{ÞSñeƲ†gL›9¦ƒÏ¦Èø8ÄÌëèlJÓ\o0‰Æ–ǽˆ5ŠTÝT«Uhš†b±èדH¥n–4sµ *»N¶mãèè—.]šè¡¡ºW‚º ½2…Ba¨#™mÛ(.˜hNÝC£ˆÚ¬k5à=ïùSü‡ÿ0z°Ñ˜\g·„ñvLFIJ¾'á8TëiB’¤Ô¥‡‘¤A÷¾qŸ§\æTà¤H£ŒÏC©4ˆ*³kµEšCH†h©Tò• jp¦Z­Î4BfÚµôÈ3ËgSd[Q€““^ùÊmH’Q×å ÌæÈø8t}Ѓ ¼UºîrƒI¶m£\.CQ”‘l5Ó4ýqeã²f*• J¥’ßKDç´†‰,ek>ŸG>Ÿ_Z*Àþþ>öööÐn·ÑëõËåðàƒ.ôš³ êÕ«€ïüÎ?‡ªŽ¾¯$oB@F1ëÌ8V!ß“ 1/†aÅJMLª«ŠÂqmé™ô²nŸªã›¤ض ˲àº.Êå2€AIrj‚Cd™æYj¬”VÒ.ÛøÀOâßý»ÿŽ/}I<2)+Œa‚dCÆG1ŒÁ¸¾p/ Ï[^-vpž1õ)ØÙÙA¥Rñ³jTU*‰B’$´Z-ضÍÕÌd°¶Ûí!ÏL»Ý†mÛÈår8þü\…ÖQFn>ŸÇÃ?ì×`-£€ÛuǧÒ|Ê úéO¿¿ð “Ç×XÖhZ"E^¹«¬O¾'A³Vd¦þR5ƒØö ~<#ãLŸÛ?S¦“¡V«¡ÑhŒ½ft]‡e‰†6Mö l$Y“m@ìºÞ@µZÅ·~ë AÎÿ¿½óqä<ïû÷l¥ršˆ åËY2×®ÁK v€ê¸8B8ÀlŒ•„lÂ5М­mÑ’õÒD&‹ÂâMÈHYdÙC.ˆ³6ѹ6jï‡4A"žwE14Š­§ÑR—4²¦âÉ’%ÅØþñîK‡3äp8äüà÷Òqøãî3Ï<ÏûüâôâDeŒ±“N©iRÞK¥Aoëë§±d_Ã0`šf¿”JŽ’Q¥ŸUS­VQ,ÑjµÍf':ªvè¬zÓêë:ªÕ*ºÝnßð•]e^»L–sf%•Jz‘Œ‹°¦ÓëÌI—N7ê†/ 
BÜZ¾¨Õ„)ì­ØDgJ¥áÆK†18§áõ$š,BƧÅ4›ãÔ{»ÝF6›»ÁS,Q«ÕXWº„DQ¶%¿ò+/àܹ‹}}oâA»„LC”e2Ýj =^,ŠÿÊ}CÙGÆê¤jš÷ÌÇ hšÖïÀ+çXf³ÙXd°ÅOkµZE.—jƒ-›-íîî"•Ja¿ße,ІûîŠÝÖøÜçþo¿}mâg*ÊðP«± ‰VgUv¥Û‹Iqµœ&iµÄ )&þ6‰0†sÉr¹Ü¿Fòù®â _øÍ¡ç9þ‹$ ©Ç×Ö­z½Z.-2Mo¶y­VC6›eÖL™è°ÊA××®]ëÏ_êõzÐuÛÛÛýçÖ××Ñl6GÒ†£Â¤ÁïÖˆÎ_ÿõßãÒ¥{ú\y´Z"JË› ™#G3ÊÙŽq⬛;…,Ip¸õPU{{{ØØØè7Yò5ÛuE’‹‹Q©ñÐCê?Ï.À$I´Û"ZZ¯d{ccÔVªÙ0éðÙ¬(j·ûy´Ûm¨ªùÆ”ËÊ»&½@×u …¡aÁr6õfôpã qÛ,×4ílGEüÛ4M<÷Ü<üð}ž?»V«‡ yB掌"Idú~œ×…›¿8]2]ÞÚ$#n× Yn4MÃêê*òù:ŠÅQÙå¾ I Æpz{±(äÛm±ÕQØtZû,òù<Òé4Úm6}$ÉÁ0DTÕ0D†ä¤Qd’r¹ EQ†6"e£%6SŠ7žVé¤:±¾¾ŽõõõHFV%ã²½¤1¯iÀúúÿÂÇ?þ¦J&$*Ôjâ¿Ö}#UUé°’¥Æ:`Þ4Íþx6™yÀëƒÄ‘løò—?EQúó&¹ÙN’B¹,dºXãù¼P«Õ`šf4M>Ÿ‡išýof›ÅO+ jToÞ¼ ¸|ùr¿†5ÊŽª¤Õršm5æMxöÙóø¥_:ör ñS7Häd™i·6kkk(‹H§Óýþ„Ä Mÿ½~½‹Ÿþé_†¦1˜$iÔ³ªªPUGGG0 ÃÒˆ¬ŒjµÊ> À“ÃÚl6±¿¿ßÿ÷þþ>677Q©TÂ^ÿDTU<œVk÷TE‘-¯…qÃÔv7œ”»¦i¬Ó KO6;HãÈwLøð‡oCÓþ_øÂ{ptÄfK$YL3&Ò4M”Ëå~5ŸÏ£^¯cuuµß\‰ÄŸ‰]‚á ®¯¯CÓ4hš†õõuìï‰"¦9¾~Õ:K6ŸÿzBâkXɲ£iÀŸýÙoÂ4MFTI"Èf¯}í+g³ƒi³ø"kTg¡V«¡T* EQ‹Å"޹A™ &:¬r\Íææfÿ9Y•£n¢Œ¬íp>¦ ó­–È•çmGd +!d€a¿÷{Oq¾*I ¥ðçÞ?½rû<eU\øêWñCwß½øß}N8õðYÚk«%þÞ†1^§¥Xœ êûÚmq¤Ó£;i^>_ÓðÁú§`Î;&ÈŸ^þ‰²Y೟}üàÃ^!3!kW¡4Í{ò!QEše­Ö æSÒÉn/—Ë(‹œ«º¤xN –t:ܼy³ÿïË—/£P(„}®ÔjΈ ÃXîÝÊz]8o^/|Ã]Å=ŠY* Íc7°5MDT³Y¡¼Ž—¨V…cm_c6+"_ÓþýªU±UÅÝ/¿Œï~øÃóÿD>ïÔi/&5¬šæ¿Å «%gÏuÃZêÆ8ç_¾ßk·'jµAl>/>Ëïo²„«ÜœTÕñóÐC/âî»ßöÒ™‰v»=ÔdiY“gH²·Të8™.³’:«Ë‹g‡UUU<õÔSèv»CÏïïï#“É ÑhD2MØkÃ¥È"‡Tù5‚¥³h7ÄóyqÌ˸ŽFÃÛÚbQ¤ÝZ×*køü ¸•®=Lîw£!î§x¾Ôéà-ËÆKÜI§G–X¤Ë˜¦Ø‘©·å²÷ I‹â³'9¬2=Ýigk²¡R:휪ëåü¥³[,úû 'J%üÙ³ÏâßóKÆ©&¤Ó wß}5×!.ÈèªÜ`Ë>$!“ tÙìðX>û-PÓ´¡n²œxêÜétP­V±²²‚F£MÓúF£••\¹rN'ìóÁÉÏ“Ýö"O­6¨>÷ƒ¦ ƒÝÍÝsǽ_Žß8>žìp¦ÓÂ9°nÿ*Š·÷º!; z¡ ùÓk‘Q%‰[S±HašB¾¬c¹‰2Íg¸,Êf©Ánï•s¬üDì[­A€Ÿ÷Ë5hÚ ÛoÔÿfÂ>ÉG–°Ë¾k†üÅ_ü@|6' q@UÕ¡ÒŽ F86ÒüÌf·}§ÊÆÆ¢oÓ¹âÉaÝÙÙ¢(ØÝÝ©‰³>¿³³öù ašÎ¶o,VYëypà/-P¦ð¸ßÝd ¯Ó{ËeñÝM½*ŠRi´ëï¸÷8nêÙc §{ØV§9ÑB:«¥Ò°|+Š÷ñ1†!>cœ|É;r3&öÑ•›IÇdz¥2ËôvZ ¾°îÊQ0 !FÏ=w+‰5†aŒØ+´ÛIܱWI[]ӆ˛dF$õ8™è°v:t»]<ñÄc_·¹¹‰n· ]×Ã>§>N‰­Í æN­æ¯ØD¦:¥Ú-4·ߨ˜”ZÁ’QÝY"£^0 Ì ÐXÂY’¨ª:‡UzÓbuVíõ(^V9ÿ·^Ÿòkw&eTµ\ïõ“j/£²öÑ2da¬®Š}©Þ c8Ð.û²?ÿî°—JÈLØ{0%˜$ûBeÍç‡oéö ²¼LtXu]G&“A&“û:Y¿ÚëõÂ>§±,\ø½8˜VÆ9«ÖF7³ê%òãª×½5cjA8~I¨[>k @PÍ$ ~^ı:¬¦iBUÕùïHJY†qΪ<‘q«Ìh4DdÒKšºÝ)u3fo3°BÄ0F7¤_«ÕÎ:I 
Q{à×Ã^.!3aßx4Íùî#²ìÕ?ùü@·[E>‘d!LtXS©îܹö:}á´©ªêâ„?›.Õn»;«Àp¾›µšÐÓã¥Òø:Öq4 œB¿¤HŸª ÿN«jù,ÈP°4éÁr_Ä4M¬­­¡T*Íßa-•¦ß)—ÝU‰u6‰Y3*›ù9¿tzúQN$R´Ûõ&U^>/ëV †ÕÕU|ô£‚l–p_œzøé GHÔ°·ž Öò&™]ÀÚUxpXs¹z½ÞHw`;²áR*• ûœúØ›,4VÕ4}6;¹Kh±èîûiþ’N·nMn„S8KCW»3Y:{øqZ³¯ýó–dVTþ(•J‹iýžÍºM{åà`²Ãhmó*Ñ4á©ÌZ3Jb¦ ÖlVˆ…Ü•7 GGG(•Jxæ™áÑG3³}!!ù^„øDFSí´Û‡•ÑUbœÚËåP­V]Ó}{½šÍ& …BäFÛX/ˆv»½ØNn‘"7òùÉÎæ¸”ÉEÞØ ˆˆfÞòïiqj¸duZ§ùLQuã"ÝAXÓ€V«…t:½Xwr.gEŽ2²OP#_H¬±N¹’Îí»õ¥R ÇÇǬ}Š*²ašl^&ÿˆ««À¹sâaGÓ€{ïï[[Õ .¼ôRØg2Wœ"¬µßÇ–Ù5>áƨ¹ªªB¶år€+ÆS—àíímt»]\¹r‡‡‡ýhk·ÛÅáá!®\¹‚n·‹J¥öùŒÅ0ŒÅt³Î[˜65 ™èø¨…hLÄrT'±Θ.ýVÃp ¯]›òÜÆ!Çw­tþCˆ…±xå®(Áwñ²qC–k:0 ÄDªXMÓ†¢Qì*9Ór7ž4M<¯ª¢n¼Ýü[>ü G Iý!w!ŽŽ€ÓSñ°“ϯ¾*^st„šMüß| ì_q®˜¦9$ÇKâÏD92­V_6%K¹¦¹ÊëÉŠ¦‰ ¿%Z1ºé(‘³Xå~»Lf†A ‘óååõãpm|à·~ w¿ñÆT{——år9ìîî¢Ùl:Ž®) ¨T*‘‹®!ؽö¸¼ ÝŒ'§¡S“Èç‹Exa£¬ á$n8€»Ãè…¬íýG˜®ã¯†a‡×N£)¾n˜gç6.°(<©âü/l\Þ?ÃùG”…×zLŠ°Ê›ï´Ž4›LsT”ŠEq?4 ƒÆÍ44Â:¬×Å=ÊIwhš}ÀóèküD±ÓéÁûwÅ0ŒNÕ8òÞ¤ªƒ/6“ªŠ~ ÒÆ²†ÿ†_+ëÊeçСiŠRû{TUØqrlà¤1„ ÂÍa=:[h¿™e¥\”Zù½wÊýÙìÀßQUqíŒÉý^*…÷¼ùæT_åÉaN«]£ëz?]xRa7ä7GW×u¤R)ߟoUì¦iÎߘ—JÎ:×QQÜwÌäø™ikHK%ñ¹Ó^Ì2÷³wϵ;‡i Ì&–10>…×J Â!=8û;¦‡ï“(Ž{?~/>rñ#À“æ-ß©4M[\:p­&dpœÊÙ"ì’X%ã'ñ–âW«iŒªzÅ0Ä}IÞ{ÆÍêž–ZmúÍÔ²hÙvÂa•‰¥Ñ²ÄÆ“¨“Òr ›lO›ÍÏÆÆ·a ;ãl-''X¶¿u³9³ÙÁ$‡rY¼>g5,·FRíÔj@½®¡Ñhàˆ¶Ã|GäúryØN+—›^ÜÍÜ^s*‘Îå4eˆƳ©Ÿ=6àÜXI~ï4ŽÎjÃu¨«éY”|K²YyïžÓDùFCÖt@§Ô'‰”qûî5I ‹–q`°âÆÂẕë4Ýtt$ ‡[·œ7†ä=MUcŸäCÖ§ÚÓxå܈yecäób½ù¼Ð/ CIU…±cF{8v²íF6›E£1¿®ª‘ÿùü!Oðà@ÈŽÝV“õÍÇÇâqt$nzÕê๠°M(aËøžËt Ã0ð­o}GGGñ×åvd²SKnÈll šÑM²çd™) ¯ ùpº_ÈûÊ­[ÖÖ{“£+Éfç·ÑéÀV¯t:ôz½~ƒ¦T*…ÍÍÍ~ªq·ÛE§ÓÁææfÿøúúº¯Ôë}ZUÕùìÖ4âìugcCüѧ?Œ:jSýpvëtåæ³'é’³Ô[Ž«ý&;KNñì3kgŸ;Ë,Ø€Y¤|K¤Ã:ù–ãdêõÁÎÚ¸BUJY¦­“ĆŒÁmLGYO:͵# ˆz]ÜS¬‹lPd7üdiçsp0¨’Íi"LX²í„¦ixûíŸì¥=é~t¦9Ú”ËÍË– Yä.ÔÞ˜yéôð‰Ëè¤95 l7 ÷M—_ü{ì±äEVånëÞžóF¡t<«Õo 7/^tŽjÖëƒû…×ß+æ×ËÔ)Á³"ka­óZONNˆ Cþ¿5§>—ËMœëÄ«¯ÿ{.ax¿ù—˳íP”à¿a’ ÷šÑ¦kÆ”÷ð@8È{QÖUÖ ƒY2÷”³Ïª}n„tÛ"å[bÀsÏýÉ|2A¤T¦,8uLíµ?$q„!ãÀ dÚ‰¹–|øEŽk›šòºÑƽYYûdÑ×f:=¿hnÀ„%ÛN˜¦‰sçVû&C6Q_ÖŠZÙÉfG=mëÅÌÍÍ…¶Œ·ZκÜ0 
¼÷½ÿ¿ýÛŸû'òŽ&ëf{‰öõƒò?7Ýk÷!ªÕP#™Qeák*•ªsív»h6›X__G&“A§Óq}o¯×ºÈ¬¼ñÆxùå—Ñétëhºí‚WƒCÖ_ø5PjŽÚ„cyŒéRj'ùÔ²ÓÆÙkƒ,«bP‡*;ú§\¿Û9•0u©n·‹^xßþö··£iv–MŒ¬úÚÞp$ŸŸœŠ%ñÚé°Î‹Eêo7LÓÄ+¯ü«¾-,ûkÍÙd¯Ý.²©‹”ç½½a¹÷Û’¸"õ÷$™š…Eëo‰›o·Û8ðÓ„tÞXgO‹ƒtôq6=»EÚàÓ2ÑaõZl=q¤ë:vvvJ¥píÚµ‘Ô7Æ)ùóçÏ#ŸÏ÷/:©c%Ú´Ó¶5ˆFEAÙ³Fw­ò/×äÕi•N¢—ÌekÇÝ2²õ;f9•,¦wvF*Ó›S¶ÔöÊ<ä.\¸Ð—o+Õ*P«éÈçç¼c-Mct(âT*•~‡Çy°ngœ±nÆHçú©°%ù¼o«Îá—‘BFƒæ¡Ã­¿ÝÐ4àÃþ.€ï0ËÀ³­›)¦)z‹£zÇÍ“'"õ÷¼F&…¡¿Ç!3 #WÖ Uf†_<°¦Kö4ƒqT«Õ~½ýâXYY€¡|ùn·ëëµêÖ©wß §Õi„j IJޤ—ư~5Õ1Ꜫ˜¾~vÉX”|[1ƒl9©;=úD–Ž0d|¦iúsX[-ÑÔB%·n±{ɉŠlÿÝßý(eX϶obY‘ã,êuçN½$„%ãíöèt"hµZ‹K–é¼µšè+cíªî´+º·Çôõˆ01Â*…:(TUE·Û…¢(#¹ò…B™L…B‡‡‡–h©Š5ûŒš)ð5îCFí~€l.à%Å«‘ú:«ž×Îö¯”×·¬ u[RþR›åoǸqö9ôU\ C¾/^²Y¦qy•b1â5ãÆ0Ú´Ô„£ÃÇï‘øÒñkkâCY_DÎC¶%2È/9o¾ù‹ÿ1ø“”cÇüL* ±'Lwª&D:ðqÐóheýµµ\Î!-Ã`éŒÆ†‰k§ÓÁÖÖÖPJm¯×ƒ®ëS7ÆÐOSsšÙ*¿£R©`kk«ß~;•JáñÇ÷}’žçó )rŒ:y²¶Ãë…ÕÆ ­x–Ôb/u+*„SéäK¤1»“™…húT†hÎD C¾Ói»aùµv+µ#åŸÂÑáî"ë«áR«5˜_GÈaȶDŽÇmµÄ~x.÷Æ|öQjµñ÷’h”q§j"Ù Ц¨ÒÖ±Ö’f³b³›“±ÅWÓ%Y¨=u](Ä…0)Ç=—Ëáé§Ÿî_X~cUlzÞ}O§Åõǯé<{3Ng¹ì½y’ áàÕ!:ïj#^ü0n$ œÑÛë­Kl!˜ZÜ"‚íœ@%ßV„8zIi ËG;4dˆÂqMsW»ªªN_òÁ&HÄ0d[¢ª"®_ÿ:L3 ›­œ) ui¸“¥$LwÊ”ñ¥¿'áf«ÓY5¡u žÄ45±nÈkÀs„û_þ%ýIñï4ÎRaφ_{5t Òg Òƒ øKÍuJGã컪éÀ)ßOæNò ‘|çw¼ïN‹¢©†Ý`1 áÈ–C–– drî–¹Õh4&µ7M¦?’ÀR¶¡z­ò}ãÆ»ñ}ß÷Àô$‡\Ú»øÞ ¹Ó!‰A˸œfÇwÿ7äìë(È!¾¬éRÔ(•†E zÙ„@Q€¿ÿ£ƒ—ðÈwâ­éÓǬgýìß&¦ž:µ£[‡p˜k„¹©”X4 ¸ï¾®÷ ™tZ\ö¬ƒrÙ¹Ñ! 
^wvX[­EqO nµÄ}, !Bš’7ßü>þñg½€”óq©„„ˆ[YG #Éq-°Ô#‘$ÖaÍfzÛÓ!weøúŸ;yïùeà=ÿЦðü6\žW!œÈi²©ý8œ%ˆÈnƒÆI$q¨*p×]Ó _F>?w@(øtš;’$v4 TÝf}llþà€)$Ò‹€¢˜h4¸xñ"^}õŸúÔ#“ߨn‹®{†!êõööè°’Hâæ°Ê‘6S¡iÂnq*Kt å’ØÙ”àY‘ ‘rà)Â*we~à1àú×G?"RÉÚmàK¿ü¼uÜ5àE-bº&Hª‡ïËiÿüÏÉMgd»vEÖÑÆ†¨ñÍ ˜ L"ŒSsë‰ÑUÓd1 h·Uhš†£££ÉúÜ4…ØåšÄ§jº©¢«ÒV‘³²³Ya›[ËA8+8ÑxvX­Åz½ÞÈs’ GàÌB»=찎ž#óOÿøÒï ‡UÎ%DÓ¤ ˆÈå8‡3ñ –¬©mÑPeÌkÙu›¸°·Ôj/"› $íö ‚jmåžÍ …Ï]yQ Ãy#}bíj³‰ ™²ž¦i(‹ÞœÕµ5aà°y‰1ž;¼—ËÂQ­V‡Ó}/^~¦qó&ÁLtXWVV°¹¹9ò|ÅØóÀšÝè)údÝ•ùÅw¿yàÉá×É®¿“lûi"¨%ˆ¦H²1³2É”hš†Ò$ÃEÓ†½5\EO"L:=]UUu|t•˜ Ç^†¢×²Œj•%$V\¼8ÚûËs„UQœëRíȹª‰f¢ÃšÉd&¶ÀŽ"Ò~WUur:°a ù'ï^v¹È—©¶[iÀ{ݨŒ ªs[kNk "¸çÙªdé‘3û€ ›2Œ4‘#û„Yñ4 Y$ȲSÏÓ Øo€Ä ·²RO›í€wy§³šhÛtÉšBæ)ÂjôF§ÛIC8“NÍ“Z>¬@¤C8ª²‰kÛÇg‘¥@6ú8gØ.ß„ÄUmj­iÚøQ^go2TÍ$éÈÊ#;Ž)Á¦IýMñì°êºŽjµÚ$œÏçQ¡VÿÕ4m|„Õ}2áÍIÌC8—ò”¥ãªb¶šÓ4†kZÙ3„¸ ("Ur↠Ójªé‰µOn!BNÓÓ4m¼ŸLÓïHÛÓ5øëÊ[° ] ²¬Š~ƒæE±3ÂJbŒ}B»Ýö^ëGH„QU±ãj¼¢+S¹öR ñM­æœ%3b›7ƒI^1 Fd—OëÎÎEÁõëבÉdúÏçr9¬¯¯cww—/_ÆÕ«WÃ>ò;qÆS>?ÚÑÃOéÓD44 6N" ¡\ö¸!°žÄ{t5ðAó„„„i Õ¬ªª»ÃÚ;'À7KIDAThp<‰5ª:ZÃê(óš6}}¶ªŠ4œr™Žk™è°v:ôz=<ñÄc_÷ÄO`Ýn7ìs*ðžhÐÛÑàÏaDduÁFX qAÓ<¤¼«*tØâÔ¬Cv&$îÈì׫ܭa7l’0eÞOù’¢ˆ÷9yÅ$QLtXu]G&“Ьè§Käñ“““°Ï À  4±!LÃ%‘GŽ›¸!S*Mn FHD±§˙ڞ:¿³;6‰8†1èì¸ ÃrsÜF£Ž4ÎóÛk#›uoCLÅD‡5•JáÎ;#Ïïîî"—Ë…½þ‰L=§6‰R?OLy'$ÆhÚp6»çè*ÓÃH ˜¸±Î†y$æ¸eÉŒàæÙzAQx,Ö\.‡^¯‡N§3öuò¸5êÙ¬xLL—”­„­äÁ+‰<ÒŽ™:åQ,g´OU¿Êë‚DktÕucVs¬{.¦i¢V«¡V«aoooô…~e½ZÙd$ÑxrXs¹šÍ&z½žãkz½šÍ& …B$¢®Ù¬Ý±÷LSÜ ì”࿆•a°’T俣Äs‡`¦“P* î¸ #e˜4$ÆX#¬kkk€ãããQ™O§ý;¬öI$žºooo£ÛíâÊ•+ØßßG§Óé?šÍ&{ì1t»]T*•°ÏgUUÝ#¬í¶óÅÁñO$È]K× ÃÝ% ‰1Ö$˜‰³W­Ða%'›ÙýL°v{Xàg‰8„­"Ë—êö©·ç 9ã.//Êår¸víö÷÷Ñl6Gޝ¯¯csss¤1SpMskŸ]ƒQCH„ÙÛsHyo4D{÷tz¶ÝJB"‚uÌp»ÝöÞ˜»í$âÈ”`Ó4…\—ËrÆÐÝòAHPU•åKd&<9¬€è¼½½J¥]×ûÏçr¹HÔ­:¡iJNyí2ØžC°~•ÄETÕ–ò®ib;Sp³†Ä{ýjÕKÇk¦É“Ðh "¬ÿõÁ…R¯Vµ5àø˜©À$ö¨ªPÇ¥Ò{œxJ ¶’J¥P(ú¨:«’‘2Ó77ÇY $&ŒDXå ]¥ÁCbŽÕ¾ñÜpÉ> ‡’NR%àK_Â.ó„YÎA€ªÌFXɬLí°Æ dž4å²0æí;=ÕÓÇ †:«*t’(¬£T=7["$&ˆ’½³,Yèpn6I ¥’0·'n6¶Za/•ÄÏ)ÁqñC°¬ï+ÕE­ª¬2d!Ò³gB"ÎÐ VÑ)ì%2eR¤¿{œ¿ ðZ ±À0ÃМ»¥² IR Õßíöp³B\H¬ÃªiÚ¨Ã*£ª5ç´:§$¶ EXe£%B‚5³WUÕѹ}nðZ Ç0D²W>Ï4I’L CĈêõ ú»ÕbVñDbS‚MÓiÓ>ûo@tVI¬á V’ddUϵ«„ĹCÙ&IÅ4uIÖj‰0,»a,O„U…pXYE‚ç™”„Äk:z’$ä^ãÏ#Ël’@ä ‚±ãÈ àˆs$‰7a zf„0Ô!˜õ$a¨êÀaeÃ%’4 XYùG<úÖ[L_'‰D:¬š¦:¬­–x( û 
Ï$Öaí§Ú˜&°º (àŒU’†šŠml„½BÅ4…/ÓÞ§Ê&X[ {ù„ŒÅ4Wþü‹xéÂ^ !sÁ0„/ú•?üC¬ÿê¯Z¾›­ÕØ\ŒLEbÖ~ k£üèt&$¨ª*ä[ÞIŠâ3ºÊŒq XýæçqÏÇ>öR i–hš†ÿö®wáÝŸþôp&ÁÞðê«Ì. S‘X‡µO» üÃãa¯‚ÀÉçóƒÙ„$9ÎÆ—ÃÊzWqòyàotðþO}*ì¥82ø«¿ó;¸Üë &t2‰tXûó)[-`­ ¼þ}L&‰BÓ4œ÷»E±o$øJ¶¦QêUo½õ~衇Â^ !“Ï ³äƒð¸k{;ìå„y‡U×ut»Ý©ÞcÆ ú„’˜·JHñ#ß’ýå/ÓY%‘ǯŒûNfŠø`ا0©T çÏŸÇùóçÃ^ÊÔ¼ýöÛ¸|ùrØË˜ŠW^y÷ß?Î;Ê÷û‘oxøá‡ñ¡}(–²G9„¬¼òÊ+±Ô)?ò#?Ú÷ϪÃ?ôàƒøçŸýYÜ …Ôá‹å¾ûîÃíÛ·ñÞ÷¾7”ï÷#ßÔß‹'îúû¾ûî åûiƒÇ‡8^›ÒŸVGÒaÍi÷žÉdŸ¿té.]ºöÒ}Q(Â^ÂR®=,üÈ7|æ3Ÿ {龡œ,–°oêðøǵ‡½f?òMýM¼öïMýâ¼öi‰d ëÊÊ €á´„n·;Ö˜'$.P¾IÒ¡Œ“$Cù&I†òM¢H$ÖL&ƒB¡0T,¯ª*ÖÖÖÂ^!3Cù&I‡2N’ å›$Ê7‰"çNOOOÃ^„º®ckk ™L¦_ä½»»ëZßGHœ |“¤C'I†òM’ å›DÈ:¬€(ü–]Æ–)O›,”o’t(ã$ÉP¾I’¡|“(i‡•B!„BÈòÉÖ¸¢ëúعSº®o5îø¤÷yA®{‘k'óežòíåxPçôÚ(ßÉaOª|/jíd¾PS¾“õwxk_§dfžþùÓGyäôÒ¥K§—.]:}ä‘GNŸþùþñ“““ÓŸÿùŸïòÉ'‡Þ?îø¤÷ÉSO=uúéO:u/zíd~ÌS¾½ »|Ϻ6Êwr˜EÆ“*ß‹\;™Ôßá¯ÌêïåoFX`kk …Bš¦áÆ( ¨V«ýã;;;Èd2ý㺮£Ùlz:>é½AÑét°¿¿?ôÜ,ë^äÚÉ|™§|{9Nò=ëÚ(ßÉaOª|/jíd¾PS¾“õ÷’ÈwØsÜyæ™gN/]ºtúÚk¯õŸ;999½téÒéóÏ??ôÿ’ßýÝß=}ä‘GF^k?>é½AñÚk¯~ìc;}òÉ'û;<³¬ÛËqæ)ß^Ž“|Ϻ6Êwr˜EÆ“*ß‹Z;™/Ôß”ï¤Cý½<òÍëŒär¹‘Vß'''€T*Õÿÿ\.7ô™O>îø¤÷ÅÎÎÖ××ûâ'­+ˆã$ÌS¾½'ùžum”ïä0‹Œ'U¾µv2_¨¿)ßI‡ú{yä›댤R©¡vßÝnÍfëëëÈd2c…£×ë=~ëÖ­±ï ‚ÃÃCt»]T*•¡çgY·—ã$ÌS¾!'nò-ÏÅïÚqm’Å0‹ŒO’ƒ¸Ê7ux2 þoíd1P‡³ö0 Ã½^Íf>ú( …¶··ûÏ»qçαÇ_y啱ï•n·‹ýýýþZíçãwÝ^Ž“x1ùž·œŒ“ïY×>ïk“,?2>Iâ*ßÔáÉ‚ú{±k'‹‡ú{qk :¬ ë:¶¶¶ ë:®]»6´[b ÇÛÉd2cÔœÉdf^÷áá!R©TUE³Ù„®ë899A³ÙÄ… |¯ÛËqæ%ßó–“qòÝív#}m’ÅâWÆ'ÉA\å›:<9P/~íd±P/víaA‡5ªÕj?Þ.$2/Ýžïv»}w|Ò{gåòåË®ìûÞ÷>ßëörœÄ‡yÉ·—ã³0N¾g]å;Yø•ñ¤Ê÷¼×Nõ7å;éP/víaA‡uFTUE·Û…¢(èt:C@ìd ½gmmmâñIï•B¡€J¥Òär9¬¬¬ R© ŸÏû^·—ã$ÌS¾½Ÿ…qòÉd"}m’Å1‹Œ'U¾ç½v²¨¿)ßI‡ú{yäûÜéééiØ‹ˆ3ÍfÓq~hš`®ÉdÐëõJ¥†ºš;>é½AŸ‹®ëØÝÝyÝ^Ž“è3oùör<Ès±Ê÷¬k£|'ƒYe<©ò½Èµ“ù@ýµ“ùAýþÚÖÑëõ ë:ç¼ùqÇ'½7ªë{ídqÄYNâzm’Å2‹ÄU¾Ã^;Y q–‘8¯,êïøË7VB!„B!‘„5¬„B!„B" VB!„B!‘„+!„B!„HB‡•B!„BH$¡ÃJ!„B!$’Ða%„B!„Iè°’@ét:èõza/ƒ¹A'I†òM’ å›$™$Ë7V([[[ý!Å„$Ê8I2”o’d(ß$É$Y¾é°B!„B‰$tXˆ Õw»]BUÕþ1]×±¿¿?·ùݽ^ªªBUÕ…¤ t»ÝD§(aÂ’ñ°ä Œ/Ôá$ÉP“$Cýoî {ËÄÖÖÖ×סª*r¹: …2™ :R©šÍ&677Q©TÿîJ¥‚«W¯"—Ëõ/Êk×®!“ÉÌå|u]ÇÖÖEA¡P˜ûïKÂ',C¾Êø²AN’ õ7I2Ôßñ†ÖÓétðôÓOcww•JNpýúu\»v Š¢Ìm‡çêÕ«ØÝÝÅîî.ž~úiÜsÏ=8<<œËwY/”ííí¹ýž$z„%ã‹”o€2¾¬P‡“$CýM’ õw|a„uÁ¬­­!•Jr¹@Q”þñ•••±‹Lepc}}Ýu·f}}½ÿ©T 
+++sù]×qõêU\¾|91 ñÎ,2ù(ãËLXò-S‡“yBýM’ mðøÂ+ œf³‰{î¹7oÞLDÞ> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½K¯.Éu%6篸C{ÀÃx?X@Sp› ÉM4zĦ([*ÉM 0Ú¿Þkí;âÅS¥,VÝïÆÍŒŒ×еßÿýgþ›Ãÿû9ÿS{øöÛï~æ>|Èò·ÏŸð×ÿýgåÃwüE¨MþŽ?sK¹~ó)}‹EžÅß÷3SÇþYÿà÷ø³{þÀ¦üÙÿù³Aï¿Çï¿•¿{þñ~÷í~öüe=þû½Á×ü-ÄŒï)ÈŸþYþT G‚FùÃ?þì/~1¦ßÃOûýÏþËÅ?úo? ßþ#úû¿ÑTRn1~ûæÝ·ÿôiß¿~© ã»g`oôbý(aŽèù?btÎù_êoÿXý®¯w;G(3»~à?Ýæöî`u9ÇŸÏ>ñKÿÍ—žìßü¾@_ÿû?ó%Ågü()ù†Gsh-ÖØBlø›ß<€ñóà?2øö›ï¾ý—ÿ‰3ü?û¯ß~óö~#£ýÞ¾‚sᣇXRõù+}•«¯·Nã:ö¯,à8ê/ŸÆ}4壔Þ{óõeñ5|´îxŽ~·–»÷î£6ÿIï«áý5u1 ×’ßœlvh&ÛôûúdK÷ÛdÛÞ¯Éþ~üñ]´ÐžÂ¿ .Z½ ×üÀƒ¯íÄ»ú\,1;ãN˜äÕK á#¶˜£÷åêÓ9:ñؾÛðÑÂ÷Ï݇“ÿ…úíçOÛ§ìÿ¢öíRä6À9øHC¼>¤à?Y¿äoù«¿ûû_ÿâ·ÿú‡ß]_´_(vjêÅñè…þ-u\ľÖRr¿§€øþó¤}ÿñ÷¿û¼Ý±îOÂCÀ7•˜p.ÓGÀßôœ=¡¢y\ h ¥} 3ŸRÆß¶Œ½ˆM˜Òð\®t-KÞÉ[Éÿ-`żëÞg!ޝì쮆]úH…sP£´á•üœ˜o{½úä|¯MÚ ݵ†=À•µ7á0ÁùÊOñ‡Ëá’OãaÞ™ñΊî}©KÝKñÒ†wò["ºË…\!ûÒ+›<^™HH¡±a’Ç mxe ¯ûª`ûÈ…ÅÁ&?Á5œ¤ +ÚpiW~JʤŽß|ò\͆%@§Zñ™Ò„WòS––¯t -È+±O*¨a’0#9áø}.â•ü”ì1+‹F-Ÿ¥ ïä•ÊþÜG+±`ÚexÉpt.: Aì̉:Ò†w®)ÃK° SwMŸãþ`XLËGÃI÷IdœñN®Pü–øÑ+p {Y½Œwò[">" Úp £kÒÖÈs0Ì)w^(xÙܱXt„ƒÅýÄ6¼lîX¾ s§‚tTð2L. :0D./¾mïÄÚ4@|`©©æÖåãkþ»Ëk[/Ù…"߂ӓ¹8-ã[BÿÀÌ` ^ÚpzÚÚ² Á-WþNOX[ï¬ø²t[âôäµe1†–[Ò‚ÃÓÖìc¦Jçö’ÕÆÙ œ¬Ú­ÏUüËæTÚÃÙ \¼K&,æ–q ¼´µyÂùJìeü”ãLÿèkÃb}ðùÑÇ mxåÚ°ü°[’¶á•φ ”¶1mQÚpv:ç@Újèw”¶áòÊÚe..‡S&mx'¾eù¼g@6zä,a}Nú\Åå hÖ¶Œ{{mØÀ—”œs6¼óÙ°¸Óþe’½)÷?·VÈÀBç¿B3ýÿó@m€sy;È}wøÈX¶‚{í3àöü 8ÀÙ7¦’W=Ž×ܘsŒ Ê.º;Bè¼!wðœ§èF1ÀÀ-0uX=É;p©Kh¸‚Z¹€·4N?¶³lLÜx‰-wàŽh‰k·¹|w °$B¥¸ù]XPï¸Ý1Ÿå¥'nË$`ã¥NÜÆ ±Õ±Ó«ëncpØgµû|Á6Wø• £¶{n¬¢¢†m\žKàkôlw³V±ãÛÛ]L)qÊÛ^v”E'ÌÀ6úkql¿ÛD+|%7^° .ˆC Ù$… ¶ÙQÀë²®šmÂÜv¥ØÆGD|\Í!\°þ°²˜óœ.ØÆ¦/è­ôT.ØIÃ=…‹¬_¨Mþ†¥é nP»q7•̹ya7nžØ–dxvƒ]R_§¯Ü±‡snw`7_ß Á^{;±ó)@{3ØÕ?nç”óU”¦ðÖYÀm–”ïàÍþp`A0Ó ÞXe­„i 'xsúZô˜=¥¦;xóâ/Á‚)MÞÁ›ýå‚ÙÈ:†ÞøKl„šUŒ2àMdšÌ¤¼Ù‘ë¸<ÇNØÁ›[¦g|EÍýožcÑ~æ ¼@8䔓²à¼yÅã·„Cày!d7z7Pƒ‚ŸÓ͹+vzÄ,çAŸôæJ¥ÈÚÛ…Þ¸¬]ü¨4Ø¢7°BJüÝè]+HÇÂɽeÑ¢§«MHʼnÞèRdn­÷½;·66|½Y7„ç±,ät£7ä„MŒCr³î.ÒlÁ….ôFØÖqÞ½9V^\½\è]p‡:ÜÙ`7zã‹`ÜÊÞ…ª¤ñ%¢òÃù…P†Íû5«ÆÄn`iäÝJ~pb·çEßxi;±›k1>&,ì‰Ý¾€×AB#¿&¾ðÖ‚`íåtY…Iås4&\¼›Óƒm ÚÝ “ò€¯Né…I¨à­ˆäcx7¯-ì>W/è¦LQ²õ©KÙõ%¼a2$7¯`ô%”¥0¿Í… »ÙæpH’oùäÝ"æƒx;¸Õ—ౌ¯‚@›.ìÆ0r ¸Ç‹xcaƘ3wëKІIZ¦ »9<ì·~zcw Pó¨ýâ6Šÿ¸²ÛE¼ñÎÒ ®­xï@Yó¹”»Cúà Gå6»ñ€Ï 
D^é§!Þìæsëñæ„c) ¡ÝØx´(¹‰ŠÏb7˜H Ø›N/O«/IÌ­zƒÝ2‘ ¦¬Á`·'MÆ™¬é‚n4QÛP]:¡Û{vÍ=Ùnèö…à‰Ô›x{`"Ö¬ê¬Xèf[Ê–xC7–CÁÉ)ù„î@½g¡&õ†îà¨lLYõžºñƒ2 €tƒmtÜ\­Ý†>Ñ7Ž;2·œ Üe°¥¸Ñ±[1 ùn´õˆýÙÒM»ñ%¼ªû˜K «Öœ¾F»ÕO!öiω–%þI?º¿ê§ðY߯˜•ú0l¯½Ñ Üüž_¯; ä°ù)¬¯w;G(3»~¼í§0ƧË>÷SøK¼€1$ô9ÉßõLˆ`å¼$ x|É âmÏ„çü­ƒþÊ’•°ó÷ ¶Ñˆ5J/™waÅË÷~ß7–wë™`{Ù3a_S'Hã©›}o²Ù¡™lÓïë“-Ýo“m{ÿºghh='±,¯y&ØNÞòL0½ü0Ï„ï±üãe⚘Xó÷XþÝnùÿãÿxÏòûaù3„ðY)É&g H7M!Þsƒ$JˆK§CMcô¡9‘%Ä)€°ƒ™Gþ&ÍM”§\@ÓžB—Ç(".± CžÚ)â$¾ÝòJÉ&‹h™‚±üƒwð….öÐÌM­åÒv6ˆYÆò©(Í‹u?QD\rè'øº¨Ûh®[r6 d%Š ^Ú¨¯œà•D'È€IÌ= "b]rä>ˆØ ÂwSÚ-ÿžúЈ…î’±ügªfR­¢*L¼[þ!É6ŒÏ‹J'ecù/_ À;$ÄôRAÍñ–aÌJëcH¥ Ù¼hC*5åø‚˜7ÅôR1tìlHÞ²zÕXþÅ%,² ØZþ±¿RV 8Uµücþ+­%"ú¦vXþÑE—»Ë?4dÏ|D3–líØkH"(¥n,ÿ§!ªâôË ûaù§Y*ªV0õÃò_ êÆ,bði·üC¶, N]&9;cù§ª‚Y½ÝaùÇ\€tÉ·d¿Ûþ©{‰/iÇÛ?Ä©ìÞ#ŸB©·M˜Ç0eØñ¢½Ì”½x#©CDÇ%9]ÛŒí‡ÕgG£´áò•¹‹sF.¾Ð>öhdYñ ª¹‹f3Œ¡©GUÚ8TDÙÛ?¶F(JlBžkÑfª¯A*8¢8Ç~ÙmÿV^“è6r2¶Oñ@¬Ÿ™é!ç¨'•í.<² ù°ýC2­=‹.%ãø{ cû$ït¥‡*§N.Ø(—G˯ZþàæV/ÛðÂm ÍfYæãÀm|)ö'槆ۘ~L›.ÜÆÚ`†z¬n*Ɇ;—n*±…ѦG?–À =AÚ Ü4NÐ ¸Oà&]ì cuõnš2pÃ;±…অˆ[\á¸y-q¿TÕ Zàî´½Dì¥Fü« „Ïâ6û+Z?^ïvŽPfvýxÛ|4Ƨ˙~°ù@Ð!^;õô²ùrHÿ¾dªzÛ|ôœ¿uÐ_Y²¶cþÞùÛF£!žrͼ +Æ¢aú}Ý¢!Ýo ÛûËæ£}Mó;4“mú}}²¥ûm²mï_7Ea €p÷×ÌG¶“·ÌG¦—WÌG ã@ Øäï1¡{c>úÃ?½e>*Aö@þ‹˜Š£”ã(†Ã‰DÝ oíÔÒS΄Ðæ'ÁRÒd´\cÃd_•¸Ë±x«5.®DõƒÄ¦ŒÒ†] ±N)\4ñGÔB¦ŠðÎB ˜ì•ÊK>8(ÐﮄČ÷ÂeÒÖ>áöâð UâS(l”¼ ²bÐÆÛ›!«#äÒéD¼ &õƒ\*-Ô££„d ý¿Å{àÃ=*Ê K¬’¾³ì¢l‘¸éœ‚x•Bôþ˜<RfèDl.u—d[k§/\ÖÇÔr¨t°%}@õSÚ.ɶÑ’Á1¢¶)mei‹‹XÕ ”¦~*p< »"K€Xé»([%Ð:¨«Z è—\ÀÇbmÉË6ê*È&ÊS˜_ÑÃUJˆs»B¢`Ï–(m» ‹™‚Lƒ=(âU¥„8·+}l!È@Za£ú]-ÜÊâH_¤md±„tÚ&P)"Îý*ß’»o*×°K²U ( i"ïTŠˆc¿RþÅSS7QÃ.È¢;œPƒÈâ•âܯòs(ðkÜ%ÙB¯Wü3 `o’ln¢ŸÁ;5IŸ‰yN¤ÜšŒ{ú õëêŠ?n’l‘XíÐ’ Õ¼K²ôZ…Š1È·äM’¥³™ÕTUó.ÉæNGZ Pöy-»$›¹Ñk¬EP‹¼I²8þ‰L¬e—dñ\á|iX­»$K%M Ïk›‘d!¨‡Lu…LYÝ%Ù>h[uª‰—ëU$dkÐQ’ý’õèÂmÜ_8r*ÅXÜN ‘ŽÍ‹‰Îâ6Ä«”©v 7p‹ÑÖWUkXܦw×p;p;0â,»<ðÞXCÍÅqùÂí,ÊHœ»rá¶ÇW¢·EMoqÛÓÞ,1ÞþÄmñfwŽîOÜŽJ"‰™ó'pÓ?¾0^AC pó9 nÕ;dn>T+œcp[šr¦ö?¸Í›‡!¸9 lÞc¯±û·å¹Œe(ºv;nËs¸é*åÄmi£Ã¡úPܖȧWë‰Ûòö<õg'pó9@ ¾;>È­i!0Yrà–›83b\Ñ˙У¸ âÀm6‡µ bá5¸M‹fÄLÀÆ·i»kŽûl<·á6Û°Qfò ƒÛ`ÑÈ÷%Ãuùjp[¢‹JÀÝX/Ü–çÐAWWƒÛbbdØD• dp[,šêjOÜæ;S£E-”·%*¬¶:ôÛ·%>Ñ“‚ÞŽÛò-ÕŤŠê·9ÓÉó«õFÞq[VèçÕ Üà¶¶9ÆÅö·¹W–•!g'n³‘a®ê*ì¸-6gP— Y·5J3_â…Û4,cÂ@‡ôÊj#p=JRù’û×mGtc»g̘»‘;d‰èÓ(n Ü ˆ 
_]´²¸QÛ4,Á"·§÷Kî ½>‘›Våˆõƒ™YÆMä!Á÷ýDnL%¸ ¡¨ŸÈ-ñrØ¿M“Íä¦i/2“O­r3: Ë ‰,^È-qv¸ì@ëE¹+-4µ ¹iªb¸AXeÜÌU†£Cðɸiwqˆ"ÆÍÞh® ƒUo-毈3ß2nROl2ÝÊ–pÚö]Ï7ߦ a ê[aÙ6¶ŽšÆál[â³û‹usþŠø°eðáF`¢Rî…o[¡1fÉ®A]˜„­€õ£·È¶6½ zÕKûã¾ ­Ìßüêûõk޽LËèbØöߘz›É]ÀHéeP}é˜Ð¡™¦¥¶ûËÇ)Fh®ã«ÿÔ8ÿîÿùÝ¿üͯþׯõ«Nqû˜½ð+à@W׊}Ц팺jÞqJhW½¡5êÿðýþwÿòƒ†üE3jztª®zºF½·ý€Qƒ°QÚN =zÔÿéÿÕØÒ?ṉ-{f˜íט÷¶` f"îi¹Q¹UƒÔšl³†æ§ý¿£ÛÏÓH¼F÷J?ÌVúX‰·Ÿ¯Ûk=ÕÖsý© Åk:¿ë×Û¦â9ÄÑmýÜXüåzšÕ‘×;–œaÊêWMǬU6ËdåKfê×ëin'sÀ+‹8Ïýë'ÓŒ(Ë.I3å‹]Ú²š¦çŸÀ¢™­ùèÿmò¾¶¾?n/N¹HDfÊMÏïO¹|À>å¶ÿ¯[‘©pø s¥‰à-+òÑË[VdÛÍ+VäÊ<>ÌŲyÞˆœvòß«_)7 Swf ÉMT®±Ä£œÈBM5=Ys…iå*cñÄð†F£ÒjRÁªÅÑfTZôoÎ)øª2s׈ÜÊz3,‘Àý‡:ê" #rE3©•TFþH4‹^Ã0?­;å]s6ªÅ·–‹Whcª|Þ¼¢ë÷,æé;j—­ÚxÄÂztjö®ïV zÒ3ڇИ$µŽµ=˜§:4ÖQÙ.k$3QccïÚ­Î4òˆþ»z »$0•I{¬ì~zÐ'?¢Xå‡aYÒè7ërùJÏߦm»† .3½Œ®‰dâ×%)C£¦ŸßÃeÐY¢ÿB7œ[ýž°+¹íþÛ5yÖíZ.Vû( âÆë¥ö¤Eèªæ )ú’Fc’L;‘1‰òd\´~h dgØI»“—Bœs'³dGfÑÕQ @ͽLu!wdÑ <¥qíeÆ•Ž$Ú¸«¿¨ŸŒô­&¿k¿dí˜95¨PŠ£ÍNfÙÇPÆ“R´qìdF TtJâP’²®6K–Z¯¼¼‘ÞQé¬eÓ¨gœ!n¦±7,Ô3›©¸ª¸ü)Ö'ÇXï?Áz,Xš'Ö3\4ïõ‰¾I¾ü„zÆ=bz»–ô= ž>J¬§¥eª¨ÜÔ~æB= Þ ™Ë¶ÅzC½äAs¯7Ô{ÙhmlÐŒrÆJ àqÆ]ÐÃðl9žèÝi·?qž&c¼À1aò…ó|§‹;»Ý8/•FŸÕ¸,ÎÓêM}²›WK0¦hZDRæ…vἤ y>„ æyŒsZÕØ/˜—`Í„ÑkŠÊó§I#û¸ªv˜—ð<ºËhC óò>æãkZÕÀ<qÁ7°1éês¿¶ å¥1ðÜ„åÅÔTñtå¥6KŠã^-Ö(͢΂҅òR”/òº—Y5fizÞf©Au¡<1 ‹Ø.”×®ØbZhÊ¢¼4fzÇŒÝhPžEP%gq ʳOQK§t<á{µáë òb{g–Âqä È‹Ù75WÇÆ1 ¯£Œ´„]/6hœ?,u¼0^>‡In£ ³(/.¶ŒzÑo5(¯fh0¯¤[Ä ¼Nl,óÊVGGðë,o—X;€žî1õèY›L¨‹ÁòäôL±ÁÄ ƒ'[ wÂ.Y/ö çþìnd <€^â i]Õ9·HOÿÊ>]Hïš,yt)\HO÷ÐäV©Ez©œ Ò–ÃhìÆCG¶°Žk¾‘^’m4íûé¥,`×iÄ×AêÙ™*XkZR¯”¥ú1ïêɰ± pFŠ¿H½Xƒ£»ëMê5¬0cÊMêk‘zZ)¦hH}¡ˆ&?ûMê ³õbòÒ€&Kêqz~[nŸú*ÕË[´ÍzÚ”ñ–æÆí3H½c®tú¶OH½‡pVA•”YRåÛÊvyzñjAìÊ,©—ò<Øšwà õÌ–‘9åã5¤>36?ñôßœ3SbMRyðäôœÒ°Œ#§'ùάWêÍé¹R —d÷7¥ÏôóÀË4ø ô˜îØC#f}Bé¹YöhÈ7Øs¸@ºzQž”žÅ>1«nl+Ké1ô¹r§ôkÉ”æs¹ ¥Ï¬aÑZëµÞ”ž‘º8°µ´O(=}k›–·»)=3܇aŽF˜´cÄ ´¾àAê3OO’tœŸ’z†GÖ>î&IY>è|=¢¥¿nþ¬²å;)ò–šø§ChÕ?½düú¤÷W4’cßmc{£Ÿ5½¤Žéù•Eû)™‘_ªÑä$Åä˜ËùãõnçunŸ_í‰Ñzu¼cMçOºý^m,®H–Ìd:g¨@_ª,ÆÁ»UË?ÖÔx R<™©.¾Ð×릮uŸÿʲSþö9ÜÇ£Ù&¥rí»ÃDÈg‡îÍã=Âøêã=ÂøÚ÷½â {o ¥Cwwøêã=Â÷ÖP:¼G¸¯á÷ã¥äl¡1‚!Ð[—j*)ÿ-q¶“ bX]÷Õšÿ´!.ü¸^ÉX ‰DõO÷’~T/L %u-º—bzÿ I 
iý7~Îtü¨öÏ7ãó)û¿ð.J)¦`§¼Îú1&±œ_R🬟ò·¿üÕßýý¯ñÛýÃï®oÚ}JþìTñãìr9Ó}™µ„=Yë'•õöÔ¨ÿí÷¿{/¬ÕÔˆ'[êQS¸5â“”py*…öñ‰yU¤¿¢²ç^#ž•4“ä§»F|’`”JU]ó^#>IIçNý„È,{xVßô4Šÿ€)©¨hs,zoS">Ò „ïm¹hùø§ÆäQ.ãjÚkÄGHV×ÜH¿gjij™YÆLøHßÿFƒQÑbï[øH—åG߳׈4”PÈÔS#>B–…îüèn¯Ïç¤$“ĦF|-Ök™õ½F<ÛÄi|<·×ˆØ¤,ùÇlî5âc¦í¥ÐKIÛ¶ñ^dÛxn¯ÏïÄÍÅÛKÄsÆ0+Ø[‰xŽÙ%ðD5i›ñ|€82W{/5`—UÙ¯ñ|®ç dJÄGFiºXøðÛQ"SøA}ÙðB{‰ø˜ÅvX½jœM‰øˆÃSkNó;÷ñìϳ¦¸“˜ñ|e+…9Ðd>¶ñ‘ªB†(¬_¹—ˆ™z—[¿*ij d?¶"m¦B<ÛîÑR‚ç`¯åwoúxäf¦^ %‹\ÈM!oÜ‚¹±˜ >yE‹ÜtÄxÀÍ ·è,¢¤ÆŠr;MøÒ'âï*¦ˆò³>§U‘PóÔYMÍ×¹Eòbz­mU$`uX×îz8‘[÷M‡`U$’«{î†nQƒÄµ®—ŠGi¡À®"ÉR2`IFEBoÖ¦ÐdU$’.¹ÓÁ¢žÐ)ñÐ=qªOv‰¤KŠ•NÌ'vSüMê¿Nì¦Ô¼QŸCEâ¹÷ý”3ŒŠ„/aò¬©Ùu$ŒÇUQ†ZCu$R!gÁ/£svóht$ ö¯}QO£#áÝ9dw«#ILüÝûD}ƒÞI(ë­Ž„•üð7sŸ‰LÓ"§‹s(0{yˆ•‡’$½—Q’ÐÙTzi•$’ ‡B£¬–„ 6c˜mVKUïq§)eµZ²ITj0JòG·JV-˜‚¢äÙ(I"¥¹ÖY‘çïÈÌme}‰Ñ’Ðͯslµ$r?®˜ra7‡]¦èvb7^™}iCŸf•$’<».þo”$ò’>…tQ’°›Òª~Á—ý5bŸ–MŠ)j ãŸÔ–*•/úk|Öû+†ª>mûÏØÞènþϯ×'rØü5Ö×»#Ô¹}~½î¯1†8Ö4ü fªF0Áb¬/ûkàhDyÕÖ/ù†¼í¯ñœÃçÄ¿²l%lçýE¿m< ‡´ð*À [¿éð=[Ÿþ¦Ãmý}úkØ_á¾W¶¢£ï­¡tèî_a¼GøÞJ‡÷¿ä¯V@yÄ·ù5 ÛÉ[þ¶—·ü5L/?Ì_ã{¼!À€)B”.1‘ßã áwoˆ?þ÷¼!¨y²Þ˜Z±¾‚)¸Ã"> ÕDz©½èFÒ´.‰bm©½¢ÄdMã~ò[Ý*¾$Æ¥l¥jÚ-Á‰¢Ëcg¡zÿQÒjZ¼©¾RKÉr‡ <_¦E=…]ÖÇ90Ïí²~_û!¸Rw¹b8¨ˆÌ3©š4ê Ýc^fú³Ât’2-¢órùÈ ÊQ9@¬ˆKp" Ãú+ªYŸŸHÍÿ)wO•÷Ô3SËýÈú4ÿWæ­Û9ss‡0vÃÄòZKrÂ?LmÉRÙêVñ3!?O5G*[Ý*Ï?"q*’2lˆNB2+zjª»¬Ÿ«äz¬T,ú´g§:ý!‚×ܳ©í‚>G•—CFjÆ‚!;[Û.è£ 3<…·Ô3„ÿH&?uã X©0 ­HêÆÂ1®Š¾ìŒ ŸY*GM×™Ý.èKæð6}f²Û}™”2u±Ùï‚>3r/?•ìwA?axe-vö[Õ*¶1½áp\Êa«Z%.ašs0r~bRA̺¸z0]Ï Ò ŽI"™õTåhÜ!øÊÜGÝŸ²U8C­“£q‡`‚fÌ‘sÚÊV±»Z–•,'ãA ›ª¢œvIŸÚ¼l­9o…«Dm•VÜ;D£€6½jrÞJWq ©®rs£›šý4‹¿åqàvd6U†]¸Í óØìãXܦ^dYþ-nS;X™éP £·÷›>Õà6Q.àL&5[[ܯ9 ¢*F n§ØÑM»²<·ù\_"QÒì¸Íǘž`¸±ܦmª>i· nóG/SvÐæ@.¦ñÀ€6 €L?4Ÿ;hÓJÃÒߪà1˜-Ÿ |Mêed0[LŠîY6îm7Ì/ŠÇeË`¶Xpv­Z2ƒÙtˆH=+¿l61·éhªÆ"}hf]ÓzøCø§6¨"6­‰>,Sœm®zh⃓OЦEd¿i›±­¹ÍÌm0›¯Ìq™Ï f‹;Ä6ÃÝøCt©=º3˜ÍwbScªj91[ Eë†6˜-ŸùpŒ³Õ|¶Ü# f‹-è9R³¥í±ÛÌæ¨²ÇÍ€vÒ—Ì d@›Ï1'ˆS‡ÚzIËumñÀh©¸1cѸDTnÞ:¼f h³­Çe5 - 4TªuЀ¶´…eZ3 ¤ît^+”OçÖϛڀvòr>â°‰ˆ@ÉÌ}qª}Ñ´vàvøheø&ž¸Í\àkÅmŒPŸ‡aÔâ6vîýyÂ-Ý–z;sÿYØfòö´ncÛ´™2ÿ°2t›I—×>²tÇ×fìãšØévca:á–n×N_µ< Þ–n³¨NÂõ¢fXK·›Ûè–nKúï8Í·†n7K Ýn’K}zEXºÝüã:~ÒíÆ>ËcÊÒm Ï?V>K·›3ÔØÐí BÛs³t»æÝ¡ÊÒíŠc‰#}ÌÝfª¢Çr=è6 
·L2þÒ0n&!¢R¼°›ýl^Ë–pWÉe¿nbC¸kÜ¡Áî‚Û=d§e®-ß®”+—{œåÛxe¢'ÈL÷%Œ='K·+‹½´ÐnèŽbC\þ*–nSªä׿o趤6ïÓÃØÒíÂÔ§Þùv¡7ê²±Z¾ÍPïÇ Îòm<×)Ù lÛù6š*ùâÚÕò#€®]ŒµÔrmnêÇ l¹vaª¾E-ׯsîñr³\›ýAªÉjµ\›ú%ͨï'×Î’Ô?g5Ø~Ù¬VüT@Ó¡Eu–R4\TÞÅ¿kVû¬÷7ô‰sßmc{¥Ÿº›Õž_¯Û·jÚÌjëÇëÝÎêÜ>¿^7«!Ž5M?¬–Ä=!±¨4ݼjVK2 U†¥ý™ÕžsøœøW–­…í¼¿x·ñ”gS¼ 0j’±¾f’a‡ñá{&éðá{fµ}¯ü$f5éÐݾ:Âxð½5”ï~ɬY §E9^3«ÙNÞ2«Ù^Þ2«™^^1«±Zä<ÈÇøß÷˜Õ@Ö7£Úþéµ"-”žgU¤D+Ð #€„#òAÓ .!? Ò‹s…0…üålnËZU¢QÎôóSÇ‹"8³ü ’Ǧœ•ºau*g Åç‘PŠŠƒNÁD5;%íºYIgDs‘ê£#]Ð!]NkNÉ»”ߥVËKKÎO!/q¨î+Z­äöò¢_ÃñXŠ‘òÁ0žÈÅk%E‹yÆVä@)»˜ÏìP,õ§ÚÔœ¸SjbÑ­œg¼N©FʧtZÒðTÆ <…¼(Kj²óÒvŸYž˜ˆÒvÂ.$/ Ѷö$BÅÀRΖnä{*?—¿téù)áE™œEb‡>¡t‘ï©–ië*%ç¹YEr¦¥J7HuFÀg°e‘­çfeGÖ©ú]¾—P ¤­~—ïk ¥ 3(Ö•¢óÜ«Tõø¶•j0>ƒÐk·•¢óØ«T…àìMmb FÀw{”N¥ä<÷*G°™£j4¾cµC\£‘ði˜‘%5"©ÉÈ÷¢”Y“’ù>æ‡wlÍF¾oOÛvùž¹‘iEwÍ»ŒW6Ɖ¨sl-»ŒOÝÑãÃ3°ÉøE*m—þYË.ãcßEFx«a©Ö]Æ/´¼`j¬u—ñkbæc P¿¥î2>_Bïq=reS»’üŒËžvA¶gXª.Èæ^[Ú격Vô3ªÚB6 ¾·4ƒ,d³í±[ÈN”š²þò–µ,M¶lú¹?±q²q{l²îϴ̲y³Ì ¯éÿ”±Ó¯ÙÆÔ#u ÞŽ×Tß¶¸RW¼f[®ËqÞà5uã®2Q¡"å†× tyÅé¼Nôç~*Á¼ tÓaÁ€6Ÿ«IÓ€6ûc|Ïð7 Í¶ÍmP›ïܴεÅùœdEm‰n£^L? ¶hÖŸ ƒÚ|Ìn$[0 ­æÌ}h@[šÒ–mÚÁ™$ž ÍOÏÑO¯ Úü’ǽ`Çl~/eæ1˜Íèô­æ²ÁlZAY›sÜÛ³i‰ ¿˜_A­ó“Ó8§´ÙæWزm¾åe1 Í!ä'ŽÀ€6ͧå¡H;hKo¾…~l@[ç‹ÑÇB= h ‘{ÂC h³ Ûk^;;fË„=A³Åtãf„l~e²žþ‚löÐÛdMãø¿VeæËf´®¥€°º|p <Ûâ­,\óì‚bL*iàš9¯|.®qvËi®=/ÕÁ°™*b1×R: ·º…,\‹b…G[¸fmbì–a¸´pí$ TOjê0ˆM3$>÷ÏÁ°™f8˰ÑVŸðI˰YfâÉ(t0lož3 ›á]uº [’-Ì4K–aÓ.•ݰ Y–ͦ™ädÙÌœyŰlZ_Ńf#Ž4›.à pÐl¼Ì±çD^C³YWX5ø‘¥ÙL¾„]¡þÍf¸ÕT,Ï®Ü3i†ú<›¡_+fÙòl&Þ~²#<œòñ0<›…y÷&óÁK´Ö²¿x6«td?wúÁ³;c¿fÉõƒgãÂÄ,øÏxvÜÝù¢XºvúYD»Ñ1 W­ƒhCÂð+ÄÞmºGÔégymÿ#•%Úø”'«‹åÙ\ŸvÐ쌥[I–,ÍÆcR@w¶SŸç) [š ¾ÙÙ[h6£nûJDõeSZwÉÆCU**µ|ºˆhlÓ˜öiÿo¨ç@¾ÛG÷JG½læ´ç×ëv­žûcN[?^ïvŽP'÷ùõº9m Qû]?~ˆ9-‰·e®†^6§á®a ‚”=øë1§='ñ9óï”xçüý“¸(ÿ5éÐݾfŒiâLuøž1F:¼GøžAmÛ-]*¯¼nQÓÝÝã»cŒ÷_L-=Þcü’Q Å2,-SSýšUíèå-³ÚÑÍ[v5ÛÍ+†µÜXY3uŠß—½—äÿ1­ýÛÞ‹Wkt‹5¦5Èy!Iõ!ˆWÖ´æór‰li«=DOæ}™ŸZª»i-잢V¸]ðß’Ê6æ-{Lk}wko¹î‚¿Û’´â6Á?ïN«%’ÁŸñcm¦MâÁØÿá˜Q5ó­ºÝ´ÖXcÒÍÏd}¤Gð§ËñR85JÃKŽj»)£5·[Ö°ú®•á‚ÞÚVnˆ‚n˜24Ôµ‘–U%'å̤ØúSlˆ‚M%¢ˆQÃ6¹_\P—¿hc]¤Ý²ÖŸT–]c§ö˜Ý¥]ð÷ø®¢›…muüYŒj©;»w»àÏl+n*àiÐ~ÎvõpAÇá‚e¶+õ‘ï\|c_ó+óIgÌÅ#ø·]vì¡ZÁSk÷è¬-+<­E§üq e*œ4cô:3À=‚¿¨0¦/O[y!JéIaÒSÚHÔ%NëAgŒÚ`éw;SÏ[m!f7Ú\·ñ±»àÏTq%Ï5e¤µcmâèÎÁµcä÷ø`÷²âsäÃ#-R/[]!M§´Br:í‚O:%ΧéÇÜëSUˆþ¦î 
!cNŠÍ–VË Òè,€´¶,óÕbñ†¡³þÑãFKo^\vêbŽ¿Ü3øó™Ýín´uÒè8>Æv³Ôvš·Ù_JáÎÄizƒ³j[]vÑ·llvç'³ì…Ý&ÁÑÝeK-{@7UÑ+/³…nQµ™ÐÊB·”EX©ž-tǺ»0èÆAk´¬è—ä¦VË-›ªEnžÏ§\œAnÕ:cÞ]:›ö°ø$3ÀÍÇpbgÄ—AnZ²[ñˆ¹%…/£—uïä–´ƒOæŽÜ4å²¾7ŸZ–Ó¸%ö¬Æ²;p³­ñ¨…Á7Súø`€[ß)JÙ|·8´<ù¯ pS_«’·Ä=©£ÎÜŒ—ÚîiܼޥØÚ@ĸ%Tìqø0À-´oÐÜÒ_p~ö··„­mGnM‘;#&vÜ–<5B nK†Ü'cÁmÍ»\ nÓlÑ-©Ám¶a&G>>Û$=þI(l`[|†žlo¶%rk»±vØæ;³”ݰÍùÒ°»¶é¾Ô¼Ÿé» lK˜sæOhN6di+³mI…ùDÕØf[|ÜØ–çÜÓßÛœpñÁÍT¶Ä_iÜÆ%Ê/[ÛàNOv³¸›Vú9 Üj´ŸaEr³¶ùL ~qn·qu˹ýî€upîÈô§3øÌwKX᛹e]—÷œEnž §õÅ"7íž,­º÷CY¨¾#7(¾-çîbƒŸ“b(wgvžè`ø¸qRÜ€Õ 8,åîLv·rÕ[Ê-ÕGX¹R&ÅRîN[OžØ–r·ÂŠì3 ÊRn:Ϲ>ó¾ZÊç<0@mðãfÈîòŒß¾ùgYÆÝXˆ"NaÈ2nÎ)V5ŽÇvÂÍ~ŠD¶„›žiý‘$ á&‰ün,áf„\_¹B,áfžØÇBgw5ŒÍnš6¥$§ŽÎn¦L++}¡%ÜÜ„r3¦ ¸)Ô`GMyÎn:Þ•UÀnœ‡ð$f°„3ï'K¸i-|²ÇÂ]ãRl 7SKK¶+/‰å¬jɶÔ^É-Ù¦ýôñвd›ã} Z®ÍÐÆíê4\»Rg²ÒC ®Í„Ëñçë¥;]^–.õK›U„󻦶Ͽàõb^ƶg€¯ôäýnmÛ~¾n÷wÚìmϯŸ c¿[ܶŸ¯›Üæ(gÏõGݪ$³€´Ü_6ºUŽC¨ÃíR¾dà{½”çv&ŸóÿÊÒÍ#ÿþ™4cÊ’Í0Íœ‘ové>éòE£Mž¶7Ûå›V›<­oG—ïrß5¾ ´Iáð×RS^|Ò廣ŒŸŒòŵ”.?å—LpH!uCViz¯¾çÑË[&¸£›·Lp¶›WLp4·A–Ã;Á¿Çgìo|¯z¦Ü¶Ì’™%D•ú÷Q=‰hBE2¸{ެÿÂlyiÕ%@£QãÒ“qÕ]b‰Ÿ]PèÚ9}Ý!m8©Á\Cº¬[E$M¤ 5¡[Y4*l·0k ¹¶ë<$ ?i9Ú‹ºÜ¨wr}Ï…Fc‰køÀ©ýó®ïj-E² V8ˆLf£G´³ÜЏCc•l6ŠÅ=mÞ‚ø2£`Þ‹Uƒ“Ym â ÛñV|Oò” {ßH9b㈛V.4íÚÈ™‘ÖÍùRÈÝÌhS0 ×ÁŒ¦©÷”¿uΣušÐX%¹lc*e&=‹­ª b7mw] §~ª&ÐÆ ’cSíneÂWmÛƒÐhÔ~‚dv9©¡Û˜ˆO‘Yï)¯mÌ>ⶈm×HìÜR² qWTºÙ.ƒ‡TîkÓñœAdjóDã¦4(mסþ`wÐ-{HlU'5$t#ãI—Vú4²`çÜÈ|Rÿv×ðoÓŠÃrïjƒÂ”HO2,6ä…µƒq0N• h¬RBBw0_[ÒtM^¶\\0Ó«ýµJ›Â§ëH6t#<·å²Ïj´~ißö,‹³bšâŒw*ÅÂ2 à¥LÖ µàm¡8 îh̔Նq€;C¿@òÔàpˆy•µà.ù,Ÿ’Ü¥¸Û“çË¢»f ôÏþu‡ÉŽÞ aøŽîÚ¸‚ º‹e.¥i ³ðÎÆÍ:jáêò­‚®…÷$uVÂT`[x—À'M_›/xgãæÇaáö¹ÞVm" ï¼ÇŸ$–ÝùÄÕlÑ],fø’¹ŒÞù¤{" -¾ÓF—q",æÃ€‡ æ îÏ>ß3T×¼˜÷ëªxÚðŽurv€ûž[uœ-ÀÓ>Ö}žéê,Àk­®• Õ<|q ¾ž¡­p9 ðìo™©¾,ÀÓDŲW“q€ç“.­\Öà¥Næ¾b}·ç¥ ìsþxÉGðô´/6ŸD¡àù$Ý­šFEXœçÇ&îƒçi¸ ËÔia^ Çm»ËÀ<Ä:,J¢–_ò·UYèÍ2oÒ¯ÒôÒ'R™èf9M‹ô,eÌ<㎷HO\~ Ÿ\Þíz¸g(Ý£È?àž–ç¸Ê~Æå{:¸<Œ34ýäò¬ò÷ˆ6p/+â2ž\ž™òV°æÉå™Ê`¹)d¾1±þŠ÷:ÈÉ|¥vÆêžd¾ÒKâ!ÊJæ»—ÌãÈ\^RcN?Ž“Ì f–Íç½æòAç)¬ìƒ?éüæÝvÒy†IV¿„C祴Û*6}Ðy\Ú.¶…‚†ÎsËï<·Æ¿­ÖëIç™>c“-geãG¾¾èüÎø:Oûø¸] •gU4øI/ •¯’\f„¡òýKæ‡*ÏÄ?On̓ÊW¿—–8©éÏÎA…”[({5Ò9ĆJûdöm¿ýýïÞ x3µØI ‚×¶tÖb/EŠIP:Ðzë[-ö"e¼b¡(,m[-ö¢<ø—èAhë^‹½ˆ2%÷š¼ðï½;Óì°†»kJw÷Zì…·:ÄÌ¡”ß‹±ãm- Éà(Æž)µBÂ*í7[ŒM#OÙ]ÅØs+ Ò’í*ÆŽigí- 
Zì™NœØx×bÏtº[Qж§`›˜ÁT_j•ïµØùÊZX.Øk±CЧ¨ ùvÔb‡üG-‰ÊnG-v¶áß1©¡Ö[ßj±ç*:ï|Özë[-v~§ ™¨|Ë^‹]¾¥¸X Ñ|;j±gr›'••©Åž™€¦K!3­áÎÀO:`{1ëè·£{–ⱆ(úJS‹/‹˜b)mÛ*Fp`­C8ïZ}¯Åž%’o•í1µØù¶7vÛUŠ=Sêk–«Ö´ßK±gš"¨ËYË­o#Ø]ô§Ö9ÞK±³-Œ=I¾³½;GÞ¥FÒ¦-ÑMÆ4àKv¨VpßJ±gš¯fi*Sˆ¨tC ]«­oynä©å ¿bç'vOÅ&w©ÃΕƷ;~;ê°³ɰµoGvNrÈ’XêªÃÎî°Q±(Z~¯ÃÎ̼k­ëàöBì|g̘)‹j ±ó[ É{¯m[žC’=ùí(ÄÎþ0!˜Z?EãJíÆ— ý|Õ†fA;Ó¸‚‘ú´ÅGq6·9ãƒpH/Ð._šB½@;‹Í ²‰j0 hKz“8”²ièÓðÅ ²ùFì4 A*œÈÆÔÓ º:î]R!ŠyîʲùJˆo¬êÖNÈ.™T$éûÌfö³ˆ“ÚûÙìnDgIJKbJ³ìtoÄæ÷;@$îüt"6;ÃÑxôv"6{˘…æå Ä.¬–ˆ{1‘Þ bóSxÛÆì.Ä.Œ‰q}Ë b³¿PIûÜ…ØLǪz5û°Ù]c0RN`—$‘C5êØÌJç hO’ód›ýxÆ´îOÀæ˜>´eaаٟàgV¸Û›ýQ †cQNÄ.’;”l@/á±Ùþ U°Ä ¶Ì±o®½MwÄæ·xïb7æ†Øü”Ô1¼’ʉØò)Œ©¢ù3ˆ-s"fËT?­Álö×¼âÚÚì.»Ö²ôf@»Ä.¡’ƒwÐ.b92Í Ú2ð,—f:A[!uZ Úü”ZS` È Úüü+pfYXÚì²\sYôº´%õa•ôwámö×(¸yâ@m¡¨­aÌ_ó£ô_7‰]l»ãƒS+7pÓ5e‹Àíö8ä¸Yfº»žÔne€[Êv1Tß7Øþ &WÍ2ºñ΂ap9ƒÜ‰v§ænæhölBwôÜx!á¶´žoà.ªñn:õa³·æÛ ÜLõí°5ÓÜ…Û/ê y 7‹3»DUIâ„î ™f„Ç[èf]P@Öñ†n€ ­ ±ÅvA7:¸!1þ†n0º„_Ž t3Dw.(Lº ›å†S ú”nI9ˆCÒõ2°ÈéÐn “¢ÈY;®&f‘›Æ8¼j~žEîN»dfø ¹±ÅBK¥^È)'`‘šnƒÜX¦¬aúýFn!±WO¸‘›C˜†^I¹Ñp¬Æå ¹ÅEs«^È]yÁ›ôÒ²È$‘›6`¬TTDxÓ™Á 7ËàUÅà ¹=ýCLÉêb›º"Üm r‹d)’^½(7Åi,¬k)ÝÈ-† ÿDîÜ!ØÄ†?ÞЋ´6œNìÎÌÉ[›C ßõ$Ed3Ü䥜ØÍ6ˆ_yi­žÝ¥V£K"‰XE‰Tœò'vgQב¢µKQ¬.XøëE»s¦{óò—°Šb°gBçxB7FÁ¢ø^FOÂJê@. 
P;¡›“hzÚÝ|Ÿ-e°Oè–6ÈÖÖž„’½×RŽ'xgÑ$Ja°À›éiónéÖ“$ú`4t W®Ñ“ðeÕÕ!Z= «¨c¢q.ÚÍA;þ4üÄêIø\µŽ:ÿFQ"UÔ§^|5¬¢Dä/ÈÏcM¢„"³JÇNðféH,zÔ Ù%‰iòi÷íÎdòoˆéo¶eægè ‹FUBJö9è™U•pèM´Né@o~J×cOôæ¬àúŒU¥«,Aw1&J˜¬²„…Ô!`Šã‰Þì[6iåP«+Áµ‚ XT݌ѕ$­W‡¼e¢›Ãñåo>B ±»ö¼e¦N`a“v˨ÎBH?È?!öiË£/žZ•ø'µ²õMÿ„ÏzŨԧ-ûÛý¤¼û'<¿^w m~õ»~¼Þí¡Îíúõ¾ÂâXÓð#üÜè©ú®=?x_äV¿â ñ¶Âs ŸóþÊ¢•°öý ¶ñ‰œUaáUx±&sÓïë&ónýLïoû'싺•l|o²¥Ã}²m¿oO¶lÝm²Mï?À?””Ú&qŒ}Í?Átòš‚íå‡ù'|ùlò ƒàRþŠùÿ_4ÿ3—«QH‚œŠñW¢3Ö–tÀ% JÚDaqéwgMçD8e¼jÜõ;2f*I¬–zÇCèË,µ¨¯¤°¸Ô;,XâÁ†•ÍKb¬G¦m,.1tÆßlÿw=Ø®ÒIÄõØþqe…%ÓJM«eûO:¦²Bju- ¡IZ×FobiË┯uu©Ã9œ–„ ÑšTÎ dË%Ÿ†éRWQÒ2VÙïZ€V˜ÆIFŒñ2-öNj¢dKÙÿ“»Îà©·ÿ!©ˆL"mMÜñ‡9ür…Ó÷©lenÄÜWM*[™ùÎê³kBõS—øiN¥,æÑ"‚rª‡ñ ×Ǫ±ýÀD#'ªÑTE¦•`K©ø%«Öv™–@ˆ˜ßÞŒíŸÚªV“š±ýG9 É'ucúwn‹="¯ìÆôÏZاN,Ï©Û£Ó ³2qú³;mÿÒªì»ìv‰–%93$>‘¥²3–‡³ÒGÎFö»å¿|ˆSª„ngo ÿ$„I(¤m«pÃÎ ÎB„q+‡­Älƒ¸lÁ9ÓgÒ©–Îa+q#þ ûGóläh¬ÿjEBÈâSâ†ÕZ°—E÷™£1þã1–Æ*²[sÚJÜPÄêzŒi—6cü‡¬ èW‘Ÿ©HØŒÿ’ðuZ>¨¸X>÷Ò_g^jQ ç¼ ´…1m 2­­sÞŠÜd ‡`‰2OeÅñ¯=~‰~Ñúc¶`‡Khã±\=:ÑÕYÔ–ÝýÕ-jÓ^ò´°{2„¼úfXØ&Ÿð¹ª~Ö 6Íÿ %„µÅVŸX§QÏx8ÌÿÉõXÆg†ÃüGØÍÚüJŒ[A/‚µiþÇ×¥µÅ"¹¦%…¼µi0ÁQGôöŽÚlÂ~˜Ö3ƒÚâ04¢²Å¨‡³ÝÙâkjð¹a[Ì)q9£Ħ-!Þ'b³;@+6óhÛ[=)p"_ˆ-µâp“õ–/È¿‡Ö’‹¢.UÈÃ, ÀNÈ.ZÉ“aõþ„l¬2¼¶*!h‡ é©íh [!˜Œ1ç²ÅZB¿1U¯ÈKæjXJ d‹Å×óp60˜-óÍI%Ÿ -CǧØÐæ§`w—ÅcP›Ÿ‚# TKjóSŽ‹úkÐfoXþ¹ hËÈû;@[,î8§®W0 -ÖÿXÛ°±ì -®ày®ŠeÖ€6»ëa,äÚâñÁB‡µÖ´Õ_àÜÚò)€Õ4n@[úƒ,PTÕm@[Œÿ𨀶zŸ”ŠŸhóJP÷rQI‹Û µÃ%Ówâvd¢ñ†4fq[òmƒ–~³müày–'n3™@w e¹è6)uÕ=Ñmmð¹ã˜Þt›î´IÏÿE·Yf°²<µL«¡Û,ô-6%Y†n^u¬îSº ðžö¿ƒnó›SOêké¶°%š(ù ÝfL©oy\â–n3? 
ĺ)\t›q³ø‡i»è¶÷ø–pÓm¾Îr”:Ó†n3 (¦›õoÎ&ö hK·™eYÑuZ,Ý&&0OÍèƒo3?Öp+|›n™š§·Ü|[ü €íý¦ÛLÔͨÔoºh‘XýX¾çÔâo~J¯LaË·…Âã5­ÄÏø6ÓúhbfË·«ÀF`ÚŒ›pó»’§qó"ÜLÖK*.lâbÜØ‚ïÖnÆÍóAsü'Œ$TuI –qséÉïÅbhw¡UÊ WCK¸‹ÔzRévÂM k£ñß_ØMRí*ÚŸî:¨b¿åÛ¬=Ù“¾ãäÛ´-ÇŠ3 §Ñòm»Çænêß{1rË·™/€IÑܲó°¶!6è²?u­ ÿU­ÿ4ÊiûwíGŸõþ†ÒoŽã»ml¯ôSwûÑóëuCNM›ýhýx½Û9BÛõë}ûÑâXÓô#ìGL9FD/QÒ»ö#È2LÏŸ˜Aç+¦ª·íGÏ)|Îû+‹ÖÂvÚ_<…Ûxʳ%Þ…cÒ°ý¾mÒUÛL¦÷·íGû¢þ$ö#épŸlÛïÛ“-[w›lÓû°E–^hQHükö#ÓÉkö#ÛË+ö#: 1Ë2}ý(Dk?úÃ?½e?*RõgÔÂ(õ¨Šd禊dIðH¶H_ە߉jJÖÇ¡Ä+mF™>˜×ÅUñ1-Ѩ"‹˜ìð òΘŸÒ-ô|öàçàˆEÚv]¤ÄE8ÈüBÙE_0â|{t‘žÊ$žÁjBWéNßVGÇñUº…~¤9,9u²* ŒåÙÖriwÐuÝMâêJÙU‘NþoMðÌ礪ƒ†b„á2Rêõ©U¼=¥[ «‚ÚÇ”º(aKÛµ‘¡°ËX[^&³ímcµ>,™ÓÇړʉÁ½` }h*JßåY¥®~$rb9ìU¹%Ós µEQVºH³]ÙБ’{Љs³æ®žÍÃÝ»º]œí,ƒRŸúêړÉïÄÎkÅiq{¿ë"k†àÜ눫ßÅYŽrkWûDåZ¬ÝʤaÉgœHŽ«†]œÅ·H˜ºV Šc·²É3:‰³t »8ËÏôKÓZ)(ÎÝÊ©JêŒ+C»<+Á°¹ŠhDp(7yVœ3˜9N<ùkÚåY ¯AZǿѶ]žåb?YkÚåY¬!è`’ÎÑ–w› kÐ!Ø&iÛÚΨ{¼IU]5ïmç¾Ã>×(ïZv– Š=…HÙxµì-üÅZv‰–CÛ’µîm§&¹‰šYÚv‰–¥ƒÚŠã©uWFb£Gn•&²¼Ü®¢i¥Š è-ÒÚŒR¯M"È.Ô.<ÝÃ×ûíòñþ,f‹R;A5볩Ȣª^ÝÀ-fS3à;6z» ›Î ®Ö2Ù2ŒÃS,ï+ËiètŸ%$} Ùlc$QPU€l´u,yÒƒe1;3é2vlR<7˜M~¤ÁùÂl´m>ü´Ùæf1ƒÙh‰OùN‹ÙtgѸýxb¶´%罜8 Ù´îDGBÙlîNc¨Ål¾r¤5;1[fVÌN¢~5 Í6É›[›_¨M]}| j³m˃gP[Û4ŽàDmvqz‡RÖ¢v–}ç†ÝÒ 6Ûw‰™ÿ.Ôæ;q?”°µùܘ€t 6 ‚ø`LœPƒÚl£7ÀÈ·`P›¯t¬½Øë…ÚÒäžRG›± 1/ƒÔ%³…AmžlsœHw¡¶˜^«_COÆ„ÄC‡ž}¿P›mOѼ´åOBIÚx­••/Ðf¸ô”. 
hKМZïó Ú:cqÝs;hËf ‹éЖ6_ÕæaA›ßâŠmè9¡'pÓ«¬Iæ½|6䦩§Ö•:Å 7³€b‰òübÛ4¥d‡S™nq8þ6Û®ÂàË\׺œ˯⾖nÓõÇ('©Þù6ÇS˜/Ä…‡p’U\QI®!Üò2üóÚE¸5cfaF–pÓSeH•ró1f¬×»ÑÒm~IbM‰p7½µÛS°t›ÂhT.~Òm1ÙN²eÛ´Ê) y±íB±›PC-Û.Âp±ƒã…Ûb`Yn^–l£ ˆM"!F–l3*Ô g ^d ÏX{K¶Y?Žz† ÙFfuæ>·dm`€99–lKˆáÜ=–kK>™8#-×fEv±9·‹j3]“ðû‹jÓVE®ï'jÓ’ŸødUdab@æ†Ô—­GMœ¯$“ sSÃ*Tí.Ûß´}ÚÿZ¿9ïöѽÒQ/›éùõº)§KÖšÑïúñz·s„:¹ë×û¤1DíwþøA¤\$š’ÐPû$ h à¹ŒÉÛÿj,HÏ9|Nü;UõÆ)ÿn#Ê?… I:ÜͶ߷Ͳn›YÃôþ¶ i[VÜ?…I{ܧÛvüzJZÙ½Û|›î€ åÄ‹ ,¾¿fG²½¼fH:ºyÅ’©…j:Ò÷ïID ò¼ìHÿö‡÷âóÛì*IO 3íã<ÒCóQI²êߊii+BŸJ¯é’´muBÔXÔgö¶–Ý&Ú¦Htºò–Ó&Úb7ˆ8N£EZ®»hÛö°üVž!´ÐбÂP•¦´‰¶–ñB†#Èg–­Bm&¹Ó1_Ì7­nBŠ£«YŸ%–MjKÉI œ›´_È'•ºKZè Ðòeäkm«BQz•m[qŠÒ’€Duf%6vÕ<ä€Ô¢ˆP­»M¸… !›"·žvßȲ‡75V1YÒBcŠ£%¹wš–¸@K‡_q9,Œü·]SMйMÚ¶² ¢)ðš(ƒmÞmÂm— ³áYMûífK L°F`N§)N´ºGDàí,\ò“"ëÏ÷¢¶¥Ý˜ä>4Iäx®îÆ$ÈQ2ɘãV„:‰ŒÍ<¢IzL»1I*¨NkKgÍ’%Ý2ã z§Ý¥­›zŒ1i.ÞžžR Ôd$/ó(åéY¶ä £N@⤷¼¡–CbÔ Ó³Û’¨2j=D‘º:k–,é»0§6mI÷h$œ«„µV…Z/i·%Õ=(­—­w^aö3õví’üricŠž… Êò^Ÿ* œ•ÄjˆªaPý#Þ¢­•J nšÅJž KÓ•¤1‘Ïd­’Ý–T}˜á[øËM¾å?¬+½Fï[¾Rí„›ÞÓ&àÒ±ø©XØY¬díX¼3y‰¢éKmX÷Š…î Õž‡ÁÏB7 ,âÁÖ¬ÖÍa^.™ º=-À8½jƒ³Ðíx®%—ÙÝBƒKŸ-tûÀж02fèvLvÏätµ_¬»3²žEÂk» ‚À-vKºé•VTËŹYQO¢òC»97ëj/ƒŸåÜM&3ŽüùéÆéŠšKz#Ý4 qmÝÜì(§åÁaI7¹„÷Àg¤ûn˜œ›Åéba ÓŹi˜Ã¶Áçç߈™ÌÂ’nv¡gضNÒÍ2IƒùÒÍ @:Ö¹›tWIÿ×Hä/ÒM›8 ”›)¥œÜÛbd‹ߪ—¡Ü’(^ò©¦›rW–­šÁå¦y2Ϊ¿†rWÆn/9ÐRnn°3† ÊRn:$5 l½(wŸ„Y×Rn™ŒH3­”›U„!í)²”[r.G@K¹±:+£<³Äºe¢JóêTøõ’{./«ŽºKiaï0õ½ò^-»÷é¼£Ì˰ô 𕞼ß-KÛÏ×m<ˆô±õ\*ãÒä˜áõó}óÒåì¹þS•Âñ˜¥l÷‹¦ÊÀ`S#òü˜öùœþWnø÷O¤S–”oiæ¾{³KS‰Ïöü¾Ý#[;“íÿuCÓ¾¸¾Ka:)Èûâ”kž¯}ÊmϯO¹ìâ}ÊMÿ?ÀÖÄdšäÿš­Éöòš­éèæ[Se"sæ {™²ôEœÇ Ÿª¨mËÒs¥CÒjhßβôŒ^¢¡*h|†-K/Ö* [5ë·²ôR¤/KN†»,=%naŒ´î¶,=EÙ,jÛ8wõ¥ë $ë®ULLYz:¢¶†9œ¥Þý.“-@ÔRÍŽ-KOyËfëÒ7ÊB?ªËoué×i‘%͵Nø^—žOŽÉ»ëÒ³QµáZÅÐÔ¥ávÕî±eé©ZÀZ±8ûh=&¦0z ßκô’±6œŠQ}¯KϾ:Xµ„…­KO%@•p‡v×¥§Ÿ”F1[—^âpVJ….[—ž¶Â*gnêÒüvZžsºÕ¥sóªQÉÖ¥— ®.©×´Ë½.=mg9Úë¦.=ŸÄ}CµÚhÜêÒSýQÏT)¶.½6:JǺÆ{]z‰7òšÏñÛY›^ã›D‹}צ—ä:º“µªýV›>K±s6c–Â5ÆÄ4fkÓSáà[Š­M/žÎ¥°Ê–ŠwÆ2Õé =³‹Øâô|2:¦jÒ ¦8=O¬D[ên5µéÕ ,`uI³¿(ø¯±¯Ê;°žZ€pc=Ó[I¢¹xc½$ŽY™N¬g,†®¶þëѨs¥,Ö‹Û0ÀJÍ;Ö³òEo~9µXÏP)ÀCP‹ìõfnÄÅXÏâDKùu@½X¾ÐTz¾¡>ó’¨ú˜õL€ìL=õx-~Ú~@=^ËS7 VY¨gV#¦Ó"=u^`Ôiܾéù9¸yGæôRþ£¸=p =®æ^°mÆF¶HOsxÂðÖ>>³ÖÊ óµHÏ' n;•÷¤gÛ8ŸÒÓÄ…I 
j´H¯æ¬4SkZ¤çû$Fs^Õ†A1²ãÐK¿üÛ¦®1ı¦óÇ'Ý~¯.ÖSâ%Þ³1À“YÃ+º”wC°%Ðh_Reö´¿¸«ÓÐõ‹_úo,㈫è7ÿð±2W7DaVº&°Kž‚Ëî£x»öüÿûîÛÿòí_ðÿ@.«‚æ¼²qμûx43žTÜ|âZ¾:ä_½Øá=Âö.ˆß#¬¯pß+N¬W’û½5d‡º†¦Ã÷ÖP:¼GøÞÊ¡»Gø¬¡`Õzá/Å6òÀÒBkôu u#°Q½‘퇛mÑÑAZªnÿ9«`øQ½¤No¾ZÄpðgzI?®—Â<ß7’òÝ>?wRÅÑÐõóÍÖù|Èþ/¼ƒô–ýo%/«§’3²àH¾¯$ü'ë§üí/õwÿë_üö_ÿð»ë›ö]ó=·!½øº0LMWRþ'þ&¦Øÿ÷ûë™2× Iïž~†ù¬rÝèèF³èWö*×t»…æGþ5SåºEJV#iÍ?Û*×-2e³-©°²W¹ndGGyS庹—¸a4U®µ Ô5¨ÁT¹®TWú'Iíß­ÈusRYzfm3E®+> rW!ð;Š\³ÍÕÜg½í½Èu•1q”ÔjÕ[‘ë 1‘žNC}¾¹æc¢T}›)rÍÇ’²¦Èuí”÷!÷OÙ«\W¬ˆ—†Zy¯rÍç’«›©rÍoÉOk’©rÍoÁ˜Dkòí¨r]é¨+.ÆN«Uo)fø-Œi*ºYL•k~K~¢ã´Ê5ˆí³hKM‘kùö©78Š\Wê@=“f/3E®ùúìYMm¦ÈueÖg£›"×ül›™-ìE®kcÕï#aŠ\ËÚ@ü¦=Säšß™ò²4š"×|N˪ÉáÙ‹\ó1÷èL‘k~ VlÚ4L‘k>—;œ)rÍç ¹9Qf˜"×<;ì¦Áo/r]E?VòÈ/gŠ\³;5¡™úÖòߎÂÖüË@隦Õ¶æ§Eã,¤½W¶–Ý×.0•­k«ø‡xHO…©lÍ6—´¢Á·£²5Ç´¥Ú3•­å;Ye{´%ÝÔ!—Y’ã5ë¦k¦§ ,Cu5sݵQЂuc Ö֌єúóêaÀšµÅpKN+¼k>÷À k–,f5ág¬ér‰iî'X³?î¾êÚ ÖTÖ‰ÕJ4ä¬ùÆ––¯€ëVË×Ï55`ݨTë±FÍTgÀš‘#Øp³HÅÖ¼ùÔÔëã ÖT‚-;Ò°fw@\Õ(oÀšÝ5€ °úkœ²½ž¼k~KÀ æÇ=µƒ5û âZ¢eïw°f%U×Ç;w°n…áÛÏøv°fúÚÆ-,`Ý «W=7ÔŽÖ ?â“Í 5;Ê.L3‹AkÚa§—ƒÖò\ÌÂX°æ§¨ Ú]`ݘ: _’ÇÆÛÁšÝX§†k޹<å, X³¿Xñ!j/ßÁºIyÉ´O°fÛˆÀŒ'XóS||PhkYn¿ ׬ن8?skvBšÄ€5È—ª¢˜Al~ФyÓÞvÜ–IÁÀ6o¸-_âÛ 84¸- ”«¤€S2I· ·ut²\ýÄmiô ²fp[hÍ´È ü„øE\ÿuCå‰Û÷¶Ü²póì—™n[Êê¸c.àfQø±¸™ÕALÕ„c€›&ÂBãø`à;p3¡ Vcš¸=˜á°·[ànŒMõ~þ†ÜBåžWä¦Wè¢à¹™dùOÈ-…&G2Èͳý¬(gÓ>Òu|‚Ü‰Þ yÒJ‹ÜÑH¹£©×è pÓ¨õìg Ü,k[òê¸5Pq8áXàæp¿D¥k¸7õDµZàfx—zØLÐ&åÍ“.XЦóÄSÌË‚¶Ôn[ùp-hW¦TÍŸRl9uøò‰”µ%»T7n?ƒÚ¬øúD/[ÔfùÜ(®Ÿ 6“v¦u)ÔΑ^kì¨]ÅJBnЦÃ*»{€6£Æ MŸ!¼³ ªl@;yûÜÚübºª†~ƒ6!Ì…!¯XÐf­­‡)YÔNô…W‹øÚíY‘ µ%¼¿³hQ›ýaWÌç j³dªx´ësµëlדs 6ÙkU*U%ýÖûœý7=L p3Sªo渙äs” ¿€;±Êzžé‚-p;ÞI‰vøp·“<´s6 p£_ö™ûP0O)£`ú¡É ¥%Lžk€;0 úsd7àA›ç|`ºÑücᢂ‡Q0 ÿp\gV=R%ëvQ÷'n3ÍguëÊ2êƬ?§Äâ¶ šA«á‰ÍYƒ³­v¤Ò]5ûÁC¬vÝAÚ÷K˱kG$R;Î ÙjGÄ‚­Ê®¡é¿ú22ÎZíH)»²Ã(GăbQ©¡¡+ª¸<Ýʾ*†:²NYå+Šã±r³íJ6×Äý»žÀ]qSEìÈy1Ý•€›ÒÁèFH©EÏ—ó Ü|gÙø¯Ñà»NËèF(š<ÊC£á½ sF7¶@÷‹Ñ°h"hÆP?YÝaV”4 ³F7²ªÉèF( >ñËV7BJ©ó*Y ïý$qÖíDnù™¼pëIè`ì–"ÂêI€¤iQ«&¡ øÈFK"ÞpKWfµ$G½*ÒNÜæÈwŸôñí±4_ó œ>#tãQ»«Aª9ŒRk*[ßôù¬÷WLU}ú#¶—à3ò= ã´û—ΪN_ñÈøÇ=2b7Êâÿ¿¹kI’å¶{f‚dñ{ ÄÛ÷#AD²'B~zª mQÝlV±²ÀL Ñ¿$SäË22ªgØ2Žòƾ•YS¾ìºrp=úƒÌs¥¼stý~ä Š–û–q7æ 
>dèõ5ùÞ‰n@o’œÊ¤ãs"ºA¶Ÿzyéá.'ÊÆ¨¨µ–&•“§Š–»-ããü¸~?š!ûfñVm‰ì7@¯™Â”q’·ÜÆäf¬v99»Ž?ÈWxäÀR×ò²OÇHÅßÔ[ΔŽ!gÉf}s¹Ò1œHœË•ޱ*³ççºÖ°,¡[î\léÍÏU×ñ„C=鹺Ž?XÞ“í|šÑñÆŽpx<åö,E17JÇ€£äåæÌ7ŒàÈm¦cÀV¯ôEÞæî)”Wk´šûQÌN4wÏ9€8¯ò¦\×P>(—s±åcFþêî·<§é†dûì òPjÆ#ÌŽ’]ÓM£xêθ*™R3ÐYáÙ¬|É”š²ØlJqmð[B¬«Ùd)”™¡I;Û]}Ì¡ SsÙù±:ïfþUÙ_Ìʸ»¡ÏÑ'oĆo@3eŒ»O42bƒiކ鵑&!×d%Í0j÷@K¨­=;K ß¡v{P…½w¡¶ê,ê¹7ßÉçe Ë¬5¢"Ô^ycIË„ÚУj2áœP[%šå^jc>AÂ`áQ2°¿ÒùÊËpô8¡¶NWëzÀ ´5-#ôE™f÷Ùèj7Ø$ÌVKÒnل٘-Êu^Vê„ÙËòx-ê“0 Oò¦^¢ a¶êôòê ëwVÆ´;W”!ÌV=çø«fë]Oö•#A€­ 鈶ëådÀt¥ï.`ØÈ>·±”8lÌ×’:„ä°5ûôø`«äî ·â•“q:b㧸$9Bl½Ù±§ô±uÛe‹[±g¶Ãìˆ~#¶æ€ã¹ ±õ² þv¾s`weh¾ Øš˜dÚ%áõ¼?–YKxÝÕÅÈ])ñô™#¼Öd”#´`ëEñŸs€­éBç•S§V[.`~Ù ™aïÅjÚÃvyÐîyç›0lË•}$j^Ùt ÛÕ"]t>¦lÃtXÐÌÁ¶ì”ŒØÏg¬½į̂Øõ.0j‡gKþ„Øaø†¶ŒØ²8®)FìPáTb/Bl¨ylÍc²õ–òEˆ­Õá-¬À„;ÀÂ$ìôFlù)ºAWèH€…Oú.Îîpd®i‡yŒÙ~:–tqÅÙ¸ÃigÙ]qö@îÂNaá8ÍGÔêø²ꂵCö|…Ï0ýV¬¡+GÙÃ?4d#°•MhÁ9%=ãpTv²Q«/ÿا ²Õ¹Ç„²5Ñ4>+™›1; ÔTï÷,cv’sa¶À˜1Ë4#ÈŽpxïKÀbĆٷlŸõ`0bGÔD› ͈¡vʉjæÐ0bLJÞ„Ø2æQÙv>™;‚G²ŽÚŒØ·Ç^› Ù¡ù4-†ì€†5§©^3dã;cÚý—²aä°ì¯åFÕ$QÐ<=\ËgŠö5M`Ëvpe¿¬éI±xKôèœt%þm²Ý}SÓûnö7¨Ä½Ž¹µ½2OóšÞþëÄ5YÒÑôÖ?0í^á¼¶ç¯×5½µÄuOóohzòŽUkt5Îïjz¹¥†/‡¥Ñ¯È‡¿­é!1J°3H ^ÿž’Þ‚9¯ì›žà¼n=õìÊwnÊA4á{rNø¹Â÷ä ÝŸ+|OÒó{åG$=L8ï!MøÞ=Ô ?WøÞ=Ô‡îs…¿!é=hiÒ=þ¼%éñoIz4Ëk’Ïò×KzH¢—Xηå$½ô°¤÷ï?ñ†ùÿ$=47²®Krš‘ãÈ:¿ ˜$bˆ5w|3‰5•cS×q­šUg¢é¡´ªÇô¹úxªœÛÛ"¥êSN×%d÷ßjßÔÏÎÎo°jl›Bª8Ç/o/4ƒjpºž”aÍži€a5Nó0«ÝµÂªÍ”¾ÌfLšHÚNB“çÔ‰ÃÐÓº.áDèš²Á^4Ÿ\ÿ­í61V=Ý c®PP=Ï®£çn˜W«çà€}jàÛì¿“ÙÐAËÑ Ƈ›<«­Ÿ®K}º_o»v¢ÀãMQ»§ˆAo¨µrܵ÷ãK‡Ÿ)ÿýYöÏuxŠ8¢FrÙhÚnm—ptu©ãu(C,ÿ±Ö†ƒ¼mX9ösjmÁ3ÄJ•ô]ÙÖp· À°¦¿Þ¢'‰‘”žñ¯ÊH·èIâ}%uÃY~oXÐ(C;<ê-mÉQMI7ÕÞp–ß;?³™ÕYmM´ Îò¶cås½¤±8µöxÊAײ)Çþ™§‚º»ÙwfO9$ád¿ÊZö”ƒÚ}çÕ¯eÏ8„Ç«­xUOËÆŒýhÅQ¸“'e»¢ Ú¢¤[%ÊAÞ² VR|«D9 ØÁªP[õªÞ¨¾¼¿5¯êÁûŠ#·þñüÍ•Œê¦b ‹¦\QÃW>ûà-iï¸s[¶ÀݽÜÏÀ=´áâë·Q•PM„`ÜꃘV}-ãvC9|Ü’ŒÛ8í‹€[†BM›deàF1W4n˜S ndÈÒ0p£`kØSÎÀ-cNaàž…^;ñ€[»Š™GwƒÔ`ô'7Ê ÜU!à–±ØêX*7J’•¶3pC'Mðodî¦ íÍJÀ-ß¹» 0jc`VTÂvC9sØ´â‚m”òó†m¬þ”ó0lC}¬Èƒauë}—T3jcH~À†CBm|e·¼ Bmý)E›¤ÅÔÆ>õ„ÚøÊÒëôk!ÐÖzý 1hk) UÄ1hCr=$8ƒv…tØ 'Ì®°lÞ”;C¶^æf¯Âlܳjµ©„ÙSÙ¶JeÂl ƒ1 (–ÙŘ-c' ³QÙw¡A6†f°£ÕVÙêúЭ ™ w‰Q«æ![=ÌIB![µu{'¾¨ì1jwõ¿Ÿ¢ÌÚ€M¶dÔFÎç©ÊeØ.¨K*š=l«¿Ø Ò=l?Õ+‰ Û:Jš’H°Rd}>¶ñ,¡áúÌ!`ØŽ 
^â[bØŽÅg2lËm4ó¸âm¬Õêe¶%²‰GÈbØFÛá÷¬›Œa[>×KÛNIÛèrºLN>Âm¹“rVA)¡6"9E¨¢Äm£G½•2hãcÚ¶b´Cô%ãv ûyÆm/G}š¸­IŒ§ªú ·‘OÙwVÈn£»®,xêtW¸M­« ¸1Ÿl¼wAÀ­Êä‘À®pÕÑæSEÀ­‡.²tÀ¬«(¸·ÞÑn#©ð$½rk6ÜcבûÊ\#ä†gSXÕß7tkþàØ9aÝZž*Çߥ]tc }µ%Þs¸ ëèÖ-ÆuЯ”ãÐ.º$èÆ¯Ì§¼ còœíZ~‚n|'\kVJ(‡Û˜o½>?ÂmP\£ín<nËX=HDØ­½Ù2‚”3CôÃ_VùpÈ^®¤à}&…©ÿ:)pŒ¿©ó};ÿã^È¿üê^™hT§ôí¿~@rè?Jßúã¦Ý+œ÷üõºÒ·–8çµ?þŒÒWª"ãå#öªÒ‡îÆ( NI=üNé+HlÊhÁ€Î0K©ï`ÁAwÚ.¤y ÜŠÊOˆ}˜p E4á{B‘Nø¹Â÷„"ÝŸ+|Oìs»EÞû?¡öéŒó.ÒŒ/:w댟k|Ѻ[¼Ï5þ†à'ÿ Íz@ôñ–âwMò–äÇÓ¼¦ù]Óüõ¢_A®¡¼ä@ïÿ³rj^ôûï¿ß«ãëOóÜ1 òâÌÒPÉËBäܾÙ%¨kNôs&§•y"•£©ýp¾¶.®Ðë Q2’Š¶Ô†?è—}®$Ü” #¼¥á‚9,ªŒkÔ†ÃBÀvoìþ¨•8,tÖ7»ªâQýäK£}ëHˆæ³/A];BîKËÛ¹u.³6­@Emå ú4»ø˜èŽ,+c#8F±mç¤÷‘= Ñü¡å?‡;y¡I×Ò 1œÚ³hÍ2Ë}Î@k„Æ„<5»TlÈ·¸ìâîst¡±;b¸kÜø\—@vi_ó¼‡KÕí¡yQö»Àû@ÞØ-xé\µ»åjF‹S¼-(•ÌPü#ÞŽOÛÒ+c÷S}׎+ÞFîW3äóØ“·ê¼âí ßêý0_ñ6.¸¹`^ñ¶¾~ΘÇî„(¦oá’Á;%ŸoÂà’¯gðŽf¼q.ˆVÔDàz ì|oýJËTcðdÍñÙ)YWÔ ÅÑ2t®°»!CgØ3xãÛÊ¡Èyݤf$æu3“›FÔêÈ4ozÌAy÷V“^[&L2ØÔ“hnemÓââmå¤@ôžá¶òÉeÌÓÈŸv«Ô~+{+ß«lž|Fn{>_ÚˆÀ_èN¶dllÚpd½è¢/ˆ±²íäïcs¶’AO@kͨ[d÷ 4—DÙ’ë÷€°,WG6á®Á“ÁLµ‚möë™—u\<ƨÖM¶jж#s#‡è½ˆ¡“~=¶‘žºÓ:OCË Ë>—‡ÞóМÒ.ƒè&hY“Ñw“ClÚ}dmd˜ Êu]`·B ¯“½Ö¤õBúv x?‘ ÍõBz¤…Èw-EéBz4˜:N»Ò÷Ç·â¹¾Ã(¸î¢µ é;êÉêyGx¤oÃד]Pßµg¨—)ú‚/¨‡ ™ãÞ¬Œôè”tøª éUü3ïBz˜§ÞúBzô-’û;I¾ èÑð©=¥íçÕ½Ìèjb/ WÒºË^@ű¶Ý+õzH•É£ è¡GYã·@¯ýs#¼€^‹ ­½Ðô ˜õ>Þ@0Y%ØôFº‰­У·î±9½€â©û½€Å—FÙ_8gØV·ìpá<Ê„Âö‰»`¾i?÷]ÁÏ8UÕDË æås®m×ó<ÖŒó*Ùís<”Ðc p<êëðˆÏj¤ äÕk×ä‚ äq;Œ©¾@¾‘%;ƒÍjåÞì xá|>•8 S/xá<~ñiá¼n\3 ¸p¾ .ß&Ì?GÝË÷‚y4ñ:oÂ+ ï¾Tì‚ù§ù>ÒÌ?꺒/”’w>¼úCÏíáqÁ|ªèÿWŸÀ; —DÚÉ\Ê£kà0=ó‚ùô ËNJórlµíJ¿ æåƒ²ø[Ì'Mß;-Çóž#í·Çóxµ”çyÇóC—v€‡ãyÀÌpáŽçåF˲Ší,ž¯ŒŽÏwhK¬曺›çnpÅñ¼5–¸cùâóXãÕ~ù}ûj{Œ‡%ût—Å}ÆòΗ„@ß*EÝü¶‚|ewœ%œÇBœ)ã¼&õÉ'§C Ã<®]…ùÇL|c˜×ßg‹Š˜Ÿu¦–ëÀ0¯MG5_E« Çê•Ó”ÿç?þ]6R¥ endstream endobj 3 0 obj 34245 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1080 792] /CropBox [0 0 1080 792] /BleedBox [0 0 1080 792] /TrimBox [0 0 1080 792] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang 
(x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000034464 00000 n 0000034485 00000 n 0000034508 00000 n 0000034938 00000 n 0000034807 00000 n 0000034702 00000 n 0000034865 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<894D4D1CC21AAA5E5F8E4F25EF75E7CE> <894D4D1CC21AAA5E5F8E4F25EF75E7CE>] /Size 10 >> startxref 35019 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_skx_jc4ic13_nt52.png000066400000000000000000003123431360743507500230120ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw 
{B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$:.$¶­ vpAg’Zó!%Õ€IDATxÚìý}œëö]ç NI6Ó*M»4>i»š–Æ<Ý´rÏôfK"·]hõCåeá@†·s1Ù.ì"m]`X ®kã{§µxØÉØÅ¢CzZÊ6V å¡wæ`‘ÂfqZµ$Çéc޶>$m(sÿñ›¯,˲-{ì±ìù¾_¯y3–,Kž¯~ú}~ß§3‡‡‡‡`†a†a†a˜ŒqݼO€a†a†a†a’`ÁÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁÊ0 Ã0 Lp]wÞ§Á0 Ã03+Ã0 Ã,0žç¡X,Îû4˜SŒmÛð}Þ§Á0'ÛûÉ•a†a†™ÇqxÏœØÞOžg½ýíoû¼Oâ´.^¼ˆ÷¾÷½ð<_õU_…o¼1ÜnÛvxsH’Çq ( Àu]ض ×u!In¿ýöð}¦iBQ”ðØA૾ê«Âãyž‡W¿úÕïÏ0ifヶٶ  ËrxÇqð'ò'P…í›YfeûªªÂ²,|á _üú¯ÿ:èqþÃø}φ9Ãìõ½ï}/¾ð…/àÆo„,Ë}6J¯ñ˜Í, DZwI’ŽûlדÁÖ9VWWÃü#ÇqzB»LÓ„eYá¶R©Çq–e¡T*…Ç) °m;|/m‚A T*¡X,Âó¼p»išïÏ0if㣶Åíí8öÊöÍœ4³´ýr¹œ¸ÊŠÅ"‚ 6渌š«ÄI²Q³™Eá8öþÈ# }/Ûõ„2s¥ÑhFÿ W¯^=TUõðàààðàààÀáÁÁA¸]–åp;€Ãf³n«Õj‡’$…¿8¬ÕjáêºþnÆ¡ªªïÏ0ifããØ³Ù<pxõêÕÃÃC¶o&ûÌÒö Ãèûœ«W¯öÙ5ÃLƒaözxxx¨ªêa£Ñ·Çm”^ã1›YŽcï£ÞËv=ìa3òU.—ðÞF£Y–á8TUí Ó4 €–e9¬ýVi¢ÇI’ IRªóI»?ÃŒb˜Ú¦(JQà8t]ï±I¶o&ËÌÒöUUíû,XçŒ,Ëh4€R©„3gΠ\.! 
ƒð}AÀ²¬žŸ¤ÃÌ“a6>l h¢“v¶of‘8iÛW†aô‡a¦Á({e˜eâ8öÎ÷Êl`Á:g¨R­VÃÕ«WÑl6á8lÛ†$IE«¢(᪠ýÔëu†ÁyKL¦fãöbÒîy,ËBì9bŠ“¶ýZ­Ã0€s ˜©2Ê^f™8޽ó½2X°Î*¤DÂT–å04€&,”¸Ahðªªöüt“¾&K ³ñaÛèwMÓ`Yt]Ÿ÷¥0ÌXÌÃö%IB¥ReYÜv™£ìÆ0‹Äqì=Í{™ñaÁ:g(/iuuÅb«««áDE–eÔj5‹E‹E …0,Œ&%¦i¢P(„‚ëõú¼/‰azfãö´8ÃÞUfј—íkšMÓx“™£ìU’$˜¦É^$f)8޽§Û™ñ9sxxx8ï“`zQ%I Cz©€’,Ëð}²,‡¡d$L£E–(L˜a²H’§ÙfÛ6,ËÂÁÁÁ¼/a&‚mŸYÙkt¾2¬0 Ã,DZ÷ac;3>,X3ŒïûX]]E³Ù„¢(ð}Åb†apx$³ôP8M¡P€®ëanÃ,;lû Ã0 Ó…C‚3L4$øÌ™3( a(Ã,;žçáÖ[o…,ËlóÌ©‚mŸa†aº°‡•a†a†a†É$ìae†a†a†a2Éõó>iñ¾÷½¿ök¿†ç?ÿùó>•©ñøããŽ;î˜÷iL§žz O=õT&þFO=õVVVP­Vç}*©ÙØØÀM7Ý4ïÓ˜Y²‡i‘¥{öñÇÇ{ÞóžyŸFjx Ï>YºgŸzê)¼ð…/ÄOÿôOÏûTRÁãwöÉÒýúøããç~îçÏçç}*©àñ;ûdéždü^ÁúÜç>ßøßˆ­­­yŸÊÔØÜÜÄÎÎμOcjìïïãòåË™øѹ,7ÝtÛCÆÉÒ=»¹¹9ïS óO–îÙEÃyüÎ>Yº_777F¬<~/Yºg'¿—F°.#ÔsuY8{öì¼OÉËhËvÏ2ÇcÙìaïYf2–Ñ–í~eŽÇ²Ùâ߳,X3Ìúúú¼Oaªär9är¹yŸ“–Ñ–ížeŽÇ²ÙÃ2Þ³Ìd,£-,ÛýÊe³‡E¿g¹èÃ0 Ã0 Ã0 “IX°2 Ã0 Ã0 Ã0™„+Ã0 Ã0 Ã0 “IX°2 Ã0̘xž‡b±Øócšf¸þ§X,†ÿw…BgΜÁêê*lÛž÷e1 Ã0Læà¢K Ã0 3&AÀ÷}‹Ehš† ày^âû\× ÷/—Ëh6›eA P(@Q(Š2ïËc†a˜ÌÀV†a†9&²,‡Â3 ¾ï‡ïI’P«ÕR¿Ÿa†aN ìae†a˜ ð}¿/ôWUÕЋ: êñ·ºº MÓ ªêÒõýc†a˜iÀ‚•a†É<¶mÃqœ¹žC¥R®›F¬ÍfžçÁq˜¦ ß÷Ñh48$˜a†a"°`e†a2®ëÐu}ާу,˨T*áïŽãÀ¶íTçIÂVUÕP š¦ Û¶Q«Õæ}i Ã0 “8‡•a†a¦€çyaNê(‚ €eY}¯K’4ïË`†a˜LÁV†a†™×uqæÌ™ðwUUQ¯×áy^â6ª( š¦Áu]¬®®†"7‚ž}†a†aÁÊ0 Ã0c£ª*ÇÞ}½V«Á÷}ø¾I’8w•a†a`ÁÊ0 Ã0s"ÚÚ†a†a˜~8‡•a†a†a†É$,X†a†a†a˜L‚•a†a†a†É$,X†a†a†a˜LÒ'X«Õ*Ο?EQP­V±··‡jµ:ïód†a˜LÊå2n½õVœ9s¥R žçMõ3<ÏC±Xìù1M3ÜFÿOÂ÷}‹E8ŽÓ·Íq œ9s«««°m{Þ_'Ã0 Ã$Ò#XI ®­­!—ËVVV°··‡ÍÍÍyŸ+Ã0 Ãd†b±I’põêUB×u‹Å©ŠÖ FFµZ ŽãÀó<A0ô³lÛF}‚Õ÷}”ËeÔëu¢Ùl²¬©‹m†a†™a[›v»ÝÝ]ììì`mm ­V €è'—Ëåpþüy´Z-äóùyŸ3Ã0 ÃÌÇq I*•Jøšªª¨T*°, ªª†‚2èºMÓÂ÷’G“^÷<žçÁ÷}xžEQzŽMP²£Î±Ñh P(À÷ý°}ýŸ~—$ µZ-Õ1†aæ¤ ë•+Wkkk};åóyäóyt:±¾¿¿Ÿx¼V«…•••Ћ;îv†Éƒì`gŸÌٷa( I½¯¹îð÷Ȳø‰ây@’x‹ŸDeMÓ`š&dY†mÛh4$ ¥R ’$A–e˜¦‰F£@xiI€Z–…z½Ã0P(Bëû~_诪ªp‡\›çyáçišÇq`Fø^X]]…¦iPU5|í´9ûf˜)Ãspf™ëÊÊ  Óé„ÿ':NèqMK»ÝÆææfOˆQ»Ý†a=Þ[˲Rog˜¬dßô:Û8³èdÒ¾}?øW°ªj²`MÇIÇ!IRè©Ôu=µº®‡SUUøGÇWUŽã@UUȲî?ʋꎸ.Çq ( \×…$I=‚šÍ&<σã80M¾ï£Ñh$Šðe#“öÍ0S„çà̲ Vò¢nnnâÂ… áív/^D.—¸¥Ýn£Õjaww·oÛöö6r¹î»ï>t:œ?Õj[[[©¶3̼fßÛ8³ØdÚ¾UUüŒKBXíHt}ä.²,'æ|º®›(ú¤ˆÐ ‚ 
œ’$ônʲÜL!Åúó´mŠ¢„Nß÷ÃPà¨p¦s5M¶m£V«ÿ}-™¶o†™<g–•ž¢K;;;XYYÁùó籿¿ÝÝ]Üu×]h·Û©WYö÷÷±··×÷z»ÝÆþþ>666îúúz5j;ÃdAö °3‹Ûwz(Ì6*Zƒ €iš¡Œn#O§,Ëaîke1}t\9îŽà8dY 55ð|é<“žéÒïñ¢ÃöÍ,;<g–•룿¬¬¬`gg­V+ÌW]YY«ÐÒúú:Ö××±¿¿ßSY˜rd£ÇÊçóh·Û©¶3LdßÛ8³ø°}§G’$4 ”J¥PøàL*3úN§3r{<¯6Îg?ûYxžÇá ÌHö÷÷qÿý÷ãoÿöo§zÜYÛø§?ýiT«Uœ;w.Ux>sz©V«xüñǧzLÃûQa%`EQz¼”ªªB×uø¾ßöKù£t Ú7ºOT<&~¾ªª·ÅÑ4­GÄÖjµ0LX’¤…Ê]ÝÛÛßþéŸNõ˜<~3YÆïv»=µÂG<~3Yæàãғú²²×u±¾¾>õVaøÚµk#·ºYnºé&¼à/À¹sç¦~îÌrqöìYÜyçøØÇ>6ÕãÎÚÆo¾ùfœ;wgÏž=Ù/ŒY8Î;‡‡zhªÇä1|0ÃÄ^´}LÚ÷œƒÎ-ëäóy\»vmªÞ¿™¬@ã÷-·Ü2µcòøÍdšƒ;~÷T ÞÚÚBµZx㬚 óÚær¹‘ÛGqÓM7áöÛoç•Kf$Ÿüdÿù?ÿX]ý³©wÖ6~óÍ7³}3©X[[óŸýì©“ÇðñÐSnb&ƒÚìÛjoÔ1Áã7s’Ðø=J$ŽßLVÈår¸óÎ;Ç¿{rX···``½ãVZUŒ†8Dÿ?j;sº±,À¶ƒñ»iš&Ú"NÂ÷~ï'pxø€ožÚ9²3Ë Û÷x,¢çò4ÃöÍ,3lßÌ¢ÓS%Øó¼¡?ÇÚâD«—¹®‹b±˜j;s:ñ<`uU´Dl6»¯kpT³$AÐÝ_Q€W¾òsS?W¶qf™aûf–¶of™aûfëˆôlmmassûûûa÷Ýwßz;³X¸®Š’$Dâ¸|(•€Fˆ;+¥+`=p±íÿ<Ëÿó'mœYfؾ{IšÌišEQŠÁÌâÀöÍ,3lßÌ"Ó'X÷ööàº.ö÷÷ˆXzUUÇ.Ä´¶¶Öç•Íçóx÷»ßV«î3Îvf1p]!!ƒ@ü®(@¥"¦$õ‹Ð8岨Ô~²,ŽíyBä’Ùx½VÇ™&Iö °3ËÛw:\×M¬ÒÁÒ÷4]dؾ™e‡çà̲Ñ#X«Õ*vww±¾¾6ît:ØÞÞF«ÕÂ… Žý+++Co‚QÛ™láû"·èŠBÛ ˆthèÚ Æà㺮›±V‚‰H’Ø/;'Û8³Ì°}Æ÷ýÐÃjÛ6Ç «“ÖqØGƒ§®ëÐ4-L¿¡÷+Š‚Ê´WÛ˜¡°}3Ë Û7³¨„‚µÝncww–eõô‚D¯·ÍÍMlllp6Óƒ, au&ÔëÃߣi]Ñ:U,x†a²‚iš=¿ëºŽ àû>\×…mÛh4ð<ÅbªªÂ÷}˜¦öZ-‹eAÀ²,Ôëu†B¡ \†a†9­„‚õÊ•+Ð'VÏçqåʬ Q¥W’†{R!ËÀ Ô.òزSa˜Ü£Ÿ$Ô£€ @@cŒ9ä˜4ÎØGïÿG?:vÚSv]èºI’ ªj(šfAMA¯ÿN$¤¥&…ì’zÛÛDóZƒ €{jB‚–a†a˜~BÁº¶¶†\.Ó4qáÂ…ž>L/^ ÷a²ãˆBC¶ ‹"ÔqDµ]Çû¤…¶-„n<<ײ„ÐÌ#Uߺ8é³4­ÛNf¡¹£ fö “u&±Ó P*•Nì]×E­Vƒçyܲä Ë2{Ó†aæ„‚µÕj¡Óé„¡ÀQ666ËåÂ^­§ßâSU»TškZ×SHžCÓ !BM³[´ˆ(—[o”ö¥ù«¢ˆcLsAÞ¶mؤ¢SâyJ¥Ò‰V±, (‹]ß0¯•mÛ(‹°, ×®];±ëaN†r¹ŒR©„r¹œZA€b±ß÷ûî Ïóƾ_âø±›žzmÝÞ›ÓIJ,ضÍB˜a†a˜¥áº´;ò_Î ÞM¡år¿„h½zU„òÖjB|Æ اVÛÙ¢¼¹¸h ‚–e%V§$ÏS½^?º~?õ碰L\èzž‡[o½…B¥R –eõL®Ëå2TU…,Ë=ï ‚år¦iÂqœOšçy( ¡À(ÇÜÒ®ë†Û+• ‚ ÀÆÆ~øáÙ|ÙÌLðqJû&‰Vß÷áº.\×M¼/\×E¹\?3úºvǯëzß½?-­®®¢X,buu5QÛ¶B¡£P(àƒüà¼ÿd 3uJ%ñÃ0 Üúú°noockk +++º¡Â€¨"|š‰VÜ¥>§Æxa¹–Õ-^Dù§T8i–”ËeÔj5(Š‚B¡û HBÏ0Œ°Ñ½iš0 Š¢@Ó4X–Õ“ƒåydY[5Ä?(•J¨Õjð}¥R FŠ¢Àó<8ŽƒB¡MÓÂI8‰Êb±×u!IR(,dYßà×jµÐsU.—Q,¡(JXè„®…NTUå0çUÇ,Ëaû-EQà8Nh#d›Ô„¼™q›u'ôà+Šã¨Ç“,ËaÏLÛ¶¡ª**•JϾ¶mÃóõz=œxSX$‰@Ã0z¼A«««‚’$ÁqœðóhâMÇ·m»G@÷'þyŠ¢„‚Á²,H’NÞèýIç9j"^«ÕBAÛh4õ-·Ü‚›nºiºfê‡QÓ4è ‚  ª*t]ïË”3H6Gljî£ë:,Ë‚¢(°, Íf’$¡Ñh 
X,B’¤P„މ^ß÷ûlWÓ4 èº~‰mI’ÂkŽbt]‡çyÐu=©qž÷¼çÍûÏ·ÔA.*Díˆ"@h.º0F yº®Ãu]^4C·º¾ç‰g#¥×HRrô ž½ûûó>s†a椸¾Õj…â”ú°º®‹V«X__‡ªª,VÑ ã=Ž7Ô0ºyªÓ‚¼O4ᥠS4ô±IÍW!&í$ä¢bN–å¡«ýš¦…ž'Ó4Ñh4 Ë2|ß=]BAèºEQzc£B5Š¢(¨×ë$ià¤|ع2‹ ‰Õèb€©°I¹–eÁ4MhšÚª$I=÷ϸè]]]íë«IB(9êêºnè™WU5 ‹ÕYC¡ïǨIH’„«W¯&n›ä^d²ÏÒ4-ô¶ÆEl¹\†ã8‰óÓˆç‰<ÔJ¥›‡¼®ñ÷Í:}†a†É×SX§ëºØÛÛy\IÀ2½œÄÜÒ²,¨ª:t¢N!Àº®‹Ó»v9Ì#eÛ`& Õëõ0šr‘ ÃÃ,=Ï;q± /–N¢_3¶ÝmûGÓ„¡°RòÚ¢ û È£gšâ½ñßã'°÷³Å"‡¦ip]7 ×&è5]×ÃE˜Ó ‰Õz}xnYîF ‚“y3 Ã0Ù"ÌazU]×Åþþ>vwwY¼ž0ŽãÀqضÝWl…B}iŸB€O.ÂL EPq f³ ˲ ËrO˜ä¼Ã¹³îñ ‚n´GÒ©RNಡ룽kñ°R ÿ÷$âÃÚ8Ã…ÓbK|‘…о¢eycO#A Äj­6\¬2 Ã0L”Ķ6ªªâÂ… ¸ï¾û°¾¾ŽV«…óçχy­ÌlˆNèëõzئT*¡P(„o›Í&jµ{˜…"Zé™ BÌ`\WLò‚aÂ~ÐÅb¿°PžHædÑ4-Œˆ§©jô2õÉÖíÌ4{픪ä§]ÿV”îñ§‘’Ã0YÃ÷ý¾˲ðÛ¿ýÛó>5†É¡‡ukkkˆþîº.VVVpîÜ9lmmÍû\çάóg(¬Œ&ôF¶m‡-<f(—Ëa¡-‚ €Tøú2!Ëb’O_§¢ˆUEk¢­µ¨es²PµçQ¹ÏT½T*…mŽÎœ9nWUµ¯0×¼ñ}ñ#I]ô}žmb$„×"U’Ķãx®î1fjƒ]¨ö÷¦â—áy4MÛÞô¦yŸ:ÃdŠë[­ Ã@»ÝWy\×…aÈår€ÝÝ]\¸pëëëó>ß¹3«¦çyp]·gÂ#IOð™…‚ZuP¿]ê3jÛvæ&âYÄ4ÅDŸè°Iºª qZ. ! Ë,VO’h¢(8<< ·Em=îu5 #ף%a£Á:¦)<øÑ×EØ Vê‘ Û+—»]7]ö¸¸. Vf± Þó@w¼°m²,'¶Ü‹ŠÛÍÍÍyŸ>ÃdŠë Ã@>ŸÇ}÷ݾH¹«÷Ýw.]º„­­-T«ÕyŸëÜ™Vúf<Ìu]”J%Îe *–Ų,T*T*”J¥ð}šBØ)ÇÞ'ÓýÓ^+Eé Jó^·Gs"†Im‹ŸxX¹$ͦôS©ôF5½¿“'Ô¶{=þÇÅu…úÏa²Žiš0 PŽãÀ0ŒÏCjÿÇ0L?×·ÛmÜwß}aŸÕN§ƒV«… .„¯­¯¯£Z­ö… Ÿ6¦•kYVX˜šÇSÕTûe ˲`Û6 IRO^@ÚÕÕUÔjµSeÛ¥’ð~*ŠðB“ŽÓ-”D­<Êe1YOÓ]‡Ûz2ƒ ÅUùÍQ1iYâ÷øüxÒ žYØ¡ªvs·f‘p]A„5¨R8Ã0“qÝÚÚZ(LôM:ôl?ÍŒ[̤T*õ$Óû¾Nðu]‡sÔ£¡Ñhœª =³üæo>Ã0P,,Ëê a×uÍfs)Òƒ ͘f7çâ`Tx¤¦‰ZDCƒ&-®+~,KˆT²UMë>ÃLSüNbÕ¶[o^@)Jé]útÿp3 Û¶Q*•N¬¨™ïû0M³¯h j9pÔsZpœnÛ¹YÝ~×w:ž¨+‹Ô~'}H™‰æò™¦‰J¥I’zòŸfÑøÙŸýï¸vm’t+n»íõ( ‰ýy—q!Æ÷»^Ôè|ÄóçïA>ªm Aš”wÊ^SfAÐÐ3©TꆕÓBGÔƒJ‹(ôÞ¨÷^Óºyª£ Öj¾ïŸXø"Ý? 
3Ó4áº.4MC±X sD=ÏCù(®œ"Û¨×w4gTQ”ž0]I’†>¿h~§ª*Ì£Ü ÚŸÎ#ú~Ë6Ì–Y6v]¾ïC’¤°h¥$I(‹Ð4 ¶m‡½¿ÇA¹\W Àïû‰­eèuUU=¥4¯£"ÚÑEtoø¾¸o%©»é8bÁ“#h˜e@Qz@u]ÔâH“Ò4ׯ­­assëëëØßßG§Ó ½$N—/_Æöö6ÖÖÖªÁ§•´ÞUz Ë² Y–Êå2æ} sl|ðÃøÔ§^ƒ·¼å¶°@ °Xa¿A &Ã&ùÅ¢˜Tи÷Þ{±»»‹µµµS‹ŸÒ7ˆ¤\>MÓNM•Tf¹ùáþ^ó@’^Ðóz±ØõJÖjÙ­è]1J“‰ø¹ÚvÈï ¨XÒ(NùÊzøO{ÅzH°Úãu˜Ûoÿ¾ð…7á︌·¼åܼ/Ÿ9¨%A"UÓ4ÔR®¸L£v‚$I¨T*0Mõ£B^œæ<(B'.V‡-n2L– ܃äK©”¼ðbbž5Íyàu$R/]º„û'x}}—.]ÂÎÎΩÏiõ¼tE—’ŠV`±Ê,÷ßÿ·hµrøoÿíëú¶5bB}õj·eáyéÚ»LŠãˆã›f®[±(¼›t4ˆV*â|“nMÚÎ0' …‹Ç¡^»Ó~Œ$‰Nß÷{ÄBïù¹aý?e…&ªç0)O>ùþçÐóú>ð|èCšîÂÌß÷Q.—aYE m­R© ÑhÌ¥€Ÿ¦i‚ŽãÀ¶mø¾ŠUªg0ÇœJÆÝk˜aø~·NÁ<)—»Îˆ8Úž„$‰mÓ\×¼9ªÕj5l]Cär¹S<.¶m÷xWf óïük¼á >tâLá®¶Ýí¡¨(b"ž4ŸAÐ8£Â˜ŠÐÐ6*œFóžèê·e««³;G†MP¢öW,gΈÉí,Ê”Ëå¾Ü=jW•Ä$aÀ¦iÂ:FošO~²…¯ùšO¡Ù¬÷¼þû¿ÿû¸ãŽ;¦ÿ¥0sÁó<‹E¨ªŠF£MÓBÁšTÁ¶ÓWµ>.µZ ¦i†ù°„,‹gȰIù°ûÖ4§;¡g–jw7o‚@Ì—’ìÔu V@xY§)º¯«V«ØÜÜÄîî.vww±¹¹‰jµ:ïï(s¤Y ‚žçqõ_f© 6ð¢½?þãwŽ|å¶FçÆtà(j4„—^çúU*ƒCT C„¶p¨3k¨è–ï÷¦›ÄCé  V³”§~ä!ŠS*bã Xµ‰ VY–îG8ŽMÓž”“}W>øÁ/ Ïþ}ôÑGY°. Tç£^¯§.L''ödYF­V +G1 qIæ=**B×¹Ï0“ xœ'ž'žKºžÜŸÛu‡/Ȥië7×íîîb}}=,ñ½¾¾ŽÝÝ]ÄÛÝœv(aôpf˜e [Ü¢;(¹®;V5à¤7í¼•ÚÄ Ú?Z„& ¥•§?Çg˜cãy"ŒÊÿ¯®v„’ˆçóM ß÷aFXmè>èX`| ›Þ’V°R¡›¨—Õqœ°H”$ï.U€UU5üÇÁk_ûÚ©'Ì|(—Ë}m`F¡ë³‰:„ªª}bµTÏGUíži"uè½Yýd²yïçé…wÝÞ|ßŨ5ZZß-Ì—ë`cc#|akk €èÇÊôýÃ9ŽƒB¡Ð³Î ³ ”Ëeضy¨:ÇŽpÝnÈ.•û‡ðÒƒžk¡ ö‹‡Ç Ë3æIe˜yAbµ^6ÚhˆœïaoÏó ËrêœÑôç"n4UUá͘é¦(JŸ‡Êc¥ˆ#EQªÂôšišágÆ?#*ΣŸ='Ïóð-ßò-³ø13†*ï®®®¢T*…EŒÆI¥²íþÉôI½=(ô‘n‡ žÛÅ0º=ZèFàóµÊC%ÈÛJQwi§ƒ¶=H‚ëô䩞öâJi¡‡/­û¾ N¬‘:Ã̲oz0O£…ªŠ‡ºiŠüNíMÚ— 0†…úƽ—´Ð?.iÞS© OIĸSÁ¨hu]¿þ뿎'Ÿ|r¬c°`MIt¥Vš©a5åÝp80³,ÐDñÛ¾í'ð¬g}¸ÇÓq\¨8R|Ž"I\êŸY>¨§ï$¸®zXG勎K¡Ô4­gA*þyƒÒ’r]äˆ#I’Ðh4ÂÏTUµç3¢âœDP\$+ŠÛ¶ÇÊud¦‹mÛa_ÒJ¥Ò“=ˆ¨X=îß.>™TÁ4 iÂv“Ž= Ïn­Öž\¹ÂËÌ uº•¢s¡I«iÎ¥¦<ÔAŨUà$PÚG|AÓó€oü%üê¯>¸ñÆÇ:îu°¹¹ÙóT=8þ:#Þ4è†þνV™erÏžy&I¡_ìÕ`˜ÑÄCÛëõÉk’XG´Ú¶ÒyTdèºÞW5*FU¿tN¶mŒÆˆ Vß÷¡ªj(r“ÉTUEÍ4'<σeY= цAïW¬Áàœ·xOV´Èq†ÒtñãºýÇŸA 4HR÷|‡ë¬ÈBSŠHŠƒrCǬAÐ-›t½ñjÑ0dú¼è}fÛ6ŠÅbßx_.—±ººŠB¡€b±ˆÕÕÕ£T2¯oß pñµ_{þ꯶qùò6üec]ÓuÈçó=?X[[ë{ý4€Äã!.Ì2ÐgŠé‡Âó<Ð49ôô0 3*ª4œxEîqòX)ÿœÚÔ$m‹Á¸˜ˆ~Þ ”—A¹®ã $Q½^MÓ`YVß9QNæä ‚ žÑ…aáê´h2®gÕu…`Mòðx^"sÆÐ“0T‰Þ4»yªñÈŸY|ªT€W½êþÙ}À)ƶçz­°¢°$ÝB$fãŒ;ý"ï)UŽß–Õ+˜©&a€¢ø0M…BžçA×u”J¥0’ ¦ Ùl¢^¯‡ÿêºÞ'XE'š¿B³ ˜æÇpÇŸëš®§ªÀ§ 
ÓoRAã3yž¢6‡'MˆÀp0ïaâPH°iµÚ×àÛ¿½É 3ª„8NQ¥a8Žƒf³þ®ªjOk˜aX–]סiŠÅ"šÍÞû—žcà ï©,Ë÷M*¼T.—Q«ÕRäe%ï-½G×uÜzë­‰Åxøy{òA€b±Ã06(,˜þ~TŒIÓ44±ÓI\·›Ç÷û¤¢t{FvϹßKÿŒZMÌ]·w>hšb²ÿòÎÑ{™Åc-VÊåñó”£x^W,J’°%”Ž“|Ü$½²º*ê~$Ý^®+Ž#Iâ^ê­ÁÓ[ðû–ËâG×½ÐK¾QU¥R ¦iB×uT"'Љ·(£BË=ƒvû™±¾·S™Ãêyã—Y¦Ás­=˜#ü£ŸY”ë^0§êËMx’È0ƒ1Mà§~ê“pÝÿ=¶X-•J‰+ÑI £iB‚ÉÃIXÉ[%M‹ú¼aÏ ï'ýùIT¢ð^¬Ql{pŽjÔÃeýB7I4”JÝH;vò/.“ŠLÂu…­Їm/º(J׃Úh UÛtü¤êÒLC*(³mq/Ž«D­|ö³ŸÅ7|Ãð©OUÃJÞÑçÕ"h4=b5NRá¾4 ¥Ãè¬ûûû¨V«áÏþþþÄž­V ív{êÇ¥ Û¥…C#§ˆ`?…gÜ!8+û>.½­$æ}6Ì"“UŸ&oûçqéÒá©§¾ͱÞKæã9€qG¨ªÚ·jÇ4ÍÏd¥Rã8=“ÊUõÌk40 Åb®ëÄÄ¡IMR¾ê¤bç¤8 ¶mC3¬O*…›G1¼i=ìIDÅ$!ËÃóF5­ßE¡ÂI·K4OP’º½%K%ž™T°^b9.p—™e´qEûI½¬¶-ìcÔš_¹<8Wö`HTá Ûò¼Þy×íŠÎø½AáÀqhˆÚÒˆ÷vßìû>>üá5¼øÅ/Ç·~ë?ñ=Žžñ¢|¾ï+Rïz:á‹/öæîî.r¹,ËšjkµZÅîînÏkkkkØÙÙ´Ûm†V«uô%§…Jƒ,ws,+]h°ÈKòÃÌðÌbÝ 8æJÚ¤ÌÛ¾ å¯ÓëŸÅ,‹nãÇ…Â e9À{ß[Ä]w©J©<—Q(l2î9u]71V×u˜¦ ø £ÉG\Œ†Û¶C1™vâ@ǵ¯ªª88?·#ê56Žëú˜§Ý¶‹Ñ‚@ªüSú;ú¾ßÆ>ºMMÎã“íIJ„躘”‹ýï÷¼þ^ ñ< ÁD§ÕÆ]·Û3wR(ÚRQH¢ýŠE±}’!ÂK6­(ý•èe9ù¼Hû¸®‹rÙ FËår˜žQ©T i7Måû¦q"qŽ»yýþþ> ÃÀÚÚî¹çžžëºØÛÛÃùóç±³³ƒµµµ©\Ä•+W°¾¾ÞóY+++áÿ···‘Ëåpß}÷¡Óéàüùó¨V«˜V¾­m‹?¶aˆp]½b¢€‡(M B¬ÒâΨ¯5P‚¡£öõÌqtÞö}\(lÃ÷{+2 ±è6~|¿;  š¦Á0Œ0´u(GÈ4ÍžÀAwEQÐh4à8ŠÅ"jµZÏßÀ²¬Dá§ë:VWWCÁmi3Œ“¨~Oy°YI;8Ͷí8dYN\,E½^O=!¥"0Iá“FõPÞ©¢ô bUí÷8%iêÓâ5N¯Ûvº`ÒQÈQHö:ÌÓÕåro>v±8yHyThÓyAì¡Tu DJ‡/P%,¢gYdY†ªª=÷9¥~L3‚T’¤P°NcŒ¿n{{ªªbgg§OŒE_ßÞÞžÚE\»v ù|kkkáypÛí6ö÷÷±±±@ÜDëëëS­  ”°…æ ƒ*%2•iæxNÊý!nÅ”ûO™yÛ÷q¡É²eÎÓ`N'‹nã“@á]²Ü Œ ?I’Æö°RëUUGVä¢iêõz‚ ôTË7¥Ðã´çY©TfîùT%S¡¿§Ñ¶ Ó4ñ¢mO.9Îâ†, Ï¿£È…I¡ç•mwçmºÞ=¦ïϦ%Í¢qZm\×»žÑA=Få§ZVoÝ›AC–pnu?ŸÝŸq JïÑÐÚhx0UâÎåîÆw|Ç‹ðò—¿¶mcuu5Œz°, ·Þz+ÇA­VC­Vë{Æhš6u}ÍcMJû—ëÚí6î¹çž¡;mll Ýn‡áÇ…rc“re¯\¹=!Èù|~jqô¾ß»rBßߨ‰y.÷Iîµ:MãÜ”ïšÆÁíÅœR0OûžT!X’&ëÉ,?‹nãã`Û"'i¢ÍM*21ŒhŸÕ¨`M»M«æ´ŠnÛöÐV2”g˜ÔÒfž¨ªšï*pºl; Eýê¯Þv쨚RiøÄ¼Rézº¢"À²&”†ÑÛEC"El£ÕãVˆ]N«“MÈòð9¿¦ OhÜVâkwI¢7z÷«Õ„7ԲĿ£"æ)œzšR˲`šÝÅѮƄmÛh6›ø…_ø6|îs/Å[Þò7‚FµZ •JõzW¯^E­V;ѱ?Úk›æ•Çáº\.‡\.7t'2ÜN§3µ ¡øùN§ƒÍÍMìííÀЛbØçö³Ÿ…çy¨V«C?W–“ó!Èñ‘|„Ç1®'N…ð°Ê)ßK!Äi ‚Kºûûû¸ÿþûñÑ~tf_Ç´í>ýéOŸh14Î_]\ªÕ*üñ™~ƼÆð“"„Põ<‰“%¨Á³áha¥èƒ}œHžJ¥¶U‰Dñ ‚NóBUÕ± 5ÀÞÞî¿ÿ~<ñÄ39§e¿ÇŲ,ü«õï§;¨Ø "è6¡â—Ñ}'uòDEvRš—ª ±°Ar4~ÏR$.ûøÇu»‹!²<8,—Š2)JïbJR!® 
è_`¡èB’„€q$6é™AÕÖmÛÃò¢vbqT8Ý4MØ8µ>‹:Ò¨@Z–%éúè¹CsðqÇïë¯]»v¢'Ýn·±±±»ï¾;Œ™Ïår¨V«X__zC\»v­'Î>ÊM7Ý„¼à8wîÜÐÏÔ”·^\Ñ >þñs8ð |ˆðÛ«)÷§*¾ Òåšúè [À¨¹ Õþ‘(tÏž=‹;ï¼ûØÇ¦þUÌʾàæ›oƹsçpöìÙ©Ÿ71° fþœ;w=ôÐLŽ=ï1ü¤ýè†ç<Å#n¨²nš{ÈuÝž0;Y–á8ÎXbR’¤°×jѧilÛΔ`„|>k×®M}B¿èã÷¤X–ÈÅ~òÉ#ÆïSgØ{M³[%U’z+¦w‘Ô4)T±sRiü¾å–[¦~ìÓ2~ÇT97Šï‹UvD^V×M~/õ9çs‚ @©T Ÿ…B!\LŒ×$ (z‡D´({ü"F'AR¯nšƒ;~_×étF¾‰V‡ ÆiÉårØÚÚê9Ý$ûûûC«óßtÓM¸ýöÛG†Ô—‹J›½ù°»û?ñå_þå<ã…ߺ“EçBˆ[ãyrü¹r¹î¼óN<÷¹ÏúW1+ûÄ„gmmmä~Ç!š¿ºEO-kkkxö³Ÿ=“cÏ{ ? È 4L¬F‹$i©JoôýªªÂ4ͱŤ¨ê¨ &(,xÑ£…òù<î¼óΩOè}üž×uþ½Tõô8y¤ÑÚIÇ™eJtˆ9Þ¢ç©Òø=9wœÓ0~'áy½)NTø+Šmw_“$áÜ*ß²Ü+X=o¸í‘UÓ´0\·ÙlBÓ44›Í‘ã²$‰s±íã÷4=)dYî[$¥9ø¸ã÷uù|†a \Uét:¨V«=IÙÇa? = ÈË»²²®>FEt»ÝžÊ ŸvÕM–…1“Çõæ›ÿ¿xÛÛ¦½th^ÌqrÑûrÔ:€‡n¾ë¨Ï0ѵÆU>&ó´ïãB=!EAœŽòþÌø,²§EQz+;&‘@UGA£PéÿIÄdÚª®”÷Ê‹¯ÉœÛŽâyÊårØ7• GÙþ0\WÌ¡H<Æ™¤EMZFµaN›¦è©[,vÛËĉ Ѹw”*[d‰æ±:Nò¼I´‘)£X,Â0ŒžÅEI’ ªêÈ1™ªüvëî,†`%ïñ4ž9×]¸pívçÏŸÇÞÞ^h¤ív;liÓn·§VÎzeeÛÛÛ=œvww‘Ïç‘Ïç‘Ëå°¶¶ÖsC¹®Û×l…÷ÁbÀy­6<Ï[Žpà"„ › Ò V¸Ä¨Wò®£Å­{ß X˜§}OJ0M…B!,wžö^aN‹hãi¡ ‘ñ•óA$yXÓä±Ú¶Ý÷L‘e•Jeæ“F£±y°Ì¶'‚P¬’=LcÜ‚n;4UíÍOV'dP¯Êˆ”œËnãV©ˆœåFCü$…›G×Éö£Cº,§[¼)D½Ûî¼þK!÷k ­­TÄg,Š`¥ÐçiDõ\ŸÏç±³³ƒjµšØºfmm [[[Sñ®"”gccçÏŸÇÚÚZX‘,Ú”xkk ›››ØßßG§ÓÁÊÊ î¾ûîc6­ü¥û’\®…ŸýÙÏâ­oýÃå˜À˜¾ÇÑ`A䋦¬ÑüUbÔý]‘•ŽŽ1h2\T ,ÔôüûŸÜ>åïóµïI)‹PUµ'yŸz°2LœE´ñ4ضX!O²˜´jœ”¯“ô¾A­\fÝB{W‡°¬¶D¹\†¦i=IšœOšÃ꺽ó$]^VÛâÕófïaæÄ2ý,³Aú1\U{Ãwg2Ï|¥"Äcð<ž§‹pÄ=ª“BVº¿)‡U’¤©ˆëëaÀ;;;aëšV«Õ³ÚB±íÓŠMßÚÚÂúúzx£Ä›Ïçñîw¿;\šÖçŽ3X:Žƒ ¸gÎü>ô¡ëðÕ_=•S˜/ML¿7)å—¦g9謄Mº÷â)õ8‹5>pën¾gÊßÅó²ïI(—Ëxõ«ß]ÿÉP .Åâ 3SÉÆÓ¢ëãO¦ãaZM÷óÎ6ËhÛqHœÄHÈœ©ÐÌ 0JÏK.,¬²,¼\à ×0'Ï2ÛxÚ5?ʺBwTË™8A X,ö2×uÃg@£Ñ˜š¨T%¼oE¬Fc* ¥×G¡7q×m«ÕÂææfªÜœ´Œj§³²²2õ›dœÊq¶mãÝï®Á¶¯°Døi{XÇu¸ ïq ÄjÒd± Q8Í÷ïýè±÷ÇÓ» sn¶9Éó°ïq±, A@Qj(—»eÞ}ŸCª˜Ñ,‚§Áuûûs§ 'Å6‘ &ðisN™ù°,¶Ç÷r9€ï‹ÞQ(ÂáE¶S ¿?I”J‹Õ¬±Œ6>Ž)Jׯ)”w\MešæDm¹&»6)‹LLë\¯›÷…œ$ã ˜´‚AͨgE-4 ÊèzCgy àäø9hÜÞF‰í¯Ù×EÎjÒwêO¾öÉ~ÙÇó<8ŽƒZ­]wtµ‘‹V0§ÛžlF,öô¨Ã /Y–5•ð0†™„¿ø‹¿Å#\B½^ï[l‰öGU”Þü¾8ºÞ[l†àèfžŒÛÙ€æ<£Ú—%aÛ6|ß?±JPÏ¢ ÖiqªkZƒŒVp”¤ò6y¯Êî$Ç'2­Ïñ „pô÷a8HÎWU08Ä7>È,Xí„ãÇGägrÏLéKXL\×…¦iá¤E×»1t+s:ˆöÝÿ½Éé°°`ÇqX°2sã÷~ïGð/þÅÇáºJŸ UÕ®Øæa­TÄ…ùFáüQf^PXï8P 
‹ãyŠÅ"œØ]×…eY¨§œö(Šr”'Ë‚u©‰zFáyÞÉ÷©s1;kôÁ“¶š¯ÑS½âqT”›‹Á–xBž[Ò¾*úŬ2`¿SÙxˆ’ïº>þ`Ï0‹ŽeM^fPèï ÂK¶m§j_À0³Àqø¾_üÅŸ„¢årïöhš¢Œž/U*ýÇ`˜y1i;£ÕÕÁÛ¨0™mÛ(‹°m…B!«'=–“`]´Öiqj«ç¥/©>—Æê³Œ*ˆzW+H'؆‰Ë¤ãƒ=ŸÑý- ÅűstI$ «$ï*]‡ûýçÒP­ÐEC¸4Ml_êõ s,ÈC4‰`VXI–åÄÖ6–eH`†‰CK¿ôKï &öª*Æ{Š2ˆzK£i¢ˆVâÿŠÒõP¹®¯S,sÂ0cáyã Öaã?å§êºŽF£Ã0àû>êõúÜZƒÉ²|Ô‹õô‰U¸~sssäNÎl Ôœi .ÑDãÄWÁ5ÌÎÃ÷„ŽÂ?ú&0“òQ-« ؘ(Ïï\ Öñ†¬IûS!&š'jGç·?ñ·¹°ø¾ð(}õW·ÂÁ–&.€¸ËeÎAZ8|_Ì"O0—fÑ¡Â1“~e¾ïœ°¨ªÚÓ"B|žð®žÖ‰3_J¥~üÇÿ~ã7n_3 US*‰ùQ<$©bj¼¾M“dY,xò³ƒ™Ž3þx>¨® Õøˆ&SUõäY1ÈÃzZ>¯O³Ó"V ‹“¶àÒ\¼«$(gåùóÑyI•tãP®)…ÏJA¿âç£Hý“Ƹ¾àè;b?Î0¯©‚^O­&Îõ¥.àéç=ØÚšâm¨úï¯üÊÇñ]ßÕoã²Ü­Ì,²Ü]8¥²qÑ´ãõƒ %ø-ŒlÛ>ñ|'†DX£,Ë8{öÛ{¥$u[9%U†OZ³TÜšª 3Ì<˜Fgß÷ÚYaAÊ,¦oœæ…Ïëwvvæ}™‚ŠÑœ(„8æUŒ’FtFñÑõ°’%XëèzJ}_ÄEÕL¼=ã÷=|Ùêâ5ª}/)£?ó}°ÿ‹ˆëøì÷Ú¹áç§D®‰Ž[.‹§l|™-8ú±¨"vé©öÏðÈË^†oçï´àP¼ßú­N¸(S,²H…ã[=©/bRè° AµšøcF*§ÜôÌé.,–Ĥ-lâxž7tÒP¯×Q*•Â0²Ó<É`æ‡y”R«Õ`šƒ'õI¯Ó㛽¦LÖ‰÷ÿ„r¹zQ ÃȤX²áé§&‡µX\õ.Ê\<¬„øUäˆ01¸ QMˆ§O¡ ñ°y²á •Ñ-bD ×¢anbíftàÿóÿ~é¿ q[(ñY*?õÉÁžWÏBóá‡=#Õ‡Ï ïï­¨LaÔ…B7ñ&zžêѾô’$žøžï™ðµ¸WéºëÞv”g7ï3š2Ž“þFÄ~¥’¸7NÒ3é8ÝE QÐý±º*þµíäJ'’$Dk¹ÞßõÁžÜ5-®;Ùû¨j”aTQ4›M8ŽÛ¶Om3?lÛ†ëºaÛ ßoR¯ºjYã· a˜YÍ»V”ã=ƒ @0 #ó…ñ Ã8µ‹Ÿ×Åóp:ö÷—3Áo” έò–ûwèIý›fNNžœÕU1ñ¥´a—øÛ<÷WºŸAs´JE‹*1$…½æ5ÀןE!j5ñ™_òÁ^ïªç‰Iõêªx* ð–sÀù-ÀW‡œã?ÜûGâüJG¯Õëâk5ñ”u]ñ9ž'ö3Ž~’ºŸDåGßð wÁ÷—¬gžmwmiÔ¬*º‹*Š"l5í"Uô Ls°HN*½lÂ^ÓV‚Ó4Ñ/¢Ñï“$qîñ÷ÒSû(¶ûW^÷º“ùî]¯7ÐmmP.—ÃP`ÏóFNj$IB£Ñ@£ÑÈôˆY>|߇mÛ=¶7îxOáÂÄ$EmfVD»Š2z~?Œ¹8ª˜±éËa½÷Þ{±»»;°ñù¢’¦¥Í\Œ– ò|’È«7„X,ŽØ—¼0Ÿ|/ðž×/qw»9à³LS= U ÃhÑ µ5M RO|©ÑÙŸ Àº¹w©KQ€_Ž=%]WLª›Íî(c£›çPz±Ïñ<àc¿ ¬ý¼8?z ’%OSéHÉÖëb?ž/ÂqÏs‘Ë}C¶Ú=„]4âš\$„Ès1óŠÚߨc»n÷}qÅ#ËÝØjêŸEûkÚ`…T¯÷FØv#ä&-%Ñ'ΰ`¤±wÏóBaêû~ØÂ€zï‘Ç*m•ÈÓØ/™/år¹'¬1mÑÉ(ÑÞ”4 ²)3YaØ£uTá=ꜚK* 36©Š.-ik×uO>t‹0¤“‰+ZBr&Á2zsS ò4Õ뀬aûòØ>ñy.yœ>û" Öì†úÒgšìþ!PpÕî–Œ_0!<›ÑkJúŽå”I’¸*chšÀ[ÀÕ/ç—ôç"¯=q9®KNg†¶3˜ûíû¤¤Åÿ“JSÆ ‚^¡I‹D­ÖŸs t½dÃŽï8â‡ÊbÆX¢‚IÈ‹Ÿq$Iܧ¦)þi®=þ™ÌX˜æðÔa۶ʾ€˜Ü4 Ȳ Y–Q(¼T†É"Žã@’¤ž xRi‡4XVwý‡&KL2w±, öQdÒAäùìº.jƒ*Š1™áTÖ´Îbªê8SȵE£€®X5!„ä²g•  4º­Y’+Ð;±NšK¾óW€ÿ5ÂØPMð6qÌèW?þ·W ÚOt´¦!ÈÓ>(‹Gïj*][,v3ëð@BäZˆˆ~w§ìAâëªÕº+â² ¼úÕŸ€ïK$ µšpDNÝ܃@x÷+•Ñ«DŽ#î]‹£N†ò>ã5Žªv=©Ñ§ZšW¯§¦‰ÏHë–ˆÇÏ¥…W˜™ãºÃÓ›I¬ á%`YÖ©Í!b² õ[m$Œ)“ŒõA ÞÇsy&+˜¦°Éqk(•J$ 
Íf¦i¶mèº~2ó~f*œŠ¢K4ècæáÀTà%zï´h/SBÀšfra•@nä®%-òëúè0GÀ·}›˜ØëºðU*â<⃀„^¬Žáíi¢ß³ñ*+Hö.SN,åèQÁ'ž3&R,Š?kÔæeøã?~{ϪûÈ9wšüÊ(ž'Ä0:”*x4éĪiv«à¦™=U*“ÍÐ$©{?°(Y*|ðÐHÅ‘F囆ǧâÜ–eA×õ¾•aiöèTسÊd ÊÌ˲ ( êõ:$I‚a¡§Õu]¬ ©ð°¦Í_ÛhÓ6¢j¹†Ñ?úSU^øÄ'€{îF·@RÏIøž¯¬ð+ç€ßzð¾%ybî(ä¼ùü õ½¯%=˜dt <Ñ¿Ãô0õ”„wuDTdÏç óÄJRo(1·4L¤\î®CÐ]’€K—Ú}a/C{å²x2HÒxK™õº°ÇÕÕäûÃ÷űÉs>jq…BÜ)d6me›LŒA¤î»µd˜¬@}$Rg²`†É45N%ê«ÚŒ¤ÜPЇëº¼@\›››á Nñ׈EíÙšf…ÐuÝÄ0š¡˜æhïÅgÖëý³¥hH¯eÿù÷€Û÷€ÊÝÉÇò77êÀ¿ûqaƒ&æWVÑ­þKXHÎõ”ÑíK·a9¡.„÷ÓFr%affض07{ÉdÀ÷|Ï øùŸO™ÌKÞýFC4í=zÔj¦ãÛNWª• €QÈ, Pæ˜x^rú|c……†eb˜¬P.—N¼ë¼À;9ñÔ’i@¹ÝÕXLRøË²,hšÖ·©ë:,Ëš_w,àyÝš7 Àõ}/®­­Íû¼N* 1²õµJ¡AFÓÄ 3hu†Äj¥’¼´ÿ¾OÏú°úâXþ;@iÈ9Pø°bˆs ÊºI8Hï…¤\Ó¤±“ú˜Òç_I KHצ‡™¾ß»8£iâYûðÃǿ؆žfå†zƒ’MO:«ªãO˜´Ëüaø7—wf¦Ã `ÇqÆòšJ’Ämjñ…RøÒqûJ0Ç‚zJkâˆÇc`Û¶GšҳsÔ3Žæ…Tž½{©qÝñ4~’w•PU5³§ª7"I]G ró£âù?>Ö{®ßÚÚ:Ñ“œäôhíe.ÕáO”®Û›tÏRxíˆ/,Zµ­/ÅïV²¢r²Ô+K’ƯŒÂLL¹\NŒ£êð ìû"•äààä#jÈã©iÉÅ.ËåÑUÜu]ÜÑRäT"zïP‘ ]Ç-—Y´¦€‚ZÆY£œîA Œ•…¸1fD¹Üí D69 ÏëvMu> çGôù¶—þÔOá}×wuʧ"‡Õó†yª =Äã,UM¡½aïO½Àkz_ô|O•ƒÆ¸Aá½-D^ªƒäjÂá!¤ÓÎ=Nñ0O’"ÚƒxäòxÛÏÝèxЦ‰4 ÁnØÒý01Ë03@’’35†µõð}¹Šn”Jâ¡G‚S–»÷!‰Ò8²,îkªŽ-¾ñš$%?ÇÝß$bÃêóÌ,¡6LIa–µ b•N–"s¦)à¢ÍAi_TЯTûAW *Šøå…’eqÞ…HaLÛîÿÌèŠZ­&>»PàrÌ#§ _°, ®ë&zW‰¥÷ÇÁ²zë¨jr¼µçu½°ÀQkÍØ õ„§¹cÒ*…‡í9]<²»‹O·ZcöõÐjµ°»»‹ äóùD/m˜Œ1*55UþR¹œ| ZUKº“‚Qm=Ð/=\*è÷˜&iwȶ($jËBx˜F1>›µÇBò7×Cný.€o¾cšÁÛ¶ÅWb˜ Ai׎Ó/XEˆSÓ4ÃKŽã,Ƥ…úSo`šð*J¿"™$)I|&yUÓ H~&Òd†`/ì±±mõ„¿9}Í‹`ÞáâÈÁð²Æ{mO ‰U]¼xJ• é>Jòp†8+MГ¾\jÃGÿ+¾°Ðh¤/àyŠ¡uƒ$lÛSûEišÐ4m¨X´Ø?ÍÅ|ŠHÓ%è_ÁMZ(¡¶ƒñ¹ž+€C‹£”þ˜Ôìœ<¹Ž#l½ÙÄßïï}™×“w1—Ëaee%ܰµµ…••¸®‹V«…w¿ûÝÓûr3ÄŸýÜÏá÷ M Ķ…q% (Š"þ`N5…I7™‡dÁ:ˆ¤±l8M³¢JÕ|ŒÎKå"J™'ž^Mxž ÷æŸÆ·<ò¥)XGAb5Mu_†™º.æ¼ñE`ÑžWx>J¥ÆìÛ™M*@¦iÉñ΋DÜÛëºÝð1ba\‚óÇó3®ë¶mT*ø¾ÏóP¯×³·I7$Ù÷´ž;”~ íÕõn¤‹ãô/bŽZˆ¤gNRg†øyÓ}«ªÝÅŸA×vÌÅÉë···Ãäã(ù|kkkX__‡a¸÷Þ{±ˆù®¾/lcÐÀýž'žÀ<ïyƒËYVrXG2Àøc˜çRBrÞ¨ !@éï(bpÈ®‹^ÁšöÐ Dsº(½SŒë&?ó|ßÇ3oþ'ø–G™|—Ú2QH‹U&ƒP1vJÓüÚ¯ýiüøß MÓŽ†ç lûQ.—Q.—³ßÒÀqºyåËpßQŽ+¡iÝðá4ÐÌóðu¿û»øø˜9P‹NÜñhÛv_ј4NÅÌ-jDÞ̸`¥ÉåÒýa´ô;-†ÆðI2 Ñ4pCÚ¹AÝðâÓpÏóP.—³)P£P +ì&™‘s,jó²,"€îJU±Ø­%0I}]µKQÆ_¨“ë:î¹çž¡;ÝsÏ=ØÝÝE»ÝžéÉÌ‚Q=Xÿøý/Üpþ|7ç 
J¡ÐÍefX•JÿJ„!½mPˆ-µ‘!†ya%ôi*ñÅèH3xc›9q5x÷}ùóçÅÆ$ºýoA+ÊË2if–’h`чX„†ÑïÑjÀ$R3ç]?°Tuùï»h!¨¤¿‡iŠŸRI<“’Ù>ü®wáÓwÜ1ï³?Qn½µë ‚®ëöU/—‡;3A´¸Wœ$e±(þþ²,V¥šÍî>…Bï{)w{”X¸­Ì‚äL‚ låtâb5Z!= ñ‚­Tr¶-B…h1ªú»±)R¡Ñÿ [EF=þ×çr9är¹ž×ÖÖzƒiû•+Wúö]t‚ Àmo{[WœF'“†_y¢Ó@¿”ôº>â÷( „y‰ç4Ìdxž'&4Š"l<¾jF+r£ì [ÆœFú'4¢¤ù×|ÍOÁ4MT* ½¹~#=«ž'~¥ƒ§‡âñã+æIù¥”`=†H®í-ÄBÿ_f¡šZl‹WÒš j‘¡èV@Ì[E,Àã‘ôþÜ[*Ž*NDQkãäñEï‰øë Z[…éÇ4»¬Ñ[=nNA X,B×õù,:Fs”“ì2 ¦sD÷£¼ +~ã‹44o+•ÄØçºÝ×N)×_»v­ïÅyŸ×Ô* ˆÉ|˜û!œúû‚LBÔ+Zp°‰á•yá=u ´È“åÕTfæ ÊWê±qMë}ŸESt†9¢•ò]·×¬=ÏÃüÀ{ñ“?y …Ÿ€ªª‰¹~}P‹ªÚ$ˤúo°¨HÕõtϋ̻¾2ˆ,ó‚ºk'š&LÒ²|4zO7¡¹C!»-†RÏQÊ/MÊư‹;Åøe„ºEuhôöŠÕT½å§M´_©ÔCª› Z­TÄófX>4…Ù‹âÿ§üÙq}§ÓÁþþ>ÖÖÖî´´’õº.ƒÆº?~Ï{ðO¿ò+Å/ºžœ71 º"•ú’F’zªF1!o£E­Œt–YZ(·>Nx#Ù7<é·7ÃÌ™ è:6]W¬ÆÇ[xžMÓ It]G©TíQµínÅ÷4êS>y`NUíÎkņï‹Åȉ…ªi q9ªÐõÅŸ¦)nFjtOä£'M¡¼¦›µ0|&3Ð…üG’Ô¿ž^.—¡ªj:±:¨6ͤP5^z>D{‹idÒ: &)ⵤ\—ÏçQ­VÑP)·Óé Z­bmm ù|~Þç;6Tq9‰Ûööðæ«WÅ/T½pP^EZlôŠS":­ç¤0\`ºb5Í¢²‚ÞœWæÔ1ÈÆÿò½ïÅO Ë;§b'¼BÍ,årtTÜŒ=Ï CŠÀ¦i}Åizˆ7JÓ‹˜aNU%Mhle“Ï6/Ëér騷ïQN8€îŠ‘¢ˆÅ~êe}©ª(C÷‹UfÑ”PªïÒbš&$IB%mÄE©4Ú¾ÇÁ²zO*Éžiñ†™:×_¸p›››8þ<Ö××{DéåË—±··¸pá¼Ïu"† ÖgýÍß w×]ݦáiŠ·Q <®='…á­dkš±#¥N=Aìîî¢Z­öí°¾¾Ž…-¶4ȶ=Ïùg=«w¦Ü$B FÇlýÕ{=  6¾?+sêIê¿ Ï98ÀÍT Žëö‡o1LÆ1Íþù%\×U#£I®@·:50çÉ5³ 8ðßÙeYh6›Ç«ž×ÛF†ÚòE'ꔇGn."V=öž2Çd QEàz½>žíËr²}G¡–ž”< ˆ}éYAÏ Í;×¢ ð… °µµ…V«nÌçó ›·J 𓏮‹»oº)ÝA, ï©JhHš2„×T:úw”.f§3ÑvQÎ|ìcxÑ7sò›¨0F&*u0Ìh¨ę̀ùJ(X‹ÅîÎôï ªŽ “aT¸|ùa^ö±P”Þž¤ÕÎI #1Ì1å[.—aÆdík CtH â_Ã>¸ÑaÄTÉ‹½eŠë£¿¬¬¬ -¾´hP…àĪӮ‹ÒääºèVý6v+|¤£W°²÷”™ÃRPÿÉÕ«X$X ƒ‹-1 EZ§¨ïû"_•&Ü”«Í fAÑ4 Pp&Ë[·[Š“4‰çg“¬#hª"KÔß7º/Un2Íîÿ=H$IˆVÊUe2Åuó>Yâ8ƒ{û>ëÆ”föã@ä‰[”÷‡l×"ï—ÁTfj(JrÕô?ü0>sË-ó>=†™”b4 ÏóÄJ|Ô»Êb•YP\¸té‘£ŠÀcFxžˆ44b˜ `šÉõN}߇mÛ£+¼;Žð¢–JÝ2òQ¨µ*NÊ$×ÿÙeP_jÛ¶ñš³gGOb|k B¸ªHö²z]ü¨ !Vyá’™2IãêÕF×¾â+æ}j 35êõÑsˆ ÆŸÔ3L†)—sçþÛð*×Ixž˜À×ëÏdšAÝõÊå2*•Êð0xR»ñ~¨Q$‰Ã{—€¥¬Iz4X–…Gï¸ct|™®Àô Ây“¼£iž#,V™`šÉãð§Þ÷><÷öÛç}z 35Ò̹ÃüU†YLxÓ›¾€?þã÷CÓ¶ïÝrÙ®ÛmÓT«qq1&sPaÞ¨iŠìÞQ} ×u!IÒð…*’4L¬2KÃÒ Vß?qÑjY4M×Èòp ¼ªG¿ë×(&Ò‡ø²Xef€ãô Ö `š&žõùÏã?þØÍûôf*˜¦H)J¬Gp´éû><ÏÃû_ûÚyŸ.Û ãûÖÖoá˾L½³e‰©§ü¨0J†™®Ûít½«æQŸ_ò¨ ¶,¡x5=§§ˆ¥¬ž×_g#8ŽƒfÚêwÑú2zóXmˆpá1ZB1Ì´‰Öàð}¥R ªª¢²¿?ïSc˜©_˜‰BÍä ÀB?Ã,”zG‹2¦ lm=jõ§F÷žŒ÷vb˜ 
BE"“Ú^û¾×uqpp0ú@šÆ…ÁN!K-X{"aŽðj×]t:Ý ÚÎDXZÁm=öÉóoð_¿øE¼ë÷¿[íW…¬îÑÿ ½¡ÀQˆŠÁ¬ ˜ @)ð<<ýG„ÿ÷»Þ5ïSb˜©S.'ÏY<σïûÝ<'ÇI×÷†a2HÔa*IyäÞÞP`Ëî)†Y@dypkÓ_¿x EËýO™,¥`æüíý÷#p¼âÝï¡Àº‚³Ñ?õ"D8M%_ž1B’€«ºŽ_yÉK`q¯Ifɰíîÿƒ @¹\†,ËP¶mw½«ÀàÞ “q¢k-žçÁ4M4n;ZäPHfqœÁ-Pƒ À¿þë¸íG”Å*3”ëæ}£hµZh·Ûc½'Œ <ùƒ?ˆËßÿýPUµ¿Ê¯ á]-@ôIe˜fûÄÜ\QØ6úÈGPvœy_ Ã$2©bžN´\.CQ(Š×u!˲×fæÆqì6Ni{Oÿâ/âWÿå¿D½^ïí'½æ9®}"?[’D ‡ã8ð}?ìhP(ðœÿøÙ¾™‘dÖÃÚn·aZ­@UUX–•ê½TÕ½ý=߃Æsžƒ¸xQ´ qÄr·Qæ…æ9Ž}bn.ßÐÆÿþ÷ÿ7¿ßÊ+ïLÆ8®»®˜äÈ2Â÷‘Gµ/WÛu9˜9QŽkß„mõŸx(làÁO~¯ÙÜìí%lYbBÃ}&™dZömYbh6MáúÂ'>ÿ¯ÿ ÿý¯þ º®£Ùlv# f™õ°noo#—ËÁó<<øàƒhµZ¨V«©Þ+Ë€êÛh}èCÈÿÄ®ð ýb…“X¬2'Ìqì9Ú_ûž·ãžnÀOüÂ/Ìûr¦ãÚ¸ãPÅT¶mîÉ+ Væ9®}"?[ýGáŽúV‡‡¸ôm߆ï|ÛÛzw’$ö>1'Î4ìÛóĂ̗ù½¢Þ€ªâ'ßóì|ÿ÷ãêÕ«¨T*,V™ÔdR°¶Ûmìïïccc°²²‚õõõѽÈßǵûsÜöì÷㟾ýŸŠ¼T®Ïd„cÛ7ïR¸xÓ/ýøLæ8®û¾pšÊ²‡R©„z½>ÜÎuÙccx¹ àþ?ôw?Œµg= ÓL^”Ñuö®2'ÊTæ(žhcóß¹‡îlã}_ü¢P¯çª2‘IÁzåÊ@>Ÿ_Ëçó©ãè; þàóÿ/ù…¼(¨´  ïã®feV«…½½½yŸÆÜ9®}À]_õëx×+_¹Ðml–Ñ–íž”ãÚ¸çªê†bUDlY©‹âÇóæ}™#Y6{XÆ{vŽkßæ[¿€¿¯×ñÚÏ/¾ïæ›ñÛøÀÂåËh Ëv¿NÊqíÛ÷7¿ù°ñìÄw¼óñK·ßŽ/ù±Ü„5£,›=,ú=›ÉÖa7E§ÓÁÊÊJßëO>ù$þçÿüŸ¸÷Þ{qË-·à÷ïþžÿ‚U`ÞW39=ôÎ;7ïÓ˜>ú(}ôQär¹¹žÇg?ûY<üðÃxòÉ'çòù“Ø7÷½÷Þ‹Ûn» ß²ÿSÿ?±¿¿¸ž{˜&Y¹g}ôQ<ñÄsûüãŽá7þå_¢fÿ:/})ßò\]YÁSwÞ‰§^óƒÏ|æ3s¹–IÉŠ-L“¬Ü¯4~·Ûí¹|¿Ç¿Ÿûñã?_ùM<ûæ/Áu¿üËø3º†Œ×q²bÓ"+÷,ÍÁÇ¿3)X;ÑÆÁ1®]»–x³\wÝu¸zõ*|ðA(Š‚—½ìe¸|ùò¼/åXÜzë­  Q®]»s¿¦'žxó7ƒg?ûÙsùüIìn¼ñF<øàƒxÁ ^€×¾öµa1„E%+ö0M²rÏzž‡ø‡˜ÛçOc ¯^¼ØÿæÃC`ì>+ö0-²rÏ~ô£Å“O>‰¯üʯœËçObßÑñû†ïûZÜöÜoÆáááÜ¿ËIÉŠ-L“¬Ü¯4~ò“Ÿœ‹¸˜Æøýþ%®{îsv[ü, Y±‡i‘•{–æàãŽß™¬Ñ0„8ƒnÞ7¼á xÃÞ0ïSg˜‘Lbßpï½÷ÎûÔ&<†3ËÌ$öÍã7³(ðøÍd‘Læ°ž={@oX¼B#fÚ°}3ËÛ8³Ì°}3Ë Û7“E2)Xs¹ÖÖÖz’ƒ]×E±Xœ÷©1̱aûf–¶qf™aûf–¶o&‹œ9<<<œ÷I$Ñjµ°¹¹‰\.&yïìì Ìïc˜E‚í›YvØÆ™e†í›Yfؾ™¬‘YÁ ˆÄo*,³¶¶6ïÓa˜©ÂöÍ,;lãÌ2ÃöÍ,3lßL–È´`e†a†a†aN/ÏzûÛßþöyŸÄ²³¿¿3gÎ ¥hµZø»¿û»Äíö¥Ù> :|ßÇóž÷¼‰Î)‹×ÄLÎqì{ÛgÁ0_ÄëaŽÇ0_D{à1œ!NÛø=Ꜳx=Ìñàñ;û×”ŠCff¼ë]ï:ü¦oú¦ÃW¼â‡¯xÅ+ßò–·~îsŸ ·_¹råð»¿û»Ãí?ú£?šj[ší³àsŸûÜáþ膟ùÝßý݇ù—9µsžÇ51“sûžÆöY0ÌÆñz˜ã1ÌÆÑx gˆÓ6~:§,^sø`ø±½½\.noµZáöaÛÒlŸÕjív>ø <ÏC.—ÃîînêsÊâ51“q\ûžÆöY0ÌÆñz˜Éeã‹h<†3Àé¿GS¯‡™¿ãšÆbÞŠyYy衇_ñŠWô¼vñâÅ÷¼å-‡‡‡b%ã¯xEÏêÈù/ÿåðMozÓÐm£Þ;+>÷¹Ïõ}æ•+W/^¼˜êœ²xMÌäǾ§±} ³ñE¼æx ³ñE´Ãâ´ß£Î)‹×ÿ³Mãrý¼ó²²¶¶Ïóz^»rå n¹å–ðÿÏçÃíù|ív{è¶QïT).ŸÏ£Õj¡Óé 
ŸÏckk+Õ9eñš˜É9Ž}Ocû,fãûûû w=ÌñfãËfßÓ8g¶ñÅá´ß£Î)‹×ÿ³Mã‚õØÜÜ á¾ûFppp0p[§Óú^ê—5m®]»Öw-×®]ÃÎÎÎH£NsÎó¸&f:ŒkßYµ‡a6¾ˆ×ÃL¸»®;p߬ÚáL§aüeßYœs1ÓƒÇïl^Ó¸pë p÷Ýwcccpï½÷0ˆÏ~ö³·]»vmè{ɨ§MtuçÒ¥K¸téÖ××±¹¹9òzÒœó<®‰™ãÚwVía˜/âõ0Ó#nã‹h<†3Iœ†ñ{Ô5eqÎÅL¿³yMã‚õX[[Ãúú:î¹çž0A:êvOÚ¹\nè{s¹ÜL®áܹs†bèt:ØßßyNÇÝÎd—qí;«ö0ÌÆÏœ9³p×ÃL¸/›}ó~z9 ã÷(ûÎ✋™<~góšÆ…댨V«áÊAù!pöìY½¡7ív¹\nè¶QïÃÂVVVŽ}Îó¸&frŽcßÓØ> †Ùø‹_üâ…»æx ³ñe³oÃO§müeßY¼æxðøýk¬3âܹsØßßÝøàºnøÇÏårX[[ÃÞÞ^Ïöb±8tÛ¨÷Ί|>ßWB»Z­†+3Ç=çy\39DZïilŸÃlü¿ñîz˜ã1ÌÆ—;y ?]œ¶ñ{”}gñz˜ãÁãwö¯i\ÎÎû$–•íímìííamm W®\éIDLúææ&r¹\˜Ô¼³³ƒ•••¡ÛF½wVÐgÒ*Õ8×3íL¶8Ž}Ocû,fã‹x=Ìñfã‹h<†3Äi¿GS¯‡9<~gÿšÆ댉–LOÊ“èt:á P|û°mi¶Ï‚ãžS¯‰™œãØ÷4¶Ï‚YÞ“lß‹Ç0_D{à1œ!NÛø=j{¯‡9<~gÿšÒ‚•a†a†a†É$œÃÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁÊ0 Ã0 Ã0 Ãd¬ Ã0 Ã0 Ã0L&aÁš1‚ €ëºó> †™lãÌiföÏ÷3Øî˜ÓÛúüaÁš1<ÏC±Xœ÷i0ÌÌ`gN3³°¾§˜yµ;Û¶áûþ¼O‰afB|Œe{?yX°2 Ã0 Ã0ã8Oà™SÛûÉsý¼O€é®ÔH’EQn—eªªÂu]èºp]7 SÐ4­çý¦iÂ0 ض  ( 4Mëù<Ã0&ÞŸaÒ2ÌÆƒ mN’$èºI’`Ûvhó„ã8‚º®³}3 à û?ŽíW*X–•øÌ‚–eõ=æ¸$Ù2½æ8@UÕ>¥×xÌf…Aãv{W%qlØ®'…=¬s¦P(À4MB|–J¥ží¦i²,bÂR*•›IJ¬pÿ P(`Ûvø^Ú‚ @©TB±X„çyávúìIög˜4 ³ñ °ºº.º8ކÝAÐgoDZW¶of ²ÿãÚ~¹\N\å‚Åb1œ1Ì´5_‰’d£M$L£E–(L˜a²Æ '(Š i»mÛ°, ó¾ †™ˆaö϶Ï,iæ+œëÊ,ÃÆí4ö>llgƇk†ñ}«««h6›P¾ï£X,Â0 d– §) Ðui3§¶}†a†éÂ!Á&|æÌ hšÆb•9xž‡[o½²,³Í3§ ¶}†a†éÂV†a†a†a&“\?ï˜ívþð‡qÛm·ÍûT¦Æc=†¾ð…ó>©ñôÓOãé§ŸÎÄßèé§ŸÆu×]‡oüÆoœ÷©¤æ}ï{žûÜçÎû4¦F–ìaZdéž}ì±ÇªÈáÙ'K÷ìÓO?•••…É ãñ;ûdé~}ì±Çðú׿+++ó>•Tðø}²tÏN2~/`ýó?ÿs8޳0¯4<ðÀxÝë^7ïÓ˜O<ñ>ñ‰OdâoôÄOàÚµk %Xí×~m¡ÎwY²‡i‘¥{öX(ÁÊcxöÉÒ=ûÄO@&Î% <~gŸ,ݯ<ð¾þë¿~a+ßÙ'K÷ì$ã÷ÒÖç>÷¹P[[[ó>•©²L×ÓjµÐjµ°¾¾>ïSÁþþ>._¾<ïÓ‹ç?ÿùl @VþF­VkÞ§0<†gŸ,ݳ‹6†óø½dåoÔjµËåæ}©áñ;ûdéždüžkÑ¥N§3tRÕjµÐn·çyŠse™nÈçó™¸QN ¶ïá,£=,Û=; ¶ñá,›=,ã=;¶íá,£-,Ûý: ¶ñá,›=,ú=;k§ÓÁöövØ£(ŸÏãÂ… ÈçóD,¼aᤪ*,Ëš÷wÅ0©`ûf–¶qfYaÛf–¶qf™‹‡µZ­¢ÝnãÁ„çyÈårØÝÝ ·ooo#—ËÁó<<øàƒhµZ¨V«óþ®&lß̲Ã6Î,+lÛ̲Ã6Î,"'.X;öööpáÂ…0™üž{îÁÙ³gˆ•ýý}lllVVV°¾¾ŽF£1ïïŠaFÂöÍ,;lã̲¶Í,;lãÌ¢râ!ÁbÏçÑjµÐétÏçÃXñ+W®„Û‰|>ªãè™Åí›YvØÆ™e…m›YvØÆ™EåÄëµk×›››áqíÚ5ìì쌼):ΔøfN'lß̲Ã6Î,+lÛ̲Ã6Î,*']ݹté.]º„õõulnn7Ä èFKâ±ÇÃ<‡a±··‡‹/Âó¼©{Vö 
?þ8677±··7ÇoY677ñè£ÎäØ<†3ó¦Z­ââÅ‹øèG?:ÕãòøÍd¿gÑšŒÇofÞÐ|ÜñûÄë¹sçô–‹¾ûî»Ñét°¿¿ß†gXOª¾ð…xÝë^‡“¾$fÁX__Ç=÷Ü3“æÉ³²o¸ãŽ;°³³³ÐeÉ™“aggwÞyçLŽÍc83o¶¶¶pÏ=÷à+¾â+¦z\¿™,@ã÷0{›¿™yCsðqÇï¬Ã VVVz¿‰v»½P ”™Ó Û7³ì°3Ë Û6³ì°3‹Ê‰ Ö|>ßWB»Z­"—Ë…ÛÖÖÖzÂf\×E±Xœ÷wÅ0#aûf–¶qfYaÛf–¶qfQ9ñ¢K`YVO.%|[[[ØÜÜÄþþ~˜ä}÷ÝwÏû»b˜T°}3ËÛ8³¬°m3ËÛ8³ˆÌE°æóy¼ûÝï“¿×ÖÖÆÚÎ0Y†í›YvØÆ™e…m›YvØÆ™Ed.‚±òÃn‚QÛ&˰}3ËÛ8³¬°m3ËÛ8³hœx+Ã0 Ã0 Ã0 ä+Ã0 Ã0 Ã0 “IX°2 Ã0 Ã0 Ã0™„+Ã0 Ã0 Ã0 “IX°2 Ã0 Ã0 Ã0™„+Ã0 Ã0 Ã0 “I&jkS­V±¿¿V«… är9´ÛmlmmÍûz†a†a†a˜%alkµZÅÞÞÖÖÖËåˆ~M{{{ØÜÜœ÷õ0 Ã0 Ã0 Ã0KÂXÖv»ÝÝ]ììì`mm ­V  ª*r¹Ο?V«…|>?ïëb†a†a†aœ±<¬W®\¬­­õmËçóÈçóèt:ó¾&æ€çÍû,†a†a†™c Ö••H¥N'ô¸2ÌIaY@±Øö¼Ï„a†a†a˜i3–`%/êææf8m·ÛØÞÞF.—Kô¾2̬Ðu Ù‚•E+Ã0 Ã0 Ã,cW ÞÙÙiš8þ<`»»»Èçó°,kÞ×Ü2dYüÛhO+ D,Ã0 Ã0 Ã0‹ÏØ‚uee;;;hµZahðÊÊ ZbR@¹ Ôj€$÷^Ë4­+RK% ^ÿ—¤®h•$±_¾/>ÿe/»/{Ù¼¿ †a†a†a†1–`ÝßßÇææ&|ðA¨ÌDXຢP’ª¦ŸãˆÛb×÷û/‰ÖAxž´µðÂ>Ë—çým0 Ã0 Ã0 3Œ±k>ŸÇÊÊ \×Åúúú¼ÏY@ÈCêºé«ï¦)rUƒ@ü û%ëyâ}QOk¹,<²ª ìïÏû›`†a†afc Ö••lmm¡Z­¢Ýn'î³µµ5ïkb2†ï ‘©(âG’„xLK¹ T*â}iÈe¹ûš&¯ªŽçÕe†a†af¾ŒÃº½½ ØÝÝMÜ΂õô°º*Bp)§4N¹ †¬–%¼¢äaõýtŸáºbÿA9©ƒˆ†»®øi6çý1 Ã0 Ã0 3c VÏóæ}ÎL!:=/Y°‰µšØ÷lFºÏ9ŽW”¼±²,΃a†a†a˜Åb¬>¬ Cxžï´~á8ý¢£ZÏØ¶8Æ4eq® Ã0 Ã0 Ã, Ö½½=lnnBQ(Š‚ÍÍMìííÍûZ˜)B8–Jâß(TÐh÷Óu‡ Ö ,H)„˜óM†a†aæt3vHpµZÅîî.Ö××±±±èt:ØÞÞF«ÕÂ… æ}MÌ1qœnÞ§¦ oh<ìw”‡”<°ƒ¤n_Õø{d88˜÷·À0 Ã0 Ã0̼K°¶Ûmìîî²,¨1÷—ªªØÜÜÄÆÆr¹Ü¼¯k)‚ôUq§E¡ D£¦¥Ëûtá žç¨p`"ºm‹Ÿz}p'†a†a†aNc…_¹rúÄ*¬­­!ŸÏ‡û0“ãû¢Ân¡pòŸÝl ¡ÿ“וίXÿ÷¼þ<ÖQÞU‚ /yžð¶«8Ì0 Ã0 Ã0Ìéc¢ÖN§“øÚ Þ¬LzH *Š‹uÝéŠ~F¹Üm-¢Oé(H°ROU 9Ï´RIça !VËe!’OÚ›Ì0³$X–ŕՆa†aŽÁX‚umm ¹\¦iöˆÓv»ög][[›÷5-4¦)åˆJRzÁZ(Oå H’ &hš£Å¢ªvE¨¢ˆs¤×£¢5mU@ÖBaxñ&†Y4|߇iš( ‚¥R Aôí3AÀ¶m˜¦ —VŽFìÏÑ. Ã0 Ã,c{X-Ë•+Wp×]w…U‚ïºë.´Z-ìììÌûzÇ".êT”t=K-«›{šDˆj¿š&<·ºÞ©†1ú3$ip˜otþ\.nuÓÿ>7ßü}Ðõ Ý&#AÓ4që­·¢X,†?«««(—Ë$ Íf•J•JÅbA„–ö'ñiÛ6VWWQ(z<²¾ï£T*¡P(À÷}(ŠîkÛvŸ¦ó* ,X†a†Y Æ®œÏçñŽw¼úЇðÉO~°²²UU¹ØÒ11M‘Ç'I(‹¢@‘$uÛÏ4›]Z.‹ÿ“-Åÿ“<™ióF]·>­âKbr¹gàû— (Ãc‚=σeYøñ×aY*ä²e˜Œáº.,ËB$ ²,Ãó<¨ªŠƒƒH’ß÷”„›UÓ´PxAMÓP¯×Ãã–J%hš†F£îgFèU­T*¨×ë}dzm…BªªB–e¸® ß÷¡išÍ&Wlg†af)[°îíía{{kkk¡Guss/^Ä… °¾¾>ïkZX ! 
+á…%OªiŠÜOj ë¥×!4u}t+šQH’Âñ9¹¢P¡¥÷9|Ó7}Ót¾˜,Š}ŸVÈ“iÄââËå24MƒYé‘eybQh¤‰í IRGuúqW¢¦Û8³¬°m3ËÛ8³Œ%X[­:NX%666°··‡+W®ŒeÜÕjµ/ÿµÝnÃ0 ´Z-Âa «&tBhLÛ¶e”ª–f¾©ªB,GsOé†1Û »žçÁ4MȲ ]/á3ŸñP.ï…^¦—½ìoñÏÿù?Â7|Ãë ëç –¦èy^8Ù{§t]G¡P€ã8pF#üÌF£ P.—C,¡i´ Õøþþ>._¾<»/íˆE²ïÓ€mÛð}º®C–e8ŽÓ4C/&‰J×uA&Ä`Öag–¶mfÙag…‰ÚÚ âÚµkcí¿¿¿ÝÝݾ׷··‘Ëåày|ðA´Z-T«Õ¹~Q„®÷:ª*Â|ÓˆMUظ“FÓ¦çAM‚Â" Ã@­VÃ;ßù¼ë]o Ón·±¿¿ ¢ Óúú:I1¯' åN£­¢ãŒ×FU“Ã~§iš=-7ŠÅb˜¿…òåTUE³ÙÄÇ>ö’c}®®ëh6›=¢”B„)„x‘XDû^vhA„líààF²,‡9ÓårŽã„‹&Ì`ØÆ™e…m›™×uÃöcôcÛvXž íe¶qfÑ»èyöööBqJqî;;;XYYIuœííí°@…[1D…o>Ÿïéû:O¢=H'%š‹š%lÛsEUUEPUu`Xä´=PñÜ@*,³ˆ…dÕ¾O*ˆ%Í2¦ÌÃD¨¦ipårͬݜ„mœ9i¨"þ¬ƒض™4PS4å)ºØIõ8(º,‚°²û¼#ÆØÆ™Ec¢¶6—.]‚뺡‘¯¯¯CUÕÔbuooív–eõ… »):NêϘ6¶-<¢•Jú60ƒHªê›,ËB£Ñ€$Iá*à¼=›“æ¨Î“E´ïI ÜäIÿFî ˆ¿3µrq]7l3鱩¢4áºnªâZ­Çqr‘$Šm<ð3;þi±q&[H’ˆNš¥`eÛ>ÝPÆaÏžhª’aCÅgô8’$¡V«¡\.‡s­yÀ6Î,"#k»ÝF§Óé õå­v¼ÝÝÝaÑPã8×®]x³<öØcxàú’ǃïwÅ©ïw{˜Rñ¥IæÒA ¸Ã8¥R©g¥î$VâlÛû9óª³dooïz×»ð%_ò%S?ö¬ìüqlnnBUÕ¹¶ò}–e…UœÉ{Im†Òˆ=Ó4áºnèÉ´, «««e9,¦E‘¨ï(µ2f›tnTX‰&ÑÿC’¤….´äy"zã‰'þÏ{Þ;üÆÔ?c‘Æpfùh6E¡˜‡z·ÞzëT}Æof0a&IR_Ñ=Ïó`Û6\×…ªªa:ɸÐÜyXÿùÍÍM<ú裫÷¿™yCsðqÇïTÖÝÝ]´Ûm¬­­AUÕcÝ@{{{XYY뺡—öÊ•+¨V«X__zìaÕ‡_øÂâu¯{]bãI°má ¥Þ¨ñqÅuǬ–Õ g4w¦þ‹ªªÂó<”ËeT*•žÅZ”$i¢6®ë¢\.CUÕ°b¯mÛcµÒXdÖ×בËåfR%xVö wÜqÇÜd;T„‹ f¹\†ïû¨T*=+Ë´"M¶ª(JOØ-æŠí{Jí‰Êå2LÓL|Ð{ž‡b±ˆJ¥ÒW‰zQˆ.’¦Ù?f˜¦Xø:8è?†aªújlnþæLÎqQÆpfù(—Eñ­­-œ;wnêcø²ßÌ`H¬ÖëuȲŒb±.’’¸Óu}*Ï–J¥‚ÕÕÕ°Z=AõCvvv°¹¹9u± ðøÍÌŸIçà#k.— oV×u±··®ú€‡sçÎ Ý~öìYbˆnŽèÿO ;2M ®ãTut8¯ëŠŸ³g?…/ýÒwcoï6¼øÅ¯E³ùœï¥*¥ºH+qÑðTI ¸®¶ŠIãA"ÏV½^‡ã8( 0 ãXý%™.‹bß“0¨½ (½Aø1S;€IDAT°×u½Ç›:nTÜ.kµ …lÛîó„Ò¹-b±$ÇéVW”n«+Ç^Óø×F–ÕyiQËlãL/”›7¯ÐÅ(¾/ìžž­ÿüŸOÿ3ض—›èüÉ0ŒÐ®i!6úlk4(—Ëp]7±ðäq1 –eõàb±>óžzê©™|lãÌ¢2Vk4 Øuݰ$ö8âumm kkkáï?]•Y[[ÃÞÞ^øšëº(‹'úÅ躘:ŽXÕ·”‰N M³;Ù¬ÕÄ6Y ÷wâÛ¾-õuÀ²ª¡@ŒBÕåâyvŠ¢@Ó´°Âi©TB­Vëù®-ËB¡P=¤ŽãÀ÷ýžÜ ß÷ÃbJäÙR%¬`Çà¦Ã¢Ø÷$X–MÓF†¨K’„F£R©æùL³ˆQ£Ñ1þ¢ÏYD± ˆ1„nÁhçƒèë¦)Æ$EãLôöf&ïÓ¬Yfgz¡<ð㦈°Ñ ¿O¢§ûlõ¼ÙV¶íåeÐb=Ùx|!V’¤™FQÿù  IR˜þR©T`Û6~îç~n&ŸË6Î,*c]"¢“ÃV«×u±»»‹ .;Œakk ›››Øßß“¼ï¾ûîýb(šÑ0€b±?çÔuÅCÓqº-g¢aDZP,žA¥òîð5]×CHÉú€˜l;Žƒz½ž¸’mFØ72LÛÉK­f(|’þOa”ñBº®/tÎÞ"’û J+÷¹Ã‡z(õqzè¡Ã‹/Níóëõîÿ›ÍþíÍæá¡$õîG\½zõP–åëW¯&ûêÕ«‡†aʲ|(Ëò¡a#ÏçêÕ«‡Fcìë0 
ãPӴé}7ËÀ´íe\ƵïÃÃÃ÷¼å-s9׃ƒƒCY–›I7œ©×뇒$MtoÌ“«WÅØa‡‡²,~R©$¿žtÚ/¾ï¼ì…˜÷ÎY–5M;l6›‡µš°WÃ8<¤ÛíààðP×Û)ÙyœZíðPQhŸ«©žO²Üý¿ª6͹ÙË"ß§•¬>³¢Ð\QÓ´ÃZ­Ö³mÞöÂã73K&±—TÖV«Ã0Ðn·áy„qý×NÞÕiU¿[YYé [8il»ëeMŠ€TàêÕä÷š¦Ù“G’$T*•г™&w4ÚÛk–¹Úï"3oûN Uê­T*sï—Uξ/<Ÿ”jÛ"CÓÄïA ]wð6ò°Ú6Ë=ƒ pŽë£P(àyÏ»­Ö[aY6~ñ¿_ÿõO `YVØž-Zä†+åÈÒë \@,àD=µŠ<ñÄ—ÍûkeæDa+¾ƒƒƒpQµP( T*¡R©ðs‹a&€t]‹[b^xë­·¢\.‡‘¶¶m£X,¢P( X,¢X,buuÅb±güŽR(§çË_þò±Îm¤‡µÕjamm­§Y°{T7º:¬aö2S.—€'öÌRÀ^ÕÁð¢6›Â³*ËÝÐÈSÒ˜YPÊe!ò¢ÎßÑDIE“DuêCèºÇ~ôG¯ÂuÝžvjl4MƒiŠò”Jߎ÷¿ÿ`Y/çyÐ4-|>ʲÂQà»¾ëƒø½ßSŽÞo†^]Zá.G‡#Eî¼ó'ð‘Ìû›eæmÛÐ4­çEE%£vÊ0ÌxP0UUáû~8.K’„ƒƒ¸®ŽÏš¦¡V«%F‹’°n£{Ó0Œ‰œ›#=¬+++èt:=¯QÖe©i*ROJ©{ 2LÖñ<ïÔ¬N›¦øt¿Ç>°,á塯fA;è0§ j¹–dÏŠ’M$*ñŠ~†ll|{8QY]] ««6›ÍPT*b1çï|~û·º®ãàà g<‘ežç…«ö=ô{až¡a°m;¬HNUÆ76žéKÓqøÀpÇwÌûëeæÙ_EQ__D®\¹sÞ§À,1ž¬®—.=¾wFÒØ­ëzØÅ„ ¯^½ŠZ­Ç‘‘ÔñHUÕÐK𔑂5ŸÏ£Õj¡ÕjáÀ®ëö%c“Z¦¦Ã‹Œï v]…B’$qø/³TÄ‹W,3†!Äg©$Â{©G$Ð-’ˆ^§ ¼$s!æ ¢†u„iš=5!f Ù­¢ôÖgp]±èBÎ(‘K*þ¯i …ßî9õo6›¨×ë‰ù¦4tP±¦$È —.µñÄß¾î8NØsœ&G¿û»Ïô<óÌ·Ã4-¼úÕß ß÷Cïj”¤v˜âsÄÜ(ºf¾P¼Ê²œøÙÇi!52$8ŸÏcmm ›››X__û2чv:\¾|ÛÛÛX[[ «/ A ~¢stUÜ.Ú|:ëèf\Ù³êûbm6‡÷w¤I»¢ˆ‰¼aZ(ˆWQ„¥µ¨FC Âiªö2Lß÷Q.—û*à’—1>9(•JSØ‘¤î3î‹x£7¼a—/ÿ¾ök?…û±/àõ=ߦUÅTUÕPtÜpCï}ïxûÛ®äSúL×}-Q@'‚ð;ºÿþgãñÇŸ?µï‰û¨Òœ,Ë¡÷œ&ÉÑ^óÓþÌ“(Täû"‡zO‚ç‰cŒSÉý(p¯zÕýÆ+JÜnLÓ„ã80 ¾ïÃ÷ýžˆ– pÇ_‹W½êeø‰ŸP¡ªï†eý×b±xT ¯WPú¾Xԯ׻ÏÓìÖíˆÃ#M¥(Jèa¥®žç…cÂý÷?ŸúÔKƺ¾TU‚+• ªÕ*VVVz 0Ý{ï½ØÝÝÅÚÚÚBV;ó<±ŠO Z J†‹R}ŽaÆ!žoeL³·ÕE¥Òm±áûÃk¹Ü£„®‹cÑÀ HR·Ú)ÃŒMàãPhT4¢Áqœ07tгtœvt<ŠŠÞž×}Î9Žƒ?øƒ¯ÇÞPÅŸÿùß@U_Ótg6¨ªŠr¹ ×uñúןë)N8b!Y†$IáØT,¡ë:TUÅ-·ìãòeÀ×Ïä<™d¨VÔÓIcò¬«ª ˲°ºº ]×{ª@‹JáC›“pø¾x7"ê}tiT’*`ssæ—È,8žç…Åê¨eذú#A æí×¢Ñ0€ï¿ «*d³•J· _±(þ¥Zq¢s%º—ƒ@ÌÙ®\¹’$Á¶…`ÝØÜÊ,‰‘!ÁB‘zéÒ%Üwß}=­kÖ××qéÒ¥°½Í20¨­ Ã,#‹lÛâ®îQÈ•¦õ/8™&zr,*•Á½•¹x3Mh…9Eá®ÑʸŽã V«%†qår9,Z4Šßø¿ÃoüÆßõ¼F =41¦ðÚý¯oÄ?ø]x晟Baû™YA‚FUÕ£|Ùþ},‹òƵ°¿®ë™ëWyZ VgTA¾Ùl¢V«¡R© R© Ñh„áƒÔk¾Ñh hšl ÄÇú¾ÕÕÕ¾jÑU$=©¿4\>~ÊÔgèæšfÿ~ÑñR)9¼˜"ƒ(´rý>Ì ñ}¥R)ñ¾(•J$ ªª†÷ß°ÅÌrY,ÎÇ#JÖTCÏ*íC÷B­&~Òår_¯øŠdpœÿ;ï¼>ªÕGqÓMÏŒõ]¤¬€ÈQ­V«¨V«=Õr¹Ü„ ýbÔóú™AóÏóŽÍ0Y&«ÖèýIžB«ÑñÁ“îárY¼?M0¸ ÃL‚çy¡‡)úš,Ë=¹œA„Uu]OÌo%/Vr^’XÈ¡‰‡çÍæs (rÏDG–Žñ5_ówp]'lR.ߊrù ¾÷{o‡eY¡·lVP¸=O)$¿\“¤ÕUqM¢±®×ë,Vçå¶Q;Šf³þMF!Ë2*•JøwÖ4- #ŽCùs¦iöˆZÛ¶±ºº 
ß÷Ñh4NÄi½ýTµÞX.wŸ9ŠÒõ* ÝE!šìõz²¨•e2LQB ?ü9¼üåïÇ+_ù¿Ãpû(AÅ3ŽþOtm:)JŸBy£XVrk¾46Z.‹1ý#ù? ËâYupÐÄ«^õ2(Š˜ÝrËßý¤ ®V«ØÝÝ ßÝÝÅÆÆ¶¶¶&ù;Ì$qý+ZƒžÓ³~ˆ3Ì<¡pÞPøEŽQJ½.&±iV÷(ôEQúÃæ¤ð}?¦4i§HY–AØSR×uØ6 ª:,kµïyC•Q%IÂÖÖ/àÛ¿½¶¥‘eaë´£(À 7܃ÿôŸêa @¬’èCU<ðÀŸâÎ;oG­V ?£R‚À=h³DÓ´ÑMÏ`×÷vü1[çЇ‡ò…ÉCÚh4Žm†!ì+žÛ* ~ùaû£r¹Œz]Ø®mÛ'ÚfúlÓ)Æ=R@ï( l¶R?¶-&ûñ…QYÇ,—»‘<®Û­™À0DxÝëÄW}ÕíxàW@–ÿ Þð† =û¤Ñ#Ž#æRô|4Ra‹Ý|Ý(¶IÐuñ¹O>ùr¬­ý€¯ƒçyǾ‡SyXwww±¾¾ÏóàyÖ××±»»Û×î&ë¨jÿ B¹kQ’B2¢†YFŽ[Ám¨Ñ{²ÙìÞ“I!¿IP¡ «Ì¼  ¦„çyxôÑ ”ËÝʸï|çoá¾ûÞzfžõ¬‡ñ}ß÷¾žãQJŠ,kxè¡ïÃïÿþÇ¡i@³@U­ MfÈ“IŸïy.ÚíKxâ‰ÿŠJ¥Ò7á¡PåY V £ªá‚ @±X sâ¨Bôq¡ðÅx^·iš¡ˆ5 #lhÛöÐ÷Ž#&o½5ý9Ð:‰i&GÝÅSCâ—=ªâ¯®‹÷'y§t]¼Ÿ.Ÿ"€†‚¯yÍð¯:ZÐððÃÿ¾g?×uQ­þÄlákš&¼÷Æð¢‘ñ ò¦y¼ÐtÃ77¯Çÿú_ SI3)X)üwcc#|<«Ôêf‘‘e±–ÃÅu]ö®2K7(y{ÆD£Å$¾ÿa?î\‰V³f^D‹ÐD½‰¾ïãÍo>ùƒp]Åb¶m'†ݪ¼€£Š#†±4½,™ñ!±:«|áxîv¼÷#Ðím?L¬]q©i£çp@·j/ &䵚X ¢GuŒ”bB ªÇ¡VÏ9ßϨ 51î=Iª`{»·&P.÷¡ž1þ¯þjwÞùT@¥Ôq£æ£Qè>:®ï¢ÑÞúÖ›Âsž†C$uk4Ou‹+¹n÷%ú‡&’$×u¹à³´A0pA&Ä vüÞqÝÁÊÆÁ¶EÎZ¡ & ƒUU…¢(añ]×{D@tr@!É–e…^×aH’”‰ÔæäñdY#W’îò.*Šö”.•JxéKß…|þ—àºîÈ…"EQp÷ݯÆ;ßÙßÑaÝ (BaÚ‹)²,*ÕOc,µ`ÝLèb\­Vqï½÷ö¼¶³³3Ý«Š" %!H°F“ŒãÏtj7À̇ÅÌ ðÆ7ªøžïé¶¾X]MŠö—6 !`iº{¼®V×»Q –%&š6~žÃ,=ö/qñ¢¸‡G„s}ó7«øþŸñÜ’$¥š”«šö;®‹9]˜¦ybb5-qqýHñà2yÃè>‹&ß?³–ê2ÈU.w[ŇMߊEÏ<ózüá¾Aà…íÆ¨Žµ&#¯)Ë+—Ëa¾ûoÿöö^ŠK—€Z-ÃèæWªÅ½ù'ðýàñi|úÓß7ÕïEÓ¦“â•tÞÓJ©)XÏž=ÛSp‰Èçó£Þš)ÈC]…£³As ×u\õÌTñ”Á‚5P¿Hš¯ÔjÝ’ûàpáAÛM"fYkßú­»PÕoÐ]ežVhTš¦…ý[³$<˜lCdY³jÿB§å8éÃ)Ÿróhñõ8’Ä‚u Ðñ$ÁJm Çëʰí®S˶ŸüÉ6r9ͦx1Äâ ‰± à8ŠG…p¨Ê6UE§È—RIê 3WUaï£dÆ?þÇWpéÒÓh6Ÿƒ{î¹aªß åƒOŠN=k.—[¸~«IP¯ºè€EኪÚÍÏ£>pÝ}¸÷êÌ8~63%._þ~Aïh5n|±Ø›ÓÊy:Ìi¶m¼ÿýëxéKoƒ,ÇC¶”p²2+a@¹ª²,s “Û¶§îOJ­ò¼ntMÈëDDZ¬áùuž'&ÿT…7ºo£qü^§¼ØºøØ¶°ƒAžÄù‘/"ŸÿÏ(•îÓOþžóœ2<¯€ù‘/â/þâùxÑ‹~Æ»ÂýËe!hi¸•$ihÁ2ámÿ§÷HMS^`mí\7ÀÙ³?Œ×¿~qœhT$𸤮Lìïï£Z­†?Ôö&ëP•àø E¿G×è÷ÊýWgHŠ2ôÌì à‹_üÆcç@4é]†Y4|ß[Ì â?þÇwâþà× iÞÀþ³îuL^V^deÒ@-e¦i/¾ßŸz庢¯euë Â4… ˆ¦caØZU &çCt®—Ôª†9}Poö¤œçrÙÅåË—ñ ßð šÍ&þìÏ>€3g>†7¿ù¯ñùÏ?üû=é•J7:Ó¶Å¢½iŠÿ*Š+I“/~¼öµÿ€?ù“áû~æ""†Ñ˜R%§Ô9¬®ëââÅ‹h·Û=¯ïîî"—ËÁ²¬L‡ SkÔP¢%IÂðâ†ÄáÀ3D‡gß÷ðÆ7ZêÇ>Ã,#Žã„¹HŠ¢†ý ÏóðÊW¾†ñ-(•JзÊ.ËòÌ{Sá.®Ä¤Áqœ©÷Ü¥º€®²,g4¼wõ‡‘€M3É?8è†X2L¥? 
à¿ÿ÷üÉŸ¼ /ù?=ÚW„ñ³»¨¦PÕn¨ë ñZ¯÷‡°gmH–eÜpC·Ýöúy•s!•‡u†aàìÙ³°,+,‘ïy,ËÂÙ³gqþüùL{[5­Û¯‹ˆæC$ ¤A,ÜJÆB¡à¯vnP’?Û8à ÇqT*X–… “êKìyÀßøåaØS½^[DIzmÚh卉L*<Ï›I™®w=£T¼¦Ñs-ª%BõCâ˜æà<¾´}ªKÂ0„m‹âD¥Òk#Aà™grxùËŸ=ñgŸ:,P_²sÓì<UUñ©O}-¾ù›WæømÎT‚u{{ªªbgg§op‹¾¾½½=ïë …´âBqíÑ^^Óªl•ÊÌcezØG?NÊýKó>áåÁ÷Å`ªiöÎ0à†çä1õ}ͦ˜Y–˜„ÜrËvOq‰¤¨¤ê½³€ïefÔZ£žÐË%ºaµãâû½ïÕ41q;üéù­ä›o¥’¾7d´. þ?܆ÇÁÞðSýLj• ŸFô ö9§‘‚uív÷ÜsÏÐý666Ðn·Ñjµæ}M‰ÐÊ%潫®‹Ÿèºt—JÔãêy~è‡þñÔÏz:‡I ¾¯§‘‘‚µÕj!—Ë!—Ë ÝòW;μ¯)2\M뮾E›íR¨Jt°£‰ÊR‘•4E €@=úw$>Ó n,X‡à8”o±d‹2 3eè9`ÛÀö¶År]’$VÐo¿ý×yÁ‡Y<σišŪãtCv1(œ×÷û{§ƒB(IO#’ÚØ0§Ç‚4©À—mÛ0M³ÇÎ÷ön[˜1|н{Ytiee×®]›÷y›h®*­êŶhÓÞ¥Ëëó­|QyÌ󡤃Ñ7@¶¼ÉƒiYLÓY˜šaN߾雾ˆë®[‡,Ëá³A’ ˜¦‰F£I’àyÞò-l2KI¹\F¥R8ᵬÑÅ‘l[ˆÌxˆe´gå8¨*‡ð2Ç'„@%‘ÚhAà!ºEèLӄ뺂†I’áû>žzê‡æ}úL FzXóù<:N_uà8Tpie%ûÉÀ´:_%tœn¸05Ó^|Xœ÷‰ýîÓäÕº*…šÒ[W N X,âÞ{Ÿ‹b,]ž6ÃL€mÛpbËñå2ðš×¼oy˳z^WUº®‡Õ€9¤žYLÓ„ªªm•¢ÐF­Ó†µ€ –%¼Y¾ÏÞMf>PQ%ß§‚G,« Ó4Q(P*•P*•šÍ& ÀeY€ÿú_߃öÏþbÞ—À¤ •`Íçó0 c`¸o§ÓAµZÅÚÚZ&[ÛÄs*¨°’JµZwuÆ÷ýår»{H'öNŠhµ´Q¡»ž¤@„§AC¶<ÊÀqxž‡ûïÿ Tµ[L†[`0§×uÞ”€˜„Ë2ðè£?Ø^×uȲŒb±Èb•É<¾ïlaõ‹ÃsQé‘A‹þõzrq%†9 TUô€Q0ŠÅ"dYF£ÑÀÁÁ À¢(au]×ñ¡}o}ëð—ù'xÛÛn›÷%0)HU%øÂ… h·Û8þ<öööBok»ÝÆÞÞΟ?v»­­­y_O"Ñö5ô;œkAŸü—€ñ\€“ ©%X]t…jš¼TõèÇ:¡ó_¨§ðãÿ€Ù´4`˜E„Z|âùà8À¿úWÚÓ´V«êÌâ@¡ÀÃ'©®ÐíS™´E¡B(Æh¯,ÃÌʇ€R©„J¥Ò³8£(JßbÍùó_{ï}>øRÄT‚5ŸÏcgggÏžÅöö6îºë.(Š‚»îº ÛÛÛ8{ö,vvv2é]„!GT2ìx5¹h#a?M©»E¢á¡” à(ôzAG1N£x>í¨±BC7¼7Í9ѹØ)ö=E¸® EÑðÿß¿ærp80sÚ ‚²,CUUÜw_ Å¢x6üÖo½sä‚N­VKôÀ2L‚ ô6 ë©XÍd9¹R°ë¦/ªÄ0'A´2µmÛCÃޣ躎gžiáóŸ¿gä¾L6Yt‰ ÑJ­kZ­V.<ª‚ð ¨Î ¡Ûjµ°²²2ññ‰h5``pØ ýÒ\„PÒ{Yǹ|BH6ÇØ?~ü rŽÃÎgTk³ZGºêÃ3ä¤ì; ”“mš€a|#dù7,Ï>˜ÉÉ’O‚ï÷Üó4þöo Šò<òȽh6+$ \vÐl¦Ô˜ecÑm›Äª®ë#Uâ½KE»³þý\w:Õ|™l°ø6.l—œO–e%öÀNB’$|Í×ÜŒ••§æ}LJR V‚ZÜÄW0:Z­ÖÖÖF£ÕjÁ0Œ0´8—ËÁ²¬ð¦i·Û0 #¼™TU ¤§…e czYé’–2ÕÅxÎBú`éÅ*{¿‡Áù©&DÁ%Úw„G˜[Ñ¥,Øw ÿ¥òJ¥ÒWd†aÒ’EŸIþÑ?ú=Üu×?@Ó4‹ET*ÝvOœß}úXÛ. 
0 #U@<ÚLQú‹Rº.Wó]–ÅÆ©µÐõ®ŽãlÚÞþºy_3©B‚ÓÐjµ°¹¹™jßÍÍM¬­­Áó<<øàƒX[[ë‰/ßÞÞF.— ··Z-T«Õ‰ÏÍ4{W iõ0>¡òêžç-×DÅG×3™6$ØÁìŠÅ=¬Ã¾êø¹Ž:]Áêc.­mNÚ¾Ó@á¿´:.I‡22“EÓÏ€g?ûO{rU©@ çwŸN–öMhš–jŒ”ý$˽ýXY°.Ë`ãù–,ËZT, n©´XLM°¦eN',д²²‚0Ô¸ÝncáöõõõÔnþ4P/ÖANÔ NÞÕÓFA×Ûh ë­DÙ?Mnj šFBTF¿^ƒóX£çNïFôœxkì;Žã8Èç·›½3̸dÑÆÇ%Z8Æó¼PœŠ(Žãp~÷)dlÛ÷}¸®›8y÷} TêzO]·¿[/¼f“e°q@Ø0Ùî$ÞUfñ8qÁJ¹°Ñ~­W®\ n ú4¦>ŸÏì;ŒJ¥w Õõáå×)ìD)c¼ÂEã cuZà[bñY·maÚ&æë–e¡Rå‰a¬+++=y®ívÕjëëëÈårCoŠA}`'ÁuÅÀíyÌØ£ïû'?Yñ1;ÁM=ˆ‹º$Èû0LHDîiq‹£kŒïç#Ù;ë¡ßC:Ìσ•00$øÙúl¼äá—ŒýUŽ"+öåƒ|66næÖÌTÈ¢ý¢joá1ú?‡ŸNݶÇA£G´¡i6…5Œt^SYîæ 2‹Í¢Û¸e ÁÚl² T*¡^¯óã)àÄ+ÑétP­Vq×]wamm .\_ĵk×n{ì±ÇðÀ$æÑÆC^heQ–…8µ#ñaÔâàÄ 0»àhm4žP¸ÃÄ!A"qˆ8ì¡„~ì#9t×NØWÆàâLñ¼Û"zoo¿ó;¿ƒýÏìOôu¦aÚö ?þ8677±··7Ö¹|â_^|<]lnnâÑGégœä~\Μ9¶*sœ®WÉó¼¾ñ¾R©p8pÆ©V«¸xñ">úÑÎäøY¿ÇÁ¶íž&ÛâT’º?£8s¦7|ž9hü¦¢G³`‘ÆoÂ÷…=6Â~‹Å" ÃX®"©§€½½½‰Æï‘U‚Ó&[³òÒjµ°½½••Üwß}}¡ƒVZû…/|!^÷º×…qùQâ¹{Ñ&ÃT`‰ª¨&M`N„:Ò瀎ƒ‹þÚʈýÇq.D (ܺ‘¢¨H».DÿØ8ñ*ÃtIû'ˆóõõuìçöqùòå1.4=³°o¸ãŽ;°³³3Ö¹A€n¸a&×Éd—™NNz ?4ÆÛ¶ ]¯ô´æð}¿Oœ²XÍ>[[[8wîÜLÆð,ßãàûþÐú¾/<ªã@žXæd¡ñ{˜½‡E¿£¸.ðÒ—~åòÏÂó¼Ô…ŘlAÞüqÇï©yXãaÃ0 #Œ£ßgÏž€ž°„v»}¬>PI‚õêUñÿ ÂBÀœÂ!ÚfáK Ã@”“ XãØ£<¬ÒkŒ96’Å- ò}‘°ÒÃ8éZà†öìDÜIÛ÷0~ù—?†Ûn›³\dÉÆGáyt]‡ã8¸tééžœ=ß÷9ü—éa‘l;ŠXêõ: …*•Ê|&0eq–öcÉ›fÂ{ÅÆà¥ºÂoT«þ~ªÃHòŒÚ6¬olüsˆpà¤^°Z¾&ðÌo>Ì †À<ì{‡xþóÿv&ÇfN'Y³ñQDÇôO~ò§z&9¾?‡¾WLfY4ÛŽâ8šÍä†è¾?¾Xe–“E¶ñ\î>”J·rð)e¤`ÝßßÇææ&¼ˆ›²Óé Õj¥ö¨F¡˜ü¤p5úŒ­­-lnn†å·WVVp÷ÝwÏä ª,Ëð}?\?Q|ñY¶rxšÐ] Ñ?¨!—$FãÂqX¡…ø×4ìÜ=ˆÜØAÇ3#ï§TƒÆ¤¤cÔ\>`ß‘5û–$?üÃ<¸3Ó#k6> Ïó`VW Ð4µçužø0QͶ JiJŠ  XæzŠ‹‰ï§Oø=.A ìÑî<ªÖ;mÕÆaçµÚ 'pL¦)X“¸÷Þ{±»»Û#bÓ²µµ52Æ=ŸÏãÝï~wxcM"Œ‰`D^%嫪ª ÇqàûþÉç°ŽÛÞ*m ªàÛwÑ诨;lÿ$Œ® <î±é˜.{€}ƒŽM9¶ïŒê-´}"®žÇL•¬Ùø(hQ²T^ô¢u˜¦‰ xòÃô±h¶M8Ž302ÌóÀE÷&ÁqDÅÎJEôO9.ž'’0ƒ@üÄÇßï ä Àm3ªÈ»¸64›oæ^«ÓĶ…7Óÿ/•Ä¿ŠÒmêLoƒÏ/—Ûoë Ö“`œœØaxÞàÅ1×uCã×4 åry¾“û4s'½U‡AÕ{ã ºDýÞX«$¼® WX:¢3Ilº.ÌtEô(±jí§F¾ zžx^ïçøG;wG¿æÿÏÞßÇ9ržu¾ðÏŽó1˜X¡œ×¥íªb{p5#^’@)YÈÚ˜%xðd=ûGZ7´7 U‡9Ëš£bÙ ;Tl6mÖ¼¤‹4Ø,! 
ÚÌɲÖÐÅÉ.‰5ÒE’Ä]Œzþ¸ûª7•Þ¥VIº¾Ÿ>3­*IwI×}×uÝ×›xî_Ý=ä8;¦%߃xì±P­þ£y_.³‚œ•Œ÷#êE•e@׿†a@UU6V™±É‚lÑÍÇúNôÈ)Ú#âûB‰÷D½O|æ!bQ#o˜#÷é÷~þçû=žKÊv/ϯa³î÷ „à_~!ðÄöoÿé-À¹-Àú«0Q§±Œe_õ…À«Uà—¿ øÁÇÓÄ×|à8yÍk†Ðâ@ÑDÑJíösØÃºÈø¾y ¡ÝùdSDÓ ªaï &ÖgUا2¸¡$³D˜¦T¶–$¡{½‡–^†À÷ÃuTÓâzDꓨ뽿XÛY?@¿s™¡¸ûî}Ná˜&Š"6dño±(Hßÿ&å_×Ń ÝJEÌU õ”h(¯SüÍ–Þ`z¯É¦©·3 \Ñ~D 4ê#Úï÷¥÷sÐ?,×…0jÓ>ŸB‚£ÇȨLûì4ã8MçëõzÃ…=»§ã8àîÚ$·‡5 øË?þø@{.ð/n¾ö7{¿§@ùRÀúRà^?ö½¾r<5FH{–qݰ1<ñþѯø¦yË ›'FçAšË„vGm[ìhzîÿèGç}sÇó<|û·a–ÏóbÅ–%t˜T«Â¶âÜÕ¸®ø¢HßäUUU±ÆV*áÆ¡ãÄ×eò@1cáûÝ?mÛÐGíË´êP˜¹ãˆ`/ª$ •tâ~F¦¢ˆÝß9D(-½ÁjŸ†Œ¦­ÉŠÀSŸ %Çèïétz*=tWÞMB“8B›zá}>WF·j!ݽãýóg„€ªjo/¨±Ò¸^(ðÖéÓÆâyaØL½üyøâ ôN1hw2öp$¾WõåÃÿ‰ëû»ÛoÇgo»mø×/žw¼¹®‹\îOç=,fT\Whœ¤}kiIRì†ô[[[X®‚Áœv' – Qg¡†O}ª´Ã,"•JÕj’$Á¶Åí·\ËF±¸¢¹«”'JÊ7…åÒ:Z«Å×TEéíMMƒ6+aè–Ëa {Q'‚Ò†IÕÜØ²ü‡ø üýßÿ4G‹E!¹6Ít=Á÷…1lN€$e>`hƒ5ZQ¬}š žVel–±Ç!™Ò…z°S ÔzÆCƒ5z\‰¼.õb Ñ*„‘Ûo¸ýŽËè6t«‰q:Ž˜ŸûàŸüK`§(苇@û!àw¾(>A(œæmß|Ã[Åßšœ{- }Mk÷Â赯þ4wj¼¿±ÿwK•“9´½X‡ð&/¹‡Åóâ²þ裷a}}þ¥è3 …w‘±7É@.î´ÌbQÜ ¢ŸA¹RŠ?_QØ-2&–0xöÙ<\w.›Á 3SìÓÝxŠ ‹F×в´ú=åÅ%Ãû-+¨² E_À¬Âqk5aQÙ¶øÿÒ~ÙgGÒÁ­ë8-–÷^ñŠçÎ{xó"èÿQ%¼i›ÚÅ¢Ð+–h§v ÁzçwâÂ… ]Ïg)»ýtÐqªaXõó‚ú͈¢üU ñ:yÀ±äñä]÷4AZžb9'v_ÿàeŸîþR)†c£Ô¿#,4ð ?Þ@’=â’¡Ž¯á»Ð1|%aƒC³—ˆbQüTÿù?!xàÖyçl‰V¢K*.i í$Ž#”U‹?m¦Ôjݯ¡‚®¿QÐg¤…sÔjB™ÚØóJÓâF,3¨¦ƒçÿã¼íöáÒ¥yŠa¦a¨G<ƒ‘z&Ì~"…<êö¼p­õ}±Ö*JXI3mÝ<ëµ´^…™‰Iʬ¦‰Ÿû­oý*¼ýí·Ï{xÓÁ0 ^”èÞ`5 Rÿ.¡n1Ð`][[X;ËôЦó}¶ ÜäÑëg'Ckˆ|Ï~çÓzî¡wûÝ­^\Wx@©lzûËÜŸû<à£o®½.þiáÑ>€ÿõBà]/Lÿ¢u]ŒÉ È€£‹0ÜYÜ8G uZ®9Ûû2#×éyÀg>“Ã?þÇ_5ïa¥’Pl¨Šª†9ÔR —ÁªªBY¢ªwšÖÛ-GULFY?dY¼¿®‹V¥"ÞÉn(ó&ÈD°€w½ëKqÇú$³4ƒÅ²,hškñ±põhhS›*è&ã—I©§ £0ßr9; é½ gfdèÝ­zxÑ‹ÖpþüâÚ!1H(ãúŠ$…›3“ì6ѦÎæ,}«e¥¯'3oOÆh¿p÷t’Q7r¾ŒÓ–.‘@º« ¯"aÛÂX­VCýÎÏ;ïþÃ?¾þåý?—6wL @¶£Tf¦Éy½.Ö¼›oþØòæ|¤Åú§U•åô\è^ S,c H’æV°`Ù¡Ô5MKÝåËŒüÁ—Ì{X 3'Œ>}Ó›îE©tS ÛR*eVì¸TÈ8¥*¤’$&k¯<ýä¦àäØ1ãC-™¢TN½…ò' PlÉó„Áy–¤ð¦ÕMH BHÓž´K¼²ô«i¦ÿ~ž7L_˜1q!Œ»Aݪ=^›†ÐX¤|Ñ>P>_ÄÿàÃÝžÛ4%üu·Špß €_âšüÓ1 q®‹¸ÁÌ̺ÿÓ‚ïº@.÷§P”ï™÷ÐÆƒ<)/Td@ÓÐ¥ÀÌ’¨Ýu]|þó?7¼a˜‹a4âá+¿²„—¾ô(È0pœ99T¨#zˆ(#¹HÞH6:™'¾[*• Ë2ªY÷FÛ!Q8/é.½R„è3C¬Ífûûû¸páòù|OïäLóBÇ —ƒ)Y!xªxËuˆŠ¶ÃÒ+TÝ‚0p]W„ª*ð‘é¾@O._³À€8wî4½d¨sUœz~™³$ÙýIJ€vûÍ{X“A¹§@/r¦] Ì< hA@ä÷e^ÑÉ:¾/”°qvì‹E1wÇíyë8ÂmHùÞL°¶†…r¹ÔS1Œ9é¾–naÔ‹¢¤§±‚Îô!ZÈÙó6Bë°F 
Û+5ºS©B<|?ì—tp ÂoéØ ¨ 1ÝÓm ß¾e”h9öš.¾ïŸ}Á%òÊ$]êé ˆII ,ç¡2S¢_õÔ¥$?×IÕ³“ž°^•()ô¾^O?N°e…ÅC€t7/•‚iïib<ª:Ø*•‚°Ó?;wŸjµæóýŸ1äp6 k¶¡î¶-~*T—ì•Ã03€º(b ×G‰ÊŠà„’ôäˆq„Næh°6›M¬­­u¡…B!Lǯ]»–)ƒ5MæÇ-\Ì„³fÝvç¹§XaxT¹z++F¼¹orX„Ç6í%# „'Ô?›ï–ÉÑê‘ÄÌ«`”çF}ÄÒ¼2d ö y`˜  ND¦9ÇùY@U£óÆóÄ&'.ä‹BfÚ¢*ñÔóÏu…b}_Šôéí@Š^¯<Å$䉣M+je,â9ÿl£¬ˆÁ*~bQt&ëztS‘k0g -9¾ïömRø¥Ñj”¾/6¼(·¾×ÇÕ¨3Ë@ƒ5—ËáÆ]ÏïííÍ{ìIF(ŽãŒ¶è“'s˜—XÀ=ø7ïþËÀf"ßÀ0B“ÊUržô«¸íÛêcø°]f©Hs–xžmÖy–V{êÕš€µo)ÃŒ€héáÃ÷ýlyWÓ Îa Vd@wH-m Qú4#0Jµ ¬¯‡mzUj%ƒ²ßXÇñîE«Á1#ã8@>ß‚eY³ u×uþ˜¹@K¶išÓÕ[\Wè(QY­r¾é3Ð`Íçóh·Ûh4( =Ïk4óºÎ›^k¯ã8Ã/ü6â^Ì~X§Œ0<²\>ù PÛŒ ½=¡þéç‘ÎÐÏY žë(BÊl°®ÉšE¾ïÏÞÃJíe¸èsÆxžpÖëaáFEl{ÄÈ™YC:€ÐÈ’y€i—“z÷õ:Ôj;t¿ëõ~ž—ÞŠ„™;¿ñ‰FcïyOmviYš7ÌJ±±¦¬G ŠM.ˆ´TÜ<è„|>|>ÝÝ]´ÛíÔsÚí6vwwQ(Ïçç}Miv(wuè…Ÿ<«Ãtë1ü‹ë"ìÀqÄ,Ôuà<·û={é-„÷Õ‡0lAÞ_*ŒÄ¬ÉjضmÏÞ»Z©œUæ qÝ0壼D”ë…Á¶…G“ÂnŽ„ÒT*…Ò.Ì0ÄÅ)ÊôÃ25¨?“r³Ì8üò/ÿ5~é—^3ºlŸ§  Í ìÊ0sĶÃÍvª?0¼nž­šÌìªJðÅ‹±µµ…óçÏcss3f”^¹r‡‡‡ÁyYÂ0ÒZ&9£)óe 8¼«_û|±[MäNÞgú…ù WxI9}ðéÊBõJjC5Sj5ÎsbΜR)žz©(¢H-llœÜƒ¢tGhš˜¤N]H ÇíUìˆYI<ÏË_üë¸ÿþë}’eÅ5 ü;:l;,¦dâø¼[‡0+MÔÚ·º»ë†›,¾/úFë%U*gêÒ2”ÁšÏçñè£b»»»]Ç777qáÂ…Ì[z§ ­Ìÿƒ¿Þô3À‡ÿ =×#j4&oâ9°ƒòa©-Î0÷’ dƒ¨ÃæLÂ6V™3Dzz–¦¾Úg^èN"ï57$©·2ÅÞM&Û¶ñÐCÏé}‚i†}p0Ü&-×X×Åù²,6N–©8³pD—MÛ횺êPŠ ä–”4œŠËišoEáÍ¿%c(ƒU€/^¼ˆíím4›Íàù|>Ÿ©¼U‚R†’xž7¼2ïáÔ³jÍ;€ïxððÄ6?íäÐÍ¢ß[ÊsV} n£a8c•a_·gLÅ]²à½bV–d%ì(#Ò›TYUQÆ+FÄ0¨=uÔöü…_ø<ÞûÞïíý"ŠæF1WU!¯¥¯åÌ\0 ñ¯,‡ë¹,‹pàÔvMŢ㇑W’o× ‹°ÁºT m°¹\®oñ¥¬­fMض=šRCÑ[å2PX™GGbæ™&ð}?<êE¡‘ZÁà^©¬û0#lgSžV˜õ¦~| 3'(]©—óÒuÝÙæmû~<'*îreUfJP{2Úü™ŸùKüÝßÝÞ¿êõ¨›4º.”yfÎZB©»uñpO×ÔTÝ|ÔLå²oEá(°%ddƒuQ nQ]ÂuÝѪHz8­ú{*øÑP^Ú 2084:ojàâHÌÌ;˜z9RØ )ç\žÉT‡¨®ëήÿªi†ñÈT¼HôÑ™÷×Â,¶-d¼TòþùÏÿ~äG¦½¦ªaÒ7Ü!¶êäQ5ܲ¬ém²kšp&ñ&âR²´«ëvo"Ú¶=\þ*õ IGªHnø ë -Ufê‹aXMjA´Måtx^wl%=ïºá*®2ÌÇ™^ïÕd(@ÜH8€™1´Ž×j´dÿ~ægNr¿z^ÿ†É(Tï+Šïûpgº›¼!³´,­Áêûñ pÇ=úbE9|?,Z (ÔÌ/lžî!n`&õw*Ž4 l¨23ÀóB9·m;¾Séyb»¾\2M^Ô4i¯¤o†™3Ô÷=MDn* ŒXžGÌ1®|; ´ÇoÁ ×[˜aæÄ$2žÜ‡1M³·¼'K­2Ì0¶Ž‚ÓbKß÷Wøì£â‡>ùÉÞ‘ 3'&‘o×Ü÷?‹×ê 
|ªÌ0c’ÙàV«]×Ñl6ªªÂ4ÍáßÀ²ðΗ¿úùóâo"w5êK^ÖÓ挘T¾½«ŸÃëÿâÝxèöÏàˆoL™TÆ]7^ŒÆ4MH’”MàûÂÂfz†éäòí8§ù«¿ûÃø¹\?séÒ¼/‰a&ÖÁ?ü·ø©Ï¼ßûíxì?ý§y_³àdÖú³³ƒµµ5¸®‹Ë—/£Ùlbwwwè×ÿý;߉ÿxã†ð®ZÆjš.£C„óÆ&s†L*ßîûŸÅ³wüYðÕ_ÍiL¦˜T¾ÀG?Ч_ú!¼îÇ|Þ—Ã,™4X[­.\¸ÈårØÜÜìßs/ÊåâC_x/þÝKÞ! U ¢ MZj“Ôãy†™Ë7€¼ìÕø‰¿ù6X™L2©Œ»n˜Âçº. Ã@½^ï] ›Ã͘3dRù¶,@]kâKm%IâP`&SLCG©ü³¿ÆþÝ÷ã¿~Ã7°žÂL…L¬×®]äóùà¹|>?t½ÿ5_Šÿö‰¯Â+¾þ¢ ð‚öL=<<œ÷¦ -‚«Î¤ò _ûZ¼ö{¿wÞ—2Ë(Ë6gÇeR/—Mõñ+÷ß#UÅ~ó7C*•€b1|$ûÝde“‡eœ³ã0©|{‡Ä¿øØCøº¿üK<ôc?¶h–Q–m¾ŽË¤òíüÖ_᾿ü%<úoî³,›<,úœÍ¤ÁÚoR´Ûí¯÷}iü%¾ð-_ØÝ¶f Š˜Ëµk×påÊ•ycîL*ßÄ¢ïZ.£<,Ûœ—Ie¼õøãxê%/Áñ8ÿÖ·â¥çϋ߃ ^¨¼lò°Œsv&•ïôoÁ¿à:~ã¿üh Z xeaÙæë¸L*ßkõ[ïøüûßþíy_ÊD,›<,úœÍdÑ¥~âÆÈår]ÏÿÉŸü ÞùÎwâ7ó7q÷Ýw¶¶¶æ})ñÔSO-ü5Dyæ™gðÌ3ÏIüóâSŸú>ýéOãùÏ>¶··ÏüóÇ‘o@ô-ûÖoýVÜqÇxñ‹_Œ‹/žùاIVäašdeÎ^½zó73·ÏŸt ÿ²/û2|¶TB.—Ã;¯\è&»`;ÞY‘‡i‘•9ûÔSO¡Ýnãå/ù\>ùŽ®ß_ð¼çáîoøüìÏþì\Æ? ²" Ó$+ó•Öïw¿ûÝøŽïøŽ3ÿüièàŸ}íkÑ\p%+ò0-²2gIuýΤÁ CH²¶¶–úüßøF¼ñoœ÷Ðf ãÈ7¼ç=ï™÷Ðf(x g–™qä›×ofQàõ›É"™ ¾óÎ;ÄÃZ­V_eža–ofÙag––of™aùf²H& Öµµ5 …X³ã8(‹óÃL Ë7³ì°Œ3Ë Ë7³Ì°|3Yä¦N§Ó™÷ Òh6›ØÚÚÂÚÚÚí6r¹ööözæ÷1Ì"ÁòÍ,;,ãÌ2ÃòÍ,3,ßLÖȬÁ ˆÄoJ. óÃL–ofÙag––of™aùf²D¦ V†a†a†afuÉd+Ã0 Ã0 Ã0 Ã<ç-oyË[æ=ˆe§Ñhছnêûßl6ñ¹Ï}.õx¿cßívžçáE/zÑXcÊâ51ã3‰|Oãø,è'ã‹x=Ìdô“ñE”^ÃbÕÖïAcÊâõ0“Áëwö¯i(:ÌÌx×»ÞÕyÕ«^Õ¹çž{:÷ÜsO硇ê\¿~=8~íÚµÎ<ó›ß<Ô±aŽÏ‚ëׯwÞüæ7ŸùÀtž|òÉ©y×ÄŒÏ$ò=ã³ ŸŒ/âõ0“ÑOÆQx gˆU[¿)‹×ÃL¯ßÙ¿¦QààÑn·±»»‹íím¸®‹Ë—/Ï;;;X[[ Ž7›Íàx¿cß»»»hµZ¸|ù2\×ÅÚÚö÷÷‡S¯‰Iå{ÇgA?_ÄëaÆgŒ/¢<ðΫ¹~S¯‡^¿ãšFbÞó²òÄOtî¹çžØs—.]ê<ôÐCNGìdÜsÏ=±Ý‘w¼ã{ï½·ï±A¯ׯ_ïúÌk×®u.]º4Ô˜²xMÌøL"ßÓ8> úÉø"^3ýd|å×p†Xµõ{И²x=Ìdðúýk•[æm0/+…B®ëÆž»vín¿ýöàÿÏçƒãù|­V«ï±A¯TÚ<ŸÏ£Ùl¢Ýn#ŸÏc{{{¨1eñš˜ñ™D¾§q|ô“ñF£±p×ÃLF?_6ùžÆ˜YƇU[¿)‹×ÃL¯ßÙ¿¦Qaƒõ ØÚÚ „áÑG€¾Bp||ÜóX»ÝîûZjðŸï*¡½»»ìÌL:æy\3>“È÷4ŽÏ‚~2þÊW¾rᮇ™Œ~2¾lòÍkøj±jë÷ ùÎâõ0“Áëwö¯iTnêt:ybYÙÙÙÁáá! 
…®]»KDLúÖÖÖÖÖ‚¤æ½½=är¹¾Ç½vVÐgÒ.Õ(×3ãL¶˜D¾§q|ô“ñE¼f2úÉø"Ê¯á ±jë÷ 1eñz˜Éàõ;û×4 l°Î˜hÉô´<‰v»ì%÷;6ÌñY0防xMÌøL"ßÓ8> f9'Y¾~2¾ˆòÀk8C¬Úú=èx¯‡™ ^¿³MÃÂ+Ã0 Ã0 Ã0 “I8‡•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬ ÃÌ Çqàûþ¼‡Á0gÎ4eß÷}8Ž3ïKb†a˜™À+Ã0s£X,ÂuÝyƒaΜiʾëº(‹ó¾$f…±, žçÍ{ s&°¼Ÿ=l°2 Ã0 Ã0ccÛ6+ðÌÊÀò~öÜ2ï0‚ènM¹\†,Ë]ÇdY†ªªpår€+£P0MÓ (Jð:Ã0 ë:,Ë‚ïûPš¦ï'It]û|†–~ò ¦iÂ÷ýT›–Œ³|3ó`–²_­Vaš&nºé¦Øë|߇iš]¯a˜II“gzζm€ªª1ùT%xŽ×kf‘WÞE dW’$”ËeH’€e{\ØÃš*• LÓxž‡`‚†³m¥R)˜$¦i¢T* ÊÆÆ,Ë Þ—Žû¾ß÷Q*•bah¦iÂ0Œ±Ïg˜aè'ßtœrùzÉ$0¹Œ³|3gͬe¿R©À¶m|⟞÷}Åb1P„fZ ’ç(Qù¤sx½f‰qåýCúÖ×׃ÍFÛ¶c),ÛcÒaæÊÑÑQ@çøø8xNUÕŽ®ëããã®c²,wTU ŽÇjµZG’¤àoZ­ü­(J§\.ëºÞQUuìófýä»Ó2Gÿït„ “ŒM[ÆY¾™³ä,dŸ^_¯×;:''']rÍ0Ó`<«ªÚ©×ëÁ±¤|Ós¼^3‹À$òNë1qrrèít.ËöèpHðœ±mªªÆÂÄêõ:±Ë’<¦i\×…ã8e9V’þv]7ØY¾V’¤ $¡£žÏ0ýè'ß„ªªÁÿ£çÍBÆY¾™³â,d?úz ,äT­Vç}ùÌ’1Œ<'IÊ'Àë5³L"ïôšJ¥MÓ ªj×kY¶G‡ Ö Ó¯åçyAžR”´Ã,",ã̪2®ìS®`¥RÁñññ¼/ƒafåeõz=Hãó}årµZmÞC[hØ`ÍIÃÔqœ ñº—Ѫ( $IŠíÚ$wß& ô’o*Ö –qfÑ9kÙ¯Õjð}¶mE@fZŒ+Ï ³ˆŒ+錄×j5Ôjµ í˜¢(Yš³×¯_‡$IxÛÛÞ6ï¡ ¯ßÙ'KóõêÕ«øùŸÿyäóùye(xýÎ>Yš³ã¬ßKc°¾ìe/Ãk^ólooÏ{(Sckk {{{óÆÔh4¸råJ&~#Ë"q×]w±Y𳋶†óú}²4_·¶¶ÆXxý^²4gÇY¿—Æ`]F–i¢@¡P@¡P˜÷0˜Œ°Œò°ls–™Œe“‡eœ³Ìx,£,,Û|e&cÙäaÑç,ç°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ Vf&8P*@±FxÌuG{ð}À¶Ï‹+Óü>¶ T*Àwˆ1]½úEóþІa†a†À-É'vwwÑh4Ðl6qᬭ­¡Õja{{{Þce2ˆm‹U$aD’©ëâyÏ‹š¶-Õjøœï‹s%|Î0Äóª*þu]À²Äy’Èrøt>hšxÏcqqŽªµš8§ÑxW®ÌûÛc†a†a¦1ƒuww‡‡‡ØÜÜD»Ýär9ìîî¢ÙlboooÞãe2‚eå²0m[ü«(âßZMüKÈrüo]¯'|_xJéÜjUüKFfôù~èº0N%Iüíyq#•a†a†a˜Å"0X[­ö÷÷±··‡B¡€f³ PUkkk8þ<šÍ&òùü¼ÇÌŒ‰ï‹É –JEäÍ$Ï' ŒÔ¨W4j˜öB’Ä{ÂkZ*‰¿Ëeaüš¦02ÉXIÞUbÔ×3 Ã0 Ã0 “-‚Ök×® …B×Iù|ù|>ðºK£ÑH}¾Ùl¢Õjõ|Ý ãÌxllã0Šeëëq'=Oa¼µš0(MS¼‡ëNÏkiÀÁ0Vap.ŠG´—|,ãÌâÃòÍ,3,ß̲Ã:8³LÖ\.h·ÛÁÿ‰v»x\‡¥Õjakk 
n¤ÂN«Õ‚®ë1ï­©˜3èøªày¡)I˜KzE‹EáÙ¤0ÙR)îõ$\¨×ÅÿãÏW*âÜz=4dËeVkYÝÞJU¯Æ‹:,4¶E#M¾éy–qfÑaùf––ofÙaœY6+yQ·¶¶bÆi«ÕÂÎÎÖÖÖR½¯IZ­ÇN1Ÿè}\×ÅåË—Ñl6±»»;ôñUA’BÑ÷…W3êñ„¡Íé<8ψ×Qh/o¥ï‹c”oZ¯‹Ïp]ñºz==tXQF)^&úÉ7À2Î,6,ßÌ2ÃòÍ,;¬ƒ3ËJ¬­ÍÞÞr¹Ο?F£ýý}Üwß}hµZCï²4 v=ßjµÐh4p᣻¹¹‰ú©‹mÐñU€<«QƒµZÆ#µp‰­iÐkËåÐpMCUãžXIŽŽÂçVÙ(íG/ùXƙŇå›YfX¾™e‡upfY‰U ÎårØÛÛC³ÙŒU ¥ÐÒææ&677Ñh4°µµB¡€B¡0µªÀý„¾Ýn<>ˆ?ÿó?ÇïÿþïwÝ„²Žë†^S çefËáá!.]ºÔ•¿4)³–ñ§žz [[[== ClmmáêÕ«S}O^Ù¬°»»‹K—.á#ùÈÔÞ“×o&+Ðú=jí˜~ðúÍdÒÁG]¿k>ŸG.—ƒã8ØÜÜœúû ü7O‚Jò²—½ ¯yÍk°½½=õ±Ï ë®w÷*efÇææ&ÖÖÖpåÊ•©¾ï¬eü®»îâ^ÈÌPìííM]qà5œÉ ÛÛÛ8wîÜT×p^¿™¬@ë÷4ÛHòúÍd…quðX•àíímìîîöÜi™DûM¼µµµÇ— ß=GŽ8_tY`g––of™aùf––ofщå°îììö÷÷SOžÄ`½óÎ;ˆ°þèÿ_6LS´acuy`g––of™aùf––ofщU v]·ïc¨-N4Ãq‹Å¡Ž/ä]íQYŸYPXÆ™e†å›ßûy†¸ßQŸði`@±¨àþÏ»¦öž,ßÌ2ÃòÍ,:·$Ÿ8<<„ã8h4€B¡UU§’׺½½­­-4 ´Ûmär9<øàƒC_Ø»º¼°Œ3gë ƒ \î^Gî}^ÚÍœ(•Äñz=}÷˜YQÆ™ùãya‹«O|â¸ýöÛqÿýσëŠõCÓ„gIUŃ֙׿ÞÃg>óyü»'^+I€xûÛg3N–ïÕÀqB¯¨¢Ä{„G ÔRiøâtŽiŠûa²ïxÔûZ* ﬢˆ±¨êÙÜ;Y¾™e†å›YTƒ•š«)®?êÇzíÚµ•LÀ¶,Ñ#•ÚÎxž¸™FŸ³,ñPUq|}=T0ñ/·­a˜Õ$Í „›\¶-þ¦å÷±Ç®à+¿ò pÿý¯…¢Ä7Ê¢†,íöÿ/y‰E9À‹ 3>•Š/ײU«Åï[ä½¢iÂ¥{ÝÆ†Ø˜ígÀêº8¯º. 
U×ï•4n†a˜Õ¡+‡•âÖ“Ïõk*¼ DoÚ²ÞLËeqCuݸKŠhÚë†Y ¢^ªƒ¡xolˆõ#ê-Н>lûÇsž£xmß÷÷<Š¢@Qض +¹1#BaºÑÜÑryø{Ÿ$z’4œ·5êMõ}ñÑEIßäa†aV Jp¡PÀÚÚ È§­V+hw³ªai7_M Ã~e¸èUÂó<‹ElllÀŸfyH†É8ž'ŒÔõõ°xÔãtp7t=¾vX–…r¹ ×uÎÛ¶¡( 4MƒMnZ†RIl¬a¸ù(¬šnšâÿ½rPÓ 0xÛæT†a¦71«išÐu÷Ýw_줵µ5ìííÍ{¬sÁq–«š¯ïûf|A¶mÃ0 T«Uø¾b±ˆz½>óÏe˜Yãû>*•J`H>ç9áå/ÿV|÷w1TU(Þ†!ø^y}²ÜãŶmÀ÷}8Ž­&Oçʧ†çyÁÿf¾?ÜPò²zÞhÆ.mîòË0 Ãô&f°æóy<þøãh4¸rå àܹs+ëY+—‹„að}µ^%§€eY°, õz=¦8‹Å˜b åÚ÷}(÷ÅL€ëºËã8©ùûQhó¥\.CUËð}ïÿ‡ð3?ó0>ÿù×AU¿š6™òïV]’eŠ¢ô5XÉh¦9U.—aYª¬ù3(=E–ã²IÆå´"t=l;3,ª*^ãyþË0 sÖøþâ8ånN>ÑjµËå°½½íííà¹UE–—§mÛpgä]Ã0°¾¾Ã0…º×yiÆj¹\†®ë¨T*Áûlll T*¡R© R©Ìû«aæˆëºp'ØPjl>® ‘Ç”dÜ:M<÷<ŽãÛ¶±±±óç·áºe ( Þô¦ñáÿ{Ü|ó{Ù¶m;¸&Ó4áP¼åضò陦i}_kÛvÌÈæ°àÅǶ»[¿8ŽðÚ“(Ø6P,wÜö;âºâ8ÕP …IJÂjõ•J¼ÆÂ4ˆ…ZM„É3Ì2bbN2LÖ0ŒÁÅïA÷§³ f°â¾ûîÃîînðÜ#<‚û‡‡g3¢Œáºƒ{Ç-ä…*—Ë0)kHa?::‚,˨T*ØØØ€išñê8*• \×íú«iêõ:ŽŽŽ ªjðÿ£ÓÊ£ãÞL¶ #Ð0Œà÷4FXù<ÏC¥RA½^‡çy±Ù˲‚Ïs]Åb’$áèèõzøÀgñe_öNü³ö–˜Áêº.~äGÞƒG}²W°%IB­VÃÑÑQà^“i𨨨z|Qª¢(©ý"ty_%I‚¢(0 †aà¸_.&s˜¦0"76âÆ¤$‰MSò@ªª¿“ñ÷Ɔ0bé5Š"òH)2ˆ¢„êuñÚõuñ|Vñ’ÄE ™åEQÂve “,+l6 tß: B‚Ûí6vwwqáÂ…À³ {{{ØßßÇÎÎNP˜i•°í°ÅYây<σëº(—ËçÚ¶ MÓ ª*666RÃ-ËB¥RA­VC¹\¼O$ åråržçÁ¶í˜‘©ªêP¡Æ’$u…^Öj5T*”J¥ÓpËþ_v¥R ¾MÓP.—‡ÊÛ#Ï—¢(œO{Æø¾3¼LÓ„$I]aâ¥R)È¥ô~¥R ÕjŠ¢àààÅb1¨˜›v>ypUU ŒP]×£O–eüøoöEù¨ªØ°":Y <‰$IÐ4-5„×ó<˜¦‰õõu(Š\3}'²,gT6ɦkŠz\ÓB¡«Õj07žxâ‰ùþèÌÐ8ޏ×PÁ"Ã[§%Å9ºtU«a%êaRWÊååIqa˜YAë~²ÿð8p11&k¸®¸g Û?»ßûbŽ˜¦ˆ–™%ÁÚl6Ñn·cÆ*qá®dVj\~Vø¾ ‡ @7W iôËå³m;0RUU *‘”{z||ŒR©xO5MëzOY–¡ëúT[hÔj5ضÍIÏ‘¦i$ ¥R ²,£^¯Ÿ¶ÿ°Q,¡it]”ý¨ÁOÅkEªªp]ï}¯áL“wfºÇ1º¡ëzêÆD­VÃÆÆTUí’;òv^¾ÜƇ>´{ïý§a³¡—³T*¯U¾ï^N’‘(¾/~Ç «õFOIëE9²,:.L²,ÞØ¤±«i*• TU…a$)0vÓ6dYŽðƒ<Ûšß7{šŽ#䌔‡QnüóØPe˜e†¢&Á÷ÃþÄÉùÎ0ó„6C'õÛDçÉúúèE÷Få–aO¼qãÆìF1èÄYîFSQ—jµS\É;³±±„Ð&¡Ü¹4¯U2×M×õÀ› Ä %Qx$…øöú¼Y@Þ)26 ×u#ºZ­ãŽz}éû‘e9¨”JÞ*UUcƬ¦i±ÂbÌxX–DqiYVà$tµZm¨ÂH$¿Åb1æý$ïh¹\Æ7~ã7â¶ÛÖ ªùÓ×Ð|Vp||Ë;¥Ï¬V«±9àºâ5´!Õ«wä°=%G!Í Ü+ª€ä¹T*¡V« Œ>`²‡ãyóÙ¹÷Þ{;÷ÜsOçè<ðÀ{î¹§óªW½ªóä“Ov²ÌO<ѹtéÒȯ;8èt4­÷qUí¸ONN:µZ­£ªjG×õçkšÖ©V«N§Ó©×ëUU;år¹ë=ËårGQ”Ž®ëY–g{ ȸò2Ozè¡3ùœããøßº.ž;9¡ÇIG–åÎÑÑQpÎÑÑQç8ù ¨×ãã(—ûÏS1†NG–;Eó¶^œ„ÿ*J÷5.#g%/Óbçd/êu!‹µÚ¼G²¼,š¼,Ú|\vNN„þE×»ŸÓ4qoIãøXÜk’Ç«UqÏJ~^š*¦iâ½'ÕjÎúº;ï¯h$m>f’Ò]¦õ£Ü“êu!ßµZ(§Dš>v|,æP”4JU;ÝÝæÈò’Ú‡Õq4›MÀææ&TU 
<®Ëíz¥1i)òõõu¨ª;{¡"4T8ˆŠÇ ¢V«¡X,²¬À›• ¤|>Êñ侌̰xžh‰¡ëbWÌ4üÛþó¯?õ¢*Á±YôÒ-Åü¤üŸA9EІr%+Ö‘WV’Â]o†™žzô†™>ã¯å®zˆ¢Ø6ð=ßó1xÞßQ-Š"< i·8 s¤H=*€t‡XRµoÛ=^toKt/—FãWÜ3ﯙ™ä•§<èQ¨TÂô“è{ŒZÍ×0„+J÷\I‹’e!Ï™êºñˆ8× Û³}Ý×]ǨYy·4›Í®PßdHݲA_è üTßßO¥¦i0M3V¡T–宼Êa¡ðÝaB_)Ç“a†Á÷Å‚st$Âk'T¼@U}üäO¾ ßú­¯D¹\†e‰…p𡵆!æ¥ªŠ›8Ìõúh¹?l(0ó€6pxÙe˜Ùáºã¯ñT¿ V÷Ê-ÿzøîï.…qoKkoMs=néyb³·—ú\.‹{½&j¼ÂРÔ[o}vÞ_33'MÓ rϦmüK’ÄmY˜©cÛb^T«Â@¤2å’ûÁ «U—Ëâ凎*’ž'^«ªb¡4ÍÐh&¦XˆšafŠeÅóÖ†™ –% ;º‡ŒRÑ7Ù.Êq€Ë—Ûøð‡€¨R,ƒâªæñ¾/"€Ò<£ýP”07Q–Ãêô„,sÏV&Ü ÇË5V‡}´M™qu¯¨þFÕï Êy•åñ Ö›MÓÄ£>Š|>ÃÃCœ?;;;±~ËF2‘¹×¸—ȻʅR˜EÂó⻽´«çy^ÏÖJš&¡bq´zÃCiaÔõÙ÷ñb˜Y!I\H‰af µ"Ó´°Ê{¿¢1žçÁqœÓÅù¤×‰Öw¼ç=¿†/ù’¯ÂoýÖA=;Í­zJ¥2¾Bíå-Iq“Rq˜Õ†ô*@È„m§;ÐÖ×Ã[ ,²”„Þ£×fõœ'&M‡ô}ছÒ{Ows8`/^Ä£>ŠB¡€F£±ÔÆkÒÔë‡Ç‘iÛöT{”2ÌYP­v/&Tušª0§Q.‡ÒõõðyûÙ4·¨/stÄ}é˜ÅÃuÓdR 'Áê§y3 xW‰j5ÝÈ{ÃÞÛnûCT*8Žƒb±ˆ_ø…ϯ5MÅbñ´_wïyÏ×ó7i°F7RGÜËÆ5,£kD­&º<ÜqÇ} df1(§ã!—åxÝ MKKOöRµ¬tc3éuíuQ˜I²=íÍ—Ô>¬ÑÖf³ Çq°¿¿‹/.eȰªöÿUq’$±w•YH ÃÚ I’˲ppp0°¨’¦uï¦Ñß4*•0G–#Ú™E„ Q²j5,L1%¿R©@Ӵ̤|X–xœ& XVz0…ÙŠs,üÎïÈ8wî•(—ë§÷' ßõ]ßs羕Šß÷qtt”:×H {º‡ÇLs|ï*Ý[-‹BƒÃ–„dXÓ\{òÉ'çýU3#`Ûáfæ0Æ!¥C%—VÊûŒÞOú…ò¾/Æ S'-áT0Éó¦é6‹HÀ`m4( ˆþí8r¹Î;‡íííéúœH\êµøŒZ™Ë4MX–5צ 3TìÁ¶mÕ¬'éš´q9\’YtTA/`ÇIåb\ÜÓ­ñ¨Ò:olÛ†,Ë™³¸˜æhQT )jOú~÷{xž‡JÅÂ_üżãÏ=Ÿÿü+øÿñæ Ú¼¢(øïÿý7Q©T (ÊÀ(8QXÐ:í. E «ýÒg{§.5ròPI’‚^ß÷Q©T‚‹Å þû/à®»|œÞëõ:,ËZʈÆUÀ¶…Ž3¬w’Â|Ó Öa–ÛõõÐ8%gÀ$Ë4¬Y½¥ÙlB×u´Z­Ø S×u¬­­@à]ÝÜÜœ÷x§Æ´ "B,²,÷ܱc˜,㺀ªÚgUÓ4VRB©¨TºÃ¯¨’õ´p]Š¢ÀuÝÌ̽¨B>+|߇çy3iÅL˲àºnÐaø×…­.½Ì0Â. QçB2´Ñ¶m†]×ñü/ÅOþäá™g®¡^¯C’DÆåçYP«I8MÓ°±±jµH²mà7~ã:*•7ÃqœSï«Ïó`ž–Z¥;2\5MƒeYAZïû°, ''?ßå܈¦ÝlmmÍãçeÆ€Ú©êpÞIª4'œl¹E×uäóù`²Â@ÍçóØÛÛC.—Ãþþ>vww—Æ`MzNG<’ϋݡ'¦i7qR0h·n™[1Ë -’¤„0 bg“kíyt]Ý‹ç …/ÒýnVi.äUªRê•™i!‰á1+0¾Ça¨ÕjCÝ7hÇ¢yzáyâžtp "Ȇ£ÜúÑ_Äññ1jµZ׿*…º§iœL-EQp|| I’°±±‘ÚÚY<¯»°ëvç­ÊrÚŽ¾Ç0us’áÀI(ÅbÇéº?™¦‰›nº)D]$|߇išX__G©T ÒÊÒ Öõõu†7nŒ<®[Úívì‰f³‰|>¿tFjÛoÇœ Ò0Ì¢#IbC†nì¶móF Ã@(¥R؇x6Ÿ!nä¤üFCnûå±R¤a6—&É?%C‘ èJ¥³,+¦¸†Ó4‡ò:%Ï£´MÓ`Fðž¾ïŠOÒHbf…Æ[e¤«É+É$)ÝŠ‚ àQ2÷›>†iš¨T*(—Ë(•J¨×ë©÷#Çqº6³Ç νpá^óš×Œô»ÜÜl6Ñl6ˆp`ÇqbÕ‚¡ÀwÞyçHožUÒŒÕd í´ ó“Y$)¾8r80ĽƒfœÜ*üI&aYV`ƒëºey,/%yW ‘#(Z8Žó2éº˲bŸcF×=?ªÄ}@¶š¦¡\.ccccê¡eL:®+î ôÓ 2V¤†ŒaÕRÓìžKŠââŸþÓ÷£X,âŽ;î@±X<õê¿÷{? 
899ªªxÎsašjãb½™Õ†<ž$Gž'ª*òT“jRÒ€º«[G‰ö¦„±«ëÃw%IAëp¹\îÚd4M3ˆ˜‰>¢Àpí­Õj¨×ëp]årµZ ²,£V«A’$‹Å¾-%IêZ·ic¶Z­â]ïz¾ök¿v¤ßæ–B¡€­­-lnn¢Ñh Ýn7ªv»+W®`gg…B!¨¼è¤ýØÉèäî{ ˜eÁuÅ¢L›jÑ]/†YE\7ì‹7Í>Á¢åF¥Ë3äyTU ¼HQƒU’¤.#“ ‡££#lll ,‚äû~à‰§˜Zt<@h”PÕÓ(þeš&ªÕj (‘‚DF3ÞK)á¥ô}P&*|S*•¸ÿ î ¾íÛþ¦ùZ(ŠÒe(Ú¶8ª¡jš†J¥ršï)ŒS]ï®zj\×ÁßýÝ{ƒy!:SÔÑl~!Þð†ÿTÏÕê‡?߯˜AAÚ¯¸Qt¹Lþ= i…[“΂I MAMÓÓÖÉR©Ó4!ËòÀ‰b±8älÛ Š‘š¤Z­¢:ÀÂV¥ë>F÷½q¹™ ÑÂ-ÀôÈ#€ªÜ¢@»%I’¿ct‡óWfÑIæø†Á²Í¬4êU­N7 Øó¼.ï›QOjÒz­(ß•òhF1IT´ •¶Ë†ïû0 #ðú&=¬dTS…Ø$dˆ–NC•¨ OÔxÕ4-¨.KŠLÒ³ì8LÓD¹\”+ ž¤€3±a X,âž{Žqtt„ƒƒƒ.%×qÄ=„tѨwœòW I ½«æíû>êõ:>ò‘0íLUUÜq‡‰ZMæ>öÌLéU¥Ú÷» …M årG™f?zêZR,Q*•bóµV«Q-äMC’$Ôjµ ¸Ø,çEýŒË͹\/^Äã?ŽG}4Öºfss?þxÐÞf°íôdhI ˦q+‡3Ë„®‡ ¶aAhì´æW«³i]CFg4ü—zŽ’‡UŒ£Û£iY666Ÿî?ÉB7~›,ŒD-׆Éc%C³X,Â0 H’»ß)ŠxF{ÝE¿J9æS•ŠÈG¤Moò¤¦]3tJ¶I3ÒþçøÀ¦ÿ£­ Žã`ã´ ßÑÑÞõ®×öü©`tFä {©sÈu]T*•À‹UžI4é½Þð†0﯂YrŽ©_(TLiÚ5ÞÊeˆ‡))]Ç©go©Tê*JGQ4ÕjGGG8::Š­§”ÿß+Ç4 µæ¤ûÒ´ ~ÄĤíÑnDŽê•+WçÎ rX—%8Šï§+&² «ïs80³¼„¹÷?dVrJÎ:Œ<©t£v]70ɓ𼉓1›Ö2^gYdY†ã8‡“Ê;’ey`ΫaP%óŠ?JŽ©_…T¾w]躎b±Ûµ§ü*ß÷»)s*þDŸ\×ÅýÑakkk¶?Þ’CmI’pîÜÿMË›÷i¡‹äAM:ÙEÁýÐmxì1ñ7)Ú¶m!ãižÓ ûKrM#f–x^<·Ôó„Ü•Ëa…ÞišÉ²ï ñÿAulÛ>m¡$Ò((ƒî!Ñõ5¹á—¶Á4Šq8‹ê×}A㜴—÷-»»»ØßßžØßßÇ… °½½=õÁg2X£a-Žãpu`fi0 ±hÊ2‚¼#Ž`V* 3KHɈ*¾ïF(žIÃM–å¾Iår9á%ïëÆÆFàÅŒîd÷ó°R%ÇègõJÿ9#–M’¤®{')c¶mw1ý*±’ñKïüÀ ÓéLé—Z=*• <σ®ë§AC½‡Zg$õËhˆ/F¢¹®†7~¾ßB¥b…¹ú*©šê[ì`f õ›'9£¾½–ï˜0m¨ ð {Ò2êõzל¡ZQ¡]´â}ÚÆì¨Ü¼¿¿ÍÍÍ y÷ææ&ö÷÷‘lw³,ôrùSˆ „K–Åγa©y ³ˆØ6õ³‚o†YEÊåÙµ­!hw¹WE` õþB-t]‡,Ë×ÕuÝÔ|بrC»øÅb•J¥o~Ó¬(—˱÷a ¼ßR©UUñu_÷ug:æe‚”ßz½lžœÖY æEZ+`òâ^bša¿W¿ú÷`Y4MÃññq ›ýPÕÑ*¤2̨ø¾÷Ê.Q«MÏX¥ž¤IC’<«–ea}}ØØØ@©TŠ¥vPžÚœI†ÖŠëò3ù- H}Æ'áf@ôÃ!ȳJ­n– jŒDÓÂE³ZÓð¯œÁÚÏÀ§\ŒO~ò³ ±sÁ0£àyÀûÞ÷>ö®2+GÔXA1ÄŸéÇ<¬®ëÂ÷ý™n„’Ò“ôèzžDVðFìjbš&ŠÅ #¾öÛvº!š¬Õ¥ª\‰É.¶F‘M˲`YVѱ±±ª‚ðÞjµÚµ¦V«UÔëõ¡Ó†ÑÇ¢!Á‹w2Me\nZ~ww<òHì¹½½½y_ûÄ$©VÅÎâ'>q„˜óV™åÂó€?û³ßÀ»ßý¯ç=†9SÎÚX˵¤ö5Óºq÷BÓ´ÔªÀÔÞ†[X­&Ôfæ oø¾ø‹ãÇ,+=D>êˆ÷}¡;±ÁÊdE™^ª«Ê[­Vƒõ{Ç4Ït×yEQ‚e‹ ˆÈŸ´JøãpK´à‘Ïçç}3Ã0D˜@ô}~üãÀ»ÐÌÒñÒ—>‚òOnaÙfVŽd;޳ šVBÊÆ¬+s“r•DQȲ¼0J3]¨·®maL©§½4±ˆÎjåË+“E¨ø×¤X–Ó4¡ªjWÞ©ªªsÍý—$)ØŒœF‹˜³‚î}ÓøînY¥~«ý¼«€ØiòýrÑ 3*øÀïpþ³’œ…c1­:/AÊU>k4M㪅նí ìÖ0 ȲܵÎ^ÎÅ`‘d»4&Š£¢’â(Â`U”³iÅœË 
ãÔxX§¨ïû(‹PUŽã$ÞËYˆõYQض½Æu”iyƒoÙÙÙé)Œ”d¬ë:vvvðøãOåC¯]»†ÍÍÍØEär¹àÿ;;;X[[ã>Šv»óçÏcwwÓÈ·í'“Õª\ß_,a`²Å<å›pÝpgœrëî¿ÿyóþj˜%! 2>,gáŠ6‡§ “É´MÓfZp‰™‹$Û½ ªÀG‘XÉqtFË:ûbeÌìYd÷}ÎîûÃW&cµ\.£\.£T*Å")g]½}Z(Š\Ç*rs«ÕÂÃ?Ü÷¤ . Õj»-“rãÆ äóy …àA!Ç­V FT½8—Ëasss¦åø£îõ´P.†…yË7í<’X[–…où– °íy3̲0oŸ6®ë_ê8N¬‘<Ý3è¾Q«Õ&ÄŒéfd;êñDtÙ8T«\xYd—$!“õúðiT¥– =j3F,ŠÁJ÷™UµQn^[[ŸN‚Ün·§ò¡F€KØÝÝ þÄÎOô3éÿÉüÚqH ûvÅb1rŽ·‚»xÖç=ˆé3/ù&|?ÌÇÄb|÷Ýß9Õ{a–yËø°¸îpʺišAnÓ lÛ†eYÂcÛvÌS!Ë2:Ι_+3E¶ûašf œ{žØÄÞsYN]ÆGq0:ŽÇqbí¾’aÁ‹T·FQ”…)¸4mn¾qãÆ\>˜âçÛí6¶¶¶pxx}'E?ƒùÏÿüÏñû¿ÿûØÚÚêyŽïwïú¾Ïó`Y€ÅÜ…Ç0ZϘÃÃC\ºti¦;§-ßðÔSOÅÞ«Ô¾I’ÄÿEô™]*“A¶¶¶põêÕ™~Æ<ÖðQvŠ»® UUcžÓ~çÖëuT*ø¾x³˜³cww—.]ÂG>ò‘™¼ÿ<×ïI¡ Òc$iøÐI&Ðú=­¨Æ4aýNR© ¿¦a•ìh% ^±}Q¼«D¹\^x…tðQ×ï[Úí6Z­V_/+í¾DcÜÇ¥ÕjáÂ… xðÁƒ÷[[[Ãîî.677ûNˆ7nôÃË^ö2¼æ5¯éc¿±‘þž®ë¢Z­9Gìa=CˆúV†«sEF×0ÑÞéy)çnnnbmm-ÖÂiZÌJ¾à®»î Š"ô½tO]’eàÉ'?Y–ö f–‹½½½™)óZÃÇ ÝtÓM=½ž¤°T«Ulll{žÇqbùBd (ŠMÓ¯ì¢+‹Æöö6Î;7õ5< ë÷$8ð³?{{{¡ÌJ·¤Y4hýžEwŽEZ¿“ŒªÇ˜¦ MÓRuz2Z©Âû¢° ›£ãêà7çóyèºÞSHÛí6vwwc1î“°¶¶†ííí˜ÐÓ$i4}?c’ÒÚ齚¨¡m2¦9†ó²Ú6 »»VBht~€IDATàž!ó’ï(ÑÊŽ¿ó;Ÿ kVZ˜iê#  g+€h…ÈZ­†R©„R©„b±Û¶aG’¿£ÞÔjµ Ïó–B`Y—íAÞ¥?þã?ÆG?úÆ@ž=oüüUf9ɺŒ÷c”~«”¶ ŽB:ÿ"õ4]un¾xñ"Z­Ο?ÃÃà  Õj-mZ­ÖÔvMFWH …%çr9ÜyçÁçƒ<Àý0 ¡À÷Ú@!j¹\F¥RY¨–…ƃ0Be ç1u! Üa©a.݉ÎZ¾Ó ½\Q€¿ýÛÿ I’àyl°2Ó! 
2Þ ËŠ+õ¾ø¾Øˆìe°R80€ÀÓªë:ŽQ«Õ`ÆiX½ß>vpp°²—‘,Ë6 *¿÷3ZÊxûÛÃ4/Ëâ°ÛÛÛØÚÚB£Ñ@»ÝF.—Ã>8Öç *6TjGÀœ2†7>É»:ìšR0n g\‰ý¬å;hŒëºÐu•fjdAÆ£xP,ŠÊ‘º*ôõ &¦à†®ë²–,ÞÁ,Y“í(´ùž¶–;Žƒw¿û/°¹ùÿ;o¼øÃY–qËíj’{‹ž×;R€ZÖȲŒz½˲b-ú¡( « Ä-Ñ?¨ÅMòæÞn·Ñl6Q(¦ò¡ÛÛÛØÜÜ &Jò}óù<{ì±`h’ÏÆ»J°wõ ‰ŠXý K¡vй>BÏê*g+ßI’ ûûßÿVH’ÄÍß™©2OObšÂP%'ûS–Åæe‰ð/+eÛ~˜p01Ê0M“CÇV€,ÉvÛ2-ÂÜÃuž*¡~ó7ÿað\¹,ï‡ÕRfZÌKÆÓHê(¶-”j5Œ2HkYCÇ¿™ s¬Y’m‚ŒTßa藍—J%èºÇyi°ÆK’8Îk>Ó‹¬Éxr³Ýó„>CÞÖ´tT˲&ò’³ö3Ùáæy`Öô³±{áXZl ïuôXCž‹ÏuNÇB ¶K Ù °ƒÐ{+Ÿþ½bТï8¾å[ÞÆ•"™¥&-JWÓ„¢#¼Q>$I‚,Ë])£ôàãfžxžØˆ «^‹pHŠH¶ü`c•Y$’ò+Ëbm?:ÿ¦-¿–e±Ñ¹B,µÁ:È]¹rÖ*†/Dd#nTöÃÅh†¡‹áóW¸AÛÏ¥p`Zô$Ì-,x^T«ñP1R²9‡•Y6|_xR{qtO ‰Ù¯a<³ÚDõMþõ¿þ$,ËB­Vë›ãÇ0‹@¯«’”ì8J]1–Þ`í'Ë+W`i”Pàhî lŒf€:‰ó- oHKèÝ_5 õ‡Áúâßz1g¹•U×u!I*lœÃÊ,¦9ܦd4\,UcÛöjmX2 KR—)—Ûöñþû IROeŸa…Q7]*• {WWŒ¥6XÁE—VFa¡¼Ña<ŽþécØÅÃÆðÞL q£’Æ–öZ@²”¹ŒÞÆ­ƒ¸QÞgü·¶nÅG¿ö£#~‰ÙÆuEµTÂ÷}är¹J=Ã,"Š2x#&Z§@UÕ˜Áê8NW›†É"É"wÏůÿú×ÇX”™Ef”:†aÄÚ1«Á-[[[Oj·ÛóçLX©V€~úyL ã²ßš@!¸dtz'å2Bµ”÷.§œÛëgK¶0Rzý©x ׯ\Ÿì;ÍÉßó<Ôjç°µ%òùVeo†Y †QУk¼,ËAH0=Ï- ˜E@QâÞ'Û¶ñÖ·VQ*î™b˜,2¬³Ô÷}ض£££y™9c†ò°Î£"Þ4päU®Tì» a¼ ê 4EM“q;ȰB£6y^¯ŸAC·q«H«ú<ÊÞð¹¹ FÒ(%¥|•ÄœY "-û¢‘e9˜¶m³w•Y$)ZlɃïûPõºxÞ¥è!Ãda—cÃ0 ë:o6® ·ìííÍ{ 3£\î¼RÞU 4òè²û1:úz'cTÆàb Ý^PK2Ì×âý"Ô8í½‹§ÏK‰sjü¯2‹xž0X©bj´àR¹ÌÞUfyð(Â×qœÓȱa ¬0ËÄÍÉðv»F£1ïqMYîo°®Lü;å¯Â0´;Ê×RGhXò\JH7BÓž« w®j²E‹‡Þžc%å}–POµ¬¸­€Êu ˜e¶‡ß÷}?V§@’$8Ž×uW+†Yh¢iѼÆ3‹ µ „iš\hi…é ~ä‘G0L^ë"`Y½«Hºî°ei—¡gUAÿо&âáµ£ôïì¦GžÍ^x)ÿïe8»‰óË}®©š8¦b4ƒ| öQf¥ä›Y)¢Þ¦A¤yX-Ëâp`f¡ ݲ,¨ª ‡”eŽ aYœºäœæø­L¡T¦‹¥®lÛ½=¬É÷¥&Ú›TFƒÕB>K÷bñ×~|©’p/¢ùó&º 0E9@è•%w§2Æw·ø~wøûJÉ7³2PûŽaÓ—’^TEQ8•Y8hƒÆ¶í ˜a–…ry°ÁÊÞUf© VM[ÒVçô1ì¹jÊs½ˆæ{½½‘ÞéyÑseô.Ò¤£¿Í1íâ›»RIw§(§c­(…B¯ªbÕsñÚj5\k [×J%Üý‚àä«¿z:¿ÝI+¸ÁáÀ#âºá6ï ÷ç ƒ“šÞú¾0"kµôüšGôŠ"ª llÝwkÇ ÇãyâG¦×)Jº¡K纮xø>¾ï¯ÿzÞßìÜáyÀ,2®+¦ÿ¯þêO±w•YJzEÍ”J%èºÎõ˜€[.\¸Ðõd¡P˜÷¸fJ´åÇL&¼†e´õÌ s5ˆþ¢5ôµU¼â)ÀøùDY-îaõOß÷¡ÁçºB©þúN|\ÿÏuÀ}¾ølßkRàµHÏG¾ýÀπמv8*ðår¨¬Kî[îû—âoU;¡E¦(á5§%¹#Q.‹±ëøP"†„¢ÇǸÚhà£W®Lÿ7?cÒªë-l8p¿¸ ^gýà`pÕ†^hšx‹ƒKÐJ’87z‡5MñÚz½ÛhÉgݯĵÚv÷5“–:L% :ß4CÃVÓEÁ¯nmáOö‹d úЇÅq.PÃ,4–|Í×|o»Ã½'™¥£×-ßu]H’ÄÆ*ã–íííya&ô«©Ô+TlªTæÒÁmöW,)åðô vsXœ(í2È(´¬PÁŽÆÒ%‹6=ò,ðåÖ_*¼Ñ›ã]4N¨HÀ«?”Þ.V›S庞(S{z=õnUä%aqR­ŠÏ¤$ÍaU-vp!Û&úÕŸ‰|O 
­õ.\¸€|>ß3\6s…ŠÆdæ!Áê鯍P«úB ކ´&Ï„q—æ5•$ñø"øâ·æUO/Ò¤(Àѱè‘úÊ>ã£×~ñÿüï/ªoî}=„Q]pð@ÑûŸ+^{´*/ž3!é0ßâ†zÞ ’$ŒÁè\¬V…±˜Œý¬TÂÐÚ´ÐÛ$µÚx!Æ’ÔÛ#Ê, ¶mãèh Ë‚3+ƒªׯ ž·› 3iY/Ü'›éÅÍŽãàüùóh6›ÈårÁíím\¼x…B¹\—/_ž÷X§ÂÔ¼O•ŠLƒlÀ,<=@Üh«TNC!zV_ð1àáŸë]!×ð;NÃ}+Àãê]1t7‚tò—¾t¸62%é ýŸ®½Ž®ª¼ÌôI«¿3•Ö5¸RsÕª0Msp5[Iê6(©xVòµº. ÕryøØÐ¬VcÆÂóÄr9,´!)CÌ0C–?ú£ßÏ^H†™i{Ú¶mst “Ê-;;;PUf¢=C>ŸG¡PÀææ&t]Ç#<‚EÊw¥âšI&ö®RUP*Ï-ÒâAx«§çYfóüL]áŸ}ÀÏ…|uPß”2pÏ«$‰÷ó<ÀýêtRÂ`Ã’Æ™D¡½Ãèä]f΄™\²,ñTáÆ0Ò·HeYÈ(UÏõ<1úåsFI;gBœ™™@àÃbÛ6·ÿÈ:–%îyE6 šçT›ÀÝׯãc¯zÕ¼¯d&˜¦P/\×eå} Â“¾/~¸Aëɱ,w§œœÎûŸ~zÞW5SÒêrí%Á÷C§¥h%xþýÑÈ­%oi·Ûxøá‡ûžôðÃã¾ûîÃææ&ÖÖÖæýU E¯5~¬‚4ë8 oõ…‘Iùjžþ ˆ0Xa©¦Õ£tãQ–oøwÀÿ}›;¨Ê©‚и¤Ò°i!Ä„9±ý é0‘¬ûe²£L¼!cšâfÚ/ì6ºiÓëJ:4ŒxŽ*ÃŒˆãŒV‡Ê¶m¯Hѵ©ãœÞ¢ ·ë†•Ýi.G¡4€^¨j÷¨ªñ›´m‡ùë½6µ #Ü–e\m4ð©%¨ôž)ógÖÑ`!’Z~â”Vð‘d˜dÜ÷{ߣòîyq#U’ÂŽ Q*•°v‚ë†Å'ÒÞÿT¿üýïÿ~¼fÞßáŒðO³ä’zºa¼A3+hmΉA…!GœGN {^<ÑV|€ø¡£m&OÉ]¹‚/Q'¼emm­Ë¥0`‚Ž_»vma Ö^ŒìòN.Ù7^ í%/+-Pe?ñ€õÛ¡W•BÙÒŒ¾/úN`€úºð¹ D˘4’ÃNÊš‘›:L®ºÙ]ÒòWdž¼ýŒUÛ‹Sµ:8/YQFëCÂ0)ô+ —d)½«TlŒ”ã(t"e™p]aÒqM •Žh´CZ`Ò& ªŒ¬OÐû§ýHic¦qGÑ4ñ>Ô"*-^–ÉK®×ß÷–ßÓD ªËå*×Qù"E:í5Å¢øW’â­¾€ô÷º7I,«w«²qjhšØäÕõ°‚ý€EíÙ[o=ÃâìI:–)ÊséóWÇ©­AD{­'ß/M†Éa„ó7n’$#_È)ÑoýSÏ{ê~×GÜp¼åÆ]Oîíí÷efˆJ%¼FqÝ+îE+ñ¦5½Œ¾—à|A|‘£pÚ$½ŒË´jòð20 â 2Ìd–d\`‚‚KÑ0àä¢Dí‘(ì©^go)“I,ËÊv èn5µ5’eñy1“»ÞÑVJɹM¯I¶ýRàä$4L3|Ÿ~÷¿q”ñ~ëÍ(ëÄ ‚g+äy‘eÀ²œåò®Rá< lq„ÊpòZ)yžw"U&IáN“¯~Êó°ß'ÉZ±wRŒ‹ª†GC«ËN²;œëº°,k¹ åÑF]RŸ¢útŸ¢{õlOs”Jñu>j‹ô2$Çé\ñn·´Ûm4  …ž'5 ˆy]³u¤˜Â9È ê¸]¡O}ÎOÓ«Ò X éÆªÚ[‡(Þ âË,i÷бrž¨êÑQoÊúºíeº¡0™‡îÑÃ`YdY>;«ëv{ÉóC7 ¤ ­r]±D;äi¡³@zï_‚zZ÷‚Ú?eX aB,KˆŽ®/@þ*µKó §yïɃ@ó&­þAYžo„Î(þ†#Ž`ÛâÝ“ó}¥R ‹W(Ï0„L“çÓ¶ãrŸÖzjÐØvxŸè¡} ƒ[òùïå@äÃRÿÓ~C„ €sRW J±‚èU:›N&£ kß™¦ MÓ¦›óçyB‰qœôÍž,{¿˜…ƒÒ8E„øæ¯nl„•1¨ø n¹,”™ènŒ‰¸‘šìy ˆszE"$#T…0pÍÜv†™® ©Ñ–T¸Å0·®a˜Œ`Ûýƒh#²«ò;É9¶|±íxÔLœcĘ̂÷É÷ýÑóW“½A©Ÿmš[ «Fó† sÐþxZ8ðX•)zŒ"a$)ô &õEzP¥2xýg2Í-“¿EöHö0w]ŽãÄ'†euçfP=ͪhX½Ñ#5-gpÐæ¾‚ÐS;ÌÆÎj’c†€Zq‘¾áœî¤«¡ÖÓú¨ë¢ÚïÁï¤3 Á mÊ]íB’ª׎#æB¯*Ø 3g|?ìeÞ‹’vuHy'9__OWâ“¥Zf†ø~÷Æ#mÊŒT˜¼¨–¶ æõ\)})XJƒ5ÙÒÆ0 T“³Å¶»c*û)ñT‘7É0) DûNd¦µì"ºÂ"-+½¢ÝÑÑbUidVšAû*=wè£s½¦LƉ.ÕŽãôq§6Hªš^Á69aÒŒÒ^î.†™Ѷ¡U RzxÓ}%¹yÞ˜5©½ùløæ7õ¡Ð+JÈVÊ] Ür†™*Ž®×îiHA—Œ§Ý ØXeËê.,?nAUÕÅëÝÇ0¨m/‘ìûaÞ Þ»Q¨úo”^a 3#’©Ô”Ê‘j°ú¾XüÓ 
T«âÁÆêJ²ô«mÛÝ»•–<²î1D¾©ôž¨ÑâI.†‡ç3ehÍî*\@Z>/êÌC- {aÛöô v0Ìœ .@p`Ç [ÏLnlF«;1ÌŒ±íî:w=ÃÞ CÈ:åa3L„¥ ŽF€¥ö3;ꈪ¾6ÒóO+¹¦É{2X5ˆJ<Ÿ˜9A©Jžçuéå]e˜Âu{·¦œí˜'*™ØÍ0 B´:°¦i¨4M±Ð“¡:it ¨¡÷d˜3@’ºÕ˲P(ð}QÁWU¹Ç/Ó“¥4X‰ž¥áÝ¢ø·„nƒÕ„0FÓ61ˆv3«Ì\¡Ÿ:Žþ+¾ç5¸éþ·O†ÿzþ<Ûµ¢bQôLÈ8Ë&Ë8gÇaRùþ·üG|ì{¾g¡[’-£,,Û|—IåûÖ=Ž¿úÃK¸íÒ¥…6V—M}Îf2‡µß¤h·ÛÈår]Ï_¹r¿ök¿†ßüÍßÄÝwß <@ûôàÅy_Ñx\½zu©&Ìõë×Ñn·ƒœˆyñ©O} ŸúÔ§pÛm·a{{ûÌ?ù€øÃøÖoýVÜqÇxñ‹_Œý—¬ÈÃ4ÉÊœ½zõ*žy晹}þ¤køk^ðüã'ŸÄ|ä#øó½ÏÞzküä/üB|æ¿þW|惜Û5CVäaZdeÎ>õÔS¸~ý:^ò’—,Ì]¿oºýv¼èsŸ[hÙÈŠ,L“¬ÌWZ¿ßýîwã;¾ã;Îüó']¿¿ì˾ Ÿ½ÿ~ä._ÆåË—Ï|üÓ"+ò0-²2gIuýΤÁÚn·{»qãFêdyðÁ±¹¹‰µµµyj4›ÍØ×¢Ón·qãÆLüF4–y}v/zÉ7üâ/þ"ËCÆÉÒœçM‰×pA–äadiÎ.ÚÎëwöÉÒ|çXxýdI¦A–æì8ëw& Ö~Òë‹Îår=ýEe™& ­ßhžcG¾½nÉ’÷¹ÔãýŽ s|´Ûmxž‡½èEc)‹×ÄŒÏ$ò=ã³ ŸŒ/âõ0“ÑOÆQx gˆU[¿)‹×ÃL¯ßÙ¿¦¡è03ã]ïzWçU¯zUçž{îéÜsÏ=‡z¨sýúõàøµk×:<ð@püÍo~óPdž9> ®_¿Þyó›ß|æ<ÐyòÉ'§6æy\3>“È÷4ŽÏ‚~2¾ˆ×ÃLF?_Dyà5œ!Vmý4¦,^3¼~gÿšF!“U‚—v»ÝÝ]looÃu]\¾|9xŽØÙÙÁÚÚZp¼ÙlÇûæø,ØÝÝE«ÕÂåË—áº.ÖÖÖ°¿¿?ô˜²xMÌxL*ßÓ8> úÉø"^3>ƒd|å×pXÍõ{И²x=Ìøðú½×4󶘗•'žx¢sÏ=÷Äž»téR硇êt:b'ãž{î‰í޼ãïèÜ{ï½} zí¬¸~ýz×g^»v­séÒ¥¡Æ”ÅkbÆgùžÆñYÐOÆñz˜Éè'ã‹(¼†3Ī­ßƒÆ”Åëa&ƒ×ïì_Ó¨Ü2oƒyY) p]7öܵk×pûí·ÿ€|>ÏçóhµZ} zí¬ Jqù|Ífívù|ÛÛÛC)‹×ÄŒÏ$ò=ã³ ŸŒ7…»f2úÉø²É÷4ÆÌ2¾8¬Úú=hLY¼f2xýÎþ5 ¬gÀÖÖV >ú(ô‚ãããžÇÚívß×R¿¬isãÆ®k¹qãööö õ0cžÇ51ÓaTùΪ<ô“ñE¼fz$eÜqœžçfUx gÒX…õ{|gQçb¦¯ßÙ¼¦QáÖ3àÁÄ… <ò!½øÌg>ÓóØ7ú¾–„zÚDwwüq<þøãØÜÜÄÖÖÖÀëfÌó¸&f:Œ*ßY•‡~2¾ˆ×ÃL¤Œ/¢<ðΤ± ë÷ kÊ¢ÎÅL^¿³yM£ÂëP(°¹¹‰‡~8HŽºÝÓÎïÅÚÚZß×®­­ÍäÎ;Aø v»F£1pL“g²Ë¨òUyè'ã7ÝtÓÂ]3=’2¾lòÍkøê² ë÷ ù΢ÎÅL^¿³yM£ÂëŒØÝÝ v>Ê€;ï¼@<ô¦Õjamm­ï±A¯ýÂr¹ÜÄcžÇ51ã3‰|Oãø,è'ã/ùËîz˜Éè'ãË&ß¼†¯«¶~’ï,^3¼~gÿšF… ÖqîÜ94ÀŽã?þÚÚ …cÇ‹Åbßcƒ^;+òù|W íÝÝÝ`gfÒ1Ï㚘ñ™D¾§q|ô“ñW¾ò• w=Ìdô“ñe“o^ÃW‹U[¿Éw¯‡™ ^¿³M£rS§ÓéÌ{ËÊÎÎQ(píÚµX‚4 bÒ·¶¶°¶¶$5ïíí!—Ëõ=6èµ³‚>“v©F¹žig²Å$ò=ã³ ŸŒ/âõ0“ÑOÆQx gˆU[¿)‹×ÃL¯ßÙ¿¦Q`ƒuÆDK¦§åI´Ûí`(y¼ß±aŽÏ‚IÇ”ÅkbÆgùžÆñY0Ë9Éò½xô“ñE”^ÃbÕÖïAdzx=Ìdðúýk6X†a†a†a˜LÂ9¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬ Ã0 Ã0 Ã0L&aƒ•a†a†a†É$l°2 Ã0 Ã0 Ã0™„ V†a†a†a&“°ÁÊ0 Ã0 Ã0 Ãd6X†a†a†a˜LÂ+Ã0 Ã0 Ã0 “IØ`e†a†a†a2 ¬ ÃÌ Çqàûþ¼‡Á0gÎ4eß÷}8Ž3ïKb†a˜™À+Ã0s£X,ÂuÝyƒaΜiʾëº(‹ó¾$f…±, žçÍ{ s&°¼Ÿ=l°2 Ã0 
Ã0ccÛ6+ðÌÊÀò~öÜ2ï¬:žçÁ²¬®çËå2dYîäȲ UUá8Êå2VF¡`š¦AQ”à= À®ë°, ¾ïCQhš¼Ÿ$IÐu}ìófÃÈ7˜¦ ß÷SelZ2ÎòÍœ%g!ûÕj¦i⦛nнÎ÷}˜¦Ùõ†—~òì8<σmÛUUcò©(Jðë$Ì"0©¼+ŠÈ­$I(—Ë$ Ëõ¸°‡uÎø¾×uƒ‡ã8Á6M€ØÑ)•JÁ$1M¥R)xŸØ£ã¾ïÃ÷}”J¥Xšiš0 cìófƒä*•Jðw/™¤÷šDÆY¾™³ä,d¿R©À¶m|⟈}n±X !†™ÃÈs”¨|’'ŠufQ˜DÞ?ô¡a}}=Øl´m;–²Ár=&&3œœœtEéèºÞét:ããã€ÎññqpŽ,ËUUƒcGGGÁ±Z­Ö‘$)ø@§V«+ŠÒ)—ËÁߺ®wTUû|†…¤|w:Bæ¢×jµ@Ʀ-ã,ß̼˜•ìÓëëõz@ð9Q¹f˜i“&ϪªvêõzðwR¾é9ÖI˜EcTy§õ8úzÒÛé\–ëÑak†¨T*eÕj€Ø•QU5>¦i*&ËrP2úw´Gôµ’$! ½õ|†–¤|ªªÿÊß,dœå›™³’ýèë°Ý'fô’ç$IùX'aQåd¶R©ÀqH’„z½“e–ëÑáÖŒ`<ÏC½^žë×òÀó¼ O)JÚ ‚aæMš|‚eœYÎRö)W°R©àøøxÞ—Î,!ãÈ3Ã,*ãÈ»,˨×ëAŸïû(—˨Õj󾜅† Ö `Y,ËB½^í²H’ÔÓhU%ص!h÷ó–˜,ÑK¾Á2Î,:g-ûµZ ¾ïöí ÃL‹qå™a‘qå 'Õj5Ôjµ í˜¢(AÁTft8$xθ®‹J¥‚ƒƒƒ.EDÓ´ Ù ÝPU5ö7&}3LVè'߃`g™yɾ$IAµJn»ÀL‹aä¹_TÃ,“È{Ô³ +oðL¬s†*þ‹EÜtÓMÁÃ0 ȲŒZ­†b±ˆb±ˆ $Œ”Ã0°±±T<88˜÷%1L@?ùË8³ÈÌSö5Mƒ¦i¼¹ÃLAò,I ÃHmÂ0‹Æ$òN-lÖ××Q,±¾¾Y–¹¶À„ÜÔét:󓅀ɲôa¥>M¤¸D‹pPÃ,,ã̪²Ï, Q}%ZP†a–‘aä¢#%Iâ4¦)Àk†ñ<ëëë8::‚¢(ð<Åbº®s<Ã0 Ã0 Ã0K]Ê0Ñ`ß÷!IÊå2« Ã0 Ã0 ììae†a†a†a2ÉÒxXß÷¾÷á7~ã7ð_ñóÊÔX¶öO?ý4ž~úéLüFO?ý4>ÿùÏã'ò'ç=”¡ùÁüA¼â¯˜÷0¦F–äaZdiκ®‹ýýýychx Ï>Yš³O?ý4n¿ývüðÿð¼‡2¼~gŸ,ÍW×u±³³ƒµµµye(xýÎ>Yš³ã¬ßKc°þýßÿ=ž÷¼çáܹsóÊÔxâ‰'–êz®^½ŠÏ~ö³™¸¦«W¯âÏþìÏæ=Œ‘¸qãF&¾»i‘%y˜Yš³O<ñļ‡0¼†gŸ,ÍÙ«W¯¢ÕjÍ{CÃëwöÉÒ|}â‰'pûí·Ï{CÃëwöÉÒœgý^ƒõ¶Ûn×|É— P(Ì{(Sãõ¯ýR]ÏwÞ‰»ï¾;3×Ôn·ç=„‘ø¢/ú¢Ì|wÓ kò0 ²4gŸÿüçÏ{#ÁkxöÉÚœ]¤5œ×ïì“¥ùúüç?¹\nÞÃ^¿³OÖæì¨ë÷\û°¶Ûm4›ÍžÇ›ÍæBí N›ÍÍÍyaª¬­­ef¢œ,ßýYFyX¶9;–ñþ,›<,ãœíËv–Q–m¾‚e¼?Ë&‹>gçâam·ÛØÙÙ zåóy\¼xù|Ðjµ ëz0‘TU…išóþ®f(X¾™e‡eœYVX¶™e‡eœYDæâaÝÝÝE«ÕÂåË—áº.ÖÖÖbÅA(ÑÜu]\¾|Íf»»»óþ®f(X¾™e‡eœYVX¶™e‡eœYDÎÜ`m·Û8<<ÄÅ‹ƒøü‡~wÞy'±³Óh4páÂ@.—Ãææ&êõú¼¿+†Ë7³ì°Œ3Ë Ë6³ì°Œ3‹Ê™‡SˆA>ŸG³ÙD»ÝF>ŸÇöö6àÚµkÁq"ŸÏ¯t=³8°|3ËË8³¬°l3ËË8³¨œ¹ÁzãÆ ÀÖÖV01nܸ½½½“¢Ýn÷¬šöì³Ïâãÿ8ÆB'3³§ÕjáêÕ«xúé§§þÞ³’oxæ™gÐh4pçw.Lo6f>4 \¿~}&ïÍk83ošÍ&®^½Èâ´àõ›É´~’©qàõ›™7¤ƒº~ŸyHptwçñÇÇã?ŽÍÍMlmmè_æ¸ßÅ=ûì³øÄ'>+W®œõ%1 Ƶk×ff°ÎJ¾¡ð\¹r%¸É0L/®\¹2³–¼†3ófV+¯ßL õ{Úò ðúÍÌÒÁ3o°RÃZ ?€|ívF#†¤ß®ä _øB(Š{_†I£P(àþûïÇW|ÅWLý½g%ßðâ¿ÛÛÛ¼{É d{{wÝu×LÞ›×pfÞlnnâþûïÇ—|É—Lõ}yýf²­ß³ðÄóúÍÌÒÁG]¿ÏÜ`íÞËåb‰ßD«Õâf!`ùf––qfYaÙf––qfQ9sƒ5ŸÏw•ÐÞÝÝÅÚÚZp¬P(àðð08î8ŠÅ⼿+†Ë7³ì°Œ3Ë 
Ë6³ì°Œ3‹Ê™]Ó4±µµLJø&¶··±µµ…F£$y?øàƒóþ®f(X¾™e‡eœYVX¶™e‡eœYDæb°æóy<öØcAòw2§cÐq†É2,ß̲Ã2Î,+,Û̲Ã2Î,"s1X+ßo :Î0Y†å›YvXÆ™e…e›YvXÆ™EãÌsX†a†a†afØ`e†a†a†a2 ¬ Ã0 Ã0 Ã0L&™[+à ‹ï®þ­(€$‰ÿ;`Û@­6ïQ2 Ã0 Ã0 3mØÃÊLÇ C<G<çyás€00-kð{y°¾”Jâ5Ž#¥RøzU φa>—a†a†a˜lÀÖÇóYN?満iš&½ð}ñ¯i ƒR×…”ÞW’„aI«*°±!<¥Š¯bQŸ‡išó¾ž•$<.ª t:½Ûf†a†a†afŒl°ær9ìíí¡Ùl¡Á¹\Ž -Í‘ƒƒ³û¬IÃŽ†a†a†a†e¤ÖF£EQÐn·‘ÏçQ(P(ØX€ëŠ6+ëë¢Úî°ø¾Ñe†a†a†YEF2Xóùà Ã0«ÇH!Á­V ûûû0MjÂͧª*¶¶¶vUˆæ—ÃÝL¥w¥`ÏJ@šw6ùYÓÆ²,§ÛÜzÒjf˜Äu]”J%ÔjµØšåy À뺨QÜá˜×V:´Úˆ“ÔuÝà!Iª’Ü}߇eYð}¿ë\ÿ4tCêÞÁ0C`ÛÂ3ªë¡ñzt“¾/ŒYß½¢ôwµ*î‰ÑûbµÊ†a˜ÑÉÃzíÚ5è2VýXéœUaœœRIêJE¹;IJ%ñY³ô®Z–…£££@f˜EÆ÷}”J%t­Y²,ãààPI‹Wà8*• lÛî9ŠÅ"\×ÅÑÑ`¼q’–ea}}ÅbÅb¦iÂ÷}hšß÷»Æã8 Àa¨T*Ø8uMÙ¶ÝõY¥R ¥R ëëëxâ‰'æý0§TŠßo(Ì·Z^O×÷«~{:’$Î5Mñúõua¬òž'Ã0 3-ÆÊam·Û©Ïõju³¬øþx!ºTý7^ï'ËâùiyXlj)áŽã@’$(Š]×{*ñQÒ”s˲°±±ÑUœË4Í¡ v†;î¸#PèIY¥ÿo¯s¹‡ðjàû>ŠÅ"t]‡Òg‚’w5MÞmÛ†$Ip]]!ĦiBQ@’$ȲŒjµŠR©ß÷áyljɽeYÁÆP½^G½^ÇÁÁt]‡ªªÁxJ¥ ÃÀÆÆF0UU¡iŽ¡ë:ÊårlL4ëõ:ޱ¾¾>ïŸÉ0âKF«m‹É*Ë÷O“$‘º"IÂ[­ö®áÀ0 Ã0£2RHp¡PÀÚÚ ÃÀÅ‹ƒÐßV«…K—.ç¬ “„èö²Ý\ð<·KÉžf•mÛ0 ’$¡^¯C’$ض„—Ëeض ˲PîQÒ˜Â)K¥RðŽãÀ²,èºx¶dYF©T‚$IÁ±rŸ2ÉŽãÀqœœ6ªu]7PþiÜô¾€0LlÛ†ëºp'ö¼¢(±pÏJ¥¼ß7}Ó7ç2g‹çyðÇFW®\œ;wn¥<«D­6~Ø“¢ˆ\!rèø¾þÏ߆/þâsøã?>†ã8(‹(—Ë Q®^­VƒçyØØØ€¦i°mÕjªªá…¾ïÊr¹xƒ’Å[(ÇÏ÷ý ‡$Ó4Q¯×/çy(‹¨ÕjwLQ”àòZɲŒz½Ž ¨ªÚåá$Q?›ªª8::‚išC…{V*‹ÅÀ[ˆ‚4÷ß ¿Ìt1Mår¹Ë3õÓïæy$I Ε$ Åb±Ë³H^ÓjµšZ$©Z­þz½>²÷¼\.c}}=˜äµ¢ª*TUE±X ^3,Ò;m4M ¼Í£{bVÇy§ ÃŒéF®ë¶m(ФYiš6Òýa˜!Ç2¾víZÌ›úÄOt®]»¶pÖzV899é(ŠÒyå+ÿ['ê´999騪x{¡iZàAít„'3êi%êõzê`µZÊóS¯×;š¦uºŽ²³X­Vcã¥÷VÓÜÎS ͳµˆò’µ×z½Þ©V«A$y 5Mëò”’'~˜¨:—¼æ4?Òänü±‹‰¨—I×õÀs›ôVFÑ4mª;é“"Ërªg:kò2ˆEœ“ÌüX4yY´ùȤCz™¢(]×»îKÇÇÇr¹ÜQ%¸êº<úÝÇèuõz}áäeÑæ##"D§ô54gâa=<<ÄÎÎ …ööö<ò.^¼ˆÍÍÍyÛàg‚ëŠÇ46ÑÈ“ |},ˆrL+• *•J—gÉ÷ý 5ê ¢â/Izyx†maC¦4åF¡Ü»èçÚ¶=³ÉYx¶Vß÷aš&Ǫª¨T*§4êѯT*Aå\Çqænår²,ò,T*H’„r¹ÜÓƒhÛa+¨AbH-8GDHÐùÅ"P«•ƒâKý¼•T]8+躴Äa˜^D[Í0 “ŽeYA=ªC][+• TUí©7ɲÓÕ¨n€XÄZ´Èžã/xÁõ ÍšaøøÇ?>﯂Yb,KÜúëM†öÀž&7nÜ‚gŸ½u¤×Œd°¶ÛmìîîâÂ… ØÞÞžßÛÛÃþþ~`ÈŽÚ‡µÙl"ŸÏ§>ŸËå2Ùו”ä^X–"œ+ÏópÓMEœœÜß‹_?c-Úª( TU…ã8°mš¦-\ÏTªFìºnFã8ÎÄý0³Ê"ÊwT‹ä.Yp(JµZE¥RÁúúzÆ= ´9ây^P@+}LbÑUUb¯ié…Ð 
#ì}¬ªñž’…›¦™9£´Y=[_Ò*Í{·š–íå‚ÚFõ7Ú„µm;¨Æî8ÞñŽWàêÕ8Ž(¸gšfÌAàûbnõSø£÷.q7¨b±Çq`š*|¸~ÝÇúúéT¯}íkÏì;a_-LSØ0GGa¾^j„¢ˆ9“f°ú~ø¨TÄfhÚyž'ŽkšøÏ.\øjÜÿS#{$ƒµÙl¢ÝnÇŒUâÂ… 8<<ĵk×FîÝÝ]4›ÍÀ[ ˆªÃº®£ÙlÊ«I5ø3B¹Üm°R Ó4ƒ8ʽó}?–Çö¡ýW|øÃŠïù)ð†ú~o#¸V«ÁuÝ .-¬WEUÕ ïƒr—‘E•oÂuÝ`óEUÕàf:LŽh­V ^7.ÉωÎZ$Óv+•°Ÿ¤¢ ö*Q>åh1°ê,ºŒgj73ŒgÔ¶ÅæL4Ú Vµس::,ÛËy9ÿꯞ‹­­sxÁ nÆcÝ Yë#”Ëå˜N%Ë*løÎï|J¥o ŒXª…áû"2Ç÷…â?H;-}ÌG]×qáÂgñº×‰çÖ׿ {{â½+àÓŸþá3ùnXÆWßm̨w6E¦™¦0&£º•a¹ì儜ªª°‡j5ñ7µàTq?Òõ°EuG‘eà]ïú ®\™¡Á:ˆ7nŒt~£ÑÀþþ~WÁ¦¬­­áÑGE»ÝÆùóç±»»›j(Ï‹¨MÅdE ‘¢­(Êcì{ˆ’Üó½ –Ë@´ÈMô&°L,¢|“Õ÷ý Ì´\.íýžv8öƆXu=\dÓ¦Dµ:xç;r¹¼4sì,XD_$Hq„ÜÚ'²¬pǶÃ磽|³ µ‹¢MYI’‚ðLêw|V°l/6¾ïò,ض„÷~ðƒçð¥_zŸúÔïâûß~ßýÝxÿû.V°,qŸ)—_ ß…kµ$I ŒUòõº×Øv¨ÀGUáÕRqë­4Í9õú*ÞX­ßõ]Ÿ™ùwÄ2¾†¡Au=O¤GGáF'Ý7'ô‚&ß'©ÞÑ=gŠzfE|®ëŠÏ²,aÌFûzOÚIòæQN¦pß´Ûíàùv»H +H£ÝnqüQZ­.\¸ÈårØÜÜÌœAcYâ_Ê#=::ÂÁÁªÕêØý=3®KLUUQ*•ºrD–E•ïJ¥EQ‚ßz½ž‰ê³4ß’;Ù½¼FÃä³&ߟvÿØ`ŽE•ñ,@Jî hSF×ãh¡üŠ£;ã¶mF`–‰ö§õçøøš¦ùågËöb‘&Ú†a]H7{Ï{6ñîw¿ÇÇUüâ/žà±ÇÀ¿ý·ÿ½kÍ÷}ñž¤Œ»®M«ž¦2…Æj¹Fñt¾˜¿½ÔAaÄÊA(rÔ±!IÀW~åÌô;c_ è>1È> ï)¥}Ðù47hÓ$MSÕP¯"c5jÄz°¾Þýº´EÏL§ÆO”‘ V@äw5 ¼úÕ¯Æùóçqþüy¼úÕ¯†ã8ØÛÛC.—ê}vvv°¹¹‰;ï¼3öüµk×Ä ß|>V«5Ý+ ‹ åZ±Œcp•Jáÿ£;«…/c8ð"Ê·aÐ4 š¦ÍÕC¡¾–æWÐ|“¤pžL3ÄÑóÒ•žEÅu÷½ïûfú‹(ãgewÜ!ä˜ ïL±™iL /ÏàüÓ¤' ¥“œÅb¥è n(ªC×õ®û)Ý#Î*,‘e{úP!¾I°m»KŽM³{.Q­4‰Öùóyøþxå+Ÿw:>ñ^ôžÉ[ )òT¸,©¶Dç¸aô–$ •žßISgÆe|¾ll„ñý tŽ^&Ý'J%!“dJR\N%©¿‡Ó²ÂZI+E 8Ž˜góùά­V+ˆcÂ>¬¦i¢P( P(àâÅ‹xì±Ç†ö®¢Õj¥†ô›Q¯n’gŸ}ÿøÇÑh4Æþ2ízÛ¶ Ó4Q*•ðÑþ=<Ï zŽ%ì»n|±[4MCµZ=sV«ÕÂÕ«WñôÓOÏäýg!ßðÌ3Ï ÑhÌäÆáû>lÛÎD/à ¤ç‰…qP¨ü4PÕ噃¦ |çw~/}éûgöY]ó„e…!ºQeƒŒPÊùé%w’ÔýÚa¼²I(¤ÖëóbÛ¶ƒZ “ ç5†-ƒ(T8¨ø õn6›¸zõêÈiHðˆëwÖ¡0ïI¼üžçÁ0 ”J¥Àhu]±™Y­Æ#*•J—^Fu ’ÐüÒõxxoôVHyzôÿ¤mIWéÿ®Û­ô÷B×õ®”›F£ëׯ”©qáõ{¾îßo:P/aYÝç—JâÇ E©T*ØØØz]·íÞ›žš&d¼T^¶{A:ø¨ë÷PÖýýý †=š„½½½íímlnníYmµZØßßÇÅ‹S÷›ý.îÙgŸÅ'>ñ \¹reì/±T꯴:Žä:|êSÿ3uQÊ3ô¹ËÌ< ¤k×®ÍÌ`•|Bá¹råJ°:)ÑêI"¦M­&”…j8> +SPÎ{ÃǶ·Ám·}`&Ÿå5<+¼þõ? Ó7RnIY&Å‚”àb1TD(ÜвÒwÄ‹Åt¥Û0zïÖS5vªÌžÄ4Í ÐX±XÉhõ<¯ë5†a Z­E׬ÄÀ(f}}=ö:˲ú¦H’„Z­† œ;woyË[ð§ú§SýÝiý^lÛ†eY¨×ë=epHß:88ŒVÊÉ‹æwS¤ ë! 
æX?“$…‘¢hˆ~™1š:FÝ`•e¹Ë»zåÊ´Ûí™lÈðú=l;žÿœF¥7@%©;-äà@ÜcÒ<þUS.—Q,λr¹¿®Es"Zä’ Í&½>‹ÎÛÛÞ†_üÅ_ÄSOM¹èÒÚÚZ†ã8ƒØ…Baä0†ÃÃCär¹àšÍ&®]»†ÝÝ]lnnöõÒö«>üÂ¾Š¢L”®ë⇠{h²ì9µ^QU<Ò‚,Ëç÷‘R3i2238“ÏÏòž._nãðð{ñŸþS’~¥’0R£{œTmžd¾_X/ ^ëºâ|ꩪ*êõ{P*ýJ%70ì2X“ÿ„àû~Ð#Y’$‹Å¡{&S›z U~§Ï¨×ëØØØ>—¼®š¦¡\.Ã4MT«Õàø úªª¢Óé^¨i¯á‹²~/ ®ëÂ0Œ@žEã8£ªhƒ”p*jIúÖÁÁ¾û»ÿ¿xÅ+þO8ΕÓPE¶-”⣣£`¾‘>w|<»ë¤u—*«NºÁº½½f³9“v2¼~ÏE y^·ÎC!éÑi¢iñbHï{ßSø®ïú¡Z­¦F¦ÐÆ!uw(•JA»@UU»Öwªs@m€02‚ öh§º£t>‘e¹ë½¨¡çyÁ}…"}hî?ðÀxæ™gFúÞFªL ã•*Œb¼ž;w®ïqЧoµZÁäˆþZXV÷.)~–%„(êö¦Þ“âÚ¯þ굉û†Rn«ËCVä{®ë‹…µŸ¥w•òTi ÕjƒwÀg‰ª¦{,Ïš´›Íç¥þiÅbÜ`=‹€…E‘ñyQ©¹ÜÑj½@¨üÕëé‘4£ìy†÷(+ðH:ŽƒbÑ úCV*•@9Ë †dzž×uc=’IñÙØØ OMÓR IÚÄ=>>†eY(žöíˆö0–$ ÇÇÇrìy^¬"k±XŒ)Jó†e{zø¾J¥‚Z­ÈÏYE!Ž?ôCMÜzë#°m;&§’¤à7~ã—N=ôBn^ûÚïÅg>ó5д0bᬸ¡uËøü!C4-‚îói½â%L!<þÏP­VƒËh´g²*¼¢(±u˜6 é5t?Ñu=8„i{ÔN³R©L-•o¬ ÇÎxòÉ';—.]ê<ðÀ'Ÿ|r¤×^ºt©óÐCÅž{衇:—.]êùwO<ñÄÀs¢HR§#ËŽ®w:ªÚéÇ—Ëâù£#ñ·¢(ãÓ“êuñ:fqU^ÆeZòMçM Y–;ï}ïõÔcµÚÌ¿–N¹œ½9¤ªóýürY¬I''ñçÄ¿ÇÇNµÚ}¼Óß%GLS^ú1¯5<«èºÞÑu½£ÎH ÞúÖýŽ,ë“““ÎÉI÷½«Z­vªÕjð·,ËN§Ó999‰)y^’£££NµZíȲÜ9¢aÊuµZ­ïû¥q||Ü‘e9ã(œ…¼duý^’òAôû­;µZ­óßy¡#Ër'—ûtç­oÝt¯N§÷½ãäD¬ŸŽXËëõy“Áë÷òqppÐyã½óË¿üñà¹ããø®ëοü—ÝÑu½#ËrGUÕŽ¢(UU;;;Ÿîèz§óÚ×¾«óº×½5xM¹\îhšÖ©V«z½Þ‘e96g’œœœtt]ï(Šü{’¦XÌqäe*}Xóù<òùüÔB¶··±µµ…F£v»\.‡|p*ï ˆÝ E!¦)þŸÜ@&a/|a;p}£÷vd˜(³–ïAxž‡[oÝÆë_Ÿ‹å,ض˜Ô;nÚÇ*8µ*ùóÜV„-•Âï¦R¿¦Q³ùÞ¯ÍÒîþ¼e|V¸n¾Ïå÷’ZMÌ/]×aÛv6„¹¢¢U.—GŽjp»»ÿ¹ÜG<'èÏÝ©×4 ¥R)Ø-§]v Ë",ËŠy­’‡•Ú%{U&½^ãT{—eº®÷-•5–U¶G…r‘ÓzÍS5ß´4MÓpÿýWñßqw ·¾ïí]4MÿúW?Ey øìgŒég½:*På]ò®fÀa¿°°ŒO†i 9¤:’„ òå/þbø‡oÆ¿ÿ÷M¨ªŠ«WÿNNîÁÿx ×®}ü&ÜvÛ·@UUGâØmÛÆ›ßüüÍßü6n»íGck/Õ O¨®ë}Ó+$IBµZ j ›2wFµˆ“ÿÔOýTçÒ¥K]Ç&åúõë'žxbè÷ÅZ?9 =§½¨V«]×;µZ­³¶ödggçצz}Ì|™÷nà¨òÝéLoÇ•<+Õj§£iáó²,vúêõNGQº_wr"vÿFt¢Ôjâ±è;ß³àà ôF=ƒÖ©~ÌÛ£3Ë5|躘õºø]d9”eU ?ò éºÞ9ˆ¸½eYœtŽ;årydo¤øµsttÔ©ÕB|¯óŽ»¼¨år¹S¯×;GGG-:ùpttÔ‘e¹£ëzàÓç"1Oy™çú=NNN:z½È•,ËÁ¿Qï ÉJš‡çäDx™*3¸÷GH×õÓ(€øùuÒÇaC×'[7³Â¼åeÙÖﳤZ2¨ëÎ]w}®ó¾«óàƒâ» #¦Ž;õz½sppÐùú¯ÿ¿;º®w^ùÊû;ªªvê=”¤“““Î _ø±Îcýɼ/qbfæam6›Ðu­V+¨þDV<ŵSÕ±ÍÍͩҹ\.(Ž3m$i°—Ô²¬`×â¥/¾á¾78æ8âõ‹°!Ád“YÊ÷ (U–à ¥ªöŒ£ž[–ßÉvÝpèηmœ½ó/\W<¨Éz–I^ïYÍc¬ÕÂâ ‹Í1OŸ6äŒ:$ëõ°)9’¢¹£TUÓ4X–º$)(84 tïžOÑȽWŽ}¦ïû1ï—,Ëp]¾ï”3ª( ŽŽŽ‚ö7TØfUY&Ù„ïû(‹¼¸® Y–qttÈs¥RÁÁÁAjÞ*atÿÐ`ë8>~3 Èu_ { 
!IâÞ´¾ž^ 5,Eš,2«$ã“@Ñï{ß·ã¯øøÆo¼ª* Ìù¾‰ç>×Á·ûvƒ¥bK{{{ÈårØßߪŒeJZîu¿&å"Zd* •–æfÑA¿ š& ©K¥îçšjÛaÄdÿ9Ïç²¶ë ã†±8 }:ëyVnQ¾³UæJò¹dÔU´ ¯¦iA¥FÛ¶cEúÆiï‘lûүꩦiØ8íîžü\*r4ªÁI†v¹\ŽæÌrÝÐp]7/ïò].—aÛv Wš¦Á÷ÕØÚO©&$rÔBI–eüÓú~”J_ôAM®¿º.ž›°!ÃL ‹¿ï¾ÿŒ¼ ‡óçÿ6(läû>4M›¸8+2Ð`¥F×>úhÐkµÝn£ÙlââÅ‹Ás›››ØÝÝE£ÑÈü® åèõ>n÷í¯ª(‹íù`VÇqð_ðOƒ›¿¢„ ¡£"/Ëayþ^seQ%ËÙËSíG¹|6ó:êu¶mñÝgÝû¼ªø~تb’mcE ú& <2Z‡©¶èû>\×Zù¡"Éžª”šÖÖ`ØX]NÈ@UU¦ivUuNBºõÇr¥Çt]ÜCL3¾)'ŒÚÐûÿÏþY˜÷—ÜGIÛb˜,@ýë«Uª¿–‰jçËÊ@ƒµÙl¢P(†)€ 4~ô‡‰Ï:Ôÿ( ê”\ ‹ÅPùfå’YTÇÁ¥KnW÷2ŸG³ÙD³Ù ÂÇé ûm4¦ÃY¦ŸŽ@eÕ“Dw© Ã,?ýÓ¿ŽÏ~ö?Ï{™Åu§o°Z–Pî¨ÊÑÑêy®ÃÞö¶w­jµZ£×‹´ð^Y–c­ ¢(ŠÒ³‹çy(‹(‹‘PËéÛ˜´á;³\PQ¥r¹|Z¬«ÿ:hÂH¥½¨ºdÛñ¿I¼9*É2´aCk®a]é®ë…ìÆ‘ü¬óß!ÀfAãÄÉvÏvñ°‡@Ðx‹lxÓ¹o$»'=B·ËM ¡¶¶¹žƒ»cm1¼D‡ZjóG@k©¥vàè‘æt¹6iÝN.'”.e$’<™B:Ö«$SäÂ8ƒ.“TÆ£LØ%Gß¿~\år½ùµ^üýH­îv•]¿ŸýÔãßó{Þ°±ñ³Qée°Š‘êF¥RA¥RI„gUðZ”ú-8²Y«Àû¯’¤¡òï>ÈpTÆYhišúi·•‘#!oÙ,õC’°·^²ïUJ´“œ9skkkƒE¾´60 cìÂHårÕjÕ6eH,sS2?t]„;nll z© šæ]üÈ0”Aêö²³cõó&$ÎHh¯D¶ØU;0MgΜÁÞÞÞÉf¥Š ¨Õ¸Š‚P+ rToܸ8þü ‡5I†ªàµh7 Ã3¿§XT;ˆÜ=$IÃ4M|ô£ßÀ³Ï~gÔC‰=A!ÿRpDZ[9û×’d‘϶ÑTq2/ãóàà``d–ËåA!³IÂvóù< À¦iÐu}ÐRmVùª„ÖèUXúA«sÕfœ= Ñðné—ͪç2(€Ä{q±0úºV« »cñ°h e°6›MìïïþßßßÇúú:¶¶¶¢ÿLñË?ÊfÕ£a°h IFßÿý›”Û¬õºò¦²ˆH²©V‡=ãNüzž‹Eܼys`hîííMUȨV«AÓ4d³Yììì°™9AÆ* <¤² /UEsgonê=’dóO¢&Å[ºººŠZ­6QºE«EùšPëþþ>*•Ê ,x{{ûûû¸xñbâZÛxe„¦i3$˜^V’$4MÃ{ßûî~‡ h•µo’–ñ3Vàù|Þ5”l\fñ„x¡iªÕª¯±jšÊ«j?\.?Fï‰;b”æóy”ËåA̹sç`š&jµÚDúV"¨ª£%Ð`•v5ëëëƒÇ¶¶¶pxxˆn·;ÒÞ&{ž…³žCOIðoŒz‰Àí ©ÕRFÎÆ7«Ò@>?ì9râ,‚DHRiµZhµZ®ÆªÝcÔhŒê¾bƒ7L} qÇ­€ Ò6T+cÊH˜¨gHBç°ÚsU“æUµãep†)˜!…—±E’‚¦ixòɧ£FbHåX[KAa]JÆ¢G$©˜¦ MÓlyÑ66ÞŒlÖj­uá‚Òg¦iåŸ:7ãÖÖTèüÞu‰7^ƪÍfGŒU©l].®éÑÜ¿Œ}X—…0=ôj5ÿ†Ú„Ä ]×ñ7ó‹µkYFZ-õeeª¸ ¾¬Êe.Ú’Ža÷—¬V«ƒ>|„$z½ŽÕÕÕA‹>ÕÃ÷Í(ÕúEÄúà`Ø-Gõ[6 3 ’ÄŸjµ:ÈU ƒôÛVÏ®]¡ijCçÜ9õ!Hô„ö°¦é·ç”kÃ0B5}÷Ê%$ކw¾óÜ! 
‰xR[-†¥FÃÿ3­×냼'B’†™±G˜¦Ú¤aH/I+ív{Ÿ†sçÔ´ýž°ßg³jCǾi}tõ, 0†Áº¹¹9òX³ÙÄ•+W†ÛÝÝzN¾äó£¡/¦i†2VàÖ­¨g@Hx4Mí[¬– k‘Ç{==H…w/[TB( L’ˆô>::ô†Îç1èIc•¤»Ü{q挒QíN¿³3ìa•Í}¹‡4µ+âB Áº²²2TpI( Q}"Ü"„’4ÆÙˆ!ö~„$yWÛívèzBâFµZÅÆÆ<ÊeÒxtdyW I#õz#ë]·:z„Ùx–ú4†ò® ,²/ Ö\.—º~«Nh°’4¢ë:ΜYÅê*CZ¢i*mà«_z$dVH•g¿H_¿¾«„ÄMÓ!‘çΩÜTIm88 w•¤Ã0°ººŠl6;Øwêív[}‡Û Ï0°mMü;‡µÓéàÆƒÿÏŸ?Ÿ¨Ö6n‹wÓ4™·DR‡¦ixøáçá‹\ä¥ ]÷_¼pÃ’$™F£hÚp8£ý7!i ÕjùöR­×Õ¦óÑÑøa¼~­ÎH<m°jš†—^z ½^oèñýý}är94Ć ëºÎp0’:Tß±¡ü â´x é!Èqªiû®’DbLÓD±XD£Á\;’nÚí6n¹Äøš¦ ƒÏç•±Ê çtª­M§ÓA­VÃÊÊ t]ü4 ¬¬¬àùçŸG§Ó‰z>¸µ÷0 ƒm HêÐu¦™ç"†,šfUx BZ€’4Z­ÖÀÛdô¨’ôÒjµ°æ±›\­ªfö N7¡<¬ÛÛÛ(—Ëh4#Ǥ @­VÃöö6®]»õœ|q*t¦!iÄ.×o²lhZø/]×éa%‰Ã4Í!“®G’TZ­¶yË·Y´7ú 7›ºžÌ“E˸µZph˜Ù»ÉRª$$qm;â]­9vgØ’LJœd\Ó†;Ø{ Œa° ÒâÆ™ûÓï÷ÑívQ*•_£Ûí¢V« B‹s¹Æà¦éõz¨Õjƒ›©\.£ÑhŒ=9]·ìkk*¤&›UV[Šò„5DÏA·17Â%ßnè:pçÎ?à}ï{ ­èe%s!Jwd¬š¦‰jµÊ"{$q’mAוœëºÎI25q–qõ7Jd”P9¬aèv»ØÜÜ uîææ&J¥t]Çõë×Q*•†v ···‘ËåÇ»Ý.šÍæDã’õ‰*V þ––$b6¬A…!E³Âæd^Q®EÊ·Ðh¨ŸvøÄ'¾ù|†ÁV2¢q7¤:°ZFOÂÙ¶£ëÀ믿Ê"ad&ÄQÆíÖv»MY'#ÌÌ` K§ÓA¿ßhÊd2X__„÷z=t:¬¯¯ŽW*•™äI4˜aÜiÊøÌByXƒÐ”CžkB¸”ùJ¾u])ûbøò—¿ù|~(€Y¥wÔÊÃ4M´Ûí‘0JB܈“lÛ1 à›ßüS¬¹•J%d â(ã’¾'ËrMÓ(ëd„±C‚§EraíýZoß¾ @Ýò·=¦¾P(öu£Xt_°3‰;4 Â€‹ç -¨Á #ö£ Ë{»`)ßv$œ&›<°^›¢NfMT2îÆÚšHp½^G­Vã% EœdÛŽ®wï^C±ø«Q¿E$áÄQÆíÞUq(Qg' ÷°f2™¡<×^¯‡f³‰J¥‚\.ç{Sxõ€àK_ú:Îà1©lÇXæIñjÆ ÊEÈ1•ÎX•×Þ€g®k¯×ë¯¾Š¯|å+3ŸÖ¼ä¾þõ¯£Ó錼F»m){ÓÔñÿøvl$¿Ìt:Ü»wo.¯½H„ÓXÕu}P X×uhš6hBÒC·ÛÅ«¯¾Šû÷ïÏôu£Ðßa(—5<ýôù¿‹$®ˆþ’©Iˆ“þ²Y ú¬28ýÈ|\ý½pƒUè÷ûh6›xæ™gP*•pùòåÁã^øMîÁƒ¸sçnܸ1xÌ­åRç¯6 ªòF|&Y(C4(‡U³=§âõ¥ˆ“ǹ·oßž›Á*ÌZ¾µà¹qãÆ`t0]Ýn°šxüñ»C ÈòqãÆ¹,vì,B‡ûa£ù«F†a Z­âܹs,P“Ræe° ‹Ôßa0Œ6C$—Ñßó’o zýmGÒ™e°RÖÓ¬ÁÇ•ïÀà°ÉÖã,ŽºÝ.¶··‘ÉdpõêÕ‘Ð/üJk?òÈ#(‹ƒ¸|@yžÖÖ†½¬ú2»4„o £ŸüÌÃ9a÷®@PÓóYw¶Cœ/•Ð=>jÙ]œD±†aò o{ÛÛ†ä[Ð4«¥¦ixüñ_,DCÒÍÖÖÖ ÂãýÖ£&T;)fAêÚ"å[°¦º®Ÿ™YüÜÉr…Œ»aÃVæ>‘i‰‹lÛyÃqñâ3Q¿5$%ÄMÆóykãQÓ4ìp§xh°v:lnn…Òöû}t»ÝÐU;¦æÖ³U®±µµ…ÍÍÍAùíL&ƒ‹/Ž}-{å1Á4ÍåÞ…¿âÊx”J¾AÆÓ zí—ÇäšNZPžRƒ„ºº­K!ÏæÉÏ£K)ßn躎l6‹vÛ»J6!ÓµŒªç°s#^×uX"SÙvò™Ïü>ò‘߈ú­!)!n2n¯¿aÉ'µµ¹rå ö÷÷'ÊÝÚÚ Œq/ xùå—7Ö$†1T«À­[Ã-m•àUXžÇ ìÞU³5Xó5 ½¼«”Ñl_ƒš )Ö0¬ŸÌeuW)ßP¯+£TÖé²ãô>2+-ãNLSå¯Þtl¾q±C¦%jÙvòÛ¿ýg(ÆÏ{%Ä‹8ɸ,Çóy Õj1˜ø²ð>¬a''Ö ö`=A<€å5õ‹œÓ¡ÂpÃ8¢Å¸mŸ<Ï/šÃ89Ç©“¼ÆÒÂhXoîU…Ý 
ëòÉ|]xÓ§ß„‡<4ù{:%³oADZÓ´|×jS¼ !3`–2n§ÑPòm{g80Y$ó’m'ò'§pñ⻢ž.YB!ãR8ocCépÌ#~DÖÖ& $\réÈb88È1.ma€à"Mm(£ÒËtžëözâ5õzm;e׸àñ¸Çcß»ó½xí;^û­Œ;;£ · I3;;£õ t]§ÁJR‡aäQ«Q®I:‘´=V&aH­ÁêÖÖÃ4MÞ5 1rÃÞ¦âõÄcz›‡{!¤,F{¦J!&7ûËi„ÊgíöѺ=V¾¸ùÅÉߨbí$ÕxethšFÙ'©¢Ñ0Ëͯ5!Qbž88òyö^%áH­Á Œ¶´Ñ4m9=PN~7ê',E{Mý† ÃÒJÀpñ¤‘s¡úTÔëêw Þ†µÓ :×¾yqò÷WÿùW'}7cÅ… Öߺ®sCf\Úm•ì~544MÉ9 ¨'扎YJÝNRˇ?|܃!i¥Ý¶¢Ãh°’0„ÎaµWë÷û# ³l3–®B°Ëè\[VWîE`ÿQ÷óÏ@yXMS··^ê¿§¬§B©CŠY¨ÕäøGà;ÿ ðêÿ´+M88Úye°ºW2€¯}M]¯\úOºg²P†gÊ l`8V×­ÝŠ,†=²ë¯ýàõ¨?˜Ù`·±–¶èŒ®«o?Á™àèDd[¾1Ëe%ÛAÏÑ4ë§Vsï›åõœá×ÏfÕnÃ͛ՃH6r C×ùz.¼÷•Wø!Ì]·Ta8wîÜ@Ÿ³:0IŸýìi|âOF= Bæ‚ès†“°¬+++X__y|¦!›-º‹¶¦©~œƒkCy+½îYð«aÚµˆg³µŒ¹|ØÛÞ÷ÿ€}—çHõ]ÓT†m¹¬®ñƒïZPFzdPFã€3gN ļïð±¦õššÜ{ë°´ÕR×(­}îcÀÎ5§Ÿö˜—®çNGŸê¾òaàCß7|Ž”Î][ÏÝ/ÕkÀÁYàÍoÞô£À—õϱYbe¬¶ZÁà,0Må5M%«Å¢’ €#—’؆¡ UMSÆfc±ÝV?òíº¶¦î!·9ol¨±H'tûsœ×)•Ñ»ºªÆšÍZ©aŒVͲ÷¦k4”Gؾ$†·¿:sÿz¾ŸÀB±«nÃ0°±±Áž}$•\»ÖÃéÓ_F6[ˆz(„ÌMS_¥õz+úu9Ik.— ,GŠÅÑàÈõbšæh¿ñV:ï[û";›UÞJ3¯ªßºTÓuµxÖ4µne‡ .•ËÀcŸ·Jm¶ÛêuËee0ï¨ÖÕêP”ÈcO©×ªV•1¸³£ K¹¾Ì¥…ÑüÒrYµÔq®ñÅû”ͪ×nd-o°ýs“@IJÎfÕû–,PùSà3ŒV>:²ÞxÀÏ­fÙó—“o±ÊÛ(ÙŽGõºå­ÕF•‰]WF¤i*C2ÌsdÜgΨ ù zžxwÛmkãËÅð~ðPtU°gì¿É4í± IõW‚‹ßà'¢ !3Ç4Õטišh·Û¸éìQFˆ ±mk3-¦9¼†Óu=š½½ÝŠxvÄ•Eº[*›„-ÊsööÔ¢øÏ_®¹<§ÕR?Ù¬u¾„Í:ײïþàþÐ:c-øíã´/è³°<¯{{Ã1¨ÎõbÞe\’?j7*½ Êè½i‹xìažRU8à÷Ý~ ©zœÂ| ·–6‘ ëÊHä꪿ÁjšÊ(4Íaï!¾‰¬Èºµm;ú:ãxãÖÖÆ;_îÏquŒ}ch¼æžRêu+r`=’n^y塯Þd$¥È×W½ÞÀÚÚëP„6X»Ý.ö÷÷±¾¾ŽB¡àiüé1)$"éö¤î…V‘4¡¼¦k° %kµ¥¼1êÎÕ0ì]m4¬°J NBhO^ÏİQ[,*ï«}׆»‘öŽoþÕoOýGëüêɹn=Ríi>¯æUvym·¾§ W%ÆÉuÛ°rb¯éñ™5¾ªâÂɹ)[ß:óW#+Z ›%ö;å²&ëDÓ”"žHç¤VWGÃuÅc¶OÛ´¡¢“lnEíáN1>&°"6I+Õ*ðçþZô3„ÌY›«Œ0 GGardÙ e°jš†Z­†\.‡L&3x|kk ™Lš¦¡Ûíâå—_Žz>CØm·…ß-XžI'Ù¬Z¬7jaÝÆhx¯W~¬ïu¿xx†Z;F P@nóð‹¶ó4¸·¥)b8Dׄ¯—=` nCŸaÖ•Îëø‘õ˜—ð®Nœ`$¥˜qÝ3ÙnÛrâV´Èi0: íí¹x’{,­ªy¯~øÅ¶”Ø«I Æ"+:/3ºnÕ˜Æ "¹Ù¤› ¢GòyàÝïŽúÝ™9¦ |üãÿ„BáAÔC!d憵ÝjµP,é]%¡ e°noo£\.£áh‰R(P*•P©TP«ÕpåÊ•Øä»Ú×˲¸™É!¹mÎT'WzÀ65#¾V³¼¬yeD®BW^¡@,/a@þWý½—^ëû<† K¿¶0Nð7åµ%ï½)CR4—<$÷\ J0yã¥Òµ¯Ð}g±œ+5Äè”k•ËÖo?¤ÄÚš2 ¿ùͨߵ™“ÍúÐ' i)ýÒŠ+²¡b¿ü¾óœJÉMþß4­:²A;œ¿Ôëø—_ùJÔïÊÌÉç­%@£Ñ w5*D #²Ô¢‡z=<ô`¼¹@ƒµÓé ßïãÒ¥K¾ç]ºt Ï<ó *• r¹\$o€=žÂæÌU«êwP‚øú¯½Mà ûÞçd³Ê0•PJ@V¼ ,1Ë'ç´½_~ȸub¯Flœ¼Žß”V¡Œ_Êë§cìÞLð\ÞÕzݽ-‘1Ví_"µšªº;”W‰0±o Š|Ë=R«Ê†#Âuð] 
×p“µµµaC2È(¬{J×­qÊø‹Eõ˜óþ‘ï·è^Ëåp¯×ëí 4X3™ îß¿?òøîîîXZ4òY›¦|sÈ­—ryç'‡{; Û îFœœÓ*ÚzŸÂÛÛiByMÃ|»…âz…ññmA—&‚óA ŒÕ‚Ãkõ“±²JùÜi·•ò\àHN¨^ƪP« oØ,Ñ—,‰·htÓ4ãUp©Õ®$âÞzîœúm÷¶d³ÃEõœ‹âZmºh7ƒp–…ËeeDH;5YlÍòõËåñB0ˆì{«ç1Œ °ž²(µ/¸½¼ü² ¦Í— žù¼ Ãòì8O%‰=šfkŒ¡ëÉØ”‘MHÀ=JD6%¼ÖnØy´aèÅ$›,“è¹§&Ñ·sÞH4X …úý>:J¥’çy“ð»×5†Ö‚ÞÍ`­V<ïß»ñà`¸ŽÝ*a¾^:Õ¹‘ð[;&¬v/v {K½^Óõ9cØûfÇgvŠðö(“™":O×uìõ-—Ý («ò\©GÏI-nÝ{t]ŸmË µ’E‡³ ÓÎŽ{H¡p¸pž¤×à<î·yCaûOJŠU@íw(çâò³MS}4Û%ò·ÛÂÜîÝ‘c’K\¯[á>Â,S:h&Ùï°¢KC¬YfqÑVk4"Ì’+HÝ ÙD”~éíî¶i&†]XÏ>™ŠPk¡P@³ÙD¡Pp5Hûý>šÍ&J¥ …BÔsZoøîäH%ÒbÑÚų/Ä+õÊû_оËgbØcš‡ÕŠÆ‰[µ[·â—â…u®]$ôV®'9¦aÖròza×C¼cGàºS&–—Â~/´ÛÁƪ„ÝP’:œNHõ˜1YH°[^Ý3(ÕÛÃTÄe±2CÚmqTÏлÚh(Yv.²«U«ÕÈÛ:î¶ðö3@Yo€`ïpgšæüS:¤=žt㇒Èy6«vüÝ:ˆuf“Ì-o”ÌPU‚/_¾ŒÍÍM<ÿüó¨T*CFé7pxx88/Ø7O Ãðþ˜JÀ=,¸V¾ð-Êôkåb'‹aƒQªåŽ\ûä·Óp†KQ$7¯åšãµ«!Çèv]’(,GOŽvÛÚ½q –ÝCzMIL©V•ãNDt,”Ü$šfå9‹IÿjB"BêM©R3(¸$ùÊÎ곂›¼sÑMæH»m‰¦ióyÏç‡u½¡^yÿvh„Æ–Pk¡PÀÕ«W±¿¿f³9r¼R©`}}=Å–œxîÆ;«º…g³À9(c±âb&”‡Ón –¡<¤Nêp¯¶ë !λ*¯½wiõäZ¼Ï–Y‡‹> x{Ò0LC•ÄI By  CíªKDŠ[^ !n0vÁ%»¬çóV”X˜uB€ì™‹8Î$Š@Ó¬ÖX¦©ô»Ÿ§ŸFh*e°ª ðåË—±µµ…n·;xÜ+L8øîÆ;ãÑ¥Ú–=,XÇx ¦Aa¾­“s܆f!6.l·qrîœSH|õ‰¯òw¶Ÿq &$Æ8× zPÁ )€T«Í7§’±±1œÛ7Öb^tÞYŒ:ħº6 cü°w»‡”Ñ2KËØk’0 ÃýÀ¥OoËp¬ÎÊ“¬*º’‹êg°†ùœçå5-B m„«à+«Œî\*D´=ð^m ¸À! A:…ÙÕràî< ÀÑn[a﮹}†¡n„v[yQ¹P' Ã^ëNÓ4ï G]·ú¼;õ8×-À·D=€ybÞ§ë}à·ø?YzíÊFä–záÓæúÉq Ý ZSœæ»)Vð]BLÓj2ïzЭ… ! ­õâD»ó„Ä”VËŠ 2X5Mõ핊¾7o2ü‘$]®õ¨ëº»þ®×­ {Üt$¤Ú`u½9 ¸û#À OºWïÚö¦–á}¾¢^˜° )1Õ„ÌÕJÌcñNc•¤g öL[~ìEÚ5M³ .ÉâýèH¥(±@I ÒMFÐuÝr"†2TÏœQÿß¼ÉâKªC‚]sBþ÷C@édÚ~9¢n=RÝŒM ʸõË!eõ^2cÖÖ|Z69«’@¤«ÐjµÆ« JHÌ‘p÷v»m-䥃ï$á8Õõ`ÓÑ0TA­¦ UnȤÚÃêÊwä€ç¾[ýí·öYÃhH®óªÌ˜CJJ­¦”¿k˦¶4 ‰Æž÷$ y Ü°W’$$æhšUXl(·/›­§AHÂÚ”Ñ4A¦/*!'¤Ö`õŒ•/ç“W¨|Ããq ª%°  †p¹¦„ÌÓ4"IR‰3xh±ã…Ý $æ´ÛV‡Ž¡Í˜r™Ž$ñ˜&°ºjýßn·­‚yL["c“ZƒÕ3¿Ïø¿ÿ|TÉc½€aã—Ü>’V¤¨˜à[]RpZ¹„Ä)äj3†„¡i–:6M“ë25©Íaº9Z-u÷l¨|S{.ê ÷DÝ€wxoÙv,LËBæˆaVHp«¥vå¹ð!)À™‚­iöö*Ö1´Œ$Ó ‘0‰|ë:PþâF@IŒ™T¾Å»ú‡ß÷}ø…^`1125±õ°öz=Ôj5t»]@¹\F£Ñõ\Ó4ñÔcY¥³ (£ô¦ËÉk'3‡,iäP9Ú¿öÊ+À?õTqeZ€z½Žàu½‡ÉB™F¾×~î¾{烸[©¸‡$$b¦‘oÃÊ}wðÉOâæïÿ~ÔÓ!) 
¶Öíímär9躎ëׯ£Ûí¢Ùl†z®®ëø·_ÿºå]­C…ûzEŒÑX% fù€ïü‹¿ÀÃ?L¯‰-Óʸ¦i0M3\ÕABÃ4ò½ñ÷ÿ ÿãᇭ6„ÄŒiä{­lâ—?õoðóŸþ4ÜûÅ2&±4X{½:Ö××™L•JGGG¡žÿßömx÷¿Èo¨ÜTl?Cbôò Oæ3ÈloG=B\™…Œ7ðá’7o†;0­|á _À‡œm>H,™Zë:þëéÓø™_ùÊ8™±4Xoß¾ ( ƒÇ …Bè8ú·ÿðã¡Õ†Ê[50ÜÆ&AŒãHÝn‡‡‡Q#r¦•oøëŸù™Ä{WÓ(i»g'eZ×NªI çÙêuõ“€Š“i“‡4Þ³“0­|¿øOÿ„_ùÍߌzS‘FYHÛý:)ÓÊ·žÍâÏ~ìÇŸŸ6yHú=ËV¿›¢ßï#“ÉŒ<þùÏûØÇ¬þ«OÙ~,êMÆÇ?þñ¨‡0S¾ô¥/áÎ;`™å8>÷¹ÏáÔ©SØÚÚZøõ'‘o¸uëÖ××ñØcáŸ=õTâ•i\äa–ÄåžÕuû·Ùõ§Õáïyè!ü·›7ñw?û³ø¿oûèk<òþîSŸÂ½“üª¸y˜q¹g?ûÙÏâÎ;xË[ÞÉõ'‘o»þ~â‰'ÐívùI$.²0Kâr¿Šþ¬gÌ,Öàïz×»¿F‰‹<ÌŠ¸Ü³²WÇÒ`í÷ûžÇîß¿ïz³ ¼÷½ïÅÛßþv¼ãïˆz 3áõ×_Çùóç£ÆÌ¸{÷.îÞ½ùçs÷î]<öØc8uêT$ןD¾à=ïyžxâ <òÈ#xä‘G"û,‰‹<Ì’¸Ü³™Lßõ]ßÙõg¡Ã?÷K¿äùßà"›]xâ"³".÷ì£>Š;wîà­o}k$ןD¾©¿ãO\îWÑß>úh$×ç\y˜q¹ge…«=L 'IDAT >®þŽ¥ÁjCp’Ëå\?{ö,Ξ=õÐgJ©TŠzdL"ßð;¿ó;Q—{6êqP‡+¢þÒJÔïë$òMý¢–«¸Œƒú[õç@†‰eëÊÊ €á°„^¯ç»˜'$)P¾IÚ¡Œ“4Cù&i†òMâH, Ö\.‡R©4”¬iVWW£!SCù&i‡2NÒ å›¤Ê7‰#§Ž£„Ýn›››Èårƒ$ïÝÝ]Ïü>B’囤Ê8I3”o’f(ß$nÄÖ`Tâ·TÑc,9I”o’v(ã$ÍP¾Iš¡|“8kƒ•B!„BÈòò†|àˆzi§ÓéàÔ©Sž¡Ýn¯¿þºëq¿caŽÏƒ~¿Ã0<ÛfL;æ(æD&gùžÅñyà'ãIœ™?O¢rüS?õSÇgÏž=>{öìñ /¼p|ïÞ½ÁñÛ·o?÷Üsƒã/¾øb¨caŽÏƒ{÷î¿øâ‹ƒk>÷Üsǯ¼òÊÌÆÅœÈäL#ß³8>üd<‰ó!Óá'ãI”êp",›þSçC¦ƒú;þs‡XV Ný~Íf[[[Ðuׯ_<&loo#—Ë Žw»ÝÁq¿caŽÏƒf³‰^¯‡ëׯC×uär9ìïï‡SçD&cZùžÅñyà'ãIœ™œ O¢{öìÐc/½ôÒñ /¼p||¬v2Ξ=;´;òÇüÇÇïÿû}=w^Ü»woäš·oß>~饗B)Žs"“3|Ïâø<ð“ñ$·L‡ŸŒ'Q¨Ã‰°lú;hLqœ™êïøÏi\¾5jƒ9­”J%èº>ôØíÛ·qúôéÁßP(Ç …z½žï± çÎ ©W(ÐívÑï÷Q(°µµjLqœ™œiä{Ç矌w:Ä͇L‡ŸŒ§M¾g1fÊxrX6ý4¦8·Lõwüç4.4XÀæææ@®^½ ¾BpëÖ-Ïcý~ß÷¹Ò/kÖÜ¿d.÷ïßÇîîn P‡ss"³a\ùŽ«<øÉxçCf‡SÆ5Mó<7®ò@NÜXý$ßq\s‘ÙAýÏ9 sXÀÅ‹±¾¾¸rå %^ܽ{×óØýû÷}Ÿ+B=kì»;×®]õk×P©T°¹¹8Ÿ0cŽbNd6Œ+ßq•?Oâ|ÈìpÊxå:œ¸± ú;hNq\s‘ÙAýÏ9 ÖP*•P©TpéÒ¥A‚´Ýíîv¾¹\Î÷¹¹\n.s8þ< Â¥úý>:Nà˜¦=Nâ˸òWyð“ñS§N%n>dv8ed:¨¿ã?§q¡Á:'Ο?N§3p〦iƒ?—Ë¡T*áððpèøêêªï± ç΋B¡0RB»Ùlvf¦ss"“3|Ïâø<ð“ñŸüÉŸLÜ|ÈtøÉxÚä›:|¹X6ý$ßqœ™êïøÏi\NG=ˆ´²½½ÃÃC”J%ܾ}{(AP1é›››Èårƒ¤æÝÝ]d2ßcAÏrMÙ¥g>³8NâÅ4ò=‹ãóÀOÆ“82~2žDy '²éï 1Åq>d:¨¿ã?§q Á:gì%ÓÝò$úýþ`ÈyÜïX˜ãó`Ú1ÅqNdr¦‘ïYŸó¼')ßÉÃOÆ“(ÔáDX6ýt<Žó!ÓAýÿ9……+!„B!„XÂVB!„B!±„+!„B!„XBƒ•B!„BH,¡ÁJ!„B!$–Ð`%„B!„Kh°B!„B‰%4XÉLét:è÷ûQƒ¹A'i†òMÒ å›¤™4Ë7 V2S677M‡ I#”q’f(ß$ÍP¾IšI³|Ó`%„B!„Kh°.qÕ÷z=BÓ´Á±n·‹ýýý¹íŒÈµûý>4Mƒ¦i èõz©Q ÃD%ãQÉ7@_&¨ÃIš¡þ&i†ú;Ù|kÔX&677Q©T i …:J¥r¹:2™ 
šÍ&Ö××±µµ5ókommáÊ•+( ƒ›òêÕ«Èårs™o·ÛÅææ&Êå2J¥ÒÜß_=QÉxò PÆ— êp’f¨¿Iš¡þN6ô°.˜N§ƒ—_~»»»ØÚÚB§Ó\»v W¯^E¹\žÛÏ•+W°»»‹ÝÝ]¼üòË8}ú4çr-ûrùòå¹½Ÿ$~D%ã‹”o€2¾¬P‡“4CýMÒ õwr¡‡uÁ¬®®"“É … \.ޝ¬¬øÞ,ÊàE¥RñÜ­©T*ƒkf2¬¬¬Ìå:ÝnW®\ÁùóçSs£ðL#ãIo€2¾ÌD%ßrœ:œÌêo’f¸O.ô°’™Ól6qúôiܸq#qó„8¡Œ“4Cù&i†òMÒLZå›Ö„‘Ëåf[?ëëT*lmmáÙgŸE³ÙLÕ™/Io€2N&cQò=íµ(ßd¨¿IÚI‚Œ§U¾i°&Œn·‹f³éy|kkkrÕuÊå22™ ._¾ŒZ­–š„o2’ ßeœLÆ¢ä{ÚkQ¾É$P“´“O«|Ó`M¹\/^ô=—ëÈM²½½k×®-þÍ"‰#Iò PÆÉx,J¾gu-Ê7êo’v’$ãi“o¬ #“É,d§dV×¹|ù2žyæ4›Í……‘ä’4ù(ã$<‹’ïY^‹òMÂBýMÒNÒd> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½K¯/9vÝ9ÏOq‡Ýƒ:æû1l–ÐnR·x`xdË¥n)…vI@Ãß¾×ÚdDp‘çVÖ¹éH”dÈ7ÿ<|ùã~qó¿ÿä¿9üïïøÿÕ¾ý—Ÿr>dûõù~þï?•ßñC¨Í~ãæ–rýæSú‹=‹ßþÉÇÔñÿðÎ{üÛ=ÿ`Ñ?üôúgÔþ{ü÷_ÚoÏÿáï¿ý·ŸþÏ?­Æÿyoð5 1£ý1û×?Ù¿jaOPhÿø‡ŸþäcøÐ6í÷?ý§ÿŒ?ú¯?…oÿõý?(*)·¿ý?y÷í?|Z÷ß¾TºñóÓ±7j )}ð¿C W·¶ŸâGŒÎ9ßâKMˆ>}øRJ.xbެþôz¤Ë6èúKý( Èí·‚ñÑå—½þóþ›/¤Àßý7´fTõû?Òª’ÛG¨5x×ñhí£Å[HÆ‘ûw\~üGÆßþîçoÿéñ¨öýöŸ¿ýÝ¿ÿéßýõüë ®õײoó u…£®·VîˆW>æÄÂË+wíMù(¥÷Þ|}H¾†Ö] G½KÉ‹Õ{÷Q›ÿ¤ö»àýoê>b@­%¿9جP[ê}}°­úe°µöc°™?¾£ŠqŠ¿ . ­H€H0yðUðh%ÞÕàb‰Ùù“;áª$ÿªZj±Å½/G%·¢k-†ïö‘ªW|ÿÎ}8ûŸP¿ýî©qiÊú½°n—"§ÖÁGo±cų!”ý]mùË¿ø«¿þ›¿=š³î&´ûµxìF¡„ÔÛ·Ô±7ûZ±Qõ³ZÂýwiTû/¿ÿûŸþ:Ýuººï²¡ADÀ”˜¾…? 
þ«ÕɉÆö¢,”öÍ÷úQ]Ç0•NQ±eLD gÂxÚwÅž›oV†wŽuÁðÔø‘Bª¹uÊÅh÷ÑÓK'!*ùñ\Ç;Ù¬—o><çb«¨€eŸ õ5´£ ±-ÁùʶøÀµ‡ÏY¤düËðÎŒwÖ\ðœÇ†ŠOc´2¼“m‰¨ŸÑãïB¬.x¼2Qp °ÇbCד•à…mÊTÿÑCì.%«ŒSÓß`­ƒZB ö^RÙ”1(>|ôœX` ùÃ_ãÅ÷»T:ZgEx%±ÊðÑ¢ëÉA¦Iå÷i˜|%†?E ¨•á•lJöƒG *n–áü>¥²>ÎC,Ã<Þ™0DlJtcÓÈä|rÖÌ„w>0ïcîXEVÆUÉúj´çPY­ÙöðñN~ŸÙÿÑ1Úë_Æ;Ù–Xð}\7ð`úÙpBòÀXù<ǸðÎ ëñ[s=±‰Pð²{¢¢@ØvþðÓGÆ€þÞ*JÙ{®âø6CìÂs˜Ô˜àÎ_óÇœ[ã9Ìr´¤çðN~œ–ÓQO»',žãP±ô­ ï|&,fWÉsf”á÷„ås˜HÁ{'O»Ç )äŽÖذ`í v2O´¦1õ°vr]¶ e8#Ö¦šÃz‰£{%@©ZÞÉoÓëx.ã{ælk'bäï%Žwtµöbê$¿ð:e IX+Ã+ï)‹êÀßB·¦`õ”uÊf{£ã¨D¬žÎQñiN“Š•›5«'Ø+kõáÿttÞÊðN´ekg-˜P X†åÃfãû˜±cwrÞÊ2víkÊâ9|ƒRœ©Q˧¬SÍÄdÊ~ªÒüs”õ+BÆÀÿÿ5± :—ÝaC÷AnW0‰@Ò>!w@×0/йƒÜ ÂT*Xƹžs[To¹3wr}ôþ 7Æ®,-ÌɃÜ(²—B;¬+|´ /6”ØY¤‡|@;ô¢«CC: +Yﺳ ¤ØÆ¶ŽÖ7ìÄñÀvªX É@P;§/ùM ;à‹Hƒ'í ™¯bÖ9B;’Œ±cXNhÇ€ å/—v@c‚É”0Õúm?F¤%ÛpÚ±CþÄ?6\…v²ú¸àhãcÊãcíÐΘz¹zÇ™|C;xkŒ¾@oîЧ{K'´3æZ æ¥ÚþŠÞs‡8 ]Fq+ý€6¤½TßXýíBØg´PfCSHÍ•^K<˜ãG A¯³Q–+?Õ«•ÙèAw±&þÝÎlŒiÁg¼òÁl q羡è`v®l ä…ØfcT€^?å2avIøwöfã1T—}³™§ÌÆ—LÛ¶žƒÙ«ÑPÀÌ.˜±­eÌøv03 béC2³KAða{<ÉÑ14CTbW޾ VÆAìÌ5s†5#6~‚F©+|Éæ@bû›Øí[ü¾Å?Ú_Œ3DþÇíP?0Ì ÿ/ÐÆ c‹ós?Rhs¥D\Q´ IŠ!Æ^a6?«ã8ÛdWfÓFI<”SÚ Ï5—ÁÜSÚæB/9„;¸¹¶°òÝÜà[+©™¹ÉÛ(´«Ÿ²ø î@ä4%†MÞÆ®éÆî®ò6vk,ÌÈtÊÛ/ Ø]‡ü«òv£lœ(Ÿò6„ì©S(Øä튙D¾•SÞ®Ü9+'î)oƒT汸6yºØP&5UÞ.P¾ˆ/º=' Ƕ5åíÊMõ‚¢e$ÌvÊÛr3 ¬­ÝlD¯Ør[:åíBé êI<ѽ½SåmHaw ÂÃ>Œ}UØ2Ô†Õ?¶a7>ú“±òÊÎn¾í‡hOy[§—°;˜…EtcŠBdCÂn”A…ø5dãÝìz6µ9E7‹_’IDŠn´²C.ƒdt¢›ïL¯n{«¢e¥ß{«¢;p«€òêAn*â Ãçlù+º9˜»Fÿ`7›‚‡GóÁn¼ÿ®—‚’ÆŸãDÖº?ݤ÷ëØ¸ÄK¨è@;Ø}0ôT¹îìîTA:¥ñz ›ª„K˜½è†Zƒ‰!ØÈ ì¦Q‘li&€)»Q†YˆÍ¼ÅƒÝ˜SXµØ©è¦ ª_ÒŽnG{Wl˜‘§¡ïȘ}>"s»ôÁyↄÜ.béB‚Ķ|eÑab ³“ÛAiÀrm"Bn”5NŠ<´c!7ZR¹AŽÝ^Éê |ö< Bn¤ò áõ ·Ã:€Ýi…ÜŽš:Õê|’ÏA{ÄÀ×|e˜7ÄÔRbóáZ$“Üè4æÑs’›Gïý0Ã)¹ñ2,î ›¥ÄL Õ%w»c1R¤KS®^À¢òµN…ÛÝL(ØÅÓ‰m4²A:†Þl£ K"ç¹…¨™£ßñª¡®ØÞ²R› áG|hw³!Ôi‡¶Y‹±íwÛ­„Ù(Â`”iTZ‘͇ b¤6l'Blö ´€@Zb£Ìa9aÄëAl|lLPÏÄFuØ¡ÖO9|6Û% ˶ïÀîfUÉ­ÆÃ:ÒÍÈ}mì*+®Ù5H¥ iZm# 9Ú„}ñ±`ÞŸ€8ÿ5I{„/Ä~¹î .L'ÿe`á‹á ŸÕýŠ©OOúݱ7jÁü67}‚ž7»µýôzì@ŽÃSߪ»š°ýôz¤Ë6èúËÛá ÒßñÑå—³þ_ô²A¶ pCÝ› [4)„ý •?§€…g­ÞPxåó•° á½¸ˆ¥7Åf„mEï"H}èk½ïûл,hí/,¬ßÔ|uýÅÁf…2ØRïëƒmÕ/ƒ­µ=`rj€üNɳ¼° •¼° µüXÀÂ/ÄDÉjêe}ú…˜·ÆüËÿx/& v‰ €Nq­õHÏRr‹¢‹1ÿHTÑhLDòV j£, ÷p¢y[|Z‚ E±™‰ 
˜‹´[^y[|"¾4‘ÒÌZŸ¼8–ÃüTS#%Þ™ZŠÃϘ‚)3Ëb.fÖIa5R:Vד7•5Q‡¼->j÷„°@ùò°Ò4€÷Í.P‡|L>«5(A…¬ƒÕÜfô©[+Ój£¬²¡…ZL@Jb£„òœ ? •òb£,4cù™>•²Ø(Í¥•®)+Ã;o‹OŒ3€¾hf0¨4õq°šòúÔ;ÉÇäcÚ32Ë=(ÓäC’ôXj3Wvªb£ 4×a¶ÄQ¶Å´qh˜|S6JO5ªô˜xM4ÝN%2egÞ‹Ô¶˜yn‹ €ªˆ½Çƪ‹‰Ò¬\ +±Çú…/ £LL”?‘ÝP#ØšÍ|™„8Z(ò´ d·…  qêÏÙkH&O®öìW %=û•j|Oáme<ØÍ2©HrLBÃ`u˜û%‡- t̺aG¥ì7Ú˜ûð´÷Xs2É5Ç="€q##ÀýhJ裕橸æ¸$º'ÇjÌX:>6ßë7‚Z ¥3«grÁæyN[@@‹ô¤Ø*Îæ˜ìŒVï¤ o˜Í¡¯ñÕ^Ù†C=céØ½Ò‰h…¦>l›+ Í3~èKjÌã”Ú Y@{@†¾SÛ÷DKGq O¨Ý0OzMSeß°?BeTIŒ'¶i«¢ oðP±8¬®Œh” Ût¤b”æ"ØŽlK¿œ=ŠíÈè#¬¬2Ø,Øntx$3îØŽ“ømöåÛhfu¹¢i'¶1£0Ñó iPnçÄG yݹ]"€›ŠkcSn€Ûœ»DÞÂÐÏ6xÊmn‚˜KØ—òÁíhͼÂÑ”ÛøBxaäzp›‹©åÑèÛÉB °>Ç;…ÛŒ• g,åv®˜V9Õá‚ܦ}³†»cÂmúÍùâúÁmŒq¡†R±zrH4ûƒÛ´€6,?6~á6ô|ÐŒ}î[T@§7¤Dp{X@ D”vp;ÓðŸ/!D¹²iô;·YDAcD^(¸ÑóÒÃ"©àƼ¨ÐKÚø¤Jî\ÐW(Ã=©äÆœ©¨ë¬»@Ý*Ý ©€;“ÎØ^Ú nè xk`@Ðn|„èì1÷„‚»Ð„ZÊ„ú¸)…±e¥-* bè@Õx€»pïÁ”žÖÁ¼ÅøŠ×”±é ¹ñž>g²™‘›?¡[uÌýãÞž–é„¥Ji‡7†ÁaBÇÒËo³—õÈùÀ£‰·'°¡ðÆRÀ•2¡/ðÂð±—åoL| âBùDæt×t¶ÉÜ‘’.c’ƒÀÝÃx¢þ„wv„@Â×è¼S.¼+ûƒÍ)ðݰf°À›K&ŒÀ­ÞÁœÿxc:…n¨!&®õSèfTcK‹ÉE o_W,þ¨ð]¨õäÞé†÷–x£-ÕÜÂ+¼‰ L2 ß)tã9ÆïÑOðÀ{ ƒVtC«ñ ’(5覂Ë]¼¦t°{+v£ µQC9ÐÓéì†çRÑ,"Ìí@Ñ͉–ÇèÆs- “D‚n<‡Iäfè» ;X·›QŠn”¤=$[:ŠnV‡50ã.ÝìBâ8 ;0J D¾^Ew°IÑÎt°;p `uaÿÙÙ‚M²˜KHÙÍWbÂÆN¡›£ܵT•Ýx'¾r-C¸Qv£ŒÑ%Œú„Ýh&>fëŽn ˆRt㕘ïm†ºuż°ÉÝ*ò «ÛÑÝh c°ºmåŠîJãn¶Ü†DNð¹½yuþÝrSQ‡Öäz8É­âJnæVrSóÁR>»>y€ö–ûi-qd‹È<­%ŽþŽÇà#äv 1tµ #’›Þÿ\ã@@Ò°"aÈ·C¡cMe@L¸M×?ú•F°—re OLCšRnÓIËÒd.få¶£ ^r‘r›Ïe0¯ £‡p{{nå6„ƒ©µÖ±±®ØfQÀL©C™Ôvfò(qœ£ÙL%â{nw i‚~oËM°M÷r†ä@>ÏÉ«ù¡ [Ç' =¦iKHÍ"ìö3VIH͈OU¹œf0IÅîU.J;Õ¡•-}1ðv„9Ƹ†ÀÏ™úÅÂÃ>«û ÿÌìÆÏOÇ^©¥ ··çɦѭí§×}ñ5áÛO¯7Aºlƒ®¿¼ ý=ÿÊpl¨ôŒ ’n' Þ ÀæÓB}#ü9…2YÙˆ{ǤŒfed25+ÛÎ,Ñû8ÅõBÍrruÃT˜y8¯™}]ô[ˆ¸}X¬:êŽC&‘ <°Ð£ù•K^Ü„G‹_23"Äi |k†®1ž2}Ù„ÒméI<å4UkeYõÛX>Bå‘X7ÊðÊÛÒƒ®¢Í×™¿RV7ò(­Ç¼± ‰Ri!˜º#ÝKŽÓ¢Yª¸•˜-\6¡öYàû°ôD¦¨Ð¶“õ®‰e’îo¨V~­ .>d¡zãÍã_Úˆ{Ÿ† <ÝüÒÅ0)F' ef˜–1®–> “˜xž. 
{Y¥þxÍW31†ÔxzÍÊÄ4INÐÊiF„Jýñš°fÅ„œÎÎêÅ4I7Ï9Z¨@õ»mÒW|â4ž£¡øš°x.2뀷©PƒØ&Ù@T2{g¥yMXô™³:’Ô ¦I†ü£0™¡ R‹¼',žƒÐWx$ÀÊÄ6IoTų–D1MFÚàl]±,‰i’†"@|Ø›jÓ¤÷«IL“ôû1àÇ|{5‹iÏeLôhöküýbšdà=ϵçQ$¦ÉÄóX”‡­¬ˆi’vÌ_óuÔ"–ÉHC(×øxL,“že{ƒT­b™ Œewy¬Z·K©ñTœ‘¡V1Mš…Îa mS9µ?w·³ì­xÅ6­éŒÚÁsr›3šûmöyVȆmž£å6Wá†mzƒiòí‚ÎA¡Ša,}Á6ÊB¹íÆ‚m|`š@¹­ŸØ6—%…Û<5Z4x(ØÎÌ?ƒM¼Ž ¶3#˜®ÇÕƒÛ(ÃîÒ¦/]¹Íí¥aÝ7;%¤ÜÎ>ç–šE^(·sa[BE¸³#´«™Âml!˜Ÿ~º£•Ûɼ8Ø'€%€ó¹å7ÊДvm!îD“ammD )¸ÏjyžÐO¸·]w€›ž9JPy,}7>0vSÎ…z€ÏAþ¿ÌÐ n|ü–°§ž‚›áœ€³Ù{wpSÑ›/ž¸36 Æ\ù±Q¸‹eÐàq¿¸ƒ›ãQ± FÖw²¦4¦b9Àïf¶ßO¸ÍOšðѪ¢܉çÚÜu¤_Áx·0¹“?Àë<`?¦‰‚;ÓUË#nõ·†-¸9Љ^žAK7þ.ÄÎL¿„•˜Í%¬ÈÎÌJ㯤ŠìÂhÄ‚NçÙ²#¤.g¢£»B©aˆÖ°+±¹¸‡á;]ÄÎt—D†—}éŒé×ãÚ¨ù±‚h3w€«LêaÛzGöJ¥6>(ø”†£|£67¦Ë¬Ðný­@Í'©Ð;æ\`kŠ6~­÷¾"À®s‡)`ã#抑k<€]ѳ@Ïï|n6(ìW ñvÄÔ L8`'KUr¨T`suã€Æ|›1Z Ócì l¤³cO'¯éҲмô¯SbüЧ¸Æ?ÒŒž=pÍÄ3]džkîb9`bŸrv¢:à¯óà®U ‡¾”×4í{JOéÀ5³ÎxzqÌ¿£¸FF¸äaÑP\ó¹B¨Z‚k”1² Ûi>px˜=_ÛŠk~ú6G ¹âÚ××9Ý ×žªö ­}øšÇF¥¸æÑìŒÎ´OpÍ€hlnŒŠâÚ1†ñ ¨Øpíèì¾ò(®äWo‡žâš_ヅ#®·f ®™¿2Wª 'µ“EÈÂcBµ“…„]â¡R›eîÔ¡µ· !ØfYªmº­Û±Ûš@†m }1`ÃväH0XZÅ6]´O Å6蕬öñÛ {YI¥)­ØfP+-GéI´¸×G¤r›‰µ˜ p(^Ânhüø@™^ãƒÝL̈TGŠ e7‹àuÜ[vsb Ç.ÚvC]ÊqtAØíÍ;|ù.•ÝÌ1áó2h*ìfbNì>3Q²Û›…0æKj^ØÍ¯ÇøÚ‘xCÙ²œÒÈZ°Ã›yK‰vxÓ-„Jc·x{3ˆ2õ`7Š<Ï\ûi‘æIåÌ»=ÓŠÐL5™gQ|Ž0(»YFY4¤ÓF¢9ó”Ý"¯èvßtKÅ‚nÇNrûOá@·cÂ,œÙÓšÀ80W/s*ZÇ0£[ÚB ë×™«WÐ,jÌÓ««ènÌ™ÂÓ©æÄWt7Þðsìtã3VV6ù%让õ›/`‚n&ª-¼5ľ¸¢»2¯VÖĬ ›ò¯I1èæ-W©£÷r¦è® ÑÉWœ„¢»ZJ~ˆ'7E7Û™Ý<è'䮌)×]Jîj¨¨Üt³wÓ;†ƒÜt¤;划»r&1±ð$þJnlÉÁÓ#o™Jn=¹ªäÆa^læÕ™äƵÞ÷z)¹ ¶–èè4<É]h¯N<úÖrz"RajæƒÜ4lwFÚX”°’›YOZ›gFÉÍN×ÜJ´ZÉ]í¶‘0oÍrWn¸çÌÀ¯àÆðè¡7€»rLÑèfÁš n•–Ü…nwW|5Ç´‚½ã±Ú™ØIÁ²0n4ʸù…R½¼ nfMÁ›h“ÜÁÍõ÷dƒSpóeü4r¢)¸é´c:±áˆPp›‘‹`¤`SpF%óxâøèî´ÕùsÒ¸ ‚ Ú ¸uL匙³yÕmúòUO_³®*¼=ý:$Q/;¼s3ÎŽ ¼·Ï rw¶,Ñ—¯ÊÝÉ8˜­œðΜe#Eú!wcÍÒ‘ßÍC(ðæº`ö€qe‚Â[S)¼5 e“»-™Gžw)¼é¾£çmDï+¼cáíFiz´Þô¦‡v]Ù£ðfò*üÑZ•Ý(bî“™ÙBàÍ}úfçGxG» <ê§ÜíÎ(¼¨ôÖ4&JïÀÈK^Çe‚Ò;Xj2È7€BoMQ¡ô¦¦¨z3 ¿AŒê’Ò;ÐÏìÓ(½QFßsQÞJoM$¡ôf°A¡Ü6¤A¡7Ï@(Î¥w°;ÈâdŠÀ›¯Ì®MéFà­'”ÞL¡ñlï`ׯ@(£(ðæ(bä4%òª B ZYÇy…7¿(}ÿml…o”Uæ>ÙÞ…õ•Ýx,1aìæß|ñ¯z~%»±þÊ¥äïì®4çʆn†7ù+í£¢»D~d7Ón*º™É‡áTib}E7Ó¦ðÐBñ@7Ë:wW‹˜tkÂE7ïƒÁWžI-6“‰'Ö±ì†ê èvü(õ‰YA·^ú¤è¦Q!ñ„Èh¦ Ûóâv´d¤ÊRtÓyÂÔc5+»Q]¡—wvó % Mm3™i¼x'†ƒÝf k…ãÁnM¨ºÙL<ï[êX|å`7uo~†‘wDÙͤ—ô¸Ä¡³ » '´ý!v;» 
0v;Kb‘ܘBÊnFŸ4^^šNÉ›Ébý »xÃSc)»i°õ¼ktZØÍ{.ã' WxÓÉ¡bÖTvxã±D9í´˜0d…EN‹‰Ë F‚ÚfyÊn–y¦ïèá`7>!/âY·”Ý(ëžš± »QÆ+Bʸ@DÙí˜Bù¸ÈDÙÍú2/_te7“ 9(‘s"»ùÊÂS„CåvóJžJ#Úø>Ân–yÊÁ£™Ân”áwÈ fôUvó*Ÿ0=ŠnM½£è¦ÖÝÂà4˜ðÈ£“ËÅîAà©2L'Fÿ9®Æcù‹1Ÿ×ÿŽ '_Qwï^©ÇNñ5ž‘p×yùý·×øŒƒ1Ÿ9tƒv7Bû ±öz ½þôv,vy¶@~ú4"ÕRõX"îörøÐ”=˜ªÿ9…¬Ëö&Ä+_ð‚ÂëËVz”mZ¤ë¶‘«§¶Öüxµ³Flõ¿F°~[Þ‰}5s·|qÈMª’!—šßòá [†\ëÿz$==¼8ò–EA¾I°ÕòV$VóJ$ÏaŒ¡$´öK‘i#ø—×â°ìª%Î,ÙRì@;»ŽbsM;=è2/íÌyÎü‘Ã/€ÂÕ#yåé}L…«KŠÚ'fm¸&þZ*SkÝ¢8 “eÏä%Èßx eÄ’ÄŒðÒº(ÇÚªÖË(lvlYPxØ Ž[<ÕÎÂÍcÌ5 }à ”-êq ;Ølqº}ÑéZƒúh‰Ga²šƒX?üœó  «åÐ$·û"ÂåÃô–[ðÒ8hŒ€ÒÃpVxª·ŽÌØd¨Z×á›þxe§ß¦Î<ú£µc ¥ƒs­8&¾½Ê5™W2·unæ]ðv· >WážÀ솴é—¸§Y`z´¤½^Ã<-¢lé$ï žæ³GSîÌúïð%ÌHài ‚ânñöÔžñVw½·û×0 èÅ`ú™@cì8µî2ÏÐrªšºåíÐÝ5=IÄ4㬧Û_ó˜Wº6m<^O®êr·DK}žÏó–îú9ÿ…Þÿ…¼t`NdúH1î•YfFaµÜš6‘}cŽþÆ,ñã+–Ue&°ª¦™ÚóÈô­3ó„q§ÿ1žP½&2}¯Ûì8ïéÍ%y+μEÂÙ[Gc±°ò5“™"·ñòˆ€ñž:é5“íŽ]Þáü ¶ê·7ï«CyÎlQž¯FçJ­¥ZzÍdÝyûæ·£ÓŠi.Ûœ}ÕŸYÈ žÕŒ:Þ&ï5“Ù--fjo‰ï™ŒBN›<îpðf|¼­€*­´fE÷\Ü ]™Š32Ž*±²â=“ñ c ÆHêÂã»vç˜É•ö ÄHË9EÄ6Sï¾w°žVá^G¢¡ô–s=öv‘L@ÏP&»lpôOAO3Ñ5÷éy0s‰ÄßJzËc‚!½NÒóƒÓæ:ϱ éI|^ïÒs©‡zFUÒÛ’Äwí¥ ßÜw z®ŒõZçtÐ[!Ó»œÐÛªó1Lß³‚ž ©1ToöVЛ¯²µs1/ çƒÌÕ=s«(èm-g&{þ=—kž ¾¤ô§ŸÑž0Ô€VÕø ç™F¾Døü„ó<ý‰¿¹ôNÎSžË~å¼=íã8Xþb”ÂŽúðA#;sѨ§Ÿ#—t‰V‚z; ÏÀŽò‰L¿o‚‚ú-LhC}åA¦›öŸ žMϼ½¤Ï¼§‚áŠÊBz^ë驟 ÞÖ}¦7#¬OýùûvµõÉ®Câý§PÏ‚o›‡{c}äåœè“sÂúhÉõb 9ž¬™·qåT¦¸»²že50yÁ'¬Ì’ì¹ýô“õ‘‰Y°Úkh'ë¡ÚeÞ]§z"¬ç—¹´™}c}´˜UïFÚƒõ,±±ž>òVêÌ+r±ž•9ÞÍÒúÉz†{[ïgb}`8Øm´ÁžæË)ÞNÚSrŒƒûL¬ pºc´6Üóˆ{£<ë‰{†n0ÇLßqÅ=x1ƵO¬¸ ›LjÒ‰û`W]·}m¸×˜„ ÷ûè î5ò`Ã=Èöß©-´=ž!Ýñ…î9z +¼æS¬g7;3ÑϩӿÒ°ñžo#tòž¹09±¦Þ«¼ç¸Sø;Œâ^Ct6Ü3„Áñq¶GpÏ·f† Œž —ÿš’óŸ†ö´%˜s<°ç7Å®æÝzìAeæ½›É86Øó°pgò¯ÔNØÓÃ4+Ó #¬÷ŒY ×=냅˜`͘ ìƒçšÄÎ{wVØ{R~òrZpXX¹ÉúzZpÈ˺˜öSßM'§ÀžÙC9ê'‚= CŒ=ª!œ°g(}Å.â?ƒ= C2›Â)ØóâÒê\‹å´àX¬V(FþÁÞ5¼½+°wÌÌ(œRNØ3$ _÷ƒl¬g‚ˆœ¯ð±õŽæûÜnaRXBÞc A½/¬Çûð‘®Û6ÖÓµÏËhÃRa½úè7Ö£0иUÇ:VÖÓýMå,ÎmPXÏ›”Ñìád=ÕWž<™B„²žíÎú§¬Ç[1Ôî²E êñÒÎôl墹D>0KVçgõD= C¼o‹ÜPï˜òo›ŸXPϬ3ÑwÄõ¼¨†‰fFâ õŽ©KhoŠŸXpØZš>³àðš jšãÔˆz>ˆ•<³çn¨G•4Ž„µ¡žV̆ROÔóIFsw×NÖ³\ú\NÖoc ¬çk¿ó˜´#V†1U‘íýX$K~þ‰äI¦ð_ÁâOY:þõRÄ'µ¿âšýøyéÛõ`혟?´pulû)›×«•׺ý¸/„wm]c«?½Þéòöí§öœÚþ Fa~yýé“&ü¢ŸÎ3/YÆ0(°ŒÔ‹¼èüŸT`F ëæŸ^×ëA÷š}èðÊ'œDx{Í®ý׊XF¬waD%`¯Ð½Ya<{_ía<{_ëá:W܃·÷¾¡UèÎ _ía<{øÞ7´ 
Ï®ßð—yI))ÍúŒ‡äṩª~EŒ†V’!7Wfôû$ùÙ¢~]-÷žûÒy"êÔ’~U-¾Gfö£\èÎ{j)R‹%b²-¯‰~ç>œýO¨ß~·„žœd-ÓfÞE•‡Êëa{žaï(ëÞl GÇÒæ%©ù)Y]íb HÃñZ¶du<²- ´ýúÜæ©ó¦ëÔu fæ6‘ù4/íÙyRÃ"j£²ëW_yL‘1ƒþ±Ìj˜ë5OO-&ÕGœ??W¸Æ(}aIÝʲëïP—\ù²²-™[YdN˜GÎV;®h¿>c•¼™*˜íE–(âzdžÑH÷‰ôFÊÖÎ-,ÂÚ½ž—”­,Ž —;[ùóRQæìâo÷qüX–;[Ù°åË”%W«Õðô¿,W¶Æè֙꒫5ÒþÌÐP—\­#-a¨Ëe­üó¸ô±-)Z·QkËe­{ Ú’¢•eËcý¹«%öË’—•¼ÖÏÈŸë2áHC³qäҺï‰nÉÈÊ"¿µûã°K_õD¿äceÙò"—ÿ= i\ÎKÙ’•Ï=S'òz›ëa>¶L–|¬Vv/9þ×mkà¬-avÛëaûÃ1qÉÄÊŸëúH3ØvÕùÝÀtüõkA"ÂXÿÌñ´¼äöþ^BÚÈÓXKÑÚh‰ƒî"á,VP_ø¬œõ–äç.[9Ë|z…³ñš… Z°×{– myÉñ³·um߯[^rþ=Þ¦ïðv6Mx?žnïyxð¶È’VàÖaÉ݈«½Qä®KQ‘ËxíR¥l¯N=%0ïçYš)NEX|ïô_Þ–°´b¥¯]C° V0ŒIþìCBaí³¢˜Sý;(æý Wsš„ï ˜c<V§u¯Tó®§Bb^©tiq¢Ï9 b&&x>Š‚˜º/e+Ù éÄìÖSSø,Ò7ƒ8Åvät´cCq„4|5*…{]V±R˜&û¥h…p‘®Î7eÀ¹®š@8‹8.ÎYI»Rx‡|*ôòâ®ïA8G)çøq’8§ï’˜ Á 1žz&‹r˜/ü„ÃŒhY½p8¯ßB9œƒÐL@œï­EœEÆój¶¥ÈL¯ØÂ=A0o•Ê}ñʵ+B_^ð¶L蛓ìlB_~·g¬¾¼¾ê{ðݦ‚À7‹r"f+wôrl–Ö zù¹—¯ ìEVyPØ+ã­ìåwyH¹²7'gWöb˜‰FÑË>-8ör¡-¯øÞ³HÁËQ_˜¼‚7çïKÀ:Š ÞœçÊPðæ©)“ÛµzÞ £P󂻴󶸩++po*lsY÷ÌͶP¾K[šUîÞ$^ÿ}‰×ɪâbŸ‹½Lü²@uå-ŠV%O€dF*pÃjˆQÞ†¹ÊV܆´ 1‚Û­Â[æ‚Yº+ÀÙeàŽÛQ?./ç|ÆA€‹®Û•7„uøº!Jã…º!ž‡ R¾Pwk‚°×RùçÏÙËæ \ vùȂ֕ºv7kÿ\æµ Lï/%Ô U!Ô Å.o] }Ã**|ƒ ßj~¡MMamäfƒˤUÄ5†›ùAZ¾™â÷Á;Á*f/¦±:¨Ô­f¿ŠÏjuð+ªÕò¦ l3:ÈÚV‡ÕŠùå°Úå‡C(—+°€ÿnD;÷bXÃgµ¿â£é—ýéÛõ¤2¼÷)££cÛO¯Ç䘞ÛæØêO¯7Aº<†}ûéõ°éòüòúÓ„5DÇhÈèÑúR¨Á„5ÄÄhyžáÊÌ…õçÖð¬Ù‡¯|Â6¼>±ô§XvÛ†Þ…Ñt‰K…ï¹ÄûÖ ¾èïWXƒVøZ׹➋Þû†V¡;+|µ‡ñìá{ßÐ*<{ø¥°HÓ´CX¼ðka ZÉ[a ZË[a RË…5üBÜ@ä1ˆ‹í™2ìÇ ø5nà_þÇ{q±‹bßM”‡Ôà$r ¬¶œDÍùÒ ’_:OrË}°ö“¤“_®‚5{Å#²3,Á=^ƒö1h°¨ž)H´@XDðÄ“5~ZTyzZ­#ÅåâWú|f[ͯ{ ªô<Ú“©QŒl)-w¾ÒYµ49©k±¦$~¬¸Z((†,~,„,n,¿/¦½U'zîŸ*Ë_ò±G£Ke¹ðÕL%·æ”˜àúÖœâeàIU”ù¾XÙR/–[ ©e¾K››x±Üªj¥&ʼÔÓD—Ÿvê¢Ð‹÷+uÑèÛúÊ. 
}_=;‰"XÏÎg'ú|[ìãÙ‰:ß–Ï‘ýªÍ¯NíìE›‹"ûåvW8‡årW{•ÍåDƒ¯‹:ÌŒe÷É{Ù³$™ÛlÑãë¨=>÷¹ò·1 rÕ½­öƒœ–«\ï×$ÑÞWxðÒÈEy/ëHååW«}imí½®Ÿ%/¸ª%Ï6Ù°º`ÞŠPÂòÄ× X¨¹Kø:n¢®ÆîÖ…+lMkØÏ†Ö´®6ÂÊÃ'0&ÓFײb¥,Þ%ÈÊ®ñiËb–Ú@› ;hWŽiÅtv™Bi/ëþFY±èn”-‚R¡ì£”åY8'˜åa¯ç±º ¬]®[ÄÀbJœ¬«q{Cm“¡m[Ä€`x‹XË„µqcQÔJˆ„¢6ɾ¨¬åm²7ÂYqt+g¯(A¬F (cY0¯¤å»¤i“¤J €ìzŠÛ$ÑiŠÛ4ý‡Â[Þ ö oyÂ~€^P›$ÀO‰K)°åãù\ik§=² ß|f RVC ³wôÀ‹BÙÈL2á Ôª‹sCmT1W˜[e¡t{\GO©Û½–­ÔmWè›·•O€ÛdÌ…¸<¾¸ s.o00-Àµ<ù€mî߇mVº¬°ÅX*°¥óïÙ”„µ¹^»‹°6—•‚Zu_+k³Ä)ls¯¬Í²õksD kóä;QËÖÝQÔ²Ï(iy­Éò””µá,Ц$.Œeo–Aè{\ÀC9·…,Îf‰àSÐ’ PµÒÖ–> ÚæSžÕf+f9ª ¸…³y®%,]%„ÝjZ9ËŠÊrœŸ. iuz(iï† i·'„´Ûˆå-@ž[‘«^ôa¯*«Œðb`€J·î‰«Ú¹ÛÖ˜:ån CéàÖï ¹ñÒ>µ›öì·°€uQ„-.`@×òs|*å†*’ìÊ\ºzW3Ã Ý Q%J^”^ sÃ#È ¤È j±É{`Àó%òpáxenXC¹aÊ [<°·ÐVœ¹›Á׈f Úe>Hßi%t`3„ƒ¯Z¥Ú ľ٠nÓ–Ú ‚@Tò**ª½ ®“b3ÄuËU‹sn&™ï›åÀÈ©ÍÀn3+‡Ú ä\Ðf3¸S±\›Õ@ô´ÍjàW«HÚ<ÿËrߌnµÀ¨ñÀ ,Õx >þÍzp7¾„-öâË®ÿâ/Ç¿ _ ÿ5\m,}ÓõÿYíoø1®~ü¼ôí•zÚðp{æÛ~zÝï^óáúß~z½ Òå1ìÛO¯»þ¥ËóËç_íúÇ&I•õtúi^uýc'ocáµ8áÏÉõÿ¬Ù‡¯|Â6¼¸f—þ”gR¼ £á6Ö _s³Âxöð=·±Uxöð=×ÿ:W~׿UèÎ _ía<{øÞ7´ Ï~Éõy“Z‹¦W¼æú×JÞrýk-o¹þ¥–W\ÿ‰jH %YøúwýCz_ÿøÇ·ÿ…úñu}^JŒä/aõIñ Ø[ô.!?ÉS˜6êVw¿_UâW½öÕ:Xb~.xŒ5®y‰«Ú^VÏE¡^<³ÆñD좡k‹Ö^äw¡bìfÒ8–-JsÉ«â.VÇ’ós½ãsÆ‚é•Òc˜ «Á±1–Š·¡ ÁwÎC:4Ý¥”UuO«O´ðÊóÅ °¨'¥®š{JmÏŽ|`¤¶*îi½-msI­ªµ'¿!­þë°öUs§'bé³Î?±Ë”n |ZCå+5åk.ZÑ=ª[xV“ž§Ú“ÖEÏT©^ްÆÅôT½8¤ÖÄ•jò=/y€õíÄ!µD*5æk^&‰Å¨AœR~m=æ{Z¦5T½FñL‰*_ãîšZjK›ojöšVµž.Ä[­¯iUëå8rÍ«Vûb’«yÑìç¯Z½ž]fΔG««W¢–U«’* –U«¿k׺jô£SëªÑÇ5J¢ÖU¡b"µ7Ê‘™·”¼ë­¢;‚o£Œâ——ª.X!™”üþ² á–S—2¸¬®Œ Ár”DÖTç.‹^Liái£"Xù£ ž¦Æ Á«wö ð@ÜÎÞ•¢ÊÞ“¹Uú¨Ì•¨™ ½âdØÐ;Ba6ò¶ˆx×=kïÒý»ËT˜Ø]]…vÝê€?¸»®Ê¼+'•¼â‰ÞÐ+ÿ½r®MÙ›¯4ånX—îÁÝÊ]¸E¬íSîêÜ à¦õô”W·G%n’£¡‚Ümˆò°lOe‹ Xöݲ…¬ wë›ðW÷®ºå Xà¡üÝúÝF»—éòb´€"Ø·%}€˜¡VÏn¸Q¸­ÌTIX"…ÂtY­¨](œüšì5 …-ùAþ\¦?niþ áä‚» „é·[X á.áŠbÞ¦ò¼RP\‹Pzåq•}Xy\WǸp¹®>{År]íJå*®9¥r•š çý¹•Îw…‚¹W²"™}]òÁä*Q åZ„×+“-£uþœÉµŠX»"y{L\EZ"×5xTÌ^Ú¿R™íX*TÖ7 •«Jt‚e”-$*׺ªÂfAs­“å+”몄)“ëå}ó¢§7BcÞ]´@Pp\ëám:†«¨ŠáºFd*†kù>†uj†kù! 
=x`“„ý÷%a/¾n1oX-'$GŠÃù *TQx“@EÎ^Q»ÈÂóà‚ Á).sUe`J mDN—„£èMeã½Ð\fg»øYì +wãS¨ÈÕt]ÂܘUÞÝ£cyW¨+éºQÂ㺒ÂP™EèîJ\¾Òw+º²K¹/ÿ¾2WÓú)sã°¢ÌÝŠ$w¡»êÜF ¢VÞJþBÁmôú”¤/\=éŠÛ§ BÚèä !XC|•´š‘PQDxØìnù8›Ýa…V»ƒ ¬ÚÖµ§Ð ×ÊWð2³Æ2zÞ {œ€—Ù ŸšÔüàVi[¸Ë쌛ùAúrTAcÖÈ¡íÜåK³?Ëߌ+ø´þ7¼$WG~^{÷FEÝÅ=²`ûéu·>fËY°ýô~Ö.qß~z=²@º<› ?ýHdA&ÔÐl_»¿à" Š/–§k®u†ÖJ~X{TÓ¢äéP­«–/Oñ äjuZ;ÔÒe°fÅÀokAXí_½»5Ò@BBzO«ª¿†Ku]íNëM(¶ kÎŽ·B 6üÚšƒ¼r²UÑË›O*¯üŠŒÀ PxVP8¹"Euõq‰qM Üeé+‹û®P¸Ï¹Šá. 7I¢nE™ºb¸]wÃ(†›ÌÅ0  Ïεb¸ÊÕ4ŠaµÓ+†±á-sO0LŸÜ3ÓÃu=ZªOžR¸^‹^ø[ue þV‰\RþV±+€«ÑQ—õ–%pY£ŠÀ¥-‚¸´õ…Âß"sBø[š¢y0&ⲡ+€Ëz–T\òº# Ëš0X\®d á²&½V9å®.k@¸È& .¤ Æ ‡©Uù[VáMù[fl©’·¬Ù¼e½PHÁ[Ö,¯ƒ»EN½b è Ëîº82µõ²²wÙ·|&ü®ÞƽQ¹,ð•-QEàº&Óø–M8^á[Äf® .kð‚¸\çÂ:5ÂÃ$)ƒ³„*ˆ³Ü ¤ Îë].b¾ñiºp8_IÁY´ e0Þ¶¬*ap‘3œ á|eÞQÑÃE’ŽL óBØe} †Ë†ÚÃÅës+†³ìT‡mȯWön-øò{»ïÀw=°½±×­{€¢×­®Æ½k¾³½î¬pWߦðu·¸,à]“ÅlÜõ*¯àÕMTÉ+Ù{6òJ˜ã†ß5¬~ÃoÐ ƒ× eƒË"Xm ¾ëMþ®!&/†l’ï3cvñ×õ¹C xÍ"ppwK>¢V‰jUÓƒ¤ÝØ,’ÛJ-ÂOÁnZ³à)uÓ:ÀJÝ”ekô¦k£Þ שçÍø —BmaÐfÓ‚ÜøpuÅmaT Û¨B­˜²µbvÓ›Ýa½Ke3;D5IˆÙa+«3¤`°Ôì°&ÀÚÌ2¦›ÝA"ò6ÃÃV–4º œØÇþfr€]59#…ºa :WêÊÕ… Ýp ºze¡÷¾(PaDêàJö®æŠàҟԆɅlP5@ ÚweÜ Ì)e÷²³¡_²ñJXmÄÀlqäË¡ÞåÛ—oRåˆ%áòÛÙ¼Lðy ÞqŠä;œàéà+510¨Ä¯Îm¿½îÍZ‡ã<äÞîFèo¿A#Ö^ÏÁß~{=¨@{}5Bû‘°‚êiÉj­0ÛλQ•iW|/¾uº¯þ|¢ Öåû â•¯xÑáýå+}Ê–]?]w¼Y¥û¤Ê½Òù .Ð*ßtKç+¼`«ò½^®³Æwã].îÕoiUºOª|·—ñ“^¾ø-­ÊOzù¥ƒR̽ ¦üZŒÁVË[1[5oÅh5¯ÄÔà"46èG­ýbŒDüËk Fµ „%[>’å4"qB£ $û«g±—ÕÒš¦_?‹›«Ë#P"Š©¾šQ×OÓuiˆ÷±»qžLžY\>ÚoÍ-¸VeExŒ ïó$.ÖV=µÞüíºøºäÄ “¥!k±?F*Öáâm—,“ž‚Ì£ô/wÏYÙªô÷U¥D^Š$O°5}©_´þvÝo‰ŸWµ¿­1î(«–° Vög!5º9ºq¤Â}ô:ŠÃkÕEQ&¯vÝ[é©]£!\:ãÄÖÒ»É ßߨÊ eÃé%wŠ¢WÎrÚì´\³¨Öþš˜tŸ]?¯j¿Øä½Ï«Ú_EíG!ÞwÍLšìûú`µ…6;̓6¾½/«êߢ|¢Þ.©‹8¾¦¨¹»ÆëêæéZL=({«\‹ÂjŽÉYòbLôžŸôž %_•5±¸&¨&fW-£²}OͲš}P¶Ú]ƒ a»«˜}0]V8¦gÙLñ+„ñ{bâ×u8‚£«¬¬àÅêêçÔžÂý5/é&[F¿ÝU8&¦ºnüc3{+Þ`ƒqzþ,ŽõdpüƒW·ýÆ`¿óØ8ìı¦ 6>Ó}ò–\‰ûê‰T “‚kC…Ç­kˆð¸‰irƒr“Ûn•ÉÍ]à×õ|ΆãÚdb¯4®Yé)H®’âecrÕé­P®khÌÎä ¸T(‡kµ+“å~˜ Ê¥k3•A° °+1ͤÃe®@2—"[‰ŒZ§ÏFæ,/U2¯¹´7:³Â‹Ú+™“v@Éœ6Ê Ÿ%EïÎçpïSŠæ ]S6«| €öwóÛæ«ë¼Q:o[\ßÜbkK”ÏkHÀŽg¿X(íÄ i‰ƒÜ)í¤©Êê,«QQ»6GN¦¾ž°‘:.¹T»Õ’½!;8©«ž¾ùTÀ"’íJn“¬òÉìMæh—z ãÂë"N¯ ØšÁf#vñ[áJìíƒ+±s Fˆ%iÌÆí,Æÿ ÜYÂA•ܹHî\{e6o¸_+f‹Ërc6ï$Xy.A yÃï îœ/ ²gê¼Ja•®…ÛšÞ~ãö}§ÄÆlæpËßav.Úa¶^°A{Æ{o¸ÎY¶¼Ù¹Èþ+ÄÎå–ÍÖyMŸ±±:KN‰ Øû@ 
°—‰!°Þj`/ϫ󶓮¬Îí°:¯7¢m¬æKŸ¡RT_úÒ;O…ÒYDŸ Ò¢´òô"d^âô£U¼ǰq:,À:9­â›€šòîjeXAÍp¢N³l*ÆŠèTÔ¾  Ny“ P§ò9¨Ó›¾q:‰yA1%iôžØ0­Ù6NÇôG8͸èïÉ×(Z¬˜¾é7DG¯­pZŽÆl˜ý^Ž‚èý™ÑA!4ï'üž¹C}æ¦o·ü†hV¹ƒ :ÔÏó7Á²Ü¸Q™æ“ÇÏÏäèÔµ9T1( ‘CÑçË!ËŽ"X*w(–C9i6µAt;$ÕE˳¢•ÆL±Ð]`ÌF¯V¡qÈ7qW³Ý+¤%Ð!)p%Ð!ÿ‘9Hœ ÇáÊ\2PÒ„”Ý¡xSÁßýüo:ã¿þÉþ•½ÕŒÒñ¯—Â>©ý·ÌìÇÏKßÞ¨'¤qå@;gǶŸÊsìô¥+Ðý¸r ƒרêO¯7Aº<†]bX÷»Á Òåùåå§Ïšð‹>&ϬC¡V¬dp5‚C¥Gÿ‰‹éW†6G‹YË~è’rU¯G6Ü+öaÃ+pòàí»ög\ж×wQT<»¸bÇÑ´Þ§äEfº1ýYûSðþGuOÕ{ƒm®ƒ­õ¾=Ø6u—Á–ÚÏÁþeþPº)-4R4¦pFm-ÔÓÿüãaRIlLEU #6Oß÷£ç_WK2csÄätgäÆóáèZÍ¿ù ÿÍ’—úo÷ß–Ja×þ2õï–§-ë_t®‚L9bµ˜_ô×—sP-ûwy4æ/ÿâ¯þúoþöh-a¶ç“r• å<Þ½E71þ³}2~ÉWð_~ÿ÷ïå+ðu¹Îé÷>Š‹.’mÉHXâ0OAÖO”ÛÈHè°Q}+‘‹ qö ï¼N~dÈü¡$ÈÉôîMׯÎ,UÞCQP[5y¹¼„viDfʱ€OU’‰ô}ÉK˜yhÃ'hqÖ–à–Û 3=¦›»oÝÊ–ü„Ù|œ1¥nõ÷˜-s±¼=¥Æ;óï_¤:ž):ð<åç‹•$…ÎCàiPgRŽèüHR8áJ!;¸ÏöN^ÂȦ¤ì­ ê >…³¦„'ŒŸW´µÀ×ñØ’­0›™ÒA=ˆË=‡™G+°pѶQ¶¤+<ÊFºBWª7,õ‚–¹h]`öh6%º±q°¾†Oeexç=dÔN+4oˆ!„…é8X–¡•9dyÉXheµ„8$±—kYs(Åᤑjü]®¶1Že¹ýpYY²² ?”Vœ5¾,÷ ‹{¬Žª5Êê’¾Ð:—Öf÷'†º8+­,åªËV¶Ü†È2ÌØ†²N·%‰¡µ3£dŒG[®CÌ9àï ZÕFÙ’ÅÐʰ:ò˜•KD Kr¼ö²Ð—D†,òøÛìLåePo®÷DWQŠŽQRPüœ7Ü.äd:„¥’•-© y§`#½‹}˜èžSÄ,â ¯¬Óý9w`áò Fd*Èg¾â©ôò[±lÉfȦ@a‰@“•ÙÝ‹}ÜÎÉç8̦›’~™X’RîX2£lÉhÈ2,F, ÂQ¶Dœ×,‹ “܇% }&+yBÐÍÊšéås²2;X·¬¶cïÇ›P§ä×”ó¯…(µE² øNmžŽÂ£49Ô¶d+¾C›ju‡oã AíâØ!vÀµB»8ZÒCÇ7 ;´ Cé\bqÞ¡]øø«ù¹Ú…‡xL†´A»Ð\ƒ­Åm–yìXŽ'´;ºÛ@h£ ÈÀÕ ´ÑŸ†YRmí³¹Æ ĈhöevçîXÝüÊìN#þ7Õx0eŸ$Ìfw;Ñ.”~0›õµgKfÛí­X<Áæ§2»3¯g©0›e%b¢ì ³Q†ÑèÚf³ñèNõ&ê+³9-#q:íÌݣ¡)ÌÞËVhsGuþ€6ŸÃÞçG¶k¡6‹V¤žÛ¡ƒ7mX¨­=WjwžLÃîm[„B›EØU°••Úî ,ÂîÒm6²€yìíÎûA¼ 5´»¤¼ä…öVŸ`›wÜÆ¨Øæ;;àŒæÀö^¶reØ£1%lŽ)·Í  ¿¸­¯z3aE·U›Æ«wf¦à `>ÑMçLÉãsíìÆWö˜Gs+»ÁR ƒ°7乚 ²¶Càîvh±ñ´Â!p[b=D9ž·Î$¸;ï¸ÇȤ!,‰ÀÝiF ­'»yåú#!*»mX"$¾|²›b.§x°ƒâ`´“ÝØa -tÐ÷d7›‚´ÁäžÝΜS„v²Û¡«œù§¸Íë7Z½öÖ ÝüÞr¢;ð›7`à·íÐ1vX7DxE·y>]Äf¸ ›^ tÇ»z¢›Œ~3­lC·Eaúžâ)noc%ènv$<Å–ÚnÞèLκ›9#åÕÝ|g )Òîý¹Ý(Kƒø‡ÀMÿTÂÍn–AØHÃM£ì¦ÓRõÔB”Ý|/èùÞ-bbJ¾¤ÞìÁôÊðnÌ„ÓðùOvo-v7ºï‚}½ƒÝxÊo*~–­ìFY……âd7˺‰Ÿá`÷^¶²›†¡N™e">­»ùª=ˆþÝØ»}ÇBš…]ìÆÇÂ^2MoìæÅíV–’üÎnÀÚFÄP·¶³Û.|ÇwíqDVvS”Gw!›§ƒÝÉüõõ”ÝtÒ • ôÁn:°“o〘 ›y±œ8ãt3BètÛM÷ÍÑTt ›e´–A>A7/\ˤììæµ{2²°›ùA¡À ßå`7º€ç}®á”»˜Aßñ8Y©ì¶»Ÿjº®Â›I"ª1á`w2zü4±¬ìN¦EBº°eì75ïƒÝ–‹šüNº“iþÙ—r›M€Kš†›Õ`®¡§Ð,ñ t›ñÑ„Ü €¨”›Æ¶#äfZ ï’)ÎJn”á›A*Ê;™Í 
7äÞ&‚›ÏõR°“´Üf„o±€f¹™¡;Edvr³ »Øz’e˜È-„Prïe ¹cÿèø!U3;)¹µçŠn$Šn&{‰üx!èF}ÍAû§¢›miP­»émŠn^˜‰7ù)=¯äæ ±ŽÙÜFS„ܬ®àí)ùƒÜlf«˜Œí¶–pŽì™|öBF2¼KئS‰ÿÎC–¾ÊðYí¯ø”úåSúöF=© }‚ä7;¶ýôzAŽé¹ã`Ž­þôz¤ËcØõ§÷C¤ËóËËO?Ê!жΣ¸_‰/øP†˜h-ŽVÓògÊð¬Ø‡ ¯|À2¼2±ô§XÛ›ÞE‘z×¥Þ×½ë]C¤ö·CÖê ½7ØVá:ØZïÛƒmSwl©ýB}:txB}-”A*y-”Akù±P†_ˆLÑò­¥¼ú“ƒþáÅ`ØÅí„ÅG87I'Á"/”Çj5;˜z)…©aKb¸[³²åþÂܨï\*ÓjßѹyÜ3)Ô¯•u·>Ás“:]ªã¹-Vrx‡œK±“WŒK¬#îÒðSØbfør¶2H‰¡ðÚ+ãr—!c°7›ÿÅʲçOß+Uý½2XQ³Ðü ¯ Uí6°¦´\jÈ2¼:Ö ‘Ò*à ÔÏѹ´… @eˆÐnì±¼‡ L­l^&À’Pè—5Ùa–Ûõ ÍŸ^ˆb~Á´ž³5á\.ÃúŸÊrµ!Ëzñàññ˜óq¾â¹r›_S•PñV§*¡â>Nu„ ˆë;5 ¨µÇÚ͸’š„ lÏI¨fvB»†!u àÅP½Mô.¡üˆ—É3u‰Guv)À3§˜éf®ÍN"Ïwv'€’–ÒÔ³_oFxoE)À ÐÛ‹i¿Ù/wÚsÅ<¨üž9,—²:ç¡ka)`J· ÓÇÂré!˰ÜTc{.J¨íP‹™rr|®=´êZ†i_ G‰ …þ²¦ÓÌV¸×–$T€Y*;¤ÓT͸¶† `tJö6Ñ®(þ}Ä²Ä h}y¹þp•2þü7¸aƒ6ãy°VÐíÄ„®ØÖªEN)´˜a‡6ƒBæ¼Nµ¡`£t‡ñQ¨ ²~47×pP›Á²Œ/¡6ƒðlšë@¨me€àºS›ÆÌñµ™<Æ—z5S¨ÝyXÌ]öÅvgÆÍÜcvåÀ6sé5 ú«l«§]±M—@}"Ø&C"†+˜K±ÍwÆá­>°M÷1THºlÓZ Z¤ÑÁ¶:²Ût vÎýrb{ukîØFæ´!óÁ¶ºÅÛê­Uloµ-Z`ñ+¶ù\ó±Í=§oÑ 'ãÕR·ÙøyPíàvç…XãÎ¼Ø nu/+ºY>¡K6¿Ý,‚PÆŽ¥èfu™¨ò‰ní‚¢›ÏY´€;ÑÍ27Sè†®Õ pìò‰n:÷¯£Lº9b Xª&¡(º·v£ºŽ‡ ežìæ;±¸áíTvó9×(c?v³ {›±ŠîÎ(ÞÆ<’õ@÷6še´œ'6K*/‡ (½¹]ÆÐîôέXÎê‡t#ôvæ3‡ 9ɾқW†ûäkÒÈÜæGjó¶2·~X•¹íúšÌ£þ¹{ÚEg1)*s3ª T·sv›Ì­Sl£w&ôK’ÃFïÄ *?ýŒ½™køŽÝè]uÖýp'(½3(ì1νœôæ)Z(oÃ=¹ÑÛŽc‚Œ=MémÎÞÌ£‘'½™ž‡ã®žô†ð ¬ÍˆæÞaVÝÅæOzì¯Ø™6zóÖ º¸lYnôæ{µdoéŒ.z“/Õ˜«g£7±ûHðJoök.‡Sèîù°÷ÌñzsªcʦáôSz£Ì£yn(žoyPf¼RáÍWB® 6ÄÊîý1 ÈìùHž»±›EÆ6[;ÊnË×-lü`7ÊÊÐéâÁn–AvË)èÞ[ѲÌ):eVA÷öÜŠn† ®4â9Ý”:Žèæ+ñÁÃ%ZK¸€ô@ÈÍÃöR«Ï¹y#t©<4nE7Ë&åÔ¥Ýü¬˜˜¨°^èÆO fpîk‰Á¾- èö")Í6Ÿt÷l^¦ÝP¶‰çQ8íèÞSAwæ—WÆv;1Å–w~HJnú¹-ŠÍÂt”Üæ“ÇúØ´’F @Kn3²IÉMï+6O»8Èê òîÄëó° C>ÅnõÞ+¸“]D¦©à¦³×‚“ÇwpÛmØÏê0 ¸ys¥3í®à¦/K8專ݨ(aso>ŸàNæÂÁü&"7êÉUìí·:ÛÜlDm´,…Ütšc ËHÕ«äV¯¹’›nå+të@*ºùqºM…´£›ÕEmÄ4)ºéáî(²YÀÝêýVt£ s#6La7˰>ˆÝƒÝô¶ƒ_eD*»Ù…°ëÝ Ú‡[ÂnjPt³¶1±|b1‘E7Šb/$p8ÐyWfú…Rtot3F¤@æ3C·Îï/‡ à£MNm—Þÿ:–¾.ðYíoøm®~ü¼ôí•zÚðŠc7¸Òl?½î«¯ùØ~z½ Òå1ìúÓûáÒåùåó¯HLÉ} ÿëÍpZb@X,}´øó xVìÆW>` ^\±KÊ3%ÞE‘x°µÞ·=ØöÕ¶Ôþv¸ÀúQ“p«pl­÷íÁ¶©» ¶Ôþᑵh"ÿkáRÉkáZË+á‰ò9t—dáà4\€—­áøÇ·Â Ëë&¸‚nCLÌÓŵVϦG–`“ÄB~òøêov¤ÚÌ/L5 ž§å˜j‰ây2+VöÃD[b~î'4wƒN+Ûe Á0”!3ª–3ouqhKØ\ã(Z`Æ)WWØ+kO6?ÉH¥ã±Q]^5`f®¦ÑÖôf&`¸¯(̼-C+fµe&¢ûžBFC@CIih6¥¬ 
03òÓ>n‚zaŒÃ­MÔ@3£Z+˪W;êttF›î4~ÙnÄ5oµ=VW˜™¦=3.Doeí¹·0ó¢FÌÊéÚ(mU€ mÄÀñ bi«\h|M—)ºÏ“Í1µŒ3ñ,ë«ÌÜÐ`c6ŸH¡]åÖ' Öà0ÚŽ2S€-²zx±>W*–ׄeYO-µbþ»êV˜9öiv©æJ¨T,¯ ›yt­g™_`^S=3àYÙª^´³‹¦ÈVj–÷„å;Sèè µ3¬ 0žã¹éiz©Ô,¯ Ëk§¯-1s «þ»•Q±¼',ÓaCŸá5® 0ÓýCknÅ\¥5® °¥Ë†Î9<%5­ 0/ð˜I#¹@M«l9ÿ‹wÑæWM«l7§†_£æUfYéµD?Ê ˜¹Vâ8ï`E«Œ¦8æ&@¦ey4ày×.^Z¬lÕ€‹åaï­™_˪[òE€lœ¤«uUùÿl…¨uU÷2‰èè+3lx¶Ã2 sí¾Øˆ·B6pS8wc>à¶mùr)¸™Mc>ÓZ(¸#4y¦ƒL'·½m þ#nóýØ0¶´„Û…ñr oNyçv á|‰e¸±…Û,s73œìܦͶ>gC„Û"f¼©ïÜ.ž„Ô먳p»X¢ÓÌÀ>¿s›"ZÅ®'€Wp¦²k ]XÁ]¼tn–¡mÚ«¸·ܬ(`™Ò½ƒ›ÏAFŠÓ’(à¶C{ Ð þ7Ëz„N\ƒßÁm®-a¸ÜŲƒ†<-n–EZÓ›éå+¸ùJ¦ªÂ˜¶Ü| {ߊ#à>Êp37ûR ;¸YÖiÍÁn–yf%¾7Cx2ÖÒtõ¸Y†ÕS[*u7ßÉ´<1[rŽÜV$“íàæ+“Ãb˃÷+¸éã³ 7ŸówJá¶áÓq¶ïÜæ+±:¯³¦Âm–e³óвpÛÊ*–ê8XeŠ~*±ØÑ⯂ûk^'e7¯7¾â5wv34kÄÅÆƒÝþ¶ ]bÀTØx¿P2¯Í o:X¼F;àí™Eë>n¦ð† ß\§ëÿ€7©8NC†x£ŒÂå<´¼Á[‰©ðf$èHù}Ò› ‡àwÒ›‡cšÀFož¡Ä¾¥7©ƒÆ„q YéÍ2@ÔåQßJoÇ+t0,#lCéMG sWÊ5¥7(À•>£7Ê0¹/9¥7Óz1;é­ôØèÍc‘ñðí¤wù°X6o„¸èMo{¶§½± #¦Ç7z3ÒÐs†¥“Þ`C¼õD»Rͱ›y(<ß:ɾŠÝŒ Â(öM¨b·†‰ØÇ˜=ÁO±TÄîn]¿‚%UìFS[ŒØ•ºÙJBjJÈ"u3üÓtá«ÔÍž[Äóh¥HÝ|.™™©»3B¡õ›©R7˺ ßá½õ ïÉažP=º»E‚¶¡B7£m°96I‘¹»¡áZþ*sw›Xrã“×-Ø ¥EŸO™»S±nLÂ~‰Üøkž!Ÿûß‹Šn;.›œeÛÑ›#¬º!Ö1®F º™S‘WÞ;º©Ç'Ó¤†EAì%¼¨‰x±O§æ’bG/Ú $s‰]”oà‹¹Äî¹#ÄÔ\»D|Çtèi'·¥ô é)勽$[vGx˜`Ä^bøNž¢ö’qXñLÅ`’Ï\ñËj0Iv~ÚOµL²9ÈãôTªÁ´iºcÔ`b‰0&Þ"Ô`Âðìb¤%PƒI¢ZÁxö±'‹Á„ÑÁ.#™LPÉ.$“†Õ^Â"ñtDDN{ ÚÇP¨¬ö’DQ—WÖ¦Ó^’xVèNT{ Ê öߟ€;Qçp³ÌßV n”a ®l& n†²[¬‚…° ¸Ù¬à2Ò4(¸Á·Ì~ îĈ§`Q¹“eì¼ ƒJnÚ6M¸)¸5k‡‚›+ 3r “ îD1¸ß²§€›ÍlÁÏ…%àfºVÊP„܉‚|÷%]KÈmYÞûˆHÝÉ͘ «îÝLºûhëŠî”ìãÙìèæ;±-µp¯èN õ¤i‘'ÆnýéËTf®pç.O™ýsxéXþfÌÀ§õ¿á½¹:òóÚ»7*ê.îQÛO¯»ì1[ö¨í§÷›°vyŒ»þô~Ô€ty6A~ú¡¨L¾EZCÚ—.1ø¨èŒL–†”ð•«ÞŽxÖìC‡Wòú»{ú¾¼f×åß"nÀ*\]ÙZïÛ®lûn‹+[j;n`ù¬Ý2+¾80j\‡[+~ý‚ 缌·Tÿ¡ø‰×¯A÷h­¿; µ¼<°UóJôþ/Õ6¡»Æ_ˆhOìÀ¿þá½T-ÖÕEÐ`ÑÕ,ú>µÚ¡Ä†žF¾ÿ–Ò;€¡›Š²•ÕõÜšÛ‰\îo‡q›ä7(.ܰëð@Ôp·¶¼\hÙ#-Ü<1­¸%vÀ}0Úx&Îl¼ªðV-ŒMcÑ8fÕÊr ý ªÕ³¬º5v ð*É0³¹6^WøøºS§R |Q†ye4–ÔŒSnm¹.{*9óXgkË-€tËC×/õùžëšo€ÆÝHç=‡Oµäàq£Ë*×KZÓ @ÕÇ™^Ó^êšn€G·Jžq$.OºB ”þOoeiI7P쾉yâ«×º¦ОsY?ùhýL—«¢3¹ë“o@zЖK÷AénM7`hæc2Û‰Å{Âòà¯X3›pç9égÂJÏç[˜Ðó²Û¾<°‘“¡¥á6ØÉmw¡ßI1•Ü´MLçèAîHçaºœEBn:¼ð¡ìÐÚNnfP¡BnšIP²|XÉ]ì¶LÈ?ã¶›–×`Äá,rÓOƳ¥¼ýi'7 ¸É®¢^ÉMK/þ貨 ¹‹Ý>~Ÿ(rÓ¿æðåj1»–›aõ¹-DÈmQ±Þ»é½YÉÍ"]_a%÷æ{r³ 
PÌÃ^,ä6·Üœú;¹iºN³‰‚múëÐÄ+&O°Í2W6ÛAÛôÉaL¯›I¶ùc"É¢‚ ¶Íù‡íhž:l³Œj­‹áÀ6Ëм+׋`Ûbæí¡;¶YæyRmä–lÓÇ›âÈ¡$ØfY´g"ˆÛGQ]CïÄÀÆ:(´b›Î¿^ïÜVÂm 9°L0µíÜ6ï'°7M´C5Ó¿1v@ÑÍ+•Cšé­•ÜΑ±$Or[ŠÙû°±’Øó‘Ñ–±ä^ÓiïäÞX#äöYá«´“Ûñb+ˆ˜#hGÉ혬ÂênTæfÆ”Ì3w*s3•J.æG÷DæfŽ‚l“ƒÜ[•¹5‹ÊÜv+rÊá¢ú*s3ú£‰¬"7ŽTÛå¹y&:@:PT‘›‡£¯íz—¹¸S™™5TæF3iÊÜšR_eîfʼ&LenöœG-GŒí”¹™ë½Ü®•¹Ùi‡á ÏTæÖÃå*s7Æë5?3Ҩ̭)óUæf~K½6”‘¹Ùiì#[Eîf) e Ю"·&ÓW‘›'îTËVvogÚUänñƒ÷¿Ì›ÇTäf`ºÍý¹5 ¿ÊÜHmiÜZ¤27á‡/Ò273 `ÀÚ’™›³2ZÐq9dn”¹p{UæfB¯pp•¹™`äºJ‡ÐÍçè ¦ Ýx®…˜.…I„n¼Ä‡ûì¼ ÝÇ Œ>þºywÓ´#Ï/F(¼y¯}ªÐ§Ò)w3ðºÍ¼XBéÍ00{¦ÝzS±ÆÄÏy8\Õb‚©ÃÈï™úL-&PžŒ‡CîÎãzeÌX¿Á›%˜(0¤Lxˆ¸Sö‡Øm·b¥“¼s š!añ€7o±cóØ+»™+΃‰#ዲ›yî\dŠn:ú›%ÈJºyÑþ¬žÖzs-#e<ÈM/?fìÌ:¤äN¤3–ÌURróvˆ¯[î•Ü(£‡tb(¹“é³9ÇOÈM‡1»>ò(¹éò†TÛVcI²OÆ í7f Oœ—iyp«ƒYÁ2¦·™¹(¸µ/&•´H·!Ôpc£ps5@Ôš—a*¹é[oŸ“›ABfB”ƒÜ‰ßUžŸ9ÉÍä¼wdmSr'»{³f?,+¹·ç„ܼžÃbòG÷„܉WTB¾™à”Ü[™;QaåÍQa7ŸªL-eñz n”ÍËãn¦þ@G™Aê7ß ö`˜NpoÏ ¸ysG£áÚÖ¾Óî4J§µDß9À‚]WàF¼Ú—C˜örÏ™|;blþ™§ÎþàÍàÏ[ðŽ'ßáO_©‰i‘Ç•î:_»ÿöº÷@Žr3ô]Ðß~ƒF¬½žƒ¯¿½D ½¾!¿ýPAÅ´lv̾ójAeFY‚¢çŸQÁºxP¼ò /6¼¿x¥OÙR÷§ëƒ7«\}Û[Íï;·³hý¯G¬×wzQ-¥ù›C>¬9ëkͯùuO÷=äRÿðætˆÓ 8Oùµ€­åµ€‚­šW jàeÏ­ÓsðKi (ø—× °îª¥ã+Ù2s@åHãt·-JÐÒ¿1’;UŒÿP‚Q¸jÈ ïS³¶ÂÕÀIg$¶”Úfaµû5! 
{ ¼¥ës¼Fu˜ªåutË|cüL€‚Õ¾I¢ðˆÆ€.öM¦³ó ¯·þ9jŸ¯MAg”¾ó÷…È´´/¾)Ëv§0õ®¯Î)Ì©Ùv2oi½V/”,ŸÝu¸’§ ,-ß i`çæJ¯å.þø§Ãú™1>ÎÂÕAEÁÅßV*žŒ«%Ì“ègÖ+ô«‹ŠyՒ鬣L|Tƒt]ÒŒÂú1³7¿×“碓û˜c> Î ,Lá*ÜÜT fæcå½×êªÂ÷›Ùû½§Nz¥­µ°ü¾&KГî¥Ámbÿ1ÎFïK=õÑ{c,ÒrDÜ£GÞ®I¬Öí°oFzï³8¬¨ËCG«yô•{caóÉRãoj¥÷$vL•¯›½/«ÏŠù’¿]Ò¾¬N+Œ T±~Í ê¥÷LF!&è°°°® 4cÅhHsnÔUƒÖÛÕQXív¿±¨%Ÿ§Ñ?ú5“7S> W%š©°£%%u¶U‹Öˬ1™ŠoÎä-S1 EšÙy²}Ħ lÕ¤™WxÜ líA;í’?›É,d–óóBÞ=§²eæü1ƒ9ÊV]š™…!êBÕ…~U¦QøíEYz0oIÄQXí¢?›ÉÌ ª]Ðo•—)‚íjs÷%]䋱;äy\sÞRq@žÁLí6˜íX¦—bƒ¼ûie¦àÜ ŸìPã5Â'€Ó¸:õÐ;&*mó†%=8â+Ý›Ž’žÊf¥õ/~BzÌ Jù ôñ#¸;±ôzžº³@ï ï˜Žµ„4Áª g¦Öç‚Û ô`V‹Æ¥;è隺 • znõöàl  1»ÝBè¹·`mÔ0>,ÏØâ¸ôb#=Óßâ륑ó~#½™0ß4º …ÁÛ‘>A=O€áE5®¨÷F²+1ú†zžnÂ÷±fê÷mNP¯'6ÔóÜuW–ý õ(ê‚\ܳááÎE½á>3MÉ}ÿî†ûlÚ\¡Ç÷ä=-X3`pã=—>šC!ìä=ïèŠñº÷kã=/³r¦0}"ØÛiW`¹|ÆûÄ é¨¹¶|ðžçOËŒ`ã=³”£lƦn¼çyEäÍÑSÞóDà“"lã==5£+áä=6ïˆAvþ3Þ3{x¾ý€ïíºë|¥Ò¾xÏDÙOƨ÷LÄã%ókØo—`(ìÍýœú³³¬°7?44é:²„)ìíb p»—9¤YB xI­³´ŠìYmâJǨ°ß®ÞPØ›#u¤6M#e ƒÀ[$8q¾²~\4@a(Ÿ¬·K;¢¿ímbÅÑ =6+ßJóu õ@= é|œ¹#7+ÎQ(±Þ"Ú%`ˆ‡ub.^B¸XqF”ÿ¿¹³ÉµG®ðÜ«È tZ")’xÚ=1`öü3pyÐöÈ«oFHº<_èݬÊÌÒCÐ@¶øt)JÔ'òð0bLïÊ~ƒ½ÿê^®x•AÅ)žUÖÖ!ó ö~¦åªÚó~ƒ½§.3Ñ3ùgqL$¾Úao…£ŽÿÓîC{ÿÙË]zgOãû=ã§Æ#·ÌÝ%ù¿þÛÿ5Æ\VÅ(=þõâƒÚY:Ûñ‹´í‰zR9ò Œ1ÙÕ0êžèQB^<fü½î­ú„K@“Û­O» ÐäóÉóЗð«ëtk=¼‘gÏ£[0¸ñ¢Öú»› ,·'Â>}ž¿µªh‚øû?¯_Vx®_þí?~S›}¡óÀêâ{] ÞFÝÖbÍ&ÉŽ_?jþbÿýòå¾üÏøß0ÿjL>=Ò‰N&=M mÏ‘aïc<ð,ûv«Ð=Xá½…ýYàß[Øk¡ö•Å ž1í¹ghÏ>÷ ½Â{ Ÿ{†þÒÝ[8Ÿ¡³Í’ĦßÊ63ڼؾB›M:m-*µ;Ù~Ü2Â:²-¿Vyú-ÇHú©ZÊ1[º1Qî|^µ”Ÿ«eüã믶¤þ¦¯ÏŸ,¢ý7f̼ý ÛÑc[–ÓZ=mÝóâ»Eoö˜j©4¶ãbþòçü§þ×Ûi—ùö§Ðb´1Ä·d Å·Uæ1ûÀˆ˜WÉàñÿÿùïÏEáX›¤°ìÿ}툲ÛîËV?FðýD¹ŒçöÅbŠli;æwV6~Êæ!–ˆ±ÙŒÉ§ûêòÊn âãFYLåÃneGJOi2:Û×Öæ¯íƒÒ‡ ­ì§¸’IZÙ,‹•–IÊ£š´Ì\•ãȘIí¯¿¶¥S[l§yÓÎ¥½'Óz„ž“öìe¯¹£•¡'Ï@³(¥ó lMÑþÖ–N[×»•Í\•GØ»Y]’È“Vݘœ¾Ê²äªŒÕe =ie~ÛÍe[G–ÚV9êíñ_k»4m’Å#œ·É"Η×e—#ÜänÑcb“6 7iç­Ûr=Ä´I‚J?o>à´Y¼É1úÕ+ 3A¥ý˜VT%Þ¤—f¿‡f‚J‹5(ý25‰7iç-³¦&ñ&í<½éMTZ™¼©K¼I»ÎCùM]rSÚ)ù<,a&[c_ÞgZÊVwI”I;M^dÑ:¯DªV–fÈËê ‰'Rµ2éÔy‘0“VŸô€<Þ“×ûk»ªæÃÊ«D™´Óú|‡²­÷¾:gÝyž„™lÎÍnH¶WU ù’‰¬L"LÚ)ÒÆ„oŠ"^&­¶xWU+“w+g‰0iÏ¥Î>šÍyñꣵ_—hû²ôz ó´ÁiÞÜc˰4Ÿa\Ç×p¾±€ñ˜–j¿’{ªJ y¼˜g»Ù-JãžV}˜ qO|‡Ë £CÌ"Ų¦=_±lçIw–­LQ¯X¶²7Xîë®X¶2½Ųÿ¤´N±lç„P.û¶˜ý  f+Sø+˜ã(˜c«ÌV¦°w0û%ËÝS0ûm `¶‹PÀ*˜ýâ·ˆäxSÉñÒÉ~ž|®É}Åû.ûUHŸQ8Çß8û¥È§MáleEî®ÂÙÊa g¿”ùÄçp) 
goßHpÙÏHÂ^Ჟ¢Ì.ÇšΡe€³\†rÙ.YþVYl‡…Z`qlNIñ^>é;““ LÎû7ÈlÉrrv'2ÙÖ¢Nî*ŽóÊSÀäëÕ"°JWôYÐxÃ@ˆ4Îàiœ Òx};HvÂG § `xÔ³¿÷”ðÃã<¥Yœ0øSš{­#mûÛ9¸V›Ïíx Äp>GâpO³R¸¼ðFÔ0Ï#†1\p-Ä0Õ¨†+~“®øZÃíè+ä/ö¿þâümwì6Ž¥»M_çÀÞ†’½ %{aˆ ìÝή°»ñ°w3‰à ãlr¸êÈ2pxÃp¿œ>[û¤%Îéë¹á ¶Xk2¼Uû$»ÈY5ŠÑýåkB¥Â&7·1ÀQ£ðØ™³¯C£°t—ò‚C£°,”sØŠ H¨XQÉ@ˆæ<”A8ÄŠQ&_\Švžü¦Š•‚ÄŠZ À@¬°džrW VXËõ<•,F}z„ »–¹ªYØiãåC³°²wšE¸Ë‡fa¸=ê‡\aiEäiA®¨AÊP¹b”éy+jBç€\Q°U®°ó¤@®¨_ihµ @¸ðô¥2Vá‚„ òz| ¡YT8Ô,,}§êÐ,0¦‚fá ˆ7±O7ˆèïA¬Èú:ÅbÅ'…ŠÅªŸµ X`‹U{( {š­|Hgï=r?JŠæ»Mf¬9Ÿ¶zÙ(ì_Ç’¥•>iâø¨öGÖƒöËM0ÛöD=¥^…’a†áÐ'8(¶\fÚ‡óÞê¡O¸4ù¸íáÐã&4ù|ò<ô#&޼˜ˆ³slØð¨‰#—#•%ð®ŸiâÈëbi“6‹ëÓÿ˜&ŽIɧG:QMB§Í"ÒžêÑ$üËø,O€Vø `¿L¬ðYàß[øœ‰CûÊ2ûÜ3´ gˆ Ÿ{†^á½…Ï=Céî-ü GrãþæVí§L¬ã)jyÌÄÁZ~ÀÄñ+.‰l®l/ë6†Úßã’ø¯]y§KbŠMc³¨a©,mRlŠÿ’Ý,9b:K²Î¶gÀ•Ufæ”iK±éýkrסܖ58%dŠQœ«N#J‚S¢Ÿž`–°#YÒs¶VϹo±m_sºªâPl*?W ‹NåK‘ÌœMW;­Lí™E°Gl:y´q‘Ø#xW7Ø#`O(–§ã5©³í£sÒYtçnot•œœ¾À/Whqc^Ó¹–®ûÔàXU}) þ̈K;ü+®¸Ã…ÑÒáXTé-þˆåë2E²ÃqÉÙe‡5bÁÍÙaÀZê¶À‘´«n ü‹6m[ 5ì:‹ÞVÕ`ØVh XåÚVIÅieÒ„-I.Î`3؇®oâ–$§;!¦k‘æDxèç À–gN;*í–!9ôS)9vÒœq4Ý“ P 6tí¾#lª 7c“ œV&難Û}kñ¦÷GÓP‘¹ýs<~JºíS&‰€à4‘ìÜÉß u¶…3y„€pWa'@k‚¾¶EüÚ*—Ö”‚5â|í]JN±!’h|;Ohì&74ž~q°#ÄaQ ö_“önX‰["íÏÊ_7#ÈO)…ýN8 €Ý‡0{*(ìËîò\[0CÈ zPøe\¯…n€7-^¹Ô= ôQîÁ!/ÀëçÉ‹º„×ä†ÇäÆ_[i}Ph­Áú èTä†Õ} 7¸>€Ü`:r½¾ùyr§É@‘{;#X”ƒÊ]?o¾)ànüÍüÒïÁÝxÞ¼òYwƒ}Ãç]ëå¦{ÐAê¦Ã盫~/HÞ\0y3VãIÞ ïÉ›Ûõº4b½¹]£[…î¸8}Õ•¼k¬$¯õ­ô†¼ù´=“¹ö'2Ps3VÞŒ¥:‚7_Ÿ 7gÐÈÍa\¬ìÍXA%{GÛµ¹`¯Sx3¶ëù1{Ç]Ö¯ Ø›0b9Ù›ØVWÒÁéz5ˆß‘Á /;E®æS‰àJ;•v@¢^=Úƒ F‡õN]÷è 8x”Plù9}<âu›†ü&t‡Qv’’CMÕÂã€%aêᎺøîúNw I€ºCMx4ÐÎõy æÚ÷àpx«:Øy¨5c¼º‡ƒ|‹ƒê€~EÕáÒs‚à"p}‘_.Œ:ùƒÎp˜ ƒÀð ÉÁÙ ãL¨ CIª ðj©aUÕ(H ëKžE4U†õíh×;ƒ|©2$Å¡2`çØw{,¹ò±Óúå*°+xVú¤§á£ÚŸX¹Úñ‹´í‘zú±t¿ZšÉ£a8ô †‚¶Ý< 8ô —€&·=zÜÓ€&ŸO~ûiOÃøhšl;êÙ-wü“ž†âŽè<^}ËÚó‰ž P:XgQœêÓÒ0¡1ñôHêIàô 4¤=uöÊgix,‡£Âç–ýÂ{ Ÿ[÷Žqoás–í+Ÿbi° gˆ Ÿ{†^á½…Ï=Céî-ü KC¶\}8r±,gÏXXÇS–Ôò˜¥µüþ–†b3¤žÆ·Íö|ÓÒ`;kÕÒð×øÂü6KƒÐ|%íý˜Qޤ°ßXt ÐþбiûEA° ¢Y4dîîQUç‚FRŃÇ^©»ÝŸbXŒÛ©ì.˜Õ#Ÿ‡#Wª³9 <Å…2,ñ«1žBÏÛT]د‰Ž…S~¥mýXí± ]¯Ä¡­7sתÒZÔ* †mô™ýxÊ™×¼nœ'So‹\}Ñ´ÓdÊgá°§ÒàÓðy—ZŸ¹C›%+–ÙUiè˜Í×®JCÇ”ñ†Ïš­_Qê®JCÇæðºo3m¨ÿÜœ–ÖÝ•†¥£ÙDþê—ÛCÖúÛ¢JC‡pÐl"uÎæaÛª:Ãh³ˆ-mUa”Émm6¿ú¤AùhfK*1ŒÚEbh6“¿údÇ6ì–ü«ƒ-Ëny+“‰yˈþ€¸–Uph’mÈ?úØŠê 
­é井ZWµªm*74ì£2å†BÛTnhõi}ÓÄ«C¶ª½Ä´)4˜Åhvd÷_LËMÅ6UÆjGjð4œö&ûÇÔL!•;Þýr¿O^ø.C€.|d$oÃÉ«æ‡HÞ‚ù$oÁ†K’·Ü©[ *u=ðOÄ­e Æß n{†g¸µu;ùÀ\[·“W èµE5y¡^_p“ßTôºŠ/ªaUMY(èµ¢w «j‡¦ ÜÚAÅtËiò&·¾ï;}Œ[;OQ¬Ì Ë$`®/¬HOÚ¯55ʼn27¬h¹¾ü$øRæúZœ¼¶+V×`a|­Lžàkû×åâAàž°¡¶"%bÂâb¨€À^&SÛvyù*ƒÀV&ë– °Ÿ'·EQleÂæ,huuÍÊä„­H‘ºau§)Šý<¹YŠb#FEq,S»9J~SQÜÓÃÁKåS°T`?hn ‹Ä.ˆ4Þ P[ÆyoA¼qxLo˜Ä ƒ?¹Ý…,®À‘Œï‘\Ð%ˆä‚…Z"¹$ž§Höã‚]ErIĵ"¹`#‘\0Ð’ –¶Éåë.8É債&r¹,ør’ËaL .gœ.[ŽÎmÙv¢ &@dóo `Ìåh˜«×„q.ïaœñ$ãœÞÃ8œ§0¦ï„0¾n1LK 1œ±ÈI Óz@ çkÏ8 œa2'‡3v“ëÓàp,RģÈá̹8œ±€¾œ@$0½C$pè-Ødä(Θ+?h{ ,×}Œ@.g›âWÌI¸uXAbŸû ””Ä6)L·Q±Ý,=CµˆvùÒ(C´ò:¬ w>Ph< C4Ž¡FŒ2å=Ô:ê©FÔ+<…ÛC ß0"Úus AXýéqÔ øð HÝÁ Ù¡;ÔËóEÝa–i-u‡º ;ÔËŒKÉ–~JµcˆÉŽzj£ T…ǡᵇöàI6¨;Ôk"«Š·DPq× ÅÁR›·bCmä/Ü Cl¨õë°E€ ¹¡VŽfán8âü@g¨¤CáÕ‡)4xR2ê •eè V¦h„›»¨/TlÉ¢ÊÊzŠ7å»Ý –-ê ñ¾,ךÿóX¾³ò'ý ÖÿÄâÈÕ_´uOT´/9:pèì»mN££‡>ã´ÉÇ}‡w4 Éç%ðÐ86ƒØhƒåÒ~8Jƒe@·dæû6†,ŸéhØlÎ<>ÿãË‘ÿ Q&7&¡É×°¼^¡‡¹¡-Ú>ÃÔ` â¨ð¹q¯ðÞÂçĽkÜ[øœ©AzËnÉæŸw5xÇSD¦Lñïm|0gŠ¿x÷6þ„±a²¤Š}Ûzߟr6„Jž²6°šÇ¼ ¡šßßܰuŸ¯c¦3~ôÛæ†¦æ†ÿûësñl݈æ†QÝÄ÷·ãã>$ð¹ccvò_ëMäó&/näsAhG@ºíË[HÛ¼ªÄ¢Äî¤Í2U,:Ü\P,Æ–Ae‹†ErÊ6¥•ó [4Š.-b×P¼hTBñ¢ÁðEñÂ2Aêy*^høÏ ^4ÄV â€Bñ¢î÷.tHCÈÌLHÙÂVü…A-,XŸ0²EåÛxÈfOƒ/hk xÏSñÂÊÒñ¢ž‘š(^Ô×€ºE…=˜ºÅ+Š $‹Š4®”,*r®R²¨í½dQß›Ýa!CF5 © ]zû _„너aç mUĨ؎DƒQ)©dÔ+x-EŒ Ï3EŒúŽC¿¨xé_„›ý¢b¼~è&éïvP¬Ëö²0ØOœ ïlåpSlÏz(>¾‚gVb¶—‹b6ð‘š,x³U\…yìL ¹‡_ m{]„û”‹ÐVŸ7?{ÜKÁV_Ác?â¦hãc^-úÑ£fŠfoÖ½®ã‘íŸi¦hëmÛÒöÔóÓL¡™°z¤]|z hÓæYÊ• ãÁ*µxVùäbüvy*B•>h僶 í5u 6êòè³ôiýv¯òÁgéU~ÐÊŸ¥¿ƒ´ò'¬µúÆŠ2>‰e{ÊZ*yÊZÁj³V„j~kEKKsË1ëý׬Eÿû˜±b°£ydʺÙÎ]¶àXÆô¦Ž~¿eK¨ÛÏ=ã0ó:òÔ/µq×ÜqŠ¥!ðɉ¥æv´QX<,eÞ—zl-žQ´ä#òÒEFN˜n²ñ{cº³u—À°æ5 UHN‰gîT’cÅ£QX¥%›ö§‹ö+’K®6ªšò ‡Ç(T%yÅF‹Q8~v\GYí"Gá!%¯£`jÉ s—®«ŠÉ ¼í£°yœÊ:Z3 ‹N³–¯ç>|-¢òç<èȈô^æ@ïFï—»0€»Öo€{ƒå,€{«$&Ó;?L_ª>™±20{¼ŒY¯ðÎpˆxÓÏàmy'ÎK·_ã};¯ìØŒ6ˆ‘âÄ67ŠÒÄŽ¯‰½ðzHl~“IìÑŸ1e ±—oŒ«óBF‘Û é‘/¯0 z'¤[ ô¶wX¿:%8ç·À½ÜymµèU“×s¢Sƒ?ã-ªÓÎ6Õ=D5yfÓº˜:«³éÄÌN;'<=ú4ôL0›&D2›¦‘ÀlƒåòfŒÏ ÌŽ…Á²Â5x6.@ƒÛýCh+œ.hÿHXøï·ihoEÐh˜1HîÝ‘ä¶ú¹¹} J‡;ì9 ¹CÔ<àÛÊ–òfÈÝZ …0l¬¼žŽ „+#¾Ýïð Â$ŸÁq÷5('âMB__xÈjBˆ‡ø„xÈkBˆÏpòÛ7þëu&7*?6Êo_<ÿ@ ©W.a¤nüˆ@©päa¤nïÑí…úl 0®BPHêÆ‰?’X™„'(“Ôãeè%u£‚½¤nüêC/±l+×w@¥’ºÝ nùDÐG Ô ¼@(‰—¡$ž©B s³¡¤rPE¡„™[‚bR ù Ý$B7©^t“ çfOj¹Pî ¸#å_þîoúo³ endstream 
endobj 3 0 obj 35462 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1080 792] /CropBox [0 0 1080 792] /BleedBox [0 0 1080 792] /TrimBox [0 0 1080 792] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000035681 00000 n 0000035702 00000 n 0000035725 00000 n 0000036155 00000 n 0000036024 00000 n 0000035919 00000 n 0000036082 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [ ] /Size 10 >> startxref 36236 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_skx_nt1.png000066400000000000000000002571161360743507500215120ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ 
œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$:.$¶­ vpAg’Zó!%Õ€IDATxÚìý˜$w}ß‹¾%/IÖˆZHÌJÆ5€5ÁñƒU㈠$7ÕØ¾D2™ëçÆ²µÏ=I·=ñ*çç¸êÉ:±}ìõÓåœÄ‰&,é²c¯]Ÿ¸Ëìü+¡ËQpìX³LA-S¤ÄÌ - ðÜ?¾ý©úVuuwõïêêÏëyæÙí®îêªîO}ëóûsÝñññ1†a†a†a¦`\?ï`†a†a†a˜,Ø`e†a†a†a ¬ Ã0 Ã0 Ã0L!aƒ•a†aœ0 áyÞ¼ƒa†a&¬ Ã0 ³àø¾J¥2ïÃ`–ÇqÁ¼ƒa¦Ëúlaƒ•a†a†×uY‰g––õÙò-?ÿó?ÿóó>F¤s=ðÀøýßÿ}ø¾×¿þõ¸á†¢íŽãD‡¢(p]š¦EÛ=σã8ð<Š¢àU¯zU´Í²,hší? C¼þõ¯öéû>¾ïû¾oä×3LúÉx¿mŽã C¨ªíËu]üÙŸý4MJ^Y¶™y1Šüç‘}]×aÛ6žyæxžº¥‡aˆ_ø…_èº0̸ô“×ßÿýßÇW¿úUÜpà PU5!£_ýêW£çxÍfqe½ßºÏ²=a-abmm-ª?r]7‘ÚeYlÛŽ¶mooÃuÝh»mÛØÞÞŽöµ±±Çqº¶‡aˆ0 ±½½J¥ß÷£í–eüz†D?$ÿavÉÛ¨òʲÍ̃Qå?ì×j5¸®‹Ï~ö³‰Ï«T*Ã0áØd˜q´^§‘e”¢Q¼f3‹À¸²>èý,ÛCrÌÌV«u,ÿW®\9ÖuýøàààøàààÀñÁÁA´]UÕc]×£íûûûÑöF£q¬(JôÀq£Ñˆkšv\­V£Ç¦iFûåõ 3ˆ~2ÞoÛññq×5°¿¿ àøÊ•+ÇÇÇÃÉ+Ë63F•ÿ<²ošfâ3®\¹Ò%× 3)­×º®·Z­h»,£òs¼f3Eg\Yô~–íáàk t¯Z­¥ô¶Z-¨ª ×u¡ëz"%Ì0ŒèÿžçEižç%“—Fþ PŠ¢ä:¦¼¯g˜~ô“ñ~Û轚¦EY®ë¢Z­&dryeÙffͨòŸGöu]O|yèåûÃLŠAëui•÷ðšÍ“qe=ÏûY¶óÃkPU­V °½½ë®»µZ €HIè¥ضøËºA0̼è'ãý¶†a$”v–of‘Gþ‡•}MÓ`šf×5Ä0“ ÏzÍ0e`\Yçke²°ÁZ¨‘R£ÑÀ•+W°¿¿×uá8Eék´jšymè¯ÙlÂ4M®]b C?ï·0 ¾ïöm„aÈÑ#f¡Gþ‡•ýF£Ó4€ëŸ˜‰“g½f˜20®¬óµ2YØ`-ÔH‰ SUU£´RV¨h; İëºÞõ~3LQè'ãý¶ªªÂ0 ضjµ:ïÓa˜¡GþG‘}EQP¯×aÛ6]`&JžõzPfÃ,ãÊzž÷3ùaƒµPMÒÚÚ*• ÖÖÖ"%EUU4 T*T*lll$RÂH1±, Q‡àf³9ïÓb˜ˆ~2Þo› 9g8ºÊ,ãÊÿ(²o Ã`ç%3QÉ«¢(°,‹£HÌÂ3®¬çÕm˜|\w|||<ïƒ`EU%Jç¥æIªª"¨ª¥‘ÉF©Üd‰Ò„¦hdÉxžm€˜ÇjÛ6æ} 3£Ê?Ë>S4zÉ«¬³ôkNÃ0‹Â¸²>H·aòÁkÁ ‚kkkØß߇¦i‚•J¦irj$³P:ÍÆÆªÕjTŸÇ0e‡eŸa†a8%¸ðÈ)Á×]w666¢42†Y|ßÇÉ“'¡ª*Ë=³T°ì3 Ã0 
GX†a†a†a˜‚ÂV†a†a†a¦œ˜÷LŠ?üÃ?ÄoüÆoà¯xżeê<ù䓸ýöÛç}SçóŸÿÅÜð^>¦½†ßqÇøÅ_üÅyŸf.xý.Ó^¿åW~ëëëó>Í\ðú]>ж~—Æ`}ÙË^†·¾õ­8{öì¼eêìììàÂ… ó>Œ©³»»‹Ó§OcsssâûÞÛÛÃåË—ç}ŠCqã7.ÅïN¿ _Ëãï{‘à5¼|ðÃëwù˜öú½(Æ*Àëw)Úú]ƒu™ç°–™Ó§OãÔ©Só> fÆ,Óo¾,×2“dY~w^×eú½—å:f’,Ëï^´õ› ÖdkkkÞ‡0¦áÕaŠÏêê*VWWç}3aY®e&ɲü/¼~3egY~÷¢­ßÜt‰a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`e†a†a†a ¬ Ã0 Ã0 Ã0L!aƒ•a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`e†a†a†a ɉyÃŒ‚ïah (âÿ¾/¶ÑsA þè1³\x ëâÿ®ËGšjPÕüû ‚øõž'þ¥Ïq±HÊ]ˆ×äýÚO½ÞûsHþåç†a†aÊÆREX+•yÁô¡ÔzžPdû†€m–ÿÉßQ¥+÷–\wxŽñAT*â=ñþè°¬äç8°¶&žtü€xŸçÛÛb?ÛÛÂ(‘Ï?Äs®¿Ç²Äk>øÁ[ðüó/ž÷ÏÆHô2(ƒ þÝèu•JYÙØ2Eq§ëÝdTÖj±¬¤¡kfmMüKÐ5×ë|h»|íÈFmº|?6Få}¥ÉóÄ1olˆ?ËŠ÷ýµ¯Ý8›Ža†afŠ,U„• Ø÷‘ e ¥µ^Š,b¾GmHÁUU¡„;Žø?E_Hé¦ï +*Ój%×ëâÏu…‘P­ŠÏ Ãø{¬TÄãýýì}dA‘#¢Zû³m¡xË‘(Ú½.^?2HEž<ýô Tõ%³ÿ!™²!*G#U5–uz,Ëy«%Þ»±!䆢ì-%Y$òÈ‹i ù"¹>y2i4Òçʲ™%ǃPa“\Óõ£(bßéÏ cËó9žl°2 Ã0 CYˆ²N,;Áófi…¡°²œî¦)ô´½B¶—`xúéÛ¡ªê–Ê`ÄYƒ5â—†¢J¦)~ð´Âªªñ¶^¨ªPÀE¼Ž"CûûçȒKùsó¨yP”Ø@Îbc£û{úÎ`oïI\¾üåÉ8AQs’M²ÓKN«ÕÞ¿7Ñë©×ÅïI‘T9Z: ª 4ñã+W¦óFr šÔõ“þ®VVžÎ 0 Ã0 3ÄÌe’ofù`ùf(M’œ°º.îUTNU­ÆNWBÐk€X‡K”á–"T¢ë±Òhü€·u~EÔÁ—¹¯E³)žÛÞNÊH–Q†q6¥ä’ñªiñ¾&l{!#•˜šÁzxxˆøRlúðð¦i¢Ýnt]‡-u0´}RLºŽumM,4A6Hukl .YòMÏAÆ'ݸéz1ŒÑ#ì²Êm‹Ï2È7SL¨»û4× –ïòU›G¤'PY×¢mt"Ã1Ý3èn|'ï§ZMCžàˆ¦uÀ€ØÏ¥KŸë{)²^6ä€U¥"~S2:ÉÙA™–ºÞ]æ—§,HÎ&K—- ï|xxÏó`fäé?«««ð}?ü0Úí6vwwsoù¾D5GÅuEÓ¢Õ?U–ÕÅ Ÿ|ó•ñIC©¼ûûbñeC³ü,“|/éNÕý°máñOwߨÈËÎÚµµ¸#¼eÅi޽Cqÿ”»ÈËŸS«% ùµkkâ¶·w¾óö¡¿–ïŦVKN ÇŽ#d$Ýiž¦'<š¦¸ŸµZñõBèe°Ê¤;ÌËÆfQîEÕÁê+C.è/=‘€dŠ Yi6E0«ÑˆSÀ™á˜¸Áº··‡K—.u=xxˆ½½=œ9s°²²‚­­-´:®…AÛÇEö’ézïQi(]—„sm-Î%'%½1 C„yfÆ,(A$¼‰aÂqT*X–×uÇÁ¹sçð¡}hèÏè%ßÀüe|\hl )¶Í&/¬“Ä÷ý±¯?/µp¹®‹J¥‚µµ58Ò]3|á _zÿe–ï²!×8¥[–¸omo‹kšº¼ËŽZùõ¾T¢šºt³ýýd#²z](`b[³§Téñe¾w— YÁK?n4â׈c0M`uu¸ú'€å{ð¼¤Q°±o3Œd4Šd£Ù2CÎ ¹1L½¾8:Ú$(ª¾hP .9Ó(µ›dŠÖ.rè÷"=>‰§ommakk {{{ØÙÙ‰žê©§ëëëÑsëëë8<<̵}’F|CO§q¤qœb‡ßƒ €šZÇã80 #ò²¹® «så5 è“ö}¶m£Z­FÏB1ö}ÕjŠt¥AEQÏ…aß÷ïïw¼Žã@Ó4èºEQà8N”vBÏÓ±yžEQP­VQ­Váû>\×…ïûPUªªBQ¸®‹0 ¡ªj´-†z½³ôÙ?öc?†£££¡¿ï^ò KÆó@™ä]¦býE èhÅiÙ÷<¯K=Ï‹dAQ„aÛ¶#Y¨V«0 a&^Kû&™UUUÉSEφ­ãj÷<Žã CèºÃ0ày^$Ÿô¼®ë‘,"õJ¾V]×…¢(Ð4-’iÏó ªjtÓ¾êõzt­­­EßË /¼0ô÷Z&ù.+4ÞËó„¢.h4ânë4∢@mâ÷Óë)]-=Ri˜¨E¦ú5è 
º­•VîeãÀ0’Ωi5#L3)…ïÒ¥Køà?8Ñc[”õ{£÷Ôdˆ£4+~ãñƽA€ ŽÏ àºnt/P%zŽÖgÒ/Èá(ŽEërN’¤(J¤ÃƒŸÿå¿üÜtÓM8<<œXã#^¿»¡IòTZwi<3yH–™¬ý¢YW¯^¸}ÐÅrã7â¶ÛnÃéÓ§3·“·D&«Õ2)ðÓVÞƒ Àöö64Mƒëº¨×ëp]žç¡ÕjE ³iš‘‚^­VQ©T"CÖuÝ(]öm¬­­A×u´Z-(н¤°ûûû°m;R¤À0Œh¡%e†°ã8‚Íf3¡t;Ž]×#¥› KŠØÒód“"ßMÓÇ :δÂNÆë°œ:u wÞy'>õ©OMô7¶ŒßtÓM8}ú4N:5ô±ù~ÜY. …²:Ïš›0 Qz@Ȱl˜ú¾íííHîu]2 HÆÉÐÓ4-zŽäÐuÝ(z ƒÎ÷ýȰ„¼eÉcÚó,ËÊ4 ÓÙ¦ifÖ†Ay^›‡Ó§Oã‘G™èo6ï5¼,d Q§L 6êh{z¬@V×ùaYôÉõõu\½zu¢ÑŸ"¯ßE†Fr”ºêÒm\U³G˜›Í“^ËDºŠì ¦r ‘aAäl'=‚œŒô~º¯P6ìh§û—a¨V«ð<•J%2FéÞ@ŽTMÓ"ý‰2y(X ë;tN_øÂðÀàæ›ožØïÄëw÷4Òõ5m2ë*“ÒÁ‡]¿gf°ÊiiVWWnÄ7ÞˆW½êU==—ä1Ä á¼£@QMJ» Ã0Š¢R”r{{†a`¿ã6'£•ŒPzÞ0 Ôjµ(bJJ?E"Óa/1¯"­(Jî×Êpz£–ÓbuuwÞyçH)Áý˜¶ŒßtÓM#{æI™æo¨Ò䟮‘„ä¼ÙlBÓ´Èq"{²lC¯—èr†ô#úNûnöè?È3K677qË-·LtŸó^ÃJß•£›µZÒYêûÝcÌä¥5m°2BŽŽ&º†yý.rF²ºêNÊ’‘Mº_q©iZtÐužçE†%éUäà¤LÓ4Ñl6‘Ò^ä µ½—~“u"C7ËIŸ­ª*n¹å–Fâ0,ÛúMNAúú×ÖĺJQÓa§y0“cT|f+yåùÿƒ¶‹¢Ì¯è™"©™‘_Qzbòx4›Í®š‹")ÌËȼe¼•ŠP f­ôú¾J¥’ˆ4Ò5P­VQ«Õ)¿ôªi&c•¨Î*o‘ɤ¨ò]t¥>²1Z X¾ûãyqÚ¤çÅÒI¥NZ–E#©/•/¥PrvˆÒm©¬@äMgÈôrpöê=Œƒ³è”Y¾å”^y|Œì`uñ™x—à^¬®®bss3ѽŒÒ(òl—aÚúC‰N ”z[¯×#/¥ëæa¨Èdy/ó–ñyBu?¥ó’»R©D^qªÃ¦ têlµZE«Õ*•²P–Y¾‡…"P4;E¹ø°|gCÝã©Ã´aå?¯ÿ0ÝÙœz¬­­Á²¬È±†!Ðh4àû>,ËŠêò)Ív‰è¤¦i‰ÞDžÈè2QùÃØy½Þ¶c#UÃÎÀr1³+œ={;;;ØÛÛ‹Š¸ï»ï¾ÜÛÇÁózÏÖJ-Ù—:_Pº# <|º®w½Æ¶pÃ0è¢ Ø|£Uë¼§*íûq¸÷Pô¹%½‡/ö¡™§Œ÷bÚ]ée¹'o¸eYQ„T×õ(å*í¨úz`æJå{ÖþMé¤<É3 ƒ@DØP],X¾ãŽÓtßP”îîÑÙï³µ£t_PµZ-zÞó¼¨ 5ޤZPñyÊÀsÌh”A¾iÍ¥úSª‘f–€ã<òȱÌ#úèñÝwß}|×]wEO¶Z­ã»îºëøî»ï޶½ûÝïž÷ùuòú[÷‘2iå[6LUU=>888>888VUõøJ–Örp||\?>>V éyí¸ÛøÔ¤ç‹tq6;Ç?ŒÒšñ9ä?¾EPŽÓŒzÓõ‘Þ–ÉÁÁÁ±iš‘ÂR¯× Ãs¯Ì4(ª‚Ü‹¢]“ûûâÚ1 a”Râ™ÙR4yDQ®Çƒƒããz]ȵª ÆŽ¬V«ÇŠ¢W«ÕッƒÈX%êõú±ªªÇÍfsÞ§ºÐE^ò2íëÑ4‡“S¦ØŒ"/'LÓÄúú:lÛŽ¢®/^Äúú:.\¸€••\¼x»»»ØÚÚšw@xt>/2eá£óŸ|PG_¹Ù‘ëºQ“¤F£Z­†ðÉ€žü`ÜUÔêì€Òq ˆt[9­f?õa”#§è£sìÛÿÓwÙìk­s®ÍÎ÷ët¶Ów¡wþÔÔw ãv¶ÑïC)Ë@œªìt¾ŸªôyÒçiÒ¿Méó½Î÷«‰ÇÚ ŸýñÏÎû[:ršâ°Ð\^¹S.µú—»4Ê)ïs#„@Å»~˜…Â÷íí8Õlý˜lÂ0ŒÆÈ5‰üÇŒ¯ýëó>¼BC#”¨òb{[¤Têz<µžçAÓ´è@ëþ•+Wà8N4nLNáí5΋aúA¥²”Šî8q‡tjôUUƒ™'ñÐCEí³ŽŽÐn·qîܹ蹭­-ìîîboo¯0-«‡æsè6sàºnTJuzÑâ캢ÃQ¼û{ÞiþHüÆ*DmiuøÏ,¬²­A|‡¤¼éÒ±¦KNä† dLxˆ 2j]&âzYªÅU:¯i¤¾?­óú\ ±+×ÜʆšÑùL¢ø×Ç“—Ÿœ÷7:uÂp´::êÈ(Ïú¥YvT‡êy^æè—™CÎrªT ä´¨×S8Â0î"ï8¢‰G³_;\‹š$mÈÙs‹Ñd'Ý@fsËM=σe O/ýp'º“ã˜f—×w}ÞøÆ7Îû«(ÛÛB¹'Å^ž‡ WëGÝÚDŽË ¢ûA¯ÙÑ 
“lj砦{mhšp¶t–pY3sbss31ë‰:ºÉ7’I΂š €o`¤&BŽãDŸ÷÷÷Ŭ/E†’  ‰ì13ý"‰‹Ì(ç¥ iÀ„ªI ÄÝŽ³ ùõ@ÒM¿.ëÿL7äP0}ç–e¡ÑhÀó<Ôj5ÔëuX–)+À¬‡ÅI|ô‰nù«¢ÛaB¯íu¾>ĺ ³”P4Už…šž‹ºˆPÇú´±hÛvÔ  ‘"F#чf~×ëuèºß÷Q«Õ ªj4·Ò0ŒÈ¹¥ªj4&$‚h ½š¦EûÇ–Ðñ6 (Š×u£×Pw}ñ[‰1XûûûxüñÇqùòåyÍ…Áó„ãEŽBå5Pƒ H8}߇mÛQöŒmÛð}¿çH=†–ZM8W2ÆÑˆ ÙIŒLbÊÁ‰ôàÖv»õõõÅ7RSÿôî–ÛZÄUE@ñ˜ž)]½%&EÍzž™‡?wˆ¿ïÅx¹ór@w5"…–2 ¶··aÆäÇX2¾i…6 ¢ï*„JÑö"ŠJ‘ó´ ™/Q鼌ÚíÎ{‚Î_ÇKMï=¨E eE&-Ë‚ªª‰ÈV†¨Õj¨V«‘ñGc¥E®ë¨×ëQ×VŠPÊóiÌ”eY‘áéy^d X–˲†!F4Oœž¯V«‘aI†. ÒCi†æöövôÚ œÂr¥•jš¶t㮨#ê ¥×¶ãÔßaÙÞÞF†QÔ´V«EιÇð1L^|ŸS|™á¸¾Ýn£ÝnéÀžçu¥ýîííˆ /*PýU%ØËÿ6ªáGc _#ÿ~f^䩽ûŸÿíâ_¼ì_ ¢Tðg?þgø•ßý•„‚Bãir+-4^IÆÎxŽÒ¹Év^£s¿³/"ZJ¯S;û)g²ÑÑBÒ(¥ÇMˆõ`ˆ5)4.Û0Š7¯Ï²,T*•ȘÄýÈó¼h¤š8‡ØµmÛÛÛÑ¿Íf3Šmll`ccÕj5šgIÏÓŒKMÓÐjµ¢ôÏýýýÈ@lµZÑûÈФñ#W®\A½^Œ]×ÑjµÐjµ¢×꺴ò>N-„m‹ú>ÇéýÇí(rlYVôû„aˆµµµÈYY8BdÏ›—*1…G.Á`˜¼œØÜÜÄÎζ¶¶¢ÙK´PáòåË8þ<677±ºº:ïãžNÅW:µ}.ß÷Ad¤º®+nì 8 Ã,$Aпþ. C<ñİ]<ücëÃ4ÍDm¥æÂƒˆ†*ˆëH©~ÙìlÓ¤×ÊVЍFŒîT_bØ K¯ë¾Ñ9¦iÔ¼:ƶü]ŒŠÕy?#]Î ñw<òŽ ŸÐâ†Â YªÆüëS}߇ªªÑ5G†éÁÁ*• Â0„¢(p'Jɧç]× >Ó4£ÔM9¶^¯GQS9:Y¯×3¯k£GŽÞ¸‘Í¡Ö&Áþ~Ü@Ï÷ãÚTú:m;ß¼í `YLӌҳIϡߧZ­N>“&/é,8*ß çÄ2ô˜š5ŽJb½¬#Xð;(éµÌ\ðýÁ “æD½^Çîî.Z­VVVpîÜ9¬¯¯|ðA\¼x›››‹{£q¨€åvô>‰ëºQãò0*Š/~ ³` ºlÇÁï|# º¦c¿¹ŸL§†±ñç!n~$wl®wžâÎÏÄÁ¾ Bq!%&/½ŽRˆI©¢†jâ2JY&£}BÚ@ÜA›ºVS=n­³?J–?›l¹v<€0„ é˜ÜÎ瓦ïyã{ð6¼m_jq¡fø­V1<üA`{{"#‚jEA«ÕB­V‹šÝÚjµ°±±Ó4ÑÉ^ÝY{¡Lññý¸[* }ÓŒå×÷E{€RÊ©vØ÷ýDê¯Øÿ”, Z«(p@k©ìu¶Š8+N“¶t^O†ê>’Î@b-¤H¬ØáG¥\vçýTfBÇÀFìÌ(Âú» 9é¤0L`úÉ 2R³ØÚÚÂÖÖÖbFV‰ŽÝ®üØõzªªÂ¶mqó—=| S2\×…Ñ4bcˆnÞa¬­ Í>od…¢¨*âÚ$#®HÈJÊv瘯t“bDhˆGZ|Éh%HÙ2¤ýË_aºkuÂI@µ¶@¼¸·¤}@ú<¹[¶ñ½³Ðñ1¯†JY]t©¡™®ë°, µZ-•Ç©É(ŠVcÊMzù•WÚžg‰ö<aFÁ˲ºšoM2HÉ‘ç"^×dÃÔ@ïLÙÁ‚tóÅ4 âõ¦ÈP9ˆÜÔñeÙëbÙ'öŒ¢p„@|=!;¦}éy¹œQ‡CÚ&÷‚ÙFl¯PAžï™²àÈ¡Dzí7ËØL—~eõ¤q;ž*ˆ¯Ÿ¸å±[ðâç_<Ô×u¢ßÆ…6T‰ÎZ9¡-t/^)TU@„È^0-‹C1…Ƕ…r“¥ÐP*b—G½ ÑÂO×EqTÞ~òé Î"z«Ó¶A5Çy¤•ª,ýoP×ê<Ÿ“~=3–•ŒLMÏó†Õš†aEEmÛ†¢(Qô³Ñh$1 0zÊ:¤$_‘¾ÃD³æe—”e2üä2¬´¡G³Ùç…üÙéõ´"ý_n²I zˆ¸a ìœ%£—êi)uY—öHéµ²à7~íÆ9~9Ó%¯“e!!'wúwOˇœÆ.¤tY’|õrJ‡ˆ2H#¹1¥ÿ;¬"}Ž/í—>‡ I2„ÉyJûª é@_Cì´— lH¯‘ð¨peŸ­¼d/Q_2ÔW{M•¨=üéÓ§wÖj>ª"`4È£Cí÷dy \W«Š"Üöì*b Šçõn5Û@2êétò&›MeÍÓ!)#Û*YNð¬lƒVÆ~Ó:ã>’²ž'¹'ëø¥czrïI|ùò—ó|ã'vwwqñâÅ艋/âÌ™38{öìP;*,à›Bq¯×Ñ·#hÝ%h’Ÿ¦©òž'"P“®ïõ¼x@'ú3cP¯'ý)ò\Dª_êªY=ú{ÀOþ¤ø¿aÍ 
MŸ”—~ø¾¸vTUì·´.V¦Hø¾µi«éù•4êežç¡R©DÆE´h|LµZMÔ2Lò¡+&ŽãD‘|J5;…ÜE¬ˆÓÿi /`Cá‰Ðï{OŸsZ ”Kcz½F뽿çwžŸ÷ÙO ËÄ"6¢î EÖë¿—‹8•—Î%}NYFâ"R€[Õõ/^ÄÖÖVä‰ÛÚÚÂÅ‹‘žÏZ\ ] „‡î6êûû±Òíº˜8ª* Ö 1Ñ>Ïl†¡.ˆ²MH^wj(¥¦Óo~ðlgsµÚž‘§®‡¬Mrísq83]ÖÖò‰ï¨loo£R©ÀïÈ2ÍC¥Y©ÔÐ&êKu« Óòñ u¦Tàz½Žãããáš)Q´Ò ×ìåa‚3j˜‘Y¨¤Dº€8µ•–nêGQE! º²s=œ9s&z‚"«4›uᙦNLÝ&a´ÊZ•ªŠÐØÁ0Š+•xÃô‚ ç%d±"%† V­V‹]ø2TÇY”›¾ø~¶¼çU^t]Èt³ÉF+3uò—`‹mÛÐ4 FÛÛÛ¨ÕjQ*AÛfò”2¥¡u~¨è}€Ø1ï n2@§Šù"La©V ]¥”_‚®fî\$›+­¬¬Ìû˜&‹–aëõX~óòåîS¸¿õúdR{…H¹·¬é† ˜Å‡ ú%,iáõ}?ÕÑuà{¨;%˜ºÖ²Æ†ÉIe'/šÆF+3Gô ›4rƒ0 á8êõ:4M‹:û²qÊLÜÊA¢ÔöíĨ£Ø$ºwT!jãê?®Ü`Ê §ô·¡É2Ï×@a¸~Þ0+"=½Ï:^;y2;âÔÏ`M÷çUûhüš&Æ‹8­Loä.†d_Ê×þûOÖh›¦X°ÿâK"íœd‹FdA-‡e·ú¼ž‚n'£uÖ³4xpòd\A1¶Žã`mm-š‰Z«Õ`šfbü Dc˜Yâ8|ßÇÆÆ*• t],‡ò¸."Í‘•rfFlo¿‰@ ±è¯Ùùã±Õ…di Ö= Ð×uç)ì‹'_1ɨ)ƒn0Š"ŒV®gezA³¹2xìïÀÎ_ý«É'øÎ7íݤlÓP÷^¤e<Ï kÇÉvähZ<>‡aF Dj_üÛjM¾WmÛØß߇¦iÑxš¡¢X 3ÛÛƒ«<ÏC£ÑÀÁÁ Ãè/—Ô4†Æ]0̇ÍÈš$igl°²¿±Ðœ€® »»»xðÁÏ]¸paÞÇ;<: *’#ϼ’÷]»–Ôt\¸é;¬‹»†™#‘Ý0„«Þ«WQ&:`ÊÍãÊ À+>ð|üWµ{ÛÞ´þo!ßr:c¿…[ד.Ò<‹|?G Ë43é,÷Iã8NµªV«Å0TÓCÛuˆl šÛ'Ïú+‹F9 qž7=vÓЃç0ì/Ó®ëBÓ´(ÒßS6=Ä= äKÁ0Sbn«‘ÏÔ…ä„Üp‰X__Ÿ÷qM3•LR:ò§~í×ð•W¿·ÉOðð~\(ÜyjU)Ex˜ù¬¾¿`mÓ˜Âá!94’gÞ²ðs/~1v3®uø~î~à;Si¹ýlH’Õ 5ŸB‡)'d¬Ê³ò |,×þôi|î-oéýÞ{V»Ÿä›Ñua‡ÕÉGp|_ì»Zå9ÄÌ@caø¦L!9!?ØÛÛÃåË—£Ç§OŸÆæææ¼qt:ëŠ/êš$‡]KÜüøãøä·{òÉ€VtOD[óbÝTû1Ž»)ÄgqZårC2í#R6UUØ•_ûµ6´»ïÎ~_/Ñ£(O/ç½ag‰RÍ—^3L#2266âÙ­ezà8BD&¡Ù¶ Ó4Q­Váº.ÇA“Æ:Y!J§¬@\wrÔðÙoßòïqxö ÂpPh®¸^>åø àÛ;\ý'À÷Ÿîïô™T3šiuŽÉ`ú€íÅMѪU@í\kiÃ3Ïu®‚çsN~I]4S» BîH.ÙXe–JƒÏãc Ï @xéxà&6^¼x«««°m{1Ó„ÉKœCW>>8Àá›Þ”|Ò…Pè›C横PÐÃ0_„hœ”àz](ö¦ÉѨe…êÙR(êô‰o|#"ßE/?‡œR˜…¦ £5ìóšôëó¢ªBžMSD¡,KÔ×Nb|S:t}4†mÛp]­V Š¢ ‚DÄÊ0Œøº¡t]93¸‰dÊlÝþîOí_¿ü†8BipÂT7¨¤g¡ ™z²"®3ÃÇæ8⾕^/G\| ×uÅÌ_" [‡p˜à¹‘La™z:0•LM°Ó¦$œØÛÛƒišØÜÜÄý÷ߟðØyž‡K—.áÞ{ïÅ… /ÚÚ‰¦¶Òß º<.¿ýöno¥”Z94†!”í<‘Ùz}ô«XQ„Wܶ9ʺ¬P00©ÏÑÕ'pW/‡H/E_ÅàÈÎ0Y£¢ëB®k5Ñ–aRŒÚÉqT«UT*´Z­(ºš 9fdÅGA2êX«ÿ¿ ½a¼ƒÌëè—0¨|ÎY饔Êìy¹Z­rºþ ð¼Þ_q†ð}š«ÅÑTNBax15X .-ן?º®ãÂ… ]›üüùóçç}¬SåÞúÖî'ÇismšÃy£Ç¹ù›¦¨™å‘7ˉ u¦I2W¯^Í~_¿)TK×é³ú2Ê\b ¡ “fÏ, ƒÆ~dA锦iÂ0 looÃó¼Þ]VÉ99'´¬î“rœÑÆ¥õ#ofu§o6…¦†"«gÒÇÃ$ÐõÞÉ]žçáOžÿñ`l¬2L€x\ SJ®?<<Äý÷ßß÷EgΜÁáá!Úíö¼w8,žp|'È«?£gS·àYašùæÄ2åDÛÔ‘wJ þ¥,g ½¶Ÿ"Þoñ ¸ä¹FeüÆ`õzáa‰¡†Ð[⢫`š&4MC½_†J¿zn@o“ª³6Íø¦åûÂ8ÌêRoÛq 
ï((Ê𶪊k±ÕŠ?Ÿ™9ÏüÁ38ñWOp4‰Y8|Bˆ¡ëDÓæ}fÌ´¹~uu«««}_Dõ«GGGó>Þ‘è Ä€ù°Ç{[QØA2GÝŸÆ¥Ùä:¦‹¡D"žø‘':ï‹ßX¯×{×yƒ»Lúþä Vº^¶·Å_½.þÒPÃ×ÞÆ†xݰ†+54õ[-a¨³£t*ô ª·ÿ{·ü?n™÷!2ÌÐLDM¥yª:âµÙÏW-9×÷L,C^ZZó™•í7©¨unMœC§‹ðÜ&53S'È—äÖï'[ƒ Vš[V¸AXÖRï R™Ñyò¬bfxÂ0ÄÉ“'Q«ÕÔüö/ývã”æÉüÄÀ?¼«÷Zíy“ídMMôö÷{ï—¢±;K¨mT£wTÂð<ÜþÎwâ–/~q¶Ÿ=C‚ S)DsT£Óqyå2^ò¯—{-³˜Œã';Ò›H©l¬–žëŽŽºº§ÙÛÛ¬¬¬Ìûx‡#DïÆ1t fYLB?‚Áµ>]9Ë#R­v7^"vcc2ŸÁŸŽÜV*À£~µÛCPwá~˜†+Â8HmWUq¥aÆ$§Þu]|à•€¦i¨T*Ð=½w­ªï‹õ8m°~÷«ßý5afå³MÚhSUÑ)oošë3©{É0̲ùRkk€çák««øê 7Ìþ|gDµ hÏ¡«ìÂó¼ìq6 ³Œ53[6V9Ùjé¸~}}¦iöL÷=::Âîî.677r´Ÿ¥Ðhè2Xƒi¥Ò*Êàç$oøò¾¼Î|=òÂ3åCCß¹‡*ÜðÕ¯fot‘¯U‡HÿíU†jókÀ†B6–%<ôǵ{… «´3çyø¶•oCµZÅÁÁ^ÿ}¯OŽ£!ÂP¤F·FeA|Ðþ¾õt°qk´'A£!ÎaÞ5ž'¾ÇJE‹mOîx‚ rÌ~þïÀ×n¼q~ç9eTPþÂa(‰£ïû0Bc¼ 3'H- l¬.1ן;w‡‡‡¸÷Þ{qéÒ¥(Úzxx´9<<ÄÙ³gç}¬#AûÈ-à;½ã»êWÓµ£¢(Ù©ºÄ4kNm»¿"U«MN¹q]î9hÖ°ŒŠH!ÿ–ÿï±úÌ3Ùïõ1§N½äȲ&Û,ÉuãýYÖlR©ÑL«%ji]7;ÇŠ ðì'žMdÝô×oÊ~q¥’ÝÝ=DlP¬çs½£±4y2¦át©TÄ}§Z׉®‹k¤R™ÌàyKSàü2Ä:.;Ø!24EœÃ0d$ƒ•^ß«KÌõëëë¸páN:…óçÏãž{î¦i¸çž{pþüyœ:u .\XÈèjn4­Û`&7l¸ßØ™scC™æLa¸4«{"«ÅG4~,¨ª*²jXqgß2©°†Þã÷˜¥â º_¸p!]Ón·±¾¾Žõõu¬®®âèè{{{ØÜÜœ÷ñG вîo¤äWÑ[ÑÌŠ\ õ°¬ÞƒÕ& ¥…Õsô¼¯×ÅñÙv¾×÷BÓâ”4?â8q šŸ4ƒU–ÕŽ"ܽöµÙï¤?¡Zi€³H4Œxd¯ ä4v£R×Ü(²W©ôŸp^¯‹óv]!÷,ß3GÓúûB\×Åïýãß>&=©#Ž\e¥ÿ¦QÐm(Šxo­;/i~½~ š¥<.A0ÙæVÆBGñÉŸçy¢'ÁrØìLIÊ= ì‰Y®—¬®®B×uœ={º®GãnÚí6vvvæ}¬#ÑÓ/Õ°fú™'}‘T«±1—8Ž)D!ÉËž÷ÆnšÃϸô¼ì”-§P<ú`Z¤U@¤Šu÷/¼ðBï÷NRÇVUñÛgÉФ£P$c†Ñ[é–#M£Cž¨©ªŠë'}YuŽÌL ‚Š¢à¶§oË~A?ù‘ÉÊbÄúZ„ÚÕ<¤¯"¤ÖR{&íoÑu@õ}_4\∳,°±Êt81ï˜*àul¶„í¦!Vò;JiW'UÉ«91êu…’ MËc<ì>ëu¡<ô‹(%¾?ŸÂC =3²~f@G½ýŸÇ­½Ü™“ Qó¥ôçM#å=o6 £ÝÞ sòFÂòÊw¿ïÂ÷…¡0ËœmGN±w”xäaYÉàëºÑ('ß÷ÅøšÉëdØŸ5+‹”z[LSd¼P0ƒ•ÊH'zÞûSÙ¨"ÂÚh4€“`EžYHrWÏT š=2L‡ëÇßEñ{ô‚‘Ó}³®žmˆª“FÓú7`š'Ts”·Gf7«!˜—wݶËÂgÏâö|öŸ?i²‡Ùn4€Ço¾9»>˜¼C&k¬RÐ4¡°3§2ÆWè)e¸ßœÚ‰LQ—p]‘Ýjá=o|ãäö[P(°Oø¾EQ i4M« !ëaØqD »&µ&ïÜ™5ä@‰¿¨é8’†¥Õ÷œ"ÞçExZÊÇU²9Ö ðÓ1Ì(ä¾Ýq &E¹ ÖNF`æ}™ì2UE˜6Zu §Ð ƒi&½Éº>O7Q¯çï|9®Â£ªq÷ÈYáw<ºŽOþò/ãÉïøŽÙ}ö´È²ù%…üÚµk½ß; Û²Ó…{îМʼN’I)ôÕª¸Æ{ÍǤ “hÞDï-Â÷=#(€NAªõz]8kŸL‹Fê7Ï{‘Ð4éüƒbÈ 50ó}qpó2Ñ¿+¥(Rý*Ã,0¹|Ú!¸N›é¢Ük§!G¦=hA­­|ßÇ›¿öæ8ª:ÍTEI* E2XU5ÊÕ$žFCü@ý†ÝÓœÍZ-nâ4*cºŽoÜ|óä¿¿yÐãëð<À6¯àE/zQrƒ‘j3KŠ"ßñÌÃ$"¬òçR#´´¬W«"U™š±mlÅ}”‘)$ßKL†¢ÆOæ'¾$¾Óf3þýkCì´`€GF×c¹*Ê5 ˆûÀµÞ¼LTíWR—rÇ`5 
C´ð50Ì(äºO£$YxNäi¦ttt4ïãž@üùJwÚ`€ ` øÑ¿ò£X½qXÌQ³³gÒÊ<̓¥N³ûûñvE5¿4†Ò:©áÉ0Fs.NÍÙ¸˜@øæqñÎ;qZ~ÞÁl"«Ä ¹#³$í,êǤÇMQGÙ^wkªõ¦h®+4Ãt8.Jšç ÔÈð[×€ëÉïfØ.•e@×Å:[Ô5P®7ÏÍ3h®x‰ ‰@Ä /¼€?y£iÓ©ge˜E$×mÊË8ÓE®ëÊÊÊâ´ xâþœ™e¤B !nŸø¶Oà/ì¿à $‹^]_'­Ì{žÐ>{)$4Ïo?ÖP‡F_¯—O¡ï!³úIà'î{ŸXY‹?}M³¼M#ñ}çj–9Š4]ׄa 7+Ó4—.šN¸ðÓëe÷üA·ñ“×iS¦:*E‰×¿¢6/Ò4±Æg9¹FÉ:X4vwIïÁKp÷÷Ý=ï£c˜±ð¼œåêœÌdpâÂ… ó>†é ©}<*ðWþ#ª³n¹çyâæ\„:¢~ÔjÝ#r¦ÉiµòGµhd‰ï×Ùx™Ð€þ×ð;ÿãw„4ËÀ„ªŠ(IÑ Õ Ç™îâ[”YÃ~E_K¦@ú”5~T³ÞSøËÔ£´à";îdÚ·k÷¢àÎkàÙäÓŸþô§ñº»_×ù~Ýža N:s '!XÆ™. =Ö¦Ýncee%š;>×ytê0Ã0œ}3Ï‹;„±³*Ñhtâ™FÝm–’2M[hcu,ùúFœ¯'O=}÷oáçþõÏÍöÄè®TôšJU»øÊF«ª÷š´mq|EMëL1¶Œ@6XÍóGfÿü¢¨ I‹¡ ¡ ú™U”ËÓ¿ rÓÅ$ËP&ÄÄe;àš ©SýýÏþ>vOíŠö5 w¾a|¸^åŒM+î:¼$LZÆ5-§š×œ÷™3E䄦i‰4ª££#´Ûí©¦ïîîââÅ‹‰ç677AÑÞÃÃC˜¦‰v» Ðuö(mï5ˆtÈ^ÔjÑ\:eÖÑ ÃŸ_ô¨ⱬøæQd#ß«iÎMÁ™™|÷ñB/Ú¯x/~ñµ¡v91 C E—Šb¦Ö¢Bë†ç‰ëqNÇ;3ß÷E ~×jU¬ÿ4GUfн ‘6‹Ü]¾³‚™ÖgÏM¶=FVpÙÇçÞò¹¸dYd’êô Èkö‹Ðx‹Æ`ù~ü"«üb{;î“ ë…Π+ÜúÍ3W™tEX|ðA\¼x±»h‚<õÔSØÚÚJts\YY‰þþüy¬®®â¡‡ÂÑÑî½÷^ìîîâìÙá»" }:ºpžzê)Àúúzôúõõu÷!:€A¥^+gnø©E?¥Ï0R94Þ²Ä_svE3‘o ³¯V?“®í^ÀÑk_;³óîbØ.·ó†ãE1Z©£ªmÏÜù53Oc#2XI&‚ À™?ýÓî÷ôÒ_ûE_‰Z¢{¡ªjdxæ!‚è=€ˆZ7ÜïŸ8šÏìãæ%Ûî`ÿ'Åÿ}߇išÝ3…éΉ2R=Oh†!pÓH0EÇ ´t\”ÚK¯§è©aLßÒoÿ¦Ù]»Û9¾_ÿ–oÁ´Z¢ÎmýNã¢{]€[ïÒAýv€¹Ž‡Ì5ÖfPþüÑÑvvvpéÒ%è{Qô›ûì³ÏÂ÷ýè$ƒµgÓ¼n¾2´p. 
õºøÞ‚™õ (ÂIu9ÞÛÛÃ{Þó<þøãSûèIË7|þóŸÇîî.öööÄ!žv*"?ȵk׿çŒâQ.‹D«µX™Ô1;µÐíîîâÉ'ŸœêGO} Ï<_Ä#š2xî;¾#ùD¿¥ªß-À°=Õ¯oªA˲¢?ÙÆOôºµµ5X–Ïó ëzî÷Ï‚K—.á=ïyž~úé©ì&ë·„ïÚ÷Ë=ˆ2—jü†M ÃØqKrêj*}|$H/0M¬×g¯ÛdE_é¸ C8÷ö÷Å=‰2TæÝ-[QÄ1×ëñ_µŠÝvO>ùätŒÄsY¿e:MÆ"ÆÂð²ÍLZj5áÀߨˆûft¢ÿ]P‰R¥"èì+ÒÁ‡]¿ga=<<Ä™3gpß}÷E9ó«««ØÝÝÅÖÖVß âêÕ«‰<{™o¼·ÝvNŸ>-žðPí‰Q9y?uçxlÊ@JGÑkü²HÝÄN:…;?úÔ§&þQÓ’o¸é¦›púôiœ:uJ<‘òDêzÒÖzÝÕ«øÖ¢§oQºUÓ§Oã‘G™Ê¾g¶†§¡þ,{¡ã$»N~å£í~O¿ñYút¡8åé ,aMBéƒ<¥7D¶Ãþþ~TïjY‚ @«Õš¯Lb}}W¯^¸B?Óõ[Âu“Ùaöþ=- × †Š½é·#CÕ÷ÅóY:ÊÚÈRž:Ó©·Eí¬^Ôã­ß7ß|óÄ÷=·õqð€ˆ®Ê¢§užs±\QVjè5K‡ ¥©ÝŸK#ÄT5¿¾O×Xˆ󯆸¡3êÊÃ0ć?üa\iµðWî¼s¨Ã>;;;Ñ$¬òsÄ$f¶®®®våÁommEžG9 !ë½½¸ñÆñªW½*Îɷߌ›iÊxž‡·¿ýíØ|ùËñçýn L)Y]]ÅwÞ9Ð#>ê¾§!߀PxúuïNë9qß;…ï)>›››¸å–[¦²ï™­ái<#³SÇš.O¿ñ™gp²RI¾§_°\Vúˆtc"EmÈ´ËyßCÔÔůªj¢±ÕDæiœD†­®ë‘jYÇA£ ËõõuM| Ÿ×úí6€}iúXfúµ…ÑRÔGÉQ-ÑõØØ C¡ŒR×Ö›¦­ßý £2·õ»Cäór_c@Èx1|bÓŲ„—Š?Š’¬gVÕÉ5dM×l“AªiÙ×ð8¥Šä 2Máô¢û±iÂS¸® Ïó`ÎÜt>ñš× µûgΜézrš#möööpxxˆ­­­è¹«W¯ÅÝtF‡üÿÜB²„¾ïã­Ï=‡[ßüfÔ©Ž”)<3“o@,îŽ#d]^^ô¢Íûë`JÈLe\¦aXv…ÕjŽþoyîùDCÝ-Zs2oƒu‰‘?ÃŽãtÓÌG¿ÍyÈvÆ«å;ãç2 VrªÌ"Ó6 åÅõŸTâ±HeLLs[¿‘ ݲœõÜ¢“ÕøÓ0²#ÿmÔZ»¶& SŠ˜N) > ÃÈ¥Æ~š¦!påàwüôOão|#ªÕjäôÜÛÛ×/_êsNLrÌ@VVV°³³ƒõõõÈ“sñâÅÄãÍÍÍDG2ÏóPI{ÎQíLýÈøm‚ À÷ý/ÿ ð½b&ËÌäõ5ÏqR墾'æÒS)93•q¢†µSÅ‘Ö^õ裢ÖF¦W—`@xø§O ¥R”<ÏÃu×]=ÖuÍf¾ïgn£ŽÂ`<ÏÃÚÚZdä†a˜xMY™‡l«*PÿPú¹ a¤§é²”_< ¥7ÝÙŒYxæ¶~#•ašå,C*°ïÇõžT?6N{4B) Çû’¯kú,ßïîv ôïv="ADÆ)=Vº®Ã4Í©–Ì\£]__Ç™3gpï½÷bss3êH&%>{ö,vvv°··‡££#¬¬¬à¾ûîîƒú(*Aà{ ’ÚÄ”‹™ÉwŠt‹ÃÃCüÉ+_‰»æý…0¥c.2îAD˜ªˆÒ|'™5uë­·¦æÜ ½5p2P*cU×u÷èªÛo›ü|£Ñ@‘2R”ÚÕi3ÙN`‚~M Cô7X©aÊþþ¼¿J¦ ÌKG CÉÖ ½æ.â:,¾§Ú6“ÍFQªëq$–š8d«€ SyM Æa ÑhäÎÚ™' ÝnãâÅ‹8sæ Ö××{Þœ&5¯ôìÙ³ØÚÚŠ.”t òúú:ÞûÞ÷¢ÝngnÏ…XZv*x°(]n™…d&ò"í”kßx#ž¹çžyLI™¹Œ{ˆõ\ž`†!n»v-ùž~ —€âϳœ3òh›eb–²@ÍZDÑ¥ ²u0@ ýÓÖk5‘j³éÛÌèÌCGÙØ~EèÀ¾h>jZ—V¶¨uJFcu‘&;lŸ|߇ïû‰´^Ã0 SòrÂó<˜¦‰ÕÕÕD÷Ù³g±²²ÏóÐn·ñÞ÷¾w¢¼ººÚ7'~eee"Iú~K?ÃL“™ÈwgÝZ”±¡L¹˜Õ@È:9tE™|ßÇåïý^¼E¾±jl.Ÿ=Æä`V²†€þêœ/V;½dš:û²~Ãä`–ë7uŽü(‹"¢4Ç—š„dõ¼™µò5뛲gDÝà‘CŽÊz½^¸¾'Ο?]×é€ð°lnnbkk ¦iâÁĬë]G¦O÷þašO0L¡Ñâ iÂ"Ìf˜I!§9ôØÑL½öµó>J† M´×"‘ ày^oEQG¶²O|9˜) òø1#7·›4žÅu…¥]‚fcÔLO6NÑL¯Ùl.„]tâèè÷ßßÝÿý¸çž{°µµ5‘Na3¡OýêO<ñ„ðFr‡`fÁ ‚ì}vÌ0¥!Ý@$ Ì2 
BÁ/F6Ãdã£k\Mf˜ŽÞõ«¶³m6ÃÌ–¬±½3ò„3‡î†Ñ\LˆkÉó„}0…¦E³Âu]¸®›Ð©ùÞ¢ê†'²Ò677éÁ´ý©§žZƒ©!ÅŽ~w|þóÀ»Þ5ïÃc˜ñhžq#üà>€ RwÀ0cá#™º+žïгîa ŒeúÇ=¯2ŸÕDÌ÷E$h•k¦¼Ø¶pªGºø&_¿†â/mˆ‘Ds„m[Œz¡9Âé×Oè‚5eM7G ‚`&]{gÍõ4IæÂ… }‡/éßjç#Áõ¿ökì‰dŸŽw­G¶÷ò“¥Z¨˜%ÆGW”Ô²ºå>‘FB´Kz „aˆZ­†“'Oâºë®ÃöööÄš&¾ï£R©$þ,ËJl—§ ‚•J%JQ#\×ÅÆÆ®»î:¬­­ÁqœySÃ$»ÏFÏHÖÓÔh‰a Mv ÖåIãû@¥Ò]#Äc™E\hÐjuºÕºß£ëý#°Ãu]¬­­Áó<躎F£V«…ƒƒ4Òé€'ŽŽŽ°··×·¸zooQ×B£@,ì)Y<¼ÿ~|û·ã;9òÄ,:>€ ¿xáþ—ßð†ÂÌ3ÌH˜èRÔ‡í©øñMþçxÍOtZþ¨tÞ·¤—@¥R®ë¸rå €xŽb«Õš˜Cuò4Ÿ• PÃ0ÄÐøN§É^8Ž œ7:m¨ÕjØß߇ªªÃÐ4­tÊà* fŒ·ìi°f~¸n•)0‰j “-Ñp]á½l4òÏ VÕRt© ‚–e¡Õj-lŠï°œX__Çîî.Ö××3 Ò££#ìîîbsssq¢®î-ÙQxñûÞ‡¿ãø¡yÃŒKÀëC"lÿå{¾úéÓó>B†™ ‰•BßÐÒ·¢0Ä«ßô&ñÿDÚðâ8Ê'ŠëºPuI)Óuõz¶mC×õȘ ÃÕj52]×"šô<: ±š¦%öMPwɼ ß\×E«ÕÂÆÆ‚ €ªªÑ¿¤€)Š‚F£QÚ&rÁ—³!& g˜y’°¥ÒýÆÁqÄ_«µð ‘²ð<žçEë¥ø¶mÃ4Í¥1VàĹsç°³³ƒ{ï½[[[ £ôòå˸téàܹsó>ÖñØÞÆ{ÞøF|ïßù;ó>† € 4åíNËàÿKUÑHo3Ì¢b#jJS©tô åçuW¯Ša"ªºÄýôzn3 –eAUU8ŽƒV«EQ°½½ EQ ªjä±D”– PÛ¶Ñl6aš&666ÑtÚož™}¾ïGŸi\×…išÑ{×ÖÖ¢ùE™8 ²t;‘dú®K!bÊç¥dÜÃdÖfÙX-Q&eœcÑ0 T«U(Š­»´&û¾Æ‚Õڎˉõõu<ôÐC¸xñ"vww»^°µµ…3gÎ,T³%"šOiY€®ãßyšKä`JŒ"þªM­†çþå¿DðÓ?½T^7¦ät”œ0µP*Í£¤ ›¬=õSBqÑ7eš%•JÿíšÖmdÐØ„~ÔëÝźYŸ5¨û%D´’"•Õj52ŒªÕ*¼Îl,]ףƺ®ÃuÝh6½~P•ê©úáº.4M‹F¸Á ûûûÑl@˲ÁDS™‹‚çJ'S&wÔÉó„’Î0 @"Œ–ŒqíË«ò¨™0 £ì’¬4ßz½˲Ðl6#ƒvÙ8ˆ.ÀçÎÃÙ³gÑn·£½Ò„…(³FÓðá—¿ ú`Ê‚øÏáÒNšßþüç£èÔ‚ާ(’ͧ"®‡rÝ|PæØxf#¢Z­ÁÇ€ÏRU53­Ôó¼L£O®uÃ02^E¨Ñ€y‚RнÏqhšÍ§Aö²±LÇjYÇ)e4A}húÎ`õ}á­Yp%YE*- 0¾3‘æ£.¨±Ji¾Õïç5clj Üý%¬]?!?XYYéÛ|iшwÕj¨.P÷/†é‹¸OÚÿ‹ Ï»oY,çL¹èØX¾/U@Ø ñÍ×|)£]örB©¿r5 Ãè9ªE%<σªªQ– Ð~~{‘gþ³ëºQ¨ÕjÑóYoÈézgœMêëêùý‘ÁÊ0 BÂwÞé-3ªºMƨt" Ã(­LÓjmk4X[[ú}eáÄø»(.¤¿AÏóJé¡e–”¨¾PH¯ë,â,çL©è8£U$D6Žoó}oøÆ7æ}¤…AQ´Z-looGŠ‘ïû¨V«¨V«‘âT©T ( ‚ @½^ê¤jµZôžz½žˆ¤ñ<×]w]ô˜Ó÷ÛNu«2º®Ã²,Àó<¬­­Edža¸-)>H´TEzcþN¨ S¨IÞ²‘n˜Dkì¸Ùoªª¢Ùl–®<"/¥5Xƒ ^ß©›Ô†PSc;¨ÞŒaJC'í×uSºº .ÇqðÀÍ7ÏûH …¦i888ˆ:kš–ðÈëºE[å5ƒêGiôZù5²ñx||Üót]ï»]Æ0ŒH™k4Qz°¢(¥UÎ, 0ž´‹ñsAô>_ß_¨‘ S©kÆÙ[ñ‘#©ä œtâ2—|•Ú`õ~÷KxÍË>ÅQ'¦”8^RÉÓì„aך©ÕZ­ÏóP¯×q+Ë}&ýŒ=y|LÞ÷ÌŠ^ÇV&|¿k¢ €>éÏË®b–ȶ 0üüÕ0l[È}sŽý ú¢(µ Ç ªª‰nçÌd)­Á ØÛÃÞ7—£«Lé_J<¬Ô,…3L©ðàÅe]Ýó<„aˆƒƒ’»ì§×¸ƒVÀvò¹¾)Ál¬2 F¢IÞ0ª‰ïÛÛÂâ-¨±jÛ6Çiš¬w͈ëç}Ó"<ñ~ý‰'ØÛÁ”ÿñŽþÒiŒbÛ6+¢L)ñ>žjÞÎ&‡eˆ^.>º.…aØ[® ª¸3ÌD±,a¬6›…š7ìyjµ*• Nž<‰ °¿¿Ïz× )m„5xô«Po»&î 
|sfJFð&@ùò ™ËÚæœ)9>à)@:Iæé?ýSX_úÒ¼ŽaF"ç×ú«“Ï÷›o»ˆc<˜åŲ:°u©'a9౿_y‚ jDÇé¾ó¥´+žxêw}+‚ãï‹a FðZ@Â4-j¶´ŒmΙ’£ …']VùÿüèGñÒÿñyÃŒDxÀ«“Ï÷5Xf‚ŽÍ™G¤ÃPthÒõ¹GUÃ0„ã8ð}AD£¾–¹ÙQQ(¯Áú¹Ï•×Aý"GW™ò€òé¿ô§V‹f'2L©0º£þÙø¢§bõ¤R©t=g4M‹FÜ0ó# | ]h8Œ) aØq4v:½|qµ:÷5Ò~ Àiš…h@ÇÄ”Ö`Õ¿ø;øÂêßåSJ‚ÐÂÿŒÿ$[Á0eÃ×%ÕõEÿê_¡}Ï=xó¼®Àxž—9R& CÎÄ(¾hï@¢†µoÃ%ßïN3`˜‰³‡®Zí.TunÆjÑÜTšùÌvC1)§Á†ÐW.Ã=ù·øæÌ”ý5O·è]s¦4„€ûk€ö·%ƒ5pûÇ?Žkï~÷¼n! ‚ au®ëF/ŠÂº® Çqˆ®Â†aÀ÷ý(MÎ÷}hšÆ™c hè2X3TÛ'Í¿e˜"“˜Àä(`B‡ïû°:5³š¦qDu(§Áêûð¿ó>ø¾Ïž¦”Ô_ýïUƒçyì”aʉ ¨_H–lÿö¦›pž×õ2FT«U„aEò<σã8hµZð}•Jº.œ`–e¡Õ1*• TUE†°mÍf¦ibcc#2p™áð} z ‰TÉžk9GW™#$‘ 08Â:C|ßjTëõ:;üˆr¬šëç¿Ú­¿À+SN|_¤Ð8+ŒL9QªXÙ \û½ßó[[ó>²n*}¶Q`Ì‚ˆ6Ôhn÷Q ¿ó>­ó¾^ŸE¯ÏóP­V¡( t]ÖÇq"Ãt]»©ª½ŽŒXf4Ô_ðSâÿaöîôîy<ƒ•Y(|_z0H=™CÆ÷}ضe†èºÎ³Sr¬ŠãÇ×õa¦ç!0LÐ4@Uáû,ãL9 ‘ ©ê–…ÿï[ÞRLMžlÍtölƒL-cß93C³ÒuÓu’²CWŽî…aÏó¢ç9 1YêugDÛ¶aFv„5 Ù`eŠ„JÒo|°ç® LÑx´, ®ë¢Ñhð:¶à\?ï˜Õ*7˜`JLG ‚€eœ)%žtÊ(Í&~ç‹_d¥cB(Š¥aÂï„ETU…¢(¨×ë\£:%4 g…ëºÙŽGÇ(–y¦ŒxÞT¢«žçÁ²¬¨[úþþ>ß7J@i ÖDJÔNygÊŠR— ‚A°Ìçäºë®KüeÕ´Ò‡íííÈñU­Váû>jµjµ<ÏceoÒ„µ})Ø™ÑUß+7[b ¢Kp/&ìŒñ<kkkp]ªªFN7vê—ƒr¦øéŸ~…”)5žç±òΔÿË@U2XmÛæô÷œd´!Èø ÃÍf3êNKM—‘ ˆ+¥`뺞0\[lHŽ¡È×…\w}—aloÍ&Àz ³¨øú-ÙA0±tw˲àyšÍf1ËF˜±)­ÁzõêUT*,´Lyá”w¦Ìˆuª©äF“e{{º®Ãó¼.%•¾)Ò‰®R˜.Çcˆº>þ ˜E¦ŸOkÌfKaFÝ~}߇ahµZ¬•˜R¬¾\»vmÞ‡Á0S…”†)Áûub1w]Ø@4;”™ ªªFVœö;Kz§¸óºÎ”1 Vß÷±½½ Ã0x~êQJƒ5 [nù$ 1Sj‚ `%“)%þ> };×Åó·Þ ww7{ä3ªªrYÁ¼Pßc§#SRüÎ_/?£ï†1ônÇã8œú»„”¶éÒW¿úUN `JM†l°2åäs€þm<.Ð{äÃ,">„ÁJY2µšè˜Ê0eÁ°Ýór7\ ‚–eamm žç¡Õj±±º„”Ö`ýÒ—¾ÄÍ”šôLE†) †èoÿ,`÷¡‡8x|ßG¥R‰FÖžç¡R©D5_é®Á–eE5aémÌ ”`šÝÄÎG¦LtÒÞ{ÒlæÚmÛØÞÞ†ªªØßßG³Ùdçå’RJƒUQ€çžû,ÔLi ÃSù˜òRpí÷píïü<¾ih¦ªë&C®ëÂ÷}„a˜˜» µZ a¢Z­vmc¦€&~§~éKE¤‰›‰1ecÁš# T«Õàû>Z­ªÕ*ëôKN) VMÞð†Ëó> †™Üp‰)=ž‡?»áN{MÓºŒÎ^kF­VîÀX–…J¥@Ìyæu‰‘)e—`„Î0e….1¥ÆuÎ|PÓ4Çßß”é×/‡.Sßì5M”­AoýMUÅ_Š÷)Jl×d}½^Æ0 ض Ã0àyªÕj"êJÏU«UÔj54sÖ”1 qçŸü žúWÿjÞGÂ0“'m°ú>`Y@NïaFuªó>z¦ ”Ò`U…kž˜Rãû>7¢aÊ‹¢àèïý=`A2 ò¬A K2X{½O×cƒÕóÄÿû¬ôzJ ¦šÔ´WÓ´( xcc®ëÂaÌ3¶G¾õ[ñ#?öcó>†™Òà^ý6˜²ãº"%$µþ8Ž×uÑl6ÙñÎä† V†Y@ØÉ”ÏóØ13º®G냦i8>>޶ÉFgz 1M3ª–ßÃLÎaJÕ­f8ºlÛF«Õâk€ŠÂw n·Û8<<œ÷a0ÌT`ùfÊÎ(2Î =³(Œ"ß<–ŒYFÒQÂ0®[M­ã–eÁ0 ^ß™¡)l„õðð¦i¢ÝnÞ`Û¶ç}X 3X¾™²3ŽŒ{žÇ =ShÆ‘onšÇ±t”0 £«n5 
C¸®;°Da²(l„õüùóX]]…ïûxøá‡Ñn·±»»;ïÃb˜‰ÀòÍ”qdÜ0Œ…gÃ,/ãÈ·¦iab ÍX:Šª©õ; CÔj5˜¦¹ß™âQHƒõðð{{{8sæ `ee[[[Üô),ßLÙWÆEa¥†),ãÊw}˜VÏ 3c&¥£„a˲°±±J¥MÓ8³€™B¬O=õ`}}=zn}}ký:,K$îÒ¥KQ:J™`ùîO»ÝÆ¥K—æ}3¡¬×2ËxÊú»§á5|ùàõ{ñW¾eCÍÞö÷÷K“5SÖß=MÑÖïBÖ°ö»(ŽŽŽ°²²Òõü¿øE|ô£Ń>ˆ;ï¼sÞ§0Uyäœ>}zÞ‡1u>øÁâêÕ«8::šè~Ÿ}öY|èC¿øÅ¹œ×(òMÇýàƒâå/9^þò—ÏåØgÁc=†Ç{ «««ó>”©3­kù±ÇÃÓO?=·óâ5¼?¼†Çg>ó|üãǵk׿r^£È7¯ßåcÚë÷ááá\¾Çq×ï›o¾O?ý4n¾ùf<þøã3?‡iÂë÷x>ìú]Hƒµß—sõêÕÌ‹åúë¯Ç•+WððÃOüË-'OžÄåË—ç}3áððpâ¿çÓO?OúÓ¸å–[ærN£È7Üpà xøá‡qÛm·áU¯zÕ\Ž}\½z–BƧu-û¾¿üË¿œÛyñÞ^ÃÇãñÇÇ¿øEüµ¿ö׿rN£È7¯ßåcÚë÷3Ï<3ƒuÜõ[Ó4|Ï÷|O¡¢s“„×ïñ |Øõ»«œ†¦×Åû?ðøøy:à dù€|pÞ‡Î0¹à5œ)3£È7¯ßÌ¢Àë7SD YÃzêÔ)É´„y¥F0̤aùfÊË8SfX¾™2ÃòÍ‘B¬«««ØÜÜLî{ž‡J¥2ïCc˜±aùfÊË8SfX¾™2ÃòÍ‘ëŽç}Y´Ûmììì`uu5*ò¾páBÏú>†Y$X¾™²Ã2Δ–o¦Ì°|3E£°+ ¿©h{sssÞ‡Ã0…å›);,ãL™aùfÊ Ë7S$ m°2 Ã0 Ã0 Ã0ËK!kX—‰½½½ÌçÛívßYXãnŸ%ív»o{óqÎ¥HçÉtÓK¾òüîÓ”ï¢+“dZògû,GÆé<™$Ë ß¼~//Ë ßt,¥X¿™¹ñÔSOßu×]]Ïýèþèñ]wÝu|×]wÿÌÏüÌD·Ï’G}ôøî»ïŽŽåî»ï>~ôÑG'r.E:O&›,ù¦çËð»OS¾‹v®L7Óï<ÛgÉ82¾HçÉtSvùæõ{¹)»|—oýæë8<<„çy0M³kÛùó籺º ß÷ñðãÝncwwwbÛgÉÎÎ677£cÙÜÜLœó8çR¤ód’ô“o <¿û4å»hçÊÄLS¾ólŸ%ãÈø"'³,òÍë÷r²,ò ”pýž›é¿Ä¼ûÝï>þÉŸüÉÈ;AÇGö€üæoþæñÝwß=‘í³ä‘G9¾ë®»Ž¿üå/gžß8çR¤ódºé%ßÇÇãÉp‘~÷iÊwžíÌü˜–|çÙ>KÆ‘ñE:O&É2È7¯ßËË2È÷ñq9×oŽ°Î­­-\¸pgÏžM<ÿÔSOÖ××£çÖ××£üðq·Ï’õõõ®èt|+++cK‘Γ馗|ãÉp‘~÷iÊwžíÌü˜–|çÙ>KÆ‘ñE:O&É2È7¯ßËË2È7}nÙÖo6X D¿ûèèhìí³dee%Ñýðð»»»ØÚÚÂêêêXçrppP˜ód†£,¿û4å»h×2“Ÿ2ýîãÈø kµHçÉä§,òÍë7“E™~÷2®ßl°ˆ~?ôÕ«WÇÞ>¯sÚÝÝÅ=÷܃ÍÍMœ;wnìs}öÙg wžL>Êö»OC¾‹z-3ƒ)ãï>ŠŒºV‹xžÌ`Ê&ß¼~32eüÝË´~Ÿ˜Á÷ÅäD¯§Y]]{û¬i·Û8þªŒºV‹vžL>Ê$ß¼~3iÊö»—mýæk8uê€dZÂááa$ãnŸ5¦iFyôiç\ŠvžL~Êô»OK¾ólgŠIÙ~÷Qe|ÑΓÉG™ä›×o&MÙ~÷²­ßl°ˆÕÕUlnnâÒ¥KÑsžç¡R©Ldû,ñ<‡‡‡Ðu{{{‰¿qÏ¥HçÉ GY~÷iÊwžíL1)Óï>ŽŒ/Òy2ù)‹|óúÍdQ¦ß½Œë÷uÇÇÇÇ3ÿ&ÀÞÞvvvàû~ô\»ÝÆÎÎVWWqtt„•••D§¯q·ÏŠÝÝ]\¼x1sï8çR”ódz“%ß@9~÷iËwžíÌ|™†|çÙ>+Æ•ñE9O&›2Ë7¯ßL™å(çúÍk9::B»Ý€D—¯Im/ãœË"'“dY~÷eº–™˜eúÝǹVé<™˜e‘ïe9O&É2ýî‹´~³ÁÊ0 Ã0 Ã0 îae†a†a†a ¬ Ã0 Ã0 Ã0L!aƒ•a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`e†a†a†a ¬ Ã0 Ã0 Ã0L!aƒ•a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Ã0 Ã0 Ã6X†a†a†a˜BÂ+Ã0 Ã0 Ã0 SHØ`- aÂó¼yÃL–sfY™–ìó5ÅÌ–9f™`yŸl°ß÷Q©Tæ} 3UXΙeeZ²Ï×3kÒ2ç8‚ ˜÷a1ÌTåe}¶°ÁÊ0 Ã0 
Ì뺬Ä3KËúl91ï`ä©Qš¦õÜ®ª*t]‡çy¨V«ÑvÏó¢4Ã0û°, ¦iÂq„aMÓ`Fâ3MÓùõ “—^r†a$oŠ¢ Z­BQ8ŽÉ<áº.Â0ŒäyeÙfæE¿5~Tù·, õz¶mgÞ7Â0„mÛ]÷†‡^²LÏ»® Ðu½KFé9^³™E!KÞóÊz¯µ`Ùް€ X–@žÛÛÛ‰í–eÁ¶mBYÙÞÞŽ.°m;zO†ØØØ€ã8]ÛÃ0D†ØÞÞF¥RïûÑvúüQ^Ï0yè%çabmm-r¸¸®¥Ü„aØ%kéÇÃÈ+Ë63ú­ñãÈ¿mÛ¨Õj™žþ0 Q©T"Eˆa&Á }%M–ŒòšÍ, ÃÈ{ZÖû­íôz–í!8fæJ£Ñ8Våøàà zÎ0ŒcúiŽ$¶«ªz¬ëzbûþþ~×> ÇF#z¬iÚqµZ›¦ío”×3Ì úÉy«Õ:–—¢+W®ëº~|ppÐ%ÿûûûÇŽ¯\¹½~yeÙffÍ 5~ùplšfô^Úו+Wºd›aÆe,ëº~Üjµ¢Çi¥çxÍfŠÎ y$ëýÖvz=Ëv~8Â:g‚ €®ëPU5zNNõu]·k»aÑÿ=σªªQç2ù1yi$Þ¯(J”’Ћa_Ï0ýè'çô\­VƒçyP­V ªªBUUhše¸®›H©!†‘W–mf– ZãÇ•9]˜ /½|¯`˜q$˽ȒQ^³™¢3Š¼Ë²ÞomO¿`Ù¬s†êR{†áÀ÷S’ü—uƒ`˜yÑOÎUUE«Õlooãºë®C­V‹¶†‘PØY¶™EbÐ? ù×4 ¦i&öÃ0ã2H–¦LŒ+ïƒÖvf8Ø`3ªª&"¡@ÒHU¥¯ÑªiZ䵡¿f³ Ó4¹n‰) ý䜚 4 \¹rûûûp]7ªÃ6 ¾ïöm„aÈQ#f¡´ÆOCþF¢i ÃL‚A²Ì0eb\y´¶3ÃÁëœ1 žçEtŠ–ÊÛ}ߊ¶©ã!w!#¨ð›aŠB?9§Fbt#PU5‘£ª* ÀmÛ¹ÒϦH Zã§%ÿŠ¢D+yô3 É2ÁF,SòÈ{?Y´¶3ÃÁ뜡ԭíímlll`mm­+¾Ñh R© R©`cc#±”˲°±±un6›ó>5†‰è'çT“·¶¶†J¥‚µµµHI'È1ÃÑUfÑ´ÆOSþ ÀaìÀd& Y„NbYG‘˜…g¼’õ¦½~ÿ÷?VVVæ}š¹àõ»|mý.Áú‘|®ë.ÌÍkÞÿþ÷ãmo{Û¼cêø¾Ûn» ¯zÕ«&¾ï§Ÿ~W¯^](ƒõ7~ã7êxGåé§ŸÆg?ûY¾–'°ïE2Xy /Ó^Ã,Œ¼ðú]>¦½~ÿ¿ñ7Æ`åõ»|mý.Áú²—½ š¦áìÙ³ó>”™° çyéÒ%¬¯¯c}}}âûÞÛÛÃåË—ç}ŠCñŠW¼b)~÷v»v»­­­yÊL˜ÖoÚn·ç}jCÁkxùà5<†×ïr2Íõ{uuuÞ§—^¿ËGÑÖï¹6]:::ê«TµÛmÎó É2\(°µµ5• eV°|Æúú:+; Ëøh,úïž—E^ÃY¶Gƒ×ïÅe|4ýwÏKÑÖï¹DXŽŽpþüùh6Ñúú:Î;}1‡‡‡0M3ºt]‡mÛóþ®&,ßLÙagÊ Ë6SvXÆ™Ed.ÖÝÝ]âᇆïûX]]ÅÅ‹£íçÏŸÇêê*|ßÇÃ?Œv»ÝÝÝyW “ –o¦ì°Œ3e…e›);,ãÌ"2sƒõèè—.]¹sç¢bòûï¿§N <;{{{8sæ `ee[[[hµZóþ®f ,ßLÙagÊ Ë6SvXÆ™Eeæ)Á”b°¾¾Žv»££#¬¯¯G9áO=õT´X__çƒ÷¿ÿýÑ~¦—.]Â<ß÷'¾ïiÉ7<ù䓨ÙÙÁ¥K—æøí1‹ÀÎÎ{챩ì›×pfÞìîîâÀã?>ÑýòúÍZ¿§1šŒ×ofÞ>ìú=sƒõôéÓ’m¡ï»ï>aoo¯o å~3©î¸ã¼ímoÃ… f}JÌ‚±µµ…ûï¿*®§%ßpûí·ãÂ… K32€ .àÎ;ïœÊ¾y gæÍÙ³gqÿý÷ãu¯{ÝD÷Ëë7ShýžÆH^¿™yC:ø°ë÷Ì Ö~é+++‰Âoâððp¡(3Ë Ë7SvXÆ™²Â²Í”–qfQ™¹Áº¾¾ÞÕB{ww«««Ñ¶ÍÍÍDÚŒçy¨T*óþ®f ,ßLÙagÊ Ë6SvXÆ™EeæM—À¶íD-|gÏžÅÎÎööö¢"ïûî»oÞßÃä‚å›);,ãLYaÙfÊË8³ˆÌÅ`]__Ç{ßûÞ¨ø{sss¨í SdX¾™²Ã2Δ–m¦ì°Œ3‹È\ V@äÊ÷»mg˜"ÃòÍ”–q¦¬°l3e‡eœY4f^ÃÊ0 Ã0 Ã0 Ã0y`ƒ•a†a†a†)$l°2 Ã0 Ã0 Ã0…„ V†a†a†a¦°ÁÊ0 Sp‚ð¼ø±ï‹?†a†a˜²3·.ÁÌò†BÙV@ÓF{¿mw?ïû€aÕªPðé1³ ªÉç®VÅED+ñ^ÓŒ=}½) °¿ßÿz£ýÒ9û~|Í‘¡þÅ/¾cÞ?+Ã0 Ã0ÌØ°Á:ähOÞ¨")Þ®+”`M,+ù˜¢0ªG\‚@¼—">ij5ñ:Ãè­tS4ˆð<ñ¹­V¶-G²È(퇪& Uçäº"BEß‘ï‹ëõØ@Qñú´šFQ’ß}wt^·Ý¶‚[o½e²?4A¿U/ÈÙBrppÜîû݆¡üß¿©aˆ×Ñã 
ÏeÉj::K×m«TÄ1‘üR¤³ÕŠevÔh¦¦ÅÇ×!Égúztý¤¡ë“ prúìì¼ÀÛÆý)†a†aæ ¬" …²L†–e‰ç)Q6R5­„Èu…²\­&ùz=i¨ÉF_/ãsc#i¼U«q«iƒÚ4Å>*•nCÀ÷űêzáEIF£(’Eûó<¡ä7£¥ùÖëÉïkoïI\¾üùÑ–‰¢yµ¤ôëz]ü{[üVi¹$CÕuûG3ûmÄ~÷÷ãǪ*>oPt¾tT*âÿäСè.½fÈÑQ`´´xÃàÔw†a†a„N–“u6¢—®E¯£LI9dɲ*r’“nGhZR„mäûÀ/ýÒðçÅëˆÈjE·Z#)­VvMžüþJEü˜TsI?$Eˆ²Þ;¬Ò¼¿ŸlÎ2ªr,£¢ˆão4ÄþÒiãBß¡ +åó…"¦²3†RmM³Û!³¿/t´¨Q*¸ivGTÇEQÆ7(ÓQùq ÉI‚Ηªgx¨mg†a¦8™¾e{^êà¢ÌJz- ¨oé0•JüÊèRÕdYàö¶xL*ÉJ÷Íb]‹²ºèqºTɶcÛˆž§,Ì/|aøïŠ Ö!‘SuÉ@UÕl¼_´'ýž,ÅRLjŸõzl°Š€Íƒ0 †!Ô_|†ð}Ÿ•ù³… TZ É3Ù»'/jƒ¢ûó&Ñëû>E‰ä,ø¾ß÷¡i”ÎN<ÏCØ)ºU¤A=O¨ª UU¡(J´?™  ª*4MC†°m^çî!¦¼ýSŸúÔ¼¿N†a˜¥ƒŒù>H} #î©•aG¦œP`Šô© öH6¨×¬”á(÷¾p!OrvY–Ž–GgKžÒ:~–Ú,—U¥!;²êÒÙ¦l°N¹™Kµ:¸)ʰŒÚ1wÖLÊP¥h)ù¤°û¾ž²ÒNF&=O <íË÷ýèq/#¶Û¶v»·½ëûÒx^ìÆ@íG¿EmÚAÏóA°KG#E–º]×äQ62³äŠö-Ë,Étš¬ÏòIû }N+pÁLž0¿#É£ôûÑïqï™,C0ÝŸ#ýû«j²ŒÕ^}eŠDºgÍ8°Ášj¾bšÉº9&& ÃHéÃ0¡Ô§ L‚”vz€DäŠ~Ùè ã@Và³ €AìííáòåËóþÚ Ý<é¦<Ï 4Š€øm)êH£¦iÐ4-Ù$ çIfajšÉ!í“^+Ë_£Ñ€¢(°m¾ïÃì܉F‘±aÐu=Ó(íeô2 +Êih„½f˜qPr/òŽ“FŽÉÊW¥Ò­ŒQ³AßOÖ‚Ó1‘ClPyyç©NŠj«ž~zªú’yÿÌ ôD9Q%­ËeVéÆv2dXé^éîë@wÓ×M Ã$ë³ÒÍôêuñœÜ}¾èHË•1ÉÎyšW$'¥äÊò4¬.ÕKÍHGVÁX4l°f@!z*,N/NeÁ÷ý„q +är¤ˆ C*"õ}?JK¤÷Ñëä(€(ZÄêžL#efñ3õŠT’<‘ñ¦(J”V›Nµõjµš“î¸ÄL•  #ÍMwOž©Á½d`Êuž…—#¢Õj”‹9+ê˜õ9KÆÁ0 †‘™ À0£ G()ÒHu=ÔŒ‚:¬7›É(Ž¡¤k”šky®]¹³<]š4ûšŒ?¹þIÞ?¬ƒH§ý§#E†!R›Íá¿r*&S¨)E/ÉQBÙ˜#ùm;ŽV¦§3T>¤Œð}?‘Q&ßh€¨Ü‰J™ä(ùói{µZ…¦ið< ÖCó îºr_yå+qtt„o|ãØyík£›y:âIÎ’ç¢ÀÆ*3 'v¥[ûg5êË2夀IÜ›‚@Ž@ñœdK~,f  TIËŠûÐè:š#¯ëÝ×?ÔLQÆ0’óÖããá8ljʌȉO¦ìDb>}¶ {šišPU5QVBÏS†e¡É÷?Ož+‚î’MÓðéOzÞ?ßRA)ݬÖ6X%l;ž¿86®]–¦Á¶í¨–nõ™gðÿþ¯ÿϽñxðÖ[qí%/骯“¡tH2Jû!˜2Õj52^G¥^¯£Z­f#³LÔãºøÒOü>síÞò¶·áÎF>⼪ªè*ù¦ìª Õ0¢è>Ã,¤ˆ÷‚jç&Ýœo\äèÕ¬‹ÁòB‘Rù19k(«@&Ï8¼ à8NTæˆ,ê—ˆ{a‘1¨ªq4žJžä^­V+a4’1I&= ‘íC€^½ (;'M¯ÞErÐ.ä !9•”b»¬i¶‹¬Y³†Bžy£ëøÜ[Þ‚{ßû^¨aˆ+W®ˆÓEÞùŽwàô‡?ŒôÈ#xîo„ò+¿‚[¾û»çý „£O‹I¥2ÙUÏóðÎw¾/}ó›ñ¯~ó7ã±*ƒÞH.uÏ‹;µÐ|(†) ”²Hd¬AضhP;õ®'m[h-òäÞZ-1Ý÷ç7½ xê©Ìæ2¹I·h”¡šØF#Ÿ»ˆòtä}Ólš90é®Ôâ2½Oj0;ÌÇL¹óh×)š†!*• V¾ùM¼óñÇñgï~7nþÆ7ð/¿ö5Üð[¿…u]ÇNãUUû¥§n§qÝ8/3mÓ5JíO)¿_vM*O³—œ3 ù³jî˜$r”Æ—‘aIÛ{õ: YÛTÈcÏŠ Õ¢z^ÿNÍŽãÀ¶m†‘ËQ?͆,K˪ºÞ‚Δ\k»Ý†iš8<<Œ€Ö´5rmoÇnN×ósÒé/é<ô¾z} 47!Ï•O†ýíetS³ 9ÔoZn°~†Ï’‘ù5PÆ@'ÅŠŒÕ¨9WâÖím(ýò 'MÖ5DÝh$ EN“5/Õ‰q¤s‚xþÉ:¥î§¿XÇßiÖ (É‚¦PäÈqbï|á”r,Ñ÷º¦Ä§ÏÃ'ž}và 
øÓ},Š`Éó·é~ȵ~‹‰| §Û:ÝB #SDQT‹G©Úé†F 3MÂP¨ÂŠÂ†ê2‘Ë`5MëëëÑX@¨ëëë¸páVVVpñâEìîî.¬Áª(€þúÏáKº[MƒïûØÞÞ†®ë¨T*huŒA˲¢ÿOM‘¨íí¸¥ñ ÉØÃ@ƒÿ¨¥ËÓë 2”󜻜ŸA"48-O½‰<˜0 ³#f–賨M"_²¹ ѼI:†Õ~ögñ·~+B×…eY0M3®R(³’ë^ø~\ß:J ò ãÐuã¡}YÐ,’ßA†{Vý9ɬçÅ%ã~'²Qœ¾ØQ“ Jž C±D­›oÍ¡©×ã´í8}~ă¦f5ôÛ¿»®\Áë®^Åw}þóxßóÏÃBÒ}Ë×¿Ž·|ýë8ûõ¯ãgÿò/ñ-r«aZ`Æ•kÙ¹$;ˆ(Å?ËAÄLJ§k W" Í_§r'Jíæ93k„“››8|ûÛqøö·'º­~÷ïü¾õ…ðß¿ö5<úú×ã¹½€0RDWüî»Ñ ¼èïÿýÈ)¹®(¸:S¶ u)Ì{ÿ C±ëzw¤l°ªj¼V‘ƒ3ÃóâÃÃyÿr -/ä[_µ ×%ãS¼Žf—’lA˜¿ÎsØ™YC™ERU˜Ù1Ð`m·ÛØÜÜŒ S‰¹V„¼}ñ}@ëDÉ@¥Îs¾ï'êz½ŽZ­Ïó°¿¿?æ'Z C(ÆF<‘ç{š7,ú\¹ñÕ(,ù€­(C»ÚÆ™mãè×üƒ¿)Ø<˜Fuê¨ÓË8¦šõiB"(E›zï÷Šæ’LT«qSµI5Â*9“ï”ÍÔ4-ê’ ˆû¦lXú¾×¡êûøøí·^¸ývüÏ¿ñ7Nz=çyþÝ«_“/})pí®tîÇ´]­V¡ÞNQýF#!3´/Ø6ð÷ÿþðŽÇ<DzL2†±Ó,KÛìu }œ¯xÏ{ðŠ[oÞY2äzTù§ñ}ŽãDé½FžçÁqœhª<§”aæÅ0 › ÃLëÊÊ ŽŽŽϵÛm¬¯¯/¼‘*³½ ¨"RaÕjÉôÈ &Ö%uQ¨×ã(¥¦ÍN9æT£É@ÆY½Ž_¶,†Qld–.T’çZ-Ñœ…ì°¶& ÏQïÄÜÌ,”¤1¯Jîªi´N‰ã8‘H†¨ö¯Z­BÝÞþÓ€éÑh¡H<9©|Rd,53Ù/Dd.P½m­6qçá“ÿèáóÜ%8”ökšÉ[·ïû°m;r^º®‹ †f³É©¾Ì\ _µ~ å¯Ùdcu™h°®¯¯£ÝnGFêÑÑ<ÏëªUÝÛÛœ:ujÞç4†À ¢9oKgbœ('37(/Î"Ã×gVw½(ЗdÛùRÖ'E£Á‘ÑQ©L§A‡ëº°m»{”…´4-;‚˜¾8§5ß“3dÄAOwü“iŠ?*ÉXòL—yA~ƒ¬:mêËÑl6#™5 £X¥!ÌRáºbɤTuE‰+X,™\ëææ&vvv°µµ…½½=E^Þ££#\¾|çÏŸÇææfÔ5¸0xÔÎ_êfø*,ËBg0%AUüÁúÛ¶m£Z­;º:/L3N[äÈB©‚ÉM5 ‚ QÛ§( þÓÖ^þá‹T Yl•e¨Qa%%Š"BqÚyVWíiÀ†ê\ ñëÕEU6V¹•)Ôã°ÙäÛ/“Íõy^T¯×£¶åpîÜ9¬¯¯|ðÁ¨‹p! 
½@˜ãu¾¢!E®”+†YªFýþkqtµÊ d6ŠG…˜R‘·×[?Â0įüÄO R©@UÕhædK×ñòOZ|@«%þö÷Å¿½®5ªÑž¥ã¨ÕJŽcC¥”¸®ðMPŸ.6V™"ây±‘ ÄYl¬2½È5ÖfeeçÎËܶµµ…­­­±"«”nœõüÊÊÊxQÛº¹ej¨àxu¹;#ÃL€©Ê÷ ¨ADtµN¥"Ñ«) Ó—¹Êx¨AøÈY×Zò'‚öî.ª×_Ÿùä'“†æ(–ð<œF\Ö1E•í^(J2ý7 CÔjµ¨KtºÛ/ÃÌCƩ̞ TGf¹ Ö~Œ+È»»»h·Û¸páBôÜáá!LÓD»Ý šLÈ3`‡ÂãBxî¿=ˆW¾õ­Üd€™(S—ï¬ýø÷âàÇ=xž‡ ¸>‰™8ó–ñ^Ôjâß‘ŒUÇÅTºŽúÓøÔG>£ZÅK-‹¾%¢¨²…<¶V¦V«AÓ4˜³¬Íg†yÉ8÷ d†%WJ0 š*íîîF³V'ÁÞÞ.^¼Øõüùó籺º ß÷ñðãÝncwww´±:}xä‘péèa¼ýWu"ç•Iž´ä¼ø¿a?ßÞé1ÝÌD¾ Þv €ˆ®²ÂÂLš"Èxj4£(#«a(º6¾ü»¿‹€ÿã›ßÄ=>Š“¶ÍÆêQDÙîEÄåÓ2ddðÚÏd1/§t`††\ëîî.vvvpñâE\¼x;;;c ïÑÑ,Ëêª=<<ÄÞÞΜ9@¤#ommÞÕ4‡¡ø‘\ÅÝgïÅK¿ýÛÇÿFej*©?/ç{vç=5£›ÎÅïl ¤×{¿^†lýÜ0ul€µÔg Kˆá u:o«s<ã|~¿ãš23“ï>€£#<ñÄrŽÃ`˜œAÆÓø¾è›U­?^@”Oé¨*þö?ü‡P­V‹›”-E”í~loÓä9ѧÀó<¸®ÍUe˜4‹&ã “+%øâÅ‹ØÚÚŠêXÏŸ?‹/â¾ûîyëùóç£Ñ8”vO=õ$òé×××qxx8ÚúèÛ!8|íkŸŽ2}ŸðcÔÒ‚ˆ|6!R—ÉPDýmÂh£æQJçy¹ªÒy^•^çÐÐ#7£t§J“‘K”iíªw>ƒ¾» õ=¦Ûé<Ö˜}Z÷çè³Î¾œÎvú[:nU:¶ sôœÓ9>µó™ŠôÑoNç u¾§ó9Šøœ7<ô\yÛ•Éÿæf*ßnç\S ½üÓøŸý,ÌöϦv®3‡ÎY…¥iÛ²lñµ¦@ÈcØ9Žy|Aç_ZfhWÍLƇÀ²DI騙ïžçÁ¶m(ŠÂs(—˜"ʶ ÍU%ñ¤‘»kk•¨ìƒÆó±³…É¢è2¾4®l wÏB‡Õ;KZÙ5Ð`¥ô_ò¶ÀÙ³gqéÒ%´Ûmlnný¡—.]Âáá!lÛîŠÔö»(ŽŽŽ†7uô¨ýÓz=¾íÛ†Ü§ØØ2Tš]áÊšV@Â&SGÒØÈŠ Ò™†u|U“&z×øÒç†(ÝHÑ9GEÚ§|±iã GŠéýŠôUzl"6leé5²KÇæKûSRÛÑùüÔwþá¿ûa|òò'‡üâò1SùÎ-~î¹çŠ]%Ç ŒêkÒóòk(uˆ® 2V²Hr¥õø’÷ôµäC8…Td;aìÎ6-õ±c…äË@œ•]@×ÐÙ·Šøš±Rû—¿:oº†èzw¥×øï¢ÚùŒ)7°˜©ŒÁÐA€0‚¢À¶mAÓ4‹u½03¥²B4©MŠšlÓŒÇú¦3,Ë‚aÜXȼeÜó Z]a#¾wˆƒ&âûð„^A÷šmôÖÃè5Nç5i§2¨ÌοVçsHGÐ|Lˆ{>¾t$u]¿ó¤G¥íDZÏ’V'wÓ%¹¹Ò8{xxˆ‹/ö,à>::êùÞ«W¯öüìÏ|æ3xÿûßßU<ž†ÚhÓø»·¾õóxéKÿ+€¿ÞýbâÇ$¡p:d‘R»à óžBá-:ÃÜÓã”ê`)Ú™÷½½>+¯R-VÚ•§Ùfêó/]º„w¿ûÝxÑ‹^4ô×6ˆiÉ7<ù䓨ÙÙ®ë‘g4Ó ÒÁóåÅÏãcŸþìÄÏ3ò‚©#ö$ÊF¬äØ ä×d9yy— Ö´qWGv:¾¼à§Gi'L*º3)äsN¿ãÔ‘”kùµ²¯´1VO½Çì|‡.°óïwðØcaÌ{ ÏÂó„"?”Ô)üûÊOý6þ÷ÿõzÓ'„ÝÝ]<òÈ#8yòäD÷;óõ;ê÷¥i"‚Z¯‹hª¦õî¤\×Åþ,æúFŠX™g&ÊÎŽX¿{u¬ß3í LÆë ¤(Hêìd¤R¶`ú8Ó—X}Ÿ²ɉît>» q_WÐ}e‘ #]¼†X/ Œ+ ‘ÁMÈ#>)šK¯!ç9•’^ÙOÏÏÙÐ6 ÒÁ‡]¿Çî<Ê®¬¬DuívO=õvww±µµÕ÷âìבøŽ;îÀÛÞö6œ={¶{£äýÐõ¤gò[¾åÃØøÒ_K6í™$Å’” Š\È?R–’[VÈP_ph ÓåË—'¾ïiÉ7Ü~ûíÙ7‚5Ä“ÚÏþ \ëUÓü:´€úˆ@@\+ºôÿ~FW/ú»zÉdkÈýLŠ>„Ä1ëïÉ9t°³³3•SœËÞ‡ ѧܺºë « ñ•Ÿú)¼õßþ[ÔëužO¼@œ={§OŸžø>—õ[¢Vò,¥²§ A¥¬×jµñÒ©¬€2Wäè+¾éi;e”¨=ö=H±¥’^ÇæBY§GYm:†sĘ Äú=ic(Þú=5(À$?Ê˜Ò ŒMY¦¦Õ›,m7 r†«¶g•¦_ŸGI—†8PC·‘OÑß 
’Žu9›MTPF$¸ÆGÕÁgn°ž>}ºïöS§N^ º8äÿEkÀöEÀtEG:Ó+œùêWãb9ý°W„„az0Sù’͸Rÿej5L”þJž:J‘ç U©™¹Œ@QD*p.1¯ÕD~e½Ž@Q°½½jµÊÆ*`¾²íyB4ó¤µ{ž‡Z­UU†!4M>=­ÐS¶ )§ò}Å“^'¿–!”X±‚J¥DdÌ©÷Q¹…a”VÐm@qtÊBòÞB(Õè¼OŽ&Q½ß eÐe!—¥ËM˜"¬ßá´_Rô¿‰þ2PÄ´äy“6bq-ª" ¡H5qå ðÚ#à™g€—ÁGg¾x:ðÏožy 7=vÓЇ–Û`ÍòÖïîîâÁL<7ȃ¸¹¹™¨{¥üyÙ+³¹¹‰K—.EÏyž‡J¥‚¡Q祀ñ ý›p¼ä%/+uÞ]„”^¦ÌT¾¸þ@öpÍ ñ–µÈ1¥dæ2>EÉi¬:®}ìc¸ç†loCUU˜¦É³‰™ˆyËvÞ_®ë¢^¯.»”ú'÷¡ ²ÒE.I‰%¥ÖE\'§0RÊ"¥G’Ý€02+ˆâqžù’äªt¬²áIÆ,ÿµÎçËõ‰ÔCƒØî<ßèl£ã¥×S/ ¹o‰Ü?cø›b ý¼eq6ö ȳìR&I/YˆŽôâCs‡t]lÓ¤í~Gð5-™9gYb¯VïV:y߯Û~UƒeÛpN¾o¾ñÍ8wõŽßq<Ô!4XO:•h¸DL#U8{ö,vvv°··yßwß}#í‹~‹~¬|ââ‹õPŠ”W¦ØLR¾£›>Õ\HX ëÞô"¬3îJË,•ñx^Ž!ô®‹o¾ë]x;€úó?ÏM•˜‘™–lëz>;% Cxž7z½µaœeE3ÇEn†(÷ºH—nd¥+RŠ#ÕÈÖDzJKÒ¤860Ø€I*²>›Ò¥ÉA@ÏÈïÇátz eUU….:çõi–ë÷XÐT €ƒM½C!c¾ÿ?-kY:¢®Ç²*½ç/?ùI|áŽ;ð__÷:|èe/‹^îû>Â0„ïûÑý5p†UUa×j0 P{{{¸îòuCÊ@ƒuuuuª9éYû^__Ç{ßûÞ¨Õö(ˆ~Ð^…ž)¾ïã¦Ï}® -˘Egªò=×¶¾íÓÍÁ)ò æ+〸Ÿ’8" “kº®ãŸüÁàô­·²±ÊäfÞ²…ÓQsCcÝäÑWÔ }ZŒãø/R¦Ž¬ì“r/G›õØÏЃ 6Ò“ ¶¥uÈ0fÜ(É; #“1fp1̤‰î·¾ŸPlj"R‹rSf–“ÈîV†"åºñk czÑOB¿tl4E6‚…älM ¢ ±Ž¸9•<–Ǭߵ€ (ä?z’Ø"7Tʪ-%£s˜#Š"ŒÚµZ žçA×u¨ªŠF£1½ž%` Áº··‡ÃÃC¼ë]ïêûº3gÎàÞ{ïE»Ý.Tj°Ð^‹d71‰O|Û·‰ÿPWºÅwB0Ë‚‚8Õ½‚DçDßÔÛ®ŸžÒgÓx†)›ul¬2 Å F4¶m÷Ž®Öë/Óèþ;O‹#ÛA R|=/®…6Í¡òŸ)‹‚þTU…¦i £• zöeb Áº²²‚«W¯Îû8G#´7wþŸÑ$Æó¤•”ÒÁ0‹‚‹Ø`íè1¶-ŸFÐ<ÿí†ð#“^Ô¸~•)‘§¾mq]7êNÈ0‹B?%ÞqœÄ "«fÑ"O2¾/ŒQ×£MõzRÁWnb´Ìá§—ÊLzâû¢;½i%¯” äuš$)ŠMÓ ëz"õ? 
CTRÓØ`Í`}}GGG8<<ìeÝÛÛ Ü"Qÿ×ÿÈQVj­(q¾· Ž1‹E*ºê8B´÷÷;Žhøì 7LþsåZ†™#a(eú>¾rç°,‹;3 G¿à¡ëºÝµ«Tû¸H0Pi†®K7,f‚/²Ð2=K¨Úqbc5âÎÁAÒˆÕ´ÜQý0 áº.\×E†0 c`éJÖ¶ts¥e —Áº¾¾Ó4qáÂ…Lƒôèè»»»ØÜÜ,TýjO¨B®Éc y­²€j=U¢ëØûíßžüçòuÂ*ª×ÔëøEË‚a¥n<Á,}é’éEñZÔ%'dÉz¿y̦h©aLDNÉ@ ‚¾ïG†æ° ’t],‰Ãô9šŹsç°³³ƒ{ï½gΜÁææ&VWWqxxˆ½½=\¼xW¯^Źsçæ}> ¼On­¡O{uTìêq R‚9-˜Y¨#$­>ºSuuϽâ“ÿl ó»™0Œ„¢ÄŽnjë¿¿?Á©4ë˜ukfŠx^ïm®ë¢šîPCÝhç­¯ÈÝz ½ÏAqæƒ6` óMu×´‰ª¶mG£¨hšÊ¨}4M‹ Ö –Ò)›Ë`]__Ç… °»»‹óçÏwmßÜÜÄÙ³g‹]½(t#³i|0Mr8é`‰”ÁZ{Ъ)‡ÛÄ›ÎÐç2L Þ`Yêõ {R*ˆÇSÔŸ!Œâšp:ÏÑåF:;•š„×<êÂCÜ8‡ †fÔHÊí¼‡öG×^™ëôøó—!'2}Ç”&·|ð¼øùÏûè'F¯`Ifê_2Âjµî.½Ôx†NhÒ×#³ðT«}lAB§™÷ú6‚Þ†!ÇA†‰HêþþþDô0MÓà8€å¬_r¬@l´¢ÝnGãkÖ××±ººŠ££#ìííasssÞç”ìôëÔ¯)…~Þ7†ÉK*c øÐxû >WGW™ÂaÛ6TUÒ¥&B¡8µGZ Êf ƒUî ž©¨wž ”1¹“¨ü%õ9òküÎûÒÇ£!VôhDµó>ñH ±èKé5d˜Àšôrö†‚8²Ýùž(pfu^ 9v+è¼GC<ƒ’Ëß—†äùË3-é{ì¬+—Wðõ%“û½çˆ¦ 9}%âµÓ‚›!1#ÐWÎÉ8K|¿»Dp²"©ÕjuâPUÊ ‚`²÷º!·ÁJЈ›ô—Õn·±³³ß/Pøå×¼ºóyH–`è°f ©™Á­aé²LtÁBÉ<@2BÄ0sƲ€7¾ñ9üÑ;߉÷¾÷½“ݹl¤%Rí‘Tªòè døÊäqü¤_SEw&{ªôÙ€V!2ŒÈ0¬IÛé5YÇ׫‡ˆl,¥S÷òœ“™z]žR%uÞ*ðä?z_¾üåX|údÌÔ©f=3Þóú§ú2LN§Ï ÖYf:Ê]ª[­¡ Vyô êK'IíÕ±zž3šç¶< m°.ᣈ Vt]õ}?6¼Y gÎÚ€ª#V\;üõ_üE¨ozÓèû!¢B>º•Y†)Aœ<ù8~au·|ðƒÀw÷w>ï³Ë‰>à9ŠtNªæq\Œë»ð<äÉ Êd¦þÉéçÓ†º¥©Ô3L®ÛÇ`Õ0½LGPïyÂX¡Kµïû°, `šæÌg}S+0…r¯ Ô«—$‡&:)EÉeËVgJ€Àâ¼½ ìgD6^ôÜsãí"ÍU¦ „!påJ·üå_™S™ªQe˜)CÌ´‡aÜßY¥ÛöH(†éEßI,Ó*7ÚÞÊ’aˆY©CÖRÚ¯çy¨×ës³t]ÇÚÚZw¶%¡Ô+n•ä’äËõ»ÞtwU†):{àw2µ2<“×®]mßD´–j̦àÜñoL>e‘{03"ËXDD'³nZ²Ió'=O(øºÎÆ*3QæbkÕë#;4ÇmÛ¨V«“í@?ªªBUÕ¥l¸”Ù` €º\Cé]RÞw4ÖFn„Á0‹@G®}ò¿È N píÚµÑ6"ªÊ: Sp(aæ†Ï~vò;—û0ÌéçkÉLý›F$ÊuEQ¸aˆ†JK86ƒ™>”ÛÅ4ç¯!ËM¥N¿º®£ÕjfŒ 5vZF¬ív»»»wttt4ïsIB &äîŠ>¸Ã)S:r†’²#+ØA€Ã[oÍ_ç Éa%YxúOÿ/Ü~;n˜ôÎmðµÀÌ„^J|Ï”ài8؇¬çc˜Q°¬ãx©³ø¸¸®0PGpÖ»® ˲P­VÑl6 Y':ñÑm Äõ“ÚÑÊÊJ±FÚ„€óaq#y!DJd˜ƒ:²r< ļI†Y0xËç>‡•¿õ·&¿óåkÂÈÌ «ÇÚíû~w– E¢&M¿VÅ 3‚~ìªÍõ}q1 )ÇA R©Àu]´Z­™7Sbò10ÂJóWpµóÊfJ$æ¶§oÏ<ó ^–g'”\ŒŒ†ÉMËpÐþßžN”zL Ž®2 Kq–3ÇF…Fz,q´†™=©ž§“ßùö6Ðl•lÛ6ÇA½^‡Ñ·#3o&a-"­wIé’ €G^ˆB®Ô€Pô—3%œYp‹¿¬`{®ž8Ñ»Ö!„ˆªR«Ì"øÑW¼bò‘¡‹3Ö†YxzEž2S‚uŒ·^û¾PìY9gŠÄöï C!Óõzn‹˜¢ª¾ïcŸÕ` Áº··×•’rtt„½½½yûph®^^EzjI;m1å¡ÑˆÇ( >±²’ý&2V p³1fañ}ÀQëxÙk^3ù;ˆ9 3ezù[‚ èÖSÆ1XÉXm6§êb˜e½ ‘¼±!Úç4:-ËB¥Riš…­Ueº)ÂúàƒbgggÞÇ>ë×SOôó1Žg‡aŠ‚¬d›&~÷K_ê~ 
«UpÊ#³8ÈŒv†!ðè£_ÎgvÆF1Ì,è§/+YcøF¢Pl¬2sÂï%»!†wÂ8P«‰ô߃ƒ\órÇÁÆÆ`i»í.*åk£þŸ¦ž»å“QŽp çàpê³€PoDù^¿šm¢1«Ì"Ñ£{¤®Ÿý¬kצÏ€ËD˜¹ãgiøÛ†ÜQ•ЏY°±ÊÌ Eé1ÂÉÇðÙ^Õjî¡®¾ïc{{º®£ÙlfD 3å­a5ÿÑÔs·¿3úo±Ð²'YPº‡Š„÷=±(Æjl¬2‹EÝktaˆÛn»6t.NffD¿F4™eKÃêÚ¾/"Qºž[Ág˜iP­öõAA#ÛÐb¸7d¬6›M4 6V˜ò¬~F£°~Pc÷Žªª"ÝŒ Vf©×SÞJ€‹äâOWÀ>8jÄ,>ºÇxÔjpÞõM\¾üìt>“ƒPÌŒPÕ!šõr¤dEdƒ@Ü(¸#0STúe´8à6ÇI6V¹gÍâS^ƒ5=ŠÉq€7þ`¢3êÊ'VÄpx^Ç™2 ø¡?*!>ðžà[_øÖd'`†Y4<Në+AïÃ/ÇÿøÏM^ñÀNLf6„:H™éÀ!²)ž'ÏHM%# ƒ#«L!¨TzÔ±ö žú¾ˆ®9ªõªµZÕ‘»†Un²tttÔõQ˜™­’Šºm­Vq:zøýÉ¿üx¤³Ôj©.ÁðÞ{ŠW|àøá—þ0ð÷ÁÑ"f±©¢Û颪C@Ó> E™pÚ€ N›gfƒx<«;†a·¢¥ØÛvŒY¬«««8{öì¼s$"¥atÍgzîEϱ±Ê,,a˜‘!cÛQËà©(ò 3kBĵÙDªŠÀT5œühÂ`eýŸ™6 `¾b&v Ïóº£C²3…Ò%÷÷ÙXejIøË™Ê†!lÛ†çyh4œ\RæZÃÚn·Ñn·ûn?<<mç•þŽVè™i3Mùö¼”ÁJM ¤±MÜ ™6S]Ã8R޲ú> iÓÑÑkʳôL]¶Q/­¢ç8².Å» áh1]’adf"ãjµ½“}>øàÀ}íííáèè(ª‹]YYÁ™3gpxx¥ìííEžVVV°µµ5\u¨ßè'ÿB\)ò¦N®{g’1S`&ò ¡³G™`Ž“P^lÛ†aÜ™ ³’qqijㅷ,ÀQÅÚ=²­ê"6T[ÏàK…ÁŒe)ï'íùìÍ ƒÕGf­+à ÃÌe]*Še #¯»¶möm´Z-vÌ/S‚I¸ï¿ÿþ¾¯»ÿþûqÏ=÷`kk+aÔ¦Y__Ç… ‘Ú§žz €¸0èÿ”š@ÿ6^}) ^ißžfÛ¶¡ë:§K2SaVò}p =¨×£üà àº./`˜É1+Oá"³ ÞkzfSšAÔľÐwfº˜¹l“ fÔ°†é6ð¸Q$36³–qÒ‘Xª©±ÝÔj5„aÈ#k–Öv»ÕÕÕ.#”Ò€ ÚNÂÞ‹•••ÄHœÃÃCìîîF†n¿‹‚æ¿æ!øþçˉ<ß÷áº.GW™©1+ùN 9_lÛN¤ö0̤™™Œ{ˆ …€ò3Ýc†vù$vvvpéÒ¥Ìížç!®ç`°³³ƒÇ{lªŸ1í5r#w_?âˆÇâð¥Q vwwñÀàñÇŸÊþg²~w Õ0Dr¤G‡„3&ðÖωV«Lé¡õ»_ßq™úúÝA×3êW)%8ñœßW¾=σçyp*¤ƒ»~L ^__ÇÑÑööö^™4{{{ˆºö¢ÝnãüùóXYYÁC=Ô•zЋ~©ÆwÜqÞö¶·Å3cÿ¯ÿ ¬Ým'ãdâ3û˜…ƒ<‰—/_žÊþ§!ßpûí·ãÂ… =·»®ËÆ*@8ÓŠÃ$™É.‰rµ‰L^"WJp ‚Lƒ€Y\Ξ=‹Ó§OOe ŸÙú­ÐÕ‡SQM_ר½ßº>È––Z¿Ç õc&ëw‡L„ä]Æq2B±‚0 Q«Õ8²ZFÕÁFX×××±¾¾ŽÝÝÝžžòÔlnnæºÀLÓŒòèÓ¯?uê$ÒÞ ºøÜ瀻bƒÕqw,`¦ÏLäˆ{ÅC,è¾ï³Œ33af2Þê P©ëµÃÀ†*€p³&73“íŽÁÚjAt¬–~‚ HÊö¿ÿ0ð£¯LÍ2c˜Ñ˜•Œ‡!üVúIt¯Ça(¦ôÐ_ht 7XZnru >wîqï½÷ââÅ‹ØÛÛ‹þvwwñC?ôC8<<ìò¬dáy¡ëzb?¡]]]Åææf"íÑó¼h0pnžx"2XÃ0„çy}b¦ÎÌäƪ+r]×åìf&ÌLÆ­ø¿® àc· VÚCˆ¨ª†ŠÙ®ßˆëRû-Ý® |ñÕÀ÷ÍûëaJÀ,eܶïCH¬åPÐñÒ#º†!666 iëïÌà”`@DYzè!\¼x1sÓÖÖΜ9“ËC9ùYéj”¶{öìYìììDŠWVVpß}C,ÖAÿúï‰î4æƒa¦ÍLä›ð¼Hyw]—ë³™™03€ªúWŸ^ûÚÄgu)1d¬VÁõªÌÐÌtý¶ÿŸþ'€´'R‚ñ1à‡¾Þ_ Sf)㪠hß ±–‡è=ÆÆqº:ê…aˆJ¥‚jµÊÆ* §Á ¯Ë¹sçpöìÙD!øúúz®ºUâìÙ³#±ëëëxï{ß}N¿ÚÙLÂxå­Ñœ>×u±¿¿?íï’af#ß„ïÕ*‚ @†<ª‰™ 3“ñNô)Êû£7ÜxIBæ©«0«ÌˆÌtýÖ„±Ù¦R#šDJð“¯þY~‹aú1K¯ªÞ€äz¼ ÀDÜtÉ÷E½GJac•I“Û`%Ò-±§ÅXŸ£iÐî 
‚Ç®ë\¨ÍЉ\GA¨*ËâE)cÉxˆhôŒç‰QÃø7ÿ7°çùFQ¨ñ|UYÃÌ€‰¬ßu@¤A5û"²äºn<žì¥/íŸ2Ì0S`2>Õ‘o¹ô4Ý!ØóRCZEV$§3irÕ°."ÔùšÑ0¥Ä÷A)®ë²Œ3åB,¯ª€úÀë_<ý’è%‘ÒƒP€Z`c•YB!Ûi_:•0)P„2ÿ¶ÿÒ;•’a ŠçÖ¿AlœRëAê…a˜˜{†!Çáñ5LCGX ¸ S>|PUø¾MÓ8ƒ€)‘®×X.`¨ñ¾¨R!ÆÙ¤ŠÕjÂkcu2ÆX§aÒ”6Âzò¤ø·kžÔNÝ×®2¥Ä Š¬÷0D§M°ÕµZ–…Ÿ}îg¹0³¸¨Q“wüÁÿù0 7>s£Hog˜%íG¨:DC<7õB)ÍÀó<ø¾§Ã3ŒDi Vrΰ2Ï”’ŽÁêyË8S>ê Ñ<Òó ¬Šì 'dûÚxÓ+ßÄÑUf1ñ(ÑíˆýÖÇDÝ^œÌ,4¾è² ëÖ>rmY§3=)mJðÁX™gÊ‹´¨³Œ3¥¤£Øh5¶Lkµªÿ¼Š¿bü•y!ÃŒF@Úþü뎟úÊOáFõÆyÃüÿÛûÿhGò»>ð~ic°95Œmõx5‹%!iµ[{’ø!%ï91~.Ñ%Yf3ý$^)Ü'wžä$±tÒç9aÃé=W»<°.¹Zr¸Ív6øn3ó'øÌæµoçFCÓšÙ˜­ö 6îóÇWŸR©Tú­R•¾z¿Î¹§ûJºR•ô©¯¾ŸïÏÕ¨C­-¨¡ÁÆ. & -q £m+ &os•ôá­žçpr2r—¬1Ã: ÍCÛV"Ý»w/éC Š#] À•Ÿþiüî3Ïp5`ÒS£¡æ­†ÊpÏó°»»‹§9iÙÃJDD›½«¤«ÓÆ¿ûøñøßÿûI Ñê¹®Ѱ^¯×9 ’ÂV""J.žGºzýG~ÿôÿñ¤ƒ(>C<Ïcï*-„=¬DD”:®ë¢\žez¢ÍòïÿößFƶ“> ¢xD44:ŽƒV«…³³³¤Ž6{X‰ˆ(u8$˜tÅÑ´M<ÏCµZÅÉÉ Z¢…1a%"¢Tá‚K¤3×uÙC[£ÑhÀ4MÆ<-…C‚‰ˆ(UØ»J:ËårŒoÚ ¶möm¦¥1a%"¢Ta錋ÎÐ6¡À§§§I i õ k§ÓA&“A6›MúPˆVŽñMº[$Æ+•JÒ‡M4–ᤳeâ»Ñh R©pz­DjÖn·‹Z­†N§0MF#éÃ"Z Æ7éŽ1N:c|“ΖoÛ¶á8{WieR»èÒ7Ífá8žþyt:4›Í¤‹h%ߤ;Æ8éŒñM:[&¾=ÏC½^ÇÑÑQÒ§AIeÂÚívÑn·qíÚ5@&“ÁÎÎ[jH ŒoÒcœtÆø&-ß²‡6‡Ó*¥2a½ÿ> ŸÏû·åóyt»Ý¤-nß¾ô!¬E»ÝÖò3g|O&_–Û@×k™1>™®Ÿ{ËðíÃò{ó-ß…BµZ-éÓˆ®Ÿ{XÚÊïT&¬“Þ ^¯—ôá%ζí¤a-îܹãœ:a|Ovÿþ}ܹs'éÃX ]¯eÆødº~îa,÷ËïÍÇøžL×Ï=,måw*]štA<|ø™Lfäöÿðþ>ñ‰OàSŸúÞÿþ÷'} ±z饗°··—ôaÄî _øÎÎÎpóæÍ•?ïoÿöoãío;ö÷÷×~^‹Ä7 †Ù|Çw|{ì1¼óï\ûq¯Ë믿Ž×_Ý_ìAgq]ËwïÞÅ—¿üåÄ΋eød,×óÒK/¡×ëá}ï{_"çµH|³üÖOÜå÷§?ýi|çw~çÚÏ‹å÷d,¿—Þßþíßž»üNe†6nií~ô£øèG?šô¡MµH|À¿ü—ÿ2éC'š ËpÒÙ"ñÍò›6ËoJ£T ¾xñ"€áa Ýn—ûœ‘ߤ;Æ8éŒñM:c|S¥2aÍf³(‹C›mÛF©TJúЈ–Æø&Ý1ÆIgŒoÒã›Òè‘óóóó¤"J§ÓÁÞÞ²Ù,z½2™ ÇÎï#Ú$ŒoÒcœtÆø&1¾)mR›°jâ·LÜ/‹IÑJ1¾IwŒqÒã›tÆø¦4IuÂJDDDDDDÛ+•sX‰ˆˆˆˆˆˆ˜°&¬ÝnGÞÞét&nÞ¼ìýëÔét&îɶ̹¤éÿØÇ>¶Òû×éÅ_<òÉ'ýcyòÉ'Ï_|ñÅ•œKšÎ“¢EÅ·Ü®Ãçg|§í\iTñ=Ëýë´LŒoÒyÒ(Ýã›å÷vÓ=¾ÏÏõ+¿ÙÚ€n· Û¶Q«ÕFî»qã²Ù,ÇÁóÏ?N§ƒf³¹²û×iooÅbÑ?–b±8tÎËœKšÎ“†MŠo@ŸÏ=ÎøNÛ¹Ò@œñ=Ëýë´LŒoÒyÒÀ¶Ä7Ëïí´-ñ hX~'–úo±O~ò“çßÿýßï·Niñ ¶€üÄOüÄù“O>¹’û×é…^8¿|ùòùƒ"Ïo™sIÓyÒ¨qñ}~¾\ §és3¾g¹Ÿ’W|Ïrÿ:-ã›tž4lâ›å÷öÚ†ø>?׳üfkvvvpxxˆýýý¡Ûïß¿Èçóþmù|Þ¾ìýë”ÏçGöì’ãËd2KKšÎ“F‹o`¹NÓçg|Ïr?%'®øžåþuZ&Æ7éüá£X,âúõëKŸë«¯¾šºó¤Ùèö¹Çßi½–i:?÷Eb|ÚµšÆó¤ét‹o–ߤãç®Sù}a ïÍ(ؽ–Íf—¾Ý:nܸL&ƒ[·n 
Xô\ŠÅ"nÞ¼™šó¤Ùéô¹Çßi¼–i6º}î‹Æø´k5mçI³Ñ)¾Y~S˜nŸ»nå7{XSäâÅ‹†‡%t»]?–½Ýjµš?Ž>àËœKÚΓf§ÓçW|Ïr?¥“nŸû¢1¾içI³Ñ)¾Y~S˜nŸ»nå7ÖÉf³(‹¸}û¶›mÛ(•J+¹lÛF·Û…išh·ÛC?ËžKšÎ“æ£Ëçg|Ïr?¥“NŸû21¾IçI³Ó%¾Y~S>wËïGÎÏÏÏ×þN ÝncooŽãø·u:ìíí!›Í¢×ë!“É ­ôµìýëÒl6q||yŸœï2ç’–ó¤ñ¢âÐãs;¾g¹Ÿ’G|Ïrÿº,ã›ržMçøfùM:Ç7 gùÍ„5…z½: ­òµªûÓd™sÙ¤ó¤aÛò¹oÓµLÛô¹/s­nÒyÒÀ¶Ä÷¶œ' Û¦Ï}“Êo&¬DDDDDD”JœÃJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +¥V""""""J%&¬DDDDDD”JLX‰ˆˆˆˆˆ(•˜°Q*1a%"""""¢TbÂJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +¥V""""""J%&¬DDDDDD”JLX‰ˆˆˆˆˆ(•˜°Q*1a%¢ÄÙ¶ Ïó’> ¢µ[eì{žÛ¶“>%""¢•bÂJD‰+•Jp'éà Z»Uƾã8(•JIŸm±V«×u“> ¢Ø1Ö׋ +-Ͳ,Vâi+0Ö×ëBÒ@ÁÖšJ¥‚\.7r_.—ƒiš°m•Jſ߶m(X¹\F¡Pðï«×ë¨ÕjhµZð<…BårÙNÃ0P«Õ~<Ѭ&Å84 xž7cŒoÚt‹Æ>0>þëõ:Ðh4†® áyÆÈ5C´¬¨x–Û,˘¦9£¶m³¼¦²h¬›¦ ÏóüØ5 •J†a`lÏ‹=¬)Q­VÑh4®ëâÊ•+þR¯×ýû,ËÂîª¢³»» @UP®\¹‚V«5r¿çyð<»»»CÃÐêõúÂ'šÅ¤—ûe._0Æß´é}ù}\ü7 T«ÕÈ–~ÏóP*•üŠѪL‹ç pŒ²¼¦M²L¬{ž‡K—.ù–e MÙ`lÏéœwvvvàüÞ½{þm¦iž×jµó{÷îÜ—ËåÎMÓ½|ÿíééé9€ó/~ñ‹#qM´ ÓâÙ4ÍóÓÓSÿ¾pŒ²¼¦M±l¬Ky,¾øÅ/úõy "¢­“ËåpzzêOåó<•JGGGIÚFbšáÄÔ¶mÒõ¤¤µP(À0Œ¡V›pïQŒ‹ñàâaaŒoÒÁ"±,ÿGGGð<–eù ­Ê¢ñL´i–‰u©ÃáèèÈßv¬P(ðZY]JJ¥Çqü!_žçù‹pÈðßà}Ág‚« ™øM”“b|Æ7mºEcX.þ ÃðW¬äÖ ´*³Äó,±M”vËÆz°gP=®ò»8ö°¦@.—ÃÑÑJ¥LÓô[edùëð}R‰•’z½îWj\×:Ξh&Åø$ŒoÚt‹Æ>°|ü—Ëe”ËeT«U^3´ÓâÙ0 Ôëuø#ѦZ6Ö+• ,ËÂ¥K—P(ü‘1år9éSÛHœŸŸŸ'}¤¸®ë·„Ë%þ•Ëåü}Xe¦““ÿoƒ‹pÈ02¢´‰ŠñY0¾iÓ-ûãŸÒg\<ë,὆‰6Ѳ±.=´†ap*Ó˜°¦œëº¸téÎÎÎP(àº.J¥jµ[/‰ˆˆˆˆHkœrÁ! 
žçùج‘îØÃJDDDDDD©ÄU‚‰ˆˆˆˆˆ(•´lY~ò'O<ñDÒ‡»»wïâýïÒ‡»—^z ™LoûÛWþÜ<À›ßüf'}š3{òÉ'·"¾Àò[?q—ß›’¬,¿u”¶ò[›„u›lÃ…`+ >U,c) Óh[®e¶-Ÿ;ËðíÃò›t·-Ÿ{ÚÊoÎa%"""""¢TbÂJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +¥V""""""J%&¬DDDDDD”JLX‰ˆˆˆˆˆ(•˜°Q*1a%"""""¢TbÂJDDDDDD©Ä„•ˆˆˆˆˆˆR‰ +¥V""""""J%&¬DDDDDD”J’>ÒmþŸË©Ï0  PP÷ÕëÃg@¥¢þÇuÕó»îè}ƒçuõ{¡0x|¹<ù¹)ýGÅÓ,Ÿc0 õ7‡—® ´ZƒÇ˜ælÏŽ]áºÀÉÉà1Žœžªß[-u¥¢^;Še©¿‘Ç´Zê6ynßÄw«¥ÎµRQÇODDD¤&¬B’®pEW*ÀÐh¨ ¹$nŽ3H…TØ¥bN®».P«©×’ç&–58F¹P¯#ÇlÛêÿrLáʵmW®¨ÛŽÇ ΩÑPÏ=©b.¹œzîRIýnÀýûߊ¿ð^IúcÕ’$ŠÓ’§`|V«*v‚ÉmÇšX­×Õß >cÛV—8”Ø“$×óÔc$.%–äu õxÃP rmÕëê1—³$†á8,—Õy•JƒÄØqÔ}rÞž7œ4W*ƒc§R¾æGïk¯}dmŸ9é-Üè :‚u}©g;¬~ôGŸ@.÷çzm&¬+à8ƒJ²T@¥²+•ÇBaP•Ä ôü{]̦©#ï!0|Qƒ„a¹Üèk·Û¿Š;w^šÿɶœ4Šˆàg%IW8a-•q纃ë¤PôF†?WI6§5L__^sRf8‚åòàv×>Ï`ÃKðÜæaê©Õ†ß£àóN»öÆ ^w…‚ú,öö~À‡{B"""ŠÔ‡¢Hr'£Àr¹áú$…RÿFóŒðïQXÀ î6®+,—Sõ¥\n㘦ªÛȱÿ•× vX]½ÚÃë¯y®÷‹ ë$ñ³¬A2*•ëZm8‰”ÊcPðwIN%¸¤ g8áŠăõÕ3;îofašÃö¨„pÁcZ´OËs]ßáÏ7øÙG%‡áX8= u-TA7-îfËàãV96|N«jË¡»DDñ›e:Rи©ERy~çHƒi¥¢¾×l[=NN¥2/Çáyƒï–¨N ©V«ÀÏÿüG“~ëhÒ‘EêIᘒ)n"ØS)u„ðˆBÇQQ¦9œ†§2MÊ;Æ ×ÝféÀŠÊqfÉ>ð¸sç+s½ÇLXÇBN憃M~/—‡{eìu ß¶*ã’U"!­uÒ¸qp0ˆÃàô¢q¹Šk…ˆÒaRÏÀ´Æ™B‘¬¼?öØp%H¦ „箿üòøÀî$ýVЊD­O±&¨…‚ú. '„2bEÔƒƒÁ4IÓ‘¢DUÞÃu)YCâRŽK^'ª2%ü˜£#`oïÇ&ý±l-ÇÄ0\7T¹dÛÀ½{ƒÇE‚‚ëN…;¢„ÖBaðšÛ† kˆt¿‡ç¹«íE$J’ç©á½R £½Ÿã _"J'©dE™eNypz 0Ú»\ÈN’€ ž6n:Ì½\TÖÂÇõÅ/ÿ~t ß½ÛÃùyÒï8M" ¨üxÞððCÛV±ašƒÇ{‹ÂI¢,Ì(1L%¹Nùè—Y’ ~g¦›Ôy&-zéÒ Cþl+•á©L ?&¬}R·,ÕBvv–ô­^pƒ´såd¢õŠª4YÖ`±/@}ɰ¯ ié·m•V*ÃÃÇU˜¥²^àLFÆ`äP¸b¶èP±IëL^Ë}ôî°ƒuí¤‚ïyƒXÒ\˜Qâ)W«èm ßÇ‘<VkÛƒéH2;¸Ø¢”½áøYz0Ù0±~±&¬ívÅbqäöN§ƒL&ƒl6ùwÓîƒç© ÞÖ®všß¸øÒã»»êß““áŠ1Q”M‹ï´ Vš€ÁB™§$•&é] j4+2JÏ©,Ng𣽑³ ? 
o¶Î‘Ciªä1¾g'Ó£¤÷=¼X\­6œH.²8"­Þ&ÕÁWAFÕjà è¤x\õ,ŸØÖn·‹½½=8e<»Ý.jµ:À4M4d‰î_V°Â¨nYè(—[í¼QÒ[T|ËíIÆø$‹önÐöÙÄøN ×U jp%ÅryþJ“|KfÅ2€ñ=Žôž·Ë“z’ íŽZƒ‹È¥Oëà«$å¬e F|D­ÃØÔÇÊÖn·‹N§ƒããã‘ûnܸl6‹[·n¡×ëá©§žB³ÙÄþþþL÷/+8`o*ÍoR|ÉÇxëªÇBaxN-Î,éçº.\×E¡P€1®Zî3 … Fð5 —Ë!ª%Ž{î _|qîóÛ¤øN‚e ïÕ+G}·ÈÊÀðºe¾ã¼¸Õa|÷­–dT†Gãõ¤Í’æ:ø*ȈYU:œH+OXÛíöH… PQ»ÝÆ­[·™L;;;¸}û6ö÷÷§Þ¿ ‡E1ñ ó2'Íûže.%c›ã{ÜB{Á…8ïß–ÏçÑívgºX‰˜O0é ÷ ¹®‹F£1R‘&Šãz‘\×õ+ÚfÿC©Õj0 c(‰I­ü˜¦é?6êxåX§i·Û¸³ÀŠãâHGŒÞcU §=æÇñ²pò躮ÿ¹ËmÁÏ6øxÏó††.I2+I]¡PˆL,Ëå²§–eÁ¶m´Z-˜¦‰ÓÓÓ™bi^æœJyÁÉÆÏ?ÿüܳ)ñY%^VË–E‚½¥´¹¶-¾¥ÇTŠ\nzƒ m¶4×ÁçÕj©eû ÓäÚÛlm«O ú^¯7õþL&3ñùë·~ ŸùÌgÐétpx¸y{W“8éE”J|°F† †¾ ¨ÞÍ`e?8œP’…q½‘Rñ—ı\.û_©T¦&§ó ?Ï<•ûY“‹Û·o㓟ü$Þüæ7¯ä˜EÜ1þÒK/aoo¦ibgggâc']s²ƒ1)1gÛ¶ËÁ†ˆ`£Å¸á¶‹˜Kåryáä0möööp÷îÝ•>§®e¸¬ÐÛj # ö8Qú4›M¼ð xì±ÇVöœi*¿WÁó€+W#ldKJ?)¿;ÎP¹ŒM(¿m[í³ U6Ë¢¨š|=o=©ƒÏ[~¯-aíõzcï{øðáÔû§],ï}ï{ñ¡}(5C&‘áR1·,khhŸô ëM w´'Œs–Ê~¸÷*x߸ÞÊ(®ë²,”Ëå¹{‰ÒdggÙlv¡ÖIâŽñ'žxb®/‚¤ZÑý¦ÒàlŒ(—Ë83¦gUÛìððp¤iYº•áž7Ø ²\æ\¨M²¿¿«W¯®´ O[ù½Y„FV‚çüÓÍ$å÷ª’U Ýå· Ww52LÊáàèÒÇ¢uðµ%¬“.¼l6;õþUHrn†ëºhµZ°, ¦iúI¢iš8˜óÀ¤B¿î¤1—Ë¡–¦n»”ICŒ‹u'«Ò˜!/ÒàR©TÆÎ³¤Í’¦ø^Tpˆ¤ç©ÊçB°ùñ-ûén[<Ëá†ÎV«9e)8åhÛ¶GFŽI'Á¤zWp …\.çwÌRW“ÇÊô•·½ím+màHc|ËvI–¥ÊäY¶å¢íµ¶„õâŋ԰ þàÿ§Ý¿ ëj©‘^¥`/“iš0Mggg±Ì‹£ä¥!Æ…lÝùBí•õää„ ª¦Òß‹*•Ã"s9‘¤M‹oÙ‚PɪiêWá·m{hŠH0á /V'k\är94 ˜¦9vŠÇ*§šu•Ó£V=B&ñ-»pt Íbm k6›E±XZq̶m”J¥™î_–ë6¼Žƒ,Ø"óò …ÂÆ¥ù$ãq’¡ë­þ*2J`ÒÐ^Ò‹ñ}tÄ!f-íñ- ‚IC‹ëVü­Õ6g~_xw`xÇ€`"­#Æ%›òåº.NOO×Þpšöº^ã;ΆuÒÏÚV@Í;ÙÛÛC»Ýö'q?ýôÓ3ß¿ )ÜW]¦8ŽƒÝÝ]¿••÷í–dŒ ÛV›U|Èê»¶m³•RßórõýˆÓ'˜´+üQû ‡“œóós|û·ûJ'mñíyê'—ü+oÓ&Ìïó<Ïï%•9²j;0HHW1uÄ0 T8db¢´Ä·ã¨aÀÜB‰æá'¬ívÅbÁßmÛF&“ÁÕ«W‡î›E±XYè'ŸÏãÙgŸE§Óñ3ÏýËô«R¯×aÛ6NNN¸PÌ–‰Šo ÙZ6§”DUÙŠk‹J§´Ç÷¬dQ%¶#Æc–ý¶Epû(©\>¸-UðñÂ0 !BÏóðéO¯¿þúBÇöø¶,»¹œê9•Æ–´V3‚ÓDd+2™*"#θžÁz¥­4Ã4b¢:jµºÝ®ܶm£V«ùc×qýúõ•,ÅžÉd&^Óî_T.·š1ò¶mûÃY‘§(IŸ0ÍÅgÇA«Õ‚mÛ¨T*œsM#’ŽïYx°»Ë}'ç5n˜¦û$) ö–M{^IH …ÂÒ#5 ÃÀ>ð•¯ô¤#¾eñ¤´Ž2•„TzOA¹üËÑf锆ø.8g•æw¡V«!ŸÏµn#ŸÏãðð™LÇÇÇh6›kÙ;,.®«*-‹V\<ÏCµZÎÛ#-ɈZ­Æø¦åyƒÅ•t!(Cge{³qóû¤·K~‚ÉF8q”½·Ñyyår™£‰¶”̵,Ë_àˆÓ hËÔÅi{]èv»¸uë–¿ÇR¯×C§ÓÁõë×ýÛvvvÐl6G† o’Vkñž'˲P¯×Q©T¸­ ¥^«5ßð1ÇqP­VýU¬‰6Ù¦&«’|JÏfÔ…BÁ¿/80¸FÙ[ž¿P( V«1ùL)™s4IRmÛ†ëº~‚Ê¡½´ Y,,­#(.‹Å¡ ¥¥6ز:mÃ`µZ-´Z­DV#Z„ãÌ>ÜF 
;::Jý*‡D³HëJÀ’,JRLHËф‡WÊcd!"^«úKªºÑjµ†âTæ ³qƒVŶÓYFSº]èõzC7t:äóyí’ÔE&yW«U‰tÎã£Mẳ}H²ÊEÃhÓ¥u%`×uý9áü}"¥ñsžÄ3øw¤·uï§\dOö/åP_Š«Ô4¯ NÇOR{½lÛ™«Ún· 6ÞD®;_‹¥ÌW=Õm'nÚ Ó¾ ˜¬’.d¥4ìé'Ãr%Q€J¥ÂFOJ­F£V«…r¹ÌEöˆ(µ.‹EìííaggÇß{IZ}{½îܹƒ7n X,ú«oªYÖz½€«ÜÑfš¶Ë„ëºLVI†œ%Ûb/SG Ãðç”rÏbZT½ÿ•­V Fƒ‰*­·µ¡E\888@³ÙÄéé)2™ ®_¿Ž|>¸yó&ŽQ,q°%;üÊð-.>C›jRêyvwwqppÀd•6Zµªb½RI.Y•J¿išLPiefÜÚvnÒóÜ_›1Kë6ë´%¢  ’¤FÙÙÙÁÎÎÎÆ÷¬³]Á–ˆtT¯×ý9JD›Êu{U®[x¾+ý´j«®‚c¶V«±G•Çð£y]ÔUÙ€ûêÕ«þÖ5:$ªbZ±ìAɹF´éÆÅºÌËޖѤ/×MfKéQ­T*¬ôÓFnËwïÞ½¤‡(öá ÍfÇÇÇþ ÇÇǸvíö÷÷“>¶µ‘Š<‡“£ <ÏC©T‚išLV‰`Û6 Ã`¢J± ®x½¬`C¶Ø¹® ˲X‘'mXÖpe§Õj!—Ë¡²Šæz¢”pœù¶)›W«ÕÂ¥K—à8NNNpzz:מ©D‹²íåæ÷Ɉš\.‡££#6°PêXVÒG@›è0é㊕·®%&‰ÖÌqÔ¿žç±òB4÷&¦´98˜}Ñ%‰_îr@Dº»° û­†·ùp‡½«ó°xÊŒþÿþO®›(ôwû?¹þ j)s€  Öÿ½Þ|%ð»Ùÿ¡©¤eÞu] #m-Û»*ûªÊJªDi1k±[²ê@}ǯê;7Xÿ ×ä÷yI}BêXàx=Õþßô¯CÕsä¹ÂõZû‹hQúÏaí'Iá54ØûÁPÂà @*…^ÿ>Ñüß =‡$°.Ô•¸ÍÄàËK¾¨Âk^™M€ýŸJè5ú¯³Ûÿ9ÞRàqÁ/ÀÕŸøÑ'ðùw}>éwü¥)­ëRÁJÔ4§S~r0å÷(ø#€þqÖ"çbtäƒFdÕ÷yqJÚÓvÚ«V«8::Šgô¬†gLÉÐÛ2TBec0‚É€Jd[ôX¶ú™7§–iT½D¦-ÒÆ2,o—•DÏ›4«ƒHÏ«'èQS§ä= 6`Êw¥ÞƒŸù•ŸÁ‡ð¡NšHOÚí6jµŠÅ"žy晡!R¶mãöíÛxê©§pxx¸™½­@í Nœß"s7¸ËÏæ Íå +çhGh ºU]+7›@zÆÙ›õ‰6˼«¨ª’oš&“UJ½I!*+ZÇ6”]†þN*e*O0±­T¹´ì(«;j ºoYÓ>Ѝ²*ü~@%¶6É´¦åw.Ç!Á´˜ 7nÜ€išh4#wJÁX«ÕpãÆ <÷ÜsIï|ú-~ÆK¶’Úù}2dõ*™ó±I•uR*ð[»snÈ=Q Ó½(ý¸0mŠI{TÊÞÚ± ÔwÉ*’:Ö%æ'sc5Ç: -êMÝnÏ<óÌÄ]»v ÝnN'éãO?¯ÿ!Ò7DNz5¯ô„ƒáùu»PþKý« ß >Wƒy%ôÿ ÿÈûÐèÿ.+÷nAA©­`ßì5lÛvzdÖM†s„´6æ™ÑÑjµàº.ç¬ÒFpµèR”z½ŽƒƒƒøÊv)#ùÕAD)t!›Í"›ÍN|Ì_íõzIïìd…Zôs4YBjÑ‚ò´‰"qCeá— Ô0ÜàD¸õ)Ü+$ÛÂ`o¸s|–¡½5Ä3<†Ö« Tÿ pØ¡j#‡Ëv ²’´uÍŠZÜÈ Z­†ÃÃÃÈ„´×ë¡Ùl¢X,nÖÖ6í)ZP ôh®l[ƒe×eÕ·à— TŠ™lñQ± œê]ZiåF¶,Kûã\öÌ“í ì—+‰mðñÜ¢ˆ–0-µ, žçÅß#¥iD’Ñ Òà$ LÁíEdXêZ6ÆÖÇ<¸ F\gÌßxýÇË”yi¬¯eÃ[¾ê-I¿s±k4ëÖ^ç¯m9ÏóÐh4à8ŽŸ†\.Ã0üº¥aGª†££#ìîîâèèÈñ´Œ ׯ_ÇÞÞžzê)\»v ÅbÙlÝnívÇÇÇxøð!®_¿žôû8»Ð\Œiq/¤õe[ÃâÃ{·•㌟¬Ü›eÖ¿1ŒÑIm®;XÅq÷àyxûÝ»I¿ Ë+^­šR±œT™—Ź‚ {MÚÎU¯ãW‘Õ /׿j¶=øls9õ¹NÞoY£KozÞðfžádƒñõx@*áX­GÔ¾ƒW(Œ¾–m«Ÿàk{žŠUyþðß8Îèk…ö xÿýû1~ÉiµÆ—çÁ¡À[«•`@]Ÿ- æËxLê;S¦¯HÂja(šýû‚m½²½I°ñ)ª!ÊÆ`K))SÊã›öQy$¸"¼íß¾ _“ûš¤ßí•r ØÖØjµ`šæêF H1n˜Ï—(fõúè×­Op¯ëºð<…B…B®ë¢Õj¡\.¯ä;´P(àèè¥R '''K—còù<Ñl6qãÆ‘‹EìïïoVïjÔ>bU¨¹0eA©ÔËJ¼ò….­ÉÒº¼È'•ÒpeØ4‡¿¥€A‚•¨†ú ×Ú\W•òøBaÊsEU²K¥Áÿ 
…ÑŠy¹<úZõúàqãÖâ|°’=n3.ÓŒ>¾`å}èóòÆ'¬Á$@þß?æÌ»ß |à |ˆ)Ò?Åœôbúo‰7ò8ÿzöxÊÜÑZÿ6YðÈíß–ƒªLÎRÆÈûzŸýØIhõ°VK%’ò˜¨¤óà`ôvyþ\nðÏ7*V=oôš“ç˜Tã'êoÆÅoÔmr{Ôpí\.úï&É倣£áã﫚&¬“ 9£Fò»‹Á¶i9g$á¿É!º!6ø¶FíÆfèïg-S‚¯þ¾­/}ÛKxpçÁzÞÓ5)†‹‚¥{Weº’ ²0Œ’‘z Ž£µˆkÆEs¶möm¸®ëá- 0M…B¶mûÃ}ÏÎÎVºîƒiš8;;[ɺAµ ðáá¡¿uM§ÓA>ŸG>ŸG6›E¯×C»ÝF±XLî]Ÿ‡ÿ‹Ø¯£˜¾µ… g¬@Í©“aRÀ`xÔ¸Eú\wPñW–.]$hr_¡à÷öErœÁÁ‡+³áJºÞý$Ö4 ªôH_3L¾%9ŽJjÃi"+›À°ÈßD5¼ÔnsNøN•@þ¨¤P0èm‘Õ¨ƒ•ÒT¥5ØQ'­í"‡áXŠz_ëõA§üc(*VMS%—óŒ‹ÄÝ"Ã+r¹Å^kZâUhOKœ£H£Õ„×~õöíùŒéÛh4`Æv®büˆéUK`ü‘ã HÁ"géÞUªl/ct71K7ÑŠ0Yçy~â)ɧôžš¦‰£££±eI¹\ŽuòU-r{!ø‹lqžÿÖét°··ßb.1’z´´Ž= ªrlu®VZŽxbËL"œ4<ñÞ+ÊLªd«Æà>ùÿ<=7ëX9™V£ß€â8 | ~åÃo!30º5RA7Ø“á8ƒøv]•\ŽK¬¦%vQIé6÷€ÑR¢Š4Û¶aYÖv¶– J…zWe(wªáñ,é³ ðýw­–ú99áPvÒ‚mÛ8:Ú¢½Å, ¯Ðë@%¬IOß• UT¹2î¾àjóB’5YX,ÉÝ“öæ^¿GgãÈ©Á¶U˜£w•ÃÙ½aÜB™Q½û² þ¼Ø³0Yá}[r-ÇqP­V‡öB]T¹\F½®&«§º¡6F©žÃÚétÉdÍf~ŽpyäyþÄŸÿÀG7ÊœUD´FËÆwÔw­ 3bY9@‰XEŽ:Ô‚B¹ÑŽã ­œ¸2.T2¼d< ¨$A:YrÇ„{Fgm@÷ÿ®wÍõ”d\´èõzèt:±n6›8>>º­X,Bz{»Ý.jµ:À4M4…Þ/`´yäË€=O´BëŠïà(¦ Â*æ‰M°® †Mö+óáï}×uãÙ{µ •ˆÊ¥t*‰­@%Q/9ODrÝAâ9mOoÓ?л&¸Fkmîïö/GŸT¿‡WKæ>oRÕEº’¥7ôà`8–]WTœËµ1ëÐ[šÉºc\èú¶Z-X–ÏóP(` ÃÀÙÙj[“$_“mnÖJ’Î`"*·Ñy”e ß&I™i¢w÷.¼þú\‡0ÒÃzóæMÏ^h.àþýûØÙÙjuÈd2þÿoܸl6‹[·n¡×ëá©§žB³ÙÄþþþ\¯Óh ¦ÜLÄd•VhñíºªÎ» R ­« ÷Ö8‘JmÛ+i¹ö{Nm z«Â×Õ¢õé%  Õjóïlã䯬5¶ õ­@í/ n’JïTҽؒŒÒpãŒ$¤A¹¿ÔÖ`íåw_£1<’`“ÉâI–eùsRãîõ”y¬®ë.7‡5˜tÊ(ù.=ºG& ÃßEã“ c0Ç!ƒGÅWæ\4/‘!Á>D±XŒìÅív»h·Û¸uëuíììàöíÛs_,–„8âLĉ€õÄ·,¼9z;7:£ø­« ‡ U1¯8P/R¦O\{á!¾‹Ö“e¨nTУæÎl$M±µÅ6¸@®¬éš<²È—i.Ö@C±YkŒ÷IÇú¦'¬¶mû½ÍårgggñŒò‰`š&J¥ÒôÄXzC£9ww÷Éq~£Î%j~IXÛí6LÓD³Ù\½zÕ¿pîß¿Èçóþãóù<ºÝî\¯!-ñá÷>õã¾iã­+¾£xž·š^'¢ ÖãTÂZ€êýÄpî·ðüU*Qu †TŽâ;òwýž¢ðÞw…Âh¥€C7ÖÚbd»÷*üÞÒ™Õ«P .' 
½IÒ+##¸woø1Q=5” kñ¾Mï] &ªµZ-‘z–4Ð …A/©Ýÿr ŽbPŠÞFâ$©BcyoJê…eü|¯×ÃÞÞnß¾ /ŠIûÁþÖoý>ó™Ï@ö•µ¬)kJȪr´unß¾üã±ö¶¯:¾ॗ^òŸkꎭÖ`.m½½=ܽ{7Ö׈»  ’ÊÀÐÇࣙžh¸„Áʾ’ŸA-Œ4-Y•yu¥RôýR1þ0YU³ÙÄÇ?þq|îsŸ‹åùã.¿…ëOërû?˜Ðè( jDÀ:뮫*UÕ*pé’úW†Öj£É*-EÊo™C‡µ”ß}2•qÓÚÒ=ÏC½^Ç¥K—`YjµNOOãOVm[eøõºúÞ¹tÉ¿«\.«×÷¼Á5hšêËñÞ=5¤þôT5¥ô —:ø¼å÷Ú{X»Ý.®]»†§Ÿ~Ú3ŸÍfÑl6±³³3ñ‚xøðáÐ8û ÷¾÷½øÐ‡>äY°íÑ©CÃ%-‹­[jggÙlwbØt>®ø€'žxÂ_á§~*z䆜‹î ¢­qxxYqX…u•áÛÆ†ç¯N݇ÎÁ`Qj±$1é;ÜqTcOðâ²,U)ß²MÚÓlW¯^]y¾®òàŽ~0 P é¸vìB [7ïV6Q«ùGð:ˆ”ßÁ^ÎUYkùÝ·iëmxž‡F£˲Ö3ì×¶Õ÷ެþ.?Ãé‡öoÝÐfÑ:øC Ö¨ÊÎ*ölÍf³#½³³ƒf³‰v»=ñuimYH1_®ëºÒ¹xÅ`ñ ¨ŽžXf“ÇßPj­+Æø‰¦óÐ8N~±ÿû¤ÞÕÔ_éš·¨7Íá/ ­ÐüÖÛ Àü†~U$5ìÝŒX‡@’ÕÌ˳’’¤±&œ]ÈÈÚxkq¨Š™šF®ë¢ÕjÁ¶mT*Ü[å骀 Œ~§ÈHŽÌ™èµk×FnŒsK›v»n·‹ÿ¶‡P“»¥õ¦ÛíúGðÿ³šXW·Ñ’ÖßãÂw¨‹\P ÖãA­Û@ùKƒß=Y?È…ZœÉÃlóRm{t¡Ù×”¶ÒZc;ÔdL`žöÐ`ƒd5ްt]5ôжUïéÉÉfd´°u—ß›Òv^¯×ýa¿KmG#‰©L9“­`dªHÔõÅkn&–]¦z^™LÆê -9ÇÇÇC¿‹Å¡ÉlÛFiÜü¡\ýŸ’²Žø¢;}†*ñ\-˜b²®”œö¿ÞÁch…ààj¿5 ÿâyƒJ:÷ঀuƶÛïÈ7•ŒFÙ%¨˜^u²jYƒ-g*óÝ"k-¿±¡Uí¯÷1Wª¬‚nà”áo• ‘[ûÖ|>k׮ᩧžB±XôW$ nJ¼¿¿½½=´Ûmôz=d2<ýôÓK¿¶-ÝñÁý„ˆV(ÉøªÄ3¾)&k‹ñþ–6þzÔðÈ~ýÀu[=“êÁUM-Kõ&­xSwÚ|ë,¿ †á‘¶þb€uŒÎ¿^ÛVjóÝ:IÖQÒ¨^¯Ãó<œÌ²znpè<½<׉Íèt:8>>Ƶk×ÏçÇÎ ZÕªªûûûØÙÙñ/”ðä|>gŸ}Ö_!m•C”MÓdÂJ±J2¾ ÿç ÑŠ¬%ÆsNk·?¤Ì‚Ÿ°:ŽƒŠì 7©~Ðj *2‘êô”å?µ®òÛù©PïSÿÿ~ý«‚øæ¬nB·Åf]1nYª:’¦edžªüßu]œŽ[*¼u“,fÉ¡ó‰¸`Û6jµ²ÙìÐê_ûûûÈd2°mNÏ>ûìJ_8›ÍNŸÉdâ›K+«2Å$‰øöë·"Ñ ­+Æý%<øóü.ñ2r»¹é Ñ+4·¸cÛ{ÀW…nìÏcýèo|Tý¾LÈzžj¨‘á iÊ(ÖQ~§m¹˜F£V«…J¥‚B¡Ã0&olì ­ÖKëwáÆ0Msh8 ZXŠÅ"vvvP«ÕpóæM¬{¾kl¸°iȲ,IË9‡zѦë‰ôì(@ÍUÔþmŸÂôJ=‡gQ 9ŸÌo½ý?ÿàÆ/ã/ã{ñ½ó?© W”_*6ÖP¢ÒÒNâ8ªÕ*LÓÝ¢FzN=O]3i<\èõzxæ™g&>è™gžÁ‡?üaïœMå#cež4ãºîðÞ”¬¤Ð¦sÔ€Š×üá¿_úo¾4Ø‹Rz“Z-÷¥` Ƚgôöÿðÿþþ(‚™ÉÞÁŽ3è bƒ<€Áê¿GGGjJ |_8Î`z`¡À¼`là±;Î÷g¢†‹Å¡áÁrÿýû÷7:a0y³y¢ %\iÃWÕ'üñŠjxô[È=(•TeãìŒC¶h#¸¿Tö–~E¶Ìà¼TJÇQþIuRŽôª:°»«¬\æêØq…£ÝrPóò-àþÜçó=ýÙ)èðð0éÓ^9™\M¤#˲f[åŽhSäTCxx%U$$«•ÊèP.¢3rfôvÛ¶‡‡+Îôdì¢ôIríG™«ê÷ª Ž>XŽ$¢¹þO«ÿS†Ú~KÖ™00º—k'ÀƒöàÎ|/¡×ë¡ÝnOœ\Ýn·`¨×uÓÈDk"ÝHC G6FôH-µú+WT„å:m˜Ê·bìü뱋À¸.P­ª.+Vº)å\wým)®ë¢Z­â[³ÙѹªlØYŒ µB¿ 5 ÇÀ`«­ †·Ýʈ±GýMù|Íf½^/ò½^ÍfÅbÑßTxÙ¶­V™ïA¤ ˲†cvw“>$¢åxrѹ¨çyØ}ì1Ua²J›hÞÁ0¶=MÀd•6ÀºWn4(•J8Èåð#¿ú«08¢r:ê»Vþ_‡Úÿýÿ— ’UjŸó#¨„4¡¾‘ ׯ_ÇÞÞžzê)ììì %¥wîÜÁíÛ·ׯ_OæWÀ², kXå ­´Z-œ nð¼ÅŸŒ( lÀ{๣ŵçyøÚ¯|…óUI;Cû 
YPìä„É*mŒuíéy~hoWåWðëø‡xÀÕ±ÃZP‰g •ˆæ zMsô ±ö”.êB>ŸÇ­[·p||Œf³9ò€\»vm£[jµZ8Îüf “&ÇA.— }a«"éà°Z€Û]´Ãu]ä¹'ijhjGpõkÆ;mˆuõ®~ég~Î_ûkØ{Ë[ýÿGµà¯“A‚z•ˆ0HNÇÉ!±žÓY]Ô*Àׯ_Çþþ>:Žg>Ÿßèy«À`»^Ⱥš}ˆÖ`(¶ßl…' Œ›å&19Šh ¼àèÛf²JÉqâïzõ{¾¶mãÑ¿ö×ý‘Iú”×KVâu0ÒëB ÛTâ)É*0ºÒ†ºü%“ÉL\|i,¶äy¬Ð“6lÛn‘çp`Ò„ã¨5•Âæ^E•hCø «,°trÂd•6NÜK X–…ú¯ü N~áÆ/R¦‹*Tb*³¾džijeÞ¨Ó×´=÷ÂòO‘n#sB8d’43”°®£i“h ¢ÃxžÇ„•´å—冡zVY– ©×ë°m{t`¸PCy]¨R@mÜ.6…sK×Eû„ÕuÝá =¿H#Žã î†bŒÓ†sÝè0vGÿuÚJCà ƒ=«D€ßhå2ªý-ÍNOOõHVm¨äÔÅ`SÙÓThÚ[ºˆ7%}qò7˜Üô!­”ëºÃ·ã°¢CoÜ¢žçá?ÿyU!ÒcˆZ-àÒ%µ³G­æ'«GGG›‘¬zÌ3—wt0Þ{5ì·†Ô/~”­{XG¾Ö½1QÌrán¨\Ž1NZˆZWÉqìüþï'}hD+çrº‘êQ­×U=¦?,¾Z­PÉjjÉp^§ÿ#U³2óLï…þ¦6ÛS“¢uÂjÛöpKŒiruIÒ†ã8£-i.ЉfT.¿ïo~3eH;¶m«é2lz‘ˆV©^W ëÁ_WOu²êa°oijøî¸…hiZ'¬žçÁd‚JšòN¤9Çqð ¿û»LXI+®ëÂu]5"¬^W#eâ^j•(MúC~qvæß”êdu*a=íÿ~ºÄsÑL´NX]×eÂJÚÙÒ†h |í+¯°÷‰´bYÊ2¬Àu'1 ÒQ¡0Ô™ªdUEr0X±—=©k§}ÂJ¤3&¬´-¤Š«`“nZ­ΤgÉu9‚€¶ZâÉj½ÿ¯Õ‹ ¨á¾ÁA¼D×NÛ„uh;ÇQ+q˜ iÄßÒÆuÕ¼Çj5VèIK­V ÿÓw|ð&­·§-#‹CnÄʧD1«×U¶¸ÖdUL’Ed`f\±7E´ýæ÷W.•Ô¹‡ÙV\×…aÛ*Æe“UÒ”eYø‹æÏp80i¥Õj ¦.Ù6ËpÚž\¹2tS«Õ‚mÛëIVƒ»\Ö1X< P « &«)£m«ã8øÎ¯þjõËÉIÒ‡C´r¹\N-ÐÑ_úHW–eÁ4M¼í÷~+½“V†*èòNÛ¢ZêDjµZhµZ8=yõ"@ j¸ï)TRÊa#h›°º®‹â/þ"ð¯ÿuÒ‡B´rŽãàoÞ¿Ï^UÚ –e¡R©0Y%­Ø¶=¼0¤ë2ÆI®«~úIÁd5¶¡ñ.TOª5ô——ÙÆÑvHðŸýÅ_Ä[¾ë»X™'-ýçÿïñgÿ¯ÿKÍY%Ò˜ëºp‡+¾“vÇ^8ÏqXg!ýÙ¶ß»jYV¼ÉªµM @ªW•_%IÏ„Õó°óê«jóa" =þOþ ~í/þEÎç#íµZ-µ¸‘fü½WE¥Â„•ôgÛ€iÂu]Ôëuœœœ¬6Y•mh57µàTÂJKË„õßýóŽö·}+ó¤­Ï=ú(¾òôÓIQìlÛÆn©”ôa­ÜÈ`. 
IÛÀq€B»»»888Xíö|.Toªìj™{T5¡eÂú5ßò-xõ¯ÿõ¤ƒ(67ßò–á–y" yžZÊñT*jk2"MxžÇ}´iûô“Õz½Ó4Q^E#M ƒ½SsP½©ÜÁR;Z.ºT(X™'­¹®Ë}ûH{¶mãÿóæ7…{ŸH+þÖ{DÛĶñÅ?úGaYîÝ»·Üsµ4 zP™ jOËV"Ý-]Ðm€ßüùŸÇ•ßüM®G@Ú±m{ÐÃjÛj_J"Ý•Ëøÿ½ûÝËõ¬¶\‚ö{ àÛ~´Ç„•ˆˆRé?ùI¢•s]Wõ°–Jj§xЖ°mGGGsüT’ZƒJRi+±‡•ˆˆRçµ} í~0éà Z9?Y•…Ä8?›¶ÄÜs·]¨^U^"[=¬DD”.®‹?rv†?hµ’>¢•ó+íõ:pzšôáÅÏóÛ†Þ{xš€³¤žÒ€=¬DD”.®‹F.·ø<'¢sãóŸW=«ÜÚ†¶A½8Ç™¼à’ µòï%ÜÉŒØÃJDDébšøùGAƒóúHC/ÿÛ‹÷v»ÀýXÒ‡B?YTìàÎ¥KÑû{ªP kjQ%¶åPV""JÛ¶¹G%i뇿áð¦¿úW¹Ðé¯ÑPÃON`Ûvô¨*Y­€û©ÒXLX‰ˆ(U<Ïãp`ÒÖãÿïl!ݵZja±þ<í± ‘ p/UšŠ +¥ÊR›Ê¥“UÒã¨ÞÕ³3À0ày,ËÂÙY¥T‚Zp’ôÁÒ&à¢KDDDDD´<Ïvw“?Y-•J¨Õj0 C%«6Žˆ§90a%""""¢åypp pW®\ÁþSû¨äúT+P½ª\T‰æú„µÓé Ûí&}D±`|“îã¤3Æ7él¡øÎå€rŽã`ww¿ð—O7ŸV‹+-(µsX»Ý.jµ:À4M4¤‹h%ߤ;Æ8éŒñM:[6¾ÇÁßù¿ÿüò»™_Ïp›ZZj{Xoܸl6 ÇqðüóÏ£Óé Ùl&}XD+Áø&Ý1ÆIgŒoÒÙ2ñíº.Ž¿ë?÷‡?‡ÌßÍpø/­D*Ön·‹v»k×®2™ vvvpÚ_›h“1¾IwŒqÒã›t¶l|¿ãÁ;Ðx¬·üë·\ðV$• ëýû÷ù|Þ¿-ŸÏsžHßÞÞ^Ò‡°Ífív;éÃX9Æ÷dív{kz*t½–ã“éú¹‡± ß>,¿7ß²ñýö?õv¼õÅ·jÛ«ªëç–¶ò;•sX']½^™Lfäö;wîàŸý³†O}êSxÿûߟô)ÄêîÝ»[qÁ¼ôÒKxá…ðö·¿}¥Ïû…/|_øÂðÖ·¾ûûûk?¯Eâ~í×~ ßñßÇ{ ï|ç;×~ÜëòàÁôz=îŒÎ⺖ïÞ½‹×_=±ób>ËðåŸ÷Áƒø†oø†)ÃY~ë'îòûÓŸþ4¾ó;¿síçÅò{2–ßË‘:ø¼åw*Ö^¯7ö¾‡F^,O?ý4vvvÍf“>üØu:¡–/]u»]<úè£c¸eôz=<|ø0‘óZ$¾àÿ㼟»|6¼–— ŸŒeøò6­ gù­Ÿ¸Ëï¤â…å÷d,¿—·HùÊ„uR Œ»2™L,ojmÃ… Ö‚/ÉxY$¾§ýNx-§ÿ¹—ym–áÛs-³ ŸíotÂë8ýϽÌk³üÞžk9måw*ç°^¼xÀð°„n·»-7¤?Æ7éŽ1N:c|“Îß”F©LX³Ù,ŠÅ"nß¾íßfÛ6J¥RÒ‡F´4Æ7éŽ1N:c|“Îß”FœŸŸŸ'}Q:öööÍfýIÞ‡‡‡[3ä€ôÆø&Ý1ÆIgŒoÒã›Ò&µ +€¡•æŠÅbÒ‡C´RŒoÒcœtÆø&1¾)MR°ÑöJåÖm2nSÞN§3q/¬eï_§N§3q‹eÎ%MçI£&m:­Ëçg|§í\iX\ñ=Ëýë´LŒoÒyÒ°mˆo–ßÛkâ[ŽE‹òûœsÿþýóË—/Üö}ß÷}ç—/_>¿|ùòùÇ>ö±•Þ¿N/¾øâù“O>éË“O>yþâ‹/®ä\Òtž-*¾åv>÷8ã;mçJ£âˆïYî_§eb|“ΓFéß,¿·›îñ}~®_ùÍÖt»]ضZ­6rß7Ífá8žþyt:4›Í•Ý¿N{{{(‹þ±‹Å¡s^æ\Òtž4lR|ú|îqÆwÚΕâŒïYî_§eb|“Γ¶%¾Y~o§m‰o@Ãò;±Ô‹}ò“Ÿ<ÿþïÿ~¿uBH‹O°ä'~â'Ο|òɕܿN/¼ðÂùåË—ÏŸ÷oËçóþøðeï_§|>?²º_&“Yê\Òtž4j\|ËÅpš>÷8ã{–û)9qÅ÷,÷¯Ó21¾IçIö!¾Y~o¯mˆoy]ÝÊo&¬)2éÃîõzKß¿N™Lfhôn·‹f³‰d³Ù¥ÎåÞ½{©9Oš.Ÿ{œñ¶k™f§Óç¾LŒO»VÓtž4;]â›å7EÑés×±üfš"“>è‡.}RçÔl6ñáÅb×—TœRžIDAT¯__ú\_}õÕÔ'ÍF·Ï=ŽøNëµLÓéø¹/ãÓ®Õ4ž'M§[|³ü¦ ?wÊï kx¿hFÁîõ°l6»ôýëÖétpãÆ 
d2ܺukdøÀ¢çR,qóæÍÔœ'ÍN§Ï=®øNãµL³Ñís_4Ƨ]«i;OšNñÍò›ÂtûÜu+¿ÙÚ"/^0<,¡Ûíú°ìýëV«Õüqôá_æ\Òvž4;>÷¸â{–û)tûÜñM;OšNñÍò›ÂtûÜu+¿™°¦H6›E±XÄíÛ·ýÛlÛF©TZÉýëdÛ6ºÝ.LÓD»ÝúYö\Òtž4]>÷8ã{–û)túÜ—‰ñM:Oš.ñÍò›¢èô¹ëX~?r~~~¾öw’ív{{{pÇ¿­Óé`ooÙl½^™Lfh¥¯eï_—f³‰ãããÈûä|—9—´œ'߀Ÿ{Üñ=Ëý”¬8â{–û×eÙß”ó¤h:Ç7ËoÒ9¾=Ëo&¬)ÔëõÐét`h•¯UÝŸ&ËœË&' Û–Ï}›®eئÏ}™ku“Γ¶%¾·å ¢µ[eì;ŽƒR©”ô)ÑkµZp]7éà Šc}½˜°ÑÒ,Ëb%ž¶c}½.$}¸®‹V«5r{¥RA.—0hÉÉår0M¶m£R©øµmÛ V.—Q(üûêõ:jµZ­<ÏC¡P@¹\öŸÓ0 Ôjµ…O4Í,1FžçÄã›6Õ²±Œÿz½Žƒƒ4¡kBxž‡F£1rÍ-jR<Û¶ ×uaYÀ4Í‘•Ûf-ƒY^SRVëžçù±k*• ÃÀØž{XSÀó<8ŽãÿضíW^ÔF€jÑÙÝÝõ/@UtvwwýçºråÊÐE&÷{žÏó°»»;4 ­Ñh ^¯/üx¢i¦Å8T«Uÿ÷`Œ1¾i“-ûòû¸øo4¨V«‘-ýžç¡T*ù!¢U˜%žƒ¢btž2˜å5%eÙX÷<—.]ò-Ëš²ÁØžÓ9¥Ê¿øÅóB¡p^«ÕÎÏÏÏÏïÝ»wàüÞ½{þcr¹Ü¹išC÷Ÿù÷†áÿàüèèÈÿ½P(œW*ÿ÷Z­æ?ß"'šG8ÆÏÏUÌ?:::7M“ñMZ™'öÏϧ—ïá¿====à¿N0®‰V-*žMÓ-"ߤŸ„ñM›nÑØ–ÿr¹Œr¹ÌZ™iñlêõzäv D›dÙX—-l.]º„R©„K—.!—Ëqm=r~~~žôAÐx2ü+—Ëùû°ÊMÁJKpFF¤ Æ7m3Æ?mŠ`%¸  ‘nfu!i§2- kʹ®‹K—.áìì …B®ë¢T*¡V«q <i‹.¥\pH°çy0 •J…É*i=¬DDDDDD”JÚô°þ«õ¯ðÓ?ýÓø¦oú¦¤%vÛ²¥Çç>÷9¼ãïÀ;ÞñŽ•?÷k¯½†?øƒ?Àþà&}š3û›óo⛿ù›“>ŒØ½öÚkxíµ×x-¯à¹“>Å™± ×OÜeø£>Š¿ówþNÒ§9–ßú‰»ü¾qã²ÙlÒ§9–ßúI[ù­MÂú‡ø‡øº¯û:\½z5éC‰Ý /¼°çùòË/ãþÑ?Š÷¿ÿý+î»wïâ7ó7“>Ź<|øp+>÷»wïâ+_ùÊVœkœ×ò /¼ôéÍ…e¸~â.ûÝnÒ§83–ßú‰»ü~ôÑG“>Å™±üÖOÚÊomÖ·¾õ­xÏ{Þƒb±˜ô¡Äî{¾ç{¶â<àâÅ‹±µ0öz½¤Oo.o{ÛÛ¶âs¿xñ"Þÿþ÷oŹÆy-¿ýíoOúôæÂ2\O,Ößú‰»üÎd2IŸâÌX~ë)Måw¢û°öz=t:±÷w:jA]—¤a-ŠÅâÆ ‡‰Âø^L6›Ýš/ƒM¿–ã‹ÙôÏ}V›\†3¶Ãò{s0ƳéŸû¬ÒV~'ÒÃÚëõpãÆ o¢|>ëׯ#ŸÏºÝ.jµš!™¦‰F£‘ô{E4Æ7éŽ1Nºbl“îã´‰éam6›èv»xþùçá8²ÙìÐâ 2ÑÜq<ÿüóèt:h6›I¿WD3a|“îã¤+Æ6éŽ1N›hí k¯×ÃíÛ·qýúu|þ3Ï<ƒ‹/P-;ív×®]d2ìììàôô4é÷Šh*Æ7éŽ1Nºbl“îã´©Ö>$X†äóyt:ôz=äóyìïïîß¿ïß/òù<ÇÑÓF`|“îã¤+Æ6éŽ1N›jí ëÇ{{{þ…ñðáCN½(z½ÞØUÓÞxã ¼üòËh·Û[3áŸÓívq÷î]¼öÚk+î¸â^ýu´ÛíXWm#=´Ûm%Ú0÷ïß-a+¾Uá¹sçŽÿ%C4Î;wbÛòƒe8%-®„•å7¥”ß«Žo€å7%Oêà©OXe³]~O?ý4z½ÚíöÐ0„°I­’?þ8 …ÂÐóE)‹øÈG>‚oú¦oZùsÇßðÎw¾ûûûl½¤©ö÷÷ñÄOÄòÜ,Ã)i;;;øÈG>‚÷¼ç=+}^–ß”R~ÇÑÏò›’&uðyËïµ'¬“†7d2™¡‰ß¢Ûír mÆ7éŽ1Nºbl“îã´©Öž°æóù‘%´›Í&²Ù¬_±XÄíÛ·ýûmÛF©TJú½"šŠñMºcŒ“®Û¤;Æ8mªµ/ºF{{{þ!¾Åþþ>öööÐn·ýIÞO?ýtÒïÑLߤ;Æ8銱MºcŒÓ&J$aÍçóxöÙgýÉßá9Óî'J3Æ7éŽ1Nºbl“îã´‰IX5V~ÒE0í~¢4c|“îã¤+Æ6éŽ1N›físX‰ˆˆˆˆˆˆfÁ„•ˆˆˆˆˆˆR)±!ÁD¶ P(,þޏ®zŽ\N=§m«ûLSý­šç©Ÿ\Nýë8ƒXö< Ñ~|¡0ˆÑiGÅ°ç©Øö<àôTÝW¯«ûåwÇQÏiI¿#DDDDñ`ºå$¹TåØuÕÿs9 
RQÿ¯×Ue»\žýyeôpE;Êɉªp7êßr9ºîº@«X–zÞ\NÝæ8ƒ¤7—SIªç©ÇU«êùLS=Æ0Ôßüïÿû{ðøãoOúíßH®«ÞGÃPï½e©X 6”Ëó5D8ŽúÌ€ÁßÉç|y!$VÔó4ê'ê¸ÊåA¼O:ïFC_.7øÏS·ÉqÆð1JB+¯}t¤n7Ø”JƒÆ—BaðÁã=8>&ÛVç(×K¡0HrÿØ[ýgODDD´nLX Ô`&t©$KeÖqT² Wj%W©÷<õ\Á„R^§RQ•ÒjUý½Tb%I4ŒA…:üürì’|†Ÿ_*ÐÁ„Ñ4çõ7r›mggêwITE¸¢E^ÇóÔëïî’˜àùŸ ޝ\N¤s¹A/V¹<ü>JO”ë½ÞWáñÇWºjµ=Ø€úWÞ[Iåó‘ÏCÞÿFCým¥¢>‡pÏbðïäš ÷NÊߘæhO¼ã¨ø7ÍA² '¦9ÜøQ«©ŸI¤‘ãà`pÕëƒÕJepŠ\n4¾ÃIu¸±GâRRõ¼³ôÀÏ)x]ærÀýÐò1@DDDìdu¹=œGª~%yÓË/?\î?ÎõšLXç`Ûƒä3Øc þ­T†{j …ÑäÍu ¯TÒ¥×*—S·ïî*ÇQ•XÃ$ B^G†“©Ô^¹2œäÍšH+àR1ölÆèyÎKz–ÄÉÉòŸ™aŒ&)¦ <úèK¸sçÁò/ ÛVÉÚ½{êwÂUðˆplHϼ${’zžŠeiÔi4Ôï’pNëÝ 6Dȱžž_?ïy“?`´Ä0Ô±­zh¹aŒÆ÷"ÇÄáﴩ•M3ïH "¢MNƒÆÕÂSˆÂ¤1RGZä ’<ö˜ªWH]%ØׇG‹±¥»ã8( 0 ïœ{ý&OcL ÑjµP¯×‘Ëåð-ßò-Èd2IŸÒÚ‡‡‡ç`Òì\×…mÛÈår~ìyž'P£) p]ŽãÀu]†r¹Œ\¿@q¶mÃó<¸®ëÇ­<¿a0 cj<†ƒƒÿy[­Z­Êå2,ËÂg?ûÙ¤ß.ÂhÂ$¡VKUD¤Â"­æ“*R‘–ŠR°‡F†v_GZãe¨­ô ´Zê12Ï¿RN&£Z㣦YŒ;7yä;¶ZL˜Å¸QÙìWÐíÎ×BOñ’¸ &¡ûá8­Õ1(ëWW—NIX%>%nÂsævwÕï2êF`ñ9Ë´¥qIÄ´ÙK¥Áb³t@„{æYÔ’f\$R¦#ƒF1YÐQ(Ÿÿ,‰\¸/ΑYqv,¤ÝV'¬ÒrœË /äC‹ñ<¶mÃq˜¦‰\.‡\.7”h+÷ò¯iš~…ÞqXýZ—Ü/wIP…mÛ#þà놓“†áWæïõ[$þé?ý§xõÕW“~Ëb%s¥ƒs˜·M0F …LÓôcÐu]?Ù 6|×uaY<Ïóc;—Ëù i0ö$•Û Ã@.—ƒišp»R›ê?VŽ€ÿï¼,ËB©TÂÁÁ<ÏC«ÕÂéé©ìíí%ýöo€á”8±mÛo$‘Ø•8kô›Wƒ ¦6kËåp²Éöœ¤E|+9·Z-¿çÒ4M ”Ë塨 ¯ Î÷”^|ù»ƒ\z+¥çtUÊå2LÓä¢E[âÒ¥áE5¦-1 +ó´ Â+ž:Î A 7pr7€Õ“i%ã/¯\¹Ïó`š&>ÿùÏ'}¸©$S$fÙMƒô²u «ã¬w˜dT2Yžc²ƒ,Þ"?Ò« ¨DAÑr¹ŒZ­æ‡¬×ë(—Ë8==]IÏN¡PÀÙÙ†_±§ôÙÝ]ß\‰3‰ Ã0üø»råŠë–eÁ¶mxí< %ãÄÕ[5—•ôœÊyÚ¶=XlFæ>Ëÿi~2B-¼pž|¯ë{²6‡mÛ(—Ë~=Î4MøS­VýÛ,ËÂÿð'}𩳻«åOOÙH¸¶.a]W²êº®?l¶R©Œ$“~âéº.†Ÿœ†+Ì…BÁŸó9­¢^.—çJˆçUKz’ % ÄQ /œÕjµ`ŽŽŽ†âUz÷Ðjµ`YLÓ\xþ'ѪÈv*’¨2Y]?]\Ù=¸øÞ42jÈ0 ¼ûÝïÞªió’ùβÇö¦®×Ž™àí²Á4²º{pm ™2$ §ZÊ;8EJ¦-É:ò=gÆØ¸•ç´m;ò;°ÑhàÊ•+888ð_[F•Ëe<ÿüóI¿õ© ÃÒU·ÙĦÕت„UYZ5im“^¤GIæ| I&]×EµZõçëY–…Z­ÆJ=-%¸ ˪È|eÏóü!å€Z€hZe1ŽyÉDË89a¢šÏóP­V#Wt—5êý%˜ƒ£$©Éêéé)<Ïç?ýi¼þúëIŸZ*5êû@ªë.Š£rlµZðýT²£ M‘½<¥2¿ê-5ˆ–5ψ¥F£áù•¹@Áø&Ú4ž”Jªâ¾ kÃÉÐY`”Ê"/Á¤S’Pé­T*þõÏ^¨Í%+ÿJãÌ2mç2{\ùoÛ¶¿uY01µmµZ ÷´ðBx¬~Ð$s%¬ívÅbqèwÛ¶‘ÉdpõêÕ¡ûÒÆóf¿dH‰¬tÚjµX™'m8Ž˲ü¹Ó²ð+´Éd®ªNqžç ­l*ÛhÈ ¦ÁÔB¡0qË(Ù.£Ñhø¥ÍdYƒy~ËÆ»Œ®‘Eõd¡+¢kY–/áPö|Ó2‡ áÑìfJX;jµºÝ®ÿÅ)-kÙlp||Œëׯcgg'ésZŠçy°, ggg8‡6G½>[/k½^¾Ç†Údž§z›Êåô%«²‘$”²w£$ž"¸™LG¦i­¸^}uqo{FëQ.¯f¿mIV¥AÞu]…^#óL‰VÁ¶UYͪ5Íc¦„µV«!ŸÏûCB• 
æóy"“ÉàøøÍf3µ ë¬Kb×ëõ‰­ÔDi5˰w˲–ž+D”&¥Òz+>¶mÃu]”Ëå¡5d› !sü Ãð{Jƒ×¯AšG«¥†¯jAšp² ¨y ìy§8U«ê_4¯© k»ÝF·ÛÅ­[·Éd½^Nׯ_÷oÛÙÙA³Ù6œ³l/C¯¸…m¢Y{W¹Œ>éäè(þ¹O’ÊŸO}’Ôj©\¡ßjµ8T’6Z«5~hdxn6Ñ&³mÕZG8»®‹F£á¯Ó ‹“Ea…ŸVM†þšæjU’=O]×Å•+W€É*%Âó’>ÚdSÖ|>N§ã'©½^¶mÌUm·Û€‹/&}N‘r¹ñ½«²'%÷ð¢MfYãÖF£14çŽh“U«juÉU’ž(˲P«Õ85„ÖÊqT\ ËoS#êý1˜2rŒ#È(IˆB˘)a-‹ØÛÛÃÎÎÚí6z½žßÙëõpçÎܸqÅbÑ_58m&-ÊÑh4`š&‡uÑÆšÔréº.lÛæÜUÒ‚ô>­²ƒÈ²,Èü¤U¢¸xžŠëerÊV«5´ú´mÛUCDK“޽$§•Í´JðÁÁšÍ&NOO‘ÉdpýúuäóyÀÍ›7q||Œb±¸‘­w²í+ó´É'z뺨×ë¨T*ì]¥çyjÈä*ëàÕj®ëÍO%Z·IS–f!#ŽŽŽü=TY¯¡4a8n¦z½˲P.—Q*•üÅeB@åRRîˆB¡ào£&uQ×uáyÞñŽwàƒüà\Ç1SÂ*Ij”ììì,Õ³*ãnÏd2+éµ-•†/Û¶ýmzŽŽŽXQ¡Ø¬#¾áá6¶mûs—LÓä^«uŸ,B³ŠâÚó<ÿË—{g]±½,Ù’OF¿q¿]šÕ¦Ä8-O͆Π&œ’TÊO¹\öt`Û6,ËB«Õòÿ>—Ëù ær9+·z½Ó4aÛ6ürIve™ÇL + æ¨Þ¹spõêUëše¹Ùl¢Óéàððп­Ûí¢V«¡ÓéøojpØÅ>¤áß«Õ*ŽŽŽ¸ÈÅj]ñ&…ã›â¶Îo4€óóåY†¿@‰ÂÖÛ­–æ¾ÈPwÙ˜ó®i^ëŠqÇQ?l;_=I4%é N û=ÏJ*eqA!÷I¯è¤EÙfݾ­\.ûÇvpp0Ô1øè£â­o}ë\ç9SÂÚl6q||ìÿ~||Œk×®a©7¹ÝnûɃnܸl6‹[·n¡×ëá©§žB³Ù\êõÂïm.—cežbµÎø XDqXwŒ/›¬zž‡jçz¦IÖÛ²MÓ,‚[Õ˜¦é7¾Íc1îy*Æi6Ò)‰c¡Pú¾’ï2ÇqËåP(üÞÍ4å6«Ü}åM³<èøø;;;pŽã`ggÇÇÇ#ÛÝÌ£×ëù]ÅAÝnív×®] †#ïìì¬tÈÖ¼ÝÐDóZw|çrÃ-—žçqÛŠU’eø"ÇÁ•+W`š&NNN˜¬ÒXIÄöÁÁ콫Ò3âº.ªÕêÌ=DbÝ1¾ì‚b:’!³¥RièçÒ¥K°,ËVkY®\¹âÁµmÛÿ.»wïNOOqpp€J¥¢u90µ‡U¶«‘à€ýý}ܾ}Ng¤efV7nÜð·Æ‘apÿþ}OŸÏçÑív>É𠪬ÌSÜÖß@tË<+ä§uǸç©!Á‹Tzd0÷Ÿ¤Y¬;¶Å¬E¶ì©Í2ž•TŒo£à¼Qé9u]×ohªÕjS¯eÏóP¯×qéÒ%†ÓÓÓ­ÛÙd¦V`x®j&“YêEoß¾n·9¼`ÒE1©G÷7ÞÀË/¿ì'ØAŽÃýŸh ÛíâîÝ»xíµ×byþ8â^ýu´ÛíÈç°m·¡v»Äöüë.ÃUŽ/²ñ|«ÕB£ÑÀéé)“UMt:ܽ{>\ùs'Q~@ËÔ©dk &«ú’ò{™QŒ“$Q~·Zª ß&¶mû½¥²#I.—ÃÑÑîÝ»‡£££™¯eÃ0ptt„ÓÓSœmt²*uðyËï™]ZåÀ=é‚xøðáØdù7ÞÀ+¯¼‚;wîDöúãÁ[¤ÖCÚ¸ÿ>îÞ½‹/}éK+î¸âPYø,¼Ø™m«!7R†qØûv»sçNl•¤ÊðBaþ†ÇV«…V«…ÓÓSVð5" ëª%U~£ CŽ#/¤/)¿§ÅÔ"’*¿ç™£½ ‚[¸H2j\×õ盺®»òQ=›œ¨ ©ƒÏkí ëíÛ·‘Édü¸Óéàþýûh6›ØÙÙ‰\Z[LZ‘øñÇG¡Pˆl1 ïoæ8+/[L S©<¬R\ñ ï|ç;Ç.z®Ì³'i»íïï óZ¥$Êp@5:ÎSl3YÕ—l¥·ê2<©ò˜mô@«Õ‚išZTZi<)¿ãØN&©ò[‡~"ÏóüïYàHVÖ ëužçi=Ÿt‹ÖÁgNX÷ööFnk6›¸yóæÐmÁ¥±£\½zuâý/^ ZäâþUX¡§8$ßÜ™ƒÖ%©—¨YêŽã ÑhpžÍ%ÉúÉ,9¨eYܺ†–’TŒor«¬ÊmYÖО¤ã0¿ˆÇÔ„õâÅ‹C .‰I­0“‹Å¡áÍf†ZeŠÅ"nß¾íßfÛ6J¥ÒÂ'¹ÌþfDóH"¾57„ñMëTŒÛöì1.ûl3Y¥y$Û³­ÊØ»JËH2Æ7-t¥zÖD•â55aÍf³+ÝrûûûØÛÛC»ÝF¯×C&“ÁÓO?½ðó…[v8‡•’´êøÔ‚2­‰óW)iqÄ80Û`٪ñ(qÄö, æÉbKDq‹«üÞ²HŸ ùÍårLTSbî9¬ív{hÜñÕ«WÞÚ@d2œÏçñì³Ïús°–y~`t™M·uÄ70\áá¶M´NëŠñY¸® Û¶¹( 
­Ä:ËïiE¶leC´JIÔQÒÈqÔëuär¹­Ü2fÌœ°Ú¶üã#K^#›Í¢Ñh,^¯‡n·;±—U\Êd2IŸÓDÚmiSÇ Õ„JD­þ¿€`"ãôNû÷*¡mõÿÖ€ê]Eà>`¸6-¼Àùx$Ûý 9ŽÃ!d¤FctŽŸlcÃ(ÚdŽ3>ÏS×cÔÒÍQï…eM¾^ …Ñç±mõwQå˜<—f­Ž£N;xZ\˜tU¬´a½Õ\Côô¢ å8vwwqttÄDUs3'¬B¶¸ F¯×C§ÓA±XLúœ†XÖŠ[v\WÕœgP)-bÏNNFÿæýgàwþ<ðò·¨ÛÚ™ÀÂGöhˆTz¥bL"Mx® |ë£ÇW© Ÿð,³Ý+•Å–p WÚg~ofQ(,öZ*íwÛm|áÎùŸ3…Z­á¼f©B[5‰o@=q.½”e8a öŽ;y>ù{i„(—Ç÷‚Ë2ƒóžÛÉÉü½‹${‹Î=Xäµf½^ÿÉ?™ÿ¹S*fŽã —Ëé1Z†¶^¸Ø[ùà2k>§åï´""ŒßcxùçU+sq²í0wÂ:N§ÓÁÞÞœ”­r®c/ýE`Ûª"_.«'–Ê= n3M5ôUZ4Oúac°ï©µ¬Ö+÷Ë0ZO²C·K|ÿF^ó ÿš­þ}&€Gç?•õ°f2™TmiÓz'Pù3µ|?Lêm:ˆø}Z0ܹ1Ë<ÛðÐêþ<áÏì}ÁG–~Ë7]µZ…a¨Õb*ì “ºƒ •|Ic¸${r Jr'¿K<\éÿ~2å5eî6úÏlt’ÿ×0H`‡zó1ØÄ¿0Ø>sä|]Àî¿x à ¤_„º[-øŽ4ÌT¡®Å÷…æ1TàÞ‡ã{cùˆ¦&¬ív{dÕ^¯‡N§“ªÕ Ç —Üo†oh¹”@¨b8H_AOV¼_ž« @hÒ;)CÒªAK´ü«ºÙjÜß„+7÷0ü…s©Ìó5–…I¢*DÒ¨¤6ü¥´…6f•`zî-•ÞP¹ß0Ô\ó“9Z>˜¬j¡Õ´QÌEFXP•Òõ˜$ë0–5¼ø–ùÐÒNXç‰ý “ £†k:8#¢æhÏB†ÀÑÑ”aŽ£ªdÅj™÷,ð] ®åoêŒÌ’í#£H‡–,&ñ\,,˜@†ã+Ü>Ë{®¯…¯çpy!ß%…ÀÒÚrœ»»££üÇ"º¡¨VS?óŠqåæ…†ß¼yÇÇÇ©­4ärÀÁÝî†a¨ŒÖuÕþ©9°'•6“ ØàF]†6TåE[Ǧïõ1·OJÞ¤±Hz8åøÒY´D –õgP_„Vÿ¼Â[?Œ{ZýRF‡×úÏ--¶"8œGCáÞÕTm\˜È¶Ue]VÔU½Ѥs–{é¡‘†Uî5­N+-ø5¨k0°ÕÂùéÄÆ˜U¬¹áºêËòÔ갪<¥Ò< kµZÅ¿ý ü.Fë6iày£= Õêð\ÒBA]OiZkA3'¬{{ƒñe½^oä6‘ª-pdRu@ög³Àï~…DÏqFqsüÅ"â[–U—ýF¥AGfJÓ—Q_pý"õû †{Þ`8V°•»PXß¾¥ ijŸ/m”™«"áýBçeY@½®bÞ4ÕB,[6ŽÒí§þ§Ÿ‚išx÷ÿî¤e@xdnŠŒº Š»q“̰^¼x×®]¹=­[ÚDêï³ê8~ð?þ ð¥ïƒÚ°‹hsEmÃ8u¨¤¬Æ+Û!D-|B”2Ázµ?µc–5¹ÈâH«" ˜±ž³õ‚£pmÛV;Di`¶yÀ@ô8LÓT1œJk4ëv6žçáÁÿò?ðñHö€%1 ï‹]«±£+aSÖl6‹ýýý¤s9€ðÑ×>Š7ÿwoœw$}DDK‰Z¬Ô7K{vh[Hoªe †?®ºE¼…ÁñðÊÜW0XõP#k+å¢z== ¶â˜$8ÿ:œœ2Q¥k4øèù(Þýgî]•¡òå2{NSæMIÀZT€;ýÞý»ï¾÷óI ÑÒ¢VO&ûE­HG´¡ÂsXÝy–l4€+WToÓÙ™ª€Ì²Çe*±,ax —:ÔÈÿù¡¶)’­šN0º‚ë† )Cm§Æ)ƒ[¯Õ,Â;‘Éñâyj¸o©¤zîÝc‚J‰“°œÆu]ضrñøjµÔœÓR)úàÔëP©3óÖN§ƒããc\»v ù|~ìÐÃyæu:@>Ÿ{&“A6›]ê$=ÏÃ_ù¡¿‚“Ÿ=þÔûFWÂ#ŠAœñ- ˜9Ž3~8Q b‹qI Ë£‹ôÎ5$XöEåºp¡’ÔþëFnÕT€JN%y0¡ >ý´íXÚ먟„çgG ž´º;0Ø£\V‰*ÑŒâŽñ™c zWÿîÎßÅ›ž©M¶(“Q6å2Ò 4SÂjÛ6jµ²Ù,2™Œûþþ>2™ lÛF§ÓÁ³Ï>;Ó‹v:Ôj5t»]jØq£Ñð/šn·‹Z­æ_L¦i¢ÑhÌôÜQÊår¼[!õ­#¾Ž¢ëà9ÙË‹‹iPŒbñTïå² -Îs´ ’ÏI=YåÐýüJÑÎ:ë'•Ê”u¤·?jø¸m«¢BA5³̧­+ÆeÛÞIlÛ†ëº82€ÞŠOÔ¶U¢êºêbcƒÎF›©9ãÆ0MÏ=÷ÜPKK>ŸÇÎÎqõêUܼys¦ÝÛÛC±X„ã8xþùçQ,‡6 ¾qã²Ù¬§ÓA³Ù\èÇmÛ8˜uæ7Ñ’Öß¿âÜ7’kˆñ~O¦ã¨úFÐÈ(ÀÿzxÇÿ=[ö¾5ÄWÔ1Î ¨¤€Ãt·Þ:ë'cËq*>w¡â2ª3ÈóÔJ¿GG,ïi.ëŠqØšõz]½¶ƒÕ7zžªqÏS 
LMXÛí6z½žy晉{æ™gp||ì·ØL{>YÈ)“ÉàÚµkèv»èt:èv»h·ÛþÊÄ™L;;;8]po«Õº‰â´îøjµZÑ v­ÐÚbÜœÖè¶ÂC=P?ð»Àø?ÿO \¬PG*ý¨y£ÁC8è߯^Rê[gù- Vùó³ejÝ=ŒB^.3Q¥¹­3ƧÍ_mµZ( ƒ)Ë”ÅQ•ú«•© k§ÓA6›Ã^,‡†Ëý÷ïߟø|ù|‡‡‡C+“ÉdüÿÇÔçóù©‰ð8®ë¢\dc{ø±îø–eÁó6Ë’e¤µn·‹»wïâµ×^[ùsÇßðú믣Ýn<‡üG\‚úÚí6Y[f¡…"m[­òÛh V€?9Yù{Eñ:ø¼å÷Ô„5ŸÏ£×ë$‚ar°Õf’^¯‡f³‰øÃ(‹¸~ýºû8“Nî7ÞÀ+¯¼‚;wî Ý>²Ð{X·Þýû÷cKXŪãPž;wbh40MS –GÚjwî܉¥²wîy@Ë€Z½·Ïó<”.•€oêßö9àþ¹õ½©”q%¬"îòÛûA½Ñ…óÇépT…¼RQ[lÐV‘ò;®øâ/¿gòhܧþ×§pPëǶ…áíÃÆñ<`wW%ªµšJRg]žRCêàóÆ÷ÔU‚óù<òù<šÍ&òù|dB*_,gêuít:¸qã2™ nݺ52ô`œIKk?þøã( þ¸|`Ì6\)xëIëb¸qcUâˆoxç;ß9߀Šq˲pOV¿ oZI[iß_á1ë(ÃpM {ì—ÃG?ñQ`÷`ܰ³¶÷”ÒCzƒâ(Ã×Q~[&`þ«ÑÇüØÛLýDzTEœS<¶’”ßËné8κÊïqÕmÇqà|«ƒo­k4f›¿*8¼.6Ú¢uð™¶µ¹~ý:öööðÔSOaggg( ïܹƒÛ·oû›E­VjÑ ºxñ"Õe,Gðÿóð2ÚPìa%"¢Ôq]ÙuöÏ’>¢•²m[%¬ž”JjÍ .DC:i`¤w5r+'¢1a%"¢ÔùSŸø¾ú»¿›‹ç‘v<ÏSà µ›×$ xP -ö]­×ë(3Îi LX‰ˆ(]<ºÛÅ×üƒô‘­œã8øs/?ò#À½{IÑj9PCûmŽãÀ²,œ-ó¬´å8‡•ˆˆRå?ýÐá—.]bï*iÉu]\}þyîOz2vÝ«V«øçþ0 ÛNúÈhƒ1a%"¢Tù¹ÿê¿Â¿ÿîïNú0ˆbñ5/¿ŒÇåW¸—6i¯^¯ãéoýVäŸ{ŽÛÙÐR8$˜ˆˆRÅ4MnùAÚúáoøà¯þÕ¤ƒ(vŸùÄ'ÔTÖŸþiŽ˜¡¥0a%"¢TáJ’¤³Çÿò_æBK¤=Û¶ñ¿½éMxS¥Â­ÉhiLDDD´.LVi ô®_‡‘Ëq®6­{X‰ˆˆˆˆh%¾ô3?ƒü¯þ*r÷ï'}(¤ ö°Ñò\ø?ü°ËeÎ[¥•aÂJDDDDDË3 ü¿å[ð§÷÷“>ÒV"""""Zšëyxöõ×QàBK´BLX‰ˆˆˆˆhi®ë¢Â=†iÅR¿èR§ÓA&“A6›MúPˆVŽñMºcŒ“Îߤ³EâÛ4ͤ›4”Ú„µÛí¢V«¡ÓéP@£ÑHú°ˆV‚ñMºcŒ“Îߤ3Æ7¥Mj‡߸qÙlŽãàùçŸG§ÓA³ÙLú°ˆV‚ñMºcŒ“Îߤ3Æ7¥M*Ön·‹v»k×®2™ vvvpzzšô¡-ñMºcŒ“Îߤ3Æ7¥Q*Öûý†óù¼[>ŸG·ÛMúÐRa[Z¹nß¾íGÑ ã{²N§ƒÛ·o'}k¡ëµÌŸL×Ï=Œeøöaù½ùß“éú¹‡¥­üNåÖIE¯×C&“¹ý7~ã7ð³?û³pGû¥´?ó™Ï$}ká8>ûÙÏâ=ïyÏJŸ÷å—_Ưÿú¯ã‘GÁ~û„-ßpïÞ=\»v ï~÷»Wþž¤ÉË/¿ŒW^ye+¾㺖ÇÁoþæo&v^,Ã'c¾œÏ}îsxå•WðØc%r^‹Ä7ËoýÄ]~'U²üžŒå÷r¤>oùÊ„µ×ë½ïáÇ‘K>ŸÇ·û·ã‰'žÀûßÿþ¤O!V¿÷{¿‡«W¯&}±Ëd2xüñÇñøã¯ôy_}õU¼ûÝïÆ#<’Èy-ßð§ÿôŸÆûÞ÷¾XÞ“4yõÕWñꫯjñ]Ë™L_ÿõ_ŸØy± ŸŒeørÞõ®wá•W^Á;ÞñŽDÎk‘øfù­Ÿ¸Ëïw½ë]‰œËïÉX~/Gêàó–ß©LXƒÃÂÆ-­}ùòe\¾|9éC_‹b±˜ô!ð<—°H|À?øÿ éC§‹+Æ“¾vX†O–ôç³éç™ôû·H|³üÖã{€å·~Òvž©œÃzñâEÃúÝ.÷9#-0¾IwŒqÒã›tÆø¦4JešÍfQ,‡&îÛ¶R©”ô¡-ñMºcŒ“Îߤ3Æ7¥Ñ#ççççID”N§ƒ½½=d³Y’÷áááØù}D›„ñMºcŒ“Îߤ3Æ7¥MjV@Mü–%•Ó6–šhYŒoÒcœtÆø&1¾)MR°ÑöJåÖmÒn·#oït:÷ÂZöþuêt:7^æ\Òtž4j\|ú|îqÆwÚΕ†Å߳ܿNËÄø&' Û†øfù½½¶!¾åX´(¿Ï)1÷ïß?¿|ùòÈmß÷}ßw~ùòåóË—/Ÿìc[éýëôâ‹/ž?ùä“þ±<ùä“ç/¾øâJÎ%MçIÑ¢â[n×ás3¾Óv®4*ŽøžåþuZ&Æ7é?_.†Óô¹Çß³ÜOɉ+¾g¹–‰ñM:O¶ 
ñÍò{{mC|ŸŸëY~³‡5;;;8<<ÄþþþÐí÷ïßäóyÿ¶|>ï_öþuÊçó#K Ëñe2™¥Î%MçI£ÆÅ7°\ §és3¾g¹Ÿ’W|Ïrÿ:-ã›tž4lâ›å÷öÚ†ø–×Õ­üfš"“>ì^¯·ôýë”Éd†–Aïv»h6›ØÙÙA6›]ê\îÝ»—šó¤ùèò¹Çßi»–iv:}îËÄø´k5MçI³Ó%¾Y~S>wËo&¬)2éƒ~øðáÒ÷'uNÍfþð‡Q,qýúõ¥ÏõÕW_MÝyÒltûÜãˆï´^Ë4ŽŸû"1>íZMãyÒtºÅ7Ëo Òñsשü¾°†÷‹fì^Ëf³Kß¿nN7nÜ@&“Á­[·F†,z.Åb7oÞLÍyÒìtúÜãŠï4^Ë4Ý>÷Ec|Úµš¶ó¤Ùèß,¿)L·Ï]·ò›=¬)rñâEÃúݮËÞ¿nµZÍGðeÎ%mçI³Óés+¾g¹ŸÒI·Ï}Ñß´ó¤Ùèß,¿)L·Ï]·ò› kŠd³Y‹Eܾ}ۿͶm”J¥•Ü¿N¶m£ÛíÂ4M´Ûí¡ŸeÏ%MçIóÑås3¾g¹ŸÒI§Ï}™ߤó¤Ùéß,¿)ŠNŸ»Žå÷#ççççk' Ðn·±··ÇqüÛ:öööÍfÑëõÉd†VúZöþui6›8>>޼OÎw™sIËyÒxQñ èñ¹Çß³ÜOÉŠ#¾g¹]–ñM9OЦs|³ü&ãгüfšB½^N†VùZÕýi²Ì¹lÒyÒ°mùÜ·éZ¦múÜ—¹V7韘¦éßñâʼn‹ egggglkÍÎÎŽÿš™L/^Œåu:nÞ¼‰«W¯js¡Ðì–‰ñMˆo€1¾Í’Šo¹Ÿe8ʼnå7éŒuðÍÅVZ¹f³‰G}wîÜÑbÜ> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½K¯.;r8¯_q†ÝƒÚâû1è ´„6dHmÐÃ#¹$¡íkµKî_ßkEd&#È]÷Ö9W¹QW°ë|‡'“d’\Œˆÿþ›ø-àÿ~Ëÿé3}ûûŸ~>bªò·ëOøëÿþ›ö'þ"õ!ÇŸu”Ú¿ÅR¾å&ÏâïúMÌeâÿUÿCŒøsX`Ó?ýæÿúÍCïÿˆß%·þñ~ÿí~óþi=þë½!öú-åŠñç’äOÿUþÔg‚FùÃ?ýæO~1>ÆÃ¡ýãoþãÂ?úÏ¿Ißþúû¿ÑÔJ9û÷ÿiß÷R?˜ÆOkboô’rÿhéžÑú•?r!Ä‘_êoÿxú}~¼Þí=Cù²ÏüO›s†ZÞ­®çõç£Ó¿øËø-¶žíßý† ïÿÇŸJ‹?Z)qàÑšÆÇÈ=Tþíïdü6ÅŠ¾ýî§oÿñá7þ_¿ý§o¿ûw¿ùß'ÓýžRéc¦ÜJõ{újG_oÇç࿲‚×aù<ÚÙ4Ý #ö—a&Žù1Ò¬¥„¾÷ìÚ^B)­Î0FÌÇlÛ{C˜ó£ŒÑòÇlÓûÛ+|ä„~[}sÝÙá¶î®ç/Xw‚_w?„÷×CðëîFðÙºÿ2*ljnF#q_Ïô‘rô(!þ\(ù½pì;I1}ÄZjË£œhœîNâ¯êeæô‘FÅDÆÏô‘~U­¦[j1äŸé$»NäÞ¸˜ü½ùÛðä¿Ô¿ývuhFbÿz Xí’"ƒ•ÇÒ×\sNç@þ§êHþê/ÿúoþöïþâïÿù¿?†d¯òŸý½Ìˆ5i~+BPì=â{=gÞ¬¿-Ú÷¿üãïúéû¯V2ÂEÄY cj¹|K „ÈGÄÆ?ZG[jã[œã#¶!µQøÛ_´ð“¶‚FèUšðFJ¡áã”ú‘J£w FýA°mr;׊-Ð1"iÃ+9Kì–ö1ðzÕ&¼r°7ìÉ QÕVR-m)ÄΑDì¶Eü‡×ViÃ++^ÙkC[øèeÔž³4á•Iý»1¶Påü§ˆ7¼±b{¡ç‰|i Ç%¹ò1”ÞKÒǰ7ã}“ðí9•Yùµ$¥ÎqüœòU¡4y#>B¼>vDí1ã¤Èà!z#aMg´ÊØ1lÂþè\˜ €§FÅg2ÞÆ1Ԙ؀>ZϱI^Çei=jäÉŠ‹¦?8)œŒÊán" xÝý•p|GÁܪöTxÙSÏß°ªc R”×UüäŠtìl4á›çÙ³¼°â…Dní³ãàÍdëÀžÇ?Ÿ)LŽÕð¢{o¢¥ZZ,ò±^toMnªÜ±w¢¼¨áEøc™hÄ÷+YÝñB¬T9lÓ‰(Ÿ´c£ê.Šlh¦¼G¤r!Ð=ûª-\uõ†Ì\7&4À­aÞÒ„Þû’ƒÇÑì]”`àG½·%¿]i½å&SæÏõÁq²b›Y[ð>~ Ü2|(GJé" &ì¶Úﻌ›2÷‚¥¢¶äƒ¬h–Äà¹1¥ /äZàÈ´BŸ#¨ŽÃqŸ`P@oCÂ٘Ϧ 3‹¬VÆÙHϦŒøî8)YNTÆÑh϶e )w~ÜŒ£1ù-biòBH8OÚ„Êûº| <9Øg£ax¨êÆ"Ô$M8˜GˆZ}aê&±i‹6á…ÏÆÄ 'Žî¸í ø |©Y«|Ó?Y@Q<ÿnÛÂâ´añ Åk1ÞŠ» >¦v`1å9žßZóÆÜ’p[Z=À¸ãFÆŸ°¸éã:z³|ÚÆD=àÆ’4®çëÙZ;иd,nj}ŒŽqAW „IQÀÂ1nü¬Ë˜ïÇ×öJ-q‡ãH 
@ÀiÍú˜Åc`;6'¾M©u䘓Üj&sˆyGã¡>{Ù!9FÁäQJ*;*cEÙ.…å(¢IæÝ6v\æmÙ…‘’4ÇP°±ñÀÒŽÍ2Îq2óŽÍ'#øÔaàŒ;T+£/tŽø¡%oí;>ã¸C¼øšú‰-@c3PÂxÏ¡y-cƒ ó8wˆÆGÃ÷à47-F³¿Þñ9ŠÞ–¤#vvZ¿.m ÒòÊ€“Üti,J³ 2)zÂ4—‡L„Ö §)ÐDƒO×v æ+±CpÏÆ©ùJ\%ómP!éã$bËËîrX­Ã,¼¹âÖ|%Hö­ÙV"hÝK'W¡CkîƒÜ¦œ«®9HÈÍ5 v¼Ž¡Ap—+yî€Í‘$^^CŸ³ˆÍwâí¸ôz°€‚—GÛ1›‹0°Þ“ºP[_gÈþt½’vë¶Ç·ü-}Ë:rcÒ¸Y°TT v!:e¬UÁ Lõ@îˆCÜ0˜ÒO䯒C ÀVo§ QB;ï¸vŠÑ€Ë„˜PÄ ]=Ü™ûaö¸±’8#´EÎO€»ä ØênñÍp~òg¸=s¡8OànXr¼fÊÄ=pgœ-¨OU<€[£SÒnyñà\Ì·œ#œÜ‰Ÿà6”F©HO·Ñ5´•ŽAÖ·¡É@}€<ÙÜÆtpZ–(¸¶@œz¿xàN»2“BŸÃm^µB|Ð;Wq;ò€ã¯ñÒ·ñ¹{Âiï=p22Ä*àó¤mÞÈ£BuTeƒîL ¸xÇ Ýø+ì‚àü.«äMB©¼Ì¬Ðw.ë*ÿ¤ÏW‰ÃÏú~Ŧ9/¢é™Ø½àAC®_¯3x5âðùñz·÷ åË>?^'¯ êz¦?Bþ)ôpè€;çþ2U˜¡¨|ð¦ø|-ù6U¸NàsÔ_Y³–ÌA‘4³¤×¯‹æ]`Ù)#ÛóWPFó  ý¾€2š;UèFð>Uh·WØ‹"ë{ëηuw=ÁºËüºû!¼¿î‚_w7‚¢ !¡ÓÖM™»½FúNÞ¢ ]//Q…®¢ ‰ƒx [†:ö L\°LÜ¿üÏ÷˜¸<­ùýbÂz¢ œùc꣆Vx µóG+Õ Y[ÄniÃ;­,C½ásïêçËš–¡=Gž£~¾´2Èç½AwHúœ#ã GL¼¤‰Î_’#ãD<(IT“’Œ§h²· ŠWIˆ€ê,"¥žBíü!= zäK×`Í,7éÑ Œc3éS´Iß@ME*HÔÚ!nÒ#ÌFCMÒ&ÃÄ%’Ð|…ô(Å2qÐý* bEFQ-‡]›*&dÕrqГfÈøæú^øTÆ^­_ºyHø‘KHç€Mx¡!=2íõBf(æÅrØ“uˆQ·tKÆQ°lBJ“%ãHy%`œ0%¥ 0JÚJš¼i«¯¨Ñ@ô°lv'Ü„[,òqd´°[šÜae“^¸sšî±i,ƒæÎ€í.ÖÜ2-‡ö:Ú¨áËÆE*¸m*÷Pƒeã0Š&ËÆ±‰Õ\8å—±ó˜5a b6ªÑØ -!ø*U¾]¥cÜ×*â(¬y¥ …ÿ—i œF 6y6«ÞçõÞ'ëò>ìlèbè¯Ù³qÀ¡Á5‘¦JN>¤)¼·Ô‚.-ÆF€A@~§…MºÂéà‰ÀAê2¾Ú[Uëi-žŒ3û¨OÆqâ q+ŽÇä! Ø'\¬ˆ7 Ý[«åâíÁXP±‹Tœ\ò( ³êè!‰Eˆ—3¿69]%›ß"ã<Ó†7/ËæÆXE ò<ИS.‹•0摉aÚ=÷ôÑ2„œ©¯t`Œ,ƒ¢ÕûƇ¨·ºlÆTe©«éИ\\ÆFnC >Žiõ %·Ä÷ox "F¬SlHc&[E#Y;"Ó8›¯µ? 
û±uA²ðfbR+˜Ãd¶Qõ,Q쨔i¬£×QQÆ¡2ŸË1—CÛa9&šÎOe¸ÌÛçû_1Ö3o=27@±C3m烶º.öT‡ÍbïòšœÅà8°§§ºŠÎ|&<šúj8xci ÷ªd8£.>H 4|Π+ð «ûÐBA -_›r:.`Û3Àö@hé?P”ÌuÍQ†Ñç}¸-F³ £JãB¦àLºø¼üÊHJ“MÄÉ)]XìLºŽû'Ê~u8M®¨ãS^dÔl‹Gï ‹ÔÂBN Þv«7Ï5-äsiC.p‡Öto*P6 ³#ã°ïèËRåä8¸æc…dÝÆ×bAΤ*ÚÜ[ˆÍ‰k·ë"XÄæ dB`TÝ1›"46olêÓâ@[\­æÙå‹VÇ޵Îq|l?·!7d<||½y"·º­µ¡§Ê!7O ?>'tÓŽ“å‹yèÆqoES9ÊC7‰:Œê’7=tgÖO¥ ºdbÌB]Ÿ6è¦ûÐEt;à|… U éC¸±þÜ–cŠØ´77-OägÀMÁò¡²Þ áèçJŸÐõH8º;Zîÿå-®Q·” «á”C¯ª%‡)1-Yã]*'T-uwoI=y#-t‘œS£áW(–¬ñ ctÐ@lò\¶ÆƒÚé _sÓaËêÉ‹­O×| ÿ¯(-[ã´hü!4u¡oÔË¡|ôÐÅ‘—ªSÇOÌÕ;òFhNhÎÒ6>.=Gé@% _kÕZð‰€B)DÁVÕ•÷2£ šÑÂ,âøÙªºò^f4R“bƒ–çš±P š¡„½YÚðÎ¥QŸ§%F·Ö¬ýï ôíl¢™·Î 1UÈÄÝ=0PL ­[óô I㮺B‹_^ÕÈØ š¶ sXó‡9hï ¶ g÷ Ÿ²§¡mêË{éd›@ÆVŒömZ£CýêªðcG‹ôóMƒ+(mb@Ü•55„wªæ÷ŽÕXâšØZ iB 5ƒ}7Űةš?;6àcµ T—uëÑÆÉõ(ä­(¿Ñ£“ÃjS­­U›È„\û•VL:6ËÀ{²æ 8c XRmà ¯Ý \oØc l“g< E ÇS jù½[ñX Õ„sêÙZ˜Í¢R8Úpo¸¹ÂpËŠ¹s÷bŒœØÄ(B¼öb¹9Ø;b@ÂAµ–ƒHbjëÕ’síc`óó@I“1ÐÜP(¯Ë «”ãVH©W{3vƒYe½Jëxo–œkÜ\¹ñÐíÍ’sù#4~±Åãÿ7ä\Å©b‹˜z·ìÄ`Èž ½[›AýZ3„\Ú„:ò7ors¤wZÅjáŽ=@š>ãÏ“w&o‡»’Ü(HãЇ’ÇwäAº3¨àè(¸;näš"P@v€é ¨ÂåÖ4rÍtË>‘8=x”F[™+í@é"0YCR8ª›(€±¤~ 4ÆÀáÜC;PZæÚ²×”fLbæe"ûÇ£tj8€•ÉKìC§?ƒ*8€–à"C×Q:ˆF\è¤A¿¢%¨ ˜Y”³Û0zâÃàÛfÅ2‹Ñb±lEÃ’†,9œî ;H“Nņ™%+îgâ¥u†¡§5î0-Á@ ãêILlózgu&^ìc|û !9©e˜sK×cÎÆK«+v|”Ûßaµ ·]NB½;°Ûpêb»íhÍ©cá JÉ‘rpM)¥ö6!,æ¯y™Ó`΄ ;bKüF%aDÕ£›VôÏô]ÑÍßÏÌm¸ÍODA<Ü6Ü.ô”9ˉÛÀtÜ|8§p «•HYž¸ÝñùnŠ~â61=B€nr*=nã³C#+ ZÜnÑhÀX5i;܆,!æà=óÄmb%î›íý câ5ŸÌÔ^é†á!Y¶~½Î–á&\$Ýóãõnïʧ}~¼NÒ]”^ï?ÿIW›„û&*%ée’®…A©™†ÐþœHºuŸÃþJÚÅû€¿~Í|êWÐtìp£k\Ï_@×È<]ã‡ð>]Ã!xºÆà}šÎl°É’÷y:éq[y×õW$TU&Ë-½ÃdT Ê­½ÂQuø+hVq@+ó5®nëå-²Îwó[ç;yƒ®ƒFA«ÉdÆ«_È^I3á¢ëþÇÞ‹™¹[ºŽ ŽÚÔ)A%‹B*P" X‹-qà\ƒ j@lW[·†`ÉBiªxެ«o—6Z襭ƒBeG!—Q¥­[¶Ž¾±y¦.ÖÄA#­ %zdB›ŠQ›Š¥ëÖÿª]mÝb“õ¾lƒ{aÑu…ÉÛæ¢ý^ŒE»8 A}®‹­Vøl}ІýÒXha ÐYE+£X¶.| /šf¥»ÑAìT¢ø#>†ƒ1g‚Æl7"›˜ˆñ¼‰£K55§´ôeO˜´-DzS ˜dà–vÆA¥~1ƒ“ú¦¥êð6 Q|œ'UߥžÅˆiÊù¢->7ÉiÑ·[›Ê²&Ðy˜Ã•­0#‡S‚æ<›´fß;•ÎÒÐRcJebo®è9jïèqˆö>y”îÊ ƒyýÄ4„!Ž®|èÁØGò!˜ïáè²ØkÎÚÔ GÇmSÅ𼉞kä{ÉE›Êâè É2û¥*§eÏ ›$‹|UÕPt‰ù˜úÅKD\.ËÁ˜Ÿ"†á‰£²:¨ÊaÐ`DÁZ¹aè¨ø·>4:g2­ ëdözŒ>ÕMø½šSkAL8˜aèò‡ì1M;{±®½±WœaQ/˜`c1tGCìb¸Æ2 ]â¡Y¦…3²:†&f <¦n:7Yk C™Œå|:#á|ˆÍmòc›\–ï‹ú-$€äÞ—‘RqÆ­Û3ÿfëXä7sYz`Nܹm¶4æ̤¸ñÝG“]íY̹‘¹¿D„òÈ<%_ êÞí‘Ú 
ÚQÃw=2ÓÔÛ{§½û@füÃÑÈ#Ë×rÈܘV’šW=0W¦¤Œ¸Y$¬ÚsÕ¡õi0®æVÅýž9H`fh]£ñOµ=0WòÖ '­¸\È INÍ|3¾÷mŸbéõÀ\èµ I£O0Œ$6æö;0³»D˜*9²4G.2]6ÔLï±7 nºÌ›q‰ã3(M6šÅ÷dîØÌdpøD9ª_gŽ07°ÐñgN ¸¬ÙØá3Ñfžmö¼´d¤+¥Æ)ö;‡Ð¼¿IÉtÝ”¢Å(ô‰;FKŽN|Í+å¬išûЛ¬›Ci1;3¤jˆ­Ý´DÝÄšiyÞqšñ,©÷°j2¤¡`ÔÅ!5-à½`TI?‹…jII1)*ç°Zøß‚…ov`M xgäùÐõ¶h-1€8õ˜ùØáZr—⃥*[Öâµ°žƒ1Qz‡ZÀ£:ŽGɺB±eõ&“ÄÆ¸C6§7ƒPqeÇl âV×Lx´%á#Ó'krb‡Ú’€pPªNÝâ¶D¡±])wÕ™i5*ßg?ùn¢nCos@ækå@ï0xóüÄÆ£7¾@Øžë)W3³·Ö¼‰ºØÓjÍÛÀ›Ái ¯éïF·´ŒûäolŽÞÃëæÞMh'lˆö zã¶Ò²&%ØÐ›ù‹±ß4­ã†ÞšB¹Dn‡Þ•h³+ŸÁ7ÄÙô ðݱIÓ4nð]?2­4di‡ojÿA Œ ¾Å›­ö© äà;.Uk#ðÍo†F3úTqÊ÷d]ŠÉ^8àŸO)Nyøf й*¼;øæô€_á’€=|‹õ W‰j%¾é²Ú˜Œ\¥þ ¾™!]º‡‡o&a­µª'Ñß郼mVÏ ß™Á€A¨ŸÀ7]7BÁÿ}ߤ¤¶@:á›É°ë(¸Á7‰ <4%òóoì”Z4£èS ,1OòŸP>¤0×¥ºýOS¥ßT_·›“öäµ¢&-)ãa´Ž¦XJ wÓ/KsÓQ±¤§Dõtˆë] +‘ù€«D®0Šd*/I–Œ{+³±Wì¼µ3lˆxoeÉ*W"£e²6Z[µÇToOúHûå2¦…ÆQ¢FJFRZñÙÊ ³I3ä»­KZ*ÝÉx‘ι\ Õ¬9 £öÑ“–(•²¤÷N惠t ‡†ªg'Ó|BõB¿0tÌ:¿3‹q"/¬³ìÖ¤ÆôÇ$øt4]jö]û˜y 5¡¾TŽï½ÑØkÁè‚®Ó°65†<1…\hW£5ª1iVph}©ª;Ÿ}x&3ë˜Æ¬FŸ ’O¶Äiíjhôkëd3Ÿ}¤.) ìm,Üô×>f#îuÌ+i£5­Úë±â=ëkcØáY"OÍ‘‘A?ymäŒi–1Õ„nßžŒ'ñ c1nsý6èçДGz­F£GzImÈM5‚ôÛ¯BÿÒ#î>2f 5½å†ôW//HvHϺ`a`þù¤ïdèÉò‰ô- ¦9âô=“ -^¨ëžiòëõy"=§ÊĶª'ÒsZ½µ«²Ü†ôLM†Ñ¶Ðû‰ôµ±03ã÷>Aú"9]K¿î%ô;Ÿ¶Íô Ò—ÉŒ¼Ø9±H ïh­^øè¾p‘ÚŒíDz³ÄÈY&ïDúƒ’™/=_—ˆC{æÇäÖÒ öø½‹gÀ<Àž9~¡óeø7°Ç‡Œ"Ô½ ì…,+‰Ù®O°g¬VshpøöØ#@WVU'ØoÛÀƒ½nÝ84=ÆödåÑ%´Çþ€=™,²GáûÌÒßoßñžt7ûàÄ{&öÌñ6NÁ>NSñ‡-Ü3G-6}àú÷¾C¬¿ÑÕ¢½´‘ íDû$µ!C^r”E{ik؆MÁÕ¡=Ý®êu‡X´g# °€ûS®OtQm¬ˆ;O´—Fúš=ëíÙôHŒí©ÅO&VÑ4#íÙˆk"9Àžo… Y!`Ÿ„0ÆgL×K-ØËŠœñë¹ÎâýsK}ì)DN1•~€=ɦ~7°çÞb½‚G<À^*X&&Ué{Ù•Ô “ëù—“5Ó%ðZ¬OLÌ$6éº;,ÖKã`Ò‘ Ê,Öóµ³bM'Ö'Alæº/ƒõÒ°g,õõld9k ó‰õlŒLº3.4X/æ~óÈëÙ˜qtÚì§\/ã|bËõòZî©O¬××âCÎ|b½¼–Igû)ØKÓ|´K[´P/oeJ×r Öb½|‡ÌeìX/m•µJâ)ØK—™'1Ÿ‚½4”’féñX/¯˜f5Ùy¬×oHõ:Ø^s¸®‹õ2ÍÌòÝ·J`°^ž”Œ½åìåïð@RKÂ÷SøœlùI Kj`-ÿt^´êŸ^" ?éýëç5ŸÌÜÞè'eMƪsZ¿ªØY%ƒøK娂äc½¾åýãõnïê·]¿Æª÷ê|¯5½|Òí/d™ò ?+6ã8Ñ]‹îˆ‰ö£m²‘î[LßUè¦ö}½N6>çpøW–í:åoŸC;Mˆ*å²ßV:¨…LR;{¶mï !·Ù¢ .˜}®íÅ!$&\jRû‚m{‡…ï­¼tèWÞ÷üþÊsÛÊ»!|ÁÊËüÊû!|¶ò¿ŒÍ”òËÞé|\Å™º”ܳ‹¾HÆ4‘“ ú™Nê¯ê$ÓJ]# '¾_Ã`ú^ ¸BÒÍôv?׋›‹8ëг-zgßB•ÿRÿö[Cg®¡Ø_¥¨”0~, ’3$ýv~Ô†ÿ©:”¿úË¿þ›¿ý»¿øûþÃï1Y¢Ÿý½ÌFxÐv¥ˆDHÔãüÙ&ÎýÏÿøû÷"qiy~ªî”B¶ºÍЀÈar{Ñ@h SÝzhn¯À<…XIH bVS)MÝ” XF&cÖ• õ£Zgg#ÉlPºxû| “˜É5—Å‹› „¾sšÜ^™ªn•‡cIÁTÝaiûÌüqM‚]%ü 
`¡?Êd-Ñòq×ÝaSxL2‰”'µ° â*_EMQs{‰ãۤȹ„$ŽÏR ŽœÔ@H L¯’ü0Jo=g(”=GÓÄ]v‡o„ÎŽ7Ц.„ÛûšÅý­KÊ£”MÙv—Çd)›Ü^ÃG•.rÛ¤¬¹½XXJÚF'*뚘c’C(ïìm…ÕÖËLlÜsÚ¦¹½&K\e’¶k?¤jr{ÉX˜*Vów¦j ï°Íì`ÞG•— 3±f ïd1üBÇÌB(¤fr{ñ¹ÖX/† üÕ*¼“ÅœºIéÅW1™/Júî&¥_U×=•º©¸#m)uͶ–†IéÅWbÏ\ Sn‡…ÑÕnš†Éç%-ëusÕÚa ÍEAkì¥i²yé7ÊXD1bRá¨ý¹áè©~"³6Í]J¦î=ŸƒÉå%Uù[–6º·f–`Ê©föM/n•˜žm”™mmMæ„køzb™ÊÑdñÚ¶ffvÁ»"”l–aç¾å `S¡µ('}Ì$ñÊb,Œá—焬¸JBÉ+ûýys6I¼ä±¸fÇR@vcF@ÌPS– ŒÖÏß%º|/oé๦¥;<ãû3¦žvÓž'`>æf‹Î…YH×!ñèÌÌ‹ë{t@õÎTã5žèÌx¼€e':w’ ܧ[t–lŽ m,:×æÎ¹ƒgœb³6žËp€éð¹IžüÖ=>Òªwvg‡Ï³OH2ãÄçÊj¸Ì^ª°îð™qt­á¹œ|®BÓÝnvŸ ð÷ñP¦Ñãseã³e=>Ó˶Y|fÜ0ö:ãÕ|®Ìk¾®&‡ÏÈËh#1Ì_ø\%2àxá¬Åg†£•ÈýU|.,¶@Õãs‡ª{ 9 .Õ<Ód‚—Œâqº°Œíúލ1/ؽ›=Ts ©rÒóë<%Ng\ÄtÏ‹:g‡Ó´æ÷Ö æ§I)ßw‚Çi¼ÑÔ ùä]«Õº\“l˜ÏÇ÷p6m’Eóp-Ìæ³‹=\3DËT4K¥‡k¼}cãÌ~à5ë±CÈfï^ç¦ÑMyî,Ò4RÇAgÎ¥›W¥é²Fì›ýmÆÑ!5ó|²_­@á:wÁ5låé¼ X%x¡¸ƒêbAÂC5-ªKFòPÝc2×sªót2ªƒjIû|iÕ4¨®¥vPM„òLÏC5ÚÆ“¤I‹1éµx@5)ü¶äý]”f^Q¥œ ÝIú(u » âUÙ6Aº:ìñ‚4i¡uEyAºüq ¦°0Öwö‚4ŽÆ­tí@»SfPSó|DÓ±Ë=ôà»à4ÃoÖÉñ8•Èfì§éÉEAØ&Ó‘‹ý Çi´iÑ5û›æ9 Ô'FòlÔ,É ëEå\ÔLßN:M ^x –¶uÁZ¸F“оªÁ9´æÌ‹‘¸-Zã±”•î?ð:2oþs y¼ŽÎÅá5?Êâ=^«™áŠÇkÃXCqxšN»¾,žÇkÖ9^"£Çk>Wÿˆ|-•‡Ÿ‹Û7³V-¹ÐC7Mçr:—ÐM·Ú¥} tã¯Ú­=}7}HAQÍ­ÌÒ VVþIMûÂø¾H~Öû+ÖÍySMknoôSª¥ׯ×y¼š }øüx½Û{†úmׯ×éÃkŠ×š¦_A&àCyfjùþ2}˜©Kñº`ò¦?'úpÃuâ_Y¶–Ìy‘¦4óikS¼ 0;‰äzþiô¡ÂWHó ýÞ§í3ß[yéЯ¼ïùý•ç¶•wCø‚•—!ø•÷Cø!úÂ:Í«â×÷}è;y‰>ô¼Eú^~ˆ>ürŽuº 5hÌŒäüyr.Zrî_þç{ä\žŽœ3D%Á‘s ¡zôñ„Cºt4Îeé—%˜Â;ط̸të—<ÓOB/êÿm™ •õGGËTn‡hòÒæÌ¿R]ð6õ”dm tñ7ãLŽœ«V3*É“sÙ4eSyGØ9óJ¨õÁvk T×ùÁdSëÅTÞ‘þƒ91,£íf²òÆâŒ ¬@ùð¥nì\[Ê)EXÏÎ-»wa} K4|±ªµQJ3•wÄàƒPdæÍTÞþ&<óÂú@‹I,•Õ[ÑÅë;—ÞWúÆÎЧô›Ã`’”ƒÂ_Ž+K1-ccçr~lše8ë/“ö­57š.âò] en4aUËt4ó#> t ;O·&VÃÆÔúº†«Ël\£%ëWò ³FGÖek¦¬ÑÔÞÙØØšLíy粬Õäø:*ËϾd¶ÇCY¨¹ÒÈ7ò³Ô¼Ñv†\­yÕÞÙÈßškG?æÇFS‹©½£m£W‹£íÐVÒ˜‰• mÇ`†g;×jŠïHÛ2׺ñv†X©Õ߯r<;BoóÂhãモ¸óÐÍ´â+Ç‚‡n"ƒYÝt _윃n2wÖt“¹Ë 5tã¤1a÷}€vèæicîŒÉ×Cw×Ðˮ렻6KBxèî%UYCð€îÜ+tCÛ4.tó€®›©xæ.›™;즉tÑл˴†dÝUbÞä—ÇnÚ//Ýb®]÷ˆÇnû#Ø]šñ6ðÐÍL“ë¼zè.Ìx9/ä®´.¬{zl¼A6Ü,/gî~ÜLuÚ°2¹È] ¿åq»¨KÈ}‡8ÜΆIÙq›ÔèZPÛŹlxÜÎÃîI‡ÛäÅÖîq›~ƒ 9N» oænqèM˜2“öè™MävžðèM¯†‡xóàM(Z:Ý&tw«&îB·qjðàévû’m2÷XÎx›ÄÍ2ælȸ?Ÿ1z‰·ôãLº ÜÍ)›ÀóÃÿm·$ì~@Ý ÜÕz.·‘ 7{¸;Ë ÜÕžª ¸Ó³Ÿ^ôæÜÑ}¸»Yçï&òZ¼MÞ,©öNþIì-¾Kä}ÖûvÆ{?™¹½ÒO·DÞúõ:£Ö‹!òž¯w{ÏP¿íúõ:‘wMñZÓò+ˆ¼B 
Þ(M¼L䕞è9RFg°ÄŸ‘·Îá:ñ¯,ÛH漿xÍ|Úª¸ø.ÀltŽïù}:‡CØè7„/ sdžÎñCxŸÈ³;ìKˆ<éЯ¼ïùý•ç¶•wCø‚•—!ø•÷Cø!"/³àÅȢ߼FäùN^"ò|'oy¾—7ˆ¼R:­ëÐÅñß/yP ÷‡ÿòZÉ*êw‘QPo¿|ÀOrf[z<äZª+>õQoÉÒxÌd²"5Z¶–YlOËuÕå¡ÞiâèZ¶§¹ýjõ+u‹ÚÖCeó 6†çFE=\‰[Ä=|Ñ­Z{BvQ@­ÖU•Gl‘ËW·Õ±ªòP9¦©®GQEZÛ,ÁF„´²‰Í :iišÖ¬A¿ÊcqlPÔ»1­£bëŽÅÖó±õ±ªò°ÍXÛp,ž3ƒµá,ÁÃ:þ·1VÞ!Ñ1MÓ±x ÿð±b²bz\Úps˜Êâáe+°£SO¿7¬Úâ«ÖȲxÃT²ôÄ“rˆÏ™‰õèX¼òQÖ>éѱx.ܲSU6,ËÁ>Öªž‰ÇÔüOdB§®~ïØ”MüSOΚP¬ý¾SU6,^a )=ožq|îÙÙªMëÅY˜@t}è²ÅÜø°/½Ea.“a¯Î¤PiÿÜX0’‡ÙëxgÜÁ{Û(<×¶Y õßÛFáõeè}§ð–)¸wgQh´‰\‚½o^_t¢Þã™Í$oQxl‡eJÜa»&<‡Údéú2Ën¨]X:ÔîÎãÁ£6ãäV ŽGí:- äq‚ò©{Ü®ÙÚs=nsæ+„Æã6Ûƒiæ¶‹c¤=l£Íx‚{Øfáʲ·Áö´QDlWkXó°í8HÛÙ9mxØ&[²¢qÇ á ø‚çsüÞgòÌÃ]ýTžöè×ÞwýÙ\Cˆûâ»1|E:Wƒ_}?†bóðW¬µ@{)á·è¼­——ø¼­—·½­›7½ŠóLâ¨Rû%FoØÐ¼ÿñ‡÷Bóó.-ã0‹¯\nŽ¢¬‰!Xžºµëé?Š©.B8®È¯Aï~ßaRgZŒu˜zÅò“µ[ëp´jÆh«´½FM„ÍEæVÙ0…²ÌX<†ÕëÖô`Y½ !ä1óª ËÚ–ŒÍŠŽ%õX°ç!çÆ–Ô›–œÔ‘àÂe‰¬²¬Ãé#-õ|ÌUQ„™ì„cKê5‰n»l‰ƒµO–µ­1GÜ•‡l²òÉ2›Bˆâ—‹îHËö=C·œ^·6ìƒçô ¤‘HÖ]Þ°ál§`‰ú”Â)§×èm{¦z¬ÃúWÝRyÕzŠÎ<•gì˜3Kå1¶éY­É '–Ê3.ųk®v%ñ•—gR„N9Yd^¶³k¶Æd¼Àsy&÷̤n¼¸c¶n¹<ÚInÒd²¸É2 »|¦³çl™ÝcãòÖ(é°¶i¶Y…& ›,£p² þÒgÑ4áxsköïÄ‘1FáòQWØÀœÝ…“eT¯k^rCkÚ½¡5d¸þ¤åÜÐ:ÙüÉ­1u‹ž­%'óCXnhL~– ¬Ýmê°š½-ÞÎa5>Ý&J“:}«éº2•x¬fh£Ìy°Î–ë÷`»õŽØÀÚX»t›p=%7ê%è\`]ÖÆþþ|¡>¼™`¢òvRÝïªZß%î>Á;&ÅúPwk‚¯ô£åîÌÏ×Y4lbÃÞ­__Ðq´üùù:wÏòî¹ÿ ¯KUa±Lûæ»^g|ê–о‹.|½$Ÿ9“ëü¿²t÷‘ÿLº9UÙåN¶ùf—[e>ß÷W9õ`òü ¾„Í©—· â È<»×â@”ªÁ/îérÛ¾ï/Ø#äw€ÄWì5ûàñC„^k,‚…2Úk„ÞÖËK„ÞÖË[„ÞÖÍ„É;HªÅ!ÑþŸçؼy¯ ž\ìw™uÍ!si]Ä"Z%ï2ë%›ÜŇ{ov4ZFúõ²ÒÇåDm,qÁôjËEÿH™uA‡FGêµE>²qËCU¯ÔVµ¼S N—Y§ÙÎ[Ñhm4”Üá1Lkz`º®¥³H¹dÿуJë•þm—¼? àÅ:»Æ¬Ù:¡æÐhí¹[{5ñÖ»ººhm+]NBp1&Š mÖôÀPÄå Æ.ɤºº¤ÖZ:]ŒTòÃU]}‹¸@£µ>0KÖ C£5? Ñ¤>…‡×ÞÕÕÙø¸Ë¢©Hþ©­Îá˜`4Š ‚¡"¬­>¯·Qÿ÷/xß½%PcYAÑèlŽ<ޱZcƒ.–Q´‹Þû—1P+½»¤ÒýËê?¦dlÖÁAËãÆ !q…Ž ^tï_vißÚ­I‚EŒ–] Ö&A†yÅ”¡±KeëªqF L7HiÝÈ|r Acñ¡ cѼ0m,H0¬ŸDÏFƃ&Ζ÷›6šmŽø›6#¶jú×FÁì‘DâÙÈŒ„YÃÁ”,õ7-5‹O踿éN@ŠTLî}Ì(s0a©ñ¡;™].2ýºô‡eg^+£çñ½²¾6ò‰ïÑÕ2Ýð½Ù3ß™«Íî ïÓq2¾Ï ‰¾·éêE9|Ù†ÔløÎ½¹¸— ß»#½7„цGo/–¸Ç¸=3šº€ékµÞêÒ³_5GÝ!}«Î¶å¾Vk[Ýž>þÁ ®Cz–Ï[07¤oÑÀCzèæ&BcCzª.¸\f¾žtHÏèùE«oX_ÉK? 
ìõd0W1Â ë …Š‡¦Ü°ž&é)ºa=³gÓ™Ãzòhö»9¬g†9³]=Ö‹ø¡½6¬g‹3öX_¶‹Îa=Þš£¹êÖWoÃú”f5Öo"Çz&:]´æ†õü”ÝÌÄa=Y{ ØGd{°'‰gðŃ=y-Œ&4-òæÁž_Ï\!ì‹K¹¡=Ÿ\ñjÚSp3‡Ý£}v6´gøÜòP¸‚D‡¥uެƷ>ðl‘‡;à'˽Éǽ¾+‡éŸÎ"&‚hünã„<àÓ•ÄDÃm€¬·ÕøÓF%l ð»£þ6ÀgãÊ ¾Éã¾>ú°üñømцàûpµð£•Þ7WCØm€ßLN“î»qµØÐw¥C‰´ù‚${©9´o^èÛÁ>Xp~ÀÞ];ì[´ž;د}x°Öoûâu¥ìÝMàÀ~:ÅÔƒ}ñŸ´y¢Ñ™Ý±~ÚBŠ;ÖÖ¾C½c6w¨wAŸ;Ô'é¿C}´àõÉ+bêÉ8Z=ÓB½Ïü¼A=ƒ÷Íößäúé,ê%s·}rƒz“h{‡úh/ìPï2¡îPŸ­/ÒõÝ"Ê÷s½~Ãça¥¥’äOZ°êœÑªz‰wü¤÷W Ÿ×<~2s{£Ÿ”5ñ¥Îiýj+dâ¥ÊYA2_^ßòþñz·÷ õÛ>¿hD¾„^ﵦ×ϺýECllP'ãl½Á¤¸ƒ*PÊÞ¿:Õt¡/f‰ü“»zi|Ná:ï¯,ÚuÆß>…v>štRª¿ /´çcç©.¿÷lÛ^D¸!n*¸8%‡‚k{qôP‚\ bÕ>Ûöþ ËÙ⽕—ýÊûžß_y9K~åݾ`åe~åý>[ù_FfÊx JÖ óL¢Oî !­ŸüÕ“Š®ˆ¨ *ôœþUÙ>ß Óc3î*}üO'é×uBÕ'[¯üã½d×Ë_üeüÆ‚ðPs~÷¦Ï߆ ÿA3ý­!1×P쿈þ±Þ%Å6¤*(µ±+×ð?U‡òWù×ó·÷ÿÏøý1&©Ê}ég¿A§¥‰ºï3yf²ÈžÚ8y\yÁ¿ÿÇ߿û)t‚ó@ëcù¤‡I“ýé#ÏØ±×Ewš&)@TøVéý[J‚º$ Ð̤nGeêˆzµ5Ñfûd "2Ý^×Bš4KÒÄÙü& ZU…m&Kl*¯ŒR>˜:'l ªÓ&i3i’*É¡üÒF&m«ÐI/Ô”J–î˱Ðl‚~c}† ý»Cá‘6Í’"T­Š“M¼H›fIÒ;%7£û&VcáHJÅG¡';N@Ñ ¤U椒¤ã€f3MɤHâ T3‰± eSå„/l l4y,›I|%–*ªcÌš")´®ÝZ T[ Õ\Žë¥ï "`nÒVY,åþ^ôÅ(­äªÏiФY{Ö)Ìø!ØVMŠ$ÖÁ¨-ãü‰ý%USå„ÏuŒ$(É‘XVK•z¸,Y3EN¤¦F£ÑW”ÕÔLФ*EG0À8õ9Sä„óJ° y®›I|®2°›e^ݤHâs¬=ës¦ÈIÚêøÔ± “"‰Ï |Ç)v¾4L‘“ªIÃæú9†I’$ŸóKUNGš«È‰|<†Ï/¯œ&G’Œ„lº¤Ê¥“c½ËòÈjcÔ£CÓ‡8?Öìbs0’Ø–C¯×aË84ÏÑfr>Lá豇–£µŒ¾aÏĨmŒ¨3›¯ìXº&m&CÛ°Y1ç!ݱË]’GžËØq]øe‰¡¼=­eqN×é%“!I¦7 ë•̠鯒ä {ììçÛxȞ壄­í€ì™é©0c㢃ì!ÉøÒeXõ-¶ëk«{ÀžRñ®„ (`SFÉx~„z 6¾ú˜½q3ì=hÅë¡¥yBö cK‚lYÇÙb·G_%ÍÌuØÍ!|@öhLÌ7¯“å!òtzû'Ý™¤rªÞ6vηr<›p•5°ül‰iÅfÖ«Vñš mñÉs€=Xí©$.öØÜt2(ñxÝiA ·Ùãõ`yYl8ÅV׃° HŒº?\óœ”G˜ñpÍ‘ÈÓ8ðzŸ ½EÃëAT£ ]¸T‡×Ü<ºÖ<àzHYPL[l¤®Ùnõ”Å+ÆCö s0ó貞²9ó’¹–²ïf qŒd‰ÒÁÝ,7‡‡lÎ@d¸pB6žKhì£Å²OœŒS6¤‡lòXž×s²¹®8Ò%ɹñ=X׸ᒞ'd£ @‡=6ÄÆ6¢éõký×™ì}¾\OЃvÌü #”6O1›†ÑrjO¿Ï×»½g¨ßöùõ>¯xMñZÓô+xÅÄàfhÊD—WÌtÆámÕä{(Ì·yÅu ×yeÑZ2§ýEþÒ̧­°Öwáeg—\Ï_À.̓WtCø vi¼¢Âû¼¢Ýa¦˜Ý{+/ú•÷=¿¿òr–üÊ»!|ÁÊËüÊû!ü¯QucÄÛï5^Ñuò¯è{y‰Wôü¯ø ¬DmR¸² ™}k÷O/²vyn6àŽŠo"$’àÌ tj H ³SO_ ½V V¦k›)n­ Õb½Öèåü¤EbqîÚTÑbÈKx4VnšÐÙµP}t¬™[ C<Kr¬]`-‚Ô¦(Ì¿gX»Æœ=P7’¶YÖ®p±ƒªƒ Ûé‹Õ<Òµ­Š7óŃPGÂüš8mjê‹ ©P÷E%#€Òm¼/.„ŒX»¡ÌÎ`y»mJÜòKq¬]%åÊe\K ™FÍ6ÌÂah;IPØ£9…X’˜èg@‰• 4SÙ„mqL 
i2…MÈÚáôù”W²œÆ"B¨¬=æ±Ò7ÖŽÑÀÔ’¥Í±vhcÍšk(ýfíøqc;im§þò¢–±Ñv)U¾XŸs´mZXµ,¢n™mýp@–…™Ž¶‹+†œmmíš§ì×m‡k,`X“XÃÎÛAï¿X¶m¼]¡>ÞäÈÕèy»‰]ƒ.DçŒoÇ,R¤Ì¤ÍÔ6‘¡¼T’šLu“y¬iãòÕ¤ÍT7¶ ÓÃá—éå¼ÃÍz‘¬¦r\.—e%¹b BÏÕì¨;ÖH#NáUyÞŸ$IÚ[çù–Rvên‹£î°!ÉÖÈHª)n"mØRj*­Õ1wÙ&U®ÕÔ6Qö1â«Ô|+þ⼌ã[ów¾‹¹ÛP›Ih®õÞQ›ÃÇ‚Oup¨Mæk]ÅßCmaîÊcÝw¨Í›/bzÀµå–(+)6Û´îãJI,¥¶'lF½±ÒÁöT½_uì°=ÄÙ¢_ÌŒ‡m¦‡ÃŠIo Ûƒuâ“ûÛ,+p"nyØfð ôЋDt°=$“a¿®Ç²QwØ'ñà¾KVøNoƒì ̹色%ŸºO{ÀfÂZF 1Ò;ĦÔi‡Ø-jú:€ÞÙ¤É`«)Å#öœ½ÜT‚l´àâ ¿UĞݑ¬u öd6f‰Hb³ ŸeˆÍ6ËTå[’ÏT4ðÎ#6ƒòpTªFyÄfþ:ìñ’ã‰Øl“Ô{©á[xDIýZĬ,Õn˘GlJ¤M̨å@l&«ò ØŒzÁÖ åì!6ìxô°ñŽÂ _„§ö€Þ2öE­Ð?Ü žFñ°™~¨ßþØ£ø’ô`‹»DØלc­ðvà5¹Å6ï0Q×B#&,@É^‹¹\bAێטÝå\P¼â|sG®y¼f$¦Ò4îÀã5#0Kg9Õx6 êOt©Çk†aŸ3âÀk2X9Ý¶Ðø¿*”‹GlR~¸® q‡Øƒ9ýè‹¡0Y·Ð»Šr…²‡d0Æ!1é²Éëá; ÿõ=$›>îo>TõÃÁœËwÖµønîΣ6ý!·‘Ú‰ÚÈ4n'Ÿ¸HbÜlù†Û$r–øäp;P×À:ê m¸ —kPõ.È[ð·s×°‹ÜŒ¯ƒdÑK¸ ‡Ü%!x3aDä&íŠÇæ‰Ü¤^°#œ²63Sã©áYÏ%lÀë0oÈÍøP¡ró˜HUåy"7¿ß$…³7 :`Øåòã›ÿ˜…¾÷À-,¢LöÀm>VñWñI[‚Ï—Bà‘›X1rTkCn( ãú²r³¥’–7lÓ="Ý®lã6ZE–7Øî’£i¶ý ¶÷´ ®»á6뎎x‹k·™ƒ„~ ·é‚ÑT®O›Ë)£kkyÜÆíAN.¸ µ;÷Û«pCm =£ªžEjs#ÑÛ¬'j:µ´ôžÛa­öOÔæJCJUnƒm‚*v–ƃo°-阙ŷÛcB¼ÒQÖÍåòU¹4⠶ㇹÚæq£¶)òÇÚõb3?CmfÏMo\ÚJ{|±Ô†&9’vߨ=˜ÃVéóï¦íè1q¥¿£µG œòG5«³ýMâîÓþß0-ÞùÉÎf3ÔÝúõ:‡6Åñê÷ùñz·÷ õã>¿Þ§î®)j¿÷¢îj“@bHPùߥî FÐ)ºS5ÿùPwë®ÿN]¹ë”¿ÍŒêWwÒ¡§p|ÏïS8²vžÂqCø G†à)?„÷É;³ÇpS{§=úµ÷]ARW9N~ñݾ"««ŒÁ¯¾Ãxø+V[`^vˆ³o1x¾—×(¼­›—8¼­—7H<ˆžÔCéª<!}&õ‡Âûx/ o0¡äcY¤b„ɄՖeÁM=ãF1ÅEhóÅwÃOÑðFé–Ác-ô˜/EgÔ`<*•b+ËÒV,…G­$Ϧö©QMa(Uª’‹63Úª+RYc:Á•gkð-®&‚j•¸¹³Šæ²+°ÈÄòï=XÕ>n#ì`EGU+’.o\éŸ=.…Ç|-½5ŒlÝ(WwÃÔ!m uÄÌ4Xûd1xÌ6%1 òQæ*&"ŒÔ#¦s‘¦b£ðhvŽj4Ç  År"¢ëwž,{²<êU1 5G°t³aðhÛ)Œ‘jòÁ”áì†8´k5yÖž±¤§vE«‘6_v¶áÛA—wÆ.Ó<ô1³Xu&‹, ¯±HÏ¡h[±Séà rÏdŠˆÈ÷˜,¾!N¬3ËàQýÍ3«= ZªeððC\Ŷ6Yèd1x…‰®1e³„×>XùöR¾g)†ÀcЖ,Z”¦n ¼ÂÚ\7i0k°³HÖxeòžX/Tð…²>×-ƒ—év}{‡OØ <£&cš­X/c ïd5³uKá±J«2I<à$+¿8<îÂKÕÝ‹‹Á3ájLæá8<)‘¥Er'+œØ <¬¡@ƒ´Ëá‘POá óš£[%Cpx56aÎàƒðLêÐI–gíW†Òö;bcÎnIpØMc¹–·,v³¿6òå‹à±›DÈCàzèfzÑÄØ÷«jÐ…ñ©Ãû"›·¡w`¬-6üìzSRÉq^QAz3%:dh=B'zcjµ*,îèW߆Þ$¡™“U¡vGï$†¯èÍK²h©øOÐ;U¡šë‰Þä_Ò=v w€(8ªl¤ ½‰´®¯Ïr ÷|2£oèMÅÒn¢à@o€·d‚<Ñ›ßÈ«évôÎ÷Á8Ðò¾t;xgMÑžOðæ!ä_âoÖ¾« ØSÇ»9fáŸcw×ZØñáÉ;gõ†ÝtÜÀº÷ö vûcì±»}HÒü¬ÇØc7CGÛMBmØÝ˜ ²_;aÃn›uƒîaXzèfgüƒÑúgÐMÿ¥ þ‡tk¾XAˆ »é ©oˆ‹ÌÝ™¢gý 
»:ênºaÃn‰¼éµaw¶É6ì.Pq8ÆÌv3M#´;Ý”­ÃIdÃn—yÃn:¤JÌv=±›qàRÚsžØM§¢v»snØþ¤üKÃñàM9Ÿ©÷ë¢Þx'UñàûN¹ûªÓêê J*«'ûï*™Zߥõ>Á;&Çú{k‚¯ô£eöÌÏ×96þ†Û[¿¾ ãhÙ½õó}zïžåÝsÿ_—ÊÃÐJpÍw ¾Îxܯ¸aÚ÷p‰¯×í3'rþWî>ðïŸH7§*YËxóÍ.·ò}¾ï¯ zêÁóùA| ×S¦oÄP}v¯Å¹òн¸¤Ëmø¾¿`,;ÀÚn_±4ù ß~?D÷µ&•–˜à®¼WÌÏ÷òÝ·uóÝ·õòÝ×£´Ä´‰?ÏöËöýË{¥òäV¿ë°7æ¬ÏIusBQcë{F©²²šÊ¿Í$ÓâSÞ=4ÇùáA‘Zd,溟Wv©ƒ¡ŽúQ‹ds$S`WÝeQÐhi?&ö×Òõä0¼#þÆ“E m¤¯BìlÄG¸mRh´ÌKÛ3çeE#†i©¿*õVh,’HO*€Zh©J ¢±KB r4B™­—G,Ô&k‡`ä_ÔŒ‡Úh ÕZ ÐÆ,.W!v(8RÝéòĆli,•[>ÜF@Ò.6ˆO\}o{+»ä’BìleˆÇµ4RÛW!v2£aÄgIb²æˆB;g§F'’¬A¢0: ‹?®ÁRá¿ãIª2º.Ç«l¼p>W!v}-„ÁTu@Ylx*õœZßó ’H¶²6Ú‹’6â}÷Vfc 5^I÷/dãù ,c Wº«µK°Òü寪LšwoåB…µEî6mì’$H·2ŸlåNíÈ>6¦¯˜²Šh3¶ éqÐ'[¿•þg'³qE^aÖ:ÁÈÉjÞÚ­y‚OB—êékq²j7wCTcÒyPñOF"™IÜ‘µËa ò`¼é4Z Ÿ\¦”OÏNfýK«+1¢0"äF“µQ0¾ñWZÄ [5HÍk#3ÿ(ÞûÕˆZ9}à0ÍâÕhÍLì‹mתXZ˜]ÖØ)¸G¹J¢¯]™.ßê –.U?®]$D®¥<¶ü]×tq~­¾ž‡ùÊ\•XÁ[öv0O¬%Ä…ô ÌN‚>u0Oúpð`ä ÿ,Ì3çÔ2nå€y6¶ ¬VAËü„†<å¯ÊKa©d ?å6‚­rG©x”§¥yEAx§…:¦VñÒq‚<7Ÿ€_n'ÈKF½L ãy–ÔjIéy±`?©ò6”Ÿ 8­óò3ð(O¶‚jwÛFöôx¿l(/vÞsÎ'È3ô#²0šîcò$™ù*]]¦ÍêœéÎÔs;A~f¶a<©ÆÎH˜Ù ÆË âbÃøÁˆmÈhZxuÃx²’lî‚¢²Ñ†!³éè'Æ‹íPusxŒ÷Æî ã¾€é#~‚ñÂÏÎØªòd­ÆLœO@žl¶ã•°ry>Ùóͯo OKùxxŠ ä%Ǫbì‰òì(~E’o(Ï“TëºuÆf‡îs.§ åiÜFý*ªìPžÞ©Ù|O˜—ÁbxcŽæ% rÀuì<Ì“ï¬æ/ÙÊÃ<éhZY£f7˜—HJÎ#çæ…K|ŠxnHO#wÔÖÒ3ŠGÍwé’ßM&î"=“&ŠSJ;±¾1âS韈ôLÁU ß0è°}`w@ÚmýÄzèØ#Tê‰õl˜ÓàÄzÚ>2$ Ÿò`¶^å6ÿì){Côzôö’ÊoN§H/aYšfŸõhÏFŒ´Åk–ì9‘Œîr¾$ öüxø:wáLöšÞc|kt$y‚Æ3Êp´6Æ}Æ-Æãc < §Ÿ’<‡­¾î òlÄö“õäåIœÄ.œ³ /Of¬Q¯åyy2aò)(/ètj åù·µÆåÙˆí&Éõ”—FI N”g#‡n¡ñ@yùn÷:ï(ÏÆÒžå¥ºX8òÚ½uÖ¸£¼ŒG…§z ¼4BCŽéÒŽ,ʳ1õp‹ô(¯zK(/£ôO0žMwgÑÜãÙ.w¢ãåIõdÍÆË“KõÀxy0¶ÛÈc¼>ØrÐ,ð㥑Á©êwç1^ËI§(ÏÓ3!`_ò¬ƒxi«wlðlÃß'(é@xyp<:Õ É¿Ó`Ýï”å/Êz:Vœå– èðOZ°þ‰­ú§—èÆOzÅâyÍã'3·7úIY`êœî_C$ôWù>¼ýãé÷úñÝÞ3Ôo»~Å·IÆkŠ×šÞ?>éö-°z0~4 Þ !®ÐçFkÿê#]½yXqw0SâŸÜÕN1þÅ_Æo¬Ü ÄúÝ?üIsì´=e@%+T¦æ›RÍtž=·Äž‡öüÿýôíûößðÿ~ Û ÁœW6Î…3o#¦À”"ËïBÝè þJ ‚½g×öâ6õ š z‚m{è[†<ßÔum‚k{‡¡Ô$qÿ{+Ï·•w=ÁÊËüÊû!¼¿òrœýÊ»!|²ò‚£Ì•—þT¥ÌÉ4ƒ-ôS¥á=õEœÞô}àþƨ#$ýQ¦“ú«: ¼ˆ˜®Î³“_A¡º^J§*Ï‚Ýóç¨ÍXÿ”›î·á#È©ãÆZlê‰ýú;Ë@Ña•i§##¯˜Ø²ÓmøŸªCù«¿üë¿ùÛ¿û‹¿ÿç?üþ“Ý5¿póÎF%|Љ‰ŠrŽØ:'‹ìŠþÿøbÑÃØM ú4Cslê0L§Á/¿2åÅ¡YœBÁ'íâ†ûœG ؃Üpƒ’ÚŽ×C ˜]‹·á5?˜æ¥rÓáµ|̦LìŽ×C …íH=B¶çÆ!5Ÿ(åYRÕ!o“Ñ,T³-/6@¡šiAËBµ¼l<ÛAµ´­½ã šƒ2> 
f¡zh¾Æ»“ƒjù¼%-Tóv?Z¨5–n[¾ƒj™´…xØ#”Ñòí{f›{—a‰šOÐ6_iP›Å ±ß®¼¹µ£“ŽSZ4âõã º½g¨ßvýzF¼¦x­iù4b$B?’FǃwiÄ@æËYnã{Ë_M#Þ”PY ˆ´?OqÁ‚œWöÍHp^3Ÿ¶vå»·qI®ç/à’džKòCxŸK’Íã¹$7„/`íû‘n+ïzþ‚•—!ø•÷Cxåå8û•wCø×`3+;Œ,ÊÖ[,¢ïã%Ñwò‹èzyƒEd: ¨·ÀYágYÄ”=‹ø‡¸Íþ4±Ñjpó¬q;f‹’³n8%¸¥º²E Q“{[KÖº©<¾£-[q:§Â–ëª>C³!<˜€v±ˆcZ¢­Ñlp¥Ù‘¦¥mµâŒÑèVƒp娡e`.Þ¢UgÜ ûñë—œQ¾È2&‹ªbæ§²ôÅiݼ™ok7Ðf¸;f Ka–YjÍY7ÂG]´Y#ez+ŒL´…±uç#ݬ«jëcUŸŸì¾aXëFgÚÂÇüÔ†³n¨:°±rD‰®ÿZÚtÆ Zù‚•ßê32”e"hS­ÌSöXQ:í÷†Ýz…côà¬Ép~vƒ{¿Š{÷úT=:ãF´ÑH=:óÓ=æïNËÁ³_i9xÌ‘=mÖ±˜‚NÃÁ½]©/n¾'gÝ€ž°Œí†ƒg¿Š¶þ°Ð=;ûF´ Ú³£‡-¢bhÄÁt‹7)Ö‹£ºË6Û‹3pk·íÕшẫ ø÷ÎÀaÌž½n¿ÒÛfà0¦T‰7µ.ÒÆù]f»hÄic%l`ñˆhûÿ›;»d×q¿g1§(‰Éed!yI²ÿ4hÝðMMMfäš·ëÃkýXÔ'"«|¿(Àa¿eÓËVáî¤ÇäÇzª£aÝã†ÄnïlóÌí69»@¹}Mþ)•Û^„D…Û£[R¸ˆ5……ÛÐ&¯¸]™ÛpÙ9U¸íøhÉ)à†¤Ém"$âRíp—€¾I:Û%MÚƒÂ%Ý@n|g nß_¤Û3¸ñ5ʹp»ß2bŽn÷[F–¾€¢fhæÂí±^söE`ncwüœ˜IGœ!ô ¸] ®öˆ_ç6~©±­Á‚mHŒ ‰Š8dòöÁ ¶qÂgû5¶ýè"´)Øö±x•l»:bá¶+¡±an»ã3tá¶o’ùKÜv‘4$á¶‹±1•…ÛîÂmü,$¾·q,%.¨pÛýŠaGnc#ÂÿÌm·ë¶ýB'Ü^2{\¡[ÔÄJ^'Á¶»*ãælãkTk@°ÔNÁv(ÛnIû_ëAq”•ܽ†¿ùãûD ¹½¤Î¾>Jî*ÑúDî 5Úß.æDîÎ9väæ·x&· •±ól”ܸè0…Üh8Lï«™Üü€Qrßœ~”È}sNF"wçº ‰ÜR£!‘»²î©ä¾Ø ˜ÈD5å6Fè±$Ü>NXLà¾8QBÁ]ðÂË÷`Ûh7'A¸)ÿFÉç½ðdt×P¥ºeM“Ðݽô Jî)¯–JîÎÊf"÷àÛ#“›Òh¹;¯?¹o^ %rëÃGÉ}³=5‘{È/]sHÌ„rÓRïƒÜel­’»sJÙ¹)©$‘MHb•u§<*’ÐmÛ,{qœÐ}rŠÎºÏ€Ôºk$¯.tËÚýkŠö2ô.ˆÐÏ }ú?WÀãOªŠ¿ÜÿAÇ÷‰ü‹Ïî‘Í›tÅ÷§/|³ÍÐ_¾°Û÷®7>=®+¾NqíwøtÅv»Ëú4œõóY]ñ.½iÐ&m¾§+6¼rØÛ”==®¿©=1XÔy¦ùà‹4ϳ€Î¨}CZÄ“À${þ‚À䇠“Âó“O˜ä¾ -Ò³·…oh‹¾Çtíe×ß(¾ëÇ _á Õwý–Ö«/ÇðWÈ‹ö'ôçx‡›Oé‹i' Œi/O)Œº›'$Ɔè«=PÑÓåwÊœž%Æÿþû9£â¸:)Œ'gÀΣ¿ª0Ý;z=Ñ&g Œ¢Bšè’6E)&;Oê>“Ä­yV —Ï›WÕ*L䟛è³ç+jQÂhÖÂNÅÉŽÃY+IŒçòD“œ0*öŸY·ßh6j;ãV¾¨/gLa§âຽrBbìœï?ïÂc‡Ÿ`Ÿú]YbDÁ´-FÍ»³SqÊåvIŽœµ|,½ŠS‘Ì-¨À"NEž%X °S‘¬ƒ½qØ©H‹9ú8©þלE­Š»v²-;´PçìjT¤è÷ëÁ.þ˧FŶ½0¼õ¤LíÊ·Bû’\t…ö)á;6Öi”ß!Ԇ녈!Øöï Ûˆ(ÓÍ*Øö•N ¶5¦‹#ØFd8’þ…Úû˜ÌLí&ú•Ép(Ôvé1\˜BmŒ‘C“©-‰BíÎݱmÁ¶«mñ4líîH¢`Û·&SÁ6¾Ç÷8c{¸·i?áÛþÎv¦…mü‘êm ·×+Áþ=„Û«p,1½kœšJß ·]µ =F¸í1ì°` ·Ýàlá¶*¡Œm7Ó“€±í1ì‹ ¶]@ õQ°mž-½£* Û é†X,Àv—%ÑŽ½Œ¢ñ\g`û™ÅÛ{ÊNA`ûØR…ðÚUÕ¶§±ðw‰ùÂëueö¯Ï¸öz´‘|#À^õhßÄ^»†÷ök1ØùŠ<(-*²½b¬þ…Ù×͵¹Ó›¶-#ƒ.A[ ±@›ží ÚbÙ‹Ÿ¡M¸ø€6 ÷ Ú‹'hŸœß•¨]9£"Qûàw‚Dí[NA©]¢`Ebv‘•3ÛæzøžÙ T¨‡ كݛ Ù]Ð+ÈIéNVdwJíHÄ>.zMRbK½Í7±%¡"[jo'b£mèv=&bw.û‘ˆ-õ;±o®?úAl Ñ$bWÎØSdW.Й]¹dBvç$¹„l„¬¤ îʉ& 
ÜìŽNÜnœ\—¸-o^‰ÛÅ|åöÅ/^‰ÛpƒÇòOÁ]WÙREv‘¢7 ÙR=1»b>íËöÉlú^‚6•Ó}Aûâ‡âïçXÚVõ0Ý^ª¢¯ñVKGü‡G{:þòž 7¶-,Æ >²§ã`eqü‚Æg³›´Å÷§¯ìø`u‘>>./¾Ïò½çþ'Æî©í¹{!³ïQ±£V@¹ê@ ûo ŒÝº†Öb«4ÿ; ŒL… Ð#“ç ç© çÔ¼ g}F}p—¹Í£ìû+RSûÐÓA|CkjJ£Ä7¤FžkÇônƒÞûÁà/‘iȾ¿1ÞIr<ô ¾0VàZg€Ä_!7Þ÷sZ=†ú”ܘvòܘöò”ܨ»yBnìèxÒÏÛ–ç凞µÆÿ<×RÑß2P¶!3pô°,K7B¡¥44™íàÔx” ÈÍi6(z£,¥[7ü¬˜:£ÌÁ€ V/Ùt!Ù å•&o¶‹è8Vtî(ƒõÆ;šù˜mÏÞT<ž…BHԥɻzGÔ‡:ÊdÉqœ´>´±ê›Öí ;Qľl°{ɦEsTÚÁ¯ã(¬9ŽÂdÑÑK™6ˆêHsýþ>Hgy,;N–wmŒuG$‡Zl°{á¦Ûv僄0Z”Ÿ×¯}­ÊHa›°AÑ;G—À¥y—mÜ6{àMÏûgv®škƒÕË7]¶J_ßŒà“ ºþØoJ€¶Ó+^¿É§°;7#Õ×msï)ŒA ÀØ K6HM[Ž£‰‰ØíŽŠÚ mvOáÞ¢,¥v/ã´¦0<š¡ˆÚ<`²K V$²7Žr`ý´=‘mz¥!”ÀfG[Ÿ¯¿²i­¡]Ú`÷V0k ã+‘²|xßSGb© ² ‰Í†{ÂY‡tŸ¾‰ðÞÃÈaލ ’‰­F4߯XŠìœ£`“´xC˜×nÜ‘ËëÏSemw¢´ ²éÝ}vPܞȬFb³h·AÛlLa© fƒÝ{ü¦°"gùb×z¬û¢Rýž1ë?¨ŽšVt…”ê×Í.ÖDu<ì©ñ­Pj%Õ]Sª#òMÅL•ê>H·!³1lz9S¶£#$¥M(Û!KvÞêLºäÿb» “÷RGÙîQø½*Û—ÌI '½2ÔQ%¼k‘¿¡„w1„%¼ûEB-T»”HsŽ1²äà—œ À»Ó’GïÑgJñÂc²’çK ¿~» –~y™h³Lx8è–¯PyL‘w[Ozž>Ù²QYßfýdý)s'±þà”¡Äz(Hv¬øÊ/XߨA•Y=ëÏÐúé»\G%½=ÈYI/jy&ýÅ&×OÔŸtS&ÖŸ¬´gÖwv‰~²žŸK‰õRw3±¾Ø¢w> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000031355 00000 n 0000031376 00000 n 0000031399 00000 n 0000031829 00000 n 0000031698 00000 n 0000031593 00000 n 0000031756 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [ ] /Size 10 >> startxref 31910 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_tx2_jc4ic7_nt28.png000066400000000000000000002706341360743507500226560ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† 
S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$:.$¶­ vpAg’Zó!%Õ€IDATxÚìý{œãv}ï¿6r#´„Ä..1—–FÃP ¤• 4§K:E†Ó,Íöò³Ë´CáœùÛ|KiÛZ´ßÃ9Xj•–Ù²=åŒ`JÂí«Í¡-ÙŽw/—!Yo$ó!Þd“Á¿?4oY²å»=’=ïçã1]K²,ÙïÏGï÷ç}ÛS¯×ë`†a†a†a˜„qVÜÀ0 Ã0 Ã0 Ã0Q°ÁÊ0 Ã0 Ã0 Ã$6X†a†a†a˜DÂ+Ã0 ÃL0Bض÷e0 Ã0ÌX`ƒ•a†a&ÇqÉdâ¾ fcš&\×û2fG`yßyØ`e†a†aƲ,Và™]ËûÎó˜·¿ýíoû"v;BÜtÓMøØÇ>ÇqðÜç>çwž¿ß4MpH’˲ ( À¶m˜¦ Û¶!I.¿ürÿ}…BŠ¢øçBà¹Ï}®>ÇqðÒ—¾tàã¦W:Éx»}¦iBY–ýóX–…Ï|æ3P…å›™Æ%ûªªÂ0 <üðÀ|à Ç¹ò'ÒòL`˜aè$¯ûØÇððÃã¼ó΃,Ë-2JÛxÎf&…aä]’¤¶ó>Ëõ`°‡5f„˜õó,Ë …v †áïËf³°, `²Ù¬ž¹¹9˜¦é¿—ö ! 
„@6›E&“ã8þþB¡0ðñ Ó d¼Û¾fyF^Y¾™fœ²ŸÏç#Wù…Èd2Bø › 3,Ýt•f¢d”çlfRFÞ¿ò•¯t|/Ëõ€Ô™X)—ËõàϰµµUWUµ¾¹¹YßÜܬ¨onnúûeYö÷¨olløûJ¥R]’$ÿ5€z©Tò_+ŠRÏårþk]×몪|<ÃôB'ïGþ766êê[[[õzå›I>ã”}]×[>gkk«E®ft’×z½^WUµ^.—ýýÍ2JÛxÎf&aä½Û{Y®ƒ=¬1C!_ù|Þë-—Ëe–eAUÕPX˜¦i¼P`Y–ýêÁ×´Jˆ§<å)øoÿí¿Å})=ÁówòIÒx½ë®»ðÎw¾ét:îKé ž¿“O’Æì ó÷Ô¬O|âqõÕWcii)îK‹‹‹8räHÜ—12Ö××qüøñDüFt-“ÄùçŸÏòp’4f㾄¾à9<ù$iÌNÚÎówòIÒx]\\œcàù{HÒ˜dþžƒu¡ž«Ó¾}ûâ¾&AL£iüضíÛ(Ú¨®ëú÷*„cÛ¶ÿ¹Qçzàâþ9†a˜!=¾öµ¯Å}9 “(Ø`0„Èf³¾Ò­i€°›Ëå|%–”ÿ¨ó(ŠMÓ I,ËB¡P@©Tò÷;Žƒ|>ߢìÓ癦:ŸmÛÐuÅb–e!›ÍÂqœR-I’¯Ø :¯mÛ¾QJƽ‡¶EÝOù|Š¢@Qß@!#:N£V«ÅýsîJ‚ Éyº®ûÇð嚌W2äHn¢¼œŽãD.Zˆô¬’Ö4Í–Ê?är9ÿÿd0Óø¡{²,Ë—ó(oì8Y\\ܱÏb†™$hÁ’tÒ©èÙÒëÂ8£! Àiše¹\Î×·št¬eYPUu¨è†™Vb5X×××±oß>¤R©Èý•J333m÷ï6„Èd2Èår¾²4ÞEišÈf³þñäቚPmÛÆÜÜœïQ*—Ë!Œa( ˜››ƒ¦i!ã4x|PY^c7‚÷Ðl4¼¦n‹EßhPU5t}ëëë8~üø­Ý)ß–eùpú­„0M–e{¾É€k÷»Gx½}’$ùÇÙ6àº@7qÔ4­/ù åé$EhšÙ2ÎŒZÄJ¸aÙžlhq›:ø ’Žãø†¡mÛ~”MpaðžKQÏÇq`š¦åBÆg0†È777ýg éfEÜ! i666üsìÄ‚#Ë83IÄb°®­­ayyÙ÷pÍÏÏ£X,bffP­V¡ë:*• OA5 #îïj((ÿÍ4M!°ººZ]+ ¾w±Yq''M¾AC°y2ÕuÝ÷Nuƒ¯ºŽãø }.—ëù96jX¶“‹ëº~Ô ‹\ô$YÒ4 ™LƲÚ¡ÍÐ3Ì0 ‹E(Š!òù<„Èår!=ˆÒ>hŒu*I’PLÐ$Ï2ÎL$õæþûﯿ⯨øÃö__ýõõw¼ãþ1o|ãëo}ë[ýý¿ôK¿T¿é¦›:ž÷¶ÛnëzL\lllÔeY®ëº^ßÜÜô_¯®®úÿ/‹mß¿µµU_]]­¯®®Æ}+Søäe\òMïKårÙ—íf¶¶¶êår¹¾µµë5êz½žËî|¥Òh¯MÓêõq|Eã’—Ý8‡3õ¶ã|cc£§÷ommÕ5M«ëº^×u=ô<ÛÜܬ˲\ßØØ¨ommÕs¹\]UUÿdss³^¯G^vÛü=)lmmÕu]÷u¥ LlllÔ‹Åb‹N<¦T*ÕËåò@Ÿ].—늢Ôs¹\W]m”ðüÍL3ƒÈËŽ{X+• jµ333˜ŸŸ÷WrªÕ*Ö××qìØ1ÿÂÂÖÖÖ°´´·}ß7´Òô¨ÀÆÆ2™ ´ìkF’¤C™eZåÛ4M…@¨¸P;ù †åƉSÞBéW>àõ#9_¡È20Š[+½ó9ÎhÎg€¦y×7.¦UÆ™ö7‹ Ñó(ŸÏò,lll„¼¯4WÐ6ª‡ iZ¨¦@¡PðSRÈ‹xé%TÁ²¬P±2×u¡ë:~ú§zä÷ɲ/͕؃…ô(¼¶Š¢Ò4 ù|ÞOo >“zMOŠBUUlllô‰6 °Œ3“ÊŽ¬óóó¡b+pòäI\xá…þÿ NûûÓé4ªÕjœßÓ@´3VO™ßØØˆû“ë’äýMÓ&ßÁ"_T0@¨°WâÙÚ‚tùybx1¢ŸvT&à—¶=šó™æøCŸ§MƧ ×u}#”t £TUuàô˲Ëå ( 2™ $Iò+Ê—J% ¬®®hÔZ PM ',•J‘ X¦iúU僴+RFa™Ÿüä'ñ¢½h¤ßËöÎ,Nç8ŽŸ>|¾PgNȲìWpIX|,ã̤kÑ¥ÅÅEpÐjN§AQ«Õüû$AÅd‚ÜNÆ*Óù¼ç2Jbb&]¾É+ÒK-ÓÅûKå×½…ÿóÛP÷ o:Ž'Ž£X?0þäah¯ù!l{øßÝq¼ï'×v&]Ƨ òL9ŽƒÙÙY T¯Ï#ªÂ,šfÛ¶Ÿ¯W*•ÉdP*•üyŠªiš†l6 ]בËå|i§Ïí×û%IVWWñk¿ök¸ë®»Æö]²lGc†_(Ñqär¹ò3©¦Ð(x×NF&Ý«™TXÆ™I"Vƒõ†n@µZÅÊÊ Ž=Š¥¥¥Ž­FNŸ>Ýv°ÜyçøÔ§>…J¥‚#GŽìØ=‹%QÈ…´°±:ÆP-gmm þð‡qöÙgõÒG)ßp×]waqqªªúá<£†O 
Å*•J=ɰey?Õ¨Ä=“F¶Xî8¯yüHNE¡À£p.;€õu(Ïù:gx·,ϘÎfÇ=îm8uêÄHî¹Ó0‡O*†a„¼¦Ô«¸]䎪ªÈf³~ñ3Ã0üj©TàˆÚ›ð+¦š¦ZŒUUõz=tîR©„l6뇓:®VNËËËxôÑG±gÏž±}¿“8òl^“$IB`nn®¥Ê{shªÎkY–_MW–åP8Ó`qq'Nœ@¥R y:G ÏßL¾wïÞþÞ8H²ìM7ÝT¿þúëëW]uUý¦›nªøÃ*Ùº\.ׯºê*?—þLÎmÞ6ŠÞ~ÙÜܬððE ¡ ÄEQb/4ÓŠR¯o²Á—4ºª4››õºªŽå–w²@À(ä»^Ñ*V1ˆìªOûf½üGŸÉulnÖë£*·±Q¯ëOûà(¿¦z]Qê[ŸÿöЧÑõz½¤”êõºª_xI–ëõz¹\×_÷íz©´³E^&eO Á2›››õÍÍÍz©TŠ,jÔÌææ¦ÿì ŽWUU»–Y]]­+Š*HŽt]¯+Š* HÅ6{x>ÐûwŠ’—I™¿GA°0Vð9P*•ê²,×KÏöR©TϪڋźªªuUU}}¨ÙdÂðüÍL3år¹ïf}{X———±¶¶†……%fffËËË=­¬DG±ó°oß>^Xõ~ þ?N(‡B(§‡ÐuÝÏó›¸×ÛȲ¼„¼QxEm;™1¦˜dùÎçó(•JÉ®óý§@ýáû¼|èëp]ÏS8 „pÏ=#É…p¿Pƒâ8ÈÞpJnȸ. Ýóq@R¡<ñ8ÎÓ"C–óy@×;–e5‹ý9+a\mü&YÆ“‚eY(—˰,Ë÷ZQÿGêcL¸® Ã0üqiYŠÅ"4Mƒ,ËÈd2¾‡´›7“ÞCí@€F#*VCۋŢߤ—|õaÛvá¥èº}@ôUáËå¼±ðž÷\K/½c¤¿Ën–í`a,òœR‹²¨¼R"—Ë!“Éøm]H¶ƒ-fØ‹šv³Œ3ãƒÆ~°@¥£PšŠeY~ÊÉŸøD¼â¯èë3ú2X)tàÈ‘#¡ªbªª"•JáàÁƒ]Cöïß•••Ðq¶mûƒ!•Ja~~>T‘̶m¿¢nœ?†ã8~O;Å`b ÑÉåFWõe”M%]wt•mà)<ù¼§ìœ:5ze˜\ù¦B-ƒ†ï)Ï~Î=)ŒB-•(ÞyÔ—~ 0ïÆœ¥ccsðsÙ6àüý)(¹´§>yy PÒg! Þý Èòµ=¶Û´âUõ„|ŒLªŒ' ·§Þ‘Á~£”Tð©÷5m <£ÐÛl6Ûs¿(ã!8öÇûËå¼U£^Ÿiù¼÷>IVWû_Êf U²ìª†±]à,]…]I…òÇçwîÂñãßéo³[e;ªæ†®ë~õæn‹˜º®#ŸÏC’¤–~Ûl¬&‹Ý*ãLwÇñ/…¡…MZL n#èØ`ŠŠŽãÀ4M?õ„ŠʲŒõõu?~¼¯ëëË`¥äìùùù–}ététºcü;½waaÄüü?Ü ò(Ëwn*Ùùd¹QþtX¨ò˨Υëž6N×9(BÀ\: yïPù NìŸÅƒÞ?šë 0‰òM…Q†ªZýØÇ"û¡,6W†¿!˜&l97¼áJ´¢@”êCž PÏýw@U‘Ó.úå'lùš¹z2 ´1X½éÛ°yUEáõwàû>eèûbeÏÝ v)»AÆ' ªU@UÙiÞ§jð²,Ã4M¿+ ¯Aý-襎%QÆå¤öî»èÒ‘#GP(pðàAÀúú:VVVN§aŒÂK—òù¼Ÿ0Ü©?XOl÷i£*4þŠÒ>œ×²<…¢Û ªm7-Ûö”pIÊŸ9ßSÞ•gùno®‡Ã¥•wAªí‡úË¢Tº Blç:<¨_9ñ¤E(4°UOü—žýìÑþ.„a-¹Fƒ"Ã…¼÷ó°/z„øÙ¡Î%`ýe ¹œŠÜo>:ôµ×a¤ïžˆ\3RU@yÇ/Ãuÿ±í°²mÀzïP jô> à1¡ãÅ30³Y„ŒOº·Ð×í8 ·”¢ w†×ûÇø…”²Yȹ$ImVв=,­Àb‘$A~öÙxð¶>KÆ3] ¶öhBoFh5›¶ Øt‚ VÇê÷H¤ßq¼Ð²2#ö2è_5¹ÿÚˆé%7—óÇ“ªn/îØ6ßÖ…âŽöº)RÁkY_û÷2¸®jOÃÆ*ÃŒæðYr„Ñ6Ê uº®£^¯ûŠ„m‹õ¨Crã¦oƒuffGŽA¥R U g¯¨Æu]!†®zBÓFç”åÑ•Q5MÏ`-Û‡Yõj°*JãÙ,Ð&ïÊ0¼Ë—eïã)Z:—óNQ*EØã²Üªñ P¤ ¨iÀз–¹Oôõɱ‘{ÿ/†~‹ r¹]k°š¦é¯Ø÷3wCzîKoÛÖQ/Do}Zp¿óàpއ.â%}vT ‡mC|úK(äVËÑcÀ¶å•9¾D(xl{?TþB ɵª6^ÓýÏÍyC¢Pðî³\yQUhf¯÷7Y°Y²ýjaÚÒ—açûcƒ¢ ü l:ßß>ðŸöçeá xã¨9 ’¯‚ƪªª~Ê^ Ö|>ï‡ïDN m794 OK¥ÎÏ,YöѶ=Cðä°Ý³Âu½ýtÎ\Îû,IjJÄfÆyUK¥ÒÄzY&‰PšUÐÁåº.²Ùlh¬I’äç{Ó1®ë†Ž!g¦/ƒu}}‹‹‹¸õÖ[§Ê@mƲ¬Ñ¯LŒRé(—‡?Aƒ¢“ñB«C-.¢&‚ILmÎgšá®7d¨¿¦È 0 j×mhY”ßåÕÍf‘ûC ¸æ¡k»àÄ 
œ³K«Þ™¦‰ÕÕÕÑïã)¨HAù…'Âùü<¡å×õôÒn¢+Ë€zþgÅ@!3XµQ¢Y\åKÏÀ)oņçÑuãÆ‹QRe8í‹2YVSø¯¢@zø¤o çó­žÏ Á¬¹F¥Ð¢“$®ë¶ªº-ê†á÷ùȸögåèõªfÙ×4\üw7ÜË„ ’üår®ë"“É´¬¤°4C-b´Í_ ¢ª* …BÏ!!«iFÔKS¨÷Ø¿§\nÛA´ÄªÉNèxË´Ý C~«¢@®oç^°]ò]UýF¬”Ï*IhT± ö |‘ÿëê«ÁŒÛ¶Q(|c“rRMªØø…0¢ŠÏȲÜWj‰¢(ÑÞX×mHÔ¯knÎÛç8í³MÓ ÃÍfÛ†s¬qÕÏ#Ô£IQ:GÜ´C’F—ªÂŒœfïÃ0ƒaš& …BèouuÅbÑOáÈd2X]]º°Ü8é»JðÒÒ–——QmS´†ú6M*†5òMk´£I Ý<¦@#·¨ €¦ ¸_úUèé; Q”ƒÈþÜ£(ýð¿@zígZCÍHù|oS‹Ç6h½D(è-´-ýúà•WâÜ/y‡¿ðø¡ü:*è3 g}î™·n'‚vù¥J¡†…‡QUØ…hTíDË{©i?’V–¡<öƒ°?öóPÕs›¾³èÏRžuÆm?p¶'Ê> Ë€e¡¸ý&¿+Óo¼¢µGmO“„m±ÍÿMû0IŠ¥§±Éá}c¼lÛÆêêjè¹ ëºïeu]·ëB§®ëmÛ 4£i\× ‡;N£òVP.(•R0\× Ï ¦dضgÔªª'Ì® ÷]7#û‰· üwËÜŠdʱm¶mïH>4ÃL+äIUU54׋I’„b±8Ú”BÀ9¬‡¬¬¬DîŸtƒu,UÉÐêäb*åmwŠ^ #‘k)B¡i÷vEP¹Ò=ŸàéIö†öÓ߀ú÷oo¸Zƒß3UZ"«¦YñSUï;ê•.öÖÏÿ<¾rá…¸fç¾íØ¡Ül¯øËÎ~v&Ó›ÁJÔ×·ˆÙi™Lg£UyÆýPfÿÀvÿ8UE>ï;+„eX}Õc‘ÿ—ûœú,Ud+x½ô¢gC¹£ àiÞ†`A˜¹9ˆÛèiž~å— TïÃææ<ã´ÓØ#OY»ðÑ…p˜Ñ „@&“ªª‘Š>yMgggCýëÚ!IRφ¡$I(ÏE^Ô¨œÒf¡˜ñ`5]Ijô—àÙ*Je@VÙ+:ÍØ¶l6‹ò(S‰faÛ6 À$ICAb†£oƒÕGK• Xô¤¶m~b'Å´Ó‡Û¶§TôR•†b%©¿KÁ¤¹vP3<"Ÿ‡=›‡úÖ«ÛÚ LZÄ—Ÿú(´G?å¿_ÛnVäK¬'+%Õ„ÕÈU’eÏ@%ë…òQ»ý8¼b5ÁÜlúÚGÁÜß¼ê7xцQ?£ûÕ‡ UK°¥7w­áBHw} â¼_îùœËâö;0÷7{[ö…ÎußÏ=/´Zlär­5Ðä·¾î+•ueÐõ.à7ÌÈü»"þ @„\ærpÿçGaœY‚ë«O6·lWS”F¿šN_+˜±B9IŒÐ±­¤S…^Š·¬þ<é~¢ô6çŠi6Ä£C§›B¡àë4 Ì0½C•|©b{såwfçé+‡u’ *©AE=ø˲ÆS•±—óQ7õnd³á÷DQ(xÊN·ð3!B ùW¢pä)˜›ëÞ›²PðöËW¿â‚}~åßÐ=ÓýPéß A% Ëtƒ'‹¡æÄrPÌ<ÕÿmäŸ|+ò˜Í·ò]Ÿî(’¶½ÝŸqåeçÃýÆ#•.ú Ôsþ­£œšOµŒ½b±awÚ6 œù·ÆNY†þÿží‹-U5–ÿ×;ÂùÒãÛÞ¦AùÊQH[¸òw?×°¨÷M78L36¨Q»WåZEñŠ‹EÊ~<êm¥MÓûÛØ`cuÚ™››àµ_bcuL8Ük';ª ã8fgga\×E±XD¹\fc5 d°®­­aqqÑ/½¼¸¸ˆµµµ¸ï¥#šÖ°‰¶[ÁqËþРHÛ½.÷×à:Á-„gh’î =ûK‘mŽèKš é^Ør[,"œE§0v*çKY™DB¹ª”ë777‡b±8•…ˆ†iÅL…íZl¬Žûÿ;”?þO°Þur$96îµK+7Ç}[»Z¨4 …B¦irDB‚é+‡µZ­bee%ÔåG¡ˆÌÙÿ ÷ƒÇŸkÛ ½SõZEŒÈçe§éä¬L8 ÞûÁ¡O÷ãà‹_ô_KO¹¸çžð1. ¿ágxbÕN'×ê“-;å¾äçak‹a…AQ°ú¸ƒÉM²!Ù`ÛÈI¤7ßýA”¸µÅ½Ë) (µk4(¾e¬Z–ÁbÛž‘:;ë ÆÕÕ‘[•l¬î.LÓ„ªªÜrcœ[kØ{ÞCpo¿?ú* (¾´M% ÐÞÿ‹@>LÆ«ÿáét+¨¹ÝñÁ|æ_@Óã’ÿæ|3î¯d*¡~ÄÔ?›ÕÉ¢/ƒu~~©T …B!dœV«U?u~~>î{ ÑË‚Mô„¢úëî€õõŸþ‚ÕŽ¥uârÝíê.=ະÉwž’;XURú`õºÿå§è¹. Ýp>ä×ï÷ÞkP–b‘õ]D°:0‘ÿ•{¡žýé¡Å wmÅ×|Î-]ü8ß ·šiŽbo^“¡¶¼~ƒÕàùîõdù…w@¹öÒV¹ÝŽ"ÐÅ þ[£/Ë7ÓÃ0 (Êh«? 
h [  iž!K5ÊeoñdÄòZ(p˜Ý†eYñUµžl("K“˜¿óEhOß@ö7ߺ–O=±›ŒÛnz\)  ë(¯ ‹ÛµB‚ÅC …hGÁvEpë£çz‘rð3°~iÜßÖTaÛ6fggýy]×§×X¥–—Í ½Û½~ÎÓwVÃ0 ë:8ÚžJ¥päÈ‘¿n4·¶í–UvmåZÌ=ò ½8-DcV“¤Ö «SÎj3²Ö…¶ +mß”R «7xóãÆPüç!¾QáÌÔÓ âþ³ ~ˆaSµ›äY¹´ |¦ÀëgÚœv ÓDá³P\¹Ôßd€òµè^¥r. Þ hMÃìËSØ<øÇ …‚U¦ Žãø6†Æu ‘Á(–vÈro•ãĶÕ€™ÝmÛ$‰½«Qá·†0S‚BÆ{Tø‘® é(ò(-û|¶‹|äóÞ#*èUmÑ-·ŸAªºý‘nÞ;ÙvMÌÍE6@F*×Ì`õ`q1î/n:B ŸÏcuuuú‹)Ùv£ J0b“BÚÉö{0 Ož77wTßê;$8Nã–[nÁ‘#Gü*ÁGŽÁ-·Ü‚t:½cÞ+M­F[ˆò>AHs³Pf¾9|*A³Ú‡Áêº^ÄX>˜ÿã Ìýï @½ùŜœ¡ÞoY«ù2SK”± òß…óø«aþáC¿ðß/ó˜ý ª çä“ý—®Û*vÖÍ ½Îå€Ü½Ö¶¯ ·ýÊS.ýmçx¡ÉãhIÅL¤À”J¥ÑäúQˆ€a´ö›ÞaÇ{^ôR ™ Ø^Ñ0áy7eyæ¿¥±±ÑÐ݆¢é-°’ë;¢ p]˜®ê§¦Q­¹ƒÈgÝ÷âýçV±è)gÏcËâBòã"ŸÏ#—ËícµPðD4-bCÝ8ÊåP§„ n ýÑÊa­V«˜™™ÁÒÒ–––ümI¤›Œ:ŽÓª´8Žgü]øHÏz_4¨Í«ÍhO)m"—Ü{·˜yjôgt›íl›‹ÈìRlÛŽž¨z¸öZˆÎêüÎ=)(×B€UÊKÏó_ær@ÑÉ4ö+ äsª~ïc?ac£ýäGáìmÈåùË7³†ÎôU¹ï+XonÎdZ÷©jCRDã¢_ÇÓEŠE.P½›°·õ…‘†¶OB@üܯ@\ŠE¨ª÷˜‘eO¯*¼ê @¡ësûB‹¥Ràq#ÄþLÓÛ®iÈÞnÎ åÞ‚=cÛý†Ñh1¨ª­ë­ÿ÷÷78ñ˜¦ !DòCåGªKÏ!Eñ–ÕRÊ#¤”©AŒÖlÖû7Ÿo´ÎDçäá¶MßëÚÚ8€ååeÛÑ£GqàÀ¬­­ |!ã¢[Á !D´B/IÐ9ÖèYŠFñǾŒØfÕ/C8ióç»®×sÒn©)Û: ¶+Îtð.qNß®ÅqœEƶéì \Z…{gßY!ʯ{_˶“©9&XQ ìùs9@zú`öS~#ò–¸¹"ÄS_MkºŠâ›Ýôýœ“‡ò²ó›/ˇê`Ñcög~æSq‹I>ŸÇÞ½{‘Édü\÷£(:4 u=úœý­~…Wx`v¶s†nØ6°w/Ïã±§O÷ýö¾ ÖZ­†ååe? ˜8rä–––pøðáÄzZÛ2¹½ ßùCy@”Ã`šÞoÝu¡ j N–oŒêɪª€ëBˆ£Ë„è/–ÙU´ ~¤é9—&žá¶£BçÞ~i@æÏhÙ¯œ_m7V¬»Â83–eA4MÒU.Ú꘼ø…Bc¬Q7œzÝûÛÜl8pi(Ðâ)°”Ë݅뺰m{w{W£Ò¬Úñ?Àæ÷gÚ>>Š|ä#¿ý¦öÝ ²ÙÆÐ¶«oŸ¯ÛWN-¶ÃÎŽb±³Cå9Ïù÷˜¿ØÉÃØ6ж¶¶P.—'£Å“$y®ÓloTFÃÃI„âÚ#ÈåÚ‡ÑÚÉûFap¤¿‹Þ9‡Y3 ïA&ËxÞ¡CxÆí·÷õö¾ ÖJ¥‚Z­æ‡9tèR©”ß«5)DEpq]·mHp3ž[.ê3¿ƒ9y«³'ßu[5ˆ`¯½(CSQÇ" ¨v!œ45›g˜&Ú«’È—=äÉâÞ½mßßKÄGá[¿ÕÖ{ŸË¹§}ªEÆ5U`#gBq‡·g˜ö†là!oš&Eé/—É4ᯮŒJ5 m é]:EÝPà$±±º1Msº½«ŽƒÂ‹?¹9´„ˆ[¿ói†lF`nÎÓõöîæò^Û2ééOèzzõÏl|¦ií‡}±ØÛÚU°I„i§ë3¡Bz}GÏÄ MÞÍy§Ah±”ƀ붷 Ò©ò=D‚l/öð¶Ù¶ˆZ´¥‚NÝ0MOWTU@×ñ••Üõìg÷õU ”ÃÚŽÓ¸xãDѾªÞöv! 
Áß.÷è{±9û þ*-Ft%hD´ñ€$Áø|îËy¯»)J.É´¡]þª¢¹ÔÇY†óíhƒÕu-óÈRg¡v.yuÄF°mH® ùÕ²ÜÛdË0C"Ë2òù<„0 £e†V„Gµ]µ¬Æs›†gT»§n( gzìv¢zlO¦ ëÛsP ðéW7ž†Ì« ÷E¿ŠâªŒ o˜nm¶°‹@”ÚEúánv„“`%àÄ{TÛÑœwµŸÜôÙìø*ëQheõf[!ÝÍ¢¢<ÛüøÂ ñÈùç£úJ`£>¬‡ÆÒÒff¼b+* q•‚;éíã}€üB ¡s6êP ïµ*»°]:' ýû)È$È\Af×’oG’”ùQV,ev†$Ë6á8r1»í„ðêHÎg°ñ{UHê áTÆë>]÷”ÛnŸn]÷÷PåC’¼çN±È‘fãdd¼TplG±¬†±J y £wƒÕ²¦Ë£·µQU7Þx#Ž;†……T*|ø0R©”¿¿R©øUˆûe ƒ5ؽ™æîÏÍ4“ŒÖ íÆ=«ÓÁNÊw;„m=LFé …E iBúEåO¬ò³ §ì%ù[àüäg‰ôl°îJ’ ãípg¸•wÛ:Ž—”sfòH²l”Ã'†(GI6 ½ù†Ècì'_÷‚axj•ãÀss!=ÊüÔÓûËCïe]h|L‚Œw²¬_°QÕ†±J¯û¹†\Ž“™‡¤/ƒu}}½åõáÇaÛ6öïßcÇŽumk³¾¾ŽZ­†¥¥%ÀÌÌ :„jµŠJ¥‚jµŠõõu:tÈß¿°°0òjx¶mGOøÍI¤ÍZG“5D±¹O’¼]ôqÅb´2Ó\0Œ™<’"ßP÷~Þ_tîIÁüx#ÜG.r¯ù“²üÔG¡<éNïXP޽%‰™¤Ëx°àÒ@XÖP 1äybƒuòHºl]úÇï´ÖYTmèßYìâ¨å$Ì}ð÷ẞ®_,PU8o»Ù[Êçá¾ûÀÞ½P®I^”idd¼±xWèîý^/ðEOk¥RÁ°¸¸èo³m‹‹‹X__G¹\Æââ"ÖÖÖºž+N·´¿9yò$o`Ðÿƒ†o:öCú¡“ÂÐqÒn2XƒBÚ­²Sš-·l°N>;)ßíèä]m9öáóàV•€¿ùB8Ä%—Cñ÷ïÀŒGd¼C…»nëß'™Lx’™’.Û@¼!ï³OªÁúÀÞ²’Ïwm‘{óùXýÐY-‡YŸMaÖõ¼UÒÒAè¿ñýXîg72 2Þ‰X¼«ý@Ï®¶7rz2Xu]G:ƱcÇüm”»zìØ1ÜrË-XZZê)d`ffóóóþëjµŠååe,,, •JuµZ­í¾3gÎàÔ©S!/p©Ô>¼¼çUø Ö¥±;Î@‚ÌëÎP­VqâÄ Üwß}#?÷¸ä|ðA¬¯¯w}ptRh\ÀéÓþkégžçë÷_gßýò°².ËÞŠ8ËæD±¾¾Žûï¿,çÞÉ9|†*¸4dŠ|Þ{æpªÒx©T*8qâNæ²Q„ù»m£ÃÆ„i6Öí7Å>×k_±ºÚÓC!JÍ*ÍMï?ÒÖ&Ô?}y×óì&hþî&Sƒôù; Ó4‘Ïç1;;;¾vN¦ÙZ³¦Sú_Ôû3ï!@íkòùÑ_ç@:x¿ówWƒ•&Øo¼Ñ_q©Õj¨T*¡v6 ¨Õj= k­VÃòò28€ùùyÜxãþövtº¹3gÎàî»ïÆñãÇ{ú|·WÔ¶ÃKÁ ¨QZ»¢´ yXV«ó6Øg—'Ož›ÁJŒZ¾Oá9~ü¸¿ÚÑ!´Àu%Õ¨ Ü,ÎÚYnÙh޼˲99?~|,ÊN¸çð(Fâ]Ý^½w/ïÎ0Zƒhšu×õŽ‚‹fïã2X‰8çïnôT0²O‚FiÒ½}r9/¶·\Ý &‡I¶@ó÷¸äHæü…†a —Ëass¥AÛht‹„to!†&vÓl_p&ŠàØ(¡òù|†ï.€t𑬕Jóóó¡ð{{ z(ûéÃZ©T°¸¸ˆJ¥‚cÇŽù±ô:æÀv*­}ñÅCQ”йڵIêZ° 8ÿe°J’7úô²ªjk‹2^•ßæççqÝu×áYÏzÖXÎ?ù€K.¹KKK¡ÕÑ(úVÚzÈû×uQœûPËnªË¤>òñ±|_ÌèYZZÂW\1¶óïÔÞ/=\ŠRD¶½«BxÏŽl¶1Ý ÀìlãPIj¨@Cáç–d;î»î:\~ùå#?wÜów7ÆLÃ!h´ þõËXUŒ°M)˼z9fhþW;™¤ÎßQX–MÓ†—yÃ謧—Jž‘™Í6ŒÕaóvé0€Skš!¼ßù»«Á:33Ó²âB}Xû1RƒPˆñ‘#GZǾ}û –P­V¸í5^¡l×sI–û[‰A«cÖ¶Y¦§…’ïN´ ‰Tø4rÏù÷ÆU…óÃçœï\©ª@ñ÷ï…rûûbù>™ä‘Âqœö©B4´\†á+à=,¶½«´¸¹é-"꺧Çln6N¥iÞ6fÅ¢÷Ç΢É'©²MŒ:Ò1l(û²74^òeÞP1ÿ²†Ò“߯•M§¤ËxÇqFSdI×»ëéŠâMì…Bx‚†R‰ÃnFDWƒ5N£R©øíjjµlÛnY%¤P`övضjµ UU±¾¾ú¼œùùùP'Û¶‘Édú¾¹v«Ýs@§³FíS”Þ{1m£iaƒº]H3Yì¤|·£Ó ¼ôøA~þ¡mthö·f¢åØ4‘ûÂ"¤=;æo—IIñN×Öb° á¹G³YÏ(•e 
Tò‹›&¼y[©Ð4Ö/v+I–mú¬Q¬Ö…º0M䇿Ø6òúï†ô»#÷ÅìI—ñ¨ëIDÁv=Ž®ÑŠlmqÑŽòØn¤ÓiÌÏÏcqq ~IlR jµŽ?ŽÃ‡c~~¾ë* ¾ÁŠÃ„³íb\ZZò+×j5ÌÌÌà†n@¿t*¸ÔqŦ ¶³€l¡ Ë:Nª:ði˜±“òÝŽNU‚í^ùÕÏFPå¡6Kmõ YöVW:´/`vIñ(,ËŠö®FdL3º¯oÇàæ˜uªöÕ¦Ç*€-MEiÔøHr•n¦wvR¾Á¾íñPÕLJ VEñìѶ Ф ñŠ#ƒäÊxd8°ãx“ìÆFËñ¤§hZ#•E|w“TÙ&º.¶w}#ÚÜwòÅO‡$m;ÊâW°úÓ°±:¥$]ƃDFÌô‹i6tòÍÍù‘6ÌäГÁJFj ~9ìQÒ\z»_ÇSÂGbŽ)V—<[ÌîcXùnG·¬Q=€í÷~öžÙ~Ý…&y.´ÁôÁ¸d¼–eµ.šÒbcÁÅLš‡;F0L€–mÂqœÁ«¤ÂÁEëC}ÅCÇHoÞyO“<â’ñ ÃÊ;O胎¨bqè^ÛL<ôd°^Ž*•«Þ¿¿/Èq%bw£Ýjyß!5ŠÒwA¥~àELf”t« µË¾ék0O<Åß à5Ño¶ZÃŒ‘¶rî%Dkô$ñ\ÌL?¶í ÷ŸóO÷B»û&HÿçÿÆ}Y ÓÂHÂß©•Mpr—$6V'”ž Öååe¬¬¬ø¯WVVpèС‘—¯Þ ú’ÔX~g†I8¢‹ËÞ²ZçêÜÕ_‡uÂ…ôòçÇ}ù 3ýä:q¯TfEñêÌ'ö>…7>‚òÁ_dµ†I$#É_m×Ùƒ™HºV <uaaŽãÀq,,,`eeeìéÇëºÛD5vœ¾û¬2Ltó°n¸;r{nï‡8’™X¨W_/P„;ÃLìŽ(óÚÖHOÊ_z2¤?kÜ·Å0-ض Û¶ÏמõÚ—™&{S§ˆ®+•º>tè¿<«”DÚõ4íèamg˜* Wã`&†v9¬BÒÙ´l—Ÿ^‡þÚÍn§e˜DB‹4R®"ªmÀ0“FÇÃ=`¯nAÝóÏþkYæ 1&y¸®‹|>R©ÔÓœÉæ¦—󡪬»O=yXp®êÌÌLÜ×Ý!¢WÑ]×í?.žŸaN§ÛÔ³þ¥uGDˆ™"e~n®¥¢ëz­X9€™DBºK¡àýõ±ú"¾x'Ô—ÿ0îÛ`˜¶!ÍfQ*•†ï½ª(œû1eôl°NQEM…Ulj^räž3Ì„ÐIÆß%}¦u‡$±ÏL,-id¨ær!w•™Jv•ÁÚµà’ã°'•™h:]²>~>´—7îKd˜‘Ò¢Ì[VË–(¡wÜõç~÷m0Lï˜ft¨d;¸…ÍÔÓÕ`M¥RÙo5JAqz»â2¶ÍZ 3ñ´«ª'A@}îÝ}ža’MËŠ¼m·´1PNÑf&—–È™ Á*ËT&Û†ûãƒØXõ†ålçr^þ*þûW™äÒÒ¶É4½È^ B6EÖ0ÓÇÔæ°Fk  Âù«Ì„cÛv[ù6ßû(OæÌÔaÛvX™È_íg‘ža’F‹"O!Á@#U¸·}¹7Ÿïo¦ÎÚ·M`˜„ТŸ ѽ ¶^‚ˆÈfúè9‡•X__ÇñãÇý×û÷ïÇüü|Ü÷Ñ{:qùHf ˆÊÿ°mÀùÎÅÀ²ÂÂL!ï“ë¶,<Ú¶§Û³ÁÊL*m+JÁB@þW·Oß[]û6¦#¡‚KÞÛ\TÌ0¼iñ]’¼ˆšÙÙ–ÈfúèÙ`µm7ÝtªÕjhûÊÊ R© ÃHT^k&ÓšÃÚÑ`ec•™RhNg˜iÃqœFnŸ$µLú¦ÉÌdÓ’ÊÔ,КÈ2Uk1̤’sJÑ àMèám¹œ7÷wÒ §àõõu躎}ûöÁ0 8Žãÿ†}ûöáàÁƒX__û~Úâ8Ngƒ•a&œvE—…£Á˜]@D?aUåµHf²i •Ìå¼65b»HðvþS6÷•2Ìà„ä\ˆÖ‰›ŒØ(=^ÓXÉÙôäa=|ø0TU…AîøªªBUU躎Çã–[n‰ûžÕвSˆ™še\ˆÈ:4 3ñtmSîpÀL6-áÀð¢"]· \*y¯YÖ™I¥¥Ú{³gðòYY‘ÙÕtõ°®¯¯£Z­âMozSÇã:„jµŠJ¥÷=ÁqZaD”|ÃL8®ë¶l³mÀù›/xÿa˜)¢“Áê8^k3†™d¢d\Ï©šËyÓ:Õ`âÐwfRéÚ•Š*±Áº«éj°V*¤R)¤R©ŽÇQþj­V‹ûž´ÙˆZ©ÜÞÑHäf˜ &Jyw@9õQ®:ÃL~‘*¶ïŸLÆ3V9BŒ™tB…h¶) òºî©/¼ÉL2-ÕÞƒ¸.Oè € Ö™™œ>}:îë ‘9¬Ü•™blPOÿ#¬ÌÔá{ŸLpÑè=Y.³~ÃL>Á–6Öʃ°^ò®Ð~ò²r43Étô°Ê²Ì1ﻞ®k:F­Vk©Ü \𙙉ûž"‰ —ÀýW™© ª¨˜j5H¿ð¸/aFŽŸ÷´Ûd\{ƒ™>h^7òÜÞ–ýÅ"GJ2“MÈ`5ÍÖ¶¶xÑéÍ`M§ÓÐu½m¸o­VÃòò2æççÑÚFUýÂy>Bˆ–Ðê 3ÁD³m@}âí,ßÌÔáË»išÛ‘`Û­ó>ÃL*AïªëâÔCPÞü²–ã4óW™)"Ê`eôØÖæÆoDµZÅÁƒ±¶¶æ{[«Õ*ÖÖÖpðàAT«U,--Å}?m‰,ºÄáÀÌã8€ò½O²Ë‰™:üš¦ 
ärP`u5î«b˜Ñ,¸d¾óÈ=‰S;˜éÃ4͆wÕqXÆ™¶ôÔÖ&NãÈ‘#X^^ÆáÇ[öÏÏÏcii)ÞÕv°ÁÊL3QE ”+„:sÙíÄLŽã }æŒß{U÷g¦ ×u¡mÇúZ°±tAÜ—Ä0#Åq†r¹LØ`eÚÒ“Á 4ŒVj]S©TüpáT*…Z­†õõuÌÏÏÇ}O‘DV sNäf¦†f×.øðÊýq_ÃŒ!n7Ÿ¤’HÀLŽã T*Á4õ¬ô{¿÷%1ÌÈpÙl««« ÝŶ9¾iKO!ÁAR©TUÅÒÒTUõÛÝT*,..Æ}?‘8íú¬R3†™pš#°¿=ËZ<3•8ŽƒË¾úU@Ó¸n^Žã “É„þ ÛedÇñÿßL&“ñÿoYæææ°gÏÌÎÎÂä¼²%8Ÿ;ÿt/r/8Î!ÌTAƪL½V9ê‘iCÏÖIF]2›“ž˜)¡¹,¼W1õ*€yf 9¿RÁYðZØ0 ÈØ¡0;×u‘Éd i„mpíífž®ë"ŸÏccc²,C¹¹9(ŠÒ¾õ3RÇñ‹D–žù—¼°ÎLTƒ 4ŸlW{g˜vôía5Ô'ŠJ¥ÒµN/D毼’ÃŒo"ØÖFUyîgv†”qÀ›ÏϤÓ\¸GdYö Ï^ vA¦'IJ¥RÏïŸ&vZ¶ ªIàºàÉœ+qȸߒ,Èv=†iG¬ÖjµŠÅÅÅ–ßjµ ]×Q©TªªÂ0Œ?'XmavŠ’oÀ“q!¼MãyŸÙvRÆ ¿B0<G–9$¸×u[BUUõ½¨ ÏÞìì,4MƒªªÑ-ᦜ8d›Bà’KžlØØØ}ß=³3Ä%ã‘:ùv=†iG,+nZYY‰Üøða¤R);v µZ ÄòòòPmsØ`evŠ8ä[Q˜¦—‹ñ̸‰CÆ Çqüh‚¤Ôèh—ºSôbTöb¬p–e¡P(Àu]”Ëå]§l®ëâñ¿šçrf,Ä-ãQ}ã9·ƒéFWƒµR©`yy¹ë‰jµZϺ¾¾ÞöáY­V±¾¾ŽcÇŽfff°°°€µµµK(”I/Vž]P̘Øiù&„Øö4 Þje4yf*‰KÆo>'ã,)5:âö@6+²,£™¶, ¦i"×Ãs~WUU}µP(À4M”J¥Xïs'ˆK¶]·¡š¸®Ë•¯™±çü |³p3ý12ëÌÌLÏ-m°°°€õõõ–ÊÂ'Ož€PO×t:=T}hpd³ì‚bÆÊNË·mÛ$ Žè¯ü‚çvÚØˆûk`¦˜–ñ Žãàm—^ WVA–t嫟´!LÓl¹'i—T©K¶-«ñ÷È#7A|ûžPçêÀÌȉsþ:tî`˜t5X©ÿêNÑiPÔj5ÌÌÌDî;sæ N:Õ¹¬ax1òì]ÝÕT«Uœ8q÷Ýw_,ŸÝŽNò >ø Ö××±oß>¿!Ë2, þà·€]àa:³¾¾Žûï¿?–ÏëOÙ9ßqà¤Þœïj±m{öìñ_«ªŠÕÕU8޹¯ÇÓ4 ¶mcvvÖ7r…¡câ¦R©àĉ8}úôŽ~î¸æoÀ‹ñœâ6þôOÜwÝ %uGÊìBhþî&Sã`Üówۮ̮tð~çïĵµéZ|úô鎃åî»ïÆñãÇ[‹ëºÞL“=O Nž<‰'NàØñÏT¾Oá9~ü8„!~ø<ÈçT½2~ìzŽ?ÞWšÆ(ÇN•¤„' UUQ¯×ûÞÜ^*•àº.\×…$I‰S0É`ÝiÆ1Žã­¥Û¶‹—¼Ä…bÛÜzo—Bów7™㜿pÑ<^ W|ßUÞ/] V ºðkµ*•JÏ!Àý Ch&j’'.¾øb(Šcï‡BÉ2‡×0¾Ü’ò°“ *ßpÉ%—DÊ·ã8سçeP.ú'=1€¥¥%¿ÂãN3Ž9œpמspþù^<;ŸÆF°µMÒXXX@*•Úñ9|ó7Q,z*Šëºxù¾}ÞÆ„~ÿÌx¡ù»›LƒqÎß@Àä½ðR˜˜]Å :ø@}X=Ú÷>*ömOÔÁ°„jµ:ðÀõ‡móäÏÄΨå›xÖ³žõ‡Ÿ`gbg\2xëãÿx@–Q.óú#³³ŒS¶Y…^|óÍxñÍ7s­ &Æ)ã@SMËb9gzf ƒuœ¤R)ÌÏÏcmmÍßfÛ62™Ì@çs]× ?pVæ™Øµ|^˜ä3ŸYƒúƒ±Œ3±3'„xÅ…²¥ÊĸdÛuûO? 
Ïž>$]çZL,Œsþš ÀQãx†éÄå°^8Äââ"Ö××ý$ïn¸a¸“ú=?&^F-ß®ëâ^ÄÆ*“Æ2‡ÃSv ³³˜ýŸoÂ&‡310ÙvÀ©¤ ÎáC¶ ƒu&FÆ5¬Bx¬·0=«Á:??YÞ:Nãæ›oös°†É•¥–$®ÐÁì(;!ßÄý¯Çu›É©âÉìvRÆÏ`Ý„õÅ8/îÛg¦˜”mIÔß\ÉMlÞ03}ìôü²؉ÄôEâB‚ êë:Š¢(Š^ÃadLB¥|;ŽÃÅg˜Ä1J'E¥_÷í%!òù<öîÝ‹={ö ›Í޼÷¡ã8Èd2¡¿B¡Ú|ÝŒëºÈd2°,+´Ý²,ÌÍÍaÏž=˜…išq-ŒZ¶UÕûóS˜&fÆ1s803 ={XƒE–¨ìuTá¥ìÙÚ Bˆ¸/aÆŠ¢(œîÄL=¶mãâ‹÷ÿp8îKI<™Lªªbkk @#­\.Ì ¢g+õg%TÓ4(Š!DG#Ù4M!`Y´mÅÕu]äóylll@–e!077ç-TL±!G-šÇñ"Âf ñà îKÆôMWƒuß¾}8tèPËö±„ŒPE2†™Bzè2ÁÌtãº.n»í×ẜöÔ Ë² IŠÞ†ªª¢X,Â0 ¨ªê“Bär9ß`´,Ë÷hÒvÇqà8\×õ{(#ú&Rœ^‰-ËB¹\ÆÜÜ\× …¥É #IJ¥ÒÔ/ÈõÈò` M]>‹Bƒ^T!„¿rQ Ê+#, …n^ãBU@Û`YdYöC ŸÏûÛ£ Þi.D$ >òq8î3¦ú>™ÝMíÖ[¹È3=¬‡†ªªþj(‘N§1??……躎£G&*ßU_8ë,$ÌÔrêÔ¹¸ê‡'ùåq_ ÃŒ Ïó&CÞúŸ®Šûr$I(—ËÈf³¾ñH¹‘¹\…BBd2H’×uQ,!IæææÏçý÷‹Eßã…mÛØ³gÿZUU¬®®vÜOy«ATUE¡PÀææ&lÛÆìì¬íBˆq;m8ÿþr2à 1µaÏ ×Å×\÷U0LWƒu}}µZ ozÓ›:÷¦7½ ÀÂÂR©TÜ÷ÀSr.þÁ€§=-îKa˜±p×]gãmg} Å}) 36¾ô¥»ðÓ?mwT¼« DQlnnú•€E yïTUõ½­Ao&åÒ9èØà1Aã±^¯·½UU;î¢išoÄ–J%?LûOž<™(ƒõåçžË 3µüäÌÈÏŒû*f¼ÜvÛ'Q˜= |ÎûR&ŠNÆ^°}L¯ïÙ)Ú]Û4¢?ñ}€,õíÏ3ÃLŽƒã>ŠXg† «Á:33ƒÓ§O·l?räHÜ×Þ—Üu‡3S˵ý9ä—.Ä} 3>\/úÀ×ð¢gÜLqhèNÁmS’ƒ€øÎc «^Ú$,0ÌÈqÔ󘸯‚™pÎêv@:F­VÃúúzÇãhÐë7ŽãàÂï?y%f!~è2®ÌL7ù<~ãÞ#xÜG>²>v“÷2é`ó©ÆL5Õû7<ú‚Ä}̄ӓÁšN§±¼¼ŒZ­yL­VÃòò2æçç‘N§ã¾§gÿê¯Æ} 3þú¯¿÷Ÿó6€ÃȘ)硳>ÌT™©D]ÍõT]™a&•}ýëxäe/‹û2˜ §§*Á7Þx#qðàA,,,„ŒÒãÇcmmÍ?.I8ŽÃ!dÌTsÉ3îçfº)—ñÒLÀïÅ}% 3R¨…­i²ÁÊL/¸ürü"G0CÒ“ÁšN§qìØ1¬¬¬`yy¹eÿÂÂ:”˜bKçƒ0ÓÌÖÖ*®¹†UfºùÀ>ÃÊü¸®‹|>\.*æ“ÉdBÇI’]ס( ÇA¡P€®ë¡Šž¶mÃ0 ‹E(ŠÒrEQP,á8,Ëò{¹2í¡öð’„–jÍ 3M¬œ<‰?b}œ’ž VÀ«|ã7bii •JÅßžN§•·Jx}ûfºáEfšø/ÿåùøÍßä…™~1MBX–2XmÛµ›1MÙl›››BøFgЀ²,Ëo‘Ó|!²Ù, Ãð^¦;¦éý«ë>¹ 3m!xÁ‘ ]sX›™™™Áüü¼ÿ—DcWÜc¦ž÷.¿g?ð@Ü—Á0cò€g<ãvö> €eYX]]…mÛp©+et:ŽÓöyJ½Ry‘¸?P«Gxú +õÌ4Òiî`˜~èÛ`$öá q_ÃŒ§Ö7qõã÷e0ÌØ0ô-ìÝû!.¸Ô'Žã@’$Ȳ MÓ`YVh¿mÛþ_>Ÿ‡®ë¡ýªªúïiö¶FömVJûĹíGP*ÇØÅL5ŸùØÇxn`FBÏ!Á“Æç?ô!\çq_Ì۞þ£¯ò+⾆ æÿ8õ‚ÏÁùîç (ËßpŒ4¥t† º…‚çU+½Öà¦éy£Ð4 —óŽ/¼ã)-4ê³èx²,(ŠÛ¶!I,Ë ¥¶mûÿ GÕ4 †a@Ó4ض\.×âu5 Ãÿ.—C.— —iãÊ%^xö@1S‹ëâ—>ð<áMoŠûJ|„ðþxhò˜Zƒuï~€ýÔOÅ} 3ØWßâY—™ZÌ#?Æû^þ9üÁ÷“ï]í¥}s¢\.ldF¡(­çîå³LÓ„¢(¾Qéºn(ì4X‰ *V ¦œÖ(ƒªÌøƶå1_TÕ÷†3ÌÔaY°¼uLzŠž.Ô¼NlÒ¼PhYÞŸëzê“,{…ÏTÕÛÏC1¹L­Áú¤/~½êUq_ÃŒ…{î¸Ï;ÿ³q_Ì۔ Nàôc¾ÃÞ§>±, ²,‡ Ê|>ßâe톦iÈçóœ?<ЫGõfÛæï˜™J„mãǯxEßïsœpmzPt Mmù|Ãè ÚÄÁ…Àn …¶í¯¶í}&·µO.Sk°JB 
}íµq_ÃŒ…ÏýóðóÏæ‚KÌt"Ë€~ùðñÇ<†•ù>±m;TðrR©]M3ªª"“É´MÒ4 sssØØØèûó÷ìÙ:?{cÃÈ{%}€’ÝÏBÃLB`Ïç?Ǽõ­‘»É+ªªÔ2Rƒ¨,7 ÖæiduuøËŒz¼X®Idj Ö'>-‰ 3œ¹÷QüPžêšiÌ.F–œøŽ~WÂÏã}Q*•Z¶išæ±Á–6m“eÙ_ P%tlÐèŒ:à§íö1 ŠW­Oð´p×u9$˜™>, Ÿ¾ä>q]ï TËj¨èQ©1_:lˆ˜J™a—a&÷=õWpN:÷e0ÌØøÑWÄ} 3¶5w.¸ÄL-¶¿=}’¤ Ÿ÷ÂyɃª(ž1˜T/æêj#”˜®™‰Ÿé4X߿𸯂aÆFêž{p‡¼3ÓŠmã{<‡3Ó‰ãÛ­ƒXÆ™iäÌW¾‚3éƒÈd<us3¹jô`YÀì¬WÕ‰—©4X?ÿÏÿ Á!6̳úô§s3½8¾ôÀì}b¦“íøG˲ZZ 1ÌÄcYxÏ®Ãç?_Àêj÷jèI&—ó†«ãx^âLس˜›ó¶1;GâsX+• fffJ¥z~ÏÖUWás[[xMÜÏ0]D¾à\€·°2ÏLɸ®ã¿Û6J,ãLÂtgï*3 "ßÎ9/Á_~ÿgðþ¿; EyBÜ·04²ÜšÏ 6 /çU×½¼\Óô¢þ›QïO–½÷›¦çÉ%ƒÞ²Âmuš«ábUA(GWVn>†>»ÙئóÑ1ôYÁsuj³Mï§ ÏÔ"ˆ®©ùû8qâ"œ9sN_¿Cb Öjµ ]×Q©T¶¿ 5Ô¨¼’$ùýæ&‰ #߀WY’a’̰2ìÊ0IcXùî·ÍÃì$ÃÈ·r …+ž1‡ú«0>I K]÷þU hd :Ž÷>2°aé8Þûšófƒ3C"õ™‚zÏšf£òq.ç?h|:ŽgtÓõPþ®m‡«4÷j°’Ñ+IÞ{È>ÎOš,ŸÛ×wžXƒõðáÃH¥R8vìjµ<ˆååe,--u}¯¢(JÆ$šaä¶¶¶â¾†éÈ02î8{Ÿ˜D3Œ| !¸à“h†‘ïÝ^ù:Ê3Úœ¿+Ëa#híÅ ïiî=+˽½¯ù˜^?«Ógëëwáøñû»Ÿ0@"sX«Õ*Ö××qèÐ!ÀÌÌ ¸—3°|3Óΰ2nÛ6{W™Ä2¬|›¦ÙÒ+—a’Â(æo^ŒaFM" Ö“'OÒ¶étÕj5îKÛQ–——㾄‘R©T°¶¶÷eÄË·Ç4ÊôÙAVÆu]ŸŠpÉi“‡i³ƒ0¬|Û¶=ñÅ–¦Q¦m¼ʰòËåPìÅ—p¦M&}Ì&2$¸Ó ¨Õj˜™™iÙ~ß}÷áË_þ2Ž=Š+¯¼2î[ ·Ývöïß÷eŒŒ'Nàĉ}§5÷Þ{/n¿ývÜwß}±|þ òM×}ôèQ\|ñŸøâ‹c¹öQ’y%I³'NœÀ©S§bû|žÃ=’"£")cöÎ;ïÄ׿þu<ôÐC±|þ òœ¿ßñŽwàûßÿ>¾ÿýïÇrý£ )²0J’2^iþ®V«±|¿<{$EFERÆ,éàýÎ߉4XkµZÛ}§OŸŽ,gu¶¶¶pë­·v|ÿ$±wï^?~<î˧OŸ€ØïéÔ©SøÎw¾ƒ‹.º(–ÏD¾à¼óÎí·ÞŠË.» —_~y,×>J’"£$)cÖqüä'?‰íóy÷HŠ<ŒŠ¤ŒÙo|ã¸ï¾ûðœç<'–ÏD¾yþN>I¯4ßsÏ=±<{$EFERÆ,éàýÎ߉4XƒaÍ´¼¯zÕ«ðªW½*îKg˜® "ßpôèѸ/az‚çpfšD¾yþf&ž¿™$’ÈÖ}ûö‡%ÄÁ0£†å›™vXÆ™i†å›™fX¾™$’Hƒ5•Ja~~>”lÛ62™LÜ—Æ0CÃòÍL;,ãÌ4ÃòÍL3,ßLÙS¯×ëq_D•J‹‹‹H¥R~’÷‘#GÚæ÷1Ì$ÁòÍL;,ãÌ4ÃòÍL3,ßLÒH¬Á x‰ß•J0??÷å0ÌHaùf¦–qfšaùf¦–o&I$Ú`e†a†a†av/yûÛßþö¸/bÚY__Çž={Ú†RT*üð‡?ŒÜßi_/ûÇA­VƒëºxÒ“ž4Ð5%ñž˜ÁF¾G±t’ñI¼f8:Éø$ÊÏá ±Ûæïn×”Äûa†ƒçïäßSOÔ™±ñá¸þŠW¼¢~ÕUWÕ¯ºêªúßøÆúý÷ßïï?yòdýúë¯÷÷¿õ­oíi_/ûÇÁý÷ß_ë[ßêæõ×__ÿÚ×¾6²kŽãž˜ÁF¾G±t’ñI¼f8:Éø$ÊÏá ±Ûæïn×”Äûa†ƒçïäßS?$²Jð4P«Õ°¼¼Œ¥¥%8Žã7S^^^ö9|ø0R©”¿¿R©øû;íëeÿ8X^^FµZÅ­·Þ ÇqJ¥°²²Òó5%ñž˜ÁV¾G±t’ñI¼fpºÉø$ÊÏá °;çïn×”Äûa‡çïɸ§¾ˆÛbžVn»í¶úUW]ÚvÓM7ÕßøÆ7Öëuo%㪫® ­Ž¼ÿýï¯ÿÒ/ýRÇ}ÝÞ;.î¿ÿþ–Ïq÷à G'Ÿ6ùÅ5³ŒO»mþîvMI¼f8xþNþ=õ ¬;Àââ¢/ ÇŽ€ŽB°¹¹Ùv_­Vëø^ê—5jNŸ>Ýr/§OŸÆ‘#Gº u/×Ç=1£¡_ùNª|kkk˜ŸŸÇÉ“'C Ò€“¾¸¸ˆT*å'59r333÷u{︠ϤUª~îgû™d1Œ|bÿ8è$ã“x?Ìpt’ñI”žÃb·ÍßÝ®)‰÷à 
ÏßÉ¿§~`ƒuÌK¦GåIÔj5¨y§}½ìÃ^Saä{ûÇÁ8Ç$Ë÷äÑIÆ'QxgˆÝ6wÛŸÄûa†ƒçïäßS¯°ÁÊ0 Ã0 Ã0 Ã$Îae†a†a†a ¬ Ã0 Ã0 Ã0L"aƒ•a†a†a†I$l°2 Ã0 Ã0 Ã0‰„ V†a†a†a&‘°ÁÊ0 Ã0 Ã0 Ã$6X†a†a†a˜DÂ+Ã0 Ã0 Ã0 “HØ`e†a†a†a ¬ Ã0 Ã0 Ã0L"aƒ•a†a†a†I$l°2 Ã0 Ã0 Ã0‰„ V†a†a†a&‘°ÁÊ0 Ã0 Ã0 Ã$6X†a†a†a˜DÂ+Ã0 Ã0 Ã0 “HØ`MBض÷e0ÌØ`gv3ãSL°Ü1»–õøaƒ5a8ŽƒL&÷e0ÌØ`gv3ãSLåÎ4M¸®÷%1ÌXhžcYÞw6X†a†a˜±,‹xf×Àò¾ó<6î `+5’$AQ”¶ûeY†ªª°m¹\`Û¶¦ iZèý…Bº®Ã4M! ( 4M }ž®ëÏ0½ÒIÆ…¾ÌI’„\.I’`š¦/ó„eYB —˱|3C;ùFö‹Å" È|f!`FË3a†%J–i›eYUU[d”¶ñœÍL íæí^ä]Q”ȹ`¹ö°ÆÌÜÜ …ÏøÌf³¡ý…B†að–l6ëÃ0üã…˜››ƒišþ{i¿Bd³Yd28Žãï§Ïäx†é…N2.„Àì쬿èbY–v#„h‘·aä•囉ƒvò?¬ìçóùÈU~!2™Œ¯1̨覯‰’Qž³™I¡YÂòþ•¯|¥íÜNDz\@‰R©T—$©¾¹¹éoÓ4­N?Ëææf@h¿,ËuUUý}-ç#ÔK¥’ÿZQ”z.—ó_ëº^WUuàã¦Ýd¼\.׃ÓÐÖÖ–/ßÍò¿±±QPßÚÚª×ë,ßLòé$ÿÃʾ®ëþ{é\[[[-rÍ0£ Û\®ªj½\.ûûše”¶ñœÍ$n²^¯w–÷Ns;ËrÝ?ìa×u¡ª*dYö·Q¨/à­Ê4ï×4 €·â#˲_¹,øšVi„Þ+I’’ÐŽ~g˜Nt“qÚžÏçaÛ6$IB¹\†,ËeŠ¢ø–e…Âj‚ïX¾™äÑIþ‡•ý`¸0A«ôôœ`˜QÑm."JFyÎf’Î ²4ä½ÓÜN°\÷¬1By©íBt|/å)ÿ¢ Ýd\–e”Ëe@6›Åž={Ïçýý𦅔v–of’è$ÿã}EQ ëzè< 3 ºÍå 3- +ëÝævf0Ø`Y–CÞP l¤J’ÔÖhUÅ_µ¡¿ÕÕUèºÎyKLbè&ãT` T*akk °,ËÏÅÖ4 ŽãÀ0 !ØsÄLä²_*•übœÅŒ’ns9ÃL ÃÊz·¹ 6XcDÓ4Ø¶í¯¢“Ç4¸ßq?q›*J^èAð5ÐHúf˜¤ÐMÆ©= dYn ùÕ4 †aô’Ã0I¢“üKö%Iò«UrÛfTt›ËiÃL:½È:m¢ÛÜÎ ¬1Bá[Ùlsss˜ …}ɲŒR©„L&ƒL&ƒ¹¹9?)%…Bsss~…àÕÕÕ¸o‹a|ºÉ8ååÍÎÎ"“É`vvÖWÔ Zœaï*3it’ÿqʾ¦iÐ40™‘Ñm.—$ …B½HÌÄÓMÖÎòÞËÜÎôÏžz½^û"v;®ë¶íÑç8dYöcê©O¦Á"K&Ì0I£ŒEµß4M†ÍÍ͸oƒa¢“ü³ì3“D/ú çº2Ó@§y»yï4·3ýÃk‚q]³³³ØØØ€¢(p]™Lº®sx$3õP8ÍÜÜr¹7Òfv ,û Ã0 Ó€C‚L0$xÏž=˜››ƒ¦il¬2»Çq°wï^ȲÌ2Ïì*Xö†a¦{X†a†a†a˜Dòظ/`TT«U|á _ÀÅ_÷¥ŒŒ;ï¼OyÊS⾌‘qæÌœ9s&¿Ñ™3gpÖYgáꫯŽûRzæŸøžøÄ'Æ}##Iò0*’4fï¼óΉ*òÀsxòIÒ˜=sæ fff&&7Œçï䓤ñzçw╯|%fffâ¾”žàù;ù$iÌ2OÁúÅ/~–eMÌë>õ©Oá~á⾌‘qêÔ)Ü}÷݉øN:…Ó§OO”Áú·û·u½ÝH’<ŒŠ$ÙO}êSe°òž|’4fO:‰¸–^àù;ù$i¼~êSŸÂK^ò’‰1XyþN>I³ƒÌßSc°>ñ‰O„¢(XZZŠûRFÊ4ÝO¥RA¥RÁÂÂBÜ—‚õõu?~<îËè‹K.¹„åaHÊoT©T⾄¾à9<ù$iÌNÚÎó÷d”ߨR© •JÅ}=ÃówòIÒ˜dþžƒu™¦étét:îË`Â4ÊôYf8¦M¦qÌ2ƒ1²0m㕎i“‡I³\%˜a†a†a†I$l°2 Ã0 Ã0 Ã0‰„ V†a†a†a&‘°ÁÊ0 Ã0 Ã0 Ã$6X†a†a†a˜DÂ+Ã0 Ã0 Ã0 “HØ`e†a†a†a ¬ Ã0 Ã0 Ã0L"aƒ•a†a†a†I$l°2‡€ãÄ} Ã0 Ã0 ÃŒ6X™‰Âu¹9À¶ÃÛÈçL0 ï¸vï·mà?þã"œ9sNÜ·Ã0 Ã0 Ã0Lb7X×××ã¾&ÁY(x¯e(—]÷^çóž[(š”JÞvÃè|¾ãÇgðÐCçŽýúY¾™i‡eœ™VX¶™i‡eœ™ç‡W«U,..ÂiŠï\^^ÆÊÊJhÛüü<Ž9çåN,B–åkÅ¢ç$e@’¼s9oÛì, (ÀêjôyÇ;·,{Qû 
ïªê}¾ãxÿjZãslÛûlEñöI’w^ÃðöiZøüÁÿ‹Þ{¥±ŒYÀÛ—Éxç(½ëPU`}ý.?~ÿX¿w–ofÚag¦–mfÚag&‰X ÖjµŠJ¥Ò2 ˆ“'ObaaªªúÛfffbù‚’Ž€i†ó:ÉxSÏPÂ3ÜÈ@TÏK<½W’Û77áµÙ¬w×õŒIUõŽWUψ ¨Å¢÷o©ä«i ãð®Ý²¼ãï8º]oxLÛA†n;dØØß׸aùf¦–qfZaÙf¦–qf‰Å`]__‡Ýœ„àôéÓ˜ŸŸÇüü|l_̤ „÷¯ªzžë6¶‘‘&Ë ƒ0 2:£z3s9Ϙ”$Ïàmöª6Âô~2”£Î›Ë…÷7¿;i¬,ßÌôÃ2ÎL+,ÛÌ´Ã2ÎL"±¬ XXXÀúú:[ö¯¯¯CUU,//öïßϧ Ûö DY‡Àvò6 …Ó2aùf¦–qfZaÙf¦–qf‰½èR;(T¡V«aqqkkk¿÷Þ{á8Ž?À¦ËòŠ ‘'•éõõu|ä#Á7¾ñØ®¡_ù€ï}ï{X^^æ" LW–——q×]wÅz <‡3ãbmm ùÈGpêÔ©X>ŸçofœÐü]­Vc»ž¿™qA:x¿ów¬E—¢¨V«8tèn¸á?f>•Jayy mßwþùçã²Ë.Ãþýûã¾…±£ª;Ÿ—EsH‰,Ë£ª/%Œ}ûöáÊ+¯Ä·¿ýíÿìAå.¸àìß¿ûöí‹å{c’ USïÏq¼ñÛn»-–ëá9œ7ét§OŸÞq…žçof' ùû /ÜñÏæù›7¤ƒ÷;d°Ò*a¥RÁ¡C‡J¥P­V±´´4ô¤R©–ó,,,øŸÙ.,áüóÏÇå—_>Õa –å«–eÂmj4ªªj‹Áè8lÛ†Š¢@Ó4€išþvI’ü÷P‚½mÛpB¿zœ,ËP²,CÛ¶¡ª*¤€Õì8\×õÏ<7!IE _Ó4hš:W¯¸®Ûò]¿úœfR©jµÚŽÿ†ƒÊ7à)<Ó,ß„mÛ0¶û)=ĸ;ŽEQ ( $I‚mÛ°, ²,Ãu]\qÅOáÊ+_Œ'N„^þòG (ªz)„ðŠ£mlÌ㢋.Šåšxß=X–ÇqPìT$a œsÎ98}ú4~øÃîèçòüÍô-æG餟.D¸®‹B¡àë\çž{n,…ŽxþfÆM*••W^Ù·Þ·Áº¼¼Œµµ5,,,ø633ƒååeT*•¡Ë^¯¯¯£Z­†VrNŸ>íδbÛ¶ÿ÷È#)ìÙ3‡;ïüG¼àOŹç¾=ô³xà³ñž÷ü&žýìK|ãð&:22鵪ªúJ¶mÛ(l÷²Ñ4 º®ûÛƒç¼ 5—˵›ŽãÀqȲŒR—ò½íúɈ–$ ªªBÓ4X–…¹¹9ÿóÈð ëw]†aÀ¶mÿ˜ aÜlpáLsó}!pûí·ãyÏ{ÞŽÿλU¾O¹%§ß„暦AUU†!„/_ÁňæÅ Z`Ñ4 ®ëú¿»ªªØØØðÊw¿û»ø?ÿçÇxík+¾\²@c%ÉV°B"-Ò8Žƒ·¼å|<ãï×¾ôé–E™àù›Ï×,×Äí·ßûîûÈÅ—f‚†6|îsoÁ“žtgÎ|™Œ76/¸àþŸÿ§Š'¾Ëï¼›e< ضíËAóvó¶B¡àÏ‘ÍrIÏ I’ ëzh<¸®‹|>À{^ …ŽF«†aÀ4M¨ªêÿEEäÐ8s]ןÓ4adYÆe—]¶ã=,Ûɼéß ÎBòF²Ô<€ÆB' Y–}ýðž/¹\.4&„°, –eµ\h,ÖÓ|ß|Í4×Ó‚=à=‹Å"V·ûFå–î,ãLRéË`­V«XYYÁ‘#G0??J¥ÀØ©T D¥RA:ø‚fff°¸¸ˆt:íŸgee%ô:)Ðä4ôÈ«CƪªÐu’$ù“(m'EÛ0 ÔjÏÄ+_ùz”Ë:\W‚eŽóGøÑ~„sÏÝ„$Ý„Gý&týÿíÉëÔLÐÀ ¢öQE‰Œß^hwި튢øÊNЀ¦ï t]ïj$÷Ãúú:Ž?>²óõJÒå;èQ$È‹Ò )ÁAY6M–eùžHEQü<)©Í¿£ëº°, …BÁ/D¯ i0‚ ™ßýÝ'ãqT5ù¾^$ š¦Á¶5œsðêW_‹ÿûûû^iN £œøüç ¼ìeßÅ•Wºþøø×}2>ýéKðÍoÎàþèËxæ3[W"-K…,Së§×ùÛ½1ôdüÅ_üsï8B’.ãÓŠù|BK’äS×u±ºº EQ „@&“A¹\öe“æ[Ë´pDÏ-Ã0Bç ±sss0M¹\Î?>8—X–MÓ°¹¹é/Ž’ÁÛLЀ¦9¡X,"ŸÏ{ÏVÛÆžþôXæp–íÑ\|\$¤çOÐ(Z„Éd2þ3Ç0 är9llltý|Û¶}RÓ4ÿùdY ÃðI‡£g¸@ïÏ ¬‹‘,„ÀææfÜ?–q&¹ôe°žá¢ž¨îß>¦ÑW9›EÀX £ªêvA¶;Æò}v#É2ž4„0M3´€‘Ëå SƒíRÉ_0¢Ÿvç!…ÿÈè$e𯷮ë0MÅbÑßGç%#¢lUU;*çårsssþ¸§4‚méýŠ¢ô4Þ5MC¡PÀž={ ë:Êå20;ëqˆ–íÖH—¨gC3Aã“<딺A BÏŠ‚Q%2â«™b±ÓôÒ¥:=oši7¦še8ʰÄyD’¤ñè9CÀ2Î$•=õz½ÞëÁ•JÄ­·ÞZ…YZZB­VÃ5×\ã{_‡¥Z­v4›¡ÕÖQäѶƒ”òà÷zÇß‹sιyÌc 
IžÒ©ë@§Ðγ·c÷H¥•WW±þã]^:ѯ|^ˆÐ°!÷í¿O™%Ì4Aú­ã Þ–Éu=#±\ö•l»õ\†‘m{o0j2êZLÓû£Ï!„2™Æœõù°,ï͹ i¸ä’;ñ½ï=%¶ï=‰søNA¡Š”yò2Ã^)Jáƒ÷݇§>ú(þèÙφOy¦hŠ^Ð4Í7È»ÙMù¥<9 wõ=Û¶\.7XŽ7 !¼ÕEñÞvˆ¤aB`ý—96yIÚü½Pô €–Ðñr¹ù›S½Œ`è+ЈÎi¹eÄ-/»yþfÆÏ òÒ—‡•¼¨‹‹‹¸ñÆýíÕj7ÝtR©ÔÈ®S©R©Ôð'¿å-xê5×àyhä˜öbÒÌÁƒç`eé@IÁu½çq¢æèlÖS uQ#ƲÖB¡¼ã±^NÜòMùkü0¿¡ŒUÛö”Ì„U‰v]@>§ ÷-k°ÎYØ`•å°S§ù<…‚÷á”Íé|d¬vúüvÆ*à½.—Û¾®{ 3ªŠ×¾¶ >…'n%ä íÅ+hš&LÓôCÖÜfò*•Ë實iøÄ¯ÿ:Û6þ€üá?ªòþEA©TêiìʲìçÈEB+3’äý?J˜›Ù^©éš2â8áy˜>/Ÿ‡¿ŠKa®ëýE/—ææ€_þåü²ƒ1M²„¼ùUCPdYÐ+ÜW(B©”kL)l”NÓ*ãLDÕ? h5˜N‘yíêqб§Nê»AßE—Ž9‚B¡€ƒð¬dŠoŸ†Zõ>23ƒwþïÿçV*ÈårÈçó^8R_çòKU0›67“¦Ï{Èrr½¬®ë)3ÃVe *I†‹þã?⾳ؠÞýÉŸ zé¥z ëêrRO¹•€ÏÎ#Èéq@¹è[Pª·Àƒ­ü¶ù¼§_»®wûdÐF¤ûB×»{ws¹Î¡Ë]úMó¼PÌÈ ‚AÝBøÈXmçqêI’pýG>‚ëoúñÉ0¸2¯aDUÃðÃ{ZÙ!ã–¼ Á÷Pü<]£ã4FƒÈ²78\7,ü¿$É‹ øÈG€Ë/ì;`üEx Q¨r@ƒÑ6T,¯LS~q>Ÿ÷«´+йÃ0LÿDs¢kŽDU¨¦ñ À7,i;îèÑÜí#êZ¨[॰xiH¢åsh>¾¦h¢'NàÁìë{èÛ`™™Á‘#GP©TBU‚§!›ªÊ•ËeȦ‰—Ÿ{.Þ캘…®ë}M¾†Ñ‚˜ä‰;—‹Ö²“€iF‡…±íFòo/è:.ûÃ?Þð†¸ïnÇ *Ôr>tŸ‹0miç !–!Çãʃÿ |Ï…¬DŸÔÇ4Ê¡Hò¨À !¼ý…‚§ãÓsƒÆ¼izÿnGè¶¿@Ûî/!¶Å"0;‹‹_üâÑsC…Q666ü<ѨgÁPÆj“QêC‹ÍÆ)­^÷;>,+ZÞ(]¢×ß–ÕåFϵ൞'T×½cÛEñ öËáò—¿|´ãf—@aáB?|»Ù£JF«®ë0 £«L—J%¿ˆÖ0‹5Œ7dFX_’‰*ÂŒ®!c2Ø5À²,Q”ŒÁ`N7m#ã°¹kF°F€Ô´À̲´=j[7š  ÑÅ]Ûåˆ_xá…}ÍëË`]__Çââ"n½õÖ©0P›±, «««Þ¥ª8϶Q*•ʳåx 8O± <ŸQ ( í:i‹Þʼa´WN$©¿°fUÅ#ïyžqûà0h€IDATíqßÝŽBÕ;766 ¹®§¸Ž’Q*)´ˆ2¤Œ». mmiÊÓ·`Û{[DIJ°]•Û·æÜÑvÊLR”h[ÓüèÜèÛ –$yDYµT|g{N ]\0¹™R ¿úë¿>ºßcc†¿`Y*•ÍfC©!Áêï)ëÙ¬÷;F-麧Á6¬$¹\{c×»xo_.ç k¡Ðºú’Éxçê´(ü\Ãðd1hÜU¼AEs÷(#xd÷¿ð…¸âë_Ý9w]S,ÛVU<%wuuÕ¯$Ý‹;Ê*þ»ZK*GÿhfFK°Ýr:)r!H0t–ªQÓ˜ë4Ö¨Âz/­%ÛeË ²G˜ß9¬333°m;Ô£i aó…Ųü‡q?? 
EF¶< (/gF&…ûªY;•-M*¥’§´©j´Ò£(ž²Õl´æó­È6§ÞðÔn»-î;ÛQ¨z§$I£÷¨ò|’äýÖTwÈËRÞv-@½ô!Çû(dŸBq Ow/—£NΊZlwŒ$u°LÓ»82ææ<¹mžhqŠ qœeµ·¦UŸŸÅ•£ûU¦ ylîù 4Z.QË ªÔ;77çKãªß”h‚vµ¶Ã_[ž¶í *E ´{äržP{=–J½=—¨ R¹ÜûsŒææ1ð½ë®Ã#_ýêXÎ=PÝ‚^ PEQ°µµ÷eO<¦ÙÜù!š` ™ª1µae¶ V¶¼ñ@õTU …»Ò~òˆRÇ —ÍÕÛw}¬333XZZÂòò2ªÕjä1“Z!Œ”ÛîKY¦Ð@Ò/"•QH ì(‘åÎ+õã&Ÿ÷îIO© ~‰’ä)Uä® ö!H1¢Ü):®Íý<’Já{W\ϽƄmÛ¢1£^ õj[¯¯©Íº?ÔAà¼ÅÛ>7ç‰XðYR,z9;ë½¥í¹MÓ{ó0c¥9a•" š —f9§±O½z:|ïÿþœçàW‡û¦š`aEQÏç[Úaw5H±XôWÔ{.VFîÊÙ nï¦ÐD…ËrãaC)à öêjà¤j]äEz8õj|ŽjtD¦ªj(¬·Ÿ–IL˜B¡Q2—ë;#µÿÖǽ~ +++‘û'Ù` UOìb†÷GzŠëv¨UAFÔ¨Œ:Ï( Ì`ùÒQæ!v#Ÿ÷>+—óþtî­§ª …©]¾ª¢xÊ™ãxO‰„)Xq"„€ëº­Šõ(dˆzH’{rtñúF«îEób$‰DP§o>¾ãf>ïi¼ZQßõÄi‡¦5f;i-ä}áªín„òMƒÊ‡ëº0 Ã/fDJKTè…t©–åýfd 6„…BØ]OOlbüeËjÌ4’A.I±õ-eâÇqäóy®Ö»ÃáM ««ÞԞɴ†™ÇÄC°=൮¥g”ñOŽÓPaŠÅ§ã5¯ùl_çèÛ`u’ZœgZ¶†-nkZŒõº!ê öfh‰Â×òù†‚¶ÌMUÕÞ–{@…—*›h‰ Õ¡‡˜)×”z6jõ±ƒ×¿—¨æ(/,}9háisÓsÃjZxžp]O+Ñ4ÏꥡȀnðˎЮ8R±XÄìì¬ßó”Ži É5ù’ûb1ü»Òb­Š ²HhYáùŸ’¥yÞcPnµeY(•J¬€ò F©cTŒ›Â;¤íÎÁŒ–`¸.Ù2TÔHQ6TÇHðñ”ËŸüäñÙÏ>ýø8û÷ÉN!‘Ê|ÁJý})¼TÍ…N0 Á¦ê£:× P%W ‘¨4’ûlÛÛf­Þ% U#¯3VHi 1Ê¢]Þ‡ŒÆ¦–6Œ…íB¸1Í;#ƒFh©äçà5R³VËòc軂«™&ò¢¶ ë¢j§BˆÎ¡_”ƒL¿¯ó1E”äraOi¯P˜|pìªjÌ+¨LÒ ö|¹\ÎÏ¿fF‹a´¶6,>.€Æz]P¹ z,ÃÓ§¶ÇøK¿öµ¸oqj°mÛï;Oa¾” Âá½Ýºë¥ã@R”Кn¡ðm?þ×ô|š Öµµ5ضõõuÀüüßXs§i‚ ï“?A–àn;¨À‰®{m¯üðÚß>ô×e¥AÏRwòBÃH ñP*•x<ôHÐT ±Aæe€år‡š"® 3kÃøÊµØLÍ6RV\Ïû×Åñ>+‹Õï,//ãðáÃH¥R8tè:„T*…Çûù­“Dd8°m£ð­ßBæ-Ïókm Îöò!Þ¿£,«:ˆ2HHÉîtL/ç¥"·fKžfm×å•ÿ˜iA ë£íL^õaè!ÏÛ0{q œðÇe@Qµ!=ùqÍ_FôI¥‘KÝ+Åbç1DÆ+¯à&ÇqºéØÆÌ¶= jªýR.÷'Øî½<Ÿ2m þªåryj”sÓl¬!*M&Ó(†…e5 º{oúx¾?\°€7f©ú’ã@ú­_ô¡¿ÅHªQp瓞÷W?Ѹ®‹L&3ãÁ0ÚËÜ(“Û^ËèûÒS·ƒÍMOŒuÝ{].7Ölm»U}²?p7æžsÎù/Cùk)ïªê·âûJ›:HèËÃZ­V±²²âWV ¢ª*}vR0M3²L´»g››Åô¡RƒÆU•$¼YqrO:ëõðvŠU¡D\rS–ÕnÜÜN$¹E‹–g˜X‰Š 0zïö(Ú#õ˜Ó§|å(äs÷Âͽ¿±‘*çb[ìþý«šóÒÛÔšæí£"dt’NÕÂ;ö­a’ˆëº=¥´e”1滸=3zš[0M –Õ¨,:ŠÂDAo«C•âI¥"“Öˆl(¿ý3ÿçï®ÒÇúËò”t áh H¯=㙾¡´ÇqP,'"'•äp˜uÐNP‡@Rû©ÜB¢õSjõÞN­i'²PF €m_†Òg. 
¿'X˜p;B·úò°ž}<=ºfUµ±è"D#b¢¥‘¥²ý…¶ž„dzÂg2žœ @¡]òl…¹9ï°Ñ‹ù@9¬µZ 333-ÛÚõfM*¶mû €ƒ¸² õ†‘}HxɤŸ*ªÁºèQPA£fhI’–U‚hš7cëzkÑ%J tÝFÐ:·C˜hz-(6Ùlã!Üfò¤U½®X³×?X}{·´RI‚UðŠD–Kv†ímåKWñ‚MâiY$ÍðâHP( I’­Š˜¢x×—Éxÿgcuâ¡E™±4RÛh”úÜbÏ6Š×"dî‡>¸ àh~(–tâsž˜7yW‹óÿ½J¨(Þ`»ªª¤pîéÄ“Ïçý€7º*3TZºTò䀋1@¢¼«T)¾‹îÔ‹'ÉWY4oη²VHj&¨²Ér8èP„t­(hÁtuµ5q5ê¦ #ÁayÓ…™L¹\º®Oœ± ùÿ\ƒxÝoŠýC/„ôÍãȼâGßã8@æÿ÷4¸OyœÍ'„Ï—o ‘µ÷‹É‡«ßÑu`k˺ÞH¡¿mýkœ?AßE— ÃÀÉ“'qàÀ(ŠEQpàÀT*9rd|W:b¨QðؽO²Üðuð@EBRšHÛ)àÁU#nD(ô~ñ¥æI5˜]ÍùU¢{ä(B·‚‹&m–éN@UtÑ´­)Œ×øØs}»6TØZQ þ¨nçšú½ïþâõí«©nnr—ö)Dz,ÿè¥àRKÕg†I8ä]Í%å™MyÿäâlsÈÜ\÷BJ®¸)¡).ÔÍò/¯Dž3 #Ë€ǰQxñ§áþö;[?„z#F´‘½m,÷¤‰SË*^ìÜòù<4MÛyÙ߃ò f ÿÕ[=¹ÙÜDéõ·BûêÛÛ˲¿ü#”ÎyV?ýä–24Bxª}±èýÛö«1ÍÆ ¯O Ù •ß¾ Öt:÷¾÷½( ~•à7½éMxï{ß‹t:÷ýôL§PÉl6 ƒºhP?ç#_>M¢Q1•¹\kY;šXéßæÂIÍÞ× óÏt§c8p0aXz™¤bÑ¡98ا—v4É_ÁR Èîû?í_ns¿"¹pIa.#5ì ?L¢‰ÈÚëÌ´Q(°ºº ;0Ï !:/ÞwuWé™Ý …C“ @Ö^FUí-ÇÍyïq(O¼£±¡XDÑÉ@|ÿÇ0žøNÀ²|U&ê#eX}ý²œ†óÃçC~íU­¢(ž’^,g„P>!/xª,IR<‘Ž3²²Õ-ÃDב[ÿMÀ²`ÍBªz¡à‰Yùßåm×úÛM€mÃù³Béž×x«A³³^X[»¨NêêÐÜõ#Áôm°®­­áÀ¸õÖ[±´´„¥¥%¬¯¯ãÀX[[‹û~z¦]u`SóйHRÿ }§ã)/ªWŠEø5Ï©¤W³²MîøNmл Œ$·„I>#H^†-*ÔãL©ä=ÇC—CÖ§,7ÆMÓÃÇquå Š?ú¯€eµ¬½ò񵯯ð"VØÝÝ ¿9¼Â/<Ö5Ú€½«Ì„Q( ªêø#Åúô–í’£QY¤Êjã·Ä°¬Æ ×E1µ égž>,£ôÙçúü÷0wèy°þ´~ QÁ0M(ÿôg(}ð"”¬½ä)¬1ª.H/èúxŠ2!LÓôÀcËÙ&׿öAS!?ªÖÛÂvx€T*Â0ÂÇmlr¹R¦l0o}&ŠñÈ_¾Ù;hsÓS¼\×3^›"­ð¡:ŠS_k­VÃòò2: ÿ=rä–––pøðá‰(¼Ô¶rê6¥§þY„+§Gº­ÐE®ëýQ u‚&× ‹>êšŠÅÆûÛ}fóD¤3r:†ãUÝî#:W/ãd;,´FDïmîÀ7PËe˜‹_€ü“oµÌgv5ÔÚƒVÝUUõÂ]×m)°¢Tbï*31†!D²¼«@èy@ýƒjeyžSrQqyË‚§—.cYPnx^äGH°ñås°qòR”Þu&¬k“LJz¹ õºÇ÷6´©¯Íì,·äK ¦iÂ4MlllÄ·HÔ[ú©ICr(SÏw3UõÖ@dÙ“ùbþN¡/eá©­z>EšMÉBJ_k¥RA­VÃÒÒRË>ê¿: mmÇé\ˆf¯Só{› Ø(…É4Õy;…t2VWÛÃÍVfêéè] æ‰ö« ;Nk‡è $oM› ð¨÷Çi ²F% ⊟Bá·šÿCE¬% â?/B»®s1fwcš&t]÷ SMÓ`Ûvëâã´Î—œÁL®ëÂ4Íó0ELë=*ú¾Ý}Àþ“Ï@{ÜÍž®3; XJ¥€ª´º Ì¿¬Á¾ü×:Uë BeP77=ïQ?ž#Ij„M€Çi7á8 Ã@¹\î¼ð8nÅ[ÜÔ´özõvñ-÷ö¦VLzžÍÜÏ}·ÿ5ÒˆzYî¢âOÉ¢~ß!Á8}útÜ÷ÓªEZPø_iüúý¬²‘†Ýì BݪÃÔh¨Ûé©Ð\Aµù¼Q+­t>ž|wc«L½zI†›å»ÁŠ;îðûœ XF8;[}¤;¿²!š×gô›RȽkròä™§¹øŒ¢(p®ë6æüæfŒ 3aX–…\.·cŠ»m7:Ýõƒã¹çrÞëÙ(î~îÅû!ÿÃáF%nxj“¦m­Û…_œ'½ ¸ì²þfá)öIóZïr\×E6›Åêêj¼Æ*A×ËE6±ßpâs'}驯BL°’ªÂyΠ\ñ½þ?[×wm$Ð@mm>ŒZ­æo¯Õj~[›I(¼äºnÛ\&Ç” ¿Ñ®~”Ò°ƒ3{ó ßlÐÒùƒŸ4šßÛ/šÆ%Öw!}¬ýh!ÔØ.(·AO®,#ó×ÙÐ)È9y¿âu.([eßµm{÷Bÿúo ¨5ÞhÛ»v^f 
]¼¦i0 ÃÛçº^‘ *ËÏ0ˆã8;éþ˰{d“¾äº€|ý‹áËpŠeØ?ûÇÐ~õìÆd©¢Q”/Ÿ\IAî/ã!ÊøÆj±Xì\ƒ FBrjšÈÿËë€÷¿'/õ‹ð†(a?öUP2{ã¾ô‰b ¶6ëëë¸æškpðàA±÷oÐ(m~¿¦…µpÛK;-36'þGå¡öB¿÷ÀL<íú ûCiµT*5d*— Ë—,ÃÞºª¥÷,Ãïg§¸ä|âÙ/àEÄë‡g.“ V¦WÚɽ¢(p¯\[óŒÕbq*£˜Ý Õ'ØÉž“Ny úoß­‚ÌÍ5ž!MMR)TQˆFæS»G“$yQ¼ÔžÕ† ,‹Åd rÆ;`Ðÿì"O•‘$”Jî1Aì'_õ†TÜ—?Q ÔÖæ–[na˜ŸŸÇüüÚ†Á»®7ŸòRvâ"ÜM¤Pð‹iz•Æ ^Äi?$¦Û¶GŸË×®!¤°?úä—y¡‹’ÔÚ}P*A¹ïS°Ï~µ¿ÉqZ+†L/Œ¥häv H, ¦\„ýÕ˲Mãl÷c ‡ŠJyd³Ñ k’Ô°1€FË_¦?º¬©T †aàØ±cH§ÓX[[ÃÁƒqøða؃Ô9ob}}½í¾J¥2ò¾®Í = Õ‘Qô¶ôAo Jgð}ŠÒÝS;%¥¦™öì´|wÍ_:†ŽåB’%dW_}UnÒH$ Pßp\MÇ6àæŠ}ç¡ÊrtΓLvZÆ£h&©(<ÇN†î&¢ªG*u*ÙvæavøÈG.Á™3çŒõš’ ÛAzšß¡Pˆî!oßð³áEŹÀ:zÆûè0K†jfáÜÿŒÐÛ¹AòIšŒc4XÛèäößÒ žŠÜ/ÝÝX(§ºîé:M‘ÁäEmü@Å}£†Ó}å°ªªŠo¼ÇŽÃüü<Ö×ׇ2^«Õ*#·SA§øMàGA¨½^UÁ¥ÈåDx‚Oùx/¨ñÒº;-£ôâéb&–8ä{T سòËRÐι¥½áh-+–d¿Êr#g©_WNÙžâñfÆÖ‰9ÙlK] çEpÍ·¹ÜÐhX.Õê9¸ï¾‹Æv­IífzŠ é!íʧѤ@À² ² ÕÆ¶‘»ÿ/`ë¿ôk´Ékõ~0Ý¢ñZQ²I¢ŒcÈÓ&+QÓ"##Õ“+(}øbäÞ·¿¡ÒõwZä¨<¤\î iÛÜMmîÃ4^P©TpðàÁžB†«Õ*lÛn;>ŒT*Çqpë­·¢R©`yyyè›m6V‰Ž«€4‘7£iÞlÜîͪÚè­Êì*â’oÊãë+çÃu#cìì­« ¿äRͽЦÅRÄÌ ã]] /õ å±&¡ÝM\2ÅNWLegu5l” ÞôS(ô—ß(ËÀïüÎ]¸b>†]H’l73–‚K¦Ù¨tT(À²¶×!% ØØ€í&bU…|üƒÐŸõÈ|êüJîAO8+äÉ'É2Þ©‹Ç@Ђ íeu]_OßndàÉsAT]oYiÖSz nã<îÁéË`mX__÷½«û÷ï÷Æ{9ÏÚÚZä¾jµŠõõu:t033ƒ……”·LÃ'>q¢ÿÐNk¦Š1í$”3»Ž¸ä{`ŽM^‡,Ð48îöh2l­'ÿ.ÌǼ1´­ùùç‡Ù÷ªr8pÒ‰KÆ£B´Îëírò˜Çq¼Î'D𧢂úô8MI’í ¡”’1 HWÙÖerÏýLã7’$¿ ¥ª"÷•7!÷æó‘É„;çQ½I ü›3É"©2Œ!ì=è% ®ª( h¬¢i`­<è½êÿ:}¿ºMÙXLz2X+• 8 °m‹‹‹X__G¹\ÆââbÛÐÌÂÂŽ9‚¥¥¥–}'Ož€á›N§‡Ž£w`ié±oßËÇÿ­Üÿt‡|^AO 2Á˜¼6Z"õϼ0>qû­á¿ßÿe˜_œmk´ÞêêÐ_3Fâ’ñ("Wä võŒ‰f4ˆiz©ú;;ÛÙ{Ji8IZÛM’l [²m/œe2Œ½Öu8ÿtoHuiqÂmÇú’S¶ÙÃÄÕœ“OReAA¡^ÌiêÖ!Þð&8úñÐØ ª6ª È~³ë¤4Hº3=¬º®#NãØ±cþ¶••Û-·Ü‚¥¥¥‘„ tµZ­í¾3gÎàÔ©SmÈ8ûìð”§üth;åÚùØvkVt/…L†¤ ¡Z­âĉ¸ï¾ûbùìvt’oxðÁ±¾¾Þñ=…÷àyÊ劈¦ö?žÍή ˆ‹ž¤R¡ÆÍC€S ãa}}÷ß,Ÿ=®9¼®ë¶Ê=kcÁ4ýVƒ¼©$Ÿo<2s9¯Õ-µ0ÙÜôòºFýST*œ8q§OŸÞÑû÷ü݉РE bv(³ìÊ*Ìï¾&”)Ò’‡ªiþJdóüN ]Úu3] ù»›Lƒž¿ƒ ”ÖÔŒm‡-Ц0/翉ìÍ×ûÊJTÚRñŸ~PU¿À[TÇêÐÇôéàýÎß] Vš`o¼ñFÅ¥V«¡R©„ú¯.,, V« %¬tîvtº¹3gÎàî»ïÆñãÇ#÷».ð´§½¤BÛ;¥¡ð¤²C”’9˜ÄsòäÉØ ÖAåðžãÇû+ Ó,ÏÍJiÂ9ÖÈEWÀþèC!ù–¤F»`ZÌ4M^³I ÇEÙÆ7‡G “$ØXšæ5-!ù¦ѭÃPá´q—ÁçüÝIŒ¿íªì>;i¸’n/I î ‘õƒ¤‚0 hþÞiùvvþnfdáÀ¶Ýܦç}Ûã!öì…#yŸ)«Û“œ¢xÿMRÈ4@:øÈ ÖJ¥‚ùùyß0àW Vpÿ0tÊM¥Rm÷]|ñÅP%2ÄðdöG?º-r{Ç 
Í+=QEj8Yc¢˜ŸŸÇu×]‡g=ëY;þÙƒÊ7\rÉ%XZZÂüü|ä~Çqz[™lž›^;óÿðÿµæ°¿t Ä%Ïö·Qq]o¬²óªzrXZZÂW\Ëgk"2ضY[‚L¦Ñ~ð<¨ôˆ+—Ãc\’â©æ½°°€ë®»—_~ùŽ~î8çïn„ŠF:Ž·"Я5é8žö].ÃøÈ³[ét4¿çóýÛĆá©Hü,š¿»ÉÔ8ØÉù»™¡Ãiî§jÀޤ\Λ³È Û1ìx~‹çó1Ò®ç*ÓÒÁû¿»¬333-+.•JétzdFj}ûö‡%T«Õ¡®ë}ìb‹k¿¥'R°±RÍ ÛÏÎLŒK¾‰QTדîþÔ×\ÐØ ªÐòçȾçjÞ ¯jp2çUu¿Œ‰\¤ijîÎt†Âzé±V.{„®{¡½ÍùŠ»‘”í ‘t½ÿ–íIÚ… óëW·¬½Q=Ó¯§\’ºwôc’K\2>’p`Òß)§)"Ú†Zæ‘ÁÚÞB‘ÁbvŽ®k:F¥RñÛÕÔj5ضݲJH¡À$샒J¥0??*àdÛ62QÍÛzÄq€+¯|çwwË‚däd¥ÿß,¥ªÊ’ËôÄ8ä{$ÐÀp]È/º$œ·¤(Èi²ÜX=Ž#êÃÇ0ÀÎÊxd…` îÈì¬çE5MoÜf2Cx9Ó¥A\ów¨à€Pá€~Ø^Ì¡N6Q⃵ 4­ÿß?—‹î¡ËLqÉøHƒí&s¹È28©°c?+ºB„õ˜á±ÝH§Ó˜ŸŸÇââ"°¾¾ŽZ­æ V­VÃñãÇqøðaÌÏÏdfiiɯ@\«Õ033ƒn¸a¨s>ëYwâ /kÙÞµ'’ª†Wë§µ ¯æ3}0ù¼Ê®´‹ ¯”mÃ9ÿeh™¿WWQB£ÎGPä…à0&̸d¼Çq½ÙXíÊÆFcÌZV#é’í ®ë†•yRÂ),¸ÏÉײ<9hþݛ՛A«µ³2?ÙÄ!ã¶m‡[çP(×09U‘,Ë;%=&ú‘qIòlbÃà‚KqÐÕ`€b±ˆååe”ËeÌÌÌ„ 0=z+++˜ŸŸG±Ï_p~~>RÉN§Ó¸ùæ›}¯î 9„¢BTñ¾÷ ùm ]RŒŸöL;%ßDOáÀ‘%ñ±~¶ÂwÞ‡’½BN¹MA¨—"·½Ü}ì´Œ7ÓR!˜óWý¯Â=K%oÜó©hÓž¸e;HË A…—zÑA\×÷®ªjtç½A<ªÌä’§÷Ð\.Ë ë2ò=”ì(„ð¢I¶¶ÂÛƒ§+ú·‡uÝËé''.³sôd°’‘ÅÂÂFß>333²Aâõضqá…?Óâ`ÊdšVÛ!¼ƒã¨0ÁL£”ネÒRHC)I‚ëÌô¥´Ð)Ù`e‚Œ[Æ#+ïr Âu½b ¤LQ@]÷¶‹¤1ƒ³ÓówËÂL04¬MX°m7ýÖÛåœÍ¹†WÉq¼y›ŽÛåÇ °“2L•ßÚDPÊvé úÛ<šÕ{ËêOî%©‡î"ÌXèÉ`¼U*W½ÿ~_ã¨bÖ/Ù¬'°×^{)ÞóžÆöH[Q¢WèóùF|<Ã$ ·×\¦6ò]¸îr›È_¾ÒE¯Y fvšÈ Á»<.=Ÿ÷ŒÒf¥Œò´xœN‘rNÚr79ÒÚÜô,4-œùAo"¬ÐsD=–eµ†ÓJx›ço¾í•2 cÆ[¿¡Bª !‚r Ë %>FÎ0;-¶‡ærjG#Ëm+::_|,Jÿ0Óp<Ž3ŒÆ"MËâ^ñ“LOëÊÊ à8ÇÁÂÂVVVbkLß4A;ŽY–[ŠÈ—?ܽ™R±ØCu&†I(]äÛ¶ýíç¡´r—jg&¿BpT©Ó)† 2.Hc£t:ék1Ò4B¶½m° ¢bµ-,#Ë ù‰Êa˜qÙ{•„RU£õÇAîyŸ(𦾿ó· PŸŒI] VjWsèÐ!5¦„ìIAŽ :¿çž°D3Ì":% =+õ¶Í:3øŠ|K3íé¥PðJ)³S ÃÛÆL'Žã„óûÚ-@º®' ¥RCwÏåÓ„ä:mݧ4ßSûJ†ÙIZäh„‹WT‚XVCV Îk(l¼¶ã óÆF¨ŽdËi™É¤'+ÎU™™‰ûº{&“¡jbÞ$ߢÛ<¼íaå8f‚q§}x¤ã„ã¾²î8áâíB &i8ŽÉ}œnLÓ«L xÊXð–K%ÚU ¡9Üýô]°?ò€·ð^,²¥FÊiçb}ƒaÁ ³ÓDæhá•”&µÿîܾÎ{¡iÞó¨?õ=_— ªB››ÿ»./ÈO2=¬“Œ,7Z~å^ßýn8Nž“8˜ ¥mØX0l²‹ÁÊm ˜IÀ÷®ú±ÓK6ëÓÕÕpª š¯þd£þ?°ëò¡˜)‡€¦yÿoÛ­ùf¢°, ÿégv*ËšÚ¶×?I««ž••­Ý‡ëº¸ü¼ó›¬J÷áË¡|yÎÒŠ?Ÿ‡Öpr9oC›qBó?×/`všŽÅhÂËå .M7|I–å%ç„õ–b±m¿³ ÏA”“OÏmm[¶-//ãèÑ£¡mÿöþ=Ì‘³¾¾¿ÆcË®±ÁÔpm ©Éˆ„$R‚ ‰ ÛDMv=‰{ß%RÜ»ígßXÚÌ&@Bg¥'ïn²£¤7ÒæYzÈäIÒp6AµñrH°†.XB&nabì)Ú66'½Tߥ’Zg•T‡þ~®«¯™Ö±Jý«[÷ñw¯­­ù}NĬbºä¾kClóÁÚ…XßmmD Fd 0ÍŽX7 ®[¢ðÑ4 ±°úÑUÃh÷‘нý,Ën¤ò+‰LÓÄ/|ýë€{ê{w%æèQH?ö¯a>GqÑž2¾O¿¬Kh7X™ÊƒæÍ0 d†T@öu¤h2[];v 
),ŬnônC¬‡îH¸$$ ¿}$"ÐEÂ%]ogËÎçt: EÄzŸôÙDAgYVï©5½ö]uîÝY³‰‚NtÎ\ÿ¹Ï…¶"6½—¤ö4|1Õ7ämpòaxöm·µ§¾÷H®gl_‹Üú1HÒäëóD½ˆhžLÓì ÕU1zb¾µòk°, òˆ±.–ÀŠz?…×Ðk<w²‡•{ôIô´¸7ÎîÀ91DDålƒ0`]^Y–=šµ±Êç92MW--¿ñö ÝIöˆ†ê¤Ë;²YVæi¾zngÓ‡$Ùåf±·CÖÇ›&’ OÓ©CþyJ°P«ÕpîÜ9ç÷'N ™Lú}=‰-:úícÆ=ë(*ÌhXß~°·™HÌ)Á&†aà·Ož íâëbѾæØX¥A:²§º3¥vÕ¸½X›ÇQ}š·¾Ù»ˆÑI²GZ%iü¬è²lgYߨí×aŒ«®ë8}ú4FÇíëëëˆÇã(‹›&¬ªö=õª]ÈFŸ å”` )i„nCóc ò±WÁÊÛ>ÃÂÆ²,˜¦‰ø­··Þê÷á Õ+';Ji†a´§Kªj»¶ÞUÖ{1+^Uù}@ó¥ë:r#¯+ÿòùñ;ûDƒ—Õp)Kp­VC.—ÃáÇQ,a†óS,qøðaœ]‰Å¼*î™T,ü†6Xëõ:âñ8âñøÀljõ«ÍfÓïsrˆÄî«,»¶º‰=ÌÌ } ÿ½ø6ÍΌع§'Rø8Õö¶hš}M2eÈÍÙîõ ¥ù†_€vÕRÇã… >‰PuH¥:àj]rÒ# C¬±X »»»~çDÄ”G÷è“,·—€HÏü&çDRt¹¦ÕT* ÔL†{°R¸tdÂ`o‹,ÛY(‰¦ådPu—áz Œçvt<º·è# ‹ŽýWÅvb6˜kV§¼“ÛÐk"‘@³ÙÜ—¸›H¸‹Åü>'G¥Ò{»çzh6Ù`¥Ð3\¼ë¿vI¿×‹)ö& +§W¾#ÈýU,ÚÓ€ûRã× yÁi°ºZ£b6°{ p€.¢‘uŒ°Z–=ŒêÞÂiapÊ;µÔ`M$Èår}§û6›M”J%$“É@mmcOÑÚ¿k>ïšÉ)Ár–eAUUT*v—ý÷Ûÿ²2Maæ,ípM“œÿ1´¨âw&ó ¯™¦‰W>ü°Sh‹Çtº³wÇ"QXtä•qí3i~ì{¯xçqœA@m#e >uêNž<‰ÍÍMg´µÑh`ss'OžD£ÑÀÊÊŠßçÓa_ïcWWMáŠwû}ˆDž1în@¾Ü5boÇmŽ®R8ÉÅÜûÌ‘eÙK­ÜmåB*òžišxñ‹_ì—¦ÙÕî5«ƒÄQÇt`û'΋¿õ4Ÿz‡s·Þ#·§ò D"µµ5”J¥ž[×$“I¬¬¬jt° w]×Û iòy@’P­Ú‹X•S¼:Q0ˆÞJóノê‹îàZœ*Ë0tN$ heÙ—ùí†afe2¬@Ñ©*,Ëy‘Ì˲Úý5ì,¡°Ù7ëÑÕù¨o] é’ù}ˆP#5Xv£Ul]S¯×éÂÃ2û­c®¼aé4L0 [àÀ…ø0m–ÃØk î¥×Ó²œ¶Háçä#0Œ¹µóy{tKQìk(à[ÀRˆiš= ,—ÛÛºi¯S¦RìŒ;Ñq°ÁJáÓ1ˆÔµ…Mõo®B6{•3èšJ19$µÜ`Ä7jWw³ÙD½^G2™ôûœô˜,jñ…R©ö¶œ*IQ I¤ïý°±ì"°µeßnYû×=…‘X«bqn5uY¶¯%^?4k†aÏŽ‡ÈËa†Ý›cY­S×:o¢P³,Ëž&†v†w¢%îcFQà$ê˜c-&“a£€æC–í²úþÏïë^õìP—¤ÎTUûvŸ–rMÅÉ€ÝE$Œt×Ë9ƒ€Üõ©—£X´'tÕIb†ÂÇDrÑ´v狨»st•º=%8,º{%»KwóÜðŽ<Ò ¯õûP‰¦"Ë2ÿå?hWh¬Ï}æw_ÀJ E‚ä®ÑÌX¥ÂäJä§=íWPÌ¿Åñ¿PøÀOì»_’ìÊ<㓨×èªÈ€-ˆLInm°–J%¬¯¯wÜ–L&±¶¶6Òó÷vô€!FVÝ¥ûùóÐ>ÿtÈOoB½ë˜ß§JдñÝM’é÷ï¨ųIdb@æ¾ÃïS¥ÊËïHÔ1¦Ù¹½k.ÇÊ æužJ–³uSîU¤ŸèùXÑ)É¥Yñ:¾»ïØÒFìׄÎ鿺nw²³ÜÛ`ÝÙÙÁÂÂBGr§X,6òóEB¥^½9øüç¡ßÿ6än|ÐïÓ¤jÚøDE^tÐÈ2 ]õ-À²þ鋸„|âUŒ ò 4™¦=íRßÛúÉ4ííCØ a¼Œoäg<Ž‹wÜ40t>¤ªÇ­–ߟE•×å7`/_êèx¬T7ÒåÎ1¥tÚ.—Y“ÛÐk½^G©TúBÍfÓÓÛÝÝE2™œ8ë°ªö_°­&wQ¹ß‚üê==f¢QMßn²,;Se€ª"óDÖ“GÀœä/cÜÙÒÆ½ å”4Í®e2í=.‰Fåe|+ °ñc¿‹Ïm~ßÅv`ÊLÆ"š/ã[èH¸¤ë¨M:À¼ŒqÓ4íž~‘JÒù¼]QâZ@š„×e8L[/|!ûÿÑ®­™)¶.#šÏãv9.fÊPù‡×¡Ze¶kÍÐk"‘ÀÚÚÚÈ?^óç›Í&–——±¹¹Ù÷±O<ñ¾öµ¯9•½§ÙÞÅašÝ‘ª 
¼èE>|ÜFçÏŸÇ#<âëqŒßðøã£V«¡Ñh8·‰5Útu·sÞV«ÕpñâE¿cª2¼'SGno³¢VõzçÏŸÇîÇ1N|÷*¿…b0~ôøÓG齌‰Q~{=sq\^—ß–e!ŸWJ‹…c(¿£Î2øuðqËï¡S‚kµ–——ÛÉ‹`o½^÷tDµûd–––pûí·;sæãñ8J¥z>ç‰'žÀƒ>ˆsçÎ!™Lî%0ØË,išÓô/ß<“c§àÛÙÙÁùóçñØcùòþ“Ä7`WxÎ;ç<°¿TU…qjŠ{¤h/‘LçÎóµ²ãEîf~ámoódc>Wž )Ñ`õË$ñÝ«ütH—0ßiÎ4¹…ƒ(¿www§^7: ¯ËoÁ4ÍöTßìÄ‘¨ƒk¢)ÁgΜÁòòòÌN&cee¥ãB]XX@³ÙìÛ{sýõ×CQ¬¬¬è\r5¶½Ä4üN8°’É$Þò–·à%/y‰/ï?I|Àsžó¬¬¬ìû2xì±ËÿÂm¾œ ÓÊÊ nºé&ßÞß‹2¼ÛÍ<2uƒUÓ¸ö/ ð–·¼Ï{Þó|yÿIâ»_ù ˆ¥ÙÖL‹Qøˆò»»cc^fQ~ö³qùå®ûL“i€(Q·üöl «—jµÚ¾©bèx¼LÁ{[!XÖ¾ ã¿ϒ*¯â°{,¿ô¥«¡ß½ÌI{Ä´ñífYŽÀµ×ú}ZD/cØ+Ç{t<Žÿ:žÌ*¦ÎËø6M@º´Ù±ìŠÈO^—ßpñâýxÁ þÁþElk@4†@îÚH$°´´„“'O"™LbgÇ=*‹#=ß4»¦{PÑ!òÊ´ñÝMÿÌ!ÈϽßïÓ"rxã–eÙÿ1Œ‰+9ºnOæè*yÁËø6M@yèáÉ„úS?å÷©y^G€ï~÷Þþö½z8«4@6X{ÿ‚s¡L’àÉÙ»#¬0^Ä7°7ì¥_…üªËý>%¢žÆ¸¢Ø-Î †G+; ëÆ†ßŸE‰wñ HÍ¯ÂøÚ×ðSL®AáU| ÿøs‘N3¾ir#7XÝI–DöÉ^‰—¼ÜÚ&O´è\’ìzaXvE‡sÀ(€&o7EQ`î‚ô’+ü>¢}¼ˆqG.7ÖÃóùvFàj•_ä=/â;ó†ûÿõ),Zq +Š—å÷>û6(2ë)4¹¡ ÖÇciiißí³ÚÒÆ Šbÿh ™&SS¤ÉOÞå?è÷aÍ„“ ÃÀ“O^É%!i–eAZ\´3'õ!Iö–5ÌYC¡bYPO /ÖiEÐ?}õR~õa¿ƒBn¤V]×qòäIÔëuÄb1çö••œ:u Éd±X ÷ÜsßçÓqÌìœñDdYþõ[nB.Ãa%:¸Ä®el¬RØäß}…“ýš‚)ª”Vß÷†ëü> ¹‘¬«««PUwß}wGŠëD"……¬­­áĉ8sæŒßçÓAQ`qÑïÃ š™ø‡?lo4IA–eáÇ[­IÅ8˜ÂÈ4ý“ÏT†ap„•"ë®Ç/¾ã¿ƒBnhƒµV«¡ÙlâÎ;ïø¸;ï¼ëëëh4~ŸS'® ¡ˆ2Mÿù¯o~IÊïC!š Ã0ðò«®ê›¬Ã0ì‘UÖõ)lÄÁGX)Ь¯<ŠO¶^è÷aP m°Öëõž›‹iÀ‚¸ggÇïs`Wæ=ú¨ß‡A43–eáË^ùè3ü>¢™¹éÛßî»w_w¥Ÿ(,ܱkŠyíDó±¿~ÿIÊù}C“.Åb1ìîîî»}mmÍïcÈ4M¼é9Ï®½ÖïC!šë›Ï„|ë‹ý> ¢™Ðu+/ö)£i@¡à÷QG´OEX3C0EÕg¾ô <ÿÄ¥~EÀÐÖD"f³‰Z­6ðqâ~÷¨«ßžÝ£¡M†aÀzì2¿ƒh¦=úhÏ«¦²)”4 HÿÀ—|º®³ÁJ‘uøð§pôèü> Š€‘¬‰D¥R Íf³çcšÍ&J¥’É$‰„ßçä¸âÁ™:’"KQÈ×^ðû0ˆfÆ0 |û¦›zÞ§ël°R8é:¾tÓéˆaÂ%ŠªÏ|æQ¨,¨É#e >uêNž<‰õõuÔj5ç§T*áÍo~3VVVü>‡axæ3ŸÉl]~ä+ôû(ˆfæÅÍ&b¯{]Ïû…ëW)œÊe@¾°( GX)²*¿óªÕcì!O ]à أ¬gÏžÅúú:J¥Ò¾û°´´´/1“ßnh48ÂJ‘dš&®ûÖ· Ýô,¿…hfþòá‡û.RÍdü>:¢ÉÈ2Ú)®5 VŠ$½¼×^(@Qt¿…"`¤+`g>uêVVVP¯×Û‰D Ö­vÈd8ÂJ‘dš&޽þ{¸ë§¿í÷¡ÍŒÜgíªe±ÁJ!fšÎt`Ã0a0SĘ&°uÿuøåI¿…"bä«‹ÅL&ý>î¡ Ãà|1Š´ ×^ËE|Y†aôœJ–ÏÕªßGG41º {{2ްRÔhðÿ¹ì½xìG~ÄïC¡ˆi kqnŠ2Ó4ñ±ý¸}EU¿Š|µÚw—¢pèj°EMå¿~¯” ®_%ÏD¶ÁJe¦iâñDzâN‘Õøâñš{ïu~¯Tìóz{ VÓ49ºJ‘c€rÍ—ñhìa‘g"Ù`59ìDÀ/ÿò—ý>¢™ùö½÷"qà €lÖ®EB: È2t]g…ž"G’€ÌwÿþÇî.·´!ÏD¶Áúÿ}øa{=QY–…ýpiú" ¨C_ù ¾sõÕÈçí$Kå²ßGD䑽$K¦i²BO‘#KÔØ94/½ÔïC¡‰dƒ÷`¥HûÒV ks«ß‡A43ß~è!üÖƒï‚i²±JѤë:¬=ºŽû_ô"Î 
OE¶ÁzèÑGÙ`¥ÈzîîÓñ´ë®óû0ˆfæÿÝùUàúë±±Á¢œ¢Ç²,&¤¡hRüå ^ÀõÙä©H6XEÁ‹®»ÎÉÂG5_}èYÀ3Ÿé÷aÍ„®òwFyýr¿…h&¸~•"K–ñ•K.a|“§"Ù`•$ W<ø ß‡A43¯|Þ@}Õc~ÑLüàuŸÅÏJE¿ƒhf Ã`…ž"‹ñM^‹dƒ÷> HûÞå77Þè÷aÍÄvµŠÝ—¼Äïà š®_¥(3M“SÞÉSo°Öëu4ñžÄý($&Šoæ?>êO_ë÷á 5IŒ_ù7ƒG®¹ÆïC'j’ø¶, X¡§À›$¾9ºJ³ðt¿ ŸF£\.‡z½PUÅâˆSÄ,‹ëW)ЦŠoæ7odްRpMãëG2aÚ4ñÍÑU ºiâÛ0 –ßä¹Àް®®®"Ã0 ÜsÏ=¨×ë(•FÜwR’8%˜mªøPNüŽß§@4Ð41^(ÙÛ«’(ˆ¦‰oŽ@QÐMß™L…BÁïS ˆ dƒµÑh V«aii ‹Å°°°€jµ:Ú (г17QÐLßTƒ i(¸¼ˆq¢ š6¾ Ãà+Ëo ¢@6Xwvv‰D¹-‘HL´Ö/Ì677ý>O‰Bð c|Û¢Q»f'Å·E-¢xÍNbÚøÞØØýúÕ(ÆBÔ®×I±ü¶E-Â~ͲÁ:è¢h6›~ÞÜèºî÷!xjggçÎóû0|Çø¶E1¢vÍNŠ1n‹Z Ï<þøãxüñÇEü~y衇ðo|×\s VVVæþþ“Ä7`§‰ýë_C‡á9ÏyÎÜÛkA‰/åš=þ<ž|òIßÞŸe¸-(ñà• \³<ðšÍ&n¾ùf_Þ’øfù|A¹^Eùý‘|ozÓ›æþþ,¿mA‰¯åšuðqËï@6XÝÓºÅãñž·¿ýíoÇÛßþv¿h¨Iâþâ/þÂïC' ËpвIâ›å7…Ëo ¢@N >|ø0€Îi Fc`ež(,ßuŒqŠ2Æ7E㛂( Öx<Žd2Ù±àY×u¤R)¿hjŒoŠ:Æ8E㛢ŒñMAtI«Õjù}½Ôëu,//#£Ùl"‹amm­ïú>¢0a|SÔ1Æ)ÊßeŒo šÀ6X{á·XœL&ý>"O1¾)êãeŒoŠ2Æ7I ¬DDDDDDtpr +Ñ¥ïz×»Þå÷AD]­VÃ%—\Òwî½^Ç·¾õ­ž÷ºo”ûg¡ÙlÂ4M<ûÙϞ蘂xN4¹iâÛ‹ûgaPŒ‡ñ|h:ƒb<ŒñÀ2œ„ƒV~;¦ žM‡åwðÏi$-š™÷¿ÿý­×¾öµ­cÇŽµŽ;ÖºãŽ;Z/^tîßÙÙiÝvÛmÎýwÝu×H÷rÿ,\¼x±u×]w9ïyÛm·µî»ï>ÏŽÙs¢ÉMß^Ü? ƒb<ŒçCÓãaŒ–á$´ò{Ø1ñ|h:,¿ƒNãà”ài6›(•JXYYa¸çž{œÛ„ÕÕUÄãqçþz½îÜ?è¾QR©„F£{a c}}}äc â9Ñd¦o/A1Æó¡É ‹ñ0ÆËpfù=옂x>49–ßá8§±øÝbŽª{ï½·uìØ±ŽÛNŸ>ݺãŽ;Z­–Ý“qìØ±ŽÞ‘÷¾÷½­[n¹eà}Þ;+/^Ü÷ž;;;­Ó§OtLA<'šÜ4ñíÅý³0(ÆÃx>4A1Æx`NÂA+¿‡Sχ¦Ãò;øç4®§ûÝ`Žªd2 Ã0:nÛÙÙÁÕW_íü‰„s"‘@£Ñxß°çΊHmžH$P¯×Ñl6‘H$°²²2Ò1ñœhrÓÄ·÷Ï ¯Õj¡;šÎ Z|{qÌŒñð8hå÷°c âùÐtX~ÿœÆÅë,//;ÁpöìYÛÛÛ}ïk6›Ÿ+6xöÚîîî¾sÙÝÝÅÚÚÚРå˜ý8'òƸñÔxãa<òNwŒëºÞ÷±A–áÔËA(¿‡Åwë\ä–ßÁ<§qq ëÜ~ûíXZZœ9s€ý<üðÃ}ïÛÝÝø\Ô^s÷îÜ}÷ݸûî»±°°€ååå¡ç3Ê1ûqNäqã;¨ñ0(ÆÃx>äîc<° §^Bù=윂Xç"ï°üæ9‹ Ö9H&“XXXÀwÞé,v»÷z|?ñx|àsãñøLÎáĉàL?ìB Ùl¢V« =¦iï§à7¾ƒƒbü’K. Ýùwºc4–ßÁ?§q±Á:#'Nœ@­Vs†ñ@×uçÇ‘L&±¹¹Ùq*•xß°çÎJ"‘Ø—B»T*9=3Ó³çD“›&¾½¸Åøk^óšÐMgPŒG-¾Y†,­üßA<šËïàŸÓ¸.iµZ-¿"ªVWW±¹¹‰d2‰ŽÒ€='}yyñxÜYÔ¼¶¶†X,6ð¾aÏñž¢—jœóñâ~ –iâÛ‹ûgaPŒ‡ñ|h:ƒb<ŒñÀ2œ„ƒV~;¦ žM‡åwðÏil°Î˜;ez¯uÍfÓéê¾Ð}£Ü? 
ÓSω&7M|{qÿ,Ìòšd|‡Ï c<° 'á •ßÃîâùÐtX~ÿœFÅ+×°Q ±ÁJDDDDDDÄ+¬DDDDDDHl°Q ±ÁJDDDDDDÄ+¬DDDDDDHl°Q ±ÁJDDDDDDÄ+¬DDDDDDHl°Q ±ÁJDDDDDDÄ+¬Dä]×aY–߇A4w^ƾeYÐuÝïS"""š 6X‰È7©T †aø}Dsçeì†T*å÷)ÑV©T`š¦ß‡A4Œ÷ùcƒ•ˆˆˆˆ&¦i+ðt`0Þçïé~Ùܽ5™L²,ï»O–e¨ª ]בÉdØÓÊÄT°t: EQœçåóyär9T*X–EQN§×“$ ¹\nâÇjP|@±X„eY=cÌ«g|“fû…BÅb—\rIÇó,ËB±XÜ÷¢iõŠgq›¦iUU;âSQç6–×&“Æ»¢(NìJ’„L&I’0¶'ÅÖÈf³(‹Ó4qüøqçÉçóÎ}š¦aqqѹHŠÅ"Ø”ãÇ£R©8¯+î·, –eaqq±cZ±XD>ŸŸøñD£ßâ~±–¯_LÓÇ8ã›æmÖ±ŸÍf¡i|ðAçv˲J¥œŠ‘W†Å³›;>ÅcX^S˜Lï_üâqäȧ³QÓ´Ž%Œí µÈW[[[-­íímç6UU[¹\®µ½½½ï>Y–[ªª:÷mmm9÷•Ëå–$IÎïZårÙù]Q”V&“q~Ïår-UU'~<Ñ0ƒâ»Õ²cNü¿Õ²cXĘ×1Îø¦yšGì‹çW«Õ€Ö… öÅ5‘†Å³ªª­jµêÜ×ßâ6–×ÓÄ»(… .8õvñXÆöø8%Øgš¦AUÕŽibÕj€ÝËÒ}_:†aÐu²,wd‡¿†áô¬»Ÿ+I’3%¡ŸqO4È øTUuþï~Ü,bœñMó2Øw?h'r* ~Ÿ>EÌ(ñÜ­;>–×ÓÄ»xN6›E:†ªªûžËج6hËÓ4uJn½¾ ˆÂˆ1NÕ¤±/Ö f³Ylooû}DDŽ,˨V«Î2>˲ÉdP.—ý>´Pcƒ5º¦º®; ¯û5ZE$I½6ݽïDAÐ/¾Eâ°~ãvóŽýr¹ ˲ iš“„È+“Æ3QMï¢þ^.—Q.—mÇEáµ2&]òY&“q¦øö"q¸§ÿŠûDÒ UU;~Ú‹¾‰‚bP|ç0ó+ö%Ir²UrÛòÊ(ñŸw¦?…Ù4ñžÉd iŽ9EQœ™1étÚïÓ µKZ­VËïƒ »2"zÃÅ:%1L–egV±OÓÆÆFÇc€ö42¢ éߣbŒS˜1ö)Júų»¾Ò½×0QXMïbtV’$.cò¬fš&Ž9‚­­-(ŠÓ4‘J¥ËåØƒIDDDDD‘Ç)Áæž’ ¦g26V‰ˆˆˆˆè@à+³Q EfJ°¦ixßûÞ‡›nºÉïCñÌùóçqôèQ¿Ã3/^D³Ù ÄßèâÅ‹¸ì²Ë°¾¾î÷¡Œì–[n Ägç• ÅƒW‚tÍž?õWå÷aŒŒexðéš½xñ"$IÂïþîïú}(#aù|Aº^ÏŸ?ßû½ßC"‘ðûPFÂò;ø‚tÍNR~G¦Áúüç?oxð²²â÷¡xfyykkk~†gjµÎ;ˆ¿‘8–0¹é¦›¤kvyyÙïC Ëðà Ò5¶2œåwðéz]^^Mc`ùAºf')¿#Ó`¢(](L&‘L&ý> ˆ(ÆCÔ®YšNÔâ!Š×,M&бµë•¦µxû5Ë5¬DDDDDDHl°Q ±ÁJDDDDDDÄ+¬DDDDDDHl°Q ±ÁB¦iÂ4M¿ƒh&,Ë‚®ë°,ËïC!""""ŸqÖ², ’$õ¼Ï0 ,..TUE¡P€$IÐuº®CQ¤Óiçñ½·¢1 ( 2™LÇcu]G:îûþnÅbªªBQ”}ï§(ÊH¯AO¥ReYH§Óeٹݲ,‹EhšUU‘Íf‘N§¡ª* ÀeY0 c¬÷’$©ã=ºŸ/®w »+®w,ëºÞñú½žKDDDDÞ`ƒuŽLÓD¥R …}÷ëºMÓœJµ,ËeªªBUU躎l6‹ (Š‚J¥‚ãǰ¯²,C×uäóy(ŠÃ0 Ëò¾ µx]MÓpäÈ §Á›N§qüøq¨ªŠt:ÝQ9Ïd2e†a ›ÍBQ‹E†áŸ¢(eÙl¶ç{»†Ó4‘N§;^[4¨Å9‰ÏN„^ wÃFQär¹žóçÏã‰'žð;$"EŒŒ†Ñ³X©TœN#¦i:wEQ ( ¶¶¶œ¿Y¥R®ë$ɹ„Q‹"÷qY–MÓœ÷×`ljªª$ Åbщ?ñq|"†EÜ»ß×MÜ/®ÑQйˆç³sˆˆhtÝuQ‹ú‡$Iû:T'!êtâõºë-îï3NýÏ4M|æ3Ÿñûc" ”KZ­VËïƒðB­Vùsç°²²â÷¡ìcš&ŠÅ"t]G.—s’¹\ÎyŒ¨¼çr9§#ž†a8\¹\©’+*Ü£[>Ÿ‡ªª£­¢à•bq,¢â^(œ×w7Ýç^£»â6÷Ȭ¦iÎû‰Û%I‚išÎy»ÏÙýEãn<íJ¼$IÎë¦ÓiçµD#äk_ûdYÆ»Þõ.?Cc,ËËËX[[óû0öqÇ·ø’/‹NÇŠišX\\t:¦­ÌÊ 
ŠÄ0ÝI÷kˆ˜wwþȲ<Ñ´~÷q‰Ž~¯óÈ#„ªÒä2œ&'®q¯©~¯%:½^ñŠW„*^‚Z~‡U¯º`Ya2™ŒS×3xÜî2Tt,ŠïѡY–…J¥âÔËDY.f¦‰ïMÓœ<¢3Wtê»Êå2»s¶R©8ƒ›››¡Š–ß4މâ¥÷Þ{oëôéÓ¾¼÷öövkkk«ç}¹\®¥(JkccùíÂ… -EQœçd2™V&“ñåØ'9×°ÙØØØ÷÷ñ3^&uÇwøò¾.\hår9ç§P(´ªÕjëÂ… ­B¡Ð’e¹#¾[­Vkkk«%Ër+—Ëõ¼Ÿfϯx™T¯É°èUNB”’$µTUm¥ÓéV.—ëù½páÂ…V&“i©ªÚÊd2-EQZŠ¢´ªÕê¾ÇzlŠ¢´Êår+“É´âñxëï|§ßíÈÂv=úi{{»µ±±ÑºpáBÇí­L&Ó’e¹•N§[ªª¶TUu¾k.\¸àÔ¯ …‚¯²,;±Óýšƒ¸Ÿ_.—ÿçr¹Ž˜½páBkcc£•ËåZ­jµ:R])“É8×¢(ǶxaùMã˜$^|\«ÕpøðaÄãñž÷×ëuÄb±¾÷èÉ=kÝ£ƒétÕjuß(`¹\F6›`÷ê‰^¶  êèØ î5½ó…øÓ¿ÝSiEOq±X„,ËSx1µ·X,ö¼Ÿ¢! 15î–e!ŸÏ;³eªÕjÇc‹ÅbÇÔGñxI’ö-[Ñ4 ù|étÛÛÛ$ÉerçUà̲ß‚(OÄŒžJ¥âŒx‰‘)ñý)fˈ< Ý3Äì£Yal÷&fŒx½~_ŒBŠÙVétÅbÑ©S‰ÑIUU÷Õ—Dþ±D*—Ë9qR(z.Á…¸2™Œ³Ôª×÷™˜ò;n]£\.£X,²,lmmyöYŽŠ1NaâKƒuss¥R ÍfL&Q(‹ÅF¹\õz€ý%X,ýþ¬œBQ¬·à|‹)#£®Os'^Æ7<þøãC“$ “Íf;¶PUuß^µ~µZ /^œùû¹ Ÿ·^sÑ ,—ËÎ5&˜ªª"›Í:ës¹œóüQ+øårÙI4ãÞG¸I’"‘­^¯ãüùóØÝÝÙ{±üö‚ØZÓ4è¹ÖX¬ µÃ¢P(8k’»GãÙXŸ(¿‡ÅÔ´X~î½äÝ·‰5ß"q¬X[ÞmÐzâµz}ÿˆ™?b¦…¢(ØÝÝÅW¿úU=zt¬sðµÁzûí·£Ñh`}}gΜÁÊÊŠ³¼—ÝÝÝ˃>ˆsçÎM}±d³YX–å,þïÞ_”ÂmggçÏŸÇc=6Ó÷ñ2¾»ÂsîÜ9˜¨ÂS©T|ËFHóuîܹ±æ• –áó&–¥ÓéŽQN‘7•J¡Z­:Ó.ÅÔÈB¡€l6Û³¢? ‘q>•Ja{{{äç„h°ÎRÐÊïiˆFªHr$âÔˬÿ\åQ~‹©i±ü1ëͽ B$%»ˆ}Ý£•‚dàLïÙ·G½ÖE"½^ß^µ…jµ>øÁŽý¼‰¬¥R µZ õzKKKˆÇãh4cÏoA‹ÅËå°²²Ò1 ¡Û Bþú믇¢(Sϱ9å7ºD܉ÊìßÇ‹ø€ç<ç9Ç·Øìœq}0¬¬¬8k’f)ˆe¸Dv]Ã0œL¼b–B¡€J¥âdàvWì½X^¢ª*¶··C¹åؤÇgZ†©üž†˜n.v3àˆgð‰ò{Ö,¿½#êXbKܦiZGÃÒ4Mg /Îè£Ø¶I–eg 1w#U¿ûqbî<>|€=-A\îÿÏ’h¬íKáÔø.‹PUõ@Uji6‚ã³$Öô¹{¯Å÷„¨”ˆ™ ©T ¦ivlÃ"FOûM›š¯koD-¶Ýk£ÙP% z1>o¢¼I ½7½Hj'FIu]G:Þ—l¬{ .Ï—fI’¹$gOçÁbê@¡PÀÊÊŠتªbmmÍuäĉû§ëºs1Äãq$“IlnnvÜŸJ¥fúA°±J^b|‹ÖýÞ(z‚ã³$¦ûŠÑP1B*¾3ŠÅ"Òé´³F§\.;YUÝ2™ÌKb6QŠm6V©—(Åø<麎ÅÅE?~Ü1Õu•J²,ckk …BØÚÚB:Æöö6 …‚3ÅWüˆ†©hTºo£þÆa‹³{ÍOO$H$C×M%“I,,,àäÉ“H&“ØÙÙÁîînGoÏÊÊ –——Q«ÕœEÞ·ß~ûÌ>„b±èlYC4 Æ·¨PM+ˆ1>­b±I’ö­#Õî5¦årÙl‹‹‹0M³c]¸¢(#¯'¥` {l‹i‰ƒ*ÑÁöŸ÷šQ±K€‘·fŠâèfŒÕ`‹­{e k6›#¯™:uê–––ú6€‰>ô¡9¯7ËÜb[& ¯)¾tLW$òBÐb|¢’/Öÿ¸×õË| tîSJÑæØ^\\„¢(ØØØà4qê+Ì1>K•JÅY>´;2¹‡p0ŒÕ`£¨ËËË8uê”s{£ÑÀéÓ§©£ˆÇãçċ͌gMÌ1çèy)(ñíNYN䥠ĸ´Ä>Äb›™­­­¡US|£)Œ±],!Ë2c’FÆ÷Z¥Rq¶®4Mªª¢Z­²³' ÆNº´¶¶†|>“'O°Ó¯¯¯#‘H X,ú}>c3 ƒkû(²4Mã”0¢ŠÅ¢SIVÃ0&Þj†hÞÄ,Τ!Î0 äóygGOÃaìk,ÃÚÚêõzG–àA©°ƒ¬×fïDQÁ¢þDfGÑ£^(°¸¸ÈÆ*…J6›õ<Ë(QTˆ©¾’$9u~‘ 
‰Âc¬k­VÃòò2î¹çžÐ6PÝØX¥¨S‚‰h?±gª“ïQ˜T*X–Å¢*•Š3û€:á6öÖX,]×±°°à÷±OMlÔKEŒo¢ÞLÓtÖ.±—ÂJTÆ™4’¨“;cvµZec5ÆÎ¼²²‚R©„F£Ñó1+++~ŸÓÈ8]’¢Œ V¢N¦i"•JA–e(ŠÂ}·)´ÜUVƉìFêñãÇ%’$ñúˆ±×°®®®Ö××{Þ¶++ôUº®³çÈ¥X,"—Ëqú$…š®ël¬¹ˆÌî,ߣkì«a~³'ØX¥(³, X™!Ú#¶.à¨*…]±XD¡P`ùN„á{fS4ŒÝ`€ÍÍM躎Z­ÀÞTXUÕP­kåtIŠ2‘ý”ˆlbt•(Ìt]–ït ™¦‰J¥â\l¬Fߨ ÖR©„õõu,,,`ii Ðl6±ººŠz½ŽS§Nù}N#áúUŠ2]בN§ý> ¢@po Of•J…u:ðòù<Eá´ød¬k£ÑÀúú:ŠÅâ¾/~UU±¼¼Œ¥¥%Äãq¿Ïk(n÷AQf§>Áž.–ÏçYɧÐ3M–e±ã…4MÓ`YËôæiãÒAcŠÅ"¶¶¶˜„ƒ"‡ V:ˆLÓ„aØÚÚòûPÈgc7X‰î¾ûnÔj5œ;wpâĉÀ¬vàëºÎ¹ï!ö×è8˜ªl6‹r¹ÌÆ*Eë.tišÆŽ0á>¬F±X +++€Z­†F£øìÀL>£ÌüÐu{¶»žjYö}{³À!Ë@Ô× Ã`áNN>Ÿ‡ªªì¬¡H2 ƒ±M’¦iØØØðû0(ÆNº´¹¹‰[o½¥RɹíÌ™3¸õÖ[±¹¹é÷ù tÐ+óºhÚlßÃ0ìi¼ãÐ4À4{ß.,.îŒøÝ4ãÇÛ¿W*öï©”}<Ѝªý¯ðÖ·¾Ÿýì‹fûaø€ëWé 1Mš¦±3’"‹;ÐAdš&$Iâ'0æk³ÙD©TÂÒÒ’3º kkkX__Çêꪓ˜)ˆDðD–d³öÿUuÿhäâ¢=)ËözÑa“x½\®³!¸¸ˆÎ0ðç~ŽaØ?…‚ý¼JÅþ]–íû+û5Åí¢!n¿ŸxmM¶¶ìQSUmO6Mû•qïÿçpîÜ—ýþ³xÊ0 VjèÀkûjÙNѧë:ªÕªß‡A4WœLnc5Xëõ:šÍfGcUXZZÂææ&vvvÛ`=Ƚ4Ţݰ“${´{)L.מB{ü¸ÝÌåz7ú,˽TU»™É´§Úno·gšvÃSPû'—k7ˆ»§èjš}Ÿ¦uNN§í×Óõömîïo÷qÔrº®è?ÈLÓ¾®Å´÷ƒ¦irVE–èhg‡ 4•J…É–È1ÑÖ~vwwý>Ÿ¾,÷P_È™æøÒtº=*¦ÊŠ`¹Ü¾OUí_¥b?®PØŸ¡·Ri7R-ËU5Œýàtzüì¾ÕªÝ`–åΑ[ =ºJ½Y–ÅŠûˆ‘þ 4 5­}­äröu(®Ûƒâ Ïœ¡èc"=:ˆÄŒ1–í$L´ëêê*šÍ¦s{³ÙtöaM$~ŸSO†aDbô)Ÿ·+¤½Ö|êº]™kHuÝYµ¬ÎÆ_¹lWr+•þÛLÆžn+¾'Åt^À®‹‘QI²™^%3¯Çdˆã‹R§LP‰YÅâh¯T:gô{½~, 8tÈîêþóºGUû×m¥|ìcÿÊïk.,ËâTxЬƒž{ƒŽ|>C‡!•J9‰ôˆ„‰÷aÝÜÜt§õz±X kkkˆÅb~ŸS_aî©1M»1*Ëv…´ûTDå5—kjʲ]YëBQÁ¶$F¼‡eÙ£žîQÚn^Ö#Яà Ó4YqŸ1˲×G‹5Öƒ>nM³×ë:³,»Ñ«iökˆ™Ý×u>ow*‰k°P°£(öuR­îŽèô¹õÖÏø¿?²™3DOQE¥³h±ßª$IÌÉAûL¼«®ë¨×뀅…¨ªèƪÙkH2Ü[÷ô\‘WT†Å4EwG¬,{35P’:צRp…¹S&(DB/Ó´g¸gˆuØb-¸¸þt½=A¼†¸_’:§ñ‹™étûº*íŸîëÕ=Ó@UíLjäh’4¸sçðáó~”sÁJ E•išl¬ÒáŽwŽ®R·¡ ÖF£f³¹oªoØö¼ó}ô)›8ÏÕ½ÍmqѾ­cTÕ4¡H€$ÉÎ:TI½èLˆ!]¯öÍÊfÛs½:¾Åí8|í”ÑõvK*ÀÄG4è0ŦH*&2P‹ßN^äV­¶§:öjÊöíÅâk>Å\b/ˆyËÀà…y£ÇU© ~½qjšÖ>ƈñµ°C„^pïÁä!‘H,•²_¾{M¨hlŠ­TÕ¾®Ü!mþÆ9ç)étûúÛ5årý"îkØuÏów-t Õ±ÿœ"ãR*åÝß"à,Ë⬊,Ã0B50@4)vÎÐ0C¬ñxÅbgÏžE"‘Àææ&Nž<‰ÕÕU胲‰Œ ^¯l×ëu4 ONÔ0 +6ªj7–z•d÷èIß6¤Ø{Æõ%¶¯A;Ž|~øTMë{ìíƒÙØè?"Z,ÚYd†U¨E¶šrÙnôk´Š,T£6÷ZGWVpùOLø¡æ@Åw¹<8»Ð8ÄÞFcSmíÏc¯z÷ÃpÂgkËž†«(öšP6ùŸ¹–µ¿‘(FW‹E@zêëHá×ö¥«î¨Oš&rz éJj_‹¸cº¯H¯](ìKÅ-fKHú^çÊ¡C½[Øn†ÑÞyk 0 ¼ý£õæï1À®r¹=zêî…­hEi/í÷Y‰¬Nb˜·Rig>i¿Ý×¢èñDz±ž<‰c3úsûU†wãëâ‹d”ÏH¬/½4#¾~å_Þý«/†ñÐMÀå—#ýsWôÌ 
ÿშ¼ý”çJìêWþ»³P—!ýÄ¿‚eÝä|}iP\ªC}ÎßB=ôä~ôJXO^J鹸rísxοºÌó,(±mÿI¸eSÐåóƒRöÒë²_€w+ ú RŒ †a ǽ i–î»ï¾ÖéÓ§[·Ýv[ë¾ûîúø×¾öµ­÷¼ç=­V«Õºxñbë=ïyOë–[nqî¿ãŽ;ZwÝu—sÿ-·ÜÒ:}úôÀ×¼÷Þ{>FQ”ñNjc£Õ’å©?›jµÕRÕVëÂ…VK¾êë­ õÙ}I§[­í}Á~PËþgï¿ûÉò€;]Ç­ª­V.×ÿq.´ZŠÒjmoï?`ñ|Ynµ …ÎÇomu>>—³ÒéÁD¹Üû¹’dß×ëøÆ•˵ϻZøÃâe³ˆoñ¼þo¹•ô÷ž‡jµ\¸°ÿïßï52™Á)—íXt] ét;T;^KU;oËåìç¥ÓíÿW«ûßc{»ó:P”Þ×S¡Ð¾zÅq·­-ûÝw°#¸p¡÷uÔÇ x™–ex/j÷ß—:‰ïQ&*/\h—¡îï¾/Ÿ¹þý­ôËë­­BµÕÊåZnG+wíï·¶NÜÑj•ËûCµûûf [Õj«<Éu4„åwÏóÛÚj¥‡}¯Òض¶Æ CqYÈr»Jä|e”Ë­ú/­ŒÚ~ÁLƾtú…f¡`ˆ×(—Û_I­Öƛ϶Ò?ô@ëÙÏþêÌ>ƒ ”ßn²õmòÐ… v@ŠºÍ¸uòß“ÄËX Ö{ï½wßïïyÏ{Z§OŸÞwß ×8vìXëâÅ‹Îm;;;­cÇŽµî»ï¾Žÿ ï}ï{;.¦IN~¬‹a{Û.qTuê/Øí÷}²µõ¡V«Õj•O~¼•yemßc +;vÃmX£ºWe>“±oÏåìÿ÷j„ îÊH¿ ø “4"jV ÖYÅw«5¸Â“ËåZÕ^ ¯~ÆhøŒ¥_‡J:ÝjÃßSU[­jµ•N»ê:ÖíÕ?¶c×Õ.—÷ÚÉ¢V¡(íZA¿÷ ÇA1í¾ŽfñYÍØ¬¬~•á½H’4ÃOpζ¶zwžtÛÞ¶ã;¶Ë|QƒÞØèý|wCTU[Û/»µµýêŸmµ¶¶Z[[ýCû‚iµ oÿ‡–¿¯¥^ñÉý2ÛÛ­òßßÿ·¶œ·Åå3‹2ܯò»—B¡Ð*Ì A~PlowVs¶ÿÏ?¶äç>ÖÊd:ˆÕj»¨}œ"Ô·¶ì׿_¸`_nêË¿n_CâE\/Z½ñg*™,·ëüï}¬Ý¹)~ö®×ޝ¡½×û곟=“Ï%Hå·ÀΙ ‰²_Ðö¶?½:ÏËeûg”6ûâ,Ëvœ0 äôÚt_l.[Õjë¯~ægÆ:Ý‘¦»§ˆ=ït]G.—C<`'f:uê¾V"‘Ø·_ëÎÎ ‹9ÿwg%N$SÏ£kº¤i¶çwÆT7å?ÿ¯{SsãÈœ~Rñû:— Zrþ1{.£¦µ7^ìEL»uSÕvš`1}°Ÿi÷¸áÔ¤¡üŠï±‰iÓ"í¤ÄTqq¤Óvœºç4åóíiÝÙl{ªw7±NU‘kÛ˜½Ìb•{^ í[o†úKŸ@Nºš© ¿—E»\ ÉöUÓlOM´qð°9\îëžkkAŠñH¬yZ\l'2$ûéZ×ì8~¼=¾Ph/}K3z-Ë‘$ P@¥²7åð —ú äÅ4ð3¿…ì_.8[¢‰"AÓ]¿™Ìµ(|P$¨ü9{Íi.d222Ñç»qïÚ)„ìϤØ6Mé@¤þ÷Ÿeõ®~Fûë§»š&³Ø‚ãžÀ‘¿„{ªoùçÿ.繚àcƒúÕÿò÷=åÛÏÂÆç? 
GÍ._¶ ÈØõ*I²c?gþþ¹öõ&.Ë‚ºa½XñQ,òóž„ú_o~æß%{Ýqné4N£pé¥X›Ág¤oÿ¹×ðØDnY팑¢^Óë³T”vÒQQïËdÚù4Ü{WÊrg=-¶¿ot½ý}#¾¯ÜÄ.!¢¾ç¾½ë»é;W_/þðãucœòH Ö\.‡D"Ñ1‡]¬]¿¾¾ŽR©4´Á‹ÅL&߆ó¼x<ŽZ­Ö÷¹Íf³ï^¯?ü0 Ã@©TÂÊÊJÇ}co÷aí½-t}¢ ½aØÕ†a´ (IBáÕŽÅ¥ã¨~ürÈ2P9s9Ò¿ú_ ‰ K¥zo½R,¶“¹ín4ºZ­†~ðƒxì±Ç<íYÅ7<ôÐC(•J8qâDÇ{vŸgá‹iÚ…Õ´kNE)Öžˆ×Dvl‘ÒZìýÒÝy’͆üõeï*;ìs9;Æ»V˜î}qþ²<|ývÄ•J%<ðÀ3ym?Êð^|O46Šî BŠÒ´ºnßﮈÌí¦Ùdž,Z[€(d dȲŠÌÞzO]Ò°/3'ñ˜Ùþ:k/é?ܵ Å4±µÚ~k‘„lßg{Sï­-ÿ“PonnâÓŸþ´ç¯ëWùÝ‹a(Oœþ?œÄ6~ét{ýLê _Dá·.…¥¨vÿç%¢ðÂ2¬/ß ýª7Cÿä3!Ëí¯í—Ï¡|ÀÖDóPÚûœ>KãY€ò>ç1wk¸[¯¿KÇ) °‘3ìï·;2#/Nåw£Ñp…¼”ò»ó£6Ø9#r´ˆA¨½gã÷tº3Æ*•öFï£pwÒ‹ü©½äÃr†ªÚ[HÓlçôè®u=GÔÁÇ54Kp­VC£ÑÀ©S§œ—f³‰z½Ž……'xÐl6»[³ÙD©T­·ÞŠd2‰S§N9·÷³»»Û÷¾+¯¼7Þx#Nœ8Ñãsì܃Õ0†ìš!‚D4X{°=Šè°È½îܾ?”RH#wû×±¸¸3g¯„´ôfûNI² Â^YoÓiïö6=à>Œ£Gâºë®›Ù{xßpÕUWáĉ8|øpÏûÇê¡´¬v­vÝ+ÝEÑÓ& UÑK×}]•Ë0ñ¿Âøî+ú¼È5T,Ž–¤šÚNœ81°òì…y–á½øžhlâ ^ütgÔÓuä¯ûïNæÜJíZ]_ú"‡×ö¶}—h„Šë¥»G–íKo{ÛndŠ=€;t}~=’Vï³7`ë«D"£GâꫯžÉëûQ~G‰È7 Ñç MCîÊ»þtäˆ}5ûg(üJÐ4X¯x-”ÍS(\°~êuŸEáίakËn ‹À⛟Bæ©Òè{ý š…3N²~²Y´³üF”ß³ŠoÀÿòÛí@lߤëvP?nÿtËdÚ[܉zT¹Ü®_u'ÝÚš|Ö£˜*°½ÝÞ6aZ}¾·zuðqã{èk½^G2™ì¨üˆílÜ6Nå¨^¯cuu±XÌÙ.Gpÿ¿Û ž¦+¯¼Ï{ÞóFì¹´ Ô¾å™aØÁ#‚¡{>Š‚—¤}#TÖgïGêm‡PøÕK¡œßÜß¡(È(€ôJû×}e˜¢ØØ@A¯˜…H<ÇÑ£GÌÓ˜E|v…§_|ã4<ÅôC¡ß|«ÅE»ðqǢ虽}î£ÚØp¦ÿ?ÞnÓÊ?ûjT¶ý0Q¦æó³Uh¸d2‰k®¹ff¯ïwØßCh°ŠY•Š]Þ» õîkCôÞ—OQ*Ø“Ôý[uçó‰Î݃°ýf´»¿n‚>ø¶¿´7÷<û;öç©ü×ÿéyOB) ì› ó‹ö?é4d˲gÙôngðÇlÒi¿?ž¶~K`å÷¬:ƒP~‡–eµgΈYc½ÛîNùJÅ~¼;“7÷îÛgÙ˜÷á‹bÒ:øÐÖX,¶ïEëõ:‰ÄÄ“˜b¼¶¶¶ïâ½îùòÓN‹Ðu½cê˜{ ™žÜz1<ï&zÌ66öõzäúËP¯û,Òô–ö¶=ˆ)/=;ÝþEEƒÍ;¾1×ò¹×h÷e²˜ ˆ¢TÊnmNRîÀÙ¬ýôA³Q2àÂ…hW¾ÃÈïeî ÖK.iÿ:d_îeÃFQ2g »ãT¬ws\ öµÁ¯ƒù Bl½ÁjY@êFæéÿ[Ÿú6,˾4 @: õkïÃÆ?ùÇ_ëçï‚¶þ¸ó\ô”½Ïzùe%”ŸZ‚ü‹·Øõ©a׳$ ÿ2èµ¶Ž:!ÆMÓë]Ûö•ÊþÙ–íDíïæS:šH$œÍ…E¯¦®ëûÖªŠ©Àæ»èºŽF£UU÷MN&“ˆÇãH&“ØÜÜtæÁ뺎T*5Õ‰º/Qžu4u< ýD»„£¨bn¢èL§|V®€T PÓê^¥ã5^ü}(düˆï±FWí7lǸ˜‚ânxŠQÔ­­öœ\Q“ÓÄÔÞ1 "G“˜ñî÷ÔBŸ_ex·™ïÛ×ë ¢Õrþ›ÏÛ_Ýæ¢ŽÒóKĽúoÖ ä—¡§·P.³n$A‰mÀ‡Î˜1hw~|™ÏÞH×:SÉ;ˆuw÷5äé)È/¿ ² dßú0 /»(¼ÃïÓ8‚ãP©T‚¹ÿj6»¿sÃØ›æº63û±‹‹íe{²<ú”tÙH Öd2‰ååe,,, V«¡Ùl:Ó›Í&Î;‡ÕÕU'Ø///ï»OT¼WVV°¼¼ì¼W,Ãí·ßîÙI‹völ°Š„KB¯¬¼{·¥R€7Ê'?N¦E:¸üˆo˲Æë¡t'’PÕý½ƒÙl{‘\.×1’ëÚéwÇÛí_±~\’&š)E„2°sÌ,é’¸&T:2™ÎJúâb{RH#’1ŠËI’Í<¥ZEŽ ÕÀ Jlϼ3fÜã¹»|P®ù2 Ïz=-÷³#õ¶HïúElüÁ›±ø/þ 
Òs.C&¨wxûyÑè‚ã@ïå…¾I¶€öÀT÷Œ™~=ìårçL4š‰‘² ”J%T«UÄb±ŽLgΜÁúú:’É$ # —¬¬¬ Í –H$ð¡}ȹ°¦ï.ü-Ëîýî[¿—åΠ½„îß÷T«€®_¨*$°±JþÄ÷ØY°Åº ÀŽw÷óEÖÒAëðúX\´ŸÖ]ÏKûû_®G 7?b¼—™4VÅtwUíY91M;η¶öwvöʉ'¶‰Ú—[«A”ض,k~°ENH§a)*4­ª —äÿòï°ñ†WBú©WÊh U7å×~廿­ï@&sý|΋z JŒ@±XœOnjض#“Ù?%Ö¤ºgšM²Ó§ÊÌÜH VÑHíeaaÁI‡í¥îÔÛÓ…ÿÐ1£¬‘@{t6C>^Æww졺+âîžE™(µH “ÍÚõýööc%G¤ñº w›É¾}†ÑN4Ö'hÅ©£êµ?$…ß,c{œz˜ò’NÃzé«Püù¯C{ôÒÙCKJ¥ÿóéÞ0“*íí¹D¡0ëŸÛèªÈÅQ(ì¯Û›¦]aI§9å+Fj°öÕsçÎ@Ç>b³Zˆí%÷?÷«ÓH¥8RDÁ1Uo|÷Œ‚1ˆÜLâ»`cÞMS,¶—~s*yMÓ´©+:–˜rÊW7Å‹Ìâ(f‰±“’fi¦SÝ÷_ó˜—¾[×/å2òº íßé·[¹ q ¹ÌetU̘é·^OìýÅ^ÅP©ÁZ*•°¾¾îü¾¾¾Ž¥¥¥±7ö‹{ôI¬)š¦Ò1ÉND³b2> cZÖþ™3b?HËêœIäMÓ°5n¸¦µ˜bo؇_ë‘—CyqÕLÏŠ‹˜-©i£ï¯N4©±gÌŒHìė˽ɾѲ÷rTÑ^gM4–eÍftUdÃsçéT?bc54Fj°®¯¯caaÁ™¼ººŠõõuÜ~ûí3ßœÞk¦iǯÈVº/VûíIé2p ,‘üÊ(9hôÔ‹™ DÝD…~è(”;‰†a´¬°oÖ4`këò½ŒñWrç{"ß’¨S‰] ˆfiìœ#¨Ø»ÈìŸõ¸w qÖÍÓL·m)«ÅÆÖ îȺ«H{½´´äÜ&FVÅ‚ì ëÞ\^|ƾ-Tm‹‹C_“×ÉXœJÅ“aOÓìŸ<˜h–FÞ·oq±½ÇŒeÙã®ÑU±6àZ«·—Ì=k@$†gç ̓išž}F£ãöõõuÄãq‹Å@M{ÇãðûÆ|Þ®½ìµB¥Ý‘ÓÍ4=ÍSC43Ná_©Ø-L7—c§&G¿Î™|ÞŽÕ´UòE¨Õ*{)”,ËšT¬xú N=ÛïC%šˆ®ëÃGX5‰2hŸ§ò Z­†\.‡Ã‡£X,Â0 ç§X,âðáÃ8yò$jµšßçãÃw¬ Ñu{ ˲ž|²ïó'LÔGäÓ´ã:Ÿ÷äå8…’‚¦WEG;û¤ãGìÊ­-6V)´†&£1Md¾³åŽ~*ÑÄN{‰"Y¡.#5XWWW¡ª*ÖÖÖöõþ¹o_]]õû|ØuvM{ »*/[[ö٬ݢ=¾çó¹¥…R&cg¼Ö4»Ð¯T&~©l¶…’(ú%¤©J‹öZír™• µ~kXÍ=ýŠE¤ýå~&ÑĆ.cÒ4®ã ž†6Xkµî¼óÎ[ZZB£Ñ@½^÷ûœ`YvÝ]’zôÈ‹9ñÅ" ü‚žëòL“ ÷(:Ö<‰…Ö@*5Õ4wÃ!šZÏé’ù<äŸL0X)úuÊTþÓ#Ðíã°þé[LuM¡6p «e±òA} ]ÃZ¯×ÇÇ>N¬_m6›~ŸdyÈZírÙn‘öé·,6X)<öõÈ+ŠÝh0ˆ5߆Åßý1l<ðC~ ‘'úM•,üÅ+¼ÀÛü>D"ïéº=ëѲ8ºJ} aÅbØÝÝõû8'ÒwO³½ìc†Ñ{Öd:Ík†B.žx^;;8)ˆº3¨?û;;‰Â¤W2š|žK3(:ú®ÑÛÖlmm‹žÎeìIDAT1Ó#õ5´ÁšH$Ðl6÷eî&.Åb1¿ÏÉѱ¤RÙ—XÌ>èÆ„Kt;l(˜Ü#Pú«ß åMÏõûˆ<Ó=URÿËïr¶E†iš½gÈ2§5ÒP#5X‰r¹\ßé¾Íf¥R Éd2P[Ûtд}=ñªÚ»3'•òû`‰F£‹Mó$;°a0áS÷>¬úý/bÇ EF÷«–Õ¡<õ7œ@@‘ÑwÖ#ÑFÊ|êÔ)4 œÃ¦Ý•sN¡§0q’І'=”]¥0°,fs§èq>i¹ é•ü>$"ÏŒ²Ï0Q?C“.ö(ëÚÚJ¥RÏ­k’É$VVV7ºª»çûö©Ùä󣬒Ä)ôB–åÉZ>& ¤ r>é:;V(ZÜëû¬ÏÞãŸC½ã&¿‹È3ûÖ°?n¯[%ÁH V Ýh[×Ôëugºð° Â~RUÕ®Ýôi°j¨¦9U¦$ðØ`¥ ê˜JfYÐu‰ VŠwÎÔ.AF5°ÁJÑѱ†•ëhL#M v‹ÇãPU+++PUÕi¬6›M'ñRà }ê¾^²Yfå£pè¨Ä™)Ì4;×j/.2çWG=]GYÊ3“5EÊßüÍË`ÜÓR)l-ü&ry߇D4;Ü?Æ4vƒµŸz½Žååe¿Ï§·1Ö÷M9PE47•ø1{*»/‰­-6X)$ ƒ…4Eγ¯¼ø·ÿÖžë^.û}8DžÒu½3C°¦q]ųk9ûö a•¤ÎUniC¡3`Ê{?Ý Vf¢¤ sgPÍŸI@ìU~‘gLÓÄÏ^[€rû˸.ƒ"É0 {Û&˲§w¥Ó¬xÐX|o°šF\¯×‡îÿ:Œ¢(‡MÝõ|f$¯Í:¾L”pÉ£¤ÂDó‰q´3¨j– 
å5Ïòû´é˜Wl›¦‰o¿õ€\ÎïS¦fž1žºä;ÑR:Íä146_¬F£ç4b±…ÎÉ“'që­·"7m!>â…ÁJwÂïS¦fž1nŽ•ËÀÆgÐD|i°6 èºÞ÷"X]]E<‡a¸çž{P¯×Q*•Æ~§’3 šÉØw[P,riMo^ñídÜSÕ}«ë@¥Òûy–ÅÆ*Mg^1.8±®ëìU¤™šwlÀ¹s1œ8q½ß§N„1¨VY~ÓĆnk3j 6›Í‘ß´V«uî‘êÒh4P«ÕpöìY@,ÃÂÂ677±²²2ÖÉ™#,HwÓ´g)°ÁJÓšW|{SºiZÿç0±Mkž1.H’„Ô_DõÓ×ú}úa~Äöý÷¿?öcŸñûÔ逘wŒ; —ØSNSðl„5‹!™LŽôØ……¬­­õ þö¾¯B"‘˜h½2BOŽi¶sÖpùya^ñ=ˆiÚSÜ{QÆ:MgÞ1nL°+=4SóŽmË®ùî?௻ÎïS§Âò[f¹MS:šH$°¶¶6·tQ4›MÄb±ž÷=üðÃ0 ¥R©ó"’Æ4í©“q:8jµ>øÁâ±Ç›û{OßðÐC¡T*áĉC;‡ {Oa:˜J¥xà_ÞÛó2v磮ê?ú•EÀææ&>ýéOÏý}gQ~ë:ðšo} ×\sëÜχ‚I”ßFñx|®ï=‹òÛ4M¤¹… íuðq a­ÕjûF*›ÍæÀÌbÓ4µxww·ï}W^y%n¼ñFœ8a'.pÖ¯æóûÖtQUnyvÐ>|GÅu>ôhOßpÕUWáĉ8|ø0»×2ñÄvŒwQû§Wè?>÷Ó¦9;qâÄÀÊó,yU†wÓu ý«G}9' –D"£GâꫯžëûzY~ †üÐwjœ9@Q~Ï;¾Ù”߆a@e¦<Ú#êàãÆ÷DS‚Ïœ9Ó3³˜ÜÓº êiºòÊ+ñ¼ç=Ïé¹t¦ ˜æÐEÞ¼Ž–x<î[ƒuÒøì O2™ìx\üÊ+{>¶R±ë?½¦³^}Éd×\s/ïíU.èºY–™Å~5X½.¿Àøä7qÓ!®_¥6Q~ûÑéèuù ÀîX/ç~.L¢>—ë,‰ÞG÷´„I§EH’d7XÙ"¥€ð2¾Ø-Ò®ø¶,;é’Ø‚¸ÛƆߟE™ç1àâÅä² òÝ,bû—_òŸñôø•?ŸÈK^Ǹ®ëxãõ׳§œ¦¸k<G2™Äææ¦s›®ëH¥R“½ / /ãÛ²,»uÚ5ìdvØ«*·;£ùóº 7M>zˆ Vòçõ/yâ‹xà¥/õûÔˆxã†aàeO=ÅD14µ@f°XYYÁòò2jµš³ÈûöÛoë5LÓÄ¡GõûTˆöñ"¾½m› ÃÞ©‹tížB™Írû&š=¯b°ã|í»d)€Kþò2¶+à&ÝÂeÿñ'ü>-"‡×å·lY¬tÐÔ|m°&“I=Ù% |èCB½^w7.Ó4ñ£W\ÁEOä›YÆ7àÚ¶i@Œkš=Ú*Â=XÉK³Žqáú¯~•Ks5ØÖuà_ˆ¿ìe~Ÿ.@óˆqÓ4qÕUWù}ª#7XÝI–D±^‰—¼Úgœ}]ûyéc—_îÉñyÉ‹øÐs‘ª®·GXM³wL×íÄDóàUŒßûþá·¾ûïñ~ŸÑ¯b;÷ºsøúßüoƒ(@¼Šñc.àÒ>™ß‰Æ1´Ázøða,--í»Ý“Êö¬OnwxÛÛü> ¢Ùé³F[Œ¨f2v#Õ4íéÀL¸DasÅ#ßÃ[N>Óïà òœrÇ ¤6¨r&EÔu÷ß,.ú}C¬ñx|ß&Àa`Y.;†›™t‰"H×u¼¸Ù^÷º}÷©j»+:îS){¿aÖ‹(lòý;(ï8ï÷ayJÓìòÙì•Ê("^uÙe\ÎAž\–`¯˜¦‰þüÏ3K0EVìu¯ …}·»¬Ï3Ñ…×Kžþt–ã9ºnÿ+3¶)¢ ÃÀŸ¼àì)'OŒ¼†µ^¯c}}KKKH$í„/]z-àö ×…ÐAdßå²ßGD4™·¾õa¨Oÿç¸Ãï!ò˜®™LpêKD^³,ËîX'òÀH#¬º®ãäÉ“¨×ëˆÅbÎí+++8uê’É$b±î¹ç¿Ï‡èÀ©T:Ïçý>""oÔþÏÓ¡ùŠß‡Aä)Ë䛾'ﻯoç?QØqº;yi¤ëêê*TUÅÝwßx<îÜžH$°°°€µµ5œ8qgΜñû|Aé%òšˆoÃ`•¢IÓ€¼pûÒ—ú}(DžÒu@yæßá²~ÐïC!šÓ4Ù!CžÚ`ßyçwçwb}}FÃïsüä 7ìz"Š˲pÛÎòùvb%¡Zõû舦§ëÀ³ŸÇÍo»ß‡Bä)ð¬{ÙeP™\€"ŒKóÈ+C¬õzñx¼cd€3 X÷ïììø}N€Ÿúæ7ý>¢™zÁ¹sPU;™’HàAº<ãb…‡"Ç0õÑ?Ãß=“Û5QtýâéÓL*FžÚ`ÅbØÝÝÝwûÚÚ‰„ßÇ?/Ѝ+ž|ß¹újärö«eÙ·†½… Q˜‰Äa–eqJEN¡Hü-¾Øhp„•¢É²ðàW°ÁJžÚ`M$h6›¨Õj'îwºúÅ0 {J^(Q×|ùËøâ¯†eÙ{±d[üNf¦ioÅĤE ìÆ7E–aàÓßûžßGA2Rƒ5‘H T*¡Ùlö|L³ÙD©TB2™ Ĩ«eYxî3žÁ+E—iâ}_ýFç+Q¤Ó@ZÒñ›¬ðP 
ËœîNÑ¥ëøæ7ú}!#íÃzêÔ),//ãäÉ“XXXèh”ž;w›››Îãˆhönüæ7Qúæ+ð{³ÉDG=“cST|õž{ðL®ñ£(2M<ø}߉6EÔã_ø¾tõÕ=×4í¦i;e2öí†ÑžE&IöcŠEû6˲+Iöýé4Ǭ¢f¤k"‘ÀÙ³g±¾¾ŽR©´ïþ……,--íKÌä—+|ßdE‡¢ì©+!ÇŸp€vnYv&¢°kmoã1niCQdøÒ«^ÅõÙYOûìgû¹Ÿë{¿iÚ?¢¾’ÏÛ RÑH—†{Ö¼û~I²ÿ-—;¦ºÞnØR´ŒÔ`ì,À§NÂÊÊ êõºs{"‘ĺU·W\}5¾óÆ7ú}D3c>zÊÏr~ß›aFOÞw¾óÚ×ú}DÞ+ðׯRTYš×_?ð!ù¼]g ÖtήýˆŽy¡W§û5²YûßBaÿ€.#7X…X,†d2é÷qô¬›nŽóû0ˆfækßy~ÌÕ9_©Ø>QT\ýo ~ë­~‘÷†¦q„•¢É0pþª«:2°¸lmÙ ÇΧÌêR(—íú…ߨ ÖPP¦J¥H;âü’+ÄÅšŽBÁï##òÆSO=Ť4iŒoŠªxÁ œk>oOÕÝØðg”Ó½VL'¦ðš%˜ˆ‚Gù‘+:¦Ãd2죡1 ì<ã¢È2 ƒñM‘d)*~ÿ˯ÇÙ³/ÃñãözÒjÕÿ:ŠiÚûÔs}k8±ÁJB½FRM8rÄï##ò€i¢þÔS~ÑÌX–ÅVŠ$Ëyäœ,¢™t‰è’åöeDaÆ =E™aŒoЬ°Lw/€\®ý{>oÿ›Éx[²,»Q,Iíu¼ºnÿ+¶à´w¬Hª)ö®•åv}¯_dYn'›[¹ïé~ñ™ˆÇ†ý»¢ØïÓoµ˜jmöó¥ý:GŽØï-ÎM„Å7Þ„üÁsc}Žl°E„¢ØŠ»& #Ã0BQá!š㛢*L2îËP4º„qiâw˲·ïéŽI¯k¿­™EfcÓ´Ÿ§ªíÛúí[ë>¯î©Ùâ9ƒö¼ûâŠ?£ø¿h<÷;¿îY~ÛÛ½[«=€sãµWÙ`%Š Iâz Іt:Í =E–®ë¡©ÐM"Œå·¹2™ö¨d?Ý4ImiVwâÌaïØFw#sPƒµûyƒ~ïw^ƒ>?°ÁJ¢w(ìX™§¨cŒSTqIͬD!IÁËÈGDD Ã@f”!¢J§Ó~EPà³×ëu4 ¿ƒh&ßuŒqвIã›#P,¿)(;ÂÚh4ËåP¯×ªª¢X,ú}XDž`|SÔ1Æ)ʦ‰ïB÷6¢€aùMAØÖÕÕUÄãq†{î¹õz¥RÉïÃ"ò㛢Ž1NQ6M|+ÝMˆ†å7M ¬FµZ KKK€X,†……TGI½EpŒoŠ:Æ8E㛢ŒñMAÈëÎÎ ‘H8·%‰7~yyÙïCðT­VcßBã!j×ì¤㶨ÅC¯ÙI0¾£ Q»^'Åø¶E-Â~Ír ë ‹¢Ùl"‹í»ýܹsøã?þc|àÀÑ£Gý>Oœ?>RÌÅ‹Ñl65~y衇ðÐCáÊ+¯ÄÊÊÊÜß’ø€¿û»¿Ãë_ÿz:tÏyÎsæ~Ü^ Jüs£ ýÂV†3‚/H#–ßþ R  sZB£ÑD¯Ñ´ßuŒqŠ2Æ7E㛂( Öx<Žd2‰ÍÍMç6]בJ¥ü>4¢©1¾)êãeŒoŠ2Æ7Ñ%­V«å÷AôR¯×±¼¼Œx<î,ò^[[ Ìp6Ñ4ßuŒqŠ2Æ7E㛂&° VÙ¬’ɤ߇Cä)Æ7Ecœ¢ŒñMQÆø¦ tƒ•ˆˆˆˆˆˆ®Kßõ®w½ËºZ­†K.¹¤ïTŠz½Žo}ë[=ïtß(÷ÏB³Ù„išxö³Ÿ=Ñ1ñœhrÓÄ·÷Ï ãùÐtÅxãe8 ­üvLA<šËïàŸÓHZ43ïÿû[¯}ík[ÇŽk;v¬uÇw´.^¼èÜ¿³³Óºí¶Ûœûïºë®‘îåþY¸xñbë®»îrÞó¶ÛnkÝwß}ž³çD“›&¾½¸Åxχ¦3(ÆÃ,ÃI8hå÷°c âùÐtX~ÿœÆÈ,ÁQÐl6Q*•°²²Ã0pÏ=÷8· «««ˆÇãÎýõzݹÐ}£Ü? ¥R F÷Üs Ã@<ÇúúúÈÇÄs¢ÉLß^Ü? 
ƒb<ŒçC“ãaŒ–áÌò{Ø1ñ|hr,¿ÃqNcñ»ÅU÷Þ{oëØ±c·>}ºuÇw´Z-»'ãØ±c½#ï}ï{[·ÜrËÀû†=wV.^¼¸ï=wvvZ§OŸ阂xN4¹iâÛ‹ûgaPŒ‡ñ|h:ƒb<ŒñÀ2œ„ƒV~;¦ žM‡åwðÏi\O÷»ÁUÉd†atܶ³³ƒ«¯¾Úù?$ çþD"F£1ð¾aÏ‘).‘H ^¯£Ùl"‘H`eee¤c â9Ñ䦉o/A1^«ÕBw>4A1µøöâ˜ãáqÐÊïaÇÄó¡é°üþ9‹ Ö9X^^v‚áìÙ³00¶··ûÞ×l6>Wì—åµÝÝÝ}ç²»»‹µµµ¡A=Ê1ûqNäqã;¨ñ0(ÆÃx>äî×u½ïcƒ,é—ƒP~‹ï Ö¹È;,¿ƒyNãâÖ9¸ýöÛ±´´8sæ ;úyøá‡ûÞ·»»;ð¹"¨½æîݹûî»q÷ÝwcaaËËËCÏg”cöãœÈãÆwPãaPŒ‡ñ|È;Ý1Æx`N½„ò{Ø9±ÎEÞaùÌs¬sL&±°°€;ï¼ÓY ívïõø~âñøÀçÆãñ™œÃ‰'À™~Ø…@³ÙD­VzLÓÞOÁ5n|5Åø%—\ºó!ïtÇxÔâ›eøÁuÊïañÄ:y‡åw0Ïi\l°ÎH©Trz>±>>  sêM£Ñ@<xß°çÎÊ é±XlêcöãœhrÓÄ·÷Ï ¿ùæ›Cw>4A1µøf~°´ò{X|ñ|h:,¿ƒNãbƒuFNœ8Z­æ 〮ëÎ?#™Lbss³ãþT*5ð¾aÏ•D"±/…v©Trzf¦=f?Ή&7M|{qÿ, Šñ×¼æ5¡;šÎ Z|³ ?XZù=,¾ƒx>4–ßÁ?§q]ÒjµZ~DT­®®bssÉd;;; ¤{Núòò2âñ¸³¨ymm ±Xlà}Þ;+â=E/Õ8çãÅý,ÓÄ·÷Ï ãùÐtÅxãe8 ­üvLA<šËïàŸÓ8Ø`1wÊô^ë$šÍ¦ÓÔ}ÿ ûF¹¦=¦ žMnšøöâþY˜å5ÉøŸA1Æx`NÂA+¿‡ÝÄó¡é°üþ9Š V"""""" $®a%"""""¢@bƒ•ˆˆˆˆˆˆ‰ V"""""" $6X‰ˆˆˆˆˆ(Ø`%"""""¢@bƒ•ˆˆˆˆˆˆ‰ V"""""" $6X‰ˆˆˆˆˆ(Ø`%"""""¢@bƒ•ˆˆˆˆˆˆ‰ V"""""" $6X‰ˆˆˆˆˆ(Ø`%"""""¢@bƒ•ˆˆˆˆˆˆ‰ V"""""" $6X‰ˆˆˆˆˆ(Ø`%"ßèº˲ü> ¢¹ó2ö-Ë‚®ë~ŸÑL°ÁJD¾I¥R0 Ãïà š;/cß0 ¤R)¿O‰°J¥Ó4ý> ¢¹`¼Ï¬DDDD41MÓX§ƒñ>O÷û:Ó4Q©TöÝžÉd Ë2€vOŽ,ËPUº®#“ɰ§•‰©`étŠ¢8¯‘Ïç‘ËåP©T`YEA:v^O’$är¹‰O4Ì(ñ Åb–eõŒ1¯bœñMó4Ø/ (‹¸ä’K:žgYŠÅâ¾çMjP<ëºÓ4¡i@UÕŽøTŹu ƒiã]Q'n%IB&“$Iדâ«Ï,Ë‚aήëN°»X,°{t‹¤X,bqqÑyãÇw\`â~˲`Y;¦¡‹Eäóù‰O4̰ø€l6ëüÞ/&ÅkMãŒoš§yÄ~6›…¦ixðÁ;Þ7•J9!"/ŒÏnîø#Q¬“PXLï_üâqäȧ³QÓ´Ž%Œë µ(0.\¸ÐR¥•ËåZ­V«µ½½ÝÐÚÞÞv#ËrKUUç¾­­-ç¾r¹Ü’$Éù@«\.;¿+ŠÒÊd2Îï¹\®¥ªêÄ'Gw|·Zv̹/—ËNŒyãŒoòˬb_<¿Z­¶8ïãŽk"¯õŠgUU[ÕjÕù½;¾Åm¬“PØŒï¢ÖI(|Æw³Ùlº®C’$T«ÕŽXf\kX"ŸÏÃ4MT«Uç¶A[˜¦é¬SrëõAä·^ñ= cœ¢`ž±/Ö f³Ylooû}êA“Ä3QXMï²,£Z­:Ëø,ËB&“A¹\öûtB Ö¨T*¨T*¨V«½,’$õm´*ŠâôÚ¢÷ë–(HúÅ÷0Œq »yÇ~¹\†eYÐ4ÍIBä•Iã™(Œ&w‘8©\.£\.;ÛŽ)Šâ$L¥ñqJ°Ï Ã@6›ÅÆÆÆ¾ŠH:v{v…E$ÝPUµãw ½è›((Å÷0Œq 3¿b_’$'[%·] ¯Œσf……É4ñîYìWvðL VŸ‰Œ¿©T —\r‰ó“Ïç!Ë2Êå2R©R©Ž?îL •’|>ãÇ;$766ü>%"Ç ø†1Naægì§Ói¤Óivîg†Å³$IÈçó=·! 
›iâ]lasäȤR)9r²,3·À”.iµZ-¿‚zSÀdYvöaû4‰Š‹; ‡˜FF%Œq:¨ûîúŠ;¡ Qïbv¤$I\Æä6XÌ4M9r[[[P¦i"•J!—Ëq<E“.˜{J°eY$ ™L†U""""":8ÂJDDDDDD™Ö}ìcø³?û3¼ä%/ñûP<µí;yä<òÈ#ø=òÈ#øîw¿‹ßøßðûPFöïþÝ¿ÃK_úR¿Ã3AНéš5 ëëë~ÆÈX†_®ÙGyW_}5þý¿ÿ÷~ÊHX~_®WÃ0°ººŠx<î÷¡Œ„åwð隤üŽLƒõ{ßûžõ¬gáĉ~Šgî½÷ÞHÏùóçñÔSOâœÎŸ?¯|å+~ÆXvwwñÙy%Hñà• ]³÷Þ{¯ß‡0–áÁ¤köüùóh4~ÆÈX~_®×{ï½W_}µß‡12–ßÁ¤kv’ò;2 Ö+¯¼Ï{ÞóL&ý>ϼõ­oÔù>|G Ì95›M¿a,W]uU`>;/-¼¤köšk®ñûÆÂ2<ø‚v͆© gù|Aº^¯¹æÄb1¿cd,¿ƒ/h×ì¸åwd¬Q´°°à÷!x*‡fz Í^ã!j×,M'jñÅk–&ÅXˆÚõJÓ‰Z<„ýš}šß@DDDDDDÔ ¬DDDDDDHl°Q ±ÁJDDDDDDÄ+¬DDDDDDHl°Q ±ÁJDDDDDDÄ+¬DDDDDDHl°Q ùÞ`­Õj~ÑÌ0¾)êãUŒmŠ:Æ8…ÅÓý|óF£ååe†Ñq{©TÂúúzÇmÉdkkk~.ÑXßuŒqŠ*Æ6EcœÂÄ—k£Ñ@½^ßwA;;;XXX€ªªÎm±XÌ—ˆh\ŒoŠ:Æ8Ec›¢Ž1NaäKƒµV«A×õ¾÷ïîî"™L"™LúöÁMŠñMQǧ¨blSÔ1Æ)Œ|Yú°°€µµ5¬¬¬ô¼_Ì©/•J(•JœcO¡Âø¦¨cŒST1¶)êãF¾']êGLUh6›X^^ÆæææÀÇÿã?þ#>úÑbyyÙïC§€ÛÜÜÄéÓ§÷­Û˜§qãxà‘KÛòò2Ο?ïë1° §Y)•J8}ú4¾ô¥/ùòþ,¿i–Dù]¯×};–ß4+¢>nùíkÒ¥^–––pûí·;sæãñ8J¥ú>ïùÏ>Þð†7ôí1¢6Ã0 ë:dYF:î¸Ï4MȲì÷!ÎÔÂÂâñ8Î;7÷÷ž4¾ছnbÒƒ‰éNî58‚eYNg…¢($ÉïÃõÜÚÚšo–á4k+++8qâÄÜËp–ß4¢üN$so–ß4k“ÖÁ'j°Š)õzKKKˆÇãh4žj<ß÷: Î{FuN½¦i}GüE¢(€J¥MÓ:•¢®( TUE¡PpîLÓ„¢(eº®#ŸÏCUU˜¦ÙÑXuÿ_¼w÷ûè¨ìëºî4ıH’˲zž×ƒ^Oúf¼ûÝq̳Í`À§?} žxâòù½éžƒßn†a81aš&$IrâÈ0 §CEÿùc¸á†Oà÷ÿ{xë[/xÁwñõ¯?…~ô\qÅ_#Nã#ù%<÷¹ãª«>…›oþ2ž|ò <öØKqÅ߇ë®ûçs=tè8®½ö,+{îy1^óš‡ð¢ÝÓÔö"iüø·JÁ‡?ǧ?ý=;6ÿkTã[4ØwÃM×uT*§q ìﯡ( r¹666ú¾®ëÐ4 ’$A–ed2™¾³LÓD¥ReYPUu_Ü †átêˆæ°†¦hT‹Eç\EaÎç`ŠÅ¢ówãV|Ýï3¨á)îîkRŒ$[–å4äÅgño|c¶ô>¢ãDŒípu!щèîàÄmîNRQF‹çˆN÷^¯ï®uÿ©ªê|ˆÛÝßyî>¢î%¾×Äí_þò—}ùìãTc5XÖ××±¶¶†d2é̯WUñx'OžD½^ŸjC,s¦Bˆ×Y__ïøÝOÝ;˲:“²,;·‰ÂQTº®;•ël6ëŒlJ’„­­­¾ïÙ=m·I’P(ÉdP©TP­V‡NïÍçËR!êÍ¢<7 À²E‘ I*, H§ÓjΨh¥‹€ªŠ(ʵP”ŒsŸi¢-žJÙ¯+Ëöí’ôf˜&ðñŸÀ?ýÓylnÞŸû¹ ìÞ¿ûï}ô8>ñ‰Ÿ\}õ—ü~ðíFQ.w O{ZÇŽ}¦©áõ¯?„¿ù›8>ùÉïÇ•Wþ'|æ3‡°°ð0®¸â ë:.¿å¯áСùO}z|÷Ó=ÅV|‹QK÷}:nªª"“Éx2Ú(¨ã<¾WG›{¤tTâ9¹\®ãöQ¯U ]éq7fqÖ¢C@Œög~êÔW4qà ŸÀöööTŸõ¤ÂãÔŸhˆëÚÝá"~¦}]1óÇý5hÊ~£ÑÀO<1×ϱ={½ÊÅî8 îF¦išN¬R©8ß;â{ hwú‰ÛÄl™îNRQÖ‹EH’„L&ãÌÂñÚkæ$INãW’$¤Ói¨ªº¯+¾7Ä1ˆUQž†w¿ûݾ|þŒq ª±¬;;;ÐsJ€f1ê:©D"¥¥%œïÄ¿¨`V«ÕÀLG »3(—³+ûù¼} õ+TU…®·¯Í}¯&)xå+í;4mÏ|æ'}9+¿cœFã¾Þ»Š@ç”è`­º p˲œQ!1õ^Q§.:¬Ü»¢b.^[”-NEß=R•Ïç‘Édœ†‡¦iÎc®»î:¼éMošëgvcÛݸõÃ0:ê:ã², Ùl¶c‰ˆ,Ëü–e!ŸÏh\Ø×È´, šfÏ¨ÚØØ˜*‡;FÅ 7q 
åryàùö꼑$©çíýê8Š¢àðáÃÿ4rŒS°]ÒjµZ£>¸^¯ãäÉ“¸çž{:zaVVVÐl6ñº×½Î}V£ÑØ@î& Ó¬£SøD¡i666 (гT|a»{ÅæEŒzŠ·4 {”ro€×i`º…¦i?'›µŸ'Iƒ˜ãÐu»’ím /âeãÆ7`gœfÊ}>Ÿ‡eYNãSTE‡I±XD&“éXï©( ÊåräqË4펖Qéº}ý™¦} iv‡Žh ê:œ™ €}ÍJ’ý>†Ñî$F€=cÁ4^ƒ©'OþΞýuß>#?Êð(rOWì÷}Ó½§¢{ª¸h¸ŸkYR©”Ó1Õý|Ñk¸Å÷`?bê½èÌk¼{ùLÓìXÓïu‹Å¢“·!N;e”ŸñâGù=k"ÎD'|:v:åÅZKîÎüÅÅÅŽŽw÷t\wN1²™ËåÉd`߇£¢î)¾Qæw¼°ü¦Yš$^Æa£¨ËËË8uê”s{£ÑÀéÓ§Ç=[ÇÇçôÑÙ…áââ"ÊårGÏÚââ¢Ó«çî]w a/ù|{ª¬`öˆh:Ý9ò)î3Íý Ö\®}[>ßY™ÿnm‰é¿Þ4Vû=ÃØX ‚yÇ·X§Ù½þYTtÜ7δۃD\[¢1iO]ow öu+®ãrÙ~œ¦Ù¿bÚ¼}ݺ¯î:X:m_¯†aß׫S¿×Œ!{Ø×ÏjÞ1UbtSÌvë•EcÒ=ËGpOW×¾xEQP,; ýÝIÑúeê½ÐæLï×6¢å‡°Çv÷o‘„NQd2躎ãÇïÍêÐ;êEÝÊå2R©”#¢ã³×2ñ}“J¥Fêqw9M'ì1NÑ3vÒ¥µµ5äóyœ'AÐnlºË×\®ÝØ:¯ÉjÕ~J¥½öÛƒÜhQ"+©,ËH¥RN£ÕI[UUgDK$®SzÝÓ'Óé4aYšÑXDÜí­YØAA<ÝyÜúm' ´–nbJ¿û9Ý™±û½X_> ‘Äs’רÕjø…_ø…±ž3öÖX,]×;öh ³J¥âùªàžBØëv&ßÑ^ƒ3fhÅb±ï^rA6JR#Ãh'ý’åÎì×¹ÜðQNIÚ¿nÔ£¤4*±lD4DFQ±Ö´{Û²~YCÅs¶¶¶¦ªHÐÁ#2êFµs’((5(ÝÜ™µE<·Aå»ÈÜ.ÞËÉÝý÷^¿ƒö¤šIvê«Á‹Å°²²‚R©„F£Ñó1aÊ6Îø$ÄT_û½ì¯"Œ£«•ŠÝM§íF£Èl-Iömù¼=#A$5å³X÷Ý=ÚJ4KîÑUÀ®„L{Í ê'êE$Ò"¢éôýì·ï|?"QØ´ÈA³$zmIUc¯a]]]`o$ÜK˜¬•Jeê¬s–e'Mê^¾!F{DœÉ2×”Ò|U*Ï×Ìþ˜íŸíívâ0˲os¯ï®TÚëM…B¡s0Ñ´ÄKwÆ]Ñ›N§Q(œÑU"¿hš6pÔžˆzsï-,~úÍJ“eyê=~irc7XG ±ÍǨ,ËNÆR(´×œjš=ýÐ4푟 »2ͤGä·J¥Ò±±zPìÕ÷÷MùU±U’{J®û2UÕþSyY_#¯T*H’äd5àdðI”:„t:Í ù*ŸÏ³Ó„¨‘Ôν“›è¶/4ùkì+lnnB×uÔj5ö¦Âªª†j]k¥Rq6ÆU±hW²톩¢tn}aYí$0üî ?‰ ÖƒV‘6Œöõã&®îëK~X\\Ü·—¨¦iNò¤î‘+‘=¾;û*Ñ<躎l6ë”ïì4!ê\[êΔ-vIpoFá3vƒµT*a}} XZZ4›M¬®®¢^¯ãÔ©S~ŸÓH&}2M{ÔÔ4íJw.×Ù`-—íéÁé4“%‘¿¼˜îî5wcUQìFª®Û׋$±“‡ü‘Íf!I4MsÖ‰ÊΰF 4o–e!›Í޼—.Q”™¦‰l6 ˲œ½}%Irò°‘c5XÖ××Ì£nªªbyyKKKˆÇã~Ÿ×@º®O´iµhß*Šý1µQëT9-‘ü4ÊYóR©t&UA¬S XÛšˆl6 ÀÞ—²R© ŸÏcccÃi¼M±XD:fc•¼b±èìôÁ훢o¬ëÎ΀ÞÙ±’É$‰vvvß`Õ4mìlz†ÑYÑîµw*ÐN²Dä?GWMÓn„ªª}-d2ý£’Ô¹6•hž*• ,ËrfÚd2hš†J¥]×ç¶77ѨD–ÒqroEeYÐ4 º®;;|¨ªŠ­­-Ž ­am6›ˆÅbûnë·ÕMˆÜÇ©Œèº=RÀü5DD¡¾Ý¶z&ïewäHR»óFtì°ã†‚®Wvßr¹Œ#GŽÌlon¢iˆ©ÀD…iš(‹Ðuétš‰‘°±¬Édñxù|§NrFRNŸ>í<&È&Ù«ŒS),4M›ÛÔ˜bÑQµ¬öh*ªÝ{§ bÛ‚ L§'rÓuªª²²N‚»¡šËå8ã…Æa ¾[o½µãöx<޵µ5¿Ïg ‘AlÔÀ7M{]*+áóÚRdõÃ@.‘ç]'\ EA$²©TŠ Uê0vƒ5‘Hàî»ïF­Vùsç'NœüÈ*€žÉ¢úÑ4{_Õj•)Ć×^g.5M •êlœ‹ìÈ¡`Ëf³0]™ñÒé42™LßÑU¢ ²,‹ëªéÀÈçóNyM$L´†µÑh ‹aeeP«ÕÐh4ŸlÉ0Œ‘×T*l¬R¸hš6“ébù<à^Ò'FW™÷ƒ‚*ŸÏ€3Š*¶Ñ4­#ÑQÌs©‘ŸLÓ„¦iL,Fû†1r611˜(L¼5ÒõöUÓ´¯’d' cb> "‘IÕ8I’$lll 
—Ëqt•BG$œ!Šº|>ϽS©§±¬Íf¥R KKKëU×ÖÖ°²²‚ÕÕÕ@g gô‰õ ›q:dú)Ûû¦ŠßEŽ2Y¶³kZï-ˆüfYòù<ÊårÏkAUUf¦Pûj3ÙE®ë°,‹S©§±¬õzÍfÓ™ ì¶´´„x<îìÕ4º®\™×u6X)œ¦­Ô躽‡ªø?Ðþ°GV™÷ƒ‚È0 ¤R)d2Vî)28˜±lƒŠÔÏDkXûÙÝÝõû|u˜irº#…aS¿†;qj¥tçøàuAAT©TP,Q.—™I•"EÓ´¹d~'òS>Ÿgg# 4Ñ>¬«««XYYA,О* ØY„ƒhœé’ªÊŠ9…×=óÒPèºî$éàº'Š˲€qM‘¦iLÓdlhâ}X777Æi½^G,ÃÚÚšÓˆ ˲Fîyçt` ›q¦¼÷bYÀñãÜW•ÂG$¤a¥ž¢F×uΠHy8‹€†Ú`m4h6›NãTìêë:êõ:`aaªª¶± ؉ F­ÐtoãAÓd>Õ4î«JádY³þR$†Á)’iÅb™L†e8 5Òëúú:’É$TUE"‘€ªª¡êù³,kä‚߃¥€Ds5í«¦±“†ÂÉ4ÍP}J×uäDšv¢âž«4ª¡ Öx<Žb±À.<777Q¯×‘H$œlŒšFì9é9wúU¦Ùs´ÙË<Ô¸=ñ¦Ùþx-‹±/LÓî!Tèÿ8˜¦é÷!Ш, $†ý_ ýµ(nS;‡„®·3•ãë_ž|êÛo†$ŒËëW)êÄ”wÆ8b¬mmTUÅ©S§pöìY$“IÔj5œog½ê¾¦Ýó´5­}>ƒ†™r9 ›ÅŸú°¶6ýßc€yÇx7Ã0Ø`õÙ(, Èþì¨þ^ʉKüÛìòÂUår@ÎÊy;–d²¢Ÿ| Ëè.]Ô³=„ØæúÕñuÅýês–µ¿_O–Û_¢ê ˆª‹x-Ñç(ª1¦iíÓ~¯Šþõ?ÿó;gþYùã–eA×ufö€øó‰âB\Ý× ø6Ívg¡xMÑ1(fl‹ßÅu0¬ì¯Tìb^¼O6k“,/Åk_{ÿXçéÉ>¬‰D‰D+++#=¾Ñh ^¯c}}½çý«««ˆÇã8{ö,šÍ&Nž<‰R©4òëw3Msä‚_´-=k°Š†`&cÿu»ÿºÞ5‘$ûÍk6k?.¶ÿßkïQ’JRÿ”ÇÝ ¸tÚnçrí¨¢ØéÕX­TìLj¨W”ÁZUµ#ÎM”î‹‹ö¥h*Š]‘Ïçíÿ÷+àÜ)n»¯Æ9šw|ãïÁ* 2OژŢý÷é÷y‹¿µiÚ×Á 7ÛÛvÎ3^:=øbÏ·,{4°Ÿ“Ï·;DÜÇ):•DçŠ$µcɲÚ%·JÙ÷õê¤é5•]U{7š …vÐO:=ÚzI66ðÿž<‰_Ÿü¯:1ÞË8Éôh¢cGÔ\D‡LŸŽžl0t Ò÷þéŸ`~ïfä~írd~åúŽÇI†ŽÂ‹ªPîØ«ôöË€…ôA‰m®_»Õ½-¡(šÝ}òâë »änŒ..vV7òygF;¥÷Aɯ•Ï£øÈÛa]÷"'¤Åñ°•³;Së·Á¸ðB\xÍ¿°ŸöØ„öå@n5†K/-˜M‡cb¼R© ÍL#³¬v ™¦w¹œO¢jÜÝ`ì˜ìnŒ@>kÁ0%§Ú\©øÜ瀋k®œ¸¿xòOý3 b¿qÁ0:êÙÙ¬ýÕÉ´'~©j»7Mû¶²Ý;”>ÊòÇq×xÛÈŸÁX ÖZ­†d2Ùñ»®ëˆÅb8qâDÇ}Ã^§ßâF£Z­†³gÏb±°¹¹9UƒuôÇzØÎ&*¹Ý ­ÅEû1b¤E×íé^£3bd°›ÍÚQë>hY¶#õ:©îßE£°XÞøÚQ:ª^ï?NåD4æ'}¿9šw|ã¯_õlYp±hÇj¯Ï[4uÝŽQÑP ×îÇŠÑ|1ÊØ«3FLUû9Çw6âúU®%É~ÝãÇícÇÓëCµw¼‰šŠÛ¨ Èî÷èÇãÏ0k»1ÞKä.uwIOúÝ]储½w—½š†Ê/ÿŒ+æÓÖ3ž éÒ&”û/ù÷È|hò‡KOÑuŸø¶”’}¨*Ì?ú*¿÷màWÞì}ºç}¹\®U­VG:&UÝûÏÆF«µµ5Öùì³½Ýj ƒïï¥PhµÒéý·ol´Z.tŸœ};eP¼xù^Åw«ÕjÝqÇ=o¯V«­t¯x™µ Z-YÞ“.Øq)Ë­V¹<Úk©ªýœî×åÊåV«Zµú]SîÇû!Õ/^¼4¯2¼UU[[Ó–ÓA‘ÉØå¾ª¶ZŠÒ;–ÅùÙþ™_im¯þ±sׯÆÞå¶µÕ¾ªÕÖFa»¥(­–|ów[éûº}{«}‰¶Z­Vk{»µõW[î¯ÉííöˈK&—sZ&cÿ ˵Z¹7ׇ>n³.ÃçU~÷²±±ÑÊår3;7/\¸`ÇF¡`Ç„úê'Z²Üêˆ!Yîý§wÃå²'"Ü·ß÷Iç§ðš´”vZòµ´roüìþ¢;n×r9û'¶_tPýjœï&¡ßëmlô®çíϰ¯¤~¢\~_¸p¡¥:•íàaµµÕ®bÈrï?óÖÞ×*ÿöãûnßÞ¶c_|=‰8ÏåZ­òê7ZÕ_ýXk;[´¤ªíŸ^o"#Ëö¢Ø/ä.œýP­¶.˜–}Žå²«5Ü$å÷H#¬¹\‰D¢cûúú:‰ÖÖ֋Ű¾¾ŽR©„………©z!-ðn6›}÷zýÇüG|ô£E½^ÇZ׺-kÄÂb–!» OŒÊ 
ëå4ŒÞÓoeyðb†~½èýžÓk4'S¨ÂfssïÿûqÙe—Íý½'oxà°¼¼ UU÷]gã¬ã³,À|ç(?zåä)±Åºâ^kE·¡{Šö0“n>n–´XLcyyçÏŸ÷å½gQ†÷3Îve¾°¬ö<@1ÕUf ½ý/a|ã(|(aòÿáƒÀß^ Õ}YïÍ·23{âŸéóGá,Y®ï]ΊâLƒF€,? †ñ\@y.û0œC‘e(=¾¾º‹•BÁ1“î>cß0dÖ‹ýúGx;ºZ*•pï½÷âСC³þ v˜UùÝ-ˆëW-Ë.òÅä(ëo€þž¤Æ±ñÔYÈ/½ æ·¿ìéãP$ÛÛrÏ×I¥ì×pVHX–=ûE–!»êR¹W¹ûX{nwñí¡õ»L¼é¸ßý¾cû½o.?9—ÃþL”ßbGŽyšGùÄøvËÿ ̯@ñÿ:ý3‡yÙß úíwB®<ÈhH;SÂå‡Ïa1ƒúšoCþkœ×ÐþÓ—¡|ßó (vò,§š"r­( ðã >»*¨ë| R¥%—³§EŒ¸=‘¨ƒ[~m°Öj54 œ={Ö Ôf³‰z½ŽS§N9·-,, T*í›6<®f³Ù÷¾ÝÝݾËóŸÿ|¼á oè9eaÔ5¬ÎÌÙJ¥=¥XìÝ(t¯áT”ve¤Ç°9ËÂÂâñ8Î;7÷÷ž4¾ছnêùE0î¬ÆÇƒþ‡OBy÷­Ã,’\uOSÓ {´>$¸¢NkkkX^žå¤²þfQ†‡’˜/e2vMݵÜÃ0€ÅÞDî*™¿Ñ¹Ä¤v#ŠEÀúö^}X×íõKjzÊþš)—¯ðFûñè¬Óˆ¯¢îÙ÷^|5É2€wÜàvß>Ú••œ8qbîeø,Êï^ Ã@Æóí F#Ö»‰l Ò%¢ðóÛ€¬t¬‚¿ý÷(ü+sï»áíöm¦‰jñ×Eݼ½ÎwMk¯~ZüßFæú?‡ªºí’4^ç4æU?©"CýQ~Ï»± ̧üÖu=PÉòÜ ³ÒFêÞ Ì8º7%6ýgº7àZ õŽÛQ¨ëȾ6†êý/†õ¹¯Búå·#§(À­9Øéã\æçóËÙßi‹‹c%ÿ™´>´ÁZ¯×‘L&;‚TÌ}w¯Šy´^jÐÇ'zÍQsT*{v)W™îÑY±²XQì»_7êÛÖÐÔfßÀxkXßù¤W<¿3vÝ+úEÌ»jõJ( / ŽYÅx7_.õZËÜO.×1[Æ0é]ÏÕ*, X¼å›Øxö¿…ò×§;aÄ׋°¸ò\H'~éôèõ^öyož±=ë ½³}Œö ÓDª|Ò×V!·¶!?cê þÐUH9¥s0±W€ÉrÏUÿúÝ8þ¡_…®? òC÷"óއgzn‘ËÙ#j>u>Lb1îÛvd¦ ýÏEñOo†i…ŸüÒgßܹ'tzÀ¬Å½Ñÿî1õtY…¦(žx?*_}#¶6þ3¤·¼fþçç‡rÙ.;æð…3´Á‹Åöõ¸ˆi ^5RÝ> Àž– .÷ÿ'1j¥&—dSogF´ŸÜù ËêŸÉ—5bñmÆèÙö, Öçÿê{os¿€ÝC&ÒÎq†Ma1ÞË8Ùß'Ò«qZ,Ú;¹Ü¾J¨õÙûaÎΣ¨Tì8wgß=~0̽- ·¶€jz® ¼èEPÿäí÷Ø“$v/Czé*OÏÚ†Ž°& $“I,//caaµZ ÍfÓéi6›8wîVWW‘L&=é…YYYÁòò²ó^±X ·ß>Ùz™Qw[–ð¹8ÂD3çe|cìÁº—ÆÄo”ÜCä¯c¼Ÿ™4X+û§ßÖ^² ll@ÿù?…ücKŸø"r¿øk(¼côãîêEÁ1ëØ6 ó¸6¿ð86®ÿeÈ_yøÊÿFÚ¡}äǰõ‰énÞWÕ1gÊøÄÞñsš:HÓ™eŒ{>Ýݲ©¤ 7O X\söËÝØ°;ò5 öÒpäó½wË£p)Kp¡P@©TBµZE,éS§œyîgΜÁúú:’É$ c~'“Éž•íD"}èCΨî4IœFÉ\©¸:ÒÙX%Ì#¾1FW³YX?õ³ÿzèÄ ¢‘Ì+Æ{1 ¹AYØ'Q©ÀúoíöÃ,ÚÙe¹ƒO(ýþ·¡û~à‡/‡ÄQÒÈñ+¶-Ëšzº¤i²d!ý»©Ž=ÓÓÒ¦ È7w<þøq‘IzvŸ'{h‚Ç÷*C°a‹?ý=TŸµy%5“ºw»Øn°«ô"'{µ…ÏH VÑHíeaaÁÉøä¥X,æÉEâ¾8ºYðbþ:Ñ8¼Šo}ÔD_{‹ê¬ŸûE¨Wø}ötxãýŒ•t©ßÖcî×ûÍ?¼Yõ?þ7Ì?½ÒÒu»B/‰D.%»­<ÿ,žä¿YÆö´1• Püõ§P}Ú¿€ü3û—8õ¸DFiVæI˜UŒOÒ`íU|ëgظ"yå';b¼û¥÷&ÄôËF!2Rƒ°×¨ŠÄ'NœpÙë†ê,ˆJH2NÛñ­ßýÔÚ*ÔÕH,©)œF*üe¨V!cðÖÀDa0Ö´I‘Ù]´8»+ð• ²¿“€d=…Â}UÈÒ5(ü@ûnUåàÍϸٯóïxÚ‡ž<íi$;Ì·>ñMH×~`äŒÖ“nÇM4.Ó4GO‰vñ ØÕU2²ŽÜ‡³v t„Ù숉†‘¬¥R ëëëÎïëëëXZZ Å~ybq·X:Q(ØÓ¹²é vö°ÌóÍt`ˆí‚‰ÂÊ0 ,..bcÔÅHù|ÿÌî (PÞx%2¿ý£~Ÿp“¬ï³þìcë=ß´ôf×­×ú}*D=“Ýݲ€ì¿lÚëQ_ƒaú¯~ 
x(ßÞz’Œ‘³­¯¯caa†aÀ0 ,,,`}}}àÃAaYE¢e¹i1…‚vÕ#¿€‚~r1ë÷!yÃ0ì½3 ˆÂÈÝX©Ò“ÏÛ=•ƒ«(Èü6§ö’ÿ†Uæ÷•ݺŽrâwº«DÑPü"ý?†ò:{ MEr¯¹—ÕjhƒUlW³´´äÜ&FVÅ‚ì s¦×T*ödöím‘BÌïC#šJGzxMÛ›>mÿÞ•>^L# £±«¦ÙÞ3uO±:d÷íöt³!}íÍg‘Z”œ5PbÔlv/9M‘_ –eí«Ð[V;ÿF¡àȘë>÷9È?|;Þ)4ú5X-«ÝqX.ï%G5tHïp59FNºV¦iÂ4õ–+€—3Û E3ƒ@¤Ñ+—Û »jã\¿Ja6hôiŸ½¬ØJᇰÑ5«@QìF«hp[H Ã0Pv¥2*™Œk¢@µ uqªekU¿™hd¦iî۲ɲ€T Èœ|H½ÙÞWUØÌI.‘o°"æ¯ÀÚ:E˜{ø´+ª®·gÄ…ѨÙS X\”±½mgÅîE’ìF+Qé:ýùïA}Ú_¡úo†œû™ö² lmµ÷é# Ã0qÕMì²Ú®£gÔ¯WöÈâN´gäëòòò¾ÛJ¥Μ9ÓqÛÚÚšßçÔáÆ_í÷!ùNÓø=@á6JvI1}’36îé’Ê¥ŸCõYï„|ò‡û2qº …Œ; v±ØîD·CYf%…Ú`=|øpGÂ%!‘GVÅFã'¡ëœöEÑÔÝcÙ‹iÚ=öÜ4›Âl”ýûòy»Îú<…eYøÖ·Þ ãlÊ;ß ‰‰!)bDcµR±ë$[[Ì!@£Ú`Çã¡Øoµ˲Ðl¾˜EÚ°u}Å"—‚Pø‰-ÊúÑuÀü»o¢\¾ÂïC%ÛÙ³u|þSoüù7ÆÆ*E‹ÙµcÁÆÿ}?$ýÓLªD#9K°P«ÕP*•œ±íM†+®¸Š^lçƒ'ŠË‚“µK¥b¯áL »î O·âR¹o¾ËïÃ$šÈ ¯ÝƇ±éÏþ;«9î%™´éçÚïC¢y «®ë8}ú4FÇíëëëˆÇã(‹œ&|Ùe—Ùs"Ù‹CdYV{SÉ.¦i7X™\†Âνöi]‡qçHWßÕ`Ç$…ÓË?õ)\ùº²±J‘dÞÿþ_@&È•¢½NõrÃH#¬µZ ¹\‡F±X„aÎO±XÄáÇqòäÉ@޶~ýëqp+E•)¨îUæóy;E¼i¶“IrJ<…ݾ„K¦ ?nÿh”wߊúËý>L¢‰d³€þõ—áæßüM¿…h&,ËB,ƒlv¥Pðû(dFa]]]…ªª(ö˜V«ª*TUE.—Ãêê*î¾ûn¿Ï©ÃóžóS¿Sd)ŠÒ1ç·P°ëòl¤R”†Ñ¹¥Øœ2“q:gˆÂHô9JÏþ<™"Ë0 T*_±{gØX¥ a­Õjh4¸óÎ;>nii FõzÝïsr躎ç\Úäж½{6kÿ*Ël°R´ìK¸äê¤Y\´—q…Q¥b‡²Å ¦ˆSÄlGÎx¤ a­×ëˆÇãˆÇã'Ö¯6›M¿Ï©ÃŸøÿòëý> ¢™¸ñ›ßdšæ÷‘Íξ훶¶´Ó°ƒ†ÂJÓìpÖuŽ®R4Yð±ýÀ|­Svkèk,Ãîî®ßÇ9±Ë{ŒÓl(’t]·¬ŠÃ`§%EW¿¤K²Ì-›(¼Ä€“ivMy'Š3gžÀË®¿ÇžíÈ8§ m°& 4›Í}Ù»‰„K±XÌïsrœ?ÿrT>÷FÖä)²^üâ°{éæU½«ºn°…•iÒW?‡ë~ë·†î§MVøÀ.¾ÿ rÙïC¡©ÁšH$ËåúN÷m6›(•JH&“ÚÚæóŸ:žvô¹~ÑÌzôQ˜­rÝ*E–;áR±hgÂÎçá¬Ù& ³ôUÁgŽñû0ˆfæ«_½7ÜÐ`%…¦2Ò¶6§NB£ÑÀÉ“'±¹¹éŒ¶6 lnnâäÉ“h4XYYñû|:¼èE€wê‹~ÑÌœ{å+¡Êrt•"KLÖÿðAèÿÿöî7¶­ë>ãø£Ä]fÇ¢-Ûmå\eñÔ4õíÚ¦•Pqí$@åmÀÐd#æuC X/ wáF÷MÕ‹fØ„ÙØ›!صXQ 5°µA…D€5&à¢k€ÐÃEÓðÅb‡iÑÊ2׌)£Íßj/r–%ñß½WçðèûZ$>¸+ë _Ü­³~[£‡åyópBõÜsz÷‹<÷°ÂEÅ¢´oßsâì"¢éhÂ*ýÿ¤µñèš0 ›Ë…Ûí lÊ¥KŸ7Ý 1årý¬%_Þá²jµª¡¡!}÷ÿU£c»MwˆEµ*ýÅ—~Gg®ÝpÄÐôÁÚ759þ¦»‚>×ñ„µ¡ñˆ›É›Ö Öj5…a¨t:mzLM•ʰé.‰â¬<\W.—ëß<ñ„4ÊÙ¸¡\–&KõuíAÀ¦KpÒÉ“Ò3;~V?CÃ÷DÐÑ=¬ÃPÙlÖôxV¼u_ý©Ü€£~ñµ‹:uÊt/€ä\ºôùz/—٠ΗN|èIi´~…pMµZÿ¿Q‰ YlVÝúNŠçÀi?}f»é.‰ªT†ëËÞ©åpÍ»'aØ.úÆ7¤ÿTçïºËtWà€®—÷“ám¿àŒ<œîþ‘¶Ÿ˜2Ý 
1÷ßÿªÞÉz8åÐ!éì䤪ժ鮉’>üÎ÷UûЇLw°všÏç5;;»êµt:­B¡ÐÑûü㟛°¡¨ù–¤±«Wµç÷ÁRqd¼)$®BÁ"Qó].K:qBA±ÈVX'Žú}ì˜téÿMÏ|ñ‹¦‡X;a]\\T&“Yµ¹S*•êøýgÏ^UzÛO¤Ñ?6=`¨ùnøòË£“¦¬#ŽŒûÛÏhqño¥êÿ0a…U¢ä»\fÁì×w”=¯½&ï#1=8 í„5 Cåóù¶ÿ¡Z­kÇ–——•N§{Þuøž{–õÉ]ß‘FíÚ ¢ç[’v¾õ–žº|·é¡ëŠ#ãKKÛµ{÷néØC¦‡¬%ß7.(‹ìöëD­ßÅ¢ôÄé7ô…[nalÄ"¶M—R©T¬´)•J’êËòù|óß;µsç[º÷úOc맨ù–¤ÔÅ‹Òm·™ °®82~ýú{LXW”|ß¼Â%Á°MÔúÒè{^ÖÒöíLX‹¶Vß÷U(:þ_œëçkµš²Ù¬æçç7üÙ+W®(‚æÕàþð·ô“3ø«…­J¥’ž|òI=ÿüóFûÑM¾%éÕW_]õÇðð°†ö:½Ñ7z”ÏçõòË/›îF¤.ÕoíøÀ^3= Xf~~^O>ù¤._¾l´ÝäûÆú].K£gòR¹Ì¦KX£Q¿+•ŠÑ~D©ßå²4þ¹»õO·ð«5¾ƒw[¿Û. .•JÊf³«žV«Õ†a¬WToT©T455¥£G6×Ì{ž§|>¯L&³î{vìØ¡ýû÷kbbB’töìŠ~÷=ìL†µFFFtðàA½øâ‹FŽßK¾%éöÛo×ÄÄ„FFF$I—®ÿ¾Fï×FÆ»MLLèÙgŸ5vü8jxÃÇ>v§±qÀN¾ïkyyÙØú^ò}cý®V¥ñê‚4šS¹\^uŸ Ш߃ƒƒFŽGýéë_—¾ü¾÷™ù%ÂZïàÝÖïžN}ÌÍÍ)›MîÞPÏó”ËåVÝàÉdT«Õ6\–°cÇÝqÇÍIô]w]Ô¶¿;šXÑ¿<ÏÓÁƒµwï^cÇï6ßRý O:–çy’¤Ò¾}úˆgd °[:Ö®]»Œ?Ž.I¯¼âiâ×?ªß¼Ë÷}{VÍç s…7kÔï^69ŠC\õ[÷gcÆwðnë·•×êK¥Òš¥ËËË’:ߥìÀKÚ¿Ÿ«O°Où‚@áÛ÷Š¿ `£82.IW¯®èãK$¾ÔÃ"‘ó}Ã6ÁLXa›¨ù.ë÷h—ß=)ÄÁÊ k*•ÒÌÌŒÂ0l¾6;;+ß÷åû~GÿW^áÊìG¾«Õª>ó™«:vÌôh€µâȸ$}ô£¯ÝW¯ò X%r¾Ëåæ®KlHÛDÍwµZ/ÙKÏ<Æbˆ•Ïaõ}_SSS:räˆÒé´%I§Nêø¿ñÝïþ¹r9³2ë‰#ß’´{÷U®°ÂJqeüõ×/Heñ VX%r¾ƒ@R,™„u¢æûðaIÅ¢*ÿü‰g°"&VNX%)—Ë)“É4?(Ýnð´kà5sæ–ŠšoIz®ßl–¨饗þHJý»é¡kDÊwµ*MNªZ­r…VŠü¥ZÕËׯsB±éxÂzã&KµZmÍk q>ÚÆó¼æ3]¿÷íÿ•Ê¿âÌ<¬%ßoœ9£—þ“õÀ°[”ŒW«ÒûûÃ,†µzÎwHÇŽ©Ìfb°X”ú­ Ð »w³$±i;aÑÔÔԚדz¤MÊeiÛÊ “U8kiiIõÁs’:¿è'{÷^Óìúuî—FGU.—ùB7•˺º{·ÞoºpFÛ kc{ë~R.KßzVÒý¦»$bÏk¯iòà%%÷x)À¤_þò¿ô§ûjâFm8çäÉæ?²dN*—µ04¤?a… bbå.ÁqعÍÚÛsÈÞxu§Jƒ˜î˜3g<íxã ®°ÂYAp+œF¾—ŽguajvvVSSSò}Ãe,A˜“FG¥Ý·ULwHÌÒëwèg—>¡Ï™îï?¥Û¼_÷ONšî ¾ÐÃ9A ŽªÊsX£Ž®°‹E9rDa®zhp.—Óôô´Òé´R©”Î;gz<’¤Ñj Ìms¦»$æç/íÓŽ½{MwHÌÒÒvîïƒÓl8ÁÄ®\®OX«UÓ=C:š°ÎÌÌhrrR «v ó}_™LF…BAš›³c’xñüy]¼ab ¸fù7»ôg}·én‰©?g˜ûûà.NÈÀI££Òáì@¬ÚNXK¥’jµšŽ?ÞòçŽ?®ÙÙYU*æ—â.//kxxØt7€Äüú7»LwHLH·ÜrÍt7€Äpõ ÎW™“ˆYÛ k†ë>‹©± ¸¡ÑÞxȰIË÷Ü£Ò½÷šî˜ ·~½hà¬jUºõ­ÿÖ'YöG±á\Æ#›·¶›.¥R)-//¯y½P(˜îû†ÞÚ¹SWwï6Ý 1Þ¼§}ÀYA }àúuízúiéŸ0Ý ,y€Î´½Âêû¾jµšJ¥RËŸk´§,¸wthhˆ3—pÚ—¾ôqÓ]3>.Ý÷Î×x¤ œV8‹ Å·¶WX}ß—ïûÊçóò}Ý i­VS>ŸW:–ïû¦Ç¤ññq–"Ài7Ÿ_ÓžÉd455µfc&ºÌß\$‡MiಠtøðaÓÝ€C:š°Jõ]€§§§•Ëå†aóõ– Ћ×/\Ю={LwH÷°ÂeœAœ:ž°6¤R)¥ÓiÓý8jûÒ’*ÃúÛtG€„”ËeÓ]Ã’wÄ­í.Ál¦åü@¿ºí6ÓÝÓ à2&¬ˆ[×WXHÒÒ}÷éà§>eº@"xä\Ç Ä 
+À*ù•¯˜î˜jµÊý}pËÝ‘–l–KÂeì€$0aØ$A°dºÀ„`1a…«¸GIàV€MR.—y+œuâÄ Ó]€ƒ¸Â °IØt ºÃ„`“0Y€î0aØ$'Ož4Ýè+ÖOXÃ0T¥R1Ý ä®#ãpù†ËÈ7laí¦K•JE'NœP†’¤ÉÉI:uÊt·€Xo¸ŽŒÃeä.#ß°µWXgffäyž‚ йs熡òù¼én± ßp‡ËÈ7\F¾a+'¬•JE¥RISSS’¤T*¥L&£³gÏšîù†ëÈ8\F¾á2ò Y9a]\\”$ù¾ß|Í÷ý-·ŽÞµ³Yaj~~Þt7Œ#ßu.æÁµÏl¯Èxkypñ3Û òíf\û¼öŠ|×¹–‡~ÿÌZyk«E­VS*•Zóú¥K—tæÌAàÌ–ñO?ý´é.ÄêòåËZZZ2^ô._¾¬‹/j``@¹\nÓßK¾%é…^ÐÔÔ”öï߯;î¸cÓû7[ò'[>³AèÅ_4v|jx-yˆ‹-ŸÙçŸ^KKKÚ³g‘ã÷’oê·ýlù¼6ê·©ZHý®³%q±å3ÛøÞmý¶rÂZ«Õ6l[^^^÷Ãâû¾î»ï>Ýyç:xð é!ÄâÍ7ßÔÄÄ„énÄæÊ•+ºråŠñ?Ÿ+W®hÿþý0rü^ò-IŸþô§uàÀíÛ·Oûöí3Ò÷8Ù’‡8Ùò™M¥Rzï{ßkìøÔð:[ò[>³ÃÃÃZZZÒÞ½{¿—|S¿ígËçµQ¿‡‡‡Ÿú]gKâbËg¶ñ¼Ûúmå„õÆe7ó›z%ßq´'¡UÆûq<ˆ¦UÆû1Ôp4lµúÝ®O6ŽÑP¿íSGV˜ï}ï{+<ðÀÊØØØÊØØØÊ#<²ríÚµfûâââÊÃ?ÜlôÑG;jë¤= ×®][yôÑG›Ç|øá‡W.\¸[ŸMŒ ½‹’ï8Ú“Ð*ãý8DÓ*ãý˜j8¶Zýn×'ǃh¨ßö©Vîì‚Z­¦|>¯\.§ tîܹæk 333ò<¯Ù†a³½U['íIÈçóªT*:w çyší¸O6Ž ½‰šï8Ú“Ð*ãý8ô®]Æû1ÔpH[³~·ë“ãAï¨ßý1¦®˜ž1»êÙgŸ][õÚã?¾òÈ#¬¬¬ÔÏdŒ­:;ò­o}k峟ýl˶vïMʵk×ÖsqqqåñÇï¨O6Ž ½‹’ï8Ú“Ð*ãý8DÓ*ãý˜j8¶Zýn×'ǃh¨ßö©[ÛLO˜]•N§Áª×588ØügIò}¿Ùîû¾*•J˶vïMJc§8ß÷†¡jµš|ßW.—ë¨O6Ž ½‹’ï8Ú“Ð*ã¥R©ïƃhZeܵ|ÇÑg2Þ?¶Zýn×'ǃh¨ßö©[LX7A6›m†áôéÓ’Ô2/¼ð†mµZ­å{ÏËŠÛòòòš±,//«P(´ u'}61&Ä£Û|Ûš‡VïÇñ >7g¼X,nø³¶æŽõl…úÝ.ß6~çB|¨ßvŽ©[Üú Ž=ª©©)IÒÜÜœ¤z6råÊ• Û–——[¾·ê¸ÝxvgaaA Êd2Êf³mÇÓIŸMŒ ñè6ß¶æ¡UÆûq<ˆÏÍïÇ00ÐwãA|nθkù¦†o][¡~·Ë·ß¹ê·cêÖ„äóù晆Æý!’422"iõÒ›J¥"ÏóZ¶µ{oRZ-H¥R‘ûlbLè]”|ÇÑž„V?pà@ßѴʸkù¦†o-[­~·Ë·ãA4ÔoûÇÔ-&¬ ™˜˜P©Tj^Æ—¤b±ØüÃ÷›z%ßq´'¡UÆï¿ÿþ¾¢i•q×òM ßZ¶Zýn—oǃh¨ßö©[++++¦;᪙™ÍÏÏ+NkqqqÕ ÒR}Mz6›•çyÍ›š …‚R©T˶vïMJ㘳TÝŒ'ŽvØ%J¾ãhOB«Œ÷ãxM«Œ÷c¨áhØjõ»]Ÿl¢¡~Û?¦n0aMØ[¦¯wŸD­Vkžº¹½U['íIˆÚ'Ç„ÞEÉwíIHò3I¾ûO«Œ÷c¨áhØjõ»]»ãA4ÔoûÇÔ)&¬+q+ÀJLXVb °V€•˜°¬Ä„`%&¬ˆU©TR­V3Ý 1d.#ßpù†Ë\Î7VÄ*›Í6: ¸ˆŒÃeä.#ßp™Ëùf °ÖMÔ¸T_©T4??¯b±Øl ÃP³³³‰i»V«©X,ªX,nʲJ¥âô¬f*ã¦ò-‘ñ­„—Q¿á2êwÛfº[I6›U&“Q±X”ïû*•JJ§Óò(è\”Œ÷C¾%2¾•™Êw£Ž$Q¿á2¾ƒ÷/®°"vù|^ƒƒƒ:þ¼ë曑q¸Œ|Ãeä.s5ß\aí3žçž¶>îãd2år9=ôÐCÊçóNáA²ú!ßGo6+ßQE¾Ñ ê7\×w5ßLXûL†Êçó¶çr¹æ’SÇ™œœT*•Òôô´Nœ8áÌ ßH^?ä["ãèÍfå;ê±È7zAý†ëú!ã®æ› kŸñŸß´¥pè_ý–o‰Œ£s›•ï8E¾Ñ)ê7\×ow)ß++++¦;ÀÍØ%`%&¬+1aX‰ +ÀJLXVb °V€•þ­í³22¯]%tEXtdate:create2019-03-28T17:36:51-05:00«ûs¹%tEXtdate:modify2019-03-28T17:36:51-05:00Ú¦Ë-tEXticc:copyrightCopyright Artifex Software 
2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1080x792+0+0_Ýx+tEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/docs/graphs/large/l3_perf_tx2_jc8ic7_nt56.pdf000066400000000000000000001003431360743507500226350ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190307162241-06'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœÌ½KfÉ‘¸¯_‘KÍ¢>úû±Ð¢‰( h!hEU³1ÝÙ­.60Ðüú9Çü^w3÷¨ÊŠLÞ“è®øÂã»î×ÇÍìØãß¾óŸþ÷=ÿS{øô§Ïß¹—Y~»~¯ÿí»òò¿µÉïø1·”ë'ŸÒ§X仸ýçï|L?üóøÁ;ïñ³[?°é¿ûïßý zÿ3>ÿF~·þøÇ>ýÃwÿõ—õø×{‚¯ùSˆã)ÈOÿ,?ÕÂ7A£üðßýâcú0íÏßýÿ‰?ú_ß…Oÿýý?h*)·?ý¿ßy÷鿼Ù÷ê¯ñy½Ø½„X_%Üo´>ÅWŒÎ9ßâCãé¯Ùïüðx·÷ÊÌÎøOé½»œž}Û±ž×ÏG§¿úµÿäË‹gûÿ€!Œçÿùg†R|Ƈ’’oøjíÕb-$A‡¿ÿれïƒe|áÓ?úÿsü}úŸŸþøŸ¿ûO”×ýb_Á¹ðê!–T}~O_åèë©ó8þ#+xö‡Ï£~›26Hóõa˜ñ­¿Zè9%W÷žMÛƒCHéUrw­ùx A·=7„Þ_©µ{;F ›žß^îú-ùÉug‡Ûº›ž?`ÝevÝíž_wÁ®»Á[ëþeTöÝ´ÐZà¾îá呲Õ(!þ\(ù^8¶^>§\bK'‡»ÿM½ô^¡e¼Hû™>Â7õQrx%_Bñ.þL'Ñt"÷f{áb²÷æ÷îåä_¨Ÿ¾_ª‘è¿@«‚Ç`°òXúsŒáHÁòÉo~ýÛßýþ¿úÓ¿þøÃ1$}•ÿì ÔÔ_ÅkBÿ”:„ _«Ç|=GެߧÑ÷_þüÃçÏï¿ZíÉp?‰ˆ ²ÆTb ¥DŒÐz‘ ¢ùW©h ¥} ¡¾r -|€ôÝ2ö;¦4aN¹/›o­Ç¥ Ϥ<à ¦§—; \w´u<³³¿Ž ]ó+A²p=wiÃ39LˆíUKÍ=Fé¯ã™ýa[ú„Æ?pMÔ—àü«r,> ¤q¾§Ø«4ᑬ¹àü`¢rð®†$mx$‡Ñ[±5È×<ž˜ðÄLõÅ÷^³¸ "\€0æÚ%½b0÷Ñ%ÂÛ°?ý}Ÿá!%·–±ÑÙq©r()ûÑÖCÅ gò1÷„9HŒ¾@` òrÀ‡°´›ÍGŒïÅ6ì“ÊjØÞ¡­¬PíÒ†Gr(ÙcR0 ø ¾>^/â™\ RÙ_|qFvÛ’ñípL°@˜¢ŠÉÈ1iÃ3ç”aÖ±s0é^Æ’x4Ù_Å´@ˆL­ó¡Ò–ñL.PAú C®=JžÉ±ÄRär©øê˜2ìKÝÜX¶‚‡Ý;–ƒÈ®c)eW†‚‡Ý;;Æ•Si2ö‚ga>F,_닊Åb[Å#±4@=‘Z÷>/SU9s²µ<›0ÊËx+Ì•iØÜž8ué '§ÍÝŠÉ÷-a'YPœœpïÖ^1WO•Îppò½[Ѷ 7| ç¦]3/a–q2« §&p>pýŒ¹ªXò6FaåzßråÕk$qn#§Ãð<³xbÎDZ6i¹$½Ê·²‹1Y­ˆ qŸl °CtÊÕq#vt¿÷)ž—x‡äT¤ Ï»·)ž‡£ ìõã[xÞ½KÑ$é5ÉôÆÀ5B“ÇCðØ´¸“œ<Ç%Èó°Õñú±aj{Mx †/e65l d¬Ù¸`4HûÈ6\è<ËJ{ °8Ü­Ž TÃ4E‡­Œ¥©N㙸³j÷cª4Nóýp 6ÜÉPó™8Џ³í4RStÀDœÝp@5º+¸h0Ò¼C5îT¬ ^gdÇjåîrøã¬ÙV3¶ —«F£5%öŒ§Æ®å "tImÇkïq£àJ®¹€í‰6ÝAæpbCLyµâð;—vÈö!áC,Õ•¼c6ÛjÆŽÄvÙA››9§èãÕ¤@›‹^!3á+;jóÕk£¨]ãÛ˜ÉWwXê({ä—뜴iOÜnŸâ§ð)¾º1™ ÂOÌÝïÐÍœLìý¾C·ÇÑä%‡yȉk‡€qU@î+QÄÓMÀ†È 
ˆ=纳°vˆ’é°qFk˜Í7$ì|.¼€nhèFS¢ªÕ”ZèöxWÀP_3Ð]ÔQH;QŽ‚…n@>IÁ¡ñtã™@ûë Ý…ðŒ Uú€îá Qù@î  »#E¹#-rãb*T‰'r¸°8õ€îJ0Ä}ÆM¹C7ÚpLÃ%'än¾EzjdP*¡ºå^Ç«cdujd˜‡å· C[J–£Ã@Ý©£ÉptЃRÅ$È„å¢ øWªX4S6]za¨@É#¡š§©’¹ VK¼»¼4ô:U2<“¼M¶´D }ªdøô$¬¨á zš*Ú<Õð&Šcª†¢£îËçG鯊 ‹ß¸a±MTÑûtèaqMMYȦ”î/Mj–  ØYið©Û/){ü(wZꚟÃ]ë+–Zºïšž«À2c«vMÏÕW/5øBÙzŽäu‚Â-æ™ì4=—i Jz²4izŽ$œR8}Ùz¸Ü0¦f¯ ¤û´â¡ÎflÌÜî룾 TcÛ‹é?S ÷â)àë$ƒ®‚fçê«úD úhj¯1¾,Ì‘ÇHøÃ-=WšçZK_8,Ã=rIº'©!ƶ5;× ¦×F|aNJÇ(0ÜʦÚ0ØõÒdÙ9Åbæ¤Ù¹‚ãÕpúÄÆ˜qP„ÝpIØh^ñb'ÉY³sÃ-ÀÖ[‡×~¯»L8\ØAõÒêIØ:ìÂèždç6|ޝæzŠ˜øŸqgÄꔀ†n ÉR€¦Ñ3φUÕà3-_xœÇ&>W`~ï;_|†††áv߯‘ 9I;×|¾$z8êeÇçšÐÖ6´pà3†Òqø|»ªÅçBéÀT|ÆúöB‹–;ñ™Úp©X»¦i#çûJÚÈ9,u€jꀆØB`hÞ· Ï¡séè̃H Ÿw.6,B9šÆBÌ[Å|ƒ8²\. i3æwâs&³â„møÌÛÝØ ÍÐs¸¯uB‹£Mq¼0.DGâ寢ùÊBà»ì íÉ^—âà¡ Jóµ°.É Ó2¸£X= LóhÙ ‰tÇiÞþ3Rt…jn |i ‰ß¸9!Ø“¼œêA^\Nã¾ÐHM£qnÛ¬Õª¹·¼0úx`5­¸ž»¥ŽÆÂK¢DDÑhMéÃu1Ê]làÚãù=àNL¼¯ÉÍuŽÚÀæPBÂâß ƒØ²>"Öç²ùêB fó™â:ç}mš”Ïpl·-Ö ³âïƒí÷“srsÖq!A¸8%ëD¯ ÔS=‘›Ž¿¹é@Õq) ’wƒnÜrˆÒÅâo¡»nM‡›$Ð]qÃà(ôË ÝKbÀÒFBw‡°€­äå$Xè.t27È)ZÓIBÐ…{ºI a)!Äõº 媂7¼ÄgÝÕ"¬åC´Îµx Å@7pÕÓ­ûS¶”Ræ¦Â[ÐèÛ2H’ º1›Òa7Ø÷`v=· ¹3¶mõãʲ¸M÷ô0îƒÛQ®:ì°±¬¸c «ë:›À šÈáÀ-¾÷,lsÙéÎ(4Ÿ…mù=:º`Õ·![ã= 7ú·!h©t㪲¸åÁû ·©*AO¯CÞqÛ¯ÛÛÄ…ŠEaµ¸Mª _ŒÞŸÈ^xqB:‘›}€p|¹ Ojnñ·Š.ûv7ä\ñmØæ©ÉrG؆̖¨Ë…Þ‚íˆÅ åí‰m¸³  9"Y‹º6åîA|PÐöÜŒ}èEyãåÈ=cüímî6L‰¸ã ̦؛éåÛß'j^®øÛ–]ÛÍñ'‘™Øø /÷VßO¯×ø¼^ì‘^ªæåÖ§Ç 2ÖÅËÍw{¿¡Ììüð8/w½àXÏôõ¼\ŠÖT Ð'=ÌË¥ Prâ*‘;¼Ü:ó¨?²f-¨ƒþÜ To3¢ËäŠyX6~ÆôüüŒ Áò3vÏó3‚ågÌžçåôöú^Žnënzþ€u—!Øu·Cx~ÝkÛy93‚¯âå $%ºpS™yŒ—³<ÅË™^âåLOðrt¶ƒú½ÿ~ž—£c›æå~ü§§x¹B­bv`„y¹L­„Ä 'lvßÒ’¨µgÃpíõ´ÚºÔ…N—·Ñfì¾^âãðTžß51Góy¯T9»´ ×^l}úaPU€vT%* ;J[<Ý(1“Â’ªåtåuU|_¡á%ÌHÝÉèÎ-vÌ mÐx‡¦ãE½nâ…+*{ÉÚz€ó b@º‘aæáÚ{aÕr¨ÔCg(y¸ö^úôk¨”ƒ +EhE/˜•*œBZî–>Ö¡KáE†b)Úzi‹6XDÇ+˜ˆzkdô¢ ð0š–ª4ˆA%ºøœR‡kïÐɰÊj? 
R¶5m=t®Þ›kiÚz te/]ôeŒG\{‡J†“TèÝ…É,]›Pòj½† ýÜ/• ”{_E_.]ÌÜ@@^¨¹œªJÅüÞ°ì?Þ„NuÚ€@“fôrÈ®TÌï +tj¦î*ÓX½6 Ðk5Ò$vÌêµ¶/0W"WjæsÃ:ÙÌqìå´ýïL¿Þ0ÔÞJ­ôޯУ±%°Íån©A›£Ó0‹N¬ç•ªùܯôÆâbÃJwQÛ<#…\®QÈ6@¾²àí|adàø^Òö‰\Ä@AiÓÏ ›]:äoe@@[O8¨I ¡fmBà´œ *æ.ü½2!`'`ò©Y›°Êзê0þÔ¢m.JØ)ž,kW´ oà0–$d' CÙ³Éè"lWÙø=l£êÄ›¾VmCðŒªøû2¾§môË.¾êneŸÝ´˜¡ŸÄwÉ+ïäë6Üb¹±/ËÛ ø».4ÿ†ÛøC€s.2|‹ÛŽlÎóOÜf¸¶Z¾~Ãí‚!ø„mnnZ¯Dš4°]ñ­ƒha»Ò‚1q„ lË•ÚéÙØ®Ž¡©ãca»@a # í€mì6Üxs1(YÜÎ @ {=¸-–>¤q½ÜN8Êøb1¾·å(¹Å4ma[ž ÄtÀ6¾ÆýJ!å€mô†SŒ“5îjÛ¸°²U†i×â6žÂõaÇí=„á•q,žÁí„û¿ãÆQgnÇN²=BÀIn“q€„*^7·É_Æ%hhÇm\ J¡¡·qWáªÂzˆµ¸¶ÐRìQ(‹Ûœâeã·ÀM[Ï~¼ÐYw å:ÓY§ÀÍ<Ò‰¶ÀÍ«SRR)p³ïJþ\ün,p“ ô'8qMt),µ·ŠVHš abq›b&®èádaoŠ1F—[Þa›ipÜ\7m‚ ÀÏ*n S™¦¢¸-VíÀÈÅrà6„Ò†Û¥·1ëXLÈ7éÒT³BÀEÒhy¸oXàŽÓŽ…÷ã20ÈÍ@z—è¡wäÆÍÚ)e‰Vf›¤.þ6 @±Àe7‘kÎ'pwš4æÛYà¦7y¹1JÜì¯cíºÈC¸éÙ€Â~)p£­Ò?kB r3H{°ºïâInÖ®‰Û„d¡ ågX:åG¹‹Øü o÷fïOX¯÷ø¬^í‘nèÚ:´õéq ªðbîæ‡Ç»½ßP¦v~xœ¹»^Pz½þæ.‰ ´ ‡‡™»â(a@b¸Mÿ–˜»uça$qã}À?ƒê}òGpwìpãpLÏÀáÈ,‡c‡ð<‡ÓF¤€âpÌžçîÔëôž¼“·•7]DJÖAo™¥·cø€œ¬ÂK™µ7Cø*þ¿rÔnsf¦¨§¼­—§<ÛÍCžíä Šu§]¦êõ¯jïß|.¶®1?–æð*öœ—Ð (nšÂ‹LN J´Æ,“ËÌ4H½@NÒV•EÞ¹±8ü ¤1±ÎtöL]Çè(±›@]YšCó­Ž<-WX¶'(}n<³8cQPÿ¡ùÊPJÒ¶`r´è4JT4MeQ(’µD•ÙÆ½054(ݵ£(Ï QžÑú ×(“B¢Ï4½nÅ"SsxÐ[¡Üà}ä{-)“68ýõ¡[I­jϑߣ±‹MÝ) Zr«1 ÿÕzÒ^†E¿ŒQ2Pljhžn·tÉStVíÌ`PNЍÉÝ%eR`ã)"‹tW•IæTL+CBØ&YŸ¦iM*âOeœ$νö…ÇsžÒ¤Ç@Â\h¼©I;>*O⿨Æ{iKšÃ+¯VJÌb*ébV^¡GkÃn”¯Q œϤ‰ úùŽ„¢ð"ÃkN8Ö™Žkšh–º8†‰Ã£8¼8l³Nè£.œðâð°çðu4UËáEÌqÊsÇÙQÓÄ`%“PÑI0inÇ6“¤WGÇPx…É~ƒ˜À §k ^ÌnpO"èHÐê ¦u<'Gxtɦw­ìd¼”²'ðB¢– ·gÇÉY^ãñÖbágrEà1[íX¢üc7hsÂ%Œ3Ž–eð ŽRK×÷ªfðÈÒ‘Å’¥¼úâÂ!lLeOptÇÅ98óÎÈAMàfná!K0Â51½òûLÁïdð6ÔfB-®aõlºˆ/D;`Û7îJM³…mºh÷Š·la›ïèS é‚tÛ43ã]íã*°°]xã1XÜa›!ž¹&æc;`»'±OóÚÛ36%.‘´Ã6óe­p$S9`›†ÓRéY’Ø&;æf{¨…mK'ZØ®E¼Õ ea»Š5˜2°]$Ýßm™µ°¶™ÚwܶK”8HH Û¥{†+ÎÛ…ñÚ ga»HTF|×¶™e/M …íLëĵ,ÞÛ’²ž'jgú&0K]?@›ÆC®XÈlP›Œ&my Amô– ýúžAmͱ„~¤µ¨MÛ2A¨ÈmQ›Ö^ÜëW@ŠAm<’&‰>’•YÔÆl`o×ÔFwµ3“IaÜxuƒÚÌŽŽ ]Cml3¦fDCŶ™4µÆÿµ¸I؆zçôÀMû=$¨4îGƒÛdT$=ðØ$·Ù›oqP‚¶%@4IˆÊŽÚ’¶Â7œïz 6 ¡a=Njc‚°/˜‰Ñ¨þD >`›k€+.4ØÆËÑ×)¶ñ;Ç×êﳫ¼›ÀÛ€›^w…a$p;ˆI¸YF¶ê ¸E›Á„ \°À \€¨T.Y|nüº¶aÞ߀»0¢/óPŸÀÍtÖ¤<‡ «€;Ðc[´„ØNà. 
õ# °Åm‰ÛŽûÀmÁû)Ö7·¸$³I-7ܦîð¾ ·ù8Á1žâvcZf¡sÃmf<PĸI &h¡i Åw?57ŽÖ†ÛMPË/-nW¦²À-‰Û<ƒ7B<Åm&ߤoÌ ?p›‡$¦·™Âníeá6H¸p#O©n¦Æ:C3Îo!7„ô;ùŠEnñ`!ä‹Wܑ̆9ÅóE nÈ™d:¸llÈMpÂ-"›EîLWú5Y¹ÅyPäÒ[È]3¦Ñ½…ÜŒK£Æ÷vä†Bƒ ;^}GîÆ|×e´äΜ¿„½<Ó 7ÖÅYØK‹ÜræRGè¹…î”eްŒý€nb–Á­`°dˆäpÙØ°›áÀœá¼»a·8@äÊ£Ån&f)¸[…nÛ°;B{¡¡8²lØé Vï]¹cw 1nˆê— "g$¾Ã'UaŒ2IýeI£±@˜žDõÜx ãía)sŸ.‹ÌßÿöÿþÃcNÕ8,†a\«éúZéÐñi=ÿḌÛ÷ ,?“[•ØÖŸz±ßýïþåïûwï¹÷z ê— ÌÒ³¿æøå×¼hì® 3> kóEÿî¿ý—ßÿö+–ðÝ;æE!ôí¯é¿*ZÔ“¨»è ¹ÙñÌGmH¶?YòÍþŸ±Žç›{žo÷H?´ -òY}|œö”2TÏõ£øçù’c~ç§Çèû¯~ëOpÐï. 1‚Õj‰y$x‘®LÚàèëO?­¿!FZŸÍ ¬â}ò?›æ²¤Mwò×»ÜëEš¾?„Ì1½ â#èɼSÓv ÀMëæ»Ô-”°ƒ×_ÔmýMß±þËѮ¿Ä¬ÿUøK¯¿ÃWñÓ¥H½’ä%jæ)~zëå)~Úvó?m;y‚Ÿfº¦ d¤žžNšœþËsÅå>g²Û,¡åI²lJ%¢‹ZÝÙz‚xr&ú™‹ü[’ 1-—qf4jƒÞXªjðrP¦‹ü¥(!ù/}ô“¤¨Šâ„¥•±#oڴŬy¦â„ÊÚÆS›â:˜ ÐÓx V´%æ˜c´D—¸£Êº<#2‡–zMRQMïf®–:²Øf½ŒÇbxÌSuáDy «z9¨ŠAoÞ! SÅ© ñL=º gÌ¿ä™æsK™¥UÕ•‘2™Õªw±6KÖ Ç-’#Ç3ÐoÚÎXg“5õœ°Rž¡˜^»3ß¡™E=9^f«*®¬Bs0âMqðºæ<^ÙóÖEx)ÏðEW‰åäÅá%eÓâ«yçÌ” ÒÈÍ„ñ$ærc¼ëv´‘úÌËà­ŒG 91ØúxË(64 ™hÖoÎ{’¶J¶1ãG5ñ(ßžîïmÌoÒ˜Šå#I†¶Î/VßIÃîëÅsñÖ<Ì98N€—îÌo2Óe,y ˆ&ª¹‘Å&ȬŒÇ–ºŽ,á5Ìl^ì÷Nö¬†ÂI¼^žFª¹“É•3oWéژF Ž9_&WO¦Š@e>Ưújž†Ö<7²cé"ŸQA†æ8)/3v²:öêµY›6¨1赕ÒúЙ)nYÔøØ–B§ŽA$}ndF½¶ÚÛ0ˆzžûÅbWJýB&Œ6Cc“¢‹â Ú¹sRefldF80î°¸± ëÔ];ÙñbÏפg˜lšñ*#ú…·TvÆK^ À²†qnd¶Uü¯·«±J¥™±‘™Ñ9Òûi¬¥ˆNÒÿºèú;½ãße ÚQ>Ð ‹ž,ñM”ç•×kLoÀ³Æm:qž&ð ànÓ0Ÿ… fþË|Â|¢?†Û/¬20O4$×c‰Ì3¹/æ:Þ׃FùÌ‚¤áNG½Á<˲a§Lâ ó|)¬06œ0/o…á\òAy†‘a™Ê4ÞP>3•Íøæ(Ÿ¸qƒ}ß`>K lfFÇæsÉ:¨z×­l`óŠ|*n0Ov«å±‰˜Çû½ gn0OƒÎªEý ˜gíBà|­£Ì­Åy–5užÙLë‰ól„ªÖ¨ˆ8h6ìTæÄГŠ7¨çV†þœü‰ôôŠz!À†ôt~ËÌ÷†@Ϩ”SõÒV)|MŒûÑXƒ ×}®Á^±ŠL^r€½4ÖZ™Yþûñ&… щöÒH=Ê]³§Ñ~4Ö–ë…ËíG#VûìÙ– œ†7À~¼¤Áx©ìÇ‹`fk8eúñTó‘ Æ‚ýht@1ìGc¢®°—F&°),Øozf“8…úÑÈ’©—Ä®uªLä_Þe¯ŸÔ#p-Ÿ¥e’[„?…±õÑ:~zˆx|£÷G¬ž×{|VïöD?PŸ…sï´>e±¯¶òØ b_¾f¿óÃãÝÞo8æv}j«~Ü£ï{­éýán¿hˆ…žñ÷½ÇIôÒ¢… ï4]+Éx /z1%VLzG_“Œó®ÿȲ]§üés¨ßgäL•ÛÏ «!d“¡Z±÷¬Ûž4úTw^ûLÛƒC€¼ÇBåªÀ1Ýöüs þž[yéЮ¼íùù•ç¶•7Cø€•—!Ø•·Cxk忌͔òJƒ¸Mx§Ëq†€Õ ÑþYEÛIfž"ÈÄÐòÏt’¿©“ȺðÐ ™Áç¯É\Ú^œðdÔ XþL/æ]ÄI‡mÞ:é|O#ÿAÉü^јk(ú/¼£Ò%ÙѧX4„ɾXÔoIÁòÊo~ýÛßýþ¿úÓ¿þøÃ1&í8ô³sPxA˜¾•ÀTpe!Κ—:·îÿúóÏÅåŠÑõ®tBÁ,@…&–’¦R}±® Ì1n¯T_’OÊæ$f™­¢Ñ†É /þSbAèåQòäù%æö• £ö†ÎǶ‘è«ãl 2\‰îØU¢/< Z°X{9Îg]žHÛD*‹Ò¦2}E©ÖÁ¨V±Ù1‚á®ËÅb7‡Â|À Ìr9±-ЀX™IÚF¦/ñ—ã÷èÜ£XéÆ8q§ÄNT( 
?`é=%AOŒ’œ,ßÀÇ4jw]ž(UJ˜×Q¢‡i˜˜aìcsŒ*a[Tuyˆj≞…” Qeúbw~ÙB©¾$ë,ûcþ¹6t·¼XEpN°B‰õ¤  J¨f`©5cÌSLQÌ!L_Pãx;¨Ù¢º†¬ò|±3ÖG+i,]Vuy8Ïá þjcž/vãXGadÿ EÕåác‹Ü&ÆPTž/éÈô#.~µ*óȇ2¦ª<_¦&ŒÀ´PU–/ޤ0^wP¡ªº<üVa•¥a‚M¥ùЉ†PúË1#4U—‡ÅJkc¢šÊòÅ1ŽÄÒã[}Uåák÷Ü“¹CWI¾8‡‰â4“¹ÒîR2¿,Ιä{Ñy±~ˆRÂ…¦J_²PAÑ©$_ü^ >]e·"jj& °_!4ï ˆ’osŠÑK•›µWãŠ^eù✰Æêu‚#‹Ý5¤"ݳGËH‚Jò%Ý‘´Žn´©$_¸i^/9À$2eÚ]EJN·˜ cTù½¢TšTåÚ¨tº˜Ø$O ^ïJ³÷ƒ¹ƒ5•ڧɰ@jl°Ái&P˜ógqš>®Òñ§ï@$¶N&. Ô «Å<,€¨¥HÌ:h¨EúliîbÔÈÚ‡çb€Z^긡v î‘5«ïl쨙QPa¿êƼ ~Þ'¨‰yòB}jI9ÏŒê–ÍP PÆM¡×µdŸ*óØ æ3ëxÔ…¶ÿ2©y]zÆâ÷z 5¯àå×g±ºXx5X]â‹Z–¥\X]%ÉÝÂ\Õ`VVt'T3ú¸Æ;.ÎBµàÙz/Õß Ö$FÔ3 X'¦½XWŠk¼‚^nÖ\7ì‘q5°ÎÌ:ý¾ Xs%Z›“oÀº0÷ Ë[‡z€5‰7áÜ6œnÜ›÷i³8ͤžÉî¥+ƒÓ¸YâÈdÃiŽžÛÃâ4^¹sŸœ¦9mÇUcqš%U™¥}$w·@G<ãU8Î5ÆÂÛ÷ÚA¨Y7Ì;Ö¢u–wÅ÷úh_Ì’€‹»ßhéP¢ì“<¤ìÈêÝ—¤µ£6`tŠL³‰N@0Aʴɰqõ]:¥ëÊc5¯$+]×N6syµiЖ³(¨m\¤ZV4 ï W‚Sº¦#à|\;³³2¸7´ÁìÕjÒ}“YÌæn(Ülõ ằòÆQÙÙ⮃pu÷me0›'œá•cCoÂ5a[¸Æ!iJŒ3˜Ízde¡ÆŽÙú4˜‰6KA0˜-‘¥,f3³Ò­ÜMÌ&ئùÃb¶ì®µÚV¾¦ËM›Àl@;P£Hó¢2 M‰·Ó±ôjÓ Ãb³ »h³ ÍtW¹Þw‹Ál¼sW¯`P›ÂPw»µ™/ÕKiµ º¹i hGfp_r¦Anr½¸En¦Êý¦­ rK^•õHƒÜ$ŒÝº r37úH-rK]ÌÓHa‘;ÐO¡Þ»EnšmVB|‹ÜŠ]Bßä–ŒáSX°È6ÑuZ䇨9ÕÃ’WXmäV€ßM*RMFXÖÒ¶Wþ4 þÂ?H*¾Õû#6Ï~PëÝžè_T¤âúô8»—ƒ"ç‡Ç»½ßpÌíúô8©x½âµ¦áHEúyÑ!Ôe Çäb¤dÏûðû.óiRqÃuâY¶Ôy¼TïSÖ¦x`vjÉôüÔR?HE3„ –úA*Ú!·òÒ¡]yÛóó+Ï!l+o†ð+/C°+o‡ðU¤"¤uJïâí÷©h;yˆT´Ó)Í‚6“t?E9Ú¡ê¬;´™c×Mƒ ´é °žiQ›Ù]¦CíÈbÄO§A›ôdÎþæÚ hóu”œ±¶7§…m€WI^ lsjOga›4âÛ²6)Õ•b×Â6=@ݳÁ¶9%lç×(›8´ ‹Û$c' fq»™Ã¼á6î–˜kq»jYÉâv¥+ÇdÈ.ànÅfÛÜ.ú"ˆ7äÆK6K‰ú´ÈÝXÏv)ºZ»´C Þ‰¹¾çºð&×·Â6ðfvúJçß‚7gåï²iÞEÜ^¤Œñ Þ^+¿'x/Ø‚·$9ö· o&\^ôêÞ‘5SnöroµY—ŒïÒµ8dÁ›zÿfx 7O]¸®w ÞÅ(XxGœ¼{œ¦„CÜŽ+[óÞ$ô×ý#àM®>ÝñÝ^ñ·µ›•5‡©“? 
ûzñÏrxoõþ„‰ñ~ÏêÝé§jo}zœL«IqxóÃãÝÞo8æv}zœÃ»^ñZÓô ^+UF%?Êá¥Êêò• ÞÅ>Íá­s¸Nü#ËÖ‚:ïžCõ>ed|`6&Çöü<“Ã!lLŽÂ092ËäØ!<Ïáéö!žthWÞöüüÊsÛÊ›!|ÀÊËìÊÛ!|‡Yø¢EQrãðl'qx¶“§8<ÛË^J5²ü;´Ðü%ªbð~ü§Çj/H¼ÜUì$åÂTA›%óp ƪЙùòÖ}JÈ+9ÄíëY‚¶*`ó*ÇàµUºuÑ%æU‡º$$õÛY¢‰æ`JœvW$„mds¡?q_>Ö%¿à¨Í¬…úº»r¹ˆr*JÖ¶à’5»Wr^åy¨O)M¥ä¶ÊóoS!¥hk°”_š:Z)y%$¢±ã½4¥¯k"¢T?ÓIä^àR3KÕfêa‹¦+µ­ú(<&©Ê·x¥º~oYÑøÜ´;W§Í BØÎµaŠ™…HBÐy\ýFáõµõª7±wA»*õõ¹eI5.çÙLì]{ñºV©¯ß[–2ôr‚­ÁX‚&K*Õõ¹e9+‚±F}Ç€Žé¬[£añ˜†nZFkÚX<åR“añ­p·9’¸2+P‹XŽî5o,ž¢kÖ6a&Âê·©²æ-ú®¬˜ÜZ ‹´½Ãâ±béttÇ:iÏë“U«¡ñÂ+ªi©Æ®XÓä¶Ìתí x½¤vR»óÂaïÕÞ[RÆb7ï–8ÀíÀn"Ö´“mØÝu¢¿ »+oÍ{Å-vÓ÷sY™ v‹µØM;¬Áî䢸0ØÝ7|{JXènƒ¤¼Øe Ý͸.Xèf¡à&¶èfº®²0Ê@wéd„Š< ›yùbÔˆ\l¡›‰ÅÖ™´Ð]û¹¾§¡;wÅ‚mÈMÿ…›±À%‹Ý½+-p“Õ]T…nº «{Ð7ž©(~ Ü´c.׆ ¸£ŽÁº€ W×±ÀÍ@°¾@Ö7ù™>C$-p“@Ãutñ¡¸™EtEÞnÀÍÐŒDwãp7ÝwVxÏ Ü“š2¸²Ž´Ýp»2êl3¸ j0Ýàva&¶{i6Ün$rîé?p[Å‘m¸YesaúÆå1”-4é·éJ„³qϘÁmI9{ì-nÓ3C ÓàöèÅÀµ¤/fÙ ®1Ë­²pM:qqx® T}Z«¸V.×t˜ûïÝÞ†ØL:qÙÍwÄ&›à'ëo›àïÐi ØÂNM’˶xN-!Ývì:l†­{X6 6÷j›ÄÜO Ûäóœ³M#¶Ü9‹G1ˆÍ6Å!ÄN ZûÈ 6iGEÄÆ4ßk{$Y·ŽBlÜ'X„ i ›ì¨÷3nÑ`vòQ§Û0˜MkGÏ4 f µ¹ä\ƒÙl»dû³“«K,Ø0›Äm[DþÀì$%8—”§1›úZ_!R³é©™êôf2˜ Á‰ œïíe0›ßSÞ³{3èd0›Õ[—a1»WíÃf@›_[9 ,h£í o h÷¨S]XÐfJô°€Ò€¶M `A»›D´{Ô¥m¾Ãòµ± Ý^SÚÍÞW´ée¤n\ÚLJàÓBhÞØ‡,ÏrÓ 7ÚèÍr‰½YvIˆ½çÈÝr’Eo¦ VÈÞÆŸZ(¼›ÄkâN1ª]¹ÛÌ-?»”1|Æ{³ÿ'ÌŒ÷‹|Öo÷HG½("o}zœQÃ]DÞüðx·÷ŽÉ]Ÿ'ò®WýÎ_Cäå"‘Å©ÂÃD«ÈKYF§ü-yë$®3ÿL}¹ëœ?Õå ò¤CKèØžŸ'tZL;¡c†ð„Ž Á:vÏSyjá®þ.oôh×Þvýù]9†mñÍ>"Á«ŒÁ®¾ÃWÑyø«/P†8õŸ·õò¡·õò£·uó¥—“i$êl¿Dé5–÷ï?>–×bµ¤ž†ãä´™!éü(-%mf CÒôRm©jWáMlz6Úݦ!:­ì5)§ÁWmo¹jÃpUfVœ² 3Vlšzí¯Ë0\UtO…± ‡ÅJ¶ê´]¸j÷ÕÆš(SeÃCúJý=GÛ…“ö"nÍi»0›Ûm3nMÕçÛYe[UváL+éT-[wÊ.LCèTÆ[O:*¯ëD‰ÅP?Òµñ¥;§=…6éïè¨î’¶ 3/Ôtší®jËpÖækhh6ÙO/SEìR^oÜ:ÚîX3 1 cŠÓYÿd1zد+²«³üÉbô8Ukì¡jFy«Ò{tšÑkxç”Y”DÚ’eô0ß·Q£3•Í2 3ÎjKÎfÓ q¢{JÊ2LøéCÜh¡£HC™–Úžfô’Λ†Û|še‘‹¡Ë2 á"Öù=®ð² m³é%iË0‹gÍ6½TÍè¡m£: Ÿè@Ò¾ÒNöºJ‰w½”~æøPŒ^Ðéÿ:]Ï׎Mšï¬y²=ì<7ÝtñKËè©Ô‡½;ÍèÑ]{ò#§Çx «ÁÞ«6©s|í–q¡3^u…S=EémÈ&¶7ÕJ‡·7w÷\¹ ¸«Ž¶´À°3ã¤Ù7àæ=Ê-pûðÀFe–2,pwÃÌäæU ¸!ƒÜ´AÖu~ rÓ ©<,rcõÛJôj‘›±! 
J rÓØ¶r'[äîE;Úä®I#©EnæÑû)änMG¹[Ôô–EnZÐ j›æW¿2Q[ä®QDZYä.];\ÐA¨A Ý´t+¬1ÐŽ”‘ÔBw :ÑŸ…îZu0 Ý•Á“°Ð]¼¤]®'pWe€»6g/ R Xà.&-­n¦\þ¸ Ñ+˜]H¡­aÌfÁËÅå[Ìæ¦Ó¸¬1»ú•3Ô 6#*—é×"6«G.Ð"63rJ¹±S bÓÓ*M—‹ØÕÐõ±1Ç*ØÊ"váËͼ{±Ù_Ÿ»øR½{Õ³ ¤(ÝÛâ6K,ÛPÛÉÒ 6“C.Î{CmH¹}&1µ¨MÒh]rµkcΜ¶y¬Ì¶“wær1°íyänIÏ¢¶ZxÜP;êÓ voӟ 6i6•`ß 6ƒ•ßÚ†Ú]ÇjÔ)¤çÞ´¨Ý³&ªOÔö3ÞÝ¢6=ã”,jãâQÞdVÞî:GƒmÆv/o> Ú,z»x¸ ´ñŽ*²kí®Ùï ´é¾djÚ=ê`¸ ´ƒ¾7ÐvºˆíÆŠ“K µ°ÝôÎ3°Í‹jM•Am:Ý´™ Ú¢6DuŸZÔŽ:•ªEmÖ)™äVù¹+Ó¯,…Á 7õÊ6cè,rÓ°¿ä2ƒÝ¤ÊZ¹ñ†ÝYNZìæ| f»kÒ‰b-vC–[ì.âÛ0÷‘Ánô×ëtÞ°›µ[§cÎ…Ý&¿çû+ö¹Ò“÷šØS§Ø°µ·>}@Ç^“{êããìÞý–wÏõø½*Uˆ¡mE?Ÿå÷ª¸›Æ„¦¼‹K|¼‚Ÿ:“ëü?²t÷‘þLšwʲ5Ò…óÉ.·B~¶ï`zòAóÙA|Õ“¢oÄ0}z¯ù.€(E†ÜÒå¶lß°F–»Ì >bÈ ¶`ñUl_)L?qîÓ±}[/±}[/O±}[7O°}Tñ^Uêð%²ÏP}y®hž\ìwUv DУî>ïXƒî®ÊNã°\Oјlˆ*&€Fc(:ò‡ŠÃk(WÙV•h IrIUvñ Ó³ÊA­YEÿx×çWéïš}ÍbAB[ZÄ–¸Lj¬—¤#ùº*ñá]×´ýWf.4&ÉtÁv 4¾‘Z mbȹp³ì—BU›)†^ 2G£aþ‚Ø=/Ф9/M øéN—…¡,ÈäðVf´ò¯i*UòIQva­ë}ŸÐQ”]Ê-Ÿm4j{•ê•“Ú Á’gËÍz8{eÞgya¢1I² )Ê.ÙË[ÆAzÉt±dx?'é‚d' ³ÌShL¬6v²„­ô‚hÔv Iœz:½Ï†¤R?¡Ñˆ‡Þû˜ݫףÊïã‘uZ]±Ð† tº¼"5è°oï|uhÂCç6–äÓ€…A.+jÉ&éTº=«Ôþû˜h¸ ºø¯“âc#'ŽËl„Ÿ*lD Ð‰:™ñn–QY8ž\gj\4&ã§j| ­Ú`±Øb|Rúcl`¾ÁâÐ(çî ì_ÃÁY’L¡Q(Ä÷z­SðÚBaÒ¨¢ŠÉ½%ítH&=#å?ÆþE£~…;—«²É>VgoÃwao6TïÌlÖéÔ¨N†IÕ²¨Þ™mæ»ÝPÑËþ¹¡:S­å;ÏÖê¸-UV¼ ÔéÀ_¦'ÆêìºJ-¦óî³Ðîé-h.iƒt¦[]H¤c[Õ@¼Az¡Æt7Ú@<ÊÜß@½š¢­¨Ã6m Ž­¬b†,¨ MuÇn ^L¹Ô ÔM¶Ó Ói[Õïa0dÛʱ¶a:¥šeòß03P”¤`0œ¦–1 ¨SJ¸¨çörm^²Ò±XúµÈŽg*§™ ÙÅF5¤6d/AçúÛ4Ôò'Ú •ÄÚLT¼A;%°•/bæ}í_íïgwtw‹ê>Ñ=èÂɼKνɛìð6ÝÀ{Ò9vxæhYxÌk¯® ïTzW¦àÞ³©áj0b©f&6Œg`Ý]¤xGx§“Qì½£Ð1jêÅB¼$O]ûÜ¢¼)šgP^j(Æ¢Ú Ê탳£¼Gk±”Wlè󼪪û Ìw§cŽ7˜oô€›äìŽóÕ[¿€žŠð¥6œ÷ºˆíŽóÙ*.i£•kâŽóP"L£Áyf_©y\qÉ5ÌóRZ~g晲t•bßpž~Y)}ucU°ùôÌä¦3âô$ûVÈüôIâçc-Ðwe{úÒ´‡Óô^ËÅç[Ô©¸O¤¯YKâé£Qî-Òc¨r ÒóBW‚ÎôxÛû=€~ºül0OHWã€ù¨Wðý$#D?Ì7k-¥ ?Ê€y\Ph?=D0¾Ñû#Îë=>«w{¢ŸGþËñNëSYÕÎr’óšËûÃãÝÞo8æv~âÅy‡ =ú¾×š^Þêö‹W_x‹ÛU“áÌIN¿ÍÓFý­œ" Eò¬C&óù]=N)ÎS¸Îû#‹vñ§O¡~Ÿ‘{Rj? 
/4ÜOë½çh|±C¸&)¸4KÞ‡`Úš·Ö±A·=¿ÃÜòªxnå¥C»ò¶ççW^Î’]y3„Xy‚]y;„·VþËÈLù®4L½d›ÖY§ žDÕ׳‡¦ˆ§Tû!KÅðW¥õl/E¼ÁéLòðÏN·uB;#N$¶ùÓ½DÓ˯~í?±,ói’qÂuÞY´ÔiÌTïSV0ë³ð²SM¦ç šúA2š!|ÕÔ’Ñáy’Qï0Uà—íÊÛžŸ_y9KvåÍ>`åevåí¾Šd„´ Åˆßßc$£éä1’ÑöòÉh;ù*’ñ D`ÖÌ•íì=Þ?>HáI”IJ³üEi™ÖH$ÎPx"­7FPcŠëí­£A€råã]¦Þ©¢'T"Y€ZÛ¼*z"ÑúeN¡±›:š¸K±´ ‘^ÜrÙ …—gÓS-†2ŠÃƒÂ.ÉÃøžåðDÑò¢O±>Êtlæ×|…Z”ÄJÉÅT‹^ššø4LVMÞ¼¨š'©õZâµ°EÕ<ª«Bd÷Cbi–©¢%ÉkÍ5UÃáyZ^Zqã™U[Ê• KBS^åòÖËæ›ÚÆáam±ÕÆÃÚÆáA}L,ð!m‡W çÖ º`ê‡WEßë†ÃcöYæ’U>uÃá1`¶BWÝ:;Ãá±ö•ÃPŸ¡ð* ñ2gg(<ÚŸc«I<Õ³×UW ú Ô²7ÃRAÚTÉv‡ó[—ËA•<É‚‡™¦ð´Qß Ãƒ]žTÉùÞX^yfÜ8¼Š‡A+ÇUò„à ‰F9±Kñ£¢ð:ÞÔ é)èIÚ2Nä ÎÑ<9<&›r7£@ªÜx[_ïeUòDf›Ù±gålH¼€1¯¯©Š'|$+i¦&œÃ¸Êë€WÞ%6½“ÃÛp;½Z©žY‘ܦ›ü4êZØÎ,ÄÞ¿ï°E‘8!à¶Ã6ͬ›ÛÚdðj»ò…í¨ÍêF-ƒÙƒ¿Oõ¸c¶ðw±ñï˜MÛ%׺“›ÁìâXƒ²aë÷Ö˜-¼v$nº~`6mžø MÌÆuUÈ_וÆlÞr±µîOÈfÒ‘Y,MË “`5@vgX?Nb#јM/—Ìf“Á ¹¶¤i1›T@Áÿ/âÖ`1›)X›ÏñºpëFá•Ræ_7//ZçÂl’\A6d>0»>,“:;0›–YŸ™þÄìfhK‹ÙC„½r-·Ál²cž)>ÆÙ7K0N†ëÃùÈb6«xá˜^f[ Ú|?‡–(œí–^ à„RwÐffh"ž Mæ,FHXãú0 Ý²D,ÅqG[Ðn’}¤‘<@m@4æR/h£­ao÷qè,h³¨”º4h³ž'.ý6ä9 Ú¬)…÷ÅÖ­hã‘t! ~XI hs Œ ø´iÌæ—knA›ý)·oA›éRðô\ÆýhP»1—D®§i±5$õƒ,žîÒV€á!psï·tr3·Õp‘³¹  ’ç‰ÜŒÄÄr)ÈM¹ :8›eþB¦Á}ˆz»™ciÕÈb÷pTëèüŽÝ”™>Œ‹Â`7åÁ :‹Ýø™Çç²cw ,:W!#ƒ®±‚C¡Ïõ»ÙÔÃ̹aÀ›Ì ›nq»yiví)íØMÒº¿Ä‹Ý¼ì°èt<Û±›¬ùÖÛŽÝ…øUâÊŽb°›Œ"äãÈ´+;vó¶k¼YG–`ƒÝd q˜k¾^»ùKˆ Åòm°›×*ÉãËgÉb7w6´ó87‹ÝhÃíˆS–Nè– µ›´Ð=âDo/H ÝLÿZ¡uÆx ·”„ü;œ6ä¦oÆÙOà&õ73òÜDã˜=w7™MÌ@—¿î.¡—TTýÜôjöøi¨‡¸ébQ¤Ðf?€›N3ÃÕæ”¶¥âž) ®Åm&>ƒ{½ÁŽÛ)âCï'nß’ ·£¤+¹tû ·¹â+0nA‹Û…ÞwR‡ ·éFÜpǬXÜæ‰N8ëãõ· ëŸ^Hþnèz™ƒÿ«­aE#ϲÓÌâ=›È¬T?ÓåLlN¬"MÚŒG^‘ ¥¶UÞ$ 3‹É±j¥icp]¨YiÓÆà\_bK’’¥´¶2Q]c¢É2êq•n¼ú’ÄRQŒ¬M=ë›äŒÕc)Åá Zºƒ$ql7¢[*Uõ{Çòaxûîª0cÕiƒ˨á¼]kZ©«ß;VÚp­d¹+«×<ÓãáŒT“6mQH¬šÅt°’j¦RYŸ;–5 FÄQƒ!ð³/ßDc¥²~ïØ‘y•åÄ‚4i“‚d}tÌÁÁù¨ÔÕçŽÅ÷ø¸<<ËkÔ6…DןÝEmS`IÄiž©I›Äí;û‹¯­ÉÄàA±Î˜×aZ‡®L ‰á‡ÕÅ&.ü5k“BН6Õ¨š•I•ÿFeLYƒ¬M œK öÅ-Õ¢M IRŒ]ŠJ-Ú¢ÀIJ8€˜.dÑ)!‰ã˜…‡«U[0H aõQBik5ôökŸ`S«¶(kÏÓà6nò$6R’«ï3)¼‹ÀÛp[(‚­}à6ÜC E¦Äâv¡¦Üáf·3gΰ¸M_ë\<„‰¸ãva!L× gp›±Ú•¯!f< ÛbÕj÷ÚòÛ€A Gr}P¢¶ù=G?|/VnƒÛŒ…PF7ƒÛ´”Æì¤¥ï¸MKià\›Áà6éDI]{ͦÆm¶AÐòypÀ·É¸Í#aq›Md.†Ââ6m¢Iˆìpàv§™»1ƒY9p»£«^x)‹ÛY %%˜?p›—.¦äŠùµ¸6šeq»Ó<¨Fé ·{ Ç+Á Åm¶a¦ÊˆE±¸Í`¡Šþ@íÎTÓè?¤´;3qcÉ£Ü~´æx µPp-ŽP9 Ö28Ü–ƒk6`Íš^ç|X°&‰ˆÕ¹ø 
Ö5{+.K9i¬›,$“ó׬لºËZså°qxåhÍÐ<,W–Z‹Öò= ¬Z³¿–æ35\ãpæ¹üù€ëFw¤ä/W ׌,ئƒ±xÝ„À’, ^³øs¦÷S<ñš`Ièû×dî’'ÏÞ¼¦[ Šux(ÌYȤ‚á}µ~ÞMÝYÈ.jb•¨²Y–<î¨,c!›~;)¶+6ÚBv”ôldKYfÒ¾CÔ¦C†SB¬ìH_¹L~Øï˜Íì•#Áy:1›Á@“9Þ0›8YyVOÈfÑ6¬¿?$mF%òîÆ›§±%ô,oº#¶g”—'bCÿÀ¡tÅç±›/²ˆÝà‡ëa\± «ÔB΋—t«›ìW€„~EóoˆMχoˆÍCY±1½…ظ;Xíü”´¥šÔŽdl’mŒ‚ë A {!ËDmpME ãwÙŸpÝ´ÊØ´ÏX÷ ±EPè¸2ۉآÆ>"*7à†¬* å¸b^ÜàŽŒôejz ¸{ÃËÆqQàfZ‰M¢Ù7ŸÉB8Þ…¸­—Šn¾/~{7õW\qWÜ¡nŒedÇèe€›LaDZº7ƒœ±‘Âpì´ÀM¢8tùiYàf]ÉÉm¸ÍÃn…ßâ6¿68örà¶8,Qëm'ns(²§j9p»KòQ쇱èmüyæîî6q|‰öÝmÞ–‡iíO’woöÿ„yñ~‘Ïúíé¨Eß­Oóh=÷EßÍw{¿á˜ÜùéyúîzÅÑïýá«è»\$˜‚n„¼÷(}‡K3ð ‡ ÉÇÿfè»u׉¦œÜuÊŸ?‡êòGxÒ¡¥qlÏÏÓ8²v–Æ1CøG†`i;„ç <µÇ:%¯ç¼Ñ£]{Ûõ¤x•ãdߌá#r¼ÊìêÛ1|‰‡_±îËì@`zŠÅ³½Óšª."A¸)„Kj,„²h<.`¹â¹ZwŠÅcÜbsWMªÖ“eñ ‹æ‘|©Ñ¹u‘xôfØI³6婬¦ðb©T)ëî’æð˜ E €$i«šÃ3bÝ;eYÈ&ŠÔ¹âðè9of ½êž%EøËÀ¢šƒ1è4MãI¿Àê‹ÂtÖ=™Æ…ÌÌðØtEŒ=¨‚"$.¥Ö–ð=ªr"äÝàd|1i OBµ2ùci«šÂc™¦ÊÌ–Ü?=©b"|f˜ÏzJŠÁÇ4¼_MU3xK×Ê5Êì4ƒW²qW|žX£Â°àwÖÛËcTßÖÍCTßÖËT_e‹ C ýy¦/i¦ï/ÏÍ“[ýNúCßSøÃêM("±t—c§]", }`µñå3šðo£ÑX¨È—ÛúÝØ½FÁ{?j€ÐÝ´ S(=*%Tj£›.Ö9ç*„«NáV_W( TqMQ~Tؽ¨èW#;®zìR[`ظãh4¤_á%St~øðõõc¡0ÆJè.Çž™ÛNê*Œðn¤‹¹p¼2ßt‡õÞÞ…¡ôÄ>êÃ;CüÑT挲êÔñïrìdþVm1²´‹ú+4‘§»ÔÚ ÷gÒe¢±JN QærÕ4ÂU㺾»ª±³-SÙ/×h‚áÿhf†º®šóÁ€K7yfTb‡QŽß¬,‹†Æ$¡£RNX»v­€4Å#Ê,…דä6c#¯Ð “ÆHHÁÜY˜>J•ÃÆÄO!\U¿|ÖV‰ÔYËâŽåA#{odá¼F=”±Tøï,ß %^!Œø#m˜` ¶T…ÑhÙÀŒ¿¼È-´UšÝ¯œ0ªñIc5| Í$‚³P2h4„ K º;¼’ˆó¾r°e¬m¼&õþ¹““äÉ u˜ Ñh8AÆ!Ìxj3œ óo1ü$Ž7¡æ?7r’}ÕË=³]Ó‚UjÜÇÕwà Jz.ñ݉oﵓimwAH´¥Wœ;™,^[[.¸dMœ‡Y JSƒ„Òîs<ƒhï̧®”¬h¬Rócld†(ºp'^½î~:ua£ú÷I[ï­²·á¼B)|uâ<qæàǶ±0O…Ôo&|ƒùLƒã톰¡<$f·]9Á6”(h#ÎÊ(¿Xû0”§Á¸Œï~ <JR ánª6Î/ûúÕa7aâ¦I¥g@^" îœRŒ—÷¬ñåßÀx 8Yu3-ÆÓl\±¯Žã¥ÑåvÑä¥$-^-  ò )}&س O‹30çŽ_µ(?ê7õ|…ˆl(ß;¶8£ B9P^¤V…såÅœ!„ºv‚|Ï’l¹Z97ÈwVÉ©©]´Œyሼ¿Ë€YÇ(Å€ÐßyæV«)],7g\DÌwìÃòäžêõ˜äiJÅÝ€‹¾œ ORNêÝ7€5?C:j=\Kl@^ff&Ý@žô!ÖÅÅæOg²×ù¨]w‡yš„YÞ/ø|‚¼¤Qõ7ű<ë­yÒ~¢¼ð™1ÆtÉ2å‰ê!Im½å9?+g†ò]B}1¡C‚´(/@Ù‘…qƒyN-DW_ðX˜g¸`²ŒTŸÎs“DHu~LÅyöY=«qB,ÎoöèAZÄ—hþ}¹áÞO&nP©]æ ÂçP£<ëkîPÏð z%iß^"ÔX¢ RúÏt’¿©Ç‹ˆuÜr?;ù.Õô’*µúŠk³ÿÇéó/¹é¾w/'ÿBŵhÕ5ýÞa”_$ÿ4Ö*º“â)Çõ‡ÿä1”ßüú·¿ûý~õ§ýñ‡cLz×|áæeMµ.RɧDû*ˤ•7èdSúðÿûóƒ¥}U•X­K7U´l*Sc†7Rh#“K˜ÔFý[¼J‡5ƒù°îbë '£Åa›e[÷b²éL=×Ëeærtð}¤sê8¨Ÿª”H©Žôe¾«tNµ áòŸ¿ NUa©b“¬œR™Þ©TN•e«D– 8Á­*,•´lÅàG\U`­Zqrb[ )³]5Ÿ‚¹œXiHFq¹Ë#ýÈå4îtç0´ 
°Åb8–°«´=•–£“øœµY…¥rÆ%žXa q/÷ï*Õ7¨aŠ%/DU†…ßc¼küf`hø qU´'ûOÿoýq´-à©™oç˜~Žma•Ð $7§,CÇ÷·ŠÒÈåÔså´pQGi¶e•ˉÓ2~©ê¯ða>çõæqâ¢páÒ¨œŠª¿RY°~Áw(*SÜzXQõWdQ˜yøë‡ªò8É Z©Œé‘6•ljý‘?¸7AUX8Îȃ?M%râ3ï÷MU`á÷\¥GÆõ=•ɉßKmFT†¾J°ðk¾Æù ]%r’Éõw^ÊÀtXwÁ *µFfuŸHúæ.$kɤK]¬Ñ©0PAuãæn¸¡Ü…ð ªÙVüІjÞ‡øþІjù^iL5šv¨f›ºc T7†‰–4ÌŸTóÂÀѪ1“LÉÇà³°CuãYïëÕ5bó îÅ:›óPƒ×”øYóÎEÓpÝ =uek¸æÁ&½#D \7f_T©ášm¸;ÀÎÀ5gCOÃu#áý‚r×cñW£~˜ëÆd‰ë h¸æD)Ä0pÍî$ãáu×k¼f›RÓ ^sa–÷Úx¯±\¤eïWÐx-o—ÜÄQ Û|;rE#ÆÅÀvsL9¥Û|qy—šls”®Ò­QÈ&Ûlë%Üf`›Ï¤\ͶØæ>Â5<ß\Ã6¿§2‚Ü–Ù\’Ó­ 2eæÅ%?I‹è&–¯­¹C7`–¡åã”X䦿•Ë÷¤XäÆÌ*ÑÏ"7‹úaÛu=€»0Ûåða0¸ÍBæ@ ÑdaM®”ܺÐ]¶ý%Ûi¼?Ê%żvL?ãïÐW‹×´¸uÜŒh±ªëÈŠÖÚ·MãµãÕ±ðÓˆÖdˆÔÉ7¢5Iu·„?#Z3P rðlF²î…,á½d›dÝô%½IÖ,Ý<φ•¬™s‰4›dõ¢ùš7í¢ÛoùšÀËb£þÀk¹âR^¬|mŒC¾f*üZÃò5¦QÉì»|MO´kx]é¾”ot¯éiênÜÄkˆúêF4âµ”iª‚Z¼¦ð·”ÕM¼†^ãÖEjäkÚqJòp3³ò5»“b픯«‘KŒ|M¥ƒ‚*1›¥®X9Ý1Õö%uY9›A–nb…•³+è.NiÅltÇï_¡³kÑBå!fcšV›³ƒ¹áŒœþ”'r6ö‹’FÞMmräÃËz‰ÃþÊŸ†ÑŸ­OR›oõþˆÝ³ß4Øz·'úÁµyúŽ‘!„³ßëÃt{¿á˜ÛõéqjózÅkMÃ7P›ÖQõYj3Ƽètù¨ïaQ¿™ÚŒB`ötuîío“Ú\H°0ç‘S‚Bœ)Tõ>EÂåÎ{âv‚K÷üW?¨M;„ ¸úAmš!|µ©w˜*ó÷ÜʳÃmåMϰò2»òvϯ¼g»òf j30Œ©fñ€|ŠÚ´}#\3vmK(áš_H·`geëžXb!º‘­ûÒwÙšéœÓºb­lÍ4&ë&²²uÓœ´•­Y©à5œV­lMÝM^n“­«’r6Ùºj=k—­Y½ú† +[W&£¿U”MºÎž¬tMÈ[o`¥kÖín·ûÛ&^g­©lò5Äœåca%ì¨ê&a{í3±IØÔê&m¶½:­„íµ§”•°“L i“°ýòT>$l§éC‘°ѽÜwÞM²²ê0v²hã°qò§aXgë“ôá[½?a[¼ßã³z·Gú©š>¼?}‡WZôáõẽßpÌíúô8}x½âµ¦éèÃÈ{Ñ×]*Á>J2-?Í.¼‡©üfú0ñºÄ ˆK¨üm²‡ ä<²oZP€ó ¨÷)kW>‹p‡dzþI†`9$;„ç9$Ù<–C2CøöPï°aÙá¶ò¦çXy‚]y;„çW^޳]y3„¿{YÕ¡EQ¸žbm±‡¶“§ØCÓËìaJ52‰•Ë,ˆø³ìáÿßÜ™%Ùj$Aô¿SÆä° -¤Ô½SzîA™Iõ$®éO¥|\¦ä@ºÇ°íêþþ o³¿æVˆw#46n¬jÌϤ1ŸÈ«3º¾x¨~_7Q5P´e»Ï­ ˜«Û|È_0Õýˆ–3“I?Bçš]2ú1W7&(¢ãÍ] zæêE-¬l,(¾êaš¨UëÕ¡°ª—˜±ƒCtð¨Þ¨GôœÁê¿ _A£T”÷œ±Påâ)“¨yEÒ²}•Œò\^ Û‘^‹rO=/õ²Þ.š¹×ŠÖ§è¸y…"¾ä6ŒjëÑtÆó‘ÄS»ˆs¦¬nÑÖ.Îag¥®ö¡Ìʼn,—:Ä9Äí*÷Šͽé D‡9UîØè:NçPòTƒ{®b=®Ûœ,n ¾Ö-4ˆ÷„µ¡Ð=Úš2Él«hóS8.UƒjpOXs³­©Œi=mËÚFè9 ªÁ5aÏI¿¡mm£²’Ø ø„c-|²¶‹¸[aý6{ˆnÅ`ùAâšlö[QhEÄ ˆj.`´"âºôŒû¡k‡ˆ•üÿv$÷0ܼvˆ¶Q糫É=lõöwZMæámlbEçžnh´–¥²ÝLÒF•Ù×DÚh2ÖO©j aé-ó0zß%JenÏï‰ÅÛè0V\ÐQl7D‰xЃbŠ‘çª(µ‘WûJm” }[¨½ïÑ®ÔÞwreÚêñÈ ´çQÌûëÏ€@é‡sÞŠ”@Š9o'О/dz>Úð1iê ´áVZAð3¦ƒ¡möçYiým¸¦ÔŒN 1Ê"hÛX$Ü ´q(=ªÚ ´q,s‡´»õ]ó€6^Õó·ýÅ3ÎôÃJ 
0ÂlØ%L¶Y¾Ñæ@¨mñ5,Ôk…Úç˜+àBmŒMØßç,Ô¶ý…ˆÊÔÆfÖèìlø%ÔÆØ¼\·5!Ô†KÞ´@;ùçíÓ팫ÂÐ>sbý5,ж«I/†¶m‡’ñ›9LmKú‹Üp¡¶y9#ÔÆÄ£œ.Á6~³_‰¶Ûf:/þÝ)ØÎ§ÀضKÖ ÷ì#®(‰à!÷BpÛ¡ÁÊ}D%w§š/î2ŸiŸ Ý•-ŠôÅ=¸~FB7Šeú£—ؽñMWv[‡IÏ vc³Õóÿ”Ýeç”áïJžNb·Lie÷n- o2±»•Ý ŒZ%vÏ%Ÿç *ºñ“QDÑ]¾¾avA¯Öo?´m̃² Gˆ)²áÜ®þ‘¡ÈFèæiƒŠì9Ñ:ý¦ yÞhõ}½§/dc-³úc¨ÌFõqC'f7kàx½N³;‡&f7N¡OÌnœ?œ˜Ýøõ˜=?EWçÊÌ&¿,1»pšsbv±¦é÷eh°tÉ2´×ÈîKÐÞÉ¢~0›2³7¥0{nǧÀÌ^¥ØNböLØ3ÆJ‰2‘N®Ä^‘®ìA™bïdõ öyÓ„ÕʉÀ¦Äê•›/«ïlû±{ˆjöWý?ˆ<§ÈiÿyJëÓ?üvÿoÈ‹÷‰ü—Ïî•Jâý׬¼qŒp¯?>°Ûû Ï‹½î ^§xî×ÿøñ¨–L½Íç»mï:ˆs}³!±yÛ¬;ÂçįU´@Wò§…,ê¼Óbð"Íû, 3:>a"b‡ÉJ’=ÀJêWU²’ôÞ·’lú¨•$‡ð‘æØ@ßî÷]DÛcº÷²ëOTÖµcЛ¯ÇðÒºöHëÝ—cø'ŒÄù¿Ð‡£# n¼å$¦¼d%¦½¼å%ênÞ0dìÍ*ú¬üI Ó­±™øÿßßKE„ÐKÚ4bJ/s2 ̾p Ç–Ÿ"ýpl•Wk½4ÎlÙ9G¨£‘_(•VVxþCàØ' jìîhšÙBÒ’$ÃRì_#ÂqC–bç¢a½R˸¿Ó· §‡¾aáÄwMCè<â'’áוüÄʹPö#q UÏuäÞ û‰-ÊruÏ„æD~"Ò´ÆM€;~âÊüú(ì'¢ÚŽ—ëpAÝŸAÉ«Ðù℟ˆúA®Ø…ZÌÀÚ¤,Ò±P‡;½(ë9Ç#ÁÒá#Á¿'?Qò:κMóq¿TÑú†á'Îçép;n NŠ}þòí{Œ­±¡ˆ p‹Yc_ØOìlŒ9 ÅOÜâÎ t¿ ?±Eöß(‹º‰ä ŒRÈM<¾æ3{ë:íoÂM,,Œca7Ûy5áÉvÑÔݽÞ7á&V.p7Ї”G(u£v?S£6v;§÷t¾á‚¦kˆ#£±)KõUÈNDk%×!a'¢jl‡¦7ì'®‘ï6zû’dĦΠû‰ð}: ,™ ñbŒÆvbtä|«£ÂìÕÏçgbÇüDeö<ŽõœÏÌF¥0´(=«,+³!ÔF´º2ÚŏÄì *´‹˜7 í9º­0 ò⪑2ýã†+³·ÁÉUJí­Ë™3¶¡._aÉØî[¼ýÛ¨øI:¾`6É ¶Íª‹¢ÎŒmK¶‰Jr‚m+åJ§ÇضíÂnl›2Ö€`Û$è#ËØ6™ÙÍ¡6~r­ñ&`j[ÞdäÀŸÔÆK.µm&zÑ’¦”4&Ø6‰–vÄܶúµQÔM¸mRr–Âín¹Yq™Ü¶¿H¯`rCç¤S!76£”\!7Nê! 
¹íP¢ ûTÃÝÇr›ºKØcr›‹ÛJNFú¡ÛÍù-"rchDé!· ƨÏ{zBnšc6 ¹“ù!ä¶‚š‘ò-ä¶T_¯+ä¶È€øTr›Gݹ&„ Qq/ZŠ ïÖ#ÿ?Ãû@ÀÀÍÞ„…÷<©ÂªšÇR%xŏïkƒ*½Ç%z¯ì *½QÑ0Ì:¥7šÏ‡1÷ 7•ÈOôžØ‰r‘zS²t¢wEGÿzÎô®Á>¡·Åƒø+Má½ó÷±ÂMŽ÷½´Õ")Þs»"ðæoç oº ‰ÞøFóŒÎDï.À½èÝùMá½ÄÛ?Á»ÑJIÙ=ƒêÎ&v£­–&vyŒ3»Ã¡MèFÏZ¥PtïJüŒîÍãJº±·x«*ºW®‚žÐ½q¯‚„î+˜>ÐMA™ Ý+G6>ÐMuè¦Ýetoñ@÷¼’×âFÁ)è½¸Ö ¸wþvVr¯RÜ4‘{£(Ñ Üëœ.²üó¾ËáfÜe0Ú‚ïl݈ðjïÆoàåñp1Nð•=­+›Œþçì¾É²ï¿>²ã•Fúóu§ñ>Ë{ÏíoxÍZQÏgzG4Ç«^cC‘€e·–ï?±5ÿ¶×ØVH!ÇÒæÛjÿwzL… Ð+“ç†ÎûTs:¬äf¹« ¾¸ËÜÎQöý×éxXŽé >a;ÓQâ®#Ï5DmW¯ëg€}G¦ ûþÄ 8+/ê ЃøÀ °§;Í9ˆÂy¬’„Œ¥¼ÖÜ1íä%ç1íå-çQwó†óØÐÙ¤™Â²ý‰ñXØvüß{­í+U_ Ò¥Š~—Ë\Ê£y“5k!´WBêü0ð:@sL„•88Y_çn=ã{k,Ðç`±*M;ž,þ=üÌ9ÈZzÍå;Ä;‰!ÖÇ—öslþê2nNbT`œƒs;K%ë2Mj¤Ðì9X¬`ÓùÐ"2J¶ÌÁf›N¦énµ®‹ø…+¡‹‘hoèç`‹D«øz¸Ò7¿uÙŠ\æ†î¡Í1ö"Ç™Jyå ÎÁf•›ê<¥³ŠQTÐX¾®«¾Û–K¤}ÍA¶#‘éµ2A¶#QGÐ]Úu…ø°â{o;³á¼© «Þ4oo·-IðŸƒíÊp$ËkžÞbœl&ã÷Ï™cá¿&²[.wS²sáÚu=Ä–¬z£kvÎd;…‰{r¬‡ÙçL>‹1c^Ôó`kò&K´8Z+{“ûrAâŠÜ;j”·¶%å:.45ZÑJNdÍÁf`Ή 5z×Í´X'˜s"w©7‹–JŸóû½”s»¦1tŸÆØc´ðœƒlQÖ¯^Ü›cÉ£ÜÎ*iö«ó(­Ì9áÈE¼õ´’¬×4¶ EÏ¿šƒbSJë²y»Å§¬äSÎ1ËŸ¸§ñ1ß 1S·ùXUŸÆ–h³ç4³¡¾iR&Âãñׯ¾â¡ÛÔBø£S¥·Lø.½s•ðxöÊæ à Zày¿0å{ÙY±S¾ïKà'ºï»g­{áÂÁ wX•TâKáeš'šÀ’v‰Ú¤ wü,}Ë)Ü- P¶$¸[£È0çîÖú,2‹îøUª«p?“=<úFán5,¸cK24”î&|‡Å¥t?“Oâ @è~f­dºŸi=^CénuCAU¼›¹×¼¬âÝü%>‡CpTwÚœâݲ;ÝGÆ;hÝŽÙñ‹¦ £ÝTî¨Od7ñu´xÑ´$×x ì&ÃFõG» µ¿°›×ȯç.*øÂ!AŠöTÆVÑn,Ã,´ÛñDÍE;~u‰À3E»OØ,ŠvK­ŠÜ#E»™>L¥h7];Ôwe;æ…­)ÛO×û±‘áí7ÿÍþŒ‰ñ[ä^=¿Å4ûŽñûŸ0 ò— È_¸Fk¢üŽäJD˘_¹¬DÂüÁYdôë|æèW•ô’užHìΰñ¿!½WJÈ ïÜ>ï zŠ!É ß¹’pýÆÍ‚èwv˜3èwýÆÐ#Ü…ÞŸ úMÚÓ%ÐÛËÅã"Ÿ 0+è%‚ ƒ¾ëšcw“ó;ÄWn®œß¿!ûàJ›O²¼hQ²[Ù]¾½|RÊxßå‹îw^+(ß §’f¾W®ªžÓò'àçŃe¼~ øiÌð=†ræûÆußQ8˜ÖR™ï{Yè\ø^¸æmæ»tSO|Ç˱ÅËèÁwŠË|GyêX„$¾£ï¸×jõRëô¹ ¾ÿöŸ?Kö:N endstream endobj 3 0 obj 31858 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1080 792] /CropBox [0 0 1080 792] /BleedBox [0 0 1080 792] /TrimBox [0 0 1080 792] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> 
/ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000032077 00000 n 0000032098 00000 n 0000032121 00000 n 0000032551 00000 n 0000032420 00000 n 0000032315 00000 n 0000032478 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<83BB8D1597357460E59660EE5E5B88C1> <83BB8D1597357460E59660EE5E5B88C1>] /Size 10 >> startxref 32632 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_tx2_jc8ic7_nt56.png000066400000000000000000003107331360743507500226560ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw 
{B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$:.$¶­ vpAg’Zó!%Õ€IDATxÚìý{œ#WyçcÀ7®±±Á©æbqÙÀ”pCpœ”Baâ_/¥°ñÏ’¤´îÐ,$»T-“ Ù¤Uv7d§—&ªì&íeòK¢ ½Ø $AœHÜ @Ðpéì‘ÍÅî²5ŒÁÜôý£ú”ª¤’Tºu•ÔÏûõj{¤º*=çÔyÎs;ÔjµZ ‚ ‚ ‚ ˆ”ñ˜¤@AAAQÂJAAA¤RX ‚ ‚ ‚ ˆTB +AAÌ0®ëÂ¶í¤›AASV‚ ‚˜acÈçóI7ƒ8À˜¦ Çq’nAì $ïû)¬AAÄÈX–ExâÀ@ò¾ÿ\ôö·¿ýíI7â ãº.N:…¿üË¿c ÏþóqÉ%—øÛMÓô;‡ °, ’$lÛ†iš°m‚ àšk®ñÓu’$ùçv]Ïþóýó1Æðò—¿|äý ".ýd¼×6Ó4áº.DQôÏcY>ò‘@’$’ob&˜–ì˲ Ã0ðÍo~ðîw¿üuîº.~ý×½ë@ãÐO^ÿò/ÿßüæ7qÉ%—@Å.åßјMÌ ãÈ» =Ç}’ëÑ k¸®‹ÅÅE?þȲ¬k—®ë0 ÃßV(`YÀ0  ÿ<¹\¦iúÇòí®ëÂu] äóy0Æüíº®¼?AÄ¡ŸŒÚÖ)oãÈ+É7±ßLSö‹Åbä*¿ëºÈçóp]×_Ø$ˆq4Wé$JFiÌ&f…qäýSŸúTßcI®G¤E$JµZm†ÝÝÝ–,Ë­ÖÎÎN @kggÇß.Š¢¿@«V«ùÛÊårKÿ3€V¹\ö?K’ÔRUÕÿ¬iZK–å‘÷'ˆ8ô“ñaä¿V«µ´vww[­É7‘~¦)ûš¦u]gww·K® bô“×V«Õ’e¹U­Výí2Ê¿£1›˜Æ‘÷AÇ’\YX†»|‹Eß­·Z­BEX–Y–CnaŠ¢ð\EQô³C?óUšàù@‚«=q÷'ˆAô“ñAÛ$Iò= ,Ë‚ªª!™$ù&ÒÌ4e_–å®ëñUzþž ˆIÑO^{%£4f³À8òçX’ëá!…5aDQDµZ :tÅb€ç’Ð Çqàº. ÃýE½ "IúÉx¿m€·@œ´“|³Ä~˾$IÐ4-t‚˜ƒä• æ‰qäúÊt …5ax"¥r¹ŒÝÝ]Ôj5X–Ó4!BO¥U’$Õ†ÿU*hšFqKDªè'ãý¶Þ¤1Ã0àº.YŽˆ™b¿e¿\.CÓ4 (b¢ ’W‚˜'Æ‘wê+Óքቔ¸b*Š¢ïÀ',ŸG>ŸG.—óÝÂø¤D×uär9?Cp¥RIú–"D?ï·ÃgȺJÌIɾ¢(P…0‰‰1H^A€®ëdE"æ‚qä=ÎØN Ï¡V«ÕJº|+ª ¾K/O $Š"Ç(о+WLƒI–¸›0A¤‘(³Í4M†¤o F‚dŸ˜zÉkp¾Ò/1 AÌãÈ{¿±RXSŒã8X\\D­Vƒ$Ipù|š¦‘{$1÷pwš\.UUýØ<‚˜wHö ‚ ¢ ¹§˜ Kð¡C‡Ëå|W2‚˜wc8|ø0DQ$™'$ûAц,¬AAAD*! 
+AAA‘J›t&Åûßÿ~üáþ!®ºêª¤›21î½÷^\{íµI7cb\¸p.\HÅotáÂ,,,`}}=é¦ÄæÄ‰¸ôÒK“nÆÄH“;Êø=7 ëW\o¼«««I7eb¬¬¬`cc#éfLŒíímœ9s&¿oË,q饗’<¤œ4õÙ•••¤›04†§Ÿ4õÙYÃiüN?iê¯+++3£¬4~Ïiê³£Œßs£°Î#¼æê¼päÈ‘¤›@¤ˆy”‡yë³ÄxÌ›<ÌcŸ%FceaÞú+1ó&³ÞgIaM1ËËËI7a¢d2d2™¤›A¤„y”‡yë³ÄxÌ›<ÌcŸ%FceaÞú+1ó&³Þg)éAAA‘JHa%‚ ‚ ‚ R )¬AAAD*!…• ‚ ‚ ‚H%¤°AAA©„V‚ ‚ ‚ "•ÂJAAA¤RX ‚ ‚ ‚ ˆTB +AAA‘JHa%‚ ‚ ‚ RI¢ ëöö6FÏíõz½ïv‚H3$ßļC2NÌ+$ÛļC2NÌMâ¢[[[X__G³Ù,--¡T*aaaÐh4 iêõ:@–e†‘ô³"ˆX|óÉ81¯lóÉ81‹ì»…µÙlb}}«««`Œá®»îò¿ã¬­­!“ÉøÛëõzh;ApÇ®ë°, ¶m'Ý’obâ8Ž“tBŒó É61ʾ+¬õzÍfËËË€……,--ùnFÛÛÛ8qâ„¿}yyÕj5égEŒëºp]7ôeY}Wí,ËB±XD>Ÿ÷ÿLÓô·3ÆÏç!c°, ¹\Œ±Èóñýóù<ÞùÎwâÞ{ïø}’|§˲`šf,%P×u,..²¬Xçv¦i¢X,¢X,ö”Ãa0Mù|‹‹‹(‹ÛbFìöŽ É81¯lóÉ81«ì»KðÒÒR×Dîܹs¸ì²Ëü@6›õ·g³Yò£Ÿ0Žã@‚0ò9r¹E¦iþw®ëÂqH’äÇC¡P€(ŠpÇÿ¿¢(pÅbårÙߟ[LeY†,Ë$ ¢(Âu]躎|>I’`Y*•Jäµ4Mƒªªþ÷¦iÂ0 T*À?ýÓ?áÂ… ®$ßéÅ4M˜¦ Y–Q,}9E’$AQ_×u}ùâòÈe+»®ë‚1I’ü~ÄÏÃe•<ŽË+—ïà5]×…(Š¡öÖj5‚˲|…»T*…äÞq ßu«sqh’Œó É61J"1¬œ••¿sœ>}úvŠf³éûØ£Ã-²,û \ÜÍÖ¶mȲR*mÛ†(о‚X*•`š&,Ë‚ e¥R ®ë¢P(t)–A¸EªT*¡P(@’$ìììtí'Êå2lÛMäƒH’„Z­†B¡Ó4}e¶íÐþßùÎwpæÌ™©>g’ïýÁq_´mŽãø/d¾ Â½jµ’ÆÇñ-ý\i5 Š¢@’$T«UX–å+“A%³|A†·/Ôm…¸X,B’$0Æ|E•/긮j¯¢(P¶mûÊ)÷.p¥RÉW– …|ðÁ©?{’qb^!Ù&æ’qb–H4Kð­·Þê»Ü~ûíàGqþüùžÛî¹ç|àÀÊÊJ’·”:¸ˆÃL>.‹þ6Û¶‘Ïçqøðaäóy_QÝÙÙmÛ!«išPUÕWDs¹ V«¡V«ð,°ù|š¦õTVøŠðââ"EA©Tê{O\ÑîeÕjµZ-¤tðý·¶¶pêÔ©‰¸löc’ò ÷Þ{/VVV°µµ5ÕvÏ|ñ…/¬‚MÓ°³³’Ã(eh[D¹%•˸mÛ!ÏEQ ªjle•Ã\Y–CÿVårµZ ªªbggÕjÕj;;;þ¶(ç}RQȲ MÓ|Y<ùö³ŸGydêÏŸÆp" Ö××qêÔ)|ö³ŸÚ5hü&’beegÏžõ“M ¿‰$àsð¡ÇïÖœ:uªuË-·´Ž=Ú:uêTë=ïyOëÔ©S£œªÕjµZÕjµuôèÑV«ÕjÝ}÷Ýþ¿9Qßur÷ÝwÕ†Y¢V«µÊårkwwwྚ¦µ$IjI’ÔÒ4­%Šb«V«ùÛeYn•J¥–¢(-Y–CÛ‚”Ëå–¦i­V«ÕÚÙÙi‰¢8ðÚ•J¥U©T’~\‘ì§¼LB¾[­Vë¶ÛnÛÿ•"jµZkggÇÿ¼»»Û’$©U­Vû·»»«¯ð~”õYf?å…Æpbår¹ï;fXöK^hü&’€ÆobžE^†¶°®¯¯ckk KKKÈd2¼ ì­­­X++ëëë]ûqßy8r䀰[B£Ñð¯Ex1žŒ1är9 †i-än‡ÜÚ(IR—kn¥RñãðªÕjOK¨ªª¾[¤iš!ëS/¸ ãA‚ä;>Üu5˜H‹c†¯¼¸¸ˆ\.Ã0Ïç}Kh¡PˆeõŒ«ÍeZ×õPü3†dœî¹Ãƒªª=û| Ù&æ’qbVJam4ØÜÜD©TÂêêª/ز,cccÛÛÛ]®¿þú®ýlÛö;C&“ÁÒÒRÈm†¿ð7Ôû¿ë)KæüùGá¼ës€úSI7…˜xŽÆŠÅ¢oUå–Ð|>﯂=vx_ô˜Š²6ñ˜Sîò(IlÛ†,ËS,%I‚¢(Èår(•J”ìˆE0;uœ‰tg²=EQÀëëÖ^,À_˜áVÓ~n‘±ŸðÒ–†aø} X,Bůã8þç(yçúGÒ!Cǰ.,,À¶m,//'ÚðY€¯¾Þª5ÏSU‹‹‹þJ€ä8AH·yÇ4½ì®Ãd…%öžÉš¯Fwf-—˾•5QÇ‹Ñf,V¦àýÄþàc ~õn¤°Ñð G0ámÛ¾u3Øç‚Iù¸ÂŒ-•JÈçóE1d=5¶U–eìììø“õi,NqE•²wð 
9—½à¤\UÕ±&Ø|Ñ5XÎ%{ÚzEVSb?ác® ¡1—÷‡ EµsÁ‘{ÌCg ^]]Åúúz¨¨pÕÕÕ¤ï)paêµÒÌã%ø x¢¤Ypm;µîœîÅ'ýV冊¥ž)^ív^ášœ¿¿â×&Ý"At]̪˭–ÜÕ—Çñd||’Ì÷ãDõ=I’P*•FŽeíÅ´'ëC•±"¼¿pEU×u?–tR AÏ7>§¼÷- ûMgFiEQüXSn)勆ÁDbÌ¢² Œú¶¶ØÜÜŒÜN «Gçä¡UUý$ãÆúÌ-Ü"F sRÏ®˜z\fãÕp¤7£”ÂÅoÇÄ+Ÿ û½_‡zcÒ­!’€Çˆò‰FgNÛ¶±³³MÓ|÷[n5MÓÍôÎáÌD¿%ˆ¸®‹|>ï[÷9Š¢ø¬§±PJTbÚð8QI’|Y ZLyhG0£ôAbh…•‘ò‹¨z[A¸k ÿ#"°íÔ¹p3“…18ϦzÍC}ÍýÀ=Û(/ÀAƒÇšòÄEA÷F~\àM˜;7UUõÝ…-ËJÞýž ¦±ãIÄ:€qÈOÌ®ëÂ4M˜¦é+ªŒ1?I+M…·Z ­°ñ`Œ ]­V!˲_h1ÖýŒöVVRAÈòø «ëz÷HºÿaÎï8ÈçÛÛÜë¾Øu=c?ëz j±Ø~Ì¥R·…Õ4=}ƒˆ‡iš~­Ç]÷~”´!ŠP~ê1pÀ¾üµ£ŸÇq¼¾R*EË÷X|!J†=oðö\.úÞ²,ȲÜs…ŸŠ¸T>§×õÂW*|ç²Ë’n͆1Ó4GJÄì<™)«Ä¬cš&r¹œ_—žÜ}'ËP +§ÙlF~׫6ë<Â-O•J‚ @Ó4¿®jW€uZg¢“n7_F½xL³möÜ£Ë901*þDò³¾EñæôêSþŸ¸eyÅ¢gì*¼CÅûãu,ƾÿÏy±m¦i¢V«Å›43–Ú˜ãRÉ“{÷èè2¿woº,æ„ñ»Žë¢òâßö?JÒôƼ>6 ×~b,lÛöUQ}/]ek¢à  ‰¹GZ–7¸‹ÞÃð:H'|¥pŠo±è ìI+òt]G¥R=ÂB%/éW«Õ(³/1S¸®‹\.‡ÅÅE_öÇA>Ÿc •JµZm6=ÒRÎP.ÁKKKÈd2Ðu'Ožô]N:åï3ï•Õ`ÉEQP(º'®»?/X]÷'Å]© ¶K’Æ7?FoWIÇ iÅ¢×LUõæCÕ*  @¹ Ý’à~ùQ”?ýLõ•_$€ÜíYÍÓ­FÜ7Ÿ'š&Pùõ:„~1pÜçF Xjc¨Uï”*¬¼iâs/M™³m˜®Ùi'Ôæ"7 ο; û¦5O~ƒtÓ‚¿M’ã¿àêî½(•¢/jY^ßçÛ\׈}E×uX–å—,ãï>ÁQÅ_Фˆ*ŠZ²‹ž õ’7ïfÛcº(¶tAèv™—åö #?FU‡Ø6 ï’íÄ1MÓÏÛÁËpÄõànÄ-I1ûðÀªªB–e‹EßË T*¬< 0tÒ%Ã0 iŽ;ú>“É`cc#éû™:QÊ*§T*aqq19¡5MorwU>8y×Dz¼ëFM°x²›½çR(´½+ïkƹ\Fñ‡?ÈÏ@ù¿ p‚½BÏ-·|»¦…cYr¹½ÈŸºÝkŸmOú>ü ³GT©˜&ÝôHlÛû+•õEgg@–a~÷»PEÿc÷ÚW{ÄÈ:ïþÜo=¦é‰ óß›_ñ"ðÖàœýVÄŒçþ!É@+¼Òë,=÷­¿ ã• òk/‡,:žb@“ú}Dz,ìDüþ|1“‡ŒÄYy—$©gŒëT1MotíRÉëX¦éý¿\n»·tŽÓ¢èÉ£¦yã狌¢88¿ƒm{},‰gB„àu#+{¡¼~pœ÷…mÛ°m;Ù&ˆ1àq©ªªúcxµZ…iš(•J”DiZaÍf³¸óÎ;±½½3gή¿þúaYsKð>ÆqÙ2Íð$™±ö¹æ(U«µwå‡éºä)«w='ìÛ x÷ÔézÄ'?µZ[aÕõÐäG½Í¢`Ñò.Æ)¬=0 #²ÔÆ@x qÊÜ`ìŸÿ3ˆÿå§L!9£ÅÙ*¯»Èÿ·$ì®&à<à/ÐØïy£/€Ô㘿ûuH7Ý„òÛ¼ñ³!¿Ú³ÞàÍáEê‹^Õ©o¢Ï]ïÀ¶á<ëG!½ÜkŠ(‚•ª ‡Éý%ªÎ^¾˜ xnqØ÷˜>îRW™èµ8Úoâ\©äãyœû4 O)¦8ÇD±mº®‡ÊÍ(Š‚|>¹¿ã8pöæŽãø9?(^•˜%lÛF±XŒ´¢’ëïþ1R k£ÑÀÂÂVWW±ººê7ïðII?w.î‚+’ÓL¼Ä-\Ã(AŸý¬2ƒâT$){_Ãu½}U5½ °R€mÛ# ÀÎÓ^÷ÿÿWÓoànë|þ¬WŽÂyèp÷–5ð´]CºòK`ÿçãþgó/¯»õÌ“_ê>Øu!º Êïþ$ÉAû+/„t<ÚMýÁzW?ÓuOt}¢\Ç š&$Mö=3ø!‡ÄäˆãY*• ªêô&ë®Ûíé ëžÅcYž‹y”À‹û«r…7ÎâXµJq« £ë:t]G¹\½#¸<»Ù3Mù|…BÁ·ª:ŽƒJ¥B–(b¦0 Ã×&—ßdZaÝÚÚ±cǰ¾¾îwûí·ãرcØÚÚJú~¦Ê0q]ôP¦&ظñ“QôSTxÌR/:ãQxÞ¶½—”õ§ß…ò£‡÷íHÆEé^¹ÅîëkZ÷u{Y‚+û;Ó—¡'ÔŽ³ñj°ÏîCÖNËŠï~Ìœo<Íÿ(½ð[pþùÁð>® 
è:œ;?yš`Fjÿ\/¿Äo÷c_ƒå?=Æw~©{gË‚úºv¢:MªŸëºŽû´çÁüïÍÎC}EÔu‹‰^ÿØSZ‹B¥«¯Èrw—"¦KdføE™^‚Ƽ؇΅Et˵­ó•Š·g‚¤Z”B¢ ƒ  g>Y–ý“¼Ô_©TB­VC©TòÿÏzMCP,ÁCµZ%ÙMC)¬Ífëëë8qâD(^ucc«««X[[›kKëÈ kÜUäqeou|’î ¼~ àM’‡°Lº×<Å·\ê-äïMÒ¾~/Äü~{GËŠ¯È+Š7éêGœÕ[Z%‹dä2Ž âì0äóžœÇ•×…{É5¾>'dŸ –=ÞÇ0UõºÍß© Û§¿Ü½Vò;W@ýî»Öÿý”—ßA*öá.´þ×WàþÔ}'ßöì®ë?ó˜ÿû·¿8¬Í ]ë6¦ ÎþW8G^ÑS)%'‚é`Û6 Ã@¡PðãQ9c-fŽ c~©—ȱ­TjÇV*Þ»¨RIÄ}?ªëòHŽÎmŒ¥64þÀÀC.—ƒªª}“$ñÄK–eÁ4Mšà3 O®`øä“ÄÔJa­×ëh6›¾p'N “ÉÌuÖ±VQœ½·¯aŒ¤°š&«¼â£uO•¼‰‘(Õ¿ù^xfÒÏG¸“RÉSʉ©ÀÍ]KÇÁå†?v§4+î!`IÜ#/ ï`YÐïÿ^áŸ~i¤Œ³¿¸¯{$Ìnd¡¬½È¿Fg›õϾxæ3c=Çò‡²žuËÁúÝ{C:… xò«‡ü;^Èv$Àu\\tߦû{ ,Ë %M2;V ÇIfRTVû½›zy­ì#Ü;yq±Û‰7Cëø€ãxu·Ÿò%è+M Þþù¼÷4 ¯®^‡{ï½j_ïá ¡ëzì:ܲ,ûñ­4Á'f®¬*ŠBuTSÆH1¬½8þ|Ò÷3U%ÖèËD -N˜~~ƒ<æ–ψù}¸žÌ±ö¯—@ûîo¡\îX9ç®Ñ‹‹Þ0\’(Š™ŽãŒ¶ #Š`_8 ë«7NÕí]ÿÞo¡x×ëàžýJ¼ƒð„o›nÞ^Vmë/žàéÁÏøY8w|²ëΣ™H±+>~Ö¯~ŽÖt½ÝµØOü¤<;,P’Ñ(ÂÝÙ»p]¤¢ªž÷f¥Ò;ÜP}ö]øÒG§¼€p@p]×ÝÓ4Í/åÁkðÙ¶L½Ô¸Êj Øó(E­æ-º0Ö®žx2]zËWÓ„èØ¨½èçPÝ^@µê%Þ®V½¿àÍúúY\{íW“¾µ¹Ã¶íP‚°¸ïEQ(F•˜i‚ek(™RúJaåuX×ÖÖÐl¶c­šÍ&ÖÖÖxY„ç×uLjӸâØiMb̳ª'BA¢¬¬ÜŸ+p›Áäê ?näóýa€Aí?\ö•WLìñdxæìàøŒÙÙ3aLsf”UÀsŒ ®}–Ë{µ·ƒ¯Eþ~1 *Ë”¼¬YµZE©TÊRJ1ªÄ¬Á½ ÃcŒ”Õ”3´…Õ0 loo㦛nÂñãÇqüøqÜtÓM°mXXXHúž¦ÂDb”¦ia šŽ:“iÄm‹(öŸE)¬5® úö¬ÃxþNäYpWš”W½÷½¸áÎ;÷©AéÇ€¢)=µwüºã :ƒn¶ÝêÖuá:ŒR Ÿ{ïÈ}I~ù7=™Øó§e®è+¬ŠØOø‰Ë1ûÓÏAzî×#Ï¥¾íJìüèm]¡€'Ì’Œ‹mÛ} ÅK’×ulÜOu IÞºÿãÌú\pže»Ó4!Šâ2BKÆ9Üõ÷ ŽÛ³L, +'hUµmÛÛÛØÜÜD6›õ­¯q¨×ëÐ4Íï\)æ.ÅFš¦ùI–宺wU d0B·rÊÖ~Ás®;¾Å7bÆŸŠk­Î¢|ãÖh}⹬¸ç ÷¯Fá/F%ÊÃ’1Tó à­ñ¹ƒ}â0¸„º×þÙ§h‹·ëö¿f¯…“r¹½°%&•¿^@¡ BÀ®|%èµÖÍ,Êø°Ø¶=Ð=rØÄ4#ašžÀ§¨Ä %(šƒ Û†jµšt3ˆ}â Ê8à-ÌÛ¶¤›BŒÀÈemdYÆÉ“'qúôi,//£^¯ãøñã}Wk8+++XZZc wÝu–––Bƒµµ5d2{½^ÇúúzbɶíÑWc¦Q惻ür‚V\`°…5Ÿ÷LaãšBK‰95]Ìš|›[~Þ}¾øõ;g÷rÏzù½G[Yâå»þGé§ž÷žvv^åôO†¬¥ŠÒ[Ì4Ãë8ýª6õ»]QôÜôó''öœç‰Y“ñQˆ³ˆ£(Êt­R¦ÁHV.Š^ø¼)«ÀÁí ¦iB–åíÚ~Ð8h2ÎÑuÜÞg˜¡Öííí®ÏÜ5øúë¯÷݆£Ùlbuu°°°€'NøÉ¶··qâÄ ûòòò¾¯þ9Žgob=VQøIX2vÍRN°FGg†WþâÉç£0U*ÞŒ|Ü”ãtÝ›®Çʇ3·ÌŠ|1 c,÷ùðG!Šž(¸ûbä>ê³>AvÞô?#µZãOÚ²$I{¤÷XÂß9Å·¤êº`ï:ƒREôä^×aýöçÇz.‚(oÎLè)ϳ(ãÃbYVòî‘Å¢§¬vÕI]÷ºà5gõ›ƒ Û†A‰–QÆÏðäºnß0"ÝÄRXëõ:Ž;†••ÿ;Û¶±²²‚íímT«U¬¬¬`kkk๲ÙlWù›sçÎð:ÿwPñÍf³ûîG¯ë:òùÖ󏸧ˆY’ñQÙ÷R5†7v”¹h:Èòàœ|³ÎAí d]=x4çÐÂÌìKaÕ4 Ùl§OŸö¿ã±«§OŸÆwÞ‰ÕÕÕX. 
XZZò?7 ¬¯¯cyy™L¦o§h6›=·=òÈ#¸ï¾ûº¬À£ÂCmïí<–*èþ8ªÂjÛÝ“AŸ+jR*Š:x:È£ÑhàìÙ³xðÁ'~îiÉ7\¸pÛÛÛyqض ˲F*Âu¡õx žø´otíâÜ ìÇü¨·{.Âæ«º÷Ù½ÂÍ7†¾«¼ûÑ "åJþ!Ïk`O•Û®„}î:H?𸱟Ӭ²½½‡~x*çž•1|,Ë ,¤$jaåLS³Ê_3IœƒÔëuœ={çÏŸÿdfeüž4‰ß§|J|ü$S£0ã÷ à¤b)‚ÏÁ‡¿*¬|€=yò¤¿âÒl6Q¯×±¼¼ì¯Ò,//£ÙlÆÖf³‰õõu;v̯åÊ¿ïE¿›{ä‘Gpÿý÷ãÌ™3c?LÓ'J¥Z­Öèzî+Éÿ=JL«ëv[XUµ­Œªjj,ÓÙ4çΛšÂÊ™´|Þ„çÌ™3þ è8p Ò$VÖÙ—=·YQÔÌûºbT÷Õaùáû.{ÌKÂÛ@Dw\«t,ãgÄ®i½k K§n õA𲧤K$™3g¦2Ù ’æ1|,Ë‚a( °,ËϘHÉ^À4Á çº:ä¹ÿærñÊzï7ÓRX9i¿'A±X„¢(d]&–åõã!ññ{Zò ÌÏø=^s•bWÓŸƒ+ß³×ëu,--…Üx9›àjEp{œs®­­aaa¡+îµ_ l&Ó;žìÊ+¯„$I¾_þ8L´|°“Œúbˆrù LÙ 'íŠ_]œÖÀ: ù€«®ºj"ò `râmÒ‘gx¦wÞÌ1HµÄËw!m[;å×\‚½2§Þ1ºéêîIœãì%»Žˆ•„¢$š(:qVWWc%À•´áqaŒA×u?>«P(@„äVâmÛ_m ö‘ýÂ4½&ìî¦Ûy†[ƒ¦1†ÏÂø=.¦i†¼Èˆ¼¸°¢DNfLÓ[WÒ4ÀÚ¼åŸ~Ñë¨Q/Ãð¶ñD™1çj|ü$S£2/ãwù|¢(BÓ4‚€b±ˆr¹L5WSĨsðÖ………®—z½Žl6;”’„»olltuŽ#GŽ@È-¡ÑhL­ãFaÛöôV£bJ‘šâ¦ñ8èãBÚå˜Ü¢Œ£–€g>Óÿ¬ÿýwí#¾õµþûüÏ’¶Ú°¿þ¤ïÿN×q¼Ü°i‹°>tõ¾>¢?³ ãƒpÇ/ /Š"DQDµZ… ÓMÌáºÞD6*C¢ª ÇiïÂdétoŸ —Kçé]×ëSQ‰º]×;>%9žcd»¦iÂ4Í™O¢Â0úÖø Þé ÃKvÉ3íašmç6fØÈÝù«ptŸÇqÚ†IJU6Êy•qÇqüÚØù|…B!TŽ“þ’Iˆ k6› n6›°m;ä´3saï…mÛh4eÛÛÛ¡?À[ÁYZZ %p²mùŽAcšp—°©GauÝðàÖY¶†H-³ ß“ô pœŽŠJOmtù…3W„‹ö XU=ñæó öéK =÷ë]ço_çÁ'CxÅ ÷íùý™ƒišÐ4-ÔAðØØp-0êþø÷ºîý[×\îýß„ùè­¡]-«} ^6†;èðô¹œ·O±èŸ*¤Ô2î~–å͹‹EÏÓ8—ëžsð¾(¤¬Î‹l÷Âq_Y‹“ã@ß¼ÎëCºÞvé¸|ÞU*µ«)T*¡]ü´Ÿ}ƽ åÍÇ£ôGWC”Š‘çS˜ó‹24Z¯I ³ ãq`ŒA%)cí,¾€7>+Jt’$Q„_@˜'ϫհ‹€ed¡Áò§wž%QlosÝö¼EUÛ¯Qì.×Êá—î|­éºwÝyšÓüÈv/LÓ„ªª3¥¬ò–^,aœ¡|ð&Hó[žâÚo"²çòk½ÿ‰(~ò(—\(Ú^'2Œð…ω{ Ã4=7_]ªU¨¢×¯xè¹ßÇJ%(`YG!w!,+‘4ôó*ã®ë¶m”÷=Aæ#¡X0ëeyãØ¢è lg?qou“Ïu: ^'0Œd^­<üðíßüÍßl½æ5¯iÝrË-­÷¼ç=þ¶S§NµŽ=Úºí¶ÛZ?üpœÓÅâá‡nÝ}÷Ý­»ï¾;Öþwß}wëÔ©S}÷©ÕŸgww·%ËrKUãíß—V«Z 'ËÞ÷ýŽ‘åVKQÚ Ð´è}«Uo_U³¡8ò2M†•ïV«Õºí¶Û&rmUU[ÕN¹‘J¥Õ*—_T«]ò*Š–Ë-õú <õWþ¾%^rßDÚzИ”¼ŒÊ4ÆðI"F f dÙúju¤—DµÚjíî¶ÿ-˽‡x¢7IŽáIŽßã0²ÌO‹ÝÝV«Tjwˆ4­Õ’¤VK•wZQ3Þü>$ËÝó®=j·žj•2§¼þ»³ÓÚù»{Zâ3¿ëMÉøÜ«G3%)Ð Qìê´µš÷uùÍŸnµ4­U¹úþ»Q[­ÝonÏýÊeïKEñï}w·Õ*­žkí¼õ÷[­Vòò’öñ»“R©ÔÒfu •¤ÞïUmË3ÄAó·J¥-§½ôEñú¿~ðûJÅ“UQìÖWvwÛ“ºÝÝ‘õQäe …ðâXy±N–——ý“¤3õö$à cQáI|ïÂ…s$ ¢è}ËÐÊý¸‚þó’Ô?&•ûXŠ{5(%©÷Jß'E«Hƒ7 iÈw\‚«ã¢ˆá„`Fõ(ܯ¼A‰\ÌW”Ûñ TQ¸W=â3¿böHRÆaÛöhqMAÓæðlî<#Ëé*CÄ#Í²Ý Ó4§›= Ëòʤ+µsoò¿'ûjàOþ⟬è˜&àöß_ 
ëù²Ü{á“ÏÛ¹"Éßa\/àNo±éØYéå—ÀVÊX;|PJUðRo¿Vª¦s3ƒeYéŽW’íà¶Î¶[V[!º/nå®:œ^ f¹­8öªÿ7èYн½¸7ƒŸ¸Äs_ë1ºŽ§}ðƒxüýÐP1–…ussËËË`Œ1†ååelnnN½0ý$±mïr‹9m%ŸxÂ¥QWñxVÖ`Ö@²t3QùÞ{«ÅO–Ã9œ‡—_}¼¢@|ÿÿ‚p÷_%ýXˆÆÄ= ú`š^¦ÞUµ ©rŽB–½ŽÒþ®gÀ}iG©´=ËPîæ#°¿õŠÐ&?IpÀ×Þxç“ 쥳WQ €×W ïß~²¥1QUïÝè8Þ\>jøERX‡Åqœé&J¦Ù;Ão”2«ªmwà¨ûâ‚Ñgz²χ§¤/—ÛVÀn£[©„³ëëøêµ×uú +wÿ=qâ„ÿ·¬N³0ý4à%±dÙsÝî4Nº®šÄ$^:KQ ¤¢¤Ja¦ÿûÏȱ{Q0†êk»Mê!ß{îüêÇGÏc!Òü¢!掉ö˜f»¶)y IàºnºÜ£à.ˆXÿú<7GxßÈ2v» õ¿=;¼¿^0µ¬ÑÂ踛0OC2 …U–½ÛôÊKIõ›™g¾NA+i/ãSgŽ› årÿL¼<>5MUDJ¥ž}yÄŽa Æ©.,,$ýX†&è«iájœÎUš±çÑŒE»órÿî^ÇðÑuÌx©$H°¦0“‰ÆyDœËýRÓ[i3 ¨—ž†üú§õ>¾R™Ìl€ b2tüjÐ-g\׳ø”Ë©ZS$–e¥Âºê~ì‹ý md •ßÙ{>‘ ÀãY9º>zNJ~úýÎãÃ-»Dû)¬½ÞE¢ØnÄ…3M/— ›Âˆ­°Î\.Á›'wεmÛYXÇZ(`¬wÐÿnq1z9-(€3d}¢•Át3r¢™Ø»ÎÀþ‰ßŠEègOtmW~v¡ý–OÓ`Jð&‡²° #-ªFê`ˆˆa©˜Ð›r¬ºëýÛ ¸ïö¶!)ÃuIòæuºîyÑÉòøýo¿óø¤`mafàÞ2©J¶Äö‚”ƒ+•¼vç$™±ñ¬‘ìår`Ö8VsÇq&çï8½—öxv.éd†+¸»îLé×ÆØØq{¦ 8ó9k?«´vẮ}J%‚GѰÈ*Iß6Aø¸®ÛúѾJ>䄟—IKsbþáÙSO¶dš~àqPЇÄHÏÁôßæÈ“ùRÉ[315¿íD<¸|§®¬V*ÑÙ};…¾W03I¬,Á°²²ÒõÝúú:n¿ýöÐwIßS$®;xì‹ Ü^\ìNÄ‹8Ëd¼ØkÒøˆ)áºî؃»mگ܂²ëËýæñ‚@q{Dº0 #~àZç><‰bšþ‰ƒ‡iš(¥A2 ÈÕ*°77çsôÎ’ô…Â^„‚ûÍC“y¢'®ëN´¦üÔVV£æñŠ(¼½ †b ÂzäÈ‘PÂ%N6›Mºí¥×ª;¯Ç:¹’$*\J쌱±S¿kZ{,Ö´þýBÈÂD¤ž|&vqyÓôV\bþ®ëÂjh®M$ /e“XöTžát/±ŒÅD=|%ì…`Yíp-àh¢Ï.It2Ç!u™¯y H¯þÆÝ‚'•vú2PaÍd23[o5H¯’Aœ^i±ƒÇ¹n;,Õq¼ñ8$wŽÓNE—4eø"æÇqÆŽõvRF‰YÂ0 (Š¿ØöàGƼwyi€ËûÄá«2=Îíº€ñ_BiG÷ös]ØË]Ùv%œ{Ò4É–Cù@cš&ªCŒÏSÅuãyã¨*ý¸cp`bXÑ™p)Šb°ßñIÞ8lÒQRL‹âÜ(­TC,½t-Șft "Ûî9 2FY ‰ÙŶíø–ÕÎàYö&ÜäåE¤Û¶§“lI’¼¹N÷„evír°RÕ·8±æ³»<D±½ÈxsyŠˆòÆr¼ëO ™xlv8ƒ¿,ÓJÿ ­°nooc}}ÝÿãuZÓL¿*2œ^ —ÇsK7 .@ûã8!"F©W‘ß~Ý‚-kpCS )¬é¥+á’aDÿX½YÖÿ®M«ƒÄìašæpÙ$‹m*u]o1“ ÒÂT“ÑH’ç·[,†5«½š5ªêýY<…µTê™ UUÛëõä%IÄ%ñRMù¼—à† obrÄVXmÛÆ±cǰ²²‚ÍÍMÿoeeÇŽC½^Oú^Æ¢—K0_´, üó{5ö&ôã*«†Ør©}\°ë â8´2˜VÇiO`xí¯(Yã•Ò#~Hç3߆ôºgƒ f ×u‡/íQ*Åσåö" ðr£Óš*IÞ½XlìÈO"wâ!å3èÓkzÄó Ìð:ýÄásN"žliâ «®{òÜcÁÞ§X¤¬z KaÝÞÞ†¦i8rä ÃcÌÿ3 GŽÁñãÇSkmå®Z½Tæ@Qö Ræî®‘زöV×-+¶yѶۋ3]µÈfxæcš4Ц•…•'“éEçl\ì‘,¤›’¾‚ˆëºÈçóÐ4m*îcŒíeÎ&//"E0Æ&;¡/º¿ãÚ¦aÀ¾åPÚ|jdÙI×í?­á!±„Ç O÷…©%[âÉú-Tò•^c•w_‰UÖfmm ²,Ȩô,Ë2dY†¦iX[[ÃwÞ™ô= M/ë*ÇWv]©6xO¸yâë‰O„ûºe°¢§·Fõ'Çi»ŽU«Þ©òùÀ3nž¤þ›^Çñ&ì<[˜,û.\þB cÐ/ü ðÐ(‰áäî‚ø´ç'}®¬ªª:غÊgÖ1ùb£m󬦑&Qo»ã„‘“xÆ[,C+Që5¼ì¤,÷×h±§›Ÿ 
N•©$[âõ•zÿ¶½W„Þ©Ì1ZX···Ñh4ð¦7½©ï~'Nœ@£ÑH¥kpŸüâ%\à 3/"ÐÌJ¿rÖ¹€Ó¼ŠÒmiäÿæe>¸² t$¡ä)ˆCmKúéŇ’&¤_Y<á ΂&Š`Wü(ìÚå]7ûë¯Aúþï$}+›b±OY5Moå0æj›ey»K’W£›Æ<"ML<~•‡`Ì3ºö›6)JÛ‘§_s†-¬p ¨Ær¹E™¼·L„Œû Þ ¯ªôÃ$Ì@ k½^G&“A&“黯ËÚl6“¾§.ëïì8Îp.‚­œý (*€’7*çÅEïs¹ì ÊQ·û{·Cxó­Þ‡½‘›× –$JdDŒGȃ ˜z]Â++‚€ê‡Ûaþ<ßuaÿÍw!þÚÓ“¾‚ˆ c •AæOždlg'ö¬YQ¼1™&ÙDªÖp'B•ÕJ¥ÿb WDÉÅ•Ã0`YJ¥Òt’‰uÕ¨ @.4©a …uaaçÏŸOºc1¨jÌ —à'wÝž~-;;ž²ÚâÿxØéºßPƼü½{þwi§P˜-‹ðA!$ß{~ÛŽ˜LŠô=Ò´@lµ®¹Ôÿüd(·^šô­D,b%ÑuOY"yF0)$A¤ >Ö[o;DÀúÄÒ)«íöDäé ¢ëÀ¹s×%ÝŒTà8Ž¿3µÌ×¶M–¡` ÂšÍfÑl6Ñh4úîÇ.-,¤/)K¿±{PÂ¥ˆbÛ|:"êËþÕSHE–å½J¥p^œb1:ïAšp]rK#!—ཨ‰Óà¡¶ oÕdgâÛ^G“tbfàuú"±,ÏõZY å ˆ”1qw`Û·G¡à-hÆ}Ï“»/1.SK²Äéq"½ÄRX³Ù,4MëéîÛl6±¾¾Ž¥¥%ß58-¸nÿ3¶uÕ4»3§Nù‡¿õñ·£øè)Á¿óS­¶Ø4Oœèå”>B2¾÷EñÆhç ÏóÍ÷¶ÝþýÊeO [ •#ˆY gÙ]÷Æñj5^¡÷ƒBK"I\×½>e±>Ü]_&`]å¹—†©ÅS~ÃQ*GŽœMº©À²¬áJ’EÁ'ÍQ®Š‚0Ø ’H±ÊÚœâ˯éê+’DVsb¶ðãW¥‡ëz óƒjÉDÒ†1š%ʲÀ^pk×<ؾ9]¦Åñ}†1àÓŸ~yÒÍHœ‘=:á axYŽ\ŽjÍ ±V ­´Þyç0 'Nœ€a¸óÎ;±±±L&ãDZ¦‰A.,±]‚§éç.Cû\rK½Hˆ~DÕäãÖzIldzî»þW˜Ÿ»)éæÄØØ¶W9â¹þŽTïºÞ¼&Xžƒ Òˆišey¤|ì÷þX^ŽÞ˜Í¢T¢|4I°³s4é&$Šëº`ŒM&&›¯¼ B;AL.7ÙL ŸØ +'“É@–e¬®®B–e¿ÜM½^ÇÊÊJÒ÷ÓÅ kl—ài» Œpþr9]VVZ°J!]lÛ×@ùé‹R ììá\üܤ›KccÛ6^uî\ÏÌíqÑuo^3kÊ*c ù|>ô§ï™Ïcþ¿;É#X–…\.‡C‡aqq&™˜SeYÐF‘wÛ†ùè­p/ÍDVï ²zÉ@É+½E˜±cW{¡ª^hH±˜ômC0´Â:k˜fE*–uÕ¶÷GaÒ\*ŠÞ_ZæTS9}„<Ö‡®m×Þ~ ` ®»z„+DzpÿæÚkÛ…ÞGdÏK~\7x oµZEµZE¹\†eY`ŒùV‹è{¶ýgX,Q©TÐjµP«Õ`FÏãˆdá¿Ë(ÖUçÿÜûÂøJ)wôrœôW%˜wšÍ+“nB¢ôLœ7Æây,JMZgŒÄÖ~nÄõz}`9AXVHø¾H¦™p‰ÃÓ²I©D«qifÚò=ÇqÚ® óŸ^Ú¾—oÎWŸéå—$ý¸ˆ$i²²ÒÄÙš°_ò_Bß/.Æ_Øs]oá}^GŠ¢Q}Ev¼ 7 ‚€r¹ûøy"M²Ý‹¡²¨Úv(éŒyöPßòDÿ³axs&à ËjÒ<þñìËuÒ*ã¡r|qá…‚ùŸ®“ëß‘¨ÂÚh4"݈yFâãÇãØ±c£¹ºìÑoMæûá8©½ÁSX]7ù~ÙÃÓìÀ²òÝΟ5Ÿ¹î¢ÿÒ·á<ö9iq"Å$-ã0&àW.þ]èŸù÷(Û íµZü5GÇñÞ³œ8Òqèºîÿˆ Æ÷[\\„®ë~«‰Ö÷œÒ&Û½ªöêÞ$a/1<¬æ+Cs$Uõ”UÛž=Wøyc?ִʸŸ8oX ÃK:P«µ˜²FÎ Mâ¢Fõz›››‘Û×ÖÖÉdpúôi4›M?~ëëë#•ÍéçQà8NüN‘òÙ‹m{}µZM®’ǘÇ~Êw?:йŸ¾bG^%AÄ'}ö÷ž™ôc#fˆ´Èx×ûÐCøé7´ðÓå‹`ší±PÚïž<˜çÑã‰#9’4¼×Šžðj]…Ò"ä¤V«1˲ ë:ÇAµZB3ã¤Q¶{ÁÇø¸e?Œw> âseÌ늞/ðŸ—Ê×$ÏââGá8×Meê™v·m{ø±Æq<÷€ï³,§ÖÐDŒÆ@…µ^¯c}}}à‰šÍfì‹noo÷|y6 looãôéÓ€……,//ckkkèÎÂÝ{É,c,žü ŒÞü6\79…µsâwPÙ/ùD§ÂÊžúêHÙPUÀ=ÿí¤1C¤Eƃ|üãÇŸ\öWÖ{Yˆx…Àó3Œv¬ª®6Ü'm}ìtE¥ÀX–;‰ 
ÿ]eYöÇ]×aš&Êóâ'݇4Êv/†²®°¿ôl”ö+åÞá{Ö—ž÷¼Cz*çN»Œ3ƆO¸T,ÎÄ\‰YX°´´kßååe,//c{{»ËáܹsªéšÍfGò£g̳<öÏc×`‚J«ë¦Þ(<·ì—|Âqœö‚ŒëÂýÎe#Ä]üÁ J/Hú©³DZd<È™3ƒ#?}ñ@óhps¹ìÕY•åñ^G1ÌŠó¬!Iž²Z(x}x?âåy‡Yàܹs8{ö,¾þõ¯ïûµG•oÀ›ðœ9sbMxBò-Š_ø-Ê(}@8sæÌPa“d’cx\øô§Ÿ?’Ç̼…:ɲŒV«5ô¶à÷årŽãø‰ Óæ‰ÄÖýf?ÇïAôufÌ›’Z‹€öÏb>g=ó¿ÉÔ4Hbü;~5Éø7b,ø|Xf ÞÞÞîžf³Ù7ö8Ý:é7È_yå•$©ËǾ—Nêºî`…Õug6õ-/1ÅÕFîdii 7ß|3žóœçìûµG•o¸êª«°ººëeÐU²IQzïuû~¿D2¬®®âÚk¯MäÚ“Ããð©O} /yɹßyEÅPkšX^^ÆÍ7ߌk®¹f_¯»_ãwúÖ©ÔuÏßÝ·ï•“Éûú¼ˆÑàã÷$6†%‰ñ;cl°…Õ4ib;Ãð9ø°ã÷Hemn¿ýöÈTØ“àÈ‘#Ân Fc¤ŽËؘ5X‡Èª˜FÁ³´3«wÏ“”ï~DyÌŠœ˜möKƃ|ᣎ7|ÿ×’¾ubÎIB¶£à–ïHw`ÃðÕÀ¤ß¶%ÿm'’´ŒŒ_eÌ“ñŽ“hÖ(2™ –––°µµågÛ6òùüHçë5>Dz°26|¹ì)+5¦7MLZ¾{Ñ™PŒ1Z´ ö‡ý’ñ µÍÇá• ¹@‡$d;Šžn“®ëYŸ:²¥Ú6 ¿örÊ¢J $I¿êºžeµ\&wàHêbXÏbeeÛÛÛ~÷­·Þ:ôyÆ®ÁjYûºpK+¯=8i¨k|&%ßý`Œuûž§8="Ý쇌sø—Æ xêñ듾mâ°Ÿ²ÝE¡ÈrwÙÇñUËò”ÒÀdÞ4½÷3¤ˆ¸$%ããW cþ’±ITa]ZZ‹Ðt²Ù,î¸ãÔëu¿Qè—4ÌäÉ5»9[ÅaÌ+ãP.O¶ÏSn¦-ßý`Œ…ÜÅ$K‡D«ëÄ„IRÆ9–åà†Çþ ÿǤ1G¤A¶C0†Å¿üŸØ)}¶a„kâš& pþü£0~ÿÉPïýnÝð˜ßz ÕH'"I›Œ÷­¿jÛÞ >°¤Î%˜Ã뺎ÚQ\··Õ/V§~\gEñúû¤=U•J` øòÝÇqºW)ÉNì3Ó”ñ ÿ`9¸îª{“¾ÝTáº.ŠÅ">ŒC‡¡P(DNLÇ1†|>úÓqŒ±ÐçNÇA>Ÿ‡eY¡ï-ËB.—áC‡°¸¸Ó4“~œ]ì—l‡žË;ÎA~áWÀ\×]÷*är¥ i_üäPMaé úË?oëîÄ>„Œ÷œ›]‰Kl k0ÉO{•xi?k¶öƒ1OçŒR̺ð î9]– ¾¼r9Ïëy\e“¬«é!Ja-|èÐæ#$› B<ò¹ïâÈOìo¶Ø´“Ïç!Ë2vww´cЪÕêIJþº{Ix}V®€*ŠI’àºn_%Ù4M¸® ˲ül·Žã X,¢V«AE¸®‹\.I’R™­x?±ÿæ»íi°íM¼âGà8á4|¾|—‹Ïø.ðÕϸћÓÌIˆ1„êÆwbž`ð1à 3Pa=räNœ8Ñõý~®ºŒ‚ ôV¢úƯ:Ž'R©-¬Zõn—‡¾Œz˶í{Yê‰Ê€í>éZë‰ùÃuñÀù§àµü‹I·$5X–AP „ȲŒR©Ã0 ˲¯Lº® UU}…Ѳ,ߢɿgŒ1ÇqüIe)"¼@E_ÉŒÛÎjµŠ\.ç[Vøÿùø%ÊårìsÎ-® ûüÊo¼…‚—Ÿ s4‚à½äLÓ‹mU”Ñ rjS’Çqœp}>×…ó§%Ý,‚˜8»¥.\|<éf¤Š^– EQ ë:DQ„iš¨V«…B‚ @Eèºî[Lóù¼¯€†J¥MÓËåBÑN·ßÉ ÑŽ±EŠ¢À²,h𿻏¸EQ Ër¬óÍ;–Î gdB¿¯ë¶ÃŸ"ß¿¢Øö> sb6aŒáW_íMF;½ibI ¥Y‚§ ¯aÖ“8°«ª÷§ëž›ð°qí¤¬¦‡.—`Æ Î ¥•˜/ýùŸãù?(xa² T3J»³ò$"ýˆ²,D]+FæLA|K¥ªªþ¡ª*ì½vȲì'$”e–eA–eˆ¢èï?ÈŠ:°4<ëª$I°m‚ ø +Ôj50Æ`Yt]‡ã8uežElÕ+»ž­ëz Í¢ØÃx*Šíšvp^CÌŒ1¼P–»²\'¶ÂZ¯×±¹¹‰'N ›Íö|yL:±Ã¨ðL¸ïú¾~òJ¥ÑòóÐ"xzaŸ[€pÙw’nALCQðzéPÒÍ<FMÀDq´ã¢Ž ¼èDQŒ|÷ÊÝ\¼u]×W^A¨xŠ¢ræ.ŃŽ3M’$Á0 Þ"ÿónQöÛªë:LÓ gÅ=h4›P4ºn†~CQô¼¢¸…5A˜›šòÄ|â/€1Ö¿¼q ‰¥°Ú¶ MÓÉd°°°à¿ººŠ……ضz½Ž;î¸#éû eñóý䉞ð÷šeÅOÆDïÃt%ßîg¿ñò'%Ý4‚˜8ï{_Å¢>þ‰Æe”»Qƒþ\‹»þ­¨®ëúßñXTyçX®€êƒ¬ÆÄy¿Z–Q}×c(‹þ÷Q ¯pÀ-.å¯-ˆ.û10,U[ò "Ac^Ÿ·,rÕ#z«¬ÍÚÚdYÆwÞ‰L&ãŸÍf±¼¼Œ 
\ýõ¸ýöÛ“¾Ÿ^eV{&\rÝÁn] ž,¹®Û~ÎôèÒAԄѾûI'Ý4‚˜8_þò­ØÙ¹<éf¤ AP­VQ(ür3‹‹‹eÙWv\×E>Ÿ÷Ëݨª UUÁC±XD±XŒåÚkÛ6:äÿuæ‡èÜžÏçaÛv8ÆžEÕ4M(ŠQ±¸¸è·/˜T¸®éºndÙAàA)Ëíºò‘BüD‘|aŠèÏ@ ëöö6šÍ&Þô¦7õÝïMozŽ;†åååR›½Ææž —RâÊœ&4-za–ëöšæ¹^ïì$ÝR‚µ Sú§“nALÛ¶qäÈ)!I’°³³ãg–$)d¥äÊkçxÁãGù9ø¾Á}‚–ÑV«Õ³ ²,÷ÝDQ_‰-—˾{° :„'—k‡'Ëÿ …ª’ÂJ¤ÆVo¸d”èË@…µ^¯#“Ét)¡KKK!÷`¾ýܹs‰+¬½¬«Þ¶ —£Õ!Ú%¸XôÜy¢‡~Ï›Ø_‚.cŽÓž§Ð{€˜+ ýÂpÛmG“nIªé§ìËÇÄ=f¿èÕ¶…i¢RQý±{,K3Å)Æqd.½”&*D_*¬ 8þ|×÷I·½'ŽOéûÉGẔ9¨ºî=Ož>_Úq3üHêtà8\WÜû7•"æÛÆ¿<îj¨ÿîÇ“nÉÌ¡’â’J\× á3M@»î(OdU¾Í/adg™‚H<É›ÄëPÒD…èÃ@…5›Í¢Ùlb{{KKK=÷ÛÞÞ€Õ5mø~òQ0FEµ{¬=®(Ý‹¶MÆé4ð…/<„ þ–åýF1*]Älâ8¸ëOõu-n$÷¡8ð–Ë”bšÞ"c¥ˆù_ Õ–Ù˜ R¡¼ÐMÓbcX k6›Åúú:²Ùl¤BÚl6±¾¾Ž¥¥%d³Ù¤ï©çDÝqœÞƒ¾ãPgéÁ ҧÖ'’å·~ë|ß÷µ¨2á-ÙË2É61?0Wñäsÿ–Äš˜óÊʉî^Êý€pôÄSÄ|Áº•B“‚èO¬²6'OžÄÊÊ Ž?ŽåååRzæÌlmmùû¥ÆXt-7JH0¢и“<ÛÛßÁk^ó`øK]­ÔÄÌÃ>ñõ¯ãÏx•Ò"æžq_èVh…xlw`‚Hq2D'±Öl6‹Ó§Ocssëëë]Û———qâĉē-õÃ/LE¯ W‚˜!GÀòò3Ú_u•˜GöàƒøæãŸOÃvÇA±X„ªª!Ï"îŠÇš¦A’$0Æ ë:4M M(mÛ†a(•J$©ë’$¡T*1˲Èr2$VPGµ,r&æÛ¶£GчX +àe>yò$VWWQ¯×ýï{¹ §PÂ%Ûn×cERX‰™ÇKzÅ I¯jÉ3.ÄñèûÞ‡³O|"î½÷bZ‹éƒišp]·Ká±m;TnÆ4M ìììøÖ<˲B «eY~‰œÎs¸®‹B¡Ã0|¥—Qܪù˜lZ 扨Z°mo¡†”X¢±VÎÂÂBßäKiÅO¸äº^]–rÙûµJéÞ‰™çöÛÅ€ëz/zs†ëº¸æ‡Þ‰‹¾’tKÒeY¨V«ÈårÑ“Ä=TUE±Xô?G)ý\Ry­TÇqÈmuDüµ#ìlïe;¤çJÌ ‹a¼ù ­>xLÒ Ø/| «ax‚TÕë †‘tÓbl^ð‚÷áU¯údû Ó¤LXÄüÁ€—¾ô¥¨T’nLzaŒAˆ¢EQ`YVh»mÛþ_±XìJè#˲Lä³ã~i bhlÛ›¯è*~n%["æŠÈò’Œ‘—#1¡-¬³ŠëºïíÀãCÊeÏ5˜^ÄŒÃóÜþ\×[„±mÐŒž˜7Þ{ïsñ›O}*jo¼:馄èé ÁK`êz;¬$ykJz¤_Se¬]›{…F]‹ïϱ, ’$Á¶m‚˲BŠ·ÜÞØÑY›UQ†EQ`Û6TUí²ºÅ^UU¡ªjè¼DPrbNà²Ë6 ('’nJª±, ¢(†”b±Øee„¢((‹©\ ˜'¢†lÓ4!Ërϸc‚˜Ez*¬Ž“tÓˆà@İR2bžyþóÿ¯~u#éfÄô0 äs_ÀW¾òO4–À¶í®2(²,Ã4ÍÈýeY†mÛp:&<öuØ’*¶mãСCþ_¾Ÿ¿ô§—K¸iš»JÌ=.Ñ ƒca¥šOļÂÃË^F“xb>q^þ30¿û³È>þ²öÅ ê]§(НxKÚpøw¢(úÏX’¤Ð¾A‹mÔ9Oùíµãº^‡¢Èåü0^Ñ€¬«Ä<Á=…ÎlÀ…%\"b1÷V×uñÚË/÷ bÎpàÏþìÅøÑg=+é¦Ääa úçÒ/ÞˆõÓ§»’ĬbY{ʪe…&ì¦i’œsGOoj•B™ˆX̽…Õ¶müÛï}\ˆ¹Ä0€o|ãÃxòkßìì$Ý‚˜(–Îà^õ“Åv™‚˜l{¯@aù• xr,ò$ æ ×u=OGU ¬ÔLlj•$/ïm{²ìýñÌëQ³±écîÖOä#xÕý÷S2bîp`k«‰µD’ob¾p?öEÿp*Ÿ¾†ñ«CÇRDZqݽғ`Þ@¾gaíÌÖL³ŽãåroþÁôjrõÈ|¬ˆÕ©„r%R×½ýJ%oJÏKƒEy‹¢§°Š¢·/_ëEo}Hºã•»Ç;¯,“·ršH½ÂZ¯×±°°€L&3ô±®ëâ[üǸè–[’¾ ‚ˆdùö*VxÝ•WÒ‚ ‘ZF•qcåK_úx‚KyˆÔ2Š|[ ¼æ›^üÞ^½lß ErN¤ˆqæ(Œy"þµû_€§<úJ4òoFIzÇû^Ûåâ;K8s iQJƒñŠ–Aè=]âÊ)¿®m{ß9ާ,óT²ÜÝ6bú¤Vam4Ð4 
õz€—Ì!X¨<§ßòüÒw¾ƒ'¼éMIßA„W¾Ç+g³¼üì3ÀŸüIÒ·D!Æ‘qÛì]Úý˜#dª%ˆi3®|kßý]o¶½7K.‹d]%Røs®¬ê+‡-)xÆï­ùÚ¥(v—ŠO“+® ø^ú~{5Í›w1æYvóº.¹ï©Mº´¶¶†L&Æîºë.Ôëu¬¯¯Ç>þ¡/|7ýéŸâqôG¿J¤ŽqåÛ4Ë//cí&B“bÏü)ʯ» .ÈM’H'ãÈ·öì-H}ÜŸëºA¨” ‘Æ‘oÆ<ïßÛ_ÿxÕÉÃ#Ç6…¦Iò¦Z¥’çÕ¼»ÎÅ­°<¿+c@±Øm9&F'•ÖF£íímœ>}°°°€ååelmmauu5Ö9øáÆgè‡ð‚›oNúv"Ä$äû‹_ü"þ[ó¿ãÊ+_MË{DêWÆÅ·½}á óyhšFÉ–ˆT1®|Kß=ì¹þš¦ Û¶Që49DBŒ-ߢ‹Úóþ#¾òîŸ8·¾ãIßÒÔáñ²Ü­˜+¸†ÑNòäºÞÿ5ÍÓßÇSpyB( wB(Qlëü|>õ3ÍèÄS€wŒ(v'Ÿâ׎s­¨kGÑÙžÎk¹ï¾k!ŠŸê§Ra=wî ›Íúße³Y4XÇ?ü±áoz…w“\__­ÀÌõzõzËËËI7%QÆ•oø÷Ÿùÿá/z‘?é™EæQæ­ÏŽÊ$d\+• ËòL[WçMæ±ÏŽÂØò]*Áq˜†Û¶C5ng…y”…y믣2‰ñÛ¾ä~°W½ åßÿý¤ogd†‘Aè6"Ñ^ÇBñczíÛkIêíHÊEà9ϹŸû\²üʾ±¼×êwí(x{‚×î<îìÙ&.\øf¬gËI¥ÂÚ¯S4›M,,,t}ÿàƒⓟü$n¿ýv\vÙexð?ÿg|ö³ŸMúVÆâî»ïÆõ×_Ÿt3&ÆÙ³gqöìÙ‘‚÷'É<€}ìcxðÁ¹þ(òÍÛ}ûí·ãÊ+¯Ä^ô"\þþÎmo'r“ -ò0IÒÒgÏž=‹ûî»/±ë;†Ÿ?_ýêWñ†7¼Û3,ãi‘‡I‘–>{Ï=÷à3Ÿù ¾ño$rýQä;8~ÿÙŸý>øÁâõ¯=ÞùÎwÎä\%-²0IÒÒ_ùøÝh4y¾ãŽßðá§= ¿@ã÷@.» øÚ×¼?þ9Šï|à’ï|´½Ž -]t÷ÜsÛÛ—u­¨kGÑٞൃÇ=ðÀxè¡ =~§Ram6›=·?>²³<æ1Áîî.îºë.H’„Ë.» gΜIúVÆâðáÃ3AΟ?‰ßÓ}÷݇/}éKxò“ŸœÈõG‘o¸ä’Kp×]wáiO{®9z4ñç8.i‘‡I’–>ËÃ÷¾÷½Ä®?‰1ü‡ø‡Sñ,Ç!-ò0)ÒÒg?ûÙÏâÁÄóž÷¼D®?Š|ÇoQñ’—¼@òÏrTÒ" “$-ý•ß_þò—QX'1~¿ä%/Ių‡´ÈäHKŸåsðaÇïT*¬A7„NzuÞû±ÃýØ%Ýt‚È(ò À_¹$ˆ´Cc81ÏŒ"ß4~³ßDIe–à#GŽ»%$åA“†ä›˜wHƉy†ä›˜gH¾‰4’J…5“É`ii [[[þw¶m#ŸÏ'Ý4‚’obÞ!'æ’obž!ù&ÒÈ¡V«ÕJºQÔëu¬¬¬ “ÉøAÞ=ãûb– ù&æ’qbž!ù&æ’o"m¤Va¼Àïz½XZZJº91QH¾‰y‡dœ˜gH¾‰y†ä›H©VX ‚ ‚ ‚ ˆƒËEoûÛßžt#æíím:t¨§+E½^Ç·¾õ­Èíý¶ÅÙ> šÍ&ÇÁSžò”‘Ú”Æ{"FgùžÄöiÐOÆgñ~ˆñè'ã³(4†œƒ6~jSÓO±hSã=ïyOë‡~è‡ZGm=z´uÛm·µ~øaû¹sçZ·Ür‹¿ý­o}k¬mq¶Oƒ‡~¸õÖ·¾Õ¿æ-·ÜÒúô§?=±6'qOÄèŒ#ß“Ø> úÉø,Þ1ýd|åÆp‚sÐÆïAmJãýãAãwúïiR™%xh6›X__Çêê*c¸ë®»üï8kkkÈd2þöz½îoï·-Îöi°¾¾ŽF£»îº Œ1d2lnnÆnSï‰qå{Û§A?ŸÅû!FgŒÏ¢<ÐNsüÔ¦4Þ1:4~ÏÆ= EÒó¼r÷Ýw·Ž=úîÔ©S­Ûn»­Õjy+G ­ŽüÑýQë5¯yMßmƒŽ?üp×5Ï;×:uêT¬6¥ñžˆÑG¾'±}ô“ñY¼b<úÉø,Êáç ßƒÚ”Æû!ƃÆïôßÓ°<6i…y^YZZc,ôݹsçpÙe—ùÿ€l6ëoÏf³h4}· :vZðLqÙlõzÍfÙl«««±Ú”Æ{"FgùžÄöiÐOÆ···gî~ˆñè'ãó&ß“h3ÉøìpÐÆïAmJãýãAãwúïiXHaÝVVV|a8}ú4ô‚žÛšÍfßcy½¬Isþüù®{9þ<666 uœ6'qOÄdV¾Ó*ýd|2nÛvÏ}Ó*4†Q„ñ{|§qÎEL¿ÓyOÃB1¬ûÀ­·ÞŠ'Nn¿ývžôâè¹íüùó}åB=i‚«;wÞy'î¼óN,//ceeeàýÄis÷DL†aå;­òÐOÆgñ~ˆÉÑ)ã³(4†Q„ñ{Ð=¥qÎEL¿ÓyOÃB 
ë>°´´„ååe¼éMoò¤ƒf÷¨ý{‘Édú›Éd¦r×_=øî€74›MloolÓ¸Û‰ô2¬|§UúÉø¡C‡fî~ˆÉÑ)ãó&ß4†\Âø=H¾Ó8ç"&ßé¼§a!…uJ¬¯¯û+GŽv½i4Èd2}· :vZôsXXX»ÍIÜ1:ãÈ÷$¶Oƒ~2þÌg>sæî‡~2>oòMcøÁâ ßƒä;÷CŒßé¿§a!…uJ\ýõØÞÞöÍø`Û¶ÿãg2,--akk+´=ŸÏ÷Ý6èØi‘Íf»Rh¯¯¯û+3ã¶9‰{"FgùžÄöiÐOÆo¼ñÆ™»b<úÉø¼É7ዃ6~’ï4Þ14~§ÿž†åP«Õj%݈yemm [[[XZZ¹sçBҀ瓾²²‚L&ã5oll`aa¡ï¶AÇN ~M¾J5ÌýLb;‘.Æ‘ïIlŸýd|ï‡~2>‹ò@c8Á9hã÷ 6¥ñ~ˆñ ñ;ý÷4 ¤°N™`Êô¨8‰f³é¯unï·-Îöi0n›ÒxOÄèŒ#ß“Ø> ¦Ù'I¾g~2>‹ò@c8Á9hã÷ íi¼bõ¼NWLƒI–¸›0A¤^2Îá^QÛMÓ„aØÙÙIú6b$úÉ?É>1KÄ™¯P¬+1ô·ãÈ{¿±RXSŒã8X\\D­Vƒ$Ipù|š¦‘{$1÷pwš\.UU©6q` Ù'‚ ˆ6äœb‚.Á‡B.—ƒ¢(¤¬Æ> QIæ‰É>AA´! +AAA‘J›t&E£ÑÀÇ?þq\yå•I7ebÜsÏ=xúÓŸžt3&Æ#<‚Gy$¿Ñ#<‚Ç<æ1¸ñÆ“nJlÞÿþ÷ãŠ+®Hº#Mò0)ÒÔgï¹çž™Jò@cxúISŸ}ä‘G°°°03±a4~§Ÿ4õ×{|å+±°°tSbAãwúISŸeüž…õ_þå_`YÖ̼¼âð|?ú£?št3&Æ}÷݇ûï¿?¿Ñ}÷݇óçÏÏ”Âú‡ø‡3ÕÞA¤I&Ešúì>ð™RXi O?iê³÷Ýw¤¢-q ñ;ý¤©¿~àÀ 7Ü03 +ßé'M}v”ñ{nÖ+®¸’$auu5é¦L”yºŸz½Žz½Žåå夛‚íímœ9s&éf ÅUW]Eò0¤å7ª×ëI7a(h O?iê³³6†Óø=¤å7ª×ëÈd2I7#64~§Ÿ4õÙQÆï¹QXç‘yê(Íf‘Íf“n‘æQæ­Ïã1oò0}–y”…yë¯ÄxÌ›<ÌzŸ¥,ÁAAAD*!…• ‚ ‚ ‚H%¤°AAA©„V‚ ‚ ‚ "•ÂJAAA¤RX ‚ ‚ ‚ ˆTB +AAA‘JHa%‚ ‚ ‚ R )¬AAAD*!…• ‚ ‚ ‚H%¤°AAA©$q…u{{;é&ÄÔ ù&æ’qb^!Ù&æ’qbVxl’o4XYYc,ôýúú:677Cß---acc#ÉæÄP|óÉ81¯lóÉ81K$¢°6 Ôëõ®Á9wî–——!˲ÿÝÂÂB"ˆ †…䛘wHƉy…d›˜wHƉY$…u{{¶m÷Ü~þüy,--aii)±C£BòMÌ;$ãļB²MÌ;$ãÄ,’H ëòò2666°ºº¹ûÔ¯¯¯c}}|쉙‚䛘wHƉy…d›˜wHƉY$ñ¤K½à® Íf+++ØÚÚê»ÿ<ÆÖ×דn:‘r¶··ñÞ÷¾Ÿýìgkðò _ýêWéåAÄb}}÷Þ{o¢m 1œ˜[[[xï{ß‹ûî»/‘ëÓøML>~7ÄÚ@ã71-ø|Øñ;ѤKQ4 œ8q·Þz«ï3ŸÉd°¾¾ŽåååžÇ]zé¥xÚÓž†ë¯¿>é[ R€i’äý™&`YÞ÷² ?~×]w¾ð…/ì{»F•ox⟈믿GŽÙ÷v³Åõ×_»ï¾;‘kÓNL›l6‹óçÏïû„žÆob?àã÷e—]¶ïצñ›˜6GŽxsðaÇï‘,¬ëëë8~ü8$IÂúú:¶¶¶&¶ª’Éd°ºº ð^^^F³Ùì»2y饗âšk®!Ÿûˆãt6M@½Ïª T«@©0œ>Áu×]‡+®¸bßÛ:ª|Þ„gii ™LfßÛMÌKKKxò“ŸœÈµi '¦M6›Åu×]·ïz¿‰ý€ßI$:¢ñ›˜6™Lf¤ñ{h…•+¨Áwaa[[[XYYûF¶··»\Ο?ï_‡ Èç]÷>FÛ‚ xŠjµ Bø8IÊeO™=>ç’obÞ!'æ’mbÞ!'ÒÊP k£ÑÀææ&J¥VWW}×Y–±±±íímÔëõ±´°°€µµµÐy677‘Íf‘Íf“~^Ä>cÛ@¡VPÁ³––JÞgMó¶¹nû¸Ne5ø½¦¥Ò÷%r?$ßļC2NÌ+$ÛļC2N¤•¡ÌLç΀H“?æf³9Vƒ²Ù,Nœ8ãÇciiÉ¿¦aI?«™‡1O© ”ÖJ=ºîYE%©ý „?‹¢çö[,zŸ5-¼½UxàÁDî‡ä›˜wHƉy…d›˜wHƉ´2”ÂÊÝšÍf—k@³ÙÚºº´´ÆX×÷«««X^^î« ÃcYž;mV×õþxÜ)ÿè¯|r4Í³ÂÆÝÿG~dgÎL÷žH¾‰y‡dœ˜WH¶‰y‡dœ˜%†RX¹uee'Ožô¿o48uê2™ÌÄ;“ÉPr‚ ãºÑÊÜâ"P«õv£çz@¼óFÛRʱíá”ëJ%ìœfH¾‰y‡dœ˜WH¶‰y‡dœHC']ÚØØÀÂÂŽ?ŽíímlnnâØ±ch4ä2Ž_IsÏÄu½ïâ*«ºî%- 
bíøÒÎï¹›nç9:3ûr‚É“Na„°…– ‚ ‚ ˆÙfèT© ØØØ@½^÷ãU(;!tPïoŒyŠ"cíý!lÕì‡ã´J^ã”_*yÊoPñåed:EOéåI“‚ûF»mÁíAÁyì^æR‚ bþʺ½½ I’Ðl6‘Íf±´´„¥¥%RVDQ¢•Â($ÉS(ƒÖPn-]\ ï˘—7/S©xÛ¸e·\öÎ[,zn¼üx~Í ¦é)ÈA%9¸o/Ë+A ƒëº‘±9AÌW½÷½ø ‡ ‚˜k†RX³Ù,`ÓK"5B´Kp”+.¯MÊ-—–ÕVE1¬,òM®¾¢è)–šärácTÕ³Úvø¸BÁÛÏ4Ã× nçȲwmÃèv=&ˆ¸X–…|>wV‚š ‚èã´WC÷¸¨Ùħn¸!é–AûÀÐY‚WWW±¾¾ŽF£¹ÏêêjÒ÷t %l˜»Ü“I’÷™[>-ËKÄ”Ï{J¦izûòcUÕ;&¨xÊr;nµ\îþÞ¶=¥™“Ï{ç Æ©r…•ÇÖÄ(˜¦ UUaJiö-gÌø4·‘ &ëzJh¿”îºîºî½”ö¸÷~N;Õ;A‘ †Ža][[àŽ‚Öýö=‹i”çcPa<…SÛÊ¡izÇóý¸¢xÿ—eïÜ|~ÀX÷9£æšÖ>ž£ªžË±,‡\Q잓Ȳ§ðîìŒöL\×…0étÇÄLÁƒ (•JX\\„ªªӚ˶ýlš&\×…F+5Ä<Á˜7ØÇåbÑSX h× /äðã‹W·Œ ‚˜+†VX).,=Èr[Á bšá¤H€77ÎÙƒÉø¹~æg¾YþKH’:ÖuÝ=Ëè`E·©îŽ„+· £dyø’6PÜ{åNÍšH-Žã@„‰.2˜¦ eO¨4Mƒa镉=WGÓ4aîùÀK’9Å’ bXLÓ{Ù”JíÞ4§¥·íx/ RX ‚ C—µ!Æc˜d0Œ1:3EP.{ç- p¶í½ïƒŠ¬¢´ßó¶mãê«ßøgÏç‘ËåËåð˜Ç|››‚a}å–1†|>=ªnMàžŠÅbߨæÎvþÝînôý ;_·mŒ±½û§ëY€1†\.ç/4Œ—{kÏ=Àu]ض uÏ·]UU0Æ"û\¯~èº.LÓôû†®ë°,+2Ö"KXäõll¦i¢Z­¢\.£X,†¯Å­Q1àòoÛ6¾öµ¯ü\‰ô`Û6r¹ÜXñØ\†÷ÓÄwßõ.üÏ׿,¸ZªªÞ ÏãP¢oº[1U”ÞûA!ø|hМ˜Ïá:äÏ÷u]÷ûî±÷ßãè8û·‹|p¨ûIaÝÚÚÂÊÊ $I‚$IXYYÁÖÖÖ¤žñÜÀpWþâ(¡–e¡P(@èºî'- “¹®‹|>AP,ýz«|Žë Ãб¸¸˲ð_ÿë ñáÿ_T«UÔj5Ôj5üê¯fñŠW¬£^ÿA‹E_a.•J=…6xmÃ0Ïç÷_a´,üß·¿¥RÉ·¨ÓDz¬.åÊu]†1p€ã²UÝ jþf®ëB×õ®s¸®R¹Â+I Àiš°,Ë·®rJ¥ …Bh5M…B¹\Îÿž_7ŸÏÃq”ËeT«UH’ä÷Ýà9ŠÅ¢¯Ð‚_oqqÑ·¤Ú¶?ûÞ÷ðWßþ6þöþ‚ @’$(ŠÒ~®š¤G*ȶ ˜¦ßvRXç ˲ IÒÈIÄ Ã€mÛ‘ ë8‹E}±m<ò{¿‡—<ôîÿæ7»úo³×BL°öZpIòâKÈÓ‹˜s>ýéO'Ýbp'z|ŒA¿¹¶eYÈår°mÛŸ›/..vÍÏøÜF´Z-T*Ȳ Aü¹Ÿ»äóyÿ<|îÃÏÅ•\Çq`Fh¾ÿ-..âŸÿùŸ‡¾ß¡]‚×××±¹¹‰ååeœ8qÐl6±¶¶†z½Ž“'ONüEcþ-Ë2dYF±Xô•¿Î‰µ®ë`ŒAÚ[U¶mÕj‚ —Ëaaá?akëJ¨ª'ˆŽã`kë6Èò^ÿúÛqñÅÿåò(‹¸ó΀ >ÿùï!—»’$á3Ÿ¹¿üË÷çSUϘó«¿z–õÈçóþ„=* WVUUõ-Z¶mC×uȲú]ø÷Üž/ˆØ¶í/”±m;;;¾ÕŸ¿âÀqªÕªß?¸Œr¹TUÕïð¶†y Þ^Þ‡¹gKp\v§}ÿðøÇãþüÏýEÞÿükñØNxÌk6€—Þ{/Þ`xÂžÞØt‚ƒb±H Žs‚ëºp]7r¬â ÷ªª¢X,†ÆvƘ¿ÈÈßû|Ñ>8ŽóùŸí|Qïþá ²(Šþ< ðÆx~¾A÷er¹œLÔ»‰·—ÿ›·ë³Ÿý,Î ›4¯5çÎk=z´U­V»¶Ý}÷Ý­£G¶Î;7Ì)'ÆÝwßÝ:uêT"׎BUÕV¥RiµZ­–¦i-AZµZ­ÕjµZÕjµ¥ªª¿ïÎÎNKÅÖÎÎN«R©´4MkíîîúÛ+•Jë™ÏüÓÐj•J¥–$I­R©ÔºúêO´Ah-/¯µÅÛ÷ÿý¿ó­……?iU*•– T[o{Û{Z;;­–,Çk÷^»ØÝÝmI’Ôõ¹\.GjK–åÖÎÎNÏkíìì´ªÕj«Z­ú÷Ë?˲쿳³Ó*•J-QÃ×ÛÝm}çE/jËdZ_Õ«ü¯ùñ»»»­J¥â·EEÿÿ/yÉKZÿþßÿû$ctn»í¶¤›à£iZKÓ´V«åýŽŠ¢´TUmíìì´vww[¢(ú¿)ÿ\«ÕZ;;;­r¹Ü5†ÔvC§[€IDATjµ– -I’ü~R«ÕZ’$µ$Ij)Š:_¹\ö÷ 
%×à²$(ƒýà÷Êû6§R©ø2ËÿDQlI’y½8ÔjµÖ¹W¿ºÅ £«ñqBÓ´ÖÛ.ûÍÖ­/úë®ãÓ$/qHÛ> J¥’/œZ­ÖE±¥iZKQ”–,Ë-EQü}ƒ}£V«µ>¸·<ùE±KþZ­–?Nr¹Ð4-Ôù6Ž,Ë-MÓZ¥R)t~lµZm•Ëe¿¿V«ÕV©Tj½ùÖ[[ç/¾¸õw/}idlµZ­o]{m×wü¾ù½þÝÿû¡S¥Ri)ŠÒúƒg=«õžW¾²U©Tüóów jµZkmm­õk¿ökIÿı™µþHL–Z­ÖÐõŽ*—Ë-UU[årÙ—sUUgN^æqü—jµÚE±%Š¢??êÜÆÇ¸r¹ìϱù|·R©´ÊårKÓ´Èy'8àûî;;;=ßýE^†RX¹RÚ‹[n¹¥u÷ÝwïÓcÿæ'EçD€+ ýE±õÁ>ÜRÕ–?Xõãê«?ѺñÆ›[ªªú¨K”Ë­Ö­·Þß¡õ“?Yo•Ë­VµÚjš×M@¤\nµ4-4¨öRVƒðIqÓ4-rBÖy]f­ß¿ürïsÇ=Ȳܒ$ÉŸxù Ã^ÇšÅÁu?^`ÕjuààG¾ùK¸Õ +·ý¨Õj]ŠŸœX<çÖîn«úìÛZQëB4áÙ_øØDQ”ÖÎÎŽ?Îq¥­—Ló6N©TêR(wvvüñRÓ4¬ã ™\¹¬V«¡ _8jµÚ‹z q«ÕjI’ÔEÓÚ¢è½`‚ìîö]!­Õj­R©ÔRUÕo7ŸÐU«ÕÖ7_ð‚ÖûÖ×}…žOø:ŸÃ¬ÉˬõÇ´#Ërì‰y¥Ri•J¥Ñ@Q”HrXjµZ¨–J¥®FÞ÷x¿ãó8¾S­V[š¦µ$Iòûÿ¬ÉˬõÇiÁÇ^þ{RY–ýEsEQºÞ|‘18ÏŸWF‘—¡]‚Ïxaa¡ë»^µYç™`lw¯âÿ(8sæ]ø«¿úE\|±=0‹éãw~üÇ_*yå å8ÀñãOÅææ®Ÿ|i`ØcýÓòî™ó5Móc[eYŽuñ„<éÔ( ø®mŒ1Üpçø±•Ö‰¢ˆR©äç/¹Åv (J(&š1ÖUæHET«U0Æàº.E‰ÕGx_âï,MÓ|wdwÝêérÌËÍÔjíþÞ‡êº}³æq—âž¼úÕxÕ߈WMñ7$f‡(—J˲ ‚Ÿ ß\„çṂó—r¹ìdž[NÌq?†OQ¿ –e!ŸÏû¡*²,#ŸÏCÓ´PxØââ"4Mó¯K!Lé'˜Ä¨S&mÛö+ˆ¢ABóž`è\/J¥Rìñü 2”º´´„L&]×qòäId2@£ÑÀ©S§ü} |"_.—!Š¢Ÿ&ÎýÈ‘·à×ý–—ßIIæT*•ž±LAx|¶¢(¡¼Œ2©á“vÇqPÖ4À¶}¥•+³}Çgÿd:ë§ž ŽÓ?ÆÌ}@Ì<™€P<ž®ëþ{…+œ‚ ø•DQôå¹Sqè¤Z­¢X,ú &õOonš¦¯`áJ(ß§X,†ZÀ{7vÆŽéÄq?^ŸçÏþæöÞªiÚDæ9$½ÚÂÊ_ÈÇŽ }ŸÉd°±±‘ôýì+| âV©TË墷£ê‡>t5^ô¢ï¶ÃÖU]÷t.Qì¶ JR¸TMç% ëk¢T*@¡0À ¤ªƒ-¬{´R©$«|ÀSZ5Mó¬zü!E¤íD’ÚÚþÞè=¤>™æYuùÊtœ•Á(ãAø‰¯kš× Æ@mëÒ¸XV¸ƒ[VôD½SuÝг¶þþ*H?ôŸX•Çq°¸¸’aÓ4!I’Ÿ‹+•€·‚L´×©°Æ­7ÌÜ“·ºðÅ(MÓ<åÓ¶ñ#¯}­?é²m»·¥i¯fðDäŸ ¸® ˲B™¬yr¹Z­æ[-kµšŸðŽ+–ÕjÕOè§(Šß÷x?ôî•JÅÏäÎ=x‚˜`ùþ’$u%´é„[Sƒ ø‚b’~x90nZÌH|~s€ZaÍf³x×»Þ…üÇÄ—¿üeÀÂÂdYö-®ó _m±, ²,‡^î’$E»{1[­@fÆÞ9¼¿rYëÊ(=]ŠÿÅS\+ïpÃðô1®oÞçÎ~ÒË二¨ÜÛ‹€;eß Wʹ¹wšŠm§»¤(zíì˜ÐwÁ.)¬]­9Ü:KYžÀuÝø‹õľ1”ÂÚl6±¾¾Ž'N`uuÕÿ~cc›››¾";¯–Vî9tpþý—@yà]€-C·å.¯BQôÞóÜŠ x êœr¹=àI” {rcgÿź¨$õ÷%Œ³ª®ëíý¸©¸sòíºmò¸8N÷ùãÆ>É2ÿíß/yÉøí˜#c£ÐiÁ÷’IŽ¢ô_d1Ͷuu•Š7 ç+Sè:r~¡)zœçÇÝÞ÷,R–^›‘®üìw_ùõOKê)Ï Á:ÙÜýײ¬.‹i¥S¡üš£ãÔ¡æ–ž'½óÃǃZ–'ë\ž«ÕÞr»û/ɲ¯”h{ÄUDxÒ5 ”|-6®ëY’IqM5¼nd©TòcF-ËJaå¹?\×EµZJ9¬NÃÂOh¸Û¯¢(~":"ïÇ’%“H˲üLÍ€—ЫT*‘²:mØ{æ£0”Â:ˆóçÏ'ý8¦JgØÒükZüy@¡àéYÁ÷$u[WÇ"®‚hžÂý`ºãI;ÏËå wœîD8Žã­²c u$Aí@pï/üÂdÏ?㌬¬Þ³„[ù40Ív¼h¿—Q¯Eþ|: ñú+Û_tȸãtäüâÿ® 
Þ’Þö¶î.h›ù[@}#Z7‘$ouË0Àîj†tù'žö/m§™+›Í¤ŸþLÂcPƒýA’¤Øñ¨‹{2Œ°uÕ²ÚñÑüs§p–a&^²Œ~ðAH’äµÙ0†ÁðÓ(ñ¨¢èÝ˸™·‰©ÁÍN 3ubÌ#Çñó"Œã}@ãP,ýŹß[€Ø,«mD¡Vîî»¶¶†f`2Ôl6±¶¶ÀKÊ4ô+/0ˆÅò[»,(qàq©Á¾ÄóÇD-FíM5Ìjx¿{Š휑wfAå>ÌÁgR,¶÷á I'©ÎfùàýžiŒ÷ZÿûÉzŸÄcΟ‚py+,>òiþÎCÀ£†TOaÞC0 T^·éx¶[¼÷~ÓÌï ZüóHØãÀO~SúO„oá®Fõ%oƒiN˜Mô†g…‡±WêùK=(tªê£¦é-èéz÷;e`†½d—ïìx1ëü|£Œ•Š2Zh€¢L?I1Åb±gL/ÃÔžåWÓ4Š ${^(G*B4x˜Êºgh «aØÞÞÆM7Ý„ãÇãøñã¸é¦›`Û6666°°°ô#™ £º». >ñ+#½Èy¡Î~%Ia«ëÈ Ê¨Ë÷qœþ´¨Ø¿àŒÜ4½í×Ò4ofÏ{°`!É(—Ë(‹B'Ü-­ëG ×£ ®ëúå8€ (¬@:ë'rÙí·þ÷X$±¿üH'^ÞÔ¡àZÿ÷(ýÇûÃvZYÒ;ŽGHìÝ£0M šû/^ór á}\×+Å (ZDÝ+Æ|oÎæt,ž6é ñ/>Ü=fõZ`SUïCÒ)¯Ã†Jáì¿Åb8Â0¨êhVÒ`ú{"qLÓôkŸž²Ê³bG!Ërè]Ò OZV­VÇr“'ˆQá¢(Ž”¹˜<ù‡ÀÐ k6›ÅwÞ Ã0°´´„¥¥%œù!@’x>œÐí ÔD• «F…×nœº²°ðç¸l3æ…E¨j·0ôJ¦ií7¼`÷¤à5ÑF='o1³8ŽÃ0ÀÃââ¢o‘ê7Éç5K£5¹ALžà‹W÷ WôáïYÙo`–àF£f³Ù¥ŒŽRçj–à¤ù@=ꄞ1@8ú}€xQ÷ä%Qý«¤ìÅ aLך ù¶WRšàwQ/¥^+È]Ûz=»^/1IêŽ×вÒFשìP¿.lÛ†¦i( ¨V«ãŰŠâdŸ/O3îË%N‰£`y¤(ÅÞ)øúIHüöú†õ»÷B~Ù7ü}óù@2¤½ëG-0E%SÓ®kgÑîTN%)Æcæ E$ï#11…•¯ntf5­T<Ù^\ôä§Zþ­âXa²nø¢Hq¤Ã0|·]ÇqüZă$©«ï˜¦ Û¶‡+– 4lÎܪ*Ë2ÕR&ìPn»˜ä@ÈMë“8OPaí¥ò™u”BÌD<…C×G›Tu¶'n jÔ ,dKð`jµ$Iò d̤Ÿ­eµPÆyÉtÆhçóžL;ªã„ÝÐ#ž¿½®ÛÜ+ibÿËSP~ÏÕþ)¢*/DM†:+¢H üÓû#åœçµø3Ñ òXð…œ‘qœvíÓ¨º¾|\TUïß4‰"R‚ã8°mÛ_¤E1¶EJ–åÂjÛ6LÓœ™24¶íMU*•É8-p§£8 0OþMèÉÂËt‘Uu xR©]]Ãu=/!>9™²s Kp&“a8}ú4²Ù,¶¶¶püøq¬­­Á1p6ÈöövÏmõzFcª Ó4¡( Êå2,Ë ×§‹Ü¿w qÈ9©ø(ÂQžC’z»÷ò‘4*ŽuÐ,Z–G¿ï [qg AÇñ„¿9z¤Q¾c¾‚ª(Šÿ76QñãžgggüÉ|ç, JÆûÕ^`¿û~8w~*Ô´ÐùT.ûÙÐ9¢D-Îê}©„®:ņÑöÖŸD—Ÿi”ñQàn#¯Äëº÷ò–¤Áò+Š“WVGðà!ú3/²n]®°:Žƒb±]×Q.—gƪţ†â&/TÄÀuãç1Ívq„$˜Gw–e‘²€W Ãðæ6½„™×ë.—'3÷ÀP1¬²,ãäÉ“8}ú4–––°½½=–òÚh4°²²ù=OètìØ±ñV¼G€g‰Á½à kg%Àûz514^OÊÕË:,3ëAçä™}‡!Žo¿ëqwƒaÎ`,b¬À¿éfù.ÀL4s㸃÷»s¢ãºûøEaet„Ò;ÖïÞ ç®/ùŸ»BkƒtÃ㻎ÓõnoúN6Íöùpïúx—V+ŠíÊjS‡‚¯²Û¶¸ç†ç¸ærçÊÙ ËÉÄ=b¸B0 ‹agãz™†ø÷’´¿.l<_KÐ̓¼÷‘4Ë70áÄ2“&îo­ë}—°- пó›á/;—Ðc$ã’.ú8äãO ‚wAÆ€âÏ}·g{yózÁõ]ˆ]ƒ¾¢Ä ÙoÒ.ãýp]¦ibqq¹\îÞÀÝÓÀ0Æÿ‘øêŰ)à‹Åv&vblfY¶G—û×U«Õ°³³“LÙž/cDœO^€ðsÿv ’iÛíñ¼×åòùø6~®R)°ÿ^QïW™^ìï¼É¸aÈårÐuº®C„ôÎq¦MºvüÕæ|ìaÏòÖÇøÄ_)>ªþÂuÛ>ôƒp]è?YïZìϾéMCßÚP k§ëÀöö¶o]½þúë}·á8çÙÚÚŠÜÖh4°½½'N°¼¼¼o±¦iBUUFô頻a#·ëY÷gÙŽÓžÔŒ`-H¯sr…µ³ ý€cÁ}â-k8·bžð)!Ò,ß|r>ÑŒÁ‘h\yˆãúí8^Ÿê3±WUÀyèpøËÎe晴7Ÿ¨A®Xì¿=4¹à³ 
âÖÏ=wTErX—_î«8M\׻牅Ò/ß_yä¢ß²sÜDaÁ¬ýâ&J¥ž2Á›Ð5–FÉwÇ$Í…=g{ó†ïû|×I‚·X©ò_ÿ§ÈvkcÇ)OÙïýÊóó¤‰4Ëx?¸µT*…’ÊpWƞض¸–Ï{¹\w€q‘eO¶‡=W¯‚ÝÄH̪l c …B•J%«è$é¬y=Ìsø‡G ~ý€,‡ÆíN¡ýzêïL´-^úe°è?!×4@÷ÆÛ†%•`j;€ªbqñ£S{\ó$ã–eAQˆ¢Ûç Æâ—úãõ0!"þTÊ0PzÍG _ü?z*¬|zò¨æ )ƒãù;Wa|ü•Ñê‚ëúçvþþ^°Óu¿÷k(—=ˆ'»pÝu¸ö3Ÿê1ÄRX5MC6›ÅéÓ§ýï677ýïî¼óN¬®®NÄe _§h6›=·=òÈ#¸ï¾ûú‚gþüçŸzÉ«mc¹Ñ1jñÉqùûvÚ_Œ«à­O|¥×õ&T“RXûÍ´-kúyàyVÍaé“J¿ÑhàìÙ³xðÁ§Ûö×îE?ù€ .`{{{¬‡mÛ“_I祟VÅ—§‡!j+¨ª=K‹m«fWq]¸î^Œ†¬‚u¤ï¶,Àm]µ"£úSÿ»K¡å«æ¾®ÒçÅ´²öz4º.;)¶··ñðÃOö¤1Ir „mÛ]“™r¹ ×uû/ä‹íšÑÕª7&:Îö‚Ï–‡U9çÎKLaU¾oÂsæÌtÇ™ì¥Sžeš†¨vÆ@s¥¸ð€nÝÙçi/Cîè÷ü\^‹Œ\–eôôÅ›¤[’öbi,Œr{’œ9s&‘Éì>ˆ^ò_«Õz[™¢ÆTYžL&ë |•tKiJÝõ¦ER kÒã÷$˜eðØ õ bÍ×ð{9ìˆ_ü $Íës½Üx; \Q¥Ç- `þ†7¿²,H9åç/ooìX€åÕ¯‚}^Ӽ׿÷[¾tߘ¦ Y–g&õXDMhú¡(€aD–L’e€Ijd§(ÿø{YîJ Ev?éëò^.…ÄÃA}GÛR¯iž—ÂÏü̧ñ÷_Œ;o¸a¨G0Pa­×ëXZZòS~FààŠtpû8ô‹Íd2=·]yå•$)ÒÅ!.Œ1ȲìOF£V¾]*…뾓Ò`Y˜^«$ƒòšw¦ åËAxF(>¡ç±~Ás ãeÍåÉnxv°dii 7ß|3žóœçìûµG•o¸êª«°ººŠ¥¥¥‘®,g31:4û ¨R'Fn |iY€~ç ýc]å{ÏG–»›#üÜ¿Eu³Ei'4 â8ƒ U•JÁØžî-*O8PaÍf³¨×ë~¹šf³ Û¶»V ¹+0öQÉd2XZZ %p²mù)WV®¼÷ó¦åŠigyÏ( c{ÿ4Ùáƒn/W]‘výN¸?#‡K_¹©ÕÆŸ+Š×±æeØg’’o íA0UúYÝûùÒÚ6*’îG©¹“AhU/Ð)™aÃøóžØÎE$î¶ „³öN AèßUd¹o¸õL’¤Œ÷c$ë*AH«lÇ…'¨I”`µƒñn&:„)ðΤpžÊÎ×õøã0›¯ }§üòsÏ>Ô\ {Çó —ƒû¼—Ï~v×NĄ́ú7ßñËÿYH®‚AfAÆc0 cö³ŸeÙû+=·€ÅÅp(J‡ÂÊØžú|´·Çc¹ vñK»ô“¨ÈÓD(‡‚(vÛ":/Ñù™;qòÒ¬“&–º´´„••¬¯¯ceeÍfÓŸørV×u,--Mdfuu[[[~Ñâf³‰[o½uìóö#è.Ùkå+¸€ÁãYcѹLubž–¸—ªÓ%¸“^É:¸Óx¥ÏJ\´íîKTœ+1IÈ70¥ »m‡å*êü{uåúÊwT‰™ kµê‰_Haåuz °Û?ñø ¡ó,.†›•$xÄjD’’ñ~D%\"ˆaI£lÇטO¼ cÅ@ð¤wöÎ ÇÃ+ú•ö _ÞÿþÐw‚,uMgÄ?ÚSo÷_½ªêùnÆ;;`/ÿ‘·ÄKÜ8`ýÅ m-ù1®i&Í2tkŸùØUÃë{y3ÜßùèÊL1 v¬¸Ø6 ßømØïødß•ö^©f:=¿d>÷§¾°óR«œ~ª ß¿XŒ25 ³S©TÂúú:ªÕ*B ˜n¿ývlnnbiiièÕŽ¥¥%°ˆ§ÍfqÇwøVÝýˆùˆ*ßÙ4׿jໟt&êˆòvÝvÀ]/ä¨gËÝ„ùÈu\T}žþÂûÓ´‘ÓÄi’oîî>ÉWF£–¸÷Ðÿè¹_úƒpô€¨wT_ç¢hùR¡ ÀßÜ5Pv®uÜüœ|`žõÛý&M2>Çqâ[— o0ë"bdfI¶ã ë*wq4£åaL@{±^Ó`šž%¨ü³gÀî{XýÐŽŽÛ³"˜f‡g‹ëBþÍ|—•¡çãe«ç ® þt@(ñeðT½"¶øôŒ§nz‘çR“Ï{ÏbÈ,©“fe¼P( T*¥ß[¦×\žãºžàt*›² ¶gè2ŠñEIòR+´J0žôz@zfÏËô2¬quƒwEU°BmîL¹S,F{ƒ žóÜ„ÓüYb)¬\IbyyËËË÷o_XXØ·Nâýp årÙ/[ÓK·T¹£ËØTŠác{)‰Q<%¹DNâeÔyãTÛo*ì§|SŒ_ß|›÷Qç ‡ ü—o ( «þº/B½í"ˆ7^Û= îÀºç#¡Pë^ɸՖD}²ì·ŒwÒãÛÃ@×»ý¡"@Ò²= –e¡’dî ¾taëœ4qø„OÆt$0&£ôË_‡ôKÿÂþœoG_ªk’^ô’!Y‚ %péž‘V’÷ïwýÐ@I‚§†7kïhs¿‚ 
¦é]Ço“ xÏ NªùI£ŒëºY–“_x1 OpzµÃ¶Ûš\/Áˆ0Ýs™ì¹¦È$© ð¸Aþܯôl&¯<%ã<'+³’@ˆhÏ ¥]±sÚqÙ±ê°^Œêúú:Ö××C¥k2™Lꂱ‡Á›K·ð裙ÀgoÛ0}¢#Ü®ÿŽpÓOß<Ñð‚ñ€7iŸÄkØ63ã8Ó‰_3±¦ÞíaÂd»ßéX¦»ðÞþö¾ñI_õ¿Ž V}мFO»šô;˜,º®ÃØ›øÆN¸ÄƒÎÈÔNÌ©p؉mÃy_Ýëƒ|æ^­{äk?¨*į·Èȳ¬îYhùûÅAB–Qúö/AQFÐe•»÷R ïz]ÃɰžoÇeYéÈ ¼Wg´'<ãc?×oÃèZYé,µW(tLÇ÷jî9{Î/½³¯+0wéí5=ã廽ùÔ­·v *wŠë—’«“ÎÿE,…•Ç®nnnbssÓgžò”óxà a½/vŒ*†2î\á .ñu¸@dØýãÂ¥4å+€D|lÛž~Â%ïBÝÚdŒZ1âe_±^²w\åIo#”}½•Õ`Ô(—`b>á Æc°m;žu•áìÇ›— ö‘‰¸wÍž‡¤ÓÅ¥#A’¿Û[ÖQxôÝá×ÈÞÀí8Å…Dé †˜ô;Ž÷Å^ŸVÕvIûΕ!ö®'‰{Ú-7EE¼@¢*Šé̶JŒ/a“ЏÕÁ!sªÚ]è½}3XÜýgØNxÒÒ™£µWaã–O€½ö·½ª&½.3(ªEUÛÓ³¨ýxžÌÎê…(“OI,…ussËËË`Œ1†ååelnn&V˜~’È2°¼ü[øþïÿ÷:ñ°í®Z\ýŒIۢʤFÒ9™Šóë¥8v$¥™¼Þ÷M fw22Aìßù(Ä«/èQ/1ˆ/½*ôUT±¨ùßgP8 1_pøR©Ã0â%\Ú ‚Øg\×…išã+¬eczUf£ó=Ôc®Ã¾ÿ (m,øåÀ¦®I1/ Ä4÷Þ…B;eéÞ‹€O¬ #FieMk·Ù²"'t¼r 9¥MÛ¶á8Ôa¬HӦˬãº)¶?q]8ïü+Ï|òÀ<9A'Ê Â±„ò»^SAˆ^0‰ÊÕNÓìýÊ㞣qêÓïVîþ{âÄ ÿ;^˜dÏ:Œ1ll,xuùo(ü‡Ã¡ýTµ÷Öiñ@„3x$“¤°>Ê%·_:ñiXA{õb&Ù7ë*'(;ƒ–æ°íï@¼êë¢ßú/}Û+ *åzÔ»Œw§¨”ìÄü“èñVÓ4û[X¹‹ 1G0ÆËå ªêxîÀ|ÅoÂÊÍ—Á¢õœ¨Yo§,ŠPNÿ$d¹[Ÿ:¼ÓÅ›)ð´SYîZÉäJî@݇k·ü~"ÆþHi]ºpëjªˆê{Âè[ù£Êè:Ä_øq”þÛE]ÓìÎ…õ^ÖRGòàN§ È·¶Äz­eq›Ø "%ûEìÖ`œêÂÂBÒ힦 Ôë—BE&á#]ù¥Øç )»{ŸÜ?|úòN­xPýUQìí0*¼#Ò²á\0Õú“‹)¤¢—Å;äÕ}öõ~ùÇýÃ;qûWAxÍË».\Ù½æÕC%æ‹`™2îFÖ³‹ž ‘+01G˜¦é—ÿ{²Ï‘E)¢{8à¼ëoÚã{çü!*ÏFǤ?øjè\»—åîõ¤ÎÒÞ¾uh×Œï –u5ü {j·|HIÃd~^áñ×ûºè‡N Ô„ç91ìçþB×|Ç„ GÑ"=â;Öz9\F9+„®aƳˆrOÐ^s#^Ç8 ÖU`…u^ùÇü2sè \Æ^Éç“ _D”Ò/ÈØŒm°®¯Oî´K+ïp†Ho !h7XÿþýPŸþãÐ5L…é—„ ªÓ™SO–ƒÎŒUUqÜo|D©äýDå®LºH@ÐÚݽ?þÒÉÞxEq]…BÕju:ÎJѤû¤›&`îÿ•ߦK«îÕñîô^`ší·›aú?v*ý–5¸yÞáá ö¤@µ‡.¤(³©Ê"-¦’Î>,ý…ÎÁ¤kÁ4áþã·øê½¦Ö£/hdÇ –æ«øAÙé›ê4Z·¶ºeNêX-ËSÛ†Þ n–$½Á¢âË=óë[Þò>|ÅWüïîƒÂ1†áyæOéì§ë€ú¬†+ë£D]{ øòùÙ´7-—™·"Œ5ƒU%}”ó§>£þ–°líí¢¼é3U …GâHï„OßJ¥Z÷‹~K WYMæžOfŠeµRògÕ4àGôÏðâß\ôéE’B¡€T*5½5ÁÜ׃ն=Vƒ²öw­çSV$÷‚B–Õ~«è¼M r:;šl¤‰U®Û7«ëôsÍš¹Ì6ÍÖO¬ú=äÞû¦ÖŠÒ®x˜& À ß™’ÏûkB& AÑêU'lU#åMꜢxæH¡à½ÿ4³§§À9 CÍa€®ÇŠÅ"~øá¶Çö÷÷}M#aÛ6¾ê«:¤Äò¦ö&“íÂÕ6cÈ0Ú:þJéF[Šˆu£û;7üAù)ƒR‚ éÃÐã<:âîoüìs 4¿]¾gEA&Ój ÔµÁÊ\=]‡e+0Ívža´<×ê—<úO¼%ôóÅûgšý}+ªÊ”­³„eYýS {Í~$3e@°*Ûö±ÃÃneîÎ;ÿ·ßþÙE_Väp]¦i¢Z­Nç ƒNsÙ»;Èd€Ã×\Aú»^Ùz^ÇÈÃðÇ©ú~~];¤A·d!‹Ó1lŸfOÝ®éÕ!Ñf‡d‡Í¼3°„æe(iˆ ¿xìæ}hA¹Ïô%yÈw¸”Ÿ¶,N´ËN¯ø@PæÛRÞ¨ª×¸x¦‚4?‹‚cf Ázîܹ¶†KB<_ô¹O…_ø…¯@:ò—PU W:JHôɶÛgÙ6 
õÊ‘Çû¥»n+ö•x¯axcLooGÒéž½ù‚{wçD@Az#ѯ¿z˜¦9Ÿléa ¥~! …ÛxôoynW&ȃùŽ—+QlÛû Þv¤$|Ù•SëeÌJþ£¤? 4Xc±˜ßxÙ ó|âçº#P2¬Z»˺ËN¹yú3¹BÞ_Èå€2z´µs]/ÇÉqz+M’n¥xôƒ]žHÛö"¨"÷uHâ%TUx®;ÑdHoaë0Êe*µÄöíÞ«h4VçFØx…tÚ»nmyŽ^IFÕ*´Q‘5SM¡ììô®ëmzùð3HÇßšÖ]âl®÷½Ÿl{Û`ä¦G¦?'Ò0ú7`ê'Gd91Ms~éÀ²ÑÈgudGº¿ú{€¦…gr™&Ì{¾;Tþ§å°ËëÌ èå —a g=)hhƒÕ²,\¸p;;;888ðvvvpáÂ…ÈÏd•Ð{p3µ, Ÿýl¬ÿ ?ýéöß;£«a=¦(py·% g¬”àÎ5Äã˜OÙP¿ò¥Èj[@XjWP‰éRL²Ù‰sYh¬¡¯Á*¡xæÎ ]÷hšç¯ Ž%‘ÈkX€ƒ ‡eYÓoPÓc)pª}ñ_¶YŸþ~êIt]Àüµ;Bפ‰÷J—ÚÕj–O”Foé0·æy^Ç’?ÿ@*E I‘µmþì›BI)oêW*ûc¿@r¹Ü;úz–Ê`­T*Èf³8wî …lÛö …Î;‡‹/F:ÚjÛÝ!uÛ¶qÏ=Oö~Ñ‹^|ä#íu¦ñšfhÚ®rÏ ¦Ûª‹1+%8èö éèë{ºÓ鮹©ŽÓí§ 6ÅèQBÈĸý4ZÓØñ“LŸASׂMÄ%¡ˆ¢ñqgú3·û±™&ýï_ÞöXÇhUo\Í­/Õ¶¥5â5 15}9ë‘§Ud&Η0l»]ié0XíŸú}¤ÏÐÿ]¬úç©ç¡ý“ç÷”áããþ=,ûŒ6nƒ™! Ö½½=躎ýýý.Gðñ½½½E_OO¦ø®‹µµµî'‹Fþ¼çuËçÛ%'•j›±äóœçÐBÊØÊKgH¤cßÚ\Ut½k˜{˜bœ•lLFÈ4é]Íå˜ o{{õz=²©Á( C}ôøóÎ;»Ÿ|*9ù×ýOè/òjH{ÖOHÇ<´—ý²h¦æm¸ÌÇ“ó^ÊC¯²mÙÐ{uz$dR,Ë á$^:ÇFüU=¦š´!÷YFJçËÄë0ažÂ>NQ<#Tt¦T Pÿá‹Gzö뢓ó¬13çK'½R¾N½ðޏšÞ•9¾¶íšƒš!…eÛ„„ƒh°Öj5Äb1Äbýk=eÌM£ÑXô5…"©GAcò£ý(þã|Nûƒ®¼À ú”qø¨`¤‰idÑôTàGE"¬® UíÎËåZÍ”z­YT<ȬèaUæ NˆizŠ™4L¼[ãúzwÍi.ç팈͗‰kþ¶¶ZaÑŸÕßqágÞÈ, „¡³çY}úfË I[ª­ë†ÇkZß”÷d2Ü#Y‡‡ƒuý0)ˆ\&e|0ÇÚ¬­­áÖ­[‹>Ï©t²;Žƒ/ú¢w+СHöŸ®Áþày¤1ÀAêv¶_¹ í[^UQNÓ]B‹ëº#¬ÒS¬íeªê=(m¶»ƒ4[êe8Žw¿`V&™Žã„Ë;‡4ŽD&ƒ¶æ"Ò(¼síªªWsÌ>’Ra:lç‹ÔoOä ,•¼=~ˆl„¡;–&“¡÷‹Q ¯éì1”àBá´mÁ[>õß¼Õ{PU‡Þœr¹ÖŒß0¦é|K­'Ý Œ°Æãq4 Ôëõ¾Ï“†K¡5¡ Fç::Žƒõõd÷ÄŽ€7Ð}Á=p>ü7N_káù¾©P­"W{0M¸Ÿ~Ó‚ÉÂqg4/¥ëÂü¿þ,üX>T«H>ï}po»«í¤Çˆ]Û FWɬ˜kÍÓ Ñ9rjc]0]·w„@FP­ºU:¥æÏ8ÑU#c#÷-Ñz@BAÁBe™ßÁáá)Œbü2ß‘ É4»+ `Uîl’”t°È<áy•5°|b8†2Xãñ8²ÙlÏtßF£b±ˆD"á§G Ñe$+À²,4›÷u¬Ü_ý_Üü/~€S/s鸯D»ƒ“?„ùäëÂ2WœQ½&¶ çSwõLƒç‰çCyõ‹ÚÓ´VJp/úE_ ™”¹Õ<-!†á¥î†aYž‘*?étË8•øš6\PB²ë)˜?ãÈ¿a*°ÿ¼ÝùˆTÊ‚LÆš°9e£ârÄži¬ùüiUßß½²å8Ù`´f2,kˆ"Cu ¾téêõ:.^¼ˆ££#?ÚZ¯×qtt„‹/¢^¯cwwwÑ×Цµ<b º®‹õõ»º÷ÑN%ÿô&à8€åô߸ƒ7kêNd‘ôLì÷šò1Ô—4N£çì°·”.Ž[[‹¾jrV™É8%Çu[e‰U¯w _w*õ‡2J¦\ö‚A§’®{k?GT]g*ð¢uˆãÊgž€óÉŒ¸tÚˆããÐ὜¡ˆ7“!Ú`-<Ë2 Iö—O6ÛJ%éáЗ‡¹—E5¬€eÝßßG±X ]“H$°»»Ùèj^Û¶Q*Ý×ý„€7ж½›}>à‰' «ÃE¬I"‹fÞþð Úœ;]ÇíÞ]!¥{0!‹À²,”ÂrQ76ZÖÚ¢PhÕ׳ªzvH'ŠÒ½¶åL~uKG›sRœ{¿ò;¿ŒÒ[¯!óɯêÿæïÓoždO:ç\ÒÉ 8;Þ¶[V©ë¶…B-ËûI§{øK¤…µªúÆiÐoÔe¢ÊP+Ð2ZetM­VóÓ…u^4ëë­›tpÿ•mk#àŽ 
zgô»ÿHÞÖ÷sr9o_æÞLÍ8‚í?|6´üáÑÔ~]€™òKEÏæb+Ù‘KëÔë$xpÚV)mgÃ0Z)Ã÷>t \ÿ}(›Ê#*æýæI†Ò1Û~ ]•Ô‘3òLH]ÿaÀy[xJX  _2Ùí˜ë×Ñ—,–¡R‚ƒÄb1躎ÝÝ]èºî«FÃo¼%M}lÛ†ªª(BÒÒ|S>†û¼»‡ú¼\Ž{4Y,ãDX¿ºê—ÝÀ‹ÐtN8èg°–ˬ]#‹¡gÃ%ËZ9ï¡ãxYp­{—i¶î7Šâ­EÖ_,ËBZ¼…Ž®qËŒŸyÖÀµ‘˵fMžn3Ö’bJ2$C¥¶Kû^Mó~:6:ëü5R/û_!QÕð²†BÁ{›afK“h0²ÁÚ‹Z­†E_OJ¶¢GG…zšÐ¥dw†‰þìÏË‚ñ;÷¾ý«û~–Dd';FÈÄŒÜ5Õu‘Rm­èz÷&ì²MHTè™MÐÏò„är^D@×½Œ¡ áP( 4½¬6†a@Ó´V e˜qhY^Dê´ã¤ñOCý{Ï¥-³²ó%²|ä^`šc¤J2匌À@G»ãxÂØ# êþù§ ~êým­Ê{5‚s¯\"›õd[êûé_‰.S3X£J§ñèÕðÜðÓ†º¹;ðñ_üÅ}Ÿ\g+¤'‘%cœù«°m¤Þü´ÿ«®wGX{dà²Pz*9+R;ŒrU«ÝFC*Å ‡³ŠišÈJ‘r/M&ã Èéóò_zú›?€§uîóòÑùóyïmGNX+B†À4ÍÁNö!~ëm? íü³Ú6CU  •J­’Àr¹Õ‡ƒûhtYyƒ50¥€·w~üã¿:¼Bç€ãÀ}â3À ^°èË!d ’ò> εOÂzê+üߥ Šã™HÔék°®@ª‹4þcš/ bŸ¦À¨­áòÝÚ¶ax ~`}¨w}Ê·s¯Ëåc|þç¿@H*¼iviåúƒ¯ކû,é/@ƒ• bZò†7c¸òxËÛØyo蕉£iAÆc–2.´Í  ¢(‘ÖJ$ø%kRQ¼5˜N÷ž•J¢Å<ä»)á3M³Û)ÙÙ]æð°í÷^M“Ä`M§=yä JÌW¾MÓœ O·Öõ=͘t{”q4¾-L­†ummmª‚-#rŠÅ"ŠÅâÔFæ8Ž…¿ÿ÷ÿ@—i7 ®;Rt©ÓH&¤“YÉw?\×ÛèÃZ¼‡=&ÐX%ã0ïé™Kˆ†á¥¥e³žaZ.{¶©åbÞ{¸DÞ½ñ2!ã?:×AG¨´—Át²§Ó4X‰Ç<å;ÔÓ‰m#“i/¹ Ž£ Ž÷êÄq¼×6Ê&KÊ@ƒ5cèŸi"ùóF;;;8:êê|æ™gð±}¬kQuÎWj6{oÈ!ŧÒAoØú ÎsŠ>õzׯ_ÇSO=µÐóE¾àé§ŸF¥RA½^ïù˲z¦Ö¤Ržw9T¹éfHÅR©TpóæÍEŸÆTöð~Œ<¾)¸®·—ì´#E­VÃõë×qëÖ­…žÇ(ò=ÌþÝQÆßûÞO÷”{™ x~Åí7òHQZ =ûoDÙ¿§¹8*³Þ¿VÓZ{ø –ÅXíyÏ!c±¹¹‰oø†oÀ+^ñŠ…|þ8ò=ÌþÝRÉÓG>úÑzëÖV[d§ý*5Àðµ©LŽ²ãØ˜óÚ¿ÏÉ.õ«–ÈR”²ŽL°,XÏySh³ÈTÊs¶ô+k’uAùŽ¢ƒºGrk¥RéJ=Ðñ¨Þ¦à.Ñ'M 1f{H»¤Ê +èé4=”¤?Ó”ïN‚›'²TpjŸ¶¶PxËï/{e–L•YʸÐÓ3,¼6Ã4[§ M”¨,­óï0<…Ý𜒥RKÈ’IXVï4óŽþK]ˆâÏ~˜¯|uMëPųYO(s98Ïÿ’Ð(j6Ûjb× UõÊ0(ßËM$ Öµµ5ìíí¡V«ù #ý>†ÑžÜ7]ÒuCw{Ãðа©2¯’^LK¾G%X»ZýÇÁþ–ƒñ«1 }FȨÌCÆÇ ¯{ › ?dzÖ×[Ý\Éj2ï=<—¬w¦ibccË{P<ï¹à8uøíl0LÎ6ó’o7P4í8@áß}e½£3R>¤Ó°>ø’P‡ŒŒß$ûì°üDrk<Çöö6.^¼ˆD"““@aÄ_ŽÓž&ã8t]‡m£{ø°¢Leǃ•Ê éŴ仓~ —Ú8ÈWÐJ8<¤×‘LŸYÉxÙÏCÌU;‘î“2œžc VŸyÈwÛ²×wñ_žÕÀ=ñ“­YÚÙ,°±h, c;i¬’ ó’ï`tÕ4åƒW»;ž¤(@>l¥¾}6ˆ¤Á x9ü›››þB§î#¼áRÚO;hrMë«ä$“WC¦Ç4仓ž'x™¾ˆŸÎÔ8d0™!³ñ ¶m#Ö4`΃°¥Ñg§ž-f-ßAʇ.°ñ;øÙì¹ÖEá<ö0€D¦É<ä;XÖaÛ@öÏ~Îÿ+Œ\wV#Ë–ÈÐk°É’tŸ k¼4ÍÑ6±Xl¢¢s×m7JÇí() É0ÐSI†eRùî¤gÄ ÞÍ M6¹û“90mÒ7%x†.w©OÍf½eÄF{g—YÊw†ú[ÞíãïÞº5 ªþ‹EdÕ˜µ|K ìßû,´ü\à+^óŸ·¬¢¿0sål3Ð`=wî¶··»Ÿ¥7qZtÎcUU‡7BU­ô›!H¥h´’Åàö¢×¦Ã³h•,9®ë¶œ¦ÙÞhÀqfªÙè:Ó~ÉüH&ÒÇ¿øUw÷tH²ŒHÉ4}íš_ˆªëí}2f8’! 
Vio½ŒõwÛ¶}o|:íé8Ãìý£Ÿ¼ŸEÑ/Â:ç,IBfJ[½¶tJM§½ »G½I†J¬ù&óÄq·þ Ô/}.ÊÇÇ(½ýí¼Æ^Áô ωBȲÌx´m@¿q¤Š¼í[ V™ÒÁ}—D²Kð´›mù0z„\§Ô‹‰™ãô°J]—2LV ¿Y‡axZM6ÛÚçÜ!8—óF^2‹žÌÃÒ_øë€®·)øâ—BÊ=Y‚NGóài¤Þú9ÿX*嬒,ò ¬¸ÁÌw[VÛ¼msû4óóÈrÐË!3JJ;!Ë€eYžÁjYž6“Jyi3£ Í€ëz‘+ÀKGcö ™7¦ ¤>ñSøåÏÿü¶ì™à¼JMó"ÿ„,ÁŒ°ã·~wÛ¾-ÑÔQúÇÕgè¦KµZ ØÞÞF<ï9>Ã^Àü»^tž¢(ôº¢À÷Ñè™NI¢N0å½ûØé  PhMˆ'dIq]·%ëAM]ÜòSªÑN&½¨*#«dºÿ3PþW¿ýz›Áªª­å“,#–eµº¼+J—G5«¤“¡"¬–eáâÅ‹¨ÕjX[[óßÝÝÅ¥K—H$°¶¶†«W¯.úz|NGMúضÝfd·Õ{ôiVcYž—³³Å6!Q¢_Ê»t3…버,==ç çóSsÆäržþDc€, ÓÒ=(•Z§hšç$d¹Q¼,–[Q¨®v†2X÷öö ë:yä‘¶×ñx›››ØßßÇùóçñðÃ/úz| £½”©ßŒÊ~E L#Ë@0å= E‹ÈJ6? ŽÓJ9“ôß­-ï÷|žNJ²X4í´VõtÏê/šæ9ÝéP!ˈišÐu}èæ§„C¬•JF=ôPßç=ôÐC888@½^_ô5ðAÐÙ>pþjWN*5õ>„L0·¬@y¶ãÐ`%KOg´ilÛKù•å (žÀZ@Äa"Ê}'ìK@–ÉxL§é$Ã3Ð`­Õj¡Ãƒ% Xã'''‹¾&žÁ*ʈeY½£«ÀÀ"UÇé›5Lȱm;Ô`•&ªh¬’¥Ç¯_ÍåÆn.`Û^$õðÞ}M:Ë™8•¬Óp:’³Ç@ƒumm ·nÝêz|ñx|Ñçß“`DÔ¯ïëÑ Ž3°é½™$Êôªa-•N•r¿ó!Ë‹_¿jYc9`‚Æ*÷tEdæ/\p*÷d¥pO£?¹œ{»ÈôE24 Öx<ŽF£J¥Ò÷yr<u]$AÛÔ¶m|õg?Ûß#ßG™§®O–_n™LVÛ¶±u×]cY›® d24VIôI§þø~€Æ*Y),ËÂ?ù'÷òí}ÿ‰z š¡ Öx<Žb±ˆF£úœF£b±ˆD"™¨kg„õ ï¼Ó[¦ÙiÍfûj0L&Q¦mÌÇ)ŽÓj € —ÈJ`Yî»qc,‹Ó¶½~4VI”QÕSƒÕqðsŸøÒì¬DVÛ¶ñÉOn"ý–º·3D†d¨.Á—.]B½^ÇÅ‹qpp€J¥âÿ‹E¼õ­oE½^ÇîîÇ'¨”8Žƒ×4ÞƒÁó€þÙTpH”éÙx#ÚóªJA&Kï˜±í±ŠOuÝóMeÄÙþ¿ßó˜ŽÓsæ=!ˈiš¸~ýËzþ¯P'!#ñìažÇqåÊ X,vßÜÜÄöövWc¦EѵmÏSÕ–Q*F+=—dp:RÝ]×+ñ+•RÖÉ’ã;fLsèlÇñœ7ŠÂn”$zH&ŒÌ…·mO^óžxþó‘ºpaѧHÈÔpwßýUøÌgõ7ÿo¶e'#1”Á x]€/]º„ÝÝ]Ôj5ÿñx<™ºU¡3ûQÓ4ïÎ ^ùtÚ›iJ •ŽP./úŠéã8muN…íS²zX–åõ"1ºªi\$º£þ–Õj’÷¿þú¯™LV ˲ðìg+R/øŸÀs–)‘‘Ú`ÖÖÖH$}Þ;Ô²,¯{j°O¼ªzw‰ ÏãpáåÅíH)0Í–ÇžUÁ¶mäÖ×û¬®ë9llÛs4úõ€„DUm©ŽÓÊŒùëoÿI¼÷9ÏÁ7S/!+Ä'ã7 ¿ÿ>¤¾í9ÀÿÅè*¡jX— ýéº.^ñüçw¤é´g¬nmy­# YR‚V™»Ê>dÕp/zÑ‹º V×õúè%“ž`V YÄ.©Áލޅz½ŽW~Çw,úô™*¿øþ÷ãÿ{õŸBýé·SI!#3r„uдV-·mÛÐï¾xÞóŸX­² 0Yj‚5¬ª€­ˆãÈ’â8Ž—)RˆšÉx[y©ÄD²<˜f+X2\€aà‡>ûYü$SÈ aÛ6îþ{ÿ ®ª¦*‡•4Xƒ8Ž¥ßØš‚ ‰:j@K×ÍÌid@ÞýÂ(B–˲B»¥Êxavÿ%ËFg³k±OM3…ç+Jר2B–™‡þ®^ý1¸.Un2+™$lä!«‚mۈž™Ìif{*åý§PhEÙ6ïd©él,&ôºyL3Ühš&R©Ô¢O‰q]¯Tþò/úÐǘCÆf¥ V?ŒÅ4M|ù—¹Ÿ ]÷ øÇ3\%ºJ§ YVlû_ÿ+tÖ0!ˈdtúÇééœ!dÙ0ÍSÕ£PÀ§Þÿ›ÔÇÉD¬tJp¯42BVÛ¶Q*¥Û½–ŠÒ1„•%&“Á_>ùd[Фax¾¦“e¤WtÕ0 ޲!+ƒa‡ÿ÷M<ý¦ŸÀkßúÖEŸYrV:ÂjÛ¶ç©4ŒEŸ !SÇu]¼ï}ž×Ò¶}:„LÛÆ'}Ó›ÚÖuΜ'ËKgý*r9À0˜LVË:Ûô¿€ê«_ͬ21+m°Z–]QÚg°²"†W¿ú†§ý0?’¬"†¾ä%~*™axuQa锄, mU§½~æ¤R)6["+iž6+ðƒõ: 
V21+k°úõ«R,BÈŠñó?¿†7½éÓ½óËYv, G/y âñ· ™¤ï‘¬mY¿…N㇊E¦“•@ú=êVŸJ&ñœ×¾–Ž21‘7XkµêõúȯóëWm›+‰,ãÊ7üÉŸÜïÿþ{=ƒ•id$¢Œ+ãîzïþ{áðð>ü›ó¥H§½4`ê=$JL²‡Ã²Ç •J±) ‰ãÈ·ëÙïø$`šøå{ïet•L…È6]ª×ëÈf³¨Õj]×Q!íÑq¼é+¿ø‘a9&•ïŸú©Oà¥/ý#(Ƈ¼è*µx1&‘qÛr?üeøÜK>Š7¼!†ÿçÿ¡ˆ“h1é×õ:¹—Ë0¶¶P.—}I„øL"ßšh… ›EÑ0P¢N¦@d#¬{{{ˆÅb°mW¯^E­VC±Xúõ–eáu>Ê‘$’L*߇‡7ðÆ/ú /ºÊA”$‚L"ãšâàW^ñNþö{ñ®w§±J"Ǥ{8¶¶€l9À®ëL™$‘b"ù¶,Àu‘;ÑÄidDÒ`­×ë¨T*ØÞÞ¬­­asssh¤ëºÐ]×sÓS™'cRù€O}¼†Ÿzò?{òME‡DŒ‰eܶñðË_Ž,çÖ2±|çr€ªÂÑu˜¦I9'‘bbùÖ4üî·;,ËBž:8™‘4XONNñxÜ,Gÿدü Þ~ãÆÒ§-ú¦Šl‚gIå~øY—pû… KÝliåaÕÖì¸L*㎦¡à8Kß„fÕäa×ì8L¼‡k~ùý#$“Iäóù¥Œ®®¢,¬Úz—IåÛðíïxÇÒ§¯š<,ûš¤ÁÚoQ4¯ÿªöÏp—i.}³%kÅZbžœœàÚµk‹>…3©|À[‹Å¥ÏXEyXµ5;.“ʸmÛ+á™_5yXÅ5;“ÊwζñS?÷s(—ËK;wueaÕÖë¸L*ß®ë"ŸÏ/}*ðªÉò¯ÙH6]ê· nݺ…µµµ®Çÿøÿ¿ð ¿€_ú¥_½÷Þë=¸äÞ‘Ç;;;‹>©ñôÓOãé§Ÿö‹øÅ“O>‰¿ú«¿Â _øBìîîÎýóÇ‘oÀk$ö†7¼wÝu^úÒ—.½|GE¦ITÖìõë×ñ™Ï|faŸ?­=üêÕ« »†iy˜QY³?þ8^õªW-äóÇ‘ïàþý¼ç=¯yÍkðþÃXÈùOƒ¨ÈÂ4‰Êz•ýû×~í×ðæ7¿yîŸÏýÛ#*ò0-¢²fEuÿޤÁLCè$‹…>þßùøÎïüÎEŸ:!G¾à×ý×}ê„ ÷p²ÊŒ#ßܿɲÀý›D‘H¦Ÿ;w@{ZB½^ï«Ì²,P¾ÉªC'« 囬2”oE"i°Æb1$‰¶‚g˲L&}j„L 囬:”q²ÊP¾É*Cù&Qä¶f³Ù\ôI„Q«Õ°³³ƒX,†F£µµ5ìïï÷¬ï#d™ |“U‡2NVÊ7Ye(ß$jDÖ`¼Âo)N$‹>B¦ 囬:”q²ÊP¾É*Cù&Q"Ò+!„B!„³K$kX !„B!„g½ãïxÇ¢ObÕ©T*¸í¶Ûzæþ×j5|îsŸ =ÞïØ0ÇgA£Ñ€ã8ø‚/ø‚±Î)Š×DÆgùžÆñYÐOÆ—ñzÈdô“ñe”îáD8kû÷ sŠâõÉàþýkŠ&™ïz×»š¯ýë›÷Ýw_ó¾ûîk>øàƒÍ›7oúÇONNšo{ÛÛüãoûÛ‡:6ÌñYpóæÍæÛßþvÿ3ßö¶·5{챩ó"®‰ŒÏ$ò=ã³ ŸŒ/ãõÉè'ãË(ÜÉpÖöïAçÅë!“Áý;ú×4 L žFÅb»»»°mW¯^õööö‹ÅüãµZÍ?ÞïØ0ÇgA±XD½^ÇÕ«WaÛ6b±†>§(^Iå{ÇgA?_Æë!ã3HÆ—Q¸‡àlî߃Î)Š×CƇû÷r\ÓH,Úb^U}ôÑæ}÷Ý×öØåË—›>ø`³Ùô<÷Ýw_›wägög›÷ßßcƒ^;+nÞ¼Ùõ™'''ÍË—/uNQ¼&2>“È÷4ŽÏ‚~2¾Œ×C&£ŸŒ/£ÖÖÖúôÚY!Ÿ)^ªQ®gÇI´˜D¾§q|ô“ñe¼2ýd|å{8ÎÚþ=蜢x=d2¸GÿšFëŒ ¶L«“h4¾¨óx¿cß“žS¯‰ŒÏ$ò=ã³`–k’ò½|ô“ñe”îáD8kû÷ ãQ¼2Ü¿£MÃBƒ•B!„BH$a +!„B!„HBƒ•B!„BH$¡ÁJ!„B!$’Ð`%„B!„Ih°B!„B‰$4X !„B!„D¬„B!„B" VB!„B!‘„+!„B!„HBƒ•B!„BH$¡ÁJ!„B!$’Ð`%„B!„Ih°B!„B‰$4X !„B!„D¬„B!„B" VBȰ, ®ë.ú4™;Ó”}×uaYÖ¢/‰B™ 4X ! 
#™L¶íEŸ!sgš²oÛ6’Éä¢/‰œa Àã8‹> Bæå}þÐ`%„B!ccš&xrf ¼ÏŸg/úˆGÐ[“N§¡ªj×1UU¡ë:,ËB:ॕI*X*•‚¦iþër¹²Ù, ÀëºÐ4 ©TÊ?EQÍfÇ~>!ÃÒO¾ P(ÀuÝP›–ŒS¾É"˜¥ìçóy ÜvÛmm¯s]…B¡ë5„LJ˜<Ëc¦it]o“OMÓüǸ_“eb\y×4Í—]EQN§¡( Êö¸0Â2™ …Àqlllø $—ËùÇLÓÄÖÖ–¿H …¶¶¶x ÊÆÆ ÃðßWŽ»® ×u±µµÕ–†V(ËåÆ~>!ÃÐO¾å¸Ôòõ’I`r§|“y3kÙÏd20MO<ñ„ÿ¸ëºH&“¾"DÈ´$ÏA‚ò)Ïá~M–‰qåýCúÖ××}g£išm%”í1i’…R­V›šÇÇÇþcº®7³Ùlóøø¸ë˜ªªM]×ýcÕjÕ?V*•šŠ¢ø¿h–J%ÿwMÓšétÚÿ=›Í6u]ûù„ ¢Ÿ|7›žÌÉÿ›MO†EƦ-ã”o2Oæ!ûòúr¹ÜмqãF—\2 ɳ®ëÍr¹ìë”oyŒû5Y&‘wÙ…7nøz»<—²=:L ^0¦iB×õ¶4±r¹ Àó²tK¥R°m–eAUÕ¶îò»mÛ¾g=øZEQü”„^Œú|BúÑO¾]×ýÿŸ7 §|“y1Ù¾h5rÊçó‹¾|²b #ÏtÊ'Àýš,“È»¼&“É •JA×õ®×R¶G‡k„é7òÀq¿N)HØ ‚e„2NÎ*ãÊ¾Ô f2/ú2!äÌ¡ª*Êå²_Æçº.Òé4J¥Ò¢Om©¡Á: S˲üÂë^F«¦iP¥ÍkÓé}'$ ô’oiÖ Ê8Yvæ-û¥R ®ëÂ4M¿ !Ób\y&dWÞE/•J(•JþØ1MÓ¸V&€M—L:öS|oH#Ž`ú¯“¦º®·ý´Š¾ ‰ ýä{”q²Ì,JöEñ»Urì™ÃÈó0²MÈ20‰¼#«€qeÊïä0º`TUE©TB2™„®ë¾gFZ`wEF”’\.ç+5Žã ̱'džô“ïAPÆÉ2³HÙO¥RH¥RÈd2\/d* ’gEQËåüôGB–™Iä=NÃ4M¬¯¯CÓ4?3&•J-ú²–šÛšÍfsÑ'AY%zÉsP_éœ5LȲ2‰¼KtVQ–1M¬Æq¬¯¯£Z­BÓ48Žƒd2‰l6K&!„B!dåaJp„ ¦$Hp:¦±J!„B90ÂJ!„B!$’°K0!„B!„H²2)Á¦iâ¿þ×ÿŠ{î¹gѧ25®_¿Ž{ï½wѧ15nÞ¼‰F£‰¿ÑÍ›7ñœç<‹>•¡¹ÿþû#ñÝM‹(ÉôˆÒš½~ý:~ë·~kѧ14ÜãO”ÖìÍ›7¡( ~ê§~jѧ2Ü¿£O”Öëõë×ñŸÿóF<_ô© ÷ïè¥5;Îþ½2ë~áâo|#vww}*Scggûûû‹>©Q©TpíÚµHüä\–‰{òq¢´fwvv} #Á=<úDiÍ.ÛÎý;úDi½îìì,± pÿ^¢´fÇÙ¿WÆ`]EVi¡@"‘@"‘Xôiˆ°Šò°jk–LƪÉÃ*®Y2«( «¶^Éd¬š<,ûše +!„B!„HBƒ•B!„BH$¡ÁJ!„B!$’Ð`%„B!„Ih°B!„B‰$4X !„B!„D¬„B!„B" VB!„B!‘„+!„B!„HBƒ•B!„BH$¡ÁJ!„B!$’,Ô`­T*¨×ë=×jµ¾Ç ‰2”o²êPÆÉªBÙ&«eœ, 1XŽŽðµ_ûµØÙÙÁ… °³³ƒF£á¯×ë¸xñ".^¼ˆ . 
›Í.ú{Z)\×ÅÆÆ2™ÌDïq×]wÁ0Œ¹œ³iš¸ë®»Éd`YÀq d28Ž3—óÊ÷|±, ®ë¶=fÛ6r¹\ß×9ŽÓõº ®ë¶í¡>ß0  …¶÷s™L¹\®ïç,#”q²ªP¶ÉªC'ËÈÜ ÖF£b±ˆÝÝ]ض«W¯ú {{{ˆÅbþñZ­ÖvœLF.—ƒ®ëÐe´Ú¶ ˲`š& …,Ë‚eY]a&“A:†am†¼.ø|˲°µµ…d2‰\.‡B¡0ô¹º®ë¿¦Z­"•J¡P(`}}[[[MÓL&}c¶—‘aš¦ÿ¼w¾óxê©§¦þÝR¾§Ã0žã8H&“( H&“þklÛÆÖÖ–o0†½.“É`kk [[[ØØØÀúúºï‘÷yçŽamŸ‘Éd°¾¾Ó4}ãwcc××d2 MÓÉd¦i¶Gð=r¹ÜPƱ`šæÜœEPÆÉªBÙ&«eœ,+Ïž÷Öj54 lnnÖÖÖH$P«ÕxžJ¥‚+W®øÇ777qtt„ÝÝÝE_sE"7‡‡‡mÛ¶ Û¶‘J¥ (JÛ㢠+ ÐuétÚ7.óù<ÏðÌd2P¦iBUÕ¶÷%Þ4MBÓ4˜¦ ×u‘Ïç}ƒ2™LÂqèºEQ|cÒu]ÿ³UU…ã80M™L¥R)ôš%R%צiªÕ*@UUߨ"Ƴ¬bX¤Óißȵm¥R ŽãàÝï~7žy晩ÿÍ(ß“#ò¡išÿ÷ ⺮ïLÉf³H¥R0 Éd‡‡‡¾léºîËw6›õ/®ë"›Í¶ÉŸº®#•JakkË—wþkÅ0VUétºK†³Ù¬/¿ÕjÕ—aÉ:kEA:F>Ÿ÷="·Aãt]‡ªªþ:ÕuÝ7pÓé´ÿ½är98ŽUUñáx&Ê8YU(ÛdÕ¡Œ“eeîk"‘èŠ$œœœàÎ;ïôÿñxÜ?ÇW&Þq†áŒ½Èår°, ªª¢P(´¥dlmm!•J!™L¶)ÄŠ¢@Ó4_‰–HS>Ÿ‡eYX__‡¢(¾ñ¥R …BÁö1T*$‰E^V_ … Ãð#Žˆá¦ªªo`mll NCQär9¤R)_)CUwIaÌçóÈd2~”¨3Jd˜úQè“Éd蹇)óa×'”J%?}R6Ïq Õ^¨ªŠ|>ße@Ôëu\¿~}&)ÁA¦)ßðôÓO£R©àܹsˆÅb3=÷Y!N‘GMÓËå i²Ù¬Ùìt¾Hä\UU‡¾w:ö Û ƒœCÏåùãÒomô¢sm(Š‚r¹ìG}ƒ†úwÞé+ ³ä,îág•N'Ë"©Õj¸~ý:nݺ5³ÏàþME¥RÁÍ›7ÊÔ¤pÿ&‹@tðQ÷ï…v ~à°½½ xøá‡ ­ð»“~÷Ì3Ïà‰'ž˜IÔlZ8Ž˲P*•ºêÏLÓôëá$]Q Ñl6ë§úÉqAÒ…|>t:Ý•Ò8 $ ;­|1Èóù<ÊåòTÕ~œœœÌÅ`¦|žÂsíÚ5ÿUzÕcбzxx膩T Õjªªbcc£gT¢£ƒ¢QQ¨ç…­ëçÚµk}emZœµ=ü¬bÛ6666üÒA jh$“Ét5ðs]·­f<ø¾[[[¸|ù2>øÁÎÔ`=«û7Y’Q$=6f)ß÷o²DY¾›cpùòåæÛÞö¶æ}÷Ý×¼|ùró]ïzWóòåËã¼U³Ùl6Ëåró¾ûîk6›Íæ£>êÿ_{¬“G}t¢s˜Ùl¶Y*•šÍf³©iZóÆÍf³Ù¬V«MUU›ÇÇÇ=_«iZSUÕf¹\^ôe¬ó”—iÈw³Ùl>øàƒóÿ¢úpãÆ.™ÍçóM]×›ªªúòÝl6›¥R©©ªj³Z­.ú´Ï ó”—³²‡/+år9tíåóù¶uÚl6›ÇÇÇmϽqãFSÓ´f¹\n6UUm–J¥f>Ÿo¦R©¦®ë]ï«iZSÓ´f6›õß¿\.7³Ù¬ÿÞò¾Ùl¶y||ÜL§ÓM]×›©Tª©ªj3•J5S©T×ûæóùf6›mÞ{ï½Íïû¾ï›Ëw·Šû7YÇÇÇÍR©ä¯7nøò^*•šÙl¶¹¾¾>·óáþMæÍ8ò2rJp±XÄÑÑ677}OÌÚÚŠÅ"jµö÷÷¾¾óyÁÔµsçÎðBÆ’6üÿ2Q(J¥Újï¤~T×u¿6U¢Ný¢CR‡:¯($³$ßÒ‰W"ù¶mÃ0 T«U¿#s¹\ö³úÕH“åá,ÉxT˜$×u]¿[vp J&„iš(•J~C½\.ç–”–Hº=à¥ñKÙJ6›õ_#©ôRs. 
È666ÀoÈì"¬Ý.•J~f†dYlmmù÷ÉàûÀ7~ã7N=šCÙ^}‚ML¤S†á7õ“Þý>O²ä¤wƒô!qétº-{mV2”q$Øàt¤IX9Ð.óÁÌ»›7oâõ¯ýHŸ5’ÁZ¯×qpp€ýýý¶®bº®#‹áâÅ‹¨ÕjmÅÚœ?mϳ,Ë_ ±X ‰D¢­#™tç\&d4Œ4Xr]·­«o6›ÅÆÆ†¿Q Jݶé Y,gE¾E©Íçó~3.é$-Ýoà®»îB6›õÓ¿ÉòsVd<*H>×uýÎÙ½îâ ¦ÎKã=×u±µµ…r¹ìwT¯V«þØ1é<].—¡ª*,Ëò›•߯³˜¦iØØØð›Z–å7DëU.)Ã÷½Îߥע(þûÎÊöê #ú$h ÚE¹ZΜT*å;]eüX*•êÙkhu~¿qã Ãð{NÃ0`FÛz-•JmïÖœr–PÆÏÁ²Š #PdZ\ÊÄ‘|>ïï³2;^Ó4hš]×Ûöf¹Ï†á÷ Ž™;GE¦R)ßÙ(Ç*•ÊÈÇ‘ V©½+¨ŽÇãˆÇã½B‰D›››¸xñ"‰NNNpëÖ­6oÏîî.vvvP©Tü"ïx`âéQ(Ïçý?¤eYmÍbEA*•‚ã8ʼBœù6MÓWvøÍ¸:7µ^MÈrsd|ˆA)NyL2s‚ÊDXWi©WUÕšš¦éßk€V ¨mÛmÙ>årÙ ëz[Gù~”J%Ÿ÷ ÚA™Eó¾?RÆ—a'¡¦imr亮﬇£ŒÄK&“¾G5J&ŒôÍ‘ŒOqúHVŒ¼ðQÔ~Ÿq¸­Ùl6‡}r­VÃÅ‹qõêU¬­­aggñx»»»h4øÚ¯ýZ?ú:ˆz½Þ×n4~w˜÷k= s¢dóëœó8ÍHd2f-/Ó–oÀëè7(å~–È,RÓ4ý( ‰.³–—UÞÃçŒ(RUÕÏ\àÏæ F)幩TÊwvJZnpVv.—ó›rfIñöýH¢¶sÃgÅ,åe÷ïUB¦-Èx¯ÎÌÉΨ0–ÃÃ0 ¿¼KÞ_ …°ì`FÄ$/¹Ÿ-DflÛö£–€çdìlt'ÍWE¾%í\ž+sâû}Vg–޼¾Ÿ¼†¯—qäe¤«DQwvvpéÒ%ÿñz½ŽË—/û©Ã‹ÅúæÄË0ã¨#^4¾®P(t¥.ÑX=[,«|çr¹¶ÍPä[68MÓh¬Ë+ãóf³R P‰šJz­®ë¾s(ˆ¡238ø˜¬KQ®Ãê“Â"LÓ`˜YÂËe;:Èt‘mé&Œ\Jæ(ÓÁ²`*5l2¾¬3½WF¶t"å%t~’¨"é°;é{ɨ$EQ ëúTÞ—i#%âhL§Óp]…B¦iú ¤éªP*•à83_&`äÖµµ5X–…ÍÍÍEŸûÜ‘öÏ„¬"l FÈ`\×…ëº]Æd2™ô^ˆ2#ëI²o2™ŒŸ¹ 5J½ê‰V¥”¬AÅ\:Žzßæ2–eù#޲Ù,¦$ÒH'v‘Ué®+¥€Që½jŒd°®­­awwÅbõz=ô9«Ü!lµ„DÛ¶©$2itù"J÷ññ±ŸÒØ©|—J%är9lllàðð™L†ó‰ÉR‘ÉdຮßyÚu]lll„vËí…D¨dä#©$ÊH&Ö¼Èk©TòË5=#×°îííB¯ªÁ*õ}„¬*"„„“Éd ëº’õ"a€þe!ù|¦ibccƒÍùÈÒ )î2£Q΢£dçär9èº>’‘Kȼ‘ŒIñ•ÑI{6-ócdƒ58ö,aÛ6ÓUÈÊÂúUBúcÇA¹\†a0 ÃWºÅó> Q¢Gž,¢¸çþÑ4ÍŸÏ(È `…&È\afª‘(#s~K¥õ¢1²Á GGG°, •J€7TX×õ•®k Î#dÕ`wkBz#ã $ 8•Jù©Á²vFY?\k$ª†á˧È}?Å=•Jùu}òzÇq i …‚?ŽFRˆ™O¢ŠŒœQUu¤îÔd>Œl°‹E`ssÛÛÛ€F£½½=Ôj5\ºtiÑ×4dµ¤y±¾¬Žã´Eˆ:Ç5BZH7_Q`$Ò4Mfß• 82h°–Ëå¾9æº.EA¡PøB8c¤s¯ªª0M“#g"ÌHk½^ÇÁÁ …B× Z×uììì`{{±XlÑ×5®ëÂ4M˜¦ ]ס( ëûÈJ!Ö“É$Ù!˜È\½Î4ÆT*Ã0`Ûv[&B– Ã0P(ÆVÖu]÷çNJ3%B¢J.—ƒišÈf³~ã$éTͨjtÉ`=99Þ†?‘H ãäädé Ö`B¡À.vd¥p–eáøø¹\Éd’ !=Èår¡ bt]÷›rPÉ!ËŠmÛ0 c¢ÈT*å;A™òK¢L&“€6y§~¿ŒUÃÚh4°¶¶ÖõX¯Q7ˆa8<<„¢(~z²*Hz#àu-5 cѧDH$é7+ðæ«RÙ!ËŠëºØÚÚòõq‘ÆKì|M¢L.—6ýZRF2X‰b±r¹.]ºäGRëõ:._¾ì?g™ 泞BÂyydUèjpÃfm6!ݸ®Û3º*°î›,3Édù|~*N—|>ÏZnY ÀeY,ßXbFްJtæÂ… mÇb1ìïï/úz&Æ0Œ¶Žt:ͦdeFW !™LÆOg”H“Ì‹äÞOV ™«ªëúÔÌÐñI¢ŠŒ!cài¹Ù`Çãxä‘GP©TpíÚ5Àùóç—>²*X–Õ%ÔL «BXóBÎ2’uÍf‘L&Q.—ýÔ±~ÑUB–×u‘L&¡ë:囬Ò­Z«ì5°ÜŒUÃZ¯×±¶¶†ÝÝ]@¥RA½^_‰fKl 
AVÎZ%¤Ã0ÍfýÑúú:R©;dåc5N3"JV’ ð'|HŠzýòóy£¾àèè.\@±Xô{øá‡qáÂ-úz&‚©¿d•¡ÁJH;2ÂL”÷t:ããc«d%‘‘„4VÉ*bt]Çññ±¯ËÓX]Fа6 ‹ElooûÑUØßßÇÁÁöööüÆLˈ¤…²ŠØ¶ÍØ„±eA¨ÜUDRßYÇGV•B¡àËw*•¢¾³bŒa­Õjh4mƪ°½½X,æÏj]6\×@e…¬.Ì ¤:)ÉY!—Ë!›ÍRÇ!+‰DW™E¶ºŒUÃÚ‹[·n-úzÆÆ²,*ó„rF( ìY@ΖeÁu]FœÈÊŒ®’Õd¤«¤ûîíí¡Ñhø7 ìííðº/#¶msOž¬4aCV:Ï Öz½ŽF£Ñ•ê«ëúÊÔ|2]rõ0 @Q€`ÉŽešæýûßþÛ«ñþÁ‡}šsƒVBZ{=kùȪ³µµEc•¬,®ëÒùxƪ†õàà/^D±Xô£ª« ÖÕÁq¼uÈåÚe2€ëzFì½÷>gžyî¢Ow.¸®ËÚŽ¨âºžw…ÌÎ"&gÛ¶€Æ*YYd¦0g‡k,C¡PÀ•+WÇqtt„‹/boo–eMôáµZ­¯\«ÕP¯×gþ%pÜÇâ±ín½Ýu ï§™Œwü®»€dØÚòWU XÒ`ž+ºê7|Ó¸çž'gzMQ‘ï3ïÙÚò,Šär-OÊ°í©½ÕPDEƇ…{=–e“í †aÐX%YV·m–e±‘Øc¤Ö`°eY¨T*888@<G"‘Z¨ÕjÈf³þB£XÒŽëõ:²Ù¬¿t]G¡P˜Ù—à8½îÓÂu½0çˆR&tî=É$N{?€÷¶†d³^º¯axU«áïü“ §¬ey/ž!Q“ï3캞7­ë7MO˜ÛlÇ“Çl¶%Øbÿ§÷!ck¨>vût®·Q“ñ¡¿Ÿ³¼ÈP,«l ®ë²,”J¥EŸ ‰(Ë.ãìz}6i¬M]×qéÒ%\¹r›››¨Õj¸xñâP)Ã;;;H$°mW¯^E"‘hk³··‡X,æ¯Õj(‹3ùæ65B‹}f¸®gef2#¿ôðЋ€:N+è¤ë-chÙòUÚ¶÷º~°¾ˆ®*ÊXç7 Q’oï{šƒ’nY­|ì(aší…Ìãâº^¤Ö0¼÷›Æz.<#U Ö úZÅ‘ƒLéÿt×Dî»>3Ýﱃ¨Éx¹\NàË£c’ Ã2Èv? …£«¤/Ë,ã¹\n¥zèáÉ`­T*]¿Kjðùóçý´áAïÑh4°»» X[[Ãöö6êõºŸ~P©T°½½íßÜÜœYËê¹Ô4ÙvwAå$DÑ0c5yž4–åéý¹œ÷I_”?ü«(ÝWÀ{L/•KÓi@SÝVðJÓMÃ+ÞùΙ| Q“o`NŠz¡ÍZLËò L×õþ?.Šây=ÊeÏÀœÔ@—èj:í½w6šË[x¨ãmï…ó£?ú6²ìóô»/—‘ÿ®Ãú¯Ãññ}3ùJ£(ãa†Ó4ýß]]&]ÇXÙî‡iš4XIO–YÆMÓ„eYœxpFÊ`­Õj¸pávvvüÇ,ËÂÎÎ*• Êå2vvvptt4ð½âñx×¼Ö““ÞÂÿ ßx<>³—¾ûÝxî3ÏLý«ˆš|;Ž3%Ýq¢W'LžFd=(ÛÙl{”U ®Å3U…kÙþS=Ýî}9M pËUd3Ÿ‚a*È)%ÿýÅVVà8•CJµ=Î鹕¿æ‡Q«¼~&_kÔd< qDÒ`]b £Õ¤`c£Õ¤ P’I¼z)Ë Ûý¿2©TŠ#>HO–UÆs¹ …GØœa†2X³Ù,âñ8®\¹â?&µ«W®\Á#<‚ÝÝÝ¡RÖÖÖH$üßëõ:ŠÅ"677‹Åú.ŠF£ÑóØ'?ùIض=rÚÂ\”Çñ”Ýi¬™LK1íõYÓDrj××½ôÊ0 …îü]xz{á ç¿8ïbK¥å…b‡¼¯ÿ=ϧ×u ]¯¯|ä#¸úš×àå<2Ýï ³“oxòÉ'Q,»2úaYÖ|24mº²ç8Ýi·R=,¦ÙS>­s”õ´Å-Êu2éu[_÷ä5ðÙ¶ $S/‚m·|P¾8ËsËeäko…²ýVdëëa¾èÿçSwÁü‰$“·K§»ö„+_wþÁWÿ›éý=Dmöm¤R)¨ªê§³áÒ °,ïgXgÕ(÷@™KV­z?å²ÿsôàƒø‰{ïÅÇ>ö±©^NÔöïQa:ðjP,ñøãÏÄH\†ý»“­ÓnšÕj•e+@¥RÁ»ßýî‘÷ïk¥RA½^Ç¥K—|K£Ñ@­VÃææ¦ï¥ÙÜÜD£Ñz3n4(‹¸pá‰.]ºä?Þ‹[·nõS*µÄóðÐ{ß\°Ó%¸Ù< KmëÿyØ8|;Œßÿ2T«±¹Ÿ?¾Íƒ> ¢²‡‡!ŽH]×aœÊ‘ã8ỗŒZ’èf'¶í9aÄËbYžðÊc·ÝÖ½Î,Ë;.Ï+¼÷9–†f?¤R=ïuñx÷Þ{/î¼óΙ|QÙ¿GAºÁS¡_~dÿž•|ÑÞ¿ƒ8ŽÇqØdi…8wîÜXû÷À.ÁµZ ‰D¢Mù‘q6AExå¨V«aookkk]u¯ýj`c±XÏc·ß~;^ñŠW´yŽ1·tI`:]J3™öY-¹\{¡§¦yÞÖVûóÂÈåï}'šæ)7r£–È—a„~±X ÷Þ{ï@ø¸ÌB¾OáE¾½¯bõ©ÔtŒUÃh«€÷w vøËnkË3 
ûâ¼*oÒ,©—\I¦€ªzJt¹ì-ˆ§‡mKïŽE4Í1£ª€zºWÊÛu.Mkù„ O„;ïÏbóãH$xá _8ùߤQÙÃ{!]R]×ÅÆÆ›t K0³%• ÏæÑ4Ï 3 ª ÷öS¯‚®¹@¡ã_¼ö_û™ï‚ë<Ú–·Vt½%㲆$«_ˆÇãh43Ùã´GÙ¬²ÏÊéõý;ˆišœµºbŒ«ƒ4X×ÖÖºÞ´V«!½˜²Ùl›G'ˆxëõº¿8‚ÿŸ&s5XÁ-mû¢8Ã4[3^„tÚ¯õUm…¦ hLÇS®óyï# ÀL—‘J¹L«ü¯Z”À 1›ý®¶6Œ€1Š!£(§–rvxã>•j p *Pù¼çűs$*ò-D"ª$û~†f¡ÐÞ°K×}õÑ4OFÄ ìõ^bÜXd¿Ó" >_ÚUKӦާIpI™f+ +K0—kêêº÷½;È·›+j2$ØdLQhšÓ4ÏNäÉu[ã—€–Pöº~q “Ù2$¹\+¢\ ªÈø©—‰ h@JJãÔ¶Û׈¤Ì«j+éÂ4½SýøÇ¿ßöm1õ¯/Ê²Ý ‰BÑ)3gÆã×󽀙ݖKÆMÓŒDÃ'²x¬ñxÜ.,^M˲°¹¹Ùö\ÖÐëzHs¯€IDATÑÒóp]@ôê ÜKê¢ô@ôvÒ?fˆ’Œ‡! —oÏEÇy3ÚÅ´² ‚BÖ«[U»Òu= iß±¢ åtL¥-†ñÌE¯ÒC¹©Àñ`„SÓ<N6ë½Nú-+K,«½cÙ¾ò¨Ëv®ë¶m”ú5c<+ÈÜ<éÒ é÷¡ªž~%÷“`·ûà†-uQÒ ^×áüÏc뿆ìWykFU[kbà^/5ÜrOÓ4 TB.[Þ¯¯yÍl¾¢e’qÓ4™5x²*°®÷÷|[–'§ƒ„PÊðdˆsTj–:ß_aKï$-³Rç$k ð^óß8ú÷Ђ›7o6äG~¤yÿý÷7ßö¶·5ßõ®wùÇ._¾Ü¼ï¾ûš>ø`óæÍ›Ã¼ÝPܼy³ùè£6}ôÑ¡žÿè£6/_¾<ÒghšÖ,•šME™Úi‡}Hÿ㇇Íf¹Üú=•j6óùöçT«Ý ªê½fHŽ›ÍlÖ{Éááéƒù|û9ô9ÕtÚ{egy™&£Êw³Ùl>øàƒ#}F*•jV«ÕÙ^H¹Ü.‡‡ž¼ö¢Tò„¨Um6oܾ¢„›¥’·&‚d³ÞÒM¥šM]÷>Þ_/Ï›á©MĨò2m汇‡¡ ÚsÏÕª'§"Óšæ-·|¾·ì–ËÞºè·×·/ÿRÉ{ßyݹ‡Ïcÿ–R©ÔÌvn`g]÷6j]÷î3^­†Èèñq³ü–Ÿhf‘oêwÙMõEO5Õ=ÕõÖ¢7åð™æaºÜT_õw¡÷„àóCïÇÇÍC5Û<üÙOûݸánðÖxV÷ï ªª6oDõ;mDÉÓõE§ºqÃ;®ª½õ¬ÃCo“ïܪÕv=ÿÆÖóTÕ>Q†t½¿'ï×ï<šMï=E r|<–¼ e°öãää¤yrr2éÛL̸k*åýºÖD6;ø6܇´ÿÆ7ºwϰÇú‘Íelf³ÓQ$fmÿÌ‹E¬ã0ê LUÕÞËeo#™”tº]þòùv˜Nù®V{;]nܘ©Ð{KTÖ¿ìÕA]çð0ºFi?­ðŒÊ4Öä7šú4äzÉ)•FòcN…ããù:3—mŸÕzœ‹crú'ÝR’‡Ð_†âÆv%]q]÷/—Ûo'ÁÓä i6[±ƒtzÀméø¸yÃqýS(•Ú¯]}Ù§ýÏGR§}q÷ï Õjµ™š÷&6-D‡QnÜh ¢xEBœ-_N¯cÐr|ìýf\Š<îÆÝÏhEªãÈËÀ”`¡R©àÚµk¼–ÛÒ lÑÍÆEꜤ栋°¦,£HwN”ã´ßÚên $E¤Ò u!ç( _$s@Ê“¦‘ºl)^g×uû§2;±¶Ýžª®i­±B–èÖH/‘Piš–,ŒR'1$’M©iÞš.³l¶»‡Æª¤òž"U·~‚ÞOP®e†±ŠN¡«w粜Qª>8K̽yä4Èç=‘’&émvRßÙYÒIGŽò°Õ¼÷–ÿ8òè¾Í je SÖäyauÖ]œ¦+òù<²Ù¤ü3ö>´ÚÑ ¿³,y:°¤|KŽ~\±‚˜Ëy?½6qMó—\®}²‚ (Þk3™VÖ°÷¦Óc?4­5¹¡³oL©4õ›ÂPk±XÄÁÁÿûÁÁ¶··ý‚ìe¤s&Ÿa ×iu} Ö(šÖäØK¥û­Èø‘RÂ!°ÃÙc â.ða›Ý$Ha§tØí¬­–çH;j©˜‘Â%v€ø‰¤æNŒÕ°}~Ùt?ÒBæ¯.™c¬:ÅyÎkaþË | ݵ`ÿér¿}”Û?õÜs é««Õ)W:K‹Î‘Ï·šÇËD0‘_–3ž LÓ\¼¼ƒèE2I@„\Óü†mhZû¸¥SCFötéU)½i:× ðò±Ns$]~c£Õ/Dî{™ ÛF¹œ÷Û7t^ Õp,ËZÎîÀ2…àø¸%Œ22/8³ ðd%d¼äPÁ21Jû1A¯1g3ð`e°`ssÓo½··‡ƒƒ<ðÀ3N?+ÇÁ+_¹é§mãÃ"£ã}H÷n$È 9’ù¼·A·YÒý‘©Á†Àäl2Pq6˜׋ÙkÈcýÆÎH»i`êïÄ^h©Ôûë:ùUÇ4ÍŘÅ[†QwàüÍ=°?wRß÷e€hÊŒtqœžroÒõ–î´0r+ÙØhõž[䥓ùcÛv4£PÒ¡n˜¶Ò€'ÌÕ*¶6¤— dß?äh)o}¤ÒÀ?ë[”ÔhŸü 
¨wôøÍP”·h°ÓÒFÒ¹e®_0ÒT.®ë.IS¤?PZÚîÀ-•®ì’=Ó)PŒ&Ä@ƒUÚ^oooûíîîâèèµZm¦ƒ°g‰mÛX[û^_~¤•¢½ó„G$¬ uØ|È^¶Ò+ Æj?œ¤v_N[óe0ºn«eh'ÃÊ÷Œreþ)ÐíÐ$«ËÜ£«¢„HXè©- ÞRé%äïUb˜ê:Ô³ŒišÈN!…| ®ë q*5PPíí"Pw¡¹V+›LUa½ø›a¾ð;û: SYŽë«âl”®»š(ú—#«Šâ=G :Ž×úS!<>½>üT!£Cix"_Ú±±á­ ]ïvN†·´h'ñyÃ>1X«º¬QÕ Žã Ñx‰/Cš˜ £$ïj\&­ I)sÝVgèN[–Æ*†ªk’6ýA2™Öè™~H‘tó™zŒ’$+ŽeY³‹6u®y̲Z¹¹ºîmÂë.“™ì6BHŽãÌoF¥ž…‚§¬çráÏ3 ä~)7›GÆÍ{ëáø(—aÅw¶éìak"˜®'•YÇÇ­ê©Ð ,ž^9,ËìpŸ6RO=h³ÎdZi/RÚ4~ d C¬«„4¤ :F4íTæ‚3"Å{.ˆ@Ë”=DRÃ-†µŒFVMˆ …œA¤¹ÅÖ–·‡)&òX„ÜŨ¿œUfÖ¤Ãu='NçìS uÊÌÇmZ– SÑÉ´™[S)š.•¼ìéR·±Ñ­Ü;Ò?ù%¾aT—d~ïéÓL¶ôü\®Û/Á+rö˜{¶Œ8c¤æ4L¶–A+º4KŠr4x…8“«,†`Óß` ÖåÉ$èÖ Û’Až˜)*ôbœ¶6ýR)R6‰¶m÷ï¼±áýÛÁïŒáÖ_¯ŽssÂ4=Ç0¼%)ö9»¸§PuÚÞ .1HGÀ0Z³Õ ™6¶mÏ' eš­Î^B>Âü—¡P$ äóHmßÀ3PåX§ßS*£¶¶¼5Ò¯Q<9[,¤qž4±ÉŸfT«Ýºt³¶ä‰L¡ÇÚìììt=V,ñð÷=¶¿¿?ì[. Û¶¡(J[?#é0ז⨪í¡~‰> ™LK±cJ‹®_3%F“H'’*JXá¿tÖôkw>‡›I.×­èolx÷ùxÛöNÙu\»DÎÌ¢M[[c —m{ºÍ Þz„Œƒëºƒ“Ó¢‡ì»ŠŠÜ{*ÓÆF«>[T(ñ}nl„ßJd"F2Þ˜œMæ>ÎFÊ:9áE¸)¨ c Ázîܹ¶†KB<_ô¹ëº¸ë®­®´óR íÂ(ùéÞ‹¼ƒÇç·^(xÞGÖ§’aéë¡ì×;¯Õ[A£Õ4Ûdœ!€Wó4õæ3½F A&>щi`šæì¢«Ò(c€1,=˜òùÖ¼÷N;#›m9Ãдвor†±méyzŸÃ¼ãa0:´p¬±Xl©ç­†aÛ6 #×5:(Ô©#i¿aÝTg¼ËJJ«dúv¶íþÃå,h275›õ<ó2_OÆõÆPMÆFAŠ¢Çœ(e~„Ì‚™(õ’›+™dé4¶¶Zéº2"Uд–JÔ+!g˜¡4V‰àº®ßcfN8V¹Y C§ •J×®]ó?þüÒ¶±m¯~õ‹ðêW·?.Ó ÚdWŠ[{E¦¤£ð P¦Ë“Ñ髼wÊ« ¹ŽH¡4刑*2ˆi@p]ÀüÖ«P¾ì‡RÕ¶€“,I®Éç=Y•²Tª¿?ˆi`YJSì#ûGï…öôï#õã­þ’`Ðl¶?WZÐH¦ÍL;½w6LB'qè2´ÁjY._¾Œz½ÞöøÁÁb± …ÂÒ¤ kšæ÷âüî°lºþ¼ÖƒRÜ™  "ˆæ=äâsœÖðw ¥üPQ'S¥ÓsÙY§½`‚!S)o¬¯‡÷@ ˜Ü`•&¨Rs纀Sù+¤Ž¾Õ?žL¶ögIƒ”=` 5™Ó®ñ³ Ì?ü˜w¿ºæ4í™`°µÅù¿d6̬á’Lø`h©ªKp¥RA6›Å¹sçP(`Û¶ÿS(pîÜ9\¼x•JeÑ×Ó…t,Ë‚ªª¡Ó:ÔŸÿQè¯}¼ýA‰°µ“ ½:÷z~¤“¶Œö3 «dtú6â°¬n«OQZ™Ô¯v*ýÒ ›Æ*éŸ)d¶í)ß2Á@ÞBU¼³ík×üß=]§\noÔÇ=šÌ›©F¡\¹÷|bMÐa®ëáûnp 2MlÛžM„5—óœŠÄR3T„uooº®£ ˆèº]בÍf±··‡GydÑ×ÔFçØTÇqpÛmë¡r«~î:Ô_Óy½5]ïžÍ'¸îЩ’r&5µÒ=QéÛ!ØuÃåXU=Í}A…wÁ »ŸŒÙó†œ!ÇYÉcµTt-d]„¬ê:$ ض=\:°Ô8)JO]ÄHšP¾èk ÿ‹»Ûï×3 ˜ FÈ4q]Ê´…+—ó„™øÒ30ÂZ©TP¯×ñÐCõ}Þöö6êõ:jµÚ¢¯© ×mOkqÏþý=,ueGÊfß«0©WTj„hUgz²¢pm‘ñè›RÓKqÑ´…j!’=O™'ãà š‡-œæþÚÛElé7p˜¶ [9/Ä¡´xBz1Ò(›dÒs¨çrÞÿs9&ª,£þ”Þ{oàý½l/¥dÞL»OÐM“V„ÖZ­†X,†X,Ö÷yR¿Úh4}MmtÚ¶mãó?ÿûqñbÈ“OS…‡n ©ë½S‚G0X;Ç»2.};÷ŠBÍ¡M£¤¹Ó{ƒåYEÆ¡oFAdÒ8]â:c¿ ͽÖÊ÷%d º~Õ²B M-ÓûWŒÑêízÜ ‚)ò„̃‘ VÃð 
­UµÕý.H.×ê=CïËJ0Ð`][[í[·}žc£ªžƒEºW{Þø;ºuôÓú>Ûñzå+¹ÛËè¶v'Ó`¬ñsšÌʱ,Ï“ŸNÓYCÆghyO§}o={sѧOÈH˜¦‰ê0¥a]%1ÜV?ÅæÃ„ M_g{'2v¬ZímŒJÚkŠV†)ÁñxF£«;p'ÒpimmmÑ×Ô…¢´2¾^ò’¯ïm*Êt ÇŽ¨«D’‚®®ËèY}t½¥(Ùv«U2 ÒDo ©”ŸîHÈ2"ÍÅÖøIçÆŽHlzêÒ“‘: ƒÇhG•`(ƒ5#›ÍöL÷m4(‹H$‘mcYždÙ±mÙ17SîÃ$Ê =šL<à,´>“ e°-£UF×Ôj5?]xPáE!£Lóø¹ŸSqï½=,ÑS«qäaðR buJó²ÎÇ®92mzz)m{®IIO¥Õ"³a`ÄÉ0¼QeŠŽB³­Érã8N¸CÒ²¼T­R ¹œ§ëˆ:"T¹“(cYòÃ4sÑõÆxUchƒU7ÞF£Z­†D"±èkò1 O¾uÝÛÄŸzêUÈfû+í#×h÷0$‡7 2/úΣ”‘3"˜¥#ÍmÒiö< ³£oÝ“„÷Ëe˜÷a²ÜX–ÕÌų¬–“|kËÛËe¦“èÒ3KFêç:ÇŽ1øÌ2T ë0Ôj5ììì,úzÚFKU¸ûî2}ÅEÓB‹ôr9Þ$È|q]·wZmÏL ¥¶¬5Óô”&«d–ô°JXIUaY,» ËMWC=×66¼ÿW«¾€'ÞzY\ÔCH” -ì8žŒsF6 0r„u™èT˜‡éBæuaÔŒ¢t׿rÈfó¼Q¹2°žoFÙ¹V8¦‰Ìƒžsûd¯VÛÊBYVºêWÅ!ØlƒNCB–…B¡€rgš¯aÐãMº˜Z„u\d~kµZmàü×a±m»wCšBÁû9eâMß0èÑ'æ'ß™Á<ƒBcÈâdÜqœî«ëzùè§«¦Ét`2>QÙ¿;³ ̃§aý¿ÅÖ–÷»eqÆ0EʸaÐu½;{À4i°’.j°ÖëõÐ4b¡sñâE\¸paìÎxÉdëÿ}G~¸®ßBR×Ç覈²º.{á™ûwI¢Ç¬å»“¾‚g]•Ùäì2oïK.ç¥If³¾¼›&ÓÉxDE¶;kµÝÿô0rŸýwоv Šâ9"ã°h/ ÝïÍM›ô`!k½^‡eY=ÁÞÞb±lÛÆÕ«WQ«ÕP,Gþœ`ii߆4’76.:VÎU%ó’ï }GÚÌ`¦‡¼% Ö³É"dù$ŠÅ"Ο?ßåê›A0eLsêÆdÊ‹E<þøã ùìiíáýîçɤ'^F°ãx>Hê=«ËÑÑþàþ`îŸ;«ý»Òù]QX–ç„é J‘ÕCöïz½ŽX,6×Ïž×þ:’Œ-ÝÏ¢ƒÊÀk¥Ré¬F£Ñ·³Ø$ôK-¾uëVÏc·ß~;î¾ûnœ?€§´t®‡Ð?IA0ª·rkË›w¦i=>˜DŠsçÎáÞ{ïÅ‹_üâ¹ö¸ò wÜqΟ?sçÎÍý¼ƒXÓ£Îùóçû*ϳdZ{x?Ç¢(Èåý©_@©ÔšA©ªÞ~L½gu‰Çã¸÷Þ{qçwÎõsç½Û¶¿ÿ‚ÿw9(8Wõ¬ û÷¼å˜ÏþÝw†6YyDU¾ÇšÃúðÃãààvG*í4¦!tÒÏÓtûí·ã¯xEOÏeÏ”É1.¹n«ùjpX7jJ'‹áÞ{ï©îzZŒ+߀§ðô’oÛ¶‘ËÑuïgŠf©4ŸïŠŒO"‘À _øÂ…|ö¬öpA”Ûö¢ýÕOýÀžg‹x<ŽF£1÷=|Vûw(®‹?ÿîŸÃÕÇÞß<ºõÂ\/•,Ù¿átœõþ ô™¡MÎãêà ŸÃÚ‰xƒi ã¤Eèz{J˜ÚÏ-²pÙâ†Ñ]KÀûP†ŸH¦%ßa„ʸ´œŽÓCæ 0KZ —2™ÓHêÆú¢/™œf-Û>¦ ll ÿáïÆO=ê…/]ô¥“3ÂnÓùHú³Pƒ5‘H„F†âñ8~ù—µZÍÞ¨8NK™öÈ){åÚ"ý˜¥|é)ßFW3™öÑ5´ H'ó’qakËë¶êgÌPÉ!3bÞ²- €ù'ÿÿìýæHŽvBFe2:CðÒ·XNGú0´Áº³³ãÿ_ eƒ Ó3Ê\×0R©vƒU×u/Ý ›¨àض÷ÓkÎäÖ–7¬›z—Iå;ˆã8áÖÓ´²qÑ5œµJÆeš2.üØ}ÿå¿|¢õ•²f!ÛBê7‘®ý 2Õ{â#dÆÌJÆ]× Á×Oé&C¬çÎÃööv×ã³Ú¬§EÏznÓl- Óô¬ÎŽÅãº@¿¦Â£M$:¸a…¥Á6ÖcÀÑ5$ŠüéŸ^k÷ÎO9{†EaYžÞ¢~Å ƒŸ€½¾ÎÆ4dåÎÐn#˜IH ÖX,6pp)Zu¦m Ä4[O²íP­\Óz¯›)õ°!dj„¦Ø˜æØ§ëzKƒãkH”(€Zí³¸x1 תJÏ Yz\×+Áyì¢JV™¡ÝU2€Èu ž†Ñþ»"‘Ôà\Û]$ŠBƒ•,]«ãŒ-¨غ„Ì Óþâ/~«]‘¦C!§Pð¶ëVi¶ÍúU²r8ŽÓ»C0kìȆ®a­Õj888Àöö6âñxOïŸ=¥™“Òž5æ´Ôƒ>)†ÑNoýYBA¨ÇrEžA+Uüz(#dI±,¯ »`Û6RôŒ“£gæÀåKäì0T„Õ²,\¼xµZ kkkþã»»»¸té‰ÖÖÖpõêÕE_O(mMkEYû,`æ0àc Ãk¶DH”p]wªécºÎ{‰–¼æ5 
FÈÊaÞžëûa ¶m‡7¦!d‰éi°* SÉ@†2X÷öö ë:yäÄb­!Öñx›››ØßßÇùóçñðÃ/úz´gضíyäs¹V'0ÙGùéìå¡i^} û$jLsl“izÎNB¢„e/yu}då(Ù\ŽÓí-'dE虬ë}:¥â1Ð`•ÁÁ=ôPßç=ôÐC888@½^_ô5hÙ¢®ëz DêUÅ`•aª=C¼¨3­IÛöü:„D Ç\·cà¼i¶÷$ dɰíŽ&¶¿xñ‹é˜!+ ›‰‘Ih°Öj5Äb±¶È*? Xã'''‹¾¦6üèS0G¾Zõ4 >)7bd2‹¾BúÓe°Žiuª*ç “hbÛÀ3ϼ¯Ý`H¿BÆEÓ:ÊŒlþ›¿aê;Y9üà!c2°éÒÚÚnݺÕõøþþþ¢Ï½'A¥ã8^-HXQê€!Å®ëöà#QŲ:¢N2“fD\×3Téü$Q¤Z’I§½®¹ëdÕpÔ>ûYF¡ÈÊÁè*™”Öx<ŽF£J¥Ò÷yr<u]ÁN¾®ëâ9Ÿþt·ÕY.÷µD¥çÔB"CW‡`™>?¹3 H´ ú;=,d©éJ†q\ùøÇÙp‰¬]ÎuÁ0ºçPÂPk<G±XD£Ñ}N£Ñ@±XD"‘@<_ô5µá8^÷‚Œly¦Ó4VIôq§Ýk)EQC½H&½ÿ—J‹¾BÂ)€+WjÝÊŽ¤²¤°?9+„v¾c•‚É 5‡õÒ¥KØÙÙÁÅ‹±¹¹Ùf”^»v GGGþó¢†ãuëY±¡ƒ“,Žã´Ïë³, ›âuž±šÏó^A¢aßÿýaýY9:ëW?qÇÐî½wѧEÈÔéê,Æj¹LÇ#Š¡ Öx<Ž+W®àààÅb±ëøææ&¶··»3-š¶ÁL#+HÛ V©ébó7 Ï®¥±J¢N¹ ÆïwgЀ%KN›ZbY¸~ÇtÌ•£ËXµm«dd†2X¯ ð¥K—°»»‹Z­æ?Ç#Q·†_ä­ëTnÈJÒ6ƒUæ izl‰:ª’IÀ42²ä žûªI6‹w×jx ëdÅèj¸”Ëyé]4VÉ ¬aídmm ‰DÂÿ‰ª± ”y¬d‘ ¿)ð™_C–\ÎKèªÕv¬d©±¬î=ø}ú.‘•£Í`•ÙÙʹm{Ž÷\Î+o’fŽã5‘äˆîÕcdƒu™ðGÚ²‚ÈM`kË`è›ød0ÍŽ•ryѧFÈD8N»l»ÓDV”6ƒµPªÏà­ÃhŸ¶ ¬¯{j¡ÐòÑ—J^ÀðbSºÎ1Ý«ÈÐ)ÁˈmÛH˜µJȲâ8~x¤‰YNVÖ`õçSà YLxîsßPmΟüußççrõ„Dà„&Û¶¡)Š'ôC6ë $Êäó€â:~£<˲h°’•Á¶½ÆH€—ê~ÿïþ.à8Èç“ᥪ­Ú× ú=—••5X-Ëò¼ò–Å‘6då8:jà•¯üSïÓ„ö%ŸéÙ/“ñü6‡‡‹>kB†#ölS?ò4BgIB¢Ž¦¡­ŒeYL&+ƒ¦y?ÿò_~ ç <ïOþPÕI'ÙLL>ïÍŸ—¥&]ˆu]VÖ`u]oøƒ?h­BVˆ}ì¯ñæ7?íýâ8P5%Ô`Íd¼9w•,#¶mãm/{™§Y°Õ#Y%LH§™LV’lø•_ù Þøñ/ÅÆÇ%2•yŠÒã•DŸ•kóä‡? 
õ£ÞÿþEŸ !SçÍo> x‘']‡®wÊcµTZôÙ2µ+W°÷_üØ-úT™’ó®(L&+‰¢ßüù»ø?>ûNdÞòÜH&:ªj{Šr¡à7}£Ñde ÖoþÀðyÿöß2¬DV×uñ¦¯üJà{¾¨V¡¡½.CþOc•,+Ÿú—ÿƧ>…g`á¹d„L“ÓfK€—\âFMV ×uñÝûø6óSP¿úŽEŸÎP¤Ó^í­tñVUVNŠãL¯ýDä ÖZ­†µµ5Äb±¡_só€°æ‰DžqäðR%sŸú”§ô( ´ûf(ú$*Œ%ã¶ëׯ£ò½ß‹Mc•D˜±ä[×]G¡P`t•Dšqu”=ôžãü…Ñ^·H(—½aË‚_fœw/ͤÊeïß\¡åXšæÀÒJG’*¼Ùâ½ÓUµáÍåÚO&½s «¹í<™_kYíÁ ™aYžž˜Jy¯‘ª›Aç§iÞklÛ{]Øw#U˜–å}Vp‹[_÷ÎáG~dô¿Od Öz½Žl6‹Z­ÐO7÷aøƒ§žÂÕ7¿{‹¾Bz0‰|Àíÿ8^ä8­ÖwðêTƒ,!‹d"×4|ÿsžƒÒ·û¢/ƒP&’ïT ®ëÂ0 T«ÕE_ !]Lª£|ñ/ýþê¿ý·E_ÆÈ(J‡¿†B*Õ2LÛöŒJ "¨j+b+ŸÑËO ù$n¿ýö¡Ó·¦É8ò ò'‚7¼á ¸ë®»ðÒ—¾tîç=m¢"Ó$*köúõëxúé§öùÓÚÃæg~fa×0 ¢"Ó"*köñÇÇÍ›7ñ²—½liöðàþý¼ç=W¯^ÅÕ«Wç~îÓ"*²0M¢²^eÿþµ_û5¼ùÍožûçS÷ˆŠB¦ 囬:”q²ÊP¾É*Cù&Q"Ò+!„B!„³Ë³ÞñŽw¼cÑ'±êT*ÜvÛm=S)jµ>÷¹Ï…ïwl˜ã³ ÑhÀq|Á|ÁXçÅk"ã3‰|Oãø,è'ãËx=d2úÉø2Ê÷p"œµý{Ð9EñzÈdpÿŽþ5 E“ÌŒw½ë]Í׿þõÍûyß}÷5|ðÁæÍ›7ýã'''Í·½ímþñ·¿ýíCæø,¸yófóío»ÿ™o{ÛÛš=öØÔÎy×DÆgùžÆñYÐOÆ—ñzÈdô“ñe”îáD8kû÷ sŠâõÉàþýk…Hv ^ŠÅ"vwwaÛ6®^½ê?&ìíí!‹ùÇkµš¼ß±aŽÏ‚b±ˆz½Ž«W¯Â¶mÄb1 }NQ¼&2“Ê÷4ŽÏ‚~2¾Œ×CÆgŒ/£7ªòÀ=œ„qöïAòE‹LîßѼ¦Qa ëxà°½½ xøá‡xЋO~ò“=ݺu«ïkE¨§MлóÈ#à‘GÁææ&vvv^Ï0缈k"ÓaTùŽª<ô“ñe¼2=:e|å{8 ã,ì߃®)Š:™Ü¿£yM£Bƒu$ lnn⡇ò ¤ƒa÷°ç÷"‹õ}m,›É5œ?üôÀÛ*•ÊÀsšô8‰.£ÊwT塟ŒßvÛmKw=dztÊøªÉ7÷ð³ËYØ¿Éwu.2=¸GóšF…ëŒ(‹¾çCú8wî€öÔ›z½ŽX,Ö÷Ø ×Ί~ékkkŸó"®‰ŒÏ$ò=ã³ ŸŒ¿êU¯Zºë!“ÑOÆWM¾¹‡Ÿ-ÎÚþ=H¾£x=d2¸GÿšF…ëŒ8þ<*•ŠÆ˲ü?~,C"‘ÀÑÑQÛñd2Ù÷Ø ×Ίx<ÞÕB»X,úž™IÏy×DÆgùžÆñYÐOÆ_÷º×-ÝõÉè'ã«&ßÜÃÏgmÿ$ßQ¼2Ü¿£M£r[³Ùl.ú$V•½½=!‘Hàä䤭@ðrÒwvv‹Åü¢æýý}¬­­õ=6èµ³B>S¼T£\Ï4Ž“h1‰|Oãø,è'ãËx=d2úÉø2Ê÷p"œµý{Ð9EñzÈdpÿŽþ5 Öl™V'Ñh4|Pçñ~dž9> &=§(^ŸIä{ÇgÁ,×$å{ùè'ãË(ÜÉpÖöïAÇ£x=d2¸Gÿš†…+!„B!„HÂVB!„B!‘„+!„B!„HBƒ•B!„BH$¡ÁJ!„B!$’Ð`%„B!„Ih°B!„B‰$4X !„B!„D¬„B!„B" VB!„B!‘„+!„B!„HBƒ•B!„BH$¡ÁJ!„B!$’Ð`%„B!„Ih°B!„B‰$4X !„B!„D¬„B!„B" VBȰ, ®ë.ú4™;Ó”}×uaYÖ¢/‰B™ 4X ! 
#™L¶íEŸ!sgš²oÛ6’Éä¢/‰œa Àã8‹> Bæå}þÐ`%„B!ccš&xrf ¼ÏŸg/úÎ:ŽãÀ0Œ®ÇÓé4TUÐò䨪 ]×aYÒé4/­LRÁR©4Móß#—Ë!›ÍÂ0 ¸® MÓJ¥ü÷SÙlvìç2ˆaä …\× •±iÉ8å›Ì“yÈ~>ŸG¡PÀm·ÝÖö:×uQ(º^Cȸô“g˲à8LÓèºÞ&Ÿš¦ùQ'!ËÀ¤ò®iš/·Š¢ NCQ”ëqa„uÁ¸® Û¶ý˲|ð»P(ð<:[[[þ") ØÚÚòßgcc£mÉq×uáº.¶¶¶ÚÒÐ …r¹ÜØÏ'dƒä2™Œÿ{/™”÷šDÆ)ßdžÌCö3™ LÓÄO<Ñö¹ÉdÒW„™ÃÈs |J$Š: Y&‘÷}èCX__÷¦i¶•lP®Ç¤I"Ã7šš¦5³Ùl³Ùl6›šÇÇÇþsTUmêºî«V«þ±R©ÔTÅÿ@³T*ù¿kšÖL§ÓþïÙl¶©ëúØÏ'd:å»Ùôd.ø{©TòelÚ2Nù&‹bV²/¯/—ËMþçåši&Ϻ®7Ëå²ÿ{§|ËcÔIȲ1ª¼Ë~|½èíò\Êõè0Â!2™ TUE>Ÿàyet]oKK¥R¼T1UUýîÁ߃<‚¯UÅOIèŨÏ'dX:å[ÐuÝÿPþf!ã”o²f%ûÁ×­FNrŸ dô’çN:å NB–Qå]d6“ÉÀ²,(Š‚r¹Ü&Ë”ëÑa kDÈårpårÙ¬ßÈÇqü:¥ a7BM˜|‚2NVyÊ¾Ô f2/úÒÉ 2Ž<²¬Œ#着¢\.ûe|®ë"N£T*-úr–¬À0 †r¹ÜæeQ¥§ÑªišïµÄûκ%%zÉ÷ (ãdÙ™·ì—J%¸® Ó4ý& „L‹qå™ed\y—ÆI¥R ¥RÉ;¦išß0•ŒS‚ŒmÛÈd28<<ìRDR©”_ì x ‹4ÝÐu½íw UôMHTè'߃ Œ“efQ²¯(Šß­’cÈ´Fžûe…²LL"ïÁÈ*àE\éà™¬ F:þ&“IÜvÛmþO.—ƒªª(•JH&“H&“ØØØðSÂD)ÉårØØØð;H.ú’ñé'߃ Œ“ef‘²ŸJ¥J¥èÜ!Sc<+Š‚\.: „ecy—6ëëëH&“X__‡ªªì-0!·5›Íæ¢O‚„#)`ªªúsXeN“(.Á&’FFÈ*A'gÊ>Y‚úJ°¡ !«È0ò.Ù‘Š¢°Œi Ð`0Žã`}}Õjš¦Áq$“Id³YæÁB!„BV6]Š0Á”`×u¡( Òé4UB!„BÈ™€VB!„B!‘de"¬ï}ï{ñ‹¿ø‹øâ/þâEŸÊÔXµñO=õžzê©Hüžzê)üÝßý~ø‡xѧ24ÿê_ý+¼öµ¯]ôiL(ÉôˆÒšµm‹>¡á}¢´fŸzê)ÜyçøžïùžEŸÊPpÿŽ>QZ¯¶mcoo±Xlѧ2Ü¿£O”Öì8û÷ʬÿûÿo¼à/Àùóç}*SãÑG]©ë¹~ý:>ûÙÏFâš®_¿Ž?ÿó?_ôiŒÄ­[·"ñÝM‹(ÉôˆÒš}ôÑG} #Á=<úDiÍ^¿~õz}ѧ14Ü¿£O”Öë£>Š;ï¼sѧ14Ü¿£O”Öì8û÷ʬ·ß~;^ñŠW ‘H,úT¦Æ7}Ó7­Ôõœ;w÷Þ{od®©Ñh,úFâŽ;îˆÌw7 ¢&Ó Jkö…/|á¢Oa$¸‡GŸ¨­ÙeÚùGŸ(­×¾ð…X[[[ôi ÷ïèµ5;êþ½2ë*²¹¹¹èS˜*±XliÒ[ÈìYEyXµ5K&cÕäa×,U”…U[¯d2VM–}Í~Þ¢O€B!„B ƒ+!„B!„HBƒ•B!„BH$¡ÁJ!„B!$’Ð`%„B!„Ih°B!„B‰$4X !„B!„D¬„B!„B" VB!„B!‘„+!„B!„HBƒ•B!„BH$Y¸ÁZ©T} „Ì Ê7Yu(ãdU¡l“U‡2N–…g/òÃëõ:vvv`ÛvÛãÅbm% ìïï/òt  Ê7Yu(ãdU¡l“U‡2N–‰…¬õzµZ­kA'''ØÜÜ„®ëþckkk ù‚Ê7Yu(ãdU¡l“U‡2N–‘…¬•J–eõ<~ëÖ-$ $‰…}1„Œ 囬:”q²ªP¶ÉªC'ËÈBjX777±¿¿ÝÝÝÐã’S_,Q,™cO– Ê7Yu(ãdU¡l“U‡2N–‘…7]ê…¤*4 ìììàèè¨ïóÿò/ÿïyÏ{°³³³èS'çèè—/_îªÛ˜'£Ê7<þøãC?—œmvvvpýúõ…ž÷p2+ŠÅ"._¾Œ|ä# ù|îßd–Èþ]«Õvܿɬ|Ôý{¡M—¨×ëØÞÞÆ<àçÌÇb1‹Elnnö|Ý~áâo|cO!Âææ&b±®]»6÷ÏW¾àž{îaÓ2ûûû S¸‡“Y³»»‹óçÏÏ}çþMæìßñx|îŸÍý›Ìšquð±"¬Åb/^„¦i(‹8::B±XœÊ…Äb1ìîî¶xonn¢Ñh0-,=”o²êPÆÉªBÙ&«eœD•‘ V1P‰b±¯{ØÑÑÑT<ú•J¥+õàÖ­[þç²ÌP¾ÉªC'« e›¬:”qUF2Xëõ:Ïç±»»‹sçÎt]Çþþ>*•ÊÄ9÷kkkØÛÛk{ŸƒƒÄãñ…¤G2M(ßdÕ¡Œ“U…²MVÊ8‰*#Õ°žœœ@h«kæF£1Ñ 
ÅãqlooãâÅ‹H$þg …EW„L 囬:”q²ªP¶ÉªC'Qe$ƒUÒFWj@£Ñ9ºšH$B;µîîîbss³¯LHÔ¡|“U‡2NVÊ6Yu(ãd™É`•(êÎÎ.]ºä?^¯×qùòeÄb±© v,ókd Y5(ßdÕ¡Œ“U…²MVÊ8‰#7]ÚßßÇÚÚ.^¼ˆJ¥‚ƒƒ\¸põz)d.¸.à8Ã=×0€­­Öë!„B!ËÃÈsX×ÖÖ°¿¿Z­æ×«®­­±›L Çñ~t=üx2 ä󀪙Œ÷¼Tªûy–å¬å²÷»i…px¸è+$„B!„ ÃHÖJ¥MÓÐh4Ç‘H$H$h¬’©âº@.„”V —ó T1fÓéöç†g¨š¦gÌ–J€¢´ž[.{†.!„B!$úŒ\ú¶¶˲°¹¹¹ès_*lÛ†¦i‹>…!h©4Ø`Ô4 ZmýnYÞ¿¶íý?xLÓ¼A¢³×¯_Ç÷~ošv¾í½i¬B!„²<ŒÜ%xwwÅbõz=ô9»»»‹¾¦Èaš&¶¶¶pxxˆTXîêŠP(Ùlø±|Þ3$%Ú™Ëy¿kš—Ϋª^tÔq¼ç ¬ƒÒyåµëëoÁɉ‚g=+t:½è¯†B!„2#×°îííð ‡Aƒµ›\.‡r¹ŒL&MÓ .(Ìçœv*æóßùÎ'ðÔSïÁw}×Èå<ƒ²—Ý—Ëy†¢ëÞÿ%e×0¼ÿ«j{t3ŸoEL3™V¥`ô4øÜQ0 º®#ŸÏ#™LÂqäG}r&q]†a ÛËóLÓD¡P@5lÁœòØc-ú4 !„B&fä.Á¶m÷ý!íär9¤R)߀ÚÚÚ‚;Åvµ¦iÂ’ä¶¶¶Ëå>϶=#òÇügaYR)ï±V´S0ŒÖÿóyïÇ0€õu¯9’a´¢ªhš‘-—=Cµ\îýÜQ¿“t: EQP>í¸´¾¾#x²$”\.Ó4CŸ˲pÛm·accÉd™Lfªë`lÛn;×u‘L&aÛ6666Æ>?×u}çÐ,p¹\ªªvufÏd€×¿þop÷ÝÃqô1?B!$:Œa€££#X–…J¥À*¬ë:ëZ;p¦iúQT*Û¶Q(º"~¦i¶mè§¡IUUýH¨ã8p§í1þ{@:Ý?õµP(@Ó4ض Çq`Þûd³ÝFâµk×ðe_v€ýýÿˆ­­-T«U”J ,Ë‹¦Š¡©ª^ô4øú`C¤QP”髎ãÀu]¿^XQäóy¤Ói lll@UU<ïyÏÃË_þò)ü•W ˲`š&t]‡rú#Öu]”J¥Ð× èºÞ³NÛ4Ͷ”xù[¤Ói_.]×…®ëþèD H¥CPd}QªªúN´Îµc …EA©T‚¦iÈd2H¥RÈf³0Mþšµm©TªçõÏ1™LÂu]T«Õ®síÿÝ{Ž¡t:|-X–—vÿ¡½ï|§wαØc¨Õ>‹ƒƒç¾ü˯áþÏ·ã[¾åûpròÄ%ƒÌ×uCï g•L&ãï„̋཈F6X‹Å"°¹¹‰íím@£ÑÀÞÞjµ.]º´èkš+¢d(ŠÒ•F˜ËåÍfÛ”Ö|>õõu¤Ói_v]×®eY¾ÑÕ‰ã88<<ôoà™L¥R ªªúJrX*£g ¨V«0M†a@Uóp/jú†7ü¾æk¾Ï{ÞÝÐ4à?ü‡oõ?'•JùJ”®{‘PÃð¢§Ùlï4a¹.#)í“bF¨á®ªªolØ¶ßøßÀç>÷¹¹×2 ¯àßÜu]ßé²µµúýJ´Ô0Œ6ùÇeYmÆ^6›E:F.—ó1š¦¡P(À0 _icÔ²,ßø£RÖžã8¡Ÿéº.TU…¢(°, º®ûƨ¬Û¶±µµå´²~R©4Mƒã8þZÍd2þšëE2™ô¿¯n½ÜÓ㺞ªªžHU[FkPWr]ÏPµ,à•¯üQ<ð@ºþ`w÷CøèGk¾…B¦iâ—~É3fwv~uÑ"E†$èèÙ;Ëd2?“@²d™r?Õá8 r¯#„ŒÎHk½^ÇÁÁM ¢ë:vvv°½½X,¶èëš ®ëb}}ÝOñZ‘%Ó4¡iZÛãB§l6‹B¡à+¾…B©Tj`s Q®ý(˜(6år¹\ÎW˜ƒÍ‚†s:Æúú:ªUï÷gžy‡‡¯Æ»ßýûø¢/z1¾æk®·½o>ŸÇÆÆ À¦i§×è°NS‘õ¶ó6 Ã7.TU…ëº8<<œÛ&mš&Žû>GÓ4üíßþ-®]»6—sZD¦²Ù,666ÍfÛdçðð¾ÞÍÛä3¨p÷ª •èfT*Ó4ý÷•÷Éf³Ð4Í7R766xò9l#³B¡€õõu¨ªŠr¹ EQ ë:ªÕjhd«3*[*•ÉdL&Û®Ož'ߟ¬‡ßøOâU¯ú$nÞ| àø |èCuüÜÏÝ?üÃ/€¢(¸páv¨ª§ÈˆãK×½óŽvzÕ«þ À·à5¯ÑÛγPÈ`}}[[ïÿºÈr!Ñ}EQü{ɲ0íø†aÀq¿ïC¡Pˆ|M9Y r¹\›7­l˲P(`Û6J¥ÒÀ{–ëºøÀ>°è¯ƒH1’Ázrr¡©2õääde VQ¼5MóS$Åe]¢6¦iöô §Óé¶¹`Úp?4MÃáá!¶¶¶ í5^„·ÀS|¥ÆM 
ã!?qlH½µ•b´ëY5MóoêAù”¿w0B;,©TªçßQR¼ÇQ$²ÙlOÃyØ÷Óõ¶·?‹XÌÀ[Þòrÿ{{ÿû?…Çÿ6d³¯óŸûõ_ÿoqr²ƒ»îúy¼æ5ß‚|àß#û[¼öµ„Læ½xì±ß…iÚ°,Š¢øéÇÅõ:k;Èd2ø‹¿€ŸºÜ‰8 Ø {ùìÉÐq]c+Ê®ëúûå´‘}[Þ;¸G—ËåÐÏ”ìUUý½%Œ`&…eYþ½S¦Ò“€a êÁ†“z„ ò\­VýŒ‡°ˆ¨DþƒdAGöpEQ|ǽC–åÕÆ½ýíÿÉdÒOá”÷ée#jHD.XÛyªªbkk˯+”†¼[[[þ÷%µ…ù|¾-Úqq£oÒÌŠŒ†(’Áï.˜¶›ÍfaFW4(aˆÑŒ"ßQGד“çBQZÝÐ'ÓôÆ3mmyk¬\öÒåÓé}ûwø¢Óø×~ƒ¦àú’Ôc‰¶•J¥¾õTýÖ'‰.¢÷0EQ|ç_Øß4˜¬Û*ÏÁÔ~MÓü2Iyï\‹AÅ^”ë “h¥èŠÓQúˆ3Sœµ‚eY~¤4NÃq?;#˜… û¹Ü€ö,qže2šiFÈ)ÁÒ’ Ì»®ëËf&“A6›õ×Q0È rÌ„ZYnÁÌyLJGr¹\ß=[šoV«U_ßI¥R¾Ž£iªÕê™+¯#­ ÅR©4–Î$2$ú¸ÜO‚Çå"™<¢Ÿð9Á ÜOäÞ Ž)³je^¶Ë»<Dz,ÿd­9ŽƒÏÿüÏÇý÷ß?Òõd°& Äb1är9\ºtɤÖëu\¾|ÙÎ*ÐÙDÆqÿFÞ ‰ ã­(+€‘¢O@øX×mq¯ζ½Ú7yûVC$Õ?Çi(ºqí…¦i=Ót[‘ߤŸV;"ž™T*Eu H½£lNažäA©Õ½£6“É •¢5lÛûqœVJ®ãÇÇíÍ,Ë3P¥¶{›¼ó†x‘T‰b/ÛwG†Ãq_É파KLI…%!X–ÒëýMÓôßGöÖ`µ8ƒFdPy___G>Ÿo‹¢†!÷½\.‡|>ïï-^߃Ö9æóyß8^__‡¢(~'÷~÷K)MLÉú½ßû=|Û·}Û¢ÿ”dFˆE²º$óFÖ‚išmQJÑgD†Ãôˆàû£ÿâ  Ê¡èhÒ°R²!:K^ÄÀ”愃äYÖ‹Èó,jeI´èÔ¯doLj4}tº®#™L¶9î …‚ï$ 6Ä :0%J/²Ù«Žì¡r4dEÿFD%“QÓ4d³Y?³K^#ýD‚¯]0L¾+•ÊèeyÍyì±Çš÷ßó¾ûîkû¹ÿþû›=öبo75}ôÑæåË—§ò^ÇÇÇMUU›š¦ùe³Ùf©Tšê9çóùf6›úùår³™N7›¥R³yãF¯so6UµÙ<<ôþ­V§zÊ3£T*5S©TóF¯ ›2Ó”—yñàƒNå}ªÕjS×õf¹\nêºÞl6½ï?ŸÏOýœ«À7šÍ|¾Ù”eW­zëJÐuï'›õtúÇÇ‹¾¢ÞLK^æÅ2®Éq8<>¦±J¦‚ã8H&“ØÚÚB&“mÛþø.ɨ)÷èåf$ƒUÒ}÷ööÐh4üÇöööx]„—‘^ÑÕYÒoí¤Óžò].3ÂJ&gò=*2×TfŸf2^Õ0¼5 ¶¶„óùåé‚M¢‡DzÇñÓ{“É$’É$EA6›mëô ´¢«B6›e½Y*¤FO&L*¿â´‘¹©ÒÕšµ×dÖH&L>ŸGµZ…¦iÈårœ:±¢ŒÜtIߣ£#ß8­ÕjX[[Ãþþ>ÖÖÖ}M#3«èj&ãžaJu2飃 BN&Ej5¢^+äºíëE×½u ·V©Ù k aYV[š|*•‚a~´•‘!²¬Ìªót:F.—ƒã84VÉ\´ßt:í;i¨®6 Öz½ŽF£á§2‡Õ²,Ôj5Àææ&t]_Jc˜]ôÉqZÊ·Ì…T/bÄÌ2/¤ ^Tqo=t–“H‡_éLÈ4¥½WS£Îµ’N§±±±á×C1­Œ,Ò͘M“#qðÈhBf‰4¹c†ËÙb¨ëÁÁêõ:‰t]G<‡®ë‘V‚‡Å>Í?œÅµñ22•òŒW:éɼԯ(R(xë!8 5ˆªÒX%Óöm¸®;R^EQüT36é ËH2™„¦i3Ͳ‰ê=†¬’¬( ûœAÖ°Æb1 \¹rñxGGG¸xñ"öööüTÚeƲ¬¹(!ù|K)O¥¨„“ù ãj¢–¢eÛÞ'×¥ó†ÌÓ4ÇrNÊxzóɲ‘Édfn¬2dfüáá!Õ3ÈH5¬Á¨ªeY¨T*888@<÷£¯£R©TH$BImì,ÇäÈpöi⺞2ÞÙ±T:™’³Cä;*™ŽãÕuËÿK¥V#%²¼,ZÆGa\£3NSA:ƒ,“l‡Q(FÎ( g‹e‘qÇq`š&Ž9 àÌ2rÓ%!h¼Öj5X–…ƒƒ\ºtièNÁõz;;;~Znðñl6ë×È꺎B¡0“/Àqœ©+"¦Ù]GÆÄq–vmäÛ¶í…Ö–M!Ÿo9lXâ´:DAÆGa’ý>*Ž2–M¶;1 ¦i2U—ôd™d<“É0KàŒ3•9¬ñx»»»~Úð êõ:,ËêÙèhoo±X ¶mãêÕ«¨Õj(‹S¿øÎÓ¶i°…m·f—„áº^›Ø Ï °$DE¾ÅFXåÏ*_ƒ¢ÐXmÃu[3|&Å4½µ´±á 
oÌEœQ’ñá¿¢Ùì÷dµXFÙîDÆË”ËåÈ•ƒÅ³l2nš¦?>‰œ]F2X+•J×ï{{{(‹]ǽÏÑÑQè±z½ŽJ¥‚íímÀÚÚ677gâ%œUôɲΠbnž>,¶í ÝÔ4ïß°zèBÁ³üËeïý·¶z¿Ÿëz »„ôÂŽzŽcù¡°¸®W§-‘Õ…aÛ­¿ùúú\þþCŸ×ÆF·ÁjY-çŒÈ¬8mÖ×Ã×€t®J¥¼Bùr°mä~ñgvúQ“ñaXt¶Y–Q¶ƒHcŽ—!½X&7 ¹\yÖÓy†2Xkµ.\¸€ÿ1˲°³³ƒJ¥‚r¹Œž  “ÍÍMìïïcww·ëØÉÉ ´Ejãñ8êõúÔ/~Ñ'_³’ˆQ–â8áÙØð Ê\ÎS¼½Á›år·…oÛÞ—™Ï{–O¹Ü ÕubÛžR¯ª­¨l˜aâ8Þ9Ì8ú´ÊòÝÇñþd[[ƒEa,\·w4¾A/•|b–Õî@‘‚ô0‡Š”¹œ'sɤ·>zÉ—eµž”ÿ Šâ=o}Ýû\)ö­V½¢ø°Z4MóÖ‰ü­U(•ÿçÿ|¨Èx?r¹\[ª[”ê¹ItYÙî…̦äxÒeqÛ¶‘L&aÛ6ªÕ*{á Öl6‹x<Ž+W®øI³¥+W®à‘GÁîîîTRú-ŠF£ÑóØ_þå_â=ïyO›QÝÇq (ÊDHÇñôOÑ¡å÷…éDÉäìRgƒF¡ãx wÐ(Ìf=E»ŸA4L«ÕÖÿÃjT3OQvÎåZÏÍf½S©n£U («UÔ~w|Ã7à/û·gó]õa\ù€Ç|$ÇeYsÙä ó¯2™V:üÔ¡âˆèåŒ{Ì0ZÃï¤,«¿Ã¢PðäLä*™ôŒÊ࢖Tv1(uÝ3$ËeïâÃÖ€¶šæÉèñq¸<·6ýÐõ®çìììàúõëSþ# ǼöðA˜¦‰L&×uáœþMqš¶í­5ùÕÑ4€b±ˆÊ÷~/nôš‡5#湃ÌÜf7ëåFöo©!'QÙ¿3™ ²Ù,J¥÷íãèè—/_ÆG>ò‘‘^7°éR¥RA½^Ç•+W°¶¶ÀÚZ­†K—.ùmnnú©Á½:Ž C¿qëÖ-ÿó:ùÂ/üB¼ño õ…aYÖÄH±³ÇÓÅÓYæ‹YU{wŠÒ;RÙ Ãðù`[WÃðŒc¹1*Š÷yÞÆ†§Ä¿ EñúAud®Ûm$ô"Ì2J§[˜ÎïAQûÝßÅ3?ú£xßýÙlÿ!Œ+ßpÏ=÷`èϚǎ ï+žªÜ‹ Ï] G‰òwF%z)²¯iž,v¦1zÎÇñÖAðFèºÞë²YïG Þ0yÕõn‡Š<Þ)·†áý”ËsËÞßߟšâ0*óÚÃû!Í•4MC¡P€ªªŒ®Î‘õÎÊŽã±Ù,Nû>±7ýælvP÷P¾ä°?÷e0MoÙH‰ª»»»øÏ¿õb<çå·æzyóÜ¿ÇÁ4MTçlįŽÓºŒ€yð4Üß|?Ò|®¦O´ÕËþ=lÓi…ý[ÆñqÏž ñã÷•EËòu±eBßëêQø??Œì?øu(?þý)|›››ˆÅb¸víÚH¯h°Öj5$‰6!•ù«Aaê·QB¿:ÍÖÚ¶mÝ€#“ñtgÑm…žÍl%5,õu¤Î-¨0Ûvë„2™nåØqZi£ ‚ÍdÂ-ñ0C(nÕÙ…D}†FŒßIð·½ñu_‡?»óÎÉ>c æ%ßÀl:`w2uLä4hhŠá(2!9ÇÁÅ'ŽQŒ%ÔÛyýªêiÉ’š[*µGúƒ^2(ñB¿¨è2Oï…8$óù<666€5PÓ¢PðÖO©¾F:•AN#­¹\ûRó²å½úkå'N`›´s¿¬µåÕ/ò“O· èøÏÿõ¹^zd»R£½ÊÑ( Ôu[ÂðG‰¨²ºrÚ[C"¥ ((<±WÕÖ­¦Í“-¡ð“_…Ãíß”¯ƒiz?Ζ#šEw^6igk p>ü7Hýã:²ÿåU-¹’r&Ñ­tÝKiþÚ?…þ𶍬øcàŸ^‚»ù%#í1®ÛªÄ’·³,à—ùKñþÑ‘®g`JðÚÚZ—Ç¥V«!OÍH rîÜ9íi õz}ê eÜz¦Ba ç›Ô`v¦Ñv2lC]ïN±#OÓ¼“Éö†.¢¨K4üKé®Õ+•†KK >àÀÙD]¾‡EÊIžÌÈ€Õ^OÎd€»î,ã™Lwä3n—§R ¾FЉhù|”ãýjN§…Dü§Å ´æ3/ï‡mÛ¾üÂuݳ©›Ž…1Bª®eö7ý¨÷žÇÇ­4÷Ó1 ç¤a9}îª Ãð–CðÏ‘Í~×4¤®¼ùÚ[‘Ò()ï~—Öl?±BU\îÏñâßœë×ÙîÅÊ*ùnê™ÌjÒÆìöƒ°¾û×Z}+:^(索§[jÁõ{ $QÆÆï]†ýeÛ¼[ʧ‰=×®aë‹?ˆþ)2ï}p_ÿPÿü7Q}"õ§ßhÒiï=ûk°¥A2éÝ“IïñE¬Qq˲ÎVGw×, î;~™¯¸çG>¼oFÛ†õïßßr vزÏVþàÿ/{É¿ÆßûOÞú¨ÝŽä=בS<=ö¶õt0~~ ëßôåÈ)%˜ÿ‡Ãøÿ·w÷QnõÀ¿¦¼”ÄVrçàD1rëZùŒØò’Pîl Ù$­¨¦,öÁ³g©Ï"o€®¥vºKØÍÐQ—n»V;ˈžÍ¸õ¾t´™&ÞzenqKc_ÞÚÃÜ$0²!ÇøÆ–楠ýãÎsuõþv¥û2ßÏ9>ÉèÎŒîcÿî£çõ÷ ^ü÷XXÞ‹ÐÝ·u ¹_^2s0R/„úeô@µ… 
Vàˆ™#±!‡–X€¢Ô~ˆ~®ø•š(ɇºå"ŸûmÀõ<äÊhùê¯)„Ý»«[€êh»¹€B ž¶Ú¦²°PÝ$¿²âxú}7Äv+¾]œÏù<´o½ýÅÿ†ˆr jö XDä]šöŽHü—{æQDÞú2„B€œ8är˜Ú¯"µFäî ùè˜q˜NZÞŒ1ãõ½P}6B! õ÷ õ ñ‚ò{›Þý;Mã2‘2¿õ,>s}Ób…BÕ…Cæ³'™i7~ãÀ—7ã†ß–Ýóyd¾õ/ nAþó?Dêé­£êâqs_„‡ªÚ×_‡ÐÎg0SžAä¶ç±ðÖ‡šœ4Ws‰•’ôf,ߌM¤×Ò8€{Ð,ï³Ñ„ÕÖ-Í@=ó}DþÝ¿EÈÒÜê´»pfÀÞ;üÛYF<1Ë(>, …ìù¯¨«k4Åìì,b± …Êå²9’W.—ñØca~~ÑhÔ–Q˜d2if .—Ë8räÈÀ¿WÐ4­¯‡Aäöih'‹e³fÅrÝ™™Úƃ¸ëõTª:$!‚Ö*©6Bši×iL¥ª{M­DV^ªaÇ7`ĸm#”ù<”µ ä”n¿ØflŽZãMdºÍdªë¢Ú-ñÎç«=àn;_v.uöÚ:Ñ+pqJòQÄx+º× ú±Õˆ¯i14{&Ä~lA<¯’„œ1s’iš¥£ùÈ#ÀÞùð^óÑI:^øð¹jÇ¿R€ÝmoóŠeÅ—è¬Ö,§Ìç;?ó‰Äà[Dlädl·âëåÀ[ÿþ¹{ŸCâ‡_1^‹F1õ…cAñ\Zb™ŒÞ‰pþ/³TëÆæç”¤ óo`å‘ïC×#µa'IÆàe7–çL>>Þú¶ã:öäGH¿ÞüëÞíúmÞªƒ¶KKXøô§1úîªÁÉ÷õyÙ¢¢DjcM’ /,!P„á{¼é{ŸƒúçX85Èÿš2wþIR€±ÊUU)4ìßíç8~¡¼·ö½ûh3Þ½ ¯§R[‡)üïÀÒÛþ¡Lí ;c±ç+u©ºEQ´ó9©Ò…«W¯Vî¿ÿþÊ]wÝU9tèPåÁ4¯8q¢2>>^9zôhåêÕ«Ýüº®\½zµrîܹʹsçºúþsçÎUNœ8ÑÕ÷¦R©ÊÒÒRO÷³²R©ÄãM.¬¯W*@¥’Ju÷‹®\1~Y*eüY[kvƒ•Š,âñJåüùæßÓcjÈrí{Ÿ?ߢ€þÔK¼ C¯ñ]©T*Gíú{C¡-÷yåJ¥"ïV+‘ðÿ«$M¾¡>Žz U*‰Dó§ÖR)£ÞY_où-½ÄË0 ³ogee¥’ê¶>v©µ5£:¾r¥ÉÅõuã¹iz±;mÅÚšñ\×ÿÀ Ÿ7[¿"‘ØzÏDÂx¡þ}zàd>ìú»Wý´iFáÊ•&1¶¾nÄØÚZµÞ__¯¬eÔŠÞ¨¬eÔ¦¿«Y¼Š¯×Öº Ïóç+•……‘•éVIܹa~Juø¨³|no×ú;‘HTVVV-{OÚ~W*#PS)ãßW´ïëˆ.AËŸï’ó»ˆÄø]K[«;¦~õE¬¿á½ÕƒFOì¿¥£8ÎÉÞû­v,EûÕ\Å»µ<‰%gη¿õ-ûDž³*b§IýJÚtÚÈ?³u²Lû¶ƒØ62¬AL‘ÏÁ# S7Ér`q®n³À9x°šìNQ€ýû¡ßõAL~v ‰¹kÍ1 M3Úðk[íŒ\Pqx 8Ÿ€Cæ¯gÜ/-¿¶×Óø\!Çzz? 
¥M®=ÞI–kG”8iP“,ÏUÄé"ãª$Aûô*¦”bå {€ë«ÇM‹ãÞg´5£Î.—!ýüÏ@¾çZ,Yêc‘tKä4µ;7ã¨HŸ8†Èä$.…'¢X cÓÃÛU‡5›ÍbyyÙüzyyÓÓÓ¶œ·ä„nfŸÄrëx¼}£¶#ÄÖ,½v€ÁfZêïñâÓB í°š£ÀbÊÆÒ¨\Zô;¦Œ‡Â£³#äo£8ÎÉ$ε6²Df  q¯’x¦¶>TÒéj§³Ù ’hј³|§ñŸ&·ÒiaŽØJÞU‚éf³¬vu?s–N§‡;@“ÉTC–¤.&ë×ñ8ô¯=…É»~‰ù÷^c~[}|v¯Öä~.š|ìÍÚZ5£¶ªV'd¹v¢Ãš¨p›Êd2îÝ¿*ŽEPèç¾…©oý–ÜcÆåÊJuÑ¢,[5ŸíŒÇ«ÿÜÖT6ž#öÏÌÕ–ç{®«ëòò2b±˜¹,x~~ËËË8räÈP޶&EQZ6f¬¹“š®Úµ~€×4«}‡â4m°ÎC>Ï·´aš™1â·É¿w>oI¦´œa‘þæ/œ."QSÃ>ÎÉòFFT¬Éµéu惖¡ô»15Y=m¬¦“‹¸ñ;s9ã¿6–'ê!¯^*U]‹i7;ÎÚ&ä·fèlmä[ÇÐuèo¿ êۉP]â=I4]‚„j5ý™}‰{¿"žmÌí:±µC–·ýjƒ|>]×GÛaÉ Å ±o¢¾Î«û7œ™âïiü¶^ûi¾¯Ç ±P;¬â¸šééióµd2‰ÕÕU‹EWíã膪ªM;¬33Êe,,ü€TÿCÕ<ÎÖa+'jTQ›kÚ`Šš^ y]Û&±d#ªÆìÖ!Šy}©:#]gÖhòv’¶ÐõêLÉÂBóz·‹}ryËêÕƒI5Í(RUç—Ü‹YV.Ýu%]בN§Í%¶¶±Æœ¦AÓCÈeý@B*U{ÄŸ8zm­º,]²vX·1]בÉd°2ªúNU±:&•2â^œnË!_,W·lˆ*PLlyr‰ú° y°¡ë=¬Ö½ª^›Uµj–pI¬ÊXºåS€j9GTäáÑj¿ïd”§?÷pø{SütñŽË·Ræ›çoefÑÿ¿Bý¸¥]“!…ÈÝ4MÃÌÌ "‘Èð–G¦ÓÕ£dú|6DZƒ†[V zð©u¦Ö)MÏq#7Ë'‡1@£ëÆêß•ã Ñíy º²Ul™“æN‡¬§ÈrÃçÛQ&“,Ë#ÙÊ¡ë@N‰@J­#‘ØÚþ—6®išŒHDFh÷³P2Õ­â8k]7þ0u‡3ºî°ú…õ Ö|¾š¨paÀŒeætfƈÐx¼ûó¹ê¢ævYf9rV«5D-|ð ÑÂX_‡ÂUáäQªªbjj KKK¶.öÐØ ˆ|éïFèüyhº„œ%Á§X€#ˆÆ»øÑ9Û{ºšsK«ß-÷A54Mjf`1éWÿÏ/ËÍ'­É¤©bóâ0tºœˆåõ :ž¢½S¿ýOU]¿ Ÿ©¾fO`;É9Û²Ã*²é‰ýïf=!ôªëµÜÞÜÆ5Ò4­u£Ý2¸¡§“ŒÌt¨&€$òš|>T*eûÞU±…;9”•A›Üdo|É$Àäuvή*JõHjYFõl¾mžÁ“ÜEQ”ÚåÀºÞ0 i}°…G­“u½R Ÿ¯M˜ÇÏÀ’¬©¾MS¸Ý·õ‡¼¬ë«¢(8qâJ¥RÍëËË˃Èd2®_&<¬ÑxFg`ÐãeˆÐv/ˆ¦™-UåŒ*ùƒç®†BÆ#b¶ûYŸ“Ë4]œËÄQdâÜ™­Ö{&SSý¹F×޹œ¹b WºúÿøR·¨f&»ú2äO/éæ› …R©ÆÆÆÉd ªªù'“É`ll ‡F¡Ppº<]õy ëZ;ärN“¨Ê2/+‰¼ÎzLY_tù= íÿ‡´ÚAüúY+"‡åóùÆAÉ­£÷ôõ;ØàeÆk[UU5š"\$@nÔu‡um ê|úÿwÄ?yÔòë0#­@Q%¢èæ VÑç¶<òƒ®:¬š³Uc¥ªÐñ}H?ðHÏ]j>ÈÈ…\¤é [ÿ*/ÞŽøÇ^ ,-™ÇÈOM++ cr§n;¬šL}4í÷ÿ’’ÇJñM$còiiÉaŒû_Çk¡P@©T±cÇÚ~ßôô4J¥ŠÅ¢ÓejJ4æÅM¶«è8ÜChÚa­Û¿ÊÐ$?è*Y‡®­öfY(#d~é H¤wC:|gãuUe+ˆ\C×õ¶ üx¼:“šHa_³ÄÈeºÝÃ*IFÇ4РƱcŒïí£ãÖb±ˆ`0ˆ`0ØöûÄþÕr¹ìt™Z’$i¸«¼¸I„¤Zgø­"³%ÃdKäGçu˜œ4ZïMêfM3pÖ×h¡Ú2[CäMgW·d2µc2‘påŠÓwLÔZ/9ex¾5]̰lnn:}ŸÉ9,“Mög¯¶ê8 QËÆ»$™A ñ$$ò‡ŽÖ©)£Nnq&v:m¼iöP0é¹HÓx×43©‘—t³X_~_ïÞI0­ŽÖp8Œr¹Ü¸žH¸œ.SK!K£½A:m_GS×92O#¥v»œ]%¿hÛàI§Š¾EgUœÅ]3j_ßêçȹH³„Kú»I•È“ºÉðžùÏ? 
yœKÈÐU‡5#•Jµ\î[.—‘ÍfF]{´ªª$ ªÚ¢/©iF †Èƒt]oÙxÏ匕‘D~ÒrÿS kZ“m­–óüp‹¹†ˆõš=ÛŠ‚™—|©·Z“÷XæY›ÞSSF®i€R "õ_ö9}«ä]e ž››C©TÂáDZººjζ–J%¬®®âðáÃ(•JH&“N—§­H$Ò:›XÃpû¸$˜FLo3Ø¢ªF¦H"¿h›¬#“éð‰DÕ}‹ÙY¢QûÝßý ߬iØ+òôWìe˜’'‰k.gIâžÏ#<Eöï·ý-cÈÔ1é`̲..."›Í6=º&"™Lºvv0`È+us9ã ììüu¡åòH]Ç’6HkNß"‘mEiˆw]ß:c[>”ˆvN>_,UUãùävboêÒpñâÿÅ7Î!Ÿ7bWU™ÓwbåþÔéÛ$ˆ$çÏW¿œ‡¬ÃR<­uT£«+Pí´Š£kŠÅ¢¹\¸Sa7GÚhZ‹¾¤Y 4ÍØ÷Ôì¢!«?â#o|!Vúä3š¦5ìç³.l9xÐè¤æóF}/ËFèéÜD.#:«boêOüœ9“5övd4H7½ ë¹;ëô­õÌ:àX³óBTÖšfLq[YtÝaÄ7õ…r¹Œb±ˆh4êt™ˆ„4bt½i‡Õ®uºÎ %4rªª"a™6ÊçXE.:}kD¶S©­AÑ®mÀhèçrÆ9}Ö•Ãm“Óè:†{îQwòy#v%Ɉu³½µf¬” iB̹A¥i¾ùÍ —k±Ú%b&1jÐÕÖn‹EÌÎÎ:]ž¦DBIj3`cG'S’¸•cÝÓ—ÏoÕ÷ªÊl§ä+b¿¶H¢—Ëï‰D¡ŸNÿÍdX—“#D?TÒ5@U›.G(Äò¬Ç¿„§Ÿž@üå òˆº`[‡ÕÍÄþUYnQÇÛ5RÉrHý±65Ù°Ùa%QUÕœq …¶’Ûq,™$U·† õÀn¢ÖÄ2vÌÌšUUçÒHò‘/|á%ø•w]†ô>Æv3umÛtXeYnÝžá20ò8ë¼¢X–½s†•|Æz~Ÿ$I³¯·>+ÜÚAPU þý¬‹ñ84MkÈO@äe.¼±'þ­í jÔóÖQÉf³X^^®y-bqq±çß%–MM5µÀhèØÕaÕ4#㇙öŒ¨‘ñ­Õ% S”ºpf‡•`gŒ[‰ýÚ““[[ú~@"ÙQÊïÄûˈ|èóç[g'²aÅ·®ëx%dÈûÏñŒÓÅ$qm‡ucc±X¬&¹S èëwiš†ýè­Ã?iF–½O1¢ìŽok£¦æT%6¼É!vƸ•¦iдPu¦eê÷>ˆcÉØI †ß‘¿ú”‘lC’ ªjë󆉆hXñý—ù8^ÿò—20õ¬c‡µX,"›íœ:½\.Ûzc›››ˆF£¶dÖußúÖÎÑL4‰£mˆÚ°;¾­––,_0Ó9ÄÎÄŒSÍJw;V5Ò²Žþ/‹þqGËEÎmð^ëïŽK‚ …fggkŽÍ(—Ë(‹¶/J¥¦§§qäÈsÍ|0D6›E,kú3×\s nºé&LLLÔ¼®ª*öì™hÿ†™Œ}K'Ùñu½±±18pO>ù¤#ïßO|Àµ×^‹‰‰ ŒÕ¼nmÔ¨*J9R,r‘‰‰ œ;wα÷·³·j˜q²; v$bì­â¬–«…Ãalnn:Ö ï'¾[Õßâ,á…ôÈúAή’Yïڵˑ÷^ý į|?x×»)¹ƒhƒ÷Z÷µ$øäÉ“˜Za‚Á ’ÉdÍïX,†r¹Ürtòšk®ÁÍ7ßÜ´}Ýu×unÄÛ™%˜2® qàÀÜpÃ Ž½¯ñ  žh4Š`0Xóº®ëfÃfm9¿ÈÈæxÝu×9öþv×áB(jún¹ìH™È=D¼×úÛ•ç° …†¥›››zÏR¦( ÆÇ¯ÍþU=jÃÎøµ1¥Ó\ÞHް;Æ£.—$ ‘ˆ%±˜$q€FήøÖ4cl¤Yª&\"§ £þ€þï{ßwœ.y”+;¬@óóó(‹ækËˡÇÃ=ÿ¾gŸ=ØþìlÔ'l@Q[vÇ·…Ÿ™©KpÊä1ä»c0VpƉÜÀ®øVÕÖãÛ\LNFý­ªÆ #ëpê—+Ïa ‡Ã˜žžÆáÇF±±±Èdz?dXÓ4œ>}Ç·ù&;?ø0RvÆ·õHE©;Ò†³«ä;c\ øÉIcé;‘SìŠïš3³ë0á9Åîú[U©_.ãw>Yf™úãÊ+$“IÄb1óAé7ÁÓÅ‹?‹#G®qº8D5ìŠo±Ï©éd*GçÉAvŸ 2¾×;Lä;âÛL’§ªFòÇ••­×9»Jβ«þÖu£³ºòÎ?ÂÌß ²üN<ªë«5ÉR¹\nxM°óh›`0Ø`¦WÏ=÷xûœœ…"‡Øß I¥n¤žÙªÉìŠq º§ïüù­tÝÈXÃéVrÈ ño- Ö´šÕYL¸Dn`GýþèóHüô3ˆüÉQ`ru ßEÛ[ÇëØØ¦§§^Ö‘6v+—;i“Ë1#$y–ªª$ ù¼98o`¶jò™†äbviC4bf²¥ºXVUq¶KÈ´G¾‡¥ß¸UÓ8CéØaé­½HUþðCí¿©fÓ‘·èºŽ·¾õ=êÚîœa% —ÒiË1©šÆŒìäYšf,0cÙ²DFQ,±mB> ýàZ •‚šÏ³ÃJqe–`»|êS—ñÚמü¹Ø¥K¯l\$Ép†•|CUUÜ Bù«ŸTú®‘Oä%ª äó[_Xb™Ù°É/4 
ÁXä1M4¨®÷°‹E,//czzáp¸eBÕ%{BuxôÑkñÁr¦‰üKUU¤R©Æ}P++\.I¾¡iv^ºÒ³O¸U¼ÈVò,I²ìFªÉö®0áùB(¬½í>kPU‰fu©«VEQpøða‹ÅšCƒ“É$æææFpöìY§ËcÊ牉§/è:N;}{D6Új´ONV_ŠDؘ'ßÐ4 ëëïFüí—¬/ry–,[–×í_e‡•|AQÌØf"1TWÖùùyȲŒÓ§O×d ‡ÃˆÅbX\\ÄÄÄNž<étyL¹°wïéÆŠf†3Oä=öcü¥®ÑCä'O<ñ<ºq‰On}þpБ<Î\ŒV7ð¢( —N’/¤OÝuÏ/A×uÂÐÀ:vX …Êå2Ž;ÖöûŽ;†ååe”J%§ËU5&—®¿þJíÁÛŠb̰rYùD0x·ÑÖiz+‘?\¸ðóøu©:ZD‚õ8yÚÔ”å ËþUµí"Rþ~/"ó1® [t찋Ŧg1‰eÀ‚¸.vR(d$ÿÕ4­¶âO§…§oÈš¦! j–!ˆsÎØ¨'ŸÐ4­6¾ÛäCš¦áwnº¡wŽ9}+D¶hµýš3Qä'bP†K‚É;¬ápår…B¡í÷‰ëÖYW§é–Ì{ÈçyùÊ¿ø*<òˆq+÷°’_}ùË—ðs;wr@†|Cl[ªÇVò E©Û¤ª*ãšÖU‡5#›Í¢\.7ýžr¹Œl6‹h4êªY׆kÃa•DÞµ¶vû÷?ËÎ*ùÚ½÷¾¯ÿîw9àH¾¡ëã/Ü¿J~¢ü¯g^+êÚâD}ê*KðÜÜJ¥>Œååe óO6›Å=÷܃R©„d2étyj˜)´ÅrI~<ñÄ›0=}-—“oé:ðú×î¿ßé[!²Yeçrfº`ή’ŸhÅçz¾Èeîd›—vóMáp§NÂòò2²ÙlÃõX,†ééé†ÄLNRÍœñ0:ª©”Ó·DdM®½öÆŒ!Ÿ’$`çÎû±ç—–œ¾"Û˜Uv.¬­àþUòíÂ?"rèÈkÏ_%[tÕaŒ,ÀsssH&“(‹æëápØUûV…šMÞ|XÈgxå+¿ Yþñcœ|HפKlðŸ,,ÀuÌÁFUU‘âÀ:ùI(UQ8C¶èºÃ*D£Q§ï»#®™'?S`÷î¯:}DC5>þSÜz+;«äCuy5t]çþUòUB•u šÉ Á#ÈÈ]íaõ"UU9*O¾vùò_9} DC£ªÀ¾›/qtžüIUÍ«ÂY(òMBÏIâ ²o;¬ÀVÒ%±ì†ÈGVVÀò5EBå³ø•s眾"{ézõlx°ÃJþ¹õ*oü2;«d+ßvX5MCàÛßÒi§o…ÈvfR±™3Ë$‘Ÿ( ðOŸû3¼ü½ïuúVˆìU·˜çT’Ÿ„~ø„ÞøJ&#[ù¶Ãªë:&ž~šg÷‘/™IÅ…GÚ/iðÎÒ—0~ü¸Ó·Bd/U­i›p&Š|EÓIb‡•lÕsÒ%¯ÐuÝhÌo¥Œ'ò]×qëSOq@†|IQùÍ—ñµ¼79}3Dv‹D̺›zò­$Kêä$.‘m|;úó™gŒÿaÖ=ò!UUqðÙgÙa%_Ò4àŸlþ.¼æ5Nß ‘ý,xî_%¿âʲ“o;¬¿²s'óäkoøæ7ãäK‰pÏú^xç;¾¢¡âþUò#vVÉn¾ì°ªªŠñ+Wj’ùŠª¢üº×qù“®ãÇ?þ1^ûîw;}'DCÅ%ÁäGŒk²›/;¬º®cÿŽLFC¾uëSOáUÌžJ~•Ïãüõ×s„ž|ËɯØa%»ù²Ãºû«_Eiï^§oƒhhÞvñ"WÉ2>ùÜslð¯åóy&¥!_b‡•ìæúk±XD©TêégÆã70ñàƒNß:QGýÄ7|ó0'r³¾b< ·Þêô­uÔo®iEáþUrµAâ›+dÈN®=Ö¦T*!•J¡X,dYF&“éî‡%‰{ûÈÕŠo¿þÙÏ:]¢¶‰qEQØØ!W´ÏårH¥RNƒ¨©Aâ›U×ΰÎÏÏ# BUUœ={ÅbÙlÖéÛ"²ã›ünWU• rµAâ[×uäóyĹ­ƒ\jÐú›ËÉn®ì°–J% LOOb±ÖÖÖœ¾5¢1¾Éïñx<ν}äZƒÆ·è¬J\ F.4h|G"Æí\ÙaÝØØ„Ãaóµp8Ü×:z/óÛŒ[±XÄêêªÓ·á8Æ·Áñà·g¶_ƒÆx(òEcÞoñàÇg¶ƒÆ·¢(žñc,øíyí—õ·fXý^f]¹‡µÝCQ.—^ÿÎw¾ƒ3gÎøj)Âç?ÿy§oÁV/^Ä¥K—ï˜]¼xßþö·±cÇ$“É‘¿?ñ ëë똞žÆM7Ý„›o¾yä÷m7·ÄƒÜò̪ªŠ'Ÿ|Ò±÷gnpK<ØÅ-Ïì… péÒ%ìÞ½Û‘÷ï'¾­õ÷;Þñœ9sÆ‘{·‹[bÁNny^EýíT]ÈúÛà–x°‹[žYÑïµþve‡µ\.·¼¶¹¹Ùôa ‡ÃxûÛߎW¿úÕ8pà€ÓE°Å /¼€‰‰ §oÃ6—/_ÆåË—ÿ÷¹|ù2nºé&ìØ±Ã‘÷ï'¾à~á°oß>ìÙ³{öìqäÞíä–x°“[žÙ@ €o¼Ñ±÷gnpK<ØÅ-ÏìÞ½{qéÒ%ÜpÃ Ž¼?ñÍúÛýÜò¼Šú{¯CÇ3²þ6¸%ìâ–gV´Á{­¿]Ùaµ.C¨ ›¾>>>Žññq§oÝVÑhÔé[ !è'¾àŸø„Ó·N¸å™uú>X‡œþwð+§ÿ^û‰oÖßîçt\¹å>Xœþw 
Z®ÜÃ:66 vYB©TjÛ˜'ò Æ7ùcœüŒñM~Æø&7re‡5 "ÖlV“““NßÑÀßäwŒqò3Æ7ùã›ÜhG¥R©8}Í‹EÌÎÎ" š›¼[îï#òÆ7ùcœüŒñM~Æø&·qm‡06~‹E\KNþÃø&¿cŒ“Ÿ1¾ÉÏßä&®î°Ñöõ3÷Ýwß}Nß„ß ìØ±£åRŠb±ˆ^x¡éõv׺¹> årš¦µ<6cÐ{v¢LÔ¿AâÛŽëÃÐ.ƽXL»÷b<°'a»ÕßîÉå¡Á°þv™ºR¡¡yðÁ+wÜqGe||¼2>>^9zôhåêÕ«æõÊ¡C‡ÌëÇïêZ7ׇáêÕ«•ãÇ›ïyèСÊã?nÛ=;Q&êß ñmÇõahã^, ¦]Œ{1X‡“°ÝêïN÷äÆòÐ`X»¿L½pe–`?(—ËÈf³H&“PUgÏž5_æçç ÍëÅbѼÞîZ7ׇ!›Í¢T*áìÙ³PUÁ`ËËË]ß“ËDý4¾í¸> íbÜ‹å¡þuŠq/Æëp¶gýÝéžÜXêëoo”©'N÷˜ýêܹs•ñññš×Nœ8Q9zôh¥R1F2ÆÇÇkFGxàÊ]wÝÕöZ§Ÿ–«W¯6¼çÆÆFåĉ]Ý“ËDý$¾í¸> íbÜ‹å¡Á´‹q/Æëp¶[ýÝéžÜX ëo÷—©W/uºÃìWÑhªªÖ¼¶±±]»v™ÿápؼ‡Q*•Ú^ëô³Ã"2Å…Ãa‹E”Ëe„Ãa$“É®”@¦IDATîÉe¢þ ßv\†v1^(õ1®(JËïuk<°§f¶CýÝ)¾ÝØæ"û°þvg™zÅ=¬#päÈLOONž< À€V._¾ÜòÚææfÛŸAm7ëèÎéÓ§qúôiÄb1ÌÎÎv,O7÷ìD™È½Æ·[ã¡]Œ{± íb|ß¾}ž+ ¦]Œû-¾Y‡o/Û­þîßn, †õ·ûËÔ+vX‡dbb…BÁœÆEQÌü`0ˆh4ŠÕÕÕšë“““m¯uúÙa ‡Ã )´³Ù¬923è=;Q&êß ñmÇõahã·ß~»çÊCƒiã~‹oÖáÛËv«¿;Å·ËCƒaýíþ2õjG¥R©8}~5??ÕÕUD£QlllÔlŒ5é³³³ƒæ¦æÅÅE¶×:ýì°ˆ÷£T½”ÇŽëä.ƒÄ·ׇ¡]Œ{±<4˜v1îÅx`NÂv«¿;Ý“ËCƒaýíþ2õ‚Ö!³¦Lo¶O¢\.›#@õ×Û]ëæú0 zOn,õoø¶ãú0 ó™d|{O»÷b<°'a»Õß®»±<4Ößî/S·Øa%""""""WâV""""""r%vX‰ˆˆˆˆˆÈ•Øa%""""""Wb‡•ˆˆˆˆˆˆ\‰V""""""r%vX‰ˆˆˆˆˆÈ•Øa%[ ”Ëe§oƒhhãägŒoò3Æ7ù™Ÿã›V²Õìì¬yè0‘1ÆÉÏßägŒoò3?Ç7;¬DDDDDDäJì°Ž˜ª/•JX]]…¢(æµb±ˆåå塌ˆ÷.—ËPŠ¢ŒdÙ@©TòõªåTŒ;ßc|;aN~Æú›üŒõ··½ÔéØNfgg‹Å ( Âá0 …¢Ñ(‚Á …²Ù,¦§§‘L&mïd2‰“'O"›å©S§ ‡RÞb±ˆÙÙYȲŒh4:ô¿_ržS1îD|Œñí†u8ùëoò3ÖßÞÆÖ+ xøá‡±¸¸ˆd2‰B¡8}ú4N:Y–‡6ÂsòäI,..bqq?ü0víÚ…ÕÕÕ¡¼—õA™››Úß'¹S1>ÊøãÛëpò3Ößäg¬¿½‹3¬#699‰@ ‡ÃY–Íëcccm±”¡•X,Ör´&‹™ï0666”÷)‹8yò$&&&|ó P÷‰q/Ä7ÀßΜŠoqu8 ëoò3¶Á½‹3¬d»l6‹]»vá±Çóźy¢zŒqò3Æ7ùã›ü̯ñÍV ƒ¶¯­·û}b±’É$î¹çd³Y_ðÐpy!¾Æ8õgTñ=è{1¾©¬¿Éï¼ã~ovX=¦X,"›Í¶¼žL&Í%N½,˘››C*•ò͆o>/Ä7À§þŒ*¾}/Æ7õƒõ7ùbܯñͫǃA9r¤íu·¼xHæççqúôéÑÿe‘çx)¾Æ8õfTñm×{1¾©¬¿Éï¼ã~‹ovX=&Œd¤Ä®÷™››ÃÝwßl6;²¥pä]^‹o€1NÝU|Ûù^Œoêëoò;¯Å¸Ÿâ{G¥R©8}DDDDDDDõ˜%˜ˆˆˆˆˆˆ\‰V""""""r%vX‰ˆˆˆˆˆÈ•Øa%""""""Wb‡•ˆˆˆˆˆˆ\‰V""""""r%vX‰ˆˆˆˆˆÈ•þ?2Cæÿ­e­Æ%tEXtdate:create2019-03-28T17:36:51-05:00«ûs¹%tEXtdate:modify2019-03-28T17:36:51-05:00Ú¦Ë-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC 
Profile1(‚¡!tEXtpdf:HiResBoundingBox1080x792+0+0_Ýx+tEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/docs/graphs/large/l3_perf_tx2_nt1.pdf000066400000000000000000000634261360743507500214060ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190307162047-06'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœí½K¯-É‘9Ï_q‡ÝƒÜåïÇ * I´@%‘€‚F‹….e¡EÐPÿú¶åîaËbïsóœLß e¡ÀsÃv„¿?w·åÿþƒÿæäÿ~ÄÿÔ¾ý—Ÿ~pòxzý%ÿûåá»<µgøgn)×o>¥o±ŒwåùO?ø˜ºüñßæÞy/»ë˜þé‡ÿøÃ¿Hè–ÿv<»~ü—?}ûÇþýÏ ñ×û‚¯ù[ˆYâSý·ñW-H‰ÇÿôÃÏþ°dŸÄQûóÿé?ËþëáÛ¿•ðþo1•”[Œßþß¼ûöý‡MáH2~º¶#”Ê#WŸB‰}%Ë<Šs¾ÅMQˆ¾ÉCß[9˶G’<2ŸÔG)½w—Ó;ò`–:=¹Eàï~ã¿ùòþøÖŸ?ˆV©î‘[q¡»öÍçÐ-ÖØB$ùû?^xù1øG–¾ýñ§oÿéóÁ¹ÿýÛþöÇûÃÿùÇ‘ôêB ¾|*¬lX»Úî ‰-¥¹À°¹íêÔ”YA𝛑ä[´ÐsJ®Úɶ1 )=J–jÜ|¼EAÛöE¡÷GjMØÐn1ЦýÕË=bpKÞYîД;…ü†rQàrç(ì/wDËbð¬Ü¿Oeß%˜Z ¨×=<\)[½R†J‹’ŸÅ1|xøœr‰-ÝiŽ@ü/ ¥Çð¾KÒ>#ü¢0JäK(ÞʼnÈè7Û#UÏýæîáÆ¡~ûñ PÅDÿBBtRÚ ý*…}Ž9ÆpH”qÄ·óŒËoó»ßÿÃnñÑýø‡É¯¹?‚w­w©å©Ë¸È×ê%³ïÁ¢[ý1Í`ÿúç?ýôÓçûUnî%› Ï$JLCÿð5ùÔ21¾ùG©b Hˆ¡æâ¤!É0½e©ì’Ÿidh{„^ZÍÃ"_,Èê"™ãÒ£†–вaëòÅŽÐzÆüá‘KI5õi“/"&Ò&¥ ’}IÒ‡I>Ù˜TH6ùHE<¼Ô1¥æzI¥ “|0Ë%Òß$àä¢4Ÿž‡I>ˆxÈðP¢+‰—ììã%/ß“qŒŒÕ¦É·šÊ|ÉË÷ÚÉ 3ë®§†I*¥_]ˆ˜¤0’dˆTñ”ýÍ”~eT+.ÑËÝXȘ+!AÊùe&u&©…Ò¤ðÅäŠË¾ÄñV”ï!Ù|PR™ƒäÕ0ÉQ(¥Ž°jv¹dHþDI›ÈßZ• äë¨F0É÷ÎlÊÁQÀ!¡"˜m0Y>†Ò¨R©åc½:é]ë°È·(e#/ k]­mantöR‡s*ò¡£ZŠÉ•RKlE¾tÔJù’´hùå,ñ"_’ hÝTJ,s¬³\¥j7Éx¡Z†)HŸ…WócÕ dJÆ—0k¹|ú‘QM†îb*¨®Ò\`’ÆÑŽ:‰EvHu&ùàQ'åoyì}šy'%–:‰&$è(=$KmGf©ïÝÏÎHºæG@^Hç2>']5"8Lò¹ºº015'©OQÒ0ºDB¦êã- P:W7æÚÒ0Ê¢×ñ–‹éèy£4Œ£írPQF?«¤§Ä(Øœ&ùÞY%“ä³|Ðû6Lò½³JrPÒ0úh id…/½H‹ªÃ$ß«”¯a>2>(-£H4ä¥< «eIôxIF—ñ®wSŒQF<«%% £œõ’ # \òS€Ð>5wœ$ÿ‹ÀÚrQ8 ß “Ô©’o7Kï_ÁÙY á¤òGWº‚a™Ž…›ÝbXº?ýF¿^³ÒJ)Ô…¥†J£_5” àÌ’“ú Â’2Éè&ùe!,`l½6Á× Â]ªI•†7¿Güy鸳kO \Z)9útƒ0qI3Œ¬’¼’o ®)’”ÒÈ$fpE^Êd?=c°æ 1XzÌ$m¤¹üÂ:,‚0Ç@,Ѧ%ÍäÆa~‰@ÌÐ F«Rœ£m3ˆ9Òbó–±Ô=)!é£ò ÄR¤dHÔn ®Re2ž¾˜?H æhˆA6IýÄ#ƒ˜ÃÒ æ ˆÄU2_ZIõáFb©a!I¹¤;‰)(&1…E$¦*Æ$–"Á+}BŸH\¥&JÝ‹!ÜH,å./ÅRÛÄRû\Õ*Á$®R’tËߚÄ4p`ScsX„b1uL&ÛÓhLòÃ(c®ÏMá®>QܾÅoá[ü˜ )s®O†ÄˆÒ;Á¡±dÆ¢n°‰iì+"*ñåiÅð¶5©/¹[c0-ó¾“ðšÇ’Õ2ƒ–GydÙ»ò(Ró‘J‹dØ¢di½3Ù» Q‘neVc‚2æLIêBñ7(#VRU¥ºg å.yYFÝBY>Ø~–,•ÅâC 
5æÛÈX‚’CBU‚¨,&éÇ]œ#¢²t]Rgݳ‘1÷jÊb‘‘P8P®¡,A x%z.X(‹ÉKkxª© Små`©¦²„Õ$Æ4<–gÒD]_¤¦²˜*&¯mŽç5•Q+"já*'eØæÃÇ(Xqµ°ä&9ª³døC:ê-–að¦Øîä›MyT]ÁhžsD"ó°9¹Î*¯Ñ<âßeŒ'¼e3l2–)F¾ “MڈΈ¦“±KÈ®[<ã›á2ÅçaÂ@É· GÄ VKhD¥£;™B„†©J³\sOBô°I“e4‚äËð»Þ†Ë°Õ¤®Ì~DS¶$Õj9ÀÓÃV1yhÙrÚÔ¢áè“GøfœHùÄ yʃòÞrærHtøkxâaÜ(> {‹7²/yðLØŽP„ÏCK¾É2¶ks9N! VÆÒ“í ,ç'ÛÅAÞYäúÁ“à¿ëG.øÀ¤Kô~³“°_z¢Ñ÷ý-‰WK=‘°¥ôJP@ا9ªÔÈ`­®þh/€¬(¤C~‡(Ôob Gá ¢P·b Å`¿¨«—(ô2ÛØXîД;…ü†rQàrç(ì/wDËbð%1P†ðAæ3–—mb ²K ¤P6‰Æ—ÄÀïÈm{$âUŠ]æÎËmNËmýûä¶ØIn«2Éóô-$gä¶ ?z}Nœýš¹éiC”ýš¸¥œaΖæìçÄÍÉÄM>é§s,aÒ~Ìݤ¶b€?§ÄÉkGoW<<Î)°§7Bõ‹cΞ‚r+Ej-‡7B;Zn“L©0aÆ~úÓdT„˜‡iøâ—?Mæ;-3¶„ûéO#NÆtªüiú­Db[’——Ï#%vôÊ|²·‰¿”ÙÑ«~)kG/ICRâtúÓŠ¤CÀ’ËK&îõô§É8TòÓÍa’*eCi/IæíII:¬ÊŠ›òÂaä¦%·(‰Œ!Ì·†§W뀩iG¯t‚>»2§Ö©iG¯¤ªÔ82¨±âæ}“9𘲦Ί›ŒRgÅM©B©³â¦´³ì´£7?4’Y—³ÓŽ^Òl²ÓŽ^§äËZqSÌž%· ˆ³šgis6ïlYdLÒ‘¡{“O9hG/GPÇŒ_ ’Ü:ý9jOo–YKña4QÉS¸]èŸ+U8ÒÓ|Gûy%½RmNø³´8$e$‹DbÅMð!C€ùÁÄŠ[ÌNBTÊÒ<:©KÕVÖ~^ «µ.¥† I0´.}ôËï‘ÜÄþp)Å*ÍÍ‚ØGä¡´½ÙJ ÄAh ®†;ˆ¥-¦vÒ‚8HÇ™‹ bù[>§Ï•I\¾¥¾t!Kb «M!ŒH,ý^ƒÏgúH5‰»tUʧ MX,Mwô”“ „bá-$æ Mâ±¶ÇÅ^'¿ ‰•*C$&ÝÏ’X‰(Lbþ‘˜Ô&±ôðžß0\¥ŸèI¢qÇ0w!„aÒÂÃýš”× Ã´–ca¸ kÒá.c«(„e€êóáúdÓš†0¿E–í¡/%ƒ!,%.eC 7“†0,C±•@ç*‚°®\Ì`5j`·‡ @rkc¼ÃF˜R¥sO7Óš&0I’L`k‚eì‚>LØ‚I}eó[„`ªxŒ`'1‚9g Áæ-`Nq FoÜ(µ K¢“¯Piï.Ð]ʹ?£°Œz$¹-ß1Ü„]þU†!dÄ"³m°Œ‘%ï–Êa),ã²8uv†0­–`K?Qš$-Øá°PSWý‚°Z²F–jó(Ñ q樒8ìe¢#ÅÔ‡8Àf01‡¥.2 s˜ª‡å°fá°†*q˜cA(¦ÆÂ(fxëZK¥m2Ky‚b݉(nR5–Å(îhí‹ÝžÐX­Pawˆ‹µ‡œžÐX¯Ÿê¼…1!œa¬¾gaÜd æ*9 c5³0Öüd«[«Þ‚yL+™ÇŒOâ±ä…4ø•ŸðXGÐðXU Ãc~â1Ç‚xlLšÇœï†Ç +\ë˜YYënÆðøèÙ-ŒurK0C†O«i2[ÆÚE ¡'ŒÕ´gaïpç­düt%lK(mêF^íJ–y´]̪٪iüd{(Á#ËùÉv5M§wyþ…jZŠÒ;Ë,ÝçZãf5MØ\¥À| ;¡þvÔ´«¥žHØRz-( ìk©*5so×è€öȨ*òT•VU8 ûUDUŠÁ~5MW¯·¨iД;…ü†rQàrç(ì/÷Ú¬šF1ø’š&£¦„Õ¦˜ÕlSÓ8]j…²IM£0v¨i2%”Á…”{•YÄÇjZ¬¦ýåŸw©iséeV¾ù^$g› ÑýàoÐ°bŽÚæB] 7ß/ÛÊt:›ö„"'Ÿ{ó’É{Ð0ësk¦Sâ\¨+5Ë%ì†Õ±cÚY"-ÔÅúÐz¬C/i¸Z{uu¬ÅŠɆAÒª×é"©Á%ïækò•9 škgenS§{½díC Å¸%Ïuº³YJülãRò\§;IMëtKQ.¬­’¬é”(2CwÇL]ץ腺õáROqÎ%‹ÌÐëš©ÉØ«À=3Å‹Rõ:Ý(ƒÕÁÃÇQê\§;fj}HÇ2Þ…Ò” ¢¤“aSøÒ” Afßþ?拥ͅºc¦†%Ø¡õµ±t½P×?d’.qœ…"©ôÇdMÂJ­Ê4{N¦úX¨K±®˜¡5´;™¡KA΋Õ)‚˜¢ïÇòØŠúQAÅäº4¶¹~´zv!¨y}õÚ‡@sãŠ)úQ;yb/å|ù$¬Þ¥·˜sÙŠ)úª›ö¥—þÜg.šiÞŠìCPN!;ùt•g¦&öé*WOMìÓU;!jbÂé=®™=:³ò “ ½yBIĵhIÄµðæ µ×¡Þ<¡6jp”AïêjAmg©UûHAïÌ;(wijŒã𰨏 =Ðâ8a#X]+ÖÇ 
«óû±gŠq´s_…KþŽãŽï·åþ28–ˆbíü¬PŒc±u_ÎoŽ¥K…nîI08¦ß„cÈ{’­p@UËã±ç²pë7KÁºíÙΙǴÛÂòXmf`Ó¾æ1Æ„R‡g !g¡¬t]ÓÛÄ<¦• Ìã7¾@r~ïçò˜vr[c[H™ë÷™ÇñáÖª½T–*©ZçÉㄇ³ßdKY4© ­ßpL‘f{)2x?g»Ó8æHŽYk%³›ŸqLéÑ8ÆK]jÄôÀŽ%(/c?7ŽwWÂÅäý'›ˆ'öç nFA£˜¥`F1yz Æ&g5ŒI1`“×›aL®h‚1úÔÔŽM cRcÆÚcoa¬½ cNp ¦{Û¨®K/#Å8z´'<î`Ï\»a€\£S; å#…>N-` Ëk>½éÈUÚ³tÿs½‚²`Æ LJ}2Kcy+HòóPkc6%5m êÇ&,côgØ ;1ÇtRÀy:_|­?ãTŸ1Pƒp2ex0×ùS¿9ªÜ·åùûßý_Øæ)kƒ%RXQцB2=R£Ç!o—Óý3é èÓ3Nžá«„ýþÿùÓ¿üýïþÍç÷Ù‰†N¤ÌªÙ&s>üJBãæJ“”/½Lè¿ùÿî~÷…"ütN çp2Ç„àÓšðøÚÃX@íÓòmwùâ¹-„QÆÙ:n9­Å^ê@Äi8ó=bÎ:#£ÀatÇkU‹¢ªÓℼ狞ÆFœ&C²ǽå±9t›ˆ † Kod$s—¡™ÜJ×á` e:þŽ¡L&†r^štËd>¡œ%B©¬ýà å"YS­ceìº9Ö(œP.Çb8A™´ ‚rÇò;©ÁS‡$(C¤ƒÚÑk”Å$%QWMPæ}»eóA‚2‰1ÊØ‚†õ RÊ,“”Y*"(C‡Ç’ºV£¥2«5„eîõ ËZa"(Ë;+æB³L–W2:ð-“!â`¹ÜˆÉq.Õ2™µ$b2ë!ÄdÖý‰É,ð“Ç^°´Y&ãƒ`¬çsιvÌ¿U46XÎØXè ζ¼aÙ£—:bi°,‘1ÀZ>e°L .Ì‚l¬œ[õÍ€9K5ŠÇêH3öab´6Çx fIB•ñÆS0gœÛîëÔ« ˜é@Df«Hf,mÛ€Yð%‘ ° f «æŽSf©þX;6‡uÌA†%B­ÕIh0ÚY6wÕdÆB¤ŽóÌçÚ&F3-)b4ÓàŒfCmÍfhÙ½®U€Ìf°™ÍR륛ËXÍÔ L2Ë7óYq˜Ìù1—÷1”ËÃIž¯³WÊ4™a(gŠ ¦”ËPîRe:#EL®È÷µß•™LçÓ9AÄdʘ·ÏöÎLæì$&K¦a]Ý\(ÃL–·œTèÕA“±pÚ#)·¡2z\HbsxÅT¦ ŒeKcù,'"2—9ãtÖ Kä ™kÎ%ˆÈÀ–´¥5¶&"sÁ‘I9Kãl>Ì)Í$2õøŸÖà<ĈåäFÕZ"þœ÷ ¾ó"ħáïñùåC‡;S·%ôFˆ³Ï¶ aØ©;™n‘àgoˆD°rœy´]ã4¯(У¯ìÕ¬X“…³¢¥!î¾J`%Z %|ê”Õí× ª–{BbK\ØÞr)EyÀ™Ž#Q7ioC¤°ß¢Èä›g"ñI&[9Žãð=NW4ßÇ­|2\ÚZþcºaÊŸÂ~GùH˜òçH¼¡ü×ÀV—?ÅáKš\)Qfú}:^¶ir&”]š³I“ã@vhr·‚à?èpßÑä’VäþºïúA‡i<Î€ÍØ»-SºÖ„*c#‹ƒÌ6»?[£v8`y¢Ì8œÌï§Q{piK—*|³óZV*1ãÇeyë¨mXQY—1s¢bòmlô­Èžé`ñã°ÈñT â¸{F’çÆYQ£#=R»°5¢O£|ï¨Ç8ÊÌãôÇ9ÝÆ¦‹Ë-a%7ù§|ö¬ÈÈ௰ë86jÖdbé ¼Í[…$qún1Êð6EU.ÅØÂj;€³&‹qžK5µ#_•—;2jMã(©aSn }…ŒÇÆ”|ÖbÞª'¹ìÆí,³‹1Â]~SK|=‹º)FŒÊWan0Ñö³óý3b¼üf·ö˜ÂÐ-ðŒ,5sŒqIË¬ÆæÊ=à+žÕ˜O£r^Àˆ‰ss’tjÊ{1lX+¿êxRg5æåb¬ã¢–Y±{FJ'O)wD*X3¦4²ízC†<¼†ëÌ×äeN,¥vž_m _±aOʸ¥v‡|5å¯ 奥`Ít9˜K˜O­õÐo æ6vT×Ûêu4æ…ªÁŸ—BÌGtçvxƒyì¢B³šÂ…Á|€`å²Á|ˆ7±Áü¸Q«j¨Á<ö¸àµu§.s^ªJÄéÂGë!γ 9ï ‰AXq^K8¶ÒÎû±‰èM çq\Íó(kzÛ` ôhwè!¦3Ñ€Þ´X½´é–e¶"4AÏ"¬=9C•9r†@/Æ„ý‚s眽ä~ÆþíƒHzÑ]O¡Ñ€Þ¡‰tÜÌ ·F=ŒØM~ÀLƒž/{3 £‡«×­øèoFMzÚïlXol„z>«Þ¢Þ?›g_Ψ§Ã•-êñAÏ0õ2ÍÁ¥@íh;šô°eù¬pz±…Ž5ÌyÒ ˜òØ ˆ™Ý\Ùä×ie®"cÆCƒqØ-RN_2‹;oMdÈDZ?rôþ·|‡’ÔúŒñ!Œ í¹Ú§f¼ ñQ±,oî¬gÆÃ˜°¨c<1>l/±Î•QÌxÄââWW­K #@t¬ÑŒÇ|%c¨º ƒ9t@outšñ¸1ck÷Â&1e }útãaLå|ÓŒ~,E*KweÆÃ˜°µyMˆñ¸Â±ã@Êc”«›ÔâkĪ?î~¬§ 
ÃŒ¿©c‘‘ð\Öň‡-ãàÖøñ0Æ”Ê1Î!ÄÃ"¤5îÔ„G\›ž Âß’§ cÀ⸩¡2áGÚ£Ä7¬©&`(_Îãtˆð°&àà4áa”æóÑ÷iÂÃ(SR™¬©…&<ŒîZˈc¥L„üAjÅxk”z;ï¤`ÆÃXŽÅ[Lx˜$¡ÓMø0†ç5^Ó„‡ –²JD#~øŽÒy¼3FwÝÆÂÇŨ}B)Þ0céçBæüݨ@c>·‘3èaÇK­v5ü§òL¢ÌÆ>/HÖü –ŸÆ%Žøk^ç8õ]Xç_›äÈ'¡oñv®tü¤Ò¶#œæ±¨¡H=œ 3òð°Žã¼7]næçɨã{+oùÑö(P’g¶›Gíºmí ¹°Jž=‰ÂwµOéúZ±|<ðyx~u!2ÀASe‚%]Û§ÂÚ.Džmö¢Ã–"\DØÝfuzæ¹¥ãÖê½0•pÁH_wYÛöE!–òpcYm»El£ C\HŸqÀº‚¶í¯aî‚â¾’rÉsÈûKQ0%OQxCÉ(pÉsž•ü÷Ù<Ä &3+<,ê”!oj±Ü庯+HÆud9™^¾$ÿ¢@¢Ãºoìïñ·#8~‰ºÉ¡8ìÙŽŒ,ûG¡PZÆvzxÊ=o§ÿÑA À<¢~ûQIWTô/¼“‰\j ý\¸È#F™”{¦Æ‚MƒyFæ·¿ùÝïÿá·éýýf@Í2U—Ù{ï7߬7—ßÖvϨ·ý¯þӾ͊üu7NÄiŠëà!ÁcS‡wÉo¸uoÐåÛ<¼Ë¥€qÃäáiÁî«ãÎú8®®9Ž2òX_Ž‹?°1Ì¡"6‰Œ×ú<º«ãÍ(ÓîÎ ¾«£»`»BƒØ|ÞƒkïL‚SgwE8 殓8l×í80é× )ÁÁ’li¼æDxÌÅÛZVçtöH¼-X„=»x:®„á3ï8±Õob9òº¨'„ë†|D®Ó1'Ö¹¯éhFuGÞS™Õé]xMA>Äy|—ükÆD¡/$?'³SØtêpÏ•cœò4ðê¹Æ‘r•¬ñ²ÁeuQ>©3,ã/<Ô9UÔU9qh¡¨ó»ð}tQWäØ<¯ê/óZUxY“º$éÔ‰iê/Ô(õZS·äà5]›:à 6É~Ý“ƒ/êìéê/[µá¼=îpâ˜Di-ý¸Â &Õ¤£S'yÅὉØ|ÖÌ¢ã½:ÅËD`Ö€³fR㬨\U³êÆXôãþ&^PGyÁ¦ª_ ê,/“`œƒvÜàdÚdŒê0/¼¦J3â2Ÿ«jÝ(Çx‚ón›¦ItÎõÊe‹ç©4•ó•°\âk,7]&Œedô•IŒeÆ9s®oQËe]±‰Ëý.wz¹ÌŸd.·;Žókó—ÇNWƒãÂ6âqeñ˜¸Á8¦ö`pœUqSS18Îl#EaÆ*CX‡Ì0ælµ4V¯1Œ3uW–Æú5Cc]W˜ÆáNáDk)¬³…(ÌS8ßáK]¨¡/uH†¾‘mú, …#“–(Ì_d s,™Â41¦ÛP˜Ãc ó7Ãþ khM o>ÛÖð׿7f­æp¤baÛq®1 éÌð8¾itÜuáÑq"ð0… ª5„ivl ¬KÄ0˜š*#˜¼†ÅN¿F(ö 8ÒˆPŒ%‚×™Äíà~o£lcð6†2‘·ñ`—ÈËï1yë³pû` ÌÝ$C˜*¦0¿Çn¯‡Äœ.¦p§q/A¸1q‰ÆTF ãözHÜ>;JS™j´¡²cb•—5•ÙÄXvw›Ÿ)æ†ÆüÑØu/ù›LcN1ÓØÝ!ìø:MšO Ðf§3×ON(þšŽ÷¡ÜnŸ…¾Å÷Ø!ðJÛŽpR™zWòåH˜y´]…Ãý§ÇQ˜+kéÉöP‚g¦›GÛe@âUìôä+"`tqœ#Põ~³qw1NžÁrÓ¿%ðj¯¶` Š ÅF•žrUн ²R…ü)¨ßD@ŠÂ;¤ ~9 ûE@]ÃÔõ…ûJ~È%Ï!ï/yDÁ”S0~ÕTR ?Bæ÷´#ô’Õ9Æ5‘âXí{L×"ÛÚX¦¼¦kÚ’’ºÇw)±È¦¿˜Œ+A}1MQ #Z-²G푮ɧ©¨»qÖÏ‹º‡Ý= —öhÝB©IMMß0~#IMM S=$5]B< ‰mF[Ó‘hÆ«‹¶¿×R)®¥þZ\ËŽ< ºÐ³3âšê˲{-®eÿR\Ëþµ¸–½º%ǸùsP×ä°+?‡›¸†£>ÎÕÆ6ñµº–ãuIŽ@|-®å¤®É±Á¥×âî-y!®å¬.Ê™‚Aί5µœÕ 96Qeþü+£€OjjÌbìçsç± ãR_Â8qc Ñ8’³…iv‹3މŒã õv¦q ¿ÑÛ˜•‰hâkí(fK¬«Dã®&ј¿˜ÌŠ]Dãàn–/éœg÷×4Úél¨Ü)ŒåÎx%,kG¨¡2gE=ü»º8šñ益b¨¬pÊí(s²ºñïêzØWEÓ`™¹áŒƒ—pN\¦’a.k¿°Ár¿Ó˜"nhÌ`·;ŽÛ8®ô)Â1•¢Á1ÇŽq\©kbkO¼¡1¿Æ8&áÆeÍ^æ2¿W‚@6ÊmŒæ|õ572Ó‚ ‹æ;‘yüDDND dmbß1Ì]ŠÛËQ1öª)ˆ0‡=ÛhT¬›†÷׃bª1<(&¥Èp8èœàA1Í!xLÌ aW⎣šxL« iÀcxÌ5†¬9œ™µÄáB¬% ×Ç<%bÓ'Ã4µa Ò…õ4Â@¸0¼ Â…H«œ_3˜FƒÌb¿3‹y Î,Îwó˜žYLfqþ€Å;fqb±8¿f±žž'Æ4¡ø 
éK嶬õÓR[ñ‡³7QNO#þZ—Àú½RÛ³ÐwxøŽtü¤Ò¶%œV®#.gÂÌ£íJnCe©Ÿl%xfºy´]jÓ)^Åž©Ô–b—¶«ÏµÆÍR[ÂèØµãØÖö7#µ]íõ"ÖlAqac{Ué)×ýo{Ady¿à‚(Á…¢ðÁeDŽÂ~©M×°·Hm#@.yyÉ# ¦ä) o(ù.yŽÂ—¤¶ˆsû[³œmR²Ijã@vImÊ©M¦ˆØ$¶ì¿#µÉlA mùçm“c{\לš~ÂmÃD ù:H† zîW‚v!D=Ý.‘|º4)1_·‰`Þ5a(‘\•BÃ4}¡b\ ’BíÒ5¶v¿­jX%_‚~/çë2ö%”Ü®»DŒ ¢ãÓ%[¾Nš>µRȉ@~¾"ÓózÎÕªžÆ•j¶KèPj»îY¡4rc¡4ã< [»NûAèâèä<ÐB@Á½/dŒÒ§AO¦+¦çG•4>Šêȉ §Ó¸ý<åÇè5Õ³a<2¾«Š™ùY%ÙƒR9¨*fæG4Ÿ Æy òµFêcò"ò† JT4ÊšŽI2ÊšªH5ݤµšÈ K¯fãÊÕ_ʬ¬élʯ]¹µW®NT!÷go1¾\54¨Õørõ{•^%O¹£G¿ÌÎë]aëÎ:o1<άc gFa¸Rb­Æp$Šá0ù=™ÃŒæpç× ‡µ9LÞ,æ0Œ ˆ.gâÆ61S·¼të2©æ SoÁ<ÖÝp¹=ñþ5ј˹W.Ù ŽUËELáSÞqò鈗>ÇáK‚œ<Â-çÖú6E΄²I’3¡ìÒäL0;D9ì­{„ìÒ¼àæcQ®éýoÿú—}ûß°ÖEÉrQï¹ùž–åÆU÷)±,w Ò[ªZ•#7aËN»ƒ½nX-'-Ë‘jÓrÕ^=ClÅ)§E÷‰\>?Uíkàð«ÓÛÝô ݆+D´§‘qTÒkØu¬›ã…½ e­%ís  {Ãõ!ZQ3§Öò96%ís ©yÃÕ!Êç :. Ñë&Uì0ÀR®rvWµ«A—E÷b7yTÖwäÍí¬»Öž•ÔUc6Uí` -¬GÇvLìaPóåŽ{A´‡AgCr¼áMÍë{JäÖÁájíaP…Ô³c?0ÙûÉVI—Ã#ìšRrÜx”h/%´TÞ馊+@´ÛW‡\éqjò †ò(h!¡ãöçŠDÇÝzŸ›¬UVãt$»£nú“Ò,´7UÞç¦Oø}1?Û%ÂtáŸ=ëÐ ºä´fúfÏ6_ÞgðKŽNÆoLlÓølkê1Ž\ïNspˆlÓ–^Y§ŠÌ«(Áì.6&à3µ»š¬…6Ã_î ˜¿íÎ_R} +³¹² §œÀZV7ü¥ú}ð÷ v©%1uÙTYƒÓ°fúr›fú¦v}%Âø2¯¾åøòk _~á[~s¹G°ðÕyÀðe×;A‚¯Þ÷`ØËo1|óºÜ_1tÙÆÐ%ÈPר*knʶð›¾6Ùý´êfœ®ìF`w/õfÀ«…{Ã]Ýp vƒÎ 3ꥵ\Œ]2v™äÌßøzLÇÂü2FyÜy$ë^.‚0ø%Eî6Ö”u$ºé0€ë“q/ñ…Ì-ž\>I¯ƒ Q®ëv~Ž€õC;&€Ùf†Â4Üu/Oò50fl|b(ü¹ð“ï¹<,göò×ÂéÓa¶1„IÞc§×&381p Æé§FÀOF¾‘Å••´û8~eÝÃy‰œË§”…Ú´¤´q?ݼG?Øz‘ÜÓìñÞåSM»¸%$nrš}¶]Î×%B: !Ô3üì ‘7QÍ>Û®ªqªHð³¯(k÷eãbkËîKå*6»J»o¡„O]¹ýR9Õ~/Vl)ÆûÛ/¥)ª‘Žƒ&wiî–ã°ß¡±ä›ÀÆ‘x‹È’o›‰Ä46]×pÄKX7än¬#HS8ì7Ô€1¤45€"ñŽ0ÛãÀ‘ø’ÎVŠÌÛjO2Mûnš3¡lÒÙL(»t6Ì­âj…*ĵïêl¤²ýuß]nSúãNñéh3ï îß[wŠKcÔ“U1j×C#G«µï¡j¯¤w2{)}])+ù,Ä8nŸWŠãÎ!}†«Úÿ0 Þ5åwÈ™?†ýq“8ÜÒô±¦=©óŒ£k×Cš/ÈìňÑ­®Úp˜Ö÷‡C~Tƒ÷N;y«Å¨½‘£á1³?nǹC:uÞ+ÇC¤5ØbÓž‡XÌ‹u«3n‡Q_Þä1Çwëqì‚¡7ƒöBÄ4ŸiïCLüf÷ÇÅá¸ÖDå™Ço‹Ã§IŒÃÿIfT¹q¸Î¨­ðûë«¥üÐVmÑñ÷’vADÏiÎÚÁî‰Q>{VWvz‰±ŽCvfu£FÒÅ÷øš®uúâ{|Œmêâ{^“Î÷ÞCÖ Xè{ïæÁ÷ÞÛë¸èÞ{)>úÞ{³³ ¿Qþ Ú´Êß›…÷tñ½ÙÑÊßèãJßÛ7éâû)íð}÷xAAˆ®»‡¿’lê¶{Øtºéº{ûâèÎ “Û®ycp»öܱÇÀ]ù’Nw¡]‹†ÜÅß3“^ó:3øÜ©p@nÖ -¸ÝàŽÜñÀ#—+<êçà1²ø Ü5¿£ÿ€ßÄoƒÂ7ïE4øæ:Ïô¶M0 c‚0~3jއüŒã!s,‰ãt©ÁxH†ñã!q¦ÆoFÂxüã¬ôž³Dax¸÷fž›Ž8™ ôÀõ†nŒ tÏb {æ2Ýô­LtÿÑM%g¢óÌÝÜÑIâ±hw M†íî Ò݉rÿ¥ëŠ>/úY˜_Êå\‹ 
Ëùj8ÃòÊ#tby6Ã_†ºñÆÕS'ÄÔ›ˆé†Ï„tso´AºÅ61=}ÄtÓrêþ¨;3œ×Pa¡¡Î»-Ôytc¨nËTÏܪ뙑OTÏO`Î=³œ þ`¹a'³Ü°“anøÈ07]'ÃÜ æÁž`ÎË,7S1b¹§N‰QÎ3(ç‰A¹3œ'”»ÆæŽâÃ$ww€» ¿Yg~ßl$v3èÖüž­ƒ±Í »ÁözpÍçd\³&o¸më\ê¯C|І5+ò»ŸÆíuøkÞc—gäÄ:ÿÚ$> }‹r¥ã'•¶á„4{ %ö•0ó¨\ 6]æäçyã{+oùÑö(P’g¶ó#PàØdó†\X%OžEá»TA¤ô“­F\û–UB£Ò±¶ð×ÖƒÇbZðå3Am—Ï{±aK.ìn±:=óÆqï^Á/5ï8û‡CÖ¶4lc1Šô¨ÒM™(mcd€,]èqàGAÛö×0w­ØWò#@.yyɶÄ%OQxCÉ(pÉsž•ü÷ÉŒÑ_i’u2HË=%Ìê]wúºHȈµ<:¾`›Ó¯(Óq(E†Ï.aqÈðŸ„_HÄú™š¥êÉ'_‡)”¿û Ž·”©—ÿöÇTaþènü's§•úxEEÿÂ;™ª¥–ÐͬÆüD&޾<¹™®`g^ž‘ùío~÷ûøÃ-BRð=¬}˜’\ ɵÞýØùÐdŽ-s‰v_‚Câý—?ÿi߆@_ÕM¹c:„ø ᾩãˆrá¹b›Ç9&Hû‰˜k„Ôܘ 7ÌÖmãH7WJÁÖýðȸBh¡É—Ö´¤Ïóˆ:. Ä ­Ë ë#aSçÁæš+­¥qo¼S·À³”ì\iœ:(K¹KiÇãywÝþË ¼Ë% †`ÂÍ$pä4mRYZÍÃ2&r^&bø`ìqëv?&šÝ ü’a2„ÁŒIÙ›d×\ü Ûuûn\h=D)“‘¶ Î%²ŸŒêúºLE}¨˜-æ&‚Mø*q7çÁD®T?lÅg'¿ß”âB꤉äÛ7qIÉ™aÐâÌPS Ã6&êY²v¤A©Tžñͬ&Â7[’÷$æîÿÀ{=ÎlÎØk3£¨K@ð!)Á,CüQ˜EJ„÷œTÈ–fFu Hw¾ä”f¯ª:”hdT‰˜«L›:•ïy)…,õnØÔ= ö›MJdãÙÔE JV*ä<2¸©c‰`kNZMu«_7Œ¼¯ÅÃù7LêX¢Ûkm¸Bf?[–ÌŒy¥Ýô㮓eÑ©c‰ð^’ÙÛ”¦£4›³usÆÁYgm5_[WÎÚŠ…ÿ¥Å6jÁ¸ꪭ2 €“¥'4Žám?n«á•(Aç0lê\"رÂõ0ßSçÙo⌧ã¶›+QKt{¯ ×Ȫ­Xž.?‰8x«4Ìô©±ÌgUJbvëX ”°¨æm8€c<*C»Ap{mÀ4ÝšÄky,³é²êóÛC¤õA¯±åQšÕ*æµØ²4ñT|¿ñ6áHs¾Y^‹IÆAEºÓ~ãµØ¼ .Üy-—e*=×~2¯é-¢µyKÓZ!³Jõ7Z›×ˆÖÖ¦i xŒ,·´Fx‡kÐÒÚ¼G´¦Ä«ÉB¤6q$R[Û 5 ¥FÞim^ ZK¢Zé%´áaZÑÚdÑÚÚ4­M¡­Í{Dkûž¦µ‰§¦5LR!ÜZ­Å&Øîbó7Zó{LkkÓ´[•ùMé£g&\Ã$HŽy4`Ƶµi\[›Æµ±®MTלtƵypmÞ#\›l!\›o¦`?µSŠ$bW —¥í µnÄb£pDjúØòé„%fs}[Þ“±yjS#l Rc,ù†m™úJˆ¥-›Æ¶ØŠD3­Ñ a;ae…p)¥;¶eXßdVs 5¶Ç´Û¥¸Fç„móI7éIO[ÝŒ ;aÿs‘n:–º6-†£C&t§òð}´'í„S¤¬ÜPdÝ sðS•2ºªK(OÑ-Ô•©_]%Ëè6ﺱßT •éoÂ7ÜßR9»û@éKÑb¸ðÍÕ„ñ-¶.Y¼9ã9Éš>g„ïìc‡Ë|‹Md9zw·}Oã[â"•¦‹½ÜðŒÄõ¾M†¾å›Y¦qiì&%z›\!z‹­ÈÔ©®ù$ÑÅVª+yl bzã=©ó2û÷7z#u­8Œª-½%E†ÚRråFoJÃy)U}Ó ÞòZ“Þše3¼¹\ÞNf’wíoyOøoèF‰×ÞÖXÑmB#t£©"Šsð0Ýy¸i´ùåø´ ñwzbq¯ßœ¶á¯éõ‡u§ù,ô-ŽÏ~hWÚv„“ʔڤb 3¶ €¸Vò8ère-=ÙJðÌt~´_Ô)^Å®Ÿ|IŒ.Ê( È¸#Ëhe«þÓJ¶"=@ÿÒ¯ÖzqaKñ• ¨°QçTé)×¾Õ½²*…üªßôGŠÂ;T¨~Ó9 ûõG]ÃÔ=pûJ~È%Ï!ï/ùÑ–¸ä) o(ù.yŽÂ—ôGÑ™éŒE‚ÛôG d›þÈ¡lÒ9/éßød íeN‰bk@¶À÷O¾ØIàÕN ¹€ñ‰#OL2Ń˜%•0aFMãs½¤é"Ǿ˜xMã°îU†ðs‚MÆçHyL€[Z"Q”þœÆaôï ¸Íozø’D,ù8¦ß)¾—òÙ±”rØŒ¾§ä¸´¾'ù‚ƒmæóQ]8S“YÉ‘ˆ‹ç,H‚Ì›Ëð$¤çœÅ!´91‰Kêæ‘¡Ôù&Óë2m,ða¹¸cÊ•’ö;`Ùs•jbiÊFà“‰¾ËyL—qø†øÚ£zÑÝ|¯EÏK2!ñ/uóÈOÒÒê865uóÈлJ/~¨7 ÷£\Š 
þ‘ërD¥jô½,ÙâûpI`HGúžÒ­R=4>ý°‘Æ×döŠÌGºi|U&éRyúp%¥f4¾ Õ|ùZR7Ÿ“v%3ÏQ¹ºÑø”d•ºÑø‚|dyز#Oò_¦ÂÒ(ÆUñÎh|J&ÊÎh|JÎÊž5>•ÇÙ^SWX[PWÀÖ[æÃ}s0_À8º„F©«žm"©|p<ÆðCØ®«G`йäR‡Èȧ”²±Jà8)Ɉn˜ê+‘Oz ¼Š 6+òÉ×8|9«ËGŒP™³ù”™³º}dØz8Ü8£'çŸïù˜Û­?°ŒÔK¦ÜÀݰˆít2¸[|][•À]¥!DŸ ¸ëpeµ%¼3¸+¶¬Jú¦ÎàÆu°•]Lî ïO„ð©wE_&ÓÙå2¸‹@ÝõCapg¬øèÒAŽî˜Á î8aÜzÀ:¼ Õç9¬&pK‘Ñ ƒ›´ævJ2€”xö;·Å¹5®±%q;EaYÃ\ì>ä› œë}¼-Ño½œcq‚¶Ø\“ø×-´%eÒË=o',Èìטšt>ìrrØNwÇ6냌mDS0µÔOÆ6ëuŒm /H]l®–¶vœ·c‰c›µ<ƶIÃ742Âj n¨0’޵¬†Á- £ÊÚ¡ÈàNXŽƒ¦³Õ¤óáôV‰R»·YÑbr³&Êäæügr³¬Èäf¹‹Èͪ"“¯U!V\cqMn‰Ê›çhˆÉÍZ“›U&7)“›•7"·ùd´BŸJ‘…WbY&7äO™Vº¹ä”ÉÍ :“[l­JÍe¿Ln´‚"½Ý2¹µx8ýfI¾¥¾~Î+z}2Z.OÜ8=økº×aÝ)ô= }‡‡ñHÇO*m[Âiå:zs&Ì<Ú®³á KúøÉöP‚g¦ó£ýBŸNñ*öüK…¾{pÊè@&i{…¾„ãØ…ñ!¢›ÿÛú®ÖzqaKñµ ¨°±µªô”«JìÅ‘{8äýrÏ(9–{( o{FXîá(ìút {‹Ð7ä’ç÷—ühK\ò…7”üˆ—))‰æxJÎ×}'c­T?—P—Ü®ûN² Xd‚wœÎS i}ågàšv•’¯CŽÆôÌ ' íæ“"Âácî[dNœpÓµÒý”ìJÕ®%Táú\IÊuñIÆz’u2¢9Ö´ëAl2½•YÞ˜ú–¦]b‹¾»8]¥µët£<ÎwÂÜmÌ7K×®î+£6?µ&™¨_Ÿà½&ÿòs`éÃõÀ/TÌé ›ÿpÚ ›v=ˆ ‡g©~ÃÖ®ކ-¦æd¹zíz0ßôÚ÷ 6ic¹ÏMI“ú³ÆbÆíe¾»%„3³ÅBªKxgf³z»˜‡qžÌwcöÀɿ戀™Íê3ÛØ¼Ñùºkyº‹ÙƤ‰Í[L™Ø&šØ•Î=df³^ÍÌf½š™Ce¤¯% Ìló1›…nf¶µ‘ÎÇá³­Më|üIb6l‚—å—gfcç­p>µqù3›7ì2³˜m¾I̶ï‘ÎÇïµ`mÔù ¶û[ –¥=A·´)Ð9ÜÑòã¼h0º;ÆÛ®·ÕT-¼¥ oë@AoêÉ‹m®F2ðÆkc»_¸Á£©Q3çnðÆ‚—:ÕS†·»…·±?¦ o§÷é²¼±kݤŠÔП±»-íú»u`ÙÝíÉxÛtÌn”È8÷ÒßÙM2 »Í°UcÉr7v«œ7v·Œ…~õº×|(ºÕúq¬ÜZ>gÐCT¥žÍÞß ›úƒnþ&³›ú2oþ¦…76w®Ã[즒y‡7ÛÞXêÖÛZÎfàM]§7uÞÆFð66‚7u¹ÞÜo¼+6“†y"‡a7¿eÙ-nK75ìæ¾œÙÍ6f·±»9.–Ýz °Ø­¢÷i­Of'ÇÁzpýLoçøsúØaß©ö= ‡ŸñHÈO:u;ê.[½Ï<Ú.·á~MÖûøÉþèÏ\çGûõ>âýäKz_®Ù¡‡ÇP`«Þ'lŠ2&~"äÏH‹»õ¾«½^dØs%Ý¢ÁööªS”ß¡øY÷á÷ë>£ìX÷¡(¼A÷Q`݇£°_ñSuLzôwH~3D.{ú ÇÊŽæÄ…Oqxǹ²Îy[ú‡/©~ò—>´œ[ëÛd?e›îg‚Ù$ü™Pv(X²úY¼óΈ”¿vé~ÿú—}ü°˜æòCàb–(“b-"JNé~Ýa‡ß!B´¤.9‘ ȣϭ~Ø*ë~Nºî>‡åMÆ J÷Càý˜Û´œ”QÈMùZ®/u?™4*ÝO¦Ks6>‰»XôÑB9˼tú¬qçå‡à³…ZuzÁ±Ã]~œÙpË¥£”GŠçf…†Éè ݯ5§u?\rNëZKZ÷#¯á¾KG)Èê²–F¶î”ð—åwAjÔp·ž´ð$ÇŠ„œ§­²ð'Å3ŽO[Ç,—Ž‚»dâ8×8ãæg%üIÉâÎéŽì®já/ÈüL"2Ü%’9Z÷ê·ï³sÜ®t?ÜœÔÇS˜ê’ý’Ï%Íi`ǵ+Zö“bŒK×è¸uå’ýHê¡jÙÏ?šLÝ— Ñ£cÙ/JÛZBu—P²ÛqùŠ–ýB+±Î%Ç=9–ý¼têqú÷{JJö£ÍžÕ^ J¤ëٱ𥢧¹¤ç¤…?R±:._¹*ì¸cÈçy|Ìîµð'áÍst¦-é-~$gõRµðÇÙ‚»W´ð§Ã«ImñëH3Ó‡‡I é~®”ºv)t\»¢u¿k7n‡E[R¯-°½U#û]çÑöî^Ê~]šŽ’ýHJì½²ì§vøÎÎR 4¼øéÃ…>¥û1¸ý8è©£g ¸¡ –k /»H{Æ™)Ë¡Kà†ã9Ž—üÍàîHA9¶Î1¸eºæK( î><Ûam'p›þ…À=v 
Ôã$_7Ö£ŒÖ:²›ÁÍ›Übó!†§à†¿UÆžI ÜRß’ÌÁÊÜÃàÆ?®›rܨ§¡øµõœÀÝ@Ɉ¹ÑÁÝ0 ÎØ[ã6ˬÌm±¥’Òâ=s[lnìϼcNhéyÂ챘Û8iP’}àž¸ ›+a-yXà–‡Ròq-M`p7F²jæ>ƒ6·Žrfl7 ]º¸¹g€± ‘A»ÔyÆ6 Ð…æçéŒmc#l›ojl‹)ÕáÎ7lcÕ^’áæQrG—¹Í\ëÀÔ†-ŸÃ ¦vTÆmWýFm–I˜ÚÖ¦© › ¿ÖLm±IÕ9v‰µYnglÃæÏÃÛbËÒçʘ§Ü¸-¶Ôd9s›”Æ6+=Œm|RªŸ¤/ܰ]1 ‡O)\Øæ•¥?Cnàq©%OÈ®ƒ ¹±7¯' ÞÈ-cË 7r'L¦ z#·ÁŒØrÇÑšýr gŽQÃÜRGóÀnäöå<^óFn©šÇV2Cn™m´ó@?Cî$HrõÜjÉŠ!7¿I¬ž ¹Û¸roVK‹ÂŽ1ð¹‘;»óØ“ºeuLêoèÖx&rãÄSœ²rCw’âx:ä6$¶è}Ý̧ѭAËè&5× Á±93Ýám là ýZvƒ·dU9Þ§Þú›Þ®ž“=†·!&Ñ[l­É2½AL!Ðö1½asݯaâÞ‹–Þd3ôÎkAæxÜÒ[!ÓÀ;ºs |ƒ·"à ÞÅ1·êÞà-õîX2{ƒ·:&ÂлI”Ç1ìþ½é=¢7éô‹Þ =õ¹‚øó· Ê<ñp²£Ò-õoÔ¿uýuÞ+ÿ=Á—c>À+[B’OYÐ>Û®Àë2ç2¡ž‘àgoˆD¸é€æÙ~!S}D‚ž}I ”!A©\Þ}Ë`Å^àæc“©ÆgÝ~Ë j½)¶â‡ý­—Ò”ÇY“é8þsgæ²Aû²P¾i‚‰·èBù¦ šH¼AÔuÍ÷qçݸyc AšÀa¿¡Ìq'׊Ä;jÀ<ã‚kGâKÒ`AVxÉxßÕƒÊ6iгI4¡ì+.Ȉ¡Êx OLZüë¾ËýÇ-ò§È´ueXTpãºE«’óuã&:ÚUQdòÞŽBĨåÁŽ{ê¡gy™–»GÆ-òÃ{J¨Ó!!Æ4$sÕ›§jwQå]Sþ 4œþ—aKŠæ=ò2‘„¨ue$Fí±(’a3%™Mk—Ÿ©>GÁ=Î%šMwÜK™¬É¯ë8›h’<Ëô0ÅÃkæ½Ó^‹Œëð\\'NŠQ»-¤YIM<#®ìZWÊc¨ÔS?θ’Q¦r\ÈÔ5§¾ÎH“v\dÍɰbùŽÅXÇE#/aÄ©BkÜŒüÜceúÜ«ÙbëÇu>hçE†{³ö5éÆå½H˜u—C4Nà\ØuÇ<ŒÕg\XŸ§1SŠÆó8Tîñå$ãp`ð¹M’>7Î)UÙW%Æ„ƒŽfU6§,áô#åÃ`áÙûìÌ•$d”ÏžU™OãØ¹³ª²5gn%¹N\£rd$ìU– pÔxΪ,ÆšªG¶UíÊàý±8.HY„ÑῘ¥¥ÙgUFn­kR†~‚³*ÃË’q-Ÿo6íÎH2ZÄ Ï7íÏ£pöØw0î{ígUÆ›Xá=݈bT. 
±aóÚÚc:&4êä"ˆQ§ƒ3Ž‹ 0†µ·bÓ#žu™ï/£vkX£×~ >ïXŒÐÐϺ~Ñ/5[Œu\U²ê²q„켌è¤˜Úæ[öÐÀ—0x‡½‡oöÜûcaåñç ² û"}œdUpóûŽƒ¤]‰gØ÷ˆ»<ýÎö¬*2ìõ‡7ØÛþ¥½vOسzh`ßp‹ìyC½D@>}5Yg\ÔΩæãŒ:£¢Ôg°g T¸à ö ™sÒ ìá‰fû¸ÀK°oXþ_µ;öÐüòu7/Þ·ØCàųº4‚=»ž ìY;`ß°  –µ_ÙÀžÝÀöì[6°gç²=9©ìÒñÜYÏîeÃz8éÀ¬g™YÏfÃzö0Ö³*gXÏÒ›a=‹h†õìK6¬¿ïZy“ ëÙ̬g·°a=û“ ëyã—a=¼ÇÈ€5ÆbÖ³kÙ°ž÷ ÖÛÏëùäóàä$±tó†W 2ᔾ1þ¸á¾Œõ/×ЕpŸ!Q^IdÜçqP×±ûq£¯ èŠö%ãf“s_ÓFíJ%ÚÃæ®ížL{ìÌ×É»L{Ãu%Ó~ì<ìØ<·6iÚK$8¨â)ía”©MZ' 2íí›Dû¦ç¦çíË81=Úß^Ô¸AJ÷¼.cÜÃØ0=€§q?RgOzÃ=ŒEÀà 5í‡Íwª3íaLsHào´‡QêJ\ÛTíñTŠ¿åyÔÓFÉíxk5íïFEû›QÓF)%k2ía\—s˜­i_p²äL?ªio¿ª`OÁþfÔ°‡îw€WÃ~k“Êú ö#!’aåè 4ìïFû[„4ìoojØß2OÁÞffý°å•dËúQ”Ò½®S?™õ¶úëGu,ç‰;ÌúQóyç³Þ6b½¥Àðqrãÿ¼b)­[ ÷@¥0þšwÊ<AˆuþµI­|ú'èJÇO*m; ižÁ‰ÆJ=jãXî­!®ÖÄhz|oå­~ô†(P’g¶›G~·FII^%ÏžDá»\,k›ËCw¸¬~×Tdôþ«+”2øŒcKðå3AY…òï~ã¿á¢b¡Ãÿñg¥±ŽãÔ…«nŒ¢AFú=är›!Ã?}û?¾ý‹üÿ˜û 5.>m©D‹I»©¡Ó3èwJïÅa®ÛµÈØ„L¶QÀy%Ò’†CÖFAÛ6v %>J,slh¢@¶ý5Ì En\?°¯ä )y ù %?¢À%ÏQØ_ò£9sÉSž”üàhyÈÔîçrÔҤývAƹ˜ÑÜ)úuu”ÈØÐ"½\s-HþE8tDn¬3¾ò X %UÌÿ«ŒRúGʨÏ?§§ûÑÁŠyM•ëc¯˜è_x‡£[B·‹c±=¨Šnœ+t»?±àXÕ<#óÛßüî÷ÿð‡[„t•ù¸ÛÍpf¹Ö»ßüßq(Owš®müÿþ¼ñÚF_Õ-2ÍEÜ"s®¦hêt©Š­×f2µ¾—äŸUÆLç{°5Š3ïEÁá/ÒÎŽ¥xÜ…ap]À6&Q}ž.Õq­e-üÍ®N—26âç-2Èd…·àÔñR8ÀF…ÜuLÍ•L¸ì.ŸœúxMÒÏÓ¥œ—I"ÞÓQñóx©Ù»dœÕw.ý¸ì&K<ü°éo†ë&‰)›ÔéR<Šêü\Ç<ªS¥`S…â"VöX¹~®ÙïˆìŸ›a~^œäÑ«¢–½¶Â#T=zmñ%1BU¹hF¯á5%eœñ’’…èjF¯IÜÄ™’Ú¦*¶—Í4Ua€âŒW5p$€b· H 9îåè•ûŒ‰T è%RÍü…*6ÿ„ÔÜ_^‹¿![oUZ ©&´òÀ› kÐD[óMM[“b´òàŸ°[ܶ¦-V èq¬«R¢»;æïŒa—°«r0„];âï †økÊcUš’2ˆM0k¬ªjö§%DèºÓ‰‰»§ïM‡9¬;%Äg¡oñöCB¼Ò¶#œT¦R–|9FÞ ßKhÖ¹ž+kÕ“7D€<3Ý<Ú. 
ê¯b§'_‘£ÃU0Åט½ß+F\Á‹•·X÷Nù0zWÆÝ°nûÛ”/b\lÚR…JPdÚ(Sªô”±±ot…{QhE$ò;D¤~“9 o‘úM>¤(¼A>Ô5LÝð·¯ä )y ù %?¢À%ÏQØ_ò£9sÉS~ ù0`‡0”IÁ.ùÃØ$r »äC åKòáwô¹èÎeÄW™ÁBŸû§ú\ìFŸS529£Ï©II‚çàœ7Ž…ƒÉ©K_j.€ãÆäÕ¥/ìô§à:¸æ^O•’'ÇÍ×SÐŽ ž?§ <ŸJAËrœª¨.}1󣄻i.±ƒæG ®-v¨ézJêÖ¼·–·‡aÓ²ÜȨDjÇ.“,×(”L²œ±µ±´úœ@ê‚)ê²^Q—½ØãN­zh[%YŽ„ *½žI’m:6ȵ•96È+“šqlèÐH–óÁÎŽ *æn<ôy8PA³ÓŽ vˆdg*zÙdžʇìµcc„âµ?ƒ#•½ºÛņÔÝ.6”@ŽLPWº§Å¸KR;4t0ñºÒåö96(«sRwºØT%ãØPU»fµcƒR•Õ.6.Y{8LYeu§‹}¯Ü~¾Kc{Å;ƒc¨qº%hæk´æ²‘ވ˰é–G\fé¹Ü;³—e9Šg0úœf½æ2;û™Ë,¡1—í{šËý.w&_z©Ø1 -Åî% Mph“+h“›hûž´ ¯ Odz²†ÇïbÞK@›šóÞ,æÑ{ݨzú=´}U=¢†cym¬óéð˜Ô‘Ú¼Fȶ¯id!ÛÚXù£ðˆÝÆYd›b· .-P¿Fì¶6ÖÙÆ¢ …Gì6%”Yd®kv›ðÊ-êõBƒo÷ßûöÔ(ŒFÓãð{3šn/©‘öKj—ÄïÑh:2™ÍhšÈL2!5µ+‰LíJz1S»ròˆÚ؉ª¢BÔ6áij›àˆÚµQ'AÔ6ÁµKä÷4µK ¨µÍ¨•¨]â Öò)2É„L£i¯ÑáƒAt¸yfÕÌ’62;‡×h.< 1cg ›AôK4óìÐ ¢Iy5ƒh_›Ñ´“òhÚ¿F3ªMS̰ú>š¦Ù ¦ýk"›˜Ñô}Íyǃh˜ó š£Í’6UoÌ Úß@\¾´Á°øÃ;Û§kM8¬;ÕÁg¡ïp éøI¥mK8­\g}΄ѣ7ˆsð%°:¨Ÿ¼!”à™éæÑvuP§x{þ¥ê öÜaï/®8‰{ÕÁ„³ãå!âv„7ªƒ }eî.É›âàŒ M[jP L¡ÒS®Z¹—„F"¢ß (°DÄQØ/ÊÃEá â ®ao )y ù %?¢À%ÏQØ_ò£9sÉS~ q0âê…Ç4k—8Èal9]â …²C” 긌Nz´ì?Cdqð/_èÍ~ž8ˆË¼ÎóIÇM°ŠE¼Ñ4Ç!¬çÁLR'u›-Aû5ZÒ5¹Dí×hŽmùº¦Ö¤'X%j¿F8 ÖY6¸ð]¿–H$$‡AÃÀ­c]PÞjêT2©„´ÑnœytͳžሪtÍ3½W´_cF¡äë&´yª¢ý&˜êÏ#˜ *J5î âÚ®kalpÜ´«²4rg¶µëø%ûÍnüú½ž¯{a`Óñ쇃C¥¹ÂƒpÔTó‚ü楧£ÂƒpÖTóž7ž屨Þx:t\àB8kêðïÔ@*“ ÏÁQA9‹j ÿùë*<WåOFrpŒOEòkD]Ÿ1ÙP~ &‘_ÃØÈ¯ÁQÏäàà÷²vpp¾frpðk…#”Bòy¡A—õ PŽ@¹QI%ä*QÉÁÁáµ[5Þ¥.§+—n\.ÇÍÑ.Â1„CÝ #‰ƒ‘x¥qŒí@ÇF¨Ô8†ÞH&…c|RƒKãxî($ ãçš-DáN{y‰Âˆ‚Îb4A²)ã=ýÍbDAmS8¶¯U£ ’Máïé¨MPç”Æ±QtǬèŽñžä2ŽY|™86EÉ8æŽM@ŒcŽ áØ¾ç&¨ÃÓ8¶ï£ j(.ÏÇ86±Ó8^?F¤Ÿ+¯Ÿ'#üéŸ'#üi&%#üé4d#üifþè5#üiB#üé÷ŠþtpÅÕ:¼úZøc›ðÚMÚÞ(ü1‘ƒ#ÇYjªn=ÍJ4hnψ¬™ÇDN4f2D¾8~ÄÌvKdMd³]üFdsMd¼GcfCd¢µ"2ÞÓ™nˆüj€l;Kdè*"Û÷,‘u4‘mÒ-‘uVk"Û¸|—È&Ñ7"ëqœ&²é†~6‘oïýL"Û4|Lds fj4ßÞ3‹3ˆµŠÑ6æ–ÑbšÑö½ŸÉè•⟋æõóÏ™º†È4xVD^Áü ÿ’ý¸toÌt@Ž?§ÛöàÓðw¸þŽ„ü¤S·# î²UéÑD8\ùÉ* ~òŽèÏ\7¶«€:Å+ôä+*`®Y‚ >{œ¼UÅ.>ê!FpüÅ*`n!áfiéAâßèÁ‹öÜÏ·ˆ´:EùB 4r…ü9hDå ŽÂ~9hT–ƒ( oUë˜SíWGˆ¦ì)èw0;âÀ…Ïqxà ³£IséS~ 1PáæŠ–sk}—hÙ$šPvéÌAqYw|,V-þë_öíÄ¢åx.Ĥä´ã ¢%uAKmt KKU뀤Ê4À\:`¥3&Ñî•ØVµH“ÇV®ËYj¡uç­$­ÒaB­T­fJBuZ$)³á™ö¼ÊdL뀤0´æ´HþøÖ’ÉÒp…Ì¥·ð7»S‚ I ­'-:~­jA\.½=KE³»¤AòwtWµ ÈßôŽÏCRñ„ü®Aó^½Ÿ‡ÔqcÌ%["Ap<ªZ$wQŽw ê°!¾\: IÇ%1—Èï%Ç‚ 
Ž^JJäOâž½àYUþžÉ3×sÒ‚à(%«:«’墨‚êØ•¤AZŽÞKÕ‚ Ûp5Œ® dKZOªÖù×ì_õÒØ¯xÖ©j•Aý^w,êK!APH¯$žn þü.!Ðð8ÒH™xšå±ÄN§’x /µ†‹æ1v%¾â1¶¼hæjcÕ)ÿ3­¡ 7GÛ[ˆÇp˜«ÝÔÄcãL'Ã;N¯Úÿœ([4á''V'í&MxlüëšÇmÜVF†W]§JcØ(´„aصI»Iã& ÃF¨uÚíÌMAcx꓾ëo ]xÁuëÓÐmŽöj}­MÓ×Dß›­j'3]Ó6|E_£k}“Ÿèk´¢ïÍVµè-}&@ô½Ù*;™uç ékôMß›IaØÚ4†o6…a£6†­McØH%„áÛ{ Ãæ½5‹#b£h¬Æ§É‘d‡›5c™Íå6¶›©Â6ÓD–Ùܘ¿†Íš‡ÌæÂc^ÃæWceØtx–Íú=fsâñ0±9Ý‘œî$¦0’îŒd’a ’ydÇHŽÏH¬È#±na†ÄÔR$ÓCf3wÌf:2›ÙÆlw$‡g$Ö̲$&“X§Þ’XÛ,‰5',‰ÉÆ$¦ b$!ÙØÉÜÑ0’M#™;!F2ÛÉÆFHæÎ‹‘ì_ŽŒmçÅHæðÉþ"±ÿ’Ïì¼mÐåS‹C[Zà˜Í ñƒ­7>Áç_>åÀ+[B’OY=Ÿ½AkÎk¼C=#¡Ÿ½%ᦠÚgÛeANõ ~öiPú‚ð(½úÐvß>X±QXy %|æ¤Ò_, VœÙÕæ…Ú{Ò &ÈE«-éÔ~‚Pšò8Ä2çŠn Ò^BHa¿E$Ê7…ÐDâ*Q¾i„‰wˆ„º®ù>îÂ×9o¬c”ij…ýŽ0‡º\8o¨ÓÍ5€"ñk…¥È„¶ö$ýoÚv%¡ d“PhBÙ%r0;„Š >b¨2>éß “– ÿºïÒ?çRÍØM›Ôz@p òLnr”>]F3zÖ Fí‘´¨v#F­òQoÞÉ|ªŒyѸ”)Ígi†»+C€¤ ªV m‹ñ®)¹°% ÿ–iVn9æ…/®i°ÒrGïº{y’Ó8 i6RÄIÝëâàiÃùêïÖ ùÎ1j±°Òrp1V\ÐÜÇÍ÷7£Wra%‹Ø´^XùbÉøq R‘$ £¾{ÑÃÿ03{|v>ÓJa% ¨¢¤ÂÊ÷äxxŽïIJzLãT¤qãýÝ8Ôš8+á‡8*ì-CNÑZFJtÒÊaÍXÖÒa¥coÅ(Ÿ=+l]/ÔqJÒ¬°ökE‹†¶dŠR ×Çà}8ë© ½j¹Ð†Tµ^¸¾&U3_õÔäFÙW=5ù×´dhãÝ´fhß„óA×SªQ]©†ö«]ë‡Æà7¿ê)§$H»‰ªžR NKˆö³^kˆ7£|VWX]ANÑV#`®øÚ(æ³wn+ŸùÛtŸ©á¶ïÆhü×J˜cnÃMpÔ§õ,À1Æ"r6öaëü&€£“a£ñb“–Лú MruÕ!’ÃYMÍÇO6ÕeÇ®lnÆšäF»$’ÃM­Çÿ6áP“F³Drãýf¤÷7#Ý8»éÆÛÍH‡‘2(þnzª‘nœáŒt#€2ÒË›‘nïÄ$¤í”Ù>]ãŒôóû½ ˆéöÖMBúÝøÚõÍl7B*³ýn|íýf¶ßÞÔl7þobûÝf<àDKÍö»Q±Ýø¹™í7£7^p6*¶ßŠíÆù}êÖ_š~Zš¼áýNõÈÈT¤¹ZªW~“©ùM¦ºç΂¨îÝ3˜óHÝÀœBb˜›éü£9“ž`N+Mî0ç‘·9±ÅœØb`Î#os³˜ó(ÛÀœâcaΤ'˜Ç§ §HZ†3à73œŒÿ‹á_g85†ëjwc8ÿç`ø¿ÿáÿ*”˜ endstream endobj 3 0 obj 25253 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1080 792] /CropBox [0 0 1080 792] /BleedBox [0 0 1080 792] /TrimBox [0 0 1080 792] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000025472 00000 n 0000025493 00000 n 0000025516 00000 n 
0000025946 00000 n 0000025815 00000 n 0000025710 00000 n 0000025873 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<7D94E7FB17F45525319A3A9975F5FED3> <7D94E7FB17F45525319A3A9975F5FED3>] /Size 10 >> startxref 26027 %%EOF blis-0.6.1/docs/graphs/large/l3_perf_tx2_nt1.png000066400000000000000000002137131360743507500214150ustar00rootroot00000000000000‰PNG  IHDR¬öEYa )iCCPiccxÚ•‘gP”‡†Ï÷}Û m—¥ÃÒ›T) HYz•^E–ÞY–"bCÄDiŠ AF¥H¬ˆb!((`A³HPb0Ѝ Ü¹3qîüÈóë™wÞ9çÌŠ*’*àû¹Ø³CBÃØð ‘¼Ìt®'ü#Æx ÿJtL&V Ÿ—Î ¹ •#H Ç€•”.@Γ€ÜfÜ_>̨¿|˜ü?@¢Å}ãQßø÷¨pù‚„ؘ\¶Z¬ '’ÃÎôs±g»98°}øi± É1ßü¯Êÿ€ &Wà–¾…Ÿ/`ÿßPcC##øûï|„5ø¿ÿ€oziœEìÀßYT5@÷é'gjÇD ºîñ²øÙe8‡ÏÁá+ñÍøNü ü(~ÿ@ °šs‚+!”HØJ(%%t®† S„E"‘(CÔ%Z½‰‘D±ˆXMK)Hq¥b¤öIµKH-IËIÛIÇHKwHJ–aË8É$É”é–y&‹“Õ‘õ•Í‘=&{Cv^Ž)g%Ç“+–;+÷D•ב÷“ß*B~P~QAQÁE!]¡ZáºÂ¼"KÑN1Q±Bñ²âœCÉF)A©BéŠÒ+¶$›ËNfW±ûÙ ÊòÊ®ÊYÊ ÊCÊË*š****ÏT)ªÕXÕ Õ>Õ5%5/µ|µ6µ'êduŽz¼úõõ% M`½Ý³šÒšnšyšmšZt-[­ ­F­‡ÚmŽv’öQíû:¨Ž©N¼N­Î=]T×L7A÷¨îðü‹5©k׌ëÑô¸zÙzmz“ú,}Oýýný7ja  ¾š&6>5’0r7*0ê5úÓXǘg\küp-}­óÚk{Ö¾5Ñ5‰19fòÈ”aêeº×´Ïô‹™¹߬ÝlÎ\Í<¼Î|œÃäøpJ9·,ðö;,.Z|²4³XžµüÃJÏ*ɪÕjv溘uM리U¬#­¬…6l››ã6B[eÛHÛFÛvªvÑvÍv3\mn"÷4÷½¡=ß¾Ó~ÉÁÒa›ÃUGÌÑűØqÈIÂ)ЩÆé¹³Šsœs›ó‚‹©ËV—«®xW׃®ãn n<··ws÷mîý4ž:ž|Ï^/ÔËÝë×Äzõõ©ë»½ÁÛÍû÷3MŸ ŸŸ} ¾>¾µ¾/ýŒüòýüþ›ý[ý?Ø”< Ô Ì ì  j Z v .†„l ¹*šÚF k[Üà´áð†épÓð¢ð±šs7ÞÞ$»)yӥ͢›#7Ÿ‹ÀGG´F¬DzG6F.F¹EÕE-ðxGx¯£í¢+¢çb¬cÊcfb­cËcgã¬ãÅÍÅÛÆWÆÏ'8$Ô$¼MtM¬O\JòN:™´šœÜ‘BJ‰H¹*‘š”ÚŸ¦˜–›6œ®›^”.̰Ì8œ±À÷à7g"™3{LAº`0K+kOÖd¶MvmöÇœ œs¹â¹©¹ƒ[t¶ìÛ2“çœ÷ÃVÜVÞÖ¾|åü]ù“Û¸Û¶#Û£¶÷íPÝQ¸cz§ËÎS»(»’výR`XP^ð~wðîÞB…Â…S{\ö´‰ñ‹Æ÷Zí­ÿ÷]ÂwCûÖî«Þ÷µ8ºøN‰aIeÉJ)¯ôÎ÷FßW}¿º?vÿP™YÙ±„©ÆÚuÈëPW»¢¸âýá͇oWšTÖ¡É:"¬ò¬ê©V«>P½R_3Zk_ÛQ'_·¯néhôÑ‘cvÇÚëêKê?O8þ¨Á¥¡«Q£±òáDö‰—MAM?p~hi–m.iþr2õ¤ð”ß©þó––VùÖ²6´-«mîtøéû?:þØÓ®×ÞÐÁê(9g²Î¼ú)â§±³gûÎqεŸW?_×Éè,îBº¶t-tÇw {B{†/¸_èëµêíüYÿç“•/Ö^’¼Tv™r¹ðòꕼ+‹WÓ¯Î_‹»6Õ·¹ïéõëû}û‡nxܸuÓùæõîÀ•[Ö·.Þ¶¼}áçN÷]³»]ƒ¦ƒ¿˜þÒ9d6ÔuÏü^Ï}‹û½Ãë†/ØŽ\{àøàæC·‡wG׎=>Š~4û8ùñÛ'ÙO–ŸîœÀO?{Vù\þyã¯Ú¿vÍ„—&'_ø¿x:Å›zý[æo+Ó…/é/+g”fZfg/Î9ÏݵáÕôëô×ËóE¿‹ÿ^÷FëÍù?ìþ\Y˜~Ë»úgé;™w'ß›¼ï[ôY|þ!åÃòRñG™§>q> |þ<³œ³B\©ú¢ý¥÷«Ç׉ՔÕÕÿB,¾;E^ cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFss0ˆÚÿR pHYsNNÆÊ/¥tIMEã$:.$¶­ 
vpAg’Zó!%Õ€IDATxÚìý{|#w}/þ¿66›‹’Ù¤¹iCÂ8´!ì˜÷”všÀׇ1œ²4î!HOB/HœíáRº‰þZhL·X…RNñÐm·6áR"ã!)—Èx€dµ¹ìdµÙÍ.K¢ßãÏh$Ë’lKšÑèõ|<üصF—ù­>ïÏuSµZ­‚ˆˆˆˆˆˆ(`NðûˆˆˆˆˆˆˆšaÂJDDDDDDÄ„•ˆˆˆˆˆˆ‰ +Ñ€³m†aø}DDD]Ç„•ˆˆhÀ™¦‰D"á÷iÐËçó°,ËïÓ ê9Æz1a%"""¢ Óu•x Œõþ:ñýïÿûý> r†sÝtÓMøâ¿Ó4qùå—cË–-îñ|>ï~8$I‚®ëPÅ=nòù< À$I¸ð Ýc™LŠ¢¸ÏoÛ6.¿ür÷9MÓÄË_þò5ß—h-ZÅøjÇòùðßDÕ*f¿øÅ/âèѣزe dY^§ù|že6 ŒÄº,Ë-Ë~ÆöÚ°‡5lÛÆÈȈ;ÿH×õº¡]™L¹\Î=6>>]×Ýã¹\ãããîsŽŽ"ŸÏ¯8nÛ6lÛÆøø8‰LÓtg2™5ß—¨S­b¼Ý±Æxóþ¾Öxe|S¿õ2öS©TÓV~Û¶‘H$`Ûv]Ã&ÑFµ«¯4jŒS–Ù4(6ëíÏØ^£*ù®P(T½ŠTUU­.--U—––ªªKKKîqY–«ªªV«Õª{|aaÁ=>33S•$Éý@uffÆý]Q”j2™tO§Óîó­å¾Djãk‰ÿ………*€êªÕêÚã•ñMýÖËØO§Ó+^çÀ+⚨[ZÅlµZ­ªªZ- îñÆ8e™Mƒb£±ÞîñŒíµakˆ!_©TÊÒ[( Ë2t]‡ªªuÃÂ4Msÿo†;ìÀ0ŒºßE+÷5@’$H’Ôö|:¹/Q'ZÅx»cŠ¢¸# t]G2™¬‹ÉµÆ+ã›ú©—±¯ªêŠ×-ôÞï ¢ni³«iŒS–Ù46ë<ž±Ý9&¬ Ë2 …`||›6mB*•à IhE ;Èåru?Í*2D~iã­ŽN·ÒÎØ¦AÒïØWétºîyˆº¥]Ì…ÅFcŸ•îbÂb!¥™™8p Ðuù|’$µLZEq[mÄÏÜÜÒé4ç.Q`´ŠñVǧÒnš&r¹lÛfÏ ”~ÇþÌÌ Òé4pþu]»˜% ‹Æ:?+ÝÅ„5ÄBJ"1•eÙ *,bÒ¶mÛuÁ®ªêŠÛÄÄo¢ hã­Ž‰ß5MC.—C2™ôûRˆÖÄØ—$ Ùl¹\ŽÛ.PWµ‹Y ýÈ0¢A°ÑXïäñÔ9&¬ æ%ŒŒ ‘H`ddÄ­¨È²Œ™™$ $ ŒŽŽÖ “L&ƒÑÑQw…๹9¿/‹ÈÕ*Æ[Dà {WiÐøûš¦AÓ46^RWµ‹YI’ÉdØ‹Do£±ÞIùNÛT­V«~Ÿ9D/ª$Iîp^±x’,˰, ²,»CɼI©w‘%1L˜(hšÅx'Çòù‘ƒC‚Î;$xÓ¦Mu‡’…išØºu+dYfÌÓPaì9ØÃJDDDDDDÄV"""""" ¤“ü>nùò—¿Œú§¹çžë÷©ôÅáÇqøða^ïž/‰`zzÚïKëØÄÄN=õT¿O£o|ðA\tÑE~ŸÆÀ^ïƒ>ˆ/|á ~_VÇX†‡[/Êðg?ûÙøà?è÷¥u„åw¸õ¢üþð‡?ŒX,æ÷¥u„åw¸¡üMÂzöÙgãꫯÆÔԔߧÒÅbóóó¼Þ >ß 9õÔS±gÏ¿O£o&''y½|¾AÂ2<܆½ gùn½(¿%YX~‡]ÊïÐ$¬ÃfÛ¶m~Ÿ¯—zÊ»ßð0¶ëvÃV¦ Ûõ»a+φíz‡Ý°•gA¸^&¬*"ú}¼^ê™±±1¿O×K=3leÚ°]ï°¶òlØ®wØ [y„ëå¢KDDDDDDHLX‰ˆˆˆˆˆ(˜°Q q+u…išPeÍ3 £åb¦i¶íºÛTU…eY0M¦i²,¨ª MÓ IÒªÏeÛ6LÓ,..âÈ‘#~¿mä#Ó4!IdYnzܲ,X–åþ.Ëòª÷]a«>Ö¶m†áÆdãç@¼¾øl©ªÚô³"^£Ñ/~ñ Ÿßa"""¢cÂ@¦iB–åU“/QÉ ¢¢(-µÕžh^‘Öu†aÔUÖE¦iP–e!ŸÏC×u÷±"©L&“e¦iÖU¤“Y‘hʲŒ\.ç>^Q÷5t]‡ªªu×fš&r¹\Ý}UU…a­»–Õ’hEQ°ÿþ5'Ô¹\–e¹É¢,Ën ›¦‰|>¿"QL§Óÿ½LÓ„®ën|5Ƹx ïmÞûÎçCœ—`$I‚¦in")γٹ‰†q]â6I’ IlÛ†ªªPeÅçE 1âšÆÇÇWÄôjI7V""" &¬}f Ãp;¯òm·¡ô®waÇÏŽož>þ÷É'ãìK/…¢(¸2Á#¿ü%núô§ÝJ°¨àf2$“I$“IضíVÖú^Ñ#išæŠŠ´—HNÅc-Ë‚aÈårîýÓé4ê’I‘ȆEQ033Y’`Ùv]"ß,Éï$I0 g ÎY°,çߪª"ëí}RÀ{mŠx^¯X,âÀÌŒßá:º®#•J¹1äíõÎçóÈårîí¢ÇÛ²,7¶D‚¦išûœù|‰Dªªº!¢Q§±Q¶mȲ\×ÙØø"ËòŠØÄgjf•Ø1.ËòŠÏ°xìz¶7ð^o3âgÖ³ƒ¶+ 
owôKc#V#1òàÖ[oÅ9çœã÷eÐk72L×uXÑpî}¼¸ˆWoã«®ën\{›}GˆÑdâ‰ê1aí‡|Ç?óüç±c(=ø ~ç/À—^ùJ|ó¥/­ë¡Ü>7‡üÁ ò·‹×çóx}>cÑ(žùÔ§PÞ¼_G¡P¨U’MÇñãÇq×G?Š¿ç=8ó™g 8é×~ GŽÁÌÑ£øÖ~ä¢ïݹÏO&kç&I@Ã[$“N‚¸L–e$- IUuŽ ¦YûQÈé4²Ùlíx&˜&äåÞ+—aªê¼öÜ\ÝK«¶ UˆŠ¼e¹\ÓÇÀ4×’$ç¼-˹&ñ»su +ÜÕU~GF ‰ž|Ñû-*‘Í¾àŸøéO‘ÎfaYpÂÁƒøîÔ{ñ/öïXxnlÝ„hôùxûÛ‹¸çŽ‘Ï›H¾´ŒìuM* ¶íþýLå$’Ðõò\*I¸´ñ$XZÂÉ^ax…a8·-Wâ#–…÷5©$#Ÿ¯%t’T«XÛöÊʯ÷µDrjYµdP’€tºi¯'V©Ø»e –ÈJRóç\íÜDO,mˆw¸·ø²ööT>òÏÿŒÌ³ž{¹×\’$¼óØ1}ôÀ©WA;~*¤ÿñ§ü>>~Ñ?c—`×o|Õ™ÈçÐIgÛV‘ïeȽë ðé4¯=öûaa9õÌd(¿òÖ'däèE(¼ú¯LÆYo¨fÎ}Œ$ÊÙ?ƒyý?À|dìÃÏÎ: Ù7ß X2…ßFþð[±ð¹ŸA¾öù0 ÏGb×$ì_἟ õŒ"ä—ž¼©@¿ïihªä¿¾º^û(¡\†ôËGœ“=ù²w˰m •r>:†.ÇÞ ydr÷ò‰?ÇÌköŠ‚Dö·ãÇQXþe2ËyðÓC:6óï¶Â>ºê%÷£ðÎg<ó‰Ka…ìóAáh÷¡$Io¥ Fbh¼þ.*áÜJ{ôØ1œyæ™xhË+L‘(†Ÿ~õ«°s9ŒÛ6’É$LÓÄñGÅwßô&lÝ´Éù^j,ÓÅ9Іï£GâS²ìžãÂÂ$Ór9+•pëïýžßo9­ƒhøh–Œæóyäóy(ŠâÆ£7Ù£°Òé4ç{)•J¹É¡ˆc]×ÝÆE‘$ŠÏ†¢(˜››«kHiüÌ4Žf£×R©LÓD2™D¡Pèx:–¸†NP9B†¨Ö5², ©T ³?ü!¤|Ú±cΗlcï£m»=„¯—e¼¾És©ÌÉë´wHUWÞ·ÙóËrûD²Më¨7kMšôÞšúJô†ˆáÞù÷޲…,ªa‡·Þêþí H½ù h“§@}Ù)0MÀN’ 4F™÷㪺­Ö>"Ç…"DÜû¸¡šÀºó ìeçEË7: ãŸÎ: "jD§cîÇQ(/zsL¬ùÇ€g’„윲|~Ïw_KÔUÌȵºµ” I8?BýG"ºü#\`'$PMÏs-·ïØ6NK óìpÞ‡†š¦‰÷âĆ׹tùÈ6|l"‘Çz(ä1·ß[¹nœG-’ÊÆ¹Ûº®cddétÚŠŸL&1#>˜Ë³mÛZ?>lÞ =êü¨ª¸†QûY~Œªª°î>ü0þàÛŸÆ×æçñšÃ‡qñÏ+Èã7aŸrû1HZmp€®ÖígBÛÿw¹ãì7#ÿл`?y2”çV ÿ1¬“öS—@Qœòò€Ü_iûvüæ_݇Gççýþ“ %«qÔ”‡H¸½Ÿbš†·‘DS™DºªªuɤwêˆèumL&³Ù¬;up¦Xx§}´›r ®qvµãbêõÖ5sð¾tň^wðÎw®~gQ3%bx“;ä<“t[väb¤>q ìM[±°ÓÜÇäóÎÏÜíg¹ ÙZÛ4Ö²î•|õEMoÏ~ü¬ºßk»“xŽ©çh^!iÖÞÓ ÍÚ_Ö±˜öºCÁ#z@½CjÏÿáñ“_üÿñØc$ ~:ÞwÊ)¸òé§`^ \.]×ñ±ÓOÇ™W]…?þýßÇû÷׆!XV-=Ag€š*Ëxê‚ PþЇð†3ÎÀyçär°Î{ ògü)p‰“ƒJ’]—`>÷k˜ùØ60úÂãPÿó~X‡Ïƒ…ȬqoC z÷™P”ÝÈî~;^“Ë!÷øõÈã5Ð^|BmIÏhygI‚Ka½y² (6¶ÄH÷­0ŒK Ëµå €å=Ï;šv ~ò¿ÿšÃ)‘H¸š¦Õ5Œd2¨ªê&‹¢7T ñI¡H8EJÌý¬›ò´¬]")ˆdr½;ô”hEmXK€3ÕIŒ6ëtØ6^}÷Ý~Ÿ5Q øš°‹ElÛ¶ Ñh´éñR©„H$²êñ~½ª’$áîO|‘ë¯þïÿõõœ(ø‚ßãããîÂDÜVkïð&ëÿü dÛFòî4 IHHÄ+°ÒNEÕkf†ÉÔ0 bŒûζ±ø€or î¿ä˜¦‰ ~ô#üïÇq褓`¼ãjC/ýÙÏpÉ©§O? 
¨*ŽÝr Þ}ÂâËOlEömïÃw¯3Ü-˜^þòoáÖ‡FvóÇ/|ùÊ[`œw'ÌÅӜƑƒ?ƒ<:LÝùlÚ¶“ÏÚ²Y` €žS€G€äêG@XÞíŒt$`é“a17l¦6+C†•žþ­V?olÌ’åú6]ï,YnÞ€$I+GjôʰÆv&“qÍk”J¥ Ë2fffê¶åß#Ùl¶.¹\m+.¡=–M“U±®…8fÛN JÃ4*—i:1ª@ W-ÍT¬¥ašÍ¿¥éZ°mç¹ £6ÌGUñÀ½÷võ}ifXcœ“/ ëÞ½{1==J¥ˆÇãÈf³ˆDœ/Èr¹Œt:R©À)ôÄDö^ºó%/Á–{ïŇ®º Ož|2N?~ÛÀ7Î;étÚ)xGGšÀ·‘¡áÔø‹Ld—GUQù­y$“¯$‰¹’oÁÂÂ[꫼*è¯,rdÔð jŒûIÌÿÞùÑâñ«®Âï<ˆ3¾÷=l~éKqä±§‘‰~æÃQ$åÚg'•ÌûUgà‚m#sÃAè'ÿ¨¯>®ü—oÇóï)á­o} &&Þ°( Ï·¸woÏÞÆaŽq\'ôû+• ¦§§155Ó4qÇw¸· »wïF4u—J¥ºã=aY8ûî»qŻޅ{ðA²YüÛ“OâÿÑ¡P(ÔZ gf8Ç’VØøð•Ï|ýÑ /¯ªª*òWýÌcÏw+­šÖü;WUë×ãj¶^ ‡ Çx¯™¦‰T*…T*…G>ö1`d€SÁÇùçÇð¯­à_þŽ}ñ^ü¹²€šÄhåkP®bnΩ‹ÏªzFÙKpÉ%(|õÌÌ8Ií¾}›qå•W╯|çóéýúÑ´ú…ГI§þÛéP{nE]/ì±-V±m¶HVgff°°”###Áøø8$IZÓ¶Z®&¯ÀI$Å1à §C ‘p~òyNn¸œ+;÷Cßçæjõ1ñ¡h¶hd2é|©‰Ÿ¥%çßlÖIB›µ¬x?LÙlm1HEq~_Cbêý®Ìçk§iÎïÞŸTʹü_üâÙÝù£7{ŒSxõ½‡µT*¡R©`ll ‰DÇÝ–œr¹Œb±ˆ[n¹Å=>66†½{÷bjjªgçõÙY|OUñü÷¿¸æ§ÄX^©·NÇ?†3¥ÁK¬málñ±¶Î]Q9j6u‚º/¨ñmš&®~ôQl¹öZç[QU¡›2òÇ®CáîÚpŸÕ»±áZŒ^Ô^Z¿ Æx¯Ø¶Þx#6ßvŽnÙ‚®¼¿öÀ¸ó–/á/¤Ûjá²Ëž…B¡€\NÂë^çY€},ÓÜ…OÔÊ_o¿±c§Ù牟±þ {lg2™ºEŒÄ¢Dbå\oBšÍf›îß”XdR oœ/ ]¯õf.²·WßþàÇ }ãó0žóèÌÂD–ï¾\õòæ»v2ñq`.½üyòn]·JG‚3òÀù¿81ºW|ß-ïZæþžH8ÿW”úÑÃbζm×êiâó™H`U"¿m¼ÚÐwEvíz '1ö§ðê{ÂÇW´îíÛ·gœq†ûˆÅbîñX,†r¹ÜÓóúàw¾SÛ_QQ€6ô|ºîr [š®Ê¶k-ožÅëXœÂz®Vè6öt‰ïˆBÁy]1zF–ko?Gð › Æw>Ÿ‡òÎwº 0Vz™ù1î>g] Þé@4\‚ã]! â™wÇ?ùILžtŽýëxæÁ“!+Á´$¤?þ[øß¸ÿß Tê]¤•Ée/W§Þ slçóyX–…Âz;JVgñ®'¶Ͼ²aÀ>ºúã¯G^Ê6° æJ9u…É$RFe+~'0Ó$yó~¦ÄÖÛ‰Dmd‚wzª¨ãN"(¦•Šï*±»H\ÅG"¡÷ËfëGkZmN¸8Y®ÿlwò¶6~g¶šÞ+aŽq›Ëel>rdMñuÑ¥ÉÉI÷Ã!ZsZ}(*•Š;ƾ›¼{s­—®;…YÒÓ*(IµŽZ1§HìõhµÝ’ÉZïi«7•‘Œz_OU/Bcq"Á T¿%¾ÅâÞÖóÌOSÈ~bm_–â‹^Ó8•A‰ñõC$ ÀmÛ¸éØ1<±u+Þ;2EQœm9þìÿá/K¯ÀÌU›¡ß$2χ$Ÿû (c~_õȠǶ—ØNf-Éê "Û³m§ÕÜ+›E>˜w€³þ¢s¨ÿŽqV¡®ýÞø4Û-`†aÔêMb×%Q•ëdZi³ùÖ"™m6µÕ{ƒ.L1N>Cî¼-F^žÂ)û÷ã”§žZÓËô}«×u×]‡‰‰ ÀÍ7ß î$ðf:´ê±x·ß~ûº6[ÎårZ©Î»5 ¦G,,8Ç·nu¦gx‡¦x‡YŠ9ýkábQ(8…»Ë‹s ³½{÷⦛nj:§›ºßðàƒbrr{׸ƒ®ëu c†“ëÙ‚¦ÇouÉää${þ:A)Ã×Ã0 ü/|! 
P(`)™DbtçýíßbaasssH&“Hé*’S›8Ÿ™¥%§\çÿLOO㦛nÂOz¸·MPÊïõÊd2H$îÏÌÌŒ;¸#ù¼3¤+‘¶n…ý?þù#;aeç–Ÿß™Â-ê4¢"~šÍ¥îÖ´$ñÜŠâ|E=J$ŸƒÞ(/Êo1L·W¹ü¦•<Ñ@%攋9æÞÌïâdssµ„Ä[xìÝ»üÎwðÝÇ_Û¹U P(T·oß^­V«Õ»îºËý¿Ðì¶FwÝuWõ¦›nZóküÚת²,¯ë¼p~I:]­ÎÌø}þ[o¼¬G7â»Z­Vo¸á†5¿öÝ_ûZõªK.©.--¹·8P­z~]ÇõT«Éd_Þ:Ú€õÄËzùY†¯×¿îØQ=rÁÕª¢8£¢ ^>Äú/~–ßë•Íf«š¦­ïÁssÎg!™¬öü¤šNW«šV­Ê²ó1áG¤?X~Ó†8àTÖÄÏÜœóVUçÃ,IÕêÂÂÊlj»,W«ÙìÆ*‹-¬'^ú>$xzz¥R {öìqocç`Û¶mœa bï'ïÿ»é‰Ÿþ‡çw¾é¦5?Ö¶‡tz°ZóTµy uGâÛ4M,Œá o{¢ž¦n± ÌúŸ—½KÃ,H1¾^¶mc÷ÏÏÁü[îDòºc¿ûÙÛ•‰‘(\Iwx„!¶ÅÜëuÿUèt'2< *jCcYæ‡CbœP¿˜e­_ïœ-(Š“´´ª®gœ~Ÿô}HðŽ;P,ë†:†á~¢Ñ(âñxݰÃ0hµÛ:ض=¯z~ñ›¿¹®áÀb"ÿ %«@m!ê Ä·išxǛބ‰ÓNCôà”oË»p¬[.çŒcåex%Æ7"ŸÏãeãã.8‰7‰q# Kªj±Ý" AmÛ¶‘J¥:þ+Vq4 äóÎwƒ®e¨o8 p·WbyƒãCIì$†æŽŒ8ÿйˆÍæŠy£ÞMèíB|Y%xll ;wîD<Ǿ}ûpèСºÖž©©)LNN¢X,º“¼¯»îº®ƒmÛH$øwçx^WTNÚ%¡b²ÿz¶$ 1‘_BÝ„øœ=õ¾|õÕØ|þùnÕ¢%‰+» Äøšd2uóhòù< IÎw}.çüxËtïb.42¶—Ù¶ññqhš¶ú’¦é´8ŠŠîòƾš]¿þÅ×i©AŽñÐÛ‰x{‘š­¾ê]ôÆ»ÿÒñe•à]»vabbÂ],×Åbøüç?ï¶5ß‘¬þ¯×¼çÜwŸ;ÞK,ÔöÊj$¶Œ»"Ó¸Ru—Ÿñ 8‹m¨ªŠóu½¶ö?VŠŽÕicüŽñµ8òÑbÓßý6=ŠS>úQ†mÛ&`š’ûyH§ëGضóÃáÀÃgb[0M©T Édrõcù<ËÁLÎÀHÎ8» ä…$ëÃfc<42™Z‚*–£n·Ù¶ûõˆ±pÄ꺖åÔ¿šlbËÄÆ7ÄV¡·É² Y–!Iöïßßù>ÏË|݇µWħqQ×92âv“J’“ ŠeÒ½s—r9'Þ$)\=K #äÜ÷ ènE­ —XVó­xDC9Äü¥¹¹9äS&¬ç¿âOœJ9áÎÆ ™L†aàíòÿðâ ðwýGüëGc¸úW¿ÀsÿìÈò¹u÷÷–‡Ý>OÔm¦iºAÑ8©ªêòðöU¾ % Æh©™íH§ÃU!ò•iÖö&jÿ&“ƒ= s™eYÐu¶mömXžŠ¸(‡TU…¢(MËI’VžP,1??¿¦ó eºÃ0077W7Ó¶¤Ç[‘ "–å$²aLŠlÛùœ¥ÓµáÐâ=ÈdœÏZ«ÖWÓ\¹ ‰hdÑ4çó*žWì1+>Û¢"èm”ÃUÅc¼£$R)çÜÿ™LmÄk>ïü½Äc¼£.¼ Ãù›zÅ ±ßª¢(Pìq¤6'1>^{¿™¬Ò°H-/4P(Ü/RI:ïùë;ñåc¯ÄÉyEËÇsþ*‘˜£ ²,cÁ3å£QmaH€Â›ø@ÔuÉä@Ç1M¦iºI¨è% ¦eY°, š¦A]¾¾ÕÓ~ eÂ*IÍ“7Ó÷Œ_½ç)Ì…»·2¦(µ!ЀSY¯%²\K³Yçv±ðŽ7v—¾o|^1 I$ºÞÇ‹÷ºñ1ÀÊß›iòÚnåîbÑï¿@÷Y–å,eÌÌÔö~_ïDE×ñÍ÷½c›7ã5ÿú¯Èå%w ‡Óˆv þ`fõ‡»åQˆÞÔlÁiÝ}¦y¦ÇúK´Ëó¿Ý¡˜B€?X¦iÂ0 ·'Ô;l×4M¨ª Y–Ý:£Út·êõK(ÖæÓ8ŒZË€¦šæ.ªæÄt=TÕé¹4ÍÚ{Óø¹Þ¥^Íl<¿0öŠ·cY$Irz²¥æàTÒ9Œ†EùÆQzþó‘L§a|ÿ\FýšÙló邪:õ&¬4"am+Ÿ‡lY(ÄDë"ö7µíÚ‚ «XŠR‘ŒŠa»–eAQw¸.à$¤•Ê„ðô°ærnvÕ8ÁW’8ìk5’4£†šmÛPÅݱ€h˜Tî¸ÿuè´åEd«yCV«3Iª[X›(0 Ã@ºÙŠÞJµ$Áú«Ï@ºóV«jM4 ÃÉ€Ú<µvÃõ|Ïç‘ÏçÝEŒÄàÿ°Ý^ mÂêVJ–[I ¬LX»µr*‘oÄdâlÖ‹ÐíkˆͽøŽ¾üåî—5Ëv 1¬¯iETÌßI¥ÓDêÌy¤—΂ú¿ÏšhÀ躓 ú\2Mº®»óL‡æ*Š‚¹¹¹5¯²;èB›°Ë *Y–óŸåñÚBã"=Diyõ¬'R©¡+¼ˆ¼¢ÿùŸøGi:uwö”RXèºÞ|8ŸaÀ¬<:²€çÇô½¾M4˜ú<äR¬ú-†ñÚ¶ 
Ó4ݹ¥ÌW&¡MXÝEf–+ñû¿õ-(é4ÄxI]—9˜žyK ù+‹ø½ÿú/§ζ1rÅ©XÚ·ÙïS#ê#•ÂæK/Å+$©é–]DƒÌ4Mwuà:²Œ ²Ð–×â0Í•Û>Q±w¥Ó4‘ÏçÝÄ´q(o6› íÞZSÂZ,ë6.‹0 ‘H;vìÞ&Ãb2_6‹oŽ:A ë°^þûd£gÝóðeP½`þË}PÎØàŠ =/Ñ )ßv^ýá#ŸwFG²‡‰ÂÄ0 Ì4ka—e$ÿ¬¶uãžhbú”˜ïÝÇVM±Ÿ©®ëeš¦5ÿT*…‘‘är9X–MÓ°´´„¤H>¨«Úö°–J%Äãq71œ@ݪuÞãA Iµø4 œr  ëP®|)TÆ…„¹i’ØvÒ'_l¾1h[†õŒè]œžU&¬&¢®åö°&NK¼,sl¢v’É®AÈçóÈårÐ4 Éd’=©}Ô¶‡5‰ R©ÔÝV*•‹Å—¤6Êdjÿ¿òÊ+|ʵQ£ÐXøÞ °m/zÑÙ_ûëPdÛïS"ê Û¶±ÿSŸ‚hTU¦pÉd2µ­-–WÓ¥Úðw"j¡‹­÷º®#‘HÀ4M d³Ù–sË©ûÚ&¬±X ¥R ¥R €3Ø0Œ+‹¡ÀÛ¶móûš¦ #o-ÿ×ÄE/x2W–Ì&x g+gytå#;1s÷¿O‰¨/t]Çw I0MgäQXäóy(ŠR«çr0^þ~x–!"Á4ë{©º@×uwدwn*NòGÛ!Á±X ñx“““C±XD¥Rq‡W*ÌÏÏc÷î݈ÇãîªÁ¾³mHgž ÛvZâeY†þ Œ,ãŒBÂ0°ÜÒn;[6q,0 ‘\.‡ÅãÇMC>ÅáÀ¶m#—Ë¡ 6”ÏçUEêý`nŽE=QÃR©®,oÛ6R©LÓ„ªªPU•Ã~¢£U‚³Ù,¦§§Q(‰Dê`ºùæ›1;;‹x<^º†ù¢ëàY ˜ÛP¨Ÿ{ÊoŸÓ494…†ŠišÐd'/×Ü… ÐPxˆ9rnON.‡üÛ‹Ð4'Ö‰€e9‰*à,¬´ÁžOÃ0J¥N§1Ç„!p:JXE’ÚÌØØÆÆÆ‚Ó³*Ø6¤ ·à¡‡rzŸÀ‚žÂ%ùíë }èó0 Ûq¢a ë:Þrâ‰n·*e¤°°mº®cA¬ª´¼pî“ç@t¸ =Ñ«:3Ó•á5bïÔ¹¹9vTÛ9¬íD£Ñà%«`Y®ºßøÆa(ŠÒí¡íD¾“7—!Ip·õ º®ãŠGþË7°l§P½«n#¤$Áxñ_@Q¸¨‘+“qzU7X÷1 ###°m…BÉj€uÔà 8‹*ÍÏÏvìØ±bÑ¥À±,( ð•¯<†³Ïæ‚2–…ü‘HÂIX‰†…išxåWà“÷ì@~öžyûí°ßv#ÌO¯©C‰hMüŠñ5³,g8ðÌLGwÏçó0MKKKý?Wê¹¶=¬Û¶mÃÄÄb±XÝÏÄÄâñøŠÛ;199‰x<Ó4qÇw #íéݽ{7¢Ñ¨{¼T*azzzÍÇá’ä‡~Ä7÷ä#?õ« ÷², ²,CÿêVØç3Y¥Þð#¶'aE>ý¬w€BÔK~Åøš‰ÞÕäóyäóyÌqÒwhµíaF£]ÝoµX,¢R©¸Ï‰D011½{÷¢T*!‰ X,â–[nqaïÞ½k>Û¶¹0õU?ãpe’Ì^©úã‚išxÉe—á4É€ªŽùý6PùÛ¶mãMgŒŽÂøÏS9‰zƯ_³5ô®š¦‰\.‡……nób ŠÅ"æççÝßwìØx<Þñãc±öìÙS·Òð¾}û8 ñoom,ó=QüˆonkCýäWnYÞ~ñÅxîXÀW©§åWl›¦‰ß}ê)Ø/yÌr80õÎÀÔÁ3™ŽzWMÓÄøø8æææ˜¬†\Ç‹.†k¯½“““˜u&''qíµ×v¼'k$©KpËå2¦§§166†h4ÚòCQ©TV=väÈìß¿ßÝ7Vœ3˜•Ëe,..âñÇïús÷*¾àðáÃ(‹l¼¡¶ŠÅ"<Ø“çîgîeYNü¯C0޽¢Oï"U©TÂââ":ÔÕçõ³ü¾gÇè¿|‡“[~·‹©õð«ü^˪۞r5Þd•£)‡¨ƒ¯µüî(a-‹H§Óضmr¹LÓtr¹¶mÛ†;w®)P+• ¦§§qíµ×"c×®]îí«iuqGŽÁC=T×û €AL+ìÛ·¯g «Ðíøœ Ïüü¼Û pž657??ߓʎW¿ÊðLȼäNü¯Ÿÿùÿ¸Ö©Ïïã;IAÔ«„UèWù-†Í/})$ LXÉ-¿{ß@ÿÊïu‘e ›my&«ƒKÔÁ×ß Þ½{÷ª«„©ª UU‘N§±{÷nÜvÛmmŸ¯T*a÷î݈D"¸å–[V =XM4]õØ9çœEQú;Æž’h]ìJÁÚD/âÎ=÷ÜñÍ‚šš™ššêxÔËzô³ O§~v1¾rï—pnå$hc÷ñ¤ ½A½(ÃûY~{ɲÌ=ã @­ünSë5èupÛ¶‘J¥033Ã:ÐZo¼m«Ârã7¶¼ßÄÄÊårG•¤t:펣oüplÛ¶ ê†%”Ëåu}pMÓä`ê»~Å7‘_úã’üÖoÝŽç¼ê>`ûvÎò£ü6M‹‹WÀ¶ý¾zƒ^GÉd2ng ¶ «håi¬"èÛ C3 årªª¢X,ÖýN N<ÇÞ½{ë“H$Öul}¡~êg|s80ù¡¯e¸m#3ñ0,ËÂýÑy(ü¾z ³~×OºŽ¶cvö<¿/Ÿ†€/1Þ 
±*pù|–e!ÛfÈ0…OÛ!Á‘H¤«ãèEìäääŠc¢>55…ÉÉIwùíH$‚ë®»Îï÷Ѝ­~Æ7·m"?ô3Æ­/Þ ÓØ ĸ}õž/õ“LKŸ‰¯~5£^ l<—k»<¶eYîö54|Ú&¬±X •J¥íÑ:ã]*»™©©©¶cÜc±>ÿùÏ»¬µl›ãÅ(ê·~Æ7‘úãòO¡°ófŒè·o¢žë{ùm€¢àE¶Í¡îÔ¬£ØvG{®¦R)d³YNõR%¬±X étzÅÞM‚Xm,·œ°½Ko¯{Ÿ(¨ºß6'Ÿ‡$IиŒöÐêh[›]»v¡\.cçÎØ»w¯;»\.cï޽عs'Êår VèeežÂÎ4MVä)Ôò÷ìÀ‡pãœBɾóû¸ð¼ÃŒonù|Ë=WmÛF.—ÃL›X ·Ž¶µ«‰MOOc÷îÝ+ŽÇãqLMMu­wµX™§aÀ§0³9 'ð#HÛ8ŒÂÇøùsqdÓc¸”CiX-¢A‹ºL*•B2™äPà!×Q ԒV±uM©Tr‡ G£QT*‹Å@ÍÇcp .û¿ÂÁ߈àzõ~Ÿ Q÷=ü0NºêqN_¢á•Ë9›m¯B×uضt‹ûÐpè8aÄ7û•J%LNNf¡# ¦°3M“ó9(Ô¬C¿<þQ(ʜߧBÔuæ#Ûð¬cß‚$§¡Ÿ¨oÄ66«ì§jY2™ W&ÎaDLÀ-ófö¯ÎÀÑ2Nö¦­øå/YŽÓp’¤–+sU`òZsë aÂJD4¸lI†mY~ŸQOHÏfTQßµh¨Éåre™£ÈÈê„•(Ì,Vä)ääØfØ6[×)œ Å ö®y™¦‰|>Ï¡ÀT'ÔC‚9̆ŒÃd(ì²YÆ9…‰VJ¥R˜™™aÙOuÚö°–J%LOO·}¢J¥â÷µ¬À`§0c|SØI+ôN¹ððÃÇØ°NÃÇ0œáÀMê0™Lªª®XØ•¨kC‚#‘H ¶´aË%…WÁ¦°3Mà«_}‚z %Û.¸à)8à÷™õY*,-­¸Ù4MèºÎ¡ÀÔTÛ„Uì¿:hØûDaÆU°)ìôëÿ[/? lóûLˆº/k&×4&¬4\LsÕÅ–òù÷Uq80Ñ`S7}ÿ1+ô>¶Ì3ÿiÈÐ$)ÕuÉ*µ´®Ö›o¾“““~ŸûªØjIag†ß§@ÔSæÃQ¬ÐSøX_¼&6®Óp±,g¡¥&e:{W©Ð & ;®¢Ga6~àã\\ŒBÉúÞPÎ/öm–ã4ãüË!Á2Ö#§A}阦é÷©õW“Î$&¬Ô‰P&¬DaÇž' 3ãKÇ Ÿu€z %ë¡-~{;Ëp.MöW[ôqT$µÓvVÁ»ÈR¥RYq›0ˆ{¶ "ö¢îË~: Ã0ØèHCMì½:33ã÷©Ðh›°F£QLMMù}žDäÁá3f² ;VGS)г%Ëqfù|žsW©c &¢à˜››óûˆzFÓÃ(sŽ…GÉÐÐpZgV,¸”Ïç±Ðd^+Q3'¬¥R ³³³˜˜˜@,[µ"ÁE2ˆˆˆˆV'›! 
½|H§ënÒuªªrX¢ a|SØ1ƃMìqØéœI˲êöE”$ 333C9ç²—±Í!Á=/¿MsEÂÊÞUZ¶=¬¥R ÑhÑh´îv1 XÇ÷í۷⾫)—˘œœ\Ñâ[.—‘N§Q*• iï.QÐ1¾)ìzã–eù}y˲¬CUUu{Q[½Ú###Ð4 ªªeOw?Êo&¬ä§¾ÔQL³éüU.¶DkÕ6aD"8tèЊÛ÷ìÙ³î-—Ëî"NÍìÞ½Ñh·Ür *• vî܉ééi7¦Àø¦°ëGŒÒàÕæ…öK'Ie'ɪ°°°Ó4¡ë:2™ ,ËB¡PŠ!|ý*¿Ù C~ékŲÏÂJ\l‰Ö«m‹ÅP©TP,ÇW½ŸVàíumußÕ¾<Ëå2ŠÅ"n¹å÷ùÆÆÆ°wï^Vèi 0¾)ìãõüN¬{êdYF6›u×u½ãaxâ着ª› f2äóyÌÌÌøzýЯØf…üÒ·òÛ¶Wlec†ïå% ¦ŽÖX,†ééiÄb±¦ i¥RÁôô4âñ8b±XÛÃØØŠÅ"&''ëŽíÛ·Ï}]ï9p Æ7…]?b|V zl-ó%mÛF>Ÿ_qMÃ’`õ«ü–÷“‚§ouÓ¬KXmÛ†aCÑðEÝ×Ѷ6»víB¹\ÆÎ;1;;‹b±èþLOOã oxÊårWZÏ[}(*•ʪǎ9‚ýû÷·œ@N81¶¸¸ˆÇÜ—×^M«ø€Ã‡£X,2¹¥¶ŠÅ"<èËkw« †á§½b6mÚäþ˜¦‰ôò<²ÆcÞíl@Ó4ȲŒ‘‘w[Ã0ÜÇA©TÂââbÓéJ½Ô­ò{c¨ÿDùÝ.¦z¡«upU<É©®ëÐ4­ï×DÁ"êàk-¿;ÚÖ&‹á–[nÁìì,¦§§WÃÄÄDÇ‹-µÒêqèСU‡9r=ôæçç[]&Ú·oñä“Oöýµ×߀SᙟŸ€®|Ö(¼æçç}©ì,Ãý¦ª*ªÕêšyoŸ™™eY°, ’$®ñ@$¬ýÖ­òûСC{O)8DùÝ.¦z¡—å·auSh8‰:øZu”°Ny×®]˜ššrW°ê0áõj5¤¸U%ýœs΢(¡#EÝ# SQyè§õÆ7œ{oêHc9ÝOÝ(ÃÙå?ïÖ6A366†h4Ú÷2¼[å÷ZÀ¢á#Êo?¦{YçVN¬¿ÞÑ`¯H$‚x<îþt»õgÛ¶mê‡%”Ëeö(Q(0¾)ìºãÜTž‚¨[å7d(¨zUGábK´QkNX{-"cïÞ½îm†a¬˜gC4ˆßvŒq «nÅö mÙDÃ¥«å·§aÆ0 6BÒ†t<$¸Ÿ¦¦¦099‰b±ˆJ¥‚H$‚ë®»ÎïÓ"ê Æ7…ÝFcœ=PT,¿)ìºã†躻è’išm«E´_Öx<Þ´b‹ÅðùÏÞƒÅ8h1¾)ìzã’$±Š|ÕËòÛ4M®–J¾ëiŲÏÖM–eqþ*mHà† Þ¹²DaÃø¦°ÛHŒ'“IÛÛ¶‘J¥°uëVlÚ´ ããã]ïµ6MÓÝöFüd2™ºãÞßY–…D"]×ën×u£££Ø´iFFFÏçý~;WèFùÍø¦ ÛpŒ[–³­ œ²€ñNØ„•ˆˆˆÖ.‘H@’$8pÕjÉd‰D¢«I«mÛ€B¡€B¡€™™èºî¾†mÛ-_/ŸÏÃ¶íº„Õ²,¤R)ÌÍÍ¡Z­baa¹\ŽCĉiË=ªœ¿JÝÈ9¬DDD´vº®C’¤ºýUUE6›E.—ƒªªn2iÛ6’ɤ;vddš¦AUÕPÎeæ\> =ÏüUÃ0ØHCÆ„•ˆˆh#ZÌÕàô44ö0†óÓJ2éöR´|-Uuç‹­F’$·§Ò;G8™LÂX>UUaY–û]סª*dYvïß®µ“ýu]‡¢(0 ’$¹ +,,,À4M躎L&˲P(8¤hPx†s±%ê&¬DDDÑ®ÐÓÛà’åõ=®Ùc<BY–›Îù\m™äy Û¶Ý䵓•šeY®,†·{\>Ÿ‡¢(ÈårœJ­øq.QuÏ5“É ŸÏ³‡†hPx†sþ*u V""¢XϰUY^Ù{Ú…×C½½¨¶m»·‰¹¨‚aeÙí h¦]¯q¦i¶íMÑu²,»C •J¹·7Kx¥f‰;“¢8?p£Â8¬Ÿú +QHH’„B¡€ññq7y4MÉdÉd™L¶m»+ [–…l6 I’0::ŠT*å>&›Íº=®Í†M›6¹¿«ªŠ¹¹¹–ÇżU/UU‘Éd°´´Ã0022âž»mÛuÉ-œ§ÑÊ0 w¸?ÑF0a%"" EQ°´´ä®¬(J]/¥ªªno«·÷CÌÏ!îë½7y¬V««žƒªª-{išæ&±333îð`I’8œhÀq„uV""¢j•ìy·éô1ý²Ú¹ÑààüUê&&¬DDDC‚û!Q?0a¥n:Áï ""¢þ`ï%õƒeYLXiÓ¾ûÝ3qäÈæ5=Ž=¬DDDDDÔ5¦i2a 8ÛvH/EqvT[…‹m¾- Èçk÷ó>NUtÚ¹-“qžCìx–H¬|݃/Àë^wʚΕ +u…eYÉÑ#¦é$”²ì$œËÛYp~_Þκ.iqþ]ZrþÍdêNY®mûíý¿øpnóîP¤ªÎmÞ6 Euo¶È{±¸ˆùùƒkºn&¬DDD!cYR©’ÉdÝ62‰†ænI’N§¡( LÓD&“A:®[Ø0 
är9d³Y(вâ9EA6›…išÐuÝÝË•ˆ†{WÛ3M'ÁlìÑô&âÿÙ,LÖz05ÍùX¹¥w³-¾E¢*tRD‹žU¡1aí7&¬DDD!“ÏçaÛ6t]¯KX èÛn&ŸÏc||KKK°mÛM:½ «®ëî9ÏaÛ6ÆÇÇ‘Ëåܤ—ˆ†[ÐV‘,Šm¦ÅIMH¥œÄl-ín–UK>§×Ó¶ë“OËr’¾…çw]wþ=š²\KEÛ¨±S’j‰kØ1a%"" ]×Q(0::Úrx^2™D*•ro–t¶ª|нR¹À  ¦i"Nûz–åüˆäO×kÃgÅPVñ¯(¥–PNB›J­ì±4M'Á™q~I®(½CeWK>âf=£TÄ•ˆˆ(DLÓ„$Ieš¦A×õºÊ£!º€Ç@UU·gVô¶6&±Þç0 ƒÛå‘˲,HͲ´=¥¦éüd³N’hµÞMUuLÏ`“ލêÊá´Í4ÎÓdqØ}LX‰ˆˆ6 Ù*ˆ‚¨ÈˆE.De*Ÿ¯ k$æ'uºâ¢w>à$¡Š¢À0 H’Ô2a5MsE²©ir¹4Ms“ÑÆ„5çYé#™L"™LÖ=/ 'Ó4ë¦ô‚®×æ|ʲSF*ŠSŠ^J&á„•ˆˆhš­‚Ø¨qøW2Ù¾BÕ銋òù<Eq“J˲ê†{E *y“V1,XÌim6Ô·ÐɉÑÐéÅô±ýŠ6+IõCr)ü˜°…„®ëe¹.¡L¥RM‡þ¶¢iR©TÏ{Jˆ(\´µŽ»]…HPÅüSËr†èz'¢áq‚ß'@DDDÝaÆŠ £ªªÈ{w|o8f,1Ùk™˜¿ºÖʧaØ´i“û“h5^šˆÈÃ0œi##ÎI¢XJ&Q*LT‡{X‰ˆˆBb¦É9MÓÜÄÓ»¥ n“eÙíQU¥î¾ÞÛfÏ8ÉïjLjˆš1MgN¿aÔöþL§›¯¬KË=¬DDDDDÔsù¼Óƒ*}ˆù¨KKΜTMc²J+±‡•ˆˆˆˆˆzÂ0œ…“ÄŠæÞç¸ÿ(u‚=¬DDDDDÔU¶íÌIÍd˜”ÒÆ>a-•J(—Ë~ŸQO0¾)ìãfŒo ³Ä·iÖö^Xp¶é"Z¯À .—ËH§Ó(•JœÅ¼• 2Æ7…cœÂŒñMa¶ÑøÖu§Wuf†+ûRw¶‡u÷î݈F£0MwÜqJ¥¦§§ý>-¢®`|SØ1Æ)Ìßf‰oËr¶¤)˜¬R÷2a-—Ë(‹˜˜˜D"ŒÕ-«O4¨ßvŒq 3Æ7…ÙFã[–!Àœ³JÝÈ„uß¾}€X,æÞ‹Å8OÄ£T*aïÞ½~Ÿ¯wß¶ÞŠ0]/c¼½0•iÃv½ŒïöÂTž Ûõ2¾Û Sy6(×È9¬­>•J‘HdÅí?þ8~ðƒàæ›oÆe—]æ÷%ôÜââ"Fý>•»ÞÇ{ wß}7üq_®e=ñ-Îûæ›oÆ9眃sÎ9Ç—sï§»îº ;vìðû4òz±ÿ~ß®…ex{,Ã×ïÀüc<õÔS¾\Ëzâ›åw¸õ¢ü.—˾”,¿Ûcù½~¢¾Öò; k¥RYõØ¡C‡š~XN8á8pwÜqGËLJšC‡óóó~ŸÊÀ]ïþýûñóŸÿgžy¦/ײžø€-[¶àŽ;îÀ\€ /¼Ð—sï§­[·M|wûzMÓÄ3Ï<ãÛµ° oeøúýä'?Áã?Žç=ïy¾\Ëzâ›åw¸õ¢ü~øá‡}IˆX~·Çò{ýD|­åw Vï0„F«}x_ûÚ×ⵯ}­ß§NÔÖzân¾ùf¿O¨#,Ã)ÌÖß,¿iP°ü¦ äÖmÛ¶¨–à×Тnc|SØ1Æ)ÌßfŒo ¢@&¬Ñhñx¼n‚¯aHˆˆ‰ã›ÂŽ1NaÆø¦0c|SmªV«U¿O¢™R©„ÉÉID£Qw’÷ž={VßG4HßvŒq 3Æ7…㛂&° +àLü.•J€x<î÷éuã›ÂŽ1NaÆø¦0c|S:a%"""""¢áÈ9¬Ã¦X,®z¬T*¹-\«oµgV»ãýÖîz¼÷ Ãõ’cµ[|wrMâ>a¹^b¾ÚýÂp½´þøÄ¿õFâ{¯—X~¯v¿À]o•|µoß¾êöíÛWÜ~ï½÷V¯¹æšêöíÛ«Û·o¯^sÍ5Õ{ï½·îqo}ë[Ýãï~÷»W)‹uK†7«T*˜ššD"LLL \.»ÝíÅbîñ±±1 h{Ükmu=B¥RA&“ªªu´ë%Çj1¶øîäšÆwذ gfëï;ï¼sàþÖ‰oÆö`bù=xå7VŸŒaÏž=nyÅb±ˇïÛ·€âÿ±X¬î1bÜx»ãýÖîz„Ý»wcllÌÝ´ºñ¾ƒr½äX-ÆÃß\Àø–á,ÃÃl½ñ}øða÷>Þûùo½‘øfl&–߃W~3a  H$R·„x¹\Æôô4ÆÆÆF[A¥Ri{ðÀµ¸ËðpëE‰DfNËïpëEùýš×¼‘HÄïKëËïp Bùš„õ¿þë¿ ëúÀ|ymÔþýûñÐCñz7ð|‡¨„õŸþéŸê|7êöÛoÇ«_ýj¿Oc`¯÷öÛo¨„•ex¸õ¢ 00ïËïpëEùýÒ—¾t`V–ßá„ò;4 ëÙgŸ EQ055å÷©ôE©TB©TÂØØ˜ß§2×[,1??ï÷e­É¹çž;4ñ-ðzׯT*ù}9kÂ2<܆½ gù~Ý.¿£Ñ¨ß—Ô1–ßá„ò;4 ë°‰ÅbˆÅb~Ÿ¯—zfX¾ø†õz‡Ý°•iÃv½ÃnØÊ³a»Þa7låY®—«Q 
1a%"""""¢@bÂJDDDDDDÄ„•ˆˆˆˆˆˆ‰ +V"""""" $&¬DDDDDDH܇•zƶÓTµùqËr~†óU­=‡,;?âwÀù×¶k“e ™¬=P{í|¾þµöï¿‘ÈC~¿EDDDDDÔÖ50M@QœS)ç¶B¤ÕcÛ@&ã$Kª hš“X ƒLÆùW$™ŒóÞe³ÎûhYµÄr5¶]K`EÂê}Œ7ö&¤¹œsq\Qêß÷ÅÅ {¬â÷[DDDDDD-0aí®;IЂ“ü,,8½v‰Dó¤U$·’ä$©ŠâÜ|ܹ-™tn8™L}Wf †,-Õ~Ïfë‹^ÔVd¹þq¿¯¦PXþe‰Uu¸ü‡:㌃˜Ÿ?è÷»ID,wcrô´ô`´BZ0:ê|÷ÌÌø}6홦ó=’N;ßÁ²ì|—Ñpaºssõ¿‹œmtÔùò—$º®ãرëðÍoÆÜDV$eé4 ( …%|þÏ.F©ð\ìÌl±‰ |4ƒ$IH&“[tÁš¦ó²,#Ù"i4MöòxYI’ lô[Þ¶nåVÝÉù|‡ ¹aÔ2{Ûvj%†áüf¦yw´aÔ?ÎKŒöþ¾´ä´6ŒŽ:YÆiåòÆÞ!¤ë:LÓ„eY˜™™ÔðÞÛ¶ ]×a$IÂ#¼ÏyÎYؼù›0—Çq+ŠUU¡6i©°m¦iÂ0 X–…d2Ùô~@}|Xõ~k%*ÈÉdÇ¡î+ËrÚ4m@À˜mù?œ´ê€Ä‹,ÙXˆN 7¿ ©ÔŽ5%­†±²xòùú-ígÛµQ0âkLŒÈñÒ?õfö< àtȲó47ç<îÖ[ÏÅæÍgúý–­›·Ó8Eê¿x¶ß§G(¾'¬ÅbñxÜ—×_šÙl­Ò×,¯³mšÖ¼6¢i6þó?oÁß8‚‹.:މ Ÿüä4^÷ºó IïX–]בÏç¡( F·nEº’Á›:Šo¦€Ý‡á§žŠÛ$ ãããe¹.iµm–eÁ²,·ÂoYFFFÜÄÁ²,˜¦é&²,»Iê_þ2þ£RÁ‰7ÜPw_HŒŒ úðÃøÁe—¹·Àö°ýÀœþÆ7âꫯÆéõWΔN¯þ†ŠÚ¾mCÿ×?DöOôŸ;·7¾¹¶í$Šâüß²jÙýrRÙÔȈsIªŸÌ*ž[Œn”N;5£\°,\ôµ¯á¢;zc~Æw§ ÀišH&“+’P˲Ëå`†›hJ’„ÑÑQÌÍÍAQ˜¦‰|>Ã0 i²Ù,,ËÂï½åEHýPÃ{6ÏâÿÞòK\rÉ™0 º®#—ËpbÛûšŠ¢¸±›Éd ( 4MscÛ4MH’T÷1M©T ªªB–e7ñ5MÓ}>I’êâ[eét²,Ã0œ ±¦­>rb£,«Ö&#†©¯²b¾¶>oYµ÷ÎñN&k‰ÂòÛ Eé_…gb¼rÿýh—?éY‡‘ûï2²ÿ~¥/çÑ8Ï_ÜfÛõóþí}‡¡},y*‰ó“ÈÜþSŒ^õ&ho9ªZ‹³TÊù¿É2:êüëCIrž[4âŠÛ× Ðu@•aþä ©û ‘e@>÷I ÿ@–¡>ëYÀO~R»°'ŸDú¼Û ½àNwG6‰Ìöê< ?½â´ž¾·ÃÛ½fY$IZñ}#ˆI§#¹±,êATUuËFQ¸õÖ'qèÐ!¼ím,?gýš^Í’FUuþŸÏ{Ï»öÞÏH"áùݶ‘P«Í¯û¡-XúÔ·EA&'Á²j¿¬Ï}zá,à±Ç òNztk_þŒq›ªÕjuãO³>år×^{튊äôô4fggën‹ÇãØ³gϪÏU,1??©©©Ž__tf³µy© µã¶mãu¯û>î¿ÿvD"ÿ‚d2‰d2 ˲`Û¶[ùÖ4 étÚ-|mÛF&“qï=v ï9ûl<ï}ïCäU¯ªÕ’Iär@:¹œ¼-,Ëk»¡dUÅy~O úX$‚øÿÀéÇãeO=…+ï»O_y%¶|ë[õjY¨ìÚ…#ßü&~|ÑE¸øâ‹qþùçãä;ïÄáÇñ+®À‰7ÜPŸ(çó8°´„|ãX\\ÄiW\òë_EQÜ„W|q(®¿ÿ~œ}é¥TÖ…/Çø{F°°ó£âäqçÉ'ãøé§¯¼žŒåɪ |3Þ/2‘°È²ì&1>ö±áðáÃH·JÀ7¨›ñ “““mï³¶m#—˹‰®ën‚Ô* "î½D’(*š¦Aótñe2î¼ÙGÿr} øßÁzÞj‘‹†šf#Ä0{M3êî+âë½ïý: o|ãé¸úê‹8i§’Ÿ‡®ë¸ï¾WáÑGÿW\ñAD"÷¡\~=N:éÎ;ïj‰´“ éÌg`¼öÿµ¸»ãoËWæ-?ƒùÓ­P^q§ŽxŸÒ4â_QUµÈÔç? 
³|>J¥c8ùäQùÅØzÁpÕU·ãÊ+ÇP©¼Š™wzá—?Ëß|ÛÛpD·ð™ÈžzÑÙÐ&N[Ž%²mâ©?þc8pÇG$Á–-[püøqüêW¿ÂI‡á¤ÉIœúÎw6¼)#€¢ ÿôÛÇ‘¼ïÏÝ,Ö¾ý»°>øÏPvÆj÷oÌ»b›uÙ.g‹‹‹øÆáÃHö¨ zùÝoÞqñ!ض{î9>ø$N?ýtœqÆøÉO~‚sÎy àÒK?°,GŽÄ02òUlÙòî¹gžy濞zê)À–-[/zÑÏðôÓÿ€RéT<úèŸ }—^ú \~ùËñÙÏ^'žØŠóÏ/ã—‡ãä£GñË_þ¿~Î=xã•ÿ[~?ÿùÿ‰'žƒÓNûȲŒò›xâXÔ=çg=ó NûÕ¯€óÏÇ+e¿ñÂ3pÿ™ÿ ßûÞV.¹ä~¼ü«ßÃÏÅ–SOÅI'„O<ÑMO@9öŸµ8uç¹o”ÓâƒkY+[$“É•·‰!’( nºé&ÜxÛm=ý›sùMþZO¼ø’°–Ëe”J%ÌÎ΢T*­ø°¤ÓiD"‘ºa~‘H±XlÕç\ëÅëºS¦4–5ÜdôOÿôïqèÐ^XÖVµÊ=P_­K¼Ä AðTJ §€ò®2´<¹Sä©î$Y Ö´gšõ“@ÚsȲó¯( ;™ê]b×¹ˆŽ+X¢N$ÒÞaâKÌ0œ„áœs^ƒC‡~ÇŽM×Uȼ_p"õ>‡x︕9I’`†û^‹Þfñš­´â5dY®»ŸmÛ°mÛ}½jµŠíÛ·ãÃþpG׿½ˆo {ñw-YϤ`ï߸]C÷oU;0úÂãX81髟s2J1ޝø^÷gÍçk½Žâcàé¼wë´ÞŽx]¯=‡wh¡®×:äZ‹x&ã„¿xÌø8ðž÷ü çþï?¹wÞ‰_ýéŸbËû߸ñÆ2>÷¹­xå+ÿßýî‡pÁ/C¥ò\Üw_gŸ})N>ùI\~ùCضí—8å”§pÊ)O¹qçmðÆ®7NEK2™t{¬Åmb´„àMr¶=Š ¾õ-üÎ÷¿ãÇã}±ŽFÝçýñŒ¯ý뎗f‚P†û*Ÿ‡uÇÏ!ÿ¿ÝÎﺎol>§EOuÿ¶¦é$ Vy3.¾è8ÎùõððÁŸá¡ïž‰?Pt|ò¡oãŠ+¾„o|ã2,,Ñ¿œ€eYøÑOGùà ¸ïéWá_žÃÖmßÁi[ÂÕ}xÎoÿ6n¿ý|ûÛ_Ä©§–Så2^}ð ,—žp~yžÿÜ žuë­n£”!!¦‡äóyȲŒ÷üábÓç>‡]p¾±o_}C¤¢à§žŠsO>wŸ}vÝw°² åEä¾ûðøg?‹§N9?¸ì2|ëG?‚¦i«Nai,sDYí}Î~ÆKÐËï^S>¼gEQð¥/=Œÿ÷Ÿ@’T\|ñÓø³?‹,¿ÛñÀ'aÏžÿX¹¥Ö¯úÿÄž~ ˜xÛë7ãô«¯†nH"sÊdQX‹QVÍ4VÒ2çËÀû½³ÜåÿÄOà‰'žÀ%ÿð+ë6MÆœ=z?ü0Àpÿ%—¸ÇD£º·1\Ô%¼·‰ølר^ß°h6½Ýë®»îbùM¡µžxñeHp±Xt¿ìš9tèâñxO‡)d2+ËÁ|>\.‡cÇnÂïþîOqÁ·áoþ&²\îIµŠ}&S[9ÉK,¼ü!w ¢Æ¤ÓÓ{šN;¿J¢ÛŲù“_gmî|(uçH’ UK:S>µnˆ XAWl%#ò^·]r+r^É®Uæ› !_,Îó««Î_l¢‡­V!gË¿¨”€¦i-çÙŠ$KœK»¹¾íˆK/!¾›±, ‰DÂí©, +ÞõÌu®(¨ÅP&³âon€ý@ûΟ4 ÑS*I€vî¾òço6Údâ‰E°{ïÔ‚´±BbN ‹™¯Èf5­õ’Ób†hõñ{ã&&õy‰Ç´k×åm}jVyó^{úÑ£ãWÞ+ù|™L𦹉ž>{æÌ®¼æÛØõÉ žyæz¼úÕäÝJ_6›­ÿ\5Nêlóšbúš¤Rµ½Ò€eYîµ6e_Ësˆi½Ž— •ß‘ÏçÝD5™L¶mÜ{ïCxÞó.Àùç¿ïzW´Vš&Œ[†R]€´ÿGµžOoͳ“¥úiCX~S¿4.ø%ˆjˆ¨~‰tÇ4ú¾m×¾âD›½hCVn*â ÝòœŸÇÁ³†w¾ÿ;>ÏÀ&¬Ñh‰D•J{÷îÅ®]»066¶ês}éK_rçàµûÀˆ‘·bê‹mÛu‹ÈN]µnïÎf•hÑU)*´lõÒL>_{¨¨éû@$ă° f'ŠÅ"n½õV<ù䓘žžîéët+¾àMoz®¾úêŽ+H^b.è\ãR×R)è?¸ ê—ÞåöšŠ¿{* TÂÌæÝV]4ɨX{ef±à–w€,&‡6žgãpv ¾ÕLcó°zr2ÓÓÓ¸ýöÛñ…/|¡§¯ãWÞmÞu®>å}øÁü}8ò¬gaËy/Áã?,ãw"Žûþ¿º t‘¿öîÝ‹ï~÷»€}èC=y •ßjì±#“EA2™„i*î(,M²é&`ÞÚi»2–zB”ßÿøÇF7þ„«KùMo§yã /ïQÓ¬õ'ˆß›}ÌEç˜hSws"‘¨¬Æ²VÖá<=_Ä='žˆù+¯ÄŸÿÃ?t|}¾¯ܨ\.cbb×]w"g®D4ÅôôtËË©§žŠ .¸;:Xùõá‡K°m¹Üw>˜?&Ô5Z‹nÉÆ?À’T¯\®ö4¢ã'HÂ’¨ Û¶mÃe—]†Ÿþô§}íõÆ7œvÚiرc¶mÛÖöuDkº˜÷›Ëå°à]Q¬mæG¾Õ² 
Ý’®m¸Ø,!ðîÕ©f÷’Ê׎;p×]wùòÚý(ûɻʴrö?âÁ¯~ïØy.¾ðøém8öÜ{qåG>‚?ëÒÖI´q±X ‡r{€ú¥_å÷zèºîŽ ˆªªø‹¿ø:¾ò•Ó1>¾¼Ð¿b@‘tÀ0¨ëÛ zJ”ßgœqFß_{ÐÊï0òöxŠ$T¬P-FKŠüÏ;ðK×ë«^êÅ÷‡-?Ñ@QÍÖ×ͲæòÐH³É‰Œ:#×ÔfyN«2¢YNQÜ,ùÉr?3 _cù¸„5®hÃôôtËå·O=õT\xá…-[. Ãp·Ó«ýŠ=MW¬›h˜ÑÃ]ÖÅhEYVÏjXE£Q\vÙe¨T*¾¼özâp*<´Ìçr9·¥3³Ü¢ÕlËšºÇ|à(ps ªY¿÷íÜÜòÚwMC9òM`¦Ð»VwV’º&ãÌ3ýÙ£²—ex·Í¤Óøðç>玮±žý›ÿýoEú‘×Ã0 |˜q8±X •J¥ïex?Êïµ²m©åõ3êÊy1{IS,ÌhùÚ*tªÚÓ: mŒ(¿EÂØOƒT~1PÌ;|VQjS÷²Yçwq ¶ý—tñ½Pž[Yžê$CI&ëg]åó˜±œŽ 7¡°<£ÉŒå'k¬Š'i72Îkƒuë­ƒ.a-‹(—Ëu-9‡–[6úáM¥RøÜ羊3ϼ¤õßD4iôi2¦PŽ4 ¿^Æ7w«™B¡àV\· ê÷[ãŸÁB扦A(I€òÒÍ@r®ÝËõ<Æ×ʶmäóùº­ÂLÓÄ›ðÚ“NÂÇ·=ŒTêDg­€û›ºÆ˜5Ï+¥P Zl†T*…d2é6ºçóÎÏ :æÌ €å…-z¸…CÐb|Ј­è¼Cp½ëƈy¡êåA~ð'€}ɤZ—jfšiÖ¯š*ËÀÏ¥Õ; ’É%’íFÆ@àÖH$‚ÉÉIÄb1w íÙÙÙºß×ömȲŒû¦éú,u2™•Ã{H$¬†á´¸°Ž^½Šon¥¼qžj³ÅMr9@9ûgP÷~8ã $ã! ý©ÕŸ¼ Cßi8ô2Æ×ʶm$‰ºUÌà¿¿ü÷±ç­ßGêoÁÝÞ ÉqZŸ Ä¶w»"ïÚÎ1OõEQ˜¤Òš%ƃÊ;DWl»g€X HÌõ¿d‘CÖ;ÍÏ›ˆŠRq¨eSKXc±&&&°sçNÄãqìÛ·Ü¡¼ë%ð®jÕ”˜}ÜÇîNU­íÎd5Üz߀³àF²ÃÄ2™å{œÂ3à-k48zãk!’UMÓVLùHœÿ¤ÿøB¿ß*0~Çv*•r÷%[“éºänu-¶ÿrqØ­‘ß1$bMUQ­3Ýí/¾éS¾ IyÈH&‘Lz>sbGïçP¬dbßlïb„¶m×mŸå½]̯߿ÿš0ô5aÇã+V'€©©)Œ¹”nŒ‰Ã"Û.h¤(}ŸÛ!I·‚ ¡áÒÏøàî¯Ø‰ºŽ¤žÔ_ýŽñNˆ•Q?ó™»ðßþÛN¿@­-Òøð÷€ã€ú—¯ôûí£ Zl‹õ¼ûsºÃ?÷3HÿS™`ï u(h1î'ÑcêMPÅn!ÓDAYïk¢¶Mž¢/_îijÖð°ÑiÞ¤Ql˦(Š»×µØ—Ùjµ °ûžÕö¤·,k¹SÐI`ÅtšÆç‘$ Š¢`qq‡^Ó¹®‡UˆF£]]ÎÛ4Íæ½OkØ/¯—ø3\º߀S0HmzJ Ã)„¹ÞõZ/b¼±øŒ¢(8óÌ`óæÍî±ÌJûИÉÚëz~" ÿ±-a¼+½‹ýä ‰Czӌӵ°Ê1 .?Êï~³íÚ& bô¥¢ÀÝÖ©®ž$˵}¯0uÄ»h¬w½oBÚì1btà$²,»{\kš¶býñ¼²,¯Ú3jš¦ûœíœqƘŸŸ_Ó56aí6gÃqÙ:ã9PÛsÌGAÛʆK³…•<Ý ¹ô ½õYØ»DábY Ãpz òy$w~´n|da_ Æ»¾ë±”^á÷éuD¬ì]›À0€Ô;žÁÜö,äêÏ9<‹hÆÇ=û_W‚òÛ—÷ Fó­Ö³µÞ‰ï0˲V$ÞDÐ0 ·§Ò{?I’ ª*4M[½>Ø„ÖA"ž»•µ¼æz EÂ*ZšnŽÛA·7QеLXÅf]f4Éî| Ÿ|>þê3¿þO+î£~äõ~Ÿ&Ñš¤R)¤Ói·|·, óçOcnË”çF,‡Ëµ£ëN>*zM³;Y¾Å9`-/~T(ôt=Ó4a†û¯è­”eÙž«ª*TUE2™¬nëZ›N§Ûަ £¡HXÅ‚K¢5¥Ž‡N4ÀLÓ\½•̲ú¶E‘Ä" ȿ훰Î~²œ›MÎ0 ض]7I–l,œ˜Þ¹Ám,ˆBλ+LÝÌ?Ó„|Kιaa¡ëIªH>ÅP[Ñcª( EA:vGLˆûxÔF­†á“¡HX-Ë‹_ü;øÓ?]eîÞ¶TP¸˜¦‰™Õ&¦.t££=o@$ò…®ëÐ4 ’$!¯K(|ç"¿O‰hÃr¹\Ýþª”Œï¹Hb–åÌï6 §Î#K6ÒiOÅGQ6´u¥H2k¯gÕõšŠá»b­¾ÛL¯‡Ñ†ÉP$¬¦iâá‡?м|oÚíJ.¢¥‘É*…Q.—C¡P@þmß„;éª×ø}JD¢ëº[áµ,'a-H¬¯5cÛN¢ªë@úíaF™fmϧ ‹‰…ŽDgmmõNꊡHXô£ lÆìì*w`  01佩å!ïºîûºbD=ñwï?öD" ä+_ÃÜçNðû”ˆ6,“É 
°<•C–‘‹DÔ\>ï$«š,ià“ËÛ!¬±âcÛ6òËÃE©[IÓ4ÌÍÍqx®OBŸ°š¦‰Ë/¿põmclno@ƒM,5Þ”,Ù,ôQNc¥ðI¥RxôÇ?Æ;®»ÖµïÆùj¿ÏŠhcr¹4Mƒ,ËÈåê»&¢¶í –,¼ëË?2åd­kœ›jÛ6r¹œ;½¨ÍEõ.zFþ }ÂjTUZ½• + 8Ñò·úq¦ð{®þÛ׿ÐsœÝAƒOôð,,, “ð³ŸãïÞМ;¢0#Ç$ÛÂŒ•,,OZ][¨®ëÈd2Ð4 C¹ï XSÂZ,Çë~7 ‘H;vì¨;ÿò/¯Â_ÿueõ;pÿ2p-\8˜B'“É@]Ü«*ehð‰ÞUÛ– ö,` øÜ?ú}ZD’Ï;?ª §ã)ӸEŒ®ë°, …BC}®£„µT*!N£\.»›Ô†t:h4 ˜Å®]»066æ÷5¹l°, ²|Žß§BäÛæ‚’†aÀ0 ,4Lêãˆ-tb¾\¡P@*¤ãVH¯z3ƒ›¨¦yê5|> À®ë0 Ñ&L*p¾Ô@è(aM§ÓˆÅbÈårîm³³³ˆÅbسg"‘fgg1==¨„U’€Ë.ûdyß§BÔbõUe2ȦÓìz¢P°m©TÊÝÃN-íl §A–ËåL&ašì¥Hnú?@š«-µU€“ÉÎËzñaÛ6Òé4Wò`m—S,‹(—ËØµkb± R© T*all ‘H066†J¥‚b±è÷5Õyì±ÿ¨ý2:Ê9«:-VÓ„e3Y¥pH¥RH&“õ `$î•DƒJô®&“Ig¡¥‡Þ¹ÊÆñDÃÇ4DÀÃCNŒ´­Ë‹¹à£££n/ªÊ¹Q­mÂZ*•ÇÝĨ­œåýã{E*…úÉÓªêD½`YL`)4 +*îfù|,O÷#x¦i"ÝdÉÔl–½«4ؼC•'ï„úûr(0œzÍø8ÿ²ß|™³Ù*£Æ2™ FFF0>>˲°°°Ðô;ƒOÛ!Á‘H•Jý¢E¥R ±X,Iª –¹®KX%ɹQ$Úb[]h@†áƸaö}Éú9Û o ˲š&0MÆ8 <˲ÜN€ì£ÿƒC‰Pkˆ_¸ææ¾°ê*À¶m#‘H@UU,q1ÕPjÛËÅP*•P*•8à ÃX±"° ¼mÛ6¿¯ €äÏþÃõ7* {T)tÄðÈl°¾þ@ý±èÃ\p‰BÁ²¬¦{áÏy<‹> $Ó4qöÙ—ÂøôCNaÎuhÈÙ6zÇ3˜9ò6H§<åì­Ú$Y5M£££H&“Èf³~Ÿ6õHÛÖX,†x<ŽÉÉIŒ¡X,¢R©¸-•Jóóóؽ{7âñ¸»j°ßLxö³óÏ÷Tpd¹~H°irGn ëЯÕ~q†ø}JD]ašfÓýñŒÛ¡ZLƒÍ²,|÷»—À²5Ë …‰ ûÂÿ岨ӈӄØCunn®iƒ&…GÛVÈf³PUÕ]úÙ»ÓÍ7ßì®"¤– ÃdÙÓìn-×h›âY¡§æÝO,5b@>ã°î|йÁ¶‘øîÿaï…‚mÛ++$¦ ë© ü>5¢ ÃÝ9"†È!™´}Y5YÍçóÈår( LV‡@GÛÚD"ìÚµ«é±±±1Œ¦g¨ú]\üNmÎS*嬸çm†÷ö¶ Û¶¡ªª³çð[¡üú!XE@¾ú"À¶a±ç‰BÁ4M$kó¶ {Ë%œÃJMlOÆd•È©Ã'ÀBAqXª;fC×uäóyH’„B¡Ðtä …OG=¬­D£Ñ@%«@ýºJnÂjÛN²ª(µD•-2’i`æÕÿŠìüêåÞ4Í›ý>5¢®°m–Õd±£[ü>5¢ ±, -ý6ìŸ>á÷©ù.Ÿ_i IuL†a`tt–eannŽÉê騇pUšŸŸìرcÅ¢KA"6Ïåš,° aËDeŠÆÛ†|ñÓuó´ítPˆ$@µê¹Á¶Ù(Cϲ,Üs×» æ8&˜†^³¥e,ËB*•â\Õ!ÖQÂ:==ÙÙY÷÷ÙÙYLLL`jjÊïó_•3]uyUIËZ9WÕ08•ž(¸óŸ<Ê«T(ªŠÌ;GNÞÊrÂÀ4MȲìîLæÆµ¦AÊo詉|÷oœ‡WÃ4.¶DÃK×<ò´?:¯îvÛ¶1>>Žl6Ëduˆu4$xvvccc0M¦ibll ³³³+ög "I’œ„µ1È…-™úcÀò¾ÈúÎè.¶DaaÛ6dY†òä°¿ú=÷vËbŸßþ_„×¼àQ6¢ÓP˼û(~óŠÊK*•‚¦iÐXص¶ «Ø_ubb½Mô¬Š½YƒÊ]AµÙâJ’®ÔAƒJ×=Ã؇N‚rÃÀÒüãœ2Ÿ!Na`.xð\w³ø+÷vYf»# ¾'+; ½á—~Ÿ‘o2@;å ÿçïÔÍ[Íår€4· z/ºä]X)²Ü“X¶ ,¯ºçþî­¹g2~Ÿ!цäóÀ½÷­ 9v¬vpù¶¬f2a¥ÐxÖ©QH[7ÁÞÿ”{›eq$ ¶\ΫN¾“ChhÙ6 ê)¤Ï›­›ÀjYòùE € ¯H¶ãõWµßM³~¨i²–CMUyOÀòí NqåóËsAØ0C!aš&ÞðœH_òYà‰'ÜÛó“÷°(§öµÿÔÍß÷£a•ûÀQ$MCú§¿©»=•J!›Ír%`€„U 9n¦T*¡\.¯ýIeOíßçœãü®iõsXÅBLD=Ö“ø†Ò_ùÊé–§h¿ùR÷˜,æWŸÀøþ·ß—OC W1îeY.¬üjfÙmsoW¿ÿ·¬çSÏô#¶ÕcÓ¸úêGý¾TRýˆñV, 
Ðo>‚ô;Ô5Úd2(ŠÂy«äêx[›ÉÉÉ·MOOãæ›o®»mÏž=¿x¹\ÆäädÝ\l¹Ðï·€B®—1î%Ë2ôÅ+ \~!¤û‡wõÂ{&¬ÔýŠí—?x+*oû_~_. ¡~Åx+æ—AvëGê¶œÌd20 …BÁï·ˆ¤më¶mÛ011X,V÷311x<¾âöN”Ëe†±ê$êÝ»w#Â4MÜqÇ(•J˜žž^Ó…Í_|1®X\l~ÐÙó¦Ïo5 ‹~Ä7, jä?ðoœM«o“QÀþ¯0÷†öû­ êKŒ/3–×#ÈßõÀxå°—·Ø6OÜá÷[A!ÓÏØÎç¿>ï%Øqà ~_6 ‘~Æx;ÚÞ -]kuL¥R°m…BC©NÛÖh4ÚõýV‹Å" Ãhz¬\.£X,â–[nà,ð466†½{÷®éÓ@’€¹¹®¾‘DB_â[–ñÓsŸÄîç}™×Þ³òðÅOÃŒ^ë÷[A!Õ¯2p\RÖSÒ{”—n, Jôa¿ß ™~Æö 7T‹ñû’iÈô3Æ[Éd¼ÁIVmÛ†®ëÈz†yuœ°†k¯½“““˜u&''qíµ×vmOÖVŠJ¥²ê±#GŽ`ÿþýnmYNܱÃúÛlVï"L4TÊå2ñøãûòÚ«ißpøða‹E÷9þåâ‹¡ýà½0¾¾r Dòo/ƒôÒΆèSø‹E÷|(Êé~_* ™nĶaÊáoàþK.Á%~_Qƒn—ß^– R|Ãk8 kÒ;™•hmÖX,†J¥ÒvH€Xp)‰tüâñx¼iOQ,Ãç?ÿywx±ïÜ)˲X™'ßõ*¾‰‚¢×1n$IBþŸOƒš8¢“U~NøÉOl÷û- êel›ß> å¢G‘7MVÖÉ7~ÔQô™Ð.ü6 ]˲`š&Tïð¢Ut”°Æb1¤ÓiìÙ³§iBZ©T0==x<ÞrÂöZD"Vä)´6ßLA×­2\–e¨'Üé‚Ôn;óq$ÒW£ún¿¯’†Ñ†Ëï¯Dò¥›%N_¢`êU\ÿç_bî­O;ÿgï*­AGÛÚìÚµ år;wîÄÞ½{ÝÉØår{÷îÅÎ;Q.—;Z¬dYf‹ …šmÛŒq -Ër~Äâyê3·Cynm}„äÛŸ†²ù‡~ŸÔ% ä€IDAT&Ѻd·ÏAþíçpqH*–À¶!§œù°ù|ž +u¬£mmb±öìÙƒééiìÞ½{Åñx<Ž©©©®õ®n”,Ëlµ$"P¦)¶²1!Ë2¬oí‡ìm ‘$,¼â] ~Ÿ*Ñš)åÛuv&ã÷©õ,Ù˜“3€üy†áì¯Í­l¨C%¬@-i[×”J%w¸p4E¥RA±Xä0^¢>`Ë<…™ØkUUÈÄ÷®Á’÷Šhšß§I´>Ù, IlX§áb_ïtl‰Õ‰:ÕqÂ*ˆ-n­T*arr’sëˆú€ ‹Q˜‰F÷£G·dX.¨¿‡’Ñ R6:ÒÐɘ²Y ¶mÃ0 ÌÌÌø}J4@:šÃJDÁá4fª |ík'8½P[¶ø}:D]eš&{Xih˜æòV8½«GÈÐ1a%@Ük˜BͲ ¾øI8°–%C:ùI¿Ïˆ¨ëØèHÃB–‘ð€³]VZ«5 &"ÿq( …šiB5 <úè_ä‰Wú}FD]Å!Á4L$Éù±m¦irJ­{X‰+ PÓ4À0ð¼_û'°^OaÄ!Á4L2¯½‡Óúµía-•J˜žžnûD•J¥í}ˆˆˆ:’Nã=úlû}`c<…V–˜ß8 ÀœN§ý>%@]ëaD"ÜÒ†ˆˆºâg/z^ùÈ#À‘#ÜÁ†ˆh@™_~Êyû8˜6¤m«Ø•ˆˆ¨_îüqœüÂ"÷£H^w¬¶× Q˜¦‰$·f¢!`~ñ!(/üt]gÌÓºq+Rñ…/Dú‰÷8{"… ‡Ó00¸êÄ6®LÒ6a-‹+ºï+• ŠÅ¢ßçNDD!eš&œu’3 8‰•†«Ó°°‹@zãÕ\hŒ6d]ÛÚÜ|ó͘…ÉVo""êÛ¶Õ°¹"6…VÆ­OB=ç˜æÃœ»JÂ}X‰ˆˆˆúˆ=M4 ”'¾ ùw Ó:˜§ áV"" ˲ü>¢ž`ï* iñ;û9\˜6Œ +Ž;$˜(d8—†EY@Ó˜°Ò†1a%"""ê#‰Û4QÈY–óãüßb# mHÇsX'''ÝÿW*•· ܳ•ˆˆ6Š‹úQXqH0 I²Y'Þ™¬ÒFµMX·mÛ†‰‰‰·Çãq¿ÏˆˆBŠÃÇ(¬8$˜†$9?†ÁáÀ´qmÖh4Š©©)¿Ï“ˆˆˆ(˜°RØeÞ¸ˆìÿ=†a0ÞiÃ8‡•ˆˆÅ0 Îñ#"P–˜wTIâ`ꊎ簖J%ÌÎÎbbb±XlÕî}Î;""¢b‡ÂÊ4M$“I¿Oƒ¨gŒÛŽ@9µ`LÓÄÌ̌ߧD®£„Õ0 ¤ÓiD£QD"÷ö©©)D"†R©„Ïþó~_ 8.JCaÇ 3ë®G¡^}ÜïÓ éhHðîÝ»¡ª*n»í6D£Q÷öX,†±±1ìÙ³;vìÀÍ7ßì÷õÑ€ã¢4DDƒË¸ód(/>†apÁ%ꊶ k±XD¥RÁ7ÞØò~7Þx#fggQ.—ý¾&""`Édš¦ù}D=Q(ü>¢ž±mGŽ@ºö°,‹ëPW´\*•FëzVg[ïð`q|ß¾}+î»ÓÓÓ˜]ñšÜç•€ñMa·‘gï*Ëp ³Ä·aê _ä,Ë‚ªª~_…@Û„5‰àСC+nïu¡¼oß>ŒÕº7A&dŒo ;Æ8…ã›Âl#ñ­œû ”Ë¿ 
Ó4‘N§ý¾ ¶ k,C¥RA±XD<_õ~Åb@÷ ìC‡!·|M¢AÅø¦°cŒS˜1¾)Ì6ßò¿¼üBà`êš¶sXc±b±¦§§Q©TšÞ§R©`zzñx±X¬+'&àééiLOO»¿…ã›ÂŽ1NaÆø¦0ÛP|'“Àr¯*“Uê–ŽV ÞµkÊå2vî܉ÙÙY‹E÷gzzoxÃP.—155ÕÕ“ãç+• &''±wïÞUï{äÈìß¿Ÿ_ÔV¹\Æââ"üq_Ïc-ñ ‡F±XäÂfÔV±XÄÁƒý> –áÔ¥R ‹‹‹M§+õÓZâ›å7uJ”ß«uõˆÊoIbï*5%êàk-¿;Ú‡5‹á–[nÁìì,¦§§WÃÄÄDW[311ë®»ÎbF1==±±±¦9räzè!ÌÏÏsˆµ´oß>,..âÉ'Ÿôåõ×߀SᙟŸwïO´šùùy_+;,é—DÂê—õÄ7Ëoê”(¿:ä˼èn•ß–eqKZAÔÁ׬ºF¬Þu×]îÏÁƒ×úërðàÁêöíÛ«wÝuWÓãwÝuWõ¦›nê˹Ðà Z¼´‹ïjµZ½á†ü>M A‹–áÔMA‹—vñ´Ï#[Ðâe=åw6›­f³Y¿Oh=åwGC‚½"‘ˆ;»qk›n)‹+†ˆ®c®ÂGƒŽñMaǧ0c|S˜u+¾mÛf+uÍšÖ~ˆD"ؽ{7J¥’{Ûì쬻Ñ c|SØ1Æ)ÌßfÝŠoÓ49‡•º¦£9¬ý‹Å011;w"cß¾}€\.ç÷©mã›ÂŽ1NaÆø¦0ëf|³‡•º% +LMMallÌý p Æ7…cœÂŒñMaÖø¶,ËïË  l 8«’q5= +Æ7…cœÂŒñMa¶‘ø¶m²,û} "œÃJDDDDDƒÇ4M¦®bÂJDDDDD]ÁáÀÔmLX‰ˆˆˆˆ¨+,Ëb+uV"""""ê Û¶¹¥ uV"""""ê ˲ ªªß§A!„•ˆˆˆˆˆº‚sX©Û˜°QWpKê6&¬DDDDD´a†apþ*uV"""""ê ö°R·1a%""""¢®à‚KÔm'ù}DDDDD4ø˜¬R/°‡•ˆˆˆˆˆˆ‰ +V"""""" $&¬DDDDDDHLX‰ˆˆˆˆˆ(˜°Q 1a%"""""¢@bÂJDDDDDDÄ„•ˆˆˆˆˆˆ‰ +V"""""" $&¬DDDDDDHLX‰ˆˆˆˆˆ(˜°Q >a-•J(—Ë~ŸQO0¾)ìãfŒo 3Æ7ÅI~ŸÀjÊå2Òé4J¥@UUär9¿O‹¨+ßvŒq 3Æ7…㛂&°=¬»wïF4…iš¸ãŽ;P*•0==í÷iuã›ÂŽ1NaÆø¦0c|SÐ2a-—Ë(‹˜˜˜D"Œ¡P(ø}jDÆø¦°cŒS˜1¾)ÌßDLX÷íÛˆÅbîm±XŒãè=D2,Ât½ŒïÎìÝ»×ïSàõ®c¼½0•iÃv½ŒïöÂTž Ûõ2¾Û Sy6(×È„µÕ‡¢R©ø}z°oß>ÌÏÏû}¼Þu`|wÆ0 ¿O×»NŒñöÂT¦ Ûõ2¾Û Sy6l×Ëøn/LåÙ \o ]jõ8tè"‘ÈŠÛðƒ೟ý,þíßþ —]v™ß—Ðs‡ÆáÃ‡Ý ña×Íë}ôÑGñ‹_ügžy&¦¦¦ú~-ë‰o°, ¿õ[¿…­[·âÜsÏíûy÷Ûƒ>ˆÉÉI¿Oc ¯wqqGõíZX†·Ç2|ý|ðAT*\rÉ%¾\Ëzâ›åw¸õ¢üþò—¿Œ×½îu}¿–ßí±ü^?Q_kùÈ„Õ; ¡Q4mzûõ×_믿ÞïS'jk=ñ ÿþïÿî÷©u„e8…Ùzâ›å7 –ßD¼mÛ6õÃÊårËÊ<Ñ `|SØ1Æ)ÌßfŒo ¢@&¬Ñhñx¼n»aH$~Ÿц1¾)ìãfŒo 3Æ7ѦjµZõû$š)•J˜œœD4E¥RA$Áž={VßG4HßvŒq 3Æ7…㛂&° +àLü|ãñ¸ß§CÔUŒo ;Æ8…ã›ÂŒñMAè„•ˆˆˆˆˆˆ†W ç°1a €b±¸ê±R©ÔrߣR©Ôr“çvÇû­Ýõxï†ë%Çj1¶øîäšÄ}Âr½Ä2|µû…áziýñ=ˆëÄ÷ ^/±ü^í~»Þ*ùjß¾}ÕíÛ·¯¸ýÞ{ï­^sÍ5ÕíÛ·W·oß^½æškª÷Þ{oÝãÞúÖ·ºÇßýîw¯xÞVÇû­ÝõxÝtÓMÕn¸aM״륚f1¶øîäšÆw¸° _‰1ë‰ïAü[o$¾ñzÉÁò{¥ –ßìaõI¹\†aH§ÓMONN"Ã4MÜqLjÇãu÷ݽ{7¢Ñ¨{¼T*azzºããýÖîz„b±ˆÙÙÙ·ÚõRë[|wrMã;LX†³ ³Ä÷ þ­7߃x½ÃŽå÷à•ßLX}R,ëö¸jÞ={ÜòŠÅb+ö»Ú·o'Äÿc±XÝcĸñvÇû­Ýõ»wïÆØØ¶mÛV÷øA»^r¬ãa‹ïN® `|‡ Ëp–áa¶Þø>|ø°{ïýƒü·ÞH|3¶ËïÁ+¿™°P$©Ûóª\.czzcccˆF£-ƒ R©´=´ë€½{÷¢\.7-<íz©µ°Åw'×Äø.a‹q–áäÕ*Ž=ºêã‚ú·ÞH|3¶Ã‡åw½ \/Ö«T*˜žžÆµ×^‹x<Ž]»v¹·¯æÐ¡CmízÊå2fggÝß›=n¯—Z 
[|¯vMŒïá¶gN^ÍâaÿÖë‰ïA¾^jåwíqA¸Þ“|zߨR©„Ý»w#‰à–[nYÑÕ¾šh4ÚöxЮgïÞ½ˆD"0 †a T*aß¾}n Ð ^/µ¶ønuMŒïá¶gN^«Åàþ­×ß###y½ÔËïà•ßìa ¨t:íŽ;o 1¾ÜÛ _.—ÝÀhwï¼óòz©5–ßÁ»^ö°a(—ËPUuņÆñxÑhñx{÷îuÇ›†D"mízÄ –ÂöŽ¥¤ë¥ÖÂß\ã{¸„-ÆY†“W'ñ0HëÆ÷ ]/µÆò;˜å7ÖËLONN®8fš&'&''Ýåª#‘®»î:÷~íŽízÚ¤ë¥ÖÂß^S+ƒv½ÔZØbœe8yµ‹‡Aû[³ü&/–ß+áz7U«Õª/ï mX¥Rq±Yw~»ãƒfØ®wØ Ûß{Ø®—†ïo>l×;̆ío=l×KÃ÷7÷ûz™°Q qÑ%"""""" $&¬DDDDDDHLX‰ˆˆˆˆˆ(˜°Q 1a%"""""¢@bÂJDDDDDDÄ„•ˆˆˆˆˆˆ‰ +V"""""" $&¬DDDDDDHLX‰ˆˆˆˆˆ(˜°Q 1a%"""""¢@bÂJDDDDDDÄ„•ˆˆˆˆˆˆ‰ +ùÊ0 ضí÷iõ]·cß¶m†á÷euV"òU"‘€iš~ŸQßu;öMÓD"‘ðû²hHåóyX–å÷iõ㽿˜°цèºÎ < Æ{ä÷ P·µ&™LB–åÇdY†ªª0 ÉdÒ=n†;LÓ4(ŠâËd2H§ÓÈçó°mŠ¢@Ó4÷9%IB:^ó}‰Ö¢U|@.—ƒmÛMc¬[ñ½žûmT/c?›Í"—ËÕÝ.ض\.·âqDÑ,žÅmº®TU]Ÿâ6–×4H6ï¶m»±+I’É$$IÀØ^+ö°D*•B.—X–…ÑÑQ÷’ÉdÜcº®c||ÜýNeg||€SAE>Ÿ_qܶmضñññº¡h¹\™LfÍ÷%êT«øÇÅ\¾Æëf|¯çþDÑëØO¥RM[úmÛF"‘p+BDÝÐ.ž½šÅ'Ëk$‰wÛ¶122â68êº^7eƒ±½FUòÝÂÂB@uiiɽMUÕj:®.--­8&ËrUUÕjµZu/,,¸Çgffª’$¹¿¨ÎÌ̸¿+ŠRM&“îïétÚ}¾µÜ—¨­â»ZubNü¿Zuâ·Wñ½žû­W?bßûøB¡PP=pàÀЏ&Ú¨vñ¬ªjµP(¸ÇãSÜÆòšÁFã]”ǪªªºÏÇØ^ ]סªjÝ0±B¡Àiai<¦išÛcdY®[Rünš¦Û²î}¼$Iî„fÖr_¢vZÅ· ªªûïýzßë¹?Ñzô#ö½D+}6›õû- é$ž5‹O–×46ïâq©T š¦AUÕglwŽ kÀµÛò@ ;C„f_Dƒ†ñMÃj#±/æO¥R),--ù})DDCG–e w*ŸmÛH&“˜™™ñûÔÖ€hLL Ãp']·JZE$Iu­6-ðD~[-¾½ ‡5Ãø¦AçGìÏÌÌÀ¶mèºî.BÔ ëg¢A´‘xuø™™ÌÌ̸ێ)ŠÂÏË:pÑ¥H&“0MÓöeÛ¶»‡þë=æ]tû ™ &~A«øn‡ñMƒÌÏØ—$É]±’[/P7tÏÄ6Ñ Øh¼{{V§Ç•C~×=¬ Ë2fffH$ ªªÛ*#–¿n<&*2@­R’ÉdÜŠeYmÇÙõK«øn‡ñMƒÌïØ×4 š¦!•Jñ3CÖ.ž%IB&“q‡> ²Æ{2™„®ë¢(îèMÓü¾´´©Z­Vý> rX–å¶„‹yJb˜,Ëî>¬b¦¹¹9÷±â~@m(Q4‹ïN1¾i1ö)LV‹go}¥q¯a¢AµÑx=´’$q*Ó0a 8˲022‚……(Š˲H$N§Ù‚IDDDDD¡Æ!Áç’`Û¶;É*…{X‰ˆˆˆˆˆ(¸J0Rh†뺎O}êS¸è¢‹ü>•¾8xð *• ¯wÏwòÉ'cvvÖïKëØ5×\34oX\\Äe—]æ÷i ìõ...âk_ûšß—Õ1–ááÖ‹2\’$|ìcóûÒ:Âò;ÜzQ~ÿýßÿ=b±˜ß—Ö–ßá„ò;4 볟ýl¼úÕ¯ÆÔԔߧÒÅbóóó¼Þ >ß ¹è¢‹°gÏ¿O£o&''y½|¾AÂ2<܆½ gùn½(¿%YX~‡]ÊïÐ$¬Ã&#û}¼^ê™aªì ãõ»a+Ó†íz‡Ý°•gÃv½ÃnØÊ³ \/ç°Q 1a%"""""¢@bÂJDDDDDDÄ„•šeY0 ÃïÓ jʶí =Þ4Mäóy†±áç""""D\tiÙ¶ ]×aÛ6E¨ªZwÓ4a†{\–eȲìo¬DK’Y–¡ª*dY†a°, ¦iºÏÀ=&Ë2,ËrŸ[–eH’䞃¸mÛu¯/žCÜ–Ëå`šfÝk¬víâœþçÿüŸ~ÿ)BÉûwYÛ¶a†Š¢¸ÿïôñº®Ã²,Nl*ŠRã"¾Åg@’$÷~âñI¢,ËPÅGŸ^âsàoïkŠøTÅ}¼¸M<§8O˲Ü׳, ¹\nÅ{kÛvÓÏ<þøã=þKõÖ>Ëd2°, ÉdrE’(*¹¢,*ÒÞ ¬H%I‚eYn%[QhšæVÄ+ÒâùÄkhšn¥<“Ép*å¢Ò,I’Û{iš&,Ë‚ªª0 ªª"›ÍºeqÞ"Q B2™t/qÞ¼½¥étzM‰Š—išnòÐÌ m‰0lÛF.—ƒ®ëœFG"³, 
–e¹I–÷o$:¼ñéMMÓܘ±/ˆDN$˜š¦¹ŸÃ0 ë:2™Œû¹ŸI’ÜÏŠ8/˲ iÒét]Œ†áþˆä5™LÖŘˆßÕâ[¼ŽeYŠïN Ú¶6DDacš&lÛ^Qß#¢µaºF¢²Ú, ›ÐXiÍd2PUš¦!—Ë!—ËÕ%‘¢¬ªª{›èY=)Éd…B¡®¢+^w||Ü}ŽÆÊ°(4[U¤{xV»þl6»¢¢-€fÖÒÛÖ­B½]O+5§ëº›Àyÿ"Qq$xßg]סi–þÿíÝœõ}'þ— $Ø`Á ­S2‰Õ$ ñ8«4¥¤£æšhÕÊäŠ9o[ª-{Yš^ûˆt·—¤¿ö*Ý·wíY×mVM.ë‹Û†U²Ú¤m4BZZäzr¤¹ ð@ÂÊvøá1Z†@öûÇø3ýþ±’f4z=¯~Í|¤·>;ïÏÏcÇê^ €Ýï|]‘à9“¸FŸaö¨€fiÆy<ñèEí¹·;V3Íê¢QÓìb\|Ÿ›ý^ûˆïI,³RuJ¡PÀ¯ÿú¯»ýKÚ5J‹éB®MÄ(K"ÖÄèhÔø*ž/âQ4”ŠFX稘\.g7jGȈk¿fS=8B†¨Ö.d³Y¤Óéª^EQ:{Åý¢7Fü¡^\\´‹Åìûb±Ø†¬VÉ¢ó1í8{s›a+¡iš†t:T*U57Xü1 !Θ±mòù|ÕÅC,³{J›é4ždYF"‘ØpÙ£IäE"©¬Zo’ÉdÕs£@\Ô·šr‘N§ënsNï¿7J.TU­;7çûd2i7†©ªj7Ær”ŒwmdZˆh¤Éž¸¾1"þNÕ+W›žž®ºÎrN×]ªªVþßÑ9päÈH’T×ø X×T©Tªê6ñZ¢cC<¿Q 8¦½p„ Q5&¬•h£ È9Ó‰É Ã00==åååŽOØSHÔ;ç|zç¿Åôq»s>6Ð8¬Ë\û»¢(˜žžF<G"‘°‡È' Äãñª×®íñ©ö> ¢ñ”#½G$d"™tj4-Dô8ŠžH±>€H>Åg,^KÓ4Äãq,..ÚÇIªHN‰DUlˆÆ—l6 EQ iZ]j+íV;i|:[þí$êÖíÝ»·áPX€½64úD|s(5QoD/Œø9KñÿÚïWínqŸL&í üÚ àV ¤˜ƒÝè{œH$L&±sçNȲÜð‚žßÓ“]׈Qf±XÌ^OÃ9 VÜ'¦…8G™‰!¼ªªÖMkr6Ø8§3u2z °¾;ËËËö0ßÚ×'¢ÑÇ„µFíÜöbC´°y™h¥?â‚•ñMÔž*/IRÕ÷%›Í"›ÍBUÕºEóº]\K¼n/Ã*[­Q I7¼Š7ù—HHE¬8{ðÅ‚‘µ£Ìœ‹EÖŽÐé´—¼“éH`¯<‘¹š° LLL  6¼¿X,"4½¿ß¦§§íE‡ÄņU;W¨/Æ7P½‚.ÑFx-ÆAôœŠVÄC]בJ¥ ëzÓé"1¨¤’ÉjgüÛ¦iÚÃfkçMŠF—#GŽÀ0 {ÔˆýX,†|>_÷šÎűh´ø1ÆÉ¿\IXWVVÉdP.—áp©T @P*•H$P,X­fuè'q1äÈ‘ºûXS7¼ߦibzzÚn5'Ú(¯Åx¿‰yvbŽØÆËizz{÷îµ{–8ÑüÛâï@í¾Îâ61”VQ,//#NCUÕ¾7Æ»üãä_ç û€år™L³³³Ðu÷Þ{¯}›0??`0hß_,«îï7‘¬òbž6Êkñm"‘Ea|S_x-Æ7B¬&ºgÏD"ûG$ªÇŽÃââbÃFËÅÅE¨ªÚÕBeäm~ŠíZbêÅÅE,.."ŸÏãØ±cÈçó ·ËIk<g²ê#~Žqò·¡'¬ÅbårÑh‡Q*•X-;…BSSSöýÑh´áP”=O@ëdU,Ðh@2 d³•û4­þ§É¶Z4¼ß¹\‘H‰D¢/[ÂÞŠñ^e³YìܹӞwzäÈäóyû§Y’Z+3Yõ‘Qít:H$‚d2i`/Ôì:ǹÇ(ùÛ¨Ç8¯¡ ‡ÃuûY­®®bëÖ­ö¿  Ù÷‡B!ûËÔ/bŽF£¡^‚iÓÓ€,© I€ªV'¤gÿ8^·òx?^ˆoçþŠÊEýæ…ï•®ëØ»w¯=ó9Éi”c°Óååe{;1¿0ú1NãËÕE—fffì/Ç¡C‡ å—¢\.Ûcìk}ÿûßÇñãÇQ(‡[7N#›ÍÚCºj ëVoj<ˆÎ)‘°:Õ&¦¦ ìÙÄÕËJ¥Ž=Šgžyf Çég|ÀóÏ?ßr‘±žØÇι¿"—B¡€gŸ}vàÇq«ï…išØ»w/‡ðú@±XÄÑ£G±¶¶6°c »þÞ¨l6‹X,ÖñV0ä]¢þnS5Jõ7y“Øq¢ç–UÂÚÚ¾óï`×®]]ËÕ„uÿþý(•JXZZÂÁƒ1;;kOodmm­å—åĉ8|øpÓ/‹®ëöþvzž’I«ÇT’E–—­ÿwC’€³[‘G­®®âèÑ£xî¹çzœ~Æ7`]ð>|^ðär¹ª…3úÅ0*#Ø!5>Ü2ÖúeØuøFˆáñ¼˜}"a¤a×ß•N§Ù‹ê¢þnS5Jõ7õ—s;ªft]‡a0M³áßM±h[£ûœûŽ×®H~ôèQ?~Ï?ÿ|WçìjÂ*‚: ‘H`vv¶jB­V•ü¥—^ EQ0;;Ûô1ÓÓÓH¥RM·ô`¯èxq'.}œ~Ä7\vÙe-ãÛ0Œ /‘͹œó5­$UQ¬Ñ²l=&³gÈ›fggñµ¯u÷Ç îÃ{5== 
UU9êÀ'¢Ñ(‚Áà@ëða×ß¡iEáwŸ˜E±Xøv2£RS=±E•Y³hŽ¢(UÉhm z<Åö"™óÝ{yÇãñÔ)ªªâío{×õ÷ÐÖL&ƒb±ˆ……û61v&&&XÃÄ—Ãùï~¼QNº¤ÓVoj?MO[ù®DÐ Ó´ž×Mç€aXe“$+7 «YQ*¯ãœƒë|kt½ò˜dÒú¿HèuÝ:Iª,4%iDo2iÝ–JUŸ³¦Y øŽÅbÕï‘8®iZç^K ˯9=m=¶›Ïozøâ¯A"Ñÿ‹7ã[×õžWJ&­÷3«ÿ<q啞^¾OµÏ7ŒúáþNÙ¬õ˜xÜ:–ˆ{Àš2P›¼‹ûUµúuMÓz-ñ\QG8_SünÖcu½2êC44#^?›.ºh÷@Þo·ëðnˆU€c±÷Ò¦¶F)¶²Ù,c¨#£ããÄ0 är¹ºd€½•¦iˆÅbU ¦HR%Iêz4‘ÚêÂÂ#†ž°NNNbii ÅbÑnÉÑ4Íþ2ƒA„Ãa¬¬¬Ø-5š¦!‰l踢²–if˜c"QYMX̉‰$U.‰J¯U6k]¬ÆãVB .N³Yëw‘$./W.fe¹’`lÛVé ³Êl½¾¸_¨OXMÓJ\L³>)¬cçź,WÊá¼h¯½6L§«/ðUµ2üZ¼G{öX÷9kÖcÄãÄû(W'Ä‘Hõsœ‰·H¦ àW~åA ¢qÞ­ø¶ÞC£ëÞUñ™ˆÏ¸“§'•8êE.×x>¸h\iÔ{«i•c:ïD¬‹_nÛVù¼eÙúüM³2Ĩ4¦ˆx (¢‘IÔÎLjÛÅqEÜëzeκˆéX¬r¿x­\®ò½‰Å¬sPÎ×”åJcŽh ï·¦Y¯“LZ·Åb•äWU­ï‚xŽææîð3½PM¸ãÝØ»w/LÓäâJÔ1¯Ç¶¸PCõĪ¢Ç„¨¯Çø8Ð4ÍžÂUË0 »ç³ö;-zGc±Xÿý^¸²Jp4ž}û‡±ººŠµµµªÖžÙÙYÌÌÌ P(Ø“¼÷ïß¿¡ã6»˜¯íéçJÁ"aUÕÊ…5P°‰ N!³. ÷îµþÏW'Öâ‚ßY¤S§*¯ TK<§6‘ì¤Ó¡öýI$* QuC$ª^W–+çï,_»s©mƒèd ,O=Õýùw­ø6M³«‹ò\Ί«^â_ôö5"Ëf# "‘J¢‹UÇ‘HÀD£I2i§HäœIj>_‰g繈‰j£]j¿kÎï€xP}nµå礪Öy'“Ö÷U¼Ví|_‘d‹ÕÆÅûè|Œ³ñIœ§óøÞs‘‹×iö¸~r+Æ»‘N§!I–û=t†|Í뱉D‹Å‹Å ª*4MƒišmÁDx?Æý@$–ÊårPÅ^ ­ÛÆÔqþ®oZ___wãÀ¥RÉ^¬Ñír¹Œb±ØôþZ…B‡n:~>™LBUUß·@PgÚÅËFõ;¾kE?ç'±ç^»aé´•Ö6žôbïÞJ/¸®WF ˆ$N×+½–µÇröèý’O­ã¥†]‡wJl]Ã-ümuø°ëïNär9èºÎ¡ícb\ëïQ$’SÓ4ík1‘Xʲ\•”Ö.@4®z‰×] ƒ-Çċ͌û¥Ùü¾tºzø+Q? 
;¾5MkY jš•<ÆbÖpÑ~Ô—‰„Õ*†­Ö¾¦¢XÇjD’¬$6›åwoT ;Æ;aš&¦§§±¼¼Ì‹ê™c;—Ëõ´FQ#^ŒñQ i²ÙlÝ4CçBE‹í  †\]%x˜š sʈF™išMGˆ9›½lÓÔJ?†ŸrꑬŠÕ‰üB -d\ sX¯sxo"‘àtŒEÂ*'h†ëqШ3 £iÂjšÍí%Uº®Û«³Šü&—ËõÜ5¢aÉårHŸÝ¢Â9œWUU{Þ)¹clÖfA¶‘ÕN‰¼ÂhÈB~¦i’É$y1A¾¤ibˆHÓ´ªÅú¸²¼÷ŒEªiZÓàcLÒ¨k5‚À¹­‘e³Y¤R)&«äKL48Ùl¹\€5Ô— ³z×9nŸÀ04ëam°'/ÑÈiv1“ËUö&ò#Ã0Z‡'uLÔ?Éd›6mB$A$®ëX\\D>Ÿçß‹Öf +{ŸÈš%¬’ÄEÈß²Ù,â rò±l6ËUG‰6H,Ê'IN:ÅUäGÐX$¬Íp;òÃ0¶À³±üNÓ4ä¹¢ù”Ø?žÃ‰z£ë:4M³G*p.øèò}ÂZ»WR-6²Ð¨k4‚ —³VÆ7ùU6›å&ìä[š¦±A†¨¹\š¦A×uȲ EQ¸’t•° …ª„ …4MC Àää¤'7n¶ÿ*`ÍïëÇ^’D^bš@:Íý…Éßr¹R©”Û§AÔwbøâòò2dˆ:dš&"‘EªªJï3%¬Åb‰D¥RÉÞ8W,³ KKK˜››C4u»LUZ-È¡ë\%˜F›hAtJ§9w•üÍ0 ˜¦É¡’äKÉdñxœñMÔ!‘¬Æãq®kàS­œH$ …pèÐ!û¶¥¥%û¶{î¹³³³Èd2n—§N£ z!gï*6Ó4«âÛ4McÂJþÆÅ–ȯ¦§§€síˆ:Ädu<´MX …J¥æææ …årÅbÑh@FQ.—Q(Ü.Sf «¢pŽ6Ã0ª~gï*ùišÈår¼0!ßÉf³0 ƒC‰:”N§±gÏ&«c mÂZ,‡í݆¨jë¼ßKÄæF¸G%:çwÓ´[bM~Æ})Ét]G6›Åòò²Û§Bäyš¦açÎ0 GŽa²:ÚÎa (—ËU·‹E„B!Ï&©N­æ€ðâžü$—ãBKäétš+§’ïhšUU¹ÈQ †aØÃæóù¬@sss ï‹F£ˆF£žêYZ ±á¬äbaÜüÌ9üÈoZ5°3Ã0ÍfqäÈ·O…\ÔQ XsT> ˜œœ´ç°z1QŒ6U™°Ò(Ó4ÍBÆ]ÈïZí©M4êØCÔX2™D*•âù1×QšÉd°´´dÿ¾´´„©©)ÌÎκ}þ 麵bj«VUeÂJ£ð4.œ[8ù‰¦i¬Ë‰j˜¦‰H$UU¹•u–°.--!ÚÂççç±´´„ýû÷{rkE±öX=rä›>¦ï×=g÷¦…$Õ¯ædí—%Vë¹^³ÓÇwz¬Ú›¦•å·"Ëõ¾®×O®=çÚyÍb³ÐVç×+™l~µ‹±hZÓ÷pÇñãøöˆ_ ˆ!d{÷ZÃ]o oGÄx£8kv¬f­­b¼7ß§fÇnãµù}rjU·èzÝ}¯}ê©öïãÑu Ž{'2M“½GDº®cïÞ½H¥RLV @ «Ø®fjjʾmvv+++(‹uÛÛxE*|ö³ïA6Ûx¯UMk’´Š @ñ€Ú%“ÖÅ¡H¸œ²l]46º¨ìäb¹Ó lq1¬(õ a£cÕ^Ü7ºÀnw~º¤ïC£Ç7bšÖs]| þp·kahôY¶ž§iuÇ+=Š3Ï?Qfš&ÞñŽŸE.×C²êlØhãB£/Kí>˜º^iTht2ºnÅjí}θ±áÔ(ƳY«Ñ£•FÇÊf«BIªŽéncÜù<§A|Ÿ€Jýâäü¾×Óo «oWQ­m¨h×°Ó(¶ý­jÔ`'êAñý¯ùniŸ9Üy'pâDÕíÊÖ‡!÷Ì\ým·Až}?dÙÑ6óÉOÅ\Ð?õ ðíoW=Îy,ã…í0nù(ä÷^m+†u~^|ñEÈí:\àpw¢Š\.‡d2‰ååe•'[ÇsXsU½Ø«ZË4 ¼ímA:ýûPtÜišum}$uö¹®W~ÄEa³žž^VBnt1щaK–{;V/ÛKôz¬^Þ?ñ6xî³[·âųs²G•išxøá­hÛø((gœ‹†•Fïk£îv¥>‰íD¯q×Kì-/÷v¬n óûÔâûþ33ø¥î_Ñ“ Ãp÷‚Þ0꿹œ•žM8µÿw¤§®$i/½ úû>j/†–ÍÆÊƒÀ·¿]÷òñûGÈo¹Ù'‘{î|¤þGe¤P.àÑ«`žÙ ýd€õy'n~©Û…þHÉßy ­v‘€~xa·} sí\˜O½ŒcÓÿ<:=RiÛÉfÜ_^ iýßB¾úè«—›7ÿó~ðm˜ß¿Ú?½êÙ·Ãn›¹ê6àªê2É2 œ i- ÀÑîe𮹏æ˜Ö]•çÅIŒ³ùvÌt<€úç7îºë øIDî¼ pÃŽk·ß•áÊåröjÀlÀÙ FS“jGÜ4›~×njWŸF½]ôÌ3]É· «y¶9V’ê; ìϧ—"0M'OniÝù|ì˜Û§I´aNXÏŽ00Oœ~ýoA¹îBH’èUÜ]ÿ}È;~å§¶@ŠXO‘b@6eTÚyTU|­Z¯_û7í¡ÔŒo‡»wõÔψp&"!L¥¬g3/Â8a5Èè \]¶Ÿ 
ò­?‰lÖz^*¨²ìÔ?!÷­kxüØå_CüÃ[`ªqhšcF$íÑÛê/†®CÛ¨ª=0O’˜&dCƒŠmP¯Ô«­~²ª’E®˜¤ÞvÖo³’ŠÔâߌ‘éL.—Ãâââè'«"YltqÖh=g‚(Z/jŸã|Œ˜Vç¬3jG¸5›šäœrÒlúª6_'§‘G½=¹cGWï8a™™©»-“ÉààÁƒU·-,tÞ$T,¡P¨éý@ §­s¸g¹mñ X «®÷6’”¨ã‚®ëˆwèbþ¦¦Á|%¼õ­BW }r?ðÕs!ÿ¸õ·¸Ò«ç5|©1Ï#ÆÖ°b€o’U±þ›þùÇíÛôÂË€ø[ ˆý÷Ÿ¨ëUÔÃ3H>zðòËa@Ú¶É~®¼ù”O©Tõ¨|Àb!ŸA ^í8‰' é?nýÉêëhSBücW ŽSâM¯iE²†Ï˨ilÊçÑɪ—’dÈŸøºN» ÎwWVþy 3ÆÛ1 Ã{×í­S½Š†a%‡µ÷‹çÔ~·%±íÄ^¦ súÝ´MX'&&ª\šx'ŠÅ"‰J¥k~l:¶_³T*!‘HØ_&UU‘N§»>NÝFóˆúlXñ-ËrݺAMW#ê£aŸÐñÖ\Îj>ÛJlN'‰]ŒÄû­ äþ|Ó˜q#¶½ž°jw=í³OgÎ@ÿƹÈßüI •²;~R)@ãú?€~é!_ò,äןc½?a@}ÏíîÇÚ^E¥°€Ê¯“MÏ¡êÚ[U¡t{aÝh¸Ý˜vŒw"—Ëu×(ÙOét¥‹ßÙ{).°-xØëZŒÁžµMXƒÁ`ß÷[™™ªª˜››C¹\F&“A"‘À=÷ÜÀÚ6' âСC(—ËØ·o2™LWçQ7'Ä4­ý?96‹jñ­i$IªÕžLV&Ñ Ð0b\èfÁ%MŠa1$ ¦Z‹Åãh¿0ÑYÃŒmÀŠo/ô*¥ÓÖeR*@Óüèy0~°úÑ  ¾ê^(?ñjÈŸBâã?LXÝÕ8 Ôµ/€—ãÞ7ìïD.—Ãò ¦é‰'Å0ÝFCnEBÊdÒÓÎö …Êå²ø@SSS(•J(‹(•J( v¯n @4E¾ËHëö5Û»× Rüa ÿV|5«JNO[òâ¢ÛoùÜ0chqAv$±`]oäóV»Íν7‚ÓxvlVãú°{Xsÿó{˜?ˆ½;þ¹jN}½.ËÖÂ@¯ÿ ŽÝ·ŠÅg?ˆøßE¡~ö6HS7ñÂ~„¹ãíˆmú2w5›µþ ìÙcýˆÅ€TÕº>jô!cL€®]* 8ìØdrr²«½XC¡ª¶ÆY]]`}1Ä¿CŽC¡=t¡S¦s_»éi«…W.4`Êo]×qôè[üð¤^ø ëF&«4ÊqAÓ´ú ðºOS)ä4¹ªULÝéeú·aÇ6`]«¨ƒ¼X6 $ÿÃKпq.ðƒÀxòB¨—ý+âѧ¡ì3 X_”ªEŠeÊÒàzÓÈ=nÄx;¹\®ßðP^ùNÇ «¦i8pà@]Ð.--Õo%T%¸¥R ™LÑhÁ`…B¡ésËårÓ=`Ÿ~úièºn[°/pDÓ;ƒ—Î* ¸ë®»ðÜsÏõýµßðä“O"“É`rr¦iâÂW>ù®ÿ|PâÕ9UÉd2xâ‰'òÚêÃÃ0«Ó›N[Wر9Üw쬬¬à_þå_úþºÃª¿ÇÄV1;dy@6 õü×#öKÛ ¼;p¶'©»Õ9i¸Dý]*•ú¾àѰëïNär9ëtGÓ´zMµ³;×vDñZÈóÄ5x·:\(H$011t: ]×íŸt:‰‰ ìÛ·¯e ×ãæo¼ñF„ÃaÌÍÍÙ·7³¶¶Öô¾-[¶`ûö혜œ„¦ixw ` $&«Tebb»víÂ%—\2°cô;¾à‚ .Àää$&&& ë:þ\ þÉIVÐTgrr²åÅs? 
²w2M³zH°aX+ñ8œih|„B!ìÚµ [·nÈëºþvª‹ï™¶†CÂe`_ö¤RP?{”ù(‡=ŽQ*¾áÕßítÜ»j•¡¾†a5XrÔäH×àÝÆwG=¬óóóMW SUªª"‘H`~~Þž´ÝJ±XÄüü<:T7ô ™V-M[¶lÁ•W^‰p8ŒÃögøõ¯|Åj…aM5‚Á víÚÕ²bÞˆAÄ7`]ðˆ–Ñs/}ñ‹Øü¶·¹ð’×…Ãa\tÑE{ýA×áNzí&æg{W ÃZšàÈ‘¿™ä9¡Påry uø0êo'£vóÑäÒ’ Žüú Hàuü¨õ÷ ‡Y·“Íf‘jÕènV¯iV’ʨ‘×ë5xÛÖB¡€R©„;å㜓¶ÛI$ö8úÚ/‡h}t=îvXÄÕü Ê_ù “UrÅ ã>qÑEøÈ'˜¬’;†ãBUï“Ø =G:Í sê¿aÆ6€žš1M }G Éë@äŠo"ýñï#¿lBú“»ýöÑvŒ7cFëé´µV¢ÇޱÂsmÖb±ˆ`0Ø6XEз˘5MC©T‚ªª( U?€•y‡Ãa¬86MÖ4 ‘H¤ãBI’„«ØóD.F|Àµ×Þ€>4ÌumX14è}J§aÎ~ Ù¬·õÍ0c[<·Û„UŸ[Áží%˜ù#P¯þ–ÿê9ófÈ7¾Ùí·FÀ°c¼•l6ÛzïÕXŒ‰*ÙÚ mçftCôÀÎÌÌÔÝ'†~ÍÎÎbffÆ^~;`ÿþýn¿WDm +¾?ðãüóÝ.-£aÖáµ[Ú$_‡Ü¶ýˆÅ¬íkˆúÉë“n\’Kÿ€#ÿ¼ÒÛnrûí¢ä¥kð\.‡#­ætôc›ò¶ «˜+ÒnH€hi7æ~vv¶í b¡Pwß}·ýÅêvL<‘[†ߺn­'F4lìÃÍšU•Ì÷üŽ|’±Oƒ1ìë“N÷`ÕõÊ|miéÝ~›h„yå<›ÍBUÕJüëzež*Q%¬¡P‰D¢nï&A¬6‡;ÚÚ¦µKoùÉFãÛ49E›¼­u¸ØP^Xüü¥n‹¨o×'îÁšËY[Ô°¡††eÐ×à¹\®²Ø’h‘Y^v»Øäamk377‡R©„}ûöaeeÅžŒ]*•°²²‚}ûö¡T*u½÷õF×9Z†ÆƒHX9g›ü¦í Á¦ ¤ÓH¥¬ugˆü j±%g²Ê §:ÚÖF¬&–Éd0??_w8ÆììlßzW‰¨5Ã`ÂJþg$I²÷‰çŽä'ízXÓ?ÿä‹'sûD‰ú(—Ë!‹Y 2LV©C%¬@%i[׋E{¸p0D¹\F¡Pà0^¢!àÞ“4D+¼~¨ˆXŒ ¢ä/­zXÍÿû8²ÿôfùÞ·O“¨¯r¹–——­d5‘`²Jé8aÄ7µ­‚Åb333õ›¼Qßq.Ã0Ã@ünþ˜­4ä/­\JÿÒ7{ÏB’˜°’†Y–!g³V¢Ê-k¨C]'¬DDDà Ë2ŒÅ<¤Ü ¶ÑŸ˜¦Ù4a5>s?rߙıâv·O“¨¯²Ù,öƒÖ£.t´èÑ0‰-möþŸ„ùî¨Û§CÔWº®Wí1lßžÖ°÷×.Bêã/¸}ŠD}—Ëåð“wÜÁ©kìa%""ÏÑu¯ô=øaèû¯{³Û§C4xÉ$¦3û‘úì¨?w¡ÛgCÔW¢‘æâ×½ÎíS¡Ä„•ˆˆ<ç¼çžÃß}÷ßâÿ¬p00ù¦iU{ GžçâbäO¹\®£}‡‰i›°‹Ed2™¶/T.—Ý. 
ùÄ_ÿÖ·qÍ寂ü¶Ü>¢ ëô´X†Øår9;vÌíÓ Õ·9¬@€[ÚÑÆ>ÿøÍˆ¦·¹}&D¡ë:$I‚aX«¾s±Tò»O?ÿ¼Û§@#¬m«Ø•ˆˆh²™qéåbûöÍnŸ ÑÀ(² ˜R)yã/FäaßüØÇ°i ©w\%˜ˆˆ<%þÇ!\ð†ÿÅùNä[º®#}Ë¿ÂXÌ»}*DwîW¿Šoÿø»}4ÂÚ&¬…B¡néõr¹ŒB¡àö¹‘O=÷ÜnŸÑÀ(Š‚ìW^y:âö© –iâòo~WÝv›ÛgB#¬§ÖƒbffÆís'""")†aà¹Ç"ˆI s80ù\.‡üÅׯŠMÔ &""Ï‘$ngCþdÎ+O"ÆEid³ø³W^aÂJÂ}X‰ˆÈSîQIä««Ïã;§®òá׺}*Dƒexå•WpÞßèö™Ðˆc+y{Xɯ–?ñCüòEwr80ù_6‹‡¯¿ž ´aLX‰ˆÈSt]wûˆæäïÅu‘ÓnŸÑàÉ2¾|ÑELXiÃ:ì\d©\.×Ý&pÏV""ÚEQxC¾õÉM?ƒ¹}ÅíÓ ¼x'’I¨×^ëö™Ðˆk›°NLL`jjªîöp8ìö¹‘qÿUò³ÄîÝÈ_wÛ§A4º®#‘H¸}4âÚ&¬Á`³³³nŸ'ÑÈK¥RnŸÑІÁ5 hÃ8‡•ˆˆˆhHEqûˆ†Â4MNï ¾èxk±XÄÒÒ¦¦¦ …šV¸\,ƒˆˆˆˆhŒE"Ð 6ÐP_t”°jš†D"`0ˆ@ `ß>;;‹@ MÓP,q÷Ýw»]"""""r“iB×u¦¾èhHðüü‰L&Óv´Q&“ÁO<áʱY‡Ó ­¬¬à®»îÂñãLJz\Öß4 ¢þv£!¤oõ7W¦&Ä5x·õwGû°†B!:tKKK /&¢Ñ(¦¦¦êfêE«/ÄÚÚZÓ!Ç[¶lÁöíÛ199¹ás ›˜˜À®]»ðØc ýؽÆ7\pÁ˜œœÄÄÄÄÐÏ›FËää$xàWŽÍ:œ- ammmèô¬¿iDý½uëÖ¡»/õw8 0r9Äb±¡—¼M\ƒw[w”°Ö*Àsss˜E±X´o…BÍ[íT«!Å­â-[¶àÊ+¯Døì…¨™`0ˆ]»vµm„^ã°.xßÔ‰p8Œ‹.ºÈ•c³§Akk »gýMà êï~^[wªŸõ·išìa¥:½^ƒw4$Ø) Û?ýþB‰ÖGgæ]*•úÒ{Kä6Æ7ùcœüбM~×Ï7 Ãíât°Z0D8®ZÁLÓ4D"·OhÃßäwŒqò+Æ6ù]¿bÜ0 ®L}ÕñàašÅÌÌ …Êå2öïßïöiõã›üŽ1N~ÅØ&¿ëGŒsÁ%ê7WÖp8 ]×ën…B¸ûî»í¹²œ÷A£ˆñM~Ç'¿bl“ß 2Æu]ç–6ÔWžìa*se‰üˆñM~Ç'¿bl“ßm4ÆMÓ„ªªnƒ|ÄssX‰ˆˆˆˆh4éºÎ9¬ÔWLX‰ˆˆˆˆ¨/¸èõV"""""ê Î_¥~cÂJDDDDDÆáÀ4LX‰ˆˆˆˆhÃLÓdÂJ}Ç„•ˆˆˆˆˆú‚+S¿yv["""""LViØÃJDDDDDDžÄ„•ˆˆˆˆˆˆ<‰ +yç°‘oè: ËÀF·„Õ´Æ·K (Ö¿“Iëÿ©”õÿl0ŒÆÏ«}L_ý{³óS”Êù4:v-Y¶#Ê%Ë€˜Ê¬ëÖù½NqfÂJDDDDDž#’/ ’ìˆ$MÜo•DJ$e¹\ã$Í0¬dS$²¦i%iªÚ8IK§­ÇÖîÔ#ËÕÉ[m"&ËõÏqR”ê„:¯$}­ž#Ê#ÔþÞˆHn›ýÞHíùÈ2H4~¬³QÀ0¬gÂj•Ï0ž}v®¿þñö'áÀ„•ˆˆˆˆˆ†NÓ*I©Hv€JÂdÖcœÉßâbëdèO’Ö(AìäXíOñÚ£D’:ëíä½(Žâðá'»:>V"""""(M³~TÕúÑ4«GS ¯Åê9E©¿­]²JþÄ•ˆˆˆˆˆú®vˆ¨ªVN‘¸µÃU‚‰ˆˆˆˆ¨¯Òik¾©˜ã(TöR·ØÃJDDDDD}#†û;æö™°‡•ˆˆˆˆˆúÂ4­UyÝ>ò &¬DDDDDÔÓÓP"êV"""""Ú0]·zX›íÙIÔ &¬DDDDD´aî]JÔ &¬DDDDDDäILX‰ˆˆˆˆˆÈ“˜°‘'1a%""""""Oò|ÂZ,Q*•Ü> ¢`|“ß1ÆÉÏßägŒoòŠsÝ>fJ¥‰ŠÅ"@UU¤Ói·O‹¨/ßäwŒqò3Æ7ù㛼Ƴ=¬óóóƒÐu÷Þ{/ŠÅ"2™ŒÛ§EÔŒoò;Æ8ùã›üŒñM^ãÉ„µT*¡P(`jj F‘çÆNäŒoò;Æ8ùã›üŒñM^äÉ„uuu …ìÛB¡ÇÑ; …±jíòSyß™™™qûXÞ1ÆÛóS6nåe|·ç§úlÜÊËønÏOõÙ¨”דsX[})Êå2@Ýí‡Æ_ýÕ_á _øvíÚåvîÙgŸE¹\¶çø]?Ëûä“OâÉ'ŸÄ–-[0;;;ô²ôßð­o} ïyÏ{°mÛ6\vÙeC?ïa;zô¨¯.†YÞ£GâùçŸw­,¬ÃÛcÞ»'žxÏ>û,.¿üò‘©ÃYûÛ êï/}éKxßûÞ7ô²°þnõwïÄ5x·õ·'Ör¹Üô¾µµµ†_–ýû÷#" 
º}úCQ.—±¶¶ÆònðõÜ*K3Íâ>ñ‰OTµxú]±Xdy7øznaÞëðþ¼ž[ei¦Y|³þö·AÔßn½¬¿ÛcýÝŸ×ë†'ÖV_ÒfoV hz¡ïG,¯·^¯½Äw»çùËë­×ë×±Y‡³¼^|½nô߬Ïüõ7ë3?óBýíÉ9¬ª‡%”J¥±iÉ c|“ß1ÆÉÏßägŒoò"O&¬Á`áp+++ömš¦!‰¸}jDÆø&¿cŒ“Ÿ1¾ÉÏßäE›Ö×××Ý>‰FŠÅ"fff íIÞ cÕOþÅø&¿cŒ“Ÿ1¾ÉÏßä5žMXT­H‡Ý>¢¾b|“ß1ÆÉÏßägŒoòO'¬DDDDDD4¾<9‡uÜ …¦÷‹Å–ÛS‹Å–{fµ»ØÚ•Çù8?”—,ÍbÜoñÝI™ÄcüR^bÞìq~(/õߣøYo$¾G±¼Äú»Ùãëq+/ßgîvy™°‘'qÑ%""""""ò$&¬DDDDDDäILX‰ˆˆˆˆˆÈ“˜°‘'1a%""""""ObÂJDDDDDDžÄ„•ˆˆˆˆˆˆ<‰ +yV""""""ò$&¬DDDDDDäILX‰ˆˆˆˆˆÈ“˜°‘'1a%""""""ObÂJDDDDDDžÄ„•ˆˆˆˆˆˆ<‰ +yV"r•¦i0MÓíÓ º~ǾišÐ4ÍíbõV"rU$®ënŸÑÐõ;öu]G$q»X4ƲÙ, Ãpû4ˆޱ>\LX‰ˆˆˆhÃr¹/âi,0Ö‡ë\·O€Ã0ÍfënÇãe@¥%G–e¨ª MÓÇíÇjšf‹ÅbPž/™L"‘H ›ÍÂ4M(Š‚X,f¿¦$IH$]?–¨Ä7¤Ói˜¦Ù0Æúß½<ž¨WÈýT*…t:]u»`š&ÒétÝóˆzÕ*¦5MƒaÈårUUëbTÓ4Ö×46몪Â4M;v%IB<‡$IÛÝb«˜¦ ]×íMÓì À êt: ÀjÑÙ»w¯ý%¬‹½{÷Ú¯µgÏžª/™¸ß4M˜¦‰½{÷V EK§ÓH&“]?–¨í⦧§íßkc¬ŸñÝËã‰z5ŒØŸžžnØÒoš&"‘ˆ}!DÔÄ´SmŒ²¾¦Q±ÑX7M;wî´s¹\Õ” Æv—ÖÉSN:µ®(Êz"‘X____?vìØ:€õcÇŽÙ‘ey]UÕªû9bß¿¸¸¸.I’ý;€õÅÅEûwEQÖãñ¸ý{"‘°_¯›Çu«6¾××­˜sþ¾¸¸8°øîåñDý0¨Øw>?ŸÏ¯°åŒk¢~kÓªª®çóyû÷Úe}M£¨—Xõ±ó5TUµ¯çÛÝa«ÇLOOC–e¤R)V‹ŒªªUÃÇb±˜ýoMÓ Ë²½:¤ówçbÎçK’dIh¤›Çu£6¾UUí;ãoñÝËã‰6jP±ï|¾ Zé+ˆú­YLתQÖ×4jz‰u·ÓÓÓÐ4 ’$!ŸÏWÅ3c»sœÃê!Éd†a ŸÏÛ·µÛò@ ;C†…F1Dnjßí0¾É†ûbþÔôô4Ž;ævñɇz‰i¢QÔk¬Ë²Œ|>oOå3Mñx‹‹‹ni$1aõˆl6‹l6‹|>_ÕÂ"IRˤUQ»ÕF-ðœ·D^Ñ,¾Ûa|Ó¨s#öaš&r¹œ½Q¿ôÓD£f#±.NZ\\Äâ⢽혢(U‹¦Rg8$Øt]Çôô4–——ë.Db±˜=Ù€½â˜à\…L¿‰¼ U|·Ãø¦QæfìK’d¯XÉ­¨_:‰év#ÈFÁFcÝÙ³ X=®làéV+þF"lÚ´ÉþI&“e‹‹‹ˆD"ˆD"سgOÕ0qQ’L&±gÏ{Éååe·‹E u|·Ãø¦QævìÇb1Äb16ðPß´‹iI’L&nB4J6ëb ›;w"‰`çÎe™k ôhÓúúúºÛ'A͉!`²,Ûû°Š=šœ.Î…8ÄP2"¿`|Ó¸bìÓ(q^³8”!ò›Nc]Œ”$‰S™6€ «Ç†;wâÈ‘#P†a ‰ ‘Hp <ù]ò8ç`Ó4!Iâñ8“U"""""ò=ö°‘'ù¦‡õ¾ûîÃç>÷9¼á opûT†â™gžÁ3Ï<Ãònàõ^yåüþïÿ¾ÛEëØ‡>ô!¼ñotû4†fܶ®éwyu]ÇÒÒ’ÛÅêëpD¾uëVüÖoý–ÛEëëoDý=??`0èvÑ:ÂúÛß¼Pû&aýሠ/¼“““nŸÊP=z/¾ø"Ë»×{ì±ÇÜ.VWÖÖÖÆæó€x€åÝàëÖáþ6ˆ:¼T*¹]¬Ž±þö·AÔß[·nu»Xcýío^¨¿}“°nÙ²W^y%Âá°Û§2صkË»årÙíbuå‚ .›Ï~á~åÝ€‹.ºÈí"u…u¸¿{ÎúÛßQ·‹Õ1Ößþæ…úÛ7 ë¸ ƒ#3T„å¥^D£Q·O奷:mÜÊ;îÆ­>·òŽ»q«Ï¼PÞsÜ~ˆˆˆˆˆˆˆaÂJDDDDDDžÄ„•ˆˆˆˆˆˆ<‰ +yV""""""ò$&¬DDDDDDäILX‰ˆˆˆˆˆÈ“˜°‘'1a%""""""ObÂJDDDDDDžt®Û'0Nr¹4MƒišˆÇãPU`²ÙlÕcMÓ„aEQ ª*dY†¦iÐu’$!‹AQ·‹EDDDDD4®'¬…BápØíÓhÊ4tH¥*·‰ÄÓ0 ;™4Mš¦Ù ©3‘4M¦iBUU¨ª I’Ëå0== I’ìäS–åªc+Šb¿n.—³‹Å`Òé4t]¯zž¸Ÿ‰¬7x=¾ûM|D 
¦iB×õªÇ†QÓ4šÆ-Æi|0¶Éïã4*\MXK¥fffê.d3™ –––ªn ‡ÃXXXêùår@2 ¼ð 8yò“(•îaˆÅbPUŠ¢ØÉ$¨ªŠÅÅEûù"™”$©îµUUµ{PkU'I’ÇÞW{»aÐ4 ôG+øÖ·¾„mÛ¾ŽóÎ{†aT%Ë€•XȲŒÍ›oÀ·¾µª Ȳ•@8“ EQìç|ñ‹A:D ð n¼ñ$Þÿþ’}\Ã0 ˲]–/~ñEÜu×qæÌf¼ím".¼ðÐu¦iB’$;©·8ð˜œ|ïxÇ}ueçñÈ#œ8±/½ô8sf3yäi¼ôÒQH’Ôð}|ðÁIÜwßÏâ½ï]Á®]ß´o?~üxË÷½¼ß­ˆÄS–e»w?›ÍV#Ähgâ{‘L&!ËrÝçí$I4MC:†¢(U±äläœ1Fîå'j…±M~ǧQâJÂZ*•P,ë¾Âêê*¢ÑhÕ…j øy麎\.‡œÄ7¾Æúú)¼úÕ³¸üòÿˆW^¹‹‹ï¯»Xn–LhÛËÙé…w6 È2 ª€aXÿv2 ë¡ëq> Äb@"ˆ\ùÏþì0t}†aàÉ'çŸÿAÜyç…(—Oàœsþ'x.¿|7ßüqæÌf<ÿüÛðº×­C×uœ>½ ÿ÷Q\|ñ)üÂ/|ðÕ¯^Ï~6„›nú \|ñ)˜f Ÿþôsxë[ ¸øâSxì±·àÖ[ãê«_…O}ê5øå_~.¾ø€«ðçþ84MÃG?zNž bïÞcˆD¼÷½ßÃâbKKïÂ5×<‚§žz-`ÇŽ',áäÉ î»ïgo{›„M›"X^á o8ÇŽ­áرÊ{ò®w}ï ozÓfüöoßàr—㓟|=y$€ï~÷\üÜÏýí@âÈ«ñÝ̽÷–qôèQ|ç;V|èºn'ž†a`ïÞ½vCÍòò²Ljšv2Ú¨aFÓ´ªr¶"†º‹ï…išöhçë°_LÓ€ªÄZüä@>?¯Å·`š&’ÉdUR¨iòùŸÆ®]»ðË¿,w<œ\ co§Ódt]aTZ.Ð4àSŸ*#øLóA¼úÕ·`÷îSxà¿Å¯üÊoáøñíxî¹ç°}ûvlÛöu†þð¿àå—_‹]»>f÷Þ*Š‚3g6㓟|'NœÀí·?޵µ7`jÊúÜî¾ûœ9³ 7ßüIÀ}÷ý,àôéÓ8sæN<õÔõxË[Žâ­o-@ÓNáôém¸óÎÛ°k×7qÝuVCÈ¡Cûñ _Ç /lÇ[ßú0~ã7Ç?üÃøÔ§~ˆw¼ã>œ<Äý÷ߌ;Oø,þàþ‡/àoü.Þõ®oà²ËÞûî»_ÿúf¼ÿý%\qÅ=8t¨ˆ»ïþ5\wÝ“øÐ‡Vñ7s?Þó«éßãÁïÀ+¯¼‚ÅÅspå•ßX,y5ÆÇ‰zߨ¡ˆz7α-íÓ4DÃa.—«jäk÷wÂÙ¨éLLE+G[õu³TNñx¼ê1ÙlÖn€t6ªŠón”H þÝjT¾F÷·zLmyECë sŒÓèÚ´¾¾¾îÖÁ …BÃኢ`nn¥’5ätrr²íÇJ@cvv¶ëó8t¨ˆÿôŸnǾðß<1÷sçNë¢]ôªÖ2 qAïö™Ž®ÄK7ÇèW|ÀÌÌLÏCrLÓD$±RMÓpúô6üÚ¯ý´k1¯iVÈ¢ñxe¾x.g5~Vc„¦¢–Êf­¸÷§Ó•Û$ÉúΈwî® ‘HåÄãÅk6»žÈ媿oŠ2¼ï_£QØH¼tÊ+uø0‰5 :éÅÎ `‘Õ±. õ¢¡E$¢·¨¿À `}b‚3Á­½`¯M~#š é¯}\³ò7z¾è¥sNï…óõœ †˜²"IÖ××qíµ×âw~çwò9{©þî7Wβ‰DUÄ•HN5MC,«K‡¥Ubé,O6›µã,—ËAUUÄb±ªç7êé­mðiw¬Z²,7gL7z]Ã0ìuLœÉ¹iš8~ü8¾õ­Á5:ãY“7ô/žMXƒÁ "‘Êå2VVV077‡h4Úôµr¹þ÷ÿþߨ±cGG"àMo:ƒóÏÿär9,//·¼pD€|~ðï‰aÓÓÃ9Ö¸ZYYÁç?ÿyœwÞyM‡ÄôC?ãn¸áìØ±ªª¶}l-±À—,§ìöX¬z1±a .¢‡½¶QÆ4­$TQ†›úÉÌÌ Ž=Н|å+=Ž›u¸²Ù,Òé4 ‹!uö ¤i²Ù¬=LR\€Šhºê»Ð¨Æ™è9Cg‚Ú(at¾–H>D’â\;¡Q‚èšï\g ÚÇŠ‹o1µ@$âqâ>g¢,Þq[mÙÄí™L_úÒ—°cÇŽºöûÅKõ÷F9…kÔ¾÷µ ÎÏkTzñÅ. 
ñx|dιQÿéŸþ)B¡ÐÆ_°‰q«¿ÉÄ5ø¶mÛÉd:⺋xàõÝ»wWݶººº~àÀõgŸ}Ö¾íÓŸþôúõ×_ßöµ8ÐÑq[_¿êªGÖeY^O$ë§NjûUµžGþÐM¼läýŠïõõõõÛo¿½ësÈçóë±Xl=_W”õõDb}ýÈ‘úÇÅão„XlxÇg½ÄK·ÜªÃiqqq=•J­çóùõ|>¿¾¸¸¸žH$ÖeY^Çãë§NZ?uêÔz"‘XWžýØÙ?ÇŽ[ÏçónÃ÷/^¨¿7âÔ©Së‹‹‹ë±X̾ÖY^^ê9PïX“Ÿõ/®okS+ ÖuG£Qd2™¾-¿ý‡x'Î?ÿ|üÓ?鸎=žÔÈoH&“ö·rù?á駯Áâbeá¡Z’$æþ=°æþ8äŽaÅx¿†á‰PÝû©ªjÝÈT*U·PÀ•¬ýÌë±-©sO$ž˜êD£Áë1NãËs k¡P@©Tªz°¶¶ ?«”†Ï}î4>þñ[!I[Ü.n½{­•ÈŸß@eKšåå#åÎæ@*Je˜ð ™foó1it #Æ7J,$ãnšËåJ¥ºÚ˜‰Àxñrl§Ói;†ÛëuÃË1Nãí·O V Àüü<ŠÅ¢}ÛÒÒB¡P_ÆòOOOcÏžÞüæî’UðVä´.çûÓˆt|›¦‰ééi¼ùÍÑAÔI‚(ÖAÓ4ö®úÝ c|#ÄJÙ{öì©ÚŸZUU9r¤«d•Æc[Ôùº®#ŸÏwµ2;Q-/Æ8àÁÖP(„©©)ìÛ·áp«««`/t±étúì l2º­ÓeÙZ)tЋÔ8÷%ÿd|‹×±.º¯è*ÆEOì q8°ÿ :Æ{¥ë:öîÝ‹x<Ž#G:ŸB$x)¶³Ù¬=ü7‹±W•úÂK1Nääj‡®>8;;‹h4jQú5f>—Ë!ŸÏ£×E9”‘º1ìø/ù'\˲5dw×ñºnmaCþ0ìÓÝn5x"Á«±-¶(S…óSiC¼ãDx®‡Uƒƒ}{=±Ç˜$IH$z{ Y¶†4jÄM2i]Ì31ö¿~Ç7`µ¸'Îw/{wŠaÁƒQ&ËŒïq1ˆï–؆&‹!ŸÏ³W•ú­ØÖ4 ÓÓÓH$ˆ³åÈ õ7‘“gÖ~­†Õ‹ÔK£¤,ö‚~CŽÉŸÄ‹ªªBÓ¬Xêv„Ø0b£ÖhÐLÓD6›µ)óù<û쳸þúë»:ÖX$¬ÙlÖ@Uí=1TÀ9ï<¶n±­ëVï-`]”wów…‹ÑÐFhšfÏ]5ŒÞGLO®4¶¶Íá"–4†a —ËqA%ò± ðââ"“U¢.‰dM×uär¹º$Ñ™ø%‰®þvtº"·H4u]¯JVkI‘ü5ûž;ÏU×uûuMÓ´_§ÕsÅcu]¯{Ĺu›DÇb1;®MÚk÷.w* øêW¿Úñq€1IX5M³{Ÿ6r±,IõóÎ9­šVIXÅÐc‘ˆŠup²Yë¾X̺=›­$ÁL½Èf³UÃQ6²Ÿê ·¶‘$&«48étñxœÉ*ùŠXÙšÛÕµ'Sg&®ÜuSé0ëßw¸ßõÁF_¯—Æ´-[ºÛ^Ô÷ «hM¨´Jl¬'Ó뵋7u²˜“¢T/<sÕTÚ±Y¼ÐëmX^¶þï-àlÑ´JÃJ6[IB%©zȯx¬ø¾Åãl¡Á1 ÞöAä¹\’$q%¢6ĺ€ÕëתwF“ïÖ\.gooÖJ¼=îúÑÑCý$æNôk¨˜¨ßuÝú¾Fe@íÜoIª$¤"Is¼ÅcE²ËN/¤t:m¢!òÓ4‘L&qäÈ·O…È“r¹œ½Ÿ¢(X\\d’êc¾OXŦڀuñÍ„‘üÄ0Œª¡™¿êÔIƒ~,V=ô˜£È bá ö®’ŸLOOsˆ;Ql6k×ùªªBUUÖýcÂ÷ «ó‚^Ó¸/ùKíJo²Ì¡·4>Ä‚4Ëb,;‘är9˜¦ÉQDÓÓÓö÷‚ Ÿ®ÖB¡€p8\õ»¦i˜œœ¬ºÏ+œ+p™&VòÓ4¹%Ó4‰DxñB¾“L&íU߉Æ€“cìœNT,qã7bffƾMÓ4ÌÌÌ P( ŸÏcff+++n—§ŽÏ®ëÖü:^Û“Ÿˆ½­„½{»Ò/‘W$“I¨ªÊiÈWÄ6|œ‹Gdå‘H’$qèï˜ë¨‡5‘H  !íØ„tii ¡P XZZB&“A4u»L6MÓÎîudí/ÉKò›F .±³‰üNÓ4˜¦É òt:ÍÞU{†a NÃ0 ¤R)Ž$£ö=¬…B¥R sss…B€r¹Œb±ˆh4Š@ ˆF£(—Ë( n—Éfš&$I‚,GŽðBžüÇ9ä¨lKCägÙl–=«ä;ì]%²m"‘b±òù<“UÐAk±XD8¶SÀjݪ7šuÞﺮcuõ·Oƒh LÓä… Ã0êVÇ&òƒ\.ÇQ4¶Ä"zpäÈ®MUÚö°”ËåªÛŠÅ"B¡'“T§“'_ÄáÃ?êöi „®ëU k.gí3Lägì]%?õ9!i\MOOCQ,//3Y¥:mÖP(„b±ˆb±À¬iZÝŠÀb(ðÄÄ„Ûe²=þøã˜Ÿ÷vRMÔ+1äÝú·•¬:÷E%òÓ4‘Ëåì½µ‰ü"—Ëqµk[b]nåDÍ´ …‡133ƒh4ŠB¡€r¹lÇ*—Ë8|ø0æçç‡ Ý.“í‰'þ•ðä[º®Û8é´•¬òz‡üL$«l}'¿1 ƒ 14¶¸Ÿ6µÓÑ*Á©T ™Lù|@ j¦ƒbii 
áp©TÊíòT¹à‚)·Oh`D«®šf-,FägÙl–5äKÎH¢q’N§‹ÅÿÔRG «HR‰F£ˆF£žêY¬áßùÎínŸÑÀˆ…göì<ÖVDÔw¦içø‘ïp=W¦i"›Íâ[Ü©ŽVÀš£zøðaÀää¤=‡Õk‰ªðµ¯½²l¸ÔíS!]סiÖ0`.˜J~§ë:W&_Ò4½K4–¦§§Ç9̓Úê(aÍd2XZZ²_ZZÂÔÔfggÝ>ÿ¦òùuLN¾ÊíÓ EQ (æihšÆ^(ò%¦q”Ëå¸Ðu¬í*Á€• F£Qèº]×F±´´T·Ý—<ñÄe¸öÚ3nŸÑ@èºI’ ILXi<†Á‹zò%&¬4nLÓD:æ¾ÃÔ±¶=¬b»š©©)û¶ÙÙY¬¬¬ X,Ömoã§N]Œ[o îºní%¢(@'Ct½ó%\»yl#¦Yy qn†Ñ8³•åÊý¢l@뱦†aýÔr¾V£c5S{,çã%É*Oí1Û½ÿÊçbÎS2Éù«4xQO~eGÐX -1î©SÏauÎU ¼½·©¦çw€¶~Píï"9J¥êÆH¤úwI²~Òië÷X ¨ÝÌ>™¬OÐt½ò\Iòùúû“ÉÊï¦YŸur~µ¥>³Éf\®ñãeÙ:nmY{~µ]|†a=§Ù{á|¼iVéFÇ2ÍÊçä|ÅãkÿÝÍ{ñó?Q&†Gf³M`šÖx¯5­ãŠb}>ÎÏÍù¹Ö>W|n±X} MO[1$âQÓª?ÇFq—ËYßç1jÅøÎÖyFýñtÝ*S»oô}jôÜ»·rœÚÇ›¦u¬N¾ïÎã5z/zý¾oÛÖ¸QK¼>Kì†vAoV¼È²õ>ŠÏB|wb±ú†¯lÖz¬¸=—«Ä²óöÚãˆúM¼øâ‹?ðüàcbÀ˜¬Ò¸1 º®#_û·—¨…ŽÖQ¢(Àë_ÿÑ0a5 +¡qîw&zJk/âzùbõÒõ¥(½«—ç4ºèÔùõò^ˆÄeïÅÙ‘£ì²ËÞˆ†£i4ÍJ$±,ËÕÁµqÞë纸t;¤'CO$;Öýszñ^Ž5Ìïû©Sí33ÓýëzTϽ«¦i5Žd³Ö{‹YßtÚʘήL)Ú4Ôû?iûù0ïÚì× ·@=ý9@Uan¾æý' Ë2 ö‚gæ¿\íãÿýûOCzåi¨»_D|VñCöÿxëKHݵ €uXÃŒÏÎ{5°õf(ÊÍ0þy3–ß·höú0 I8òMëœD;ˆñà!¿öe'6C:çYHç=‡Ôu_‡²OEöÅýÈ}*ˆ”b“ÝNSžÖB_S‚tþ…Ð_ŠÂ¸õ—ÿ©"Ìþ´¿X‡øQæÖó ]øą̀ú IÐ_TaèÔ[­çȲõ<ãíÿ¸øbàäI¨?v ½ý?ï¾ægN÷o®jŸÒ4@zûë¡L|Ϻqû }k; ßl}Û¶Â9†GUž¹ô^·CsÃ8r j;"€Ö#·šSպ癿ó? ÿõñ†‡•Î]ƒòîJYq}¶í ¹´Týê·æ.„ñÂv$®ºH¥ C±ÛGeÐ6Ep…¤ n¿¡ƒ!zWÉ{ 0þþ¨“e@Qª“šŒûOÀ8±¹ás•7¬AºîÇaBªê0ïºúÃ[až|æS/C~Ëx≓xå’vun¾LXu]ÃÎ5¾S–­ D¶hÒÓu‰D¢ñhhE±.ĹêùDGõ5S)Òi@ú£?Bìßo‡tê”u6 ýS"¹ö1Ä7ˆØÙ§‰‹Ìì·>㯭çǦ€Ø;KÀi»#<öc@•Ž~ëy7Cù] ®::Èe@RõëÿÔÄb€’Úm¿†=[C¶z(Ô´{TÚAœ#›Äƒµða\µÎK¨´ÓÎþÔ®æ¯@V€Ô‡늳?µ¬©mþ©\Û_|ö–úç«·n·\ÀÙ¬:7µæ_µ¹†õû…®ÆÖB‡?‹Q组µ6 l4(‡nÊU³ªŒkÿŒ¬^{óÂ|y+äÍ'¬Ç?ü0ÔÿFñ²Y+~âqÙ,"ÿqã…·çlÞ åŠR×ý- ªˆD¬S:vÌ:ŸäGσ~2à*àÜŸ£S·= E¾ÔnôÌÝ? íè…ÀùçWTÇ·,ÊÙ/†¦UÚ‡‹A7bö «:©Ûz¶zàòy|åþsû³Ó4¡iÚØÍ]Õa m}Ê{/dÙncQUÙ,²+—Âxö’†¯kò'>bÇf*(Б½õûÞ»ªkžÙ ã…íHíZBüwƒÐå’IÇ@¯d‘?ßÛô|Õ?´þžjZe0œd0¾ø8´ï\Ýð9ò[ ~|LHö`/YÌ¿+@ûFeú¨ñèž[{^÷Óݽ‡'¬3 Zë3™ ýu«§^«ždL§­vñ5UÓT@2«Gê#Ç¢Úì;ôz×YÇtb‘e¤þ¹Õ—nR¼tÕàùÆ7#ucû·¬¶^èdÜÚ_U••ðKþÛFŒ;år9oö®ŠFG¯¢ªÐ4dÿ¨l%‘g§-ˆrH]þÇPìGRS¡i•¸ÑÃ3H>z[Ýa”7¬A¾è(W—ëVEòÊ+×§fÒÂöc+³MÄRмù°u\Ô|R)ä[þ Û  &~eÕ:vÓçXß7ÕßùAí¡¬­RC7Ú&¬U . 
¡ÐÆ4*•J˜™™^ÓúV*•H$P,ªª"-æŠv@UU+aMàD.T|@*•B2 Äå•Îþ" À cÜÉ0ŒÊ>}ÓÓVÝ~ê”=¶Ô|ÓµØûümÈêòk§sÖN­f’Hí 3¶‡2‡U\…̇N@ûÊ9пùjફ~ò¿»YÄ‘N[Râq@ßòSH>ñ‹P"Ûª^ÊÞNílàò²#‰TUä[^‡Z_ĺ$íÖíh·Ër£F ~—{7¬wJ§ÓCŸ»jbI&1}è§ìd/¿ ã…í6Ÿ´ùÕÈÿÞ?Û «xŽ,ËPoÃ`·Û·Û·W†}ã3€TM\*…tRʪ޳[¶3®1ß6a ƒ}ßoµT*¡X,Vííê4??`0ˆC‡¡\.cß¾}Èd2]‡,ËV+¼[rÈ׆ߪª¯¼²ñ¥‰z0ŒœC&·m~©·ÿâ_³ºD³9 ñx)ÇÅo£*_mw%LtÖ0c[¬ö>(b(ªdÖ°Á>ãônkÈê{¶@ùèùÖúÊÍ€d ÷v&’ʾòûÚ‡ƒzFË0cÜIÓ4(ŠÒߘ7 Ÿ= ã_Ÿ³ŽQØ ù¹E|þG¡+ñêa°±bïx#ÔŸ»°É‹½@mcŠ Y–1¦9¢§t=‡ÕêÆ=lÿ>99ÙõÖ6…BZ“žÏR©„B¡€C‡°V$ŽF£XYYéþË’ËYMDC4ŒøÖ4@Ùú0ð^6ÈÐð ­GuÂz졉íƒqö"$—ë~M-¢VÜŠí 1 ˜>ƒÜ_¼„ø5‡T Éœb_)ŠŒØýFœ‰%a¸1î”Ëåú3üÝ4aþü¯ ýí‡ï¶üÔë~ö_军¥Á0XEÊöý‘Õqªi8€R©TuûÒÒ‚Á ÒétÇÄ£Ñ(¢Ñ( …BÝÜØÕÕUÕCŽC¡PÝqÛ“þǵïœ\3ŒøÖu@>ýu^­“+†R‡Ÿ¥ëº=çIzÝÅÈç­é«ÓÓö"¿D}3ÌØÞ³Û'释ÈmýäžþMÄny5Ì[½¦)…KPcnŸ®ëÝ/¶dÖ6^¦d-¤'©”ów@þö,«¼Ìçtò B¡€D"‰‰ ¤Óièºnÿ¤ÓiLLL`ß¾}(ôa«V_Šr¹Üô¾ï~÷»øò—¿\ùò‰å݈j¬¬¬àÀuó6†¡×ø€'žx333XYYñ`Ê™ûYSSC3338zô¨+Çî[~Öôç>UU±çìb ’d-’tìÃ\e28p?üðPÛ¯ú[Ð4­27» úÄMˆüæ›±sé£HïÉAþÍ›päáRr>Sõ·˜C:Lý®¿]×» œÍBÿ±[‘‹TöŸVÕJ#Œ|ÝŽ–;Q’w‰kðnëïŽzXççç›NºVUªª"‘H`~~÷Üsφ Òê ±¶¶†@ Ðð¾×¾öµø™Ÿù™Ê¦&¢Ñ(‚Á`ÕÐöaé5¾`ÇŽö*Üÿ厓XœýÑ¡Ÿ?†………¦ƒÖ·:L/¼ð‚½ç©/ÌÇ×ìì,&''‡^‡÷«þvêiHðÂßäźO‰ú»‹›v«¯õ·CG ëÙ}³³ŸzÒ/ÿ”wÞ‚øo^(g·gf¼ûB¯×àm{X …J¥î¸ãŽ–›šš²'roD«/hÇKks80yT_â@üÜÿÅäIýŠq€®cõòË¡ª­¶ž!޾Æ6¬E—Ú2 ˜¦5~zÚºI¹‘É* F¿c\ÐuÝZ,²™l¹Ðǰsñ#Ðßÿ1äõmXþâ\0lmÖb±ˆ`0Ø6PE·ÓÎÄÄ€êa ¥R©»/ŠirnyR_â@ü““l!OêWŒÀ÷\ÖLÔH?c°¶´iw½{´Û/˜¨?úãBË„5™ÄôŸ„Ýõߑ׷aqé5¼¼¡:mÖ@ €µµµ¡P0D8®›ç‰D:EaÂJžÔ—ø¸OyVßbÀ ÿøøîö ™t»TDým M«®#™X‡þÿ}’ÄKŽ~Ç8ÐÁH‚T ©û ù¯žÇD•šj;‡5 ¡\.·ma .µšÃÑ©ÙÙYÌÌÌ P( \.#`ÿþýn¿WD}Áø&¿ëWŒŸ>}_9ýKøy^ÄGô³þnz!¯ëÈþ›èoH õî_Su£ß×(ÍzWs9k‹¾ÅE®I@íu”°†B!$ ,,,4LHËå22™ ÂápW“ÄÃápÕZC¡î¾ûn{>l·û¼yã›ünÐ1þø7__ºÉéÚ4dè¿.B“Í"÷±o"û#iäÿn‹Ûoùذ®Q4M«ŽuÃdš$n¿ 4*:Z%xnn333Ø·o¦¦¦‡ í †—––°¶¶†¹¹¹¾X à…<ùã›ün£1nšÀ~êð›ð($i·ÛÅ!²õ£þÖu½nKóÿ‹ôWAÛþ'Xþü«ØëD®éç5Š®ëˆ‹1íºnÍËÎ籸ȡ3Ô¹ŽÖP(„……d2ÌÏÏ×݇1;;ëÊÜDDä?é4ðÖW ¿ök·¸}*D}gšfU¯“ñ_>‹È§ÿ=b·_Š#)·Ïލ ðbÝ4¡ßô{˜|GdŽ ît”°•¤Ul]S,íáÂ]=ŒˆˆHÈ嬆xs×$éCnŸQßbû½³äÛß‹üÏžùmnŸQÿèºnï5¬ÿäØûâg°ü&«Ô½ŽVAlqS;º\.£X,r˜#mˆªZ?X˜’ÈÓ Ã°/ä’™C€Égt]·zW5 Ó'ËùœaOÔ©¶ÛÚtªX,bffÆíòш³æî™usüˆüDÄ·i‚[7‘/‰½†õ?; ù-0Y¥žõ-a%""êçP2"¿±{ž` 'ò#]×ñöK.Aî+—@½år·O‡FX×C‚‰ˆˆMÊå°mÓ&·Oƒh`dY 
±Ëžb×¹}:D}gš&.ºóNh|ynOFÀV""òœWÆ;.»ÌíÓ {ÿË\Ò·à6ä_ªŠå¿Þ̧ aÂJDDž³íôi¼ô®w¹}Da¯œzðÿ!ûʯº}:D}gšg×!PUÈo»ÈíÓ¡×vHp±XD&“iûBårÙí²‘O\ð½ïUíSIä¦iZÿ0 äLÊÕ—º}JD}'æi'“@Š{ Óõ­‡5pK""ê‹Ó›61a%_²›žFnÓ^ÔìHä ¦ibóæ+¡inŸ ùAÛÖP(„……·Ï“ˆˆÆ…iâÔúºÛgA40o9zÉÕA¹–sûÈŸºÿ~¼ã²Ëpë¢ÛgB~懵P(Ôm-P.—Q(Ü>w""ò#]ÇêåÜüIûüWpç—oƒyÍ»±È‹yò©ë ¼õ©§¸÷*õEOC‚<ˆ™™·Ïˆˆ|è›_û.¾øb·Oƒh Z™Ä]]Æâ_Ø»J¾U>ú2Oþ·Oƒ|‚«‘§l}øa<ÏVò©wïü¯Pÿ·l"+<ÿ>üèïpû4È'ÚÎa%""¦O\u\"ßúÒà7¹Òù™¦áÏ_ùU»}"äLX‰ˆÈS‰„Û§@DD=Ò>ù8^{ñiHÒUnŸ ùV""ò‰ûÈÇòù¼Û§@4Pñw[qù5ßsû4ÈG:NX‹,•ËåºÛnCDDDD4~ÌÇNã«ß'n~ÇŸº}*ä#mÖ‰‰ LMMÕ݇Ý>w"""""ò-SÄ;äQŽ”¡¾j›°ƒAÌÎκ}žDDDDDäa±îÂcß?ˆÝÊ/º}*ä#ÜÖ†ˆˆˆˆˆ6N’?vŒ=¬ÔWÏa-‹XZZÂÔÔB¡Eiø8]×Ý.¹¤Yž@Ô‹ŽVMÓH$ ìÛggg iŠÅ"î¾ûn·ËCDDDDD.1 ÃíS ŸéhHðüü|+++öãú¥P(@UUd2Àää$¿8äŒoò;Æ8ùã›üŒñM^ÓѶ6¡P‡B8F&“ÁÌÌŒý³´´UUqèС¾õ® bü|¹\ÆÌÌŒ7òôÓOC×uûËEÔL¡PÀ]wÝ…‡~ØÕóè&¾àÉ'ŸD&“±‡ß5“ÉdðÄO¸}¬Ãi VVVp×]wáøñ㮞G7ñÍú›:%êo·wÝ`ýMƒ ®Á»­¿;êa¬U€çææ0;;‹b±hßÞl˜ðF”J%LMMaÿþýökƒAd2D£Ñ†ÏÙ²e ¶oߎÉÉɾž ùÏÄÄvíÚ…Ç{Ì•ã÷ßpÁ`rr®œ7ŽÉÉI<ðÀ®Ÿu8 R(ÂÚÚškô½Ä7ëoꔨ¿·nÝêÊñYÓ ‰kðnëïŽzX€=®½vk›~ ƒ˜­zíh4j/þÔÈ–-[på•Wrȵ ±k×.\rÉ%®¿Ûø¬ žp8\·ÅQ­p8Œ‹.ºÈµã³§A …Bصk—kô½Ä7ëoꔨ¿ÝZÄ‹õ7 ’¸ï¶þî:a†B$êZ• IDAT¡P7ô@l­ÃUøhÔ1¾ÉïãägŒoò3Æ7y‘'Ö@ €ùùùª¡ÇKKKöŠÅD£ŒñM~Ç'?c|“Ÿ1¾É‹:žÃ:L¡PSSSØ·oÂá0VWWétÚíS#Ú0Æ7ùcœüŒñM~Æø&/òd ³³³ˆF£ö…ãâÉOßäwŒqò3Æ7ù㛼Ƴ +`MÌåäWŒoò;Æ8ùã›üŒñM^âÉ9¬DDDDDDDLX‰ˆˆˆˆˆÈ“˜°‘'1a%""""""ObÂJDDDDDDžÄ„•ˆˆˆˆˆˆ<‰ +yV""""""ò$&¬DDDDDDäILX‰ˆˆˆˆˆÈ“˜°‘'1a%""""""ObÂJDDDDDDžÄ„•ˆˆˆˆˆˆ<‰ +yV""""""ò$&¬DDDDDDäILX‰ˆˆˆˆˆÈ“˜°‘'1a%""""""ObÂJDDDDDDžÄ„•ˆˆˆˆˆˆ<‰ +yV""""""ò$&¬DDDDDDäIžOX‹Å"J¥’Û§A4Œoò;Æ8ùã›üŒñM^q®Û'ÐL©TB"‘@±X¨ªŠt:íöiõã›üŽ1N~Æø&?c|“×x¶‡u~~Á`º®ãÞ{ïE±XD&“qû´ˆú‚ñM~Ç'?c|“Ÿ1¾Ék<™°–J% LMM¢Ñ(òù¼Û§F´aŒoò;Æ8ùã›üŒñM^äÉ„uuu …ìÛB¡ÇÑ;‹E¬¬¬¸},oß·Ö\?•—1ÞžŸê´q+/ã»=?ÕgãV^Æw{~ªÏF¥¼žœÃÚêKQ.—ênôÑGñÅ/~º®CQ·‹0pÇlj'Ʀégy?ŽGy›6mÂìììÐËÒK|À±cÇ055…íÛ·ãÊ+¯úyÛ—¿üe·OadË«ë:{ì1×ÊÂ:¼=Öá½{øá‡qâÄ lÛ¶Í•²ô߬¿ýmõ·[u!ëïöXoìµyä‘®ëoO&¬år¹é}kkk ¿,¡P×^{-vìØ]»v¹]„{úé§ñôÓOEYû]Þ§Ÿ~Û·oǦM›\)K/ñ ï|ç;qÕUWáÒK/Å¥—^êʹÓK/½„ÉÉI·Oc$Ëð#?ò#®•…ux{¬Ã{wÅWàĉ¸ä’K\)K/ñÍúÛßQ_qÅ®”…õw{¬¿7öZÛ·oïºþödÂê†P+ 6¼}÷îÝØ½{·Û§NÔV/ñ ÿøÇÝ>õ¡ ‡ÃnŸÂÈ–×í÷Žu8 Ò(Æ7ëocýÍú›Ë“sX'&&TK(•J-/æ‰Fã›üŽ1N~Æø&?c|“y2a 
ƒ‡ÃU|5MC$qûÔˆ6ŒñM~Ç'?c|“Ÿ1¾É‹6­¯¯¯»}‹EÌÌÌ  Ú“¼šÎï#%Œoò;Æ8ùã›üŒñM^ãÙ„°&~‹Eîé'ê7Æ7ùcœüŒñM~Æø&/ñtÂJDDDDDDãË“sXÇM¡Phz_±X´[¸šÝßj_¤v÷[»ò8ç‡ò’¥YŒû-¾;)“xŒ_ÊK¬Ã›=Îå¥Þã{?ëÄ÷(–—X7{œçÊ»N®Z]]]ß½{wÝí=ôÐú 7ܰ¾{÷îõÝ»w¯ßpà ë=ôPÕón¹åûþ|ä#u¯ÛêþakW§¬ß~ûí]•Çk奊F1î·øî¤LãÛ_X‡×cŒûG/ñ=ŠŸõFâ{ËKÖßõ¼Z³‡Õ%¥R š¦!‘H4¼ffápº®ãÞ{ïE8®zìüü<‚Á }±XD&“éøþakW¡P(`ii©îöQ+/µŽq¿Åw'eß~Â:œu¸Ÿm$¾Gñ³ÞH|byÇëïÑ«¿™°º¤P(T-^{_¹\Æìì, `jj ¥RÉîn/ ˜šš²ïF£ÈçóÐö~7ÊÚªïq+/ßg>någãöY[yiü>s·ËË„•ˆˆˆˆˆˆ<‰‹.‘'1a%""""""ObÂJDDDDDDžÄ„•ˆˆˆˆˆˆ<‰ +yV""""""ò$&¬ÔWbÓ`"¿bŒ“Ÿ1¾ÉÏßäg~Žo&¬ÔW333öÆÁD~Ä'?c|“Ÿ1¾ÉÏüßLX‰ˆˆˆˆˆÈ“˜°‘èª/•JXYY¦iö}ÅbKKKkÇ.—ËÐ4 š¦ eØ@©TòõªæVŒ»ßc|œ°'?cýM~Æú{´ëö Œ“™™D£Qhš†P(„B¡€p8Œ`0ˆB¡€@ €L&ƒ©©)ÌÎÎöýس³³8xð B¡ý¥ """"""¢Z\%˜ˆˆˆˆˆˆ<‰ +yV""""""ò$&¬DDDDDDäILX‰ˆˆˆˆˆÈ“˜°‘'1a%""""""OúÿHøŸaº}àò%tEXtdate:create2019-03-28T17:36:52-05:00ši$%tEXtdate:modify2019-03-28T17:36:52-05:00ëNј-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1080x792+0+0_Ýx+tEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/docs/graphs/sup/000077500000000000000000000000001360743507500155045ustar00rootroot00000000000000blis-0.6.1/docs/graphs/sup/dgemm_ccc_epyc_nt1.pdf000066400000000000000000006776311360743507500217260ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190828165947-05'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœ¼½ËŽ(9rm9ϯˆ¡4¨Óþ~Lè ºÑ›@¡%©„–² Û“þý¶í|ÙÚ••yòV”dDØ¡;N'‹Æmÿã‡ùcŠÿûþs-×Ç~úaú6/ûó×ñSüùüp|›ïeß¶ú7ýz¬ëµ®ó¶},Ûü휷u¿ëe>~7}›žÿËÇO?Ì×>ãOÿYþtßÓ·ûùßzÅߦ÷Xèßø¿øSTûaù‡T‘z™ùãÿý×ûáÿüÕ5ýk\+þþ±¬s<÷6-ÏOÿùütnñƒŒÏÿþÃ÷Ü#^Ëv?õýãÿôÏñïÿå‡åããÖÿO˜ŽmW¹ÿï‡yúøß>­Æÿëß2î§ñ¸í^ó·¸hyÌöËr[¯§Ýý¬Ëü­U 
þüµ÷?¢h¯@ýåKkP›ýyïíçýÛ1?_ítºÒêÏíë_z÷Òäåöõç×ýÿ—ÿ6Ç}¾-Óõñã¿EmÊõÿøsµº®¸Ê¾NQ­åc>§ãÛq­Çq]«*øw?ŽÁûwóÿøñ§ú›ešþöãŸ?~üÇþþÇç¹ÿò–yݾÝÓrŸç±ÿò;mßq§õÚ¾ÍûW¾Î_~§Ãïô#f¥ÿʽ§Ì_8b¦g\F?ý²éa¾¯oË=ç_ŠU¶¯©Í±Óß–ky×&Û¾¦6ëòíú¤Yꟿ¦S8âÛ¼¼*’m_úmLßÖ{šæíØ¿¦§êÞÖSQ‰¯í©OmØSY›/í©OmzOeE¾ª§>u`OeE>ë©¿`Ö=¦x€éÚïõÔoçúm:bU°mû{†š>ö2Aýê‰Ðn³ªyÔõXÍþo³ç·sÙbÊݯ÷mîv›ù·>νqŠY~ÞÃsù™ûüÆç¹Ž%œÜéqsæ6 çqÄÂ±Š††#Ö—™Ëùñ»tËT™üO¢ÍáÉlÑyïèˆóžÌ¾oײ½«ÃÕïª'óÿíýßÿÿþªOv ¾âÉ£i÷uWÓûñí:Âs;Óïªÿ<·ü—?þëO?ýþOúýþð‡ßÿtüé¿þã¿~“3ÅÏsúóI¼¨í¾—ãŠÕȦ–Ûæ9^”»s¬÷üֺ8^¿E£Æº%^PÔëÛ4ÇG®éc‹v ÿvš9Ä1þžÛ|Ä-[t¹e{Ö;Q ~^ö)çjºV»Ô€a›ã^×\.ïmbÛ®n¡¥KTeš®Çv~Û6½îM®èOØÖSË7ßâ×}ÖHx};î(¶ì¥&w¼¡(¶Nã Mõ—b»v•Óg"¯;¾ÅèÂO5÷hÃCÿô8Â6E«Q…ýxló·M·ºÆg|GŸ=–èO]ö%BÍ«n¶{£‹):Ë©b÷®¯ÿÛÍwMw³]ªÚv]í¼£˜šA¶xØ+Êéî²­úß~=­²ïß¶d'Ý}^â"Qǧβ1'îѾ›F›x[[ý‡²ßº¦ý8§XolÕt«¯\§ª¢/<óœÖÇv};£Æw<ñc;ô¨{©ÉýmŸ÷:܇éÖKØ÷ç½1 ª«œk¼ X.w4f}yúr‰žwÄ~?dI¾—»ôX‰µÜq¬W±-ßÎè&W|xO¹¸Ö~M¡ZŽõÛ®çŒû”‹¹8æœr§íÛºíµÃ†)^Qü³ÒŽ1FεCÅwñmQOX·­šîhµëˆg‘- g{ÛG¼)õ’X¬½ËѬ{í°óÕºÎø®ÓÏsצãÔå’ÑŸ¢—\çYªï(:âvTÛ¯cV§Q¹u~:•O.&­óÚk‡µªœ± ×ÀütذÅã•–×}ư}Çýîè÷ºfø WŒ¢ÏëŽù¶~áó«.§F‘½öذíz[ó¼W›:vé±qÍ;^Á^{峦žâÏýæŠÎùy ç/eª=ÖŸáü¦Qäé±~É+F˜»öX„øUCÊÓc½\´ÄÖ<”°-Wtˆ£¼ÙkŠáf¯]6lQÇø^ËÈW ŠWþŠáq¯c¬¿œ°Å4Y{¬ÛÖx{í±Öb¾mŒU —ø ¶íy²k×[»ì¥F Ûò£jmµÛ]òWÚ ;|J¦{ïcì$§*ÞìUjrÇxÓYMóKÜï€ãÁâ}ŬS  g)²É3¸îõùócMdq+Ùî+ ²¸ÕTdã¾³^Üz<¶˜Mî>Êâ©cÚý¶V úªaLVSeU•¹²ÉKh£¬_òŒÞÖFY·é«hì?úCMfí~á-kf­YæùÛ´¶a–sl×Ú†Ù°ÍWÛŸÏ.¼€hÁ6Ìz¹ð²öiôYÜ/æí}}åäØß£Ï¦~)›†²ÞgS¿\ç㙸{ŸE=ÃÍ:÷ÞgqIùxm˜õ&‹ÉáÚF§Ïò,nLW_ÚÝÔ¯î}ôÙTìAÇþÄáW¬ÍõNÞÞXï°ùm‡íž÷ÑaqÍ-–aÓçv‰…|óc­zá‡-÷è¯Ó.¿`~Lá[­ûp ¢Ábê8Îj»»kWÔx»ÞŠvŠùu»GoÍõç4 £·Â&hNA®JØîáÅò~kxXÇ=zkzº5¬s>ÁÝfeÙ6õÝáÀ¶ëÍ'ç­Év¾ªí¾Úë69Ãi€M-¶ÆtÕœX¸˜­ºKg]c¶êN¬<—pïæèo²mñ¬Ý‹ [øùû=ßWµ]Ý‹ ۹ȋ}¦¤5XDz¿nk«­{±òï»­‘âyb”Ilîw±¾™¶<ÀÒve7v §öÚžMüÔ×E>ÀÆÚjÛÓ›&Æ5ÖVË‘X” çêø3lØ®# °°EKœi€ÍŸj,®¶+ °¹œ–}W`óÀ‹«éJlžwÂvÝi€Å5c¾šÒ›Ÿ/VWû”Ø<âÅêjîO‚5VWó¼}êÈv/i€E9QŸÏØX]íK`aÓÂ# °¹9a“4Âæ‘/l÷–FØÜœe{ÌgœX]í{cóx««uOcl~±¼š4Ææñ$l÷™ÆX\3¼«sŒ±¨|8Wgdq»p®ºëÞÕFÙlzäXYMÙ…Åݯº(Ès@¬¬Î)‚óžð^ceµÏÓç>A¬¬ÖìÂæJÆÚ­{°^Lš@A¶ÅÊêì¬ÍI±²Ú»ë¶ð¯Ö òãÅÊjî¬Ma»÷ pÍ𯲛æXYí݃õg+{°ùã•Õœ=ØéîãÙls5Hwãb[ßA&ÜŽWpÔ …p׺]†»»‚>âË®ßèî.o6ÞæYYrÆ»aÛ…aŠ·I¼ל4ý—y–|7Ê“°ãUž<ó]{ð]]3<ⳡÌÌwuM>¯˜|מ|7üÒhƒh™å 
xíùxå—Êú„ïîZ*ÄEêX¾kßø®ÛàÃÛÂTé)¯}¼»êNxåh¼»üÈ£¹‡¼V•xmHà•ƒ*šå,—Ë€×^ïÛ6¯:fxæÕ àÕ“ÍmåA¾«×6)g/WÌ|×:óÝ]£ŽÖŒKc¿ƒïÚí2ÞµÞµöÞÕ˜}k†;ž ¼kð®õtà]·e¼¿Äg!o¬\3ãÝøåh“—ã]ötÐ]{?™îF©XÒnwYd“îÚ3ÜU%ï3¦‚­°Ì wísܵa£Â]=ÿ½¶°&Â]›2wm,ܵ^¸kÃv†»6Úîê Öʼxd„»6Uîšs¸kŸ*à®Up—Ai¤»vMÐ]›w5ƒŸ ïÚ\¾ëuÉ|7îŸÇtî¥XÆ»ê ‡ÜóàäŒwÕ}ôQð]ó3ÀwmßÍþà.fÒ]á‰MSYy­îúŸ3ÝÕ}ÎCºK÷ t×\JàÝ]Ãß®âõatWŸr<ú^H é.,¤»ægƒîƇ®u@ S'ÞÕ 0)g{ã]-äb :ײá¼k÷Þݵž˜—MôËñ®XNøïraE¼¶K°ö-à]qž÷ïªÜ$G»²ØŒwµ8¿cn©ŸðnØöXð/õóÏx7.nF,ÒK×ÞÕHwŠ,¿xW´B“Byµ»?«ÿ^k¥ˆw¹..Ç ò-Òñ{𮢃çÿ«2ÞëcýX~ç]îႼ8ï9pä‹ó®†9éÕ¦¸F–ÆWé%Ë)j^ Ho¬¹–K´­Ä´’ôFOÔ¨Sz¡^9QƒÒõÆZ-\€ýªX¨wÖ{(ލW‹ì(WauF½–»XÃÉ2ê•mÞõÅlQï 'Öûº&Xï"_5>¸½pàÌzÃv(»Â"°^}’Kw«Èzã³[Öîò:ëÝU±Ê÷Àzc\ å®ó¬±ÞX-nüôj¸ ·¯#O€Þ˜>ôÎF­ W#½^iù  z…°rQ› 7ÊÚ˜˜·O@oøÌt×w4oÌ‹³@W…þ™óFwu„^ ÎòŒKÖmp^ÁÛ5Ö½óTl™ó:Μ×qt½ñQ•æåèñpŠõFm1'½‡ÞZY¡‘ôžÂ’ñè•IôZ]HzW9x[]Oý Ò+÷ëîC Io|áqÏÍô‚R‚óë=µè›ÚË#ì%5ÿ5°wO;6 öî"`[]Šì%h'ìe¡é„½‹š£ „½›|’¾WØ{jò{œo†½F_A{è‚ö®Âaû]{3i¯U´ Òh/fQ£½ldÐ^ޛ՛K“öÂ3Ø; }Déò†{£*Q¼Ú€½Q•M{‚ö:xΰ×ï—`o´f,œ¢;ÝoØ«•Ò¦bµÅö¦ã.¿˜öÒ'é´ß”Á^#¬€½t4{gé®ìÌËMÂ^.a{ gØ·ÛÉì=ô ö£'/Ø‹Çì 8ºd¸ { ÷ {åÜi÷®ÁRÀ^B ‡½yuAÞË— ÞkÎ+/ðy/—Tä½\^dÞ«I{o1æ/Ü›ã^dàÞän;®¸×°á^°càÞøJuVªŒ¿Æ{c®¬,‚¼× næ½Dä½$ûä½|¡¿†÷æ9yoZ=öòuöÆK‹W¸Ô!a/áa/ް7ƪ3¾Æú„½¤R¤½Äx¤½\E“ör÷´—¯Ž´—p“´—Œ‰´—U!í5t;hoFx¤½ì±¤½ä¤½ë  ŸÐÞsÙæYI{‰{¹‡å¸wRøMžˆ{ÉÍ÷æKÞËû‘÷Æý Í%jŒ¼—–¼—Sy/)y¯ÝÏxonNòÞC;ñg=di¼—ŒÀWkèh³:ør uà›é,‰/‘‰o8”sôï:›‘ø4­Ä—FâK€FâËm _6$‰/;&ˆ/Û‘ÄwÕÙñh„âøÚíŒøZƒøÚãøÊ_-ƒ)‰/_œ_ÐY_¾8_>‰ï6HÅ‹ø²£ø‚q’ø²[’ør0ñ冉/ù2€/÷÷È|±9KäË»uäK"MäËÑ’È—ƒ6™/v«ˆ|9G€ù²É|ù\@¾ö\@¾ô ˆ|¹§IäKg‰È—Ÿ8‘/¿9"_&@¾66ùZŸòe/ñµÄ×ú$ˆ¯}r™øškâk{ã ¾Q®xçóørü"ðÕË»ô‚ðe×ÿ^à;uà;Ç¿ÿ5´wP[‡½±BšaU£Ø¨Ý ˜ìýŠÏDíuÉ[ÂYlI»ámÚ â ‹¨`9° í†K3ôÚCç Ýp=kÑù3Ú«pÓC(¢i>dñ†S@înëHŠ7(„Uk·›{càhApìlš—¾þG`¯†ÉjÛó!²WC¯ƒ A{cÄŽáéüT½!Fú³Ïk„½qÉYF‹?ΰ7.§ùb€½oS ìåÝ2ë=EOGä5ãzcM `îJ‰{£sˆÔˆ»×»©¯ÕÏËÂz£«i{ª·"¬÷™Û ð ë½î¹m¬¿Ãz'5ymÀ½‚lw›Z ÷ÚíÖ»)ðµ­o ÷Z0³áÞMzµW÷®rûÛlBÜK˜ ÚkU!íe«8íÕÛvhoü²NKµH{ã"Ñ;ûh¯ß/Ó^ë'À½^—Ä{U¯p¿Šïµ Þk±Üà½ìÀ½Ö!*îµ^ ÜËWÚ«bó‰íµÞÚkà?Ó^ûA{í±2ì•Ï8O}ÿ°×möÚÃeÖ{Ššž-≬—C PïÓ_ÎÆÆˆz­&@½²G‹"!ëµ—ÖkUɨ×L™ôú3éu["½ÖO@zíÍôZM2è…‰œ—霗-EÐëE2èµ;eÎë—Ìœ—/œ×®˜1¯_1c^ôcR^3eÈk7ËŒ×nÆË×BÆË»ñòMñâ£!áåˆ@Âë—Ì„÷mˆ×¯™/' ^ðÌxõNcIÝ@¯ætmï ÿÄK/G‹Jxí½€ðr, 
áå«൚ðZOÍ€×k’/{ø®µ!ø®U2ã]ŽïÄ»¼¯àµàu[¼Ö^¼nË€×mðòÉÁwíé2Þõ§KxWûõŠgj7ã]ö9Ð]Öp×zà®U¤Á]kEÀ]»Sf»ôïÉvu†-ø¶<Û ÛªS uU›Ùn˜–¡gG¶k}l×¾A°Ý·m°]¿ff»fÛe«íZcíú%3Úµn´ë¶Œv­*™ìÚëÙõjf²Ëµ È®×$“]«I»4ëZEÀu­/W®ë%2×u[æºnË\×® …™ëò£Öõ+f¬ë¶ŒuÍÖõ'ÈX×Þ °®}Àº6ëZGÖõûe¬kýXמX—ÝX׫’±®?^º~·ŒuÝ–¹®ÙÀuííìz¹ vù`àºÖVàºnË\×.™¹®W"q]{kàºnË`×î–¹®W2s]¿dæºV\×m™ëº-s]VXת¬ë—ÌX—³0°®&²{ï',€u­X¦ºÖ•3ÕµRêzý3ÔåIuÝ–©®Û^„ß§Ò0§0Þ_£Ç+½Œk)Ñ/¦žÇ1N&“én°Å‘é®’¸–ÊL×ÎÙƒé:ÂpÏõ¨™®ñÓ妙.cgÈtcº—lsyÒëûÝ£à€tU/A©Þ¤åö&_ž+‡ë>Ž:â“çnÚ©˜z0$x6Ê5ñ‡Ìsã=ÎÚÜ©‡“tWE<¯gÿÍù¶ ˜ž»J,9‘ÔÌs× DYyîªÆ‘pñÒ¢zÐÕÆ£¬Ý42ÐÕ*E/­…ýf ¶Uo¾¶€îª(ïú¢ è†iNçýtcuލ&]Iœ\#h@w‘ÖèˆaÐ]ôK4BÝJÐ Û²J†ân±½èÆRñÔ@uOïøÝEߊbm×wün\sZG´'ˆ®–Ÿ“Äž÷wünØŽ¡øzf ¦EžÛõŽÞÕ€=Egž+Iêk¨W€ç†MÊ¿-<6ó\­ŸE2j0$x®l"uS<׫’ynÜîÒAï£fèšvºR­^´<÷eœîº j¼"†ãÈ#Õx£qöq¬‘j¼fË|— L×lPã5[滦 ¾‹\Ä»&5 ¾ë—45^”K€×DOxMt„×Ed3â5? ^úkˆ×/—¯é{º/l&Ç‹j@Žê¦Æ›e$]6SãMÊ”.ÆK´xqEÓâMÅÀwM€×-xß¶xME€×mðú53áõr™ðz¹LxMì9^SáõÛ™ojM ^S=âu[A¼V ^·eÄkÏ Äk•âu[B¼~ÉÌxÝ–/3ñš0(¯_33^{m`¼vM—âÍuãe"2^êãu[f¼ÊX¢#œw #ÎŒ×ï—!¯—Ë×ËQ7k°º/Š™o~@^«ŠËñæk6ÈËü"¤¼òFÎqr”— ?HyÝ–1¯ß‚¼¨½ ò¢˜ ò¢\æ¼þԦȋkþRE^S)vEÞÜi]‘ʺôºœm½®kмyªpEÞ<æüE^Ô…Š¼xtSä͂տF‘7É¡¿y“³Ñyïim§¾¿—õNßÇz¯AM_ÙV£KN]ÝŽ¬÷ùØ–žP‹¹×‡÷Ks¯Šjœ"ëÝ:ÐpÔkž‰zqô’¨×4%Áz)SBÖ«Ùö¸:Fͨ×$%A{uuÏ ½T>!íõkfÚ˳†¤½VÍL{ý’‰öú3î5ñNà^j°÷J»e·*a|À½öä ÷Ê5Yu2nyç^ÃÁQÒ^“ÑíåiZÒ^gž{·kC•‚I{£Ø±T ±èΘ(¯]¢¼lGªòâ(´‰5Lú®î&t­¶%UyYMªò²*Ôj˜u6£-¡M«Á® ­»&´ ¹cb |APk°KB­J=¦Ö€#ì¦ÖÀkR­A| S• 7µ>ô/—浫%Þ+õšC»¨MÉ!ñÞ—mðÞ—)ñÞOlø¾l ø¾l ø¾l øº-ßOlø¾l ø¾l øÊv†ÇÐt‚2ð}•KÀ÷[¾/[¾/[¾/Û¾/S¾ŸØ:ð}Ùð5€ïËV€ïëÏ ø~bëÀ×ZÀ÷eKÀ÷eÀ÷eJÀ÷U•|?±uàû²%àûº_¾nËÄׯ™‘ï'¶Ž|_×LÌ÷U.1ß—-1ß—-1ßOlù¾l‰ù¾lƒù¾L‰ù¾l‰ù¾=1_/—™ïË&æûúcb¾/[b¾ŸØ:ó}Ùó}ÙóõçÊÌ÷U,1ßOlù¾®™˜ï«\b¾nËÌ÷eKÌ×ï—™ïË–˜ïË–˜ïË–˜ï«. 
ú¾Ê%èû²%èû² èûº]‚¾¯b úz¹ }½\†¾/[¾¯?'èûºU‚¾¯r ú~bëÐ÷eÔ÷eJÔ÷eKÔ÷eKØ÷[ç¾/[â¾nËà÷eKà÷eKà÷[¿/[¿/[¿/[¿/[¿ŸØ:ø}Ùø}™ø}Ùøu[&¿ŸØ:ù}Ùòûúk"¿/["¿/["¿ŸØîìÐÂ6ÈïË”ÈïË–ÈïË–Èï'¶;;´°%òë¶L~_¶D~_¶D~_¶D~?±]ËŸh3ù}Ùù}Ùù}Ùùõ¡*‘ßW±D~_¶D~Ý–ÉïË–Èï'¶ë“N›ÉïË–ÈïË–Èï˖ȯ·H"¿¯b‰ü¾l‰ü¾.™ÈïË–Èï뚉üz¹L~½\&¿¯r‰ü¾l‰ü~b»÷?3Ðfòû²%òûºf"¿¯r‰ü~b»ÏÏÚL~_·Kä×ÊüZ9ß—íõÖ¾üþF±^E§ÖÔ^/±ÞIÉЖ½ÁÚ,Ö+Šy¢TëUj8Œçäú ‹o{ ìU‘j½»f¥”µÞI‹"…,•#èPëµé™"À˜˜*¸LÌô2µÞxôsÐBªõjÇ áS¨õš ‰Ù´=68(°:Ø:Q­7\Ó³³=Šõ*¦t$ò¡X¯Ù Ö«m°J3Žbqý¦ìM,Û<˜rÀaÚ÷®áhr½—4~Û® °ØÇØAêØ‹d¶eéªp$Àúˆ·Á¨A€­A€­A2ŽKn×PöÛel»'@ÀQnÈDÀQîû{DÀ^.#` mG—µ$¶— lÏ,ú4„X€ý’«§ -"`ë–@À…™:Ú¶—— °IÉ€‡mVˆz=ïì—ÌXhíŒ8lÑÏ-Øÿš°F…eÊ­ ÀÖVˆøõZäˆß°Å×1t~3¶Ûåˆ_¿$°v^ÇN °ÂJzä˜`ÅVÝ„üªY–.*jX3Ó þ$À|àY.»&7B~9X"â׿ìyøµn‚€_¿døµ”~½QrÀ¯õ#òßIúë}_‚ü3ñßø%E.vþ«-ì¾iaü×.þ«ö¤þ þk6ð_³eþk&ð_³ÿjë<>é’”Õø/º¥ñ_+þËû‘ÿ¢Ëÿ…Ï`üWÿ°o6ÿe=Éíšà¿fÿ¾]9vü7æØ©Ërÿµr™ÿ²ÉÈ­YÀíñÀ'=kÛ×3þËkVþËwCþË^NþËÊ“ÿFç¾gü—•ÿeS‘ÿÚíÀ_¶ÄùJÉ­*à¿,GþË®Gþ˺ÿZ9ð_v!ò_»&ø/›šü×îþk×ÿµgÏü׊ÿZUÀùjÉyMò_³5þk—ÿå[%ÿf ç») ’ÿâ¦ñ_~$à¿ü¨Èù1’ÿZ9ðß—-ñ_³ÿÒFþËG ÿå›#ÿeS“ÿr| ÿµk‚ÿš ü×® þû²%þk¶Ì­YÀÙ[ÈYMòß—-ñ_³UþË;‘ÿ²†ä¿|0òß—-ñ_»fæ¿V ü׊ÿÚƒÿÚ5ÁÙ‡Èùÿ²Ï’ÿÒÕ"ÿµk‚ÿZ9ð_³ÿš ü×ê þËv!ÿµzfþkÅÀívà¿|}俼&ù/{gç¿ÉÙÈírà¿ì ä¿vÍÌ­*à¿fÿµÛÿZ9ðß—íÎm¾&ù/Ë‘ÿš ü×® þËæ$ÿµrà¿ôÈíšà¿ìEä¿ì´à¿fÿe§%ÿ…Íø/:­ñ_ô–Âñ§ïÓîýãÓÇ?ü°íñ°Ñn?ýpÌÛóÓ>?=šŒÏÿþÿ}c û@pñòg‚‹?«Æwaí_wËs«w|žò¯}Ã[^ëc¶_Â_ÛÇÿW¯Á.‡»V þüµ÷yè쨿|i j³?ï½ý¼­‹/zÏíë_z÷ÒäåöõçOî_®øÇŸ©ÇúÐís‰oh‘~̶ͤ1ö_ת*ýÝUüwsü¿´L>~üéãŸþ&Æ­¿ýøçÿñ‡¿ÿñ—ÞiÛäãÆZæÞ®_~§í;îtÄäpÇ­Š˜Ò/¼Ñá7ú‚!²ËåîÒ†â/"Ó3†[Ú1ÌWÍÏÑ]3Y%`ûšÚ(Á\9dÿ®M¶}MmÖåñ–½"õÏ_S‘lÉ©èt¥W$Û¾ôÛX’#÷=U÷¶žŠJ|mO}jÞÊÚ|iO}jÓ{*+òU=õ©{*+òYOýËT¼Ò]BiZΪYµà?®]‰{Þ3Ôô±— êWσv›íœžó5›d‡þümößxís‡?q(Ïèû6w»Íü[g:´Ý±\:R÷s÷ùÏk<áëǯý™Û,þ8áÓþMY~×3¼®C9]ŽXÉžÇé·ÐžüËÿõ§Ÿ~ÿ§?ýþøÃïú¯?]ÿñ_¿É]ùøEGE·'gTzÀTI$ê;3Qo Déš*W¬¹÷½%.Þž\AmûØ”H­aìMù¤zFDä‚“éêš*T—‹:;º¦J¸ãQ¦å_Ûtj±Ÿå õM'ûYÑY»°%ëᦒ]PeRNéô-kµ]]Peº¿][ Ÿ’mIE…:®£HÞ”ñ¶ŸWq­ m:ÞÚUÂcÝ„3a”Mý¼ë©(‰¸6´—s¯¶&¤í[uÞ¢HqBÚ†UŒGÐÚâÝÚÞŠæ˜ãn)ÃâÃMaBÓúíVÚåÚ” E©K Åç(q9YF”âªÎ¦$¡a§G ÝOH×\ŸLQ(m[EÑ$:wlÅ´Œ¡[ÛÄ]-pSÖ Å[hsäPþöZbB úPéÚJ#íB“‚ß–¦V³)[MÛS¹Ïh1éÉ>:IÛyŒø íå¯wï7çÙÃîý Á™êK>Ϥ¥•N–ø²í¼FpÐV×ïßÎt6šHi¡ª˜ÜvM#2èVZI#–øÁkA·ÔB·» «ÉÔã‚n¢<1oOó]Ë R(ŽÎ¯°MiÚÛFŠ*¨ §g)´˜ 
…5µ:™ötK5<2nŠ;i›(·"£¶dêA·¢Z¤ŽýDslŠ:i[(Ö¸ :i;(·²…÷,ã›bNêŠúËH{²ÝÓº•&ùXªl•L=H-8Rêl÷<‚nmÖ¯-Ûü¿öX |tØÊ!W²ñ'~í@ö<÷6
rô˜ÉøVG"NFf†-îÙ£€nÅ ÆÐT$Ô¶x7-H·’"vQXØ8Ö6Mîõ[æžX¬½wÉMª¢EbS@LÛ-a³íâ¦m³D0ÒøÄÇO#Úã€,=ÖGUS¾¢¢º±ÏóõÑ[V*ãšoaDúijN11/Eçb×,c ziø™#ÌÇZ{ÞF”_J0šæŠþ)h¿ªEŸ¹Ïqù[‘´'ø]Ñ#¥ =_å!qÍ¢ê±+zD½!¦û`w´ígë@ÉEžJ¦ÞÃþº+t¤m†Üšíº†ìþ”µRqƒá•Ir—œœŠi{ê>+“}íØåíDg8×ù”7 ›ßÇRMW¶0õ 7i:;DƒË4i¢(ïq èɼK&Feí'Æ ÛîP OË-™ŸS;çÍ-Ø×YqÔÓ2ß§G2õ€)@KÙ´èÖì±Pëñ<ö>ÐÔšA¡"ÍÛ´î§H‘ælÚG¶>Í\{f8D#EÀ®8‘ù.=3Hê©Ech_c“õtMvçU®ÍT»&f™}½Gu²0õ0}ë÷Ôœ»øW#ŠÇªnyâ¹%qò¬öŸž´… $Oâéš|¬M"Õwíšüx¤ãD隘Xcm ¹ìÚ59„mûˆß±W¸#|‡³Í¾#zÇïuŽà1òmáɨ½·‘¢Ö]ºå­Tmz¿ÏÞõ®¸*Ô+íM)*$FŠÒ)9RìËÙáÔ.Sر6ßKUí›VžÕîZrÆP:î¡ÑwJÙgº¶G#j$Èëˆ)Û®\u2Ô’o«Cf,;$ÏQF%²ÞÛ)O¨‹‘ïOrò6d*!“>ÁÇÃÝ•¾»–J µ´œ¼»„AºoÉÊKåâCf¾ ”M’o™¼ºX!=y1J¿ä7p(5n2áíZçÝmÈÔ†…ºìÖL×݆L¾Etç’½BQ ݹÜF¤…LçÐ±Ž®y ™ÇHí'“>ËÖ9Qy©ÒÕ1Óü‚XÝİÙ:gîѱºY×6b²áÏe„æØ­”ú°;—fzöyì”Z¦{—V}„娗«ÔžG29X…)œÒ3éöÒC&ª.Ð5†Ìú¹Ÿ÷ƱqïšF,޽½kÊ"|«–Âýic]“DøV­9{_Q°Ç´õ~™[B±ÓÝûeš é1á2wfz,i¸Lͧ8e —ùs»Ž„C—>ü¼ƒcµ¸FŽÍ8±®™¶1dæÙòzhC&ûr,l޽™«¤Öî^Ä 6Ö¤Sï•ù%ƺ¦ߨS©'t“ß¡šæÜ½ñž0†LÜf7Ö®r}º·QFéö §æc¾,ww2ÙcÑ1¥3, èHNfŽ) ±]…sÌŸŽ˜2õ€ÞëÐù´îdbš:1º“i&½¸1bFÝK!PǤÓIcÄLoJ¦kã<²È´¾éL"{a;â û“]#Ô†®×ë›9¹™Één~¦µâ¬“I}ÌLÝæ˜u.iŒ™(”¸;GŒ#‚9y™É‰?Šï>?ßw²%Æþ +IKåL·•«ÓRÅ|ßzm[Á—‰–.‹ÂK£{—@'ÐRȼ4WÈ:`©düæ‡þM/Xºh¸÷µ`©Õ,]´¾>ç–Ÿ°T¶ùl90ˆKy¢¸”géˆKªÂA½JÄh©Ee¾®´TÃj|Ó½7$:pi ‘kx~õôq©hW|–Ë\QjÆ¥àžÎ¸t~ˆ\ϰB^z‰êÎKM2œy铼ûêVL/q»k‚˜Zr?0S% >ßIšJƒñVŠÚÒšjz×qÔ’;ÔTi—ד€M™ØtŽ:E{ß5ŽÜôUNcàøv­Êò|~8µÐ §*cúÖºPf§*7_ðtÖËX¿×c• §z‚c *àS•›×ž¶üÔŸ<T)]ž:yX1`F¨³Â†5ú–s\`¨*×OYDU¹Is`Í•)ª2%Ågj?cTµÙÙŽ!GU¹ãî§®3HU±ð5ú0•IªŠÍS¼×ú襪É.­[ê§•Yª6‘•è¢æmLU¹EŠð%¡vÅ©*ÓM†2OõFÎ@UåíçT6›€ª·q&ªÚ‘R$iÍû—‘ê#^*Ýúñd¦ªbñq.Ò&²êMœÑ*…MVU,GáÇ̘Ðê³mv*åF¥“‰­ª˜¦ˆ:b®JbõšÕIÊRtU”û‘ÀÕç\ZÙÞ/º:?‘ú1Æ–¾ ¼ªÛí¶ÕëÏ|õU.V•ëçØ°òµfªRáS´T)à¬V*ƒÖ§ØÒ³€´ªïq¨Uå&Å×uKe­ó‘Žám}Ý*áVÝ*.{×$¡à­Êt¦¡æB'àªb¥nh(#WKæjý.3WÛ5 ¯K³$*ú:´}VÚ#CW¯d¦®Ïݦ.ßìê·ËÜÕûy¯*ãüºÄ âÃÈ«:nÕ&w W•[çåÅ]u¯˜8æ:U¼>u<{ª××½zŽ2xå¶7À«5G¯Br8&Õ¿yU¹ùš[V8 ×ç³W†´¢˜PØ«·_†¯&Å úúÜHÎXQ~õ7ñW‹Î×âÀÎåÑ|”V ©Â5»ìëv‰Áz53„õ‘"SØW¹„a_å‡õzf«rÚ©þ HìüøÇcÜÍ(ÖÇ‹ÌbŸûi–]péS42…'x®ÓŽô)”A’õ-3Y¼eýé2•}KXöU.qYï-Ìz 3™}ÉhöHXÆÑì¡ã‘1Ƈ>£ÙW±g_Å}œ#‰£—"À³æÎ€ÏªÜzôìt´*·µ³ÇFh-K&­EíÑz=3¤õ9?SZw12¦U¹YkÛ⣂Ó>õ/k[€Ú§žZR¹Z¹&±Â=_œöi°©§ì¨•í’LLíz‰Ôú»Ë¨ÖßyfµÞ–Öšÿ]iíãîžš ¬Ê¸O•a­wƒLkÍã͸Ö#óZ;¤oS] 
lý­dl«r×ÔõšÀmÍ&¸ÕáÇ–«ò6Ám¼‰åÁá¥gep«%@¸Ów¸¹5¯ìVåÎë:«&àíó Zé—fÉðÖV8 ·¶`¾}T:ÿ^À®-PApõèÛÒ%À€pUnÕY°¢Ã“®Ë—Ëh@\½„eêÉOAqŸ¥¤²á1ª†qµ‚ŽwÔºPƸ¶HÇ}ÙÈÕny>u°H$WÅ¢ËÌÍgÉ(W¶yDÝ€å>µTž“Ú+3Ì5š+q¨‹–^œkÌ<×耮®¹Žw]åŠ^´‘X^)®ÊmÊZ\,0ÝW¹=çÈFbhR]è÷UW†TW aï\T× °®%1×MšÇe»~ÉLvULðµ± ©v[b»bh›2vTÖ­¾¤Æ37 ñ}(­€žEÅÆä®ÑS}ãÅyc Òò¥¸+Æyc’žÉyµøÓ(X@ɘè:¡y½"})b —‡V zyh5“ÞçùÐ2Ò Y+’Þ¹£ŽWíÞD½,üB½J[>{C½ÑdŠŽ¬‡rzÙ§ˆzuÂ]Y'Z nF½§ÌWSb#ê匹¾À^B’^.ÒHzÃïÙŽô*”X³þô&½»òØLmªí¤W~¬2ׇéµÐj^æP!éݤo5¤w?·¿föéÝ¥ò}¯"s ^ù¸S×#ê•åÒs õÆjV5‹£@Ô«EXÜg¨w——_A½P¯Öˆ=Iïö0Ì~æ¤w“þ•‚ƒK—éÝ´r>Õ!é]%†r5ÉD’^éÀÌ#ò¤w“DÜÕV¡$½~¿Lz×ÇKiÃ'HoTe’ƒQû,Ho»îµåP"éUUÈYG#^Åè÷˜%‚^ÝNQÎeîk w•xGö®ZÀ,mã’°W5Ѷ_]ùö µ‰ÄÔ•rƒ½QséN#aoY†¶/a¯Î¨¬#¾°w•ÊæR÷AÁzíÕ€õ.ÁN¯A´™õzM2ì Û±ö™…°×ºh¯Nû¾[â^3í³ÜÆ´7lÛÚ·¾H{ãù¢‹õpÐ^!Rù`õ®äkßï"õÏA€|%Ý·Xä»>ÎJ_êù.ׂóõºdæ«%Š"+*ÍóeOò]·­é3ñU_L*›¸$¾öƒøZ§ñõr•ø>~`_]÷ ¾~+ãJìµïOƒøZSøZwñµyÄ÷ÖvFÄ×¾H_·eâkÝÄ׆_û|@|ß¶A|mÔñµî â+%J-Ùë®&ˆ¯Úl)„òɯ?_†¾ºæ"ús[†¾ödè«æ\祈—“ùÚøækoÌ×Þze¾~µÌ|W‰U*’¯Ð`_ö€}ý¹öU÷š2öå× êëwËÔ×îê»HQ;aÑL}W)j%,š©¯}Y ¾Öó@}­úZk‚úÚGìë×ÌØ×Þ)°¯õJ`_ûÊ}ýù2ö5g)c_f}Wé¨õP r_JÁ}Wi?OmýÛ¹¯ ¥¿ö½üÚtðë·Jà×Üi€_k,€_² ~éÀüJØvï$œàWÕÔxU–¿1‹Ì{߯ øåÌDðK§ˆà×ï—Á/½q‚_:—¿«\¼Ý7€_Ý¿~Í ~ýù2øÅ¢ Üw•@ …’ûºÍ4=`ËÜ—sùQG…q ã{±ïȘ=Ç¿ÿÌW+(… «}_JÛ˜âþÓ•älmÜ¡‚çèÚ-l6+!h>*v!‚Àý>Š ÈYG -Dvõœ¥£Ë¬O2Éÿ(S9ãzœö†ßxVßþÃiïãŠÆ„¡/N{¥?¬o½F·‚önr箳kÐÞp§×%ïfÚkx ´×0hot[M^¯A{Ã6GÃ÷gO¸w“¬ÊYö|í]:u·ûM{ Lö#uÚ«õ“6$+/íÕºk  ½"AÂæàö†m–°v}q ½qÍhÇé,ÁC ½a:ç${i¯2Ð^B"Ò^C| ½v?Ð^CŠ ½ëãÛ÷PhàÞ°i0ið¸wÙÒ9Öû¥ð²÷Jæ¥o–÷úý2îr1°Ï-Ò¸×ïÜ{jÚ^×Ôœq/oGÜ{Ê+o£h¯u$Ò^Âeâ^¶ôÀ½¨DƽVwâ^³eÜë6àÞó™ŠÚEƽ^Aà^ötò^©Éjë¯Æïí}ôÅ{ñeïÅÀ`¼(Þxï=è‹÷Þš@ÛAòÞ]âö}N2Þ{«¶?Æ{yqÞ+Áü¤¸q¯†KÝ .ŽÀ{c”]tR¬…'Þ‹­âÞ(µê VWÀ½vEà^»d¦½~ÉJ{£Ä&Íý œiï&¿ýnYbH{›Î½ “örfî Ó¦£{ ÍfÜë¶Œ{åéLGÛÎ%îõ'˸תÜ« ‘en‰nˆ{Unî'bˆ{½\ƽÖ€{•VAgòBμ×m™÷º-ó^nêø²3øú#dàk¯ÀWo}ê§z|í øÚðµî àko¡ß¨û®Ãš5ÊÀמ ÀW嚃ëÀך8_{£¾a[$£UÇ_{3¾z3J‚Q×ø¾öÑøZoðµ×à¶iï14¾VO_¿_¾z§:-Wò¾~¿ |Ù+Á{íuƒ÷Zïµ.àk—‰¯ÕÄ×nâË'ðµÛ€¯ß)_ëz¾›& »–ïÝå\_ƒ!'Þ«W!L_{x¯öÒÏí+Íï Û*BV§ð^Ùz‚;â^Ý.Þí\¿Gàް¶-à6ãÞx¸IÁ!õm÷n‚×ç¾~ÍŒ{•F`³"gÜki/# H{·”fÂi¯?z¦½J*æ6bíµ7ÚkuÉ´WO§£ÇKŒÁ°×^9`¯½ò {ý¯öº-Ã^·eØû¶uØ»)gÊÙÎ߀õZör-MØ«KÎs?AØk/°×^ `¯õ;À^¥ƒ•óPq"`/_ X¯™2êµ·Ôk]¨×Ú¨÷I¨t´ó|D½| ½ÖÐ ½VÍLzùp½Ö\½ö½Öèµ·Ðë—Ë 
×Þ(@¯Õ>q^kp^¿[æ¼^ËÌy9n‘óâ«'æeUˆyý’óò­óòó æeÇ#æEƒ‘ò²UHy7áÙµm´‘òîÊ„3DYÝ’òF¹ud·$åUFä» ˜’ò*“—Ζ¨0`^7eÌë¶ŒyívÀ¼ Û{0寮Œ{‹¥ÿ>õ†9…÷þ¹ÛY lЫ”¤òu«do—½U¿˜úYr€^ Sö–oœ×Þ(8¯†´}xfÎ#¡rƒµgΘ÷S[KäDÌë¼óšb20¯š¶ «"m¿Uâæç(ÅK½AQèÛÀÛ&~{*ú HKQü6\§ÂkÕÈ€Wi¨´…U»?äo­Yþ6zªö”‹@åou’h@}ÈßjFÛYþ–ªÃ¿UªQ9ÏŸÈß²5 «œºZ¸Ÿ¯@Þ[Ÿ•*xþYôo!ò ùÛè=úJŠÜ$åo¡u÷’¿M£ äoM³ò·ÐÌÍò·&›5p¡¢ žëB·ÔÀÍâ[ÐÀ55Û¬K-ChàR (7¯è{K;¬”;?sÆ\…Ê)‚ Á1hàRƒ ¸ËÈ`lW¯Z¯`8KàÆƒLÒ*BŒYwѰأAWç"ƾ>„pu`YQš/‚ë…²®vLGèAÓÁU Aíh¯/i†'ï6о½ŸµcÀ)„£Ð Þ4r“®½¥8㬄;I@;êV„k¡„k×ËJ¸ñ±êNÕ¤ÌàV‹ mÕž%ܘñ—”%\^J¸’§é‘R™ÙjœI§O3²U6ÂtÂ4ÛKc¼Âm¶°õ &^{i«¸¦½1ÙŽkÃtIÎþx á^WÉì\3­ ÓÙÎÛfR{+ñìÿL Vƒ§V MŸ6qZÕNAæU….cZ­¡õ™)í¥O¨Ÿñ)V'žnµNù`2£õ+%D륡½4IK4c-Ï3í¥3Êã yæ³JB™Žtg<ë¦Dgµœ.)"÷ƒ³—Ò´óÜ™ÍZ3šÕÌxéõZ°f"³º`4EUƘõ{%.k5ÌXÖM‰Ê^:‹µÜM}8CÙK¤Fu__ *¥ÅMUÎLV¥äõT-èŒdLjójj³™Èš)Y·$름ce"EÀ±—ŽÄoÍåo4V>LR—É0V”E^tô&‹½%@ÑCcãÑwMÒu¶L0öÒ b/™Å†iS¨~Õ Í(ö’NÁ²µ)6“XygŠŠªÕÈ$ö’&°byßR¸º`ÒHÊV¥®¾†õ & {IDh¨”dû6u{=ÞÑÜ4L3ƒ Ó™dH2‚ Óž22uS°ÞP‰¿ús%üªj$u’D_/ihI˨ ø™¾Z-2}îÓÈW¾¹Ì^âuMï,èUWR¬\U6ÍäUWJÂG¼º)q×·©a×°ÌÚ[¨âÒ»^RÿY[P×KÈM¶*ë&è¦Së¦òš˜«"IÆ$#W°2q•I ¤ú/¸ú½oõ{%Þê¦Ä[u ûKOÁ %Øê¦ÄZ½‚ µ^Ȫøb4ÇþRÃõ{%°ú$Ò–ƒSž8aÕ§ÃéO½U¢ª^‹U½1S5SBªÖ¸™¨z¡TÙgÀS9he"Ú¦.ìû½4uú>šcKø»Ñwšª±A"35êôÓTôMå)ÒÔXÅ×׃9ASȦjõ~oCîàÒTO°•`*ŹÉRã rwkþžÌR™ ,•ËN T,êRc˜~Ê4æ™8*—ØÀ¨1LÏò+LÊ•nf†¨ìä`¨iu€*a…ÒŸ/ý[Mrë ß™žjjTªº»Æ™xÊØé¥í“$CØ©PãØ­:õÛ$rªjw}ÅÌMu'í'Õ&ÍÜ4æúCqsMÏ7aÓp6}Ø-¥Y¢¦M¯'Ýa?ÖŸ¡i\A± &h*Óˆî4åphê÷JÐôG›—&ƒ’¡éó Ï13µW’™©.¨sMŸ0S¿Wb¦Ñž·²4¬;˜iÜj‘@HÍ‘–™©DNî³ æšÊvm}5•íØúlú²•òbŠûÔÂAN©•´Ë“Ñ©lrýŽ&©›bo^[ºZ5beS¤ïUqp ˆU¾; gŸéÜ:-;µ ÄŠ˜öFÆÃê—cïá>ˆ‡E1†ÃŠœ‹Õml„à ŸK ½g4¬™2AUv@!§º ‚*â.O n¶#öyбÍ`ذ%™XÄÂz± +—L«i&ä`XeDŒeW½MÁ°ÚŸÐ\ZÏ#VÅfɇ¾@ª‰¯3Vù÷>ô0–Yˆ{4¬þ,}½zìѰ^$Gê3I ¹.6 ë¶ «mˆ»/Œ†u[ކ•m]Ûa Fþm#V;k  ¶ó¸z6¢a•ñ‚ˆ†U9íJÖ3—ˆ†U¹é蚈†½ýûxÇhXíÁl}Àc4¬—ËѰúÜ›ûbxõ)‹÷ª©ÏpX/—ÃaµÁ#MçFss8¬Ö,‚~5æ=‡Ãº)‡Ãê’ÚJ¬‹r„ÃÆh¦D¯ŸpÖÇ4kò¸“üÿ1GÃ*A¥ÀcSÃÍѰoÛˆ†u[ІU*OñÕk/õËѰ*¶ô¹ˆÑ°ÊX¡eþ'IÇ^år4¬÷ö±qõk"ÖËåhX·åhX]S„´ª÷!ÖËåhXe‚K©å +ÛÐEA0¬ÖÃú"›Ànކµb9Ö‹å`X+–ca­XŽ…uSŽ…Uþ‘Ô' û¶`X¥»;ºZW†õ?ç`XÙ¦e ú `½\°^.EÃjkT¸·Dú-GÃjE’6% kåû¶pX]S»iáæpX¹8Z4ÝÚëår8,Ãvû¶pX¿fއõr9VõÔ¦zÝîA<¬Ûr<¬?_އu[އ5[އuSˆUÒxÇõ„#b½\Žˆõr%"Öÿš#b…»b¸l[Iˆˆu[Žˆ}ÛzD¬L’Ro »9$Öm9$V‰Ü´¥Pcòk6„ľm#$V©ò* 
ý0@+ÛÍY·¤«r³¼¯²“X·å˜X·å X¯KŠõºä X¿fŠõr9(Öm9*ömQ±J)uuq@Dź)‡ÅÚ%ëårXìý´ÑÕ¢å[X¬ÿ9‡Åj±uuŰX/—Ãb•¯MBM!ÅžM#.Öm9.Öm9.–6ÆÅº-Æòñëår`¬—˱^.Æz¹+›¶O›¸mŽŒõr92ÖËåÈX/—#c½\ŽŒå«ed,{"cíÑËNËÈX·åÈXŽcGÒ?ÿ>–û…oÃÅkçú]!V` © [ ßN÷¸ˆÝe’Ü2Hp"º\“è*Óâ%Oô|ÇÇ æˆKU”¢« ñ× Ü@tcPŒAzJØvÄÇjovîá錽,¦–ÖŠº·¦mkº·‡Nêµü`н=Ôõöv™º·ò)ævc)Îâ3ÐyÕ†—-ÅÙ-ö¦ë‹g"HI•)ÎâUjK¤á «é'1O¦8‹¯bëöŒÕ>òÞO*0BV;É­Î!ko¡EÈj—céûŒÕFnƒÍ!²"–ûNEˆ¬W?‡Èj‡|뇗"û6YkHÄÈ*‡ªÄ}ª2,bdû¤hÃÒÈ‘µ”vPBо£Î4qÛ¬„`û/PBP9ñ·*$%„ëYñEc—`ÄÉšÆ3eí-@ AþÍÝ»P Á¯™•d»ûy*!h;vl!e!{!¼mC Áî%Åx)ƒkÓÄÍJö•W%uò¹Ÿù ‚Úcig(„ÀÇöõºgìk-œ±¯›2öõJfì«OîR-à¾|iä¾z‚øZZ(€_>Á/žœàíEð˱ÄÀ/™Á/W¿\ý‘ürÑBòKïœä7*¦è™†¼3ù¥‰äWVçæ·ì­Ü\E©Ô—@ô«Ozn) ýêåÅK¯‚üêPÆÒõH~é¡’üÂÛ2ò/GjSÇüÚÝ~éšüZ{ü ýÍM ßÀ/ž™Ü—ÍOîËbÀ¾:9¢>_vˆ}Yb_¶%±¯Žö\k;!Gì« Uqàû'Ø×Ø—­IìË·@ìÏ0‚I}ùŒúf8íÔ×l‰ú@û-þEØ7óTþÇ^s™;ó %ó…ɘoæÎ|3°sæ›(¥#ß\}G¾nñÍ5qâ›kâÄ×h0ˆ/Êñ… Ä—ìð—#_²Cg¾¸˜/Gs‡¾À¦úr 'ôå% }‰ù}­\ƒ¾h„¾Äq}g½ñ?Fè+( 1dèËÍ,B_n‚úr“ˆÐ—ß(¡/w{}±£CæË®Næk60ßôøZ!_nÝørŒ!ðµkøæŠ€öÒ÷ íå i/¿Ð^^’´—?iïËÚ›?¸A{ñW£½¹ï;íÍ}ßi/˜3i/à²ÓÞ ‰öâ’ÿ³ho&¥ ½èÈ„½œŽ{¹›HØ›úI/ç’^ŽØ$½ì$$½˜<zÙíz9€ôÒÐËnGÐËÛôrÄ#èµÛ Лû+A¯1Ћr½œÂzÙÍ zÍÐËO‡ S‘^”3Ðkåzñ}èµr½v?€^4‹^»&@¯•èÅæ¥^8z™` ’Þü“óbŠ0΋iÀ8ïøŒ òb¨?lîú>Û?þ0}üÇâÑâùé‡+¾çåùXôSy¨0>?üûÿö=Y´ëÁËŸ þ¬ß®Ý-ãá~ûW¾á)é„ö˜íåº몿v ä]· ”Ÿ¿öþŠ|í¨¿|m J³?ï½ýÃTãÔ_ôÊíË_z÷ÒäåöõçOî_®øÇŸ©Ç&L¤J£‰YI»1^1JFþîÇ*¡ý»9þ_R-?þôñO^Éß~üóÇÿøÃßÿøKotÆà9û?—̤¿ìNÛ¯¿“<¾o’ÙNQÒ_z§ÃïôCd–ÿÊÝ¥ Å_7Dæg<ÒžáWÍNµJXàêWÔ&ç”öÚ0ßôWÔF‰ >i–ú篩C8´Ú:tØ×+’m_úmlÃ…û’žª{[OE%¾¶§>µaOem¾´§>µé=•ùªžúÔ=•ù¬§þ‚¹pY.uÉ÷RNð3–´ÇµGe÷÷ 5}ìe‚úõS.o£ýøõŽºJ´çÏßfÿ·Y”¼>Öu—öô^·¹Ûmæßú8“"$öåš%ßð3÷ùÏke±êǯý™Û,þ8áóõì‹ â´G¬`Ïãô;è?ÏõÿåÿúÓO¿ÿÓŸ~ÿ‡?üá÷?ýןþë?¶ßä­|ü¢Ó‡"Mr,Ð5×HŸ×™ÌúP¤IÛIQ²”!‡w(Фo¤,JìÝD•ŽCW:èCHlWßHQhÆÜ³HѱG(Ыþ¥G%ÅЙBÎí›RnÖ ±sI‘@ȶ(¡ç“ì;)¹kOÝ(ÛÕwRE/)GM‘dT"õ¾“²K’±Ó9ÂAßúFŠ õÁˆ'‘*AÅ&¢Þ›†Ù‡J^Ý7R'úXú#œ)ˆY–óJ‘@Šlšúñ±C' ûF “:×sNc}‡òN£ÇW\¦o¤Îã°µ¢MÚFÊ®sž[Ëý{(ؤo¤(8kígž›ô”C{Ý=¡ø¡`“‘ûøvoÓ<š%•õ<Ô§õç”[ÇW÷–G%>§ÀÖØµF5´¤µ‹?îtéoé±²Ü#éPyßGÙ·§_Ô`Ùî¾Â®}{ÄÉ4õƒ^‡Dªú6ÊÙchŽø\FÐ.1ˆÐî5翎g»x ÛÝ·Q”ýg.âœÚFîÛ(ñtëÖ!ó!ßwR4J]ÃWn o—ÍkÔÆPßIÙ*Õô¡p“¾“¢Ð¿³h™F=ù¢€ôVÇéÏSÁ&}+e?»—-·}+E-&´2jœÓ¢˜•Ú_c¼§FeºúVŠöÊ XòÄc¤0 ]›œ=ØáŒ‘n„ù5FñªÄ‘€øÚdq@^‹øµo¥ðµ“ÄëË^]ñÛÒ·Rvíôƒqñ*óe믗r/µn²@ «Ê<§@ 
(7’2~ž`^R^ÍÒéì9¯)ˆCÐ9—XŠÚaùºlÒ·Rìukê[)^îx6Ö“ÐÞÏáÒÛë»)öW ²—¶´?mÇðœïd]aÑ ¾;¾s‰1µo¦ð“;—'O]ï°¹ÁÂt÷Í”(¶ï}Ë!>Êd ÎìˆÒ{=úžUŒ©%È*x¤8 {æåHq@üîO¥íí{)ÖŽË5¬s=y/ûËfTœIßK±¶R IßK±ª(Ð¤ï¥ø5— ¤¨å»ïCÆŒ"Ø*ë–"¼*{ŠBZ6™F ™Ždo{=S}áJÑ´§ñu›ÊÖBtÆ´k¶ë[² =ÙÜ÷âN¥ß8ÒðšG¥Øè»(Ö}¶eDÙûÞÖ”븭)ÈzÛRšxî›xÀòJGŠÿѕξér*¤³N£² {‘Š0iž«u)˜tÏ•/rŸR=Ó>§ »dØFÍ û’B€ì]îkвw²o)ÈZEîžÆU\sO1@n;R ¿ ýËéJ@ÖÎòèºãJ¯O¶ûÏ8®Ì#ÈZ3|Õdï\þö4ÆÕÜùd2W»ä–"€lÌUœÉ’ÆU”ÛKßêH@~¹3EÅŠf «g,NFÐÛÖ#€¬!c=5"€ønÎ)Ù—¦ Ï•µŒõÔÊcFô½´XLð«Ç–¢¬œ{ŠþÁ½Žýc-¶ýcc’‚L¦4¦âfWŠþ±!PA&ÓŸVd2§a5Ol 2^+ï§ “áµÒU‰¥Ôž¼Ö\éî ¯Õ.¹¥ .bÎçÜCòZÇÒ§n-ù0¢S3{\s7ÖQ•ì´f—ïºRüyŸ’tíN«¢M:õ ·!ÅÿXKg÷ú3£«4}‡Ó*½›£e¦:¥yÔV.ªeñ?\‘ž±”ñ?»>¯.dq>Ñ èKì:/§ÂLº×zHpIÏWF^…™ ¯Uaõ-ÿTÔBwZ½*g ÒqE›U7F!ÝkÕ¡S!Õ¾ït( 'üüò _R™é^«ÆL(Ãä5Í)(Æyëyƒe1@?–ž.úŠÅT:$•=µÓ<ÑR ÐCxöz¦*1…ŲŌz¥"{ŠŠñc— ZÞlOP;êZüÒ`÷Zc¤=‡àî5)HlG¢C]±”@§µ®_ßãˆ:$F=¨äe”iD±òŠ.éN«$Íÿ¥Ã^ /éNë©Àù«¼^ÒÖ¨¥¤¥æ"…r)¤;­‡ÔF»Ll#HO72*_󖀘š8ä¤ÞUe»¢ß¦èk’ùLÑ?1¢:¯·]Õ4‚8„à¯XGà+v§Ø¡½öÑ*¸eJ±?"SŠÆ©—ŒuÔˆýíÚ{V:ÙFìšrH¾_Ë2‚bˆÞÖžìíZÖü£ƒ]í°ÍÚ¦N§DoJá?õ÷žðÙ^à±ï;ãÙ\ÿ…£Ä¹ë#DVn;ÎU(DZõtâÀ¹±’ÛSv—Äs-ÕÑ+“Ê<7Z1Z¬µ p®2m°eÐ\qò%Æ™†PÌ•Ø`Ìè:ešήcÉTÊ%𫣩»ŽÉÔãÀ™æÆWªÃ{{•ÍÕ±ÕE-!N8Wˆ{UdË-”qî¢ð¼ih>çBÁŠ4—RA¤¹·&Ò©}«¤¹ŠÔ ^ƒà@sÌJš{+ }n)@su*î¾{ÚvÐ\M΃cæJÝPšzU<0WŽ…RR×PÄsuîQíØØp†¹R0T:Œš 0W—[cv©)îsy" 4÷Ò9„ýê 7ÓÜKkÛ»¿6Ð\©GK¿ñÎŒs¯§¯íw‰´'Εڔ¯ðÌ<7îwIRª±×•ç:gÉ‹ÕdKà¹O”sÏ?Nž+É;å€s£_ìGÏçHœ{ië¥'m$ÎÕ1Uu¡ú…çZ“çêÄìÑ3,èrT$ÐÕý®žm”@÷|FÛIЕ  NN¨N ËG'Ð/w¨C“çJ1qëiVÏÕºaï©ÚÉsux$„!Ï=ÔÚÓ¾çús%ž£Î%°„¿“çjˆˆ·TµÚÈsãvËrf›yî©xñž?<—;mºq¿i¤§!ÐÕ¡ë­©—’çj‚oÞŠó\­Þt ²öð\ œÚnúp˜«w6´W sõÜ÷ÒW. 
¹Ú \–v›4÷9ŽÐìæêÐøÒ'Ò\бà2qç†mÖÆWù¬€sO/93é8—Nq.)âÜx¼ëì¹çz-2ÏÕ|wíœ"y®õ<ð\ÊÉqÀs½X湺ݬ¬HWá«™çꌉÖVu½ž+ç^»‹óç>³òÜ4;ˆséêçZ«€çbט8—þ/y.—ºöntÑ‘¬x}¥à¹^,#]”ånûzDºRt•R¶c‰t£\É«UméjNšŽ¾PÍH—µÑåëÓ=•QHùžÉ‘L×úyaº:œtvGŽXW¢"#åL3±nxб šÏ|ÖÕKi£€uG1zd±uÍ®kÕ×e÷ÖU-—«ås$ÖÕûÔP[Gs`]6>¨®urP]‚Lu­û€êF5¯C †*™ÎXW'Ä´[VY ¸®ª)4TGzpݰIr½óàÌues¬| \×ê ®k/=s]kp],ЉuÃtšÖYX×kY±®õJ`]­QvÍ^ëžR4éPŽX÷ÿ7ppº6ëÚ»×å ¬k#°® Ù »öä »@vù}ìZ%3Úµ‘h7®¡Ìo•2íz›d´k5Éd×']»È®uW]æR¯¬/“]{« »6IƒìÚ+Ùõj´kNЮ9:@»QnÞñª>*ÐnØîõlò@»6­íÊ׌NZ÷Í@vm.٥˰kß Àî©§iK?r]s"ÀuµFŸU®k5ÉXWÊ?RVh¹®šù^Úªœ\7:Œ²ïlå(¹n\sÑçQV…äº\¬ëY×óÒY¸Rr]ÉMK“×Õ÷ Ó|åÓ!×åò‚`÷’|5Ýu’]¯J%»úL$UTN¤“ìráJ²Km$’]}_Jk× r"»Äs$»Qû ·ùyß$»’B’ |éB$»„$»Êš“T]¯ì*u–y‰á#Ù•¼ÒÞÉÉîC<:°#Ù…Ù.»Èvïgük|p÷ÖQÕ©¥“ ÜÕã«ç #Ý¥²é.¡鮨¦«Š[3ÜÕ¢ñ¶,>w•xiIúw)åbùQ“±GIº+ù”rób+j+"ë|ù}ç;­‚ŸEí>¹Ž'òó…ySDï ó®]âÌ(¯DvÂkmAš ¼óóé´qç—cÞ˜1ÄA[x*8¯\}¸2Î+%¼>¶óFgÖš¤GæfΫ$ª»FøµDôæ¨]JÞçÕ¤4B^Éyã{9z~BpÞ¸”òKtfNÐË h‚^(ëèE®€^±êuIX6ƒ^½:ʯI½]2ƒ^L-ä‹È€‘^¨„ôŠ (ÀªÈáwÒ{>Z„-tÌH/†x#½—΢·dñ$½Ô[­L3“^º‹Fz±Ïc¤÷‘ºh±ë½§Î~4œ—›’ä¼ñ~7½Òfç…49¯–,# 9¯Ž\Œ«½Z¬Ê!Zº@/)Ao¬¬äÆÞõÓè=$”4µ™È@/_@¯Q€ÞgÝÕ·Wz%Í2o·ô2nŸ ×ÖŸŒÜU¹¾…BÒ{(³áÙ^C#½1rN#Cª‘^åŽV}kÞX†N#)Iï¡TIJ|²¾H¯ŽòNkI¯2þží;%é}tJûvIÂÖtIzùNIz£E[œuÁBwõn¡» ºêåi¢^4³Ð] ùEè.BéI{-È´WB¬Ó½wŒØ]Æß0v7GõöJÖhï¨%³^Æ@Xèn86ËÑ> Ý®é›wºk‘•õÆ37UAG½»yê+*†îJ­÷úõZÔ`F½‡>ç>!Yè®jÔ•ì,t—‘Q ÝÅA²^žM±Ð]õÉÂýº«H¹_±»ùE쮄±zb Þ¡Z£l bô.ÃäÆðòcêÛ{O¼iA¼ÇËÖd¯\¨©o½0ŽwÒÛ¯tÃÂxÙOÆ»ˆ¤íW‹«E¯²DwzÂÞM熻ð³…ñr@/Ÿœa¼ì– ã"3 œÂx¥Tµõ@ðÇË€q¼ñç¥uÆñÎ:ÕŽÅñjŽ:Ôaq¼1ƒ¬Wß7b/ÇmÐ^kGÆñò£îÕ"ŒWàÞG“ól›<À½›ÄV¯îÝ÷rpíÝ•ºrí›wŒâåÀÜkƒ3p¯}ÇÀ½²­Sß<î¥;@ÜËÖ$îݤ‰ÜÏf÷jâÓz§Ò¬Œ{%~ªõf‹ Í¸w“êÚzV|BÜ—<µ¦i˜8ã^º¾÷ú­2îÅçMÚËF&íe7íÕ¸ ÎÚ»Iæ7zlˆ{9l÷òó&îåû&îõkfÜ[}.r^[ä¼ÖAz­=z9òôîR þ<ˆ—Î9ï¦ö8œW_÷’€m漄zíñ2èµn ÐËÁ„ —ãA¯µ´@¯Æ÷ãjQ¥ß zGÚí9þý¯¡¼ñ)m5žòm† P©Íî߈؃6R\š6Óß@›;Ôfâ9¦eâ6ƒ8äzš…°ÉB]!V¥¯ ŒÐeˆÉþ’zå„ ¼Ü%¥0ƒC(Ì`ò9—›²¼ÚO¿¯6ðº-Þh:Iö7ÍDòFÓÉûýT—Á1h¼J»²͇$à•´°>àšî€÷Ѿ›ã@À«ž!¬Ù x•FùeZØm¼a[«+ðá€×x%¯Ê©ÿTm xŸD4SóV x¥ß!ý–ŠNx•Û¦gL'àb§VCu#„׊eÀ{‰<#u_€×ÚŠ€7ÆVyKU¬‚€÷ÒQœ©éiðži[b¼^€×îÀ+©îUŽ3àr³¦Ö3à}2 MmûÒowº‚ï©Ï§%8"à=¿%œ ¾k›ÀwYK㻊@—jÓø®NÓÝ-ù.»ñ]{.ð]»Ó/ä»vIòÝqÇà.€îæB »V(“ÝCˬ!hÁîË”¸.MÀºl `]·e¬ko†X7‰]¿°.zëªùï{îëº \÷ÉÚ>Ód Á®½m€ÝC¬kéá|»aSPX ˆî`׊d²û¶ ²ëµÏd7l±öka 
$»vÉLv­È®Z$ü«‹AvÙOvmŸíM !s]íRJ_†\×6hÀumw\ן s]¿_æº^.s]ÛÁ×õzf®+§®%s®k÷×Õ™¹ºB×µ·—¹®Ë\W˜K±95 x×xסà]ÛÁÞõïú2Þõr ïzÝ3ßµgßÕíæ¾‡O¾k]|7l±hŽ-ù®_3^/—¯5¯5 ¯=;ï¡©.ü¿²k¾ëUÉ|×¶¬Àw½93ßµGßµj‚ïª.K?N¾kO¾{(?ÒÒ"ˆÈw½\æ»^ÍÊwí»ßµo|W™ –ÎŒ€wo‰-wš¼ëwËx×lÀ»ª‰Öi'ïÚ¼kÝx×Ëe¾kO¾ë¶ÌwmÔßõûe¾k#0ø®_3^o³ xm‚àU9-äën¯=¯Ý/^›ÄxÝ–¯ß.^{íðúÕ2àõË€—3.ø®¹ ™ïšï¾k6ðÝìîÚ2€" tÁ)Ò€%é.—9&ÒaGiPÙ¥¡Qâ]H˜Hƒ›ܵ¨ Æòjqß%[,/lË‹$¦Ñ@ÕH4à\Ø.5X(¯3†tÆD÷3DgM¶ë6ˆ4àL™‰4Xps‚»§F¼»íí›Hƒ²`žmoßT”pðjJ%¦Ò yG“*1•Ä|î2 ÌT”Éìl’Ë«zg×ë`,/ƒ©Ì;kE9Êxß¶Áx¯ùYuþ‹`^œX¶`^äN5©$ µ`^d%ä5]…ÌkW„RCŽà3¡œ(7¥åû[ÚÒ¥(5 Yô÷)5Ì)–÷WiïÆ—·ÔõßUâµÁ 3ßU­jÔ&oŒ’Ñßê ^¥‰ºcЮ˜ße@'/T8L|§šM}wFyI]y¡òBÈ{hù=jÆ‹a̯±aˆï¿4È;ËGmÛy—oåÜ2ùnø®÷v¶LΦ»K9bêîYM|7.y]=µ:ù®SPèî"ÀÏøîªp¢µ†² ¾ øf|—ä”|R9Æw‰2ÁwIUÉwuÈjjÇ[ïfÏÆø®Sð]‹¶àÕ0Àk×´Þ\M^8gx± ð®ÚÀºš//\D¼@jxù†xá(’ïÒQ$ßE¿$ߥ#KÀK€GÀ«Ô„s;³c€—µìñ»ùÀ»Œÿ$Þe1£»`Ì™îÚAw-ºtw#ßÅpa|—+…_Žx!Cþb¼yAã7/ jŒ7d¼F›ñâš`¼|d¼Ü– ãµk‚ñ²‘ñ¾l‰ñÓãe{:ãþ0u=Z2Þ öÈxÙdd¼¬¦3Þü=VÆkãe¯$ã5> Ækå2ãÅÆ /¯HÄË·MÄ{jEsWéo^»$ï¡cma„—­è„D„—NÂË—í„8ˆ—/ˆˆ—‹ˆ×žˆ—/‡J"^¶¯âå€NÄËý7"^t–NxíNFxÓ G© $PªG¿œðæ+ðJsþî¼* xùÖxù¶ xÓdôËé.gÒ]³îrj!Ýåû$Ý%sº›ü$Â]F8ÜÍÓÇ/‡»lgÀ]3Ü;Ü%³$Üe« ¸‹Êî²'îr(qº›ú$à®Õp—ÝÕánî Îw¡Rk|w@aÀ]»àýªií îr 'ÜåèC¸k0t×® ^4%x­xñy›/F4Ò]^“x—_xÑL8ñ.PÒÝ—é»q±Cç>š¦xÇE¸ËžL¸ë×Ì ¼ „»ì’„»BË:Ä2ܵöÜåCS¨‚l—Yd»ÖÀvœg"¼“¸B;Ê@¶k-M^ìG˜ / ŸÉðb_h—ñmD» ì#Úµjf¶«¹ðº»ÔØ®_É cH¸k­¹”-sêÊ¯ß w§ÿép7¾i@‰pWîúg zÀuÐl7ÜÓíBÒµÁvãN¶«Åœ‹² B¶k´3ëíf¶«5Ú–B\3ÛÕêT³ì=5Û€»a»¤"´îïÌjÑ …¸.5`8Ó] Wh»ìØÞt7lצýÑãZ-VÑûÝ—^¤»Z°O6‘îêíi,Üß¼ñmLrþ?I¬¥¦¤$À¶McÙ¹¼«-êIÑ)¶ýÀ+*²õ„G¼‹´xuærz+4D¹KËs†Bƒ>ì6Ü9àõ[eÀë·Ê€wQhD {?N€WlF ‰µ«0 À;K—âœ: Ï€—m ÀkOÀOê>È€W£už ¨Í€÷Ò¢§/÷‡ó]é›h™t—èdð]k0ðÝBÈ”/³ðÖÌwg‰| ¥gð]åœûY^ò]½Sq™ªì¾“À¶êTþùàÕ$ 7´|X¼l2^>/»¯_2Þg®ÒðVЯý‰%*áÆŠžÖ±€xõrØ×rôˆW¢5㨯®¹Ž4™ñÆ{‹Á$ª7•b™ñÚCƒñª{Ý#m o”ÓÎÔrŸo-ÞhI¹Ž¢þ ÈkÈõŒ‡éçÁyã™å‘ïºfÈ;KXéú¾òÚûäŤìp”gÈ7:[85{;ß È+·xù-y½\†¼Q'ã•I#|´ÍŒ÷Ö[â™ñ¾MƒñÞzòž ˆ×Lòš©2^}–c€"ãÕbÚñš)Þ[ýçlÇ^@xÝ” oØÖ1Àñº-#^æ§&⥎/ì‰x5Ž•Î[î—oŒ÷‘ä 2â}ÛâÕB- @¼²¥/ˆ7lgŒm} Ä+Û”ä 2â Û‘> ^+ÄËCWD¼V—Œxô¥¯ˆ—yˆx¥Û4f "^·=ˆ—BOD¼nˈ7ls¯x5§5gѯˌW¶1û‘ñº-3^­ç·‘ü÷ÖW6reÆ+Óœ2©eÆ{k2‰Š…ïúáŒ×m™ñ†-Æåϳ¬y¹ÌxUn9 ÁxÝ–¯Û2ã Û±×Z¼·Üâ1Eðº-^Ù–!“¯›2ན#ßk{Þ·m^·UÂë΄WŽÖP 
f/MáuSç»fÉ|W€éI²Àwo­+G¯ßu[æ»oÛ໲­)YF¼nˈ7lÓ‘²½eÄë¶Œxß¶xý~ñªÜ´Äu¶·¯Û2ã5¯ÕŒ×ËeÆë¶ÄxÃ]¿‡ÇLÆë¶Ìxß¶ÁxÝVï-z6R‚ñº-3^Ù¦¡1Æû¶uÈë¦ yÍÈkUä [Lj]u÷mœ×m™óz]2çõr™óÆ/ê -œ×ËeÎëå2çõrôz¹ zYŽ —åzß¶zÝ–@¯›2è•mêÚ:½^ÍLzß¶ëü¤HF½n˨—½ˆ¨×Ë%ÔëÅ2ê5P/¿8¢^v0¢^/—Y¯—ˬ×ËeÖËÁ†¬—&Y¯—ƒ(¯•ƒ(/E²^N$d½f£(¯Ù2쵺P•“!`¯› Ê›§Wåµbå…³QDyá|ëý¢¼1Ñ5«—(oxïóÝD)×ð¬¥{˜:UyãÕÜ=##Uyã‹L ¢¼¢8:¯På2ð傊À7úöºC°7ß[3g×,"ðÕ¨¥køjöYºà 樂Ĩ×w8¯&BÁÙúx¾1ö›ߘŽ/!تGœy¯å ïe°9¯˜«H> éeÓä‰P+ú››5y™3ŒÉ×I_‡“¿D›‰¯Û} Qý#û˜&ï¬ÔqwOæM^J°ìkŒGöµINäT¼Cz-=5y'ÍÆ-µªe_Ó^˜>žªL‹ìkЯ·ìkšµsµ¸Ý¤Ê qûWöµqTƒÈ×L™øRß4yåƒ umO¾†r™ø†íÐy55‡$Ék9Û É Û+÷ZJ-÷ʽvÙuæ^CºM“äµK"÷š•³äk¸]M¾f% ÉkwBòµ[C£qž{mçE˜{ÍLPäΫ¯»ª#2÷ÚË–r¯!¥©å^c8æ^3r¯…ƒ·œ™–{ͪ‚ÜkL™ÜËσ¸—Ÿ‡¥_ÃQ&K¿†£S–~ Ç£,ýmL¿v)[Cg‡L¿fårú5+†ôk̲ÁôkvI¤_3[K¿¦ˆ•»SJ¦_³j ýÒóZú5+—Ó¯Ùí~6¦_ã%™~ çá,ÿšÙíRÎèS‘­ÌükV Øpnϰ™ جžHÀfåÍlÈÀö² èk÷c6–ôµ×Ž l|ELÁÆ,5ÌÁ&Gg@9ؘұæ`cKæ`³!ÛË–r°Y%r6«r°±*ÌÁÆK2ÛË–’°±šLÂf÷C6»&²°™ YØ^¶”…Íî‡,l91¶%a³K" ›Ù„MÍ$l,Ç$lLzÊ$llN$a3’°á阄 C™„ÍjÒ’°ÙŸ‘„Íj‘¸¯r†¤ËÌ}ߦ‘‚¹˜‚6¦`3r°ñ»g6NœÌÁfåƒÍî‡$lvMda£¯Á4lð5˜…s?³°™ YØ ÜgiØè£0 ªÂ,l¼$³°ñé…ÍLÈÂf5A6ÖIØè€µ$lv#$a³! =R&aƒ‹lVä`£×Ì$lôU™„×d6Ö’Yذڱ4lLýÌ4lx:fa³ª ‹! 
›=”ì  Ü`6Ë–ZÓ”p?SnxÙ’tC~*7Ðá» „ÐÌ&Ü€Õán°"nÀšÌ„Ìák«,Ü`µ€p/Iá^’ ¼&…Ìá>:…òœ¼—ëW‹íe1„öâó î}ÛRh¯ÙL¶!¥4ÙPâ^K)Üû¶ ÜkuAl¯ÞäÝ¢zˆ{Ý–y¯ß±½V®´â˜l¾Oš÷?LÿðÃ¥\e«‚tçp žÿ³üXx˜Ÿþý‡û¨¬„kˆ^þLüð§ù.ýëîO÷Szà¿òcFŒõGyÎúór×ö¯ÿ˜ŽëÝ÷ë«ï“Löòó×Þÿiïç}×÷ì…AË—{ë‡/½óÓÖåÞåÇOî^®÷ÇŸ©…æm_¬ÏH­ òŠõÆu­ªÎßýXž7ÇÿÇ °}üøÓÇ?ýMø"ûñÏ?þãÿã/¼Ï1ÚÌ:×{_®_~§í;î´.çãé(«é/¿Óáwú‚1±Ãå¾ÒÇÞ/óS^£g~Ù €O«„…ƒ~Em²–†×†:_Q›uyüc¯Hýó×ÔaRZJ‰âÏË«"ÙöÅ_Ç9æ®/é«ÏÝ­³²_Û[K}Ø]­>_Ú_K}z‡µª|U-µ`—µª|ÖgÁü+r¥°{N½®ß¦ãÚ£ºû{¶š>ö2YýêIÑn³éÜÝu=–ûgn³ÿÆÛ,:#»l‡Ž ½os·ÛÌ¿õq¦è‘SøJêøs÷ùÏKt½ïç(ãÏÜfñÇù ·XµÜv¶C 2XÇžÇé·ÐžüËÿõ§Ÿ~ÿ§?ýþøÃïú¯?]ÿ±ý&×åã½Ä£àÔQP‘¤'ħø9–À:!_d!>Ó^´7|õôÆ×•@Ó¢½ò¥Kô\ Èh{ÊY­®:äÈ66Éypຖ|NGuÂÌ1ÄâÑ˨˖£úm§ïW9Tp)̈¹¾M×ÑrNÈ6ö¦•o^#B=°ûˆ3VšÉœÉ×|Òd!weÖnŠ×{’1³&ƒO„(dê“ßy;˜üGÁ'gÚ O»ä—‚O<· ”RS_)ødèPR¸ó^sÈv„âIÒF¬ð·‚«0ìH#^Šø\ӖЮÃëõˆ½ÂáÆn˜•;K …"+¦LªÀ“|<–-/™li;žúΠpÖ@•ɾ§)g¾ã­À“A…•Nùn¹—âßçÇEâ75fT¦¤î}€{Z3œ›¥‚Ö’…ÝñQP’°KÝÓž ôùVÝï0…l¿75È{: ã ø{ºr¸Z8¾Š¬ÍÑ€øèòž4)ïy¦’E’Ymœç)þxCÌ«’$)îyÍ‚X—¢ùæêPÝó–•j pxπ΂l#J·vá†d”^ ¦0Ý´øˆ©ûJÜYµ!+ÊÈtg) $¼r/SV˜ƒré½ÌYû Ê%wŒíPÜN AnZÏB_ù.‘Tç<š@nôKfðH°ìYniÓ¦K;€E1”¤cvk™ß5¶¬ØÝF4ʕլ’:‚ݽNY€„åb_²°•’ŸÎ…ùÞë’U‘F]6¤ªèc•lkV²r[ÿØ4[lU÷^wJ¢¢ÜA©Â$·&ÒD$iÄ{=‹®•¸ZœŠ±.[1ŒfRhïÅ]²Bà\³×IX¶$Ù¥´¥ê(ßòZ†žŽºáÒf¾{[F\²éOÞÛšµ?­dù6…öÔ) :S:KŽß.‘¢a#[Ò°a±Ã¤}·P\T*én†Ÿï©fŸ½·+éà-J;­lžÅtg‰*¾Ð ê1¼›rXΩÃ^ S-ÞÀ­Tc„ÕD¥3®eôUºÐ1Ââá”Hq °Ë³ƒUõ×eÛ:VlËb¶1«(ª¡ŽËOŽíÔ]“Tê­ÌÊc|UìFÌouŽØÏ,ÉÄ·º—>É«}n7ÝXüó;³EãlwMìx+Uü^YõcÎ H“äåšKu«MÇðÎä&çµL‹ñkRmÕ›ÐÜ2¼JÒv ¯ìu4ÎØÊ—r¥Þz*¢¯Dbß6ÇøjåŽ,–Ê>©™·¯‹ÒÈŃϥšWÒc÷‘0Æ×Yb‹€ŽKe1ä'ÜöªâÖ²%}R>¬ÍѼ婻®ržÊáÐ[iÆøªË£…xÆce¥¥½÷¢tŸ[ ž”w­;£¥²8¨UeÏb}“2V·@íû<²”–ÒrÞ-F=.•|WÅ(Å⬮nÅÜí¯®wBŠ“ÝYûŠcxewVdù^çOÛ©‰[qóctc¤ºªZvô¸¬¶ËçŠUSÁÔ‚8ÊU·%lÉw•o<5??¦‚,Åv¼öì»òµ]GV¸RQãò£‚ÙyåGwIŽÍ]ɃåÛ¾î,aí¬/§„åGKeV±ÒWÛ¿uBdN6·J¬š’«^ß× ²%Ö® áh¹ÈwÛ<Ž~‘=Xö•{Ï줘¨¶±|ëDÕaùùëðvac¼”ó§Ná’¸SÍmöl8½ â†è úš² «àý˜‹Š ×cLNì$§²¨㜽XôL5vcѸf?• ã–Y¼öÇ8<Ùh—eízaÜ“+Æó˜[èFäÌjf¹›S¶3;³èóaLªX»¼§¦›¦;y³^Ó;¹³ûÓÓ³æIJ?k%c%•ÚI«¥Ú¶%;´ÒèšjTBØÖäвÏ<ÆáÑʸtu’0n9å½NÊtÅ“0îÙ§E×㑜Z=å}·¸©0žÉ«}]öÌn­_ö*Ùæ½LŽ4ñ6}BMZG¶¬P“‘ôÛž^±&WêÇé»ã’œ[…¾žËÞ[\"bÓŸëDZ°Ú4ÇÆÔöIe÷ìßNÒ‹¼«ü^µœŠ$ýŸ\öÈ..æ«0ê 
J”Üæë“Úfuy/x7׻Ϊ&‰i¿JÃöóPÕ8]ï±üŠÎÞÃýÉwk¬áêFI}uáÆUG5ÃÑœJãÅôÝä˜C%½eWKªÌçûUÇl8¼>B¬ZüÆÈ-ôîß6lˆåÖí£ƒoŸ\ôö*~ßiÑ–Ôû/% Ö E·Ê‡u¬X-õ+¸ Žžrj]Xc̓£‹©âM 0x:]ººf†ÁKôÛsë aƒeÛzN;°`¥%?äŒ6[bÁËs"Fg`K¬YfÁ‹NÒ„›Qs‚/O$ØÒBfÁ::{jŽ«p6±àåùØÙVâ3 tnܲ €ËviD©Iœ2 ¬7,Û²nmE ¼Ho»ÛJ,X¶}Þª; ¼,šÎ÷¶" –í:Z–t`™¢ý;éÎ$øeK$xÑ¡+i‘ߤ’`ý5–÷Íc –m¹§æ€˶]‰'¼H,ín Á*#_÷…2 V±^[#Á²ký –mÚz< P°lóÙ£­‚uøúØû°`Ù. RªÅË&‚ÙðfbÁ2íK?ö”°L“š—­aÙ¢¶MáçœÂ¹6}#aÙú1 ¼î Á KʜǸëR–ð]!…žLQYÂnc–0–²¨º_MX#”& MÔõ™\`¢ ëëf–04V×2¯ÂÆyÙ:ƒ˜YÂÐÚYמƒ^YÂØKU¤½Qa WÍÒ ,aè®iô0a ««²„­ÂŽúö}\T*KX…h”%Œ}t9ñOXÂÑŒ^GsKâ®HKù¦SQ–°Jl(KXÄ/”$|¿dǯ„$ PbÙÞ”%¬²aʶ¦KØ:&,a{©B†jïÞb&„#¬7J&áC]·áTÌ2¥‹²Ž²ƒm,„ Ðçìw¹JVíE%«†—²ƒUmHÙÁ¶¥=ø:ñ:ǕЃ½-§ªÇ-ϼÂz°*­TzðU Yم啬ˆJÖ±v°M(akx‘²ƒ nóTØÁ1±ºœWgë2;XåL• ¹êg^¤;øŠá>ÆÅ’ƒ¡H}ÌkF!«4™ƒ!€ w¾™\õ¯qÝüdr0+š4à7ƒm" 7Øjn°U'Üà ‚dãnX¨Á*z¤Üàxþ¾OÌK¸ÁñïåšÍ,×¶óO¸ÁöZ…刋·‹ýÆ †>Þ6¯¸„|Aé7Û Òfn0„î@0jø³pƒ¡ÇwTIŽer0†àž·{ÂÛ³ÏMaÛ Rv°$AUv°¤jUr°ÅÑ 9oãw²Â †Já2qSækÎXáÛ‡#Üà¨l¿ê=Â(7ØBì„ln°½ác& $Á¨Á:”B ¾ª¤Ý¸cj° ŠPƒ5!¼Rƒg( °‚5Œñ%_ˆ‘žwFB Ƨ{Ûi#GÛ¢XݬïÅHÁúÉ)X7'?ˆ°,LáG•À²ÚÙÍ8Á˜B˸Ð4N0>žu\q'X߸‘‚õÓRR°.Ê Ö©i¬à .ã@*ŒŒ^>ãâÙXÁú¡-ø‚?>8Æ >5Äøt:©ð‚eb-8ªŒÍ°e-Ø»)´`,öë¸ä4Z0fê:Ð!£ë:hÁ>ç<·4}eF Ž®e0&ŒŒçwïÆ ÖÅxÁ¶j¼àø|îs\^/8Œ ›o¼àš÷b\²/8=–yÁèÊ2èÆ öï‡yÁº9*/mÝÆ•©ñ‚u-5^pÝîÌg¼`ŸË Fru¼Þ3‹ñ‚±h^ã–ÍxÁòl¼`] ¬k±ñ‚%æÜxÁxê6n½œª|?F )ÿQ,x¦ _ãßÿ xSè…é’T„T ^*”ù™Œ•úzÚé¤"âçê à_-!)-M*Bñ?•Š(¢ìë JE@B¹ÞĤ"6@4} ©€— ¸÷êX*B@“Šx€ ô듊0E¡`ä¬Anóô` R4ÚF+(X<“Џeo¬PÅ‚ú %]ˆA¤"Isp´ ÆÈ^#ªT•"îº=“@Á&¼Ñ `CÁa~×è ¡w³i®* 4fð³ Ör kÆ…‚²R$8Öÿ}P¾ Þ—ë?çï_dgå2|×$H#[¡`k&CÁR"õ|õ ¯8oÆŠr|€‚%gAÁŠ(¬å ŽrÏÚ¿…‚oȺŒ‹'ƒ‚Q(ØŠ  achðnLP°uA `(¥ß#(X¡`mÊ‹K*$E‚íß ,i VlOàYùFT¯Á’±É€`…Î^!Q²ôƒÁÚ=‚ 5 ˜‡Qà‰Ÿ ¬h¢ À6"†§ ¼è!¶ ëÔWX_ŒÀ¢³c°¾˜ϼXë ìÀ±ÀòL€­ ¶§ÝßÕmVØz¦°¤Ò2˜`RFýyŒþZ;úKSJÑ_yÙþzÿÕ!ü—›¯à/Ï€ýiŒóÓþµoVá_ÉE¦ø¯ÊO)þkÓTð_/'ø¯¾à¿:ÀÿÚlø×›Âð¯¦o«ð¯·Žá_ÿ*I®ø¯•ü×^¦à¿Ö<ÁíM+þ«¦à¿^ã¿ÞN€£-Ç3B©Žrq¨J#‡rr7Î À^êù®´™À6G¶ž lMØëÛUÚŒGL `fQi3³‘ú€ó0¶‘VXnÂl© À¶ô xa7ê/0./ÁiŸ Àx&Ø]ç`«O`ÛgÖ[`dÑÔŒòHF€5W¢BÀöN¶©À°}ãÛ·#°½m€mÆ l3H `Í.©°˜`À6Y¶»HÁ€ík Øëc Øû^‘`o<#Áºè¬SÏ`m†!ÁúÆ ÖA1$X{`P°öÄ àôØ¢®+-c§~2¬¯I¡`]< Ne(؇] `ˆûøìm(8ÕÉP°ž`ÁÞMÁ‚õÃ7,8µ–±`‚§®0œºBXp*HX°ì‚ûC ̰֕`¯R°àdd,Ø_µ`Á©AŒ'cÑlÀ˱‰Á¹Nƒ}¦ ì“dO«à©D¬D þ-¢Á?[F»„K€• 
Á€XDuU]NO¶ !eèÉP0H§méw ÖS(ØÀK‚•c©Ppxà§ •âb®õ| +N$P°^x() °ó'AÁ ;VðS½N©¥ØpÇt'Ñà-6 {·*¶R WVpØZþáo®0ðb²ªŒÆqir&*±ýÂ¯Æ ¾‘(~2”Y*Âm¯T„?¥"¼KE¸¥" Já)vÖŠ¼à.?+Znc­&kE¸µ"ðLÈuœ˜µ"ÂçˆI8X§¢¶ûÍ|›hÁl#èΛBZnb­´Â'í0"ZxÆåÑy­+'ZÙ6ƒîðL¨…µƒ‘ˆEXïD,Â˱X„×ÇbþL‹ðþ±X„ÛªX„ÿÈbH;G築X„—c±³‰X„=SÄ"l@D,Âm,áõ±X„—c±·±X„?“Õ"¼©Ex÷H-Â^›¨Exm,a½k‰èE¸õ"²m†ØY[D0Âm¬aýů#²¢ëÌö*Fø¯¬á6VŒ[ì4C¢\#l”E1Âm,áÏdÉk‹HFØHŠd„ÛX2ŸɒncÉïIFx1’ŒðR,áÅX2Âʉd„ÛX2ž)’ncɈl{x™åy)’ncÉ&KF¸%#ü™þ9KFØËÉ{”HFX9‘ŒpKFØÎ"’^KF¸%#ÜÆ’Þ–Œ°qÉ›&,áÅH2ÂM¬a6ÑŒ°QÍëG˜×#ÂþLŽðg²z„ÛX=ÂFSÔ#¼ËGx[X>Â˽ò^‚å#²M|YöÌD>Âm,a ©ÈG¸å#²íáE–çèG¸­¨/+¶S}Y±]êËšíáE–^ Hؼ +&n[Õ—Û¦¾,¿oðr»ú²ÜNðrE}Y)w~ß— 楾¬”»?ø²" ¡6ø`o–ª2‰Trû®?k©äñ]Ö$’±¨O«­=¿ëÔš€D2ÞâÖêSñkµÜó]ÇÖ$ü©" ‘JnêÛjÉ][7ÞßYx]@ÂEý[mÐi®/ópͨ.®ïîãjSurÅ(ɸª›+µ©€„—ÜÔÑÕ’»zº´!¸€„?¶|××u‰lToWŒ" áÆ[ü]mÏ#¯”S‰l|¾³»€„¸ . áF°5D$¼NpcQÇW" áÆË\_1Š€„Ó*ø£8ñòc81in:NlYâ'Þ¡­Ðu'Ö{pʼn•´¦81bž¡©8±æÂRœQ× Sœ8<í—Ä[4A¥ÌPœ8]¸†=CáÄqÎ)…ÊDŽc\Í`Ø{À”ႈ…±àIÁ(eëX°0†ã„wìÇ@¥…1|ÖuIâ˜1|@çó‚Â> `¼÷l'Ê> LÛCê•0Çóøv.Œá:oã®CÃ;î"F ž2†X¬1˜½{/c8>¶e ˜*c`F¿ÙpÆptzE4ô»P(c¸~£S]VÃú™‘‡Â„Cò”ÂÞ1r#–NÃd”GR@e c‰"-Ia ïH¿8BÑ„0¼CÏsj ax8õã˜0ËÚC"¬ †ãeïà¿´˜E! o¸Ÿ tuLŽ÷ƒå®åþSÂð9Ï©(„a,¿¤o*„áB™SPÀ×™5MÃA<ÖOŠÂ°ÅÄ:žÉ žŒŠ3ã"EÃx&tj>1†7Q&¼JŽNߤº)”áxØŽHÙ®¾Ìœá°![úHÕÇœaè²ÇV7"˜3ƒõº£p†±éN#å o¸ŸÀjø¾á o8ò–NìWÎ0^‚¸]8Ã;¾²©-œaëŸp†¡ÌñŽëý°˜7ÿ.¾«ã“zD|W4?Z<¦ð†ãë:sù wþÍ3“ yØFZÈÃÑ„VÅŒÍäá ›9š¹dò0ÄÞ±a=/ TÈÃ1|çþ†|38æÉOÔsáï G¤Ÿr‡ëÛºG p‡7|.È`Ãæ p½ðÞg¦G=~=¨ÃöÖ„:ýZgõ ÁáI±€´?ÂŽï'§˜³ÏMèÃc„égˆA]I4UèÃ4†0›ÏOþ0Æ!_Çš â5>¥²8á|3ˆåÊyMA âoØ ïÂb¸úÐ_×Ξb|,P¢9JW˜˜¼Š €%²}í¯Îóˆm X_·Ðˆ±@;¦Ó™FlM1²l˜,wBˆ}æ X'³ÐˆQlESŘiÄH°€  gÒˆã§u×K>1þùBYõ˜F-_à>K¦Ç Æl¼Ø>¡Ûç!4b”Û‘©%µc1^è3so غ,4b¬Ò2B*<}X[f„FlKÓˆmȘF¼!-ú;ÊB!F¯·™+Q(Äѳ{¥œiL!¶¹#âØïnxÙÍ· ñ†•vt_¦#ÎŽïþJì…i/ö™çK(Ķ …û+õ×%«HØ, 1pHu¨÷åoЃ®[“Ç`qŒdøó3sˆÃG®XG>pˆñ €h²Ý †W²1zÊbó„CŒ%ûª‘fYE !Û›±—c1–Ì,áJ"Æ[Eºƒr¤sQr8 ܘHÄpÊž»G9Äá†m Øu}æcß(Ísˆíc1À¢¥Q”B¦ýœ9ò„Bl} ±ÍX¡cváM¶ÍC(ĵ֑¹I)Ä^Ž)Ä^®Rˆ½Sˆµ€QˆÃ¸ ıŠƒX_›1ˆµFcc˜‘N£^ ^òìZgcú2 xŽ“xÉL Æ:uD=f1Rμ„  £NˆøÖ‡ 2ˆm\…?ì ØÇUøÃè$N³/ßÎøÃºhØß¥ð‡}Ô…? 
Q,[Ocf 8ÕÉüa_ÂF>Dì¼ËèàãéÜjæûkþ°~£ÆN%™?ìc#üa´ ûöIK"•dþp*ÉüaŸÂN%™?Œ’-ûзÄÆyÆÖ%‰‰?ŒLHåü@ÆyÉ*–ŽÁ2yXWa#ûdòp´eŸ:¡FÆ\À{}:Ë—ÉÃê©y/‰™úày8{€`ßÄ<ìK„‡ñ*‘GfÑL®'{JÝ'äáTò5 òj¼Žä¿SU~&“¦0 ɉ益0¾¶­SºMS8BŒMB5… r£u•8Ó–1¦),©,TSX•œUSX‰šª)k{,h3±k cS˜¾”Š GâHÓ¥ „?¬BÆiíéêFF¶¥«}8ÖŸ2r‚8¶ƒ}¤6þ0œ³2²Â }8¶õ{¤ƒ5QáË@Ïç™D…EßXD…±’Š ‡í˜ß„‰ «DH¶_ETøŠétwq?Ž·-¬> Œ ?ØFÒÕbÓ.y¨j —˜x÷Èé%¸°ìÛZÖ‘ïKE…1ªÇDqETÃÔð`8l;”ë;MÀðƒY¹ŒLT ãý‚_¿¦DsP0~HÕPpa}V\øp4¿ ›(²J ã°]†äžÀªù¢°0l€w?p‡MFaá«ËLÖ-°0l€~; Ͱ0¾¤ýàü û¢) ·,öþ®tÁ°0l¸-î¢ CÌ*kúeX8lØŒ†\À­q‘¤°ðƒ£ä=°w…³múÓ85ᶨCÆ ‡möÞe&vÃÂxæ3r‡ ,ì&‚…MyZ`a¬.lŸªÀªÿ£°0Ênq¹ »aaDWX8le÷Š ‡m‡f_‹âXØmŒ ÙÎv”\øÁšŽØŽESBDŒ:0ì%†m)f6›jJ-ØÆ•„jJ˜Mt%à©nCëQu%Ì&º@Ö”«®„Õ'ÂØåú=¬éJØ#EWB‹±´„ZXYB%·UYÂêu -§ê.×{ü©K˜MÔ%¤•*.ÅÎẘ¸„5EÄ%¬:Q—0›¨Kà|ºöP$S—0[ûç‡l³ˆKØ¿q mŠKàX9Ò!›¸„ÙD\Bš¢Úp‚ÂkiÛ¢jK˜M´%àuO°Iµ%¬œhKX9Ñ–¸QÁˆzm  Sm +%Úª¦¯ÚZNµ%†#õÖWіШ¶„•m /×GÖpj§hK$iK˜M´%ðEoYÃU³ é;›º‰KØÓD\ÂJ<êÂø‡M’E€áÇ@BL\B?R—0›ˆKh‚ —ÐEAÕ%€“ ,SÅ%ì‘".¡ÅD[б¶„–bi +$ÒÚ|•–ЕA¥%¤6U–ÐêTYB‹‰°„µD„%Ì&Âfa‰²D)eÂVN„%¬ç¯°„ý(Âbsa ©É…%¼¤KȺíÂna Y]XBÖK–ðMŽ¡]a 7а„×ɼ]˜®„wRt%¬“ª+!+¸ëJXkTWÂKŠ®„¬Æ®+!˱ëJxkEWÂë] Y']WB¶וð5]  èJXÕ•°’ª+!;€ëJ¸Qt%¼NÑ•°ª®„EWÂ+ºÙH.®EWȼnc] Ù‘]W"ÉÓu£èJØdV] Õ•°·©º•K'½¹®„7Ht%dïv] çÛg¾p~¬èJ¸ññ¾ÿ˜ÈðŸÿ°|û×xÞý|Ûâð?ÅÙ²þñ/ï›4|ØëŸþ×þíG èÚç›Ð’·ïÐ’?7å‡pïßXiôïêó?ºJ\Õ—ÞÓö—øï1¾ ×2êÇ¿¶öšÚv¼é÷/_Üÿ:èïko.ópø5ãßjÇŸ¾´îwÀÛ”ÿü¡þ÷‰þí¸ÀÃÈk)tôÂ):ïýÙMú矛ù?­ñŸøÇ·ŸùößþS,ÑÿùÛÿöóýÿüükkÚá\_çu]UðÿWÖtü@Mᘇ#}Üëó&ùu5^Ó—¬‘ceþ‡Ï˜¶åÉýŒãÙ¼zü²M—S¸ª>„7CŒ_Ôž3<ç—ûõ¡=lü¢öÕþ44í÷/jΜˆV "5…_û´Ÿ}ÍœEí>g¥_\î\ËÓkYgg–˜KÙp±ºý­j~_oî)cj¤Ù‡!µlÞ™¿SÃq Ôì¾·8‡žåh¼Àaò*ð?µ‚ÿñçÿùË/üë_ÿø§?ý鿜ý÷ÿ}ü.æÛ¯ <ÅŠ úÔRn°lpYÜÀù¼Ï+A% -lHðžr›á¶|’Œ–Ÿž™Ö$ŒÈ ]?®—õC"xÕHÚ.øjˆ3ö …ÚÑ®WëÈiT)ãüg`jJ>g,µrÑ„6a¼8k †ÔßÞÚ«&{!M䯛òÖ OÆSîÎãŒ&ŒËe¬XSWñ…\ô¢<Œ«^×0³V’Ñ€\ÇÛÆ÷5r“Æý§r ÄE2‡ñÐÆjÖ¥wÄWÒO; ãÉw6’G¸ÃsïˆË‰Uñ2—Ö ‹om òÀö8±0Fö¸àI¼¸`5>ï½  ³§Œ_ךx¾ÍeË8S$ .YRÂ"F›ËšO$L_݈Ve‘¬MeÓ«ã¡Êà,É ã3ÑC/YTœ…g×5Æ‘UiY¯µò3Y•–Ån+Ytf(“´ŸÕ8/q¼à# á¤æ½VŠîÌPö ]¬*mÔi2ÑC•r­2¬JËJÁ5æe&мbãj¼Y•–RÁ®5kf(Ã\^Žñùl±HÌ 
eÈ+ÏÖÒ…ÕÇ™¡¬^®òGÔÛ23”Åcß{æa¤|‘’Bw}#.Gв+–ÑcP¤_°£vj¦/iE\áÞ—eKcÆ•GÆ—¼caÙ‘£Ïd¶}㼑þÔXúªœZ¹t©#¹dW3¯}QÆSÔ·\{5ÒyªøS£ícQ¾°°,w‹K]Ny#5eE:Æ¢Œ!8‘²-,ûCy#%%ëŠXÕ™7ò­fl°ÝcQÖü¼+¢é)odÁÕëÚ̯ˆÞ?Æ¢¬ix×7FÌdëH8 ËX•5/s5RÞH‘b¯ç§s¬Ê©ÎSG^ÛÖ…!Öb>VeKº¹7'ŽôÂEæ˜Éš2Ü®7q¤ \ÁZ2¦² \Y9q¤MÙ*í©ÍeëD)q$g ÛþÓ=Veï`8”7ä™ðíÛeØZ çÔL¦P™ ¼‘VåÅi#5¥&Œ”6R³?ÆXhÚHmÏCi#5#g¬ä”6’ÓZ¯Ñ ¬‘Va)k¤¤:ãÆY#5çzîœý¥Å<Ý¿8Îy`“hdÕKJ‰Å{pÇ«‘’F±¹•N€… 'Ôì±qªâ¤‘><7g”ô¾+´K$k$§Gœ^aœ¬î¹ k6Íš¼w.Èšá´&wž+²fÎ}¾ŽY¬©Hk¦Ø¹"‹¢J5’L‹æ­ßà\’5)>3j¡ô¸ï–Hó˜3œbc£téÙ8ýäÔ•‡D½­=p6Æ’l½¼Wò“}ؑՉSùÊȆqúÉ>qªp.É=½ó §qúÈ©…“¦k¾]8·ÓGöA Ÿ™Ò¦kîÜšr®ÆàRß¼ÖËú¹knØÊÜylÔ+¨Ÿ[ÉSG¹Ç´]†@I5Rîtk ˆÖs1æ„åk%‹ÏµØúWÙî4‹9Im¥×ÏÅØºPÈCö¾Ç)Mò§sÚ“0ckëÍÔ5¡.Nä {GJ¢®‰f§1³¨kºa„wƒÙÏ®´âKyÔµ98þNÙñ"€…dMT ãtÓc ;ÈðÐã |ö’';ÈÞÍKÓ©_Þ)hu|öâYÛIG!êº&¡T jˆ!mDlˆú] ‘Ì¢†q¹Fâq…¨±€Aæ„PýìÚLŠP¿ËâЄ¶ÇÄHM5l+{_ŒPc•†ÏÖ¢'¡†qGX_Ë&u]ßïwÔ‚!j—“áb‚¨ë¶ù”ŽmBlPö³è§v¨±QÝ÷H®u5Æ4g>¨ë·l‘ PÃJã8î¼5~^=ôÓ ÔŸŒSa‚+aЄOW>Ý~ô`€:$|[ÿƒr÷¡ŸþdÞw5n#™°âÓ0Bc\_0>„Og#áÓ0.Ð}8Ö§‡0y²M|¶õ`”™ði¸Fç=s‚ > ãuÐÎøt.Iø4ŒñéŽÛ&Á§aÜ!NÚ?ƧsI§áåÝp\î O£àöÑïDžþdœåð¤ï%xÆkjùu2$~= Ögxz{Ch'PÎð4Œ#üæ/†O×’Ø%û§Êø4üß¾RÿTŸÎF§ñؘ˜CØAêêVߘÊg¨³‘êZ'’öåjc–•¾Ò @ † Ÿë¸, €ÚÔ ®ÆØî;/Zêl$€z«¼}ºl`€:  þdœ*å+H Ýg€ºuöXúÔ53^¸Ä]Gj”ŒI±ž2`€Æs™WŒO'[…§ñëƒlÇ žÞêÒEB O×ç]{W3PxÆmŠæ*<ë$|º/¨ËnŸ®½;:Åàt®Ài+FÐ4«PFëƒÉÈ4J-7Ý12mœ°4žxà{‚¥½6†¥?ئ.ùŠ©¥Þ»AÄ„JW}lÜìö˨´ˆgÿÅPéüX‚¥óc –Î% –®ÀI÷ Kç’KÃxf:ãÓŸŒ-Êlc|:ÛVvݸ±ƒìÆdý¶Ÿö™.ø´]‚OçÇžì ûc/öý±7{È:%Ÿö¯K@êTò —±) u6nì!ûó6öum:ö­ŒRÃxLýyE©ócOö½µ{ÈþØ›=dý ¥NFB©ëÒ²Ž›v©S… RgãÆòñ´j;ô uªñ`ÿX:Á¨aÄ?ïD5Á¨s•'ûÇÞØ‹ýc$'˜©sûÇÖ“‡ýcìãóv¿aÔéyŒQgãÆ²?pgÙzÏu6ì ûc ;ÈÞ “}dìÅ>r8á8´3­€Ô¹Î›ýd¯óa?ÙK ueÉN"cÔN :•ÛØUö‚;»ÊVð`OÙ ö”½'{ÊÙHžrÔÿL‡@ÔÙx³§ìFŽØX_Q¶Î6ˆ:u¥¤Ùýc¹¿U¬ÑéÔPíy×´ U+ôelêøø·cŠO*›šÒÈÿÅ¡j;,+TmïM¡j›Ô U«Ü£AÕ¶t)Vm†bÕ6 Vm`Õ:­ª†âÜÌgbPµûÿUM]¡ê®³ªVÅKêWÀaój^Áê¨sÝç… ‚ÕšªÞÀjM:o`5ºrÏ;«€Z“kܰj(lïŒ`Õ–ÐK±jo‰€Õ ‰õýÅÁj8zhÇàD3ZÍÜ®m\d(Z>츱j@ Õi„XÇà…3Z #Äô6(`õ‚ÜP íLk«_‘që¢`5¦ÃȪ X5¦\èð1Ví-¬Ú[#X5üò™âÁ°êdd¬1øè#š±jë‡@Õñ±œOjiÐ0CÕ ’œ“Q/X5”8Ë:Yó‚U#Œã ß«C•‚Uǧ}Ïì†UCSu&„èXµw^°j•h5¬Ú+¬zÁ™õ˜ütÁªã±ûÌÝ`Xu22—Z…ZLmA,ŠU«4¬aÕ8ïáPÓ‰»B¦†ŽÜ¢&Ô¡dêÂ2]»_¹ÔÞæR°MºDag>ãR«€¬q©Ÿú‘M@^¸Ô8 c5ï·Â¥~¤f#윹Ô0ž7ÁãÌ¥V}\ãRkȦq©E·Ö¨Ôa‹ÝeBËB¥~ 
ª]©ÔñóM"ÔJ¥þ@ªØJ¥Fm3†I™ÔpleÄ&u22•¡Qè`‡z…J Ÿ©Ðúž#Tjà!÷E8S©U+͸ÔðC¦ÒwHáRC¹÷Fo¸4s©!ÁÖ§ß.2—Úßs©Ÿz¾˜@©Ãx!Ë×Àz™Líã#dj!S‡+ëÒÄ…Lí½6µ ›Ú‡@ØÔ*Cllj•665„kÄd[$„M U´hé„n!66µj›Zµ™MŒÌ¦F…Óo…MÄljïž°©“‘ÙÔp·ÁîK‡°©URÙØÔþ2„M #"Û»`25ÁËQz„¤©¡ÓŒlâQJ¦†ö3”Ø»Ÿ,dj%B¦V©f#S?8ó?“h/Œj‘°6BµhQŸZEžO]Ï?7Ñ´™OÆ‚¨~,|jAÄê>½ð©£Î‚ð¨Îán„joŠª½)B¨ö×/„jU‚6Fµ–*£Z·Qí3@(Õª„m”jÔ‰ŒŠÝŸJ5fÀsN"¾pª!´xn3A8Õн/à Ã8µÏE)[9Õ6ç”SmÍQNµje§/è^&µX8ÕªnœjåT£NÄ*5©PªdÑY‰Î”êÔ¦T§ö0¥ú©¨èÑ}WåU‡ñjÈseT«^¹1ª½ ¨†±_“F(Œj•7F5µÉÔ©)E5ÐIËÜÈÔ„kï©í{R255åVty3B£Vs¥Q?uÃ"Šõ*"è<„E­¢äÆ¢VtcQ§Ö0‹Ú_ƒ°¨UìÜXÔ0^“?©,jB£N bµÊ:5ˆiÔ>Fí¯^hÔ*&n4jÕ7µ¿¡Q§:™Fí=µª·Ú»/<êÔ}æQCl}d·45ôsá¼õ«/¡Q{•L£ö!`µU(4j¯PhÔ©B¦Q'ã¡ éR%³¨}Ì…Eíc.,êTå¥*ée›L`eQûÜR5aQ{啞ï?ŠPÏóküûßOcÕ¿_<'«}èUxRû@¢Öþ^ŸŽ m™ ž]íR(#ÛãoRûØ!}tUñi O é Ƨ—íU{éIÉTí£n¢Çäv‹Ú‡‹ˆ°Ú‡ªöa`ˆª} õÚõ¬Ifµ‡fDíðÂsÌ8w‘ûðC´È}ø©Uä> +µCQµèX#ÅXsØTí/©ªšã-b~¦±/øj}ü¨'R~Щ?—°Ô‡‚XêÃs"õáGh‘úðEê#=vW¯[Ž"õ‘ËR~©Ô¹}é‚§&Z€ KÝnÊZdR®ë#R©+OK@$‡'‘úð£žH}xe¢õñ Ýã6’EëD¼DWì`©Í®äð´W)ð´EëÃO´¢õ‘ºy©Îî´ÁÓÞ Æ§mŸÖÓáÓzF4|Úš£ø´wuÄ;J,È´™¶þ)2íµê‚«Qi9†2­çC¦í5vdZÏê†L««lÈ´5S¡im¦"ÓÖE¦õpbÈ´ž1 ™V×i=Ö2íÝdÚU‘i722msQi=¡2­€!ÓÖKE¦­9ŠL›8™"Ó^²¨ΧC¦Ý(È´™¶aWdÚû)È´M’ŠLÛÓ™¶ñVdZ’†L[™¶7¥È´7RiIqæÈ´½)E¦õhoÈ´š"ÓÞZA¦½Ÿ MÛ 4måš¶^*4ä!ÛÛThÚ:¢Ð´?V°io­€ÓÞM§³ñf\„Ï„NÛÜSpÚZÛÁi{ N{§­‡ N['œ¶!WpÚf‚ÓŠ%8mc£à´wEÀi7 8m߈‚Ó^'ƒÓ 8m°™‚Ó^PÀiƒSœ6,FÁiœÖ!0pÚE§½A‚Nûcö~ :-©vLùŨ­ƒŠQ[K£–סµ5S1jk¦"ÕþXAª½µ†TkIAªÝ(HµàÕö"¯v(—ñjë¦àÕÇ`-ÍqÀZÞ‰Ö6> XgLZëí(S›à7ÖÞÍK“wjW °¦ì°Ö’XKÉ X+»ª»¬F¬¥‡X+²l€µ>Ök)i€µ œÖúX¬el°Nccˆµ ±–9`ˆµ‚¹„XûC²Ö‚YkIƒ¬µ9 Y'äÙ0kYǬõ±†YË×%˜uBßoõ—µ¤aÖR§cÖ h'ûÇ”?V¢Uÿ&}j&ψµ’ýb­f×§&¶¬1ª4N±C¬Ÿp¤­…RD©v#SªÃ¸ÇÒ6„mR­ÆÆ©¶Ÿ•So†µ+¥Ù#ÏmpÅ”RíF£T?ÐÏìÇG¥T»Q(ÕÙH”j\ 6"’êd%ÔO–IF–IFRq«$Ë|0>¼0«‘õ@|ØE$Y$YäƒñùîÂ,z >ì¢âoSô@RIÖñ:¡’#wÙGMô@|å=ôXÖI%Y$ÕÉz ©$ëø¦$z ©$뤒¬òÁøð²¬FÒñwÁz ÞKÑñÁ=ïˆè$#ë$#ë$#뤱H2²Hê'ë¤:Y$=–õ@ˆK¾©–ñꤟYäƒñæeYÞ£èxSDÄ{(z é±Eýe}lQYKžê/«ñRY·úË:úËnd™Ÿ*z VNõ@RÁMýežª’Jê/»Qüe5õ—µÎÓüe1^ß÷—U$Õyß_V=¯óÕñA=žè $ˆÖÝÔ_ÖÇîê/kKõ—µÎ¢þ²Oõ—Ý(þ²6èRY·úËò:DÄ»Âz É¶Š¿ì6ñ—¥#"’Œ»úËÒQIÆ¢þ²¼MQIužê/«ñRYë¼Õ_Ö’úË:BúËòØ& ’~^Õ_–Š$ˆ-¯* âíITç¡þ²‹úËúØSýe-y©¿¬Æ[ýe7Š¿¬ƒð¨¿,%Y$ÙVñ—Õ¶©¿,½QTrWyíx Å‡úËúØbþ2÷RTARÉSýemÐ¥þ²oõ—åU7Š*H2¦ùQøzù1øš5?2|í5Ã×±=mø˜^ûü½³:'ðµ…Ü zíœQA¯-µ &W 
¿t¿¦²&W úƵA+RºµeòSºµ%\ºµe¸ºõ…vOù¥[[ ¥[Çá / Kp+Ý:™n}"ûQÌ©]*tkœéž‰Â+Ý:ŽŠ1q¼ù ‚³kt鱕·Ž#qAººQ’ùÖ'|ˆsê7 á:Nᘑ=PV×q|ßÏñß×QRÉ.‰®ãy+(ŒƒÃÌ„ëÑó³ç#d¶5 †“1of[—ø4æE3ÛøÃA‘ð¶æ}.³Ð­ 2ÆG7ÎÒÕÀYö‡8Ì·®P EV ß)d&c3ß:ŒÇü …n]ö˜«DRfºuÙÂ7 þªÐ­4-DµºuŒ0ˆ¡èÌlë¨1\§™ûHØÖš I ÛºÔ•oR{„mëðsPzQa[PµŽ¥°Çº Ûº@i|Þ9*Û:Z[bG Û Š7>a[Ç–ÂTÍ—luË ØQ²5v‹£L}![GŽg¹§H“­QÏ?’­SIf[G$z ¿°­£[!’°­l%3 ²­£äºÍ‹ee[Hn°ÎåHØÖÑŒüLØÈlëT’ØÖxÇ”ø@ØÖØÞña÷ :a[û̶u”¼K™÷~Â¶Ž’±)ìŸÕ@ Ž=šÊ(×ÑX[žm"Ú“C‚×e—!ŒkŒë3¯À•qJ2ã:J>+ñ÷„rm]pù×ãë…r=7d / Z§aÊõä̱ä´Gט8÷Ìó*„k/Ç„kŸSB¸ö¾ áÚ§ª®ñ ž¨¯ŽB¸c̼ml,B¸N#À„ëÜ€c^R áÚ_#®}`µ ð­mè„ní߆Эñò©Õek„ní+‹Ð­šx}JP ÝúãÚcý…n F‰H”n w_#·Ò­£ä ñ‰¾ó ÝÚ?H¡[ÃSG=$ të*âeœ=”n_Þ±Sšg¡[ûÀÝú€ryŒwßR„nÏ’2¢(Ý:•dºucºMWMèÖ©$Ó­SI¦[G?c¶#s¦Ð­”MEèÖ$è)I Ó­$S³±'$ºõÝ~JÙ+tkÛ8…m2öv 2a[cÂ)ò+¶Ž5öâ4,¶ŽNâ Œœ¬Â¶> ´|ÃŽ²­SÌ¶Ž’'›¶5êäd3¶Æ1wF*ÙÚ_t#[ãEm×ÁÕÓANe˜lí¯XÈÖ86ÇüºZ4–p­1«Äõ‘kí¹p­}Ê ×:5–¹Ö©$s­1"µIõÓ+¾æ®(ÓÚ¿ aZce™9Å”hí¯XˆÖ6à³> ¹[†ø§ò¬1¦ðèºE³ö1ž5Æ”’‘)ÏÚߣð¬}À…gm˹ð¬ýãažu*Çõæ”gÆg™"ʳ#æÖØÒ„gíŸsyfÆþüNý{Õ«cÂÅ«Ff–¯Öœ–ièô|©–iàì5”Il ¤ŒQg‘¯váëC³ÉpÞ“¯Æf³ž3ï¡ÉW«ðµÈW;*/òÕ±&¯3Ó»ÊWo¸|:q*_íŠÐ¢_ µXJI­úÕ/"89ׯV-i![Ç«½fŠO#[?Ø™‡”Ž‘­o¤S™pÄ"dkÉë8+×yá– (×gwpr­/¤õxÖ¹§|p­UÚ¸Ö,;“ÈÖÂ&w²õx±×"Lk×§6ñjåhÓZYÏÆ´V2µ1­•ölLká8Óš/ÃÓÚ̉¡ί3­Å¦L뼤%-Lk¾¦õ(¢yŒS¦õ©ÜytT¦5DÏD”i­—hÆ´7é8ÆéÀ˜Ö±@#1dGl”i _`›ÇeZ»Q˜ÖÈi@wHi¿Þ¤ö©LëØÛ‘KrdL¦õ} ±EÓZ•½•h­ÚJF´Gè¹fVT%ZïµùT•híÙ¦|—­UìɈÖ^RˆÖ^RˆÖ¦n.DkœNαÏ*ÑzÃú8O²J´6 w%Z»QˆÖ&n®Dk/)Dk>c5¢õ†¸£yM¨Dk@W¢µ—¢µ…h­[F´vc#ZoÐ^*CDU‰ÖVF‰Ö^RˆÖ&6¯Dk›W¢µ—¢µ…h½a¢Ì‰B´V‰1#Z{I!ZCmóùz•gmbõ³ÖrB³6“°¬!IV×mQ–µÕ§,k/),kK  ,k/),k/),k/),k7 Ë:‹´#]òEYÖn–µ+ËÚ (Ë'ÌíCÆEïš2¬á0ãÒÐ:eX{}°æú„]ív5>®Xûºè²«½¤°«-}²«½¥Â®ö:™]mOvµÛ„]mUvuJ÷p¶ÉÝ&Üj³ µ:¶X¤µø¸P«¡Ò£Ñý<¥VÛS…Yí…YíU ³ÚK ³ÚJvf5`'Ð'öOÌj/#Ìj7 ³Ú¬v£0«D¸^ãvT™ÕÙHþ±?V˜Õ 4ßgBqeVûc…YíFaVgãC 1­2«µœ1«µ­Æ¬ö’¬v£0«5‰‰1«½NaV»Q˜Õ ¢Ž!«Ìj/)Ìêlün~cV›±1«ýWaV[3•Yí%7Íï¢FaVk:cV{IaV»Q˜ÕþØSó»¨ñ²ü.ÒOaV{IaV›Q˜Õncfõ‚èñ{ÿ̬”4ÉbƬö§ ³GÆ8õeVûc…Yífµ?V˜Õnfµ…YíFaV{k…YmÆÎ¬¶QUfµ½deV[÷•YmíTfµ…Yíífµ—fµ…YíFaVgãÍ ³6H˜Õffµ ­0«äb8"O/¼0kAaV{IaV[G”Yí%…Y½ «è3d^”Yí 
fµ?V˜Õnfµ…Ymu*³Ú«ïƒ?¦fýç?,ßþõñEÅ[ˆÇÄOǹ×?þåýã»çÃ^ÿô¿þðo?‚Œ#+ã7!poß#plÊÁñ¿±ÒëèuÖžþëŒIGñíjÿÛ†1rÝüÃÛîúlÂû—¯mÁûÖ5šÐÿöÅ£ðŽýûþû_û&Õ×¼‡Ö€÷Ï_Z÷ö´¿|hÁûÌ?ÿ–Ü.å ¬7b€ôƒÌ†.õÏ?7ýZã?¸:ÿöó/ßþÛŠõí?ûïß~þ¯ø—ŸmMW½‰í Ž1¿¾¦ãjzÀ.?bK¿bšþêšN¯éKÖ̱RÿÃçL[œ¿tÍä~îä&}Ù&!I¼–¡àKÚs–Ÿðc½æNíaãµgªÿ¡)í÷/j2uâBç¿=5…_ü¬d}ÉœEí>g¥_\î\ËÓkYgg–˜KÙ e¿ý­j~_oâP‡s`õxÿF-›wæïÔp‚ÐtÄsÃ;Áì:Ÿe»ÎË«ÀÿÔ þÇŸÿç/¿üñ¯ýãŸþô§?þrþõþßÿþ»<˜o¿.dwÇMðR:• ±1ÜGt<íó˃«‡‘*j_Â1‹>ÞúíàX ^üùièw°Eâ„ýRâîç§íœ±A0N¶ñ ZÓ¤"ï•6Szêª !»ãFl—(Ï+VV$tmd¥´™ÞÝÞZÈ6‚ÃòÌ`ã^á¯ûÈ cÔИˆçóS¹ö“³ƒ<3ø„ºz†i?ŽÆ ´ÚD"ª"šõž7RûŠ«ñ£Éâ>hÂ,0Nþó šwk-d|Jø»Pg,Gkíɨ¸Ÿ™ z÷·`¥G-DW uÑ}å˜ðÈ\ÿŽáC´MiAиƒ?#±™÷L2ºoXé<Åõ§çš™öš¯kg5HlcÒº½Q@•„Ÿ‰m¶Ê½ì\TÙ·ƒiþZ°oH©t †3&ygÅqøˆ²¼aŒãËç }cª¥’jc ŃŽÉ—dÊýŽú¤ñ*ußcù»Ê¤ã2©F¢‡Çì|ÖASì§+^N£Æn1ÞÛ¸èŒIÍqñ¦@7mÑ>û"Néœë 1Rã¶fßg‰ÙZžrõÚc…o_^5^ å4’Œ,µü´èÒ—‰¡G›ÊI1[i?fÇ$C\s#IÅ’ùÓËÄc£äöDãïñ÷AXßqpâ,l7¤oëTF•ë6ÒšíèÙ ÜFã UãŠ`½6•wp*·‘¼a(ƒ%©ÝóÅûŒ%©-Ëõy¸l| gÔVzÞGm*ïg»{DÆŽ˜³NqÛ!…†·ïˆû¸ú²¼ƒµqÎ¥#¾‰IOO¬/ËaÜ‘¦¾¯öG-ï•ïP:×m¯ÁÃ}YÞ—ÁÞ9¨åßco[A©£m.c6,ûˆ]ßk4j_–éãu/[ó~çrdwÏ@¶NÜA勞áó[_•}v &}Pùw|¿“» ã³÷Uy‡2ØÑ¥õä¤@ÕWº}UõkÇöü~Z…Ã0pKÚØ,áÄÂÔåñÉkÕ‡bÚ|ò@9áì+rA®ë ÿ[ž ê*yÖœU°ƒF¸ic™³DóÎÝc»œÁ*ñ" ǘűg 5„zƒoïõÉû˜³XŸzSl]zì 9ÏïÌâó¡ÈML7$<ìë|¸äТ³8VäÁšq¡0‡êA×Èη‡'²ìeÎâØžGŽrgøLÁ°mÊýB®º¾ ûô¿ ÊÚ%FžÐµ”¾ §öœò—º w,È ’M‰’ýº(’¤Ix¡ý©1©¯±Üi÷ë™aÖÉ;œ¶{,ÇVî^(˜gÙõÉðöL¥±umî‘G/ Jè ß¡QwHÀ,c9Õ÷9÷›Õ9°ßKÆgËqLypîºwþíhMOdfíÙÛsQøGá„·êq,ÇþX޽Oí omxÉu[¿G¹ZøÃKÞÁÐØzн½fÆ«1H©e¤ÔÞ‘¨n8ÉaÈÚü ¬šé Û°±ŠDê|xkÓAaýêù"ãC¦ÐK/ˆ{úÇVeý¸ÇŠlC€W>ýc«’Å}‰³fúÇ+Ä FnœþÂôm¡ƒs?ýc/y‘ºÅþRW:Iè¨ÌZ•yY’HÐTåƒÐ’6‘k\ÙH=v€W3]d[%A¬™.²µ§ßé"Ç ìÆÍû„ñ¾ÞB8Çt¥™¬7-ʱsŽü5¦§—ì% él¤’á® /.Ï5ò*Á6£R+ kæ|9À©™^²—¼)Ø9•Œãôô’­$85ÓK¶’PÉš^2Úe¤#: „5½äµºb]Üíˆã\™^²—ÜIùÑCðXÚy.vž*…Ó&³½f€\ÓM^pLêñ9¸f¿¦— ÚóŒ&<â87C°S7Ão›^²}?å¦Ð~Ÿ¯a|¦›ì%IVÍ],­czRЯ\J t•|hU–öœi[ù±°ÝS¯/Z×€j(AáêP½á»ŸªJ ToØú¦’Õ°víˆTƒS¹‘‘Õ`cÆIi„1 P ’çØ¡Äw‚J".Tƒ;z"ó' FF'§®ŒTŠ˜œFˆiŒÃãÔ º^¢º¯Ì85Œ;æNó#§ÆåÁÀ"ãÔU ”aAÆ©+)8u[§çÌþ N ão¡Çù N ãÆ€"áÔz-85ŒpÁºF–àÔ•{þb—#œÆ !eD2óƒØ¯æ÷ N]™ÎwŒ}s3¨†ñŒ·Ú‡¨†1ŽHûÑ|>ªa[eb Úc˨-ûfŠjO†™©†ñƒ·OFªaÜ`Ä0uµ€_9Àf‚©a\à:}‚©= ^`j/†”¦†ñlÎk$˜ƃ‘Q†©+ß=^Iw'¦†1ŽP¥K L ãÊ(%ÃÔ0.ŒF1L þÃà'ÁÔ°] )1LŒ SWZ rW5˜ÆCÚ—#™~&œÆýÆþÒ>¸Cô ÂHH+ÃÔU”€Á$‚©a[…b˜a 
aBŒRÃv3¶Å(uw`8Qêl$”º7L•vÈ(5Œ'a[ RÃVp_Ùïq¤†ñ`‹Aêj<áÙ-¥†‘ô¤†mc¤ŠAêOÆR»„€ÔÙH u5®@;0ÎqGˆAúàµ7ì{¢Ô°ÝtÜ ºqâíÕÙH@5Œ£ «Î¿sDi]V„UgãĪ·*¨0Ïè‚UW#cqŒUg#aÕŸŒ«†qgä±ê*ÛApƒ`Õ®é!Xu4bÌ‘±êOÆ<“ŒVø@I·I X]¸rkËVg#ÕŸŒ¬FDÕÈ$ƒÕ0Þ 2Z­®6†2®þdœÁ<ÉHp5Œá®/mÜ$°¶“QS«SAªsÁ‰UgaÕ0›«ÎFª?gHQ2V]9VVÃxT#`õ'ã«óc9ÄazU&œÚõn§ÎF©剄SçB„Sø„%8u6NœÚm‚SWEÆš§þdœ¡¾ÉøâÔ®$8u.D8u6Ní©n¨FÁ…-Æ©s9©³‘pêl$œú“q†»‘qêjäs;ãÔÙH8u6N½Õ3#ê¨Î% ¨†ñb”êl$ ºqÞm²Tç’Tç’Tg#+P™êl$ ƳÀ¼2Po©V P¾+ª³‘êOÆ•l„T§/„‘êl$¤:}•ŒTç’„T'#CÕé± UûgÉHu.HHu6r˜¸?•êd# :Ù§N¶‹sl›Pê¶þé¶ R§õAêl$: ¤N_y©s!©s!©³±°üœ'HV#©³‘@êl$: ¤NF©?Ÿ–d5HÖ1©sÉñÜH uz) R§¡e”: ¥ÎFB©³‘PêÜZB©³‘´ÔÜÆ(uZ’%t#•$”:ý[ü±HÝ߬-ih5²§4êgB«±QP°4&huìec3¡Õ~ ´ºê΋E«W(NW›ÁjÓ‡T°Ú柂զœ¨`uø$….Ú­†º¤û‘PÐjølÝCþ‹£Õ¦q¦hµiŠ)Z ¡'>Û9Œªc·^C\ÑjSÜRVõƒxýB˜3³ª“‘YÕOõƒF¶ aUWyÏ™›GYÕQ,ø½Ÿ™…U%oœš:”)¬jèÅ\aÄUꀗv,RhÕ7…Zeo§ðª¸æD ^õ³T„¥ßÜ ¯:žà<„ ¯:NVá”LHIxÕ8°3òª‘Öë™94”XíA ñ±:‰9Ÿšð’«£ä~Í\!J¬†8,Òa´ñ^u´§ºAfµ«Ó0±:¾¾rÒ=«-lS‰ÕÜ,"k ±úèIì.!V[’ÖA¬N¿3±ú‚ë0¯@•X}aÊ3Y™‰Õ–÷U˜Õ±¤a†MHž™Õ:;”X]/æN3±:™X•‰Õ(¹Ì›E%VcP7bz ±á׳œ™Xu>×¼vTb5ï$ޏ0«u^)±3`e3«!U[Æ5˜òª±DU^µžðª1x‘À™VéóÌËg¡UûÔZµЪ}t„V}Áž×UVíÃ-´j×yW«´ê˜×3±J¡Uûp3­:Ù˜Ví#*´jìÛ…™ÓL«¾ÞÃë_ §öŒÈʨöoJ(Õ^Pª¡}¼0Ÿ˜)Õ¨s#¾½Pª}1NõãäT_ªeˆ›9ÕèÿCœsáT§’Ì©¶)”êl›”j·£Ú×aT[9!Tö«N/¤ª¯W1yܸ¡Ú_±ªSI&T§’D¨†m£ !TûÜBu¸­ç6S“(¡në…eüÉX5ŒegP™ Õ^RÕôm—:=‘¹Ôöš„JíoB¨ÔÖúIÃ&õ…õŠ8ñB¥®BãÌ–f*u”\.¢· •:=–©Ô©$S©“‘¨Ôx'³¥™Jí…J}áTID}¡R£äJAçî¿ý(J½ ”zÿ[ jlkûúæ—úÊš0C jSUådŠ pPIEù#i†°òG¸·×C"¢üñ ŸÍ1ù¢ü%d¦º>Dƒ·Û-þ€`9V×%³ð‡£Œ"ü%c®>*üãÈlqíð%¿‰ð¤Q–™ªT…?±-7)‘0D£Ó1³û)DíðŒ@ԀĖ™ÂO jÇ®¢¾áµÌtz Q;<%u”z1ô-:DíH‘@Ô@§°/H˜!ê0nø®5$e¤m¼&F¨“j×¢„ÚE¨ Õ„:¡¾«K=¤á¡ÆA=ÖŽ¾Æ(B’ps3Z j«S!êx7«{DÔ3×™AÔnˆ: ¢ö®D½Àå-ãNA!jh ïC‹Ý jìÃW µ·G jkBÔQr£P…¨8ïóþ§CÔTú†Q‡ç´S„ƒbÔÞÁ¨œ1ö}q0Fí6Á¨Ó¤bÔ^¥`ÔþXÁ¨ÝÈõõŠZ:{ƒ¨mØ¢ö‚ŒR;J)(5°Ý…´h¥¶]Qj{%ŠR/ÁJ#K(µÌ‡©õ±S—q]þ„aêT’qj/È8µTœÚJ*NmÓ`Õ^ˆjªS[¨NFª}^ PŒ TûÅ€ÕŠý+Níð·àÔZPajܦ–y.0µ”S'€[`jk ÔÖE©­ £Ô©FA©í@ê^ÍÔcR Ú TÇæ·’+8µ]V(NýÀÉšá\ŠSÛi85D°¡œÕ<:Å©uÉQ˜Úº®8µ7„j¯Q€j/(@µ¨¶U Ú:"@µ¨ÕV¥Õ^Rjë¦"Õú†¨ö‚T{{¨¶§ NímœÚT»Q€j»‘ Z;b@µ´Õpêgîµók §FZЕ8qj¯Mpj¬ŠSÛ•£âÔzM)0µ-ò S›s©8µí)ŠSÛ=­âÔ¶Ê+Nm[’âÔæ*NŒŒS›ç©8µ‚àÔÉÈ@uj-Õ¶ *R%y)w?¿*Rêd¤:ÕÉPu*IPµ¨Ú 
Tí£'Pµ÷¤AÕ¶]+Tmû£BÕ>n‚UÛ¶+Xuª’±êÔLƪS«¶ÝSëôX¬½›X{I¬e7W¼Ú‡@ðjÁ«?ov”¥›ŠW[{®Ní¸Ú 2\š#xµ$¼Ú+¼Ú*€u*)€µž1`í£€µúdXû°v#Ö© XûÄQÀÚÛ#€µ¸ŠWûS¯¶½GñjõZ ´VÍ@kk¬¢ÖÚX­½  Öê_(jí뜠Öa|¿rÓŒZûþ"¨uz,£Ö©$¡ÖnÔÚWlA­½±‚ZJQQkÅI~Ld%nõo«Žm3šújgÈZ35 dý&Q$]d«v¸VŪ,&õ…L™$_*µÝë)d}"K^¸}Èçà}æÔV̺¾¾“”3³Ž³A}¦ð fñ„ˆoG8³Žù½½‡²V'cÖùÉ÷õ*˜uˆotsèQ3f} Ëd@»Ö¢`Ö®º,˜u,s÷vSì„1ë²÷|ׂY#)ÙJ4NƬ¡,Z'ŒY8¶H*˜5òe—¹´)f ´p™ŽúÀ¬c3‚ó ¾ f½³a¡Ƭ\"ÛØà\3h½ã0H„1­¡ÒÊÒZïXÅöý³\5ðY–VÐÚ•A´6I]Á¬MˆN1kW\ÌÚ³6q"ŬM£K1kSoRÌzG€Ý{ fmjRŠY#ß!$Ï[䪑¥–žÊ5´‹0]û7+5†òÀåx›# YGAìWcr d½!4âV]@š!ë “ Òm%È:ŒgX:S±!Ö©#Ö1Sâ>Àb»q"ÖÕGFÀF#² bÆ Lµ.ØA€5oç/œk/Çx54HýOñjäÉ yíw‚W‡q‡dÑÒ&•àÕa¼›²W5 `½»y@ónF¬¡#’Î`#3bÆðO[]ë ®ÎÒVO¬7„l ­fÀǬVË>0é Xã±@Ï.»Á€5Ö0£îv½"€5ò“âN´´)'€õ†ƒÀ=ï °ã‰æ=çM€5n±Ä9óî¶ XûS°ö§ `íí€õVGq’G°Þ„½÷Æ Xc…[ájÛŽÖQÝzÕ‹Ë̬NOeÀ:=•k4™úw,€u£÷óÎBëôXF¬±QZs…¬ñØ“ùjF†¬¡AöÀÍ:>0«ã±’e\Ÿ ë0ÞôxÚ]€@Ö鱌YoȤÓÉxuz$ÖÀ ¾te¬7!fVk¬ÑåÍëñ°ã†(˜§]ü2`Í ´ÚŸ(huwJ¡¬h5\ ÊÛÐj€'s¸ƒ¼ŒVo&œ—.W§ç1\%O8ýî„áêT%ÃÕЃÄÞ½´þ1\žÊp50¢‘÷ÐÐêÔ†«½#Woˆåºæí‘ÀÕþX«¡iÉ®¾ \ÆB ¬ù2W[G­®ž$X1½9ŒV‡ñÆié#\Æ ¾v_«®Þw†¯¸ÝÍ \:Âp5`;D †8ÁÕÚE«Ñ¨‹•†7\½!ºYsŸzÕ¨±9¢¯±ÁÕ5w”:–´ê øÁÝÅõ­Fæ¢8«€äÊ­†º <Ù¡HÍhµWÈ`µ™‚ÕñÔ˜mj°§ 8ÖÇ'RµÍ «­“‚U{•‚U{•‚Uû‹¬Ú»)X5 `D5>¾bÕ>²‚U‡qÇébð¸«FW înR7N¬¢ŽÐrz–V'cÕHý»ÈÓ6yÁª½JÁªa§“ò«F•g•?ºqbÕ¨rpëÚp竎Ÿ ­U¬:ÕÆXuª±êدq„¬/y{Ï—¯±j´áÍçP¬:õ±jpÁªS7«ö:«Æ9üXG4—bÕà¤f«ýE X%O¢º¸€Õ©N«S V§:­Ž’1G”¢hujÃÕ0RÌ·ÂÕhîØšË&x5ª„Ñœ2Å«½=‚W‡qÇN½<’žx5´M¡‰gp5TM /S¸:ÕÆpuªájq†«½%ŒV㓇ˆàv}Ыöñ´^ÙüJ…«­JA«}* Zí# hµ€ Õ@ª ±·w‰lF«}Ô®^ÁG=J'Ž*\]°e > W§:®ö1`´Ë0|†gÒ­Nía´:µ‡Ðj¨ß.Ôª!~‹kÝjÕ>êV£98ô7o¿‚ÕúÛ‚ÕË‚դǑSÇb¼P+`5ÈÅD(¬º`º¬CÝõ×cÕÉø[+bɚÚX°ìF ²$VŒ‰¶¯`Õq» iã Ví!ª‚U{ì£`ÕÀäw’¸¬ñb´,:×™±j }R¬:J®…ÔZ«¶ØŪ-œL±j‹ß¬Ú¢¦«öÀHÁªý©‚U[¼ÕÀª-,L±j %S¬Ú‚_«¶¸Áª-¤F±j‹ R¬Úâ4«¶(Ū-PKÁjı,3ãN&X3iWÀj`$ËL(¡`µÅª(X­A7ŠU§ö0V}EýÇ̶ XujcÕ©=ŒU'#ƒÕ£`5âœî™ŠAÀjúÜ3Ù‚‚Õn°:ŒÛ«þ P3Xm5­¶`E«½™‚V#r!Œ_Ðj ä¸Ú"™®Nía¼ØÛM‚W§Æ2^Ú#üjë‰àÕ'†ûàP¿:P\¿éR‚µõD Ööª¯öž^]ñÇc^" ^}NºZ¼úBªîm‚Î W‡mÁÛ„o†«“Qd@ì0\í£®: 6ʯ¾êéc*a0\méhµw^u@ìU Z’ Ýe©ˆ ¸è€,œ8'×Yt@Ü(: 6ç”^mzµwDèÕö¦”_íF«½—*b¯Je@Ü(üj7 ÁÚÆ@k›ª¢$xu*(k怜½±L°6›2¬íM+ÃÚ°vã˰ö_³¶IÖöвö†µdÈÚ?¬“‘!ëó©°/Ó¯'fíù³¶^ dí6F¬Ý&üjë¤Ö>:‚X{?²Æˆ ’ÁvfÈÚ‡G ëôX†¬SI†¬½›ŒX»k5)`Æ 
h²Sǰ¶w¥€u*ÙëøýÀmÍ€°¶¾+bmCª5Ú2ÇEëôPF¬ÓC²N}`È:õž!ëÆ Yûc²NFƬý±‚Y{W³þ`œ˜µ ž@ÖncÄÚm X§¦2`ƽA­Ý8ëT’ëd$ÀÚ§–Ö^¥Ö©$ÖÉøÖé X§2 X'#Öþ¢°NU2`m/Cðj·1\í#*puª‘ájq•ÛÔŽ®öÇ \í¸Ú‡Gàêdd¸ú´2U®NF†«“‘áj=A«Ó0ZËhu*Ihµ7GÐêdd´úÄÛÃù¼áµ Vû¨w´Ú›)hµ7SÐêT’Ñjo ÕÙ4ÁêÔ=«Sk¬N­a°Ú^¢€ÕþT«}Ô¬N%¬NF«½±VûXÄ`u22XŒŒVã@¡³{ŠV§’ŒV'#¡Õö¦­ö^ Zí¬ö=ÓýXýûU«¯ * Û~uÝŒ(BÞU«w(Î}V­Žïf}ÆE©Vƒ¬K9 UµÚõ®E¶ÚRñ$Ùj&–ŠluwJ›ª²Õñ/˱Öñ¸è¼¤²Õ±8Þä1«lu7:¨lõLpWé9²T¶úB&¸g(dªlõ…ükeè9«lõØw2K%Ébã5“H 'YŒ·ð ™$)SÏ$‹ ˆ™S„‚s,†©@ɯÃÄ’c1ŒÇÙ¯K‚ŰÄÔšDpI°¸H鸃Ä-Ã"¦¨Üæ ‹1ÿ¨Hš3,&#gX\0‚ Gvʰ˜lœa1=•3,¢$”fúÇ#]ï\2,¢›LŸ• ‹ XÙDu•$‹Þ I²¸€æyÆÜìiN²è ’$‹ø´™—+IÃuå³SÙ$É"J‚=0`N²è#$IqRŠ:f6DN²Æ£’v?Ö}Óð¼0“$‹©+”djñû>u$É⇗YÄœdq)õš2Ù’dÑ{Ò’,¦2œdÑ…ë%Éb*ÉY½œeëw§ 5gYL9Íb Q–%ËbØ®e›a’e1UÉY½#’eÑKJšE¯SÒ,¦’œf1•ä4‹©Aœg1•ä<‹>z’g1•ä<‹0 Uœó,&#çYLFʳˆv­SÿIò,z{$Ï¢äYLÆ–gÑ«“<‹Œ3Ïb2ržE“œg1Ù8Ïb2ržE›Ì’f1Ûf–ÅìòrŒ¦’eÆwÃÌÂÕÙÈY“‘³,MªÄ½®²ÁY“‘³,†qÝ19ɲ˜Jr–E”¼†—¯I³mæXL5rŽÅdä‹a\@uŠ)”cÑ JŽEkޤXtgXt[Ͱèƒ")½gX̶™aÑm”`1õŒ,úH‚Åd䋌3Ábª“,zII°˜Œœ`19ÁâãL°ˆ,,7E>J‚Ådä,‹ž¿E²,¦’œe1•äT‹©$§ZL%9Õb*I©­ ¦Z´‚šjqÁÖç…5Öù©-Ó¢—áD‹ñ—놣g¼Ú_•&Zü`‰SK8ÓbêgZL%9Ób2r¦ÅÆ™iÑ’i19Ó¢·V2-ÚÈJ¢Ål›ySœgÑ ršE·q–E·q’Ål›9ÝÆ)}zpŠE+'ÝÆ ³mæWtÛ›^чKÒ+z%½b2rzÅÆ‘^ÑÂÙS9ή˜Œœ]19»"Œ[œl?!ÕÞI®è%¹¢W)Ù“‘³+z•œ\1äìŠ^³+¦‚œ^19¿¢?•ó+fÛ̯˜F‡,z…’`19Áb2r‚E¯²'XL…8Áb2r‚ÅôDN°˜Œ”`1=•,¦‚œ`19Áb2r‚E †ˆ=%0z&Xü`œ “‘,&#'XLFN°˜Œœ`ñƒq&XLFN°èƒ ÃXžåØÎ Óc9Ábz,'XLFJ°è6I°˜Œœ`19Áb2úÇøcÒÕþÃòí_ÿ°–5Œû ¸ùÄ7òbUøã ëÁ^ÿô¿þðo?‰Ç›~¾ }{û}ûcS~‡ÿ•Fÿ~¡>E•­Ÿõ@áð¾À?¼v¸í£þ÷/_Û‚þrû{þâþ^ÿÔs[~ÍØ·Úß?iýmŠ÷Éþ¡î÷iþmˆ•jP[õ(À~:ÉÇH4çŸn²øÿ´~û'ȶ|ûù—oÿí?-ÿùÛÿöóýÿüü+«Y¡%/\á=Ûß©g=FEÑ®ß^U G>6è0]¿¾ªÃ«ú²%ãK–)ûL¿~i,tùùe{Áúܸ“8/¨ê{3ÄøEíÊz7,;µ‡_Ôžýu¦SSÚï_ÔŠTÝó ¯NMaã';yu_2gQ»ÏYiÆÏÙÚ›³Úž¯³µ=sÎjS¾lÎÖV؜զ|š³¿b3>jx½p2…¹ÊÎ»à€šw®å[ùÁÍØª9@±‡"Î'߯¦üÎj¶ó[q¹s5O¯fý½ÝYbF.e»Wˆjüz~gn\ÏÝKõtÿF5›wçïTŒš1kžåúv\Å÷:/¯ÿSŸÿ?þü?ùåýëÿô§?ýñ—ÿë¿ÿïÿ]^Ì·_›«Û9TÀ@w¨.ße0ÒìF1ëSñQ“U(i ê?ÄAÕh¹˶Q,ì~‚žrw~CÙv w ÊQMÅ¢l…"Z+#P¶Ea‘ &>no¡ž2JÞ“ãó@ývæÖ-û2Y<áýŸˆmü~äùl2h ~Ë€¤äÈ‘ÝÂâË~LÖ̓HF¤œÛßÁ‰.^ Ê!‘P ®-û=©3ˆw¾&Û¨ìÏ$ÇÜèl¼—þÌxýƒþrG_Ë5R)Åç3 .w¥P i¬QÈŽÍ#ËR׺å8'GñÕ+ùÊíp:Â~ã2ìŸ7z«€½ÐQtdã.Sß©”u0IpÉ×¾TÊ6©"ÈZ´L•¢R oŠgá¹Ë‹•R&Û# œ¸òo„•R®ÁçH¦g26©©,#=wü6I¸×,Ç s¬Z†®ì2] îê;xŒOÐçl\¨‚mµãÃ7ÈDSÓ§œ×$G 
¸n÷Ýl÷¤?@€þžJHë|r‘j›ÒbA¾«Elû>åcJ Žî¤„)>UÇ·ƒ®>.È*ÜaU¤ÅZ{]÷dÄónÒ_-¸î¸(–Ûeªî•ª™ß'+îêgb´Rû3'ëyNy¸‚\ó½Ä¿»§PY‰É;nîãßA«K¼ÅØÍËù¨b—ëŠÉ?ïߣ\AÒ&ÃQžuܰ#ÿû!ißÛ³ÍKt¯®"Þ>Ü­öuÙÃŽ©2Wp{ºŽé*Í{žy£mßnôhÞYÛô9—mÞJßÀÓcMk:'®·Oš®ÏÔX;ATë+¬Mƒ³’*çtå×}.÷¼örë2pMi®÷†}}µ©pânp¬¯ëO¸.j1€gìZ3¸¹s`~ì`ó>Óâ™t…¢(´¾òš}Ʀ5.U1 ‘ÞÖåÚh´¾nçdXž±e‹ÑìÆÉ:¡ Iëë “­™Îy»©‹á‰Û±±¼ÞX4Ö.~râþk›vß§ ⹯ãò¯±ãO3íó®1²! @S+‹¥ ÷‰7<–)œyÆ65n o¬±…7]¤s¿Æ`ý¸Žªöüšžyí3á9¦zaøCób/¸¬“˜uâ6h,°º`œ¸ô ,>¤)ÄyâZg,°1- kÞÔ½ÎãšwlÑÎ};·&¯-š—h>Œe™e7˜f“ y–mÞ…armS‡ôŒ}jÜvy3K™—ZÑÌX;‡vçY®ymå“ ×c=°å­Ý99q“1ÖX{߸«k¬lì±Ìë#Ÿ¯g™D>!Îs^E3c¶iÛøöæ%r.Bƒ¸ùW'âwÇþ ¶ÚvuBåá3V&ŸÃ¶\Ýv«ó®¥ FØÅô¡/À@¯ºý¿ î%o @ñq9’—¼Áñ…p#ž‹—ì¤~ñ’"¬^2¶Iè6v÷@¼dp°hqT/9ÞÚƒ’£Nò’c]Œ¥‹¼v“«º9æh›7â'¯øFö™xTeh>¯Û *OÚß×±wAñ”¡ù¼ÀÑm±â*Cw£ ]ñ•]FOœå*4|UYœåuËÞ»Nгìjâ.¯+Øøx—-ùËëŠ×ŽSVë;Ì+@n,*{ö˜qâY¸°—ì3?Xv¯!N×|fô`‡jIr™×8ÏíX0úK"Ÿ9®(ü|všÄ.ÜC²OœflÅÛ ¦6Òì5?8#ÆØwo޽fÔ!à1`ì6)ƒú[“°·9\§Ô’Äm~ ‡ǵÞRv›ììÙ¾1ùÍá÷º×Õ<ÅoŽ øðm¿ù)Ø ‡øøÍ’ ò¦M{öÁõ»Þ,µÉwF}xçÃ_eß!¥3ÊE|g¤ ¿m…â;G}j5½ì;ƒ”H*â;¿TÇcŒûÎ88“Ö“øÎ1ù¡ãØ”ÅwFcÒãy}g¯‡}ç°m÷}umGñ+¿¥tM;ñ­Ëê;›È¾ó1¡ͧ¾ó®ÍÝ¥ÕwŽy¸¶ uœñ>Ã=m]糪Ð}qœÍgÇYAuœ/|#F¡êttg\çpþ·¾¨ã\0ðâŽóñšm{¯ÙÜq›ÍÉ¿¡mSéßýf†,ÄovwVýfö£†ÛŒ¼ñáilŸÜfvëÕkžC!.³&Ýe>—)µ¯.³9kæ2‹Ÿj.sYgzw™e~˜ÏŒ`§!ÌeÞÖ[ä.³ºÅê2«;­.ó5³ž¸Ç,®¨xÌ€ñr^Å7÷˜åË0YÝbv™cBÂ-êGXs™·'ò£2ÌeæåÉ]f9¸˜Ë\›ÕpNs™wèöS¿¹ÌÇ5]Æ3MÕã'ýæPÚì;_[UuK®3ÞÛ9tpÌu¾jèg—ˆR×A“ûÌn­®3HÄÇLF$®3œnöbÙsÞ ÏŽ8Á¿²çœÐpöœ=NP=g“¸ÏOe$WÏ9\†Ã{ë ¬xΘA×2èÔu¶„™ê:ßx?Ë<[ˆë ï.6µ.ø¥®ó‰ˆ ò?ÄwFÄÐ9£lÕy¶ QêìQc>®èž%»Ô8–0ö‚äRc´o6\v«a|¶}î¼ìW£9 ‚ÖzIv¬QåI)ŠÅ³ö!׺¾‹u™T •KÎ;׸„óÔáTv®qÞáy÷£ {רò9g¢6q¯Qå“r»sûÁ•w Áp•ÉÅŽv¬|C \\l ÏB»ŽøØh¾ô=Rœì˜|;½Ý¦°““l´Ô}v²ŸúQUîæe¯é:ë²±&7;úv"Ü¿< ާ]¤Ã,®6BÏp Ó¾ qµÓ»e_ÛÎëâk?Xzc,ÛfWûìó>'>»Ú>#ØÕ~ ¦±M´˜\mÃTÄÕF¨ß6Eqµ©œS{D\mïûÚÑ…p²ŽqsÀ¾öSgMéWÒâk:%¾¶ŸzÙ×~vœjÆŽ(¾¶Ï/öµíª^|í8Ò/@‡vBe_Ûaòµ\?߃n ¾6VÔX|Çâ6zuL…Áõµ§Æúÿþ·8ÚHƒ=zà'*Gx „ÊîàÙÖ·g¡rœ8pL!¥r`ß3ဂԘÐÏAP3ƒÔáÔÏ9âÛ¤Ž/}žAj¤Q\¦·ºÚ„^æY^Aêp&ž2ùâi[òõ´-µŠzÚx×2>õ´W,eH Ðx©‘>&qÃÔÓN±¡Ô1÷ˆïÁžöRU(Æ}’zÚˆ\mǪìiC eÐÿ¥®õOv;Ú¶rª£½@¹û™#ÛaêcØH‰Ÿ §·×6&7ÛM…©/wGÇ.ÄѶ-FQj#ù°›m Ú@Aj]¤ÖåÁAjÅ…ÉÉÒ ö[WŨ¬5ež£~÷£~y­u|0¸Ò|ƨwÜ —ΰ1ˆÂ+}ûUˆ:†Ksv®u4ˆalPxÏžõƒ·5D ŽíDÉ”Ü1ì%ÝÆ54}÷éŽ3@m$,!wØ#_r‡#ÍLîPÜÑjFr”Üa×üPÇ\Þ§¾’’;ôZ[ði»6|Z 
=ƒ¨¯Wn‡CÆ QG3Ÿ)r¤µÛ¢ŽO`ì8 Qä)õ…O`¤%uˆz_0K:ÚÌu ®ºkgµ€t‚RGߟBî5£ÔþLF©í=Jmã"(µ!xÊîìRPjÃ'Lm<©•˜Â0µ ± Õ|R-å ©–ëCª™Ö¨Hµ#ÜŒTÛm… ÕÆv¨ÚÚ"Puœ˜®àéPµà¼¿ª>±øN¦’AÕÄ T­¤Z™™ŠTëõƒÕŒ3PmWMT H€jk Õvá"@µÝ]~?ó£ ŠÖ_› Èˆ]}R_Y‚ ÔU6{ÊÆÃOÙz žò޳ֳî1{ÊJ–WOÙ˜êâ)[[ÄSÖ‰¢ž2½Uñ’m”ÙK¶/9^ø:S¤w/YYûê%+k_Üdzñ’Ô.^²5]¼dè¿Ítêê%o¢œ7â%kÄzÉÞö’5NF½d{¦xÉÖÒ‘4\½d1Q/YÔMörì&Ã÷Ňz`tØ3ÅOÖ˜"õ“­â(Û{GÙÞƒ8ÊñÌef~WGٞɎ²½q”­™i¾þÃ3yš£ÓwmÔbw”5W§ùɱßóOeˤ­Ž2äËAjl !q”cSÙ)û¥øÉؼÂ9÷Ñì&ëH½d¤š„±dtÙ7Ov’-ªF#-(‹ds4PÐbÎ$P~™ VÂͤ¬ü€4rOö,ûÈ.æ,N²kω—ìâ€â&›Ö§¸É®HË~²‰ÎŠŸìê°ÍQv-Fñ”s™K4 Õö¨Ì3w€}eWg9wÖc¶1cÙµ¦ÄaNœ!ö˜aÜΉ*ˆË ã²Ì@ ñ™·79Ô;KoXÖž,U¼f÷cÆN‰Û #¢¿&n}³"ñ†l:Šì8o¸Þ(t•=gÏmz*â:ÃxìXfÛ©}g° ”g×…¢@Ù{Þàsâ´Õi^ì>Ãx´ÿ ÛuQŒ(;Ð0–›"_)(ÐÕo ceº×¹1° ÛzßWOL)N4ŒK¡MQ8¸]Ù&õKIˆxÎ(‰F`æM[Ya<‡º‘‘8p#ÎØÚk&qT¥XDºv(˜IaÜp(¾3ô\mÀ‚Ûw§Ž0B¤³§åRǃ“uëÙ)ħ†€Ç÷" SAápXA¡pz¬ o¾ñWÙ‡ÿýå$2ç;>¿hU]aœßåzŽÏL™Ô–áD½ï§V+ÌùAªÞ)51v8Êíu+L}ÅÒˆÐîÜL%ñÆÖ-üŽºÉƒðÞqAÇcz¶ÂîÛÅ'R¿!×zùq(?>°;ÜÆäŽ ä`ß ðr\ òkÄ ‡¸XÊí¸ esîüÄí¸@?ï­{èÌíH%™ÛqÁ¦°9!w¸‘ÉñÔíªÞ\!w@óæ&-—Fî@3‰í§,j0ÁwÜ'ôxGbQC ð~÷]„I}!gK¬LדÅ;`Œ‘(=‡2©/°^ö}œ2„I}ÁÑãß„J ã¢äõKí=.µ ä—ú‚XÍ:bW…J}!¸7Æ´Ÿa„JžÊTêdd*õ^+ˆƒí‹*u·pyÇqC¨Ôx,nŽÚ‡%Lj\‰/LêôTfR_¸å™7GB¤N™Hãñ€6Õ®…HíF&R{[…HíU¾Dj¤ÁNÔ<%R§ºˆH‚àÃî ©í©ê„××=CJÕ OFv±‡3^@ð0`uÇG¼p”¤¸YõÂQëjw¶Ä ¿@ µ²9>â†{ƒÄ O%Ù ··¡n¸»úáÞñÃÃ¸Ò ¡:â×V…)ÂŽø…Uo’¯Ô¿aÕº3FÀ;¯¨®8’Ú€’ØfñÅ¡!J}g+³/î_<__<ý̾x2’/Žì:üŽ”êd_<Ù÷*ÅO%ÙOFöÅ“‘}q7Š/FÜ}tЏâncOÜm숻ýpØnÓ=[ñÃÝ(~x2²žŒì‡'#ûáÖXqÃýÛ7ܧª¸á^%»áþéˆ~á€Y 7<~еo;^Ÿë÷®ÇïߟWúX¶ú§¿Ô?mÕ•õýÓ?\nýSCþÁ2˜½w¿PÿÑU†Û};:þrηýo’iöô?iýçŠe¸5 ÿå+[Іý}óã/Û·kÝÖ_-µþý—8þàgß~þ7ªõŸ–Ÿ–úÛõퟨNj ÿ“šÚ ÈûNn¸-ØàçAÞ›²Çÿ7gæ_ÿËÿýÿü¿ÿ_jÄîÖž¿7öYލ¶Þü#‰æŽþzþ=)øÿóJÁŸ_$b7¢)‘NYýö§ÉúžË{+H¸^ÏÆ !Ê ÞPÍ=ºظÞss ÔÖTiöz¡º„3Y/—¶#¾¡&±GsAdÜZ™Ï0!ì©âÈɼ ms?ŠŸ“!‡ìÒk“‚ —•MÉY·S\J½õߎ{Þ"}8¤"î7=ê­·\•5_Ó™ß÷¬.ÖÇr¼(:’¤Ÿ[ÍZë+K½«Õ\Hcd üTzR§ÑÂ}yOÀµXå"Ôœëe\DÄœœGHó¾ úè®AØÆYMêí{¡3á§ó…­ÎjÃ…Iã{l%f_§#•ýy¹Ï­ ˆ)þ1Þ#Ê­Ktû¨¸þ#4 Ù#†¦u ìxJw twy»~×;¸úa{Î>&HNUúžS~ŠÓÖv^õÆ9>ßvL’ó°ÕKÅ£>Ú­Ô¦?{ůîí1UaŠÉr>íi1¨=U/jÂ7ÞâÕ·s¯—y Ušw¯V§.& íTéBÛY°â¼óô u_*~MÒ;Ç|¯u϶8KÕÑ=¯ŸJ©Ã¶!#ý½m­)÷¼„m™é·:ÝÜ_hK|æ“ÚÛ3a5iPçcÆýtA>û¨Èàý¬íb0ãz£øÎÖèßõŠö‰_{¢>|'{Ek)nbÿ~Ùk-· rÞ†/¶çæÝß,Ÿ1ß+¶ñïzÍ|Ö’AÂv¬WY_rívõ†²MÖøo«7¯¥ÍÕŸF©s 
&"xTÛs=ËëaÀŠR›«VÛ3¯5QîÞn0}ku÷‚)ß§«öîEQl8pÁ]Ž>[Ãñ›œDØÆ](l±½·÷E³ä­VÑ1å¾j¹IÜ ¿Ü¾Rug½Am6ÞîÈV¹ÕÐËÒçë6Öf˜®©“FãF圮ü>ÛT¿M×øþÏãz?'öñ˜#mºjç º¹ŽuuCòõr^o}OÝÜút•éþÿy;—QrãÚÎû+jh|ïÇÔ€aÀ㞆²-¾-hàÿÇÍä#ÖÎjéôi¨€T§¢˜Éd2ÉàbpÇ|œ­œðñݺ«ÙV©¡´îo{[.:K¿öÁ•½Dgð×>¸Æ³]߲׃çØáõv‘ØËÖ×xö£ŸŒ\ŠhíÔ;¬Žÿo÷ÓWè´o©Ã¦Î¬¸±½®óØÇMÑ}|͟Ǫ€Âsý5õ‡UÁçîÝ•ïy›K¶ûÚFÅåÖ²Å\{lÙÆÞï­ì?¯Šß½÷Ñc%-¶>ðš¤&^·;ŠÎdï±i8\§sìK³ËÊtÍŸ°ët-kÕr“³ÓÎ7©FÔ.:‘W‰Z­}„ÕðÅžùr#±.›[SZ_kaóG3š‚ÈÇ›>ÿuV\ï4zl~r9 {aÓç¿jm²>ÂÊv[ê±£7¯óQvÎ{Í/o.GÑ;ÛPhÇòÌë,ͱ>Ä^Ê[O°?¶p¬®4Äæw>ßc¿Ý_ú"I®4Äæf)œøõKÅw¤!¶uìÒÛW*›¢{‡…MÇÏú‹Q!>ÊoMÔ>îðæÊÞ~í°fS¼HcÙóäUoiG|õº(ð3 ²©£/÷·g¼¨6÷’5«½±|Û±þ™‡ïú²ÝÝåíVyéC,Ÿn ïêìC,Æ®uÝF€y.모•4ÄJã¯ÈÖclÌ‹ò1Ú¤!6·Øzö(cs†b¡ôm›Ò‹G¸KÀBbó#lŠ‚Ù‡O°ŽèxÙ®áÂbH/—Yö÷¯;i]6†m=eylІèc¬D-›J™¼·i¸°áÂhi}–x¤â¸/–%‰¥îÅš)\«áŲýË™ö>Äj^ŽÙùñbu÷ix±˜ÞKÍŽ4ÆÂ¦äÅæºÄ²g^,¹ì?ì£ËæÖÜ—Q»,û®ȞmôÙÜÒ±®:‡kåt⤲ è‡â³'D^6ÉýôA–/l÷pc'-¬ªö¹¥grcñäŽIn,lw¬e·ÑeóÓÓˆùÄv7–oýÙ ó÷}(öhû¼ÇëˆyÙ–ÔY¶UØîáÇšm¨^”¦’ÆÕ©PbNj—•éÜòË­Ò(»Ò ›_©kï_d• cx²×˜PdÓq‡>Ê^ê²ûý°Ûè…1ÔìŸ.¼ÖsJLþ ž,›ZÓµkJ  ³èB Ž,M ¨Ú>ï° ê~¬ÕR±V¿2ȆíJ~,LŠÂꃬ=À]"i>óccŽˆñ& ²ùš—Tú Ëk*?Ô¹¿:òU<áÑeaÓéÛ>Èò©uy8²l-) G–ïM'ÿ»#;)r´ý®W £,žúÑ<¯§V„Xeív÷ôy=ž‚ÇÒ(›¯yë bbùÑoÅ•í£ËæV–Ò×ðdÙ,’j®,úÞ­½éÓ+ù³áÈ6„¶JEsø°t8uÐ3û°y0,1þÓç½UÇDÎ1¾æö×Áš3¯(Ö•Æ×ÔÆ¶“b¶YjQc|M¾Þ³ƒÛü]óÚUŠX.¨É_{ÊÒ<оP‰$Юœ’öMÚÝÔOÚèB´»…»5íG;|´»­‚£Òo©—Lhw[‹üúUwÈÀv7 æ ÕG°Ým/x+³]Ù¶õZü%°Ý-ÖSZ£g¶k¼X»Jcq} n¼Ñ>åÌò|>¼›–wÍÓ1À»ÅÂö\FSgÀ»i…z)‡g½_¼*·/û5W’oé:vöt3ÞèsñÍÏíÜG¼êªRið €WydsW9¯d{ÚÜàŒW°?*>ÕÛñ*ÄfÄõwÆ«ô9 8|¾B^‰½vK2^©ê­ÇRa’ñ½¦=OÌ+[ø(u= Ì+¡™˜ÙæÊr€y£ò:wÐpƼRI' ˆy¥_oû À¼Êã°øÌ+¸«{ļ ™6ˆyKýŸîüƼÒô‘\]å_À¼¨ÑÉÎúÂy¥V§!ÿÑ !ç½Ê)soä5sÞB@:Μ×ËeÎ+É·. 
Î{•ØÔý|@9¯?Aæ¼ÖÒ༚ô¯hÍGš‡œWr8ñ ÛW^9¯ÔnF‚^É…ŒD~½Ö¿zEožæŒ W"׳êù0Ð[ÂÏΘ¶ç’ô^åØÊV9’ô*±é=ð*PïUeô Þ«¿9êéH¢Þ«„Ç÷] ^»&Po 7‹Ï§{@½zÖÖõJñP±éõ³êµE ^åÕ]oê•LM ÀRÿpÔk¯¨W9~ׄ–ê½Êщ&ãIÔë—̨׫™Q¯¤YξaCÖë¶Êz/‰˜%šY¯„+tÜóùÀÉz¯rD£Mžd½—“õªç·A×X¯›2ë•NÆ>G¿ÜžªdÖ{•Pð_{YCØë×̰÷*±ç-Ïa/ÇYÂ^ lˆç>Û„½«Yú¾iïUb×Ûþ4iï¥hëí½Ê´Øà>`¯ß.Ã^¿d†½vÉÄzÙ-Éz9¨“õê+¸Ç¾X¯–jÇàÀ`½Zþ ɯÊzÙÿÉzù¹‘õÆÅt†¦-–Àzõ qO²^i›:¢Ï¨WáFV`¢^M Ó æ@½vÅLzuÉu¬¾@z£˜’5œ ÔkÔ+=˜=±ÐŒz½\F½nˬW‡m§YÀz£Ü¼%|œY¯5 X¯=;X¯ûsbÄ™õZ»dÖko¬·ðƒÁ÷ÁzUÍ.•IÔkµêµ'o¨×¨×º8P¯’/ÆEÚ˜Ö¶ð+ï6æõÊU¹îOY¯ºÞ1ö¢Àz½š™õz53뵬ׯ™Y¯ ¿`½vM°^×ÀzíÑÁz­Ÿ€õúý2ëÕ5—ARÀzmôë¥SJÖëåìåð Ökã(X¯µ&X¯Í`½¼]E½6‹õ^:³Ø¢Hzõfî±C Òk­Òëåéµ»eÐkc@/ýX‚^›ÚzÃ^‡œ¼êôÒI'è½Ê™êǯåÕYï©+“òju²ôø-R^­~®ŸpÂNy•ž1zYe ¤¼×1f-§¼Zw|פ¼Ê'~FÕ”WAIÀžu(¯ú䙫7 Ê+5Þ½Çâ‘òJÁ}.¢+O53åõr™ò’vò^¥c7ujR^Iünc«”÷ºÓÉJBÞ«dl$ŠWõzFd^eS‚Í$¼ÒMé’¤¼÷‘5BHyõ$Gtƒº¼å•&s×#äUçJÛÈë¶ yIqy%q}öPB^å2:òÞr“Öß#óÛ3ní=†ïñ¢½rðš`¹Á^ºöJ%ºK#R€½¢»)°°wˆà{Óêî{-ܰ÷‹Òì5 ›`oŒu)l•°·½½ª~ì½³Ý ö–ÅP£¡€½gVa#쯬#÷ý°W«Ð‘1ˆ°wVHJŒöOàX¯™2êe ´Ð*m˨Wù’ò\-•H¯ ó"¥‰J52éݤ\£XÃf$½èÿƒôâ[7Ò{Ih¡7P¯EõNùй¡Þ%ËÁõJ&¾ ®/ÔkǪ̃÷*B8ÇÙ"sz‰W‰z•îlë ½“‡&‹‘^ÌÕ/Ò{N£3“ôjÔ÷ÎHz‹J—Õ0Ò  c¤—H¤WÍ9B»IzOù˜-µo‘W‹%D}ÁxÏmÐ'g¼¢OG?=IÆ+ÂÔ—x¥¨¤¨:\ñJ:M'k*â»)Ûh=bHÄk=ˆ×†X ^ݯ5¸#Þ¸_øÈÑJÏ×Ä{ê8î±46Äk]ˆ×º,¯î÷álWÞñݾò°]ɺÅÀÞ;ØnÜ(zU,Þa¼‡r Æ“·gNhׯ°]¬ÌvÏY›+Ù®5#Ø®î7õõbƒ»œ3ÀvéužÑêç ¸kýpW‹Œ9¦çŠë3ÜÕí®£‰’’î¶×¬k ¬ë÷ÉXן+c]½¯£'q#Ö•.gãmx€ LA–÷—JÃC——Ñ fº+Ý·&›á®LýŃíNz’«míjx^=r l7l—fŸ»Jƒ¡l•;ìZ$+Š »œÆªì´ÁßvªÍ V2 ZÝ_\C¦AYršƒi`W¶K)ûê3@¦AâXZ¡Lý&Apª4Hkx× ð”†JƒÛ’Jƒ¡Vñ*Mè>â—ÄË`\ñ.E`¼‹1ÚMJàí®ÚO­Ÿ«‘Ý¥èŠ×ïÎÈ®’Æï½ß‘ìZT­“Ý1A²k•ÿ A¼)ЂdWDâ\}'JaÚxƹoÛÀ¹fε7ðà\ëÀ¹ü>‰s9Lçú2Ïõr‰ç²9€síçz-3Îõ»eœëµÌ8×Z<—Õε~ œkUεªçº-ã\«J¦¹^,Ó\ºLsí’ ær#Ìõ»e˜kï0—w̵¶l0×a®Û°{ÇÑaîÛ6`®Õ0Á\¿[†¹Ö€¹^.Ã\ë$€¹^.Ã\³æÚ  ˜kï0÷m0—­–ëÅ2Òµb™èz-3Ñõ§ËH×.™ˆ®W$]N*ºV @×^+€®Û kUÑõ™è²‚º~É t­3е‰ @×m™ëú%3×õr™ëÚp®k6p]ûÀum˜×u[æºì$Àº^•Œu½*ëÚ%3Õõb™êz-ÕµŽªë—ÌTמT—ËR]ÙžÐUDìÞe­Ó’)’êÞ%éÈÞ€3Õõr™êÞZì¯-Í"©.-ƒêú%3Õõbëë–¼ysßüÖ•&VcàërõC¬k×× Ûo¦íþ‚ëÊv¯5ˆX— 1b]+†¨]a¦½åsµ°ÝSi-ÔÂvålµŒÜµk5Éa»,…¨]P:‹Ú ÷x;k¼ˆíÒ„˜ÝC€æªøÄìæ¿þÑ”êów¦T'Ï]÷G_JûWNbü›Tw“Õ]#VPÝÅ‘5ŠîòÈ Uw•ëoÞÚ·LÕ]2y2]m"Ū¯+:d¦{(ÉýÞcïÀtcê›¶$]™n”‹O¹+A€é*E’6MZ@kfºš€•ȯmÓIýŠöY>cºá&(ÿegÏêîÊ®Õ?fBÝøÃ¨Tê– 
Š‘êªÎóà”€ºñ¬óÑ‚âÉt£‰”=¤KÊf¦«æk®GbºÅ)¼Ú¦©ïÞRî+â}u©hê»Ô¦o »ñGóõbº’¾SÔ-ôwå_¤¨[èïR>ò»»D˜Ï:wšüî.œÐŽŸ˜üî>¼'Gº¦ ìò»Š‹lË)¿ë‡=EùB~—RÆ”ßkžˆÈMú»iiçL7ló5N¤S7ºvß\3ù]»dÖß^êÇ¢LwÕÇ9Ô* ¿»öäP×D—]7Itvý]JS€Â/^\Íx³f¬ ðf%]êïBšú»ÐØ4^SÙ5Þ¬PK^+g ¼Y0š ¼&C ^SîÍhWCKR÷Ú ›ÖøSà¥@õm ¼¶…¯•ƒ/„¬LÂYTàµKBwÒ˜RÃZM€Wg<Ýgxפ¿¨Àk¶¦Ào#º}‹êÞ]cl¹{0µ)ðZ¹ŒwUn¸ ä»a‹~w¶ ‘ÌwÃýv„×f¾¶cd!ß}Ûßõkf¾¶h¶«œù®²(÷jâ]3eº»êEMM|ˆx7lû>¢˜wWùQ#–xׯ™ñnØ¢%ãaçx7lDzjxxÞ°õOÞ ¯_3^½»~Ò…„WŒlîð$¼þä™ðŠ»Å<ÕÜxÞ°E÷êÂáUîëöœáõ‹e«‹Åëok Þ5zLxÕíX'oØâ è®&¼RSTP=L Äû$.mþ=¯_2#^·eÄëṎ×Êñ {Æ$ZÏéñ¾mƒñº-3ÞU+ԯƻžc…êŒ×mò®%p;×AÈë¶ yuˆoO!ɉòÆÏóܦòÊ$Õ¥&æ!oØ^®«D@ÞU¼nЃòº­R^Ö×êÈkG|yyʈWµ¸Æ©Ly½‚™òªM—HÊ»*ƒÇÔY(¯—Ë”×m™òÚ5AyÝ–)oy÷=¡)¯ŽˆÞ{LÄwSl”W‰†ãý6þÌë¶ŒyuÍæK:çõr™óz=3çõçK wÕ‘Ç«Ó%€^·eÒ+Û4Ž>€ô†m^Çy^·=¤w•Ã8ÂîAz­ ½~µŒz½†õú5êÕÙÞ!tHÔ«1BjM¹7£^=Â4"ùzß¶zWq…~|¤×F!Þ¨Jªôº-“Þ°çÝ8HïZF©õZ³õú53êõºdÖëå2ëÕýæb™Q¯Y2éõȤ×l ½nˤ7lá=žë±Ÿ‰ôZÏéµÞ Ò+ö5ùôš)s^™ÂQî‰ó†)VB}ûœ7l1(õ_p^/—9¯?sæ¼VËŒyívÀ¼n˘×móªšË8‹Îëå2çµÇèÉWGU·Ú›A¯-&zýš™ô¾môZ7굞ÔËrd½n3Þ´p.°—ãÒÞéÇhoÒQxÑ^Q¢>Fƒöª.‰ô¶`[ÐÞX]бTHäX£X(s¬iv‹¾‚ ïǽ—(TWy%îÍ8I{……b \ÅOH{OÉï#J7Ó^žo쵓ր½¬ X¯ŽZž}©BÖ+@µ'цÌzyˆ‘¬—g-Ézy†¬—GoÉzyÚ¬×Î"öÚãö Û]}ÙÔa¯ɰWE澚"ìµÓà€½v<°7®y×CÀF{ÏAiöJ¯êê‚Ú„½šÝ¶µ3zÐ^œ %ìµWØk5ɬ—‡ÉzíÙÀzÝ–Y/8“õÚÓõš®X¯}sTf€ÄiðBL°×ùöÚ1xJ3˜¸Dx¡½fÒ &TP¥ 0hÒ Y‰Ë4xMÙ ±^ˆ)«+UdÖû² Ø+Óö;2ìýÄÖÉÏë’ ö¾l ö¾®™`¯Û2ìõkfØû²%Ø«åoIƒ$³ÞbŠé¢³×Äz%à3¥””™õ~bë¬÷õ‰õ¾ª™XïëšãÛ}]2±Þ×%ì}Ùìµkö¾lìµËö~bë°÷u¹{í©{_åì}™ì}ÙìýÄÖaïëì}ÙìõkfÚû²%Üë×̸÷eK¸÷uÍ„{_¶„{_×L¸÷[ǽ¯k&Üû² Ü«7»¦,O™÷¾l‰÷¾.™x¯Û2ï}ÙÄ{ý¹2ï}H¼×»e潯k&Þû² Þûº]â½/[⽟Øîíóa¼÷U.ñ^¼Ì{_¶Ä{?±uÞû²%Þëuɼ÷eK¼×Ÿ/óÞW¹Ä{_¶Ä{_¶Á{_¦|_¶|Ý–¯7K¾þxø¾.—ˆïË–ˆï¯q_»2ˆ¯l÷ÒÓF€ø¾®9ˆïË”ˆïË–ˆïË–ˆï'¶N|_¶D|Ý–‰ïË–ˆïË–ˆï'¶N|_¶D|_¶D|_¶D|_¶D|Ë (í⺗‰ïË6ˆïë’‰ø¾l‰øº-ßOlø¾l…ø¾~›ˆ¯™ø¾l‰ø¾®™ˆïË6ˆïË”ˆïË–ˆïË–ˆï'¶{ø³r`ÇÆfF¾Å¦|2Ÿ÷ú53ò}Ùò}ÙòõæÌÈ÷U.!ßW¹„|‹íš¯ù¾Ê%äû‰íê­™ô}Ùôu[†¾/[‚¾ŸØ®ÒiãÏñETp˜¡¯lÓÖÁ|½A2ô}Ý)QßW¹A}_Åõ}KÔ÷U.Qß­xËTÙ¨ï'¶{ø³¼_ƾ~¿Œ}_åö}ÙöýÄvÖl ûjJ[dlQÈ ûÊ6oó²ê}ö}]3a_o—„}‹éîY„€}_Åö5°ïË–°¯l 5¾DØW¿ê$æG±ïïÔ啺àöHQ;üÕ¦þÚ’é+Gq‡¾!ÝÀÚ áZ÷¥±ßmQÐçzƒvƒ%Eƒ|ìÀ–yDù†Yé3ûFuygµý€TÐå%Ñ&©œ¸G]Þèz#ìÂtywqÛ©Ë«’c„ÞB—÷eº¼Òú{ʦË+7¥ï[š.¯Ù Ìïaì“B™w–“Ù·–¨Ì¶yljPš×ËeiÞXÑÒÒÿ0ö+Ã56ˆûկﱛö[rà-]I 
ìW¶sìñ€ý¾l‰ýz^½Ä~_Åü-Í1ö†=_†¿¯ÇKôW¶¨÷yT•éLe[» 9èïV˜Á}÷ÈÛD_¶Dýv™þz«dú[šeìúþúãeúû²%úë=(ÓßW¹A‹©ïwþ¾®˜àïË–à¯÷ó e;ƾr…¿Þ2üõ®éo±õ½rÂ_3eöëWLì×6ìÀ~m$û‹_³õ¼m™ýšÚ8دÊh²_³ýÎÚ›S`¿V°ß°Åh¾]@øë¶ mdüõºdø;Ë ;^€¿ºß¬"üµ=Ã_¯f†¿*6‚„gEú÷ !Â_>á/«Òáï¬ÀÔøë—ËðwÖ7Û£×À~ùdd¿~ÉÄ~ùÐd¿v·Œ~ý2úµb™üκüù5ȯÝä7ÊM)üäW=/…8‚ü²W’üª.gø¥{Bð¶9EiüÚ%3÷õK&î; ¶ÅÀ¦ŽæÜW~]p_ðÌ}­¡Á}gq ½Ü‚}­ñ}­ñ}íZ™úF±=‡ëfêKÔ× Ôן*S_¿d¦¾~ÍL}½š™úê Nm;˜Ð7L×1´›}­Xf¾ö®Á|gzŠf¨z‚`¾Ñ_•n›ÿd¾²]}‡™Ìׯ™™¯_33ß°-{ßEóõ'ÏÌW> ¬ó5˜¯>‡tæ;'䔘oüúLð`¾³V¡ãИ¯lƒ=ùÚCùº-!_¯`F¾^,#_&!ò}ÛòåŠÈ×êä;+¾|œ¿òµÖò•íìACD¾a»Òy _¶4ˆï¬Pþ¡´âk/Ä—ùUH|Ý–ˆ¯Lm$vâ D_®n |ß¶|géP°èžÔyËÁÞtºÀ×mø²Á{ýF™÷jÙ~÷øð^®èÉ{ÃvãNä½ó5gÎ{½\æ½Ì´CÞk¯¼×Z¼—ǤÉ{Ý–yoØêJ™¬×ï•Y¯µX¯Û2ëu[f½öz2ëUSöÃhD½fʤ×^Hoqö«+ÎfÒ;ë$QèµßfÎë7Ê wÖñ¯‘– wÖI´‘i ÷më ×MôÊå™F²[€^=Á8%HÐë×Ì W¶q‚ w¾ÇêÄA¯êrŽÌнnË wV¼mp3Ð;‰v¶,NzÍ–Aï"kïs3è%ê3Ðk6€ÞI4÷N¹ÙèµÛôš  Wd·ÇyèE9½1Hì=³õzñ«èýÓOÓÇ¿þ´…7´m‚µG [ùôÓsð]Öç§ÿùé¿'‹|} ˆxù• âÏ*òCû·ÝRO÷Kzâ¿÷-¥/»×íÿ8FOø»×`—£]+Ð~þÒûåëVö¯¬AmöçÍ÷Ęߖy_ôž ´Ÿ¿ôþO³× ´|Rƒçšú+5 ·]™í–ø–ÂÝY§âfÇl¶Ä¤•ú矫ø?Íñßÿøù—ÿ‡pcÿñã?>~þ·Ÿþåçï½Ó¶ÉÇ ïõ޾óÝwÚ~àNGIè¹,%&û{otø¾`¨ãó߹Ǵ1ù+‡Êô”Ûèš_63€#z%È¿ 6ì¶ÚXÐ÷WÔFÛ›çþªHýõ×ÔA@GÛûŸ¼ؾôãX’S÷=µÜ›=••øÒžªÚXOEm¾¶§–ÚôžÊŠ|UO-u`OeE>ë©{Š’¸^¬¶/-lã"“`W¬ÕÖ%–¼¯9júØŸ)ê7Ï„¼Írik`ŠuQ¬+ÿÊmößy›Ò@ñL÷q¬ïÛÜí6óï} ÌIm:V§Ë_»Ïï}že’d±œs¬dý>‹?ÏߺÇž«rH‡_",nØžô|ø=ôåÿù§ÿúå—?üùÿþðÇ?þñ¿üåÏ×ÿþåw¹,ßuöã¡“¼gÒÞt±ªLñÉU…4ÙÆùÐé(œë¶Ø¦SØý|¨vW‡}tìt>4®Ýi®©‰·kKçC 3²ålŠCéçCµ®úa‘›QúùPõ…jkŠ7ÙÆùÐ(·:ëÍM(ý|¨5µQúùÐY6»8ävÝã|è¼ 8&ehïçC­9Ã6·F¹=Ƭz¢3!-¡Ë^÷6%òéçCµ/ÙÅ*æú‹ò`g;µ¸)ÛB?ªEÕŸ$n2ã¡qŸ:8I‡·k§ÄÅîU;AewS†ø®ât@ë ÷•d'å§á©wÒóºßIpR ŸÂåËW’ `¯||èIP]1†¥:úïŠ<é²) Ø»,¨lCµØyÒUSûa«bp‰ MÛùªç¢Œ˜ñxëüŒãó<µä¼²]ñ€Ñ˜Ç».Ò9ç»÷õ}Í= ²5Ã3úö¿§quj}² ý?ušõlÙ?÷%<¹%¢F¥Y¤úÌ ñi~{’eîÒŒ‰Ç»[B×]áûÑÛ–[j+1—®=Áê¾ÎÚà›–X‰”ZžÒ.~öBdúêéSx+Otå¾.IÿϾ¹‚aýÓPàÉ”†Õä:ìòôo`s(ð$“Þ_ó›QàÉ|×¢¦6wEÈÇXR;,f®}•ÂÔ:,¿nIMvÏÕ:¬nºçªiæn‡Vv)‹vÇuÖú¡ŸDÚ¥ŽÚ}×¹ŒÂ-µì^ta·Ïûëï0:Ií¯ñ.ޏÝãæÈvGgªýU£NQ¥/&IýN­»*|kj©€c´Kê6Šjã`¸®l•íLêÖïÂ6Ôÿ8ù†ç«3 ­¿ò-IJàùöûÕÒŠ>™ö§ÃZ1!äQj‡åã)ød¸®ìG >®+;˾>òÖÇ÷-éÿ©ÈÕc¼÷}OúTeÞcA5­mˆÕ›*=ö¼«íÚÆ›ィƒ§1ö¼úÓhš?öÑeÑXwÒÿ›å‘ö#û1%ý¿9…A<¶¡ÿgᘓþÆX’E÷îc¬]SJì}Œe':äVO£Ïjn©‹BÙ®»±lj  çÕ®y$ý¿áÔ(ø¤û­ôºvŸ 
¿•MY#§ñM^ØÒÆWë”§Ž×ÖúdɃÝX>™*g¿5}Åʧ<ÜVL,Ï&“†ç–„ÿìmž{þór‡4˜ÛøÊÏWÊ5G_儯óÖ8¯˜‹ãŸ}|å &qš«¯Öú±˜ZἦV¼¦$ü7^¦Ä`†æ_ î•m΢Ñ%æRÇHÅžL}l´n‰¡wÅžL÷è§i(WèÉœ†Öü2z²¤±5÷…ž ï•Ý´ˆ€¶±ÕZ$ÖQCóÏË]IóÏmáZmyl=äx¸GWÍcÝ]âzWÍ]áÖ©4´æwZržî£«Â¶&Ñ¿YJAGË-.ÛU{knH%Þ+RiT†÷}â8§JöXN-Ý{åºhWöÞî¾â•*ädx¯4é¤TXs'QÈIv^“í(é‡óÀ:&üC!'ÙyŸµLCôÍLJG{Ú_£«(Öxô×äGÿ¶ni\M¯æ˜t`jåö$úÇγ`ý 'e—páã+Ó™Dÿ$¤^WÔÇt%Ñ?.…Ijï÷§Î€l÷™×4s£¯1¸¦ÖÔç•OÇô¾CÎÿ×\ËysGìǶ|Ýã̧Ýs#¾6kcQ·½ï‚‚@tüFoÙ2›€®ŽŠF±nš®’Éè ÔÐ}]¦¿æ. [Ž‘Žlºq!œ½æ?ÐÕ9Ò£æø0¦k'vÀtydH·l;H^ðùò€t·rä®)N@t·IîÆ´ušˆ®èø¡„•e¢«`²IIhj‹e¢» ˆDGªyŒItoÑÆ­eƒè*xm×Ý+ËD—‰¡HtEˆF %‰®äÖ²Nt‘ʼn@÷VtF÷9;ÐeLݘiÎÄAt™ ›DW€0‘0]Ѽ±4ѽtX«ç„!Ñ¥°%‰®8æÚòÅè*„s $º—N—œSÍUD¢{iiÓ¦ÝL[èF±i@9]ySO4C +ÚºtýÝK°7*rÖjf {iRŒ)ñ9¤B 7yÉs/mA¬çñÏ•ï´öœ0๺u|Áí+ÏfˆÛ^5ã8yn<ΡÄõÝç^4¶äNä¹ÖÒ•çjg|ŽWS:€n´Õ³ìÒY/ôþí7Up›¡®^ÅÖSëê*ð9†µ¶;”¡n‹Î¿Ösº„ºJŸuœ-/¡n´p4}ß'ÕÕ±Öz}‡ºêL1¹<³Á.% v/Ë:ÚvÁî¥IµÇ “ìÆÓ ñS‚ÝhÊõê#=Á.Uf v%Ú¹ôpg‚]¹ó[OòC°ëå2Øõº$²«P÷ ÛzdWÅb|í¸5“]*^’ìz¹Lvý~•ì¢ãìú2Ø…t%¹®ä:ïM°k¥2×õêe®ËîJ®ãiŒ®í¹®•×µràºQ.&ˆ6Ó’ëjü>û‰r]/—¹îy_ß¹®N6Ì=õ¹®î7õÃ2»öaìz=3ص `Wkä'Ý7¡n9}ÑS7êRó•T×>PÝSŽ¥ÓÏLu/±‘X]=ƒr¥º©n º~“LtÏ"îÑ\P]jGƒèªnW?ŒC¢KX]¿]&º—–Uý܉.Åj‰tíÓÒµ¶Òµ× ¤kÝ H—êÅdºzg?D¦kï L×ï—™®µ ˜nzåà¹öÊÁs­M2ϵ&Ï•ÖÃÚN)çÚÝ€síCk8×ï”q®Æ‚³ÇÇçÒŸ"Î…Ïš+ˆ¶ sšË™0—>a®ffeCx"ns…©”§öÀ\yh1Uµ=À\«&`®—Ë0×ê ˜åVå©l07ÊE§k™ s½\†¹^ÏLt¥ÆÑ|'ºt It鯑èÚKÏDW·‹uFͱI¢kÕÑå[ÑÕÝt:¿àéš'¤ë7ÊHמH×Ë%¤kͤKçD×›#ݨIx]{ÍßA¢k=XW‹« æë^’”9ïª÷O¬Ë±.X—I™ˆuý2Ö½b—ª³=°®W%c]®Âˆuýšë^Šh»?ž!°îsœq¯jÀºQ*ƽ&uF¬«ËgÄ™±®Vyg˽BªËu~§ºwr´ê^ k¸T7.·f𜩮Õ"A]¯D¦ºXŽ“êÞZÒ­SP]-^Çþ©®iŽ]mRÝ¢˜Ów ‰uµ²ôXW¯Ixì!:ĺÂ/c÷X÷Ö:¯o|ëêÀèÕ Ö•BOW#Õ•ð¦…g=Eª«¦©¡ ê–3¨1s>«kRÝ»ÈÑ´QR]³eª«4ÓQŽØLu£§‰¹Ö1ŠT÷¾‡Â‡S]f /¡ýjìø,j^Ý;éù[%ý ïñ÷â¼ð®¢k÷–I˜xWéG¾iò]­–f¾«G¼Í|7¼ùíP|7¼°x„êj½øîº#ö6ó]J:¼ùî ú~¾»*ÕzË[L¾;)¯ÖÜR=3dW¨%ýwƒ±¼Ê7´tØ„Ý2ö”Ù ÙµLð¼d¤ÙdŠuÌÞu¼ 8Røs¼ÌœCÀKw€—Ó% o<ë«Õ:¸vÂ{JmnAT$¼qßMâ+­H&¼Ò3Ø»¤ ï! 
³XPÏoÀ{þÚ‘k¼‡äb–Qù xãòËÞe x™e˜„7lçÞ£ Ix£³)T¹¢!á=ôµNmb&âÏ:záÒhF¼ÌjLÄ+÷¾- ñÅ£­Ýˆx¥eÒД#^‰0Æ"¾ÇgÄ«o7íþ€ñF¹U¯è™§Èxõ|k<{ý~2ã‘éXGwãíœ÷«ÏÌx5üì#úŒ7çЗ%ãÕHnÄöA¼ªÄݽ"ÞËnõÝ€ñjÙxõ JB^»Qf¼»äΖŒWotáî`¼»\†îyñj‘zŒ˜v0^‰áÜ}ÿ†w—ÐÙTÄHyµ”I¯ÚûÈžpßLzíuõ†-+Sõ"ˆzÍÔû¶ ÔkŸ>P¯ÄØ÷ˆ€zí+êµn Ôkc3P¯F¶‘±€¨×ë™Qï&w'Ê=ÒƒD½›tÈê-ßËÒ‡D½aÛu.¤ÎX@½a›¯ÀÔ»IÚy ÛLzín ½ä Ôk5©¨—ß H¯dùïA©Az7]¹Kñ“ôzéµA¤×j’I¯ä/HAÒk/¤W <®Òkµ鵎ҫð·)ÑãLzù ½jÌul“€ô ¹Ü#¢¤×úH¯r*lc›¤×«™I/c%Iz7iK XI¯Í‚™ôÚÒk_*H¯9@½æ)5ÔkS8P/!^~Az5C4×ÂP/J^sh€z5ŒdwD½z®‘T„¨—^%Q¯RSŒL D½ÝÕ'åEí yweRáÀ™ñÒ1$ãÝŠÜx u&ãUå{Êt‹ÜÅg‘»vÉÌxù²Éx¹ ãÝŠ§5âl㥠!¯U7lš¡äå"èÑ(ÁSý(äéºçøûß@xó½jrûá5tJIÁ—¥EB‘Aâõjï ç²"ÃĹ ðÚ#˜é—Š Ž$³"Ùä? ð ê]ÑÊ5¸†¢ Ê£q=è¢ ‡d$§æÜP”A²´K‹ý'à¿;·m›5qøüª”¯Á2Þ"Óû+š j£x=-æ€WªÀ ÿo—ï.¥o²ðîZê\M[œ€WoyÚ›ü<¯zÇ5¢‚xŸžÓ4tÀk¸ €w×"îsM9ÄáÞµøWð]Ù–¤»ï.Öx9 À»‹@O-A¯üöÑüä»ñ~cXi½’|Ȉ|Wà>µœ4ä»|pâ]+¼Ëb »›¦Üg{hw—¾r’qÚ•XøÑ¶íâ‚àºÃí÷g\…uíNêòzuÍ– nº‰î®É¬å12¢ËÖhHw2éR#DºÑæ G®ƒ ‘®]Í.Êé÷f¤»ôBºü$ˆtå@Ý"]|†t÷‰tÃã\Ý3¢{hFmÙCŒèš¤[N#ºJ~p´¬ $ºÚÑÞßg»ÒE¿µ„x‰î©™¡fs0¢«ÄgMmA k¥ÀsY*ã\ ›†sOQà–Ê8Wåî¾}Gœ[–ðwKå@œCx¯SÝEùôÖ¶CNª«°Ý¥yt¦´ï!²]RJ»á•\#(™J»’#Ôån6©.÷QIu7¥—NÊ™êÊÝZ€ºÒ¿8îÏ£vWe~ìés uWe¤\×v\=CÝXkE«t‘T@ÝUtì‚ê*CdŒŽ]ô CÝXIÆzt(d¨+ÐèÖ=”¶2Ýx7—<ê†K3ÓÕ9ºë&€º±¶ŽÊu@]eè¼ú4H¨+e¤S‘¡®ÄŒG >¡®‹Åþ?‹ÚrÛz~"²«ôdýø‰®¦íªìž!;5Ò•8üÔ¦²+40 4Uv㫺y¦Ê®T,úBÏTv­.PÙ=Fq²«l¤ó§TÙ=ôêZ†kSÙÕy «÷e¨ìBulW) †J85võñ­k/Ým,¡îJ=yéÁ¤]cWç&ëÖÙ Û8Qb*» ¬W*¸ªÀ ©]hx’îR¿Ô¥v“´ñ® Cj—z×À»®: ­]ŠV îB×w“hùnÌz:ß Û1r²›âîKUŠ»Y¸ŽŠ»T{)^TÜ¥ì—IîfÕ/0^ýݬNð q0^ù3C%…ŒWRAÛˆ ã5©äÆx•z¿â5¥d ^·eĶ3f̈W{ÀÓ_âÔI%nW墚ˆgÄ+ç¯ ßðÆ{Z®àÂ[2vè0^«&ï¤ÜÉáVèÄ;IƒgDÆñʵ=ö¦ÇDÄ«Ìâ÷ˆ¶âU?TFáz´ˆ7lQ¶ù.D¼“PÅâu[F¼ºfW!á$M´5Ï„מˆw’ò͈â ÛžŽNñ–/7¾ÆzòòA¼ñËs¾>‰Ûõ+e¾[ò½÷à)à]Éd¥óÙïNòGøðnØî!D¼ë—Ìx·,šöŽÄ€wí~໓ܛ}¾ëå2ß ÛœeÀw'‡¡R¾ëå2ßtܽgå!àÕ(=t^x'e”ª1¼²%^/—ï¤Óü[_§ðÊ6T`2ßµR¼Zñ¶oÈoØæ$ôÀ+›‚ž*3®|7~»Ü]V|W6mè7\ À{è U%%àͧÎÀw5}žóÒu3ß”I!¦*m¾+^Àw'Iõ4¤ä»*7tš€w'éS ­àÝI®TO–J¼;IÉhèYïú53ÞU¹¹'g%Þõr™ïNÒ%š&D¾ëuகˀ×ï—¯×3ÞðšÎ¥'Þà$1ÔóîðNZ±ý$Þ°íI$ ˆ×Ë=ˆ×KdÆëµÈŒ×m™ñ¾mCr×n—¯¼È¤_BÉÝp䆊 ¯Šµ$ÈŽxß¶x§òÍŽÐÕŒxÝ–¯“2 ï¤ÌçC#ˆ×mñ†íJú7@¼²U­jîJOgh ïæ2™íz™ 
wõjÖž#šp×êžé®ÖWKO¸+S[Ì9Üu[†»¢žIæ¢Á]ö°]½“¤¶;ÉÓîšéTÛ¥)¡Ý°„Ç}vÉߌv')‘õ3ád»vÅŒva"ÙÕÝ®»¯%ÙeMHvY’]Ù´>l1¾™ìúý2Ùµj&°—ŽQö8>‹Ø}Ù2Ø¥CH°«“ëCd]‹`—݇`—`w’Õõ°vÙëvYp]þCÁì¨Í{ ²åºÓqÝñÚéOçºÑâ%´#Æuµmxw•pÝp|–{€‡Ìuàm`÷ÝìH`7–Xó:jì*Õ24|v  ð²k˜ d—«a’ÝXh¬×xtÝxÕûÔÓ’ìr±O²«Õɲ ‘‡Lvéü햎܃ZAv'%¼_¶^ÍLvÙ Av9]‘ìNZxö˜$»m vùñìj˜S²½FvéƒìÚв»hàÕY—–ä-‘ݸÞ7%ƒl‚® »ôÉ2ÙÕíÖ3Ü©– -‘]Ý.þnD'²s³<þ¡œé.=Ð]UsŽ5T"¸î›â ÞpWUQàp]|îÒ‹Üõá®!ÜöšÙ®LË £@»œüv_—Ñ®T ‡h!Ñ.g. Ý©8»MBlwÓÔ¡POwe‹O²ÜÝJŠÍ5œðã¹f‚»/[»úí#`ãàîÊ£R“^Îl×óPf¶+Û.Kk«Ìv%àr¯g£S™íM˜)ĵEçv´+Ó¾õe#Юl:¯ÓúOF»&´[l#û#Юls|7í0qF»Ò¿c™Ù£Ú-ÚäÍŸ5´+[,ûÖºQ´+[4EcS™ìÓØÆÙ•mYøooQ½ì¾Ê%²+Û:¶Ÿ@v_×dW¦h†&s²+ÛÔ,#»šŽ£!û9¾Lve;Ï«3´Lv‹-æ²F¾*Ù5-( Ýb–Énq ÆŒ ²kC&»2Íc;dW¶IG…e™ìš¤<È®l@èÙy;Úu[F»²š5›ØAB»/[B»Å6’®íʦöjç63Ú-¶x¥k‹ŠMh÷U.¡]٢ɛì7Ðî˖Юl}¤0´û²%´û²%¶+[ÌÆ&¶+ÓrŽø¦Ìv?±u¶+Û¼ ¸Ìv_6±]ý2>š}~F^àÝb[ºÀ»JYpo#p+óÝW¹ÁwUìºzd\Æ»Å4ð«Œwe;÷v¼+Û16—wÝ–ñn±Í#Þ(ãÝ—-á]Ù¶‘¶x·Ø–Ó•ñî«\»¯r ï¾Ê%¾û*—øî'¶Îwe[÷5•ùîË–øîVf¢–ïË”o±-=+ó]¿[滯K>€·d˸\àUÖ‹[` ¹&ÀëµH|÷mj|÷u³Äw_¶Äw-¥ø®W2óÝOlðʦ7ØØoâ»nJx×M‰îÊt÷͉î~bët÷eKt·Ø¦pîE>Ð]ÙŽ±âÝ}ÙÝýÄÖénÉsr.ƒl'ºk9P2Ý}™ÝõÛeºë·Ët÷e+t÷õÛDw_µHtW¶-œõ†­3ÝýÄÖè®LëÙåÞ@w‹mž»üw¦»¯K&¼ë×Ìx÷Û=üXaƒ½‹kg¼+Û<â¼€w½žï¾®™ðîëš ï¾Ê%¼û²%Äû²%Äûº_¼ÅÖ:°aÞOlóÆ"áN‰æõfÉœ÷eKœ×!sÞOl×ùI‘zý©3è}•K¤÷e¨÷Ó=Iï,]Æyé"¶…ôêó=÷v…¤WÝwÞr´î ½³OõÐ~€^3%Îk–ÌyÃt.s“ "çµoœ×Æ p^Þ˜7#`¼ö•‚ñº-3^«?¯}Š`¼nËŒ×m™ñÚ˜Æk£oY3¥FÉŒW£P|‹Uˆ×ª’ï¬jý” ¯Û2ãµ!Œ×ËUÆ«Ã4± h™çÁxýr™ñZcò*ÈÕã@yu(IT*cåµ)”×>PP^} SÏ+AÊ«ãGãL%)ï3•ìw=/Ìkó,8¯Û2èå÷Îû2 ÌëµÌ˜×mózM2æµÛeÊ«+Ö»=Z¦¼V,C^32äuS†¼ö‰òZ/äµO®B^ûØyù±ñš7Æk_Ff¼nʌ׾0^ûrÀx5«+‡Kݤã•C´msåŒ×ËeÆKˆ×ú¯ÍF@¼Ö·€xÕIæ®1IÄk³¯u< ^íxóÔ¾«~§ÐåâÛù®÷­Ìw­»‚ïæ{îZ?ܵz4¸KOl×k—Ù®Í `»vÉ„ví{Úõ»e´kWÌd—L²ë¶LvÙ¾$»¸Á.[‘d—O@²ë·Ëdׯ™É®Û2ÙõÇËdW“؈õ$ÙÕ$6ޤ“ìÒ%Úå´ë¦Œvéœíº-£]·mþ«SÞýÓOÓÇ¿þtèî1ýòÓoy+ƒ’~ªŠÃÇ^úŸŸþûG²X×¢…—_‰þ¬"?ĬÛ-õt¿¤'þ;ßòT[{Ðþcô„¿ äc· ÔŸ¿ôþ×^œƒ§í_Zƒ§ÙŸ7ßÿ±Œ½‰/zµõç/½ÿÓìµíŸÔà¹æŸþJM6A£¸Ð%€±NZþ\1.1%Eþùç*ñýOsü·E½?~þåãßÿ!¼“üøŸÿí§ùù{oŽÏî—ï¿ÓöÛï$çO¹ŽíÔ„ï½Óáwú‚¡rŒÏçÓÆä/*óS9yçÍ räü”@e¯D¶}ImÖX(ìí°³Õ¶¯©MMÆàé9¾¢R™QêµOÞl_úqlÉ©ûŠžZîÍžÊJ|iOUm¬§¢6_ÛSKmzOeE¾ª§–:°§²"ŸõÔï˜ 
‰XmÒ¾ºÊ)ä3Ö«ë­À‚÷5}ìÏõÛ']Üf¹´šŠÅÚËÊ¿r›ýwÞ¦,â™îãXß·¹Ûmæßû8›djÿ×{ùk÷ù½Ï³”“<Çrα*üõû,þ<ãÚ9oš?„{´µžô|ø=ôåÿù§ÿúå—?üùÿþðÇ?þñ¿üåÏùßíw¹,ßuôPÌIŽ :æ¢òðš>¶uù6énϦ֡˜“¾­²wÁ‚C1'}_EawK4{÷·iПÚ+î™w”9¥ë£Ü«â1Œ‹§s L@}œs:ªý³§[=Î%½¶os:R,UÂ~´0»šÃèzk'÷ÃЃŸdSFŒvrŸùc¥îâ(׬}ˆ.hy*¥b;¹/Æ™Ž`Ÿg:zJŽhkZj²C ŠOÙ×&U}ÄeÆ!Pý¡ºÜ³zœw:ªš;µ=®i$_^ãçqîø¸´·ßB;ô¢ÜÏYÙî&”ׇâNº<Ê©„Ñw;‹{(î¤Õ³Jyó ¯SÜIWG‰¦ÝÎN.XúQSeW4O×G9u¼uoêf²Ý]åÔ>ãÔ²Ñ÷”ôýâb®i¢ßÇ=§S §„Oïšø¸—$ñwJ»ï¡ñI ‰?EL}Y¶»ë£œ†èuÇ­Ü$í<Þ4¯-IÌqïB~µËžÚËê´ý¸µðoú(ŠÞÒÙ²Ú-|ÒõQ¼žW:ê×¼’Äß©@¦½éÖ ?éç@£9ç»ïŸŠ?éú(ì{g ‹içøÙ.§"µš>Ê)¹¢Ž>eºº> ûÞ))®rêûþú9mIäOÍy6 [y…}n§ŽpvyõÊxS52Q¶¡ñwJr¨ï„DoüvtyµãÕwP΢uVåQØÑ£3~[º< {s|¢ÊÊÙz¬"èÚLCâÏîV‚Rb¬ÜT vèò(aë-²­Iâ¯l<µ]¥S[&]Eƒê8%-ÛÝÕQì­)þ¤«£¨*Ó³Çp*ø¤K£X™Ï$ïg=k¾’¼Ÿ½° y?î;Éûq<9Ñ>À®#ÜD¶ùÛÚcçíá$pÐ$RÔ‰®¾ !ÓÝ5R¬G.k’÷³¶\¶$ïç¶ý‘÷ã7z.GÒ÷ójIßï,›m¿ †û¤ïg-¹\CßO{kß½:wÒeR¬AwÒeRâ’áÞ´»S.V×I±»ÎIàOQ¤[ßê<xÒ…R´S*Ïå9ÿŸCøóºlIàÏ^êº'?‡² ?Ö# üåÞ¼žIÝÏ+y%u?¦©‹™ÔýÔ+Ͼ/#ÛP÷ã׸MIÜcÚ6'q?û¶eˆûÙפígÝNSûË^œ2˜v•Ë ŒÕ/÷½Çœ¢ÓÓöygUäÉ”×Ü!Kbò4¸æ¢È“y ®¹Ó)ò¤ëûY'(Y§ÑYa›“¾Ÿóûœôýì]ïKÒ÷³Î³¯IßÏm[Ò÷óºìI߯mß“¼Ÿ_òHò~֚ʡ2¤›ÂÃOêhC93êž$þØ·.b>,îw)è¾û°ÑÓbZnš²óŸŠhŠ%m ¼t6©û°‡ŠU¥¢KŠªGb©ƒ]RJé.¬®8”‡/½è.ì%Éø®º,[‘ù;C>µô~—Ž V¢HGKþuMgÒùM¸z†+–UCèï’øs—)‹ÿJº]4~™eJñÁÎCòRJwa£óÎJÐPÛX!(Ý…Õ÷°ôäÏ—8kwa/‰g…+úD¼\Eð½ ±M² ­?á„¡6)p¦»°:ÃÍRÇ<áã¡õwI¨¶«_Ië.ì%Ù½>»ÄÈ» 9{2\ÙF—»Ä·ÕÂk«!ö#Ò54ª¯X[ ±¿|þHñ/Sû»å?·ôg×2'±¿ø^:€ylC쯌w ¦b‚= ȥݕHÖº6½HîäPâ$>Ô$ö'n=«:/eSHJVÛÝÞçÿl‰¹ÿÆ©O#¼1Ål\¬¼6o“_â„w+=§çËÉ„w+‡OÚØ@«uEÊ” ¯R„o#ã ¯ #—¯@ô¬Ñ¡Ò¦Lx7eÙ£÷ר±Lxupõ:»&> ¯lËÕ½)@^ÁmÉŠµ£×ònêUGWÇäÝf©Öë%À»)AÅ:µlмŠ‹ëµ¬º¼Ú,ŒÊo5€W›%QÙ娡mðZ¼+¯Ê­#KK¼ÊÄ>È®à5õ3^•[b–o2¡™ðšS#¼åÔm;}h„ת@x‹,”ò ×ýƒLxýÌu"¼&“EÂËo„×T¡@xíÌ? 
¯fàcjÚtD¼aÓÀS5|xMkƒˆ ¯0j|U’—„Wú€GÏfK¶íî©`Ixã%ø$€ àHàº) ðÞB&=%5¯ü™{jS(oÔù¾[²gòÝ[ihÎö€ï–s”}·–|W·öä­ä»òðŽ{páÌwñàïê0gŒÕã!ß}Ûß•»Ø¦m绊lî©ñ]ÕÕõ‡ó]½Ýuo»今žn_ðê¬jóVAw£Ðž©p¦»q³ÊÛèDº{—má–»†x÷Öz£ ”ïÒ !ÞõÈ„7lÇÚ“Ý“ðªÛTE$¼ŠQo‡§ðê ›z¶{Þx¾]û¹•Ë€ðÒË"áµvÉ„7n§Ù¾ ^EÙ7 ¯¶FcAQ×Ô¼ñt׳’|Ðj¼ñkyeÏ1_ò]í—ŽÜAä»Q÷ø¡ƒLðÝ[nØyWMð]œŽ™£ÓéÌwÕüóVw»ˆwíÍïÞ:dãÚLƒîZƒîjøVÒïú1‚îÊŒ%Aݘ&ÝÛÅz¦å²#ݽuˆ¬§t#Ý}¦„¶?Hº{+SÑÙN[ðê`û_q%⼚JF'Þ[yÃÖµêmðZáµ—Ä{+½Y/âÕ²<÷Õ[ ãå‹⽵ʌWü8»D¼ªÉÔ¶Šxý—ñªùGÒ*"^® ˆxÙƒ2áõêeÂ+]ðhª°9 o9$³íuž%á•æÀHgEÂ{•7Ø’ðÞ:ÿ­9lzh&¼êÌí¸¼^­+§£ù$¼þ ñZ'âÖ¼†R5¯Ê)avÀx/å—êé³Èx­ñ^:1MU·*Áx­=Áxå¿Ušð®Õx×j¼[ÐÓÝÙi¦»fªp×:$à®?R†»1I…ÿÞÎŒîÚÌáî]&—¹*=m„»63î ©ió8¯„»*7í 8á.¿)À]%wmtÜõjf¸«Ñnä#ܵÑpת™Ù®Í9`»ö`»$–™”h×>a ][3Úµ÷ ´ë¶ŒvííZcV´kßÐn9¨·5U{¢]Žð »·J ÙZᆱ²«b1º´í]{f]}:1d· ]sæAv욲k>=ènv¤voQË3ãÛ);¯#g É.Wa$»ZEþÙÅB…`7U‘T—+ERÝ[ŽñC3Ñå€H—ͤK5]uèê墔Ñ•*Ž4µŸD×´™@tMdD—+c]/–‰nQ)×îÐ'D·HxÅ\W@tM“DWë륧ñÑ5Íw]“#Ñh8®¦Ç¢kjÞ º¦ˆ¢KI{]‹Ð¥ x®à‘ü¹šæ,çç¦$çwJŒ±‘½ŒsUuëÙ²Ás¿®ciŽŸñ\щF³M<·¨x)♿ž ÿEú÷môø±3Ÿ¿UÎÏÀ®¶Zæg³âvp32ŒìjßµævcI4=5 Áî:—Ê4¼F°» .à`·Ä^Ç»ò.€]·ìê¤õ2=‰EŒëÆ+ŽÉlk|\7<§»{z五Í!×¾|,=µ–q]¾7¶{Jʰ34°Ý©ÀÜæ÷Ûñ:Vy-€ÚÙî¾îÍï7¶Ëèj²Ý¬íE´‹ì †vEÆÎ­GÆíž‚œKÎÁ»ôzðî]4ŒÚ6ƒwé50xWö¾[ï^ÊŠÝûBŽÝåRÎÈ. 
ÆîÞ:é¾öåˆÝU DWh ÁnÔxŽQýÁ”äº Ï`è.±Š…îÆË‰wv­&™ë^ʶÙóš’ë'îjÕ-r7n7R ìÊ;Ϧ¾O°kUÉ\WÛu#É«îÆT/Ó"i3ؽ´`ëÚ¡»Üá$ؽDú*Šq»ÑBs— °¸] X=_Lܽ•Ϫ¦/"וCµé±¹ˆÛ½´¾_-æ8s]‰<ŽŠàº¶Q®{)ïgOåjq»BN]û‡\÷RŠœžäÇw•èjD»—FÔ¹e  Ú•è>¹+^» $ŒÈ]¶?#wÑÏ-x×m¼ËpGï*ÑÙTk-xWÐ¥§²à]ùe]Í‚wÜÐa,v—‡Ø]I„IÓöla½ƒìEž­%ŶØ]ÿ1vwÑ9Îp[è®ÅÒ"tW”dí X†î20ÎCws°Bw´+!UȧÃÐÝ$²ël÷T‚Ø®²g¡»ŒÜ=вjs,t5,t7šEsûgpס»ú’§“º+_åì+mFïJµeÑ»œán<_ü¯¥“·èÝ£ê)' ß-Ä¡¯œ¿ËX¶ wù1ÝÑ'Ü•|Õ+»úÕîêµKœ³ö@ðJ6°».•î‚…g˪Gº+ ­³o[oÑÍìUGü.{z¦»ñX÷Ýפ»VuÐÝC2TýlšÅïJ–k”6Ó]1¾¹­‰wã’ÇÖׄ»ºÝ¬C;5Dá»ìx€»Êó<÷ô`„»ÖЀ»úx>œêúseªkÏ•¡®×=C]³êÚ ¨«õËÒ×X »‡BX–&@N²ë¶LvmÚµ†jd¥±Ý¢èÖc Èv[ êj­£l×uçPWŠs{ÓÕŸ= ™®d{O^G¦k#˜n^܇éêc“øíC-(çaAºö±IDß´Ž9Ì¿‡éŽ„Üsüýoº1eo5Úð¥Å 㾃jZ Eûu„ãf-HƒèªïÇ[z6hÈs-µ x.³*šƒ !Åð -^ŸR ‚^±Voxj Ü¢7l©ÆÀˆoª1pC”j Œ¾¡÷£©Æà¶¬Æ b¹Þ­«QATcPÖŒe0g5†Kºîg=¨Æ vÚ|WcP¶io£‰î¥ÌèËÔU*Ñ•Øltô¶ ¢+ê©®VE@t½ º‡âîN‰Ò5 ¤÷Ž5iß×Ò•í±Vè ¢+®75÷…tsMÈtÃÓÜo ™éêvá·€u0]`0¦»«s÷¸zB]¥ƒ?[âdƒºV.C]•{&–GþPמPy^TwŽI¦…p‚êòð†Q]<;©î&<Ý Á‚êêÛ[ºbÝ(·o-Þ˜T×/ùP]õê¨]cdĺ›B$Ú2 T×/ªkœ©.4 îИ!Ñ¥0]Þ DמŒDW*ÉGÛf%ÑetQÇLs­L†¹Zž„½-Žs%ŸWîÀ6Ã\·e˜{‰‹NWCÎ0WUYŽ¥Çþf˜Ë“j„¹|À\/–`®•Ê,×(=X®®ØžÆY®—«0׋d˜+½q5ÆÐh0×+Ÿa.Oræê’á*µ£€¹¶Ý˜k½0×^ `®=#u5˜.S×vÈ0Wu‘Üsƒ²æz¹ s½\†¹^.Ã\kjFê.Ú{iÙ± s­©syhŽ0×ÊæÚ¾ `®õ"Dê–ÔJ[;È0×0×mæÚ#<0× d˜ë•@¨.6IsýF‰æǦŸ·"Í5cu!mHškÀXÝEµ%'ÎUv€hüæÎX]»_ƹ^ÏŒsýšçz=««ÜzssÃIuöí1ÁUüÀX]³!V×î‡X]>°®•c¬.ï—±®=:vÙ[°kUAÄ._Cú6¸1d—6ÓÝr´mPÐÝOlïúí2Þµ1 xׯ(à]ûFÀwß¶ÁwÝ–ù®Û2ßõºd¾kãø®_3ó]۳ˣ?à»^.ó]kkð]{ð]{íà»ö|™ïú%3ßµa|×Ëe¾ë·{ø®—È€×€×ËeÀk/^ó x­/QxÍÝà¥Ê"¯<Ù9Öç-Ü7^óŽx¹ž!àö[¶£ï/ðjá[ŸöÃ9/–•¦Ép(¨·o R“áPTr£¤Ä¼J”³Ãü‰$C¼‘胔B’r”dÀp^7eÎËcH¦É€3Á›—^»[/"Ó‰yíp>0/m1€—Rà¼$J&É€8m‚^·eÐë6ð"žÌ$bŠÓ7%@Å,€W„öh1%½nƒ$ƒày ze‹Îü,ê‰{Õ}cà® Êx‘`Õ"xÍ63#wÊãi1¼È{Ê^+¶2%7lˆáaQ±\ é"†Wš w `{$rTò)2Ì)‚÷·ÈîÆj nûwÔ«LG!!êeŠh ^›Î}kØdwMÙ]jž˜ì.Îë5dw'éáµnf²»8Ôf¬×êw+ï†:ëMåÈzcb_¤Øp4Ó@½j–èçu •¨7n·¯GJ ðn<ÞÑùô’î‘ôbv¢înQ4лÉ)ëÝ — ’º»Jª¾¶£ôÂ0Ћ)Ùtwµˆ˜ÖõzÑÄà¼VyÈî"xÏ8/ܽ”n¦ìnÜ;½‚^J7ôÂ;³à]„ýèUš¹«+¡ôÚ5¡»›’ ¾@ï,/©ev~E說éî²= zçžUúÅyñ&»‹jzôn —md7ΛûƒEï¢çµKÒ2Ëw΋æ0Ù]9°ãÈA/œ—¾8Tw­~½ó·¢LPÃ>Èz—…œõZ+’õÚ5Áz¹>&뵈`À^ʈ‘÷}øNŠvhzï|¹ìqà P à«0И‘k/¾äܾlߺ{ÉÔÉ{‰/È{IEÉ{ó½ì劓°7ÿ=H¯=,H/Aˆ“Þ ôj9ð©â®]’¤—+=’^¶!I¯T#½¨ H¯]¤×êi¤D¤×®i¤7÷G'½¸“ôÚ5AzyM'½i› 7uP^ƒÃ 
¼|lR^¾:R^»æCyY7@^.Üòäò² yy7c¼ù£!ãE1"ÞM‡¤Û¡¦âÍ—ý‘ˆ7½ç»`±à»œOÈwÙqÈwIqÈwù6Éw‰\Èw‰qÈwYò]³ï²÷€ïr|$ßå#ïr>!ßå¼Ðù.7ÉwùJ)É@J2˜žùî8 ex{Õ¤»|5¤»¯Iwùa“îrÓÔénîçNwóÇHºËGÜ5x ¸›>]ûìâ¹AvÓëæÊ;Öͱ.;°.»¸c]àYú©'겫ª›˜T—ø’T—“S]pçLuí¹@ui#ÕÅsêò{r¨›kéP6@]è¿Ôµr€º„Ö„ºöx€º|7¤º©Ûér!ÒE£ÓÅPnLþ±1],¹Étív`ºfŒéŽg3‘]»`%ºzHtÝ‘]lü›È®=r"º., ‘Ý<Ùèšz05vs¸I좃˜Ä.Û<—ç]ÉsÙ±Lb÷êq±”Øn"ϵ ž{):2–l;Cb÷’pbÓO Â®Ä“ïv։Ỡˆcø.Î?3z—1hˆÞHŠG©g½Ëˆ>KOˆxDFïú5Ÿ6\–³?ÿ(Ð~èÆ…ÔÝˉÒе\itóáûtµT_%ŽÐB~е¸f]†¸è†ÛzŸCž@w€‘¡º›ýÐÝ4ø$EÜ tµ¨Y‹þëèÆZh_: !ÐUsnR¨Ð6]¥¬;úŠ€@7ÊÍS?{M¢ë×ÌD7ÖlKô´zd€D7Ö×ÜÏzéjxw¡q"ÝX“j ˜š ABº±ì¼.TO¦ËÕ%¥ºÓ ÛmõKiW·›–.Œß™®V¿ÇHR¦+ñcA …g¦»jSÅŒtý>‰èÆõ¢ç˜fß‘»~¯LtW-G{z]I’l=G‰îîE4@§¡™èŠ@Lýü1‰nÔåÒ1üi}ÝUÿH™Í2ѹÀFÓGÈD×ë’‰®žoíù+HtÕf×ÈY¢åz¥èúý2Ñå3é²=‰tyM ]™îqÜH7n·.[Û¾%Óõr™éz¹‡é®ZA^]]L—/ŽL7l’i‘?€º^.Aݸ6]:âÎP—ý™P×*¨«ª B]kc@]{@]{o€º¢fÛ8ã ¨k/Pw•*Ì8‹¨kM–¡®T æµ“Ó5±*0]SQÓu[fº²E-û ÕÌte“(v;B˜°n1Íý`°®‰UëÊhÛ¦Ö•m’‚}ÓÀ}Ènùµ’ÉÔCÀ™ì~b»‡r¤Äw‹­µ8áîË”à®lÇ=Žwf¸[lJSVWYîÊWoañ™íSôõq“Ù®lëqöùÌv‹mêì`»²ÅLÿí-R·³ÝOl×.S¿ß;CÉlW¶iGM3Û}ÙÛ}ÙÛ5A7°]·e¶kŠó€»/Ûà»Å$a’Æiß5W|—¡ À»&§ÿà]ÿû„we’ß¡|†wM6x·Ø–é3¼k*uà»nË|—!à»ÅÞ{[øg¾kQ à»%À½šø®%â}Ùâµ@¼¯r ñ–ìëܱMF¼–yˆ÷U.!^ÙôZÛÔŸ¯lñG kB¼/[B¼/Û@¼/SB¼²m~âýÄvå”°=ˆW¿7}Dý&Äû²!„×.‡Þ—íê)#ÞŒ×2MòzU2ä}•K÷[‡¼²Íñ°ifÈkâ—€¼Å6/}C7CÞ—-QÞOlò¾l ô*4ëºæ¶·Ÿ9¯›æ-¦ðkºDn"½ŸØ:é•M¾Kqʤ÷Un^;Û Òû*–Hï'¶Žz_¶‚z_¿M¨÷Uù„z_¶„zíômF½¯Û%Ôë¶Œz_¶ÄzíµeÔëµÌ¨×‹%Òûº["½^,ÞW±z½M2è}Õ2Þ—-±Þ×ýëµûõò€zߦkx±vÅAz_¦Dzm\0ù]|§€½ŸØ®óÑ*ÃÞ—ÍôwcÚoûL¦¿‹6Ã^›;L—6êïje?ldÐßÅÔhú»V.á^›6M®ÆK¶„{_»ß­¿ ±\ÊïÂ73ý]¸Ž/ýÝ\Žá»)µð]¸œ¾K©c„ïÚ%¾ gÛÂwáø[ø®Ùžµ@5ý1Þû;%x%Xå=^¼S|d=¼™Ôw¸ë‘äã5ýZ„ñFCnSß £dC¼ïðj÷V›%”Gìê2‰”àO$€¦G ^Ë ^ñ•µKQƒ—‹yjðr1I ^v)jðZ"1hðr4 ^éý_-p¼(7 Þ˜!ã3¯r‚”à’‡âÐ7&ŠS lõD}†¾– ЗZÝ„¾C—Ýôw/õ¨¹§]kú»²1ž·,nÐ߅̸éï^ÚïÌ—™»(ÀKÉ ð2¿x¡OæëyÉZíph‚²–ZíT®Í¥%©£ïùíYœ.o Þð&Âïï8ž¹ÕìšÈ­få[éÚ,µZx[K~ÇÜjй·Üjqýk¤HdnµCaS;ŠÄÜj9Á¥åVCU˜[ w³Üj–º ¹ÕM‰{#€ž[-=]O®Æú!·šU¹ÕXÌR«¡5rj5–Bf5>3«±"̬få\­áÉÕRÎ;æVÛ5Ú a[äVSÅìÚ0*r«1Ws«á °ÜjfCn5{äVc>æVñ'Ë­&l;uAcæVcÎ æVã5‘[©ê˜[Ín‡ÜjfCr5»fK®¦•I™ ߪ ^ÙÕ¬²«™-gW3²«1 Ó«!5¯¥Wã5™_ gÙËkÇüjv?äW³k"¿š]ùÕ˜S…ùÕv-cö†ÁËk ™_Íî‡üjŠñ —§ÒzæWã30¿ïÇüjlëÏkUa~5>¬m:´Õ³’3Áš•{¬Yý` g%Ðëå`IraÍŠ!Ë!¢×mȰö²^{fX³r9¤×Ú‘ÖìšÈ°f×D†5³å¨^áŽð“ë. 
¬±ÉÔëÕÌA½~»Ôk] ÖxMõ²šH²Æ»!¦×¯˜cz½&9¦×˵4klÄôz52ðU¤Áˆˆï}™F’5N:L²ÆI•IÖvÑ™šl+“¬qrd’5NðL²f×D–5Ö“YÖàO0É}&Yc1äX£ r Ö(ȱF9ÖXæXc9&YCZK²F IÖ¬’¬áé˜cÍ®ˆkVËšcÍž 9Ö̆k¬r¬Y%rŽ53!ÇmÌ±Ææ`Ž5+‡k|æX;”áqm¢Á̱FGœ9Ö¬.ȱfuAš5³!Íš]yÖ¬ò¬Ñ½÷¥¨Ð?ÿ\E¿ÿiŽÿ¶¨ôÇÏ¿|üû?„Kòÿññó¿ýô/?ç}ŽéÔÌt†OûÝwÚ~àNër>ŽR¬A¿ÿN‡ßé ÆÆ1 ÿ»K„¿rlÌÏy¥°Ê¯š ´5¸h“RkT¯D¶}Im Þhµ1eǯ¨ÍvOÙ+Rý5u·Ø‚W$Û¾øó8S8âWôÕçîì¬V/í­¥>Ö]YŸ¯í¯O}z‡µª|U}jÁ.kUù¬Ï~Ç ¼okÊ,sjŽ«1M*Mõ{¾š>ögºúÍÓ"o³h…'°pÆ’ò¯Üfÿ·9´pgºc}ßæn·™ïã(œã_f]ïå¯Ýç÷>Ï2iÏëXÎ9V±¿~ŸÅŸçoÝCû—"»¢NSÙ`Ù”2Àï¡ÿ+wøÏ?ý×/¿üáÏÿ÷‡?þñøå/¾þwû]îËÇw ½~Òƒ[7mÄÏGIÍèóëªÝöž ùRüIh^'Å·\-ý•c cý½,]dåRüÉH=ÓAº¦‰)Ù¬W:ëÚÙü%ùåé%bui°AçêG±vn禠w•#ΨÏÓˆH‘mÏé¨qXF¶{Äsª±{µK‘(ý´ÀümUÖ¾ŠgJ ÊÔÃÍsx_‰Cé˜åÖ¡¤–ØøÊ(.Q©@”¼7œvP/¢Œí¢¢\i«*ÖíxçUÄH;Ý`]î5G`Ÿ'ž$ïÊjëm¿Zú¬»$H·}is!oçmÄK€kl!ŸÍ¥H”Gr5.µ7MÅ ŒÃá…U ÊØ›(cU;y)eäóæ·BPÆFÔ$¹Ò–gòV ÊÐ~@š˜[Á\COèkh²%¥$D¹ÿ8ŽáßRäi™ioE æÔ3Ijñ.Ú˜9áNŒ15òèV/J>Pâ’m¨lÜ%öJ¿=Ó±÷CŒ³eÓ»§+ â~¼·¬ÏÈ[™âsjŸ¤pÏ3E-’P†lIåÙjâåô*Ѹç5kbe©â{Þ(Y“´`îrY'@¦$¬ Ùˆh‚¬xgO~~{éYßóE‰˜¡î]#+8X±;‹YB3ü^¦¬.µ¾{™©•T!@ ü½{Y³lTdK¹‚ p’Þ PsU^‚öa¾B7ú>—仯íb~Ø«|L“àpEbýÖÓ €¿ê’P°ÊI3¿Jÿ¿ê’P°Êðv3 ~Ue `™6©Ô!4£àWM²ÐŒÊI¡‚ÿŒ‚½\fÁ²-ÊÛØxobÁ²íÚ5®ì<³àW¹Ä‚_uI,X¶x={C•™˶î=ÃXð«\bÁ¥.:DÐÉoQ Ç¢ï·füªz¢ÁEµD{+ˆ&ìUÈ4XåNáå:e\Ê=º ƒ_Å öbYmÑ‘Ÿøâ*–Ë,ØtWÀ‚eÛϱ›œYp©ÊTŽ —RSßFÏ(øUÿ„‚e{Ô>@í>—SOð ì•ÈøU.QàW¹DeSnÏ8×>`ýzŽ%G‡¶‰¿ª‘°raÆkÝçgÉQð«Dìô §ÊeìE2.?{Î]àRnîY'A€_å~•Ë‚Ršƒ·C‘ ðVäzÖI`/–p)¶´p“ €_õØ %þ+Óµ‡ÓYyy濲íjºêJeþk—Ìø÷U,á_/–è¯LÚÉjÛS™þz±_Åüõb‰ý¾Š%öûz€rÐc‘ˆÆµV¾ ökÈ~­ Ùo)© ¼Ç&ü•qÑ.äãæþÚþÊ8k“éqJ ß%ü}—Lð·*yÓg÷–x’,Ò½·5 b‚)ÊC ¬3Ç=BxæÛΘ2ØŒ Ö‚&z``ë¯ÀÀl``ŠÙ3$Ê(Œæq b`=›¨Nã²™Û+@D0[0"øÖz¸¯|k™röè¡LUXSí‚Áe‘Ù÷ìühÛ´;r`*Ô2"˜êRŒVíSB‚©²Éàp£Žyl« $X·›÷í3 ¬r×#öÖ Y-FG#^K÷P¬KÆ”Õf|D‡-®Ø·½ÕÔò¡í=æˆ`ž¿BDp´óvÌ-O#‚í•!"ؾ DÇÓ­ÓØ|@DpTE«Ðº[†ˆà[KÀ±Iˆˆ` 1"ØÎ8!"8nßis*|U×¹y  –¼ Y„¥³îëÔ`{k hœÆÎ"B‚ãFŠn¸¤x˜-fôA[sX°u<„[ïBXð-yɶè`T°Ö;SŒôõ@Tp<ó¶wô¨`;‡¨`;s‡°`±Þ5¾Óz+‡k µ–™Ã‚µ„ ïy©ÇŠl1‚uØíìÌ’aÁq‘cû. 
æAD†ÇEÔ"ÕgAT0s3*Ø>Dۙ̄çì+b‚5±li1ÁÞ> æ`lAÁ¯Gð ß4b‚5Œ/=Ð¥ª¸œ1JÄ¿îG‰ðH>Çßÿœ˜¯ã`„«6Éã—@Dù˜êyPâ`©÷OýôŽéCìÊÒD^MbÕ\uКq°¨R¹^úrô{z]â`%XPHxU- >„iNd}i_ÞÍ› >„’Š\m»‘8X„nÖí É¡aj3Ù*y°%%fÞWa$[%¶´JàÁÌ ŒT™¤ÁLÌi4˜©»* &Û4Œ<…¤Á–«Œ49z ÇŒºÄ7P}Gâ`"Eà`]$FñF ÉãÁdBäÁÆu3Žrñ%õóP‡`v¨CÈÁÓ½k´O–ˆIŠNÞõ-ÀƒínƃLÁƒ  Ar…ˆ’jæè‡h¡aŲ@„±7(DÄ%Ï­‡¸S!ž̆D‰cMD„W"KDèb“”ZŽN©•¨!?]½5"ú© ´!þ?qïÖsI’‰½Ï¯¨Gòajóyy\$!аßb Éá‚Ý£–ôóå–'#ÜÌüTOU5ûÓbw§¿ã׌Œô°07WìL¤!©é¸FȤHC¼$ÒÑõ_ާ+C`ÄqêT6EuúD +CxOHÂæðka`_S" ¡ó%Ê¢ ¬Í)¬U²0„N¥èBØ”( ¬ONQ`I¦U``ñ: |7* ë’SX{¡00†©ÇþŠ,„U©(°¤ß*(0%+0°Ô)0°vEP`C68WP`­’A`ɦ 0T´÷ù«(°V( °>Oµ5uJ¶:Öiv˜_u5(¬CWØ€e­Î³vA@`–€À:,EµNEEÒÜP`y±–´uëÊRغ)(°^»( ¬CWØêdXŒ À:8õ±(¬5*,U*¬«@A`kN@`­’1`ïISE3¾¥R X«تdØ«|0`ÉCh0Î÷WÏ6 ß«ch€1Ø<)Å€uÝ)¬½W XçX@`Á€mÙ lËNA`ë&ƒÀö™eØV ƒÀ^£€ÀZ£€ÀV¥€À6)Wù¬ºg lAQ`]B‚Û—QQ`ë ÃÀ¶Èö~Þ0°wP`àÌ1˜Ôk¥mÍa`šÁÀÚ‡e×3¸ÖwÏ`àÒ¦ÀÀ2ëJ3¸ .sÀ8°w‡q`ýÌì}iI§%…¶ÉX×›áÀúÚìà ¸tˆàb Øž¦Áe(l³ª IDÕÒ€`Ÿr‚½+ûk @°î6†ë«j`°l†k2XÂ}}ìR°`_Ë‚ÛCf,Øm‚ûÄ ì},ØŸ±`Á>w‚û@ ö5"XpJûb>`ÂKµŒ—Þ ìã¼¼ß'1=ø[´‚cB¦ýɃW à†àæ+^㪡iº ž‘ qêü Å‚‘ƒ âaBÉX°åºT,Xó®›Vð†§Ñ¯  ^ïŒbDSnìVËO°`sÑ ¶kD¥ïÑô€õÜZÞoÛ¼#oûÕN ¼¥º:YT±`Ø–¡°ªX0à™=þPe,8l1!ýîQÁ`/Ç`0’ÑÆ›ÞÕ? 
Fk—_U4Ø‹1 tŒ^ŒŽ#7Ó>n€ ¶[AƒÝÆh°Ö©h0léú2ì6Fƒ«BîFÄçà!9 ì6Fƒx2ë ­ ì6fW[FÝYsÄF)(ÖtE FƒÝÆhpØÎWÞʶž;Øêv°—cv° AèÁ@)’ðez°·Çôàj£Ð;Óƒ½ÜMö™ì6¦{CL¶ =ØmLvÓƒÝÆü`ë‹ðƒ½óƒm|"á6æ{Äv„«‰¢í¬5&[9!»Â6+"áåX(Âú"a·1EØm,á}a©ˆj£;³½¤"¼÷¬á%X+å ×õX+¢Ú(¼Îl¬á6ÖŠ°~ªV„ÙX+ÂÆ ZV޵"¼/¢±Å®´/oHÂ¥kE˜I´"¬'¢¡6ÕŠÐÑ©V„¶§ZV§hEÛÕ¾°fU+B·nÕŠ°:E+ÂÆ ZÖÞ›.Zf­µ©V„6£Zf­ˆb;y“›ˆE˜MÔ"tU-ÂÆ jÅvñ&+6–‹ÐU"rVJä"Ô¦š::ÐÑ©h„~ßU4Âl"auŠh„ÙD4Âú)Êfå«S”#Ìvv_Vz©/k6ñeÙ_Uù³‰|„>T•0›ÈGÛ%›,ÛD>B_oÕ0Û®¾¬Ôy¨/k¶k„™.õeyͪ„„ÙfõeŶ|Ù—U +'"fÛÔ—[S_Vlû—}Y•‘0Û¡¾¬Ôy~~ÓÁ닾¬ËHTãõ…Öe$¤I—‘p㪭7õhµÍM]Z-Ù̧͗Ùe$Üx¨W«mžâÖªí¿V+½¾èغŒ„gumÕ¸¨o+mªŒD5ž_Øx]FÂí‹þ­ËH¸ñP×ââªñì>®þ|©“+F•‘pã¬n®ÅÏU㢎®Wõteâ‘‘pcS_W"#ámîêíªQd$ÜxŠ¿«M^âðŠMe$ªQ\^iQe$pq ùšºŒ„½ì*#Q$#áÆfޝôVd$Üx¨ë«F‘‘ð6Ë.ø½8ñô}8q¬àv\7¡³äl…°ÐP½WœX &6Ž(ÃÄJg(±!º„#¦b_+e”ø¦\cÙ¾‚²%/ºßå\%ŽJbµu–«€ÄšËC@âzÇG{£éX½óHQÇ åÂ9çÆ@p’/â ÕÕA‡æ„rf“„r;Ö@Û†Î'”ÛffÞ;I(‡ˆXV‰ÌrB¹³‚“}Ñ€Õ’J/ç“kCW#šO.Ž“K_a¯ Z6ç:äW>¹v«2/ç“k8–øAÍ'2í#P\óÉEK ôrB¹ ό㒄r8|§‰&”Û0êê¡ åbÔsjÑJ>¹ ò’/'ùä4[g¤r(:Âhî$ 0a 7øc)÷ÈŒa$l%;f C¼»í=¢&•Ûv<„óŽð=ðÔÂѤrt¿TnÃR‹‘?ñÛBÞ ÌÓ¡8‹Ž0êl©õ¢œá ¸#A°ÌÖ4¯]Ú[9Ãß<âP”3Œ:¯fÎðO;¶«soÎð†J¬ NNfΰWÆœa(«”¿ŽyÃÑù{<±¾ÂÞpÏïâCîð†èŠLi&äáh/Vzæªcòð†ÐHV%Üá¨z€Ce˜ÉÃQå K—ž`ò°Í•‡m}1yx‹íw_ºVð`[àiŽHå £3¥›cÞ°Í•ð†¡zŸ’?JŽ.tTâ0>¿{¦âptsF˜æCúæ0”í·s'uXç_˜Ãx»IV˜ÃÀa"*3s8ž[,ÿÌ÷bC¸~Í WÂŽ­SÛº`®0‡mA s\™èI¨Ã+ÂoFF!aë#âðŠÝ{Jr0‡ÑÚr ¹‡±V3¡­2‡!õfŽ.aoH>›i–„9l3ÆÔáÎGÂörÌöö˜9ìå˜9 ´aÉ‘”9ìå˜9ìånæðŠÐÒ6rtsXŸ¶1‡µ%cë·Ï˜Ã+³ì3‡‘ð%\KWæpi“™Ã:ƾ}¦‘¯Þ˜ÃhsŽƒ\—$fâ02´ýÉQ%¬á2;Ä.Ík¸ô“YÃÒœP†½˜P†Wè*R~3¡ Gp:»Ò‡0†Ã¶§,¤1†ÃþÚÒ†3†KIf —Î2cØ'GÃÞY& ãE ?ø‰g„a]ÂFÖÞÃ>@! {7…0ŒC±ºŸhm# ëd„aäõ¹h„Œ£äÚ–.æeŒáR’Ã¥$3†Ë8™1 LÓõ:¼*cX–#Ó…ñ˜bª¦—ûgtaä :æ.ºdtaˆž øO….¼B‹qï*VFÆøã¨< E ¦ «ootauH.¬Û·Ñ…ã_.Éy5ºð ‚jø|]²FèÂ+¾©ëºF0Ó…AÏMÅÕ]X | ü Մט‰G/ È _È–ÔO†EOxzð Ó&"²“†5C‘ H¸ö¯è KBÓ®'†G¯‚õ#Œ|ªrÂôa.rÂqð]â{õÞTNx†n¦œcΰ g¸špxÌôžR†g䥙Î!d!jÂðU·ü_ä„YŒCå„Og<'C“†ÿ”9ÛDNxÇFÐR“Æ'sø&'Œío¤759a<ƒøô?¹…2Ÿ—3?CN86“ñqVÆðŒ·hl¦&¬"#ž} axd2ö% WÅ`QÞbIvˆLÃ3 £±©)ax°ö¬YÂ< a†½ùj5áN»žóó…g€EÛPÊc¾0€ZdøžkJ9:Çnþ SžG¬#I”°…ã5ÞÄŽq[8l±ì¬.lá°ÍHãØ]f £Î¾Î ¾e^a¾#º"&.Ò?¸‰Š ÇÒ Ôà*&ŒsÒ4ò©˜°Õù–ø ¾ HÛµ÷TKøÄë´‘°i «Mµ„ï-z¤ÏQ-a{ëH£ZÂ8ÙnãR@Å„q²]&R˜ 1á°-ûHØÂH0lk;Fê! 
{9>¡Ê24ïDLXÆUM8þ›”‹™+l&ÑQÓN³t})Æ|w/BÆ¡ öÊ~;!Tá°­Hû ª&l6QV=m¡ »©Âa›pù@[Bör/ª°—`ª°Ê)U¶X"OøšR…«ä„ÍÆTá°HUú€vB¾e#y­R…½S…'|ĦÇP¦°c¦ð„Ë544…)lUQØKQ8L±]_=#©…Me\ˆÂÚ˜ð„­Já [1¦ {1¦ [O„&ì6¦ £Î8¬thAhÂncš°Û˜&¬šÿ` Ç'ðœö'IIÂN£ãR[IÂÖˆ„µJá[•¶bL63„:l#ɲ2„ÑZ_ì†3„'D·Á†ð„CÚ:X¨ÌöbÄF)p&žï¶0„Í& akM†³$ó–Â^nQá3±1CxÂÙt¬Pa{?™!ìå˜!Œ~â$ÓÐÌörÌvÛ‹!ì£f†0$ÝÎ}\/ CxƵ0¥Lc†°Û˜!lo¨0„ÝÆ aË[! a/·}9†0„½3„ÝÆ aÛ˜!¬{„mß‚°U(a+'aÝ~„lý~°7Çü`Ûˆ„ìå˜k%>×=tSùÁÕv~iw~°·wóƒ½óƒ±¯sêÚ ?X ?XvD£—Z™ì™¬»³±ƒuRŒ\ŒÌ.av°÷‡ÉÁº·+9X7[%—q09X·1#[o„ìµ 7¸™üÆxò–K"ãû(…¬¹qƒKIæ—17¸tèÅ .­17ØÂ .FækÚã#sƒKµÌ .Fæ—j™\J27¸”dn°~°Œ¬_AåqƒU¦Ô¸ÁoŒäåZ­Â ¶….Ô`_:B Öï¯QƒK˜¬_g£—’L ö‰jp©–©Á¥ÚË×÷÷é ÿá7Ó§¿úΨo;?ÅOqæ¼ÿóÇ×våãø¯ÿü÷ßüÛ÷ Ï1¹×'á /_à ¿ïÌwAÞߨ(øû×ntFï>Öþ> Ç4Ú¿ÿóC[‡*ÇßÿøØñß“þ<øþÇ’çÃy½ýû??´õפ÷…ÿüñ¦¯:ÿð3=9æõ¼aîø®lŸÖ©…¿|ÆþµÄ®ú«õòßÎñ·èø§~úôO›õ_~úçO?üÝoþú‡¯méÎvqìÇqàžök[Ú¾£¥tÓc;ç s_ÛÒî-}È^™[ô¯¾hž]ùC÷J)ÒSãŇ}p?µtRGé?¦?ë 4ä!xyÄøAýA4ÁÑjWžß?¨8 Ñ»§$Æ}QbÒ×û5{·nkV»ñ±kýñ5+ýùà5{÷'׬våÃÖìÝ [³Ú•wköÏ» lܧħ+*Ùõ¢U?]Ó§öúr}óRZYNÜÆá눓ÛÏ´Ò~Y+û|ßX„u_k+Woeþ…ƒ¹ïwA6\ã@ö3ÍüÂÑ,÷¥×¾„/3ÿL3‹æÏ5"ÊqE­ç'ˆÃ5Û·žwoÿs·ð/ø×Ÿ~úÝÿów¿ÿýï÷ÓþÇ?ýÇö‹¼˜O_qŠ=Ë©€PɃÔ×O¬¦&GhhGS[ùæûÈ Æ…%]ŽÏ×L ÍO³Ý¯×dëK¡`»ƒ(Æí‰xá/ó ‚Fgᇱq¶¦=ì¸k¶c/ -ÜpÆ£‰yœ«F8&·y¶^Ø !ˆø|]J†ñ¤K¹a~O·6r‹>ÏÈš»ìÔìm¤½Ÿ‘EÖ  û¶ø@/x&=‚+ŒË/n¸~nÇÀ^$K7½ºa\shr ìE²Ö„qçËI"|ÃsïØ ç´ ÛÁ·7’k&Œq’Yô")PÂx½îo$uò<#ÉÐÞ—±¦T¹¤®šË3žêù,cË·¶EµÀIö:Œëçg O¦ŽÌMÆÕÀUàÆ+•hUmvG+¥hwdÜž»€0ÆK˜R´{¼ÎÇV¶ƒÁ9rذ‡-nƒZgzÝÆ”W)Õy¾H\5Xç¡5CˆVR¿†qfUpÉXF<ò!D»ãò­G•„qe]päÏ‹…ÛŸÏ¢mÈó=xÞó-6„h9 =lѿԡÝ@¤Ù*¢©C RLë“mŽíkJÚØ®ÎpÚž¼t0’8¸W .ÏС]cÍÇýÁô^pŠLëüÚ¨æù´ŸUŒ¨û˜¯Î[ ãÌâà «ôô(·íZÇ"–Áa\Xü¦¿ zë Zo‹75Ë`qÏ` § €W;Ã1±dƽ¤ /ÊÑ#®ÃvþXÃØ™ÖÁœž’5þ–²òÆ8汊Wd:ßùŽ ê«XrËÆû6‘4¸$ø½mç:V±ä| ãÌÒàá Ç{Û¿F3Â.ÛXÅœu7l«fˆ¤¬©aŒ‡~ŒEìýÙX\rÞâKñy¿Æ*ž‘à!†mgap€%±zžÛ²yC„ÉXÄš±rÞN·u·!ÒXÄKüË8¹ôoG¸ÊÇK!‚ÒïÆ†Ô†´)7xg—ŒÄa\xº¯dɲ{I\GßÖÏgîÆ1áˆÞ~-‘ZšuÁ%cnë‚Kç0î, .©ŽÃx°0¸æ:…‘„Á%¥wO·gÕ®/&‰ŒMœ„Á'f=”g†ÊBîÇÖ0’0¸&HŒ—ž…Ám~ö•…ÁÁjˆ?^ìaÜX\Ò±08çÖ½m¤ îýÙ5ºöç`]ðX"×eãɺà>yý-&à_Ï:¿¥w\¦û˜ðÑÔ©ó±¤w|']öžæ0Œ+gıE~lé[XØH–%úç°¡r26dË;; ³¸ñH¹LÚq²4‹óLÙ“†¯”ñ­>Ûç”®’ù:LsúÇž"w>Šp=qP…Ñ¿·aréê¹R+ª}öÛsKßø®q Kwã¾—Óõ†£Ò©¹aÜÓ7ö4§3Äo2_¯$+ïdª_Èr:#»ëØŒ-?ꌩ›±åež‘üøM7ñrÍØrǘ¾q)¹oŒ~^›x­|ñ9[¯<%¤@Ÿh 
«±‘kŒi[FÀÈmLר2aÏHQŸ{Ýxkl{ç;QÄXı¢ÃýêYføî™±×'è’¤éñaïåð'Êš. ÕÃ6“slÉ¥—;!çXÅn\É9F_!-Æ3§Ë²Zî¦c%ëÊYp_ {u€‚¤s¬:üMž®OòËœš?]çî"ߨKÞWa–P>~É=.ÆES¨Ë¬Ýñ!c!Ãe:8a\9…ºMH}¹»±q u7îä[úd%…ºŽþ Ù²V/6r;S|îDõ0^ä û -åP·‚Ëœ²½;°¥ƒl¯Ç²,œCÝfvYÉAöù‰ÃZzÈ>Е;h)Ë¢C†Ü”mf—F©KO¥öGPêZrcÙ;ÔØEÖ=@PêÚæÎ.²v‘õ•”º¶y±‹lÕJí; £Ôe”ŒR×Jv‘FÍCÉNPêÚäÆ.²»È"™¦(umsgÙæ‡QêZíÉ.²W{±\ïË-39t”º´Æ(uyÈŒR߯·o~6A©kµ+;ÉÞ•ä/}—ÚR”º–ÜÙI¶á3J]«=ÙI®Fr’}(;Éx÷ÏG3L@êR)ÔÕ¶°›\ä)Gƒgòq¤®Æ]åh¿%eG@êò0¤.ƒÜÙSö&v•½É“]eo’C%|£.oe+¿}_tî·*6~™Z‘ #SëwFÁjU_4°z…ãšL"#S¿\ˆÎúP´Z\g’I­¾¦¼íP´ºTËhµq­¶£³ Õ¥V‚«c”ËAl!«`…D4¸ÚæGáj£©+\½ÜrqI¸º”d¸»ù’×° WÇG"|Ò!j¡xµ¦V7¼Ú2o)^½œÆ†âÕFªïx5>  7u¬RðêHÑH£bxµ*t^ |¶/Þ¯Ž6cämЯ™Nƒ«¤ùcÇN®ž¡‹4R1d=‚KˆXkˆû‘6¶"Ö“Acéä?A¬EÇSk¨†8XÓ X[1Á«U6Ôðêbd¼Z•C ¯.%¯Æ€Ï6õ;4Å«g@"mPÊ®F,¼¾¯†6ãÍxH£‚W¿1¦;>ãu˜C¼Zãç:^íõ `]Œ XC2%¥ð °.F¬g€s ÄŒXÃx-3b )LÒ£VÄZôU °vãÕxâË4bÁ«âBRÕŠW‡q‰Ís HŒW‹4«ÂÕ3ܹ¬RÐêxßϘýq´F’£V´fñ РÕPƉEÝóVC³´o¿?:X=cËoÃgT°z¾µ¤†s§`µŠÅX­ú­†VÏHEÓ’4+huã ç”jÓ˜5´º¯á$™ájUb5¸Úû)pµ(ÍZ=!ãÌr=*Ó†V«àª¡ÕªÔjhµÊÔZ/Ó±®ã®SÐê[ív@ V—ÎX Û8 $kšÀê inâê`¾àÕªÌjµõF ëén=™¨YãÓbk=£¡Õa\â¼ÒЊV[°îƒVë³P°÷{|‹Ÿï”‚Õ>«K[ VûÓ°Ú‡ `u©–Áj<Ô§žn$%IlOI„S°º …ÁêR-ƒÕš@YÁê°…'s<BªÑ\|Kú=¿"ÕHG°d„"ÕhJc­Hµ†T+P=AÛôÇuªÑ$ˆƒÎ@u”69.z¨F“g¬²ÇWQ Ú³Õz¹ÇãP mžƒ™¨8µÏøƒSûóœÀÍ•ŒEÅ©ýù N=LK"¤âÔ6áSC5òãÂA`ê jéÓ9®*¦ö ˜Ú›d”ºÔÊ(õ‘Žk˜¨ Ô>? 
SûÊa˜Ú+˜ÚÇ!0µ¼‚QûóŒúÞÔ·T)FíS.54Âõà¿€Ôþ”¤FîmÈ’‚Ôþ¶ Hí½Úç µ 1|/HÙåçø÷ß„Pßj)w^Û P;ýõjŠZ~‹ÚИ‘Úê„ȱÜMíC?¨'°Oú^üP¨Ì?N€³ƒ+ø´!I‚OÏH³%_Tði‡ Ÿ†ØP¦µ4|ò>WK…Á§£Ú%SQ> °ˆ2÷)>­ù‚ ŸVMsçÙ|Y~(õ›âÓQíFYÏŸ6hç§1QZ3…§ ›t¸1eÃ3tÚ(A§Q+%Stz†væ SxÚ† è´bŠNÛ¬)<-™Œ ž.#xç¼9):‚OÌ&ø´•S€Úf@jÎžäø´iŠOx§ø´bŠO[“‚O{_Ÿžá§èƒâÓªúcø´$tr|ÚGò§á©,)÷ ø´ÍŒâÓ¸™Rš@ñiƒŸ6ÐOñiƒâÓŠ*T»‘jš T[›¨¶~*Pm7q TÛÂR¤ÚK Rím Ríý¤ÚÛ¨ÚÛ¨Ú«¨ÚU»Q j»È¬ZׇbÕZΰjmѰj¥aÕ:?†U{«öj«v£`Õ^í¡Ù:µ¤`ÕÞÛSóuê Vm3ô`Õzoj`µõDÁj¯OÀj릂ÕÞ¦€Õú¶X­7®VÛè¬ö¡XíC°Ú{+`µµ)`µ\W+\m½Q¸ÚT¸ÚVŽÂÕÞÁ«½ZÁ«•`xµ Dàj³ Zí£´ZoÖ ­ö’WÛ"èpµOáj7 \m­)\í­ \íÕnÊæ9U¼Ú•âÕÞ¦àÕÞæùe6‡Öú¬¯¶Ç!xµ–¸ÚzªpµQájopý2›Ãk/)ˆµwhW6‡ dW2‡vöPOY¦Ukï¬ÖÖ¬­ds’Ë÷ ÌDªþ&mj€ëzï-¬VÖ¹‚Õ…IüõÒÔHv–Ú1Võ>®€¬Žãå:¼XJ¦v|\°ê ¡ ™KÀꜿkÜ\+XÆðÇå­’©‘À¸‘ÚÕ¨ôÜZ"¼B¦ÞÀØ?§e°ÚZ42õ±‰qç¯\êÂÉ–*5>ÆÓ¸E4*µ…J_h†¶P©«‘¨Ôn*5’ðf4µ2©Íö©õWçQ›hÔ øµã*Lj`¤Ç>.PFí%…FíFáQWc:àÈ“·]ãJO‘êbd¤º…HíFFªáP`eŒ=ðcÆqèa”1Rí6Aª½RAªÃGjl T]Œ U—jª.F†ªKµŒU—¡0V]ŒŒU£Í8ðŽ«Áª1®+ZÜnƪQm8ˆ#Vù…UÛ›bÕ^Ÿ`ÕwJ{8ƒ]fš±êbd¬Ú{"Xµ^°êR-ƒÕoŒé€—q2X]ŒŒV—1Z@‹'I÷ËHpõ‚ÀÐiKr3ÁÕ>³W{w®.%®öî\]Œ W¿1¦^Œ W—1\]æ€áêbd¸ºTËpuéÐ ¯ö ¯.FÆ«}ʰö®`]ªeÀÚû)€u12`]ªeÀºô–ëR’ëR’ëb$ÀÚm XÖoŒoÌ2Xû@°.F¬‹‘ë7Æ‹7f52`íŒÖe( X—j°v#k¯MëR€kŸo¬‹‘ëbdÀº´¹Yémû2«CëR-Ö¥Z¬ß/Þ–µZ¬½? 
XÖ^©ÖÞ¬KI¬KI¬KI¬‹‘ëbdÀº´É€u12`]Œ—…²ñ¬½+Xû” `]Œ X—ÖVõ—%RëR²©¿¬½mê/kI¬‘‘²êJ!€uéЩþ²VË€u©ö™ `m-*`m•*`]J2`]J2`ýÆ(þ²v¶©¿¬mîê/«ñPY«=Õ_v#ùËÞÛKýeióX—_gõ—yé(`ýÆ(þ²V»ª¿¬ÆMýe56õ—ÕÈ€µE+`]Jê/ˤ `]Œ—ùË\-Ö^ëRŽkȲ¨¿¬%Wõ—å™b]J6õ—µdû²¿¬u©öP™7<Ŭ‹‘1k_Ë‚Y{‡̺ü̘µW(˜u12fýÆx}icV̺tˆ1kÌ‚Yã¡þ²¶É˜uéí©þ²–dÐÚK2híµ.åµöj]J®ê/k“ŒZ—’ŒZ#£ÖoŒä/»‘aëb<Í_–Þ2lí“ °µWÛÊ[þ½¸õô¸5¥M,Àu¼ÁÇ42ŸnílhÆ­±ËOûžP0ãÖ#Sk£ö+b­1cŠXO˜àië¬ A¬=g˜ ÖÈ7žø†X#õÓ±¤H#Ö°ç·d“¥‹iŸ]•J¯¾sМDfÄz½ä=ãÛ±v‘t¬ca|Že˜%cÖ%ccÖµdã¤1 ®\ƒyʘõ‡–ã¼R"™1ëx ¾Ö cÖkœhv;Äœ´Fµáµ]ûB­×òéû¾wþ$ÃÖQYÌâzŒIɨxJ#íɨÕ®û9´|%¡â!oâHBÅãžþ Å—„ŠQ+²ßØHI¨çÖýÜ÷·°u­–*âÄ»|`£ùë¸(TI®Õ5¼DaX—jEÄôæ„a]ÚI¯–Öe†Dd‡ Ä‹óò£ÂÕ¥VfX{w„a]ŒÌ°F­ë”)‘…a]J®*œ'†ué,3¬‹‘Ö>í°.½e†5JÆ?왟UÄ 2ÁÚ >üj¸‹ë–ù …_íE˜^] 2½º™^í½zµ7Éìê°m &t'YØÕÅÈìj'"¨»º™]]FÂìê7ÆÔÍÓZ…]må”]Fœâ{žmeW—Z™]]J2»ú‘œäù{zo%WÛÄ*¹ºÔzhbTru©öÔÄ.ÒæV{­/nµOŒp«KæV[[B­v3«0‡GÐÏ×ʬ.FfV{­L¬.]eb5òÝIÂRbu12±ºô‡‰Õndbu±±úíäý˜só(±ÚG)ÄêR’‰ÕÅÈÄêÒ!&V—’L¬ÖäEÊ«ö‰^u©•yÕ^+Óª½àC«öÆ„Vmõ «ºÔǬê7Fò‘Þ¼mí7gª.%™U]ŒÌª.Õ2«ÚG¤êRIÕ&G«¤j«•9Õ^)sª½NáT—‚Ì©ö™R]jeJu©•)ÕÅÈ”jo’ÕncBu骋‘ ÕV«ð©½`›ý›¬ú¿ýÍü B€ëúé‡ûÍÿuCÐ_F¯ã‹€K×.W’ Zók|2dÿÕ®õoñ?qêøôÃOŸþé/þêïÿ—üô?ÿï?ýå§þôÃßýæ¯ø.Œüë(Ü7®9]{”lŸV(¿åá¬.Gtôõk‡ê¿mÀÎËG|"ÎöçüûÿóÿÏɈç?w cŽÝ©ŒøÞ±¾c¼ 8ÐÓqÆ÷{ÿ™ñþõÿøÃ¿þñ—Œõ;Rvê o0® ûþõ»~NÛ‰±?3ðøÓ¿þñ¯þþ¿ÿã/û·(ÁëÀ·µ{[¿oÐ[ìGl¯G[ÏŸ]Ýÿýÿæ¯ÿá—­íoPÒ·ýÝ“¾ý®A·B uöëgýãÿø?þßÿùÓO¿tÐ_Š¢ƒ>¦:äøí»¼<œ'ÜýÌxÿ·ÿõïÙXá }®ïóýë÷&\øÃ½¥† ˆKÑ•oûzÿç¯ÿ\^¨þÁë?ÿý7ÿö=cGný\-_zÌo;ó]wÈߨ(øûWo4¼3 ¯ÁŽ¿ö„e> '¾ü½ ÏÚƒûîstaüõ±³ðšûgŒ¿Nö1O¢wáùãC{ðÌ}ú_¥·ƒÇÜé|仟ÛoÏ­Á-&œ‡uj8Ï„[¾Å•øí¼Eå¯7 ºã~EKÇ}Ň›yº¾¾¥í;ZŠäç0 ¤gþú–voéCvÒÜÀõEôìÙ»“òHW:óØÇC8—nhvçèϽœ»Ãû#ÆêÏòB«]y~ÿ ^àFkßß?%1~ð‹2ÓU̇¬Ù»u[³Ú]³è¯Yéϯٻ?¹fµ+¶fï^ØšÕ®¼[³_ñíÂm-¶}Åç×üÇz!ímýtMŸžóÏ·!¹•åDʯi¾Ži)§,j¥ý²Vvd€Š]{L]iåê­Ì¿p0·”5’­×òsÍüÂÑàúê:÷嘗ùgšY|4®‰›’w^Úˆgòy;¶ð¸ç7€Öôjá_þð¯?ýô»?þçï~ÿûßÿî§ýç|t \òœZg柧£½x#åkâÇ+rÃÎ-™…Àd;÷9œ5º3’tk+ŽÜ¯d˯,Òœ¼FÊJ i;×›úPWÖëú¼`0½7,ZpEëÇ6÷hÄÐ×%e|¡.02ázç2éd|ÜwǢ覤Ю`‚rü‰ÛÕe„ª¬`‚ÆžñèLÄŽ²%Er92ùœò6ü6‚éµ=Œó㌷-9@0fÏ2'Ua `?[Waˆ•ÝòRmÅ•hŠ)à‚!3õa—‹½)%ZËœsëÌ‘‡(@–Ù1 çç+æï%P0}n[æJXÁŠ|ý$¤”%_Aõó°îO¨Bì\¶‚› ¶Èäæ+H·wh] Êtéø]bð9Ì~½Õ ·Œ¤ç°ÜDå Ð(áäèm{â4Hx]vŽ‚Ü>Cºg„ìÆwñ_ë"‘w».¸ŒÙ:ÿ?^‰sh­_¦ˆ”x—â—.û2@· ¿æÐtE€c³£ú˜ã–¡Ðvµ‚ Ï!Í 8õ 
‰Ñ3ˆ¯¼LoâËA™ëÚ2˜ß¢r`£_ Z]×=ÖýFÁ5ì§E“јŒØ0?ã%Í`1^y¥!°Ý•W艆īÍÁ|yÿ‰åô,eÄóÄ{×™¤‚Ÿ¶ –ðÁbOzvecE ²'Œ|\Œ˜DÏJ'piƒ ºn-#vÞÛep× ,„g[†qCÂø¾-oµŸÌÜu6Йû¶ÆxqñbÝXƒ£!´áD¸yÛÕ÷åv³*6z ZE› · !Ó^ß—Ã~Ü`À˜ÁР[â«Õ .øN>K¹Å:ŠÌØ#ÇØ7„Zl¸x{­«Òbß–cf£_ƒ¼¶FAi Q!ûÞ5S`¼Ö¾-‡qŠW²= h&ãcÞäŒø€÷ln}[nˆ'ÉÐüA´þÄØÜ϶6p;i ¶ŒÎ©4¾ø{/ß½oËÑ~¼ÎšrNˆ&ÒxIWüÑYwï“‹ÑBо«‚À’¾–û³äë—xÖ2Þùe¤½µ5®gWƆóòÈÝ[ÿˆ!+-Æ—nê›rÃuù4î«ñ¥É0ñÒbÌÿÜwå.IloOh äeR… ÁÅ!ÿ#>}ñö¥ :ö‘Ÿ^èÝŒÈ=Њc¥=‰ ê3­}WÆ ‡Éá»1ƒAK­ïê»rãó9²áÆLQ|1ªFÖxx#ã9lKøD=üVR±’ qA±eôwšP=ÆcFú÷g£ƒ«2pëµg;ú¦Ü3 ¬ç r\Ix¿‚Ð{úžÓÿÿZÈöD˜±©xá+vëDö×±)7( ´.Óò‚†ñçøzÏç‰è±'{c¬7Æø”Â0^óØ“õ Ÿ-µü‡¿Ù–±%C´`¹~!@–˜ís^žæ@Ìû1’y\#+1Œ [êŒÏÐð“1Ùçy¬ý†êš(öZ §yN²×y-Íï«"Œ×p’AÆßS+ŽË¤Æ}¿FJ^ɵá$7¤Éôãðÿ3¸%§Ô[[¯x®ÃI.Õî¶ÆíHɹ[f8É¥· Þ ÞLÿD^H…Ówc­zy©/€‚±ÇœÏwIß§á$‡ñ‚ ÝóɆ1u+lB~_j?q¨JE”†#)œG”Ì6ƒOe¼"#}r €¢ˆ£d8/S×e€ñìNrC˜×·üeÛ)0ÝÞaœ·á$Û³‚6aj´›Òº¼‚ixÉeø)ixµÈ ;ÜdŸU¤ÿnrôv¢ÐP¤\^†›\ª])°¹TÏa¸Ép4pv¾ÖhÌPy[“À Ûp“ø jíù–ßìªá'種Tu‡ ?ÙŸ5¢r†Ÿ ·¨­#oõ_5ü䆥´ŒŒ×w<[÷“pì#ô¶7üd_ÍN~²× -Îô“mí½®Êðs8»CïM?1¾™×éÎU”~2‚Œ§‘š¯súɈÿÀð5ÒO#>½ÜAâ5 @ èc?3î¦ö†‘Ê ‘†tìÊÖu")way™‘éjO/ÙVNêRŸÂçnÅ?¦YVÎÊ¢'þAM7Ù«m$¦S:´ã˘KY–è¢é&ë´ßtÑ–+Yž¥È ?áÌ>‡pD¢ïÃIÖ:·‰ô2ìQ‚,š.²=°EÓG†ñ5²^ÃxÞ뺳çH÷½m+Iû”Bá-¥Œˆ¬ G—Ÿ$£|YÅ™n>²­*0EÓIž!T747PEÓIöž$ÝQúsÅ~´åJ–õ®h:ÉV2ŒW:É6AñwJ 5Ëž|ÃO'9þá´„ð¾Sé$‡ñHt/¾;XØ}!rÚFfbSø¡” ·-½dœðÛH‡½–^²w(ü¶ô’m±â¤vО,oNìíÉò8é„úƒÂHºÉ¶ºä²Ã:Tk.Ú’eqíÏË×§þ}:_Ë8U¨z*>Æ[¨zƒf|Ÿ`…ªãuºd¨z˜Fê;Ū7(¶‘ON±ê Áóȸ&X5Œ3‰À XÝd#ù+«± ]$U$`õ½G¥`ÕØÚbUðyÁªaÄǵǹ V ã<­#‚\°jì¦Gœ{¸`Õ0®ó9B««ÆËn; V ãÁ(cÕ. "X5Œ3ÃŒUã‹râDûxŒŒUo7†H cÕ0.ŒE1V cü£ª$X5¾p'cVýúò*ÂX5ŒÃŒUø¸ÁP5lñ´·ž’¡êûC#ïBËUÃÛÙ6€c†ªal _0V}ˬðÉ—±j>I3X}» {X;èÎ`5Ž‹ŽÅŒUßÃSî©#«FÁ=Íë#! 
X5ŒƪaÜæÔ"¬úv€Æ—ê—kDgXFªaœø¸ÍHµ‹ÐR}Û`¤ƃNTŒTö?ÎnHõmŒ·÷xÞeª]öF€êÛ;Œ}jÜ=ÜÉŸ:i3P}רËú»¸‰< ¨†qæc8Õ°MtV îEgBªa<éŒ*@5Œ•¨®Fªo¯šÜaªal|òg ú6ömäGª«‘jgò©Æà¢ï½ Õ0ä(RýÎ8êÛH|AªaŒG;·ù¹Ïb¤ú6’ß'Hu5&R}‹-‘Û'Hu12Rí2MUø¦ÃØ‘êò3‡È…m!‡O€êÛH>Õ0'_^ª«j'r©¾±¦cŸ,¥t!$k8 ‚TWãÅ)%@´¹†L½ Õ¥$#ÕwIr—ª~g¤;@ï4˜ÈMPu5nœ» ¿ë±¬ºwVxªÆUWãÁ9ç0]Ç5,ŸÀj£§ÓÕ?†VƒÕ.&`õ]?Ý e°ºoFyù•Àjãå]Æuå)RSÕx%­ã‚4¤[­¾máçw7A«kÁƒsÄhŽm¬kµ’¬ÓX—j°®F¬aŒ§2²— fýÎ80ëÛ˜ÂaY!ÖÅÖ8c ´µÖ‘h]ëwFJˆtBSl$wÀº–”L[^2k· `]X»žÖÕø¬oi<èá<ÖÕH€u56ÎÍ$Š{ŒW×r;'ý‚¢Flõ )xu5žœL΄W¿3’䜯®Æ™“`ÆC=Âm}î䯮ƕ“E@™Ï~x¼º–Ü8 ™—$¼ºwNrçF«ßIï¼?á?Tß7­"CÕÅÆP5WÈ(µý>êû÷)>œÏ) GÕð?'lZ~—,Ô¸ðßFz¥kD÷¼ ÁÒµàÁrŽ^’€éj”ì²@§ X¿U`º”d`º ˜.Õ20]LW£äô6 ˜®%ëKº‘€éj<8ï-˜±¨žãŸ Óÿòèéi™v#Ó¥N†¦KÁ…“YÛy¼)²²Ö%x?q¸ޝ‚KWcãì»nL\ú’Ô¹‘péj<9û¡²U—.%—†ñŒcM§" .]ªe\º–$`º–\YÖÕK0]Knœ5ÏK0]KÑK0]'gû¬Æ+]b7²¨Ù˜®FŽ•u†{ð€‚LWãêøû‚w¿9œØ j¤{ÄÛ DíŸ@ÔŽ²0D­~Œ"Ôù•QppÝ´)8íЂªéꛢà´Í»‚Ó xWѳp*=q$Ф€Ó@Fc¬KÀé`4³¬WͲª%œ61^§M„UÁigSpÚÄ6œcœõ·g7Ul‹óáÀ-›6ÅŦç—8Ò˜Á¦-Ó£bÓð÷bét0°CÓñ3Ú1mMÇ£‰¯æ¸ˆWh:Œ\\°i¸»lìØ+cÓXc±J6bJ'6=]÷¶™DqƦ£äÚŽ\‚MOÖÌìiŠMÃ7_˜bÎØ4^¿ë¸~Nh:j]~X7&6L˜Wf+Sl‡Œi_:t/Ðtì×”Ihš†ÄöÎk†¦QòÈ4FŠNÇt-™ÃGÑé(¹‚»?0fF§'Äõdš3E§'@?yÕ$àtØ0W]¬ZÀi„¾œ™BÁiè“o™1CÑé ÈrìrƒŸÌètAá låA§ãç6i[ÐiŸ ÓãÛ3Ò¨¡ã73Ý›Ðé(4#;„FGìs""°Ð¨!µ¿‚NCôýueøTË4j¨Å/ûPjVu´gÅÜ„F )¡%s (úНdè:ÝMhÔ>áQCö¾í¤"<ê ÃÇPaSõt§ƒ½Ì¡†$ãNWw¡"rÑÇS8Ôhï¤8áP["7åPC]r£«-æPGG'ÝV5Ä%g"  ‡Úû#$ê[³’èDí]õ‘»Ìt $jÈgÒ+.êÒq¨=fJHÔQétÅàûf%$j[è®B¢¾°ÍTHÔÈß°×WHÔ×=ä¼µåV5º“®š„E¡¬ýbO(ÔÈC±'åH)ÔÑ›u¦Ë2¡P[nbåP‡±]t %ê0ÆÃ¼;åP_gì ãPCCu£k?!Q_wŒfÞB1‰:jÅsî6¡ðy%íN9ÔÀSwõ…CíQu/µ‡Ó ‡IANº_µ]8Ô–ïYHÔÑËs¡[!Q/¦£p¨£Åe¦ë%¡Q#»ÉEW!B£Æ(ϼGuŒw8áuaQûô‘Ó3.Žò¨¡äÛò 4jT²%ÁGiÔ>;B£F~—‘n¦Q_ØÓèJhÔÒÐí›Ð¨OdàH OiÔ˜¬ù ¨ô=ë ÝN1:žÀq%R!,ê¨ôyÕ@ebkI¢j.ôp§ã§©Ž&Üébdî4t˜':Ì w‹ç"dйÓak+e…;­ËW©ÓncætØâ$ð­0§ß“9m³,Äé°;aOBœÆl^ Ä2q:JÆ G@§§KI&N#a{xæó"¡Äé ®N^ª)qÚ—§‹‘‰ÓoŒIœÆìŒÆ3qÚ4§}Ú…8‚ #­Lœ~cLâ´½l8?_G^œ)q#C‡”7m @hÓþ0˜6]lL›¦WM(Óh ä©'äD9Ó¶À…2íH(ÓxwBÁ„2íH(ÓÅÈ”iÅ…2]ŒL™~cLÊ4ÔÝÏeDÄ+eÚ_9¡LûÖ!”i|Íâ—î¢+gÚ²P¦õS#”i[7B™Û„<4¦œi\™öEÐ9Ó¨q^®žÔB9ÓþŒ…3]ŒÌ™.FâLûË$œiÿ˜gº™3øÚÔãF„3í‹C8Óþˆ…36ÃíGåLÛ#δ¯ áL—Z™3í‹U8Óþ …3mkG(Ów‚ƒpP{„”P¦‹‘)ÓþaÊtx‚'иε&Æ´¯VaLG‹8‡ö£¨‰æ¸‘)ÓçyOd±~IðÈ2þ^`ú[Ä[MãƒX˜UãÛû5ÒΛÆÇÎÂHÅ«™9I!i‡2—Zy …àÒ3RÀMɳ\:ˆb 
|Dpé ɸæô¹—Fbä”ìGY¦áQAƒ ä˜Ž®C3b ÚŒKƸþ~Ž\zB¦°¨¶³ì—¾g‘T—žî¼NÉG\ùpDïñ·‚K»vŠàÒŽ­ 0±‘„Ó@¯Ì­À42S™þXi¸Ú'É_0.íb¦'¤|#f¼Ó¥ÓÓd'#˜Fö®Lÿ©Àô  c¤#5`RÅÄ©W`Gå‘Õpé8œlÄ·W\ê™ÉÕpéx™±ôRi ˜¶6˜¶6™žÒùý± ÓÞ¦ Ó0 Ô` ¨'ébˆÂNv;ňÂÇŸãYðTáæ™ÙDUáHi~»DàÃATø(+ |˜òè{xW}ï¤è{ ˜õ=ŠÑ€iÌÆP¡`}ï¥àÒfXú<”q¢îqáàHKkW –>>qXZž¾ÂÒÕ(°´¢ÝK3¸Ì„é 8Kfã6HÚž•ÂÒº •6›€ÒfLÚÖŒbÒú¨’öŽ $m³¦´’¶µ? i[l I{!¤/l ©÷ ˜´W˘ôuãs˜§tiߦ“ö&“®F¤½?‚I_¸AMŤ­·ŠIÛúWLÚK ,m½UXÚ^H…¥iý+$mcTHú’;.­‘öñ "íFA¤íþHi]ãŠHÛË¡ˆ´½ýŠHÓè4Ú:¨h´í{ŠFëœ(m#0ú¼ßóq[-`t)(h´MÐhï« Ñ^RàhëÂÑx`…§p4’¨n#ý§âÑ6?ŠGÛ0Æ^GjH‚GûâV<Ú†©x´“ñè2LÆ£}$G{“Gk9…£mê“¶PLº`Ú€Ó6>¦íX9Òæ3 0£†ª$iÛ‹™¶ës…¦Kµ M¿1&4í#lÚ¶ ŦíéØt)ÉØ´íÅŠMÛWÕ°iï-cÓ>NÁ¦‹‘±éÒ[ƦÕéPhÚm„L›I€ioO€iŸ¦‹ñL›ã Àt122]Œ M—Öš.6†¦ý! >]J2>]Œ Pë'Nñió›ŸöQ >í|ÚvOŧKIƧ}$‚O—jŸ¶/¬âÓæu(>m.âÓoŒ PÓw^Ði…ÀÓ^£ÀÓ>D§}ˆž.…žö¡ <íÏ_àiCÓ¥CÓ¥ Mû·C isSšö’M{›MÛ—S¡éR-cÓ¥$cÓÞ[Á¦KµŒM›S¢Ø´orB›öO€Óþ©pÚ?;Nû.Ïè´tºöþ:íÃÜW†¾OÐãÒl4 ÍÕ‡ S¡iè¨,m„G 4¤¯ñ¢dŽ¡içý1<]ø` O6ŒèOã‚ódYk ŽƒÈöæÐaê0Þªzfjè=_™^¨ïC±{E€ú>4îIòêãÎášÔ Ž#î rgß`ýé²më©ÄU:Îã3HzƒÇÍúÓ.•,úÓ.½+úÓXÂ'ž¢? ¬b&¦&ëOÇû!MëOCx•dMD~ÚUtE~/q<é1]~Ú5 E~:v›s#ÆœÈOCŽÅD~ÚµY~ÚtãT~ºÔÊòÓ¦á¥úÓ¥ZÖŸ6)6 Ž|â8f ŽO@£ WÑŸÆç££Eà G£Šþ4rCPp›êOoxy¡$òÓ+¯¼vUtzÃN½%u\Ði|?ãER·‚N‡Œª.0,àt)Èà´$pêFQÇžpÚÊ 6 !šãÂHÀé0Ɔþyépe@´dj§7¼ÒáÖ @›Ái¤©ÝãÈÛKœ†ÎÒ~> #Ó¥CÓBC¶¤Å 6Uý!Ø4ܦø* ­Á¦1ŒxÃ;¿_ idâOä:h:Œhpð>šö‘4½m‰VüèдDÓÛë$3.öžFnB#I\:áéx:Ög¾ > 4΃¥*uT;ñË*5_±ç½eMoè:] 3D+,¶À!ê.5–Ù´A¨áT¿¶ü¡e€:Œm‹õß©¦ F*æ-Þý~+5$¸@åâ! 
PÇ[rÆBy«çq¿%ç¾>%ÔQ0º¼nyÙ˜4]je€zT¼Óó¨Õ2@ÆðoòîNjÄœmk†*@ÍÂÐÖ`€n\¸uã£+õ³å…ÔQÜïáì@%ù‹,øté,cÔQ0|¬ D¶9¾¼‰z3F¾.áåŒQ{­ Q—Z ¢ÞߤS‘ŸÆÎ¼ïÅ2>½‚ˆñ_O㨊d"CËú†¨!ÛžRb Q{uŒPKž^ì\‰Û2<½"ì-vž¹ <Æe^RvMài|¬NJK"ðt|åàËu¦–ÂÓ¨ö82ï€ÀÓ+öì3£>ž†jIŠ:íµ : Ѻö:,tcÂÓhr‰ÏýÒqm†§Ã¸ÅGcˆÄ <]ªex=V¶žö xÚç]ài8מyŸãÌŠŒOÛH ¶‘(@m¤5šŒ!?œ˜O— Ÿ†à´ŽÄŠOC ðXzÆåM{c„N.Šïfçß(m:ê<3½¡ÓÞ ƒÓ°M™BGÁéÈ`œÔº§€ÓÞ¤€Ó4$N”‚Ó0>>ýËÈà4ÔÃQ;ºfª€Ó0Æ0žhCŦ}$‚M‡1æmDh)6]šdlÎ.ö°>†¦Ë@š:•8€õùal:ŒG&ÆSp)ªÚ:B˜ö&öç%è4”"ãHщo.­1:SÀþÆãN+:ít°è~ƒ‘EØã<4»˜¬ Óe€ŒN¯Ø½¯‘°@Ñébdtº “ái¦ÀÓ¨6Øut™ái¨hBåè9Œ(<ím < ‰M cVxºTËð´¯§ÃøÈ>H;ÃÓþÄžF›û9Îú O¯H½;aK…§K›Qc˜ÄTˆÚç@ j_AQ¯Hí|P¾QG?ǧöG‡¨}Dí#ˆGå£ yN©_Çè.ë©(u £ÔxnÇ5‚ ¥.-2Jí£”Ú§@Pj¥ Ôèv¿ýƒÚ¤ Ô>NA©K›ŒR—6¥.F¦P‡±‘º€¢Ôe¥†0E¿û1ZQê0®ðû=£ÔЩ=’©(µÏ ÔÞ¤ Ôaœ;µø&P¶YZ(ý^ˆzú jE¤¢uø-ª焯G¨ã1Hj£]i†DǶ-E¢kŠD‹gÖ‰+‚ìS.Bj÷„Úch¡wþœR ]jT»“æ®@ÔÈŸÞHV jêˆÚ1|¨¹y¤¢@Ô*)5âÐv’ŒdˆÚ⢢¶(-Ũ½VÁ¨-Ri`Ô¥Fƨ=0S0êŒÑRxÚBO¶è E§£Ü¾¦+è´á(:m1ŠN[D”¢Óô¢à´Åf(:mA/ŠN[¸Œ¢Ó½¨èti“ái‹ RxÚç@ài ‰QxÚB[ŸŽ’ñ6œ#ßœàÓ0B#|‚@m? PÛ´ @í}€Úçç¨-òKj‚ÔÀ,ŸÇe¨O¼Ðë}zx £.ÝdŒ:Œ ™Á:* 5j]ŽLJ*µ?cÁ¨O<º‚ÚG" µ?)©}˜R£Í8%^CËšAj_èRû¼ HíËC@êÒ[©‹‘AjŸx©ËP¤."ºŒ„Qj(Sû*˜ÚŸX‡©}Sû ¦ã„å9n†©ý13Lí“*0u!ÃÔ¥³ S—2L}"ozê Jí}”úŒÃNwf‚RŸ÷➉ Ô>A©1y@V‡˜£Ô>©‹‘aê2Æ©m¦.Ãd˜º§ö†qjŸÁª½ `Õ>?VÛ@^Xu)ÁXµ[*VíëXðêb$¼ ¸ãýR:ªíáCg¼º¯~cLÉé ÇœBðjN‚WÛ^í6÷ðJ®ö¾ \]Œ¢îa“'pu12\ÆY@;aEÕ=¼¤¨{XÆt«}XÜC§ÀÔ=´R“øÐ _Uø0[Wùˆ'Œ´›=±‘ª|è;c*:ߦòa­±È‡™DãC'ÛD>¬ ˆ|ø0LäCäTäC¡©|¸QT>¬³ªòáFQù¨FRùð6­.%EåÃÖ†ª|¸QT>ª‘T>Ü(*nd•{(ªòáFQùØKyKUùð’ʇ͛ª|ؼ©Ì‡Eç£SçÃm¢óÝœSÅ´>Ì&RñÔžZ^FÑú°‚"õ¡6Uú° U¥†*}xIQú°&Eè£ØHç+s™zzqÓù°‚"óáEæÃ†Ê|T#é|Øœ‹Ð‡vG…>¬œ }ØÌ©Ð‡ug}سP¡ëŠè|˜Md>¼1–ùð^ŠÌ‡EæÃ"óáMŠÌ‡öUU>ŠíbïX¦[E>Ü(*ÖUùp£¨|XDäc48˜j&òaEäÃl¢ñáãj5²ÈGœáAÉì7™*òao•Š|¸QD>¬É½4õ}õ/WŸ>–G¡ªO±hI@PõiœäI^’¡jÏ&Z+È áaœk…ªK’Ñú°\Eƒº-©¥cÔ;Àƒ<¼»õÂôK¡n{>T¡v…j¡Fó8÷|WD¨7Èi¦¡jPƒdwÿU‘j|¬wfý‰ÖÇ‚Ü×+Éy°ÖGã¼”¹'EëŠÙS<”!PÍZ òw®"2´>е>ÂøÊÒô@C¢õ±Ü§™$ŠØ‡«Èt±ès_QeGDìÃÅ»Eì#ŒûzLIPf±Tºd²Öº/A*ˆ°ÖŒÌ@­TzÌ)\,b¥Zû(SÃbQ2OÕûðù±xŽ€4†Ì2‹} rƒÚÌb xØŒ b6y¢õ‚8Q ákV¡ö9ê(y0¡RT¨1bŒŠ5°õ®2)*Ô È„Ä ejŒصYD„zÁž° ekÑ ^nG'™¢AíÃx4¨ãç…y”¢AÆY¨¢A³óE%C¢w“$¨ð‰J* K¥œ %ã•H½l– .ƒç‰ è†GÆoH‚D´¹œ©“# ½¤$HôÉ“‰3¸‘D_•‰¥$ë}”69Ab)É ËP8A¢Ï­$H´5'ùÝÆéKw(=¢?iIèFIèOZÒ#ã“ѧM$›ÆP–‰ÅÈ 
‹‘$+~#'H,FN8#Íá”bH’ Ñו&H´9Љ֦$Hô6%A"ŒLâ–‰ÅÈ ‹‘$#'Hļ/Ï ª~g̉e(œ ±”ä‰ÅH g0 ¯„’‰^P$#'Hô9x$Ú£Ò‰n䉥>NXŒ” ±Ø8Ab'HÄÌ@=r¤Žä‰oŒ‰V#'Ht£$H,FNèJ$–’œ ±”ä ‰ÅÈ)Kµœ#ÑW¤äHô‰—‰¥ZΑX:Ä9ëq¸çH4›æH´Z5Gâ ¢øIœ#±Ÿ‰åwΑXŒœ#±YÿF\´w–'I,#dýR+'I,%9I¢½š$Ѥ&Iô6%Ib1r’Äbä$‰Þ[I’XJr’Äbä$‰ÅÈI‹‘“$#'I|cÌ$‰e(œ)±”¤L‰n“L‰ÅÈéííÒœ‰þ¬Ÿœ‰þ”%q¢¿;’=Ñ(ÙßGöÄ2ΞXºÉÙK8{¢¿?’=±ô‡³'zIÉž8㣹d ¾dO´ÔUš=±TËÙKIÎžè“ Ù‹‘³'ú8%{bi“³'–ÞröDd’=±)}¢×*ù½ äOôÎJÅRòÉ X q ÅÒN¡XJr ÅR’R(–‚œB±ô“S(új–Š¥ZN¡èÕJ E_°’BÑG")½MI¡XªåŠÅÈ)ß/všÕÈ)K‡X dÆÙlNqI¡XªåŠoŒ;Íj¤Š>ï’BÑ_=I¡è#‘ŠÅXÔ÷)Uÿá7Ó§¿ýMLRôn;DïxI^à(þó ºÆ?xýç¿ÿæß¾.‡}}F÷ò%F÷ÛÎ|Fÿb€?ñ°?¢Ñg¤¯ÿÜs=|@ëpçGûÏÚƒþxÇ“þØñç#ýç’705û½ýçíA_çcÉ¿iýUß~¦±Iƒ»²Üîŧ8³Çú §j‰OTtè¯~xôð;úm‹>úá§OÿôÓ_~úçO?üÝoþú‡¯l&¾z¸™ç3¶¼åÏ´­ô†¢_ßÞT›‘Œºõøú¦6oêÃ6ŽÙ­ìmýÿa‹ltEúa_—²XºÁÆéÏ ˜Ïe›÷GŒÔþ­våùýƒzcwÃ*]aã¿(+¹x²fïÖmÍj7>vÍîG+kVúóÁköîO®Yíʇ­Ù»¶fµ+ïÖìW|Ž·³áâǤ5¾]3®Ök]âTS¾]Ó§öŸcmf9gÜ.ÏqÜ]¶Ÿi¦ýÂföùΛæ8ÿ”f®ÞÌüK‡ §Ö8½.?×Î/Ï2ùÞ—cŽá—ÛY|<® ¨k#ËtÔ À÷=á[Ï»·ÿ¹[ø—?üëO?ýîÿù»ßÿþ÷¿ûéOüÓüé¹2Ÿ¾.v×¾èëç}~’jß;ø‘¼&¥´pE’ÓÉ6\Œ8Ú}‹W*³å4Ð[F@[?Ÿ{Ûº¨V»)>Uº ú:ÂJÚÒ(àusëñämÙ)¤|ŽWFö×Ñù®¨@A ¢ÄZm(0ÉÜ)„´&2hõst.:ÓI- LAGÄ~Í=~»­Å‘b¶öchË4H <ÞÜc]{Bâ—Av~NyjŽ¡=zÛHð§AøjpÖñ¦"ÂðQkhÿ´tpËæ˜¡‡ºÐ ð6˜ç3âh3b´m;aN'®ŠFüU#bðÇ£Ú“˜~ ¤‡AŸ@¼¯aÄNŸp}—„Ö˜a ˆœZ4ã|";[Ûî˜GŒµ6ˆM¶öŒKèÌÙÚ‘‘‹±µlbØ {9H×n2š!~¤øÃ¨u‰6»h~ `°§' ¸©ØpÝ?Òñ¤ ÑoÐnô :ª½2`¸í…Nˆ®HE¶Ÿí‡¦uË—à˜( oB`ZòNÛ1SÐ^ÌéÖ’óÖâ»—qyfF§¶£QøOÞ±S„Ý4¿]©á¢{Є'Dv&¸á2{0üO‘Ríœ)·y®ï6!z7y¹±¦)¤ /å´];¢Ýlä±’íEí ߯gª,6g‚-ÚLfnC^äN¡Jçy^º¬DìÇAæósc¥¶Áü…Â]lõò¥ñ(vu¼P CŒ‹ÆôMܧ…b²lgا¯ðµÜ“×¹c–ÆŽ%§åÞ-Ÿ& Ÿ²·cm#}8Þ.é±â±%ÛR…’‘N6£;¶ËÜ’ïå7‚bX¶„?H0!¾{™„¬r³yµ±ÅPü‘-äýæ#…Œ]véÑv÷/,äœÕÜ’ãƒq¥Žæ‚íØ’a\“jºÇç.ã}ænNú⎷ܒí‘áR-·äxòsR©öøÜÀœûjûj]­$>{MîGÊhF·îðü “‡5½CifìÉàW/IyÜ¡r6öd›hò垌¯A²ÈöøÌe°‹/[s,æ;ëÖµ-vÜ&=y 65;w\=ÙÛ vîÛA±'`m¯)»CR7÷äåóAÂÖÑi !™ÚÍŒé æ=¾s'âsÚ6  !|·ã™ŸÖ(â܆+Y±±õPX‡Ï,îQrO¶’¸+É=Y·²×!¹'ÛäÅ×.£,¢³ë’tëXH%5‰ÑqÀ¢` ìÅ{(zû~R@„/ c¢ Ûê÷cθó ô”ƽ%q|Ö1½‹ßгþ¹ sÁc¡MÇËÅpÊ¿±&ù:qÁqŸ ÍÃ~O).8îÊã ³÷H\ð›c@ú½â‚{šHqÁa\ ŠÞÝsvÁ-\F]ðTñÔÚSü^g–*í ôº¸à1wñˆÒü¡Úié ¾AÁ-³ºª nR»ê‚›2 ºà&ü«.xLå9aù?n–¸à¦¦¦.ørQ†ŠŒºàËJ|º}₃8rc?|EjÊ}¤[WË0^‘1Acˆ“zzI⃃ÊM 8Äǵ:¾Ï÷L}ðá¢á ?ä õÁ±\ã=ì¬zõÁ¡2í#¯­úàË,0e˜Þ¦3½wvÂÁSšé”*NøŒ[âX,ó¢N8ÀŸ%¶í‡ÎnN84t2,ÖœðpAH 
J½ðx]¼µÏN N8lKxHýh'N8(`ÝÅfÿ{Æuÿ‹€ðÆÿVÇNýo늺ß‚er¡ªû ¹£ÔN1÷[}bñ¾}€â~ƒ]úHª«ïíg·¯ò½åÀ"¾·öÄ÷FêeÒj2ßÛœ5u¾¡ò4T$Šï}ì)\o¾w8ZSÆM™ï­¾ƒºÞzÞ5×;ô•#æzÇ‹« [šëm€¸Þð‚ÎÔ¡/®·¸îzËAÁ]ï˜ÔAÒ/®·83îz‹»ï®·¸eêzÇ ÏŒ¬ú6×[üDs½ÅM,®·øØÃõ>§”§7×Û¹Þr2s×[æÔ]ï˜ý³Wæz»wm®÷[qO§Q\o™S÷½ùÐ^|o9|¹ï-çõ½)¤å[}oé¬ûÞçûÿó!+¾÷~¥ñ½ÅkWß[ûã®·¼[êz»_n®÷AÁ¿æzÇ|äQu½e›ë-ýQ×AÀËÔ%L_®·ôñû8Uß\}p^‡cöÆG»ý{!.8D¼Dç7.x 5Þ£©#Ò샭J™gÔGää‰cÏóešå¹Xì‚{`ޏà7šä·ÅG:^É‘“@\pÞ&Ê .ø¿!VJwiÄ¿Ì;ï ‘³ ް®õä´ò1[Ç12[©~`#m‰‹®ÊL›·zà'¶¨‘_@ð ˜,™ÄooNìHðx1Œ¾ñ©¾ßNÎÆ<^-\â~0V|C8qF¡v¼!p8ž×—£¸%-øÈЩïRl#y"7ÜÒb¨»M¸¶ã»¨~8˜ýç2ü8õÃ$¼†º:âÞ!vÄqØÊl¿êŠß[Ø9—Ç÷ó°øâˆåžòÆ®8"©)/ ºâV©ºâñøãÔñõÄ×XÕ[üÿÏÁM]q8¬TxvVuÅÀ¾fn6uÅck_ÏyÀŠâŠ/pƒ·B¨+~+C{]]q;i«+®OB=ñO39Ûì‰Û1\=qС—|—ÕG(™§zâÑ&²Á$ÎÍžø ¥âåêìê‰Ïw[—JWG\¹æˆ#4œª#>#:'ewÅŸKéN¬8âvoÛñwÿÓÈ1¢ŽxŒ/Vþ1·;ʪS Ô÷qŠ'îãOÜîõÕ¿‰¨ËÈN`ž¸]‰+®×¨ê‰Ç@@¦ÖÄE7“Z·9âñ Ÿ©4Güº7ýŽª‰#½9HÊÚ˜N+∶î]¼q¹’ý^G¥ƒÏð=GÚk£ƒ{›âÇ8§!×hlð$óÐÕÿ¶9PÿÛž¦úß6õ¿gøßCÂüo(Wfºqó¿­¤8à3Ämó‚NpëÏã€{uâ€Ï”èÙ¹Õÿ¶>ªÿ=ÇÔ§‡ùßÚIó¿õ!š® Çp7Š>ãý|EsÀc›{R«™÷=áÜ:îÌ·*Õç*ÅûŽ™Ø2§¹yß6xõ¾åY˜ó=Å.~ ÁœïXÁWJ‰˜ó-&/rÌùŽ/é9ò››ïmX}ï[ÈnpßÌ÷©2 «ï=ƒ •P¾úÞáö´ql®÷ºØàö™ëm#®·÷ƒ\ïXÉŸñÆ u½mxêzk/Ùó¾k©í_7ó¼ãQÌ™¹Þp"à>FAÀ£ÁIBãF÷<€£ ÇS2þí‘‚#ãß·/šdOÅ¿Ã83!Fpëàß>HÁ¿½ ÃßÑדù9û@þFI–øÛ {­[__è÷…Õ+ ‚~ÃÈ¡¶ —‚ ›Qáo$ùØ3†BáïR’áïR’áïR’áï0n¤¤ð·W+øq¦$ *^J2^J2 Žd&LÜ6#…Á£äµ†Þ/Ê¿Àì"b©ààÞ¦ààXFñ|ÁÁm])~!­Stþ-Ü–áÞ[½³„ã · @‘ðb$$Ü/$Û9 ÷’‚„#“PôuT$Üm „c_&B((ºé*î“&”bd ŠuGppÛ•âëF(¶‘)^ŒLAñj·mWðbd ÜWˆáöqU$Ü4#á>A‚„û0æ}ü%*÷ñû§xÒáÛßÿõãý_ívå`}ýׯ®pÿ®#¿²ÞhÝO4â_»Éð¢Ò×@Çë*¼#8«w ÿ÷‡¶¶Ñè@ÿã#{ðLûëÉ÷? 
Âña*ó¯yuàùïmÿ5íOž?Þõà+táÏó¦ùLÑ•·d AÛŒÛÉÿZúøìÄa56ÌpßÚ×·Tè¿¢¥;—^ÇÏõ<¾¾¥ýäîs¯Ì úW^2}SþȽ’F è&¾ûˆÕÿ°OÃŽœÃ¥õûÇišJëñÓÇ|ã„_ÆþúñcÚ_kë±3ò2Ÿò›ô!kînûYsÚúǬ¹»ý{Íië±æîMåYsÒú­¹»ýµ¶þmßBàçgõ€ÈXªÌÛÏé¿{*m&þD’õµµvþWÊÌk3t޳ד{ü¬üû/BÔ?#1óŽoï¥Ì¼µÓøºµ5Ž©_/3ÿßþfþ„ˈ8ñþðoÔêo‘‹ÿg9>ý–Ú¤Þð?¹“œƒÞ ¥=q×ë1Úõ®¬ñÿãoÿæïÿáÿÇÒŸ ÷;OþÜ ´ÏÛ6mÑ,øùm¹GÂ¥*Íšþþñ%‚¿>ŠÅQýEÀ©¤«ô"Ðû8äÇ¿Ûïó*”^¤<žð ‘¶˜(dd­c¿/ƒ°¶öyz’›ï Ï´áÄKÂ)ˆ^Ë (,Û­ u][‹ïÓÕ2ã5læúuõ¸xäÍ„íü¼ô ÇíXòÂvÝ7OÓ:Åþ$ £3/Û ÖWü~ïŒs:ìK›îK©û ×#S+lsÌÒCæhH°· Q¤å^@Žbaâ¥èˆL+XÈQ슮4‚<_¶q“¯Qæz† wEÛKž³!=cG­`‹%€'†¶5S˜Â¶^®çΫ”ÃhOÀ‹P"¹îë¹¶aûΧ» nÆî»z\=½¦ [Ù“0žUqW†Å†¿FâìaZïëµ› 摵&Š!kñõpakØt{?^ªˆo½ÅÆíl{Þɡ܂ÐïWºªߨ=þÔ–+oÔ~æul ¡Œr×ç×µèqÕr‚_$n6Gãa;¦ÏG—ŒE9"H.Ç|ßò½–*²ØÆbéu½Ÿ«_\¥.ñóf£-ñ¦ŒÇˆr##*l[ì½ÏÛp_Á¦v,lã϶ly=[¼Ñ'ˆr¸Â8Öûs2AçËâÎwÜ2Àv e1T9Ó®[=öÛ×ZÅUq{~=»ŠJŒLÛ°]y͈›ç•¶´±tŽr¼*`#~Ýò>—¤âŽ»C/Ó;ÚY衜¸ð~‹ ‘’϶Š*iŸ¸ íª(5”´ß÷™¯¥j¥è¥ƒ‰7«‚}S]—“«ñ<ä×WêÂÁ«±P²Ï ù×m¬€ *%­¯Ô%sš¾l¸}VªÖyÝŸ¶¾REriÛ|õ•J3ØÖÞŒ“Æß—Ñl¼ÁãÛÓ»íob|{Æ+Ê×¶=™x3Nú \ˆvUÐxZpÃßwÕh.þØÕ†›Úg½ˆÖ/â‹‚Þ׫l wPÀÑ7×…vØb/¥£¾#r:)Ðz‹KöLİM½nØÖûÚ÷Y²î²«o¯ÚýWXø³½êösGh÷èl$^} òöÝö®²ßš½lç<¶WÙ~nyÙyl¯Vç•êsíÜúöóÒÎÛëþ$èºõÖ±³î™¨ùeR4Ŷ$©¶cÎ ‚ŸoÑ|oYWú[\ÚØYu&!ÆÔÆÎ*;2lÃ5C{c›…m¿¯²ŸõºC\§¯ó;úóè;+®î¶1Ëó‰ÓVî¬ÒM,ß±¿fê—iÜÛ>þñÛ«öäÛ¢~[…ÁµÛkƒ â4¿X1ˆxë»2E1Á6äJ¹->ÿcmy}ƒµ*÷û²ýY¬ HŒWû$_Ç+ß<ØÆ„mÝòÀg|¸­Qç:]æV@߯»åm[ØVDŒVë Wdn+ÄË·±‹Â68Èwé³@çØÇ˹[·ð,<ö!á¼qåïÅh’`"7¶1¹ö¡…vÏ1¼VýxßZôÝk]Vv&W…¯Õ'åºÃ`Û6e öU…íΫŽ ˆËkŶ”D½E,‡ïj+oC?ù®mËéßˆÊ ŠÙA/wØÎá½.WÅ`Ã!©o¯H >¾µPü߆÷ •ÃüÖB3r0°—l{>› ¼˜±½ê–½Inð/Ä­šIÛ+ÛÚ”7è¾wÝ™oÇ{¤k ÛrS"ÆË[}[RoÉ÷XD ¥÷º³Â#¤MÛ¼×x«bÃ{¬Õ¹g”K)·ß‹Ücé[Ç©cÉ=–·fÈ…¦÷ªûo»R Îmû”Ì ïæŽhOÚd¹½çjÊvÙl ±ËÓš"7&Ä{,môa»Ò‡Õbø—}‹UË~3@Æ˯Õ~$dQ;“ê»vØ®‹vX~w:(ùîÀåÝØau·?æ›R2vXÞµãDµ¤«cˆÕ”>ìsTÁ¯gz¯£ÐÖ8Kí齚­¥wi[S¥–•7×5ýœ¤'Ú\y/Û9œWÝ“Сhsݦ~ì^ã<µ5Ú[ùãwNù¦{OÎ9wÀ9ßþ—9NT{º®ÚÁ8QméºêÇ&NTKº®Òù8QMé¹j'pKy _@*„lèØYµÔ‘Å],q•õïü  ÇÜ}M$´U_ɽi´sbÑäîÐùiTŒð\ئ5§Š 8½•{ðÜò· 2žûbú&0Kp.nS !úl5‚碥F º°ƒ±º;³èZ‚Ý}¢Ï¤º7Í9çŠñÜÛÀ4v ÁsAæeÅx.ºÂRð\lI¹›ž{ß²1†Jxn±1ž‹-1[Âs1ÆáÏ…m£+ÁscÇݳ%<„óFã¹°mmx^‚ç"ô&(Ås/:Q8ª y‚ê"*‡0Zu#Dkìuñ+]rª‹/­-Áuñ1¤B€] ´K °‹&z ؽv¹ò`÷ZùШÀ.H\yŠV`×m ì^«‚° ì^M>HìÂFKV€Ý;X(ï¯ؽšlìÂa!ä]€Ý; ';»ÏbL›]sù›ä`®w“Á\kNÀ\< 
V0×Ë=h®ÿÌh®uQÐ\{:‚æÚj4×ë$4÷¼ó4×fDÐ\¯’Ñ\ÔI˜¿ ¹>tFsm&͵‡*hî5Éýƒ ¹ç%À‚ ¹×ô c‚ãÚ«#8.ð‚0Çõ^0Žë½g×v Æqí™ ŽëÍ1Ž{^‚ ŽkåÇuÛ Ç=OÁǵP€\·1’‹®&H®m“ åÞ{WŒ±Ü¨’ 7rñ ”H Ükt˜‘\[ ‚äâ•Ç~Er­—‚äÚ¤’‹H4ºA$·ÚÏ5Mð\/Çx®yèÚC@÷Zä&‡]oŽ]¯’Ýkîwáæ^‹ÀtæZSÍ57MÐ\sÓͽCV‡—®h®¹“çVÛÀsÍk<÷–,HoKÝîA ’ë}g$Gâd’ U…ÄɽKb¼2ä¢ÉFQ Wl äB"!6rqúÈx1r/@-Ã×›cu¬9Å‚ãê¹Kp\›0ÁqqîšòíW«Ta÷‰`)Ž‹àûô­o7†H ÖǤç4@wMh´ºsnòè6¹ä5@WÙl èÂ} *™ºÛ>¤ÏmrÕfxî™/Çsãá3ž‹°kæü2žÓcT† ¦[ú)˜®õE1Ýÿ·³éµä6¯î\¿¢‡oN}WM2Ö,%¬[’[ÀlKuO©ñ,Õ=¸òRÝC _RÝ“³úê†ÂW\÷ˆµº‡ìE¬Õ=$þ³V÷Àt-°®kÅX÷]÷&º§d&º‡"Ýû§Œ @wH ;”¥Ý!€îpëºqÉðÈÖ5t‡tsѾ÷ óé7ža w퀹7Øä–G!°¹m4O•(A®G(¹u„æ' ÷®s‚tS>º^ß¶®×/ì£{îTjÉF÷Þ#t±œltÏI„I6ºçÍö»´Ñ­>·.ÝÖã~ê£[ÙMÉG·õþÜ@.ÝöE&Ë>ºG?1Ë8·Îâ#M—î~‰<ÈF·}/ Ðä¦Û>Áô³”›n¤ÕðËM·¾è0y•›î¾qUÐnºujS_(±nZÐU~ºuæ!8ýt÷EMH~º-&c_úéÖ‹ ¢òÓÍýt[LâeúéÖ8ì3?Ý}êC󀺣ňüt[~ç'd·ÒÑoY~º5^¤þ—~º™ýê¼.úDÐOw¿Ù~º5ví¯+íZ+2r_Ïk¹/´ØÕ7Y²ÛbÒ¯Ò~¡®‰™§ýBQFÚ/T zQivë-¢Å5»Y Ú/l罿zãeØ/l—Ýíf2Šv3;Ú/ÔY£”Ó~!¯IÑnܺD»‘Nö -vë:GÛ/D•I¶»áðÎà»÷l)$Û­ t–ýB‹ñ£%Ýn–“ºÝŒA·ÛúUIzi¿°Zl“nw;ä4"Ý®¯iÝn‹qèóÒí¶Ÿ9ó—n·NžÅ¨NÂ]ÃÂÝJ‡á’„»•E½îfˆÂÝ,=…»µý™pw;4€–p7ÓQ¸·.án‹}ÆxóšîÖG–îfŒÂ]¹ï[·[!PWÊvë óß.ÛÍÜ(ÛÝvó_Èv3DÙn‹ S¶[E!n¦l·Å(5—lwÛ¹…í)ÛÍ)ÛBH¶›Q¶›é(Û­t”ôB¶›!Êv3FÙîë²ÝBEx1´q–‡­Ø˜»ƒè[’ÝLGÉî¶qA×’ÝcB$Én‹IBKÉn2¨‡©Ø­KBÊ%Ån‹Qo-Ån\‚ÝLEÁnæFÅn‹õ‚ìÆ}K°Ù½»ù3»m/ý/»™Ž‚ÝÚ° v·­ŸkÄ{ÏÖ<ìf2*v³˜T쎱®Øm1î`Ån”EŠÝH'Ånä'Én^“’ݼ&%»-F}½$»ÛªÁÒìÖ?ülwȈŠÝwHu3Juë ]ìåT÷u-itãN¤Ñm1ÊÞŸÝúµA`n¦ F·Å¨'—FwŒunÜ5º™ŒÝŠa—‰4ºYLjt3Eºñ¤$Ò}Õ®Ô¹QW’çFF’ç¶÷ÇKž[³üµ$$ÏÍå¹£<7*RòÜm‘)€ô¹yèf2 t3Fn½gØ’ n‹ÉÖ€ÝÃþ—>7¯F}n] û,¤ÏÍõ¹Û¢ ¤ÏÍBŸÛB4¨>7“QŸ[ÙÑcúܼ&õ¹c¬ës#?és3F}®+YòÜ(¦ä¹ñ$Ï×[òÜh–’çÆ;,yn¼v’çf~çfˆòÜhé’ç>_}ërÀº\×Ç]—ë·ó{aîíû`.ŽTJ˜»ù0ÁÜõ|jE…q×Cú$¹-ì’Ò¥Ûå‘¶[k]Ù-„m‚ìI5m·6 ²[¸l »…ÓÂU rËæöÀäfLž ¶– 7ÓQ–›e¡,7îOºÜªêr×KvÒåÖy€$Òå®—TÀ”å¶¹‹´¹ÏjŽûÁÏ]›»^¶N 6·=)â&is«‘ÿR›ÛbDXÔæ¶ }©Í­sì€ 
¥Í­ñ)/¥ÍÍtÔæfŒÚÜ1ÖŹÕHÁí%ÎͺÛMK$RèVì3ÝŒQ¡»Ýä4+…nƨÐͺƒO-º¢B·½Ÿ7ø£J¡»Ý>%¹™NÝŒ=$ºù+%ºú„$º£D7cÔénÁ6eº¡Jwu•n„(Ò5º5Õê]]‡¤ÐB]¡{Ÿ»AK…nƨÐͺ£BwŒu…nƨÐÍ%ºÛ$“Jt3D‰îëÝŒQ¢ë˜%º{Jtógjtë“ÆS̨ÑÍ5º£F7cÐèfˆÝŒQ£;ƺH·bŸØé1ªt#&•nƨÒc]¥›1Êt3F™nƨÓÍuºc¬ët·I'/I§›1èt3D¡nÆ(Ôc]®1Éu3v—ëæ”ëfŒrÝ1Ö庣\7cëfˆrÝÃZ¤åºc¬Ëu3F¹nÆ(ט亣\wŒu¹nÆ(×Í庣\7c”뎱.×ÍåºÃürÝLF¹nÆ(×KJ®é$×ÍtO¹nþL¹nÆ(×Íåºc¬Ëu3¹n†(×Í庣\wŒu¹nÆ(ט亣\7c”뎱.×Í庣\·Åà~e¹n¦£\7ÓQ®›1Èu3D¹nÆ(טäºcìâ@V±‡\7¥\7c”ëfŒrÝ1vq «ôº¢`7cìfŒ‚Ý1vq4«»“b7cTìfŒŠÝŒQ±;ÆNh£b7cTìfŒŠÝŒQ·;ÆN h¢n7cÔíFLºÝŒQ·;ÆÎ­t»£n7cÔífŒºÝM1K·›!êv3FÝnƨÛÍu»cì‖1év3FÝnƨÛÍu»cìâ€V1êv3FÝnƨÛÍu»cì€V!êv3FÝ®cÖífŒºÝŠuX}×íúŸðýƒV»°Ó°ïò2•Mâ+^qßEJq_{ÊŠû.‡˜¸ï²KÒ$îÛ‰Ü=ëcÓÖ´Òűiëk׳OL[´[Û'¦Í¢n²Ùå;l›ÝY c!ß´½%òm1º, ù¶X_ —݉ >ƒË.Ö¬D|+„ â›1ߌùÖ9r€ŠB¾-Ý;ò òÍ$D¾#ò ä»Ì\#òÍ¢ù¶dŸžœÏMÈ·ÅnŸ!ß6g;HŸ‰|—‰þZF¾‘NÈw¹éÀ%!ߌùF~B¾-ÝÃUÌ´×MR°7Ú`oÕ2L‰{ã öF% öÖ󦘰7,d{ãš‚½qÍ'ìÍ_ {[ŒÇ‚½Q ÁÞÖÝpŠœ`oKýŽhoõ\þôß?ü×÷0ã¢\ߤž?ÑTïÂÔ¿/˺»_pÇï,ë(Âíy£ï¿,½!üÝK°Õ¸úY€×Ÿ¿4ÿöù9Þxýå+Kð¬öÇ“ýeÒžþ/y<ÿüµù?ªýY€ç_>*Áãšù•’´iw›s{—æýÛZ}o$¯ÓÔþÖ õO?=¼ÿqjÿ][Á¿ýôË·ûmôúßþýÛOÿúÃ?ÿô[sZ×ÚÎí†×ó·ç´~GNûU~ó\ßÛߜў}AWÙûç¿s‹yõÉ_ÙUâ.×Þ4¿ìËP|Ìýþã×ä» ¹ßOüНb-&eî¿&ÿeÌý+:F6󣳯hs÷¼ŸmιM›»çosÎý+ÚܽSy¶9åþEmîžÿ2æþ»>…ó5m?îóY“Ì­}7öZœ¿–­Ê6|7nß¶Çgãw"›i¯í(ç²mÛù+Ùl,›ö¬÷£Uî1Ù\¯l¦?x;ËQ7ËÞþs¿–ϽŸ»áǶ-ûÙæŸç3çýü­<Î6š¼ÎykÓñe¹Ë3ö¹VŒ3úŸ{ÿñ—ÿüå—?ýß_ÿôç?ÿùO¿üï_Ïÿùß?4Œøö›vd®¥á9)o)G~rÖ™†pkI@&8¬­%éöz}òÖ2¤yÛO·i}ߨW¡³%¨óôÖ GvnµDÍtìõÎ…1ëŽvÛ±Òçkݧ¾k¯<æp6uÅη1É~qEqmïs÷×ÛW+´Ö9â4&á éíŸÎݘDæ;­âúÖÈm›ú²þ#Öýõª)±œ;üõJ&Õ­1ÖÖF[m”싳ë~Â_¯öÇö=uk Až«Ûº^߇!̳vKóñv'Ék⸀¼f‰>Þî$¥%ì‹Û<ÁG¯]c[ÔöúBW:°÷j ï¥ÑJ×ÕgzçP!ô\íó}•ŠÁȪÚu·Ñ›OºŒÜû£·;I]óíúuïŽÞæ$óAAaýµûèevpÀ­Ýã½r_š²¬éjŸÏÜï—ì:¸jï¿Þ/Ù?±÷Ì{6Uå껀ë ë>z»CÑvÿh}¹»…^Ýî{;w}»ƒ^ÔD >^¥Édûûo™û~[ŽîÆsuMi KÞ2ä!Ý=÷ÔëzTî´Ø am¥Ã˜5:ê.z-ÖtÝÚÈü­€¯f„ÓPïã·×I“• ÇNVìýOïéú¨­M3ú‡âžÝÛ¹ÆwÝC¯>(]*_#Éî¡—Ù0Ñów¯bÝD/‹rö D~mîö׫¥n?Þ‡6[‰>ž}a°4¯/Äý•ë^%5´îz™¶ÃCºÇ¦…z 5½[œ½Gª|¥*¶Ñ@o憚¾•ïeêOúnjùìU[»ÅQ®wCÒíÝ«î<’ûî$»½{Õ•{Bî.º} ºý—Sq÷Ïko"|L*Öýó–™Þ(å§Üýó–™Î.åßüÞ&Ãλ?õ륭Ž"Èšg½-)²7Þ×nÄqî¸æÿ¼²ïƒô»m7Ç«Â0΋n¤D} zp@ÅÞ£îŠaßCM ß“ËìÛÜéenžÝþ1õ¹‡Úe—ÞÍôj(‹ÇÖbÝLÏCí×Oû–S51îfzóA²2tïfzu_}óEÁ¿ 
Ã5»ìpËG_µÎ¾óÀa÷~¿›àlmâÔÍô<ª¾{Ù_èW1F¯X7Ó‹²´™S7Ó‹GZºÛ»c}ƒNHµ³{/ÉÇôîS£ˆp®ЏÁN¯¥ƒe|9ÿw;=OMê\Ýn§Wéº_Õãhß«O«pÌíý$áõÝ©ö[»ú×7¿@uHúöêUý`.l’ÍpÒ‹OµÀI/ºý»[éEzVãc×fOÝJ/*±ÍžÞççéŽw'ŸÖR{¼ÇªYø³!ò{Vr>VÕ#Ûo899¾g{é=ÞcUÏt*Öô<7¼Fñ«flé‡Tß'‡WµÖÝÏ̈‰ã~Ûú-&ŽëVz™n‡•žß¶ûa´(º­û™­}´z=Ž4zw®'ì¨ØÕG«'ñßãÈáW窑åý¤bŽVûÐèßcã:¹ê¹s½/ø†¿oåë뿱­Òì¶èâó†•ҹϜÝn˜Ýnj€B·m\²öAB·e©GJCtÛÊ/êBt»·¡1ÉÑí^òj;¢ÛØø$tŸ„n÷ö^n}ú"t[ÆÜ@´‰n÷y"z#¹ÝïÂÿŽÞHn÷é8"¹­Ã1t¹-€ŽCßEnï'†÷NŽä¶±j ø6/I|{?±ï|{oüèÄ·õúb¼ö·÷׬o@¾­ÞSRáÛû[ÝÇØÂ·Õû€:’ÞÆÑ°¤·Õ×™‘ÞÆ!Ù¢·Õ’™‘ÞV_÷€©7s=÷Ž:°m% ='¹mœ($n[·ŒA™àm¥¸#»­dlsd·÷ï ámÕÕJ„ŒÏ}¥#X'¼½×UŸ5 ÞÖwïBOx{¯±ÎæÄpïŸK€u2ÜJ‡ÃnÄp+Û*î½ÆºÈò‰pkŸ€5n%`w „{]ô4ÂÝICá|ís"ܬ*!Üëäy.B¸•Ž=šnû‡ËcG|»S|šø¶v bDoã9‹ÞÆK*ˆo nmÂ’¦ ní,êƒ7CÜèçqãi âFŸ Šݪ(nÝ^W ½(n¦ Å ÅJÅÍt ¸ñŠâÆ@7>×¢¸1¸Žv-E‹âú3/ˆF7’äÆ'Y 7F˜¹5nÇB‘@nŒÑr3?‚\Ÿkëñ¿An¥KÍqin$Í-gîîdšƒkÑܸõÍ­á?V8DscŒ/šÛb8¾Ó4×7Òܘ çæÌ‡8÷Îa»¹pnL΄s+ÝÑG:¹1oÎ °pnýCœx*œ[ótðáܘ çÖ?ì+º¢¹u},nŠæÞ)DŸ‰æV:pÑÜÊî±À [—#‘%ÈͬrïÐ¥O=r_@&”ëUB¸•@Àøu° ÷}›*¯E^°Ü¥¿« s·K¢RÃ\é´ sÛ( _:ÁÜ„²„¹mÄDM©`nÅ€s׉R,ÃÜõ&ò$˜kϾ€¹»Å€¹‹°¥aî†s‚ånœçÌ]yªGÀÜ•+솹÷ù/Š)˜{p‰Î0w¾‰›æ Ìõ›b˜û:¥=0î¤z” ·uð7óÚ» ×ÝÀ¸3^Œ+6)îµò­´·F£$ÃáúÛwÒS‘÷<ØÝ[„[à]"ÜkIbÛE¸jX…["õ>f° ÷Üù¥³·à']$Åå×7¥¸çFXg-.ÔR‹ë3ç­Å­‘@‡7Öâž“úÔâ¶¢)îqIý"Š[½&2¢¸ñ(Eq«kƒ€‡·\Q0TÅ-—LÌòEqïs§D¢¸Õcr$Š{œRˆâ‹VDq[Yð’ã¶²œ˜² ãÖt4Q÷Ø4¦Ç­)0Ù)9î±Kü.ŽH÷XØÝ™ãÖTý31nÆÈqË^Ô·¾âoXÛî±hR%[_ꎊÄq£1<8î±h¾'Ž[#LÐÄqË›µS3aÜz …'Æ=fMtÅrËÖM\,÷X5¯Ë=VÊÌrò“…\•,÷˜´!–{,<•Å,7jE0÷˜iÂf˜÷'š[&¸XyÍ­¶ƒù®h.ATÒÜj<èýEs£‹æÖ;Œé¼hnˆ¾ÇËAs£(¢¹Ç¬¥0ÑÜö>[0÷˜xtßæ¶«a o˜{Ì¢¹Ç"%¬pnÞpî1  çF‹çæMè“Ö|Dt#?Ýj³Ÿ‰rË2 0HD7¯I¢ë]’ƺQºñŽë;ÅBX7ªZX7ÞWaÝcÖʵ°næG¬1bÝšAut(ª5-ª[žnXŠÕm鈟T7Z²¨nõ‰ j¢º5)ìk‚ºÇÍ´P·Œå°.¨[O»Ÿ i¨»_†Á„º-;Öu[Œ«p‚ºÑÒEu[:Á`RÝj±˜ ëÆ=ëÆ)¬[ÍëTºyĺyĺÑí ëV~ø² ëF«Ö­7sbݨaÝã¦uaݺ, ëÆ-¼°n¼ºn—¢ºñz‹êF7DnôBéî—Ö3%Ò6$‘®ßS‹t]‘éúÙX¤ëŠ´H×÷`‘®Û¬Eºn—éúݲH×íÒ"]÷éêéY£›·Nn^’ݸujt£¦¥ÑKJ¤›—¤H×mó.Ò­ŠêÓ„ï%¼ýÔë©ýûßw‰i_¡Ë¼•x·¤ð f¶Y˜xžÉo·Y¸Ädd³Ð:(Ÿm³P¼B?Ù,´~ -Û,ܽ-»èM6 u*hšlê40FÙ,Ô7ZdÙ,Ô\ø¾{GL%‡Ïéì°Pó/P9,Ô”éi•@sM~ä­p¬}”P7&6Xð`Ê ¬ífì‰v=´Á‚ÇQ6X¨|ߨÅh×S7£Ý÷æÆ¼È@×Ó0]Ïr tÕ©è:™©îí#anÿU(×/öåzn”ëé­Q®{³\÷Jb¹žÁšåº4˽¥ö,×SX³\ß‚Y®oÁ,7Ò‰åºÍr#Yî}\÷±0×Sc“ܸ¢HnÜInp‘ÜŒ‰äú³‘$—1’ÜÀ$"¹û•ÚÛNrcš.”›×|¢\×£H®ëÑ wãÒw€\-ŠãntK 
Ž»QÇ·ÞPhqÜ•º±à¸(òÀq½neŽ×ÇuYÌq#?qÜH'Žëu9s\׋9n\S7îAwëkà¸ZU2ÆÕ–(®#¢¸ âzYÅwím7RâF BÜXü3Å]¹Î×k^¦¸šfšâÚ1(®c¦¸^ƒ2Å}ü$xûøIÌÖ+Gf¶.TëÅb¡Úªr,jÕF:¡Ú…'©ªtDµ™N¨ÖëFFµN'TëìŒj}{Fµªk¾P­«Pm4³ÚåsnÆÈj#;²ÚhEfµŸ„hõÏѪähõÞ¢Õ»ˆV/U ÚµÆD´Ç£x"³Zw2ûøç²§e½²íÅZ)?‘=í4!"{Ò½"ˆ¬d&²¾¤‰ìÉÕAdOJǂȞAÞ‰lëA`ó}¶ 4·¿Ëõvîepìcˆ ±_ç ±…\¯ŽUIcgí½7ÕÆŒ0½=ùÉÓÛ™{øÂôö°™¬LoMÝ~»éíF±˜iì¾HO(»¿\àu{’"Úêvç¶AƒØm•dY v›(H2mcW*¤eu{²+¿ÛC2áô»%•ÝíÊu†°»]ôÌ^4¶l–i[K[S•o‰a×›Ìds{i¿mnO|Ùܽ£K['öóv¹=¨¡ —ÛMuk—ÛU¨Û.·ÝÔ$yì²pcJ¸ÜÊã,¬nWkoÉc—UY¸ãÏÝ@aÕXÎ «Æ€öÁ]5ð²n›pc†·þ¸ÞöĶ™±m^ضŰÚØÖSEc[OXm=€6¶±m†ˆm£(¶#¶bN˜z|Ÿñí_~¸}û—öÊ·ýÿ/?œó~ÿÓÏ÷?mZ,¹ðýOÿýÃ}.õMºßùÝïGù.$ýû²¬»ûwüwβµÐ;°¹ßèû/ ÷´ý½Kp×1? ðüó—æß¾;Ç»¯¿|i Õþxò¯¿´×Š„/yÏ<þüµù?ªýY€ç_>*Áãšù•’¬E†Z_y.Ûôm]êê«}¶¦öBµ2ýÓOO‡íœÚ×Vîo?ýòíßþ_“ü÷ÿöÓ¿þðÏ?ýÖŒŽ6h³ÓcZwø›sZN5æk£¨6Ö<¶õ·ç´gN_ÐUöþùïÜb^}òv•¼Ëë‚_õeØ—ùÇ1÷û_“ÿí6äÞ~úš¯b›˜÷þøñkò_ÆÜ¿¢cd3‡kØ—´¹{ÞÏ6çÜ¿¦ÍÝó¿·9çþmîÞ©<Ûœrÿ¢6wÏsÿ}ŸÂ¹üdÖöAŸÎ«}7ö6=®ö]le¾·oÛã³ñû?„Îf*FvµÏï¶¿’ÍöDziÁš[­ryÌæze3ýÁÛYŽ­” {ûÏyüZ>ô~Šo¶yܲŸ·éWò™ó~þFµ>q\óíØ§o˲U ØçZÎ<êî9üÇ_þó—_þôýÓŸÿüç?ýò¿ýßÿYÿÐ0âÛoÚe¹ß­ß>–áø“³ÉÔd/•Ç{}cÝy†õ^*×úFùŸõÕã}¿úî¡8ºbïíTu64ö‡í¶„µFÁ=t{9Ï¿W8ʵ ^„ôg/I·åÖ¡|õõûн·ð éÖ¾õ¯=›»—=ñ{…£ÅX-÷ÃF^®bÝÊf/W¿÷ Dzð´äŠu!ÎRg6¾v*îvÓ×çç¦ìÇÎ2Q´Ÿðá®Kò8õŽc›à§Yb×GËgî¥õx/pÔ2Na/­Ç{™ƒKp[¡ÃYfÚFîuðüHqUýÜu8UYÝ”¦Ýt8ËLŸ­ý<ºãXŰÅw?Ïw½¾¾³ûyõ›®uõÊÎòÕa:ïUŽŠAaxMPàÔ5ûTk¿p™ûëÛ[ö:¶ú½ÊÕÔbïZéºà~• ò‹ÁÅ»pÐøûž¯½ qwË웿ö’w¼W7òž!û Öµ7ràÜKÞñ^Üp#=néEU%òx/n¸ªŽrä|-n¸¦*ôî≷қ,Ê éM¦{À›Ì $÷ƒX—Þä–éÎ7¯Â&®µÂ~Q^²Mèß*hëÒ›¸±6¡ïÒ›(Ê4C€“ù-àDE–Þ㽸‘ù­Ýîi(çÖÊŠWê(Zú^Üx¶ãc: ÂÉžPáDE–xrAª›¾ É+7¤÷ªFưÓ0ËR~T[ïQ#ôÞ¡ë×ú˜¨p¢”màÚU8QÅóã$w¿„ǼC„“eß!ÂÉœˆpâ’g×àd*8ÎdùJññ^Ôˆ'³à¸Ž,eýÓ ½*›ÇÕn–eY ÇÉØ UN<޲ûi±®ÊÉt;T9yï°ªˆÚ\Nˆr2» ¢œÌî‚('Ò•ÏìþIïºÂ“:«¥ èÞ¢—rÅ—xHµ@“7¾®Ðäă}€é¼ØMN<í’|ÐíY· »ÎøF%ùxY3„ó׳(å|° oŨçØ&¨râžKrßǬ.f›-tUŽÇǶ@•“±ªœ¨–mƒ*gŒ]³Âä¨ÙѾÌc; Ê‰þºÍÞº*ÇC­£Í »*§ôU]1\±®ÊyõåmZÒå8q±}‚'cs—ã,ÐnWh'ʾÃ#3ߌp´ªtë–Þodý¼C”SnÅ]y´YTådì„(gY»Ðà{‹r–…66GäÛ­¾uJ­l‘uŠ­ŠÁÀ1f<ëÒœê3»Å÷Áò­:àÛü~ÒuI®úžÛô©kr²;49™ÑMN4ýÒ}ôQëzŸmÔÏä8šq'ÌV2§R|pÀÊœN—U—„#ÐQvïï«k°ÍœXuÉjœ1ÖÕ8žÛ¾ÖÚ2Å5ŽçhG›8-°Âdù8á3劽Õ8ñ¢Ÿ8èþø»AèqÁ»:‹yaSL¾i× 5N¼Muø)¬÷ç|¡ 
±B†M±¤}¤ºÑí(©GªìJêÁ‘*¿×βòȤã:!ÃY6:¼µòC†ãgÓê2?Óó6A†³@-üˆuβÑWí¼Í]ˆó¬ÇvçPàd>+8ËNg¤ó¶AÓbðnªØ]“ÙÃʺ²:ûrñy; ÀYúÛœmîÔ8ËN£¥67í œºd÷Ž­ÐÅÁ*ïl™¹ûrrÝYR>X=èt–Ô£V7ú!ž%õèƒU}ß+Ö8íš8@ðœàY]g÷“NîL¹»)óY:š}*>çÿÿ}(ªXã´!T.çÝ}*Þ³Œ(û`U]ÝY¾°ïÁê:qGÂÙfP]ŒSêã. ªXã,÷Ú˜ç®Áñ«{Î 48ÅLû«‹-¡Ï2ïƒÔ•ú÷“ª„.Ùù¾=”¯Ó§ÿÆÖÉ`¶Ø+9,‘.O_.ãÚRÌBÙE\»É”߸ö.ú}“%ÑÚmÉ­ÝdRmZ»ÊÕÓ´ÖÖx¦µ%$ïfd¦µ-ÆÃoLkW…Ô^4…P»ýx|ˆh…v…hM “Ñ~ŠÑ–m>ø­m›4SÔ&F[îþŸ0Úšï÷)´mµöÇ ÏÖæpGáÙÚð‚“—^x¶Þ©÷.\ÓÙù¦z-×frgÒÙÚN jJ:›—$¤µìÕv¾‰ª ÒÖ]¬Ò†àN6´•‚´yMBÚÖ•óQ‹Òz¿)­7UÖN˜š$¬m1¾Ÿ‚µ–Ö¶ÏØ³ÔjÓŠ9í$—9m}Ü0â!§m!XěӶÂ?Î0¢n\2¢­ûí«+/DÛ>¼ÝUÍ„vao¤Cä³~Œâ³mà€>Ö|¶]cóÙRQu+sóÙŒ‘Ï–j«oÐ6Ÿf*ºÌg[Á°MÞ|vša†g<Û²ƒ=¬ñl5šnðn<;M\ž3¥}76ÚÚàØ·0ÐfÐÞ.ZˆÐV•÷¯µm=¹þN ÐÞµ½Wg£´µG0F„6jK„ö®3o}Úx.B´5^Å|Nˆ6¶Õ$§Kd´Óƒo3Ú–¬{5ÑÞNø*›ÐÞNŽ‘LhkTÝ· ›ÐÖ †™½méñ`“l¶Tܽã3›­W³@±ÙÛ©©¶Øl½¯˜d‰ÍÞAÁÙLG8Ûò{òqÙþ³lË#u#Ùû¾^àZ Ù– ´BH¶ž&•B²Ñz…do‡hÅÉf"ÙxôB²·]³^!ÙxXD²÷нC’­i_©ɺ:Ddo'OÈ2‘RŠÈöç%{;Þ!Yl4i±Ø1ÖYl4±ØÚIŽ>],%$†’ÂÞŽLak_:>r‚±Ñ£ƶtŸÁØh<‚±QÁØŠõµ~ÃØŒ=aluÀN‚±õ™ÇXE0¶}û`sk;Ý0Þ‹–/%‹nC,6ZXl| Äb3?²Øï1‹÷BH6>VB²YN"ÙÛE»r#Ù¼&‘ìíä±âæ²Ug}«šÁl};’0˜Ï…ÀìûC${»è¼o$[ƒŒÎ"Œdc($;ÝxìÆ ÉF5 ÉfND²™ŽH6¾µD²yI"Ù>$¢±5¶ë6G¦±ÓD\ecAÙ¼&¡lÇvÌe&³1Y;µ˜Éz~e&›12Ùùæf²¶à1“õPÙL¶¼*úüÆLÖ;Åd=í4šµ•Ñl Å?C³…û:ny£YפѬ·¾ÍÚ+Ëh¶á·`²ušÔŽb“É–£#™l‹aÀc&[çÉtHc&[Çptjb&;õÝš‰dÛtCv#Ù: U%&›éÈdë½ÞO˜ÉÎ3ßl3Ù:‚®C 3Ùybo&[¾b}àc&[ô=4’­£û<ÌH¶MAÀÄfgìÍL6Ûb8>Ðl¶ÍxpÀ›Ùl›ÕÌxèÅfg4˜ïÝ%ù{ýíÒ.ýƒ7@Ú™ß*“Ú¿ƒ‰Ô†WÂZlbT»NtAaíÅCüÕúx£Z~¨vã’”QívãW!Pí)ti^»éœóÚ]ÞˆÚ¶{‡—‘©m5Þƒ¨í͸9”µK?c!”µZÜ j+3ASÛ•½ ¶ê,Ý^Ú1mtkâøB·îÙÍnˤ¿6»Å`/ØíÄ“BÄnÍå‚Ý®½CIvënÉìÖ“‰`·¢bf·»¡3»½Mì´ÌnË3 ˜^ìö¶Â«Òè¶&l}Êèö6ó#kt[ÌKB··EíYü6fô¸å»MÜÛ,î ~[jÀUâ·m\@™ !îÝ4ƒ*AÜÛã»'Ã-‹Ì—qÛ?Ù0g#Å­ÛZ)è%Æ¥c\bÜ 5Æ­Æž· 8N&ÆÍ¢ãV ½ž1n˦¸w ÀpbÜ»Á&Cä¸UNá_rÜÛMt›·ž'þä¸ã¤0·ò㜈0÷îZñ‰Ú¶|<Ö¾š/˜[¦@æVY8ÙÌ­ìp²‰`ny›à„\ÁÜ*ÊÁt€¹£dõ s«„œæÖ“Ãx]0·\Hˆ¡sålb˜[7ÍÙ=aîv7Ÿì˜„4·.‰O™pnE¢ZàÜl ĹeéGBáܺuL9u+ÃYAÝ]u@ݺ?¾$„ºu0¿Ôb€ºw3˜>ÜÔ­ûÃpSd·î¡÷ø¢»•¼vEwïOâ+ÐÝj—ø ï–çÖÿ„wó)ïVÁsæiCpT?ïÖŰÈ*¼ûA¬ŸV}Mýód¼[>C+…ºÀ»u_ìñ‰wï^Bf¸ý´j×>ñnÝW‰w+»•ÐŒwˆòfòÞp!o]óÀ*!oÅpn¹ o=6NDz«:á@.Ô›Mˆ¨7«š¨·òÃR·PoYHá 7¢ÞÊnBc&ê½ÇнõfUõVv8\í…zërÉ1PoÅú¡ò"½õà`O%Ò{÷ÆBÔ›Ÿ¢Þ¼i¢Þû3}ªLAyËÙ‹«•¤¼U œ‡'Ê[«Ó„¼®¾fBÈ{7#äÍÇBÈ{oYÔÏòæ##ä­ü¸PKÈ[×$j'ä½w 
+òV~„n„¼U"¢€¼uM,kƒôV„#’Þ{ˆQIo6H’Þ|Ÿ&+ç. Io´U‚ÞÊS+Þ|4½íŠß‚ïF&d¼ç›Áx³†Èx£^‰x+7H”„xï•wWâÍ—ŒŒ7;q2ÞìÈÈxó&ãÍ[âÍwšˆ×•)Â{¯Là(Þ*ÉD Â[n~éo'¼÷÷¥Ÿ_(Â[É$°áV*Âïà‹ðF3á­¬ú*˜oT±ïPúÎy‡Ü€íî· ‚óFÿ(ΛّóÞ?}ÉPœ÷Þ¯ž¶½ÑA ô×èV)Л€ 7†½£Öƒ‰þ¥â­š<în€»÷G2éî±~,µ»7ÒÝû¨ oáݽKöåk›mœ;çÀ¢»Ñ!Üí:Ü'~/ÝíÇROíßÿ´»¡“H´ëÓv—‹w"´Û&eçS¶Kª;ÿ‰êú<SÝ»±yO'ªëLu'¸ 'Õmï(‘©nÉͨ%Õµ`ÀT׋½¦º™ŽT׋å IúLªëkSÝÛ¦:ÕµôÂT·:Õ>Õ-sËÞK™êf2RÝ:O ¿}¦ºÁ_T7ØŸ¨n¹rÂ[RT÷¶rBnª›é@uM6uƒ^ êÖ±ða¨;[”J¨{[žv¡Æ¹3ksMÎçÎ\ž œ;sj8—,4pîÌÕÙçvWÛ ¹‘›hîôìËäªðVâÖp< A.¶W$ÉH®ž¦å¸íúœÇšäF1ß( =Pn”‚‚Ür¢%3ʹÂc”;õÏÝ€r]WRäF1¥È­tÐØH‘b”ë[°"×å”"·Å8Å’$÷6qý$Pî,N"InÞ;%¹ïf’Ü›è$¹Y IroœtH‘[cLæ¬ÈÕB¹Õ¨1ŵ ÷Ö+ î“ ÷&Œðäz%À‚ܸ¹±`aAndAîí¦“¹7Ž1C‘ëÖcI®×9,Éuý[’ë…Kr#&anÄ$Ìu•Y˜{.³07î]ÂÜ›f„æFY$̽i–iu®‘%º7ÓXItýü$ÑuQ¬Ñõ-â¶žŸÌÀÝ(ÊC£“F7âfFÒèÞ4 –F÷&nd®‹"ˆÙY¤ëkŠâ¶t¢¸yMRÜXá´T÷öíçTè¾~‚0ו f›W³R‘ÙÖÝ|Âl‡Ú“<×1ËsݰÅl#?Ésã’d¶™LòÜ[ò\Ès#ö’çú5±<×ï‰ä¹­Wf-Kž;ojÒ纷>W'ËsÝ Xžë:¶<×w`y®;9Ës]+–çFY$Ïõ-HɤÎbJ1©s£˜Rç:&un<«sŸÕ¹H¢¹I¢1It]+’èf)ŸÝH!‰îí3p›E—@×_B t}Aês²F×Citc°'n ¥ÑÍ5º-Æ•/it5ݱF—³‡à·1µF7dÒèïìçY£[3’¾éß]Ǭѭé>”¢Òè/X£ëÝÒèzK…5º£F·vpô¦b®åðoîKÑnu®¥é¡Î•vÞê\Ë¿%ÒõÆ3‹tµ…ÒÝiæÊohtµo14º3;ùÐè @Y¤ë=¨éj[«5º!u–FכޭÑõæZkt}ÖŽ5º>|%4º'eNÖèëã—HwŒá““ú‰t[¨oû·F·J¸n®¶i‡F7bûCPØaã÷ù'LÐèþÛÛ6F{ßü—²W!ÜÒ[+Â¥®ÒwçÆ×›oFŽÛŸ_pÜ×:"¸­cáVr\«4MpoôG ‚+¬1Ü…49nPZ\Mãƒàj_×@péNK‚j² ¸+•(Áe-ƒàÞcݺÔw±ôVw퓸Ûuréª\ĸb¾¸U¾~üv¦PÉüöT‰nC¦è6D©nE?n£“ßR›ü6lÞü6n9ñ--#¾‡¾­g‚ÕËÀ·ÃºÅHèv¸œøm LñÛ@ à·uÓXÕ4¿ ªü–£8+qOú%ý&~¬ü6Å•ÉoUÂà·/ßÞ›#M~ߢ¾ VJ\Îß–%¾¥.ËøÖ“çÄ·ºfà[ÅB‰‹ÙƒéíóÝ nk9vÂ[ÌÌnÍ÷Íngî x°RðÖ­DôVK6Aoƒ‹ÞÆ%Eo=yÓÛ °¢·Zk zë1½"’Þz]ÀôÖ/¡é­+Òôv¶ÒTôv–ìÒôÖ@Þô6Ê"zë—>é-_*Ó[?ÓÛ¸‡ ·h䆷žµÞºªÞ¢©›Ý:™Ù­ï<Ø-¯(të¾Üö ‘[ [ÎÿŸè6ݺ‚m¯ J4¹uÝ‹Üú…2¹u󱻂?Q&·sJsa¯`´b{…÷’“íüLÚjMÀôv¢ˆ!0®kÄþ ‘Nþ Q™,Ä5…q½zgŒë‡jŒë·Û. 
þ ãj·ŽMÜå™âúe3ÅõwåMqãr¢¸n¢¸aí#Š6¢¸î”Mq5Œë:6ÆõÛkŒëfnŒë²ãzXbŒë*3Æu2Çõ0ÁWÜ7n]×MÈ×ý1®«,1® ¦0®Š)Œ«ÑŸ)nD×CS\‰*Þ×o¢)® !ŠëÑ×[×ÏÚ7à©@n@W\_3A.ë#A®b¹Þ`– ³º ¹•ErÝ–Mr=þ3Éõs3É]úÜd ¹ª— ¹*KÜH'’;ê¶à1HîÂ5ArÛ#ˆäêÖ;É]„Ís£ä¹î¡‚çꉋçzÎmž%Ð;Ð5ò7ÐuÃ4ÐÍkè‡èZ³f¢[SRxˆèZh¢K"º–ñ™èÞ§¹]Ð-¢kÏ#]+MtKµˆn+ç{ôD·Z¶Dº·Í X'jUÆH7‹¹ Áã÷"ÝÛ÷!ݫ燓ÌV9 ØpaæÄƆ W#Â÷FxÞ¸7nŽ2Ò]/ƒÖðÆ=úZÖh¸€­ù6\¸h6Þ¸i vÛ„€ûáv[ ë>»£ƒ N ìV PO`·Å°}ÇÒÜU“ìfŒ`7ÄØ”æäÙÍÉn‹Á¶ËÒÜŒ=Éîøs'»ƒ„UÒÜŒíf hw=i®emîë€7b¼-Æãð¤ÍÍ oÆHxÇXWèfŒˆ7cD¼£D7cd¼ûÌqaŒunÆz3èÍ…ºèCôfŒBÝŒôfìz #ÊR§;„:æ)o„ Ò!¯Cb¼C¨+t#DÂ!Þ‘ïFˆxwuº!ÂÝîV¨Ï”Dx#DÀ$àTä» ïFˆt7B€»C¤³ÝíFˆd7BO°¿’롎u#Dª!@݈é:$¤!Ý!Ôn„Ès#Dœ!ÒÜæ¡Îr#D”!’Ü‘äFˆ wuŽ»rÈÍAnÆr3D’;Æ:ÉÍInÆî$7$ÉÍIîë(7c@¹"ʘPnƈrÇXG¹#ÊÍynÆÈs3Fž;Æ:ÏÍynÆÈs3Fž›1òÜ1ÖynÄÄs3Fž›1ðÜ èfŒ@wŒu ›±'ÐÍŸ t3F ›1Ý1öº"И€nÆt3F ;Æ:ÐÍnÆt3F ›1Ý1ÖnÆHt3F¢›1݈‰èޱNt3F¢›1 Ý ‘éfŒLwŒu¦›±ÓÍ_Ét3F¨›1BÝ1víw´‚ºÔÍ¡nÆuÇØÅ᪎ #ÔÍt„º#ÔÍ¡nÆuÇØ9ÒÑ êfŒP×1CÝŒꎱsý¸£ÔÍ¡nÆu3F¨;ÆÎ±ÑêfŒP7c„ºÔCǰŒ êfŒP7c„º#Ôc×üqGk¨›1BÝŒêfLÇ› ±kû¸£5ÔÍ¡nÄ|¼YÄt¼Ù»>Ð êfHçœEŒP7c„ºîêû îôÒû'd@»Gå´{{†ªÛé8¥ lto:5ITwÙMYe£«SMuçË’\Rݲ<þÌp¡=™*ê¶gý©\7lfEu«iu ¤ nø» ê6 §¡nœÛ%¨[$‚RAÝ;À€–P7=j uíZ.¨[/8¼_uãŒ-AÝŒÉEw—âûí¢ëcÀì¢G’êfL.º£‹î–¦ :JT»èn²,ÐhNfÝ8tL Å¡c2ѵ·ˆ@[©9ˆÐV)Ìó4¾"yšb¿ù´!†ÐlJìSÐVëXu šð1hº¦A[ín«cÐiî|Ú"¹…ÏB‹tϳÐtx‚ÏB‹‹é0´ˆÅihÐÆ‘êftÚ"-˜C[$‡ôahÓah‹a> M7.´›—ÔYh>ê͇¡-ÚKíÃÐV»9è0´ˆé0´EÛJx³,:Íé ñƹs>md»ùÏyšK'ºkk 8mÖîañÝŒ=¯7z™ðfV:-Ò‘ñ¶]ìtZ”‚”×{ÕŒy£(â¼c¬ƒÞ¨d‘^ïc3êrŠõf9 {³,¤½Q-½uM1Š÷f~¾qö ˆo^“È7î]È7®)äûhÇ$½ñFˆôf.$½qW"½™îNzãvDz³È$½c¬“Þx$$½5Ñê*“Þ(»Ho¤éìDzóš$½që"½#ézéÍt$½YN’Þ,'Io^“¤7ë…¤7ËIÒHo”S¤7c ½q "½#éÍK’ôÆ­¿HoܵHo^ޤw»ÉµS¤wŒ½IoÌ$Ò‘Þ"tÔ%é\¤7ŽœéÍt$½™Io^“¤7>Í"½1LéÍüHz3FÒ›1’Þ(‹HsYDz3FÒù‘ôFµˆôÆ£é&ácÒ"݃ôæ¯$½Yx’ÞLGÒ[CFú:€ôfuLš“‰ôfLg¥¹˜>+-/ÓYiqИÎJó({8+ ãö<,MåÔaiû°4²}XšGõ>,M󟕦bÆYi¸ÇQi*e•¦»óQiº8*-J¢£ÒxqRZ†Îc˜5ÅAiQY1ø–uNš3¢CT†Œœ™”™’ƒtø0¨A> ŠÉ‡A<Œ"Œ4±#†ÕÞ»2bˆt2b°'„|² ƒÎ9 †!—R6 ¢áà‰¤ù®é†ùîûdõ;Ú}ÿíûUç²mÛù+Ùl,›öAl3©éhÕ{Ìc6×+›éÞÎRˆwZööŸóøµ|þèýl… ·mÙÏÛô+ùÌy?+Z8·;|[–­ZÀ>·ùà•yÔÿÜsø¿üç/¿üéÿþú§?ÿùÏúåÿzþÏú‡†ß~ÓæË³D 
oð¸}I!?>µ‹ÞŒ%yóæâÆ0(F¶ù3µù8îj4f'nãa•Þƒ]ðÔ·Á´àÌámŽoó²%iEX®áµlÁ#ܸÏ6±ºµºi_Ê}|õZðü¤#nÁÚýÑR®Ó/í|j˜ëØ¥q®.ºÔÖ-mçði»¯OzãlÝD«ŸVGÇ¡gvÕ$—R㵇¹ß>h¯Ëê¯*¯†LwmÞôAÍÖT”}²ƒ­›hõ³îó5ëU›,ÐùãÔjh>¬„ájß·GóuöߨšiðÛF¨¯lXp]ž»/|÷¹÷¤‰|k ×G1ßRRÃ[SÌ·ذùó­ñ–Þö~I|kzÕa¤‰ï}VÖý1E|[Œž›"¾-F×SßÒ÷ÃSTÈ·M»í®‰o„|Û”{ _Àw[iÐmà»-ÒÑ ø¶ØS'ÖÛ~¢7ëÝjÍz·™üÞ¬w›!©4êÝfj¬Íz[ Øß¬·MèýÍz·ÆíCçmâF!Þmâ&!1ÞÚ.ò†öf¼uÁÎúÍx·[ò&ãÝ&®-˜ñ–yfßÐmÆ›×$ã­/Ý›ñæ5Éx7ˆä“ñ¶Ø{¯v2Þ2ëìo™oK÷î¸ÞŒ·¶ëtV.Ä›×"âm1ñi‚^_Rœ·…Þß°ä¼ö5ç-ìDLÎ[>£@ÞsX6~Àyí0jÎk7PÓ^;ŒŠöÚSÌ7¯HækÇL3_û„šùÚÑÌ×Ξf¾+-ùŒ|íDiäÛ’qÆ,ä›E!òÍt{¸vÈ#âÛBœâ=‰o™Ë!ˆø®‡&Œ"¾u5L4E|#ˆo”BÄ·Ån}˵‰o^“Ä·–ˆ»‰ïzpônâÛÒ]Xmâ[{øæGI{Û_p¶ho–´·.Ñ…ßÆ½Y™¶ìý¼7Ò‰÷¶Æ3¾+, øÖ¦Æ÷¶߸ßußjß, ‰ïºó|7ß=Ä“ø¶Ÿ`£mä'ñ­døL ùF „|«5ô}:F¾ëΓ:|3‘oÜ‚ïºÑwÃÈ7Óù¶tŸùfYÀ|kSkß'æ[©úî%3ßH&æw æk#3߸1ß{÷9É|³,d¾•£%1ßLGæ›éÈ|óÈ|[ ÇȾ˜o¦!æÖÇ‹É|#˜o¤óÒ‹ùf:2ßLGæÛzSøº˜ùÆ]‹ùf:2ß, ™ï ³`¾–ûˆù®+g]f¾‘LÌ7Ò‰ù®€„É|k¿x( ùÆ ùF2ß, ‰ïºò$Vߨ/ßJ×Ï’5ñÍtwâ»®<ÀÄWeàët|]ú¾ëÊCøºü|+eŸ ðRø) |‡”¾ëÊãCø)iD• |‡* ð]Wž¼À7S ø) |‡”¾-»¦¾Yy¾-‡®¾Y¾CJß!%o¥¼Ï>ïpûro‹4¾™RÀ· ][BìÚ™Ú`7Þ]oì"h•Gý§Ìuí%m®û®u]Û‰èNO¶Ñm!~Et[íBõb¢k )ݨbÝò{ꋸ"º­ŸLtï<®/T éÚ"ÛHצâFºÓ®ñ°nÞ‘n r)þ½#ÝhBºî ‚éÚ<˜®‹LWÖYtý*Òm3 lù ¤kç³@º®ê@ºÃéÚâ,®¡‘®}Út[ gQÒµsc ]½yAtëù~*áuoD·åuAt§'&ÈmYí|¹ ȵ÷{€\½dÁq3öÀ¸ÓÆÙ{`\ûUƵd`\»ÐÆvŠÜã–½9o^,×v„ÁríÕ@7Ÿ­€®½Lƒêʾ/ n¾„ºC P×߆€ºYTAÝ(˜nô Bº·“Ú@ºöP¤k?Á@ºù® éfóÒµm`p]|×Íç¼ 6Éß wûiÙSû÷¿‡ìBH™x7ªùn@YòÝš*vqAð]mÐ6èõ†É½3Ö½¦½­íªœ½­ÍcŒaÐ;ì¬zç]F›½žOô¶7žþ½f2½ž{™öÖŒw'ÜÛ:.ˆÁŒ{çIRiáÞy¢ã†h¯'L¦½-F…µhou°O}-A¯§€/ÐÛ~…|É ·L]Ez[ÿ?wñªIoL…Dzcä-Òë#oLz§KR ‘Þ Ç¯$éõQ9&½ž/ ôN§T½í’Ô1ôN§Öí z§SÚÞ˜W ôÆäQ 7Ò ôö™oLòÄx£ôb¼5CŸÞݰoÌnÄx}¬oŸ×ÏæÁx³ì§‡Ðʇ¤7¦)"½q_"½‘N¸7îYÌ·¥Ãj£™oLøÅ|§ƒëžf¾U–¹O§Ì|£œd¾1Õ&ó–E曵Bæ›w'syߘoͦ €óüÄ|cz/æ°FÌ7&\b¾ƒªÚÌ7Ò‘ùf:2ߘ->™oÌ/Å|«ÁC/.æ[w Û1߸k1ß(½˜o<1ßLGæO@Ì7€’™¯kKà7ïà7ïä7šºÈoÜÉoæ&·î­Š’üF™üúy‹ü¾Œ˜o+üíúXÖÕhäë{6ò’ùfÉ|ølž'¹ó}ßh¯i˜h¯ïÕ°×ʰ×õ`Úë×E´·^3XÕ˜ö¾ Ÿœ—Ìœ7J(ÎëZçK’óšÉšóú51çu'çe×’œWLTœ7bä¼ñ2˜ój:>p^¶CÓÞH'Úë*3íÝz¿fÚË¢½Q#B¾Ñù^’$òe«/žœ‘¯»#_?#ßÀÈb¾‘.˜/ëÅä7îä×Gä7`·È¯Û‰ÉoÄ‚ü²ß1ùuQŒ~Ó ýFLèWp0Ðo”%Ð/û£ß(§Ðo”å~£B„~ƒÑŠüü3ùÕEGò‹W'ѯÐu _WJ¢_­9$ú Bhô›—ú}vÐ }ÿ úêùôU_›Ð7‚†¾‘cR_U¸©oT¸¨oV¸©oÔiâ_U›ñ¯ÞÏÄ¿QAÆ¿±j`þ›û| dñeNAàÈÍ8j5ð³aýæýÆ6úUG˜è7/+ô;5˜}•SÔ7jRÔ7Œ¨o@_mç 
è«]·!êÕŽÄÁ©w"1 §^`´pêÕ>›P÷jOE¨{C`,uïjÏ©{%ʶºw‘(UêÞ… $Ô½‹\¬î]8½1ô­Ó*ßf«f¾m’ŒáXH|íal‰o¤#óÍt„¾ëÌÊÐ7„Í‚¾%l†ŒZä·V(~ßXSù­“=_Â_l‡‹d¾-Æ“˜oÆÈ|_9™öV £D{ój¤½-Æ¡™hoƈ{3?âÞ1Ö7ÂÅ%{3DÜ›1âÞuÒ¤G¸7Ò ÷Ž1ì„‹k’ùÆÝ‰ùf:2ßuÒ˜VÌ7ÓÉÂ!Ò‘ùV: ÈŸq1y8Dy8 1ì„sÌ&. M\›8DL&®d›8øÁÙÊÁMÝVqMY9Ä5iå!Z9D­ÈÊ!îNVNg+‡ˆÉÊÁE±•ƒó³—CÄäå1y9DL^QNy9D:™9Dº§™C¤™CÄdæà«ÙÌaˆa¿›Û¬Í"&3‡¸¦Ì"&3?9›9 1l|ó½ÛÑ!ÊIG‡ÑÑ!BrtˆÜäè໳£CÄäè19:ø´£ƒËbK‡H'K‡ˆÉÓ!Ê"O‡ÈOžqÍþ¹<âŸËÔÁÛ¦NgS‡H'S‡!vnŸ4X›:D:™:øþlê±Ý§MD Ã׈ÑÔ!ŠrjsšB—7§ ”ÊÔÁµiS‡ùsœ×œ=~UlñøU±Õã×OAðÓøõSœ÷pxüªØù¿òãhS‡!öÙ(6L"&Sw56uˆ˜L†ØÅNV1™:¸­ÛÔ!b2uˆkÊÔ!ÒÑÔÁÍY¦’©ƒc6uˆ˜L"&S‡!öéXÖ®[?ËÚÖÁíÒ¶C cY·KÛ:Dº‡­CüxÅXVÈV¾J˜¾”¯CçOdzéëAù:ŒÁφ´éëAù:äeåëAú:dŒ¾™£|"h_‡98éC[!ßùÓ±mú:ŒÁó“Ž7}2¸}:¾M_‡ ŸŽpÓ×!ƒO_‡,Š|"h_‡ N1Ì•¯C¦”¯CeîA™;dpût¬›cð³ÑnÚ,m¶¥¥‘pÄ„„#&0l»Kƒa[Z G:áH'01á¸&Áp$#ŽÀ𡃠†3ö5 †m»5¶)§Á°> †#&01á!†Át”S`8Ò=Àpü(0ŽtÃŽ ûšÑN`Ø•l0é†#&01ሠGŒ`8n`xaü1áð­vs6ö5ÓãW׎taò«˜Àp\3L~•.L~#&—_]óíò«_Óå—W —_¹çþ—_ÅÂåW±pùU~Ñ.\~yÃCL.¿Š GY†£(ÑJ`xˆa”ìK GL`Ø×4ŽtÃCìú¬›5>xlI€áH'0é†Ábø‰`8b—7³1f0±É+&0ìÛ2ŽtÃ[íZ¦˜ÀpƇØÅN–U,0Ɇ#$0ì˜Á°kÅ`8Ò GL`Øw`0é†#&0åŽt»G²ŠÊ*v¾Æ²*ýå±lÄÎÏ:YƒáˆMË*6{,«Øâ±,Ëb0±ÕcY]sóXVéve;<–DzOv1áHpy˘™pÄ&c›=Œ˜†±ÏG’Õ#XÅ6`_—Û=xí?ŸŸu¨†À;ü X—Ç­ÓK {ó˜U‡T™ÿ"Ñä«Í±:¸xÈú¾âê᪭¯¾m«¾ß=N>’ýò÷SãSç}i€úNrypúúÝ\¿O”¾Ÿ= UÞ¦¹:-!i.®¸z8ê+n¾í‹¾?<Å?_ãÏ÷/—Çž¯ßjñûä1'~×xSua@‹D‹›ïß…eñûæA¦Ol‘Íì&‘0¿Ÿ^¾¾4´|ýlúªß5¤|ÿ.æE5sE"áVü¾z鋉´"уÈ×ïâ«y±#F¯DB«ø}è`¾ªþ1Ý6è¬ Xõ espÕ•>3ÉU¿ JÛ«²¥m¨wgŸ?ƒ½PÚÊË?xêbëñÔзn–*œ, yêÍ$™<õ¦3¼ÄSÛìT™TuùÀ>÷¤íFxè<`ÊHµ^ßm¤Z-öêÞ»òÐÝy~Sxèî<¤)º‘L>ºGÿž&?kŠŸf:ùèF:ùè4 ]¹J™Ÿf:òÓL'#]û™ÛH·U-=„Ï~Âø4cä§áo#]Aü4Ó‘ŸŽ±>Z »q;éÚ\ü4Ó‘ŸV·ˆ‹øéƒ˜-ñÉO+lŒÉO3D~1ñÓº;1‹Ÿf:òÓ¸sñÓ¸;;éº6ÅO[ŒGàŠŸŽ1¸íòZ?Íùé¼ÓsóÅOÃ_ü4KA~º,²´?õ8Ùü´ê Æ[(t:ÆtîÃër¤¦ø™À4¯D`/š€i¼¼¦™ Ô4z9QÓ¸¢¨iÆHM£!ŠšŽ±“½ªbD§#:—Bè4¾øi^“5^BAÔÌïQ3#BT'’êoIT#pª›~àT¿ÕS‡ ™êpÙÕ§@8H°:\–tu¨"V÷ æ¬C œõƒ²» |E\3¥°ëÑÁk—èµÅèc*òšé^3Føš1²×¡ °U˜N=‚Â~¼ØÓê68ÖÞ–Ádóz³Ct¶> 
ýl‡@´Cˆv¸,9í$¬.Kb;Il?b`»‡‘/±íìvH€›1Q\ýåAòܬñÜ|Ô‚ºÃeIv‡Ò’ì)‰w³~Äx}hO0Þá²½íÑâô„íUûþ>[Ý¿üpûö/írg»Ü^Øvš·éþÇŸÜž…iÿàñÇÿþ῾+·º½¾I«;¢Õý¸0ßŲg¦uƒ¿ð¶ÿÞ™NÅP^÷úúËB<òw/Áq{çÿã—æÞÆSýþ_ùÚû¿WúóÁ?ÿRßЮ¼ÿŠ'ðÊ¿þøµ¹?*ýÕðù¨kþåWJrLµÞpœÇ²žë·ú>µAëx§y¯BýÓOOïœÚ×Vðo?ýòíßþ_ëdÿáÛ¿ûé_øçŸ~kNËYRõý8ŽVÂßœÓú9ÕrÌíXÏ©õ¿=§=sú’¾²wÑ÷Fóì•¿´¯ä^h¡_÷y¸/ÁŒùßý¢Üncþí·/ú<îÇ÷ÿøõ‹J°|ÿò¹³É¯XÀøšÖwÏýÕúœÿµ¾{ ­ÏùIë»÷2¯Ö§ü¿ªõÝK°|?Zßßþžìç¶Ö €çrìís²×œûZ¶*Üð9iCÇ×äwµœË´ßÚGë\¶m;%—íå²Hߦ£Õí1¹\¯\¦?v3˱•üÞþs¿–ͼ›mÛk¡bÙÏÛô+ÙÌy7+‹š«Wµœß–e«§¿Ïmúveõ?÷þã/ÿùË/ú¿¿þéÏþóŸ~Ùÿú¿ÿ³þ¡‘Å·ß´[²úÚÏ_gဒô¼Ààðõ©ý’WÇ:×ô㺴„Ë1Ý7Ej¶{ÍTöpçc‹-?>×ñ[qÔä=ÏŽ*[lýq?ÞêµG¨´àf;=h Zpÿq¾°©«Ù-xPæ#_¹{ðš°©X^¢W?Dá®æé¹PúgJŸ©]ïÇeyËÚtu NÒú¨^+ØÞ¿'i£ð7>½gÊ~η~ŠF .í®_@DÚž[©û¹D5¦Ûö㣠LO¡Q׊´àNéÏÁ3ŒïÁ6œ~òŸŸÝ‚%@G_º[6Ë‹‡Taßût[ìzh€;µß[M½Úq ¾×åžA¬£ìjríÿ~loÿ³!·à‰sÛ¦i¦Hº§lØÛ«%û¬ì\)j °;Yßƒ×›íµ”8c¶7Š´²Ó‚­6Þl¯,‡«èT§Îu9ÐöㆽØS©ñf{uÙ 5^ÇB¾AYAAO W«˜6ƒ|S½ºy7Õù9] ´éT¶©N~S½ÄÎÝ\¨Z¹ô<ßTEY©Ê4­cxó<Ÿ»^³;J‚$DkÁÖŠÞ¿.Ç|[8ø•ô¾W~ó²+¿™rãX²ýÜ9vÛžÛDlf×û¼Á“c_÷ó<‰%ùˆæûÌh·|òÏ…¿xps›-ìuœ9ö’´àÅ^—-i®3z´Þǵ©×ÆîÖ™mùN8#±Åv|ã±ÖMìmŸyñN*¼¼MRØ/ou¯x™ˆ—+!gœdÌ-¦¥11æ2†eO)Ð< š žb<`Ð<äIÐ\AÎÿš[PËqÍCžÍÃe š½ÔkÎt¼ŠfÐÑPÍ ‡ë‘AÛ??ôpY¢è!H¯•YôP ²èá²DÒÃe‰¤‡ ‘ttBÒ>(ÀH:/*$½‘‘t]Z'#é!K"é!H$ýAö‚$’n…HúuôAÐèá&H£‘†z=%Ÿzqèþ¯E ‡M½žÜïúƒ œó²$ÐCJh”“ì9kBì¹ '˜…ÙóÙó${öéfÏyƒdÏY±çºèLLö<¤${ŠCöl»`ÏCždÏÃeÉž‡Ë’=—%{Κ{ÎËŠ=gÊmhÔß·£õ÷Ú„nÓ¨·5È(v61„Ž˜!´ëN¹ó,h½”?²ôÎ7ꊮ—GTœ(ºlŸ0U1Š®™¯ˆ²Pô›~™B{l©sL+,uŽI“1tL»…¡£#6†¶õu`èÖú%‡Ž×Ó ºÎk'èˆ^7 ‡¢C~n]æQ¤ƒ¢Ñ>JüM£C•nmgÊÀÑ> >pô:½û¿Ÿ“FWÿ'„K]øJðwõŒº*atŠÝ£‡«FN#ÃŒ¾‡‘TKê|ÙHêÜ:1 }$u¾Ì[É¢×›ì?Bïì·1ôΧٚõÎîBï|Zõ¼ù±êy÷„Õªgw¡znµüYKð¼{"kÁóî‰ÓSð¼{®eÁóa„cÁsÙSð|¡µYï¼»3²Þ9T÷)z^ØÚ,zƒÐ~ă°èyÓgr=SÝiÍsˆ_Có¬îOÂç`[øœªàÉ ISý¬â¤úÙA©ŸC2šêgCýlQu¨Ÿ õ³¤`V?[#3¨Ÿ¥’{©ŸCÏfõó¤ï¨Ÿ'¬~–Œ%ÔÏ!¾³úyên~?'”®à…íV?[§êg Bý<ùÃbõóDèT?[¯bõsĤ~¾uó»ŸJÛÎ5åÏ·n øsBé!(ùóM+½V?GLâç[7¯ú9‘´aIÛ“4´ \IW½ÅÏ7¯g’ÎBÒ/£Õ€Ñíg­`‹FAÒh›‘¶ÛjÐh[Ó¶÷kÐh{s¶jàhÛ¸¶¯­yt ‰GÛ]5x´ìUGÛp6p´­6GÛ 5p´mTGÛ6ptØŸGg­ GÛ‚5pt>áèp{áhÛŽ®¢P =‰£ÃPÊ8ÚVŸ£m&8z(-qtJGÛß/pôP Ó>‹’GÏ»Ê ¤ÃrN@:ŒÜL¤£¬FÒöj3‘c,iûÊ™HÛ°0ˆôü¤ÔÑíb Æ7ÑYD‚è,9tu_J“CWŽ^?8t¸š™CG9„¡ËM[ dbhŽðN MÛz(1ô¼Io` ]- z cè¡@ÄÐÑî¡+Oj…¡£õ C‡š0tGºø0õ™ÂÐuU ‘‰¡‡,W[-ª~„¡‡<‰¡³ 
„¡[ðlšz(-aô¼ñ´â€Ñ”È›H:ó|"éÌM`z^µaÛ`zÞ4+6˜ÎÛ˜Î&)0].‰ÒMLAâé¡@‡ýÕz„§kZÜ`<=¯’¬OW ›x:Ÿñô\s¨þI5ž®»„6^t:k@t:[¤èô¼J òÿ‰{›^Kr,»rž¿Â‡Ò R÷Ú·AÐD€$@h èœ BRªP…ÔÿGóØ»×¸ÖæóÈÊתR…û=N3šv¸¸¹i:w@tz¸HÒé¡B¤ÓÓ"éôpNÒélz¢ÓY¡¢Ó>ÕÒé¾eù³ýûß„¦×¾©Â€¦±šŽÍ›Œ¦ŸJG4-ç ¡é›™JlYTz½EÕÉ£XçŠGÇâAóèyåNÞ†Ò±¢TPZ{´“Îñ¿í7Œd쾡@Ò}#Ývߨ½e<Û~¶Þ?%’ÎQœí7žƒpZi·Æ3BÒÞß#ôPòIç¸KH:‡sBÒy6!é’ˆIç0HL:³y1é JçxEPºƒ…£3Žx8áfâ,CâèÌâÅ£óúÄ£#¥3;;ã–HX<:/D<:‡OâÑ·W| èø®E§#Žqƒyt«>x´skãèj GGg×-é¡yt^¼xôôØ«óm«©‰¡ct` #cèH¨Ì¡ÃÕXúUGâç?· ´ÂÏȲy§DŸ³†¢Ï‘,š>G‚júç >ç\c[¢MøœÏQð92^Ãç¡B„ÏCÉS9¶ü´Ÿ#…4|ެÞð9¯Rðy(Iø<”$}ŽŠñsÕ–Ó ÂÏ‘ð~v’ø9oÂá$Û—r:Éf»¼ès´8Óç8˜ôbºt\ºt<}3訣´‡ ³²BÐË=ƒjú÷Òôy ‚>çE>Çã}Žr¦ÏYPøyñ2ãç ?Ç­1~î·Æä9Z†ÉsžNä9î›És^¢ÈsÞ‘çhpoò4y^,0yŽ‹0yΠÈsÜ“gʃ<çaEž³¤ÈsN†ÎyDAçx ót„Î1ÌtöƒèœAAç Š:0uöƒ ìØÁØ9KŠ?ûæΠtÞ èEBŠ Ðq^:Š˜@ßÏ>è³~?ÙýúÏ‹–øxÆÅœ³zbÎq‹ÍœãF™9gPÌunŽÓ 7ÇÅ 7.3nΠps¯‰IsÜ“æx&Í‹Diκˆ4çÕ‹4ç9Eš³¶"ÍyãDš£¶oÒe’4;(ÒÜo¨!sžH9n‹!s\œ!sV9ƒ‚Ìy΀Ì.Ù×/È<‹¾2GÁ€Ì:£!seÎ3ÎN€Õ¦L™³dPf—Üœ»¶¢ÌYÛ ÌzšI™ßÇ€9*jÀ§{fTþǬ7ž?ÿ&—ç6B»»ºQøüè¤7À2tvÁ•wY„WÞ‚èÊáy‘þ($Ï©¤–Åó,ÏØö§ßfñ§¸9”Ï.)“çÓö 6yΠLžOQL‘æ¡>>«œuÏ­2zQ´tÏ“ì9*ÙsXާìÙAÉžÏ_ÀÌY©žÏ ÛR=ç)%}ö³ò9«ó¡|ŽÃYù|Å•òÙç²ð9b>Ÿ&þ>ç%|Žûbás^ˆ„Ïqí>Çã·ð9¸•ÏYÄùBÍP„=ƒò9Î(öœ¯ØópX)Ÿó°dÏuXÎ)ˆ=‡%{‚R>ç9 Ÿ³Ó|Kø<Ôö>çÏ‚ÏCô9«"úœ·ÜÒçhX–>û-°òÙï¿…Ïñ†Xøœg”ð9Þ Ÿã ±ð9KásÄ$|ŽºZø<Oöȼ;Ö=gA鞣°ð9ƒR>Çí±ò9nmŸ3(Ûç Êö9ož”ϼlŸãh¶}Ž+°ísÜ5Û>çaeûœ%eûœ%eûœAÙ>gP¶Ïñ¤lûœ7F¶Ï§ä‘áþœwˆîÏ“ûsÔÕ&ÐQW›@çQebè<§L 3¸z#V&ÐYR&Ðy)2ÎK‘ tV&ÐqØ· tþ,èXàfè,)訧½ £yØ :KÊ :KÊ :KnN’]ÛÝI²ƒ‡“då:•%K˜*/è3ÄO§É¼°„>Cä<;Qþ¾ú“ eWhu¢ì’›e×vÿ~¦ÎÐn^á í‹á z9CÇ5Ø:‹LN•]rr®ìàìdYhƒè<çêdهݜ,gPɲƒ»“eŸóp²ìàédY—"£èŒÑ(:j£è Ê(:ƒ2ŠÎS.N–Õtl‡•QteÁÝɲn»¢³BrŠ>C¼-§è8ìÛ):ªb§è,#§èèwì‡,;¸8Yvpu¶¬Ë·StÖvwºìàát9ƒJ—<.+(§è¨¬œ¢369]ΠÒeŸqv¾ìàâ|Ù‡]/;¸9_V'i§è,¹;_vðp¾ìڞΗUÒNÑQršÜâêÇâ꣯ qõª¥x±+¡MacWB»´Zö„Öajh]fžÐJZ‡±¦¡un“(›ŽGxb˦ã¡éë_íÕ‘F޶ŒÎ`xuÈTÑ–ÑaÇhËè ζÇ;¿+Œ‚«íñÜlw„jZöxò‡3³ 83ë žo{<ýlf=e'‡3ë N¶Ç£©œ‘µM¬‡˜¼ñ[m§Øfg<™.ب#ƒ‡­ñdSA^=ÄNyãÙÞâas<ŸvÇsp w<g»ãeÐöx .¶Çspµ=ž|'Ŭ‡àþ}<3ë!x|ßï­óWAë!ø´?ž¼AD­‡àþx Îá§àb<WûãeÐþx nöÇsp·?žƒ‡ýñ$¶Ž˜°õƒ?^øg[gIaë,Il=…­ÇàÁ>ÙÁÕþx[Aaë [Á“}²ƒ/l–¥ÆÖ¶Î °õ<Ù';Hl=É­‡ Áõ$¸‚ןvÌ\A€ëŒ\1‚ëO‚';fI®‡ Éõ iI®? 
žì˜ iÃFÚÁ°‘¦]è`#­àËFZGKiÃFÚG iÃGÚÁð‘Π|¤ iÃGÚÁð‘v0Œ¤3('ií$­X8I;NÒ†“tå$í`8I;NÒ†“´ƒé$­`8IgP©²ƒ‡SeÓIšÁî$íŸÓI:‚J• 'iÃIÚÁp’v0œ¤3¨lÙÁp’v0œ¤ 'iÃJ:ƒÌ—åC^ÒŽ…™´ƒá&í`¸I;nÒT¾,'Ý´“v0ì¤}ذ“v0ü¤}Øð“vÉð“¶/òÛOÚÆÏá'í"á'AûI+~Ò†Ÿ´+ô[ü¤3¨|ÙÁ3?i›)?”/§gô¯ô“Îàä|ÙÁÙù²ƒ‹óeå'=•/;øëý¤3x8_vðt¾œAåË ¾½¥óç§óe'çËÎΗ3¨|ÙÁÅù²ƒ«óe7çËîΗ<œ/gPù²ƒ§óeå3±§òeÇ&çËT¾ìàì|ÙÁÅù²ƒ«óe7çËT¾ìàî|ÙÁÃù²ƒgäË Úg:ƒÃëÇÈõïô™æ&‚#¿>ä#üz“QDðë…~q¯g‰­|Í•µ¡¹6  Íuú^Ks}ÄvˆÒ\§-‰4×[ìjHÍuªÇ©¹^í‰iÍõ{Js½„U†4×–‡æz¶[¤ùõlcGóë©'5? ü: žÍ¯'»i˜_[”üúéíý̯íÓÜ%×žÔ ¹–4<%׆äZ6)¹vðWK®Cà&€ÊÈT\«`®m ‚kITl5íYb+®Û Xr}ýd¥õá¥ó–ZÞ2.-¦ ‹i-r·ÖúÐhK­/È·Ô:«¥µ>¼ŽÝZëCÆ –ZgAi­w¯Ø}i­w¯M—i-¶Öz B@âV[gPjëî m¡õîù5 ­QHëÝëf­±Ûqk¬3(õîé<Âê<¥Ü¥íäëð·»tå.Ai¬7Ï×Ù]z B6¶äv—wrk¬3(wé<¬Ü¥7MÌÙ\zÓ$Úòò–v [Kožv±µt˜¼ÛZÚç²¾z33·¾:O)u¸À[a½ý¨Î)…õ„d$+…õf""…uĤ°/x+¬Ç $#›rÎXgII¬Ã(Þë,)‰u–”Ä:KJb½jÓXça%±~{¹…¸úþù’U¯ÚC'dÕ”¬:*fYõªýcBV%%«îÕ“ :«"Aužm³¥ƒT¯Úx'Õ±‰ÕyX ªWmÊcAuÔ‚ê JPuµ :O)Au%¨Ž ,¨Î Õ” z BPmWÀT· —^ZP%ß‚ê(cAõ„ :ƒTgP‚êö‚oÈ®,¨^{Cú)±ôpTgP‚ê<§Õ” :ƒTç9)¨vA ª3&AµoAª³¤ÕYR‚jízêUÚÐSçQ¥§Î’ÒSgPzê<¬ôÔYRzê¾ôÔù«ôÔíj±ò8ôÔ¾3–SGLjêëX"ÄÔQnõŽ+ŠmÞpÅ•”: JI1 ©ó Rû*¤£ŽÐS»­è–QgP2ê JFAɨ3(õD>ì½aBFAɨó°’QgIɨ³¤dÔ­$Ö³ß2êüY2ê8›eÔYR2ê,)u–”Œ:ƒ’Qט¹KCF甌:ƒ’Qç9%£Î dÔ”Œ:*+u–“Œ: JFÝÞyØŸ†Œ:KJF%%£Î dÔc9qžS2ê,)u%£ŽÃZFÁgöî?f+ý×?<¾ý·?´7ª=…váí§e›¯?þôñÇõ£ªõ>þøþð?‚½ÛÍ=¿I¯=}O¯ýie~ˆµÿÆ“ÖþÌËþ»Ÿ´5ôJ?.öþÛLÉüß½-WïUxýåKk°´o×~WáþÛ×Þ…{ÿj￵¬êû¯yï*|üåkkðº÷ï÷àõ·ÏêðqÔ¿þB]Že-v0­­ ܿի% ­CN[Uë?ÿéejÿÏö¿K«ú·?ýüíü»ÖyÿûoÿóÛŸþûþËŸ~í™ökò£}Ú0æ×Ÿiù35M¶´{Ký™¶<Ó—ô½Ëþ»7›W/ýµ}'¯tFÊôeŸ‹kã¾ñüׯ_TƒÇc<ûí‹>—Åû†óüúE5˜?9ÿ×ô”lòOP©¯ùbÖÙß­Ïçÿ¢ÖwÕà£õùü_Òú®^æÝútþ¯j}W æOÎÿÛ¾‘ŸkܰÍõ‰ÜZÖ±ŸóZ•>'oëÇ×ä·µt–š©8Îc^×õø…³¬¿ë,íËX»,îíÞîÓx–ó}–çﻘy_kzlkÿÓrŒ_8Í:uÝʧnÞŽÇóN3åÕü­S\‚ãÜëóZO›ÚpùÌsÔ®3ü¯¿þïŸþó¿þóŸÿò—¿üùçíŸú—ß•Y|ûu+h[_ûÇã±¾uDkç÷Ã×§wÉÖçGK˜žkßu»ô ïI³ù± „‘åÌ¥YyûÊç>.„kòÌÙÈùù󭔘¦›%YyÎ’tÌ¥YyÏjoÅJYŸR®Üè±U{ëVì^º=ž2œK¸òÆ'ëyhž~.áʶ|`õ\¤\šŸ˜©nL»ò¶:ÿñÑþñ…$Ö©†×Ï7â]C'ÎÏš©Z?XWû?Z;;?·>О¾&tægÍL­Üi=ž}¹ùÖ|Mʾmë…õꕘâÝŠ×êBuï´´ ré¹DFïÛ±–0B•=o_GåîË;Z[ùh¶í¨ÔþÍ%x{cÄ:*ÕtsiäZ“ùh¶Ë)Û›90d3[J‰ùêz« L+xÏCWIz?´ÆÙ:žW×»RGr×›Û¶ØÇO[ïd×e—^ln¿'ÖeuKZ°Kñºl²êŸȬÖâ³|xAä¼õ\æÁoL^ç¤ e.ƒ÷+·¶û°Âš¡‚÷tøZj v«íy¶à«õ–ñ/Œm碊ªççµDï>·ì °'RKú®6CËà 
¶Ä®7anv>Wvñžñª’”„ÎkM£¾{Þõ©»çÓ¨T?Ó^é·®Z"÷I®Ø=A_Aê:çÖ‹ÜOpx3ªOzíj4}¬ ¶”fUH½Ú†©’*D_ß ÞõÕÂÙÓ·Î󞀫FLµy[ouS5X "ç ¦ R¦8·~z¼»à‚û|…«£ONUIî|UÁãùàÙp–ÚÁy/Yê»/.!2û”ö±º[ÚðîíS»Ëᄌ¤dìÆZ𞬒”8Î-‰½§ ê ¢jÞ—>É\ï§‹ÚSì&ÜõP¤1ï[—0Ôa©YªàñN:®Ãò;»ï]S/æÉÞl?ú‹z½{Ð7ÌûyO&UìCˆ3·lãq¼ûâê0½RÁ{~rè"Z³w_\é2§|-1 ›þQBâ»+ž¸ ä|@G|Žm žÏ»'žüu;z.9ôFGí˜wwÉOw8ÇÞ§³*¨´ë¨þþî’[Ç”ô€pp]\Á–½³àµ¾u¯[}>úÜûpºóÙýÓ×knŸ°–ºÞz±=ÿ;ùåïé÷Ò½»ßVmî‰Y,öžKS¡–žÜ9oÉ•!1¬àý®Bô€ŸÏšû÷¼C“ÿuØcæŸ÷bŠŠÍP”µ>©«IÆà³[ Õ)ù ©à-Sª’ „yK¡Õ&ˆ¸ò¥ËnÕÛx°¥ëà¯>¤V÷>W\ï7$¤{Ûà^¯>FWËÛÖÛCÍÒÆeËöF°<»Ô[ÀåK—=î´w™5ZÁ»³Œ—r©™‹;ýÞaiã²;W® ¤Ë³Ž³öüaƒµÓòœûÔõU°ë7*vËÈ{7ÕjÞÅÙ´áq¿ûcpë›z븼gi£²{wUc ÞÊ­ 2ßn×Õ3¸ñ°}UL&O-nÃлÃu·¹´aÙ=‡ž½@Ïžüæa?¦šJò G«¥†ö=ù}hw‡e‚¾ªÂÕ;Kí°¸¡Ë}`UÁóN~³àÖ?„uJŽ–Ò­ìèr]òè÷x%йÖËÈ•=¼?„ä–½íîb‘ìw—ùÙúXrê¤1X;£óuª¶«¶UUð˜Ð ë•zeF7ìöœì΂‡ÃîývVpǃåR¯ /öa¡^+TNW½/>áµ”xå΂ó”õxï,¸F’èªAm芹«‚ÕWî/¼TÏu'ÁC¡¥/Î[1ú¼bk—_Ÿ0dqKe’ïø“Ø=\¨êp¥râ;æêŽ+xtV9Œ]J¶rçÀ¼,ÙÞýq­½ð«àyçÀ5tÿËŠ•eõ;ÚRãï;ý­Bü6´±Ô­Q®ºÐ¡¯}nú¨§n‡G¼5=U’C š»Þ{ú_Ž6^[{úë$viãµ›¹ÕQ=káê;ý]±œä¼÷&XKùQZ3½ÑY1£_Ö­z½ûä­–›¡ý{väð§¬åy'Á-å'-¼¦8Z2Fóc ^ß»Öÿ­u®¦Ìí"ïáÓ8Ç;Ƀϔ¹VT 5e®¥LéE™Û}ä­3e^OÝ:cæzt°B5f®A:‡sÂÌ\òSbæjKb¾äÌkiÒˆ’Á™s­¯8s|vƒâÌ1'g¾ü1Ÿ„» Íµ„X´‹´Y»Dý´¹‚¤Íi»)Ú\K“Ù6WL°Œ¸9ý:…›Ë¯“»„7Wû@¼qsz| 7W.¨ÂÍä2OQçZ^-¬ê\1Q RçË9Ë6EsѶ¨s±lSйb ¤ÏW[‹>_¶¢Ä‚¤Ïé+*ú\AÒgl0&ú|ýNŒFú|Ù†"}NOQÑçZ³®Iâç+HèGüœKá…Ÿ+È%ÄÏ×2y¾­ÄÏCø¹‚\m'ü\A®¶{‹ÃôKøù³àñ=áçšÔ ð󇾂ìqÈ¡· -ãå ‡¾‚¤É‹\*3]AM~‘Cœäд"‡ƒàÐéw ]~¢häÐÜ„vWÚµìFbäÐ4ûÞè´ûcH âÐc°sèŠ=È~É¡‡ 9ô|ÒåÌñ§Î¡ëwÍTnòÉBàÐ2#‡¾‚0 ‡c¯6'äÐc}º6äaÁ¡ÇÊžtê‹ 9ôpXrè¡¶äÐWSžäÐcЦ‚šû#‡ƒàÐií!}9›O=wº”dz žpØÙîñO£+&>ü qÓ,z ^’ìáW°è´5Œ®à““&„Ñcɣǂ€ÑcÁF?›x‹`tY©(W>业A ʇÒH•DºË¡–ˆôgÁKAy*¶ ¬=¨ÇàJç¡UCQêÏ‚0¶Ê (õv™ •<嘖ÁN©3&J}1!JýYv2ü Ôãï3ÝC3T]A0¢ê+øþõ£%6Ú¶Ý%@§ñëAÀûWðèúuA‚.= ¤Çà“æH«"Òcp¦³×¼‘ô\h—A€é+ø>!˜ƒÓi¬$0=¦Çà÷íš¦Ç Àtz9 LÁÕ( ÖHÿ`z,#/ñ L‡]á¹™1€é r.P`z, 0=–”Gð<{ZA‚é10='z5.š˜® 0ú,Ð…R qéŠmÝHXzˆJ±ÎÅ“þ$w¿b}ÐL$=Ĉ¤¯ ÔBÒcɉ~¤cðØ×á–IçîÙBÒà “Kv&ýY Fƒ“êzÐÀ2b Ò#î ôp»I¥óJ§œ ôPLºbœÄ“ƒ`Òã)7zjfLz¨ÏA¯Ö!vöD8o÷̈‰ôäz!"ÕÙæl1?¶èõ7;0†þùìªLœ>‘>'Ä"”‹@Céåiüi(OP:Y•íúvë}¥s$*(‰¶ 
t©°út¸™t&ÄbÒÕɉI‡e¡™tMBc6ÄL:ö(3“¶kŸ‘tì!l$»lI‡3”t}¼ÓkÙÅj„¤oï!Ãèö±Ô“zÓèO~‡áFI£ëó‹›"=?-v%Œ.Â_Áèòª#iŒnÙ€Ú…`ôüàT¨atûËIYš`ôtj.Ô0z:-ØŒ®Å¤m‚ÑUŸ®†1‹ž¿ªÒÓa‘¥€ô´YØ( Ý‚44®Œ‘óÒ•2JHM =-ä ¤Û3xwcdÑeá(NOÝΦwM,:iE·£J)ùBÑÕô(“ŠÎKŠ®’ìl…¢ËïóüBÑÓì.S(zZµ›Qô4™ EOs°v¢èvXš]Eç Š.[¡èòðü.Šn}ˆ™1Qt;§&@„¢«ƒao+Ý‚fãDÑCI¢èƒu}…Ituxß%Ñ­“Ô×H$ºzPÂD’è¥î$ºõËB%"Ñ­ä„©y“èçéÎìM¢³*"ÑUˆI$º•†‰®1u˜$ÑykD¢[AŽØM¢‡£’D?OOg‹D·sŠ–ŠD?-݈¾Ì”¨\&ˆ~ž$ˆ.§%ö€ÑOÇŸD·oúƆ.Ý+µ­@tYFñ͈~nþ¸D?7+P¢[…ø±‡nW¢o/9táN DÇA¢Ë‹hÑÃQ/]Àïª@tyS—!])g@¢[b&ñAtÅøˆ.fösÑ4iþ)AtÞrèá”äÐÏ•^éÆÐåBF)ƒ0tœQú¹:ë…~®”ŸBç5 B£ìBz8%!tÙ°ñ+=\%!t–C Aè:,ÒK2èËŠV1èZ«IÁ¨t\ˆtñÏ>Is#èálDÐqñFÐÅL§›3wú\Cн¡³dÐCMˆ¢ãfšHÇVƒé:,µ^Ó~ æÒ­ t`âÒÏ×oÄÑïßH¡ó¢D¡‡3B×°Ç{Îê¼Øsû™ƒ³çç$ Ùs¶H±ç˃ÓdÏÙþÅžŸrf öœOOì¹7±p²çO‚=WH[ìù9i–ÙsVHì9ïžØsVHìy¨áóõvkŠ>Aâç¡¶äÏ„jËz¨ t¶W茉@·wŸ6ö&Ð-HÏ|è¬ì›@g=… Û'i°‰ Ÿ“äeFÐCE óA A—‡ék•†ès]¥ÆÏCUÈŸŸO­'4€þ$Øtá2d«ÐY!èW×'ðœ•yÎû%ò<IžësÝ©Éóp$ÏC…ˆž? vö\ j>±çl3bÏ-H¥ŸÙs¾‰bÏUƒƒ >·ß@$~>÷ëŸíßÿòÌsFçI{×?‡ñ¾7‚¿ ?·ŽV ™í¼q(-}n_TÑçúJQ)úœXCô9ߢÏÓÓUø¹X« -ñs"…Ÿ[Îð^M(ò\d÷ù=òü<8waðœc9‘ç²&ŸyÎÑ—Ès rM‰Ès;Šž£Ès+h‡ ’çªåôâÏÏÕëÊßü9ÇâϤÖQüù¹háƒt JÒM]wŸ˜Hz¨'tdÊЗå4šªt:Ȉ@gIè^‰@W+“; tdÖFБYA{b÷@:/Sz(I=\& täæ&БE C;˜~KN/ ýœƒQ“CGºüæÐYMqèȞ̡‡ªC?_G~*@ü£ã硎ÄÏï3‘:U#uÎzˆ:ǸÁÔù“àÉl[³•¢Î‘ùš:Wïù]ꜭAÔ9›®¨s@cç|²ÂÎ184vŽ’°ópk…ã”ÂÎùÄŒ£>oìœÆØ9°óPaç ;?'Ïq;ç)…ã¾;O6Åvî°sÍã°æÎq÷ÌÇàÁÜ[MÝÜ9–¹sVHÜù†…›óTÂÍñ0Œ›= Þ족ys<~óæ8¥óÔß“ŸàÇ 8GE œãâ œãú œã> 8¿Î$ΜggÎ3‰3A€f³‹ÍQÒ¤9ƒ"ÍŸæ¸æx0FÌ“V„™0g5D˜£S7aŽK7aÎÊ0›-aö¨Îˆ9ðгoA æ EFÌ1¼s€)3æ!ɘEH š5ÿ`μxíôoÍ Ð,|-Îl¢™˜Y(Ôœ9 cpfCÏÎqX‘ç€â&ÏyöU&xv} žÈl­Êf>:hÖZ±@+Úø6´ƒ7‚V5 ól¿AA"h#öDÐ AgP:++íL!tTÖ:& Œ ³¤t@#è¨t–‚Žiý‰ðy8•Ès^žÈs– òìJš<뾘<ÇQMžcîÂä9*{“ç<¢Èsƒ<ût"Ï#]&yvA‘g'JŸž~}8n,Ø1lŽÓ6Ç=4lÎÊ:"sÎÛ`ØœõlŽÎÓ°9f8 ›cÂØ°Ùß,³æøDˆ5G̬Ù5jö§Å¤9.c›ó;þc¾OÈœ›Á36!1óÓSÿÆÌ³×š3ãJn.5ŸlžÏÕúqÚ<×€$V>Ï5° ‘Ïó9[•&Ÿç6^‘mêÛèù|Z:.£çóiî-£çÚŠÏQvÏm fq1ìžkFÉìžÏ‡5U²{>Nkcd÷œvݲ{>ŽNô~ Ô¼^ƒQJ„éûÜ‚ò}>FózlËîùØüÖÈóùØÜåùÜFØÞÊó¹ÕLåù\Våòë ésú˜Ëô¹†üòë és ÊR˜¦ÏÇb‡!™>‹Teú<iú\|B¦ÏÇv4}>æðL¦éó¤ésº¸ÓôyˆÑý¹‚lürþ$ØÝŸkç_hãdþ\䆊̟£ ¼ŸÉ^üò~΂´~>j#x|9dý|L¿`ý<œ’ÖÏe€Ïi(Y?ìÖÏCÖÏCÖÏiº/ëç!ëçºLɧiý|žv1õó¤õópXX?õ¤õóPÖÏÇS‹]äü|þË÷yÒ÷¹*ÄdF¾Ï­¤åÜð}Ž[ óç<£ÌŸóŒ2®)š}™?·b÷ š?ç&rNF÷ç¡$ÜŸ‡³¹?Aº?­ø¶û3ïÄOÁŸ¯Ã 
ƒÓý9Ÿ…, ëœÂÉ´€Î–# è¡$} ‡’4ƒ‚t„nA.ú·#tµ•€ÍݺÊû×îJ~ÐÃñèƒ~Э=kÁ«ü £ ý ÷S«ae±—tÜi[Bï§ä+¶„ÞO-¢–#tE‡%ô£%ô¤%tQ–ÐÃ]£%ô'Án Ý‚ZE-Oèê\(t“'t±@ΦÐûé%Ö2…JÒ:¡ó‘È: Ò:ožœ¡‡[@gè!HgèáþÀ:ﺜ¡óÈú“`w†‚ÎÐÃÏt†®k 4RÎÐû©%@v†JÂz(Hgè¼mr†nAŽTí =Ü7:Cçs”3tÞ9CA:Cç¥È:‡ ¡ó:å =é ½ÚåÄÎÐ-hg:Cg“”3tvõÏW]¾g ÝÊØ÷™¦Ð-H“U›BW=)•+t^ÄÛzø®Ð¨¿ ¡‡:Òº¹?‘¡‡úÓºò.î Gè bälGèáÊéÏVŽÐŸ»#tÞÙBç=-ô¤-t>}ÙBç¥È:o‚l¡‡ÚÒz¸NÚB%i ýI°ÛBç#£-tÆd Ý‚r[—-t r¥_ØBÇ Ú>N…u~?Ч?ˆ§·~£ÿÆd}Ušph×™pœ†A6á8¼vYx:­;í ý0ײ Gø—Ú„#ŒFmÂÞ6á¨FA½]8Â÷Õ.aðjŽ Ê…ã × j¡ÓVbèª$Ͳሧ"1ôrÊS@Zè´Û¥ºv^#Û“:M|m ÆÁ6†ÎàÛzüÎÐáSlgè ʺ;Ëúa×›B‡Ï²M¡Zõ¦Ð”+tå =?Ûš%ÓÚL8\¡ Wh®L¬¡µßWZC;ÖÐì:èÁýš:è!HtšjK=”„º8(ÖH1© 3FtÆ>4Ðù+%Ðá.tÆ(€Î„Ð’'…ÐCBèO‚]=©ˆ‚TD¯OË㤈ΒRD%©ˆ‚TDA*¢ÓÁ]Šè¡$ÑCI*¢‡ ÑCŠèõúm*¢‡’PD©ˆÎ ÑCŠè!øRDgU¤ˆ Q=©ˆnAoEô£"zR=©ˆþ$ØÑCŠè J=©ˆ‚TDìŠè!HEtù˜I‰LEôP’Šè!HiôpXJ£‡ ¥ÑCÒè!itÆ$þ$Ø¥ÑCÒè!xI£‡_)‚”FìÒè!iô£FzR#=©‘þ$Ø5Òér't¥‘‚ÔHAŠ¥? v±ô¤XzR-=©–‚TKìjé!Hµô¤ZzB-1«¥‡ ÕÒŸ»Z:¶-¹ÕÒÃïTKG¤Zz(IµôPré¡ åÒCAÊ¥‡’”KAê¥? vÁt%˜‚LA ¦‡ ÓŸ»`zR0=©˜‚TLA*¦? žL–¤bzB11)¦‡ ÓŸOæË~(¦‡Ÿ©˜‚TLA*¦? žHš%¤”bzR1=©˜‚TL<™4+(Åt^‰ÓCI*¦‡ ÓCŠéO‚“f©˜‚”NAJ§‡ ¥ÓŸ&ÍB:1I§‡ ¥ÓCÒéO‚ÇþY!J§‡ ¥ÓCÒé!éô'±“Ió÷¥ÓCÒé!H u%¦þ$x2iþ¾˜zRL=)¦‚TUìªê!HUõ¤ªzRU=©ªþ$x2iv²êŒIV=©«‚VÁáT?F®¯ôÑ;–Ïü£¹cfðkSáëã66\vËàŒ¯½wCxHÏ6£0¾žlth|=Å>‚Â×O‡˜^?mÕ`zýˆ=ýD¯Ó³™ôºÌž©.½nÁïʫ˴ù»òê jü)z] `È&Ò‡©ŸM¤wƒZ™Hïª6‘ÞåÎ&Ò”•ôàûm%½{ÒÄVÒ›ç!l%½™³ÛKzó–wò’ΣÊK:ë)/éÍSö’1ìá2ɰç-”ÂdØ­ä½!ñu+¤ùûHo± ñuí€+ïjâëÄ~ì¦×¤é$z=•ôºýE›¦Š^×åk BÒëÊ“¨]½N3tÒëëüÐW _糿n%-z&ÀÎ+y쬦vVS »•´Þ™»Jj×CPì*ø=‰õXMRìᔤØÝ ^üz(C~÷Züz^«èίó"Vòë¡$ùuÞqñëyýé|¾â×yâ×Cmɯ‡Ú’_‡%¿J‚_ëü:ƒâ×-¨^ñë*É5&o~7Nüz8"ùõpDòë!~]½s ñë!H~=T–üºJrÞ\üz(I~%ů‡ ùõ$¿ÎsŠ_×X‘ į[öåÂ×Ã)‰¯óî _×Q±²Aô:ÏHxåÈ®3t=/ÞeTè:¯Bè:*r=¼ÈuT_à:FnOAÜz‚[×Åqᕸuž‘Øz8(±õpqÄÖÃ)‰­3(l=‰­‡ ±õ'ÁŽ­ã2E­‡‚¤ÖyD­³ÙˆZ‡%µnAíB j=œ“Ôz( jÛ¨˜ZW[t˜ZìÔº9†º©ulmbj="µŽ‹0µêj=•Ôz’Z÷†Ôz(IjOÙÔ:++j=I­³B¢ÖCÔ:Z–©õpNRë!Hj=œ“Ôz(Ij=”$µÎg-j=Ô: ŠZçCµÎúˆZç)_Ôzø™Ôz8©õP’Ôú“àM­‡j’ZI­‡ ©u6fQëᜤÖyXQë<¬¨u>+Qëá°¤ÖCIR묭¨õP’Ôz(Ij=I­‡Ú’Z%I­‡;jE­‡ ©õ$µÎʾ©õð;©uÖSÔz(Ij=”µ ’ZAj=•Ôz’ZçHÔ:KŠZ%I­‡’¤ÖU‹JM­‡Ã’Z%I­‡ ©õ$µ¾0]©I­‡K!µjKjÏ“Ô: ŠZç)E­‡’¤ÖyÊmxÌxú¯x|ûox®-UÝÚaÚO[½$g©?®ͲþÁÇÿÏþñGøx{Øç7 »§ï »?­ÌAùßxÒºÀŸyÙ_qÒו~üQþL÷³W:Ÿÿõ—/­ÁûñÞOúk¯¿?òë• v¤ÿ5wÿ}þ¿|m 
ÞíüÝä?;ûÇñþú µhtËLŸÓ•^|«Þ³¥ÜíËöœ¶ªÐþÓËÞþžßþamuþö§Ÿ¿ý÷ø÷ßþç·?ý÷?ü—?ýÊÓ´¯^Í…<ŸÇs:§¿qžv–÷‰Z½~û©Ö–à·oOÊÍû¯?Õ’§ú²Žãkz«x[ÿè"±×â×}j+„OÎýúE5x<Æó·ß¾¨‹ÜöO®ÿã×/ªÁüÉù¿¬‹¼›<>N_Óú®³¿[ŸÏÿE­ïªÁGëóù¿¤õ]½Ì»õéü_Õú®ÌŸœÿ·}"—cýãùÜkè2·ïIËì§ýœ×ªÝð=y|[ð§ynEãy]×ãN³þ¾Ó´c}þ÷v{÷i<Íù>Íów^Î¼×ølÞÚÿû/ç÷^ÏZ³ƒíOÛñxþÂy¦¼ž¿uŽk=v{ìmè7×”è´oS2žyŽúÏu†ÿõ×ÿýóÏþ×þó_þò—?ÿü/ÿü/ÿô/¿+½øöë–ÕÆWöÙç2®.x×õÜ€u­ Yû.@Þ v­=`ïIbÓ0Ÿ[Ëèñ¶i©0ü|ÖiîJŸôóYk›äÛfß´éç:m°CÜN ½×Ú¥è6ui½ -–Ök?ï·M@)³ IYK±q{³¬«*[šŒÛ~¥]bh­™–Ûg¥$g2¬%฽TZI®ó+Áe“µÝ:c £Ö$ X˘óv=)`­ÇºLðü+E¤Pu¤îï·L}¶ð n°ð›wíÒº–.áö iU_T¡F|×éKë¦Ü^{%'é“k{cº›^aRÐÖu¹ ójºš­£=œî‰W_Ì®Y×~ø:†¶‹„±]Á\Ú0¯›Nß´š¿Í7¦C>;kÍ»ßþ% êÓnkͬßUÚ²jžÝI®’FRëvÀ,®iܵîøÁ]ʦ.÷X÷'<ßj³y¬G^Û×®Ûºµ }Ö}…}[A}Þ ë5|·äz±Xù²#¸­&ZIª/û¤ÛM"KxkmSaB™vô¬P¹6Þ¶Õ –9e÷=+œ|Ë^³[›Õ~í°±-×Îî^–%ÏnW1ºn—ei7!kG¥ƒç Ôáø¾Õôéù¶ ÝÊ˯õv ËJž' Á¦‰~ÃeÛ-¿j®-y{Lpõª}ìÑ?–m7îš,¨mg‚7×äå‘å Ûí·jò ¯ÿö8`´5y¡Íö|ÀKkòúçk¯…»Ož¼˜ó²Y¾;å’>ÂöãÚ6áî”'->¾vf¾ûäÉë±?¶r»[ò¢­ˆêë×mªžv^¨ éºUík›ÂkŸ»»O濼‚+ü¤òîÕ|÷É“Âצ|ݪô˜ØM­L˜»õÓÓVÛüìîNÕòÐ÷”³s7pš¼¾øš\‡{:Ã?¸jI+ƒMÞFK­’4À¾Ô¸w§\©$—<¸wÊÑ kæ§wÊvB¨$¦›eã¨Iœ»W~Ú<¢ôÓݺ(ßò¯¸{åÚÆʵˈ`ASÖ²Npª, ßú­}躙PÞ½u…iÐS6ۺèÒ-|J{Þý²²5Ñ;åx jŽ¢wÊ6R)šÛzòJÚ¯{ñÔF‹’nۻɫ@·í€£N¹€ãCÐ:d˜æDÓÛŸÝ'ïw•«BýGõôl‰À{ƒÏ¿µ2 Rð¥ÿ%Sðvÿ)4u Þ2¬SðR–bB¥à)‹Wž\*¯ ]•”ƒ—Ó9M°ƒŸ‹ Iƒ—}òÎ sðr¦Æûî$¼Ý=SIx{êì_„WóåpBIøöЋà$¼½¥¥( ¯Q9kë$|Öl¦“ðu¦ÕŸsðšR† ‰sð’C¨îœ•˼º8f°N¾]ï¼.ß³ÈÁ? 
%áå­ß¿X‘„û\ÊÁkVEDžÙ²rðMëFœ„׸X§à$¼D¼£JÂkÀ´Žùw©08$Îü›¦bοcì¢ô{~Hëé÷)óèH¿c|¢ô»’ *"ýÎ ›éwmÌ‘¶Óï]fŸ‘~ï N¿7Ù‘9ý®Î‚½™Òo'‘~ÇPë•~— ýŸÓ﵇ô{Ѿ‘~ûò"ÿŽTÏù÷Æ- "ý^D"ýv}"ýŽá€²o7Ç!ûÆŠ³H¾—þ¾ÉwätN¾3Ù·rºÌ¾ œ}O6Ú²ïíãUÄÛ†q‘xgP‰·sçÝ‹º¸!ï&fʼ›O*Ón¥GH»±ÊlÈ»•gÞõr‘wÛ³mÈ»5 òn¥Ö‘wÓðkÌ»9|ʼ›}êw† y7—šy7WÚ y·ÒÃÌ»5JˆÄû·çÛlM™n¿V˜ ™6áØisX‰¶îH&Ú\ 3$Ú0Ó~ڔЙöüø ßÖ“5ýæÅ¸CÊ}¯ßRî`N¹ã{«”»5"nî”»8æMJ¹‹ê~7å^õqpÆ=“+ã®——›Ù(ãÎ%¬Ê¸·K¾‰L†wŒ»¶tZtιŸZt¬Œ{«Î£KeÜõ)ån6JjësùòÏU²]³Ê|™loϧO§l;ö–p¶}ìc’}œ†ÙȱcC%çØõ b'Žw’}­Ab®Ì${ßµ¥–’ìØöÃIvë(¸¬ÛYv{ß¹VÐYv{ß™Ÿ*É j'Ù%ÈäÅ+ÉÞžüê9ÇÎÁ™íö.Àq¢³2J´ÛÔ<‡2íõÐzPgÚ삇L»¼EyÛ•iW·@,¯L»ž,vr¦Ýnq53íZª™örèÖ*Ñ®É'¾¿J´[}4Jg¢Ý^:n™áD»ÞÎ/|$ÚE!8ŒV¢]‹J9Èb¢½ÌRö:Ñ^6 •h—k/º 'Úb;ÑžUljö2iC'ÚË¢%„δ+aþ¦L»–M¯c’]i×ã{IöòÔ&N²/A;¾ñJ²cî${>Õ;8ÉÎ )É®ybtδëÓA§L»ËsH L»–&pl£T;o‚Ríö-S.ÃT{^ôÝs®]KÅ¿?rm¬_RíÀJµkµ7z§ÚµÖ-Ü©öò~¤ÚNBœj¹ jUªÝ.ÖI:Síyó¬Rí˜[tª]‹e°jÏ©v»ùBùJµk! '”jO•(bĤT»]'}œsg£SÎOÅI÷®>"ó>õµvæ=?´ÎéwLh;õž6y­Gê}rÏ)3îÓ}’L“´Wæ] ©zoü£™w7p¶ÿ[Òî–Ü_ßL»»Ÿ†3îç©ý}BgòðÒ:“çZcâ­úBc²÷ì,³íÒ˜ÅYc²n‰o·ÈÑzhL €Cdd‹Lfç^âÛ´‚Èl»¾’š³þ "“È;Rd¢Ú¦ÈD¬Ö"“Sû„È$ƒ™lžaÿÕ"“ÕÓìo‘Ië0•’ p/Îg¸R³àÛ ›™z'ŠUê=?#ƒàNíŠ÷£[ ¹÷üÐz£Ae¢e•É)´(wJP¤29¬ ³ÊdïKó‡ä{Ú53bʽi#”™ìž®2åNâ,‘IðzSî@ĦÜ-odÿbʉLÌ9M¹±œnH¾§ /ÊRSî<Þ¯™t¤š}V ¸CaŒτۛp»¤·¡ª w¨aŒ¸ã¨F܇¸'í'? 
ná7#nË„q²ý “¸Nsîù²ï!莇¢ì»äP&&è–Œ"I·øò›tg‘î˜7éÎã1ýNYÆ 0ádÓïÐÁ˜tça™~OÞI"HwVéwÈ}’t"'èVÆŸ [õqösÝYfßׂÝ1 0Ðn¥ì¼]’‰÷ <óÐïÄ;-N¼c^)˜·&~ óîíGwNzþÚ“"ë>úPyÈºí§¡Ô»¾"ß…Ý”2V©w»Òý*ÿ^ŸV +ÿn­L©§òïÚÞ‡òïÀ7ο—Yƒ3çß™]*ÿ®JjlæßeÊÖgN¿ç)dåL¿3ORú=?eåô»•ä¾®N¿Û­ÔèEéw$QʾS©«ì;ä„ξ3bö•~Ççð~×ç‹NéwÛ”~Ggæü;¾I!òvÏëüûyFæÎü;&aCåíÞÅùw(â"ÿvwçü;+¤üûémµœ%™‡âÒùwÖÖ ¸?NÀcr× xHQ€?Ϩ0yj'à1-ìTHb7.åàYåàCÁ¼n*Y¼rðxÆLÁóaXl²‹A8îfÊJÁŸ¶¶v ^þ\ë›Ä…(¯Ìè»üé |§ày)ÊÁŸ–â:+±É®é\çàU’ÓVlfÃJÄ뽆ðÊÁŸ[÷qrð:"G‹–ÄU(/wlŽ­;Ù„“œƒç%*ú8Ï£~ü¶wªñwß¾4'nþ“Ixɉ9×-þÛV†È[jŸœš Á‰{М,^(ÅIìÙŠ“8§'±=£'6¹·àdõz NÖHÁ$8YŸ¡â>¼-¦F N²²œÄæ–V %oQé’HÂÇ’û÷7šT>‘„—É$<÷Ó|%áãÏ+wwŒk@ž;{* Ï}H•„Á‰{-fpævŠ\¹c¢ù‡’ð±$’ð!È$|¸N&áà b>ÜZ&ácpãfƒÑ<˜„A&ácðÉm£}0 ƒ‹7÷S«c>oÑçàé]ø¬yj£=Ç&ï¥çàòÇaSåàWç!1Rð!tzS;7&àClòÆtŠ-ÞzNz^¦ßcp÷rÞ#NT‘ øœ¼Õ›ƒ³wsspõ®mîÞ˜ÍÁÛ¯ie'3ð1øôj:,3ð±äêÐܼיƒ‡·3sðôŽe "cÈÀÇàòÞZL`™øܵA˜c§÷û.¯ ¤2LÁÇàâͺܼ—ƒ»·Ürðô®Z 2ƒ“7ÇrpñþW®ÞçÊÁÝ[YIÛÅ|2ƒ“7rpö¾R®Þ:ÊÁÍ»C}w¡åd>ŸÚÆÉ±Ù;5Iß³û¢Í˜"ïë­†d<°¤’ñù´…’ñ2æd¼²ñš‹‚„\®žö‚Ë)P:“ñ2±–âœÉxé´e2Âdü9‡þƒÉø”ü{R—álüùtºeù÷S²Mgã-ø]דúj‰Ë)oA¯~d6^%e‰" x–d6þ|H`æl¼öµ/g6žçd6þh™ŽVH2oA’´w6^;lq|¨l|"bÌÆ+M€4FÉxƘ‹· Šsñ¼åâe-ÏA§rñª+_åâµÙú ^A’"çâ-èÕ“ÌÅ[P³BÊÅ+H…Œrñº |·”‹gIåâyû”‹çu*ÏÚ*oA饕‹· V(J2¯­8ƒÃ\¼%¡šT.Þ zÁæ•‹·"R»+އlü±“ÝD:^%.a>^%É´”Ça‘×–x’|3#¯ é“2ò¡$3ò,©Œ¼öm ·RFž‡UF^% Ù”‘G»rFž·OyVHùc×°3ò|H@ÂŒüÚ‹eäáæŒüQnötEaF+~œ‘×#â"K$ä-D33'äCð#!~fB>‘1&äTB^·”òt%äC ùdBžOJ ùP!&äyX%äyX%äCI&äC ùdB>™g…”A&äMþqNȇ ò!È„<_.%äùB+!ÏÚ2!ÏSÚ×3^¼}Í÷í÷øÌ·ß¿í²þÃò˜®?ýtýiŸêe­èÇŸþîóŸUäïì%ú¾ºŸqÅïS¶ä¡ôãBï¿<¾Ðc½6}Wàýç/=ÿö¬ÎøU÷_¾²¯Ûþñäï¿ì¿{ˆû[ŸÁGÞþÒóÜöWÞù¤¿Â™ý8ÊCe~´ªLßÖýÚø·R¥ýßÚ~z^û¢µ#ï5_økÏ4XÀÿŠ3]{Ùµ¡ùQƶ¿úL›Ïôþëó[!ŒöyüÓ?þ–+œËðöY&Jµhvoßïu´ñ­Y’vø3ÿüí?}Ûþã·nÿù§/ò»ïÝuÿFü[íû»ð•Ý5®² û+õxlϯû:5]<œýúñkÎÿ܇³?Xû¿ÌÇ2^ûÇ_sþuxîõÓ—6s|¿¤Í]ç~µ9ŸýkÚÜuþ«Íùì_Ñæ®NåÕætö/js×ù×á¹÷6w}Pj%äñk?(;9Úðêœ÷Òx×PâXö6j›ÿ-]îã4ë£6jœ÷öÝü7u¹ÓÌ…~¦ißéßÖå>ÎólÛ—g;ÍüÉçÿw¸Üû<Ëù(DþØöЭÉý÷rŒ¨Åèõÿ¦ýÛ?à”¨ ÿɵíY1ú6T½Ü´^‹ÎŸÛøçöÿ_ Ïû¯ÿ×ÿýÿü¿C}ØDÿÆ x>Û8v:£]à²Õ<‹I-ÃiÓ‚ÿ_?,ø·/²à¯b÷¢¥c_¹<Ã_Çõ ´Ì×ÔðÙ.²õ¡ W÷µÿûÇiú¤|;.oåéMK­!®r­Øó‰õä)ÁÉãyÌßörèìð~Z ´¿.k})Oм§eïS9û¾Rã=-G×B±óš{ÌÖm g’¦—Òí\[û­D S‰q ;ÎÛö­6ŠÀ’ð©æ° *·Ê¶r/CéÚ¡ 
Fk‘û¦¶Ó:ÿñ±W‰ó£]”Q¡{zm¯Å(}*¾µƒ>w³¯Z#;•±L=v-¦5Sù´–üf^æë˜0˜Ú_‘pU]…ÎU•ùg'¸S-¯É¼ö®µØ{½ÕtmÕ±¾»zªM¦ ¾çöEò©Þ×¶ÚP¶~¾ÁpU7S»£7V¯ØÚaÿ´Íפ_»=ËG-ð ·ÚüáãB® ƪŒi[«Ã¹ši{:zb-t– r[÷ëz¡¯š¶­ÏÖ•ajªmdÞ F*ásûï‹Ä¡ÓvvëÌ}‘Ú¸bgÍôû•ƒ%Dkq­¹¯ïv:щf*¥š¸ÚéªEkÓ^òí|­ÛηϜÞR³ÝËi_ºMÈ.1h…n{ý*†Éµ©½”·áDÞ•}»f2?ë"µtû:¶Ã¬Õ ÒioýX3|ŒÅZk«Žäj¬ EÿÓ~öéÏ a²©õR}Øn Û›³Ôލ}“é˜zV1¬¿ªØ=i:”«ÙøõÕ`³\uR¯~5ïãQ3ìËÝ`uÈíšjýh°­ØÇûyÀª Ÿå±÷þ¼š&‰¦ãè‹¢®»Ûç§ã싵+ö¡ìžÎÖÀjúj¥«–"U¬¦l?ZiuÉ}*»°>iïVªµÅÓ9µîå|·R­÷nϳ¿ËÙºÛ‡å~ê|˜W¯Ø­oÈ{®}¸ßij4ïîtÑ‚§é,¿±ÇÝB±¦å•”¼[èÁiøŠÕ”qo¡÷äw»úÞÙì–C¶Kêj‡ëõ»—xΕ~í%ðÝœ[Žv/*ÍS_6Z!|6çÇ|Í=ß ‹M*q]Ïw‡\ÿX·ïÝ>Uªuõ¥·O,7škrቂŠÝKR†rèI®Öݶ̳ÿu·(w.½Ä|w¨Ò»ÏeÓ7ߪšqÅjü£©ºYÍÏÒU¼;ÔEŽ“ósîýy4 ¹òÍ=*^¹åÆ÷œv¼½»g¶£Kk¥ÂŽ.µ·®¹²ì¯uÕ*ÿ¹Ö÷ìgÿüÔd®õFÇÝ£j1rÅîiø¼ºZ ýö'¸>±}ò‹»Õ²É:TÞË”àîPõÙ­Øù¼;Ôˆ•:T¼£s-&ÞêAã“yÚ®ÿWƒU8O{wSÅÎ;SÕÇgžŽîxvÝ©.1šËêi¹{U_ÁÜR©õݵº'™çç¥ xu­ZLU±óNT³\˧¶w×wåríZ?ÍækSé»kÕš©ö9ênìy;Ë븻VµÉò»ÓÕ¸ÓsaýwÿªuVsmu§«ÑVJ7w§«Q“²:»sÖ¸º;˜´òn¶AÔöÊxªÚÉêNZ- kÔ:š;iÕVJó²\’ˆWÒ*›ùÚíNZ¥ÏmÉì·wÒjeã\6…wÒ7²rž´úNÖ…ö¤•Y]…Ž ],²ø¹>ž=gu÷ÛzΥ笾‘W†¾ö&˦¾N—.ãn²l–ké}–Þdù™(ŸE&­:ßÒ?‰Ã1×î‘·lžh‘“WìdÖÊÛÙÆQ;²V6ˆ6ŒZ{ÖúJ0Zãl©ÜÒ[+oÈU`v5-vûÑ^bñM(jOUõþÕ&Ù=SÝ)úšk·‹ +ïk‹=SÝ©{›ËúõÎT•·§~ÉOîVÊ[·í}ÕÚpÐ wéèR® ï¶ ¹úH«bØ0bÞk‡ê»gÕ]ÙŸ—œåÕJ} {mmww¬k_Î;gõ£b·¢bÉÍmü´õœU£Ã–Øu RÅ [›Ûøiê9«TÎ{É®îžÕoh‹Ÿg®ó^‚,d®|é÷óÕÜm•ß”ãÑÝá²±´Ô£§®~ÑŽã¼|ÇÚj빫ִÎmµôäU_Ù²Ûë¹k²eT=wÖ¦ÜçÝdùakèýŽÍäØ;zÉ—ç8zC®âí| ޳ûhg-ÛPj﹫ªr–Àlý¼Å¶‘ÔÜSW¼e+ñÙÒ[,/áÊ{.Ým£PN·,­ Îo@Sï6¿˜ËŸš™+¿¿m8õ왫»ø³vúèݪB¥Ž àMiŸ³}+úOOTø‘/m<õ왫næ5ÕáöøwÝ¾Éøö@:øv7ľÝõðŒowm¼*|[âf@òÛ£õÖ0¡¿m¯ {ñÛ£}‹ ©¿=j³Ü.ñ¿­t6â·GáÂ>Œ¿=ªMƒFßõ!é+EqvAøˆâ¶¦ÂA0îQ)áw0n•ëãQÜ£Rk^(îq™÷Þ#_RÜ㲕Ä€âרbé÷h#€¹7x¡Üc~¾<÷„qÚ¶8ê…që,}Å£(îQ¾C}ä-Š{´A,•EqëLl>¤¸Gù‚öâ5Þë!Å­CŠÔ‚âV CQÜ£äɯ{€{Ôj^nM‡œ}á‰îqY,tüE€[åÈà p¯&6p2½éÝ¡nÕ“üŸ·n× ,O„{”£1n•û¸#„·õ+g!oë¢ás-x{Už¬ð6o$éíõ\0Ez{<¹=Â[¨Xâ€ÏÞÖÁ¦ž8 Þæ]"¼ÍÝÖ5“ƃÝV)l”!vûIì&¸õLð¢à—)NŸ!À.÷C› x{Ý LMÞ1ÀÛl…„·×!-àmsÞ–Oáíñ\¹@]ðö¸¬‡ú׎ðv¨'nP•÷¸ÜZúׇ÷¸L€NâÝáÕý^øþÕÃ=._™ŽË^·G^Iˆ{5«þ%ÅŠ[Çü˜´Àýxþ·Ñnnë øâ Ü—™KOån+†E…·Çs$¸­rpÛ¸}Õœ¼¶,÷æµWÖ'Åk˼¦sòÚ:&É8ym•ëgµÃé€k¯+îišpíuÈ>}I^[ âµÃéÀk+›ñZ÷(µUK’ÙKkív.\{f#®½ŽÕG¤µÃ!Ak¯XŸ„#­­#Â!N´¶ŠÁIC´v8$híuL`{âÚ*‡9`áÚŒ×fó!®ŽH¸v8&pmÜMÒÚ«çÀôií 
©Cb6Q´6{#âÚ|zÀµ×•cš€¸v¸:àÚ+öèCvâÚ:&æë„k³š/\›­Ÿ¸6«H\[)áGwIP{”§%'AmU€³EµC%j³Ôfë!¨Í{HR;Ä@j³'©ÍÞ‡¤6;I’ÚᘠµÙ5‘ÔF’)R‹!mô¹d´‘eŠÑæ× Œöº°î F{}€ú?‘Úè; jóµ1x¯!i%à'"\›ŸuòÚÈXÄkóKN^{•»eYĵù™!®­üP…k¯t Ӊĵ‘â ×^)tKĵÇe Þ¿)ĵuÌl¼6Ò?ñZ§ìµ‚G½pm‚(áÚ<$hm Dk«FࢵÇå.»2ö¦µÚ1sHZƒ)ÑÚAŠÖ׬½c%­½F$`t/Z›Ï´¶†öØxO´¶N#\`áÚ:&–ׯ0Q¸6›ym‹×V¹]!ym•À¯Íym“Àö˜´]¹€í°­óa1¥€m•ëÚKñÚ"<œà'°mtõ)¶_Ä€l+ö!"«å¯À´Å'§ Îd1ýåÿ|ÚÄi/jÔ[ê%ǯ¯.¾fó϶‘ ÀÖ*@Û…;[°}Ò‡ÅÀö!``{0E7°m}$\ÒØ.Ê Øn›Q¶íK…vk`[€`YÀv•³m¹Ë÷ ÛCÿm`[+³ÁU l7Û“KüMlËê JĶæ3"¶5wÐ…“¶5ÅBL`[“{½ß7°­GB‰*©mKæ{QÛYFƶE©{ EmË÷CÔv– ‡©íe—ß!1 íò RÌжÅ(Ð3´Ý9\3´mÝyžÈíå.´’Îvr;ËaÂä¶ÁV*PIn9Kr\_äv.ðÒ[¬Éí"Å·Éí®é‘Û6641¹­z’ÑŠß’9“ß¶/2PóÛkHÛ1œøm»e'I~ë¡ùm þÀõàË·¶`ì³âp7 _ pý}0Á­­@õHpç‡á. nòbÜÚvSXB¸õ¡$Ð&½våÃÍ"Âv"–ó(b¹ñþˆå^[gÝÅ,·LëñpÄrm™åN üPŒr¯{ˆ$wšôÉmGÃ\c.Ã\ã Ó\uIAs¯ŠæŠ‘æšàæj†+a.Á«a®ªa®'~ sýd s…¸o–kxo–kR'–賂nÌ%ÑŠ6Ñ%¯3Ñõ#M¢+õpÝñ4Ì x,˜kÄg˜»h…‚an‡«â¸>•9î'øv–60ñ-–äÃ5ò6Ä â*ˆë›'ˆë×Ï×Í&!.¨)î¬å&¦¸†ýoŠë®Ôw£'(î,™°)®§mLqý1 Œ+Ì,Œë–aŒëçfŒë7]7ž›0n=7¬2Æu?fŒëK7Æõ hŽ;KlŽëžÌ÷~+Äp úpç×,‘àmÜCÂÛh!†·n!‚·ÑocÝðÖs¡ox§¼Ö!x=‚·qi‚·1ýBxûÔö؆·ñ– ÞF'"x9¡àmLu ÞF9ÁÛx¡o-E1¼ˆàm<ÁÛÐˆÞÆ—Zô6ž¸è­[²àmäD$¸1¯&Œû\¹¦1®Ò°DQoaŠk…ˇ©Š^¨/ØH>,æþz – §FûB¸å@I,nû‡ØôW·»úÜî,‹e#ÜYV¿F¸óö}ÍmB!Ür-%N&Â-o`a!ÜÊq¡êÂ'!ÜwB¸Aš„p zÓáN'?!º5Ù²èV¾~B¸)¬ Ñ-Þ$3Ü@Jb¸¥•…å…•·æ?PÞÒÑ"¥·\Áné­«aéí¡µÛ–ÞîRYKzkzhém\™(îÁÄÍ·¼v{çúÛ݆ ÒßF9RÜÚk¾§¸¡¿=e„ ùíi9,!®õÏf¸‡´†f¸n–Épñ! 
õíÉ”5®Û%n´1ܼ)ÒàF92ÜxCÄp=‹øf¸ NÅpìøƒáú¢Íp#F†[5„Ü:.؆ní]ЇÉf¸yH1ܨ n^:n‹:˜áޱÎp£.b¸Ñu‹áF7+†;?¾%ºª‹ÝÆS3¼õí¼ÍcÞzÃðÖ½î«æ$·¾AAnO«R_äÖè6ŠÝf9¢ÛŒÝúù Ýfí‰nýå2ºõ½5ºu³7»Íó‘ÝV5‘ŠÝÆ1Ånã˜b·~ÍÌnãÄn³á­û$ÃÛ«˜ÙfÕÉlãi“Ù†FBä¶ò+ÜŽ±NnC“!rÛÎ' z‘ÛùÁÒä6c$·Uù® ¸­ó• ÜÎå¶ÞÁà¶Y;Üh§¸QòÛ¸Áâ·QâÛh¥Â·.&zwKôÖ•¼»%x›1ÂÛ8$ÙmÜJÁÛ¬%á­g§o‹ÞÖ®ñ´~ ¼'ΈÝÖÙ0Y#v[ÅúÌ×›ÝÆ¯D·5 UÑmm~à tÛbp3ºÛAtë…oF·QK’Û,Fr›g#¾C’Þ:$x÷Dð6Šàf1ܨ¤Hn’ ×­@×9sÜ(FŒ!P܈âÆíÄÖ(ˆë+ÃWíÅpÝKáf"\¿Ò"¸ÑˆàF7G‚›!ÜŒ‘àFMp³®G׸À«ÀÍr¸#À/¡nÆpýìqãÙ‰âæ!ÉrÇØÍród¹yWÈr]K¡Ü‘ä¡ äFõr£¹#ÈëÈÍr¹Y‚Ü,FFŽ›5!Çbĸ)n%ÆÍ1nŠ Ææ*ŒI0nÆ$ÂUwg ®;kQ\ìŒqkå6t ĸ÷¢n\üLxk¸gzkY—é­ûé‹ÞZp÷£¡?åFèn¡”ý-n·³6ö ·[2¢Ûi–U¬Ðíc•¥ªÐm»…Ô9ÊîöLRéyÛ^xékéyÛúÙïÂó¶]8d¨²¼­-ìð|dyÛî%mWå{ÛúN,L°ñíË|ýv¼m}ðŠ{+ÇÛÖwË5€Ž·ÅÉú½•ám} `*ÃÛèfRÛ®Aë´eu[_œï˜$T9.#—ÕmQÁÑ$¡~æ2l¹Ü¶Ï%ý<år»B^r¹ÝwN%Úå¶Å„Mér—-›ÛVOÎdÈæv?µ<]>·û©5áò¹k—Ïm–ƒÏm^:}n+f…îsÛ‰ïŒn÷Ã(–F·•Ѐ§½Œn÷]YF·û›ÜvÛv¡MúÛ–ñ 0·ümÛM!§¿mÝ*^éo[=T¦r¹m‡ä\ƒ\n+ϹÜF¯'—ÛVNËûiu[Ïo­¬n÷Í*ZZݶ/:'Ýdu[™>0²ºkÕm%µ0ä‘Õí¾Ù"—V·ûªiÞ¶r²ò…ám£áí¾rün+-§î˜†·ŽÙð¶bX÷v¼-?{ˆ‚äx;ƺåm­¯¡™,-o÷EÆf²¼Í­@°4½Í³ÑõvŸ$a—ëm\\oã˜r½cÝõ¶ˆÃçì6®@ž·ûÄäßž·×nÏgËó¶•ÃÞ¥ö¼ÍMoÛ1j€az[mbzB¦·YŒ¦·Ã,ˆLo÷'ÍÄlz[1̺¼Mo£2½ÍÃÉEÁJz{)<ú^©oóÒhz›!šÞ|¢Ü˜¦·-Æ5É2½ÝŸ\äg×Û,G×ÛJ>¿ºÍ²» Êî6ËÑîvr\i»Ûã*fÙÝVŒ¦ ´»m1ø¯Ùî6šìn3»Û<$ín÷òØô6b2½cÝôvиðm}›¿Òú6FïÛŠA¦/ïÛ<&½o³¼o‹”Â#‚Ö·5¨é"}9ßF)ßF)úÞFw-ßÛèæå{ë³É÷vÐ<ξ·£ïíë¾·µ@sùÞVŒ~ô½Í}oó|ô½ÍóÁ÷Ö7S¶·ûƒ 3m{[1˜ÞÈöv0­·ím–{ÙÞFó‘ím4;ÙÞÖ© ,•ïmÆè{[;S ßÛ Ñ÷6kOãÛ¸[2¾g#ã[7=ùÞÆ!å{›1ßfŒÆ·ñpd|—'ãÛŒÑø6òl9ßF^/ç[·=ß^¸ìÆ·ñÈø6n´œou6ßFˆ¾·k^¾·Nc~”å>~ŒånwÆ6)LZ/”Û:ImûE”»¢D¹Û®Á{:ßR´h#…§L¤ÂÝÚÔ*8,¶‘ÂAa¢}!/‰pkwªT)Âõ!¥Á­N×3J‚[¥¸M%¸uŸ¿g{Î’àn» $¸ƒ·…¨o•7OG nň·)Á˜$¸Q•·7ª! 
nÅ(³¥7c”඘´´”àV¬÷ì’àŽ¡.ÁÍ+£×Ï[ ܬ ¸yTàæ]¡7n¤¸£7c”àF»”7cÔàFC‘7n™4¸¾e’àúÅ¢7+BnÝYŠ©À­WÓ RàFL Üx!_ Üü• Üè*¥À͸£7cPàFß%îë ÜèפÀ͸yL*p#&nS ÜŒQ[xû;Û™]1ÌêIURÜ1Ö¥¸-Æ©Iq3F)nRÜ QŠ›‡¤7cÔäúV[“›±—&·¾ÓK“;ƺ&7GMnƨÉÍcB“›!jróÔäúŽX“›Ç¤&7cÔäÆù¤ÉÍ5¹N{¬ÉÍ5¹q ÒäfŒšÜ¸irÇXçæ1)ÎÍĹÛi©2Ź£87«BqnÄ$Î[ö!ÎÍ)Î̓Qœ;ƺ:·ÆûÀGRçf òÜ<$å¹£<7/úÜŒQŸ›Ç¤@7bèÆ%H¡»ßîf¤ÌÍ•¹£27c”掱.ÍÍ¥¹yYæn£7n‰ŠÜˆI’;ƺ$7c/MnþLQnÆHu‹x|O”;ƺ(7 Qn†(ÊÍU¹£*wŒuUnÆ(Ëýx(’ã¾~¢ 7žªd¸qtÉp3FnƨÃÍóQˆ›1 q+/§E•¸-Æu”âæ!©Å͵¸“7.ObÜ,÷!ÆÍ_©ÆÝF7Üí gŸÛwºL‰o3ñm†(¾ÍÅ·£úvŒuõmÁÌJ}1©oã˜RßfŒêÛŒQ};Ʀ¦ŠQ}›1Êo?žŒT·¯Ÿ(¶Í#@l›!Šm3F±mĤ¶Íå¶cìÒÛ¶ÃåBÒÛfŒzÛ<õ¶£Þvd¶Ûh“°¢Úmt´—]bÚè$¤¦rRÓfŒjÚ8¦Ô´£š¶b˜O”š6n±Ô´QYN›1ÊiÛ1e´@9í;‘dr]“4µ£°¶Ð.ð²°6cÖV—pOe_ºÚ¢*}âûÇXìïó¶Ý¯M'«þ*`ìjÿÁ؇Œ2 c/ð§P‡±üJ&Œ­//¼>c×§4°Þ†ìIåIlCö°i/ilåܧKÛZPï?ö²Úìõ¡÷;@íÃÍöÆÔn¶ö‰‡­Ü Ê2qØ,G7Û(&;ÛCŸ¶³=9Ý=ØÙrC:qت )ç‹ÃÆ©lgë›l?[o'›1rج mÃŒW†¶~8â°#ˆ­– ŸRØJˆá‹*;Æ:ˆ˜@líç ¹ @lÔE ¶¶Qü0׃Í"d°y*2Ø:œÄ`k×RLÐÂf9PØx:¢°µ%3)l´sQØÚbšN±¤°qÌ…­䢰~·a—“ZCØ(FïlíCP';Æ:ƒžN ¶z:º'Á.§pÉ`ß=¤à+~ìÔµŽò=߃ÖéÒÃYÔ5zQ׸I¢®YŽÔµÅh­#êzWž¸5Üw\¸5ÏCÜZOƒf Ä­~RÆ­~R7nÍ"Ä­~€Æ­Yޏu9%!nÍrÀ­"nͪ·Ž±Ž[[ ¡¾O¸µZOW ж€ÐP´µÅ &lõÅZׇäb­#k­šÐh–¬µÒ˜†Šµ^ùH—‰µfŒ¬µbt-kÍj’µÆ•µú%5ju1‘Öxr¤5þ=Ak Qu)ÐUhõãg{OÎ÷Bœ5‹‘³Æ­g[/Κç#gm1j…ÅY£‰ˆ³:»5pm1Š™\[VK·€«³i×*‡ E\¡¸f9׬ €k#ym1ÊÔE^[Œòv‘×ãV&"¯{‘×<Ék‘Dš ¼®Onn-ðW&ð7„à5C¯yH‚×x6¯5Âþ^à5c¯ÑND`ëFÒk€(6î¦Pìë(ÖwS$¶Bß1Dˆç#›1‚ØÛay@lÔÖ‚æ°uDlƒ.åÄa×Iš5qØŒ}pØö+WŒˆÃf Ùº§d³Ì¶Ø‡wª˜ì:õq0Ù^€4¶ž(LˆEc{ rØ:–éˆÃÖ Vµâ°qKÅaãáŠÃÆÍ‡Íc’ÃÆ‡Í9l“6ëB ›Ç$™{F2@dÖüÅdö~8b²ñÄd×Ùδ/&»Îv%“]'ûæ’ɶr\P&&[1,N“Ír€³Õ‡p5RÚŠqs3âÚuæf«Â¶"µÍŠÚ¶MEm Áó\Ôv]d¹+j».ò5µÍr¤¶UF=¢¶Þ'ÉÔ6Ë‘ÚfŒÔ¶Å^>ζY‚ÀÖ ÑÀ¶bTÁغœm‹AwÛöz?æcû×?<¾ý·?,í Ç1}ûùÛs¹þôÓõ§}ºlZôãOÿçÿø#\¸`Ö7It§ïHt?«È¡èßvʺºŸqÅïSžó…m® ½ÿ"§¹¿s Öʨ_xÿùKÏ¿_Vç¯ ¼ÿò•5xÝö'ÿe§>íKžÁGÞþÒóÜöWÞù¤Çüë/Ô¤¸K—9µw©mÝËsúî[Uê?ÿéå˜ýÏö¿K«ø·?ýüíü»–°þûoÿóÛŸþûþËŸ~í™–¥ÒÙ©]ðrüú3-?p¦­}!ÎvªúÜþêm>Ñø¯Ïoe¡>ßþô¿é÷–…­ßÖšÆz´Äj9×i8oØÛá?ÎüÏßþÓ·ã?~û¹ýçŸ|­_Ð[÷OÄ߹Ѿ? 
_Ù[ã*—þv|ÙÇéûæÙ_,ø+Îÿ܇³_Û·|Ňù²¼‰³¿}p¾âüëðÜë§/mæ2įhs×¹_mÎgÿš6wÿjs>ûW´¹«Syµ9ý‹ÚÜuþuxî½Í]”Úãäøµ”é¼>@G |ÛeÔ¼ÛR¨u?do¯¯Éoþ\ÆiÚ8¦eÚÈ~y¿pšõwž¦&~Úøgßiü<>Ï÷iž¿÷ržéûòl§™?ùü÷óüÎëYΚ3Û{ãÿÂi¦¼œ¿qŠ©Š×in§˜¾-•ÖÇs›žËðdê?×þ×_ÿ÷Ï?ÿù_ÿõÏùË_þüó¿üóñOÿò»rŠo¿jæR(¡Ë–kòûS˜´K:–RžtÓi¹Ö.¥<ánX†¹¬{µ‘8Pa…ŽÛ»ä˜©iuîG¹œÇú<Þ²ÂIéX{Ù fÞ?Ü·gûöTÚ_Û³¤Î‰}l¶ {í¶ÁTlÙfx야¡6ß¾MKêµµŽ¦ûíuý¸Ã-ÖMö®[ºôØ—½ƒìjÙönT´>ÛQî€Ë^‘º>û´”ìä5ËRŨc»„j/$¸×'éæYK‰NnÓ’R7t¾·ì˜¨X×R¢“Û´¤Ô%}NaiÏð2Û«¶Ù§/êçn¶·ÎÜai-·›íÕ´ndYYÝn{5=öÀ©6¸í­’¾/ûÞ=÷j ¤sÐ uϽ’‚t$»”¦î6-)KwxZvL×ïµ8.¡Æâ·iɲcúb9àRÅúli…ºíÞ²R}°l÷j&µÓ³¥Œ oç’¸ðöáí¶{qå%=yß¿z¦°çZJ{r;—”¢CÝŠuÛ½šlê³ ËÑ;`»WwåžeYJzò6.©}×ÑTÎ\÷­â«XwÝ[¤+]Î'\÷JˆØgu—s‚ë^Á _0£$/ç×½jÿHW¬ÛïÕ=F¯p®°ß‹wª}Jºý^I“úDAkfÝ~¯TR}>£ýØïe1ìŠ:Üö{¾É­ì÷jݧ÷Ús‡ý^Í;v­µÔ(·yI5ËŽ¤+Öí÷Üd×R£Üî%5•ЧÕ×R£Üî%î7*/êö{y¾ ö{n²ëö{nêë&–W¹n^°>ØïùÑ®³Ûïù1´„ö{îS*Öý÷ܧ¬Ï'ü÷J5Ð…líÉÀ¯”ýK¼>çÿ=¿üë”=:í•V km“÷nû׋Ð?Wkm“÷údäC}bsZÖ’¢¼G>›Ò¢Üî%c¬ÛïÅLH9âÕZ§'ì÷j~úã­fãóÒ²¼>eœ÷x‚©^Ű«ÕÚRÄ[­0ÄV¸ïÅž6¸ïe¹î{¥µé3:ëî{¾Óµ[ü»§ËgwmõúëuWºHw»p'¯¼lºßBŸ¼c3 ¨âF×v·}I õEú/^›§Y`½Wò–®Ô\KŠÒB”[»gO¾4%EyÕ¨B˜ÝZç½ ŠóþÎÐf73Ÿ°Þ˪œðÞ‹·cyÀ{¯”}áÿÚþî½WÊ‹¾Š}]¦žV +6×¥õœo«êë˜=Í®Øù~[®r]æØß-Z*†ÅÉ­»ƒû^‹Áwam_”î¾ç§Ý> Ý|¯2³®ë­X7ßËCÝ 8ZÉ‚]è#Ù[/ýßúj®—¯W½ïæ:qVd]á–^1,|©X÷Þ»ÁÚ¾>—å^ëÒ̵æI{öêÛ·®=™­at–kËîï,½b+¹Ýúù´Ú`âÞj¦bXj½¶£Ü«[†rX “­jîMuyØË¦bÝr¯”]™·n¸Ú!6õ˜¥ ¤îÞ&†$kUÝ-¢bXUT±{âv8&vs͇¹Áz8ïKÉRîüõãA—&åN[ó²à`ž/à†6akùCÍïÕ)NÙ Îw:s‰ÎZ®NK'WªÊÅpÌ)¨%öõJK¬«DÏ‚Û[ý¸·eQ¼ÝjÔb»S÷|”e¾ßj;úp>ßš2ö¾óÖº!=omeà¹7ƺéÞŠ<¼bO˜î•à ë ×þ|y#K—ò8{Sí;[¬%KyÞ}ª†­kÉR¦Go©|£J–ÒÓVßécƒé^ܲ6šê¦{ÑNÚhª›îE7׆S·÷c]:vˆ¨Ø&ò¶œÝ®N‡-ÖóÙ—Nä;'¸îE5Ï®{¥Ÿé»eTìrÝ«þô!·\¦{×jªþménº÷êëç­¯žÉçrî·]ÿpM{w=ͧ$)wÆÚ*(¶–$åÎX=BÙJ’rg¬ëÒ?|{Âo¯„1] Y±n¸WR–.]ÜØ4lˆÍý¯í™ro‡ÖÂû)®›|¸·ÇÚ|Ü–›cøÙlmÕíöJy &;ìöêsvÉ~¶Ç§½¼!gïªòØï bgOVWz×nϾ­R¼õÛóÙß®<¤' n®Ø ³½ëCýæ2³ s?¶¾ó½áõßXÖiŽ{ô,+1înd#Œ[+° …q÷‰wE׎,¸Gû„'ãÛÄõ:¸G oÉ^\Û¹2›ÏE*â¹±¢D<·vóÆâ\âÜcYÙÝ çÆnÞ¹ÇòäÐL8·vìÆš2áÜÚ¹†X8·@»êrp·ë…KæˆskLÝ1àÜØN<·öhî›çƆ¨Â¹í«EôƹµíÆR¹G ?œ‰s6à{]¹ÙõDïUáÜ«˜•Î=¦§îqnU /„sÃòZ8·¶Â.áÜ*‡~C<÷ºÌtèÏ“{žèV9¬@Эr|¨ºuíA0€îñ<^_G±Ü*ÂÙ²Ü*‚mRÄrë²w2`°Üã¹3k!Ì­Ú£ÃÌ­bHõs«¶Ì=®åP`ž€¹Ç“úvX/uàsãÐX0÷‰·ŠˆãVΑãV9 oÉqëz9×EŽ[SkœË 
Ç­rœÁ!Ç­r$~ä¸U»ýŠãåÀq?¤·GѾ‹“ðm]1G¤Ä·U]øöºÃÞV±{|ô6‹Þ×Ô> /àm]0À„·UŒìðö*F x[å ¼­r˜8¼=ZªNð o«ùámä_ð¶Ê‘OÞV9ìtBx{ðo«˜5èm6ÒÛ*‡o¨èm•öj¢·UŽœô¶Ê‰úßÏ Sk¢·Ù„Ho«Ö!‰Þ^m¤·UŽ#¤·ÙHok-io{¤·uéX!|[å°Kø6Ëßf1âÛl{Ä·US·â·Ù?DÏ­'ƒp«°àV¹†¸W‹ÅL n{~Œ÷Èn³±’Ýg»½kŸV»Írd·U³šb·Gõ{’Ý^õ€ ÝÅ€n£“ÜV1¤7"·y6€Û¡Èmž à¶Šaõ«Àm¾Q$·yq ·× …q5ÉmžŽä6_D’Û¼º$¸žZ6 á5ÂÍÖE„[Õà$*nÜ~Ü:ÆQ"¸Y 7+I€›·Ÿ÷º€~¸y p‡r¸C9Ü¡îPIÏu[ÀpóU%ÀÍW•$7n'Anvv¹y:€Ü«oíš$ÜüØäår¯¾µS›÷zŽj\ _¹ÙäæÇ† w8S¹C1€Ü*v±!Üüháf&F„›·‰7?OD¸C9 ÜLˆp{íIo¯}ø'z{Õý¤·W¶…Y=ÒÛÌHo3á&½ÍÌôöJ$»:Eô6S1ÐÛ¾ˆÞFMx{ ­úÁÛ¢¼ámŒ­Do‡Z€ÞÖè¾lB¸Ã1;­b$ëD¸Ã!pcl+„[1LÜáV ÈZ÷:fï’…pk ŠZ…p+ô#„[ãll!+„A w(†; â^è¡pq‹<{Ê.’[å`Á$’HF$·Ê—‘äu¡’Hnlæli{‹ÁŒG$·b0'ºÄñ5Lè3¡?¶ðó·Zõ…6÷ìïíu7º¤Ô} ŠêÚ©ÑPw—ˆÕPW¦‘uµGB@ÝC[‹ê.3êzÕ­¡îºrš6 îƒøÒP÷)Å£ înuN-ÔÕf÷u—ïjt6\Á¤~@Ý•i†º³‹ê¶—žwÚPלRT·$)Õun}S]çLuÝ×ÕUÿbªû|ðm3Õuž%ª[›® 1Õ}zp¢º­o{@3.ªû8°q·¡îóaàK¨{1OªtÛg„*V2ݪ H¤˜îcåGÅL÷qÒyÐL·üÂ0s#°ûXøñ3Ømç£<\`÷±a‚ÏÝ"˜¢ H÷<9â0×}Lš×m1.!H÷ò ø×mWΉƒ×mGû00Ò},̬ÏõÖ-ÚAš±î㩹6bÝv6v¡’ç–MºBÉsÏ]sX’çÖ?Ä VòÜóÞ‘<·,"XŽòܲ@þ,yîÙ1Kàݪ L¨¬Î­„ŽX˜êÜsç+eunemHÔÄwÛµ¯H{.pà­ºlTC[¹%2W©s£šRçV+ÃpUêÜÊs ©Îª¼Õ¹­}‡=‹sËà3‰sÏÝ–µ¹-tC›ë–gmn´‰sÏ™9¦Å¹uû ¼)Î=W~-Î=Ò_Iœ{N’‹HË—:øîUOŒÑ¥Î='ÉS¤Î=' Ê¥Î=gæWVçž0J ¾[×Óp«s[‡U;–çæµSž{ΔyHžÛúaÎ-Hž·ZòÜêl1t’>·n hÇ@÷|XëKnÕ@ø%n )ª$нŒj:g @7¯‹ÝVÊ%ÐmU¡øIÝs’&OÝö­£:ZÝËÛŒ”ÝV‰w)Ðmu¡|QÝã”XXݸÕèÖ¸þ{ÝËmuý\ [ÖE˜‘B·¾åÀRè^óNe¥ÐÍk§D·VF’è¶'›$ÑK—D·ª±°$ºñˆÞZÝ"$èø¤Õ=$’êÆ‹#©î±k®…RݼfJuãKª[þPзKª[9Д¤ºÇ!°&©îq #IªÛb„7’êÖ11å%©n+÷‘aY¥MY*ݺɀ>Ré¶SõaœEºÑZ%ÒG-µnÜI©uó˜Tëº*ëV¿€Ik‰uë~`‘†Äºq6‰uãx‰uã‰I¬{œÒÐK¬Û$¥ÄºY{ˆu³ëæ…Q¬[ srëFU$ÖªH¬u‘X·ÅÄQ)ÖÍrëÖ«9~©u£ÅJ­Û®akx»N7¯Œ:Ýj<='“L÷¢·=_–L÷ذ)Óu{”J7:ªtã&•n‹Q× •n¼óRéÆc{«t{W …î±+_—B7ÏB™n=däò”éÆ-”L÷Ø57#™nk1-"™®n¯UºÇAŒn•®i•®û~«tÝL­ÒõwÁ*Ýcçt£eºÇÎ1°eºÕDúˆuºqé”éúͶV×­ÎZ]? 
}Øg!.A> ûŽ,7/O> þ0Øg!IŸçnf¸þ‰á*‰±ÏBÜLù,¸7ÂÍ}ü‰zû,¨ó·Í‚óic\€l³1Ú,8µ”Í‚¿ 渚ã¾?j&¸wN)vŠØm íÄn+ë•»m1ê0o#s4¼Ujx£ÁÛÖÊZ!ÒQÁÛH+oݺoãt‚·1F2¼å;ovƒa±Û€Pov›g"¼ÍÃÞfíIoëTvÞfˆô6c¤·1P½‘“èmdÜ¢·QNô6Ë‘ÞÆó½Íc’ÞF½wDô6Úºèm ÍEo³émŒDEoƒ ‰ÞÆùDo} ¢·1ä½Íémt+ôöîÄmóŸ’Û– º5»ÛÆÕŠÛú&‰Û^ß)ðhrÛÀ â¶c ÜVc¤à¶î(Åm££·ÍcŠÛú¶ˆÛf9rÛ<¹m–·râ¶›…¶ä¶åpé*qÛ<¦¸­‡Ôâ¶>¸m‹qRܶªBa¯¸mÄÞÜ6~&·ÍjÜNZ.pëaÀmÑLk ÜÖü$¬·1Ÿ#p1Û˜¸ó ÜÆ†ÀmÌs ÜæùHoÇè­ç¤Do³ñm^;ñíùä"zÑÛ˜¿½­Ø½ÓðÖ!ÂÛ8 àmÆoϧýo=8{ÃÛ˜¼))ÁÛŒÞÆôámÃj.ÁÛóIñ—ám‹Ñ»Dð¶Êaò[ð6&DocFJð6ËÞF]o³ámL° Þf9"ÜŒáV Nb¸#ývïøÃYJAܺg”ÖâÆíÄÍ!nÜAܸ„7Ä;"’OG$7ZŠHî9Üh«¸¯Mn·TÜöÅdÉký’˜×úÌæµY޼Ö×n^›1òÚŒ‘×ú^›×¾.‹˜¶Æ·Tð’Ó¶;¸³êä´bXÆ´µ7¦nÇ´­ ÓúˆbµYаVB³Z U.`Û~úpˆø13…'¸¿Å—:ÙÔjƒ¸ðÅ=å *PÛ†a~{ƒÚX,^û8´[¼¶5\‚ùâ&Ф/®ççmÛºF8È ·˜àŒ r÷ClRÀ¶vÄ¥À–Àv?µœ\Àv×N¶µI®]À¶çy7$Û‚¦ßÕVÚ@u0Xí¶Ó9Ĭ¶Y¥Yí¶h5¹Xm‹ÕngÃnX»=eáikÜS¾¥¶Æ=¹gfXãÚØÖ¸;7]²­ä~ÀD¶åÔõÿB¶vu1²µû‘­ÍœŒlí’dfk;¶ðÆcÊ×VÈb¶ö3³-³*4V1[Û(šÙÚÑÑÌv}¨¡ØW¶ÌòÆ=¹ˆ:¼qõBöÆ=ÞŒR®¸§£MkËtvoWÜ]ó3vÅÝ9 WÜMFvÅ ›Z¹âÚÆF®¸›4ìvÅbአÏe›â®–¥ÊWë2LnkOJ]In—•*_“[{<›Ü¶6¦6¹­r;‘Ûk¯QÈgeŠ»P€jO\‡d‰»°¿K܈ÑwfÇ–¸C –¸3·§7¸­ýWéEKp»LÌunp»ÌRÏÊwê°&¹mMޏµLâ[Û:O—oØ wb×^¸“d¥öÂ}J°m/Ü©'Ä l—'g. 
lïº Õ.Ó÷…¶Q ¡ÚÚB—¾ Dµµô†& ²Á}X„KT[ÈÂW¡ÚåI£8»àF1Ùà>å€'RÛªò2‘#¤Å¯ä³QoñÙZ¡D«òÙå!FõÁgÛ¤7¢´¥­ýŽ)i%¥ÍÒfˆ”6IJ;ËÌ”v>FDi«ðˆ(mÆHig-d3¥m1©]Ii󘤴QOQÚJì1V¥?þ9áì|ØI—pv>ìÜK8Ûb´¶%›­¯¿§©Ó‘ÍF1±Ù2Ô”Øì¼K%'6[1Šg_lvÖJ@³Y_˜Ðl„HfçÃÂ^Ùª;¾)"³y]$³#™µÒd¶¶$§–•d¶ÅÞòWBÙy×ô¨ ì¼kJLP¶ÞGL‰ ÊΛüqeÛ1¥%”­câ›((›u!”må¨\•m1NωÊΛÕÉIJóFY$±ì¼iÖKX6J‘ÊÆÉDeã^TvÞ,Â%•ͤ²uÉXÊ$*›å@e£î„²YwBÙ£4SPÖ‡“­»ˆŒEL¶NG 2Ù¸ûb²qÃÄdçÍB\2ÙöݹE³ÆÎëw­†3‘Æf IcãFŠÆæ!Íc’ÆV=‘?‘ÆÆ%ˆÆÞ-¯µ8l<–7‡æ&oŸ8l\¯8l¯Pl´4¡Ø¬™l?iì¼rŒi,ëÆb,ë{j,Û:)ˆàe}¹Æ²¾ëƲ혒Ø˺S4–uGk>›1òYCÌg3F@›×N@[߬®ó b"´óÁ Zhã¶\wøàB å´â´…ºÞßÍÁ#AsÚíýÅ{„]ò9Ù#¨m†=ÂjêKDÛj¡Ò(©¥8%µÛªEÜÒÔn̘,©Ýô\-©-âÙ¥–rGˆÍ¶+glްíöp 9¦Íæ£9B“æƒ9¶[KLs„M-Ôæ£9B\ºÌ¢*os„¸2™#l»w"£9BÆhŽÎ2GÈÌÆP7GÈÍ2Fs„M½”Ýâ䎷E–·q'e1Ú#Ä—=BÆh1Ú#Ä—=B´Ù#dŒ¾·ƒïm„ä°¶ˆ¥?BÆè{›1ú#¸ãxÙ#Ä´HBÝù6BtHˆ ÜÏÐ!ldd{;ƺíívhÙ„lo©b´½ÍmoÇX÷¿Ý<Ö–ÿmÆè›1úßfŒ¸c¬$T æXrÀÍrtÀÍrpÀpÇXwÀÍp3FÜŒ½póg:ànf+rÀÍp3Ü Ñ7ctÀÍpÇXwÀ­/8 rÀÍp3FÜŒÑwŒu ÜŒÑ7c´ÀÍ-p3F Ü1Ö-p3F ÜŒÑ7b´Àu²d Ü1Ö-p3F ÜŒ]¸ù#-p3F Ü1Ö-p3 Ü Ñ7côÁÍ}pÇX÷Á˜|p[ŒªùàfŒ>¸-ö½=Ì>‰u#Ü<&p³p3F+ÜŒÑ wŒu/ÜŒÑ 7cô½pãòä…›Åè…;ƺnÆ^^¸ù3½p3F/ÜŒÑ w;Ò?áöÂÍb4ÃÍÍp3F7ÜˆÉ wŒu7ÜŒÑ 7ctÃÍÝp3F7Ü1ÖÝp3F7ÜŒÑ 7ctÃÍÝpÇX·ÃÇ';Ü(G;Ü Ñ7c´Ãc'SYÅ>ìp£‚²ÃÍ´ÃÍr´ÃÍr°ÃÍíp3FSÜ<Mq£œLq£œLq3FSÜŒÑ7c4ÅÍ­q3FkÜ1v0¡ýžÚvˆÑ7c´ÆÍ­qÇX·Æ­q3DkÜŒÑ7côÆcÇ'VÞ¸£7nÆè›1x㎡“ ­bôÆÍ½q³7nÆè;ÆN&´ŠÑ7côÆÍ½q3FoÜ1v2¡UŒÞ¸£7nÆè[=+ iå;ÆN&´ŒÑ7CôƪÈ7ËÑw{ƒÞ?ýßýN¸{ï“ònŸÀÝeÄ&¼•øÀ!’„·R7#áMÞùô!Ixk~äB„·àÿg›Û~ñÖ|9Å­D¼5=Ì]ÔÈx[]hêmÍ–—ÌÚ|w^ìù óÛÙZà0¿…%t˜ßβÝd~;S¯$¾ÛBÜP+w4#éßm1º!{K³‰Â$óÝxo¾[‡ë"óÝ6F$ÎßmåàŸ*¼[û±q×2âÝÜ« x·…äaK¼Ûbr&ÞŸÒ[ ïÆé„wãê„wã®ï¶rš«ðn\ƒðn¸áß+¼ë»)ºÛŠÝ]sÒÝx¦¢»Õ† rýÿz;“UK’ìŠÎó+ÞP©t½wGh"ÇLˆ¤ªRPŠ¢õÿÈÎmÜÖÚþ¢*òABFÜæÖ¸¹5ËŽí#º›6ÒÝi´ç2èn´Šèî4R˜Òt·ltª%Ý­šC˜–p7zó“îæ¯Ä»Q/ñÝȈ|7ëEÀÉx3oôWÞø€xm3àuÅ xÝÏ xUó]¿4ó]]æ»™ùnÚÈwýy˜ïºŸ›ï¦|·Ù¤ý ¾ÛLôýßdâ»ÍF7DñÝmpT|7mO¾ë ¼ "¼ëaÔx7™x7MÄ»™ñ®ÇãÝ, ñnØ„w£,»™Žx7Ê"¼›éˆw=vïú“4ÞvÞͲïf:âÝ´ïf9‰wÃF¼[3ˆýp;ÞÍdÄ»žyŒwó™w¼ß›ð®§?ãÝøà„w3ðn&#ÞÍdÄ»™Žx7º‰ðn¼áÝè²Â»™Žx7º‰ðn>“x7Óïf:âÝ´ïf9‰w3ñn¦#ÞÍrïF:âÝH&¼EÞ×'¼›Ï|âÝ,ñ®×zÆ»­ËSÒ\x·&A8ÅïzTÝ¡Ft7’îFw£Î‚»Q9ÁÝhÁÝLG¸Éàn¦#ÜÕBÖl7«G¶-&¶ÝDl÷jël7›Œl7ÊB¶ÉÄv3Ùn¦#ÛÍt¶›)ÈvÝŽB»Qg¡Ý«íD»Ñ„v3Ñnô¡ÝH'´½Dh7ÓíÆd%´-&´åÚº íf:¢Ý˜…vcŽÚÍtD»i#Ú½Ú:Úüˆv£9…v£É„vÓF´›Ï|¢ÝøP…v£— íf:¢Ý´íze´›6¢ÝØ+ 
íj6Ùõ~Èd÷j;¸–¯1Ùõæ×d·ÙpÛd7m$»™É®÷À&»žÎLvÍ£Lv› åLv3?’ݰ‘즉d7Š"²;M›|fºÞ¬Þá®ú>eÜ_º½ýÇOkå»—nÂ>®÷?ýßýOÛx/^yßÿô¿?ýá{Xr°7¹ _p~¯ ß…¯¿-˪ÝgÔø7β}ÅwÐs¯èù—Õƒ~ëÔJûU€çŸ?4ÿý~ áY€×_>´f¼ùó/¯øwð,ÀóÏšÿ£ÙŸxýå<žùë_)É\©•û´ oËãFj žÛZeú×OO îÚs+÷Û§Ïoÿõwmð÷oÿýöé?ú·O_›Ñ¶–Ó°ÍC¿:§ùÛsªõY›ÖÚÂs[æ¯ÏiuNÿôïÃÛ°´ÝÛñöéßRñMËí.u2v›¶6O-ã%ß’šhäüÇ·y›ÿùísûߟ\Ù­ûñwÚ×´ð£5k‰(º69íE".¹ßü˜ü‡í’û°}ÐÄÜ69—º?~ü˜ü—Ë{¯Ÿ>´›CÿCúÜ=ïgŸsîÓçîùßûœsÿˆ>wTž}N¹PŸ»ç¿\Þ{ïs÷ ¥b°ï_=¡Œãþ»yn+Œa?ÚDÖ¶KûÞf™£íQ.Ùíí9›|ûÌìlêÄliW›7÷¿’ÍòƒÙLmCÙ6…Û¶×éq8^Ù ?Z¡6¼mÙ±Ó;ÓÏçë3·:´»m{{é_ÎfÌêü,êÒì6l-‹õm^K;wÖq˜/o¦þwÏá~ýýçÏ?ÿå/?ÿòË/?þóŸþüÇù‡Öo_uQt-oºg[9ÿ¬º¸¿–·Êy ³¬ÔY×~i/Þß}{ÖŠ‘}ÃÔ ¼L—mïÇ0òÑY·}‰Vj6­Û@_"qŸuáKÔVÚwX· ¾Du‡×ÍvÖ¶4 ,h›»ºÄ=>û#rNIí'0ãB÷£uÃÝëš¾¶Fï¾DíqýY¶îK¡à·¾D ~;àK4$rkÛójdUŠaîC@UÏ„+TÙŽ× Ìx£[ÓZî*ç Ì8Pº²¼Uzí‘T×òVéq´ÀéµíîžDãMBž%»ØÃh‚`%z}ž¿8~ÏZ²ãçùËp¨Û•¾üëü¥e‡Ø±k…8`† ÊËe:õÍ*T©ÖÒ`=`ZndR¥t|À ;e‹Ö{,Ù¬ÈÜÛ®볇Ñ>ˆ—Ëvœ0­˜TQ=p÷»Ò`®|ݲU*æíyÓÊ‚óÒµVΘá Ó]Ëcå<Érîp%j夺ly¬œ'0-?`þ­îõ€ëP4dÛKuסzoçÁuÉ~\¹²+lPŠÎJ7ÛΕ+«m¥V®\ÑM6¨ç+mÛ©‘ W~”à³æÍÖ=‡¢vÐ^IÓÇ¡6pËF)ï”sÙ+òN0Àò{,lUQö±û E›´ÍÔÄU+š²í¥ºÛP,h›­» ÅDü<Œ*W<¢såêM‘Â-Åîæ&yÅËá©â”­}€åû¬¨o\¹rmWq@úÊÕCIE¨è+WjЧ/]]éŠ,z.]ÛÊ ×p*i÷‡îRR¶~CÑÈå™rÃ˪—g ×®l²òL9×®YÎ ~Cî%Ç·¡LvÀm¨½=¸gTÄØî6ät(¡» ù•­» ù ¯µ§ÛP&›à6¤[1oG.`ñyì·^C‘ìë"È@×íNC…'ºÊá~Ûà4”9íprj{Õî4äo»Lׯè ûE)—”sý¯f€u,ºörL9ׯW[÷ª=HçX{ÛQu¯!Ym­ ¯!ŠµÑ½†¢©kÛy.b› Õî;€s[eéÁC5P :BÙx ƒrÜ›iÅ˲Œ¸UQ68Ý–­{ ‰JUî§ÓÐ8Pýú\ÃzlðáHÔîNü^ƒlméºØýxÅ•ú¾ ¢¯xÚã^¨¡îŽˆ —ÕÛ+î·xî¦ðâ¹mz]ú~Ì<·¦s@HñÜM’Ùæ¹«$¥Ís[q]<·-ß¶À<·œ™Ÿ\–(·î–å–s7” ÅsŸÈ•wÕH¸Ë1ŒqçÃø—·bRôèƸm“„°1nÛ¡/ãι†0n]6˜NB‰q[ç£ì§8n¦#Ç-=g`Ò'ÇoŒK/ŒÛ f…1ní‰{ð cÜ8 ÆÍGãNS"ÞÎqKœ¸»C™ãN3c)˜ã¶gb@6Ç=} Ep}gØwº4 àÖuâ>›àÚñÖ×þϸ¾ðb€%€;ê¸r‡5¿nÂlâ·i¿nfä·öb6¿wvcó[_­{A\×Ë÷jëw< Úg†;ÝÌ~Áp}îd†uÃwêHšáŽ;Õn q›í1â›ßŽuÌoÇõwÏD·UÝ~°gtK(˜èö~øÚwüF·Õšn[:RÝŽ+#²ÝfcߎÍo› Ûæ·>ÀmÙA×w| † oë†_?ó3¼m9!¬Ð oë2!i*áí¸œû¬d·ãFõo³Û–í¹ï v›MAv;®„&»-vHb·ãÂ0´f·ñMˆÝVë“Ý:vœnËÑ– p«àú ×7^Žn+gd~[/¯Ÿ[šß¶ìn@ˆâ·a¿mÙ@áæ·nòÛZ’߯¿ÍìÈo› kŸ'¿Íä·Ñüâ·µKr›%¹!'‘èvœ¹\4ºg.4Œn£ëÝÖMá~Šit[+‘/Ñmtc¡Û*'È¡ÐíÕv|ÁçÀè¶•᱌n³œd·µãégNB·ÍtÃd-t6¡ÛqéßB¢Û×ë&³­®ê,fÛþ=¢Œ™ÙÆë³gF ;™mòU2ÛqâÚ0˜íNæ`¶ž Élk¤îOÈ6(°í¸2ì¼™mŒb¶1\‰Ù¶t y.™mM!}‹oh3¯ í¸qëfh›ùÚŽ˜ÚžkÛ˜ l£ô¶^D 
Øò.ÛXÃØÆd.bÛ’!,€‘mL B¶±ôy"[{3²=(omd› Bd«e"ÛZ›õ(F¶‘mTLÈ6ÖB¶ùL!ÛƒÒÌlÝ{„lkAÝ·×F¶™‘ítã)¢ˆmìDlµ‘3±m&„x7±õFÎĶ.‹H‰Øf:[kÀˆØúZ£‰­÷†F¶my…ÀƒF¶Þ§žÌÖâZf¶Y 2[ï–Íl+ŒÊÒé0mÉŠ2ÛöÖ -mb[1ƒº•‰mÚHl+Xš_Ķ⋘‹ØÎ7†Ö1±­x'}3±R1°-4Òws¶óÈžm LôïÃÀv¾ñû7°­pa}Õe`Ûž ?/Û °ÙW¶ýÄvÚ9M›Ø–]¿MÎôƒ–›Ø¶¢°§ßòWâäï»ìù­’~no}~JtÛ¦Lìׂߎ,¹ù­"%šß®¸úžü¶-Ÿú®2ð­”ßΜçßÎü(ßÎ\ÇÃu$3Ü‘ÛÑ`¸“Ýæb«0w“÷£aîæº æ†M0wït.an+ËH¿VÂÜêú}]h˜;pz¹DÛ†¹3]› s=Nž4·}ˆ›ëû±ƒ}5Î5Ò°W®¦ûÀ¹7¹»&Î%MÎGFÛ5Îõ†Ù8×›¥€º’ø ·ÜY/ÝrEnÓ-ZT×ÓpËÝ8”›ê†—³Ýrw“bRÝZ´Ã÷Án¹3"í…WîªU^xåÊ»–T×îçá•;ˆ[Ø+WЧkîÊxáš+GòpÍ9;‡kîJAØtÍŬg¬ë#áp͵‡·kî¢íƒ}sT»øæÒiBl·Ú~»rÎú:7 oe@Ç]9ç. cdÂ[«0¬Üíœ;ŠCØ97ÚEιÑ.$¼·ò±&¼uÛýÒº7Þ‹°‹îŒu—ïmROåÍ’òÞv¹ œ.ºá‹ o-±ïå½+‚ä’òVÅ.Nºƒx‰(ouzÓ’òÆ#Eyo;NÝÃAן° ïm¦,!ïmãÜcÈÛlç0ä½<’7ä­ý zž o¼AÞÚ¤`+-ʽK”·tWè€LÊ{Û¨SbÊù‰òÆ—EÊÛ†Õç™ïm~K®ÛžNëâºÑH®ÛÍÓqÝÚ‚qŠëÖ¨ŒÓáÝlàÝû¿aâÝVvŽû»5A€ ï–`(°˜ðn ò o ï¶Gf»хwëš&½„‰wëÎ#€”ðnÖx·Ù@Dwo«\^Ew«z8oÞ½]rëæ'ð‰¨îm•GŸ¨n[(l¬Ûš¼EX÷6!6š©n´ž¨îm½zQÝ*6ŽQEu£Ã‹êÆ 'ª{DIu  àCÖ-V!º1(ë¶䣰nÿºEtëS4Ñuû è¶ŠW èÖ+¥ t‹(axО+ [rX8‚Öî)¬{ƒÐX7FaÝÖ’ }t‰u{K’èF¯Ñ®%¢{¸"Ðuû?ynüHœ[:É8Nν HRábÛ±R•¯4$x‘Á•À¬€®_ê×ëàúPÜ@×›nÈφ.•>S—ô/…p9ò§®„wC —6¥p)@+žëÎ`žëO.•pÙd!…˘çz®L-\Šë†.+p*á²SšæZš6•p¥¹8—ß›džÞ) \Òà¸ôH \1×Àå#ã²Õ-§`McaܳìÖ¾u¦ö-GƒÔ¾•žnhßrÆHñ[éé~ƒø­„`…p­¢j„J°_/~Køõê·ê;'ÃõGk†ëä"+!ˆ¿•0®ù"ËZ‹áF1Sþ¾Tf¸Ñ")‹ûAù[Iã†ü­ž)ù[¹õ…ü­\÷Ìpã­ZþÖ²–¿5зü­´šBþ6Äj%;P¡2äo-Óú·Jú·TÍ ù[J|XþV^k/ù[äò½ ÷ö} w»3³‹‚ÂÈ<‘$âÛõàÙo((IVPØyœ »ýe%¡°ã^H((l¡ °ñ*Y((l┢·Í§yÓÛj"*Þ6Ü÷MokeRÑÛf#ß½MémÙˆa‰o› 7‚o¯¶ß®+ï@ߦø6mÄ·eëŒ'¾½þÜùmDÚÀMnÚpÓD€Ûl„­¸aÀMnÚpÓF€[6ª$à¦7m¸i#ÀM nÚHp¯¶NpÓF‚k› nÚHpkÜÂ%dÜ«©Ü´‘à¦7m‚›¿’à^mà¦7m ¸i"Á ›îÕÖ nÚHpÓF‚Ûl¸Ë'€›Ép#ùm˜ˆoÃD|›™ßF2âÛ‹©ÓÛ0ÞÚ$v&2Ü0ã^,ㆉ7L„¸az2Üø•÷bê7L¸a¿ ñ­M¢·a"¼½˜:» Ñm˜HnÃDp&ÂÛ‹©³Û0݆‰ä6L·a"·½˜:¶-Swµu*AÛHf‰Èl#‘m˜ˆmÃt§¶ñ¡m˜Èl/¦ŽlÃbR[›mÃDf{1ud&"Û0‘؆‰À6LäµSǵa"² ‰m˜lÃD^{1u\k“hm˜kÃV¢Ú0‘Ô^L;W¤4=9müJL&RÚ0Ò^L;£´ÑÚ$B&Ú0‘Ï^L;ס4‘Ά‰p6Ld³a"š½˜v.Ai"˜ ¹l˜ˆem–½˜®>i"” ˜lXˆdÃD"{1óû£æ“ÈÆ²a" qìÅt¬ï›B²6 Ɇ‰D6L²S²a" ql˜ÈdÃD$&Ù‹iß6ÅcÃD+“il˜c/¦}~wØŠ Il˜bÃD{1í—¾i &BØ0‘Á† öb9´Ø„,l˜È_ÃDü&Ò׋éß6Í^ÃDô&’×0¼^LÇòî°iì&RW›]ÃDæz1ï/6E\ÃBà&×0‘·†)kó}´õǤk·ã,À…»ÎºB/îÚÖ ¸ªcî:Ñ?>Üf79Š»Öªól c×¶ø¥©½f7Å{w­•þø .Ö‘«ï]fÉí0{¼” [§Q. 
[˺I¶F+ÁÖií°-a«„´ÍZíæmÖÚö”/I ÕÎviff*ê³F$-aÖi’ª±£ŽM–èBµãUîÀû!QkÅ GuÁ_Çð*j—Ë µ#l#àXÄóRÀ1»m;àXØB¡V±¾vlàQÄ$ì¸cñ̈;FmáŒ;&µ\Ås4âÕ0)ê˜Eõ3ì˜l ;6Èq"ÂŽ)™ÂŽÝä´#ºš‘Ì"옲{ÐU \­Bà¶¡àªCdÔ1š€V#¯ˆ9Æ‚‹¬F3‘ÕLG²ê &«™Žh5Ó­f:²UWOlÕÙpÕ€áj†b#]¨«Óí-±ª¯ ™«ZÍ:‚ŽE:»˜tL·¨"òØ¡{ÏŽ<Ï|E;t—Ö‘ÇjïGè±ÈJ¡Ç¢ˆ =ÉzLâäzÌEqè1Ǻsè±C=æ¨#=ù)ôX”S¡Ç>Ä¡Ç"?…‹t =é{,lŠ=åTì1¿vÇsY{̽H±Ç⑊=æb:ö˜â,:ôX${„srè1 qè±x˜"…È\< sC9™Kâd•!ÌQkL%qø±ãË‚YJ…ó#},* ècaSô17¥£E:…»Ø~LÅtô1µ¥ƒ9C¹ŒAæær²C‚©A¶W2¿PØ´‰Än]7Š•+ˆXl4%©@sžhlÚˆcsž0lÌkâ°1} Äf6$±UtêÅÖê€úd±™Ž06ó#ÍrÇf:òX_U6²ˆÈF„dceD&oTP6‹B*›$–MÛƒËfá f£ADf³bD³WÛE« A8«AÑÙx¤ðl4–øl>“€6"‹ÐÆj=é™d´±Èd*')m¦‹d\±g2Ù‚ÌWáEjµ­ˆd*JD ‹G2Y˜Lo/"©¥#™6n=™ ¯dQx ‹œ€,rb2çFdOt2mCø@áu.Âz¦"]l>ˆü$|6 h‘NÂúzBø ê'ᇈ³ðL踘Ö=x·#t\éÄ#¥{ æÂ‘„Ü*ã­|Ÿ€í¯?ÝÞþã§½úÎ>¾µ_Úk¹ÿñÿlÛ´Ê®Ùúߟþð=¸¸½ÞãM¾¹ã|sß-Êw!êo˳ª÷™uþó<êÖ³¦¯?ë:Âoÿ²¿r_öλÍ4gÝŸþÐüïíýxã¯?o¼ÿÿmÿÌýþ§ÍûÞÞÏÜŸ~'ÿÇý+åXîÆ–cjcöR[þù>âmkè_?=űÿqhÿÍ­ÐoŸ>¿ý×ßµ¹úïßþûíÓþôoŸ¾2ŸõÖÖàóÒ6¤K«ìWç4GNSiµUêÔö _ŸÓêœþé߇·ÒM·Oø¦¶ÍM3.1ñ6µí`«ð%ßf[Úã9þç·?½ýËÛþolÿ›]ß¡û´ðwÚs*øÈšõÜú>jFÚ‹]r¿ÿø1ùÛ%÷»*ÃGÌÆmƒy©ûãÇɹ¼÷úéƒ;úØõ½î‘û³ÛEþÓï%¸w¼Èÿ#zÞclyv=çÿA}ïQ‚åúþ{ï»Ï/%5³íüÒf´pkûóVsuîóÖv(Óu^»½='—ož>#›¥âˆ´:´itÿ+Ù,?˜ÍTaNÆqÛÚìšÍñÊføÑê fZ6Ó;«žÏÖg>*@N[ìmCøålƬÎßÈb©ÀLÃвx›ë ~oÛñq˜//¦þwÏà~ýýçÏ?ÿå/?ÿòË/?þóŸö?þØòâí«.fÖ]Ì~–¿M£äD´êÊZùu¾ìºq¼O-+QÙ^N)ç±Ç2Q«¬lý }Þ¨v×FcœpÍ+åàöñl*N9®¦·ÏhÈ!Ú÷þ1ŽÈ\6œ_ŒQ¶—ÊëtÉ\ö»{Ê‹I¬Ä•i'Yú1TÙÂ?Eyàð¸Üq Ýyàvu¸ÜìåŸÒ™«|WöòOé dÒUäc²O/»3mwŇ:x˜ìäXÌ9y©}ÝÉ«m ¶¶—ÊrÏ Q&÷cç!éΫ³eë'¡«Z¾)ýl|£òÞQ¾)ý¸JÚ©5>tˆŠˆÓ}ŽŽŠ—ÔO(#݈ӃL7éY„ñ¨xh][E1žÛBé®m–ÁÉ "÷”­K\<œ±ê× ×μú¨ µçƒó´ƒ§þ3µCáÆ³¸° Tž:غ‹à83ðý1Œð䉯&ž´+rÏ1 æVÙð¥ƒty'F.Ô,¥6p +i¬:ü„JïÃ÷qÿ±¸¥ÃÖý7¨lˆäR—œBw\úÐ]¶Ê.YË¢Ÿ÷¼ X­E±‚µ_í.[÷D‹&gúŠH¨á¨îJeNDÇj¦®ôØÛ¿l[w,ÍzíÝý˪e‚{†t@QRC#ex:ºê9~f ]ý üŸéh#hw¼²>hÙà!Áìã>bŸýSjG ºöá´gµ0p”BAå0zޤåŽÔ§ä²ÁAŠ©G9¨> —ÍcÚ)s•:³‰º-:I[â¸ûÕž=Ô…¯% …lØå«ÛRiYóÇAKȳ<{\…CL¥l’a³Ì¸ÕJG¹!§VÐ1ïP›“ÂJk4ug½BµŠ.í›·V-¦Û+—\´Ç2RôCí_ãx9^[³A(%9[&–\è¡öjŽZòõm¤T^Žeû’ÒŠGë-_ ÇMÇr“Hi©-˜(k&M˜c,:„ÅÖ±ŽXºtÍÔxäD•Bsý²”Xû6¬öƒ5Ú±bÏFl6(”-?ʹäÔ ¤ Ö±îRïbÝ å•M¹Ý(.,µ¢²TÂ#·}~«%ô²£b½Þþ –¦ñÈ™z¾Bkÿ£äf²7‹åÁM¢qáéR”ºÝùÐÃúº¯bp­ ùÎf¨O%F¬KÝõšqrDÕ~U[ìÌ›q¦òdhÁõR •ªøùØ-ª@›fšqÉ>º(t.ÉÞÝKú‰ ½—šqpì?8ÝRvU k(îƒì†K 
Í6a½zI؆‚Ö4ót,ׄ‹ƒîÁÿ¬‹µ¯£íׯý­û’5¿Å:Rn}gš‡ý”bτǔ’‡[y‰¶ÆY—}¼uºqñ-nÔ ÐZè2ö7ãHt·NA¬Ö:Ëz{§¯NÒ6ŸÊA²ºÀ°];UÅ>a@0Õ² ­xs_+âú¾5c«XkŸy˜/a¸žÇ*úL¿ïÚç+Öõ߸íi»ç°u9L½½‚Ç á¶¿0ŽºnKÏäB¸åvK6J„[îº$™D¸ÛÖÝ>áÖ¢ Ž^B¸ÛJ4d†»3&ÃÝ–ß=x0àm[P""Ùíý:èr‚b¢Û¶@=Љnë†é£mEm «C$RÔ6m¤¶µNF`!QÛf &µmko2µÝ ‡ŠÚÖ}Ûs»`hÛLtõ´m»Ê›>¡í6ˆÃ Ún7¹Ý Ú6Ö†¶uø,Ìlk›ÒûŒ™ízð»5³m6Lf¶Ís3Ûfƒë¶™mÛhá6‡™íºóŽ™íºóꂘmÝÃÆŽà¶®ô³ƒÛº¯Í„À­o(Ü®ƒ|ÜZÒà6ŸIpkÑIƒ[ YÜ®’f4¸µ ¥Áí*ý>ƒÛU ‘Op[w;@«ÄoW ½™ß®«°¿ø­îýßÖ¿Ëß6O'„oëZ?È(n>Sz #É™â®h¸¤¸Í†Þ7Ÿ Š[lˆ„7ŸHŠ».:·Åm¶¤CwÏ%!n”D$·%Û;Dr£("¹™Ž$·ŠÒ©‹hî:ë4G47Iš»boÙinû•G6¢¹+nã'ͧ ç®óëpH$·‡M¦Hn]±ÂžU$·ÙCÌ$wFÎ$·Š=²Hî:QlÁ$7ËB’[épE’ÛLan[Ò—‘‚47šW47ª$š¥Ò]Æ–4ÒÍgéV:Õ éf:"ÝLG¤›å$×ÍtàºYÀÝuЉ·àn$Ü]lØÌv3Ùn$#ß-Èâ»ÍÖ7Æ»QáÝHFº›ÉHwëê3–ñB¼™îŽxïw¥ûƒ¯«Œw¨Œ×O ÆÛŒS×·Æ«VÄ{IHÄëšâ­”ýä>8殺ä¼5#ô½HpÞKž¶ú˜”œ÷’Žœ7 öfö®¸ÀÞlÁÞKJéáfž„½—”„½—< {›ñvÍ콤$콤|ÀÞ|U‚»¸ìÍ&콤$ìÍê ö¶” ÷’ˆ —‰y«ÒÝÕ% 樓„¼—”„¼ë;†¼Ù΄¼%¿Ð÷áAy3¡(oU°;æåÍŠˆò¢iDx/‰¤Kš%ὤ$áͦ⽤$â½äIÄ›¯cº¼¢ï»ú­Ê~á´»t?Ó‹Ó.c•øz‡eà»N}iÀ×_œoÚ|Ûž0SÀ·¶ÅýPÞÀW{ó^ïÍ{×Õ<Ø7½•Á}³¿iSÌmMÛž F¦¿¥ØÒ—ަ¿ËJÉÓß* âa‰þ–T ^¬èï2Mþ¶&A¾èok1¹“þΫ݇ôwÞ93›þ.7îÜL—‰!}MÛFŸ\¿µÑ‡—ðoíæ%…K çõk_EÏO ~í_nðÛJAß?ß–Q ~‹ÎÀ½‡à· ¤­¿- ~ç•AÑ ~§CMà7²øµ‹¼Áo©8J üf~¿í™Ó(3_üLÜ;o{‹‰Ñט¸·½3ºd=poñ&& îm] 'ãÞyoî'^v0ïm… ‘xo«ØÓùS¨w„·…z[V=ƒ‰z[1äõΫ|2…zK o_¨÷%Š*È›ä'ž‡ò–ÌÙ— oÉœ¡' òÎcE™òÖ7 ê$ÊÛú}ÆEy‹šÂ!L”·î#Óù–”7º0oË€¾œÂ¼í/5¼%‡qç‰y뛄«´0o%Xæmɉ …yËFªIÖc€Xok:Y‹õÞWYð&ë¯S¬·š#£Xï4q¢3ëõݳÞèÉd½µÞ뤔¨×7hŒzk} P ÔÛ BÔ$Ôë»CF½wUB€Y²^ß2ë¶¾ôHØ›ùöZ¾Ä°7Z°·º3pÁ^ª>a¯ey {­'lÚÛšcÆAŸhï´ëüA´wååJØÛ²ã4(ØÛ²ƒO»Xo½ìŒ„z­iÔk­b£Þ¬QotV¢Þ($€¯U› |ë+Ű,àÛ2ÃÝÀwåf,à;ͺl à•ð"à;î:fðõÕ3sßøpÄ}[ý°E7÷­IŽÁîÛ: gDqßSMÛÈ·Fô/9ð¶Lèø+ä ,䟆¯ï ù–t+CÈWÊ»&¾%³Š/JÄ×_”€¯•*|¥®%Þ[#æ{ñ^ “›÷ZÊËÀ×eïµ”oŒñ¾…WðI‰øZ)ÎÄתáF¾5®âꆯÅX|£û?o½3ê ùz<äk-µ@¾ÖYä[Í/[Ò^4{ÛÂqì;µÀ½= Ü«>´×_oÐ^+ZíõË3íµ0ši¯'Ì ½Ö? 
Úka± ½#ioåIlÑÞºÑÚý-‚öFÛ öZ¶/`¯G¬€½VŽ Øë^°×"‰'ìõ`°7Ë)Øëá `ov+Á^ÏÌ{}6ˆ¯Fо­ö¸[Ô7UÔ7›FÔ7;–¨¯tõ }½î2ôͦôµ{@_«îôu䆀¾¾~ä×c__Köù½I~½< òïRà×7ÖübœŽMÿB÷ ì endstream endobj 3 0 obj 203286 endobj 4 0 obj << /Length 5 0 R /Filter /FlateDecode >> stream xœÅ½K$Iv¤»Ï_Kí¥öÀ€$³˜™ÚD/Šì"ng6ú Ü¿Ï1w3•ONDufÔt †ƒ®t µ—««‰}zLôùŸ_þß/ëëxLmY¦ö2ÄÿË®ó¼Ïó˸,/Ó2¾nã2·cÚ_~þöe<ÿèýË—áå—/ÿ6Æßÿ{üû?âŸÿúezù¾ ¯ëÒ²ñÿú?¾ìÛöºÇqlm}Ù·áußÛ0Ûöò-´ýuCkãô²¯ók›ãïöa~ùÚñºL¡Mëþ²·ýu^†!þ{Mm^§øÓc˜æÐZh¹ývœÚø:Ä?c¿Ghãë< ðÍÓSÛ[hmn¡Å±¬Ëc#©M¯ëÚtŒ/û²¿nûrló¸œÚüºl¡ ËZœÐÖΗ×iñ¾¬¯ËÒú®Úëã¬ã¨ö%þ±ø ÇS;B<·¾/óë±-ýð׸ڡ Û)­±ÅiYÆSÚâÅ?ÜÛô:L­Kr}öøÂö£ ãÑ'v¼Ž×Vöex]b›÷…<^5´ñØŠv ¯Û6<6“Û¼¯jjãk»NhŸ×ùºª©M¯óZl+´¸[?Îc~ͯcœ¦ñÔvùRåu‡Gߨç _Nhûõ=f»¼ÐçFRk¯k\–aÙ÷³ÝßìyB©å—2œ'j[ô”û8·×©Õ=í¯C»ºë¼¾­Ýß[hûzuWßÓñºnWwvË£Oæ™ÅÅx]ö«»†6÷Õ:†ñ5{ù£»r›ñ÷ýO÷¹½¶ã¾Z©ãÕ]C›û7~ óë6]Ý5ö7çW—–ÚÒeåXä—”Û\®®‘Züi»úlhÇõ{xhÇúì³óaéR\óýÙg£Õýëì)íý„Êĸúl¶îïûãÇ?^}6´5¾¡Ü~JÑK§«ËÆ‘ÜÏCÛç»Ë²Yüô—»Ç¶û˜ÒÜûšŸÀ?ÿõî°q]ûäã¿Ývé}ã¡ÅvuXùÃÔÖ×õ¸;ìòz\½7µøý?®Ô|%Ç¿ü{xõÅ/ÿ^g¬Úq¯¡Ý_nhSüòïá5ph£Mmìçy^âs(<¦©~¦ùu¼GVi2÷NäG7-Ñmî^Ú^ÇùyÓH-w/µvñûGV»SüÞÇÞK‡c¹¾ä)~ó×ÀêdŒŠ›Úë1Üãq\¾><§v–¡ÍCÿõxߘã'¿I/]®ßqjS¿SøþBÛûÀ:GwîCEѮäGî~”~zkJ­ßË)Ä/¾«v(kÝ?ŸÔâ'ßüǘݚtS=†¸}ôQ5L@Ü/®ßzÜ’Æ>ªr?¡}Ts0ÏOíÒ?eGqÏ\ûpÊ“[íÒ‡Sžì}¬§Ö.~}8åîâg¾¼ÓOÃ,¬}4µsÞ¢ë\ýÔNy­\ÕäˆË-õþÆÚÑGSv¸pAÛ(U2ÜSëÃ)÷צ> Ù‰·ø½/ïôÓÐŽ>ž²Yüæu8ÅQÆo~“~:?ͩŠn—~ª×²maf¥Ÿ>úH{ãwq©vùÚ!.54±yažÄ¥Ú·µŽâRy®ë$&Õ%´nRïcwÜí©Ä"öÔÆòµ‰Gµ/c]Å£Ú®bQó‚_W8µ­[T?ö½[Tû­‡XTk£Ž¤Ú.´CGRT¶Qª÷6©CeoÚfu¨¶»E*w×Ô âr…´ë8ªÝg[ÕŸ^ßg<-:‚ê€ES«éPgZ4q¦tgñ°$δéƒÍDâLq¶ñ<$Æ”)ž‡FIµS…vèHº ·>âiÓ‘TO:ŠšŽ¤úeÆC‘Sk·©1µvŒ©žÝ.¾_e<5HõÜâ‘Hl)#‰Ä–ò¼I}©msR_Êë|ÌjLm›‹:SGSgÊ®Dƒ¥h·ª3å€K«¥ØæþZIñD4¿3¤æ©5ŸyЇª÷_žâ¨æ×2/¿ºSœ\ˆ³zTÜDB\Ô¤ÎñS»Ÿ™NQ¬êó'7u©S·#§¸ªMõ nêS]ÜŨÆÙï÷“dh‡XÕªÁ«Ê˜=ÆÀE³JqT·jW&žŽÄ®Ný|гúU»!°>/[<7‰Wõ6MͪçJ»Š.Gð«<‰†.ž¡¶V¿‡xDj:à¢3Å3’¸Ö8‰i¹ ~ˆ#m+:pˆð­8ÎxŽÚtÔÅÅ ìózÆÃÒWe‰èzÉâiIÌ«]ìx\âªÄÍp}ãPVõ¯}o1D_™—q¯W2—Ô»òH1¯ Û–ËhŽÑ¥£ÅðÓö©6 Qì«]êx˜Š¾áxã—,Ö[ÆnâÊ´uxã²åDG^ô´ó¹<¿ÐñkšwX{q}⹩ÅõYÖ ×4™`aÑÓâÑiŒ,ãó—{µ9¼kýïÐÜáeŒÿwÑÜNrG#¹Ã¡Ü=ò¾ÔD¹aV•å†É½ï“ŽrÃ[ß&Pîi§—Ëßå¦ ï0‚(7µN–ÈsÃÙ |!Ï m[–뺓çæÁÞ)£òÜäñÊyî}W+–:ÈõünšË“þ¤«çN¤KÚðÝH×8õ£¬O2dºì¯dºÜ™îõ ï4wšîÉ<£¹F¿›æòæÊͦà\½ŽÄ¹„³ 
¹|„¾h.¿èïÆ¹†=¿çâø~€éŠýû¦Ë«H¦{ƒåïÆ¹ì†sq€Ä¹8>à\žòav¸»O£¹üÎHsq sa*æ²™Á\í7¤¹D%NsõÄIsGÍÕîýdºØ1]phcº ”éê6€éb“Ætu v²‹Ýìò‹q²«ß5É®aiC» Šv M íê€J´‹Ç¬‚võ;#Úå‘íâ1ÊØ®‘P°]cÂ`»¼*„»¶M£»8Ð];wc¼Ú#:ã}ò`Ð]<ÝåvHw‰óHw¥ ¾wµg9ÞÕëD¼;õûöxÛÞå8H¼ËkH¼kç§x—_ ø®]1¼ú[tÀ È ÀË£tÀ ¨ú#€W«^½bxõÔ xypÀ‹ã4À í¼8^ÇŸxIbð‚Ë8à•Aϯá¼Ã5´KJ ÀÛÛÚeC»$ÛŠv ¥ÚåiÚ%øÚ´R®¢]‚O ]ßìÌR|IŽv¹Ù…Åܬ^"W¼Ü¬^wÆZ¼ø6ðb*á‡/Aç÷ÞüÇÍ­*à%76ÌKðê˜Gk˜÷"¹xñ59æ%ˆVÌëLù€©%5Ìëâ¡c/!¨a^|UļEó:æ­"0/~­Žyy*†yÑ{ÿ ìÅŸ~öƒ½K‡A{ÓõžCØš\ÂÞtRÆ Ø»‰/°÷`Á,`ïÑ{B½G\)Ðì½êr…ñæÓƒ:oÜ•7+ãŸFgc`¼küñ†4Þô‹„7¤¡3'ÞuëÿpšVƒð¦Öñ o>(UÄ›šðr ÞÐÖ^¥GÄZëEŽD¼©unCÄ»Šîˆ×?UÄ[µîœC›·JwócáiÀ»®)ÞuMñnÕºimê“Ä»¡ ²'ÞõvŠw½âݪݦ9¥ñ®}ãuI¯kÊxMã M&y«ÖM³k y]SÊ›OºÂu€y½bÞªuÓìÛTÌëÚ‰yýCż®)æMM¿yMæuM1¯kŠy]SÌ[µî™]SÌÚ®pU1¯kŠy]Ìë’`Þ*uÏìšb^Ó€y]SØÚ&t °×Û)ìuMa¯k {} {]SØ[µî”]{Ð^ÿTi¯kŠ{Mî­Z7É®)ïuM¯k |SS «À×5¾Uë&Ù5ž® öuI°¯KŠ}«Öý±iྮ)÷uMÁ¯k ~«vè0 MɯkJ~]Sòëš’_×Þøs%¿®)ù5 ä×5%¿«|óŽ~«¶ë MѯkŠ~]Sô뚢ߪ:ÈBôë’ _—ýšô[µCYhŠ~]Sô뚢_×ýº¦è·j»²Ðýº¦è×µúõOWm×AV5`×»¦Ø5ÀU;Æwú,°k €]SìšàªëÛ}V°K €M^“óݳ]à¿&)þ-Ò®#¬J ý8úXm¦ì×$E¿E‚‰Ui£‡UiZXýì ƒí’QߪÁÁBé`¡Mt°Ðf:Xh ¬ip°Ð,´•ÚF m‡ƒ…tÀÁš« ¯k#,´‰ÚLk,´…Z£ƒ…¶ÒÁBÛè`Mƒƒ…¶_Ÿt°ªôº6ÒÁš m¢ƒ…6ÓÁBSÆëZ£ƒ…¦„·jp°Ð”òº¶ÃÁB:à`Uâ­Úñöàj€×µ‰šâ]×:XÓ:Üu­ÑÁBS´ëÚF MÁ®k¾©ÁÝA*y÷—ùez™$ša‹ê“ûÄ» ˆwÔye«ç?”÷çYÏKRGÄ»†[“v@¼ñ¥No”òZé0èî¨u‡¥”Wj¼ˆyãQ‘‚yÛA°-˜7>&-pVÎÏ1·érÐÏ?Mzã¹é轡iy0@ok€Ç½ñœ&Œ’ 74”+èm³:‚^×ôƳæÞ§ÉzC[úÜÆzãÓû‹rÐOµPÐ"”ö†v?I:íͧèAÚÚÝö¶EÀJ{›<9íuMio<íß÷[§½¡­}&ƒ´7´RÊ»Œ€Ìë’b^† ó.ò‡Žy]SÌË|»9éÝR^×”ò&N铤¼¡!]A)ï²i?)oh«Rl¥¼Iv åÿ ”7äv\µî‘CÓBBPÞÐ4À@!ï²1ÌA!ï²2Á@!ohZ€È›í¤, 7)˜ò†¦%4€¼® äMIK•ò2f‡×·¨7´g½®âÝøTKƒ€wCÓ&àݪusšVÉï. 
eCÀ»¡I1ñnRH)ÞÞõm*ÞÍmöYë ï.‹Î¼ï.òõ9ÞM­¿©A¼k{Þ MËÙ€wmÀ»K»¨Ó]“î.M+`wã÷8åp×5…»® ÜuIà®K wó°Æ^U¸›íºƒ Üe”á®k w]S¸[µw]S¸kýpw[àp×5…»®=þ<¾!©ô×ê®p7´Unõ€»¡Ý}Þá®·S¸ëíîf»7Ââãåè÷p]ßœrÝÐæLE®ëÛT®›Z¯×uI¸®oQ¹®5×­š¼€¶¨Û%×õvÊu]S®ëÛT®»ñq®›íÄÛëf»¡Oºëº¦\×÷÷àºÞB¹nÕĵ}Xt®›9vû]\A®k¿Bp]×”ëÚU×µŸ¸nþäôX”ëÚÏ\×Û)×õvÂu½™p]—”ëÚ\×Î`·ÿˆuñ±xÖþ±Â\ùX9®ýÆrm$ɵ^”ëÛT–kýós½Ò\9†sù 2ž»,ý.[€.³Ft‹¨H·2]v`ƒº¥¥RÝÒR±n9 åºLk$Øå×D²[ö¨h×l·ˆ w‹¨t×Ox·´T¾[D¼~* ¼wÏ7º[ŽEñ®÷,ðÝ"î—‡åÇJxÙñ ñQ¯‡€¼SŒò–Í*æõÓç-¢‚Þ²Y%½åhõú—ÖëýXaoÙªÐ^?Và^ß(x¯Ÿ€¯ïÄ·´Täë˜oP3}Ëf­-.°o9Ïæ–WHÁ¯ÿvæÃï}A¿ãË/_†—‰íí±½ø&⣩ç~}üç6=êÄãÿù__~ÿÈ÷xAñôNñÛó!²ýƒ;Íü¦§ý×Þé8%t{žëõA_ ø«Á6Üû?ÿóS÷ÿõÏ=ÿó¢?¿øë[üœoàÚÿùŸŸº÷ÇE¿:þóoÁc›¿üÊ‘lcÆ+oû6/ûòÒ¶ …>Ém̓úÇŸžà7Æÿ-qà/?}{ù·¿‰ìo_þýå§ýòO?}ïžæ0ìÓ¶nÛGøÝ{Z>°§pæa§—}Œaøû÷´rOÿÏãKx¼üôû9íÍsܪZÎ5Ž"n+m*û]ó;ÛcÏüï/ß^þáeýo/ˆÿYx¾Ÿ2b÷Å_½ë>ï Ÿ:bë™Æ£agFŸv“zLÔ”ý?§o>寭îüðôóÞ¤÷åó|úIGÐê÷ŸŸ}n—Ï ¸Û;~Jï;÷~õ>îÿ“zßyÞÇýJï;G™«÷aÿŸÕûÎ#hõûï½ï¼×$ÚÜ¿÷^³žsö{ì'njm’-[<²Ìõ7¼<ï3?|'å^ÚþqG‹êþ+{i¿m/sBŸ)n£ûTo›ãqíeü'Ϲ¯Û2Æ^æ7\AßÍo;›%« æuØöx>}/“ŸÌ_ØÃš%C[[^â9>¿üqÆ¥|-ù?çþã—ÿüöíwþóï~þùçß}[ÿø§?ü6›ñò]o•æÀûº'ÛQ$¡¿Þ£,·"¼ÚøºÌÑpÞ²¨½nîsLZo„€½ó½Üó·õ¨Ë‘9ÔS”Å`}¹.ÄŒ~ºM›4õ.ĦS8{g.§¸Æóÿ h6 þ qÓIœ•/‡xŒ7 Áü`ˆ»LãLqpò¢òq HP^Mc{¯ó|¿r•SBß]æv¿r½jÉÉxþXzry 1ƒ3îW®±zGˆ —‡9îiÜÐÚëãëK Wˆ+%ÏîÃÝ_o\/ZÙâ¦-¾Ù¸&³¼q­/Ç¥½RZô›ŠÏ_‡õêÄ󦓢¤µL:eâøºîW/q\ZßÛ8i^˨X(Ä8†áêËñ„+o‡¸hbË4šâу} :Ħ™-#ØÕ8®Ñ™®¾û”ˆÊ7™Ü)?Þ6N2Íõ{nËÐ'ANíèó;ñ+e’ãH== úuP q” žh(oÑ…ƒÄͳd¤×…(•ƒ)JáÛ)î7k ññq|¾ÈtO–®HIÜß_—3F#y!6ÄUf}â²tRˆ1>ܘqÚôí¶Sìó>ñ±WÝ„¸GǺºñ´ñ§ü@RÓªxÑ¡âÞC±ŸCt…>õ'´jG ñ¸ÇâiÅè8Îeš-› ccÆLÜ#r´|^Î3ãîê¿ññ(áÀc®ÿrÅYB$ £†Ý×½Åî±Xö¶ÉtÏ´h(èxâ{žZ/½8Å£O÷$¦o2$c¸FáÐt\ i¿áІ>Óâ(³=!ÊDoˆ1$܃ð´èòP!Î2ÛãßQ‚ñ{Ž–=ÒõÔúdOsép‘Õ:÷ <-}šf•+7»ùƒã‡3ëo>n“}Â'w©ãLˆÇ=çYN2‡CÎ Ÿ¬GÓ¿ÅhpÁÓ¬Õ’!Ž2áãÇ·óñƒ£¥,ÕuŠ}§ˆ±›>Ïšiâ">9û£Ãlk2áSNe• ŸÛ*÷š¶É„Ï4i%ó)ö Ÿ¬æ[0†áé>¹ÙǧGŸéñS8ýÓÕ…'IO i”‰ž2Õd„Ø'z¼á$ó<~r¹æ_{'­ q‘yžs –ñ>`ŸçñËbŸçñž¸Jq¶”UÎBÜdž'Ïd—›w8Òþ¬¢¬Jâ!ó<¾ÙdòñY/}ÍÄg‘æ!?Þm#·6© q'-¥Â5ÄYüpŠêu¶Eü°_´».-›øa¿.Û*~ø¼×ŠßÛ6ñÃåºì⇧Q‹nO±ûáiÒÌááþÚ÷¡[a¿¨û(V8/ªÞ\÷I¬ð4i¹ñ)v+\ZŠgLÖ3‰B…½e+ìW|_Å OƒÖ߆¸‰ö‹b÷ÂY5¬÷õ}/œW|“‘1κv1Φ†‹dÝû!†Øm°6šÄûWÏdÝgOZån~,ê€GMÅ ±‰ÎÝ r‹=š8àq•Ç*X ©OqSl_ß±«ñ¤×_ ð(A¡§vû_Ûh¥ø_Ì¡…6ŠN7Ü‹CœÔc‡Ó0«ÿåp2 ‹úß*Šÿu±© 
ðè4 «š`o¹© ö–»šà*Š ð\ç¨&˜=ä9/™—MrΦx ìâ¤&˜¿¤ÅúR@ˆ³š`ßì¢&xÐ’ü›ZaWµÂ~@«Zaßç¦VØ[îjˆ½å¡†Ø¾ÈiCl߆tÂÔ4z/5qÄÂÞ¦óNùNWNKÖ‡bßꢆx@JÜ”¾»ÅÞ²©#vqUG< ëmʇÔñ½®œOŒ:³å¡–xxf½åÇb†­M<³m­^±x k:³Í¤føÞÏ!xõßóÅ“îSlãЙ@/ýãÐÜÔ{ËUmp?”M °·ÙÕû¹íj€]<ÄÇ*þ›NÜÕ{..£ø`û9äÊË„M#ì[Õ ÛYfýç$]—-›8ár"«8á7DqÂB#§|JGaîsW'ì-1kcûÌ×ÚvéÀÚè >ö²ëµþø_xÇÕ€sN_.²Îýš˜gÄ9ßk VîœuÜr«!wÞä½¹¯Î·ýÙJœãcYKň³šæ¯Nœõ5Þ¯NœÓ‹ ² q¯É ÎÛªo8’8‡7-áÆJœCÄÃ<ˆs*€5Ç#¬Xsˆ—³fŽÏáž™ó!MŸèž˜9>–7÷ 3‡¨”9ßž1˜”¹ˆJ™Þi”™éF™· ñ´¤ÌÛˆXSRæå•/£Ìùä«ð”¹´TÊ"iºPæmÔ7±H™]fÞ}UÀ0³.õÕ1si©˜yP^JÌÌðScÍETÖÌhTcÍùÚŽÔ»’5§8(1Wּʋ”_5¯YÉ.þDÍk¿¾_43ÀHób7ÙLw5ÒÌxW#ÍETÞ\DåÍëι ðæh9(Îo.¢RçØ¬¼´oÔ9DÌ´(uf,-©ó*¤_;g0m_Ö°³°3rk:§¦(Ô¹bç̵UüìÌÐ[ãÎeŸÊQkÜ™™¸Æ‹øàÎ̵5îì{wfÊ®qgÆ_wf&®qçr@ÊkܹìS¹s•;3ÕÕ¸3Ãn;—:*#Ñ¡ÄϾGðç7DF#*¦€.»œ-#q¬ ÐešY ×•³‘ÐŒ‚!.[Ý™/í ˜QÏyògÛø³Ÿøsˆ³Ç§ tUè³oô¹ˆJŸS”§\âç"*~.›UüÌ’ ?§(ÏÄÏå4?¯ åDÄÏeŸŠŸË>BûfŸÚ7]D…Ы¼áúÕQtÙ›¢è"*Š.¢¢èܧ¬µC]Z*N§'H‚@ºœŠé"*^0€´i?éܨÔH—=*.¢é²YÒ~–Òe³ ¤ýÊH—– ¤Ë>HûÑHqôÎú±Wp4}ÑK¡%Ȥ–B|X-4ŸÝ­"z-ðYø‚’1’éx2’Ê/âi®Ånxš‹ªžÎyþ"ž^ë3ñÝé³»¿•æ²2¤ÒîË@¥Ý[‚J»Oœæêõ§×Yóü N¯ k§×…5€Óùt«¥/€Ó̶48á–úœ B_âý¯N¨ÏgxÝçƒP·Ã ¯j+°'¢nf²‰¨' ÖYuˆÏH ÒéØ$Sqz±àt[ɧÛJ„ 8ÝV`M°éL±R¼ 6ŒF™²éÖ€×MG3p  iæ~šn3Ñд½ì@4â¨@ÓÌ54Í€OCÓ)êOh:Ä«®Z¡tœW¥jPºM||Pél¢ÅøÀÒšˆöÕ±tË®,?$`iÆ–.-K3ÖɰtPz *]5I¾9X) (]G¡tO-”qzJ»¦PÚP𣥋¨Pš‰ F¥—Ê•J‡É¸fì¤õseÑöYtšPleÑL 5ÍÄPcш÷¼QôÙþ=ÍNCÑWú¥Ah|,97k뀟Àiø9³(uÞø¹ˆŠŸËá+~f€¤ág&O~f¸ ñ3ƒ3‰Ÿœfø™Y]†Ÿ{ÉóÒ8= òl¡1$Ï–ECòÌ8/#ÏHÉ4ðl‘;$Ï–®Dò?5ñxf$Û1i­Ès´ÄT!Ès^:ýò\HÉs^!õiJžs—:¨äÙ÷òì¯ ‚<Ç1s òC4¦É@žËñ(yÎUÔyöKòœ[Z ò"LÈsŠZò\®òçh‰Y­'öÓö½?çéë­ü9óru^ :6‹©9èeä\tÞOõŽ]ö©ú ð•_ô2p^º\%Ðþ’©hÛ%´í‘zaYI Ïè¾J C”91èЈ®@/&  3èZtnVko ÃT9cîv8÷‰"jЉ–7@ÏaÜ@ÏQ"ô2p::S¡O@ç9¼  çƒÈºìStÙ§bè¡t¶Œ3ƒÒv”€Ò¥¥Biû2À¤KC0i;HEÒ¥¡"io¨Dº4‘¶† ¤mZ“@Ú¯Žio"m_/ˆ´*ˆtÙ£éeFB#‘tÙ§"éeÞ!’ö.&]ŽV™t•I— ¤Lº´T&íçùdÒÞLÚÏLÚ¯*˜´'˜´_0érœÊ¤Ë)“.¢2éržÊ¤mŸLº´<¸@ŽVÁ´7T0íÇ 0í½`Ú¯,Àti©dÚÏdº’é7ÄCÇdîSÉt¹>;(â©4ÈZqA4íûl¥Ðâcq£TGÿXDôp>Z¼•­ÅI3ç%Ñèh)‰Ö‰cÒ2[¬&z@¦C©‰^5ËHš5µ†¤w­à±Òhߣi,BoDú #‘>uL$½“.I‡$Íó ‘¶Z]i›@ ¶ é¼JH¤ãñä@ºˆ(ŒÞ°Ü]½+°0ÚÚ°0ÚkŸ­0šÙÒßU½T°0ºŠR½ñÁš•Ñ.¢2ÚE+f 5J£«ØK£7>w¡6ÚÚ±6Ú²6Ú[¢6z#cm´oµÑ.¢6ÚªÃY½þJlGQ!íG‹ io‰ iÒö)+¤]D…ôʇ/VHÛ¥a…4‹êY í[E´*¤W<(J—†¨öcU(]Z*”ö&m’2é*uóí§$ 
’IÛéƒH—†J¤½¡i×H»¦<Ú5ÅÑ~úJ£Ëé+n+ŸÙŸ4ÚÛ€Fû×mG ]¶ª0ÚN,Ú7ª(Ú5%Ñ®)ˆvM9t9PåÐÞP1´Ÿ„Phߦ¢è¢)жm‚D{'‰ö† ¢ý{ˆ.[U]Z*.¢Òè"*¶« ]ŽGa´e²èò‘8aßPt•EQY´>P´ß^€¢} Šö{,PtöXeEÑ~§Š.ûT]Z ŠöãQíÞ(Ú;8P´PtÙ¥¢èÒRQti©,ÚÇ0°èÒRatÙçFGÌ– £Ë>:b’ÕárÄüx¤#vq_ß³ÀÑEœéˆq†àÑ~Òö+.»ÒSÜø¾àûDÚ~ëÒ~8£m›àÑlFm§O"9­òè·€G—–Ê£KKåÑ6¨’G—–-ñû<º´Üi‰)´Ä¤²ÃÓã¤ýH¤ß“)δÄzbP£'~I¿!¿Ϥ‹¸Ó“Ê4Åĵ\1±ê[ì|ñûPÚ/ tã÷¡ô"œ1h¥5~Jq§9æÅ;èŽß‡Ò.>¡´oPÚÞ‹&”.-gÚã÷¡t9”…þ˜b£Aæ>W:dŠ-òûPº´Üé‘ɺšd´T(í×@¡ti7Ñ%³áD›Ì–3m2Å…>™¼Ñ'³åJŸÌZ铹Ù>™›Ýé“)4ÊØ, ´ÿôZñ´…Òátë']ÈtõöÚK‚éðì\ƒ)ÒŒ“µé•+–ÕqÀ˜ö €éÃ*©ÖqX ö_ ë`8)Ã:jʰ +©vqf bYWÝþȺêÆéÖU7Îo°®ºG]µ%G[â´ãí^B²Zä40˪Ú|–U{pôÿ¡Èé™A„,«žaȲjo‰²ê™«„yð4Ó³QVí-QV=c‘­gUõŒe€YDmûlå6õ±Ðé_¾ /ÿò%~Mñ-Ä7-ë|þç×ÇnÓ¹ÃüƒÇþ×—ß~ÇÅ=^P­=½W­ýæÁ|ˆ¸ÿàNó¿éiÿÕw=âãdï Š1ÿêÇV½ÂóŸzËtb¢ë+¿þõ¹WáqíŸ=àþצËÚ|Î7qÂóŸzÏký®½q ­þò+Dz/-ÂÔbÜ^Ú–7Šs°ÚÖ<¬üéyÿwcü߇þòÓ·—û›ÿöåß_~ú×/ÿôÓ÷îi;ç@âÆO1ß¿§å{:Ú¶ë¦À¨ìÿñé'A«ß~öÉ]~4ö)½ïÜûÕû¸ÿOê}çóãwRì¥Åí­Å-n¨û¯ì¥ý¶½ÌÃòšÐaÛ§zÛk/ão<™³Ðcc/ó® ïæ·Ír 9O7l{|ßïïeò“ù {ØÇ×cÊGÐèYñ\Ÿ_þ¸NãR¾–üŸsÿñË~ûö»?ÿùw?ÿüóï¾­Üÿð§ßd3^¾ï•Þx_÷¡õ7¯×(ËÈ2>²,ñ(ýŒ8]˜8 ËÀé´ÖœÕ4W5Ç>OX7!ÅNz§ käíŒ}¿Sòs­ó©K¿ü°´wcŠZƒ7ç.œ¿eÑÔŸÍ9q~Í™¥ˆ1#Ä{Þ>E­y›³~j¼Fâ„DzÙܤì#[b´iYöq ÇþõµÖçOò’k°GŠÇ| Ç) Íž³6óúɧ¨Ás†Ó.×pœ‡Þgæ&Ó½ùeán”‘ÆÏ»wjZ2™Ú]HPÄU¾†¥žrÎ¢Þ ¼çÑhjöœÃÈöèº6ó¼Ê„ÎÙ=¤^2ÅûÛ)Ý#Ãk‚°ô€µÝ}¥èêŒ)j‘Þœ#÷p ÆùËZksæ.\Óg[ô]­Kq¯Á8;¶ÞsVYÇ÷¥pnά’éŒsݼ^P—¥O0¥¦U&ó–µb×XœmRs“â=g™"œUøÚ{à®âÒg·œˆQ ”wÙë²å³fÞÖ^c‘d4µ»  5èg^±“¢ÎvÏÛÞÿ¸çqÏveC­Y˜÷œºÆá›ôçâ=êǺç=ÿ†ÏòBéÅD¼4$¢‡³Ž±èŠó dŠÞ¥ê9[.jÓB<.gœ-eú0¾Ë»„¨ÇÚëçS¼-Â)n}Ò-ÅQª¥ç]–‚Lön—ÊÆ-o=(eyÞ§vÏ^¦öó.H·Ùcìk6ž¢ävÌÇÔ OR„õ ñ¸}q‰ê½ãš¦u…ËùÈš¦k4Î@~Ɉý÷IÀz@¹â5ç}ôy;Ö>¹\%‹®¸‹T+äoCrŠÇ枚،•zLj^¿Ä³è垢®Š™â]]µ-–š\†Ç#„_•%žXî½r(²¤Anp•!3¿ÏsŸ_¡Ü0R¼"í—¶ S•ídUÓ%žß–Û[‡Zâùí.ÆIQ߉§À‡îaxÆ"¤)Þµ^þÓXâùm½=q®™!÷þe»s^:1#K<¿M·'Î>#O[Ë8÷y÷r´!Þ16Þ«âz)‡wœx îV*E]Ãu×^$T._<Ä·'ö+4n½ø¬|™ñwßem|XÆþzOéZS>d-Ò“å}€%žáî;°üàòóãöà ߧº&Ê¢+ê{;Ë”¾«õ>¼ÊkË$•ûÙR—QûoŒC÷H,ÕOñº'oaÓuÒeZ{‘³ÿl²ÜæöĹG1ŒKÖÛÜÆ8K,ä}e’²Üs—²ŒDŠ÷-¹tÔxxÛ»1¶ëok7Æv@ñð¶tcl£F<¼M·1ök0KE^¶¤ò?Åý6ÆË„µ4–¬¼¹q9 p ·1.´õªáò…¥7÷xìAJo¶ÇƒÕùñq÷çüTÑÆ’ÓŽ·'Î÷„˜,Yts{âe÷Xòj¬2ëÒ%)îÛ£‘> .ÙsoSìß~!·)Î'Jyð[rÓnŠG¼¶,ý16jx^jG7Å£®0²d¹M÷Ä#0Ü’å6Ýûsu”¥wc]2wÉr›î‰¹Ë³šö‹‡ë©wi²²·|vêvx@Jõr¾ÇsÃL\òªn‡íP–^‰T®w<»mÝ 
àZK†4vKì»”íÒâámìžØºT¢“î‰ù.iôÒû)°\ðœ¨îžxÄb0œÍÈKÐB¢óÆÃÜ=Û-¿ù½«û¤Ðã_zE—ú´òÏ‹S@ôfO~ÑY¿¯àSAô¶V½Ùƒ@ôfö :Ÿ‘ôá :¬Nz3§ ½ÑM@oæQ•@ûûÉ Ð)ª›Þ7ÒÀÐ)êŠOÀÐ)ê:߉ S í/LC§¨£0tÆ€‚µ †>—ÊRh¦úÌUF­:E †N³O m/vƒB§ê£Ú3GA¡=sT)tjú.(tNà`Æ@)´“‚B{À((´Ç„‚B{Ø'(´sB{È& tè«»€Ð)î`¾¡Ïp9…Š ¡=¿Ú ¡SãR"&cB§¨oö‚B§)Ú_þ†>Eyë:EŒ‘Š¡SÔww¯zÉœ¤ÐBYô[¢$¬úú.Pô®\î+QtÕEWqÓT—• OytŠ  Ò5]]DåÑ)Þœð«ñhbNQßÔ>EpkHÏsQxtŠOŒ¡$:?ˆT¢¾Á ]E!ÑUD.V¾[*£‡ è·4 w÷L¾Š®â¨¹$ o_,úü\üXô[âý «â¢QJæAXtjú.1Xt…EŸ¢¼   EíÐdÁå¾~5]´QÓw¼j íq"ÑU]ÅEÃN'*ˆ®âªyt g)G§H¨¼i¦Š‹‚£«xHЂ7†£ß%hjÁÃàÑU<‹Çóc5¤Ê£«ˆ é*Þ<úå _Òµ¡éÚpÓ`"HûB¤ÒU"í ‘‚HWQˆt' ê«âM¤SÄ´Â ÈåžQ]5 IrQ`tê`&F‰t…HWán.v"íô)êã®"éÚrÔ8Joù@Òù¹NâI×F¤k˦q`3^“U&]®4ç …J×–»zK¡Òo‰’i¢RiÏ[•NQ=A¥«8k@Y÷n’}Ÿ‹&ß¹(TºnvÕ`E…JŸ¢L‚JWQ¨t5uÊ4ÅÒ)ꛫÀÒo‰G7É.>^§‰užlº¶A$º‹Â¦ëf›D‡º&lºŠ§«(pºŠˆ:®âÑ]òŒ²<Àé* œ®â¤‘“3&’”M׆È2µ†‚¦‹Ö4"×4ÓEÛ4Ù4ÁÒ©ÉtïÂŒBÓ:švIÉtÑLmÒDÕ¢íϬ¯@K—6B¥=3TºŠJ¿¥IR¢Ž`é*îÁé›,]DÅÒo‰Ç$£1ÅQ“ƒ]8íÙx€Óµ¥Àé·D‰nôÍ ®âªÉ . ®â®‘³U<6µOétÑ”NWQß5ð]*ž®-ËEûØ{»?%IN½õ¹ïŠ©9Qi˜z´²gÁÔ†Ì@©W¹Ÿ~uJí”z•Çõ¯N©Ý„Vûã `õ:=ù6(õ:±”z1ÏNJ½¿B©³jq(u;^{ µ„sì,ÚŸ¶ŒDòé“›hõµòéŒáRÀ>m±ƒàÓL¥#ž¶8âi‹«"ž¶÷Úo]Z*ŸÎ*9=AåÓYΦt|:bú|: óP­|zi˜ö"ŸÎL8™ &ŸÎD­ÚŸN䨮;šÎ{›ö éÑ ¦³hC‹º€¦ý$€¦óF¬u @ÓY ¤å¾@Ó9±­¿m é,ìÑQh:MN Mçœ ¼ éÌ Ôr( éœKG±¢éœ¢Tà 4-Qœü@Ó9Ù/k˜Mç ®ã Ðô™«!? 
°ée0.lz>H-Á¦³¡¦‚M—–ʦýºMgKíX`ÓùÕè 6}~5ÂÁ¦O*\l:Ä‹hKçú.–Ž>¤ˆÄÒ!=N—³P8=o¬œÖ Í¯§ÓŸëø8?‡qŒ¬p:ÏDÌ §c—›ÎlNçïH_NœžWVo^p:EŒ€Ó!Ú˧çÆ!p:DÜRNÏå—€Ó±KÔŠNçƒÔs‚\:>Ç0­£ÖW'ÓùØ&E Ó¾U éît„š–ã•ÎÄW°•ž–l‚JçqÊMP:·ª÷@é²U…ÒóÌIv@éh©µb„Ò¹Yõ’ ¥ó£“K€Ò¾K@é3vIë–JÏ3çõP:nuZ¶D(OìOö­8:£ŸtøŽžgTìGËöD'4©pè¤ WEÐñù²­:EÀkEÐ!N:@AËî@Ÿãa@Ÿ}w Ïyr2?ø\E t†o½K çU ЩéÐ ]v©:E½ @'’|åÏý,^†ü9DuAäÏéÑ?‡¨oàÜü9ýÞÜ äÏáÕ‘?—cQþtGý²ðçÜ¥˜@òçyÀËYäÏåx”?Ÿ™lRg þ\ö©üÙ÷ þ<¨(!ž‡ÇgЧï;çÈÝ–ØÙ¿ `çh©¶‘Ø9÷)·1bçë •6OÊRH›3’\nµ¤Í3"“H›½K€6û1‚6ûEmöƒ}Òæìcòi³_.Ðfï ÍÞU”6—#QÚ¢>.6ç™”Ö6O‡B Âæl¨ò€ÍÞ›£¥Öä6GK}¯–°9óêŇ“6û÷ÚìW¸yÚQêCÞ\ŽVótèkwÎy%Ÿõó€Íå8•6ûUUÜìݼyÚñØGàì× Ä9Ï^ßf¸³_2 çuQ2gÿÑ€9—æ\ŽS™³\Oàæü\_òn.¢âf¿ØÀÍÓ†J-âf9f?f¿  ÍÓŽl’æuåU’fï  ÍÉTÄ‚“4û7Ò\ŽVIó´áMt’fïJš“é»S Í~& Í~iAš}[gÿê?Jš‡›4ñ÷?‚™³ºp{#œê É/’#×EчWEÌùʱ"10fËŸ&cÎ÷ˆs9C%ιYE‡ ÎrÊš7(kÎU}´Ò¬ÙE°æ"*k.âƒ5;âkÎ ¯œ¬¹ˆÊš‹¨¬Ù XsÁš«ØYsˆ»pdÍ_° k¶ƒ%kžðŽ‘±æ* kžDà<âÕ.Î6p8‘7Û—LÞA£Ô+‰ñ"¡‘h^#ÑÜ›‘hoy‘h~Ã$ÑvrF¢9Ä‘D›i$‰¾ì–1hšP2hi£ôÙ̲ÑgºE£Ï÷Éí ÈmƒÐfy  ÍJ€ö}*€.-@—–Ð.*‰~C<ÔsŸ Ñ~žJ¢ï‹ í;ƒö‹ íGòdÐæÈ ËÞ”A— ‚AÓì€AMt9eÐþ;ƒ.«ºìS!´‹€Ð¾O@èÒR!tBó>I]v© Ú¬ti© šƒ Ú< AtÙª‚h» D›#ˆ.¢‚è"*ˆ6srƒh;Aph{”"‡.Tmß”bèr”Š¡Ë•Eû¡*Š. 
E[Chÿ*@¢Í{GQq´]Ðh?VÐè²U¥Ñ¾U…Ñ>´Fûý0ÚàatS¡}ØT íÛ†öñºï Ú Ì  ‰[>–Ä1J©ó…BýA¼¦Bü í$Ytþ^µ,‰©ÐF mz–:îN¨<‚ŽÛK²‘ ½¡`:—Ê“º%fBÇBK ãŒ5 ‡™1œÐÃp½ÜÍHèÍh®FB È îý½,ŽL F"¡¯H‚>0„$èxXB½!’ ó)K÷„$è}!äFt< jÞ½d¦H‚Þ'«FÖ$è|fEIµ&Ao¿:M‚އdÔÌ zÛQãÀ$ème’ s™[€lM‚Þ³ iØ`Àšq÷:]$èmÂkiL‚Þ&–U" zËç8eÀšá.O5:>Eù#" ×{Œ€ÎA1Ë#[# ×ãýhOûÖ èÌûNèW€§QZŠè æÐ‡ßg´„#:æ¬9ÐZ •¦È^»¢æ@ç[É:#:_ƒÕÙä@¿!ö0è|ó½çXŽ0è|ÇOVïÜiç@t¾&¦µ“ƒö¨s„A¯sDwkD> z, DàKK ƒ‡ Ñ È‚vM£ ×Ñ*²5 z¬Æ[² ßÐ$ úÀ ØÌ‚Î(?côÁéÊ; úà'འ-!aÐE”0è¢i´‡Ò# ºLöBti©aÐùb‘¾­‚0è¶óF†0hß,Ò Óê iÐoˆ= ºˆšøx‡:WQÓ ‹¨‘Ðoˆ=ºˆš ]DÍ„n+tÍ„Îß‹þØ‘ ý†Ø3¡}«…wŸG(t|ª½ ….ÛÓPèÒRC¡c<Àô©¦B—­j*tkœvF*tˆ,ÙÖTè7Äž ÏV(×Xh ÞÎ#ºˆ ík, ºˆ ]D…n 'ƒ ]D…~CìÙÐúØúÕÐó¹èƒÎ÷ :n)„Ö’ mám) †C·…Oò‡Î—ÅdÜáÐås ‡Î§v$_h:t¶Ô`Q¤C—–’2"Â?4:DMÜc>t5:® 1»æCGËQš"â+4ú4ò2:ò¡‹¨ùÐ\ÄñЮi:´­ñÁtè|YTRmë0ú ±‡C·'m×Lè6Y}¸„B»†Pè\.B«½ ý†Ø“¡mMŒ+ÚV¼`2t5:ÅU’¡ßïdèô’ZsdèçÕB"tY°ŒDè"j"4WÞ` ´_gBGC”f":·ªÝ„¶Å`mË„0ú ±BQ¡ÓºK)8ò mAæAQó óG?RìyÐY Yó ³|RG_äA›¿gtF^>ë‡5 :5%(ü ‚Þf„c1:Œ˜ÆØ1:JüŽ ¯)htX¡#zŸKŽ$è°+H·&A§ÝÐ $A‡iÀË H‚>¾×‚(è¸õ㾄<è¼õ#`Zó ãîŽSAtÜÀgæAÇ=IE„>ö °ç¼kò8Øs®&ˆrfeÏÃÌŸ"ØsbU\!%йn±Þµ”@c$vtIèaçúÀÐÉFõÇv.:èÆG1ôð1 ½õþšµÁ0xRè ¯U@‹¬ Ëme؆%2lÃâW=l/À3lcåS ÛXѹ,lc»àb܆eÈ2nÔq.*‚ΜX}„A ´gÓ¢ºˆZ]ŽVk =+5ÐzþZþìѼZþ왾(^·÷—%¬¢–?ñʃ¶ðaB[l1¡-ð˜ÐKàí í -С)Z 4E „¶Ìçï΃֕Çmím m›È‚öàjdA[^³ ½%² ]D´‹È‚vQ³ mc´‰Ì‚¶3a´·|fAûÇÈ‚®¢dA»ˆ0h5 ÚÆL†A»ˆ0è*J´‹ƒvaÐ;CGí" w¸| ƒö–ƒvaÐ." ÚEDBWQ"¡]D$´‹ˆ„öóD$´‹ ]5‰„¶]2ÚZ2Ú°îHhÿ‘ÐU”Hh í¢FB»†Hh í""¡«(™Ð."ÚD¦B»ˆXèëÓ2Ú["ÚEÄBûf í"b¡}³ˆ…®¢ÄB»ˆXhß'b¡]ÔXhÓ ]E‰…v±Ð.>b¡mÆB»ˆXè*j’)j,´b¡]D,´‹ˆ…®¢ÄB»ˆXh;XÆB[KÆB{KÄBWQb¡]D,ô€dÙÐ."ÚEdCWQ²¡]D6´‹È†vQ³¡©Y6´‹È†®¢dC»xeCûçȆvÙÐ."ºŠ=Ú5dC»ˆlh‘ í"²¡«¸«]†Èlh‘ ÍÕ^,Ú["ºŠ»zfŠÈ†vÙÐ."ÚÙÐU<Ô4SD6´‹š m³¡]D6t5ÍŸÙÐþ1²¡]D6´‹È†®â!¦ù]:]EdC»ˆlh‘ ]ÅCM3DfC»ˆlh‘ í"¡]D8tw5Íí"ò¡]D@´‹Hˆ®¢DD»¨Ѧ1$ÚE¤D»ˆ˜è*îoõfæD»ˆ hí¢EWíPÓLAÑ."(ÚEE›È è*jš)"(ÚEE»ˆ h]E ŠvAÑ."(ÚEE»ˆ è*Û{C3‚¢McP´‹ŠvAÑ.–]} [ÿÖ hY"¼Òk[•Žôº3O€k[m†àšiù®G®›Mp=°~à:k½5.à:+ž¦¡à:©3”^·Íj•^·•©! 
×Y*£å ‹n|`X´…£02z±¼DF/VøŒÈè…ñÜM(ŽÈ處‘­Õ爌fœ4£m›LŒž­œøJŒžY×ÈÄh¨Fb´·T€-±>šì,sЮ€§Ö-`QvŽÇZ¤ €]ÎDvN¶ëká Ømº ó¯}o ×~†À×ÙYÓʯ³JâÝüŽœ÷×bbì²YØ¥¥ì<}TE+ÀNÐùV€GÙ°k»Îâ DX(»ör°k?‰'»>oÒ2œ‚]gŠº¾d vh v{Ó7“•]—†Ê®ýìÁ®£%êOÁ®KKe×þr Øu~KZOvûD̲²k¿z`×~´`×¥¥²ëuA²kÿ–Á®ý<Á®‹¨ìº­²kéÇÀÖyH¯l]6¨ØÚû°µ·¶.â[—-*¶ö líß0°u¶Dpµ`ëÐð6°uv"yC Ô: „Y@ëªufíÛTdmˆµk ¬ý¬ß;°.¢kߥòj_¼Ú¿_ðjÿ’Á«}—Š«íª‚V—ÃZjåA«½!hu•VÛá<`u>L©{¬.›SXG©ï Vû¾„U—=*«.¢²êr8ʪß;«.¢²jß'XµŸ%Xµ%PuÕ:©^¾^R]ö¨¤z9øÖHµ÷ê²O%Õ¹Y}Q ¤º´TR]ö)¤Ú’TÛ™T¿!vRm»¼IuÙ’ê²E%Õ¥¥’ê›Öu*©.‡¢¤ºlUIu•T—ƒURm}™¤Ú:Iµo¤ºˆJªýhAªý"€T—–Jª­Û‘T—–JªCÔÕHªËf•TQIu…Tû.Aªý`Aª}« ÕE|êò±’ê²7%Õå8•Tûw¥¤ºlUIuÖ(k½%H5Ç:‚ê²GÕe— ª½£Tû¯ Ú/@uTû¯ Ú¿€êÒRAuÙ§‚êÒRAuT¿!vPí×VAµŸ @µ,@uT¿!î[5Õ~œÕETP]ÎB@u9Õ!®š®P]DÕETP¢¾/Pí P훨öS¨.›UP]6« ÚoÕåTT—RP¹ï‚êr@ ªý' P]DÕ¾U€j?X€j¿zÕe³åçø±œé_¾ /ÿòelñ³Ç·­ù#ÙϽänÓyéòÿù__~ÿ$_öñ‚Bîé½Bî7æCþwš'øMOû3vú<ÓÇ"«ä¯¾÷tó÷þŸÿøÔ#¸¾Þû›þÜóï_ùã?7-üœ«íÿùO=‚«Ÿß]þ½?¶÷˯E Ò1ŽÓ9@¿ÄÓaܾOz±­y@ÿøÓ3ÍþïÆ—¿kqÌ/?}{ù·¿þöåß_~ú×/ÿôÓwî&îz92Žû8Ó_ØOìåÚQ×徭¿{O<ÉÍÛ÷ïjá®þþŸÇ—œ Šáû§ßÿÐ9NmÝ÷öÒr~m˜·¸cµ©ì8*.g{ìúÛË?¼ü1þÿx®Ÿ6r}ÎpiÃÅÿ…1ºÉ<ì§Ý–N¿Q÷~úIG0nuÿãöYcô¾¼qþO?éZýþoó~j—ŸÅ-~Jï[·Ö{÷ÿI½ï<‚Gïãþ?¥÷£ÌÕû°ÿÏê}ç´úý÷ÞwÞf–×iØ¿÷6s,{{=Æ-Ÿ¤æ¸½e½Ç¾lñä<×ÛÛðÒ>xǶݴ!Ú¼ÅÝtÿ•ݴ߸›,‡‰G¨mÛ§zÏk7ão=qÈ×¢ÇØÍü†)èûùç³ùŠð:l{|éïïfòÓù »8rùÐ1Ž=žé—õÈng\Ê7“ÿsîá?~ùÏoß~÷ç?ÿîçŸþÝ·?ýñOøÓoò/ß÷R/o99£u1ƒs<Þd‘¢åª|ja¢z“PÅÔ¦AJ”†å_mšzÒvŒÈèhÓÜ Îô¹çÞ¦Öˈðù*Y‹ªÕrÅ©žÓ°ZMË{ s*ZVŠôÐ楴3^ôÊX˜A›I:Lf!Sám^%Ì0˨äµÚ–«>÷–+¡Í‚3,`¤-£ä æÜ¾tµe’HÁeG¤At!I ´8Š–«³ß'Ë¡•N-‹!îL“تÖдDAwlIlE§X[P\%Y5!)D-³Eî’¸¤ºRË\G8‰føµ¸D»f‰è •­m=U/gåUä–Édw4H‰AŇ’gé-«î„¸ZÏÑr¶ÿñÈ«ÖkÀZÎçß1~Õòýê;‰#ɤÔì4}u&/)úÓ6HÜœÏ6J¤\\ÓÇÎâþ&qqöÅgže…ðNxÛVM~sq—t·8yMðkg¦çÝ…¶œ±¾#)²›J°DŒRÛÕò¸–ɤw°„wð}•0µì´:0ì»ä¥åu{þÖöCÒÐbwZ;׎±ž-+ªcZ¦et×0`¹º'vH,)oî·CÖóî’³±Wö˜wÐãx1¶‹S’±eGÉØ:L’–[21Ö¼¡ßã®í2ö$I_gÁSÎã®)a^q-й»ÄuEÃÇ ½ŽƒdqEgZs²½¼”kÎ4öH-öÁulÅn¶Ž›¤cå˜*w5îe=+ÆMÍV[ãfÖC®rmz)5Û¹äXåp,Ýl›YªÊ»>t¯qCëaT¡5!óë´JâÔÌ×kלlë#ðˆß5çÓîxfpÚëQß#ð€»Å:KíiîRoÙk.„ÞÎÃÔ\Ó5Wª»Gàoõ¯óÖóšâ 
5ükÍÓ½à™¹küq_šMâZszég¾¸¾æ4Ò=þÎ|ayÍ™¢{ž7öªe“(¤e@ŒÙšƒÙ=ÏŒY³Jî€sñÂIÅI²‹f¾ñ¿f6Ò=ûÑf¤à=ÏLËXÏÚì«'ŸÔLºyN¡Üãp®O(éÌkN“Üãð9;ØýК3!÷8œ“޵®øS:äÚ$àgfØÆš±#}å¶®{ŸGÊ£Õá=â>U”G«ië6Þãì’ÀG¸™¯Ÿz¼¢{ùàZ6ô/½s`8K²ÆÜ[ov¿ÁÞfDWÒa¯ ïüê°³ ÿ]‡í¡î°Ù)zS¯½çèmI½ö>ŽZ«iLz«S§ß ú$vfŠËoN;oÎ24Ñiïãm¦Õco›ŽX4ÚÛ„ l:ím¯{ÑEƒ½å Oü ¶…>Óa[ävÉ®ê¢ÅaÇVñD‡m‘w·Ã>Pgk»aU:ìÙÛtØÌ–3ƒ½!ÑÊ vC± ¶ïÛ?¶ýΊÖÒO:츆èdtسfšÓg緦ϞðÙÅ·«ÏsJ‹ÝtL‹]ü7,öˆåOÌb‹†»^nCZÍõ‚UgÍ\O½3‡MëG‡ÍvìòÿrØŸðé°éêh±G·šÅæ¾è±ç·¬õŒ5ÌZó꛵‘—Hkmþ‘ÖzÆ‚º4Ø~/‡Ávÿƒß¾FƒíÎà 6¬l ¦ÁÎDuy¡;µU[šÁ†¡q‡ ÿàÞÌ6M½9l<‚˜ÃÖ @sزCÅaócsØz£ Å\,6| [líWn±•ÐbÇpŒs‡Å^',N‹_# $,¶åôÓbÛ2´Ø~@°ØëÄÇ Xìô¾j=a±ãha/a±óÍMÁ´ØéšõaÛ–9 ÅöSÅΖú€ ‹½.x ƒÃÎJ(ý±ÃaÛ"pØyñt|Ãö+»=ÖE¾÷±ª©~½·Xíû™ê «û1­öJ«o˜H°­öÊÇxZí²tÚq=õ_:í4^Š6á´£§ê"%´ÚBëa³ÛŽE÷`³÷ؼöØì|$ÀWmö¹¬»t˜í=ŸÃÔ«ÛÞ³$[€ºí}š¸OµÝùrÈ8Û_)žýh»Ï(êºó9ä¹àº÷¼+Ãw×½g™¼š+¸îpZ8‘§ë¶užèºÃÙ¼çºs9ªE–’¦ëÞ'XÚî}gÿ‡íÞâKØîðTèÿ°Ýùª_#lwüæ4.—¶{èæa»÷¶;ßÅW´ ßOqúÎ}w®Ï¤sðÝq@²b½wŽ·ò (½w®À¤-ðÞqp·½VÛËSé$¼wŒ¥Êqi¾m©!šïuÃIÀ{ÛSðÞù\©ÏŽðÞÑG=½wÞ½ôAÞÛÖ‚‚÷n^ï£÷öÛ ¼wº%;0á¶6Mx>ËøGž‰ ê³ÕƒçR@ïáíu!3…Ï=ÊOÕ ÷†Õê ro 4ôàÑ5=xp[rÜ&éÁÛŽ5ë̃x¿Ÿü,€Ï nË<ЃÛó<=x9 õàË΋ <ËêçãnëC\¼­ÏM žo'¨×Q nó“´àþeÀ‚·4pý&Nns~ôà9©´+¬Vn³—ôà¶x =x’—é=ÌíóJðà>¯Þ¦j½ ¸Ý>èta½snH¹8¬·-FBëôúƒƒõö‚õ^6¸Zo£NôÞùŒþ2à½ýz«÷¶itzo¿ÜÛX¬7oû¨õîðcüýøî¸Þ/=¿Q>‚¹_øîy×Á•¶;_èSCÛ=NÈœ íF”/ Žäh¬À@1I¸Žã=À}®˜©˜Å$ÛŽ‡x“ä»äJË€¸ó{ÅVÄ]DEÜà¾ëftgb€¤@tÛZ|$ÝëXâÝ:–”p 7Zo[ŒÄ»mœu ñæ2S Þ½d†¬ÛËFž¬›r©&!ñ·³qñÞ¥2øÛJãHœT[‰ øXbbS ðÞ¥%JLŒ¹³ÄÄJ¼Ä=^b‚j/1Ñt=šðR¹‚.Ÿb%&, b… k)&¸^a‚êxpß% LìXY`‚KX_b%-^_‚Ò­LÔ|tM¦ñ{×d2ÿ-„ºøoÂÑGå)|˜éˆXÄÝ[XÄ}™x…ß&z8ðÕ¹±œ{Dzåtàçûàê†QÎmUpà^ËrîMž¬Þ¨æV—Ájn³…¬éæj†VÔ=âoEÝ+ÒÝèÀ½”–EÝ›U« ª{aQ«ºrXÕݰڱUuO,zúð û{ÎQÀ‚[9…:p›ë´zî kêÒ{í¸ÝÑÍsjÞ,8gziÁçã çMDçm7U:o»Ðy笻²g8o»}ÒyÛ-’ÎÛÎ;ïŸ@×j½9¨ÒyÛMÎÛªñž¹>·Ýi¼m ‡ñ¶2To«ú}o?8ïLï|y[].w4Òc½‰ÕHÂy—–ê¼ÓJ¡à[wNë¡à…'V=çíûdå‰UËÂy{é÷Šºv:ï•çtÞÖP÷ÌEþ¬°»ýJÝInµFVvb׎e' sLæ»íDà»ÏhXq»ôÝ5ðÝÍp8|wÃMáöݾ=øîÆøîFLß½`¶„¶»aîÒl·5„ë^ùj®ÛÂt/ð>fº¤ºšé¶‹FÓÍ]ÒsûVá¹íxè¹%nž{áôÜ<zîLÇ<·‹ðÜ ^ 1Ïm_ M÷rMyšß¶kÃm'HýÀÛ= ÷¢v毾ê÷¿/¹ñ陆{]‰¦.HŠÛëL¦à¬Îd᫘¨3!ÕaÉʇ]Ö™doPˆ:[Ð’Å&¾Y›Q‹MÖõÙZk²²޵&ën&þàš(§A­‰­ÊZ“"j­ÉjAívm©Å&¶V(‹MVó3Zlâ—]ívÙåÃnûZ¨ðÜå´Ø¤h‡®öȇy˜îsµWÔ“Lºf£‹3—eÄÉ«éNeå›®­èÇ#滜‰šïr}Ô|—kªæ»ŠM4´¯JÍ·Ÿ‰zoÿ 
¨õ.G­w¹v꽫¸p…@ü´Ô|—ãÙ¹Ì~vê½STN¢Þ»j×ãÃïõ¡U>]¹ªž².õÞU;¸6 ™zïSD!øÄî@ Õ{W±q:nvãRtw®6‡Íª÷®âÄEã­Ô{WQÌ·GÂ|Wqç2nجºïr´ê¾}õ@Øï*6.©ò«ö»îsçÂhÜìÁµÏXU2by3lUíw9ž‡ý®Û[¹Å Kq{W{{—­ªÿö%ã`ÀkË•+{Qܸx—2D8ð"ª/›U^Å…+iq³+fQܸ(7»sá+ˆjÁËfÕƒWqæ U¡¢¸ê:S6£<Få\0âÓN–#ž" ¸ñiޱ¢ïÏ?,úÞÁ¢ïõ3@ ®—¸Xpi£æ;oH)óò&`¾§•5Oó‹(éä>ÌwFèHù“zoK×õž£-`½§fa"j½‹¨Ö{Z­pD­·¬wÙªZï\aJÉ=¬·Ÿ ¬wˆŠ h½ËfÕz§ˆBµÞ!*g¡÷ö–0ßÞæ;Øõ­ ˜ï²Y5ßÙòªWß]¶¨Æ»ˆj¼e‹ê¹§…sGðÜi¢4Jèá¹£ ŠÞ๧ÅQÄs—†ê¹SDÕ‡znkIÏ]Zªçž–MÂs—–ê¹Ë>Õs»ÏûÔZ xîÒR=wi©ž;» ¹Õs÷¯—v{ZÂO»w*Ô›ÂnçÂZg»ÿÐÒØí'˜fµÛÓl5Üj·§©»½b·óhÕ¾¨ÝÎ…¤Dn;4yÃç2ÛÞB½¶·«í’:mÓ`´]SŸš–ÈÀg{CµÙ¥¡Úì"ªÍv6»ˆj³CÔDÚì"ªÍN_$S„´Ù¹\¡VîÀf{KØlß'lvÕfQmvÕfûï6;WÔâ6Øl߬Úlÿ­Âf{ïßš_´´ÙÿóËÿ¹ö endstream endobj 5 0 obj 24703 endobj 6 0 obj [2 0 R 4 0 R] endobj 7 0 obj << /Resources 8 0 R /Type /Page /MediaBox [0 0 1469 828] /CropBox [0 0 1469 828] /BleedBox [0 0 1469 828] /TrimBox [0 0 1469 828] /Parent 9 0 R /Contents 6 0 R >> endobj 10 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 9 0 obj << /Type /Pages /Count 1 /Kids [7 0 R ] >> endobj 11 0 obj << /Type /Catalog /Pages 9 0 R /Lang (x-unknown) >> endobj 8 0 obj << /Font << /F1 10 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 12 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000203505 00000 n 0000203527 00000 n 0000228304 00000 n 0000228325 00000 n 0000228354 00000 n 0000228786 00000 n 0000228654 00000 n 0000228548 00000 n 0000228712 00000 n trailer << /Root 11 0 R /Info 1 0 R /ID [ ] /Size 12 >> startxref 228868 %%EOF blis-0.6.1/docs/graphs/sup/dgemm_ccc_epyc_nt1.png000066400000000000000000005656601360743507500217370ustar00rootroot00000000000000‰PNG  IHDRâÜJ&¡ &iCCPiccH‰••gP“YÇïó<é…@B‡PC‘*%€”Z(Ò«¨@èPElˆ¸+Šˆ4EE\•"kE ‹‚tƒ,ʺqQAYpß÷?¼ÿ™{ÏoþsæÞsÏùp ˆƒeÁË{bRºÀÛÉŽÌß(ŒŸ–ÂñôtßÕ»­Ä{ºßÏù®‘iü常¼rù)‚t ìeÖÌJOYá£ËLÿÂgWX°\à2ßXáèyìKο,ú’ãëÍ]~ )úÿ†ÿsïŠT8‚ôبÈl¦OrTzV˜ ’™¶Ò —Ëô$GÅ&D~Sðÿ•ü¥Gf§¯DnrÊ&AltL:ó5204_gñÆëK!FÿÏgE_½äzØs û¾zá•tî@úÑWOm¹¯”|:îð3™ÿz¨• €è@(U  t0–À8à|AØø $ȹ`(E`8ª@-hM œà<¸®ƒÛà.L‚—@Þ‚°¢A2¤é@F²† 7È ‚B¡h( Ê€r¡PT UAuPô 
tºÝ„¡‡Ð84ý }„˜ÓaXÖ‡Ù0v…}áõp4œ çÀùð^¸®‡OÂðø6< á—ð"Â@”]„p$‰BÈV¤)Gê‘V¤éCî!Bdù€Â h(&Je‰rFù¡ø¨TÔVT1ª uÕêEÝC£D¨Ïh2Z­ƒ¶@óÐèhtº]ŽnD·£¯¡‡Ñ“èw †aaÌ0Θ Lf3¦sÓ†¹ŒÄL`æ°X¬ Vk…õÀ†aÓ±ØJìIì%ìvûGÄ)áŒpޏ`\.WŽkÆ]Ä á¦p xq¼:ÞïÀo—àðÝø;øIüA‚À"X| q„„ B+áaŒð†H$ª͉^ÄXâvbññqœøD%i“¸¤Ri/é8é2é!é ™LÖ Û’ƒÉéä½ä&òUòSò{1š˜žO,Bl›XµX‡ØØ+ ž¢NáP6Pr(å”3”;”Yq¼¸†8WJ)Hq¤"¥öHµJ IÍKËIÛJGJJ·IK”aÊ8ÈÄËì—é”y"‹’Õ–õ’Í’="{MvVŽ.g)Ç—+”;-÷H–×–÷–ß,L¾_~NAQÁI!E¡RáªÂ¬"CÑV1N±Lñ¢âŒMÉZ)V©Lé’Ò ¦$“ÃL`V0{™"eyegå å:åå–ŠŸJžJ›ÊU‚*[5JµLµGU¤¦¤æ®–«Ö¢öH¯ÎVQ?¤Þ§>¯ÁÒÐØ­Ñ©1Í’fñX9¬Ö˜&YÓF3U³^ó¾F‹­¯uXë®6¬m¢£]­}GÖ1Õ‰Õ9¬3¸ ½Ê|UÒªúU£º$]Žn¦n‹î¸CÏM/O¯Sš~°þ~ý>ýÏ&   ©†.†y†Ý†iñªî¯&¯v\½mu×êׯ:Æ‘ÆGŒ˜ÐLÜMv›ô˜|253˜¶šÎ˜©™…šÕ˜²élOv1û†9ÚÜÎ|›ùyó¦é§-þ²ÔµŒ·l¶œ^ÃZ¹¦aÍ„•ŠU˜U•Кij}ÔZh£lfSoóÌVÕ6¶ÑvУʼnãœä¼²3°صÛÍs-¸[¸—í{'ûBûªƒŸC•ÃSGÇhÇG‘“‰Óf§ËÎhgWçýΣ<Ÿ×Ĺ˜¹lqéu%¹ú¸V¹>sÓv¸u»Ãî.îÜÇÖª¯MZÛé—ˆÿ2Â6¢,b&Ò*²4r*Ê*ª4j:Ú*ú@ôLŒMLyÌl,7¶*öuœs\mÜ|¼Güñø¥„€„¶D\bhâ¹$jR|Ro²brvò`ŠNJAŠ0Õ"õ`ªHà*hLƒÒÖ§u¥Ó—?Åþ ÍŒ]ã™Ö™Õ™ï³ü³ÎdKd'e÷oÒÞ´gÓTŽcÎO›Q›ù›{r•swäŽoál©Û m ßÚ³Mu[þ¶ÉíNÛOì ìˆßñ[žA^iÞÛ;»óò·çOìrÚÕR V (Ým¹»öÔ±? ìY½§rÏçˆÂ[EEåE‹Åüâ[?þXñãÒÞ¨½%¦%Göaö%íÙo³ÿD©DiNéÄ÷e̲²·7¼Yn\^{ˆp(ã°Â­¢«R­r_åbULÕpµ]u[|ÍžšùÇ‡ŽØi­U¨-ªýx4öèƒ:§ºŽzúòc˜c™Çž7ø7ôýÄþ©©Q¶±¨ñÓñ¤ãÂÞ'z›Ìšššå›KZà–Œ–™“!'ïþlÿsW«nk]£­è8•qêÅ/¡¿Œœv=Ýs†}¦õ¬úÙšvZ{aÔ±©CÔÓ)ì ê<çr®§Û²»ýW½_ŸW>_}AòBÉEÂÅü‹K—r.Í]N¹<{%úÊDÏÆžÇW¯Þïõê¸æzíÆuÇëWû8}—nXÝ8Óâæ¹[ì[·Mowô›ô·ÿfò[û€é@dz;]wÍïv®¼8d3tåžý½ë÷y÷o¯ñy02*|ñ`úaÂÃ×2-<Þ>†+|"þ¤ü©üÓúßµ~oš /ŒÛ÷?óyöx‚?ñò´?'󟓟—O)M5MMŸŸqœ¹ûb݋ɗ)/f þ”ø³æ•櫳ÙþÕ/ M¾¼^ú»øÌ›ãoßöÌyÎ=}—øna¾ð½ÌûØú>|œZÈZÄ.V|ÒúÔýÙõóØRâÒÒ?B,¾“sMT cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFs¸NBIE] pHYsNNÆÊ/¥tIMEã/ÀŽ”t vpAg7O£¨u€IDATxÚìý|Õ}ï?Û CÀFÌjBCÕN𤹣¤-­›ëf7´¡¶¹»S7IÛ›Ý[B~4%w7í§´õ-éNs¿…Ò›hß$-4©¦iHC/v44-¹Ï5`aC@c$° üýãÌ{fvµ+­¤]íJ:O=ôvvvæÌì{Ïžó:ï§:uêF£Ñh4F£Ñh4¦­œÞéh4F£Ñh4F£Ñh4K®â:Ý„%ã8‹EÇ™Óq|ß§X,vúr:‚¶ã…G†‹ÅI¶¿TíXÛðâ¢;–ÏÀbBÛñÂd¶ýñb´aÐvÜ ÌçØXÛ±¦ÌÔ–õø¸mÇ‹‹NÙqW qžçuº Kšb±ˆçyضwÔ³% C|ßïô%umÇ þþ~,˪²ý¥jÇÚ†ÍØq±X¤\.wº©-EÛñÂd¶ýñb´aÐvÜiæ{l¬íXÓ.fcËz|\¶ãÅE§ìøŒO}êSŸê†‹ÿ⿈ïû˜¦I¹\æSŸúÔ¤í=ô\pAü:×uùÛ¿ý[.¸à ÈõÐCqâÄ Çáĉ˜¦9ißf÷›ªÍ=ô«V­âÏþìÏ8|ø0¯}ík§}n:¦Ûµwºë ‚€b±È÷¿ÿ}LÓĶm‚ 
à-oyË´×|øðaÂ0¤\.c\pApï½÷òþ÷¿PŠó‰'êî¿i•Ï— §í¢ö=‹ ×»¦ÙnoÔÞfîM#ûjtmžçqï½÷288Èk_ûÚø¸ïÿû—Œ7²ázÏ-v;n— §ÛÛ;žê5ìXÚëº.¾ïóÐCÑCƒY3[;ÖcŠùµã©®k6ý±a‹Æ†åþê±qãkšÍö©Ú<Õ5‹7P»ÇÆ‹­/–{ Ç Ïfž§ÇÇ s\‘>×RϦ?:fÇ÷ˆ Ã<Ï#²ÙlüÜÀÀ¾ïýýýUŠ}6›Åqœøõâ&ëû>Ùl–b±H†d³Ùø8¾ïÇ®¤Íî×ß÷Éçód³ÙøÍ‘öMõÜT÷¡¿¿Ïóâkò}ÆÛ§jo3×ëy–eA¬ö …¦®9›ÍÆïŸœ+ã8äóy Ãhjÿ…D+íx¾lxª÷t66,÷¡ìx*ûjtm¦iA|~Ïó0MsR«Oeðtì¸Ý6<“kž7k“i;–ë.‹T*•Îa ˜­ë1ÅüÛñT×5›þx±Ø°¼zlÜ=v<_cc¹æ¥`ǰtÆr/ºa|<[Öãã…9®˜ê=]Èýqz¿™Œ+f3>n·/kÙ‘f‰ã8˜¦Éàà`üØó<ÇÁ0Œª/"yó\×%†‡‡Èårô÷÷“Ëåe(r<¡T*pÚi§Åۚݯ¾ï366†axžWåB>Õsõp]·êzåÍ÷}FÛ-ËjxŽf®7‚ ```˲ðÇÆ‹¥/–ëÓã âkê†ññllYî¸BÚ³TÆÇÓ]×lÆÇí´ãŽ qžç‘ÉdâÇòåAÕ%7ˆßØ´"†aüÿtoðL÷k„mÛ ¿(§z®µ×+«ÅbqFÛ[q½aràÀ€X}–Žcªë²,+~Î4M,Ëâþûï?Œ¦iV½¶Þþ’³`¡Ñj;ž/–6Õ{OgjÃõ®·“vÜȾ]›çy¸®Ë0MÇqÈf³ñ—èb·ãF6 KËŽçÆgzofbÇÓ½¦ž—Ëe,ËZp6[ÙØ±StÆŽï¿ÿþ†×5Óþx1Ù°\¿w—ÏÇØx©Ø1,­qE½ë]Hó<=>^¸ã iÓRO5®hôš©ÆÇS½¦vÜñÐÔf m¼r#lÛŽ‡††:})-¥QÒÀ™nŸ)¦iV¹[–5éÞÏää˜cccSªí³=O7 í¸ñõvÂŽµe*ä‹Vì?—ËUµe±ÛñL¾Œ—’w“ Ëñçòšzv,+†§vZ¼ÚxÚi§-ÈdijµãÅlÃr­ ÉŽgÚ/&=¦˜êzóØx©Úqú¾Ú–›Û>SfcËz|¬Ç®u!+¦{M'ì¸ãBœeY¸®?–øiÓ4«Þ¬ô—mÛ±)ªg>Ÿïô¥Ì‰Úë-—˸®;ãísEšÄÈ娇ă§ÿ¿úê«ã¬R©P.— ‚ áþ m•DÐvLÝëí”ÃÌíK\¦Ó¯Où.v;ndð´ì¸›lfg_S½¦ž qêÔ©øàÔ©SKÆŽ› ×»Ön´ã«¯¾ºáþ3í+•Ê¢±aÐc ¡[ìx¾ÆÆ‹©/=®H³mYõ¸¢Þµ.´qE£×L5>žî5s¥ã¡©¹\×uéïïÇ0Œø&ˆÚž.—,ˆQoÞ¼9N ¹cΧºò…,•BšÝ>WLÓ$“ÉÐßß?ã{+9Z¤Ã®}eYär9ŠÅ"¹\nÚýÚŽë߇NÙ1Loµ <Ï«jK½×,V;ndÃòÜR±ãn²a˜¹7ûš´׿âXÈÌÆŽ› ×» ÍŽgÓ/;^J}q½û°ØÇƋɆA+¦º É–õøX+ê݇…6®hö5óiǧ’%—“VA¹ý‰ IÜnm2Ä0 «bw:”Ö™nŸ+’̳Ù{+îôƒƒƒq|õT+,3Ý¡ í¸úš:eÇs±¯™¼‹ÑŽkm–¦wÚ†avöµmr6ÌÆŽ› §¯i¡Ùqm{Ó{2ô˜¢úšûØx±¢Ç Õ–ÓmÑãc=®è´ Ï×ø¸ÝvÜq8¡öÍ‘2·bÈŽãÔ]EjAT¹žÖ2S5´Ùã5º¦f¶·²Í³54Ã0fôA›éþÝŽ¶ãî²ãÙØ×lÞÅdÇõ®c)Úq·Ø0Ìξ“MÎ†ÙØq;m´ÏW¼˜è¶1´Ö.Ú˜b¾ÆÆ‹ =®X¶¬ÇÇz\±ÆÝ4¦î¸zHù_H\<5ÝIxžWÝiõþ mÇóÏ|Ù—¶cM;™}-%›œ ÚŽçm“­EÛðÂAÛþÔh[^¼,%Û×v<ÿÌ×ø¸ÝvÜÕBœF£Ñh4F£Ñh4F³XèxÕTF£Ñh4F£Ñh4f)Ð59âÒÜ|óÍœwÞyn?ü0—_~yGÛ011ÁÄÄ\pAGÛñÄO°fÍÖ¬YÓÑv<üðÃìÚµ«£mh–;wjû‰è&ûéô{ðÔSOñ™Ï|¦Ó͘ß÷ùêW¿ªí'¢[ì§Ú111Aoo/7ÜpC§oÇ´ÜqÇŒŽŽvÜ~ºÅŽu;&·ã=ïyO×ç£ÓccÝŽéÚ¡ÇÇÍ£ÇÇÕt‹/”ññ·¾õ-vïÞÍ•W^Ùé¦tÝb?ÝÀ‘#GXµjÿøÇ§Ý·+…¸Ç{ŒŸû¹Ÿët3¸ï¾ûزeKGÛ°oß>öíÛ×ñv|ýë_§··—+®¸¢£í¸ï¾û:zþ™066Öñ÷MÛO5Ýð™¸í¶Û:Ý„¦èø=ÓöÓ}íØ·o£££¾M1::ÚöÓ-v¬Û1¹Ò×u3zl¬Û1]; z|œÐ-ý`·ØñB?öØc]aÇÝB·ØO7044ÄC=ÔÔ¾])Ä­^½š­[·vºüâ/þbÇÛ±qãF®¸âŠŽ·CÚÒÛÛÛÑ6œsÎ9¾ 
3jk§ß7m?ÕtÃg”‡ÎBଳÎâ‚ .èŠ{¦í§ûÚ±Ä €5kÖè~P·£!{÷î嬳Îêô­˜=6Öí˜ =>žz|\M·ØñBoܸ‘Ë.»¬+îY7Ð-öÓ <óÌ3<ÿüóMíÛ•B\·°}ûöN7ÞÞÞŽw΀þp-P´ýTÓ ŸiÍÌÑöÓíÐÌŒn±cÝÍ\è–þG·C3ôø¸mÇ3ãU¯zUÇÚ» m? 3± ]¬A£YDÑF£Ñh4F£Ñh4šîC{Äi4 ˆŸÇ¶m\×Ŷm|ßÇ0 lÛÆ²¬Xˆ“ãTµ3 1 cÒ¶0 1M“b±ˆiš“^ëû>–eáû>aÆmªÅqœxßr¹L¥Ri¸ïl HæeAôk£æh~´Ýж»Ñc3z^ö·#ÚfD¯7¢çüšçàÜ+šnŸâ4š.¢LÕ1:8”(‘'M†Lìºlaa`£ºÔBÜôHë¡:Uu!él‰¶{$v†ê:$é´­è¯ÈGÿËñ´Ó¹F£Ñh4F³ô(S&G?õc¥–îk a’ÏçÉd2j?ßg`` Þ–ÏçcìÀ”ËeÂ0dll Ïó°, ×u)—Ë Ífñ<Ó4q]Ó4¢¿¿Û¶Éår‹EJ¥Åbß÷) xž‹u_ºûnŽf³ì»ûnVß}7™L†;GFèééaË]w1¸v-ç_q'ŸzгFF˜Øº•ðÚkYû¾÷ñè·¿ÍŠÛo§ðÚ×Òš'‰8fÌ­df£æeâë'û¦…3PÂÚŠcÇâªèFô›GÍÛd¿|ô¿üzÑ~’9`×GÍkÏ#sA€ßïëã—ÿã?š²-Äi4]„éñ9r±0gbbDRÎ ƒñþèNÔÀ˜ùÉÒ)§Å/ù•NÖ'éDE0“»•îDk…²´Ø–#YAq¢×e¦i[>uî±Ô1û ßò–Nß:F£Ñh4F3OH”Ó0Ã)âáMrœ ‚€þþ~†‡‡c¬\.cÛ6årÃ0¨T*˜fõks¹A(ï7 P(DÇ϶•À”ËärlFÍorCC”ÆÆØÿÈ#ܺs'»n¼‘_ºé&zn»§#±¯T*ñ+ò'¼~˾ö‡ÈµßøW~ç;˜O?Íûÿæo8çüÞøøã¬+—9ïÿ˜ÊÄý§ÆêÕ«ùç'Èy&VòàŠü¿uë¸ìÑG¹¥·p·ogå—¾ÄûŽL&»Î|þy‚ àÅ+¯Ä@Íá  ýñÓOó·_ü"|ô£UÂÙ—Ÿ~š-çžË»~øCÞñ†7ðÝÇãš /¬û~¬[·ŽTyþ¥&nºé&n¹å–êç‚Â, | #y üçþ½O=Õ”=h!N£éÄÙÇÇÄÄÂÂŒ~ ñ|k†…,Ê¥½ÌüÔöÄã¬l&é!Yå€ÄeØ%qÎÁ”þ›£× ¥ŽS#:3ˆ›²U³í°ãû߇_þåÝeF£Ñh4FÓnBÂxnV¤Ô³±ã\ßµ8ŽƒeY‹ÅØë¬T*aÛvì•V+Âù€eš˜¦‰Qâ”…¯r@6Ú7ƒŠÚ)£D/oÝ:¼-[ø(pÆ×¾†·?»ä¾ú®wñþßþmÊ+WbçrÀ/¿uÕUä¿§ûÃ&W\D‘FëR¹îÎt? 
)e2±£Ã/¾ÿý¼ûnžxâ‰X(ó}Ÿr¹L©R¡üGÄ¿þó|ðÜsyó›ßL8žG¹\æ–[na||œK/½”¿øØÇxjûv|ßçèÑ£ÜûÅ/rë'>ÁèŠ\sò$_ûàùÀ;ÞÁÁï|‡¼ò Ïüýßsúç>‡mÛ|ö³Ÿebb‚?ëí×…0äGkÖ°ü¼óX;<Ì÷gÆ l€ÊÌ %j!N£é0R„ÁÀ @2eJ”ª„¸¥¦×Í!©.ªÃÏ :›ÄõاÚUXbîõ¥Rˆöšæ\3ÉÏ&᦭”0Mª=í4F£Ñh4ÍÒ¡H ‘˜$ìÔÂÂǯJ/ôô·Ÿf¿¹×uŽCQÓyØJ%5't ~e%€‰W˜ƒZøÏ£æ"èùˆì/s“´t”‰~ÞBÃÀ\·Ž,à­\¿¾–z3N‘%Üu’`å…+‹d2,˲¬¸°Dù´Óxçý÷óè£òá˜r¹ ÀÐÐO<ñÛ¶mãÓŸþ4õWÅÏÿüÏsë'>ÁÁýˆMçœÃO½ñ|è5¯áÁÿþßy饗(8Á[žžÜ}7_ûÚ׸¡·—ð3ŸáµË—ó®_üE†ù“¯s·laÓ+¯ðÒ;ÞÁÇËe<Ï£T*á8_vÏ­\É Ã`tt”5ÇŽñ»¥•J…ƒòÕë®Ãù¥_ââ‹/æ/ÿò/±m›;wNkZˆÓh:Œ‡—­f8G¯¸…„Oµ'˜ÏÄ3-D i>0H"~Éþ­š—9›Ç$I ªÑh4F£Ñh–!!&f•÷›…o+—Ëœ¸âŸùÌgx‹õ Ã`pp°î1‹(M¢ˆ@‰mTGøÔ¾º™y–¦Dd5vlHn¨‡ïû¼ùSŸ‚U«T8'€ibyØ6¿7>ÎGŽàÿäOòÍo~“³Ï>;>ÿÃÛ·³eùr>¹r%Û>ýi~÷ª«(nÝÊñ7¿™ão~3W^y%?ù“?ÉwÞÉ#—\™۶qÑÛß®Îó‡ÈàÇ?Î7ÿéŸøùÇçojÛiÛÜxàßúÖ·8rûí¿öZ6-_λßýî¸íÿñ©Oñçþç bý×Í¿øE.¹äuÿ‹Eòù<ßýîwù»¿û;~é—~ Û¶±m›-[¶0::Ú”mh!N£é0"ÂI8©œÝíÝ&EÄË ’ñ‰OI¡‹4þð‡¹úê«•w¯¶2l¡PàŒ3Îàšk®ášk®‰½öìÙ£…8¦Ûñ£,hé¼pÀ¤ ¨Fò³AÒùK(©A$B•çqò×~ÇÏ;Ó0xä¹ç¸æWU‰s¨¢–ç·³m›Ÿÿû¼îº*1¬\.“ÉdbaÎ4M‚ Àó< Ãà¨çÑsÑEpÉ%uç¶=·x£zâ\íóÛ¶m«¹!Ä“YÃ0T‡è™åÔ] qM é§Ÿ J”pqgìýæºn]¿5íS9Ü ºâ¨ER=´D"Œ™Tç 0€Smh×Bâ4F£Ñh4ÍÒÄãB…€ v¼øÛ{ïå]_ûÅ| .Â&úQ ú’S»„Š4J«› Ùlvrè«ï«_àÁcÇè}Ýë†zÆgÐóÝïây™L†ÿp]®IyŠ ©ìÝ•JÇqâê­õ°m»JK{Ê™¦É5×\3?o$Ÿî†ú¨7d˜jÏ’Áh{&9ÆŠÑMŸþôù¹JF“ÆC%¤ôñÉ!G®nÕœz„aH¹\Ž«æÔrâÄ ¾ò•¯Ífãçk÷+GI(ƒ ‘¶BÔJKÕ§@R%ôÊë­„ý¥Ïš*w@3×1õ®o:‚ ˜öØRug*<Ï›tœô±½¨2OmÃ0œU»5F£Ñh4ÍÂGÂR2Ѱ·§‡Ç~ú§›öJ|³Pó25ïRÇi]Ê›B!:jB±¨~³YŽÞ?Œ…´b±Èþž0Mr¹†aËåð}Ÿb±8騹\nJ¯5˲¦ôHk9At#Óˆ×IítÐ# ûJï›vId¢ÜŸÿ¢Û.â¼ÇÏkªYÚ#N£é>~Š 4-ÂU^Z(ò<Ïó¸ã’;8úÕ£àÇÜ—Ë帣÷}? 
Á0عs'ÛLµ>“.=xüÜç¿üe2CCXVuw/%´¥ŠO½j8@U%É>F6›“|Jì½´Ó²¬XÈ ÃÛ¶ãŠ:¤:uÇqð~U´“ìǶm*ƒƒœµs'6ÕBÜ J÷ùq À ‚穊¦¾Ïè'>AùÎ;) *¢jÃ>ðå/«¢•J<¿±"gŒÁÁÁ)s¼Í+e’å‚Ý8#ºyÉÍŸ¿kF77Dy´ ‘xÆÙ$Õõ¤"aåáæ£¶Í©},T®¸ æ<"ìíiþ²Z"ÄŒŒÐÓÓÃÎ;زe ?õS??wèÐ!n»í6 Qœu'­é6æËŽ%,ÕÅÄN•8‚ _L‡H†'½ ‚€ÁÁA,ËŠÅ.˲( q˜èG(òT_rη¿Í…Ë—ã¢úcÓ¦ª¶„AÀY÷ÞËýû÷sñÅWyne2<Ï#—ËÅBS:ÌÔ²¬Xü«¢¤m :ë´UK­øÕˆL&Ãûî¹€Gy„÷6yïgââ<a£ƒƒƒìر£mÇodǺ/Ö,ô˜B³Ðv¬Y,èq…f10Ÿvb¤Ü´`O;®½–+n¾™§V¯ÆDÍÕL’düú\.ÒuUη àØ7¿ÉcË–qÅûÞ• ½Àà¶m ÄNaÖûÔ:!Ì55‰“s¹©íRtAb|A‰sò”£ç (ñ̉^_S«'ÚÇDMœ‹¨øàÚ,L’(}Ž´¤XÃÆ¹é¦›âÇñÿ{ö졯¯/~¼uëVöì™T¨ÑÌóeÇ!!gÕ©ºyófÊå2ù|>ö²ò¸R©ÔP¼’ÑVròäIÆÆÆšïfJ«ó¤Éù¢‘ë¾X³PÐc Íb@Û±f± ÇšÅÀ|Ú±‡7Iˆ[ñàƒ¬}Ï{xòÊ+ùOkׯœCâ÷F`ÃlOªàBpðàAvÝpÜ|3®iVTŠç'Rt¡0 #Îý]—Úi_ å&ša‘DÑ´PšLÃÒÓ1›D´«¥¶ ¡ˆrmÒ%["ÄõööÆ<::J¡PàÆo”‘oܸ±jßéxꩧصk£££í¹jÍ‚ctt”]»vñøã·í­¶ãÇœ]»vMÚîá±¼,æº.AÄ¥¬ljCCųL¼jÿïêÕÜÛÛ‹‹êkĽù”ð/!ðÓ±jÕ*öïßÏúõë[z?}ßóÂµŠžž–-[ÖÒã6SÁu¦ÔkŸØñSO5—Äs64²ãÙØðÓO?ïûuíX³tÙ³g_ÿú×yâ‰'ÚrüV÷ÅO<ñ_ÿú×Ù½{w§o¦Ëصk¾ïóôÓO·üØík¡C“¦“ããÙØ14k–.{öìYãã©Æé¢|<ÿ<—8À—V¯ÆCiE6ÊiÂer*²f˜T­Ô4ùò¯ÿ:®eñÊg?G-IZ 4’"¡›ˆ=j+“‚ 5ÙÝ\çùRt3%µý-FÛkì ªÃ\…Býfà÷±{÷î["Ärãܵk×_=7Þxcìö9V¯^Í–-[X³fM«š§Yà¬Y³†-[¶ÄBL»h¥÷ôô°eË–ºÏ]FýPNñ “|kò¸T*©êŸ(®€ Y·PýˆüÍW̰ëÖ­ã#ùHKïcz5£•ážË–µ¾Ð³„·ùB‘*ªØñêÕ«[~¾4­²ã³Î:‹ 64´cÍÒdãÆ\qÅmýŽne_¼fÍ®¸âŠªoThÒ† 8+U½®•´clœž0j4‹m|¬YšlܸqAŽ+‚šàÿèG|ôÉ'ñ¨Öu’:3" ¹î‡?T9àR¼ëƒ$“ÉÄ銤^­sÀ¼‡žúÑo%º¥xÃ9oPÊdZc IBCÍè†ÕîC´=­/¦+ÖS:Û¨¯¯oFãã– q;vì`||œo|ãU1Õ}}}ŒŒŒÄ%>{*V¯^ÍÖ­[Ûþ¥¢Y8ôôô°uëVÎ9眶ž§•v|Î9çLªº“'‹Ëý©m¾ï“Ïç1 ˲ˆ;Q õQýU‰ja?ÃŒÄúIœþù¬*‘¶Š¶”žníÎ —ö¶;n÷@£žÏƆÏ:ë,.¸à]MSEoooÛ…¸VöÅZˆÓ4bëÖ­\pÁmâÚ16nÖëH³4èäøx6v õÇÇš¥JXhããF㊰ªXãgœA&“aˆjM(Ç ½á‚@‰oÙ,g_x!;§2’ù—ã8ªaØ–¨ŸãR?Ç›ýøÏýç$—›P[|!‡ «½¬“s¹¢sÌÓ-舷{÷nzzz¸é¦›&®÷øø8@\uQ£é6æÃŽƒègíÑþx›ïûA@&“‰Å8 þ_<ᤲr+*é¤ñHª?/Ú•u-í¹ÖJ滿kdǺ/Ö,ô˜B³Ðv¬Y,èq…f10Ÿvœöˆû惲îèQ àö¨%¨“5¼aàyœxíkù—w¾“#©E$™?–Ëe\ר[P¯e”™,®5ÂC jiAÌG eŽÚ~UÏU\>q¹òb“zõD4ñŠ«¸LV5%´´ ´Èz´$Îktt”={öLz£}ß§··—n¸ë¯¿ž­[·âywÞyg§¯[£™Ä|ÚñwN?]u¨QX¤mÛ±+±`Yƒƒƒx(á¿Ö®UÈÛ_/´û™ª©íf*;Ö}±f! 
ÇšÅ€¶cÍbA+4‹ù´ãt¡†¿¾ï>®êïoúµ^•7¾¯ª¡ärP(ð7®K†q¿´p˜Édæ§C¨Íì Ä¹JÍö\ê¹RêõNÂ;¿öNFVOH‹$ÅÒÌDÁìŽZui‰·sçÎ)c¬o¼ñF8tè;wîÔ!§š®d>ìØÇg9®{ôQ>ó䓸‘Ëp¡P¨»úâ£D¸!f°b2CÚuÜvñÜsÏµíØ )¤¶SÙ±î‹5 =¦Ð,´k z\¡Y ̧ŸÑ{hÕ*ÞþêWÏ­ñ†–QÑà|ßÇq*•DùJ;u´ )íšž&™LžDѾ…šçr(¯´"*Ùù0‰@ s/^ù"ËV/KÎ!ÞlLr%\h“×´>óyz{{u^ Í‚g®v¬*èTX}w,‹b±WI­Gdáö7í÷|øá‡YuøpKA{¾¸ºÝkÚŽ5‹mǚŀ¶cÍb Uvìáq6fìHñèg°m.ǵ,õ P©Tâ9‹ëºóãý&ùÜÒ§Ê“li¡LŠ)Hž·¤àB5¹õ˜$ä­¿}=ëY_}ÞÊ+e‘F¼·¬XƒF£™š€€q`llŒb±ÈºuëbÞzžXªßY¨Á’¾ï·ÍÃìÕ-®žÚ‰L5F£Ñh4Í‚æl •- Á²šÖ‘¬if}" P*•ÚŸV'DMFÝšíâkQLýo£Ä¶ô¾.jR+ˆ—›CsTšÜo¢…8fž¸‹W?öX²1ªŠZKˆ ™ŸK5ÔN"_ íð2;|ø0+W®lK{[M» @h4F£Ñh4šîÂÇç$!ð·=4£ªÜœ8ù|,¶¥1MÇqœÏ J*奧5J +¨É«ŸÚ.B[@µW‰…ìÚ‘ø|¡…8fžyöÙg)•J\>1Áe—]6É¥8@õgµáõ ‰væZ;Üâ°TÁóZ_U{Úi4F£Ñh4Kƒå‘oÛ/_ÎeQ5Ö¦øÞ÷àŽ;À4Š7û¾ s-‹8Šª•N‹T)- ÂQC’ÐRP¢Zåù–¶U¨c­mîBùj!ZˆÓh所€GÇŽaY_|1kÖ¬™´Ÿƒê»æ³ÈK³ÞÁ¦I€çZª¼Þñ‚ ˜û4F£Ñh4M×ãã³øá–-¼µY¸0„/|Þû^(p'ެ±,+ö‚+ ‹Å¹7Ô%ñtKoË×l3Q™x±É©ÓâZ&:V­—[™ÄKN3 -Äi4óD@ÀùX¬·]vÙeUû„¨>p>½uM’ðMkÑqF£Ñh4ÍÒ `=F´¿ýâ‹›{¡ãÀÛÞk×âû~\HNõÓ^p-I©#¡¤il&‹s‰ç\%ªÕŠkV´­6§’‹ždNâ4šyd%û÷ïÇ4M6mÚĵ×^[õ|å 7Ÿ FÍßnçH*sår¹¶å‰k{UF£Ñh4FÓQBBLL¾ü8»>úQ®mNƒ C%Ä]w D·\.‡aäóùªÓ4ÉdZಢò»¥‘üniΩy\¢¾¸V`r¨kÉbŸ&F qÍ<ñ`?°êðḈÁ† âçÔ"ÄB-ÐPK;r®¼0ƒ¤§Í’Éd(•Js?F£Ñh4F£YrøøXX?~œëûúøÛfœ\úû¡P€³Ï”Ø&…†††ÚRü®n"ò"Ê+.]õ´^–z9ÞruŽ)¹å4uÑBœF3OÄg'z¨îó ¹Jj#ÚòÅ¡Yt)â7¹dæD CB\\Ê”;Ý|FÓ!¦²M«}5F£i>¾ª|:£ù04©â}ŽãAkrM‡$EÒ”PÞnéDáIxjz}:¶½¦ÙËÔ~ YÖéÌ'ic6 £aÅ‘t\v†ño&“! Ã8^Û4M<Ïò¬¶V‰Ô,öcaÔÉ&Þp•´É¢=a©…Bëeŵcc9r„õë×·ý¾hÚK™2…HzvR£€~ú)QÂÆ& ÀÅ¥@ ‹|”E6 ÀÁ!$ĦµÅ64ÍÜqpȵ¸ôPýôÓO!ú©ÅÃ#K–9,,ᱩPaë8À þùuÿLß²¾Nß*F£Y„¨y‹Iuú0ÑpÒKð²Í~e_#õZÙfO½å-¾¼9!çÏ:‹f_T'"'‚X«p‡\nߣ>Õn‚LÅ ÎD‰fR%Õ£~å@q§e, !Îu]|ßÇu] à C È5˲pÛ¶ñ–eáû~k>i_‡o›$êTû¥Ÿ ©/ì¤;ë“'O211Áæuëøàø8ûöíc||#Èd2‹RÜ|ÕÁƒüb¸~—ÎyÃehOqˆv„z®<~€sÎ9§½7E3gÊ”c¯5›̨7qq)RÄÄÄÁÁ¢LÀÆ&C†,Yll|| b/¸ˆe`P¢Œ×h4íB„0“ÆÏnô“âäµ&&A,¦)âáQ¡ ðN|¬ ú±£ä}üØCÎÄœtLéC<ÌÙgŸÍ«Þû^xãù:ð”eñ?öícÍE±jÕ*FW¬à…^”—ÑÚ±1V?Ϋ.½”³Ož`OÇçøñãœç@{øá‡9ç•Wè;v ÏóX{É%üäµ×ò×_füŒ3øÛßû=Îéí…Þ^Ö÷ôðÁgŸåŒçžÃ ŽôöòÒ~IJÇcųÏvÚ´f…z§}ÆÆÆªlЏÀP§¹€X»vm§›Ðò9]*äÉ3ÄA<ÑW}—€€¥Ø[¥B¥ÊC ’òµ°È‘ÃÄÄÂÂÄÄÀÀƦL™%ŠÒŸ$¦eˆWY…J,œ•)ãâ2Ìp•çàP¦L… yòXXq? ¡ä"±G+ m^Ü? 
1D?ýñg?O>ÙBB bѯB%~NBÝ¥OYñ¨M‹x¿ö­_ƒM¾ÓF3Á<Œè±‘z^â Œè7=ÔY¨ä×,­ÉÈv‹D€+¤Î—ÖpÒÿ7C3Ëò¢ÿìžÿ[ÜR$U‚8Ì”»îº‹×®ˆ·,Ũ¬é7]Ü!10ƒê0Ô"Ú®ÄBÜž={ؽ{77Þx#}}}ŒŒŒ°cÇÆÇÇéíí¥\.Ó×·0\÷óù|,„¹®‹eY“TiÛ¶ãmâÍ6› ³; C<ÏÃ0 <Ïöm‚ȳK𗸛Ê, CljŸ»êw—Ë.» €ý{÷röÉ“\uÞy¬ºàö­^ $eºÓÜýš$*Ñÿ{÷îeÏýëW¬ˆÛ.b¡ˆ‚Áßý]ÜÞM©{%ל:…yØ7ª×­^Ï·ä½ ¿·—3~øCNիسf —ŽòÜyçñÈUWñÝ—_nÉyæ5%°èÙ¿ë½ï­Ú.–¹øüÿÚÉ':Ý„¦iIn‡„‹‹O‘"™ÈÏR&Âýôcbr€ñþ!á¤ýêaF?B&åÙ>¦Î§YʤÃ@ÅKL>WY² ¦–°<¼x[–,%JUŸ3ù,›˜)Ä¢YB6.”)“!ƒƒC… 66EŠ8щI†LÃ0Rñ boXéKJ”âþB„Aý-¬X¨Õ'Ô†ªK» ŒØ#Nĸ]OíÒBœF£™w$š(Q*gt‰ÄƒMÄ5“Dp³£ý%‚pˆdÎ'zJz­xºÍv®a5ø_S‰Ù\ÖR™Ôm‡f›`h¨E Ë.õÝ Ó_“!Êðl&çƒÓé¾ÛÆ2€ÑÑQvìØÁ7ÞHoTÝ£P(`Û67Þx#CCC ¾ùÍovº½SR.—ñ<0 ž·óN‚ç¹üÚ6RG2¦¦a¨j)$ž¢Êó[¶TU N»öÎÄéÕÚ²…nÙ2§k±r.î¶S ñW]À5^XõüŽì²Ü œwÏ= qâ&î2³÷±sRê#GŽtº M±T<áÄ»ÅÀÀÅ¥D ‡A«&õiN(é ®Í”dÉ’‰~€XЪ—QB´3db¯0 ‘”W˜x©I¹Ož|VãG?RÆû1‹tò™±¼ö3] Àf6S¢4e®8ÅÊ”ã~£ö±\[…J•ð&ýO3XX±ÇF£Ñ´?õW„4Íd›öJÁÌB…qº©ÿ§MÖ~©ã KcDÚøÀÉÿøÌ©˜ÊeÈd& qøÀ¸ùž›ù¿Cÿ·µ ªM^k46ª Cí:Yí)ÒF–±'ÜÎ;%ÌŽŽ²sçNzzz¸ñÆÙ½{7###]éW,ª•ZÏó¨T*ó’[L„ãZjQB²‘.:"íLÝ{5 “ûñXƒAÏþýñ6âÕg¹ÞsÒ†v‘!3kºnb¡Û±F#´s\1ß8¨9 x´‰ãE ¢¶ÅL;í8~ì‰'ê?™ÏO Iå\$ÎË—/Ÿ}.vIFŸ~¹ ñ@†-¿¥šYp:ÀöíÛ P(P(âÊ©×_=»ví⦛nšö`###ìÚµk’÷œçyÓ¾v¦È1 …BS9˜|TÞÍ(Wb8€òJªD‡@g8Z´ÎÕù\Ô"\Hò…Fb½Ãè×­y¾?õ|M1Þõë×sõÕW³iÓ¦Ù·KÜ_¥-µÙs õ츕}±‹K†L,(,†É²¦»˜Ï1E«ðI[\\úé'OžÍl&  ?úqpââEŠ“ò–yx 3 jõ>_âçàLß°&±¢Ÿz¤ÃZk¯¹]žf‹!Ü}!Ú±FSv+Ú‰ÞòÀºè×Géè¹`5ÿ+ E¸ÅL»ìxÚüÜR)µ&–ïûq¾û‘­³Í€¦†~ûhÚÎ2ùgçÎq±†4;w/®»víbdd¤n¹={öpÛm·µ¬ÑŽãÏç›ö„sHJAH<Út•¦–vÙñ¿âó¦ñ몶ÕVˆnFUŸY#ß'µ£‘,j©ÐA}˜DÕ–¤Ÿ^ta.I‚BÍëÍÔqŠ$.©(®ˆÕ|§­\¹’uÿo%œüMtŒaÔ¦µ¥H¢²gRmò¢¶”IB‰3/Eד‰ŽU$‰MÈÔ´_Ä.¥5²ãVöÅ’ô]£ió9¦h%Y²q…`‡a†` ‹räsŸÎ­6ÄP\ðDŠXXq.8ÁÀ˜$‚™˜dÈàà´¬@ŠÁ ƒuŸk$ÒU¨è°ô,T;Öhj™qE»QÃÅÚTCš¥G;í8 ÀÂâK/¾Xσ:žn–e18¨¾w¥ZxÓÔV}”J!Sá¢ÃR»„eÓíÐL8*‹xµÅÆÇÇéíí]>[á²ìº.h\$Bæó!ÊË­‹çÍš.¡]v|‚K=ƒ5óViaáÑ‚Jõá©tôª/£Ä® É'ý:$+-’8p‡¤ÒÉ J¤“ä’ýV*öˆ„|_I‰©×c`÷EûŽ‘(ëÄR¹¨ "z$_:2«'4:$¥º%NAÚ‰Ë÷c %šÑù<’ÊDrüzowïü—wÎýý™‚zvÜŽ¾X‡£jÚÅ|Ž)ZIzE\ÂQ‡ÂÀ`3›c1+O#ú‘…“tޏZ2dê~ÞDëägQWInÌBµc¦–ùW´Š5\“5cñt[¬ø¾ßò‚…Íây–eáû~¥†á¤}d{†˜¦ÙtT[+i·<þÜsô=:ùÉ0„Lªy¹<ãôZU”˜œ/ËeròñÚ)¡ ­ð×ÐÌXˆÛµk·ß~; rÆÝtÓMqxê\att46l1òzÞwÂóÏ?Ïž={zâ¥ã¨á£æù6zåc10>>ÎÈÈÏ>ûlGÎ?;~öÙgÙ³g[·nå!gŸ<É‹ ¡Ð€x$ÞmE”ß¾øñ$¾û›£×I–}ªÝýŠ©ýež*Ìô¸¡Þ‡ÕG ^ðàmÑöÚ. 
BcoµzÑMµûdHV‘ 5¯•‘œ¼.—zÎ#çÖEÏUH*¾ G¯wà¥5/ñòþ— /œÿoÀÙØð±cÇxâ‰'b;N3­ ¾fÑ2::ʾ}û˜˜˜˜÷sÏÆŽ'&&Ø·o===-×)ƹIÅ@yŒÖŠd&&V,¾‰à&BÜTáÝÄ.#õ£™9{öìá‰'žàرcózÞ¹Œ7nÜX•kN³´éäøx6v ÕããV#$ÈalÞïJ{qwhš&®ëa–eQ©T¦#×IA9Ïó0 Ã0‚ Ùär~Ã00M³jN.¥-aÆB[ºMrìlÛ&^ýêW311ÁØØü¿csŒŒLWL,[V_`L‰nAàûÊk¡X,ήèdžÉ¢›¤ßqIæ2µNx¢LkZÎÈÈÈŒÆÇË@ DvïÞÍM7ÝDooo,Ê•Ë3pl@__wÞygl¤[·neÛ¶mÓ6öîÝËÆ' q͈pŠ:ˆö¼\,LLL°wïÞºáóÁlìx||œ½{÷rùÖËY‹ÉsÏ=ב¶7DÜE+u¶K¶Ú1T‡^ ÙT¨îÔK$žnâÙ–~^<å„™:nX¨/ •lïÔûJxl«É5øßbr'#ݦƒ©{ðäè“|ÿO¿Ï]Á]ü*¿Ú††6f66|ìØ1>ÌÞ½{«ÌŽö†[Â:tˆ}ûöuäܳ±câÖ¬YÓ!ÎÁ‰ó¢('‹ iëX‡…5)\ÓŠ?7 ñósý,åÈé³dïÞ½>|xÞ…¸¹Œ-Äib:9>žC2>nµ磆`6 gèû>¦iÆ¢Lú±ëª$ʆaÄo–eÅB–o"–‹E<Ï‹Å-Ó4cÏ4×u1 Ïóâmµû¤ÿÃLä½%m³,‹ááaÂ0IJ¬*o6ضŸoß¾}Üÿý>|xÞ߃¹ŒÓBœ,P?·|ù´ç Ã0.ÈÉÌ"µƒÌ/ŠT;H”‘|³h"B\³,5±m;vÃìééáúë[S6·§§§JLkfðpÞyç54þé\o#Ç“†%¦5 “ÞÞ^vîÜÉÈÈHGÎ?;¾ð ٹs'k1¹çž{¸y¶UpÚ)PyÕy× Ýñ)Çä‘Í|¤’s®ìÔÍšé/½šN¨··—÷~î½Ü³ãžyoÖlløÜsÏŲ¬Iýq‘bW†£i/½ùA&O" Ì'³±ã .¸€-[¶´dÒ'Þk’Ë--ºIJ€oR®µ¹Øs­ÐÂÑñb(hÐ)vîÜÉ®]»8÷Üsçõ¼­k–.ÏÆŽ!· É^"Å!»et""SÚ;Ê÷ý8L³\.Ç¢Z.—‹÷u]˲8pà@SçI :Åb‘r¹L&“‰4Ã0bqNÎ#âOZL ‚ ö†k†Ù pBz>/×°cÇŽv½ ™Ëø8Âbc³úɃM]{ZP1’~Gæp6ÕŽTñ»:)þ<²}ûvz{{›Ç¡©ilÅŠ±pûí·³wïÞ8âž={æ´’çy^CåØC‰pChNÓZæjÇ+1Ø¿?öÍ7Ï_£‹$Åj“y RÁ&)e- ëåëÖ++ –VöÅVWNþµ·øiõ˜¢Y<¯Š4¤Ä¶b± pâ•8+$UÒ ¥‘"{’:HÓuÄBÜÈÈ»víªz²öq3nÄâ+ôôôpçwÆ¥‚`hÇqVÉ£B¤»T;Ð,0ZiÇ>>GPyü›Æó”°æ8J\JeãÌd”çûªSÏd1îqàSB\5BùºlCàÁ7>ÎîÝ»abb‚¾¾>úúúf—@° Ôsû•Ü„ÚBÓÍ;ãŒéÝÖóy%F5»2"%‚+¨BºS~4µŸä˜nUÄ4apPyàͤ͵"$e©ß0LòÔ¹®ú›¾fÓTŸi&Þwa¨®[ŽQ{|? 
Á C¬?ÿs¨ ™×Ì/º ‚f)áãÇBY­Çe¥Îw6‚\ZxëÆ"(FÓ HN¸fœ/\×Åq,ËbhhH{±iæ…ÿ8y’Ìå—'D‰=¶4¤¹ˆ*¸§Y0TåˆÛ±ckÖ¬a`@ÕŸç–[na÷îÝ”J¥Žæ¯*6UÛPâ¯{5ÝÎØ™gN¿S3 VÔH¤@’”S¶¹Ño XI"ÊåH¼ãšÑÔÅ“­0MÖ\€íëU66ŒªD¥ñSnŠÀ‚š6ÓµIÓvíy¦Yô)ÆDpK{À5B‡Gk4KÉ™ÚìwbH¿&$$C†€`’l@i¶OùÑE?êôíh;Y¦w¾ð<|>eY …®q&Ñ,n<< ö?ÎG¯¼2õ„W×#.—›A)Ó¤ÀžPŒ§·é¡ù‚c(ÁmÇŽÜpà ÜxãU;ìܹ“;vpË-·t´¢S†“ÂRËè܃š…ÁÆãÇ[w0‡Dˆ« |ô‡H’f”€×'£ýSòijÏît 4h±A³¸ qp°°È¤:ÐéBFuH©FÓ½Ô¼‘%KŽ\•ð^¦<©K‘" rCØØøøXXøø(``P¦Œ‡GHˆ‰Iý88ñþ*˜˜äÉÇb]ž|\F¶U¨LZ(SæÀÕÍk.L’|àä‹ p]×uÔÅ4óŽ‰É gU­WÔ¦î‰ð}¿y-G¿ƒL=§Ó¢È‚cÀîÝ»éëë›$ª$R.—Ù¶m£££ôööv¤¡R&Zˆ2Féüƒš®G ž¶¶æ`9Ôªˆ‡êŒÓ€ J¤XKóŽ Pá¡¥R÷jذAýýÂà+_Qÿg2“½êdÑ@Âg C} µ¯ ¢ÂZëûê5¦™äw¨ v‚+÷L^+çÌå¦÷*\€ÈäB£Y¬øø˜˜Q´êümCSÄ}h!N£™?üè'W#Í 0@†LÕö"Å*q]R,¤s<  "‚ Š «6'×\ÜØ«M^“'OB•–!ƒ‡G‰^üýR¦L… ..›ÙŒ‰‰Íp|™^°°(Sß!"$dã¾ðÎN¿+í!röi¤3”ËeÇÁ¶í¸ò©F3ßV¿øbõF߯ñ”Î[ص® Zt[,–:U¥ÞÞ^¶nÝÊ¡C‡:&ÄAO]Fé3pêÔh:Æqଗ^ì©~BŠÌÄ=TG0¹6™]Çlšª-i¡ª[ùÀÔßûïW˜䚫ŶÕ5YV"†M•D E 6ß–\N zRж“Ý.hÎtòzf1âãWy¶h4šî#OPÞ`Fìefaáà```a‘'Á0ø¸ñçÛÅ% $Œ½Ú,¬¸ ‹ƒS%°»¸äÉ“#G†Ì¤â*²M(S&Ož •XL Œ8'd2¸¸U¢ÑÔ—ÏÚck;ý–´q¼h4¼Íç•èðjܦ½â<˶nÝŠçy w™Ök®Ý„aˆiš±#ž jU.Èâ­ÕÌê]€ 9µQa©™èq«;áâÅeœ<‰±vm§›Ñ<®›x?F\±jU§[5+BBí ׬Ʀx~3*¤xÓÖ~Ô’ÊÊõð¢ç—ˆÆ$ÝÊ KA銧šÅˆ„gJñ’òä b»/SÆÄd3›ãצ…8°ÒÂü/¢w@@?ýŒ1FH dämK‡•æÈ5ù 0£ŸéÈD?Í0ÓïÀVŠó’+®Ù¶.tdÔT;ç ‚ß÷)—Ë 7SCU£i/‡ xéùç›Ú×ó<<Ï›Ò+®.J™–ù fÁ³ À¶mvïÞÍ-·ÜÂÎ;«ª£îÙ³‡]»vqã7v´jj…Í §šÅIàL¨ÎYQ,6¿Jb‘øæË¢Ÿ™ÚÖ H.3¾õVÌ…$ĉç£e)1. 
y,í¶¾€Ð¡z³@„ôÚ»O"ˆ¹¨Ï´ ¬r5û9(^„´LÍñCT }Eí­_üaªEµ0uî€É‚›»µuÎ}üØëFWÖh $d˜a|ü¸¢g… eÊØØUáŸé} ¼h°2À@\Ä -¼IXiŽ&& ÄÞlcŒ‘#‡‹çs³±§¹r‹0ˆ¡µ4ò’[è”™üu†!ýýýär9u>8MWp˜€Ó_\ÝTÞ7Û¶g–.•¡=QQšŽ° ’‚ ·Ür ?õS?ÅÖ­*±ü¡C‡åÆodçÎk¤xÚ/èá±f!1˜‡Áùç« ’à¿ÙFƒÉFßê\³pîæØ\}u§›Ñåru˜lÔ[±¢Ó-› Ò#NVçòÅ!â”ý5<ïFçIÏ =’PIp: ô“bIØyÚ «?z]ˆÊV-Þ°P=;ÉEÛ£ãdHÄ3+µ¯$WHÇNEçrI„:#:¯A""†@(ÀŠ-+xÝ?¿:74hEŠqÒ÷"Å9…Ñi4‹ )``ààP ‹`Y²U!ª’ M¼¶BÂXèvq©P‰E²€€,Ù8œuA<<\\ ª€ŠgjmŽ5MÂbâÔWV­_²ëºär¹™{i4mä°îÅ+¦Ý7 Ã)äI)`Ò ∡=â§Ë?½½½ÜvÛmÜyçôõõÅUT¿ùÍovT„ƒêŠ©ZÖ,4ã³òøñdƒï7' D=Ã/Nÿ²EO©´pò²-²r ÒSH<ÅfÑê³("”Só¼m÷H<Δ°å“Vo¶*T4µK>Óò¼z" –£}‡¢çDl+Fç,£f*J¬+DÇ)FË$6);7Hâ^žþŒöËEËQ;Ôë¢ãçà…Þxð'œ÷·³”)ÇÕuž7ÍR'Ož,Y€X Î‘‹½ßräâÏH† cŒQ¡Ò04[¼×†ªá@…m3‡®š˜äÈMYX3™ÅØgÉúU-ŽãÉ,B·lÍ‚æÉÚ Ù¬šïÕP,ÉçóÁ  ‰j ú²dR…,v&k®˲ðýÎȯ¢§#y4Š 0MÏó°mß÷•»«Lè$dI\Yèw€äƒ½8@2Ì &qâIá’x_ù©ÿ§òq˜\Ù3DMN¥sÈäoÙ<ɤ•ÔyPç¸úÀñ„Jq:›y6mR\·94õÞ‰ W@3­ˆÛå´£bªëºíTO7Ÿ+’x˜Öû’‘jÄÒ:;Hß”!Ý2¨~¬BRYHnY):¾›:;n.}«…ĤbN…è¸vt®0:ŸGÒ?ÛQ[²Ñc‹$㵕:¿|Ò>!Oâ¹—!qK•Ùµï-›Oj“"KRwf©áàÄŸŸu¬ÃÀˆ=Òš-&0‹1\t>‘qKªzc IÍd23 ëÓhæ‰ÇŽ%|Rîm‰ð›“7§,Ðj§ýEÁés?Dûq©^®€,ò0óO†Œ~s4ñÀÈÂñO' Cö~~/žçqëÎ[9üÅÃxžÇÑß8ÊÓxÏóø§Ÿû'¼¼Ç>øÈÂÈ#ê8|ù³_æø¿'[ÎrðàAîyã=<¿óyö~~//üþ |ïߎþÆÑ$wPõ­8@"² D³$#.ñybo ™(‚šÌɱÒj¾xÈDTÄ'5QŽÎuÿæû;ýŽÍ˜—0“• h®@©û)+5¶¢ÑÌ7­)• X¹\Æ÷}²ÙlÕóŽãÌì€Óí.Þa@*‡x‚O2 ‘~ÈOWB=Eø$)n` >£…híDpÏ¢:Ä|ˆDT“ñX­Ð&ÇfrzŽDT;jz¿t¦šû6êŽ*Ñs•T»+,º/a±iŠªYj¸¸€à6³™2åXx³°°£a˜áEéyµ¨ç‘¾…9Yßk Ãb±H&“Ñ!©š®å™GUÿ„aݹžaäf³@?Póxñ}ä—, Bˆ î¸bRŽêG‘X*—ËxžÇ=ï»'ñx€/½ûK\òžKðŠzÜóW÷”Žþ·£¾÷0ží ‡Î?ĉ·œàÅ _NþÁI5ù"ññP‚M’ð[T}ñH‘ oÚë$OâY—eA²eÖe©PÅJ“•øl’‰~<éôÕh–2s Ku]Ïó(—ËA@±X¤\.cš&Žã000€çy±Ë¾,J¬[·.®0L®ðíV?¬u÷ó!¯|åõÙ)DÏK)ðä“Oòà胜øÈ 0àø‡sô/Æ¢÷ÉûNªbA6P‚Ð †%nÙpò“É9å *u›^yä‚0Z`ñÕKºRý­ªÍé}æºð/}êÌßìäõ¹Ô¶E¨W /äZ£™>>yò¸¸”£AhB^Z¢D%úÑt'!!ÑÝÒ¤Á'ˆjÎd†á4]ÍñãÇÕ\¯Ž7¨ñ«Ÿœµã®AÐEÚËæ~ˆöÆ•Wv½ÍÉ„*cfÔ·H vݸ‹ž«z92BÉ(qÇw`›6Wí½ »`sô«Gyú?ÍDoo/7ss\ž8C<8ü–ìڼŠÕg®fù/,熳n€OÀ&g¼‚š$åQÂWÖVÖ²Ö^ ¢÷ÝÚ¼×~/X°ö?­6mÚï{Ȇ]j_Þ x÷xï‚<Üš»,xô¦G9õÎS<;þ,ëÿa=~Ÿ³ùþËßgÕ«0/4yùŒ—ùŸWþOv™»p‡Óât.¿ürö…ûÈY9\×Å|JÃÑ»i“Tà :¬ÊŠ®I<Â|8÷й~›gMœ˜³Ù OâÑ"y›tl¶¦« Ãr¹–ƒ ```€B¡@>ŸÇŽòåIŽÓ4±,‹b±ˆmÛ `Û6ŽãP.—Éd2†A6›·»®ËÐÐêk3žày^ô_IJ,Nž<‡®ˆèõÿûÞz/þä‹Üý¶»yêwžâCç}ˆo<ÿ î.ßÍ7F¿Á¶¶qï ÷ò¾×¼†o 
÷¡^v_¶›¼ê\úì¥ô÷÷cF<€2 C}/׿Žog¿mÛêù>ÅÈM¸H‘92dª¼Ý´ç[÷"i=<,¬8ŸÞvtºi-ÃE9ƒñ÷þððð\©Ñ´•„¬äl5×óýºs=Ïó0Msæ•~k‡wz¸²hX0::Ê¡C‡:Ý–) /PÄ¢Ð5!$Aðâo¿È7_\ðgà¬txökÏò¯|€£ù£lß»ÞÑ^‚l6‹eY¼0òïzô]àÂÚkyð‹RÚ Sú™!N²½¡°A}èCyiH5“¤ú„„¢mµ9$§Pz›úp©ÿkI-†^|ËÅêŸ7È!UlÔu¿wÚp£ú³ë§v(÷ÛœºOÙ¦-ËS…¨™¦I.—S“æROÔc i[Ôî§w<Ýé·}VÌ*¿bZt“TÚYÓÅxž‡ëº”J¥x[¼ßã4žç‘Ëåbý\.L|ß÷7 #.Ôc¾ïcÛv,ÌyžÇГC|æW?CþÌ<‡ÿð0~ÿ‡é=ÒK¹¿ÌÚ¿XËÉ·ŸäO7ý)7¼‰®ú!ò;²ÿoW︚?ÞóÇp‚Çðá;Öw „#=‚±ÓàÒ—rüøqÿj0ΓZoµÝßçé¨Láõ†!Ùl–0 ) ”J¥X€3M3ÉóIµ÷_† áº.¦©ÂݳÙlÜ&ù›®2®™)Ò …¸¥AúóµTqp01±° çœ÷M3¿(%KHW–]LdQSù–u‡ááᙋÍ>ÆŒWüÆ·ŽóùퟧPPƒÃ0XùôJ>ºë£¬X¿‚ÒŸ–( —Œ€ûŽÜÇ|‚íÛ¶«0(>½íÓô=‚¼nÛë‘%-¤¥«è‰#^b >€R-/]à Þ¢‘ÙàÿyÖN åqz*E'jÿ_,Ìjàï F#iu`æ‡ÑhZA½üp¾ïS.—co,ß÷ ‚×u)—£p§BÓ4Éd2d2<Ï‹5!=À¶,‹¡¡T……Í s yeYÉgj3Ü|æÍÜ<|3øäñOÂ&âþpùòåÜzû­ÐŸü¹OªíŸ†uåujQc¹œXëo_ÏzÖÃ-Ðãõp¾u~|ÞzTµgêM&Òý]ú8r½é{•~^<åäžKHïàà`왨'/šN’þއĖÅã¨úî÷>ÎîÝ»™˜˜`Ë–-lݺuFÇ´,‹ûq±±qp(Q"K–9²d)P H1Þ^ çLðñ °±ñð c: “€3~Nþœ=|6Z÷! Ã`Å/­À~«Í=oº‡{ü߸ö{×bž0¹~çõ¬]ÏɳO²Ü]ŽY21ÿ¦ZÖ)çGëËE"œƒú–‘jx:Eyˆ0I*ùAãXð´çØDà2È/qAïÔêu«ì¸j†Í‡¥ú$µÛÓÂj·ÇgkºŠZ;žK_, i$—™çyqÑÛ¶)‹T*‚ ˆ.Û¶«„¥i©³à_Õ'˜¨ ¸Õg¦«‡ 9’ìÏÒ¤j€Ó›®^q”~3 Còù|ì)W,ñ}?î?ƒ X°^@íS¤ –àhVB›}ßÚk'ºòÙ¶, Çqâœéç-Ë¢P(ÄáÑâ*•îEèâ0ëtq–r¹Û§nAÄ¡çÒ>Ó4ã|’âA‡‚CÕþ"ºÉyD «T*‹E8Wšw]—J¥Â÷¿ÿý¶ßóvÛñt¸¸qåR#úÑ,L:åÉØÊqE-µ…ËÓ¢¼FÓJZmÇãÀþýûaÓ&%ÄÕ ¹Ž§õŠ+¿¬Jm«=œ…NU´Hhiޏ‘‘víÚEoooÕ`£X,ÒÛÛËÖ­[)‹ÜtÓM3îXWPˆÂR °±±°È‘ÃÂR!XñSŽ&& #{ÑIø‰OHˆ‹»ç›˜xxœùÀ™ì?¾Ë´¸sù¶ŸÄ8ÝǦ+•Ñ&˜†aÄŸ×uÉf³ …xàW.—9uêTg.t´sL!„„‹",5Ý禰ŠÅ"…B0 q]7N ‘ö4Ù¼ysܯ†aH.—‹=WE(✎AËåp'þ¼çr¹*Ñ7-’A€mÛUâ_.—«ò„«m»¼Ÿâá&mê¾×i¿QeÅtÅôþ###m}oæÃާ"O‡AãPlf6´c\‘Æ#æJ…u] AÓjÚeÇëî¿»\†Âd‘¼éEh%Äi–ËöìÙ0§UŒ]»v122ÂøøxÕö‘‘:Äm·Ý$Šs³ÆíyÇÖ1†‡M%0K…³Ú¿‚L%‰iŽêrÁ²¿íÛx¡JžøÕÏ}•Ü{rØž Í"½;{©Øî~ònFOŒ²éO7q.ç*±LêkÛ¨‰_:o›)âé&!¤0ÖÆwtašf<ˆ—$å¯~õ«ÛzÎVÛñsÀY/_…i®TÉ;§'¤Zl®Ý$|Xâ‹Er¹aÆù¿Â0Äqœx• 2 #öH°m;öp0; [Jçãï‰ ¸g‚¹¼‹!û4„çBù.ð.TÇsµçÀ7Î1KêØ9+J÷àƒð쳘o~sÜNq– ï½cx˜påJŒ•+ Ÿ|ãøqü+®ÀøñÇëëà êÖÇDZþëU Ã$ïcà÷õ^pF¹L@ê«¡\Ç?l› ǰü¹çªrGú€jËR¿†¿?VÚ£&Ú>¶mS.—ã°Á0 éïïCÿQ,«‹eÔa÷îÝ|ík_cݺus´Ê™3;~øá‡¹ï¾ûxãßX•WòÃÎA*oL>ŸC8óù|ÜÏJ¿lš&¶mÇ!Ÿ¦i²yóf6oÞcxxxÊ"•J¥JàÓLfÇŽ<þøã¬_¿¾­9Új™ËØØ¶m¶oß>å¾>>^äJÑ)±Y3?tr|<;5>Þ±cG•ÃG:kO3ßCšÅÅîÝ»ñvïÞ?™Qxá+Ÿàì7´ÄóÙñur–Õ<Ï«òf«7y[HÓ4ã¤ÛÒî´»·L>ET 
tI:.žt2ðK‹õêµX˜« UIíGŽaÍ›Þ?–$ËIÑÒô_©¡0DÔÅ>÷ÅÓO'÷åß§R ÇÀüÙ·aó>J;ÕŽ…ÿRσuÀ“ùÁe˜œñ£¯Ã2ë‡9Ø™8!ß91Áæ&n¸ö÷SxùexÛÛ”vöÙäHÖRŠ—_޹bEœMÀ¹òJÌÔóÒî¸Rüé§ãŸ>DÛLÀíéaÎ8ÿŒ3âõñþùÒ̳ÎÂýƒ?`âXsÛmxúÿ‡¤žOðV­Âܸ‘þ} 3ºŸÃ€û¾÷aÙ+¯$×6öóÏc~õ«-‹ðÕ¯&sêÙõëyýE1üîwsáSOñ›6À©SüÒø8=À•/¾Èž¾>.âßú-òÀ__s Ïård‘ßÿ}øÆ7:d­i…§™kÈžëºq_!¹Ô|ß§\Vþžâ½+ÅË#]Ù³R©Ä}Y3‹!†atEø°fö´ÚŽÓxx¸¸qHªKÕ´‹VÚqœ6•çR£™ZaÇ˰¸ÆŠE¨ A•|ÚM‰qàR•ßÓ(JÐQÚ‹–Xˆ×fùÿСCñc!­ú6K___œ±§§'^o–«NpüŒ3æt‘aòÐß>Äkn~ |îÿûyí¯åmO¾óG¹ب™TäKŽô˜wI3W;^wÚiIhZ³4°9™ä‰pdš&¥R).˜°nó:öoÚ½Nµoºv.¥<µX ;àŠÅ"†aÄ÷f1LzçjÃiþñÑG½øb>ü®w±Ä# á­D”/î3ŸßüMJ† X?ü!ÃïxÆ¡ ˜6¬ ÈL\ûIlÿ' ðª‚:X?X·¡Ô¼Èå+RçÿÄ«ùÙ°vmU»kýƒ*+VT=®ÍÞaÕù¿öé” vƒ×VqÚi¼Î0_»–Â=÷P¸æšªi²:O½ó×¶1˜«Wà 7¨´¥wßù‰O(qúg~ïøèÇ?ΫN?MïxŸØ¹“¾w¿› æ›ofÍÃ^}5¿õ7Ãg ƒçŸžûúây •»ÏŠÚåëW¯æXÍý›ZeÇÞ¤BN3AªÒ:Ž÷%éâR|F* §…}˲^Tbÿ´ˆ{i£KHŠZÍ5~k”†'@)؃՛W[¸v\‡,,-ÂiÚJ;ìXª,w’yO[:_HˆŸU™˜UÛjññ11 êzy‡Qç›>†oŸ r.é[Bªmržé<ÎÅ+=}ŒÿxËÌÛ}ZaÇÇ㋚ü>I: ¦È%÷ÆvíÆßgšEÁ2P±Ð###U%Ü7nÜØ’’î½½½Üpà \ýõlݺÏó¸óÎ;›~ýÖüˆMÏ¿©j²5S$añ¦6a&G3GY±vçŸ~2 “ÏMæ0.×,RæjÇëî¿Þð†éw”B \’%S&vÅb1.šP*•TÈ%^\-X3=†aP*•Èf³ ÆØ»%]$Br7-4æjÃiþ=Êq¶råÊØNBM+Ï='O’ó<ŒC‡°¿÷=ì矇RIH]uäÆ&+M°ê‰rž¤„šú+Ÿ“Ul=:}£§Ã0ãLÓÄÞ¿ãškêîÖ¬ôkÖ¾ægRž„ù<˜&ëxð›ßT^³wܹs'oºþzz׬áÁdðþû)îÝ‹á8l^¿r9Ê$À…~Ô[R|ÝëøÕäri•;8ñ„b¶äóù8¼Ýó<Êårìe[(¦ÍI¹èD¸ÍLŽpHòéf¡Ê%µIeOñ<4£YÑ9¦âÜèoª–gÍûbk+ûãÉ—ÌI`Ö´ S“…¼© MIøúTϧÇrlý;µXØ*;Ny=Ïki>_{BBLL\ܸïO‹W"L¹‘G†ˆÙ>~,–‰—i† yò€Ëäµi+ýý"ºåõS‰}D­t•ïÚ×Ê~Þ!5räZÍè'‡‡MHß›t‡…X8K·%Ý6éV%?¬~3Wg”çŒ@Ørû¾Û[öÞ7K+ìxpöÉ“°|ù¤çšN!‰•å-±QãàÚ!h‘² ’¸ìVP/>úÆod``€C‡±sçι{>³î(›V­jzÿ4â!T*•°^õ©Oñ§o~3.jòHÛïo«ÇñqSžµcßFHn6HV¸K¥…Bb±ˆã8„aH&“Ybý´d™äMVE€ËÒß_ÕnžyÔØ«”z>=3§9þtÚ¥$¡¬w<‰•wRm©À?oûg¶¬ÙÒÖ[×.;Ö4Gº–ϰ„”;ŽC©TŠC"Eôò}Ã0âðsIó!E¯ wIþMÉ? Ä…²ÒûRtJÚ!E´€ªbZÒfÃ0ªª Ë󒞤X,rÎ9çÌ˽lǸ"KD Í,R„„)ÖÁD8ƒjÁJD%;Âlìøû@Òo(ÄB”<–ãøøñBŽÍà”býv·ÕSµf¡aFx`¶z3Ro³mUŸYXÉø- ƒ0‰ÆyƒÑkË`Ú¦4ô“ä. 
S¯©µ»»ŸÞÍ|Ðj;>uê—OL@ÿ¤ç¦á«¥îQŠìäúûL@¨lO‹a-³aޏ={öLÚ6—ŠR½½½MWÓI³|ü\^{þ세L&Ãø£b›¦úÐçH’þبþÆÎôÔÄÅ÷•eÁbDkæÌLíx"Œ]}õÌBS£]Ó!O¦iƜΟÑzš¹§éв@œï©é²ä]ÀlûbŸw‘!Ž=Jï /à­XA5N$gýŸÿÓàæ‘ ìêÅWJ%‰ƒ¢ë\òlj'jâjÒ “4Ù&Þ«2±°,+.²ââ‹ù¡mc‹uÏ3U¡„cšôš&¥mÛõ½(‚Qz’˜™ð<6 ±n1Dõ`²÷…íà{1[;†ÉžÕÚP#úûû« Á¤½Þ¤è‹çy ¦/˜1R¤ÞàW&éÉYucs$¹w¥l²Ä®›$ÂX)ÚOÊ/Ká"7:†¼IR¶9ŒöéÆHª”8$’™:w4Ñ«Šóö€³:wKçbǵxx CÓ žçÅ☤š2ùn–ñRºO“¢Ti+]©xóæÍU¯u'þ¬Ë‚œˆpéÊí²°ašfܤ?ÿÒW¤ÓbÈù¸sÓår¹ŽV…Ÿ«Ë7ä>Žï_$¨‰–{d•)“AyT‰°æáÅšD†4ãõl×qÃ50ê¾ÖÂÂÄÄÛÕg­¥"ÜIŸ&•šBT?’ô¥²šæ¥þ¯ôÇɼXÄ2uóÔ¯½V¼šK©íYTÿm’,œŠ´H’E×dAV蒯̹ØñK/½ÄÙgŸ]WÊçóÍU6Ô=ŠíÎ&/pš¼Ìº·—O&úQ#“h™Pˤ{xx¸êž/ä°u ©,R¢þ K1e’VFMLÔ¤Ê$ñè–É\Vn“½å’„ƒ6‰øJ  HÜÓ)<’ðõ€$y¡C"ÔYÑs//ØTOh$ ¶:ò‚ÄÃ#Ož€`Æ:Ý€äeM{…‰·—ôϵ iAM<ØÄ«M’£Ëws¥R‰Ÿ›*t:fúù®õª‡ÉÞ^­Q7M“WG¿"QÊ×…Ì,YBB2d01±°(SÆÂbˆ¡IãÝ9úéo(¢µ kÊÐ9㓌ä±M²p!Ii $} ,zD9uã™ôÓÒ/ zŒW£BU†’óE¢PŒZðÌš_£ÎsÉ [bR#Ä£òµ "øšá`ò}µ…ڭȶHÈ"sØv"=Ô®‹•­6oVÓüz¢•_Î%¢U$’ƒi*)& «å‘;¤ËÎåª%Ù§²]$ylÕÒE#I¥ömK¯±zžj«xæI7^*Áž=ûØ»÷©¦Þe£££‹E¶oßN___üäm·ÝÆøø8Åb±jû|0ñÐ*¸lŠŠEu'*‰¼þ†¿ÿ{ø—á®·¾•kïyþ¯ oФËÐL:œ€ Û†«™É(+4MXùPõs"¾‰Å:n¹¬[Öd!.mb iq­^ìx>_-º iKª]u™Ø²Ôë!±.Óç ‚d¿F÷uppòõ¤?¹a˜ì+VhÛõÛæO5Ýa²ÅËõˆXiš¼û™gXh<÷ÜsÍí(!:2ÐÌd234j!nþÉårø¾‡¬¦Þ‹­új<{é¥*ÅÛÁƒO<ÁØC5^€È¢~é°²ÍTW7I7T¯¢6¤ ÏÈ]wÝEx,¬• …–eá8N\5·\.cÛvü^ÉëmÛŽßGÓ4ã "L®h™~Ÿ›Î˜¾ÏoÿÎï°oíZ̨ éI¥T5I«Dßo"Ø‹ÅØû ˆ'šõÂ`'aY\¿~=¯?ã .»î:[¶Œ:v,ùÒÕ;Ã0Œ¶å§mÛäóyúûûV‹U2‰ž‰èØØØq¤e-Ò‹—„§ßGº¢¨TáÕŸW )%ÚGÄ8›dµ^Ä2‹dÂ0Q]¼ã ™|Ö–!žÊÑËœáöE†xdT˜ÅØq¸®Ä£Ö4MâKƒ8ªÝóxÝ ¯c3› bhV¡Ÿ]#ÂI±©tsB’1ô¯j"í½[!éWk=Íf+,žõà®ã0pæ+›€''…¦J%õfRV„÷4Ø÷“Wrп”b”9+y‹yž«$›V6[í”›„D˜Kù³LÌêQÛÔz¢SÚ/¨U4òL[Ì,™äñ–Cíéé‰KûÎgxª‡Çé'Ï®îRÅgQ„¥:_Ôÿð—ÿÀÇþûÇ0·™p„dUw‘¯†NÉ"±ìc+æP>·C¬9y+½´P‹$±!ÈX¦¯ëèÂE¼šä=”¿¾ïó /tºy3ÆÇgÓóÏ3~é¥l:zT GyÍ6Â$ñ¶‘°0Éb ž2Qo`ê’WMªÊ¿&–aüÃiÿÀ]ýcXW'á@–eÅïE½Éž¬áÈ  ¸Ã۶;pÏ~ÖŒWÅ™Ù4“5‚b±ú±¸Ö‹«zzÅP7ï=x°¾'t“Ô m’(å\×¥R©`Ûv,¤AJØ4MÕ†0dS?ñ±Å›L°%\V< IÕ¸dN”¶Œ âäìš*•ª»™ÅJž¹ΤÉâ'ü‹.^rö¤½,$¬3L»H29L{XÈkæªÿLU-UО [žN>’“­R©P.—'åiKçZ³, Û¶«¥jéd.3M÷!ÝÍ]Gîâ[¿ö-îâ®9Ûò|äIœ2GœGRZJËg¢‹•ƒT÷y²8º¯[`N;q¢îsͤª‘µ`2ï‚'ý¼Ñ<Îç‡]¯P¨öƒ±¬É¢Z½aå_7]ô,X³fͤ'n»í¶ªÇãããóÞ¸'NðÒª·Lñóy%ÏÖ|ÑËdác§>Våa„KõJƒF3O< ,;uªq&IP_È9õ+'‹!üKS×u;ÒŸÎ ‹‰e*­è©µk«Cѧ~a’×I&ý²½Ì¤g6›åúë?Á¶m*‡Y©TŠ*WøÌg~•?þã›y×»®# ¯â«_= 
ÀÚµkÉd`­µ–«¯¾ÇQ¡8Õú¾úÚCõ›ÖãduQÖx@ÜóMþõ_ϧ¯ï|>úQµ]ôôôºF:ak|ÉVuŽ4žÞ¦/WEƒHö‚RI½¶\NªBÕ".ûÕéB ÊœžMl»Äk_ûaî¿ÿ0Ï=÷_ü,aòå/ïåÎ;ï⡇¾/ ÌB?økȪ ”UŸúûû«ò0-t$¡½GýˆÉ5%U »vAÄCy]Ô†~Ñö\ô7ƒÊÄãMò³IÄ4ÅÓƒvùœˆ·Z퀾Ö[CÓQZ]‰Ñu]ÊÑ*„4°m;ÎY(âFŒ_ºR°Öt-ªùË7ÿ uô¯°VÏM…š¸ÃÀ†)?å ' $E¯ã!àÓ!‘)_ä6Ñÿí—•Àt\  Lj~ÒEÂé<õÛ¿=é}H/V¤‘¥ÏBåã`þð_¡hý66óŸL3¿,èëëc÷îÝuCSAyÌÕóšk7GŽaÅŠ¯îæ CyÁÕ™ýA Bù CÉ`SVtÿ¤ékNž¦ðä“å@Så>YH85wt¨®¢V*•رcG§›7+Nœ8Á™Ï?¯oꄎOB*(ŠvSr¾8Œ |vÿÓüãÀ¹Ï> äËI¿˜Ž„D¡©`”S¯‘~Ò0TV,VtÒ‰¾DÀƒdÅQ2ä×Þ Hg×—ç £~ê#)])÷Râ%£ýððä{›IÝY%]àÑ[µã…0„‹.:É—¾ôÛ¶õVm÷î‚ÂåÑë¾’º=ÚrI± W€Í €IDAT`ûöíª:Z¡ÀoýÖoU•îõ<[n¹…o¼qÞóÃíïÙÏÄY·N~¢Á®j²$yO$Ü¡K´5‹Ÿ ƒìSïdçƒ[èù—4 \LÞO>ù$›×¬ag“åÝÃçÀýaän9Xäß<`P¹Nã‡ìÀq.¹d U*7b«ð¼ñÈ3KgJ„«Í2õ»‹=ТÄt•¾VbY`]ú¼Ãoƒ°rÒ>ÍäÉkN?>uªzŸlö·¸÷ÞêÊ Èf³±gËÐPƒy=Ê~ŽñsÚÂæÍ›ãío¿‘W½êgZzoæ‹#ÄÙªRæóy|ßgxx8.ìÑÒþX&(éÁº¸bJ¢€oð‚îÀ‡Ô¦ŒÆÎóà!x3àO Ø€iI¼1ŒÍp® o0à.¼ ¸Ž:ŸURêÚr> “ ›´Ñ² èW—GúûÕ«¿_=Îd’ga` z’)¢™dK®=¦ë*EYÄ:96¨ýÓ…¡ ªn_L2QˤÓ÷ëçÌwYq+5 ^{ìÏÿâ/¶ÚÌæ Ÿ3[À Ãb±ç””~BŠ#AÀÐÐPC¦ÜN–ÿõâÿåüCçÃ…s8P±A€ñÎÁœx5zzz”P×È[®Ýl8ô<\ÊäÁW IÔ-å YnAZoÖ+š¥Í|ÌçOM¿£AœÐ¿kà43¦Þ$Þó<î»ï¾N7mFø©Ú’7>ù$¹i„8Ï‹ %2ƒm‚Uu×ýÿÈöÿ²²[Æ>jsÝe×Q©”â…”´ÇÅL<,, Ö3J«„8Ã0âj¤má®»P ˦öªª¯ ¦i2<<Œëºxž{áN*D C¥#»ðŸ¾˜ÿªA¼$b¿çž¯ó3?󯶵½]xx¬Á$DyÕPfÇÇq2™ –eÅ1š Äc­\ãå  ñ ¨Tª'-Ù¬2Þátà,4 oÁÇ jQQD8ø;àÍ&˜!ÜêûM8âÃMøV&ȇ!l-ÀzC¸5*­&i=jfËRÞfâ"“¶ PÛëÝÛN’$f2jâVû™•‰p½îz”J‰×[3û×*ÒÓQÇ“÷wíbáúÃ%ýs³y±|ß§\.Ç•¥kçL&£½Û4óÆQ|ÞþÐk9qôÄÌ^("<$ÂZ¡@Aú¶·Õô¦©šõºÊ¢/Îqà•A8`LÎc0šgZ DðߜDžõZoaËš¹Ø—NxÛlP¯åt«‰õœE|Ç~yþîZKØOý”ê«Ó¤XTºç¤j µ·xñ¬Ûkšd™ü#bÜèè({öìatt”n¸!å:Á˧–<P]gÀeš&û¿½Ÿ3¿w&æ·LeÈ%˜áâ FÓÎ}|9ô7~>ȪŠÍç"Šþ0ÍdŽäû©üWMÎ=o²7¾¦uH¡Úpž7¾qa ’ü+½½\òÀ°vmÝAV$‹œ…Ø9”0 ùà€í¿¾l(ØÁú)‹íööN_â$Ú™P|àþ2…7}Oü:‚T†ê‡ÞFi#“ÁÞ¼¼×i²­·ÇqX³f ?þxç.`–”)³•R\Ð7 ¹ãÇÉår“ VLÂu•¡‹ç˜e©qÉ0FâµIhO(σz­ôù~I…:Ñïpô›A‰|Š:`ïGÿÀ_ï‰^©½ßJ NwZT%yk$¬ rýSM¦256+• bÖ,âlôRÓxxdÈàÎ ²x{ …ºl´§™/BBNÇäâÏ_<µ÷±„Y¦mÓ²êì›’zý™2Qcuå4ˆê{û‡ÔÿõºÂtújº B^xr5ïß_e£jM+ Tªc ª³ð}~þàA^9tˆ“?ýÓ RˆXUµ~¸.÷ù¨âc]úû«;ãL&ɃãCìbZj@]!©ˆmP]ÑTHW6•ÏA™$[µ X&©jšžwгf3GÛeÁØXN¬|üi“ÓK•Ó P‹~A´ÏÓW£™÷às>žç6¶IÏS«Ï­Z“Ä ’K¼HÒ·Ö nV´½AÿÚê)š…ÉÀ .Û½>øA@-JèCóç^¬W-ozæ£Îˆ¦»X6÷C´Õ¯æ’µkáÛßžrU6Nn?×dÂuÐBœf®œuÖYU+ü¾¯¢—|0²V¡B¹å²÷e™Êb`¹œ¤á©/¦Ç(«8L9JÜžÞ^ë 
.Ž0Ó¥€ÐÌŽFIþ4pÉïv^8z” ÿîïà{߫ڞλ[¥3ú°oß>®®À’'b±c):†MnA‚X.—©T*d³Y†‡‡ñ<Ó41M“rRˆlùßçè¿È¯¼ím|ç;ßétógŒA2%˜/ÿ™Ÿ©ëS*©¸R™ÜYJ5¼2I)Ž Þe¢¼m$B¬€WHÆ/i]K3 ‡!šz„„ØØSæ‡s]7^ªT*SÛ¹F3| /]ÄêFs< ÿhÕ.Õ¢Tk,vÈ8¹^3¦áš ×,n^Îzþùª±ïÃ5×\È5×L!Nˆ½Iq%Í’ãôN7`*Î|á,õÏ*Á¤Uü2*®_£éެYÿ/iw«àÿ}œ÷xœßÿý®6ó´Ð–ËÍN4“”@S!9Y5óÇæÍ›;Ý„q?ppæË/³âúë«F|?qZ®åÈ‘#*Qä’ÜCK>ô©¯ã³»¢@˜¼CCCqþ8¡UO ‚ ì>ù'Boo/¯yÍk:ÝücaÅEäŠÂ0äÊ+¯¬(zÜ­i‡#ß¡>Z%L¸nƒ…j—x•9 CNž}ÞÝé«Ð4KñÞ÷ýø¯ëº‹B„‰T„8g–yü8|ä#‰ÂÓÈk3Ø[ÂLÅÅN j,R{‹$A³G"5‹É ç)A*½ˆ"|J%%>åóê2ä±8’Ü£éùB­gvªÏ¹¼¦¶ò›ï+ý2—KÖL×­KŽ•Î³žþPãzQþÓȳªa˜´U®’4 ’²/}?dJrºËs…Bõüþ×~í)`ÝÌß„. ¬“Ý»\.cšf\ýW‡¢.0Ò¹#%†Þ¨y^Kž³¼ó_ÞÙé–Ϙµ˜L|÷»Xù”[°¬ ÌT„s¢{#/+E÷) -‚GIWÔ¤0V®„7ª/ýl§ñ˜˜æ{_§ÅZt­çáqÆK/±ìïPƒã:HòÙ õ¥¥mWÓ%œ|)Yewœd²À»Á~7Uš%= i’o\£iÄabðÖýûccñ¼Ä¦kÿ½qöûñó>oýÈ[±®×ÁZœÑk ŠÝ)˜§«×•Ëe±,kÁ‡´yxñܶ† và |óþûOþ¤ƒhvt€ÈóSÝ´š×˜©}ÓÛJ$ëèTž™xw‰hIõaÑÒ"–ÔŒ½P¼Ääë¤ÖSµ™ƒr¾4SE‘g³IʼS§ªŸk¦†Cm{rM’’G5¾¥fõ÷×” ž{öLò.ÃÇŸä•Fjé`«ÝÛI&¬Ö{³‚@íÕ®þâÖ“;T×U¬Ú¾K ê‚7P©x,ˆ‚-U´ÒýÂÀKÀ~Ü‚§q xG^UHæ)>ðÀfx2€ &Œ…ðz êµ§+ðrôÌ…Ì~xÁTæúhÇM8-„ÿðúóXH+^+1˜˜˜¨ö’°‘æ”ä~ƒDx+‘¤ÐhÚÈ|Ìe‰¤¢R¶xÓ{ÅMâ–]+Äœ~t3¼÷½ Ÿ/Ô0wi¨fiòÒÉ“lذPc±ôxÏóTþ€™ ˜35Flå»vb£ÑÔr‹KN%U2‹SˆHß~úÛ_6È9XßéÖwCCPüé·wºM!ýÓÀÀCCC±'úE]Ôé¦Í 3®˜êû>¶m³3“á›—]o~sãfQa¤RåL&à9”k]­FÙ`,æ ˜O4?ßWÚB¹œx«IÑ¿×Ѻ¡è¥„}JoÍüTUL ‚€r¹\%¤· ‰åu]õæ§¿‚@¹aŠÛ¢ç%1Ϧ™Ä Ž“Äƒz^ŽêØéÁIZy–8fÙ&çªE>LrÙǶ1NÝÌÄ«õ˜ðáû0’ƒÛ 8+€/¿`BÆTžhIÅãø‰ŠŽ—%Þ%WÙO ^'ÛÝê¯T]¶©÷dnFÛ ÂäÔuýáŽÜÖúw¾­œÀäìÑõÀqfÖ‘ˆàVDyK—¤?ïe¦ÍüØ¿¾ú“nÅó¼©ûbÉ!«YÒÌ›·{÷nFGGãǽ½½lß¾}Ê×Õ«0ÎßP÷9 ±,KuÆ:Ù¼¦ÍÌÆ†'–=Ækž¨IuèÃø3ø­™)jµ«ÿ­¢‘—]zœªYÌÆŽŸŒþ^ùøãðîwãºÊ.ª¼e}p”]gí¢d–!m?u)}ëªN7afí&Ü¥R‰þþ~~ï÷~¯£í™ ÒÕåóyu]ÓurIu‡*§[ˆæ jü!m’Š@æÉ¹œú•hòÙ±¬ÖçZ›/J¥$G›âfÇ\ì¸Ö#®X,bšfýEêéHkúõ⎉ æûê ¯ý܈º,Õ{Ò†]¯¢|(š%-ÄÕºjZu\7]À0Á2•X“C}V¥‚qÆN*K±è<~¤êÜŽÑJQ¯!…VD4“‰t`V >éÏtŽD˜O_®UóT_#Íšíõè’p†ÙØqHÈK/½ÄúcǪÙf)¢ÞãÕžoº°f–̶?~öÔ)Ö½ðXž£ººBaWÌ)>ºFÝ´šÅǼ qžçÑ××7ã×5‡aÈþoïÇ*ZªÖ“=M›™ [þ8½O>9Yè `ûUÍ ¶ÓL•ºh.äóÕyz\WýJ´G¥¢Îë8Id†a¨Ð¤ôÄ2Ð{1ƬõDmR½Î¯Y°»hd8Û¾xÃþý¬þéŸÈó=ÕÜ.øâd¼ Ç?|œUÿkUÛr³Ly}x]u¿ë100}˜`·‘¡æŽ;îàñÇŸÃçÆlì8$Œ¼;“´²b=åÐC1\Ô$o5aÕ\<¼5r”óUŸhšªOÌå&NLsñ‰V]¢,HfÛûøxx¢øæ0 ñ}¿y{ñFe¤’ð³Ö8Ó öš­UïË¿•‚,ɸ?‹·ò¨¡…ZM’ôFû¨Ï¯KRáX^CtÜÚ¦Ú©¿éçÒ·j‰fcÇ>>/¾t–µL 
µ3¥bµ=Õ´ˆÙöÇ/½ôNL€aLŠ~jÈ^íž®>¹$˜7!nÏž=ÜvÛÌœ¦O?^w»Tt³ÞeÁLþrÔhÚÀllxõ‹W²fbè%zöÒgáR8‡s:}Y1a¨¾=Oy: IDG:éµë&¯)ÔØ<]…N„º´(Wï\P?m‹„dzìûp‚å(Ëç¸67ÊÙÆÉäX„“„¯Fìs¯d,iL̺UXT†úÕáz{{9räHÇîálìøÏñùwL~%\¤s·L+ÄÉÄÞƒ ¦þG¡8ª4†Â6õ^.’ZMáûªï×^Ó³g¶ý±‹KŽ*W²ã8“àjÃ1㻪ªT’/ZYmkD[\ñ™üÝPD‰ar(a™Nôÿ`ô¼CR}X7—¤ Ñ69¾‰à„ô÷—ÝàMÓÌÖŽ_xeëŽ>4³ð WXT{j4sd¶v Äö+ÑOëÖ­c¬™d©š%ͼqãããôööÆ.ŸÍ¸zzxœ¶¡~XªçyX–…mÛÚN3/ÌÆ†xq5—ïÛ7IúʾB”ÞÐKyâ“®z'ÔVÓK•äIF?QÞÄÙÆIúrÿÄýÁQþÕÙÊ¿—á*^àN„«Xn<ÇÅ…/s~æ{Œ:×ø¯Ïí¡‡ýŒW3æ_ À2c‚åÆs¬ú¸·xÛ*Jõ;â÷²ÞÅÄäÖ웸Ø|™ÆaÖ‡}ì÷{˜—q¹5ÁG**ÉAÿR~ÎãjÖccãÙ`"Àü,KÇ¢ûQ,ªÇrŠ‘¤¦‘{``ïUEÎ pC°Ã÷a“äP’pµ €“'ÿ:PàlÖvaD«}rÍUä`ßÝû¸rõ•œß{~ô'"mšF+—õıˆ¶ðß½kXgÞÏ:î¯ûZSM²ëx_ÖÃÂ"$$ hÊ#0-ÞÕk¿ß0<"$äù7=ÏÏ?óóópÇ'3[;~x7&%àÛŸÿ<ï~æ™Ôý®ÃÊÛ"*ÎPþ68÷BÆRN ?†™}Ï„0Ôa©sa®ý±ô ù|Ã0¨¤¿¼³Ù¤ê‡1Õ“L&qÙ¨W¬ Ý„(q;Kâ–A}_¸$yÕ’œiÃ$Ö‰7IzU;XŸÍùd¶v°òè‘=z´9o8©Šê ÞwÝïhZÈ\úãcËþx#Œ§átޏÇt‹Þ!!ûÎÙÇ3ç<ÓÔ1çEˆatt46l1ò;wÖÝÿùçŸç¹'žãÒgÞOz“Vé¦Ê!i,jV$ÿ«$CÎdô*îBb||œ‘‘ž}öÙŽœ¦6 ¨¶¾ò _|ɤ´93ו+¯µŸ‰ô]þ—É>T‡c¦=hLLÎÈlfÄ»”¼Ñx¦½† ¹'Ù`&øf`cš7G7E^}ƒúÓÀÓÕuÁ ÞÊÀ€ú—¢6›Ñë}ÿ*L ¬œˆˆ=Hù2»°)>–t-éÜMé9Im1‚f*\ÖŠ•’„Ý4aýúcœuÖÿó~øåyzWfjÇÇŽã‰'žà™cIÎ Ól|~´æG\øw&á¨]:PN{œ¥ÿOÛ¶< «„0#õ“ÞVÏk,G®¾ˆfCÑ›÷v´È0» øèè(žçUåR™OfjÇìÛ·ã?~œËVÁ=?û³lýÖ·ø÷;îˆ÷™ô.Äù œœ¢²ýá‘ú^¾KÛViÀòXjÏž=<ñÄ;vlÞÏ=›±ñž={ظq#~¯‡¥AÀ´+¦”ß-’7g¾ã‡C’pQù¾¢Aô\ "gq%¢¥s®I¸hú³¶€í¬,ÔññCÏ=βg‚u÷ÿ|þóÓŸÈ#©:=ÏÚ±¦ýŒŽŽrèÐ!žòjÏ7³ŒŒ°ìòMñötµòºÈÃXXq$fjELYÒHL«7æ¯ÅÀˆŸá-Í3Ï<Ãá—sÅsW4Õþyâúúú¸óÎ;ã¸ë­[·²mÛ¶)O?õgœöJ]WyÉíâ]îs¼Pê&"›äº’Éu©”TIÏç“ÿ‡†¤Â‰zMZçÛï÷pÂ\5ež£  ±—ˆ¦%LLL°wï^ÆÇÇ;rþ™Ú0¨ÁÑi¯¼Âñ ª„8Ïó8÷Îåªß½ª#+¶i×ÒR:´xåÈÿÒùXX“:ŸI˜DBÌvõù±ì3CD(« k”ùYf“ߺÒ){FGCvïþgNž|ª='›†™Úñ±cÇ8|ø0ϼô0ÆcgÄ®€µsº0 9råÞzè­j°,úí< q’WÃÅmZþŠÍ‹Ý¦…³´=§í¼]Hj¦…Ê¡C‡Ø·o_ÇÎ?S;!îð©aÖð~C2§NQú±k|¼÷Bñ`¾Vô÷ûdæÛ™ªÕìÝ»—ÇwDˆ›ÍØxïÞ½½A–jš¦ÔŠG\¨/´ùLÔ*ÕCè×"©ü)…Š$žL‘—iÉX[‹nM³PÇÇãÇsÖ¿ü «W¯ž¾s Q6¦« .Z:ÄÞ½{;&ÄÍv|<22ÂÉ׬«zNÒ”ê­ZËøoŠ>N¢!µ%µÑµ‹áiÄñ£ÞÜ3-Ž¥i4ž·£Ÿv°ûžÝüà?hzÿyâzzzèéé‰÷ööN¹ÿyçDz«.åå¿rÒsAຮâëP¹µ~z™,Êöôó2xL‡˜Irza` ¯àc‰m¶xƒ¸®ú_<íÊå$%‡$¯ñ¥XTåµ²`) î¥Ú{úüâI#mõ¼¤r¼ˆˆÍ,rJ8\úúkóyS䄬­Ÿ®Ÿ¹K“‹¼‘<8qeú5r\¹æÚk¯½¾]»z±¬¬\Ù™z¦6 pá…rúéOq|Æd²ÀU{¯âœ‡Îiû@ÓËW<¼IâƒtR2u=z4­¥··—;w222Ò‘óÏÔŽÏ=÷\,ËâžžQ~çÁ5`‹)8ÈÃ_8ƒóÏ?¿#×”ÆÂÂ~äKöÿÏÞ»‡Éq–wÚ·mÙÖÁ»de|Ø’ š‚qM4€Y 
ÙØn³‰‹ tåSYBwÖ@ÈÁIw A‰“®ýìàMÂ4;1ò]pJ,1eH°=…4–FelY>Èúþ¨~ª«{ºgzzú8óÜsÍ5Óuîê§ßzßßûz¥õ™ TÆÆÆ"Q Û,ÕŽ·nÝÊÎ;ù×Oq5Ï?tˆW¾ë]U("²”+0?‡Áü]/:¢t‡½{÷²oß>6oÞÜõs·Ò7–A¡´i®ëò¦ááêNX7b…%DPú6iBq-G%‡W&¶­AuV5ÉK{ë±>µôUk'ã¥O-ýçb±±Ö$û5êcÃü~¶ÔÄð}pœaLs/—]6ý ûÇßxÞ.xüqÎyÝë¹áÀvªóú)+ޱ±1ÆÆÆ®¼k×.Ö=þ‡¤…äð]Ôé¡GÄEµøÿõ¢¦å®MÒ”£G³k×.†‡‡›îwEˆ»ýöÛ9pà@”qÿþýM5Ô—<úhÕkσtÚ K» xC¸®^Ú0%‹?kscMLÀ—øfü.¿„iV„; 窽ָ¸U;1é8¡ÈTëa9~\ˆó„&Ë5HËRMŽ+ß+ÂV\0„…; q!N3¹ò%ô®Þ}±®Òi×çk¼n¥`—ç…×mÛ/%Ó —ÿð‡—Ó Zµá3gÎ1o«4l67Ã?ÓñÌ4ØØLhVÛUO«v,óìÒTÀÐ!"§J‡ž…¤š˜ä´„ÚŠ§U;~†pþãÜcÇøÍ¸î6ÞQÈßF9”ý¿ŠsŠÒfZµc7â<Ïã×Ï=·³Ê¾T %Û¤h“ÌPÉáó«X.±¯S;1,‚T¼âpíä¶ã4x>öíÅiÐu+áÔÒ?®GmÔ²*çJ&ço/ýyé¿× qÒŸ¯‡ô©%çb½>¶KŽ#ýíx…úo~³…϶ ´jÇOðÒ/vï^xÃ"aŸB…8¥ƒ´jÇÂÙ_ŒëŠnÔŸè“ÂO,VU&–;å¹ÕÕâùÐâãÂYì$A¸Mí±z/Bí$k£*õ mÓÌ>–5_Ü”sïÙÓ›ÜZ­Ø0ÀÉ 8ï—Vyl:–æˆë %ͪԡU;~î¹çøßùÃuòÃzë)î8~iÒá}ö*¦U;>‡áy¸\P¿k@þ³º8‹JUGÕv•Ъ‹—Á·?õ)¶â\uÑEð¿Ñž‹*ö«‹„ÅRù¯Gø=˜"ôNJ…¯ƒ²÷¨a@`€çV<º\·ýÈ$J"—«/t5ŠÐˆOðÖö#C/”J?K&ãÔös›ÍùZÛ-5ѽª=v3NŠÍö—ãŽ6q!nrrñst‚VíøÜç®ä›ÃÃb‹.*Â)g9íñYe§‹\NG<‚ ‹JÆ '5š(ro`tLˆ‹GLA(°Å ƒ‰À&QSýY²RèZhêwÞÉôô4sssŒŒŒT¹Öãç›×}w]Õ2Û†ññq^óÈg9ççtå­ÃK$ÂPÛf:Ê|Z±aNŸæ_ž¼?Šæû+/î_ ZµãÓ§OsÎOÌ«þ{ÿý÷óðþ‡Ùôk›ÂФ•jvÊ‚¤Ršs¬UZµãXà8ªßµu`b7á O?¥Ã´jÇRùé·ÝÈ/]|1ìßßžÆÄ',ž âG©R(‹jÙ²¨f_$òгb±:õˆiV4–F9”ë ]ÍôSë q ­W:G«v|êôiFk"ŸªˆWH°¡Xm(±xtJ豤ä‘å²N&j©E®ïZ›âª=F¥x²¼ÒøwÇóÂm¬ðQI±8ß;Tï³¼'9öøxµ³ŠDjÕ£6z«—cÔVíøßñYóÌ<»q#W™r=Œz žGØ¿ðY´`Ÿä<ΓÂ>ÅÓÌÆŽ„4©p×+d²FÂDå¯x´‰Ð&ÇËQ¶Ñ!NˆÍpö3Ïpñ±•ÆLJÍLE)ó0ÍùÉí•¥³>¼ã0g?ý4þÑõ•Na SzÎRìX8÷ÜsçUÿ5ƒs¯8—¼Xye;J·‹®DZ±ã»¿û]¾xõÕüÏØ2ÿcp Ä©¦/5ì¯þjiŸw»Yj{|°>0˜¹üçø¿ùð¥j]`ÂR¦ Y ‹qÆI‘"A" õñ£Šª"¤-T!T<êºâª4GW…¸%_ܳ£F´T “ÒÚ:Í¥ g•mXpg~­(}ÎsgŽóäÚµäóÕ³Ís›ÙüÒÍ*µ€ãT¼F”Îóõ ¿Îã¼ÿÜ€‰ØÈ  ]€«Þ¬CE8e 0üoýzÊ`äêµQP_õáø0÷MxäðÌ ¸`\u=ÜmÀ¹_ L¨x.nÝÊɯT\a …BØQPï e@xö™g¢J»×ž¾¶×—¤(Kæ‚c^ûÆy3 Þòa27”èÅâ„”8èŒrwY ˜±›Uç6®E<=TBÎä·‘Nû,ç?æ‹À’ 4(«¾âÎ?u —^ú$‰m@ Ö®]Ëùo8¿×—¥(KâôÜÆ°dyš0,dP’æ(J™CÏ?ÄYÏnäOþd]•W(VFì^H¥4it·yî±Ç8}âï<çß L¬U"¬d6äcÊíøøx48K&“Ñà¬X,V Úñ¨ÿワ©©©h].—‹R©T*úß0ŒH ƒùb—ü_+Š)í' àÛ—^ÊÏ•?gœðw|<ÌE[(@ ôÀ°X1"ójÆ÷ý*¦öÿøw}8sþùàƒŒþ ›íQš‹ø}ö}¿JËf³Ñ:ÇqªÖÅ…¸Z«‘0511µ©µ‚X|»T* _µV\[hr¢neÚEhEˆ“׋Õ[744´äëí%O<ñk×®EᛦÞ;—J%#},* è[!îÉõë¹ÿþ!°àk¯üwß}·ºÓ+Å%?zŽÓÿü*îIÞÃÉ;Nª +Ç܆ 
œýô¶ŠpäÃý~ø³¥¥Ç|gÓ÷0¦ŸâŠW0þVð?DO“Ó‹`&!œòÿèèh´>î§P(D™Zoùß¶íhÀ÷P±l°ù«gîæ²ÏæÍ?õS•J ˜š‚œF–Ð#ÁÀˆÌí@ò Êÿòýò}¿JÄŽ‹&Éd²®ˆºjE°…¼Ÿâ^§étºêÉæÃ‡Á÷Íe§MéOt0/\ ¶ø}–pK!Þ¶¦R©H8«ØXHÀRV._|q¹ªo~¿‹„²Óë+Sú¾â. Ç.¿ür^úÀK»^¤AQ–ÃðC›Yë[Ø6\÷ÞëXózVŽÇÖ¯gäÁq¤C‘‡Kö^v24¼²%²YpÕ;¶«<ûÜs¼âáµø{Á~˜oêü9ãaEñ°OÇq¢ÿã˜eYQ§išLÈ;Ty¤™šÁ|ÕòXpÅ mÀ„´QÎàþR„ýŒëk4òòÊçóÑwÅó¼*+êç8N•'bWm˜^ÜkTòB˜Kö/‹Ñ9â‰.’ŸPˆ‹å©T*òj²m{ž'”/—ËEßùø>–eÍó˜ÄöॣŸøDø>‰'h«'\<´>ŸÏGâ[íDD­ÀÖÈãLQñ£sqÎ=⺱ü~0AØ.khª²}+Ä\}õ9àÀeÏ\Æù³ç«§ =öW¼ò^lé ¯¢Ùiee1wßHE8*ÀÐˡٖ¹ª1MÐ>~wÙþà_rÁñã˜o‚\›sfIa˜ïa!Ë3™LÕ€OÙê5¡,…û.ø.ë¾$!ÿæp™„¯û¥2_\¸ªõ‹çÅŠ iq¯µ¸€Ï;hšfþ\.W•«*î±ÿ~ŽIãËgffªD0ÒlÛ®gšñ~ªÿ”ùlüá¹è¢‹pžGè7ò½ëã!ûñ‚™L¦êsKhÉr¥õcŸª¬œhý¸ŠÒMþÃñãÜ÷ðÏ‘ÿxëÇÈf³‘øày^Ôé.•JU•?¥Sl:>ÅÅç܈_ëa‘£#IîãBG:®;¡˜H$ª„´øÿµBœ iŠpòžW`ÿôÒöq'z-˪²5EéÖkÏÆ4Ë“EÂT=á2ô…§²ÒŸ¬éõÔãü“çóÒÿ:Gö¿@.·™Ì %»Pà /dnãF0áó†^_Ž¢´ÄckÖpõ?\Mð^0¾ºŽ›¹¹×—¤(KæÙgŸå_|1¿ðÁ¥í—Íf#OžT* ñp5Eé/üÎSx×øüÒø&ffb+,Zì¹®‹ïû¤R)<Ï#ŸÏGGÜÎ㢇†÷)Ëå;^ʯ51¼Ëf³Qåfmw•~dÿýWpå6r;a¨µàгJÀJÿÓ—qgÏž½ ¾ùÅoöúR¥e}t¼ã(o…¢ "÷þý/|¥×W¡(­ókÛ^Ì·ž9û?/¾m¼˜B<·zô(½æ¯Þö^¾zÑ¥´¢ƒÅ+‹Æ«wZ–UU0$.¸i®3¥lÛ¶ïŸ^C£èÑxÁŒxá Eé7æ†áö¯^_YPÁ§EÍ”è[!nîÌEðøã_þã^_Š¢´ÌEgÓ=›z}ŠÒ2ÛgÎpòØ¥˜wRU±NQ‰]/¾óòÆëƒ ˆÂNMÓŒÍ3¤ôÏ<ö6>5»…fƒjÅ7ù?.0×4P”Nsù÷^‚¹yámTV5ÿß×¾jù|>œÜ¨$™ié°Ê*¡o…¸{.½û0|ìÞ6eVV”.34t-WmºŠM£›t0§ ,ï|×wxþSÏ‚©áxÊàòÄ};0¯h¼¾X,FžË¶m«ç›Ò—¼eÝrõÕç°P—ÂJ\‡a}"¾Å+‹*J/9ôèó±^T½¬X,’͆¥Ø‰„¶ÁÊ@ðÖwoÇ0B›½éäM•üpŠÒ}+Ä}úªŽW€R”N27·k#Ìž?ÛëKQ”–ù—Ó×rõFÀÖDôÊàrï¡uØ?[½Ì÷ýÈË3î!¤(ýÊó®ÝÃbŽBŽãD¢òÄÄ„ŠoJßqlÛw±ßS½Ì¶m AUŽ¡û†±íÐ{~ý¾õª](K¢kBÜÜÜ·ß~;ûöícÿþý‹no^…©B/ï¢T±TxâGOPô‹½¾tE‰Xªÿ³arÕÓóî;ßÝëKW”ˆ¥Úñ¦W<ŠýÎêe’\QzÅRíø‘G®­›ntt4ú_Å7¥Û,ÕŽŸ÷¼ïcÛaJ©Ìk†zÁ)=¥•qÞ_®GNsLJïPNY2]â²Ù,³³³ŒŒŒÍf«\çëa7}è¦^ßE‰Xª oÛv}'7ð«ûÕ^_º¢D,ÕŽ7ž÷s¼ð‚syÅ^ÑëKW”ˆ¥ÚñÿãÁèÿíÛ·áÀO ¥—,ÕŽ ­"Ÿ¯dŸššêõ[QV1KµcÁ0 õDVú†VìxÇŽŸfdä$¶kƒ¦5T–ÈšnœdzzšÃ‡sÛm·¡â<99¹`"Îܰ{COoÎôô4###=½†¹¹9Nœ8ÁððpO¯cvv–7244ÔÓëè­Øðå?¼, ‡|j?ÕôÃwº×ï©vüês6ò?y6V+eúÚ„ÚO^G/ßÿRí8ÎÌLo²'÷‹ëuô­Øq?8pöKû£×Ñ´Ú{ž‡eY=+Ê ýãjÔŽ—fÇ'Ožäĉ$`-ú }î«Ý~âˆm4CW<âöïß_õáŒ-êòù;~§—¶ ûöíëõ%0==Íääd¯/ƒÉÉI¦§§{}=£þ÷Çÿ[ι¥§×­öSM?|§{I+v|ÙwóÐpomHí§?¯£W´bÇögV4ë!ýbÇzýA+vœÍfñ< gaÕýÒþèuô­ØñøCŠÅÞ¦mÑþq5jÇK³ãïÿûÜwß}\ýÅIžúÌS°Ê³i­vû‰#¶Ñ 
]ñˆ;qâÛ¶m‹^/6ûðï|‡¯Ÿÿu¸ù6lèWÜÁƒÙ³gOÏÎðØc177×óFúСCÜ}÷Ý\xá…=9ÿO<Á¡C‡xâ‰'zrþ¥Ú0ÀÌñŽ>u´§6¤öSM¯¿ÓbǧOŸîÉù—jÇà£G>ÊyŸ?7h.Ô¤¨ýô×u;vŒcÇŽqÉ%—°wïÞ®Ÿ©vüå/™‡zˆ øæ7¿ÙõëúÅŽõ:*?úÑ´L´ƒÐ{;>vìǘþñÁƒ¹÷Þ{¹òð•üËëÿ…“{Nöäºû…^ÛO¿pèÐ!Ž?ÎÚµk›Ú¾+BÜRùÒ—¾ÔëKP”eÓl¢OEéWöîÝÛ¡EQÚI¯=/¥hßXY)hÿXt~ÿ÷¿×— ¬ºš:22R5ã0==ÝóXxEY jÃÊJ@íXY ¨++µce% v¬¬ÔŽ•^ÐU!nnn×u{–œSQZAmXY ¨++µce% v¬¬ÔŽ••€Ú±Ò Îùà?øÁNŸDå[o½ß÷ùÌg>Ãoÿöo«Ò¬ jÃÊJ@íXY ¨++µce% v¬¬ÔŽ•^pÖ™3gÎtëd³³³>|˜‘‘5le QVVjÇÊJ@íXY ¨++µce% v¬t“® qŠ¢(Š¢(Š¢(Š¢(вZéJŽ8EQEQEQEQEYít%G\¿177‡ã88p€ÙÙYFFF¢åû·ËW¾ò¶mÛVµO£uí¸–±±±¦ÎՉ똜œÄuݪ{±Ð¹:u/”æidò®Ÿí¸S× vÚjÇóÑöxðÐþñ|ÔŽÛÇj»7ý¤™ô íÔnVGÜÜÜ7ÝtVHq]—L&@6›Œ,›Íâºn´ßBë–þ}û¸ýöÛ«–uó:öíÛÇþýûÙ¹s'“““ìÛ·oÑsuê^(ͱ CÿÛq'®AíxðhÕŽ;ù¹õ«÷â;­4‡Úqýók{õ©O¡B[*•¼ŽáááŽÜ ¥yÙ°¬ëg;î„ý¨&­Øq'?·~µã^|§•æQ;®FÛãÁDí¸míW£vÜ^VÛ½é'ͤ_h·v³ê<â¶mÛÆ-·Ü½>qâû÷ï¯r_cÿþý‹®k•¹¹9>ò‘T]K·¯CŽ7;;g÷îÝ ž«÷BYlúߎ;q jǃI+vܩϭŸí¸Ûßiei¨W£íñ`¢v\¶Çƒ‰ö«Q;n/«íÞô‹fÒ/tB»YuqÃÃà áLA&“a÷îÝœ8q¢*fW¶\×*Ùl–[n¹e^iän^Çìì,³³³¼óïddd„°wï^víÚÕð\¸ÊÒhdÃÐ]û¥Ûq'®Aíx0iÅŽ;õ¹õ³wû;­, µãj´=LÔŽ«Ñöx0Ñþq5jÇíeµÝ›~ÑLú…Nh7«NˆƒPѼãŽ;˜œœä–[nÁ¶íª&æöÛogdd¤*É_/ï…¸,OOO³gÏvíÚÕëËR¡ž wµce¹¨Ï¿jǃ‡Úñüû¡v|˜_ù•_Yð\ݼJ}²ánÚÏBtÓ~ÔŽ“Vì¸ÛŸ[?Øq¿|§•ú¨Ï?—¶Çƒ‡Úñüsi{uæÌ™3½~ýÄìì,‡fdddž+åBëù:Z9W7ï…²túÝŽ;q jÇ+~ùÜúÁŽûå;­,µcmWýò¹õƒk{<¸ôËg§v<¸è½Yü>¬Æ{ÔÊýP!NQEQEQEQEQºÀª,Ö (Š¢(Š¢(Š¢(Š¢(ÝF…8EQEQEQEQEé*Ä)Š¢(Š¢(Š¢(Š¢(JP!NQEQEQEQEQº€ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ”. Bœ¢(Š¢(Š¢(Š¢(Š¢tâEQEQEQEQ¥ ¨§(Š¢(Š¢(Š¢(Š¢(]@…8EQEQEQEQEé*Ä)Š¢(Š¢(Š¢(Š¢(JP!NQEQEQEQEQº€ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ”. 
Bœ¢(Š¢(Š¢(Š¢(Š¢tâEQEQEQEQ¥ ¨§(Š¢(Š¢(Š¢(Š¢(] o…¸ñññ^_ªÅq²Ù,Žã,ë8žç‘Íf{ývzŠÚñàÙl¶ÊöÕ†CÔŽ›fìXì%£v<Ôk‹AíXP;î Ýì«+¢;ÖþqcÔŽ›^Ûqß q®ëöú–ÍYgÕëKX2Ùl×u±m;j¬[%<Ïëõ[ê)jǃÃèè(–eE¶¯6¢v<Ø4cÇÙl–|>ßëKí(jǃA½¶ÔŽ…A·ãA´án÷ÕŽƒA³åVíXûÇQ;lzmÇç|ðƒü`¯o‚܈}ìcxž‡išäóyäÒj×Ý{ï½lݺ5Ú·X,òéOš­[·bApï½÷rêÔ)ÇáÔ©S˜¦Ùòv‹]÷½÷Þ˺uëøÈG>‘#G¸æškð<Çqxå+_‰iš‹¾ÿFç\êò…®q±÷éû>Ùl–¯~õ«˜¦‰mÛø¾Ï5×\S÷=ʱ9Bäóy È>ß÷ùÚ×¾Æ[ßúV TO:ÐpŸA§U;®÷yª׿ƅÞ'4¶­FïÂé×¾ö5&&&¸æšk¢ã¾ò•¯\u6,÷jÐìx¡Ï·_ìx)÷b©vÜJ[,×Z,ñ<{ï½—>é´…Fv¼Ô>ÅR?;µãÖí¸Q[üÖ·¾Uíx™v¬}Šúר/}ãÕjÇõÖi¿¢ý¶,^mµvüÊW¾RûÇK@íxðúý¬Uô…G\Œãº.¾ï“L&«Öãy¾ï3::Z¥à'“IljŽá8žç‘L&#Å>™LFÇð¬l6©«‹)åA0::ŠëºÑ{ð²­…ìØ4M|߯z¿µ¤Õ`ÃòY¢/ÖNAïíx©÷b)vÜJ[,ï9›ÍR(zjwíf!;^jŸ¢•ÏNí¸5;n¦-µcоñJëËû^mv Ú¯è†-7²ãÅÞ§ö«?KµãÁëWôµVq¦Èårg‰DôºP(œ‘K+ glÛŽÖe2™èõÄÄÄ˲¢u333g Ã8S*•ΆQµO&“‰^˱›Ýn!J¥ÒàÌñãÇ£×r}Íì_ûþr¹Ü™B¡°äå‹]c3ï3“Éœ±,ëŒišg‰ÄÃ0Î …ߣ[ÖÍÌÌTÝ_Û¶Ï …3–eUíßhŸA¦;ndÃKùÜÔŽ+ïs1{lôåxÀàŒagfffV Ÿ93¸v¼ØçÛv¼Ô{±;n¥->sæL´o³÷hPhdÇ­ô)âŸI|?µãæîÅRÛãzmq|µãÖìXû󯱟úÆgά>;–ÿµ_Ñy[ndÇͼOíW> µãʽ”~E?kkÚ/í-×uI$ÑkÛ¶£ÿ}ßDz¬ªu¢¨zž‡aU eUû,D³Û-„mÛ‹º 7¢öýÉìD6›]Òòv½Ï ˜™™ˆ艉‰ߣeYÑ:Ó4±,+RÖEÉ6M³jÿFûÄ?ûA£;^Ȇå>5ƒÚqõ6ì±Ñ{t]—b±ÈÌÌ ¦iâ8Éd’\.·ªlXîÅ Úñrl¸Þûë„/å^,ÕŽ—Úçóy,Ëx›­G#;nµO!÷·ÔŽ«·[Š7j‹§¦¦µãåÚ±ö)Z{ŸÝè¯F;íW4»¼ﳞ§R©ß§ö«ï…ÚqÈ õ+Û§—vÜ¡©K1Œ¸ñʱm;ú-•J½~;Ë¢QÒÀ¥.oÓ4«Ü-Ëšw¿›}ñc?~`Áij­œ§ßhÕŽWš Ëûë•×;öbÈÃUì?•JE׳šlÔŽkßß ÙñBûÔ³c×uÉçóœuÖYQ²Þ³Î:kE$nÖŽWzŸBÞã ØñBm1¨7b¥Ûñjè«׿O+ÉŽåýõ–[µcíWP;®~ƒÒ¯XlŸ^Úq_q–eQ,£×ñ’ʦiV}pñ’mÛ‘2) h:îõÛYµï/ŸÏS,—¼¼HâN1´F¹Yj‘Xðøÿ¢K£U(Èçóø¾¿è>ƒJ+v¼l¸Þû륷b[’ ~ ±ÝÕdàv<Èv¼Ô¶¸T*qæÌ™èàÌ™3+ÚŽWzŸ¢Þ{$;^¨-–õ v¼Òí¸Ÿl¸[}ãÕhÇ ýŠf—/—VíXûÇÔŽ{oÇÐþq«çY*}ššJ¥(‹ŒŽŽbFU§K”÷xédAŒzûöíQ2É\.×ë·!®¨ ]Sí{—‡±T ivy;0M“D"Áèèè’î§aŒGu½},Ë"•J‘ÍfI¥RMí3h´bÇýnÃ0xvÜŠme2\×­ºžÚýVƒ ƒÚñ ÛñRÛ≉‰Þ} ¦‘rŸV¾7ÓƒÚñ Ûñ Ùp·úƫюeö+:oË­Ú±ö+¨÷ÞŽåz;Ý?î–ŸuF¦]ú€¸z,A)“P‰ãW•2ËÛø¾ïûM©§”Ö¥.oçu7s?Å•~bb"Н^l†¥•}‰Vì¸_mËŽ—k[Í~+݆å=‚Úñr–·Š¶«í£Ö޹O+ߎk¯¥?ƒ^°’ìxl¸Þuwªo¼Ð~Eïmy)v\ïz´¬vݶåVíWP;žÏJíwÚŽûÊ#®žçEqÅââ©ô¾ïãºnTu§Sû¬ÔŽ;O·lkµÚ0¨wmW;‹ÚpwP›ì,jÇý‹ÚþÒP[^9¬fÛW;î<ÝêwËŽBˆSEQEQEQEQ”A§/ª¦*Š¢(Š¢(Š¢(Š¢(ÊJ§¯rÄ ïÿûyþóŸßëËà¾ûîã/xAO¯áĉœ8q‚­[·öô:~øa6nÜÈÆ{z÷Ýwûöíëé54ËÞ½{Õ~Êô“ýôú38vì¿ó;¿ÓëËXÏóøÄ'>¡öS¦_ì§®ãĉ 
sóÍ7÷úv,ÊwÜÁììlÏí§_ìX¯cþuü—ÿò_ú>/öõ:»í7ö«é;”þñg?ûY&''yñ‹_ÜëKé úÅ~ú£G²nÝ:Þ÷¾÷-ºm_ qßÿþ÷yãߨëËàî»ïfçÎ=½†ƒrðàÁž_Ç]wÝÅðð0;vìèéuÜ}÷Ý==ÿR8~üxÏ?7µŸjúá; pÛm·õúšbnn ç÷Lí§ÿ®ãàÁƒÌÎÎöúV4Åììl_ØO¿Ø±^Çüë¶®ŸÑ¾±^Çb×1(hÿ¸B¿´ƒýbǃÒ?þþ÷¿ßvÜ/ô‹ýô¥R‰{ï½·©mûRˆÛ°accc½¾ ^ö²—õü:†††Ø¸qcϯcvv–‘‘FFFzz^xaOÏ¿Ôkíõç¦öSM?|§!ôÐÖ¯_ÏÖ­[{~ÏÔ~úó:A¼Ø¸q#;vìèù=ë;Öë¨æÀ¬_¿¾§×Ð Ú7ÖëXí/ íWÓ/v<(ýcÃ0زeK_ܳ~ _ì§øþ÷¿ÏÑ£G›Ú¶/…¸~aïÞ½½¾„¾hœvíÚÕëKPZ@í§š~øN+KGí§?¯CYýbÇzÊrè—öG¯CYÚ?®Fíxi\~ùå¼ð…/ìõeô j?–bZ¬AQˆ ü£(Š¢(Š¢(Š¢(Êà¡Bœ¢ ㌓'_µ,MšQF{}iŠ¢(Š¢(Š¢(Š¢,‚ qŠÒçdÉR,ÿˆ7\–,›Ø„‡‡‹ Pµ^QEQ¥ß‚×u£ßørß÷çmëy¾ïS,«öQED<<²dʘ.O>Ó)«â¥OÉ“Ç+ÿ88)R @ž<>> äÉ“ ‰Iš4Eн¾lEQEQ”ºxžG>Ÿ„¸ñññh¹ã8ó¶Íf³ø¾O:BANöQE4||’$£1›Ï@头¯æõRÜU¨’E;áê¢Å¥É“'K–©Hp°±£¿I’L1E‘"F$ÒåÈõú- $àSÝØZ€S^) ÈüÆò23¶íñò¾~y¹ØÀÜæÍ½~«Š¢(ŠÒ,Ë"— û)͈j®ëbY‰D¢îú ÈçóA@*•²,\×Å4MLÓÄu]Ž9“O>‰çyضÝðXŠ¢(ÄÁ!C†"E||<<ìò‹K@€ÑëËì(á8ɤ2>ò Ç\FlÊËÜò¯Ae&û$Êë¼òëTy?ÑŒò¯WþkÖ\ƒÅ|‘-ˆm'Ò¨\güšëñ­[¹ìüó›º*Ä)J1Î8..2äÉsœãU®Ê66&&@´,Mš‰hùjÆ'­“FYºãE* ¬0d´È6Ú¥EÎ&|ä‘. \úÚ×öúÖ(Š¢(JOO· °¬Åû*‰D‚d2I2™¬+Èåóy,˲,lj„8Û¶#!îÁä /¤P(L&1 Û¶{}+EYe¸¸”(áããà`V:ˆ„¹v“%ËX„ã<•ñW^—"Éx'Q^îQ-d©Œ™2åuNùu­Ã‚¼–ãZ±óËÝò¹Òå¿E*™ˆ_‰òÿ£å}åõrŒBùµ\G.öÞ(¯ÞkŸŠX×è5TƆµ¢¡8_ˆ#Æ»Î9‡]xaSŸ‰ qŠÒ'¸¸Q(jŠ&&F$²äÈE"ÛS@(ÂÉ2sÅ劋ÏjX„i¡òp€J#/ë2TÏl5Ç Å6˜€šÇ!ó| ›C—ÆY²Ÿl½çxå+{}[EQ”Žˆ^½$î`šf$‚9Žåk„a”J%|ßÇu]’É$˜¦?ŸÏcÛ6©TªáqDÀK¥R‘P§(ŠÒI²d#7ÁÀÀÆŽÖµƒ<!-O(¤ÇY*âZP^6E(b„c"¡È%BXª¼=TÆLPɦÇ_â©6Uþ_öµ ÇbEBç£üÚ-_„c´bÍë¼l_Î'j&U®%ò$‘”¥RÅ5Â(BOé믿ž{ï½·ê>ŵ{÷nn¿ýöªõUã@Ïô,ð}°,Ì À 0M è[þò¡C8v¬©ÏL…8Eé1"¾)’#Gš466&&™rs’‰šª<ÝÙu9Æ)^k2ó•ð=Wö)6Ú9*BšÙÄù!³;þ2S‹„´Ös Øsð`{o¦¢(Š¢ÔÁ²¬ªJ?÷Fs]— Xxò0›ÍFžp©T ß÷ñ}?âLÓdjj ÏóH&“LMMUí/Ç—¿¾ïcƒÛRep\pzO3äáUñÌF"q‡ñR˜!ŒÂÏ·B}ãÓVy;¯¼¯¬_hœe—…¸ãT<æâ¯åœ’R >IcPÚÞüö·ó…/|ϲBÑ«¼ÏÛÞö6>ô¡Q,ùêW¿Z5©r×]wñ|€ÿõ¿þ¾ï3==ã8†Á÷þéŸøë×óÐ_ý/~Õ«8Ënºã¾|ß}¼jÛ60 ‚\Ž—¾ô¥¼úÕ¯æCúÿzà ¼fhˆuëÖ…× …Êÿ;†™ËUMê\ûæ7óàÏÿ|SŸ qŠÒc¤jN@@ŠiÒ‘\ŠÔ’×J#Þ)$üSqñdƒêFÝ.¿6`Áy!É$ß®]鉼w™ùÑaEQE©P,«¼àr¹®ëV-O$‘ЖJ¥H&“Ñ:ÏóÈd2UÇËçóQN8Û¶q×uñ<«®º*ò¾+‹óÄ:EQ”vãáabF)†\ܪqZ<¢ ;X„a›Âñ…K8&J޳„‚—ä´–Öq‚ê0˪kó¼y©,ªÇ-ömÇqB/eÏ Å+Ãÿ/Ÿ÷Ì™3á†ÅP¬|àóŸgnr’«>ñ 
zè!.»ì2öíÛÇÝwßÍÑ£G9ûì³Ù°a¿õ[¿ÅÖOšo¼îu|9æ~˜áõëY{Á¸Çsð¦›øóK/¥T*ñßÿûçG?úŸÝµ‹¹ýûyòúë)ÍÌ0|äÇßû^¾}ÇüÝéÓÜñ™Ï°ÅuyÓ›ÞÄ]wÝÚ±ƒ£G2víµ|þóŸççþçùä'?ÉO ³7—ã–Ÿú).xÇ;¸çž{¢ »s'›}”›¸?*Ä)J Ê?Ò0O0±,O·n{É„ l*±þPI ¹ZÒ#Kþ‚Ìr¤(Š¢(+Û¶™™™YÒr òxª<(äµïûU9ç$?œ¬Ïf³¼ýío¯»¿¢(J'(R$A‡€7ªZ,™J47! Ž ?õÍo²{Ã>pÉ%ð“?É¥_ù ¹uë"焸ûF$‚ňO€ŒŽŽ211Ѱpçy˜¦‰aQ1œb±¥ ø¹Ÿû9^ÿú×WMŠAÀçþüÏyËÕWóõw¼ƒŸyúi(WÃ~æØ1îä~ø’—pÙÿù?\ýõ¼èE/Âu]®xä.ûîwÙ¿?·?Î?ýÓ|òÌ6_}5_ùð‡Y÷Ú×ò{ÿú¯Ñ9~㪫ØöÌ3ìøþîû•_áþ]»ø™ßù0„Ô²ø™±1~8;‹mÛüñÿ1¶mWyA¿ò•¯äÀ<=5Eö¬³xðÁùÖßþmôÌX¿~=úЇ8zô(Ùl–ééiþùŸÿ™ xùË_ÎüÁJ¥øÃ?üCîºë.®¹æ6— ñ]qÅ\¨9â¥ÿ‘™/šIô¹T%Mx–Êì Tr³IQƒ$¡{ñÒ}úVÚÕWEQ”ö°Pa@5Z&ÊfŠC(Š¢,—€€"E¦˜ÂÇ'O>*´w×—¾ÄoÎÎ6ñÀ ò¥;¢ýiì:BšäO“"6V¨6MÏó‚€L&ƒëºUB\øåðË×¾öµlݺ•‘‘ à ‘Hð˯=_ÿýßg÷«^ů;Æó€D‚;ÿå_زe wÞy'ç}ùËìxÁ xÿ¹çrú'~‚MßûNŸæª¾óõ¯ç¢‹.âÏ~üÇyÇ;ÞÁ·¿ýmî¼óN>ð°öïä×ÅpìO}ŠÝ»wóÞ¿ø p]~/úi×ýþï‡Âšmó¦špû·¥"¨'4þõ_ÿuÉ€L&æM›0 ƒB¡Ô sÚ¶m[XDèÑ$?÷›?Ç.üQ˜+Î0Ø¿?hÊFTˆS”âã·\\¡X,6œÅh†øLGÝõTÜŸ¥‚ŒC%äRjZÔœ:!'ºË?„¢(Š¢(«Œ…Š8(Š¢´“€€qÆÉ‰Ä·4ifÊÙ×þîþû¹ïÕ¯n¸ÿÛä[çǛ四73ôö·c sèå/Çó<ÜW¼"Õd,ŽãDËmÛ¦X,FU¤ã¢’äØÜ¾}{tN ñ7 Ïó(•JAÀ>þqnô}púâyÅÞÀu33<ÿ7~ƒÿþ7ÃïþüÏóåo}‹½{÷ò†7¼sï^~/ŸçÖÏ~˲çŸ …ª‰‘t:iš‹EÇ!™LR(¢ëJ|ìcUÞ͵,Öž×›˜©]ošf8­ºdï}ï{+ <ÂXà)À ¯)Ø|.,5çW‹ qŠÒÒ¤I‘ªò„k¶jŽ4´ÒØÖÎîž:uŠòã˜?aFê<„ ,åG°!‹»C(²ýú#0·ysr*¢ÚÕ¹Z » ‚ ú­½~™‰ÅÒx~©¦fÛ6¾ïG¡z³ßÅb1z8¥R©èɃ*¾ü ÷Ü÷}ŠÅ"©T ˲ê^g<‘´¢(Š¢(Š¢(+;ÊõmcS ¥!ú·ç=Ó×^Ûpÿ¯­[ÇoÞuºäÎûÖ·øÈÍ7Gãñr“ñ „mÄ»MÂH§¦¦³k˘E0M“ññqvîÜÉøø8¾ïcÛ6Ÿ~Ñ‹Øò†7@ÜÑ#˜Û¼™¡Ó§ ÞùN®}Íkð<‚€7ìÝË·¿ýmþ¿»ï®ºþx¾ÎzÅ‚d]"‘ˆ<Ódl*…yÚJ@ýªR6> ­—Ì\Äi*‰÷ ¬ èP 3à²?»Œïšßmê²TˆS”àãàã“ ghÏóÈfËb•ÆÇÁ÷} —8õéS|á~T*…iš¤Óéªí‰AD¢œHð¶ƒùÚÖ­<ñÄ<ù'BÂ÷ÉÙvÔp{žzå–g7,ËÂ0ŒHÈóÚ·išËä”oð Í%ô4qó*É-ΊvqýÐ$¬”QF Å8«¼…²P;žþÜs<õÐCœ>u„7ë{ð–·,‚)ma\d“|n£££‘ÇY•WšïC>Ï.Ïc—a€eá¾ç=ÑñLljâQCñs6Š–š‹Ó²¬ÈI£é}E½´™ïéV‹$2ÏSɧ$^n’kiB.¦¼®H%$L9™òÿnl½äh*çpšþÈ4Ì6÷Î^Ò]jÀÜÜ7Ýt###¸®[î–Íf™edd$rT”~£›vìá‘n Âù¾O6›f8„ÑÑÑ($²P(011A:Ž„®D"A*•"Nóþ÷¿ŸR©ÄÐk^Ãû¿ô%®ÿÔ§(ærüì¾}ü§‹.âýçGHYsW_Í‘#GxɳÏø>©T*¤Dðó<‰‰‰*Aiff&r6 ƒ‰‰ J¥SSS‘@%30ǧT*qüøqr¹\UÜ.—cjjŠL&SJ+®×–e‘ÉdÂÏ¢ì™öo|ƒ\.yä•J¥†3(ñãÔkàåuí@¨­´SD"Ñp`A•H855U•“¡Ý,dÇÚ+ƒ€ö)”•€Úq}Äó!>'Ë%Ì*¾m6›­òúð}?ÚGéÚ¯PVݴ〠*å üØÉ“†Á_|þó\vì[×­ãïOœàs§O³õæ›9=:Êë6oæÖ_üŦΓËåV–±•Œßd²ãþàxà¹çð'& 
T‚šýE|k”G\Öµ‰”ªK–ù‰ÂM*"Zœ<•J‚‚EèÑæQñxK•-*"åcŠ—› Å‹5ÇÊ”—Y5ûeà©á§š~Ïmñˆ›žžfhhˆ½{÷°sçN^óš×Dë>Ìm·ÝTgÍRúnÙ±fx’ ÊG<Ñ€höW<ÓÒétno¬J¥R$(eÉrä‚ øËâÅŸù O¯[‡ÏüünËâ÷ÜÓðZ …BÕ,u­[óÄÄ‹ÖÑììÇÜÜkÖ¬éHˆg·rº uìØìXÛbePÐ>…²P;nŒeYÑà±QM<ÓzîK ÉÝ_&jâ½oš&™L¦*òÀqœ%…S­&´_¡¬ziÇ0½~=û~û·™Û¹“c\ÀÖ{ïåïŸ<ÉáÏ~–÷¼èE|㜀·\pæÍG¶WAÀSï~7O®]ËESS05E6›Å¶m.ºî:4MŒ ›ÍFžoq²ÙlC¯S,8ô™ïõæS -M afyÙ8Õ!«òè(QÕŒ:ÇËÇŽ3S>n–PÀ«}ü,>^”¶xÄmÛ¶[n¹%z}¢¬îìß¿Ÿ‘‘‘èõØØû÷ïoÇi¥­tËŽ¹+C%Ïšä4“j2ŽãTå^ƒÐSkËí·“+{¦y„mÈç€zÎ~uÝ:rT畬åĉÜsÏ=œþùu×·:ó/Ý.Ö­[×Öãuš^Ì 7²cm‹•AAûÊJ _ìX¼ìÉk á3*î…Öêvò¬ó}¿ávqÄÓ-N75'áUÉd’|>?o€,“—¹\.òœ“´¹\ÇqøÄ'>Eˆ§¿„»B%Å„2íW(+nÚ±8]°áÿ‘Ã?ýÓüÕæÍüÊÚµŒoÚÄ}7òG=ÄÿáÉd2\ \°œ‹Íròúëùƒò¹Ó§¡ì4Q(¢è) /•è§Zz‘Ï:‘H4>ïo6!M¨p–+™’'Ù|ÂAp|Û¸G[‚pP\ûx2ËËãCÞ\yûzÕ —}[—¶qÃÃÑÏÎÎ’Édؽ{7ù¶mÛª¶]ŒcÇŽ±oß>fg› °UV<³³³ìÛ·C‡uìí¶ãC‡±oß¾ªe>qaªZ¤Ù¾}{T@B-¥”´mÛÑL­ˆbiàE7ßL‘Êd@XKØ5“»`c9?Á–-[Úz?=Ï‹ÂVÛE§„¸v †P–JìøØ±cyÐØŽ[±áGyÏóæÙ±²ºÙ¿?wÝu?üpGŽßî¶øá‡æ®»îbrr²×·Né3öíÛ‡çy<òÈ#m?v§úÆK ÖæT‹ç<­M×Ðêvñü±¶‹#“‰D•¶Ð{(•JQJŠd2YõÜ–3Ã0˜ššš·,‘HðàƒF^¦%ý¬b±8°…”zÙ?nÅŽ¡~ÿXYÝìß¿ ûÇú>~Uú¡/>ÌË&O¨ë˜ÀK¯¼’Ç®ºŠG¿÷=ò5鈚¡j²ß÷!›…|L“3ï}//üßÿ›ÿò/qkÚØ ¢<ßP?h©9ÞÅm2|~I!<9IÛ„bœ¬—b  eáÐÚ ’n´¼¬Ö/¢Xçzˆí'åó4é999¹¤þq[„8Ý8÷íÛÇM7ÝÄîÝ»#·ÏVذa;wîŒEÙ¸q#;wîìhH´×އ††Ø¹sgÕ²4i’8¼ŠÄ<ã¤Ú¦ëºär¹¨Ó+!ÒM¥Rø„íŠAèaë¶o¥òï -\ëã?ÞÖûXÛIïg¤CÞ‰ãBue[±ã 6tô=µËŽ×¯_Ï¥—^:ÏŽ•ÕͶmÛØ±cGGŸÑíl‹7nÜÈŽ;ªf¼ÂФK/½”õë×wäøèÇŒÍÐBœ\“mÛM=ƒ³ÙlT0J¼9âBœT"¼øq}ßgíÚµóŽ+ ÂYˆ[)ýceu³mÛ¶ì/Ô¯0b±HGN"e¨ÖzÎûÖ·ø¯1oé¥P;qêÔ)¾ô¾÷‘ö<6ì޵˵ΦßU¤øA‡ù¢[=¿ˆ€ÐÓD¾fyßZÑl‚JÎ7ã,*ÕQã$sƵ™‘‘‘%õÛ&ÄíÙ³‡¹¹9>ùÉOV=xGFF˜žžŽ^K|öBlذ±±±Ž?T”Áahhˆ±±1.¼ðÂŽž§v|á…ΫºããðtY·±«*ŽA%•­Tº1 ƒ\.G–JIÛCO›á‹=Ôv¸A¡ÓyáâJ±ãNw4êÙq+6¼~ýz¶nݪUД*†‡‡;.ĵ³-V!NiÄØØ[·ní˜׉¾q³^GýL±Xd||œñññ¨byíòxˆ«T8Ïf³d³Y<Ï«ü2™ étº*ɺ¯’íwìØQ÷ZDpÔ°Ô^ö[±c¨ß?VV7ÃÃÃÙ?nÔ¯ðk¥ÙóÏǶíyÑJ¿òÿÀ¼öµ î[—tšL¼Í2MÞúðÃ\pÙe …¨ýL§Óu Õ-Vµí¤˜ï•ŠjµÁbVìE¼xó,…êQëû!!¥]J[¹T!®-Å&''ªŠ»Ž_Ðôô4sss Ắ&ñTú’nØqظúÄ[ )ÂP{¼Ú䙕¶(.ì/×ßì¹+¯ää–-ug‹û‘NÍáÄí¤[ „Fv¬m±2(hŸBY ¨×Ƕmfff–´ €2±U›Dܲ,&&&ð}?pÊ2ÏóæmÝí„䃈ö+”•@7í8î ÷Åo~“M眃qÙeó¶«×þ,(ăçñÌÿ8_J$ v­ñzâW[d¯§x„ƒ8#öº@(Æ¥ ´’ß- ̳T+ÂA%5Ë|¯8êlÛÇM|[„¸ÙÙYöïß?Omõ<ááan¾ùfnºé&ÆÆÆp]—;ï¼³×ï[QæÑi;vq¹“#øø§¶†ÉÜÊÇO¥R¤R©ªxÿ¸'yà¤úr;ytÓ&^÷º×uk²`Ù|©Ía´qV‚·k[¬ Ú§PVjÇíg¡Tµa±²L…¡å£ý 
e%ÐM;Ž qóÝïrí _¸ü7σaÀñã|©\@ǶmŠÅbUŠyÝqŠåßxõP©<¬„¢Y¢¼}*¶­ ‘Råu¡›„¡ú„àÚf¼ÀòBÁú„¶q{÷î]0Æz÷îÝŒsøðaöîÝ«!§J_Ò ;¾‹#ø[ûc|æ3Ÿá¤’D"eYuN—°ý’°øvc×^{m'niG¸ï¾ûz} }ÍBv¬m±2hŸBY ¨++íW(+nÙ±TLiÿ³Ïòêç=¯©}-,·.;v Ú˜x}5 qŠ¢(ŠÒ+‚ ˜Wíϲ,‚ è¢ Š¢(Â|ž°½{î'ryã¸RE±’œrÉd’‰‰ òù<‰D¢³!øRT¡VˆËQ Û¢¼. àâ—ãz¹Éo£›a5X7ÁŠ¥mUSEYœg1øñ/¼>ú…ò’ÆM³„Ä¢'„Nˆ[÷ÜsOÇ®WQEQ”Îáyù|×u£_Ã0ð}¿ýáTŠ¢(]$MŸG 5©}ë[¬_¿¾¥tfÛ·o¯z],VJ…f2¡»Y¡PèüFĵæÕ‘° ½Õâårœò>².N²Î²F¬à!šzÄ)J—Ù2;K!Ë׸úÒã³3ÿ{Þ6R,¦´ä£¯lÎ=÷ÜŽ»v†¾t*4WQEQ˲¬VšÏç ‚ Êkš&ŽãDÕÍ ÃˆÒ_˜¦ JEQz‰ƒƒÁNrXÀ?¬]ËØ‰Íࣅ˾ ›LMMU­r]—D"yÃu -µ©äI’PÕ¡ÇHŠPX‹7ë’/®ÞüJmޏUŠ qŠÒ¤(ÃÆëz%©ÎsÙi$¤¿ßiwn¸8퇑‚¢(Š¢ôŒ€Æžñ0!™7’΀Gýå} –d$—¤ÚÞ"ç!œøÊf³Ñë\.‡ëºQ@=>FGG) ¸®‹ïû ’É$ëׯçäÉ“LLLL&ñ<¯;UEQÀÂÂÇç©là;÷'œz×»øÅü ¹}¾üeøíWqè¡ÇùnyâA(ÂDi™LÇqæå†[2ÒVÇ›NiÓkCK¡2(•ýâÃ& KnÈ”·u™Þ5ƒÎ. Bœ¢t 9˜ßû"\y%\pÁ¼í²T&ºwmƒÛNÚFkYÖ¼JFŠ¢(ŠÒU*"[-& qþû‰'3y"Ĺ‹œ‡ðyÛ(œÊu]JåœHRÈJ<ßD¼{ðÁyßûÞK'½Eé &˜àÃÏœÇ:óC>üô%—4·³ãÀÞW^Ì3Ï]F1Ÿ¯Z-mfÛ²„ms¼B±üwÆsÛo&% ‹*Ô^†KµWœ„©æël«*Ä)JWÂâ‘Y¬÷¼€n¸aÞ6µí_§±ÛÎv·‘’x¹Ý<ºiSÛÙ)ï5(Š¢(=Ť:\¨µÛ43hSíÙÐä¹ Ãh(Ä™¦Pò}?Z–J¥0M×uùÄ'>ÑõÛ¨(в~yæÂÆ&³þ8/>çߘ Éè¦ €bÞ÷ÇÀÝQ›'ãˆøxJÖµá‚çW) bå”âñ,È>µñüpñs4šÐQTˆS”n"Ò ¾êU¯ªZïP’añj\ªÛÅc_ÜöcNLLt$G á2 < xx˜˜¸¸ØØxeß÷€ ‹ òäI•Gˆ>>A¹ç`¬ˆoŽ¢(ÊʦX,V=sãùâR©ét˲ð}Ã0H$Ñ2Ïó¸æškzýEQª>)ÀñãÇÙ2;;OçªK±Ù,¤RPŽ”’ɈŽV‘–ÜoÕo"ô« ;“#ôŠ«¥Ñ¯©›°:Q!NQºˆIÅKjGMËä³² 4˜¦ÙêÑG墋.jûqU0ëY²¤HáàD—"E$00"¡MĹ€§üãá‘'O@€‰ y6v$Ò)Š¢EŠ88äÈEâ½O‘"Þ+»³×—¸j°m›™™™yËåìº.¹\.ª¤*Ëe¬¶ÈÃBEEQzEÓÎŽ¥˜&Ûï8¶mGEk–>Ç¡ZP‹{¾¹TIJ€ÐÓMr„6z ­‹Ó¬Gö*E…8¥«,”3*c•*É’xÐqñxËÜ^9Ôâg©®xS¤wEd:q …BVÒNN:Õù¢, ?Ñdìâb``aQ¤H‰R4Αc”Q$˜b —¹*7Ûd™üÏ:T«§(ý‡|×­X,K‘"f§žƒCŠTä› µ+‚‡}çey–,Aù'E ·<¨ñð"Ú &gŸ 6Ïm†õ½¾CŠ R¡z »P8«¢(ƒG|LhjE‡wìèõeµ…G7m‚lncÛ.‹pd‚AŠÑ,kÂA*œÆ»Ç•gžÊà;ÞÄf E´z^"ê»ÐTˆ[¥¸„ß9ŸŠ#’IP^Ï·+™¬—må;›Ž:ÿÇ_ë¡Z[( XÞFÄxí `s¯oÜ28“ûßÿ›L&Sw}èb1êˆLù·Ýt¢Ã¼ev–£ÀW\ÑéÛ¢,ƒqÆÉ‰¼ÝR¤HÀ,ÿFôz†Š‡„Uç o5xêÇ…·qÆ{ýÖeÕàáÕý^æÉGÛ8㑈„¹sÊOœ,Yll H“Ž„ö"E||<¼HˆóñqÅ9ŽxÅ&IFb¼´3I’dÈ0ÁDäqëãc`pœãìûö>P‡¸¾Á4MõrS”/‚Ïp,ã1©ËÕ"šQÞ§X^ïÅŽ'ÎUñtcñ1¥E¥æKzä‘^߆– bwmí“O6¿c¦ñ(lb¢M#Ãz]h£ü[ 5¡2 ´¨$,O×ÙW›è¶°¨777ÇÐÐP¯¯sÕ# ž4ŽAu‚[ÃJ„ß³ø¾qO¨¸/bX#ï,iˆÇËÛXÜCµV˜kD‘J±®¸?Üóö·wèwž 1þÌgÈ”æŸJ±„vyÃ4_$`%x*ýƒ 
¸Ó¤É‰À¶Ö*W”®#Þaæ"O7òbM—{ÝL`bâàD!â&&6väQ–!…•{x‘g«‹y¡Q¨¹ˆóâëã“%‰z"šA%¤=EŠqÆ)P E ¿ü#mJ–, óDÁL)&sžgž¢(Ê #b—AE8“å2FLޱ¬ò_¨a"š‰#†‹Øö©šó¨ŒIÝØëBëjv|³g€…8‰Üxú?hé÷ß?\zµM„ƒÐÝР"´Å‡‰òÁ7ú°zªµJˆ„¸ýû÷399ÉîÝ»azzš={ö077Çðð0ù|ž‘‘‘^_ïŠÃ)ÿ•†S “„^\TKĶ©OâM æ ßâ'ÐÞd…ò56&Þ âž\Ôè'6¦O\#YLÞc­Çðr‰€VGíõ%( Aµ„”åtÊLQzJž<¥ò“?îU–'yŒ‰8fcG•ß$tTr;ŠP'Ø‘7Z‚EŠ‘X&ç1,ÞˆGš°íXXª†oD‚@BôL«õ¬]j;ãàèÄ@àº.A´%ŸlUÅÄ#?‚èµxž‡aUë|ß–k¬Ò D“è$¹ÂqB@8Fò¨Ž‚jáL0cûÉXàâÐT/X¼`¦°X+ÛèÛ²Ú¦< ¥VÚÏ|æ3œø¯'xêýí½°¸«bx¡ÕŽME¬‹ØÙÎß³ÕÌ€ÙÙYöìÙÃîÝ» “É`Û6»wï¦T*‘ÉdøÔ§>Õëë(ÄSMQ™…ˆ{†ÅE5qßuÊË3ôV[Ѹ]ÇŒ»-Ç—ï‘GàòË{ý¶—Ìß5ÈŒ'¶ 3IÊâ RŽ8×uWU IÕ>ŽeYX–E>Ÿ_v8ªçyäóùèy›Ïç)•Jxž¨%Ncšfäuâû>étšT*…ïûŒS*­”òYJ? ©x\*‘?Že¬J(¨L«—_Û„c/¶®ž“F-V“Ë4RfyxÀ¹?¾ðFÙ,$P3FøÕ_ýUòä±/iãD€KøAÇÉµÝ ›0l®ö±]Ï»Gik€ÈnïÞ½@(ÌÍÎβwï^†††Ø½{7“““LOO¯¯¸b±¸ì $q1-OEtzäöoÜÈ•Ï=Ǻuë"ÛMP-´Èöµ_³f|˜|ßÇu]R©ùÉb±ˆïûAu&¼ê*¸òJæææ(~îsxžG"‘ˆ*xú¾í'ˤC#¯å9¦išØ¶mÛd³Ùh;ùÛ¨ì²çyÑ6ñÙ9nüÖG>Âe\@öÏþ,:Æù7ÝÄs<×]×òçÖ+Nð,|¿-ᵞ(õ¹âŠ+¸ûî»{}MÑÑ’ã}‚ƒ%?/à*JkˆXVh0-#Þm@ Z¤XUYØÀˆÄ2)x …—–(EûZXQ•⸗™x‹ÅÃ= er>ð ­G­‡['‘ô!Îÿö·¹dÇ®ºï>ýû¿S(„Mªïû=ß²p]7êhLMMá”_ žç‘J¥°m;ºYîû~$†ã8Ñ`ß÷} ÃÀ²¬ÈÕ^fû Ã`vÝ:üçžãGÅómŸJ¥ª¶—09Ÿˆ|rßEø“sJ«X,²ev–ÿ¯ÜAžÝ¿Ÿû÷C2ÙkSj "ÓH[¨¬,Vº'\<´-IQ¥µÕ>%4ÓÀ K– ™ÈËMÂ;!ô~“‚&&Y²‘håá‘!…‚çÉG"›zSLEÇ­ýnа& êÑL5;*€0Ñb¹¡ ™® q™òÒ;êM$[±þq ’É$†aD}fY¶~ýzNž<ÉÄÄDTQ¾¦L /öü•q€ô9S©‰D‚d2I2™TANY)\'i„D|“ˆ?Yïj åÝVV^4ÉóàBS=o^•TXF×òëüz{/̦b|YBƒt˜¯üÎÔÙ7ž,Pi;k†‡‡™žžfll  êáðáý¾Ö¦QJD*ñ€s‡D"A&“!N“L&£0Û¶ÙzÍ5üÊwðÏÿöoüÖWð?{Œ¡w¼ƒ-_ù /|üqÞõì³Üü¦7á;†ã88e¡ ß燷ÞZu ŽãD"„ ß÷£Î‡ëºŒŽŽ’H$"J¶±b±H"‘ˆfüj=lDàjäyïè|üøÜ#ðä3ÏDBØbJûb±íâ ï8!.ì5:N£Ž’뺸®•kŽo·Çudêyaº´7oŸBóÙaó„(«f_I˜‘¥Ð‰DbÅ rEŠ$Ê?iÒ¤HUå}R”ÕF\pË’%GŽbù'.ReÉFUEóäJˆ§ädK‘Šr´Iq Ê¥æâba‘"……© =êºñ¾[A½Ó:Œç…aP ‘H@mŸ0› ÷]ˆÚÐÍF粬Èû£QŸU–‹–J¥"qN&!à¾ï}ïˆ&yåéoŠ]#ŠÅb4‰,}sÃ0(•JQÔ‰$K_NôŒ²2ˆÌËŠo *9ThSdr ®¼òÊú8äóó¼á Òö]uþU¤^Ü„7Ü&X`.-~Qr„FœA ¶X022Âí·ßζmÛ€P¹óÎ;£&''9qâÄý*NÔdµÅiÚA_y%SßÕYª’8T»LCEüJ”÷õx¶lÙÂÈÞÀER¿¢T²Û¦¨ˆpñŒ¸ò7AeFH™R^.ベò5diÿÕjí¸]m±‡W•×IóÂ)¢—}Š¥"Ť vq£ïŠƒFÈÁÁ¡D “ Æ 5ØØ$IF„߹Fß3£§ßAágì¸Ó¤R)²ÙlT°ABUeBW¼ÙŠÅ"SSSQ‰¹æškê·X,VyÁår¹(%,O$‘è'$‰Hœ« uo§„tª_ÑMÄJŠTÏÅŠç[¦¼nŽJâQ¹¡n*ãCù‚'Üô¸ùÆß¯ LŠqF=;nW[,aqŠÒIzÕ§XR•ÔÂÂżÕ$‡›Gˆç)30˜`"òœË‘‹Äº8}5É¢4Í 
Úq§ÉårQ.áÚè¿d¹eYa4Kmã¯gfæÏúÙ¶=oymÄJ\l›ššŠ„¸åTq]©t²_Ñi$ëˆC%Ì4Aý¹b¨t?UŠ]ytÊŽ%éÛ³³\pÁó7pÝyUR…–Úq\ˆïê¼ ¬zU>òmº™Ê²X4N±ÙpÔ}ûö1==]· Ãþýû¹í¶Û:ò\× óµ9Nô05 ƒ™™ÓŒ"ÔÌÍ›ù{Ú¸iÓ&î9y’±óÏoë{’Y¿vsâÄ Î=÷ܶSòs´[ˆƒÞTì¤?ñÇf;–‘z¬;Ä³ÌÆ—m'lôTÂ;BÑ+A(¢Í”—%€ty_qÿs€ *³3&•)I»¼]‰PäK”·7c7ë8pcù7Ž[Þ¶Ý ÷⥻EdLR©$dÒw.Þì¸]m±…Æõ.ƒCR©Ð«>E³H˜©x…JÉ×–#Wª*yÐ401«¼Ú Œ*±Nó¨ &ýnǽ$J*HÞãÚ(‹zÛvŠ•’¥Ýtº_Ñ)Š„ÝÉxN·}ÞÿV:F'íXúGÖ­cü‰'êoT§«Ímß>•ñQ§ÄÃOk*d¬¥ôœHˆÛ·o·ß~;挻å–[–¤ ‹7]ílnnŽáááÈå³®ž®ë’Íf™˜˜˜÷àöM“4¡ÃM§ÛM›6Q(•èDe[K†ôÝÿý¬+çlJF_¯ªV7è”Åㆧ¯êú{jˆC%DSð¨i>¡Mj¯Kȧ„•ÊT¢E(„åcÇ+ŠbT<áêå[k¤HÊ I%„õA`¨Áwº“ùÉåØÇ`öì|*Þåו‡ Üc3¶¼ ½¿zvÜζØÃëzBxeuÑ‹>ÅRoµx¡ ‹ ‚¨¨B;=G»UYTiýnÇýF/ú}Jstº_ÑnÂyà€°+ª­§µãE½Öcí›ëºäóyJ¥ŽãT…ç7…ŒÉjçŸsTò_×:ÄÑù†¾àl‚c°Ó€IDATUàÉÉIn¹å–H Qn¹LOO3;;öää$ûöí[pŸC‡±gϦ§§ë®Åó<ljŠ2T‰p„cä :;æ5¡#"\©Tj»KüÚµkùû¿ÿ{ž\»¶­ÇíT‰÷L&SÕ!›žžfÏž=6ÀZàŠÞ3I$™#ìÙɯ\_P¾_Ò0Hϯ,d>ú—òùë>Ï¡C‡º~é­Øð÷¿ÿ}>ÿùÏGv¬(æRùÈG>Â}÷Ý×õs·bÇ÷ÝwùÈGÝn)Hþ·$ɨ؂‰/‘ÜoíÏ â:Àž={øüç?Ï÷¿ÿý®žw9}ãÉÉÉ^Þ2¥Ïèeÿ¸;†êþq»qç„G »* “““ìÙ³gàúÇõ¶›m"RζíhüœËåZ+ Ÿè$:©H%ÅPv‰ÇUZfß¾}Kê¯8pà¶mGêïÐÐ7ÝtS[.hdd„;ï¼3ò®ãMozSÝ|tÂe—]ÖÐ=Ô÷}<ÏÃu]<Ï›çªî΀¤è¼ØÛG’JSÔWïSj…È‘‘n»í¶ž ­ØñŽ;¸í¶Ûðð؂ʼn'ºò I0j+ç …$ =•Ü R´À¥R0¡T^7ÕÄ9ãNRH¬±¥{·¯!ò¾â!«‚Eu‘šõq¯åµÜµç®®_v+6|ùå—óÚ×¾¶j ÁëGúõºV»víbxx˜týÜ­Øñ ^ðvîÜÙ¶Ê•Pè ™ŽLh§¨§T¸í¶ÛØ·o—_~yWÏÛî¾±²zéeÿ¸;†Jÿ¸ÝÄç‹'P§Ÿåày^$ù¾¿àøÅ÷}‚ ˆŠ Ú¶M±XŒÒUë…L&whhˆ÷¼ç=üÑýQ×ßk;úÇ~Ì5gÝ‘#M9^´ÅØ¢’S; ü¤Çl£ôy2ò•ÇÞ½{Ù¹sgÓýã(45^Œ¡I7‡††ªŽOTÛ RòÜqœyžX-—Ccÿ¡÷¥5–cÇA9‘Øý÷ßõæ7wï¢Å-*•D%§€èœ²~œŠÐ&âäxӱߊ ]m±ß·…úõº”öÑî>E³xx‘WZ@À 3œÅY( Ñq¸s2*­Ó+;î¤Hƒ ê2P¯ç"¼â€,‹ï_›_Ù¶íºË-ËÂ0 \×Ω¹á–N¯í8 Ò½õ¨T8]éc p]ß÷£Æ|>a‹Š;bóRMX¾Sò}áLò5Bø}K$ÑwÔ²,ÇÁ4ͨÚgüû™Íf£ï—|¯ä»?G2™ŒÎ%…úÚ˼ÚaÇ>>&f$Ç-Öž8ŽÝ—b±ÈÄÄ-‘!Ül*Eï ’o;hí°JwX´XÃr¹ýöÛ9pà@4ó±ÿþe5ÔOÍf«*I:&qÞéõ®••ÇrìØÇçp´XÄÈ·±LCu˜§çaÁ·ŽÀëN…¯|Æ =µÄKN<ßÄûÍŽ3C%癲bhW[ìw$ _Qš£Ý}Šfpq)Rœnjcca©'¦²dzaÇý‚çyäóùh *ýz‰v©—š%Ncš&ø¾O:&•Jáû>ãããÑþñãB(Ä5Z>>>ŽeYX–E>ŸÇ4M­”ºDziÇa×Ö$Œ„²éZÞeáy^$þ6 GáXø§Óé*¡L„1Ó5Ižô¸@O6y‰D"ìlÛÆuCÏnÙ¿‘.×-Þn¥R)ºž–Â*©_鸞í²cØ2;[½"›…D¢ªbª´MAP(´sYæ¯ÉxO*ú–Hˆ“R¾qj_/æj\]»váº.7ÝtCCC>|˜|‹b„çyX–=ŒIË$9á» q+ŸåرÏ F·o_ú‰³Ù°á®Í÷}; BÑlSÙêMžŒ§á†²Wë8¡Xw…àÃAr™ê ñÿ»­d·@ûë 
¯ÚÕûøZÁQéíìS4Ë8ã$HTy„æÈ©¦Ò½°ã~²¬Hô_p[‰„Oñ²©—Ÿ8~Ü…–;ŽSµ,‘H0>>y÷(ÍÑK;Nmt" J;ð¼\7|}ðƒm.8²’‰Ûq;ÛbEéìS4uþrHª……‹yÄ©§4K/íX2Rô“Z~l6…¹-D±XŒÂïŠÅ"‰D‚d2I2™œ'ÈÉq!âb„,¢¾8–eẮVn]„~èWH±N¿\×¼ÍêA$êŠg„6kYVËå"[á*›ÍFâ¯ad2‰Ä’C¤kÅœx´,"¹Zé”»À™™Ìo¬,ôýp¬£X,bÛ6ù|~ñÏÝ'TžK±eµrˆ[Þ.Qg_¥oY¡ÜérÓíÈ;çºî<÷M—PghÁ©sÙhÓ¶úhgþĺø>¤Óaƒ-b[£P‰€Jl´GØ(;ÀÏ”×[„½”,åÊ:6üŽ v.ÜþCŲPç…¿–Š~ŽSqŸ¶¬PtÝð:jÅbxͶ^³lkš•åÑõ–…µl6× £r^9W|†»X ׎†Ç³møä'!þpS–ÌrmØÃëhRzEi†Ž·ÅeŠI‘Š*£jÑ¥]tÃŽeN®—Ôú̘¦‰dŽãÌ*ã‹Åª¼T*E©TÂ÷}\×%™LFÞBñãÆŠør9fÝë\Þ=½ [í±àBÛ{!AD¹Í R¬Àu]¦¦¦ªlÃ÷}²ÙlT4Pò–Kq‚Úíë¡aÐýG»ìø¼Ã‡+/‚`žD¶S/?¥Wûã0?ȧ:äÔ-ÿÖ qjf}MUޏ¹¹9&''™žžæÄ‰ŒŒŒ022R× ­Ôs/Ò;4}\+ÍrèÙgÙ´ØFŽþf2óCQë‘,ÿµ Ŷ3TªåÀócÛÆÝ–%‘ÆåsÄÏ•ËU„?ß…0߯)×ÅÛ× 2òBAÍ÷ÿñmåXLJ"„ÇuœP”«ùR©ù®Ü·Ü¢B\ B„²ÜpL`aQ,ÿz2õ§(­ÑBܼkŠYÓzˆ7œˆ£££üÒ/ýÿé?ý'‰D”'Nr`ÅÛè|@$âÅ—¹®;Ï›Hé/d^Ù¥= êº.Åb‘L& ÈårX–E6›ÅuÝhØ UVA¹*Âý<Â¥—^ZYw8ˆ!mZ=Ü ^……Zƒw¨N-”"ÖvÏuÞ¼¯©Ê·gÏ6nÜåk˜››ãÖ[oerr’\.×ó0¦ZΧwÞp> ¨gœ²8§OŸ^|¦Õu¡T E©†Û6²òU0©ÄÿÇ•¾µ±mâ³!Kécšfõ,ÎÔÔüm–’`Ô¶+Â\üÁ³„ޝùì³KxJ'´“²JððHˆBP-¬ù3ÕŠ¢,ɇ%är¹Hèå‰Dbž8–H$8~ü8ù|>ÚÎó<2™Ì¢^uµç“BñÔNzÄIUÅÅH“&C¦éðwŸ§Ö?Õ±kïdÌg±üZbžç‘L&£°SÃ0ª¼Øâ­eY$“I2™L´zN*µHßàÁ³Ïfü‰'b+¼…ÇuÍb,²Ì$Dtž| X¡à¶gÏn¾ùfvïÞ]µÁÞ½{Ù³g·ÞzkOÉJœ~œ4½ÍÏÖŽ‡A7Ñ0ñÞqñÓO/ž”³™™5©p*‡*PñÑO³¸›f¿M‹/•·¿½×W°êQ!NYéÈ€9 ¨*Lbcã.%×§¢(ó°m»nµÄzËk½E⢜kâYÏ ×Ìù¤J¤x?A8˜®'~ùøåŸøë€ Zæài0*É’ÅÁ!G“"Eòä)P E*ठ’ˆqrM²ÜÅÅÆÆÁÁÅ%A‚"EÖþç•›?7K8†±i}Ü'b¬Œ' …BS_‰D‚3gÎôú(}ΣÀ V1ïAutP™³Î:«¡]Õ-ˆ&¡§â\‘«»£2`¬˜œœddddžaÃ|>Ï›Þô&fgg{V^=^9I¨CˆGšÒ*Äõ†p–d¬ {%ÒÎKÿ2Aë%E%/\¿'u½úêÅ·ñýðWð¼ðAá¬TM ñhÃó×A%§ƒì[Û‰w]ÈçÃóF¸­ëVÂr‰Æ¹ý KUV2ÛÙN†L4Ø,,JUÙ’Eé2Tõ}\¤¿’'O‚DôÌ "QËÀÀÃcÜ'gçpq±°gœ)2d00H’ÄÅD¸ Š£ãH1ä||LLLL,,\\¦˜Š„6Ÿ J”(Rd›01±±±°È‰„63ò¶‘<•iÒ$HP¢„OŠô7¯ìõ'Õ~|*sТì$`=›pÇq–UAQãQài ®üú׫…8Ï«ù³dq× õ1žÒ·¬0,u¡…ÃÃÃŒqøðáž qžçaYV”¯°HEP”~gý³/ö·ç`&•â µ´Ú·(ë&í[ÞúV8r$ü¿T30vÝj!N„2Aò×Å_çó¡à–HTBr}¿qè­iV¯ð¼pÛ® ÞpÊJÇÅ%E*øjeTEiŽ$Iå ª2,ÂÕ 3UBšƒC@@† Y²Ñ÷ÎÆ®ÚNŽ-Z† yò‘ð•"E‘bô|rq«Ä0'Ä£,O>Ðrä(–kpQ!¢d9 ¯…ÅSÑq‹I`‚ òä±± ¢kH“&G“ &¢ó¤HabÖòmìºy'›-Š´Ò'ÇD„[hj3ŸÏã8óòþ9ŽC.—ë›|çÊÊåRà¼?ú#¬x„“²[.Õå¯ÇAçŸ5Ín877×ëkÅ0Œ('\€†A/)¬©tŸ'sÎ\4…TH--¡%ßü´5BÊu—”«­ç\z)üõ_×_—ZbfRÓ¬¼0Y»n•Ì®zx+¾Ó?8,œWë´ºÿ*ÁÅ%K–9²d{}9Š20)b`DB\–l•š;QLÂ3}|œH¸³°"O6ëòä11£ðp 
ñe4Ú^<ÐääxqaO¼Þ$ÿc\D« ã¤Ê?BmªM‚DÕr[cÅ–ÇÂÑOÙl–b±å”*¦R`¡T*©œÒ5<Ï«N¥UgB?òù|ó•såK ƒy5çÀ±±1\·ñ¨~zzzQ¯¹N#ÉX¥à¸†¤.•í§Óß< løÁæ¯Èf®Žšeþ`@Øw"MÑ yÄ5¢Ú/‹¡Çœ0À³²2¨QÚˆK˜ß±ÖÅ;Ëüj@•Y(ùŸòþÒ6øÌÕF©šc‰Ó¨IXMy!!N  ®é6E‘"9r$Hàਭ+J™xî´$IR¤"o°$I<< (RŒ¼ÉÄ ˆ„. ‹ûçÕÆ«q‡ãÆqþ<øsþ_ãÿå׌_£D) ß/9ñ&+RÄÆf;Û™b*šÊ-à+UO«ç•– Q•®Ä Ni–Z/6A .$ fff‚€ñññ¨Êi*•"“ɨ§t…S„uòš±·xº­¦&Ë& ìÔ8Pé:k Ld:99É­·ÞÊÞ½{«ª£îß¿Ÿ}ûö±{÷îžVM ‚Ã0"‡ E$ž9v¬z†Dò˜-ä½åR,KAUU‹öΈÔVHíSLÀ·œ/CøàÍ”¯«.¿tí¥ð½þpÚKܶuP®¬vÄóÌ+ÿdÉ  Ö’›­VtKÔ‰[‘ï×vð*^Å¥\­s]—b±ˆmÛ ƒƒÅƒ„o‡ß» ( $“IFQ¶X[¸;{7y+išäóy,Ë ‹¶9•Œå¹\ŽíÛ·311ïûQb~Ç4Z¯ô?2UËøø8¦iêç¨ôGñxúøu•ét8¶Š9aøåBr–eEáÓMo¾5ú}E1/¶ddd¤§¹àꯘºÊ4œÅ‘²ò< ¨.ql>ÍÊ7ë(°…Š7‡_þ_}’«È¡"´‰…KeàP ƒ E"ñª0Ëÿ;„¸ZïiwÜò²z¢ß â\ .9|.¹$\N‡h‹¹ÍTÄQ³ü+Þ5Ê`0Hg—ö\p§5}a±ÊQq/´è"…µ¸ –¢â‘Ï›ÿ𿍴™*!›ñH© |M"–‰—¢âi&á¨&Õ¹áä¶ÄT—:ûdP-ÔÅS¬ØßFÓ•Éñ†“kˆ]ËSÃO­8!NsÂ)+‘€ ®Pö%¾Ä ܽþŸãQåu¼Žƒäz®g-ky1/æ»|€‹¹`Qá­)¢ÐˆDÙË=•JñQ÷£ü(÷#Ö™ë¢dûz’ضM*•ÂqœÈÊ4M,Ë"ªg*[¯ô/ác2®9A@2™Ä4M *Ú+J/9²®âåæûu ¹Y–µô ¾µ}\mÊV M'y±, ÏëMp£T )Rß` ‰'Ìn´^¼ÔâÿÇ×ÇDzT*jŠG‡C8p>\Ce •|A%*ƒB|ÉÄc–ð‹÷ð(–—˶Aùi*žq²N¼DTrIÈT¦ll?9†ˆ‚XËÙ˜l™ …¸ X<7œ "f>yòüþOTÅÔÅå*®¢@“¥(œÔÅ­ëÕÖ.D$ó}ŸçÏçGüˆ'ý'ÃÜоišLMME ú—%Â/u\3©x»I˜“céòþ`{ù}ÄŸ‹ÞdÇŽïjKåëÊÆÎ+ÉËET:9¯$*·`óÜæ^zKæIàÌÌ \w]Ø07۔𴸧³ JXnè^6›­ÊÕã8®ëFaGù|žL&y=‹ÅH¨Ëår8ŽMe2™J"æÍÎq~þù?Ï‘#GØü–Í]¦á…ÿÏ 9ûì³9ø¶ƒlx÷N]qŠËw_ÎO<Á?9ÿÄø_ŽsêgNñ)÷S¼áSo`ø·†Ã¶§Ü—*‹Qš„À0<Ã7p}#oDbbøøžAà†â¢LbA€ïû†A¡PÀó¼(‘¼çxGÍ0 LÓÄ÷}­üÖaDhP”Õ€‰‰ƒý•J¨Þ¼| NKàº.žçáy\—½Žç'žOÎÊ‘Íf£çDÛ²Jß"¹áª †Åb‘©ÅRº(J?u½á<ÏÃ4Í… ‰dßÖÕ,‹³e ¯A7Ï@qžçqãoüÆàˆpqϵ|yYØDÂäQÉ»#•ô b*O!Ù7 üÂ¥cÇ4¨$ÏPñÞÈQ •P« p¸²| *ù„Šåsbçñ*^¹ØkÉû&žµmM<ï`†É¶¹Ø¶+áar¢ö;åë…W^ùÊ^}ª-³*iÃh^ˆ“û¡ 烎•^ÑLèžïûd³Y&&*%"`ÅQŽãÉd"¯†t:M©TŠrf8Žƒã8Ѿ£££¤R)\×Å÷}òù|tŒS¿uŠ—½ðeLîœä™ì3üºóëœ?{>ÿtÖ?ñé[>ÍgO–?úÔñÇ'ÿ˜¿¸ð/ø×/þ+}ë! 
à w~ŽÒ%%þ2ý—üÁkÿ€;¿p'‡¾rˆóOžÏo'› Ÿ»kÖ]Ãôèt$ЉçF\,ó<×u£Î•aÑÿÅb˲0 Û¶£m]×<;LÓ$›Í’J¥(‹U÷SþNMME¹Ž€ªûìº.™)¸Ò)’'OiÁgEY9xxØØËÀ…¨ª©‰Y·ØB»&iÿå¯ëºÑº«rW¡7Ÿ´s¾ïÁ¼I ]'ï{MÕ[¯ô/ñâãB>Ÿ'‘Hh%Te0ð¼ºB\±X\¼¯æzp¬«YG»{+†â¸ðÂþÌÞ"ùÏTÂ+=*9€$×TòÉsD¼âTkK¨T.¡ç˜ä[Ëz¡åbË¥_!É´ST¯zIãî¬"ôÄŶFý/YÏ/´”¾Z½0.ØùT¼ëÒ±u2fÃçoý<¯åµ­~Z=C¼`%Kh'b7)*ùá`…¸„*+ñh‚Çq0M“d2‰eY”J¥hx»É H*‰P5>>N.—ò¬y³‡âM711ëº|òè…Gàwüßáõ¿ôzì»lÞúµ·rVò,ðCÑìÈŸá×ê×9~æ8O|à ¶íÛ†؆MÞÕÿ7oŽ®¯”(EaQ"¶c`Ûv”H<•JÍ ³IÔ©œN§«<%LjT ‚€D"A:ƶíèÿB¡€aäóy çY€¥&²WúñH­ç1%º|‡ãU7WAù'A7ò„ë5q‘L<öâ¨'ÜêÀcþ\¾zÃ)ƒÀ=÷ÜÃK £nX*Ð\?Ì$tÚÙÔëw£tƒ5³³³>|¸××ÒÏóÊX¡³r’$2QÉ(£¤Háâ’#u(òäI˜—ãÂY¶%‡¡$ …­¸Ç›o’3 *Å$¿š„›JÎ5 ë¡M<×JåmÒåÿ%™·Eµx&^h&Õ®ª©Øù‘øu×M8yþÉ^_å’yî¹ç–ÖWÆã’·OBv¥Î÷}ŠÅ"ÙlÃ0"K¼Ä£­Q¨¥eYLMMU¾3Rø üÒ4ËùŽ\°s6x°ÅÚ¶ð»×ý.oÿOogøãÃ•ŠŸVuçç".‚îdž‡©x²Åg,¥R_/±m›|>‰’À|||ÃÃbå~»®…ÀÆ=%¼×÷}åÊ88åxÿ9-ÖÐdÒI1±Ázß#ÁÄK´P(àû~$ØÇÅ3É&Ë3™L$BKX·ì›Ë媼¬äÄ›J¾'r½o{ÛÛØ¹sg¯o]WpppË?‰ØO¿QOˆSV.ÕCõ†S¹ò_˲Àu›~ª¥Ü»¸}1A¢t–5“““Ü~ûím9àää$ccc GËæææ˜œœäĉìܹ“±±±%Ó²¬rΊQœ²€6Ê(fù'C†,YräH—ã<}|ll\\||räÈ“ÇË:Û dÈàà`•æ!TòIá)*¡¥’ô[<ËDP“©9ŽT‘m;¡Ð"uñ°O¨ïqÖ}¦G;íøôéÓXÖO4wb¢ª$ËÝÔÃCùv¼œ¶8¨“ ð{>ʾéiv|ë[äóùhÀÍf™ššª2)I·Ï¾* =Š匶ñEu#üNÔÛ*îú kµðåØòÔý*§çHØi3$‰ÈSnjj*%2™Lî;>>N¡P NGb›ˆ£’gNòôA%gŸý:ÈéDŸ"ú Êy=<&˜XQ9âj=Àšº±Pè "qL¾ÛñÐìL&C2™dbb"ÊŠÅb•W¬ˆér\ Ö¼‰q!Í0 FGG£ýä<²^µB'í¸Y\\$˜(Ïvö³øÜÏ×¶šig¿¢–ÚÜp’¢B‹¥]tÂŽ/:~®¼2ôˆ«):&“B‹N„ZÀ÷â;öúN)d ÀÞ½{Ù»wï²6==;}û®êld³Y†‡‡#›ÍrË-·,)OÃÛð,i&˜ˆJ {x¤I“!y¾ÙØUï4i,¬¨ÔúÎaÇÛÁa;Û£JP©²òaaU*çIqÔ¤:¨G%w›T1-þ$‘¿ùòúZ¯&ÕâcÕUBÑ<ÿÍÑN;®Ä\L3ümD†Š ¬¨PTñЩÙôO©TŠrÉgN¢T§€t€”eE)3GËÓÄjÖø‚‚ù×B&V5Ú±!åA¶™;À°ˆÄ:ëQ ®¦ºýíñœ¬hÕ¾e¿xuY ïÓ4#/¡D"QUðÁuݨ:­ýè%ש>E/,®±BD8ù\óù0Ü:—ËEÞ’²^„­xÞCñ¤Œ ’”ÿ—å‚|¯·oߎaŒW‰f²¯Ç®ë255 fò°,‹ãÇ×­’ÜHD[]ÌCµ^xwüú…B!ìlɽêݰã…pp00È“rÁõñØòÌû‘GMíríô–v÷+jq©<²Å›{jjªo'Š”Á¤v|—‹Ž—­×²æõ$ÕÊ¢$Á˜¨±÷>ìÇ*ía Àþýû–5‹±oß>¦§§™››«Z>==Íáǹí¶Û€ŠâÜôàÏó˜³æ8¿Ê}^:3å¸= Öv0 ØÄ&21µË¢@iU{§Iá9L‘Š÷’äxKŽ,ÏPU *´ªŠEÆ>2X“m±XŒrÉ,2T'‡ú³àñØ n²Ò†í¶ãC<÷Üsá qU^¬!.PÉ3؃±q<'Vœø`,›ÍVy)HxœT}ÛÏŠ|Y¨‘|U2ø“£ü•J’‚ Òljò^ižÅ©gÇËm‹¡ÒÎÊÜÃã?ÎÜÓOóºr#N}ðƒüÁSOE¥u/µfÊÑ¢QQd™Ïp€Ä=¿¬_gXÓ};Ø.$¯Ò'!{XWCþ"°¾Þ/‡óV¸¨ü; “çTŠ—\£« lë ‚åùÒí|§úõèGNÄZ©,,Ÿ‰„^f³Ùh[i{%ܲVX’ÐLñH“íâ6’Ëåª …Ȥ‰¬ïI¹ 
9—í¤í_H$«·®ö9#×Õìþí`ýúõ9.t׎áâR¤H†LÏE8 Êzüøñè³–Â5QÍõŽ’Î¦™˜˜ˆÂ—ûqÂ`µÐ©~Eœx­8)ÈÔÏ#eåÐ);^‡Y‰t¨ÓN5=†¶À4tÂaµ°àÀ@Eˆ#ƒlñ¨«í(íß¿Ÿ‘‘‘èõØØ·ÞzkÓÇ=púÿðÕØÏ»¥å7i`#W%Ä ™l20eLáÿœŸ„ü#yòšÇz¢pCÃ1ªs¼™„#?ñr+—‚€Ç~é1®ô¯ .9äß ¿œÉÑ$™L¦*d+¾!_Ràòù|”À\–ˬ²2«íy†aDUYÆÇÇ™™™!™Lòš[o…«¯æÔ©Sd?øÁ(éw.—‹éôÇCɶoßN©TŠBPDð‹çbYI´ÛŽïÖœ,ï4á“8N("4µ¶$êyÄEÞxnÖn'ù¾dpHÉöb›’_¼àLÓŒ¼Ä^¥"šwr|Y&"ŸeY‘ƒ²0õìx¹mqŸ°)¼ø¾ûXýõÑy<à«k×òƵk#gaù…JÚL—°-Æãà\.Ï•£úÏ›ÖìU`‡ìuÀ£þœ é=åýßîUáñ‚üÇ^ßø&é¤WGmÛ%³°ñgE¼ÍçËr' Gì5êSÄ S^¸‘7|/‘gº:© ªúÒFË3_î•<“ à ™LD“ P]Рö¸UÉòñØ”uÒž/ä…¶šé†/†‡GB_å‚ÝT*U?L߃/e¿Ä=÷¨Ó'tº_‘¥PÏ…ª(í¤Sv¼³nUg!›Í67fÎç—RÉ ¯¬H:^5õĉlÛ¶-zwËoıcÇØ·o»víâ¾÷ñìšðwËLX˜iTë·ÒdÖÉÐn";%pq½m”OÿÁ§¹êo®â~åxæg¸Ñ½1ô~˹‹­"V`á|ÜÁ° Ì¢…Aå (Þ<’|Û¶m’É$¹\..D Éd2Ñ€HŽ#_ðz%Ú…ÚJ„¹\Ž·~ðƒð×ÍÁƒùÅXèPÕ‰·m;Ê”H$¢p3¹n™ñ®M¼<::ÊÄÄD4°k÷Ìõìì,“““:t¨­Çm–VìøÐ¡CÌ~ík¬yÁ³ág’Ï7Ÿ¼³ËuÒ¨õbÌçóUž“"ÆŠøÏC?дèÿÜ›Iª'Ô"Çu½Nš¿\ÄŽ;Öõs·bÃ<òžç±oß>¨“½àáµk992Âk·n%I%%›HÎâìP O•ù ™ËÈy¡—[æ(7ÀÄð\°ÿ'8ÿ Ì«ÁÛ¹×Bò_!ó([ÊBÝ óoì Ïõ]ÂqVùØQ(lùšL*" _Þ.˜›ÃÂ?rã¹ç~ðŒãÇ 6m¸îºÈ:Œ/}‰ ìAÀclØ@pú4Áà½æ5䮽¶êò'<€qø0ÁýæWpé“Oλ—â˜ð}ðý0´Áóð_øBòëÖQؼ¹zßÇ3Íý[d#B›LØÄ½˜êåò’}$ŸW-û÷ïç®»îZ†5¶N+vüðÃs×]w1;;Ë®]»ªÖÕ¦³è$qT*Ë„F2™Œ>ùL$|/‘HDˆ‰D$Àår¹y9ãÔÄÆŸ «%pß¾}Q޹n²œ¾ñRò)bc÷…Ø'‘HDÌÅb1ò¾„r;4/)¼DE¸&èeÿ¸;†°¼oß¾ª´Hñ é—êç¿zØ¿?¸þñäädU¿¢‘Í.éóp¡'Pe ˜œœäë_ÿzÓÛw\ˆk… 6°sçN6nÜÈ7}ƒÓÛÞÕÖãÿãŸþ#_àëüæ‡2¡Hö–o¾…kíkñJ¥¦ÔS_Zäü½çó䛞d×k*_´d2I¡PÀÀˆ’ü& r¹\$€ˆÀ•Íf#¯A:Ñ@•8'Çi'¦iòÕ¿þkÎN:_<™jCCâ3åµ G\\‘‚išLLLT {†aà8NÛ<æ6nÜÈÎ;¹ûî»Ûzo:ÉÐЛ.¿œ©S÷… šIÎ.ã¦"-kp]—ÇÏ}œ¹—Îá8¡ÿQ"‘ÀqœHP•pRñ™˜˜ˆB?k‘Ï´_eñªb±H.—#NS(¢Ð¨¸W¨ !wâÙ)¹Œ$WQ·ÞŸØñ „×®_¿žK/½”;wò÷ü}´üñÇç¡çžãÄéÓüè «<ß$ÏKõ¦ïœ> çŸ#“íž}vẗ—¾㪫ð7l€sÏ…³ÏÆxæ‚óχçžÃ|æFË›Úr~á(^x!‰cÇp7mbâü.¿òJ¾{äGŸzŠ€Ð@Ä>(¾ü夦¦(^uÆ©S›6‘¥"`z@êþûI›&…w¿›b*…}å•XëÖE©MS±c&m›Ë~÷wyæ†ØrÕU|~ãFãÀµ×òâ?ýSÎyç;¹ëK_bý‹^„÷•¯àãããd2™ºmø¶mÛØ±c³³³½6Ѧظq#ÃÃÃU3Þ›1:"ÄI1×äy)B›eY¤Óéh¹´G©T*z&‹'âÙÌÌLäITÛÆ YÐGƒŠ;wòðÃw4<µ]Hß8>`l„‡G‘"{½ÛÏÖßβ*QTããá_Ño³Ù0çx=r¹p_Çb‰07¹ç…û-tž8’;0•JUU²e ö'÷ØŠ1‹0¨ýãÚJÅ>•´ÙÒ¿SVÒ® Zÿ¸^¿¢õÒ0ÌC ?þ*”SÚWf·µ€ê@022‰'šîw\ˆarr2z===ÍÐÐЂûlذ!ší{ê©§¸ðœsÚr-Ò~Ég_ÂKo|i¸0 —þØ¥<ïûÏ#3‘áÈ?á¯ïýk|ßçÐÿ:Ÿ€`¸:ÏNm)íT*Uå©S›H»V„ë%/{ÙËÝf)¹àÒétä5'ï]rOD˜[Nî“¡¡!ÆÆÆ¸ð {rßZ±ã /¼Ë.» d’r)I) 
Ò"¬IîÓ4yhÓCQ‚v±WÚjÃI¡?µ×#î%"Ô ˆñDõf'¡\â!’ÍfçUÑë†'v|ÇwtýÞµbÃëׯgëÖ­Œñ>Þ•RÿÒ¡CÜ?2µ[¶p”Jýñ2“|”yÂâÒ”—ç;ÚþÇÁ|rçwɳ`ÚŒmPÞ)ã…£Ãòw'Q4*O²Y({ýP,²cäÛ\¼fÞüy2>›6‘úÁð R„ú¶M(ÇŽQ|öYŒ¿¸* hü=Ä=ç|ÀÂb"v‰¬[½ÿÔºu‘×_±üV‚Í›)þð0)`ô¶ÛûÒ—XóÇÌ‘—¿œY@ž&6M2<÷ßOæ»ßÅH¥¢ãM•ÿú@þu¯£¸ú©ÿóð¿ö5œ­[qo¸Ü#à^qEÔËÙþg^èûüÛ¶m<þwÇuW_MéŸÿ™·ÞÊ9›7³xô/þ€ëž{l›¿ýí=z”óó7ÉårÑsaxx˜;vÌË{Õ¯v¼qãFvìØ1¯Ãìáq¼ %tƒ ›ÍFì¾ï3::Z%²ÅÃAe !^Ê™LÛ¶ñ}¿ªm®õއƒd(¨(ÉPȯ5eÇÆÆ8pà@×…¸åö¿ýbT L ‘¼ÉæMMDøÕ:P6Ó-H¥ª‹ZÖüã4B"CêMø¦Á†Ì†ªï•RŸ^ö[±cûǵ¶,Ïáz¼”•N´þ±ô+ž¥ìõî8óòÇSB4Å–Øÿ!¥BÜ@022ÂÜÜ\ÓýãHˆ“ "òÿáÇ£×B+•UGFF¢¤ˆCCCQ‘f9:|”ÍO½ÚÐGú§¿ü'þôèŸòö/¼ë^Ï-ÿx ··ò®Ÿyüܘ¿‘ëÎ\dzp±q1ÁŸ|ëø·¸÷ì{ ¾T%Š•Np>ŸˆYiÇq •âèÑ£°eËòX¦T*E!¶µÎx8­ Tâ9éi–s¹v¼îÈ‘æ„8É‹ q’¿OB‰åš|ßçŠ+®À²¢°¦•BE„”uµßQéØËÀXEËÀYB´ …‚9‰åÚ0TŠá|דּxÝë^Ç7©ä~“I»<¡'Ëq˜ï[€ ÷®4\À{òF¤€Mm{\(¢š«qÑlD£‡ˆkâiÕH òËÛ4ºá3vùÚë²i—×’$I‚..V_"‰D1ÔÉ¡»#±ƒÀ­„’<¶²oS&JGi§‹¥¶ë»0ˆ ¿³AùÇ/OZXø„Q\^ô}÷Ë?âííá°pŽj9Níù串¿‡ý/烰˜—Cö•kk2110¢y¿NBìÃ;wý¾·ÃŽç({óŠ‹p ©>¾ä6kœÎO^)=e „*ôôô4ÓÓÓÑŠmÛ¶U½n•ááan¾ùfnºé&ÆÆÆp]—;ï¼³éýŸ;ûlÎ9sѲ®A’¿÷?¼—Ùóf¾>ŒýþÙ§–ožóM®0¯À0 ®{ïu€÷€ÇÆX–Å«x—r)ö¥vÝA½¨Üý.*Y–ůNOóåÛos•µùØÍ$u–P]IÂ*a¼ƒ r,׎×9R_L¨Ež•uÂw$¼T*âIH©äш OEŠ‹Ÿk…ÓìÌ“a”J¥(Oã8X–E©TŠÂËÁFc¹6 ”;]ðƒüv쨪Äl rS@bzãäI¬b1lo|¿"œù>¬µáƒ&|9€9>l„ù0rщÊ]y½û‚ xË ^À¦K.Á¾ÿ~¨â"<¯a›±PÊó§.¹dÞvò}°m;ªx) ßxóÍäóy,ËÂó<®ÿÎw˜xÃXwfÁ¦¿üKîÙ´‰/Á>ñß]@Èìí°c€4ée‰*ᦆa°iÓ¦¨/ ^nbÕ¨0P"\’Šûf=FËë5½&•xõzHRI»f™ÛGB%åY)±Ø¹òv~yŸzM‡OÝ”¯øÔ+ ¹è¢¶Ñ.;®G‘b4`î‡*©µHuT¨ä2–åòwœ0N6î ß¶‰{rIQµx´H|¬ mFóòùʺxñ+é—ÈkÉó • Ë[·níú{n—{T qíô‚ñ* ÀÅŠ–XX88¸¸óD(Å)3’ œD˜’¿ñïšœ/.2‰è$™ü ŒH„“ïn†Lô]Ž#ç‹kqqK^»¸Ñë¸@Öè¹Wï>Ä…·èü'M‚õAtâ‚\ü=‹ç–v;üÿQHœ›ÀÛà…Ò݇Ëç|p)~¤ûB\;ìxeÏ›çxÑôs¾V/•æ»?²)` À®]»æ%0n¯NB‰Ý»w3>>ÎáÇٻwoSnËÂ1#àÅ'Ÿlzû8¦—ÏçyÃ+Þ€i™ Ãa‹vº;ùæ}3|ø‰’7þåÑ1ÌE¬PBú,«üˆh³‹'åâÞ„’pZ„¸l6‹išQ­^ç$k·¯;r¤*UC¤íŽ ¤£&Å?¤ê®tÔ¤sÖë{6èÔæN¢¢$¾ïÏËy(yçúY„¯µãåØ0TÆÄ—>ù$îºud¨DpËu¡\­=Úâ¶iY0g…‹,•ó™EOßHAƒå|ßžÿôÓ\ñ²—Áw¾SÛn®Êr#j<߯tm;äˆ Tå=ø¯ÿÊëøCî¿ÿ~6¾úÕüË[ÞÂïÞwfÙËÐ~ùÐ!<ð@Çïw»Ûâè–à3ÃÒöžçáyÙl6ô0ô<&&&ªD{©ò,žp+q±Œ7yRýDD6—J"C›J,µ$‡LÕìkÅŽçSã½òö•6Bb´!t»—P©`Oùo²|œø¹²T’PIbF+Üæªo\ÅÁ;zë:eÇUç({ Ì0‰YJˆô£¤ï)¡àÒö‰wküW2ÉáhÖ„›Q¡vì¯|,ËkÓÙH?OrBÆÏåû~Ôï“mK¥Rô,Aïw÷w»rÛݯ€Šç¶¼ßÅž¯"|ùø88‘+ˆÄ,,LL’$£e ¤HEߪdì'y ŒHÔÁJ„: 
Æ+YÄ~âÇ]ŒqÆ È‘‹ŽUñÎQÝgÂÉ&wÎeæïfÂv¯¶«”÷;Ä%¯$g‡tþ¤ú–T×HÛ 'a¤*—èqÅØ±m°3v¸ ‰©DxŒ}åýކûûñîkh§{x¬“Ë™¸+ÒP™á®÷ù)+‚†9âöÇ+Õi¶JS=$ö{©œûÈW]vqKçŒ ý³òöûß^5£z­T¸ë¿Iö²äøô. ¤R©èÚ’Édß&ë\ª?^~¿î:xã[:§x\är¹ÈkKî†ft‡xH¬|—¤síy^äY4ƒîVÛb!}ôQ.{æެ[ƒså_L¦¦B!)Þ!‘$´Òq“Nc#ûk½j‰'×/`,ËŠ<ä;šN§£Pqñú5M³ªhÏÆæo>ÿù…/®öäyág’N‡Þ‰å<§2ù!×ïº.žç‘0M~tÏ=i±ÔhŽvôÁkûµÇŠç¸ê6˵cÑmêÝ£l9&]¼¼,,òä«B!Ń B›/PÀ)+Gâ'¢œTŽÛW#±«Þò¥l+ÂxÓH²Ýøc$OØ–úTÚÊ"á˦ÒJÅ'¬Æ«Œp?)—.o'3¨nÍ~ª'F¤s'ç–}åüÂAÏ¢2i3CE쓯KŽÊ8Ü*oCeÝW÷|•Ÿç盿gm¤U;8wÜ ãããu«˜×%ެ؄϶žºB‰„¸ýû÷së­·rË-·066Æž={cÿþý aÛö²„¸V×µk×¶´­HqïÝPý‘ÌÝ«IÚ/Œ~#þà‹p2ØOâR¸§NâÉK/]Z±†2RhAr¿yž·²z}L*•ŠÂ‚ ÈBÔEˆoɇØOEZÚI†Áî¡!®xúiìo}‹Â#Àë^nÐè»*Bœ ²¡ºÓ×!Ç©jóâšã8A@"‘ˆBÇÇÇ£üCRYXñ<¡Ê)6™]•0DÉ9(v!×L&¹úÿsófŒ'ŸŒxò]ß»òJþýœs¸"¾6$©iL³ìy¡gtì¦iFùʤr¡ f-Ëâ_óšhùJÀë„yåß…žŒqÏd g?—Ä{¶o‘ÖB¦S/¤S*‘@Øgו4áJÖ‹w[|ß)ï÷v5wÂv@¼×¬šm$/ÜÕ‚^Šíâï+{-ו¨ÙF&Špì²îx`tÉÁ”+7°­ˆÍB&­j ’HŽÛxåà 8pë¾ó§ßÁ=åFm/„b”´µÒ^™¦åy•e–eU…76*°Øä™ˆoq1¯Ö£­–Rm5"P<·². ± H‘ÂÅ¥Diъש-zß|/Âö¯@Å0]^¾‚'øTBÄ«*¡øª\6ÑDIÊ.¯x]Õ›¯NhŸ­mÿ 5Ç­¾¹ÕX5ÿÇ_×»Ý}ü¨l•³Ÿæºã3°}û¼uË´Y§(Ãn mƒtë¤{(ëR©°Û躕mjº¥r¶ç…õ),+ÜÏuÃùÞ8¾žÏ0@nEm•ît:l0!ÈÀ/ðã}Ò wé”Ë wêñÙÌl6ËSO=ÕëKZ:-âÑϨ^ƒad³Ùh°?(¡Ð+©–ïPËg"~Ã0V¤WÉ…ëÎ9'ŒXøò—a|<X½îuõwŒW)ÌP-ĵÚÖ‘#GðŸô#ÑD„7Ëd¹„e2™*oñ ƒÊ MŽÿþÕ‘urid;9V¡P N3S^ŸN§#ñ..àýàškø»Ï}Žk_óšè=Ê@Õ²¬Ès8žO¨©B8–ö0¤­a@¡P58v]—L&3oDDÅA*yzÂqT—­‡„±‰§#„ê®ßŸ…“6ƒ„pÆ=¶vôÅÃ,K% 4ci*UB¼òv™ò6>¡ 7JeÀ ¦TïÅfl{ÉõÕƒÃxÈ©loÕ¬_è£X¬­ÉÀ¿¬ÿv²s7¶?ðð"ï`iž8mBúlÿ?{ÿ'Ç]ÞùâoË’%YÖX%ßFƦd'⸉ßâj` `ˆ Ç9à`“îEAö—“îÅa!Þtg³ÖJºÃ:›dšè€9¦‚¹™xÄ”m‚í6SñEãXe°,Ù–æüQýTU÷tÏt÷Lßfž·^óRwuuÕ·ªŸúÖ÷û©ç2::õ¹ò0CB?El“âHÉq©ŒuÞ¿íýü÷/üw ψ¼ÜäA—„~'Iö͵6÷´H®„]å\xãããQH¨„“ærŠ>{-}·äö±Ì&öÚMŠraÿw6¸ÝPL/œ8œT.Ôb¡ › E ¬Ç”vqrnŽ;vÀÅÏû¬æ˜, ôvK€Õº_Õq¤“Œ#"%…²T*År¹P„²íðµëÆŸU‹QA !üLÄ©l6ܧ»È:†îOÖ³mh$gµƒ`±‹x2Lõýp¿Ò¥[VÜ^Âg¾ &ŸI{’Ï‘e®n?‰ƒ¬#Ç(‚\rÞ"ÊýøÇÙ ”…¸ýû÷cÛvͪ¨äóyÞñŽw`Ûö’sX4Ë‹'N,,œ'ÏbÙºî¹õž:ùwüÞßrùËßÌùùœÿR`³ïJ…gmt4<³†Y¹­áápù <ú剜s€ÊÁ•eYüŸÿóºÝ¼¦YûÓs~êê»— ‚€íÛ·S,Éf³:ˆìš™|'½9GGGI§Ó}ºZ/>ý4lÙÂÆÃ)žë†wÏ…¼l%“„ªIhY¾ü¾I=ctt´Bè, ‹E ÃàØ±c<ðÀñ ÆÞð†Š¶@ÙÎL3ê’ý ¾æ a’¶m…Baq!X¼Ž%\5A:Ž Ã$=ý$a¸¤ÈçóüÒ/ýÒ²ŸŸN ¹}œÖu' ;—Â8]"ED«×…H¸g½¦ÉŸSþRx!éñ–'㊄“FÉ$ÿû„¢Ûaâp%'Õ“<ƒ…Ý 
å;2,YlØe5¸lââΫlb.{hªxìær9²Ù,…B!*vD‘ÒV_|ß*’»®K:®ÙÏžKXp¦ç=M•%#݈çyŒdF(™%J”¢\ocôéƒMXx%ì Å Xúòj/Ýr¡6)~`aUöéÚçõ2.›çH‘Ì—xÐæûpGéJ . ØS±à$YÓéXˆQ,) ¶Ñ!î-”ù©z(_-ÔÕêŠky–µŠm‡í«%ÖÚO:½ðôC¶™œz%ºzÔ:Îêõ«·›l_ÓBÜÔÔÔ<·dêÀÀ@TÚ·“á©Ïo~iýyšg"ß–åȳ÷ŸMp" ûébìï< üeù{õÂz$·‘ã„®¥¦ ۿß^)ù AP™˜\„AÓœ/÷&}¥Í’@[¾W-#KÅAЂ ü3ÍX–®¶Bñݬ–z“@µ¿iÒêäý¼NË}<‘ÂE9¸º=b¡ \­^à8•Ó¤T*Åm·Ý¶è÷{‰Ç]Q&Pù8øØØXK!JûÏ©ˆØŽãDÂÈöíÛ{¾ò[=,,ž~úi˜› sú~,Ä-Ä8áäœXD¨U±Lu¢æ¤ð&I«e9ŸÃÃÃŒqpö CG†æyDÈ9_4´ßJ‚ vßOêUžvy²N©wƒò^0Íø)¥iÆ·ž'žØÄ{¿ûW ,aô’ôà+•JѤvllŒ\.Wá5R}£óP£o1M“ÇGïeR](*Âã¿ô¥/õ­'ù„\*6£ÏË^;*ù|¾»}ñBÕ@!ޝÑLLKÄ4ˆ½á|B¡Mªˆf …>‰Ï•Ë'ª^Lœ¨Û$ÎÅf‡/åiÍ[oå=óì89r‘¸œô‚K³Èa“'U8í™PÒzH®JÜL‡^oSž/nÕ$n/=ÌJMv<ôüÜÏU,“T(5)‡;ÿü×; Â)x¹ý0¿ÀÅÑ´¹Ztª‚ê‘Üõ2#n+}ì³Ð4k6oÞ<nº©âýìlç“Ù¬[»«vwT(„ø[A8 6M2î;øòÍ_Žcé›J.™1mÿKȼ»œÐ²†ÀT*…V.3³…<ÎÒéÊ`â XüªH¥bÿÌ ¨ü>Ô–m-+lWRørœð;bÝIS™A ²\|T“Û•öÊv% \ÄÄZÇ/í¯Q5lžßiòt¥Ra›“>°}ÆqÀxþùÐ×¶Þo-ÉY3ÀxNX}O„8¥·a¢‘õª™žžî»ðp!šày^÷ºP”O&b¨‡ˆnÒÛ,‘Éd(‹‘X466‰G###LNNòþ÷ÿG>ô¡¿â£ý.¹Øö$¾—Û—c_nS*ÅÝ]*Þ.¤`´t]ò3H÷ãºa7)O%Åèèüç ’_"âܲM铯“ûs]p/þ/ä,ÂÏeßžßZ’]€Ü £ò Fñsš‹.ú >ó™;yì±oóÉO¾>ò¢“„ä &  Rò$[HV{ê©§ÚmvmaŸBÙžÅ,ÅŠ …BÎ,âC­P¸e§–"XMõm2 NÂ-ñµ.q›8ÜS *ˆ— ùª÷Ÿ¼Vk ÉPY}^Ô5<<\Ü% oâé¶mG¶_*•*Bß'''£þ@óÕ*ËÁ’ãÉò§¿ñÌî˜ešiLÌ…«…vé£%g›,“B3É Pÿ!ƒjl+Ž;“'Nðÿ^y%WV¥-©—çÜ ‚ÇËyƾþS}Œo@Žoò<`ó Ý>4¥Í¬bÿþý5CS!ô˜«å5×5Äí X¬MKåÃßüÎßÄPé›íÏ£AwÙ½¡–<[Ãדê ãFÖ¯å5±Ð“DËš?9׊7FõwR©ú†B³a¾¾?<Ìàà ëÿÝ¿‹½ð ƒwöáÄüŒsê¯ 9ÒDÞ W×Q:ŠišórƒÕC’»W?õß¾};¿ò+¿ÒíCiB/HŸ·X¤$q›ò©Ê0ˆD³oû(³³§²uë-¼ò•?âÜs/æíoÿ_|ò“WñÆ7~K.1›Æ0à—ù¯ÊžhçF¢Y&ç¤Þ‚aÞs‘l6ü[ˆZîë‹^²B­÷µ0b=3JŠ+·’¤H˜ô¦K’çÄ}_œ«3yƲƒM›.dóæçy÷»Ÿä‹_4ÊÏgÂD3"8Êw’¿‰yþ­ð`y¡·ˆ{Rø+_ùo{Û 0Žï§ÖV<<¶$fBÉèÊðœºQ%E :’óQ $‡%ÂëGžá$z‹¨–´É!$ùˆJÄžjɪwµXì6¢=…UDÕ^ ñº—‡~õ²-C¬‚¯or)lÖhj2—¤ˆó’{RÆ ’ r||<ò¶í$ý\íUiއðx†Ïmx®¢jO !ýæó’ükÉ\ocÄOtz¨éJg9evÃØV±Ì÷ûÿçÿù)ï|çÑòR œOCvGøÞü90=à¿uû(”N³`÷îÝQræøÃ¥{]×å†nàºë®ëx~¸ï¼8ÁiO¿ª+ FÍ çl6zS¸E¸–ʧ·Í›Ã4ŸÝõ ¶‡¬ö:¦ÉÀïüëwì˜'ò}iÏÞÔíö5ÉŇu„8—ð&_Η5:<ªá=N.—k¨h†a8ŽSQ¹ÍqlÛæÇÍ$è!zè!®¾úêТoˆà(Ý îd—‹æÂ9u>†ás÷Ýwó–·¼‰ øõ¨PAX˜` ¥ÒéAÀ¿ýÛÕÛ­u‰¤R°ÂF½:·,°.ù,npu”= º­<§©çÿ$Ê r¹p¢=>†²:N˜,=ö+ò0yÞ}X¹ëC'cRøáûqŸy ¿ó;7qöÙ³Æxæ™×ÇÉ“—vût6M@ÀyåI¾88HaM Ê#(ç¤-ýqR‰ àž lˆa„÷ëÿÀlL°ŒxòWþÖƒ·»ð¼¥ü PðàÖrõ}“¡ø¾ä†b^PŽ£¶íp& ÿW 
¹\ì/ª°äÜpY>_)€U¯Ÿ<®ê¾R²EK ÌÄe´zlW*U<Œ Ä N‰Qý@ éþ*V2MÇØØ¼TWÿ×ÿÊ¿U—Dë#¤zd£Ålj À8ŽC*•ŠÂÑ ……BéééÈó­c£üOYù<ˆËçN~‹þé"øÀÝnN%âÐ!Þnâá/9;…F¸êT@‚<ñ’þpã±ðö-®ïµ¢’Ùú%»|¯|¤½°| ÅzˆðXûâ‹ó<çÞüæ»ùЇ†¸ðÂøák*©ê­9¢IFT MYÙ¬…0ÜM7ÝD.—㪫®bhhˆ¦¦¦€P¨«ç-×Nž|î9¶>¶.kl}Ã0ƒw­#4âªÑCœý±u» ËÆi'OÖŸY;D^ RU…¸Þ¦™‚ ŪpdIrÿ'ò'Ý>Œ–8¶q#W\qÅ¢ëI9s÷ u¤~̆ŸFññ,ŽãDL/ºè'5½Žš }²,8Dˆëy¾úU,༶ï*›Í’N§£pK õ•*­ÉÐÓR©„%¹žG¾TÂ{àëXå>Ùqç3\zé³<ñD·Obó$«ÿBœBÖ# ‹/ I:ßPa† ÅŸju4)"‰÷–ëÆ‰“6ß,À-n¼þ#À£¸Ì€{'aXJx§z°?€?HÁç¿ÇƒïZðŸ¸=€o™q(WNÃå°Sá÷ëyš³j•«{B½pÖLß–lK=Án)H[Ú±í£ºRêB8Žå|›\¨tž¢tˆÛðx ÿ~ßëyÝϼniw{ˆ$œ½ÖÜ1® àXEx…„ý®E'øŠÞY^ÿ*Ü„GW29,ÌïgD˜“a@ý¶[VU¿6ÞÂÕ¢™ç…Ime _¾ÏCr”'Û“L[T”è”|âÉd¶Òöjší_•®®èÓ‚f›~ú ¶ÞygÅo"§¦fz³ÆûþuÌVZdmõ‚ÁÁAvïÞÝívpèô)Î’7’¹º¥R‰Ó¾|…3 Ô˜_/·÷’†*}ÁÓøüôÈ‘Še•“ÍNœJDUR!~&÷PIä.õ5äáR²n‡<¤ ï•ÏçG¾ä¾™£È3'´ Ïäuµæã×Nd=3פ"\R«5‘0…u*¶¯@—€€QF£íCXùJÚ"û:õ¡S¹ë®»ÚwÒÛ€ü†ë^x×ïÛçŸ_×p²Y°}B±@*‰U|žÂ¢ÚÕÖT?<…yॅÏÉšÂp;ž†ZXfl›¥B(ÀÉï122ÂôôtT!5Bž²CTÉyçÎÌÍ͵Ҍ®sƒSHu]nܸ1ò\lj+‹§C>^*ôpÔ.àÙ¡Êg9‹šNK%Â0' {Æ­òµcÀ‡ 8—ðIyõ÷Å¡®›ûá)Î ÆÃ«Âéy^”°Ó|>¯EV>•ESúŒïð2LŽ~ÛÇÌ68ï¯l¶RL Ýâr炈N9ÂþÖ"ì£_eé°ßM›per'Uã“Vú9ã´ðÖÆŽÆ]Mío1«o×z¹8—s gš‘‡àwöìá=Ë·åŽqøða.ùìüîïáœë-oy„_ÿõ_dºÖïâR™·UB •¾"9Ïžy£ë6ôýµ ­Õ%N;šÙqݺˆëºÎ(°eË–pðºòÂÏ•>åçÏÍU L3ײY~ ž_zík£h©ô(_Éçc‡ÐB!N²^íÙ¼ß|oô­@<XJ¾qéhüò?˜/¢‰ÐVÛ¥Z«NÆÛM‘{©I ò‰,çE*½hjuÐ_9÷+<Ýó];æV±°xøøq6>öؼ'»™LhÏÑxM’wÞy'/³^F.—‹’ßû¾ßþ*”m")Õú}“b³ˆ¯rÈuq祟 ûäðÃSdø«šÂq#I«’å3 ]«n«‡ÇìgùÛMËl~sÚä¼oÇ/^ý‹¼ç·Þ}ˬ¸¦g÷n^qï½|ïâ‹»ýs4‡ÇFÒ`Z,2çVo†Û¼YÎKß $s(y‘¸–ôìJúÕSta ŒyçÈÞ`³çû{ºÝ´¦x ¸8qì뇆*ŒJjÀTØ­þ=ýôÓÜù༌—aÛv¹ƒÑ–Ôf‘u#bZR\60*nƵ<0».FN‡ì_]Š;òòÝ|ö›ˆ,•¼qþ/ø¸¸|ãØ7øÏOügÎ=÷\ ö¼11±þãe|Çý6¯š|¶{m_«†S¾ï³sçNì8DFBN-«2<(ù:CÉX0?q¨St2©œ¬gËëÈf ÄÞ+Åòw;œÖPrsW§:§q6qÝ8Ÿ¨ä—ˆåzé‡ZmO²®‚¤J¾O¶YÒ>I{«ëGHÞÓ NÑR¥N…[õ}ôèع³ÿDeyPVÝwù¾Ïðð0¶mãyù|^E¸Zø„×f9EH$š ——å Eœ€øzÎ{¬‰ïPþŽG,ŽI9fƒ%Ó}¥ŸUŸQc™D4Ôúûx˜ö4>‡zUmo8¹€MsñKÁ%œNÿ>"Â%QNYF6œ8QqS<ï¼¼8é’W‚öPE%¯«£3¤ê·ŒÑ-¬h.j—ÿ­Tz^ˆ3þõ_ëª^9Q¤a˜¾µZRzŒ‰³Ïâ¼±¦Yå{lúäàÓ°‡P*ç_ ;íãáWâÙfaEB[`A`‚Y™ CQ*x¸"×· '´5»gƒp ; …tøØ¿•øLLF‰lÝÄ$K¶îS³na¬û)FL  ÈŠ$+©æóù(dHú¨ß|÷o299Éu¯¿®'ª+6‹ô…¢… —½†€ðI‰a,Çïçd ¨ =ƒ¸kÒƒ&9†ôï¥orŒ™Ÿ’¢’ïÇM'’ÉÉðFGc±Kr—˽$)Ä%SJ^Ò$’_Ô0BQ+ïgRÛ"—‹£ÇDP“Ó+m‘|¥"æ¹nØVÜ’û‘þÅ÷ã"¯"ÄIÚZm”ÿky…Wüõ_„0~¸¿¨W¨A 
­È¾~MÐ2r JØgŠÐU¼V%WX@8o¥\…ðšNl£Q6Ø“!ãÉS+×;‰õ¢Gúÿnò$Çoá²zqéts…fÊÞø žÿ<}›?Oé/Nyè!àe@xËçé$|½Ëé!‘äÐn¿<×ôñ#'‰$IîO·åò<[ ô¬çáqtíZ®¸÷^ØS;ŒËu]‚ óº¦Æù+=L"ŠiÀ‡’ï7å)T,ÂÖæ ..£Œbccb2Îx}÷Ý•_XNY"ás øÙÛnƒŸù t‚:cäñÐ+ãî³ïƲ¬ŽU‹µ±™¤?ªz³;Ü–ò8·â˲¢‰¼ïûQµÅ¿ÿû¿çÀÆ\ï{‰ZOV'''™š Õ¡…D8¸Ê©äu§Ò#_¼XÄ㥚:›/yíy±¸%"šÕK f"JU‹fR¥MúöFr„V;MÕËÑ-”Jáw’b Dî&=ܪ©.˜N/ž ½º@ i.\P0¹ž`µ£Ú~îç~ì\|c=„„çŒW%“\œý(’7ŒäRƒðÚƒðº“\jr„×§x—Y‰u“!åµH.Ov*ž-+Gð8Ó#tLˆÛ¿?333ÑûÁÁAvïÞ½øÇë—ò}Ó1ç[ý;7Pú„VløÄ‰œþùÀü±Ù7¿ùMòÿØœáærÍ= lÃP!n5ÑŠ?üâ›xç?ÿ3FTt¤büïÂÉÏœäŸ.ÿ'Þòž·D¡ŽJmÒ_}G·›Ðâçû>Žãðè£200°ô ·H+v\ ´åKÊïGGGcï¿ÅŽEòy„ù‡¤âÄEd/öj“ã¢Få} Ÿ5½g,VÇÆf”ÄuÝúúz øJYzø,°Æ€X±‡é†Ü„í»ðõs\øF¸^òÁ@ ð I$÷Îù±k¦ï‡¨ê‡ ££á…*í’˜mɪ ¶@8+b!¬:Qa6;ÿ‘˜ì¤ –ün­ýH¥Ð¤h—Œû®õ.Њ{xlÅ Êî½R¬™±ƒ#ç›#À)Ý£e­"¤!Zô¡å":«Q×#ÎÇÇÁ‰Rmˆå$“-¥aYéyØz™Ž q®ë244ÔÔwž7 8·¶¥zžÇ?þÇäã§}<¬LV+§¢,#­Øð)'Oòþññ¯úíy;¥a³˜=nçÉ$Ú+9:¦_iÅŽ^|ä¾ö0D\ý·‚žÎ<ÍY?:‹c÷cº}¨=ËÈȂϘzÓ4ÉçóLLLt54µ;~àjâ³ z$‹gŽ$r/†?M…¶¹>Øå?Å7~ƒßåw»}¨=‹T£\,—RŸVìø/`aÇî»ÿpÉ%QØHÍìÄ¢€Gèm‘¬€èBæ¿€6ù²N·Êœ@K¥:¢¼Ò0­öÇé²K¦„7•‹SJÕŠ—˜ ^[ÕI}þIJB£—ü©Tüþ_}ÊÞmiÂñù¾²¨$•„-êÛmÂÞФqÕ3ÆÕrƒï­Ø±GÀóÏ?öÁ2 kæ7%´%½‡*ËD«ýññãÇyìüóÁ4qÝðÄöíÛë{(7P)ÝÀˆÂN3d"Ñ-]þ§ô?âfggŒ\>qõtqYsÊ)5? ‚€ âøùr™zÏ'랊–O]7.eñø#• —É„_'GJ5­Ø0À©'xnÆycÀéÃÓx?ôH_Ù©T¹“6f2áõ!^;ÃÃñµåºá5•3åó•׌Œ×kUÕ,•Â}Éu92nGÄ6i‡m‡ÛI>d¯Wá^"4¤=ÉÐÉR)ì ’ÇW]‘ÏqâvA¼>Äé\äx娶n —ÉÜd®'¡G¢F"Zµãc<5ó æ/…?r”ÖF*”•KÏ÷¸êÚ«š˜Wssá|W*@*ÍѪ¼–pŽ÷Ÿý›_ýê…W–Ôr5ÄR)|›uÁ6 øYÚ—gª°¬ö¤KX-,ÅŽ%É6@¡\9gÁtA»lÊ 9æ«Çr¬ö«—÷LÖ±¬pr¹øk^r_LÎIÕfV ­Úñ]xœ÷ø ðTÃßaÎÎ4¨¡,K鯿‡óÏ¿ˆç ¦ ðXÔƒÓ¢D Ÿ÷õ2Á$ƒLŽŠ¥+8ÌÅ”ªŠx7¿mñÄb1 ™UižÙÙY¦¦¦xæ™gº²ÿfmà™gžaýÑ!Î<óL§ÒƒÈ ,,»wfp¶ I"¼¥R•®Õ‹E Ô²m™¤I‰þð¼ÊkI Ëؽֶ“bV&{å‰pnY±P&iFä¸$?ÓBÚP6W…ÊùÝ|¿òœ>\¹¤)ýÈȉĭð7ó"×]wgŸ}¶ý?j šµã£Gòè£òÄñ)Î<ú2LÓ$‘<±Çƒ ¥RiIÉi•þ`ff†ƒräÈ‘®ì¿Y;>rää?{¯Ú_¾ùf†¿ÿ}~rà Ñ:5‡¸føó'PpÂkZ¼€•¸Oíçó111Á£>ÊÑ£G;¾ïVÆÆlÛ¶ ãåA0^ë. 
¥T¬ÜEh“§Ðµ¨å= œ '…{Ž„áÛ6¡Ç›Š$¡_ÇLJŸzçgÏáí—]ŸøÄâùJÄE8Ѝ˜»Â˜™™áСC}7>žššbÝŽ‹9¯œ[®¡Hw“†úÇf ï)½ÁÔÔTSããŽqCCCÜrË-QÜõ®]»¸êª«lã“ý-8µË ½;û™ðØcuEˆkel|àÀެ=‚1Û®/a¤BÄnì2ð­à6‹CØ×gˆ½ÜʹI{†Ê5Ò'"œŒ¥åA[uÊ×­¼çÉÃ{Ig!c|Ï‹Ç Õ•‘'ŽôM"ãÓ ·ë8ñ8_<ùk‘J…ã IÑ÷ò—ŸÂ¥—ößøøà‰6>û³\òÙÏΫ¨…M(úöx=¥5:Äún|<55Å©—laÃsÏaP(* 6„ã 7ëxbE"B\£tDˆ¨¨®68¸°qÎ9çð⎞»çȼŽ9ôls˜¾çœ8õ¿ÈK~xáøvé“}T„[ÁìÚµ‹]»võËøx÷îÝ\û쟰ûŸþ ×Η¯ÏD¿,”ûÐiâpýŸ{+ͳ{÷nwDˆ»ùæ›9pà@”qbbbQ0Ÿx"ºãˆW˜iB1] 8EGÇÞÒ7’@„5¡:ô®ÖúI±¯ÞƒÍdÒdÛŽŸÊI¸žäöÄcOJrà"‘ 2|½²‰~öWlÄ{°Ÿ'К _¿žÙ;È&¼6ÍÀÄ´uV§tžVíø9àœ'€°_ˆ„8`ãÛ7òæì›»}xÊ*¡;>¸é k1°}“žžßü£?ª¹ž—/çu ˜%(kõg¥ ´Ú‰Lß…B!žðåóqN–Vž‚y„ ð-BÁM&Žã„ÞHrÔIE'"•Œs¹¸ÎƒDŠˆ $""¼É1Y'"™÷*sÒ&ó±f³‹Wh¯~hnYU®®›7ò °º-Œ«sSWÿ|õÂÀ{ÁK·U;¸à?`͛޴ðJ.¡øÛ'^–J²;~qÍ.ºˆÏ•$µIº:B8Hèsqg¹IŠsÕáuT@¯^GÞ‹÷]:çÕ“}IAŒd’IîåÉf2Bbd$ü¿:TW]"Þ%gݦ†Pˆ;ç‚Ë*’Ûûs²FOʪ£U;>òâ9÷Ñ5@Uˆ¿ Øðà›äb.îöá)«„VìøÈÚ# V›ùîé§sm7ðÑѲÛÄút°¬´Vûc ‹ ˜ºå>ÿ¶·Å¶2h*É0 m½ìœä™`•óÃ&E5Ó‰÷X²8ÄãCqrZ¨IÕãÒÅÆ©B¯Gq¬&Zµc€‡×¯‡…ªT–ûâ.T¤9™3¸šjÎj‡ä{q¢œåò^Ò Õò¶OI¯AIù¡ãDõþ™_U{ŠƒE5–·ÇóÂm§Óñû¤“…eÅç¦:Lº:í’¤’©Þ—¬#ôÉöÞwßÏwþǦu;öñ9¹f 6lHü¦A¥·²Ke•èrþYEéXhê-·ÜÂÔÔ³³³ U¸Öâ§kæä† ËÂäï9²~¶cUù,UûæyàU'Ú¯^·ÖzIªŸBŽÅbÌO´a‡îûðæ.9Ý´bÃg<ý4ýÍWÜS¾ñxðȺG⊿ŠÒAZµãã§>Ä`°¨œT=ýôÓ|ñÁ/ò5ïkŒ5”¡V´bjë´jÇxÿ|Ê)\[ýá À¹0> ¦x)JiÕŽ þå¯ÿšë_ò¶LN6_ÌJ¼ ¼,”þŒ/@¾<¦K> …Øck¡4sÕÞb½ò Ui/­Úñƒ¸\±¹üB eHš¿¼ÑضC±')&C,²I¸²DvÇûDpÑK¶+1EÔJ:T j"Ò%Ó%ïÿâà ×K-/PˆE±ä{ñ0M^úÉý§Rõ2Ä©¢:Ÿ¹ˆÉh™·ÉºÕ‘Nµ\ù©+»{ʪ§;Øôü«€G¢ÜfÉAßW\Á~÷Ý>¤¾c9 ¢¬všµã-ÀÁ'žà±Ë.«Xî8𣘚ƒHé0ÍÚ±‡G–,Oý·ßä’ Kx7òpÚðÞ îgÀÿO õ`]éoÅE)““ê0Ó¡G9AÍ‘qØ•x-‰u“pŸÕËäSí$Tk’>¬zy½õ[Y§Y¤¨lõù©>oõö”ÿ̪ePûÜûÀ“?ßO"¡Y;Þü• ?ýôÂ+UÍ DHhûöJAKD¶ññð’ô:ò`?â\|âÉ&äó•¢U#aÈÕë4R¥ÙôDõ¶]OèndäzÕB^uû’T¯ÛÌqö ÍÚ1À©/œÁƒÏoã`I¼«”¶z¦²êé¨× s/¾Èù3çU¹øú˜† Ï©«(Móã—þ¨tËž¹u†ÁTïiP”F˜;q‚™óÎÃq*CS·lÙÂWt¨bÎ C|kˆUçØì}öYŠ WnÏ …¸Áÿ lB‹2(}ÁÌc1ûõ¯3°@'â—ÿnóàÄO_†ÒgaÍ%°õSp¡ %#4ûR´íùâ,kÄÉ­œ…#zm–¿ë%^‹xæ'Öñ«þ’ß¼ªe~b_6Q¶„hÝ€…#À¤]Ÿ'‹xX«}Ò–fÂâÃwø–NÝè¦À„ znɃúÑÑJOÍéª'R¨CHŽWR xÖ)J³ü8åÄ ØxžWÇΤZª¢TѳBÜñuë8cv¶"ÙÈÈÓÅi(+}Ãšãƒø~lÃ/¼ð‚vÆJßqòäIÎ=÷\üT G³Ͳæ§k8ƒ3ºÝľ"ŸÃ`$o‹Ò~ .Àó¼Šjf…¤‡ ´ƒ%ç‚ ˆ‹NBìA$"ƒ•X§š…¼rD`X õ&övby­¶U µ<}Zi›o¹ðò<Ï)‹Xt‘öU{Õó 2Ÿ'ÿ—ïÉ1Šôà 
/dçÏo'ñð˜¹õVoÙÂ+ßð|âÜßFù¼%§ŸæàqþtœaAêŸÀ:ŽE–CYÌÁÆnbÝålW/°¿Ï„¸£kÖ°µŽGœÿ{ûCð†+#f+°¡÷S¥ÓÜl8vŒû6¼£œQkdd„ñ¤±6PFYô¬7wÊ)<»vm…ˆ1==—RW”>àı^?uq·›¤(MsêÉ“<¼ux^HÂC·=Ä–ÿ°E…¸&‘'ú:iè,[±¹ÄŠ%ŸL¦\•(=…±$ÝvõÿA†ÔƒjýìÉp¾¤ˆŠ,K{é%Ú-¢VRÀ’m j|^x7-v>ªO¾—ôœ2`™kÔ×FÑèØ.¼^|±{^®ö[|ñÎ;Ù|õÕüá9Ë&Ž+ ¸¥¸˜‚ÿ0 À€KÐq´ÒUfÏžåŒ#GxÝë^7ï3ÉÕ–ÏÃØJQI•ËO ØüÌ3E=ŠÅ.TQú’žâ^8q‚]|1®[£,y‡;fŸ¹yË ²dqËÿ– »ü¯‘mÚØx _óä)QZt]ƒ)J”ÏÏ ŒŠ÷ml ‰áºQ³È…‰‰ÑP[-,üÄô§óõvòd"EÀìg¹ˆ‹ºÝ,EiŠc60uÎk±ÎI,ôàUïxhÊæqœÆ«*ËÃãë׳öرðÞ?…ù„Æó@ÖÖX 2ˆ‡}˜â¦¯ ‚ß÷±Ê*µçyÑëR©„mÛ†ã8X–…eY¸åìä¶mãû>¥R‰lyà˜Éd¢IQ¡P •Jaš&¹\Ž´e‘J¥øÐ_ýüÚ¯uûÐBÆD·=ô÷ÿùŸóÛGaìq"•Öó ç„ÿ‹C†y%0ÍÂIÆ¥ƒÌž=ˆcðÜùçGË‚2W‚qêüSEéUîÇã…£ë+ÂRÍê§Ö9´úºR“žâ€ò+|býÀ_<Àý÷ßÏ[ÿô­oG±NÝì9,,räH‘ÂÄÄÇŸ'BùåvCú€€©Hüªµ]/ëÌò¿j\ÜH(óðÈ‘«{<ÉïdÈD"›´K´äqdÈ%K@€‡G@PSD,QÂÀXô®ëFÅ r¹étÓ4+xžG©T"_®J122B±XÄ4M'œó¦Ói<Ï £ßúŒàäϱÐÙq à e¥BwåqõVú’žâž;ý»üä'¹y¾äû/aà•«–š¤žp”&Í(£äÉ“^ÆZÄ©]þöd%LÌE¿WïóZí²°(P`Œ1,¬Û.ŸY‹$&ñðppððc¬â³=ŸÝ7-õ wŽ GŽOÛáuc¯ãué×-y›ŠÒiÖy gIõßà×Ö}‰«­«»Ý´¾$• 'Ì}8Þì[ެ]ËæÃ‡õ`CþÛ`Ø4gÚ'$½Èr¹\$:9ŽƒmÛ‘G˜iš¤Ói …AÏç)•JxžG>ŸŸ'¤%'m¾ïA$ˆ‰X—Á,ËŠ&^@Åk,‘E=›{H¥RÑ„Ï4ÍŠõ’ÛªØäx è˜Ü'Û“|LÖYguû'k˜€€~öY^6=Mú•¯ E8œÿ…ÏAöW!¦qI—ÿ¥Çx~ýzÎüñƒ^‡AŒ}en…õǵ"z•J¥HÌJöÓ®ëâû>étš rår9R©–eá8¾ïW]µÖs]×u£õª…8Ø‚ ¨¸xžõ¹"Ò™¦Y³¿•¾5éfšfEŸ|’¬0jš&/yÉKºýó4ÅCÑoþ=ð)ºÙ§Ð'}J]zVˆ;eîeìØ1Ë?¾ßòî-=÷D/Mº!a«°°ÈiXäk”<ùšÞzõÚÐh[]܆×ïeÖ>¹Žà_Ë î „qNÚ!+}Æ)'°¬²—LÂÕ¨×*¶­Þpæ‘5wñÚnç瞺 0^F(Rôø¤O&DÕa—¾ïã8ù|ß÷Éd2ŒA…YÒsLÞÑÄ ê a–eUäºI aÕߪCr - ¼¬xxüÛI“~wlÆÁÿa¸ûäoƒ!^ž)ÂdqŠÒƒœvâ³'OBîVp®1IÙƒ$…³dß ó±¤Gp.—«)ˆ%·—Ü>TzîŠØa_š,ìÓU¹.ê­gÛvʼnzÛ‡4µÖKö÷ =øHÞ Z/‰a tÇé¦UŽ?ÎÀäwÊý­eYq9f9…ý?UÚÄšn7 ›ÄŠx8¾ÓsB4æ•Ö ØØ¤H-»¸Õ.!ršéyÞpýÆiÇOçøW~™ÂAxèƒñÄÁ'VNy.eÕ0»is'O’JÙ£ƒä~Ã÷Ã02¥³Ì}û(¿òã_aû«Ê ²tÕcÈ÷}d¿O×r&ûZ ò2ƒðw[(ü]D§t:õ¹–eÍĤßM b†aT]ÕBœöÕ½ÍñcÇ$&ònÄ!´ç4¡WÜò¥‘WV=+Ärâýèëа͠U¸>#O~Ù=â”úìx`€sgLìwÃE¿q[‡·êS¥ïxæôÓ9ûÇ?¦T"LØa®'¥uœr2u¥s<77ǹ'NðƒËN#uMg÷-NîdR-!>{Z@8IO4Ó4£IZ£^ÊÊåûG¾ÏÆûŸÀø L(ˆËÏŠøÖÞEÍ’±‚ ¨’âT.—‹„°¤8U-bŽŽF¯“BÜèèhô}Çq( ÑþGFF¢ï ×Á$VrÛÉcý$=Ÿ`¾QR€q'•JUê’gBq'݇•®þóØó»? 
á3`ÝLÛÄãj-i+Éþ*·¤ V-œU‡à«çïê%Xû}6þp-¾Ÿx¸&©|BQNÇ{Jz64`ýú0¸Y21 ¦>áSúŠc?þ1g¿áìà¬ûƺn7IQZâŒ; ç@ê;@ ¦óšÜl)F9\]é;~ô¼tÍ`ÝÐÞç!’WÇó¼(DTr©AeøO2·Yu(¢ÔâØ‹?æŠo_Iú¡\âºn$TˆW‘„Y'“¼399 „¢X6›Å²,‚ Æ—óVÉuA…’Ì9UF— ¥«²—Íf+¼šD0«öj’6V?y-W–¼®«óÖ+,²ÒC¼×Ḭ̈ýÈvJï„Ô—Xo8ß÷£Püd®K¨Ì…VíÁ&ÔòLS”Åx8~ËîéN—ûá‘álMI¤Ô¡g=â&'žÿÔóÝnŠ¢´Ä†õëÁb±X¶ (ýÂúÙ0mvê^¿Ld³pøp·[±º¸)—ãäš3Ã'ÔË€xNxžÇðð0PvfYV"jYVÅ$[QZå‰MO²Ëß„¹&)ü:J2TOraÉò¤YÒë,é VíÝ™¸ÆÆÆj†ð%¾W‹XKªª½šLMâÙV¾:8È}gýJKŒ Îår‘}9޳mÛŽ¼ ýT”¥ò‰¼òä°¬D'ŽC6a¾8E©CÏ q¯xàð…wï¸[Cú”¾ãeßûÓ}%¤`ëðVØ)}ÉÁmOpâ¥Ob_ ˜a~šåÌ«£(`ý=÷ð؃ï#ó®Ö·!9œ’!r–eE"B2ŒTQÚÁ)'ÎäÜOaÁ—vê µBAEhö<¯nîÂj¯³dEÝdxŸz­nnâì÷6÷ß÷+BI“ãê|>_a_*¶)âÚ_Ųs»ج¨TÊòÓ“BÜñÓ³íÝ¿æ$ÊÂίïÔ$÷JßñĦMüdÍY]áuP ô#GN;W}mˆ ˜¡w§Ú²ÒoœzòT~øÃ+±£¹ïår¹Š$Ý’Ç)YAQ:ŶG>ÁÝ—ŸMÂélÉ$ó›yžåQ“pR!NGi©T* ëKQ”Fùѱ3iÄl$ÌB›Lz' Ð(J·˜}nãcãµ#Gô™ƒR‡žâÖ]ÏÀÏÅ8Þõ`¨$kê¥ÏxüôÓyfn þâÀ¼½ŠÒ/¬™›ãÀ_½?ªò©žJ?ò·—ü.úÐâ뺮…8%Ø4‡›ÒmîþÙÿÌG†Yª3™ëº‘·[2÷šiš"sÒÃSû~e¹xùK^΃kÖPϤ’…l’â›zR*½ÆìÀþïRY˜¯W¨N¬,@O q‡N^ _…¿øÀ_h”Ò·lÙr˜Á}ƒjÃJß²58~ô ÌÑÊ|,ŠÒO<ûй\rvý!Orâ—Ìç©ÞÌJ/1÷â·?xÙŠ—%+ƒ&½ØªCF¥ÝœýèÌsëîºn䥙¬ «(½Æ¶Ï^ÏeÃ[)•J8®3?‚Oç•èÙª©Gæ¶`ÿl· j²Ò— \Î%[/ap­Án7GQZâ‚ïŸGNœC“+ýËÜioä5¯©/Ä ,ËÂ4Í(É·¢ô÷ÌÝÄ©§žº`n8ß÷#á"Y4éé¦Â†ÒM~üã—bW¹LB¤óù¼·Qú†½}”ÿµ˲8÷[ç.}ƒÊª¢g=â5þ](ÀéœOéSfgÏÆÚ ‡·hyD¥yõÛÅÅÏÐÁ±Ò·œxî"ì·V.K&þÖÉŸÒ”†Í«5::y.'+*J¯ðÀ`ªrYuþ7Eé{öR,+|¸±éS›ºÝ¥Ïè˜GÜìì,û÷ïçÈ‘#ìܹ“]»v-¸þ9kÖñ©'>Ň‚©†Ò4kÃÇ:Ưûcòh%=¥7hÖŽÿqàr.ß°Žoþô›\É•Ýn¾¢ÍÛñÑG±Þ¶~Þrø)ݤY;~è¥;ÈÖ0Ù‘‘ŠÅ"¦iFU|¥S4kÇÛ¶Œå\.G6›U/M¥ë´2Ïû‹ÓÎåå?y€/ß|ï0ßÑíCPúŒŽyÄår9fff"—ËEeÏëa<¿ò‡¿¢"œÒ34kÃÛ¶äÓgm ?®"œÒ;4kÇ[·Žòòƒ/áÁ×<Øí¦+JD³vüë|η}ûv |‚­ÞBJ7iÖŽ¨PƒäÐ"NQºA+v,H%jEé6­Øñë_-oyí:^ûW¯-­4IG<⦦¦8tè7Ýt+Î •;ß\õÊnŸEZ³á—þø¥ ãb¥‡hÅŽw¼x*ö`Ýxm·›¯(@kvœd||¼Û‡ (-Ùq"ÍžçaÛ6†a¨§tVûì{‰·€IDATcÉe¨Õ¨•^ U;6 ¸øw/†_4½¬Ò$ñˆ›˜˜`hh(z¿k×.&&&üŽc8Ý=3Àž={ºÝ&&&Ø·o_·›Á¾}ûýÍV2­Øð¡gñ食îz»Õ~bzášî&­ØñO›æñW~µ«íVûéÍvt‹VìøÏÿüÏqœp\Ñ-Ñ¢WìXÛÑ´bǹ\.ªî›Ïç»âIÔ+ý¶£7hÅŽggg£üœÝl·ŽcÔŽ›³ãƒâyÿñÉ;xzûÓ«^„[íö“Dl£:âwäȶmÛ½\¸‚ä½÷ÞË·Ž}‹Û¯½M›º—øðàÁƒ]7¬gžy†ÙÙY¦¦¦ºÚŽGy„;3Ï<³+ûöÙgyä‘GxöÙg»²ÿfmà{O}oÇý{îïJ›Aí§šn_ÓbÇ'NœèÊþ›µãð™CŸáôçóÕ=ÝãÔ~z«O>ù$O>ù$çž{.{÷îíøþ›µão}ë[<ôÐCAÀÝwßÝñö 
½bÇÚŽ˜ƒrìØ1€†ò-'­ŒŸzê)î»ï¾Šïušn÷?ÚŽJúq|üÀ°iÓ¦®ž7WÒm;~òÉ'9|øpߌ<È}÷ÝÇ=þ=|qû9ºçhWÚÝ+tÛ~z…Gy„dzaƆÖïX±†føæ7¿Ùí&(Ê’é…'\вöîÝÛ¡EQ–“d.-EéWtl¬¬t|¬ô;ôGÔí&(+€Ž„¦ UÑ|à 7àû>_ýêWùƒ?øUš•¾AmXY ¨++µce% v¬¬ÔŽ••€Ú±Ò N™›››ëÔÎfff8tèCCCjØJ_¢6¬¬ÔŽ••€Ú±²P;VVjÇÊJ@íXé$âEQEQEQEQeµÒ‘qŠ¢(Š¢(Š¢(Š¢(вÚéHޏ^cvvÇq8pà333 EËÿæoþ†oûÛlÛ¶­â;õ>[޶ìÚµ«¡}µ£û÷ïÇuÝŠs±Ð¾Úu.”Æ©gÃòY/Ûq»Ú vÜ´bÇíüÝzÕŽ»qM+£v<íûµãùhÜèøx>jÇËÇj;7½¤™ô ˩ݬ:¸ÙÙY®¹æ ¬âº.Ùl€\.Y.—ÃuÝè{ }¶öíÛÇÍ7ß\±¬“íØ·oìܹ“ýû÷³oß¾E÷Õ®s¡4ÆB6 ½oÇíhƒÚqÿѪ·ówëU;îÆ5­4†ÚqíýkÜ_¨×Þ¿öÇý…Žkï_íxùXMç¦×4“^a9µ›µÝ>˜N355ÅÀÀ{÷î`çμá o`jjŠC‡qÓM7¡ñí߿۶ül)¸®•IN¶¯S혙™a||œ[o½ÚñññÛ188Ø–s¡4N=–ÏzÙŽÛa?jÇýI+vÜÎß­Wí¸×´Ò8jÇ•hÜŸ¨W¢ýq¢ããJÔŽ——ÕvnzI3é–[»YuqÛ¶mãúë¯Þ9r€‰‰‰ ÷å]»v111±èg­2;;Ë7ÞXÑ–N·C¶733mçºë®[p_í8JsÔ³aè};nGÔŽû“Vì¸]¿[/Ûq§¯i¥9ÔŽ+Ñþ¸?Q;®DûãþDÇÇ•¨//«íÜôŠfÒ+´C»Yuqƒƒƒ á“‚l6Ëu×]Ç‘#G*bve`ÁÏZ%—Ëqýõ×Ï+ÜÉvÌÌÌ033Ã?øA†††8pà{÷îe÷îÝu÷ÕŽs¡4G=†ÎÚ4oÇíhƒÚqÒŠ·ëwëe;îô5­4‡Úq%Ú÷'jÇ•hÜŸèø¸µãåeµ›^ÑLz…vh7«NˆƒPÑüüç?Ïþýû¹þúë±m»"‡I»¹ù曪Hò×Ís!.ËSSSìÙ³‡Ý»ww»YÊ"Ô²áN£v¬,µãùçCí¸ÿP;ž>ÔŽûµãùçCí¸ÿP;ž>ÔŽ•Vé¶fÒ+´ëš^u¡©{öìavv–/ùËQ=44ÄÔÔT´ŽÄE/öY+8p€›o¾˲°, ˲"÷ÅNµcpp°B¥Šâžëík¹Û ´F-†Þ·ãvØÚqÿÒ¬·ãwëu;îä5­´†ÚqŒöÇý‹ÚqŒöÇý‹ŽcÔŽ——Õxnº­™ô íÒnVGÜþýû˜ß+'jvv–\×gpµ>kIØ'X–…çy@èúÙ©vìÚµ‹›o¾9ÚÞÄÄDä2Yo_ËÝ¥yêÙ0ô¾·Ã~ÔŽû“V츿[¯Ûq'¯i¥yÔŽ+Ñþ¸?Q;®DûãþDÇÇ•¨//«íÜô‚fÒ+´K»YuBœ$¬5SðQ¨÷Ùr³Ð¾–»ƒƒƒŒŒŒpÍ5×°mÛ6:ć?üá÷ÕÉs¡Ôf!î¤ý,D'íGí¸?iÅŽ;ý»õ‚÷Ê5­ÔFíxþ¾´?î?ÔŽçïKûãþCÇÇó÷¥v¼|¬¶sÓëšI¯°”ë锹¹¹¹n@/133áC‡šçJ¹ÐgýÜŽVöÕÉs¡4O¯Ûq;Ú v¼òè•ß­ì¸W®i¥yÔŽµ?^ ôÊïÖ v¬ýqÿÒ+¿Úqÿ¢çfñó°ÏQ+çC…8EQEQEQEQE髲Xƒ¢(Š¢(Š¢(Š¢(Š¢tâEQEQEQEQ¥¨§(Š¢(Š¢(Š¢(Š¢(@…8EQEQEQEQEé*Ä)Š¢(Š¢(Š¢(Š¢(JP!NQEQEQEQEQ:€ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ” Bœ¢(Š¢(Š¢(Š¢(Š¢tâEQEQEQEQ¥¨§(Š¢(Š¢(Š¢(Š¢(@…8EQEQEQEQEé*Ä)Š¢(Š¢(Š¢(Š¢(JP!NQEQEQEQEQ:€ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ” Bœ¢(Š¢(Š¢(Š¢(Š¢tâEQEQEQEQ¥ô¬722Òí&¬ZÇ!—Ëá8Î’¶ãy¹\®Û‡ÓUÔŽû‡ Èår¶¯6¢vÜß4bÇbÿ+µãþ V_ jÇ‚ÚqwèäØXíXi­Ø±Žë£vÜßtÛŽ{Vˆs]·ÛMh˜SN9¥ÛMX6r¹®ëbÛvÔY·Jxž×íCê*jÇýÃðð0–eE¶¯6Ò/v¼Úm¸Øq.—£P(t»©mEí¸?¨ÕƒÚ± vÜy:=6V;îÔŽu|¼jÇýM·íøÔO|âŸèöIñ¿þ×ÿÂó¹\Žï|ç;˜¦‰mÛø¾Ïe—]V÷¸‚ à±Ç# …†aD¿ïûÜ~ûí¼ï}ï‹ÎÕ±cÇê~§ßiÕŽkýžjÇóÏíbÇ õmK¾_ë¸\×åöÛogllŒË.»,ÚîÏÿüϯ:–sÕovœ´aÓ4ü½£]vÜ̹hÖŽ[é‹¥­¥R 
Ïó¸ï¾ûè‘!Á²PÏŽ›S4ûÛ-—/¥/®w­,¯u^ÛeÇõúâ÷½ï}jÇK´cSÌ?¯½46^­v\ë3W,¿-‹W[µÿüÏÿ¼Ž›@í¸ÿƽ¬Uô„G\ŒŒŒàº.¾ï3::ZñùÈÈžçáû>ÃÃà þèè(ŽãDÛpÏóûÑÑÑhžçEn¤®·¢¢º®‹çyd2FGG£®÷óááa\׎Á󼦗/ÖÆFŽÓu],ËÂ÷ýHáÏf³ —l[~3ÙO5ŽãÉd0 £áïô­Úq-næwS;Žs!ÛZè¸LÓÄ÷ýŠó`šfÅþWƒ ËoÙvœüíä}¯Ùq³ç¢;n¥/–cÎår‹Å®ÚÝr³7;¦hå·[;nņåØûÕŽé‹Aítl¼ÒÆÆrܫ͎Aǰåzv¼Øqéø¸ò·T;î¿qEOks=@>ŸŸK¥RÑûb±8'M+‹s¶mGŸe³ÙèýØØØœeYÑgÓÓÓs†aÌφQñl6½—m7ºÞb$·Ì>|8zŸl{-ª/ŸÏϋŦ—/D£Ç™Ífç,Ëš3Ms.•J͆1W,<.Ù¶|6==]q>lÛž+‹s–eU|¿Þwú™V츞 7ó»©Çí_Ì:®l6;Ìs†aÌMOO¯:ž›ëo;N®Ó‹vÜì¹hÆŽ[é‹çææ¢ï6zŽû…zvÜʘ"ù›$¿×n;nņåXûÕŽe{Õ}qrµãÖìXÇ•ôÚØxnnõÙ±¼ÖqEûm¹ž7r\:>Ž µãø\ô˸¢—µŠµË/í5뺤R©è½mÛÑkß÷±,«â3QT=ÏÃ0Œ …2€Šï,D£ë5ŠmÛ‹º'©>>y:‘ËåšZ¾\ÇÓÓÓ‘=66¶àqY–}fš&–eU¨î⢛ü~½ï$û~£;^Ȇå<5‚Úqå:õì±Þq¹®K©TbzzÓ4q‡ÑÑQòùüª²a9jÇ!í°ãfÎE³vÜl_\(°,«ïm¶õì¸Õ1…œßFXN;nÖ†kc?Ùq½¾xrrP;^ªk_ÜÚqvbl¼ít\Ñèòå8ÎZvœN§<.Wž µã~W,önÚqO„¦6cIã•cÛvô7>>ÞíÃYõ’6»¼LÓ¬p7¶,kÞùnô’Û<|ø0À‚‰g[ÙO¯Ñª¯4–ãë–×ÚöbÈÍUì?NGíYM6 jÇÕÇ×Ov¼ÐwjÙ±ëº N9å”(‘ï)§œÒ7ɇ¢Q;^éc 9Æ~±ã…úbP;®ÇJ·ãÕ06V;®}žV’ËñuÖ[µcǨW_¿Œ+ûN7í¸'„8˲(•JÑûdIeÓ4+~¸ä ɶíH™4“Étûpš¢úø …¥R©éåË$îC«—›¥‰O¾µX:­b±H¡PÀ÷ýE¿Ó¯´bÇ+Á†k_7í¸Û’|Émˆí®&µã~¶ãfûâññqæææ¢?€¹¹¹mÇ+}LQëûÉŽê‹åsP;^évÜK6Ü©±ñj´cÐqE£Ë—J«v¬ããµãîÛ1tf|Üê~š¥'BSÓé4¥R‰ááa èt‰òž,,ˆQoß¾=J&™Ïç;Þ~q9måÇ©>v¹K¥F—/¦i’J¥nê|†ÁÈÈHÔY×úŽeY¤Óir¹étº¡ïô­Øq¯Ø0¬;nŶ²Ù,®ëV´§ú{«Á†¡¿íXl¸Õ}÷»7Ûµ÷é"õìx¥)j{?Ùq#}1¨¯t;î%îÔØx5Ú±|¦ãŠöÛr«v¬ããµãîÛ±K»ÇDzãSæä±KT… "eâ8ÞdE!Q)“±¼Ä÷}|ß_’JZOimvùrO#çS\éÇÆÆ¢øêÅž°´ò~¢;î¶ Ãʰã¥ÚV£¿ÃJ·a9Fè/;^NG?Ùñj°ÉV¨¶ãÕ2¦HG?Ùqu[ºyOì%V«wÛ†kO»ÆÆ«Wtß–›±ãZíÑñ±Úq¿+zyLÝqB­IJÞŠ!;ŽSS‰o¾ïW¸žV“ÏçýqÙF½chdy#Ûo†VŒÍ0Œ¦/²V¾Ó´bÇí´aXº7jc½bÇ­ÚV³¿ÃJµaèo;^Ê÷:Ž~°ã•l“­P}.V˘b¡ãXlùr)Z±Év÷%ýÆjµã^é‹¡sc㕌Ž+æÓi[nUPÐñqŒÚñ|Vêø¸ÝvÜSqõðõ©Ou»‹ây_üâÕ~ÊôŠýôB;Ž9Âàà ×^{m·OÇ¢|þóŸgff¦ëöÓ+v¬í˜ßŽw¿ûÝ=Ÿ—NÇÆÚŽÅÚ¡ããÆÑñq%½bÇý2>þ?ÿçÿ°ÿ~^õªWu»)=A¯ØO/ðøã³qãF~ï÷~oÑu{Rˆ{øá‡yÛÛÞÖífpÇw°sçή¶áàÁƒ 133ÓöÓ+v¬í˜ßéëzk;kG¿ ãã˜^é{ÅŽûe|üðÃ÷„÷ ½b?½Àøø8÷Ýw_Cëö¤·iÓ&víÚÕífð®w½«ëíØ¶m—^zi×Û!mìjÎ<óÌnŸ†¦ÚÚíßMí§’^¸¦!ôÐéN?ýt.¸à‚ž8gj?½×Ž~/6oÞ¬ý ¶£.àôÓOïö©Xk;BÇÇÍ¡ããJzÅŽûe|¼mÛ6vìØÑç¬èûéžzê)ž}öÙ†ÖíI!®Wؽ{w·›Ààà`×;g@/®>Eí§’^¸¦•æQûéÍv(ÍÑ+v¬íP–B¯ô?Úe)èø¸µãæ8묳ºÖÜK¨ýÄ4cZ¬AQEQEQEQEQ:€ qŠ¢(Š¢(Š¢(Š¢(ŠÒ44UQEQEQEQEQHÖS÷³ü¿_^f–ÿ< 
f6mjxû*Ä)Š¢(Š¢(Š¢(Š¢(')lµB@,œù5ÞËŸ]þßK¼—ý»UïEt«Õ.èÜò¾¬ò:“\ÀçœÓP›UˆSeU”ÿ7ê|V¢þSùs©ì€Íòré¤} E܉ËçÙ:ûUEQEQEé4Iª2²ˆç1nâ} (”_§ËŸ—ßÑ baÌ Âd.å—¿ï'Ö‘ù”ÌÁä{Ò¦Tùµl»xýøããŸwFâø¼ò¾Ò@©PÀÊfJŽò:þð‡ûä'1ŒÚ³·w½ë]ü·˜+¯¼2Z6ñÀxòɆλ qŠ¢ôò”Cž^&aÇoRÙÑ';xˆo"Ab¹üŸ"쌽Ä>’¸ .ׯÚQÕ>pû-¯{ΛÞÔíS¨(Š¢(Š¢(Ê ¤@,Š9„ó—4±XU*¿q+Sþ±\Ü–ÿówÂ;ßÙ¥3¬(Š¢(Š¢(ÊJ@,¼2„s›Rùs»üYŽx®T$œgÄ‘;¹òŸ LSéiFù{±pæ c”·S=ª7¿ºùæ›yâ‰'0M“ÑR Ã0°, #He³J%òÿ8Ï?ÿ||œAÀÿÓÂü£?âýŸü$>ú(¸.X_úæ7¹ùæ›ùÖ·¾E±XäsŸû¯zÕ«Èd2˜¦‰ã8¬_¿žk¯½–¿{ãY·n_{Ù˸ï#á¶ë¯Ç¾ä…M›È|ñ‹ýëÙ·oûöíÃu¾¼~=—=ÊN;£öglúïÿ?}ó›ùν÷ò?ï÷øok×rÁe—qÆgðÚ×¾¶¡ßM…¸&ð}Ÿ p'rQô}Ÿl6‹eYKܺ¢¬LDär™ŸÜRÂDíÄçòÄ&GüäF§.‹åHºF7B«ùšEn>?ùI‡ö¨(Š¢(Š¢(ÊJÁ'Ûäa†pŽ4B®9N(Ä%&«¶“œcÄ[­Ï©Ú–ëºX–E6¡‰†i$®ëòÀpê©§â8a|P˜¦‰ïûœ}öÙ†ïû¤ÓéÐ[Íó°‡Û/¸€R©Ä•W^‰ã8ø¾çyÜ}÷Ýœ}öÙ ~ðƒðµ¯}/¼ýíüäÙgä'Oò…;¸ë½ïå/ ¯¼’+ÿå_0,‹¹¹9Êàÿ÷ÿÆ8õTF¾øEù­ßâËâò?ÿs.÷<°,^üÀ8ð/ÿ—¾ô%Î;ï<žyæí×8ð<·Ž2ý•¯p‰çáy÷¦Ã™çÄÄhèwT!®¥R Ó4)•J¸®[!ºA@:Æu]R©†aÁ÷Ø><ÏS‘P©‰ïûÑkqÙmÉ O_$/šKeyêb%>“'0¡x&7l}iŽ5EQEQEQV âØ¦º±ËÿCšœ[ÉÌ?µLû.•J … MIJ,\×%›ÍòÝ}ûXsÉ%<òÓŸFâÚ–-[øÜÃiòþ'ŸdÝ9ç€aðÄOp®Ì;w€•ËåH¥RLNNòï>ð®[·Žß¸ürnÌdø­uë¸ð á†R)~é—~ Û¶±,‹ƒ§žÊÎ} Ó4y½çA:Mî½ïåâ‹/æoþæoøfõÁïùþ÷ñ<l›_ÿáãÏìPfüû¿ÿûhž|ÕUWÍ;–e1==½nUkQ!ŽXÁM*®"®‹E ÃÀuÝèOâ8NôY§Úêû>¥R Û¶#AÐuÈlÏó¢ã‘õS©Åbq)»m Ïó"1³Zì‘öÉgòyµ Y,;vNW¥R ß÷q]7²Ó4[ð{’/M’c& ˆ+²E( ‰ 7I}áL]EQEQEQ”ú8„^oEâkyB¡Í$žk-çÜJô™7ŽqÖ3Ïðäw°ãì³ ‡G7l`;}ü¶a°þ=ïtºÂ è™»îâÔ‡â‘W¿šB¡@±XäOË‚›eYxž‡“ÉP,ÉçCÿ>³Tâo/¾˜‹¶mcãÿø¼ù¶ÛØü®wÁË^åcýÅ_üEÎ;ï<ÞûÈ#q£S¡ôø/ÿò/ ›iš‹:¡,ÕI¥Tˆ …B$ Y–U!Z‰!ÕS:«Å¦VI­¸fÊ2×u) Ñçb<Žã`Y™L†l6K‹E‚ ¨hïÈȹ\.2ðVH }¢tKÓétÔ>q9M—/DÓ4£e‘ k¹­r,Õ•H\×ett˲_öß{5222Äb¨mÛ˜¦Éøø8ŽãËå‚ â·(‹m’oM¼Ù$ô³žþo-ðY5ÒÑËuADB¡eY†mÛ¤ÓÍœ./Ò&±s±a×u£kV® ‘óù|ÝJ;Š¢(Š¢(Š¢(IBñ-K8ss¶ίdF¿1o¹\ÇqôÛ¶ù•_ù^óš×°cÇŽp>^*A.Ç™©Ü}7ÖY¼ö3Ÿahp,‹\.‡Uí¢¹ÿ÷¾F^y%ÙruÒ|>ã8‘†P=·3 ƒ7ßuW82 .¯¡ÁüéŸþi·¢eaU q££¡C§‚eYM{aÉÄ{!J¥R$ŒÉ¤bÁM ¶X,’Ëå0M3Z¿P(N§™žžž'° ‹ lcccŒŒŒDžh‚x®‰"íyqjü¤§š`Ûvt¼†aÍf£ÞÇÏR©Ô‚ïëaÛvt‘f2™èü<÷ÜsÍÿЫÇq¢mxx8ò4´m;ŠÆÆÆ*ì@DÓ?xüqî1Œ¨zN@ÜÑ/ÕŸR<$ÅÖ$×"Pa×Ò^Ó4Éçó‘èåº.®ë6-ÄU‹‹@…Çh¡¹N¥R‘§©ˆ•r¾DL–ó8::‹œGÃ0˜œŒ3/¸®K©T¢T*uUÁ¦Ó»ì²xcY<íº¤ÒiÆÇÇ9é8¼2¶)^n2g³,«bTKS7;_Ûè„7Ú²#tKlQ]µBœˆ"ú´úƒ‹°P*•"aMBE-ËŠ ; …HHz‰ÇW.—£P(066¶ì9ÝÄ›H„¸ Æ4ÍH 
o#qÙDá^ND•ö¤R)FGG¹÷Þ{—}_ýŽã8‘G¢„,—ÊÕgä·¯2Á%®tccôG¼î¹ç–ô„E¼Ú¤-Ò¹ÖÄþ¥*ŽˆÞ‹ U†aD^— a·’Ë@¼VåH »ét:¬Ö3:Ša‘¨mÛvÔ¾¤ 4æ-7˜L&]™L&ïºë®å7EQEQEQúq„€X|%tŠhV¥‡‰BG ™ f2»ýv&¿û]p婹9N¹újH§1¾û]>}Ï=äËs4qžH§ÓÑ\)NGó¾b±éÉ9T=úFh<ÂØàj%TÂÇ|[ÿy+¯üÁ+aïâ›]UBœ(´"š‹Å†½´Â4Mr¹†aJ¥‰á¤a.ÄRÂF›!—ËE‰{ÝS§ú¼‹E~çw~§ÛÍê9ÇallŒR©D*•bxx˜t:M±XŒ:: 1•þBÂLóåå[J%Nù·ÃZÄ“¹ El!Z®+Y&"tÒSl)HxóB{R”œœœŒr?ŠgY©T* ›–ÒBý@«Â¸eYäóùÈå;•J‘J¥(•J:thYΉ¢(Š¢(Š¢(ý‹GXñt(æzkÕÎ÷}FFF"çY&óCq0øú¯þ*?zã¹äê«azš§Ë–i²Þ0H Eß—”F@äø‘J¥¢¦…:šU˪§Ÿ2¡®òG:ëëgqçw6´«eâfggùüç?Ààà »wï®ølÿþý9r„;w²k×®ŽœÇj2™ ©T*Ê‹µ"„^mÉлd´^Ê%‚I>Ÿ_¶cï$†a°~ýú¶î£ìX‚€\.Ä[’yøJ„nÍ"ÂI§^ÓŸkûvÜû7‚ ˆr&JŽB)âàº.™L&ÚmÛ‘§[6›err²ÂëMÚ²\ˆW\=!®T*E¢dr¿µçNU¶m{ž™N§¹ûî»ÛºßzvÜk6¬(õè§¾XQê¡v¬¬t\¡¬zÕŽâ*¨VM“ÉdÈçó¬[·Ž~ô£¼þøqÆ.»ŒGþçÿäßqGäðõŸù.)Ï“$š*—ËEŽDRȲZl“@è-­£.’}˪ZV tCÌ»#æÊÿU­+ß&®š‘LŠ>J$Ê=yá“ 5kÍrÛìì,×\s CCC‘Ç•Ë嘙™ahh(òÈê$¹\.$ u¹Õ[‰‹DŒèD41M³/E¸NÐëv aN³L&Ãðð0ÃÃÃ8ŽS!L¶ŸÍr qß’%ìÐóÌí+(•Jd2 7žžžÆ4M¶oßÎÖ­[e||<ú,•J‘Íf‹Ü“^fÉøÿå@<Ùjáy¹\. i^Í,dǽ`ʲýÐ+Êb¨++W(+^µc)Š7Fì0ÑêìIÒò¤l›3~ý×ùÇ^à3[¶@:ÍS'Oa„Q¡PˆòÐ'ÃNS©cccÑñ'CQ“ô„\@8Ùm KâW!ÌT-Ï×XW¦—ÅÄö’¾!iÀnx áCX¸©©)Ø»7 †Ý¹s'oxâÏ:ÄM7ÝÄŠs³EZÅó¼È{§'Œ¦‹H8žR›^¶cAò­e³Y2™Läýå ø&aŸp˜æ:qIÔ™Éd¢¼iv´âQVm;½Ú,|¥R©-ùû‘zvÜ+6¬(‹Ñ}±¢,†Ú±²RÐq…²è5;m§D˜ïš+Ž'¹ß%íV*•"p¾ðö·ÃÈv>–¦ †Á‰rNxÉéqáÈ$¢ì <ÂɯE¥ V*¿ON„ÂIò0¡WJŠ8ÜÔ&Ñ仩òº’Ó)]^W¦Áâ'ûd[²xÄm۶믿>zäÈ‘èõÄÄCCCÑû]»v11ÑD —@Ò0óù|Oy¨u T!®>½jÇ p£££xžÇøø8f*ÅU÷݇™JQ0ͨÄõaÑŠ¥›¦n»ÊFš)ÜÑnª…6 ÍœœT®L=;î¶ +J£ôr_¬(¢v¬¬t\¡¬zÉŽ]œp>¡SW³® ¾ï³}ûvljòÔçFFøÈÆlxÛÛ X„t,‹B9¼TÒE‘{†aÔ,B×sÎK9âQÁ#Í‚ëV{ʉè6Fì—ôr“uªÅ5‡PŒ«5 ^†S´,BÜàà`dÀ333d³Y®»î: 4òmÛ¶U¬»<ò{öìajjjIí·K Å$ÊÖŽ §`jjŠ={öpðàÁ¶íc¹íøàÁƒìÙ³gIm’Ëááa|ßÇ0 J„aèçw#„¢ÛñÓ”ÕB.—‹òô‹È.vüÈ#´mõì¸~øá‡ùÚ×¾¶d;VVû÷ïçÆoäþûïoËö—»/¾ÿþû¹ñÆÙ·o_·OÒcìÙ³‡¯}ík<üðÃ˾ív÷ïßßíÓ¦ôÝ·bǰ<ãcee±ÿþ¾·:®p u¡b¬F°ÙR©Ä¿éMär¹(êornŽ·oØ€ošäJ¥(·7ÌÏã&Ž }ƒG(Š%õB3•x/âšK¥@—,ÀO,Kž³¼-ÙžL°a~¡†:ìÛ·¯©ññ²kØ¿?×_ý’Ü9/¼ðÂÈ=´$a¼ïûav«žS·›`hhˆ›nº©í7îå´ãK/½tIv aPÇq"¯Ûî¾›æ|Kö%« ß÷)•JLOOw»)MÑM;ne°ðÒ—¾”7½éM‘¿¢ìÞ½›ÁÁA8ж},g_üò—¿\“ˆ+5¹é¦›Ø·o/}éKÛ²ý^++“~WÀòŒ•þÁ÷ý(wY6›­ùð|÷îÝìÞ½»oìx©ãc0 
U"š=_û»¿£T*ñ«³³¼ïØ1v~å+¼úÕ¯ŽÖ‘qâxS¿[è»(¢¡è6JœLÏKœ<—ØsÍ¢2Ñ^2¬ThÔËͤq•Ø»w/;wîlx|¼,q>UœåË_þrÅ@chh¨Â³Mâ³ÛI&“aûöí¤R©šî–ŠR^²cë§§§±,‹ðË/'MS}Šdxx¸o½;;A-;î– +J+ôZ_¬(­ v¬¬t\¡ÔÃó< …BE)”X*•‰„)4×-zÅŽEJ묇äqû_7ÞÈ7~úS¾wø0¿qâw|êS\xá…†ã8Aè&Þn†a`YVMá³§£kÕåÏ·,¡Gù½šMì—Ì—#®nšÄ¦¹„|mbY<âöïßÏÀÀ@Eܵ Æ=;;ËÀÀ@T8¡Dy´ÔNi‚^³cIÂiš&ÂN»¿ü¿Ú‡eY=Q,¢©gÇݰaEi…^ë‹¥ÔŽ••‚Ž+zƒ pÛ¶£ô,¶mcšf[N‹°#B޼$ÿ¸ã8”J%LÓIJ,LÓÄó<Òé4žçUS³,‹\.G*•êxZ™nÚ±TFM‡¥6Šœß_»÷^>òÈçÙJ¥SFò·I¥Rär¹ÞwFòÊ'Æ®ZV T(åÂI°Eeèhõ4Pò¿%E7³¼½êSaÐziÚedY„¸™™&&&æ¹9zžÇàà ×^{-×\s »víÂu]n¹å–¶ŒïûAuŠÒ ½bÇ‚ëºX–E‰Ðƒv¼Û'¨Gè‹J>]d!;î´ +J+ôZ_¬(­ v¬¬t\ÑyÇÁ4ÍH|qK„¬l6‹išär9Jå|`ããË3Sp]7ò²*•J‘ V=þ–6ÌÍÍU´[RCU{^¥R)\×Å+Wðì$Ý´c)ð)B\3¡§üñ3ñÒ—rêÛßå4S¹\.>%¼ ËzФ˜&”ꜻ|²¤š©Wã¤%s¾%©ÞžxÒõ€èV“¹qèС¹;î¸cî™gžYtÝ~ðƒ-íöí9Ã0æÆÆÆ:uXJ‡iÕ6–‹NرN§çnš˜˜3çææ¦»zÔ½C*•š›³m»ÛMYÝ´ãfløŽ;ñÆ»ÖV¥wé¶m4cÇ7ÞxãÜwÜѵ¶*½M7í£“c eeÓ/ãŠn·µ×8|øðÜÜÜÜÜôôôÜôôô\:ž›œœœ3Msζí9˲榧§çŠÅâ‚ó[Ã0–ÔÛ¶çLÓœŸ3MsÎ0Œ9˲æŠÅâ07=ÎD&''çµ¹Y²ÙlÝcé;nv ”Ÿ››cnnn|nnÎl¢M“““s©Tªîç–eÍåóù®³†±køXâ„Øs•“ÝÃåeÙòòbÕwÓsáIíAš±e+Ö°ƒƒƒ WÓiß÷Éf³½÷¬ô5°cŸtxž‡W,RDs òô­_ª¤ö"²aEi'jÇÊJ@íXY ¬D;ö}ÏóÚ:§t×u£ˆ.‰æ¦X,Fm³…¨åå8N”#L<ÜljøKN7Ã0¢Ê›###äóùyÑeò:¹ŸV£Ï,Ëjû¹m…vÚq@˜Þ,Cca©AÉd‚ â< … ï·±±±È+±ãÔ -]ˆ,•…ä°Æ=Þ åuLB¶q`{yYµù÷xÔm£tLˆk'¥R)êD4»Òψ«±çy¼òöÛ9æò¬4ô\QEQEi£££A@KÎI,ÛPdËçó†A¡PÀ¶mÒé4ÃÃÃ>|ÇqZÚ§„®ú¾O&“‰D<ß÷I§Ó”J%&''q]— ¢vHNjÃ08|øð¼í.wNf9vÛ¶WMNÁd•ÔFÎæ-ý(¦ibÛv$~Š9Žƒïûäóù(—xW0•ÅÉEÖ“ôuÕ.ùÜäo{yYr{)âœq+!ĉºßsñЊÒ$’ƒážk®Á\¿~¥ö;Š¢(Š¢(ŠÒSŒŽ†%-ËŠ<Ä–*DŽŽFùÕŠÅbTÕR¼™¤àÂ\9ÏZ+N%–eE^TŽãÍf)‹Q¾·B¡yÇU{¢uzþ,Ž3«©¸G@èè5ÎÂùáÜL†mO<Á®Ûnã¿ÿýçÇ0 òù|d?%Ç|1Ì%ôV«f”Pt“¦{T†v —¿çRéÙ–g~ÅÔ4¡@·B'Ä+Bˆ—L­¢¨ô;¾ï³íºëØðº×­¯[EQEQEéY\×¥P(Dža¶mG!­P(H§Ó‘799I©T"•JaÛvŠºœ"Øèè(ét:ô žË>{Û¶£ÐØÕÄB¿öûßùNò®Ë?¼ímœÿçνŸÿ|ä­˜¬J›ÏwA•* kvÕ²éªõråƒ,T­›¼ŒlB×Àê“‘¬ˆ*˜À+–¾â’î¾ê§ô3òdÈÿøÇùËn7¦‡‘›’¢(Š¢(Š¢,•R©åãªöÒ ‚ !ANBƒ ÀuÝèýØØ†aD¢ØR¾zˆW]=§”vìSi ŸÅ‹v–J%ÞùÃrî‡>ÄÊBÛúó΋~³dAÑ>::ÊV9M^²{§üÙxù`%\†ÐãÍ­:Y`+µ½éVkºÝ€¥âyétZ;¥¯ñRDb5PíVÍögüîø8ëÿú¯¡lornÄS3)²¶½@CPÕ`¿ü—Fˆ=ß’æn—4©9‰ -TŸ€þÒŒÛF_{Äår9\×Ų¬U“ìQYyHyò/:ĺuë}j²šÑë\QEQEi–\.G  Ã`rr²¡|[IÏ­B¡ÀÖ­[Éår‘˜$ÞoJ㬦3ó=â|ß*óúÓ?å?úQHxN¦ÓilÛŽŠøµ‡JÏ6=Ø2UË<â NZý½Ty'±Ì*¯g²xlî*¥¯=â<ÏÃu]ÆÇÇ»ÝEi™íÛ·cÛ6û5\^QEQEQ–Ïó( 
@èe499ÙPt…„¦Šç[©TŠæ*¾)õð5)‰Ð¬.À'žnÛ·oçƒo~37”m3‰iší-ÌP"È’š¨„“&½â âœI¢úÌÏ£d‡® c„^q:Á­I_ qš´½ð…ñFêãX¬á\’m¾ïŸà hn¸Å0M“l6«ƒEQEQeQÇ!“ÉN§)•JX–ÕpŠÓ4Éd2”J% Ã`||\Ó£,‹åÞëgžó–J¥¨ˆG-{a||JÀ™ï|g·OÛ²"OÙî}Ýëh*×þG¯yEQEQ¥Q …“““Q(i3E ÃÀ¶íH4Q–•üP]æÐµÃRaáð\ñºt]Û¶Û'ÄÉD_6Ÿ.7¸@,ÄÉ]Öq­Q_(ÍW—E…¸ÙÙYºÝÎyxž‡a+ú"^Œ¤ð%"Z½(rùl¡³U+„[¼ÓdyŠ8ä;ù'"Òrytyå6§Ëû.“Àõ_ú¼éMí=±B*6Aøûi€µ¢(Š¢(Š¢, Èd2¤R©h¾Øì¼Ñ²,MÔ&,ËŠ<¿VáüÕa¾ÓY©Tª[I·:Ú¯­•pÍrM*Å2‹J¶aˆi½ÏC…¸ºDBÜÄÄû÷ïçºë®chhˆ©©)öìÙÃìì,ƒƒƒ †††ºÝÞ éëgÄÛKžË¸Tz—Õ{^“ô&“?ñ ƒ8D[D¹vH•&í­:\Ýæ•x {žG.—ã£ßýîª ÇUEQEQ”v‘Ëå"ÁC#)z“|>ÏÈÈH·›Ñ6ÒÄEC“8ŽÃ}¯=¸.$ŠÐI‡b±H©Tj¯ÝŠ˜-7RØL{ݵ>ÎÐÐPEn¸mÛ¶1;;ÛíöÉdp]—b±¸ô^,b£š&ÎW˜‚å3hÜ.U„Sêñ­sÏUûPEQEQ”%Žã¨'\ŸQm%@Í´C{gf°,¨Û$ŸáØØ¾ïãû>¶½\™×‰óV ÕÕÅ©ZÌ£Ý<“+›5ƒƒƒLMME §¦¦ØµkWÅŠ‡êv[#‚ h¹HƒOX•s„ÐÞ¶–_{„¶6 &ôÒLWñ•?Y¶²º ¥xžÇ½çŸ¿l.EQEQeµáº.ÃÃÃd³Y²Ù¬V8í,ËZ±á©ÕèyÿþȨáHdFäÁÙLU߆±¨Š‘¥2Ÿ–M(ª¾'S•¶°`hhˆ›o¾™mÛ¶agvË-·D+íß¿Ÿ#GŽÌçj±ÿ~víÚåš“e333ÑûÁÁAvïn=+VLOO7õ‡PxKVâÌ oj_J5°c€»Ï>›¿ïöÁ*+–j;n‡ +J;éT_¬(íDíXY)ôê¸"—Ë166Ö²£†Ò=Ú"<-B»í¸Ö=ûå/óÐŽœ[ç;­Vö]É»%9ß$D5¹ñ˜«Þµ š?©}¬ؽ{7###ÑS©œ pÍ5×°oß>®¿þúE7655ž}ûæyϹ®»èw›¡÷Õ€PpsÞ' …¸,0ŽŠpÊ|:eÇíØ¡ÞpJÛ¨eÇËmÊÒN:Õ+J;Q;VV ½:®( ˜¦©"\’J¥ÈårÝg'츖ßÜø8§îÜ9oy©TÂqœöpŠ8q}©ü'ù·jNJÍ:Ë•ea­¼Ø»woT¬!ÉÞ½{ç勫ž}û˜ššª™Gnbb‚›nºiYìºnÃîÆ.aXs–ÊB! 
UE•ÚtÊŽžø…_оMi õìx¹mXQÚE'ûbEijÇÊJ¡—ÇŽã099ÙÕ6(­aY–eáy^G„ÔNÙq­#yúé§9ë­oB/ÀB¡@>ŸÇ²¬ö†R'½ŽºLÆÛ×¥6k[¡‘pT ñª/¢ÙÙY#—ÏåpYnÄ#.C¨F£‚›Ò8²cß÷yáÊ+ÕÛWi µì¸}±¢´‹NŽ)¥]¨++…^WH¢Í ׿˜¦Ù±ðÔnÚñ|þy¦¯º sÂIuÔe-Táz¥ª–ÙT r#¨ðÖ¬‘ûöí‹Ték®¹¦¢xÃR˜ššbff&2ìýû÷³oß¾¿óä“O²oß¾ŠXma±„Ž0Lhƒ*­fffØ·o<òHWöߊ?òÈ#u×ù§Ÿü„‹~ò“®‹Ò=ÄŽŸ|òÉŽï»þÉO~‚çy‹®§¬.&&&øÒ—¾Ä£>Úñ}·bÇ>ú(_úÒ—Ø¿·N™Ò£ìÛ·ÏóøI‡ïÇKOLLtó”)=F7ÇÇ­Ø1,<>n†B¡Ð1O*¥}˜¦É¾ð…¾×Wxu¾“Ü‚ ˆÄÇB¡À²a†ž&)QY9"GíØYeÉìß¿¿©ññZÕû÷ïçúë¯gpp}ûöqóÍ7/‹a qË-·D9çvíÚÅUW]U3 VØ´i;wîdóæÍó>[¬œïaس­^YlÞ¼™;wrÇwteÿ­ØñÀÀ;käø»§žâgÎ:«+Ç¢t±ãn„P´bçŸ~:çŸ~];VV'Û¶mãÒK/­ù°¬Ý´bÇ›7ofpp0úŽ¢;wîäÑGåôÓOïè~—26–ÂjŠÝ·bǰðø¸Q|ßæ©âY¤ô'¦irË-·ðÆ7¾‘M›6u|ÿK×WÔªyÉd*cÏóa||¼uoÀaþ­$ó•@ÉWÝHeÙâÈ‘# ×8pÛ¶#7Ì®¹æšeiÐÀÀ@E~¹dŨzlÚ´©nHl=c ^áV"ìÚµ‹3Ï<³kûoÖŽÏ<ó̺vüȺu| †ÐÜv|ª¯!Ó„N¸ô».T‹èAž–޶Ã0Âe†®ßÎ'›V=mßa~Éï€ðI’E˜ˆÒg~§ãQÓWìøóŸÿ|ûŽ©­Øðé§ŸÎ\ÐpŠeu088È¥—^Z3ïU»iÅŽ7oÞÌ¥—^ªBœ2]»vqàÀŽ qË=6VV/Ý·bǰðø¸QJ¥Ùl–T*¥a©}ŽeY=z”©©©®qK72®¨%Û¶ÍÜÜ\ôº%Dø°ª–¥½à$<µzó)ê»í)KbhhˆÙÙÙ†ÇÇQhjÒ—s°zóÍ7³gÏžèýÄÄDÃu-êyĉïž>QÚÁrÚq<òŠWpM+×YÀöí0<ŠU޹ŒŒ„ÿ{5zVù|dFG¡Pÿ2™ðÿåÊËàûáöd™L¼ßíÛ¡Tš¿¯ —oß¾öýðäÿPi¾û´C|ÁŰêÏGˆãÔ‡k´u¤ü]§ü·Êýx„‰& „_FËëÈ>Åí;SÞÿHy?Rø©T^&ë8åmzá~ÞyÇ;—çœ7Ár÷ÅŠÒ ÔŽ••€Ú±²è¦;ŽC:Öüp+€d¾´n°Üv\=«zå¾}üÞ{ÞSs]ß÷M»U—1ÂùEq pˆç5Õ¡ª*˜ô‹kX*»wïÆu]®¹æ8tèPË!¯žçáÕÜòŸÖËQÚÅrÚñŸÜvÛwì€ /\xÅ …-ÀlÙ÷Ø0`z:¼,+\–Ï×övƒPÔJ§CÏ·F)•bQÏ÷Ãïz^¼Ë‚ñª,Ÿò™mC*¾^g•uöë#&‹á_´ʉEíðæ‘#¼ÙŒ³#ð†ñHÐÂ'\§˜Ø¦x§ ''Í”×ÙNxó#.©œ.Ï,ï7&¼Q¥ÊÛpˆoZIº<ñ“'ƒ¸3Ê?mÊ—÷ïÞKpðíyojÉ~Ze9mXQº…Ú±²P;VVݲã­[·bÛ¶ p+Ó4ñ<;vt|ßËmÇ•‘ ¯~òI.¼òÊŠuÇÁ²,FGGÉçó­lH;’ ß ž?90¯: ¦Uì "!njjj^bÂê÷‹Åüó„²n¹å–¨TðÐÐP…÷]3är¹y 9}Â9®þP–“vÚñ?<÷#[·Ö_Á÷CO7× ­t`ël6ç„z!œ¸<Aø'7ÛŽ8ù~6»p«m‡ÉœÛ /Ny*c `c„‚Uš8i¨|Ç$¼‹ÉçróÈ0Áv![ÞO‘Ê»]¦üýty_FÕçãÄwÈd§a•ÏÁpÂÏ€tâ|¦Ë¿‹ëƒmÅçAÚVËý[¾n”7ÁÁ=ÿM–¤/§ +J§hg_¬(BíXY)t{\áy¶m366¶ô)=òVm€vÛ±Lî¾í6Nݺ•j× ’óù<©TªÙÍÇN ©òÿ2U”ÛÔO»£ô k!Œ…žššª¨”ºk×®e«œ ËîŤç ñœ¿³—¯²ZY;~ä§?åªZ.ÏA†sú~(¾‹íÍ–ÄuC7q Ozá-DŽJ÷fŸP@“¯Š7XÀüD¡B7ÖdbÑí„7•±òç©Ä÷,àÍFx~ìªýºnÙKÏ×÷ýðB1QnðfùOÂxElLbšpª5ÿÜû~¸ŸL&Îe—üÌ4al¬3ùö–€æÉRVjÇÊJ@íXY tÊŽ]×Å󼎋6Jgð}¿+qB;ìxæÖ[9ãe/›·\l¸%ÂùV@(ÀÊÿK¤PR€+5¿i¥s¬…Ð%S 5ô2ATV)ÿ[Þ¢¢t– Xóš×ÌŽƒ Ì¥–J…bN§qœpßIÄ‹­º±9â <â¹æ”דÂIWU·¼Þ8áMC´IbaLv=NìÅV+eÍσ72RùÞóBQLÂvm{¾ØÖJ. 
ñø«÷Ý ¨á'lC§ÄTEQEQ”Êèè(†a¨7Ü Å²,¦§§»ÝŒ%“Ìø¶ößàœ_üÅyëHŽÃ–±ˆçP2-gâ$ßµV±ìY*rÄÍÎβÿ~¦¦¦8räCCC µ^Íc™©~R팣(½Žçy<ûïþÝ|!.— ½à–Ò)/×ï'’!œ^yyPþ“ÐPñ|sÊËD”ƒJ¥<*Zëæb9¥ºj’ñ L¯ö„ ‚ØSOQEQEi ß÷±,‹ñ^ó)ËN6›åcûX·›±db-ìÄ0ôÙÏÎ[§å B­ùTõ&-4d°Ç‰ª¦NMMñŽw¼ƒýû÷³mÛ¶¨üê 7ÜÀž={.ÃÚ.‚ª$ô¡ýõ†D¨(ç?úèüŠÅ΋pR1T¨’ňs«å‰½ÜÌò2“P°§²Árcš•a§½ŒeÅUaEQEQ”–!Né=*5 ‰Xs™¯ ååAb]©ËæY/Ùµ«Û‡³l¸®Ë¥§Vóü’«ÄŠ÷›a~X*Ä©‚”žd-„žp{öìáÚk¯åºë®«XaïÞ½ìÙ³‡n¸¡«ªs$ÓP)J¿ð÷Ï<Ã…/¼Ðíf„¸„w¿4p¡ß(¤T )¿ÊÿK!¯Æ6;1F’ ®½ž#IJBOÇRi^ØïéÇw»uŠ¢(Š¢(}AuÁ“ÕŽŸø?96‰S…¹UŸËP^>¯Nß,Á.ò:•X×`¾à&Û˜ïx%^a^Õ2¿¼]QäY¾´{öì³»}j—ŒómŸû¯Ú¶­=;GŠdj­9˜z-õ4köïßÏÐÐÐ<ÂJ"…B«®ºŠ™™k%™ïR¦Z:}.¢ôÇ6n䂻݌˜+¿~ít(ÀÙ„B[‰¸P‚¤kÇV6ñ]³äó½/ÂAìaX£èÛۭSEQEé ‚ è™TI !Á"D"#@‰0¨LãEù½Y^Gœšj¥i–¡¹›øÌ*/¯KT¢ì' ¿ŒÄrØDŒ±M^ËvEÀKO»Øsð`›÷Ð~,B{½ýöÛ¹á÷~¯æ:###K³NÑ80ÊüÂx½ɬjÖB–ºP¥ÁÁAvíÚÅ¡C‡º&Ä%CS“Uz¥ŸøþÚµüþK^¾ñýùÕ7—Š<Î2 …µóïš’\1ùXkŒ°b)„v-wS‹ÞP¿û)4¡V¸±iò¥×¾–7u»mŠ¢(Š¢(=Žïû”J%²ÙÎÇBIf–$nâ3yÈD°òˆ=ÏDü’ÙlŽX“Úg’b¹PÞžx›ÉwŠÄaIá­Ñ ÇfësöÑh»çð<×¾ç=uÓ5%Â%çvõ©¤þh}ÅÚFWìvŽ8 M•”U“]m¢´Æá¹9¶mÚWI\fK–B yâÇnÕ²$n}˜8·›”ÀV–‡nßPV‹U.* ¹EQ”¾¤P(Íf£È¨¶í‡pè,‘W’*¹ÖHN"þäÖë>Ó®ÖJÄCn1%ÉB·kupêm¢áåÌe-…ò+ܬҪtµ»víÂuݺ+MMM-ê5× LÓŒ¼áÚÛ+Êòãy/»,¼‰f2a•ÒåTˆÏ9ÄþêOÔåQZÒ»-K8BGl}õ©(+Ÿp€UOH “ð.4sY˜q|I-FÊû_èIju2EQEi3¾ïãº.ÅbqÉÛaÍM¼NÞÖ¤þ˜ˆ]¾ÙȰ¸žx&·U½}®$u–ïûuC©=ÏÃó<Ò>¬c­ÞQ5úì¿ïX`ÛvT!µÚómbb‚n¸ë®»Žn·7ŠW”~#ކÞpž·to©\âµ$íL{ňGòh/é7/ñ7Q¯EYnª³W“üÌ%¾F½Äÿ¥ÛHÌr¬•ï娬’,ŸÉ¬d¡â¾qSn×Ú#kYt}GO¥¢(вºp§åT gŸ7–ß[„CàI€ù+–—ÛÄbœ>›VZ!™R«zùB"]í/1_d Ð0ÔÀZˆ 2Üpà ¼á o`W¹tð¡C‡˜™™áºë®cïÞ½Ýn+?]·ÐNQéO\×åìK.¯}m^Í–‰ºO\ý”òÿ£å×6á$;O8ñÐ'&в,öTHµ»„"Ötù;áõ8L8êOŽøÄ÷FËßñûɼRåÏíÄ_8žF„9ÙÇty›"¦eˆØ”xÉY('ÙVì¤ ¼ìð˘}IwSV(Š¢(+×u›Nj/·B 5µokŠÒnd¨ôš¯¿ú«•Ÿ¹\Ž H5;”D‚"¾IA=¥¯Y#/¹é¦›¸å–[Šª¨Þzë­=!ÂyžÇ/¼Pcã;Erâµ”mT#^I/‘ZÞÉ ¨+„/¹„W¬[®Ûº—ô˜)NÆMbï¶âòFE ¶CeÙ#U²•ÕDŽù™–›¡Ä|O2ðÚ“‘~!±/¹>å1¼$ˆÙN\JMf #Äâ˜x°Jé4é"$ƒ,qÝ"ìD¬“g[Ë˲‰eñLDÄø"±(û2¯EDôˆCcÇÂ}:{*\þ@›ïsÛ¶[,Ù[dÜ êT±B˜W¬AD¸j,ËÂó–2›Y:iªfˆÛ«a†Á©wŸÊ°ý/·óýïŸ3þì N»æ4^xÿ Ln™äÝÓïæÑ×=Šé›œ~Ëé0 Çn<À†Oo€S=* v…u'v"îHî ¡7†G8'ôÊ ™ÐJG"ùÉ$GRrš#¼ƒÊä°Ï;c7ræÑ£aµÔV+&‹/È„YÎ}2TU2Êÿ=„ç¯ÔÀ>e%á±´ìÆ’$7™W1(os”ØCMÖKz˜Êï‹ÄâÛáuœ.¯'Þ«òt³ºŸ“8ˆûaßE`¡m®ü¹OìPKóOKªj_Âxù8sUç¯Üï|éAŽ8¾|¿“¢(Š¢$(•J ‡ð?3R§ ¥›˜Ààñãóò€†Ñ¼'ÄÉñLj“º¨¡¯®šÚ 
¬´¨:̬¢Ì7ù›¼ú¯_M&“!NS*•H¥RA€išlûÛøÖ‡¾ž—rEÙ5__ï>ƒ»gïæã7œoü_ß`ð»ƒø¿ãsú}§sé{/å4ó4~òáŸpÉ__ÂôG¦™Y?ÉNðøÀãlý·­|ýߟìþ ¾ïó±Û>ÆúÓ×óâÞùÁ³?àµoy-OüËlýâV^¼ÿE6Úc¯‰4q§ ‡”‡| éñDÒ/o;q©¢d¾² q¨•—º´Û?[ËüðŒ3øÕ»î‚fòÔBúuIÀž#®i.§xÞTOÄ›óîW”•ÁB¹ŽÒUë‹€&á™#åmHù4q%^jcÄ^j6afS)|Q~-â™\“õ6Ô³™UÛZèó¥ô¢SEQ”â8΂ÞCòÌ?ùÜY6”nsôî»™Þ²…Áªå",7]ý7Y„OÈ?œUú–¾â­\Ókˆàfšaë à —Ë‘N§Ùô©M8õýêC˜®É¯z€_þ™|0ºÙ¶Í¿8À«N¼ŠÏÜþ~ÿ[¿Ïy/=׿þõŒ<=–ôfo›e`ç;Ílr6ñÖ{ÞN =¸Ü¾<ôÖøPx"p¤œT8Q¼‰pâ9ï¼â¬±Ö@Ž~ú(w¼ú®|Õ•ÿ×ã`ÃÉןäÅ-/ò‡Wþ!7x7pð¯2{ù,;s;ùêo•Ë“¿yÇvØûù½åqκë,6ïÞÌš^NN%בx”ŒzÎå‰Ã¾$?„¼ôÇ/íöÏÙ2ÇŽãuGŽ,=?œCø›‰@ ¹¡²Ä!r⩨(+dÎŒZˆwhõwJÄýM5âQáµ”&öX“Â'Õ×XRèKÆÁ´S(SEQ”ˆ8ÈÜ)I@œ5B¹Õ9Hé<à—¾òž;ÿüùŸy–e5/ÄIŠˆ£+´r加o„8‰äée‚ ˜wãåcüî—k>} ýŽ¿æŠw_ÁÇ>÷1NdO`|Èà‘u°ãÕ;8å?œi8åŸOàâï] ß#­&açŸìà÷ó¿Âë§^ƒ°en ”`à°ÁÄ ½,$·Mœ«âP#Iž¥Âccͯ‰ê/Ÿþøé\ùgW‚Wr%çßz>¸pCþpáÔ7ŸÊÙo>Ά{λ‡×?ýzìòù|4yõñùOçü'n2n¢d”0Æ ¬ÀÂ(qÞ²qâ¼Gòc''´iøÎ¿Ã;yg·ê–xîüó1®¾ºñ/ä¨ ƒXOCùÝÌÄŸ,W”Õ€\'I›¯µ¤ç­¬k{¶Qþlœ¸8A‰¸ª°xúB,Ì%¯³zc©^¿Y)Š¢(Jã8NͰԀpê’"tØÖá®Òk¼üž{p_ñŠyËóù| [£² „Ná´"X 033áC‡ºÝ–¹äê«{Ή \×ÂE-ËÂq†·ó›ã¿‰Wôøƒçÿ€K¸”˳—CÞ{Ú{ÙçîÃ~ƒNm¸ à ¾,ÌÅW‘x‹Oò¤e‰D2Lb!f„¸¢J8;iYÄ«{’~ )~%©—3HHVù³a‡½#|]„¬(go(nÉ®Lnš¸)|mš†A¡ºo¥R)2Ã&'')•JxžwV¡7¡a‚ G×íöÏÞ2‡çæZ0HöÏ€ð7K²˜L¼Ö‰²ÚÑ­zÙ(áµ!¹ÒDŒ¯Q ¹”K¹?ŽBåå;4‰a=ßë4qˆ–r6ñ2yÇZ,¿P›‘œxÉ'““¡šdÛvôùÈÈÅbÇq0M“tºsn&í°ãµ/ysÓS0]žçáy^t}d2òù<¥R)² (•JÑ:žçáû>étš *–KÑ’–n~}Nµ/Õ†{8œô’MŠ'ŒŽÞË?Éö‰.±W[²Oúg¥k´£/V”N£v¬¬:1®yV59Â[¸ŠpÊRYn;–©Zl}úi_ùÊŠÏ]×¥T*Í—¢zú+8ê×÷¬Ø»wo4^èo1¦¦¦Ø·oß<ïº\.ÇÌÌ CCCär9\×]t[IÇṟù™®yÄE“ø  ÒFš³þëYd2~ûõ¿Íë}–e±õß¶†_paã6’ÍfÃ+Ò ²JžT¾t‰“zÄðÄN<Þ œ@.¤CeËë@¥¯vŸyH†…õŽaš&ù|žt:çyÜqÇmoC»ìxãc5×—¸:­T—•N·E8Ïóp]—L&‰l¾ïS*•¢ó%Ÿe2™h}ÇqÈårø~xP2øÅ÷ý(±©ôCÃÃÆ­/\>ŸÇ÷ýh?¥R‰B¡€ã8QØx¡PÀ÷}r¹0¶W>[‰Ô²ã¥ÚpWÈ,òù8±0=LxÍH“•}¨ô©"¾%« 'Iö›«O¿íÚÕ+J'Q;VV WH.-AœÜ%²¢,…vرG8\œÆ·l™—ÛжíÖD8Ùx’ê`Jß²`bb`IO1öíÛÇÔÔ³³³˧¦¦8tè7݆%ŠâÜh9jaÓ«^ÕQ!N&íŸ¼ê“ ½u(ò®› 'zOAño‹Q(¨•µÂ‚.ó+ô‰L>x/@ݤÚT† «ð‚3j”}þ™Ÿù™¶î³]v|ë=÷0øÜsÍ5f„8Y¼…džªeB„- ¹–âR©‰ÑâmfÛv$nI˜q>ŸÇ0 lÛÆ÷}FGG#¯Ç¤÷£xÁÙ¶½àùïR¹™É L¼'e›ÉÁšlSqÉ'ª¾ï“J¥0M3ZnFt³ ‚€t:Šíé4Žã„bzRËŽ—«/î8áu´w—°M{—Êë9„×Lž8œ*ûP´ ThëQÚ=¦P”N v¬¬:9®ð<¯" fõ„S–‡NØñ=ë,&Íeœ¤å¨ô~ÓhÃZ€±'F*Ù{÷î*'ÅŠ|CCCÑû]»vqà 74ÕH×uÛªC‰çŽiš 
¶LnáÈ‘#ŒÝ?Æ^ö#¼o{X)‹“gždM)¬&ÊHùË9懄w ñf’ülâ'÷–qÜ·NÅ4MÚºvÙñÁçŸçõwݯ{]㱈C˜Mâj²c,ky¨B¡$ç!@:Ž„8×u#/7»,Ëbll,Nå{ÓÓóëjËM­úü.7µ¶Ÿ<>ù\¹êãO’Ì[èºnk:L-;^޾¸+j*HUlé&#ÒÌ/™-a§Vùÿa_œ-¿Q¹†tnÛw´sL¡(BíXY)tr\áû>9Ó$K<†PNY:aǵ"mr¹\ëŪчÈ+†¶WM=räÛ¶m‹Þ'ócÔãÙgŸ.Šv~ðƒ\Ô†¶IN*˲ð) ‘·šišQ˜§x¾‰÷ÙbÔÊÙ±HÞÅ Nþ±â¼v¦iVä¬k±ãgŸ}¶ãÇØŠ =z”G}4²ãF]¬@Ü5NkÈRäTÞ:o怭ÀœÛ/†é5°õF˜ÎCn'äÿp®…¬Þq°2àïÓ*ךּÂi²HC£È*)¶ú~ø>¹, ™²ÁóÀ²ÂåA¦¾ïqѶfff8xð GŽéø¾[±ã#GŽpðàA*ÛŠ211Á£>ÊÑ£-µ”±ñ¶mÛZ_Yts|ÜŠCåø¸™—„ãM…µ‘ý6>žššbhh(ª¡'«Y©ó'¥’©©©¦ÆÇmâZáÙgŸåÀlÛ¶޼üåË…—à à “É0>>…­E„bZ‘¸’©M<Ótˆï.qò™øIµ>eÉ9r„Ì ïèefgg9pà»víâ™ÞPÃC¬."¸IÞ@±¥E„¸dn¶T*EŒŒŒN§ñ}ŸL&L¸eYV”Ï\N÷éHòüÈ4‚艗œïF\ØÅŽ»1Ðh…£GòØcEv¼~ùO8ÂîÒ'ìKåÿ ÄÏ1JÄ…ž!é €oBîi¶€ó*pÀœ*;!œ/Cþ“Pøx(ìùP‡+©¤ðÐC¤ÿm‚àÜs1ßÿ ‚ .À8í4‚çŸÇ8í4J¿ÿûQº8öîŸ÷^¼mÛ0"ÿ/½4üüãY°zn¿ÿ¹ç.¹„`Û6¼uëÈ—J¡çû¡—¤T‚\²YH¥ P×…¾lªÄÝà“ŸÄ$ûµ¯Å_rÛ¶þuC‡qðàÁŽîs)ˆ·yófâ” 8Àc=Öq!®dl ÊʧßÇÇÕ¸®‹mÛˆS#ë(uåsèС¾‹å४ü†BÑ@ò”ZÐÔ¡}…qÒv!nhhˆýû÷W4p±ðÂsÎ9'rxîüó—5²¨P(`Y###Lç§9oïyà‚Y0Cã'‡dN®¤¨V tñH6*ù¸¦1ç"¥ Ù»w/SSS]Ù+v|á…FvüÜÉ“œwöÙïPlÏ"T’Þ>5’Ø®ëF…U\×Å0 ŠÅbÛCAW†aDsIQn1ºiÇ­ØðÙgŸeYýqÞ’‘¡áà9 ?8åÿ „]iX¬3fá¬?{ýzœ“'±ï¾›Âk†Hu#ΛÀþ8o9IúK·SøOÃØÿ|ÃZÆrÿ@î?|„Ôß~“ÒÎa‚K.‰öáŸw…‹/Æ?÷\Ü‹/&õÖ·R2 Ò<€{î¹`øÄ]zÿý¿cÍÍávZü;S_û6.¿.¿<þ~ù˜†¯¼ˆ¢%²ÖÜTŠôàýëØžA€ûÿ€{þùQH«| ’B4øèGñ7mÂÁ¿è"ì;ïdôg–±ïŸÜ%—Pt ÖæAHm •.}3\bà'tö6l ø—^ T6rå÷R¨ZB0×­ ½éˆ£À“ã!å¼Ä{ùß./OzŠøU(osT>ík±Ê¡ê£Ÿþ4æÆ¤‰ëO¸ÄNØy ÅË‹. W\AÈ\q&À$ é5óðaÌýgçN2Äy÷¤­&a—"]‹xfËmÍw=½H;ìXQ:Ú±²h˸Â÷ñMS½à”ޱœv|æ×¾†õÇ\±Ì÷ý†Ø3Ñ`TYñ¬…ðiÊÔÔT… ½mÛ¶eQ¥¹öÚk¹æškصk®ërË-·4ü}ß÷yñòË—”“Pª7J¨žmÚ”CÎB6™‰HR|8œØˆæx[Õ,ÕŽyî9ÖmÙ²øz•Éåm*fÄR8À4Íè©!„!“É÷†ah>‚#E+FGG#ñÓ¶ížB—là $œT¼½ä/ ¡$O\öýïÇxó›I}ìcXý(ÖÞ½ä.ºûž{0Ç?DÊ7 øVÌð ¤=à0XÛ|y¸ìH“8ë ±CQ–ÊÜý÷C#ž/òäÃ!ÒaqÇq§T*õL¨£Ò†aDÞp"Ìô;Qn¸cǰŸžÒÀÅÏ|kÏŒ-[j‹nI$‘šYµ,MhÿR¥ºÑÕ¥R Û¶kþ."¢º®K*•¢T*a¹\ŽÉÉI2™ –eQ*•Çq àP(099‰çyA€ã8‘€î8ÛÞò W\ÁÕ-z{7Íøx8òðý†V7‰ëIÞ;¹#Êsªw¦åв¼ÈøOQ”žãöy£ŠpJæ÷¾ÇS5Ò5,Â%‡h’¯îšýŒxå&«3õ±F à=Šõ—I°{7釂Fò B¹(qèišXáY¢ç8NT@b/DÓ4Éår‘à&ÿK5+É«822‚eY‹EœrŸïû>©T*òn“‚r=ŠÐšN§ ‚ úåÿdXr:泷ߎ üÝg>Ã,+úlÛ&“ÉÍfq‡t:ïû¸®»´k_FM0–ø‰43ƒÒ$Ë×Â'téÔRŒŠÒ“üäÄ ÞÝè8BQzŒ­wÝÅ[·ru«ð©¼Ù„! 
5a"ŽU‹dAûކSél6Š”añº"ˆV˜Ë…Sï|>œŠ;N8•¯õ|]†¶¦ V9S©Xލ%TuYV,~Õó‡Jʦ·{!ª‡Ïd Q1¹¯Å†árÜÏÒ`ÑÔPˆÛÿÿ±÷îqrÕõýÿ“IÈ’‰\.žf*r–«‚¶gðk«`óuVÛFá÷ÕÎh4VÛ~™±ÔË·–:óÕVê¶©;ÕŠ•ZÝÑ-h©—=–‚·$ß=ˆ¢lö$‹al€$ØßgÞ眙ÙËîûÉ#ì\Îå3gÞç3ŸÏëó¾ aYVŪ¨]]]d³Y®¼òJ,Ëjz‹ÝÝœuÖY•ß´mߪ¢ßÂÈ_þë¿æ<Âi/y‰?z÷»ÃXœÐÈÉÒ0(a``ºÏb:íÿƒRYÔ¨2â3 ˜˜E«è]"V%«ñŸÉ„–/wˆ_Ñý¢²mT:ÎçÃÇåç)÷É”cÆb¥weùç‹L§C‘/z=* qò9åz‰pW~î(òy¢m)~ÎËﻯ3j)°²X%±&Šn(yò8ŽSûJŠ¢,0aD °×­óç´çž;7/â,aì£ü°Õ¨Åãñ’{bãÆ%!Ú†aP(Èår ^k†x¿E=à¢áÃÑÐïtº4]tyÆJ^t³y7mßÎås?bKq€O>YÛÆq`þî¡¿cljLE8¥m1ñ=âf C¤ªƒ,~H©ÎY¼ß2™LÉ&zOd2ÆÆÆH¥R Éd0M³dûà·ª̈ YÍý¶m»Äs®äºÏ/ïEÛ&"aôsôöö2<•‰Ä´³¼rNörh5T¥5|å+¼ìÌ3kÚtÙ²ew¢´Qg ˆÕ*ÂAåþw ôxûñŸàßÿý'|ìc9 Ã$™LòË_®ãškü⹜Å}÷=ÏW¿ê‡kû‚W’BÁÿ+),“É0Y¬oòsQ>ö¼Ê‚œ ù* #0•G×—¯JÊsÏʉ}AÐ"Æ(¯|û+üF(ØåraÒWÇñE=AÆçPºM<î.qn;ÿü! ÝÝGšC‡®çúëŸæýï_$Ѷ5Î0=Šyÿ8<ð|{¼îuA~iw4ÑíÁƒU‹®+ˆƒ/”Í4—Ë0ó8JB`¢?_Q¯)ÈR ÙÖ‹l+â[2rÜ¥ž´½„ñòFñŸ]<Þ0þøOîA ]ü^ºì¥ìýóÎü)Êbàî—¾”ìÞ½µ6+G~p£y¥Ë£$‡xTÀ’œÚª!Qd….zžòH,ù±ŽÅüìó‰D(°•ÊÛ›N—¶Çqüv” }–å'Ôºø²ïŠÁ@ {ð6 fXÒ<œ”‡ÿrá@ñztgðß18ßñt'ý>S.Ùþ>ˆ9p?Îr®¿ß1Xa€eú}¨4ï]øëHÞðŒ §ÆàƒÅÄ`F¬´¯¶Ëõûg¼xž/ÈÉJäŒnÑíK¶%òÝÙ¶ÿ1ûûæ½ï=5x=“ ǦòõM)P¦´Ë¶nÝŠmÛ¤ÓiÞ÷¾÷•”îµm›ë®»Ž«¯¾ºéùáÀOrÿV‰·ž%x8›ÍÒÛÛËßÿEz¶EDÃZ)Kižr 'žX}ƒb¸³±z,Ɔ}—èÄBylÛþo¸ãø¿ùRªYêsÔKÔ›üŒF׈ÔàtS.Ö,e¢™3kþêÄûMØöŸA¾ÏÏš¦I&ßÿþ xê©C¤ÓÃ8އçÅxê©ïáºG‚pTÇ9‡hz5¦$&„÷L¥D«³}‡åï—£Ò6Õ˜)ƒó•û°¶¬$q騿ÙÏeš³'¡5M?iôœÛ¶‡ëæ‹mMòÿø7lܸœóÏ¿°‚vEhý¢ª+ð¼³³á–[p°Ï)üë+ î¶ _ûÚ9¼æ5—³S©‚‰ŸÆ£:SÀFü ¡Q¶ß@ñ_.òz pì¢÷HÌþ>oÀ2#ôÚˆÞG¯vü a¦¸}˜ÀïglüB\©âñÅö$#íH޳~n½éVNã´Vвd±l›w¿õ­µïä8¾¸á€R­J)xk?¶Lú⚸³WúQ¯åǺÒq+ aô…²¾¨• 1¸¿{e 3\•¿owa³œ†1¿¯û¿6œ | ‚Ç»Äê¸>Ÿ™ãäÀ*þϲB¯Àè Êq8©"ðêÀÃÏsÍ-ì;I:ìÜÙæMáü,˜S•k:ÏXr,?ÜÎ;Éd2\qÅôôôÐÕÕÅèè(à uÕ¼åšA­‰¨]×õCrâÀÓ“á¼ 5BQ΃§žÊY‘bÓD(ÈÁgϹŸcvßÁosQIe›hµËò;ðB™=ËvJ·q£ßÙK] q–0»èþRzZÆ("hÈs‰—E;i‹xåHºÆd2,ôkÛáâ¤l/^EɤߦxÜo_%ïŸhÉ#ï8~ÛücJ=“èo¸´QÆD}}þ~’Ë_ö‘ë%BŽaøÇV*ok%&ê™T(L/Ý]iÌÖ âãlÔº^é~ò߇™` Ãß p "–eaÙ¬ ¼N®Rñï«KŽWîݽ–õ¤¾l–Ö9ïÆ®¿®VUDdó¼žç½ÿðS7ˆ×¸eM/rQrm­ßvÜè<#†×¼f/{ö<Òê˹¸)/Š~çuU„°³•×b±ÐíQÜ<üq‘­N.ÕÙ²Në7ûà nÍã¼Æ€{ð(ðÉ,ŒÄà8VÛ'á©WÙð]6ðY\6†ßyœÑyp’ kMxÖ׃#. 
øj,#í,NŽÃ3ÀëŠ!_¯¶àf¼bÇ⥅Üxà/ýÕ¯fÞȶ§çgðWó¦¯\7 )9Ù¨F4dÞÃïãÄÃ,ƒßGKx¨K¸° YaÁ¦4Ä?ªë%#æf®™áú´b\Z)oše±wûö4fþ˜0­x–¯ßšäó¡ù8øy죕C%– rK† ¶DĸññqvïÞÍøø8W]uU ÊµŠƒeI«aÛ6…BÁé3ñ;3é𒨻§Ò2Ž>Lì©§‚çQ ò«àùçßÉTÑùS~÷£s«(Q5Óœ¾M4/•N…B(ÌÉ¢¡iB*U:~)_4,.s8ù­NÎ'm)Ÿ+F1ÍÒ²ÕÕ£b™|NñÜÏfg÷Ô+o{-û”W2Œ°­Ñ9q4ˆ Ñç26¬ôÙ;ãÐ!ŒC‡`ݺŠïKÑgÛc-$ÃØßy°]›Õ{WsÁ^ˆ%6‘Éä°,«$Ï›2ʫ †aËå0M³D„› «KÃåkBD­è$#žT(”Šd²Dý¾$„¡;/ˆÁ `“QY‘vÝÐÃ@:w «ôÇE}À  9ºcàÛašp§SEÁÎÂC™À™Iø… oHÃÝìvá7 øV ^àÁ½1x¬^hÂIœåAº¸Òô¦$¼©øãÒ‡?ÉÈweábð¦˜Vz[ñ|6¥^yÁDÓ€_…Þ’Pñw#¡\¥‰EwÝÒâW¶ R¥°w.[V}ƒT*ìGå~ŽÞà ! ó ‘ðz Ýwñ½ƒ£ãC‡Ð£·¼ús‚P˜‘9[÷¥Ý[ó9z”îJ^ò£”\’ÉJáÉ„^ßòó¯i´–Ó’¼tww³uëÖV·+Äqà²ËfܤP(à8N˜W+‹Ž ¡ 0sÈ…¢, Ï=õ+W®œ’ *“Ü13öÔÇùóÉßçÂu–ì[ï\!ê½U^¼¶|¼2<<7/­JžaQ¿¹žòðÕV{=U K¬TT8‘¨^ˆòÖ[[û9æƒy÷Ý$žy^ñŠŠï‹—âØþÀRE\ßûí¶ ·qàß°éC›H$†Q³×ób!sß;±ž¹‡f:áH¥ÕD"Qõzçr¾0:M•dtžƵ֘÷rÑ#žWŠp啸ÙJ'!+"òZ¥ïC:fÙ'ƒ?¹ªÔ§”wò¡7DùäÏ4ÁôïÉÛ‹ï»À…ø“¼aU;ø²å?É[©ãuUq[€¯‚m–NËIºÒÞÙh€°úËl)Ä,£nµN¶Z,úBNØE™3ðÜsÏñª—¾´ôýþJVçþÙJÄ9Y|\,DÀ¡“H´+2 ósŠW¯Ð!ÞþKžƒK~Â$­O__ß´Z¥B\Q—mŸmyåÊ•á*o•˜úL&3Ýȳ„%”¥…üúÔS¹èŒ3^•͆ <“IXë®å-oy kïX ÿ³ùíÓÜgJÍ<þ8ÉÏ}«¬ª¶„âf³ºéH.ÏóØô‡›¸øÙ‹Y[µd½àœÉs±ž¼Ž?¾éçŽ4J¥R$‰ ¢ªŸ¯/ää é4¦ibÄb¾Ø”JÁ—¾ÔÚ‹ØjâñÐMbØÓéÊ¥m³Ù0)wÔæ«‰H•Ä"ÉÏVÍ»_Bû³ø™UÜV ä '~.þDÏ-¾ÖG¸p™& —*?¿ä Šû™æì“‡jí^š]€¢(E`íȬ^¾‡ùT*…}´š~_èz6ÅðûNÑd1D(*UrV dùÉ'þp@ÖçFfªfkÚ‘Ãì‹QÊ¢£­…8xéý÷Ã?8£kð´I®á¨Jðĉ'òÒbå_ 5G‡džúݧ8çÏΡëÍ]ú­´5ÎóÏc]tÑ´×3™ÊšÂ“O>ɳ?Kþ»y®1¯!388ȩƩ5œmqcÞñO…Üü4Ê«3[–ˆrQbåɦ;¸ÿ5¯iiû©b'¥s£*³”©­æÑVŽÄ«Wc¶Š¤Qúð'{ÑÁ»”4–SH®!©¢'UHûŠÛ˜„9Ø +šTž Tòv3Q!MQ”yaÇüò—XïøâÀ@û­G…´ ¾›xü‹˜2Bå>QûÉEóì³Y\ýð‡Ç±íÑŠc* T„3ð›E¸U– Ëæˆ…åEHr÷ÄãqFFFp]wZ?udÕªV7½ñär¾aŒ4·Ä±x·É`\ª½‹'Û0þ`ÝÁŸô‰Wœ„¢z‘cÉJ»„—Š¥ã#+)ŠÒb<`ýøx«›á÷µ~Ÿêâ÷‚š&¬nª(8ëÎ;9ZLC”ÏC2i̼ð,R‹%_œ²¤hk!îG““ô<öXÕ÷ …B˜xZVeª(mÂ#§œ÷݇ã”EXçÁ2,ßvÛ`2®(5Q'<ÏOV1ýEœ`eX=à*ãþj%öÏÛCˆ+'*¸áy…Bañ/d2~¸é\+ØTC&qÕ.›LüÒ„!¡’· Bï¶hζ~øÓ@ñø J !T²§Z<Þ¥ÕxžÿɽW(”®^–ÿ–D ¥øå êÁ’7ŠxýÊûr>!.í]×ÿ‘dŸèól¶´]…Bx©Ú++V±˜ÿxb¢´)àòp¿“n˜»DÚvšÿ2ÎmòÀßçà©sá…ï;æ™°˜Jñ‹£Gyq,f·_h¤M¦ø7OÊ?Œ¿ØÑ‡ïõ&ÉóÛðwZi/–?û,çž{.à¯çY–[µ‚=Ú¡Ø–†¤.IÚZˆ;æ™gxvjªêûÙh‡í᳉*ÊJ[±zï^ ÃÀqÊÆ‘øÂϾÀUÿqU«›¨(µ19L ¤òo¥1Æóÿïy– ,ƒðä%Or›NhuËÛ‹s΋:gl‹ÅÈf³¤R©iyå‰D}i0<üÉ\׋/–I„j‡”°Rñ´0ŠûH>6©$ÚG˜C.[<_’0L*:˜WjFRü)u "–„n»n(¢•Tð-w1ŒPŠæVŒ”‡ÌdÂÊÃà 
YQá«dÌ_VuX9ؾŸL†íÁNîùJÛ†ŸGD~äd÷ÓœåÁ.ØFiNE)—îyðe~aB:æßë6¾ÐÃï#,üû9‘€BN¡4=* õ›øÇúPì´¿åoó|šM­¶‹:X;1á/ò 'ÄåûÌ>Â>ׯ¿¦YÂ<™Ñ‚ Z±T©‘ãáàyça# …Bå*ô‚A©ãätU–m-Ä=¾v-{7n¬mã&,¢(Ê\‘µVÏ3¦Íïîºë.®½B]Ý•ŽÁ=ûlX·ðç3eÅ5Þzù[É’ÅÀà„T„+§ÚukwÅD€}}}üéŸþi«›Óxêõøsð'o6þXd €íŠs"°'ÍA˜©L´…¾·›S<^º¸_¾ø~tÕ|º‰:Ai!!Ñ% ÅœuÉdX9hfÁŸlԪˊN\2'¼Ü℔χEhE·}EÚ+d³þþ…‚¿_"áïç8Ð×j'žŠmQݧ·×}plÛdóæú¯å¢ÁuÃ/¶PKÿFxÁÁ=…,ÏLsº‹¯i†…MÒéRo´òûm.",6·íaz®G ?HÅBÏRmLü rÅÇ Ë¿?3(ç¾€#M‰S cŒÁ¯'Ìÿ§”ß÷â¥@è!õhæx,Ø»}òVãâW›´[¾œX,†ãø¿»³"}–²¤išgÛ6===sÞïU/}iÅ׃*nR9 Ô+NYPê±a÷ÑGyÍð²Å>º^«"œÒ|ê²cüq­czIðûåB¡Àýo½Ÿ³¿|¶öÉUˆ¦jÇq³aYï}ï{[Ú†zÇ%8Žÿ¯^aÑ#ôVËâO®S„"€Lö¤âh¼ø/ ƒý2°3Þ±¿ #ó¢i®bñ0Ú/*ÚF½¼Ä†D)wMsºæl^™â%” qòš´³üxÉdée­ÅÖËujúÍbJUØ;®…¾¾Röè—$†&žkÕij¨*Úl\ü1¿ˆeRiØÃÃ=Âô4Éâûvñq¬¸Qü›%¼GgZÓvØ_7ƒzíØNô<Öõö6îÇÐÅï‡aºw[ë×”6¦^;v€±5k0MÛö×/z{{©˜D¹ˆä{U–4MâvïÞÍÎsË"zxåÊŠ¯;ޝ¼™13 ר!/\tE6º ܉!¥ùÔcÃî“Orή]XüÇ%¯ßqÇ¼í —qY«?–²Ä¨ËŽñ£sd® ++!þÝÓÝßí?ï ýfâWÔju+æÇÅ_Ì/ùË–¿;žF&3÷ð¶ þ„>M˜·-…“Dî w=¸½¾Ð&8ƒsŒMBÌ÷ÅÅqÈkü÷kIT.hUÈšÅðp¨ë(õÑ;®„ÄëÊ—“͆.Žâ:9“ÐÖª/Õ%ôbËáß_ü{,KXM8G˜+Q*]BX¼D<‹DÞÿ^]D‚n«©×Žà·¾û]¸ª¹’¥Ú´‡ïY,©¥FæÓ¯»ãxË[‚±qv¶tÍg¯Ð$!nrr’îîîÀå³VWÏ'׬aå¯~Uñ½/~ñ?pV›¸O‚ópŽ€Ñ®ÌJ±#Éâº~J É¡*c 2ML.)$ŠZߢZyUê§^vW®ääÉÉiÂÅyçdz/{¶ÕKYbÔkÇPšR¥švñÝï~—Wõ¿ v€çyÄb1lÛÆ²,Ç)®Ú˜¦‰ã8¸®‹ëº$ …Éd’|>aAµ)ÙOã ò¾Tg-[yžx虦‰çyxÅ+ÙÖó¼i‰tƒ…žâëòÜ0 <Ï Ú~×õ+cEãºî´j±Žãmóo9ŽC,ë¸ê²›6mbrr²%çžHî«Z¯»„±9„9Þd2ï€ýU°' [ ‹ÌÿÐÿuJ§!–^ë?O.‚Õ𪢼2+ ±ãr$¦X*Ëd0jë­ìk<Â0Ð<¾`’ÇQ¤AŒPdƒéâÚ ¹¥Ÿ¯ÖŸzž‡ã8X–üþxžG"‘úmùr]7øíþü>ŠÑyª€IDAT»Ò$Ûq à ŸÏ‹Å*þ¾Áo†[Då7¥P(û•·_ÎmYVðhÛvð;ëº.S3·[HæcÇÎÑ£|ìŽ;à‹_¬ïä6aUéháÐ…@eNÌ·?>xð žv¹Öl+d5äSˆ¦q£££Œ†-F¾cÇŽŠÛïÛ·íÛ·ó’¾>^ò†7TÜæç?ÿÇ›`} Ò±M”¬|H^™ìˆ@7ÓøC )A±bÛÓs—È1¢PÉYÒas©Žatt”þþ~öîmM2Ú¹Ú0ÀÞ½{yèÀ>ðÃÒ׿­q‡ï~ë»\óækZòY”Ö!v¼oß¾–.vüàƒòï|‡á©)ÌK/ŸðB`úÄÛu]ö½wŸòódŸÎòÞ›ßËŸþð'}}}%– ÿ ±XŒL&ƒa¤R©`‚ÅŽ$—q¯üýr-*úÉÄG&7òÜó¼`ò=~ôœò\Ž=·ˆ€2áÊçó$“É@ Œ €²ã8¤Ói ¸å–[øÞ÷>@:&•J‘L&q]Û¶ SŒc$â¢eYÁûŽã0<ÏpYlat’XÎÐÐ_ûÚ×XWÌØlæjÇ÷Üs»víâ’K. 
·ÉåjLä‚/ ¸ÅÃà} ÿÎÖ¢—hœÇÀˆÔ–ªº> í†ãøŽWU°}ûvöíÛÇúõëÙ²eKSÏ]ïØØ²¬ÊD×õí:Ÿ÷m{¡*PÖ@>Ÿ'™H2þ½qÖ¬åØ[å±w>ÆÊ/®ä÷ŽÜx„õéõ~š4É“ç+ßÈ¿<ô/Å*óFÌÀI9HU¾#"”ôåáepƒþ6Úï ¦iÒ××,à8ŽC&“™&‚Iÿ(B]ôøëÖ­ Ž/ÛF/ý§ˆs.NÉ1ä7Q¥D\“…©J}¯mÛXh†aðàƒ²k×.8Ð’ïºÞññöíÛ¹ç¯þŠž~šKê=y4ÜZü¬ƒ¶íްcO>ùdÉ›Œ•¥E?»víªy||ÌT–P&''â®ÇÇǹâŠ+J~£lß¾;wrÉm·±ë²0tO6/2³»|6Ée+ɣɎ7nôÇ:Édin—òô²½¼/•ɤúb ëX¨Ur±f3W–¶Þ{íµ¼óU¯âÇoùe`3Mp•¥A§ØñîݻٳgOïØ÷•¯}Ë[ÈdÂ>­P(Ð××lj'žÈ0ÃŒ]3†3yì+ñâq‰ˆU.Ä-&âñ8žç188H*•"›ÍR(( ŒÇI$$ z{{±¬An¿}?§œòSþöo‡B¡@>Ÿ„5Ã0H§ÓÄãñ@LËçó¤Ói2™ ét:˜@ÊÄM¼$ÄP<øD,ŒNøÆÆÆqT¾ÇÀ›#›Í²nÝ:’É$ét:PÊ„8‘H¶1Ódk¡˜«÷÷÷³yóæPhq]?gÖH.ipngò=À*°ž‚ĵ`nkúÇo â=$‡ëºÜ{oÿñ/åÆŸ™ñ·°Úñªýʱ¢^@Õ"öLi*qÓM7ñ¶·½­éB\½cãªHž-Ó Wç‰Q‚,DHŸÏçyÿ«ßÏøø8¶góücÏóÏþ<æƒLNNrË3·pϳ÷ðÍ˾ÉËï{9cqëä­%çp§Ä#-*¢I?' ">EűJ×Ë0ŒàxÒgUûýé„1™DïÈGÎ3lÛ|íµ1®€È<ïØúñsÍg>3·“æ E¸Aüʵ‹Àø¢#Qdn)óVq‚õ¼ÐY¤¼+,ß&ê8[^\G(·=cdž[ iŸ’êØåµo¸áÃÜxã_6ýºÖ;>Þ±c™˜¼öZ¶þþçŠ×Ï_L­*ÄIÃYrµ*É\ÆÇMñˆëêꢫ+LLßÝÝ=ë>°êÙ0t/Ÿ÷ú#q×’#b59é”*GFÂ'ª J‡VŽTx—N̶ýÏdš¥a„‰%¿n´™xù•ÝFV©³YûLÆ?f*U*&ÆãÓŸ ÕÚ$¢cùóÙÚT(„¡ÂRÅ­åÛDÛ·k×›îËžzl€C‡¸gÍšû0 #̃²8u ¥M©×Ž=ÀøéOá-o¡P€dÒ%•Já8cccÄb1Žãñ\üçCνäÜÛ6yìZÔ›L†Ñ÷ø£?:‘¿øXþìÏ®,ΙM‰D  Éõ Þ³,+ñM$†A&“app0ØÏ²¬@ì›šš¢··—X,F:¦··—t:„3‰pÐæóyr¹Éd2˜| 2™ ¦iž$SSS\rÉ%|úÓŸnÉõ®»?$Ölä€$¤~n˜gÀà:0nÁŸ>]û)Ë#Û¶I=òÊúÑ08ÙWÂääýr/Å- w–}£ž;å牊Vr ¨,Jˆ§OÔkè™g\úú: fÁ]Îõb±XàkW›FÚý¼Q!OÄÃjŸ à¿øEí_d™·—^ÌÒÁR  …O_±©D"ؠؒ\Ãhåäî[ºùþG¿ÏÚôZžXö¯?ãõœøÏ'òÀçàëÏ}G¯{”ÓŒÓ8ÓJòáþ9^×G,MQVöÛ]*Pø¢ƒI,‰D²¢ÏË8×öf‡FÇÉFÇbaäC<îï;Q¬¢Ú¨q²©½­¢^;v€§bbíÚÚOV(þ¤4÷_ "\tN#Nr=%ú)ó¯yT´Q«¼Rµ¤2Š.§ÜnlÄý¥·7tZ(¯R>¿ÉåJsG÷Çù<–Šl¶=ÝAÂuÃ{A¶°½•lJ®ÅLB\yõîòöÉöÑQÑsMNžT»=4yõÇ¿ø+Ÿ}–\ÎOÝbÆÌÞpe9e•¥KS„¸n¸={ö+5»wïžÕÀ`ÍÏË+paŽ–Q-ŸmµäÉåíO§gŒœ~Œèq*åÜ-ß&‘(õê‹ŽÓæZI ¦ój÷•·©R·r*msÉ%7—Ï~ÂS û¶ A½vì«~xö\Z‚ˆ’ÉЋJ F±Ï.îë +?ç󾉽ˆhuVQ JÅ"Ñ·£6!ûÊv±˜ëÍVã¤ü»¬Å!»Ünj™7–ô)¯€]‰JÛÔb³åÛÔ‰Yþu—·wûöÖ¤ šO¼û”S8ÏóH$DØœ9W%š#Nš$ÄmݺÛ¶Ù¶m]]]ìß¿Ÿ\.7ë~kŠ¿ª¢À¤R©À;XòEµzÒ'~(í\ Ä)>õÚ°·r%÷<ó`u Š“X í„•¦S¯ ®ëòÄ1 ƒß–S°ìþeœýØÙþFj×5ѬÔ!2”¿Éd’d2‰iš *!©¦ibY©TŠt:ˆJâ‰'BäÀ{ì±ÇZríæeÇÒ;•é &&øÒ®3øêÅËxtÏüó»þ™‘‰‘ ÄZ5/äÚVò0k$é*30à:“ùöÇ»Ôfá§\+{;بäÃÁXÆÑU½•=üqè÷Rpmðnç6à°Üßt16ï@!ztE½l 
xÛ9¥Q³•ĦÈúBUÊo…ZÒ=–×f©¶h®·Óܨ׎]àñÇ'¶aÃìÛº€Ä•`Zà|MÌW3YEV8£sÉ(sµ¥JvÓÆQÍJ̧?>úì³\¼jï,Žë$oqÕ4Z¥Ê’¥i¡©7Þx#£££LNNÒÓÓSâþY‘{ïeÅr¿yÑT.’‡^ü‚zÕ„a„îÐÊܩˆ» ƒS-y­Yù ¥œzí˜ûï‡Nàþ‹7¿ùr¹G~x„g÷?Ës ´F“QæHtâÍfƒðT×uƒû0¿yÞ>ûôëø>±é\3p Έ3kˆuÍžŠÂ<íஆg€£á˶mÞ“R…ZŠ´DCߣ¸.x×ÃÇÞù/@! ‰‡!™wS(ª™ñŠ„H–{tUò²iÖ-âRÛšÔ`±ðuGm¢~¿r,ý_ê”cE^¯äô"YIŠÅ–'¾Xä¸T9ölLžÔº¾zìØVýð‡˜5ˆö—!¶ŒµþóDbæì*–)se>ýñò§ŸfÿÑ3‚…É /þMâ¾ q Ð$!Nˆµà>û,Ǹ †a”þr)J™‹ ¬gü¹ K«èXÅÞWJ‹˜«pÚi|ã“§ñ£Üg}\‰/jÄç~È¥L<·R»‘b,cxx8˜ ‹—œxZ% R)_|•j´›6mjí¤N;v øð3‚ô¶móö·ÿ ““Ÿ`ÿ]DZÖZËØÀX¸Ðg/þ<‡Jë¨ËŽ&\?á `äÓËår ”x·z†‡?|Σpàß?+O…ž«aåà à½òäN–AÌ SIŠ[qšƒPüQÉ‹ü5¨,ŽyøsT«ìXŠ^P}¸$Óù¤\НÉ\‘6¹‘ç9BA­Z©¹&DÚìT8vµ)Nyzay~÷«^Uçh s¶ã;ïdåèhÅ·$/x"Y’Ÿ>…¦¾PœzúãŸôô°~Å‹9\¨V‹ŸÛ0IØqÚ¨3‘Ò\!n.¸+Wrêƒbÿ8œ¨lܸ‘±±1߈G˜þ«­(mÆñ‡±råº`õøŒ/œ¥Cp—/ç™gžáèá3xɇNƒ?¨°‘:zv4Q¡IòÐI¾(©@‹ÅO¹?øƒ?hY’ûyááŠsðôú§¹Ñ½‘¿Éää>Ç7n[ÁÚaÂ*|Y –•öÃó`$|Nª3¿c÷nì“NòSj Â6¯r`½ O;ðË?ƒÓN‚+3pî ˜÷?ï"0¢®`u,xÛ‘]½ ËÑŒ)aò¦›àòæçP®ç„x)øJlq}Úq|üeCÜ t¾§´-g<ø ö'cÇoî`-kµ?ž’C©Ý‰¦i’Ífyæ™g:Sˆ3¼*¾ñwßà¿¶,cåqÌ­—³ÁÛN°ÀÛzªTón‘P3˜~ ¸„ÞAÕㄞCD¼ C¢^<ÕÎéTh—ÇôvÖm“¬£Ö¢óDÃëEÎW©­ûÞô&6?ôÐ<ŽÞnsà&dáæn&ûÈOýô§äV¯&‰oâÉâg•‚Æ !ûò⟣4æòÅÅïcžF¨ºµR G{Œ<÷œ?ž0ü´©”ï)žHà{Ù›‘ŠÒ¦œüÈ#¬9õô üyZ>×AÂTE‰Ð¶BÜáÇ9míE¬«4 Há‹ŠÒæ:thº›²ŠpJ§qÖYü÷¾\¶î„’ñ‡~çCjÏuÐ "ÀÀÀ@P)5šÛ2™L²{÷î :kGaƒðÙ·~–å«Îã¿ÿß[ø ñß÷`^]4ñwo Å"5”Ö#E1¤’¯äæ³m»ä±xpFKÑ©2œÉd°b1xÍkZý±æÆe&ü†ÁõO?Í_]ô2^6±†®^†Kº¾ø68)ˆÁÜ`,r,:”ðxWçü?ÀïIèKùŽAºõFV:„'Ö¬áñ}§B\Å|±Z A©À²V7`&žz´+1ÇñGG“F(J›³þ‘£\rÉ ÀŸÜwß}­n’¢Ì+ø¯=]œ½æDßÕ"KPQSÕ‰¹ãºþÊ'°¨r£½(7ùâÍÎß:“í'_Jïüî*þ9L<çú©Ùsü¿ ´È5xFº®ÜûÑÇžçÅ=<Ï##=ñs£Êv™L&Ø.—ËÛ9ŽÃºuë‚}âñx g2™ Ro>Ÿ^÷<;R–Ó¶í’¶IŽC ±Ž~¡cí;#cä<ÿÝ÷øî·^x¿Åã~ÿ’Lâ»ð=1 ø!Q*n(mÀ«V1±v’;Á/ÊW¢_¨6)ºÓûïh_í³mÛÞs‡x_2)Ìó¼á+ê`šfð^:¼ÑÒétàiiš&Á>Ñâ!Ùl6(,’L&ƒý Ã(ñÔ”ª ²<6M3Øß0Œ`ðC®W­ZÕê¯kn| shœ¡ÉIV]z)ÿ뿇wØ÷€nx¸(jˆœÄPëB¶Ò&œ¾?¯|Ã+ƒê’Kß{s‘aÛvÕ\¤rl&“ Þs'ا\ÀŠ ]Ñ…‹|>_²]*²ê×××Wu!%*ÄE1 £¤/æ>Ëf³A?L&Kúãèo@t»l6ôÁ•ŽwòÉ'·ú«š3‡ï¾›‡^\¯eO,óÅ7ùмº­,rÚ64uùÓOÓ»úG8Ö‘2òŒLŽàæï™Äþ@GJûsdÕ*Nûå/éûÍ¿"ý—i /·ã¸¸Œ•Ä…(J{sðàA.»ìÌ›‹/¤à_^ú/:±«ÈL:”¦°wõjþàgßçȺ#ÿGå ÿò/ Ek qpðfQ»¸xÅÿ‚dz{÷n.»ì2>Ì·¿ým®¼òJ¾ò•¯ðº×½Žµk×róí7³eËÖ³ž;½“õ«Ö³žõÜÎý>|˜MlâñuC2dÀcÐð‰±¹b-IkÄ¢Pü¬ÿ3d‚\{2A²9§èàZ.yò`\“X,†‹‹AŒX°½± 
×m&{ýcl~lóüÔ$¼gFé˽ãÞäß9Á;Þ«(ÀÅ$` ÿ{±èxaYY\œùë_óãeËè¾¥›ÌqþobÒ!ôj³ê¨"ZY–,@ˆè´nݺ`á “É‹ÅH§ÓÓ¶³m;¨F.˜<ŠV‰D"X¸0úX,V"r™¦Yâͳ¢ ¦i200Pq»è9Ë5¢Çn®X­, ß>üáãÂÏÁ·añˆÓtZJÚWˆ{à~tÖÃ<·öQþ÷—þ7o¼üœîžŽ÷.ãu.aY'“D»õÚŠÜóõ<½÷ôó:¾=ômî|çXXdÈ`cãáùÀÂÂÄÄR|¥ÍøÅªUÄד…ÕÈ©]|R.Rˆ Q+..±â•Þ3“+tŽ)䈓žÿ±ZÄ›žà5÷¿¦Õͨ™çÇÆpÏ?Ÿõ_øïéGÉç!)óË·_±cˆDŠÚŸYƒªaÿ›'Ÿ|€N8Ûo¿N8‹.ºˆO}’ãÏ{ÿð½ÜqÇÜqǼóïäWÞ¯è:¦‹K¹އÓÏ8‹¸ë-áo‡ueäñ…‘ß”³#'_Ûšï ÃUú½31KD:3èO œ`›Jȶµ|OïýÙ{á´Ö\‹zHýnœw~Š¡ÑQ2îÁàKÀˆV®ÎŠpr‰5?QˈzŒFEÏóp]7U2™LàÍY(‘Æu]lÛ.Ém(ÂI>Ÿ¯[,iû6mÏãØ]LáL#›!–IâüÀ!{}¶%¶*áõ’h?“ÉH$0M3LÅÁtÑ*êÉ;“XVÍ[LòWF÷«´Òž¸Àã?N2Ep?;ë ï)`®0Éâ‡6¶êJ m+ÄMvu±»?˯?ÐÅm¿¾Oœô ¾ÿÏß'sM¯Ä ‚•[œÉ_y¬(Íæ¸£GyrÙó¼Â:Èò'—óê§^ÍIœD’$ 2LÔ\ÜÀã E #°ÛèDEE:¥œ=2†ç%`À·ÕÔŸ¦ðþ‡Ï• åöY«±‰Ã¼Lº{ïnö<²§Õͨ™žqýñ[þù®bxØ‘ó^§ÇvÄKËÂ"A¢¡c‡\.x6d2,˲,2•Á4M¬„…±Á¿W ¬­‘~ÿ"‹¢Öæ FÑȺ¨3jé¢×?ºýl¿ƒsís^¼ïÅ#Äy·Ý†sáf^ù÷ÿJ"‘ Pð+L+‹dÀ½ÔÅtagqŸˆ°\ŽSµðüp‹ÿÍ÷7àСC¬\éÀ‰‰‰ —àøø8ëÖ­cåÊ•ŒsèÐ!Î9ç&''yà¸à‚ ¸ãŽ;8çœsèêêb||€îîn&''§§§€ûî»sÎ9gÆc:tˆÑÑQ.ºè¢`ŸSN9eÚ±'&&ÛðË;8çX¿ k}áfë8tü!&ÖOÐM·Þãì:e+XÁ×>LWW9r:í‡Þ|È÷J&®™¾»É·N²bÅ ž~þéù.‚ãeõCñÏ×ÿæ‹\ÜQ\2Gl}ŒT6…Ø,èI=×ëò¢.}}}¤ÓiLÓ$ŸÏãyétº$œfÎEpœ ~ô0?Ž?Áß­ú?xáø­õ¿ÅÀŠâÄÉ:Yp —Ì^âªK(BÛ qœý<']q+1çj.{ü2Òw¦1®1dpÚ¶ÑÙÆÆ)þáʨ¬¾–ªÝ &æ´UðZ˜i•VYZœòøãœðÁëùŸÄ œÀûsï‡ý@zºÝ¤#Þ1nä?ñ8òðè£/ð&ˆ+±ÏòçåÇþ­þ€©ÿ÷z 2(ð޾wðî“ÞMÌѾ®2°µŒ}S9è9Àë6o&ù_.)|÷Ît,åZçÁóƶí 'Zoo/ÁsñŠAJ'{æÕ¢4ŸÂc‘øÉ~¾ø_äê«o¡PœOøã^ïÃÞ#‰Õ¥^Õ±4ibÄšæu,o¡Ô«+ê}$ÉγÙ,öüDñÁvÿ7ÜÇ9à`¬2ˆ­Œá­òðŽÇì]àmð±øE‘Dïó®â¿"™ÏF¼ÌnÉcY–Ovû^l¬,;Þ9U޽®øO¸¨ì=aeÙ~Ñǧ–m·²Ê1ŠŸaûMÛáòÿ ½§ŸÎ÷VïåàŠ§ùÖqä~šÃ{‹G’$66#ŒL›KÙÅÿ¢ÎzÌONN²råJV¬XÁ±wËO<Áe—]ÆãÇ<ÎÞ£{ƒù`r0x'I#86XY+sëB¢R?ù Ïþ¿—ó7γ!±ž/óeÌÇü>μËÄý;^é; 3LŠI’þ‚VÕ¨’J”§tˆ2×(”èñj!êØ$^éóA¥|ÚVˆ[àF.zÿ>xóÕx ì—³páìûYÅÿj¡’Ñ‹`òjÅÃ+ùqX(<¼oi» ®“8øØ›kuæÄ¯OÙ‡y½Éâ?Àø¨Ái?¯mÙ½Ú`€ªûØØ³¾7{vqÜžËmYîÇjá‰âå:×УVSþc7vQçä<ÐÕÅ ^ð4ûßp'Üé1váœT|³3.Û‘Íúÿ”æñÓõgó{?¼‡K¾p‰¿JM6ô¬Ÿ§GEâFFF‚m‘sGQÜã^Èù»âÀ«Ïgäƒìú¯ãÅXZ¤O[ƒçËævÜùžç‘ËåJD5)!Iã“Éd3Kîè}P-?•xŽVÚ.‹ÅJ<•êñZªÖ†ò¶*óçȪUtñÇ~øZb·ÄH¿Å_ˆžir^i~—ËåÂPÒ¿Îðú÷¼ž£g-/®çõN0þi¬ ”8o4  g›¿•o¿9úª£­nœxâ‰'àô#Üa|¿äÿyQÌ “ÂÆî\Ò¤11`€räJœ(fÓ/f‰ç"×"ü 
âU½wf»f;^£ï±…`®÷¡ƒƒÁ=æ=¼ýWo¯iŸ¶⎼`?—=s÷>ÎõE>òó4ü2‰WB:iÅoýí·sô¡w`ü±íØœfo[¸óÍÔ«g[먴ÿÁý­nVͼ`r’•Ý_à—ÞË'îùñx¼$ᯢt׿ïƒ<¸±·¾æ~02X×ï¿ã8A÷¨Ø "hNeaqÖ­áô{ÊϾô3,Öó ÿ]|?Aø04rþî8NP¾P(ày^‰¨–Íf§%›¯&h•çÌ*»”¥C×·¿Í³]WoØP³³€ëºäóùOã™ú^wÛ¿¿þ Õ­¨O9…—¼©Àæ(©xŠáİŸ—Óë2‹¾ úˆ-Žfñ¿zh¤ϵ šÛ®vv;»Ù³¯¶Ô-m+Ä=ò7Wpðгö3kùÞV·HQæÆ3Ï<ÃáÝ¿ Ÿƒ¤©Î¥ŠäœŠ2ôèP«›5'ö®|„^õt½¶‹l— Jç±rü>rÙ±\òÒ}˜¼¾æýăÇ0Œ’¤êÕE× ÃsJ+1)J'ñ$Ç>ºŽXŸŸûGQ:‘þö9öÇ›y­Y}YZL$|ñMÅ¥]øIÏ Yñ@¢Ž(¡L&C.çç}5 £D`SÏ"¥™œ¼öùAv,?Žþ[T„SÚ‡¹ÚñÚCoæœ'_È'ÝÙê¦+JÀ\íø=ïÙ‡8ÅãqÀÏÓ(¥•ÌÕŽc1Ñ+ …Bðz:Öñ²Ò2æjÇ{7ìEŠÉÇb1õÖTÚ‚¹Ú1ÀÙ{,^ùÊCtߨ­a¨ÊœiŠGÜèè(û÷ïgçÎ@¨8Ïä‚<¼ xw«¯¢õÙð¹“'a\Òê–+JH=v|áéÏñºûżþ#­n¾¢õÙq AUÚzì8êìfÛv $«7‘Ò*êí=Ï#‹i¡¥-¨×ŽÏ>ûl ØŠ–TæLS<âvïÞMOO讹eËvïÞ=ã>ß~îÛ-7èíÛ··¶ø×®¿¿õUýýý³~g‹™zlxï3{ùì²Ï¶¼Ýj?!ípO·’zìøu‡&y¦ïG-m·ÚO{¶£UÔcÇ7ÝtSàAÔ*Ñ¢]ìXÛÑÔcÇ…B!¨î;00Ð’v·Kÿ£íhê±ãÉÉIR©TËÛ­ããµã¹ÙñÞ½{q‡½âvž¿æù%ï ·Ôí'ŠØF-4Å#îàÁƒlذ!xÞÝ=³§ÛÏþsn?z;_ºêK¬^½ºM¬ÈÞ½{[nXO<ñ“““ŒŽŽ¶´ûöíc×®]œxâ‰-9ÿSO=ž}ûxê©§Zrþ¹Ú0À]ûïâ/øwl¿£%mµŸrZ}O‹?÷Üs-9ÿ\íxÏž=üý=Ϫý«xÅöW´¤Í öÓníxä‘Gxä‘G8õÔSÙ±cGÓÏ?W;¾ýöÛãÇ?þ1·ÞzkÓÛ+´‹k;BöîÝËáÇjÊÔHêç;ßá%/yIÉ~ͦÕý¶£”Nß{ャ^½º¥×MÇÇ¥´ÚŽyä&&&:f|¼wï^î¾ûnvÿÇnþó·þ“ÉON¶¤ÝíB«í§]Ø·oüñ5mß´b sá¶ÛnkueÞÔª†+J»²cÇŽ–-ŠÒH¢¹´¥Sѱ±²Xh0E™ÿøÇ[ÝeДÐÔžžž’‡ÑÑQºººZýÙ¥fÔ†•Å€Ú±²P;VjÇÊb@íXY ¨+­ ©BÜä¤ï¶iÛvÍI•¥PVjÇÊb@íXY ¨+‹µce1 v¬´‚c?úÑ~t¡O"Šòu×]‡ëºÜrË-üå_þ¥*ÍJÇ 6¬,ÔŽ•Å€Ú±²P;VjÇÊb@íXiÇLMMM5ëdãããìß¿Ÿžž5l¥#QVjÇÊb@íXY ¨+‹µce1 v¬4“¦ qŠ¢(Š¢(Š¢(Š¢(вTiJŽ8EQEQEQEQEYê4%G\»199I>ŸgÏž=ŒÓÓÓ¼þoÿöo|ï{ß`Æ %ûT{¯mÙ²eKMçZˆv aÛvɵ˜é\ u-”Ú©fÃò^;ÛñBµAí¸ó¨ÇŽò{kW;nÅ=­ÔŽÚñt´?î<ÔŽ§£ýqç¡ããé¨7Ž¥vmÚI3i©Ý,9¸ÉÉI¶mÛøRlÛ&NÉd#Ëd2ضì7Ó{ó¡¿¿Ÿn¸¡äµf¶£¿¿ŸÝ»w³yóf†††èïïŸõ\ u-”ژɆ¡ýíx!Ú vÜyÔkÇ ù½µ«·âžVjCí¸òùµ?î,ÔŽ+Ÿ_ûãÎBÇǕϯvÜ8–Òµi7ͤ]h¤v³¼Õ¦ÙŒŽŽÒÕÕÅŽ;ؼy3¯}íkeÿþýìܹðohh˲f|o>ض”I޶¯Yígxx˜¯ý뀯ÐÏØŽîîî¹JíT³ay¯íx!ìGí¸3©ÇŽò{kW;nÅ=­ÔŽÚq)Úw&jÇ¥hÜ™èø¸µãƲԮM;i&íB£µ›%ç·aî½öÚàùÁƒؽ{w‰ûò–-[ؽ{÷¬ïÕËää$Ÿþô§KÚÒìvÈñÆÇǃã\}õÕ3žk!®…27ªÙ0´¿/DÔŽ;“zìx¡¾·v¶ãfßÓÊÜP;.EûãÎDí¸í;—¢vÜX–Úµiͤ]XífÉyÄuwwÓÝÝ ø+ét𫝾šƒ–ÄìÊ6ÀŒïÕK&“áÚk¯V¹™íg||œw¿ûÝôôô°gÏvìØÁÖ­[«žk!®…27ªÙ04×~`îv¼mP;îLê±ã…úÞÚÙŽ›}O+sCí¸í;µãR´?îLt|\ŠÚqcYjצ]4“va!´›%'įh~á 
_`hhˆk¯½˲Jr˜,47Üp===%IþZy-Äeytt”íÛ·³uëÖV7K™…J6ÜlÔŽ•ù¢v<ýz¨wjÇÓ¯‡Úqç¡v<ýz¨wjÇÓ¯‡Ú±R/­ÖLÚ……º§—\h*ÀöíÛ™œœäæ›o:èžžFGGƒm$.z¶÷êaÏž=Üpà ˜¦‰iš˜¦¸/6«ÝÝÝ%*mOOO÷\í\nƒR•lÚߎÂ~ÔŽ;—¹ÚñB|oínÇͼ§•úP;Ñþ¸sQ;Ñþ¸sÑñqˆÚqcYŠ×¦ÕšI»°PÚ͒󈢫«kZ|¯\¨ÉÉIººº°m{šÁUz¯$aŸ`š&Žã¾ëg³Ú±eËn¸á†àx»wï\&««ÑmPæN5†ö·ã…°µãΤ;^ˆï­Ýí¸™÷´2wÔŽKÑþ¸3Q;.EûãÎDÇÇ¥¨7–¥vmÚA3iJ»YrBœ$¬5Sp‡«®ºŠmÛ¶±eËlÛæÆoü…jï5š™ÎÕèvtwwÇÙ¶m6l`ÿþý¼ï}ï›ñ\ͼJef²áfÚÏL4Ó~ÔŽ;“zì¸Ùß[;Øq»ÜÓJeÔŽ§ŸKûãÎCíxú¹´?îÚå{k;n—{Z™;jÇÚ/Úå{k;Öþ¸si—ïNí¸sÑk3ûuXŠ×¨žë¡Bœ¢(Š¢(Š¢(Š¢(Š¢4%Y¬AQEQEQEQEQš qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ”& Bœ¢(Š¢(Š¢(Š¢(Š¢4âEQEQEQEQ¥ ¨§(Š¢(Š¢(Š¢(Š¢(M@…8EQEQEQEQEi*Ä)Š¢(Š¢(Š¢(Š¢(JP!NQEQEQEQEQš€ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ”& Bœ¢(Š¢(Š¢(Š¢(Š¢4âEQEQEQEQ¥ ¨§(Š¢(Š¢(Š¢(Š¢(M@…8EQEQEQEQEi*Ä)Š¢(Š¢(Š¢(Š¢(JP!NQEQEQEQEQš€ qŠ¢(Š¢(Š¢(Š¢(ŠÒÚZˆ‹Çã­nÂ’!ŸÏ“ÉdÈçóó:Žã8d2™Vœ¶Bí¸}ña$ <ÏÃó<À_aI$ÁwQéÚÄb1b±XÍût:hÇQ¶,kÚóÙhµÏd“ÕìXî×l6 @,«:V;NT}=jÃÐ>vÜ.}ñ\?%ûªfÃrì¹öÇÒÞB¡ì»Xi”/õ1…\³…²ãjýqùgV®}L!×Gí¸ôz-ÔØXÞ«¥?^Šv,×GÇÇÍíë±eÏŒŽë{½Úµl§ñq³í¸m<â<Ï#cÛ6®ëÒ××Wò~<Çq\×¥··7PæûúúÈçóÁþÑp†¾¾¾`Eµ¯¯/8†ã8i­ÛÍDÔ¸£gû¼½½½Ø¶´Ýqœ9¿>S›jù\¶mcš&®ëmN§ÓÁ1R©}}}xžG.—#“ÉÇ–ïHÎSN>Ÿ'•J‹ÅjÞ§ÓéT;.·ÛN³ã™ì«šË%úY+ý`¨—Úq5–ëÑ.vÜ.6\Ïç/·¯j6<Ó>åDíX>w&“a`` ev¶Ð4ÒŽ—ú˜b.Ÿ­;®¥?V®}L!×]íxz›jl=þLýñR´cùœK}||ë­·6½?®Ç–u|\wθ¢#ôŠ©6!›ÍN%‰àùÀÀÀ”4o```ʲ¬à½t:=eYÖÔààà”išÁëcccS±Xljjjjjxx8x,û¤Óé๻Öíf#º]-û”¦l6;5000ç׫QëçJ§ÓS¦iN†1•H$¦b±XpÜááá)`jbb"xnYVply}ll¬äzZ–55000ešfɾÕöYLt²—oÓIv<›MV²c90LÅb±©±±±’mÔŽ}ÄŽ«ÙðÔÔT[Úq;Øp=Ÿ¿Ü¾f²ázúã©©© /¯õ:u"´ã¥>¦˜Ëg«ÇŽåxåý±ÚðÜÇSSjÇÕXȱqôø•l)ÛñÔ”Ž³ÙìÔ¶mÛšÞ×kË:>®ŒŽ;g\Ñ zEÛ„¦Ú¶]âÞu•,wí×`q#Œ*•Q×îZ] [á:[þ™du"“ÉÌéõF|.Ïó—eQŸ“É$à_kYµ+?¶¼n¦i–¬9Žƒa%ûVÛ§·ØNAí¸uv<“MV²cÛ¶w}qiîëëcddP;®dÇÕlX®W'Úq3lx®Ÿ¿Ü¾î¸ãŽª}qµ}fês¹\àþ¿˜i´·« WúLfÇÕúãl6«6<Ç1¨7âsÕ36–ãW²ý¥lÇ ããt:M&“¡»»»ä5Xøþx®¶¬ããêèø¸sÆÕöi'½¢mBS«]ÀJD×0 ,Ë þ ·ú£ÔEµ¤s}}®†Qân,ñÖõ´?z̉‰ €Ï.Æ|jÇ­±ãjm™ ù1ûO&“%mQ;žòϾì¸lXŽ?Ÿ}*Ù±mÛär9Ž9æ˜ aï1ÇÓ1 ‡kEí¸sìx¦þXm¸6ÔŽÛol,m’c.U;{žÇ¦½¶Ðýq=¶¬ããê踢sƳíÓvÜ6BœišA‚C ¤T²a%_dÔËEJQCS©T«?JM”¦\.G¡P˜óëóŲ,\× Œ¬Z€r$<úX”bé¤Èår¸®;ë>‹µãÖØ1Ìݾ$@tÿè¬Úñt;®fÃйvÜN6 •í뢋.šó>3õÇÃÃÃLMMÿ¦¦¦ÔŽQ;nsµã™úcµá¹)@íx¾Ô;6†ê¶¿”ít|œËå8pà@ÓûãzlYÇÇÕÑqEçŒ+ªíÓNzEÛ„¦&“I 
…½½½AÕŠè{Žã””RƒÞ¸qcXR*¼4q9Íf³%ký¼òƒlÆœ^Ÿ/R]¤··wN×0‹ǃκÒ>¦i’L&Éd2$“Éšöét:ÙŽËí¶“ìXÚ;ûJ§Óض]ÒµcŸjv\͆¡=í¸ÓlXÚ?Wûšk<88Øô泌wŽ×Ò« ×6¦µãùRïØX>óL¶¿ít|,¶™J¥šÚ×cË:>®ŽŽ+:g\Që>-µãË>W'ÃÃÃAÒRabbbjdd$ø7000•L&ƒ÷GFF¦†‡‡K’Ÿ6I*\þx6¤íó}½Qí¯åJ‚ɉ‰©ááá g£÷éd:ÑŽËí¶“ìx>ö5—ë¾Ôíx6žšj/;î$žšÒ¾u¡P;n;.oO+ÇsíH=cŠ©©Ö_ÏN¶ãh›k½~ÚÏŽŽ›oÇÑ6ÏåêøxæÏ«ãŠöWt˜ú˜©©¢/t#e€¥´w.—#›Í6ÍÝÕuÝêr*)¥õìÓŒv5Éi1•»ž}jÇiS%še_KÝŽ[mÃÒµãùí³Ôiµ×k/‹ÙŽ•¹Ó‰v¼˜ÇÆ ¶_/jËiS+Yê¶ß‰6<Ÿýº]å4k|Ül;î!ü]‰3O¥5¸®‹mÛAÅ…Úg1¢v¼04˾ԎՆí[›‡Úñ¡6Ù<ÔŽÛ µýúQ[îlÔöÕ†’f›mÇ#Ä)Š¢(Š¢(Š¢(Š¢(J'Ó6USEQEQEQEQe1Ó6US£|èCâ”SNiu3¸çž{8ï¼óZÚ†ƒrðàAN?ýô–¶ã¡‡bÍš5¬Y³¦¥í¸çž{èïïoijeÇŽj?EÚÉ~Zý<òÈ#|ìcku3fÅq¾úÕ¯ªýiûi‡vJ;v9v’°“®”ÈyEQEQEQ:‘èüÊòÅç"fÉs™CA8§‚Pà’9“Svl9nù~ñ0«¸¯K(š•sã7Ò³m[ÉÜPð<7222‚aTÂqÌÒ#'"ãñ8étËòe: ؽo{žx¢¦k¨Bœ¢(m‡¬l˜@¦øšˆZ¡P&ÛŠ@fD^¿³4 WU(îëi`p5e6äGÂÁïøeïÛøbœUl³¬¢H›bÅ÷òÀÙozS«/±¢(Š¢(Š¢(Ê4$B(†?wÉηdN$NÑÏ|q›áâq\ü9WŒP°‹EÞ—íe‰8r]—|>O2›-qjÈJû‡ó_ýjž~úé’výë_çŠ+®àÖ[oåmo{c¿ù›ÄŠB›ëºd2’É$®ëò»ëדÏçÉ&“`¸®K.—ò,^uø0öÛßÎÓÛ¶qñ _È£{öK&¹÷ÿ‘“O>™É·¾µdûâ ¸å/ÿ’çŸ^Ó5V!NQ”–`v¸âfìDþ„¢—EÛáJH­îÍQ¬²çµ†“ŠfUy?ú^²ø7yßRÀ°ãû߇Ë/_Ðë«(Š¢(Š¢(Š"Îfñ±<·ç`þœ¬@8O“Ÿçƒü ×_=?ÿùϹrõjòù<çÞz+žç±ëèQîÿ¯ÿâÀÀŽãH$ø_ÿëñþÑQ.~â _½š72SS¤Ói …}}}<²kïÙ·7œu{/¾¸¦k£Bœ¢( ‚¸G=ØÄuXÂ8e%%Eèñ–¦rçnÑÙ˜ÀHñq×£¶º9Š¢(Š¢(ŠÒ0<Ï#›¾Lîº.…B!ðr]˲*n«Ìn• œg™ø)uÀŸS¦ð±ðç_"À™À@qÛò‚âlày~¬QµïðGò'¼ìÌ3YÕÝ …wíßÏ¿nØ@6›¥··—X,Æg7oæøo|ƒSO=•çï»eûöqèé§ùá׾Ƈó7Éårüð‡?äÞ{ïåŠ+®à_N?Mï{—O,#™Lï?W(J¥˜¸åLÓĶmÒƒƒìÞ½›'öì©é{R!NQ”ySÀw-–UéÄ£yÒdÅÅÅ_U1ê:“¢(Š¢(Š¢(í@¡PÀq,Ë"•Jbœa†ã8ð&¢†x™¦l:”¹áâHæ­NæiK3=¥„¢Z¥ç®ëúßc¡íŸ>øA^ùÊWòÊ·½-|±Pà©;X½z5ÇvuñÇç /y $¬Þ°=ù$éË.```Ã0ˆy©Ge``€'<ßZ·ŽÝŸú;á$Óäø\Žt:Íí·ßœæÂ³ÎbÓæÍ|ùÛßæž|ž«‰ApË·¾Å¸ýöÛqÇ‚³¾ÿ]6úáYÖtWt:išÄb1ß#¯ŒD"ä•3MÓ¬¯\Ÿ qŠ¢T%Z¥Æ­ðZ´ªL4þ_r³)Š¢(Š¢(ŠÒ8ÄIþ:Ž\õËåÏžB¡@2™ÄqüØ ?½~"‘À4M …žçáº.¦ia„’+‹^p‰„/ÉëòOD¸l6K.—ã‘Giõ¥íRøa1|O·,þÜÌ.>/·z±m;ôZ´mˆÅ8üÅ/ò‚›ofÙå—sãÀýàÛŸçyX‰bÛ °8ÉuyH¥R sBñØ…B!° b1‚ã$’I.ûÆ7xÛ»ÞE.—câSŸšæu÷ÉÏ}.x­’€ûå/¹äù\ŲX,¶¯ õŠoQTˆS¥©&*ù$›t¡»²¢(Š¢(Š¢( C¡PDÛ¶K^q `dÄO‚"ÂH>Ÿ¼ÎlÛöÓg³`Ç1 Ïóð<>ÎE/zSSSuÿÎ[oåu·ÞJßÉ'Co/x¿Þ¼9mE þ¹aÀ3Ïû:Žƒã8¤Óéª6Zëç*G*Ê5j¤ VáâÀ µ i™â_I/çâO¬óøë4s®,Ø0!nrr’/|á tww³uëÖ’÷†††8xð ›7ofË–-:íœð<7ú×0›¦è‹¯|ñ•Ü{{{1 
ƒD"Ñ’q®d˜Ñ|™L†Š7ˆ¼ŸN§+§P(`Û6Éd²Däq]78¿ÄË5’›R\™]×%™L½çyär¹ @t¿¨’.ù’Éä´¶ víÚµ ×·ì8WLNYþÝmܸ1È!×3J&“!ŸÏãyÿþßÿÍ®Ë.£€ßÇXÀ®o~“·ÝF|Ý:®yó›õ‡vRÍŽÛ¥/V”Ùh§¾XQêEíXY,è¸bn8Ž&]ÆÛ©T*˜'‘Ëå‚9Û|œ=,Ëbdd¤$‰¼RJ»Ø±Ì«áᇊœeƒŽC<'‘Hp ”ÕÂ4/½ôRn¾ùæüÀ>ÀßÿâÀÅ_ÌÈÄg¦Óô ¤³YLÓ t¦rûq]—|Þ/ÿH$H¥RX–U·Í9|nÏ rÚ¶U˜·7›‹¯f”½æà{µ WØ>:=¶ñcSøâxÁ‰‹Y|oÎø‡3ØsqmUS—Õÿ‰B&''Ù¶m===~ ׈€Éd§§§‡L&ƒmÛ8mõk]ŒÊ_K¥RAì»>Ÿ'“ÉÐÛÛ¬Lˆ%È6RÝER–S(‚c‰ËoôüŽãÇÉd2%ï‹PÕÛÛ;í=QÍ{{{ƒv®[·Ž\.G*• ^ïííeãÆÁ¹ûúúˆÇãôöö–|ž\.ü@ȱ6nÜHoo/ÇsL·J¥Èd2¬[·ŽT*E<'—Ë7Ž\/ÇqèëëÃu]Òé4###LMMaÛvðy¤D±išÁçÉd2wb&“!—Ë•T×) Äãq6nÜH<Ƕm6lذ`vÓNv,רܾ¤!XPÇqèíí%sxåJ’¼îÇ?æÿ»àþæ¶Û°2œxœ'?ô!Ò–ÅÔØXE;v]—¾¾>z{{+¾¯´73Ùq³ûbE©‡vê‹¥^ÔŽ•Å‚Ž+j'—Ë‘Édèëëò¬À#mdd„ÁÁA`jj*ß²ÙlEϵzPñ­:ídÇ2w«FÀ¶éÃwÄú³{ï Ä(×u‡™òcˆbÛ6»ví æÏ7n$ŸÏsÝu×ñ‹¿ÿ{–}õ«œñ¯ÿÊ_<ø Ža0888ÍùFæêù|>Ð6$Ÿ`*•|{ây^Ð&Ѥ=ù|žx<κuëèëë î•rÍ|Q9ZÙtÞÎOÜ÷û|A-òÅ¿)|õBq.ºm¾øþ0¡ #ë¤2Fs¢!q£££tuu±cÇ6oÞÌk_ûÚà½ýû÷³sçN Tœ2É^___I"JñÔ2 #áDŒÊd2ÄŠÕ:¢mÚ¸q#¶mÆ! ¥ÓìííeݺuLLL"’eY¾’œÍ’J¥H¥RÄb±àfO: X%2ø¢® ½½½p•N§KòˆA'“I’ÉdÐyËû¢ZËñúúú˜˜˜ÞK§Ó¤R)²Ù,Ùl¶¢—„6ŽŒŒªtôG"‘Hžu‰D¢ä†ÃqlÛfdddƘèàTnXy=ú½,dÏv±ã¾¾¾`åLV:¤C¾'SŶ‰±tš<¾0ÿÚ®.~ó›qŽ=·èî]M0M“x<x€ŠX ¡Gb®ÿŸJ¥‚A‚Ø®x2F+ÝH,ªynFó\¨G^c©fÇ­è‹¥Ú¥/V”ù v¬,–Ú¸Bœ à “Éó§™¶žD"x UßvúõéTÚÉŽgËóó<ÜTŠâü8—Ë1<<„-KŸ8ÇÈ1ûúúˆÅbl>öXž:î8î¿÷^>—JñÏÿüÏüéŸþ)Wó¿¥R)«Vñì³Ï²nݺ@óˆ*Ïó‚Š©Žã”h怡NŽdN)¹Þ‰ñxÓ4ÑY*îZ–E.— 4”hÁ‡r*V0uŠÿjÑ蘺fÊW5‡) EÀ]JÏŠç\¾K¢I˜@ü$ê…ÈûBÒ?ÏÄoNÀdmvÐ!nÆ \{íµÁóƒwïÞMOOOð|Ë–-\wÝu8mEÖ­[„žpŽã011xe‰ç[:ž& AX¶6™LV ÒétjiÛvpl!jPåb„bD"‘˜~ǃª6à_¹aVJdÅ0Œ`ÿ¨‹§xœ –u%ö«’-"áL®µs-m¯ÄŸ7‹v°c¹ž"ðBèiY.l‰8œL&1 âÅ×(†¨ö±Ç^9bã©T*H¨Yž7B<íäû—±r¼7’Ë傪LâVoš&}}}¸­¾$³üXDEZ±SéÈ=Ï«9—…âSÍŽ›Ý+J½´C_¬(óEíXY,,öq…8/D+‘¦Óé@Œ‹Š&Á$bƒ,N ,ºñª[ö7ê äÐy¹¥ÛÉŽ]×ÅH$B‡ üPTш&''É&“d2 Ã(™+É\Ë0ŒÀ3-“Éð¯|çØcyî¾û8tè œ{ÅA¨©T½õ>N:æê«¯|#†Êv3±oß>¶oßÎŽ;JnŒÙJIziš&7nÄ4ÍiÉ Å;®š˜U~+¡xš‰Š[¾ïl\ ,fZM™+ѼUå:Ϩ…r…–ë7::J?{÷î]ó@ãíxïÞ½lß¾=Xa©…h¬¿xÄ …O8ðû OØ•³³ÅÓ§Óiòù|pïPaÿ™l0›Íâ8NUÇX,|&è¢å­¥r“$©•ÜwѰÛ\.מç122xòyž´1êæ-ŸÇuÝi¦¬"I®ÄòUžòý.¡ïÛ·¯!Ç«D5;®Ç†|ðA¾óï0:::';V7CCC|ík_cݺu rüF÷Å÷Üs»víâ’K. 
VÂ|/û}ûö±~ýú†çZ¨±±LžZ;>®ÇŽ¡¾ññB‘ÏçÉår˜¦ÉØØXÉ{ù#ÞB†aÐ××G¢( ˆ#F§ AŒ.¾Þ #ßaA7"Û˜øZ‡UÜVþ9‘÷•-}hhÛ¶;n| Ìi\„,'ülÇ…Èu–ÈÆc?ùIÒýýAñŽJóŠëå/ç~ýkþzÅ ~~É%<|õÕüüûßçô#GpŠiµ¢sq"µò÷¡ög˲¦Ý7s!‹UÝ_æÃ®ërão$[Ȇ„Æ'œ»ø-Æ ÿÚ¶‘00L#,°•cÊ¥ã2LW ÛýýýìÚµ«æñqË5 qíµ×ÎËóŒ3ΘsçlÛv’*F>Ÿ'NWTbËc¡Ë©E%Zæ¹î?Óq])Dð]鬭´ãþþþ9çÌ3ÏäòË/WñB)aëÖ­tww³gOK~uÐȾø¼óÎÓ$âJEvîÜI?gžyæ‚¿ÕcceñÓiã ˜}|Ü DH“T*ÕÆqá‘L&ƒ 2n)Çׂ·_'ˆ kàÏă͠Tp3¶¡FaŸWºå9ëÁŒìÈ9ÁÖ­[ÙºukÇØq=ãc‰øÉd2lþìg9ÐÝÍ÷¿úU,ËÂþàÉ'q_óš`Ì8¿ù‘aô$“`Û\`Y\@(b‰w[ô8Ñhµz"ךEÔ1%kg}Ï´¨1Šã{§¤ð³a|¡­@XuÜ|1ôÕ*n_Kí1öUç;v°yóæšÇÇ â¶oßNOO7ß|3]]]Áë=== Ï%>»Ñˆ‹|iâ©V­üs#ŒNTäF𼪂ÌÀBü`˜¦Y±ºç|heµŸfÚ±0¹ftU@ÂNKòçŠoÅPtæ{•äøíØ—ß_"²Ë=mšfÕû%*8ÏTU¨P(a²rýE¸“ÐöB¡„©oܸ±$\\ŠšÈ¡äÑ_,”uùÛŒœx•ì¸Y}±¢4‚V)¥¨+‹…Å2®Å[‰œ˜q\&sÉJ‚H3ÆÍBO6ðç¡ÓGX´|Ý@<Ü\j[¨ŸiyÀ˜aÉoß8WŒ…§•v,ឦiòéÓOÇœäΓN|'öWU&Zå ÁßK/½”lÚæÏç,>›ž-•U[!_(uX Œ±€o€â©Ã÷Në%4Ȩ·\1Þ·äXs‘Zç¿4†qCCCtuu•Ä] ===ŒŽŽ299IWW׌âz·áòδ¼¸@£‘P»N@Âu}=ä4Z\˜-‰åBÐL;.¯Æ#«Q¢yþÀï¤3„"Ì_„‚䙀qsµ·™¾«D"Q’”4z]LÓœæñZ~­²Ù,…B![Èçó !¿R(šÛa¡¼/ º7£/V”FÐê1…¢4µce±°Æ’ÛMDŠjŽåH>åF"^fEçPbÓN‰#ŽI©#ŽxÀY„zD¥q3fPI|ǢΘA´ÖŽ%ÅŽäü9ð»¿ü%ÿ}ÒI¸ÀqããAqê¸.är`š`œúŠW9ñ£s¹\.‡mÛó m8BWLA¼Ö¢¢˜K0áuc~Þô_ýàWœúØ©œÃ9þ6R¤¡_’‹)N·—ºÙ†H„vuïeÇ®ëbYVÎXmuÎ%ô„SÿNßÛ¬žÐÔV`F{¹…id_¬(­BíXY ¨+‹vµãT*µ`!z.¾£KiFñr˦’Ñ™Wg°Pvìy{ž{Ž›  ß^dN·ñŸþiæœpxÁoÓÑ(ÇqZ7×¶r¯”À\â¨ó„žl¡'œƒÓ¸áû™\†Ì?e¸ÿâûY}åjÎåÜðxÕƒÈ¦ÓÆI —µºõà8NH]E¸ÚjÎs Ô‡”œž-^ ¿Òq)ÎÐ Bœ¢(Š¢(ŠÒ\<Ï òó6J¤±ÍÁÏ ß[|lᇓã×ÓÅÇÅÇ‹}ünPZLB™N¡P`óë^‡ôáÏí‚â–5{ЧHúÓ4ûZ«¸ø¹ß¢HìµW¶ÜÂDˆÑé¯Ù®(œe³YÖ~f-†k°ª{UCš,×­]èH!Nòk¹®;Ï#--²Ù¬æÍk¢I@Å»³òÃÖø |Š¢(Š¢(вøÜáóÍa-yÞ{ñ”ø"J’Pp‹æiKPšk) NNJu\×åøW¾’0ˆ_K@ìd¦Â’•ò¼%“Iz{{›ïP.®A˜ 0*ÇD+þÿirÅc•{µ0}Ò›€®{ºè¾¢1žŠå×Íó¼’¼ís%“ÉLÓ£¾ûÝï²oß¾šöï8!NªMÚ¶=-ñ¡¢t¶mcšfšZɆ=ü}õõTEQEQ”™q]—ÞÞ^òù<sv>ðð…¶bÑFú(õr“"Ä)þµ(´ºmŽã8¬_¿>(Ê1c¤d<7òÀµ×Ç‘H ûŒÍ»ÈÜœ±™þEÇðoèë.Ó+ –Ço2]Ô«z9âöÓ’É$ñx<Ï …ù|¾ÄËÐu]2™ ù|ž\.¢žç‘J¥¦O"Ü¢í\µª6¾¦åˆkÊ—Ëù2ìBTìT”…Äu]’É$žç‘N§+®jˆ§þ‹Š¢(Š¢(Š23ù|žd29g‘B¢å$ÝUü=L©h¢IQ*£s•™q]Ž?ž·Í¶¡xÀq=묠"ªa°,鹚åæážRÐAü›¦ÿ&Æ÷„“m2„UK-BqÎ,¾g2« i6›m¸ãÕp±:íÆKB×Å3Ñ0 ÃÒy‰ædšæ´×D‡r]7Øÿ·û·Ù³gOMmé8!N:YÇqÈf³ê§t’¨Ø‰æ EQEQE©N*•ÂuÝ9åÌ*àë6¾¦ 
B[ýTÊϯøM0ç›mã|>(Ê”ÊQ½ÃuÝYsŒ7I†Xibjà‡”fðo$—ðFŠáOj«y¿ÕpÙ¦¹`Ÿ³<dÔ¹+zí£}Š|étº$¤µÞü%ĉ€!_HÓÝ2¥Ì×_`nÅ`EQEQe)âºîœª£öâë 2ÞNÓÖ…Û“š# —â¹6køn¡¶ ÅŠ¨ù¼Ÿyϲ,òù|‰´âTEL|Ñ,:}µ‹ÿ¤Yò7ªG% ã½Ë›»r/ÍXé¶F:Jˆó*5_Œsæ{ E„ëºAÅÏUçŸ?³Y™ahªmÛ³FSÕM‚ÊnŒå¯Éd].þ-¿™’ö5Фç3ÐQqÑ$÷Jmˆ ívC ÅcTðŠÿ²øbY%¬ÜÃ4¿’ ´B± O!²½A©8gDŽéDŽ•ìNjõ'¶mWu1ΣåÐEQEQe&r¹ù|~ƼpüH“T€[hd^§á©>5<<<{~¸ óBq>Š{h8q üö‘ãh…ÔòðUÙ®<8±R3c,Š|p ŬBÜää$]]]­ngÀRâDHó«r]Ü‹¼/ÂZô*Åð;Ç ü{.FxO‰VÔË!Öf Žã vYf¼ËïÃjµxÅ9øýƒtOqà‘W½ªIW~ᨴz'ß³þ€)Š¢(Š¢(JuÇ!NWœæÓ_ ¢"\30ñçnŠÏ½]] \s PY³š ©à¹ ©¸D€Ò ¨lÄ¿qªyÍI¸ª2/!n÷îÝ qõÕWÓÓÓÃèè(Û·ogrr’îînr¹===­nï¢ÂñØDÐÊÿšø÷‚I(ÆU²ý™Dœxäñà,ûXÔÅ_qj¤×©AÚýñtG/¿¼gjr̽“VEQEQ”¥H¥4E©â_õ‚k.2GÕ¼ü>·ýô§l:të²Ë°¨PÛ —Ë‚ˆs†çyAäT,kl~¸¨‡„ák¾H 7ŒÄD¶­4ѯ­@±2 ËÆÇÇÙ¾};W_}5ÝÝÝ€¯ÄZ–ÅÕW_Íðð0étš¯ýë-mì‚ÅH/â¥&“GŠ)”ÜQq-Wü[)¡h»…Y›4¾âq‰ ¯óàƒpRç¨Ê ^9åjEQEQE)%ŸÏWô„“‚ª´†$þV#}Î:ë¬êN2ù<”y»9ŽC>Ÿ'‘HP(žõ5!¡fåED|H}øÂÃDñý¨‘¡ò¤\™7ËÀnÇŽ€/̳cǺºº¸úê«btt´e^q…BÛ¶lš’’wM=Ú¢ÞšâÍ&7b-¡‡íñ [ËD…×Là±GmuÓæ…ã8Órĉû¼¢(Š¢(Š¢(•Éd2Óæƒ¾'œG˜G^i>Ñ‚šKÎ=—óŸ}¶ò›®ë{•Í-˲üaÃCRc”Šòø_\žÊ¨è¶€,eóæÍÁ‹ÃÃÃôôô”ä†Û°a“““-k¨ã8A§›Ÿç±„ÙüëÄ“-ƒßÁoÄ·Ç^üŽF:›ÞâßDñßþÁ0þª@_hiµØÒé•GM`ÃÞ½­nÆü>C7ãòE EQEQEQB …®ë222RâWÀ× †QÍ •HÎñNŸo6‚Çsn%ÝÄóü°ÔÂN]× Š54 ‡R¥üðarÅ(zC-(˺»» ^eË–-%îß¿¿Õm ˆzŸÕ»ÿFÂ<‚oƒNäýÅ÷øü¾ÇÚa~C“öÛf¢ÓËJ'èl!ζí’Cž0T¹ÑὊ¢(Š¢(вXp‡D¢4FÎAó,·Iü(Ç¥Îã?Îyç7ý¾>0 (KS”ÏçÉdüÌñŽã`Û ,}Q©`¹wœTJ­$Ðé$uÁXÐÓÓà 7ÜÀ† _0¸ñƃ†††8xðà4q®CCClÙ²%È5'¯Ï»»»ÙºukÝ–\ksi¥Œµ¬šÄ‹¯eŠÇ”|g³U†*‹“…°cÏó‚°Tq£OS{a E™+åvÜè¾XQšfŒ)e¡Q;V ­WxžG¡P()Ð s¸Ôq§]Hàϵ۽hC3ìø„N˜þ¢çMáÀE/¸r±yÞ¸ø¢Eô°åM°ðo¢rE[îeÀÖ­[‰Çã¤ÓiÒétP9`Û¶mô÷÷síµ×Îz°ÑÑQúûû§yÏ5BÕºhÎÕ³+ßQgð½ÙñmM¼0Gðs’I(é0µEhç¦ýª…²cÇq‚ÐT9Ržö+À¡,*ÙqCWØeYÈ1…¢4 µce±ÐÊq…ÌS7L{G#-E,ÂyN;Ò ;žxÑ‹¦Ï½m{ÆÔòâ E'¬ YŽ–n:ËåÁŽ;‚b QvìØ1-_\%úûû­˜Gn÷îÝìܹs^ u]wÎe|ÅN &”çHƳ/v–Âgl iÇ®ë«ÑpTýn”FSÍŽÑ+J3Xè1…¢4µce±ÐÊqE¡PÀ4Í }ž°Ð™:´&¾׎ŽͲãC‡M·MÇñÃR =<“É$™L¦¤PCÉÞ(&¾8’Á÷vÓ¨¥,ŸmƒZÂQ@Ä+Ë&''éîî\>çãê‹ÅjN)ôÕCHa{í¬ø+sg!í8*&;øÞ—…V`eQRÉŽÙ+ÊBÓŒ1…¢,4jÇÊb¡•ã Û¶ƒ…lñ„“ÜÜJû‘ÀŸƒ·cìfÙñš5k¦¿õèŒDú%“ÉÆyÃe˜b½QLÂj”*µœeò ¿¿Ó41M“mÛ¶•o˜£££Œ†=44DÿŒû<òÈ#ô÷÷—Äj 
µ„¥JÑašÓ´['3*ŽÓßßϾ}ûZrþzìxß¾}U·ñðûÅ–|¥Uˆ?òÈ#M?w=6üè£â8άÛ)K‹Ý»wsÓM7ñÐC5ýÜõØñC=ÄM7ÝÄÐÐP«.™Ò¦ô÷÷ã8>úhSÏ;Ÿ±ñîÝ»[yÉ”6£•ããzìfW¶íÀ[¨€/ô¨3O{“`nλwïîÈñqµqÅÁJ…"†Ø´aâ ¦OöËÅßDi8CCCs/ßø‡††¸öÚkéî¿Ÿn¸\.WÓAf¢§§‡o¼1È9·eË®¸âŠŠa°ÂêÕ«Ù¼ys‰šìyáM]É3ÎïÔ"a¨ÍùëTa«ÓX³f ›7of×®]-9=vÜÕÕÅæÍ›ƒçÒÉÖShDYˆŒŒ4ýÜõØðªU«8í´ÓJìXQ6lØÀ¦M›*.–-4õØñš5kèîîöQaóæÍ<ôÐC¬Zµª©çÏØX «) ´v|\ÃôññL¸®‹!á|„ÑNJ{#᩵"ýZ§++ÇaeWœ{nÕý=Ï#•J‘N§q]7»ž’h<*x$™ž.ƒ/Œâ×IžDBOO¬y|¼`Ïž=X–¸avuu±mÛ¶†4¨«««$¿\´bT5V¯^=-$Öó>®D´ÐY î׉ó¯¥F4²–¹gww7ÝÝÝ7>®4®ðìÆ Ÿ˜.\ ~-@Î …KÙ7™œ¾O%4Mˆ«„æÉRjÇÊb@íXY ,„»®‹mÛ k¨:ßê<²øÓ‚m”» ´ãµk×úªäá¶,‹X,F6[§¼ìâÏÁ ª'âË,ìõRæÏrð]2¥PC;"ùœ ïUÒšÚ64ÌZ©é3«o`ûbÏà`éꉹÃÉ¢j1]蛉aØhpÕ%šWM<ÓFð¼â{"´Ai6y­R±¤èGYlk"¼^©T(†‰˜fá5“¼sQæ^ b®?–™Œ/Œ.¢bŠ¢(Š¢( M>Ÿ'N•R›_KSi1ÂÕ¥âÑøØóÏóByRÁÂ(©y#©yÒøó²hþ#yOì¶¥$GÜää$CCCŒŽŽrðàAzzzèéé™»»dƒq]—D¢4ÎKRGAëv¡e6¤XƒË Âm>ïÿXá­œ¾¨6‚/`•ßçÒXÉqfF^󊯥 ½Ù_‘†(2ƒ0Í 7Ž=׃*Íb™<åÊ+¯dhhˆ 6åW¯»î:¶oß^sÖ…Àó<<Ó :áh ©VVbiw[¥}\¥Y‘DÂ/²Ð œm.¡¸åz³e ô–ÉÎâwìR1T>Ð¥«0•òÌ×7Ý4çæ½×J¤°E>†´*Š¢(Š¢(3âº.±X ›öË/¦Ìa!Ï¥Àã?Nu\wᢚÀ¿¸âjh£Šu‡± |O¸íÛ·sÕUWñõ¯;v°cÇ®½öZn¾ùæ@k5bÔ¢ Ðz{“ÔU€ â­Å²¬™=â꩘3WÄ“ |aÍÄÿet]Xa„«)qBãŽVúŒÔâ³´Á†/nµ;Q!®«^Š¢(Š¢(‹II$QG­žë)ó'Ai4Ûbfõ† á“*¡©óö†“ÔB‚\ØòS©8×Ö,¢§§‡«¯¾zÚ]]]är9®¸â ÆÇÇéîînz#%¬/ŠTçm%&¾Ñ)BœÒz—ªÆÑ\yJ½c­â¶üd«Â>¢@eï7·ØÆÞð½Sn:…“<¹Õߎ¢(вˆp]—#W]¥QP‹œ,áðc11yî¹¼õÜsýyž=ýÓår9r¹Ö\ŠÎåðÇpÑé0²(X.º»»Ù¹s'£££ØEñ,‹-[¶´¤Rª`Û6Ï;OíMYÄ ì˜ç[ù3‰ß)K a G•‚ÿ‹g¢«%ÊÒ#ÏÌ%ºäªvoˆX–!TÓEØî-î'^fâ1&K½â'©@RÅm£¹üûSòùޔߥ´êÕXñ8Qq}¤ø/G(ÞIüGa(zôo¢¸ô|^ñ½8¾P(¢¢TGŠ\§g×<Ë}/¿¯)_¡¢(в4°m›S?ô!þ²Õ QœAü!Ê‹#êÍ_·Îÿ,®;-G´çyÄb1,˘Kþh)Â-Ø ULËË_èé驘 Î4Mg¶ŒÏ Ãñ/y‰ê BÍÖàyE= Pð+Îû øå¡—K¥_²èD[;ne©!—ò[Nr&Š×ZôÞám˜°Iœ0NÆ.¾&žcÙâö&¾ÀÏDÿ^o:ñ2+ÎˬðÚ@…÷c‘ÏÍ\)°”.+Gö7ñžª®Àî{Ï>Ø3ïEQEQ"xž»³wõj¯,d½°—Å!ÆÙÀ™»wÃÙgƒãL›ëÅb1’õÌÿdlÖ€©£Ò^,Ÿÿ!žÃ+Wj‡¬t4®ëúižç{Ä Ô™QSÂÌäq’P0¬ïŠÒÑäðů™VkD0ËÆCäðG~aþ3)ÏíPê¹# %•pÝ$æ?*äE°Ê뱈Z¬Â~õ°?Ž¡€¯IzEQ”&`Û6ç_uO·º!JÓ5ͯ39Àš{îñŸxÞ48É‹ÍCr”1g•Z•ŽbY« (K©Ô/ÂAêæáOÀ³øzuU–*Ñꟕl"´Ékâõ&žqñâkùâkÙâóÞâ>ÑŠAI¼k³Ý{•„6­>¤(Š¢(%8ŽÃºÞ^Ò.1¤–TkâîÇGŽpêí·W}?ŸÏãºuTÿŠæ÷–¿Ê¢ #„¸Ã+W¶º Š2o €Xl~¹áDˆ†‹ ¢Bœ²tÉ3Ýþs„L|±NBDm¼lâ7HX!]|?A(t+Š¢(в xžÇC§¦QPKŒhJÛÉ“NjusêÂN}ê)LÓôóÃUðzËf³þûs%ªPj“EÅr€ññqöïßßê¶TÄqå+[Ý 
E™“çžKW½;;ø‚Ayþ&éŒ5tLYÊD‹H¸¶Gé`Å Lv+ÏÓ”V+:ªFC]Õõ_QEQ×uáì³Uˆ[‚H¦Ï_tQ«›R.pîädñ‰ õnµ c^õŠ[,â†nhȇ††¦UZœœdhhˆƒ²yóf¶lÙRóñ<ÏãEÐÂÊ­ÊÒ£‘vì8Ï®Y3÷…ä¡ò"¯IŽ«$-ÀUÊ“àyžŸK¯øãåy…Bd2‰ëº†A>Ÿ/ItêºnÜ×4Í`;ŧ܎çÓ·Œ¥"Y%r„U„MÂPը稄“–߈•BG;=QÉ"¢Ñc EijÇÊba!ÇŽãh¶•%L¸ó;ß7½iÁϵv|øðaÿmOóˆs‡\.Çà` ½½²ç­±HX°cÇÇ™õßlŒŽŽÒßß?Í».“É0>>NOO™LÛ¶g=V”•šª4‘FÛ±çyœñ¿1÷†ä)Í ¡{²T[llÛòD¯‰mÛ8ŽC__ŽãÏ烾$—Ëáº.½½½Á_ó'Äãñ@`s‡L&ƒã8xžG.—#“ÉP(Èçó …’v¤R©’ãÊyS©Žãžœc1RÉŽçÛ7“ ‹Fžé9à2øâ›àáÇ4Äs(J%a¬³„÷L‹ØK‰…S(J3Q;V 9®Èår$ á”g¡ìøá‡öy²–0âñcÛöœ¯¡8‹þäVª þÍž(n+î!¬ü'á]’ Ýdz%Ïâ_é‰Ç.žË%¬FØbÊŽÞ°ß‚GVκ*’D^ 4äjÛ­^$¨ˆSò·P(`šfàq&Ÿ9ŸÏcŽã`Yžç•cR¦[VDH‹Åb  V b%¯ûñ€âý&LjzËÅb1‰DÉëÒ¶t:aÁqÇÁ4MÒé4±X,ðÉm:¦··—ÁÁA\×ÅqÒéjÆÛþT²ãV÷Å~7!)[5wØŒ®– ùõ wÎIò¿ ‰×@,šë­¯xð*ºu(:¦PjÇÊba¡Ç¶móžÿw¾Ùêª,jÊŽÅOÂ0Œªó¼º#vÊ/4jcѰ`Ïž=@(ĉ‘ŠAÖÂŽ;¦%!ܽ{7===Áó-[¶pÝu×Õ|\ÇqX¾|y«¯S݈˜ ¹‹"ÂZ*•b``€¾¾¾@tÏyýßÿæßùا>Æß~þo) X–Å•\Éç~ÿsl¿s;ÉdÒwëŽÅxÇ?¾ƒŸüþO¸è·/âñz>³€_þû/q_6E×ÿîb²0‰‘5Âdå’¸|¤Ø¸Tñõ¾$þâRkZJ'çE½h®&'Á2ûŽÆ´ç‹ûs6Éyªpù/_Ðïk¡ìÀ˜˜˜VÊzF’Åk#‚§| —Ëaš&žçbò\ÂEMÓÄ0Œ@„ Ý&‘Hb„‚Y¥¯d…&ùQškòÒÙJËqe»òs˪Tô¼" Ê>"*FCc;…JvÜž rËK÷ÇÖâÀ¾7H¨ÿo,¾¾üWˆm…ÂJÈ û¯§† fBÂÌÍÀzQó1ß%¤è`¥cYȾXQš…Ú±²XXèq…ëºüâ„ÔÑGYPÊŽ=วŸ†+*¾ŸÉdê÷ˆ+§ W”ưà ×ÁƒÙ°aCð¼{޹Þ&.ºˆu]u§¹¯ Û¶1 £î‰¶„݉7ø“øD"A<'›Íâ8¶m‹Åð<+º¯àéž§ƒœW‰D‚§¯š«/½€gÞô ÆMÃÑ‘ˆÃçÍÏ0` € ÿöŽcÕV18<Øü‰Íz!Xð\ì9ö}pG½£üpô‡lËmÃMºÆô8u ô®‹_‹áÏΓ”~±1ЇyJ½ôløeæ—xÿ×ã'~ƒð‘[İwï^¶oßNWW‡ßùN¬;î€ÜãKÅÓP„kDò°Y–ˆnò/NkøK D‹IHˆ«xÊc˜^db&$/ž}ûZýñjâÁä;ßùάÒ¡WÀ×ÃB ]"Ý%ㆠœä_±óÖ'ÈývŒ„ùô\=ç¿ûݼéö'p.=…ÇÖ>Ï—Ožâ²ÂQ–¯;Ⱦ¿ÿ?¼ú­Ã1ÞÁþì7x`õ­¬>ór2É^rï½|ôøà¹çòÛ·ß΃\ÀŽŸþ”¯_|1‰C‡ø·®.ÎèîæÀ‘#\²b?yì1z¾õ- ¯xæÝwãW3cGÂãc{,±w¾3;xà˜üÙϸàÞ{ñN8ëK_Â}ãq.º÷©§HüÉŸà¤ÓAÔ½„Û&âqÜU«X»?_z)=Ÿú”ßHžÂˆ¨ëf¡ |ù<$~%.ùݲ¬ÚÃÜÌÐÐ_ûÚ×X·n]KÎ?Wî¹çvíÚÅ%—\Œ+”ÖS(‚~TþJéWc±XÁ0Œ`$‹Ú¦iѲØéºnà­-Û%‰ ƒ¤\ø×ýWÖ­[ÇúõëÛ¾X‚Œ-ËbëÖ­­nŽÒ&ȸbïÞ½­nJÍÈøXÆr¯—§®P–CCCضÝqãc½ìž|òɪó«ä|ÆkzSt ýýýìÚµ«æññ‚ q=== ÏGGGéšÅÃíŒ3Î:ç{xƒÚ"žoâ™VžWJ¼_dÐ&ƒ9É•N§q#êÍW‹ÅøÞõß#{ ;M :þÇsüõÇséêKáÅð‘·}Äw)à‹SÅ퓉¤/Àˆè–ÄÚDàêÅWÇðgxaŒ—Y|/FI•ÀKþýn»í6È^VÒ.ŸóOçÊ¥‚÷Êsp8;H@>“'‹‘NøëWr­¢¡Š%.)ÑëQ­SIŠr‘ ‡/âE¼èÍ/â³Û?Û k˜õØñ¦M›Ø¹s'™L†5kÖ„“êZäó"hF+AV!“É`‰D‚\.G6›%ŸÏ¹Ü ØßLw-|¿®òÐÞJôôô°sçN¶oßÞôv×cÃgžy&—_~ù¬â…¤Œ¤øW 
:Š¿ýÜs\¾o_9ûlÈçÉ%“ð¾÷±ò£å¹eübì.î}àËüúï?À;Ö|ï“·ò¥®.~ïȇø‹ûÞÃyü ¯þѽœ´ç>:îpÇ7¿Ê/9†—y/ã×ÇLp÷÷ÿ“Ÿ_s »àþ—¼„÷{,'ONò•+¯ä”ûïç-W^Éi<Àw;ŽG=–Ýu®ëòϯ|%SÏ?ÏŠË/ç„}ûx椓9÷\.xà~ñÒ—rÌÔùµkYÿôÓüêðaŽ?tˆÇ–-ãÇÃóïz+öíãco|#+V¬`õêÕl;î8Þ~ñÅlºë.>}ø0Ç?ôÏlØÀ©O=ÅÇúû¹÷¬³¸üSŸâ„®.>ô•¯póþýxÕ«xãÙüÍorçë^Ç9÷ßÏÏO;kO=•›ßô&6vuñœŸýŒÔùçsÃO~ÂÇŸ}–=þx>~ß}l_¾œ¿;þxv¬_é8xÇ‹uæ™ØëÖQÀÿ ÄœtöÚ×’‹Å‚L B§ÛZ}õ¶nÝJwwwÖ¢Ýíø¼óÎÓª”eÈÇó< ÃÀuÝ Jµ^²À ý²ˆiYVI ƒT*ô}ÑB8ÑÅ yÂ1‡l+ç“÷DŒ“s˸Bö—|¥òXÚM`šf°¸*ç“Ü¥žç‘N§q‡¿ýÛ¿åúë¯çÌ3ÏlêõŸïØXQ„NW@8>\×ec¼Q³=¥Ùºu+[·ní;®:>Îç§-’–ÿ&ÎHyw—p®¯´=;vì`óæÍ5!NVTäñþýûƒçуϕžžž |ÊLè_ßIDAT)bWWל‹ œwÞy ½@žç¡¢Ñäôђ²2#¸®¼ŸL&ƒ÷lÛ&—ËÉæO™+ ?RylÔþŒÇ! ;±%NqÆD˜Úoµ™oú|íø±N˜VAgFD³óðÅÚüï|¸øvq¢’Éø±ª"²åóy2™LP¬ “B';ä¢9s¹\Û…ñÎ׆g¢9ÎWî»ë×cŸp&ðÍ£G9´s'¾úÕœÕßϲ|„S¿ÿ}}ýëÙ~ñÅœý­ûxÝO?I¾ƒ'O\ÁSoæßu ‡Ïø7Vݽ Žÿye?›î»VþÞr‡Vþ9ËþÃá^{!OxŠ·ölàåGò?þcn»ùf>ÿÔSl0Œ°þËoü?½í6b†Á݇³æða>òû¿Ï'ÒiN‰ÅøÅ#6 ÜçŸÇ¶m>ÿ¢‘éïç5†ÁÞ‡Æ3 žºùfrÉ$žçqù»ÞÅÿÛ¿ûío'—Ïó­{îaòÖ[±m›‚ëòö¢Hñº¡!LÓäï>úQÎ?ÿ|ö^z)r÷ÝüõŸÿ9[n»[௮»Ž©C‡¸âK_âëÛ¶qÁ{ßËO>ùI.ùïÿæS§žÊÅË–ñíï}¾÷=®ºâ Näʾõãã¼Ä²Xuäÿ‹Ñõå/ó­ž^ðÒ—ò’ýˆ‰eËX·n+ð>üÜs<ùä“\ðâsçæÍüÍÄGl›K/½”_<ò©Õ«y¢¸ýæcåâeËøâ~ÀGW®ä»7²õ¹ç¸¢¦­«óÿð[ÒnvÜIDE-ÊÄ;,•J‚šY"jE…7 ¦ÀïËâÅÉptÂ]|êØòwbbÇqÎݧ–ö ³-dÌÆ\í@¶?pàÀ[ÓQ;îªEшX-ˆ@½¿¢Å¯¢)pä*¿Å“3‹÷«ü1Z¶‹ÞožçñÓŸþ´éצQvìº.º»ÕñGi ²ãƒB¡0Mˆ“ßçšœ!$‡‹Ìƒ]ü0ýyX”,?ztt”ÑÑÑà 6”<¯—îîn®ºê*¶mÛÆ–-[°m›o¼±©RÄ èM•ËåH§Ó …àÂÇèÍbš¦/Òmc,4Z–EïW{Yû¢µ¥'”8,qG˜Ö Bl°ø¸(°!œÑ÷¡6o²Èd2œó®wñÚ /l赕@y‡%U0=Ïò¬ g^§2_;>ñàÁÚ„8)‚‘%´‰2=Çq …BÉäJ*—ªðÖ:¢UZÛÑóp!ûâ> ÇÏÏÆÇ9úãc'\ð±qàýïçÏ¿òþçg>ÃW^‰û•¯à¾ýíØÿð~Ê‹¸ÎOÂ?ź?wYwž·[¬Z±Ê?°›^± ’›‚¾t¥¯:ôFxœP<ÿÀ]–ÜÓ¾›Ë|/`ñÞ66¾y‚”¨ÇhtQ&“É00<\2 {y2lûîÍ›aóæªUvËû½ë€î3Ïäî‡æÅ¿ø_ŒùÎwúoЇÀë_lï<÷\¸˜ñ–·EVLËò Y§R8ŽÃ›ãqîyÅ+¸ct”¬\Éê .àÚóÏgÙÅs‹_̱înìXŒøúõ¤Ói^÷Ö·òÀðáO}Š[¶ð…‹.âñxœw}ìcL]s _ýÉOøÛ[n!=5Åä[ßÊi=Ä#GX½ao<óLšèÙcŠFR^F<öå½\.üŽŠ×y4D(ù­‘L¼÷eì#ã õ¬Æ\ªE§R© %Ç\ îTjÃ|D¸Nc±Ùq«ˆ†G‰Ž*Ý7ù|žl6K, <*%ÜY¶•ã‹7§x‚æóùÿ¿½÷“£®óüŸ!¿C2I…‰Ô QRÙ Š€¸5Ê6kÞ…Ûãzv³Vït»¿—S9ÝÜuŸ{·»ÎntúÜÛàÆuÞÅ•uÏ)uW™˜Bd"!%˜’@H‘IHI˜ïÕïªêžî™îžžþ1óyæ1ôêêOW½ëSïÏëóþ¼ßþu$÷áˆÏ°@~?<1-ò\úÓ4ýHN`Œx'׈T€—ït‡ƒÖüøWËŽÇaÁïýžÒu¡Zv|âĉ‚¯—5þâáÈ­XG‰pÓ˜Y££££µø¢ááa†††ˆD"†{JÞ×u¹á¸íc›TžB‰’è5ÇqŠ 2CUP /”¼@"ßö„^“Ìãgã "åÿ:Rl Z)©T 
MÓÊÒé´œeÀ[*áœõ ;N$üè®»øÑO~2qN§ —7’¥CrþÒé4ñx¼ªçSQ=dÝÓÓC[[ýýý¾C]O;.džؽ{wÑhhÉExàþ€§ßüfFfÏfÕý÷sI{;÷ÿûüzx˜;GGi¹åÍuF-lã9xý£ÌüMcÇ㪟ÿœKžžÍO>É]Û¶Um¿’'K×uo)¼ëz³³š–“ƒnÂýàxdx˜ù<ÀêœÏ9÷†‡ùèÂ…lܸ‘÷¼ç=ìÊ‹œ¯åØqwwwÝ—¦†£½ÇaýúõôôôøÑkáIA;›Æ@×uâñ¸/$HÎÏüHs¨þ½½R©”_pª¯Ñ¨§}TâSLg$o`þÒbÔD|{—åÆòž\ò¾U"JK”™D¸Év2 /טLvÄb1Ûü¥ÎùBœL8‡¯Ë²–¤M’fñ+ µµ££ƒ_þÓ?ñó¹sUAÈN³Øq¾Ü\°e wä”9¶C'žŸÜ ªŒpó1ÑØ)Ìœñv’Ïdœ”ÖÖÖ²«Ø¶Í©U«*¶AÉFÉd2þ v‹žLÆ{?öþ¢Ñà;À›Wg–Ñ4ï»:;Ƕ-õ¾Ç¶!‘ð>+rÃxua¤-†‘ë6'¹5Ç{ßu½ýÊï¶,X½ºôZ¨sÀ ÅL$lÚ´‰H$⿹cÇFFFH$9¯×’W–/¯x®zZ4ÚJ"1ƒ¾4A•9 AÕ7ûødi¡¼ž¿\éºL†Gzˆû¯¸Ó²H$¾£&bNU‹}äŸÇñ<˜„ )ÓD›©B+KàÂùŸÄ.Š-!U©Æ!\ ¤dÐRƘGQùB˜Î’kM®…ðòÑh4š¹A>ÃpA²Ãäû;ÍâÃ*ÊǶmÎ.Y¢„8EÓ³~Ö¬Éí ?Vo<˜×ýIÖ‘°@fÛž«ÎDÒÑ‘+@%Þç pÿä R)o_ɤ÷YË D´|DRÑõ@4“}æYc±Àõz{sŸÛ¶'ÀìÙ3±‹ªëg]1Œà· ¥è¢ù®Z)Ù]$ro`à¥.™Ð×ׇišײ¶´´L&¹ýöÛ1M³¤µÿÕä•åË+þ¬ ®LÓäØsÇxë·ŽÝHÊý…Gžòœ&ˆb“ˆ7ï¢è!¨l žã(Šü í·Î#qž}é§ Zxékoo/ò'Rï¦M-.¾ ûåç¾Ì½æ½$ %Â)ŽD"Á¹Ý»9úÉOò?^{mâü‡2ÁÑC`ç:Aß(7F²"Zò«[…sJHŠ@"×”¦i9BZ¡èÙwXl o—/˜å/×ÏÏGZ(E‚ìoýúõìß¿ßßGWW€Ÿ‡SÚš?±´~ýúœhÙ ±mÏÛÒ´ oŸˆŠ®RØÙÙé'7 Ã?î…r´)Ñ­LÒx¾T±ŒRM®XW3QúY¥ N˸ä/'M¥R~~4É»&“ ±XÌ£ à ™Lúp²ü>QüQ%®)Àë_\u•Z–:S ½49‹^{mŒ%“«¥ÓsuHk esÍëz6{Ë>èìòvÝÓD‚…¿J‡»ÕÞÞ@xOLš¨ùï—"„£Ö °—ß6Ùn¼ïž Ìñ^†ÚÒÒâ—ö­åòÔsç²ìèQ(GŒ³,]]xàºh©/qi¿ h{V%ïrø’xB^~·(AáY¶jà vâÐõÌâV1¡šÈò¨FG*°uvvæ x›Õi;ºn]ùŠÂܧæÒÕÕUÝÈ…¢ HbëÞ~”¶«¯F¿á†‰?$  ë”=(–<“’Ÿ.<”jØáÄÚá>$\UR¨ÇÒºüˆ9éÛ$— <—è<‰äëéé!“ÉøÑ(àM®„ûˆL&S´‚ O4êýI‚TjòÉ…§!E©ë:]]]þS©d¨"ÝòX'¨Qwàù\ã¹!ã‰ivöýñ„8k‚χý6Å\×õóæWæ»—IŒñl¾Yý5Eíq]—Ó—^ª.ËF ,ŠIt–‹7 "ESÙm$zYòWÈʰXv?éÐ>%ÀEr“ݧø…²âL‡+‡®¬÷Q¨*¥k‘¥’Žã¹f²¤Ó4½cg$s—dBi¢•¦•&¤)êÇ€%K–Œy#¿bÉÈÈHÍ÷Ì’%ž7‰„/½:‰­‡‘ŒF±ÃÛ†U:\…§ ï3mmÞ{"ÛÚIÏIÜO®“öÌÖ mÀµ6l>œ 3³ÛJl¨í ­u=x=_*ãºcc>åóU¦Ù£f®ÂfÎË/c~ùËpï½oõÖ‰D‚={öp·~w½›¯PŒÁ¶m~ó²ËÀ²Ð>ýéÒ>$Â[x ìLrÈK¡HŠðL¢mÛ~¾µpuºžž_´/9ÕuÇñ½4Í ¯· ÒÃ!øétç" Bÿ¥i¸[—U¸‰„÷¹þ~oá<Ò½†÷¸·×ûÌËù.øƒßÊiG8G‡ Âa¸/”¨¾pî$‰@éêêòÊ,“è¹1gÃðþR© !G89È '“Éø9c±XÙ›ŽB•áÃHÚbHJcœ÷¥*r!’xbÞxíoð! ®‹¡g÷!ÀžON&2$m¸(BxEI¾è¬PTÛ¶9ñ¾÷Õ»ÍC –å§[².‚Õi`¹}Ÿƒ'ˆÉ=@rUï!X)yÒ£}½äøo’Åí/¼º!™m—MP +7æ–`þ´·¿#-Gê}4+>×\ÃÍy~–ãèý†tbÙsá/•L06*\u½Óš9‘H„¾¾¾¢eV FÍÕ‚§N3‘²¦Óüäê«Ùø­o1ÿ²Ërý@Óô: IÓ%±™’³H#PîÀ" ¾Õ'5¸Ä€#À[ø:ð¹"²ÑÉf) /Ž/Ü.´ˆ¹«+W€ g4to”—½–HäfQ”ÅÖò¼Œªx“&ªf! 
å»-+X(FJÒv9F2(ÚÚÚüèfgþèèø„rg9¦ƒ“v|Q¡ž„ó (a\×åúw¼{õj´E‹Æß8[ýϹG@‡}·µ¿¿6Ûí¥·÷§Üy§ÎìÙÏÓ×÷8Ÿý¬A:ÆqbhZñ¸›è4l;J: šÖ› îŠætCÒ-É܆T]’×l;ó¯ ûKºîm¹2DØòCý'š¥´,ø‡ï^G쎕¬¿÷{~æUÐíêòºCqÈDГ.2Hë¦ÓÓý’¸®÷<ï'“ žàìÙ—Y¸p!K—þ[_„“®[*A¥Óð³›~Ìû÷øü?à¾õ]˜ÿÑÛ&ÿ·BpÛÚ»·ôªPÍŠ,Ás]wÚÜŠÿX¼Ød~¦DF@p_t¼Õ]xýÀúìwÉ2Ž7PÓð".âE®tr—•&²ï‹ßf‡¶+@n‡ÚÏî?“}}œ‹ø–J¥ü¢ ™L†žžÇ™6“¡ŠÆgÍÙ³õnBs‘Á®üZ&^™ÊþÇëoÝìcÎD„³úØ ÖEhA$Œ?îÿ M¶„µ(“±÷šìûG¶4§wzáBÖ}èC9N“eyÑnŸùÌ)î¿¡ÿº ç1“Ñc&²TÎÓiÍ€M›6ù³_÷Ýw_Né^˲ؾ};÷ÜsOÍóÃ}ÿ¹çà‚ Æß(4Šº |‡¢`Q†|A!\¨!ƒ× ¥€§ÌòªÌåÞJa¼å†ùÙ å‹pP8œBD;¿Š-K¯$Š|Gþgå¹”‘Ljjùˆ`(bcx¿²ìçöïßïõT–ü.ÓôÖß7j%˜>òÿŽø-ét°I¸âL<î¾D‹°‘Çâ3ÛöXs,$ªIÔŽax-Ëûœ¦·ˆ étPFÄ ÙŸœji¯|wX7›¢®{I8¥ªŽhÈàéÙáª4òÛ £°ùÚÞ4ǶKÓ¼`ùݶ]Øäå÷vuyï‹("rx¿rìf¢Pù£^àô­·ò Æ_N"³žVöO’Á>Vô'áoÿæoùÇõŸañâ3,Zô$ŸýìM<öØ óçGøøÇ/Á0œl8ï³’0<,xY–W5)œ¤6ÿ›-”S£¡¹’¶/]£eîßļÂ.Æ® -eÙAþ6²¬!»Mƒ§žzŠ«¯^Áþý7ûËSeEjø7}úÓÌ©[·ù­•èZЇKé®åš;vìbÞýî '@YŠ.â!U›‰Ë·«päÂx«“­Ð~tÆF¾‰åâ îį’mt¼–™×P¦³Ûôã‰tÑìö)p{€/€&Q²T*•}.…²DÀ“÷$ #Ú®7ûš‹'Â¥ë}rj‡ã8¬_¿Þ/¢ Ëô]×%ŽYÖ®PL5ý££\Z¤ÚdÍßôd|¾ ±ŠŽ.Ïd´ûƒ­ð¾aôsgˆú+Xéw‘Ùº{åJb³gã˜0üÔ-lxõÕz›_MB5¶mÓßß_÷È{…âÌâŬ]1IŸÈuƒY×ð,k:íÝ $êè;³,"’1’}tuåÎL‰£ÚÓ3V°³¬ C¾aÀ­p‹•ñœFƒ¹x¯°Î…¿ráM&\¥éÂÚì>åÊEn=¹Á¶í9¢^xüر\Nc+ν8¯áÏH¨~!G¹ÐgdÛB3êB¡q¦Ìäk+ê kªð&FMì’ãNˆ3N× D¹Çy bÜðð0 s÷Ýwû¢\½È‰&ÊdŠVv“ü8ýÅjÒŠ° kÔ5rgeÃ3ÁÓÅ/ ‹t:ȧ—O¡e¤F6ÔåËCC|¶Þm©3dÇÙZ".iÂŽkÎräg·Ðjzù' °Ì×4%÷z!t}üÒÍÁ’·âŸÏ¼Éÿ®p$’ÿ›'0)‰î S( (¼Ÿ°ÏPlûü¶UZh¼}"?¢É4sWi‡Å̯}íäÄ;lPlÛfíÚµ<Œ×…æ›ëÁ¬Æj0o„þ«@¿Ì{?í¦Iz'¡££ÃÏÿFý¥PÓ}ph¬… &¿ŸrÈ—x<Žã8†áOZMºn˜wÙ˜™Û@Ÿþë#$jÐ4ÍÂ9õ +¯…ž8ÕF6½Î™3Ä¿û]:ë·è=ï<:_äÎ$î½—Øyç‘ÎnŸ9}û‡?$síµ¶}Í5h¯½¯½†¾r%öR Îà¥ØX°®¾í-o}ûàüóqÞt!Ú±°jú¾W°ç¯ †7ᮿöéË÷`ÿò½0{úº£0 ÚÙStý Äúž!õëë1Bêt½i1œ9MüÏ sõåè_?Îú[oÅ8÷KŒYƒÄ¼~})±ÈtAü¯ãØËVÓÿ¿qž@%óÀ±ÓXW`λû=x¢ A›ÿ8¶ý2ÆÐAŒoÄþÃ+q…µk1—-ÇY±ÂOߦÆ«¯¢?þ8öýƳÏzy½ò}Xµ ëCïÂ9}Ú-7fÓö¶‚6r”Ì—ÐóôÓho~3™ùóIüË—ÁuéýÒ—àöÛ±¯½€ïþâpé¥õ6¿)Ŷmň*¥h$\xáä 5XV°ôBœYY~!ÂY>…Æ‹…œJËòöSRA zzƒ%¢âãÚcO)jÙÊñ:h±`¶¥”á…~Oxv>ß¡‡;,¦Ip‹Dïåÿ¾Bƒ‡RÚUnéËp°‡m7­wæÌ™1¯Éx£à-«¥ú‘ÝBƒÇÕeNþ ­­­lÚ´©Þíà•åË!lØ⛇ã88Žˆp9SÇyôËñÂqS̢ѱ!KMÈ‘:ŠÂÕ"õtQÓ ŠnÜ8Ì[¿ÿ7ÿÃÍ5oO3iùK™ð’[Èõ5úúšWˆ{º¯æÎåa¼yü!]*åýöýû –¤f—£Ù¶Íá¯æöÏÞ΃C’L&ý*É3‰Ä³÷_ð¬ªýw†—s ‹aY–_a6YÈ–ª’\N÷EžšÉ7^.8):O"ù¥/Ñõñ£ýët~àÄÎ?ß_eiÒÕÒ‚ñÈ#tÞ~;Ñï|‡În 
ú¯ÿJêwž_HÎyõU¢¿øîEÛ»íÝïöþ?{wÃô‚~χY¸gáBt èlÅ= Î0h· ]׊ö)`¶×àdÐ?¾yxg¯Žþß[=(\¼ûzO9< üœ‡çB×*ôý±…@Ä!yõËð ¾!k@Äÿ< 7¾‚ù¹\‡jÌÎÐ`šAôß.¡ˆéаq£7>¹újïµìÿ…Æ,:@$âí7+°Å²\t|æ39Ÿ|ñÅi)Äy¹5½¥ý®ëbš¦á Çù«W——^–I„ïWÍFO†Xl|ÇÓ"¨$êT /ÃOã‰o²,^fÂCÚ2õ«‚[y”ŸWÇ™;ǦÉÞ-[êÝ¢Š8uê7ÿñû¿%•‚hÔ¡³³“={öŒý€†g#’ÇO1#™3ù]L§.dÝáÃÞÏ¢ v¨]]]˜¦,!éÂëÐÂ}L‚ ¬7\áe¦P(dI1帮˼sçr^‹Ç=¢«+{JÒ°ãŸwê˜Iʰ¢ùÚOðùßùÚçÏ'FzIVcø¹/q.4ùÁiÒ|lÝÇØñRÌ4.‡'¼È¦:^ê«iÑhtL¢äJ¼Ù‚p)ØDþò/ëÒö©À¶m:;;Ç‘Ažÿ4žÏœœ„k®!õ‘{è!:ø£Âõ׳'û9cÕ*lÇÁ¸þzâ ¢mÜè‰Ö7ŽmÀùçCßÏG¹6ûÚºu¹Û$X»jƒ*Š ¥ÁˆÇ/†‡ ¤ñü.ÀÑà> }°Ï’u«:^ø^ö5]¾Ç†Ðávr}*l*XîšZECN§Éd2þDu,S"œ¢á°m›³kÖL”É.ë”åµ,$"cË4Þ @ò‰ÇðòaêxcNÉ;.·×°È&U´Ó´fîܹ@*/× ‹pàÙŠ/RR r3ކârÈdÆ®ýÊFÇæc–hŒp‚á2ë*(•bÛ6ûÖ¯‡eË€ ]žûðslܸ‘¦\s«˜Q¼28È“wÞÉ;È]ÁßÑQdɯ }?ècSr-Ÿh!fĈµ¯¾Ýˆh}½«þâ{X M§Óþ½4•J\ùyB3æHLܬd2’ɤŸ¨>CÖG&}þù˜Ë–aá¹IÀzÿûéýÇÄÞ´‰ØÆ^Q¹ë¯÷÷§,[†™ÓJŠöÜ“a-P”@ºäëÀ«8gTM‡^“È )Xà†^?ˆìû²j"Mühb”ð9EÍ‘T-š¦áº.Éd²â\‡Š)FDs¹.%RJn°2 ¬âÑ fÁâÙ÷pÇ£wÔû×”ÿó]—c7ÜPøMÇñÆ~’h7™¬MpÄb›D·9}o’@\3PãËŠ,9s&gIð=÷Ʋž,Þ÷Š>!6§DgA18¯Þ ( ™)^\0‘w¯³ì ˆƒìzüzÿ ÅLbí¾}@àS€—ßU‚/ž~×Ó´ÜÒüKn3€ƒÜDó&ž=çtà ü%¯ßþ:‰Db3<.ÌÂ…ØeÛžjlÛÆu]R©”¿l¼êéN8vo/¯…ª«7;™LÓ4qðÆZöéÓd^}•Œë¢½ú*Ö™3ôãùÉq î\ŒM›|ÿyŒUË¡’çbDCÛ¦Ä3 É“ÿÙ/Ôü™=ä^òž£J’[/Qª?¤MeY¬_¿ÞÏu¨ëzÉù ׬CpÝÛx×»TìíÄëXdû®ì6^Loix"»]Ÿ&¨î"Õ‚xѨ½bQ ~tÕê}4ÊÆ¶í‚Õ&oæZr¾õöN­ç„þ²ËîýŠïàƒ^¼þÒ ˆ Vú¶"ËòŸÿÜ×)4 ÞñŽ¡òvCé3†Žˆ;::ÊòÇóÔ‹" #]×Åuݱɽ£3ÉÒ±*uàåì ‰e)Ä\ÇahÞï¿åýõn¦B1!û_{µÏ?}É%\š­ú4fŒg°t‹µï[ˈݜÉw§ŒH"çÁ÷ôôà8®ëúÕCÇñ—mÆb1‰„_T£Ùq]—L&ã®H½ò ™7Þ@?|sÖ,ô·½­ôIjQ§eÞ‹7àÎV ¦‹ ²¥q¼Áx8zÍ!7Ú‚YtQ ET+´êPRtûg‚ŠI"ây¸ø¢B©²Ô[D1I¼%ˆJuð–s‹ #•[¢׿¬Î‘ëYRRæ—Q¯/è'H©SHK•k[‡#-Gê}ÄÊfpÑ"ÖË7Z«t:.ž Þ9ê%Ú$òMéØŠ 8{â„ÿ8•‚x\bbZˆY»–·üìgcË0géèèðgür„8¹qfÖî+uÆ¶Çæ’]]Ɔ7mPá슦àà›ßÌ ‡±ï’KhsáÌzp÷èž8Ús”ùWÍgöïÎ.µ<ñÞö1¬Dã¥îÔu}L®´£G½p*)h`Û6K¦A±†T*E*•¢7›öÂyâ ´¶6Üùóé/'Á·‹õ"Ë–¢¡×Ã9ƒd Zøs²Jª@HRoÊ'BŸ1¢Ý&RÕìúŒÃqzzzÔRÔ|ÂמDœ& ¢Ne™¶àû "ÒôП‹w=‹è^Hñ-™÷xBO¡S3žÈ69½p!«NªÝ†'0¤ïuÉæ»Ì¾—!÷|)%pÙ¢E~”E&ãjÐ4-ȵ›²/Ec qË{Œ–O|¢he—þbe•eóB3L E ylùrd Ëò–ðù¤á;g¾Ã]ÿï®z7S¡(‰EÇsÍ‹/²èÏÀæhÁ9èG¶>ÂôAλ»92 ÔÃ[ÛØð‘Ú–eåLtéºîßwêݼIãº.ýýý˜¦é-I}ûÛÙŸÍçYå’Èþo¤Ç¼mREO¢eL¯oðד}Ͼ޲Â÷^ ©€ÜÛ;ƒ C®‘Lö±," Yx‚–\_1rÅpY1#Ë<ùÁDPòç“TÀᤱ+WzOÜì 
Q«²)Ë%ÂMld=žMDÉ=ç3@ULºî¯~J¥RD£Ññ'E”7ãih!¨¬¼r8±±êPufÙÑ£¸î؇#5Â]}W¯P48gÏœaõ-·^è}¡¹Û¶1LƒÛo„ EW ×uq]×ikk£¿¿ßŸÝýéOZïæMÛ¶ýJ’i §\Î"˜ì3„4ÏÄö%²M³ËòRŠ˜ KÖìÐöà økpKŠ&Ç þ]¯,mSvõ$šæíßq‚“¶í½&û•çatÝ{MÜCÇñöišÞ{¶´Ó¶½×Çû?ü=RéyºÐÕÕU¼B_£ 'DŒL ,\z;L:äˆv]Ø<îž"q/‚TD´',xÔ‚#Ú\ø™ wkðÖsšÆðÂWÒ™ÀxÀ3.1NÃÛi'Á¶…(ö™T*×€-+x‹ýLggpÁ@p¬ Ãk[ˆª?³©Ä†…6×õ'‡…VÝÀ —ÞP˜aLÆŽWs¨µ(èðÈ} ë:­ßn­÷OmhŽõî9ŠÊ˜Œd2ºúj^tQéQáj{&ÞrR-ô—çswuK&M3•ö³ìQØ/Ëé²4㹬FT”Ïdí¼*©]]]c‹—ÉúÜB¡’®8$âäæ«µº>ùd–RŒÄ%XªΩ(ÍRþuSS©Û¶Íyk×ÂîÝA‚Çr‘W¢–Õ E…T£?w­NŒ$Pöª¨788Èðð°oØbä[·n-¸ýøo_ø+.¸.ºhÌû?úÑ >l¢ƒäE`DìK@»Œ[yM£égÃEëiæ‰Ê-[¶pàÀV®\I{{{M¿»ßxË–-˜¦é-Ë"ŽÍ ·~}à4jšçÔʉªö “ÜQ¼Á¥äw±ÍÍ>ëzMnûå^fFüzy_VNÚ>ü¾Œ$/ckksúÇ_øÂ˜óÅ/R0‰òD„+JK‘›è‡äœäÙd\(çÖuÇ¡æŸÿðótzlPùŒDIËä^8h8Ýa¡Ïtuù6ÁóʘSrtŽ÷’4?g~;l;È?jYÁç øÜÉ“Ã\vYÇŽ5— pº£ƒ]­wlÒ9é<Æ0ƒúÀ™Dww7>úhÉþqM„¸H$®]»üu×íííÜvÛmEûâ‹/fÓ}÷ñ×ñ9BœôÕÿú¯·±gO¿wãŸ_‘Ì9½ùy•,Í‹hù$“A¾Y€b…ªÂùZ{zrŸg2A&´üÆÞÞ rOÞÓ´`EA¸,c‘Ïïò+žÉqi/‰°cǶlÙR·ï/dž®¼òJ†V®äµÑÑ1~¿É³]ÅŒ¢ÙìxÍš5¼÷½ïeþÖ­üäÏÿœËÛÛIïœ7&"Îu]þäò?aûÀvÏ)N£ 5ŒC³GÃmÚ´‰ÖÖÖºåˆ+׎/¿ür6lØà -×ÿð‡,üèGs¶wîZóçõ~d®÷Dç‚‘}ož½O6ÝQ3 ÆFÉ·V ;vì »»›5kÖÔü»+ñó—M9Ž36òBf¡Ã9ߪ•ýsðD‰Í÷™'!Ž„}Ö°(‘ÉT®›ë‡‘ÜŽäR[íè(ü™üÉóp~F)>"“ú⿇}ö|ÑAVØÈiÈ÷ÉÃ…P¿É*03öí!Ø&™ôüŠuë¾;5çz*õ7mÚÄ8uÊû!åt"A4‘cgb#\¥YÆS2¦Q¨·×³£üÂ/"¬D£Þv–‰‘s¾ÛÙÂEo,Ëû;ÉòÏ8·§]ùcÙ°=™fîóð˜2üzøq¡Uå®ë tàu!a¡-¿«ÉÏ?^Ä&Ÿ ×5‘×%íBø5ÛnÅ0>˶mÍåoݺ•o=ô79âŸ7×u&þBå#OK¶nÝʆ Jök"ĵ´´ÐÒÒâ?omxÙÒ¾}û˜wîœEË2þ~Ɔà÷àåo©±ã[Ê}£Ðl‡PjÂäüçù¯‰#"^xE‚axŽäÒ•NW¶•›E*Ü`ÄyçÍ“Çc+®‰£Žô“N½ľñ¨Ä†^hmåM/¾ÈCçL×uO„SCŠS©;À›_z‰¿ž7¶T¨ÊFBh®Æök·3ÓŠq©$@P©`Û¬>|¸´°t‚ó‡úX/Bt…7P×Ãù]: –¬Î *]M¦ð˜”g±mÛ+ÒN³8{Õ>9Á$‹D"™´}Âbš<?S|S)\N #„ë(H„ŽìSD‹pª»pÊA„°ÚÓ3vEn~ºÈü¡F)EOóß/ä³ç“¿ ¸ÔUÁùÛ\ý7÷–uJ«A¥v¼kp.½´|[Màû²«¥²²\Ñ(´µ+˜d<#i$åa<Ê•ã­åߣc±\;*åÜæoS(¥b>ùç?±6ù"Y)cÔB¶ŸZŠm_ìýB¯úý㪓éµcǸðÜ9Òi)¼¬ §Pd©‰·sçNvïÞíÏä LhàÇgÑìÙþó‚{r£ášüN:ÿÚ/¥ÇsW+D£C”Ö,ÂZ:;ó¹3O™L0S)³…¹7¹AʬJ£N+±a'×Á³mC qŠš3;¾ðñǼUM’Ÿ%,J¨Y>Å3;>ùÈ# HyQln«ôñaèùuè½pÿm2ãD8ÿ0¨ûXÅLʯÀ‹†{ùÙgÑDÁç¯@?ѽýq`+¸_÷%°Àp5èÉ~Ug§çJô˜LCñb¤•ˆVùï—R$µÐ ¿Ý*[®ŒJíøôÂ…´Ï™SòRiË3[”¯#ŒE$¨ ÷Æ.¶í9d<1^ ˆüqÏDÌ™)¿3ÌdúcgíZZNœðóÁJ!­XŠ…·š¯§¤Ý+¦15â6mÚ„eYlÞ¼™––†††H‹%/‚t ©T Ó41 ÃsäæÚ N³^ç^ŒBNÔDH¾>AB¦Eô“¼ ,÷íìôþŸlA¥ÉP© 
Ÿ;}š¾k8r3™Œ'Ä)5¦â¾ø•W˜7o{?wÁsô¾ÒËCWp{üözÿ¼¦BR (Êg2>ůÿå_8^B.°®ßý ˆÿwHÚÀÖq6Vߊ ˜¬oœJ¥øÒºu^hYÏäFtþÊïûètȬ'[d• ²¢Vž°•`™C5Æx±þj0öy#Ð?~ŽûFÐW46“µãÖW^áƒä=N§Ó¸®K²P$L¯ÿTþŸ‚.MݵkƒƒƒŒŒŒ‰DrÂ? qbî\ÎΙƒëzBŒ„Éš¦$¥ ;JUn òg& Ãûo&*_¼«•Ø0À‘ùóYvåÁì’ I;éåM™3NŠúR©»¿ú‹–-󗼋€ôò/³lÝ2n\z£7Ã7C£‚*a&Î8W‹Jí¼àÍ7ÝzkÎk9Ù\Ràü ´¹ž‡†×W«ó¥¨2“±c€C?þ1æÏ÷ä ÖuÙ¶7 *¡Íf΃Xø‘÷\9+Æ£R;þåųþìÙ‚ï¹®§/Ëj¨=O’[uW¡¨2“éçœ<É™¥—ú«ŸâùQži¼{ƒ`5ŸBA„8A –Âà‹/²hþ|2™ÜÙ #'¡ž“¬TåiM#äʱa€Mcøå¥þó}·µÆZ%X(êJ¹vüú믳xñbººÂ ýfÈýN-´¡ÏëEö]MMÉ.½ãŽ*î±|ʵãW^y…+WŽy]ŠÒù)‰Rx}q KGŠÉRIýíÓ,¸j×+'¹,¤’žâjG:Æ0Œ±y[NÀŸ„·¿Ú¿d_kÃ1—;’W(jÀSßú‡.¾˜‹³kBe\'‚”¤î|ȆÇ,˜kÀ+&œÉ.+½ð¼ú"<ÔÆaà7·â˜N ÉÕN*Ä{yˆK¥Â8¡ÿó‹]Ú‚—Kéb•Ú¯z-›æ+»_­Àv…Ú.Eˆ”ÀØFfË7¿ ï­}±†JY3:Ê⇂;ïô_K$‚Â|ºcšIUßU(¦šÈÏŽeÝìOR'‰Ü±ž ì'èL¥ØbÆÓ°BÜé ˜ÛÞŽõ'Aê‹=’4,+ß wFÅŒfÉ3ÏàºëüçJ„S4#?]µŠ›/#=øàƒÜdÜÄ*}•Ê“Uɤ·,¬Q"}g Žã•™½ªðgwÀsÏÀ?ý§ÐÆdâÍb+ Ä®ÿ˜¡ÿý¿ÑðVì¥É]"¬ç;Ðl1à:²Ê pOöÃ&°¸tüï,Ö]é¾^è}ÇÊ]èbNðZ¥]­Z 9u¼üæ7cŒŽúÏ¥Š® õÏê˜Z¡(……Ù%Ö²h/Z(ßR¸Ã3PAD ΫwÆãÔÈ윟?‹-SI”§hxÜðZ µÄIÑ„œ\°€5ÇÛÇqwÞy'«V­ªwóšÃPÉÌëN8OÁ»ð‹6¸»´n‚>ºå(78nèæjÛ¶ÿÜqœlÉN×u±²åÜÃ,ËÊÙG3‘ÉdzÿûéÄs‰{ñÌuðñ è]ž‹Ü…ÿ‡ëdg2»±Ì FQé]uaÁ™3ho;àEÁåˆp uY©¡ŠÆNÌš…m‡Ò¶äW É¿ÍÄQv­\ˆ\ä«Ë¶m“N§½'Íé7)f —=ê/­¶,‹Á]ƒõn’BQ6§.äåyyNF`átåN®¨ÿþ‹_D×u2@Û³pÇ£ð¬ÿ1H^HnR,%NLHXÔÊÂä½°ïæº.‰D0ÕÙÙ‰­Ä”H$Èd/Št:ío—Édèêêò÷ÕÖÖæ¾««Ëï‡2™LÑï—m4M›Bœã8èºÎ¹s‰Æn0-O;îê„ÙüZÞAÇï³YŸ}­@ÕS…¢Ö¼ØÖæ+ºá¼HM•îBÑØÀÁS§o’Õ²,ÿ¾æ£‚†EhX!îÀ…rî¹ó|uÙu]¯Zª‹2hEÓ0gÎ?q§iš\ùê•õn’BQ6gΜá™g–x#Ùåz¹Y«%cÛ^5pEíI¿¨“yzƒwü%<Ü Fϯˆ1-KD†‰DŸÒé´/R¥R)_K¥R9BXgÖXmÛöË{²¯°þ>]×s¢ÌÐzìd2é¿~‹Åü4Ñh”žl~Ã0rò÷ööz~aöóòØ4MbÙÓðcÙ.üXoÂ$O8À™ÿôŸÐC² zúÀúº—à^Ó H7bš¦º—)J¦a…¸EgÏrñÍ7^‡•Éd<‡ÂBÍ’(š‚×-báÎWI$àgÇ~Æ­}·A­IS4wܱý7ðÄŠ ^?:::rÎ[8_e1!N×õ¢"U¸ÏŽÅb~ÿoýýýþvùÆ¥nŽo¾oŒe‹ÅüÉ–üí ÆâV¯^]·ó\ ÍšÅÊ•+ýIÛ¶±°è¤“ O·H€ƒCšô¤¾K1ýhت©#Ë–±~½bïº.ÿåÅ$þ]‚øçãh—(%NÑø\xì/¼¼”•ï8ɦ%›p7ºðwÐI'G9ЦeE“ptÝ:lŒ¬_ÖÖÖ–3Ðãfÿ9LœCÎÆÆI?_q–ó”{ ºëÝ”ŠyáâÐÙ<#ü . 
í¿ úö·“øœ÷š)ãŒ* q::..66»€¯üè+lܸ‘'Nð¹Ï}ÎR‹'ã|êSŸbñâÅ ]9Ä>ƒC¬7†‹‹……™ô6zL÷‹À¢œûÉa¿ÓfÃÙ õnF‰µy¾µ•õg×p1^’{]]ºãž˜ÑSï†VëçÚèºîç÷‘Bò0ųËfºººü%̉DÂ,R©š¦‹Å°m›T*å Ó–eù"@"‘ðWÞˆ žL&q‡ŽŽÿ~Þ·ˆ/²ït:í·!“É‹ÅüˆQÉëçºnN~UÓ4ýmÂÑ¢ùÑ£q?é_®ðaÆØepMÀÁ+¯äM‡cÜ'V`’‰g°°è¥×÷!ôl'gVQË·)Çq|Á(ìÛ$‰œÉްh›¿Ä^0 #GTʽ„ð~ ½'ä‹T¦*µÞpœ¥í™gpn¾˜Ïú,??÷sV°‚$I:é$©%A÷îÓiÒÙ 4°7|É%lÐ<ÃMè ö¬ÙCæÃ˜I’ØÙŒÊ66ZŽóF^—AŸtê:º?`ÔÐÔÅ¡¨ó_Ö÷?Iwì£üÞîßúÄÂz¯7–!ƒáÛn±ÁðëŶ SŠ£b`(PQóOžäº&ñ7!>åŸ>ü§$HààŒq˜…RûÒBvëâNk;ýrú-²[«:¸¨5{GöòêéWëÝŒ’Ùwø0­ó®<á"Η÷Ä3±çñú^{\Û¡¥¥…3gÎð³ŸýŒoø0¿\ôK޽éó™‹á7þ¿ßð„4 #ÕÁ£<ê}ø&‚ÇeP©ï"þÐt¾ÖJáÄÐ$ÅŸÿÆ7xÓû¢\ü·×`D"¯ÒdĶ'¢ÔíÊáСC¬\¹€çGžgéK9ºì(#prþIÞy'‡â‡?ü!wÞy'_ÿú×ýÇÏ~œKg]Ê2–Ñ¿®ŸóŸÏ;y'{oÚËÁ ýëèèÿ8JÙ¥tÉÐu›Õ¯2d¼eç½Ün俌ð£¹?"EÊôî tÐ÷ë…÷ËÛwOî¾»ðŠŒH7Ÿ"å‰çqüvçÜ·PÍ{/%³ã]ÞÙϽ|ÇËU=wSɼ×^cÔ…;2_墨Kâ\÷ —èÜ(tÍ&4›Ld{¼¾.|?=xð ûöí㦛nÂĤ££ÃÅ$ZÑ4Í1Bfx‚q<áL b aÁÞ½<×s÷Œþ˜wÆV²yåfß ÃA34_d^z}Ÿ9ÜïŠmgÈì‹õì¿ñújñcÓŸØ+v¿/Õ6(ªCà qëlú“¤pø÷;þ=Ö: m¾F&ûOŒÅÅÍy,†™o(b|&&6^بl>Äx]ÜœÏË@3<Óm`ÐCOpã­€°(˜˜hh9bl+‚£å8ßI’¤IWÕÑÒщ œ…q·»"9ç§ ÇŽã¥öã¯ÜwÇ}<¾÷q¬YĈ‘ áÏŠ„_x~=NÜ\(ŠHlz<\Ü’ÎS!òŸ…„—Z¡¡•E•¿m±çµà‡®lžd´£§NqúÅÅg óÕ ‰»˜­&&&Q¢jò¢Œ8|2 F qKŽ-a÷k»ëÝŒ’9|Á6üøàårqqI“ñûNÝ·å°ÿ0‘K%Оž9Yoo/î wEÖ˜ „®–Õûh( l!®áBnzlÄÀuà¿&O`ñ(Ö',ìß°aI•,>àûTùR•ø[äá‡öųx<ˆòüÜÿùíngíÚµ|ý;_çÂUò¡›>ÄÁ‹°ŠUœX|‚;6ÞÁb`Þô‹æµÁãØòÎrό΋bÌ ]—Ë'y`[¦îœÕŠ-ßÜï­w+Jã×—]Æ©‘yú«õIgpã.1#Fr‚d†a_wס]¾Ðû©~Š¥K—ríµ×òdË“œk9Ç^öz~oH$Íêi)Ržà™×½‡Ç\ù×Lx XëqÉLñµšÉ?8ýÊ+œwÍy<«—‡ùJÎ{2¸¸Äˆab'N9š‚œ[£‰xFƇaÑn"QÍÁÁÊþ|»am¤Ø~Ã:‹ƒão7Þµ`aù¿EÆ’òÙðØWC#IÒ? 
c&B'K9ãljŸ# Ëo«ŽÎ©‹O•¼b¤a…¸SÀY\úžîã«û¾JæCLÌ ;èb„ ºZQmÉûé/¸M¸Ó†\#ÐÐü€T:º?‹–$9æsQ¢þk…D¼b$Hàâ’!Cœø¸ÛŠWŠÐÒE—ÿ&:®Òäogcû¿ÅÎþ‹Ï1l9–KïXZ•sW VíÝËüGÜdZ|å#_aéK±–YĉÓ3κ‘è¥Í*†kdŠuè…"6ÄÞ&²O¹iɶa‘S¢g«ÝÉ—ÊБæq4]pkÏí†?ë%½ÆæÊ­WÒÓ=MÖ=Õ MóÊÜ+jÇ - ¹åÔ,>–8Í‚äýXXD³ÿzJ\Ç—ÉdˆF£8ŽCWWýýýhšæç¾ÑuÝ_ö¦iZS.S46ß~×o²õ›ÇqÿÐÅ2¾ëïÓs&쪅eY~ÔP"‘À0 ¢ÑhβLwËUo¹Ê÷M¬„…¶8»ÜòS!ß;$°åŸ‹KoŠÄ˜,Z´ˆ¿8Íû–΢½¥•ôu]',%Mß—&–Ìæ/»9´áùÀµõþ•¥!"D¾~½ÔtÓfò~y啼ùºÿÆýo»-ˆ¼´Ì“γhs4ö´x¡ËâwTB¹ŸÓщ•˜Ì¹ÞãÈ),¬œà¨^z«vO“ –ðï,4Æ,4-tmfÈ"åyʼnûшå¬iX!îÀæqÕ2“kŒkXrÿÚ–¶Ul¸SE)B˜8 á~61¾Þ2ê—ꔾèPÍ‹­‡,¬’Òbß›ÿzŠ”/Ú…Õqh®¿s¯½Æ«¯/Ä\`b¦L2dè¡G9’(vL uÀ¥Ú¯tŽÅž—³¯jÓw¤¯.ß[ /½ôGßñ*Öµ?d{°n·&¿S…¢Æ¬>Ë,·¯œü1¼É³R¼L&ã'–¶m›h4Š®ë9¹Ô2%E­¸£Ïâ5ã2.O?È'b:ûÙ?é}:Žƒëº~.ªÎÎΜ{ŠßýÍ7&,&¶O×usrµ… Ñ4+Å¢ŠÂ¯ËäñL ™üc€Ùo¼Á‹Kžñ¢ç“Ù~°ØÆuîÏ]zŸîÍŠWŒA¢…° V­ýçShÿ…®³B×f,û¯Ðw”³b¤a«¦®øÉ^.8ð°AVg´c´dU·–„—É62áeÕd2QŠÅ%õX2XMž]º÷•5¬ø¯+p/p?û›ú7)fÎÛÞ†ý·ëè´¼%:(š‘õÝC|rõn6˜#$IŽëàÙ¶SÝNç R(êÁ¿ùêþ󛾃ž‰O¸ºa<,Ëò«@æWpÌ/ ÉãUe]E59üìùKŠÛ”µpÇ·O]×sìS¡¨7³_ÇÿÝ—½å †áEÃIÜPŽ.9бAõabæ,{)Âsà qG†ÚØ8áÏ>òg¸ºK‘•ŸuE£f¢YD é†}fñf]ö+Øã93iFK1}h9y’óö_ÆYëüJ E³ñ쇞åÜ#·ðµè¦¢Ûˆè–Édü¨žX,–SÅN¡¨'_Š]˪oÞÇ'bå'5ëêêòÅ7©> žÀŽpS‚›bªyaö.Ò^Åxga[K¥R¾­š¦™cŸ E#ñ8‹¸`ùì`rÃ?>EiÆe!ãþ‰ŠbM'viê‹+ÎãÏ\ÅpriC‡¹7“j[¯|Xåb`øë®›™³­aåÞ•\{×å4y 7Å æ­|†o;?†Ï‚æ4÷5©˜¹ºü5VÌ[A1wBDæx<®"ß Ë®·œåõ¯_G´Ä4]]]D£QLÓ¤§'ø—õäç/åÎçß’S(A"ߢѨŠzS4 ú£V¼åÖ®Xë燼b £õnaó i´f’×°q£ƒëÐÖxÛmo«wSŠ¢£“!S÷‡¥ùltJ©ÂÒ ,ÜÿVþÍèåæ¿P(™g—Îåíè¨YiEÓrécŸd£9?ç5×us85øS4:ómæbsÇݦ­­ÍîìééQé ÇÚÖbÞûZ8 €BÑ,ÌýžÆuúRLÓäÚ/^K“ļ4$RDO qSÀÈÈ;w»› ·7.„¾µ}~Ž€F¤P!†FÆÁiÖj—D¹6 pnø©Žz7]¡ð)׎ÿåôÍ\õºFÆÎ”°w…¢6”kÇO.!šWïÉu]¤¨+åÚñ[ÿ(…tµ®®._ÈØ¿C¯$QL?ʵãY×|ó Þãt:ícˆF«(ŸbfQÉ8ï‹—³ná2žüô“àB¦³oLLR¤šjµád©™—H$&‰H$üDÈÅhy©…XÐÐN²‰É(£M#n5S>»£¥‡×^Ôˆrmxõê½|~Íìœå E½)׎¯ÿå¿åê3«s’y+õ¦\;þÈG^@R_uvvÞò<5ðSÔ“ríØ¶ñí8Ü'G£Q%¾)êF¹v|rþIµˆò-B¹v Ðvð·¸dé.ùâ%4ذµé¢†Õ¨Þ,Ô$GÜàà CCCìØ±ðç¾¾¾qÃå“ Áøï¯÷ñ™6hhØØM±Œ¶©Ä†¯ºL•‹EÑ8TbÇç¬æ½ï<SåÍR4õÇW¾ê?VKP@%vÜ*\–J¥èííT5kEý¨ÄŽÃ¨ÉjE#P©¿~h57íZ Ÿ@¥"R”MM"âˆD"þóööö C>lv×÷È[¶l©w »{òÇÂÀÀĬ8z¯œ0ÝéH%6ü‹¿à¾ò?ëÞîjØÏdiûi„kºžTbÇ¿½ò.ºüɺ¶[ÙOc¶£^TbÇßüæ7ý*|õª Ù(v¬ÚÑTbÇŽcù)[D„«5Òÿ¨v4•ØñK/½äG&׳ÝÊ?Pv\žïݻ۶ùæ;žäõ×_ŸñRgºý„Û(…šDÄ?~œÕ«WûÏ[[[ÇÝ~ÿþý<~üqŽÿ÷ã,Y²¤M,ÈêÞI¿ð –/_^—ï/׎Ÿ|òIžxâ 
^~ùe:T—6CãØ±jGn:ÄòåËioo¯éwWâüãç]ïzW]uU]ŽÔ¿ÿQíÈ¥ýã_|‘ÖÖÖºßÇ”Po;–óÑ,þñðð0û÷ïg÷ÞÝ|ÿ¾ÏyÝ [ÿ²&ÔÛ~…gžy¦¬ú5âÊ%2wî\Ö¬YâE‹êÖŽ•+W²fÍšº‹“'OròäIV¬XQ×vèºÎ¢E‹êv>NžûÙÏN8c¢P4 ʆÓeNJ進cÅt@Ù±b: ìX1Pv¬¨³FGGGkõeÃÃà ‰D”a+šeÊ進cÅt@Ù±b: ìX1Pv¬˜(;VÔ’š q …B¡P( …B¡P( ÅLef×ÚU( …B¡P( …B¡P(jDMrÄ5###¤ÓivïÞÍðð0‘HÄýk_û<ò«W¯ÎùL±÷ªÑ–p•Z·£¯¯˲rŽÅxß5UÇBQ:ÅlXÞkd;žª6(;n>*±ã©”EõÇ͇òÇ¢ì¸zÌ´cÓHšI£PMífÆEÄŒŒ°yófÀ«bYñx€D"áY"‘À²,ÿsã½7º»»Ù¹sgÎkµlGww7lذ¾¾>º»»'ü®©:ŠÒφ¡ñíx*Ú ì¸ù¨Ôާò¼5ª×ãšV”†²ãÂ߯úãæBÙqáïWýqs¡üãÂ߯ì¸z̤cÓhšI£PMífN½L­¤¥¥…­[·°aÃn¹åbÇŽ€g|}}}˜¦9î{“Á²,¿Lr¸}µjÇðð0ýýý|ë[ß<…¶¿¿Üv´¶¶NɱP”N1–÷ÙŽ§Â~”7'•ØñTž·Fµãz\ÓŠÒQvœ‹ê›eǹ¨þ¸9Qþq.ÊŽ«ËL;6¤™4 ÕÖnf\DÜêÕ«Ù¶m›ÿüøñã ä„/···3000á{•222Â¾ð…œ¶Ôº²¿ááa?÷Üsϸß5ÇBQÅlߎ§¢ ÊŽ›“JìxªÎ[#Ûq­¯iEy(;ÎEõÇ͉²ã\TÜœ(ÿ8eÇÕe¦›FÑL…©Ðnf\D\kk+­­­€7Sǹçž{8~üxΚ]Ù÷½JI$lÛ¶mLiäZ¶cxx˜ááa~ÿ÷ŸH$Âîݻٺu+›6m*ú]Sq,åQ̆¡¶öåÛñT´AÙqsR‰OÕykd;®õ5­(eǹ¨þ¸9Qvœ‹ê›åç¢ì¸ºÌ´cÓ(šI£0ÚÍŒâÀS4xàúúúضm¦iæä0™jvîÜI$ÉIòWÏc!!˃ƒƒlÙ²…M›6Õ»YŠ (dõFÙ±b²(;{<”7ÊŽÇeÇ͇²ã±ÇCÙqó¡ìxìñPv¬¨”zk&ÂT]Ó3ni*À–-[áÁô;èH$Âàà ¿¬‹žè½Jؽ{7;wîÄ0 ÃÀ0 ?|±VíhmmÍQi#‘ˆ¿î¹ØwU» ŠÊ(dÃÐøv<ö£ì¸y)׎§â¼5º×òšVT†²ãÕ7/ÊŽTܼ(ÿ8@Ùqu™‰Ç¦ÞšI£0UÚÍŒ‹ˆëë룥¥eÌú^9P###´´´`YÖƒ+ô^%HÂ>Á0 lÛ¼ÐÏZµ£½½;wúûðC&‹}WµÛ (Ÿb6 oÇSa?ÊŽ›“Jìx*Î[£Ûq-¯iEù(;ÎEõÇ͉²ã\TÜœ(ÿ8eÇÕe¦›FÐL…©Ònfœ' +EÍlÛæî»ïfóæÍ´··cY»ví¼…bïU›ñ¾«Úíhmm¥££ƒÍ›7³zõj†††¸ï¾ûÆý®Z EaƳáZÚÏxÔÒ~”7'•Øq­Ï[#Øq£\ÓŠÂ(;û]ª?n>”ý.Õ7Ê?û]ÊŽ«ÇL;6®™4 “¹žfŽŽŽÖû4ÃÃà ‰DÆ„RŽ÷^3·£’ïªå±P”O£ÛñT´AÙñô£QÎ[#Øq£\ÓŠòQv¬úãé@£œ·F°cÕ7/rî”7/êØL|fâ1ªäx(!N¡P( …B¡P( …B¡¨3²XƒB¡P( …B¡P( …BQk”§P( …B¡P( …B¡PÔ%Ä) …B¡P( …B¡P(5@ q …B¡P( …B¡P( E PBœB¡P( …B¡P( …BQ”WC†‡‡Ù¹s'}}}ŒŒŒÔµ}}}õ>Š&a``€îîn,˪wSrPv<}¨Õ¹´,‹zÿ\…bÊ)·ßV× ¢Ñ¨Äg®…+n šÅl”±_˜Fõë3“f¹–§%ÄÕ˲ؼy3ÇDz,¶lÙR·yhhHu¾Š’ؾ};Û·o`çÎlÞ¼¹ÞMòQv<}¨Å¹fûöíìÞ½»Þ?W¡˜R*é·Õ5¨h$*õ™§ÚŽ• 7Íà6ÒØOhd¿^13i†kyªPB\øÂ¾@2™dëÖ­ìØ±`Æ¢9Š/~ñ‹lݺ•]»v122¢f‚MI<¯wŠ)§‘ûmu *J¥Q}feÊrh4;näûƒB1™SïÔ›-[¶‰Dعs'[·nåøñãþómÛ¶±iÓ¦I}ÇÀÀǧ½½Ým×®]þã¾¾>†‡‡ÙºukÁö™¦Iww7###´··ûyµèîîö»¢9™ ;‘Y»%K–ä¼~üøq`b»êë*eÇÓ‡jŸËîînÚÛÛÌy½l¸}¼¢z”cõ¸·OÔo—J­®AE}ÏþêÝ/Nä3—C5íXÙpc“®›ÙŽýþ h^Q]ʱ‹zÜÛ'ê·K¡–× ¢>Œgõî'ò™K¥šv¬l¸±)t®›ÙŽùþ hLjaÓµðWgÚxnÆGÄÜsÏ=˜¦™ó<‰\Ë?^RÁBêíðð0´¶¶r÷Ýwû9&–,YB{{{ÎlÉxíkiiaõêÕ·)·Mà»eY<øàƒ´´´Ôö 
+ªÎDvR‰@0ƒ–?cVªÝ–z]UÚFeÇÓ‡RÎe96222ÂöíÛI¥R÷×(6\J¯hJµ‹zÝÛ¡x¿=]ƒå¶g¢kPQ?ŠÙ_½ûʼn|æR¨¦+nlŠëjÛqµÇ~ш÷Ec3Õ6ÞçTØåLÏ)!®–,YRöìAkk+Û¶móŸcYVIq±É´i``ÀQµ,«ê˵g";)×Fúúú`×®]~‡Çyà¦,\XÙñÌ¥ÔsYŽtwwûÈÀÀ###ŒŒŒ088H$™’ßQéýA1ó˜Š{ûdúíR®ÁrÛSkPQµìwʵ›ÉøÌÕ¶ceÃK-}Àé0ö«‡_¯h\¦Ê_­d¿3u<§„¸°aÆ‚¡›ÃÃÃukS$aÛ¶m´··³}ûvLÓœ1ê³¢4†‡‡Y½zuŽ]¬^½º¡ò£(;ž>LŹliiaxx˜xðlzdd„––5€RLK&Óo«kPÑLÖg®¶+n\Ùlı_3øõŠ™I#_ËS‰â*àøñãEK—šåˆD" û³g###ô÷÷ûj¯ÜÔ'sC/·MbܦiÒ××GwwwάbúQ‰Ýöõõù§ÌKÈs5ìv²mTv<}(õ\–c#ù3¼’ÌV^oVL_êqoŸ¨ß¤¥¥¥à¬v)×`¹í™èT4õî'ò™¡¶v¬l¸qï\WÛŽ«=öφ§²MãÝ3‹©òW+ÙïLÏ)!®Z[[˺·´´°uëV6oÞL{{;CCCD"u__ƒƒƒ“ªfÊéE=îíõÛ²Ìn¢}»•}OêÝ/Nä3ƒ²cÅXòÏuµí¸Úc¿Rm¸šmšèþ ˜YLU?8ÙýΤñܬÑÑÑÑz7b¦0<<ÌÐÐPÁpöîînå(e·ŠéŒ²aÅTR/û*ÖoŒŒ¨|@Š i„~q<ßCÙ±¢ÙŽëiÃã][ …¢v¨ˆ¸R¬”¶TÕQ(e·ŠéвaÅTROû*Öo÷õõ©èŸ4J¿X̆AÙ±bbÝŽëiÃã][ …¢v¨ˆ8…B¡P( …B¡P( …¢œWï( …B¡P( …B¡P(3ÿKc\E¾š½|%tEXtdate:create2019-08-28T17:00:47-05:00:ŠËw%tEXtdate:modify2019-08-28T17:00:47-05:00K×sË-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1469x828+0+0‹QÊtEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/docs/graphs/sup/dgemm_ccc_has_nt1.pdf000066400000000000000000007253211360743507500215270ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190828165840-05'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœ¼½K6»u¥9?¿âV ”ÁkpZ@•£})=0 d[F—ŽàêIÿý^‹·Øk3eëAŸlàä—̸’±I>\\ûþt.üßoøŸ'<ŸßÿüÓõu‡Üûþ„_ÿÏŸÊ×ÝBNiþŽÿ,1>1~î”>!Ý_õN1·yšÏo®¯«ÿ¯„ÏÏ?ÝO¾åW¿jíújýñÁï®ózпþôÿô'ÜöPòw?ñFæiîÏÿûÏŸùéÿüÅwú·8~ÿ ñÆs§+ôŸþت ?°°ÿð¯?ýšk ZRë÷û‡Ÿþáñ÷ÿôSøü=.ýÿ ¨¤Ìãþ¿Ÿîëó¿}{ÿýoI<ÜÏïãþ­/øÜ_8éxÌõоâÓÛPûÛßA ÷׺ùó½~Á¡ûæ?~èÌ×Þë}ýœ¿ÊÝ¿ÚÐèÆ[ï—Ÿ?þЫW>.?>®ÿ¿ü·×ù ×óùí¿ànÆùÿðïÝÕóà,9^¸­ð¹ëU¾ÊKyžÈü/¿}ƒ÷oî„“~ûóçþS¸®ÿüùÇÏoÿþ§ÿúÛþÜÿñ•ÂÓW»B«µä¿üJéW\)>éëÎg~ê_~¥â¯ô"æŽÒãÖ³"óŒ˜æÃÛNX÷€jÇUs|J+þ&¤ìÇÜÍ}}|d¥ÄãfLÑm×Wl×u§’L­ðÚ®Vä&~l­ô»‘ZÑ›ù¦Vþ‚hZ®ðõ\On±ò_5~]£½”òy®Oç8w™ÐÖ|…»Wië*᯼L 
髦;=ì]þüe’\¦wHè`BÓi·CýüÆ\ÒÜŒýTÉˆŽ«ÇÆj{ÑsÆ­¤ó‰Ñ”3#úßý·ÿõÿ?þûq?¶ƒü÷_žüëÊ9âZáSrùBÛÄý”ê¯ÊÿôKþÓþùçŸ÷§?ýî÷¿ÿýï~.ú·ÿñoU§¢ùúó!•Z åÁ¨ìÁØ( =¤t„_ \ÒÓpõRú$!~á¥bü† hò p·{L ðžÐÏ_w@Ì@KÈc…xç^–¿pú>î»ñŽn¼–XÐÙŽ¢'ò°‡£‹øºGê-â°”„Ù.j¼Òõô²ú•«wÍPpóajk½ O”p\¾+å«åš¯Æ9ªÇÅ _m~¾b®¸É«Ì²'ó8~&¨>Döã2^báqx7ÿðÆ€áN£ o©à8Ü,Êðuá¸îÒ_K_¡â¸ÔÛåóUCÆYÓÝËÐ\*k¸¼[T ª¥´Yö<<îyP†×yáìuTCN¡à¸šËy\þJˆJžãsÇöUŸv¥ƽ”¯Ðò…‘›Â×ŘuÇ^V¿vƒ"TEF;»gQcky*n%f¼2:ü¦—á‰.ŒèñÉ¡¬|azkdYûÊxµ#@Fœ$ámŽÆRpq6–QCèå’1Ìë“ëÀo7äþkŒÌÊÇ•ðëÚh³,{ nâžeá«¢¡ ½ñ¸ðÕ?õg4¢¿2 j¶—Ìá׋,¸DÊ£Íòœ!FÜÈ3Ι|F›×Wm7n¶¶YÔð~ü-Êð-Õ§$î^V¾*Û Æ­(C%âý®Ï T>èl³8'šê1ö÷_ÐK›m6ò;ˆ|ö^Ô¾n4“§ÖÖ‹jà“—<ËP÷×:`dÿÂO±Ý£ºñUÔ'Ï&Poa\®bF©Xo²8'óJ³rj@džëµëîõϵ sêEñk~㣈±•9Ê"p6-ï2¥g=l%£Á"R· Ùø@ú´/m]øBKÁ÷ÐGxί„ë‹V3N´0Ê*âM -4²÷MuÜɃpÓfƒE+,øšÖ·Š²‡1¥7X´^ÆkíÍeÛHo®8%z.´‚0NYñ±¬ËW\.4„Ößþó Ìì›Þ¸É2Üuûl¯™Ÿ><¾ª†VF2Ú«;Ží^!e¨¼†Ñ^[ïèf{eÝÔ+>y «ø¸³½"n⣺žÙ`ÑÛD´’Ñ`&*7Æþ¦ÚS\¶°×K»£CY‹+Â"‚¤€'¿GÃkÁ¦Í‹2|ne5ʆ˜V„Å'°‰ÏjÔz«Œœ£Á"J ¾#¿‚^ôp¸2,‚>þÂnb–µ¼",.‡Þ ùh– ñ½¬Ëȃ¯ Á‡Oö•ë ±(‹hÄ¢—Ý_lŠ<]ćۊý×øúê ±,C׋7fY{Vˆ-|„§ÑPâ…wÞVˆå¥ÞbȽ õØVˆÅ[ÆGr§Ì¼Âh²=‚g ê(BOu­‹p{å=XVåf“EYŒøGSg:®ÙdÑcd~l3,{PÅ+¾ħòP¨âbqÎÂŽàîÍ2Ž¡ýl²(Cg+î^†@WˆÅ3 »¿ê,{â ±(ÃÐ`ǽÜhni…X¶½šVo4·¼b,Ñ *•Þß&£yÅXt¢7h{Og·yM½ŒOĪeOYAe%àáRÇa SwÅȦUô¿aÜ'FYuYJ ²ê ²ìœŸ+ÖQ{7ä³c,›:+2ÄYöìYËЮ&pcmGYD¦›ã¶g”¡ñ—„"`ÕþúzªkY|{mm6¼€q浃,zì›!{T[àW»ƒlaw÷àÝ2Ì[²™ãß+¥»WM@`]ãX¨'¢Oã0~a;Èâ^ßË0¶Š;Èb¼†qHÍwžemcÙýá=®V‚‘y}DZh±h}0Ì2 ®’ ²¬—9BWˆÏsY ¢SäåÇ+C_dñ\žQÛ¼ãXÄ–5-`>вƒlàп¬¯ÃްG‹åø·± o£Œ£¸=Ž•±qäØâÇ>üW´dY{V”å€Q–×ìeˆtï8¶¿À6ûÖˆqfÙãXD„¶§ö(‹áìWÚãXÞ ^ÉÝæaè³ö8ÖÝ&,{{–={ ‹²¼Æó,»9òÀÛB$nÝlÎE ÒÈrZÀŒ°¿- æÃÈrÜÍA`é#Šˆ Öµ²mÜsø8Ëž=E CCÁQ}z¼\#;ò\q(Ë É®èGÐã”{|¤(kïVÛÁX èU²Þ3ËIüŽ®˜ÊägÍŽ"GﶤâSæÙUvtÕªDY{‡°2Æ‹ì[÷–UÇÛ|0Æö]ɦ±W #ï–# „ÐðŒGÀgòaE âu,q–µw‹–sñ…´q\ûª{ËfK*Q:AˆäŽïMóB_ZRtL«Ò;„eÆ;Á@£—axp›èjB$ù{‡°ÛÑ3gæ,{öí¯rò”ro©˜X•=†åq¬á4ï}×â ƒtó^Ðî1,Ë¿¨:ΉñÕÃâ}–€YÍŒ µGÔÝbkEãzÆŒ‰UÙcX‚0³J{ ˪Å4t w0µ { ‹×Y8°¬}þɃöÖÝæÃy÷h´h—mS¦vtÅ ¡¤5±˜[¥=†uý)æVaa3—û¯…–Ѻ0cÝÑ(þW®9X¶–4}׎¹U݃ØÜsÍ¿M0gÙáU;Ì­âÄfŽº0ž¾æq WoïÁi„;”µ=ˆe\LuÑ9 ò1¾Þáe¨R <ûq˜[å=ˆå<Óø'ŒPˆ¹U܃XHòö ß2ÿ—ã,k{«!s«ºÇ°™3üóò°Ä64FZ‚yYv1¼]ƒÊæm0^”më /¿Q|y?|<äe…6®K %×?…¼(K²Ž#·q` ?òòã&c&·m¢¢r:!/ã` íã!/ï_Um儼Ë5ˆ¾1zj!¯Ä 
e¼<ŒÃÿ2Þ¦0^–ÝĺóV,ãå)1rá?žñâ8„ú­`ÐZÃx]-ãÅ)1`\+@ yù¦×PÌC^”]…½j_œSÈÛ8Tg›ìµC^}Âxý¹,ãõ粌—¡¤í•-e¼¾Ì0^„¢LTéÉxù:ÐíåvÃ,ãuD/ÎÉ%ò©-ãÅqT`Ka¼úVñò”„/ˆ°x5Ø+âõeñ✥¾“/A¼Ò%)áe?€³£ýôv'„—§L/hÂ별—·örŸÞ£ÑC„±€ „—‡={QX ¯v-JxÝÛÂëÞØ"¼l–×^ßU‹C0ÙkBxÝ- áeûª²-}<áe¯É 0t$Bx»n4Ë{|‰BxÙ7Ž'»¶ˆ×}Á‚xýå,âuJ/Ïy§çŽcéG¯ B‚x}™E¼¬9† zßxÙó¢O*yÄXA¼,Ã3çyŠxuئŒ—Ç]K" ˆ×ŸÒ’^,-éuoZX¯c–õºÏ@X¯k€ÂzYÖ¶À`±^~pìÇJ“ ^Ùõº+¨—˜NïJP¯Žóõj4ÒËÏcµZ¾!½þuXÒ{–½¤—1=mþ¢¤—·Â/¢Œx/¤×}úBzÝ9…ôòœÝÍ¥~%½.ê éeÊåÎ# èõ‡YЫq½.² èÅqx¯ ŽŠµ ×u¬z;°ã´k¼i½<%—ØF3Ϋ­O0oëóÄr=ƒT/Ìë^±`^_f1¯»AÁ¼î-åeÔN蝹¥¼®ÏÊë*[(¯k\ByùøåĶ”·sO.N…“òº2¡¼n !”W¿9¼®È2^}_‚x݃ ãuáU¯ë…ñÊl_/{ˆ„Ù\/³ˆ×|ñj0ÂË‹4¼š©µ„W£«^÷PH2Ë‚?Õ¯!¼TߟÿkbÞçƒ è/B½£µÐ®'꽸è–Ê7¤3ÒÀuRE/g˜ÅÉW êÅô¯$ê^â¢À/꽸hÃê³\A½·‰¢WGªèEº¯ ÜUÑ+5¬‚^™«ž—ÓÈ®åà¼}¾yÉLjœ—m+3T y§r^%c"çÅð3ÚpÕÊyÙÌÃ+ë9¯F•óòˆ]¹4»[Îë‹Èy—f(ÉšˆÛÊy5tªœWêy•X-9¯Æu•óú2+çE æ1ÄFä¼:Q9/ΉÚÚzq+çu Hä¼ÚªœWƒœã¼2‹T9oã‚ ÷\‹æ¾zÞÆÎw]Ò7¤·OE¹ð4(©Õóê@õ¼î”ô&^`­H© eéBÔ¸ A/ÛìMØX¿!½Â6éÅ»nŒ}× ÇBzeŽ©¤WÆ’õâñÊV†9Ô‹©×4j ¨—cƲŒ²ÞÈà³5è‹õrèûJŽ•õâeax?ùÖ«®¬/?U½Ÿƒõfê#qù„½2;`¯a`ö¢òs_~·baïC¾€BÒh½·à%p"¬×&¬—£4<ô«‹Ö¾ÆÂ×`¡‚zõ¥(êí<¬Q‚ñq¨×·Üð[Z-E½ºâ¡¨WAI/å)ÛIÄ‘^myJz]™^W&¨7³¶¶6LQoâ?ð$#ü êÅùžîíAQ¯ îê•®ZI¯.-)éÕÊ[¤×ýVP¯®ò(êÍŠäµgHQ¯;Ρ^áÍŠzí @Y¯â1Ïz-æ:X¯%ÒÂz]™c½6È*ë%#>ÀSP¯®?xÔÛPeÄ'%½:”PÒ+ÎõÚïCQ¯Aõ&îoâV¥oP/Nºô¾/ê Üp¸¬Šz/nb\]ê Ü;;iœ’^JiµãÔ¨ êÅQ(À 6~,êÕnLQïExñ „õ"°´Wòª¨W»?e½ˆ,Ñ(ˆ-ë¥üùÕ×+ë¥ô¯gè©ë¥8¯¼+e½è²+‘Ës*zYvs®ó êå~#ŽŸÆ®4E½‰½ù«†Ô+S+Çz¹¹÷Ý¥°Wé«À^%à {Ñ•=[)ë`¯F"…½ÂAíÕÎÅÑ^û+íU’-´W>9G{eRãh¯ÌNí•Iɯ¥½×¦½7þþ— ^ÜQnežô:i«¢^t´¸ë)Ðw¨ßJÅÄ!uÔ«2(1opsj1op²+E½‰“ïµÇΡ^éíê o+?X¯H ìå®Ò7X|Ú+aG{¹©#ï!*ó ·Žör£SÛ>J{e(æh/wǼ;µ•öÊÀBi/nóæËÌßÒ^3Tq´_8…É1žæ þ)íMŒ è*&À4¸×°=޽Î)â7¸W·â^­ŽH[Ü+]¶Ã½‚XîõÏ{ÍÔÎá^;zs´×k¡½J•ö:-­Ð^Åû^×k†ÞŽöÊ𣽢"9t½ö[öºÞÌMcÕ¡ë¥ À†B{qÆŒhl§ƒ—& íÕµ¥½´8Ø++A‹õ6n‘Ë CY/a(^ǤšÊzEBéX¯+Ö«÷gQo£9ÀÚhíPo#ÍÀ«ŸgÔ«%²^«åtªÞÆ¿CŽªzåŒ*êÕu1½¨4åe—¢¤WU¯z¹'nlAý|zͺžç¼FOsp^çÜvp^#­ôœWY®`^£“:8¯œQ8¯N•óº²WÔke^ zœ)éÅýÒ$¢…oH/ƒF÷IJNÕ›ïíDâU½’`|?¶*é ý¡ê•[qª^£æw¤WäÖŽôJŒq¤W‚…#½€éU´’^÷ºL”Ò‹ÉeÚ,ê•ýõâ>ošQæÏAzÝm éu—³¤Wö 8Ô+¡Í¡^ õj™¢^Ñ¡OÒ+1Á‘^é5é• àH¯JGzÝqõJ u¨W‰¢^wœ ÞF§§í©¡¨×• êÕׯ¨WB¬C½z/ŠzµjõÚð+¤·+è‚:7½*éÕ·©¤Wj[A¯V¬‚^wJ z݃ èÕΫïÒr^··Ã^mFôöCn´Ž<¾Q 
zÝž(½}Ž4”ŸK¸{½ÓÌ®.‡yù$Û¦N1¯<żÝËÓ„±ž¤œ—(¬ {ª§kƒ?§p^wœp^=Îs^{/ÊyõœÊy5À*èÕ€® —C4®kHôº2½îzzeè@¯» z5º)èuezõQÒ++… zõ _ WÛ±½NŽä@¯~nôÇ èuÇYÐëŠ èõqÆ‚ÞþÍ%2„{Y4lÐ{g@¯ÿ¼-èu{°Ðk_™%½þœ–ôvéZIaÛ>ÔëŸÁ¢Þ£Ì Þ£Ì Þ£Ì ^'£Ô˲øL£¨·…T–y€E½ÇaõúËYØëC¦…½þ•MØëoÃÂ^_ö·a`¯[ööëáZw(öú§¶°÷8ÌÀÞ~+-aÌû†ÞQРåÚDwÃÞ㜆öú{±´—e™Æ.u4hK{YVè†1¶ í=Ž3´·—ñ¯&s·¸·—êóƈÔâ^–µŒê3žÏâ^ßZÜë«Ïâ^_}÷úaq¯ÿÎ-ïu Sͬy+†´xð^WEÝ[ïü×ù7ÜFÝûK¬z1< i.¦|gáèG6¶šª…ƒ³w‡Æ}eËE=ˆTÃöU†Øç›q®L<8ñÝž%êáà³þ8ëáàÊÄÃe˜¹ÌSi/­Aó°(í¥õfÜ{9•öÒ64¼*0¡½1\´™ÜSh/]D®W’*´Wê”örd¸`7<1,íU7,¥½,E‰<ಥ½4‡je;Ó í¥‡RÜ’¥½t°ÂTrYp,Ú;Ý®óª›b^”aúñZÌKGŸÀU¢y ‹yÕ¸J0¯óŽÌëW0/o…‹ÜS/˜—žhì´®y9‹yÕ{K9¯ZT)çE¬i¯ˆ*ç6ż|<6Á¹ô"˜W}ŽóúËYÌëÏi1/ŽÃ¨ôž‘ÝÙôr†þŠJզי©I/¯ü®ƒ¨I/úŒÂðè÷ÇC^çèª.½h*¹ eŽæ¯6½zÎeÓ«w¡6½ÚðÔ¦7²FneøxÌ«¾HΦW_•ØôŠ«ž³é“,gÓKŸQª‹Ë zÝ·«6½ÚJÔ¦—Ä cƒ¶q®±éÕo^mzõzjÓËÚ(mQ}µéßCe½þœöòÙŸ-USØKƒ0:l,$ma/žï"lŒ÷ {i±uͲ½þ6-íÍo06Œµ¡½þ”–öòµ0×CøÆÁ.aÄÜQg)Œ ß88ðLo×DW`/-<Ù`§¶U`¯³÷ØK‹³Š–;FD{7À^›ö:¿;½|‘hÎËsV`/ÏI/ÞÉ ö²¬ðÓj§O¯»½þ8 {gŸÀÞÄiÐ@<ìEYüsº^m( {ý}ZÚK[¸¸$øÎ¦W>-½|ôÛ[Øë¯&>½Ú,Õ§÷¢’à^Ckõéåì ®Ü Ë§×ýR|zåÛp>½ê„'´—M(±c)ßøôª}£øôrÊïE{Ô§—3·½“Ai/?âô³ØË×xQ·TN]/åô#Ø›¸ ×–á–³ém$iô¬;u½(ËL#p-_qé½Ð]®å!a½Þ‰W\z9¶æØlXÕ.½ÖW\zÝqâÒ‹Óçš”ºôê÷!.½®H\z]™¸ôÕÑuzkwK¯6tuéÕvd\zʼn׹ô¢ó‹à«K¯~ÝêÒ«î êÒ«ß›¸ô’t-Ç=å½®é ïu>¥Â{]ÓÞËzC #Þþmy/ÿpHËNÞëÚ‚ð^‡iã²»Þ›h”—Gœò^œå½Ìž’9ýÆÂÁgy/¯ÇˆžÇó ïuvÎÂ{Gû¨»¹›Áò^šUskä´’ÞËØc$v½,º‚æÁ½îjÂ{7ö佉£L—çVöú2Ë{ÝÍ‹°×ß…å½½A¡5Ïm-–÷òrÌ4­ªEØËÃgࣙ¨]¯š2‹°×_Î {é´Ò3÷8»^}<á½îœj×KwO‰({9s/¯x_íz©tzõßj×Ë#¯âQ¤½œW?ï°ÚõÒŽuC(­v½îœb׫>÷b׫֧j×ëOìzõ8µëëSg׫¯sÛõN€¶ì]íz)œÚR2g׫î¬j×K¥ú–§9»^u÷»^n“}Õùj׫‰Ô®W0޳ëÕ$j׫•ªv½ôÔ4’Z±ëu>¸b×+n²Î®—LîÝÕ§v½(»¶»ˆºõö¹Ñ6]Q»^îÜ3r[±ëÅ{¯k¨{ým:»^ytk׋* ¯ÿ–Úõê8·ÞR¶›†së• rn½î¸àïà×ÒÞëWÑ^Œ9Ó‡Ú^vÒu-†9m/?ø¹›j{¬¸?7/WÞ˨"¬L¥½ºÙ\¥½b¾â¤½rÆm綠´µÑJ{ãWy‚§A°H{¡-šg;`o?g{é J{#³¥¼U¤½ºÏ[¥½zŸ*íµ[(²×ÉiEÙË)ÏöÒwÊ^=¥öâÊ×›I…½‘¹Yv>4ö:±{ÉV¹‡a*qEØ«çÜÂ^Ù&u{ß[N×ë®$º^'!]/^jísâã½Ôq‡¼3Ñ©¬㿺-ÀY¯ÕN«¬Õ„¾©¦ü­¬WÍÎÅÁêͽ‹ƒÝÔ¯º^•«®W̯œ®W$Æ*ëU'V•õÊFÖCÖkd|NÖ«¢l•õªðÚ™8˜Ý‡‰ƒ±·s²^UI{¿^{9Õõª—ïÔõ:·[Ñõб¨Óõê ZàK“rô=yå°´À÷({ïQd€ï7eøeøz¿t |ã ðõeø~S¶ïQf€ïQf€ïQf {}™¾,Ã7—i‡öú2kØëË ð=ÊŒa¯/2À÷›² |2|]™ß£lßã׆ø~S¶‰ïQfˆïQfˆïQö_ÑŠxfбÀ÷8Ê_˜á½¾ÈàÞãŒ÷ú2‹{2ƒ{¿)Û¸×ÝŠ¥½Ça†öe†öe†ö~S¶iïQfpo¿Í»>+ÁŽÁ½þ 
íu¡Ahïq5C{}™¥½îröúßÖëoÁ²Þã2†õe†õe/ë=Š ë=Ê ëý¦¬%`¥ÌÀÞ£ÌÀ^_faïQf`ï7e›öe†öeVÚëË í=Ê íý¦lÓÞ£ÌÐޣ쥽G‘¡½G™¡½½(t {Ç^œ¥½GÙ÷ú_[q¯/³â^_fŽgÙ¦½î1°÷8ÊÐÞ£ÌÐÞ£ÌÐÞoÊ6í=Ê íõe–öe†öe†öºÎ]hïqœ¡½G™¡½G™¡½G™¡½ß”mÚ{”½´÷xC{à îõe÷úÈhq¯¿ÞÀ½þ y¯¿’Á½G™•÷žeM²öz/î=³ò^óVÞëOiå½,ãîý!ÌPy¯¿ž•÷º2‹{2+ïõeVÞëžAä½þ8+ïuÏ'ò^Wá"ïeÙSWa•÷úëYy¯¿ž‘÷úì¼×—Yy¯»M‘÷ºãDÞ{–=ß dEÞëˬ¼—·ÁþkZÛˆ¼×gå½®Yy¯/²ò^_få½þrVÞëˬ¼÷,kv,[ðiLm’Ê{Ýq"ïu.ò^œ•÷º-ò^/VÞëÏiå½¾ÌÊ{]+y¯?ÎÈ{}‘•÷²Œ:ñ™€Rä½ú*ïÕsª¼×—ç¿øþ•æ½ÌVv×¾÷ñùb~ˆa$Ð>¨/½A–bL©/S¦°’° öÍø»p¯ì[‚}É5¨šž„M°/Ÿ3¡¤bßÊ®‚¦áÐøöÚÏ›íK°/º4ÌqnîÆÿxìËÎ3îíŠ}™{°ËÑøâ3h¯!¨j|Ud'Ø—‹Æt› †ûú2Ë}5±r_T%ÅÓ¤L¹obÎ<_»–mÿ‰Skà„¾4£M+eªúd=ϲ‡U¡/£†žñ@¿ýVè™;$Nç{3ÇÆÞh©BßÞÓ׹̊B_¦´å‚éØô©B_暘©@þè…¾4ËÏ+ÿ¬úâM?[lD¾´Ã1×ÈÞ®"_Tãà Õ|6+òETE¾Z©*ò•7©_|6\ë-áÌÓfî~é{ÝG-ú^~Ô9QFÛ}/Úš8âËã"³ LwmÑ÷2`®´ŒB­¾—ó®ìÏdX*ðU%¥ |5ªÀ÷(3_| ˜,oEøâ⓳¯M¾&r„7^™ |9LìÅϽ¾—sžçØé{iËÙÊв©¾—ó(Ê®Óω~mÞ¦è{)†@ Ÿî—ªï¥À•òq/c2Ã÷q=«ï•(¯ò^ Oªï¥²o¢úFßK‹À'Ì Nß+—[ò^ý¥¨{)àÞ„vº9¸X§ê^ ï*î¥Íôó,)§ˆ{ñBÇ?žJĽäEÜËæÃ”As …Š{õNDÛK£l|¦S.,Ò^}6•öji/#Ñõ&Žz¸b:[]/ñæMÓ‚Zu½ºÍNt½n÷‘èzÙ†¨V¼çåD׫ÛU׫“ Ñõê^Ñõºza¯ÎsTØ«»_œ°×CCš“Ð.ìÕæúël{ÿðÓõù»ŸÂÚïççŸpÓý§?öŸúçÁÂþÿþô/¿†%“y}D;þŒvø»ÛøUøú—]²¦yÅþ”ë v/Ýù˜ëÆwóÓÿ›ßAæàzÞÀüùÇ^}PÝ70ÿñCï`¾ö^ïëçü.Qü è—Ÿ?þЫW>.?þæúãŒøwî#ö¬]5àB CÔ,¤ÝSÃÈ[ú/¿¶à¿¹ñÿ úŸßþüù‡ÿ„Áëþüãç·ÿÓýí_z¥ÔCjÀã¦ç/¿RúW*=B½§ýK/Tü…~@ˆÜaùoÜ\V(þ!Ò<#†¤[m÷£úƒ›ƒrfV'Dv7!e?æn+Äx(7cŠ~h;fqúGÔ ¯íjEnâÇÖJ¿©½™ojå?<ÓÌ>¢Äà“ëȜĕ'ãVóy®OçÇ7w™0 /¯€©Üq•¶®þÊË4Є¡/÷ÇýùË$™ÿàæK5aÞ§äBZt…Šù±»ÿÓ/ðOøçŸþÝŸþô»ßÿþ÷¿ûùßþôüû«Âöç/Ú—rÔq9p%æ9ÜùE¦™tãîL‡Ö©B£º¸H0-Þ¸ðþK–[̸Œ6‚ûð÷¶=)c¾ñ'ÜÀ=sùœGÖð.œ±¬mDæ„s'#Hå2Úî ä¶ø±«9•ûëz‘2w’Ž}‹({¬w׿ÖÜ­ÑFpe>?3ã]*ÑJ#Ä4¿§EÀ»l×G&Ãïîëz‘²,°ì•FD2‹{o¡,ÅJ#nòàå…˜ S/,ô]þ—ÇP*‘F×¼’$.¿/¬Iq –žYd¤œz?+£LâòûÆÊ‘"zÏÌ͉Ëï›+ÇLD¾ò&.¿o®Œ‘ö4~ëŽ83¤™[óæ&5þÖ(#xÙ{Ù±¤š¬2‚ËÇym;ÄFÁ1=¹ý0ïHµe¸o˜<¦Z_e¾Ê…Ÿ»s e„,^§úXi^*qÖåš‘FÐBµ;6ðƒ‘Fô¥¼ä¹ö¹­4‚ËÂÏòîaÙ+„´ø reÁj#(Æ‹Ë.=Ñh#µ)mÑøô$«àêÓ½–“à7[Æqhvž²m—`ÂHL;Ë^mï‰ái1ŽG¯Fá{Œ6"P ¦®q›íÕFtŸÆ2Í\CÌFË o\㹺L€e¯4WÛKk,»4‚kVL˜õÌã‚‘F ·½G9a;ŽHFáZ,“µl¸Ìæu3ÕûˆPÜö¸á2ŽK¡%¼êÞô¸gpÓe‡ O¹Ÿ‘7©§ÑAvº ABb¾ƒM—¹¨âr”aÙ«ðÖŒ6µ¸Ä9,'2MX7^ÆqÜ89¶Q FÁ꾸?¿ô¢`´¼×Ç[Ë^mnóæJÔLå+ª šA]™Ëï/^w›Ìå÷—cÿÊ–ƒ z£Àõ06XëG,{µ‘=(‚ó܈ïFÁ 
3&Öq/ÑFtŸÌüÞK{µU’ÖJVFgúj#4L±ìG06p…4µqÜmÄÝu4//-TŒGðz\½Ëö¨ß!Žà§Ïå¡¡ºÃàΈ#ø½±§›ÐYöŠ#ظÖ\æq âe!žõ0T2†¾c/p¾Ñe ¡àÝúÏ så}4¯ñF2E¢qÞáC×6Dã«ù‘¹ô¾sÈole¯8‚?M„ÆÞÜ̵÷M˜y\í¦³ý^ÂmÄx“é cLIJÀÈCÖ^}ÂûüBÇ(âž‘ônuä1a"Û~@Ö`£90›9¯åц"Wƒ†¾1ãÎ^mÞ&n Q!Û,Fϼ0Iñâíq¼wf aÙ+Ž`CAü™Çè„{ÆÙwL™oW¯ìSÜBc¦s.:ŸÐbÚX×£d §âkï’/º~:,{Å‘tû*äÞMí0› ý×®ø¦¨ÿÃõ[žñ„kï[Á!¥¼uÞ_6êˆÈ…³º¬ X†j¶XÕd®¿£™Œ+RÄLá0ø»û) ]áÆ ZüWä:Do±‘ã¹JŽ?ÊšQGDVé=]6XôŠ#bW]®,^x¡Fïãʦœ»ÜnYœòfš€a(“»Ž-Í‹ \è§õXN‘+³ÁêÐ’e ád´X”%ú´ŽšÔÙ`©´äïPÇå²Gð­Å–#Žp5ŠöôŠ#øxt¨žß*]Þ‘l¡‚úžÂ©Ì% ŽPzƒÅq‚,û”L_·þ °Á²Œæ‚cÓ{æB<ç–l°}b~¶0VRfƒmæBü;’eðÏÝ{–½ WCôÕí#YÊž•ÄÕnTþtÙ¨$ cv[U™9qYLÍçád3­ Ë1(BÙΡQ|ÍQçÝÛWþ¦-="Í ²]?r-æL³æw(KíÞ³Ò¢ær•„;g¹ŒJÂw«J¢Ñ•cÔ[¡*vÅXÞÊ›e1zÁ®ËÇkô]3¬«­ ëÞtá7¿‚,Û=w ÄÑQ—lTþ6‹QIðz1¬ dæj¼•ù´Ä5&âjü;”5ßõ(k÷ ²þ•Q齃,‡|qeÙ@Î.l¶Y øÊÊü„׃€³ƒ,}IÑXj‡#“`À¬z¹±ì•IèDpâw>ÂüLTŠÏÀê$˜²qïïÀwot|®R–ß><:=¯ ÛÞ!Î(C Ý-ÖV7&Xµæ·ÅÚ×Xyc;Ⱥ÷ѬPB&Û8Æ%¨º¿êò!gÙ«” .¥ìœbx F*ÁO©Å m¯¿.Ç_&ÊFÄΔû2jærüÕÞ&kÔà™Ëñ÷޲–%d®Æ‡eÝ)³QJðµÐ mØÐ¢í¥‹eå1˘`ý¥®k”¬ Ìc§m¦‡1Ϋ¢i–¸=l£ÃS£fBÎ3ÈR[FŸ³!NÍL\®·Á†D)ùøvz–äü6Øôr§Ü¢•JpýƒÑëÒÁ§K%0¶çHNAºÅÙIÁÊžd"Ó<æ•JˆŠ?Ó^edýq= ÷h²|2Z§ -AîéáLµ-+ñ{$›ú†…•6,s%þ•Jˆä»p%þÕJÜÌ¡³2$®Ä¿Z‰‹¯`&‹b‘‘J0tZ¹eËÅX+Ȫ^¿\ô•ZA–etÖhåè›We[ ÿH_ƒï‘fÙkë½FÑ+•ðOWŒT‚}ƵÓÐàÛ4R Ò¼¼ó³¡2ŒT·Â°1Ý©1é™tg“Eà¤ÛÜpfY{²"ÊG„J¹v“ÍôÒÉÊÊÍM$+ƪÎ_;Û­ùž!UˆF)á^ô|¿7¡Î ¿nGÜÊmül„SÚË©ìÚài/ÇúToŒ·¢´—èŒÄÒÞÀ™Ô»ÃÒÞ¸Ûí¹æ¸Thoßw÷Š‹…ö†>Së l`C{ ¥ñþêô¢Ú˲p…å—'´7 N gk©G{&·³]<ƒ<[ÚKÐ-Í=9‚{CŸ› NÄjpo¸¸}ïÅj÷âû¡×ôV–öÒMîâT}x< í ÝÉgW·´M1lš×…ö~¿øœç,ÆÒÞÐYÍ…©J:h/·8ö¡Cr'´—)[jb0™D×Ð^Þ ^ÁÒ3 íõYc&íå!‰á†â^M)¸—§{ XÈ'îÕ¿Š{5™–à^Í}*¸·§ eíØQÜÛÌŽ,‡{]Ê"޼ Sîk#Ý{{¥¢šêEÁ½ý^0Eëë÷iq/CU-o Á½.½à^_ã÷²¥ —²]xoß Ë}Zc?ð^‡«¡AONlx/+캆Zx/Ÿs¦–ÒòÞ~u|sµÄ_vǽáU€o?óîi÷'À··@ÌTçÎN¾ÇqC$Å eíºàÛ_HKK9+À×?˜¾nW±ß0ØicÇüQàëëÍßþþ1bEðI|yæ—kߊß~\Lkg­ßþ…c`·aøò8z¼åy˜¾½Y¦ké{øò0<ÁÚ± À·?Þ ,ïu/Sx/‹ì˃±¸×ÕàÞзÝkc¿àÞ0FWKu+¸·¿”ëZÊf‹{yXá `ì&ÜÛ·O¢eqoÿæ8Ë6‚{]CÜÛ¿ñçYýç½þî-îu‘Tpoo ˜@ÎÔ;‚{]X°¸×¿d‹{{"3†ÀgÞ½Á½<îâœtìËÜ{gpo?ŽSŠöº^N`¯ööãpûxiýý[ØÛÃdwf¡Úë_Š¥½Çq†öúú¶´÷8ÎÐ^—È7~=ÐüãØÛ/‡F5·" ìí•Ç9ØØäoaoÿTÇÞ@½½yñvïÑê,èõ5`A¯‹PSÊŠŽ\œ¸ÆÉ è=0 ·_hÌ ï½=4q“k›Ïô‚ÞžEönk[š€^Ñ,ç퇡¯˜ë¿ÂyýadøïÓr^+,çíÇp•ÁH æ=3˜×?œÅ¼î.-æ•̺yûÅ3¹Ê±÷x6yã 
äuã¼½ÅúÕ{`yÝ;1Œ×w)–ñú«YÆë¯fïq\œ‰Ý¥’&6–C²Ml,}º0^û™¾ë¿å»½ñ“¤ÍåËw{Ecœ5ç[Âw]ZIá»ö6,ÛõeÙnGœ£ÍJ±p×w4îú7oá®¶F‹vå ÖõÍÞb]×ì-Õu2L·_èâRý<¡aº¾Ù[¦ë›½aºþÍ[¦ë߆eºz“–èÚ·1µìî=XœÛk A0ÄqÛçúG²8×7xƒs}L´8÷¸œÁ¹¾AYœë:8Ks{{jLÉÕ§rBsí»°$×w.–äúNÑ’Ü~­X— „\ßÃY”{—mþb+X”ë¿‹r}GgY®› ËõcËrûqÜ£XFŒ0,×_βÜã°`óë€Í²Ü~WàêUa¹nè+,×Ea¹®«±,×fY®¿MQëW'0×Uz™AÁÔõ¯ÛööKíÍœ†JŽ}tHxõXóñP—t‰±x¤a8$¼ÆE_$¼Ïc7ç8 /úظmœ„—¦U{Y]%¼•Š5Y­„—ý6[zÔÓJx+gÄa%¨U /¯W0³|†×*xÉ Ñ­Îí×*á­„¸h5ÃA%¼¸ZÞ•&,µ ^öª7—ÏŸúñ ^úFõÜ\“÷Z//GE×p*Qoå˜*ÜóToïóÙÃ<ãœFÁËAãÉtyú¡•}W¼„¸á^éiUÁËÁÇóЉEÁËë]”=Ý–éòæºXàO˜®?ÄJxýmX¦[©P¬+}¶2]¾-&jZZ\Ãtñ 8̘ UÂKÕùÓ–Ó2]Vjº">ïQf%¼lêm V”éâ$\<Ÿš•ðòõä{eÄU oe#¥·Ó,³^Ü ûð¹©[%¼lD× o¥@îÕl‹„ÏpÝÛ½M5¼î¥í1§^÷ýˆ†—çs[ýnå”z‹%D¿Ë÷ól{0ðÒ­òƒ¬§€õ±Ç^À[)ÌÜ¢ðr,¶Ûðrÿ{–DÀ[©‘Þž€*àeÙ½MüTÀËAlÚëÄ*५@Úëû"àå7¬%]ÕïVæ§¡Ásêwý3[ý. ãÒ«›ý.Û|¤y¨ D¿Ëo]fÙ§UÀË'¸_½‡xýõ¬€—cÎJ“£0Îi¼¨àpH*à}¸}çwTïC³ wT¼´¸ö^uðòœûæ‚­~·õ}EÏ\™ý.‡¸\±¾6´}õ»âÞ;ŸêwÕ=; °êwy¹5Îu@×?ÁÒïòKaìS‘ï6Îùæ&¶òÝÆµxŒ¢Ç"œÊwÝ)z—t%¯GÕ»ü¼îÔ]Õ»þjV½ë^±¨w]Kõ.ß{ͱQ@Õ»¼^£ÄxW¢ÞuïXÔ»®nD¾ë³ò]=+ßåq÷Ο­ò]W¶?žÊw]ûù®/³ò]Wë"ßm\×àž¦ $|·Ïʆ…ìÇQÝ~›q»{¨|wDêkiD¾ëëò]W§"ßeYÚú•ï²¾ËNp¬ò]÷­|—ÉÒú>Ìêö«a.7e,ªÞå¼—.¯c¹VÕ»þjV¾ëîRä»üz–‰Ãºþ+ù®k]"ßuÏ ò]_fõ»îëý®«Ñïúã¬~×µtÑïê¦?Õïºè ú]wœÕï6Ž2î{ÓL«ßåmb”4zO•ﺧù®kÌK¾‹+¡fÖÈWå»®Cù®æzSù®?§‘ïºw,òÝÎjv*o•ï2*´˜6I¶ò]÷²D¾ë®'ò݇³Þs¬¡ò]÷¶D¿ëúwÑïúëYý._gÛ)ØU¿ëz@Ñïúû´ú]m "ßåmÞ;£ºÊwÝ')ò]Ž3uÒå”ïºG·ò]w—"ßu5+ò]—÷žR•ïúã†|×_Éêwõ}ˆ|×½‘ïºOÑÊwÝ­z×U¶¨w]e‹z×U¶¨wÝëõ.K>xLÔªwËšw©z·Ñkd®©ˆz—×{ðDCÃ*â]ïE½ë³â]ÿtV¼ëê@Ä».ÄŠz×5Qﺺõ®Vžïº«Ýu\´»¾ÌjwiÕÅÛ”gkw]—*Ú]7`í®»C+Ýu¯ÑJwÝÛé®?ÌJwýqVº«­Ò]=N¥»ò*ÝÕOG¥»Œç·b3Zå®¶Uîê§Ê]i®ªÜÕNN•»†weÄt­p—Þkte]@× wõ Pá®Ö w]™îjÌSᮾfÑíúìp—ƒinÄ|Úb½úš~-ê}Sßøû_ÂyŸwl­<@/eDa…¾ô^ƒ½4˜M[ë@/“º.½4,¬Áeô2|ÖïCA/-ï½[@A/Cr¼—äDA/Îyåí㨠—˜,3ÅD¡–ô¢ Ar«…ô¢,–½%HIïCçy|µÃiOQ/Ê*›yʧYË-3ËiÖ@Dxm¹ºC½tA@Õu‡z]™E½èQóÞIäP/ÍAwÎ ‡zQ't0¿†Ö[Q/‡çu©øêUžµQ/·<#ÆÏAzÝ•„ôÒˆpoÚp¤W9—^W–ô²¨m±½’^¶“°=,z—¾Ü1”úœf l'+‹'½l_×Î?¢¤×µY!½½§÷6%½(kL>å¦BzYV¸3ã>Ô»Ì-³›'½Ýv®‡z—eÏ^§³‚%½,»è«üœn ç”Û4Si/ï… 7®I-íÅa7S½, ûÂ^>γ¶')ëåp§ä3äÎÂzvýùÞŒXX¯>Üd½þl–õâ½ÝåÝ´ ¬—eè§Øe|<ë}è\½í¢•õrhX·#®²^æÇÞVÀ {YÆÅÛt-]ï 
{9¤Dÿ†Š·ia¯–)ì}Ø·3…ÍÐŽìå#p‰yl>PØë³°et”˜Ž×ö²·IkùÎÁ^Ú±£ÁLAžÂ^¢ØÏÁÁ^4í¼ýSì¥×1&¦Wýörû:ð‘]iïCÞmy-´×Rh/Oymj¥½¾ÌÒ^meŽöJKÚ´W[Šâ^‹÷º·%¸×gx/Šr¼^#Ë{y+«q{ÞËã(B‰â•÷ú'³¼×§¼W¯'¼—ÇQx3Ì+ïegdO<¼¾± ïå+{¶É³ò^ý(•÷ºçÞë¯gy¯«"á½þz–÷úsZÞûÐGèÚÖ$–÷Ò‡92Œ]'ï}˜egøPÞë.'¼×½êÁ{ÝûÞëÞë.dy¯k—–÷úÃ,ðõ—³À×5/¾®Y*ðÕªàëŽàë®'Àeôô@ ÇYà{–½À—1øám×o€¯~"|}™_wŸøúã,ðõÏn¯ë+øºsZàËG§r°Œ/R¯;Ì_× ñå­„Äa_ötÑuwÀƒø2ÉØvjWâËANw ›pÙßwÊ.À×-6xà›è¥OÞK»ô¹wÿ´k ý);)ÓÁ{íqž÷Ò díjދѸÙlteŠ{ G÷æP‡{]™àÞ£Ìà^6"ëvèz{Y¹¶â^W&¼×•YÞ˲¸ÝÚ•÷ºzUÞ˽ýxiw;y/éѵý˜”÷2JÛ6;Ê{2Ã{&2ŠËº`ñ^Qd9Þûp–¹€øº2 |ý•,ðuEB|…8âëÊ„ø2Ç"÷Üò¯Ä÷({‰ïÓs…ƒœ~ ÏZâ뚺_t €ßßø5ø{ä{”=v4[ÒΡ是VŸ0_WEÊ|]™e¾®=+óÕ†)ÐWA©¯^N©¯;ÎR_7_Ô—§Ãµ—½…P_×R”úºK öu·h°¯«Tž:ìëoE°¯”)öÕs:ìKߌÁãU?÷%'}Ý/>žûú2¿!¯é§áÀ¯+ðëžAȯ+ò+ÐÞ¡_æ…¼—òOÑo#!Ù ýêꬢ_]oôè×¢rA¿"gQôëŠ,ùõg<ÖöeÃmT¾¿Ä£÷fÒNæ2ÂÁ}u“†p_/îK)Ì*{–`Ç}ñw±¥­ sÊ}é±VêZñVîËiD¥ªvZXî{3Gqµ"Þ—ûÒ%¢v ¿˜ðË}1#½0ŒšCBå¾L’È=¤Ó÷S¸obÈØgÒ9ÁÜ)ž_›óË„û2iÓ› Ü™ôr„÷¢L5éå°Šk锸JòYõè­½åÍl‘ΣW<äœG¯ØÙ9^zb“N ‹}™É³žõtÛ£—I²ž«žž ‘ra¼ûåË ½ i[D-Зî~wïSÞ«3ÈWÝGù2]0h&mtþ¼è¤ž²à“"_gتþ¼h×ôPJí{ÞÈÕ·vŠ{épÙýŽîðÇÝÔfMįþ¼ôe‰+ê;^&ŽY†>ùFfï»×rßaÐ[ë£ä™ÏûÕ_«A¯x*òÕË‚|Õ´M™/ݸh¼0F# }™¯šYˆË´Q°ÐW“+ô¥ïMTÜÈWs+òÕ,âŠ|5;£"_Mf©È—eÆÃÆ"_ædî6ˆ÷a×àr )òeNæRÖ^E¾tà O‡1ä+éß”øjB9%¾Ó”øjœtþ¼¤5ˆÙúñÍ=ÕU™É»”ø2mYÚkJ|™tðuRâ« •øªÇ»ßÄ@ÌUûçÐ÷&Äá››ÂI|5óš_Òö9CÁky/S…q¨0ų÷¢?Ç4dí@ظ×=–à^vE´p ßà^M­¸—CZbL€dqo¢‹þ^ÑUÜ«©Ú÷æÀ©SZ[:÷2Í3ÃÓÜ,¸×=‚à^fæãÚë3X¼à^œ3ÒszXÚË'G _]…ö29é_ K{ßÔ¦Jz3ÙgY‘PIo6Ùº=éÕJzÙ #3(‡eãð’^^Ë z3÷ó%O±°½Þ”ÕL²i@/“zÒ\d¾d½|‚šãÚ”* ——£û~yNÐËd…å…ôÞ†’ZÐëî]@oænÛ°:"½šKW@/#áU¶Q†€^žò!½Ò=½Ä†4úš–Éz]ÐË4†×»ø% ×S@/S#£Âµ —tó~Å£z5¥¢‚^Í«® 7wœ÷ªp-èMda4°üôjê>½þ^,èõ׳ —ùK\c!½î}ZÐËSb€ïz¸6°¬Þ/ÑЫY~ôòÑ9•œ¦Ä ôòRíy è奒ÑáZЋ(šè⸱½™$)ӆ㠽þÉ,ée@ÖÙ êM̉ƒÙtpÔ›8KÛ$XP/}òŸWújI/_x2J[1æíˆãŤõ¦ÖÓž- lI/OY×’^”E§ñ»óRœWJ %½¼7†M›S!½LµRŒz×’^=Kz9Ý£°dZs éEY™Û^?žôºç³¤—);Ù3LóQ!½þ0Kzy›x‰Ó£RIoî¹cwÒIzÝPAH/[%ÜcF¯¤× »„ôòî™2elŽÒ«ƒ½î èõeôò¹®½¤!œ× á¼…ƒÈ²Ç©Ây ;›¼’\+çu—³˜×_Îb^—bB0o¦›â½&ÊŠy5!¬bÞD‘h¤?ËÇSÞܧ³åiÜŠ¥¼™£TÌsæz¬úò’S¥ÑB}yõ»²”—q3Õµ¯X)¯/³”W›ƒÚòÊÃ-Æ[˜Sœ¶ÓuÁ2Þ‘¦¼cH)ˆ—=N~÷„âE|½Ë»ÓÌ"^wFKxé¥~3oFoâxuK±^2¶™ˆ^BÖ˘Ä)àÕ}à x5‡¯^–E¼úgŠ~-àETèé8f2 xu ®€—q²E°¼š®Ê™òæwÔz˜òf戊û^,àåð$í K xý³[À›Ù™ì RxµÎðºW&„WG.Šx 
óÙáÑꈗùàöûµˆ÷úUˆ—É›ð)÷|ßx80²†Ž4lt¥vwílÄë¶9[ÄËÍëe§*PÄ[8‹Ú~(Šx1Ü@xà rHfñrºEóÝ‘1B/Æõˆ ¯;‚ ^Œ_û !ñFîàÞ•Jx9]g·•Neo`o‰Oº=g6¶ÎêÖmZ‹Ih`‚Šis,„÷æÇ68µxBx™€ ïm®V+á } Š·Oe/Óñû:”½ŸßîÔñj®E¼éÐiSc^ž3fšeŽËi6½Í…xÕôŽŠ§‰¦åhlu¥ÈËK=ü¼¾ËÃÆÙLßïRÞÀuˇºÜwßµ˜×eÌ‹y\àd­gÁ¼©ãµk{FXÌ›¸Ç5nãZÁ¼™cÌLï­ú}1/g4ħK¡k1/p(;[‘b^ö²l` æ­$qç„_Þ›]"jah!Õ—÷îϦôR}yYöæ[^ž²bð<Ԝ˖¿ÅË[ý¤ÚòÞ «;OŽÚò² M}â6µåeYd–ŒzH{2cË‹"´ƒ¶¼¼Å–×ߊµåõ§´¶¼úÐjË‹²Jò7¹“Øòê9Õ—WïE}yy½õ :ÐË2LkÔ>ôö2‚iÇ`}yý#Xc^֞ɀ$ƼþV¬1¯?Îó²Ì¤²Æ¼î01æu·)Ƽî”bÌëÏiyµ¥oc^:kÌËC —Ëî…_c^œ5æåq&–5æEfa.dª1/Ën&+™bakÌëoÅóže¯1¯{j±æu·)Ö¼òí«3/o½Íð«3¯»qæu.μþœÖ™×gyÝmZc^˜uæõeÖ™×Õ‚8ór8ÈmRw<œy5ºóºË‰;¯»œ¸óúãº;¯?Àºóº¨ î¼Ĝ׽}kÎëÏhÍyÝ­9/‹Bç7[s^ÿÖWO)æ¼®Èz󺫉9¯«21çÕѽšóúsZw^G#¯+þ¼þ8ëÏë³þ¼gÙëÏëˬ?¯« ñçuÇYƒ^_d z]Sƒ^œ5èõÇMƒ^ˆ5èõ‡Xƒ^W;bÐ{–m“^WábÒëˬI¯¿MkÒ뎓޳ì5êu·)F½½ÇØ~}jÔëˬQ¯/³F½,C4{âXþ«^œµëõÇY»^œõëõÇYÇ^f}…2êØË²ÂŒDCÊ-޽îœÖ±×YÇ^_f{Y¶‘™zöúã†o¯ÿ­õíõeÖ·×—YßÞ³lûöú´¾½þ0ëÛë³¾½î81îuljq¯/³æ½®Nż×gÍ{}™5ïõeÖ¼×_Ïš÷úã¬y¯/³æ½¾Ìš÷ú2kÞ{–½æ½î>­y¯;LÌ{ýaÖ¼×gÍ{ϲg6ZûU‰y/ËÙš÷úÓYó^‹Æ¼×fÍ{ýaÖ¼×gÍ{õ85ïõeÖ¼÷,{Í{õµ¨}¯/³ö½¾ÌÚ÷êëTûÞ³ìµï½9T éo>í{oÖÉJN«î½®Èš÷ú«Yó^5kÞëʬy/‹ÞÔ´jÞë³æ½¾Ìš÷ú²äõë˜ï_iÞ[ØLÆ îA~ áÅ–Ô©©ƒ“žüÒê-smt¨XÅÔ½HÁ\lÁdqïåØ³Ô•}HÈ/ºÎ<²Ã*lÉ/þ0P—öMJ6._¡ç^¹U•üRZG)I}–³ïK~1fEhK„,è—)㳓ˆ©¸µÃe™!6QôK5h3ž¹ý2,uÔ¡žè—Yªoã2lÑ/çJ¦G+š _œ“˜*SqïÅ?|VV3‡~[áFßýr$‰ÇðZÉoäV˜-bTò[he²Á£’ß›#‘•ìñ%¿œ´›,n–üºe!¿‰YY÷î %¿UÔ-âWò‹Ñ?Þêk lȯ®Æ*ùåH¾m›5%¿™ù¦¸9k2m!¿^¾yö”üblS¤Yò‹ «¨1}£ðeû*˜"Ÿ_\.Ò¢;L7]øPÐ>Ò]K~iùÂðŸ§ÿ‚%¿"‰ŒýY½Fà«¢h!¿l–xP4ªÓ½— "Q­Sv ä×±yø2*ZÓ•òÛßYrl+[Û+ð%¡hq zTà(§¥ñiàˇ^Âa¦I _\(r¬QOòË ÝLŸÚâiêàžK¾neG¾üiZæõŒÂ—K\“-UøÚ¹¼‘T¾Ü^Áîq:›ˆÂWS…/"[ˆh +/™•øRëX¶6˜ŠÄ—znÌìð‘,ÇWâ‹ÈÖâ¶ü>$¾a©[Eâ›Ðû£!Md¾¸Þ}‡mÄ+:_>_¨\ŸeVçËm›‹Î—M–I°†ñºê|]õ‰Î—÷SVv8ÑùÆÂŒ²—ê|u‡Žê|#‡øJG ZÕùF" ºÉÏÜpVçë/dt¾<åµÍòUç˲×ñLu¾¼ꔋ̇~{3¡š$ž2_Ý¥2_<ªÌ—_7¾¬éy(*_J«òåë.èýfª!Qùòr¹oBü8òë¿ùâá¸Ç¸w^ŽüöOœJ á¥"_Æ5³D#"_–™õùrWƒ§«†ˆ|ÙbâÎ5!"_ÖqÙ‰TäËæp³K”@E¾Ü'…ö0o"òõÇM‘/«{Œ>Ư­È7rÄ€ ¨á‘/w¶ÕLTã«jJ«ñÅŸ\¶ZO4¾½õ¦­òoo½ÜªNûÞÈD&×–‹Æ7îíUkùºz‘/3K¾"òuMVT¾Œ£ÔϽ¢òu½­¨|ÙƒSùõƒn.T‘/Ce·¤ÓÎÁ…XùòjßÄvŠ|]ˆ‘¯~uVãë::ÑøFn\Ú9\Tãë"˜h|]H™_“R·‡»h|#e5˜”•ûts`h¦nûÙ)Ù^¯¿ £ñu_©ˆ|y9Œê¼šÕøº`/_ìEäëzù&îžÜ~€*ò4z æfš2X‘¯Öšˆ|í®Q•øê^X•ø²[CÌ¥göÇ!_ÿ¾Dâë"³h|]#¯Þ¥Uøò 
®6Žº(|ÝÇa¾ø¼#s¦Oc"Qøº1¿(|Ysϵw/ˆÄWßò’øºÖ*_Ý,®_¾â¾7á:m\D°_ÄÐ`DäËÖJÙp›§´*_ÿÐVå«·¢*_ýàT嫃 Uù2Ý¥©ÊWGïªòE?pEôÑuÒR«òe<ß:ùêðVE¾¾ÌŠ|I¹ý|4JùrÇíç%ž"_™-¨Æ—½Zëœþ©ÆWGÌ¢ñ•™J|µîTâëêN$¾øÐTBµSâ‹“¹YiìLøu¾øéúüÝO…°9R¬‹úï?ý±ÿ48= ûÿúÓ¿ü¤Lôõqø3*âïnãWQì_vI<ÜÏïãþ/ˆ¯¸+ûúc®v8m‰ÁþÖwÀ1öºñó½>mÇö ÌüØ;¯½×ûú9[Û¢RãòãÇzõñÊÇåçÏß\œñÿÎ}$£ÀÝ ¹ëe¹’KAÆ;ú/¿þ࿹ñÿ WŸßþüù‡ÿ„qÉþüãç·ÿÓýí_z¡Š©Yéî¾Íá•Ò/¿}ôj+©Ž }Ù•Š¿Ò‘;,ÿ›Ë Å?.DÚg,fñGõ”~÷DK´nr7!e?æn0°à¢I)ñ¸SôCÛA2kÔ?¢VxmW+r?¶VúÝH­èÍ|S+AŒ \"Jèî‡Û@+·t=·šÏÈs}ò<¿<”êeBãvì>6=¯ÒÖUÂ_y™Æ…¿t§}þüe’¿Ìp‰ûù*-qwç§[?0óƒ»ÿÓÏÿOøçŸþÝŸþô»ßÿþ÷¿ûùßþôoÿ#ýUQûóm+\}t,ÉU||ãV"f?“øÂÕ÷WPÌÌ•¥N3ÀÂÕ÷-(¾¿)/¥qheû~¸UrîngÑó¦„#ŸÃ£×ÁÂgk‰/n ¦ÄaÌ3êm¤˜cB2 nƦ»À¥ãÞÜÞ[ïkÌ•šk-Ѳìy ‚Å.²ÔÔ§åÓðÂ9™cyìxÄ %½ÁÖw¿T:ÁlçÊ_νz×js}rû3±Ì$6–$ …ù@ßÄÆ˜bs¿\·Òl¶‹ˆo?®™…IA^`ÉRxk›(kº –µE”ñ<•3Ý- ß7Qæ³Ò|÷ÛU¹øþf»¨_Ô1à¾ûmrñýÍva–CY–g¶ .P¬„ÒÝ‚w¶ n"ÚK^…YRÞl’0¸ Šó&ʸ‹ò,«·òôôÁxTú¿“Øî[º€e j(>XÖ^`®Èçµ9¯0õÒ›Ö-ûÁf" ž©gokNO¦Ò‚MkL¬Ð–á^iѦ5>ÊÚÊ5¾ËJ,c›<:o¼¸Ò9ŽëVγŲÞâVœ5Ú.àÁ¢öÊøö¾2›/×ìÛ5ý±YÝåÍjÌìäs‹Táâûë|}̀׫\|ß@¹Òn¹!Nõ–^¹ø¾2),ê±–.A©Ìijˆ2ŠJÞ ,z6Qæ‡•ÐŠÆÆìzE›Ù˜Tq/O×+aOß4>+ãœye669|9B7™%-3Ë$³±ÉPŒÇ áj±.a E*BgZD™t9à_-¼mDší,ùxð‰r‡öj²L >ÒØ"“ì⦆÷Ñ] ê}Ûd\<µ†Qí‰vŒe¢Å혆Pd“]HŠŸÊÅ÷×øæ¦—kú²¬]&Æ¢‹NsWyåâ»Íîí)Q¹øníñà7¢Þ¸^µÉ.˜~¦•IJ+ºéWÁÈ÷šËL² ¼$’ök>_³É.ôœóÁô6ÙZ6«tÙÜX™Ñ6¬¹¯×q–=°ÔP Báz[¬ÉÅ]¹ûu–üÞ5$›ë‚:¸­å«Ü7ÜÝéÊ@Çüu±¹..†å6ÓȱÌäºèB‚0“ÀãS¶¹.øžÕOWî¢ÞîÀ’¨rÙýu¾©ZÎa5^6×…Hõ™ƒèÊu÷ט’²šfÎ`–™\ú\~Ý%…Qå‚Éë¬í’ |ceê•ë ¯;ðÍý!KÉ2“ëBC[_ßîÀÚ¾¸–º¹2ÓRÑãó±&>6×5Gˆó•5›êÂ=B³©.nJß–ŸbíÆ©Û…ŒÜjŸ=allS]H:ŸÊ-ØÛ˜K2@ 1I¥yß›ê‚*- _†×I힆ۅ]¿ž”lª ®S1ÁYWLÕ_WžùËbS]èÀ•÷7Õ… ÙXfR]Ü_ÝîyøzT®¼ïT\€ŒK£X¹ðþ¦º@Cg¢çaÁQóeS]hmÓ­#š(Ë…¦i×Ç2“êB#p—Vš(kßcŽ6ÕÅÍ­haÊ:kN6Õ…$ÏBUÚTb^Á2“ê‚ááUÙTúÕåjS]hO‘›ê‚)@)®·Òl¦ ÉéÆ2“éBk¶\6Óó¶kÊOk¹m¦ ý ¸þ~½AÖÖ—ßßLZë%ÙLwð¸Ñ:ËL¦ }c%ÏLÔo¯L¿m¦ À´Ø»â0ÄÁ(ù=ÝcvÅ©!5ËÞÜÆ´`íó®ŠÉ•IswƲ#Ë0¹2y.¸Ð»m^…mž .´?˰7›ç‚äO¨CNÈ2“ç÷»¢®Êáé›çB(s8ü¶W[–m¢ ÷ Å&ºÐN e&Ñ…$—«Ÿ¾‰.$Aaåìç2ÖÖ6×ß/bm âúû;•¤tµwDím°£7é šµ‚;Äjù“è‚BÇ{Êž+î›çBkáI6Ï…»Z²y.ôMäÞÃ!ZZ^fÌ®Lž Tð½m0P¹6Ͼ®¾‰bÔ7b&ÏÅÛ&9@+íhªÌùiǯöT˜V™üÎÚmó[È;bfÑwüªµÌ½6Y›É V™SÜ&k3¹è*WÞ/YM4kÙf·Ð`Í…÷wøª•Ì…÷wø*Ù9Yf²[0n³Îs>6»…VZk6»pˆn­/?ãEÚìòA=×m³[®mÄ|÷×É2“ÝBšF&»…´FT¿ÍnáN™lv‹@üpõÅˆÇæ¶ùËFn JŒJŒ­Ž“›ÛB>@< Ím! 
Ó?›Û‚°ƒÞ1Ïx®fr[H£d‘Ém!ñåá’ûebk¾)ϯ˜‹î·­´Ú݇‹î·‰­Ã,tÜ ÝßÑ+å$­ )$‹Lj ξövŸ§»•˜ÐúTzÏSf›ÚÂÝf±©-$÷+‚¡Mm!ý/ËLj ÷x¦¶Ø“ –5#ŠÐLÏè`mVcn¹è5”{Ùm³k«äž›š¾o±!˜ÔÒû=!Ú¬Æò¦§ë7‡ïÔæ¶Ð7Šoû¿nÜÊhüì}S´K›ýçØcéŠROŒMR:Ñnè½ÝΩmÑ.7·D&èºÄ}ù.fƒˆnˆTgà ÝŠkçSÀË kÙIðîzr-!¼YiÎBx™Ó£Õ=¨Â˲Ô8g™e†ðrÃbÛêe¼ÞwݘG¯óÎÂð‘fÜãôÖÂË´Ò11ÂÐ*YÂ˲òjÓ„ðv”Ž…*êÇYÂKúÞPµ3\ áE‹eÞØËÐßExyÊöGFàÜ9„ðò”1 Ž% x¹‡2Ð<å…r xy¶BSü8 ¡%¼<¤`|;]¢…ð²,3ùz8 o¸8«·¶ˆ·ïäÄ×u\i /3w'&G£!¼<¬R³=7ÝZ‹y(SªÄ™ùE/Ë0àǹÂ8Î^=Kxy\e«¬“ÂËAb*àÁ „ðò¸Æ³,¢l/'´l¦X·ˆ—÷‚¸‚xy\Žau†‚xy\¬u8E¼bßë¥âñºë âåóe†Ék,âeYaâØ8àƒA¼¡hhKÄËGHÌ$8aš ^Þ3ÍÒÆÒ ^ž³¦º'Ôñ6Æ9|‰< â¥G$ê 5•>ñR$‰ÑÈ‚’‚xùv"ÃaY/*ªÆŒ­Šx5ª÷ZÀQċ˚ƒŽŒRÊxõðèÑFŠ4e¼¬`î0GóñŒ—ZNî%˜¬\ïŽrr:޳Œ·Q?õl*Œ—£[ZKŽÜéÂx{}‡ò²hËx yæÂxùª_sIe¼lø\˜Íañß—ñº*Æ‹û¼¹Uª^ƒ[ZÆÛˆ>ðk;/?¦ÕM9ÆË•NÚÅŒeS…¼³Úº\ò6n¼C€çò²âÞ¾!ocZ‰>üxÆË£oð\–ÆKÑïðë_•0Þ>yÖ"›0^×ö„ñâ°–â5så*ãuïJoãÈç‰1”ñò騑-ã„ñriÝ^ÝÊxŸ§o¯>/¿;êø‡1£2^÷M ãmœ*_{IF/®Ç-0sT/gŽ-®=5Êxy™ÒXJSÈËj Êl¬QÈëîE ¯;N /ãÃÚpì ¯«>¼á/7Žö,W¹Çr¤!PÈ‹ãn¢ý¡4Ÿ— #[Ëyy][ÈÛ¨nÞ)qòº:µ×5¼üc]Î yÝqy[W\èQA/Ç 7§uW zåËQΫŸŽ‚^ýôòÉù…_'çåË$kj³Ìr^×H„óže/çÕ‹ã¼Ìˆ1œŠ”óê \9¯« á¼® çÕH)œWçÊy5)èÕ~@A/?qÚLŒ­„ôv¾²óÌüºío¿Ôí̉yÉÃȰtˆy¹ßwch'æOg!¾D†Oza°(z•Ö©¤Wú^!¾nˆé$½ý¸Ê¢8M/·±2)EBZ§éu'(sš^†ã¶œÂœ¦—s‡úÒizFž©Q/çƒc¥ïãE½ºöª¢^zL…wRE½èùËÓsŽçQ/Á%=ªM¯,qª¦W×EÓû´åÑ¥šÞ‡9T¹5„¶"émäÒ‹w9I/ÁصfF¯¤—i˜¹ÔÖ¯_M¯.#9M¯h œ¦—³µpÏ\P*êeˆ Í8Q/¾À{»=¨¨·ös”™ NE½*Xr¢^îÐBà‰Æœ¨—¶R¸ÞH=ìD½ˆËzîõâMÞÛÂCE½TÌ0”+õªôÍ©zEÚªª^©ªW—ÉœªWä“NÕ++‹ªêu+ýªêm»¨zÙ.™ijlVVU/ n®,La¬¨z©ê/›N‰ª—_] F2ëSÉIoTÕKC5gTªêUõ”ªz‰y†ÆhYU½Õ‘×¾NU¯Ó©©ª—þ;²SõJí¢^#ÉàË*zØ`Ô}5½ZA^Ôk%ž*ê¥3Ž–ƒœ¨—Ûr0ŸÏóN&ïÕPãD½lð”ßKðkD½t Ú©‰ørZL‰Ì ,¢é¥ŸÜN¨ã4½"ÐPÞË€qÝ{)E5½œò ßhzõËÞË'×^ÐQM¯Ö¶ðÞ‡† –q1Ý—÷jŸå4½‘F›aÚO8M/}sê3}œ¦—ÞNu3VÕôÒéc'Bw¢^Ú¤±ƒk^ÔkÅA*êÕsª¨—޶ÜI3å½ì—¸«*GWQ¯V»Šz9ºMs¥Uq¯Ûë05½´æ¨k©Ôiz5V îåž©P÷R–jzU€'š^Z2>µSÓëZ¥jzU㬚ޣìŽ<'åØX ’^Ú#"{'éåëææ”Ñ€TÒ{±[Þk†*éu·"’^wN‘ôjª¤—HJ‘tð’^£¹ó’^Û󨤗ëDµ¬µF/éme'âr’ÞÞ°ÓÌD¨’^Ž'Ã8I/§GÜR I¯ÆX•ôj§»%½üïÖ±8I/îþâ®yˆ!¾ü4|ãbS–øö2J'ÙI¯ö*ëå‚ Ó ‡oe½6`xYï+l:T½Vb©ª^§Àuª^Q;U¯6ªªW{pUõÒ mgrwª^⢭rª^ÕmªªWfÛŽ¿ªîÕžNÕ½¸•x­4•NÝ+eNÝ™#·û„¾2Äuâ^:ç3MêÖZq¯¼L'î¥Ã+Wãò ·"îeë½Wfi'ïµ/Ū{M :Ô½¦ººWFNÝK;=îšïê{¢fDZö5ªºWoEÕ½î8Q÷Jkvê^­R/ïµ 
ò^éú¼Wª[Õ½‚›œºW>§îµ³…¾Ѹ—Æl¹ÎTgNÜ+#q'÷j퉸׉¸Wºª{õeªºW+=ÿ«_ }ß´Æ7þþ—_n0FäFÇsß‹–æKJåˆoæìp­»+ñåT§­™#¾¢˜½”{Ù;à+ Ý_ÄÂ7yòÆéKL)À—1;íô |Ù°«L€/ÑÝCã½aæ§À÷¡›zÚ^ørû×vUà[i¼^Çšžò^Þ »Ã¥6ÞËÙ &Sñ>M˜7 cì0Ö·”÷*…qÀW™µ_û½¼÷5¼×D%¾úý)ðÕû¾Oï‡ÃÊÞ‚ |uŸ‚_UÝ ðå8‹ž¡%ŸÀW7½:¦&À[¢:|ež¢ÀW7ò(ðõeøjPUàËHíl ß_¨GÃüÀWkN€¯2|¥?qÀW&? |]kP૳¾::àûô!VYú¾œ/p–3Ô$ |éVÊè´j°À—M§#ä!Pà˵\ľ)[ÀW'|ŸžJ%Ï^|ñ‡5®E0G|iÛÁ‰Ã˜ÄXâë߇ßÂÃÊéሯŽ@•øÒ.œ£è!ªPâÛ ‹—žÝßÚ§£KU¡ÄWé _åužø^¸™)õàË%7s2}#ñå„ÃÀIÈ”÷ê‹VÞ«Ó|å½:ÖWÞ«O.À—µPs] !¾‚ºøRMÂ¡Åøø²ì&7õ£ÀWú|µ^·‹ƒ»y¾îtøÚ&¤. ÇÜ5µÄ–øVNùG*­ƒøâs{bÚjUqqл÷Äw#Ѓøâñ®öÌV¢&ú"øR Õ®-âàKRËÖ5·bâÀíK\?~–ø×_wN¾Z[àëÞ¦_÷Êøº{à+ Ïøêó)ð¥Ãð³6ñ¨‹ÅåÙv૯Ú_ûxêâ ëñ‹øÚ1”¾î\bâàîA€/óMPò0tÙ|õjâáP ÚÌèáx¯¨ïÕJS‡Ê\YÏp™r¼/ƒÞs#ò^]âRÞËem•r¼—¹8’½ 6¼×]Ox¯öšÊ{™ßboás¼—”-…™†ÎßÊ}lmí'Pà[§ó’×)ðÕs*ðEE²sƒ…_iǰ4õ |¥”÷rŒÃÑJXê_Ã{9f§ÕÝø:6ïÕ˜§¼×Ý (|éü½3Â9…/BûíI×Rÿ¾¼WëTy¯–)ïåù ÚåØƒ¤¼×• ðÕ_¯Ì“ðå ¿ŠÁX…÷öqm¦Ñq¼WÜ‹ð7§™Ÿ÷r<ñW–IŠ{ÅæÏá^̵êΕçp¯>âÞÆWçJƒÒ^NÑðls³—Ò^u ÚëŠ÷òCÂ@)ÕeÔ`p¯>â^'mÜ«YŠ{ù8ímœu_ËKi¯»¥½îþ„öês)î•y¶Ã½z+Š{µ%¨Ä1¯fY]©ÄWöœ;ÜK9Œ¹l®¸—i1‰Ê8§à^&ãbж.;ŠÅL¬-ˆòÞÖgh3 ‡ã½îÙ…÷JÓs¼—/1âòÇ{uO½_wœ¾¡÷ßê ðåD4„aªä€¯<ž¾¸\¦v¬©ÌC˜‹”§ ª+2_A+Žør; Ǹñ?”Ô¯òí}}›sNåKe“=ߨ|iFÆiÄHF *_œ³±O½¿QùÊlÇÉ|/Æþ07·:™¯¾•ù^Ä.#[à¡óåÇTW:U§ó¥=z½§;‰Óùâ8®ù?÷éçÀãJ³X§ò^§)ºr*_òý{eép*_Z˜áÊC¶àT¾ÌUÚÊöW•/g×ʧ*_W$*_îh~V.0§òåêåݶA„¨|©äŒk3ýPùrý4†éCñëìn£òýEf½Æ*ÁÓÞ›Éúâaèз/i_ý»³°·o©¦Çrö²þp'+ã¢Ò^‡Ü„ö:t&´—θ DÝ+B§îÅx p©bìâ÷ê^3¿t°šàyØe޽ï2Æ!î5ó='î톃½Y~ìuÒ©{ÍZ¾ƒ½L>x//°W5¢ {Q*ìu(UÔ½èÕ(9ˆžõšåêƒõ•§c½ÖíÒ¡^±QÔK], ñ4Ô++€õ ½WÒË ½ˆL˜~Ò+Ö‡Žôje+éˆ#½Jö•ô2‘…¿qÜ‹^¡«ŽôâÚù|½4øf¼ª§_¯vÖô6¦z¥ëð(³ WC¦‚^¦®3MᬽÌqÂÓ¿ÞÆùrY©/ôº³jæÈTV )½=]Ö3S:m/Æ„»8¤¯êå ‰ÕËÔm¬|<çÕ]>êåÀY&;³ø—ƒ¦^º?Q½tÓ¬z9àœô6Îíä¼ÞA¼dƒ“ór-ÎËA÷À*è% +q´Jgå .jå »ç¼“ƒÙjåœHªÑÓOòªNº{_tg:9èÖYqr8ŠÔÉÁì¾T'&Ò¼Úâwêä »"·“ƒ¶µrÐý„jå ;½•ƒ}“bå ÍÄ[9˜Ýžêä EÎÈÁnlôFÖ½BôNÔÈA6`9#õ·ðFöez#1V#ÝzªFZ jäàŽsFò ÞÈÁìüõFbj Fj8!F² 9Ø[Q#wœ3r¿‰aä íXzbܲÁ°9¸“y#óy‹‘W âÊÏ|9Xß5rÐרFÚ„ÔÈAOô^ÔÈA›ºwrÀl&.`1rÐ7¦Fº­\ÜåÄÉA­mÔÊç¤om 'éuµ ^º5^½ôÔËÁg½´ÔÌ¡²+½ùW3|jæ 5´Í*¡\^+Æjæ oR¤½ô⨳ ;h‘ö¢ £ó%/÷>ÄËyxóLW᬴Hœt‹»:9èaâä€8†NxŽ~•ƒ¾)µrÐF¢^îœâåànS¼ô6ÅÊAkG­´vÔÊAÛ¹Z9p|¯¤·ÎÊ–wxÔ»|kå`ïE¬4øª•ƒV¹Z9h[V+2¯žÒwÀãiåàÎ&VøÒé›:—UÔÊA}“ÔÊÁݽõr¡œZ9ÐÏ¿šK'jå 
gT/wœx9¨»–‚Þ‡#ï+ÞC©­ ÷yGôrÓy©a?ôúû3}cêæàÎ)nòÊÔÌA«Ô›9˜‘±3sÐ}ójæ ¦jÎÍÇQ …õ—OÇýCcYÉÙ9¸ÃÄΗ£­åXštvÖyìżb”ç0/¾Yª˜ÂD¹âæ ÎuÎÍÁ•YÎûp^ý,á±`^ôv=Çt¨S0¯øÝ9ÌË“Ü›È åŽ&”·g°âæ»Iœ•òâëÛŠë7f\£Š+7”£¼Î]A(¯;N(/¾wš-½]ŽòºsÎËg¸m-ÇJr^®„ÜkS¨ã¼=%ÚÊÞã8¯|ÈŽóö4pko¤r^1ctœ·±‹"îè¯S9/ʸý`d^ÎËÇÃPlùÅöÌ‚„)¯Œ¿–ó^¿Žó2×X©ûÎÆ!¾HME½|ƒ}Yæ0îÅ'™Þ{˜ã¼÷ÅYÏsÚ88É¢p^öùháÓ>]9of)¯³­½™+ {eCAoâD}ÉÆ”ób6Pù ?³ÌrÞÄ ÷– *çÅ­ Îžª-èM`\†[þô&Î(·Š_AoâîðíÑ­ 7‘ˆàªa:QZЋ²tí zw@lHo¢OYr%½œüµÆ@Ioâvùí¯¤—ÇÅíb¿Q/îð.{‡½¢ÞÄ9ý›äNPoâ€Óð\ËzqÜõìl/Âzù²ò» ¬——«¯_°ÞD>¦Ç™^öòV®½UMaob·ðBTËz¡ ‘ù0¶Ö볨³MÞþìLõfz8ïþZQoæ\ûËÔ[hˆ‹oö®'ëÅ´jþÝ·0Ü+勲^Ü ¾¥®á "¬s®®×Á²^”Ýa›+ëE@ÃËÛ[%-ëE@‹5-Y¬e½h•.+cše½…9þöÎe½…”ªn)„½©Þu>˜ÂÞ¡oãpqàöâ.r+[€'°ÇevõÃ&D`/ÃÝ¥5ØË°Z®-Øën^`/B.õG³KSØËãPs{¾°ÞÚ{˜ü¤IC-ëål“û Ÿ•~íE½•©¬ð +å™E½8eD+ØæõV¦²bµÅÓÄ/šRó9˜·¬×¹œ ìíeiç±Ø{”ØË2ŒZËj±öö2¡<¶dZØëÏi`ï7Eö§4°·—qynü³°÷(°÷øµ½ß”mØ{܆½,c4_ûÍ ìíEdYc«†ÀÞ^F>6÷c[ÚëOiqï7e{RæÁâ^/÷ç4¸÷8ÎàÞ^Fò? ,î=Ž3¸÷xvƒ{»m?¾¨íÇ`pïqŸ÷ç4¸×¥Ü본—ewÙYT-îeQ AeyónÜëÁâÞãV î=n…¸÷8ÀàÞ~ŒØËuÁàÞã8ƒ{ã^Ü{fpo/»™Œö9’³õS2ÝÞÜÆmq¯¿œÅ½G™Á½þ[Ü{Ü‹Á½½Ú8.gr6_¥–÷÷bxïñ|†÷úwfyïñ †÷ç4¼×gy¯?Îò^ÿ^ ïõgyo-t«ÌÓ×ðÞ㔆÷·9xïq‡†÷eVÚëoÃJ{ϲ|;4À×_Î_–5rÖ1 ä˲J–ZÆw‹|YötýÛu0ß^F(:­B,ó=Ž3Ì÷¸Ã|2Ã|{Yeâõ°„¿›ùvÙÝŸ¦a€…¾*iæ{f˜o/#Ì›†–ù²ìfâÃ6- óu—³È—E£¼¹aØ"ß^„aÁ2²È·—cN ‹|Y‰㑟­q"?¶.Oâ{œÌ_2|{ßô°À·ç¦¹ŸRnŸ¡%ÙLí—_–•ØÒÜï/À·Çö3-•,ðíÇq.Ÿøö2æš^,ø†ÅhÞöÀ×=å½ý”…ƒøC×{<Á½,«L†ž¥Ýݸ÷xƒ{Yö¬ ŠÅ½ý”íŠËLÆâÞ~&bS¿"¸×'¸·—9g˜ö÷ç|qïQdpïQfpïq+÷Ç Þ{üÚð^}Y‚{YÔ0¯@xÐÙàÞ^vïð÷²‘\ã¼çä½,»ÇÄt0]Ã{2Ã{ý9-ðíÇ––<À­¾( Ì/õœ²ÞnµzÅ©IÞë‹ îeQÊi©ˆ÷öÃúzÁuØ8°¬£æùt÷º¤UŠ{9ïëíxÈzûqic?Á½ýzÄÙw=p¯¿‹{²÷öËa\…9ЬÁ½G™Á½G™Å½¾l<&¡Ë“ö×áÞ¿Ò¼—îpSpwP_ÚšmÊÃÊ¡¾ý’RßÊYÿÚ¾tX9<øGxtVhè#gò7Þ½èNкÁTq/â´>Ô½²‘[¡/ [Ú }§î•ì®J}1 OÝ”)õ­ÜæV—¨@©o¥Mòÿ(õ­$x†¡zè‹ge¸¨¨@ßJœÝ_:ë^"n¸Nu¯·¸µÐ—Çc.ã ô壦ý)ôÅ£²ÉÎ=µÎ¼—ß.ó¿M‘÷"„å=µ{Í{3 %VÎ …¾•jÑsu@Í{ƒó³’ˆ©y/ݯ0Þ˜ø\Ì{i6ûÌ${jÝKk4­ºh°qîE R=åáB|+÷–¥XqνZkêÜËyBÓÊÆ9÷âõãjËÄY{q÷_, ¯e¾Þñ×2_VÛ–fðpîµ-A˜¯k]Â|ÙDi)QÃrõ5νˆ[d†Š0ßÊAbÜI’„ùVî-EoÞÆº¶:÷â^¸07Íý,óÕË)óÕÛ<œ{å8Ë|«äd¾œB U¬=ºêÜ+/K™¯?›e¾¬€rï{±îEã­i+E„ùÒ´{ q=óuïX™¯8Q+ó¥‰h¬÷Ú³©ÐWA /¯Ç#Sª Ô÷aÎÈ]L©¯¾2µîÅ?˜sä{pNˆÔ˜3ÍÁ“*|+·Q·-‰/¾ƒòìîL%¾|†— A%¾µ¾$ÑQß~=îRZùάÄ×—‰¯»M5ïEÈIeë EâëOi%¾ì³B~eÃ˼Wº2•ør¾|¥ï%¾c.½U¢jÞ‹‚aÖ2©0 
_:sÑa‰ÂWã¥3ïEǸEâñeÙv¶Ñø¢ jKƒÅ»s¹wΑøò«‹÷RЈ—OÐÚÖu©u/ZPˆ¯Š×*|y')¥xåÓº×Ý¥8÷â.Û½3‰À×U|ùR˜9uZmˆÀW¾ |YVÃN(_çeîœ{Æ´k¹K¾¼M¶ú{¼MøêÁY÷ê÷8¾úªDßëÿ^œ{µýˆ¾×ÒÈ{ÝKüÿy;›ÌmìXïý+zy³HCEJÚdí]xá$±‘ÿ¿º§(~T_gìLA¦»õŠ’(êða±Ž÷”ŸµÑXÝkÏLÔ½ö“êÛ‹ÕIÔŸh† ¬îµ›(ê^{f¢îÕ‹u/šRÒУ‹º×›Éê^¿VöÚÃe¯Î†EÙkù¼H{홉´×úˆH{íi¯w"íµùƒH{Ñ+šVŠ¥½Þ–öÚ$Ò^¿>–öZ?i¯_k{ýúXÛ«Ç©¶W_Õöʈ¢Ò^<¾èTM£/Ò^\ÝûN3•öêB¥½ci¯Þ•iڭ݆ը™öâ[1e³já€ðÞšJÅ,6 ý“Œ¦i¯ý¤˜öŠQ¼™önÀ­C橦½bZo¦½²›ÚL{1q«bÛÕÃmÁÖÂî!¦½ j‘ÁFþùÅÕ½v;ÕµJ›­ƒ7×^k‹¸öƈìÞbÛ+¦ufÛ+ûÖͶWSm{õ©mï†v×Öªm¯^žÚöÚabÛk1±íݰ¤Ð¿ã¯m/tN‘äÝõi›mï/?l_þå‡+ Bݘ^?õyÿø¾È®øïþë[ r<Ýç‹Hˆß‘lÈ7ì?wθº_é‚ÿÎgŒ‰)$'õ:ÛŸg¦·ÿó统ú¾ç†²¿Ÿýýó÷=½ßõy·?ff.ßáοçÆ¾ë™ë½~ÏýþñÃÙßßûåÿhL+°z‘jzoäJ¹ï„æüÓÍü÷ø?¨‘¿üøë—û‘ü×ÿòã¿þðÏ?þÁóÀç">«Wz°oúŸéü†3¥‰e>a”úÇÏTüLßaLãðß¹¯Œ±÷;މ|•7é)¿× Š±º<Å!±ïÓši5¼^ÒÒ }çžp‘àî{<—zv{0ÚŒïûdÞöÈ£±æ|x6`L$Þdñç Rs¬+G6Íë´}Éïô§:;ÍñnOCZºžåég9þÆÓÄ<"¦‹ûY [ýþiN?Í_9E¬§’ÐóKÉà ‘ÌCwe§ÀÿÔüÇ/ÿùë¯?ýöÛO?ÿüóO¿þïo÷ÿœÓðýå툻+µë¢Ô¹ÈïÞŠe˜;°G Þ­ï,kïCɉ©ÅÜCtß"¯BÑÍ9SÄ”`jw±$oÇýÎÎP|`*ê°³eÎâa™5E.¥¢úìÿ®•`>ËÔo8é³BŸ6ÜðˆzY,)>cbÏT²mXù΃Xâ¯ÂÞ}Ñîºß45Ð…Ä{߀è]×ß»œhÃv¹4 ÔðXçy·>ÛMÊ ­ïû‰ãåƒoÌ4sÇgßyc¿¶rßUµc•a¿ßã®;Oò5{Ï—Ø%È~ód—fö+}vÙM†·§–žFŒ ±÷@¸NDž½°؆¾Ø+·¿¶zÀÖÆQüãÍžÈâø„)Ûj#D&Òôc³ò ØÇøbçØ÷\ÿ¤¤¥©NJ¼)LKÅ41¦´\H =©p¡.ê€ùçK±”']_[‘ ªÞÑ$¤VHeÞQ-=ìÎ(–ôq.Ž¡ÞšªžCŒìê¥êǃÜeÖB9£ÛÅÏyL…¦Ù³?uw5U0ÃÍ?ÎãýM+IAµãkÆ•A¥j3bdÝ,OlPž®§'4:{{Ï‹ªÎHÌ缩„ˆ7Ÿóawv©‹ñv)ÞóäF-¶«>~MKÿäƒK0è’“–À¤úˆãÖ÷?Ÿló)¥3Ÿœ¹Àʨ«oé7…ëH¡ž'_ìGÙÞ+—²¢zñOl©™ãY_w“Ç@Ë%MN¬»å+½}²l\i@Š(ÆØ§¥oìb>ª–õ)»æJ=AĤNý‘rO­"_ç2":(«ç: —>%k•Eªª÷”ÂNµâ9‹• ’J}OÝÞû«ží&K}©mOœ—H¤*Yü›Äê;um\Ÿ§æžÝÕáà”Ë$S å Ö9ÀÂ.oÕ{qW²r‚Õ}ßת*NGw•z»q§¸2Ž”BCŒŠUˆ­îƒÕÏ9ÀŠ]ðÔa¥VøµP^zÉe•ÇK\d¼û¹Å(…cóëYåœAÄ=GX© öà&<Ôc÷3w³#¬v!4›«ðRâç–…6©ÂûÔ±ìsQ¹§Öù¢.¹ÛÕŒ¾¼s„Å÷5æ†ïƒCÙ”2{,Ʊž¢Æa­‘›û"¿}GÍÏÑ5Þõ¥Ø*yÎþŽóSÞ9ºŠ£4bäé‚õc¸<¿M|¤nZÌ€àVùÊZdä\÷“>+1¥ ôvîø2Ödž‰ÏEµ”á3òT³¿ÑYÅžû©Ž`£³Jaå—»×JG”÷Ǥ¤ôñÑUßù^$Î_wlóé~!5H,·>b;§°²¤Š{Ï9lp_×YIÆL­ LÃZO-jšžO*=Hi¬,aG0s….öÍŽXá<Öbç±Ãʺ´%x]ÇØC™ì^…´mN\c”ÊJwÙcøâ\V^<˜p2+ËÃ<8›¶Àµ'‚™7u_Zº­AÊg!æ)Ýå$‚''´òF0sF¯ÖéçÑ,œÒnØÿyµ¹T/Îieµ¾sù‘Ôe÷÷‘«­ëŠ;*™\ˆì[]r'¹ô ¬¹³Ôb+1%¹¶£6™—sMuÍߥþéo0qn+ëçŒQ!nMüý ’n"‚™³[»©ªnØGX>\JáüVïÛ5>Ì(÷÷2H›gP¹®"öp`ô½Ú-x(ŵãôø1"EŸxqtFhù{p&¹øðaKâÑŒn·§€¡ÚG2‚§¹2ØF0†ˆ¸=ž-Ëà’¤øÒ– 
kðJí>àF,s¦‹Ü%‰Ÿ÷Y&(’b(9ÍSÕøŠÀƒü¹[¤E0òâ¸AçÞ2'JØ_ž¯–ûß¶k®>þ+›å”'(kZµ’eÙkƒ¦õ®k ~à5{w™ˆð`Èà‘þ½÷My0º6À¾½Cy0&÷Öõ×{ó`lêÛz-)ÅÁ¨ë¨«¤88bq{Á(ÅÁ&9ñͽPp0ôÿi`ÁÁ&Ä%ÞÄÆ| #tÕzoKc·A%ÚØv5* )> Éõÿ#–Ñ“^2Í8±#w@ 4ø¨#r§IJƒlK»Ê›-+ ®cΘ0 Æaå³}…Á6·ôR%‚JyÌ  Æ¿žg/ *(¡}ú‡ Fì–w‚‚QïåIp0 Æa¸Õmï `‡èq¼“FÁˆÅ÷»ç,‚‚;cvÚöµ –Ú3†‚; Òè´ àú›ç¨«+,±íå5˜ן•~+è1 ®ë=]Ri,¸þbEæ…㸸¸žs ößdŒX)s–Ï,±;ú±§—é F섈ý%Œ‚ëO>sÊ(±ôÌÙ>³àzÜ6*3 F,2¨^AZX°7å•Ma®Õ· ^î‘`ÿ)"Á~£`¿*FÁˆåsÎ/Ç >ꆬÉ×…¿éÞ((1šÊ3 Fóãf"$¸þä4odŒÐyOšB$¸žìº…׳%¤•ïãdŒX†OÉV^¢K$±h`i¦VB‚½)L‚C颶)$¸þæ3§ÞL‚Û #˜/m!\Ë1´½®CB‚—¶ >ªXtÎl_¼´‚Pp=bºY Æq1}Þš½Ž `w01 ®‡á9½f>B‚ý°C Zãz%ÁÕ¼5‚¶±^@pýÉ{b Áfú* ¸—'¢aŒXÁäâ­·Ã$xiÊ$ÁÇûnF«ÞžÇ$¸†Eåsï”xÚlà¸Èõ¶ëý²0 ®-¹°Ïý‹q`Dâ£6àsàzÔ´YŒXz&`¼G¸7ÍH…/W@¸—òa \/!OŽ—ßÿ:Rð÷/ÿùÃZ†˜ÎÝ/bì­f\É|¶V3^Kœ¶bÞvÝ­–ž`Ķgâ#FÀÒüÂ9ë ïtNð¯sQ¾ŠIç˜É3û]Zþp¾Š>û­­HƒR1úõ7…ѯö]¿Þ_üÖ[¸l=9[­›½óu¾O“Á¯72s¶úîºk(F¸ïrݧ«ø;߯}—F°S¡ñp¾ú¦ëƒpnœ°Jwcì[Ú‘}[°/beÚ ö]ŽKœ¯bCÒÄŒ}—ã2ç«v\á|U/±o=.w(ÁØw9ê¦tÕNöpºªÎè×cô»wpº?‚l55{pºj¿™8]E†A¨ùä|5Ž;ð£ï¢;³_ÄÒ³¹wº-ð×n³ßzºHóÅéªî•ºo¨PÖù•°_k»²_;PÙo=ò!BÅðw=òà¬Uî³Â_ã'dø»yrâ*ãžÂ_+•üÚƒUôk·ZÐomèM4•Я=YE¿FoŸÌ˜Ño=rº+ú­GF¾Ô”Š~×#ç¯~$Û–ú-gô[LDÚý®GÎa7,†ö IŠ~×#/Îb7¼ }KRg¿ë1l®åÇ0û]ž³ß¥Ì~—‡Åìw=gâtÖn9³ßzä6öÅ)û­­=˜û­Gž„ÿ™ý®ç$ø[U›×ØÃ'ðwyÊ—ãþ.2ü]$ø»Œ õ[®ì×>¿Ê~×Sû]žÃßzäY‰íœ×ËdUºÊaþ®G.¿öm{èþ¬eš©‚i޼¨‚1V—¡9 SxP·R4ª Fæ”FUg,ûöÚ¡¦ Þ±wy”ŸVU0²Ë’»U0ð†| :IW3ÞPÈ)îõ["ÉTÁ‘CƒT4À*x«»&ûÒ£ª‚ñÉÏ©*`ŒReÿ  ¶˜¨‚ñPSKúL¬TQpŽl¹t€¹ˆ‚¯#ßÓ©(®Œwgg& ŽÏö€˜×Ê>°¡oÅî…ÇoÆýêàÓDÁq3¡ìzõ@ Ù2(P¼Á¯éîcS5)EÁ‘´Ä·½3Š‚7T7?Æ|HUÁRîÒTÁR-ÇTÁ1tÂï- lª`)hª`)Ùiª`‹‰,3Úè•gS³,ØcY0y£P†É‚¡v‰~ÙÊ‚£c\g7r5Y0€ëÞ½õL,¥éM Kû—2PY0Üã`WÔ@ŸÊ‚ÓÌ6Œ[iÓ«ìYtÁ~œè‚!Æ¿z¡€¦ ö².±c§W×%º`Ì.¶Ò÷4¨0 ¼Žnc¬Ê`µ‘Te0êÎÃíl™•Á~œ(ƒ£3ÀKñ5Ue0F Xr½¥kLœ0äÞ³2øÁP½uŸiU£1r¿ÞÃ" ~^sµ´IŒiU|®ú‚ƒ1ËGwxVa0”øÇÕ|)UŒØ107W\ÕÛÃ]𣒯ÚLÑÇq¹ ɹW]0úC¹öO4—jt¦Ô‰/v†ƒå¨¥Â`­6Þ…Á¸Y0 mÂGã5Í °/¢apüZdÁ݈]…Áx…a¦Ñ˜»ƒMõ/Âà8ß¹G‡:^5´ƒÑcáÝQŸeað“´­ç * Æ5ø°¼sgÛ]a°Ý-ûo’2ØF6VG_€/÷/³21ì?Ê­%Rf/>œØ·”ï…û"Ê`-é®Ê`ëC¢ öãXlŸQ?С¤^óF•ÁOÝ×$é¢ †á üA_]0¶^C-PuÁ@;÷Þ ä¨.JÊ+ræ­ÁbÖã8|Š^›Õ#eBŰWÒ¥º`Ûø"ºà5·`ôâÑ£+c$iTPtÁˆ)äÕ-&õ`Ñ0èô_:#Â`ܪÜÄF†k["½êyƒ±óÒøw$ÂàjãÉòÕôĤ ö+ƒí¡‰2]ë>»½*ƒ­Û‰2Ø>I¢ Žó혠¼=K„Ávq" ~ 7JcÌ8ã+×VDŒK£:3a° " ¶½ ƒýLR¹\»— 
ƒ£_¼^/¯P—…Á¶¡L„Á6Š0-„YÜ[ÄC…ÁöPEŒvF>ȲÔ×Ôkapµ³Š7ä­¤¦Â`¼®'$1[WÿÊî6KXl_wVc¤¬[±ßÈ£›Û8q0N¶BŠ&–]Y¢ ~À”0';@ì½RµÁ¨óy•˜J¿ŠÜS÷¶=Xîp–ÅÁ6¼‰:ØFJQÛ)ê`¿†W¬ªsQ£ ÃÕï<ÈÒÎ&aŠZïƒ>X³QÓëà5ÂÐÆ5¼»”] ü@™›­´ „oh'Ò«—q}0lés¯Aèúà‚Œ¸„ûƒ>í9 Dk1‘Ç;Ê´£ò`»•û…ˆ<Ä.wädò`ÍdM¬€êƒ±é7¥×_^ÕÁˆÀÐæÝïaê`À„è/ã2q°î#6q°¿"ö×\ÅÁÖßTl·UÅÁ~¤¨ƒ‘¾Ù©«ƒc½†¶ÔÔÁšC›:XSˆ¦–Æ·âYByÿþÏÐa|ìßw{¥Ã² bt¸Ìdf¡ÃR¸Fáp¼iøä¾½Náðf2Š*ä®Ù«¬+†‘S|>Þ‡cn¯Ýë`pøDÒÛ¶ê,l¸\Ñ_º¢lVÒWQ)VGeÃ;’±¾öml“¾«¯ .Ž(I÷®ŽÖ’éΆ3¶¾#¸°áZíO·"6*|A$óßkJ„ë?ÆÓÊ V+¥ÂúÌ” ÖæêÝF…å~(ÖŽ¥PXï•Ba}Ø …-&PX/]¡0¶ c&w¼ Y °Å˜ ë*,o¾Qáˆa Éû+Ö.©T¸Ô¿Ä½^4µ“\w·Š6*Œ—ïîîØF…å V(¬}K¡°N °®%)Ög®PX_«…톶_(¬R¡p|=±xÛ  ëÍR(œQÏëºJz‰ª@a}A ë5(Ö![¡°µE pÆ¥V&Á˜°]3a10R&\Pù½Wû0&¬ý\™°Æ” ÃU3úl³´P& Ûî»ûNÖ%NeÂzS” Ç奫;n¶ß&œ¡ÐêŽâF…uøU*¬·3·¾…}¥ÍýC©0†îÒ+£õÃ"Æñ ëo*Öþ£TX›®T8R}¤½ÍG©0Ì:¶nÚnT̸ï]:P*,«ÝF…+7ëŒ ëc*¬¯±Pay ëQ …µO*Ö7G¡p¼ü¨¡wl‹j¸ž+ÌÛêa]D¡°>W…ÂÖÂ(‹—ºhÅ pFm_Þ{©PØ.½AauS( _£—L3( î‚ïÍY ÇGˆ£ASÂÑBÐÖ¶ÍM¡°žO¡°º¨)†-MÌôÛ8…ÂzÕ …õÚ gL«¶¶ÃÏ °vÂú ÛÙ„ ãEzË|Y¨pþ æÕ*-*¶¾ XöRéì6QŠ…3ÀL/x§Xç‹NÙ7*† ;&üÛ¢F,5LñeÁÂz[ Û5Ö{öbaœh?zE>ÃÂrÑŽ…1E9îf{äXX.ͱ°œÒ±°)XX®Î±°Ü2ÇÂ1iÅkó¤OXØ$XL.:Ii¼H±°ßÆÂÞØGRÚŒïµ)Fq­Ò*{9¶˜@aé÷…åÕu(ìG2VÉAaíû…ur(lK©°õ¥Âþ³/ÖWͨ°¾÷F…1•˜`<Çp† —Þ6ÃÂøÙ²_ÍΰðrNæÂ:Ìön%\Ø{«pa¿qÊ…í1+öKa.lï€aûQ%Ã2ä8¶û£håR¡8¦=4á¥DÙа~P ã5À~nW)hx 2^.…Ѱ~P [R4,ŸÆ ã 6\í7Û÷ÙI:ü§¬„Që· FÍ—kOÍ;K¡0ªç`gó+fQ(#-ìöÛMU*Œñ;¾Û½œ9SaŒØ¯íÂ[¹›©0²êcÇÔ tâ¬:úmŽ×ãºÉpR]¥Wñ*lBÁš3Æš@KîgSáÛØŸöƒB… k Fßy>GD,zjs5*\jIÖ:Õ7´½îù|c¢/K[›m›bX »Ú#Qߦ;„(†1 Œ|voŽ¢®_Þ³MÕ:"b1®ÝmV7èüõtHS ã§¡>Vïˆ ß"Ü ‚aY0Á°È¤Õ;1¸À¶4P¼#ÐÑk˜NŠw·-ž-eï?޽#pÜV¦Ç{Gø5°wÄVëSï=«cï\öBv‹ òŽÀQØuÑ’NñލÙÔ9üNÅ;Î×Û9Œ3Å;âñhÅ'Á° ËÕ;BÝ´Õ;m:¹%ñbá¿Éæ»Þs«ybåö“báDZyΑxsðxÝ#⣇ ˱Àa¿Y·–`ûo<ûG ¸3Ò¥fÞ!þv>ñ@ {ÆÒ;ÿôµ¼ÏVñðßdÿwG®ðŽÍêáDZ„Ÿ $¼d á!2À/bÕµuo‰¹ Ï[ÉöÄ@§ƒH ¹«ˆ„u1°¶ˆžvm531@ ›Óš»©HX× ¼ñrtªÌãÅT™ $ü¸×@Â` İlØ£HÄò²¯½à]†YöÄAÂÆ!q°!Q,$0ªÃ< ÉNÅBÂcl!á1¶@ìŒÏjcb!¶ÜWw'R ûT°…„nà ÔÅBb=<Ìò€/v§ÅDB¾ƒê!á?ÉÖñðãØC®\<$ìû)c »ÑùÃÎ&Ö·ÄDš'FvYb$a·CŒ$ìî‹“„ÿ&;I ÷Šq¢¯ºˆ“„½â$ãÐaîî,;߸ô•-1“@ b½¶x†~JØÉØOÂ~Pü$4çTC ä•G6Õb(a-K %ÍcXË4z/–z â(á—À–cK ûIv”°'×%¼}ì(bζf-Ž(G½Ö¹è…qXŒ©/K‹£„Ý*q”Д_%ìã Žþ›ì(a-Ž55Ý›YœJèLA %´F‹8J؉%<ÄŽºeT%jéÕmxÔ‹£„N×ÔQÂÎ'Ž:;QG 먣„έÔQÂî˜8J †W®-<Š¥„ÿ&[J 
R-´¸ï¢Ëë(±ÕàÝW’ÄQb£ìØÁð§àÃ#,µÐ%Î{xÌQbùYv”ÐÆš£Äò³ì(±ÉŽK]%–±«Ä$W œ²î³cur•ÐÙ±¹JèDØ\%ü*ÅUb ²«Äò³ì*á W‰%ȮҩÌTB;¿™Jøì)á÷N<%üq5O‰åö”ð‹O o¦xJøÏЧ„ßñ”ÐaÁ<%–s²§ÄÌšájƒØST šàTV?á:öÁíð- ažËº%Ë-pÞêÍy$Ëå8J¬1ÉråÅPº†øIøÓC ?㩉®ž‘ý$–`ÖT·Äg´©XÌOÂOyi®+=@ì$–_}üß¾• o߯„+¢{ªÜñƒ‘ıj&6³“ C½^ö^AN¤Âpªo›ìL* öòs,PØ&j …# ¸[Ý„mc•2a-ѤLÿá9±-æ¥"θ«ÓŸA´Â3ßè•ÛÙMƒ§Ò"fþåÀ‚krCáÈ÷¿Â·û›²X8cÞKUîX,|ânvhÕ Ÿ.¦‘úŠX8Ζ± £baœ.Aø4xñZœ{§“²¨…O¨cÓH§E. ¬ñÄ8ÙÜCE.œ#Mºfå3‘ ŸXfœ.Ò"Ž›‰[ýð±\8£–%aT¹pÆÚÓ~ucK‘ ã†d1Š˜:‹ŒTvËå>º”xÊ,Ð2Y)°\8c"Ó×V~åÂÕ!cÀk{E.M‰¯oôQ¹0ÐÑ>7‹\8cùlÇêÓb)¼WRuw• Ÿ(;«K‰\8žÍuŽl"¶÷ŠåÂx¢p{—T.ŒÃbØhþÌ"Ž›Rðê´Š€"Θ܎õx• çû©úWNåÂx¯éKÁjá Q3 nY.Œº³×ÔŠ\±sŠàD. dvM£È…côM¨ßÔÞ9‘ #¶Ý£,R“ Ç¿bwsw=¹pÁ´íµiD.\`m6sD.Ç=¹Þ¸ý‹aÜÈóš’G‘ Gì(c»ˆÊ…ã7adÓ|ÖT.Œžž¡ó~C¬F3óØ)£já‹Ü{X,ŠZàŸÍæ+ ra;©…ãfÂm«oÑfµpW}4ª Ù¢Ž“í([ÚÞ8Q ÇÉP_´ïƒµ0ŽÃƺ¶×]ÔÂxv†·]ߢÖo­ª…ý|¬.Ø1öôÚª.p³»»Ÿ˜ª…#](M.üÅx0b (´-á¢ÆÚ§áS~;x\Ò°Úµp\–ý‡å1«…ã¸hç,¾Æjáˆa×íID-\¿Ì…MQ XŒÑ”®$žº†8.ºÕUÓrãÁõõˆØkıXO& ͰIÄÂödD,ŒØ36‹©X'€™P3n`±p„"9Öý,Æ3C¡˜–~ˆZØ‚¨…ËÏ,Çjáz÷¶iÐËjá‚Ï|ŒÊÇ*.˜ÎEbQ #»E?k]¢¾ê„'¾GвZ}%å^#GÕÂa€/ja —ŸõZv—¦°‘jiM-ŒÞµÃ2_ÔÂhÅ–‡°¨…ã9•ƒ°/«…m0µ0z,Êž•fÍÀjat… à›eˆZ‰{Ìi ûbH±˜C}Q 냱0†Ãç^ÅÂÑE4ÜD,l/‹… 2æø¢6ƒ+ —Šíʱ°=R £•(AÔ¼ñE,l7EĘ !oì5äX,Œ¶¤†T,!CÄÞLVD,ÇÝû„á"Ž·3Æžíܯ ׇ¾MIˆ…ñú´3|1$ŒØ™çù+ŽÞûpT±°þ˜‰…1ì”±’bba=•‰… èÓЖ˜X¸`fããØ¶¬Ö·Ä´ÂïÓ6÷ú‹Vx9’µÂŇ•‰V¸@ž5VL+çÎZá õeŸžÎªVøÂ¸™ºï–i…/p”ÔíçL,|¡à )¦ÆØ¼AE×MY-Œ ÎÕ¼0D-A䣖¨…/Ø7ŸÇ,Çjá þÅ©ù®˜XøB!гOšL,|á{¿ÏÝõ"^Žd±ð…*1‡¸vfÂøçÈ+R+ã`bá P+wo G0ÆŸaf!Záx[$¦ q˜VøÂÜ?š•ºñ‚”˜ɺ§M‚h…/T)½¾»i…ÑØhP7_©ð…¬sïm“ £ë\gw54©°ßp‘ ãó-ls_• ãĬ½yÕ©Tßg8ZäóC‰¹ ¥`¢Ýå“‹ŽDáï—O˜TØ›#Ra‘^¶÷C”Â*,]gwD(g¼Q£<÷3²P}à†In/jÇBa¥>ùN¿ ÿ&Ãée]þ€†©¶UšCÅÑQZÖÐ0¾ÛØè¯l_Ïk =õF’5ú³y ã™änC¶x _¨Lö™Ä'ÒÝ[ÌFBEæ#!ëmØc8r Ô·owLÃøbÞ­XÀb1œK£ÐÌ…ã›}¡ŠPya¹{Hd¸—]ÍïW<$⢯hÃû¡Z<$⟑®\Xw›‡ï[1 Ùed"¼V q·%‰˜4Þ£zk…ÁÏ>¨Tx¯“çnú§Záâ}¢ÞQà.Í)Å´Â uAžµÎphX^µÂ;œÃ¯n©eZa髪Æoµž©V¶ HLVsáê^ Î×HL¤Â1vã²[ ³Týj‚´XHà¥9ŒqeÁˆ]˜¸¿;&ÖÓ©TØŽ©0N~¥^N”ÂñeÚÇÒˆ)…e-ɔ¥ðbç[¤Ôk™9ЍRØŽ{•Âp.Ùbê}.ÖÂUùŸƒó:×Bs»Qî]G2¥°ÆT),j" _gñÑÛgáºïcù}®…æ¢Ç\ìù^ê¨BaÙË`Báh :cþå/.F.Röa0!Ba½-"Æ$)?ݘE„Âv"Ö‡ Ba½: ›dV„Âxôy "V• …í7E(¬²9 «„L…²‚¿…i‹Ç"–ó]]NÁ$ K+D(,*0× “rÁtªS°jš\'L Ó «°Ïu¬{r0­Â›NXœë„I 
ä:a’^‰LX…e.–‹3™0m¸2™°^¸Ê„õF»N˜oŠë„§Îa‘ ³MeÂ"H1™ðõµ:v7Ç• 달2aDæµk¹LX”Ê&&Ñ´«„é‘-"a«HXÄ"f‘¹Š„uŠ„1+9GÝY ÛùŠæ®¼cGUº»FTºAHTºQAeº%DeÂÚ• ë• ÛoŠLØŽKZ ƒ÷ë¨LX7p¨NX7…¨NøDýë]U'ŒuQHNß Q…º ¥ …u3‚ …µ¹P¸ ¤Cy×YT)¬ßvU ëöU kÿR¥°^µ*…Ï™ÇüÅ•ÂútT)|bÆ{t7u• ³mŸ)…µ3ˆR~Ë;jù~ù‹ …õɨPX¯M…ÂÚF Çœ'Ý£² …±ž5—TT(Œ…¦¹¦Ba‹ºÛM~S„ÂHÄ®QY…ÂÑέËtL',{rM',ÕeºPXëѽntW°(ë\'L/ë„å³ï:áhÿ¥é£)hE'Œtªì¥¹˜ªL˜¯ÃUÂ)îÐXór•°_eÑ-oô&¸JØ–UÂbEi*á8._­Ì´‹„õU#l¿©akŒj„ãWŸ±4áa¼úW/Qíap]#,#®‹„½µ"Ö[ "a»¢¶X“ûµ‹DXj¹D8òÊ2V©]"l?«aA.öŸ‰°Ýo•Ky+—c¹.#¾áµµ"Öû¦ aÿUVÛC‰°ÅT#lw]EÂ~¤ˆ„%ar•°là\U”:¯2a’LØnÊ„íI«LØpÂjʹ|-:a}$éñ3}›©ð/?l_þ%~/¦Þ1eüÿ•Åñú…ãM_ñú§ÿþ῾<Ç}¾ˆùø5òç¦|íþ“'ëû•®ùï}ʘîàƒú^iûKüÿ4§Ž÷\Û8?þø}Ï„9®¿ýå;_½éïcoÎsRø}î;;þô]ÏýÞðÖåß?8ÿû‹¿üí¸â%ߕΫs a1Ç$;¡Iÿôc3.ÿÇ=þ/þƒóË¿~ù·ÿ#ô?|ù÷/?þëÿüã=S‚»~Œv×UWfÿà™Îo8S¤ãoQ¼§®ÅýÁ3?Ów#ÇÈüwï1m0þžc$_'ª`ŒéÄwû(TõŒ‘!·ófHð;µ‹(˜¡—´6‡bß·?œ4nŸgƒ³û³‘f|çgSÛ£ÏF›óáÙüõ‘¨Üª…8ÓÑUUœÜ¹V!_¢íK~Ç¡?=ÞéYTØ2æ‰ëIž~’ão;K]ŠÄ¶Nùÿ,§Ÿå¯œá<¡¹o á’kñŠ Ÿ ?þ§žà?~ùÏ_ýé·ß~úùçŸúµüö¿ÿsþMcø—?´ão2Ên1@õh ÀVûeébF÷Ä%t‘ŠÒÅÔ7§ ¥¨!±ÙæÄ¨Vté3Îk'{Ê{Ó.¡¢c®1­åÊQÀsj+bRgÅW›AÐÐ>ãÄ*g¾»|3ê˜Ñïû=mó`¤üô'(ÖYKÓÑÀW~ŸÜ‹{Õ ôS­Ðù (½`[o¢ÔxÉPÁúi±GÊ8§ÓxSJkCoËîÇX3ŽÖ>¹Óøi¼‰ÕyÜðý]l\‡ÏÜ« E»°¥«ûnJ1‘œÈR•%b9 ?€wïå{¼¿yúnJéÈñÙù†SàÖË#íØÃ;}7¡@€ èñž2rA–ŠÞ5xOßM !lvÚÚ‘'û K½—æºã¼ùnÚ-8 ;!Gƒ¢©Ñô÷%8P‹cønÞ»÷í´5(^ÈäÁU\»ï¦·öùÚjŽÌj1Ðò~M}P¶šܵªU”¨Á§Êµ( ¤iWÒÁvÈvkÀ×èÊè q ׫µÝÓÉvÈqkª ñl Šñá]«õñÙÛÚ³J™í­$æ]ÙnMºØùúzeoꋽÎõvêËxÛÞ[ûò }þÑëní˜P ;äø‘hLÛ8Ucw¢¾,Í9wõCŽ'²7 *‹`“IïËR‹'‚‰ ‘Åa?‚¨zN}9†ó«-2× "û9±1ú2Ýö4×Ͻî!åJJ2œqš}ôeÿÙ› ‘±pkÚÔcO]ùƾŽãxö|^?d{9ˆ2úò ¥ÌÖ‹ËaÓ$ù!ß•oP¾AˆVG_¶gAòC¶¦ä æ°,u pc?d{Ísf?dT^KرÓÎYØÙ/åb?d¿ û!Ç9 ôm@Ï7û!Kµ,R"?ä¸w|Öžö«ØKxϾLvìé"?ä"Ž„s=H~È6Ô=@<.s[±ÅŒÇåë¾Ï&`Ú±O#ñ¸,olÉ쇌õØ‚·o76Øð¸,o^)쇬æu/F¡¾w'ž^oíÍ~Èxhn{`ð/àq™ŠýT ¶üZÌãî9*Û]«ÊõÑ“íÆ@öÏ£²ôǘKQ1Ë! 
Ï”``ñ+™¬1ò-Ž~ oºÛæ™)#vGÂÓ ºF°¨q±|ì¯kfÊËàpÝl]l].‚Éòí¹* bÞ{£š ö î}fÊVj¯Ž<&K–ÁGÆdnθ0ˆ}~ïs¦ÊË€}gÖ%[o¼ËL•­hÐ^m=¸þÒ}â>¯£ï9&‹¤y¯vÏ\I>ù¨É9e)C¶×Ò³­×,9·× ßsP¶³¡Æ4Wazöáj;Ô™+¯?›´Ì\>œ®yP–.PMÙG_–úS5H¹²˜Ûë̵˜4xq®ìç¼µÚÕ+Ûkb=e®Sc\íµ¸"yÔ³%|UÌÏ1YÊÊ옱H½*g´W‰þ“µn@U°Gý,¶¿Ëà£+K᥽Î娢ž+ÔÉåI]™JKí¯ž—º2÷¹ª8f‹z½·¿£*?Ø—Á©²ÔÏikVt ¢BM•©<ÓŽýR9dŠúkŒ2ekåž´p'àÇ~r¦¬.û¿[8È‚2eÿÙ¢eðäžÆÄŽ2e-÷Ã2åYª#ò`N”uÄ|áD™ç4 63Q¶¦;'Ê\6`ÇþÊ“íî‰ód PV ‡PQÐ'Ê“µ‚R8„+&`¯åÉÞ K ‡pý à0Ê“ýÈG ‡Èƒ>N”íµ‹Ù]«‹'?íœ(ÛÙbê&…Cäl)q¢l¯MŸß“˜×Q¢¬…1Ž”µpˆ :©p¢¬…1â3«…C¸HÅ‘n-"+ÝZ8Dú@z¬pÕ]‰3HáyAÎ]ªäÑX9Õ á10‚R&Ož×™8OÖ:7qçÉu.M'Á¬uC¤ÓÅÌMê†xPê†È[Ó:­"·àÖ<™“W]кrû¿¨›ú2•ç|ôÚ¿m;`¯&ýWv¯Ž7 ,ªî4^Wæ ž®^P¯ÂE€54?µü{Û¼©Àúyë¬Î€5ìË"i/gîÁ ¬!:ÜZ±zÖÐ a²5ÉëFïš[ŸXÇ@t ýHÖàKÕd`Áüúÿ®Àšiÿ_ XØoŸÜ>A` .ȲŽç°>Ð ãÖöµë%ÈÄâ.;Ü[¦-Ä:‚é-ÌòægB¬ÁߎY"]‰5~³±wS•ëæw1Ûîu!ÖK‘õÒ FÖÜ1ZöICCÖþÏ‚¬Üò>¦½‚¬ã#öÄ×¼ Y#ˆEŸ‹ ²†œ.>cÎȺ~±É0ïõí£G*FÖõÈ|ÚŽGEÖÕº¤¿ÏÓY#˜ñ™i>¬×³®GªOâˆY¯7h2ëå0³®W‰!¶MaY/1"ÖKc˜X#ˆcFÉÄÁt {G%ÖõÈüŒõÖˆí0Óº;<&`]ƒñ¬zå*Ö5¸aèéWBÀº.ìaWhûz °®ÁøO»·D—‡Ù! ¬k6†š][nAÖ5XñV4û™Ëˆ~$kãÝé“Xã7ø¢œ­S1°^®õÚ Ö¶æ)¼úM:¯<Ñ;ñj/øD–¶À¼º‡¡àêå8ÂÕõ`go›j ®^îãêõHÂÕæöy{ƒ„«k¶Dm–*¸z=’p5‚у†±àêĆâöî0­F 5Ï»ñ¯Ðjã^tw[ÕëU­®mÅ÷6ƒm´º6ä:çZ'Óê5H´z ­^®iõrg˜V¯G­®Aø¤µÜ^hõz$Ñêœ>•J«ëMÅf›–Û ­®Á騩´ºSžÁL«ÃøÕKÌ1­®±ëû,„V N 6æùè5Z”V/WÉ´z ­^.„iõ§à”`/A¢ÕõRP'÷l]’iõzD«×#‰V×›©A›½šY»ÝL«—»Æ´ónà>AZ½tV¦ÕK—cZí¯*ÓêzÊgïÊW×`$}N(¸Á;xŸ×®^ïÁÍyò|xHÞ¦çªàêåæ®^Žc\½ÜXÆÕŸ‚”'{0qžÁæ„ôOΓíæ1®^,œ'ŸØÎÓ«×(®þ¤<Ùƒ7çɰfÞzM8ÅÕ5ˆ6¿m¸zýçódìêÛ†ã—àêå W¯ÁÄy²]ãê¥×1®^Ìœ'ûlë{WW¯Á‹ódÞœ'ûMx8O¶[N¼Úd^X$~÷ Ì«×N”=˜8S>±'½À›î žœ)û9OΔýÈÌ™²¾z¬ý“&ÀÚ»—ëõRnN•õÎ °^‚m']ëåd ¬—[Ãĺ#ÑéuB¬×sžœ*{0sª|"‰íõ•X¯ç,œ*{ðâ\Ž1÷=Ð"ëõœçÊ'¾˜ÛÀ|„¬kl»ÇŽ+FÖËed½ÜYfÖËýaf½\%3ë5˜9Yö Éœ,û‘…“e»?̬ן½9[ö#N—×àó<°—Y/gcfí-aÖËÙ˜Y/’™5‚çuw+IeÖ5²st~œ9]¶»ÊÌzmÐÅé²_çÍé²ysºìG>œ.Û]%f½ÆvJ—=vpºl7¡õLœ.ûÏžœ.Ûmgh½ §ËkÒe{` ­×àÍé²YêÁt_c¥@ëå™äeüü¶-‹ÖÃÎÕÖð)¯u襁FÒ-¸?È­á¹Ò'z •³;½F~Ϧ‹›^Ç÷ ŽŸz‹Ø:º1üߟfÛ©bëç+I³LkÊ#OW™Ô:fïÛ5ôL&µÆ+wѯ¹øƒcÝàáD®-¦ä:>Jwü±™a¹Æ ‚â£á5!×DÞSG(äóNSô(äŽ[0ÿïz!×ê:hä:‚ñͺcr­çqC†ôDÈ5Ž„˜¡ôKar½ãù”©Úr ú:Hkä:þyËç=(™kï<)B®+>†ÎAɵ7Eȵ7EÈ5~6C‘ äz9'“kü }r!äz 2¹†MÀµ_cº+äfm÷tÑTr½cþp¥:!×Ë)‰\õت\J®ŒKìß!E×ˑ̮w˜yœcARÙõŽþyÍ(ÅØ5Ž„¹cjóa×~%¯wxî—½‰^/­ex#!_ÍygF¾´–á5Z 
ùêÑTáõ¢õŒee…׸¬ëô¹“ÀkƯqdÝ5Ñæk¯‘dopámðAà5~UJúìRàµídQx#{,X+¼^Ždz¥m|Iè5îPzÆj¶Òk±vÚ‘ãk¿LÆ×~ó_û3|í]Gð5^-,|ä~JÆ×Ë‘Œ¯wøYìÕñ«gFŽÁ& “^¥×¶EHñõóãÒöÜÚÃøÚ{«ðk46áûÓ‰9ókøDBǾµ­5_£Óm˜uvtËüÚoªðk=„_û‹#üÚ_Vá×>´¿Fƒ°­ e†Ê¯—s2¿Æ ö‚ï9õLÈýÞ¿Æ-!ª7~íσù5N §Þ˜_{s„_û…_ûm~ `{KÈ•_ãœØTš_øµßá×Ë•0¿ö^'üz 2¿^‚̯—Ka~íÃøµßRá×þ’ ¿öÏŽðkïU¯ý– ¿ö»&Û_-ØܰAfë—»šÁÞGweQ€½4ˆörNØË‘°=ÆÛ_IØ~Û`>2*sÂ#Û¿”°}È€í]N¶º`/×É{9’öd€íݼl?›l¿Øþ`û-€½ü,l³`/‘5]ÖsK—ùó#{9’¶ƒ°ñaWÎÞÑ÷#é27V¶e< °mÐQ€m)˜ì%xjºÌ·@¶ j °—`Ñt™Ç-Øöv)À¶Û®{ií£é2÷ô°ýd°mìU€m™¢l<+àù!œNš.kð´t™¤ì½:–Ý1Nör) °mTR€m‰­ìåg`Û›'ž×>Q3lÿQØ~Û`û…À^~–ö¯±ml]U€½×dDà °m:¡{¹LØËý¹5_æQB¶})`ÛÜöòÏ»æËr6ØÞNذ«h(üÚcŒ¯}d|íJðµu‚¯mª¥øz9'ãk·_ûKÀøÚd|ío³àkï­‚¯qäùLí¸àë½fS’.øÚ&ùНý¾ºÓ”ë ¾6ì¤øÚßÁ×ËÍc|íAÁ×þ4óò½úV|= sïñßÿ)vM"Ε]ƒ8?Ó>PÙ5ü=£‘OwavÃjɰ yØevJ0§ì¥£Ô* ºÆ&6£×ªíŽ­Ÿ‹U?RÃ×þ³—&äü6~­°Pùµ Н‘µ¡zy·B¯ßšÝ€ÓèµÍ©”^Ç÷>Ù©A ¥×p EÑ¥¶w\é5&S{¯ìô¬åÚ»]¨Ñkõœe—Œ^_ß÷8}¢×(ƒ_m^J¯a? O¼É»{ Þ ¯ã³ !ó[þÕá5V=ž»Zýe×q‡±!do;g^K5‡×(€¹wE k­¦â>!ñƒŽ3µ­ì‚®qd4tø–¹F5Î}lwPrº)û,, ­ö[#´ZKF­öwMhµÓB«—s2­v»/¡Õ[­Ý¿»Š0­¶ °z¹A«í8fÕþø…U{_Ví×(¬G–YŽSYµßaÕ6" ªÞ°z3<P5ºÅwûT ªö.'¨z¹Œª—ËdTí7¨OðyƦ:EÕ~_Uû7JPµ_» jœBžaÓÁ¨OÿœuSUãg¡tiÝMHµ cª—¶2¨ö»- Z˨öa•Aµÿ*ƒj„ªýPíÍPíW© Úò Õ1G+ØÊWVcšU¢"ù•>j)µg z¹ ª½¨^ŽdPíG6Pí££€jT¸+ÅPí½U@µ¿!ªý}P½!šåŠT[wNí]@8µ½‚©—¶2¦ö™R/g|$7ækJm)€Rj{ˆJ©µ5 ©=vjClî‚ʨíú•Q/G2£¶Kµ¥Ê¨­ó(£^~–µù2jo¦0j»3‚¨í*¢^Ε,=æ§(ˆzùYFÔö**¢Æ•ר”­ˆz9'#j¿m‚¨—#Ëi D­¥,Qû#DíW)ˆz92iŠ,·@µTÝ4B)Wd‰÷¹Ï›Gd®PKS#ÔKc™P{ÏBíe@íWÙµÿžj¿§¨mäT@í× „zùYFÔK;Qãg¡ÉÍŠ3¢^.…µwHAÔKƒQû[)ˆÚ>Y‚¨½=Œ¨ýæ ¢öRõrFFÔh+6\{“_3¢Æ«…JßÍUõr΢Y²¼ê‚¨í“®ˆÚ&AÔË9Q{?DíGæÝûƷق줰þSNÖñ6< Dù'gÊŽÒ Û»3ˆ™O'\ô9`¡òéÁ™…/AæÓKùôd>½™OÎ<| Ÿ^ble-õ„ÿ”•µhv/k0^€‚.„/ë+ÒÇmZc¨—µ]‹—5JLTúkDX¼¬ãÈŒïEóàS/ëÞÑwKé1Jåʮ{YÇx§û³¼Z+¡N/k--g^ÖZ±Î¼¬¹ ª[YKýW·²æJ£N¨¥.¨jxH¥iµ „Œäš{eQ[u=CÔdD­EïQKacÔR Ö5T5Fbz*`í´ÕZ²Ö5ê G¾7”QKicgÔf箌Úz¹Bêx$¨Ã×l*”QÃI.‘‡@j)jí:zù†bpÝv[(µÔ¼”ÚÞ(¥ÔL-5tSc—DdgÝQY9õõˆ¡›´(¦†©]š–BŠ©4ži£˜ÚîŒbj)ìœZ*;¨¾êc‹þùÁDË–©¶Æ ©¶UIµÔ`uT Ï96‘(ªöS ªö#…Uû9…UKÙsgÕv”Uûu «ö#…Uû9V{°Áê:ˆãø` ²U‚0œVÛsTX½VË‚ŒÂj#. 
«=˜5¦°ÚÖyVs‚o°Ú³Òj>’‰K!¸á¾Ïn-´ÚîÒjëJ«­»*­¶¦*­–‚ᎫýgWÇUæ4ÇWÇeÆ=ˆ÷êc½o«D΃âjkmÅÕ6”)®Ž§}_ÓäKqµ­N«1^ŒŠ J«í”V¯Á[e ®¶KW^mã®òjíUŠ«mœS\m_ÁÕvcWÛu(®¶¦*®^ƒ’&ëÏ&M“åY ®ö{'¸zCa†ãŽqà®^Ž\í \í—"¸Z;—Òj;e§ÕzˆÂj;—Âj»>…Õ~2ÕvJaÕq#ªOæþÁĆÂj?R`µR`µ_¥ÀjØÎûØiª´ÚOÉ´Zó+¥ÕzœÑj ­Žàq ‹@ÃÕ~¤ñja*Ê«eùpÁÕ‘A?GéôÜp5Ó8¥ÕÞœ[³d~”F«ý*…V[°ÑjÿW¡Õð›¡6wgÁÕ~äñûY²áj»ÅÕþ³‚«ãgŸêæø Wû9WÇ_R‰†«ý:WûÏ ®¶ ®¶W¯1J“­­Š«ýŒIÓd¹yŠ«1ÃBÔxµ7ˆyõrNæÕKPxµ2ãÕ~¤ðê5ø0ºŸíÀ:²ç'rÑôXÛ*°¶[®ÀÚîk?R€µŸS€µ_„k?R€µõWÖ1=½R̬#ïHy[]¬—+^í1æÕ^mס¼ÚƒIÓäc‰&”5^­StãÕ^mw]yµ)¼ÚƒÂ«ý&¯¶¤¼ZiãË«å=þV^½}¯†Õ0\Éñé^õÔ'ê¾ì}O­é©cƒçÒ-=DOýàF—®40=u$×3êT™žZ7 ™ž:fO‘#öð*§>jÒmm-*a5âꥀ"ãjHµò¢&EŠ«*®>¿žºÁ0ü 4;ZÊ´àSZ}|­Û)SÇ«ø¬Äc…Vo‘ÞÝcEXõRú‰Yu­ý“P.d<šŠÉX•&†ÕµPÕóЮ¨PX ^ŒòhýJn.&TƒGñ©nÿ÷F¾“Q·ÕÈžk5ëøÍ7¸s1™¸q¢/7t|p1™xR¸5}K­^Œ; [î?{r1«}Ÿ?{@VXÚVm-¼˜`š\òÕV /zP*/âº`øÿ "MÅdv$×qs›Å³T^<à™yæ«×—äÊ‹ žÒ1d4%¾W^Là?}¦•í*µò"nÖ97håŸʄ­ ÙjåÅ„ÏãîSåŶÊxŠtKåEÿY©¼_z„›¹‘U^´²IZyÑïT^ôë|+/F?‚Nç9V/ú¤VE5ŽÙ.” )OR%™Š¤Æö-¹xÀMzîˆð’‹ÇµOk"-¹xÀ{îÂÐ’‹ÖýµäbÂ7qÔºµ’‹”’‹“£+´9³–\ôûÆ%\ÓãÓ» Í%lRîË1&-$ÍØ1¯ýW¥â¢Ô²‚‹Z*Æ .ÆsCÛºYƒ\ÔZ(VpQK¬hÁÅšª§!—‚‹Hˆî±QKë-ÆãúíÞÍVnQë"J¹EL¦®{¬9h¹E¼ºÇ>Öã´Ü¢½ZnѾžZnz”JîWR´ŒÌqsƒ¿–[Œ!õÉ}›U[Ôš7Vm1ãÑM{„ZmQï§[´˜ÔZÄEEšYJ-ZáS-µ_Å û-µhwLk-F0ÿaO Åm´Ñb‹qlÔyº…‡[´/”[ènz·/€[´ÌQŠ-ê'µ-$¥íh©Åèy;Lá!·~x,–$OK-Ú ÐR‹ÑÛcþ5¬“´Ô¢•$ÖR‹ ÕHæFD-µ÷[J{UT/O-µ˜P©c‹·ªÝ)µh×ÙK-&ìŒÅÒx0K©E»B-µhõoµÔb °1ÙmS.«´h× •½=RiÑ~U -&8äàM#Á/ÑÚ•Q/y¡ÖY´BÅZgѲ )´¨¹ŠZL¨þ±õ)—ZL()“ð¯oP -ê·x)´(õǼÐ"—š´J‹Zÿ×*-êͳJ‹ñ`ã{нŬҢN:¬Òb Ý_NŸ*-jO¶J‹x}Q{ªÙ*-âsÂI­%ZiQkãZ¥EMï­Ô"æÊÈo¶Æš¥Ôb<Èóû”­Ô¢æÝVjÑÏ)¥ãœ1—m¸ZS†yÿì¥Eɹœ¹•Z#FYÄgõýð·@k-ÚeJ­EMõ­Ö¢Ý<-¶hÍÑb‹*®ÄTþlÍ9µ(yÙêÛýƤ֢NZ¬Ö¢ŸRj-ÆËìdÊj-êìÂj-Ú“ÖZ‹ö0µÖ¢³×ZÄâXm?–ÔZ‹Ç×w³ê¨¦( òŽ=ÞÃ_ZJ-zeC)µhW¯¥b€|ÒʨësÜÁûÚ0¨¥­·j©E¿L)µèA)µh}GJ-ZW–R‹*$¥îDlµ!wß«¨ziŽÖZÀж4¡‡ÕZ´·Yk-Ú룵Ô‡‰Ý>öÃòôH­µx hSÌ£öÖ ©µè7Oj-Æ9s_¬¶Õ>xk-JÇù6Hý·ÚVS±½UKїŵ:ºæÕï̵z‹|µÆ?’jwŸR/ä1ºƒ‘jh<Ó(ϼ8°i‘9DëPÓ´u$sþˆçví㉛óGŒæH)ÎôUëhWVËÖpEÕÊ¿U›VYPµéQ•U#Ã.³|²j¸°½…Áß «™@¹›DX…Õg¼Çpc7aµAwV't½ÑEŒUÇØ[¢©=;Tau$ »øVãj‘íuä¦ÂêxÄ7f¾7©°:¹öøÝÝC”ÕñN WßêšµFz‹»ú…U+7Qeu|•bŠQ±ô IF¢7Ę46*ªÆº ú\w TMîÏ1öx«²:n¬²»M…¢êè‘•]÷Åýc¹&ÕØ µšÔÕlÌx>‚jC ªµ@µêè 9ºjßx  [‡ïi ¯ ›ßPjý§Ž¤»Zz]QåÔv•‚©±‹³ #ÃÔ¶ 
£˜Úƒ/¦±:·¾UÈ0µÝ…ÕZÍ×`µ­(­¾1ÂL;|¥Õ¶¥´ÚÏ)´{9êWv)7<¬´:ž\!{ñX¥Õšú­ÖôOi5Há=¼¡œVËóWZ}}EÖ2 €*­6ʧ´:²äxSÇ¥ÕvÛ•VK¢n´:ÒÃN¯9ª´ û dýW…VkYø…V_دÐwB(­ŽÔ/DwþwZ¹Ë=¶,tZ­Å§Vc'CtÉþµRZmà\iµCp¡ÕÖ¯Wtì‚ú^jÿÛ1tV÷6D–>›ðþåû¶ æEu—K{àíoßù.¼÷þ}þý/~ý>Ï¡5àýów=»ïí hùЂ÷7ù?Z‚Ò§Ø/‘|Ë"á*п—‚Ï\4êŸ~l¾øÿ¸ÇÿÅð}~ùñ×/ÿöÿbÄþ‡/ÿþåÇýáŸü£gºê’H| bòòÇÏt~Ù"5ÿgJp›üÃg*~¦ï2fŽ‘úïÞgÚàü]ÇL¾ÎDÛξÛGª&cå)K3$øÚg=ŠxKs(öûÃN+™ßåÙàìþl¤ßùÙÔöè³Ñæ|x6`$¯Dž‰ (p[ŒÉþ«Ø2m_ò;ýùñNÎr¼"ß Å…–“<ý$Çßv–ÆcópÐß?Ëégù+g(‘ÿƒßñ-*¹Tk¾ÓH;þ§žà?~ùÏ_ýé·ß~úùçŸúµüvÿÏÿþMcø—?¶W1aO[Üã¦ã(Ð/Þ1\ºø6o)æM5‘°Ø¿çY üȨY´¿±,•°x»a‚bàȽ´Éñ¹‹ºT;ò¨OU2?o…N*ÀžX{¼S³„_z©ÖQµmm áÀÜ/ót°ëæîŒàOZÃ$¡r‚p€×oîãÀÖÓ7v|EŒ±Óº´Ë² ˆ´×Ù)íHô„ 4,QÝo,Îrç6œb8¦¦½£…êŽlm÷C‚ÐæÉmfß_ãnn}7íÌþo( ö±÷ÁúŠbž}ØÃç-A80f˼Ë1Œç¤Ñ}Þ!鉞Û`jÙdJQ/…Hº ŽÁ†:<˜vg&{7º®¡·HÈÞÙ¿•XvßÃ‘Ž“hñU`4›îÜfŸfÜçËâ¸ý¹ÆŽ—e.BDì…ÛÓƒñPÛ| í1Ë}¥ƒ§íW]_Œ)^±˜¶ŸmòU<Þ5¦' E°ÄSëÊßoÇ+7°sñîÞͽ¡IJp~j/@¶Åܺqï‹ÞÈt½¾öxfÑœ”ö&¶~G{Ž—ãÕé¤tÒšH|ñÅ9WUR<ñöÖÕ QßS…à\p+Xæ˜þ­)¼K­#G0ñp‰<™UA–á)cå×ã8[?.ð¡N)=“{ØfMc Äîè¼ÓÅßóÓÆdŠq–¹ž†>Wö§¯(&´{;gWÖàM«´Þç0\ì}PFoÝâ5nè;d’x?±-R6êÊø”·…ʸ/Í VŸËÓ×7Ò…„+Ï®??t—ε˜8ò@’ß ~ºªÖ²wåŒÏTîkJé:i¯ Ðæ»ú±>чe4KŽýËtZ<.(aY·ì=X×ußΜ z΄/îXƒZd‹q°­›ÄØþõ¼Æ¸ŒjççÝÖÒõLŽ_P”/¾!mq|ká5Ø{s4ººZÁ¹:TP/¾É¹¹C†Ò{3rŒ±íT Vÿ5žE_o‰¯ZÝûr‚V§ô-ðñ¡uìnijO|ö1,G ›f<)XNYP¯÷å¸u÷MRé¾hAa BÉ5†e¿3¬ä‘‘™”û]<¤•‘z_Æ,ï$q´êWÉh™2´D5jù×sкz8wŠa|FªŒÖžÍZZ÷°`Ê¿¾’΄u¦‘*œcÒ{Û±V22e|*û]ƒ1RÝ4,˰¡ÎH•ñ)ùªóªÄ’,Ëz$/ÉbÈÄÝivã bÝž*GìŠ{Óë ÆE+ýÆ´æ8ßÏÖ¹¡Èá˜å*›òœ1^Êèhé F¢w¼ÇÄ€]]íßÉ’"‰éçÇDäë1ReË"¢ý´’‡÷1c^óöïž+£ F^ºÚ…Ö‡#XÎ8ìhgŒÄhäÊñˆzÜû7i¬Cž(©2råøÕ¸ݹ±©fñæÄ´®ŒT¹~¦`AýÖWŒɤpï°ÊÛ<=NlÉ2{Å7´mf‰O4­,â*¯sLP¼G²ŒÆî·ÖÖŠÇÏìÉñ)Ê}›Á‰‚£#Y.œEk±Wƒ…´Ùz|¹îvƒvŽQÙºA§Àý ûW¯Wž—‡ëìʾp÷›ÚÇp ·ð«ûû™ŒÔ;>~çìÆ7ÖrK{Ì1©›Ëœb§äÓnkõ::g?Ö`]XnËÙò’1eDgl/ÍÁ²eܳöá}ƒPK!™/ O‰½¿Ñ b$ʳ Ë[MÁÌ’÷¯Us¼&1çQ=ÄzÆ7?2ÖV“ó8_÷™#{cO]ÔI>/*À2ã5sdHÜAZˆ9Ýôàî¡ZöYÚ‘‘³Íyƒb%NÙF˜ÓM•˜uƒˆ=3EÞPI;Þ«ÞXÚŸ“*¦]ĉ“Œy òªºy‚ýÒÞðA;gW–1¢¼ïLdðw«ºômû{‰ò¿¶MQ)u”ª¼õWLO ¨¸ ¯Š©@‡­¯Ä(§>@€®^gS15öñ\yÔ|SL]cÇT‚©:«ö‘Š©Á#ûíFÛŠ©áˆ-{‡ÍŒ©¨­ò0¶L ŠCï°.L}Ô¯Xšô‚9u­ýŽÊ†mã‹pjÏê.y=8u]9ÀŒ¨sæÔîo¶]1ª!ã{°Wøè?K ºjü`¹ÒŠª¨FðBÂ×áƒj3f M¾Í ú¨;ÁÎ^D85b1‰›Õ&ÚÌ©ëØÚñÚ™S#ˆËï{N;¨®"F”·ì“Õ?ÂÄq@Õ5ˆ&‡R ¶hòE¤º6å"Ϩº¶'fíc1‚QõÚX¶ìD{Ò>£êÚ@–~ãU#x·6áš êÄHÜL¥V#sÝ 
>V#£àÈ#V×`ŒT]4,°z ¬FpCŠÖ‘ Ãj¨Yã¿›P‘au ¾d®] ÁêÄÈ}®¬z=`5‚wôôn<ݺÆ!ŸmϰAì8†au "Wº;‘%X½›H,†Z¤|Mô-°ºSДYuÅ×£+)…U¯ÁɪËø¨7ë$aÕ5x_1«®A(l›ªQXuÕ$#lh‹Quûä^Œª_1ó>)£êåHFÕŸ‚sgyòˆÎ¶U¯A.q±‰U×n…´§mVý)8Xõ$X½tH†ÕKdX]ûêƒtvJ°zé­L«—É´zé–L«×àK«—^Ç´úSpÐê¥Û1­^:,Ñê5F´Ú»$Ãê¥×1¬þ°z >ìé`}‡aõ$X½ V× Òû&GX]ƒ [ßá“Yõ#T½þ(¡êj@¢=Û¶¢êÄ„«éÆU¯Gª^:£ê58Q5b;œ`;ãfTý)8Íá<‡Û¡jÄ0›œ¿Éfý_‰T¯Av%ø|v”7L^n`R½ÆˆT×àÓ…¾fͤz=’Hõz$×rBÚôLƒêå@Õõ@ˆiÎlª× êOÁgæÊHãb*ÚÜ×T¯G¨^$R½‰T× J²È©^ƒDª× ×dXÎ9I5b7æÍ©±hÕþ£ª— T×l½ÛzLÕë¿'6™‰ XKé¿H zm êõg'¨öÁA@õ$Ríß!ÕkHu}ÇQ•ª-r ª^‚Œª× ¡jU¯ABÕŸ‚÷Cƒ2¿²‚ªýÍT½ U¯ABÕŸ‚Ó/EG!ÕKŒLxü7Vû+ °ÚßfÕë‘m#ÿ3Áê5ÈÅ — ÁêOÁQëg‰°^^Ök€µ¿ÈÌ«?ÄΔʇ TK¼ÚóáÕ˛μÚOɸ±˜*ŽugÁÕŸ‚÷Ac² W¯AÂÕkðbï+t†xM¢%¸úSp–Y‚W/1æÕkxu ¢FÛ_'ÀúSð¾>DÀz ±®»BQ‘¤ùa ±^œÄz=ˆu ¢þG['b½þ*ëõH"ÖË9™X/_F֞Р²^–õ$d½´–‘õ§à,¼² Y¯ABÖkõ2L0²^>²Ì¬×#©X¯Ç˜Y¯AÖ÷/AbÖËhX–S}Û¾Å?í±gëxÏë]„]á5¶h¬ó³Mõ±ˆ¬ã3ðÖBþ@¯#9Û9˜žÐë®/il UzéÅ‘®0V‘5HSÞ»¿˜ÒëýuâYÇçåÕ쥜ôç€v¤b)½FiJX.6£(¥×¨·•«›Ë(½F^‡ŽœŽôÚjB+½¶BÓJ¯k* 5`Ó ½®O(®³Ùï(½¶*4J¯7ì%Þ'ždz ³¾d–‚¯aówœcú(ôÚz½ÁT‰TP^Ú/Ú8DÆB¯qPsl³9¡×°¶HSV¡ôÚEøL¯7,‚Ãҩǘ^WD¬÷_ezmæ}J¯1IS¡ô†ÖÆš‚Jèµ7Hè5Þî‡KB¯1±¹H(&ôzÑ4'‰B¯7l5/…¼ÖïÊ®=Æèºº‚ÄMi†/Š®­½¢k«Ò®ìÚ 1)»¶Òç®ñÚ¤#ví »Þ@Ü;”][óή1<¬¿fvíMxm^kMOa×jÞ©è„}F_Mtý!8Ñ5®#w߆pZív9#“k«f¨äÚ*¸+¹ö±SȵUSr ïÎ ;f¶vN)Î\ûûÖ={•\û›#ä/ä šÜÚßUáÖ0Ôɤn]?Ó[ê®èÊ­}0cn J¢Ýé“pë ö{ŒŠ$3·Fð:Fú§ÜÚGäέÍ,U¹µ)­‘Nl$n œ‚ø3·Æ—ã"!½pkU\ãx•¹„)à&±ê›Áõ ö5—)\ûÇSÀµïþpí­p}ÝGž+‘®1bÓT_TrÖfT mI‚ k“…\oHÍi/†kŒ¬‘Žö…j×HKvÚý!àÚÓמÐ0¸ÆW¯@_áp ÒÓü[ûãpm– \#IM´qAÀ5†ÿøŒCøÅN×®]Àµ\û}p½´‡Áõr$ƒksbVpà YwS ¹F0Ï ®Ñž‡v}¸öÆ ¸Æ-€“|ó’Tpípí7OÀµ76kQÎHW¢·¯‹pkþ­—Ëdn½\¦ÕŽÖÝÜK¸µ]ˆ‚k¸a>ô ¡„[ÛãRp_…ýXÓ£ pÏC¹^-AÁµ}\#xNÑ¿‚kû¸¶b” ®-1WpÆ='ág×ö‰Tp¯Ëuº*Ü kš»3”[›û·rëÞ÷5ê•[ÇÍ*ûܦ£ÜÓ¤L:RáÖöuQnÏ@ž{Š”[ëx­ØÚ>hŠ­ñý¸çn4ÅÖæ<40Ê­íã"àÚc®íû¡àƒò}M· ®m¨ïàZ¿9Ê­íˡܯê¶]Ý_¹õÒLâÖhȆ—'}àÖ6:(·¶AG¹µ½ä­u$Snmã£rk„[£#端à+¶†]cĸKž™[[s[û#ØÚzÅÖ6v*¶öû*Ø·{ýÛ.hÅÖ6˜+¶ö+!j/ú† P©µ åJ­íI ´ö;Сµ}¯Zû2³ö',ÌÚ;3k¿i¬1:os_¨2kÿV ³ö¶2²ö»&ÈÁè ]X©ÈÚÒ+EÖ~OY[a EÖ–*²^Žddí£‘ kËËYãg·¹±T‘õ†sĵõ1²Æ‘À°=1AÖè£v=£ ë­Vh"QEÖ6Pdí7Hµßƒ’|ÿVd=k—ïñßÿ)^M  ¯ÞËLþ²ðꘗæ2*â ¯Þ ò| mQ^½¡ÄZ:»¡«™‚`§0mLVS›à*¯¶É¶›‚¤·¹‘åÕ‘žsCžñjw ^éÓTÕ›+ˆÔÙ0S­l¦ 7v'çy¤˜‚hm!sñ »‚XQpuAM$Z÷'WsèWÔK@78WZíuÔÄ 
B«)ˆùzWøB'ø€¶iˆ¸‚ÔB?Ø“ô¬¸ÁÑþb¸zm(Ù‚àW6GÜÃúƒÌÉã¶½(ïƒ/ˆ„P_s.V_«ë ¾ V†A}A¬ð‹:ƒøÏŠ3ȃRQs«‹:ƒ<5™=§‡ðjìx‚±|[cbVæ¦-#Ö˜ò¢DþD¬÷˜ έ™F¬±YŸŒçƒÚsºèHÛÕY·k „X__Ï€&%Tb)øÜÓiÄZ+±®•›ö^¦qk˜ƒ_ÓrI‰µÖ?2bí´ç[‰µÝAÖBV•Xk&#ÖkˆµÝQEÖ¸VÚG®ÌÚ–‰”YÛ]Sf ZJ[Å•Y{Иµ,è8³F¡æ^§SAªï|\Ê6Á4ÙEk¡uA0¥áŸ%Î VqTA¬ˆ‡:ƒX‘dq±Jj ‚@ÛÜò¦Ö øl¡¶ÚÕ¾Ýb R«žÍ ƒÃh朻 ÕäCpZƒ<È»P+¥ ­mðh­e¾ Zc"€Do§@ë •!ã’Ú¨£ÐÚ>é ­m¼Rh $Ï ­ëèzÎ퀭±Ëpnn6h¢tx”=DÈ„vî{6dm½çŽoCÖx—ælcÖ 7Ûbˆ2ë[7ôó4€6Aë8rŸ ʬ±cŽ"ýW…Yû¯2³¶U_e݄֨ô¿ L ­£ën*´ @ë;å°«I±_hí'hAæ/eXH 8m‰@k[Öh­KÁʬOÒS ³VêfÌ:Á‘kÈóŒY£$†•ö)Sf½cF<ö¾´ö @k» ­­?*´¶)´¶Û®ÐÚ¾LJ­í¶+µÆ<*¾ŽŸÄÖË•´¶,A¡µ-P ´Ö 1h­oQk]h5j­·`Rk½£ÖÖRk[™5bí¬›‰µ¾àF¬q5ä™ ÄZ;•ë°§g‚ëÇ 9W(²Ö×ßµö8CÖö Yk±GCÖ(²ƒ©{ABÖš—²†ýÅôÔ1d­½Ê˜µ&|Ƭu˜3fmÝJ™µ¾<ʬ5Ý5f­‰›1kMzY{°1k»5 ­½)­ãtÉ~¤@kMèZÇ„ü¹ÇîƒÖ¼´øê‰JwÛ½ºƒ¬j½N•Y¡`ØZG$ÃÖn w²ÜQn­ ×@[ÓG¹µõ+åÖ:]0n­z4ãÖÑžƒüx”[ûÏ ·Ö‚k»J×µjÞÔ£+¸¶¤äÚîì ×Ö%•\«ˆÍе Š®­óº¶¾£èÚnª¢k±]ódÒȵ½±J®m`Qrm¨’ë8ò Ï%%×¶H«äÚž£’k{ŽJ®U:häZ‰¤‘k[ÛTrmk¿J®mDWrmŸ{!×Ö—•\ëTÜÈ5L»bšÑj×¹¶T’¿Rßf²“ØúÏ9Y“ÓÇ*³Æ°€:¢eªG²’cÊäÄ#Dj ›GˆîîW‹Ý”`!;ìèÐf¡ÕíÚÏŠÈå¨âf>SGM•Õã;ïû±ç»'´vp/ЕÕáÛ¥"²kŽ4ô8Ú.Y#™Ayàcuü j{ü.ëŽ_íXÖÑî%³ö¦…5®x'[aUX£’1àÞ{HÐañPú)Y`]냥 ÉE`m*RUX›ÎEÖºþ?ÖR5ÝÖ6ƒSµI ]`­‚fXÛü]ÖÆ T`½Áù5zIcÀJ¬k±®alµkaQJ¬ãi”cöU%ÖZÿÙˆµñu%Öñ¬ðží±k-9¬^ÖV9U½¬Qn2FšèH¼¬µô™ZYÃé=núØ4/VÖ¨‹”îá)VÖ˜ ƒÄvØ+^Öðˆ‡Uðì&/ë˜~Ãfk@ ñ²~Ðîiç-^Ö( )qïâeSþ²¡ö]¾^Öž[ܧæÀ¥^Ö0߯2¸¡xY£˜éAKâe 4A°/.°—5J¤FÙ «—5€ÇQuÛM&Î^Ö7Ü—È&I¼¬#Óäù/ë«A‰ÅÙËúFÜ{ŠÒÄËú†MÒ9U„âe"²Y‰—õáú5…´âeÁœÈ^G¼¬Á™2ÉdÁÈxž^rT‘õ™™`²¾k®?‡&aÖæÙ/ÈÄì.sØdâ½µ쾚YߨÀ{Ž D‘õ2ìÛÄ*‚¬Q/;®:=dBÛ3÷…td}Ç7 Þ§ ²)ĦæE©ÈºÖ.NÃÝUݬï¾Î}²\v³F È©bq³¹<6âÙìfSBíÖÏÈfÖ7æ+÷gbà»§lÿ`fSb ²Sr!Ö~•bfGðŽ>ËÀÒÓÌúz0¥ΈjfÁ˜'ÎÅ1³¾ñ2]ÃÄQͬ­¢šYû‡B™5ìö†—3k@8,‹·;¤ÐÚp°CkY 4h­¨Ø 5crñ²F‰âÞ3鲕µ/î¾VÖ¶¸$NÖøò&Ô2kêÄÉÚuâdÅkäz½L;Y£’ûŽ;x|p²†!Á´ÈNÖP Äh÷°ÉÈÙÉKô ë#HWãCµÔ½ëÄÉÚ×èÅÉš,êuï:q²öexq²~ ùŠAD³‘µ•–V#k­Ú®>Öck+½®>ÖFS†›øXãgð-ì[ÄÇÚJ™ªµU,k«Ú®>Ö¨X 󾿥>ÖVëZ¬­ò0²¶šÌjdme—ÕÈz9YkÎ&>Ö¨+ [äný >ÖVyX[>«ÖÖ®}˜J³‡5rÝ_óî©->Ö8°”¶óQ]¬½wˆ‹µÕï.Ö1ôÄ-‹§Òú°¸XG0†Ÿ1ŸUë_ßÈ»¥²¸X? 
•aoÌNÖ¨öÐvj«“5†BÜ¥³_;;Y#χÀôêG²“õrJv²~ð=‹?ÏØ4²¾1QŒäบš¬#ˆyذÛ'ë»âÊ;¦e'k|N[î«5HýYìx¦Yõt²^~–¬ãgï¡FÖ8åu¤aqÂ>Ö‹¹ñ°~UëŸÖ{ÔRëâ¿ë~Ò6‚ÓÇ_Ä{úŠ5b ?WÀld}ƒDºžÏÆ•Ùȧ÷ôX7,øìTëPÜ@°ˆ²O±»˜Üð´?&ݳWœ@\'N 1‘ï{4¦ûv4'LpcT…X¢ÿût¹‘ís§¾8Dð„þ¢+‰Ä $fÜGÖjH@.¬®ÝS#N ŠN–©±'ü*ŠÁŽª“¬«€ä¼äºê Õë÷ÆXV}!}¸†>EeÕ*B2bSU[tÅëƒm5¼±4ÙMtEU}>Åÿ¬ª¾Pk+Þªá†Íªêë€Yzf=UÕÔØu0 öTU_Ñ8HuK²ª:‚ñ÷þõRUõµ7•a- ªê Óïsj™YU ¼Vß7닪ºÔ> º²ªúBµåƒDЬªŽ#1xw¡^U_¨\¼µ„ŠªãçH½Äž¢ê‚MØiøÁ·Áv.å\Œ«»²ÙQuŸ{'VQu‰û‹¼刨º+Òq³¨$ð|p¥º³¨ *笔+¢ê8g\î0½Mu‰!î ¢©FcOÚË!šjÿU–T#VÈa\$Õo¢µ]Â(’êÆè7Ì»EQ¹£NðaEu©9ù«Š¤ºÀƒ›ü^ER[§/£Hª£9w™æå¢¨ŽãösÚµ©¢͹ÈDVÕ&gÈjz°)ªãlõs»Á”(ª‰Ó´TEuÁøHÞË¢¨ÆÕ—é(ŠjX®Y”WÕV†^ÕFcjN=%ÕÖœk[I5ΙnòPIu(UvIuAMßi©’ê‚ê»ñ”ŸÕ¿¿>«äŠ¢º€Ý•YRÕi9[Õþ EQà;7Z]@öxz×C´žA5‚‘ÃKK4Õæ'”%Õ˜!îÓëG%ÕK%Õ{ªçJª1+EùÔ]ÕñÏèIñ€›.šÕø ´Y×™‰¢]õÕ!¯¸?{£üaówIuÁînò,©Æ5œìôÁ’j<á›6¿‹¤z¹z–Tã x¦"A%ÕhÐN!"©®srvú`Iµ·V$Õ{Z ö‹¨猩਩Ȫj{—UUí=@TÕõC;5ªª.(æ4-¯TWö&$ºj|ù/Œ×ýHÖUãgñ»[¿A¤«¶wGuÕxw.‚¯,¬öþ#ÂjtågZeuaµ7D„ÕHŽëŽoÀauA^|²Ì*¬F°Íè{p«Ñ’4½ÉTX]@žç¾!VÇ«=lìÔQau#06娰Ú£(«qßÒ4'Se5ÎÙÞ´7ÈÊêFj¶õ Ѫ¬^‚¬¬FkiUªÊêŒY¶VlCv=•ÕÙ¹MH¥Õs*l`~VÚ „ŽÞEЬ­œÃLû¼—žÚjäÍ‘Cwª&ÒêŒO=ÞØ&Yfi5ÎøL£=•VW8w-©´:‚÷57J©´:c§Ôô\ÒjÐGM7ª¡ÒjÜ·8&ºìʬŒÉAНBk K«ÑPÌÝïsñ¯^•H«— K«ó5»à_ŒY×+Á–†®ti5~u¥Ò>°ô”VÛ£euÄ®{nèRe5nÁ=w˜©²:×IçÙ÷«²zùYVVûÍeuÆ$ó…¦Veuä|eŸbCUVÇ ÃÂ\.iuÁ,ø†rã±²:þrlÃYW…ÕÑ]Ó•ݽ «ýˆ°ÿážG•AVgȧ˫ «#)E$Ïç(¸X3«ml¬ü6÷ê_~ؾüËñ*D0U‰tÁªÎ›=àï^jÄëŸþû‡ÿú0޲ë_D¿}üž~ûcS¾‰ÆÿÉ“^g?g½ÒïqÊvõ€‹H}ßê‚÷³#çÿò}[ÐnÎßùúǯÊBú”áûÜûvö÷Ïßõü­‹÷ÎþáÜï¯ýò´ÈT¤&À¨âH7ãûÍù§›3þ?î_þ1ƶóË¿~ù·ÿ·ý×ÿòã¿þðÏ?þÁÓìð©‚KŒrÏñWÎgé'ŠvýùSåÈèã{ƒqöúã§:ýTßmÈø.Ô½¦ßhÌ´úݾX@¢nq%ÍàwjOL›±…®”´6‡bß¹?$Zoÿ.Ïg÷g#ÍøÎϦ¶GŸ6çóùƒkd«_ŸýB¢[ÙkBy£ŒéÆ:m_ò7®všãikÉû³žåég9þÆÓÄÄ;&ûYW~ÿ4§Ÿæ¯œâªÛ+÷'æZ%WÿµíˆµŸÿSÿ?~ùÏ_ýé·ß~úùçŸúõûßÿùß¿iÿòÇ6'êpô€}„°‡©oõEª 3ô1GÍû=…ÕB|{aC>¶©½Àrw¦h«rù8H^aŒ?‰éªÝk gÈQŽ9ù½ç\®WO“BJ¬šSߟb‡Þ:q™m»mN v¬—Ä9›eÆjø ²qä¾,¾sÍŒï]k™Ò¸–¶ŸÓIðÀ8}ü¹ É *Úœ.R1Z”nyk/zÏé!µÀŽEþiˆ•ÏL¨+~¼s´|´êy|Ͷw«Xô,Zد¦xS’ÏBk÷µFU–¶«¾ƒ;‚cY½¼P2caw E(0ßÍÕïS‰yüXeß±<ƒ¶÷GóAëè;&–¬ÞÍg]+¯¦™g'ï?gZ¯[o2–ÁÞîí±â——¶cÔeÊ€ZÖÕzXå½–Ö­1|âRÏÆd0ØÜP~*æð`„o0Ñòs¤§7JâííÀL+ÌŒ òú¯^´ˆŒºdN–±D5è $Å3Þžw1+W‘ÃèÊ(òþ¼kvùÚiÁhû¹ï^à%ǘ>×t!ÑÚJÜÇ— ç+ÓÚ-ŽÜÚ(Qƒ…–gpÞ4¶0ç*Ìé]9‚Ö¼Û•`Ín£hìö –Ï« 
ËX–pïÀ}#µ‡y'Z*ÅKIÅ-ru9ë=9nûFU:ò]hÅ¢«V­Ý7-jîP ¥qÜC«–,êê«Tcð é¸å¸0"”ýn×ø´ô¸C¿:ÖÆ³:×ÚëÆîrç+!ÉXš¹Ô‰RŠÃH3cÍ©¯bLÁrν·ØCË€UÞ5} Þ½DÝøA‰µôBÓR÷ëönŒnCÖ:%ºõ\±Ã-ÛŸš‹7˜iQn„ߟè*é ^´î¶cÃùÓtÓÒÚ¨9nbHëgë¡äöþæe?h‘ÌÖ‚Å•1$cà€ì©Ù‹Å狼àQzÄ`t¿Ëy10ÑšÖð~ btªÁk*pÑxc‰mv˜zOSÈëº þä<ŸíŸ -½ ÛÝ9÷4¡Ä÷}¬®ìø8M;ŸŸ¹¹€‚å=Ô©m06†Z#Ù±ÑkãïkP΃–Aâ”'°òñ´#OZé€ÐóÄËõ~Î ù”—s^´^áëÿóö>=›ìÈuç¾?Å»´ªÉäÿܰÌÓ;ÃèEÛjÖ´Ð÷ÇÄ 2È8‘O»ï­Ö} ºU¬|’™d’ÁO”ÇmI`3¥­_‘~ã¶ðÂe"0q“ÄnkA¾‰*ò‚à­·y[˜9k]XÝ&Áë–ÝíÀÇàâî ìxÉØ¬­¸rf±màÃ{PŽïxÊR¡ ¦³sŽhì19­UÇÝ5‘eÛª˜IÏ“!®°;™ »µáè¹ZîB<;k¿ “zƒRÐFeMŽZ0†¬2¿$’ÂŽ³6kTÖ%1ÂÝöª?›ˆqGà÷ß’\¤€¸L†$œ™18|®Óq\¥VÖ¥ ŠÁa;1Û/¹Åàí+²/¯ ŠÁcNMŠÁQ(áÇVq ü²8ReŽÁ%0°“Ïq <9Çþíµ“òr.S¼ ‹9›û‚cw²mFŽÀ+âVèÃG}Gàao—#p E˜¾ŸUYŠÀâ¦),}GàØ¬ƒ¼xnúq^àî—ö6àEõ Ûú’p¹P¾¹ýUr./²téoKÄe8¶>a@·vî9‡n¿ýƒ.Ïð ÂûÈ"ðœðî¥Os ŽÀ3LuÛ‰¡(—þ'a²Œ­ý~GàÒQdp½–.*Dà0W‡@bŽÊ«ƒ|ÙÀC^û_‰À¡ÁFX³vÉ8Oð†=‰Ð8Oúi'KëÈx¦û9„"ðŠÂÛ´Þ!OpþÊWé‚ëiûê‚ËŸƒ»ä!¸LbchŽõ!8BŠ«­OCp™8ÓÙ!¸ŠVÚ^§q~ëˆ!+ÀþŠÂ%äÉò¦{÷Q¸Üã}!‡…sL¢p ¶°p­kœ£(©ƒ]Ÿ¢p Ö&kå(\1ÚÀô! Ç-ÎsˆÂ‘Ëjî5ŸqŽÌËGì¢ðŒ}ã¥* 1¸¬§!84°h’©ž !8”ß2-­Ð"p,§¡ï+B¥¼ªhÇ´J!gRµ#ph×ïnócˆÀ¡YË{äÊ)‚~Gà¯8š"p©«¥î 8ã‚3…àüuOŠÀqšª¢«ô8„>8z±EŠÀc…(@-„àÐßÄ‚ÃxY¹ØóÁÁ !xG.ùŒ“}ŒÀeÖBß±P™"pH{±*êŸ"ð²þìÁ…"p`ÃzkbÏ8¼•û_Á!UÂŽùZÛPÃs Á;Ž'íó3ÇIŽæªŸÍ…þ?Ž“Q2:Éúæá2(ÉjyÅâjÛE~_HŠx8&{o+ããpHÜ“tÿ§ŽW(µ´ŒÃqz@mšx£P\ÂâR¡#]ˆ‰Bñ „ãPJ'Dxë< ‡âAÒÇ¡¸|ÜÓq,^‘®UæÌÒ>àðÓSÕ ¦w,^@<Û6ú Á8ØKÍ&lå`\‡³€æ`¾žr^ãvò§{r0ž5¾Ú>å¯h¹<×½ŒÃ›òäTà`<–Q0Ž\SlLÁø…ëwÒÆ f¹´ó¡q0'Õ¤Ý?ã8x€ìë«_Q0.¯°õÆ(/ð>¾þ!¯8k§9Ç)A,)—xŽq®CD°ò s ×"ˆX-ã?p){ž“½€ãðŒƒ“çˆ#ÇᦿiK9Ï8âxí”*‡KáÀ縌Á8‡ÚG m7âðŒÍDéÄ‹ôpxÏAŽÃÈ¢ËÎ8qžîÖ‹9—xE‚ûmnÌqxÖÃÛ¦âp,ås¶TX‡c@@’¨Ä®8àN›sŽÅŸô‰QÒ+O¨¢—µ¾£8äD–Lóßï8<#aW¹–ƒ‡á'RÎAUñ¼›ÑüÃÑsõ®~‚áSõµ½Û9O8<|Dèˆã.f–uòq)Ì#Ûó†C莿ˆÃeXO ù —_•Ø¢™Çá‡3eš^úkı’Oç@.â«»£þˆã„²¬sŸk”w .s[•Ï;—')ÈW¤Eq8~µœ¹ˆãÁN`Ha¸\Væi³axèw†' Âqt$_{c—ƒð„}$ÌÞåC€ áØ¸,ÐP¯NNA8²öAˆp­ýŠÂåg»?pŽÂ ç‹Öë£0\ž’m›•Ãp¹gŬܞ _Yű‡Õ[) G´ÙŽª›Âð;Œó×ÀCa8¾@—ŠSÃpd:Ä.{+?+ƒ* Ãoù÷¿)‡“û˜'2_18¶©ó^±2²!Ç ‚¶)C>°‡uÂZ ÂakÐ?sÂ3¶rït9ÇŠAšeelá §åác›Ø„‡S@„£p·Â+uç4½ƒð_cä É NÚ¦„#ËØ}H!áÞ*u A8Ž!]ûÜV¥˻„Kœ!“^:² Âñº$J»ê‡ o$uyµž (\Æ·Ëå¤(ß‚mA³Âñ%9wl‹Â¥U¥­v>ŽÂnœô>Eá<s®+ÑsȘ£p|"8tÓÖÙFâÈ)zÎq$Žú8ŸîÀÄÑ4Çù70q¢aíjÚ Åo ½p,±Z\ìCñˆ Y•Ÿ 0-“¬*GYNV¯ J©°»È¶É±8† 
§Uú&ŽC•WC²,s3àõÚQ`&ŽsáP-ŒL’œŠîãC,èU`â0—IÏz 3q­Àé&‹_v·kDLüB&@b$~cL˜3#q-˜¦¯Z0‡ÓŠtÇuœñ yÉNbŽÄqnÙE–>€qlÓ^yŸ2æH? ¦›W”B‘¸&j{ã‡#q×$JÝŸw$w)`“e¬Ù”YJÆJLá¹¾#q¨È0!õëC$‘ÜH;±=‡âØçÇ6åXì’Bq€²Öö§8ëý8ÇÖyÉÛl€Cq¼,dkIïP\®”n¿7”8Ç•&ÀHüÂÉúh#„âP, ñëIX•’1žÐã°Cw¯7KÁ8:Å…‰é>¿°·yoo0Æ/ØkÝ–ºž¢ñØ!Y”’Ðú–ºP4ŽmsÀ´66(‡š NÅ .EãšåBfeÛaQ &÷Öi÷Ñø¥ÝhçœHÙÂÒ6Ò ¢io8®¬o=ˆR7±ÒZ’0ÇsíT”ˆ«D2/£ JOwY"K Åœ^,È öI‘‚ƒãÈRú¼yøëBâáHuN×ïzu Š”ÐZÌÃ&]{z <'Æ¥«Iw81X®IŒÃAGÇ>ω8œÿýïž_ýÅÁûXÐ?ÆàÁI‘9x°Õá¼Âgª\9Õ·&fªp2²èÝÇà74Tç@8Çà€0w1’źp’>Ö|MxU8<Èd4YGà7ž9?àW%'Ç`Q8wB{íãSg?ôìg-n(’HÀƒÄŒð8;³(<7mûÏ /ð/<Á+‹Â‹Ù–‰Qswv€Eá Öñ*^{àÔË+¶WÀq¡ÔM~nø<¾qV…7øß´µË¢p±ƒ(œ·½8þfÕ^Є³òŒÃoXTv8r¯…ßa‚ ¢pÞ2 ¢ð gQx8@á7d{ȬRÖN…ߨý„£ é88üF+J.Äá7 Xlå­öçð;œo ð;lärø\Pø¶89ü†qè…VYÒ$ ¿Õ«Z¹­)üq‡ßê_#CÏ“Þá7¢(é%‡vûøÉ´~#Yp>pŽŸ8¿ðözŒ"pކ9GŠ­QöR2Dà°%-Û'Dàˆ¸Óý‰„c6‡€ýº?Åß°3ŨÒ?Åß°áKkDÑ7ìtzχèû‚ïÚ>3¢ïyÙö Ñ7ró ú !|ô­I‡2€Þ‚o>¾ÂÁ7¢Èj¥¶qðMÉ¡wˆ.8ô†AÓZ{pØ“ ­~Vƒ#šó] q7rãâ\Œ)ÐCÜý`/ÇD,!î¾à¸²Ø†¸;¬,îæÓ@!î–yàk± ÜÞ8á%äOjð[Sà4xÃØäÙJ¾xó)¬—<=ÅŽ|„¸ö>m‹ƒ%®i9î–)-—\­ûÞ‰bp̵P§ˆ»a܇åEúÁ1ôu¼ŸñA‰‚EÔ SÍëƒfˆ½‘¬YÖ%Ïø{Ã$¨í„=!ön.ôœû;ôÆp }{¬O•#ï°"%Jø8ôŸãêÅ®ú¿{ÂÈ{Ë##Óùõ|8“Y ”½Õ7ÿŽ~Ï{Ã’î~Ý¥Ø;˜ qì`x[°æëïØ‡Üd!°#Oн±TÌy'gÑ·||Øg7-Gß “ïsìÒGßa‘Eá÷ƒ\0Y½?Eß7’å5[×Rôý i±„ûOÿ WI|¿÷™,¾eȆœõ*ïà;A›W AÏoúÂ)ìu½õà*7‚ ¾}8‘‰B‰äY,¦wÁwR¼‰e} ÂQX$*Ù=]ðTdê”>øÖB˜tdwk𿾕¬¡‡‚o-„{šm€¸èû]æ¢oôs v:{оµ;l·×tá÷ûJ~k!f´Ô»ð…£À„ú~+“ ^g2ÅWøý*ôá7 +ä@aj~'EÆPš§·E +$#ëQ|øÂæå>üÖ{BL¿üb)ü~ÝӇ߯Ÿõá÷ëg}ø­…åØ˜Søýz~¿ ]øý¾§ ¿Q˜Õéêz)µ ‹×b*~¿n9í‡^ëÂïW;ºð[/D„½Œt)üޝ”Âïø«~ë•2.Æíðø¾)ºü}O‚¿®ô!x|A‚¿ ];…à:«MgööŠÁã”G1øëž>ÝÓGáï+]þ¾ÒEâ±ÓQ${Eâ¯ï#q½òhä| þúQˆk¡;t³ñW;ú@ü}Í Äße.•ôqø«¡|®…l©Š Ã_?ê¢ð¤GYÏ0…áZ5Ö^„Ã_Wú0ü]èÂp-Dª‘•BœÂð×ëñqøûJ‡¿ ]þzy>]éãð÷•._éâðØ&> GÙ…¸Î >×B`±ú–Ä_õqx¼#¹÷¼®«q`ø¦„8¯h|@7åŠðæÝJXÞðªÓ:, RpAØ'Ex°$)jÃ}›g3ãp_ÕƒU‹.S0Ž-ú¢êËŠðpöˆƒqìO‚=ØA°ˆÂ ¶`¶0„Xø…4Ûª•£qœ&–RùÔÛ+W. 
ûsÛ.çpœ÷DC8®1ÝR3 <±ðhA,üÆáY”•õ†ˆ…G±ðpŠY¸NC8`r½%áú B¦P?°ð`è`,<šÙ àÕÃðÑ2­s\ñ0ü†ñt9«*vHá³@LÃãϲCJØ€!›n âwß!ŽœL8[Ó×Å(ð³^ èŽ+º™'éÊ+'P" ‡y,Ω›¤ÄÓpƆóþ.Ãp û-¦ †KL÷¤í\`83–ÃÃÖ ÃpÞü 0œai€á ÆÝîØ'Áp ½mw3 G.4œC~>¸¤\pG¸·`8l}_Wï` ?2<•0\½­ñÒ>œÏÔÌï¸[¯h¿špjÏì!†ó`p8œÛ¶A¦hüAöè®7 G&2¼²-Sw±8<¶p4qe¼ P\3`üºÞRuQÃ’<½q)“oê^“ðÝÙfãñ‹„K¸Ô¤WÖWþzx…ËCบ³ (Y dNÚú„?˜ŸÎ™ŠÁñô ¬Ì…àxzägYÛÇGôøS¡‰.æåäpéy¥¹(ÊÇßÈ4á +ü–,-ïNVºè7B õXÙ ¾QÔ°Ž¬ïØ[“ËáFvüÑÅÞòµÝûô3…Þò›˜mõ¡·\ׯÝ?*O¤ ^þ÷;î~ÀøÇ°ä(v£¬î#÷tKEppf¦õ172}¸ì`réÝ3ãÜ+â–ë „qÃüS\Àý*óŠù\ÚÑR¸ëpÆÒ¤H$8 uñÁ6òœßG޵ÍN'ƒûýÖ}£ š£·ÖiUÚuÜs|œýàTÜ9KCJ%;‚e ³ã‹ž}5Á¨l*¨þoùû/œ©ùåEVdót þ+égŠÒù_¿»…÷§ŠüÎFœöt¿¸'þ½o9 _ºÿÐNcÿî5@ªF«€ý÷·Þæú»ö‡ï¬Ází³å÷’ZßÓSà[Ú`VÀþû[ï?_ûª€ýáC ~…¶L*?”£ßE–éÎ2 ™ã0•ÿûZm'=‹(£eïÒK~õ^NÛ¿âNšLÛÊ»øÕwjßæé}ÆÊ3@ÿÎ]Æåï+ÝS¦c7ÿmS2¶9Ëhª•}OmRW Mwkã˾µ+\gÜþ–vÁ½C»P%¾·]´6Ü.\›Oíò+ÆÔ¦‚m '±zÉ×dŒb™W'o¾M’°u)Wùwuò·I PË× Wª™ Êç l“AÐ…üg“k¨€"uÉ»tïùæTh;ÅK Fx©èîòøµ>ÖžJV¯ ¢ÈÔÓ+ùæUp‡Z)‘q2ÃGJù@*H ¼¤þÈœ)ëù>ÊZÌŒ“OéF¥¢-M K©ƒâ|•J ©ª|½¶UÉG³É+ÒCòc!6@Y½”ô«eGÖ$ÄÝN®§zk.yM¹‰¯-‹y¯º  GŽ‘TVZ!ÉT¤_j*yu¬Èš&u›/¡lïèý.YÌæyÈÀR¢¢®eòøú y¹Lã y|¡]ú‰?`6$lH~[ÓL±¦eýÇžd4·Êrûšo )Æ,›{Öc³òäW›eC·`ó•5sàÊž…²çä‘ײ§¦+é!ź  Ÿ²Ô>ÝÓê,áÓM¸‚ÓÛlrð×Ùmfý$ºª7Mš¶qeaC&å„O°+bKHúŠŽ"-«ew/–ðK*#CT=VËðñN»y7?l?é Û³€ ùãáΦýD~ÛH`BÊgãkÈ®{_rÝÜTK3wõê³ÒŠ8ÿ’ÓlT$ 7ö†ÜÔWƒœVq FR©µ4@´¬uœˆž ®YÈå ¤¾);±‹ô;Í?û¬6c³„‰I»â³úl,KØEÆsÝúì©×ýàÄõüÆï¯™ Èyݯœ¼ñH„}­FÙ˜ÛVMSþÕ•ÆT4ÉÐê8è´{AYÓ™Ùi:vsf—×›àÐf5[V7/ƒî³:-r¾õ HQ†”ñ³ÓV—0eÏÙÇÑáäèí0’ ·ÌN«Ç-kçZ]âo‘uòšèW>ô—%²Gås–ž÷èvÊöîÊn8%LßJy/š-~öYíÜzòY”óÏËú¬¬§›Lyý&vËé³mXÖÄ4šîÍ>‹ôæò7R|¯²)>Ï©ø´ÝgÙ‘ªØ÷0ìZòA”É‹IS“+/æÇ|ýÚeuÃcm·¦iªõX®ÉséÔê±üÎt«Çê›}L5mÕãXè±(“ÿèÏl6Ì©Z…Š¿Þ6~=ål\¡šè"úÖgÙ“m˜­zÎQ¾Õ9û(á[X5¾±§izøÕcu©`‘©e}'F‘3ŽB~t€Õc t¦ T•a#lõØì= ÎÙYÂKm„ ‡Ô/ƒá÷=ÌRãeœì5Ö0ãO·a–«‘gŠë²ô¶ò•uWm ³ÙÛ"erxö0«fÎ6^fhËf‘uzÿ¦%MS¯›½]RÆ ûjõ…àˆÿÝÆ*÷e3&¨‚¼]Z6Î&]Ö ˜O‡‡µ–aï·œ.‹#;Ëi-Oß?볪‘®ËºL¦(Í¿ú¬*–å3èe•ako²2KàóLS“¬†š{”Õs ³…Qï=Êêk7[Fû’Ú½r}&£B²¿¯«gÛ ±Ðþ…3–é:=6KdºNpÉX‚bõXyŵéî¦5ÝÍœ]¶a·øYIÚ±B܉ß_5éÈCpz,N¯õ¹!ÁâÉû®Ã=ŽQ=:ÿe¸•çÄþ5 ªîQ–æéœoÝÝqÁöÖ)ÊžËV=H"÷ÑÝŠ k®öœë¿ø9÷=Êò’ËÙTÕž¯¡GÐ9×s¬J{´˜]'U”í W.¥VÓh(ç¦ÙÞW‡…ìÀ!Ýy;Ä“6È"o¤t†g>8v÷÷îÑ]ÚÝ_}“ÃÌïvc¬ ÎP6’c/¸Ì]e^§ûõ¯[Ék;ÇÇ.. 
¬è=Ë,Š•C¹ç($ ¬k‡²2ùÊ?² (űþ Q3ñ3Æ"¾ÑhG–çšâ} ±zH‚Æ4/êg£X Þ–e9èöK1ÊÆ dõÀe*×zWêÎe–¦÷ ÊÈJD‚³˜3GwV0[]X ³í:oŒÑñ:¬Š2,W2ʆd}]tf¸NXP$<_G%´ž@¶y5Rœ@–ÊÚÉë®bVï°´ÊžÈ"™ð>ÿŽo°ï@v`½r¯TúÇÈR°ªû²zjñÁ‡€²v}ðz¢ì9l÷F•k“@Cfimí†ø[ˆ-\ KWä³þºSÑtîkŒ¥Xed© ÈîÑEM÷Ýw ›e°_NjX çÈêÙê*kTý@|¤ö ÙÇmçäQ¶³hgDÊ2ýYÿ’Vß‘¬Ø æÁ*,¢ÛåY·8ÁZ¿øP¶#]ÈýÌë’&q?‹¯¼ó+€\;” ýŽV'”å»—£@YÁàÐë,ƒ”d²Ü|½…@XèbýzPV²—yˆ E£žAV–ævLK&¶íD²´ª–·ªÂ‚Õe‘Û^>ú[wóAjÒ‰dõ¼]_¾Ð.®î€ó3ElO³fHU[Š0!÷©y%†È²À*Ã_}Y`¥ÇL/ÿn´ÿu"YÕ^"L²2ËPŒ¾-Õ3=ºT?¡ì@P.m9'ÎÑÏ7­ýQA)³šãØEee&挂©a+ð¹$58žÃ×ÀÙ0Û0&b1ÃSRfå:Íe¿†ÄçÖdí‹q©Ç¼ôÂÙMž¤Š‰ÙeÛ½'ArSN}@úZ.«ì9,‘™ -Ädl†åÊõµ>˜pl”…Ì-+Yž¿ÙN–vüæPÉ÷œ™duŸ@VÏÈ·q¯ß”ÍÙ†¾&«‚™ @¹ãdcÙsBsü¤Í–™„7øÑ ZŸ@VÝ™ò×^„®õ»fµ ¸KúyÚðµåÖ§Û›fÚ[Ð$²dH –zÚ[`8¡ '€u´z'Y ­• áÞo] ]Õgí-gþvú>½`Òé´+áÞ\Íñ²˜´Ã½(»#ò¸·À Ö-â=î-0ò…·Ô\Ùî-«?È»ò¼ÎáÞ¢N½Å21î•há>n]Ôî-Hq²³îÅe ÎI©OÆêpoÁbgH°XƒÇ½‹–cÉçq¯DDñ˪f>{‹Ú^Bm}ϧs¸÷u;‡{ RHhšÊ5¦Ã½EÅîÝ̯ ÷âV2ŸÚM¸·¨©gûˆ{‹¦žA9/Žêp/®»Á~¦o áÞ2× Ï’zÜ[pŽóñLàF¸ÕœNÁ ¿:Ü«{ˆ/á^™ÆqØ,O¸€„i€"_÷¢Lzó@Úį€{ñ›åX¨îE]’ 7²H™\Óá^<ºÜ8㙾îÅý:|bS~á^½N¾VéLó:‡{Qv˼ sOú ¸Wß'Üùs(Øá^Üo@K8;‘§½EÓµ$(Ôçe‡öê/^Ù¦^¢½¸,A™7¿{Qɉàô'Ø‹Ÿ”|ygëÕg»`áz9ÖjîQ/~kÈQbúê êQïëFõâ7[jÎzÔ‹Ëäq/ÛËñ¨7~mõj5¯n+B½ESuÊR<õêmíY¯þf{Øû–‡½(׳–º{Ñ—¥©mšð¬Ÿ£,Xë”ëÕî:pveÛžõjUîla±Þ¢ `ï~O/}b½øÍý‡Àzõ7{¶9Þ³^\V©¨ç±^-ƒÓÅÝ­l³^ÜdÝ--9Á±^½–wÏDVžõ†Ö3Ö«ŸÆµsVëåîL¨W?Ä!ïuLœàQ/Ú»—¾#VzC?ñ¨÷UAÇz_?éX¯v“é=P¾ë Ã(±Þ¢Yõ$ªyV™c½úVžmŽX/Êä ïÕ³^üfÁBqòžõ¾®s¬WGš!kº{ÒqÏz1¤?-Ù®±^ü¦T ûó~ŽõòèE¨Ew—€¹ê\L¨W_µôôµ_åQ¯Ž•}ï›ê }Å“ÞØžôÆFð¤7ÌK°Ê‘Þ0Ié #‘Þ0èéÕ ¢ý¦S‘'½øÉÚž>·à ôê †ÝÖ åˆô¾žÀ‘^ýIeÌDzµoÝuS OzQ–±ê_ûžô†‰‘Hoü®<éÕ߄۴o#Ò[4ßÚ IléÕël> ¤÷UOGzñ›ØDX;ãDzõ8Ì“ÞØèŽôj-e\‡‡õÆOÀ£ÞøÆ<êÕß”Ásé<õ¾®›¨7ÞÊ£ÞØàõ†—P¯ÎÓÐÊÆ c¹£8Ô{º‡½aš&Ø_²‡½…ƒI‚½úõ´¦¦d_öÆ×âao<ì÷ó°7~wö¾®s°7DR{µï̤®{c_÷°7D|{õ}Âäé¾'\>°7¶ž‡½qèð°7|"žõÆ/Ù³Þø“K”¾Ïzc_ð¬7¾Ïzõ7×—Po|Åõ¾ŠêcºG½qòð¨efhSÛB¨Wg~ gæzHoX|é s½(ºàŸÕÚ,s 7ÖÒƒÞ8ÆzЋ²$u¼î9ÒxÐÞ³ç¼z;X ÍLJÄyuø•g°¾磔½Úý1¥ÞO¤VŸµƒâ9oì=ç}•9΋ŸH Özþ:œýîJ²Ú­3~òœ76›ç¼q|òœ7~¿žójÌy¬÷=çÕ0ï~6ÝöœWC\(”¯1 ¶ã¼¡ãyÌ£QÏy5¸šžSÄx52´˜.0^]B }ËL›@ŒWß–ªÅæ(é¯Þ ïÔ੃¼aYB7F©òbµvWés ÷Œ7ÎqžñÍ+ƒyŸû»žñ¾á0ÞØ<ãÕV"òµ™áoxp–¾ßJŒÙâ KÏSœü=Iê• "yøÝe¯-q‰óJ|w)JPõ¢q÷¦3q^ ¤›ÂÀ%öœ7Á£¦ÅˆóvAœ,Á&2#Ì‹¢«o1a^X3=]í1|0/ 
ÝLy9ØcÊË“S^õN¶UÊ”×Áðbîb1åU{‰&M=Þ˜кíc3æ…õ&˜kÌ2ya¢¿Ø6¢ª·¨‡K1… c^5»{L“Á˜7–yÌ sµt4ÒŒyaÅTM‰³1/¼1ªDæ×ûÌ«‡èa;8I4c^Xß[ÚJ˜÷ÖŒ<ò¦P1/|)ïŠYä+b^¸X$³da̳̋;"Ì‹Ü8O?–˜0/rSusÅ ˜> °ü¹g5 óʰœ$V_šEƼÜQó²–ž)/’{Ôn¤š)ï Ü27ðä…R[ÆÞ%»fÈ‹¥æe‰çä1•Ö©³/0䕯“ºäù0åUKXõLq®§¼°6™Çj¾^”~]²^šé’æå®Î˜÷>@òÅy3ò™sãæ¼0êæ$@o9cÒ ôr-ôRÔ@o¸ÎƒÞ y‘`΋»Ì>«Ÿ3ç¥N˜ù®»Žé_/Ì‹ 4%xz½0o†ËÃV¯3æ½aµdömóbçx˜-c^õŽ|L ÃœWíözÕ.úâ¼³ÚÒç…Xö2-#sÞÐxÌyÑí³IÊç ežóRø8/ÛÌy3òÁlÁ?s^ªfà¼ô›‡óÒÇ8/R¦izYcÀô¦3Ä‚Àz‘¬<™³g½ ˆÖÌô&TïÖ&BaÐë»sà¼ø’¤ãMS†Ày1‹¦-²cÎ ýím*AƼٙ~Ê›±Ì¿db›Ð’(/MïòÒ0ä…ÙÓ}ÏmèÀxóY½/Ì5²9¤Æ+}Y–(cš_È+±´ôúµµ /6jÇÑ2{È ¯Úb©VäEëëv߬&Q^¹‰_Ç[φåÅñ'Ó%-Ê˘)/Àͽ•¤Lyëé/ÊKÃa ¼$1å-H<-¿8KŒyC5 ó”²m}$c^nRƼ4ÖÌË×1æ•w„Ä;KJΘƒÎ‘~ ¼º^©£OQS^šååžÇ”—FØ@y¹+0å¥ð$`^ìêÉu÷(èÕ ä–qÈtÀ„yir ˜W&º«™N#`^Ð楑ù`^ æ eó–C˜WÊn˜/!c^~“„y3bÌ-ågÌKSxÀ¼R†SŸKÀ͘7ÜŽ0o¸a^Š æÍ½/E5C^šÀäEöY ~S5µ¯ƒ¼Ü-òâPȵ¥ä yáŠ×å ä O@—ûC^î²y‘zí± o†S¿ŒÇsVbÈË])/7^¤¼~Ë(/7)SÞþ±OÉ0åErÄl&âòr0æåq0/²ŒyCa^Œði›`Ìû*s˜N«e«ù™ó¢Û–MGÎëg+½Ü8 zaZ¶üžA/ê záFRöQ½á7‰ôb¡\÷q+&½XóG ¤°µtièñI¯Ý¥ŽcÒK+û€zyp`Ô‹o¾Á¢Íp®C½˜Á`OØMî;Q/–Œzy˜eÔË_?£^žxõRøH¤—KôrW`ÐËS“ÞûÇÔL^é寇i/"L{y\`Ú+‹“Úìi ½<÷0í…ìNðh¯LD_褶ö†Í&¢½ÝJ½‘{áëE{9¬cÚËÑ^8ç}‚€i/qª€{©*÷"!C²ƒÃ÷V¸ÞÚ¹è‰{éÃø†ìÜöbQ¬Û/㘢ùš÷é|oŸ"!ߊ…vÝ’Y¾ØµµÃÀ |ak—Ój'&¾š‰°(ñ zSB¾š¤^"Ÿ2U„„|‘W¹ƒ)>fòp/m:0ò EžøBXö`Q¨»`A׋/Nb’Ñ>èzÕú1Íè;Èz¯½Ðˆ¼7\Eª^œ²ª—Õo¤êå›Q¯jÒô"Y=ü4ï—…ƒîÐÈüºf †½¨ä5®µ°ðš^ÏM/=2±Þ„A3ÝXÒ‹Ÿ––YoÒlð[jîY/Þ”,ç–åþKÒë;Kzo¬ˆ,Ž¿"ëEîlX ¯ƒØÄzC÷!ÖÅÑžõj>îú¬³ÌAÒ‹°6ÔSUJ°W®“¦Á"f b=ì…IyFjÃû-éÅoÂR9¿a¯ɽWÆif½ÈT.¿³*f½ æ™ù6‰õbÈ@æ÷¾DÂŽõê ý¶£(Ìzq\¬?±^¨×ŸlÇ[˜õ&õ‘È?úY¯ l7ìfïÙ›ëŲÒ^çù˜õÂÎݺhd½ éA%ô\û.ÄzqÞÆrŸ Ö†WÏzãeöÂo7eKáȰ7þ¤§½áeíçi/Êž}$Žio‚OŽ|VóhÓ^\wÒSQo‡ÙÞ>jª^D©ò›K€O´7LÈ\gäÙÓ^”ɰŽcUo¨‹§½øBä† u1핲ћ¥h%Ú‹Ëäå-É´7ôY¢½ø²djXk¦½ø´ä~su@¬—?ºÍz“ÚS‹Æ‰õ¢îpŸ2<ìY¯\'k¢}fŒXo,ó¬÷kÒÝæn±^ÔPÏÂ.RíY/G {ãí<ìÅí¤•VÒw†½ ©}$àY§}‰öò÷͸7–yÜ‹/¹'38bÜËcó^\7ïþ¾<ª0ðÅçZ«9Ç1ð×yàËŸ2_÷øj>æ»›‚‡€¯ÒÎ3ô,óÀ¯¬œƒ¯|1U„´×[Ö‹ëä³²ƒÕø†ç"àËŸ)_¢ø†6%à+è€o¼¾ñ2|ãuøb”>²!·¾hnùvì€-ßÐøJ„DÎ¥áßcÖÞl1EÄÏ*ëVõD|c™'¾<û0ñ ¯šˆ¯<û…ääùeà»ßXæ€oxÓ|ãxàËó_žÏøª—½Üb)7à‹—,÷4-_)ëH"°NT³® ûmMt½RÖå¹gF¾¡¯{à^_¨p¤‹šÑ ß„tmHu^&qh#à‹2é ëp'ßPFÀ÷“5™­º ùÆë<òå`Š‘oÚù†!˜oüM|óò c!ßXæo¬¦G¾è˜·LÊ Tò ##!ßðx„|9 4ä>TB¾á³"ä{B¾ +§§("äË¡'!ß0žò ]o/ 
ùÆÛyäËçdù&\e¨_Ö„|ÃLÈ7Ô…o¼Î#ß0Úò ï…ox/„|c™G¾ñ7=ò #¦G¾(’e{Á„|ÑgKË óñ ­@Ä7–yâÞ¦_Lt` Rñežø† â^ßð5zäËMÈ5É2°-™7!ßø`ùÆZzäké‘o¸Žo¸!_~"¾ñ2O|ÃeøÆËHÞ+³MíÃı¾W¾j™ M|MÄ7IJD|Èè‰o¼Ì߯ñåŸdâË)ßX6§Fwvþg“Qß¿2õËÆ!•zëñžO>eš}EÜ=ÈÇ¡a¿¸š&ù84ì,´uæ$ø8 –ƒéÝv„{c™Ç½(kêaó6rˆ×yܯó¸7”ïE™„v}{!×ßÇÆÙ¶—m\ɶ7\æ]{;\{¯g[ózÛÞŽýÈɶ·cÁ¤CÁµé8÷¾¹övŒ†{]{£'±wííH³—ÌY‘]{座íÝivíí€Íœ·k¯üœ·~©äÚ‹²{o²k/fÓ¼¥gìÚÛˆom£7íí´±*›öB ¢I¹f™ö"°’‹–6žL{;bXiŠ»Î2oÚ‹TvÈ3YW™7íE¬Ð?Ë{Q¦(mix½i/žy § ›ö¦štsjð¦½ò‡;™ˆ={õß&yöâJ¬–íùj×Ë{ë={bô½QΞ½øxgRá<{;Âân†9ìÙÛŸ3Ýæ‹C†¢„Vú Ìeý:ÞäÙ+1† ‰-{”äc”A–½°»Lg[‰,{ñc2☗7Yö²&[ö¢¬õºÈβWФþ²ìÕøèì{eo¬¦·ìà±ò0ë+ Ë^¹i‹½gï@t{=[¨ëM{ã“i/¿còì•? ©í¥g¯&èÚi”Ù³Wª"#{Z"öìØ÷ix)Ëɳ±ãS.Û×$ÏÞ¯àl1‘g¯4%ìƒÍ®ØYöjTéø²·ìE?’7iúw²ì•²ZŽ YöŽz l@¾ˆ9¥+ï`Ë,{²’Ik/õYöâ…hð1Oô“e/:ÍsÀYö¢,opC޽ò“2#T“xÇÞXä-{…ƒɲwè:ú±SdÙ‹fƒ¼ÁÃÞ²]*Ÿ™qž-{ñò&­#Ë^)«ãÄëdÙË Ç–½#Jëˆ[öÆßô–½R–ï³Ì"Ë^Œ"P#=ùeã -Ûö–"[öÊo¶û¬~ȲYB¯ƒ½eï´’·0,{ñ >ÇU™,{±Þ1Tˆo¼YöbU&Ïi‚½iÙÿÒ[ö†wE–½ï²cÙ˳[öJ»ÈÇÛ2Êu–½ø¦ÚYð‘e/Ê®³Ì"ÏÞ} ½ûÊž½[¥òÏ–Š<{å7–Üyžì%Ï^áÏú˜<{åóA¦éížà={¾ ä†/o3ȳcMräÖ{ö"äæ½ë={6¥¦ÕáW ¾ú›ÕñYïÙ‹Õt>,•L{1Q@´6&dò¦½˜"/óûaÓÞøÞ´wÃîÅ›ö…õ£i¯NÏ{;n›ö†[‘iïÐó XoÚ‹Ôº'û#›ö>89AúW ¾Úî³þ÷¦½H½6Î"˜l{‘=ø9›gdÛ˼m/~32Ùö°sGƒ½m/†nX¯£ãdÛ‹™S^˜ùÅ“mïy"ª{l{Ãýȶ’„(ë4'Ûöð¥C´É¶÷{ ö¶½¸NâÏÄlÛ+ó}ƒõ£Ù.xÛ^™Ô‘8o;G8Û^üdÙÇ_Ù¶7–yÛ^TSI‰|¦X×Ûö†×B¶½xt‰þÛLk¶½áe‘m¯æï;Lšl{Q‹ëàx²í /™l{ãýœm/.«g'‚l{qY>·dÛ˼m/È]ß›êlÛ+í6–ÝÉW ¾Y32¿ ²í G¶½òý<°IXÇ:ȶw °&¨ÿ ÀW‹®³×M®½áȵe8©ó¼o ɵ£Ñu6"ȵ¿YÎþ¹·íÅÓÉšfY^²m/!o¥Ûö†Ÿ$ÛÞðdÛF#³íÕÜÍœßØ¶e½o7)²íšª=›… ÙöJYE>óeð¶½X†ÜÉŽ<’m/–+Ò_Mmì]{uõÐo› ȵ±~;>³äÚ‹çðÏ[ä«ùͯdà]{ƒ 5¹ö†É‡\{ÃH®½¡›moøZɶ7–yÛ^ä,—©ùŽ‘kïŸãºÍæÈµ7>‚wí “wíE8X¶‹)»ö˜Â¤#½4¾xµòß–¨,$H:R–û»ö>˜»-瀚ö"GsßQ~ø^?| ¢î9U¼ô½ˆÈO'NÓ¿£ë6›GÖ÷²nÒ´±‰%_>d Sé6Ï/ð½¾&ÆøŠÀ 6‰æ·ã/Y:ðQ}¶tàë|å~È"»³œ‘§ÇgOY…çÃùØÓ ØÓº³§C¸Ž<¤.Ò{Í9–=èØ{:ÈOJßBdòt¸i|¬ø:x:ø#‚ÁÒ!!`^«Ó`éÀ»·¥‹ –ÐÖö±]}ƒ¥ƒ;Ü,è´q°t slé€î1±¥M –tê(X:he)}‚¥Ø­,¶Fz1ßpÐ:X:Ð ´`éÑ1pøc oÉÓεO°¯§˜7›: Ÿ‰ÄQÏýÁ¹×ŸTžñ?; {:Б×àé@‡œØÓÍY¢§ƒw/aO:Š<øéØÓÎwmO‡ð·äéÀž-ìé@æj¬óÍãD¸ú¢LÆj‹Hç+KX E¤àyë|Q&q©m)Ή9å‘ââ+@ßW™×ùâ7‘—dÙÍ’Î7”‘ÎW~sìô,óEQ•Ž_gÏ#™/VÓU]¶'ö2ßXæe¾áv^å‹¢q›«|å nYŠo£]¯òe^åZÁ«|c‘Wùâ'«,{Úe9ÜŽÊ7–y•/Êåúæz/˜†¬לµu¾ñ¯½Î7þœ×ùâ:™Ý—ç5ë|c™×ùÆßt:ßXäu¾R&¿³mcIçû.;:_T¥K(Ë 
Âë|c5½Î7ëbz\cÙ3xo¸é|Ão’Î7–yo¼Ÿ×ùÆ2¯óuñ:ßwÙÑùÆßô:ßXæt¾üi±Î7–y/~òäpeo(#o¨ÊÔù†ç"oü1¯ó}—/òBH¨¾r*°Î7–9/~òäŸeo¼×ùÆçò:ßXæu¾ñ~^çÊHç Ã6Yqlç¯óõfnú¾~Óë|c™×ù†g'o,ó:ߌo°æȳÐ7–y¡oüM'ôE^èÒ }C }ã“Ð7–-¡/~NæÛÖ–þ× }ã­¼ÐK5YžÙÖ }ßeGèï焾±È }cU¼Ð7–y¡oüM/ôe^è+eÒ{—Œ„¾(;ßé|ÃO’Î÷]vt¾±Ìë|Ãã‘Î7^çu¾r3_Öùb‘~²³Î7–9/g©ao,ó:_”I›,{9Öù¾ËŽÎ7<ÂÒùÆ¿õ:_¤Ø©÷öÕ$o,ó:ßø›^çËœÎWŠ4‹ÞÚ”#o,ó:ßø“^çû.;:ß‚A9ÐæNé|Cé|CãÎ7Üt¾ñ:¯ó Óé|ãozo,ó:_i¢Çå-#o,ó:ßø›N狼Î7>žú†2ú†¦%¡o¼n }Ñ:>Éç;ÿÚ }CË‘Ð7þœúÆ2/ô ˜úÆ"¯ô•²ù¾Ë+[IšAb¥o•2,Ž·/Ê`.y-³ ¯ô o’”¾(ÃÔ=Ó±Ò7Ô“¤¾±ÌK}Ã+#­o,óZ_4ÑÜŸ~k}c™×úÆßôZßwÙÖúâU×ÇR+±Ö7–y­/“¬õe^ë+Pàñ™~µÚϡ߿ÓÍßIÍ_X:f5ø/vlá4A'ñ_¶OfþK®‘ÿâ7M£ÔçÝþE/hü¥lõø½2áÀì:…ïñ¯j Ò9¼ïño¸yü‹1ª«¶%‰uøC0äèó”:«}aªx Ë}<.Ér_k²Þº1 ƒîédJz_™´ªtŒåÉÄz߆¼à{´cÁ¯Lhõ¹ªµü6¤÷¾LÒÀ‚ßìº? übº®ÍvŽYðÛ`3öùwüÊýàP»dq¦÷mHe}×MN½ÞdVs2%½oƒƒÎjOƒ Òû6x~uSy±ÞAŠ<Ú–$;Á¯Ô¶ 2³ÄrAð+à–¯àåç‹¶ÉNÙN‚_DfÜ‘à7´ ~øu1 Òûâ¥Ö»Ë;ë}›³ÿõ¾`¿iÛFÞW_­ŒœKuM‚_yV®^¢&ü68«K ½’ç‘à·Á>]ZèZŒš¿à×Ι—¿Øÿùh苯N¶ÛKš¿È-°­(YðÛu± ,ø â|üÂå²%ž)~;Ò26ËïÉŠß‚·3d±âW®KiçŠ_8œ_¶&aůü¦ÌUi©µXñ{¡?í„t¤øÅíêÉ HŠ_m·K0¿ò Û((~ƒ8(~¥ö'•)~_eNñ›~¬Ã¯oÉ/4‹HD~Í›$¿¬F ’ߌüÅöUXò+7LJ%¿ð®oû“dÉ/ ¯YòKŠZÖü"1è±+bÍïKì4¿ñíNÉš_sÍ/âbóÅ9’ß޾umɯ|í^ÉõJÝ–µ“^;1K~|½Ì'h~¡Ú—__™Hó+óS:«YóË:OÖüÂcwÍ/½¬ ù Gš_8GlŸ© ùÅ`#Ñr•dÍ/ ÕXó[0‚˜«GÐü†ß$ÍoFrÛJšßð›¤ù•6‘˜~Ûó’æ—:XÐüâXé9^í5¿A;Jš_)ë+YÞ*ÖüdFÊÛ}˜4¿èÇK„5¿RM¸ÀMs¼%ùå¿#Å/¿}Vübq"sñ²¢dÅױDü²F…¿¡â$øee ~YØÊ‚_ sÞ: ~GÛH/Á¯UGÁ¯ O–8~¹ÑXð‹‰ˆVlü²Ø9 ~²kÌnõ¾ÅÞú?¿õ¾^Üõ¾¤&½/K~HïËÚ¶(ø% 1 ~Y>Í‚_–A³à—Uå[ðÛ^’%LbÁ/¢ ü?¤nÃjh”½k¿XÎï˜4~ÓùÃ?Á/&Ãq™}uüJ¥Âr_²‚Ü×QîK*á(÷uå¾^ÿÍr_ž(Lî‹õˆÄ“;Óš—û×Ã(Iû†Ÿ#¹/$÷ By/÷ ÒoÒû†Qô¾áøé}CÃyîgcüÒaÅ ø¥ÕWü†ëHð‹?ÈW² †Yð~“¿|¾¿ ¯d‘¤ø 'þXñ›a>¼q?+~3²¦Üf1NŠ_> ÈŠßðè¤ø‘7XŸÉoIñ›°ob›¬øåÈG%¿˜Skê þœ«ï_þp}ýãJÅ®Íøúå°â,Êjð_iVRJçý¯?üÓÏÐe°/Ò§¿¢+þT‘ŸÚ¿í–xº_ÜÿÞ·”N}}Ðý‡vzÂï^ƒŠ€{UÀþû[ï¾+`øÎ¬×>[~ÿan\ÈPß¿« fì¿¿õþóµ¯ Ø>Ô`þæ_þ5râ–oIf,ÜnËl†åºTê?ýqù‡ÿÃ-ÿ[¤â_üåë¿ý‰fÿã×ÿúãýÃþ㯽ÜedáËÎñëïT~âNM¦‰§ƒ(õ_£oô CåŸçccòw•î)á ¸"ß43(ü@š'é ±Tö=µ‘U^[&±6¾ì[»Br[×ßÑ.¸whªÄ÷¶‹Ö†Û…kó©]þöð#_Ž›é‘lù‘ ¹áñé©ïñçúªsøùÍ£ßF–aˉRd-óºËcwIïm°ï4Ýùë÷)ñ>ëCfñGÖ¸2Fƒ”è¾l¦ã=ðÿôÿã/ÿó—_þô/ÿö§?ÿùÏúå_ÿeüïý»†ï¯_uz®`wžÄ×3Abæ2aËšt¸ìÎcá=Ó». 
vçz‚²Ì•Úzâê°K2ъƦβ¸«wË£Hêü£mêü`G0Ù 9Y…;ùDÇf÷Î/RÚåä eŸ«‘ÖøqmêÜ)g+WP66un Žv(´´ääRéÚ‹Y7Ýï6 „ƒ¤´yý&à‚àµQïu‚³@"°©sÃÎñcþ4(; ༴W©ÚP€¡¥}š´ÈíËÆÎ[èªaÑwÖ†PÈ3 ÙÈ¢ñ›ô†+ôÔÙòø¢è(*¤ ÃRÂlÒoì\6¿‡¼mZlÒoì\±“ÔWbÝ‚=úM‹âCnE·Ð«>´n–Ï-üõP€Ã}ŒY>3' ¨ ÌÃÌÔŠ´ÿP€;‡ÃÞœ€¢à1¥qVzslŸu. ãéÏ*8ò‰†mÈ­+ØÑÞÌÝW>%;@ =ÎfÎE^ÇS´ÌíîÕce ì'ÕEÆg3g¨Ž{>ÊŽ~¢¤éd_)0<ÚÌY®Ýóºìôàë·qÆP¶‘sÁ#lšX@µ6r†$Jý”fOÀžÃFÎjÁ(¯eöe5 2RTP—¨è\i,Dn×Oêƒ"¶‰sˆ9à›;Fœ’?òŽÆüI)ŽzBn'«ƒ63‘¢èˆ' NëlàVžÛ‰'Ô'ÒvÔŠ¬JŽv¢âì“Ö¡®µtÓ:\Ûý£à€ôæÍ¡gA¶y³\w'fR'é—N;Q°C–ÆÔ*•§9é„Ô/íŽ!]ìH'ÂK„5ÇÆÍ¤‡l”éiÇÀ¸¨”ÍXÿÈ¿w‰&•$ÔÑ›¼/'œÀU'CDÅÞüæÍRËz2Y ì' ¶¨Àuóºbo~ófŒËȯ̲â„­ë¿\½ªN }&I+ZÖœp¢MnÞŒ²#œÐ¢ Ãô¦eÝ '亴ÁÔk8Ý>€ã|^¯çè&ðÝÜ7W$T߸O~\Ê«žŒÞ£+}8UûÚèÊÃIÅÉÜ›Ñ '×EÅÑÕ>{øuLÏë]œpô)éäc•á^Ömöùne ‘Yf~¢òzÍÀ«ÞíGÅèw­Žwœ³+öägßš/dï± Lf)t™W´&òÁçuV¸ÞÓMÈx7úe2o”ÝÆÐ¾ÉiŦü¦Ír¼—Ï…¼k§›ºô“U£"뢓«ô8.×óMy¼|Ïä>‰RªæÑª³áné”ò%Ìû„c*¥šzoJ…¿ð¦ÍØK=I¬*2TlÚëÙn"Ö³;Ý„\'áó2)Ö„ñ]žki­gÞÚ¢šįò“õ‰ÚH‰ @[’ž<…Žèõë+Ï`ɘ’¤Ì¯N½ ¡ìè&ÔnwCÿš“ÓMðh:锯 ÇÚ»bc~+'0(K<¿Ž¾Ö\r" 5ؘ—ádvY„:÷Ž*+6æ¡)Ñ.ûÀ]ñš¶2âT­ÇúY¦~D´ÖaTJ×rBª8ºCØ W¹íóˆ²#œŠ\PfÌC^±8áDÅÁ÷a¢Â ½õaå7Ë}› ‚Ôê‡Z|j‡å°±˜Ý>«ÃVL[;·Ê$|^eP»L‰~Å[Úa+q @¦c£ xN8!e" iYÁSO+CÛ¡}Å´wBØ'TK·‰²£œˆ÷ƒd°¬+õÌw3AeE«è7Pç{©m›ÙTlÎëᬗÉxó¬|y5“1evت1y€ù `oþ„°Ò Ò‘Gžcöæw ‹?äm&X埪roR¾íe{QkqÊ \Ò¶³g­Î¸{–sm% )jl”•2©]]ó;ÊÔ¤åš ~·4eÝUünÞ³$§•].Æa°Õc+ûl^ÆòòœlBþáÝ%š˜$£ÊÂêÈ&Páû1k”Ù~Sî·¤[2p8ÙDEøÓú’nI#Avz¬ŒiuIÅd±°õتbÊô EðcC¬”¥‘Ma޲ñØëRl WPÖœlõ”'Z;ÆëºÄÆû '›¨šS³h»ó· ±xgiÌdµ»óɆX, ¥UÖÚDæo,½W…Îh˜mDíêh={ºjÏV€¿ñbÑÝ$ˆZÎŒUõÇ;Š•9]–a+÷ÝÂïú\°ZÓ‘¬«Žj"´w¯N5_ëݲ«JH$ýp±áN2ì·g÷WY §©œ¯Y_lˆ­Ø?½ÍE£Ây\§Çú$ «#šÀ§¢Îâs"…ÕQMLå‹%•GÙÑM`™øŒ±¶®+´´;Ž…8¡íÊÛ«×c¹i°3=§ÇvÀ“)¬ؙ¿÷‹õ½ N+üÂÎ|Úcl…IW^j”ÝD…jŸ"©Ðýd‡ ÂÊ‚)Û²‹n/ÀÔwTh{v‹.4Ìó7,ì¬ÇÊðД„Õiuz¬Üª¯ÕD}àdc,wæGL®Óc% ³¬Óg_v ‹ÇƒEýŠkžìtÜÓŸ»(;ª Œð÷6kl—ºYY—•:#]Ó”óI,Ídå~E¾¦°ÉcÊܰÙp?…nÖe%pªc¥;FÑMàñ$8]Ù(ÚÕ¼hâBRySJ{8Ñg èD|agím²¼ºw úÉôBCÙ³ÃX@¯‘íLD»•í+}ÃŽY4M¾ÇXìÄ]– ™ñ:÷±£ÆâvØ.%[/øÖ$ ýç}Ò, òß8#0/t¥½êòôµ›ÕðybÕ5¹«Ç¼XÙHXS¦ c^¬À¤#­#„y¥ÒùU"Ì ŸšTdÙX&…ð˜WëI#p–9Ì[ñCb3…t„yQvŸq‘0ošBùéøE˜eè*+A/a^ͶÙe|»"Í> 2¥(#Ì2cæ Élójz_y‚|æ…Ý8ËSÞ‚EqÉXŸ4zp"ä“LØC^86uh}ÓeEÎ"íÁrÙNßä… ”Ä/}¹‰äšɄ“+w™×%‰ò²aÀ(/ªÑd._î„DyÕK&f}qòªUÝ£QÞà²E”׉–&£>”÷uY÷i8h%£â”Nç yÙ‰ó–yø n¢yy´ 
™V²/Ës^´€ÄaæEœW~ÈóXq^\W Ë]ÄßsÞØâô"‡µ¬·íì;^´ª|ÎæCJ WÓ9·í;O¤×ÉX›Ò˜;6žôjîjÙôâ²ýÞôjî\ìô&^ý °ézéåœßzq™ñ2€iŒF¤7Tré¤â}ŠÏ„¬dÔ\ñ ô¾nT}&ä ë6‹ˆôêëÕZzÒ«—]ÛèHo|‹žô–y|E޾éÅu²\m+y ±^\·ág`½¸®â%LÆM¬W¤Ëè¸ì4ˆõ¾®s¬W;%FßyXohb½¯ëë ƒ%±^}/×>«H¬÷õ^ìÅuU–ùËÐÌÃ^ývú>I°7|ã{q’ŒÈG>™¨ƒ½¡i öª7)v çôi°7ÖÞÃ^ ¢¿R¿ì­ãi¯VÑ”ªL{cãxÚ‹Ëdž±CŸD{C_'Úûz#ŽöêuòƒW™ ËÓ^½_?(ÅÓ^¡d Yûö¾.s°÷u;{c5=ìça¯N²vXHѳÞXK‡zãUŽô¾næH¯^víüDzÃ8åI/.ËHòÔ›ÑÜMzÃ`O¤Wß  ¬=Ozµgâ'ç!­¥ÅËÇ™À¹½K¤W/p瑾é}Ë“^5tê­ Ó“^|[/‹"½ú´É=æ À„yõYÛÞãN_óÆë<æõ÷òˆW¯™þí«:Ä«ã R]=¼xÄûº—C¼ñÃöˆ7¼OxC(@„WÇ‘¾“Váà'¼¯j:«ռöQo"¼ñµ8Â;ª'¼q¸ó„÷u#¼ññáoÄÞødžðÆÏÓÞF{Â{¿g¼ñ«ö7Ø™äÕö8ø¯«?‚¼±yÈÀC^}“2kµÚ_W¯ƒE挫òÆ÷Wë qÃ\räÌCÞ×ó9Èë¿9x_×8À›ÜÞ8ÿzÀÛÀÞ8ÛxÀÍÞøJ<á}]7Eîüq{À{¿¼±+{À߇¼ñnðêÝéj:là=Ë^íåÍòJß—y¾;²ç»ñ-z¾Îóݰ "¾?FÏw_×9¾ftwãäñn ;<ÞÕ%vj–œ…ðn â<ÞÕtx7ÞÎãÝx;wãíñn˜ߟ†ç»¯:¾ƒÏwµŠ2Ž-Ïw_5t|÷õdð¾Þˆ¼qà G€×Ájú#À(¼¯ëà N€÷u¼!$#ÂûºÎÞÐ DxÃ÷J„—?s¼aþ ÀÏÞÐYð†VgI9·:ÞØzkPpýs§â~«“^àÕ{e]y zqlÐöƒžÇú»Ù(=/–Éé–/hê€è•W|Án‰EYÏ‹ÎɇIÏ áØm†õ¼|òrZÎ9¬çUÛùÒÚ[Î 7œkï ²œkj°œN9ÒÖ Z–ó‚áä½Çr^V[Àr^yô;í}3–óÊoJǶ 0–ób|Ø2kéżðj[íÀb^ξBb^­¾Å,æ} Û›Y,æîzöV<‹y¥4kߖżvÆ»í•n1/9=eéŽXÌû@6¶7èYÌû µËhK¼Æb^ ·»­ü ¬å—9-ï£`–™‚Õ¼Éõðk¯æÅ ³Tlˆ`5/ ú,÷V󆪚–5FTó¢#ØLÕ¼°o’1o)¡XÍ‹ßT±Ó5ñ©—ó>PZïý(–óâàq݉rX΋ 2WM*õõ¼$¥iïžW=¡’E,¬çE:9‰í—*‹ô¼º½ŒŸÓð…õ¼±é= z‘sI–Ú¦ 'Aïƒ=§|m±¯Wô⺜-þ3Eo¨<)zÑQžjÁ++zÑQd¬·s¤è×yI/ðm¾÷Nˆ—ô"W– ªQÒ+eêÉ0ljߣèOà½(KiG[¤éå÷Áš^´DÖmÌøˆ4½ñ:¯éE±§´B.â¼|$„9/X‹€Ç½t7{ÃÍöòEõâª[ÂÃ%&ÔËWMк‘^<T…&®õ¤7^çIo¼Î‘^n"½xI8>ÓÛ2éÅÇTÏá"½á:B½¡wêEŒ( ê¥4bÔË<ê÷ó¨7¼B½ñ:zc==êåWF¨7^æQoø¶ õƪxÔæ B½áûõ¨7´9¡^êwzCEô†wi 7LÛzÃ(M 7Ö݃^Ä1iâ&Ð º9º¥]gЫâA¯\·qZ½üÔ zi2aÎ˳s^nmæ¼þõ3åå`ÊËcS^`ÝÑÛÚ aÊËmǘ—ŸÎC^,I Õ¿ò ò^Dùg2äÅëRÀFÈË}’!/í=qý,ä=ÙŽoù÷¿EË+ ²â øEx¥÷, ùõï‡xï µÓ^´¼¼ha-/EC,å•~\5gÆB§^Ê+=YZÝ|åXÊ{Á¹˜—4KyÁ"Ú¸°þŠRÞX楼ÒþÉ q®â}`(lÉ%âEVù VòlF¼X¸ÊHðŒˆÎÒÃ$/¬å½ÔDrKóò2¦"ÈÛ±æ3¹€¼h®ëHW òÊuHœ²Z!/Ó#¦¼Ìª6åUWÍ­veÊ˼)o(#Ê ÿÇ2ËÌ‹Lu×ÖbæEš™kÇeEGÌ [ùÖG¿¯ùÔ^Ì à#Ïö H̋ƑĶ:ó"•ÐNÉb^¤³—ñÙôç$æ½°´–`s&éf1︹Ó(±˜P®¥´ÎL²˜Úz[ó^0aÍ4Ç^Ë+—Ƀã ~E)/Šž}‘¥¼˜®äu.3MÖò†ÏÎky1<;õky/äÚfnÉZÞ+fż<г˜3gÙ¹—L͋鵳½e5/Æš§lë/æ½Ïd³óg1¯”IÝ$æ½òá=AÌ{)Ã3·{ó¾ËŽ˜U‘ÐAšáyõvð›Y‰y/È'`‡ƒIÌ‹ëàß1q¬—ò^8Š,Óõ:RHRÞx7B¼È¥Ý-‰Q@¼ Ûߌy4oEeË©¯ ðH]³Îø3â•Ö¾w š€x©ñâ•õÿµsÉ0â¥/vwζÀx©£ÆË·cÆ 
ïo$_šE†xùo‰ðâìrÞ>LxÈðâØöm™˜ðÊO¦d)&á…ÕsÞvLxCUˆðâÐô®uæ–/?^I7Yþ/ðr5™ðòí˜ðò“3áåÄ„7ü&^~¼á2¼Üƒð†ëð†jàeÐ@€:‰–M߯' x¹* xéÉßåVc¾ʈï†ûߕ֖‡YæbÌwÃOß —ß ×ßåÇ"¼Ëý‡ñ.÷IÆ»ô“Lwù«bºKC} »<¨1Ýå'gºîGt—«Ip7\Fp7<9ÁÝP‚»üxwCÑ]k˜îú)<À]ZIºË“Ó]¹$˨Ó&•‰xÉè̱„ñ. Éšâh}ßøðºéEðÒž"ó]¤.«–J%ð].c¾û*s|— 4ã]ÞLe¼ ¡K^V_ïâhl±¤ï¾ÊÞ…M^nëŒlà»D(ß ×áÅ‘î\ÖQu¼üÄwqÚ;o'æ»\F„7á•¡¬$˱/G;ÌxÑ£’%­Ø”·Â~+¯ÃÎòbAºOîÊ[±æ±,0òb°½·™QÞp;¼`›÷²L ”鯶i ¼Ð²ÜÛˆ)/¿¦¼¼‰Ï”—Lyù`ÊKÀW;¡¥Ô êc†òò‹fÊ(oEF«löLyáqÙâå¥ß$ÊËu!ÊË?É”7”ç eÄy_ecuYÿdÌyùM2ç刜—nÅœw‚ãÛ¼çÅï?iXDÎë’8/U3p^úÍz GÐËä’AoøMB½´UP/²Ó ËÆPoxB½ôÎ^¨W†/sÓ ¨W&:™—N) ^zÛ³…΄zå{}nËâ`/iSö*ï‘…vŸö {áq™s€½á' ö†ßœ: ¾]Ó/ãç,n§èý-漚)l`åñ&½,'Ò« Zéx)ð¤öÔ¹›VGz5Ÿ¨|5K@Äb^˜É&Yé÷2/ób^M,á¶¹=x1o…„Ä`+›4‰y3ª´g%»e5/‚÷é.ñ6çÍpf˜”å+ªyÈÉŽÄ‚9/XÕm'‘ƒ9ïƒ#.vF6˜óÝÛZ•Íyeø•)QÆ©ù›d΋ɦßë£æ¼:W[Î×`΋­Á™€ï+ ^˜žÉLºLƒ9o>±oD½Á€—¼y‘@w3œ%Ò«á´ò™˜f÷Xó²C-ÉyáÅwí¬I,ç…WñØ9í‚9/´â½mƒ2çE­æ‡å¼X– ïNý+Êy‘~[Þ˜tá—iƒºã,i1›óJ™t ûÈ£;¯36 î¼àß2Ž•rÞŠM†^už‹r^¬º¤íêßrÞŠ¯g¤‘Ê‹ôj’ù^ÎË6Q,çe#–ó"·´­ùNœWʺqˆr^¬x4Ú[öS΋qGþ<öIJ^ÎÍËy‘‡J¿å®Br^¸:a)ÝóÛ¡c™¬IÍËÜËy97Ëy¥LCÎÕOHÏ‹j®¯ç+êyñ²¦Qí„¡^Ï‹tycï[°žWÊd¦² .Öór=YÏ˯“=z1ÈhŽ‚äÑ‹L×Ò kdc^Æ/¬“Ó0ÍîñèEZô^¶i+yôòè̽œ¸MzejxʦÞ£iÿÒîìÑ+oZ:Ã>¢J½èz ¤7ê ¾ðlÑ‹Jʈ¹4ûÛ¢—g+¶èmš½bgì+À^Ü*Ae>]®Ø£eí蛽G/š[w°úÛ£]HºäÊͽñvÞ£WÊ`’´rO²G/25JÏ“·05»Þ£·a"C÷½4´Þ£7ü&yô"}: Tò蕈EšfŸ;&^|>ð&Z‡eÉ£¿Yr Þ¦ Þ÷b†bréEÀ(ÑÄJ§Ì.½p4¿÷ˆ]zbÒ”Íi„\zñ›¤¬DŒìÒÛ`-uLäÒÛí­ réE*Ô•Bó+ ßhM.½ qÜVÊ’IoÃç9ý ¿ñE™´Îvðö&½M—þGéëMz $?ëyØN½ s{6Þð®È£ÏuïíáàÑ›p`ÊÖ.Á£7AfhÉ£¯q4Ë‘É.½ñÁ¼MoC²É§›*ÙôJ™4ßÕúrTð6½ò›©%Ùôbó|–ôƺMoxedÓ‹ßlg÷ˆlzQ–tšlz‘MWš•X„mzñÇ…%Ê”%{›Þx?oÓ¯ó6½HåúK$²é•2¬«¡¨ù Ø7Ú{›ÞÐDdÓ‹ª¤ËݳO/ýÞªq²é—M›^ù[ª5°ë]z àdD\æ3äÒ˼Koƒ×Þ³³x—^ÜíÉllɦ7ô.²é =ˆlzÃíȦ—ŸŽ\zeI%Ãöž6É¥Wþ/skc“^4Œ/c\oÒ+eùxX²Io¬¥7éÅ7W»eg“^”¥­`c“Þx?oÒ‹N˜d¤¼V™7é€hi«wGoh òèÅíò‘yxÞðäы˞£× Þø“Þ£DEfÈåèl½ÒÒ|æÈ½h(¸..³yòè Syô‚—4uR}yôbQ.s¬t“ò/Êä?vÄ@&½hÃVö&½‡¤i¦ñ›ôJ;Uã^ÕŒxI/V»M~¢,ëMzye“ÞXæMz9š`“^ž^ؤƒ‰¬ÔÖÙlvéÅ'bi£_.½éD®Ñ¥WÊd1dÊ0véÅ3\[µÍ.½)yc™Wò’ ) yC‘ÓñÆ'ò:Þp•—ñrŽˆ×—°„—K¼€7–ù.UÕ»ü*X½.óâ]ôYÖ¶Mkx÷]vÄ»Ü×Y¼+e§ç9ncLïÆGðâÝX/Þ e^¼ªIâÝx™ïÆë¼x7–½¾üŸ#¹Ÿ/‚¸´üƒ^<7£QóÚ‚ f  ﵕx.b*‰TûtSõ^ |p.Øñ^è ¦©_‘çØñÍ#ñ\€¹äÔ¶žçr¦Zæ¹2ÑŽ«VóÜ$ ÛpÎ>om&]-¬ub/†îf Áƒ‚J–qêI ^7/å¿ 
^¬LtR™®»¤àe­$+xYMD—‘(+xYdMDW"ð¼4¤_/¢ 2$¯sÊÿÑWՊ̪þ›ùŽˆ.¶R¬ÏF¢‹MÑKžº½¼¼iÅ@ ŸnÎZ8 Ë7è’:&]y÷­âø—†—2 /lw›Éè&MD·À'Zþ…Ùü’†)³mL1Ñ…Xnæ AÞ76{ /BæiÕó‰®.Õú@ϯHtQÏ‘­…èb'­­´ržçŠ·íÏ bbRðBH-=@BÀüõRð&äܱµ+xƒÈš¼œÛ¼H™ëÒ?²‚7Áf¾˜°Ÿ¼JF­ºˆ³Wðb)Ùóe¯42˜H/™eNÁËbzð|ÎõÞZbð"ÎM·Œkó}x/Äêp1ƒ\/à%e<ëw1`ÈúhͰ~ÉÌ=AõúÝ ñsåü‚¹ÐIB•)¿ø‚¹ahfù®Ü.É/öÒ Øù®ÜnÝʯ´k:Õ9þ~˜>–ïrÞ\Òïâ¬_ÒÛ«!æ]'ý®¼è ž7+à xy¼d/®KIúæ”6š€—û? xYLÏ^>ÇÁ^¹®Ü¥¯¤$à i‹IÀr_“€7$¶'o¬¦ð†j’€]öä\e/²®K\¿m¼€¯ÌÑ#ð¬Š߃~—ް~'fÎæ ëw+†³h$ýn…}•-H½Ë“;«wùè«wc-½z—s“zÝ/´òÊ»oGêÝPMRïV¸¹ÉÌÛW5U@ê]”I™4”Ô»HÇ Çˆu˜›Ô»A.ïÕ»dç´óõz·â(a9)à¼zW®Û˸@sqŒ?;ñ©w+œè¶Û«w5àÝÎì¬Þ­äɗéwñ›i˜P€Õ»8ÜÒ·W=«w± †7É&©wù<«wñ äj€Õ»±ž^½‹Ófigˆ`õ.¾Çü´­öò]Œ8š«ÏpV/$ßåO•Ô»iî-S'õnx:’ïâstDÁä»a`#ùn¬ Éw‘Öå¶,èA¾ Hk vI½+?»yS©’zŸÔ½óX°z·Bâ‰|ÑKdìÕ»¨@ ËDÖ«wq@Üxç_ÓyVɬޕz6hãÌ´Ö«wñ›eê!¿ÎE™Ä–B…Õ»è^C>ó«ë=êÝ0œz-Õå³[ÆI¤ÞåC¬Þ Ó©wñⱦžÒEÙ•wÖVïâ9èÕ»ï ¯û•ƒ-tgï†V'õnèK½:2ÉwA®•þ…Å»X¤æX…Å»¨žÄnkgÎkwCß"í.ÞýX¡zຯ÷äµ»øÍ„.1kIÚ] ]c4óä"ñn*I¼[‘$a®6×/˜Ä»¡‹x7Œ3$ÞåC´,ÞÅýd!a§×H¼‹®,½¹4‘x ÔÆebsïbTj–0‡Å»`E5{¹òjt*¯ÝåIÒ]t/C½r_·üÉô̤ܥhÇ„»X $[£±nWZìÉÍ©³n7|Õ¤Û £–×í†Á‡t»x`|ÓKãLº]æ7¬Û媰n—ãeÖíÊoÊ´u›`”t»<€²n—?yÖíòäÁº]žùX·ËŸ>ëvãý¼nWfH&ïcÝ.Ι"øš±Y·ŸÝëvñ u§3bÝ./]fOM¯×ír±n—ç*ÖíÆë¼nWzà­ðmÕ C?g5Òÿ9[Þ¿üáúúÇ?4 æ‚üiòÁ|MÑ#þ+Íæ‘Òù_ÿëÿô3Hìë‹´Áé¯hƒ?Uä§(öo»%žî÷Ä¿ó-å[VÉž>èþC;á÷¯¢l«Àúïo½ÿ¨ºç;+`øÖÌ×>[~ÿAfbKå“ÿY£ëßܫ뿿õþóµ¯ Ø>Ô`þæ_þ5)`GòC*È ya4d Ä¡1©Óúãòÿþ‡[þ·H½¿þøË×ûüǯÿþõÇÿú‡ÿüÇ_{£Ž‘\3ÔbLüµw*¿ýNÿà-Õ$4*¿þN-Þé†Ê3>ÿÎ=ÆÆäo*ýS6·•ø]3iÉC%‚Îü;j“:ò%ÏýšX_ö­]¡¸ýêïhÜ;´ Uâ{ÛEkÃíµùÔ.¿b¤K ¹¸eb¸%öÌíâ'ÒSßãÏõUçðóÛTºMj8cs_@‘ï»(;b‰øKHYJðÚTOOúñZ¥Ñ¡'éøÅ ¯YÞæ2_ê Æ66¼ÒÀ¢gƒæ`~oéÓN,Z('–à¬ó=Õ)–À­ºt¡™g´Ë¬|ÄL³·²eG,ŽáÌ´«–‰Ç‰%ð›2øê+GÙ8b ä;›_wìÅoØÌÓiÇ^ü†Íá3@„u»aö¾¶B©ë©Ïzú¬o8lÆ“|´ÅÂŒž³K _Þ†4eVqZ >»ßsuZ ü䀆o6Ž”±D,kN,ÁÓwÏ݉%xîy8±ÄuéÓELú¤“K õú°£è(;r ´Ô„ók-—SK„†-·Om{ëzIG-„èÍ<øº„ïG,Æ");b Tò–Ñh.J$Lvb‰0N)šÒ'®;Úì¥9±v<ªDUS×±¿W¥ákSE^9ýÕ÷sìÄ›X soµSÇFüK„ñ ê§ìÇX_v;±D¨&´>űþ™krb ¹ÉCVèÛ±é·Åñ7‹K i.ä†P§†®{ {Œ¥„](;b‰ÇɾPÖ|r ÊÔUs±ÙpÝpb ì Õm˜Ù¡79Ù-ôƒ7·C”±§>};b Ä}k]:<O ‹vÖ­®Y–ŽX"Tò'†‹š¦cCÞghC>ÕY±O¯(—ÞBæ›’¡ÌÕ¸ Ÿm!ÚÕ¾p²ÇWËŒ-;ì öÉ·[·Û# ·á޾aêº4þ(«l«%he4ž¡e‡³ÁŽdQ&U]fTÒžN-¡ÂÈ-Ö–7êÔœ=¯ËÒê¨%ˆ†dô&=(;j ´7ÜŸ9©Â,ñ„²p†Üþ¯]–VG-s¦ GYuj 
Ýi¼Í'LBZ§–#eG-¾ìÊŸPÉB$”ªÓŽ]ùÊrŸÅ®ü e¬ÝDØ•?.t•±ä‰(;j .±)"Yn¸8å3Èú·"K«#–}L–VG-ëp’d¦BAÙQKàmJ\5—] ¿kÓä9ɪê%¨:¶7m‡§ù bù‡†SJ„–ãH%BçÁ"ð±οôµpíX –I—õtV9ÍÍ·Ë2åH%Â+|’OnÙùöÖì_»äØŠ–YêW;â‘˰϶>îOuZ tž´•qãôÖJ ]eÙ»Á\`k%8•e×t’.¦´Ñ³`ýÇ/ÿôh%ð›Ë,W¬¡‰÷KI$ž(–>8”µ¿Ï¡Ùáêî­÷½­Ý’Ø(öÁöI^dh !õ‰b±Œ•¾k¤:AùD±4¡lf·À™ž–‡ðyÌ\„óâÔt;góÊÀ-kŒ}ª0/Ê:’OSf¼òwÒA†¥¿"Ì‹ëî“0/@ü&2Ó6yÌ‹¢ÔnËöD˜·S[’™ˆ0¯^wÝ– „0/²a7Øuvq^YËH./õjí+pÞ¢ècQ5q^-“.´²Ãì- ZÇl¶döâ˜çùÓd‚D{ †Ÿñ˜9?á^½î¹í $á^ÜN^MYî„{ ¨P5›T¢½¸Ýh— ÒD{ñ“²N¿Ö颽HDÓdÒ”® }ÁÓ^\'k¼kìÅíØbå}%Ø‹²yìgJó<ìÅ‚£ËZÉ>-{_à`/6lÔP~}Zöê›N!´¹ºö°7¤;¬c¬{_eö¢h‚öê#”m>L°W}å›_Ƴ^üd?ÆŠ¦“ºÑÇû&žžõê&Ue™ÐëÅd:Û+u{5·É¯Ç¼ñö˜e’,Ï3„yµ›ÈŒ`»sóƯÇcÞØM<æ_«Ç¼ZO˜õÎOyµšp¡ŸYˆò¾ªé(﫚Žò¾ªé(¯6\Jv„„(oœm=åÕëê6&gÊêé)/®“×¾õóƯÕaÞ0VzÊÖS^\&a²3"Ê;ËRº#v“.»V3„zcäQoüp<ë}Õð°^­à“ì8&±Þ8yÖKÏzã äYoüN‰õ†¹€X/ôg÷Ø¢ÏzãèåYo,=ë _G½±Ý<êÕ2 edšøØ¡ÞøxÔæ3B½Úêš-6Î=¦gh=é’KbC¨WÇôr¯Óožô†ZéÕ«ÖNÕW ½á›#Ò^æ&½ôô¢Èà5…zy>"Ϋ¯Ñ¬(˜ó†¾Lœ—I´ó˜Äyõºœí¸qÞÐjÄyãxΫIœ—gwâ¼úñß|â¼áƒ$ÎûºÎq^×ÒeÇ߈ó¾®óœWn _ÎXö@Ìy9'ÐÆm½ap ÐÆ_zÄL 76 Ëoع4Ó™è âJÓ¹Õ~îlÜo5B Ä“ÕJÖ‰/<ÒÞ ²^„¹f¶Ãª^¬í.KfT½p#©–]0ÈzN>Û±á ë…KÙc>à |q‚òÞ9g‚¬MFg^Fª^XìLtAÕ{Ã#­/Wå êÅWž JU/:¢%IbÜ«ãá}¯3ØAÕëå6L{/œÕÞ部—v XÔ ÜwŸSŸ,êðŒÏ3iQÐôv¤O»§¾¢¦v"‡³¦È5Ð š^œ3ÞÿYÓ ï©¶=`¨·"¡Ÿ%M¢^i èÂ'm'Mï –Ý%Y×Ko˜d½ÀþÛ„u½2ë—¾ÏÔa/lÇn‹#ƒ°gˆÇ ÐX×ÔϬëÕ†7rt½ü†IתɺÞYÎt A×;`ém9²ƒ®)S. 
.Û¬'ézIýt½´Ýt½$Ä º^°®—”,ëeý•'½H¯#“Ì2¾bÒ‹aá)†âƒ¬W&¡ëÆôËzÃozà „ƒ+5ß ða]oC~SÃíÄ{c5=ï ò^,¶q ïž×yÞ˙Ø÷ª_ÆÎad½t²ˆy/ºó½ó"]ïG–?˜u½\Mâ½áñˆ÷¢Ls2èziF^¼7Œ‡Ä{ã}H×~ÌóÞ0š{ÞÞ"ëzoÊœ:7Ä™÷¢*Ò——sóÞw™ÓõRpż­-QòÚäeÞ::ëzûiÂ3é ñ^´¨ÄË+±óÞÐ<ïÕpB>eüʼ‹íÜL À¼7¼j|_z`|ÕU3UuÎŒÀ7|¬ë¥°‡ˆ/÷<¾ñn$ëea Ëzq@jKU6ðÅj`ìD¥ |/õ ³Œ— |/xjÕÇ$¿ž÷Âö3íÄ{1Д¶™yï…।cÞ‹¦IéM/Iæ½ArM¼e²*j×DÖÄ{cÉziýÄÀ—uz |ñ›ÏN8d½¤\dàŸÁ_¥¶ñ9ßø øÒéÍ—ªwȨ#iþм7œ Þ‹ Ëß>ÇXÏ{Ã[!à{Á1èhˆ=ïÅA'‰ÔŒ ï ßãâ½!%Þ¯ð¼7Œ–Ä{1½ÃIk!n‡{ðM¸7Ä„{yI̸7Œ]„{Ã8¥½TFÒ^¸÷ºü^÷†™‘pol÷†‘†x/ÊšÄc¡nÏ{C+ï ‘ñ^˜÷rë1ï×yÞK)ñ^~›Ì{9ŒbÞ‹»Á]&EbÞkiÊ^êDAÙK/’o,óÄ—•ˆ/ƒ!&¾¨¡,;dq:Õ´žø†7BÄ—û_üfQ'oSïâÚ†•½ž{1ðåñžo¸Ìó^4TÝÖ™÷†F Þ ëg˜={Ðõf¬ /ƒ¥¬ëÅ”­£ïd³ž÷¿-Y>kƽ'2îÅ ±YFw¢½÷NÿÍ´Kj¹ëÚéeÚ˧ÿ™örx¢´W­¦;ö×ÏÓÞ“Àø–ÿ[P/œÇPXòB½ͻՂŒzÕû­¶u¦œX¯ x8™<¹£^yþ~Y÷ ¨×gE¤×K½‚ŒgëÎ…B Û-­["š@z)1G ½êöÜ–TŸ•½ðJO2™¬Zè•÷#£y~¦Á“^°Æù¾¢°¡| e,› B½ žÒëýza/œ%ÂYÇYØËpƒ…½øÂfÂä¯^Ø‹ ð>"j/ìÅÇgŽûQØËHt½ŒDX׋ ¦tÁ/Òõr̺^>™Áº^Ä=òФZ/Ø"7Òõr0h/+a]ï…„,¥¦°št½ØHÈ'û9éz9hea/¯nqöbFJ åCAÂ^ЍX×˳ {ÿ±°—¹ {Ã^ {ÿ±°—×,ìÅuòûËÒœ…½Hˆ ?ºFöC¸—£4ƽ°ÍÉÔ $ì•2Ýq\8ƽù@‡{áu+#T]z_¯ì½àÔ°³YÜK3hÀ½~㈄½aÉ´W»E3ÁÓÞWÙ‘öòÝöò0ìÅü±óϰ²7,L™ö—#}d½l÷½6ƒ…c£ØÑIf½ò¹xW/\M¬r‰>Öæ3+{8TùBëVý:Ö«ë#K“X/ŬìÅ“^j*f½\b½p,fégHÙ«´0™p–Xo¨ Á^^ñ™´—£”{É(ƒY¯4Ž„‡f§BÒ^Ô¹L{ìY/²­(ãÅziÏ•¥½¡ç1ëÅŽ»÷K(Ŭ—·ødi/Š®+›\ŠQo9«¿ê…û²|KÛDÒÞPF½€,WT ½»²E-íž@¤×áÌyC=¼°뻫ݦ£Z Úðƒx>€^þ.HØËU Ì˯0/¿A’õ†˜—ŸŠ1/ߎd½cd0+Ƽü•2æåÑ•d½á{cÌËßcÞð|dßîG˜"u –<Ÿ1/¤û˜#ú[×fƼ\ƘÔoSs“®u)ÝN…1æ…my½M?ÉœöÜ‚’y™öâ•ò®ÃÌyqbgK;œ7Ô8o¸„„½7,¶÷yr½R&µß‘^·¤l' za>#¸üAôÂKåÿçííz½É­ì¾{}Šsi_L§X¬Ö¥qøÂº3"ÏȆ»• ìùøY‹Å—½ÿ-u?= =çìSU$‹Enþ¸¹ö8ˆi W¿{½˜/J¾º“‚^».öÂv2yKÛ±TЋϚ…íü‡‚^³ÅÈ^ØP¡¦ hœ÷bê²”RYä|kB9ïÅeäÖÏ@)祚L>Ú¹#å¼XÖa^i»ÛŠyEKÐ0/;™ôn® æÕöRÌ«•S΋[¢«37Ê×Âyíqó^3×ÝÂyѦ ý­m*祎=£eÁ¼VøÈy­ Âyµ^ÊyãÉnü<…u=] J1¯t¥¼7¿¢½«Ä(åeD]'¾Ê«-¬”WÚD!/SCŽLˆyµ(äÕ[ ãE3cìçÁ”ñòþOÏùgŒ÷æìÕñãå¾ýÖehñÖ„%m÷‹W#äb—c¶žLÓ /ÏbQÿž 3Èkë·Ð×À´÷ ò2Jñ.‡ ò¢fæ÷H·A^ÕÈ‹—y¹%ý3È‹NO•¥´½á«yáuô”¨ ä•®g˽„®~/ê 4í×H´iŒ—G"Wƒ¼zKa¼œJx@ó­B^ØÒÖU' òV¿©´Øƒ¼…ØŒÕ{1µõnÌÏY:¸× ^ª•_=þÏ /%prW«4Ê úsË$h”—µpèÞÌŠyí2Á¼ @ÛSë•Jy…å¥Ó}÷ä‘/åů°TiÙ¿M¼!…˜Þ_%Ò‹–¸(—XÎñ¢g?E¼ùíƒpƒÖY…´âªÜ GBU¹Á”D¹A«®Ê ;—É=c¦)7˜ªƒ(7Øu¢Ü`õå=.ªÊ ÚfªÜ }H•LÕA•ÂÑNSnÐ/D•¸î(C^Y¥¬˜¯tÃÅyüêk~•nÐÇ*Ý GëLºAάªtS$o<ŽøÖK¤ô–.Ý" 
LºAŽí™tƒ6¿J7h;ºtC8iÒ r˜Ô¤´ê*Ýä.Ê r™(7˜ú„)7HÕMº!œk]¤bTºAÏÁ»tæD¹AÛË•â;Wåþaé™D冨Ñ…T`nM¥ú¼ª£Â xMïJmỜ$±"é¢Ý*Ü GOM¸Ál&Ü‹©Â ÚX.Ü:ê6hRáí$*Ü Ò®Û_©ê6èe&ÛÈ«lƒ•ÄdÂÑóE¶k‹~~ÒdT–Be´¯˜lC”PÝ-¦ë6ˆJ„è6˜òÄÐmˆ'ÿU·Á®p݆ Ó¡º r¦ÛuN ¾Y`M·Á4L·!¼R•m‰ Wmˆ…TÕSf0Õ†(Êäª ñºjC,¥‰6Ä)SU›ª6¡ Wmê™jƒ¨/˜jCx¯¿BµA+Ù†püßeÂ{]e¢¸„É6Ä3ðC¶»n{?ñkº ðàã7%IÓm5nnàŠ¥þ²e zKnPn ã}÷“P&ÜãM·A”Ð݆ ôdº "\dº Q ÍdÔ$ª ¦!ª "«¡|÷$Sêç»WѬű"{É©ˆ6ˆ0މ6håT´A¾cm€ ·hQ’&Ú€ví©Eð™hAÕÖ³€˜hf{LmëðmWö­€wû6À‹÷LI”mÍóHuÛ7†y[³°yªµx¹Wº¿ÞÄÅÊÓ#:•ðb,'ó¼×4lLý„û¥ž.^8s¤‰#œ8ÞŒ›¤šý¥CÜxóE 0¶¾ð¢SaLñAª×pñ8ë½À›é’cŸJÓ°¡i‡±7Þ‹ËÙe®iØNúk㌲*6àU/Û/<âíÌ”ðV¿w×SÄkÏ‹ˆ÷b0üŒ"Ä+Iä•ðžœÀGªE¼|Ú>’i Ä{±³.ŠxOö¯ŸA¼}®¡j®ˆ÷änÍ”.ˆ¢ Ó‹ÌA¼|7ïpÔñïm¸˜4½o‹©fÃE 0¢ÝT´áâ†ìܯÑØö'DGц› n èD´Ÿã¡»0Cm@ß~;ÂK°E´Ÿ ™} QÑôµ »0Cm@_»éï´î%¢ ìk÷ÐÙUÑذþêZö*Ú€¾†À8÷%¢ …§Œãñ¢ÚÀÞÆ¿ëùÖF*6ôм¥ß;S±ÁVTù2ÊKÛy %gMņî{rs² ޵Tl(F„³/ˆD³¡pø¸Fࣤb+ÔÅàWTy^¬ŸpkúŠïÔ¨ÙÀâ¶~ V8¯•^8/mÌdÔÔ çõë"çõçEÎkUÎë×EÎë×EÎë匜×m‘óòyøxÀÿË9¯_9/ý__wQlð{ çµë„óò…Õ™³…ÝÎ[ßåÛ…¿œóú-#ç¥ cbÛVUÎkMýr^>bÛYWÎë7‹œW(çõöœ×o9¯ÝR8¯S8¯ÝS8¯5±p^¿gä¼n‹œ6̵™á_ÎyWÛä¼~Ïz½œôzý"èåu¯Úÿ #èõçEÐkÏÐkïA@¯Ûêõ[FÔkEÔ»Ú&êµÏ ³^Td½~‰ÄòÚuË»ØëµFÖk6a½V3a½~]d½Va½~Ï{íåìµAC`¯u0¡½~ÏH{m¼Üë÷Œ¸×"Á½fÜkóà^{žà^›^#îµ)Fp¯9‚{ý–÷šÐp¯_q¯_q¯¹(‚{1«£×ÜC "à^sÎ÷ò²Üô"îµ[ ï…-1‡^ZS²-÷ŒÀ—1m< þ*(ðÍôðJÓçåØúÞãk çåÉÔ§K›[¤EšWmôÖ²”­Ç×GΛëÂõ˜)Ë¢2ïÉ}ï™ØüÍFÎ[k‡98ŸÇ‹_EšW[¥r^‹ó.Ïe^í%óúw0om(î4÷bQæq}Á¼æŒ æõŽ Ê¼;¥>·z¢Î0oýæZгŸÌëÕSe^ý"æ]ê0ïRNQæµëæõ bÞ垢̫2b^¶ œ¾Õ1¯y5¦Ì»ó&W—b˜·¾u ®½¡¾òz!U˜w±•=Œ±áËiŒ×Ç‹Èx} ×x^®‘ñÉ–¼©þáÖNCGÆ»<.0^Úv:-32^sÚ…ñz×{ed¼Ëu"Ì« ¯È‘ñúÛVa^ýZ#ãÕ× ¯÷×Hx—’Âë5ˆ„WŸù®‘ïúpø®-1„ï.¶Àw­ îzC¾tw)^ »µx<ÌÝ¢"Ý]® t·örÆÈôˆÝIw«7Ví¶¨òú{‰t×é®7G¤»Þ##Ýå¶#}ºÐÝj±ËìDºëãV¤»ÞG"Ýu*ÒÝ¥îî.÷ twi—@w­]ïZ»Þ­ÎÓ]‹5ò]sp#ß]ø®Í¸Âw퓾kÕë€×œ[¼ê/ßuSà»öqDÀk¥^oÆx½ðúã"àõöˆ€×jïrËxmöÀkã¹^\à»þf"à]ZEDyEmÜDyáQ®¦i¯%^ûðÚw¯Œ”‚wýYïz+G¾[;pPGR·Ë×?ß&Éû§ßm_ÿîw…ô(_ø £µŽ*êTÿ¹¿Òǰ¿ÿú¯¿ûçoÉ”Fù’áýgB†?å›àõ¯{&«÷S¬óßø™sÍ·šö_Ó·ýÛ?ÿ,ýéõ_ßõÙL›ÑŸÞþý]Ÿ_Ûû}ãýß{Dß¡íÛÓ뿾ë³k{·§·xþ{Ç?ý…rÐcàÖEæ€ 7îjU>œÖQ ¿ÿ}“ûþ»„ÿ(ô×ïúúÏÿ ®Æ¿þúǯßÿûßýÃïás¸ý‚•w¶PÙ_ü¤ãž”w*‰ŸGÆô—?éò'}‡±qÈãî2áï96Æz–Mù½ænÏ\<”yÕ“*R±}ŸÒ°W¾çúóRšhûÎáwßãÍÔ§Û«Ñb|ßwó–G_Ž•çÓÛù#ëy—\ø÷ͱ3ÔÅ[ìϹŽCÛ×ùC¿z¸ÓÇì×ÁÕr’ëSžþ”ý·>Kb¬°.&”üùçþœ¿ö 
bàrSqÝKÎ\·†ýü¿ú„ÿò§úé§?üùþáüã~ú—?—ÿ~ü¦aüëˆ+Üa}±¢ËPÒiì˜ Þþ{ ­p~Ds2x"¯R$¶ŠùüêñüºæüˆßÍÌ:RäÒ6E€2Ϩ €Pʵa»Ï¹J-YC(C¸#Vö!°‰q¿ÇØ’.å )à=އnÄöÒ£kÊ—ºߣ¹;½w-ÛR÷âGÀæÆ³‡-ëU‰{ñ÷ÊÊy6ÝÚ‚Þ ‘Êø¼Ç¹?ƒ“Ñê#¾ p+~žg¤Ë¶½òŸåÙC®B+=àìÅ/ý¤êÍ:]{­ÔÍÅqD—Í<²eѤ¸3ļ~os1±ì<“ÍÀš³ ˜fˆr¸É^Sò[–û‘ºP¤»‹«îÂG]—˜©»ðSoAÄT wᣘOлy¸NêAûá©)…ÆÁwªØåãö0½gÔT™ÒM4[†þ\=Tø¡û<|œxÐloù"ÊEÏì…+øTR]…<ÔSž‡Í7ƒÚZ:£§Êë>S—2È‚Ñ6ÁßÑ„’JµIGB«–ûáM¦œ„h]=›(ÑG}²'mQZ$Ÿ”T¶5H|ÒÎÊ@惦 2d#nzNô§v ž‰IST…±aG]æ tû$K yÏi“\2!_ÉÃo) äâ=ö¡­›UûIEUƒ€/zH(íkÚBþ-ü¾iF…|ïýÝC¹ë)åH1Ê‚™§  ›E bI„ó`$ ª ì2åj#3mAPOëW‡¹ r?£Â¸H…£^QúWÔÜ:*CX^Jaz :vQÝ—–?€;×èZ¯îÙ³‹²÷Á ¶û y:ÕvÙ˜c_ùÔÍÑa>˜RS,€ÿS„pk“º7“Èö5ý'ç(¥Íý;ô×—å=ùˆ"·¢¹ŽUmÌÿ³ÿ€)¬ÏfÑ–C´;Ÿ:ûŽÞšPôžE›ýp/™TÐW4oXÈj„¹*ª1Jî DJ¸DQŠ]’·ÑT’%%Õsn1¦Œ“M /ægÄžs9ôÅž9J Kš<Ú‚6ñÎ hoá~˜~¢¬§¤ì{ÎS3gã£ïêüÏyižƒøU·¦™ ¹”*T]WüJÓ$Åï‚çëçà*™¸áQÅÄ’s¾jTû¦/‹[¾A©˜¢NnÌESHP$ɪ0ÇiΔ>µ#éÏ%›Ã(3ÙŸïgQf[’ub¾ Èøý¹æ£-d’ìyðÑCˆäéˆ$ô¥»ÖUÒ订 ûPhBÆÖð-Þ[Œî’PãçN1M!ÑEÞÃ5ϽÇýP®5¶³iM£VQPZ´ƒŸûˆá›¨åßóöh©˜gû¡–¸¹d°…P‡‡ƒÙv\¯¸)éqq« ÆñPVµSÞ{–ZZ¶\;ƒi4†1iûßOÌ!òèèQ©Kð)9ñTñ€Ðcc7){L-¥½¹äDÄx`§Òla_? ?EäÑE²÷ŠÊÂtoÞó?ÕvÅA®sig0Q¾¸£ÍRî°§®ï´”°ÇÅ­÷ûêÞryb°¾RÆÏ}MâÏ Â¨ÂøãºlÁäÚ\hý¸}óôCÆê÷i¶’C— )®ŸGân Ïv\XÖ.Äè9ÂêèË0רÂÃCKŽsÜ;~ÐïáÞ•÷ûgªö˜0<$6(÷Íþ1€M5†ˆF \V¦nÕj 1…’ñÆòFúX·ˆ[ï|ÚÆ öÞ¡êÞûèâ†cŠQÃÌtчÝj Q|,]±ôVÛï3Ć~8¦ùü¾ ¬§®-taéûXPaºÛžªý-ß¹¥ñ€ñŒÑºè©<ûFÑÁxÕ¼oÌàQ¯ÞY3†Ðó`d­q呚|~˜¶­=3å£!áW6ñ+Øž&«­Ãæh,"ßf=x´çEáÕ"KÓ’·–ÆD¸ÇáY² ðØ[P>áЀ5moÓå\‡]zïGþ& |‹š%Óø0ÕøÆJ¥%55Œg<ó±õ8ÚRF h”ðY›Nð»÷»Ì‰W¦Ú2›¼0ŸVðÝ¿§y¾íÈ\ÏoüWNʦq¿ú4 æ·z”Nc™ôàÃmƒœ‚`Ø(þÖsã>xzlæÛÌeǦ¿£ ˜'ú¶ž H9ðÁC8#ÇžràƒçY†•ràƒ$óéòBÊ:·\’§—e|0ŸÞH×&˜¦ñ½Ô9ràƒLu¸Ë ‚.½ÆòTA07G®‘5OAðÁ@Ë™e[HðÁ4¬o2Ó/GÁ´=#k¤¢àƒ!ÊxÄ+l«(˜áð<¬úF@* æ~Ë}EÁÕ¡ï¾¡Š‚‚ª™Î¼¡àƒçëÐZ[§½“¤1{náôJ‚ÊŸgQŒï¥Lù3%Á|\š0HH°>OI0lé0HH0“Ê`’à9’à˜pÆI0l½rüJ‚Y…c$ÛP|ðDÞ\éGÌ‘b93„“Ø`0nRJ‚¦Sz}J‚qØÛ{XI0oÙƒ5×U †­¤ž"[A0LŒYmšû-#† }nøØ‚yÏ{º¾‚žàÜîÿä “~´*ª > ˜÷:æ"M8ðÁ ïé[2ÊÚ¦]P90wá°kâÊ:÷ô;Êyxæit”óž×\¼ †ízFråÀ°Ýk±W=L90¯Ë×Ý;”ûuÃt¦‘Š\80kFsÁ|Ú6â,ó–Ǥ4B‚+íœCHpæñ㹌 ÛžÞs_Ž‚yÏÎË+ï¢08sèÏ]8Pa0¯Ës-0د‹0˜e9æ’^`0S]seÛp0ïVæª\p0¯Ø&¾LÛCMÑ·Ï ÆM “ã¬óŸâ`+…à`¿n×ãú±,‚ƒU»Uq0lD>Ìë¶±–R̲0C_~«i0LWÝ~·5" ¶« fÃN…R¥Á^~Ùàr‡uy?¡Á,È=1™Ð`ذÄ|@h°U@hpæZ˜Í÷•ûu‘ûu‘{" 
æu˜Wá°¼÷Œ4Øëðþù>åU•óVׄOBƒyæÙ–œOi°]'4˜ûˆUÌëò¤‚ƒýºˆƒ­›Gì]Rp°_q°µ‹à`/ËÄÁ4mÛà‚ƒiâä›–PypæyÔóºkPáÁËu‘ó:ŒjMIYxðr]äÁ~]àÁ¼FKî¦8Øjq0/»Ÿ/E 8¸^7Ew;¶BDõB¿ Ò`~4©30…Áö­ Îõl ›ô¾óq¼÷7!¦À`¯s„Á´] ¦E]G«ip½kñýxýÛHƒ—ë ¦í,ƒrD¼T¡I››9¹•a°$Â`¯\„ÁËu{T¬Â›Þ\¯;P\¯»2Uìe 0¸^—D¾¢PÔÍ”ã{Ëñª0˜@âù{* æ/·A§ÛP¢0ؾD…Áë•):²(ã™n€ÁVq¥Á4¢ðF¼^yDgöfúÉJ#¶—¤DØH‰°5·áµ°Oðhýº'º´¸°ü ^/áå‘‘¯WæèÖr&t3Ráõ¶GtlýÊ3z¶Öì‘ ×+¹r½KãÉwômý™wtnýÊòz·þ4‘Üâ‰úè#®OÛ‹Tx½2E×ʱðzeŽ>®5\äÂ4âƒïiE• /Õ\x½òŠ~®_¸pm1 ·è*áÂ6Ñ®ODÌ olµÇ ß;j}¼:‡,ŒÑ ˜⊢F Ö,F,Œúdº9/×`aJáí}™fÁÂ’©l £Q¯«,,Ù·-X˜gÎŽ.¥aÁÂ(Šp¼ªJ,|3¿AŸŠ-XØ"{%Xø„Ÿ‡®ðv=eİácÍX ¿hÙ¢…gz^EÄš.Í‚…y¬÷ìÊ],Ì4$ø ‚‘`aÉ¿(ˆ8æT@̤ñK|w[±âÄYoëM± #ÖÒ±~hƈµ#Ö“ˆÆˆÙãà}`$l`62b=!dŒ˜KÙ’¯–üÝ1áKåÙ‘n þ->•Wu¼ÕóeÄUì2ny}`ÄuáœWNÞ1Fâxí-Þ6"bje“U¾™} û#ó ÚN}Õva$Äìc÷Èãj„˜ÌxøÑbª…ã®×ÝNú/„˜ ŽÎݲD!ö6BÌÛrÞ-°<â¢ëhîg[ 1m$Ëwë­Bˆy7¨K#Ä~W fÐ1^Ø“?b³Õ174æã±ÃVK¶–B¼UEß#í¥#!f7¸[>â…oõ6p“K«J$ÄœÐyxì«bž4† Ñbª¿•ÏÌÊ ÿk"ˆ9ñµ£‰ –3ÃÃyz4 Ö¤(‚‡w¦£Ecå«G f NGšÊ‡O¦è§»Ã¥ûÙƒl•S\ï鱡Ƈ¹J)T>1iäÉ@j{†rEàÃI…›&„òa:Ÿ¹Ç¤˜–DaóõÝlåÃ(7ø&W)ŸÅ$˜¹ý«3Ãwªgx_x,j'Å`6> Öäæ®&AÍ™ýnXñAGïê®&ÁÀŒZÝ15¸ïÒÎ Jº¨IÀ…é‰ 3e6¦ v ^±Ù3ËÞ‹ KJwÄFs£Ï™’ê/}uBüî?¼ÔÖqMüæoPBLÔŽ¯7ïy•“'Äò4‘“ÀËgp÷+æoŒXk Œ˜®/¾ž&! 
Œ˜×a`å®ù׈eö ƈQs¦¿reÄìC˜Àòõ!ŒXû—1b­Ÿ@b¯_„ÄöÖ$欎Yþn"‰íA‰õ($ÖëSxï"Û‰õž‰­[ $¶®.ØË"X_œBb\—sÏc ØË!±|É ‰&*CC 1Ƥ7OÙ*(aoH)1Gµüü†â %¶š+%¶{ %Ön"”ØZL(±µ˜Râc&“rJlŸ`b¯_ÃÄÚ‡û“"&¶»)'Ö\8±ÕZ8±}pʉÑÙZÆÍ/çÄ|ã©ô-ʉmXWN¬C©pb¿gäÄPÒu9ŠÀ‰­;GN¬S rbŽI…ô·•ë´ªœ˜­R¿ô\81ïy½S_ΉyO¼g§”¿¹ozÆåÄVNáÄ~]äÄöM 'NTÓÙÐá^Z/œX…¦*'æŸcMÓ”«W£œø¾û™>åÄêE)(NT‚ü.ÁÓ–ëAê®91}Yk~ÅöJÓFÍw¥ Ø:¥€bk*Å|5äxûµ€b/JŶ½- Øj' Øj  ˜¼àÎG[)(¶ç (æ=áûwm'Å•Aä­­‚óž7>òóÝ›Pìu8Õ—R@±ufÅöý7Plƒ¥€b¿B@1ÞÆµ¿$X 1÷C¶Ì,”Xöû…'fkxZÎ&EÄ”sGc7¹uEļìzºZ¸"bÚ0 ´ÐEÄ̽•J×TDÌë°öo¢äŠˆy]ÝGlU¸%ÌáfȾ¹¦Me?{:EÄ&"'ˆØmÓûì™B[QÃv¾üñE¯Òµ$ _Žˆ«·{ôLCŠˆyÏr]AH1Ë‚þÓòR)"öúEDìõ«ˆØ 1ÄÝ뫎D¬‘9†ˆùzN˜·´jJÔ.»ÝMÙޱDZ6ŠbÖ}½5B¬¯Ï±ö@#ÄQRB‡ïÑ·z³a>VBÌ!¥Ô£‹®„½ #Älžëîi*Œ3òç¾z^#ÄÞêBˆ½ „k@‘boY!Ä4nyo‡\ÇÁÉø0Ÿ˜®žÃÀø°·ðá¥8¥{¶aü1>ìÍ&€˜£!Oû çV:‡bo6Ä^ !Äö  –Ûø°wáÃþþ…ãÊóÜzŠNãÃKYVÝMåÃÞ:‡u&1>Ìâ”­§ç1>¬þªñaõ|ŒëšÂøðãäú:0>Ì+Ñm¯¦#xØm‘ëòÚè0×FX,6ì·ú6Y‰‚‡Ä0%/Ì*¤î V¾+\؃g#Fß™gª©À*fŠê»‹)VwA¹°±@áÂúq(Ö=BåÂD ~K!.\X]áÂcEQï×Ùt‰áT·MöÃèŒ}û¦1Œ°]ç~\+¦¢17>ö¦–`qÃb‹\˜o¡î^œ+¶7¤ÃðŸö„ c >‡saÚÐI»DÄÂÑUÛx©Xxç[.W¼:FCnånÙœ [ f‚ÒýÌH(æ0Œ¿ø3BaÝXP(Ìׯu Ñ…ù¼¨ÃÝ…ýº…Ý¡°ÖA¡°v!…¬ÆÍn °W=@áŸ\îZ! 
…y=ŽÃ#Þ™•ïú…wº×ÐÔ(¬aü … /ëÎqYµ%Þ¼JCéR 0¯C]»Ï,P˜ål\ãË¡°—3BaÞóºîîÛ æq¬n»ëüBa^°a„~S‹*æû=$C ³MºÞ‰@a=× PØž'P6N/MïD˜°ÕK˜°]‘°Õ@0/Ã;ëK$AÂÖT‚„ùáoe¨íF$l% D؉°7W$Âva}šašP˾ä lÝG€°µ—a{„9PbÉß…g[1#öÚEl_UãÁ^ˆÈƒí{l6áÁÖþƒýºÈƒ­!…[l,<Ø:—ð`/gäÁ^–ȃ퓋<Ø«x°ß1ò`¿cäÁ60Ö® 8Ø^·à`¿eÄÁVLÁÁšAq°Õ\p°ß3â`{y‚ƒ½œei)ÁÁö>Û—!8ØŠ 8ØšQp°õIÁÁ,z:‡ö“à`kFÁÁÖŒ‚ƒmìÏ‹8˜ÏÛÏTrZpð›ÿu 78Ø^šà`«à`kÁÁV;ÁÁVÁÁVÁÁÖIû=Oua¥§º°±5[l~YÃÁæU ¦—WÎûººIœØ£äÒŽáÞ¹ÐIC©Z€°ÕK°Û"¶™@˜°Û"¶ö&l=O˜°Û"¶÷™°›æ÷ñ&˜y[%2a«¹0a{Û„ýºÈ„­w ¶Ú ö{êÃÆÞ,LØ|maÂ^‡K}Ø8nv[i>¬<èQ6T̘°Ö̘ðbLêÆ¢´ÌÎûAZBËcPXß‚Aᥴ‡:³¡î…—+#ÖUˆAaí† …—G>âÒjû<êÓJq {öÛ ^n›Õ±ÕÛfõl¥&…—ÛžêÛÊÛ,¼\±°.³ ëzy`a]¼ö§ ö—,XØ+!XØnßÕËUcV7WúŽpᥴ§:ºò> /¥½~ÖÕ50¼¨ˆ³+ßlÃþDÃþDÃþDÃlæ”{ÃÞÓ /:Ô镯0¼Ou{õ¶‘ /WÞêøÊÛ4ì}//wûV:¼}>˜BšIN×üs–cNƒ†©ç[ÊÙ4,f¸`œng‰ÆçÚ‰.1Ã’òMb†-ç›°atÀ»|N?§Oʆ™íR”¯È•²aæ2ÂàÒó¢ –|‘ Weô4ÔZ„ ã3Ç{/W;R!l8ÿùê g>šÍÐs±6Œ€z¹IG Î…ë}ض• óž;¼ÑØ0ZxzœÆ†ñn¨À[õ½¿Œ g¼JžŽçyÄ/ƒÃùfV»y#p˜Ï»áŽöñÂጾ†Òtí;ÁÃþüs}[ÁÃþ<7µÛ¹ÁÃh%îtÁÃ|ÆÔ‡ñp¾%Âèùדx˜¶Ío‹f9¯2cu"f‹Pf¬#ÌWʳèýc €¸^uÏŒƒç;1ŸYêbÒûG1JÀ#V©/0#!æw…·5¦"!æ=Ÿk†FEB\Ç MÀ]1mpšzWÄüÄS=½*ó2”pÄÕFBÌáò‘]½ â:¤pb}š­bd…Ó†nв< æÍø{z _ˆYx&x9Ü ®ãö3£# ®»B qÄ>nG@\Çfz-—^ÄK9 ® r枨B1Ÿç¬  fý.~É›óa›y"æWNjc´dyóa”vï‚‘³èfåMµ x˜&´qKn"x¸N-<5ßôˆ‡YHÌ#h3âá\½Â½§ <ÌÉýÞÇ–¾àáÚïЦ÷,L>êØzŒt˜·¼&ó6W„õM˜¸}àg©Ýñ~Î)´à0m'ûp>p¸öt† p˜×¡¶‰™g¾ ó•íÌÕo"ö†Šp¸~R•ûiú‡kÁlý!T¸NëÓ¨>‹¦w^©'ê6\‹‚Ñ£«oD6Ì¢ä7‹‰äŸc Ë>µˆ'æ+Ie$&.̆¬ÚGM†"rá¶é¾²„ydG.\‹š°,zõCڿϦ‘ ×ï‰ù^š ¾$ ã¸u]´7bakÚ®¨ùçPLxæÛS2Žùçn.Œï.Ò©ùçè7âm·,m/®5cê1ý;d>·¡¡°0Û?Sg©§‘ \¸vH,ººQäÂÞ‘#öñEÒÏ…"©ç؆÷1ÔÂ#ö.¹çØÀ÷Ñ%5÷?w̲=s„äž+<ØènÈ=‡ËÐÎ]ÛOrÏY/Üs…¹¾·¡ú/¹çÌÕ—ÜsJ0™Œœn1÷»&Á&o©¹ç wëö‘êRrÏRô¶0at|8ÊcÉ«¹çÜsϱٱV"öÖŒL¸¾… þäöÌ8á:¸—ç%LØnf¹çÐi˜dwû&œëê=àI=Ç'2Hýî5¦žãú=÷ÍK=‡'žL2v啳Qà)Ýí`‘¥ŸãÊÿxzeK?‡GbJéëK?ÇÛâ¡38¦ŸC?QÅ¡#ÓÏj·^AØ"¤ŸÃu—íW; /éç ×ÀyI«$ýÁ£i¹Xú¹B¥ók*Hú9<ó:žyØ]Rб@hÇk OG–ë¶OE É@W¨m›ºRŠe cyÐEFn.É@W¨Ñ9ý,ª‰…OWu² t…IòVzEÞt…‰&rWDZt4ÞðÆ»4¶$ +Lf‚}“_Z:Ž9èµS‚7f #ªÁú¿é¯Y:Þö ÞlŠ’‚®°7TÅ­•W#æÓ‘¸QRÐ&)Wî=ó—´­)èÜSÐñøàÈlSÐ1ïOîš|–‚N ”ç ÛˆîSK.ã9èH`°>ÜÛgy0{ÇYÎ)î"9èøia…>Dl4 ÝöC>K…ë;¹Ÿ.k©<˜Æ›x«gÚ“$t…yï42ÉÅtì#\sÞ+ ¦ SÃ54¥%]á`{¥‘¾2?ÞU¿ÿ6¡a,±Jç0Ÿ ðLydPXR æ[H÷H­&BÃ;ÉU[ +Ît4¯Ü²!æl½wF’ ³×߈éHœp-ïÏ©è¸3¢Kæ 
sg#WÏ«&2ÃèÕÇÝSXi¸ðÆI`Òb“‘Øî³§Yd$~nMÏYe$øM§³§îs‰ Ž»ÈHÌØd]F‚ÞvãMFBNì.2áÉH ÅàJ—ã+2rþtÑž'(†Š„E¶€a&‚¹z–ºkA‰ýÒ•Ã,`˜«Ã<5`X®³€aÙ©°€a®Ìß\Ã_?zÀ0çÊ1ZÀ°hN[À°]'Ãv c‰“‡w¢ÃG—äÒ€aŒzGw,^˜~'–Bå=¥¬ñÂ(¼‘Ž´5^˜Ø: É4&&¿'7•xaØÎ» *Y¼ðIp=˜£Æ Ùï]hÊâ…ÑójþÊwI¡ñÂ"McñÂ-„WB†Íf!Ãq`Öa"4d˜Î^Ëy­ÙèxÊ–4'çUi˜½‹t)µìw2,ÕÓˆaÜ>sÏh ÃvK‰–¿=bXÖ¶1,/Ô#†‰îñµç}HLGË„âømdÜ#†å­áE=bXšÔ"† Xİ_'ÃrÀÚ#†90œ˜Fš@­F [³kݵF [xİ"KݧQÄ’ƒ†4ŒW={84f£sÂÄÄ’%­wªk¾1*'îƒbüo¼-Õù¦wäcWê§Áñ>· –,ù¨«­ŒQ™OÜ1*÷Ñ lK–DïŒr¨IqZOÆ ÂüÖ§t¡(–Ì£»+žÂˆ:FeÂrt›1|ÂÄ’7žb¾ûðq0GýèÉøsêaŠ0^Q+ÙYoõ®¼ñx÷HÛ#KTKö+™‹ttåDÏéÜÛ<7ó^ò›cãýGWÆ JµCÚ Û‹bÉ’›O)sݺ²¦n¯Æ –,išaÄð0Çdkxš4šÇSrÏ3ª%oŒ¼Áà N]Kf ¾òÆœÓyÇtz’»%q´KL§‡VÄ'Ò£™ÓùHêè/Š‘ÅÕvmA.y£“¦jãî•b:½‡gÏîÑÒéIk~}Q.Ù^X=ôº²˜Nïᑲ㼷vÛ3¦Ó{x¿Ç2Ó²é=Œwô’5sô~”è%{Û=šsz¿ï~¾#aÕ¬9§cÏ;_'àáCßc²ßvV°g›þ+GUÓk+õ îJª™|:Èsg—RMÛ5»‡’ê£Æ,Ž¥¤ú`K€Ñ‘T ÉMŒTó{aÊž£­ˆ…TóE¹ú™#ÕËm#©fàÍ÷’êåÂHª1ÕëîÇŸŒTsúÃ\ÐäÃF”Ï“Ý=­‘jo¬Þ[캑j˜j^É~sèæ…hĉ¸#¨ö #§^š r꥖S»-rjoáÔ4æ3µSÎÆ©yj å;©ù7¥·F~#¦æ]ÛÌ÷#¦þ`œøÁ³G?fœz1FN½#§>8Mç~<Î8õÒ‘S{Çk Ú\@µ½báÔË…‘S[› ¦öú ¦¦NIoVLÍÊo¹Ÿ33LM#‘Yi9‚©½ÙSظ£Ðö…R»-Bj·F]=ê½!RF½Ø"£æ=驶ČʨѧßǶ!ŒÚ«(ŒÚGµÕC5mãè{g¨ýˆ¨YËsï9? 
Q/µŒˆziŸˆ¨ÙøþÇ®b šå !Áiˆz¹ 2j·4t²•QÓ˜S?ÙgŒz1FF #œÜž+À5xk=»¦2êÅ5— ðeº°¹2ê¥"£þ` .²W%@j·EHí½Q µWD µ7»@êÅ!5oKÕ³»Ó䩽ÙR/Ʃٲ¤ÿ}·V õòÌ©—+#¥æTž¡]®”šW¶S‘ÕØ(õòëH©ý= ¥ö¡S0õbŒ˜Ú;–`j¯¾`êÆà#{U"¦^®Œ˜Ú[U0µ·ª`ê:í?C)M0µ](˜š¶ýRûŠ©í¦Š©cÄÔyÖÓ´¦¦ñyÆ‘,åÔ‹1rj·ÒÏÞ§¶ÆSNmïD9õR•È©—Ö‹œÚ¯lg ¬©—K"§þ`|âÀ¬·œz1FNíoY85¯ÌeÄ‚)§þ`|âÀ¬ÆÈ©ý] §^Œ‘S»1rjØŽ£ôÓÐÊ©mêUNmCˆrj¯ˆpjïË©cÕKE"©^žIõbŒ¤šm0åµT/ƈª?Ÿ80L,Þb‰:ªöT½#ª†1OÙqcÕËÓ"«æ•åî§ÆUÓ8UÉU{“ «^Œ‘U/U‰¬ÚNXõbŒ¬ÚëYµ?2²êÅYµ·¬°jo3 T°5Áع_÷ÓrÄÖ™1Ôù:ž¦§Kα73 R¨uf5Vk¢Gh±üÊÏŒähMˆG!Ýrјuæ¬{ͨ:aÖÔ±ºñÝ–2€vÐ ¡ ô1£êZ/WFh½¿.-ªßèr„Ö4îS;S¡5æÀç(W.ϽžùN…ðûhj†­)×u–­i»´^ŒZïÔ§šê™ ­9)S:ú:WhÍÃëèþ#Œ-Bë¹ö$Кvßg¤@kÕ£4j½s †ÎÑCè…ZóXÃAr´ ¤=ò噑Zû j½Ü6Rë’SSV±µ·¬`kÕ‚­yÛkjé6lÍßænoÛ–lM#Å ó‡øj¾ÿRs œÝ8ýq/ˆPk^É^¾5~.ԚǰvÌk©‡3Gj½Sgâ‘:J­Ñ†X®Ž¨ÅÖlÃ4%Ÿ[«Š£që噑[«¦¤rk·EnÍZžwO¢bÜšÊÏ”_VníµnMc™ê» ®yÛOø*¦¶§9¦$ª˜ÚÖl¿So#ÅÓB©,žÆyj§Ô²†tJýÜ3«ÔB©eÝê”:®…RÏ¥ j­‚òiYO3…V¬¥-Ú•OëDe|ÚÚ[ù43”¦y~Wù´úÖÆ§)L¿Ý˜-Gä´8ÞZ áÓÌ´Í#¬Ê§ 14>mTBù´-ØP[PËúZµ1Ôºjw>-ØFù´ÕÏù´Vó2¿[®4>-ÈJù´VñtÙÒ<û,xÚÚUñ´:²†§­'+ž¶æQ<­ÞÑ‚§Ëõ&ßîFq»¥íOòRíãŸòiý®OÛ¨ªxÚ^°âi5OÛ›PíÆSýä`|ÚÞ²ói½í­q©æòÊô?–Û>ÇUø´ª—*Ÿ¶~¥|Úo*|Ú*¢|Ú§ý¶§:Êq»}áÓz[ãÓÒ#•Oû•Ƨå+p>­¥}ÔQ–Öù´=Mù´uIåÓnÌê(»QeéΧ¥¿*Ÿö+/u”õÊ[eyYʧmàQ>m/Kù´ÆF(Ÿ¶» Ÿ¶Ž®|ÚF3åÓÖ² ¨už0@mRj YQµ€eCÕ2â)ªöÛu”µž:Êr¥òj7.üÛT@Rˆ«þušÕ7ó¾³÷'%‰¯mJ j%Ô©eÆb‹Èj çÝ(J n%7ŠˆE d5†sŽnŒR n‹¢Õ\¼ŸW VÑj®¢Qü½E=¸huÈ ë¼Z²ºhuMGî‚ÅÊ«qŽ•êó!¤zçJ‰ " u)®–„¹®Y-™vWcÂÚóÔOP\-ùtWcPF·ìGÈ;­Öß*¬–œ¬«íBaÕ&É­¬Zr”:«–¤³Îªc®WGÕ’*ÒYµdfuV-)]U{a…UKzPcÕ^žÈªaÛïy"[µ¤FubÍ<µ×<é«ÄZµ:±^Á·Z*±¶ÆSbíUbíÏb-¹#Xû3…XKV¶¬­¯*°¶þ¨ÀZo§¼Z³Ã¯v£ðjæ¹ß¦„k¿R€µ_)ÀzgV¿ ="Àmp‘íµíÖn`íŠÄÚÊ#ÄZ’ƒ:±–D×N¬%-¸ë˜ûÛµeP`í<4Û¸´€ëÌZa’KŽë•¬­°Â«ñ!erÈó¯–,í“Wãš*ô’?…S»Qxµä7w^M­û}(©¯® «¡6q(¯æ)9ÅÕÖàŠ«Ý(¸šÃþ>å|WKòXÇÕ¸2ïSaGqµ#®Ö7%´Ú.SZ-YÙVS³ÿ¹»Ž³Ðjz ÇÔPRZm³µÒjöù}*)­fŠçÌÂ-ëà„ûm…V{U„VÛûRZmïKiµ•¶ÒjkR¥Õê=(­¶')­¶qCiµChµ=R`µ5‹Âj»PXµ5¶²jýpUc­¶ŸS´KQµ]Iµ BªížJª­ŠJª­JªÝ˜5©‹8«JªÍÍURmXI5ª™©L¦¤Úú€’j/mQ9Ž ª­ÛuPmÝXAµ}ß ªQ{”âhABF«aÄàt”¶¦´Z‹©°Ú «­M•V[PZmͦ´Ú†c¥ÕþL¡ÕþÌH«uäTZ­­n´Z‹c´ZÇ£ÕÚxF«½†vŸÑjí®F«u$ï´Z_†Ñj»ŸÒj}ÇF«µë­ö+…VË8g°Z}.ƒÕ2$«6Û­²¶KQYë!¨Ú®T­ÕRmÕWRmNIµVCAµ½HÕ~×Sd©£‚j7^ê kMnu¥ó(¨¶š§öfNmßyçÔz?ÅÔ~‰`j»P(µ J©­”J©ý‘§ùÇb¼~Þ?6Jm¯Q)µ®»ŒRÛËPJ­m 
Úž(ÚzœBjŒpOº¶©mlTH­_Ž2j³æKq.õŽeWDm}\µ:3†¨Ýø¨wfl#Ôf{ucn{å\¿•Ooßȧƒ–ðʧUîi ¦fߟÉ%˜šbÔwég\-˜:s¾Èå£à‡ SX,õFwÿü Tm;þŠ§í´®âiËph9µÇj85^ÜyY`ÅÓ÷Ÿrá­5ú*xšôžB‹Š§±4ÚÑäGù„§Ñ'>‹Ît#æävßÇ: 㑘– ksD<˜Úg/}«Fñ4æSŒ8Ë ÐÓûÆ,]n¦çºs7Nï›éöžövìNð4™l9 |§O)ኑÃvo¹ñi8TâÞ¥¦TD×ÙRI#5¥âF!™À®)Ù¯ïÝ÷%¥"nó`乚N·¦TdŽGf¿<>DS{ö;M©ˆA!—y$Qµ'áDÍdc×=ãâQ31ºëçpjOµµ§Šˆz«ŽQŽˆÚ3í¢ö4C‚¨-7ƒ"jKù ˆÚ²z(¢Æ•x{| ¢¶üЍ™ ¦ ­{%Ô–µ@ 5Œç^”jË2Ò5«ðLÑ}EÔžƒHµ%`Dm)QóçkF. ¢¦WˆbAÔž×IõF•ûcâBAÔÞl‚¨·àýèˆzãôŸ­ßžµ7xDÔÌŽ3RuFD­-”P[* %Ô( 榹'„Wb˜V5ŽgÐÜD!šé0DÍõ(?¥·#¢æ}†$‚¨Ù<[HY ˆÚß—0jV%‡ô¨ùÌrçt‘üà ÑçÆ®¨0j¿FõÆ[ïcR‡t1šEõrÛȨÙäÌGÛçÔ,-†Nz¯1BjŽgÈØ ÚMªèU‘¤Š•$¼›OIñ‰ Îó;ÐdI©H [&3Ñ”Š¨#ÖƒÓjJEmrK©Hÿ¾LúûfT<~¨gÊöû£6ßÉ2*,ÍÄ+–Q13lqlï[FÅÌ#vÄ2*2 ÎH a ñšÊLóa ñÈkf^±„ŠÖ¥4¡¢5ŒäS´×¯ùµ¬’N‘ÙãgæM§ÈatfóXÒ)JREéþšNñ 6âØ´tЍdº—tŠüø‚’¹¦S´. ù™yh¦Þ°|ŠLh”G¨ÒȧÈHSæßò)ê—¨éÑ3ᢠήùÑmžmìZ>EfsšYp,Ÿ¢}àšOÑ>Sͧh¯Qó)2÷ÌLêaù½i$Ÿ"sfͧÈN6SOh>E¼ªƒÎS“Ž—|Š;zîÌXbùík>Eýt4¢}æšNQúª&Säòe&‚±lŠ;ùŒ¼–MÑë/ÙÑ‘ï9@³)Ò É4›¢5›"ÁbУ?“{Fߪ£@5û9™ÓkŒWï¤f£/X85‡?tø’>$V$[(¨ùöIŸ kúà[W™6}jjw×~‘þ8¸k£“‰˜J‡h¨ †ilœò}ÜC»:Ðj®Ép>‰èq{¥ÕvVi5Z çé*­¦2Ê3WéL]8Úõõ‹Sãã˜.¡S[,…GSßD<Ýó×hjÌWD»´ºESS½º6åÕ!X$q†<¦ŠDSoø"oôýNÁWÓ%Dzø)¢Q½“c¡ØCWSiKÆqWóâÆ:Cq5_*z\WP\Íï¿/ ª&pÄÐKUs¤~öUó"–Ký좠j·w¡ ³ ªæ•¨Ô8Ù+¨?ÀÏ-CxTPµo:DTMÛƒVJ$Us›ã@3åO¨Ú7+U³àL½Ú²Z+ªN\¿ÍkŒ¨šíqbQØAŒ jÞOäòô5FT #:ø=ÑpDÕ‰+êížP9¢j\Iéó®V¤¬šÆ½ì¬GV½\YõRÏʪÁ#¾ºo„UÓ¾¸U|~`Õ¼ßÁíÖ¨«ÙÉÐÆè °Ú÷ŽV'b:,˜;OX½”6ÂjÞ–Ò£Æ ¬Æ•˜±òÈ+°šÏ¬È'Xͪð¤S_À ¬æmït¨a5lè¸ÏGXÍ•OKwR)´šµÄD;Hv„Õì9û{Þä5FXÍ»ò<ÔÀ¿Vû+Xí +°cMá¶åöI„µ¼Þ²×aµ>«— gš™§#Î'îû˜^U/O|QuŸísZEn:2ÿaÂTè2£»tEçHªÓ»Ð‘¦Bª—»FR8“•mD} ©ö¾!¤:qj-×&R½(’j¶A~ö‘=2‚jØžSýõAššŸÎVþ×05ßÚ?µ¼‚©Ùp»ÙОžNxâr½¦ŸÓLÍi­Áï×1µ‚©YCüáŒÂŽ˜š$à¸îqÔD05ëqÍ ¦&&xö4²ç ¦fUN¼Éæ|*¦ön'˜z)PÄÔ0žXðŒ3>ÄÔ‰ç3®<’Æ ¦Nuœ#šåGCÄ¡©c¤ÔÞM…R'ÎçÌ|"œ:1ä +ã±#§öÂFLÍNõ`}ò)«"ÛõÄhÒÏELÍÒ<èqý(š`êåˆ©Ùæû]Æ!ÁÔ¬Çv÷U€Rju˜¹#¥fÓáÕŒød¡Ôœp覥.\)5‡œ–ˆã5FJíqB©}°JíÑB©a„?rzREÖpËûHþ(”F¸q3PêÄåÀùL„)5×yO()õrÛH©ýE ¥†ñªÛÊ÷‡¤Š¬þ…®Ÿ;6”Ú»²Pj¶ ó™Ôˆ‘RÃÈîÑS‚)¥^Z5Pj.˜Y»-!…RÓ†nxö)5Û®æLS)µ=Q15n{3Exi'éT/· z¹mÕÖJª­×)©fØ[ÓÝ®Œ¤šÀ´äϰ•8"cº|FÆ4Õ‰ëKL.[¸¨¤šUÀzÀ!Õ^?!ÕÞ2Bª_ –ùþMÍ+±ªïùA•T{³ ªfàX”g‚ª¶Ä ò=A› 
j^ÙÇ™U³´˜t{â(eÕ‹1²jF`áÝô¥·ÀjÚ°bƒ‡3ô§§‹ŒW\÷ͶÚÔ‹1ÂjçİœJS¼Ž°Ú»•Àjÿ´V/·¸Ú»ŽàjoYÁÕâ+¬¶…’Âj[z(¬¶¹°ÃjÞ0ãaí¼Âjáþ¤¾3¦´Ú–‚J«Ñ¨NÑc¶”VœPZM0°=×PíZM0À º–”ü)VûoþÐc¯|ã­ìøéšˆæ;”¡pæïEh?|×¼¢>ã•÷Ÿ¾o+¼mßzÀø A,­°L¹¿Û›èEh?|×´¶ïßAÿi)Cuá¼o¥9Èõi¼-ÇY£ÐOøñ7¼Í“+¸å\4Ø€ûwX½á¿uÄÅ DGÜ_ð¤»îòcuÃÄq¿øIÇ7< ä<}“±4úåOºüIße$øß¼µ1ûûޤ±¦9,û¿Ûä‘Ëý—ìõ(«CŒß©<Ç|ùu^Ëß¹K¤°ñ]ÞŸîoGŠñßN-½-ϧ·ó Æ#nÅžpï)oŒ©áögY–ÿ Ò|Ú_?êŧì“lñ0Çê8ÿ]zúCößø”ýb öë©»›?ÿ˜Ãó×Qã“°¶ã3Ðö?÷qPyh]lïþËŸþé§Ÿþðçÿù‡?þñøéúsùïßàΔíìŠù‡´½qÀËpGü¸§³%ϲï,à…þ/4SKz•á72x~‹‡‹z¿ÙÃŽô){°`&)ÎG›žj ëy÷HÒüоý8sË”UhQk=°ždF?æ…A:ï×Ü`GÓ1ÃÖˆ½Ê5úñvc'1‹ZÓDàΨåöÕÕæã‚òGãˆ&ÉCªï±9©ëAQjvæü0˜o{]ð—çÖ Ü°jÛ +ÿöeÚðñà}÷Ár~ÆŽº·:pt[Aÿ©}y¹ðØæFq.OóÀøÑŒûÐÞ¾\¸E3Ôµ• An$ÙÊ#¿Û€\iæ­£q¶äÂÄ#3-zj•â{»raî®™æïelц†Ù50TüÐÆÙTÛ5¥ØóqϽEsgªÓEëË0rZŸÈÂ:Y¬Ï0m´‘å`jÅ6$ךl3½ ù±”‹!¸­/û3a‘64¢íègoœ;çÈÖ— ÷ž­ÏggˆÞª¢³ö€û|rëè]™#gBÒŒåçØí¤‘9]ÆhƵi>{W¦xÌÌÞHŒ:¶ÒëmŸ™3ŸŒÐ8gW¦‚¾ÃvÛ¡Qe¤Î† €ñæl]mu|²ÛÝm#òg)Î…²_mX®Á6s?æ+ÍM²jÄxÛ5Gòµ×8Ë—s…ÝWÑJgP§ñ)}XæÛØgÆ|sS5žú°¼Q•i¤”Æ—7ƒEêð€Âô n3ìÛ—¹}‡á¡®W™QHµ»ž#M#mè¡+GÛ3ÃÛh;™¥ß'•7zO~¸ƒ‹Æ£—Ð*s/˜FJo)íÞ1"õQßÚid~£qÐx_3¡\¾îØGåB¿h&Ê÷1cW–†…Û:Nl­ºfhÌDØ«ÓX#YjWæòÌdù¾gÄ]-Ð3òå»Ð7n=™ÕÜGžÂ|?csšÄ†¶R.èMå˜=£7GɽËósƒrIŒ!žƒ²Ô£²±úÛm&œ'Ï[“ãë/áCm³æçâ÷O:g–q¼œ#æoy}åbÖÅ9ÇÁÀ4¶Çk)óÌÎÉ¿My ƸkžÙùó»àWš äj?8Â`üÜ3wc&Î0§sfÂâ–Ï1ÛmŸ}†Šy³Áö\c,¶Qü Aˆ4bE2r•ä‡Aˆ}0æäf†©ê‰u'¹N+y¦¡’º“\o[fÖG0KÇc!ýŸû‰ƒñyOý 1F4¼*D7íØæh|QþyãOèúŽØ5¾vXžî!ÑiÞº“œË»Ü;§mÄDÒ¶ñ¼Ésµ»ò ¯aÜ–ÏÏ;¾½£û`ì7<àÕŒ±˜°tᎺ`raŠ—ý*÷3Œ¥{ȸîA}ºX•C†‹Ì 7†„¾n0Çp‘iD™›êÑ+µM¯‚k•î°S^j2yòèAÆ¡I#‡þ.¾Q×YÃCö+â5ÃA¾9ƒžýDÞ?ç ×ÄFÑAÆü;4µê‰Çè £Þ¥M<*™¦ƒlr¯‹¿Þ•5öÆžW#Ös%:ÈR Ìa&ŒÉÛ6ÓRñÐç1d7æšðg¸øtFr)Bž-zÈxmg_ ÒX¦‡œ utù@Q½¦‡ G¯ Cï[OFËå0(oï1ãV {žð1«Ëõ1ÙÇ¥,W‡VŒÒ]3ˆº‘W÷Ù 4^û²+w†!Y>XË]aH.¸ê:Û÷Áh¹á fCŒ‡Ær¿aìân#h|ùª(ŒY ¼ócSÃJižE¨SªÚŠ¸Ã¹=sTvÛÜc9‰‚º,³ÝÃA^*Qf8è2»0PnzÈtofb$Fº¤é"Ÿ\N+Ÿé"»1Í öZ H¶3wÖsgÃrº®¡¦„q¨õÜÑ=ÚY,Üþ‡4]dk„󘑄«‘GÅÆ¸l-ĸ˜é"y R×Õ®¼©\5ûræéÚvâì8Ë ^«R=²Ù™Ó+Sðvæs=õ™ûâÉÓ1,cBÉS0ܶ)¸¢Ú‡`ªDõé`ä{rؾé¤o;ˆÿKîŒQñ4ÉuO›,¸¥ TJ}pâÞ&ˆ”š_Lº‹ ©yŽðžê‰ ©°Ša²èUH}ðlþTÖRH}Pvj ½)¤æ·MA…Ô0&ž;i§`RcHaxUÅUHÍÀÜ„ú}6ø!57\®)V¤FÌM!Ø5· ž©£¤š<‹±êMýXI5ÇÅgªÀ(©>xTÿêÒÐ ªêäí ÕGF ³€jDåxÜR/)¨&’{¦@ˆ‚j·©J# ºŽþe }tPÍ)ãy¦‰€êjܦª†€jÙO& ¨®²Èe gDP]m;"¶•TÓˆµT],wã 
ÕÕ¸aÆo6„TÓxÜåè'R„TWc*[×™RM#¼·‹S[5FRýΩwê„TÓ˜n¸áGj¼9êjÄXÔÏÖ©¦qÃz»0R½©¦ð¼l­(êOÆ  ²q-4Ä„T¯Æ@ª«‘ŽíR½6Ð$ÕÕ¶ÁGoÇK„T/IõÒx‘T×öAúñ’=F^¸ý !ÕÕÈÃå,Ý2>º1êÕ8Iõ뎥!D-¤šFx“g×âRýÉ8Hu5fžod‘TWãv«"©®Æ7lò}‘T/·¤z½2Å ”‡Ú¹aI5ø¶ºà¶êÅ@µ÷ªÈ©icœyçS/¶@©« SK×JM#,{çB©WãÄÔÕ¶¡[5AÁÔK·‹˜º1Ô‘À½Æ€©—žÕ0õúûxHˆ]®ð¬ÅÝmƒR/Ý1RêjÜ1ÔïŠOJ½ôÆH©Wc ÔKoŒ”zéSS/}*rêÅ9u5¢4c× rjW1NýÉ8ÃY_˜z±J]mø¡ïoDH]méç R2HM#±jÞR{ä³Ðº1PêjÄ›Mw£ÍS/¶ˆ©«qÇêï,ƒaÏ“y™ë}­íUDLí²ÅÒÚ/¤^lQi‡O‚S~ŸÊñP|“¢Q ©^mT¯Æ€ªi„'‹ÅsóX#ªþdœ'c@ÕÕxŒãsBª— #©^.  úƒm€êÅËg.ÄB CäÔÕcìîEN½^8õzeàÔë•S¯WƳ°42^çxšqrj¿P8õj œÚï* z½òÕëEYs|èETWCÅøÜz@Õëm'ªöï[PµªýV½«ö\XµOIªWcŠ*nînØx¶V—#F ³¦â"Ÿ4°>Ë=üY“ë•U¢Õ\A…ø?aÖ;÷¾çF´2kË:ªÌÚ…+³¶ôgʬq[| Qf­û”Y›Ø…2k&G;ïIs…YÈ?›|U˜5¡ð—×RZSìï}p5ÖšVM˜µ‡³ ³N\}ò[hµfM?ˆûwiÐ62kÊßl°´_ƒY'*<¡Cô¨0aÖÕ'+£· ²æ…„¥­)Y×sÐÏÖ#Y§êDŸ=ÊIˆµÉ©*±fiˆFz”«kËv_#>VˆµÉÛ*±†o÷1°B¬-ÇëôŽ#ìJˆ5Œø«m› ±Fãåãáy6‰5_òy½ÈN¬éÑÞ¤GÏb¸Êؘq­®F¼Ÿr´í !Ö@*y.è…XSœæÙǬȚâ4xуsDdM…ãû)#>J5ŽQÔÁYÓ7ßKg(Y/ÆYW¡¦2¡£ kÿY奈WàÇY{%"²æ'†uïyY/ŌȚµG7,S5S2–;Pòˆ¬—+#²æèÁØÃ‘µ¿*AÖl,ñÑdmjÔŠ¬}fíãŽ@ëT)å<ó ÔšßHؽ‡qbëD!í4O.·æèƒWÒs *·æðƒ9¹3;ÁÖaJ)=a¡`ë¶ «hÄ`k<ƒ^›}[û¸Õ±5j¿aņ÷ÜâµEÛJE¨”[óu`2G„[sTb¼VûD"¶æµ·ÝKÁÖ|zv«líïX°õRœˆ­}lÍ Óò<lmjZŠ­9îh«³àˆ­ÙéêóÛη`kÞöÆ<уڅ['¾ò4¼j×¼²÷À\›2»’k~–×øs$טܫÁ5ïŠçŽÃ ®yá™'àÚ§×^×þH×þÈ—\/¿èÚçaלw¶}bKa×Ë•]{s »öQPØõRØÈ®ye>ïìšÏdeÓʮÙs0ƒô5 Âk/­Àkj¸çÈÔ¡ôšN–]‘Qñõëá¥.ô¨üš|‹ Ú•_›nœòk÷ˆ…_0†DæÇŒc~ÍÝÇÞM•_/· üšÕÄøÑV•_ۅʯ­”_Û·>ø5‡Þ Ìv´IùµùÄʯmšT~m>ŸðkÚ°ºÃ"¾•3òkA•_ÛÀ¬üÚIå×¶–P~ms“òk/­ðk^É#e[iáË‘_{¿æû¨ZØ‹G~½#¿6§Xùµ}µÊ¯müQ~M#Ö5ƒ¨ ¿¶ÙIùµ¾Â¯m P€Í Ÿ7‹ÔkŒÛÛ@¶ 1a/7Œ{¹¡$=°ÏC¶M‚°—’D„m‹EØþa›¯ ›*­gÑLаm U„í¥„íÝC¶H¶70ìåÊȰý ÃöÎ# [ EØÞ]a{?„í/%"l¿P¶x‚°ý®‚°?KëÌR{AØËEa›G¨Û¿Öˆ°Í[T„m«fEØæ½(Âöî!ÛN¶–‚°½ïÂö—%Ûû‡ ìÅ6?=FE·(CEØ02ùCT„í_— lÿFaû„½´PDØKÛÆL?Ö@‚°½<‚°½Ýa{a¯ìUÿVˆýk„/`‡ Ö•`3´|;û‰(CØð’Z‡{asM¼¥ÁT„ÎÇs æ¥[³…Âæa{$º]öÅzG þT„ k›¿ aŸÆñCØL[“GÎ^CØN© aoåÚö]ÎÏA×–žÌ6¾FL=‡l¦ƒH#¢ìLVD>`»Qöj|‚GŽqeë98”`û…B°«[ÂCm¯B6åÔù”] 6/*Ï ò‚íä_öreDØû>›øGCجĕFÚYeØ|$Ï<¥’=örWѪ޹éi as{£äžµS öÎn´ÏÀz!Øì: Enik•`Ãxá=‹’l¶ÜÁž]X ¶Êc+À¦ ãÆýO[¹`óÛ)¸ËÑIsØT§¬ÒV>„\Sëü.o[%Ë `¨ó<+£›Ëâm$a7€m»\ °í® °¹ >çQ‘°™*kYÔ `knCØ~¥lþ@6Ý!uؽkxM¯Í&øÚZTñ537m²Óª ²–FðµfZ4|m«âk{¦âkkÁ×Þ°‚¯ùŽñÉôÔfН}à|í¥àk~”d¥õsÁ×¼-@Z®Å×üb·º{ߌ‘_Û 
ñµ邯}Œ|½#¾¶L_ë<'ð— [žå¯nœðÚ¿q×> EzmŒìš÷LçÈ[¬ìÚ»†°kÈ„][e×nví£œ°kïå®í“t­  ]{M]ûǬèZÓ¦º®ê­#廡k¸ó÷8 hè¯îšG" ]_tÄæ¼ˆ®í¦B®}@VrmíªäºÎüó]#×þ[!×6à(¹ö+…\Ûër]! Vî" ¹&ÖJSÍMɵ—GȵqJ®ý™‘\{GVr­ÝQÁ5ó~¥gȤ)¸¶—¡àÚ¦N×VX×^ØSzÈ—®àz5pmM«àÚ>׿•¸Ö¸֜&®µ(¸Öw2¸µµ¸rë¥(‘[ۨܚëg‰#·^.ŒÜz)OäÖö6”[/ÆÈ­Ù4™;ÜÚ¼ åÖÖ_•[/ÆÈ­ý™Â­?Kt”cOWnmŽ’qk7 ·öÒ ·F·OAÊ@¸õ^“îÜ]Q¹õÎt‰Ô%è’&1¡·~"Ê­ý ·öŽ ÜÚjÒ¸õòëÈ­ý- ·öסÜZGIåÖ^AáÖºLPn½Ô>rë¥<¦’—¬ÜÚª©ÜÚU¹µà)·ö.©ÜÚŸ¹õòÌÈ­½…„[{ )·¶†pmS©‚ëå™’SË\ûøªàÚ)àÚæ}׌åþÐ'\{9\{pm^€ë¶®)Ë´,\S–#(W)¸ö†pm5pí3€ëåÊ®m¾nm΂rkï¯Ê­­×)·v£pk›õ„[û#ÜÚ¿KáÖKU„[{·öÞ%ܚ幃TŠpks§•[ûĵÅò5”`¾M'äW$/2©zÊÛ+ã´J…0gbêjB¦ÂÏmŸ@J”Bvz´eªTu:ϳ'ËTfí‡õ„Yc$zýä¶fMrµ aaÖ™i·q›æT ²&I£°ÞGdÍ¡šŠSÝdO?†¾¶ ëLU‹22Ø*´&M„]s•‰ºá3û€­a¼gÖ ãÖÜï;0ûÈ­7ÂÆcaÃä3—ù‚­aä¼1À¤`ëU.ÊFjÍ„ÖûQÚ)ÌÚL‘Xsj;‚’ëºàßfÀ ë8èú±¦ñ Š@B¬™—;"!ÖóÑ!Öä À!Äš\cZ(YóÂ'È}²ö4‚¬=Í€ k¦&Oó˜ôÔ³æñé—μ¿=kCC§ù¨gíÒý¢gýððdš§9DÏš‡`÷U-zÖ:3T[ä¬yx:Ïq^䬬sqÃ8õ¬=Y€Z3üsÌc;"hý„QÿG£Ö5ÍZ{œ°A뇢¥Ûœ?DÐÚR-ˆž5SàqCÄM¨õÆxŸ»Œ f¡Ö˜!0 Ÿ†GjÍSF”όԚ°³/ÊÐn±u"q…—Üu„[3©ôç¾ ÉKÄ5‡º-uÉJ‹¸†ëš1ù6¹\åÖ0Rí³´ƒ‚­\šÃ+µNDpgêZ§J­}Îj #s:B­™Ú_Ú¾2PkNÖè8ƒõ µöfjÍ,ÝøþG†¡Öô0¨Ž$B­Ù.x9c“@ã­®u·±Ç$Ô>RÛ­[/ƈ­aÄ·Q˜S¤'¶v×B°ubæ§i‚ ¶†kQöó‘ ‚­é”ày5ƒÑŽ­«£þN¡Ý’œRâ5¤v´D°5Œ‚èuÄÖ4âûÓDÄÖÌr¿¡›˜˜bkõƒ”Z¨r-вÆX¸µ{¸uâGR7L ·æTzn}‚°pk óX*t‰e¶†Zø%­Ôš«¦+œm͉…nÑÇhë¯m,w,Úš Þ9f7x*ÔÚ|9¥ÖüÃôˆNj #Z'o¡Ö0äŸç'jÍ+¯3Àq¡Ö,]Ù»~»BkT ¯lˆó+´Þ8×l @ŽÐšé0óô$ ­—ÛFhýÁ8¡õV»îаWhM‡îÅ¡ŽÐš/h»¦^¶@k¼Z~Û}®Ðš™0Úô4% ­ëRŸBË Zã×;·ü¯Ü¡õÆd;[P‹ŽÐºö²4Õ¹Z£ryt0¡õb‹ÐÚ{‡@k® ¶ºÊn…зÅ(7w Z3VmCˆ] µ9õ ­-“•Bë«Õ||dÖôø¯©¼ªÌz£ ë6ó…Yoœñ¦,«2k±äšÂÆÂ¬¹8îI΄YÛ*C™5óŠÂ1™„=2kÿ?âÞeW“åJΜŸ§ØCi@vÜ<.Sꄺ>3Aà€RQ€têýÑn·Ïbo23O1»À"3ÿ•q÷ðXþ¹¹-ÙßÝ3`ÖêÃæµã^gÖµï[·å™¦'³~Y †Í‘YkK%´6z˜5‡JDÖŠ ‹ÁeGÖ:Ú:]: ëA˜cä4ÖQ«ŒÀZÛ#B!°Ö`©6Õ[MC`]ƒs+Eu;‚;°ÖlïβÖŸ;°®_¯:P˜îo6µ‚µc½)‰µæ!¼vapÖú&îãÃ’¬kP×û,ƒ°Vðèö²ÖyHçÕƒÆFÝ^–¼:ŠË‘WçË^]w;hùв¾yuM Ž©;™’WÇ@“¼ºnYwúðXòêA6õ5¯»è××=ûQ^ýÍuçƒW›ýðg6!uàÛ«¶Qd]EgŸ`TqœÚ ¯å¢YqØzy™,¿ès>!±æ,AH¬Cÿ@‰uͦך»ìǃ¤Mb­lz6³è”X»ðŽkuð“U.„ÄzRU¦çý ‘õøGi¡Ÿ\ª¬¹X!xu}òÇÜ]É«G•õ©£Ñ[ò ^=¨¸F7%¯N»° ÙCø„Ì¢ ·±#u ÖoÇÃ8¬3`­ÔÉ<ÅVɫ믟™§ÄX…`Å>yuÃ(‡ ŸÄ€«ãÞW‡pu¬›®N¯àêܸú„Ê:2.•u?u]ª¬aÝî"›6! 
:¯Ö¨£î²cuãÕ9õ^ýÚÐyµ‚úé)é¼:VÓXçÓ°ÎÆ`DZ®ÁRßï:ˆ{pvgÖy´ ‰ë¤MˆxÓ~_6!4 ›zn5AŸ Ù„D{¥MHarè-ì4˜>!¹¥û„„}B¢×¢Ðš+ŠÃ'„ËÏÜ'$}BâÞPj¶”Z‡¤Ö4¯¢Ö:¾ÔZ‡ÍµÖ±×° ám ›¸¤M FÒ&„#iâÞ4i‚6!4 ›Z“€Z祀ZçnA­ÃуÔ:ÚNú„ÀÜ bëtÀÚ: 8@­ã2‰­ã2/no9¹u|@É­ã!“[ósl¦6ÄÖñ±§Ø:<¡(¶~Ml=ÖÏðþŒC^6!ètˆ­ãe¦Ú:úª­ë{7¬½TÕÖé>lmŽØ:>"ÄÖaGl½ë_ûûò‰Íu¦<ÄÖá&DlÏØ:}2€­Ó·Ü:<+Ò&„>7·ÎÃ…MÖ›“[óòCn ”Ã&ë°Ó&„Á´ ±–àZIEmZûð°i³ á7ý;mB,]~Ù„àyÐ&dW1²ÕØ4\B¼I¾\B¸W¸„0!¸ŽoÁuš|»KH¼—/—zš¸ÚzÔ'·“ª­i;.!|ó®óu—ëAÃ$$Z+¸uX ’[§‘ŽsëW̹õë0 É`˜„ðúœ[çB±5Óš—IHm O rë—{Fš„øâ€ë|ChÂL;LB`ò!ñÆ‚[ç»N“ÁÖÙE[çû lùYè¬ã}M|›Ó ÜZí±œµÇÏ ûƒØ%ÒD]îÌgkm¥R¦÷”(½A¢…¯sŽ¿ŒYÿNƒkq”ùšÝøŒ\דß‘`’kŽu|;\«¿•ÿÎ`‰®• ®ã#Ê!»æZ¹`×a‘vÎ,d×R½ª0çU—ìzÔÕ:”=>s¸Ny*صò•úݾ Èuýw5Y™;Evrr'kÍ—£›|ƒ\ë~¬V'äZÏëØï,ÈuL¬’\kjp™ûm¹®mb\½ò¢‘ë”/‚\K„¸ïݘäZsCÅJà\Kô7Xœ›\+õ®ýowqr­É¸âu\+Ó?¬nÈõ Ê‹õM¿+ ×R.ν “ƒë—ÌÐÁu“Öã?c<×M×'Ï™û!;¸Ö(©œS opÝxêœç[ŸlàZƒ/‰³®¢ÈàÖÒÕ1Üõ„Á­5¬ÏuøÔD[Žk[ðv¸nƒÁÚn!¾ƒkí¶6«ú\:C×mŒ©¯ù­4sr}]kc¸§…]kлkûv™µ;ºVp(s×o¹Ö8[Bnb}“ë6ÌngóIMFÞÏzëÃÛàZ{•ÇC½]oƒkív®_§g®é$×íh-Þ“WN®³Ô6Èu Žæ½ääZÁÚ=<+'׊Õ\`»Ë :¸>Ä1=ŽÑ®Å5ê(¹:9¸Ö'=‚ÛËÄÀu#"“- qpý :¸V°-Õ¹_×­ýÔg?[¿b†­½m)ŠckA¡½¬Gm†ol­-‹RÿÛ¯Þ±µ‚WWòx‡<غŸísçØZ[NŒî×bÇÖRéñ®ŸTeluÓ‰8÷—?HÖT¶n5Õ×¾èغÏv¬ô×±µ‚Ê»1ö%¶0«»{&¸!¶ÖE(ïÙ—‡i^çâbëºe½ƒ½ê³‹­£Ü<ÅÖ¬ O±õ®YK_ñõët\l­²ñƒY@l½×ägp£[+8a¢`}Ñ̯ÃÅÖ5¸l¶ bë]óñ6qµõk·Æ­ÅTëȧ;Ö8·VpSu’a¬n­cÖ$¹[ç›çØZ±š™lutøöQPz¹§R„aëÖ"ë#¸ÕyÀÖºÊz¯Ì'ðõëæ9¶Î¢ê'¶Öµ+Á­éÕåžaغaêÍ$Ž­õ—ÚYtS7ÇÖíIÕó¿ëa¶Ö¿«†î¹ãØú½SÃÖmËÉ–ª9¶þ,ø`kNÝã²o·ˆÛ°u{Â5ïzês;¶n÷£¶½®_6l­ D±]mØZÁñ¬1óÆÖºùÇÜmŸ­µåiìwµÇÖm·õûVwý6 ÑnÇÒɰµ‚5ßë«g[+¨LñI[+¸7°6l­à4Zi ÃÖŠí›{T¶n—YúâB`ëÖ:k*| ¼­ÛùLËã¶}cký¾ìæÈéØú4lý>œaëv×Zë;xcë¶WÓNÓ[·½Ž]WlÝžÕ>Ȉò:ÃÖzsDz|^QÁºÿg"ØZÁ:&¼+"ƒZŸýA¯ƒj­žd›Í€Ö©µ¶Éû´8c ÎæëØºsÞ¬ ¤akStlo‰·akmY–^ÒØZÁÚOÝó­ ÖŠí£žØñjýÞ«QëW30jo¨µ:±ùb¼gШµz?¹BÖ.ûímݶ»•ÔE­Û[µ´K:5j­KÐLâƒëœZkÃá|9/é³QëìœZ·§8ôÙRPëW{tjݾAûôyuÆwШõû˜F­Û#>º¶ ÔúÕ¬œZ·-gGÚF­[›ÛÌIÚ u{³×uh­à4²shÝ.s²©-§Ö ÖN× ªZ·{°v%¨uûöf*ïÔúõÞ9µn—²ŽŸVg|mèÔúu>N­óæ9´nmd2ûó Z¿nCëwРõëhƬ_­Ã˜õ«·rf­Öœþ!`Ö¯®Õ™õ{KƒÖ ®£Ñw‡Öí1®½ õë8´VZ³ÝoÐ:³%@ë–•îÃpÝ.e°‰_ׯëtpÝ‚‡Íþ:¸n ýN)þw€ëÖ"ówpýzd®[ËÚ‡wpÝîûö…±µ‚eÿÂØºÝ÷bݯ_Þzêµç[ÿå—áã_~˨ëÍŸzÕÄÎ9E¢?Ng®­pþñþò¯?‚ÉU‡þÊîé+e÷§'óClþ;ª 
üÍ/ûgôºÒóko?áèJäŸã_ù©gp?ÞçIÿÜëïüüã¤.³æü-ãü9wÿ>þõ—Ÿzw;šü'G?÷÷—¿s²ÁÕ°¢%5 5åpÎõ„þÓ¯—}þÆ?”zοþöñ_ÿÃð?þÛǯÿå—ÿüë7F'&R¤©ºÚAþýãԣܪçõý‡*ZC®!mM ¾ýPKê§u?§·Š·õÿ‡.²¸ ÕÏú*ú¨|W[jš§àO:96Ý–,¯óñàOn³ÍÄÿ”§³nåõtp?ùé´ó‰§Ãóùìé|C»hÈ^O5õ­yß0ŠëÖ]L5S}õGÃGùÁ.–‡™ÖE¶êC-ï£÷Q¦ß{˜i•Ît=^üú8K磙ÂM›’c ™»y¡ç1ô?íÿý/ÿã·ßþô×ûÓŸÿüç?ýö·¿þíýíwuçß¶Ž1z5ÍåÖA½<Ú»½ÙjDÍÖ`ígJ¾›lCÿP\ëx¦›fçæÇ\GªZð}‚¿¢B~)—?.c·½,ò¶¨qNÍk½Ê«‚xQ!— ×áT=6,E%Îö»œ ç&ùeÚûXlnUÝËã~S´Ðò!¸Â#¥ÍÍgpìŠÍ ×a«×–s-Hop ϳ—.NМðlëʼvýf¾×©Ž¯¶sY» Ì›¦ÙæcÝ®:LGPQ¸Ã½è¹è0ýlEèà<¹L&P©ôux°PmY6ã_/ºM÷^C×¢†ñpJÕ÷Y¦§\|Ñtðƒ"ÕÇÔ?Ó9~/šñ}pc«ºŠ¶`ûô{›ŠïÆt¥Žíû {^I¡ q×ÓÜ•ú]8­”bóäͱ°>K8WÊÖ§ÂgéIÆãC×oÓÝ“föÊqUJ¯¿Ù„ö¬ršÝm¤hôÁp“ì}–Çø´h®ó!m«ÖIOn.šÎ|hÚ*áþqë ËºÙì²*&Må™x,ënÈ*ŠdelŽxVuÔÚN._ÙF›nŽ ]JSê—¢OõΪQ46³€3XlJwçjžs×–«ÍÚfƒÜv›™5E¤nì Š=@I·¶›¼1͵åΫ`úy%šÂz¨0û±ÞºË"Jô€YSÊéìšÕ/š^¬%×V»•e¾‚»Íu¾ŽyØ|¦:+]ޱOYÖW{´:C“MJªÃü¼fHΑ×Ü<;à9¥™ª'§š5u¶O÷ÔHÑdÔ=}˜ïéqØ ¡¬&?Ú]©—eS€âÎõ ®RÕgp²Y¾zÀã¨YK9u׫ñž.yÖŒÁ>Ýdo•¬ìé’µ´|¹‚›MÇé1™ïË*ïÂÞ%« T7V\eÎótÉRNÕD@þ6gp²‰³zÌ©þt¯NZ5ßÒ»dvU«\±fkÇÓ8-Çùðëkk³\RN™¡ÊÚÔ¢O3Ö4ñú|ÐV­žzzdõªér)EV-Dï=²f:»5Òªeó›µã­þ×-W]Õ[öY»Û@®zñ½G^ÌßfÕœÃÓ#ëu­=Ý ïVM+ô9n­žàÓ#ÇÓTC|zdv’õÀm¢¦YìÀfW›Œ©gUOr¿ÍÃÖyëó-³ÀeíÈ.­ÿZ߯>¥¢/»ùOÔó±Y“I%®×GY° ´÷¹Í·ëØ* þôÈõÚwó¨YÍŸ¹MøtÛ—ÚÚÆ$½vZÖ«²Y Í1™$²6›ˆÐTQÝ÷mè¶–É&äÇm¾”kýÊõ …VHe~ª¾Ôd:¦£Ôûo“ú$kŠ|¿zÑä§KV 4æ*bütÉ“^‰ã™«[…{Ÿ\ä»^ù`MÓ Ôçù¬ÅX¼êט«åº®†Û'iwçÚ¼ ¨«.YŸÔÞØ˜¹¾gÇzå^ë6v(ÞRÊýq{ˆ1Ñ"aß#/8Q2x{ä ?¦Ë¾Ë5þ#9v$àõuܯ÷'ðÚ<—¦Ì>IÀëhi‘ðóÔ¥#ÿÖL˜¦•o]>òï¥eV5iý,ÿÎBÞÈ¿Ó'ù·&Óë×t¸çV‘7=AÍ nsæßò‰\ëë3]éòoë.ï.„ù÷¦.éYEÁô[ºš½{¢0ýÞ$ÛÝ–[÷Åô{“yñ²—Ê›é÷ª…$ãv¯†aú­ŒjëV€L¿%æÛéöcú½êóü8÷2û^e0Øýv˜}K¯W¿±w_Ïì»ÈÜ·>¶íÙxö½Jß°ÜB &ßµÛ¦Aeöä»^Á°Û³@ò­™¹ÚoÜ:$ßEö=õyMñ1ù^4mÛ%ã;ûVo¹ÕÆtÇ<ù®wWùØsD$ßy H¾ÅýÍþžÙ·ÔµåÑn1ù^ä¦ÞçG™|/ÊŠËãUÆä[‚åCÖ£×½Cò]Ç®›¹ð0ùÖôÜ'™|K[l‹ò#ùŽ”É÷"atŸqdö­Dr=KùȾ¥©í¬Ì¾%é®'sU–dò­Xíèî“ïYµã¦ý^êÉ·$ðm¥Yy'ßšÔìÓä‘}Ç!¯ì[z¸ã‘b¿²ïÁ;²È¾=þžì{6ÇÁÇì{)­²Æòξ•õ™wâ÷eßkwë{%ßz·oÈ¿_ò½K¹z­ûÎäÛ²ÒïʽqÈWîmНܛù~æÞ6T~çÞf ˜¹·ìÞ¹w ÔÇ„ä;ñw%ß5±©¹Ôñiò½ª›¿Êœü{&ß5뻿¹ß™|ÏCíÆÎñð;ù®½õíêû}É÷hîˆß™|㘙|Û²«Wò=Û¢¤H¾ãÆgò-ÑÜ-vygß>ÉìItfß*zëï^éwÍs ï;ýª=ÆÏpsú{%àKmö‡©>I=t"?læ7~ÿÂÈÈÄkrT¯º y_(|Ögvüœ„×—®fð5B&.‹ÒIöß7Ïvë³HÂ%¦ßk¿»_¤ 
$\ʧÒëÈ’„oúØôu×$á5¹Õ»¯‰øVª¡¯`d"^ƒóQs”KçÌD|mBЧÞ:3ñU^ßÇS‰™øª—®<rfâZR¿Š÷ŠÓÈĵ”g|”àÌÄW±îúk“™øªuÀ ‹L¼îfy´ŠÌÄõ*ÛòØÈÄUÚ¥—ŠL\InwØg&>h´þXçD*.Yt½íWßw§â«Üª{]‚HÅåo\;áõN·=ßÿxªçÇñŠ+Áïµ@"ߤ¼Þ™Š5Îú/§ÈHÅw}•úTRqMøÔ;{ø2oWÕ„zû;ׂ¬½¯4d.®îZ=×µ†¹x[&Ïö+»E.^¯³¾àó3nD..¡–†^³6LÅkã8úº¦âúžYiF¦âE†³KGÄHÅkãܬ@Sñz%µcïo,Rq©,éñ%g.®Õu’zß™,rq}CkB|/de.®µw5ù®ß^¹ø¢å±gzøÎÅuÈËÛÄrñEËØúR:æâK“ú¬µ¥Œ¯\\YÀÖè1×ËW´ë´m`6^F±*xÌÆÁ÷±~/Ï¡8³q}#Ì0ˆÙø¢ûÝx˜7qžŒ Ùx뺺x–Ù¸Fq˦u³Ÿ°ð&2®wo¼Ò1¤ãgV·Ô®û“t¼¶«ÚŽžâhÌÇ•òÕp7æã‹Ä³ÝõùxŒÔ"!oýw¹(Gäãªa5ܵÑ"W38žÊ&LÇÕC[ù<¦ãõQS_˜Àt«# ë0׺‹:Ì|ð>Sñš@•Ó蕊—¶Šæ©[ÈT|Q}­rÖ(µT¼n¿Ö„÷©HÅÏz Z2W5šã™p(^›•»ŒT\¢·µÏAŠ×”Pî a`*^oaíô—á¸Å, âA¡IÅ%ÓXìˆ×'º,}Ye@q~„Šï*Ë7$ˆXœ‚›ÀâÁ`‘ˆ+É«_¸ýÒO×Z´šü_#ñÀâJ{GX g•§œ;qݼ¥¶ñcÿ$Oð‰D\Ÿ¾ö%,ë;× Þ^Äy¸v½TMPqÙlô’3×ÛUž‚ãAÅU)xi#”W^7Ô²ÏÛ?"¨¸Êv¢T<ø+©¸ÞýYUT|‘'ÓS‰&¨¸æ“ûÌoPñҾݷà5¨ø¤e=é#—§Gwé *>ÉšãY°Tœ‚× âZ×½= 9ƒŠ×6VÛÍ3¬&WÜËZW¹é1üc&.÷±ú2mSß™¸ŒV­²gPqu{:‘ŠÇÙ’ŠË:»nxU, *>4uùíb„L\ž ¢¤_ *®òÛCm[&Þì ÷»šå?½$û‹‡/ËÕl3 ›YÃ=GþJ]¡ ü ¾J$"»¬ è! ÏTI¸†\P­@^ÏÇ|/C^/Ò|ù^Âp¥C·ø€ºpMÇôJ“¡ _úŠáW®YI­içOpx*m© ç|fèÂCÀ‹$<¾2¡ ož7ÏÈ’Ix̃….|Qeõï‘„7þz:7Ÿ£.<´èž„kÒî꾓pìºÞÆw.׈nÙÇ\}ÖlBKÁã+¢ðMæ¦O)<¦àµÕ¤ìÉëSî¢>fàúj[=Hfà1'ýV…—2_$Dᔃ†(<¤ïLÁÇæ>u»®‡(\®ûz/.\Ý}M‡.ó ¦àz†š)¹sS¦àƒfnúøå% ï2ÈÀ©— \>FÓ|ã¥W>YqøÈÀe{Rs‚ËLã•‚×Ó~œ#×<§ð®)¸bÛúLA1—F¯¹ÛSðúð÷Ûƒ)¸’Áiî:)¤àÔ G >ˆ_Ìwø)¸¤ôVº2Rð3 Ïù|¦àÔ— Ïô“¸XÂé÷I ^_{£»jp¤à*‚V“«ÈÁÕröõî:#ÇkÇ|’™O½·Ãgxä‹ÌÀeå±?Îb‘GŠÊ <¦ä™ËϦ~·2}’3“fù+2ðM&ס3Å‚sMÐK˜‚ARpö/aÊ\¿·­^¤àlTÌÀë'¶vÕZ¯öI^_²šÔk-ŸdàÍ$ì™»Ž <©Kq±4pÉO{áÒHÀåhvtlÏ|‘¢³ÏÚ¤,Eïj}¡ŽOp®þyÉRê»2ÜL,p®á YÊ¢÷ãÐ 8>Ö/UŠçH‘‡À†ù·¼Ê»,:òo¥°Ã¼M󧪼çÌ¿S3~ 0×Óäèwäßß\b2òïš”+ÿøƒOr&¼ÅÀൗÖÔçU™€ë…ÇÛ“ø&öã1Äe.Ñì8=ê$fàõnÊ-eñ ¼fSÈ _UPui×üÊÀcÕSðV|qï‰=RpU?öžJe ®ýïãu‚Ë»LBæã–È —å)¼™‚kekM ¯¶‡\l8º1Rðæí5uû_¤àK[+¶t1®§àK}§—Ò+Wp«Oó† HÁÛ–§©ð˜)ø;f)¸‚²Gº]·¯¼¹Œ9‘÷$\A-’¼?[ž…+VÛîq»S" W°^Ìr/AÞ‚{ŠÏ7%·4¼MÍÛz›/" ] §á Ög×—/{þÞ­åá îãÚ1©çámËú¼oÓKäámK™°®ÇüÊÃß[ZþZÞ®³fÓ; ÝÏÂÛ3©èúÁ_ixk³¾‘Ó+ _Ú˜Yuœ>ÉÂ_-Ù³ðVWsp—ÀYø;hY¸‚«+, Ï¢ÈÂÛ†µ{•ãZÏÂõ«$5ÏkìYø;سðv,yÞKÐ= Ï ‘…çå! 
ÏÝ" e"’ð«§²¾õ(еY>ìÔ“ðÜÐsð׆žƒ· >8÷*0ÏÁʲ?uÏÁ[Ó¨ùYýޝ<’ðlTÈÂ_»õ,¼’²;ëõ,¼mY³Èù^`iIx‹Õk|¤¯ž„¿NÖ“ð÷ùXžÝµgá-¦ Ø×*kdáï [Þ~®wåÆ3ÈÂóÃáYø;fYxöÈÂßAKÃÛb—þ¥©ñ4¼mÙ¥uÈÂ[‡<ÊÙýFÝ–…go,üµ¥§á¯-= o'+§éëK4¼m¹›Ñ+ Ïï#Òð÷n- o[ŠŠ]”ix~_‘†¿·´4ü´<<¿¾ÈÃ[°^Æ´\33ž‡¿‚ž‡¿vkyø{CwòymXrƒŸT6ç•oǵ´áu8¾ ”‡‹ìýƒˆl|—¶¯~ί’]a”BcÌ0J‘×výª]Ù$}R”õ÷>p¸ÖéíË“û2ßT:c™Ö›0g2^_·i{ðpH´^8\VùOÊ„\<wD.À©¸êÒWø1—Óï$×üJÅö­cƒÚħW*ž“£/—Ÿ~ h¸8Û¡ß>S‡J# oU[–õrÀ%Eeíë§püŒ†ÏšŠªwö*@õM.)9 ®Ò)ÛScâí’b랈‡E ‰x¢ô âãTÏâV3ˆÇºû â5 ¨g÷,&çZ$"ñië)]¦â CˆÄ) W…!`¹‡$âä $âbsº}·\D\Ôsq_ø¤hÁöö¸¸‰×îd¯/ø¶\WBŸ­^'["qUYÛm‰}R¤Ì8¦ÎJÐûS…Hœó™ÄcBöBâ*Þ¹70½’ñXp$þÚБx«Í¶Š L¯dü¬“µ=D‚H¼îë‹Õ‰Äå´>ôµDâ*:6õ5DâJwkË|ô`âuËc>9^ù¸ê•6 3|²ZsT]š>2"×–‹eùŽÄóJ€Ä[1Åú&,ó'²”z²«•Ž"×Z©Yå­æOÔá²™»r¡W:®“­Ý£eÕÅÇEÞý£ºµW>®-Eþa™_ùx{(õ[p¯õ'ePºÝþD€âõFªàÜý%¯ÛÉÆî.%pCq9$ÔÃã[(®êqS_(>ª’Ùl¶,ÅÕ¬l¡x=9­=’ PñQ˽ê›:^ T\Uï¬È©ø¨z6½Ž©ø(êÖu[ âÂrÃÞo€ÅG•j«y·W>Þ®³v‘™°x N|^îÑÄâ¯c:×–®\’Pq”píZ¸D*^7\¬²©ø YJ_ D*^ƒ‹ ]÷–ŽÅU(Ð*K‹Ê°Ïªd¯t\µ)úoq‰SqÕ«° bT¥ÔÖª5פ#©¸¦ˆ· îÝŽ¸ÔIõòkB­Ì~mg®?•Ö—)zþéŸîþÙ‰ü“Ý;ï«ûÍ®øŸ}HÙû—ëBŸ¿Ì?Ñ»%× Üþ©Ç_•Àß'pÿågžÁuÛÏ'ÿEUjz:úSžÁyןîñÏÛ~Àõ—ÏÎàœ³ë7è¬é+«Ä¥-¨ §—íêïô讟BÍŸÖ¼pkâô²èþ†#µ2cE‹Æ÷íÛ´þ43ðÞWöúŸÜdîNùgö•v•§O}[î÷Ó> ÊíòØÃÏú,-¯cŸEÊ~âczŸüSîy;öð>öϸç­Q/¯cû=ÿ†¾pme¾k*¨µ|Kó÷Ú4l\>±ÓþqÓn¦¦­‚^Ç £øOÓî8ÌÚêÓ«–Þüí¦ÝÿÇÿ9~ˆáÖ¯ÿjGýƒl‚õÓöñ;¦ÿ“V6G±öŪ+´µ<»>¿zÜ<•:PøÃÕ!ÿËÿùýßÿÏÿû:Ÿå¢ðçùü£; ‰qHÎ =Ï:š®Ãˆ`)þo=-Åןd)®ÍêÈæäp=CúìÀJs—¼ÙþÔVÊÖ¿Ïõ i ùŒ¼Óòô³ÀêÇÚ¼ÊjlmCÊIkÊ&mW7k BÏqòÙç“5.<äA±µRëVK©g2*TL.8zX5>«¹ˆ–Ü·ØÞËÊ«UÞ'…Ž†ë‡¹öL®8cS¥¶ßƲæg³:èÈoš[19)¨ë^ÏØØ•¼EÆõ¶ËÖ4Ì®­ò\d35ýó:Êk`°ô¯±û+f'^Ïë9î^[è™;P¬6;=‰zò aÖÇ7Çyk+!¯úÔ¥U‹{JàN2θ¿›\àž*| 烯gÒFÚ“w×’¸†_ûC,G/ß°mGÙÓj-«ÅHþ~®ûüY4¿µNÁåã~ЊM½`|Q­];iùËÜëkʶx~:­Ëç»$¦NãÁqŠIKy´Ö)ÐímBG×8»¾¥U)î·o]ûô…¶36µÅ·'aûv^0Åö>³qRw»„ãã¶œï[Q¦m²Æêy ‚©óýU©7]…âϪ}Úñ¶±Í‡œ ´žËƒ)ÓÌçµ ®ÅÖ~¼mî—«ØØÉý´-½H¼®Ýoç¶ô§RÖŹýT›ÙS"¾ÝÏ©ßëÚr5Ár6U ñêûw¹oL›þi9›ªè‹Ý²M8ÇÕT×Ò±ÍS}ø³Íª´="Y‘Þ0¦Ê©µ´F»©œq?•k¸žÑôßÕ‘–VúycU„õÞ¥Zx±~TuxïÒÚÿvXL+H‡»¥¸µ*S½ýhÙTA¼_Ÿ«¥î>¤ØSLA‡ÓLãÝ}íÆpõZû­UÑô»º¥®n‘ËIì&M„ÞgÝ.Ïn‘êÍ×Öq¶ÔØç1´™¤³¥n ×ѾfWKÕ$N¿›ÇÔ`Ú³ã˜%ñ»jÜ–céÓO-6÷~æÐ¬æÝ§ª©ØKSqÏ禴b×ý–k«6Ô­x7-óÛrw©k'Ì i*ôéRÕõïã±÷åÆíÂ=vôâï¥ }ntýÓËm }.OŸ<c«ü®ë²Ùùy˜ú{¬-¬e+öÔ}×vÆXæan3cW‹Ý¬÷Ÿ¥^½-D0¦XQñÅÞ`ûÇnÚ 
ÚÕ`7+[©ØÖ÷¢Øó98cûxw­;(„ˆê3ÑÖ®¼%Äî®U/ïÎN}w­5fÏ­Õ{½MÙmþëŒizîùö[«lsËݵîZTmÇ“£÷ݵª8´=!%œ·Ìºìî‹æTîZ¹ÚÎÚ‰bO÷ÒŠ*?¹Y›âÙî®Uå\úÓk"µ]=ëãÚ6Y·Ý=«Þþª¶‰Á[‰ÕžÂút mÙ3$RƒèÙR›3<îžµÕ¬ìgy¢-mÐí04#:ß©ª.Ù›LÀï§ß.¹½;î:ñÚçÑÄs½ÆÝÅjþµ=L-Ͻ’Ô×I¬m*òj«‡çÄmfz~:×Ãó¿¶ey:×Ý;Ð&Š_žÎõðÏd+zq¢õL½•‚`.¥·UK¶ÚÌü“§Öížù3vô•¶¯Ö{Z§Øñ$¨êðìÞ*ÿÞzj Nc®'?•ROÒdUôÔ_Wì™`Vlïÿ´Ý#ëã×½W_'"×ÿÃzP¿ðMIÃ݃Æm¯Ùêò¤§»‰é›Zåõ«¡jÄÕê¦ùÚ'?=zêÆöÉzÐÙ.OÃÔžŸ²—ܤïxÆüCY)¶öNZÛy#Þ¶Vsýl¨­²[Gkl¿ÔÚ#ØÐzÞ$ ¹»R½¾~*G›Ø¿ZꂾLϹ'¨KÏl3o+Å|Ÿ5¦rëzu½1ø7^¯§7|¥QO†]³>[=CÅ[°KŠRz‹-×û“šžæÖÑ,½­ÚhzöŒ¬É³ìÍØ%T¹;U=ûŒÖñÒØSS¦»4,w§Z/Ë?é‡VPߪrk¬êH{jÊ瘚FáKH1õ é©éàðE±£§¦ƒëæÃVªWõTàÐâ·§Wµ¿(¶öªêùØêÇoì©éŠ´ë5å›.Èžêˆi{rÓÈÊ£UT¿ëäqý>û¨\žrcÏM'‡ð­ë õŸZs&íÒ¿t¯é­Ù;`ZõþF@h×ÑG$ ´å#¢U’þÔ‰hë²¹IÎÊiøI•ÈfËép¶I‰üü ή-›0ktv­ðQ·ÓYŬY‚ή FZÀ´ÒYYF L[Ÿ –sZm‡}§mª/»‘Îi[]ÕÞ×;§•Ìá€sZ©ÌüÙ8°Õá&ÇÂFl×e ~5b»ÊåÓnÙElu(§ñNló;âÄV :KµAlu(çêNl³OŽÛµ-díøÌ‰­kòØ®MþÞŸÛUµ>æ°ÕvƒŸ¥Ûv½û°Õ>Ù;°]›]ŸIq`«˜ó,¶Ú§G絺+k±]¯m1k'Îk¥|ô¦ç¼V‡6^«˜a0õÚãt²_µú÷\Ô¾n•ZÉ6ývP«#ùTÁ%—R‘kÝNlµ7'úÞç·;áØ×mÓ‡Zãqd«}Z‚çÈ¶Š‡ Ù*f ÈV»ì©-ˆ­6ónÒ‰m;œS`#¶±K¶m—ÖØ*f1€íª•rv*NlÛvöf8±m}‚õ%NlÕ,±ÕvöÕ±Í~Ɖmë0VwdÛ6s°lȶ½øÖ™;²]g²· ÈV1Ï“.f«Ý†³Õ&žª:³m1ctÎl㥴åËë̶E:çpd­ ȶ=6Ã`Žl_1C¶í%0ôêÈ6/À‘m‹9¸4dMÈ6^H ÛÖ›X>íȶíÓ±¬!Ûè…€l_û4d«7hÙmôEÎl_!c¶Ú%Ȳ1[mç4Þ™m‹õy0ÛÖeZû;%¡qîÎlsBà̶‰ðWÄfÛNÞµÎl_ÇëäV½Íã€Üj3Ü~{žâkŸFnÙ‘ܶoU„:·m}¥ñyç¶±GǶX´Øv=×øwŒjضmgLʱm>Ƕ빌³£`ëæò¦8¶ÍFbüvm qX¿Ï1n»+b àÆ—7ÂÅo#À}íÍ0nk%6cá7ß6Ǹ- 2^ÜqnvyŽsÛ»m˜Àqn¾VÎsÛûkDÅyn¾÷Îsó•sž›}‚Ý'ÅÊÍ7ÊQîko†rã^9ÉÍÛï$7ûy'¹yÑNr_§ÙQn¼‰Nr3d 7Üȃr[§f/ð¥u×;aœÑn;’}è¾bt³q ÛÞŒ’ܶ/ëå®Í’ÁÎÚPnK…-Ep”Ûn } å®ÍÍÀð°Ñ«XŽqópŽqó½pŒû¼ÓÎo³»q~Û70p«¾kîšN€Û³dÀÀm~ŒÜm¸ÃQãÍüà6†¯M%®œ¦wñ?§V! 
®–ìß½Ý'[Gz ¸B”'R…ºv íƒnÌ¥ÝÊVÙ@¬“ÛVuç#É-?=D·óò5º ˜!ÐíLáÐmŒ¢nµéqÜ#¹ml’[¹?.¹G›»$¸•®_ƒ[¹Mrp;Såp++çÒneCaÚf€[íÓi*À-Oó·ò’èÜV%MrIn{sn;qºÜvJ­®Ç%Àµ×¿vL›'L;õÓ6—\ ØŽiu»ýbÓÊ¡ÂõªŽi§ÓÀ´Š9¦(¢§m&¨_pÚf»ÚÏœvÚ©ñuN;QýIN;Bé v:|¦˜¸6npm[»o„Úq­bv8àÚ‰òß ×N›gdĵÍÃûSíÚLä:­B|ë´6^=§µjz® vZM´Væ9Öç×Nè×Ê(Øa´ãZ9œXS¯ã×êxÎd×N!Úu^«;}RU'µÑB@j§Ãµ|AjL8Ô²&©eŸ R;ƒÔ>u¤6v R‹B\Aj§‡ÔŽÔ´:©(ù©BP'µbH~Nj'èÁjÕ³ŸrJ[õŒ”V1KÚHi™œƒÒæ>AioŒ¤´úPØØ‚”v@ROJ;P J; Ë&¥0Ä$¥Î xVˆ1àÙi±žUÌYªáYöïijòcÿ ϶ZðvÍŽgù"žUº`8²ÑY}rmÐ :ÇUì+:t6°ÓÙh: ³jÀ}‚pK0g£βåÎ2F<;€Ü:Õ•Ýž•¸€gU Þ‰¯ãYÙ€;÷t<ÛŠõF<[c>}<Ûœå;Ev:;ggP6wäPVÅ1\ºëPöÜX촦ж³ØVšÄ¨ëcùÉ'Œ6çl`±ªX`LÕQ¬|ÞŽ:ŠeÚ;aW°ØH,V_K;G±Ì ‰b™DŠåáˆbœ&Qì.éÏÎŲÒ2á£öÖT@C§íZBm\fàYWìBCK!é,o5´3í@g)< ¥>‚t6b ³—Æ€X–/Šg©ÕIñ¬{@;K´³ëu ß,𥴋¢YžzŠf¡e½E³®hLÑ,b!šõqiŠf!¶ Ѭ«¸ š ñkˆfq*)šu?ˆf)@KÑ,b¡šõÓ$—奧lÖ¡›…ýt³|Þ©›õ±uêfý¨›å}¡nöj÷)˜…‚ fãüB0‹ÍB0 Wf©•LÁ,d¬·bv³³RT̾übENe)ƒR–Z2ÒYÊÖ¨”¥>JÙØ'”²±ÏPÊb‘?ð,µÝijÈç^zY _èeÙ÷Q/ËN6³ÞùQ0ÛA0Û…`֧ܾC0û…õAÙ™¤bÖÉ4ù,–a™bVP̲§búÛP̆=³€¸Îz•ÂYï[R8‹Xgá¸PKYu g}»Îu×r­ q-—òYg–ĵ±ÏÏâbFrݽ–WóÚ¸•0¯=† µjÞÞàY{`&–žµêÁÝ—Ö=k^·æY/(ˆd³¦+-Žâ®´j‡.juWÚó(0£ÈmFß˜ÑÆý¦-ÌhãúáF{ 6 Lió‚Ü”6OÅMi3榴y87¥ÍíÜ”öàwSÚx-aJ«ZâÖe”6Φ´ñJÀ”VçâÜÔMiådnB]÷¤%˜¢'­®§`I)[Òæ¸%­NÒáµ[Ò2FKZ¶pZÒjôÙ'›%-úQ;ü‹•ƒì}ì—h–RGzDM1g±õcå>Ÿp¥¥p”®´+àaºÒ‚©Â›Ö‡@áM[¨§…7m\¼iWH)¡™} r¡–-äPË–õkWZż™«es;WË*~¡–͘«esŸ¦–ÍSqµl<7¨eû qlýÕI7t²»t²ïŸ»N6ž³…\‚ÙxÊÌf̳ ¹’׳ºgîXà‚YÅlŠ‚ÙŒ¹`¶DU2Ì– bO(f#ŬbÎg]1[cкnV1CîÐÍfÌu³ïXÎfÌ•³n«4ö3él¡–ÒÙw¬kg‹/F¤t–!(gc—r66pᬠa>‚Áö]*ûüèYýhhÕ$²q…¬B.¬u…llæÙ¹>6÷èúذ¼>öëúØŒ9r͘Ëd3æ2ÙŒ¹XöëÈ5œÐ\3æÈµl_"× 9r}Ç:r͘‹e#rÍØÅ\ ëÑ€¹¾c]5›1WÍfÌ¡kÆ ºfȵ³síì;Öµ³âÙŒ¹x6bÏfÌųïXÏfÌųsñlÆ\C[6RQ×оc]C›1ÑfÌ l†\K›1×Ò¾c]K1hi3Ö´´ù£kiËF±©kiß±®¥Í˜ki3fZÚ ¹–6c®¥}Ǻ–6c®¥Í˜ki#-mÆ\KûŽu-mÆ\K›1×Òº]@K›1×Ò¾c]K›1—ÔfÌ$µrImÆ\R1HjkÌÕƒÔæv—¤67qImÆ\R›1×Ö¾c»'­ˆ™¶6C®­-˜s¥¶6·smmnçÚÚÜεµ±´µ±´µ¹kks;×Öæv®­Íí\[›1×Öf̵µy<×Öæv¦­Í‹k3æâÚ8ĵïØái+b§¸6uqmÆ\\›1×¾c‡'³ˆ™¸6C.®Í˜‹k3æâÚwìð„1×F âÚŒ¹¸6c.®UÌÍ[]\›Û¹¸6·sqmnçâÚÜÎÕµ¹Ëks;“׿f.¯ÍÍ\^[b ¿Ëkc;Èkãx·¼67qymÆ\^›§áòÚ<”ËksŸ¦¯Í]ºÀ67smÆ\`[6:@¸À6·smœ ¶±¶smÆ\`ûŽžÐâ\\`›Û¹À6c.°Í˜ lß±ÃZœŠ lûJ`ËÇ@-Gmþ«)lùÏ ëþ>ÃÚÚ±?Ó#/¸[ 6¢#Âæ =ºÖÒÖý Ø&Ú‚;Ú­¹¿¯IÚ­C˜ÐBf»³À™³Ýµ`»×@Sf\Èl‰¢¨³-XøOíDïèl¡ Îv? 
åƒÎ–uôBg[²¸X×ÙÆfÚŽÅRh;@ýJ¡-k’Ah+\ŸÚ¦³eÝDêlëîÊíÆJHÐÙÖo Ë(!´Ý^µÅ¡íæ²xêl7Vœ‚ÐvÃZ m·‰âbWÚnXpF¥íÆÕíPÚn§ÏaHl'`hJlQ~•[}¿ÝéÕ5¶Ú§‹_]l« Ÿ®bu±-k‡Sl»/ ÷ÛJBl{Ûª‚¦Ýˆms—.¶Uó7×ÚÖ£Ÿã]½¤.ß½¥¶!8…Ô6ÑoWÚÖo¾¿ÕTÚNTÓBi»~ecÛD¿Æò!µÝ÷/}Zçc2UJm9¥’R[WOCj»ãëðRÚZ¡Òö@ç“J[ØTÚN¬V¥-,CiKGk*méyâØ·MÞÚ4˜cß&´wΰo;Üðöào ìÛôj^T JÛùK„zߨ·éÐM\áØ·=¸—‹m̶Sc‹U'à½j!¾˜Õxo,¤‘mqP[K¯5ŠliV©í^¨†Ôv¢Ñ+¤¶¬ßM©íF}«KmSŒ ­-eÔÚήK Ô¶@8©mpôäRÛz*^z"¥¶ÞhŒò6õ„o©-VlPj«6êâäÚBe ©í„íN­í¾ÂÜÖ¥¶µ rá6¥¶ÃÇÿN©-KÐCj«•$®6©mô¶R#~^]¬}Þ]¦ Õí)T·j¦înàª[=먺eð[u/e·;e·Hø)»¶GÙí õd·;Íu!»ÝY ºÛB@ên7ˆä¨»ÝaØë²ÛÔÏBwë¹WÈnWùgv•v•Koš?íËPsñ×±‡ŸõUZ^ÇÖO?õ1[q®ŸrÏÛ±‡÷±Æ=ozyÛïù?î6jXþ¸N»ò·RûUS°Ç\tn¯~cø(g·ñݽS3Ë:¯ÿUó>Êqeú½‡YG œë i¬mñëã,yœt ¹TÔ ºÔ¡Ë<·IëuÒLZCÿÓŽðßÿò?~ûíOÿö×?ýùÏþÓoûëþ¿þö»ºÝoZ•¶h^ÜW¥=÷ì"Ê|§͈?:ße5p»hBÜk8\l]­0aòK¡ýøŽá-Z÷|ÇÑ)i'»)üìÕ£è† Xp³¬6p‹9AÅöb˜ÂÆÑKmò]­PGÁ~.ëlj…½]ÖZÇÉü­c~ã™õõ«¡›Ql‡£4źHAh¢£ôe]M¤ 41Ylë F3á»ßŽÝD œ]_4~Óßó„ u‘Š9½Eóáý]Q#bÑ|øCW,\4þÐßãÏEsôÊW×ÙI­~î*IìëÅt•B½å¶rlÙŠ©V¬Ë^êÍë*…;[é¶u}BýT¨ V,\]6³…*÷tQŸž ´Ì¯seL P0±4MÈÍ(´°Ï}*Ö ejôl^Æö8÷Ét zMë¹ÌuO-6›N!·˜NA«Hkç»–½]ŸÃÞÒ6ýmÒtøC~ÛrЇC(Öu åBq‹ÐÕÃ|58²÷Hxø!¿ÒÈtú²ßäw…ÕÏ"nòßkë … †7‹`ÅC~WÌ»/š°{Èï{¨{\µÁ©i‘ ÏC~7¸~+ÖE L£–šƒw‘Â6âÖd½‹¶™|¾nô»Á#i–h7ú]QÙF±.RØPÙf9¬xˆbÖÿùK=èWR›~éõ]7‘Šò•EóàúÝP°M±®RàÃ)šÐo=žÍM‡?èwEuº¢éð€!‘*Ãj*½~½×Rì貫2˜8MB)›,*Ãn*uÊÄÕ±DW)Ôî¨k74"}D m«Þ¼ë"… v¥eM¤Ðz1‹M&Rˆ3çS¤?/&R¨ghý®b]ª PǤµ{ë쮌R.íBu8›Ô/š¿?ýº2›e-J—ÔNÔÜ´·“ÑfÁ_ûu_TWí¡ÀéYK™l¶J×·øñ6S-ÔØî÷ÅÊU¶XT¥uZÎF¥&a‰Die Ïy©Ð®ÝJ{ÓšÎÐ.OõMžqG}DÞ=Ì“©âøˆ/Ns.~zÙ­Y“ãϘR_årýZ_¥»­h´z€sƒÕ4 Êü; {5_±Ý4 êý†ì}tÒ¾ÜÖkz|ºûUz˜6•ñõCÈ©„pž­_µ °ea‹õ«Þ}émïù+û܋Ыsêo{_{þ 5¸®³+ÔáõéÔÖ­OÿºáÙºâ¬IÒÛ×¾7ëYý5Ú¬Ž_ë%ü$-ho¾I/c×,Ä.k¬k”ãXâZJ×,lp×›¼OÇŠ|뇧cÝMeØÔ|ãݯnç¿.ýýÍO±æÅŸt5> »5…l†µùwÕB|.4ÜÓU~Â¥FXîîTãë¾Ý:,dœVWwJ±Ã)c½»Óx-e’ÙóÕ͵êmη竾2²HomÕt`-­˜I³N¯Ö¥·QOÚûÞ´U –Hjþ䫼ɚïù*ó2M‰{¾jJ0õ¡ÅóÕÞ7A²§«öA-p-lŇ<ŠuÑ‚ä%ýÊe­»õtus¨r~ žt©J£f=]Å“ÍþØÓU´YźhmOߪ.Z¨Û>†ë°¹hañ®SkŠºhA£÷e*ð†Z̯¡¦TOº±±Û’µ7ýÉ"Å ²Õþy%%ßšµ¢=[=¿$ë5°„æÇVœÝµwÿÁB³àµh_ÓIžÚÖO°36PÛ‚J `·Ê"ŒÉ€Ý(ªÉn%4yØmAU°Û3VììVÜÙ顳[-£³¼ìv•b´ójG·ë²à®8Ã]›¦µ3 g¸k}”Ž_œá®óÆíŒá®óî2.0Ü(ц«Â{óç 7ª%‚áF)G¸ªÿêlÉ.‹%‚àªÚ¬ƒ¸‹à¶²Œ¶\Ó nT3ÁmÕ.i;Á ¿ ¸­¸¦ÃéÎq£38n”JÇÕ.í[ޛŹã¶ú»=}ÇÕmqnì·Õ­î8n”ÌÇÕv&¦Ç}mW¼ ï¹° 7Ÿ›Ü(‚ 
Œ»^,x÷Ê»|”Fo[È.Õém»›rz5MAo[MtkNo[ÉVëÏ.e’V8i]¼ôîˆi!§·ŠÐ/½ ·}ÐÛöªØDˆÑÛ¨ zÛêÎ;–Þ½ô.ÛŒÓ[µ{§ONoã&ƒÞ¶:èÆÞ¶2×½§½mŹ;v½mÛ9M5z«X·¼m¯ ŸæêexQæðö)Ü l5ºmã¹9¶²ßà¶ñÜÀm[mt#*ÎmÛÉ[rêÜö¬J~ãÚxÂÀµíÎ9v]¼/ê0×fË0\opm4DàÚV¬ÜQ®áÚlPŽkó™8®m§Ùñ…ÓÚ×fFkÛálˆì´VÛÙ@Íam|&k[Oa£g‡µí«d7Úam”b¬Zö€µÙtÖ¶¯§_ºÁÚèÑÖæáÖ¶kCb‡µÚÎFÙÎj[æØ³É-'"‡µñ¦;«Õf>ìrbÛÚñ!#¶ Ž’Ø¶v`ƒC'¶m;¿QFl³ý8±]'8[€Ø¶d¬Â‰­¶sܿĶmg™Ûv<§Fl#±Õv6• bÛ¶s*k™¤¶³aˆmkxNضíœ:±mÛÙ7ÉmψmkHȜض˳ëÄ6 [mç”ä"¶m$NlÛ&ÖÇ‚ØNXNb›§oÄ6Þ Ûx»Ál§ªmoçÉ2 ÒfËwH›/šCÚl‰ióMsHûÚÎ ík;ƒ´yÿÒ¶ó4ê¬võþ0XíëxÆjóÍvVÛîdŸcrT›_CµíhÎ. Õf>䨶½×Õæûy‰È£qT›=£Ú¶Ñ8Gµ¯í:ª}mfÀ6Æo¶m;K–ØfÛvbÛ:£|Nl5&ùŠØ¶‘„5='¶Šùãvb# Û¶å+ÎnÛvöZ;»}oõš¼wFêÔöµÅîåxã ÚÆðÔ¶„­»5j››9µm›ÙCuj›7Ù©mÞ‹Úæ3sj£?PÛvGœèµm§híǨm»²ž*:´UÈó3‡¶yÿÚÆ µq(¨mÔ6n$¨mÜHPÛ8OPÛx¨ ¶¯íŠäë[½ ïùÏ7¯Ã§¾{ÞÉ}ík¬mt·ÁÚ¶Ù“*Ö6DÑ{BÊ©ÑÀjƒl4=öŒ‚t?¶Ðì{MÂÈn¥¼_¥O´¶>î§âvž ävKBÛmÂHÉo'J1o§‰¢Üƒ6aö oÇ…rR‡·ã y%àí諟ÉnÛ¨ÓçðvÄBUÂÛ±+ÞŽ7ÏíÌv„¤ÈvÄRY"Ûqõ¬…ÈVggøÇ‘í¸¢²4m=qׂَð ³Pž‡Ðv„ïÍmG#´áÓ@h;ÂõŒÐ6·sh;ÂJ Ðv¤¼Ð6çÐv,ºmÇBÀêжÏY  ­Ü_ŒéÚÖæˆ˜CÛº‰ómó\ÚÆ›h×v«ãٜحÜmúW‰ìvä4 nm´®švˆ;ÂNƒw,ä™qÇw,”ç:Äw낸|QÁpŵìšÁpenhm —Ƈd¸£èáÖ–þì7:& Üx¢@¸#äÔD¸ò²Aî¸ÞüÓá-ïá-JÀ[ÓÞ:½åÔéí;Öé턲Ìd¸‘›ƒáư —!0ÜŽ'Ãau†«˜ bá&¾`n,Â\ÐN°ÜxÉr½8Ë>A¸ÊáÆQáb;G¸ÎáúãíÛAë ÷š›zÁ[×7%½uVðÖ‡" oý$¼uá-›}Â[o¾„·l3 o}žð–‡Kx xË;–ôÖŠfÝôö€Ê>é­Ciâ[¾ÎÄ·Ô»Noy‡Io‰ÎIoo½á'¼Å‰Ýòhd·„ÿ`·óàKM‚ÝòpŽn9„ t{¸"€è6FÎD·œšºÉ ¢[ÎÜÝbÉW [Né9º¥” Ðíq‰²m9~³åLìÃlóÎlcjÌvá«3ÛsåÌ6çÌ6çÌv†¹&ámLáÎp­$ÂF„;{ xÜyøRfÛ …‘@Ü<œ\JHpã–àæ¥;ÁÆ ‚›Û9Á9m ܘÖÂå{„KÎC„O—’Œáòm›„›{s„ÇnÜG ÜÎ$¸ÉNpÙááÆ©áF o@ \<±@¸Ð\Â(’Â.¾-pÑB^˜w ë,ƒåžÍ' î@…/ nì j(BÜâ"ûŠËS!Æ}.y~ãö¾cý÷ßÃpmV1îB°†;C„N†;¡ ÍÐ/š'ìXùLó bÉpÃÌ w¥  F¿ÁpWïžâbù@@ÜÀ«€¸çO`·q €¸3d†¤¸‹!‚â.@¤¸3 ¯SÜ Ð‹—Àžׅ鄸㠽.î÷‡‡áNÄÅ`¸3Ö“áNðº ÿÞŽ Ô®ÁpïÃݸî†û„ ƒÓ@¸ð$Â`XH„›1G¸}€pkÌ-=€p§ï6nùLnùCÂPAž·nçF@¸Ú§û<ÂîDQ>nœ&î[L"Ü Åì.o2®¶p«3\ÉÌ4Àpu“MMî@ݯA\*wC€wLÀÛ!n:\œ&!.÷:ÜšYèpG_ôInì$w<óF"Ü3"Ü îDÜ „;a ¸°‹ ‚‹¡Â…]'¸ˆ< îHË ÜÉ¿jpáŠJ„!°Ü´‚,%‚åÂñ(ä¸#™,X.|dCËË£–ìtG:é¢O ¦Ë6K¦Ë{F¦ËGD¦;ÒÉL—L7b`º<0]6L2]XýÓ奓éòò.¦ CÞ`º±30]¦Ëg¨‹Âuù¸ uÙôuÇ4X0ª;R² ¬‹ò5uùh(É}ëòS’Ëó$×åµS’ËW™\—÷“’Ü8pÝñïHr#®;bê ’\ž&¹.›,%¹¼JrGêkoI.&%¹l)”äNP¿Q’;AX In„ É$¹q*ÐæŽ ÍTèòå§B—ﺼËTèòÁQ¡û„B—ß *tãxPèò%¡B7® ݈A¡Ë—„ 
]6vJtùÀwÙ=S¢ËVF‰.G‰.û•›ï²G¡D—7’ÝñK7…¼!èò>R¢Ë¶G‰.Û %º¼JtÙ†Àw3æ|7.|7.|7ønnç|7øn^ŸóÝh(à»y.ÎwãñïFî¾1ç»ÑœyãÑó*ï:©+/¾ðƽàgÀ÷ €7^ ¼¹™ÞhʼѴx¹O^^/ï//»CÞûþíæŽvyöD»y†Žvy#‰võAr¬íh—JM^ æk;á}ÇObýxNx9J"áSáU¬—Ix9v\¯;lÊïóVM¨û=v¸b¶÷±_x—ŽÄ»ÃßÁ»h·À»º…¾KÇ»m,oÊ^û¥Õ<ìäÂñn«œhtÞ¸,)EoÜý…~Íw:™Þ¸ƒ‹ä`Û¾6†Oé‘ûtWßIØ#Àw!(†5.æøÂ—ëÒiK„°Æ5YOXãbù[Xãn¸ùië$ŠÖ¸pÌêÖ¸MŽÖ¸¨ù²ÆõúÙ´Æ-_òÝ×vf«ÁÖthË*00È]GÞ-7È•­ød†µn»¢À rùë‰5×}j͹õ\ ™uƒÜupá rUOÙ yÝ w߯ JÇõ‚<.½îŒ»Îti€?îËøãÖípÑî[ZyÉûšé[èeëþ¸+j~†?nÄÜ7š^÷Ç¡u\ÙÖÐ;·ûã®K+äöœ½ûãÖ‹vjøã*f3î»ÞÊ[³Å1 [ÜuB'[Ü•K©a‹»2q¢-.3 Øâ®Ä°Å]st¸õHf톸¦ôaˆ»â[JCÜuúÚw5§!.ßLâ®(m CÜÅgéˆ[®Z€ôÂ-(\I/Ü‚Z]ôÂ-(êöxáê ½·¡nA­9zá¨þé…[P^¸5ä*xá–>î…«$ »qÓ ·lpB€n9è#ë^¸:žÛ¸n^¸eO¿Ûî…[ŽV&õêãè…«[æ¤×½p[IÇþà…[°¼ˆ^¸EBé…Kïpzá^u"¯î…^¸³ˆô­1 JóÂ-Û— Ý<¼pËA@ì^¸:ßgóÂ͹nÁì(Íp ê|Ò7Ú¥[âêp{o °Ä-Û—Ýl–°Ä-UÉn‰û„%nÁ* Zâªbh7Á§%nA‘\Zâ– RbXâÖ}Bmë–¸kth‰K“Z⬵¡%náŠXâæñÜWû´‰8Xâ̳÷`™ -qkÌ;mXâÆ›KܲSîzYâªÓ°§K\µ°À¸¯ýÆÕ+³3vcÜö‚:56Œ«˜Ï‹:Æ}JÐWÝ ÝsøàÆ=‡n¸ñüáƒ{ -ïî>¸,È@\m×sDÚá–ò%Àm1÷v;ÜR¾¸¯˜ÛáÆ>Ý—õ4h‡[¸l v¸¥¹ºn\Þe‡[ :;ØáÆ„nÁ8™v¸ÚÎQ±Ùá,v£)nÁªOšâ–B^즸,øAWÜ‚õtÅm袿¡pÅ-(çLWÜx¦pÅ­1ã`‹[ E¿î[°6’þ¸š"úãÖ˜¿¯ðÇÕñì„?n)ÈØà›Û¹?nA løãÆ­†?®bn:àþ¸qyðÇUÌ.Üh)ðÇe‰úãzW·ôÜÔÁüq×üqו¦åUðÇ­1ûÑ ·Æ|Ù> r…|z¾IƒÜ³15 rµO—ººA®zA¯n»Â³; rÃöÁ ró\Ü w…¿ rY·…¹ºöž(Ñ —ã*äÆýtƒÜØ%rs3wÈÃÑ!—åëÛa~æ?s50¾ËËoÎA„¹Ðšf¹¸|šå’á¥Y®—‚¢YnÄ`–Ë}Ò,·ÀçZ]e%ŽK]«[VW\Bª«LЇCª[ˆ/!Õ͘kv£‘n×ì* rŸ×ìêCäZ_×ìæ>]³ë.¡Ù ÍnAW@ÍnÜMˆvÛ¸ásÑn\Ý-Ú-+M\´[X,¢ÝlÙE»õ¸>ØE»ºjgË&Ú}‡ºhW»4pÑ®b&Ô„j7NªÝŒ¹jWû4À Õn´K¨vãÒ¡Úó„j7c®Ú‡pëªÝw¬«vó<]µ«˜ÃgWíjŸ}ÅE»Š¸FÕE»ïXífÌE»wÃhvãâ.Í.O’ÝW¨+v >vTìfÌ»Š}„T·ÀËRÝÜ—KuãR]©ËͨÔåá¨Ô͘+usŸ.Õ͘Ku -¡ØÍ˜+v3æŠÝw¬+vó\±ëãèÀ»¯}šd7Bì¾c]²‡ƒd·ÐØ’ÝŒ]’ÝÜKvs—ìfÌ%»3Én†\²[hw Énnç’Ýw¬KvcŸìfÌ%»sÉn’ÝÜÎ%»…6 ìæv.ÙÍí\²›1—ì¾c]²›ÇsÉnÆ\²ûtÉn†\²ûŽuÉnÆ\²›±&Ù-u¼.Ù͘Kvsg.ÙÍíL²›!—ìfÌ%»sÉî;Ö%»ƒd7c.Ù͘Kv3æ’Ýw¬Kv '!ÙÍí\²›Û¹d7c.Ù}Ǻd7ç’ÝÜÎ%»sÉn†\²›1—ì¾c»'±ˆ]’ÝüÙ%»sÉnÆ\²ûŽ=’Ý ¹d7c.Ù͘Kv#ÉnÙRλ{‹í\²›Û¹d7c.Ù͘Kvß±.Ù͘Kv3æ’ÝŒ¹d7c.Ù}Ǻd7c.Ù˜Kv3ä’ÝŒ¹d÷;!Ùí Ù͘KvËöe´s)¬Kvs;—ì ¯¸DØ%»¹Kvó<]²›1—ìfÌ%»sÉnž‹Kvc;—ìÆS‡d·Ðm’ÝBGywãVßÂÝ< îÆÙC¸›Û¹p7·3áî;txBû¥p7·sá.cîfÌ…»ïØá íññ§Cùnnçò]>Êwó\\¾›1—ïæ>]¾›1—ïfÌå»ïØá 
­Ç\¾›!—ïfÌå»qyï²±7ù.ú1âûû|vEBïÆ÷â¾\°LŸÝ`Ž>»j«¹EC9µKaÎðí6WN×;çPPœW1óÇuÌ[›=dªŽykÌÝC³{À}Ž¢]ÅL Ñnía¼Ô»ÇzN«S·{P*Ýîqß9(vékÅníqM¦Ø=°ª€ŠÝºK“(C°[{u`_ìÓW~ ÚÌË®ÝzÝz(«æzÝ#,z]¯»Ôùº^w‡ÄŒzÝ.®×Õçѽq]¯»C9A½nžŠëuEmÆzÝr)êuëv¨3æzÝz<ŸÅ€^·Æ¼(ôº5æ³&Ðë¡v½î~x[€jWOÏé­«vë.}6ªÝöùï»tÑîA_íXNÑî…$íË©¹h÷‰o]´{`Rïí =x]´{p&¢ÝcXuÍîqó[ë¶ÜÄŽcbÝã)ªvëa¼UAµËеTí*Ý16ÕîÆMÕn=M_ÚÕîÁÒe.Ú=VséîAÿJHw•2[—îta…tWÇsɯKwfiî…N .ÝÀ.Ý=n§…®Ø ÝîQèÐàº]åš^rÌu»GùÌ…¡mBmn×í²}P·{¨Ø=°¤ŠÝsTì*ïv®ìŠÝceí7WìêýrìŠÝÓŠTìò½¤b7ΊÝhÀPìFkƒb·Ïå-Pì«7|v£š;»u³°Íí‚ݺKf Ø=(,`÷Xhá‚]½ó®45Á®®ÎÞ%vL S°«|Ç‹‘¹`7ÞùS°«ŽÐ ¯¹`WXó†`÷ >‚Ýcƒm  võ´]”ë‚Ýc…7®ëu¨ ¨×zôÐëêÝéÉõºÇ \—ëê½v³Z—ëÆ»¹nA~e&ÚÜ×á\´[·ƒ%¯KwÕ?Ùl&¤»q'!Ý·ÒÝܧKwÕé™í®+wµK ¹p×Ó§À¸íõ÷šj.Ü.ÂÝè6 Ü=:(¸p÷€äˆÂ]ؾpÖÍæìÂ] )Ü/8„»¼cÐíF‡yév£ýC·«/£eÐíF× Ýnngº]½6¦á‡n—)5u»ì ÛvÙndhíª¯të²]æ×”í\yÙ.OªÝh”Píªï²nªÝ3[„XWß)û¨@¬Ë­®ÃÎ ·Ù!C«Ë‹r©.S(uãã¥n4n(uãks+uãÕ„R7’B(uÂzi®Ô.Ä•ºÑ»C©«ÍÜD•ºÑÏA©{` (•ºüŽR©Ë§M¥î _WêPS©Ë´„J]öfTêP•Q©{ü¥îñ5·}‹+u9j£R—«ö©Ô=ö¯lb|L¥î2Tê_s[=gëD›R—ÿüÇŒuÿòËðñ/¿¬¢ÆõÿûE‹MÖö4õ§rz<èÜÚŸþç/ÿú#tXŒëràé 9ðg'òC@úû©«ûÍ®øŸ|Èú27•^»Ðç/³¿gÿì3PŠ}ŸÀõçŸzüúýÙž¸ÿòSÏà¼íç“¿ÿR_bË8~Ê3¸NàüóÏ=þyÛ¯¸þòÙœûüËß9“EȨö˜û\ÆeÖ§í¨²±¾PõœþÓ¯—ƒ÷ÆúŸ¥ž÷ǯ¿}ü×ÿP»ùÿøñß>~ý/¿üç_¿õ@›Ö¹/ã¶ŒµSüæ#-ß$¥~µ¯çV–o?ÒšGú ]eïŸÿÉ-æî“bWéW¹Ú¬àÏú2Ôüãuìág}•–×±õÓO}Ì‹M+ÿŒ{ÞŽ=¼ý3îykÔËëØ~Ï¿¡‡šTG{©ú¸µß;ÈvÔ~±žÛ«ß>ÊÙm|GˆÃÔñ—æKŽ¡æ}”ã>Êô{s¼×ÑÒXÛâ×ÇYò8ÿà­üÌ1 Û:~ÌsÑ['Í­å1ô?íÿý/ÿã·ßþôoýÓŸÿüç?ýö·¿þí-¿«Ûýø¦Åj«&Ê»haîD+»ˆ‚„«¦Ë»Ò×ǺëÚ Û¯…NnkÝIúb^R±g„¸.0ýX·Á4 Ì#ÖÍæë§ -·É4 ê<¬îØõIl„¾ VНÛÒgLó‚/›(ó­?[ ¯aífŠàZ·Í«-(p X_¬&ÆÜïô¶ÛZµÜì°µjË‚‘ò>ÔÔæ–Ÿ-‹ÓÉu;‚h‡+ׯǭð­?ú=Ölù£ð“Ðlù£ð]PñvÕlù£ð]_¬»ÖÔ­-SËC[¦¶ „H½ã¶L­Æ¬˜÷n…HÚ‘:ÏYw¹”\º³ÜãÑÏi=Ÿ¡&î]oì©öW}qZ\Ï1Úâ4Í!X»ð9øvâV¼ü˜mqš¢ÕÄ©±§Y^§'¦óÈyÔY«aÜbfـȭ2[Ї¯}î¶&mAémÅúš´hš&ä¼|Æ›¦É9/ŸÈ¦iòGΛ±^¿=NE¡ý‘ó*Ö¡Ê6̶(wx[•ÆK¿‡¸Ú[ï¶aµeiïX_—ÆP[ß×GΛg¸?¤?žM½òz¾åi¡Nªë+Ù'¹3¶¬X_ž¶ÀT`«Ãž¾>M±N’6•zyä¼›m}Ú™Ù¦éòÁºSoéX3ºÅM“æœw¯Å6šçM‹Ùí7[ŸÆþmSÅ¢ÙºSÜ—ÝÖ§å5¶>m)>_»©¾ÇòyºM&èyÅTÄàîUyéÓÔ?ޝÃͶ>MÛu`¹MÖ°ò¦r®OËMlŽü“X_Ÿ–±ÍÖ§å)î}}Z4”ÉœÑòª[Å»§å.gSTÆwW±¾>-®Zæœ7÷9Ûú´8ÏÙæëÛyvØ6[ŸöŽõõiÑØçÕÖ§åñÌwìu_v[Ÿ¶Àê£6J[Ÿ¶ÀÉD±¾>-bË`ëÓâ\–ÑÖ§-p\Ù–®HTÈæ÷¶úeëëÓòp³­O‹[¦¬á‘óæ©4”·ÀWdÓ”ù“µ.°7Ù›~m·Ùú4åg}BeÓœù¶Æ-Ö”ù“¶Æ.‹¹ ¾b£­Oc:¨X_Ÿ¦ãõ 
¾­fÍ}}š¶{榶2Ûò´ÜåbËÓÚ}Ó&ÆÉ}[–—,KÏVý—Í–¥é8Oªµ¹Öëô[•öŽõUi÷©¯ƒ-G«?šc“åiOSqÍo²÷ªØj¶ÕhKñÈ›&ÊGëUmÖO±¾-÷YÎÕhñ.È‚e²^Õ&Û¶uóÕh3’Žu÷Õh¨†¤Ø³m—Ó¶¶-buôÔW£-ðrÚ6+È•W]GO}5Z\^õÕh¹ÏÙV£©÷°4m³¤è+¶-c6Gø:—ÕV£Eo¥¹òÁzUìs·Õh;l5Z\Ÿ&ËGëUýxš,­Wõ}j²¼§¯«OÙnuõ¬F[ŠÏûmuÕW£-ðÛv“¶¿·Øj´Üˆ¹¯¶-ÏÂFÖ-f vDõÕhy¤½¯FË"y}m~ÚÓW{Þ®ø{]Ãf«ÑrŸ»­FËØa«Ñ¸Ïz+m5¯aF[Æç X_ÆãíZÝaé+v9Ûj4>¾zF¶-Wl5Z«ÑøT÷:®Z=}Åî6[–»3ÿÛWìè«Ñòp‡­F‹Í4cîé«?·Ùm7Ùj´¸[š2÷ô5b}5ÇG{WõÕhl|5U¶Õh¹O{/ò›­F{ÇO_qí»­F[àô¹×qU_¦Ü­c“úɵÕhñ¼bÞÏVmãóŽvou'îŽ6B³­F˘£û<•b«Ñâ1L׫ßG§?¶í®ü¡Ï]úí5Ý´¸l—$äÀ°» È›—àwž±äW·º?=BÞyqa#!¯b}Àꌷ-kè9ï\|A%ï<ùÂ2^a^Œ·^— 0^$;?G¼óäÓ[D¼óŒ…#Þyô~™ˆw^¼QñÖÃmå#á®®Ö§ÃÝ‹s wõ,-³Ü­gh6á®î‡ ëwëñl¾„pWÏÙ·s¸;ãKE¸«›ìÐÛáî< Íîj»ÁbwëõY§L¸[cÅ1§ÃÝyô’€»ón¸;¤ªwÛbŸ~逻:MkÈ7Ü 5ޏ››8Üña'Ü–5 ápwž|Rpwö|€lW¡“l:Õ­ÿÄ1 ¨®Z¨n=ª·PÝyôO:©®ŽgÃ_P]½‡ý³Mª; @ ºÑB@u§Tw†/©nÏ©î<`Ü ª+Ã'œNu£i9Õ.TWîùÞêFCÔ+8¡®Z„“^‡º:ëœuÛŠ;ÛΡn4q‡º:?cꀺºSŽ#êÆuêzóM¨«Ó4ú¨«Ó´~T·-쯨îŒO„»5‚ët7Z èn=OŸ‹ÝÍíïN&³€wÛËn×îxwF&RÞy±åÕý´sq̫پ§À¼qšÀ¼ñ‚óFs¹1¯ž€uåÀ¼ñ~ó*Á³~˜·Æ¼9æBjǼq“yãŽóê4­kæ.˜7î$0o¼@À¼ñžóêvÚæ¤qÀ¼¹cÞxrÀ¼óàS ¼ñÞò²ÛäîwFå?@Þè9y£ò†ŒW€5±‹ñê.ÚŒ7^o0Þ¼ÎxyñF'Ä—Ä;HÈ€xãVñΨŸHÄ o|ã€x£âwˆ—ùod6 ¼ñª‚ðêFwÕ ï<ú’f^´^¾9¼Lôxuá~4¼5fc*^^/_ðð²ðæ:à͘^}tzzÀ‹ÆL¾«ž²g!ä»Ü xW÷£ ˆw9*!ÞåSÝe:JºËw€tWmÙI¬Ó]~ýHwãêî²›!ÜÕ(tìWàlw}ˆG¶ËÒµd»ì¿Èv£á9Ûå·˜l7 Øî[*²]~ÈÛ»à[°ö½Fcd¼Å|­_šÝ‹ ©Ùe-hg¼ *à‘ñ.˜ oùzEPÞ:îú’òÞè‚|w‚=ùîòÝ© Ò3øî„ùtòÝ Î$ä»Ö×’ïŽ0"ßÞ2Ýi¤zٱbÝÑ=£HuëØßU@Ý5¤HuGJ¥Auëv~Ÿ€ukoæ«Yo¬;"içº#ô亵ãtÙ(¸î¸.Ö¡H!ÖÕ :äv¬;†öÖ±îm±î8ù-Õ­Æ+’ƒêލxGª;Îþù&ÕQ´Tw|ÃÜqAÐ\‚Ãj§¹ãì0÷dö`¸l0d¸ã†$ wDÑL@\};\pëW1g¤qGè‰ qëó°Týb¸ú±§f@¸#uj`¸:I2Üñp·0Ü‘â&0Ü~rd¸ïX'¹ãß‘éÖo©53’Ü •rIrGÊð@r'TÑ%ÉÕ>íöƒäªëðót’[û>Pe'¹#ªß’äŽ ¬ ¹õ/N–@rÑ1䪙D 7Z¥ƒÜFˆ¹#´û¹#ŠÑäÆY^÷¼TàÛx#€oóÄßæ‰¾mbýißÖÇã3%à·µ9eÅåõâŽXºCˆ«®¹lÁpëÓÿt†;Â~’ Wgé‡s†7 WÏÕ·ÎpÇ™œÖîxUõ$½Q•ôvÄ„,éíˆj ¤·#j‹‚Þê»býèí¸aô |›‡s|«}ÚHúÄ·ñ0oãßòÃG|ßÇ·#t#Ä·j;Æñ€o£ߎ(L|;r1ðíˆ9}âÛ¦§Ä·¹OÇ·²Þ³f|Mø––¾Ä·ãÆã9¾Qü•ø6¾‰À·zDFå€oãÚoGJÚ߯åߎŒߎ+& €oGTLð­>]NŠßæ•9¾ÕÂ:<à[#:E6|;b#ñm|€oãªoëUû\)ð­¾qöþß2©'¾W·½!¾U#r”ìøVçb—N|; _¾Q͘øvDlâÛÈ.Ào£ñßêÑZ"€Ë$7^¸L¬ pGg'ÀÕGÂP²Üx.€îóźÍý8ºÍ;aìvÜI|ݪC·v=,Ø­>T6ÙvûŽuv°ÛHùÀnõÂYƒ»^ìVçbŸuÀÛõÇ oŸl¯=°­î²SJǶÛòÛòämùÅ'¶¡Z#¶Í]:¶Õ}4´vcÛ<”cÛÜc[¼¤¶l 
¶yÛªAv$ClËÑ(¹­Îä%Û2;"¶Õ©·Çn{·Û܉-Ó>Û¸!@¶Jv»!Žl'£“ÙŽ‡Ï¦“Ù2ý#³}njَþÖƒÙæfÎl£ÙòÅ&³ç=­ù>ÿ(´íõ€Çúï¿G•k ±] Ù±UÆkÐÄvƒ±•"Ñt«$¶#y(ˆívb›lÜv÷ñGp[X6·…óí‹Ûº|ØvÆRvbÛ®uÀ¶y*À¶“w^üÎ à·4´oCy ~ i«Üg™ªWÜÝÉNÜÕàniÁpxf<}%Ì Æ™×m5@p‘ÅÁ î·>7„ÁP² îF¡,îêœà…pû×). .á$¸(S(÷€ê›(x(wxy`¹˜:ÓegL¦; ö5$ÒÕÑ,¿Ò…„žH78®žkvéÆÐøbºÓ@•¯CÝ bý€ºy´C]]³Áccº|ïÉt§"0Ý·u¦ëßÌÓåé¢_#Ó{L¦Ë*™îH;gºSG#Ó]ÀÑÀtã¹éêN—¤¹µÚ:Í z œ;¡èqnÜeǹJLl˜ œ«›ìØqnÜdàÜ 7Ï&ªŠìjwö:ìªm}$ÑÕ=èN“÷î$ºú®þѼ€ n4 ]Ý%ãè”åF,®Cb'ºr ÛI;º÷ÉO5.H®êYô~87Ú p®ÎÁ^àÜè€sU"÷3œ;͘kÎvœËç š/ÙIsùí$Í·‹b\Ça®vùÌÍ£9ÌÕ{nÜ0÷~È ¸Ó-E¸¼VPÜ8?P\µ]»í ¸ªƒÅ…á)n^D¸sŠ«×Ö>XáN;ÅFŠwâFÓÅãQ„1£¸1YJ.O7n(.3ñNqãgˆp#îîŸwjp3ô0Üià wšÁ)Á‘@‘áÎà˜”àÂP˜ ×ké$ÃÕ׫_n ¹h™w¾>Ò€·õsç¬ ðvš1£x«#¹îØám\àí„Ê9„·Ó9¬³Û E$Ànã Àn§s&`·ºÅF:ou¯zº|‹ogäš`·ÓBá°\ Mê€àNXÜ ‚›ççW§âÊU¨oy8ܸ\^3î„"P¸ÓB¿¸O“£ìvþZv;-ÄÁŽn§…ò`G·qÿp£íàªÉ™lwZˆTàN t¦p'ÔW¡î6öÝ- ‚àòêBw‹;öÜ +ª p'ÔS#ÀÕ)öá nž¢Ü Å.Bx‹2l$¸JB„ò–ûÂPÚwBù"Ü<G¸J^ä*Åw¥¯ƒÜiõïAî„r¹ÊÈÝyír5Bpèê WÂGÛŽâÛõïˆoW*l]|»È:ÈP4%Ä·+õ¼r'” ;Å·(¬ñcÆ £©o¿Ç+·˜výEqœâ·£@\-å6?QBÜ"C@Ü€£Îp'/57d²ŽpÇÝGFp' ypéè „K­n10\Í’ÛŠsJo!o Ã¥žŒ W×çø >hq1wú‚¸®Ä]p[₞Ä…Ò6 ®#jSá:D'Ä]¡ ÃÅt|0\¨"Èp÷?¾]þ.¼(F…üöÀÞî§þÖ4à­ÆÜFÝAo'”O&½%|#½%R ½åP†ôVi½©ãIo‘’ÞN³Ë/ߢ |[bÁo™ã:¿¥´>øíNF ~‹R”Áow(¾n~»ƒT‘ßîtw¿Ýyaà·´*¸q¸­¸;­ pwŠ]pwHcp±˜*. 
À=<ß&¿åxº+Ѐî õÁpWàºò[®&ÅsÅåB?R\>=RÜ#-º»Â@›[PÜØ —+#Iq¹Æü¡¸± (.Wÿ’âÆUæÆU;Ì=¾vW˜PÓ9`.—J“æÆv0Y¾ÖçÆŠ|š,pIwš,Àª<—ˆ<mT—‹0ÓcÁ_RÝð‰Öå+I¬ËÓ$Öõ`/‹…¯¨®Ön~%Ò=êâžê ?ÔåEÑaoM:,à‚á°ÛÑaÁ›©.ï}:,x¡Ã—è§Ãwãx€»|Óa^·ßá°às€€»éè ¸>‡»ÜES ÛE(ý|‚hW Ùmž,üüíÆú÷ôWð O¨f»¿À´£Ý‹ØÕ>Ô/š¹¡>i&Óp`¦³,´ª=  U…¡rø¤EˆU'xÐo`òE÷`™ôa$ÒÔbêÿ¸·éµÇ®6çù+îÐT¶ø%‰hôÄ€_FÝ@çÌ0r¶Ó€3 5ðÿGsëIëY¼çfD¤ó¾F¹*âì HQµùpsm=ÇXÕ‚ÜhŒU-Lfz*ŽËXÕ±ªÐ¶°XÕ·‡PÕA†ªVD 2TµPVCUœ„g¨jÒ(Í¥SýU‰æ‚Ìw¨êBE%šcjF ­Íq9Í™¢9l¢¹l”"P¢9> :N¨›¨­ŠÕ„f‚hÆgBÃ=²šxXz 2 ÈP½¥à*ÌfÚ±*LÁ IdjÇ2Ì—‚íu&°„”w¦"ûLüLùXæÆ†Ö@Cðåc™O–ò±Ô=¥|,s¿žú±Hœíú±º¿’é6èÇ6¦D£~¬¦~ìÊlXÐežn×….€²L·)̤„Ǥ«q5À™ ¾ÂvóÌÄÌÛÔàÑvj ,ʤ~¬iÙš~¬†öéÐB7–c—º±v-èÆÒ p,§—S8¶bë€Â±v5ÈÇr.¦|l{ j“ò±ìxÊDzñ”åÄBùØêÂ]½VMÅõØŠ)T姉ê±ü,P=–Ï”ê±ÖL¨Çò QC¶B[²gû!!‹Kš†l{©@À‡G ÙFVHÈZK ![™ ²pDn YŒJ“mŒ^…„,žiÈj?RB¶1æ²¼"%dyc”­H€ Y .“­ÐÎ¥„¬5²õ] Ùö„¬µ²xñMBÖl&!‹Byà©ófâ±l!ÅcÙBÐ\{dåø x,Ü6å CñXŽÇ¼úý}Ɀþ°¼ýÓ{Œ—Ñ‚ñËøþüíñÇö¸»aüé?øïÁÆ£‹û¢_ó‹è×w›ò]¨úÛêŒÛû¢÷ü'×Ùã0ûóNÏ?øþìúÛ~Ö~üéSëîû}ïÏ?jýG?žøóÏ çA>¡ïŸµÇŸ>·î£¿Ÿµ?þü^ý+þúA;ZÌOkëeLrµôXdŒy+‰i4è~z Sÿ%ÿÔÑè·Ÿ¾¼ýËßYñïßþõí§þáúÊzÖeøÛµm£Šq³_]SýŽšJ¨¸ Ç´ŒÕæ××´zMŸ07ÞòŸ<\®Iø3çF½OI°ðiß‚u™ë^>ë;T§ºã§O~Л•ÏèõGíË;µF¿?FvkמÿбՀëøósâqtgÌѺiþXÞÚcúøæiŠÕŒÕÖòcø²£š¹–~Ö’ÿh5ñ±4Jc•óºžêõü^i÷óì[)-zløécQáuÄÿ5üÛ¯ÿþåËÏÿýןùå—Ÿ¿üí¯ûÕ?4ý¾}Õ¡¬}“Ì2‘7áZøøDÄQŽm]#/w°Ü!œ=¶Éï°ÚÆrI(@qß³F9è·íEÃ!9»ïUCvß›BB^§°Ý»iUåÝ÷Ø+¿¢4 ´[åÏÕÒ0IÇ¾Ë ´µ"ÏàŽ­òŠÌ6a»÷¯*’ì±U~åÈ),‡jc«ü@•ùdº ›¬Þ»J<ЉÈ\¯²qT‘3)l7Í­Hf°‡ã†.r\{—À€µ®z}òvüªê€±G~­}‡Mû?öÈ/šZ±Ý²Çù…:*ö${ì‘_ȼ"íT­ò mUdãÿ^6J*RʆíF˜¼³¾! 
Ò`}‘€°‰üј e´®ª8ØÙF¶~ Û½™÷ã?JÝkÿ¾HN›£%÷B}t‚l>ÚÝ%I^=-«ùž$âé°ÝÌ2lwSlèIâ“›Ü]ݯ° KëIÒ½L×T^VÃvGU°£žDlºæ±äõ¢µ6A9cˆ¤ªà^a»#v¬¢,gáúnÂÒCÃûšd­CBÎúšd­#C3ùšd½> “ñkVÙÀ® A=t?¯I¶*)·%ð·b·Ç’åšdÍ´ßñ)Ö™‡pÙ5bö¬[:»ÉýP1¸,ÚX’DŸy±,‘!³íÞ¥­è!–rϱ؀ïEö«íŠlŸ—¼?½HžÆÃvï]‡íÞ&­OœÙË‘kjšälêè`e¶*éJ¼þa»÷#kÕ¯T¯’gã°Ý1½Jž…©œ(x‡Mô †Ë%ñ~Í&;ÖõUäü¦û¹Õ°‰®a¯wôéaº¿|½î÷E¼`¯]vâfÛÍÉíšè\w~„Ç“‘P»æð‹îMv·Ù«È ¶QÛ·*¡zÃ&au}øvM'Wí–á\űìqP¼ËhE¹Ç?ׯsÓ¨¸°Ý¾Eo]üWkúºˆÿj×npÖ©Uok,~nÿÕšQÀM,Êñ_½>Ý˵{X…OåVñ_½Üªþ«•ÛÄ5Ó.¬™º:°ˆÑ—R¶Š¤G˜ºÎ­ÚÈ-©ÿj¶¬þ+’8»Rÿ»Ê}«êÀbwa\ZØÉ¶ë쪷¾­êÀ"ò¸”:°ÖÎýáÀ"•÷øŠª;ÙvcEXdŒu`9ôö¤,ΞÕ5[Qv²‰[4]ßu‹ÌîzoêÀ"AxßWu`yïû¦¬Õ·‰[49H «Ûå³Ù»:°,xážcÍ–Ô-pnÇÚIØ÷¼gu`‹FÔ÷®á±Öc½ªË^éMX+·ªkmYÕ5Û¦¬Ýû±oá4Œƒ¶xŸÔƒâÂ"³û0&õañÄ£ûÕ‰-z–`‹z±xBÃXÕÅc8ŒâÇ"¾›:²E×Wøª'‹ðø©/ë—å¹Úºx³~#]ÝY+˜uhݘèÑÊÒf3}Ú®7’Š:µ~Y=ôÖÆNù"£XFÕ06õk­gcû<Ë8¦qS×v6v}ÙÚýáâÚ¨Ãæ¹?Çc]3.›è› 檽»è:³dV?×ë,êèÚHÄãŽcÛæË6uuí>Cš~ôÍøR®sÇåU]¿lDÑ’5íó“ÌF?UÙÅß-*Õ–CpLKmç!‡"œÎÆèÙPG]2º¥ÎÌXh5yÙÐÑ3pûqÙ;êa5ök*Á_1 ÒöNɦ~¯=ͱÜj£êšû;%u`ºÏýÇ4®TÓÓ‹’•íc#¿}ßA·3£îïœo30\ïItÚ½ªðH"áµ@âHx¥VðZúHx¥ºðJÝ: áakŠ™ G`뽺"^¡.L$¼B·N‘ðJ½>EÂäé_€„WŠ ¯‰L[‘ðŠ]n"áB¢DÂ+Ô ‰„ݦL¸A˜Lx…ˆ-™pëDÐÊ„‡íÁÜ ±}ÒàÖ‰¹•Ç…(ñ‡pœhG‚7Г·Že hð(§N*h0ÕèHƒ‡ œUið°+ ¦ŠipCRhÒàaÜ4¸í:êHƒýŠÐífið°É´@zz²B ök* ŽzyB ÁqM Á85LÜ6}qƒCOV4€ÁOi¿'¦Æ)05ýH©•G L½?Rà;R`»&(0PH)¤G uB`¿d£’Œ’RP`je€7$Q¶†(öR¦àNª8• 8`à†”MÄÀÜî$n+H0p”“Á å7Š*#nÈEå„T^Þ¬^4À;•TÀÛÕÕïTRïTRﳫŒìÆ9Þûsjd÷.¢H]hDwºkÕG\Ý©œ]/¢ë7¢ÛªÕÑõÞÑJBæÓZ ¢Û#Úˆn´övzŒè¶ªk#ºSI%ºSkDwºC(\Ym º^ˆ.¿=Ft9µÑmUãòŒèúcÑmU׈FtýöAt§’Jt}ô€èN­U‰go¬݆„ÇFt½­ ºÞV]{ït½9º>"tòKЫ@·!U·Ý©Nº I& èú¸,ÓÕ¾ï¨Û·*—‘ì†G{>¦)äw¯Ä"~›’/‹øe†hFüf¤%aÄï¢:IÄ»µkS@w+RÍ’î2\“t·®êE¼/"2ð۹ˀ_Ùð°€ßþšîƦõ¿ 쿈²EÀoÆ)Nü6¤µaÄ/D,âwÁ3gÄ/B°,â·’Ê*ã ]-·šK›¿…­@Äï‚^fÄo!èDÄo†‡Æ[v„d2â7Ì0â7é[2ÞP¨Q"~ÆÙ"âwÁŒ7莶E#~¬Añ»0žYoÙt) Ä;›¢SÄû²2Þ8Þ¸¤´„wØäƒEÂ[Vý¢“ðŽg§£@¼Á¶d]ÄMëSÄ BJ©Ò;~TÒÒë TÒ[ ¦GÒ;®©ƒ ¤7¾W<ôŽK*óè-özK{†ñ– Ku0ÞÅ1 Æ;®¦áš`¼£Cd ãA¬Å„ñ@´B^/†ƒp ï¡>µ¼ê}™ÀxÇ%òßÛ…—WUÆ;®©û`¼eÅ¢Œ·T 70ÞèÙJ㵑÷d¼LEÆk×î7®)ƒ”7îZQ©RÞ²â¥å oN–Π¼¥!X”×TL®À½ežp¯ >ŽeßÚoÛ+Ú[¶×´7Ä‹}*í- ³hoiYía)¬´w¶I¾´Æ@a¥½LFÚå4ˆXioŒm¡OÀ¾¥"\°íþ’ÿú•ÿZ뀭cæ ü˦€þZG[ƒÛˆ>–,‡•‡ív@Àö‚+öÚ„— O¸T.@`›œíCðþ„—Šmu0àègÁ“`ÀÞLeÀ¥âe¶^ŽfÞd¸TxqOl8JdØ$7íJFª 
8>Êr•[ƒ[gG‡Èžp©˜êÁ€mª›L“`ÀÌ®Hý"»ÅÊ€£)r>Ep©Øbއø-`À1LÄ£ÍIµez¯º…s¶¢YeÀ~MeÀÑNñPÁ€KÁVPpA˜Q°õ4X0ó4?Yp$¢Ô`äF¶IcÁa¼Çº±àÈEù6A`&Ò4Ì ÓK!ÐfúQcÁSÊ‚ãæv¬Hù4 Ç#P\,Dxjªá¨O1ˆ°ßˆ°—¶ÖÇC¿?Æ„™…Ô€0_hœÿ ó½5 Ì7×€p¼ž«Œ8áRHaŸ@˜¯»aLáÆƒédæËi<دšéÖSÒp°÷ pðdTÌ9Ôp0¿Œ†ƒ9Ûó‹Eìƒ8Øp°_8øäµ™®í¢ï00=ÃÀájoÇ÷ñþ®>Û¡ô7>î:“€þò‹jô7nQÂÝýŽï¥¿wþÝ4þý· _qM&ô‹½Ic¿ y‹Á~ëþ:´7VT«ì·R­ŽìÑé3ÃŒU"þ’ ‘þÆCñA¥¿±^f,ð7Ü7™€¿ðÕÉ~¹Ú ûÅjƒèÎ3É/¿ð$¿øHüZ‚k€ßñ*"äVÁ¯%éøE{ã¾ `þÉ}Kr5ñµ!”ûZVopß’à ‚û–„Ø rß;þà¾^N¹¯ÛÀ}Ä]€û2Ù<¹oIŒ÷M÷{ûΉpW¸/“Ê÷µøMà ¿%ëþ‘ßÉ&¾sÒ¥,ÁoÑiÁÀoÆâ†àתøÍó¿…Â+]gêøÍsøÍ iÉ}qZÁ¸/|@ã¾|Žà—í#ø-x¿Pžø—Ëâ_õþêϘö­xÕˆ}¹°"öå‘Üë㾓mשU¡6¹¯•÷å"Ü—OÜ· ûb.¿¸/ǹo!E÷}¬ã øÞ?ŠKÌ…'I/ûÀI¯0‚^ƒÀ•î°Î|zÍgX«çå¨çå`èµÊzɲzi#è%ŽsÐ ™€^pAr^b.ç¼’çå^Žs^…³†yÕÊk7°»+>a^¨@óÒæ˜Wq­Q^àZP^¢U£¼zEƒ¼/5¢e+—¤ü«!oƒn oÃñ@^>JB^nòòò\òn w¤¼fååÖ")/‡ªS^c§¼j3Ê«0Ô1/ öó‚RL˜÷ñµ"àå=ðn ¼§zx362xéšðf HÞ \IÀ›Ÿ›ÄD»ÖB ÝŒƒ@@»ãh§2 ífÌD»¯5Ñn˜'Ú¥¿H´Ë>&Úe;‰v±j2´›1†‰v36µˆvíþ€v­ûÓ?Õ>&ÚÕ噓]\ÌÉ®x³NLxÕ%v¾‹‘â|7ó44ùn¦„ó]ÆßE8¨^„I:á5¥†¯&¼@ÃNxl„›ˆ·é™:G¼³ : HxÇO„WG oÑh¤™ð²$¯Ù›ð¾q(•šŽx©ñîÚãŽx!pAÆ oʯ7È/ë4ÆK#/œ™ñ²µ`¼K1Æûš Ú¥ÈЮÓ[G»àœD»:ÍŒ—0ö[/[ Æë%ÉxÙ-{›âvÂ^¸m{á¢ñ;â¾ tÜ)¹ ˆ®Ç4 :04 vêJ ëÛNd}ëÈTƬo;õ(õmÓð+âá° gT<b«z ‚‡Ã¤Ð\ñpÈŠJ°,ðpÛÄm#~ªŠ ·ÊŠ…).J.l=Lm_+§\8„G_©?¸ Ú¾A5…ßMYÔ¥ðk% ð;ÙDDm³§ÂïFn …ß ” ¿”M¥Â¯•ƒÂ¯Ù ðk6(üê(0ß ±©øl·ˆ\!ðK Z üZ1øEÊ3ø5‰ü¢œ ü¾L÷fwç¿(g¿(g ¿–  ¿°™Â/®ùúEnLú¥(¯ ý¢"ýš˜/„~©¾ìB¿õ…Ð/f´Iè×B„»Î®/…~ù¼]è·¡_ÓV¡ßÍ•TèW4¥_ØLéWmTúíÜ£Ò/›B¥_+gJ¿cîJ¿h‹)ýÂ¥_*uSéwc„±)ý¢-—Ò/~…Ò/‡³Kýj9JýN6hý­_Þµký¢´~××*^¢¿V¢¿œð)úkíTÑ_«NEùÜ(úkÅ úKEÍÑ_³Aô×lýe¯Pô×ÊAô×lýµkBôŸëIýלe€­ÈoØ¢ 0ËQØlžlâÁš bÀfƒ°Ù l6ˆ³_(ÌïÔ€9³A ˜ÃœrÀüüQ˜Úþ”Þ?æ+G9à »œ”Þ^'~³§N9à§H?u€™D€:Àg¾«~øN`¾ìF¾êÿÂDýßgåþ巇¿ªÚgº¿›¡a¤¤mrªþ²D™ƒ¢¿¼§î*Mê­ªe‡³úè‡N/²¼ ½ÔGm¢{zFøÂ/Ã~ÙŠúòµ¢¨/‹µ—ù&¨ì‹gFa_–Ú,ÛÄý`žº¾ü­3åÄ;BÏwçÑpêùb8»žïFq„L/”º·…n(”*ÝP³‰êm/Q—öíÙµdÙÍ$`ÌnÁúŸ1»TÊfÌnRòÌ]@3d7A_ƒ!»8‡a!»™¼·2¨A_p†ì"èÃBvÓ=Ý:›åf÷²›˜· !» §s²Ë2d»Ø²‹S+²Ë2d71Bv­-• z‚—!»“ a :53d—RìÙ}ü²#t G¤îdC¤.D©›˜°Ì"u¡”k‘º( Z€ú."u3²1R—ñøŒÔµrˆÔÍøÖz¤®†­>#u)Öûõ‘ºzÚÝ#u!Ûk‘º¶µHÝ|Æï"FWcô7J4ðâ3—_<†âz0)Bq-ì“ 8`8‡âR£¡$ÂÛïdk¢ÎÐ)0Ku7BÁˆÁ}Ž;×eðª.C'‚.C§&u:©uλ¦ Cgh)º©5@ÁZɰۅ'èv»˜8.:™3úG‚ :Îdèd½O=/=»yê1tÓ¿…ƒ [ê1t]`¸C'T§¹ºƒ= ê1°O)Çà…ƒ!ÇÐ_¢ÜÒ) 
Y†þšåζ®s.7n³åfCÀ-D.Ê`c€¢ ¼Ej2tËêM{Ôd8ßDŠ1tK­1†Î]„Ò½C¾Oy÷×–·×ÛÇõÖˆœM¹¥ã¿=þØžóÓø?þçÿñ=ytjCDo~Ñû~c¾‹]c¥qƒ_ô¶ÿìJSŽ‘ð¼×ó/PóùÓ[°-WýÇ?µöá4Ý÷þåsïÿèôçƒþ%¾šì½úÖ'pÖüÜÚ~üÇ_ÞkÁãš¿~Ð’-E®¯mßJÝë[|”†;0&î”×hÔ?üôÔùþKÿ©£áo?}yû—¿3èß¿ýëÛOÿüÃ?þôµ5•á§çmݶm´ð«kªßQÓá\ouOcþúšV¯éSæÊ{ŠþÓÍsVþÔ¹Rï´/zºã³>á.Lµ/ŸöqªsíñÛç>îÚÅcù”ž?j_Þ©ýSzþßu®]{þ÷ç‘u?ÒñŽ+oë˜FÖXcõÒ¢qÓ42>Yä›g+ÔR`1þkÔ2WÒÏJò¬e•TÚ·–Æ¢äu5Õ«ù½*b?|ëc~ßßJiÑ_knx÷:âŽþí×ÿòåçÿþëÏ¿üòËÏ_Ö¿þí¿êšƒß¾êôY¼™?îöX.­¢¼>ÏS‰š€},ÃÇÚ¢•-!8¼×³Æ;씆룚vŒÃçùUìEãxÏ2÷Ȉqÿ¬<ö×›FòZñ>þq¿¢ ‰Nß4–×Òd÷8¯q…ÿì»Dóæ“”ô.¼š‰ãJ?–rÅþd¤Ì+x åMP© coWôOÂê~,Q5˜7aÁ8–[?¶íŠZpœ4-UÃy¾ii?>n÷ ò’«¢ä.ú° ¯ã< Ù•öã¦B4e É1®®š;ðÎèZÖLÔiüü㲞ƒxÑq…-oºÍ9Œ£÷sGޔǯY¹òŽ«:+K„H>–«2’aÛÔ/¶ñ¬JçdÞe§_¬ëӬع½C›ãê€6›> iódTÚìÝÚ<•TÚÜvî*‚6‡Q]Ðf/ù¤ÍÓÏJ›©dl´ÙÛ ÚìÚì·Ú<]VióTRi³ß>hótŸJ›§’J›£ÔçmžJ m¶‚ Í5EÚlÝNÚFY˜’6O—UÚlï'iódl”cÓH-ÒfëYÒfëÒæé>•6O½§´ÙÚì]Ú<É”hcI¥ÍÇwõž†I›½ÇA›'£ÒfïTÐæ©N¥Ía”µiódTÚ<ŒÀE ÍnTÚ6ñàA›½ó@›'£âfïàfïàæÉ¨¸Ù»¸y*©¸y2*nöÞ<•7¿cì:1ËwêäÍíÌì÷ÌOH˜1³?Z`橤bæÛ—Pbf¿/`æÉ¨˜yjbæé>3OFÅÌ~+Š™½JÅÌ~Q`æ© bæ©FÅÌSIÅÌÞ³ÀÌ“Q1óT§rf&N0ÐÌÔ š§’ šÃ(km‚f¿Ï6½Ûßwð[ÕÝŒ8‡bǹv˜ˆóX2’§×εEï æºëƪÅ=l{Þ‰d÷¼«HÕ÷ ›Æ=[ä7ãžw«)îY;ÖãžmqÏŠ=ìùÉ=à“Ï»€ç›Tð¼@œ§:-â™7xF<3ÂÏÖLx&8FÄó-áó›g fÎϬÒ"žYÏvóòL°Žg‹²gȳ7!Ï׳G´³õ ¢g"<´×ílqùŒv¶‘Íhg;­ÁhçqiŒvÞyî„ÑÎ;Ϻ0ÚÙK"ÚyGˆE;ï cسÍagسMŠ {¶v2ìÙ{æ¼Ç¨ç¤,êÙˆzÞ±ûÅ ç»_ zv#‚ž­gõl³;£žw„T0êÙž¢ží3êÙú•±Ï¡‡Œ}îMŒØg/‰Øç¡ Œ}¶!ÂØg{Cû¼Óaì³_±ÏÞZD@÷w" »…#ºã¤ŸÅAÛ½3Ú^)ÆA{ˆ†î_ŒhèÎ]oFCó,ƒ¡ýÖ mÏ mWE,´·Uc¡­©†¶~e0ônqÀ†ž m#ƒÁÐÞC[÷0z·¨eC{ÉÍŽ¶ógœD·1 ú.Óé÷= ýçDÏ•ÐnÌô‡1BÍÁøç>…8‹;<á3Äy¥;üAü³}Oÿlo)ãŸmL þÇ8þÌÞ±ðgÞ¤…?»áÏÝB‘þ<Åæ}Xø3—E?óE´ègS‹~>‡©Å=sTYÜ3¿3gÜs'¾aܳ]qÏ\µYܳ÷܉h÷ìFÄ=wCƈ{îÆ°÷Üa#îÙ;qÏ^qÏ\×2R!îÙjdÜ3§ |î8:aÏ|ß,𙳷>Û»ÁÀgþ |ö’|¶!ÎÈg7"òÙÞ*F>›ñ }¶ 2ôÙ^(†>{I„>óËf¡Ïn¬ô‹õãe¡ÏœV-ôÙKntŒY¡Ïö<úì%úl%úláÏ6^úl3CŸ¯yŒAÏ6ï0èù.ƒpg{¼ wæç„áÎfC¸³ †;»±ÓFŸ0ÜÙ:º%¿Ú÷Rè;ßsÿþ›‚ž—›ÛÎzEà˜!hgÅ@Ð$Ñ4 ,´Í»† c¾@ÐËã·•^6Å?”<Û @¢g{’@ÏvÌèÙñƒ=Û‘] ç›±:3ù…¡g;6ò•X‚<û‘9çÚ)K¥õ¥JèCðlgíHž«ñì'y®ÄY$ÏÕÔ&Ë5¢á$Ï•±>$Ïõ|dÎ…dΕbdÎ^²1׈¢02g/¸1Ù‚:Éœ UÀœ ÃEÀœ y=™s5 0ç‚aAälÝMä\¸.#s. 
š'yö’ Ï…[$Ï…ò$ÏõüM<ìÂ×…À¹`Æ:sá•À¹0؇À¹p¡Nà\ñD✹î"q.<|Kâìu‚8îÿ8û}‚8Ê78®LIœ9ž€í¹;Û³ v¶¦’;{IpgërçB@îìw.@#ÎÙ$,@œy{Fœ3þ8› À™ÏÉ€³×àL—ß°3ÝzÃÎVò‰ÙJRçÌSí¤ÎÙô+@­»IŸÝps6%Æ4|º­m¸ÙFq³Ýh³ßÂÎT|¬´™-i³5´™­!l6X³= ²æ ¬`¬Ù9ãlª±fkP³uQ³=|¢æ M’f$Íù]s,‘0ÛDÂlo; ³u ³Ý 3oŒ€Ù¯ À|ÝÈrþHT##ºÍȲßȲeö0Àòó_gz½zªÍx²u©²u©²µh9S܆h9O2]'\ tlt{Ùžn/ï¤[n>yèdËfK>5|Ÿ’F’çosÖä™+W¦”!WN½œÅA:¬\¹îL+D® ™8båÚ>ÐÒ0¨i|™ Çø2—÷Æ— ¶ É—³…~+_6ç|9[´1sb„1s"N$fN\Ç’3'S÷gN&Ú Î¼‚‘3/&‰м¹4/„Í {'h^,ÜXA³eù"hžŒÍ W3Í–øŒ¸ÙrGn^¸`n¶”LÄÍ–Y‰¼™ùüŒ7O RÞ̬väÍÌ¢g¼Ùº¼y!"o¶ž%p^°¸"ožlâ`/\•7/ÆÆ›âoÐæå#Ú¼;/†¿ííybç…ÀØÙÀ΋i;/Ü4 v^(ˆ ꜸICꜸ1@êœLiÔ9ñÐ5©sâ9oRg›‘HÁ°³Ù€3CÙ‰½`bæklª;'ÓÉv¶!vö’ÕÒ_ÃììÆ•Î6ïØ9Qó‚ØÙ[ ììÆ~&ÁFSˆ“‰h$¦Á¦ °³—Ìôºñ%v¶‡Eøœ(Ê@øl}Cúì }N”C!}ö’ ÏnTúœÎMNpg+AîœxªˆÜÙz†ÜÙnḳ&Ï,Ùè‚.ƒ<ÛÀ"yN¦Ÿòl3ɳòœ\pCɳu)ɳµƒäÙº”äÙK‚<ÏÆ2Œ Ï6ç’?ÛœKþìþl/?ùsÂö8ðsÂ?Ðgk)ñ³3}dÜñ³ }d<|âg/Ùè#cn ~ö’Àϳq×éýúl6Ðg럓>Û3$}ö2 Ï¶@ }öË‚>»ô™^á³u á³\™V6 çÅ„?€žíñ=›«ý„§ žß‘Ü`à 7ãim~Gg#®¥X†™ß£ÌìÃÌnf~^m§ï‹{Úéúòb®¯|²å÷œk9SVdâH ·˜»«ÝG°ÌGm`ÙÚ ®<ÙÄÛ5ÛFgRÊÄÊ ÕMˆ•í>€•­û•½¸2›J®ìWö«VúºËk®Ì…‹qe¯`ÙÁ²_`Ùn`Ù¯ °lW=É²Ý ³—`¶$`ö’Ìn`v#³=+foÐFW*>Í6Z š½AÍV'@³iör@Îv—DÎ^Ò‘3‘ 9Šƒ9smÌÙæì s¶ž%sös¶ÁLèÌÞƒ:/\¿9I@ó·¥LI8SátÔ åE43-š™z{&¨í fÞ7²ÓüèÔÓð”†ÐÓ  ‘ jìÆÏUPƒ3Ô €Š)jì–dŠf2E ªÈ˜®†—,Ô¶cÉJm;µí\¥ÚvË+µí^SgÓŸ!uFfì§¶Ê€:¿c„¶‰Úv4fjÛQ6ºPÛŽÆJm;7BÛŽbÚv,¹RÛŽÆÚvÀõ ÎSÉÚvlO‡¶l Î^#¨ódÌÔ¶£Q±óT§rçÉX©mÇË6jÛѸRÛŽÆÚvn„¶;µíhìOm;ì.”¶cèq¢´ &*Û±`¦²…Êv¯£½ÊFa;l¶CÁ•ºv,¸Q×wÊÚ±`‡¬–ƒª³£ª³•KµcÁLQ;,Ô´ƒ­RÒÎlP´ƒ­Qж•zv°m”³ƒm§šÙ fÇ›¿ÄœA\Ì™Fs¦ÑÄœÝY;¼õsæ=PËÙl¢v°­Ô´³„´{™LÐm;í`ëгCаŠ8›)QÍÎl³ƒ-S˶B);Ø*•ì^Æ6Ï6èØÁ¶RÆî̸QÀîüy§t®Ô©\§¶C¯ÙÒ° ³A­ùª:ÍV¤0w6lkžlû« –²Íf3ù泉ý]Ùéîæw—½H‡Ÿ{Š=,ðpÏ_M¤Ì™®­Ùú«yÔE›ÏªL®EL­ù,b:Í·ÖÅòjât泈I3£ˆ)3?‹ÜšÌç¦Æ|ÿ _õü٘ϟ¡½LÅoJ/›Í”—õ\xù¬Ê$—ÏŸMlùüÙd–ÏŸM`ùþYÝÏS©¢Êç¯&§|þlBÊçÏ&¡|þ\é`Þ?õ<6Áäóg“J>6‘ä[wb™§³IùüÙ$‘Ÿ?_bÈçßMùüÙïŸá ž?›èñù³ÉŸ?›Ðñù³Iß?÷yŽšdÏŸMÐøüÙ¤ŒŸ?›ˆñù+å‹ï_éâ=6ÉâóçÂdHªW™«6“*>/g"ÅçÏ&O|þlÂÄçÏ&I|þlbÄ÷Ï}ž….âû!>6ùáóg¾†ovþlbÃçÏ&3|þlÃçÏ&-|þl¢Â*ãšÂgS>5¨#|þJáóWÓ¾îïÌ=®|þlJÁïi3ØQ"ø=eàço&Œkìï¹S“°q!`Øü§ïã•P8R ¸xŽ•íºnÐrCð¿AËfвZ‚8@ˬŸRƒ– ±ø*ëj •¥8ß$Å@¥ „Ên&v¬¡²*¹ •mÐ$¶PY; ÎPYžî±PÙl *Ë [„ÊZD# ¥ÇmZ–N9@Ë1U jвìÔÓ´ó=ZÆ\剴?Cô‘¡²â¤ 
•­Ü´—ÅÃb¨lãM0T¶ZV:…–1Oë©@†Ê6†-0T¶EE¨l%af¨lå‰h†ÊV‹ÝÕPÙù]e–ѱ€nÊ,€%Ì2z@[fé̲Tx0Ë0ꎘå0>bÐÊèoñß+¥ˆbÊñ3ŽÈS–Êq`Êaddí)ýžÀ)¾ä”¥ò]¨Œ§ •ó@ˆ8@e<„Ý*¨ô»©ôaR9ݧ¢Ê鲊*}H€UŽË0Qž°Ê°éˆPXmÓMÀʲRé´r”dl¨ÒÊPßU\]å•ñ>Aé@eÔ ±`%–¥Y˜«"Ëq+;³,Í2Û)´Œ:M,¸ëTŒnR˘/TÒ@¡¥Ïm€–ñÀ_Ê2ÄPP®BËQR¤À,ËÆ‘h9®ŠhMP˘߻–Tlé#Ü2ž†ÅÎÞnó¨òê —ñ¨#,è2†+4„]z[/ããXiì:3m›âKŸÃÁ/£N^0Ç ‹¸4Ì鲊0ýIƒaÆwC?« ™þ€gú» éŸò š^ædT¦9•lúSÚŒ^“ ´é3Øfº¬¿!Š7¿Šâ}·1ßÅ⿱ҸÁ/zÛz¥õH"÷¼ÙëoEÉÀŸÞ†=D`Î&<ÿò©-¨ãÓµ]M¸þö¹½ðèûç8ÿ– ;ñ9OâlÂã/ŸÛ‚gߟïÁóoïµáqÕ_?hË^[Ð…ÜÆ¸½Åkø cúJyfýÃOO­ò¿¤ñŸ:šþöÓ—·ù»1yÿýÛ¿¾ýôÏ?üãO_[Óv쎌ÃXÍ|}Mõ;jêm8u|ÜÇgõëkZ½¦O™;ï)ûO6ÏYúsçN½SÑ$þ¼ÏE¸Sí˧}¬ê\{üöÉ[ŽžNϵ/ïÔþ)=Œï:×®=ÿóHÐÇð×Sã:¾6[/-7M#Ë[{Ì"ß>[i-Þúø¯QË\I?+ɰ–5Åvü¾µ4†ãëjªWó{U{±ûXÍŒ:J„$lk ‹îuÄÿ5üÛ¯ÿþåËÏÿýןùå—Ÿ¿¬Ýÿëoh~ûºhãÍK»ö\®Ë½>žfж‚ã–%2´;I²b³²4äÒ%z‰íÿ±Zx+¬ _a„‚¾Érlÿ·ûX Kbû;=%vÿœ*O».%bš¬n5}D÷VÏ5ê‚Õ[‰€k¥™wp,0îý±b<à½z+I÷úr7øqÙêsõ–+Ö}a¼ÑXN8XÒXíîíÉ"—´ xMLQ[Rðüö\¨ËèîRIÊóuÁÃ8ÞÒǪ9lÜ–¸Ö¾£·0€" `  Çl˜¨îTr¦OÊpôR¬<–d=~¿aÝÂÝÌ’ƒ½Ôë³fPr¼œ»ù%G"ŽúÆ —Ñe<Ò{[aÉ8LÆá{=‡ñ’€ÓJ–x‹˜c^øuN,Y’ …QÉ`£sLgã¶ueŸ¥É¬6L-v«Ÿ“qŒ ½¿±b¼^²‰º7Æá#=ÇðÎÜy¥Éæu”Ôð¨ÒbCªÝc“G“ ©i(ÆqîÖžc˜I.ÃvítuF_þmmçFòãá#ÝH0Æ7žâðËö(£ª±eÖF|lÖ€ÍõÃB¼ÊZï=Œé&Öˆ_¨Ï1ì¶õžýI­ãó¶ÔsûÍïwÏ7²©èœwæ*sñýÃøˆ[|žñÎ\r£?îWvz~[/×9GK„¥‡ñÚN #¾F[d€9'âÉXï]ºmøh_5·s"Þ[¶õÞŽ'¥Z€aÜ×s"öqºmwdA”Äçv#l«÷ fkïävÇtª}0^ëe¿&âÜ.îk5Ò¡Œybí×DÌÌ×Oxµ3íûp®Ç$tÍÛÆ”]â4cÇ—x{º¦á•¡ýŽ{: êÛ4þÞò5 3çX‰îÏ×4ÜøÅŒN,×4ÌÜpa¼¶™¢Î§w»^3°53¦³vÍÀ ¥‡*ä57¾‡=ß»âñÒâ0öÓ+Ž¡ˆvvÙ=¾Ïú±ìñ9«·7Çwø%×ÎD”ÔÒ‡§¶_3q¡›ÞeYsQ2öS¯™¸aã«tÙOû„Ø#sí93ãÞøã½OFu|ëá,ž3q_0üÃx€SÜcßcxŸk8£Wt:©Ãe½ŠŽ.“\ÃË=ýá¨H_ÐÑô{÷íp£ä ãþôŠÃ[Ô] :¼îëÍ £›:ÖoõôŠÇJc]dV>‹*Çì²Ê,¬j¯XÿÖÔ Á—W<:[Ejõºx¾òÙ«)\sñŠíò::×vàñêÈ` ãÞe.ÆUë½Ë||1dq,§l—Oì£f,ß®†Ùnç5¯ˆ‰c¿}bæ¬@nŸ˜³Ê—£¶Çíå8‰Pïá«KôšåÁ¹ïÆ~;à ŽÖIï÷†©¹æØfTgX‡E–xãxõÃVcY};à ‹ˆ0öËfƪ:oÛí s« p;ÃÌâX#@a |ªY" ÃáUaé0öÛæñŽñßûïÇeå›QÇâm½½a&‚«cñVoŸ˜š£5XÇåïsq-/tÌnâÕ#,érŠ3G”Æ»ßT}(p{Å;–/µÈ7Û}†z\s13-U ˆ­h혿.¯˜)jÄÜ^qB w€€Û+ÎllDÜn1…ø£Ù¯XîÈ8_£ÔÈ z»Å\ôÔ8t»ÅÔ˪‘õr‹¡:¦~{ÅTèª p»ÅÔÙª p»Å bÍj„ÜnqÂÒ§j¦Û0jgk@4™™ÑµÀÿȰFVã*ŽÅÆul!ôEÏ*|ßAÃ3Éóï/4&-á£óÞYáz L:‚п Œ^ —¢€Ñk>Y0t¬quÝ çÅ"†I7]i‚CǺZ1 8ôšôCGо’`heø¿9†^.)¡cô<~Uþ¼.˜ÝÉŸ5÷oΟ‡Üü¹uœµ#–1þ›ãçкS\ 
ø¹u"àç8"䫸y.ŸCAO—)'~>h‹B_îǹtÒçxÁu™úÜv’+eÐÆ­0èFa=2èÉhç”Au•A‡kTeЇQwh”AF™…Á Ã¨5ôl̦8!hGtØ4Š:Œà¬Š ß3B™FÝ è8Ȭ®ôaÔQ§ú0*k¶0]ôdT}d/Òáªz6>b¸¦ŸAFE)ç­7¢EЇQçAÐsAAгqSY‡ Ë& èèóq…úÛð«ú°¿*{>N© UôüŽM$|Ì&øy²AìÉlBŸQÜTxÂÆ’âç¹äªz#6D?#M?' ª2n¼ñó4D?#JÑ–âç[äÙ_û'xž~†>Æ6Aé‹;#É×oÆç’7wžm«êÞØØTð<wÕ=šxö«ÜÙ‡ŒÒçÉ–Tʃ €Ï‡Q1™Â繤Àç0ÂaPø|õã¦ðy.¹ª>Ól¼àsÁ'>%$*|>Jª‡&ðy²)|>Œºõ¦ðù0ê×Váól‡›€;¤M½¤Ð磤Âu¡Ï‡M?ûJŸgã¦â(núF¸Ø;$ Ý( ú0ªG¬ z6 ƒ>Œêk*ƒ~Ïx1èÙ±3k­2è¹dS7 ƒö·Yôd=…àtˆ5­80ªÚm ЇQ…è÷Œ"ÇæÆ‡>~×Õ®rè¹ph¿Cpè¹äÍ¡ýu‡žÂ¡gã®âaœÀ¡ç’]…ãøV‚Cûû =DÏ—-ª‡Ä— ÚßYèÙ($z¾ìª’g|-A¢ýý‰žK ‰žKªž·T = ÒþVH_àÑÏ7 z® ½n= bѳM@ôlÜT~Ñ«=½ýö®*ˆžŒ ¢g£€èÙ˜U@nÅâ z. ¡À§ó¢çË6Õˆô’¢£nï(ˆž¢ß3Šò™o6aàÐS9åг1«ª¡]õ¡ç2Eµ,é<COlªlº>q™òç©ÄªÊlw !ÏS‘]å÷î"‚œ½ˆg)¢¬ùøYÀ¸¢f-"Y.ª«jWº¬EªÊÃÝ? QžncU-À»ˆ äéUW”üž±ß¾®UVÞfEÉÓ‚8ö©¤ d[±ðnßw ð›5ëÈ”C¾å“sœ³­•çÜ” h9äd4Òh¹eR4æÆ}{æÀ ÏȼsÕÀ\¹½FÀâ>àÄJ˜«…Ý€0Wné‘0ŸU!Ù§Å7‚/W‹r_®Í²òåɨ|¹fÃÖÊ—™\ܳ%Ñ`®ÜÎ#`¶4HÌ­ÀÒzxtfË÷LÀ|AEÖJ˜-2 saø£fÓñ#`<‰mÌ¥s‰ À_E܈æ²[´æaìÆoÀ<ŒÏý å`¬:¤€–MŒh¹ì:±-O-[‚i¢e˲M´ì ´ÏAÓsUƒ+Yöç¯d9)¿ nö‹,gçå&–ã_"N9Ÿ¹Éu \ÙÒ3‘+[:kreË ®ì5*VžÊmLÅÆÛS¬<•´lÖ¶Wöî^ö:Á—-Q<³>™ßœ0ûûIJw ËÀ˜C0O73À˜‡’`Ì1þt—Œ9. :­ŒÙßW0æCÁOé´0æ(¨»`Ìö.1Çë ,  9® 0ü$Í÷lÈ<]K!sL_ú|™'£@fŸ÷™Ãˆ l…ÌÑo€Ì>Û2;fÊó;¢t3û<Î\:y98³ÏàÌþ‰g. 
ã gŽOœ~‹Á™£N6àÌaÔa Î<µV9óÔ åÌS™ýËÎìU‚3{ׂ3Ç7@ßœgž~UÎì8s([šñæÌ–'œ™y‰™ý3û°fžJBsÒº˜ÙÀÌÞ?ÀÌ“Q1³·˜y* m9{ÆÀÌþL€™ý;Ìì=Îì @ótYËËŠðÍV ÙÜ\‚fkAsL>Š'OÐÌcžäÌ!,­¤àÌecè8sxY¡/Îâ£JÁ™C‘!ÖÊ™£=°ÎlOrf~Ɉ™c‰ X ˜ÙÄÌu±¨eÅÌö f~Ǹ¿r“‰™Íõ f£ât`fû63sACÊCæü&¶W‹tzLAÀÈž„k)Êp¬Ä”áxJ›â/¡tÌê`Ý ¥M‘tz|,°ÜŽ< ?8=l œ.¿­lz’Q6Æ—Úeá²VÙtY@u¦Ë¾šÎŒ¶™ÎQ}’éñûkåì’ Ó‹iy(™ÎýD,€ÒÒ(-œPú i'^G½pF/Ü7 ŒNz $Í*I¤1(‰ôu$0:Œ¦B«ÁèÌ]*ÂèLnH u_²hË»a0º‘ªw¤ÖÁ‹Ž”/£œËÊqmùxNíµF»,h´ÉÈ’F[‚Ðh'Ž6åZâèÒHT€£#9„àG[.âhKrDíým4Ú2üfŠ£Ñ«ÅƒF'RwÒèÄ-5ÒèÅÂA£M‡4:qã4º0Ö4º¼ÜpycÃÑ”+7¤7NÀ¤mÎ&“žU7¦Þ"Š®&ÉmOˆ(šÒÿ†¢+NŠ6'(Ú¿ŸDÑöµ&ŠöE#™‹‘h¦!‰öO6Ht%^,qè/@4ß6ÑÕ¤ZñóbÄøyñfÅÏËj²ŠŸGkÁ;€ŸÃp­øÙç$àçèD„ ~Ž«"¬[ñó0’+~ö’ÀÏ‹Å_ŸüyaùsN?/L>Jí]®z±0I蔈ɕ?{þðçÅ¢AÁŸ‹éö*øs²³;àÏÃHP¬ü9Ù9"ðçD?òça$EVþ Ò øs\VÇ+øó;Æ›?§DÊ þl¾ ð³;9ŠŸ=Uðs #D0+~ö<ÀÏ6Fô9UŠVúìÉ.@ŸÃ-Ó™ôy·9:Ù‘pçdç=À™Õ̸s¢ö ¹ó0bZ€N· ™q¡²¬:Y88´=xðç(¨ƒü9Œº)þ<5Vùó0jj{òça„—þ£ Ê  mÌ€?'J‚‚?[s '£èdÚ¦ÐVå Ó2@§ü€Ž•‡`-ðçÄLàϱšy‡<§Âµ/Ès2ZçaD€Ès2éMçÄì$ÏÉ”vAžõÚ òì6ÏñðZÏSsVî<]U¹³Í—Ïq“ M@žÇeAzAžã²R¨äù~Ä`ÎÑL„0+sN®z¬ÌÙŸÅ“9'¤ !röÆ9ûørŽìÜ:dØsܼ y(z¶×¢>ü@ûSŽ«¾åS<6jA Ç¤5˜ Ð 9q¾—@/ßG ×;:ò=9£ÇJ÷r €•j+O9SceTÕ8VùPí²xУ$8´ Í@»ò´É¯@›– ´‰A_â3„Ïò³rgÊØ˜Ò³Ù ôl½I¥go¼*={A(=¯†Q¡ôlÂ|Tzvã©ôl‡JÏ&±H©g”z6YGH=›ª#¥žM™’RϦ”I©çíþ<•„Ô³)õ¼M‘Ñ"õìFH=ûe!õl*¤Ôzv#´ž½Nh=ÏFÑzv£i=ƒ%ºÖ3¦õŒÐ}Ózv´ž×—üyÒI6­gJ!_ZÏ”O6­g7Bë™FÓz¦‘ZÏ´¹Ö3nдžÝ­gMë™]cbÏd¦ùL£‰>»ªÏ¨ÓeŸi4Ýg!üìF~v£?›È± ?“š›ð3Kšð³‡:«ðóë0èItÂÏÖÊ?{ÉSÿÙ ™4 }‹ôöŠCO!½3ß ÝèÙh¶Ç ©YaÐÔ¬0 hÊR˜´¡M£i@ÓhÐ4šôkmŽwŒÐ€~­Í1•4 hª—Pš*¦ýZ›c2š4/{j@óWÓ€¦Ñ4 Ý h©M›i@¿Ì@8MÚЀ¦Ñ4 ÷—@z2š´ëo@úµ8‡w‚k@ÓhÐûK"=Múµ8Çd„ ´_*ÐnThÚLš™`)íF•…ö«BÚP‡v#ä¡Ý‘h3R%z6öüjj¦X´!íFG»Êѳ±«ÓL#T¤Ýi7BLÚ“ž]f !ˆI›bÒvUŠI»ÑĤyÙòr÷ÆÂ÷që?¨&yîN<;ãëÕÔ›¯ c’¯¡/õ::CÁ¯ëö¿®+#¨&m-T“¶ÜØT“NêT“6©R w˜º…;Lè’Â;ƒ^(ܱ} ܱYþDwX(!•;ì|<•;še„r‡…`B¹ã<}`¢Ô[Åö“Æ ØãŠ.=)vÐSÝ ¨I_:Të(àëPNAàµjvdË™!éŒÍYÓìxB{(vdË´ÉŽÄáEÉ3ºdbª(Ù‘xÊÙ5;pL–šÉ"˜¡Ü‘·MÍŽDøCͯ š¦ŽBÍŽ…C¢ ‡:E;,ï'T;î[Pµc±f¨v,¦[¡¼z<8 ‹SµÃZ^ªÆnWçgÅÔáî¾JIèj7ÀÔ1¬´Ÿ©ß1Þ˜:¦F„ÊSGc_bjï.`êwŒ7¦Î–˜:[¶S`ê¼3~˜:ï“ Ç©3ó¡‚RgK¤ J-ç)(u¶ä¤ ÔÏǯpzº9…ÓQ„NgKf 8=ŸpÚÛ8-ë*àt¶D¦€ÓÏS&-+˜t¶=t0éɨL:ß¹•FOµ)Ζ,4Úï 4:JB\Cit4E>‡€Ñ™IÓÀ¢³Ñz°èl¹[Á¢£­ÈP?ÊG˜mç ,:Ä¢³eLU-Ë!Xt¶=°hoXt¶,ží ‹ÎNp•Eç3‘* ´?…ÐÙs)*„öŽ„žŒ 
¡§þR=u‰Bh“Dƒv›"èlY­ ÃˆdEÐÙv ³mÚAû]AgË} -Ä:Û¶"ôT§"èìY AgKÑ=AgËŠmwr!èìAgK#¹cMm½ W“ìPÍyŠzºA%ÐÙRð@gKm=‘žh¯:37t¶„bÐþ í]'€žJ*€Î ö"€ŽnGŒ³èé²  3cÁ ýi*€Ž‚Š# ½=ÐùÊìô7øŽ4ôT¢ç©Œ¢çaD(+ÐótYAϺZûÍÑóø ¤"€ž'£¢çhÊ=O=¦è9h@žïçæÍDœ³2g¿A0gK`ιñ9KK6O-QØì×SÖ<µRX³i°f2`Í1, ݬ¬93lòbÍÞ'`ÍQêö‚5|ÍšýÁ*kŽ;T•°æÌ8L²æÉ¨¬yêeÍÑã/Y³_¬9ž#´;”5’8köáÖì ¬9.«òÔŠšÃ¦Ÿ> fŸo€šmD‚4û“iö‚ š}(+h¶åA³­¯š¹%gön]‹÷æ÷iBÿúÃòöO?¤6Þ»u´aü´Æ ò0ñÇö¸Åø?þçÿñ=D{<èþ†@ìü*ûÝÆ|FÿÆJã¿èmF¥Ï;}ü±è û§×>ûUÿó/ŸÚ‚óñ^Oúsïÿ~äÇyœõszÿ¬ÿñ—ÏmÁ9ÎÏ!ÿ^íëýúA+Æ ÚÚùø¶½ÅÜ9<ëñUKyýÃOOåù¿¤·¿´Ñæ·Ÿ¾¼ýËß-ÿö¯o?ýóÿøÓWV3æéؼHiO¹çß©gÔrV4ÚõíUµŸ”4¾eûúªªWõiÇçÌVö¶þo˜"›ìb~ÚNØTûòiTkß>ùqËÄü9=Ô¾¼Sû§ôü1¾ë\»öüWLuNdÚÂ_-cYc²—­›æ‘å­}çÔÈj†—8Oã¿bôLµô³–üG«‰ãy[Ë©4Fäëzª×ó{u챯8ºi,°JìåmÍÃ'î^GüÏQÿýúï_¾üüßýù—_~ùùËßþú·ÿúÛš†ß¾î¸ ÍF’ëãx]7&áÔÓÒþ:“IË‹ªÇñLUËYâ»Z.°d,ýZ™¦Öy^¤‹:ñìhŒ®Bš.gZ0uY¿µØW=kÛHJ;´ðŸkø¶íX3¶Ø>QiÛ2–S-”#O$Ú¶­˜'õlëu‚¦ öÚ:†ëhPI[}û½—Æk]ÆHv>¤¶ ÄVE#`T…ýÆ1°î}ö¶¦›åÆõÞPo$ëÍ;²'-lcÕúØÖj±Ýz¢ÀÖÿ´Ý’ã§ã…X÷ühÝxI®Ý컟ڗíä˜ËÈà‰¿(uŽ|î?G³t[-îþÚbžŒc­{í"GÍÆ|o%züÚ Ž’ŠÝâY]Û½q·ºÏÓÖíÞ+ŒnÒó™1>®¨0â©n²‰F…1ì®]Øx"º=ÖÆ'áÚh=c»7Tã²JšÚ¶Þ{¦Ãøøm¿÷E[cjÍjg»ŽvˆWîz‘[eŽà1YÝSRñ0Æ›|mUÆ8Ô=ζ¯÷vds–·hLÐ׎cUYe¼÷¦bŒ}ÝÄkýÎ?4¾£¦]TX¨í¹8ª”Çn]ò¯RÖ"Ûµ•í(°õ{·.ºT9îìÞ‹*õÞcV½6Ý¢Ku”††Þµ±uÖMK¶{„ñš1£ì¥ÅÍ+çõ±kë*nS7ïi©k®g¼Ô¡LsM½Þ”Øú¸¦ÞJYñCôšzí!Å!ýkéUæ«wàÚTŠQ¬!N¡tz}Èâ²ú®9ÝßÇx³Ž¯ÚõÅ­h´e0ºk‹g6®÷.NÔùì› ÿפkïÌ#tç¿/öòtÎÁþ Tù>*Ó—íÐ~¼º1‘{×¾H”YtLH°àñ~ÊÜtø ×ß&áöxm`ŸíÍà×\)1¡–×·+Œ×h>Œ¢º=£[ÞyÁí¸ÝĉÀÐ ]ÆgVh¤Ž»>¦qY|XÇÌZï9˜©^Öñ‘¸@ÿa? „. 
Ô©­mÛÍì£Î&ÆÞ½¾ ­2n,ÂjÛ=3¥Õ¼öžŒkþávOÆ “Õª¾HÔ‰Q·®7?êDÉýFÝGk%ÆoLÄ7Ξ:aK²Ž‚¸®x˜˜èXÄ|_„ó™ˆð÷›ÍÏ™n÷³G·ÙÁ^‘ƒŽþõ&:ûÑ¿žyaˆ0¾)ê_¯Ç¶—–ÿÚC[à`ÇTˆ¼:Ø.ËûÖö£kÝWóØ«ª3Õ]ë^0¯ÐµŸr¬=àZï·'«NõXXèêêÅtª÷AOtª7&'¥SËyøð¬7ó'ÔÅÞ˜a”.öʰ¦ÓÅÞøá¦‹½vìDÂÅ^¹YL{Ì H{íht±m{åN;]ì5ŸÎuÄn²† s½#lƒÎõ¸ nÎu£:k_Oѹ^¹ˆ s½áƒj^våZ^vÛî}ò²Ûb¾²zÙZTô²["€—Ý(#i^6“ ÓËn”Ù£—힯zÙñé•Yÿô²7ûÕ¼lÕ²iZ<xàS÷‰~دÅù„§!ë#EÝj]Q:òž 8ÿü©õ¯)fßgο|f žÝþxò×_¶O”¨~ôû£çŸ?µþG·?pþå|…¨ô¾©~Ì`£)Çù€C¤u¸UÛÿ´|uNáËŒ sÛÇf¿®¦I½ú+j:’lïË^öíëkZYÓÿñ¿Ò[‰ñyüé?¾åKimó±R/Ûø~·\eMŸ÷uÚó\wþ¬/ã:Õ?}êc–ϧôùQwžëþŒ>?õ:Õ}÷ù1¡Ô±ÎÙ¿vB9VËû2œÓ²E¤m€Ê½nû-ÿ“BÝVMäÌMK­ëX3ÿ*u³žÚã´úèðQÏGŠàõ«f忌¹öø¿¼½ýEê”Öè?9RܪÎ}¬òsášÖ}jJÿÿüDüÓÿú¿ÿŸÿ÷ÿ›Ú£õwz z—Ü÷}Üàèáx ±n¯Sµ®#þßñõ“tÄ£Ø ž@‘û&1·>ŸmIccs 03Ê›|Ûצ+ÇüÔ­¥íÛÞQ–#T:G¹Q,änn¦–á¨(6VÙ{†ˆaœ«ÈÚbj-zd%WÙÞ—®h5‡Ô™Ãr_žÊ€9¨îXz/e/n‡‚GØ"O{ocìn½èb:ºWQn]ß¶GsH-Gj§ÑÌQ=T8—:Fâ¶#éWŽ#w[üóM@xÖCu[ Ê{k,‡8õƒD2n6b}âNFó߆÷¥è'G˜Kd,uÜÖö$ù©x~4¶Ì{%ô±6{PäP–5Øø:Ëõ;){\S¨A†så½€¾Ì9^ÔððCeFî7¢´Î[Üb1~㋺~gzʨJNå8 ­ÏrTµk¤Nmçßæ iÉc(ǰŽñ¹­«âë0ENö1Úãq.Š<òø§×JÛ ´žÇ¶'é×lŠ-Æ]Ý›(Û ]Û¼F^îúæaÖ¶#f<…a[Täj ¹#-ûc”®»*pæP– DzŒÒ#[ò=‚C¿úú’Žn‘¨Â¼•ÜmëJ[½³G}²D Û¶õÉÆTŽ'}&¤<ÚyóÆöínuŒ=Ïû~çckîz ²Måtœô8¯ÐÎ1 ­¿°«}ŽÙ†±Ð/Û9³.ºgš{³MŽÙÞs¿£Ýcõ‰ölÔ§ß¡›©õ³²/8¦Ð{c,î]¿}=Ò°_cVâÿÆšôÇHÂü³ü¨ôØ='ÚÒ{a‹Ý´çD‹3§£Åw.v{ ãÎn¬×,jKG&ö("¯@>Û5Q gVGCØ®<ìÇåî'P–rìÌ=mÒý÷²Ôq™k¢ÝñÑý1ºð1\ÑM%¾ÐË9Å*(›ÈÇ”·Ò¶§kŠMú6–e¿ÿi”“)½ÞÂ5ÅB4¡¤˜á¯)¶è>]IéHÃþ®8z¶Ø|N±ˆm/)¶œ¯)Šb%•û#הͮg»¦Xhc”$ç”íÕ Û•‚=îo=¶ŸÃâ•cb³ÍsŠíÖ-½ÎÇpí8#]R?Ò¯_~vu’syÑÙÓÃiÌ6íö š?¹›ÝsŽ=q™aeߨdyÇÌ|ïé†-ò¯?ë¦ïÓX¢“ê5X%ÊdL#ã {ΰÃ/‘Œ3_3,&ôɤË5Ãî:Ë„íªÁ> e8¿Wæõãs¤· ÛaQN_û2&Çvΰrœ”’wë1dǧ^¢ÃÂÖ/ß5\ zÇ.á9ÃFWËkpì^3,”Šž:úL^óXy^N슰]›¸>bûrbù™.qÒáâ8 ZâlÅåÊÒß+±¯¯®¬8`%gÞ®ì¢îjÑó¹ÇP—fFdC~¸¤²—TªxëáÊŠKZb)tº²£“ðTCóáteÃ&»ÈaÛ/WvƒÒL‰–Ó•ÝŸSBbâre#žãv J•ì¸3}ïjÄ.œólø™ò‚WyQN\†r0Ÿóì¶è¾l9TBÎy6Ú"µ¥#ÃúsÐ&ue#9×r¹²Ñì-¢!Ä••h·¢¹¦óúX]]ÙÕ'~ÞKDk]®ì¶`««+·úbò»Ô·›ãÏ•WÑUY‰@¼Ó•µ’ÞÂ~8õÏõWV· ¢8—+ ¥ñ²Š¨mt™Î9¡òv¹²ãßãçÆBt“þÛLçÒ‘w¤SQýÁÃ&Ÿ±C#èœbãи¶­wÿ­‹Æ`”ði/'Öš²÷Ï).%fÎˉ@¾1 \N¬½ŒÃvåRß:âÍŠV6f¶ÈRzM±‘ÛC-þ­úiŒ§{9±± »w½Kh^N¬—sÁåÄÚ¤±‰¼–¿U[Ä¥œS¬ ŸM)Å5eQVb4]NìVán¡0qÒ‚‚™9¤ª.'Ö¾<[?ž£µèQ¶ËèˉÝ>«„‚ëåÄn鼿FõöÚ÷c]uMTÁ>$Ö­ŒuÕ•BýXœËØëª|¹¯1~d¸î±#“«Ž»=tãÎÉ5c’Ü×#}úsr=Þ󱜺"¢ 
ÔùÙ#²æšS+FÆXÑ\î\”ÓìA7לÚк±JÚ.ßuØtbééÈ—þ¥ NZÏǧà9J+G:qΩÃ&1Ra»|¿¿^ïdéû’1wQü ›(}½s¥€*};B6®Qªß¶Úçœ 0YÆ»¼]¾ëžð‰êýH”þ¥˜Â úCø~ ëÇL¯9Ž­‚ ÙäþÔd5„¹qçcŸ6Æ +@˜;^W]¹æn8;˜»áDaî1¼€¹‡èòy8‡,w ±ëüYî ÙQݰe¹¤Ý=> ÷b@7(¶|!tÂ-ØAÉî¾"-%ÈîkQáEJv÷”¼¨d7Ê)Q²»78Å »{èkʃ¾»·Îê„ïî Ë?ðÝ©:á»a[ðݽ=ójœdwã¯÷cQ°&%ƒ v÷Ó–Q `7êÐQ `7l Çìî-é `7ŠItÀîQŒ»qM…A vãd °»7Ð€ÝØ;Q6­`7Ú"›»aÓÇ¢`7êÓQ®`÷xd×9RpÝ(6ݸg<Ð=ê’¥@7úC¾, tã’UMt£˜ å¹Q›bOå¹QL—òÜ£÷åi?#½¢}2—)Ï ›¾íÊsãjºÝ¡<÷ê@¹q1qöäw¤üW@îq­ë\ 8îtEá¸QL'?å¸> •ãÆ5E+Ç=3ïÅqa/X\9n¼ñà Ž³ ¨pÜchËl ÷xl»´S8îQNY­pܰ‰O Ž÷n|sÜ(&` ×gå¸Ç­ NWŽ{\Süå¸Ç¯\óÁqÊ¢@9îQDØ‘rÜ㮕ñ ÇkÞ'fqŠo-0צGÀÜãÕPð*íêâ¼*ÌZ)0÷¸¦8¶ s;€§0÷ëâH+Ì=îA–J s£À«À\ë1e¹ÇL—²\¿ue¹Ç[.þ°²Ü¸æÿßÛÛìj®[W×ýs»™_ýSBN€$@Ú§n8±ø=†¾|\z~4ÆTíãª2\€ïó¬’D‘E NαÜx$Ér«ë ƒåf÷"Ë'K,÷•ß/ ¡Ü aÍû)cõà*”/J¡Üû‚›L‚[·ƒ”~$¸1„‹àžèC‚¯-Üóœ¨YÜlÜóޝ׏î-‚¯Ül-ÜxQŠàfËàÆX.‚{»î³ÕnÏ×F€Û³[“Üžýì†l³½ˆlólD¶1 z!Û¬"Û˜GÙ®«#²½Çök’êµ# ÛQHl³Il³cØf&±õäJÀ6Û—À6ÇRÛ, m\޼ö÷@ÀÈk£Rˆk3Zëù’`m¾³kÏ7ëõ1EXƒYm|¯ˆÕæSDV›o3²ÚîŸrüdzEP›ý‰ 6›‘ v_•0X 6ß_µç)¯o,ÚóqÁÒ'Aíùb@ÍÔÞŽ¨Í×%AmÎjóöjÏAÌLjcV,P{¶ ¯P[çäò!Aí­,µ×GD›5BD› DÓv!Ú|]Ñæåˆhó8"Úø|Úì#µù†#¨­Óa -P›op‚Ú쮵Yõµùˆ‘ØžÝSlÛ³x× ¶1JØž•|é¤lÏo8‚PÛ¬1Û«÷ÕÆ¡Pm6µPíº:FT[“«k¬%©ÍQ†¤öVø‹Ô櫉¤6P‚H­«Øjp¿¯jcrêÉ}•“õ0ä·°ãºÉoÑ;ËosRÛ>’‹æ‚ØnZk5±]©ŒjãƒW¬Ös-³ÚUî®fµ‹6ÝÖž›g’Ò¾$ží‡I‹K<»È¶Õx¶Ò:€Ï.ÍL”xviåÏ.²l5ž]Ú§xvi&Ìij‹ÜQgí­7ž]äfk<[Å$¯|BÚÚ¹ T/J»ÈÌÄ”vÙŒ8Ii+44¡¢´§ïÆU#¤´ÕWeDi—U]^”vi&ª¤´•¤œP”vÑNzSÚENy¦´K3Q%¥]6!KBÚEö†´Ñ ‚´•ëýR”6nO°6ÏIX»ÈZͰv‘ï¢`mù²`uC°6ú³hí"#LÓÚE†•¦µ‹L­^´v‘}˜imCѬ]”÷Þ°v‘“¯™í™ÂúݤD¶‹ÐŒlyá™Ù.r1³V³]vËŽÉl+†å1ÛEþ“f¶ñ¬ŠÙÖ(KNJf»>ŽÌ6Ê)fë²=]€€z‰l—]í"[[!ÛxÙÞc²­¾‚G\ÈÖÅ4²­¢z>‘­_;F¶îãF¶‹ aŒlkÂ$IÌvy:¿ˆÖ^?’Óº›Ó.ãCÚE^¹´‹R’ÐÖqø| s ÐFýÐú!3 =“F FB»´OÕ¶·r’Ðf9Ihyð˜Ð.ò_¡õ Á„6‹IB[Uï ZÓF´‹òþ¾í òÑÄ,&´ÒæÕ.²+Të/6¡Z­€Õz 0ª-´e!î…j¥0ªÍsÕFL¨Öï £ÚE~ÈFµ5éF• ÕÖ[ƒ‡Pí"Gp£ÚEÉ?Œjk&yµžHí²ZKLd»È$YÈÖ/ #ÛŒÜúÅfpëW³Áí¢C·1î[}ë²X}{P¶mñ­ÆLko’ôÖÕ¥·j-À˜¸qsàZ¸°×Mn®›Ü\Õ‰õ·nº·þÖ¯%ëom´aý­oÌú[—Bò[×±ä·ÏWˆu·qén#&Ýíëdܺ¡¬¸uO³âÖ/+nu¯ܪ«Yuë£$ºuÓ‹ÖöA†kÅ¢µÃ"ŽU·­ºõp+Õm&Õ­‡M«nu\Èn5á Ùíq—Ý­ì¶D·ãWæ€T;_5zCµ£X‰P­×Œjg; ÕöÙ"-`…jGË8Elû'̧~·ý³høLw[f<ŽÌv?Õü"±×í¡×µåÒ "P;ÎÚy-PÛcÄ„µõ­GÉ.Aí¨ä7µõIJrJP;º;pÚa×fqÚ¡éÆ‰i뛚—˜vØm*KLÛc²Eèßí'ùì°’Õ Ï›l„g{Ñ>ó»­ã( 
$žLÙLg{A(\£í1’21Ú~JI i‡Y|\¶—Rú^RÚ“E)­oA6*L6nA¶Ú€ºYBÚÞ–äz‚´ƒÒ)ÒŽƒµ¸„´ã`—\@Úx.iÇI‹0‚´1€ÒV3Ô¦ í¸|Áí¶~¥± í¨ µ=$M/Iíx˜â’ÔŽ‡Y3Imµ XƒHíhÅ¡Híxè›]¤6c$µ~,Lj{›Q· T; F§DµÓ ©“Pí¨ %Fµî‘Fµ“}BµÓcïíë92¶|éh/.ÛÿTc;Y+(.[¹o\vl—䲓÷ŠËNÞ…..Û+œ¹l3´]—¼Q\vš$W—í5v˜Ù^\v .¹ìäm˜â²“7˜ŠËÆ“).Û+•2mÙ:'?Ù¼Áìh©ŽÈì(¯q“ÙÉoEf§»‚ß™²õ`¡£ ÈNÞ×).[BÍ^/.[w…Z—;~qÙºÔõ1+,;Òp ËNÞ=-.;Ù7@`vºñغþkòØ÷KA(¶~¥%‰Í"“ÄNr3‰åpg;Z´)…•ë‰ÊNVg‰ÊNÚRi*;yC»¨ì¤¤ËƲ“‘…eë9ÄÈB,[§ä­ËöE¢Â²“¥ý²÷Ø…eãžX6®$,[ÉtQɲÓ`dK,[5f²Óh×[`Ùi|‰„ d§‘ë}â±Óh'\òؼyìlÇ`ñØÙ^Ðâ±uNÊeÉc'›ç‹ÇF Èæq$²y‘l‡Ž%$;9›€lô:!Ù¨3"Ù¼‘lÞ‘l'$[1ÌTÄdÝê/$[¦ñ+‰l%¼FÛˆÈNËçŽ#’vKcd§ýsG„Þ…äl@2[ï;:ì’ÌÎÛKŸK4;·Ïµ´SÍfŒhÖï%¡Ùä?A³1‹ÍÆx,6;O¢Ñb³•h36±ÙxQ‹Í>^uD²“­Q„d3F$ëINæÅc+ÉNZ{dÖÛ9Åü^$;|’ÅæÍ›‚¿èíZ»ˆÄصvþÔ¡ÂZ¸ÖÎR+¦k-¿‰m[;K‚*$[^Å$ÇD²e,l›„‹Íö˜Ü(£­ã.(!8Û1(ÁÙ<%álÅ „œb ÎV+ÕÎVU3‡álÆ@gï¡‹Îæåˆg3F<f³ё^”¶~f&3RÚ¨IaÚ¨IaÚ¨IaÚh9bÚ:Œ¼•œ6œ–Åië½ Õ¨8m÷‘€öý#Élý$M0Û f£`"³#™­*G‘ÙŒ‘Ì6!“Ù<Žd6P‘̶ٌås'„Šè’Ì–a8à¿ÈlÄDf[@Ê™Í_If›ð”ÉlÆHfóœ$³™-cùkáD`öº¸l |L.›1rÙ¬*‚Ùˆ ÌÞc˜ÍÁl k‚ÙŒÏæ9‰gï± Ï6£sáÙŒÏVìËYÈn!âÙj¡» Âõ+™¬Ïc2›±'™mkºÒ^dö»ÈlžŽd6c$³Íè‹d¶­Ÿ[Ö6ùϘÌÞc™Í[ ™ÍÉlœSd6c$³m•’”`6Bä²yFrÙŒ‘ËÆˆËÞc—Ís’Ëf x¶ÙAC6cdµ~*Ìj#&V·ð`µù#YmžŒ¬ö»X­›F¨6B µyB’ÚŒ‘Ôæ ÔÆÕj󔵨mÞ!.PÛž&""´Í "´#¡û¡Í m“}” mƈj›Œ¸„j›HjóŒ$µ©;©ÍØ“Ô6c¡ÚŒÕ6£|¡Ú{ìBµ¯w‚híõ#9m–˜´6cĵµ"¬]°6c„µQÁÚ{삵ÍK/‚µ#¬ÍamÆk›· ֶ݉ËkÛêüj„µ#¬¢ÖÞc¬s>amþJXÛ¼Ý]°6c„µM ÃÚ¼`m´©mÆÈló”d¶÷ØÅlãa³˜˜mœSÌ6cd¶qb¶÷ØÎ™ªÎIf›12Ûæ=ýb¶ý?”ÚŒÌ6Ï f-+f›·@f11Û¸œ˜m“"æ m뻄•Ô6c¤¶y)RÛ¼©mÌZIm£FDmÛ¦ÅNQÛ¶¤¶Í–$b·m7ý&»²ˆÝÆ­‹ÝfŒð6ªEô6#½Íém;LaIo3FzÍ z{½1nÞ1nƈq}JcÜŒãž0â¸<ž?]$ëû0î_ih‹z½ÁÜI‰Ãm…°Èk@NMr)9!Œ–Ð’å.Æib¹ó¡h±Üy1&ËÂy,·ÏÌÉÚÄr'%M6Ë =••¶ad+¥­$&¡´Õ8J[s++mGÊ]Bi۬•Ҷ=•iÙ6'B“ÈvÖ>`«lgQ«l•ô0T¶J–x‰mW„ØV\UjÛQÓ­¶ÌU¥¶EY¤¶>OBÚbËm-0'Æí#Œo4÷LrJ_Ém‹‚Au뜢³’Ûî‚ó©·¥„7õ¶B¢ÒÛî¦ÈÒÛNl–z[ÎJo;Y2z[våÐÛrŒ²Þ6³ô¶6JµÞÖ Mëmm†üÒÛZ™z[*{­·µÂÞz[)Coëu‚ÐÛr(±Þ6ø¬ô¶3M,·„K-·µ³²õ¶öĶÞö° Vz[{[o+QRèm•k(T·vœ¶êÖé4­ºÝ¥zNÕ­²„…êVÖVÝÊŠXך‰Ýî–(Kv»}QvN±/Ù-¯Cu+—i³Ýñeê%ª;†á-©n¨JHuCb½­­ð¬· ¿Zémík½­oz[Ú„ÞVY¼¤·Õb¬¢%qÝÑŸ"⺣ýõ¬·ëIokÏMqÝñ±Eêm¥ï¥ìv³ƒÄ·NÂ$®k]Ÿ°®CVàÍ>¸.¹¨îhSgQÝq7L•÷pr3bÝPB“ë†îR\7•´RàÚhÑ Ü¸=)pÇÏÁnUܧ 
ÜÙ1)p™<4¸V+Z€ûÔd‹ë–Ò÷W”·Ü²-®[¬¼ì¥@®;ÖÙ‚ëöäΕwP'¶×±”àRÇ“\б_`7îZ`7îZ`wvnrÝ^åì»õd‘Èï^‚Ûá*ËÙ׊q‡4½…w°»ùnˆîÄw''"²×vˆãÚ3o1®{dŠqeL@¾[*,ÆõýYŒëLWâ»Ó#•¬Àîx÷DIÛÅs«y?ã¹Ñ’ß_òD¨³}Æs'¥¿‰o©š³øÖ eñí`\ðÜxÝŠçŽáÓ+ î¾Ðà†~˜}üòÓÖß1õ×ÿ;ÿjS5LEýßO¿û<\0ëC"ßé‘ï— ò]DúÛ.Yw÷ îøo}ÉÞ›k²vÞèû?Œã¬5~àõ÷½~÷´w^ÿñ#Kð¬öGË¿ÿ£ÿ6xàõ÷½þ£ÚŸxýÇJð8çï¥$ý» !§þ,õ³­­©,ç;¦mU¨ýùi•ýcÿßÒ þñó/ÿõw} ó÷ÿýñóþôo?í•–21Ÿ¦~ÃËþõWZ¾ãJ[MýRõºýê m¾Ð?ýûøQ´v:>~þÝ7Ý`ëS°õc=ûäj9ÖévÝ3Çá¸>®üÇùØÿùã—þð½þ€ÑúzEü;íëµð#GkÜ%2[þ°—SmþÉkO?êŸݮ]?ýÐfÆ®óRççµ§ûµDŸz»]ûªós@©-+û×(}¢_Ð^Ù> ´¡\–kó} >ž£É7—q™“· KÿöÚûeŽ×e¦¿ò:ËQ  Wx¿Îò+×Yò:áýCoY§þAÞçÙK½ö}Üú7öž×¨ÿ;¯ð?¿ÿß_~ùÍŸÿü›ßþö·¿ùåOÜÿð§¿jþøªMqK­ÒsSÜ{=#GŒ6œ3ùþÙóÊ©Åù7„vºØem°'£êb9]¶ë£¤j)ŸÓ‹AkWÞBw»Xn_ÊóøÅ [}M26^¼§ú„¯ØþbЭ__þË6Á¦¬I•°ló¥žh»Ôý÷œ^ ºíÊÕo¤>ùI«åÖëS±boßöQVwÛv©'ZŸô²6èVÚ©[}ã†eÛ/õDÅ';Þâ‰Vkà×’x…Þâ‰ÖÆZçRËó/ÝÇ2´¥–ç_ ºÇê¾jyþÅ Û¦dKŸB–x¢ÕÆFÜnÃúrÅØšçV©'A9/uÑ•å”ß> JÅjmv^ƳšÚv‰'*†Uû¥µ·x¢mJðP¡·x¢j„5ß°²yÖRÉ´â‰Í»dw¤ÇnÏnö*f5ü E·B“£b—xb„XöéO´M–}¾ÄUNì!]öåOÔqX5^j}þE'ë8¶\-пPtëÍ_ò…m|üï0Ø©ªyíO´¢xk…þ…¢+†E•¥VèŸDº.Wk_s¯¨ Ã%žhÛU”{‹'*Æj9°–[1*àŽéOô¦xØ×7_µûµ[o9–K6Qÿ«ä{·R=¸P*õùù%›8ŒuÇvÉ&ja·;Ú[6Q—Ã^oÇK6Q—Ÿ­Ø[6qÞ« ëÄk-ÓOï±Uú޵Có{l}l>­ß‚‰¶ 2¯ò¨žôçg[ϹäZ2¢œ®˜núx &ê¾xÊq¸ux_ÅÞ‚‰þsÉpÇK0Q÷…ñ}-k¾×P{¾¾7Ÿ²‰êY:Ýré&ê®1üWì­›8ߔׂÌ:"³v‡Ad-ÍOï"õ⬗/zäZ+ó¯F*³Oàúµ–櫇TãžF oB»ÖÊü«¿T/¢Š½ÅÕ °Z·r‰¹:+¹–^pºW^Ö #Ñ»†¥>©ë/™å1E©ë†Wì½ÚU1$Ö>#ܪŸÔå³#OëuKcÃMÛ%ž8¯w ëÔ®;€k½§boñD;V.¬ÒŽWŒõ9ÕâÕòiK4u­Ã¬óeÜZ¢Í³sìû8rŽ”õ#«x©šx0Q/1RÐò‰ýG¨9ÖZ”¿ô#‰ÿz~$¼õ¼*ÖŸ G½ó•fŒƒÑÜþ±·Ç£‹Ö)±|¹ÎÈÉrjÍX”ƒz‰‘‹òƒ^‚µ>A\¢Æñtиb×tt]ÊmytÊ qx[æKATwp®gÔ¯}Ìz÷l×ëñZ0üœj\tÍšÇ_Fe¾­J[xÉ%¢r‰™³…Š]r‰^@HKÖÊyY3ºD¶Zåv~éê86[-Ì÷ÑãÕ7¡Yk]þuõ³aŒ«uùK.1j8­uùK.áÎÕ?›r‰.keT»Ô.ü™Qí½Ö'Sŵ҈½&®!™¬Ø¾,Ï®êÞ¿"»ìûùªÌ:ëúê£n0šõ_=£á['a1vÅ.D=ÞhÆm¤i™»nŸÇ¾ç¿g×ň¶•+ÑòêŸ2?Yû÷ýP] oÓÛõꦜ–lðnÍQ¥Öæ/Ä¨Ñ¼Öæß:‰QŸRk­Í_ÊÞ‘슽Õ·û듾é=‚Ž”%ô÷Ý»¿ä“Öç±óŒ‘ÂkŸþB71êUÜc—n"ªå¹0yúg`DÅ:äZ[[.e¯¼¶ülQUŠí½-5‡‚þÑÔ¿6¯nªÃûmñ–jtöïÇ@7QuŒ7X}_ÒÞ‰{·×w{Z£œµR?`L嫬Vê‡÷˜ê1¤VêÇ÷È*޵Vê' ­ì µR?ahÕqäñlÕÌî’öNTª¯;,¸n÷P_éï¡U[á+ö^¹·þQôkd4ÿ8 á8k¬OÓ.E´Â1CFQœïØšµÇ!ø<[kæs |¥—_ksÉ(&Mpkö¶WŸeG9j÷ÃrÍ.Z…Þ2¸sŒb Kúüaeã³PÚž¢ùëñÙj©þ’ùÊQz«¥zÚêBPU±KF1΀gÛ€z[؆r£z³ 
‡õ­ìÒ—÷0+Cm¨½#ïaVv»T£lj¶ò=¿T¾qN¤V½[í§¸T¾Q-Hòû*v\Õ•“í­Þmü[=¼×„UþÜÆõ£6ùm½vG:íâ‰<ÿ¨ìñß· î•ø/l~3´ÝáSy[b¢zÉð¶IµgxÛ6vxÁ[ï)4½-H³á”¤·í1=1¶ÝvsM`Û3Bl[ûöXvb[ïé¶­¥PtaÛHz"l[{Em#m•°m%ØÅס°í™¨†Øv_õ…+l¹…Io+7õÁ4À“JIx{f™GfkÂÛÚêå®àm¥¿¾¦¾/v[…ÀÜ[ì¶Ž Ý'»­ãð.v[Çái»­ãòÅn#[¹Øíyc¤Ò¨ƒª b·ûJa¢Ðm%\g§$º­ÃÀ%„nw~Õº­ãØ6D·ufKB·çqD©@·u´lB·g9ñàÝÞÊÙ˜ X–¦B·uÜ…rHn÷u4e¹=ï%!¹=#¹­þ€¹¿ÈíÙèxÆŸº©E{ÄoÏ# ¨#¿=ë]ˆü¶J5ñÛ:Žèüö<ìz¤ˆoÏÞ…g€ø¶ã"ñmufH2…oÏãH+oëz ¤·ÂÑÛs0H#½­Ëa´½‡Gô6:¥èmܺ nÇ©1!îíÖw¦(à%Ä­bŠ=L <¸âžmŽ,BܳZ0§&Ä­ê¼Ì~^ 7 O†{V$ O†{>‰×¼ž÷l7’ä áÞ®„{»nŒ3b¸g¯Ä:îùäÂpÏã0ñ'Ã=ŸG|!’áÖqÁiß 7Æ51ÜÛõÀpÏãÀiÈpÏW¾,Épo×Ã=›¬ 7ûn'öK†õBˆ­.–ëÇ,wÙ9ÜéV1¯¯‚'Ò­òaÈÒõH#¢»<7kšå.çýá.ò 1Â]´ÃÎw‘¹‘n œB¸qœ®§Sf¸µ# M/ˆ»ÜÙmÕÚõ~4Á¦ÂÍâáÆ“'„›Çá.Z‘4Â]déb„=Z7ªƒwÙ9å6­ËaÂÍãˆp£˜/„Y ·¦ (b¸î·B¸QYD¸‹vˆáæa$¹‹Rô˜äÆKW<×¥Îõ<×8wQÚãÜx˜…sã… œ›×#Î÷¿pn<¹YNâÜ…s£:…sãÑÎëçÆå„sÝ ¢¹qw¢¹1=iî{TÇõTA7/AŒã1îuÜ,nÌiµõ)8í&|*Lë7¦ÝŒw˜VwlJ{×ÛÆóoF«,况_`´Q1ZߟíÆQ<ífÉ©í&DmD»ê;ÙˆvÓàkV+;ì`µ›_³ZÝžQí&Y½Q­Ü=ÕZ@jT«hjãzBµ¾¡ÚЫ ÕnÜê¨vÓ:ˆQ­[öj7î 0©¥d€Z¹&©Uvl“Z¥±R«ÄàAj7+…EjuµJ¤ÖϤv³hU¤6b"µ¾“Ú[ ¤6Ê)R»½$Åb´q61Ú¸31ZW–­õÓF´ê "´îÄ&´JˆV1¡õ½½­»¶9­´Ujãlµ¾eZW¢q­»€q­ïK´ÖÕaZëËׯqµºœií«ã˜Óº:Lký”™Ö*vÐZB°Vǰ6Î(X·,Xë1İÖÇ Ö6Éö k›¶*Öº± k]™oXÛ(â XëGݰV•eV»iÁY¬6 (V»}®²Í˜X­^[ÁjUÿku΀µì%ÁjÕûƒÕjÏ]°Z½i‚Õª½ƒÕÊš8X­úB°Z5^°Ú8N¬¶QQ¬Ö÷ V«—lÀZåŒXëËÖrþöZ¹y÷ºïeµWÂà±ÿûoµP'¨ÝÖO•¶å© 9§­¶UØ*±lÛQÈOÀv¶p6-v °mÞü/`»ré<€íÆOö¶Þcm`«3¯5\2¯]ĪÍkg~­¯-œ¯5\J^+2íhˆ¶„¹¶ƒ= Ø’† تé7°´ëÛÀÖ¥7°©”6¯Ò‹×Îrö¯U൫ø¼­l¶ÝTYƶ›Ù¥°ínÇaÛ&9´¹­5éæ¶Þêon1qÛ]"esÛ]–¹­RM·=„ž…m÷‡Ö^¼ÖE0¯Ý5™×îZx0¯}^Ç Öõþµñ«@­\ÔÆqµ;÷ϱõ (b{Húmd»[·+d5(dFB¶:.m˜+ÙªC²ãˆlë8è…lçÃèUÈv„1²Uî#Ûù°Â˜Èv>Œ‰ló8 Û‰ÙÎÇçÌv>^8•´6 !f[1ê^ŸÌ¶ÿ,$Jf{]„Ì6 ³­Z'˜³Í™í¬$4f¶u9"b2Û:'>ÄlƒÊ̶ž'°!1Û:'y.™m\OÌ6îAÌÖ#Ÿ™í»’Ekã®EkýXׯ#!\[%Äwµpmƈk£,ĵQáÚùˆ¯Õn\[uÞ÷Àµù#ym^‡à6nKà6ªŠà6$ÜèW"¸Ñ„pç][b…pãö„pßýCðÖ(v[‹cb·÷ØÅn£>Än³€„·Ñ¹oã x[‰gÁÛhSÁÛxxoãoý5¼¢ÞÖ) v/xë)ÃÞFˆÞF÷¾¡Gøv>>ÚV«·#¾Í»&¾¿‡^ü68ñÛ¨ñÛhñÛ<'ù­ŸáÛ,&ñm<‚¸y!nÆq£:EqãáÅãˆqãr¸Q+¸ñøˆã¾Ç“'Áu àFƒ àF àæàF÷ÀJÀY7jI÷»nÔ nÔ nT‹@nG›ÇäFÿÈ:É­{ 8&ÊfʺÊaH(7¦ÕD¹QL¡\¾íåÆ|R(÷Ûï}V$7@$7^ÏB¹qJÜè&"¹y5’Ü<Ž$×1“\•Ä ×7`[‡}r]‹¹Y‚\·¨AnGëNi[/$`‚\¾äúCÉ w– A®_€¹žÍäFQr+}³@n·äãô}v #d·ßds ¨ä¸åiK-Aî"ƒÜš­S& ;Ù"@ w”ÜÀ ·ZŒf¸2¼ÝLH 
r‡›º†·{ªr/ÛŸ7ªåx[)#áª@ÃÛLÈ”ixk/;Þö¡·'ÃÛc°¸V†·Ú[iÃÛ}·x˜†·»eå{[‹p%•ïí¾Ú €¾·ýEFV(ßÛþ$N—ïí>IÇùò½Ý'C¾·ûh"KßÛ}’LS¾·ûlÛ^úÞV¦V48}o÷Á‚]ú޶×£ïmÿ6¸|o÷§á˜okÒBm*o˾]KŽ·Mˆow™òØñvŸ)þ±ãí>ëéãí.{#;ÞîÿØñv×F$;ÞVÂ"ts9ÞöÇJvp¼õÃ(ÃÛr'¥ámɨ–†·ÇJí¨ o¦;ÚÞæÙh{[¸uý–[cïH†·u6ÂUÞ›†VÞö@Þž³ cÞV BµK¿Û°í”ßmeŠÂV/ùÝ7ÑmýsÎuds[µ1£mníbj›ÛY€²¹=”¥Ê6·u= kIp‡AÓe¹Ý‡MèvÛ«T׃ÛíѼÁŸn·Ç!Í•ÜnÝV²t»=äGm·ÛJ[|Éè^f·ýÑ]šÝÍäUf·›E¶4»uOµÙ­Mkåu;Kµa¯ÛÕ¶»2»]L»evëæ¶ÙmÓÊìvÖGªÍnݽlv«t¹av;Ini³[{tØìV QÃìvL‘-Ìn-¸ØìVf4av;~j”pNVèpÝÄ)ŒÐMócݘƈè¶ÝvüMûão€Ü4^&È­bƒãÖé©ÝÇ sqqÜrf½’ãÖ[Ž 4rÜ3†vä—mÅ(æ$Ç­–HsÏ™ú qîƒöœ8÷̲@[àÜsf@;àÜzû³ÊˆsÏ܈sOÇrðNâÜtý&έsÂÏ_8÷vNàܳÈW/œ{úb›RáÜûŠ[ÿ¼]žð/Š{N‘0“âÆ\S· -ëZPÜsŠDßÝ‹âžNñ¤± ¸ç´—B[PÜ3Fh Š›mIŠ{VÌÝIqë¸J=ùŽâVŒã-)îÙÎí @÷Ž÷,ç•cA÷vN`ÜìĸYŸÄ¸g¤½/@ÙÙì$ØÀ¸y0î9¡Æ#LŒ›IˆqÏ Hq³úqŸ§"»Í*%»=¿U1ê“ÝÆwØmö4²Û[ñÀnÏ^ñ‰Ùmæ »ÍÆ"»Íæ%»ÍñŠì6Ÿj²Ûª3ªÀnω|ì6ûÙm“ìö¼œýög•÷àm«„·Ù:„·ç¥hM{ÑÛgß!´½]Ð6;;¡m¼©m£MDmOTsÍ¿EmÏã®”.¶QS¶ñú¶ V#l{¾ù¯‘]Øö,ËMyÛöÕÀöœÓ\Í/`{+!€m<¶Y“¶çÃû °ÍŠTJ8'°½óÑ–R/°¾Øî×ôîæ°ŠZÚãv‘(W·‹tm²¸52³Å­û°¸Õhh^lj׶%ŵ—ðö|W]Q¼¶îŽp•ÂÛ“½,…·QcRÞ¶5³]ÊÛ¨2)oÛòi†²Š)9•·£ò6Ï åí=t)o#&åmƨ¼m‹¥²TÞV  î¥¼½ÿ|)oÛb;*o›ù¤¤·ÕrdÔÞf ÒÛ Ñ‚ÒÛè ’ÞfŒÒÛê`´" ô6c”ÞFÇ”ôÖ¤”·¢ð¶-OE+·ï)µ­/{ )mo¡Kh!êlã ”Ð6cÐÛVˆJRêmï±Ko›1êmã”ì6cÙmþJÙí=vÉn+FäJÙmÆ(»Íd·¢ì¶-VSv›1Ên³ì¶-ŸËnó8Ênï±Kv›ç¤ì6c”ÝfŒ²ÛŒQv{]Ð6c”ݶÅÒaÊn3Ùm„$»½Ç.ÙmÆ(¾ÍÅ·{ŠoógŠo[dJ£7c”àf Ü Q‚›1Jp3F î=vIp#& n[,T¥7c”àfŒÜ{ì’àfŒÜŒQˆ›1 q3F!î=v q3F!nÆ(Ä…¸¢·Ç¨-’7cTâfìTâæTâfŒJÜ{ìRâf JÜ Q‰›1*q3F%î=v)q›“ÚH‰›1êq3FAnÆ(ȽÇ.An[,Ö¥ 7cäfŒ‚ÜŒQ{œ½*FAnÍêiú@AnÄ(Èmöw‘ 7£ ×äfì)ÈÍŸ)È͹£ ÷{ r³ôäfŒ‚ÜŒQ1 rï±³WÅ(È͹£ 7ZN‚Ü{ìRäfŒŠÜŒQ‘›1*r3FEî=v)r3FEnĨÈ͹£"÷;8¡Uì¡ËÍ_)ÌÍ…¹£0÷{ s3DanÆ(ÌÍ…¹“0÷;Ž/c só8 s3FanÆ(ÌÍ…¹ÍvRæf9)Ìm«Å·æfŒÂÜŒQ˜Û~%“YGan†(ÌÍ…¹£0÷;…¹ù3•¹£27cTæÆ]Sš{]ÒÜŒQš›1Js³47cÔæÞc—6·C¥87cç¶UR`js¶¬Í½Ç.mnƨÍ͵¹£67cÔæÞc—67bÔæfˆÚÜŒQ›[1È’¥ÍÍØ³‚kBó}÷¯4Ãݯ‘åæ±° ·ÙdáŠë­<€š2Y˜´c]¬·ÏÆ©¥³ÉÂ$õ›Xï´Ø¬wZì`KÖ;Ú¡W¬·%bFlÖ;®v·%ë“·˜õŽVBŠõ:GŸYo}ßó8²ÞÊÚHQ/Yï¨ Æf½NU)Ö;HooÖÛO‰Å ³ÞÁòÅzéæÍz‡¦|P/Öµ%Ö;ö= ëí§ƒŬ·Ç¨oëí1]¬wpÇë­Ëa¡A¬·8%Éd½ƒ6B›õöë‡D¼Ut·%ãÍËòöãèU,Öë”ÎF¾ÃË·R}~f«0¶ îC:LÜë´Â½ãj?WâÞÑk?½ãfXâÞÑ0½£Ì£^¸7FáÞQCƽ£”q﨤0ƽ£ 
Õ„{ÃÔ¸·r˜Ê3÷¢½£ô`¦½ÎajÚ;ʹR°w|.àóŽÍú[b^W£1ïèôF¼£m,…yGYCóŽ«„7¼£Í´„yG-?óŽöG#æí±*Ì;ÚèL˜w\œàŒ˜wTjGcÞjÆž˜·Zòa^§5æm®'Ì›·Ìë7¡1ïh·ZaÞQUƼ£}Y…y#»0ïhW]aÞ‘¦W¦¼Ñ¿DyãöDyór¤¼u$C¢¼c8Ù’òFÿåå¸,ÈëQÈ7ú‰ ïh+:BÞñr/È;†œ•7HwlJø€¼£-ey£y€Ø7 È;†0!o?ŒC¡ ïx“â^7ï‹7®'È[×£¤–7êQ7®'ȽY×ïŠa2Þ*&¹1ïηd¼cäe#ãêã‰LŒ7®GÆ;¾ò‰îúžwëL»YÂÝèy/¸;:;ÙnÝ-5µd»£Ó&‰íŽŸ{åŽNù#¶›—#Û÷OeºçPÁUlwtæ!±Ý(¦Øî¸Û4l7†±Ý<Žl7ËI¶ëÉ“Ùnt,±Ý¼Ùn^lwÜÅåÄvc&!¶;*U²Øn¼XÄv£˜b»yÙn¡n&¨¥ î¸}ꎶAÝh2Aݘ êFM êÆìˆPwtJ8Aݱñ»ÞP·G«A]ßµ¡îéÃu}놺ñ uǰÀ%ÔCLª;†­.©nGªëÙ‡©îèD`¢º£v˜›êúMgª;*Ûœ©îè\f€ºÚ uGå6Ôõ¤ÀPwTåêŽJ;þ}Þ¹¿ÿiøøŸ¶ºú^Ö ¥FÞOÍtýÕÚäÒŸýßO¿û€\œëC"áé‘ð— ò]ÌúÛ.Yw÷ îøo|Éþ,Ÿpç¼Ñ÷ dJëÔ$ûU€çß?ôúýUÔÞxýÇ-Á£Ú-ÿþÆ¡õ‡´Á³Ï¿èõÕþ,Àë?¾P‚Ç9ÿ+%Y õsŸ×ñ£OóŠãÕÚ¶*Ó¿þü4éþ‡±ÿoéåþøù—ÿú»þ þûÿþøù?ú·Ÿ¿öB­ôÕ¶Œ}Püê+-ß~¥šÿõ‰NŸw¶uùú+m¾Ò?ýûøQÌe:>~þÝ·ÜáÔßͽE×Ê 9Ì­¿¬ÖévÝþºöÓ?®ü‡ùXþùã—þôÍþ€ÑúzEü;íëµðGkÞ况ËõrÚ§ûµ§õbÜn×®Ÿ~h3#Þ©óóÚÓýÚ?¢ÎÏN½Ý®}Õù9 Ô*ÒþÕÊ4•ƒOÃŒûѲò™Úû(sô™êm >ž£É·Ì¾Ì|òÒeÙúWÓý2Çë2Ó_y¥ÿÜ¿å–>cß—_¹Î’×ù טkÝklýÛG¿‡ª²q›ÆeÏkÔÿWøŸßÿï/¿üæÏþÍoûÛßüò§?þéË_5 |Õ¦º­–ñßÈz‡9EŽM¦W[-ã_ºäA_@ÛåA±oJ¹¹mý[鬷‘)Uzèý¼—[:û.¡Åºê”m„Ðbô}×&--Hmm†Ð¹Ú*¶_9Þü­yîZyq•^ëÜBÚjÕèHû¨¶J]"~K·¡E?§ÊÙ ´X´è½µB‹^ÛÚÁt’l½¡§+ÉÛÈm#Û>^|¬\Þd*v¼}‚¥pØjMŸ>Á4<·±¾3 Ú »/Lg±éã¶LئÇ!è(ýWd³h¯wf³hü¾ÝÊoâ² V¾‡m/züò±Ü•›f?®®QÔM÷ïù+Ë›üØzµAgQ1X!– ÓYÀi;&¦³p1Ëì² nÊdÔcoÐUçœy\éàdÉÔòÇzm/‹,5pÍ—OðCT±Íy,°ZµÕ¢> ‚iayìÎcÁ§Või üÑX¾…Wk«eýË x'ÎkÃô^ï8/w¥B; ‚Ѧm˜™Ï¢‘·aqZ t½×g¸tÔÛRi-Ð Î7)}‚1Ê´RÌ\>Áq¥ý½,tvkÙ£ÿƒ>ÈÀ«§ýé¼päL¨Ø.Ÿ`Üt mäÌØÄ´òûlãì´¸Z-í_>Átd­Ð»ÔoæVkúW¦7Ùµ´ZƘ8²² Š‹ël@’ñ¼WL™,Ø7ƃ™,”¤MõÙõ¶nĽmBr¦ì!ÓÔ{ûå½Z…ÞË1´iv>‹ÇM 3YÄÙÖw& ÞÓ´}!“EýªLX2©o|d²Ø)IjÓ®LìÓáLZ­Þ_†Àònµ|OC`¥–ïiÌq£–ï™É[;Ó˜¿S»ÉS²¿?˜}X Å6¯Îd¡²¬Ìd!âÖÇcg²¸ú{oW¤³Ø±âÛªniÌŽ]ÿĸbÊfÁ'½Þè2¾ð{«™Ç•ÙMÓ‰¶LH>b7hÙ”Z!Ö(‹Oä#øIuZ¦^ ‰}뵂Ïi*‡ÏZÁ¿&«ú„>=ˆ¯Éê@”ñ°ÈÅÐÊf  îí¸ƒ‰‰UŸåä‹ÄÄ¿ˆ÷adbâSÖŠ!1±ê¬ìä¯ÄÄ“ßöŽÀÄÄ*f¿&& ,©ý ÈK¬Þ|îmxvY]ic^âMç.Š+/qÄv&&V£>6–¼ÆØ_Vëýáê³kÏ »kñþJL’Û:·¹m)¡¯ÉÜv]ÔXâ¶ëL‘£¹m?º/aÛUþæÆ¶«<ámû·#Í…m×ÙXšô¶¾9¯v¼í‡Ajx»ŽÞúÎ oëó÷šÝŠÝ®Z d»Ê³Ýȶ—›sF!ÛEB7#Û(ÀƒØÖwÖgÄvðÐØ®_¶=†Ýs¶Ës°4ª-ÙÿƒLÒ.‡¦Ž‚´Ë!Â(H»š: ÒVÛ]½Ýv‘rÒöÔŠ$ž]ð˜l¶ßɧlvÕ»ÇlvUJÚ¥©:Ehë8ÐÚºRXÚEjVÚEÊZÚåÐÌ[„v‘}ª ír` ˘vÑŽ‡7¦]v±9aÚ¼1ÂÚåзº`m–°6ÁÚê³–dµïî,J»HÑlJ»HÀjJ»lœ?¥}œJpV›m 
g¯‚Ë.MH\X¶AŽ'.»<÷F²ËžØõB²‹,ùdiÒd—ƒoœ@²êD"²¯»‹]´'Æ,vÑÞ³ØE[О,vÑöØ`±’x›Åöˆù\°Xu,²ØE[1 c—ÆAY,6îJ,vÑN³ØE )‚ŪF±’¯ÅÆ3 »pc—Ilôi‘ØEûu‚ĺO™ÄjóIl4«Hì¢ýAbµkÄH6šœH6#“]´kÇLÖâ 3Ùhô7“Õž„`²Ú¡LvÑe&»p„™¬x3Y—ÐLÖýDLv‘@0Y¿Íd]“f²íñ"7mâצ±Ú°cÛŸ .ñ™Æj/IÐXíG «¼ AcýÀ‰Æú9Nn»‹è“ÆÆˆaëÞe«ÇÛ4V[‡Þ4v3ÛŽõ€`ë÷¹q¬FIÑØ8J4ÖW3u¯2õðcëF3õ”Ó4VÛ`‚ƺs‰ÆÆpgûzQšÃj[PpXwsØç³d«[ +-YX=Ð`Õ `Õ*`73LXö°Ú†wXT`5€€UŸ«ÀöQ¹A^eõäÕ•kòêš0yÕ£äUS¡ ¯š¸yÕ äUÃJW½E‚¼ª'yãH^-Š òª¤`A^]N“WõƒWͱ ^µ-1À«Fž¯ÍPVàU#Á¼êqþ¾mßêYfÛ°­û[”´Û!À)%­y£•´ƒ2-[JkLe)íl¾I[SîÏ8ì²X†RZ,·„”vá„ò&¥ULRZ¹±„”6ÊB[ò=ÞCHiñÆ2‡ 娥´£¶‰ ÄöoÍF™-Aì¼r´”váreHigí<ˆgat‘ØIé6ß$vž¤»´–Vzl¡Ø©)]·¥´«V $¥]ùÜšÅNÆ®B²Óñ9’EHvR®ÐÒN\G )íHƒ73Ù:'c’Ò*ÅwHig*%Ídûý}AJë…KigQ|Kig¥g·”V=$¤´³è€¥´ªdKi­Ÿ[J»X*)í ¼b)í"îsIiùQb)íªKi7éòRJË™ž¤´“O))ífU©¤´!m•”vâ /¤´MâKi7Kw%¥u9Ei%é-­ÌôLmç‰tCQ;J£”ŠZІ¬«ÕúZ7ÃMW{}¸XV»ˆ¢JUªV«jy1«jU˜Uµ»fþ©­eþµµ _~¡­Ý…>¬° ©®¶£Ä=RØZÖn…mÓך¶.¦¶ª~ lwA lÉq„nãÎÅpëÎ1,°]tçØj‡ClW}›[a;ZÐ+‰íª[—ÂÖ‚n+lIˬ°ÝY‰sc;ynžR [ëÕ­°˜¶#ݼ^ [mh1Õæ¶ÂvLõíEu{5ª”ØzL°ÆV/áÐØú¾¬±]Å‹¬±MLÉuk»u»»ÑÜÖØ®j6kl÷ŒAc»kܶÆV“{“Ý^®ŠìfLÛ•ûºBc»Nv=Ÿ ±íª§UbÛ•ÐËbÛö9Ø7R»ÔÚr¹ðÍu-Ä×Mâºñ×µÈH\7†fqÝ(¡¸nó$kmÝÅucÐ×›E¬äº1dïÖ{ó½Îoº;7_NZ[mÇ3Ý­WÆtÑÝØ!ºÛ¯ÇQHtw–Mphm]Û.V¥ïÖf‹ëÖ¥µmZÝí±vçºÞ´f°£ÂìzÂ"®ÃAÊlùÀˆëf!vc“‹ÀnŒ»UvaÝ]?’èFÿÑBˆèÖD€t™D· HÎ*}­‡jÝØN%®û.sçÝç!ÎõÌK8×C³©®¿ôLu= 1Õ­w‰/©î¼ú8RÝykÕ5Ã1Õ­WãÕ·Mu£{ˆêfŒT×9Luý¼˜êzK·©®?WMu£ûˆêFó ë>Ÿ%ñܨGñܸ/ñ\O1Ìsß×yp?¼€¾ä^†Çþï¿…âQÜ‘âDCÜ?2ܵ ãˆáöù ÉœnQú‹«Ãl3 †»ñÅø v¯ צ·³Ì’Þr¸éí(¹ éí„F“6ª›Þ†¢Wôö ̸!ЪÂôÖ{ÒMo›-é­&eAo½=ÞôöøTF;[?ü†·ë?~Ú.â“v@Xme l»ˆöšÚb4„@Õ¤¶†Aa€0|Ü|V Sk]ÃÚð6ßzMÖnRIÖnf¨ò=ðb€™­»…™­¼‚ÙjSn0[½_ƒÙ¾æ¥¦µÊðaZÛ¤,Hã|zÖ†ƒ`mãÇċվ FJŸUixÀy¬)m¸ÒFíˆÒØ%¥LpAiý‰gJ+fH‡…ßõö;ˆãiÍm{°ÚŒ ­d½‚´2 ù&óªLÒ fŠÒjл›PÕjLË*3¦=$÷HóZR$¦UQ.  
|Ã!ìŒiñ•Fªjk½1´­?ŒŒbEi%/ L+ÿaÚð,ÂÖ€$1-?Œn>hcZ[—ÓúöÂA!pc@!àkߔփCRZ‘N‰n'›„µºAiù'¥eã¥ïîíƒÀ–øÓ”ÖýÕ”ö°8ØF‚¢´;¹n”V3œä  J+ÕCPZ—Å”Ö8!ähN´ÄH']/œ¥õ†oSÚpPþv6¡J˺6¥µëDZ"pÕ7,ð€¤%‚´­a‰ÀõàĴܿ𴞅¦uX~»MK~»j Sò[o·üÖ[ÅÓAÖ’ßÚ&-HÞÓËki‰À—qz"(žœY…;ˆz[…;5K…ëmù鉠û OöY«pGÉN¥Â5¨[…kG ©pÇijüVÛtonBÅá†ÀªºÜt„ô·®@sÚÑ Wú[í·w–þV;šC;~Ži£)Ó Oõ·ÇËo'Klà ½Ñò[Oú-¿Sh;p ‹$kpµÿ=4¸“wz•w”˜ÒJÜñSbksF q½ÜB\Ýwqã”RâF)_J܉ƒ(qEMB‰§“7ŠO%îdâ,AîD}Zr'N1B;ñk<¹ß_!Èø& A®¶‡‡ w"œAîÌùyrg£äÎ\Ë AîƒÊÊAùxC‡»ª­ÃIxB‡«Ýò!Äi±!n%î,o ëpç_1@X$QŸ¨̾Ïa„÷[¼l¾é’ÝnvC¼í“H’ÑÛYÄÔôvæ,<è­²á˜Þ¦I¬èm“1«é­VƒÞNÜ9z3³ÅJD0\½±Ìpº‰ášÖ„w±+nÁ3:(ˆá®fÆb¸³wø‹á.æ»b¸Ë“j‹ÞZôÒ[*§ oC_Ñ[+M!½e1ÄÔ,†¸£ ¾!î¤.)Š;©Mqß™(îú+Ú[­+Î]l•8ÞIs©x¿¹ØÒ©V4W éæbK)¹h®¿ÙMsíºgš[³¡×#ÍõË4×£in½[0!Ó­w kßLw™÷^LwQâØpFXlt@¨»¬šì=©î²j“¼Ù®Þff»Ëji1ٮפì°Úòl7ö¾Ú'aã|Ú6 «ª²IØTýb»±ÿØ6 ›$3à*_W¸%h[ Ñ®w’íRÌíÆÆ5»&l‰}/´›·'ׄç¦6Û%l¶]‚÷Ê.A‰ÙÂ.A»ÌÂ.¡é3È~ abðòK`ªº°KP~¾°Kð†LÛ%D ‰t½5Úv ŽÙ/Á’® :N® ÚÛvsM@÷7ÒÕ&Ð4OA–y‚oÜ q²Pxv¯ñNpLïëpaྲྀNð³gŠks‡ôN¸:Ž ¼ë< PÈôO`ï}û'¨tòOxÖ['èVeœà^Î ‚ËáœÀ2¤uw[Ø:Á{Ímà¢Ø:ÁN a@”Ö ²UHë4‰½öÏÙmlãMï)ˆåàöæÀ* ï©MÃ;A*Zy'øÞÃ;‹YöNˆS¦w”¹a S¾$¶JÐÖ qHÚÙÚ²Vv¶¶Ã:‹DiÀÅ,³[ÓìÖÅL너݆ÛAZ'|fgëîœ rxºõ)En•ºúæŸÀéŸÀ›þ \•JÿYÖ Ýz›¾Ñ­-=„nû 8N¥(d¼ð$¸®ÿtPÐ"¸añ <§-XÇF¸J0×E¹Y( p]Ì´P€©™îaÜpPàüÝ×=6t{B¸áØ> :gø(È="ì®×óÍMAjS1\3 ®Šiˆ«’Ę .ÍG‚ázý›áʤ-îa«[1Ü]pÔ wçG£î‹êݪÌ&·‡,xEnËqÄ—äÖnl&·6Ì»Y)à+9È­ ZnV ºšÜÊ›&È­Þ‚ÜÊ\íf¥@ísZ)Ðü6­xF¸¾w#܃ˆÌ WŸ…ÁpåR ÷ìÃ]{í>üõ0Oÿ^†;|Ã=ÞÐã&¿Õ·R¤#“k’mlçË6¶B73[™¶†™-µƒ6³]mX+3[A’0³]Ÿ–º¶±}ÿJlÛVé…mÛªLÒ¶MÇØö»°m‘„ Û6a$cÛˆ ÛV ZIaÛ{ì o3DzÛVSbÒÛŒ‘Þöº¥/Ä‹Þæ!¤·yémÆHo3z›!ÒÛ{좷½­É.émÆHo3F|{]ø6cÄ·#¾Íñmƈo3F|{]ø6cÄ·äLø6cÄ·¾½‡.|›1âÛŒßfìoóWâÛ{ì·#¾Íðm†ˆo{Œ_áÛ8Nü6#¿Íùmž“ü6#¿½Ç.~›1ÜŒàfŒ7c¸÷Øp3F€1qÜŒ‘ãf ÷º8nÆÈq3FŽ›±'ÇÍŸ rï± äfŒ 7c¹"Ș@nÆrï± äfŒ 7c$¹#ÊÍQî=v¡ÜŒåfŒD7cäº#×½Ç.°1ÝŒìf `7C$»÷ØEv3F´›±íæD»#á½Ç.›1Þ ‘ðFL„7c$¼÷ØEx3F›1"ÞŒñfŒˆ÷»oÅ(?&â1_ˆ7cD¼~ÿñæqD¼žkñÆqB¼yoEˆ7cD¼yJ2ÞŒ=ožŽŒ7!ãÍï=¶sBËË‘ñFLŒ7N)ƛǑñÞc»&´<'oÆy3FÈ›×#äÍãHy3FÊ›1RÞŒ‘òFL”÷;4¡eŒ”7c ¼"åÍ)ï=vhB{ÏZVŸÌTï’òfŒ”7c¤¼þ åÍÃHy#&ʧåÍãHyﱋòfŒ˜7cļYbÞ<Ž˜7cļ÷Ø…yózļ#æõ9Íy3FÎ{]œ7cà¼"çÍ9¯oÁ ÷ÛïÖ 7!èÍAo[-òèÍÃH|#&ä›1"ߌùfŒÈ÷»oƈ|3Fä›1"ߌùÞcò&òÍ‘oœSÈ7cD¾÷Ø…|3ä›!"ß(¦oGä›±Û]}òý+ísáwp#¿‹“”ÉyA>1¶Ïd Ö ÌÖdñ® 
ªC¼«µz“_oŠ4ùõ¾.“ß’mF‰ÿ–V’¢_RàÙ÷gñîh²Ä»»¯—âÝÏÒ˜•e'Ï)ñîäû“xwøœ—µ.Á&)pÿ0Wf1Pà© ²‹WŒ‰ÂH§Í>²Ïµ üEûéœI§UûýíŸ;Û‚xš?¥Àõ„#ç›ìs•ºËî¹M]A ¸b4:{î"ݸp¯+>YbÀ=Ff»çúÑžv ›Å€{LÙÎäž»«[ŠG7±!Ãð\>ýM¯aÒßép/Òßi—êQôwÚ©È#ü­òQƒKø[ì ÙÉÈ~{HÉÆäÇ0Hðd¿uíÈ~ãBD¿åL )”ÐoÆ€~ë†בƒK&è[ÕGsBßéTHÐ7c„¾ÓaóBßì]Ò2D9 }ŸVÍ7»\n°íµ‰iﻚÄy§])¦Åyûå© ç-gjÊSÉy qÒ0œ·žLÃÉymvmÎëÇÕœ·NI…-9¯å7ç­Ó1O9oÕ´Aâ¼#çµé¹8oÞ49o4€8o’6q^^;9oSœ·ŽÃ–RqÞjpf!#ç­ûA?çd-eÎ[1ЇÉy{Œª/q^¿HÌyóÞÉyóœä¼~!›óÆqâ¼½^þÈy«-q{â¼Ñ|â¼y9rÞ¨Îçvç:ç­ãhôKΛç­ò}f–eçí1ÙóÆ"ÎM*Î[1%²ä¼y9oÝž2]˜7î\˜·.á0)oÝ95̤¼q!o´ oUYA^O˜ y£?òFãòºNÄxãæÄxójd¼Ñ¬/Æë™’o-Ÿ1ÞÞ-TxéxG[A€ñÆ;]Œ÷ýêÝ> ºëœ¦»ñNÝzÝ‚_Bݨ:A]·”˜n´”˜nÞ™nôC1Ý,9™îÔäÅ+¦c˜n”EL·^Ád’éúIÒ‰®ëK@7îû tã)з²€n 9ºÕ±©G¦A« r';(È w´Ý r}NûäŽÏNo‡ÜÙÆ rÈ?7]¨³Á«A¹£œ“m;ÙRB¹£ûäÚÝ>¹.ŠlrUa“û¬0È2)r§OŸ¹ÁÂ#wäH¹ïBÈWNñ—;®*/ÜqÕ+Â$W &¹£r˜É$wü“ÜW¹mëBØw”ŒØö¸£Ù×¥°=î»2Æ%8µ1®ÛÉÆ¸"SaŒëŽdcÜ(¡Œqe,ƸòõcÜQ‰ôlŒûºk[âÆtÆœq¹å<Œq]6Æn ®ÚïóÇýýOÃÇü´WÙ§þKÕŸþ¿ÇŸýë«.×ã¿þï§ß}î {|H<}¢þbQ¾‹BÛ5ëö~á=ÿ¯yŒµëïq§¯¿ååû·¾þº¿®~þõC¯}ì×½?ÿþ¡×?ëûÑ⯿µéÔýóêç_?ôÚg}?¯þüû לñ÷¿RŽµÆ§>E˜û½Ö—ür—m«ýëÏOïíûÿ–^èŸùø¯¿ë#úßü÷ÇÏÿùÓ¿ýü•×Ù†V9Û|¬ýf¿úJËw\iî51õ)èÜ¿+¿þJ›¯ôOÿ>~Œõv|üü»oºÃþ±Òƒk-V sÿÂë7|»n­ýô+ÿòÏüø—ýÿûøCÿ¿Å÷ûFèëµð7î´ïWÁ¡yŸ°ìüao¤>£¸]{úQoÃívíúé746QÿZ\}úÂÕD½?zöv¿úUóçØÒ¿f†ýkÇ–>šUÙúß­ÆéZîÙ—Ö§¶ó}L>žË7q™¹2}õO¼­O·ï—9^—™þÊë,ýçœ{•÷ë,¿r%¯ó®Qûqì×øXjñqïŸ-Ó¸ìy‰ú¿óÿóûÿýå—ßüùÏ¿ùíoû›_þôÇýÝ`üñUÛåjö±`wÞÀV ¨=í¸Kz%K¤}‡Íd©-˜l¦Ç®uÄÍOv¸í›<Áö}+z}í >› ½bïåŽmhãÊZ—³‘B½ØÔ¸_>1%ü çá~mƯTôj]þM×l"Õÿ=íV´çºb°?Ð^íc˜¹=Y E+¼y H{SeÞ<†&'ÚbY±Ëy`%ë=††Á+×}ŽÆàa­Õú­D³Ç8ÐWDXûÈÝþ —Ô+Ù”4üÇ8Ù‡ -{Œ3Í‚n7>Æ…–Ú”zŒÚd¯}C®˜™¼í7z€-æKÙ_ö0ÉquÔîÃ|ó}R‚TLÁú¤ÞzJÕuÔDçFGÎPŽ>A‚?›OžÈîpþDÃL™8ëA7»‰B惺ÝX„m¤Œv\ÛDG\ùTL^•§*°¾$à%7Q¦rlòz’Sö±­t`‘mç±!ýíÎ6zCºâ·+ÇÒY”kFyl;ü•&Šz§¥ýÉÈx?&©¥žYŸ¿GÙÛHû49‡ý“A¾ô,{›é62a>´…n«®F,äðKkŽìktû8]ZÑЭqvªIýß<,NÿPöƒôį&¥ò›éý!&¥˜ÐõÏ!LJå¥ß«Š“Rwµþ9„I©±Ç”N³†=ÜÎQíûÊ9©[yß8'U¿^>ÎIÝö†9i”rÇœT9 úk…sRÙµ÷šâœÔ Ù?‡0'UŽ„^Z'xýˆÉ¨‹~ÈWéŽþ„ɨ[íX9ÕGóÑ¿ƒäÚÏùã±q2ê^x4NFý =ÖEìÊÝÿàdTµTOòñq19ïÁ‘óÑ‘Æãõ'¤#?~{pބ׻¬ÎL•á bj*“û\=7Åì¾7NNW<®=Öœ‹Â·‰!nù;0MU·=c;GVÌaÇ>j9× &€=8r¦ª´=8qªª§¨gÎU•›ë b²ªô%=¸|šä¥WNW™‰¤Ç6NW•b¥'¬Js1c³î+ó{ô_奜!ãp.Ì¿;²¼î{päÔ5ê{9y*­úkÜ•kwΜ¿Fç˜ ëô{Ÿ[zî\9ƒÕèÔƒ•Ϩ?%ý+þ 
§Ý8‡Unš,‰Y?r÷ÙÐË'Úfgq¥(Ãè1%uïÿ½­û£bÛúúÙ<¦ý˜áx|ï±ç‰3ÙÅÆ\l¶7â6¬·˜å÷]­6j©ñöIÕƒ«'³ê‡ýËiíÅ[jñíÖõô+‰‡Ç’ô»^ÆgßÉãlØþð}Ûâ^yˆÿÂn8sÝ>“ø”ëöÓ¨ˆëR˜\w˜âFXw·Ç°n;¤OÖí±‡FF@·DŒðhÑm½øMtÛþ9Ñ­r¹è¶“)ݶNt›Ý"m·eÿFÝ&PÝŒ‘è6í4Ñ͉nkÒD‰èÚÏD·ÑÝ@·Ñ=Ü<·¥É¤<· ¢èÖÖB$“ЭØòè¶s¢$ŽKµmrÜfb)ŽÛVvs\¯‚˜ãÖ9ñM*Žk?s\Ûÿ™ã6NM„q½]W×>yƸöå3Ƶåˆ1®òŒqûqœ½ ã¶ÀªÄ¸=†ñзÎyõcÜö$†â·y2òÛfö'~ÛLBü¶™?Šß¶E$â·mákÈü–Ÿ®Éoû/§èâ·qNaÜfX(ŒÛ‚ ËXÏ8–7OIŠÛ„ëDq›; æ6ƒFÂܨ™0·È æFL0·”%ÌmAB s›¡¢`n›µ|#˜ÛcxæfYsÛÌ‘Î4·™¾‰æ6å{ÑÜ:ÛeÔlšÛÌëÄtÝÄt[TB]÷1Ý<ŒP7J"¨Û áuÛå6¥Å1Ëmæ¯D¹MÉxt³ºÍPND7“]†Ñ’ˆè’Ž%Ñm>It›qžˆn–…D·A&ÑmaIt›Œˆnžó¦0Ñmv"ºíWˆn ¸I¢Û±t£äºMøNT7î@T·™‹‰êæê6aSݸ@Ýf´D¨å'Óm7Ét£ü"»uN‘ݦäŽ&»ÍÈ_d7#ÚÍãˆv£¾„v[d¢Ý¼w¢Ý6jšòD»y„¬>òºÕÄx£ðb¼ñÔ‹ñFãÍãÈx[U™äøy›ž!oGÈÛ„èyÛÈÔ‚¼M”Í7J"ÈÇ ò6£hAÞÆü¬F½qB½-°,Qo…¨·\™ô61“Þ8Œ 7;Ao{¡kY¹é_ámJš„÷v$ o3‹àUíßmÊU|÷yv[KÝ–|–`7 I°›1z¼' ×ÍÈu³ Âºyãº-h±°®ÇÉÀº˜ëÞ®I¬{»&±îíHrݺϋž×½U¹îíš°›n‹¬8Fd7ÙmJìd7oBd÷v$ÉîíH’Ý–šd÷v+$»í×ÈîíH’Ý ^3B£ÝÛ@»YV¡Ý¬Þ–t“€7ÛD„7»‡ïíš2{ÌÒñæm ñÞŽ$âÍjâõ++¯ç(Äëß¾o?Ý·:Ÿ…†(á ^æN ¯d§¡á}‚Y©w“R©w›r„X½»É0ÎêÝUšMËwgÔY¾;‹b önJq+Ø»y“%ao/åô¸gb^NÜónÃëÞþ‰ÀKðökÉ’ÝC™Öx7å/6à­»…XVŠ]9˜ïnÚR*¾ÛCøh7àí_ƒDýOÀk«<ÞM™ƒ xól¼ë¡éœHoa:'Ð[†!úô®»E¹½ën1/AïºÐÁ wmĽ«Ü zëœTå‚ô®_½k3Ì%èí'”r˜ wݬŸ%èíçäŠ@o?çCõ#Äk1»ïºYKÄÛÏ&5.¯…î&½ýœw‰î:›ñ®«HÛñ®«Ð£oåi'&â%ÄÛcDˆB¼–ÙñZòoÄ».Â=B¼uÎÏo™èÈ’ñVg»>,„x×É—Œ·NIZ ÆÛC¤d¼ýŒ¯sFÆ[-J®JÆ»N\4ãž Æ[®C w˳àn´šànQ -¸ÛÏIi¥àîéS$ð{M««ò¦p·né3¸ë¶Û‡HpwJw×Y+¢»ÄIw×QÄOt·ÎIîJº»ÎœŸó–Y#æÍs’óF?&ç]'trÞê«@`â¼=Æ·…8ïúà™Ä»õÏQù»ÞVc¼»Nê»yñî:I<(¼»ŽF¸Ä»ñ( ïVYÐï„wëz×=wïöߧ»qË»ю»1²ˆïÒ—,ùn•0S€7#àŽ#ÀÕ!À[ç¼QñÝu´þ€7†%Þ¬0Þ:#Þ%"¼n*°»Nðb2×e—M®[%AÅuë” dâºÞüe®ëjÖ­Ëa¨Ö]G „Iu£ÁžT÷Y „¹ë¨Å$ÁÜu¶l•47ÞQ¢¹1r‰æFÛŠæö—Ds×YÂUÑܘ´ˆæÖ{!š[£ï4÷tþ»ž0ÒÜ蹤¹ë`ÐKšÛcû¢¹ÞQhš«=€†¹ë¨µ ÑÜu° –4—ÃuÒܺqìÍ]5ºp®óW箃É2yn4ƒçÖ=£E…u]WuÝu«ˆÄWº.GpÝuxáYÝÊïIú$¢[·¸SÔÁ,•\×y`ƒë®ƒ3¹®kÙ`wŒ­vëœ×¨d7ƒ"»NKd×md7Ë*²[§¥ Td×O_ÝA$^|‹7AvëQ¢\d×óÝ »}¨•öIv=€Ù­«5ôD‘]?AvýxÙõæã »·ÓΞÀ‚"Ù­÷ÌSK¦›m(¦ëÑ.˜nµÄ~#¦›0™îí@0Ýê©\´ÓÍ“ŠéfkˆéV—¢²XL7«@L7*1]¥ÁtcxÒ½–H7{±î:HÒÍšœ}/Ò½òýß ÏÝ®yÊçr1Ê8w³‡pnM\¯; î$ìh¨k(¨»>\ÎMsW9t›æ®J„`š[ŒÐf +çt6c˜Ùz6c3}©…Ã-¦©³èíà¹3ÅÇ&»Ãçd×0Ì<„ƒ¼ˆv#—·ØnIRf D´IÃmưóÛ)Ì;WÈŒaà /éfåŰ[+/©¸Ã‹!b³SÅaØ‹a×7½$R/†ÝH˜^ «Ð‚a·øX j¬°`PRö°`8D5Œt+]GOžeÏ0yò,D* 
iüÂÁ&»J ìŒw«{ÛsîL›ãÝásïrÎïêÄtWÄ »þ1ÝÕstןQ¦»þæ1ÝÕx×߯»qΦ‰3¿Ú„wGÉwg}ñ$ÞYÞ5Ø3Þ5H2ÞÄôMyÍÊMy'v.CÞA»- y#&ÈëoYC^7 òO6-ºëÇÌx÷y€¹®Ñ\Wó¢àºšPݸ.Ls]M¶ƒëú#:¹.ņæºÏ™v]q]ƒ*]ùÐÐ5ê3Ð5¿7Ùõq&»bX»Ff».f‚]jÝ v#`—ð+ÀîÇx.aê‹ç7Ï6Nž‹+y.ßSÉsy7æ¹ÁWƒçŠËŠçë žK¢Ÿ<—Ó'†2Ýo2ÙÅŽ…º[Ü4Ñ Ù¬ˆîÚ‚è:‡‹‰î` ¯dº—˜ì¾hoØëR]l¢«¤„Atwe³ÑÝ95Ñå$ütñŽ3Ð i ;Zš@¶Ââ¹æ{Ás'ÛÏÅnž[´¸…ä¹ÒãÏE­ç¶/Iu«ðÔÏýTªë¬Ö7ž+ˆ@—ßêº:gÝ&Ö’@—,Á@wÓçŒnßL Ëy¥î¦¹œ€n‹ÙUýßÌuñ¡c°»é;Â`7bòÖµs°½u7Ú=…·®Á`w㇯¹n³XÖº›™ª¸®Í6l­»q"û²ÖÝ‚嬫ucÝÍx\ƺ›>l¬»ª+ÛXWV%á¯ë.bÝÕj_ùëÚÓÙþºQNùë®ú€µÁî¦Ï'ùì6é‰ä³» WÙg×.ØöÙõgŸ]ûKÛgWƒMøìÊø%|vWmk·Ï®[Á>»»ºòÙõ€b£]¯HÙhw—Vûe´Û´]ÖF»^‹²Ý®ÛÛ~»›pe¸ë»¶áîö¹9Côu;în–Ëqw·‚wÝ:vÜÝ„ôì¸ëgK¾»¶Ù‘ïî*Yw7‘9ïúÎm¼ëçÀƻ맴wYÓ[fenXûînöÖ•ïîú¹Œ7“ï®+Ú¾»ò¦zøîzH¶ïnœJ¾»®)ûîz\³ï®ûrúîò–í»ëQÁî»Q¹ïÚÉÞî»6Ç·ûn”Sî»vc’û®:‰Ìw£2ß]Ee¾»Ë^V¼n;;ð®Ÿëy—Õ„V¼wïÛòØž»ëç˜7ú§=w£`òÜõ+ãå¹ë'Èæ»~òì¾Û´°h÷ÝFþœ×ö]½â z›–b zýÊ6éõgÔkë2³^[‰Ù~×5)û]wDÙﮦ߇g­|8ÍzÝÌz[¸/`Öº?;–!¯ÌõòÚóß×ãº!¯»‰!o³Rwó„õYBá]O¼EÞ]MdŒwW£1ãÝÕêNã]ÝqïÊ/ñ®œ„¼«Á“!ïÆ¥À4ÞÍÒ†ñ.ºr:ïjÄçÝÕIλr‹tÞ]͘켻†¨vôÜy:ïnfLvÞÂÚy7O+ç]ÍgÒyW¯±´Þ]ËÊz7O+ëÝ >½w£KÚ|WãGšïF­Ú|w †*ó]~Ï¥÷n”ÇÞ»ÑÑí½]ÀÞ»YXyïFCÚ{w·‡½w›E°2ßåü ¼wã‚öÞ]?ç½ÜÒ€7ú†x£?Ú7Z˼YXYð6Ãu[ð®få¶àÕÌ1-xõ^N ÞèÉó‘÷þ½àwø>ð{uç›×4ÍÆ ›“ɘÁþªâ¾m3;• ¯žpá]í+/^ ØáÅëqöâ]Í™åÅ»Úx‚^¼j^{ñ®6ƒïz1ÀÞlµE„\x³ ¯M3ì«4[áÂÇÉ…W­áÂ{‹ÁÞlµñƒlxåÐ>¼{úð.|(Çw±ØX>¼“¯¬Øw1Õ!ï’|þfö¥µ!oÄdÈëb6ä˜ yíYkCÞ¸yã0:òÆÕäȱpä•{®y#AZ8òê¸täåqáÈ«ã‘W±pääi2æý4±ZÆÂ˜Wå|óê€0æÕa̫㘗1óF,Œy¥³•1oƼƒ×YÄäÌ»ˆ#Ú™wù\×›ç¤5oœ’Ö¼q”¬yýÉkk^ºÚš7X­¬y#&k^×´­y]N[óFLÖ¼¾u[óFLÖ¼ËçÂÞŒ=­y£ô²æ#dÍëRØ›×ÇÙ›×׳9oÄdÎ1™óÆõdÎ1¹ôÞbûgìmz#F›Þ(&mzã(ÙôÞbÇgìmz#&›ÞˆÉ¦7b²éuËÚ¦7b²ésʦ7b²é˜lz#ö….›ÞˆÉ¦×1ÛôFL6½“O¯[Û>½qœŒz#&£ÞˆÉ¨7b2êuÓØ¨7Ž£SoF§Þ8JV½žiت÷Ã\6b²êõ,ËV½›=—•ÝÃâ¹lP_Íe?%ÂÍdÑV½N_k«^g0xYõƯ²ê½Å0— ¾)¯Þõs寿«7b³“K蜳“Kè¸ÅÉ%[\BçÜœ\k¶êÃhÕW£U¯O(§^…lÔë&°QoÄdÔëŽ`§Þ8NN½“SoÄäÔדUo'«Þ8N^½qÜë7eïê ì¶ì½1•]-à´eoåÙ»zG½M{™Ù´×üÊÖ½YXY÷æ%eÝ›…•woiÞ›1š÷ÞcšÔª¬vïÍàèi­ƒ“絿âÞ›Gʽ7ƒrïÍ Ü{3(÷^sC›÷faeÞ›Á§yo4£Í{ã›÷®a· ó^ÓÞ½y ¼{3(ïÞ Ê»7ƒòîÍ ¼{£ìÝ€WÖ½yVZ÷fŒÖ½³uï¯á,©{ã@÷æeÜÊ·7”oo(ÛÞ> stream 
xœÅK¯å8v¥çñ+îÐìk=HQ‚ቻ€l¯ó±ÔR–ú2ÅÿÍ?·uÝ×õe.åe)ók›ËZeùéÓ‡ü'óËÿúMüÇÏþsY_–—õwñãïã—ß~X^þ÷‡éu+5ËÿßóË¿}Ø[{ݧã8ZÝ^ò£NÓ¼,ó˧ˆí¯Û±:/+¯ë2MS[——;^˱eÛ#¶¼N­ÇöéuY#6-ë˾í¯G;Ž}*íŒÍ¯S‰?[;"Ö^Û>M˱>C{P]k„êë<=ÿÈØòºmñçrÌ+¯Çu¯å:þ-¯í:wÆÊë²ÇÝíS^ÊŒÓÕ×Ç׸’zð*ëëÁ¶ÎåŒ-‹Ä¶¨óˆMq¶Ú^7½ÊöZ×Ç 2´–øïmßÎÐÞÿÜkÜÜU±;^ç¨ö¥”ùŒÕ9êòhí;¶ˆÍGž®¼Æ1ÿ0bÇôÚ¢Ú磮g¹¬“³b36¿Ö¸×¹Myw+žÝ±¼®GÄÊVϘÞù±¾æ¿<›À^ã‰ÄA¦²Ÿ}”×麴½.Z™Ú¯3ì5®k~^sÆêë¶>r²B{WY›KÜlWy%«<Ľî÷ž!é g±½÷Îuéf±½ñÛû°ÊCF‡ï£ê¡¯¶c-}À÷bµ×l†îa4cÑíû¨ºë1cÇ=ªZË\[ŽÞHÖèò}TtT=ÖxN}Túi#VâÏ& ö–ØqªYW2r¦p¹GÕˆÉ+ï(K©ækôÁ2=þTYW¥ô·t†Š^Itø{L˜v»_Hg_ý‚ßΠäfG‰™ý5•ÝvNóªÙ›òÏÆô¸¦²[%:ËæŠ½dZœÁc¾¦²Ù\6¡G_²eïäŸwGl“ú‰ã½®ë=•ÅbbgÅÞXê:ƒG•©¬¬`DpQð½?~[_k“)¬Ò¦èaJ¼‰F¢¿>þ<ç°U×û#¸)ó&È`¡k»ê`›RïbwÊp½ç°‹.{FðxpïH0~¶« ×Y—TΠïm8þÏë¶_m¸Nºò±Eá·d¹œÁhÓÕ†s ]5XOº{Çè2ݬ‚Á3KeÖ[‰)ýcÊ®Kl£$ï3þñc²¤¶ÅÞ(¼ìèªó|t^(ؼ„œ¼qLÙUb‚ 4 ]—;ƒŠgóÙ3gë$øîtgp*7‚WÆV‹gèë8jû0¼ØÕVáâeÑ”ƒnÂÅOE«‡mÂÅ£¤®TÌ¡ûD+JbØ Ù¹x>déI2£÷c‡Uƒ›èÞl‰ò’Ï`×½¡VuÄ[b–Öuo¶‹EoeÝ»šÒÁCto^­è×e™ºîS¢jc&vëÞ¬­ÙeÝë׳´.€ý/«à(©ªc‰‰XÀQRçËRE%«(à¬vQ˲‰Ö<â3ØDç9ed[b"ÖðPRžîxµ‡Hà(©Ó†e=»ÃYqò:\b&Ö%pÎWä}¸ÄL¬K`ïà1ë8Jêä&ƒ]{ÝÄ<­Kàu‡b]b*Ö%ðºc·ÄT¬Kàª&_b*Ö…°×jüã.„ׂ!ƒ]{•g?½ä¼OíùÎÚzc™””óÖÁyv™!,ù6Û¥-ë *ƒ]gIyw,9뙤-ã>rÎÛä µä4û#xèø’¢È ³§ : ó°’ì/ˆ˜“uGÕ×;×"¨©>K‚¨{TÎ;Ñþ\g¿ÆoZ©¸°óüW¾4ð¼Ïý 9®—Í· žó+?™¶<‡ö<€¬~þ& ÚîM´‡”@ûÕƒ@Ûm@{Lùs«\«€nU?¾5=@gƒUeí­z*€n•êZ?eüØt«”îЭ‚q@ûs€¶'þì• þìõþ<”Tþ<Ü ‚衤â衤âh¯Tàè¡öG[AàèŒ*+Ž *޶GBÝYÖRȣߊ"ö òèV11$¶Ê#¶ú!nE©ôP J¥½äKû=K{K¿E\?ƒ¥‡s*–‚Š¥½R¥‡snÄß”Fs#ŒjKatu)0º1é0Ú« 0Úo0Úï 0z(©0z(©0Úë0z*Œ‚ £³t0z¨<…ÑoÁòŸ0Úk0z*ŒÎ*¿ŒΦ0:ƒºÚA™˜FûFA…Ñí(Œö*Œ‚ £ý>Fû)•F1¥Ñöê ¶×iôp9J£½ÚA£ýi‚F%•FGK= ÑÃaGû£ŽnLI#Žöꫳwªoûhñk]êiyÐõ™aбƒC7& ‘C‡Ö×9tã «qè™§Œž”^‹f¾ Y´Ç”EÏbdÑ'ËdÑç{`Ñ–Fˆ¢·Ã˜±¢èí0¸«(Úòʉ¢7.Eo\$ŠÞ¸œG½5Ò; è!ø@Ññ3PPtA›Eo\6"ŠÞ¸lDób­U è")(Zýd>:ŠÞ6KfV½UÖ*P´Nñ?:ŠNï›wÓ ·J§$z('$z«È'ˆŽp;@t55 : J­ š•¢Ó¢hWAt”dⵂè,‰ôiÑQˆ¶O$¢‡ ‚è´CÔ질Ž_Ÿik@Ðþ ·eÈî‚{[X×`ÐyXe~`ÐyXí`ÐÔ¤= èᔊ Ó!R tU;ôPR4-H ¡Õ/ê£Qh¯Pè<¨6bPhZV…Πt @h)ƒ¶j‚.Uôp©Š ½Z 3¨äÚ¯G ´Wù“@[èŒ=~Uòœ¢=äÙj à¹N‰óp8%ÎÞˆ@œ#Èn%ÎCP‰s10ƒ8gYÓJœé˜Jâì1%Îé|ªïPg{δL5àìÕàì÷àì À9Ï©è ÀÙ/Vy³·1ðæ¡z”7%•7{‡HÞ<þ&ê×ë¼Ù{À³× ÈóÝ€Áœ}¼s¦û®Ag"·ÅR•:gI]tÎsêR ³¿}”>ç7‚º¤¦øyˆ)‚ÊŸû;è9¯R•ØóV¨ÁžÕð£³ç­p] ìy[ 
;ÁžýQ=Ûç“dÏD:س×Í“=÷Zuö‹u¥Ô9û¾*&`ç,©¢ØÙô$¨óV¸ êA¬ ƒ:ç Ê4 3½¥ :{ótve¥ÐÙ: ³uRBg‡’ ùº#s¶¡„Ìy[˜®æœ}TaÁœ‡»Tæl½›ÌÙF2gëDΦŸÈÙJ€8o ŒñHœý:@œ·‘8% 0KVÀúˆAœýƒ8û“zJ*{ö'ölìÙ8°ç7b»ŽÃH÷{ö¶öì7ölS ²gÔ`ÏÃa7Áúس½ÂÉžíåOölR‹ìÙŸæ“=[[zö~ ôì-èÙkèÙë èÙ”ѳW*ÐópÎFQŒzöþô<ŠbŒŠž½‚={zö3=û=û3zöšzö†ôl¯¢g²ž£Ïê’7Ñs5 ƒèYý‘?:zöª­³ßÞ·¢çéFÏsüû¯âÎÝòû ìlœü™=“?oüR…zã§3æÃÑh)IâIóáhÈØ2¦Ó›GuF-ò›’6y>´á0šWE8´£=rhžÒ@ta†A´Á2‚èb–•"\b Df,D¯FÆ¢ÙÉ DÛ|éÑì^¢M¤D¼$ˆ¶Z%ˆvî ½~Dæ¿D¿ÅŸ‹¹{lÔà4Ì0þLB¿Sƒ“yïÐະþl÷F½2G…ze*´‘èy°äÏF9ÈŸgCÌàφÈŸ—!×ü-ð<__Š9¥z"ç™k*ÏÓ<£å9xÆCð<&ù9ëQ—È<33ÚÈs½~ÁmUïÈ«XDÎ3f9óqó@Ά­9³ !gT ‘óÌ1Û™3}0 :£JI~QîÔ™WkÔÝÔy¶dk£Î,iØ™W{cgzHwÆu:~&»~¾{ÉsÿèÙîÊÑ3¾v zž¹ÈDô<óÝ@ô<[Æ7Ðól @Ï~X¢g,7†Ï~X8?ÌÜ£óóAŽ@ççƒìÎÏ;§Xt~Þ©éü¼s–Nçç"ôr~ÞÍÎÏ;¡7Ÿ¿í¤óóNÐNççF« ™;0ºñsãwˆÍÃõTîÈÃ*qöëQàœ~àß œÝ,\óâÌ -8s×P#Τå±ç "7Y‰óÁ.Á¹·¥!g¯; g«;gÛ6Äùé¥ÞìÏÔÙÏtBg¿p@ç!¨ÐÙ«Ðy(©Ð9kÎË ¹¬AgnNkÐy¸ eÏÜEרópµÊž½ƒ=c#]¢gxŠžßˆuÙÍM< =%=AEÏõ6tîj»ÿ†Í»yÍÖ†À™scRä!+g¾Î´sãn¬¦/_Tùù`²] X²Ÿ`É×Á$ûÁ”&ÛCLæÑ“ý wéF5ƒ%—߸O7ƒÊ’‡Ã K¶Á’½ˆòAÙØÐjDÙ;ˆ²?åá°J”‡’Ê•‡snÜ´/9Ð塤âe—2ÀËL¼Ì]æ /§jÒ‡¼œ·§0 xÙZ#è²ßè276ºìÕ ºì=x™;²^ö–¼üFðБ•W+xÙoSñ²7rðe?#øòpTåËCpŶ&TÊ<\R桤Rfïè ÌÃaU.,EvŠ\Äj\ô ™¹u´AæÀ°lVÈœ»ÕÃ[y¡ÆÅ‚7 ³wqPfE3çÕ‚z+gö®ÎìMœÙõ?8³O+À™‡ÎlwÐl7BÐ<ÛëûÝÔf»V‚æáz4‡UÐl5KÐl-ˆ yî¹vNÍöLšíi>A³µ+‚f š‡’ ší4%4ûÝ4AÍñaÒ´‚æ!ÐÒì¤ÙïÈÙ4‘óPR‘ópµûûb™ÈÙƒ@Î)Œµúêó·^kߊœ§oDÎ{g#rnHç·äfz®š¹ÆfYȰ؀‘§Yll¸Ìbƒ>–f±A¯F³ØØÐ¾Ìcc3ßf˜lÐQ&›åw°¶cÒïdÞvÀ¿3½íè’<ÓÜ;»ÑðÙK®t·ûŒá³aøL 3|ö’0|ö`£»Ýg Ÿ=x\îv Ÿ4|ƒânWù! 
Ÿ=¸ÐÞŽÁ•övºAýž½`¡» VšÛ=¿£¤Ñ³¯ÑÖÇÛéjg15µC耧†hòl±™ŽvÏ«§»³YiewYibwý\h_wý\i\wý¼Ñ²îú¹Ñ¬®ÿ ›:](¢o³ÅާIþH׿ûôkÖŸáK‡#-´¥Cl¥+b…¦tˆUzÒYL,é,¶Ñš±Fk:Äv:Ó]·­öÌ,{fÛ[öÌœiI‡ƒ.t¤Cl¥!b…~tÇËÛi>Çàó9n¤7Á|îý-íj€“=¶ÐzŽ]i=‡‚…ÎsˆUÏYìxc4¥û²i4œCl§ßÜu¸ƒNs×yÓpÚ-[ÐnÙ .Ü ›FÇ+Màà÷êÆË„ñ2Öã@‡‹5çe\,Œ—½ Œ—Çà1¿×ZáÀl1X0{Ì<˜4f½÷`¦ï„™0£ y0#f̼š"•ÁF‘êAˆTÍ‚™A³`fòmÁ —s`ö¤ehT\(Q/o ó^æñÌ{™Aó^ö Ô)ƒæ½Ì y/3hÞË š÷²wÆ[÷^f̼—4ïeÍ{Ù·ûƒ`}/@küƒ÷2ƒæ½ÌÚ÷2ƒæ½ìAˆZÍ{N×õ2~4çeÍ—=…Ë 9/3hÎË šó2ƒæ¼ìAÈ\Í‚™Aó`fÐ<˜4fÆæwïàÁÌ y03hÌ š3ƒõ}Í;x03¸½¯yfú„˜3ƒæÁìAÊ^ìÂw{0ógó`fpy_÷Ì,iÌïo8̓™Aó`fÐ<˜yµæÁ¬‹ÿƒ3kÌš3cô`fÌ<˜=Ì y03hÌ š3ƒæÁìAha̓™Aó`Æ3qf”tf‡^þm¤ø—z0ËVu#/vê ^\ð]•ñâÆ‹g³]0žÀY-Ey7Fýåft˜Ì04×R”i F7ŒÕ2±˜ÑÑÎn“9SÀ cº¤áWe'ÓÙÉÍ8raÞí5lùÌNÞÌsÙÉ™\-uÙÉ…ÞjWvrE²““ Sa™œ¼Ú6}HN.|LNö ²“WÞ=³“ Ó0™ì‡EvòÊô+f'¯fÖŒôä…Ûw1=ya²Ò“×§Åò’Wæ¥3/yå§ÜÌK^ÍyÉ 3Z˜—¼0yÉD^òÂ+æ%/Ÿ±bÎ’Úü™˜<3ƒ—Ê?Ie†òÌŽóLQž¹¢Áå‰ß0Eyf gŠòDN¦(s'VKQž˜ØÇåÙ¢‘¢<™ÉR”'¦2Eyâ'LQžÙù™¢<[Z´æ(ÏìqÈQž™ÏåÅL.£Ò`ÒyX)Á¤‡ R&=œ™ÌU t9Ì C¡ôpAšÉ“ÐaË@¦'?~³¬d}s0)Ù¡ëì”ïÛ<—þ0½üæCt¨ûýL/.ÛzþçÇǶ嬔üÿüïø¨Uz¼ ûyy/ûùÍ‹ù&’þ•'Íü¤·ý«Ÿ4šwê¾ÇÍÞa{¿_ýötC½.áùÇw½‚²œCÓõȯ¿¾o-<êþÙî¿Ú/ÝPóëŸÄu Ï?¾ë<ëþê×_o\Ãã¨?æZöR“,5¾öR[¾Nж¼¬þáéøþ÷sü¿—þòç—ÿü›²ÿöåw/?üöÿüð¥gjçúF¼bnòåg*ßp¦£æ Y¼ÌcÈÿò3m<Ó?üëü’4ß?üákîpÙçç}| ×¶rÔe8o÷Äágþã?¾|zù§—íï^þÿ³ó~¿ËÞ_¿zã}¾+¾ï®w*Öôß掠/oœ}ùn¯Ìm<{þö·˜ë}Ÿš?Ͼ¼qöïRógûÞÆ³÷š?Ç˜Ü yÿâ1& g*×mÍá:~c ‚u‡¶éå9¾|ýг¬¹ü4•˜6ìÇx–ã:ËòËNSŽt:‹úŽÓ”Ïœ¦øiþÊ)öùõXR‘ÇÓˆ[È ›·e.»Ÿ#ÿç<Ãïþ¯OŸ~üË_~üé§Ÿ~ü´ýiÿãŸÑüòeŸFGùeí;nߟÏG›±°N¹±o½.6­¯SO…u?â5“ bsVª Õ”5`¦¬gRAŠõøU³ Êô¼f6Á‹å||åÉ2© “¡ +ÃŒ xÎcºŒÃfRÁ=Û]¹§ÅšIñ~ÌZWzB®³..æó1WŒIuyÎ —ýú­¸¥€ì¬©¢&,"4™®š¬ó&,7Ù‘Ìž×9—ê“Ì„®!&d‰`F2iÆŽ¨µs¾>3ËiÍŒ‚{Ö=sGõ5S ¢½<ÆÂyA{Yf8QóºH²Æ}Í9ù™@vþÞ‰àD ÅuI"XžMwâ6%kVã9&ná².ùby´Ý‰Šë"ŸŒÆå¸d0Dà£í¶ƒŸð¯‹Ìÿãæ-³.9ÿ/&܇kCŒFr#¥ Šk4»¨ªúhÁí(àê¼e;V,à¯k®ÖG n³"Öué˜û j#]ãHÑrÎÜ‚æ5ú½xÒv..å>=;ܤ‘±{i®í\ìX×èÕ1®ŸÏ¯í Ô!¸;çŠ[Â’E ¯1?jº=[÷ÚÏÜöÂÊŽXôçG n¹Àô8G™:lÏŸQ[1ô–h0gãm¹bÁè_KN£ŒR¦µd†ÜsØmñ¶PÊ”ÁoF‡EBZ4ÍzžÃnuÑs-}Cõx\€pk K{Ž»D=•Ö¡mu›¡ îûsÜmm \‹äÁœÕ©-lŽçð›u )=k.R\h3œ&¬™Ý0=‡ß<ìMÅŸÁ{•2ƒÙ’fÎÏá7‹ºœ®UÖ½‡«­¹î]®Æ[žï—˜«Þo¡¼L¼’r"»Ö«Ù®pº_ëÖ/sa«öf»¢¤,lå•ðŽ„ê³ý.VmG_0ÍS*÷\Ó~u«W+^žëkZL_ô1Û¢¦ë¬¹·I{üsŒ’›°ì¼BJ¢]®ìÁÒGòÆ#¯¹QÙñ|óÙižÓšF¸ð½úò¯±ésw®‰¬ÛÞGï(ÀWx÷ùsw 
¬¹ÛUåyØÇ¤‡ïR{ƒ}Ö_&¸^,ýl6»<æ¶Ä`s³Ï‹nKo yj¼íÒE¸\ÃëÎÀõ̽‡×ÆaãüÆè^7¾,CÚÝëÅ”ý 2¶o2¼âÝÔZÏB8ëI{LK²Þ‡W¬Ï¯í¸ßñÃ]ÆÀ5í÷8K׈ Þ>ƒÐ&{îCr¶…ï‚;¢~÷' ÎÁwºÛ j¿ê!òxºL¹ö¶º,2´ïõN‡8Ç…MF¨¼¥KäŽ[_b8GE­µ=3µ®Áv_©wÉÔÊ q¼Q[‘ÁV“¹ãú*èyAZAGú¡ßƒíJ9{,ýÖ†!îÈä‚{°]ÙÅIÐÈoœT+M[ŒáGíj%‡$H—Ð@ó~¹Ì.Ïམ–AŒ G®ËÞcî„u“õuÙá©éøÜÇ\³bLêküç€,«¡e ý?ë˜+ !ƒwÆH^¬J‘3³myÒWN Mz§"e!}7”SéÞ#/×ãúêž÷ó îåyùV)!®ï•âñ”¡¿ºìe\™D-¤XÔŒ«“³é–½6Úgðž6æèòeˆ”`,³lºrŠ5Yn,çÄ£öÖ¬O¹Ääì^ožG÷[÷î ú{ÜB_ºÎ–¥ãYÌ’_k×½ ú^‰YÚÚ…/¥e‰YÚ܅ ]ø.ÿq]¯­ ßë!ï ï#!¥£û‹Æõ,ÂÎ-ý2ƒ‡ê`\ÏsÁ`8[¦'ßCóŠLœ²HzòÌ÷š©N2xÍYÏî*~Yr~uÍ[@¦Ü:x(™iˆ·>0/‹$#ž£’¤'gð¸upjoíy1OÛ»ž0n—5d¯¡¹]÷žÁ¥ëâ,©ïæ²fË¿‡f,™EPrΠLN2¸wÌ)FÉœƒ[Ÿé‚ ‰vËáLÔJÈœƒ.‡IÊ™yp Íí`ïÒ̃sB$o™’™·žññGÉU˜.‡g䣖Ì<èrxzü¶ôd¨³qÈ„%ƒ{{\žªõóµ;£ÎgV13|-] Ó«Ä|méZ˜B¹”T>Ïù`²wÆŽ®…§>lÁô¿Eq4¨EïZlÿóù±JâÎnQA~¥Ê·ZTt•Á£kcnvQ4[ÿœ]j¯ÊŒü[!{ßÈ”[*gƒÒ^ž|ð–ÊÕž1ž3¤[—ÊùÎ’9“>ºTΗ‰¼²ÓÀ“ á´»Gä³Ë ]+OpÝ,µ‹“UÔ–s¿Ò1Þ¡`üYRÅGîÁص²—|LJ.|Û'‰×¶ÜíKD¢æ¬×«YŽ+d†¢æ}SÔ|v‘®@Í’½üÑIsc~(Is£Œ"pÎf'ƒsc^ sC¶yscÚ$ysÌÈÀ¬À››ðfÙ ý£ãæÌ VîA°pçünTFrgû¨”Ü™V‚ÄÎf%îlN‚äÎæ$Hîœf€« žÍðÏfHðlŸf<7:<§µ ø±€g3¯!xÎ ˆµ€ç b‰`±¯ðœAMtx>ƒú•s>ƒ:ø|Æt¦­ð9ƒD©“Ú¶V}E]¹cñ³~¥}•Ï|¤]m =« Û‚ Ç`S늢9ê Ðç§è2Š˜üYLø³Âþ<…?ŸAíÆÊŸ3`¦üù *õ¨0þ*ÊŸÏ>¥«yŠ¡‡§z,¹©O‹µ9ÅÐC³ªpÿqZÞ1ôÐ|CAÅÐC«T=´­'Œ> 逼ÁÙà ŒÂè1Øaô{ü¸©››Òè¡Ñ(š›Òè1x¨Kƒ5…ÒC[T4=´EEÓożƃ¨3ˆ!eÔgPå‘2ê1¸©ÕV!ØSH=›Ú·yP uU"+¤>cªrRŸA æIÍt<(:ƒ„Êg"íð«@ê1cÚ!H=–¬b$ç1ÔgPµªBê1(:÷=Ô@©Ç @ê3¨[!õTÅ¡zè" ©½W*£ k;‡æÂ¨Ç`U3 £Ϲ©×ÐÊôeÔgP‡ò&V^²3jQAaÔ~J0ê1ø`Ôãï«ZÇxPõx-Um¼dgÔÞ‘Á¨Ç 0ê1¸«MÇ0jïæ`ÔÞ»À¨Çଖ’ìÐ`ÔÞõÀ¨ß î‡ Ìµs*xƒ«éúa<%‹zÏyPÀóÜÔÚЃž‡QHÁ³O<ž‡‘¦ê–Sð< 4Hm‚ž}H؆!äÛ¾Oüj+¼!×ù^×s¯ÙÈs[,YÐs›Îò“;L«žcÎØÞ€Îi–¢“@ç­r>è¼UÎE·•ŒÔYMa>:uÞ,‡Ô9³è•G€:§=ª|`ç>oÀ9š²Eœ³é)ppΠ”8Ûöa$ÎõsRàl{z8ÛçÅιá·N֜͌ïζѳíqEà\󜹫+xó9º(âVÞ\=#Yys8ûЙùÓÊ›£$¦£àÍI´´Ñ€7gP[1xs±ïñfs"$uŽ r›@ý¨ keŠ. óp­ 3¨ÓF@gûàÐ9ƒÚ"ÕÒê£Ago>€Îö1<¡sµ Àœ=ö@Ήµ59ç›:·rΠ> çºÕ+rާ«j ζ:‰³íÌNäìÝÌy¸…ÎÔ•@g?' 
³Ý ˜smäÀœ½Å9{sΆ¬ |`Îi΀ômeε"¿„ÌÙúsúÎrΠ>0EÎy9¨EÎy#º€ä|j°ÇÏ ›kµlé'lö> Ø\+V0Ášs<ÑFÖlƒ5Û®îDÎÔQ̹> »¢æ¡€¢f5çˬTQsÞ¶] f¯ fgšS?~UÆÜU¸\7òZÀåt¤³V¸\ù Ør?—Bå<—e€ÊÙÚuÅM¡²Yˆ*k7ùèP9‚X7TÎÃ"-ú„Êy•úÄ•ïMÖ_;FÎaK‡ ÅÈyÑš,Œ<íT´ÚA‘ónuMy¸[¥È~JPd4 È®]@‘ëÎñ¹î–l»j3ƒZ1rúB"IZ1²ß'0rEŸ‚"{L!òpPì™»q•J!òÕ4Híp¤Ç&I­+ÜôØ,Hí…EzL—hÂãŒYöó óu%Oˆðx(¨ðx¸…ǵ2‘ðØ^X„Ç&! m4%<ÎÃj&à±½] MbÛxGxl ƒðØ^ڄǦù sÈÖ„bÀcŽdÇXvlÎädÇ&”ÉŽýrÀŽíržè˜Ýäx8Ì—i®NrlZä˜ÎPÇÓž pìupl/O‚ãáF”{9ögr|6`ÆV£@Æöö$8¶÷ ñ±==0ä~н„rd¿F…ÉclWy‹ŠT®ì\Ùf†ËÞ”@–½Ê.²l¯I’eñA–óªC)Èò]—Š”M×)çR¤lDÙ+DÙkDÙ›,ˆrÝŸB¢lä‡D¹>Y±‚d’½w$›²#Höî ìc@òpNÉÞÌ’íY(G6MAŽ|7 dìÈÞ8·áßJû¶Ûsüû¯ÂÇBà|³(€†/¶Ê(x—E^9ç'E6ÆJ–üBžH4€ãaB!§w¹*F 亅!».B68 ‚œSXàW%Èf¦HŽœ6Œ0îPŽl„ÙÉ%0²OÛˆ‘9 ’#sDŒl3bdÓðÄÈ oŒÜ®ìZd“üÈ@¶9²Ý¶ddù9A¦²ŸÙ©5²Í…I9_"@¶Y! ²1Dd£ï$ÈBnØéÁ²™ä!7s¼BÞ‰åˆwŽ6DÈ6Á5„ŒO²!㨎q'C~râ'>68åø˜‡2|Œû&>ö âã©þäÇ;?q~ŒTòcz´?ö ø±-‘ïüªžüØX>òa »Èár¾Ìlwfº ï|G ߟèØz¸³c€w²ãfNÊŽ½ ر$;æYFíMtdnÈe¹ñà "dkqŽ1 Bºu„Ì‚DÈXÈv”¬_ç (‘(Ùn„(ù™É ‚¼ƒ Aö£ ï&H¶¾Fžlw@žìçO¦x!O~ 4Ùè?i²A É£}Æ0L"Ûe"[%?!²Õ!²Õ.YrãMDÊöÖRö«Rö ²ŸLÙïLÙoL™”HÙ–gˆ”=¤<Àè\DÊÖ`)#5ƒHÙƒ@Ê~X0eC•ýj•ŸyÎÊ’ùï )7|CgH™×hHÙ{!åÑ+Þ©äà6HòaÖB’s¥9$É;¿'I~^"2[³äfÖ"ÈÍÌ!=€lõK€lõK€Ìi™d/ €LÑjy7 dßâÆA Žýh Ž)è sð0p¼›UÀ1ûãMŽ)g [ÿ :¶‡KtÌWš£cÆÈŽ¿?)¾°îHpýv¨ŽÅÉÈ… ±‰…9†¶Š!¶Ç@È•Á‡Ïðak€äÃփȇ­ï‘Û`póaÿ|˜¯5ãÃÖȇý°Š‰ß¢Ã ‡ý¾@‡m¨"¶¡Št˜‚Çè°“tت…t˜wBJl †”Øê“”ØK‚ÛË…”ØÚ)ñnIàĜ¡%~žJé°Ýé°5Bb«mBbŽ!Ûõ:ø6S‹Y’‹¿ÎBY¬OF.<# X¸šµ¯raÛïˆ\¸ÐJXxݹ@…sPSý <)„!ððÂq‰xx±¬NàáܨW³á€‡n3L<œ{X©ÞžÏdÏu>øðlŠàÃóÆ âÜÆYkˆ†ÊöU3 •í³W8*›e§;*Ój•U˜¥òlpöIˆãwbeåÄóô™DãÁ¤–Ê©;,•Í–œx6O>pâôVF NA@=pâÓ±^,8ñ´zUN½Æ ЧúP<GÏGË(žl ø3ô‚géÍ Šs¾£“a€bwÈ(N‡l$"+(žóÍP<­˜Eƒg9½VpâRh•OË+0±_0qÎuh&Ž’´ÑPL<-Èg ™ÄЉs‚ŠÜeÅÄÓý½qþ¬iËʇsÓIm,àÃÓX8æÖZí ÂiZ· ¥Â¹õ¥ŽL ÂÓ08 Ð~Xaðä¤Xað4{Oý_€„#Ë` áÉ>ð€‘rîúiØ÷FÂy=€ÓŠ„³^¬«Fʇ-–ÂNù0‡ÿËN9×Î;åã0O µSŽà»çÂkY°? 
aò@Â1”¹YEgÂ9ZÁÁY™p‚*\À„§ÊÇ&<ûÃ>÷oUþ«px²eoÀáiâ+nËþPà¶|žOÜÙð4a•l8Ÿ‰Ž10[ö‡ Hœ}\Ÿ‰Bbï• ÅÞ}@‹³ãé‹´8kV'-ögZœÇÓµÐâ¼×@‹‡ ˜-gçðVZ<ܼÒâi‚V†×òqXޱz-ûÓ,öA´xš®Ex¸,û=Àe9‚Ë{œx:à²ìC8±?zpbÀ‰½Û€g P—eÁ‰‡ `l•J—eëtY>3(V`lwr»,\¥Ëòp:EÇ÷Ð8% rÅ_9›ÚãG5V>ø‘!i± Ðf¬|0ÿ ÆÊýdÊ‹óx`·ê¨ì5^lc•CVàÅ?Í$/žf¦dÃQÙ^ztT>˜DGå|tŠàá¨l]‰àØ Àñù¨5ÉYÀqnÈ «à8§õÈåUGeopT>v3Áx8*üÆŽŽÊÇnöê¨|ì4€Pn|Z°R>v3›P+eïA°RΫD¨•òÝBᡬ¿vóäã`>­z'{ Á;ÙFpz'Ÿ›ÅK+wrV³Ýá|ðû^z'{¿„wr6[`aõNÎg#î°NÎV¢DÖÉy=šñëd{âêœìOÎÉCP“íR“MÈßÊÙñ†¬8Ù;”©´H“Mù&§,Ð' ¬<Ù" ”Mß“+ÛtŠ\95P­re€Á•c0ÄÂ!¸rÎÕ^\Ù&‹Ëœ’+Ç«†íàÊùž‚´råÉ}¢•+çûU›¸rN£µ±‚+Gƒ!¸rÜ&qµæiþŒE­Yæ( [ æ¼=åö¸y|+jž¾5Ë£ƒE5_bY˜##Œ,Ì=Êf¼He3Á£‰²ùÀÑDÙlÎh¢lAš(—!SyW“8%ÃEÙ-\”=e«Ú(·Xe3­£‹²m“De󟥋²9×ÒFÙ«6Ê~TØ(ÛQi£ìAØ(›Íîm£l;ï¸2²ÛÝF™G„²bÂF™Oƒ.Ê—Ç í“Í”öÉ„}²5 Ú'[ƒ¢}òûdÂ?Ù.ˆ.Ê„‹²¹¤ÒEÙƒpQ¶ûteÍE™·b.Ê š‹2æ¼æ¢ì1º(à Ù]”ü2eþl.Êôl6eÍE™Aº(3f.ʘ_»‹2ù¶Ù(cMÈ}”yX3R¦°)3hFÊ„‘2ƒf¤Ì )¿»‘ßhj #å1(FÊ4#eÝHY«o0RFFÊ´g6#eîogFÊD­f¤Ì’ÝH™…ÌHÙƒ0R&õ6#e–¤‘ò»»úÍP™A3Tö½ÿ`¨ü>†ö’n¨Œ’n¨Ì’f¨ŒûtCeÖ •YÒ •YÒ •Y†Ê¶k •=h†Ê<¬*CD˜¡2 º¡2 º¡2ƒf¨LsŠËP™EÌP™A3Tæeš¡2KÒP™ÍP™A3Tf†ÊcP •=Ce ÒPÙƒf¨ŒÛtCe÷É8T+³$•íQÑQÙƒpTö •Çౌ •M¹ÒQÙƒê¨\ßßõÏÕ«9*ÁbÁËQÙ‡£²á¨ìA8*WÛLP•¹Y‰9*sKsT¦>7Gåj ŽÊcP•-HGe;'•½$•=Gå1(ŽÊ„£rÕu>3T¾ö91+åþ3L”±cŠy(÷"pOæžtOæfæžìA¸'AqOöàÓ=Ù†{²ážìA¸'Áîžl7O÷dÂ=¹r5ˆîÉÕ²®ážÌ§M÷dÆhž|??Ú&[Ø&[ ®ÉƒuòÛuwÄ`¢l18)[ vÊ• C4U¶‚pVö‚ê¯Ìr´W¶ì•-{å!¶7t-ú*÷Ÿá¨l¡²ÅÔO¹ NÊVFÊ•IÏðQæ¶Læ£lAú(AñQö |”=eÂGÙƒðQƒâ£ìAø(WnoJeÂGÙƒðQƒâ£ì¤>Ê£²á£ìAø(ÛH¶=N%«öß ©‘rëcΈ¡W¦eCsMZaìL ÆfF̰Â(ÜçN«¹^Àcùœ«òÌÏÍéª< ûü‰%†Ù ›#,"éˆa_¸»% ’ÍcyÃTÙ]h†a®¡tÃ0?GºaT~J7ŒÅöÿS7Œå3ÛøÕ'E… Æly¼pÁ˜mo»ËcaÓ caæ8½0¶Oza,̘…ÆL£eXaÌÌ—¢ÆlÉѰ˜̆V“erà câ­0&6{ÐçxŸ0QYésé{ / ?§Òç,‰ þ”>ç9u)ô¹ø<ð9bÈI|Ž×&Úàs™®ð9ƒ°¢øœÐ&# Ÿ½ àsiŸÙ¯<©ëƒ9Û=9ç=ðB‘sÙ™ 䜗sfAÎyå:9—)Ä@Îegê.süTä\v.ò9Gð9¢6nOØì5 ØìŸÔ6çMÀYas±V›#ˆnØ\¸ý*Xsi\vkŽ VVÁš‹m¿ Öœ%µ}*k.›ùD+kŽàe’¬”Ù¯”9oþñë/ç c <ÅËq¬ë/gð]ïäá°‚—³‘ª­x9ŽŠ x9‚ôV¼œm\þ—óœHžV¼\ì#"àå ê’(ðrË^5ËÅ6¿XÎ^¬¯,€e¿}€e¯8€åbÛã,ç‘+X.•¶Ê•K£1¸re¿Kpe¿Kpe¿påá°'WÎ_\•+û½ƒ+ûÓW.¶‡±råb›&ƒ+çý©,WöêWöG ®\lcpåb»+nd ¬ì/`å,(ÙØ Ê9gÖÅkPåsB-C¨r>U ÊÅÒb@•ßvªèT.Õ¼0*Gн0:TJ*T¶§A¨œ‡…ë±B塤Bek;„ÊÃa*›ó*ç9ßݦ¯8ôU¶\ª%x+aæÀKœףIpàÌCgö*gÎ*@°rfoàÌ~—OÎì 
œ¹p+sræB¼Ìì7¯˜ÙŸ0³?E`f?£Ræ¼wøS+e.ü<™˜9õ­¼È™Sßj:=hsqસ¹Øxs1`à\:ÛVÖ\¸@HØ\lI´y8›âæb rÀÍÅ–«À›‹±hÁÍÞ¾Á›½Úœ‡ goVr.¶ªð\l!øÙšðsQÆ|“g®àÏÃ*€.¶Ôí¢ÚbàÏ~PðçÂòg¯ðgoOàÏCPùópNåÏ…iäÏþÁŸ‡Ã**Aù³·(ð眅¾—óÁŸ}8¶žü\˜duâg¶©osaþùÃôò›sžºÇCŠŸ¶ìûÙ«ó?Ûr^iþƒÇþ÷‡?| èŽg}¼ Ñzy/ÑúÍ‹ù&ºþ•'Íü¤·ý=Nú¼ÓÇNªdõ³§r¿Ïÿüã»^Áõxï'ý}ï¿?òÇ™óûÔþuþçßõ ®v~7ù7Îþ8ÞÏŸ¹Š£S/ç@ô3ÁxiŸz£myAÿüÃÓëýïç—¿¯qÍ/?|zùÏ¿™þöåw/?üöÿüð…§‰:—6æyŸ—cù+牳\'ŠëúúSÕÓ|fŽiÛÚ¾üT…§ú‡_r‘'†ïþðU÷¸ÔmßëK}ì˜Ùâ…U—áÄqQQõqêO/ÿôò§øÿä½~·‘ëû —6\ü£e*ùý^KûòÆÙ—ïöRÜÆ³çoßùqËRö÷©ùóìËgÿ.5¶ïm<{¯ùsxÉÏÅö/^޲§¡RK½Æ°v3•ó¤uÖ¦—ú#µ&W ç)&<Ç~Œ§9®Ó,¿ð<¡°×\ šã<å3ç)~ž¿rŽx1·˜Û,kL~â²Êâ½0—ÝÏ‘ÿsžá÷?ÿ×§O?þå/?þôÓO?~úóŸþüÇ?ÿ¢QùåË>O´Áiê+Zgïm’Ýq Õ´Æ;§'päʺ€ÈºLjLw}˜^—E]ç æûuY{–E;VLÞê" îXPñ k;Éf]gÇ„“·ºŠ½Akü”´æÂùm_‘_nêåÚøo[îó"˜©®¥ç+´=½®â¾æö¸ uë) yX] ¬«x:¶¶cÂX×£³àÖhYP£Üà%ƒÏ_Åh«å¢›à—hL}ñ¿5ºnÕÒÓlm²`ëkøm£#oÍUà Mæ9uõ¦Öù¦ç­ñsáZÅ/ êšY¹}š~䫲TP«ØœYk_½ÍZTîò}{ģוø±¯rç)uûÇšK¥·A®*^°â ®}Å:Û¢bºIêWÛ¥½ŸAÙÄ mO'…šÑß6?ãaŠU–7ßüû¶ظW§Ý¯2…m–+ɦu;[5ÚÌÕ¶‰yUod~%)#k\éCÝÅh8g®êÍwóåçò1bõçš UWDê^úšÔ¾‰Tmqßû’èÙĵ'î‡ø:å§Ð:¨óMÉ΂Zï‡á mç1cË×CvnÌG…[ÈU«ËF)×ÔõÞO·Š«[ëØÒ:àvCÚ,lnéÈWïFÜúß,âl6nôØöÓ‰S‰yÑVP1[z:ÞCqô}ÆÛ$®y›š–°ÍS_kÛÌ Êu›{(Þh»ºåÒLŠ<ã-ÞoÝ(J*¸ßæÖ—ÄòjWeôͺÕuØ-^pÝË'Jê¦Û"nY· 0·EìBÏêÓŠ×Üݟ·- a‹×\·×ɺ•·Ê– }D¶ºÍ5ˆ>"ÏVÙ}0Ù²;‹ ‚ì>¸… e÷Q)!»C @a¨ì>˜SNÙ½_ ‘—à>˜ÀMÁ½3ë ‚{gþ wLŠ4€‚{o°ñ¥ànSM n©ÚQpOÈ¢àåÚTÝÝèlJ݇…ăîn3Ûu7}µMwÓuÎtwÞW×dƒî†ÖwÝ­é ÔÝ÷Ät·j2êîž}ÔÝmÓ¨»ã) ¨»iªÝmº”º;N‰&ôÐÝ™YªSñ¯ÑÝú3ÝÝ0Áÿ:Ýw5u·‰U×ÝÚ+Ý]¡Ø]wëëØu÷RßVÜòŒÅ­“µAqk?·¶‹Qq«qÅ ½FŽ¡+Š[ÆAqC¸âF­RqW`€¯RܘX|‘â^‘B?(nŽ\wãi¸îÖô£AwS”»îÖ‰çÿ îÖ:ý*Ýç8èn}®»)Êÿçt·ö‘¯ÒÝhƒîÖi²ëîU«Ïu·¦i º[±Õ »õµþUº“¾ÿÝ=ý·àMFη˜·¾¸È¼ ’³¨ÀËÊxô(#Hð¹}F‚ÏL¢![Ó„(Äã¡ù×âñÞy—|ûv $ß7ÿ!ù>v“ðJ¾Š %É÷±Rø‚|g^¹J5ïc1U %Î-› ÀEóº)Ä÷ÃÈ9„ø¬XÔáûA¾ÎïÿL‡ï¬Ø§Ïãé,„j¼CQ7JXªqæÂ›çÇ+Tã!õÛSã6G¥¯”“Pã;m©ÆC•>ç¡Ôá…‹0Ðá!s©àU‡ïL¦Ï’Ú“¡Ãs‘®:ÜÏ ¾/x]ÿ>8ׄßgŽäß;ÞÓÆ¿CÜàßûsÅüàDà»±{|ï|‚OðÝØ;!À}‰MxÛX#àOÝ}z½©VÝmWNÝO_ßSÐÝ6ñ¢înܡº;íºT°@wgsT&õK%:¿€ú¶åªo›·R}GP§‰TßùAd²ªïœ êkê;-PRÕwãBTß¹˜$SSªo¯!¨o[e¡úΊ×逪ï4"}—w7nAu©ïƽž¨¾mý ê;­ª¤ûQ}§ŒRùõí§„ún×fÔÝmáâ twuæÝÝÒ´@Ú8tw›G¹íírÛ¦ð”Û͔۶\G¹w­ ¹‚ºXåv>"yeQn·ù3r»MF²Un7ÂLÊí$=ïbîìˆòéåvVWX!·sí|\åv:u8æÞà¶ÿ­r»{_Ïñï¿Jkïýu9híú>ìòŒÄ’SKÙ•b";¸.MCdg%Ud/4Ç 
È^|ÄG‘=Ó©Ž"{æ·V¤Ý¹;Å»´{Úô»0Òî‰mÀh÷LfMm0Œ›[T˜ÆæÊ•i쟛ÈÞ)O\dSesÂê*›Â*{5~ýTÙŽ®¡²WË@Q•mJÒ™7&ŒTÙÅø³1ïÏ$™ÌdÁPÙ®ɼ¢}2ïÆ¬æšX·6æ½r–Eæ½ñQ‘yÓëǘ÷†Õ@cÞ| Ì›‰(ÐÚo…Ì{a’µ6_¦µöY2one’{þ$óÞ˜ÄDæ½3½åɼidÌ{çÄÌ»²o ¹&ú¶sæ÷™wÅ*Œ1ïõ3Úû|ÝÊKÔ™7Rȼ­$™÷D¡Dò½°‘|[¦‚“o Œ|[JÉ73ߌ|/£äÎ̪Þ3fN¼××:jí¸: >’nOä6’QõMn}šãr›Nñ–ÇmJ†yÜ+‰Ôv.Ó€a+àn«¥˜(àÎເ»Þànΰ'î[8Šís¯?}{ƒlçÝ+ÕÙöªÚÎïÌwî‹Dîf) Hän–D ’{ nÝÇ Hî1ظÅúÁ=öÉí»wBruðÜC¥ªä>ƒï|?9T›Jîá9ªä>[€Î¨TrÍC%·oØÉ=7‰ÃBƒJnßÔ’{ ÎÜÎM©Tq÷æ¯R[~ݸÛˆ´jìáÆTcû‘ÐØ¾ $4¶ïó=7n`¶½§±}wEhlß»H5¶oO=ÔÁÃghøuã–`˜ñˆÆc‡mì®Ò½ñtçr¹.HжÄ Û’2KI·}Ae¶%«¤Û¿<éÎÏŽtv ¤ÛÖÒIºíÓ{’î\ÛÅ·°-¹>ô"㎢½qo¶0DÃ’™ùêô-™ð $ã΋†5 7ù§1n#§dÜÍ>‘ãv®nŒ›ÔXwµ/0ȸ¹HÆ]éðNÆ]+`š1îéŸÆ¸¹ ;·­¯’q¯\ã®ö!í“q¯æfÆmKƒ`Ü+[1÷‚¦cŒ{FG4ÆÍ¯lŒq/†¬»'æyvÏLÈ$ìž‰É »'¦ÅvOÄÇ„Ý_Ú„Ý‹¥‡vË_ìžß×ßívó{Âî+Z»s3(vÀn«"ï™ÙÓ@Þ¹…Ì{¹%¹KÑ»¹%ç®5ÂhŸà»l–¢à;7rQ"¬à;7$y7·¤Xú;Àwn¢üà;ãõA€ï¼­€ï—2Áwî}®à;ìu¦ ðicøNÛií´ß…ÞdßÃ9|—jT\Áw±¾ó‚ô‰|§d@*‰‚ï<,ض‚ï´ÐF*‰‚ïÓ˜[MQ|ûÕ|ë#2~¶/|(©à»TòˆgK–uÀâÿþáÿ •Ð endstream endobj 5 0 obj 21940 endobj 6 0 obj [2 0 R 4 0 R] endobj 7 0 obj << /Resources 8 0 R /Type /Page /MediaBox [0 0 1469 828] /CropBox [0 0 1469 828] /BleedBox [0 0 1469 828] /TrimBox [0 0 1469 828] /Parent 9 0 R /Contents 6 0 R >> endobj 10 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 9 0 obj << /Type /Pages /Count 1 /Kids [7 0 R ] >> endobj 11 0 obj << /Type /Catalog /Pages 9 0 R /Lang (x-unknown) >> endobj 8 0 obj << /Font << /F1 10 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 12 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000217332 00000 n 0000217354 00000 n 0000239368 00000 n 0000239389 00000 n 0000239418 00000 n 0000239850 00000 n 0000239718 00000 n 0000239612 00000 n 0000239776 00000 n trailer << /Root 11 0 R /Info 1 0 R /ID [ ] /Size 12 >> startxref 239932 %%EOF blis-0.6.1/docs/graphs/sup/dgemm_ccc_has_nt1.png000066400000000000000000005564341360743507500215510ustar00rootroot00000000000000‰PNG  IHDRâÜJ&¡ &iCCPiccH‰••gP“YÇïó<é…@B‡PC‘*%€”Z(Ò«¨@èPElˆ¸+Šˆ4EE\•"kE ‹‚tƒ,ʺqQAYpß÷?¼ÿ™{ÏoþsæÞsÏùp 
ˆƒeÁË{bRºÀÛÉŽÌß(ŒŸ–ÂñôtßÕ»­Ä{ºßÏù®‘iü常¼rù)‚t ìeÖÌJOYá£ËLÿÂgWX°\à2ßXáèyìKο,ú’ãëÍ]~ )úÿ†ÿsïŠT8‚ôبÈl¦OrTzV˜ ’™¶Ò —Ëô$GÅ&D~Sðÿ•ü¥Gf§¯DnrÊ&AltL:ó5204_gñÆëK!FÿÏgE_½äzØs û¾zá•tî@úÑWOm¹¯”|:îð3™ÿz¨• €è@(U  t0–À8à|AØø $ȹ`(E`8ª@-hM œà<¸®ƒÛà.L‚—@Þ‚°¢A2¤é@F²† 7È ‚B¡h( Ê€r¡PT UAuPô tºÝ„¡‡Ð84ý }„˜ÓaXÖ‡Ù0v…}áõp4œ çÀùð^¸®‡OÂðø6< á—ð"Â@”]„p$‰BÈV¤)Gê‘V¤éCî!Bdù€Â h(&Je‰rFù¡ø¨TÔVT1ª uÕêEÝC£D¨Ïh2Z­ƒ¶@óÐèhtº]ŽnD·£¯¡‡Ñ“èw †aaÌ0Θ Lf3¦sÓ†¹ŒÄL`æ°X¬ Vk…õÀ†aÓ±ØJìIì%ìvûGÄ)áŒpޏ`\.WŽkÆ]Ä á¦p xq¼:ÞïÀo—àðÝø;øIüA‚À"X| q„„ B+áaŒð†H$ª͉^ÄXâvbññqœøD%i“¸¤Ri/é8é2é!é ™LÖ Û’ƒÉéä½ä&òUòSò{1š˜žO,Bl›XµX‡ØØ+ ž¢NáP6Pr(å”3”;”Yq¼¸†8WJ)Hq¤"¥öHµJ IÍKËIÛJGJJ·IK”aÊ8ÈÄËì—é”y"‹’Õ–õ’Í’="{MvVŽ.g)Ç—+”;-÷H–×–÷–ß,L¾_~NAQÁI!E¡RáªÂ¬"CÑV1N±Lñ¢âŒMÉZ)V©Lé’Ò ¦$“ÃL`V0{™"eyegå å:åå–ŠŸJžJ›ÊU‚*[5JµLµGU¤¦¤æ®–«Ö¢öH¯ÎVQ?¤Þ§>¯ÁÒÐØ­Ñ©1Í’fñX9¬Ö˜&YÓF3U³^ó¾F‹­¯uXë®6¬m¢£]­}GÖ1Õ‰Õ9¬3¸ ½Ê|UÒªúU£º$]Žn¦n‹î¸CÏM/O¯Sš~°þ~ý>ýÏ&   ©†.†y†Ý†iñªî¯&¯v\½mu×êׯ:Æ‘ÆGŒ˜ÐLÜMv›ô˜|253˜¶šÎ˜©™…šÕ˜²élOv1û†9ÚÜÎ|›ùyó¦é§-þ²ÔµŒ·l¶œ^ÃZ¹¦aÍ„•ŠU˜U•Кij}ÔZh£lfSoóÌVÕ6¶ÑvУʼnãœä¼²3°صÛÍs-¸[¸—í{'ûBûªƒŸC•ÃSGÇhÇG‘“‰Óf§ËÎhgWçýΣ<Ÿ×Ĺ˜¹lqéu%¹ú¸V¹>sÓv¸u»Ãî.îÜÇÖª¯MZÛé—ˆÿ2Â6¢,b&Ò*²4r*Ê*ª4j:Ú*ú@ôLŒMLyÌl,7¶*öuœs\mÜ|¼Güñø¥„€„¶D\bhâ¹$jR|Ro²brvò`ŠNJAŠ0Õ"õ`ªHà*hLƒÒÖ§u¥Ó—?Åþ ÍŒ]ã™Ö™Õ™ï³ü³ÎdKd'e÷oÒÞ´gÓTŽcÎO›Q›ù›{r•swäŽoál©Û m ßÚ³Mu[þ¶ÉíNÛOì ìˆßñ[žA^iÞÛ;»óò·çOìrÚÕR V (Ým¹»öÔ±? 
ìY½§rÏçˆÂ[EEåE‹Åüâ[?þXñãÒÞ¨½%¦%Göaö%íÙo³ÿD©DiNéÄ÷e̲²·7¼Yn\^{ˆp(ã°Â­¢«R­r_åbULÕpµ]u[|ÍžšùÇ‡ŽØi­U¨-ªýx4öèƒ:§ºŽzúòc˜c™Çž7ø7ôýÄþ©©Q¶±¨ñÓñ¤ãÂÞ'z›Ìšššå›KZà–Œ–™“!'ïþlÿsW«nk]£­è8•qêÅ/¡¿Œœv=Ýs†}¦õ¬úÙšvZ{aÔ±©CÔÓ)ì ê<çr®§Û²»ýW½_ŸW>_}AòBÉEÂÅü‹K—r.Í]N¹<{%úÊDÏÆžÇW¯Þïõê¸æzíÆuÇëWû8}—nXÝ8Óâæ¹[ì[·Mowô›ô·ÿfò[û€é@dz;]wÍïv®¼8d3tåžý½ë÷y÷o¯ñy02*|ñ`úaÂÃ×2-<Þ>†+|"þ¤ü©üÓúßµ~oš /ŒÛ÷?óyöx‚?ñò´?'󟓟—O)M5MMŸŸqœ¹ûb݋ɗ)/f þ”ø³æ•櫳ÙþÕ/ M¾¼^ú»øÌ›ãoßöÌyÎ=}—øna¾ð½ÌûØú>|œZÈZÄ.V|ÒúÔýÙõóØRâÒÒ?B,¾“sMT cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFs¸NBIE] pHYsNNÆÊ/¥tIMEã0M†™ vpAg7O£¨u€IDATxÚìyœeÿßä ÷"9&‘PC0á@Áj£Àì$®Ø½ QÙßj—°Þ‹Û%®¨Q´KP¼¦4 *tÉ% S™@˜" W*™@NB~<ýTÓsffzfò¼óêLwOU»ê©Ïó=Ú·oß> …B¡P( …B¡P( Å€2ªÚ P( …B¡P( …B¡P(†¤‹ÅªÝ„Çq°, Çqök;¾ïcYVµ§*(;~„aˆeYlÿ@µceÃ#‹žØ±ü Œ$”Oúz=‰6 ÊŽ‡ƒÙ7Vv¬Hzk˪\вã‘EµìxH qžçU» 4–eáy¦iF꾆!¾ïWûª‚²ãáGcc#†a”ØþjÇʆG=±c˲°m»ÚMíW”Oúz=‰6 ÊŽ«Í`÷•+Š¾Ø²ê—¢ìxdQ-;ýµ¯}íkCáào¾ùf|ßG×ulÛæk_ûZ‡éO=õ³fÍŠÖs]—;Y³f¡iZ´­§žzŠ;wâ8;wîD×õËöt¹®ÚüÔSO1aÂ~ðƒ°qãFŽ9æ˜nçuGgûïíôÎÚÛÝñA€eY<øàƒèºŽišAÀ)§œÒí1oܸ‘0 ±mMÓ˜5kAðÐCññŠóÎ;*.?œé/;,.¶‹òïtl¸Ò1õuzgííɹé̾:;6Ïóx衇hjjâ˜cމ¶ûñü€±ãÎl¸Ò¼‘nÇeÃÅí;îjÎìX¶×u]|ßç©§žbt úL_íXõ)׎»:®¾\5M16,ϯêw~L}™ÞU›»:fé 4Ð}ã‘v-–ç@õ+ T»Ü—ç<Õ?žýŠâ}(ýã¾\ªÙqÕ=âÂ0$‹áyAH$¢y±X ß÷ ‚€ÆÆÆÅ>‘Hà8N´¾t“õ}ŸD"eY„aH"‘ˆ¶ãû~äJÚÓå:Ã÷}R©‰D"úrdûºš×ÕyhllÄó¼è˜|ßïõô®ÚÛ“ãõ<Ã0‚ RûÓétŽ9‘HDߟÜW1ŽãJ¥Ð4­GË'úÓŽˆ»úNûbÃò< ;îʾ:;6]× ‚ Ú¿çyèºÞ¡ #ÕŽ»²a8pìx m¸7ÇÜ;î©MÛ±â8º®ÓÔÔ}ö<ÇqÐ4­äF$¿<×u ‚€ææf’É$$“I@ŠÜž$“ÉpÐAEÓzº\gø¾ÏæÍ›Ñ4 ÏóJ\È»šW ×uKŽW~ù¾ï÷jºaî£'ÇA‹Å0 ÏóÈd2ѹí긂  µµMÓH&“Ô××cšfô½:ŽC.—‹:•–—ínô§755 š Ëö”§¦iöÚ†å1 ;†®í«Ò±éº}²-ò»)þžGªwfÃòýbǃaý9æÞÚ±ÝîÊ&+Ùq"‘ ›Ív;Z9Ôé‹«>Eu츫ßg_®Ç±XlDذ<6Õ7:v<˜}ã‘r-–ǧúDÇ4úÇ}±eÕ?¾ý Ùž¥ÜÝqõ¥á„:]¾·×ãl6;blTŸB2Tìx°úÆ#éZ ª_QÌp¶eÕ?VýŠJÇ:Üú­ÓUÿ¸»uö—ª‡¦&“I\×¥±±MÓ¢“ ÕöârÉiÔõõõQÉásÞÕy7dY)¤§Ó÷]׉Çã466öúÜÊ-ò‚]¾ža$“I,Ë"™Lv»üpBÙqåóP-;†îí±œt:çy%m©´ÎHµãÎlXÎ;Pìx(Ù0ôÞŽ{ºN±—çâÎôÅŽGš W:ÃÍŽûr=>ìø@ºW:#½o<’lT¿¢«s1œlYõU¿¢ÒynýŠž®3˜v|Ð>9äReŠdnR‰„BÜny2Ä0 Kbw‡;)­½¾¿Èdž==·Ò¾©©)Нîj„¥·Ë”—Sµìxì«7ßÇH´ãr†ÓŽ«mÃÐ7û‰6ÙúbÇ#͆‹i¸Ùqy{FÒwÒTŸ¢ô˜Fzßx¤¢ú†«-·EõU¿¢Ú6ê«vt×Õ?î9ª\ÊP±ãáÒ?þóŸÿÌŠ+8î¸ãªÝ”!ÁP±Ÿ¡À¦M›˜0aÿõ_ÿÕí²CRˆ{á…8ûì³«Ý yä.\XÕ6¬Y³†5kÖT½·Þz+uuuÌŸ?¿ªíxä‘GªºÿÞ°yóæªoÊ~J 
¿i€ë®»®ÚMèíííU?gÊ~†^;Ö¬YC[[[µOEhkkö3TìXµ£c;äµn(£úƪݵc¸ údžÊup¨Øñpé¿ð CÂŽ‡ CÅ~†¹\ާžzªGËI!nÒ¤I,Z´¨ÚÍàüóϯz;fÏžÍüùó«ÞÙ–ºººª¶áC©öièU[«ý½)û)e(ü¦Axè &NœÈ¬Y³†Ä9Sö3ôÚ1Ä €)S¦¨ë jG§¬ZµŠ‰'VûTt‹ê«vt…ê÷Õ?.e¨ØñpéÏž=›yóæ ‰s6*ö3xíµ×xã7z´ì↠K–,©v¨«««úÅP?®aвŸR†ÂoZÑ{”ý Ív(zÇP±cÕÅþ0T®?ªŠýAõKQvÜ;=ôЪ‡5%”ýèm¨b …B¡P( …B¡P( Å  „8…B¡P( …B¡P( …bPBœB¡P( …B¡P( …B1(!N¡P( …B¡P( …B¡”§P( …B¡P( …B¡P JˆS( …B¡P( …B¡P(%Ä) …B¡P( …B¡P(ƒ€â …B¡P( …B¡P(ŠA@ q …B¡P( …B¡P( Å  „8…B¡P( …B¡P( …bPBœB¡P( …B¡P( …B1(!N¡P( …B¡P( …B¡(# ÁóÄßþB q …B¡P( …B¡P(L@€…Uíf(Š[Á-ÄgÛ†T b11½±Q¼·,ñÙÚ¯qLµZ¡P( …B¡P( …b¤ãçÿ)Ž ]/lšV˜î8…åÒi!¶…!d³!Î4Åúº.ÖÍfÅò™Lé~¤çºâýÁË<Ý£6*!N¡P( …B¡P( …b€ññ ªÝŒƒm !, …h¦iâ}s³ÕâqH&Å<Óëù¾ðpÓ4ÈåÄ4Ó,̱^W47 ñÎuÅ6|-k×îìQ»•§P( …B¡P( …B1À(o¸ýDz„°æ8BËå ^oÅÄã¥bZ±gšbžïúŠ®CS“xÿÊ+»hkÛÕ£õ”§P( …B¡P( …B1ÀèèÕnFUq]! ¥Óâ³x@S'Ë;ŽðrK&Å_ßXy¨hoÑ«ø5¨b …B¡P( …B¡P(H±w …§J1Ͷ yÙÅ<XÑU¨J‹ á- Å«ØmH ª—(8…B¡P( …B¡P(ŠÄÃÃÀ„çâ&Ýåz) ìGe¿"‹!t·ŒÌÛ¦ëâsm­ðj»&5š8¦ ÿúH¾äŸó!§ŽS(˜ ‹%Hö'”´Ç$…8VÏ(Þqˆ—e‰é®Ësæ°êŒ3z´yå§P( …B¡P( …B1€œ~ˆM~!a$ÆS__½wó¯ãî¼ß/Í/çû>®ë2ÐHÍ DX©ôR“X–x…¡Ð¬DÛ„£XˆÐS2°o$³p© „çqàæ$¬MBˆëšš„V.Âõ;2ÖµYZÕqÄéz¡Dª\'‘óÒi‘¤.›eý§?ÝãÝ*!N¡P( …B¡P( …¢ññ K>oóâi÷øø<¼ÈËÁä’0UÏóâq‚¼òåÖêÕ´mßÎ-'À½?N"‘ÀîCXd¯ŽÁÂ["!>›¦¨jBd‹Å a£ù0RÀŒ 1-0…Ø&ÛBä‚Ëæ?‡@2¿¯$`>àéBëaX*œU² P~G$ kn‘Ɉ–ض8™L¡„aðæ”)=n¢â …B¡P( …B¡P(ú/ú0>œÉóîBZü‰n\Îq¿çˆkoo禛nbÙ²e%ÓV¬XÁ¶mÛX¸p!‹-êïÝ*ýвcÅH ÜŽ• +†êZ¬ (;VŒT¿B1L;–žnq„×”ŽŽïÃØ‰¯³Î;’mÞ;Ù¦iX†FÂ_¶Êi<ÂË“'“¬ZE;ðÉÓOgͳÏâ,\HxÏ駯«ã¡§žâ#Ï?ϯ5-´lÛ&N÷¥É\eÃŽ¦k"ê]„‘–§…“✗Ÿ§!D¸$`"DµâÈRS¾±íB¸XLì$ïqÖ”ßNù¾|„@§L™‚žHà>õÉQ£ðN: È´´`Þr >ú(éw¿[¬\\^UVˆÈåºLtçå÷"ÄÄ ÿ> $¥ŠðàsóÓŸÛµ‹ãÇ÷è÷»GÜòå˹ñÆK¦Y–E[[ X–…çy}Û¸B1H(;VŒÊíXÙ°b¸¡®ÅŠ‘€²cÅHAõ+#Á´ãâ°Tg_y…ÍçÞÅËæ¯˜j´òTóDV‡ÏñÎð4~ã¯åñ;ŽáëÇËÄ+® ãû<þ‘ÐÔÔ„¦iLúáÉ0eåJ¦/~‘'ÇãÜ–‚ C,à¹-[JÚbYVÅ0ÖDB8ɪ¦«²0¿)_Ÿ@âZ°( 1•ž`…eâ@ÒóÐ]·óž'Ä±ÆÆaŸ ¤R%oÐêû4‡!ÍgœAÓI'‘Þ»ï„ЀVÀnh ‘Lœz*^m-þc•ŠpP5-÷xËÿux)Ïã+o¼‹›ß­[Çò]»p€T*Ų'ž`í«¯rÜ'>æºÜz晼ºjUl£_=â<Ï£½½½dZKK 6làºë® гiš}Ù…B1à –WUP(ú‹r;V×bÅpCõ)#eÇŠ‘‚êW(FÕ°c?/ñhhÜûÏñ¼~îé´¸m,6_àRsyÙ@Â1p”¯‡"23Qœ©Ä›„ðäQ(‘Ì¿,„H•F<ËÊ÷YòyÝòŸ1 !²I·tºÊ"¶5™Ëç^“HU°¶Vx®ÉïCætËçe3O=•âoªàðÃáðÃq§¦†µòût‡d> •?ÎöövtàŠäÚe˘mšd³Y‚ 
Àr\×eÎ;ßÉ751áïgØ1rÞy|ñÃÆu]¶mÛÆœ9szdýæ×ÞÞÎ~ð®ºêª’é+W®¤¡¡!ú¼hÑ"V®\Ù_»U(ú•Á´ãXµV1b©dÇêZ¬N¨>…b$ ìX1RPý ÅH v\\„a…3 ƒÍçÔ²gþ¯pÀºäܽs'cwì “ÉÐÜÜŒQ$Tɪ©¾ï£ëzÂÙÚÚÊfÓ¤­­Û>šsøC^;–D~þã÷ÞË®‡âÞöv¾dÃÆ8ŒM mL:Œe2ÂYÌAˆnY„°f Ä5-?­‰‚ø¶Hç=0$-«J¡Mz¼Édsrž,…Z.Ây^!9LLW,ŠÆãbZ¹—[Œ ÀCxéÅò¯[_€[￟”ëò¾Gaö®]ÜvÛmh,ùÂxõøãùÍ·¾Ågœçy¤R)Î?ÿ|^{í5š››9lìXvÝr »¯¸‚††N›Æ9çœCSS?øÁ˜8qbl£ß„8˲¸êª«¨©©)™¾mÛ6fÏž}®««ëv[o¼ñ+W®ì0‚¨8pioogåÊ•lݺu@÷ÓŸv¼uëÖ./ä|ÖC iÇo¼ñƀ÷ņ·oß΋/¾¨:ÖŠÚÚÚX³f Û¶m°}ôçµxÛ¶m¬Y³†–––ê4ÅdåÊ•¼øâ‹lß¾}@¶?}ã¶¶¶êž4Å¢šýã¾Ø1tß?Vx´µµ ËþqOú:z$ÆÝ•Z«Þ˜°“ 72þ7ÿà¿õ+ì4¼°ãh~¾};‡ßÃ0ÐdU„üßÿåSŸÚ‚ç !îÄßmÏuÝH ƒ€öË/Ǹî:Œhšmóäöí‘hùÐÍ7sÌç?Ï„Y³—J .]´j𸾓J‘Íf£°QéV¼ßD"Á®]»Ø¹s'¿úÕ¯8ÿüóÑ40 ™1c®ëyÕ¹®m/GÝý÷ßOcc#º®sæ™gò•¯|…ºº:>ó™Ïpøá‡cš&?ÿùÏ9ï¼ó¢fjšÆ×¾öµ;¨ýZ5µuuu\rÉ%\tÑE,Z´Ïó¸å–[ªvÀ E_èO;ö€¹Ñ{QÆZ qŠF]‹#eÇŠ‘€²cÅH@Ù±b$0vì㣡ñ°.q&mïý)©LÈ ^‡Ît¦-\ˆGYÎ5D-ƒâZa¾²i"ííWrÐA?ç²Ë>Žï—D{—~>ôÐkˆÇ/À¶ žm¦i‹Åhjj"ž_8‘Hàû~$JÙ¶ÍÕ55\µk—ä×û™ÏððñÇÓ¼o_vÚn»‰ay‡Io7MÓ¢°ÐõëדJ¥Ãd2ÉâÅ‹Y°`õõõ|ãßओNââ‹/îPŒÂ¶m‚ `üøñ$ âñ8­­­¸®©0 #ஸâ \×åØcE×u~øÃFÛ,á†"Ä•»1.]º”X,Ɔ X¶lYÜ–Šj3vüd>¦‹‹ŽŽ‰AT Åv¬®ÅŠáˆêS(FÊŽ#Õ¯PŒÃŽCBLLþ„Ϩð-ö,‰óð®ºQO‹0éNÖw]!¬F!ÅZ®Åu7¢iÝ9îÉüiaF^~2Ô2îûBñ3 kkEñà'žààÛncÚ…äãcÃ0äòyóh¾ï>¾‹A&Ãò¿ýä±ÇB¸ó< /¼Ù³gsùå—3mÚ4’É$?þñinn& CÇaÁ‚œtÒI|æ3Ÿ©Øf]×yà8÷Üsijj"›ÍòÒK/BD4MÏó¢ïoÒ¤I%áÆP]/·Þ0àq’ºº:•óM1ìé/;Þˆñ‡$ÉýÚ¦BÑÔµX1Pv¬ (;VŒ”+FiÇÛ0x+w: /¿Œ7§ …B¡P( …B¡Ptïêl„¯OÀ0ŒA-h"Űˆ /ÃN3Èd8í¼ó¢0ÖÿyôQÞX¿˲Ðu=*” …¼Ý»w“H$°m›¹sçò‰O|‚LqB;JCC3™L¯E3MÓ:¶}Ò­×ÞÞ^í6*# ˜€¸Øih 5„„…9Âæ×U( …B¡P( ÅÐÁG‹ßûpÒ7ßbbËÄÈ»k°D&Ã0°m» þ¥R"$UbšQu‡ ‚ Ê!§ë:Û·ogÅŠ$“I4M#N3oÞ<I&EJ¥JÇ"çBD2?7ÿ¾+?¯DBÜÊ•+I§Ó´´´ÐÒÒÂé§ŸÎé§ŸÎ9çœMW(û‡L&ä¸VZKæûø¤H`cãàDÓ †GòI…B¡P( …B¡8Ð!::ÿð7_‰Ä°t:=hBU<'—Ë 5/¯årX–c!¨Åb±¨mÉd’‰'rÝuוxñÍ›7N8MÓJ*­+B aj~þ½Ôæß; 9@Œ‚'…7)¾™ùeúÀ(€¶¶6.¿ürfÏž%)L§Ó˜¦Éþð–,YBz” «ñ8†cј€ÎÁWÝ€ $äÔÇç³þMøø‘w\˜¿ ?>MYñ5œ>^ …B¡P( …BÑlÁ§v@{éÕ µô}ŠjY`Û"á\¾™L¦¤MÙl–æææ¨*id2LÓì ¶Éâ ¾ïÏ\m.B< BhKI„ç2]h2@ å—‘‡mæ§÷1+V¬`éÒ¥,[¶ Â\[[[T¾wéÒ¥¬X±‚––ª}êŠaËo±9ƒ9h´£1¥(,ÕÂÂÀàzoaª™åÀ¤Ì}L»èèĉóMo ®/JZë:„¡¸¶Ž4/`…B¡P( …B¡NxÀD^DãÕÞºûnŒrj ÃBÞ· M+xhð„K§Ejùéda×uÑ4­S¯=)ÔÕõ†“žk™²éB<ëN÷Œ#Ä´f„ æ×q€® ±f:™n‹uìZÀº®ëÑ!Œ†zÉ%—Ds¹ ÔÔÔDÓfÏž­òÅ)ûÉà`@GGà ž5ÕÑ 
I‘bzøt´¸ÇAÎ2Âøm88˜˜hÒƒN¹‰Ä·áè¬P( …B¡P(#wàc2Sg`B8-K[°máù–ˉ÷ Ĺ"ï7ʼÖ<ÏÃó<|ß'™LFUQššzæÞåy^u *xñ«X òÓ ºâÊ5F‚‡[_[â@žôžd«¿µG«Œ¨««+É×ÒÒ¢E‹JܰaÞI…âÀ`0.ÿ¾Ü‘7Nœ“y˜‡Y’YÅÕÉÃù÷Ì:rˆX~™#nCüZÖj«˜?ÿ 2™l^µ¸†C‘+rcc#±X ˲ª}J …B¡P( …bȰ ŸÑè¼ïØ]½¬|üîò¼ÂƒŸë  ‚›çïãø¾ø\$ÂÉç6Ó4Éd2d³YLÓ$™Lbš&ñx¼Û]†ïû„aØ÷°ÔTÙçXÖ‘p%¡´P‚Òzp ûüWüæ”7Ù5qWVÐÐÐÀ7ÞÈìÙ³¡pÞrË-ÑB+V¬`Û¶mÄ9…BÑ{îÇæ£d;ÔEÕÐø¨9—[ƒßgæ§Î„Çܽí÷¢Õh½ãhüCɨFáûgå«ïÀµ×>ÀìÙk‚€G9ŸýìѶ=Ï# Ã’8×_éæŽãDÕ}Ç! à CÂ0$Â0¤±±‘¦¦&œ¼ty k…B¡P( …B¡8PØHÀt¼³<¼q=÷† ég…ï o· áQ25“Ó]·¤@†èºŽeYѳš|ì &—u]·GÂ]EÊ«Œ†‘­+o4áí&—‘}Dž6^i…¾ò}»t~:HŒX²d mmmQA†¥K—F¹à.ºè"ÚÚڸꪫªÝÖ~£+B¡ DηÒÏöAÒ›Áȸ@š ð9ÈJÅØ³g2ÿøÇ§ Ã9sþÄsÏ„a$ Â0Ä0 ’É$º®Ge«S©a‘{eYÑo% C\×¥©© ]×ill$1M³d=…B¡P( …B¡8ð9è„á…p†¡È÷¦ë"U†§š¦æ’ÉÒˆÍ @×u’É$Žã‹ÅÈår¸®K†½®âêû>ñx¼ïB\á—Ënêé^ˆ3)-¤`ä?ƒéÒÏ´ ÂúV~“‚pWeÆÈ7Ë–-‹Š5³lÙ²ùâ†+2¬Î¶m4M#›R¨BÔ"Ü?—Éázn^kÅ0|îúÂ]ì\7žðã![·ç Žà #/àÛo½Å/þð¶lÙ–-:mm'£i¯¿¾Ï3€8£F lØâg?û;·ÜòGÞx£ M«Å÷}|ß'—ˋŢ u<GÓ4\×*æèºNmm-Éd2òª“a)l×××S__®ëd2™áYÒZ¡P( …B¡P(úÀf<Ñq¸÷Þ…˜fÏt£chjŠW±˜çûB€“˜f!?ÂqÂ÷ýÈkÍuÝH÷@ï…´ðE=ŠÏx…0Ó, ^•Ä2á­Vi?¿N<ÿ^£àÕVÉ® áe'½å¬üçÖüû¡&ÄuÆHG•=2)!·ÊIœ¨PtBñõF+Ë÷Áàrö%ÅuÔ÷ý(t×®lÛæñ{‡ÖVîxê)vN?žú³øÐ­·²¼¥…MšÆäää[þÆêÚ¹á†nºéIZZÎbݺ;8üð%œzêkÄã!±˜Ï¨Qpþù?æ÷¿¿ £5†a`Ùl6r;.3M&“%£&åóu]~SÊ;N¡P( …B¡PHlój¹Ûþ_Šx˜æu=ZO£Bˆ¨m‹NÁcMâ#„ãˆT#ÿò‚Z’B¾7£h;R“4âEÛ(v2ÌPš|Ý) c•Ë÷¬Å  qË—/çÆoDθ«®º* O®xž—ïó"Q ›ÍF^?¶m÷¸2ˆB±¿4"*¦‚ñp‹æ=âMÆ0@Ó¼Áxã¸q¸†ÁÂiÓX¥ëÜ9joþm"s'Làóï}/7Ýtí÷ÜÃ!»vñ̹Çñ»¶6N›SÇ{ß;1Ÿ à«%mؼÙÀó4Äæ/ ›½ÇCÒ‹­+º›//ø2_ô,‡€Lc0¢ÅmâñáÑV…B¡P( …B1x¼löŽÄÌ‚mßµó—Sä(ÑÛPÓé5"Ÿqd>µb!N h•´8ŸBž6¹='?ÍÊÿ•!¦"TÔÏo?$!ªéüô•…³JUOËÉPð|¢Ïl£V®\ÉŠ+¸êª«¸î:¡ØJQn8cYV$Äãqt]/© "Å…b0—ʲSn „·q&–U‹ëº8Ž#„cÃà­@]'Ìd˜ÿÄüvî\Ö„¨ê!×\x!Ç¿Å#sætšQÓ4âq±?Óƒ*mmg°fÍqØ6Ô׋—thó<ñÙ²Ä{ï- b1ñWNó¼8Ž£óøã Ûo~ˆD"àæ›âÖ[_ÄýÃqÄöƒüñÛvé¶$ŽCŸð<±n‘—v„ï‹éåÛ–í)ƲJ— ñ¹RNSu9Q( …B¡P(p´ÍÌ5ŸÅqè[~8Ï!òaL pò}Ç™ÇÛó ÏšŽã`Wzêt¿q¬‚gšEÇJ¥åë›eŸÉ¯#CDƒü6“ù—Ú4Dθ&„g BIû¼8DÂO»bÀªU«0M“%K–°hÑ"®ºêª’/q¸aYVIžªx¹©øö†XL¼§à¹ P[+¼º‡hŸ¶-îqR´íÂ}ÏóÄò®[Ú]‡æf±¼+‰Â6¤èûEEÏƒÆÆÊâ B¡P( …B¡þÔ—4Ý™VêyEÒßY®[(ÊËW6Ð4ÈW?-& C‚ Àu]á—N§;¯"j!æÉÜnI(IQ'=Õ<:†—& *ðZOiI’‚7ôb“)ú5 žqÅ ­¿ˆBS‹‹1 ×ÔD"axžGsssT¦WÆAß}·Æ[oDåt:N*•¢¹¹¹ÚMWÀxt>N!¹æëyAH†êû¾™ 
5É$ŸþÅ/X~ð{yìž{q­¼÷;ßá°Ã>Öa»:¯Ûʺ•ˆÇáßÿ]¼?÷Ü_’LFX¤(&] CˆI¾/Ö ÃÒ<¡º.¦›&¼þ:üùϵlØ0ÓôX»vÏ?ï°lÙÇØ±c/¦ù ÿ÷‡sá…OâyÙl’ ˜8q_\C"QË)§LÀ4Å>;Ke…PQÓƒ ã@Q.'Ú¢Íéta›RôÓ…óaä.×Óu!ºÙ¶XOÓÄß ÂZ:-–ÏfŹ !ÎhG,&¶#«Š§R°xq•ŒP¡P( …B¡P (§hÇËyx^<âdUTèô!H†ŸA@††!D7èèhÔ™ßQ¾øBEA,  –¥(ˆjÒûäðÀÙLi.¹JôÝ»m„Òm±†áDyMÓ°m ]/xêÄãB\H&…W Ä£0À>©Õ E)Ç’Eí/…pæ™opË-ÿ@Ý~À”÷½MƒÏ}ŽY Û0MÛ¶9÷܇óÛÃÅ×5Yp¦75pÞ|óÍèw#I§…€”É”æB“¿)FAÁs¬©© €}ûÛSøûß±{÷n~ýëÓ Ãèz†+®¸“‰wqõÕ˸óΫð¼ßà8A”Ëñµ×.¦­ín¾9`çÎ8Éd†ø-Î>ûŸwÜK¤Óâf$››àÚØYJ)¬É*ßâ|ækšð`K§ÅqOq\åé¤HX¼O™ÂA^sŠ÷],h¾òÊÛ÷Ëž …B¡P( ÅÐÂ^ûâbnšùÇîÞÔw¸d²Ë„Ô2äUŠn†añ,ÃÌ?|4"B=;«HªM“"š =•n&"dT._¼lñvzY€UQ$ĵ´´°|ùò’™åŸ—-[VíövK:ŽD7§—L’O„_xâ!~Æïâû~Ï«‡(} ³ˆ7B¸"ó0±Ø÷h¸è"Ö}øÃ<1sf4¿\X’EGtt||âÄK®…qÄ Eo®…íííÜsÏ(6m:/š¦iR¬.¥RmÓì(RuÔ66mªcÊ”§Ñ´yÑMèÚkÏŠ–yå•« ‚Oâº.–e‘N§ÉdŽŽÈçË‹ñ¶·]€ë~„Ûnû 7Ýô$O=õyÞ÷¾=œpB+üt]'L\×~˲ Ky²RÏó0M3Ê£ çKo¹Îü +Õš^xå„aA¬„ŽÞyº<¢„8…B¡P( …b$“ë^dÅ­7P·èä¾i AÐ¥çyfÌdù%ËYvã2±Óòb@”¿HŠkåMÉæ—/öâ(Î §ä‘e @]]---´´´D3-ZTòy(#ójiš‰iÆ#./ä›*Îe%Ñ4?~çw*_þri˜BÑßÈ"3’Ð÷Iµx–šÆ ßùºá´+¯Œ–)/Œ#F=šÈàW÷Šsav]ß´@mm-°Ž3fôé¸4­cñ‚É“÷p¤¹¹8«Óuu]'A‡Š¬Å7.á‰÷tø¼~ýýv˜y ÊsSìžmÛ6žç¡ë:aF…[2™ ®ëF´†að·¿Ë´iuÔ××cFIbÕx<ަiX–E<Ú†!žçaFIUØl>÷AgÞy¦ wß=­OçZ¡P( …B¡P ]Æ\ú+FÝ÷¶ý×’g„£ë¥•Ieƒ"Znná#ÿõ¶<·…©ÍSÅÄòB Ò“ÍDäu+Fn3^¶mQ°¡RµRE¿2`É’%,Y²¤Úmé3©T MÓhk»×¹œ’ÉB>&×í˜ÏJ\uU;®«“NWVžeˆ™Ô‚ ·ª’¡PôDzX|Í5Ô$“˜aÈï‹D8aŽ27§düø™˜èXX·›AäÌì©7P¼øâ?Ù¸±ûb(º®“•êU'D-qWª­­r;jš†ëº˜¦ p¹\Ó4ñ<ÏóH&“lÞ¼Û¶q]—L&­—H$8ñÄû8ùä€ÓOO–wº®ã8¾ï“Éd¢êCº®£iš¦‘J¥°m›d2yØÙ¶Í~ô'þþ÷û8í´—˜?_x‡aH]ݱ̘1ø@•¿¡Á%ÌÿÓ‡j q…B¡P( …b?Ù=j+=ì£88"d´+Ê…†Ç‡qºβˆP'QA¹¨XvÍïÖðÒG_â´_žVí¤ç‡ô†#ÿ>@lS>S¥ÕIËóÃI¯Å€S’#®½½+VÐÒÒ¶mÛhhh ¡¡aH‡mÊP½\.GmmÇDí²àHgbôÂ…Ó€­­¯u˜gYb{29}qq]Ÿ[[2HÏ?Ùöx¼£ báåøûOÿÎ.Ï£Öóø·7ßì°Hñ÷§ë:º®óàƒºÝ´¼Ëë°ÜHÓ1ðrúôØ´éôÑ!è8PcFÅÜ‹²¸N_µë±c_¯pŽŒ(´Uz¸¸I•_»4MÃ÷} Ã@Ó4 ÃÈ vŠÎ±Ó<Ï#LÓŒª/[–…®ëäŠÔÑd2‰çy8މvÉd’={>ïC[[ŸùŒYÒÖ×_øfßOøÇÃÃÈßÕ\\|| tô’<‰ …B¡P( ÅHÀ6Ÿù]v¾²g!e%<ù̲s'œlÀó*•êâ¯åX$“Ij¾Pôk¦1ÿçó™›_ö†¨h§Ô+#G©WœÌÿV ÕMFÉ7---œ{X±‚Ù³gÓÐÐ@{{;W_}5—_~9íííÕnk,Ë¢¾¾žiÓ®ÄÑ}A©è–L O¸Î„8YUuÑ¢ûáA' ¯ëû"äLzÚ55ÁæÍÂûÎóèwd2zÏ+€©”H â¯\ÎuÅgË"#å<ßëU“âÂ-q®† ëŠ|…}ß3ÆøS׆!5 pαÇvX§XTÖu½$|Sæ‰ (-¶ 
#ÂS=„g!®å¢:uñ×PWw'»vMdæÌÑ´°hîðxÅËJ®\ˆ³Úý8‡S¦L¡¬®DÉ>ËÃJË‘\g‰S‹×•ÛÊf³‘[¹®ëF¶d•¢\.G<gß¾}d³Y4-dôhqÝ”Þr†aËå8þøã÷ã, M¤Ø AŠõÔ#†ƒC@€MŠVÅÐj…B¡P( …b8sÐÞ½œuÖŒî½á GËuáõ×áÄáʤx˜óZÞÃ"£ë:{ïÚ+ækùyÅARÒí<4J/›¦Ä¶ª3 „'Üå—_Î%—\Âþð–-[Ʋe˸ꪫ¸í¶Û"An¨áû>­­­ìÝ»M‚Yy2y]/„VB×uÒé4 ¬IJ2Çœ® ¤âj‡ÉdÇíþ~>O†aéç €úz!¬™faûž'ÚHÚ‹‰cÓuÑîæfñ9“çÃuE~­ò}t‡ëökÛbý0,ˆ|Ž#^r¾m‹yµµqR ˆ‰Dé²û{¾†3::A·Ýv0Ï/^ Ù,5ªgëëñè\XX4ÒHŠRµ5‰¸Þfó/ê/ÍÀË¿Oÿ8û«ìܹ³Dˆó)Ìéq艉…eûB”zÆIñLÓ´È{­óó§÷즘§\°+ÏW¾lº(þ½®îNöî]µG:| I‘ÂÁ‰lÑÅ%Yô¯•VLL44œ¨ê¯B¡P( …B1RØ;fOÏ«¥J/¢D¢0͇µ5k‰ü,òBœ¡AÀÁë.,›¡´JŸÊï6L°bÅ Xºti‡jjj°m›sÎ9‡¶¶6êêêªÝæˆ Ð4 Õþ„aÊJ«ñxeï¹tºr“LqÉmY–©¤×]s³ø›N‹¿ŽSÈ–ÉDE™3+›-ÍUWÞ¾bÁ.Ë ‰ž'ı\N´IÓ„à&E4YMRŠzò\ض˜'ÅB]/TÚ”mH¥Ä¶äî N F@Þ>B„'syŠÅ¥éÂü+È–•K‹ ÷ Ÿ¾§0 8òÈëSűæ÷!6½›Rß¹ò¤{P1Œ6㕹pfzñƒ—íÙ¶m[tø +ø:8¤I“%‹…E¦l8.KS%¡PŒPTND…B¡P(<€}ûz¾Bkk!¿ÔäÉbZ®2®¢ÉkÖ½{o»èmLr'á&]4M‹RéD^qŠaÇ(a© .TWWÇ¢E‹Ø°aCµÛ†!º®G¢Äþ„_Ê|V]áyt×APðFsœ‚7˜ë áÛóÄïOŠdA ¦†ÇŠ+/Æãb¹¬ØN* •EAÓ,x–UhK}=46Š}I'žDBÌ÷<±Nk«ØnSSA8K& œ ÏmjË— Ò;O†÷Z–øìy…s!=ö¤ð‹º®ãyçë)=qÕõŽÞf¢Â;¬˜0,„ƒÚvÁ“L.W\ÁµØ“-—³Ùž{÷9NéƒlضeY%âb<.¶+½Î Ce›7¼­âq±ÿdŒx¾>e]„šð5pó1æ²øJ@åF"[ìÍ%Ä9^¸™Œh— ±µí‚8çyÃ?·\±Ì³sçF †yøZ޼þúýò—Iï{ëuÑLAü{Û—¶…tq„ðV,J·…Üq&…‚rïA~zQyì.1.Èï£\†±9äB„ÍÉ”©üûéÓࡇ6²yó %ëuWuµ+¤çšßÏ1ÓS§n`úô¡3€ÑŸ¸¸Øˆ‹ZHˆA3Í‘-væí¦¡'yÆ)#‰±Hˆ“ù;‹ó'* …B¡¹øÀ[o½Õ»,ăÒZv ‹güç?y»Í¼xfµMÑÏŒX´h---.ÔÒÒÒ­×Ü`âû>–eñðÃWDBÜ~<‡÷ˆd²²'=¼Š‘áž©”›’I!*Éœs–%–ijâ›3ºÛW1Ò#‰xœôæÍœ²c3x€ãW­âO§Æº#ä˜Ï}ŽãÎ=—ÆÆF CbR¼(-~ó DòþZÀMÞÇ(\äK&þ)õ”JѽgFÓ,T›M§ ùìdxªÄu í•y­­'ô`OCÙòºº;¯ËÊbB„,Öy7ÕÕÁ«ÿɦ¼fò/éåÂëÙG|÷n…mH¡N†¥úˆé/ZNzäU:z¹¾W¶?ùÙ±c&k×ÖpðÁgbå—1úid¨7ùãz¼yíù0Ú‘G@À>öDq=EC#MC‰qŠƒ E•"\œ8Aþ_WXX*o¢B¡P(Ã=ûyÌžæZJ!¼’Àdà%ÈîË5ÿYCÊNõ»³€bh0À4MV¬XÁÕW_Ͳe˨©©‰X¹r%Ë—/géÒ¥%Ó«‰mÛd2Âpþ~{ÃI¼>¨!àé°'ÿùó¼'_ôÅßµ¤t!.lòàí¦ÑÛ®A`‚]´™ì^þd¥x!m¥!E €w¯[†Á/n¿£^xNÊEË–Á‰'2÷ÒKÅ#nÊ'_Ó5sð Äï½X éÕùB\7Þ»i8úhÆ=ñµÇ¹ÊÆãñnE Û˜1£ ¨C×õ(]ñjŽSȧçºðÊ+oïŸ/¼ ¼^ô%ªjjÝ^XÓi!èºn÷žŸAŸòdîcÔÚÓ9ªÈ1TdJQ-IA 3öh X)®Ùìµøq2ȯ/í]+ÚN€°¥ h BœôΓëïÞ"\(ßõ®•øœFa‡>… .âw$«r;¬Æ)*nCÛYgQ»z5 M#žŸ rêÅòË™ù}5Ñýï%Öqsžëõ×1l‚ŽÞ'ÛËÁÇÇÆŽ<;ŠáDyD™+Qz¿ÉßDHØ¥Gœ¬(,C· …B¡P ?<`S®»x­ó`± Óã`ê‘ ¯×áMˆ¥bhš†¦id2™ž~P +Æ@¡ 
ÃÕW_Í駟΢E‹Ø°ammm,]º”eË–U»­Aô\e®´>â¡]VŽ4€ÓÎ;¯G놄'¿M!œÀjֹ𲓛ẸØ|03u¸JJçØ’Û–"B±°¡ç§k‘$ð<–êºl6_Îå:ƒeòÛùæîݘ»w|09 ‰øûŠLèÿî;î ™LrÍž=¬mn&‘H†!žç•T°L$‘˜ëŠ$“­­ŸâÜsw‹-­ïžLЍçsb"üµ¢Ï.°æŠ+øC[çT8VùïSbùyÍ@­YðdV'V~Rö\º²“œïÅ‹i”htVö}ŸT*U¥ Ã0 ;õ@Kë:¡ã`|0¦i†!¶ãÇ+ AžçaÛ6¦i¢ëz‡ @†¸¶ ™ {^~Ó4ñçÎ-ˆ†ùÊ”õõõhšU¤´m›xµµµ} N44|–³Ï>ºWëH½TŠqýÉxà·Våy&¥÷)¼mFˆJM„° !.M!”µ8ü¹ØÃ³/h/MâëÞÍœ¿<×k)²IØ’s0~<;wîŒÂ[¥˜èRû¤0.·Q©@‰IÁð퇼ÅÁ³6rÈG¶ /VF†7œ8~$fè)*7–b¸!½B}ü¨p‰ƒƒ††—O"‘ÌÿË‘#I’ bÄpq óÿdÕa™sQ¡P( Åðct0á„®šáÃéžd `lá£|ÞWŒLÆìÿ&ÏóX°`o‰Ø ijlþ%‘ž7•(7éëçÍã]ÿú¯˜÷ÝG’BȨÜ^BT2 á¦ë:º®“NëQ.³Â÷}‰™L¦D„3M³ÛP]QaÖ#6çÝÌDž½Û¶q]ß÷Ñ4 Ó4‰Çã<á<ÏÃó<|ß§©©‰?øß‚è|†” ¶º®—x¸555•Ì+öÆÓ4 ¿­D"ÑA¨iôå˜L³T|.§ØK©7L•Û¯0O R ¾ãž¶ZŠ^ýýsØsëXvß4ŽúC×`ÐËÒàùöxtüýz÷»Ù”Ëï7éW|ÌÅs•n‹rYX×°íçþ’Õ°‰ç·+óáMëçó2ØøøQèÜþzîH1cÅ<…b0ñðÐÑqóÿ2d°òCI’¸y©>[Ô+11É‘#A"ëÒ¤£°U3*ö”ˆt …B¡P(†6>pºñ3/üðÉÎ ÃŽa}EÈgàžöS O†¥"†ƒðrk‚ýzŒÓ4wŒ‹ž÷,7{™û,Ncš&Ùl–D"ëºy—Ñ9Öbñ+‚e³xqaÛ55ÑvŠiðAøèGûåœ 6c+®˜Èa‡Ý \Ríæ”Ðùc(¿] 8á—£îû5kjj:Øh3‡×j*ÂvƺC¥¦Šçk8``DÉí¡PAR¡j¤HÑLs¿Ñ&&NÅyRˆh¦ «Cµâ©b B¡P(ýM A-@O%ÐÜ ®+晦XN t™ŒøìûÁζÅçâtN¶-œÃ†{&"ñÌðÀ¦}ݲˆ¨™C<»û†aÍfùkÛ_•w€0`ÅŠÜxãÕnK·ˆª›IƃF„wKŽ KϰL&å+3M˲"O²J¹ØdA‚þ@† AÐï^v¹.†¤P×ÛóU~¡9Åö÷{‘v#é9µcÇLž}ö¬ÜÏ‘ô¾¿s&L:î8ެö\ÉŽe±†ò_gPöùø·Þâ·GÑAˆóyƒz¶†62oVŠ!!)RJˆS I¤hÜŸÛ«„ q͉ÂÀe.E‰,êF¿—®Ú&sÚ©|s …B1²âW>666B: †±˜Í C¼, šš ŸrœO\Ï7¾ñ6œÉ1Ç|“·ÀÓØ¼Þ:‡SÇ—ÎtB‰,{æ–Z‚¦iQ¡LÅÈg UIêA€aœ…®*3ö7­­Ë;t¥J†UíÍq@©˜à8N”oNþ36¼/q‘dÿòõDB Àç nÄG&ùÞßäù# žÙâØæ”᪕(¯Æjè:cׯgÑ˜Ò *°¬#::A”àÞÃSaªŠ!ƒ¼ú[„ëLˆ31ññKö'C¸ll,¬H€k¤1òŒ³°h¥—€€4i,,<< Œ’âåÈÂå·S( ÅàãºBds]áI–NÃsÏm¡µujäifYÝ qÅá£ÒÃÍ4ÅëÖ[_gΜ§1 ƒ €³ÏÞ®?…e|ìcOðÒK?Ä÷E®ñË.»¨!•J¡ižç¡ë:ax-a¨G"œ®ëQj$#_ì0™L²kׇX¾üN&LØÈ‹/Í9çü x™0ük×¾§Ú§»ÏˆBo6ؼ’±Sï/éSÁ—Šf¶m£ëzÅRŠ‘Ë°Ê†!ž'âÊÝj7& c­T¡YuÕ0 b±º®“ËåH¥R€ð„ËårÑͲ¬A=ŽþtUA2–aðÐŽ™¸=ª¾7’驪Ízòè,«©ö”ζYþX}Úi§1·ÂríÓ†ïˆß@‘$‰……q@رbèãà`bÒH#qâýž±³RYü¡|šppˆÇÀÀÄ$M7ªÆ`a¡¡ac£¡ÑJ+4âââàt(aaáâb`”áVÅötF@Ðá<…„¸¸ÊÓU¡P(º@f:2 !”56Âe—­ ­íd®½öM¶l©å½ïÈG>ÒÎw|‚Q£.`̘8âˆ&Þzë7øþ‘,_¾‰ñãÇó“ŸL`Ñ¢vvìØÁœ9“ùÛßv±eK-—^ú[öìy„Õ«ùýïw0wîoy衯q YÖ­ÛÊ믯çÐC·róÍPSs8a8Ó4I&“8ŽÃÞ½«ð}-ê'“É.$Y\PæXw]pœÓÈd„(¨ë"]ïûض,f8âïÂ`ìÞ ƒût1Öe±DÅÈe ÀÊ•+X´hQµÛÓ%/¿<™w¼cèÅkšF,#›ÍF <Ï#GÇÁ¶m𛛣 ¦žçEâ\2™Œ<ÒLÓ¤¹¹¹Ú‡¥è'Še£I“–T»IÃY 
¤§ôôѹ/þWÝ别$j@ë ' ȹ ¼^I–='Mš8qR¤ºôR(&&M@Pu/MŸ LÌžméü?é-gb’!C@P’²ÀÃÃǧ–Úh;>>..iÒÑ26v4¯•V<¼H,÷óÿö±^zrÛY²‘g¶øŠCh …â@Ä÷Eˆ§¦‰¾¬²¾÷½'yøáÝ@’††qLº™#¼žÉ“ïeíÚÃxôÑÿfݺ øÜçvò½ï}•O}êe>ùÉ|ï{æûÄïçŒ3šùÜçêØ¸ñ=lÙ’âü#ïxÇXžzj2‡öÖ¬‰sÚiDZxñBžþ1âñ‹ñ «ë—bY ëïáoÛÄYgÍèÔ»N×õ¨nF$²õé溅|sr{Ó¦®ö×ÔgÄ3ÇÆ8∢¢d’Òj…•ÖëÆ™G1r°jÕ*  Ä-_¾œ––®»îºj·¯„©SßÏP¬ä›Ëåð}ŸD"AkkkTðAz·¸à¦ÓilÛÆ0ŒÈUWŽ0(F.>âÂ{÷^Ê@ q#Ý“¨7áÓ¥×Ë|Е$ØY1‡&àò»ï†óΰó4Ð ”@¦££¡‘$yç(Õ@^K¥¥ r59rÝVŽÇÂ"K6Ê5'‘E Š @ĈE"c’$>>!!Y²Q¡ˆâº®ã8+VÌãˆ#63wî³ÜvÛI<óÌ»X°àÇtÐ_Ð4cŽ9…“O~=tïzן;ö»ìÚgüø¼ðÂ}Qäs#À¡‡^µO¦1š5k­­M|ýëéüså»haGï+Ó”Åfð•¯t¾¦a¸®ˆKëk.s]áµ²øÃHÑ äa¼ðÂ6>7¡NT•Lægd.A *S‘J¥Èf³JŒ;é·ÐÔöövnºé&êêêX²dIɼ+V°mÛ6.\ØgÏ»éÓ`¨znÊb2^z¼%“I<Ï#²Ù,©T*ºhÚ¶­\Q‡eÇ»(Œ ƈ/ÖЛó—¦çùزôÜs.Èo·»GG!ÂUã³3;î¯kñ@"…¸±H Px FŸ¢+‚Èn¨Ø`O¼Duô´bdñy<2ÔS_RÄÁÀˆ<×ä_®*§I!0$Œ~§9rÑôTþ_@@œx$Ê0Ù0*6¡£G÷®®ò× WªmÇ E1œû]!Ó Ù¶ hR“ž_­­­%"I<ùä8þøÇIœpB¾ïóðóyòÉeìÛ—eöìSÙ°ÁÆ4}¶mÇ£Ëù炦­AÓÎ%lÒéMÓ¨¯‡ÖV¸ðBд¡iêÐF)x¥R)\×Åó<²ÙÒëåâųbŽ$“÷õé<†Ç kaÌ4Í(7\yzƒWl!n ìØÆá13Ú8aj>ò%Èÿ§iBˆ+zŽñ}Ã0º,¦¨ٌ괷·sÑEÂŵ¡¡ÏóJ\U-Ë¢­­††,ËŠ¼ÄzƒçyýVL` ÐuÛ¶1MÓ4 ‚€d2‰ëº˜¦å…‹Çãd2™(LU14(;„ê]@KËçEùê 8™÷H§§¿›ž’õæW§çâZŽÁ¯þÛ•÷ǵx0ÐÐ01‰åÿ),£OÑÚ°¬2š$YQ<,÷>ÓÐØÌf²dKÂF“ù™·±üþ"ÏÍ>ö•xé¥IGâšœn`&ƒå‹k¥=*ÒâàDÛ°é]¬¡ÈP°c…¢? 
ýŠr\×%‘HÐØØˆeY˜¦Ikk+¹\Ž\®•I“µ•t:Í·¿ý;Î>û{̘ñ‡ºšE‹nàŠ+Nå׿þ3ApûíßãÕWÿ“»ïžJSSš“N:‰}û2är9žzêó´¶šÄãqÒé4†!‹&ˆk±t*“!«]a¾ïcšf‡é‹/>›Ÿýì¿ú|>4­P¸¡;r¹\¿äïe¦™ýf íx2â¾xÚiâÙKŸp¾-ªiUKõ<ÆÆÆŠy¯”g9E?yĵ´´PSSU^]¸p!§Ÿ~z4oÆ Q˜«TœÍžüÒËxâ‰/Wû|u‰iš‘ s±h˜Íf+ºïfUTE÷ ”ûÀQÀ3ÀÔƒö1×W «Ó$à#x<^U;ŽùÒ(­F>Tn›Ùq^‹YÒËrUIš&š* ŽÊC5¬>EW …œpƒEw^v&fTü¡xÙbï¹bÊ=ê@„ÌJ/½b.C&z舋rÒ…„Q1ŠáÊP°c…¢?)ý Çq¢è¥d2‰ï›˜¦ÈW–H@S“™üØÇ¦£ëpòÉñáž=s¸öÚílÙrO?æºëþ×uI&3‘'™|ôë*bS qõõ­7Q]‰_ƒùLÙ_}pσýpªë5ƒaÇ[‚ZØ"O"Oܼ,xõòäâûjmm%|ß/ISå3È ¥¢jDB\KK Ë—/Þoذ!ú,‘†[ÎìÙ³¹êª«¢ÏÛ¶m‹Þ¯\¹’†††èó¢E‹¸úê«ûÔØ-[j«}¾ºDY(÷ÜS9à†iÇ“©Í\ Õqö€C–,4ŽX!®Ò(à ·‚§]!Æ0d²óufÇýy-,2dpqI‘"C†8q<Œû¾ùô¼Þwt‹mYG¼jdÞ*¯$¡¼b`™={6óçÏ/,ëoúóZäCX´hQ$Ò´ûó˜·wŸÓäÄ…’!Bt å®eÉëtþåPðº@ pƒ‹ ØôLŒ“v,+6 •ì¸/6Ë–MçÎ;…G~üŽDB# g>º®‹Å|T1¼Œ>E%||2„00h¤‘$Éñ¿Rn:)¸eÉba‘$…±khQÎÉF£ðWiÔ·¬–+ýM5ú½Å÷}lÛ&âñfž~z§6–tº÷9Ü#“Á°ã;¿ÿ^nùÏ·ÁHØ¢s\!W¼ïûUÏy­¨>c@ÄI[–Å’%KJ”Ýë®»Žööv,Ëêr$¹­­•+Wv0(ß÷©««ã’K.ᢋ.bÑ¢ExžÇ-·ÜÒë†N˜0«ä³2]E3Pv,}ƒ>d5ÏäÅléÝ&£Ÿ’toÔ&Bx³"ž¼®'žßƒm(úqúj7$OWvÜ×ârdÂád~x --óûÏ Ù½û%–.=—o\†ïû€Žã€ey\pÁ¡ø~i²ZÇqT›œÁèSTBVåU d¥Õžxâ uY á>ÅëK:Ÿ1Œ—æÆY-;V(ú›ÁîWô–0 9ë¬ùÈG¾NSS¾ÉäX%À)Jh;ÞHcBæžûnÉ;ÛR©”„V!N–è­Tµ¦¦†L&ù瞋iš]5—-[ÖiEU€¥K—‹ÅذaË–-ë‘Ûr1¾ïsðÁ' jeÅÇ@Úñ¦pÚnM¸MIO6 J¹ÓÓ—,P4!D7¹^=bÛM½Ø–¢ßÐ`Èd”êÊŽ÷÷Z\ Ñá…LlûF,ë¸.$PSó*›»‡G=‰ã8¼üÈË\ÔÐÆªU ¨«›ÈKk×ð¶±+8í´ï1gÎË\qʤR)’É$‰D‚d2ÙaT²«QÄΊ”H±Ð0Äß0,ô,K„ÔÊeäråZ eUÞ¦iB:->۶ȃ—LŠóbYb[aÞ]RÓD~¼òæ;ŽX&.|Öu± ¹/]ëÉum[lK×E;Gì7Da®d²p¼ŽCä +Ä¥‡ŠÁV` ûáá©ÜpCˆ…-®Òj`'Žîëð®þoµìX¡èo»_ÑS|_„>øàíìÚõu–-«TEeÒŽ}àuBv¾¸“y3uØäwiˆJ„S@¾jj¥Xèâ0ÔšššÈm³¯ÔÕÕ±hÑ¢>] Ã0tuaUT¾ÚñþÁœ¸õ°Â„ íÕ& Ä¡$_~á’•ÌOXŸ'‹>T"È/¯ªŸPìϵ:z ÉÜp ¯Õðï<ÎI']ËèÑ—ó½}Ÿå÷×ÿž0 ùÒ¡_⣇}”9sþć?¼™{SßáÔé»ùç??Køh†D"¦iœzj ¾¯cÛ6‰D‚TÊãüó¿ÅcmåŒ3¾í b!Îu!•‚XL¼jk¡¾^´S´Q„ÒÊõ C,oÛ¥!MV›—Óu]¼âñÒÁÎb±L.—N‹s£ë…sdW(î*ÅAI–¶WÓÄ2ÅÂ\ ­xÇÇ"…E¹m9_~_ŽS˜>\Ù_;.GæSI’,²d™¸kbÕößßv¬PTƒjرçÁg¬å[ßú›77)‡ Å~±¿v<ÆŸÇØ±ca  Rù]Űf С:)ˆ°Ôb2)swxžÇŽÿ©„8ŰeÝ®ul>ì.Ð.*Ù²ï‹'îâžD±Ê!).öPLîz?pð:…JÅÅRùe ¹ér(=FG<É2!À9ŽËš5/QSó «V¦µõ\Fß°„ik§1mí4šššÿýñ\¿îzêWËÛÿûíðCøÄ¿Lfþk»™rû$~û§¿ó¿ÿ÷3îºë.¾x&7ß°oýÝl™|ûöÀw¾Î®]×ðolâÚkµ|ˆ«È?g€o`⧔͊ŸÓã?΂‡G¡¯†ûöŽEz•éºX~ãÍ™xìDàâq°SBûŽk7¿16~žÒN¢i…yòo<^*ˆyž8™²ÚÝy«iZé:º.>76Š}W¥5ÍÒKˆ\OV±U,,U¤A¡P(†±˜ø[W÷ 
žzêæj7G¡àÍp µµÏÁnÇŽ]žçáºnÅŠ©Š‹Q ’ÊäÚ•hii¡¥¥¥GÇŠƒ>¸jûV(úŠì!dîIÏrî¹k:_Pº²»÷€pÇ©¯/‹Ó Ó4FòûB„ð D7R8-¿ìæjŸ!ÅpC†ÜpC;ßúÖop˲ذá þeÑS,Yòt]gïúÃIäb-Ԙ𵠌;y'·Ì®ü$ᬚqü÷û§þ,ÌyyÇÏËš5ç‘öDXê_ÏæfЪåóŸ_ÎÓ¯gûvQ<™„Í9¨­ÝÂcmeÎoÿƒSNù8†áòÄY·a',vüË‹ÜýÅ»ÙqÊ!T'D{‚ À󺮣ë:étÛ¶q]7š/… )š¦‰ã8455aÛ6õõa^àÄDB#Ncš.ºÍþ„L¦ÇI†¡ãº.'œp'žRm“ìTn8ëº%¶RÔ’fA ëzä‰fF$˜I¯4¹¬Ù ÃÀu]LÓ$‚ÈCM.#_RŒó}Ÿd2‰aA‰pš¦Eû¶,‹¦¦&4MT\–4Åa>AäCæ…(—N§ill$š››±m›LFä¥\¿~= .¬öéW(Ý ½átÝ'‘°ijªÎȲCA|3]m“BÀ‰†c”w¿ûP /`Ú±Ïcš9×A@*•ŠîIJŒSŒ‘o¤'Kû¶µµqÉ%—D¢\5yâ‰ùÊg£žlÂgëºcØöÚzp_=O¸šôæ"ÜVäûr¯\xë‰ç³â=ä7ø´ :oé°vöK Ÿ†ö±$±w@r»À^ ™û!<â·ß€_W‡±mög?‹5w®ÈÄ%C²ÙBùÑœ^èÅzˆ˜Ž ³3#¦®Îê߬†‡K“—‚ “ßšÌúƒ_gÔ“Çñæ”7kÆÿb<úx= çÖuñþ}ËÞ»ˆr½Mmž*?0¡î½uÄõ8s^žÃî5»ùÆ™ß`솱 ÁØÇR«×F=í´™†jÂçLíL1ï=@ÎL :QÂ7¾›Ï#±m,X°€ÿ|~!»ÚÍ!ÇÂÔ`*«uQiÅ÷} ×à¯/ÿ•þõÆmÇ›nd”1 ?ëÓÜÔÌc×?Æ[¿}‹Qâö—nÇu]<Ï£µµ•?~â|uæWY÷õuloØÎ´Ó8ìèÃXµ`¦iòÔ%OqÛÜÛsÅpaÂÎ  Ãcëò,LÓäË™;^oÁ÷ßÃÆ;á÷7_Ãó÷>ÏòS—óÏþ“Uƪj›d¿àáPa©®ëFÞe2W®‹â(åÞ—RÄ ‚ ò²Ô4 ×uq‡d2‰ã88ŽC&“)ÊÅh”ìÏ4M|ß'‹‘Íf#ϸb<Ï‹„¸bÎx>éao¼_ä²™¢Xíl6‹®ëhšM×u¿þõ¯œþùÕþZ EX–“6 ˆÅ,²Ùì  ÅžoÂÓ­üŠTüÙèä½â “/\×õ.S)<Æ”O¨««cÉ’%ÕnW ;v\L/#sŠ!Ã.BNzûœüÈs@¬c6õž"\V„h‘Ë•f†—$pþ£ð›Á‡þW,«i¢çÒY¨ZkþoGiZÁÛNþ•%%+ý=¯x :ß—bX³ƒ1øE/3€ÚÕ«y*v `l‚ôê‡ ¼¿’þÊEeËŠÚà’KÈxö5×Þ»ïá‡1x@ü&ŠMËá¶ÎÑ᜖·=Ù –U‚˜}Ìl¶î[Ïîk®`×9»˜NÛ2#“Nþo#`Â%Ÿ¿Dˆpõùet1=*Z’dŸ1c÷Œº‡ æ^óKç•PÞé²óÛ“1*9¢$1“üIÜúʉL~ýi&3 ÇO3e.GŒ¤qxoú½…ó<0²†oÀ±ý×pêúSyß¿½/úš«ñ± ƒ,4è …áùN{çi…v…b{|±L§y§qZæ4±? 
>4úU¾õ­W»óCÔü¸†ãôãà30¶m,m:¨Ú&Ù/xxdû\Æzà‘hÅÂU†Q4ö)½]×D±T*Ekkk´žëºX–y“Åãñ(¬3NÇ…lQ¶L…ăÅá¡¶mÓÜÜÜiµÒ¤ø—¬pïÒu=ò(÷rÙŸð³â6Uzh—Ç«P(†.žWÈi* ÌAèg†ÒÀ{¾)úñô)ÃÓÓ~&k¾|{žª+Ä(+Ý0fÿ71ðyäïÑ46 ¾4ìÇácÿ¦½ÿëj0ÃÅ;w'æ¬|YKè\}Õ²D1Ã($Šò¼¼ØQ”޼škùvÊ3ÏkšðvJ§KÅBÛÛ/§8C|ssé<ßÛ—û6 TÂÈê±Ø<ú2ŒºÖy$×üa/Ÿ¼ìôàNlǼõVÒL¶2S‘¡öôéX¦‰ÐÇ, ä›w]aŸš»ÒÂv~”Ï£¨Sà ã¿à”‰ yòœ[™zØ)y‘ÎÓ# ™|%‚c589ï—žJ¶WÆÜoÍeôe£?y|ï=G¥(n~?&ôe÷s>ZÅb±ÈMz”麎išQȱïûhš…pʇÓx<ŽãÒƒKÀqr¹®ëF"\†Ø¶iš† V]y›Èe=Ï‹¼ÌºCÓ46o®\!H&¯Îô¤ÚH?bçœsΠîS¡PôM]ÊD"å“H\Ä•‡¸•çPºÊþà!Æ“ˆ®W*ÿ×-ZÆ@t?Þ\½>úÑj7¹×ì `Ü:÷®™Ôé2–e‘N§{÷W1rB\míêj7A¡Ø/î_u Ÿ~mûíÚÙÔTé«„a.ŒŸ-ôÎ÷™ÉˆžÍ™6üƃ+“=+¹XN¥Q­XˆK§Å«7È7Yn2 ÷ÿ<*úÄ#x€Æƒÿ_:åM~0îY.þŸ“ùõ;!s\;‰­[1S)aw§w\?‹èxÙGEæÙg‰y$:Bˆ“ѧ€øÞ››…‡§e »÷¼Žß½—“OЉÿó+l?©A)qñÒuQu Xìý+P×$ö‘Fôþêë ó¥èœ÷ÓÜ\èËêÆÉ¤XÎ÷…'_ˆå¥H,ÍüŽpôô¼`fBàóϽ üî¬ ¼¹òMˆO)=I]ýDô²÷ûJg/^¼Xˆ“qè¯HËÅ‹ŸgçÎG˜:u\ÿlpˆáâjXªLÔ,½Ö$–eE‚™,"Pœ­<¿ˆpg™÷OæU+Dz,Kr¶É|‚¹Þ†y麵q) …¢Ãžpº®h^8á•%îûsØ·8gÜH$¤P-6$Ê‚CQ ÑÅñý½ü8b oß^íCé3ïL}™sâgƒ~jÅù•R2tFHGSŒ\†¼çû>/½têþoH¡¨"+Ÿ<Ž—/åíû»¡ è6¯œŽN@÷ËBL8Ü„xÝ/Ût]®Û·0ÜÎÚjšB)ߦ «-§³PZÛ.¬b{¢§W9Œ×qJ½+µ­\d¡‰- Na×V˜öؼåÛŒmj%7øÖ·h­tŠÐÉ;f‰p :dõùy:Î[6[ðÚ¬dÏ™Œxï|÷»Y=eµèÕ™fåÐíëçÿ¶¶–†dÛ¶ØF–Šx¦ ûöD>iãº.Ö/·÷‹LÈYÐ’·AË`ô¾¿pÿaßå„ñ9ˆ·”¶If£±éÕÚ ,ÛÔ©›ñý'9묑YóÍÇ'CvÀ’d8•Ó¤°¦ëzTÄ£8´4›Í’J¥Ø¼ysTÈ@<èºìèLÄ2M3Ú¶®ëQ˜h9Å᫽ApTäOR«ð¾3oÒâuå­bè9B*Š"|_Üãq‘Ër <áBÄ¡ ?ï7ƒ‘“Ž9Ÿå"òr“È1Àâ˪‡Ûâùeíü4ƒ‘uù-þnwk»ßÉr½lòñ‡¤·¾¢òB\†lÛö‘j7£€C¡,N/{ò‘ém“ž»8¶ÿB\qˆh'DB\Où¨? 
ÄRcIÿq(Ü5£6ôp»åÞu!…ü^ò³ ,Æ/›^üP%óÐéù|bÏûðŸÔæçÝŸ_îüßUù‚†!zZÒ7`3°Ù„!œ›€]œÛIñ(ä1:þ Ôš¢îL`-09ÿþuà 9!ä~îwà:Ix=€=¬æëLkŸÖë¯~¨ðq}›Õ†;k÷¼ëB4Ñ[îÁÃ|g—Î, ßGO¼(uº]»˜wИÂz½¥xÎö)—)ùºêX]Y!îÕ÷™ÿîfþòðq¬íã¸òu<¯Âm…-rß2[u1±˜X'›ßC APzì•‹’–%ŽQŠ:]ƒLèŸî­7ë0ÀCœçâÜý‰,‚AI^6éýUìáfš&¹\ŽÆÆF’Éd‰‡›¬,*ßïf Ã4ÍŽ¹”ú)aØv*úùýÉ{GqiÁÞ¶ËF|aòé.@¸ÓÊ[ž^´­&:~©Ñ®ø:¯P(†©ü×½U9îï>éG\.FÞÐiï‘PÏ•Cõò>©QÈ™×Õ9+¾óˆ[AŠ‘÷øì#BS_øá§Ù­½XQˆ+¾×+’!/ÄmÜ8¡ÚM(ÅF<%Êìª×9rx £Ð!âŽ'“’WêŒ `Cð"oÛ¹›9ÿìÌ3h8Ç€å¡ø®< S¥Ž&“e¸ˆïN&vÈ"î¬)Äw+{1ÅS) CdQUÊèaˆü|¯h{~þ³|PQPÂÚLø•nF,kИ+(7Å~ñ g ñŽ‹tºÐ»^G Ž˜n•/ßÈÍö#ãdÒ³G($±òË=˜„ï$ÄöÇÚðñ8¬¾kðC?пßá ñ >óIBo;ê>ÿñ"„4 áš}ÄD䉋ѷŽé'„¦=[íSÔ=†ü…WÞ˜Î[Ógtœ_ÂÝ“pñ\N|‰DÁÐóÄßÎDÉÆÆBèz¼m‹sFB—aà ŒÞ [tЬ bQV•cétÃ0¢ê¡27›ôh“455Eb[¿Ÿs;äP(R"Ÿ¤dâž$|Å^gE祋ÈP‰¢}] ×TyO!ÜN‚ü{Y±¸¸_-j‡•ߎQažA¡ÈJwùý'¤TuŠ!‰ãÀ±Çîâ¿ÿû}•öyY(¸á‚H6".£vþœ”_þå9êGµ‘|®!àñ¿~‰šsít™žz«Ô‡a!Ä͘ÑYí¦ä«†¸j !ÎG¸’€è Ê{•¼ËTkÔ×C\qc7éŸ,§ç(tòGèÕyB0“Eo¬E×ϨvS:'@!­‰ï£‘ÂCšLŸÎO×(:,N¢áS¸“§)lP³óïÓùeÓ%ËïSÚ«EÁ>@×`f|’ʶ]<Ì"òÌ¢ícÍ«×à+i±N¹î‘)Û~ùýQ/ZFVç4_‡Œqà¡æ‚HUkf_Ï) ÏÊP-¶NëóopÉ„õ"w[q!~ N¡è¨Ô}{DSh‰ž.]UžÓ4þý?å=ý¹QM+ÍרUh® 镸~ÁÛ4,  1 ƒl&SðÌËÏ?,cÜÔ©U=ûC˜ÿgì§œëº<õÐS¼çûïÁÁaίæð±| B0ãÂË­¼ó]ü@iFTºé¨K‘ËE\sн¿d²EñÃ)þúåæå@Ëf rÐE>Ihé*Òšÿ+÷%â`&LQ-Õ× ý"yýí³Ø3MV.ŽKש|ÿw‹¶Ó”?³h») U¿;#EaàÄ£ct¼fÉB3>þˆ|p·mxï{-âñx¿ydË.jùeh0¨tù (l’ãϲ=òíPšv¶x캿éW }Ê3"R¦½’Õ5õÀaÈ q«W¯fܸc I‘"$ä|ÞC†Äî?’ü¿·á_p$&æ~w¨KWªò¨âQØ¡0’ê ®”r¤Ù¢ dèˆt¹X1XCe´Š}–™;)jSq(ßàK{ñ€ì£8‡\=¥ÑÍÐý8ÂpâjjÖòÇG?Τ…Ï3·ÚéŽx¼sNb¬ÿô§ÙºjUµ[×'BDjÇC0hÊ[KoÂS=Ï#‘HDÅ´C5^zßK|îéÏqÿÑ÷sî¹çB“Þ9 öPp=0Ëv$Ÿs\5ŠïÁÅÞ`ÒóV†”Ê -öÌ  &…ëp_ž<ãúTêãEóË·_.ö…®´œžÖ*+Ý?LXß²ž™¹1Œ@Üü¿€— ™¨z°‹‹ƒƒ…E–,I’ØØ¤HáâbbÒšïDÖRõ­ Zi%E*ÚŽ‰]“ut’½Ï(öóññðH“&A"ÚN@€‹ñ ññ ‰¶!×ÿ\ø5¾ï?(jñ2Ú_È}#o†S˜æ/&àaö¸ç±(Û‚­ÙÑq%\3H’N÷R‡S\Ä:ÿPŽ4¶²Çx'óC²dÑѹÒÅqÁn0Àvë Öic0ôqd³Òc|ÏšÁ“«9^?„ÚZ8X{ƒ›â¿cª ·k¯P›¾ƒÍÆfŽ´Ê.ÿX^oZŽäÓ€º›qŽh¿|•Lîðwrqz<)þƒmÏUQnð†›3çSd³û_·T>ö$éÞv éïb Ò9Z^j士‘}©îºÌI*û(zÇ›áöí{.îx³ ‚ ßó*FC^ˆ{é¥Ùœxâ¼¶›h>bd³xoMåÜÛÞOrÞ§HÞjÂKÙÚ 7ã|Þ!}úE¸î%èoÌÄ=ïaŒßIâs ôq31þ~ ák!úv€9ÑÄH…á9b*ã¢l #ÏåO€r~ù(q%ò#ß]>&)tÒe2b‰d*T(+»–ÜÅbcqô£×4­4?WŽÂÈ)Ú†ÌÙ¢QOR%-N$*P®îËŠnº­CB#Œ*Å…aU}“m‹ª¹ÅõÂpÕf î0Ç{oýöõÌ>ûa„!`töðÖW: aR ãZO@ûëûzÏr—í'²ƒ(#”e*)©;@a¬DŒ#t4¸þÔ÷en/ º®éºu°ŠC 
=Ï#ãy^”Ô>™L=önÆ´¿ ßñ'@TÔL§Ó%Õ-}ßïrå븀IDAT4G˜eY¤ÓéN;v]­†aIÛdÎ2è$'Y7•p‡#6"]ã„ɇ™J8Ž8R‡ôŸœEÄ<ìÎæeN‰;ÜÛx 0™é_ºçòas*Éäv ް~‹Ÿ¹žÿ ~Ƨ‡p²1Žã‚-dß0û‘¦\š1 ÖþÈäøŸ8Åÿót¸%–f›øÂUÏ¢½²™GžÙöCÿ—…í'0fÜ Ü‘Ý…çý]×¹ðÿ½‹5É€™ÿû"µcâ­ÿÉ u—ð€õŸœý³ë¹ý¼Ï‘NgqœçxåÐ÷ñòª9üOð)q®LØ1÷hîp?Î{>ù5ÖýÛ=„{÷’J¤8äûسðNb±k£þó»>±‡ûÿq"ÓÎü éô»¨­]–̲ï±Ep~µ-lÿÑuøÚ×â¿x½óp>ÏëÖ#_f'‘γÕ|”©‘ûŠÚda!)ºÉôÇÒ™ZÖ#ëä£1r’ Xïoçø1›*ÎO&‡B+ÅPdÈ q»wÿ‘G?¾‘øoƒq»À0ðdBé'gäCÒMƒ É‹„±›]p’ß `šCú§&áè»ñŸ7a-x‡z0¬eáé!Æ3ÂsŽ,LÜ5vÁáÛgæ¥39{þÙ4~»‘ïÙÁºp¡FcóΣfmM!Á|ñ"FÂeøeQrâ0òêo^eúüéüxÕI§Óbä0©C îÇýœvÞi<ü3á¡ ,pØüÃØ3c/O~™1cÆ0ý»Ó±< Ó4Ñu{çÝËbã8º®“L&¹bÓü÷+ÿÍÞ+÷r—{W¡Êš‰5‡h”ݶmâñ8zRi˜ì½t/7L»¤™dÓ-›˜zìTöþï^nxþ2d¼€{޼‡K¿”0Ùóôôõ:Žáx™L¦D@ô}ß÷Éd2ضaèiT*ÅÕ§_Í4w–oqÒô“0Ÿ0ù¥ÿK’A<¸tË¥Õ6É^³ùƒ¿àè?^Uíf(ûÅàÙ™¾ïqþqØa Æc«”d§Sj²À¯$è|NDç_å—³gÍ›7SSSØ1c;v,¯îÝËèÑ££q Øûê«sðÁlœ5‹C<’Û>˜wmÝÊŸy„Ë.¼_Ó¨Y»–µk×òoÿú¯<¿izˆ£Ž:ŠçfÌÀooçÔ½{©M$ÈNJÌ4;ú06í™Ã­{×r ðØ¡‡wìÜÉC¿ûºiòØŠ|Ò0¸ãë_çè™3Ù>nìÙÃ/½Í4 Ã0ªÆ™Ë娯¯'—ËE à ‘HL&Ñuq}Íårxž †Rˆ¢u,Kää1 ƒX,F6›*& š››q]—_ýêWœqÆÎ{Ù kR”¹,ç8"õÞÇÎÙÈã÷}ˆ…o=ËEo¾‰™L21FKiž!ŒunŒ¼3@æÝ“Õis¹Òâ–%æYùiaX’\WˆÝÅ›A Öñ}1= ë£ £¨ât(Ê–#‹tÈeн=O¬#§IñÌɳ)Û1!vY.|¯Â£\<.æAḠ ôU Ó)N“àûBl3 ±ÿbñ?™,äA òîBÍÍtY0&–ïðd2%Ë­ýõÂ.$$FŒ4i̼Ê[œs¬x @þ“Ó `Òq“øá'~È«ó^®Ç|ÿÊKaH­Ÿào·Ì"yã |Û¤)©·[™œÉ홯ñîàîM}š&oe›s K›þÉŽeM°›/øKÀ¿’ïyÛyo²€8q¾cÃ_šNå4®€f¸ÅoáâÆ/sbú~Æ hù‘—ÝÏÎæÑÏžw<Í…gN'`Êa[Øéƒþ¶},žç¡ioûÖ³ìÙ³‡À})Yp~wŽšÈÿx„͇½ƒ€Àøð³[8êü£H$š¢ÊǼíÝŒ¾úJÜ_Åùò—oãì³ßƒëºŒ{â|&ýé<|ÿþò—Ÿ’N§ùí[_dÁïV~ΉέçyÜÑZÏ|™Ýß»ˆ;[îÄ8Ùàög¾Æû?ü~¾žù:š¦á8‡ûÿ+Á+ùl^®ëbÛ6o}á-öÑü¨ÚfÖ/|ÿûŸ#Sü{/.F$¯Gñ¸xA‡œ¨Å^pÃqI oÒ/B:BËqήrªgu‰ÀQ¯œTí¦(†C^ˆ«­]Mx|Hú¬7àO­`kBÌš1¶Sȃ"¯¸’o˜p®™O ïƒe¡5­ÆDƒybÄÛbßÞNxtHÆÍð—çþ§Þÿ)Ví]Åòc—sgÝ\þ¶Ë™ÊTvýrº®G±Þ¶msáâÅ,p¿Z®Ð˜áÀƦÿ®i°öðµl¼v#µ5ò¶.¢f| Ýtc>7†«€Žë`š& >¹€#/;’öCÚÙ}ên޾øhp`òL† L '²uÝV˜ ³p³’7®âØEË.b’1IÜð-á aY𦉍)¸þ‚ë¹”K1 #òTs]Ó4}Íh´PxÖ3Ʊö͵wÙqdòc(º«£;:„`h|Ð /|!ñ¢Ž~<>·3›ÍF9iäÃc†„N(¾ç \zÂ¥¬fxåÁ8äÑ™¶}^Ÿ×—ܶmÓÔ41” E÷¬!à¨ËžGÛ±…§Çœ5¨û–NH$‘P$Åý1¦ð$pñêÕLÞ¸‹Úþ“©›7s_&C:›åCá=ïyW]uÇL˜@:æ®W_宵kyüùç¹æÊ+™ö ´‡ÿÊ+œœtÒIür÷nݺ•#åLJ\:w®yÖu|]gþôé¬5Šñ ÒÐÀn .ß^–,¡§M‹2LøÞxÆöyîûÖ·xÐ/½”`ÌœIë¿Èf <á<àî/|'NŒ„ÀiFùóƒ\ŽZàðgžÁ=šp CŒQ55EE)ÉåĸKþÚkRtý5 
2y¥xä6žË±uëÖ¨"hs³HøÇ=z4mmmƒjýÉj¦æßK¯¸b¤scØ~±Ë¡w¾ŸÏ|çÚÌa¿ùüÈ,$Úœ_)+x¥ÉJ´²rmtž2! …aA˜ò<±\®£¨$½{Z¥¼€GOH¥„xÖ“ü†QÈX ¹òãîiû¥ç\±0X ]ïº8‰¤\ͳ¾¥eD q2Ÿ™MœxIx¨‰r¶mãyÿûÿËœ¶9X·Züä'wsè¡[˜µx ¦-`×®‰Äã­Üÿý<ñÙùð7êˆ]cÓ¦‰lßþ$öbؘækÙñŒ}ën¾y'&gýæjN[(¿ƒ€kÆ\cà é/ñxœ¿>ùW¼økÜ7jKɹ¬§žÿà?ˆ7õiñ°±ÉÝðIàB@$fzÏÓ7™ÎSfuÏTPãÐgÌѧæˆû€#:Ì‹Åbäz{/V y!nì霊§„pº?F„Îî¥àO[œ3E"óžFIg4 '2M0M¾uÍG¹oìSÄßbæZѦúæb¦MœFНò“–NB_ªƒã4MÐ42¦Éö¿ÿMuü#B1ó9cð(ädÅ 7`>þ8{wì਩S™ñË_Â)iøA’#›/_ ë=2òÆså4¸N öÀÜ· Iô¡vZNŠQGW†išFÖ0 ‘ …Py°.=DëYL]:ÏQn= ®™…ýÞR¶ß8…ášý%?Ü#Gb#±0ω'¿ŽÆîï]Ê®©Çöyý p]—t:%Ï BX B! €ßoi­|äGN¿U0ë-ÒöÝ|hY†8Žð,H¿úÀ:¶­^Ç9‚ø¿­gy=ø‹_D÷€ËtËŠCÒ,ࢆñyÒ$–NšT"ˆ˜eã‡Þi›+ÉïÿOž{ÇÉÛíeÓ˜8±âí­œpôè¨Øñæ¢é2ÛAq—BM™’L2öu=ʸ ÿ=q"‹9( bj‡>l…88€C+œ}+?ä­h'¹û‡œ4áf®Ùº•±£Ϊ…â²·G(¤l(‘÷^]/}:KÒ\þ°Ù•V†:PôT„“ÇdÛ]‹dûK>¼€‰²ml\ܨ0A±ÈóÜs[øÄŽÿàOîË\vËõlÙ2šÑ£¿Ë… ŸcÖ¬El:n‡º:áëñïL#ܻߚÄ7¿y]z·?ö->ôŽïòôÓó¹í¶íüä'_dÜøÚ‚™€–× áè?<Ï´©S¹øâ£‰ÇãÔ×·Ÿù>F½ïu¦l}†˜ôX&|'í›Ï¢S&óà²ïrÎ’:þoí÷hüd-×e¯‹Œ Ãàˆgà§_ú)OÏ~÷…Ë?x9{ßK¦©´¯TK-W^xeôÙ0 ÎÏÇss¨¿luÔu8Ÿ9rR ˜˜ó’Þ7ö>‚k‚ ˆD%#ÿ¯œÖ ÙË:+xÑU@Ënn& C¾óï ®q ž{÷ÞNÜ,ó,Ò´R/Xè ¾‡@ì+_Á|ôQš-¿wÓÜÿÔ ¾/&SjkE•ö^«ª$ĥЦõ4§›bˆ2e {κâ,%Â):cÈ qÏ<òo\ñü³Î_p“ÀdÐÊïY•®\2I;ŸO$ C!j ì.üÔOXxÍ5hÒÍÙqˆg2ÿŸ½ws«®óÿŸ½Ñö„–©PδØ/Ð3¶^ЊžˆŠ_`«'¸ŠÒÝÅÌÚÝ*ú]MÜÊ.^X“ýíz«_ØdÑ…×5ÁYp]os¼ âÒ2Q„AÊ íÐR`N¶èï“ÏÉI&™I¦É$™¾Ÿ}Lgr®ŸsòÎ'ŸÏë¼/AE#ÀkÐuÿ L¾Cžsä_œ3+ŸsEåú±óù[âñ8ºN‡iRT—ËóŠqô»°.œûÄ0üÙi‚^"ÀD£ñ MõT¾4OŠÊË¢òĨ½mû?åù¹œÿ4H=ÕW„µ¥Œz‚¤BfÔÓ~µ¿a<ÂmËd íÎå ypÔùfͪ—iM 0{Ñã¼xÃ~ v¯8ÇqO8×uƒ|SÂÔ žº—½•èç r$NãëðáÃ;´šp]8œþÿ½óûšÊ°×Šã8Eù×R©®ëÞXªÿ?´2‹a;¯^¹ó 3ðð¨D³?CïûÌ7øÉ}ëðœ} +ÕJªxv)z™×q|1N…e*gî¥ù㨙‰9sÈ]ziPiNÒ¼uÍ.½÷Þ¦ÞÇcáÕ€º;AzÖü×ûG:ää7ŸÀßö>ÎÞ;‹­ŸÇÏ­)@}O·î¸–϶i6^ˆ&‹‹Mš49rAeTÇq¸çž=|øÃ¯ãùç³=úq,Kã¶ÛNáòËsÇóƒøcÐÂÐÒ£-üþžÓñ¼ -_L:.*”ëwq_¿üë~µ£Åš ba/0Ïãºç¯ãÝ'¿Ý,œ0Å~ú£¸wt©…© z NûßÍeÒã=¦ð±*ô²å–W*TqùTJ¬ø`¾ I&]®>ï+\üÐiðò—WýàÁ!µ`± ü9I*åÏ)àØÄ8×õ½—U?W®€„šCRÝÛëŸ7n‚‡e*í¸Gã HT[¸ÊÁAËÿSÕ„Uua§¨Â¯ [W…QÔ2·¦’GÔyTâÑ“GtGËÎí‹àáãïapl´¼wÊŒèWíçäLjFŽQÛóô\ÿË8ìÅ¡<‹,Ëâ ]GûÞ÷&>`E{ùüÊùã©ÐÁðD¯ì¤µôKSgrÉÏk U …Û•÷¬¸O­_b†QœcF á¤ÒåÂn<¯šSVcÛ,øæ7k¿GM¦ûò8óŽôUµ­ÊŸ§r—$“ÉÀ¦t]÷“#çó8Þ—ø1Ó4kÆ™T[\× Î—Ífq)‘H¡ÊKI­S!Í*¼9“H$Ðu˲èééahhˆL&ƒëú9£Ñ(ét:ðþK&“Á9LÓ$‘HN§ƒú¿ôø¥ô|ã÷X 
óçÑ#8«þˆ1§dò|äHáØ¢Í? ð MQp ÑÑÑÐÈ‘#Mš^z±°ñNyz*ÐÃCÛõGœŽ£«ýß ^óæUDöïÞ[pºhG!Ζf/§ã޳ƬËd2hš6fü*ÐâBÜS·ÜBgæRŒ«÷MꩳªÎ™J¥p'ð¨A£^ÔûxÓ‚ñBn*MäÇ#M“gúª´Z…QN|®z/>%Ž¥Óé@ 0ÃaêI«òÂÊår¸®Ë·ÿømîû?÷?!ˆcÑh”l6|J,SÕUUGUR‰YÊ+IµAµG‰Ú*ŸJ8‡à,Ë D¾d2‡áV6› ¶QaE"º:žišØÎõÃËÃÛ‡· ¿‡Éd’Í›7O±%Ê/ø’}žµkßW×c H$ñU½WjÐÒho‚fpÎëð¿žÄò6¾¶5O?Íh›yvV$•bÙ?ÅöNâe·þŒÄð;øøüG æ.“•O¬Ä> ¿ÌªŒ¥…6  â¯|ÓLLžX{óîfÁ‚S¹µÙM„"zº<þk·C磖Uw]\2d|çIç°ÃïNðx¬ÃåÅÏh¤V¸Ï»xG<ô9:Æ“Þj9bÄï-çÉ÷¾…·d æ§ã=½çÜð¶oÇüŸÃØo>Jüò¯ mÓ0îMâü‹‡–ÖПÔ!ö^ÝÕÑê~ñ=݃¥¾€5£h#Yll,þfLˆqnhßúœx"æ3ÏŠï¨È£ð<èÃù˜Ê'ªR†ïŒ1W#ÐËþ7Aï_!òŸ“œÿb Í‚[ë0cç^o¢æï2Žã?` OÇÀÌw-!ºõ­X¿9‹äGlÿ½*ÜÓ“—‚±XY¸€¡¼£Åß'èú«ÏÃóÏ3ô7ñGŽÀ‡šmµáËÎû6ïì„’RfSá+´/--Ä=ðÀìûÓYh«'÷TKM~Óå܈a xî¹Çp¢wc<Ø1á¶N>¼Y ;aQ§”ð“•°8ðøÎÇq:8÷8w‘ª¦èº.±X,(ÊaY¹\.¨b ùL¢˜ú)-ŸÕº°0þ;ì½þ>NøËÊ(“ç0|?Ê ßáJÇ’/ÄÚñ€¸¼ñäŸ1²öØ 5$òI¸b±XàÕlž|òIÎúÜçà}õ5…ñQs?/^~râ8xW‹]§|œ{½‘ f®¦¿¿ŸŽ/tð·ü-ÿÐáNï-Íi·CëDÀ濦Æ8«¨Úʱ]uUÝm.çï«>âêµrÆWs3M+ÌÕòéxÉd Q_–å¯S9ÚUþtUÏ!™ô›(ã«¶W•Ãýw¾óZÖ­{®Ù·¶n(oŸØÿ&ùõ¼?òÐCwó¬ä¹çîâûß çŸßyì'„:’»ái^þÈóÍ?ÿs>ž88¾G èèX?´ˆuÆpŸuyhVŒ×Ÿ®“žÚIàœä‡Xêè¾2â¶Jæ€K€Þñ;Ž·çÖªíÒ’b ß²ùþ·‡ ªùˆ ÿ­ZL4SÃÀ¼OK½=!ÿ,çÒKC òãyÀ®®±EjÜž”Š^1ü/¸C:q÷R´} ë vk00Θ·¿ß¿ácg2~‡šL:zÏk’þýÉþüï0<6§G 300vY¸0‘"™d€8ÞwAv%¿Üº•÷Ô覘¥gÞÁœ3>2fy#S¹íOK qÛwžÁŒOü#¼¡¶ýÂac‚ÐTdÎÂ?¢igŒ»™mÛ—Ú±òîÕï¦>Ì•ÅB^iá E©»´UB9žÙí±ïgŸ`ÅßLnå)JÕC’ãm rÎÆÅù/…)Á#_HpðÐ÷ Þ…G;ó_yÅ÷Ÿâ¤5äÜÎeAb¿ÑùÝ|ŠV³#OßœyÎ$[ôwø¼¥á°ãõ²ê˜ás˜‡»ø!Yê“ó”'j…Š{.ñN+ÜŽ]6lÐà6–»ðw1›Þ^ž¤ž¹tu2K„s„iZ!UR"áÏÝ”H¦i¾Ã†¦Ž£D·àós±DÂÇ”ˆûëT& •£=\@U׋‹Çšfq‘YÃ(´Mµ×ó| %ܽÎ,ä4-xeÃío*È¡—^’$ùØ[Ïaݺoñ‹_¼–Ë/œtº·h[UôWÝeo‚0ÕØÿxË–ÿ’7 —ˆ_ß» ªY²9Ã7Ú$¤4(ÎE¶^?ß‚êLs Ë)ªw¸—m7ÊüÃïŒóUÒùVEó볡c…;~Õ6Õ Mƒ‘±9 «B#ÿó3q.\\²MWþ$)þQ_lA*kl̹Ú'æ>:w E„< t­p,üöhž†æøç;yôäÉ݃&2ŠÃ“ÿõ^žY1·h¹JË«¦ª·p\ÒÒBÜ®ÑfÌ:ªœ£õööljÇãAˆŸ 4› ÇÿbUÂq<¯‹&yQ„F0ú½—q÷sçs±qFMûõôô ë:ñx<ð´œ.‰¥'Miõ7aJððçLq }Ó§‰`óÙ7>Îࣃ\¼âb|ø¿ò`[À«ÀYY<2(ÌËÂRj¸š®N¡íDÓ“pêÔ ž/P©ZG†N"À=Q˜¯ÃIINeuÁ<Í`Rò`}¼8Ü–È‹nI8×ôÛ·¹ÇŸ{}Ùõ÷}Hó= `¦³t‚l‹ÖPáºÕ=„‚ÃIÿ?+Yt¼Ðo5µ“…„ê:àÆüßÏð× 8âÂÊÿ¼¦®U5=À5òÅÙóí •³ò)S•$ŽÏtüFèCþk^|{~ÓÝo{o|ä‘:ZZsððH‘âß÷þ;CÙ!æ®üs¸äÒ™XW\A‚‚ͪªÈ9 š‚K!¼EÁ~Õ<<ÑN¨®KÿÎ38úò~ŽKñðüʰJ¬qð ÚYàY~¢Q,•%Ü'¤(ìhâ¹*…ŽJ ½.ÎÆ;O2´Ÿ¢´j/…N‘к\þbrùm’ª9„={ÕÓò×HD”±OjÊ1D±ê®ãwêI:—©A2ß«c«/Î(W` 
þéù÷ÉÍo§—´;Œz/U)øðý¶ýß'j?!î><žû—«8û™ÅEËÅ!H˜ˆ–â†^ÂÊÓæOØær~¯ ’º«üQõÂCD=arܵè9^ttFÅõ*G›ˆB«3¼sú ûÑ´sjÚ/œ“Oš‰‹ïœàâ`Üs¼ÌÄûÝRn{ázf=óf®þÅ<~±ìTLòÄJîTMÔVS)¯¼BqRiš…âཽ…(ž.?µWB©ôZuZ'ÿ«Á=øm‹‡€Ž!Ô^•æG…p† D¯°×Zþ§Ü¡5`kºm¸2 V~£ðÈ+ƒ?¯~VVz6ð'fu¦7ÿ£ªýŽÇ¶[nuë&üQÎZ7›/@EŸâj¢‰{{}7% Eü(«@(‹Å|AL9í'“ʲò^p&V†1vÎUíÇË0*9ŒwÓ7-pt0+œ_‰úܹ0oÖ¼yè;w‰'¢-_Ž—?~ gä?HÑ5kHþøÇ/{YðaÊ<ÿ<îiJäËîÚ…ö/ÿ|pr ‚®“}ÍkжÍ<ø ñ_ÿ:ˆ«Nœ|2‰gŸÅ›?§³“£7ß ±XÐ.Kµ)ïàtv¢Œ Ý{op.gÝ:Ü 3ŠLûU¯Â|ðÁ¢÷Áéì$zùåhEªý­oaþä'þ1ón³îÒ¥|çî»á¼ó¦Òôê‚=ºŽß¯ºŠ¹]O’Î¥ ^p%Š[”qýjQe¿¡X*-Þ G&½ÆåPððkåŽÛÂëŒ2ÛÕ¬–¶1Ï=Çó3æ-RãßÉ:ÙØ+M Ó‡–âžöYN™·dÜmÇ *<6#¨´#µ2ç„ùtœPÙ#NòíÂs»:9å•7Œ»ã8A_ÇqÊæ$l4Ž ^„²¨org毉½ýóD¾ 'Ÿ±™;úc³aãNzÔ0Ãq|QNÓü¿§mœL è/ãQú¼0ü¦–t;Í&œG¼Íü$¶Ém¹¡¡Ú¸|y Äjyx[]÷?'+W½%0´sç˜Á<é$™ëIõ%K˜©îY›ñã=wñäçnæÓsþªsZÒ«v1 ­Èã?x,Z&a©B5´´·Ç3øÓÎʱ⽽½¤Óé†z[H¾-á˜8™_>÷L$!‹5Ä‹SêÉ(<÷ü„OöTA†x<Þ4»–TB%üÂî‰ãþö}˜ƒmÿþþþªTÓôE¶\®P`À0üåá"wR¨]˜JúÝ!Öüf7æÆ­ÑhA¸Q1Ïj¢hÛVeg˹ÖšÛ²Š‰¨Œ,YW^é/T¿'²ƪÚãÞ¨¼·Z2YÝ~ª¤oø’Ô«VÁë^7vèMIŸŒê^á{øþ»ï®ýX-ÀžÄe|ø–ÿCüïâeÃ7Ux»t«B+bGŸÛÃIã8]B%ZZˆ{aÖ,Î}ñK+®WÞDö$´,çýŽ—\øk`Ó˜Uýý HŒ# à9<ž\a­¸ã8Au¨fõÉ..6•cÝü?ð=ž]\<|’r"ž‹Tn›§¸r[˜ùÂÔ#‡õ ûßá¡Å}˜¦‰QÃd8¯-Oœ 4’9ô\œ7|doyË·9Ì•ÅxžïÅæyá*ìec§ZŤvAŠä´..###|ôΖ eLᇠgk=° L!'xí©µEË"‘Ȥçy&æ¸ãXaúÐÒBߌÀ ÏŒY¼oß>v8À§ã@A˜6^°€çÿž™=cÖÙ¶®ë"" mÁ]Àšw]alª¸M.—«{¡œZ±°È™p£n’z¼÷/oãéïeÙ÷–Ymã¿òÈZhuŽÎ^ÀKV?È=ÖÐT‡©BñaZÒÒBÜü‘ù°¶üºÛn»®^Ýì& BE;ýtžþâjfÞµxÌ:å=Ô("øYÂ5´q=zrù‘k5^?Ê{¨tâÔk³ÈûH½®ö\Õââ'NL²‡4ŒU8<ùæ/¢i©¸M+äÃÐÑIÖ!ËòTz¨ÕRÈÃÃÁAG>K..=ô'އWÕÓTõyßà >ŸJ°ÔòÿÂ<òâGÐß^t ‡gÙ™â²Ë^ÛTáXŽ… ^vøex£—óÈ_YðñÃÕWÃûßïÇG›fù$…¥¨’ªª2£ê:Tb{_Q¯=Æ–I…âDî¹ü6ªbG2¿mÅÕUwÈoß_òZ•?U’}4 žT Õ"TÉV-ÔU1Ó¢P5³\7©’¾éø.Yኪ€Ú.Ü}¨¿#ù6Æ(¸t…ËÆ–v9*§¤º®hè~ú÷JÅ>Nìô‰ßëVÂqØsâÓ¼mæ)cV¹ø!©Y&¯g ÂTà/¼íç<ùô à]ÁòZ<íK10ˆ'“ÿ7ÞüÆÁ©*%K阰4ÿ½š×©åêué8±–kÏùÔÃîñòî«yd-‘(תñr8FµOµËÄœôÃxÅ×ü‘7îzcUÛ¶¬çâÂ’ðõ±ë–-[†yÅÍn¢ LÈŒ;W2íü¢eS‘Ä~€¢Že¼N4N<èUÇãáyá´2.."c:Vuíaq°ôúšÅ‘óŽ4ù®ÕƳObù÷_—]§òÂIá‘Æ£¡"aé ÅÊÿ«çgÖÁ >W¥ç»ô~ž>ôt³oIÕ ÏË“£w£»Os¹uˆyKs¤%™›ÐƤH±æ}Wñé—?éç>3 ?oY29~)]å€ÇWÑh4hOxùw¾ófßöšpÿ÷ìJž®³ÿÝ_ Š„‘¡†Ðœ<:ÊÞ+yåìEÁ²^ŠkTB;0°pÚîÃÁÃ=Û¶%e‹P5-+Ä=¼ïe<¿¦3@×u< mŬÃ?á„e¯5M“„öBÛ±û”SÞ”ãA$EB;ñõë¯gô“ù+ïkK/NÛ.doÏd2eÛv „• \‘H$Ø®··7«r¹Ñh´ì>¹\.ØÇqœàضmmöÇ)ïÂ÷Ö0Œ`[Ó4ƒuº®“Íü^b±XàM`šf 
þÅb±à;Ó4MúC@‚}’ÉdPÉ0Œ¢±bxŸd2´'¼ü¢‹.jöÛ[=½ðéí ðzÞ¸èÍn LŠ®}3ðÜs8wÓ €ï §ŠÕ B;q ¿ƒGB¦b±˜¤kª¦esÄ.~–Õ/>(ä!R(‡©Mâéà!2îz}\ï¹p%‘piÞÒê'jyiÕÊÉ”òupŠ&–¶!Œ]”¬ÑÃs]ª"¥j›‰YÔ^u>£(‘½Z_ézÕ1ÕkUòX7ü÷ò÷,ŸÂwþØøý+ãè^?y§zB"BœÐnÌYÀÌK1ÍâŠl®ü+õä÷÷þž§o?ï½òSvNÏóŠž’‡…)E*• ò,¦R)\×%Nã8©T*¬r¹\‘¥ÄªÒ¿Ã9HÃbSx¹eYÁDA×õ1bUx»rkšV$|…Ó/” qŠRoðð}¯kdiá‘>êr댗òÑà ˜û…¹Ín LŠ?.fÎ).¯{ß›pñsà 4»Q!ÂóN×u«î;Ô¶žçáºnПUêÿKç‰D"èƒ3™ ¦i¢ëú¸Û% t]'‹´,Ë ’„÷‰ÅbÁñÀï¯]×Ŷí²Ûe2<Ï#Î꼑H$øîH$XùHå¶Ëf³hšF$!™LÛ/\•¶¡xzÖ'h…÷_Ó4yP-TMë qœËÿÒŸøår¹àÞ FŠj‘ Ídól†óšÝŠêØó‡WÑñÂ(àO:¤BŸÐŽš½“‹_úc@žø mÅ­ `Þ+ruè+‘-ÍûsK$†oŠ 1Žã”Ë'¼¨0µ‰š‰¶SŸ{•/»ÜvíÂ7—½Ž#õIõøP"Œ }/–ÊO mŠd2Yt¼ðvãy‡…³JÛ…Eºðyu]/Ú®TˆSíOˆ+óE;+OÞÿKÏ™øßó" µ0eq£££ôõõqàÀÖ­[ÇúõëÇÝ~Æïý~ø)Pyæ+L5µÚðÁçç3göIEîå‚Ðljµãg~㱤ëwÀ€ú–e„ÉR«?<ÚÉÇÎ{aÜm<Ï#—+xÍE"Òét0 R““°ØV:yQM¨…ZíøW¼éååCR=jó*r]—L&Øs*•²¬ $MMäËy©Ï…F à —Ëáy^Y/.Ã0ŠÆ@¥ž:аR­NihóÀ@!ȱT\ oWGO-Û…û‚ðv¥¢I©@Séš*m^^º]³¨ÕŽW=ökÞöžn@ɺ°'Y4%¯Õ{­ëzуŽJï³ôÇB-ÔjÇï;ƒ7½õù¢T‚P-Sæ—H$¦»»›D"Qôt§³OÞ‹çyE•¹Âˆ¤!L5µÚð™GF¸¢ç…*.SC­vìÐ6s6®ëyB3©ÕŽß8w;þ³±y \× ¼nÂa=àOüÃÞn2ÈêM­vüä/#$¯ªì9b„Ž«èíí-ëí¥iZ‘÷F©NØöK½€ÂÞ=a °÷Q©`]®p‡z­˜ÌËp[…æP«wuu±ïò˱ðçsá*“áy_6›-*äÒ®^SB{P«™}”è[{ŠúYA¨–)ñˆdÏž=\{íµ@Aqϳâ#ÿçQ4íuE_þSÍàà ÝÝÝM;?ø÷êÀtvv6µÃÃÃ,Z´ˆŽŽŽc?X2îì\Àßþí»›Ún±ŸbZá3Ýìë¯ÕŽ|ŽÈ[玩t8•ˆý´f;šyýµÚñßýÝ…Ëåp]—x<>FŒhd’V±ciGk0;¾öÓ?D×}ŸpN1Û¶ùáóÏc^pP,n…«ÅŽ——©’8V®Ý­ÐÿH;ZƒÉØñèÉ'³ß®4?ÛT3d|\ŒØqmvüÌ3ÏpàÀ?rš¦5m|Ü*ïöFÙF5L‰GÜöíÛ‹Þœõë׳}ûöq÷±-79BÆüL¬OׇmÛ¶MÑ™*388H___³›A__ƒƒƒÍnFÓ˜Œ >|¸è©t3û)¦>ÓÍd2v|Ź/°vþQ y‰àÅ~Z³Íb2v|çwO· É©ÌMÓ*v,íh &cÇ]pW:DøóýûùÛ™3I¶irr^„ƒb!­Þžœ­ÒÿH;ZƒÉØñ³Ï>˲/~±ùÙ¦ #v\›?üðÃ<ðÀüÛgï)òè<^9Þí'Œ²j˜¸°bÅŠàõDOî½÷^þä'yËI'±páÂ1ëW›§ Ý÷ß?›7OÅ™*óÇ?þ‘ÑÑѦwÒ»wïæŽ;î`ñâÅM9ÿÓO?ÍîÝ»yúé§›rþZmàî»ïæðáÃŒŽŽ6¥Í öSJ³?ÓÊŽŸþù¦œ¿V;Þ±cßüæ7™1c;vìhJ›Aì§ÕÚ±ÿ~öïßÏ©§žÊ–-[¦üüµÚñÏþsî½÷^N>ùä¦N¼ZÅŽ¥î¿ÿ~:PU> z2™±ñõW¼è¬³X¹r%À3À}ùõóo>ü0}Ï<Óð{&ý`ë´£ÇÇCýý¼ð l®r²Úd|\L³íxÿþýŒŒŒ´Íøøþûïç¾ûîcÇŽœ}öÙ¼èE/jJ»[…fÛO«°{÷nFFF˜7o^UÛOY±†Z¸í¶ÛšÝA8f$W€ÐîlÙ²¥)B‹ Ô“p5CAhWdl,Ld|,´;ŸûÜçšÝa0%¡©ÝÝÝEO› /µ 6,LÄŽ…逨±0;¦bÇÂt@ìXhS*Ä©½pIsAhĆ…逨±0;¦bÇÂt@ìX˜ˆ Í`ÖÕW_}u£O¢åk®¹×uùŸÿù>ýéO‹Ò,´ bÃÂt@ìX˜ˆ Ó±ca: v,LÄŽ…f0ãèÑ£G§êdÃÃÃìÙ³‡îîn1l¡-¦bÇÂt@ìX˜ˆ Ó±ca: v,L%S*Ä ‚ ‚ ‚ ‚ 
ÂñÊ”äˆAAAA„ã)É×jŒŽŽ’ÉdرcÃÃÃtwwË¿ùÍoò‹_ü€+VíSi]=Ú²~ýúªÎÕˆvôõõaÛvѽï\ºBõT²aµ®•í¸Qm;n?&cÇ|ßZÕŽ›ñ™ªGìx,Ò·bÇc‘þ¸ýññXÄŽëÇñvoZI3iê©Ýwq£££\vÙe€_!Ŷmâñ8‰D"0²D"mÛÁ~ã­;¶mÛÆ 7ÜP´l*Û±mÛ6¶oßκuëèëëcÛ¶mž«Q÷B¨ŽñlZߎѱãöc²vÜÈ÷­Uí¸Ÿi¡:ÄŽËŸ_úãöBì¸üù¥?n/d|\þübÇõãxº7­¦™´ õÔnf7ûb¦šÁÁA:::زe ëÖ­ãüóÏgpp={öpíµ×¾ñõõõašæ¸ëŽÛ¶ƒ2ÉáöMU;†‡‡éïïç;ßùà+´ýýýã¶£³³³!÷B¨žJ6¬Öµ²7Â~ÄŽÛ“ÉØq#ß·Vµãf|¦…ê;.FúãöDì¸éÛ#v\_Ž·{ÓJšI«Poíæ¸óˆ[±b[·n ^8p€íÛ·¹/¯_¿žíÛ·O¸n²ŒŽŽòå/¹¨-SÝu¼áááà8›6m÷\¸BmT²ah};nDÄŽÛ“ÉØq£Þ·V¶ã©þL µ!v\ŒôÇí‰Øq1Ò·'2>.F츾o÷¦U4“V¡ÚÍqç×ÙÙIgg'à?)ˆÇãlÚ´‰Åìªm€q×M–D"ÁÖ­[Ç”FžÊv 3<<Ì?øAº»»Ù±c[¶laãÆÏÕˆ{!ÔF%†©µ¨ÝŽÑ±ãöd2vܨ÷­•íxª?ÓBmˆ#ýq{"v\ŒôÇ퉌‹;®/ÇÛ½iͤUh„vsÜ qà+š7Þx#}}}lݺÓ4‹r˜4šn¸îîî¢$ͼÊeyppÍ›7³qãÆf7K˜€r6<Õˆ ÇŠØñØû!vÜ~ˆ½bÇí‡ØñØû!vÜ~ˆ½bÇÂdi¶fÒ*4ê3}Ü…¦lÞ¼™ÑÑQn½õÖ ƒîîîfpp0ØFÅEO´n2ìØ±ƒn¸Ã00 Ã0÷Å©jGggg‘JÛÝÝÄ=W:W½Û LŽr6 ­oǰ±ãö¥V;nÄûÖêv<•Ÿiarˆþ¸};. ýqû"ããbÇõåx¼7ÍÖLZ…Fi7ÇG\__câ{Õ¥££Û¶Ç\¹u“A%ìS†ã8€ïú9UíX¿~=7ÜpCp¼íÛ·.“•ÎUï6µSɆ¡õí¸ö#vÜžLÆŽñ¾µºOågZ¨±ãb¤?nOÄŽ‹‘þ¸=‘ñq1bÇõåx»7­ ™´ ÒnŽ;!N%¬Tj¦Âq.¿ür.»ì2Ö¯_mÛÜtÓM€ÿD¡Òºz3Þ¹êÝŽÎÎN"‘—]v+V¬`Ïž=|èC÷\Sy/„òŒgÃSi?ã1•ö#vÜžLÆŽ§ú}k;n•Ï´P±ã±ç’þ¸ý;{.éÛ=—Øqý8ÞîM«k&­Â±|žf=zôh³/ •fÏž=twwq¥o];·c2çšÊ{!ÔN«Ûq#Ú v<ýh•÷­ì¸U>ÓBíˆK<h•÷­ìXúãö¥UÞ;±ãöEîÍÄ÷áx¼G“¹"Ä ‚ ‚ ‚ ‚ Âp\kAAAA„©F„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„) e…¸H$Òì&·d2‰™L明ã8‰D¢Ù—ÓTÄŽÛÏóH$E¶/6ì#vÜÞTcÇÊþ§3bÇíA¹¾ÄŽbÇÍa*ÇÆbÇB£˜ŒËø¸2bÇíM³í¸e…8Û¶›Ý„cfÆŒÍnBÍ$ lÛÆ4Í ³ž,žçá8N³/©©ˆ·===†ؾذØq{S' R©T³›ÚPÄŽÛƒr}1ˆ+ÚÝŽÛц§zl,vÜ´›-OÖŽe|\±ãö¦Ùv<ëꫯ¾ºÙ7A݈¯ýë8Žƒ®ë¤R)TÓJ×Ýwß}œvÚiÁ¾¹\Žï~÷»œvÚihš†çyÜwß}:tˆL&áC‡Ðu}ÒÛMÔîûî»ùóçóå/™½{÷röÙgã8™L†óÎ;]×'¼þJç¬uùxmœè:]×%‘HðË_þ]×1M×u9ûì³Ë^£:öÞ½{ñtè@Å}ÚÉÚq¹÷Sì¸|Ç»N¨l[•®ü/ÒÛo¿l6ËÙgŸ÷¼óÎ;îlXÝ«v³ãñÞßV±ãZîE­v<™¾Xµ5—Ëá8÷Ýw-2$¨ •ì¸Ö1E­ïØñäí¸R_üþ÷¿_ìøíXÆåÛØ*cããÕŽË­“qEýmYyµ•Úñyç'ãã;n¿qE+k-áçy‘HÛ¶q]—h4Z´>‰à8®ëÒÓÓS¤àG£Q2™LpŒL&ƒã8D£Ñ@±F£Á1Ç 
ÜH«Ýn<Ç¡··—h4¼Y‰D"PW'RÊ=Ï£§§Û¶ƒkp§æåµ±šë´mÃ0p]7hw<¯xác«÷L§”L&Coo/š¦U½O»1Y;.gõ¼obÇ…ë϶Ƴc]×q]·èzK¿ŽVïe;ÚñDý4ߎk½µØñdúbu͉D‚t:ÝT»«7ãÙq­cŠÉ¼wbÇ“³ãjúb;O·±±ºîãÍŽAÆSaË•ìx¢ë”ñqñ{)vÜ~㊖Ö*޶Édò¨eYÁët:}T5-N5M3Xǃ×Ùlö¨aÁº¡¡¡£š¦íïï?ªiZÑ>ñxá÷¾Ý˜ŒgÃê>UƒØqñ6•ì±Ò5Ú¶M.—chh]×Éd2D£Q’ÉäqeÃê^´«‹ —»¾FØq-÷¢V;®µ/N¥R†Ñö6[ŽJv<Ù1…º¿Õ v\¼]-v\©/ÄŽÕŽeL1¹ëœŠ±ññhÇ ãŠj—×ã:ËÙq,÷:e|\|/ÄŽ}Úi\1Ñ>Í´ã–M­Å0ÂÆ«nŒišÁO³/瘨”4°Öå“A×õ"wcÃ0ÆÜïj¯!|Ì‘‘€qÏNæ<­ÆdíxºÙ°º¾fÙq¹cO„úrUö‹Å‚öO6 bÇ¥××Nv<Þ>åìØ¶mR©3fÌ’õΘ1cZ$®Öާû˜B]c»Øñx}1ˆWbºÛññ06;.Ÿ¦“«ëk†-OÖŽe|\@ì¸øúÚe\1Ñ>Í´ã–â à —Ë¯Ã%•u]/zãÂ_H¦iʤR@{{{›}95Qz}©TŠ\.Wóòz w*C«”›¥ þ[©ÅªÓJ§Ó¤R)\×pŸve2v<l¸Üõ5ÓŽ'c[*@øÊv'±ãv¶ãZûâþþ~Ž=ü=ztZÛñtS”»Æv²ãñúbµÄާ»·’ OÕØøx´cqEµË•ÉÚ±Œ ˆ7ߎajÆÇ“=O­´Dhj,#—ËÑÓÓƒ¦iEƒ.¥¼‡K'+”QwuuÉ$“Éd³/'@¹¢Ž×¦ÒkW_ƪRHµËë®ëX–EOOOM÷SÓ4"‘HÐY—ÛÇ0 b±‰D‚X,VÕ>íÆdì¸ÕmÚÏŽ'c[ñxÛ¶‹ÚSºßñ`à vÜÎv\k_œÍf›÷†4˜JvÜÎc ˜þv\M_ bÇílÇífÃS56>íX­“qEãmy²v,ããbÇÍ·cÕÞF§ÊŽgU]Z€°z¬ðLwV×bÇDz|²H¿Z?Jí¸Ç0ýí¸´-­ø4ƒédÇídÃåÚݨ±ññ€Œ+šo˵Øq¹öÈøXì¸ÝÆ­<¦n 8E¹7I•¼U†œÉdÊ*ñÂuÝ"×ÓRÆSGÕ›VÍ1*]C5˥㵻4M«ùC6™}ÚÉØq#mŽÝŽqË6«ý[ÅŽ'k[µ¾ÓÕ†Aì¸í`ÇÓÙ&'Cé½hç1L×{L1›lt_ÒnL';–±ññ‹Œ+Æ2Õ¶ÓìfLˆã8Ü|óÍb?yZÅ~Z¡ ³³“Ë/¿¼Ù·cBn¼ñF†‡‡›n?­bÇÒŽ±íx×»ÞÕòyédl,혨2>®Ó*vÜ.ããüàôõõñò—¿¼ÙMi ZÅ~Z}ûö1þ|>ùÉON¸mK q?ü0ïxÇ;šÝ î¸ãÖ­[×Ô6ÜÿýÜÿýMoÇ-·ÜBgg'kÖ¬ij;î¸ãަž¿FFFšþ¾‰ýÓ Ÿi€k¯½¶ÙM¨ŠÑÑQ€¦ß3±ŸÖkÇý÷ßÏððp³oEU ·„ý´ŠK;ƶCõu­ŒŒ¥µ£]ñqVé[ÅŽÛe|üð÷„· ­b?­@?÷Ýw_UÛ¶¤·páBÖ¯_ßìfðêW¿ºéíèèè`Ñ¢EMoÇðð0ÝÝÝtww7µ‹/nêùkmk³ß7±ŸbZá3 ¾‡N;°`ÁN;í´¦ß3±ŸÖlG;ˆ‹-bÍš5M¿g­bÇÒŽbvìØÁ‚ šÚ†j±±´c.¦Uì¸]ÆÇš¦±lÙ²–¸g­@«ØO+ððóoß¾ª¶mI!®UزeK³›Ð3ÀÆ›ÝaˆýÓ Ÿi¡vÄ~Z³Bm´ŠK;„c¡Uúi‡p,Èø¸±ãÚ8ýôÓyÉK^Òìf´ b?j± )Ö ‚ ‚ ‚ ‚ S€q‚ ‚ ‚ ‚ ‚0Hhª ‚ ‚  Çó<Ç ^›¦,÷<]׋¶u]Ã0p]ÇqÐ4-ØGAÚñˆAA¡á8ŽC*•¶mlÛ&‰Ë3™Ì˜m‰®ëÒÛÛ €ëºÁ>‚ ‚ЮˆGœ ‚ ‚ S‚a$“I€ªD5Û¶1 ˲ʮ÷ëvš¦ k•r¹éº¬Ëd2E9ã*]C?®ëbÛ6Ñh”l6䓳m›þ|\ÐÀÀ@°,›Í¢i–eÑ××ÇßüÍßç÷<˲Èår¸®;®· B=qqÑisi<¯â麾x¦ë¾'ø^j±˜ïM¦ (z{}áNÓ|¯·xÜÞÀß^×ÇÏÝÖ.ˆ'‚ ‚ ÓrBœB/q'8ÖíªâÂm²mO•­«@"‘BSc±®ëâºnÐ.MÓ‚×*ÜT‰mjݼyc“g†ä K§ÓÍz‹A˜ÆØØèùà‡¥ª¿§“ §¼Ý,Ëÿ;‘ðó³9Ž/¬%“þ:Ç)ÑÂg2þk×õ÷U„Ÿ“hZA”kwDˆAAaJÈårE^pÉdÛ¶‹–[–m±XŒh4¬s‡¸*mÄãñ ÌU剋ÅbÁ2Çq*†À†v‚ õF qªBj8,µ!Φ³:Ž/)Q¬§ÇÍÒéBÕÐp.6UAyµ©ç9ãy²9ެl¶‰ZÏó/¾RãmÛWãqæW}XâAAAh8¦i244TÓrðCN•—,‰c2 ƒl6‹ëºà¦–9Ž3fûðëÒu‚ 
õ¤´Bªƒƒ††‡ï ¬~kT~à=@?Ð,g0]÷=ÛÀã²Y?ÌtéR_€«äÁVKi:íïÛožç'¯Sn|žçÇÈ†ÕÆHÄáLÓ_‰pÊ;Þ¯zUU§!NAA„–f¼Â¥a±j™Ù3:A ùb3T~(,  Tu–c'—+Îñ‹ùžm ]÷…³zGõ7¥ËVéžљLAtSIêÂ%lÛßþèÑÂ2Ëb÷öí°cGU§­{ÕÔÑÑQ¶mÛ6fÙ 7ÜÀ¶mÛØ¾}ûÔÜPA8ÄŽ…é@©‹ í†ôÅÂt@ìX˜.ȸB˜L¥{ù “?Ü9Æ[.ÌiïðwH⇦ªÂ£ ’ÿÉP_Tˆh*åÿVÏ8T¨éTà†®uÒä+z—Åó|¶TªàÙ&“áî .($·Ó4ìðÞTjl• àÎ;ïä‰'ž¨ªyuâ¶mÛÆ 7ÜP´,‘H0<.ùÆ7˜ù‡?pÂoËU§œÂ3Ïñ ¾öÖ·bÏšÅiwÜÁÞÓNcÿÛÞ†3k£?ÌßøFv¿èEU݇ºyÄŽŽòå/™­[·-ß¾};ÝÝÝÁëõë׋ë²Ð²L¥÷ÐÓì˦)åìXúb¡1…0;‹ã8¤R)lÛ~4MÃu]<¯VŸaªq…0˜j;V!©ÊãÍÁAGgä„=}Ï¿Ì-g1‹L]T´ßž{ŽÎ›nbÑÇ©”§Þ9?N _˜R„E,ð«¹O?òÃóÉårضÍg?{ÏKHü8¨fªÂN“I?%Z½±CmŒñ[n¡¿»óàA€xþš”WœéyŽÃëU:º.ÀE;ÿ|‘Há$ºî‹oŽC.çµ§žŠÓßOJÓè–/ûîwù‹¯} +æà#pÞÁƒœ÷È#<õö·3ÿ¢‹¸øÑG9òÙÏrð¼óøòå—ÓõÿÈçŸÏ•—_ÎÐÐ;®¸‚Ëoº ×¶yûç?Ïéÿñä–.åÜŋٰa‰?ü×þîwUÝ“ºyÄ% ¶nÝJGGGÑò°bÅŠàuggç„ÇÚ½{7›7ofË–-E áøeppmÛ¶qÿý÷7ô<õ´ãû￟͛7OXJñjv"ÚeÇ»wïnèyÊÙñdløá‡æG?úƒƒƒíX8þèëëãÛßþ6K—.mØ9êÙ?ðÀÜqǼúÕ¯fË–-Í»qB˱yófvïÞͲeËX¿~}Ý߈±±išlܸ±Ù·î˜0 cÜj¥©T Ïó0 Ã0ÐuL&ƒã8˜¦‰¦ixž‡mÛèºN¼³Æ6¡™ããÉØ1L<>Ž?úúú°m»íÆÇÀ¸ã å§Pùá>>º^}2wýy”3b‹YiÝ ¼ð=Ò†×Áæîn¸å¬W¿šL&ƒmÛÄãqþ÷g?#öÞ÷ÕXÕ×:Ðcø¢Üûóì•ñëre~÷öõñ‡ü1ûø¿¼æ5÷b°gÏÏ·û÷ß)O9æ¹®;ÆKÙó<<ÏC×'ŽèrwïÆ›9½³“^|NÏŸ' D?þq¾»bñ—½ÌÏ4¾ÈÖ~i&¬S7'H€eqdÿ~®ù“?á»þ0 !¼1]Çíïç§££ì¼â Þðõ¯óÆçžã¼ú'NüéOùÍúõüì—¿äà7ò»Ûoç’o|ƒÕ³fñíï}¯ð¾-[FîŒ3ذaÉdrL1 x<ή¹†ôW¿„ ƒò|ÇwT=>®‹wà 7ÐÝÝ]·AÌ‹_üb霅"º»»¹öÚkÙ¼ysÃÎQo;^³fˆpBífǧŸ~:oyË[D¼ŠØ¸q#쨲*T­Ô»/>묳X·n]C„¡½¹öÚkÙ¶m§Ÿ~zÝÝ2cc*æ× ˜#ªä8jFáP9é‘ß×ÍÿèùŸJç Ÿß+.‘H¯“É$¶mcš&¹œŸž;ÓÓÓC:ƶm\×%NFY°`Ï<ó Ùl–h4Šã8ãVTδ۸Æ Ç'7ndãÆmcÇ“;8Ĉ1üƒùpö5‡¸ ö#ú6/ÃÄd[æ½ìwìÛw%»v}ÇqøÜçþ…‹/þ†ñK<Û.[_u0ëtØaÀîO…§ösSn%¯7àF~þþ×Ðõ­¬|ÿÕ<þøw‰ÇãdòIâ<|o5‡ÂW@$A×u²Ù,©T Ã0H¥RhšF¸|j7ÕÿÉóÏ3kt”]§FüÓŸ&¾|9Äb¾W›ërï3Ïð;ïä²_þ’œmû]€ÇÇX·Î-íï/®¡iÜý“ŸpÎÈ3gò©+¯ägçœÃ† øõ®]løÞ÷¸°»›åW_ÍÛÏ=—{ßñúo» +“á}ÿüφá{vv¢ë:¿½êª1×¢eC”u]硯~Õ÷ì -ß²e ëÖ­«z|\—ÐÔ;vpà 75Ú0ŒÀÕspp°`$ƒƒcž B+0•v<^•A8*ÙñìÙ³¥/ÚSÓ–±c_d+÷Ѝe gœýÂñO*søxç*ò躎išÁOåõ¡iV>ïJœ®Ä»]»v‹Å‚cIHkc‘q…0h†‡‹488 $Þ®ÎèÌ™¼d߯ åLý(KÝî²—âº0_‡\‡îîeÜsÏkø“?ù¿ÌŸÿKî½w×]wΘ›¹\ŽD"A?Üñ£oñÈ¿ÝÆ;Þð}+\t˜+závº¿øE¶èpôÛÄu],ËÂuýö…óËÙø}®òFîêêBÓ4lÛ&‹•íoU¸i4¬ðúo›Í60²jñƒÁ4q‡^r ôôðÝC‡øð‡?Ì;ÿáˆÅb|ìcƒDcëVH$xä‘GxÅ%—øÇ÷<¾þõ¯så}÷qî›ÞÄWV¬àÿ~ÿûlذ/|á ,[¶Œ+Vð®7¿™>ô;®¸‚»ï¾›/~ñ‹èºÎÕW_ͦM›‚÷=™L200pÌïo?E©ýj¦.q¥O5 
ÃŒdxx˜ÁÁAFGGéèèžx B«1•v¼3ÿ[ 6õ¦’“Íf¥/ZSÓ–±cHV±]é6ÕdýÇ1Õp.MÓ*^¯®ë¸®üVËb±º®cÛ67ß|scî•PWÓfر E=luw²Ð[ãÀŒx·fž`÷s¯Ä˜m0ÂífÈ‚Nÿ¯ÿ"öí-|ÿûÿË’%à½ï]€eAoïÉÜ~ûU¤R~·­[âK_ZÈk^ã‰DxÕ«4ÇáÈx<Æ®C{ùAv%©C®÷fb©¹SOeí}÷Ñ<ýoÿÆŸy†›, ›osèÜ·»nº‰«N8x¼(UyÑ…IÝvÖúõ¼í¡‡øØÌ™¼iõj'“\»hWÄb|eáB¾»y3;wîä…^`É‚Ü>>×Åbœ}öÙ|ë[ß"’¸òJþíóŸgøøÎ²e<44„mÛ$ žxâ 6lØÀ­·ÞÊ{Þó^óš×ÍféêêâüóÏçÛßþvОh4†étšV¥®USËÑÙÙÉå—_Îe—]Æúõë±m››nº©Ù×-5QO;v€ù¿{é%Nó˜ôtA˜鋅逨±08Þí8—Ëyu„óÅÅb1z{{1 #ÈQdYV°ÌqÎ>ûìf_‚€Ø±0=˜ ;ÞüÄeÖÓ§øÕH]—yr?Gç9Îí¿ž{R[xíÛžâ= Mƒ¯}ퟋD°túú~ˆ®¿€¯~õŸø—ÙÌ…féíí%Nã8N –½û¡‡¸nÃJ4|O5ð $–-# lûÍo¸ñàAî¿ÿ~´÷¿MÓøÔà uÖYÜr ôâç\#¿¿AqÝsæp×W¿Ê&Mã[7ò³ xê·¿å´Y³Ð®ºŠo,_Îk^óžÇÿó?‰Åb\pÁhšFWWé/|]×9xð _ÿú×ÉårüéæÍ|rÞþ“Ÿ°tÏ–Ï›Gââ‹9ó­oåÚSO¥' úuàW¿úŸøÄ'ÐuÎÎÎ1bÙxUW5M#²víZ€¢Ó鯔 q‚ x2ôwŽžq‚ ‚ ×èº.^n‚ L ¼|e›½ÀÌWñ¾K_Î?}í^NízðµT*…mÛl¹l ×¼ñÇ=¦ª^jYZ¸¢h¹ó×PȦ´ª*’3MÞüÑòÛN`Ãþý¤–,áû=Ä=kÖpîÊ•ÜwöÙœ_âAöz¾ì²Ëj¾oÿú¯ÿZß7¢E™°jêèèh³Û(ÓŽ]¸Ìtt¼ü?…ƒC‚D³›(‚ ‚ ‚ GÝeÌ^ø0ñxœ9~Æ2|_Ïó°mMÓøôG>ÍKNyIÑœ°¹\.¨*=*üV¾©Ó4}o9]g©m³aÿ~°,žþüçýéOÉårüÇ+_Éõ'ŸÜìÛ[\Š+|G ÑÓñ@ˆÛ¾};ñx<(ß;88ÈùçŸÏùçŸÏE]TTÖW„ÉaãÇٯţ£lÅT§æc ‚ ‚ ´¶m)Ú0YÔdVý„—Wš€:Ž3f ÷ C¡V\ …ƒ†† ,þæ^ôäoÐ47½éAÖ¯ÿà÷Qžç‘ÍféïïgþüùÎ Ã@Ó´ s£…CùU%ê0÷üÝß±ë׿.ÞÉq —cÇu×ñø;߉çy$ Ð4èï‡xœoØÀæÑQ,ËâÌ%K˜Xœ$) +3£P…>YØÏ2T/¤åð…·pׇÆ]œÏLð˨oÞ¼™+V±ÑñxÓ4ùÎw¾ÃÆ‹’â ‚09¾H//Ãá8,Íhè§¿¨Óµ±«z"‚ ‚ÐnD"‘@ìJ¥RþDïp'ï²m;Èu®XJoooÑy]×¥··7ø[C¡V\àKùüp.ðÜóóŽwøÕA7mzïxÇëüí\7̪-:`ƤÂ÷KÃX—éKÌûÒ—Š–ÄbÅX÷ÓŸ²îƒ(xÞéº/Èáç–««6”Éß4…“_Ãã @«°ŸëÔß¾¨–ÂÓªqt€d~{…™?¯:O¼ãfôõõ±iÓ&¶lÙøÂÜððpP5dÓ¦Môõõ188Hwwwý[!Ç {q91ß1蘘,e)Cø1õJ˜S‚œ†6ÙS ‚ ‚ ´™L¦h"iY‘H×uƒÇq0M3˜f2™`™¦iœ®ëÁd0|̉D4Û¶1MÛ¶ñ<MÓ°mÃ0ª ÷A˜ˆ½ØÌB'œ£åxs÷™E}ŒaEœÁÄbœ®ëã;¨†Ý·ÝÆá¥KYõ‹_€ëb».&ðè£r¿ý ÿìϘ‘NWô¼K§ÓµP=‰¡Ü}¡+—_Ö‹/‚%CÛ§óÛ„±. \t;–ÿQJ¼‚/Þ%BÇéÊ/SÇVä(áJQ‚`ê9%Ÿ ~êå—_,ìïï§»»»¨tïŠ+$_œ Ô}8œ‚Æ(Z¨ÃõðH‘ÂÉ»2$H``”-äPm~AA„VÀ¶í1 ÃÀ¶m\×Åó<Òé4Ñh4¨Üçºn°lÁ‚<óÌ3d³Y¢ÑhÚê8‰DÏó&ô,ÉårÄb14M þ¶,‹h4J4AN„cÂ:ðø:Ø0|ݾpû—ÇlgYÖ”ö5ÊÙt]¸ôRîZº”µùüóæÏ§û†puT4JZ«“┣ ¬/déø‚™ ;MBÑT7¬ó%»ùýÝü±´2Û†o©•ßV‰~Åa®êx]n _@ÏÑ…Sn9…ÅsWukfƒ_®wppõë×ý­Ø³gOÞ AþˆË«ó=Ð…¡Þ%N'ÿÏĤ‹. 
¼7œpêº^µ+ó±¢ȆaO› à ‹¡ë:Žã`Û6–eó“AAê€ãÀD¡Ÿ–%≄¿ïx”TÙ«x.À¼·Z¥*j¹š”Æb±@œÓu=#ݵkŸüä'ß3DUÔu=ñRt•Èårgëº(×ßßëºØ¶M4%›ÍÊxF„šq€Sp¹ “ß{–{ž[T¶ï+}(QOº¿øE¾1{6P\¬Á0 ßts/»Œd&Cöã'Ê'§3 ·Jxøi„.‚÷Y,³Ôk%ÊUêv³¡¿{ñ»‰4ui•Ž¡Ø;®1 ?œôç¯ÍË×óÛ´ðÄ…zõ¡ªnÏl€îînn¸áV¬XøjéM7ÝlÔ××Çƈs‚ ÔÆ`76`õF¾pC’$vþ€[&°Ýq¢Ñh ŒŸì3—ËI?ÕåÉ¢ž@ƒŸì3‘Hà8†a-Ïd2¤R©`¹¦i$ ,Ë"O™H(‚ B c¬`V “ÈAT͹”÷[8‘¸mÛÄãqÇ Æ5jL¡ëzðÀ϶mn¾ùæ²ÇÕ4-8¦ 9-‡ò€Sa¬===¸®„ÌZ–E, ÆA"Ä ‚0žÆeÈ<}ozÓÊ)?çÿüf^Ûq]—s::‚>òÑ›ofÏ–-d³Y2™LyA°œÇY­äð=Ð*C£ ®)Ï´j»\·ÆmÃÛ;<ótª+Ê Úªã‹wJ€Sa·:ÐØþï8\U³flܸ‘ááá ϦM›‚\p—]vÃÃÃlݺuR÷_„€'q10ŠjâXXAU3ÿüpÕ(QÀO,œN§±m›Ç?ð‚§¾W]õjæÎý>/}érvîüÏ …¸žž,ËÂ0Œ ×€t¦R©àI²a$‰@|s]׃§Äj@«ò´ôôô`šf°\AA‹ÅH$D"‘@”SiPðfËår E ÃÀqÎ>ûì²Ç-­ÀšL&ÇTfµ,+ý–ec–ÒPW)T'Âd8„Çrt¿Æ€–›zÇÏãúK/åm·Ü¶Í[¿ö5^zð ¼ûÝdl›É$oÏ;QTzhA_ :–ÈÙ Å^lã¡3~ا¢ß®Ö[>¶ƒïÙ–j™®ªs†¿Žaº;[ý±eË– XC˜-[¶ŒÉ'±QnZ.9gW—ÃÐÁàáAznìÁÉ8¤ÓiÇaݺë%—û!¶âÈ‘$3g^ÂŽÑÑñ4m3‰D"É2™ ¶mùVÇ D5Ã0Èf³ôööPÓ4O8˲Q/•JÇÆ„md³Y4M#•J¾§žú[ ‚ ‚ ¿$“I<ÏÃqœ1Õÿ”ø¥–«1J¹m†ÆœÇ4Í1ËK†Å¶`4™ª„‚ àç_ŸÛ9XÖyÖº®_ÙTÓ¸â?ÿ“oy&—¦RÜwÞy<ùþ÷s‰®cYVQHž`5Y!NuÕûÒMüª¨µ`¥žv±÷o³'Ú@ÂQ¡~›p›¯}/ûX>êoû /ðë·ü€¯<ú(·ÍûŸé×Ñm›HÄâéY«‰Çw0oÞr>õ)çŸ?Ÿ .0I¥RA˜j.— ž&«°Q•ÓMåwSr:ºÒj>ÙlñcÒÒשTª(ÌU…™‚ ‚pü%Uèº^¶J_¹m…<0áXÙ ¼ð<¸ë®(†±¶þ'q?€ë‚mûy>#0MÜxMÓøü)§pi?'œu≀ߟVÊÕI/~fßû+Se[Ê;­'I|qpL'!nÛ¶mÜpà €Ÿ3nëÖ­Axª õa_> ÕÄÌg+&•‚ï}o&###€C"aóŠu¯à''ý„þ _ž?ïÞ—’n^µŠ™gÍä¹…—ðÃk.æ×³fqèP×M²zõ»Y²äÍd2ÿ 8¦ÂJ3?ýK*• r©˜¦I<ßq+Že@ªÄ7r‰D°m›t:M&“Á4M©J&‚ ÐØÄå‚ SÉ2|­ì…î Äõôø99M2p]vtt°j`€ŸüíßrîÇ>ÆÎ;éêêªîAF_ÜÊá‹p”IU^• ÎÅït|³:^Ã4y^2`ûöíôõõ±uëV®½öZ€@”¡>ØÀ²qzŽ­[²`íÚŽy#àrÝ£óÃçsö¼×1GëåÚ;˜=8ˆ|nåJ:×ÏfË[frÁ¬YD€x2I6 眳‘ þ ðŵþþ~fÎüG4-K4ê18ÄbqFGWmÐêTªZ…À*1Î0Œ :Y4EÓ´q+š ‚ ‚ ‚ ´£yÇ ×…%K–Ôÿ¶í p*Ê(†TŠ—þýß³4—ãŠë® æ^CCCÕž±(xÂå(„sz”ä"ùŸ^Ÿkø¡£ÓD(k4³vìØišlܸ€ŽŽ.»ì²f·M¦àÔq üÃ?œÁðð6~óðqê_¿§g]Àèýóá5O¬ý'Žúaâsæœ<¨øÙÕ§~?™ÂÏ«™ÆÙÖ¬9ùóÿÓì “ýÈw‘ò¿Ç÷À{ðÁ+€î¿ÿåôöB<î¯×u›D²ÙBÚÓô—ƒÿº\¿®0{žxÕ©Ü+*Ĥ«« Û¶ös‡ÞÞÞ jY½„AAAA„Fó&&7ýj§Ÿþ@ýOà8EBœœt×]¬<÷\ÀJÒKBú«â  ¢Å(x¸eð Äð'œ9 ‚›C¡‚KBSÃŦ{Hª*‰ÞŒI~%Eºbé`aZ1 dËÌçÇèûÎk9^Æ‚‹.báãÏ3ëìó¸¤{—Í™Ã<;5 ÅZB¦ºðûLËòE·DÂáb1¿¿V™®ûë{{aéR¸ë®¥üþ÷§²f¿M$âo34ä÷õ‘ˆ¿½*ÄšËù^Іáÿ$“¾`þßaGÃ4“oN×u<ÏcéÒ¥ô÷÷“ËåH¥R˜¦‰ã8D"‘ œVUuÕu½æÏ‰'‹çù?’ÚNAŽ 
U¤A¡&Š*§m¹1²zˆ§Ö©¢SáÜq¥ÇUÇ.·Ü0 4M ª¼¸‚ õ`_þ÷þ0ƒ3Ï\R÷ãÞ}7—÷»|Ò4çÓ4Y AÕéš Îdð4_S¹áÔ!Â9Ùâ¡íàØªªçÌlvšA"‘ —ËMzbWn¹ã8¤R)¢Ñ(¶mcÛ6‰D‚L&C$¡«« ×uÉd2,]º”®®®`½mÛU?¡Tã– šŽŸ¸Ó õXjhøÃî„e7rà†¹àº|ì…øË‘!^õª“¸á _À– /ã¶ßÞÆÒ¥wUö@©Ð1ÃD)|O‹Ba†œç„ú1 ³³“ÁÁAƒëׯ/zÝΨPTUR}¡g2™ i¼rKÏår8¥ŒZ¹x*1Ì0ŒÀ{Í4M‰ét×uƒ°9%øõçgâŽãîïÊ~hh¨È~`` è‰]oooÐõa³mÇqˆÅbôôôN§I¥Rd³Ùà8*ÄOÓ4’Éd &“Ir¹†aH$Ñ0‹OUû”ȨÈd2E!‚Žã`æÝaÃ÷XÝ»Ò0a,ð•ïÞÇ_|†G´OÞL÷ý÷Ó?2ø¢êŽùnÉ’%<ÆclØð} ¼(®¶vÉ?Ш¢c5ën††–°j•ï «ë~?_ŽTÊàÆ½..¼p˜/y)Ï>{3¦Yø&(wÜ“Nš‰eÅóá«cׇ¿[žþŒàÞ¤Ó¾ý•¯$ñ¼±hÚǸòJøÈGNææ›—òüó×rçW£ëþºë®ÛÉ?8Չğ±xñ>üáÛY²„ü ßãŸÐqÝ/pãNpÎW¾òõd2?çÙg?ºuÊ£ÚÄãq<L3ÉÀ€®ë2<<Œm¿žDÒi‡ÑÑYüþ÷‡èë{)R,VA˜ TŠŸf¢ è)Ô¸4œG¶êaµòŠ·,‹h4J4#È©ãÁx¿t9P4^W¨‡Ý’&F„cåè›~Ʋ۟àÞ'îEÓjS¹Æq¶í?åO¥ü‰m³££ƒYùyLÙh¸‚+_ÁîÛv³íÑs•†? U9<ÄíIÌظqcP¨a: Ä&%E£Ñà©—a˜¦I*•*R‡• ö:Ó4ÞÞÞàKÙ4Í@ÓuD"ÁÀÀzH•/¼,<ÐPm‰T*a}%ö÷÷‹Åèêê*rGUmóÃðâxv÷W÷B=Yt‡l6äìR"®ëôööŸ\.ˆ™žç‘ÉdH§ÓxžGOOOQÎ MÓ°,+8W*• îýñˆ_A'ìËöíë¯ç·9“~}ü¿E¬üÑcl¢4RydíË~=<î¹âølú©Ž3Ïü‡Íçå/÷dÇïÂpã±zõ(»wŸÁŒ¯–•{ëmÛ°“ÉT'¾ðÂJîº+…a, ì±ÜGiîÜ 9ázMÑò~Я«Du]×±¬¥ìÛ7Èòå_übñK$Äb±à½ùú×oç‹_œˆïÊÓV}ÔçdïÞw¯gÏž{‚J±†að›ß¬å‡?<£Êw§}ñðZÃj— ‚ Ñ ¹³K¿ÂãæL&3nuõ€\¥…‰Åbô÷÷ãº.¶mFƒ‡Çáã†ÇS¥ãôJéi$JD„cÅžþyþýKÿ^ÕÆqQžpñ¸Ÿ;À²Õ4ô|\‘°›ÿIý°÷ï÷òüuÏãèoçíþ6ai!IÁ{C˜rf‡_ŒŽŽÒ××Çàà  »»›îîî¶OR©±X,Hú®¾àÃÂ[2™ <¸”·[,#‹ÑÛÛL°ãñ8¶m“L&‹îƒz"7Ê w¥TÊNL«ë:###cª\ñDçPbYøi¢ zzz¶‘¼‡V)]]] ¨žJº®ËÈÈžçnþ™L†\.ˆ“*§‡:o¸q]·èé§Âqœ²yùÚ‰yxE¢Ä·¾õ-æ?ú88ó×ÐÝÃÕËÞ_FÐ-ü½vd-ßÚõ-vî<Üó¨›ÕU]Íž=/&çëRðpVèºÎܹÃ<÷Ü?më8~x«eùß-©5y‡ùùñÖç d¥ÄãÅÞtã±fÍ=<ùäRLsí˜u¥a.ï{ßëxßû–t:®ëAßcY«W_mÃË_þQúû‡‚ýÓiؼyó¤îu;‘"…‡Gš4Q¢XXèòm/‚0e´‚7¦M¡±¬ 9-‡ªïážžþò/ÿ’7¿ùÍÁÃj×uƒñ¡VR%°Üù€à»;¼Ì¶íòá]‚ 52#óv.ºè\vYm0 ?×®ª£zô÷sýêë¹â?¯ÀÔLpòs”þ$,\Í4 °ïeûxÐ|Ÿ§~^þ„â ×TŠrÄmÞ¼™E‹ùFGG¹æškèëë#™LŽ››ªUPyÜ\×%‘H‰^*4Ôó(ò4Óu®®®¼gN Þ)±r“ýÉ*ܺ® ý,G¹'sÊm¼mÀÿ`–¶¯šB@ $Lä±§Âb•ç]øžªv)Ám``€h4Šã8D£Ñ¢ŠWÊã aµ†a¢œò Ô4^xaR÷¼X¬Ì‹ê=<9©ã¶#)RÐlcû B‡9âÄIJ" A„ãõ\‘L&±m»h¹eYcÄ1˲!•JÛ©‰¼êJϧ ‡CRÅ#N„zðôÃ'pèÐüš™Ô8ú™í¿ãú[÷ò¡ÜXHîÂ;õw2bŽ0wî\þÏB–Áß&žÿ)A 9¶.³ÁÜ6oÞÌå—_ΦM›Š6زe ›7oæšk®!•JMæSJ$!‹OزÙ,çž»ƒë¯ÿ-»v]Šçéôö¼<¡à¾^.”To9QªÑ_Øå„>%Nı<Õ çzâ[é^„C€À;N½?ª¨„âÔ L‰r®ë^v*ž5M’k©á ï™ÆJþÙ\9¡ŸÐòåËéXù8ó–‰q+ÿcã? 
O=žŒ§ó¯M`oo„wd³„Ÿ™+ßC•ÃSy×õæ‘Í/ÓòÛ@¹n~ÕªëKƒ×z¼pìÉšèxƒífPé üìÙH§—°zõêIµý°ó¥—¼¼÷ç#7œ‰I¦é™Š¡988Uù( ÂôÆ4M†††ªZ^šž%<¶Ucå-Wú0l¢ó©èÇq&®0(‚P3,<¦ýÜ>È«^Ë‘ÏaŽ5ÇÏ7ä€fh¤ãiž}ôY¶[ü‰ÞÝWoo¯ôq-ÊL€¾¾>º»»ÇˆpA¤áááZ?¥œwÞAŽ}s&öš×|ž\}ôå¬\ù ##kƒÊ¥QŽ•Ä'•cm*™Hh˜L{Ž)>}’”Þ;õÔQå´S^D*¤ •J¹ùÂÇ{÷½èE/šòëh¿»Œ%KÒàfÆž)GÆåË—³fË 5…ršø"Y¿ŸîÇXFð5ðEµøÚµ,Uç¤àݦD<‡B*ùeJVÑóËSCáå—»øÂ^"¿Ì « Š þ¾—ÙŽϳgÏ_×õ­8q¯¿¿|6¾óÏïàÚkw°sçuumk+áâ’ ‡‡‹‹‡Gœ8±|Š1LL,,\\ã„ã›zðð‚ψ‡ˆÖ‚ ÔŽÊóz,Tz&‚0Ylàè]¯dÆWÔ¾óõ×ÃwÁ):Ÿúî§8xãAÀÏWqüñsoª—-7ä‹ö©‰^4M Ro ­ÇLðÃR»»+Ç0wvv²~ýzöìÙÓìö–å­oíã¶Ûvsûíó9á?a6›åÑG/&•‚åË}#^µêz2èï/ˆqª†eYe¿Ðu]ŸÒœö²k¦7Q6› ¼äÂU•7&áä4Ŷm6mÚû.gù—bVH ¾×˜Ò2MLö?„ÓÝ7Ž­¨3刹÷‘G8kölLŠóË)ñM…¸jø¢›_æ„¶õòËuõw^A´)¤-pñ…@_Èë-½7Þ×ä(}™üºU«®çÀÙ,Y²¤nïG8t}2Œ7X½z´)"øT‘!ƒ“ÿ'Ž‹KštQªáÀÏ}‘¢õ=«¡D‰bc“!ƒ‰rQ¢tÑE”( Ç~"AAZ†™/<º-W€IDATÏêÕ£µíä¿YwÝÊÃ+ýìaçw€æ{OFL‹ÇãÇÅܺ]™Y톣£5Óò¿ÿ»‚[nyŠ•+ä o¸ŸHÄÚúûýÄè«WrÛmo££c'##~XêÐ/Ä)]j¼\Sã~µyת¥Ñ“öflj«ë ‰PÕZÃÞs®ë6½­õæPÞóÁó<†‡Ÿáä“û˜?oE*ðm5•òóªœí¼ŸÝ™·a€õ`Μ9ìÝ»—3::0ð=Ñ4 Eu|ÑM…¹j<æÔ;dP(Ô£:ðßo',—‚˜§;(iÍä—§òÇ4óíQžt;wî„üëH~3ùåj/ôZm¯öæ·‰¾á Ü{ð ›^ÿz"øÕ¼»òÛöäÿ^šÿ]ÍÏ·½ím€/:·w™‘ñ®3‡ƒƒŽŽ†ü.G–,:ºx Óõyðð‚χƒCŒ9rWÜ# 1lÛCOÍXAAh-žœÌwyþ„ã•—‚u1_t~+¤¢óte&Àúõë¬\1qpppB¯¹f2:z2?|€3ÎØI$Ò…®ªMlذŸÅ‹Ÿ`xøÂ¢ýLêá©ÙNŽVñÎ Ñ0 ƒH$2Fˆk÷*©¥<Šƒ†–÷JÔX±ba¾ èD÷ªà¹yWp1×uÂöÇU«‚¿ 9à $¨Qð|Ó)®ÈªÄ7;´¿M¡¯ï§Ä7ƒBî9__Ss™ü:%ê¥óÇýùI7ó»ß-ãžµ~uSå]—À÷®Sm‚gŸ:–j›ÊkªÎsଳY»–ËæÎ¥¿…ž?ž–=B¡¸òî ãâ‹z‘|{våïë=W],[ ÓÆÿÅÅEG*<)Ï7³Šú|Q¢¤H‰ð L+<<"DÎÀ>'Ê#.Nœ~ _,,R¤ppäó ¨­¥¿A¦ƒ¸Ì>tÂÄÚäC\È:þäë `åJÖ~üã£JWW×ÄÇÚ’™à‡f© ©¥žoÛ·oçšk®aÓ¦M-S5U 4žç§™;÷yä û÷ÿ†aPêܶzõ(眳ƒʺu'Wu}ñ¸ÿ“ÉÀdNd%+É‘«[HÓ²áa0Í |5\ÏW‰e*|aJjÐ(r¾Peâ UÙ‘Ë Ä¹8¾ ¦ŠE¨â>JËáW^Õ£ø^t~^;=ÿ{ÎI¾ïìçž#I¡Bw~ßð²ÒkP×á„ÚÚjÿÁåË‹ fèøB]¹wHyß…Qe•7ïàAžš3‡#'žH躧 66&&:z 6¤«¸BõP!{‚0]H‘"I2Õ Œ ,UC#F =ÔÓ(!Ϊ«§³ ´*™LÇqN½A˜ì–üɧÆ× !=™ $EÇqÐuX,V¶ØŒ0=˜ …‚ ×\s çŸ>ëׯ`Ïž= ³iÓ&¶lÙÒ춨ê‰D‚¹s/äÌ3òä“KY¸p°¬Ð¤iýýG9ᄧǬsÿDz`2U£Žã´EYÇqêò$3|­á÷°…ÃɾC‡ÍçÄ_ÌÛÞ¶˜W¼¢z—LMƒ\’Éy 0@/½äÈ^IÇÂ’%KŠ5E2ôZ bJ|S–Z¯°(vP¢XøJÍ’¿s¡ãŒÇ‹>ò(3¿ú³÷?7©ë4(„¿–#¼\Ë·«4C¤ ±-W¨¨ÔZïš1#8nXÌœ(ïõ» Ò¤‰þ‚Ðþ¨\‰ÊÛSU V9“eJ› 0€‡GŠ9rÁö‚p¼`Û6¹\Ó4±,‹\.õÊd2X–E*•ÂóŸVÕª>L˜žÌVtvvríµ×288ä=3M“õë×ÓÙÙÙìv¡¾€]×%»’³Ïáÿñ_‰ÅʪaÜu—ÁªU׎A!T.•ò½BkÍeØèF;ˆPj 
#LÌùßO>é×ݰáD4íĪ÷7 ?”Ú¯¢aa‘!ƒÍ“ãæÏŸÏ+7l(ÎÐßf™¿+}ŨPÕÒcTÚ6A±PW‰eÃÃ<6o9sšÔµ–†Õ*víÚÅÊ•+‹–YBh…\xÕ´õ¶ŸÿœùHÑrX=©Ö·*ô®V»31qòaÚ..]t1„<íÚ -I51b¨È®ê3”!CŠT]®‚BUèm&êsƶí¢bIétšh4¤µH$—J¥‚*©™LÁ“:™LÒÕÕE6›Åu]2™ Éd2˜ÇTZ/‚Р—]?þS¸tœ Urf)™ó«~*—Ë8¦³Ktww·l.8…ã8d2 Ã@ÓÀ4—ò¯ÿš"+Ÿô^Mš¿ûdºðCØTÇ!Ã/è0NáʦÑ x8Ï›0>{ó–8<üÒé köÀÔuŠÂ®-,tôcJ|ßÙÙI™ &E5‚• 笆ýÿ½œ½§/âæÀ‹ÇÝVå‡ cà‡Þ–ŠkçŸÏ¡’<™Œ‘‡ ãD_‰§žz*»ŸzŠÅO<'ú‚« Ó¢f¨ K fþ€Ž.áx´A n::9rE˪!K6x¸"BœP/<¼¦÷³‰ËªxW,öm’ÉdàÙ¦ Z¥R)LÓ$‹‘Éd‚ñ§®ë†1&:c¢õ‚ õÆæàrø'—TÞÈÃO(=àùÕ%lj€“Ó›ÙÕn¨Ü#[ÇqˆÅbA±…‘‘‘¢õvþÇÄOâ¾`Ë0/¼zNà}£Ëg€˜é‡f· rCmdñƒvðÞk6Oáabúuz²‘$I‚Ä1%ùV¹ÐêÙÕø‚SµWY­µuáà[—ó–vVu]^™c—~ÝÀìÙ³ÙwøpÍשªÀ–cÞ¼yœxäHí7°Å©—PËûA‹°'­€òò‚¼‰“ùŒ˜˜D‰Öî-•ÐшE·J$s]7Oª¿]×E×up‡h4Ú©[A8þp€S;Núk|ן2Øøa7Ѩïç‡:á8ñlœ»–Þ…ë¸d³ÙêN*´-3›Ý€ZQ"•çyÌ»…¼÷9þÄ?Šoß½øä~ÈÛk7ÑýÅ/’Äÿ¨Dñ¹ü~ž×:b\£žÚM·*¤íʃ^Øm=PaP“E IõtzÖà|ô*sÒc±xÛ ‹]XõuU+/íé©z[uÜÉ<š˜ÏäU•ÔcÅÂÂÀ J´é‚p,¨‚%Pð‚ÓjêQöÕÑé¡§éá„‚0UضM"‘ —Ë‹ÅH$X–äƒÎårD£Qr¹œ„h ‚в¸À,<ֳƮt€.|ï Èfýp§PtÙ•W^É»?ønñ„;N¨Ú#®Uð<§Ÿþ7n¸a ÈåÀ5 6mà‹p*üL=SNåy+õ êèÇä9ê+ úôSßpW€SOÝd8ë¬uooGGGUÛU*ÖPúœuà—/³Üöž÷Ô½ýSI==u úé!Nhy”Ý;8dÈÐK/@àÍ©D¹r‰ékAËãbˆ4i44ºèã5š A/½ôÐ#Ÿ¡í™H$S¹ÞAZXHŽW¸—02²¶xeŽ‚ÇPr¹‰V ͦŒÙÃÃÃìÙ³§Ùm™×u±m›W¾ò3xøâr_„›è«Ù²¬Š!Ÿ~Þ8ˆöø ñ«ý®w]·mí$l©'ŸÓýèM|æ3WÖõ˜õòLj^ûÚnn½µŸOœ8vnUqÜ‘µkY[Åv¥yçJ-4Üœxä#K–ð’’ü•/zøaX[Í[“Fäs;–‚#‚0Dˆ#Vä — AŽYŠŸjTÊ S#::..V¾Gë¥7ß\\räˆ#E +x@£Ä»‰r„%H¸ÃxxD‰#œ[A„bl`6gzoç^¯ŒQlÚc~.ÔŸÙ[¶laË–-ÍnË„8ŽÃé§¿gŸõ=ဪëŠéº>n.·¾¨÷C’ùñt&S,X+ÍM¥žh¤'\£>Hr›Ëåê*šM×ÎH>6B_ôðÈÁšֹ‚Ö®Á0RFÿ±¬„Z%uÆ-/°hÑ"aÍš5EË“ÀæÇoô­j;Üü?™À ­„‡GŠQ¢Aζ~êß•CUbM@G*Lj¡¡'N† 1bD‰Å^y*§AŽ661bÁ~ª w/½Äˆ‘ Aš4=ôø¿óÂMŽ#Œ¡ºV Z*<;ÿ/GŽ!†šýv -€çyc¢IÔCmMÓðàvml²dI“&I’!†01‹ÄÄ$ÉÀûOab7I2O`€±@ÐsqI‘"B„dþŸ…Uô Hyq«Ï¹M›Ï„šÐu=˜„–{˜ë8‰D‚l6;m£$¡Q8NaB*Ÿù#•‚HÄÑJë8Ž¿>—ƒ3 §ÇßV f©”/¢éºY©iþºÞ^»®.ÿ·Â¶ýcïÌ¢Ú iþëp´Y"áo»t)Üwßy;}“£¾æ©§æ°î·ë "\¸Rj*É$ˆFƒ‡2‡>~™]¯ŽŽrã7ÐÙÙÉÆ‹ÖõõõqàÀÖ­[Çúõë'uÇq(“÷¼®èä'廙2†ÿYÒ€`C ËÁ~o%¦iò¡ÿy=§»p· 14 >›€wë3àµ|Í…[-¿:e ÿxfþ|Nèw H§Óc +8rRM4°)„¥OÕtáXJÊK×qüŽýÈ‘™3ç)2¿“NÖ»üfeǺʛÓÀ'ÓÝ®‘¤©MØ7ò?q|1®ÜW¨ž_ÞŒ){%;®G_ìâ6ÌHG'Nœ ¢Do£‰„aú1cŠñP‚sŠT‘\;PZ˜AC#K6ÈÃþüVº®ÒÛrÅTÞ¼ÒþÀ î›Z§¡ÑK/Y²An=&k`´å}®†fÛq«bYV0ÎÍårEÕ\×%‰N§E„k!9®*ãºL×}ÑÊuýèÏóuÏóÔ-'Ê'IÒOѺ8ñ 
ú«º·iÒAmœ8Q¢äÆÄ´/­`Çà÷"‘ò?a¯µ¬Ð¾Êû©1d&Sì5Sé\‰ ažçá8Θ|q¤R©¢ðU¡yLŸb:ãy•çDJüR¦žËù^lK—ú?‘ˆ¿>œ§<÷çJºî‹lñ8d³þ|)ö…45w*ÕwTª¥lÖ_§¼Ö’Ißþþ±"\=Ðuÿ:¢Ñæ½´c8”Ï%{Î9;8ò™#þd¬[·ŽóÏ??X·gÏ®½öZ  8ךÜßó<µªêuäŸ<œ“ö½Û ^ȳ9xeÚ÷L‹Þˆÿ÷‹Sð(°ÔƒN`ȃ3=¸ÓL8˃T/¼9ë R)^œƒ°xßÎÁ‚4tÅñèï@& [5XÑï·Å¡¢šÆ¯ÄÚÙy¥}Ͻð¸›C*„úÓ>”‚‹òcÆJÀe<âÁo=¿ß;ðÊM/à¸Ï†Cž/Pö%áuQxý€Ì&àÝXZqUZMóÔ…,5᩾Óâ'.® =vzÃÞÏFÙ±’yó]ݰöû÷O­&<>1!¨õ©¼f›M%;®W_<hhŒà»?ëè¤H%:&kÃ)ùÏ|9³–ÀŸ À‹Â~VÀ…ùdÆ„~Ó÷nÛ d“5à2ðí4¸|*íç¡ |;‡“ðl~åÁÅýp«á í_KÁ¦á½.ÜׯÃï=øL?ŒêðÎüà¯oðrÖ›þ1SÀ/sБƒOëЃç44¡ß…Lä…¸yüei°Ì€S øt þî…õœxÉ%<ƒ°Ú€Ï§@ï‚s¬‚7Ðø€^¨ÃW$“`ÇüöèÀAv?±’§ºþ‰¥øÞˆ1üÎûî»\Ò÷³Qvì«yÚAÖ®Ó˜ÌeLÙK(&ü³ÞJv\¯¾xª10ÈâH#ùiÒc¼”<<$D¨›LŘ¢..fþßt¡Ù×¢„8•§®t]œ¸ŸÂ!_CUiUá¯6vP˜¢h¦·–eyàÊy¾Y–…mÛär94M#—Ëᬖe‹Mµ¿ùñËtW4 5¿I$ ºî d–EnG sÉäXÁ»–EÇqÏQ]×+…ëº×©®ëxžW©Â]#¿ñ8¤R©"ÏÝJ¤R) ÃŽïy^àÝ•ÉdŠr?ªÂ~š¦‘Jù‚œ¦i†|¦UðÊW¾²aïO£íx-ð“ÜËytûB®>ô2ÿi}øV†ÞìdþïD"ü-Ì?FzppÁÁÁ`ÅŠ+Š^Ggg'ù¤äÃÃÃÄãq6mÚøF¾bÅŠ¢m'bÿþýlÛ¶7mÿq´ÐÒ7[p[Ö¸ŒIe5€ X®ÁõÂdÝʯS¹Ü À´ ›€uÁ9CyO;À‹Ë{ÁÅ|Qý‡š/^ º°;ëŸëi çºpSï,FM“§2 ô&á/ãð í)ìKYèɆî¶¶VU|Ñ^ÅûÞó_< |0afœâŽ'ClÀîñŸ––zázø! n?pG_xœ ¾¨yáð0}}}ìÞ½»aïg½íx÷îÝlÛ¶î¼€½k×®†µ}*+÷ ŘPušñá¼ïß¿¿aí©dÇ“±áÇÇqضm[Å1µRRµ½&Ҥɑ#B$¨9Àà'{oT! 
¡˜íÛ·sË-·4ìøõî‹yän¹å†‡‡‹rt•£—^,,räo-¡>˜˜gk%1M™00è¡ ãÔg\…ØÆˆÕô½—#‡†V$HnÛ¶-˜¤Ö›FÛ=WØ dhh¨hyø7Pä ÞV(0ÜÄññdì ããz+êã88޼*4Ñu]b±XªªÆãþOo¯?§I&ý°KÏ+„–¢ò½iùè jõ%¨†ëºA•H$ˆÇ㘦I.—#‰ iZQ¨£¼‰D‹L‰bŽã`Û6ét’ßþö6¢ÑÿˆjꞸ®K6›Å0Œ¢{’ËåH¥RÄb±à|™LÏóèïï/Û§†ÛžËåð<Ó4ƒðô¯~õ«Ü~ûí {1>îëë Æ{q™§-ä±ÇndÙwn­ªM’nzÑ××ÇwÞYõö³6nÜ8áàt"Tòþ¾>¶nÝzL®– .dݺu,Z´(XæLT‚´ ,Ñáâq\‡U7˜ÊçÕG/ÜWÇ€_ÄátàoòÇ3ÕüÕ0ü¿_îÂÎhãŒOt^lÀ°§3gß³ÀÜ`]:ßÐsCÉ< Ûÿ’˜_2¾-÷–ñà¶¼¦Š~dŸ³4èÕ|¯Ò°WÎùïÛ™µz5Zþ>}òE/b÷{ßË©wÜ1Éw«:êiǬ[·Žÿæàâ~ïÕ¼ø¤}õ´Îó»± áµ0µè§€E‹±nÝ:Ú¦rv\êá\ ,`ùòå¬[·.X6Y±Ëjª±eË= ¾|RÇP^3qâ88äȱ”¥èèEánBcY±bkÖ¬axx¸aç¨g_¼hÑ":;;‹žxWBUó40äáF(ÍW U¤EyÈa€44R¤HÀÈÿ þ•7£‹K†LâšÈ'ëÊ?öËaë·òÎy' ,hÈõ6blž0 ‚WÜÑ„ññdÆP7U×uÝÀ»Ë¶mî¸c?+V\Ì£~×5ùìgpÊ)sÐu?dTé\*œS9õ÷*Šªãçr9\×%Ns÷Ý?åóŸ³g¿ˆ÷¿ÿ°¤¨-¶m£ižçáy™L†X,F.—C×õ@°êÏÇ»®ô)–e+ê$“I¢Ñ(]]]€ï¥¦¼äzz–bY¦i’N§I$‡®ë$òI•p¦ŽWÚŸ)á®Òƒðòp[MÓdxx˜+¯¼2mõ‡Ç{qyã…Ëx:óÌ„û+O8©˜:½èîîæÀU+æˆÛ¾}û˜eã=Û¼y3ÝÝÝÜzë­ttt5¨¯¯/x­â³ÇcáÂ…cÎõë_Ÿ\ ÐR)»sÔ7»,ˆAG•ÛߣÃW’cò?Žá3IøÀƒcôЉës,5«öXðykb Ö`c¿ïáfŽsß À»à •e»N8séR/^\§;]žzÚñâÅ‹Y¿~=ÿ ,Àå±ï¼›×Íù½ê,Ä©§üjr#Ö6á;íŽòˆ«æ3ÓÑÑÁúõ냊M¢œOƆ,XÀi§VÔ«Dë•PÇðëçŸ?‡~ôù÷?½{?ÜÅ¢EÂÏ~öI2™Ì¤r© ¸*ì`çÿ §³³“5kÖ0::Ú°sÔ³/^´hkÖ¬©JˆSÅú”Bàx§±<\È …! 
Ò¤ƒÊÊ2A¿äå“eÛØÁw¢ mUâª*þ¢¡±uöV¾õÖo±à‰Æq ‚W4c|<;†Âø¸Yär¹@lJ&“yAJCÓ6mš‹mÿ/`óñmþöoß]t Çqòž`9 ß[Í4Mz{{!My yžGWWoxÃÅ|õ«¯á‘G¾D&“!5o7]×7MÓÐuþþ~r¹ñx<W)!m<Á«Ù|â9×uO…–VòDáÏó<¢ùj åBà Ø´°¤¼ÕÚm|\:®øÃ-õïAX¬Èå|o–§¢pÓ“îînFGG«BÜöíÛ¹æškغu+ëׯgóæÍ¬_¿žíÛ·ÓÑÑiš;;¾>:::Šâ®Ã dtt”ŽŽŽI'cܹs'¯zÕ»Z&½ïÜTï’—ÿ©ö.ýuÞh³B<®…@[V×u‰F£lÝú§˜¦Y”× ¸D"A*•²,âñø˜jÀÅâV¬c6ºnUÌ”€t{ê×_?Êu×]Á£n£»{.°$V¡sñÅßcÍšÐu“HÄï#M³ðÛ²üëRÒ††üßžWS%“Å}j>JP­t€¯5UUÍuý߆Q˜¨DÏ¥Ç0Œbï;Óô¥„2ÛöEÁtÚÿQ¢ž:ÆÒ¥þ}2ͱOýÕµ*âqŠÂu’IÿµÿŸ½3“£¬óÿ; ¨ˆ\ 5 j–ˆ(ˆ[£®(`´Z]Q¢²Ý’5®îºtýÄkU´Û]/²‹Û¥®¸àîÒ­¼µËE!q ð"œâÊp©©0!„`~<ýTW÷tÏôœÝ=ó}ç5™é:ŸªúvÕó|ê{ðã/œ1!n6ì¸TJ#¤+ÿ:™°cA˜m:ÍŽõ µ\þû¿w14´.®ôs´ƒ#hǤCB»…©ÚñŽà¹°÷#ÕçP}SZGnŒ¹¤Æ˜?,jª“jê«–ÌdRæñ8òÈöŠ0ÝŒâZ¥-¥ªÖyòZÑ÷´À¦Ç¦ ÁM·]w¼üå3|¶¦Ÿ;Fî`ÙŠåê B”À¦Å´Rå¤èXÝ€ªëPõ‚3QÂSY&¨l+dÚ‹?ÂXè° P?û·÷óö·¿”ƒþrxù‡øÔÏ!›ÍbDÁ;Àÿ¯ÁþOØø¾0°!Íê59ð+ÞaÐûÚg0< 7Ýѿµg>ÀñÇ/äÒKÿ€sÏâ—¿üù¼GOÏ-lÙr?ü0×\³ŠÕ«Ÿf×’ýè=î_¹óÎ;9ñÄÇøÄ'T‡ï÷²u«A*eáû>¹\Ž|>išø~?Žã==*±ã8„¡ÁÑG¯æË_>5kÆu?[y›âyêÎešfœTÙ¶í8œ¶Y?Ù0ªbï«-Ä:ÇuÛ¨_¦^PÓzÖ­ ë©ß¶öÔÛòýªgŸÞö>¦M³my3‹MAº‡¾¾>Êårü Öï}}°bÅ0Ï{Þ¡ín¢0O8à€=êÝ ÃÑa‚Pa!T«…4s×ܼysC¯¹ÙâÖ[ïáyÏ“ríS%ªoˆÆÂF‰k¡Õ¡KT·N«Õ&»}lK^°™ zaÕÕ0‹à,FŸ¼fžn:<5¢ª|š@_eZPÙî‹ýÚ}äÂ\%L|c¿øÅíÜsÏOXô‹û(›e¢5yíé{øÛÏü©½ rAÿéUGÏòw¡ðÚƒ|cQÝd<ª9-Èiu>®{ä ÕGpÞyË9é¤~\÷Ç{ì|QñÓŸþ_ÿú ìû¼Ã8÷Ü3‚Ä̃ ¯º“–ŸÄç¿ýy,Ë"ŸÏ󪕯bý5ëU“|>®œ†×þúZnåVþíßÞÄÆwlä½W¾7¾â<ø“9öècq²N\-2™ Û¶mkéêÐÐé~±=S#†Ñº7u·""\gQŸè¼Ñ|-„{ž{‰$«&—-•JX–‡A€_­/™Áܳ‚ Ì®ëÆ©,|߿ߎa>[¶<ÉŽŸæ+_ùbÛÚ˜Œæ6;Ø¡þpúðv•n…º\~ù|ž(ŠÆôŠæ V¯^ïûd³YÞýîw׸eú¾Ïå—_Κ5kÚ÷ÐA¿æâ‹V¶û|u5:¿ÏxL$CÎDë $…8›ª3X½®Oç$¿Ÿ Z²xäˆ;8ç£Õgi\–6ŠT’)íÆœNW“ZÝò•¦_®¨„{Ae~ ø p>Ä5ô}>_9©I¼ãæ„$Ú.Ÿ‡¯}mŽ8Âçæ½GXr¹‰± î_±‡½û"®_ ý×+³ô¨ˆqßLïÊöLTnD£²`@õ¦¢Ã¸Ó¨|Šƒ`ö›‹Õî~ø¯·ü†ý×þ’ý4¯ûïWÛÈWÖ+ÁIÙ“ ÿ`ÿø`ÙgþáL(©¼Š¹\®îÃÌ7ðÞÀò—,gëõ[9rñ‘œ»è\8¦Ò.-|Gà¤rf·är×îbøôáÄ[y?®0æ8N,¤ÓËšæ¦O„˜MtØï\†ññ%dŠDQ4ªÈJDQT#„%— ‚€ Ã0®FèyQ‘N§k–*ñÓétšþþþ8_“žîû~¼ä÷'Š"lÛŽìß÷1 ƒ[o½•ï~÷»í>}‚ L×uãûDE|íkß#“qH¥¶30ÐmÛ|æ36¶}ˬ¶+Bu[tw=ªün%ÊHè^Bà??Ʊ‹¬ô[M¨ÍûQa¬<ÇÂüb!¨pW^y%®ërÞyçÑÛÛËÒ¥KÙ¼YÕØ\½zõ˜ ggš0 ;ºv7`šÊ3¶ñÝFZÜn–Ö£"ë¿lªãïú«[¤3 rLpï÷.à™­ÇTOV2€ŽQ 60 TÓú›÷w\èOƒS7"NžÀävu2>M„ê ªœ…:cÌ/t¶ 
®»wÝõ,-äøÑß@ºéÍïUú°ÖÐtgTû©…€iS›'Ѧê!gWVò+%mÔ§ÚÛÍÂó|ŒÓv,áæcž{󳫉+Mª!Ü&±n6ÜN5ߢ®£—Em¿×îåÑÿyó½fíkí„G¤¶qÁž ¸â_®à¾…÷qÎÒsøéðOÉlÏðøs'½$Í×|›6ßÄU[¯bçÎëøÖ·àÍoþ /|ácêœTr¨PZ?’Þ=Z`ð}?.`¡Cb ÃÀq|ß'“ÉP((•Jqèlrý0 Éf³äóêäjÏ"-¤Ø¶{û)!$M¡P`Á‚»+á¼>‡z(GuT»ÍpÚ(Q¢8Ïo&íˆC±µ÷'(ñ*i'ºo†!}}}ضeY˜¦‰[©ú†a칦×/ ˜¦I*•¶mÂ0¤T*aš&Åb‘0 Éçó£Ú£·[.—‰¢ˆ0 k’‡ëeu›óù<¶m“Ëåèé鉗ÓÓlÛfýúõí>õ‚ Lí 700OËd2œuÖÛ(—N¡P˜µq£î„Tß:Ô8û³¬Ý'L˜q"à„—ÿþÛŽ¬ô?G¿½ ÃL&C¹,²¬ X¨ÿÐbœ.í;44ÄE]‹rí伸Ýç©ëÑ ½ÛI£Ý= ¦Ï%'­{ðùö_óÇÏpŽž˜ì$“÷é·$ÜON0!È+±Â÷«®Îͪ˜ÔŸD-¾™Ô t.òªNh ?®*iYð†WßÊm½¡CÅÜÞP>H‰oõ»do¸èÁ ÞøFúQ‚»…­îU7[-ç¨z·…(Öu >ŸiŸÉ ÞñϹö-,>q±Z.WYÞ¢o¯×«xÊéõñ*Ëz•e}Õ–—œü¶-ÜÆKŠ/»Qaà9ª7­AâÐð/ä…›_ÈÚW®å¹x.Ë¿²œÓ/>Wô*pàÒàR>qï'ˆ†#,ØÃ–-pÒIïá}KŸàeyûd÷!´Cv¯ßMÑ,ª6¦Áë÷xÙµ/ã±ç=ƇÎý×,¹†«_r5ßÞþm(•J,ýóR6(•Jl¼r#_¾L.—‹=’´7’;úúúÈår±‘^Æ÷}>ºæ£ôžÛ‹iš±.Í¢E7ò/ÿr^\éëÛßþ6»víj·IN :ÜÚì˜2P³ƒ®n¬…\ÏóbQ ˆÅ`íE¦Å^Ã0(•JäóyFFFð<|>O:& ÃØžŠÅ"–eÅÛ ‚€T*W?¶, Û¶)Ô…íh´À–lÏxÔ/£EfÇqp‡|>O©Tjº_A:—L&Ãðð!±†*?j¡P •JÅ÷™B äQ]Ýe1©fž©Ï1¿ž,ó ˆ““4Ñ€MÓN¨aaý„åË—³zõêv·«†¡¡ÃÛÝa†0PoæúCÊè X¶¥ÉQ¶šØ)V¡«Q¤D8ÏS1c®«2À·RƒÝjqš 4à.à~LÁû0ìyã#þèI¼óvƒóöï¥`TYØóF•jÎ}6îµ×ÆQÒ) :òH_ö2ŒÓOWêžNj;äÁߨp”©4 %'C´u>9Ö/|ˆ?~%üõú9)èéÞ²Ž‘5Qâ³vÙë¡Z®ZW6-ZÄg1ú•¹u¼âqzÙ}—ñð¢‡9-: .¡*öE°á ¬?e=Ÿýþ±üeË6ÌûžÍ+ö¥ÚbT„ ÏÖî*MW] 3Lƒ½¥^0 m§!¯ÝýZ¾3ÀKÏàô/ŸÎéÆéXiK4CåAë‡OüIx ª‚!0¨Ä uaÔþo<âF^éïY¿þ¡XÈ8ï½çqÝá×µÛ$§…n Km *÷Œçy±Ç™ëº±P,éïWJ’ÛÐb™eY5¡Õ r&s²ÉR)•ÿÆu]¶G/ j’ÍfÉf³ôõõÅ…SÆc*U“ëê}¥ÓéšUAºƒþþ~,Ëâá‡?LªªæžWíf‹3ãÕ¬³ÉôS}m£Æ/­t¡çJ„ÏL L(?ç9ínÒ¤¹ÿÃÏcw.›;ù•„gáÔ71ó<ÿùÿ„r•æ"sÉû­ð¬ÜŠõ‡4d2ʃm²ttõdÉD×U=‘dεkáϧTc’ƒ`´àJÔs´Zú,KÍÓè7‹A ¶U¿×­æµ3ŒêºÙ¬Ž_¬†Úš¦úmU‘¦þÍ¥ëÒ]²QÂÔÛÆà!à§wÃ'7Ã~¯‚K¿¿›·}ý„\›W½cË• ÀºóNJ»v‘]¼Xiiûí‡wõÕd¯¹FÙ±çU«|·¤DæJÒn%”°”!θ߾7rÀG> FX­©MҒ̯¨…7PÛ7©ö¨µ‰Õ?v ‰å5É—›\ã]Ã;>ñŽÑ¥§ÓöÒ¼â¶W°~d!Û~w0ÇìØ>ÇØ½ô¤¹k/Át¢í¦jç"ñÇwÿ‘—øR¸¼nÛ¨ ­„ÆõYçäKjÅÊô~xÃ’7ðÒô±õ`Ô4ͯ.ß÷ãpe?I{éðP×uã|i:<”—Ífk.îf@í£WŸdäo=/M5ljê^|eewæƒß†Ïc·<“Ÿ<¼’jh´D뺮hjè !®Õž t"»‰ØwÃjö~ ¦VʺQùÂtZ‰ Â+ß=yÂZ*5ŽK.•T[ÂP‰fŽ£„“¤“ž¯÷Ó軨“êzñé´ú;Ÿ¯®ŸN«žRXW#·¾÷GW:ºGT¦û>lá·yx`°ú$וd“qõ@J•ßúIŸ¤Ñ0¤¶”¯™ø¬óî…uÛÒyö’NVb{sÂóð|î~3÷dá ç¿˜³wPM€Rªœì1zÆÎÎ8çŽC.&ú–/‡Š§Œ:g¡²µ¤½å+‚î÷#e%à,xµ ÷†ìÞZæ´}÷…ƒ—Á§rª#ʦµˆLhéyª½ZX"ð£ZA[T¾·¶ %¿yÉSÞzä‘°ß#=dÔì}èE6øÃýÏðWïÅ£g”Á¹ õ  
+Ü4±¥‹®¸¨ùºõq¦× i8éË'ñÅU_ä9O=§š¯/RBÜ\! Ûr¶ÓiÚgƒ‚ù|Ã0âb†aÔ„e&súééZPÓÛt‡l6.HöŸJ¥©TŠr¹<*,S‡)'/¼K‡2O˲ÚZ¤SŠ¡Ì5’Õ‡CB òä )PˆSDD5Ÿ§ƒú0ó òPnTÙ¯H-Z4óññðjD4ƒ¥øo +Öô>ô²z=P9'õt ‡z°±ãÏZ\óñ飣FÌË’E¹ú6káNïï“+?ÙîË?cxžÇÇ?þ?<ç9߯yü6ë–Ni_T»‘Ð8Ôt2ÌåtÌZdÓ]¼£ßoê'O©òpÛ~\ÐîCš/øÄûxÝ¥Ÿ‡¯®…ìh"yîõt…'9â„nç–ëRì¤ ïI·ŠÚˆF=Ž CB(•÷Y5þô:'xð㊷›ö:Kn{¼WúÁâÙê ›rq°`Â-ÚÝ(šÕ¢:AÅó¦ê•Sy\T«D¶Zo­£žÚj=£ ú´×b˜]÷Yx(€ý õCÿdWsè錻÷ø*óà,”øó`å÷ç€í&Ühª^›WiäÁ_BØ|ØT×»R½œ/3ÜaÜ[9±¿:þö†ûy(ØùÆg©™Q¤®y±8¶Íh¯ÉŠà¥«‰¹T®®¯m8Ù‘q]%¦bð\ë¹üðÐ_(®TRËkoÐä:¾¯Ö‹"ÕV-ê¶%)•ªÇ¥ÃÀG}®÷"Èd8`ÇG*¯Ñâ³ãpR.çÁ¾Ïú C+`_ãÏ»³ù*Þrð¾@*¼P Ñ<°ñØ4‹í™Aî¨ ³g²‹¬½Õ@½\Ìçó±ð¬Å,]0Ãó<ljCLAyÆ™¦Y#ºé¼nù|>Îå¦óÏ4ò4ÓÓ UqkeР½îÚ‚þêÖ?Þ´Û…þ:[ ækïÚaJ”°± *ÿêE눈zH“&O§òÏÅ%"ÂÆÆÂ¢>""LL¢Ê?›ˆ¨F°Ê’­ñ6ë§ ‹4éx=ôÄ‚”Ç’B•n¨êØ9r˜˜ôÓ‰ mY²xx±8¦…µˆˆ…XÈË“Å1 Þ¯öXÓdÉÆíÈÁÅ%Mš\ƒŒÉ669r±Y/RÖŸó0 Âjç& Bß1ØnS™¢(âãÿ~ó›ob‹õ8žÎ“•šKx¨ÛBŽéÍt§n¦¾žU²H…îäQ]‡"ãŸ?Õïëònq|4†?€÷5v¶p&ë„!ÌY:^ˆûÖ·~Ïððñín† L ýàÝo¿]ìõÔ­`ÿóÔ6hšJHh%œ§ÕÁÑóM¸5Pb˜ÏÓ~•V{5{bêWˆfb½Yø°×j¾ÎbZ_ǽ&ÃiœÊ–8.Œà¡|µ¥òaUøLtHs(1¦QÈë~–Z~‘ ‡TÞÉéjž®Ǡ΋敟“€}-uÀÉX…’† _!W‚J,¤‰Jê¯{(]ÌéábJ[àÌÂZn¼ê#ê¼ærU‘j<áV¿ÂNØpEó¡Ÿ1BÕÙsB€^ |aÓ¿«í%vÛ¶ZO‹Ìc}O,K…“kÑñH§Õö_ÂÇ 8ǨŠá‰órðÁŽü'x`ULÔž²QTnYµB{(oSÛ®Š|:œ\{ž& ÃÚiz ÄòÄÕSç`2OŸc°Çr4œ4º0†ïû¸®?€Z7P¢W¹\¦§§ÇqÈår˜¦YS4‰Î}–ÏçcO¸dÓF†A6›m9Ê`¼Aƒ>†–ÚýAÚê Õ£G7õÞÇzÝ¤ØæS­ q5*Ä\;ë¢/™ê>^d½ˆ§Nªµc˜GdÈÄž].êù©Å¡¤0” Ùd0µ’¢“‹_šzÈ‘ÃÁÁÇ'G~úc1J‹t!!™Ê… ȑâD)ñ,RŒ=íÁ¦É“ÇËCD ßœ<ùxÝfA DÅÄ3ÃÄ$ " ˈ‹”èßZ<Ã#kðß'ü7Wü;¸H i:|\‡ñý¡º¨ŠF·C‡¯Ÿý˳á[í¶šéçÿý¿/òÌ3ÿ7#ÑQ!Õ¢ ª{6sBßV ”P¦ƒ:tý„q˜ú3s€î(A£€?|ê},Üñqp¾1jß÷ãÊß‚ éx!nË–'yÎs¤ƒ$t'ðT´˜áýîeßÇ·N=‘E æÐ2+-؇ïU>ë&º•О\ÚÔ“S J:,´PYG÷dv:p*ðÌ4ŸÔ4ÏÔæ‹ËfÇ>7–¥Â%›`Õ­?™<z@j×XJd¹Þ„ëC(;ÕŒ¿_žæs2KFQÄ®wñÃwý»¢»8ìÁÃp¿¦Äñ½öÚ‹¾éƒ_ (…¥ê‹ªb³óÖßídµimÿZ(3 #½ÖÛ Ý°þß÷‰Î‹b‘¾‘x® £Lä¾[*•øÈG>Ò&«™9®¹f3ÿõ_oâ{ß›þ¢}Ú ?K5\r¾¡nÛÕî}ò6[ÿÞ|º™ Þpšl¿áöÚ÷ Çzú>$I:^ˆåM$ÝÊ_‚Ç9nÉoXô’s§¾±™x“ò®À¬óœ©×¡²¨ÁŽ®>T~Pã’ʉ~z'ó·M Ãq§¶Í.´à«(q¤X¬}•Ø¥l'dWp<Þú( ÞXŒ´XP˜zWVG'ŠvNLŒ[¶l"K·õÏœÏ+Îü1Gqfí = KzÕ Ì†Q »­§Qîºlv´@EJœ«á@ q™L,òY¦9gÛûÀ¯ŸB~¸R©{Á <ÏÃó<¢(Š=Å …BU2¨©ØoЏTàiåÓ”p˜²«£7m®ú¹¥zÿ¯?Üú\›±çsõÑ‚ NW0Í‘0ØxúýÏ!ÀÅ‹ho¶FâY‘"ùJÂH +ᦠ?ïã8†© * Ú|„ÉÂ%I/Óú&¥R ·â ï8»×ïæ®gßÅ??ÓRb×AÑA|íگŢW6›½V‚ 
`Û¶m“öƪÿ>»®‹ëºø¾O¹\nè£Û1‘—ŽãpñÅs+OÁÛß~ÿõ_Mk.õNÁAݦfKjg!Íj·]£ûX6Õ[µ>)ªEæ…ÖØMÄ!‡ ²`Aã~èDÃR“ù6…¹KWq rT¤Ió1\ "ìÖ½&«ÅÕ£;œ:TB¦™…€uçþpÑ4<‚§’_n,ÆËë¥If]-Ѿ^E·TÒ‚9âŠ> ÜéïÇÓg_ÂU·,ª £œ&*E=U”V£6Zz¬=ÎDðéæ™Ïp÷öe¼à±}¦wíŽV}IncpP‰¬¦Y õýi[;‰_áñ!˜¤Å¸®Kø¾ ƒ»kÓ]|ó·ßäÚç\Ëo÷ý-û§÷W#¾ ÕÐȵ!—ÚóØAÝSur"í V¤êÑæS­\‰¤õ®W³qׯªçX–Ñáïõf¡Cäua»n^Ò4êljGв[–‘u²j}½Ïú/èX&ÙÊmÑLl§Ë_dtÚs^Wç´°` ¥uµZ«EN’Þ Iñ)“ÉÄ^¢:ç¡ö-•J±7i>Ÿ¯ í.•”çZ.—«´ûûûG0)‹£C¶ßÜÚ9šî0²t:瀜îm¿ùÍ-T—ð¦7ÝÆ©§†\xáêiÙžºk/¯Ùì•Ù0‘8•)¡½Ü|ª·xuFßÚ[Éß ·Ü‰²Ïž§àÔæÞpùÎGs"hWŽâ~ö³q¾‡)öÇä7xô“ÇÆŽ«…„ñ[¼`m òCl²ð.ôp G½ÑÒqOÕŽ«ºSég¹Ÿ˜ÞŒd²ßN Eµc­ØOv,ªÅÉ©ÜCªesêÇ{Õ·ïÉð™9ÊðF“§ö±!{X»›Òœ‰¾n¬ÿΙ#œfOô4‹€ßýîç¼ÓTî ¡u´¦¾¨N´Žò­× ‚†aQÄ•®æmßñé¿÷{pÁÙínNcô=Á¶•WçánÆe1æ(ǬñÃL&Cà+Ï¡KO»”³>‹/ýñKôÌAÜòÿn©fç΢žÏYÔ³°„ê{h½#W™ŸGyšéyúlP û×ÏR³úów~Cõm’¹ÑZEog²è´Éç@§ô‰„¦äÉÇ…BBÒ¤Ù6n¢Ö‰¡½×<Ïöí8b†d³Y<Ïò¬XXÓ‚MÓŒ Žxž‡išd2™QžnÍÈår¤R©šŠ¾­¬7QtAødשQ]  Pï«ôr¦iªðpaL~úÓÛøÉOžÅ–-K§¼­u[Õ·ãvõ¦[VÑC5=dÕišuõR›‰=’8SXw¾²;\Æc{vÀI£´ú…|÷…z:^ˆ‹"8ãŒ}¹?Î%¡•~‡€~úãä°2ô<ÓCúi‚çf€µH%yµ± w†¤J«¬àõ¯Dô«½ƒºy:OTsœ´Úw ¨Še“í¨Ž5ZÐm ©&(ÖÇ2Qßbý†Þ¦VDÓåpZ¹;§¨6-ª¡9:L¶ò·Ìá{Ó“Ï¢‹.mwS¦áæO¬Äþ zˆ…Ì@òä$õÃ'}{KQuÊÌPM)µøêvèSz¡£#FGÑétVIç ú‚‹!£ß$·1‘3ñÜ;XiÜÆi§L°jj;H§ÕèrŽåˆ ØCÄÑü‘õ¨6ýß&|ßç7|€[Ͻ•÷|÷=àÂð¿ ó¢E/âÞwßËÑÎѵ‘£6Ãuý34§¶]“»¬Ñ2 h%ÜŒ1ÖI‹¢ªÕBCÉ—õÅEÂPU$¶íªr¡óÖ¿Ô‚êötHv}aaº‚®ª«œ¶‚®èkY™L&Ïâb$:T;ÎÕfÛ„aH±XŒE· P†^_€ I:& a``pÔ£Æóô2ÕiªŽ•E±XÄuKd³ÛXºô®X kT@¾¿_™O6«¢õG}ÎçÕ>r95-“QŽÁº.ŽN êûj™0TÛÒ™´±6]öÓ÷+™*,µýtºú(áÇiWñÁ~†‹.ú¦ÙbÞ×&è´“:íE»ÐCŸ© û?zˆª½Üôû”dh©0»„Àp¸/Ï;p{Ã/òx…”ê‘ÔùCÇ q/xÁFî¹èކ K½Å *¹(ä Š8¿º\DD}Êõ~‰EÏ=°œCT‰ô U i·…åYJ,Ë ¼çt§X wj¨©d"cÝQÖB•ù:9°Î@®E)ï%`´@§ó³èNFžj~+1-DVõ]^WÓ#MäïV¶¥Ä"ÕØ/»² 8Áªk_bY½/°_—Ëi&dÚ‰¶÷U¶Ÿ«üm%Ú¥óˆ5ó0ÌÓõI v½â›ü« ÛÝ A˜2Os .~”»í6xï{g}ÿºÃ©3 è[­ºeïÆâhªú~ò6X«Ò…€ƒ0äãGÅ¡ûìƒ \ð䓨À¾ûîKˆºÕåc·m£oÙ2þgÏÙ{ožµ×^lݹ“ývíbøC0€E?ÎÝ ðšÅ‹¹mx˜?x ÏÞ±ƒ¥K—òÇgžá¥O=ŽgýŠ n»ŒM/{ææGá¸ãŽãŽ={8md„ûîˉO<ÁöÛ7ïÜÉþûïÏ·oçÖ½ö✥KùÚM7ñŽSNá×Ï<Ã>÷ßÏÊ•+9ú®»x峟Íï¶nå8€s-âË·ÝÆþæoø· 8ÿä“>þx†ï½—¿zÖ³8àæ› Ãt:M*•ÂqÇÁu]²Ù,Q‘Éd”wŠeáºnÜ™L¥R¼ñoœõë?]”€¯VÒçbñͽ\x¼¯ÀÛϸ’úùkùÒ®¥o휽ß|–^¹ÞVÂXi@ô4•øSÛ+TŸuÄ=¯*EVm'¾ÞÅÔ_Ú£HýÖE;Ú5’_¶L© ÉãÓEDtÛ¢¨ªš:æFùm[t]t$•ªV9Nž]€¤TRç0yüõ•³AmG«,É¢%AÀ¢öœµY'""C†,ÙšÂÍÃ0Õ t>Äb±H†A@___\Œ¤YUÞ 
P—Vy‡Ùd2êsRÓ—Ò¶«i-}_]®tZ]ò ¨-0íºêRFµ.M¹l‘ËY”JEÇóÓŸêã© c:«B.§LCÏ×&[ŸZ³PÛ)Ø4a[âÅ|³4žI´ð—©ÔžšƒNÇ“Â÷}Ž<ò ¾øÅÉ‹pÚ f7Üt¢‡T%ªþ zè)¦ÒY„Àäù_ésÚÝ¡‹èx!nÛ¶n~îlàxD臣’"Ô]IWùÒBQƒ"Å8t•«úœ>BBLL||ò½yœ¿w”(W°0^õ“ŸÄÉ÷] µ};ÿûÌ3ܼðW¿â{Ï>™L¦šÔ{ï½É».†a`g³aHhš\õÓŸ²þ¬³Èd2‹EJßøßxôQþx¼ÃÖ?Ã5§œÂ'–.å}wÝ¥ÂL>óvžqû=ð¿}ì1r_Ì¿ý-g¡ÉÛ·ÜÂ)«W³lÑ"X°€¿á3X+Wò¡ÛoçËçŸÏÖ}öá•;v|0‡÷ö⇿èEø rÁ–Ã`ß;û¿³ú"Ì>>|»"ôh¥ ›–«J%5òOºìhAI»ïÔ qÚµ¦jàUo0€º‡h©¤æÕWkkTiZ·')îÙvUÃÆž‘–5Zd« N3Ï-ċʽH«7õІeM<,>©¸Ôí¿ýö›Ø¶º7NþnâÎîû~|_Ìçóq^#Ã0øìg¯gÉ’‡b¯¥³Z„¡ÃÀ@.• C™h>¯.QªÏ®[õT桵ÕTJý]*©uòyèéQË¥Ój]½¼vÐmDRÈjäÄ«¿fA ö98XÝJ%µþl:Tꯇ֓ûúT‘îùî÷Ïÿü4ïÿäý×ôÐ*Mg½Ç¯ cš†Æêažn³NÇa!Â[7`XwrØ?ãm£æõôôÈøMhHÇ q¿¹k'.®t K@Tys}ðvÔ,Ù×kÒï³*ÿ€ØS«H,Ù8U³w¶Gž<NÚ!kfÕë+yÕn¼ÖK­•»¿I5›Ø®ŽJ&Gr¨zº%Ðõû°šìGo;ÃØ¾ÙÍ: Ú{MÏw ´BR@E¬ÌA`X9Õˆ(ª$L7 åG'ôµìÛ@¦}ªÂªîk÷¨e~tê8Óè&öúä¥ìzjâ=«L&ƒeYq®“Ö’( ÂLp'pÈ¡R(äg$çN+¸®K.—Ã÷}%°Ù6žç‘N§)»{7Þ§?ÍÒSàTÄ5úäû>†aÄmCuÓ9; …p÷qÇ=ëY8ŽC†¤ƒÇqøO`ð±³ÏÆ€Q•Dkr˜&&`Ÿ¯\³õ²Ž£|»ýþ]|‰2«ž¹WÝ’uWƒ0ÃûÏ8£úá˜cÔïÓª÷À .TÞ¶Îùç+'ëý÷ÇÜÿÚå?¼ºSN‰ë8‹«ÜÿûîËÅûî«JÛvMÏ+o»çŸp\º4áÍ#ÇË‹ÿð‡¶ØÁtp1°­IµÇ(Rž9Aù>:R`p`!¥'žfQ6«<ˆt²!]zOÓŠ ã,¯C9[Qtcµ`£—×Ó´¸¦+Of2£Å,-„é°OP'B ƒõêFR!™NL³êŽ4]!ѦY y­â]¿~zÛßaäÉããQ®y;«Ð^nžçQ*•øÅ/ç9Ïy?7Ýtïz×¾ärJ+ORÛË+óÐæÐ߯Ì8¬MS™S¡P Å„Zo³bQmOëÎõf©o­SÂNšÒÈHõoí|9Ûè¯~.§~òyõ3ŸÓH]wÝcÜqÇ8Î+&µ~5TœJ ™¢^ˆKz»é€ d`–ºƒÇ>ȧ~-ÄÍÕÊóÂÔéx!îÞ{ÞÈkù“úPÿ ´i)OU˜¦'Õ¹+LLräT…&CuFÒfš4iJ”è³ûZ®"Lì®YIr¬Ô†QHh…ñÇ0 ùðW?ÌÕÆÕäí¾ÙôõõRÿƒ À0ŒæUc ¨§PŽjXp'Ý}kï›»š-`Ÿ#NüËÀ§[^¯T*)oœJ™{¢ßR§Óé •¶¦ý½­ÿ[{ŒÅððp»›?!à –~奼ùÏžP®‹‰¢…5 &§mÛñ9ÖâZ†qâüB¡Às-âíW^‰ïúñýhø]©/%Ÿ¼fÉ¿“÷«éJô|ƯÍ=ÏÍwÝG=mç®Õ;‚ £†ãÉú+Éí\¹t)áÏ_s»rÞúÄ<¾¨;ƒúà\šiÉŠv—¼önùõ2Šß{6ÿüñÇwxYöL†þ§þ å>—efÜ&z_×âY³uëGõzY×U J¹Rº†lVéãó™ŸýìÓ¼õ­‹3'´^tdÎö JO?Ú©x’·©z^ë :3™˜žb´‰Ðù<ü sáKΓñšÐŒ½ÚÝ€ñxæåŸÄĬûW0Φöî5ZdÒ¹eË–Å%ÒuòYG‹P¦gb`ðÎO½3žÄ{:[l|ß'Š"|ßWÖ‚€T%4Båå¡E±(>bš&Wß~5@Myw=€4 #9ŽÑ“9:Êå2¦i[NÞÂ{îˆÛÜWr॒ªJ‘N§9í“§Aš8ÿ‡ÞŸöq}—'ϲFñOŠpz}Œ¾ïãº*mi*•ŠÏC)(áã«kšá*ç:y º‰ç¾í_Xpò‘--ëyù|>öøÉårñy5MÇqâ°‘L&ƒëºäóùøN'ú:…aH__Pý.•J¥øšô÷«®E©TН‘®V¦7‡aXsÍõ²===ñqg*=PmÉýº®Û­þ´#}úûûãõõöK¥R¼}}îê·›Édâv÷ôôÄa9Ë–-¨Ù^òx’ë|9zŸúû^ÿw·¹¨GÀN"ž\ÿRN>aÅ„J°‡>wš0 I¥R±w†¶A è,ËÂ4͸]:&“ÉðD%¼Rß›´Géd1 £fýé sÙaDüÝ^ïg{Åž:=(å¬lš 
…׿¿ÿ~Žzä‘v7{ÒsöÛm/êík2¼@WÂ Ãøç¬ý÷g÷’%|ø[ߊ?ŸÏ³ò°Ã8Í .ŸOÉÁ¿‹‹}®AU4HŠiZÐB‘eYñüÉÆ„Of Ú Ú;)=Í!“ S+TL 7I¡EÏûò—¿<íçe¦¹ãBŸáëö´´l«×>ÙQÑáÖ±÷šÛǃÏz«sWÇ=Ú»K{KêÏýýý¤Ói¢(jh¯úÚ6¼ô5Ñó´À¥¾ä´úù“E{@éí4ÚÖd¿;Ͷ'¨NÆCüÃã;¸ïøOi[úE‰®Äç8N,kÑ}2X–E)~UÝÙ|îsŸƒNhw3æ£kqwçô³ø—×ó>ó›œò«¥¼÷°2‡m;ŒÞÇ{9ê‡G©•üÑõæ=ɰXFyM~ÖÞ†ú³ï«Ÿd÷A¶mµ¬çÕ:Mæ ÓÕ3u߯î¯~›ù¼jª~é´w¹œj—ï[œ~z»Oæô õƒ Æbó·¾5ħ>µ€SO}”¿ÿû!.¹äÜv7UƒäE×p™ëcùoû¾úÕÑ!©yò”¶—0kÂéÀQ0ô~¸—ˆ—br!iÀƬücoàÊރ熬Zà€ÿŸÌË3xûx„»B9X-Ç®«=Ø(/¸ÈÞ›%8¥ê|¡½»¬³UžQï,åý¥I“&·WŽÒsJ˜‹*ã˜[¢ðÏõÉŸ‹Áù©Cñ«Eå*g_…¾õáoóqÏrÉy9RoKaìc#W;®??áÒ–‚„q¶íÛê ªæ—êx‚ÕÞé™c2XûX» Š_*b %nÞà@ð‰€ÔûSp¤ŸfàúŒ]Fm˜nÅVÍM&=/é!=œ&¿_žÁõƒ˜e³ök‡œ“Ã=ÈÅ>Ô&»8«*mýll®¸ýŠv›â¤8¢÷‡¬<ò9£¦Ïd*¡ûéh!î+_¹Ÿ]ÃoQ7BÓ×Å5V=-ÏSO.ÛfeÁW¿JQûÖ—J¼ûóŸgñâÅð½ï©žmOKi"íñãL$Aó8Ì”rnYVW ÉðÛ¹ÀîÇ—bYÇŒ¹ŒöT›Êµ×áÉ>~M>˜FÛÔ¢Syª¡I¼ –D°}pj9Á<ÏÃ÷}|ßCJ“æUvdŸçèÂá6jðÂ]ÀKR”Ï1|àìýû{9{ßw±¿¹¿ÊKTïŸÉ¤c@¬Ÿ¸­X¶‰ú®Uœ>0¨F;éDÞÍ´@cŒyw0lUÛðóJ+Ž ìþ\ÙÙ)9ØÂÏ3ÐëÀ%iø  ;¸?„ý Ø;‚síjË|eÿ_ËÀ‡Zð§ö1àì¬tà•9€³-x"‚zp° ׿áp  àiØnª.ßm¾:!O˜ê|ÞæÃ£!l)©6cÃ*T7.ÜÁvàAK­m?¼¡ _uáòõ& <ê€a«±öÀ]'-šV[k}ôa`0À~IE}¨TN; µƒBmSÚ¾™¸.ÒbÒýÙÝH¨”Žs¹¢ªïþûþœ .X ([î§cA´+Â^j3ÀÆÙê¸(»½‘SdzÊÊ÷ßÂR+åÁþ…MáˆÖiÆw U”N‹a*v4DÅŽ•T@ ¬^ÓE¢Ê?u³IœxΟüwݺV~4iÈ’ÅÇ'GçUN%wÕŠj=0<kÀªcµ‘µT ZX*ÿòË•7 µ®ñʶô•3ÌßT~š~‰Ê±ÎR(P€KQ?£¬B”«.RlY»e‚ÖÓ~ö±û÷/à‰Ãk#FE¸ B’Žân ÷â ãÿ+JÕšçõO!ýú´R@@‡èÕ r‡ÅÚk.ªO5ÃP¢ÜJ1ÙºÂXòUm‡"®±íaÿCîŽi:_çœÊ[“d.+›<ùv¶0ÇØõ­çó? 
LÞ=ÁûˆÎ}Y*•H§Ó5Ea¶Ñ)\BÌÿ÷uJ .ýóË0î‰è;«Ÿ³»¸÷Uû+ÂRDD‹j‘ï$ºò] 5f2ûÓóëIŠnõÈpK½xØmÁRk´ wƒ ÷øðh‹P"×Á•Ÿ7 ¶}}åqUÞcþЇ'*pXå÷îP aptÛ³JìòQbÚ=>¬¶à¡Lx&_,Á‡ ¸;çàŒ"œAU¸ÙíÃ=%Äõ[°Ì†Ý¨®×«ïT“ãK®g¦Õ9½«òŽî^íYWÙ×»êÞÝÝcÁËà¼4œkU«F(.Æ€§þêVàˆé6µY§D »âÔïö³gÏQù¢^ #_ D ê|Ôß…Ó*ºE|MµØ[ ïnŽEÕ©(HL«G Q:H}[¢7¢~¾Aõ{Uö­§éãób§WÆKËVq"¬,Ûhùz{¯ýc¦ ‡¤á¸>¸¶ Nª¾Ÿ$ÅÑáC™Ô>:|>ä”SþçK‘ ^M6u"À> " òé4º(ƒ©ŠÎ@µ²³iŽvÕ$<¸bAÌ¡6Ý€>¡±EU·Z¨uó…šÊ×:tt*8•qÛ’J¹~C£±˜RÑ ìx™osLÞxÇc‰Êwð—÷|‰'ßS;öš©7aîÐÑBÜý;îçù=ÊÃwŠM_éJ’P›;k¦©~´øVŸ<Ñó” fÛÕ,µ=‡ÓÐJvÒéZQÏóÔO½O¹a¨í7õ}&_{é_ý ÃÆÞº-AÐÞ¤!]ì— Gò¬¦óu¢zÇqäf-t4C¿?˜c=µ"Z#<) ú(aϬ¬£µŒä4¨~góTµ¨šlØ M:b±¿²]½_¯?ªÌO¶1éÕZ?­Õ§©†Ë,xSŠXÝ<+ ¤aƒ÷úðä!«gî‚Î0} «n)Ñ_IäŸþA/Ù󮆷;ðרY0 2•s\LžãddG¥hž§JК&qåM©›¥¾™Ö»:§Óµã ßWËê1‘_ÉS`Õ˜øútGÉ"TaX;¶k”¶(¹¼iVÛ¦×1°m ‚걚·a=¶ÔÇWß&=]¯cšÕ‡‰iŽê~} IÇPûª^*•ªÇ«ÅP=­QÅ—äþ+ç|ÉîÝÓj[³ÅáËÿÀÑçœÃPÆu¸t´7ôèBκçQød¬Æ]:ac×Þp¦çóê¦a°n~ü#¸ýÈ8ô5&ÔÍFo§¾ éô蛾 –JêÆ[£Ôû6ÍêÍ_ßôê…>dEõ·îƒ*¿TÓËdªÂš^·Ñ¹ó¼Ú›¼è·MÙlõÁ†k®FµýŽS{.¢Hm³Y¬O6[+VRüå/»ÎmæY¿ºŸ‘=‡6/.ËB·°ôÁåÜvØÄ*?M¶ø‚ Ì!J› 1oÝAé ïsüI;¹ûîóÚ{÷æõ?~=ÿçÁ ×-é\ãÓúügÚBç\W=nõ£0“©ÎL*5aõجÏGêG}ý¼ÙŠìNêkÓVJx‚¨Á¸ pq±°ø‡gNeø#>»¯_É·}0~ó¾8/œúö'JRhK’C _Ú£,D}·EÑ5ú»ö8Ÿ5é No´%:´¬óN‘Ÿ/Û#¸$)¦§!´áCÚœ2Ë­š:¥ì:ûÛØ¦Mö¥î« ¤q!Üôþ#òPö2fB•V"œt:£ ¨ŽK´TŸŒÇ#9öJ kž×xì§×IŠeú³&ÑÎzyý@Ñž{Íœ+ô¸¯T‚õ0Êfõ8T¯›ÏWÏS3g-XZ–:N}§ñ>òùjRÐR ƒÓº°ûÀŸØSóÖù%Gœ0-Ä=½c1«ú†5¿ðºP‚Î74eô«æFî\d`7êm&oä­Š)ãõ˜s9uc ÃñBÔ¿ÏåÔM­P¨}{£Ñ7ìÆ¥Ù1Un”5í7Mõ†§¾­Z¨lèžð ¬¿9뇤^ϲâQÉuk×òòÖÎtG°0:çἠD„º‚xêî#yâñ_Œ¹œ.¢+Rëp³IH8õs(m^u× øÇ̦M*X¯88¾¬“|ìé÷TɈ%ÛV_ý˜ÔŒ×=©ÿŠ4K&Ç‚SȦ!ÌA<<||Ša‘|¸‡_}"?úÎr¬¥Ì|…ßdü¨N2g%>{T]Ï*zB.¤ªæêeê?넉:ŽTǹŽ÷8™áŠ8ŠQ¿DÖ¢ƒž¦÷]ÿ¥N¾tÖ}ó¤@c£Û¬ûЕuèxšÚU5Þ0âM´{óŒóLó¿ù!ÏzÑ­dYÇäVçà}µËx•ŸiÉjì8“×&B£q޶ 4'­¶MÛÞ—mýpÐÞúïþþj”X³c˜¨ˆ”\G{ ŽõlàxãÚµüíÄöÚvv†O2‚šïýTu ¿aqanÑÑBÜã ÷pú²TÓù©TŠU›MÌæ»tºg2´•,¦I~ñ“0„¾¾©'ov¬Û㽞oÖÛîÊ0Ô†,\À#û/h8KWŠ”·%B§3LÀ^»Ÿâ óÏs¹R©D>Ÿ§P(´­8ŒqB3´e½ãô~ŒÌGÀ0|[Z_;që±CUßA%wò~E˜-tEÉxÍ[ÿ…Ûþ“/|á Ö¼#ëE«ømÿÀ?WîÝÉP´(ªíŸœÙ§Ùp– ÷8Ê„p¢­òéDmÛC¸Ê…}"ØnA)‚ùp ©–eÀûs`ZÕ/ë/#øI¾À“i¨DƒÛLµüÖr­à· €ÏeàS-ÿxå—›0Âéü¬NÎÑ9£}¿6„P dÍ¢KöDð" Ö¥áµ)x¶ ïÉÂ[ìj\®UYþaà)à¤Ê¹½5RŸÿª¡ãÕõ{o T…’í!œ_™w𦠜hÂß§UÈ"jÏùÓŸÚl“#<Ïåm›_©läªÑ†›áº>­X]¥é¶1’É2!¯ 
Q·¶XqØ0ð×ínŽÐet´÷Ì¡Û9þ¶KšÎhųkŠŒ)ÄYVë¢Y»©}ét户oÿ,þÉ€óFÍ“œYB·ðß~:VúÎ1—Óa¨^¡9 ÀÀu71 TEÍIªji3€¼:q©í>v¹ÀñÇ߯)§´~^§*¸‰WšÐ xx¤IsÏ­20ðIz¯^Â4XPçbÒâÀ/B8Àg²Õ/Ç!%ø³¯âGo U¥€•À®H•›í#¡†Xpè|T‡óŸHWÃÌN0à-‰6DéÚˆFQç$þθð*st¢Ã$Zì lxÍ š¦=éôûé ppƒþ¼öRªìHzøÕžtËêxW#yå»&O:µu?R(©XVç.ÓªÅC'±?}<ðŽt5ñœ^'J¹¸šg¬Reá´Áÿœª9Í:wy›.Ý;ú\—˜C"œ0'‰€¥ÛvqÈÃGÖLÏd2“ç“î‡ ÝEG q‡~â]ðµ FMâ‰'x0ŠæDr]anóÄâ˜CŽ5Ý÷}LÓ”ÜYBP©}f1KI–º#/ü–Õ\´èïï§P(´ÕÃÓÆÆÅe˦|-'»þtxäéXTùgbŽª„¬ç±øçàŒ“UÜ´XØH44+ÿš}îvÔØ<Àú•…e…¸îë¸üòÖ3ÍV6Ah•%>}ÿ§yÕ«nà’öã÷µ ü?ªh¤ë¦ë*!l_ΉàhàðHyoÕ|ÅÓÄYÓüÊO2) ön‹oóɺz’Ù<%ÉDŸ­ dµ˜5^pJA·%éqnXYY cTE.½Š'ue‡Áäa7Qåm”€–I,–tBªwHjEܯO¾—&®8qã 7²’•;¿mæ¢÷ÿš¥wºìó³}FÍÓÅ1D„:½ŽÚÆ1'ÖjSI=daáãÏèË]¡3èh!î‘m4̆º}ûv¾ô¥/ñ–~°ÝM„¦l]¹’_~š#‚ƒg}ß!!2-•Qˆâ›}ýòZ šl4õ’úíZX5ÓôCÈÂŠß Mµ$üXè}˜˜¦R ~Žr !ÇüõbYÍG …B¡í²Í3ï)݉è#ÛpDÔòwX xú{«× ãï^ò;øèÊGyé=/m÷a·ÌТE€ò 2îLãoõqKÔ5¡Kññ10ø§×þ«÷¼“Ï>úQúw\WM’£\¥úG2T²jªrT0¿zB7Њþ^?þÈV~êË©ŽE+áT©\§[ÖÞ2Ã;š~ïú#ÎOŒš¡´Ò"­ŸjAh°× ±ýÜÍÀëãéSé[ôÑG™ò„^ ë´!!QM4ÅX†8#´Žâ¾þ²e¸yô¼Ã?œ E„º€½n>Š%»—ÖL+•J83œ(¶™˜”Lþ™|ÓR/„illŒÊ¿ˆhÎߤ{èWÀÔç)).êóÓª7Ôžïi÷¡Nˆç<ô ‡^÷BxCóeÚ-ÂÍwLLÊu©¬ Œ–CZ'úºqËF6=²©Ý‡Ý2ì·Öö픞)²ûß® 8Ü•ôBW“'Ïë¶\Ì7Œ/ñöáw,>Œª7[šªð6´N@uÓÆ®O½›øÄ–QÓ3Ôš® t*ðȽcü±êQM:BÄ¢L™€€ ™QbZ=Í<çLL2dâ( «©‡éôø')䙘X•SõÐÓ/|¥þJ¾<ÖcO} Æ°a"ã²±¶¡Û%þ5žÈ‹êŽâþøÛ;8íÙ÷g×L÷}ŸÛ9û”SÚÝDA—¥ïü/úËÿ«™ÁŒ qÍÕc ¶% 28î>Mº&,°ã˵7®¥›ÊBí¾y9;î8uÔô0 )•JDQ4=•«a†Ø²ÿþìÿÇŸrÄÖçsHß×9ã ñ†º—€€]á¼ïÔ—rÃ[ƒœEø©ÊÌ"s› B~ïß9fÑòþCj¦»(ÇKIÃ)t O_~+=.þ†ÓS4ÌÂ"[ù&h1ÈÁ™1§-’Õ úŸW‰×¯ÒôßFzg¼ô÷À?ÕL7 ƒÃ?|VÛ2•ðk–©î¿á¥µu¡“¹ÊÿÑmyþÉÁñ4íÈYžìF¡ ì9ôŽ~áÁñçét¶pf<¶½J£¼ÀÚn¶™©±X§EvíÕî4ã7‹oc¿ÝËk¦A€eY³*ÄÉ \˜,‹ö,â©O¿¯F¤p]·ÝÍ„ ñç¥KYÚÿEœ£î5/Nã8Ž„¥ Ï=ѳØ~Ç=üåWÿD†]ÕWÆÃÿãræ _áC?ï­q’;±Ð-üðwÏâ°}ÄŸ#”7\)Î tÑn8îíŸæùÏ?J}Ž¢ióˆæ>+ÄÝ|õ{ÙæŸÃ|>?…- ÂìbðäUoŠ|aŠ`!t'#O2|Bm×XîÇB7ñÅÏÜKðô‰\ÿ߉Wr‹„a(¢(Â÷«aûÍþöžçÅó’/¢\×­ÙvEñt½N©TªY§¿¿¿fýR©ïS/E ,hضT*Õp?·Þzk»OuëzWóäO_Íë.åMèZîÙs§öV«¥fPu-:Ë_EÆæ¶GΡ[wÇã;ß÷kž;‚0šºû™Çxfyõmš&Åb{BI\Üšd‡:69Mš¹\Ž p]7œzzz(—˱¨fYV,Ä…a VIOë¤À¥—×Ó“Ï-Â5/9“bWRà²,«&ÍBòûßLK¶Ë0Œ!®Ù¶¦;ßäLn»-ì ) uÀfξàœv·F&žOýŽýž}7–õ²8$µÝyá¢(ŠïÉ— ­F³$—K¾,©ßž~ÁaÛvüBÓ0 Â0Ä÷ýøšÏçãÔ¾ï†!étzÔsÂuÝø†çy†ã8ñË}ßw]7¾zž‡mÛ£¶ Êó8¹míD¬:^ ú'“ÉÄ˵mß÷k^–t Ã÷þ…¹ xq 
B+t¬÷ÔŽ¥üÍ+•Gœçy¤Óéê›ÕYnK}¢B81""Ož4鸪ÉT‰ˆè£ƒ"Å1ŧ~T¨Æ ƒcn7OˆˆâéZTK_† .nÜÝ.]¹œx\öШ\zú”(ÑO\~Y·5E “¹XàsqcïB}^MLJ”ðñ)RŒÅÁ)N\yâÌ\äàÞa“ýSÑßQIXªÐ•ìZ²……˜˜æó©—e„vðƒí 8°wÃŒz%'AAÉdˆç  Ö»-é©eYV¼ ÔŠeIA)ùÝ3 c”`Öèoanžý)þüï.¿þÞv7e^Òª(“^‚ ˆ½C“ß}K*)Öh±~¬å’ÛKþÝM¬?ñ%Œ„ª” ¤™Z”uò<ø¾OEñ}8)%Å-×u1 ƒl6K”J¥ø>›|Yây^,éx½\RXòúèÈW¾ò•–:<—\rÉÈ~Ï FFFjØ{d¤‰H˜‹´ó=9ùuï9öµ×Ž”Ëå†v,Ì_ºÅŽo¾ùæ‘·¼é¢‘cÏ]ß¶ö I»…¸‰ØñW\1²ä¯¿3òöOýǸÛÕæ‘5ˆ×÷îš¿e07wh§7Ѿññ¯ûב3Þø•†ó³###¹ ì{,,›ÍÆóÊårèaÛvSÑ*—«¶ )F%¿3IAlÛ¶mMÅ»$õËM7ÍöÛMtK¿bdddä5¯yÍH¡P1GFç’ýåt:_›r¹\sæ&ÝbǺô¬s¾1òoÞOÛÖf¡ó˜HÿxÖª¦º®ËÐн½½¸®;nE‘‹v*‰p#$àB˜m&jÃËw>Ä˽GÊX ÅDíø¶==‹T8E³û± Ì6µãc"k_¶jÔô0 ãœ6QÕ¨q8P2ßZ2¬G¦ÂDí8¼Éáòô)Mç[‰íj2™LlמçÕT²M†Âe³ÙšB:T̶íšð²r¹Û¿.ì*$­YAŒäw&NÇ¡[ÂñQ¿ÜtÓ­aqÂDíxÅŠDétÃ*©™L5çt¡P¨±Cɳ,Ì$µc€'_ÊÛ_ÿR©TMÿAZaVrÄmÞ¼™­[·r啪RÙðð06lóÁ÷¹wý83ί"íd26|ì±så•—¶»é‚3;~ :‚×¾üÀQyI¡]LÆŽÿùÕ/«É×£“S† l†aˆ ³Ædìxíë~Æ9ç\(áXçŸ ‚€ë·nÅ>ÿ| VXÊf³±7+¦’_P˜“±ãÝK–à0*¡Rô„Ùb2v ðØú0ŒƒÉår’ \˜0³â·qãFz{{ãÏ«V­bãÆc®ó½³Îâ¥{öУ~fKo^»ví,í©97ndýúõínëׯ÷šÍe&cûwï®y+Ý®v‹ýTé„ït;™Œ_Ü÷GÎ}î"€¶u2Ä~:³íb2v|àÆo·“Þ5õÕüCçòž'žÀ^{Çüã“Oâ%ËbEE„ƒZ!nºïÛrÿ‘vt“±ã rÔ÷¿Áè„þ³ÙnéW;ž˜oÙ²… øï/|G ò!ö“DÛF+ÌŠGÜŽ;X±bEüyùòåc.ÿ‡?ü?|ä#X¦IÏöí£æÌÆåÞ²eKÛ ëÑGexx˜Í›7·µ÷ß?7ß|3tP[ö¿sçNî¿ÿ~vîÜÙ–ýOÔ†AUqzê©§nK›Aì§žv§µ?ýôÓmÙÿDíxÓ¦M|ó›ßäë_ÿ:?úÑÚÒfûé´v<òÈ#<òÈ#vØa¬[·nÖ÷?Q;þùÏNvØalذ!žþå/yVÛÝ)v,í¨²eËžxB•¼[µjÕ·61&Ó7¾öÓŸæÐCåø½÷f)ð p{åÞ¼øßûîcÃãÏø9“û`ç´£ûǃå2;¯»Žµßþv[Ú Ò?®§ÝvüÈ#°mÛ¶®éoÙ²…Ûo¿o¼‘믿žg?ûÙmiw§Ðnûéî¿ÿ~¶mÛÆ~ûí×Òò³"ÄM”n¸¡ÝM„)sÛm·µ» ‚0%Ö­[סE¦“R©Ôî&”‘¾±0W\ZB·óÉO~²ÝM泚ÚÛÛ[óÆaóæÍ,]º´ÝÇ.-#6,ÌÄŽ…¹€Ø±0;æbÇÂ\@ìXh³*Äé=ß÷¥B‘ÐUˆ s±ca. v,ÌÄŽ…¹€Ø±0;ÚÁÞùÈG>2Ó;ÑŠòå—_N†|÷»ßå£ý¨(ÍB× 6,ÌÄŽ…¹€Ø±0;æbÇÂ\@ìXh FFFFfkgCCClݺ•ÞÞ^1l¡+æbÇÂ\@ìX˜ ˆ s±ca. 
v,Ì&³*Ä ‚ ‚ ‚ ‚ Â|eVrÄ ‚ ‚ ‚ ‚ Â|gVrÄuÃÃÃxžÇ¦M›¢··7žþ¿ÿû¿üâ¿`ÅŠ5ë4›7mYµjUKûš‰vlذß÷kÎÅXûš©s!´N3Öó:ÙŽgª bÇÝÇdìx&¯[§Úq;¾ÓBëˆFîÇ݇Øñhä~Ü}Hÿx4bÇÓÇ|;7¤™t Ó©ÝÌ;¸ááa.¼ðB@UHñ}Ÿl6 €ëº±‘¹®‹ïûñzcÍ› ëׯ窫®ª™6›íX¿~=7näôÓOgÆ ¬_¿~Ü}ÍÔ¹Zc,†Î·ã™hƒØq÷1Y;žÉëÖ©vÜŽï´ÐbÇ÷/÷ãîBì¸ñþå~Ü]Hÿ¸ñþÅŽ§ùtn:M3é¦S»YØîƒ™m6oÞÌÒ¥KY·n§Ÿ~:çœs›7ofëÖ­\yå•€2¾ 6`Ûö˜ó¦‚ïûq™ädûf«CCC”Ëe¾õ­oJ¡-—Ëc¶cùòå3r.„ÖifÃz^'ÛñLØØqw2;žÉëÖ©vÜŽï´Ð:bǵÈý¸;;®EîÇ݉ôk;ž^æÛ¹é$ͤS˜nífÞyÄ­X±‚Ë.»,þ¼cÇ6nÜXã¾¼jÕ*6nÜ8î¼É2<<ÌW\QÓ–Ùn‡ÞÞÐÐP¼5kÖŒ¹¯™8ÂÄhfÃÐùv<m;îN&cÇ3uÝ:ÙŽgû;-L ±ãZä~܈×"÷ãîDúǵˆO/óíÜtŠfÒ)Ì„v3ï<â–/_ÎòåËõ¦ ›Í²fÍvìØQ³«—Æœ7Y\×å²Ë.Uy6Û144ÄÐЗ\r ½½½lÚ´‰j«\‰€IDATuëÖ±zõê¦ûš‰s!LŒf6 ³k?0q;ž‰6ˆw'“±ã™ºnldzý&†Øq-r?îNÄŽk‘ûqw"ýãZÄŽ§—ùvn:E3éfB»™wB(Eó«_ý*6là²Ë.öíš&3ÍUW]EoooM’¿vž í²¼yófÖ®]ËêÕ«ÛÝ,aÙðl#v,L±ãÑçCì¸û;}>ÄŽ»±ãÑçCì¸û;}>ÄŽ…ÉÒnͤS˜©ïô¼ MX»v-ÃÃÃ\ýõñ º··—Í›7ÇËè¸èñæM†M›6qÕUWaY–e`YVì¾8[íX¾|yJÛÛÛÇ=7Û×t·A˜l:ߎgÂ~ÄŽ»—‰ÚñL\·N·ãÙüN “C츊ܻ±ã*r?î^¤\Eìxz™ç¦ÝšI§0SÚͼóˆÛ°aK—.ß«OÔðð0K—.Å÷ýQ×hÞdÐ û4–e \?g««V­âª«®Š··qãÆØe²Ù¾¦» ÂÄifÃÐùv<ö#vÜLÆŽgâºuºÏæwZ˜8bǵÈý¸;;®EîÇ݉ôk;ž^æÛ¹éͤS˜)ífÞ q:a¥V35ApÑEqá…²jÕ*|ßçšk®Ô…f󦛱ö5ÝíX¾|9ýýý\xá…¬X±‚­[·òîw¿{Ì}Íæ¹3– ϦýŒÅlÚØqw2;žíëÖ vÜ)ßi¡1bÇ£÷%÷ãîCìxô¾ä~Ü}Hÿxô¾Äާùvn:]3é¦ò}Z0222Òîè$†††Øºu+½½½£\)Çš×Íí˜Ì¾fó\§Óíx&Ú v<÷è”ëÖ vÜ)ßiaâˆËýx.Ð)×­ìXîÇÝK§\;±ãîEÎÍøça>ž£ÉœâAAAAa˜—ÅAAAAa¶!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfŽâúûûÛÝ„y‹çy¸®‹çySÚN¸®ÛîÃi+bÇÝCE¸®[cûbà ±ãî¦;Öö?—;îÝ‹AìX#vÜf³o,v,Ì“±cé7G츻i·w¬çû~»›Ð2 ,hw¦ ×uñ}Û¶ã›õd‰¢ˆ Ú}HmEì¸{èëëÀ²¬ØöņÝbÇó݆›ÑŠ»®K>ŸowSg±ãî Ñ½ÄŽ5bdzÏl÷ÅŽ;±cé…ØqwÓn;Þû#ùÈGÚ}ô‰¸úê« ‚Ó4Éçóè¦ÕÏ»ýöÛ9òÈ#ãuK¥ßùÎw8òÈ#1 ƒ(ЏýöÛyâ‰'ð<'žxÓ4'½ÜXA€çy¼øÅ/Æ0 n¿ýv/^ÌW\Áƒ>ȉ'žØÒñ7ÛçD§7;·­g†¸®Ë7ÞˆišØ¶M†œxâ‰M+Š"|ðA¢("ŸÏcF|mÂ0ä—¿ü%o}ë[ãsõÄO4]§Û™¬7ºžbÇ£ÏíxÇ ÍmK¯ßè¸|ßç—¿ü%Åb‘O<1Þî‹_üâygÃú\u›'mØ4Í1¯÷xÌ”Oä\LÔŽ's/Öm-•JAÀí·ßN‡t ¦…fv<Ñ>ÅD¯ÝtÙñTîÅÍŽc2Ó×™²ãf÷â·¾õ­bÇS´céSŒ>¯Ô7ž¯vÜhžô+¦ß–µW[½¿øÅ/–þñ;î¾~E'káEýýýø¾O†¤R©šùýýýA@†ôõõÕ(ø©T ÏóâmxžG¤R©X±O¥Rñ6‚ ˆÝH[]n,´Šêû>AÉdH¥Rñ…kÅý¼¯¯ß÷ãc‚`ÂÓÇkc+Çéû>–e†a¬ðg³Ù1Ko[_3½Ÿz<Ï#“É`FËët“µãF6<‘ë&v\=αlk¬ã2M“0 
k΃iš5ûŸ6¬¯e7ÚqòÚéÏfÇ=±ãÉÜ‹õ1»®K¡Ph«ÝM7cÙñDû“¹vÓaÇ“±a}ìÝjÇ­Ü‹Aì¤o<×úÆú¸ç›ƒô+fÖ›ÙñxÇ%ýãÚk)vÜ}ýŠŽÖ*F:€\.7â8Nü¹P(Œè¦ …Û¶ãyÙl6þ\,G,ËŠç Ž†1R.—G èY'›ÍÆŸõ¶[]n<’ÛF¶mÛN¶½õÇ—ËåF …„§E«Ç™ÍfG,Ë1MsÄqœÃ0F …˜ǥ·­ç ֜۶G …ˆeY5ë7[§›™Œ7³á‰\7±ãjûdzDZŽ+›ÍŽ#Àˆa#ƒƒƒóΆGFºÛŽ“Ët¢Oô\LÄŽ's/‰×mõw Íìx2}Šä5I®7Óv<ÖÇÚ­v¬·W/N.#v<9;–>E-Ö7™v¬ÿ–~ÅÌÛr3;n帤\½bÇÕsÑ-ýŠNÖ*N¿´7q|ßÇqœø³mÛñßabYVÍ<­¨A€a5 eE5ëŒE«ËµŠmÛãº'©?>ývÂuÝ MŸ®ãŒ¢ˆÁÁA€X.‹c—eYñ<Ó4±,«Fu×.ºÉõ›­“¼öÝÆdìx,Öç©ÄŽk—ifÍŽË÷}J¥ƒƒƒ˜¦‰çy¤R)r¹Ü¼²a}.ÄŽ3aÇ9µã‰Þ‹óù<–eu½Í6¢™O¶O¡Ïo+L§OÔ†c7Ùq³{ñÀÀ v1¶mÇ?år¹Ý‡3%š% œèôÉ`šf»±eY£Îw«ÇÜæ¶mÛÆL<;™ýt“µã¹fÃúøÚeǶ=úáªí?NÇí™O6 bÇõÇ×Mv<Ö:ìØ÷}òù< ,ˆù.X° k’E«v<×ûú»ÅŽÇºƒØq3æºχ¾±Øqãó4—ìX_;ly²v,ýã*bǵÇ×-ýŠñÖi§w„gY¥R)þœ,©lšfÍ…K>lÛŽ•I­€f2™v΄¨?¾|>O©Tšðôé@'îÔ†Ö,7K=:<ù·V‹õM«P(Ïç ÃpÜuº•ÉØñ\°áFÇ×N;žŒmé|ÉmhÛO6 bÇÝlǽ—ËeFFF‑‘‘9mÇs½OÑè»ÉŽÇºëù v<×í¸“lx¶úÆóÑŽAú­NŸ*“µcéW;n¿Ãìô'»Ÿ‰Ò¡©étšR©D__†aÔtº´òž,¬ÑFÝÓÓ'“Ìår³Þ~ír:™‹Sìúa¬+…´:}:0MÇqèëë›Ðù4 ƒþþþøfÝh˲H§Ó¸®K:ninc2vÜ)6 sÇŽ'c[Ùlß÷kÚS¿Þ|°aèn;Ö6<Ù}w»Oô^\,gö‚´‘fv<×û޽›ì¸•{1ˆÏu;î$ž­¾ñ|´c=Oú3oË“µcéW;n¿ëc™éþñlÙñ‚ýÚ¥HªÇš(Šbeªq¼ÉŠBZ¥LÆòÎ&a†á”TÒfJëD§Oçñ´r>µ+}±XŒã«Ç{Ã2™uº‰ÉØq»mæ†OÕ¶Z½s݆õ1BwÙñtØpò8ºÉŽçƒMN†z;ž/}Šäqt“×·¥ÏÄNb¾Úq»m¸ÑñÌTßx> ýŠöÛòDì¸Q{¤,vÜmýŠNîSw„Gœ¦ÑEÒ%oµ!{ž×P‰Ÿ)Â0¬q=­'—Ë{qZÙF³chez+ÛŸ“16Ã0&ü%›Ì:ÝÀdìx&m¦nÇ­ÚX§Øñdmk¢×a®Ú0t·Oeý±Ž£ìx.Ûäd¨?ó¥O1ÖqŒ7}ºû“±É™¾—tóÕŽ;å^ ³×7žËH¿b4³mË“¤\Eìx4sµ<ÓvÜQqÍ‚ Ž+Ö.žBg†!¾ïÇUwfj¹€ØñÌ3[¶5_mÄŽg¹¯Î,bóƒØäÌ"vܹˆíO ±å¹Ã|¶}±ã™g¶údzeÇ]!Ä ‚ ‚ ‚ ‚ B·ÓUSAAAAa®ÓQ9â4üà9ôÐCÛÝ î¼óNžûÜç¶µ ;vì`ÇŽyä‘mmÇ<ÀÈØÖvÜyç¬_¿¾­mh•uëÖ‰ýTè$ûi÷5xä‘GøØÇ>ÖîfŒK|ýë_û©Ð)öÓ íØ±cË—/碋.j÷é—¯~õ« µÝ~:ÅŽ¥£Ûñú׿¾ãóÒIßXÚ1^;¤Ü:Ò?®¥Sì¸[úÇ?øÁذa'Ÿ|r»›ÒtŠýt=ô‹/æøÀ¸Ëv¤wß}÷ñêW¿ºÝÍàæ›oæôÓOok¶lÙ–-[ÚÞŽë®»ŽåË—³råʶ¶ãæ›onëþ'¶mÛÚ~ÝÄ~jé„ï4À•W^Ùî&´Äðð0@ÛÏ™ØOçµcË–- µûT´ÄÐÐPGØO§Ø±´ct;ô½®“‘¾±´c¼vt Ò?®Ò)÷ÁN±ãnéßwß}aÇB§ØO'P.—¹ýöÛ[Z¶#…¸ý÷ߟU«Vµ»¼îu¯k{;V¬XÁÊ•+ÛÞÝ–åË—·µ tP»OÄÚÚîë&öSK'|§AyètK–,áÈ#ìˆs&öÓyíèñàÀ”û ´£)›6mbÉ’%í>ã"}ciÇXHÿxbHÿ¸–N±ãné¯X±‚ã?¾#ÎY'Ð)öÓ üå/açÎ--Û‘B\§°zõêv7åË—·ýæ È—«Kû©¥¾ÓÂÄûéÌv£SìXÚ!L…N¹ÿH;„© ýãZÄŽ'Ƴžõ¬¶‡5wb?U&bR¬AAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAA‚ÚÏQTûÙóFO›"Ä ‚ ‚ ‚ ‚ ó’T \|_ýÉ@__U+• 
§G p D8ý÷d!NAAAAèJ¢H‰hZ8s]õ†ês¨ÏýýÕu°êÕV(€iªmØ6 @.W]6†ÁAõÀq”8—Ü÷D!NAAAAèx|–-ƒ|~´¦±\ Cy¶õ÷«eMS n D8=Ô²é´ZO‹m¶ –Õ¸ ¦©~´çÜDÃT¶û$ ‚ ‚ ‚ ‚ ÂxxžÔJ%% Å’lÙ¬úi„i*·©N+áo`@µcãÆÖ×!NAAAA˜qð¸VVy‘…ÀûCø«þj~·(ªŠsa¨Ä:íùæû£=ßfœúªõ˜ÏC±ìÇ?æÐûïoi“â'‚ ‚ ‚ ‚ L™[CXÝopàÚ<–…wYðÁ¼¼"ªù–å" mÃ-xe N1á!ì>›S^o>PÒTó·Í A Ü÷J%(—•*hµnp:É\_Gî»/7½á -mZ„8AAAA„#\ •Ï}@0›¬Ó«•´Ò†¨ê‘ª@e©ëß™Ä6úP¢×dð<¥AÒ©R©jhhOü»ï‰à‰,üÿh*±ìB®Îˆ Mê%L¯fƒ¨uËÀO 8ÂP*Ó¼Äñ‹?ÎG‘:°TJ-ÛÓ3ºôi.¹yËbÙ¶mô‹”>óuRסð?ž›?œ]ûí×RóÄ#NAAA¡Í(qÇ”(§E(·2¿ŒÚúQ"]±2/D uzù Jà²*T>…Ö§l[éPa¥²+º% ×‚—„ða V¢æ¿ÝVž{ÚkÍf ¯¼ PnuA ¼Ð*fÉ哞Q9®~À{úi¢§ž¢¸hQãí{žrÛÓ +CDQµŒj.§<ÞLSµ£âÚ§Ã`K•c²*?ƒ•Ï™óÏ'8ÿ|r•k%ÎëÆûïgÓ£¶t§Ý#nxx˜õëךvÕUW±~ýz6N¤¦« ´ ±ca.PoÇbÃB·!÷ba. v,̤_!Ì:ÝŽ”å¢D'%ÊéúFås¹ò3XYF‡ªfë–÷¨Šcý(iB woñà;×…}lxaB¯rŒêº.J€Úl(®XÙ~ØFmè蘡±™Œ íë«æY%z |ùËÏ<“Ôöí¸7Ü@ßÐËǃa¨ðQZZ,Vch“8‘e‘zl›žÊñ9À%—]ÆHe…ºó^ÞðÎwòýG! üß¾@©Tb ]¡©ëׯ窫®ª™æº.CCCôööâº.þxn‚‚ÐfÄŽ…¹@½‹ ݆܋…¹€Ø±0W~…0èt;QBPRàÒê>'1¨3¨[~[e^¶ò£=æŽ`UžÂò4\åÃ1}0 <’ƒ2”·—‡Ôr(‘P‹a”8nº ¨†|&Ïm¡ ¸rY fI¢¨:MKHR*a ”?øAì»î¢øµ¯QxÛÛH=@ãÐoYdps9\Ë"U,Zªµ>ð¦áá8¬T{¼•Ãra†!ŸøÄ'øÜç>ïò†nà„N ŸÏÓ¿};×v¿Ú½›uçžËG?úQ–-[Æm·ÝƱÇÛÒužÖÐTß÷®™¶yóf¶nÝÊ•W^ (ÅyÆ Øõj¤ tbÇÂ\ ÞŽÅ†…nCîÅÂ\@ìX˜+H¿B˜ tƒû(Á¬Y©9mÕã‘~¤ª“^nAº²ÁWàâîrÔþ“yétèå„ÏŠï«lŽ£<Ó< "Û&2 ÌmÛ* s”gœö&ËçÕ²¾¯„<¨­VšNƒeaZV|>ÌÄ.jT~G•éý•y wì`ãÅ“w]|Ó¤ËqÆ 7ðÐCqÜqÇaš&Ùl–õë×sÒI'‘ÏçÙ¼y3©T Ïóxöo~ÃîÿùÎß²…W>ßýîwظq#›6mjéÔL›GÜðð0W\q—]vYÍô7ÒÛÛ^µjUG¸| B#ÄŽ…¹@#;º ¹ s±ca® ý a.Щv¢BE“>_ãåq›Ô~*©Òòy¥{9Nµ8(À9 8JlK£<àPžbnO&Q*¢\®î|`\—‹/¾ÇqªËd³Ëáé"¥Rµ2i¹¬BK­„ïã4ô\´*m Þe˰¢§r<¦ï³äüó9ûãç`Çáü'Ÿ$ßßÏËN=€“N:‰Ÿüä'ìÞ½›b±H.—Ã4MR©¹\Ž­[·ò™Ï|†÷½ï}Üñýïc\¹’tzru\§Mˆs]—Ë.»Œ¥K—ÖLß±c+V¬ˆ?/_¾|Ümíܹ“7Žzƒ(Ì_†‡‡Ù¸q#¶˜üp²L§?úè£Ò!jÐv¼sçÎÝO#;žŒ ?þøã<ðÀbÇB CCClÙ²…;vÌØ>¦ó^¼cǶlÙÂæÍ›ÛwÒ„ŽdãÆ<ðÀ<þøã3²ý™è µ÷¤ E;ûÇ“±cþ±0š¡¡¡®ìOW¿Â§Z”¡ŸB=[ ŸWºWªÂ ™Œú­k˜MÜê ª¹åÆn´òT ‚€þþ~Â0T;]¶ <7òž÷¼Gy°Y–Ó¸æW¿‚ÁA6oÞÌ“O>I†¼ç=ïáÔ—½ŒL©D&“áþn¨62—Sb\¢˜À?þã?òš×¼fT³‚Š÷ÜÛßþv–,YÂ{ßû^<ÏÃ0 \×å³ëÖñÿ8wÝu_ùÊW¸è¢‹øüç?O.—ã‹_ü"Ç{,?ÿùÏãí”Ëe¶mÛ†•/¹ä¢(b îZmÞ¼yBýãi⮺ê*z{{YµjÕtlŽ;w²iÓ¦íä ÝÅŽ;Ø´iÓŒŠ³ÓmÇÃÃÃ-»¦ 
ómÇ3ÙјN;~üñÇyðÁÅŽ…¶nÝ:£BÜtß‹Eˆš±iÓ&|ðÁâfªo¼uëÖÙ>MB#ýca.°uëÖ®ìOW¿B焨üvZX§¿¿ñtŠÍ¶•øfšJÓrTºÖ$¸ûˆØôº×ñàK^ÂG׬Á²,¾ûÑ‚çñ‡3Ï„L†/Ýs6lˆ—Wmîç-oy ¾ï³dÉÎ;ïÃyç7êMsâÀâ¦%GܦM›Ø¸qcMÒC˲¸òÊ+éíí/‚n`ý›Áz=ôPÖ­[7MæË—/gݺu3:šn;>ꨣĎ…ÚiÇ_|1¿ûÝïâi­Øð!‡‚eYbÇB º;S©é¾y䑜~úéÓ6ˆæëÖ­cýúõrÈ!Ó¾mé ³A·õ+@úÇÂhV­ZŪU«ºÆŽuÿxõêÕ“j‹®C 1j -$—ÏdTmÃPëj(Š”#Z>¯¶çûÕ(Nnm*éî¢(Âu]Œ„7Ú®½–Ò^{ñ­uë¸>Ÿ'Š"þáÄùÜi§qéÈ£þ0ÁW¿Ê>{íŇ>ô!>õ©Oñ’—¼˲Èf³¼öµ¯åmo{—_~9CCCär¹xû¥R‰ زe ×_=aâyA000À™gžI&“áw¿û_ÿú×YµjGu/{ÙËX¶laröÙgsÆgpê©§ò‹_üÓ4kÚï8NmXì4±zõj–/_ÞrÿxZ„8ÜPcYVìÎ744ÄæÍ›féÒ¥ø¾/I<…ŽDìX˜ 4³ã¡¡!ŠÅ¢Ø°ÐñȽX˜ ˆ séWsN²ãíÀ?ç«õ `t¶¾>ÍiÊûͲÔïb±šß-ŠÔ4Ç‘UÛ RÃ@ms ‰æ>õÎwrØÊ• ”˼+ yxýzNËfùÈùçÉçÕïL†Ã_ÿzZ²„<é$.ºýv^þ†7°bÅ Ö­[Çg?ûY¦ZáÌ3Ïäu¯{À¨Šâ–eq饗òôÓO“Ëåèïïgxx˜?ýéOñ2Ç{,/xÁ 8õÔSyÝë^ÇûÞ÷¾šÐÑßþö·5ÛëT¦µjj#–/_ÎE]Ä…^ȪU«ð}Ÿk®¹¦ÝÇ-BìXèvĆ…¹€Ø±0;æbÇÂ\ v™L&~›ãy^»›#‚ ‚ ‚ ŒÁ°¯7•ØVç,M>?yO·FA@__¦iâ8Åb‘LÓÄ4MâÈoäÂÇÇÔ%W+.y:LÕu] ZIT? Ø‘lóA„ƒYôˆazñ}ß÷‰¢ˆL&Czªµ¨AAA„ÃpÀD qaX­|JeZ+Où|¾iاçyüß?ÿ3Ç¿ñø¾O±XT‚™ëªDsÙ,÷<ú(‡|ãÕè*ËâÁÛog?Ó$ÈåK%Â0UaÔ0ŒšPT! G>v$<¯µÅÇâtAÚO†„aH>ŸÇ4ÍØÎ4M©V• ¢&ÓTe"/±Lˆ*žIL3ê~'·—¦qyqAAA„FÀþ>`Ç©×ð<%È™fkÞpÚKM{ŽyžG¹\Æ0 \×Å÷}~½×^üHk. 
•§[ïóé•+9kÍšªW(ðy×Å>âˆØ[nÞ¦@òQƒÃ4ê‚•*¿ Àè¤~}GqTK»‰…¸7²aÃÖ¬YCoo/›7ofíÚµ ³|ùròù<½½½í>-‚0¯ñ<|>eY8ŽƒmÛ¸®‹ã8”J¥ŽâBÔŸ±ð+Ëx¨û›UYÏ@‰cŒ±ÈW–P÷I£òc&¦G‰m¤*¿ºåúµ[P™%¶ëUö#Bœ ‚ ‚ ­àûP6á¾<²YÖØžpaÒßßýòù<ƒƒƒô÷÷N§éëë‹CEŠEèëS»Ð¡£©”Êùfàºìòɱ§[\¸¡âé6'ó막ÈY‰¿µÈ¦‰¨ôô`PîOÌPË4jК†»_p7÷oº¿¥&-bíÚµ¬Y³&NR˜Íf±m›5kÖP.—Éf³|ë[ßj÷)„yM†q\?(÷àzzzð}Æâõ¡zŸÒ/ J¨çˆ‰ºE(KÏó€Äú~åw¾²£²¼ºÓ} ¯²m uÓ¢€[Y&MÕkmp ÇÖè”>‹ÉLúØAAAÆã†^Uñx]£*•R^pcé^™L†l6KEø¾O.—Ã4Íø·eYµaª¥’ݰ:-—#¬ìÄ$\jYV<¶ Ã×u)NG•ˆé$B @­æiqÄg åq ¢åĺz Ù(5]½çE¶²/*ëMað¹ˆ=áÖ­[(anhh(.ß»fÍ6lØÀæÍ›ç„WœïûóÛÝRèJ‚ æ"›ã8xžß§J %|éûXžêËèAÝ‹RT5»²^ê¾f'þ¦²¾~‰`Óø~g'–Õ6ªÑ‚_‘Æ÷cAAA„vo.ÊÂO<8:1Òš Ý<ÏÃ0Œ†¹ÀU,U;¬l0™ËͶñ+Û•Ö¨¯¯ší˜¦9;"œöj°Qž:,ÊIÌ×¹ƒ@ D=ª&U1-UY×¢b•<5IÕ1B õ5+ÄÉ£BE ËO½6oÞÌé§ŸO,—ËôööÖä†[±bÃÃÃÓ·ç6’Ïçñ<(Šâ* ‚Щ„aH&“¡¯¯¨†c¦¨†°k|ß'L¾ý˜ %”è¥ç+ÛQ÷¬"ê^–E½È¡<Þ •iúeÃ`ez±2o°òSDÝ3ëïiã‘«¬W®¬—Nì¯Sß„ ‚ ‚ sœø[¾˜UŽjIÑ-V?I¢(Š#òù<…BaôF3σžžÊŽ‚ø÷¿üò—DQmÆìt:ã8q¹2™ 3‚@Æí¥šHŸÚ©E¸<µaGQb;%ªÞgz@©)SÛÒ¨Á§øI¢óµ‚Oã¤ãÓÄB€åË—³yófV­ZPó·fëÖ­3׊Y"“É`a¥’ºÒŽãÌX(Ÿ L킆!Ÿ½ê*nÞ³‡<ÕPÎ~ªaë:g@ýM·úžç¡î{”øæP}0VM}3h,ˆéùÓýÍêGiñ¥AAaO¹\¦T*áû¾qBǡ߂A@¡P pžA hEÔ½M‡³÷_:äæ~Ít~7íUÕ¢)Ô½MNoDs&âÍ'‚ ‚ Âü$nváEU+a]¯Ñô980]°zõj†††â$ºr*À…^ÈÐЗ]vY»Û:iòù|œÀ^Ç<ë8kÛ¶Éd2qõÉr¹<•] ´‘ÉdèÏdðƒ€ãþê¯p}OÓ·æ4ðºSNáàýˆ›þéŸâùÚëMW^ö¨æW«÷xÓ^ÑZ¨ë¬ú«G+aAAA˜¿„•ÿZá´7œa£=áâ †à8jƒZ»(—UÂ9”3‡Î‰ŸN§kŽ&å§ó®™‰Ïzi¡ò¨ÁfžÙõZëböÒ¬[·Ž ‚ .Ú §_ýõÓç¶ØJ¥®ë6ôz³, ÇqH¥R±Ñ B» ‚€SÞñÊ—^ÊCCüç1Ç0ÖûŠ,° –-cï÷¿?.Ó ÉÏP-ìR¤Z<¦".®v-‚ ‚ ‚Ј8*»*ª&ŸÏS*•È%‹,äóÐߟØ`¨<âêµ™ÄgÛ¶ÉårA@r݉6ˆs"9Ts¶ P ÕUü š­{e£Ye¯ñXµjUMцn@ jžçÅî—QYm2 Cr¹\,ȤcÚÇWÿ{~¶v-EàÛ‹Õ¢1caË¿û]þa×.¢Êçäý2éñ;ÖK]xFhŒö2ì¡*v& ]€zf:ÿÉrgAA„¹OìW;{Pôôô†!år¹ÖÎ÷UªÖ&ÆÖêõ‹B¡P+êµBˆàèMåÅa£<Ý 4DÚtF2ï.!âÖ¯_eYX–Å…^ÈæÍ›ÛݶI“ÏçcA- CÒé4†a4õê3M3vÝL§Ó¸®r„ÙÆ<Ûæ+;wÆ‚ØX÷¼zþeùrŸwé0ä¤Ý»G ?ú[0–Ð6‘ýÍGÒ¨çÌêù¤S"@µP%¼é‚z¾üèâ‹Û}‚ ‚ ‚ L3ºÈ'¨þÿ°¯"IE™L†B¡@¡PP"\V¸(R²k¯üÀ8oùòImÃq¢("•JñăÖnÉïÄxqÂø(NçÛÓ?:B©2?]™žE qKÿüçv7]AA„i&D½€k#x0hî§sÂÕ8 yžãJ%rê8PÉoO: …BÃè½r¹ŒëºµÔà#‰‹ò 2/•˜7G #t"{lÚ´ Û¶Y½z5«V­â²Ë.ëZ¡) CLÓTU&ƒ ® Ò*Ú+0 CR©ù|žL&C©T¢¯¯0 ñ<|>ßò6¡U^=4Ä3—^Ê»Þüæ)mǶm‚ `û-·Ôä|+•Jô÷÷3H5@OOO»}N¡Ó%©Á€jh°q‚ ‚ ‚0wI/ŒàéÆó£(¢T*Å33TR9]”j”¼(Šp]7vÊd2ñ¼t:­*¡æQÔߥêˆÕ 
*Œ'D˜%ê?’yàtÅÔnÄóã8wû„:òÀðm·ñÑ“OžrqÓ4±m›;>þqž»cý×\C±×g¦¿?öŒÓ!ÜÂôaÕý®Ÿwð}÷µ»‰‚ ‚ ‚ L3*ZÆM0›„y–J%Ç]5 •'\“ñ`†Ø¶ëI!Ï4M5¨ P¢ZšZq-@ F´§FŽj†©Ú7‹,œú&:Ûm`` ž¦ T¨µ»†æ.¢†aÄ¢(¶mR©éŠKh>Ÿ'›Íâû>¥R©«+Ê A„ºY?ûÓŸ&ý“ŸLy{:ßaOOë׬ÁqŠÅ¢ò†Äu] àX,ÆÞpÚ›T˜:ÍCËÀZâAAaÎQMMiᫎL&ƒïû”ËåÚaX+š #ø4¦iVÓe;¿[HµâirZƒéÂŒ q›7ofýúõ53ë?¯[·®Ýíß÷Œ?[–…ïû±¡¤R*´º ¤º¬óÄår¹x{¥R‰(Šð}¿FŒÓùéDÔZ%<ø/ÿ‚1<<­Û¤¿¿?Î= ¿¹\.ö‚+‹äóù¶ q:Ÿ»;ß÷‰¢(ø!Ï1AAA˜ŸèTk=)HÈ@UC¬Ÿɨ°Ôfhï5 N;ö4>ð¼°ú«UìXÃ(ííæS EÕh ð›UöX^I ¿yóæøgÕªU5Ÿ»±ŠêßüÍß&®[ G-—Ç.<Z¤pÛ¶)•J …ƒR©TS­DÆ¢Ü;<Ì~Ÿÿüh×äi P(Ä¢QRhÓ[–ÕP€s]wT.D]Ù§’…!Æ<þR‰R©/«C§ƒeË–µ´œïûôõõµ´l&“‘‘‚ ‚ ‚ ŒI£÷öA4~¡oYp´YÊC­QÎ6>ûa?áqnÚsS5¥Øc q)”ˆ×LãË¢Üø„Yc!ÀêÕ«Y½zu»Û2%’žošü(Ž:j ~¢d°Î{8QêÅ ÖgYV,L„aH©TŠóÇéi†äÀ"Ì Þ»{7Æ[ÞB:ž‘0ç‰z¹é܇¥R Ã0â×r¹L©TŠó0ŽåÄù‹Å"¥R©©àçû~¼-]%Š"r¹S!Yñ¸Ú °™¸Xßî âBɰwP].—Ã0 óAA„yˆÎÐh8SS !Ÿ‡Íôæ”öüÑà÷‹~ÏI›OŠÃMã(<ßæ°ß j½´˜Û_Ë$͆W”4ç¨É7<<̆ ؼy3;vì ··—ÞÞގϦ½ÏôÀW maßüæ>DÑè\‡ *þŽI#!MÂÃ0Œ«–$ß÷õõa¶mÇ9å:ý\ ³K,»ûnV.ZD:n«x£E&?.NÇÕƒu˜·ïûd³YR©Ùl¶¦Ü¶¶×u±mÓ4ãïC&“!NÇâšëºñ6Ã0Œ¿aR(ð<¾¾¾8×ÝdŽ%ŸÏ7<ŸZ 3 ×ukŠ­èRà:ü4ŸÏS.—ãíø¾ÂÈçó± §Õ²¬Ø»Ï4ÍÑyAAA˜Ó4 ÀÓc„X[¸.€w@N5žQ•PŠ*Q–XkxÑ/^„aNÂÁG‡£Jøiǰ—þcóæÍœþùlذ+VÐÛÛËðð0—_~9k×®exšsWM'Z ЃéLFfÈ唇g½öeÛOË[)N“J¥‚€l6Ëàà`\Y5 C<ÏÃu]R©Ô¨J¬S9fP_æäßÓÒ7$LoòxÀ¾?ÿ9Ùl¶­"œmÛq(©ïûär9²Ù,…Br¹L.—#ŸÏcY¹\Žr¹L>ŸÇó<@ kºŒv±XŒ×5M×ucQ”ç˜øÊårü}%z—Ëe …ù|×u'd×Q‘J¥Èårñ9Õ¢šëºxžG†„aï[Ÿß÷I¥R±P—Ìžçá8ŽãÄë ÄÕ‹´À800Ðrh® ‚ ‚ s +É(Çœ €µ\SO[–…mÛ„„*œ4Rcµ\.Ç’—H ¬9ÂBPžpk׮墋.bÍš55 ¬[·Žµk×rùå—w¬à¢ðZˆÓÜXärêËašªxÃxŒ†—Ífã~¡ äf]y5›ÍÆÞ9¶mÇ¢¡Ç+‹ ·éºnÃÐ<ÏóâõµX +§ô÷÷ÇBIE£Bu'B²l&“Á4M²Ùì(Ï@Ï4Mlj½´S#OB¾«o2Z¨±,+>¦ù@·mcëbmÛÖîæ4Í{hYVŠ êa ‹<èª?£ŽVÖ ‚€\.DzeËX°`étšb±Û€^ÔwYï_Ûr>Ÿ%Šw Z,Ó¢›Þ†iš5U•“ûÖËår¹øûž|Ðé ZÜK¶IOOzÀÙ¶]¢ªB˲غuk»/µ ‚ ‚Ðý2URLz¼<Õô3Nò5|½ùèhn”hñá,œP» >G}à1>øÞòÙ«>«D9˜Ò_èlذÞÞÞQ"ÀÒ¥KÉçóœwÞy Å…:íqbYVåfiàºÐâxÛ†RI…°†ú™,¹\®FÀЃ~-’é*ª¶m³lÙ2r¹\,¨Õ{ÚéO:Ž8ÚEýýýd³Ù¶‡iÎݽ›…Ÿþ4ê .Š¢QB•¦ÞNljŴf"Yòú—Ëå{lf›Éë®ÃF{zzZΗ|Àé|mÚûm,´x× ß÷›ŠÄ­´KÛ¹mÛ"Ä ‚ ‚ t(zLEÛ:¤ŸÞé$#Atÿ[‡õ q=ÖH¦…™‹èÑBÔê:϶EpB,€bQ­Ð<Ï‹5„=gïáE{^®Ú°‘5ÆÍÓ-t A…¥Æ7°|ùrV­ZÅÖ­[;FˆÓaq–eÅÞ(™ŒÓ¢h|Q­§GUPõ}µ¼ã4®j2’‚ööÒ$… -†Q#Ä%Cµ¢9=ÝqœšA¿išñ¼dˆ]q¯‰æØJz>FQçK&£×ËéüYÚÛ/NÇ7ÝFá„:$W‡*j,ß÷ñµ‹Ô!DÀwwïæO?Ýî¦Ua5ÎÙ 
y£5Þ·YhµöbK¥Rã>¸õ÷IC5oäô„ˆN´Ó óîé¶e³Y,ËbÆ ÓÒAAazð}?ΜË妜Jh>Œöâ¾¼ã8q—Î](•ÿ,›ÍÎYAiïzRÊNkÉÜÓü2„mUˆ¡Á°+Y.ŽÔ PÅ„9ÃÂVì´qúK­ÕbP¡¦ ¢ã’ͪ/†m+1®I„è¤+Œ.Žçû~M¡‡ú0Ûb±WŸÔa¨õhÑKøµgÏ<Ï#—˵\("ž˜,¯¬Ãncò:$-‰Oõz–eÅá¼aÆ7«KçÛÒ‚%0g<‰|à¸[oí˜â¦i6 “ž-´€Û-¸»®;¦g›~ûVÏTr'&Å´‰ ÅiO‡ ‚ ‚Ð9hÑC;yè(Žd®ù†ŽäÐãµt:M___ì„áºnìbYV\`-Š¢8,I2m‘NS4W…¸§*N>IÕfš&üs ^`@“á…išqÚ'ÏóÔ¹›ÛѼ󒅫V­%š$Ù¼yó¸^s³‰®RšÌYãç… €<ÊûÓ¨8[år*4Õó”0çûUϸ™ÖHtþ¬drx}ÓJHt¨j=¦iÆÞdIlÛfppR©D&“©Éáµ*=(7Xý‚jAˆ©ŠZd,•J Ä×,)H&E ¸_ßȵw\œ}öÙ3{Qf‰xäûßÇj5Žz Åæfär9zzzÆÜ†U¯g²6¬=NIu¢(ŠÃÄAA„Î"úûûI§Ó ŒŠpê!NçÖ}щF<µºi¥Dt…Oß÷ãt/@MW’ñúóúåœ×y.á‹ÍZAjŒÏWAº¹Ð‹oL_TÐyìJ°ÑRë=ß6nÜÈå—_Κ5kXºti»Û '‡×_rσþþæ¡¥ ZmRõíG Q–¥Ä8ÓT¿S)%ÈÍ$Ç7lÛŽ½É’9åCI&ŠObÛö(<]¡% !œATn<}Œº-Ú3ϲ,nºé½<ôÐäÓU0uÕËb±XscÖJ=:§ž:/.zQ,Y´hÑÌ^˜Y¢,Ý´iÞ¦hÇqF‰ÉõŒ÷0šîbõß·‰z†aÄö+‚ ‚ t®ëÒÓÓC?¹\nÂéYf‚¤cL>Ÿ½Î€šqU«U3õ ázòù|,´¹®‹çyôôôÐÓÓ³‹Å"Û¶mc``€r¹L:ŽCLÃ0œRTv>™«Õ?_\'ÄiÁ1£ÜÀ+šÛš7+Þ(Ì Bµ Ãå—_Î9çœÃªU«844Äš5kX·n]»ÛZCRxJ§•ˆ¦ >¼ÊO¥L7ŠXuËh/a¦‹Ãú¾ÚG¹\­Æêû>ßúÖ¹¼ÿý/`ÇŽû®—¬òêyJx4Œj(ªëª|w…‚Ú~:­D6õ·âLLS­EJ¨ëëSmùþ÷ï “Éóò—§øò—sÆC U÷¯—·,µM½í½Eª]Ù,ø¾ÅæÍïà—¿üÙ왘¦9 %MÃ!µè˜ e+@ïÐË?¾ÝMé(Zy³7ÞC¿™G\;°,kÒSAA„é'Š¢X*‹-å0žI´÷™ŽøÒÅø¢(ŠSýضÎårxžG&“÷¶.8¡sŒkáMç×QEqÈi3´¤Û;UÑR§_Êd2q>ò¹Âö¢D!Èš¨šXÊèrªT ôطݰ0³ì¥ÿX¾|9W^y%×\s ½½½qÕo}ë['Âéš×­Šp!ÊÓ- (±­™ŒcS[^ÔF X „¥úôRZšÚ1TÚP¹ñ†Áé§ßÆ)§,å¼óNbƒ]݇µ¨ê˜õï|^µ)ªÅ'tj(áË0 ®½öúúÔ:¾¯ÎY¨ßûØÃì³Ï§9øàK¹öÚcxðÁÅÜsÏq„aHOÚ¦õyÑ/mR)ÕV߯n;›…}öy €§žzA¼žiªuõòúúé\¨ú¼èžåryÎy•£I.3aòxž×0GÜtt t.‹‰""œ ‚ BgE}}}†A¡P·Ÿ¦CSmG§ÞÉáÁ¡Å5Vêº.ýýý±÷™öz3 #Õ¢M¹\Æqœš):rJO+µ”N7¤·Y*•H¥RضM¡P X,200@±X¤P(´$úè¼y¾ïOK7NǹÁó3í 3KDÀo¼ê˜¨)ΈìßxlÒ××TÇ.3~,t£Š5h®ϬH~ñµð¡8P¹ µ¦^¦iD,TθzLSyˆA­à–Ïko/õåjæ)Eê§þ~¶`A¥B±Q÷t~º\®V.¼+€R¥ä±ö4 C%–JÕ|vA‡˜ðyO j·Fêï×d¡T¸ŠEµîŽ ÉåÔq¼>]Ý®ã€ï¯£·×#›Urø¿þõ©ìÞ}?^õtKzÀÚF.WزY%¬‹pÜq_âSŸ:žãŽÛ‰a¼¤rœêøúûÕ6|_“SûúÔ6´×ß\ôÆ €ž›njùÍöÚN§«žŽ0~NÄù†v£¯g:*^•J¥¶³AA&.:Ðj´Mòå®Î¹œ¦ÿÖ‚X22C;å` ?ëT,¥R‰R©4Êé ™wͶíX J¦_Ñ^sZkôZWèÔ"_*•¢P(Ôˆ;Óô¶šœ(Ú!E ¤¯xÅ+¦¼ÍvÇ™µÞpñ¹ 3jè 088H&“‰S: s›–«¦v*–[L•.‹ 3M2–Ôa0Ú#®ž¤äºJ$ÒT3´çZ¹ ™ŒZ¶XTß9ßWjI-ŸWûÑÞwwW¶}S%_ã¨uMS­†jÚù)xÈ€£rð³<\”ƒ_…ð~øœåÀ)ä-ø‘—åÑk žø‚:I?nn:x„_œ¼‡=oø"/;è úÝEXöÐ^ÜsÓ³Ù{÷¿qà0|ʇ ÀŸ³*ÇÞò,À›òð¿9â{3pJV 
ƒo÷à†l–+åÙþ#ߟÍÀíüÁ€t¾ÀRð/epŠðµ’:¾RI‰M¾¯ÂŠÍPÝÌ ¶n]Ùn“›2!‹|òïþnÌå’;t•_ËR"f©iºØˆ4ç3Úe?‰.ÿ=ÕÎB²*± ‚ ‚Ð=$+|–µ·E ho­eË–aƨbí—N§cLçœËd2¸®‡~&ŹVÁ0 Éd2£"Â,ËŠE›\.G&“!›ÍbYVœÚGïËqFFF¦å\ÎDÊ Ã0p‡ýèGÓ¾ívpQZ9üUïÁHFÕã§É¹/äX˜tWïìp•¡<ß&Em¢¾õëºnUäЂˆmWÃ)ÇË¿®ðêU78¨D¨ |*Ošú[{º= †M8ÒÐkUñÜ4•ðµWÊœc@S"û}&ô ª/¿ „–ú}ó³å9¯ÜÁïŒç0lß¾B.Ǻ™ÇÎý-g>¯úÊWH‘‹È|îüf÷¿ûìñœk)ïA§²Í¸ÐªÂp€æÔ~OH+í´l™}÷ìáàsŽåÕœPTâç«Qëe-د ?7•Gãmìcã&¼:/P¡Å[\XÁÁeØcv·GíÙÃcÐ44µTªzÁ ÔŠÁZ Ôâ,T½ uè°mWãúʸs™™Êá!ÕŠAAº(ŠèïïǶíIU°7M“l6çlk¾©CFëÿ†jÞëɾÐ- äóù¸zirŸ¤R)ú+L]áµT*1888©ýµ‹\.ÇŸþô§v7cÚ ‚@‰—!*oV“qY__RœaÑ•Bœ¾¹e2ðmn´''ÂŽF¯¯½ÞR)%d”ËÕœjI|¿ÖS)—«®›àªí¯Š%ú^ªÃ=A '߯Tx4 §Å¹ä o+Ñ‹J{fVÛmQUÞíÊü¤ƒÔß¼ìüüM‡a¡¾ÿùÊ—ü7ßüfüpуê=â‹>Ì…–Rís‰}iŠ ¦QÙwÿ-·~ýëü¬Áƒ ž’8ñaººïS]%æ•Cè*bdœ~ú¿WNòj·Ÿ8|hˆc<Œµp¦+Þ¦ÓE´zOåäç(ªæÜ C5o¾8rÍDBSÓ4›VœAA:ß÷c/±©äÚrÆó¾‡©"k}˲(—Ëq_UÿLµÍíbÑ¢Eín” €ûÁ®8½µ™#^pó…CCClݺµÝm™0ß à¢ÂäE8¨ qõ·«\N‰†¡ óM o™ŒØôø\ç;Óóu(ªaÕóL³*ÔéyQ¤¶w’­¼ÑNJÞÊýv3UñÍF…ßFŒ}ÌYF‹í/^¼ïä“yêK_¢ç“ŸŒÝ«UÛßô[•ñÎ_3´›q«$÷ô“œ:Î|^Û\N‰K?ýéd¯tg ó›QÞpZô-—ÕñNÀk¾ÇQ›¶Ù¹(ÂÕ$<­0S^kÚþ¥è‚ ‚ B÷Ïç) s>µˆî«FQW_Œ÷Ÿ0},®ü޽(KÀqüÑV§sÿÉXcþ±`Æ \uÕUínKKèw6„ÇæQ[ÅD…?ZÔŠqZXÓ¡}®[õ|KVUM§«Þoé´ú´“…þ?{ï'GUæÿ¿ ¹!YL±„QP¤ÆT@´FÙh÷º%»ëv/qãí§t¯(ÞíÖï*_²_ÖîuWXÑÕiÉ^VíÚET$‰S€ L¦ $#!ÂL! 
öïS§ªº§çšééîÉóÎ+¯é®®Ë©ê§NŸó©çj™ŒÚ®|êpOoLÀó %ºåP^aµ?ãEÖ“ÎŽÛ·L6&÷ÇJrß×xH’Ií¿–‰Dû q°è‘GêÑE/Ô#YÛiœtZ]ËÑ ‰Lmφ¡D>½ŸdRýÕí×yµ÷C-:ì[‡|k¯ÓÑBjGû‘úÂ~ÅÓO",f¡óæ©¢"ϧÅ÷xÛâß“þ̲Ô9ÆKk×AAfß÷Éçó]~_Û¶ééé‘ÐÆ&ã^0¯ óÃe\Pe±FˆÓaÇR!õàc.À† ذaC³Û2!<Ïci2É5%ø;{|Qj<,T~¹ª…8=‘×Et•T=Ñî-+咽NRË …(œëFBži*Ï.×Ë÷UŽ7rð´«ò¥ýßœ»kËtþ|hU~¢¢D*•š°`'L 0o¹kíZ@‰9º:l#Dœž%ni1Ø0” êe¶]¿Èƒï+›u]µÝÀ€ó C-‡¨€„ϸùæ{±íÿáûß?mÄùßr˳üÇÆ-·¼šþþè:‹Q>-–i¡\¯£Ï)™¼•tú¾ð…Ë«ÄÀøïf¹‰yúf³êÑÃXAA„¦R,à ¥‚0“8(Aû¸®K¶3«ð `šæ„r¾ÕÛN˜\‚ï<¦ºio³F¡uW-믳¯/Žtˆe¼ˆˆÜê¡—×ÓtkÍ¥6,u¼Üwzû¸0©#œµhùôÓóBO¹»ï¾›;îxû¨ N/¾ø4öï)ÐÅ£¦ÇûM4M‡3ϴƬ>[¯Ý¹œj³'ò’Û¿Ñx_ ‚ ‚0Aòù|˜GXæ/B³0Qs>ß÷ñ<%»†àM}QhJ ëééahhˆl6+^q)mW5uÿÅSì†óàA”£ZKØð_¨*Ù«Ä3Ó‚¼p÷f¡#ï4à ìÞX®8À´”"^ S)äYT^»±šßÓ4%þ¼É8À¡?¾×a£3±ÖhÁȶ£Â$ZÜjUR)øú×ïåÛß>‘K.Qmíî~½½½ Mp:Õòïñ|y–yɺ.<ûì MºŠ‚ ‚ ³‹b±ˆã8är9Iz/4 Øå‚Õ ¿ùÀoH÷¤áÆ<ü7UOþMÓ¤x'ˆ½¼L›7<<Ìõ×_@GGkÖ¬©úlÓ¦MìÙ³‡U«VM9öÊ{ïeå“Orþ9Ç5ô¢hA.N¹æ1 ¸ª£»+òð3ÀïU‚K)¶nuc:Á~'"…hî»zLUP8˜h´õÄa‡;Õʨ ^‡V䨣žàé§O ›ÍbÛvËÿ€Å=uSׯßÖÐcŽfÇÓÕ B£™‰1… 4±ca¶ÐÊã ]œ¡¯¯/Lý"õh´ûÀZ l ;â%œüŽ“aÉ{ ×[寒:™´QÂìâéØÉðð0kƒäó]]]8ŽS• 3›Í288HWWÙlG'O›$ßÛ·Wl?$ Qk6OA!÷KôÁ¶Ú|Tˆ> µ¬,ŸˆT¶*Ì(¶c8ìÞ{yöÙ…aÕ]arœqÆVÎ9ç€ß Ëާ«/„F2Sc Ah$bÇÂl¡ÕÇÙl–T*%"œ0&3aÇ:7œïû¼ùÏÞ̲¯,ƒ‹FVœs‡R©D:–ˆ1¡G\?7n _ïܹ3|¯­²j?‹/?_µjçŸ~Õ¾t¾9­8O¥œô+Vð‘ྫྷðŸ±TñlUZP3PÂÜD2HVƒæÐh;ö€]ýýìÛ— ™TyÚdÌ09 ÃÀ÷}ÇÁ0 tÕa4;žÎ¾XÉL)¡‘ˆ ³…VW‹E<Ï5W° hfÂŽ=.ÿ‡?ñÎ Þ©€«Gæó}?Ìcèy^ËG÷áP®™  ´¿¿€åË—‡ïãËë±|ùr®¸âŠðýž={Â×[¶l¡««+|¿zõê°Jëdð€y;vðš××¶UåkmmÇZ`M$ü°x‡09,ËÂ÷}Òé´xÃÂhv<]}± 4š™SB£;f ­:®p]—b±Ho¼*ESò¨ô@IT®n7ø›Ÿáë(4—™°c•ú…'^`é“KG]/‘H`Y…BAD¸ƒ˜¹kÖ¬©Ê[1Y:::B1oppL&úuëeäË—/¯Zw>ÞsõÕ\“ËÑiP"H­¸f£„;Ák½Ž…é\”Ó’,«Y¦©ŽtæM›p§íÆÇP1h  µ=ôa~ûÚãGÝÞó<©ì; Ù¸q#›7ožðøxÔb õ”à±):ùá¦M›¸âŠ+h’¼bÅŠó/÷ïçØ;GiQuòsáà ««‹k¯½–õë×7ô8ÓiǧžzjhÇ.êþú߬%6¨yx'L]ù·]›6ÓŽkS L„N87¾ñÓ&^¸®ª"+UÚÛ›5kÖÐÑÑÁÖ­[vŒéì‹_ò’—H2|¡.×^{-7nä„SɺÑccAh·qT§ÊÛ·n¥ÿß „_çQbZ%†¸Áÿ £Àóƒÿ^°Nå)§…7íI§:—H°‹ïÃC¢žíÔ.v\o|¬m`Þwð’_ÿøg× J§ÓaÕTaö°aÃV­Z5áñq(ÄmÙ²…«®ºŠ+®¸‚Õ«W³~ýúÐ5sñâÅØ¶=æ výúõtuuqóÍ7³xñâpyWW›6m ßëøìÉòËýû1x9îBâ„ÆÑh;~ê©“4SÇ4Mr¢âŒK=;ž.ž*Ù¬á@„8a|Ý ÂL v,ÌZa\á£Ä°"àx?=î8þ¿+H©šuM`¼éb½¡ˆ‹ aÕŸÛ(qNc‰-PB qÍ.4—FÚ±j³Yøð£}ÿàƒÐÓ£’‚× "œAޏÁÁA²Ù,===U1Ò×^{-·Þz+]]]UËkÙ´i‹/æŠ+®a¸]]]ô÷÷3<< ¨*!Sy"øÂ 
/ððÃ'‹'4ŒFÚ±ƒúñ^¸p¡ä†Êhv<]}ñd(•T® €LÊeUeÚkÁ‚;Bë0c Ah4bÇÂl¡ÆY 5žÞuà \v÷Ýœ·b}Œá %Àõ¢„¼ÊCn(ø?€ò²K „ÁB°n%ÄHíãÖ¥Ñv¬=#K%X8ç»Ìû÷¯+ ‚f.V©~´xñbr¹o{ÛÛ°m»®B<88È–-[F$t]—ŽŽ.½ôRÖ®]ËêÕ«q‡n¸aRt€Å>Èþ9H>C¡Q4ÚŽçÜy'O?}J³OS˜åŒeÇjÓ%›UÌŽ£¼à ,K q¥¤RR´DI£ûbA˜ ÄŽ…ÙB3ÇÚC-ÁJ¥îõדk GQÜçÃ`¤gü/Ïí4‰<èL”¨ç‰4ñ+¨óÖÅß»(aQ†FÓO£íØR.X§>Ç!ÛŸb´¤ö¾ï“Ïç%ÂGPB\ÿ·xêâÅ‹Cµ¸^xê† ÆÌ!´nÝ:zzzعs'6l˜´»§ õĸ;^Ùìë%ÌbiÇ>°8þøG“ÆYÙWJ…¨ÎÂËŽ´/ž(® ÝÝJhK$”ISáÍuÕßtÉ—(Œ Ñc A˜ ÄŽ…ÙB³ÆJÜê%°²ÙlK†õåPÃ}ÆE…­¦‰ŠAÙ9èE‰uZˆÓÛ¤Qâ^w°¾ a±7¬— >+žPM#íØ'( âÀË_ØÊ/]ÄÇX¿]s] ÓË\€#ÄÀBAýÍ番‘HH†{¡åp]õô/“‰–yžZV•ÃEòtVcýH7$¡Œür9¢ÑÌ´55²Ô£\ŸhD[ÐQ¯§G»ºdšÖÒuž»ƒyÔ*‚  ‹ú¹Õ~CžçQ*•èkóœ[©à¼ôÐAOâ7A ;ê Äý¨t°Áß"‘÷ÝQåØBð™¬kå²scû‹ v6Ñp'´1^5VG‡Ò:T‡&šam¶ ¥œMÃ\Õ¹‡ÝóÞ<ê:®ëâû¾ä”·fÍÇ!“Éð| J v‡«®ºŠuëÖ5ÍmÞ^ ¼ÿýóš}½aJøÀ’%CØö tv©òJpnt[ðå\”K •‰DÛV'T]ö<êUïCšÄ›ø¥ŸÈ)3Í€f‰Ë&P"œÆ‡\ ’¯†W™¸÷Eà(uK”–Ab—ÚÖ7•.mY@?Xk©î\W¨*—Áø ”Î{ XWA2 ,çHœÎ}`¶ ¹A² œGaèàÝ×s¡lCéQpr`ä{ù7Hokä>Þw!ûK0Î RÜ< ö;‚ÛØWímÜ¥ÃuM3Z×qÔún0Rö}õÞ¶Õgž§^›¦r¢µ,µn©ÅÈ媗k=?›öÇ4 +;§ÓjÛT,v:­ö›ÉÌEA¦õÜ..¹¥Óir¹Æ,p‹NÔðê ocaÛƒÃôsNM\L›è£|7ØOuýõûQx­õ"MäaçÇ–¼×âž[G{àé}$&ÑÆVDŸ·qâbxá¯8î¸9õ×ó}\×v,8sA倻öÚkÉf³\|ñÅtuu±xñbúûû%Ô•ç¢ÑX€÷èíR1Uh[| \®HpŽ —•à}6 ™°Ù‡pêIÿúAÎGm:ŒJŒË üÚD¾êf°L?®ªW×½Dôëÿ%„ ’ÍÂÜk)(•‚óÖÀÀ‡à+g¦ûEÈÿ5ï„ÜòìõÈÎ;‹ÜÀyJ\Ö"ZzÏ…zïÍP¸A‰Bî›ÀÍB®^J‚ì|°Ÿïjðî†Â*§ÑR 2ËÁ2 ôïã9uœÜ¿ƒçCïN(.†Ü±`\€ºè=øWÕÇ_W÷z°Ï€Â«À¿ ȃñ<ôžÆ<Õ~ãû2ÀÛ †î¿!órµåàêyêZù¾Ót»µã«i"Üwg£ª³ÚqVÝL3ãLSýO§Õ~ËåH¤Óèð_M­À¦…;?&¬†ùýˆ¾ãDBíwË–?6ÛA„Y† DÏšÇÁ0ŒY™WËFy¯(ÓLg-¡æ/qAo2Ó…¸—œö|Óz¨‘#š¶´³§ƒ'úúàkïÜE×™#£=Ï#N·dŽC¡9„´µ§+Š r饗†¢\³ÐQB‹†ïK!¡=1­÷}kp?|Å„l.I«zá0Þ[9ždÂeä/“Ü’¨_Æ^¢_:í?^>óbÛé°À Ñã,P7™qÂ$q]8ÔÏ€[¸ß‡Gž€ù§A~Øw@òB0ÿÑ{éüóW“è;Œüyç)“¾µ~lÀÿ-ƒk@ÊøßÿñȤ3_ÿ –å}À-(»ÍÿJðL(u‚¿ ì{Õº‰Ó‰â%J`Ú`¾0!õ%Ô#hý¨WÇhQ÷NZž²ÁˆÅc€(‹tŒ«Á.ªãP‚Ä ÁçF°`ça,¡>s=u $|HèÑèR°S`÷€"Ø¢:?3‹VwÀ¶¢ˆvÓ³fDÝp O<Ú· }-ä9Nä=gÛêæðf›  ‚0ËÐÞôqçñR©D*•šâ…©`£¦>“÷Ô‹ïC3^è©EfÛ®è!Ôwžø…×F|nš¦ˆpB#bK:::X³fM³ÛU… üêéwÖ#­ÎgŸû\õ·ð#ÔŒùÖ|Ý®þu²P¿z^ìoσd2˜U§¢‰º¦6uFmÂý«Ï"ë “Â:rpsVfàæ<ì3À¼M Øòÿ½Âq§’>-xÚyúé¸@OGæ£âwîþ“Ì›o…¦•Šd¦ d`| e«€aOŵXl¢l6¡Ö1†Áø1Ÿ~ v/JüÒÞ¢¢Ä)ñÌÄåàs›È{TÇOèc•À¼(x!ŠÉ ¶ž~¬›BÍ 
L"V}ëHEð?Éo@æ;Á2}_¦ƒírÁ~ÊD9êr±}D1"çô–ÿŒàúè6•‚¶&‚k£ãAô5ÑëkQNŸG.ánã'Áò„ºþK÷-…W5ÛA„Ù‚úyK¾ïã8…BaŠ{¦‚MT‘u&Ê $PC•ݯ}m³O}Ê<è‚ç²Áe-YPDh=Z>É‹ø®Ë²eË#B;âÆÞ½œ}äÂã_&Ô¿-Y‘g[í*:S2©báìI¸éTnaŠxÀo,8‚¿3à‹¿…‹¿ +×lÇ:ýLz³à‰rÕST ²[·’¼ðB¬U«ðV‘¸ùz¬þ~èî’Ÿ™ªúƒ.Ž` îýˆ¼%éâ&˜CâBÈ5šë#Ê6¬7D¡°Z¬ªŸéòbñd+Pí)š :K2£¼oS;³ÁHÇ?~‡Ùâ_mÖc»Î1âÄcZâ‰W4™š6÷hÛ¬^Çù$8wAf=‘ W€[vÀVAaZÐÏzª–åóâ ×$f2X±Öÿâð®w5ûÔ§Äžßïcè¶ýl¬ŸÎ+›Í’“Â{BŒ–âþs>ò‘ KÚ–ãz‚-‹Ï`ÕDØŒ,«Dz ·W%œêÎGn¢:3{ü)ŒNJeʵE'“•ü)“Ä4?µq]D5ŸÝ¸€[«Wé2ù§à}ÿì/^.ÀºÑ‡o©À2MìÓO§{ ä/½”ú»¿ÃøÉO¢u¥RǧHÀ|8Ç„„ Ž =)ø[e¿æÿ€õ_;¡ûuf$bÙ^à j(»7‚„hÚÍÚ1F†€û¾Jܦ':ÛXŒ7¾ÒÙ”]àNàűåñß·©ÆŒU vŠX—@~3˜ïîeú«Ú ‚ =êYOüÙÒl©”*Ì~<à¬#ËI»þ‹û–v±ŠU#Ö™9…£å…8ºw'ÖÅjvSaʸ×?Ãðž÷O¼>÷xÿ´§Ë+êrеBA"¡„ ÈJ-Æã»M³úXŽ­S*©e†•dÔ%ã¤ÓQFy½~üom>ÇQ™þ‰=/:·ør×U^€–¥ÎÉ0Tû´¸¨3Ô×n£½¬|_ý×™ðuÖû\ndFûd2º6:㽾Ή„fâ×Ùu£óÑmÑîõº³L˜¼Ëy%0ÛÉ}ìo:ð+>;ö@Ã|Ç;B—ýJÓÉù•+ÉÕ>õö}Uµ¯¶úð}¬˜ÚF(pÞ¯Ê!>ÆŽW×]ÇšesáÀiy!àÑÿÞMñÏb‰©›ÿi€÷0ûp€?>ô'ì½'M¿Ÿ·FÃ4«³´×[7îá÷žÓëú> ©eµ?"¾¯Ä©Éäî°m%¬LË‚%vhжÇ>®eA¥2±ý×'Ž2ã"^í6ZlÔ‚¤m«kY,F¢Ð,øvQéÖæ˜ð®lØ \©ûàᣨú¦ôíP@ÕGЧaWoÑwh—Áh*µá¹päÏ!WǶl[‰húØù|à Zç†4 eW–¥¾ïžueꜛZµhë8ê‡ÊóFœk¸oÀÁAœ?þ1¸_ ÎÏRÿµ­Õó¾Ó‚_ü˜µëvlAAhQjsø®K&#ÕÄ„öÀ½ãpþT9›á“‡G|æ8¦iJî8¡Š–â\ ²ï5­1§ˆ'ÎÏ›ŽšÓ‚Nèߊsü,QÅ?Ý>ý ã¨ÁAæuHkz¾d‰<]ÆÊÃQOÄOœn´X1“ÄÏo¼s­õ6l™§ÓƒNËf™Ð»};Æ_~îOÀ=xÄÏN5¦REcf‰žŠOºËO&Ç.«·›ñ¾½®e)z,ôwnÆÊŒŽf£×ª÷™…dLã¼Ýp]9 “—&ÿè‹J<ÔBÛ´‡¦FsÑÞŸñõ´Ç^¼m¥’Z'¾ñ<AfzéM]×Åu]ñ"Úçá“yåcϱì¢ÃF|æyžˆpÂZ^ˆ³€þ×·†WD ):axÕñšK$t‰’›ƒúE‹Wœi<¢ê….QµN5Okˆ™ ÆÝq ûþ¶Eo7ã“À÷áy0ü/Àvn´TÈè{,nQ×1ˆºˆII¼q!¬•1Mü?>}æ}04µÝ4ë{èA$”¥ÓJM,sœ Ð……yëå¶=R 4‚œyÝÝÕv†¡ÚbYâY'‚ 4”#ïÓé´TJatf•¥S*EY@R©hHÏ衳æèg‡zHbQÀAüY!¨÷ÃÃG7û2LíÚÞÎÃXý4¬xùŠŸKÁ¡-ª (ßöí;nd‡P*©»¸6_Ódà3²÷O¢Ä«#EŠøüÇ öÑ칟>¾®¶§Å8/øï£<žŠÁÿ^¦àn2 Áñj½Ê㢡®Œ8‹Å ïùçynÏ\ŽãÀ˚ݜ‘Ì.§-¡8ÀE.0øŒ*–pq_ðpbú Ü'€4ª[е &d¢åöI`6tÞŸã™eŒŒ=ñç8z7–ÇZ| ÝF(m[ý¯õÔ¹"u˜,ÀÒ¥UÇ^ñš×ð€ˆt‚ ÂâP=U(‹X–…Ýצ€ç)‘*þ3­Ÿƒé Z´ÒŸiÑ*îÄÞÓ¥oV×-z–ͪuÊåè½Îöÿ9×Y4,+ÚGgçÈZp‰Dôy6[íxŸNGixãÏuÚdÃYÛ­§^ö²öâügæ³ó˜>þÜ<¯ú3ßÇ÷}ñˆFÐÒBœ¼hûvöïï¨þ@÷¾åúÉd¢KKö£MX<”Päî½Ütß‹ŠÒÁz)Ôì°v6hë™L°• NZ?r_ä³ë¼m›xe>ýø)~‰´p7ç“ ®W½ÔqW—JøLç; #͛Ǔ‡,Áž‚çû~ë$© êˆh±X$•JQ*•0 ˲Èçóär9ŠÅ"‰D‚b±ˆišX–Eww7CCCd³Y à “É„¡ úÚø¾ëºØ¶M©T¶m à ŸÏ‡ë{žG"‘ ›ÍbÛ6¦i’Íféíí¥T*áû¾<­š 
çC7l„“Œ†ˆ¸ªð‰œŽ!êŠ[5²‹‘6afÊFë5¬¯¯ªâòÞùxî¹™i ‚0k©}ï8NÛä†sœ(u«Î.‘ø•Lªå}}jZÚÝ ]Z`3Œ(­ïGûÔ— §§¾Ã|¡ iy²éÚbeªuŒøs·\nìL:ÛE_ßÈôɵÔîg¼TÐñì-:;‡eÁÑGo›Ù/pšÐÓæë—]Ï6T}¦ç<"Ä µ´´§9òÈ?FIÏu•ÃbQ…èÄ“·ƒêÍK%x_ΰáYàï2p‰¡DM5³+£–ëºÙJ$J=TœT%Åï¡QbƒÑp‚ÿûMäÒÉ—´çŶÍ}ü¬7™nsPä‘ Qîs'8Ž>F)8¦vv±ƒ¿Zd‹Ò8ëh1G‹œE"ÃlÐŽ¾`¹O˜4êè6tYþ•çÍ¿cÒÛæóùP`2M3˜L4oFíS×u«D*ß÷éúy{>±‡cÏ>V}/:ÍWìÛ·MÛx÷½ïfßkö1÷‘¹<|èÜtÈI8¶ƒmÙtæ;¹ï­÷ñØ7cÙ²eüö¿¥££ƒ¡¡!.\ÈðLJ¹úçWS° œñ/gp÷ÉwcYóvÌcÞmóHø paßï÷Ê}Û÷}LL†††À…\ðk]*•ÂsÏf³áõpÛ¶«D9išxÁ£D]}K‹zÀ¬}âÚ(î,ù¬\¹¾khX¾A$Èé"ÝDÙÌàu‚ÈT×£»/Ý<ÝE[DNÂúz¨ï0²Þ¿vž6kö5~°Ý‹¶o片 íÌ÷}r¿z;‰æ¥c›ŠÀïN ¤;®Ë`GÃÃÓڇ ‚ Ä© KÕã¸QǸžÍûtú„I qÁ,›UU"ùyhÏ4?6¿ÓB”ÎäÐŒ•uSô/¤öÔ"fÜKN/ÓÞlSõ¾Õb¦ûMïƒYô·Îû¯åµSÜùÌ£/í¼SæMÊ{¨§§'œ;ކnæóyR©™L&tuN§Ó˜¦†dÚ¶ pq/½<Žú,ËÂq_w8çúh.[¸:”l¼;ô—,ÁÖÝ`à<þ8]–Ų£æ_vì`ÅŽ˜¦É[¶ðŽ7¼'Òi,àÌcŽá…d’oìÚÅÿû¿|èÒKùÍC±í¹çøÛ•+yßsÏ1ÿ™gX³x1·/YBùˆ#øæyçá¯}öYþxÒIÀËŸ^yö™&ËMSýLöõ…bæ{?€ß ò³ÃãÜsÏå¹nàËq¥çá›&™Xðþ÷¿Ÿ­[·Î¬4ý»ÏGqD‰ÄÈÙZO—”JjL :˜«þ9AHÀXéBA8hq‰‚@=ê­u#ÓÊW"Q­ år8Wþ ãçÏ`]l¨©Ý_=‡õüÙäÈC.Gg§êzt·<éîØóTÂ3í"—ËaŒVXiš™tú ¡)èÀ·7vífpÙàˆÏ;;;/¶W8(ii!ÎöìÙÑþçðüó#WÐÉ‚tˆ©žíèÙ™óæÈÇ ÅbôH$›Uk|ßg(—ãOGÅÑ«VE=aÜßxìŸÿ™c‰(wO±¨äå²ß²Åêl˜w{pEüD´?³þõÉßýŽ%U=±ïû”J%R©®ç†qç:Ç–ïûaþ+íÁŠ$ú±P-:AAí@Ù÷!eàÙ†o`Ô´³hñ\\"§Ž•RÇÒÞW©T ×uCo‹b±ˆëº |ßÇó<,Ë ½¼2F&þjŸÂä€M²¶Æà;?ô,çÿö² ­¯s¡õõõ…îùZK$¤Óé0ÜÔu]LÓ¤¯¯oD˜Y­(¦º±Ð¡šóó&V2~º¼­µW¤ö‹£½,ãÍ7ë¬Ó7ƾÇùý{ñ^–í´´9ì²Ã0|»×N6OæhÿèðZšï4Á„„¯î7ýýxž<›z–SüSøæsß„WCÊLá»>ÞÑÛ–·gŒ…&\Rë%8ƒÄCµÐÓÓu]>|à ì<öXr7ß®§ËÅ»®K"‘½»ô¶3«Àþõr˲¸õî»9ãÑG¹úÍoûýÚû(Q3 ¿ìä“«ÞÇ/“~–ÑeÁ•g/à†ÿ÷[죆Q…­«Ö¬©Úþ»‡‚qÈ!˜+VpÞ¥—ª}žvŸß¸j ~/;‰ž•$N<1ÚQ¬Ý%”“³~¾ãŸ>î8î¼K/žâ¿?öXоÏë¿ýmýøÇCïăf^P,B>ë¸k®"U8yôØžBAýžçrjL¡3WÃÈä:ºœœÞ—þ«E¿±â‡ôoøªj4ñ‰k<’>ç}Àÿ͡ܟ <›ƒm0à5¼6øýù¡ôÀNÀiúz5ûaúÑZS¼kÐè.#>)•”°ÔªÝÀTÐãZÓ¯ñ°¬ šîb}ÿ\‡ƒu±Z¥÷¶c1Œ·A÷ç f ¡«¦v±´ ]¡ ^ëJM:õõ:T6•ª®B>“è‡FºØ’„EÎÀëþeuæN}ãÅ# ---Äy@å–e|q‘Í· oélßZ*EɧËeH§ÙqÛm<¿bŽãpÎg?Ë‘'œÀs‡Îð­·rô_þ%‡z(žaðÜ 7Ïçéííå¢ë®£¯·7êÜ3ÕIÇ;ÞZ_g ŸL²÷°Ã8|ãÆªõï¾õVNX³†¥ÝÝ<þÐC<}Üq,/étÝê§DÙ,/ÿÿÝ+VÀ¢E¬yüqŽøÇä¨CåúŽèîÆ/Bì3úüÆ7XòÕ¯òµoä%/y çž{.wÞy'ýð‡¼9‘ ›ÏMÎqýÕWcçót<ú(Ï}ä#ìܼãôÓâ±Ç£ëÊ+I¥RU!“GÿŸÿ/¼@º³“%†ŽC×o~Çví‚l–”þ1ƒ0‰¾åyûíØûöUÄËû|ø†¸ùÊ+œ?Ÿúû•‹å3|]ûå$:âÁyÃão˜ÐºZ4Я゚aUßÿ„„²vb¬±Bƒgþ¡˜bÃ*[ 
Ú°öuüº×®£?‹/‡XX–¥ž[o±Ë:ìóÀ7À˜J»º_Ña*¦i†ùãßA"‘ ‘H`v~0ðÕ¢²¾ÞšÉæE;ÿŒ38ÿŒ3¦ýÜ¿|ØuKÎëÃcŠmg¶LÑ=¡åhi!ŽçŸç¸Ÿ>‹?t"”‚®Zç‹Ò9צ7?4¾t\—’aY±BååJ¥xþ[ßbÞO:b³Óƒ¿Z¡rFÕ¾ãèr܆aõ—Í’ð},”ëj¹\æŒ×¿ž]—^ ×]ÇI±íã“,=¹œ—Ë¡KÄË,ÔëÅÖýÔ‚pùåHðö ±=¦ÉÓdÉ;Þ¦z_\ú¡Á®]ÐÓé–_þ2˜&K¥±c™qO‘`¤°Dçôó<‹Ö®®O€­‹näó,Èå°‚õMàæeË TâˆøÌýûå x×]wMñ oçÛ¯¸ÿ­ÀQc®§s¹IRO¡U¹¯î‰`OÌÁsÒø¾æ(Ó!¨¶m‡ùsu’·èþ-›Íb!¨­ÎK~výÛ¡³§gZ÷;™¡ž]gÛzújmúP]ïç¯çÎå˜'߬#—£XT?Q}}Õɽ'Ìh³ymÇq{ÖI½à@Y¢*è‰Dä¡K Ûõelðàd?*À¤w™Í %xÁP úm¸5!:,xQ.RSÀÚ<|Ó ,8p}è7á¢Länéùðƒ<ÜîÁKL¸Î‹rͺÀ…F ØmIåàK1>ß‚ó¡þ“—àÄŠÀïÈq¾ ·ç!oÀ€Ï»¿ <îô®îráoóð ^ðÕºsm8Ú ÇàÁ·t:­§¥É¶b‘ÈS(¨×ÝÝ‘h¤Sqé.µXTËubûTJ­“Í*S/•”>\.«môü:—‹D¨øÜ_‹xš|¾:8%>oïéQí-ÔzúÖÒÂW±åúÊf«C;;£cXVä,_ßq"ý»TR×D£¡E},ê¨Ï­·7Ú·Æ4£sкH<ÑœzBf:KMDf¹  ¸¶Ùæ5)t·§9ôßþW/_7ÞXw}}=cé߯&•ãÚ¡ZVo£ß×öã‘!×;^½ÔqâF«X´Qº$.£ª«H„‘Tft“êãé3q£K&« 1~<½Ý®¸ X‹®dWìµ·3DHœÎÎH°Ó7[4—ËUlÛ®”ËåfŸzCÉüÅï*­ùë;qâ¶Ñê\sÍ5•Í›7¾B¹\©är•Jì÷¹·WýOä'».¹ào¦æ¯6m#xmV*•¡à}oð¿,·ƒÏì`ûLðÚ¬Dc"«¢ÆAz™kC!ø?œãÍ}ꘕʵ}ѱô9öõ©Ï2™J¥PPÁ4Õ²z\©TδÕúc¡ÛÞhÊ•J…àœêÑÛ[}.år¥R.W¾ý±m-Âþnh›N$*ÛV&ß×§þÇ×I¥Ô×_>QÊåè§RÑý30 LÉÌ%‘PËúúÔ:Úìr9µžeEmÔÛ ©ÿ™ŒÚw&í+ÞÖ¾>µ¬–hã¡?0 ®Ëxæ>ôõMü»h—qE¼­™JÔ%är•?,^<¢Ó-”=L;CCÊàµ!õö*£³me„3ùEO´½zl¤ož‰P(¨ææ¹uïø¹œ:ní6ãl×.v*•ʥ׺•<ôÆë•ËåÒ„öc2ãã–öˆ3žž/æÌx¶k-¢O ÜZ‡4õôôP(ÂJ”…B!Ìa¦=ÛÄ‹"—Ëašf˜« "O¹¸Ç„Õp‰%ŸUÜýï'sÉÊÃFýÜuݰÃxyÜ¡Y¸Àó&¬Å¦¼]ÙW‡¥†A¡PðuEßÙΞø)&L*4 ›,xB¯‹íY–ò쉇éÕË݆mv¢¾Î$Êk¿„·ä‰Â9`}-ITœH;Òû(¯7í©æy¹iÇ 7øÑXH›Rå—£Ú•Ñ®I²YÏ«5^:0N:­>«]~UºóP'V}BÙÕ92vùÚëÁ5ÙÝßÏÊjb³ð¼(2o¬°Ë¸óåTˆ›Y܃Kg%1Œêã÷ôD¦¥Í˶£š(ñí5¹qìɲê{œé´Ð¡ö>×ïS)¨M_ÖHê9dÍ&\‚nÊ÷Y’Ëñ“L†wÖ¸ºùþÔ"JÇÅ0Fü a˜r{ãžnµèß}­t:§Fybe˜:µöسԘ=àÕs·ó’£ïªóüʼN‹ÖâþøG>³óëüEmmëqÐáMZÀqlÛŰé̯VÌ«Éç%lßÎïüNæ¿ù£®bYÖÈ Q‚Ðb”öïçÊŸüΜú>tåßL&ƒeYíåUs8ªoèÀw$8Ù¬šÙ'aÝ&¨Ÿ,Ýq@'Ë3QÂW •&à –;¨É‡zx˜F”ò‰ªcäˆr˜Å1ˆ&RcUqm“!馃±”˪®fß&ø~uŠ£gž™ßì&5ü|u:äfFî×ëîËå‘Q~­.>Éð~úý,òyJ@ò2õ„@ç&L¥&(žæ‰úe]à«DõƒŒ© ÷Q"z€¢ûçdð>œHŸä<¿éøÅÏØ;¯³j™ã8xž'a©Â¨´´Çð0‡rÈ„sÁéj;®ë†Uò4ë„Oh.îâÅì~ö·½ÙM„)ãV?þùG¸øòÉmëº.¾ïS,éíí%‘HT³8YòÕ¯ÂW¿Úìfú©}0a¨õ´ñ‰4­‡þEÐ3tg€±¼»À߇ªþp?X[Á} X&`)ÝY cTÌØ5h‡OõœÎcäÏ%8fÍúïtàHàQž RÀýÚƒ‹Rp„¡>ÿY þ"o±Õ6N\OœµGS6«„Ÿ\.V5FÜÉN§ÒùÏuÞ.ŠHçÞ²måY¥?/¢Ú:…R¼à€aD‰ó}_­_Ï4á¶ÛÞÌë^÷û™·¿ã8꺼'¯Í€g*;кWܱK¿ÖÚ°6!m³‘¶àÅÖÚ ­§Ð Lßçùo}‹ïvw“6 \WÙmZ 
Õèòß=œ(¨M$¢%P™^‘¥;e/øŸ ¶ícdǪٗ~í=”ñƒ6Y(¡N·%¼OÛYT †npìøòžX»âí×–jµžÐì?KôȈµ%EiÖMµH—!*ÒÓ¼Ö¹>“ÁºñŽCÿHé‡Pù`iªÄâŒâòT{óµ!ÿü‹$«—ý‰‹bËâEÜ¡-+Ä9Àså <ü¹`t9þ6Ùl–\.Wå 'ÍÆxîqyÕ!£~ÞÙÙÉÀ€„¨ ­Íþ_ý káþI==õ<|>O.—›–4‚0­¥÷²YxxK.ŠútsÌm`¬‚WÖ^ö¹¨0ÐCÁ½Œ‡Àüjq#ø€ûjH=^P5).èiÇ ›‘ŽYÔØ'~‹=àÁ À\æ0äÀó>ì vø¼¯–œìðΠÈ™eõ÷Þ$X9x΄NJð‡l BZû‹jû­@§[J𤠀=>àÁ Cï(æ˜pDþÙ€ÿ@ÕØZ„õ èNÁ_›°8Ã/¯4ÕîyþX€!ö™ð"®é„[LèHÁ}6¼5ÕœØn+ñhAMó¤¬ԙЎ‰v(DsC=çóPóÀ9>Üœ„KûT‹¸ÓࢇöÃÍ6ÆéžoÁé}ð CÙ7µ?ˆ†Õµ‚nÜV!ª‹Q1Z}µCÞDCõ¸`§kûªì´éÄ©çõZ7ÐÏâ]¢{h¬íu›ü:û%Íþê ࣡«Jëò;z]ŸÄµSk)Fì³ZÓ ®™ÇHÇ¥Bp ô9ëåG]rÉϬ5Ð×ÇÁ5MlÛÆóTmQC¨‹ÁÉ¡ ¹ž³n\EŽw²ÚsM´¾àz?ªCHÄÓbT=âþ#=ºj;øÚÊGú½º¹Ò±åZTŒS 2¤ø:ÚüXŸ¶ÎyÄÛ¢EC‹ê’éñóŒ;>[ŒŸUà ö›«sõ˜È:-ÎcOÎòWý xm¸LD8a¿þ]Ÿ<ìH¿¯Õ¼²ºÀ·«#–°«… -yfõuu‚¯Ã¯é¦÷ôw£«ÿ÷Ë_Â)§LÙŒZ÷-^íÁ+mx õ#¾â‚YœZ=a¼XoÇü+ª´Ÿ(u¢¾ÆÑw¨5ý½j'¡¸èå9Ì‰Š§ŸžÜ¶m$N?\çøãáé§I|2üæ7˜`ƒm±R­þÑGãcýîwÁ…°ð #2O]ÍR£cÕ7Owÿ~Œ ª®[vófÒ‡Ž¿p!…ŸþûW¿Š\2ãå[5ºŠeÜu¢j”µåS=Oµ)“ÁùÝïH¯\‰9<ÌöªÖßt¼ñÓiR %ü¾ ¾\*‘K$ôiŽš\bê^SºïͨíšÏf*]œÌ´ÙvîfÍ6 ôM¬¬Ý¯Õ€ó=ˆ¦6.0ÿ^~ÞËÃeÙlÛ¶%"O“–âžÚ1ÇJð"ð€IDATÌ3¾ßóô_䇗Îçj²_ÜüÕáCY zþÊ:G\ ®«&Å¢šcë°Êb šÙ¶š/ëÜG?²!"W—àâBàDPTŸë¼HµsòÑÑE*Çó4·Ý¶ƒ~tž7Œ7· ×u±, ß÷Ãb*žç…yo^ö²·²rå±¼èOÛ¿6—“硇B‡“‰’NûU¡ê®«\=t,×÷ÃT#žãàÅÖØþÁÑwÞIþÁ«ö«CàuŸcš&ùoÄ:çœsÎ$[ÜZè´„ðæõÀÛsœWû¾2V- •JúÂTŸïGñÃêb‚çaêê ½½‘6àº$Òé(ù›aDÿ]r9ìxΤRI)ºzýšÏMÔm虯D© d¶ï«ãk{*¬„[ëäŽAq [·£¿_/—SçP{M4ú3GYµUû[åydòyŠ_ú–ëbu”ºfñØèZô¹éxk­¤»®ú[»~•Íb;…¯ë®»Ôò6~È÷þÕãƒR©N½:^:QïªvDçškÖ±%…Ù14ÄýûV2|òÖp‘ÌвBœìÚ}¯ï~rT¿ðžžÊå²x -‹7¸€ó÷9b¹&­\¡IbìøÝRŒ¯T/ó}ŸR©ÈñIÄD8ö‘·AÌ}Ç}Ȩùs)UPL&ÕÜÛ²Ôú+_Ù˼y;xöÙûÉd¶òä“K!²AÜ¥aT;Ý,Ji¿J¤î鉵7ÄFœG`—ú3×uǵó›HàÒ÷ãX8‘Íf) áS~Ó4¹ï¾ïñÔS'ba¯ Ú·reØÞzíí\â$ ,Ë íû>é Á\.—Ã0Œ°º²>ÿõ¯·¯í_ô5©wof³Y¾ýío³jÕª¶«f'Ò(‡b*US_#&Fá8Q¢=M©¤Öñ}µÜó"Ϭ8¦©$º?¬÷]ÆÕâ¸äâ$‘¨äûÊøu›R)Ì\.Ò(R©ÉUœlXÿ,kÏÃxÿûÉœtœtÒĶíëÞÍ唚É`Ÿ{.œ{îÄÛØÂXÓËfÕWUe:Wš •…Æ[¸»S"ò„IÓÒBÜ ü#¯»û­£®#j³Ðê,<Î:þ`EÕrÃ0Ä~…öâ/¨Dð¨‰q>ŸÇqœPûâ¿ÈÊ•+ù—ùN8ánºé¦†4C‹Ø®ë†u àr¹\(Hè´µ^xzB®µ¸w P%f³Ù wã6µÛŽ×n]-6‘H ”™‰"qño4‰®ëâÞã­?ZDѯ3™L(„èë²`Á‚¶“ÀG>Ÿ»ƒB‰’GÛ¨\=*áSÇu]^üâ³`Á–,©°nÝáÁõ€}¨ŸW¿z>`bÛ>ù|¶mß¼á /Æ0F–Ö÷‹´è«Eáz×W Ï3iš8ŽC&“÷’¾ê‰‚“ñ~¯«¯EíDG¿_“ñÖ“Édxúé§güzN:ßze¾ÝY°_û,û>Âø¶žžj¬··Ú‹j¬ê†q c‚å+jlW1Gˆ´ÚÞ=]8XøéOðø»ßÍ›7óâR)ü~ußïãj¬¨æªõã÷U±Xñ0F¯¿×FëÓGë§C18¸7µg¸º 
jy.—ò,’IU1 ¾ŽF¿×⺔セa8étø›¤Ï[§æi'\§cÇÁ²¬‘Õr‹(¥9GÛWÔf7‹}˜÷|u²d2æF„ÑhY!à˜“{¹à±Œú¹(ÏB«c·[N½š ±::úé¾tÐB»à§Ÿö2K’€t:eY …°N&“d2>þñ7´-†aÔÍ»¡Å˲ZÚÓÔ÷}’É$¶mcYÖ„y-&ÆÏ9>)Ô"ŸLh´©Ã ëU©®ÿë‰çyd2 à ™LÒÓÓЇz²¸mÛ6öîÝÛìË9i´sÅ&¬J¡Ä6˜¬ ¸°ï¾}Üþ·ã'|ܬº®†aÐÝmòWõ6þéŸ>A©ôŽc„“嫯V×Å0 ‰Dè±5õD©ñÆ5ͪ6œšŒRhÔxÏ0 NiÓüpy¢bŽÅ÷c>7‡Ü÷.‚ï¡<× £ÚómáÜqœP|Ò!Èú!‡Êô_ß÷¹hþEÜòó[xÕI¯â_÷¯¤R©â•ã8¼ç¸÷€ ß¿ÿûá÷§SÄÔ¸WçˆïçÅ/®Úw­tü¯îµHQž&ÇqÂ|¥ú¾‹{’ÆûÓT*5feïø9èíÆòôt§Ê£sºŸ\×%™LrÔQGMë~ÎóÏc~ò“¸®|7ú‚Åþ׫V*-Æa.ädW-“”+ÂDhi!`ñµ‹ë.×a©‚ÐÊüö+{9í°S«–†A*•jšgƒ L…ë,œ?ȇ“® Ó4[Xn—3úžŸ¨'P5ÉŒ/‹ï³VœÔ¯GëcâáZo¢ø¾ …BÝÇ#<’­[·ÒNøDs»0å…FtÂ>üæ_?Ÿ½Çîå›Åo†¢DmÁ‘¿ÿû ›}*ÂAŽCT‰À½¡ŸÂ? ¥¢ ïô<å^³[í=U,q]—#vÁ+Ÿx%gsg-;‹CN:„—>ŒíÛ\6|®åb­±8úá£ùÌ®Ï0ߙϾ×ìcø¹a>´êC`Âú7­çÇ'þ˜KO»”Sþþù‹C¢ªY¢¸Ùxå ›¨4h‰(I¾®6Að^WlˆëVډ̈½×%`ëtuÚní«×rÚŧս–£N˜kKÊêCÇ<¤Gó¾¬Ý˜BG—dïçJÿ<èò±c8[–ÅÀÀëׯ§ðþøGž}öY|ð|ŠE3Š6‰ŒÈYhq<`±½î¯ˆ'1löÃ+¡=hY!ÎÜ/„ªÿ¹„õ íÀœ¾=¼fç{ª–M$ ´9¸!¦iŽèETžµ¹«Z-òÍò€½y3ÉÜYdëÆ`y>Ïéβ[–aô)o›·ñ¶f7WF%MuñÈÂUƒÛS7ϘÎÁéº.ë_ÇÇö|ŒãÏüÍçÐwZUæ·ÏŒ2åŸÈ‰ÑN®Rÿâë¤t¦÷µ5ΡD´‘ â \>JÒ"“®´«sƒ¨,nÇX"*³š"*»šöáÇþ§ÀÈ؆­>ÿ%QBü¥Dáèº]ú½¾°YÔdd æ½n\PË•xuQ‰'Í mÅXÛâ?“úâyÐâïãècZ±sè ö9¤Þ^²ù’ÉšQS1¹oß>.|+©¿ºQOILjªçBëâ/Ú¾?ÅÆÀñô%‚0-+Ä14ÄK›7≔Π$J³Ðêüá„èXþ0Gìû-%‚Îf³âÍ)´÷9.ë–ª<ßtqÏóäáÈ$ˆžf†=sçRÚ»—¾ï|‡ž¾³°Ö*ûÕa·gï8[Äd¡-ÐNd„ î­Ø˜X ýétš—.x)êÿ{Vïáå}/ÇÀˆ¡™0w‹Ñ½¿j5CƒúIùcѵ¤¨®ðhÕ|>Zj‡\Ch{­“÷D~âD]ñö5¯m_½öÖ£¦NÝMgÝÄyã6l>ÃG ûöñËýûùìg·aþêtÉ'´-ÛþáX.ùÝÂ÷:´_´ a¢#|ïy^XuHÚ…_ýïË8ù'QÎAh'vs ìÛÇw?X•+¬T*5»imK*•Š’w 3Âàüù,þ¯Gȯ_ÌçßþùºG„É¡Ådß÷ÃP½\‡^ëj¾ý;èyÅb1\®ó˜z`¯Ú™ÏçÃõâãÀl6öCñß×b±î[W”Ô$“ɰÝù|¾ª í@i$nÚ†ÎÅ`Yvó^žøÛ'Øð³ ¼þó¯çèíG³òÛ+%Ç–ÐÒõÌ3ýýG±Qa AhGž|’CŸïbèLï^*•d|,L˜–â¹öOl=¤3Z`B¦,# ¡}°¶=ÎâCŸÆ0 òä)šEÜ”K’äï\fˆgæÏgøóÏžú²ªåâÑ5uLÓ”|d3ÌòÍà±]«8}çsüê‰_µ}Xpüþ‹ _ŽãT‰SÙl”õ=.v¥ÓépqáÊqº»»ÃmzzzÂõ²Ùl¸^íd#.¶9޶'ž 1ŸÏ‡ëÕ¦±m»ª¥~mšfÕ€D"QõZV*•¶ÅÃMÓ¬ú®s¹\Xá2“ɴǘ·ìÿýÝ?úGþùŸÈ'~ú nü»é­ôròe'KةЬ¹yüìdzÎÁûyH-´/Þ³s¹ö·/‹©FÆ üK“¦'ø—%‹ä"Z64µò <ºæGtóUžøÓœøÂ‰3ïŠ11qGy„bÕd…µÇÈþiKfP¡œrßï9扤ùÃÃüò¡WbŸiãàÐCVhÇ&&&fØI[XøøXX*Áò(#ì^¦‹yû÷óØÃK9á/ÿPµ\ç…ó‚õðƒ£}.L ƒ©1û¡šÏêGœqÏ_ñŠ£?Î?ýû?MhÄXÃÀ1®·¬zì2øÌ /ºëE¼ç5ïÁ÷}òùônVÎYÉ™œÉÿžô¿¼ò¡W’%˳Ç?Ë“z’lP‚qûßmÇYéàá±ä Kp—º”(…Éï³d1lƒÞ¾^òä±±)—Ë88¸¸U¹ ‰D(ŠÕ扌{ÆE´øòÚj¼ñ×Z(ÓëÅß¶ÞhÞµ‚w»‡ 
9‹†Yvó0¯{|÷‹_䘳Žáʯ\Ùìf ¤ØØ~þï;¸î¨ßcbbaQ¤H¨sÈp'Láäû>®ë†}¦ëºáC@]ÐEïÏuÝðu6› ûób±> ñ<ÇqÂãf³YR©Tx\Pý³ã8xžWµ^|ñ¶Ž¶^;ñð¯*Ìyø\\ŠqpH¨š»iÛÌ’¥›îp®çâbc‡ó? J”0kâµ]\LÌpŸƈuê1™û">Ÿôð00ððdŽÙ@ZVˆ›÷ÛanýÒ&¶Q&{YÛ¶±v88Ì1 ©4»¸88u÷ïâ’'ÏdÐ7JíÍ ÷'†ZݹÁ?=9×ï5úµž´ëŽJ/÷ñ¹ç÷4ût&Lÿ¼Cxä%{±€¿Ùú7€êmlJ”ppè¥72tç«Ï×ÁÁÆ®²amƒ ln¢@½Ž¶]}]õw¡;j}ÌÙŽ¶É8ñû×à ¯™^W^ïþ×èmÆHvžº³Ù—` Í9=»äµo}-%JüÏ“ÿÃ]‡ÜÅã‹mq¼þ\„£éÁÁiºgíS§>Åë¶¿®Ù—bÂô²Ÿ3}–£/ÙŠ1.…ºÿ³dC;ÖrŠj)ÝïêõvíßÅã?Îi§©ß¨ÛîáØ³åÔÃNåæ›oæ¯x+W®äî»ïÆ5]/^Ì9Ãçàt:|ïaOýÝSaß¾ìÕËx`ÉüþÞµl>óÉŸ–‡ÓÔoH=¤Þ– û¦žñÑpü³råJò+ó$HX€eQ¿d®Œ=ðYªÖOÔ”©,Q¢›nR¤H“®úÖ×Dÿ†èý¦HUývŧâ¿áµ}®n† yòUÛ˜˜áï‘îsk')zßññ—î«K”F}ˆUûî¸d«~¿ŠvÀÚ¶,{7y÷y¾÷Wçó©«?Uw=}Žõ®{+ÿnF§Ï4ÚÖé±baU/¦ÊÓO?ÍyGœÇÝÃw³í±mœ|òÉüá„?Ð>g’Ÿ½ì0îé?šZüR¤ðð(Q¢—^Ìà_=<¼6ãã‡sº}ûöñüóϳxñbzè!N:é$<<}öQ=ôPΚwË—…}÷ƒ>Ȫ“W…Û,\¸ŽŽ{ì1Ž=öXöíÛÇöíÛéêêà±Çcéҥ̛7þþ~Në?5kÖðoÃÿÆîÝ»9餓¸óÎ;9óLU0å¡'‚}ÓÁ¾ûZ6D*ÓC+¢cA YÈÐ긔¥UëíÛ·íçl§D ›Á‹ùÖ1ßbóÂðH½ÍàÊAÞÈñð\6ÈÒKÃ}/Äæ#7s §0§{Oò%J,|íBº;º«é×».ÞÅýKîg x²ûIž=íÙ°¯ºãÃw&ÍÐ )iÜ:<ÿØ!<óÚ~’|žW}íUdþ4º§uŽ cއ‹)SWdÓã—±œ5êÝ+ñÏê‰}¢ÞñâÄç¼õæ¡zùxûi&3>žS©T*Ínp-ëׯçæÿM°îÇWxU³›3ëЀ¸8¦—ÇØq/­‰ì³‘7°¦+PʶGìýºW_IïGò̵kvS„ZøŒ‹HõƺÃoÄ$æº+¯ã†ÏÞÐìK1.[¶lá‹ÃÃl}ß1œúðG10X~ÇrÎ}î\.9ï’f7Oh"[¶laëÖ­lذ¡ÙM—7ò‡šÏ÷†æñÆíÛ˜³tN88ÕB›‹KŠT(èdÉba…B•Ï´,‹b±ˆëº <Ï#ŸÏ‡áù|žT*U7ô8I ‹ ™ðøyò 1T5˜îF…‡š˜ôÒŠO&&KYÊ£þ6)’ 1%á;þ”>þºÝƒN:)P×71É1Ò³A¦ÔZD+Q =tô>Ó¤é¥7IJ”ªÄvÒI†Lø=êmûèsì’%‹‹K™27ndÕªU¬^½ºæ7m¬_¿žÞó4çlÿ<Ç^y1'¬;½‡í÷AðD='¦›íÛ·³råÊðõ’%KX²d O>ù$ß_ø}ºæwáãsøc‡sö±gp÷Ýw³dÉV®\ÉöíÛÙµkgu·Ývçw^øúØcåÔSOeóæÍ,X°€3Î8ƒ]»vñë_ÿš .¸€ï|ç;œwÞy,[¶Œ»ï¾ \ï¶Ûnã/þâ/¸ùÞ›™Ó9‡E‹qôÝG‡ëmß¾Ûn»K/½4<î+^ñ –,YÂw¾óN<ñDÎ:ë,víÚÅ<¶ï;ßùN¸ïÛn»£Ž:Š3Î8ƒößÀÞ½{I/Mhßm·ÝÆ)§œ¶õ©§žsžñ ‹ö/âˆÍGpÞyç±ù[›ùé»~:ãßóTxÛÛ>Ë*§ñÇ[ÖLx›¸—ïû¤Óiz{{åM•H$°,k„§•ã8U^aµañã÷F«õ2Ëçó$ LSÍ—Šáñ¤Æ¼ç&2·Š?0/‹GÎhñQ÷õöéàLøa¨îs䨜z^áÖ¨O‘">þ”¼ö]\nºé&î¸äŽIm× ôèc^й+.gÇG>­¾/ß—Ô#9“Wfˆ§žzªòõ¯½rÍ5×T6oÞ<溗]vYeÞr¯2Tª ÌT…6à²Ë.kÚ±'cÕJ¥òÊ ?VyÑ_ÿk¥\.‹ U´‹oÞ¼¹òñ ¾TYðî¯UÊ•rÓÚ,´›7o®\sÍ5M;þdìøšk®©¼ä5·VŽØtI%üë«ôUr•܈uûúúÂ׉D"|_(*åòÝCÁ¿8•J¦’©»nüoí6­D½6¶â>Gc¢¿é`²cã¿´ÿ©rè?|¼R¨*}•¾ ebd2™p¬·÷¾¾¾J&Ù¨iš•¡!õý¤R©J.—«»^"‘_Ç÷W.—+…B¡ê¸š¾¾¾pßCCCU÷cüþ‹¯700¾ÖïãëM'år¹êX­D»Œ+*•JÅ|w¦²â>7îz™L&üÞ …B•Ýh,´&íbÇz 
ôÊž«œù—7­ÍBë1™ññŒkÈf³ ÒÕÕE6›­Jì[–ý£ªÚ• 4“ÉÚðÂÅrú’ÇÆ]Of’ÉÚñÝøüÙ Äò­ªÄï‚ÐL&kÇOtý„9õíôÿ´gšöpÂ|mšÞÞÞ0oN*•ªÊO6jÓ1£zÅóÀÔÒja9?XÂØ'kÇ¥Çß@òOsª¼7GÛ¯&ŸÏ×-Ìáº.===áz¶m‡žñ‚–eUån×+ a~¾Úõ´·Tß?¶mW…nÅ·Ñù±`üü€z½Úâ7qo§øöÓAü “µã3÷pÕE'Öý¬³3*Ô/È/Âp,µLÖŽ¶?q"©—XUa¢ÌHޏþþ~vîÜɵ×^ Àðð0›6m³=eï=Àëéëëkö5„)Ùð©‹ãŠ÷ÿeÛ'‡fS±ãûŸ0yåi*ƒÁtOja*LÅŽ‹oîfM— ƒò<×uI$xžG±X¤P(`F•x d*v|ä3ÿÉ?ÆÄåº.ù|¾®ýÆ+ÒÆ… ˲(—ËUëÅ?„‰0;þ³?û3Þóš÷JN§Óá|o`` \OìP˜)¦bÇO{/!½zÞŸuË|O˜43â·eË–0±%ÀêÕ«Ù²e˘Û\sÍiͽ2¨|ÍfË–-lܸ±ÙÍ`ãÆã~g³™©Øðüùó)•š›ÏNì§šV¸§›ÉTìø¢Õwñ‘·žŒaa•Ä™Fì§5ÛÑ,¦bÇ‹/«Ðé¿ &z:¿[£i;–v´S±ã¿zAè‘•ÏçÃ1†eYU"Üh^fÓA«ô?ÒŽÖ`*v¼ÿþÐ3Ó²¬¦8]Èø¸±ãÉÙñ¶mÛp]—§î«àÌìöGÛÆD˜¸={ö°|ùòð}GGǘëßwß}Ü|óÍœ{î¹Muß¶m[Ó ë©§žbxx˜þþþ¦¶cÇŽlÞ¼™£Ž:ª)Çß»w/;vì`ïÞ½M9þdmà—¿ü%?ùÉOxøá‡›Òfû©¥Ù÷´¶ã^x¡)ÇŸ¬oݺ•r¹Ì÷¾÷=.ºè¢¦´Ä~Z­»wïf÷îÝ{ì±M)Ö0Y;þÙÏ~ÆW¾ò–.]ʪUQuÌ[o½uFÛÝ*v,íˆØ¶mÏ>û,ÀŒk˜ÊØø?ø'žx"§¦V?üðÃ3Þ4»ÿ‘vTÓŽãã»ï¾›Ûo¿=, Ñ d|\M³íx÷îÝ µÍøxÛ¶mÜÿýÜzë­ô÷¿¢jÛƒ‘fÛO«°cdž††X°`Á„ÖŸ!n²ÜvÛmÍn‚ 0Í`Ât°aƶ¨Š)cÑlÏdA˜dl,Ì$—–Ðî|á _hv„YÀŒ„¦vuuU=qèïïgñâÅÍ>wA˜0bÃÂl@ìX˜ ˆ ³±ca6 v,ÌÄŽ…f0£BÜðð0ŽãHµ¡­fbÇÂl@ìX˜ ˆ ³±ca6 v,4ƒC?ýéOºÑÑŠòUW]…çyüà?೟ý¬(ÍBÛ 6,ÌÄŽ…ـر0;fbÇÂl@ìXhs*•Je¦688ÈÎ;éêêÃÚ±aa6 v,ÌÄŽ…ـر0;fbÇÂL2£Bœ ‚ ‚ ‚ ‚ ¬ÌHŽ8AAAAA8Ø™‘q­Æðð0Åb‘­[·288HWWW¸ü?ÿó?ùùÏÀòåË«¶í³éhËêÕ«'t¬F´cÓ¦M8ŽSu-Æ:V£®…0qF³aýY+Ûq£Ú vÜ~LÅŽù½µª7ãž&ŽØñH¤?n?ÄŽG"ýqû!ã㑈OÛµi%ͤU˜Níæ óˆfíÚµ€ªâ8™L€l6Y6›Åqœp»±>;6nÜÈu×]Wµl&Û±qãF¶lÙªU«Ø´i7n÷XºÂĈ¡õí¸m;n?¦jÇüÞZÕŽ›qO Cì¸þñ¥?n/ÄŽë_úãöBÆÇõ/v<}LצÕ4“Va:µ›¹Í>™™¦¿¿ŸÅ‹³aÃV­ZÅùçŸO?;wîäÚk¯”ñmÚ´ Û¶Çüì@p',“oßLµcppr¹Ì-·Ü(…¶\.ÙŽŽŽŽ†\ aâŒfÃú³V¶ãFØØq{2;nä÷ÖªvÜŒ{Z˜8bÇÕHÜžˆW#ýq{"ããjÄŽ§—ƒíÚ´’fÒ*L·vsÐyÄ-_¾œ+®¸"|¿g϶lÙRå¾¼zõj¶lÙ2îgSexx˜k®¹¦ª-3ݽ¿ÁÁÁp?ëÖ­óX¸Âä͆¡õí¸m;nO¦bÇúÞZÙŽgúž&‡Øq5Ò·'bÇÕHÜžÈø¸±ãéå`»6­¢™´ Ðn:¸ŽŽ:::õ¤ “ɰnÝ:öìÙS³«×Æülªd³Y®¸âŠ¥‘g²ƒƒƒ rÙe—ÑÕÕÅÖ­[Ù°akÖ¬õX¸Âä͆afí&oÇhƒØq{2;nÔ÷ÖÊv<Ó÷´09ÄŽ«‘þ¸=;®FúãöDÆÇÕˆO/ÛµiͤUh„vsÐ q Í믿žM›6qÅW`ÛvU“FsÝu×ÑÕÕU•䯙×B»,÷÷÷³~ýzÖ¬YÓìf ãPφg±cá@;y=ÄŽÛ±ã‘×Cì¸ý;y=ÄŽÛ±ã‘×CìX˜*ÍÖLZ…FÝÓ]h*Àúõëææ›o;è®®.úûûÃut\ôxŸM…­[·rÝu×aY–e`YVè¾8Síèèè¨Ri»ººÂ¸çÑŽ5Ým¦F=†Ö·ãFØØqû2Y;nÄ÷Öêv<“÷´05ÄŽ#¤?n_ÄŽ#¤?n_d|!v<½Œ×¦ÙšI«Ð(íæ óˆÛ´i‹/ß«/Ôðð0‹/ÆqœWﳩ 
öi,ËÂu]@¹~ÎT;V¯^Íu×]îoË–-¡ËähÇšî6“g4†Ö·ãFØØq{2;nÄ÷Öêv<“÷´0yÄŽ«‘þ¸=;®FúãöDÆÇÕˆO/ÛµiͤUh”vsÐ q:a¥V35®ër饗²víZV¯^ã8Üpà €z¢0ÚgÓÍXÇšîvtttÐÓÓÃÚµkY¾|9;wîäøÀ˜ÇšÉk!Ôg,žIû‹™´±ãöd*v<Óß[+Øq«ÜÓB}ÄŽGKúãöCìx䱤?n?d|<òXbÇÓÇÁvmZ]3iä~šS©T*Í>Vbpp;wÒÕÕ5•r¬ÏÚ¹S9ÖL^ aò´º7¢ bdzVùÞZÁŽ[åž&رôdzVùÞZÁŽ¥?n_Zå»;n_äÚŒÆk4•ë!Bœ ‚ ‚ ‚ ‚ Ìe±AAAAA˜iDˆAAAA„@„8AAAAA˜DˆAAAA„@„8AAAAA˜DˆAAAA„@„8AAAAA˜DˆAAAA„@„8AAAAA˜DˆAAAA„@„8AAAAA˜DˆAAAA„@„8AAAAA˜DˆAAAA„@„8AAAAA˜DˆAAAA„@„8AAAAA˜DˆAAAA„ ¥…¸žžžf7á ¡X,’Íf)‹´×uÉf³Í>–Bì¸uñ}Ÿl6;ÂöÅŽG"vÜ~LÄŽõ=p° vܺÔëņG"6ž8bÇíG«ØqK qŽã4» fΜ9u_·ÙlÇq°m;쨧Šïû¸®ÛìSj)ÚÅŽkí¶Ýìx*tww`YV•틤íø`°á±˜ˆg³Yòù|³›:cˆ·.õúc±á‘´‹ C{ÛñL6;†ö±åvOÅ–e|¾ïê K"‘¿‹zׯ0 Øð6íN;Úq܆mÛñ~<šmÇcÙähv¬ï×\.€a£‚ÅŽ£.Û0´Ž·J_<Ùó¯g_£Ù°Þ÷dûcÝÞR©n;[™.;>ØÇúš5ÊŽGëkÏYlxâc }}ÄŽ«¯W£ÆÆú³‰ôÇ£ëë#ãã™í§bË2>Omùhײ•ÆÇ3mÇ-ãçû>===8Žƒçy$“ɪÏ{zzp]Ïóèîî•ùd2I±X ·‡3$“Éð‰j2™ ÷áºnèF:ÑõÆ"nÜñ×ãoww7Žã„mw]wÒËÇjÓDÎËq,ËÂó¼°Í™L&ÜG:&™Lâû>ù|žl6î[Gú8µ‹EÒé4†aLx›v§]í¸ÖnÛ͎Dz¯ÑìXPâçZïCì¸ÚŽG³a}=ZÅŽ[ņ§rþµö5š µM-q;ÖçÍf) M³³F3v|°)&snS±ã‰ôÇbÃSèë.v<²MÇ÷?V|0Ú±>σ}||ë­·Îx<[–ññèÈø¸}Æm¡WTZ„\.WI$áûB¡PÑÍ+ Û¶ÃÏ2™LŶíJoooŲ¬pùÀÀ@Å0ŒJ¥R©”ËåðµÞ&“É„ïõ¾'ºÞxÄ×›È6µç”Ëå*…BaÒËGc¢ç•Éd*–eULÓ¬$‰Šaá~Ëår¨ …ïmÛ÷­— T]OÛ¶+…B¡bYVÕ¶£m3›hg;®]§ìx<›¬gÇz@¨†Q¨ZGìX¡íx4®T*-iÇ­`ÃS9ÿZûˆ§ÒW*•°/ŸèujG¦ÓŽö1ÅdÎm*v¬÷WÛ‹ O~LQ©ˆF#ÇÆñý׳ýƒÙŽ+çr¹ÊÚµkg¼?žª-Ëø¸>2>nŸqE;è-šê8N•{gÜU²ÖµS»k7¸Rw힨+a3\gkÏI?Èf³“Z>çåû~責ÕçT*¨k­ŸÚÕî[/7M˲ªž¹®‹išUÛŽ¶ÍDÜbÛ±ãæÙñX6YÏŽÇ ÝõµKs2™¤¯¯;®gÇ£Ù°¾^íhÇ3aÓ=ÿZûºóÎ;Gí‹GÛf¬þ8ŸÏ‡îÿ³™é¶ãVµázçÔnvãŠÑ¶i%½¢eBSG»€õˆ¯išØ¶þ/—ËÍ>•)1ZÒÀÉ.Ÿ,¦iV¹ëxë©´?¾Ï¡¡!€1ÏÎÆ|bÇͱãÑÚ2úÇTÛ*•ªj‹ØñøÔžûl°ãV²a½ÿÙ¦ž;ŽC>ŸgΜ9aÂÞ9sæ´MÂá‰"vÜ>vþ+v<ÒŽG³ah_;n%†úöuæ™gNz›±úãr¹L¥R ÿT*±cÄŽ§‹ÉÚñXý±ØðäÆ v| Lul £ÛþÁlÇ ãã|>Ï®]»f¼?žŠ-Ëøxtd\Ñ>ãŠÑ¶i%½¢eBSS©¥R‰îîî°jEü3×u«J)¡Awvv†‰%u…—™F»œær¹ª×=_ýƒl𿤖(ººHww÷¤®¡aôôô„u½m,Ë"•J‘ÍfI¥RÚ¦Ýig;®µÛv²cÝÞÉØW&“Áqœª¶ˆ+F³ãÑlZÓŽÛ͆uû'k_“í{{{gü{ibÇícÇéņ'6¦±ãeªcc}ÎcÙþÁhÇ ãcm›étzFûã©Ø²ŒGGÆí3®˜è6Mµã†eŸ›"år9LZªªôõõ…ÿ …B%•J…Ÿ÷õõUÊårUòÓ™F'®}=ºíº|ºÚ?‘k¨ UÊår˜Àsº·igÚÑŽkí¶ìø@ìk2×ý`·ãñl¸Ri-;n'®T¤ombÇ­oǵíiæx®™Ê˜¢Riþõlg;Ž·y¢×Oúãñ‘ññÌÛq¼Í“¹†2>û|e\ÑÚãŠvSÏ©T_èF—Ö¥½óù<¹\nÆÜ]=Ï«r¡®¥žR:•mf¢]ÓÎi1•{*ÛÌ6ÄŽ§§Mõ˜)û:Øí¸Ù6¬Û v|`Ûì4ÛŽ§j/³ÙŽ…ÉÓŽv<›ÇÆ 
¶?UÄ–§§MÍä`·ýv´áÙ®Ñíªe¦ÆÇ3mÇm!ÄŠÑÕqÆÚÅShžçá8NXq§QÛÌFÄŽÃLٗرØp#‘¾uæ;nb“3‡Øqk!¶?uÄ–Û±}±áF2Sãã™¶ã¶âAAAA¡i™ª©‚ ‚ ‚ ‚ ‚0›i™ª©q>ùÉOrÌ1Ç4»üö·¿å%/yISÛ°gÏöìÙÃñÇßÔvüþ÷¿çÈ#äÈ#lj;~ûÛß²qãÆ¦¶a¢lذAì' •ì§Ùß ÀîÝ»ùÜç>×ìfŒ‹ëº|÷»ßû hûi…vìÙ³‡ŽŽ.½ôÒf_Žq¹þúëlºý´ŠK;F¶ãïxGU•»VDÆÆÒŽñÚ!ãã‰#ããjZÅŽÛe|ü£ýˆM›6qúé§7»)-A«ØO+°k×..\È'>ñ‰q×mI!î‘Gá-oyK³›ÁæÍ›YµjUSÛ°mÛ6¶mÛÖôvÜtÓMtttpê©§6µ›7onêñ'ÃÐÐPÓ¿7±ŸjZីöÚk›Ý„ 1<< Ðôk&öÓzíØ¶mƒƒƒÍ¾bpp°%ì§UìXÚ1²º¯kedl,í¯킌#Z¥l;n—ññ#<ÒvÜ*´Šý´år™ûï¿B붤wøá‡³zõêf7ƒ³Î:«éíX¼x1GydÓÛ188HWW]]]MmÇQGÕÔãO¶­ÍþÞÄ~ªi…{”‡N;°hÑ"Ž?þø¦_3±ŸÖlG;ˆGy$§žzjÓ¯Y«Ø±´£š­[·²hÑ¢¦¶a"ÈØXÚ12>ž2>®¦Uì¸]ÆÇ†a°lÙ²–¸f­@«ØO+ðÈ#°k×® ­Û’B\«°aÆf7¡%:g€5kÖ4» Âû©¦îiaòˆý´f;„ÉÑ*v,í„V餠ããjÄŽ'Ç 'œÀ)§œÒìf´ b?“± )Ö ‚ ‚ ‚ ‚ 3€q‚ ‚ ‚ ‚ ‚0ˆ'‚ ‚ ‚ ‚ 3€q‚ ‚ ‚ ‚ ‚0ˆ'‚ ‚ ‚ ‚ 3€q‚ ‚ ‚ ‚ ‚0ˆ'‚ ‚ ‚ ‚ 3€q‚ ‚ ‚ ‚ ‚0ˆ'‚ ‚ ‚ ‚ 3€q‚ ‚ ‚ ‚ ‚0ˆ'‚ ‚ ‚ ‚ 3€q‚ ‚ ‚ ‚ ‚Pƒïƒã€çMß>EˆAAAAf®;±õ|J¥Hds]èéQ}ÒièîVB\OzŸN«mâAAAA„¶Ãu•P¦ÿ&“J,óý‘ë‹JPÓb[2©þj!Îó “ËÀT  —SmL Èç£ý¦ÓpÅ'óÄGM¨Ís›}ÑAAAAa<´[*¥þ–JJÓbZ"¡þƒZ–N+-“QÛ¤R‘gÛJdÓèí4–Uý>þ¹ç)á϶¡P€ë¯ÿ=¿ûÝÄÎA„8AAAA¡å‰‡’ÆE·zø¾àl»z¹e)·!“©Îwê©{~jBÛŠ'‚ ‚ ‚ ‚ 4ßW–i*Ï4Zׄ·\Nmcš#…µÑ¨õh›Nt[¦‚q‚ ‚ ‚ ‚ ´ãºÍªðMÓT¯ãžd–Uªñ}>šË©uÒéÑ×m7DˆAAAAÏSâ™iBo¯ZfYQ^6ÓT‚\]•Ô¶•Hgšjý|¾:ô4‘¨_€¡‘ª©‚ ‚ ‚ ‚ ³€"0z•çU{°Å)•"q,ŸWÕH3jÚÙ©ª”Bµ æñÝ ‘—œë*Ô>2™êãÂÚnˆ'‚ ‚ ‚ ‚Ђônì}¶æó$‘°åi ¼÷ƒÏKS<¶ë*q­§G‰e¥R$®e³ÑkÏS‚\_ŸÜ õ?•R n³tçäç•¶ÀìSžpårtl-è5…Ñ”Çøç¾¯\ù’IpN[·Ž<0¡Ý‹'‚ ‚ ‚ ‚Ðb8Áß$J´r©¶²DbÁg9”ð–E ^Vð>=ÁcjqÍu•ÆÔÛ«6ÃPº“.€Ë)áÌ0”ˆ–Ë©×ÙàØ¶­Á`wÌ ^÷)” — ÚëQí%ÓÀÒše Å÷U¼l:­.ÀÒ¥êÄãâ\2©ÔI?›N³û’KØqÊ):Ä´ qÃÃÃlܸqIJ뮻Ž7²eË–™º|‚0eÄŽ…Ù@­‹ í†ôÅÂl@ìX˜-ȸB˜ ´›;@%VQ‚Z.XžÞ÷6JtsP"WžLð_¿ïA‰ZÚcÎ󔯤ÃK³Y%ÀÜúú¢ª¦™Œòr«­DÑŠDâáœ`y9hs%h‡´© mOÛN"±0‚}h1rZq]%¦ÅÉç•+Ÿemãýìgä·oÇýùÏ£‹–H@_^¹L:‘ s`€³?ñ \¼xB‡v!nãÆ\wÝuU˲Ù,ƒƒƒtuu‘Ífqgj;„BìX˜ ÔڱذÐnH_,ÌÄŽ…Ù‚Œ+„Ù@»Ù±âÁký¾%`åPBU%À%‚÷V°,Û—^§¸. 
¯Ì*§®b1Ò¢R)0 ‘˜V/44Iä‘W"̲(!-.¼‚öÔÃŽ½N¡ÄA7Ø>þ?œO/Q˜í8£J] σ9s”ê˜N+1.æíæ\x!]‰zï{I¦R$O; ÿ}ï£xé¥tŽiâÚ6W?ó oÛ¿Ÿ×]G®TâÉ“NbÇm·Mè{Öª©Žã0<<\µ¬¿¿Ÿ;wríµ×JqÞ´i¶mOå‚ÐpÄŽ…Ù@­‹ í†ôÅÂl@ìX˜-ȸB˜ ´›kiH Y”P¥ß'jÖOŒ³?00,XgÀ·}XÑŽ‡vÃYaG‚__°ÝlT¬O”sN¯ç¡„´2£ oãµ­2Î:ZŒ+ÙýûIÌŸŠy~üü‹E%°e2ÊÕ¯¶Lk±ÈŸwoH¥ðS)òÀ›wìàëÇCÿ‰'òÐ?ü§\tÿîwƒãðÃþôö²zýzÖýÿ»/¼CþçX´nË—ãžz*…\nÄoþhL›GÜðð0×\s W\qEÕò-[¶ÐÕÕ¾_½zuÓ]>ÇÁ/ù^ƒp]—b±H6›=ð ÓN;Ùq³lXh}êÙq+Ú° ŒF;õÅ‚0bÇÂlAÆÂl ì¸Ö™FyŠiR(³!VÚT&wåà] `=Ê㬀½ÒA[L¢¼s…à¿;Õx½LP„s]Uõ¢¢ÄrÅ"}LJ±uk(æÒw@6‹s÷ݼiûvÒ¶Mç—¾DgÐöÛn»Ýÿïÿ1¼iI×åµ÷ÝÇÛÏ8ƒ£·oçk_û>ý4ßùÎwXxé¥|ëÏÿœ;ÿáøÀK_J©Tâþûïç›ßü&ÿû¥/±à{ßãeßû¦iÒ›ËqÓM7‘Ëå8õÔS'|Ó&Äe³Y®¸â ×ÄÄîÙ³‡åË—‡ï;::ÆÝ×Þ½{Ù²eË„ÕÄÉR,êrê8N•Ðæû>¥ÀÐ’É$Ùl6|/LŒááa¶lÙÂSO=ÕÐãL§?õÔS íȓɤˆqm†¶ã½{÷6ô8õìx*6üÌ3Ïðûßÿ¾%$Bë088ȶmÛØ³gOÃŽ1}ñž={ضmýýýÍ»hBK²eË~ÿûßóÌ3Ï4dÿ6÷¢ -E3ÇÇS±chüøXh?Ûr|ܨq…ƒ›*(‘+5…}d³Q¤¥çEÅ@T0Íè½ÞÿuüÞVÇÔ9ÜL”à¦CZuN7å7aá­†¿ï}ªØA±¨bcÝ:™ßƘç>ÿ­oqëe—‘yӛȕJä‚k•=ûlz’Iþ¿|„ìcœrÌ1ô-^̧o¿Ëzˆ7<ñºë.Þ¾|9¯Ø¼™¾/|_ö÷Óg<þ¾÷±·«‹÷Þ~;+W®$‘Hð÷ÿ÷¼éMo¢P(ð£ýˆóÎ;k®¹†Ç?ö1¶¿êU|hÑ¢Ð{²¿¿Rããiâ®»î:ºººX½zõt쎽{÷²uëÖ† ò]×=`8ß÷q]wÔÏòù|(´åóy’Éd(þÙ¶ÍÐЦiây^(Ì c³g϶nÝÚ0q¦ßއ‡‡Ùºuë´·³§§‡t:ëº"ĵÚŽ9ИN;~æ™gxôÑGbÇBû²sçΆ qÓÝ‹'ŒÆÖ­[yôÑG"Ä5jl¼sçΙ¾LB #ãca6°sçζȸâ·ß>êgZˆ‹ñ|ŠLSå|Ó:WÜá,1Nüª>¶‰ãÆkK=r¶ÍÕoy‹zSG79ùá‡yýâÅüéòËÙxÎ9a>º°¡Å¢ªZšÍV7¾³ÒivîÜÉÛ?ÿyüè„|Ÿ¯{vî䥟ü$ç>ö·æ3twvòµœ¿»ñF®]³†ŸÿÍßà~üã¼÷e/ãÑŽî¿øb^yÔQá! …B˜CðòË/çË_þrUÛßóž÷ð»;ï¤8gNU(pS„¸­[·rÝu×aYVPBò¬ÐÕ3n¤ýýý#ž ÖrÌ1ǰaÆ ?Y™(¾ïÓÓÓ¶±£ lú³l6KOOŸ¬P,I§ÓX–…iš,]º”b±ˆeY¸®‹ëº˜ülYX–E©T"_[©C¨¢££ƒ 6°bÅŠ†cºíxÅŠlذaÚÛéy^èÑéOÂWh>ÚŽ9昆c4;ž;wî¤møè£Æ²¬†Ø±Ð¾¬^½šK.¹„ã?¾!ûŸî¾øøãç’K.aÍš5;tB‹±aÃ,Ëâ裞ö}7jl<]bˆ0;hæøx*ã hÜøXh_V¯^Ý–ã㩎+nèïçïÇ8W±Å/×­v"Ó²D6‰n©”ëòyUõ4“iØ¥­ËIÛ·ó×?ú‘jDOd³‹E=ôPo¹…mÏ=Ç;¿ô%Ž_°€ýá¼ýíoçÓŸþ4Å¥KyüóŸgÿg?ËÆ‹/®Úg±Xd©ïsßïÏ眳Î:‹?ÿó?çì³ÏfõêÕôôô𢽈O}êSÜ|óÍÜvÛmÜ}÷ÝœsÎ9ÜvÛmd2RÀ=§žJŸi’ ÃÀ0&ïÓgFXC³fÍšI§¥XƒNn¨ÑÂ(WÓþþ~†‡‡Y¼xqèÖ Ç!•J‘J¥H§Óxž‡iš¸®‹axžG:¦\.‡‚Y|ÛžžLÓÄ0ŒP”ËårxžG±X$ŸÏ«8áÞ^²Ù,‰DÛ¶Ãc¸®K&¸ ,Ë¢P(J¥0 ƒ|>ïûd2™)ƒpà´š‹ER©‘ÎȦibÛ6Žãàº.‰ñk£Ùñàà ½½½-Ñ ÂX´Z_,SAìX˜-ȸB˜ ´’ß3Taø{'«xÏ:¸Àçßþ8_ûÚÑ†Ö C9c†eôöV‡›‚ß ªrZ.W¿ŸNÇ ÚI†Åb‘+¯¼’]»v‘©TxtÙ26ôô`/YÂÆÙo|ô£åuý×¼ê´ÓøV*Å¿üË¿pà 7Íf¹ûç¯à½ÿøÜ¾h¸þz. 
˜ÏsõÕWsÎ9çðÙÏ~–s®¼ó¤“(}ë[¡°Z(ð}˲ª¦úúúFh+Sߦ›i­šZŽŽ.½ôRÖ®]ËêÕ«q‡n¸aFO2ŸÏcÛ6®ë†ÞjŽãN§BÎó<|ß:Påyù|žL&ƒmÛØ¶Mww7¥R‰\.z³•µµ¹\õ¥©5.ž¤R©ðžçaÛv(Àø¾ïûa{ÇÁ÷}_f˜fØq:®â²Ù,¶mc™L˲Z®¼¶Ðº´B_,ŠØ±0;fbÇÂl vüã;ç@œœÊ³æ¨ŠŸ6°âùçùá¿I©ôA ¡é8OòÒ—>ÛÞô7ÝdÍvbF$¸ÕøxGë"SE§ÙÂ÷Ù»v-kï½——üÙŸñ«¡!Ö^t]]]ü÷ŸþÄíÙà o{]_ù ]¾¦Éí·ßÎß~îsÜu×]€ Õíúî‡?LïÀ@è~Ægðì³Ï†ÇÞ¹s'†að⿘r¹<â<âóçVÜF£!B\mxçºuëèééaçÎlذaBnË‚0ô—Àâ_†1 ÃTÿͲ¬°¨‚öT‹{«Ù¶ pŽã0000f›LÓ ½áê‘H$0M“t:]%‹ÅPˆÓ‚!@©T"“ÉTyè¥ÓilÛÆ²,ê¦fÚ±þžã¤¶7mÚÓ²ÖÞ!NÜŽgº/„é Ùc A˜ÄŽ…Ù‚Œ+„Ù@3íØ_°–>Éw½%ئªæY@åd˼m?Ï>{ ÿñ)ŠÅ…d2P(äxÃö²dÉKpݽxžÇOºƒ—¿|jaéÉd’B¡0)ï?í‘g¦iª9i6Ëá‡Η†‡9þˆ#ðÿîïøáÆdr9’É$™L†¯|å+Ê-/ÐQ~ñ‹_Œz ˲x›çqÑ[ÞÂ5×\ÃOúSz{{«Ö‰;<µóü·áqšŽŽŽiÏù6Ú³ Tb{’ãËd2UÞfÚËÈqœ0Ì4Xëá–ËåÂØKnÈŒ˜­Ý*u›´‡\oooè¡çy^ènéº.ÝÝÝôööbÛvhèºðC2™ ?‹{Ô ÆLØq2™ ¿/mÇ:|:ŸÏ‡¶hšfh+Édr\1X`fûbAhbÇÂl@ìX˜ ˆ ³™´ãÇïz’gÀ¼ €"ЇÊ7fzpÉΆÏðð0^¸Ót(•\z{{ééé¡TÄ4MvïÞŒaÔâjSéù¢eYa4ÞXEÿt$V\¨Ó‚®ë†NA8f_ÿc¤/½”E™ ©E‹Âô[µ"ÚDX±bE8¾çž{fä;™.æÎgþ3ó'´îŒ q3ÅÒ¥Kñ}?Ìͦ㗵h¦US-¤iEW£·S<Ó*«7ôìôAAA„æâS]€áO/¼ÀɆϽ۞£xêaXDIÿM.¼ð: |…L&COO–ÞÞÞpŽWÏɨ–ÚGét:tÚ1 #Ô F#î„T*•H$ ƒýçžËð7¾Á{ßúVæÌ™ÃP›í»Ýݤý Öi²ÜqÇMû®ªpP±Âã᡾À¨óÛëy·Ö,t´›vމ£µ=Œç׎;‰D¢Ê³­¾ïs×]w…)»î¿ývÿüç¼újn|ê)NX³†_]~9…B!,zÑʺIý“²Áßz¤ƒÿÙØºõ¦ÙN°ž¡þ?xÕƒì^±{BM×#NWiutåRíåå8ù|>LüQ˜ßx´‚!M$ ¿_`ôðX/.™LR©TB‘-®TׄÐ"vIÕ9òj·[ºti(jEÞ²¬°úl&“!—Ëá8Éd’T*E.—£T*UÇ– !:W¡iša²mÛãºõf2‰ÝÝÝd³Ù0,YAAAh>>ÜßÏ«N:‰9Ïü† öñö7=BÂ= €ž¤J¡V(oL§ÓxžW•~H§²Òóu©fÅb‘D"1¢š)DZA*•Â÷}ŠÅ"÷Ýw_u;c)²Ž¹ðBÞôÊWrÙe—q÷Ýwó‡_ý .»Œ_ù>oùÁ¸í˜c¸'(\é8NëÌé‹D0’À@°,J— >/¡TQèE‰i ’õÖ€”Šª+i¸(ñMú±×Ø»e/lXSC¸-[¶Édèïï ¿¿ŸóÏ?ŸóÏ?Ÿ‹/¾8\ÞªèÜn©TŠB¡€ëº#B'á¡5Qqo*ŒgÐ:äv¼Â ñëP*•Fˆ:µá¹–e‘ÏçÃð×ÑBËårUž<-e2†††Bo.-Êéë_,q‡žž²Ù,®ëV]ǃٛN‡RÇßOTÖ!ȉDâ ½~‚ ‚ ‚ ­‚ œºw/ϼô¥,¸ï>æ}ótN:éF, ŠEÈd ¯,«º0_mžùD":™¦æ}ÀqJ¥¶m‡óÁø<Û4M …]¼˜×pBUÀ¬XÁÿ®Zű?û»W¬`Þ¼y\y啼ù]ï¢kï^º¾ò.Z»–£œ7¿ë]aJ-Çq¦ßÄ þƒ¾ôß,J< %ªåJ(+ÅþZÁg*1Ÿ¼ÖÕ2ô¥Ò¾Ge” — öaÆÚ‘@7æ ²~ýzÖ­[&)Ôâͺuë(—Ëd2n¹å–é½ÀÓˆvÅŒçTK¥R¡Z¬ L7£¹tè>ë…ÍÖc*yìÆË!fš&Ùl–¾¾>:;;GúâÇ­m«>†³4M“žž@u¹\˲(•JôôôJ¥Âp׸X¨sÑé»wOÌÕ³Ñî¿ÚëP{ÂMæûÕyü&âQ)‚ ‚ ‚ 4ð\—ŽŽïóÀŸÿ5‡ì»ûJ;È Ž£<á\×­ÊÏ WíýŸ«g2J¥R2 Œu]—åŸÿ<Ÿý³?ã+6ð_¿þ5]]]”æÏgï¼y åóþþ÷ó½O~r„†bÛ6¹\.ÌK®çñ#t =ÕÞiF°¬ˆò 
³‚÷>*^-ù`¹Íô~L”—Z*X–G‰h~ð?ƒØ~ËDB\"ø¯‰§³»¶¦Âˆmãí™bzö¹›6mbݺulذPÂÜàà`X¾wݺulÚ´‰þþ~ººº¦v¤£Ã'ã ²~­]8Át |“uëœÌ±'*ÎèSÓ4ÃBS!^ B_«Úã$ |ßgéÒ¥áùèãiϸt: TßøÆ7ì·0ÝÝÝ#òÀMåéBÜ£Q 7‚ ‚ ‚ 4Xxë­ ~ŠE‹Î9çA@¥þ*AO÷t^v-¬7‹çˆ/«Í§uÏóTú#à9Ûf¨¯ËûúØqöÙÜôÉOB©Ä3¯y '\r G}ùË@}­A{é% êë*4´ŒÎÌà&XžC iiªCEµ˜¦Å·¸P§ÿ&ˆ¼ÞzcÇJ¡¾^ªs¿MwÔ¬¾$Ùà<¦(͆z饗† Ëå2]]]U¹á–/_Îððð4ŸÅôÍfq‡¾¾¾Ð`ã±Ô#¦;°â«ãqå:WÙt[ggg]ï¹J¥¾×yï@u&ù|>ì”n½õÖf_žiCW ‰_Ÿ­2‘½/]º”ÞÞÞqC—AAA„éÇž¿õnÒé')—×rÆ%@…ŽêŒQ:¿ÛtÍu + ô²Ëf³är9LÇá°³Ïæ'gœÁßû^nÒssËâx€^˜ð±LÃT‚˜O$¤i‘M{¢¨ßн.SÁö:´T{§Õ›Æ(±-Ž[7(œÐ0Ô—ê3e‚qU9àúûûY½zuÕŠ;wîlàÙ®ëâyÞ˜‚[mî­é Š:4º­¹\nÚb¾ …¶mOº-³5ßY2™ _O4Ée<\~,Êå2½½½ËóÈë—ÏçCOCAAA8˜Éçó³vÎÑ |ß'N‡…õ²FæWoUL`ÇŽ{øêW—°ÿi¼ùÍï "æóyzzzÈçóu=ܦJ>Ÿ§2¤¯¥O>É)ÇCo=xïu×1ØÕÅ?ýi&dññéb¼rh'ª¤2¨¼kZ:Ða›‘ôzV°Éú‘L$¬tºðQbá~]‡tuu±iÓ¦0Ù^m½M›6±gÏžâ\« KüŽG; g­ÂtŠ—¦i†¹àÆ#.M§W^«2‘0RåÙ›%òÆ +®§§§*g¼@†.o-‚ ‚ Åb‘d2)B\ ]USÏôuq‡¥K—âºn˜ûÔü"™LVÍßJ¥†a„BS2™¤§§‡ÎÎ΃JŒó€ááa #ƒïÕWÎg÷îU×)•JU¥Ø:P|ßçK'ŸLÂó XäòB—|âP*‘Íf¹aÑ":.¾×uÉçG™QêPQöéÇ–ëàµ>”gZŽ*ZÐvX(áðƒÎæ¬Y³†ÁÁÁ0Þwݺua.¸µk×288ÈW\ÑìS®‹.ÐP.—Ç\/‘HL«ÊÜ®4;‰ÿD5]¸à@rÔµ2ñzß÷'Tm&‹òìM£úªk«° ÆÀÀÝÝݘ¦VÕi¶=‚ ‚ ÂL£=¶<Ï s47¤úã4ây===a!¼ÉÏç±,kÜó+‹aåM=ÓÇò}ŸT*E2™ ½¹ôßD" Èf³äóù0˜^žJ¥Â}ëôC¾ï·ô5Ÿ*Y¢Â ￟‡v]Œa@\ŽÐ}ÓFÈu]œ^`þ}ÏÓmšüüï€~œa@þjYVXµ HlÓa z:ÞKTˆÁà€B3Û“êðÚ)2W¿Ø°aCX¬!Ά Fä‹k%zzz&tóê*œíÂD½ü&Ët‹ZŽãL»ÀiÛv(颳Í#N qžç‘Íf1 cÄ÷í£<àlT'î¢ú>ÕçW½Y“H$B·p˲Â{Á4ÍYšª ñÌ>¹VAAhÉd²ÊQc2¨ÇÁuÝЉe&èììÔÜ¡X,†U1uTM.— Çûµs³t:M©T¶íp¾‘Ïç±mÓ4Éçóá¹hÏÀ\.7ê<,ŸÏWo(Ôiw\×¥P(„ÑPñö†Awwwx µ8çº.K–,™±ëÙHt 1]§àôÖ¬Y1b½iá<‚9¥mÛä‹E2©^>ÏÂOºzÝ"ʣ˨Y–BMBµ>ç1Rl›}ºiS˜;Þ ­Žª+~è8ófªèµå€§“Fn˜î¶6ZÌ™®­Šçyxž7¢Âlõ%ƒò|Ky¿êoÐÖÓÞq}ãKç¡‹Wö”»^ 5©™½~/-Ë\€-[¶°iÓ&®¸â :::BQnÔä}-€îdš?¬Þ` Km$ÍþÚ ý#XûÔ-‹ê D¢šAý¾ÑDõ¡f°¬ê¡˜k©T*,Ø¿ßu>¹Fä-¨E{¼iqà<òÁÿJËý^ĽÛj-/~†¹Q–×#1Î2=\ˆ_c=ôÒyK±c:Tç2%&–€ nº ÞøÆ†^WAA!*> =ÃF#“É„a’Ú;Î0 2™ ¥RiÄö–eÑÝ݆¸êùP2™ÄqúúúFu¬Ð½µˆ¦‹ r¹áöqK‡¡Öâºnø`=~Þñ`†aTÍ⯧cî©EÄ©Ì ÃÃoÛýð`ãÆ~Ž8âåÓ|W…¡æóJŒó<Èfé`ÇŽò¥ÿ…<\pþì½`/ŸzϧÔDOf±¿µ ÂŒ1`ë֭ض͚5kX¼x1k×®mvÛF%Þ96ÓkL?¡˜nÄ»ìà@'/Õ?è‘7ÑŸB3ö×g;‡a4J¥¦i†OùÊår˜?cº‰â±Q¿µY6¬`y­çZüÇ­U°Pß_¼­vðߨY&õ¸AAfíiV71}Œ\.ŠoÝÝÝØ¶nSo ](Š¡ñÏó( ôôôÐÛÛ¡Ó¿uôS¡P¨»_Ã0¼kúýXèB™L&âêEÝ4Ó4ëFàŒè+°yó¼õ­ 
§wç®Ëwþô'^•É`%ÏÃËåȘ&?,”YÛw9”á‚ä\ÐwÁèEÄ ®©„¡©ñb ­’ªÑ™ã8M-ÂШέQÞeR!³uð}?t“¦¢Ä¨©<îLÓèB~í@<úå™gæsÙe'O}gÅ"8Äæ ¿ýp&x¦‡ùQ3JLŸŒˆÇ[K qýýýlܸ±êÃÚ÷6lhv{Ã[+íÍâê•tnElÛn+8ý4g6â8NU¥#}–êÛ9VØæxÕt¡†xõ¢ø¶©TŠ¥K—ŽH'êïuª%¼iO?]pâ@…´ñªÃ6‹™+`/‚ ‚ ŒE6›ÅuÝi‰êÑöo}}¹Ö«þgÃ0š>?o6ñ£{öüÙí¬èÀ ÜåÁLxÁå°‡±tÃRœ´CÖÈΈ ÓÏ\€ŽŽúûûéïï?X½zuÕûVAWq‡¡¡¡¦Þè–e5ì G#D³F¹ 7B0ó}Ö qÙl¶JÌÊ3=ÃZˆ›JF7ÏóÂRäõХ̳ÙlÝÏKDyßÜà||ªóÝiNAA…ïû”J%|gm@í\4‘H4¼ØÚhض-¡©D©u.|vê;¹É‡ß¸pMÞ•…÷Â6¡óÞÀœŒ6ôÙ"µ+sÖ¬Yjh;÷\pœ¦«íTüñ£Q–ã8c˜,¶m“ÏçÛ®rìD‰ç…óPžcÓ‘óÌ$ ýœ –eQ*•ÆýÁ¶m›åþ0mØÀá=zÏù(1Q;¿›ÁûbÍy‰'‚ ‚ 4ÇqH§ÓÓ:7i' …BS‹%Œ…s0áº.?>4ñ|Ôä0l/Áº¤R`šwC‚¿è)ÒËôTºšGUޏááa6mÚD?{öì¡««‹®®®–ú’K¥{>ö1–|ýëÍnJCiT:Ý£®Ò#L ¶9h¸É —_Γ–EɶñÛ£nç»ßÍï׬Ὗù étšýë¿ò«•+ÙðÌ3?üa (»´åàvLAAf ß÷Éf³3ŽÚªHÅÒæó«?ý‰Ò7b_¼kü Ïo~3È“O.F”^ï)A¯f[6M“T*UUìOhoæ‚ò„[¿~=—^z)ëÖ­«ZaÆ ¬_¿ž«®ºŠ|>?•cL:¡¼ ü×'?ÙÔ¶4’ÞÞÞƒ¾ÚÌlEÛ0@7ÓŸà?ƒØ@ r9"¡OçpóP\lT_o{,{,éÑÚŒò¶s‚¿9ÔïDñôÓYðÁò¹‡¦\.‡OàJ¥===”ËehAA„†ãy¥RI× -뺼ðÂ2º»_3öŠ>Á„ U!5‘<øWz#'-.»®K©TµxžÐ>°iÓ&ºººFˆp‹/&ŸÏã8ƒƒƒMm¬ïû¼ìµ¯àˆ#Žhj[<É™½˜¦‰‡Ê¥6ÝÙ+ T®6íi—–¢¼uþ8?øÌ$ 5Qb´ü«ÿ¦QÂ^šÈcÚþö7~ík#ª2% 2©!*‚ ‚ Ì Éd’\.×–‘EÂìÁžùõ\,¸š‡ú*çžûXýµwDwlY±é4O·”'Ÿ|Ïô(‹‹Åpí'´?sA…¥vuuºRGG«W¯fçÎttt4µÁ»àø¾® â¤#4“g.¤DãŠh1"á,ÚÆšh)-ð2¨ß‡Þào½‚’‡BAAhž§Ä Ó4›V)´qQ鵓€AõÅ –ë¿z]ˆæ>A8ì7ƒ<ûìkxË[^:r%åáà£&Zš¾>èîfýZ~qu–apnhd±Haf™;Ñ[%GÜÏ+¡Ù¶!—SÅG&\×vO»D"!Á$Ùvúéü_ªûÝF¡'z,õCÛ‡äz™zÁ×u%4UAAh¥R Çq¡ËÒ¹¨¨’"JCÑ…É2Œþ€½ˆŠJÑé¿l¢<Îv°o/ø¯s=ëõ²ÁëDlû"‘0¦Å3eÓû,ÃÚæ×´#;|n¯!Š”ŒçšNûu‰ô;Á:fì¼tÔM‰éOÁÓ.<ÿüó{ì^æÍ»¡þ<ÙA]œzŽm½½,ìƒM¨8™ °zõê1+_ö÷÷ë57|ÿöÛùE:=iO¢RIå9ô<ÈdÀqÔÿƒÍ«3^(`ºhT‰jÇqf¥ˆã8­Y3æv3Éý@ˆPhY–ä9AAJ©T‘&eª8Áÿxª‹êt.DÂX %>e‰Ò¼hæÛz¨ÜY”h¥Å5 %ºé}¨bj9"‘OÅKær¶ˆ@-°é,îZ +Ëì`R°^_°­Ž|Ñb Î6fç4Ù‡ðzÿútÛº‰D?‚uâ>‹ú:·âœè@ùÈG \>»þ‡QøR±¨D óÍ4)•JX†¤J>¸ÙÉ\P/7mÚÄUW]ņ X¼xq¸Â–-[ظq#ëÖ­«ZÞ ¶~:¯}öYVR¿d-Å¢àòy%º `Ê#î`ÓÚQÔš­a~:­ÚN—6-^’‚ ‚ B#ÑEÐ&3gÐ"—ŠD‚›ö>ÓÞdqNS;†×Þ`K‰„8«9.ÌiQM{±Õc4²ñ„*%ÔbÇŒ ^6‘xgy͹5ûžª[b”}è°WáL q~Ð^·fûý‹M±­ƒeùœùüó\~ùL–ã"âÎ3’nö2¢‚ W]uçŸ>«W¯`çÎ ²nÝ:6lØÐì¶òÀË_Îg¹oåJ\ªozPâZ>ºªo±å²˜G‰p.Jœ;Xk!´‹@’ÉdÚR<œÇíÛ 6»‚ ‚ ‚Жø¾O6›P^8-üèPÍxféñÈÒÞiFìo=áf"G;¦Yó^‡Ç6rf¨¯«nWgðW{fjÞœyf[3s¼è—»xºkÙè+T©•n(^è4RZ|O¸ÙM˜#®££ƒk¯½–þþþ0LÕ¶mV¯^Ýô qŽÛ·D zœlVdXºT… 
f2J|{ØåÃÿßÞ»ÇÉQ•ùÿoÂ-!dH…ˆÁ j¸Š5& ®,R#.*¸Y{p% _Ån™ßÆË®Ò½æ«¢+n·î~QféÖ]²^¦ÝAt±k¹‰K&N!"Ëpë" ¦’IHê÷Ç©SUÝÓ3Ó3™žîž9ï¼&3]×ÓUO:çsžç9ç¤`CŠ„ñòs‰d2Ù4³YÎÖŠgÓI' !N¡P( …B¡PL‰ÎÎNb±XE¡òS™Í!Œ”“L—e Ľ4£=Íf‚ì$¶•á­Î —SŠ¥yõ¤·`÷}÷Á‡>4ƒ%ª ·¾–w¼hô é(MÖ¶K¼ƒ2™ ½½½5Éë®hŸÌwV©«7ÝMj9yA—ÿ¹‘ãz „&4Öµ©a¸jui&… º{7çŸÿç¥+ÄE—JpGǨ0½^ß3NÚ·bv3¯Þ˜ y¤bÇžpšZ²¸O•a'BŒ[ìÀé0.½ÅGò°3#*k…b&^¼˜KN:iÒûY–ðølt]Çqœ?B¡P( …B!—Ë•xÂ9,ƒxz}¾éôx›ˆ$b2„˜_†Fât„t ³M"<›{pÚ±ßí<¡# Dd:-òhù©˜d¿)•J©¼ps„Cü3ÃS;vŒ¹.—ƒ‚ ;|…Yæ‹#*ä$`fCÁ¹ (Ä»öøV:48qŽxÆ)êÇðYgUýbŽzÀéz8™Žë ñYÖÝr’Fb¶N´¡P( …B¡¨2BKz 9Ç‹4£ó‡×%¡ŒM¡CÍV'˜1'âpí~hšP,ŠNÐÕÕÅÀÀ±X¬irº+Œ¦â¾÷ÐCèóçW\g¤Å]@ˆpyÂÉHd0f%¡BÓàòdÚ`u~§"75Ä¡ {ì˜Ûä#o&Ëu³¦‰YåîBŒ“Þ̶ ‰„¨Ë …B¡P(ŠÙJ>Ÿ'‹a#"ý\„ÀÓ"œbbâ4¶·à s~ó××–®HQšOq\P!©sˆyCCCô÷÷ûSoüqNhiµ|½ _q„—›Ž¨„åßÕN ÃÕáݹ*wR(¦€ õä“c®ïê³þÚ¶ß4Mx½Eëc]“d³Âvãq±ã”ŠxŠÅ,Ï7iÛvI(«mÛäóy2™ òÍ•çœs]7˜X&ºLþ®[~Œzç½T( …B¡PTeY<ûáÓ…Ð5äD Šæa¶ÆÍH!îÕWª¼¢Žã¨¼Ús”CúúúX¿~}½Ë2./-XÀüˆGœì>ÏÃ^qSE×áJ¾Ní§qVÌ]là5Ï_ò‚*‹tuuLjÅbÁ>¹\ο&âxº®c–eaÛvðYþíº.¦iÇq]—L&C2™$﫚š¦.âQ7qù}ÌF¹Ø …B¡P(³œ|>ÏÁŸÿ<ß_°¢BA‚°ÇE‹¶lj…yF»¶µ¡L–e¡ëºêSÌAX»v-k×®­wYÆå¹åËéܵ *º‹0ö‡,¸tdõx¶"r ŒåQ'ór%“BðP(&ö}û8t÷îQËs9!¦e³BP“Fu¢Z,&¶ËçÅ„¦Šyõ$‹‹ÅÐu=Èç8mmm°•J¥( †A*• ܱ‰ÃÃÃ0'÷ïìì –‹E\× ¹®®.b±Ùl68—üéíí ŽaY†a iZ ¾uvvú×ÜŽ£ë:ù|>ñäö¦iŸÓé´zq* …B¡PÔ˜ïoÝJâu¯ã+VðËzF¡¨@Åq£…¸HHªš˜aîr„ž®ZµªÞå“GO?äþ·à Ìi˜òEÓà“ÜÖù‚ã¶Ý E7éqäºJˆSLž?¼ø"§¿ürÉ2é —LŽÝ¢Þq¡iBÓ4!ÆårP(„9æ*‘ÉT/öM)ˆEÑux tæ»pƒ 'ºðî¬ÌÁŸ§àump«Nl](ÞÖÇɆÎF—„xH%.BL¬&µ—ƒÈóXMy1…·¢±Øº`Ë?<øœÏ‹¼pÉäèðSÚdòuFÅ8]‡ÎNQ×çóâo"e‰Ï™ŒX'Ó¬9ŽøI¥ÄçD"ü;Šm‹ŸòeòÅSiŸ(Ùl6ðH+!éX£C2¬µÒlBÙl–XìÀ²„È0U)à™¦9ªœ¦i.äÅbMÓèèè —Ë‘N§1oÉ’%,Y²„D"eY8ŽC&“Qyé …B¡P(ª$ÜÿòËÌßûøâ[ßZïâ(‘iàî¸ã ZZžF È娴sgÐQ9йGSÌššN»öZŒ[n –]ëÂÍyð'™6ÞcÂl8^‡ :tÅÅ,•YXfÀëî,4`$È„ô%àGiÈiðsnþÇ…÷¡«ªÓ¤já3j1v²Q!æ™L<ÓŒã_³jòÝ9ͲbÚY<<\ò¹«Kx£M÷ä8Ùl(NÛv(ÂÙv8¡ƒí #ä¤>¤ëBLËçÃí:;Eø¶ë o;©ƒ‹b[ÿ]7ÌY×Õ%ÊbÛb¹ÌwgÛáò|^loâw>/„IÇûHoA‰®ëd2™qݸ'œì¶ǿ¾Ñõòït:‹ãˆòØ6‹ÃŒG2™DÓ42™ $Ë’øe2™’u™L†¥K—NïMW( …B¡˜eäý#cíZ>qÉ%bŠF fw9õÔCÃÑ.‹mC<Î Ó¤àwÖ”WÜÜeÚ„¸‘‘n¼ñFZ[[Y½zuɺ¾¾>víÚÅÊ•+§[ž[k_®6!ušÏÚðaÎõûÓ/K·q4HÇa•/}çsòð`.Ô –†õ6´u¾È°MƒµYñ~=¯ºJ QMŠly„pýZmˆ0Y‡pf ‰°‚^¹_‰B°›f=³i©¥ïÝ»7‹”3”O¼0D…'Ó Ïá8A^Ð4M ІȤ%…(×…áañ;ŸÛH¯;)ÂÅãá~¦Šq¹\(ðÅbb;93l& n 
Îá{k“J…‚ši†^xétŒ_üâ¸îiX–ûÄyä9ººJ¯E2ævÌåB@9Ûl¡ –»®îì …CM Æ;:Bá_žCæç3Mn›J‰òËï3J˜kkkã¸ãŽ›þa,;žŽºX¡˜ jݦP(feÇŠÙB=ÚBˆ+–E±R#V¡˜µ´c¿ß}òïY6$žƒûSB xÞ‚·:pz â|j^Ñá–Ònvàxöåá~W<°B4;¬ ‘/k!„9 ȶÁÛ]èËÁóð‰²‹âzÁAé„ãÂ'ì¹îèÄÙL-íØõ/Cu]ØÑLäùO§ÅÏxQ›ñx˜SN iñ¸°iYFMËâñЛ.› Þ'¤Óâoé—NCo¯ÿŠEñYÓÂï](ˆŸbQˆp >ËwÔÀ@è9'Ïáº[¶œFW—xN“ÉP4”J²Édö+såÅbáO2)Ê'EÃèþ²Üá1Bï½BAlçºB”ËdÂë õíê?b‚$!üX¼ø¬šÝïñìø@ëb…b&¨u›B¡˜ ”+f õjWd:†É'¬PL•ZÛ±Me¶à¥•7òí8—ËaY]]]8N5n6ŠÙHà788HOOOð÷–-[‚Ï’±fV¤¥¥%X¿råJÎ;ï¼’c]ýõ@¨8Ov¦Áh^¥®®Êž=Ó&Âua‰—á ’D u†Q95z¼Jë˽+ÝFбXi}¡ëBÄ“çÌçÅ~2TVס£CçÐCßQ³k:–OW]¬PÔš™hS(µFÙ±b¶PvE Ñç0®Tê€s+µ¶c™îéóß= ØÄå§&”Sa©s›C@¸f288¬X¾|yÉçñX¾|9ëÖ­ >ïÚµ+ø»¿¿ŸöööàóªU«¸æšk&UHp#=öZÖÅÒëg"âqøžÙpªY:»e>Ynð?϶L…Þ5÷§ÁM ¡êa ÕÛý¹ WÚðæàcÜœ”†ÿÏ‚o°Yƒ÷;pç'd`»« X˜…ûº`‡Ž  aOVù<¬Ag¾fÃEü8» øY´ü:%ꊇ}ž¶aĆ»ýdxÌŠã=”‡6|%w¸pj þË€ÿráÉìÖàŒ$<ª ¯ø¯¬*d®2Ú(s”iüÑ…» bY>2“E­Û§µ¶ã%4#ßc.Ð ×P×+{í @w÷¯€÷×ä¼cÙñtذB1Ôº.V(feÇŠÙÂL·+\D4Ðà8Žã(!NqÀÔÚŽ¥RñôÓOs ão+E>}ºfœT4%‡¬^½º$oÅdimm¥ÕŸJzhhˆd2Éå—_#_¾|yɶ±yófº»»Y»vmð`D“s6’x|”g” wét©§t˜Ié¥^5—Ldß+ølVàëˆu£êý&ü%"çãù:tÙpnžÖàðÜÈ)÷—èôÏ›Fˆf¯OÀWbpM>ƒ´)fV5€LØ™„ðÏó?!Ú…œï5—7à‡q±^ŠzZR¼(¿›‚Ý:¬0EY·º0l»Lx»ïõ÷ûfGø½¥Ø‹ ï°‡Mp{ÅqŸµà! 
¾kÁö^¸D‡?¼ø"ë?úG¼»/Á°˜v¦ÛŽ}ôQº»»ƒ–(Í $)¦éy¼yóæšc,;žŠ ?óÌ3üêW¿bpp°¢O…‰&ÐP4>}}}üçþ'K–,©Éñ§».~üñÇÙ°agŸ}ö˜žýйIww7›7ofÙ²eÓž£­VmcÓ4¨Í®˜]ÈvÅ£>Z³sLg»ÆoC89@>ŸW"Ü ¯¯˲š®} "bÐ"ìÏŸ{îí|bÓ'ÆѶmn= éééaÆ U·Çœ¬¡¿¿Ô²ñ)2ùa__ëÖ­; ·äãŽ;nÌÊÙuÃëÍ &ÜgÃyeËÇz®ª×pár`úbU–0÷ÛB<ð 1!ÄS‘0Ý"S¥F£w§ ¡m· ƒ222ñÚ1+ZN1ÃY\Èú’mÝ.f$¥FÅé_çk"¬X†îÉkÕÛ+„ÃGüe)öçg…à¨é… éúñ‰œÜý àÜi»‡åL§ŸvÚioyñÅ`y*ÕXb²bæhooçú믧»»»¦ç©dÇ婪áøãç]ïz×´‰¹œ°ÿáá?–¢~¬^½šÖÖV6nÜX³sLg]|Ê)§¨døŠŠ\ýõôôôpüñÇ×äø3Õ6VÌ]š­]¥íãrdÎk™Q'—ËQh”\.Šš!‚šÅŽ+µ£òDË-¡'óÔD¼tòù¼âf!k×®eåÊ•U·ƒÉúûû¹è¢‹®»»›o¼‘îîvww322­·ÞZÒÐhoo/ q•ñÙÕâ égYO¦0S¼Ó„¡d(RMû]Øì„¢—NxŽòsUûnZÜBõžXzZªøbOäàž¼ð†›ˆ“Òp†)ò>Œ—–Rþ-E=Ì¡QÝŒ±J­ìø—Û·sôÎär¥á¶ E-¨dÇjÃSÁq™jå¤†Δ«PŒE­êb…b&Qv¬˜-ÌT»"Ë×^C¦iªð=Å´Qk;v¸ãŽóX°52YC.7Ê‹Hå†S€/Ä ‘J¥èìì,‰‘¾þúë¹ë®»hoo/Y^N__---¬[·n”áJãÀ²¬I:@ËóÏ•p.×8a}òµ0ÝzKtØW­`6‰wSµ_áéVÀ8‡®Â»Åðÿ;8.\ÎÇ›ŒUú30”+-C­_õ´c€“æÍ#Ÿ9ŠZ1–O‡ O9‰K"¶A CsŽÎ0«PD©u]¬PÌÊŽ³…™jW8ˆ>‚Ìø£¼†ÓI-íX:‹Ø6lÚ´‰oˆqŽÓ8á|Іâ ˜¤RøQKK étš‹/¾Ó4+*ÄCCCô÷÷ª,mÛ¦µµ•Ë.»Œ5kÖ°jÕ*,Ëâæ›ožT!woÞŒaȉS¥­b"Â?§[ º= ¿æƒê:ØvuÛN¦ª0•ÏD¯IyLH"„8½ZÉÓ-Q¬,Ö,]:½&B­í8¸ª.VÔñìxºl¸2!´e³áŒ±ÚCȉðlÛÎ(NÛ¶¼0Mñ[Î-)Ÿ¹Wâºá ‡ë†ûØv8u9ŽS:¨2×óèÍT]¬PÔeÇŠÙÂLµ+rˆ>‚9~¼šôŠ*¨¥Ëtp·Ý¶³O<4zðQ :ÇqH$*äZ!„¸ÁÁÁQoÑ\*---Z\)ÇÊÚµkÇÍ!tùå—ÓÙÙÉ–-[X»ví¤Ý=wìØ®ëärµ1u*ÔÂKëLΜîrê“ó>ÉŽHUÃdÅ»„ÿ;Få\–£…8øÕÛß>ÍW&¤Öv ¢Ã­PÔ’ñìx:l¸d8jïwÄÒˆDˆo7±“Kg¨ŽŠQ®+Ú.“tq]á}''̱,ñýäy¢”"¥ë áP\ärâs"!–Éü–â³,_Gd"9û³aˆõ²ý%S*”·»r9!Zjš8TK§Åþ©”8nôs.'®ßlŒn˜‰ºX¡¨5ÊŽ³…™jWØ„}×uqG…¥*¦ZÛ±|ã7/ñIãµ"'„£°Ñítl6;ÙÃ+f!‡,Z´hÔŠò$šÒ]sªDg*™,/½ô†aà8¢S¤˜šVi"\ëuȨV[’:` 1QD†Ð+.*äu~¸gÏž™¾l%LÕŽmàüááI…+µà@êâjÉç}ÑÈE<Ìi‚Zû¤m ðI#Ó8ôw ñJ S©”®dnÅtZ¬ïì,î¤àeYb™\.?C©–Ɉ¿£ÞiÑId  »ºD½¼ÊÅ´j<úä>2$W?/$+‘ô'Ô‘åM§ÅO[Ûìâªa&ìX¡¨5ÊŽ³é²ãèÀ|&“Q³¥*f”é°ã­[ðž3ÛÃYËÃ)|”À¬_ˆkoo§¯¯oL•xpp°¢×ÜLõŠRÞDSÇ4'1Qa¢Ô‰0ýìdÛš‘}`üÏ@#æ­P(ÆáÕWŸD½M|ae£­yÖ>%4+æ˜2¸¥¥…d2ÉÐÐPÉ–eÑÝÝÍå—_^·yØýÐC@è ˜2ÏÑDè@/“Ó~ªÝ6:_„ÎØÞt³IsµeCChš&êby¤·…äîr…‹K"!–§€wÀŸu²üä•K…bÈçÃü“¹¯ƒûoPáçhÿ Xß‚%/Cj¤öõ¾“0>H›ÂëÙÔýu ”;BO7Í ßº1]ˆYñx•Ò¤ÿY²8e"\Îß7ºå—}aÿÓ™ûÍ0„I\Öñ¹œðìê¿åv]]á6‰„¨räçTJl»d‰X—J…ÇN$ľ‰„Øfhèðéû" …B¡˜ÓØ„Þpù|žx<ަ’*+šŒ ø†ìˆ²iŽ MÕ4MyÄ)ß#®¥¥…믿žT*ÅE]D{{;---ÁT¾«W¯7ÏE­ÑüÞ•Ê×y`ttŒ±:P"}誈ö“ DÿµRßY§ö³¤Î.°xx˜­[àþ´«±¹)„qƒ±<üµ Ÿ‰û ¬üïï…‚×% 
Ç‚ß§ádM(¥9ÿà:»ÈBxÖi„³hH?½¬@ªm£8@,K_QlÙ‘ðÀJa^9@Ûù.Ðþ’ÿ ™?æù&GØ«¬ä,.&B3üƒ”{{u!fɉ~.~Þ· ½Ãä°»1Á6å•rä ç—WóLÂgoLÆKy,b±ÒPÜDBˆ¢b]<z Ê|Árê!/ ë!¾É{Ùl˜ÇÎq^¡lÜN¡P(Š)á¾FmÛ&©¼/M„l6¶·r7A\ÕÈR“4(Ê9Dþ!Å89£ÈÐЗ]vY ÊÕ 8f÷î`&:% O韱Ó@hBÕ¤T´“ým©E™MU”¾g?þñ›8v+˜ÑÄxnް`“—Ü>u1{ÇYY‘Øêc\èÀð/`>Ü4ðÌ‘Bœåÿ­#„?ÍßÞE¼1äoÚ¦˜$Ž#„ŸBAJwÙpD'd/€ÞW!ùkÈ­„¡`ºff×ÝwsÚòs0?.ýÿ4¿Ã7f"7EÙ+Îdà?iÜD^=å¹ç’„ ý¤ ª£D8Å”&©ëðÉ ÜlCìËPø°ÿ’çúiá,K(vÛ\Ø£ã=´†ÜùgþöÛIþÅ_øB`Á m\†^JÛµ¶Qöeî¸@¼KøÛK±99¾Žï²‡x²!°øÖE©·©ì%¸ˆÐZá³Rà BèuêFާÄ3Uæ¶‹Ç ½ì¤X–ò·•ÕBÔ Pæ×‹V¾gHa2êõjûÇ›htC ôñÈßš8¦œ 6ø.*­‰B¡P(¦‰è˜±áÍÊw<ÏÂ…——¶ÏÊP!©Š(óê]€‰xòÕWƒ¿U½<ûÚÐ\à†‹äJ™È7Öõêc…MSôÚ‰©@vÞ¥ò)CŠ)àºÂË*¯CO öê`µÃ[Þû;’;í qœP,âüa§ØKÇw¿ƒåy|ößÿÞövVÝ}·HT–É”&±Ôâ‘ôÜ´"”E¨à'"šE²iBÊò÷é"Ìå¦#D´xä˜Ñ\oøÇ•ûâïŸ+êýfS:ê™òÓ…¨Øº8)ˆùXVäëÚ¡M†çJ9©ÛGÛmqF7ò4Š4E(ìÉuZä¸òû&üïµDˆP¼“mäÀú h»ýjÊ ËµðÑ…5´.…B¡PTƒëº8M>›]tʲ,%Ä)š’­[ðÜsËǬLEú*4·dÇ4M#VBÜl$Žè{ÎæñáÕ‰mãúÅWÇb"Ƹâ‰" Làe3ç8b›·7wNQR)èí u©Ȯko¼‘ƒW­ 2û?x×]$ ,Ë¢­­®®.ÜGá^ËâŸ>ö1vœ>¿úiþê§?eÛÎÐÙɽ_ü"™L†|>χ_ÿzÜíüoì[ìÛ· ?Ø N(gŽz†þÈfèðO2òà/-x‰gïü9÷œ}û?º?!µm›½Wï ¼Üöµîcÿ¿îâV/¡H§qGˆSŽ“R/Ô4¢Ó€ÂÜvÒÓN§$,<ýùHª…èH„Ü&M(ÜuúË—øŸ¥HæŠ}Qï9›0¬·ƒÒÖiùìgYñãsÒw¯aÝÒ¥œ÷Þ÷òÞ˜L&C"‘à½ýýœóÎwâ¸ÿö¢†ÛÙ!:X=âþäŠ+8éÎ;¹sd„c=”³?ó^¾p衜sØaüâ¿àÿ½ø"¿Þ³‡×vÛ-âm;ÂY»?ÀwOz ¼æ(8 ¾ì}™›R7aä æ?û,?ü0'œq§¤wÑ¿};›=”·û6$ i¶mó³ÿý;>òÎV›«;oD#ÏѬœ%®;.š­áä’Ù¤ð¶p"aLh9ñ]b±XÐñ´m]×Ñ4 Í­¬ÑíkÝÇ1›©·)*æR¨K!C×õ1½ lpÅ0Œ ï”ÜWж} 3­(êÅä³,+½¤°fÛvà}&ë@]×Éår£lLÚžëºd³Yb±]]]A½)8Û¶E=ê/‹ÇãhšVb»òo)ÞIq/Z¾±¾.—#N£i‡Þœ#2?@>ŸWÞpЦÄ6m:‰Ý/ü>;Zp³, MÓTص¢„†âdzœ—^ZÀŠõ.B15¤Ïß×^ZÛ“Mµ‚ÏfCO·rz+Ì‹+żJ ¦ò©§R¶èþc¹rK¯·Éºz—o_M£/™¬0£í8Ho¼1F¿èîž\™Œu9؇Ä/žbþ¼ÃÙñßGðGý—\ûâ~’—'Éçó ”ŒþKt]/é@išÆß\x!§‰µk¹7ïW,¦‰¡i ”…g¿¨ß oóÊ+¯pÄ߷™‘{eš¬ôï±ìL•ìo¼ÒÛËç6ndñ‰'ÐëÛüÓëÖqÂ×¾ÆiwÞÉÏ;oÌë‘ÏçÑ47š&· :iôÎNÎzá…`;×u9ëÄó9è‡Wqž¶›Ÿõoì\ð¿ ö<ü0oìéadd„–£†Í›Åw¶,a£š†å_³X,Æ#¹Gxè¿ÿ† ÿãüçë^Ç‚­[é}î9>¸u+Wj?¿ÿ~´ / :w‰D‚Ë.äÔçžãŒ×½Ž»V®ä¼–ºçž\±‚.ßSTŠ–%Ä<Çq ³³3¸®ër}Ëõ¼¼èåz›¢bHœJ¨w|>e®&)JÈ}ås'lÛ¦³³“B¡€eYèºN>Ÿ'“J¥aÁ4Í@XÈd2Äb±@|ðl6‹ëº¤R)Ç)ñ ’e‘ák–?õ&’ßÇ0Œ`¹ p]—L&C±X òõlذ|>¢ñ‰z¥%‰@ Åbär¹Àã$*¶F½Ï¢H»’ûçóy ÃÀ0 b±X`ëétz”ç„ö›H$‚vµž\ÑgGþ-Gßã!í9VÍ”Ø 
LtRpÛ¶•§hZúûÿœÙÈÉݨ÷¬B!ih!Îùýïyî¹å\ZcýB¡¨ÇîÝËÖçÛÐ>®1§‡íD$¼W/E8ü2ÄáìgžâØ][Y¿e;/wÃ箺#èŒ5òÇ+ºç›‘˜AÚ4™~ ¨,û,L¼ƒÝŸÙÍ›{3Å3‹L Mãà;Y\aÕ ç?ú-ãˆpÀ¨ŽPÐÉ*o5Mãðúד^{§’ømVt¸t.ºˆ–òƒK¯S(í dÁàþð³ëriW\x!Äã\꺣Ÿi9ý¸e±2—]g¥®‹u^©Q¡ÚqH%d2™ üIR0 leë䮵¢j/)‚e2t]î½´-)"D=nä~R|ˆ®“‚Pâ¡#ïiWWר„ïQa@殂Ra½X,ÒÖÖ†ëº$“I‰D º†AWWWà±ã89?mišÄãñ/ §¤¤à{J»®K.—#›Í–R”Ñ4­Dˆ©tÝ3™LPÎ\.‡mÛ‹E®¾úêz›Å¬BÞ“¨G—ÐlÛlNŠ.¹\Ó4éêê"›Í÷Y’ËåÈårÁqåó“N§ƒº7ŸÏ“ÍfÉçóèº^òŒI¡m,ïÉ|>mÛ Œ¿äöryt}Ò¯ßu]§««+À™)¢Ïo3óÈòÓV(ÍÈ+»wcx—æöQa©ŠJ4´ð§íÛ9è ã€òÛ+õdïÞ½ÌÿÓQè§œPï¢TÆ œQR¡˜€‡8ÌÃuùÁ»t–ïYnÛw… |l9é©âsLÈʃžþŽiBq’Â^”1^­­·±Ú8‚ƒv;z=T-kZixw%d§Z Çáó>ÈE=ÆíÇ/f}öó*žvÎ?³ä±%Õg#=¸‰Éd’ŽŽLÓÄ4ÍÀ‹§{º®‚Yy'z¢Ïå“x<„–‹)•:èQae¼ŽŽ¼ÆÙl–ŽŽŽ`û£>zæ £É‘Â¬À¤ˆ›Ëåï¨`õ¤ìèèìRÚc*•BÓ´àù¢ª`¥‡ÚXbšä¦â‹ÅÈ«,›Íªœf€l~NŠ«P4:óöíãЃÏµ\Ù¶b,Zˆs€#öîeß¾#&ï¬ãÎ|'3Nf_õ¬(¦é_ðÇCŽ†Õ >àN áÌŒ9Âï6¥3A‘}ex(3PÌ:¾ <ú*$zï%“¸˜Ïl‰sÏyó´_#œ?  1ñgŒp’TÊæuèÙg7ÛÿŽû_fdá/h[Øà¡¦IËÇ>ÆÝ2÷dÄûbç³™ÍsüaO¥RÄãqòù<±X,¥Œz†I-é5ãº.ìØA"‘ õ¢‹°m›m­­Aˆs*•"N—„ÙI®)u”7YbñxЊzÔAèYiš&èºÎÛß÷>MCgt.?瀸aˆ¤Ô±F>O.—cä䓃‰|uÿ·œX>»øŸå$¾¹H™e¢vÃ/Óµ·ÞÊ ¿úüä'Áwt#ÇqüÏr I=·ù®,KDÌéííÅ0M"Ó)Æ!—Ë•äÔ‹†ˆJaWŠÈétÛ¶ƒ|h²óõÐÌçó]‹Åà¸2|«³³3ØÆ4Må%5‹‰NÒ ÂR§?ÍnE:ØËÏ2³Ç‘NѺ>z>µ|¾t¾6‰œ“,ŸS6kšø\鸎8$¢,±˜pØ·,˜7ï¬z_¶I#ß#Ïíy=w˨õù|>sW(¢4´çüú×<ùä$ò)9þOÊ?€‰hÅ)m™Ùˆ–`Îß&Žh1Z7ŒJNvä˜Sù2ö÷è¸~ùåwÔ­N9øTôy]¦ê%•G¸µDË¥ùeu€aKZCÜ/³×§Î8Àqü#‡½ñ9øô¹õ.Nebˆ{eÚ·â |ft“´g‰å§Ó_'SpµùÁ$ì1Éç1†°“Ð럧+rn=²ÁØvl¶äÊ{‚²§&{’R˜·#e‰öä´Èù£õEô\rŸ¨ýF¯Cù²YÆ[ðÒÑpÞO{É$.æˆØœ¾øøšœ+KØ¡wU³ƒYÍvòã€6†7X¹I”ÏÐÝJÅŠÇ+Ë5žH2‡íû2¿ûC?«ÞùdM®ÛDTšpL’IÑB.ó6zñ´Ù<²¹.åolÛfóîÝÜÜÚJ¦­®¹†ýèG9ôŸÿ™åÏ=ÇQ¯¼BvÞ²’ÌD®…íŸKó¯íDäük¥ ži–qÞÿ‘‚§A(bÈk1KÄŒçž{ŽO\² ˜œ Uªi’OƒÐ®£è”ÚŒVö¹å¶!ïoT ‘Ç’ç.Úœì]i” wnä8 Y–PP—*KÔ"Ú~ÏûÛÊØG)::”öZ£"dÊÿ;êž%{ƒòY‘u „â¿,¥è½žòY§Jkd¾mÃayx) K¯¾€ž_Â[(-_6e¤!«ùY^ö¨É@xÛ$ÒÜ÷ícØó8içN^|íkñ¼¶´´pØ‹/rðÒ¥üsܺe ¯´´ð¼¦ñâa‡qºã°iÞúá‡óÉövâ„͈b¬"*"IqÙ´eËX†¨ê²„Uœ) „U´…¨Š«Á¥ÍùjOøë†wÞ<ì JÆX¢M)®•N"°€¯>þ87_vo¿ûnÜ¿ø LÂ&ƒÄ¥Ô«µ€¨JÓ„Ïsùö`´´àøÛf ²Çz æýòÇ}dSO^IŽp¼Uznº V®¬­5R|“aÓ±XŒÞÞÞQÂX­' ˜-"\¥Éì'šà~®õtµm{Æ<ǺþÑ4™R4’B‘ôüJ$ÄþÙ¬8FWWèùeš¡–͆óMEÏ‹‰cÄãb?ÓÇ—B–Ü'êµõ"“ÂWoox.ynYƱ(¿¼e³H¥ÂãʲE¹ÖTI{*/›a„âd:-þniyþÀnnÙ{° _iàè'Eãá5 
W^y¥çyžgxž§ëz¸bØÿñ<Ï+xž—ô†#¿§J±Â²¸_ˆ¤çyºçy‘åòÜYÏóLÿG~–$ËŽ7ì—;æÿd#Û™‘ãËmäòòrÊsÇýí$ÙÈ1$†_ö¸¿oô²¼éȱÒ®IÒ?Î@äo¹]oÙ÷”ŸÍ²eå÷ÆôF_YÞòrþÚF3på•WzÏóξûnï©Kž×|Äb1o``À‹Åb^2™ôŠÅ¢×ÛÛë ˆ¡ ŠEqs‡‡‡§t<¹ßðð°—N‹›500à {Åb1øÍ #O&“^¡PðŠÅ¢ǃeòûÈc …àïX,æ‹E¯X,z±˜0ôt:íÝwÓ}%Çðz{{ƒõ²ɤ0°ÞÞÞà˜òœÑãËýä÷’ûEËýûñ뎑Íf½ñPÅãñàxÉd2X.ìy¿»ówâZ7oذÁ»îºë‚Ï=EÏ»ä–]ÞÍI{×]v—.àèSCÚ‚$›ÍzwîÜé鞨&.ùÎw¼žÛnó>øùÏ{ÞçqÞyÁ¶ÃÃÃÞÀÀ€—Íf½l6ë}ê꫃jÑL&½žzÊ+zžç¸®wÙu×yYÏóîÛç}jï^ïû<â=ðÔSÞ€'ªÆ›ž}Ö{à©§¼´'íŒm{ŸùŸÿñâžç}iÏ¯× «ëøÀ€—p]ïÚ_ôN{è!oÉåÛ¼7óSï„]»¼“žz*¨Ò{=ϻ䩧¼ ~÷;/éÿ½â•W¼–íÛ½·íÙã%=Ï»lëV/ẞQ,zYÏó.Þ·Ï[½}»wîË/{-?ü¡gxžwú¶mÞY/¼àÅ=Ï[bÛÞ¹/¿¼¦ Ïó4¿ÌiÏó®ëżðÕ0àyÞ¿ü¥—ôDUÜ~~'¿G¡à}ÿ‘G<Ïó¼óÏ??xnë®»ÎÛ°aCÕÛýëSðÂW’öê«Þ²çŸ÷Ž{ào¹ãxç|ë[ÞkzÈËØ¶§yž—~á¯øÃzÞ8Ïݪ©q‹žçáo›öÄ=“¯s¼Ò&K¯¿¼ÚïVí]ëõJ›4å ð²Ù¬WðF7?$Y¿ìòXU^¹oµÛeŸË›dQ¢×`²öQ/jõÞðLÓôb±˜§ëº§išgšfS<ßÅ¢ø¿ÆßvxØó ñÓÛ+~K’IÏ‹ÇÅOôXž—N‡Ÿe“©Xô<]÷<Óûzž8¦®{žß<ñ ñYÓÄïx<üÝ>™ô˜fiWDËšôÂg±¤¿7‹â>$“â^Åbžg¡Í$“áýÊfÅ6òžÆbâw¡Þ÷JÈcM7†QjO3M±8óçn;޶e—ú춯øÂ*o·*f7å}§ñhh8—²Ä†rØ$ê”nv]i!‹VµCçärüå]”G#§ÿ“#Ò#®|¯¼ìÑm: Ã褗˜9OtD§Òðr/¥žB)Â!㉣ Y TNÎõšËùÇÏ–}§Jƒc øçëB qPy8¾Ip-7¬ ûðaüã$<ülÛ¦··7H°-ó£’X{~×Ų¬ w‘œQLŽlK¯»òÙèR©T$õ·[r\ó¨£p^xATµ+VO¨¥Kà üE/ ~Øaà¯O}ðƒâœ¾w@6Z.ùꊆùÙ®¼àQ ÷gƉO>ÉÙÇC‹iòMà“À¹?øüô§3vÿg‹0ÔS†L™wÜÁ«<Âm¹·¼éMüõöí<ý¥/ñØÞ½¤Þô&>>/~ŽÐáI–ð¹4 ÙÄwºð6ßE$ê%]ò¦ÂxÍÂæ­…§9 ˆœéÚq²Ù,†aÔ,©¸m‡awÒó§´L“&šS*Jg§ð4r]ñw4P†ÀFÊ'=~ººÄg–ÝO–˶Åcë8âØòXPê±”J…!„2LP‹•z †˜ïGzWÉ}äyË·—N³Ä°* ¦7,ÕqÄ=’c‰„¸¦)®½ôBƒ0¼R~ŽÇÇœci\4mt˜êt0Pçþ®OízÌU2»Û*¦µiæv¿¢ÆÔ[5¬„TÃMÏóN?ý+áH‘ôz“ÞcS:üÌõà›*r¨;> å)xão71o´7 ×<#%²¬IÏóÚÞ¹Ñûñ»¾^Õ>Ùl6•. 
%^]Ò +ž]ÒKNzøHϳB¡à™¦¸€r™ü]ÉNz Åãqoxx¸Äã+Šô6STO¥ëÕ,v\>ª“(zÞ-õ¹öcÙv3ˆpžçy7´~Ú‹>X›X–b2 zS'3P$=®{ÎæÍÞÂ7nõ–.ÝévÚC^<>ýzW€¥hòÀî×½ÄüÏ߯y-çMÛq³Ù0¶¿««‹x<Ô¹ ê¿òça,ÛžÌÌ¥õ¤sÙ¯¹ìüìøY•3‡ëboÚDü xÉ$Ož×ÏÑ{öpýõ- žN6{€áb² #c_Sþï.@3Ŭ˜pa>˜ '5²€ÿ0á‰8<y4r&´$á1qüNDÚ€ïgáE¸n¾7q®J‹x,MûÈ©VOpáS:Ü–†˜ —ip“_÷cºÚ4û)ã=ãÀë øD^¯‹™ädYyàGiøiúýØ@[;]!w ¾k•Yq®õépú@Ã*Õíí…Òpk¾ñ^Oá+"|øá·>¾ñ “Ë//ÝL&sÇ»œˆ©Fx´(À)ðl³PÊßGGx”ˆ •òþ2éèiûÏAùã ½è\ÂYêÉßÄào¼Òe5pÄ–ž0c5qd"}]G¸WEO"‘àœs.åðÃ?Î¥—LVõ0Šz®)9ÙƒD:³Î¤£_55DºxãcwÙp¼áÄ~rýd𢮇°!$»Gþﱺ*¥“ÞA8ã„^»rf ©ÞWè@x%ƒx9þryYåûE^ñ®ƒãÿL¶)fQ:‘àxe-Ÿ€p–°ðÙXVjÇ2:mδm“¦a…8€?mßÎK/ _jƒóʾÇÉd0 N§œéÑqÈ>ÿüè·U.¶À4-œFÇ0¦ôf“‚Q%l2BÖXÇ­vyµ†¡ÂëÀbwǸ/'9CªBÑÐìØÁYOßÂWoe¥¹²êݺººp]—B¡@.— Bª£3äÎ9T£¬1°mH¥ø÷žúÞC´ïØÁöí«+nênç„h–FtT‚ž$á£YDxh¯¿èØ$ýý¤ÉGM Úñ—xdÿ¼2䬒òGv~5M\ª|^4·Ê›<ù¼hšFpI‰ÅÂŽs"†×iš8® ?ÌåÄöé´Ø>•Ç‹Šf²Ù’N‹¿;ý°«ëè(mf³b?Ó 'd•“ØÊ²˜¦ø­i°aÃ%äóOMÿ­3rVÔx<ŽaÄbGTÜ®¼YhößgQ?¹é±m1ûk¡NÆlV;¤N$SU"—°‰ky ×Åp0Ó¥›JÁÍyئÁ>à(6Ê8i£Ôˆ3þ‰c c#¶5üÏ9ÿ§€/ ¹pUR¾êŸŒ‡JQ§k‘ãu"Ä1›ÒA?%AäÄ{¢×?Ê?¦ü,éð·wýãÅüí¢"e%a0OéuiÊEEYvù®Òý²Øþ2ÿ&ù²ãkþ6²¼»¬]R‘í¢×*z¼õ\š"ðó[ÎçÒï)½,š¦ÂRãÒÐBܳ¿}•ů¾ GpÛ­Nà N—ä"ET³,ñ“Ï‹–Ù$¥hk ÿ޾L¢­Ç(²µÛ(¢˜e©Ža ¹çÕ·)ÄÉ\…JˆS42ðÒ}ÿËë?†CVMł€ P(¦®.èíå￾„—·.àÚo][º^æPKƒv Ø-À ¢ó #7é!Í&×ËNÖ4èÍ–åVXgÛb}2)š©T$ œ67âqñªO¥„‹æI.':öº.ö‰6c¤=ž´òù0WS2)¶ÓõR±.“Çð^G—zº¤Ó£›LåHQMõêì?òåÇ7 !ÔEééù1PýàB3°dÉŽ:j‹?ÎKŒõ¸L=èVØ'êÀ3Þòèg×ÿ¬ù¿¥“‹Y&‘ú^¶¼|ìÓÃhG!©•d#Ûe(Õ3Üȱ¢Î7vÙ6ryÌß69Ô Ü Ç l;ù]´Èq“„)˯µFåë?!œž†Ï;°8 [xÈ‚d´pØß>•£Ö yÝ\×-âû+·‘øÑy˜íÏ‹ü–±˜xèå ‘7#“- ?õ÷½/ŸŠ‹J'Ÿ‡t¤ŸåºpŽWÙð+ Þ¥‡#;€Ûøžß—F$E8•Ù<L_ù4ʾÌY)ØjÁᾕµ"rY},?Ñ·‰ð¢QY†¨„_oÀ߇âl¸` 'Åw•^gN‡ø.9]ˆXòøŽ>ºr“‰ô,+¬Øe…2ü${‘}>˜‚÷8ð:NB|W !@Êý ‘J:—ó_4yq¼h·ËOâ—ŽûyPýkšKAÊ÷ÕqÄÍLX3xû‘GÖÛ4' µýyþÈ%Ë•ó‹b"Zˆóv„ùGK$/‡Œ?4Tr9.¦¶ä§r6T)°Åã¢R±í0–Á0–žÌ^%šå¶Ç•£¿¢ócL³´%jÛâ0–x§ë£Ï/…Fy>Ù¯ä×nYáð”ÆX–Ø·¼r–G-ËóÉkVÞš—ÃÏÑíäwÖõÑÃz™ŒØVz0jïá…êïq°ëé§9ï aÐϬ¸¾««+ËS(™–ŸìàMÞ‘ì=voÕûX–L$¢P4î—¿Lî_þ…õL^¸g/¿\È™å*—ÛR`¼ø-¢#”#Û*‰F´½m[ô]ä«S.“ÍŠ|>ô‹Å¾”ô$“¯rÛ½Êä#&…4ÙTÍ"é!£Å®r±ªÑ&ˆ&¦ A.º¿¦•n'=v¦“ñÆ {{#aª¨2óHÓâÿpÿýœxÑ÷é\ÿî²áçÌl#[Ê“‰†+¦ )vI½$M(ȹ‘c™”F냿,Æh!„ÎC\ÒqHž#ZN=rIùßR”ÌQ'¼hù¢â ÜNFñIñQzFÅA+r½å>åå’Bf5÷ÂŒ>Krò“4|҆׭¹Ê»ÙÈk—²¬pÒ¦|ž\÷ƒä~†ìo–boD(q> BÌ@ZÑ gúW»,DÃË6¼Mƒû\øa^×Ð®Š‡¢›¬Û- 
ltCªø•<œ’@2R©~Û‚ál²ÿXNÚŸƒáöPÚÿ-¿®±Øè~b9½„ y:ö…Ëú¼÷uwó¡ ­§ñü“Î’·. >§R)LÓTa©ŠqiX!μMóØßÒ+ÇnÕ:Ž3z4e²èzå ²XŒ*ŽÅB‘J3G§Žªæ˜ *ñÄ—rQP mr8:z\)T• lº^*4Já,•*>†‰‡Ÿ+‡1%ò»Êáp¨ü¢’/ùr‘×Ð4źòïˉëYöÓînÞ5…[\/vy$GÌ[PqeYJ„S4 ?º³iám~ó˜Û¸®‹ëºØ¶®ëX–UuÎM…b¦È=ÿ<¹¿ú+Ž<õT6°5ïþwÎÔÖ†=kÙC—¯Fr³ø6a²B$“ñ|áK×ŲL& ¿”Î òuîºB@’ãr²‰ _}ò-ÇÌ2™Ò¦Gyÿ§Ò˜d-´pyžññ™îƒÈ&Æ\BFv¹€óÐnFºOæu¯[ RÓâYµ`gŒñw¥2D—™clWi¿$Bè’BßTuÞñÒ,ƪ\ÅŒüîòÿκLH8U&J’\gÀW;lê稇îÞ]ÒËýl¹e_¤pïáâëÊ› n@yðÈdžu9uoµH¯º\Nô›&s¯”y>Ë~—S.jÉ{Ý^ …‘L–::Œ…<öXÞZ†QúðFËRéTëS~:]/¾Y€ œuЃpÑŸËdy…b<VˆsyO¼Â©'ºc¾ýR©ñx¼¶áNr.õrjÙº¬T1UqŽUŽr!P×EÎtuŠ5MT¾Ò­x¢ë¡Wp™.yÑ”í/Ýš¼ÛüäQì;np̨uù|^”(š†­;–ðÝc¿ÁU\?æ6–e‘Éd‚pTŠªh4 ³t)Ùùóùî‡o¤mñ©|ëS+r„î<1„÷…tÓÑÀyðE¶|&“¡žòUÉ„2T3ÚW«ß"_ïc½öʽÒêÍXÍ#ÅÌ’Cè_ééá‘ä0W|äSÜpÃÂ?°ŒtдÑ ŽËÙ¤p!㔣ÛJ•Y¶í¢çªr4æÕ¶á;yØìÀk488HƒBMÓHGg˜çy e’ÃòI&Œž‡5—óCþr¢½ZÞ^–É Ëgú^L¾gF$Ò/‘(މþ-¥ÓÙPPêè×Uà¢Ñ8’®.L×åÕ^€5/‘ ,¿÷Þ |϶!7øgîõ¿ržÐµ°ãy²ß’N7G_¤Rt”bF0€—9´d™ŠQTCà qa×êSÆÎS¼I`šbäaºãÕÇóü; cVŒ”œüš]œ}ApÕ¨uىܸŠÁF^^HË’ãn'gýUᨊFE†²ûä¾ °z–pÔù EG/š¸:†ä"¯!Ù“ܲ¯‹)AJ1óDs¦k®ËßøO–.-pÕU‡VÞANý+¶9C‡Ì!•H„qÐÒØ-Ä á†`À |;­¼Q³ÆÝÒü‰íÀ~ ^¶i0ß±²«€3€V–à‡÷ùûü•?•äÑ&Ü Ìw EƒkãÐêÂÏuñœÊIQ~¦Á&ŽÐ@÷Û¹ó]8†û¨Q1ÅþÛ…ï¦@7Á± ‡˜/¾XˆÊBz#å«~–¹-Â0É¢LqDý‘´l;+…~™?9‡ÜÞ¨œt.zÌNÿ:IQ¾·òðü¿þ Þ&9i¶D„8ˆD~ºþµ)Ÿ¬ 4ƒ§¨+Ã?ÛÃaG —,K¥RJ§PLHC qËVÝÍÇßru½‹1{h¤¡ô9‚çíç]›Nµ<ŸÏ¢…BÑ œt4\qü5c®O¥RÄb1e׊†FF™~óÆy×»\Þ‘ø´ˆ#“±d)ÂÐÓ²Ì2“@¹ÓÊǬ¨DèãîÝ»éèèà°ý÷rÕŸDçpJF°GxZ9§¬Ò)q%NÁl¸rFÝFA:ðõB˜ˆM/=]Ib7xÜÛ?)Ýš²}6MàÙcWG>_¦Áfi9l †ýÿº aRd6áõIxΆ–8ü(çùûÊükÑYŽ=ÿoý'gÄ( Y—3+wùÇñsõ× N©§W4qž<~yݧ4LQÖ[.<óšgh&l`ù£rò”¦“e–ú‹p Å8À+KfÛkv•,WƒÑŠjhX!ÎŽu Ú· _Œ:;;) “=¬B1£ì¸s‹^S˶må5¤h*®øÓ“üñc®ÎŽªP4"r²½§žÚAOÏWøÝÊ߉2+|Âÿ{ó>)õ OÄYë?à”ïå/_¹‘øÿ$¼Ã¢QºÃÃaXh.'ĸŽ‘ûÊF{“ð†,¼áå¥ùËe26ƒÑ9º*Q©YSëWC¥ðÅò±çèLÁD+1‚ü7ÒÅP pÑï¡sÜhÊãÉý&¯,?çxÛIÒ3M<ÓÝ\Bœ\p×]<âÇùÜÃP-gžVÍ Eàg¼ƒ+ŸX²\åGVTCà q.ðØMKá•׫°>E£³ïˆ#xù¾ýõ¦¿O.²ÏljªLô]jÕo®¥“xµâ]#²?çîßϧ†ÞCò#À·ïjN(šŒ½ó÷ˉ˔³…b"Vˆcÿ~¼r⨣mÛ8Ž£ÂŸ Ïöãçø%›YrÜþ’åj¶TES±ÁïþÏŒZîº.–eaÛ¶— Í£ â<÷éÛoçkÏþïÞ´¼4L®ùS’*æ6"jZÞÿ~ÁÉ|þþ-áãµ¥‡›Ì9–¥4¼ê“_1§ØrÚiÌ{üq:õÔ0ôÛ˜Ü,¨ E࿺îe½eçr. 
Òiš¦„8Å„4¬·mÇ>{T8gÔ:5°¢YxÛü~tÎ*Y¦D8E³qÊÎ]èó^;jy&“AÓ4傯hx†?ãÑGyù裹¨µMû~½‹¤PL 19ï¾w¿øÅ;xì±EpââÑÊS—0ß–/Q4­ÿû¿< aÎ¥Y(š8ò¡Ý,ÿðéÁ2Õ&VTKà q›8‚íxÓ¨å*üIÑLl~ô$–³$øœËåT­h*ö~8‹ZŸào½Ÿòxå§h]¸÷Ý~;ëï[É—ør½‹£PL _¯hk㙣æ'?¹…O<.ÜÀõ7Ê#zˆiÆÎi¦PÔ‰}GÁ¡Ãäîù—¾ˆJ  hjþñ¤ëX¾èÿ"R”Ó¢:æÕ»cq\¡È/<#øì8S? BQ ÿùg´¯i„ Ë Z¡h6s mÙql[Ér˲¦xD…bæytáB¿ÿe~}ÿ»9ú;G×»8³šhÝ Ó‰€è Ø¶¬ËårÁº\.ìçº.ù|>Ø.•J•ìSi˲‚ýÇ)Ù'•Jçµ,+8†ã8MWÙ€þä“l»äÜpÔQ\tÑ…è&›„¡§(NѼæ™g8çwò–‡(/MEs³m‡¾´„SÞ} ÂR£ï)…b<Vˆ{ñ—‹¹÷àӂϺ®«YRMÅñO<Áòÿ|ÖudRMǧh>þpÓ)ì<½T¼P E3‘¼ã xojý#GyT½‹3­Œ%v• RQá*ÚY°m›D"l×ÑÑl—J¥‚cäóù`;×uKG‰D0Ð=ã8c qŽã9t¢Þ¶m— Z™¦ì/"BBî§ëzɱä> ft–ët]/Y׌B\¬¯·Þ|]"ô4Ù ‹ò‚S4<ºãðÀ3o`Õ¼CÔÄ Š¦Æ>ì0þ³ÿ],öÓÄãqÕ×STMà q‹ŸtyÍŸ?M‚Wz),*7˜¬È?g’gR(jÃ)O?ÍÑ/ïà;>Áëö¾Ž6ÚFÙ±‹[bÃÑ6öTN­PL{?œç6ƒþúÒå½½*»½¢y8çç¿ág;Ïåâu/×»(@©TîÙ%½ÁÊ#–,YYQ//Û¶ƒãÙ¶,/¢Â•aÁ¤W†a”ÌD?00Wét:AÅbÁv𦕠ŽF'Ðêíí öÅbAt]/IÍ mngšfÉvr¹¦i%ûT+ĵ®ëM^ïl~…â—â#ㆮ‚$b¢©c*QCÑ¿};ºˆØ¥â³…EÆÿ7Û‘3j‚¨ã£ƒåÞÄr»hï8NÉ@h*•*yŒ5ðaYVÉy£ƒ%ÑíÊoÆ;ïXeË;¹ü]'ý»Ùþé~ö¾è'O‚m´ÑI't¿$*îïà”ôû€1û„cýSÚGóÒ°9âZï{šëò>Åø¾ÿàýoz?‰ÓC#Ö˲zãŠ.áèª6δ<Æ­Íÿ5v8q2dpqÑÐpq10pý²\&&9rÁ9llLÌà˜::1bèèU‰0ò\Ñrih÷71±±K®…ŽŽ†V²­†”;º­é±F…$ÝÿW..Éï-—††…+z¢eÛ63 _XÁ³Ë÷±„%œ¾ït¾UøZL+©ŒcÄÈ“'Va~ûòûZŽƒÜ£N­ï‰Î+Ÿ‹¹‚|&bdéH½‹Z5;9„çŸÖXyå±€hXm>t3ûËf.†µ£êÇGz”=‡ï©÷¥¨š§î]ÎÞV%N©¸¾ÒÉTìùÁdÅŠ,^¼˜{£Ž:Š3Î8ƒmÛ¶ñØcqî¹bVµ{½'8ç¦wmâŽ#ïàdNfGÇ.|ñBQÝÅ(¤™øpœ¼ÿoÛ§·ñýÅß'O>˜å2E*ð„’û`F¾[´ºždbtƒ1lÿߨzÎdÂ6˜´Ùòö˜\'Û.yò%Ç—ÛG·­½ªÊcÇ=ÆJVNú^׃sþ‰÷ÿé*þnùB‘n‚û(Ûa&欭££ïݱìkÅXííéünÙðÖ­[9öØcÙ~üöýîÂ}óç³ù “éúJNÐï‘Ðò™wp‚þS%œ`{ÙÝ»w/ ,`ÿþý ±bÅ †††X²d ,`hhˆ½{÷rÒI'ðÀpþYçðÛ'Ë1ÇCKK ›6mbÉ’%´´´044ÄñÛêô{K.¹€[o½•7¿ùͬX±‚|€3Î8ƒ;vpÇwpÉ%—#Æßmø;Vœ¹‚e,cÓ¼Mì8xgpPú^xðàY¼s1+´<ºðQæ= ‹Kv°ã];‚k³¤k ó÷¾ö^¶nÝJ^ÏóÔAO±éàM%Ç~ß+ïc%+y`Çlݺ•÷èïoÛ\ó%]K°°ÐÐXöÖel=z+»ÙÍ+ç½”OnpÂÇNʰõ [ñ,¶-i«jPD®›éçw:Þ9]'~ð`Þ:ôVŠï)ŽÚ®‹.:éÄÁõþŒÖ#2Aß|"¢ýÀJDßòsô¸òzËg§Úw«Ô ŒQN#R—Ú‚<¿åºB9qâ%uùxö ëýò÷Z¥sňºFÞw!—ß!z¢ûG5ŒJïyLy.y\ƒçŽz®êöñAžçyUm9ƒtwwó«Û/åãÜGrÁ̹wŽg r]ôe ÁtÒIŒ.nÉKSþ-¼hcF7*˧rß±ppÊ>Ô.n‰¸ca‘%K‚DÉwÆT,, è #0ôrQM.K“¦‹®à|ò»Êë!T <ùà»gÉb`°„%Ĉû%Ibc/y}Vv¯äú믟1›8®jÿ¾ó†œŸ¼oTÅ'EÑ<ù’ЧQ©ô☨B-TŽå÷>7åÂóx/½ô/½ô‹/楗^bÛ¶mP²7mÚÄK/½Äi§ÆŽ;øýïÙÑínÚ{‹‡³¯ug<üæ.¾øbî¹çN=õT–-[ƃ>ÈÎ;9÷ÜsG 
ã÷ÜêU«˜?þ¸ú7Þȹ瞈-.”ƒ—Ì[6½…þþþ@”Éf³\|ñÅ,[¶Œ{Ã?œ³Ï>›M›6ñôÓOÇûñ̹çžË²eËØ°aóçÏ/9ïcç>†‰ÉoüÛ`»±¾Ó†làîÝ=ã÷v*\ò¦¯óßK;øÑ¯–Ô'Ò–Ëû).nI¼uëVþåþᳫ?KŒ]]]$“I Ã<Š¥—n&“ BmÛF×õQ‰ôÇêÌÜõà]œwÆy|ë÷Y´hÑ”êwY·UÕKÕ¯R[ GŽ8ñŠõôd„.éigêÌEOªeÿö7…÷÷÷³qãF¾ðÈG™ÿúOsÛÚO »£S-H*ÙöLuª¦-!5…ñ¶•ýsÙÏŠWåȾ‘üîcÙƒ´Í´Ÿ4²6²d‹¨-ËÁéh?L mÒãP~ÿ±@$”}X)´•÷ãÆr@’Ëz‘A Á1lìɵ½bçÎÞ÷¾÷=ïºë®ó6lØ0î¶W^y¥7ÿXÇ+zÅ™*Þ”ˆy1/í¥½¤—œ¶c{Þá^Ì‹yÞÀ„Ûêþ¿rÊ÷MzI/í¥=Ã3&,CÜ‹{IÿßDèžî¥ýåçö†KÊjz¦÷â^Á+xžçyY/ëžáe½lɹz½^/é%=Ã3JŽóbÞÙ·œ=m×z²LƆ=ÏóÎ~×ÿõ´+²^¡Pð†‡‡«8ƒb®på•WÖíÜ“±ã 6x™³®òŽüÀw½¸oø:Y1slذÁ»îºëêvþÉØñu×]çé« ÞÒÐ3=ÓËzÙ’wK”ð݋łυBÁ+Ä»KÕçµc¢vO-¨ö^ &Û6þG|Ú{ý~æé/ëžé™A[-ëe½‚ÿo,Û.gxx8°iÏó¼l6ë‹¢ŽÚûÀÀ€—L†m4Ó4ƒíâñ¸—Ífƒý£Û†QñØÙlÖK§E›±X,–ìÝ®X,ÏßððpÉvÁsÝN®«´ÝxLw;­uD³´+<ÏóÞ~ÉÕÞéÿÔ3ávét:°Ãl6Øšçy%¶«˜=4‹Ë6Ð_,Ï{§÷~¦než-HMÃóD; æÅê]¤q1<Ã+zÅ’rK&Ó>ž±q©TŠ¡¡!ÚÛÛI¥R&È}ù¤§ÐÑikk«ò 3¸NR–Þnm›õÿU*[“<ùª”x“ ™ªF1 räF·üJ¥‚™5MwFÚéÓ¥þQåh ¦ZKÛ5 £. ª}\вãÉÙñ£>ŠmÛ\þ—wàºîœ¨žû11)øÿ -(k¹$m£fÄ#n×®],_†¶¶¶Ž»}±XäøkÖ¬á˜cŽ™‰"VdóæÍu¯¤Ÿ}öY¶nÝZ÷rضH>¸qãÆºœ×®]<þøã<õÔSu;ÿdlÄ5ûõ¯;}ûêRfPöSN½ŸiiÇ2ép=Î?;~òÉ'¹ÿþ_°aÃí<õÔÔö(Ê~«Ï>û,O<ñK–,©Ëù'kÇøÃø×ýWN<ñD.¼0 ‡šékØ(v¬ÊQZ†mÛ¶±dÉ’Ÿ¬a*mãO<‘3Î8ƒw¾ó´¶¶²oß¾·ãz×?ª¥¨öñÔPíãRêmÇò~4Kûxhhˆb±ˆëð…/<Ä©§žZ—r7 õ¶ŸFáñÇ/ìˆâ&K,ãÐCåøãçˆ#ލ[9–-[ÆñÇ_×k±gÏöìÙÃÒ¥KëZ]×9âˆ#êv?öìÙCkk+oxÃêz&Ã>ðŽ9æN;í´º•AÙO)õ~¦¥ïß¿¿®×¡Z:::ˆÇãuÔQuµce?UŽçŸžO<±!®E5¬^½šŽŽ4M«k™ÅŽU9BZZZعs'u½Õ ÛÆK—.­ë;½Þõ*G)ª}<5Tû¸”zÛñóÏ?ÏóÏ?ß4íãw¿ûÝœ|òÉ QÕ›zÛO£°lÙ2N9åŽ>ú誶Ÿ!®½½¾¾¾àóàà ---cnÅWÌD±&¤Ñ§²WÌ“µa€Ï}îsõ.¶¢Œ¹þLOÖŽO=õÔ9?Ê¥Qì§QÊQ/&kDzÁ¬P”SÏgIµU9fª}<;Pv<9;>묳8묳ê]ì†a®Ûd²×aFrĵ··388ÈȈ˜€!šDV¡h” +fÊŽ³eÇŠÙ€²cÅl@Ù±b6 ìXQ¾úꫯ®õI¤¢|Í5×à8·Ýv_ùÊW&1Q(eÊـ²cÅl@Ù±b6 ìX1Pv¬˜ (;VÔƒƒ<ÏófêdCCClÙ²…ööveØŠ¦DÙ°b6 ìX1Pv¬˜ (;VÌ”+fÊŽ3ÉŒ q …B¡P( …B¡P( Å\eFrÄ) …B¡P( …B¡P(sÉ×hŒŒŒË娏q#CCC´··ËøÃòë_ÿ€åË——ì3Öºé(Kt–™.G__–e•\‹ñÎU«k¡¨ž±lX®kd;®U”7S±ãZÞ·Fµãz<ÓŠêQv<U7ÊŽG£êãæCµG£ìxú˜kצ‘4“Fa:µ›9ç722š5k1CŠeY$“IR©T`d©T ˲‚ýÆ[w ôôô°~ýú’e3YŽžžúûûY¹r%}}}ôôôLx®Z] EuŒgÃÐøv\‹2(;n>¦jǵ¼ojÇõx¦Õ¡ì¸òùU}Ü\(;®|~U7ª}\ùüÊŽ§¹tmM3i¦S»9¤Þ_f¦¤¥¥…µk×°råJÎ;ï<Ù²e ×_= Œ¯¯¯Ó4Ç]w 
X–L“-ßL•chhˆB¡ÀÏ~ö3@(´…BaÜr´¶¶ÖäZ(ªg,–ëÙŽka?ÊŽ›“©Øq-ï[£Úq=žiEõ(;.EÕÇ͉²ãRT}Üœ¨öq)ÊŽ§—¹vmI3i¦[»™sqË—/gݺuÁç]»vÐßß_â¾¼jÕ*úûû'\7UFFF¸îºëJÊ2ÓåÇ Žsùå—{®Z\ Å䈡ñí¸ePvÜœLÅŽkußÙŽgú™VLeÇ¥¨ú¸9Qv\Šª›Õ>.EÙñô2×®M£h&B-´›9ç×ÚÚJkk+ F ’É$—_~9»ví*‰Ù•Ûã®›*©TŠuëÖšy&Ë144ÄÐÐW^y%ííílܸ‘µkײzõê1ÏU‹k¡˜cÙ0̬ýÀäí¸ePvÜœLÅŽkußÙŽgú™VLeÇ¥¨ú¸9Qv\Šª›Õ>.EÙñô2×®M£h&B-´›9'ÄP4o¼ñFúúúX·n¦i–ä0©5ëׯ§½½½$É_=¯…tY¤»»›Õ«W×»XŠ ¨dÃ3²cÅ¢ìxôõPvÜ|(;}=”7ÊŽG_eÇ͇²ãÑ×CÙ±bªÔ[3ijõLϹÐT€îînFFF¸õÖ[ƒ º½½ÁÁÁ`=Ѻ©°qãFÖ¯_a†€aûâL•£µµµD¥mooâžÇ:×t—A15*Ù04¾×Â~”7/“µãZÜ·F·ã™|¦SCÙqˆª›eÇ!ª>n^Tû8DÙñô2¯M½5“F¡VÚÍœóˆëë룥¥eT|¯¼P###´´´`YÖ(ƒ«´n*È„}Ã0°m®Ÿ3UŽU«V±~ýúàxýýýËäXçšî2(&ÏX6 oǵ°eÇÍÉTì¸÷­Ñíx&ŸiÅäQv\Šª›eÇ¥¨ú¸9QíãR”O/síÚ4‚fÒ(ÔJ»™sBœLX)ÕL‰mÛ\vÙe¬Y³†U«VaY7ß|3 FÆZ7ÝŒw®é.Gkk+¬Y³†åË—³eË>ùÉOŽ{®™¼ŠÊŒgÃ3i?ã1“ö£ì¸9™ŠÏô}k;n”gZQeǣϥêãæCÙñès©ú¸ùPíãÑçRv<}̵kÓèšI£p ÏÓAžçyõþÄÐÐ[¶l¡½½}”+åxëš¹S9×L^ Åäit;®E”Ï>å¾5‚7Ê3­˜<ÊŽU}<h”ûÖv¬êãæ¥Qî²ãæE]›‰¯Ã\¼FS¹JˆS( …B¡P( …B¡P(f€99YƒB¡P( …B¡P( …B1Ó(!N¡P( …B¡P( …B¡˜”§P( …B¡P( …B¡PÌJˆS( …B¡P( …B¡P(f%Ä) …B¡P( …B¡P(3€âf¡¡!Ö¯_O__###u-G___½/‡¢Ièïï§§§˲ê]””Ïfê^Z–E½¿®BQs&[o«gPÑhL¥Í<v¬l¸1h–6`£ôý¢4j»^17i–g¹(!n†°,‹5kÖ°k×.,Ë¢»»»nò–-[T嫨Šk®¹†k®¹€õë׳fÍšz)@Ùñìa&îåÐÐ×\s 7n¬÷×U(jÊTêmõ *‰©¶™kmÇʆ‡fh6RßOÒÈízÅܤžåZ¡„¸âºë®#N³víZ®¿þz€9ktŠæ@ŽP|ûÛßfíÚµÜ|óÍŒŒŒ¨‘`ES’L&ë]…¢æ4r½­žAEµ4j›YÙ°b24š7òûA¡˜‹RïÔ›îînÚÛÛY¿~=k×®e×®]ÁçuëÖ±zõê:G?»víbÕªUÁ²›o¾9ø»¯¯¡¡!Ö®][±|¦iÒÓÓÃÈÈ«V­ *ó颧§'øîŠæ¤v"Gí-ZT²|×®]ÀÄv[ëçªedz‡é¾—===¬ZµŠÁÁÁ’å`Ã3QÇ+¦ÉØE=ÞíÕÛÕ2SÏ ¢>Œgõ®'j3O†é´ceÃMù½nf;nô÷ƒ¢ñ˜ ›ž©öê\êÏÍy¸þþ~-Z„mÛd2zzz‚Ïk×® Œá@¢½½žžº»»éîî.imm¥µµuÌò r×]wq×]w±eË–i¹¸æškèïïç²Ë.«ýÅVÔŒZØI{{;«W¯¦»»›žž’É$Ë—/gåÊ•ÀÄv[ëç*вãÙÃtßËþþ~ …BÅã5‚ ׺ŽWL/“±‹z¼Û'ª·«a&ŸAE}Ïþê]/NÔf®–é´ceÃM¥{ÝÌvÜÈïEc26=íÕ¹ÖŸ›óq—_~9¦i–|noo¯Ë?^RÁJêíÐÐýýý´¶¶rÙe—9&-ZĪU«JFKÆ+_KK Ë—/¯¸ÍdËÂØ-ËâÖ[o¥¥¥ef/ºbÚ™ÈN¦b#Ž •˜Uk·Õ>WS-£²ãÙC5÷r26222Â5×\C&“©x¼F±ájêxEãP­]ÔëÝc×Û1Ñ38ÙòLô *êÇXöWïzq¢6s5L§+nlƺ×ÓmÇÓÝ÷›ˆF|?(›ZÛtô˜µ°Ë¹ØŸSBÜX´hѤGZ[[Y·n]ðyhh˲ªªŒÇÑ92õ÷÷.ª–eM{˜ bæ™ÈN&k#}}}ô÷÷sóÍ7b2™äÆo¬™»°²ã¹Kµ÷r26ÒÓÓ4@úûûadd„ÁÁAÚÛÛkò=¦ú~PÌ=jñn?z»šgp²å©Ç3¨¨Ž™¬w&k7Òfžn;V6ܸÌdp6ôýêÑ®W4.µj¯Nå¸sµ?§„¸`åÊ•]7‡††êV¦öövÖ­[ǪU«¸æšk0MsΨϊêbùòå%v±|ùò†Ê¢ìxöP‹{ÙÒÒÂÐÐ7Þx# 
lzdd„––ÕRÌJ¤ÞVÏ ¢8Ð6ótÛ±²áÆ¥‘Û€Ø÷k†v½bnÒÈÏr-QBÜصkטS—Wåhooghh(=¡P(j¯|©È }²e’Æmš&}}}ôôô”ŒÚ(fS±Û¾¾¾ Á)Gƒ¥ËótØí–QÙñì¡Ú{9)á•ÉlåòF°aÅì¥ïö‰êíÁÁAZZZ*ŽjWó N¶<=ƒŠÆ£ÞõâDmf˜Y;V6ܸŒw¯§Ûާ»ï7ž ײLã½s‹ZµW§rܹڟSBÜhmmÔ ¸¥¥…µkײfÍV­ZÅ–-[hooâ¬ûúú< ™G&[¦(ëÖ­cÍš5˜¦©:г˜ÉÚˆiš rñÅÓÞÞΖ-[X¾|yÉŒ€j·ZÆ(ÊŽgãÝ˱‘r͆³‹z¼Û'ª·e˜ÝDÇëTö=û©w½8Q›”+FS~¯§Ûާ»ïW­ Og™&z?(浪ô¸s©?wçy^½ 1WbË–-ÝÙ{zzT£@Ñ(»UÌf” +jI½ìk¬z{ddDåRLH#Ô‹ãµ=”+ª¡‘í¸ž6<Þ³¥P(få7ƒŒ5•¶œUG¡hD”Ý*f+ʆµ¤žö5V½Ý××§¼ãÒ(õâX6 ÊŽÓèv\OïÙR(3‡òˆS( …B¡P( …B¡P(f€yõ.€B¡P( …B¡P( …B1øÿ¡¼Ùî7~%tEXtdate:create2019-08-28T17:00:48-05:00Ì»ž%tEXtdate:modify2019-08-28T17:00:48-05:00½Ÿ"-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1469x828+0+0‹QÊtEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/docs/graphs/sup/dgemm_ccc_kbl_nt1.pdf000066400000000000000000010266361360743507500215300ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190828165921-05'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœ¼½Mï&9räy¯OñG‡Î FAò:ÀŒaû2ìAúÐ’ZX©š½ì×_3ÒÉpóø·T•NI€²’O¼1œÎæÿó§ô9ð¿¿ãÿkgûüá—ŸŽoé,ãoŸ?á¯ÿçO÷·ÔÏ’³ýÿó¾®v]Ÿ”óçÌé[Mù*Ý~æó»ãÛ1þç>?¿ü”ZIòWÿ6ÿª÷ã[ÿs5üÝñþ =è_~ú¿ú.ûhùÛŸx!ö3éóÿþÓçŸú?ó•þ5~ ÿ9¯„ûÎÇ9þôoãO5ãlø—Ÿ¾çx-¹ëýãOÿø÷ÿøÓùù;œúÿAÓ ûÿ~JÇçûò2þÇ_ÿ”¸¹_žÛýkŸ°¥oøÑy›ë?Îþíj£õ¿þ\gú¶.ÀþücÏãÐ}ö?ô ì±÷¾þ\¾Ýi|µ? 
Óͧ>Noü¡gŸ|žÞþü:ÿÿòßÎóí<ÚççÆÕÌßÿãtU­áWÊuà²ÎOªÇýín×}·vñÿëÏOðþ]ÊøñÏÏ¿|þþ¿œÇñ7Ÿøüüw?ý·ŸÇ}ÿçg:Ó•¿õãìµÞåן)Ç™®–¿¥râ—[ýõgºã™~@ÄÜQú¯Ü{VdþÓÝãùôÓ6<¤ŠØTÊYĦpÒöc®æ¬8ü(g)¯G"m?f輿w©­5^Œoú¡½òøvõãHù.?¦ðÜ¡ÈEüØ>2®Fûˆ^Íí##FH‘‹ùªüŠ‘æ>Îoíh¥_•ÿU/ü 2áœË;*Ÿ2ƒòoþá4Wái0êÜ¥¿OÓ×iοô<Ç…ó”Ô[Ní?8Oþ ÏSN“¯šúñÅSÛ§¹å4#+À(vÍ öœïñwî”îbü?Á»OV3úTGoM Ãj)¹ùýdñÿΆտýïÿëÿþü×õø,å?~¸s<Ú‚Ê]îoíFQïÏÊÿ7Nùü§_~ùýŸþôû?üá¿ÿåþÓ¿ÿë¿ÿE#»~5ÇŸ¬xQ¹÷ónHÛ· ?†Ä£½Æ@dWºï#ÄíÌ×7ðº®ñשÖÚñ…Œ¶úmðhÂÕ©wkéì'­â"ÐOîŒ_¸ë¼Àö­˜Pác›mˆ[%ÛM÷op×SŸÕ†'¼ÛoûI½Îq\¹Ïû®ó!ó³=9†à%GÏg¿îgŸÝ•møýVê|ö÷ù­¢4|uhÃ]â!–rÌS]ß û^ê—Ž~Tô»ñœîŒnQfwåq¸äkž¯0îÌîzæoè¥çž¬ wÙq¼Î³ð•¡—ÕÑ%Ñ +»æ ãt¹6ür¯ú®¸ÑbÝ¿‰CÖè®7>À»[w=ñÞÿzºæq¯ çÃÛçËøW;okëè" om'.¬cê2Ûêñ ÏÔº+ïϯ`œ¯&ölë®h;zÁˆ9o½ž=q>tÒy¾ûê-ÝãZøÎ<ëä«Ç§5šð+ü¬FoÅÓÄËΘªÝÖÖØQFoÅéÒ‰ÿµ7;&†xÔ>ˆÑÖù ÜãíaðÎ8ßì²xœè ÈÓ¼…Š`S,)Åoä\¯zΦ†¾ÓWŸÅ%#³‡X[c@™}6\Jg÷^IÌù­œ¸¨k©v Ø”Õg4ðjŸlÎïǯճaî6~­ßø Z—½ÇŽÒr]m$W—E¸Àó>g÷j¿£Õcñ™_xq¹Îeô¨cõXt{`15>«XÞ3^áqßólÈj_=V{Ú{Éì±½¦fâ†Xûíî+À²­¡#æ!1_EÉOEP¿ñoæ¥àû¿Î^y¡?Ý}Þ^Çn²{lN7ójëiXþæ…ÚfPëc˜[=Vï¯ãU!\Y½¾¡ DµÑõ0_è&Öcñ‘Ÿè 5wÚñ®ãé²7>òžæÛC[¿V€E[N=§£Îë,xý}uÙ2D<´ÑÆ8²¢,ú%!‘k´Udék¥á²7f+«Ë"£•vWkÃw½º,ß> |$£­søZ]öfÅh^؆;ûVê³8±(6ŒêÕÁ3FŽVÆ_sXØaV>b¶!c|Â,¢>êñ¡^>ˆ¾Ã,Æs PHHæqùîóË0‹/âÛÕW˜½™h$Dšk4!.̺ü:ÈÓ ³²³o”ÕÖÒ³òâ®=í0‹ÇŠ4×=ŸVG¬qa¶Ö«±‡³ yÎyí0«¿™fe8`[»v˜ ¿yò–ž0‹#9˜ ‰³ L‚/ܭц‘£ì8‹óaHCû¸$ÎGéO§Å#+ÛÚí⬋+xìßîºã,Ú0#ožmÅÊî´n¬¾’¬ºãìï£Ì§‚ỹ0Û/ ¦„ÖÖž¹"ÂÖÅ<¢Ò„œf|Ë)Í6rLþXë˜ïò/^Žeñ¨ZÇ%Ëc9V½0'å—˜Çàx±«W•Á ã‚‹ ²èô+e[¿]•ã0ÜÜ_Ù óÉRË“`FÀk›Ç!ËÚY,;‚vmó›Câšv«é ÛzÛAV¦ ÉpÝ©, ¢Ú9ó‹/lêÌÄ X›‡aÌzrY_0D!Üì ËA=/ÏO5cb¹sYžlG²¦ö¤²2,aºÁåuƼ'“å„ÿ½ƒóÉb‘¥TLì9q²ñd±hËwÅ\ Y[{²ØÂ4Û6ŸðêÃ&VùÉb5*cbu>Ylhkä)«·ê‡ƒ¶öd±8?<»¢ôýd±œ1†ÎU~²X¤Â˜Ö‘.\˜WO+ õ…yÕñ$±hC A9­­=I,ÆÝš1üTŽ”¥>I,‡ „WüÓцÁÊg±'Ço<îÑ6‚íê­i ’Ê4ÇSL®Ò“Å6~*[æH†¶î³X7¶_…aiEWü»« /C;þ„Ï}EW|}™È¿fo-<û ¯øÚñ£‡ÕÑv™OJàÇp´õ'‹•ŒÇÖiBbrarUž,WˆÙä˜9³ ŸÉ“Åêar•ž4V“›ãÁ°ø¸pët$W|EWC F”ás«òd±Ú0·ºž,{-D ã9bn•v‹³µ5ãšúNbü*;EŸ™Õ“Ä"Ø"mDh7‡©Õ½“X¼Lžð€ç†©UÞI,Â~÷hsº†ùí|’ØÆ)&zÛŒDŒ;‰¿y1£Z=ÇU|sBƒŽÍ™Åê±ør1#H3ÙF†‡ÔaS‚‹Áw?c}åD¹?=ß1fÙÖ†üêIbõ5 ­­$ ÃC¾ïù^1·º}‹ÎŠ×5S!Ì­rq¬ â0ã§ÃÜêÜI,oCŽMkðeâì;ÀvNd‚ñmmd丄†h2ÿ™ÕNb1T %Á·1¿aÌ®òNb Að¢pîyªÌÏݺll+œ#¯.‹9FÍc²¶­ue>‘3ãïóx§_ß 
²'¢xkÙ®²bÒ²ƒ¬ö¯F·‚,P\®)Ï+ÁÉwË+ÁË-–J ­ï–c|¿‰ÉUÝ9,3ã3F_mȬv‹[¸¨‡4ó7ë³xÒÈIÒ±0µJ;ƒ%ßCR6ûZúÎ_ÙïðFó9﫺óWŒ4’b\Õè˜X•¿âOÎ0&Õº:ØŠ°É+.våî˜X¥¿2Ó;ÐÓÌ—ÑÖWþÊœÉGšŸ?&Vuç¯x”úÐ<ÞÈN`q:|KÉò^]ìÀàÕÑW1§ž’¤W;ÅXßç'>ÖM0\_ ¯s¶ò]k#‹ð¦Ïõ@Þ3@Þ㽘Yã•ä÷¢¥P¹ÞŒß*ºl±¦Œ—1¿¶ÏlLbã=™ŠW<|¶O`¼'3½TˆŒÕ:ÆKÜÜñ(1²¾/ÎËqÒÏxONS2Ƙņã=ÓEŒÈ螟Àx‘0÷¨k®íï‰)ý®Å3^¶µ3÷aü×1ÞÓ>Ìõñ0Ó‹ñžœž"\Ú+Œ÷äôôFï˜s(a¼<® g¢vÏx1MFNV AzÌË£ðU"}3`û`Þó`Ð| ƒç¼çA.ƒ{='#òœwœ #ӆ͎óbÀ'‹ 3¿!á¼øÿx˜è1°ór™àîi//xÎË6†ÇãœkÂy;G¬\mÖ¯œ—Pdð‡ÉV„ó¢ I¢Ãýâ¼D7²,p(çÅé2¾sÎõ”ó"ÂT„ÝË ¥p^D4Ln‘AU{ÎÛ9×Cwž«rÞµ0S>æ)œ·s6€1¦~Áyñ›H»îÔ&ìô˜·FUÄßy ‚y;¡#^›°S0/Žchê¶È#œm<±1弤rø¶××#œ·P&z{sÞx{Žóâ'Þ/#¥‚y;g«H2ëì+‚yÙvç”ídžòräÛÃËžÄÓS^†'묞òrlÃ{1`¡”—ce/\þëŸHyÑÖùq_3@ æícÕ´.-˜·&&» ‡yù“Hü×z˜`Þ>’ ƒô‰˜—©@âò˜hÛc^}ÄByñ¬"¾)/~1 ÅìçByùŒ™í]×j{(/Ú-~óMy™ÍôŒ[2Dê)/þáÍI°­uåí\®½f’¢Œ?Ù1¦ßv•žñò扟Hxñãø.öexÀ˧ŒuϹ¸^…}r€xÙ‡2ã÷ìuxù“ø¹{ð²#cŸ F廉-øiÔø.{dF~gÄNø.û7:ÂÖù‰|—ËØ÷§o¸Øó]<ûÆ¥9#'Âwy\N\˜—áø®vrå»ú•ïòÒ+íùî|ïiqLỸL\¾ú‰ç„ï²—'¼n›ïße7ÇY,iS¾~Sø.;z]¾'}¾ËîŠá¹Ë'ò]|HzÃéôò7K»ðù\zÃÛÐFH½¡ó èåÐs^›ØzΊ缡 è 醀ÞxÓôòïÄšFÅ=è ñR@oÈÍô†;ÐH½aÔË{Ç€SúìC{C ØËd¶vÜ“]{C ØKÈP òÒkÝöòÂðº-ÃyØËO«7[%Ô‹_?ñ,› h‚z9¶¶3Û®¢^½HE½:¨—7ÈÜòþî-” §Ïÿe¼·}®Ïù[™o9:¢ßkW ‘o½v€|!_LÅV./f˜:µô’õ’gÒK“Ä*ò½9¡ÆžGÜ/Ñô^m È·ŒYâ‚r }˘–ÜY ïÁõÁbsÍß}¹¬L˜¡Ð—Ýz\¶ â^¢˜vleò÷’¦b hÆFDÜÛIªHMÛ¡ïMq~k{G´DR5÷Hæ‹§Ú1FÞK¿û _¦( s…? 
_îÄ;1²Ùµ{䋘ÎI@2©§"_I_òF/rRh«˜Š|1¾$ RLÝ®Èý÷b™¯[™o"ýÂÓ˜:2_±”ù6ò/Æ%ûMa¾‰ßgºz]º_Ç|=AÈW¤ ß6öI”õ¨=òÕ s`¾Hò ŸsYmÍgË.óPèKPJIð\ÛUèKНºôIb'óÅ0]H×úDçÊ|%'Qæ‹á!Ü·}6Â|ñ›x·èÌ3ê óåù*ò [ æ« U˜oãHœnãÜ‚|ñz´9ö*ò%䥸r.>+óÅt›í}"óÅ%£»b¬l“™zæÛ8ñD·›A/Îó.ÝC@¾¸qü¹Ô£ùâ]pcÈ' òmjñÛh&ЗßNoøœí7=ôŵ vö5öÅ›¬­ïx(Ü—o„Âå>ã†ç¾¸…£]{¡N¸¯fÊ}ñòðÀ.Æ÷Oä¾|d–äCÀoã”cÒm„×ÀoÈ ü60d%6±ðËžÁdx®Ú(øåÛ9ÒÖƒßÆnÚª4~ñ ;£v5±¿¸ëЧº4”~q™wżÕX™€_nÇs°ñOÁoc ×m_°/ÌÓmiI±/~ákËû2uë—û"z”žëÂa‚} 0¹[S}Á¾üX1ÈÚ‚•b_^ ‚Æž öeðÉœ–l$ü`߯7R¹±ï}ñ›x«Å6 ÷åB5§ÍsürŽup@®K»û€ßÆ,çZ»5üòQ·º6ª(øå׃lâ°Ù俜ãáÃYˆ\Àïœ[0À/—ûÐö~"ø ÷,à·q–Æ­fS#௠éáR© ø÷åÁ/;%ÓA›ž*øMž¦ Pð>¿|Vø´1Lú)à·0U³M!¿8_F²º`Ÿ_ÜîçQý ù¥Z'%[¦Tò‹{ç¸wÍÜ_Éoø ”üžÌöÔ ÅzòËW‹x|Έ¯ä—cbÆÔá0 íÈ/ÆóƹUŸ±AÉïɵ«%)PòËW‹\ÓVùÕÛò‹ÛÃ+è ¢mòËG‡‡0_€‚_5 ~ñA_7å¦6Öƒ_ž ÷oZ¿Œ¤TÇØš¿xŽ˜›ÝkáMÀo¼1~™á`zµ‡€_~ß÷–Ù+øåp…cR¿|§9¡OÍw*à—/ŽI›K~CŸð‹[Ï yjxü†Ç"à7ôK¿Ì(ʵvP)ø qOÀox.~þjÿyüòöòÝnS{îË»ãÞÎÚßò^>Í´cÊ}q¶Å;"ömÜ·[«¥þûêcìËWšÎ½)D°oøÞû†È-Ø]ïbg¶Í+û2}¤NÏVìûVîðÜS{žsœ` =öE[Å;\HK±ïÉü!¯5fÁ¾8_¥ÆÅöæö ¿©Ø]”ÒŸjÚX}C¨ìËÇI…˜Ý»b_¼EäÆÅDÖ‚}Chìz¬`_Þå‹¶l"Ø—_Ò·µní±/{ æ@WzË{5ò ôÅ p“m·}n(f­å­îeÞ6¦ÔI /¯/s2a`×C_NëpèÏS7ë¡/?† bôÖCßFe_^›B•ú†ç(Ô·B õ"´žú2˜ø‹Ðzê\¡¾!˜õå“DVÂÀú‰ÔméÝœ‚(õå­S³i«rB}ùX1R­¡¾á³Sê{‹¯ õ ±F¨oˆB}¹ÙŸ`¦§y-žúò¥­Uê O}Ûîâ½p_ýê”û2”{m}UîËWK¡Þ5ã2j©HŠ Y|/÷=6÷Mø÷¿úrŸ!ͤ¾ú&n¿´M¼ÑÌaìË™ÁÌáàBûJ–ÕÌA•‘bæÐG‚B Áy:oæÛ¼™çý—i{”ÅÌ!èïÄÌAW]ÕÌ¿‰¸sËŸÀ›9ðá†{,bæ Ð_Í‚NÌtµCÍT&¡fºÀªfñ:½£åUÜèsm´»ˆ_¸cÄx—X:ÄŸô–ªAPK^&¥û¶=^©ïÈ5<ïÅGÙZŸ;àî•…­€{Eù4¾*TÞ;†ájÙ”_}J |9[g‡0¨+_ ¼”_à{2…ΖSà+@Ty/ÊM!ùlœÄW…ŠÊ{q«òk[QÞë‚/ÜëÔ~AâËq>]6äÜ‹áC?²Üþyá^ F‰oâ–/[ÙP‰¯^¦WøzføbxÉ–Ø$âðÅã*–Ïú†½<·ŸÌdÝ)|oä5øÛ)Ú+ZPø^¡N¼¬y˜|E¶¾Üo}Ÿ–+ì¥WCÍ6q ßp:ø^œNÜK‡¾¢C…/·õÞ¸ÁÉ–Eà+MªïåþV ]Ç.¼ÊŽ|5÷7ì ºZ‘÷JQu¯ª£¾×‰—UÞ«1A¾>“IÀ£¾×£¢ïÕˆ ú^QÝ}oxZ¢ï¥Çñëý…À7´=_QΊÀWSøŠÌ8|å³~ |ý‚{ø>±õ%ðu±õ%ðõÂYøòšûÞü¦ß‹ÓþR¾"½ª”V}¯$Aß+\Ð÷rŸy-‹Ò©¾—ö.옔Tô½™ >ÊLªUßÚDßëw-y¯ÄÖ ïÅ- ¡ËÝdÁ^ß›™ŸPu>/af&&vU¯¬â¯„Ð ðÕó™ÀW¶~¯_¨UÌ«!>è{à?¨{/®át|!/ÈËç”é^fM¢î•Í$AÝ룖2^^¢&†©ù“"î½HEj²©•Š{µ× ã \Õ½èum€ñ7ã =Ro¸uU÷ʇ£â^ýpñÆKq¯ì„â^kñò0Ìq–ч Þx"î•ïF¯ÞœJ{õàEÄ/œ½Z˜Âº·^Œ/õ.&êWioáÂ]ÚnsÒ^ÕUÚ«¡P¥½2¨i¯nåPi¯h‚´W#½J{ÈËÂD¥½z>•öj Ui¯v•ö†ãDÚ«;_TÚ«]V¥½Ú‡TÚ+ƒtöê—,Ò^é_ªì W"Ê^n©ìm­)©²×%ÄA׫×a€ÿ¾q!ÀžªëUñ´êzÃ5ˆ®WRè 
ëåpÆmŸ€wC0¼Ë8ϵüó+U¯l7 ª^I•ï2ÛÃÔrmôWU¯>UõÒ#ŸéRîz¼EUõꮪÞpœÇ»¼²ý«ª7\‹¨zC›Ç»áÞUÕ‹ÌU•O »<Š.°&(¼ÛDÔ«#¸Šziçt/›Jå»AǹE½7WMËt RI¯^„JzéÏ~`µ6‘ôŽ‘éZ¦fªé èènˆËªéen÷Z±SM/­Ò±–hUÓN'š^ŸMoÐ¼Š¦Wß¶jzu¼RM/eOhXTØÓÝðÕ©¦WïA5½´Ô¤iÊoEÒK“Ÿû2Â]nSNÂ]ŠHzÃ鼤—ñiÄ4¢Þ•¦¼ÜlTÔËÉ.|ªº‚¨—3|XS:D½•ú­Þ–à÷ŒWþ}þ ɉz“MïÍð߯Ë}éyoºv`þ5Žêy;”j 2AÏËíN{C€êyƒ Xô¼˜\kÏYóâ°c+‚ƒådÅìwTÍK[«¶w;¨š—‡m€ŠyyX¹k±ä"æ W"X—Þ¾iï ½”pQÊÜ^b^º’ô{(>‘êfæ¹`L¨næjF´žêfê÷ûe›í‚Oo£yÛZˆO/EaÖêT96þ¢^Â…êÒIxØ7X›õ–o#:ÍÁI©.=cî{míÝlW­d—GÓp—©<·Þ}¦S»´'&q_¦îr’C¿˜°‡»W4׿C wé”s—­h¸«ÞB wiÊÅpô•ÃEŒwçÝò°]usyÈ6¨ Û¥¿#_2!¬g»4m'Á5ð%p÷â,z2¾O„»ôóáÚ½­Ýܥˇ’å áá.ÚNAK;ìánlóp—m”w™ÞÒÓ]úEÝù‘*{¾ËÙk.Ûç@ø._,_¹Þ˜-cb°ã†ðÝ‹Cã¹¶ª.¾{Q«­m—ÊwÕüHïEäVé­¼ô}"È1# ¼t¿FÈÀ7<5´ðrª9Ö2×ÀKÃüÐ}Õ7à =]/íàœ‹Œ^^ zÞRÉ«Q/:iÁ„Â4Ô¨÷Õö ^žyþÚ†¡F½bô¥Œ—Vlœq[È™õvÚàrÒ2xpF½býŒzÛà¦ìVŸÞ`Æ«>½xä[8­>½¯†/|z‰$¿õéUƒ:õéU‹ÕíÓë/^)/»–,UT›^º ô¡ó­æåq§}©o5o0Z›^µƒS›ÞÊ/AÓ'B^6qçFþÒ¤·ÒþèoÆ}†Å¤§¦+0Þå'BÞ࣪&½ê‰¨&½øÊÜšmÀU“^}(jÒ«n—jÒ«V‘yù‘Ó“Ó¦äByÑFˆ¶ÔjÒ[™\à‹4PíMzÅ9?˜ôbˆâî6ãDêÒnO\zq¹@hj?uée¦v-°åÒ—^<”Ö>±`ÓKëL¼á{[ø:›^pk4„zƒ^ºHr}ázƒÞðE©M¯ú¬ªMo#lk*¥6½R!ØôVR‰2ýÀƒM/ßÒѹ,ØôÊ®.½øZ¸—Å´ÃÂy9˜‘bÚP—ÞÊžÍgò…Ko¥ƒÞ•§)OpéÅeÒå$]ˉ׹ôª­£ºô6¾åœ/;N\zõ-ˆKogt¯–ã—ÞðÄÄ¥7ázÒËÇBJ¿Ün=êÕñ£^<Ä<]S´zÔ‹CÈ3×¶]A½|äÜmÑ‚zñx(Ñ2£E½RgCH/3ô«œ}§&דÞ„ôªù¤:õ†gåA/»!™óÄÊó¢ÇÔ³îEBõéí/Ð}ËÛÁ!|jÔÛ¸1«ÖÓLu=æe.Gä¿ÄÁbÔ«Ÿª`^¼ºƒ{-¬©Q/ºS+©/á°Ç¼LAŽ~.U«õjX£^~î×Þ##œ—³‡Vì0ÏyÑ sT¦Ây/Úç’ôš\WŒzõQ/£^N*HXß>½œ| 'l˜ëA/÷s!ÿ¹èe¨˜­XŠ>½þYyÔ‹þŠþ’ëò‹ð¨—³tŠ³Ùµ{ÔË„w¼L2Ô¨±äD«mkR§^\æ³WQoxŽêÔ«^°êÔ˽Ï>A½™[uäÙÊ‹:õ2y9Wm!E½xß«ÏDÒ‹/ YùÞï®F½2ÑVÒ›i-ù¨0…ô2Øä¼—Ĩ—Û…½`,¨WÑ„¢^¢«†‡aoVz+ë ÔËlZõ_×mÔ«fú{I³®bt[mzãÆÏü1±é%¿½1&l±îƒzµD“ÚôVN`.ËêƒO/ÇŸcA?E½™»ãöFÕàÓËú4ÈyŽô¶o r»i½úô¿Zz3ð³uNPoæ¶ÍÒ¾D½¼?<•å-­N½¼è{`A½CkÏ—­²«W/F¥òìnR¯^¼¯ÇµUY/ÚÒ|Ÿˆz9 <ßœ8õ KSÒ[èMñ8}é-tóØŽP/§Þš·#”’^üfk{£ô ½7?§¾¶~/é=¾ôrG‹í‹¤÷àœ“—T^f½‚ é;BÛQ„—Øu ú½Ô½„—Úçƒ&×[Â˽buÇE•ðrÍTWÍ5/ál§=’a/ᥬ‹å /BoYB$¼¬€yîž«¬7܃°^ݤ¬—mH·L×Ã^•Ñ+ìõPõ"âaße õê A½²ÙBI¯Êä•ôâý\ô]^Þôêž)%½º#d“^nSäÞl/Bz°1Z[æ ¤—mè¾ËGUm(Ýáó/l¸m­ñm”—o7Ï"ã0]’^înf6꯾ ²¥XI/ãË-vœ'½Ü‹_´ÌAIoã*%æþݘ¹ø6èµëå®[ÖI±|ÂzÃþra½ãnÎÏê›õr|ÁÓ_ŽžõênQe½ñ|žõ6® `2jû¢qÃÅÒ&¦ãNÁïEn”õò'Y”µÁzy™˜-×5n]ßÁ¸AƒØrn¸Y‚t»\ ëå®nœ¹¯åê Î 
xà¶V*¬—¾3‹Ô-U®c½l«,±0±ìCzÇA{ÇŠ€ÞÑDo†É.ô¾Úè}µ9ÐOç8o<ÌsÞ/Ú6ç}µ9Îûjsœ7\ŠÇ¼¯Ãæ‡9Ê›äMòƦ‡ñŽ–´hâo<Ê!Þ×añ†6A¼l;¹”‘½÷u„C¼ñGx_‡9Âûjs„÷ÕöÞW“#¼¯6‡x¿hÛŒ÷ÕæïøfŽgˬg¼áÎ=âMŽð¾›6àMŽïÆ&‡wc“£»ñÎ<Ýý¢mÓÝW›£»¯¶‡î¾šÝWéàîë0wÃažíÆÃÛ}ý¥c»ñ·Ú}7m²›ØM÷Îù<Ø}]‡»ñ×}7uï6æ›ÕMŽê†&uc“ƒºï¦~ç/û¨Gºñž=ÒÊ#ÝW›Cº_´m¤/ÅÝ×aÑeSw ËÝ3ѳ·¯â BtÃÙ<Ð}7mž›&Îëhnlr07>_s¿hÛ07þäs_G9˜r,7>]s‡)Þýìzö07þ¤£¹¡ÉÃÜx‘æÆÃË}æXn¼ÏrãO:”«ã­Ü×/:’ûºGrãÙ;d¢TЪª9r74yŒ2Á¸ñ'Æ}á8n<‘øá9yŠrw µ\rX¢Üâ¾®ÎAÜW›ƒ¸ñlŽáÆkt769‚š<À×án|ŽàÆÛö÷uœ¸ñRÀ “¸ñ0ÇoÙ” „|¿ed<úcâø­L+„Þ²Ïç\våžÞÆkôô6>JOoã#1z;ü6ôôöuˆã·1¿óüöuŽßŽg…yàe<ÛñÛ×é¿}ý¤ã·ññ{~;ÚÛö…߯äùm|ʞߎGæ Ž9~¯ÓóÛxëžßÆ{ðü64ÏoC¯ôøöu ß¾.ÅñÛ×速˜¸ñJ¿ LøíøIœ¶NƒGoë#½EN;u¡ßGoÿ2ó]®¾gó2|©u)ùÜ6ªÖåü‡›Ï®þòa`õªÖK™_©ø0hí!…¸¢ðý-Å›@\D®Âª¦jˆËà[~Þp²‡¸öî~ší·BÜÂT|;+ÄÅø{±&ú§â2'ùs‚ÝÌb HÕ—S®vIy¶¡\ìÒög[æÅna'½l_ÈK±{ÜÛÓD1.Ť¼Î~¾}ÐøåXUì’úÜk?CPìÊR{PìJíê ØÍô²Æ»=Þ> A”¸»A¶*Š]ºØðÁåwÉ5YpVŒKMÞq¦Ëˆ vé.ãä1.ËÜ”m»ÃU°+ Ç/Án‹V©Íc\]œ‚]Õƒ Æ¥ºÈ挫Ņãry·Š¾%»*9Š«Z¥¸ÔÊáÌŸ\)®”GVˆ«Òƒ—`÷¶²&Ÿq9cå׳œÄÕ2» qƒÂQ n¸JÏp1M§íøŠ7Âpµòb¸ÔäqáªÙßz†{JµÛÛ}W*Ò«\—¬ Hÿ–ëâ°‹»Aò¬¡çåº,¯[Iñæ>âò°âê"Š\÷d@¤ªwšÇy¹®*U®Ëß¼‘nµÙ·T®Ku\x€Ó‚V亘Qqð]ZW/×Åo"-“`Uëâ:÷™mWëžœ cìÍf!j]F‹ZW4I*Öeèêe;XˆX—A£#˵NÔº„ç±G:¯Öe`8¯ë K†xA«ëhªÕU%²juU¡½µºTÊàŠ–¹›ˆuƒTź,[ɧ6…*Ö휑ß}Õcób]RG±naéž¹ÞĺZè[Ôº¼ë~r¦½Ú¹.¥–¬ãöÊeÍÝŽöF¹Q8¬rÝÆ„:­z¾*×U£ÊuµÞµÊuq>ЬLÊuÃoF¹î3¨\— ‘E¥¬*´Èu¦è¸|¥/äº|Hy•?P¹®ÈE=Ï¢âùõ}¾=6põ¶ÒÔº• ¼ä{JPE­«%ÜM­Kaò”ÁŠZ—Éͱ½ˆE­ûn{ÔºA¨(jÝ µ.þŒœ _ˆuUÓ­b]­e/b]ʰñ¬V‹ŠuùvíÂÕºNÞÔº…®g™å’ä:¹.9AÝb‘ëžbMӀϿE¹.÷u¦º½D®{ófW¦äºx´ùØ¡*×Õ\庅y$ñwÑ5ŽXè‡Û’ÂËuYm¹<>¼Ï¿E¹.½©ÒVÊ©\¹JÚ•ëRæËÂjV#\井f,©Ü’ëâç2ÞÛvJðr]^Ö6Ãõr]æyÝM­—ërèG_K³”²Êuõ;õrݰCäº!‰\—_OWæžÕëªÌZôºaã†(v×¥ŠÝʲÔæoc‡4`™ò«b—‘ƒ\úZ±Ëë¯æ¾»O¨‚Ý >WÁ®u"ØåeÒÉÎÜU°ÛXÈénæÅ*‚ÝÊÚÓÔv¿8ïüé'•¿ì†ãD°Â}ìºÔØ v1ù[UT±b(v5 WÅnPS«b·ðƒ«VŸA»üÞ¨h5“Q솑E»ºÍB»š«b—ï;MLù ¼7Ž·Q±‹Ñ .Í¡(v9ç,RÓ­zÅnH³T±‹·˜N¼Å7ð/ÑÇŠr¨d7(»U²«)˜Hv)@§wRzßSèyß‘ø†=©ªØåКÎUðS»!]UÁîÅy‰rŸÊú¤n våL¢× ’bUìJf»ÁÔ²6TÅ.·YÐLä sÝq»ôï«Ò¸*vyÓté™å„‚b_ÛòGÅî5²‹Ûª†Å®ÛÛ«z]’|Ô—Y‹^—ЈbWÞ£^‡$+ôºAwëõº8®Ð—u•p½.}`“yÕëRkèœp½^÷¢W4†bsBÁî|·ªS»„¡»õð}Î+v•ÏÅ.'¼è“sÓ¶*v3ïgۇŮª¿Ïx÷?Ÿ¿ý)#Ôž¹-§ìçˆüÓxålø—Ÿþù{¸2GÚˆ‚Ï?# þê2¾ 
eÿ¶Sâæ~yn÷¯}B“»Íõc«Ÿ}úõ+(LpììÏ?öü‚ê¾ûzöØÇ{_.ÏrÅzãôöÇzöùÈçéíÏ_œþâÿƒë¸†6'F\Ü@Ôd9z^¶‹—ô_6cïß%ü'¦ŸŸùüýAû7Ÿøüüw?ý·Ÿí™rfz‹I§ì¿úLù;΄!¥ÃÎY—áWžèŽ'ú!r‡å¿rwY¡ø†Hw™ë3¶°þ£Æƒ±¹É×Àá"¤íÇ\ŸLīщÆ+iªUjkgã›~h¯<Ý¢ùè#¢WóCûȈÒGäb¾ê#ÿyPFR\˜ˆsþ†º9¿½[Á¥–wT>>eåßûÃiHw0û?ûÅë4}æüKÏsÐéˆ;¶9ËøóçÉáyjºÆ’VêÇOmŸæŽ§ùONqpE2ס’Âì˜Üî8¹w&œ‚ÿoœàÿøO¿üòû?ýé÷øÃ~ÿË¿ÿ©ýë¿ÿEcççWí7ÌBx­ 挓ÁÄQ&Ušîmó< !¶pû OYuá3…VE¼e1ùs¥f%›Ú¦ü•[Žû*k>Æs2î½ü¸"§U)¬žÞÊHäûpJKNå­DËMÇŒ½]œ ¦¡A(ÖÖ6ãÇÌô:K·B¾˜Y:¥ÊÅ*Œ|µã¾ïË UhHu4Óïàï©Z}Ö¤ŽÏÜbº?ý'h68˰²íªœ¢·¶y¾Û U(Ë ùõ,¢“o¾ÊÍžšs nž¯9¡ æu®¢–™bˆø>Óz­ÔB<ˆŸU©;¦öãQ ± êÁ™óL¦â!ütâÆ0Ae¦b~릗þÜ™†ogêTt!žýèT¨"ç^Þ¹úkö:Vl)K“)pÚ„?á¦qû-ó7o'TI,‰>;Ãh«P%qq»ã²éª¨U®Í UTì€vB.œT%¤ýTåÊ@ÄÑy{Ì\™ŸŒþªƒm¡ z‰õWýÂit„`bý•CaçE&dX\Ý5|V\ŠÙilJc-©L;„ÂÕ«ÆòÖ©~™>]%W§Tá[À°”"`Ó#T ý5SU›­¿2h§- ÇM›Ÿ@鯨=ÔåeöW;¤ri&Ô…Zë¯èËOÁB-ÄNcÙ—ëö—aÛ#TIóGL÷W0ÁBÍ>JÉN¨’X¥¥²N×<¤8¡JaV„6µøügVˆe¥#sU“m-¯ËL„—tÏŸ¤+ѱT Õåï…af‡X‹jÕ `.½í4–_å93U-÷á”*‰1n׺bÛ£TÑl»`võ(UæHs£/Œ7€ÙU®ýé²þ0½:Û±,ͱën•›u+vˆ¥ÿ.öÖwˆe!‹Cÿ¼‡â”*lc­:ƒå‡‹±çSНPáóXjŸ®#ÍßlNª’h©Á¼£\ÖÖÓ ²‰5 Ø,#¢â\A6Q̾]ú1~cmA–rç¬n„6«ÓRzV3»*õtR¦â¬f£R=TeÈq3× šsùƒ¿–Ÿ `ødT…ÂFüsNƒcT%Q¯· m È!+ÜaV2m¶1ŒÏ>K½ò.S*‰Ã ³iLÃR«‚éUi+Ì&J­R7ØBÌød²”EujÃÇ¥p Ì#U9¨3X;ÙØæÌå¨Ýá =¿z=ærb"^F¥àfSɵô_¨‡8v˜Õï€zˆ´Ã,½íÚlÀ¥â\qÇáÂNê ­í‘ªpïÕ‰dz,"ñvR•D ‚Üx2”‚ùÕ#Ua Ø]c«`zõ(UâeŽ-TÖeOBŽ!si,ã¾âìICM>¤™‡tzÚZœ=Iå0£˜‹}Þ;Ì’Qk7ûØ(V·Ãl¡žïrl¸(,)ô$²e ¦,gjmCª’8ÆR?=ã Ëž<‰l¥4ì²}…Å.Õ5=ê{²ÂOø'ˆ5+̲–.ž}o»XaV?•O&K{›Q–zhVºÂ,©kw´{GϵfO†ç³YÁ>tsÌ·V˜åûÆÄ{~u˜ƒ}KO"+”mX…Áá©¡r·ŠU¯îs^HUk9g¾ˆNá¤*¼qjºÝkY¬«’ë{LBWŒå<€qf¤7sˆ•ÈâÞð.ºMdo&]O"K>F÷I袂|xûNªÂ6:ŒDv.O\ܵµ\»¾owâªýŸlJTÐK½fã~’úZ®’¼ü¹UC9/…jçžã çe½éRvÎê9ïIè×y“¦›qœ—[ +b³Žƒp^nÞ,Ì/gª+œ÷ä ˜9˜DÓ^6Ñ1ôš%ôžLÎÇ|½¯6z1- •]â^!½tâ;x{e69ЋϔqÉ{[0wƒÞ“Ì#°Ð{œÐ5Ëè·½˜ÛPaµ*Å+èEŒÁLäX—â8/kw sŸÂÐ7·QŸæ"®œ·“cOÌW. 
—:3ô«>õ z;·½"7˜2I½ZVmƒ^ÆcnN› 7½RÒU9¯–,SÎÛùÉ5|†võžój%dá¼ f®‘>‘ój}:å¼,‹ø˜§~]9¯ÖUÎÛèZ¸§ÆÊyµˆ¬‚^Š‘’bxƒ^-ê ·QÛ·§( zY—†ÇsŽ‚^×V¹å¼íiN>%Êy…Öè_sg‚rÞN¤ƒÉF·Ëôœ—ÇuÛ:8/K§7 ³H—rÞÆöÜ$^8¯– VÎ˲ñXOwoå¼Ü;‡·JUççá¼a´rwË›óRô‰ÏüÀ•óRFÚ Rá¼8.aúÔN»zÏyÃ{ó¤wäT8¡}ÃBzµ€»’^>ó–ýDÒK«Ð²÷a+ée×sóD!½qûÊyºø*éÕ÷­¤w(oü%¤—…Š1o«F…ôj—UÒ«ê•ôò¥rÁÜÆ¥¨WjÕ+éePqÓ`!½¼”Úq  xÒË—wïRSJzõ¥+éegqRHoxšBzù¹pÑiúmlÒËÎðçHoã$ÄñUOz™êS&=ƒ½¢^w;öíQoÛ¥6 ԯУ^†Äµ¤¡ wj·¹~_>ôr®rçU„FAo#Œ{¨“€^† dG&vVÐÛ¸†ÒצD½Ùô2ÌQa0!‹p^F9dés˜VÌËIYc1äì9o8ÌsÞx˜ç¼|`,„»®ÄsÞ1'¬«ƒrÞй<çed8îj®Êyy:îVž>¯ÊyÙÁþ æESB¸²r ÆyyyiÀÁù—žó†ÛÎnK8/ßµƒžóòUÃ¥óÍyÃg(œ—BÂÀôƼcfÝÖ¶żáîó†^.˜w  my(æõE°òòJ¸b?]ò²Óš»Ï&”—ß .Å29¥¼ì‘Èom*¤”·qW@_;ª”òò8›ã~"æåK kȬQ¡˜—×Y¯µwJ1oÛfVŠ.˜W»PÞp•By5CTÊË«t«[‹òrè;û*䣔—ã§°³RŸR^nËp  By© Ù¼Ì?¸ná*ä ƒ›@^´ÓbÂayC7È;œ8ê"â•p-€—!y¡)àmÌD®µÝH/‡Äk[±+àe·7Þ¢=àå½e¢-»xÑÆlïN3QÀËgI9àœ}(àÕþ#|—A’0sî8V¾K3†sͤïj±XÁ»!¶ ÞÕUñ.¯¤P¥d°Öã]fkiê0>Þeÿvë‚w™(ãáU[v¼«¹¾â]ÇÒR¶$áñ.+tã^³”âÝð¶ïò»a¢YPñ®ÖkW¼«“?Å»Œ±wÎïrkBc™æSŠw‰ _¼‹”±º%Á»œLTÖtœ![ðnxÔ‚w; ¬â–l]Kð.g›\É/vœÇ»Zg\ùnH—…ïê”^ù.3çUÅHø.g>´²Ì]/g)Ü<› ‘zÀº„^ΊÎYéã¯ñUÀ«Ÿðr`|ÄM xù©L¾«5Ï•ïrjÆý†·=Ïwù‹ù:Ì»Gùn§ÝQ_ždÊwù#È'³ÑJ‡wÉ„iŠM(ï²xvYØÔ³]\03}•í*ö¶;h‹ud;ÎÃÝN^ŒÎ“Æ Bà.áÔus ÚÚÜEZB(¼»à]‚2µùïvò(LeoûM‡wùPНk>gÅ»á8wÙÄò@ÕšÞ ìMeôSÊ`m+ Ã»ƒËQ€2eCˆ¿Â0Š8s~¯Ä<ýf÷¹è­ÙHÖk߇®‡4éåîèóÁJzY¢‡[ú'êÒK{í¶úH/Âwq6 {Ò{2ü[ÕŸÀy)6AÒ1³Ázù‹mÕzíwY5弜”sUiÂ=Ñór“âf^a/èEŸ¹ .wAôv®2PðÒóòKÊ,v´ôµ^ÏÛè —Öþ{Õóê@0¯2HÑó2Og‘ñ <çå`LæÔ?‘ò6O–G›°ÖC^- · @À•“…zÈËçŠ!ÁF§ y™éœ»L©B^üG¡ˆcΕ”ò2©ÛO)/ž)÷ºãÅÎËð”·κQ›§¼øÉÚ©Ÿ¨ ?”w±á?[SÞ´ñ\uΔòÞ¬e†‘Ã4ÌBy™ŠfÖ`î‹ä>”÷¦͹&ÍJyia‘ŸN)”·Ðý ›¦0^&̧AWÏxoš/ã•ex…¼¼”=s±KyéÈËÙ|úBÆË¢& ËK@ë!oüÍ yGr|"½6œì!/;pÛv­ yÃM å¥Óñ¬[ å ÁsÞð çåš«EÍÝx zñ½xoöœW»—r^^%&ö¶@©œ—A#o[\å¼4”ó²­îº zuGA/úh&®²Ù…€ÞÊíKi§Ê{ùÈfñ·aƒ>2ŽÚÕ÷êkÜËÎršë¯ÂÞpã{c›‡½ì}û+ìÕïÃ^üµqó¯=ìÕ° °7¶yØ;&Ž©¥òÒðꇯ¬—”Ó|#¬Âzy6æF„õòéSfªBa½cBzV«'¡¬W¿°^´PrNw9e½•µhèñ<›<êåa5¯í#Šz9Bq2l3Ba½côº¶2DX/ë1ác¹L(¬W¾TE½’(éeP»ç\âIo8̃^q"05–½•:¹]ìOA/GQÚ€·/¬x6’8ã%zyß×½wûùÀÜôòRèÚ3U zq\íÛ&_A¯ŽŠ zã-xÐËóÍŒö­âåqéZÖn zã=xÐK[Ýb3e°½|.™%;ú ôêÓÎË&VåXÜÓ“^¶Q³z/ü€^~– æ­` ·²bB-}Éj=ì×çaoã¬kv§Èz‘HåƒZËÕ´QïH÷hÕu½X1ôØâ ^¦¥—Õ‚UÐ;Ö]ŽSAo¥ýµX¡p^ž,írÒ zëH(öbŠx™Ÿ èå$®ìœ z+÷Ãìj zYÌ YOż< ½ 
¬ój}Kżñ8y¿Ù¾hˆb^¦ãмCá¼á7=ç oN8/ßÙåGÌËb ¥ïÅ-Á¼8¬QÄÛçó2Ξ¾pÞF~p?pÕs^^{ÞµÙ•ó⸃ÕlÎKÐJ{ƒbÒ_Ïy9áI¬ñø–ð*P̋îŽq×vh æåÓ¿ú’!*æ¥féL9 æå<éjÙLóò8̪M‡ ”—¬ž{ A(ox5Byõ äå£?.f.—% i”hì@^öÉ·`1H /ãLÝŽ” ys”ÖÖP%—oLQë /+} €\iÄ‚yÃ;Ì«ï\(¯N£7å¿æ)/¯âhtZxSÞð@„òâAžôZµ•JOyù)í?”w+€tJ‰=åý⪶ÿF(/»þ¡±m¥¼£¾d]²+¥¼ìyuïuWÊ‹[àöò²| <åm¬Æš¶"U(/î–.&VWÊ‹kAZR.¥¼·s+Y…ò6ÖéàÈÝ'ö”—WnZP¤—ˆ7l½Pʫۧ”òê^ż΀w² V0¯Œ©Jye£²BÞÆ:*èç) ãåQÛÌn ^Ý‘ú} É x“/+‘ÝWf¼£`ÜF*žï޽Ì…L‚(|·»Þãîæ»4ÜåB³Š¾ËjÐŒ×|7ÓPi(Êw‘`cB½¦Êwé}qïåå»ZðTùîÉze>õãÍwÙãÒ#?¾{QŠõÀỬéÇE@êÇ‹~u0=3®*~¼´9yT›êÇ‹/ïaŸOüxÅE'øñŽòÌo°C¼ô`å.…¶›Ä›Hã0Tšý†òŽiÕcYà/}‡ŽYºíoðŠ]ˆ—Ö[…5éÚÛ§€‚ë–×|s‚xi9„TiIüxéÕ™Ó‚¸bÇËÝ1˜‚”ô¶ãåVæÇ@¯x"*ß æ¦êÆëT…î’y•Æ ^¼ÌÓðþΔ?‘î&>žÄe“üНxù3^1ñü f¼¬ìÔžÒ]úb Æð43Þ‘?­ývÁŒwÜй \ÄŒ—Íù3ÞÐæÊF_Ä:keVéîÉ…` -Ç=ÛÄŒ7˜øšoæÌ·Y…ä`ÆKqFÖËH¬˜ñò\‰[!?‘îªíS0ã¥kÅ#Çup—~cg»m4Q¸«^DÁ·rªWR6²¸ñrWûIû™7ÝUl¥»ÁÈKÝxÅ;¸ñªŸšºñž¬«ŽIÁœ]ªï(y4Û<*t×2 tÏL^è.¯³PüØù}èî9j§åå*t÷¤¢nš–cBwÉñ˜øLÒéáî9&¡m9 Þ=‰pÛª^¦x—Wyä³.\wÕì]ñ®YÜx—c#¢Ë2¼«žQŠwµ@½â]’F–Ç›.ÖÂwÙ”è}ÚßZ^ &f¹§÷™ð]­Ó®|W«ô*ß=éÿ“²ykÞe]a*#ó5U²ðž¬‰q}ì¯VTÂËx?b!¼|©åѽáåùÙÛt*ÂËÛ#H¿Ž·¯ŽüŠxµ\»2^þC$DíüÂm··éϨ~¼ôŸB°´Ù‡¼™[gþ"—Å?¹­6]«MKG»Ž$Wýø òJ½fe¼Z!U/Û¸ºf~áÂxùL¹Vg®Žžñ²'÷F™•‡0^\záÒàu¾Å¼hC‚VmÇ£2^­é­Œ—5M‰¢§Ÿ»2Þð¨òò­qmaAWy/Ò¼wè ä½Hj¼-7<2¼ZþZ!oøÈò²R¨`³¼|fÌ¿fn£WKè(äÕ¢¦ yYºoù6—yµ0¹RÞðåeñÏú¬r æep@й¬EæÕ"ÉŠyc›Ç¼™¥b¯%¬R΋NJa£K{9òj±y½ì(ÈÒR{Ыµ£õ†oXPïEø¬õ ë…ÑÏn¬‚z/ *2þ/ÍŸô¨÷¢Â_H1/zµþ…¢^­å¬¨—%g‘­u(½á;ØËó1Á_&¿ö²Ö.MŸŽ·/¾Ï‹ ±fj#¬—ßî,x2ó¬7Üžg½çøw[!-¬—á+ee]W:º³ ýÚ4§°W+ /Øð°÷"»¾öãmöjd½¬nSó¶éØ«U„•ö²h.wW/3‡®¥£ñüÏiÒ/¬—ã¦VLYo¦AÞãû"¬—myÏ•õ^ç7K8…ó^,Ž´ÆÊ?çÅÅwf –_ çÅq7δ\Zô^|iíŠWÐËŠ´”ÞÛf½™ä)~)oÐü½ôâ|ˆ®ÈMúÛ©áâÎÄg̓ÞL„‡Œ èe$V!³mxzq •QÈ„BzP1/ØjEz “̃¬× éEÛÕÈÍÒÛŠ—SJ –Ø“^\> Ù’ˆž—‰ñÞl¯¤7¼!½™!àØÁIH/GÂcÃB½…~ŽùÚPVä¼|«±‚ÞL #ΘúÛ­¡p¯õu§åtàAoáîüL6èìA/w$#ó°þ¬ —Û>Zímɇ=è-4ÎÞ—‚œ—IU»-« rÞF9νŠ=èÅo^Ì@Êf¼Ü‡A›»f€Øƒ^îC<ê,KÕ¼^¬j^FšŠè>PÕ¼LßñÈ&´j^ªV+Š¡æí\,[ ð{Qïñ}¨7qÚìž±”åèâ'饲‚‚~ã$Bzuç£^î(<·O´’^¦Äp-URÒË`š™[š¹ƒ'½ôÀ=ÚQ–aí¯%½¬Íp#ˆ.šëI/KAPØ•_ ·Sè£ýDÎËZ¬M>}Á•ó²(Ç÷ûÍyi+ÓKÙ7à0/-¤Ï „òÒ‚âqàRÊË"¹¤ßåx yšž §¡­§¼´B¹ê²PQÊ{0üí¥¼Áú"¡ìÚÁe6Êx¹öØL,ÆËaÀǵOd¼|×ø lFŠ®qSÛjo¼\Ð`´™V”ò’²—¶m`=奅 «9LuŠR^\&"Ôu-—ay~÷Ú0¯w¿5×´—r^© 
8¯xÕ+ç=Ù'ÏãjVÌL8/¾Lúv]¦Žõœ7TËÎKgö¾·w(ç=Ys1ï5Ἐ`%J"í‹ÎjØ çÅœ4å)ç8ïEµçæhà8/MÕm ÏyiúsMl2é«ã¼Ã/Æ$tÂyÙFk"Ó­bññ×çý¢msÞq¦z­sá¼l£…¯õyÇO–½ßÞsÞq—ùb{Î;®áiYø:Î;Žc"[úñœ7$ ç—BûÕåÓë8o¸sá¼lÃSLùrk÷ œ÷uœã¼l»i!k®žó†w*œ—m `˜hÉsÞq~`Éö<çÇ!’Î9µ§¼Ã{n©Ë8âÁ¼ã ®6_ñ˜7X¶ æÇ%³çÈû:ÊAÞ×Ù&ä¥ÿûA£)CòާÈImwó÷Õæ o|kòަÆ-3SÔã!ïë0y_mòŽÎ|ìõÏxãé<ä ŸŽ@Þð“žñÆnàïx(ôë1kÏxC¸Æ;‰g¼#t±(Ù2÷uŒ—mézÌS<ãeÛÁ²¶×nÛŒ—Ž` dqjÇxGÛñì#÷Œ—¿‰dá¨6ówŒ7ÔÆ;sëžñŽ6îÔ±y³g¼¯ãÈxãóð÷u}ò†!@ ïë¸ò¾swXÉ+æáë ïx5¬|”ä¸=^-òÆÇï!ïøM®œ.Ÿ[yG]|MEä!oì^òÆ[÷wt¡gÈ»‡¼ç˜M¢Û~yG?yJ¹ä¿ymÓ'¼¯çâ ïëZä^ynÄAÞø<ä}µ9È/ÅCÞ×qò†xâ/2%êËw*å¼\@Et<®7çÕš¸óJç—ž—˜•zØÏKÏ+Eq•ój]弬÷Íúcë7EÏKçwÜz²ãDÏ{ñ I³LµÊy9 jmO‘󞜽>åǼœ7q>u p½œW‹9/qk]ÛÀ·œ·ŽEÖó‰r^rv´0ký¼ä¼™½‰ú‚/ä¼üJ‘wß×r^þžêñ*¹†û:¹}k9 {9¯Vµ}ÉyYXl¹É¨œ·°¨†ê©¶9/»2æ»VYÔ¼‰å wž’VQóŠz8¨yµ”´¨y/Ʊºv¨šmèt˜ öjÞPÐWÔ¼¼N ÓkŸ®¨yI¹Áîþš7^§WóòåÊ&ÇKÎËÃÌ(è8oØÜ伉!¢óÄÊ"ç%Su•[DÎK[(ÿº<éK<®¸Èyy‰rÖ¤Aä¼ñT^΋ÿ(OÕK‘óâAÖ:Šý¾å¼‰Š½]éBÔ¼$»Ü|g‘EÍ«]Oż"jW5o¸HQójµbUó⸻2k~;ó2Tâ[]REójÀP1oèy"æ 5ÚEÌ«ûjTÌ^·ˆyÃ)b^~äôNžaJżø¾“oÑòêÂjy9ðЃº•çåÙ²+Û"Z^ÆDô»8Î;º$×b‹•3ób^~lÜunÛYE̾6ó²ßu$¶eÕ‹yC±xó²ïbR·|ÈEÌ Ы˜—;ƒÆ$þúÎ;ög ­os V1/s«ûjH='ÏôbÞPY\ļ'Ñ[¥ú|¹2Ê Òýâ¼cØÄGÇŸ‹b^Ú5â"­“ˆyñl*úšZ«˜— ˜µ‘Ï·5/÷´<³LóŽ@ÁÍ¢³§‹˜—ÕéYhÀ”þ^ÌnOżªÉÜ×¥b^|LîïT¿órZÖ)Xφ“HŽ›æòb^œ×|Ýç劘÷$}Ù¦«*æM <Ë‚Nż¬îŠÑ¨ÏΠb^Ù¡ļ'³ß½Ù]żTxqÃÎñ6è=YkAÒ>pó2ÞÓh÷¼Þj^¾8¼´%5o–DÍ«5èUÍ>Qóžä"}æéDÌ{œÉ¢¥ŠyõqŠ˜—£Í•©”Yî š7ìÉ5ïIjø˜ª«š—¥ºŽ½ƒ^Ô¼t¶ÆÓ»æ¶Qó†m>ªæ=Ù¹¯bšRQó²?ã@Û,²Ä¼º;Kżœ m«ÑòòÒŸòDªå úl¯ååÓÀurªõ ¬wÈE˜[É?Õòrž‚Ô3EË«òUóžt4}ÌŒEÍ{~Ú‡Oª)j^úÍœyV¢æå€îVáDÒK9Œ[’Þ̱üDÒKËþcM\^’^ÖÎZ‘Jzépc°«oIïEGSš+/ÚKÐiXhyˆHz-Œ/ß•ôÒüfÊö>ÿ$½,ÞCfàÓKz3kÜËXV%½Ëï¶§’š—ôŽ‘!¨ÜŠÞ qE/Ƶ‹ûDÌ^½ s,ýde0TÑKÃnÕ³šmNÑ›Yïö©*ŠÞÂ5¥]ŸY½ä£‡©4D΋Q}ËTÏ«gÕóòdi×nz^šð®óÛ¸çë|Âçö¼™E2ð|ok=/ò:€.°Øóб²êy9.#Æ÷T9¯T rÞàêÜyË5Ô”Sæ,r^’¼½-8ÈyYШ/!©zyXÛµƒ —K¨ûWA/ù&·n¬Âl^ÐËòÇ® 8½H|ÒFôßgÏûÇŸŽÏßþt“7_, Æ‰Ì9>3þiZ£qüá_~úçïÁÊÄ_Ñ ŸF3üÕe|Éþm§ÄÍýòÜî_ù„• Cë6×p‹ ÓÔ¹X÷×¾&ÚëæŸìùGÁµuö?ö æcï}ý¹°®ÓaG?ä ÌÓÏ?þгÏG>Ooþâüóÿø\ù*‡½†\€ +\N¸0Ôµ‹Wô_6÷ïß%üǃÏÏ¿|þþ¿ 1ù›Ï?|~þ»ŸþÛÏ¿öD•îøyŒËç¯?SþígbâG‹AnwÍ¿þLw<Ó‘;,ÿ•»Ë 
Å?.Dú{¼}9Ï4ÐíìBÖ?6ê…‹¶s5~r¼ÝŸü#ÆJÖ?ÅŠBÿp1¾é‡öÊìÍDá¹C‘‹ø±}d\ö½šÚGFŒ>"óUùñÿä~ç̽ÆÂ4õ¦mXÎå•O™Aù·3zš«ð4¤tH÷_§éë4ç_zLé)-èƒuþùóä¿ð<•륣tøñÅSÛ§¹ãiþ“SЩ©gÌ$Óç&?¼1«"! gàÿ¿ÿü§_~ùýŸþôû?üá¿ÿåßÿôïÿšÿ¢¡óó«v#ÞÔAø¸©»#<.Yi›øàkìKKWÅ6W=š_Øv'«)ùêÑCº±¬”j:}õhª=Kæ†oެ®z4u7T3Ùù8žì|á73ÂÌ.À§/”Kï›ïWŠÛ"_)…xœ™¥’AMÕWæZÕ}ÛàR©qxêïIõq¶¹ Ò|Y[’Y‡[ÞŽ°\§Û.R•n…Oý½½—*®rÅrãý)R\J»z’È®ú{z™'•V»wŒn9zå‚ÌScËöݪ\äy*ðQKPhÛ8n–^ur":¡K‡LÅÁSRèÞLýY‡OS~z,ëÄšiSeJýTàã¢ÍÞƒÂÕÖ/‘:ˆÍ÷ùäz™ø³RñTàsUÒ+uOý=©[Î6WAZʵWê ’ °¬NgZ‚z]N¨¢C†_CZCi.`‘”Lñ8[ Z¦mÃJW™§¬>2n쳩ú ÒfèÙŠàT곞ª"'—¸ñ) ýÑ—Agª7§Ðæ*Hwf*—“­ù𤩱ÒÜѸÌô‘z65Ÿ®°ˆÔ¦ÁŸ|a‘ÄåƒåMÌ6WAšŒ“›+æ×Ÿ³¯ Íò¿K3¡ /ï pu.FU.ž=¤ÅÌ£—^iL¶ýÙæ HëyÚ£R -݉Tp!…+Ƴ>_>Z;OI¾|4M¿¶ Û‘JøÚÊéËG‹ûJÅ<֛̕+Ù׿î~$os3"Þ£/+ràsnÍ’T¶¹²"LÓ²îF—òõ£%S¨¬üþ$°ÄŸ”ÅÌq,jO:ÐYccnK¯Ã¤fg°ÚIJ÷eE(,eäCÒ}ø²"T¢m¯­Ê•«'ƒ=èY±L$*u.ƒÅÕW“dTÊ ž V¤òõξ®ˆÔRb›+,"Ùmå g…ÆžJ=•ïøI`¥Db½Ó¨ ÿ:èPdÉ;þN`©cÍÛn§Ž l•m Ø}eêc02L«öÊG÷d°R\§ò5>µEn® b3ã®ÚÕÁgI×µ¹ìÍ6W\DäŒ_ÖÆ«£ Ó.ÜDEÇ}âåÛÃ) u˜yØZa%¼l{„*_äv)l”×­L–"+nò¼Mñq9¡ ¾øDZ3·Uë’©%«/Ï€Õ U¸‹¥^KO7V':±-dþ¾½‰«„ô²%QA/µ0­ÿµšH€^7lÐK;WJé 1zÐK³ÚG$œ—Òùg—p^bå»îÚŸÂyGëËqWÇyϱkฬö§pÞ“zÓ¾ê< æ%•omg‚y±'ž/V0ïɽ@´QKv:‡yѵ8¬/‘¡`Þ“äÚEdóŸ:«HÏyOnJ¹)˜µßtœ—%¾‘}¯áZ8/7öí+˜—?‰ï~k¶<æå¶ÙÔ7ló˜7nîõ˜wl’ækœÜØA^•Xp Dò3‰y¹‹ó»5ƒÈ«îÂxƒA‹0^ÚO0k»Ò¼ãñ¾Žòˆ—c%¿ÁÃÎæ/£8†²Ù%=ãeyu®sX<ÆË6>²µ,!Œ·3v§Ýï<ãåq™^sÛ»2Þνkxsæ/Œw”zoyÕ,SÆÛ9åºÛ濞ñòÞëÑl¶¤ŒùgkO¶@ÞÎq˜ÃÌÍLJyu¢¢”·s¿öö(UÊËÍx´5´$S(/ǘ³®zîByÜI(oçŽ(Œ¶– åíô‹é«D¨bÞΉw>[&äeæ‡ôüòvî+Ú;Óòv’»½™R!/Õ(é7“`OyÑe*w¶ÏÔF1oçÂÖÞ,£˜·³î@žÖã yÙ‚Ž7aš ^&¼8Õ\€RÂË•ÏR%¼Ì¯ï-–UÂÛÉ#Ú*š«„—¹7kÕ^F›=áåSFˆ³eGE¼‰ëŒõâåîÐÜö4WoI²ÒŠx)x&ÄJF¢=⥆šEŸl¦.ˆ—_4]D ‹ âíÜE¼6ñòÖÓ¹ÊT+âepIsƒã'"^\æe(è/N×𘭸àF¼ß&5Ó/B/wæâZ@E¼¡ã âåjëu-óNA¼Û#11 ì µUpL /Úf•Ò9-ÆÛi+Öª¢2^>cN˦y·2^FðÌ}îóÝã ïF/³@G˜{­”ñ¢ o÷2u´RÞ1?ųA[)/û^ÚæÑŠy5ÉUÌ;F“m­˜—‰3¾«4©˜—÷pm»mżš+æe':’…^yÛüuó‰PÈ>¼| tîµe*¼¼Ìå¬ÔªQ^ž¹LŸÆ~Jyùiä“0e^„§¼„ ô¡œÓF¥¼á­yÌËë4j5x"œ—׎ÌÇÖ˜•ó62%|¾Î…ó²#°ds6¶ê9oc%¯²™•óÆã<ççóœ7ttἂEyùÑÔŽ«˜‘W0oøØó¢rMxÎìæe¦¼*U æ ñP0og?Ü[-ó²í)ꦘW*a*åö ˜……Ê>,¡¼} Íå®”7D¡¼a˜Ê.ÓCÞx˜‡¼!Ú äeÆðlYWÈË'}"¶K /G³”Öº0Þ0‚ ãåS¡O}Å3Þð= ã #µ0^¾ŒîkÙEoèéÆxù‘ÄZ±e¼ìÌ”-xí/Ÿ±Ra¼2rxÂ!¼L3%¼<æ2sƒŸ^&í³vù›ðr²Äúͬ(„W.Rø.§ª¡ò]òšàæ7,|7ddÂw9ýzœ¾•ïòGX 
Ýb½ðÝNÇ“]öS//l™+ßí4¸Á«Tõ|—ùm¿Ð ',¾ËY=ÕçS¾ëg@Bwù¹q¦_ì=Ýå³,ýš Fá®ÎîŽðqš¡ç†»œ+ÞW³8 w;÷â«€‰Â]N8u,X+ÜÕ¹·‡»cê]³®Í}¼ÛY²‚ÕêÊD•‡wœc›¼jÊ Þe[zLòïÆßôxmø² ‹‚wvà~Èû|ãÝp{ï8²kñ)Ý哿îýéÔè®Ð¥»|Òü¦ct »t˜ßß'Ò]þ&N`9Ò]~,ÒÎI¥Ò]¢ –¹Ï›ü:ã9}}Bw;Kø–Y Ý^‚OÍ:ÕЫ¯­ÐÝàb9Tøèïi£1ÃïÛ†ø[ç^˜—Õ Eü˜—å$lÜRÌ[h°%{‘ó&–[a༤Ey)ÑçmĈK8/z7Ý"æûPÎK“‰gûóôÒ¦;}z‘,ÒýÔ¤Ì zz­¤ý›zùvÐMp© —0€"ïÅr…ó¦§@rÞcŒÑ§-Ë*ç=X})pyëy罜PÖƒ^¸ÛBÇ'êy™M>5‚ô²æ×6³  —&xeÕN ¨Wz¾êy­Óª`·õ¼A¤-z^Í‹TÏËAõñ’Q=¯®ôˆž—9Soׯ¹ÉgÉn°W9/óAGÎ)…Êyñ¼ ú͸U΋»)å\ÚIE½º>©r^^ Ñý•VÛƒzUСrÞFoLŒ› êÕEgE½¸ÎŒ)i½ìZ<êåþ=‡¦OD½õ|<­« êåÊ2^€¥xŠzÃ"· Þ JÔËe`Føl¢X‡z™lcFu™FP//“›Àç ¬¢^Nkú®g¤¨—Ÿ ½ŒL[b°w:c”¥ˆØËˆ€Å2y½ˆéº!$½•4ž‡½ÃÈ0-D¯°wxv;HP¯nPØKIÍV"ö¸—n!,În;á½”44ÌumŽ©Š^[¤ÓLÌ…÷âb fuÝ÷™  zñð0E·µã èe>º ;÷2†³ö«Í÷2Nãºlâ-´·rÝ ¿bøUh/…q÷¹)žÐÞÊe½r™£¾Ò^ ‰p§7„öVV¯ÂH×¾ ½7—•·ívôÒoMÿDÚÄ“‹ö†‡¥‚^,&OU5/FlÖD6-“ Þ ±ó¨—†¢ôA5Y¢ªyý~‰ æUU¢Êyц—m1FPo¥× òÌdÒ[zé—Zî=ÍU9oåd³¾ UÎËŒ7/#Ñ—œ×?aôÒ¦›ã¾aYzƒÊS½ôuÞ[Ÿ^Š^/Ô{4½Ú^ èE$¦¬©þTÑ«oV½q¯í$°—.À\eÏ‹ç>¬÷fE¤úÕð¬zicº ʃžWã&èe„©Ýº:ôºÎªj^ F5¯™ŠœW*4=/³½] èyU!¯z^Ý«"œ—åøòž÷=/—µ+ÝÕÞœ—û격r^<4DÖ»L¢)˜—5ë¶SÎË"ÇvÅrÞpëôjAÈ ç=Øy¸d9oÁƒ^†´^ÇãýDÐËW^wI½aûŒ€^zVS%`cœ½ì)}—KVжKèõ=L(oè*‹òrË@Ýå›”òÆK÷”7ˆÿ…òÞç°ùXkvžò²“»$‰RÞ›x{Fª”7ô¡¼|M,;iV0/]äîíߥ˜—~AHtÖ*”`Þ°qF0/3mÇ3ż $txöQŠyq˜M®‚JÊyæá¼êk¤œ÷¦§àžç(çå?¶¼C9/7êä]?AAov;»ÐvzoòÄ]KAoØ  W-ôò7Ÿ'mœ7ìçÎËaŠÖɶr*œ÷¦GÚ]6²õœ¿yµ]ïMH/!MUlö¤—ûî=«UÒ[¨zBzõ¾ôòäfµ^o)oØ '¨7Ä'A½ürÑæÆ2½aï’€Þ›f¯¹ÙFh½¼ºks(èe,ÀÀüæô²mÒ˜7éeÞ¼¹1+é¥ózÀ™^ýŽ•ô²sÝøúçµë厮¶+Ê(ëe$fZ_ èzØ«»QöÉ\éùùq°Wƒ¬Â^5BSØË¨ß¯e*§°—ã_91Ì™ Ø)yÙ…h ;‡1…½…Fò[4¤Z^¾Ôs`T-/?c×ÏSØ«;_öj‰bÕò᱊éYöêoÕòr{+··gÓÖzÚïÁÓ^þæµk“(íÕݪJ{u¿³Ò^:¶Ñ_| a=íå …°z ‹w…½¡Gˆ”WSm½ÌEÛ.­©R^>M ùfñ¬R^Žäe‡Q)o¸»sæ:øîW!¦ï…½Oé„ÿH/¦j'OÏ$!’^>üã@êÜ@«{­Ÿ éÕ*Á¹!tÍÖQƒsÇóþXˆsÝW1œ,Lêt-RSçz¥—eãœ*¢U¤HÈEé:oIun@¾BGäjn޹췷&¼s7dœùÚ²]±nà$é"àbÝ@3Ðbó,un Aâªû”¢ç:▵ꜘ®œK8§Î 7/yWç)Á¹×¹ÊÀ(éÅcÏtS6/íÜÀõD0îoÒ«ó\%½ôÄ¥"h¶xÎKØ@AŠ!TÏyñƒè-ü2?ôŽ:*yI¤‚o'ô×ZUÐ[i]¢P½Ì¨O¾÷þÖôrK —oMo ô„‡¸·<Û<èU ¥ 7ÜŸ€^~I¸b›[ÐKÃÚcé6ôV™¼¤c zãozÐË‹(Líw½ƒIb6Y¾½,yÑr>ór:?wãÂyôT΋8qn…}pnŒíÜ@ÓX®«Ÿoç>+–Ø´Ý]Êyå¸oÇ £:7„3yëÝØ-œ—›·q²…••ôÊ« Þ ºZ½‚„'½úøôÒA©Ó„¤bÜà·º«mMì-˶ xrôð™çо í^x‚o±xÃgõ…¨—ã×{çšuÀ¼>0¼(/®b• ¶ cÞ’l 1Ø6àÆOöñOd¼Õ÷Î e¼Ü {î•ÀÀxY 
³{Û{¯¿oÇx§· Roغ1¯§cjÚp²b^î£^2mÁ¼¼³Š`Ἄ“4è^Ì2p^–b²ù¶b^NX޵ë…y1Îâ÷ítórÃ7†·%TÌ«Ÿ¢`^õ3RÌKo†£=ªdÁ¼j¤ï›åÍ,1pÅ߆p{Á·áº÷êî‹ò&¦ë¶gZ)ïŰv"EßôÖÛ6äÌNç1o¸Lá¼! NÎ>¶hÜYaAcA½è%˜g¤ó Iox5õòûèåkIoˆ„‚z.®¶÷1êÅ-à*·bNP/áv®eí€zYth—‘ ¨·q*ƒLÙ´¹žõ†µµnàJl¿ŸTëR¼cËØ”õââ¥6Ûã Ö tF¯„سް¤¬WGhÝÀuùPëµ(QëV_ºö>a½>ü*éõ#’Ú6Œè‚¯qö0µmЮ²m¸À¼Õ Á·!´U_FZ Ö êeÛU÷t[}Bâ®M êÛ eðmÐqE}Xõç:¬ `ðm A]ÛÛ7Ô·AÜÌ‚o l ;³Õ2õmÀË8¿¶ÙaÕ·!܃ø6±åla¼ú¾òÒ‚1±âÖìryU£—³n– ¾ßb^:p”½…XopçÆ+gŠx™Gs½+½µ¼ô ¹1­³ÍôBxu½jlå§z5TÌÓ ¯q[ ^ z˪ÓÂkËVðªüE¯2¼ê·"€·²SZEçð’·!™¯…Þ`B!€W¨Mðj`Á;VñÙ ×y5ˆ`Kù./…Ý®x5HÈ ^ ´p!´[¯ª;• ë”ï6HæâàÕÀÌ*­zCÊw/øâÍ!%x5Ð˸/'¼àÕ@ØQ7‚W¯Œï|𭝶ð6–¨:N+x,€·ñsGòTÓKÍD` xéX‚§‚wô¼D[®cÞÀCÝ÷95$§åýM®¼¬ƒ”ŸÎÛ•—áXImâ[A»ô“ËÍ\^íÒUt{´ËMµ\ ¬«íA»Ü³F/§¹”Ðn@´í^þMÙ¬Xò²Ìá.´À.7Qã:ûäÖ vù1¼™ŸªZòŽ’l« ¦‚ÝÊJ5Åê9¼¸.&ñuɪ#¯ø®Ë}ŠHègv«\¿y`ŠT§‡R»•†‘k}îvjÐÍ5YÁ®„h»¸Unúš²vYj“ºz°ËË¤ÏÆÙÞ^‡P|Ø*Ïvš*ؽYŽ<™Tævòm= ]}Ê‚vE¾Ø®8w¶[¹ôX.¿Êv) :–”$°ÝLaÅÜ5È.ãD]E+ÙErÄ=9=½yÃ+U²Ë¬õJã ‘]&cS.ùy‘]}`Jv+ÍC·Ò[É®ÏإȹŒ‚‚—“们ù¾‚]†\ÿav½v¹\6ªƒÎ6Gv9„>z »Ü ºüìÒŠøX;•ìª6{]ºø­d®¢*à%ùLœN½Ý¨å´ý ®«c¼p]uþ~éwÝ‘r]¹Ôiä­î}¸®:s)×å¥ô5 ¯ŒðÊuCr,dwÖØ»êì“Êv+ rñÏŒ<Ûe¹qÌñ–”AØ.'4©j†­=Ûåt #ÍšÈ ÛUɳ'»ÁNÐ.ºêÛª!èµ놫ð`w$Üeîìò½aF²fšvÃ;°Ëé#-Lð²À.ÿºÜ[f%`WÍÛì6Η ¿³7ØÕ.â¹.ïùÿçí|z7·±+½÷§ø-'‹’¨¿ÛA€¬½ ‚^t’ØÈ÷_Í}(^òžË×Ýv]3™I¹Xz%QE><<‡uüF„ë2&C϶Îò]N†áqÙg¬K™½ ÝU!r]¤ x›P$95èã®[¿f[Wã ×eœgCŽu}çOÂui¬,ò/^4°nò¬›ö¨QƒÖJĺ<¶r—FŸ#ÖÍ5±.µ²÷¸Fź:0V¬«ƒ{uj°7ÑF§Ý&7bÝô«S†ÖÞÖX7ÝBsjH?±nªbuj`xñQź7ÜÓ&zm±!b]®ï~\ ”œ€.kW%«Sƒ>QÁºétêÔP£s/×c ֽɰ´!…+Å©\ÌërB£N xï<]7®N âg§X—óáòÛ4ÞjÕ`];2]w#ŽX7_Kĺ O7¶O·z5†{6Üg¬Ë=}g Öåþî«{«WC˜\ ÔMÊwÅFC­ôÞ„îÒwû~?¬PÅÞÞ”¬n É}—f¶+V È[ŸŽÊ#Üe'øn“7Éw«­ÂvúÌ6Y5yp;ÆHV HÚt,Ùð^twNÔ¨ütÍíM),6¼Ö#Yت“ /z¥gmâ—dÃkßÞžèúú4Üm•önýV¶»|ÛÅ\úø˜a‰wì½¹2¢ÝähÑ®¢ÖvÓ’–‚݃ÊdvÛm»ŽˆøD,3ØÝAo;ßͯLvK…m@Á.™Æëê@Áî|éþ v7tÿ¬ßï3ØÝˆÛ†³Bv7Œ€ö³ì.Ê`×ÚïÊ6ã~e°kïļìÒ܆ˆ‚Ý7“ §ôY±»r]uû’Öví|X~ÏÖ k}mtç‘i‘ë®´gûZYúÊ\×:ä§ö¾2×µÖu²qϪÿ+pÝT!Âu7Á0aæº+Ññ›=¶uŽZ³KÜ@PV™_‰ëÚ£±ÛÙJcõÂu6Dì”—Â ×…”à…¾ß“/M%²½ý/ÞŒ\w³9±õ v™/ód‹’«oR²›3’Ý:ê{”ìâÀ²u ¡’]L¸.TS²KôåÒS}”ì”c¤`·œ,¹¬’ÝB¨Ááž´Jv!ì?º÷9l­œ5búú$Ù-Ø"ÔøéɆ·X·Ît¡|Ðìô\w+ÚµŸ´–î«ÌŠví¸§õ0_í–oƒT´k¿fÃÚÒ6(Ú-u¸»ø_ØîŽˆÐú|æ¶›‹lÜs 
Ò-l—KA9ö)oã¶¾¡l·@ÏËáÛÖîrâöXà.à´VÓÄDwK}øgËF¸Ëu®ËmUúž/ÂÝÂz¨_î쎶áœàîÆHí°[}NG¸îRfC¬>¤Žx+›ûr™W„»µ„¼µst»zTÄ»É'â]Šì ØõÊïRö\®Žp7* Ýú{ËÐ,6´[¯àŠïˆv)³J×$F´[+A—ÐîT6Ø.E¸p¶ÌFa»¹,ÂÝZÆFs¶á­—ÉŒ¥m›ŒpWë#¢]J¬ŸèȈv§²€v)ÛlæÑÖœíæ:Žlw* lwúÍÀvq]ºí»ØR/î¦ÀZ»µìY`îæã"ÜÍÇE¸ËuÚ\º¥jF¶›KÚM^þ‚vkámÂÑnþIÈný»—¦  dw* d·–­!@.]nØ9§wþìÖ¢ulŸd7ÿd$»õ¸³ç¼ Ù­e˜v7JÉît\ »ùö"Ù®%Ýú¬C«Énm[6Á¾œÂ²;/Ýé|ìÖ2¸_Q$»Óý²›w»©(rÝ|‘ëæVÈn¾Hvs{ˆd77ˆHv§KyÉîôsìæ~F%»Ú1«dW_›@vóÛÉn.‹d7‰"ÙÍ;’Ýeìæ¦Én®–Hvóƒ‹d·v•GÏB²››^d»Óµ¶;ÝC`»õ|ÛÐÂG¶›¯%²ÝZ×6cn¬CØît\`»ù9¶›^ƒˆvó'0¢Ýüá‰h··ŒMJ/Úþ6 Ý\íÖ{Þ‡áOD»ønçØ!ÐîTUíÖï~éV"Ù­W iõ ²€v9Ûr !yD»¹CŒh·¾çØnÑn}2çØÑn=î]nýÚÍ/yD»Œ5YW÷Ý íæQTD»êî(dw:]`»œÎ†Tîf.l75ÙðÂ*ûž(»õ%^ûŽ_»w_}ƒ•ÀÝú›öÕ|^å¤ÂÝ• >§‡Z$¸+·Ùn¾”Èv9ìBzµáÅOƒîs¢»5ãjí[”Ô†we|»ùx²á¥ÃêÛÎ’ /„à~š”«â]nxßÜÌöÛðî³áµ ÙuµÅ­IÀË¢}D'ïupÒ  ^ÀKß•¼ÖÞÝb-™3H,Î¤à æøIÁËÁņ[ÛLzí wàdÒöÈ êEúgÏê#êUi}ÒðòAyÜ{B%¼ö²ÚÛÚ$(Jz±ø»+¥lx“ÇŽ¢^µcQÔ‹ß²‘ÒªL4¼xš2u»'Þºã¾÷gý¤áòRQð®¨õüíL ^Ù*—¬$åXI/¦Dh»‚Wv+é=™ß»K¨…ôªÍŒ’^lK‹ ¤—Àô;8GÒ‹'ÔÒâDÁ‹!³}•_ÿ%½'„ä´W™ת¯Ð^\L¢¤WM•ôb„gŸóën o‘ôÚ¼Å&X€¦YÃK4=~§[;_$½ê¤¤—ã«xK"çÅpl³§Ð\AôªÝŒ‚^LÓÈ£hžz“)œ€^=NA/wn£?׉9ƒš˜ èUs'½øÚö¯Ü¸AÌB¥tk†½Îäœ/ æÅÙiHS„òr Èç°5žÊÙsÕ—A\¤”ñjÃRÆk ‹gôœxS]¨/ƒÖ…0^++,5ŸK5f Uï.Œc† •KÕ™V[-«E/Óå½ï QÀ›ŒÏÔ™aeøñøV]uf°†Öl)(¢ßM‹¢ß=ñyºT­È8[[ÄgrfТ àJ‚û.£Ã­'7«3Ë^=I 93¨y¡ xO¢ ¾qSÝwöô ˆäÌÀhéÁjãŵQÀ{°Ð·®>Š Þ“¹ØöØXðƒï‚lyóMÛ"á=ˆ‡(]´/^¬_Y [N/ÞƒØuë8Jxw/·9J¼•ín¼Ÿ¬´•ˆ„—åiö·i¢hx)»{Ä…jxÕiL5¼‰²k¤)>EÄ[AÙH¥ïQëÝÓ¬UÄ{ð»ûRˆxBŠïÿPïÁòñQ’:𲮺öU#ñbLg is+Û(âµK)7Û}×YÄ ?¼¯¾2ôŠxÕåNE¼vÀÅ®â&œïA¤fS÷‰‚÷`~×å3¢àÍõ¼ÇÉÐcuE(xL­3;гܡà=ÐÈôl€äÁ{œØßü•0o½|›$.³/žžŒ3[W(ò].ò,Ï=\v‡|ËmÛ–æ¼-ò];¡ÍcÑ¢|×þcÅž ytŠ|—Æó²áY¾›¯3Êwùͳ‹ŠT¾{Fì½95*ßåä¥GË&^©Ì¨ß啲O‰¯Fª-£M¤.î½0ä»~lؼ|eù®ÌÄ&ûÝ`ù®ö»;¶î5ªê]_&û]¾#çu>͆ ÙïGûd¿ËFi›á:û]ûfÞ8¿*Vµß­Ý§'ë©ý.áhãÞZQû]íÞí8±ßE3Ô×U’ý.ñ1ÏÖEÁb¿‹­Æù´`PµgÀOäîZ/µg`»4/ϻЙìwYm:Ü/5ùï ÀIþ»/¡þ»BŸ’ÿ®Q%ÿ]IÈHþ»â¤_ Ô8óÛüwÿòÃòõ/?ð­³ùËþ†ßßêùëß—ÇŠëþû‡ÿú–lO÷ù¥ðö+JáòMüú÷ÓîîçpÃç3>Ö¨vŸíÏèƒ}xû÷?ÿqûÙùÓ÷=·}gú½¿þ¾ç¯õ]Ÿwûã1fòߥæßsó‡ïzæZ×ï¹ß?~8ûû{ù+WqÐ+iãt@uìzž÷]¸œú©9zÿãjÿƒVþë§Ÿ¿þíÿÙpä¾þýë§ýáŸúç9,§’Ú¶û·Ÿiÿ†3ŒÄmtZlþùÛÏtæ3}‡>±÷Ãç¶ÒûÞïØ'Æ»¼ƒ†ò{}ªÞqlõ›.BʾÏÕ€AmBÓ6ÉèÕIJïóuÄÝÊFõ÷v勉Eß¹]^Aõ÷=ZI={j&zß·¼×£ %]Ïwm)oW!ME/çS[ù 
_›/²—Æþ|ՙ¼‰Yí1÷ÏË×ñvÏ¿û3Ncß7; çí[7ŸæñÓlô< »^ˆ9dõû×ϳÿÁó°„¶ì1.j­ŸæÌ§ù§(L>+ŽÞ¿Î ˆmNEºr:ÿ«žà?þòŸ?ÿü§_~ùÓŸÿüç?ýü¿Üÿ³ÿ¡¯è×oÚx#|è’JöC±û”¿66[Žnp#|èJZfxk÷~¸ï¨lcï)=äáCŒe~0Ùnž‰ø½t9£Í˜÷ÃPïjâ #+b’èa,8OtõêŠÏý4Ë»º7¸¬lÅ¥ÜX ¹²ÊÑÐ? á+òðÈxô]ycs÷³ËAî*XºÊ™u…–î{WõC×ʲl»¹QÈåXîÇ Bþ0t{l ëËb7ò‡! ßH÷u÷¬ùC‡yì·½;к‘?Äó¸¿÷)Atð°³¹›ÝýìqãÇÊ›æÎU÷Aýs¡Èt›²û9âæw–èwÖ»Ÿ3‹Èí}º9%ˆãÝø»·{#}añlj<Ü䎲ÁÇÖ^ÖÅEÝ'Û7³Õ°šµp܃ô¡¯…<,àÜÖMÔy> ‹Ù oÿ>,~Yìè˜_¤EÙ`Òö8¬¶=õ±6{³¦Þï¾Ù‡ç/."Ë‚¡ü[v„¥mÙ…ÿXK ¦1 ÝïÆJ†‘ÃÆÞQ÷†~ªÖÛÍ+5¬îÙý )ë䟖Þ%ºgS¨°Çø©ÂÀî €ÉÔêŽËϺFëäk›o¬¡,ÈÒ¬•ã ¸·²-èDxÃxsÞ…I»hÄÄÖ¿Õ#dŸuþ(µ >Î×ÁâY#óÄÈÒÞĶÀKÙЂÝè¶Ök•ŽÏzFŸ5yÅuQêéM¡~Ø ;ìHÈeÂÿ½¯'ÚÀî8»Ñ&eCÅ=ïÝFãÁåsØšY[eƒäËŸm–Cb°fÏO-ž÷bøþd‰Î«Ó²ü·µ²¡yJ‡íÑRŒE‡ÕñuëÑäGaöÏû8­E¦œ}û:Ê‹'Ì=lÉíV+°üÝs ) F^â“cõýuÄöÚ>L™«¬ñ)kô}»°¦åyÊ}úX)²VûBMÊîè+käÓ”¬»Œ®6â”õ”=ðj>NåtáÙSŽ áS[«§œÑOä—”‰uÕC$ê«zÊõZÉØó=졵îèÝ+&wv1¼ž¬°åÚ_m·D]éâY™øD•…ÝßïËͰ%:B5škì³oC›|©ž½­šz(?{2ƒ‹÷°a™«G¼t_nÊÆºKªäýŒaab>n#¦àõ¸þèK¾”Ü!WÚ!³—ŠÛÛ]‘Ät²±â¡FÏϱ¨úÄÙ£ˆöŠz‘ÇÍÏV²)<Õþy4#ÇLÊ~ÍÚz¤ ¿ÄôÜ|±í9Žèlh-€ÜïWbiŒ†câ#úÔÔÏàd)ç›&’ÏµŽœ†t¿öI<ÁEÜÆPÑãkaÙõn²ôñÑpU¶ÞkÄCÎüX{9î&³¥,D@±¿ûðeׇhÞ34×ñä;~}N L`³Çæb£§æ†æúæ¼¾µˆ¼phˆíöŠ­$Øsb`ÔÿDÑ=EüÅŸºÈâý«X1?¬ÍŒü½F4¹Gh­ñl5¥¬K–Eþý\k´«ÓVGèÊH`dbUð*Ÿ7ob´Öxä Ó[v–m{?¸ñG·öê@Yˆÿ›àÏãáÍÏWîzÚVè§Ú6÷Ä—‹q„ÍÃß.Gáæø9Ü@ŸK< Åj™²àÌv~àÞ'Ö<Âè5}áhPwh°öíÞZžÓƒeï`Sàç7:X}|†Ÿ¯]4^¢ïæðçNFôñ ð©ÎÑÐ÷¹Ïè­š»b”̓»ô·²>€UmkƒcK³DWñ.>˜ˆ ßd¹`†°*ôÁ‹ ŽœË×ÐbƒM³U~ wˆ¶Élzù1+N§ÛÃ6wÄä‚™7›6½7þœ1Sá&ëÜ¥VOEõ{áú²øÀ÷¹Â6wÿ8…7XÓ Xmá¼La«Þàµ0¸€‹µ®1v ?{»_K=j?ŒbyÅìexí®¬¬„Q,Fàlˆyø…q,ˆ¡ê+ª…Á€»Šp×å:[ᆲšma…gÌ!:ªì©¹"Zá“BnäLOÛÚl…ÁŒ‰ìÎ˾ïn+{‚óµzÙÞøN|kYÈ“¤Mt71 e`7±Â5†t¤ú©$fëY7Öd}à›˜)xìVhµ0D}¥'òaôºùœG ɰš<;º±Â3šË3Æ>í¾ïvÎ+ l9’}ŽU®]ËBÔÖ‰(ÄfìãoÈ{塵ŸÝ¹ã8Þ‘ûÁÔÕ]žʇGr ÿ/MIR C¸UªÈØè}Ä[{\üÈÆ·¼h•Þ„%+dƒÃ‚ Àë¯o]ó²oí!ÛäjŒp§[aŠcï‰Mü?\í£_ò‘lˆ°#¹o”!OÏô°²°!"?Åí Ã\r®×µ‰bVkáÖª¬KbÒQßóÙÚlº†à=¡M¿¬%ÚèáÙs^ýÿ[ ‰”½UÛ Û(s9æ†\$Ré ¸„-·¾bµõšŸf£;f0.iK°Ãr{«¹xÛˆ”BRÛ*LÒíÈuo¾$6~'ïXÝTyŽo_9q¼þŠJƒq«ºÏ7»rZ{dºWÚèƒÄ”Û=³aÛûŽYÁ>®›ïÆVÌžI«2ßÙ+4¸n}hüIYðFÕnâ£,˜2âØÛF\߂۽7Çô•„‚­he“j‹"Š(˜"¢æÆÀ# ÞØM×\S¶v4¢(xc§c Š‚7äúö޼¢TAÁx Ú˜·ç ®=GËýÌQ';ir\e΄„S´á\ßÐrÀÀñè=»>b`ÊbsW \Ì^ŽöÉ Ìê„UýŽ ÜWÂÀ”=G·1 LÙÉf÷÷“#˜²ëÐ*b`ÊÊÝ-•Sv°mªtD<Ì'6ܼ7‡û‚ëo®›Ë…oÂ.{\ç¦wO¤€`ŽÃ÷õnÐ! 
àú“Ö&[öJDÁ[•å5OnÁ”\k7åLÙ^ºS¹ àú‹ç y§MÏ‚‚)+ìB^Ž S¶²¨MÅ" ®ÇY×Ò¶ fµÊZÆr½~O‚‚§ß (˜ãìSÙÉUDÁuìîÆâ®\ƒêìn )(¸–݃FLÙöô8#aÁ”‘tÔüu…SfS‡ãl³•È‚óoFL™}iÜB_`p=Ž_åtPüŒþ•˜±îØ/4¸·Õ”ßÚÐ# æ8LøZ¬†Ðàz-´×'Ã`Žú4¯âƒëÞ{ŸÞELÙ¹yàƒ°àú‹wà¨Svl=¹AXp¾’È‚)ƒK¶mó‚ë¥,÷LÙö ’YpýMÜS·v-×ãN›Ð·ÙZdÁéÖ# ¦h-‡Câ ‚§K$˜2ûžt`Ip¾„H‚ëqG¼ ®×pö”:!Á$™Ù³êÐ$’àú›wÌq÷vøÎS!ÁÓµ<Hp½¶à´¥»H‚5q-‚àé¨A‚ëÉ–Áˆ®¿wõ8 Á”¡.l¬‚)³WÛÌ*x:,pàt%S´Ûsj^ê‚§Ÿ ˜²Âv¶À1p=ní±K‚ëq§G+ ®—›wOÛ+ÌeTHïU éŸ?QÅÀzêÑ‘×+`›Û;¾ L™}öö®.¸þ¦’›ß¾``ʈlƒaÁÀÓµD.Žc\[vŒ¸Þß9V;"FmÁ¨¤í°\ïÙÎÙ®e€àzÅïð)‚àZô`×ÖŽz¢# ®3ïÊüWÁù"®ÇÙ¤g¹Ûqç;ˆ x:.€à| W î쯿€àé®ÇJ\³‡Ðâ½SvÞcÉæÁÓ™¢_$GÔ9S;â‰ãWªu`ÿ‚ó]Gœï:‚`Êln×]"ÎWA0e;ömÜAp¾‡‚§ã ®Ç=ûUHp=né1Œ‘×Û[Çâ] Áõ(6Á>íl×;_ÆEDÁ”ëïš Š àz\¼:¢àé¸À‚ëµ Ö¾Úù ®Ç±Šý´ã Î5Yðt\`ÁÓù ¦ÌfQÍí^Ppnïöþ=”ÛÌ"¢àz [_ºPœTœjKQ0…\ÃѸJ$Áó) ®nksÆS\/ç².lõ ž x¾‚SÛT\ë󬻨¾N­S8pzêÊëýÛe7B¢8=wåÀÒBÚJåÀ¹Ê#®î}1I1p-|úú§bàªí³ÉeËpQ \O¹÷UÅÀõȵ/”+ž/(p`ŽÄèy ˜œç',&gùd‘'}¢ràùÈÀ§9ðô¨"®?»÷”wåÀõKOÁT<8ðT5‘×ÂÛæ8Û5sàZ¸ö…sÁóSváK¿µã"ž*/‚àzdÙßtåÀÓ #žÏ8ðt‘ç3 <ÿh ÀÚüåÚÚÅ«* ®{ï{Š·Â`&«=74Á`uì4otɃ÷§  æº&Á`¥+ ¦ó°‘€N –N^`ð êì‹`ƒWÖr½   ƒé$­¿÷àƒáM§¯ƒ* ^ÁˆÖfšC¨°`| nkg“ðFœË" ^1àÛ÷òÞ›À`ûîì×êc ƒW@ÉzµÉ¹Ò`Êx¨ï\q0&W$5z.8Ø>ÉÇÙýøÃ91/¸¯×ï´½0Ïíe¿æ‹¬8Û±1çLJú¾í4Á)@]a0¾Ô#ÝFa°*¶ëÝ) flcÏÍ5…ƒ¤?ä.5oTãcPÁÕó•X0Ž$U­(°`«¬Ï®×FiðÂPátݿʂmÛ(øõKVl?ÉÕ;Ì0rßú”K`°]¥Í:h]0ãܳûp¨.˜ä+RG_µ½ê‚:¨­—ç댺`k„UOcŸƒüêº×@ƒÁü%IkmŠ6˜gy3¾;ø ûàlŠQº¨(iƒÙºìnÅ—´Á¼öÑ,wÛÎ â`ªÎº•â‹&"ί€ˆƒs¿Sžü¿• ´êÕþýïÀÂQ™±0˺kߨX˜÷Óó˜±ðÊ$Ùæ-n&!†J §($Á ÖÊt¯MîÁðBÔÅésÃKµ:ñ„äQØ~ßöÍ&â'¶I! 4¼n—E(ÌTÔž‡oGKf‚Ä ×Iž½¦ï&$“wÃÔ7£(ß*•¼"¬µâ°ùA œ¢çÕ*"Nj„ ׉ïÕ=°“SïÊÆé¯É(B&©É(Mïê[)ºQ„LÌ“Q„dÝ$£ˆ Qkží81Š`ì¶;˜NFHó×6 Ï6á“MDò`›|Iëûõ—l"FU%›ˆt:±‰à%)ž”ˆ0†¼ÖìáLNbùœ"˜4²—«¡>uŠ`$P6ß®Hؾ†e¹Ž¶g_½"´šÅ+Ï ûg®OU¯›(&Gã:ºÐW˜°ýÃ7aà-&ÌçÖZJ£aâ‘ü?” ÛÃØ½¡SñŠ ^†"嵊À™˜‰×‡[­"Àl}—Ô+BÑ‚XEpË6kŸµŠ ìfðÕÊ¢UD ¶YÜV­"ÒeŠWÇÙçÛé´xE`kÂfA§ÅÑ,‚ß$^ï8?a<ΗҥÊÑ,‚w€CÓŠG³~7äuÏD¸¾ûW‘»D@ ʺM@8_£XEð8Ù¨ón-Q«h¢a€°uìtl»8Å*‚fÇ öx& \›¹ U;ôVÒ elWX¶ÇÀËA‚•.4—ˆôS‚‚‘/¾¬—P°^·¢`íëë©(˜„,ëúWŒüÚ˜Í0@Q°€­pç—föUÓ ë¡aуGQ0&öÚr•¢`ìÿQ¿ï¸D°d~zÚ–¢`øÃµ¹®PQð¦õÇ}Š‚ edÀ7*ã†eœ«"àôø…ë2p‚À°/ƵÝ[‡kFÀØmŸpj<Š€19½Zvv&À¡ãÏüw£åê;”ÿÊ`êå¿02{f-µôÛÌ!Ö  þ]nÁLBÛ"Ùøv¨$øË²S·Jð—À »37¿øK­õµ L*ü%–y. 
þZ¶å‰ýåfb¿">KìW½vþÖ™—o#Œ¸âSü —IÌõ?=ó8ñ_â¨{Är2 f4ZÖ®3³à£†X6ËÊd¼ÃmîëýH©*x©q|_«U0 µ§«(XE©É*˜D%Ùž]0'œN'˜oõÒ}RU W˜¶‘[„ÜìX0±µ=Äú%À䬶,ç$NÿXø¯V¡JõT \ß·ŒLR`}E•ÿ’Û½ÿµ÷nï¬ÊÓ  b`mt¹T4mQKÅÀú ÿÅ¥¬̪ø$æ{w¤ŠIþíaÎI LbðêÓ€$æzG I ¬Õ¬JàT&ð—è²­»  üEðpõHàc#ô[£VGm™L¥Àä¹^Ö×®³IpÚ†áðW«^¥ÀvxéÁî‰ÿê_ùï'̶¶µRá¿©i Nu¥R`ÒŠmºÜ¼w„K] ÿEblaKI¬MD…À¡üÍ?ào.Š*`Q¶$°hT”þÒ'\=K[éoꎅþ¦=bªÖ÷Wøoj ÂSóþ›3òßô¡þ›3òß|\å¿ù/#ÖÖš°öL‰OGF,Ÿš€µMX;ô„€u€”0ãÍ­ ó­ûÓ,3–­7 O?°~5?…³VAÀl²g¿-³3Dú‚'œ«G°¾‰ëN§D€Ù´tí7àÑíi÷‰ÿ沈u#T¿ù„‘þò£OÏ­ïô7×Ù£ÛÛBžào:Pè¯îÈJôWû¦Dõƒ’èïtd¤¿ù1 ýÍ7/ôwºÍHµgNô—Ý^ˆ«\Êé/§àºòHóåþÍM\ðojJÓ}(ýͧŒô7ÿjÄ¿Óÿæ#ýMQèo>.â_«Ôgˆ(þÍ5^Þ½zÒöWØñ­øwù6üì&ü  y|cœâ_ëï¬sôµÅ¿Y!»ÆQ´Œ3ÿ&ɘàß´7FøoUÝC7Î9-£æ}éÿ&·vÁ¿« WodK× &K·lCA°g¯O— fˆšä¹f_c„¢ ¸2pÓ·Ùm{»€`ZÊÖS:[%Y­Ú@°]ÊÉ8ÛioÁV}Ý\ (fä8Rp×êÃZ¿ b#æRQœ:gáÀ©\àè>R ‚ü£_?héX>h ™@›}±ÞO«`®eÅÉ¡Ù^DÌ=Ø“tuøñ¶p›Ãx>ˆ²`žµuÉ*GœnYXpa.3ç¬æêÑÅŠ‚©)LRï2£àBìØéc+EÁ…Á¨52—æFÌëq°ꜥÀô:ÛÖM Óë\®ØÂ@eé2ùH*ì¥ïNŠ˜ž‰Ñy{ÌBy$˾º{ŽP`º0P’;#D ¼1¥è¢Áv•׃(¶óQ¥¾D@0ߪ±óPA°]Ë *³#ÛFl¸&$8—El—y"KsI0+±¡EH0=¦½.¾Ê)$¸ Ž³oöÒ)qÂn?nÉ\@pyŸ“é•Ê®¿>‚\0"<޲Ÿ3.hTŸ¾Ž 8½S‚‚ëÇâè0[P°·gÉw>*Ã(ZxðN’D|*Ö9§ò`ÊFxBâÁ —a!xpnt„%$ Ì›…Çö O?‰ðþ‚šÅc…ë%!a+ÜG IbÂLØÏNL˜/ع%*a–s¬|•ßÙ¡Ö¿¾•ÿ1«`ÂÎÈø%…h² .Ö¥&Y0ÖÍ—k“,XmgUl¯ÙRz‚šøBìÔÜÁEuÁÌK7sOº`¾e—{’.x%ÿÊ›Ûd L)“.÷xªE2†H†b å~É".xLÆ÷h²C|&ì¿›føP}EX LÎó¿î[ÜŒ!dEJeÁ+áßö%ij‘ƒ'‰Ü>?DÇñ] ó$#µS¸¨V„ÁuŠÙÇ–* ÆPúê›áTŒýÓ7ß©0Xe§ãD •Àcc™Ü‚“O²ÚC)í›5sáz‹Í;Aí‚IL¾<QÃãø Æ÷͈Pý‚/0tGêL,òÝÝÍÔ/ýÅ ^·°ò?FêÇq6çY?xCÔ{ ;g0<]K ³2Ä>ïQ¿àtÜëLNô¹4!œÚ³Úißl·fÁãr¸zMÍ‚*×‚™ˆžî–¬‚O’ÆO‡j|2Fß] ¤VÁ,e²ÓañL¹`œ~S¬‚yµËÒ–€57ŽA7;q›[‰Z§óE«`Ù#¡VÁêÜ-Áqùl⬆ÆêLV¯}²Û¶õ ÖóIrœ•íçÊ÷iNŽã…fe­Ù‹U°‰S0cBû76*¹qL|êbê‡Ü8޳'å«SÇé¦It¿†¿e›¿Jtœ•ö¡,³Sp¾ ‰ŽÃøšÀA‹G¼WA(UÛÚ,Ñq‡)ÓÇKtœx6ir\¾”˜ÇO²ñbù·”úMð¤IIŽKÞÝ’—3DÇ- Eì5~§·ÇÙ†³¸DÇ1ϵfák—Ê$:Ž2R\Zg.Ùq\ É)ׇì8µhx¿¹ïáqù¸Çφş€p=ÃÖæâ.áqøt-·ûjz¿¹3QÓãt“¦Çq F”ô8~ª¬=TÒãÒi$=oô£ôÐNI³2´Y :hz\>.¦Çåk‰éq î)ö5iZIËç;u[Ûñ ×ß|À52‡ô¸|XH˿ж¶!éq•Ú-ÐÒÕbzwnÓ3÷%–ô¸› éqì ÛŸøkzwÎbÊñ!=NÇÒšÇùlšà®Ò’Çq,µî÷Ä…ë½—ku’&éqºOMÓãòq·ïl³qèæÆS’—k$¦Ç¥nTÒãR$éq©&%=N÷ÞizãY`H[è’ô¸t’ÇÓÙl Öd’—,ö%=Ž÷€žk6 Î-,¦ÇåK éqùbz\jÏ’—ËbzœîdÔô¸ôd%=Žk±Ñн_®Aã–Ù,8?!IãZ0c_þ†ìŠERO¨•ð¸\Óãò¥Ôô¸|Ï1=N®!…Çzl•6ig ã2¬jöî“Ó㦟éqqÿg 
ã¾ñAvù¬„Çé§ô8Í+Iéq\Ž}¤;J’ô8Í›Ðø8¦ö¬ œÃ bŒgY)?=R7ÅÇi›HñqLQ1_ué¥ÄÇ埕ø86º²(U¶Ù(8 úR~œfx¤ü¸égÍÀØXMïŠÞ˜Ç‘·uôë:“áZhÓ¾¶n˜ò㦫}óãrK€\¾N Ëg“9~„[>ELw(rùaI€? ªÞ?EÔ#íÜrK€Ü Õã.ÈåJ9޼ޖ#œòã–:]·F×kb~Üt­!?._ªäÇÑ'`нµ•ü¸\±’ —.UäòåH€œ"µ ÷¡pDÈq±¸ƒ¹DÈM·Cätƒy ‘£ÐÞÑÃõð"—[^yÉÓ†9äëŽõmfÁùaùúû½Û~¯àû°nváÛ»¶ÃÛ»låõOÿýÃ} ƒ¦!~‰(yûQòçKù&ðý;Oj÷÷s¸ç¿÷)Y\>üNÛlÏhßá ®¥ŸŸ?~ß³\ûý·ÿøÎ÷_+ý}ìíÏǘ~ŸúogçOßõÜo…·&ÿþùÃùß_üË_¹Ž«*á®Û†a7jyXàÍØâ.\Ò?ýÔ Éÿqµÿa!ïë§Ÿ¿þíÿYú_ÿþõÓ¿þðÏ?ýÖ3•j¶uÚ׌EÚßz¦ýÎdcr´¥wõ´úÍg:ó™¾KÙ{æ¿{‹iñ÷ì#ã}VƒRˆ}·.6NBÐðL—!…ßéz6@ `ꘫE ¿ÓG“Ý8ˆ¶kºœXö}Ûç¾#ß§­pöÜVä2¾s[©×“ÚŠ^Ï÷m+µËж"—ó©­üížÚ¦ h¼B´¿pFµ¹»Öc¯ãí§÷÷@ÏRŽ"±ÙÌå™ÏòøY¶?xš¥Š·Q ¬÷_9ÍþÇNs1»·ûú,ª¬ŸåÌgùg`3^k(rΣf-|ºó)ø_õÿñ—ÿüùç?ýòËŸþüç?ÿéçó—ÿûŸý}K¿~ÓHz°o( ô„Mom®?qlfýœ6å{ý¿õ?Öà«»W³¬§+…ÈZ±Ø+TÔ÷$Z!ϵ6úWƒÐ¾yŠ× vaßUP‘põèxà“ [?ç XÆ^§…ÅZaÝÆäÑñD]÷ãj²êbç3ÿp¾úö½Zø¬=:~A\Î&(|î°fpãp..áD\Ò)–$¹¬+Ä¥ÏüõÀ•ù¼Øááú½Âçè3Y¶ÂM ñháÖÇÜo!+…}ê/KOV¸«)Þy÷]Vxüø6‚õ]\{P°µ ÷$¹\I¡ !}êoHúWHa…W2Æ;ÊZÖv'8jõ™?~ÕËéþ³+³þæ9Çn·«Ýƒ=ë•ZKVÓ¹Znv6Vx©ÕH7/¡=+ç[ÝCBD-^#jg±®˜{«›3Åîqg¹Í¨¢Ýˆ¨¢¬µ¢°Ó}}Vßõ”ކ#’sQ ï±Óý ðîòȪºÃnlußyqŽÞˆ·æ[ÝYQ¿ö¶Ä½ÖMŽc¯»ÔÎVíP|«;Ïí\= B KÙ7› ž~Æ›=D¾×]b>Úd»¦Á²Í_ý²°1úmÂ5MbÛ]·¾6 ÿÇ€×Zø”Þ„%Ãd­[K†ï$þnkuêѰå®Up?v¤§;`ïÛÕÛðö#»¥\h^eò16%dÞ'÷ãõô6œoäÒà”¶±Ö™öÚ[±$½ò–Ht £ÛjäŠzvØŽl,žë+Nªewéíxeits>ùŠ{;¦g¶çå1Ø£g¶=¸«ú.G¼[£ïˆúõ¢_®ÞŽ%Ù¡Šó´õ7[ó®û¬½/®ž¼ïÆÝ·ìŒ¾#ÂâýßÎÞºà×=›ÙL|G$¤>¥7c x]ÙžÞl§£ ¸}¾ªÜÛŽÕ9˜]ëÁxÄ~ÏúÚ²´çt°RéÍ89rS(y*Öï=m½M÷wïˆÅyµÁ^0–ŒÓmq0V¯n{ÖÉt:8ncL§Ñ_ZGåíÿ¸¢é´zG¯ÇM§aÜÖÄn¿ 'ØŽP6ÎÙ[ ?1f|V¼õŸ«ºNG›g ƒëôÅð«lÞœ[L¼‘Än¾+b=KŒÔlR»°è:gÑízÀˆà;âçaypóÂ`<‚­Ããû˜­ðŒ¾Ó…hÝ}ô–h`$ŒÑyÚ>e ãž6þ9M¼m>êÛ õLÖû¾úDþj Œë™ö ¼â‰1˜«÷õ [Ÿqmc`<=C›¡…ŒîôÜšQ›žÆÈ¸ÆÄZ§¹ùøÆ ÇÖʪ¶ƒq¼=úȸÞÉAzöÖ~öŒ!'f’kÛS?zgLµØM¶º¾î³’¯öaq~ů'„n§[¼—´’n‚èåÞ«ËÉŠ³IˆZ9q{s¡f-|FOœ¶Ä° ß­ä½'&!xÆhû>‚"\£XÖ÷—B|æÁ¢W›ÆÔ—ld IÚŸf'P ïÑkLní„F€Æ¯Öž»wÆ7KóÇû(¤ç~o"¦jÖ<¤Ñlxc{R³b@H Yë—rôÆõ[ÚG €†Ï2Pãbêæ²yA›‹ÕPÃ%4äƒZýF¬ù©uHƒ`mðk`¿û+ŒŒóãÀ37ÁÞ·—w'Þë·Ü›²æË¾–ÑÞ–ÙÔhWþî%X«1zï5}¹* bl¬õ*\ý1Ó+Fö¯Š¡ÎÍFÌ4‚y{ò*ôýq>r£ãÔtªRwôÇu¾×æ*UHƒ`—rbžò¼…W dá…°—鵜Y™»ŽÑqª×:ëA°ìIh{ƒ×ºià-y¹/W¶å˜z‘l³k³f<˜Æè˜¤ÛÃf©m{VÝü³¹c‚4…ctœŸ”ÍáB.‹Ýƒ½Ma¼bß?ÆÇÈ»®ËÓ·a !–…Ó7­ï&žÞíWvLÞ…!˜Å®ÇFXmËë 
ôÃcú.†mì¼­·FsÛÿs5.-axL¿Â’Íñ¾ìFÁ,„ýÝ®ÇEž‚YøÌt'ŸZ6†ÇÜÈsùîˬÌ¢ÉÄ0¡0<Î?»Ç`–ô¼¶# ùÙs=|Hak û¶q|œjÖ¦k!˜%ævC¾ÂøXÞŽw ´7ã\OmsÉ–i“ÕåCmû´Ç„½Áè™Ñâ¤ûšnxñÄhn$¯Í¯ì‡ÇVˆIÍâ?[b,KªœF‡\˜Û°àýBlx8,¡!Ëë×Är¨¶×n¡KcÄŒhôÇl8»:5¢P‚¹åÛDNƒ¹Cº´ÝoeI]G¡?–ÎhßâøXó¬) ããô÷ÇÇ|õN~°Â=Žé¹7×øZáÇÇš0¿á|5úc ƒ§0ä²@¨¯»¹?Xá¥ÑÜÛ½¸JÛ ï8@Öàz]þ`Ì„G~Û‡i-0í™Ød¦í9xS¾66lî3ß¶ÓS¹ÿÆÌÄ¥ñÒy–µJŠæ•P$«cV®\Õµ”¾SWÁ4²âžø˜À4„æê9† L#ë:ÇŸW. 1ÚBpé…žê#Tz«›(GpQÄÒóDŽS*ýîò½r‰JSxp5ïÐK 4ýh ³9ÒSa„Òޏa¶+Pz{»“ tUà½^Àoa„Òµc&oa„Ò´an.PšÂR|'^‚ÒÓE*—™=š¶¯Séü×B¥‘ØÚ¦{¡ÒÚ8ÄÍa”JÏ…J×^9wç*Má~imäÒÊúà»–1†n*aÁÒnô,¯¯’bé¹0`éZˆO ˜¦ÐîbLš¦ çãk Mײƒ]¾ iD4M¡¹š¢é¹0 éZx1æj|=¢éùÈ€¦ç#š®…öÍŽ\Ñt^e4=ÕO„ÓõÈ'̉#œ®ƒ¦…Štâàt-deÞAb“â11]}¥Âéé÷"œ¦ÐZîá–Á§kᦧNS¸ãv·oíœOÏðôtOSx-çÞü…NÏ×è4…l¨mæhJ§ëÅn›ï§T:=:MY¹Ù tš²íóá§ëqk@"NÏ…NSHØPÛÞ©pº"ñ_?ÀéZXH•…CïÎ8½šéµ~7ÒéZx ˆài†ÿÏfï‘OÏ…O× Zºzë…O -Yj¯âéé"žž~/òézäàLäÓÓÝG>=@] K 7Q×»Ç.±JQ×Âbó_¢‰ˆº.dz:kŒˆšB'$u-Ü¢‰ˆz>r jÊÎ'€Ÿ€¨)»làØ ïQ×Â+°ˆ¨ë¯îVúø¯D]+Ïžäz:…ˆº^+®‹ÍaGõ|΀¨ë‘6?í«‰QÏGD=×A`ÔõÈc0#aÔõH€bc £®…÷ ”ÓRÏ…kTo0qÅ¢·½RÏG–8JÎ×±ÇQ2øÔþÙÙÖ®"¤ž 8JÎ?{ÆQòβçõ¼PH==‹ˆ©ç;¹ã89>aœœ.'pê©9FN=ÝdäÔŸ ÃH9ÄMþoâÔ¹"¦ž<â@9žq œZAÄÔÓ)¯8N† ,)5…¥ |'”ºÚ ÷ö%¾—RϽÆq2äïW(õt‘R×Â}`F¡ÔSÕDJ=yIJVM¤ÔõbÏÀè#¥ž¼â89Õi„Ôs¡GŽ#Bê|œ@ê¹p‹ãä\Xâ8¹ú‰”f(¬z>rãd0Ã~¸ž@êùÈ3Ž“µý¦Î5+˜zþÙ;Ž“sí=qœ Å h¼mÙÈ»Æqr.Üâ89,bêzäw‰ãäü³{'§JœzþÙ3Ž“óÏžq œžräÔ’Côšv(¦®e6ºh¸Š©§Â€©kÙ¢(z }²Ô]ÄÔù•L=Õ@ÄÔSÝEL=Õ@ÄÔóÏq œöŒå|ä¥åu`£ª‘SÏu÷Äq2ìø"KãòÂ'vÊñq½ z:$‚êé-“„U“È©§§9u¾½ˆ©kÙÐoÄÔSFL=ÕhÕóõÜ:J–* z®ŸGGÉò³Tçº œz>lÓA²ÔkäÔó‘%’SõDN]vëXSϧûdº7“Bjfÿ#~)Aêég#¤æ;:ò¶¤Æ 6ú+¤æ.mÜùøê @ê•^í¼Ü²E!õŠ¿Ìz»ŽBjŽdã5+ÚŽ´&¼¹¡¹Bj+´qÜêfðR3„ñ- RókûÑ ©óE ¤Æ‚˜…ºÃ v„ÔØëŽ¬¨©­èê£Ô VåÏØ}!ººöÚ£ñåÔ¹¥žÎ)µº0'J‹°Õ¯ËG„R/ÌŒÎóò‘C¤ÔùF„RçËJ©ÖvŒ-B©¹ÏŠŸ2RjÎ9ÒɦæbG’UÂÔ˜8ÃâNŽZ¼Ê£ÆOú±wÜɧ@j+´¡Â¾7qžBj0ð×7. 
¤æj ÿo#nˆšŸcRê#JAÔÜë{íBÍКu¸&gTB½0×9ºXG µÖŠð鼇Tøt:.âé|ß‚§§[Œ|š+…]_Û>I›M¨ØHõF>»Û±ôi£ðéô«Â§qýf™ã áÓ8o/÷å!ʧÙD{°‹xùÀ§S»P>]'I«OœOsʳ•&< T~N?àiŽi|¢ÓùŒNÏeÁ¯ÐÆl3y·6Nø¢éôkB¦9à¶Î¶íT2­~è‰LótmpîÄ@É4wc}ô뛟Àt¾ÆÈ¥ßZ¸ Kcw=Bµ–¦F¶«céT§B¥sk*ëT°´fö$.MøÍmÁ^>péégw¿‘š.Ÿ†€in“¨V÷°0ÍÕe»;{Ž`šÆj5Þeý¦§Â@¦9åruc€¦Ó`YÁt~;LkPRÓ1o(aéô˜…Jç›*›±`ééȈ¥y’Ï:ÌKk QÂÒâ·àkl‚¥‰@²з4,ÍurÈ'5¯Å~ŽÍ‚¥iS6îÛH„K“D$¨ï"0-yQ‰Kç[.ßaáÒùí.=]läÒùi™ž.(²éJ¶á6Ùt~s"›æYØ ±ú’¶°i.Ç>}Ï‹°é\±Â¦§Ÿlšú9i’Ž‚#›Î„°i«®»Kš¦zlì0N°4©K_¼…‚¥õj–®±k}'\âÒò"',-I~Klï;Ï—ÖzM`Z߬D¦%Y0“étΆ¦¤EÝN&±étóʦÕõ2±imä‰M§G¥púd£ô憉N§JU:¯Vè4yKß^–ètª7¥ÓùgÇ tÚÎhcßΧt:=b¥ÓéR•Nç#…N§‡©tZ»À„§­–aKm%4áé|NÁÓúN*NMDétjЧS¥+ž&±§ô}„O§§¨x:= ÅÓ©{P<*UñtjÊŠ§iºwß?™ðtþYÁÓ:›¾}NÓ“<{ß·œøtÌ$8:-ý¼Âiç(œÖu‚Ós¡ŒŒã77ÁiO&8º…Óñö•Lë79‘é|—Ž‹µðNãâòØX M§ l:µÈcŠý6k5h¦—ë4(¹ ™L3F$J¦W<7ñïpœ»iÄ:֒ã#‘i{‘›;îÄ¥IÚpËÝÄ¥Ù-Ê2„ßf0mÍïÙÖî¬`ZrF3˜ÖDƦmè7vÖ©`Z²F3˜Æ²yëX LKÄrÓ|ÊÝM¹LKžgÓššÁ´ÚÕ —–TÑ¦Ó LK2jÓé>LçSM³´÷¦ï™Q4­©œ M'3EÓ1^4é•Èà«›v+˜ÖpɦóLçk}<ÊRΦdš‰º}ȯdZƒW™Ö€ÐD¦5æ3¡é\(h:=FEÓšp˜Ð´&d&4­1Ÿ Mç#oͲŒÑ¢Š¦5±SÑ4Ã0Ot!² iØéîGÓéJLkþcÓùÈ]£,mduŸƒ=K’%S õùÄ¥5u3qiMM\:_«pi­m˜ä6FÊ¥K×½u˜¼Ô¿»qru· åÒv@±±ëž…L[û¥ºYÈ´ÖŠ‚éT&`zÁsþp±CbÓ\šÍhÜÌXÙ´f&6¢²ét“¦SÒ‡²éT×B§›ïtU<~TñtzFЧ󑂧Sõ(žÎ…‚§S­+žNw©xz!zàîšzÅÓ) Eñô‚Žäìò}ÅÓùÈGCYâór:½`Ùo³B7e^S&Ëa9ßH t:ýžÀéü«§kbùÞ%ú §)ÚÌU+J§v“·Lt:ÕŒÒéô¨O#îg̽7-xzas˲¹Ëâé…=,= Iñ´h3Y$n”)ž^Ørø†‹Ä§múh_ÑåöBáÓz#‰OkÍ&>Ï)|:)€:߉j­Ÿ¨ó‘¨ó  Î$€:6@½°'fyš«SÔ쉹ßÀ‘µ6€¨ÓãP@MxÙ¶¹+jm P§zS@ªFu>§ê|¤ê|¤êT}B¨sY$Ôõß]û:ÊîØ-/ ™&%K„zaû“ݱ)„:ŸRuz&J¨ó‘B¨ó‘B¨óÕ ¢ÎW+ˆZ¯¢NÏDuª>GÔé&QçBAÔùQ§KQDêFuþYAÔùgQç#Q“OS:Lˆ:ß§Pêô<”R§s ¦N ¦N—£˜:ÿ¨`êt­Š©ó)£ uýhØØ»Ñ5¡¦[ç}ö ž‚ê|±ªSG  :>:P–'-ÔùNŽéï¾T/ߪƒoÊÌ©™w^}€–8µnYOœºüX·ív3ŒÈ©ÓŽ(%ÕÈDÀ³=X1’ê4hTÍ€¥Àx·}VP×8»Ç­‚ªÍOÍì…#4m5Ñ J©®©jì í®'ƒT3ð:Ø+ìv˜U3ØÛ.»‹âL9 j~•½¤ns/¨:GgERÍxu¿÷±2’ê)Y,’êšVd=’ÕFÓ:R=eöDRÍÈÛN±^ÈŸC/ÞMX#ª®á;{ðŽ¨º& ¡?rËËUOY6U3Îß÷}d%‘0—‹W̱qŒGLñ ˜;4ñ ÝÝA±ùçz¸Ž$$Þø ÓSIH´yÛvžcê, ‰VqÖ&Æ—„D|õ¯2¨d$Öiäyõ‘1%ñf»ô6f‡1%ñf,SÆ$GRo:˜vª)‰9–FR9Ò^­Þ^%%Ñ®Õniñ ³’xã|sŽPdÕÈnúî|#!‰ÛxöÞGhH¢MÁ7܆—}QSxYbñ“xƒ„¯z%&1ßIMI¼ØÁº=}7©¤$æ 
‘”D¾NG$%ñª´·ç­’’x¡.¶®¹-¯jN¢Õ)›Â7$'‘û»Ÿ€ÀcN"G²ãÃ$'ñbÛ°5žOîÜgÛSß®6æ$RÈ.%_“œÄ‹Ý¯Ûí›X%'Ñ®{µ=ro‚Çú©æ$Ú]Z—3V$'ÑŽ\Èmô-ã’“ÈÓ|ýóŘ“È[gßñù'9‰ñ|V‹;\ÇœD˜Fna#A‰ö³ ³Ü¸ú…IÖl›9–&%r=ö³ÇâGƤDn“œæL¦I‰¼Z+_ü†ž[R¢f §¤D —÷ÿ›p5GÚ«x¨a Jä “J²~Q×—§v·è‘¤D~”õãÇ‘sÌJ´ÂƒÞÓ—¹$+‘7•†ã^1*1U·&%rÃwŸhRb®4IJ¤ê¶gïûsbRbn1)‘×ê°!aÛ«¨I‰T»¬<¹R’ùU;Ê·NjR"íØÊúB¸$%Zá~XSòm6’”8]lLJÌ$%Z—µc'ìaˆ1(Ѥ6û. IJ´Û¼÷àF&I‰t¡V#J2&%RˆW½{Ø”˜+M¢¡ºà)WÂKT¢Úücé镘{1‰J¼ør3PñLÕh…ö#ûˆsŒQ‰TÌS¶¾ÅL¢íHÖ‡úÈJ¢§Â˜•ÈçÑ^ÿ®-´Dîó´ùñÁ†º‰Gùâ?⩃c¿|KºÆ%^ Œ}4—h2W~9—hg<¯àÕ&q‰4*x¯ÔÇçD[&´D~•Mz×ý!-‘Zßíuò%[IKäaâÏë }’–˜?½’–x±)Äzã.™Ži‰<ÔS½IZb~Ç%-1°$-1¿1-15åH¬óû¡i‰{¹·Ï®i‰|”ʹ¸yHÌJLݵf%¦a‡f%NgŒY‰ÜyàÍæ,¥%ÚG‰YËL«yÖÅ÷õ|ÍJ䥳ÿã;i4+1L5+Q¿-*‘úd×k›ZjT"=“Rìoœ6F%¦'¨Q‰iè¨Q‰TÚno¦/hT"¹[VÁŸ<¨ëƒjWôƬÄôuÔ¬Äô-׬D.èö—š•È‘ìûþœ•øàT¸\ÝL:f%µ‡ñD ÉJÔÆ(I‰ù|’”˜úã””¨£gAÕõCnmú¼?%%æ'"I‰i®§I‰<èë±¼…1)17ÉJÌÏRÒÓç^ÓÓÊÓ¯2¬tÃMKd\gC]O<Õ´DÒNi‰VňŽfy$¯i‰1)«Ù-›(LŽVï«GmMÒêhŠ;I«£ïíäø C'iµºzÜbµGînYfiuvQmuÚ˜®Òê´AO¥Õö@0©î W¤ÕtôûRiµ½r^Ÿâª´F¾u×ú$­¶nÎÐÝáTZm#˜½ô„Ф­.(á^muR$ª¶:éÜ\[ŽQmuÒ °~pAÃåšãUÅ#qýIõÃC¶Ÿ{- °¦p;Æ`ýÀsöÁX[aÁnÚ XÓ­¹ú`S5£kd¹Åí¸#°¦yë$O€µýlËÇ<¼°Å™ Ø0a™¤Õ”Ü ìòAZM¡M)Ç|Jpµ,núS\ý¸gMµ›FG\mG(øþA[]§4|áç ¯¦´N_^^ý›fuä‰B«ó“ðê‡v¹ûnáÕ,1í¯¶B.¶¯†¼Àš®‹˜O_¯`M päÕösÖûsáÕ/¹U° ¹…W3…´ŠÙœ ¯æiØð¦{˜ ¯žŽŒ¼ši« ¸º¿·ðj;’L‹¾Ú#¼Úޤíõ.Ix5Œóæß«Yð{CW[%^]§ÑÖAmsrâTy«íûõ°Žê!°Â«™¹oØu}pýà”'«³yõÃNo:“^Mý0¤þ@«ó¢¦Ðj–J™p¶óEXÍOòiqò&°Ú~Ôî~’ ¬þP8†â\(£[n¿°Úêå.û0 X=FXÍpgEO¤\ÍÓg‘Ç®Î5#¸š§ˆÏ”{Юž.(âj^\l}ÿ„ðjž?†RN%XO·un9¬)ÄÕ·QD`ýÔ7Û~¤5Ö¬óX(Cñ\y¬íÈã:»A¸ðj~u_ºë¸àj»Ôãa‹›lD\mÚÝ÷pÁÕùŒW?(…ŸÒÛ‘V[Ù‰ ’„VO…‘VCïŽmSi5ëãìøìΑV?,åÕqÌ[aµ•k.³QX€€mžë(°z*Œ°ðȦ_w[XÍmÝÇ:Âê¼Ò/°ºüËðiXÍ9·sx° ¬¾\ÝoYXõô«UÓ†­ûïöuª:ßµ¯)«~è /‡Cʪ©õmä®+«æR¹¶n)YõTaõƒk$¶+¬F)Â~YÿÆ ¬ž #¬žn%Âêé‚"¬¾!~¥Û+¬¾Á 6R÷~¾Áêé¯WÕtÈ ¬~Ø``Ãw0XÍKÌæKwaX+N`5-’‹]Îi5m™ÉÎùA_ýÞÑ6œ…Vç6 ´:¿?B«©òsöB«Á|6,ëcöH«©~öµ5?k¡ÕéZ•VÛ‹ »¹¨Ðê›Aø:L–…WS¸<Ã#:âêéW#®¾ëˆ¨[+®Nu7ájýYÖ¬oÃCU€µû̧#°FNdsa7ü^9Î ¬ù9û w;[Öw·ÃzV€5¿w¬ÁÑ9ë¤kR`M}³û´ÓlÖ`‹eXá °ÎdQ€u®6Ö²«õþ`N]¯v½ŸÁÁ#°ÎÏQuª>Ö©Â#°Î—#È:Wž ëüüY£û»‡?· ëÔ8„XÏecŒ|ó^¯Áe;k+ÜØùïôT€µZƒè¦¢ ¬ùYk0ÝY€5?kß_; ¬ùY{u:mÀ𴧝s °¦ƒ´î¹p§®Í ŸÚn]-ÀZ«Fyuº{áÕÓeF^»9áÕÓD^‡òê«¶ùnO®¼úa]¦ 
€yu~V«öàôtåÕ¹,òj+dd<¬¢…W?(©z:ƒòêôýHÒj•~&^­«ó‰W§g"¼zª‚È«PXÇc-´ÝJÖSaÖ7Cê³D*°ÎÄñžo|:ß ±ßæPý—–¯ùÁ^¨íË^ ó~–úÇÿ}ÿøÎ†)¯úïþë[À83¿D½ýš„ûã¥|ÿ'µûû9Üóßý”ûV1ê{«þ_-ºç›]Ç×5°æÝ/áýï{»}´®~ þ_ß¹ÞºŸ¿ÿ‚šŸß¥ùÕªoðþù»ž¿Õ{{Ú|¸‚÷7ÿòW®äÞˆ‚ ¿á“6è:Ñxœ'_:»¨ú©¹âÿãjÿcý÷þõÓÏ_ÿöÿ¬Ëþ‡¯ÿúé_øçŸ~ë™®º0bŸ›Áüö3íßp&óãPUÊUÖß~¦3Ÿé»ô™½§þ»·™Ö9×>3Þg ;оÛGBÁù2’Zø»\KÆnE4]O,üNÑÓæ Çeã”kºœXöÛçÖW¿K[áì¹­Èe|ç¶R¯'µ½žïÛVj—¡mE.çS[ù =5X—q¸M‰¾È±!Ëb?æŽzù:Þ~ú÷ä,åà, Šìg>ËãgÙþài–‚h÷«õþ+§ÙÿØi.RWw›Ï>ˇ*ëg9óYþÆÎbsÑý^pœ?–°ò‚¯| þW=Áüå?þùO¿üò§?ÿùÏúùüåþŸÿûCßү߶}Ôz°«”ñÕÔ?Ú¿pÓgcùìÞ|A¨`‡¶®pA÷;ä¥H·,,Ïô…ÖR¹‘Qº²£¢­P(.ÂA¨Rª€ãpé‡:§•,àþ>ùÆK·*Q`'ëáJÍí´Øn-NÓÿ”q³Õä*Ó<¥ "ޏ”†Ù©'éDçîKbºÀSÖ¸B Àæ® \ [q¯ÝgÞa1}-…jž4Ýn­WVv!Ž(+®Ýæ®ÔtõŽgìÁž6…ot®¬¯Eaƒ,¬ü]í|² ƒ_cé[¾)´·fMïëô<œ‚„ã×tÏ ‡5öYÐ= …v¶2.|{d!ª‰âIìqmþ>ˆ‚YV¹ÛòtÙ ¬»WŠ]òˆÄ.ÛUæ¤3ÛãîWrð7­8&e¸ù¶F¼q÷‚]òEò‹)´´k^W“{mÙ"<9k6ëåÛ·ÊvWëÓXáµyffaŠØÁœíψ+Å^£ëh­ødoÏÝY…ƒ÷žˆ{F*G)+¾–­[¡u0=¡”-¬#œ¼¯ªì-´>ÎÍÛŠO þ°g/e T§U¤Í%–&b*å€'µV|ây/®Š p¬~²”‹†§©‰K9y±[+>±3/;C@…g]ê~„ÖU’×ZòÉÕÁ³–vÎgèÓzDÚ\Û5AÙ]öÖ’OŒ‡™[Ù—°°ÁJ÷v¬-IËj”*i ùÄÆ¬»µ9w]â&„ÙŸÔÎ'¯õÆRʰ¢p,Þ8Í é—µS–²ZC>éTšÎ{ïû1åÉúínUßžÿ~Ò嵆Œ`[|_\Ù¯@ÂxféBY ï»õÆÈ ŽkìµìQƒtÐrï«åÐY]">j Ù¼.»˜õpÖ½‡Å»¤ÿ楿…+rÜÖQ2à’íug…cUØ >ðMn5Áç±5d+Üîš ÝÎ…VØ;îZˆÒÀ»ã“eóµË°m8Ð|®Y¸@ñîøÄmmŒuŸˆ?4uå`5Ñ»c+¼Þ×·(®%,ü†oÙAXnëíÏm×Pñ²±Jmÿ±±ˆßvº—zuÞ!çÞ£·>×tyÐ÷ÚÕôÅ^¾ü=Ëc.¤/g\)ÈÏñd©À»dŽhû«aïæX€²Bëâ{pd9Qɵ.ùàÛ¸ô=u….¨3tªÂÎãëOv/?Z?áMÙêw±§ÝŒ­êÂ9ÚšËÞÂ&ä¤ÐnØ›²õö¡ñDëƒà ËÅVåöÂ^Øwy§| …7«—°tAá1ÔˆåB´âòñý\uKáX³Ff_ÏÅMríü¼Þ–ÑÊ®·Kr˵‡•Öég_qïÛ–ùY›¹ƒn©³ï”iõ ¯V}lÝ:½S¶Â­\=ú°Ô$Þ)ÛËô\Ö·¶ »…-—wÊ~HÕ ;|1ÅÊŽ‡b«wöCÜÞ)«¨‚¶fHáX¤;¤ `¹qéô^ù¨:Ã.{I$»\‡½æíQÝhn½WæJÊ9–;jn­'±_Û¶¶:Gáãcd ¯†ˆ÷1”*ˆÈNÄ~ y6½WF·‘zÐ ¯°ªs Œ»í·'Å’]éÝ2ͽ?–;jêPµáþÔF×,,õQ²hŸ¥n”RÁöQòú‚%¢v=}¨ú(ù u©†u1Aßp F³^ºY¤–*zìÝòÆ=ßî…Zž(áÈ¥/ýX!c<ï–ï=ËꦎÖÔÂ*ÓAeã–¶[Óº=뙼[¦pÒ1 Ç"¦.'ûË[°2Þ{å|`\çkÃÌG ÏCŠmkÉ«½YC¶c0ÔÊVh_­Çw<0û@™œ½„nJáPò n\Ç2îŽ!ñödƒ§Å·âX3 ±ë!TÙ1œîCåô¬vôý}¨LÃÂÍ­"÷šJ±÷¶|­CɺCûP™¶ƒ»tÛWi#FLµC[ÍuÇʽ•i’81µ-i; œ}¨œš$…CZtTÍ|×[_AÖWhÌËVî6«’5 ût=^¸À:sxv@B+Ó–¡<£ðîceø‰½†kÛ]kwt©íØ$–Mš£-Û½9´ïDô‘rj6Ä'íq´ä‡î¬í 
¦pŽª´êJÏý1}¤ÌåÜC>¹¯Á‰y—k#G°î£)Û¤¿ÿÛp+¬6ç_ÝêìÊ›òF°K=ÛâLîìPÜ£¼!#Ù{Ú¬5‚6†×xéBNû"0éíxÅHü±×hóÂ.¸¢¬&-.í؈Ýûäô2nWòq¤µ•»iùwäc¨lï8s¬6SÚ·(ͯñV÷}z;N]‡Í놨 íÝí~Qö5b²µcÆwC"gc¦ Õɽ€Íë¶1V¶h»W÷\³1\Ѐåg‚Ê£•ó­”#h º‹£«/wT}¬œû,Tc¬Ì¬h°”½ªZ¶Ù !—î—²#ó£eT­ŒÛ«e…÷õ^'ÁmâºÛÌnh¹ûBß×îÞfv{,ï¨9×¼ÛÌnhh§³Ù ­–y7‡¦„²!8™d?­wÊœ’ßðÝŽ¾c –B:[o…¾c –—:nòmŽ;` –‘ÃØ¶‘4…Ï,ƒ Özoá´—|í×Í«wúÄ1VN¯%]ÿ+§¯+CÃ1V^Yïºf ‡¦n¨9.ï³Aî5ÆÊì$'þãnwry…=wggÊ2FÊ(»‡êhg®3FÊèÄíOG{)™w‘rꙎ°_Ê~•»÷Y¨µj{ÓöÑ’åU—õúöÛ­–¬Û¨j-Yöå/'»¯^•ç·íõ¬ø¿µmTAuaÞó ªùö‡¥ Šfõï–ª7æ¶5Ñü-Šœ0²rê µÛû  #¨ÞêvŒ®UPm…û=tr‚ª)´1Bl ªFlÈDªcÓˆª«ñ´>twŽP5…Ö€{†¬ j W6œú8;¢j ÝÚåPÍÒÁ…)‚Ïš#¨¦ð×¶±€€ê­n‘»{š€êºø¼}´šBkäö1x‹¢‰*E0÷¦íVMáÁF©öZMáF¸ÏÞ#­¦áäu:¨ ´šBë[í¦¨Š´ºîlnÍ*Òj´ÁÏ–8"­®›Ç _?ÐêùÈ@«)¼Nœˆ´º²eºxa Õ¯X™Ía­¹FZ] |"­¦ðÀ4Ã;‰@«kùôM¡+´šBvãn›ãå@«),×Q|«šàêé”.ÖË?xõ§Â±³žBâª:é¼z.¼º^æ6<0…WS¸‘wï=dÖµÔo'¹XS¸>ßA€u=r«›îyĺ¹ !ÖU®~ “+!Ö¯–Ýz«6ƒb] I÷<>k /¨ëåÜ9æŽluptù†;!ÖõAZu<¯b==åH¬k!ù^gb==ëH¬ká±]îN+̺PÖè@d֟ʆù@n‘YÏ…YO­¤1ë© Dfý©°3ëéB#³žH`Ö”-ì8ò¥‡È¬çÂÀ¬ka®Â¬?vfMûxîág*̺’úæ„02ëZDoyX¬çÂÀ¬§÷ 2ë¹00ë¹00ëé ŠÌzzI"³þTØ™5…é6â ȺÈÊ–/Fd=8ˆu-:¶ž¤%ĺ.Kw’bMáq힇%Àz*{Ìé/®¦Ì.¶àñÆø=$…ÖYmÄÕS·põT)WçJ‰´z>0Ðêz­¬è96´z. ¸ºjS°Çòu눫'áJÄÕsaÀÕµu«Ë¡sÀÕó‘1Gg:2àêZHX{§àWS¸‘qÜAwÀÕŸ Gœ…Œ:?¼ºÚ; )ÓqƒVS¶úä5’j-”: ; ªÛâ;|tB=ÿ}Q—Ÿ8HB],‡£‡È§kŸÖÕtðéÚêIPnóáÓé]<=ðô\ðô§Â,”ÞBÁÓùý<=<] ɦîä6ðéO…÷ºà}Á:|z. |:¿B¨çÂ@¨?ލ£ô6 ¡ž+!Ø å#¡ž ¡Îï¨êüöMù˜€¨ë1ôw£=QëÛ)„ZÞ§@§sCp:÷•§ç§§—1ÂéO…Ϻà~LOGE0=½§Lç!˜€éùȘýÄŽRÍ‚AÀô\Àô»uëÖO¦káq÷LAÓó‘L*É.¹90=•E0=5¸¦§Á4Ûqí <›/%50]¯s¸8ì dz. 
dzþÅ@¦káö&6¿…ƒL*)3â8Vü”LÏG2=2] ‘•´¨F!ÓÞ×Õ#…LÏ…L×Âcí`‚¦ç€¦ç€¦kávô˜KAÓsa@ÓóÏ4=àôôzE8-ïzÓÓ»À´¡túŒËÎ…|IçãŠþÍ·íýÝv†—^9øÈ¥Ù„ï~}‰K3"s›ÞL¥—2öý',]§ˆ«›X',­ó¥ÒŠ=RN©ô Ù\r¦L~p÷¬ïĤmÌzÏz÷ÂÀ¤m¼Þ=Î;1i„3ûÙUCʤmpbÕ֢¤5ï^¡tõOÀƒjïÒê¥WâZ×nÓ£Pšó{¤ÎŽJc±s“ „ •¦ðƒxº:9Ú_½â^0p½ºí‡i{2ì íâ@Ò˜\Ø¿;:“@zaVfcâÆªŽ^0!~ºÃ‚âh›À(ÏÓdÎGó{pŸ}ŽÆ8òY·{]gmepݾ8 8:9N*޶Á&ÐÅÍöH§ sÒdÝcŠÓÖóHc\i]øÒ6´+Æ7Òš×µº¬xѸn«Œ¾Hãr¹ðQqÎ4¶+6hv¡‰iÂÅïûöÆ!<:ù*^˜Ìý³«<š4ï+J™#N™¬Ê£S”£òè”›'<:Ù`*NyÍÊ£¹l¬Û§^y4w²TÁÿ[öâ蔀®8š˜kûVøDEqtŠ3V­»G"Œ&äv=íšZÓ§U=èe×F§°N…ÑùQŒÆÈÚ…»&©|Úι ÏñÎA`4az6²vË •OsµÕRË¥ÌFç§(òéáõÙsEF§<[•OW#¤µÙòªx:ÙŠªx[Ñ5è‡E<õ¯N3…Qù4…K:Gù4„ÄæÇòédW©òij` Šv‘OÃdÖ .wù4§³é»gü¨|:Y ª|šºi³°·0ʧñ¬âŸ9iŠòi,«ìs|®Pt½C–ž¿Ã(Ÿ~°»{^‰Ê§“U©Ê§qEÞM>ýTgß³/‹|Ún“‹g¨|;Sûh­Mj¤òinåØvÏ­Sù4ö¨Ø¼®Pôü³Q>‘Ø]7¶ Šòé亪òé‡ÁÚÖ`E> ìk6³èZ}Ö1lmƒ‰¨§¾ é]±ÔÓÆõ²Ô2QO?Õ~ë¼ü "êé§šÐ=gS•©zš qwz¿êiÌA· ñõtnW¢ž’¾ýA/êiê› (_zêik÷¨êé\1¢žæWQ|x/(êiÜ¿­_éËïwr$®«*ŸÎ$òi.è’}‘Os¤¯Šï²ùt²HUùt¾ ‘OçG%òéég£|«=²¼“ùôƒLü\ì¾7/ F©ê…¬úi®ö š}ÑO'ÓRÑOój‘øÑ,ùU?ýÔÑ¿½hŽ}£~šÛ$~ÆUw¢ŸV'Ð.ŸNv¾*Ÿæ-ʧ§#£|šzcѾ-¼Š|_b{¤‡Ó@‘O§§¡òéé£|óYëx}F£òéd>«òiqûUñ4ŽÖÖ—»äEÅÓãèÓðí4}àm]Ž‹E<<‹U<»#{I—p:¿w*žæÈcl[Qù4/é;­êD=¾ªžæ‰¨üȨž¦!ˆÓe¥Q=ªGÄÓÕ¥Ûµ¦Óñt®;OSHàK3åsõ´ý5±¾UXÕÓÜß>v©zš[`;°3í¨žNã QO§™ª§¹k¥žì êiºû¹˜V[ï·¸¨L¼>˜ú­Ð!ŽVi@ V™UˆÕGžŠÝGê¡ÄîCz=5ûxØÛv«ŠÙGþM1ûHC©nöÁÐÕÞæÓ9ÄìãA‰8v"ªÝ‡öêöÁ{Iô›{@D· ªÙ÷·Œ=jö‘"^Ôì#Ï Åì#zÔî#ŠÝGnTb÷;‚f»JIì>2»+´gfŸÄ–£ÛÇt`tûx˜“œÃÐEÜ>ìõØïà< nÓ‘ÑíƒsžgÏkU·ÌùÄívtsŠèöÁK÷ qûà@ë¯ÜnXÝ>2?»Ž´«^ÑO×˼ƒ+ø}䪿+<¯àÌí>2«ŒvÜÛ'ŽòiÜìÞÝO;k¦ëÁ´Aü>8%;|ú.~ÜFâ¾ãE ?r(±NOJ ?&\+Äúb•¬GkªáÇtd"Ö÷vôP{5üHó¦D¬OBÖÎÃW”Y'ú¡Ì:Q>eÖ:9JÌZÓ§”YcÓiZò)³ÖL«Ä¬Ó‘ʬó‘άñ°7ÎÕ£™Y@¯D¬3ÎbmÃ<{ikm< Y3{¼ÏHÈZ›zbÖúz%f–2kí³ÖhbÖùœÂ¬1êrg°„¬m<ƾžÍA¸ k,ÎJO}NÈ:=iAÖ2MÈÚ¾š×كϲVÓÍ„¬Ùv½lÝAÁ‘µj²ViCBÖR/J¬o20N÷MTbÝ0¸KÄZç‰X§ÚVb­â…D¬Ók¬ÄZÇ´ YkW–µF­&d­]YBÖé•TdMCé¦P‰Xë·5ë|=B¬µÇNÄ: ±Ö)a"Ö©j…Xk‡uê%Y§ÛTd:»Ž¬SûPf­³ˆÄ¬ÓãPf/42ë|‡Â¬u”¨µ¯µÖQd¢Öéö•Zë(2aëô³Š­s¡`ëô<[ç#[ç ljH±uz/[§OºrëôMSp>> ®ó}FpîDÁuz\§g­à:}˜Îéëùm–kÐXÿ>{ê‚Ù îaÄÕ'÷\œV)³ÖueÖ;°ûìáȬ'V™õ$WŠÌ:çk ³ÆÔyÇ–ÑE¶âOýFÞ·S aÖ7#™ms“'eÖ6»Â“½n…Y“áIÌ×'5…V})R™5ÎÇĺ>H˜5ÓÚkë©Ê¬ObÓïž÷¨ÐšÉ;_¡n^¡5“÷6Ýx„@ë•í9v­ ´ÎVÃZ㌎¾»^Gl ò° ªÛr)¶Æ½÷ 
2KÁÖpgÛ¶[Ûkhc‹«·ÁÖ¼ù÷À*¶¶•d7SnÍ‘[à‘[p%V°žÎ¦¸Î6’®±í¼Çú§‚kýž±2¦àëÊuDe+¸Æôë‘ ®éqõ>[§Ú\'«"×|ö»á¶€k áÞ=!×ɰJÉ5 rÉJ®w¨ñsw¤+äšÂuÄò)¹æ ºÙ_ùÔ[ÈõNg€.Á¼#¹¶#9A_4‹äz?+r%AÍ˹Þë¨}lrÍ8!îÄrm# –N}5×ÉõN˺ì1¶ö!äšÁR†Íip$×dÐíöñ]‚®‰g±Èþ"ºÞi6KñÜA×œâ²ælµøFtÍ=,÷Õ_A× ¾lœÝ×o]ïdØãøÿ¼KÏ-¹‘]çúßШœI&ó15`7`°kfÈn°KÐÀÿŽÅgìÈ£VÕU×'4з.ož|1Éà⎹³.A×ÔN» ®Zì~:1{p}“ü ßz9˜cF&àúÀÝeeÜ)¸¦B|º¯á2¨àÚ~vwIøÂ­RflÁPÞ®Ô¤o/²?ÁÖT)tYÊ­­ñt¹ZŠ­Z {¤7¶®•î>‡dÜck$‡ÑV¾µQ°57¹éP°õ?…ÍìcŒØšƒìׇ/¡bëüC÷ìÁu|® ç:g®éòÞâHÀ5b¢1¢×väîMƒ\H ½±‡×Ž(+»OÁõÉÊTtͧeçŸÉ1‚®yÁØc¡ëø€]Øu9û(A×\­õëa/äš®ì4„\ÇÎ#äš•Ûã V¹>°ƒ´0b¨„\  WÎBÈõAŠ«œÞí<¹¦TEvÓž\sÊ;Ù÷”>ë£ÆrK äú õöYé#\¿þÖ“k†|eÓ \¿ÎåÁ5¿jëÑi‘çɵM2ÏuÚ?bqO®íW-žã¼pk;.Ù}z4½¸u¼R­Ãozh͵Ø5Ï;ÖñÑ´>H~¼–o¾@k®Æ¾©Q`U¡µÓ–^×6ŒÃZƒ(á4A¹‡Ö<’Wƒ@ëL`šPR¡5÷Ù×£qAk~ÖbÁg*°=´Î<®< /³&”8ÜjÁ3knd|·ÿ72k8ʱÍÝfeÖ!Ç5 ‘ kŸ!>u¸?EÖ|O·=á!\dÍ/úoAÖa>d©zþ,:"ÈÚmª:ö4Ì:<²¶Øo;O·¬9÷aâ‰u&í²ìç0}b^†ëÌØs/h Ä:^¬ë ¤Ê¼Ê ²Îc#fM#{Ý6K™5}„êJ“ ³~56fÍO5†¡éf͜²?p¡±"”m°uO­ùŽ÷kmÍxjÍ÷܃Z‡±L©5ÐÓºáôîjMØÜ¥{ß+´fà‘E£@kl*VmÃÌÂCë0ð*´s‹BëZ’ûœ r ­y²¸Í¥åC½ u˜ç•Z ¤<=o…ZsµÖÍçfPkŽÄü}êÉ=µ&B8ì8b÷ÔšŸ½¬÷\¼@ê¨5mT±ŽUˆµýÇe]gû`Rìnl\°fܹÒÜ÷Àš³m÷Ú^`M»ÒÚÐ`)òxÎú0 ¬ÉÈ-Cïí€5Sà¾OÖ¦À:>25¨Þ¨/g!ég{`Öqp`Í9·}mh °fØz Ï&Ö Û5Õ4 ¬é °”aW$À:õ¬yBöFœ¤À:>wÖ!¼P`Í‘éÇÃLÚk–ƒ¨ç°!kî3¯Êm¬ãè)À:þªë8½°~™[),8RãZ? 
¬·Ö6…]äß#g{ë ÍÅv΀Põ†ÜwÖ&d]·71¿´V‘õCPµê‚)²JEÖ*bWd„´ZQ‘ÄýåäTÖA»+*^—Õª²fé‹wGk5 oûÒ¨Š(kM¼ kJ—:X©kìc6[N ô."ë V‘µfÞ‘u|AdÒ~Ͼˆ¬¹­\¦°Ê¬%},¨¬C"­ª¬1MXe¦È:܂Ь)2dñàÐeªÈš(œ¥à°°*ky¢²¶WdHeÖvJlOÎqJ‘YkþdY[èržÇ$1*³†_Ø4D>*³¶Ÿµu×LnP™5产_²Ê¬C¿R™µÝ§ ¾Ó)UeÖA.¯:k*ÅØúf¨ZUgMÌymÓ¡VuÖšÌtÖñœ¢³É·ª³ŽGŠÎZrhUf ñ*ÁdÖ™dš~Á*³Öîª*k :¸ŠQCe ù[m‚Ê:Üžª¬C2ʬÃk™ux.*³Öç¢*ëxFQYÇ3ŠÊ:Þˆ¨¬Ã#P•5¸ƒD¯ôö¡‘²™ÓgUUÖú2TdîDEÖ±QDÖ¡#«È:ÊÆyµ.“T^>•W‡GªòjýRU]Ï(êêpJUW뀭âj=£j«C›H«Ã»Piux*­ŽGŠ´:œR”ÕñWEY­N…ÕaPa5•·\‰ßèâ-ÌTXm½¯<÷,- Âj]Ïaµ^«êªã¯ö3åY÷Ç0õ?è]ÝÈÜÍfòy53QYXÕWŸh‚®ýµ¾š9þºç³ž lܶ$ŸÑèÏiµPU¥Õn³ÎÞYxuÓ…WßXZGÖ«meb›«òèyõMqDï¨íyõͺԾÁmtÏ«oªØåi7,¸ú¦z—² ®¾ÙLµ^°W#N'‘ëþPVq·5+Øf&t ¯¾ -.˜ÙójkÜÑ^Ó¬m…|S¿ ¼úb5s{¡Õ¶$Ïd‚O‡O«¯ji³î¬Ðê«ÆTij1…V_ÖÍí£Ø‚Òj~UÀÇ ­†-ØkZhÝÓê e”…ÌÞRhõùÔ;åu•V_8ñ;»q¡ÕóQ‚=¬¶SÚTú^²²ê…¼ ’ «¶mú9§¦LXõÉL¶åéà+¬ú$CΞAþPS‘Æ'åõy«>OV‰Û>vl„U®°L‘¸gÕ§E—é>Ç6¬ÀêÓFžt­ú «iÄ `ˆÿVŸX%`¹= C<­†À±èíÖUÛÏÙ"iŸN±Âªzh‚ÊC“áÙ‘ ª>ël¼’H<ª&}ºà,5)¨¾x«œ°ê“a,­ZbªOVΡۣjkc¨^VÜUs=Š傪mRµ_ÉsoQP5³êí ·UCPé¸ã‹V}ba]‹^}ÐUÛ,¿îú¨ÂªO¼ÁK5Ø•UsA›3cVmG>ÅÛf{VÍÏÒçFGV³O„UÛÕf´ÅC®èY5!Iq¡ÀêØ(°šÿ ~n»«yzäÊ áù€Õö÷ÜßÑ}$Vs$ÞnXuatpn2ªO|êsºgÕ\¦…½C¾£¬š_=VB»²jk<¯•¬°šFÊ;•«Àjk¼‘d÷Kõ¬º°3ï„UsÜ6²’•TóBOgZ!¤ºPè\O„Tsdö&žT[ãpz|Y‚ªírÈuš¦A‚ª #²sÓT]Ƚwf1‚ªíÈãqfª u²SS°ê×m:VM¨oOjˆ.V(b¢KeÕ…ÐcŠP”Usàáük«æ›½—¡§²j³°ÖSаê‚U̲hTÍBfs&äU<¶§}¦’jºg$.¤º.¼³µ'ÕW0g&¤š#7gS&¤º`uè$ BªëælÏU[ã½³µgÕ±ÇòUV]° .³ŠªÃóRM› 3-IHµ±œS»¤ ºT åܲ=¨¶_¥ªøð¥TP]@ö›e<OªùYûßœx=©æ$g!)¤º›è8•jÞÈáÖ¨BªÉ¿Ct|nJªùÅä°˜jn¢,;%ÕtºcÉk”T³Ð¿ùô¤š÷¿ýò»²bý³ËsR]êúmª.(¥–é‘¢êДU‡ôCeÕô¹}Ù»+«~5zVÍ3H1uúëY5½n_Þ¤‚ªc›'Õ±{©ŽO]HuÈÀTRÍ‘›ÃªBª_GzRMÖ' ¾iLâHu©ï5Bª‰¦¶µŸ¯¤šÆâ|ÈUÇnÙQ5Ç 8™ñ‚ªãe ªhµ¿õŒš«ßgóŒ:~k¨íaÖékH0…Q¿.ÞCêu!žNô=³{: o»Ü¾Ðik<÷%*S:]Ð19T(tšsZß½‡„Rè4âqÆôB§ äÎÖã#ÝBð4ñõÆäx|p¬¦±,Q‰âi}ÜãG=œ¶ˆ¡ñˆ(NsÜuÍ5·Òé‚(Ñ^Í5Žtxº°?ƒRt¨›=ž.5ö²ÕA×Ú ž.p¼mŸÕÿOsJ‹ÃFbáÄÓuÍbµ ˧9_™¾Q<]ˆeì|c<<ŸÇÓ!?Zñôëz<ž.dGÚ:þéÝ_ø´5ÚjÐfñ烘º`æmóíÐÙ Ÿæbm½825Ps›ÉŽU„PÛ”ǾÔ}ðY¨ã3DÍÕ¨ð'÷R—í6ÀBÔ$‰Ó4½°=£æÈ'Íbmʨ¹•ƒ—a*â!u!ú´¹`ä2 ¤ŽR 5VvèÛ1j~ÔN8AÔ|°SÌŒ+AÔ…BëÖCº…2êømU)5H6ßÛM}ý¨iõŸ·}ýÓïÐõ~¥\9óIÙÑöÅðǶ榽þéÿ…Û‹~¾D±þ–bûã¥ü€ÿ•'µûûÙÝówœ²ßgý#(‘€·ÙÈÿæg'jŸçoÿñ½W0^îxÏß|ÿó…×?•ššØv¾çÙ÷³·?ëù{ýùۯýù_¸Lµm"J5 €5c~Aôcó‡]οÿC7Äÿýþõ{R‚¾þðó×ÿ7Û¿ýú_øO¿ûø…§± 
c»cßo,VþÎyöcžÈ®ëןÊbRžNXÚ/?ÕOõmCÆ· Sá3ýþ¡±¸]Ïo› l¥IÏæ/C¿ézØ¢ùG¯ëñß4tÂËußéz]Žoûæþ™]©êoé+œ=ö¹Œoî+õzB_ÑëùÞ¾R‡ í+r9ŸúÊ/˜|Ž»üd‹tÕœ¬®  ë±÷H½}•œ|Âir]¨oXº=ïÓ<ã4é=Ï–ÙrÜŸºó·Ïsüƒç¹€dožÚ<ÍOówNaoÔVÜö£×—-=A[bŸ,œÿWÿþùýüóÿò—?þéOúãÏýË_ÿÏ_ÿ¡éôë—å…ê¼"“­ò:œ]N0sÙ²l%ò–ýö œj^\Òær4¯ÃV{Ǭ­RRrÊ|ñ¶Ua§¤ìÄ+¥Û"áFìkuú”Ì®ó9“J:Å¢² jãíT&;¢´Ç–Ùw]Þ•¼9%ÉV~*åxF¡Ë‚aÒpjžŸ5Ñ·¡ÁÞÔ+Åòg¿rk;œðc£ä½=¯¾¬/ù\ê¼ó©mÞ¹G!‡{Ðiì›Ñãjž%Ñ þ6Ké®x¢Ü?3j¢ºéÝhQ©Y]ܼ¹U.ÝúÔ’Rà5ŽÚ¹0Ùí,±{6\Ž”ÉÂ6û`½˜Ån«¢}a'}ÐÜ™ Õ¿úuÂ?¯åâ=‘úµõýR- £Aß|.•r~‘ŒôÀ^cûÛ²ÔT²£‡p¶°%Ø*Õ+ìVì·«gOt Ržd>ésí ÙÚ,”ŠB»õÏ-µÓiÉØžJ•¶«góxòLtbç½Íšr·—O/»·«\ýjÎkm׳•®klåd(ƒ<>lÏfûørûÑk[[î»èËR  øð©«”²M²Qlª˜ûæØÆv[ÇÖVÖö8Íèëž½=PÖÖ8¥ì“õ†î}Q®Ûíro(4€ã[{¦ìî«çîø¸Œ»`ûs 9z Þã3ºóÚ¦Ø6>gõ÷±¶œ)ÂaƒúÕU^å>צ2•mí]°uÔÚîµmÌ–^²qk| ÷ã6†÷ ¦&+<þ¸ÚJ±ki?ŠfwÌçñbêê–Ò#É>ôspºMZúº ßC‹Uª¯nGÿT ¿l¾„"`sèÅ(v«ˆõúOJÁ¡çm mÒv;¦lÙklZ´“bfsÜ¥’:…7»ËÎã¶=)Q¿-÷‘s»ÖÖ&µÒË2Ò8ù¼ç°ËBò,lCÔÛ°ifmP>U²3MkO¶µÖ¨kÝã¦??gkÌk›‘[ĸøh]é´ùlí&¢ÆLÇTéØÐì6 )4oc˜}íZ÷Ûm î¼èë°à£iÚÚ÷cô\†9gÚÝÖž=È-³ÕºÌiÚÚ½ÛPåÇÚÚ³KÅmÐ….pÚ„¶váìj/*Úæ»y»¶©pcòimÏÚJ£šœ3ßµï|m–Ù ¹ØeÛû ÉÙí‡Ù«;…ž&°‡\·¼6ü€Ê°Ú?óé6µè^ö°Ù.j×Ú¾âΘ~*O¬?æ[Ãy™iÍæ‡î6“–&Ì>Ù¾˜ÛMñ²M1FàxëlD¬8žòrÛBu{;Ÿñ`Žgíý`M}ØqŸíŒ•¬Ïà syÛN8çnÎyŸnØ'ûàçêÅÊ¥ÙÏRÖV ·apØ]Û2Ám¶l@þ´J¥'t~[±Þ*6c·ûÁÏO£eÏvžÉmŠØ³Ù(KTroÌnßccGõžEÎjl¼¢‡³‹ÿZÛé6/6¦5uî6Íœç½ö'âWŒØsÂñ_ûÚe@ïmË»;©ë³’’Ðåñkÿ¥ m«(ýc‚öQÜòï騴A‰Ý7=bp ôuÛ3\c!¾vhWIpžâ®¼bëªw~?['{—Å-ÃMSbk‘˽X­”}k·Mcë›Tªe ª±5ŒU"Ý]•ØåXÞn[£ {l@¾—\YQ k<=\Gí‡D×uË1£êhß«„׃Þ8owŽÄןŒuÝC\l–&hïn˜%v4Ö•Ûú*õùfár‰±w»i[Kμz ²w„Ô,ÏÆBÊGÙŒÝ6ëõFÙ ]NŽj=ȶðì§Z15HÚEÙ{âß ¸î± ³w¤éw­ y¾âl–e·ýÁ:Úñ ´÷Dô|Ø8p½m‚­ã&=çLïH;1[ÿÉý$ÒÞY|/½™DÚ;i ¯<}•ä#mV‰Û½vê%Ô¶wÅXÊ6äýе }OÜ˶&ý“`›SÚ•–Ôɶ÷ÁÕV_½Þ½†Ûˆ„Îëõh^áöÇ” Ú¯p›h† <"Ooï6Hmv6Þ¯€ûµ>ów]á ¥¹ëâàZÞÂsó2ãýžÓ;获âƒnBa{¹—¼– ›SdM¶Ø¤Ý5¤8ßÞWZ¯¨Û¯0CÔÎ{–Ú}EݧU×Ó¾€u_6Ì AÙ+êN®ŒÂ+îÞ‘¨w1î+î¶?O[Ùw3_hUÃnTзÝd)ŸÂîskÂúa7IF6rö/9†Ý÷¹|æBØË ½Ó¾à…Ý$Å -²†Ý¦E8¥‡,v[§9vŠ:ìÂnU) Ò_þ;ì¶÷‘º84„Ýa‘ a·Ý 1·¶ô a7¶å9ãîÃåiÜñÖsl}ÁânæÄ qfŒ»-¸²ýŇÀÛ¢ êøtÚ¥7R û±¿L1 5Bd ¼ÃBOoJ¤?3!#Þç¾ü†%ðh‘-Ú³ BÜàæ9Èö{ÅݯÀSo é¯zAïÀÛMûÙÞ¥$ìV*"aw\u‡¸ÛËåy(qw…}$þ>Çý ¼kÿßG•Ƚg¹JàMÍVlº>?ÅÝ‚¼³w»ùL’ž}1ân|‡=ÄÝ °4îΤc5çÃnê£Yÿe–YQ·|)?ZWþW¦‘¾Âoë)p¯ð;#NZN~[#ú§Q€%„ߌ$äï÷;þf$±ñ-Ï(ÚÇß„ÔÏÊ 
Ôø;1eصwÃ7‰¿k©{†gœÓÅß¶FÀUvň>þ&¯õ:ÚþŠ¿9•AOêÒð›ùÙ‚€ÞÛ5úµÚ5úî}}©ÁãF$ú¾X®yB£ïšüàžD߆“1*øhô}ó©l¥W?Õàûäbm†íNz|“3ß¿?îZ¨(åýî>°|_L•ËèVƒo iØ¥^WSÂè;d«hôuè‘W`â£o2‚ž+ x¾C ¾9ó¶ŠÜkø]w°V)w ¿OÌ ¶Y(BÃïLÎbšš} ¿C ¿íé[`\îñQJüðð™Éú~“‹5û±=ïðwqr:ûRA¢ïàÓªÑ÷4Ø^ô\øè;GYL:‚L‰¾ùZ¨t6v”$úá}Œ.÷3Âk{ׯúýUÊ;öfD K¤? ½3æìÊK ¼íãÄègÔ~RÚMÆ’uŠ”ûÀ›•›Md#™¾GÞ‰¾h£_OHÑÈ;Ì3™G"o[%îL.{ßV‘ÈÛÆÉËÆÇ± ‘7‹` Ñ( ^Þ¡7¹½;»=ôÐÛF¿‡Á«3f ½,Ìgz‡Þ¼{&‹pŽwð‰m%pç7ófd—j“VG÷}§úð¯Ò{±Dß,À·î–wø°–¾Ó†~“Úqm¼âû~“üjø]óVb¯†ß„¾[™u4üÞ©y±j[iø½“V]ÞÑ·b³)66|'’ÐM—WìͪõÀ{þj_ŽßiÉÁ5øfÛõõ>‚å|“×Aº{C{óÒœÁ‚ÄÞvo¶ò²pìSìÍöFô{ÛÃ~ÈÝJú{s~{Ø=Hbo6»[ßwS꽓$þÌ 4¾í‚ìF†%uŒ¾¡s˜õ^.Ñ7»%¸7÷Súà»vrô‘3Jü]¿t{c5àö$1¿Ê‡|gÒ¹±¬×œ>õ̬=¿, ä›lœkVâSò ƒ"pèÏsJë§6-§w¾SS/m}¨–<2”SC!Û·Ò7T|¾3®¤%ß·e¥u¶kx’Ñ·U<5¹ÊˆÁwû÷¿&gœ´_ÍÕ_âN´MÏš—¸$³xS¤ŠKÔ6CÅ%ÙâiVÜSqÉIµT{_c¶qÉI2u²oãüÀ ÀN›&K‡PÀCv·p¨±M‡ÝKNù7ÎÐæûykKªS¾=ûpË;§¦P>îó#ÿf£èx÷×ܦă”›¡Q<¡n`««G„Ûme»ð1UhÎèoã(,Þ8Im$Mwv,xª½ÿ°«Íïœ2«§EÅ=“K#pë1‡÷¤üŽÀùäí½”!|'w¥ŽœÞia›w¾³ü ¡ªë|Î{_³ iàßT rû E©˜‡šÄGàöõ^8ÀôÎ*8ž>i¦Úü5ªñ¾àQü½aµ0üVþ¦R¬ýD“$†ð{C͹-á‹âoë›èŒ_¿mP·Nµmç± ƒáfOu»÷ë€CÕŸ{_È]ð·²A À±o"q|< ÀÙêÂh{ŽwÎÏ>«&cÀß$³­R^[ЕN»‘A¸} Ž€À–aSÅ£ø;k¿±9²Q ßˆÒoûw”qêÉa]sBE‹U†)ÐoJ™â#õ ÁAf„sŸð–>K)úÆ÷æÀ‰à-8nQQ ïðû!ê»±Ë+ú†Q± —‡Åsï°ïcoH#~8ý2{ÄeÖAöçxÛO’a?:‹Rï&±} »)¡m³Ñs~Šº‰+Ø y†¼ÅEÝb»ða•!A7¢µçšÞs£1"ù6õ¨Ë‡Üvü±o³IÀý:,¨L,À¿m¤<^72yÀRh· Ë©cÀÍ-X°[Ú_ñ6èˆrOûÞ{¸ @L«ø„Ûñ͹h›>t%rdßÁ6šÁ;Ï<_‰µù$lx¿Çâ·Úïíž›uJ¹U «Ï;ЦFéý}ÛLâìj÷@Öz×:ˆ¸©ì9Z%Ê~ÀyPÄMtÎ[ø[!÷žV7Ֆ䟆ÌG× ˆúë‚kŒKóuöŠ˜** OYÐ6W”G¦èKRâF5%Û8Møž¼ÉvМ*ØFeÃÐý¼Áv4\TÛ†ýÆ0 Q®]­ J ‡«ö_ýq†ü–ªó¯x»»êNöЧìŸ6~x6Õ?3‘xÚ⃓Ué¶¿Ã郤aì¾òý§žµÛÜ€Óp;¾»qw8½£^iÖN£»¦S·(Õ&ýšñãN³ÉöØõ—’^á4TOfHzÞÑ4Æ‹¦>(i4½áÀDÖ£‰¦7Ê‘cÂÖeÄM³~ØüÖ½¤4š¶‹µϦâ»h:ÊÃ%˜û“ª×Ö=Ú¨×>°ºó›f£²Í¬|:wt¡4ûŽVû+’®¡E~ŽÔ×]­¤·¢Ö«ìÐøÃh‚jHxNÔÚáˆZCžÞÑ| F5‰¡Ÿj4d¦¿WC³M¦K¬Dª„Ì"ÕVùJµUï§RmêìlÛ`Û^ª­‚Þ=[pmÊxÞÁ3oüÈž±³NúªÔ¾ñ°ÞF½/UjŸ(¼»™¥ê´£Ûë´Ùp[%A§í§[ÕiãLuž{wj”˜Ùz µ‡ÕŠ™™@NGD¥m‡íh¿7¯Òzp1ÓæÜ4%b>0Òš¦(23Ë^ç\.jÄœð*™Þ 1Û…âg ‰˜íËs>š1ïÈìÎa½§sB'‘ï¾®ÕˆùàÙ¤)%—y£Þí=Õ'2ïµ×ÛlYÞ!s¸N ™ñ.²qïa¸ ™©½DIê¾ã¤sC÷¹¿.ó^J×%bfV,ii¨}ÄŒu›.ŸoeöMb…]èó!dÆ„Ìæà½«§$b¾ŸêÒ›‡X"f-â²7}ÄŒBæFxŠ˜í˜{¬{ÄÌÖüÁí ¨_¿æ"f*I>÷ 
´]Ä|#xæö´DÌ76™‹œIÄ|c5ˆIÉ€È.b~ý¦‹˜yV>áùM¦9ÍÔñ!`ÆÁÓ¦ãsÅ.`¾1¥=òÞ4$`¶‘åda?¸³‹—o¼Z¯Eä+^¾±5Ä;«A šqœ[È0ÍHéóب9|‘1j¶Yzû}ÔÌ¥ÜlIôR¢æÐ)}ÔŒ­3n@wÿ|Ô|ã•I7{Ív…R(1hf$&M  Z%h¾qK<§~°Íö…Z4Ðüo^ù2Í6üØŸ391höÜ4ŸÜÔµ «/ šµdµÍDˆô§ÁB%hÞÀºynÕIÐçl3Û ÆÐé莕!dÆ%о‚š ‹òkCá#f^• ùÖ_ß›5«õþir)}MÓZ ˜oü”RØÇËXhX×­¯pùÆ)lÙÙJ´œÐWÙ³åõ$Z¦q¿ì½öwá£eÚ0²by,óöŸc9ÝJ´\1×,”×4^6)ÜÇËõŒÉÞÊNø€ù}`˜k?,6*÷Í ™iÌ”NÊ =ÓÆ{¸†ÔÙ͵q®¹â£æw£ ›iÄ3o˜}KÜ\«€ð|&ªfPÿ$þx=;㣿ïµþEzÏ4Z°çò]ôüzz>|®O|_ŽÅ?ÓX˜’óùÇû‚\McÝñ™±>‚¦‘¸iø'JÐózþŽDÑTÀø¥Œ…º£9²—8íãhΙŸå³-ô»ÑEÒ4Ú'¹Ï0Û…Ò¯;ñ±t=0­d‰f¥DbšEß3Ø÷Á4w°YÚó\,][Àgc[ÊÓ42;ÜcžõÑ4'|¨›ÞkK8Í‘6íÜÇLktñ4³÷r ¨ëk‚$÷Ž,5ö_#ÃDBj/bê7$¦æÛÃ}öîÌ'A56$îcJ¢êúöÁ-´û°š#ObŒkH¬]\ý>§ ¬ùÙÆÅni+5n)Ù˜ÑEÆ>²®²^ïº| ­k£ýiê|lýntÁu}îçr¡—èº>Úlë¾ëCÚc}@7 ùyé¯_§ô6˜Ó2Ò#ìúج'M¼ïClžøóÖ€Ô~Úsdsà¿ú&…²_WâÃì×ýù8ûõª| ýzl>Ò®W»³Ú¿kׯØn¾.Áöëg}´ýntñöë ù€»~Ϫ\ ÷ë!ø»ŽVøŒ«¹kc¹ì¥÷žîƒî÷‘.ì®ã  '~IÜýnt÷ûg]ä¿<z×á9¹ÄÈ»wØ/Á–8ÿ0¼Uñ¡ßcî7Uó ¸Õ¶š?¨@NF¼©F*‹}Úi¶üRاOLø–aW‡ÿLÎr‡–j1‚–jÖ—ZŒå¦bkjj, ô—ÅÈ £,Ÿ°5‹A ­’Rlý O²«-Ft+í…»TŽè0â·‡^ÔÚIùÕa„µ,vp¹[…7²®Kˆë…Ãûm~ØGvµ*@ ȼX ‘é‹„¹Â‚·©Þ›üW©}8ôÙÁfD]BâŒÍ©6s—W’1„\??U›œ­oü‹þ#âzAךç­èúYÏ/†àq3Cе ZÑ~C„$­È®}’·²ëà¿"ìú¤vCY²lo3ìj„]ËΡÞq'@ÈupQr­?)èš±6Q 2¿bnö+òú*Xеõ¶£ç9_w€v]+7vè:,ò]+má6ð>Öæ”¶9'ÛOyHGºÞȯ_C ¶9Œì—‘sçƒíˆú]ÃRPmœo{‘à?$‘6˜‰L¶éXâÑu ™>Ά†!dû ö¸¹Sœù ’ÉÃ;?8‹ðÉÉÏ=SÓ‡Øö›¶|:ÆZÁØV ·¶°â5ƒiný¸ô’_έ3éív£}5$Üú"QdÖa‹ÜZØoàÖÂi}` :ÝËÅÕw‹z§Þ‡Õv˜ é3¼éغNCg™y*ô°K›Vñ[ÛÜjqr‡°‚­íMÀNç­ë[ZK1ÁÖTy± ¥Š­/·E}[D„áZ[Ÿ(‰ì±ÝïXš¾ŒùNOP¡G¢ô^Wn]JÕl´W m×Y²[Ùp-Ú‡ÑÔ|9ž™©-Q´ÝYÉ>ˆæ[%}lkùúÆu œþ›B®Ofm›O÷·ƒukåÀþ\„\ŸÑ™×£äš²ެbÑ{„­$®í1 .ø”ÇÈ0Å™s¿õ«„·ö8ÛßZþõȯ†\ùS+”JkûÓoî3þéB~c7Ìqw?»;þ­OyS¬ßèüs½ìßü ¨9.`üù[Ï¢V0þã;¯ ?ööæç¤µ6ü¦wÐ.`üù[Ïß{¿€ñ®à¸tßÌ6ÜWû2̱,l´á ðð¯ëžê*xÃðÇzÉ/>ÓËüœ©–-³uô ÷ýÅg:¿Íx|•k€þ»Ì”¿s¬tw™–'þ·M ”<Ã*Úâƒ/BÚ¾çjŽã§¶-p]¯«ñmß4mæjÍ.K¸ßô­Ýr[sÈ·ôÎúˆ\Ä÷ö‘z5ÚGôj¾µ´AÂ÷¹˜O}äÌ5g­Ç^‹‹f—I¿,ìÇÿí_Ñf\OCPÞ2Î)f÷¯i3ÎS+R¸ñJé_Õf<œg£Î .ǽÿbŸñ÷qfø‰ÕÝþÙõ÷¬ÞùŸ-;ïÎ鮯ÿ“Za |K]ò/<¶¸)ï’¶xü}ŸYÿé?þçÿò_ÿÛëz0Џûõü½'@ùÌí°ÓV… ©ì´ŸÏýÿ5ôó›|Ð9ÌÖ¦kœ÷åÌ2Æòy¼fÁŒH-5ŸêB xnÇÙÇðu´âêÃÑÖÏ?% à?£‚ámŸà<Žº£ù¡øãý´Jνé!€0ËÈ%´õóYÛN|2m½_˜ áè-Û+GKi mÝþU‹µî§õ¼ÔŽÃÖ»×ï£ÜîZiLvÞºWBaѯZÄ×ÖÊië·ÐŒOáç¤0Ž 5%êA¯òy~åZ¤+¥q-Tk£¼˜]ìUŠ IG]5'Š] 
ë˜M¾°œiÐ>QZà:𔬖oNûØè§mn¾¼Î‡Œ¢ò®…ç2ç3áTϵ(ZFõÉþ3Q3vzÒrö½]çõÓœè)É‹‚·‰-hzè-¤"R†ÛþÉ¥í'ﺛƒ“am³Gtú,•/o[ÏYw  ²hý¤÷Y2öNöo®ÞÆf_ë³á;hû@½ÏÒ1R9Zªùé}–z$ù´Ï¶îfë'­ÏRëó²î¿WàœH^ ›ûƒÂï} % 1–o‹Ç&^Kdh ¾]ïý¢ŠuûÎI >Æ@ž ™³e¬¨ìÏØÓÙƒ”ðZùèmì§¶{œÞa"ïìÊA–fcÏ•zæ+seÆzOÛÏ{­~<:,iÀd×'WwÛσ,m÷9ÙÓ»&åý¬{´½Ã’"|Ž(%ï|}½PüØœ›ûe’/5Y& TQLÖ6‹/î1È2¼Ý#–¦{­Ùç°Á­Î ¹|!2 {®§¿aò— )·+OèÜëï8åµ#\ŸâÅTå¹Ú/—Õ]7ýšÜÝ‚Sj€îŠ­:5ÚmaĔƋBå´õn[lh¨;ɽ»†Ÿäk.«»úš(¬>GX’ÄmÕЦ#û2õF%û9·–Tiçú©½Ãú¯ Ã’2GXvBm€o£a¶õÐ>CÙE’…Y{?ïØcuXß)subVï=[u•ÕaýçˆùÙ ey}ÖUŽ–×3²ˆ9Â’²Ø$8½mîx¿Î‡dbް„cýk¡ {º1²۶¥žYÏÌPVGìð67ÀVÏÒV0 (-Êê°;_néÀÅu[=éÃsöãöšp^ošš3MYd–43’eëÛž{bÎÇ‹®€müÜQ7Þ[Ÿ½kMƒ½[ÒÒvÏH–rÖUí|…µxï³úqs£3’ÕYºvÉòg[c”ÞF é†Xg‡ÖFê9Æ^¸õt Û]nŒMhªšÝC ¨n7ÆÚ|4Vt¹Âñ²º¬<¥JVˆÌ·ûùÜa ögË… äiRoʺ]›aå8T*e…r%|Ÿs„•±«àí6XjÒ“óV# ÚžÅê `Ó‡W¹ê©=-[\•Ã:Æþt­*Ë·÷zòŸÙÌíXÁ…¿{3ߤ²uí^æ±¥Õy¹¶û±6ÌS±c†°…`;ÆwU&2{¬[ògR»VKžŸ½ú«‡oÍ »Ñæ¦=«›ºŽ6”Ü×R—ÔÕ9)>­|OFá5CØÂœi¯»Oä-ΖçmèÄŽÞöÌ –ÏåÆ;´ y6›\3ˆ­:ÆÜ´’¹Ö¬›œ€B{S iQnÕ«t¸E½“2D7¶˜ei3:,¡ÃLY§mNʼžíÙílUëœñµ™!ìÀÈÃ-;?¼É1ÀÞ *9\í8æÑ1ÂBÖÚª¾ïª˜ÞaÑöŒö¬Ú#[Ñ´®‚ßÒ a¡R¸IöÙªêP''8ÁAw\t×âÔßBmƒ´Ì1ö"ˆ=-š(_MõÂØ½“YÐ$:¿iUž€zëZ¾‘„׆' ÀÆýÖ‘õ²P:Åê=ª“òˆ«õb÷\ÍUÚ Ý£ÞƒêšÕôõB vðçQïaÁÝAÅ•N <ê=4÷· œ;Q/m(R{l)¨÷À$xŸ.ý‚z‚lìyŸ£áU‡zi»h_oÔ{¸c~w6RâQïÁ‚¹¢‚z¡êå@ØØ°’G½w¬8KP¯½›šVB*àW@½lìt2æHï¶Òn£[Ò{$&J¶Švò¤—³ÙH5´–BziËmZŸ´'½ì 4]ä6.#б×ò;G— é¥ÍºÖÑc"!½öœ~J5Ãí~‘Þƒ ®çÌ©Ò{Ô%­­¤:s¤÷ FJ׎è=éå°Ë‚¤àéåÎl2ªèõ+Þc¯fA£¬©Þ×ùé¥ ;ÝñiyÒ{ìÕ{’eOz9._3ŒÒK[jW]¯Ó“^®Ó~Ä®­ÓUGz½…®¾ÒËq$¨îïIïQK Z7ºÏF–éål±>h‚^Úl’Þ;žôò“8Úö4 !½õ'í?REžôrëkÝ6Å éåv¦µÁ_é—Ùõtá=饭 [=bô¤—3QÂþäŽôÖŽRsyÛƒô¤7¾pGz_?éH/m5Á­ïxÒûº1Gzëq{Uã·Ó9Ò[ß·E××Ù´#½ü&4½/S„ôÒö`|u*ëHo¼Ozé{O­–×1©#½ ÷ÚäÒ{T³ÓmT¥ÒË=2ØL ôrX-HÓ§zisyYzùÉ͆ü³¥Í{Ð[Ÿ&Bí¾ÉàA/?¹Q•©! 
½±;{Ð[¿ÖíšA¹½´Ù'g#\[™vÐËÏÙòj·É©1TzëP“DÚ"Þ£ÞÚ¶Ï‚‚zù­WPy²çPïëê­MÒv!ôòƒ'ïΕè}Ý€½µíÙªýÑW ½uJ÷pëÔ[?¬­á™¯€zëÍÙÒíμ:ÒKSÆÆ¯/ð=é­?IeŸÎà=éå¸@Ôìá…ôÕ…6QlµáUGzëù“Ûþ°Þ×µ8Ò{T×[í{8ïI/Çm×ܺô¤7Ì!BzkïÂzaë‡9ÒËé.‰ìË8¾ê=jI7ë5ƒ_:Ö{P¬…*h}ÙÚ4Ã<+ü¿R»gÏziÃF¦ç ëåDméÜOäX/ï¦>á¾Tô¬·>G[QÔìOz¹:ûó¶ôÖÙÈzäÝw<èÏÞƒ^~ò öx_kxÐ4zi³ž5™½±oyпSzëùö1† ç¥iC“Úq»ç¼õ][ŸèÒsÞÚfAÇÕvç„óÖß´ëÙ:R÷œ·¾º¼O$â9o}ÔÙ^mâ9o}ÒWÍÿh—é8o½[ó<ý(Çyëû\³zÎË z¨Õ·H<ç­Wi‹¬¾w?8oý~)Èq¾0/M¶<kÁ¼uË -yÌK[­ÊWÀ¼±ë9ÌJyãHï1oìyóÆ/ÎcÞÚfK‡l æ MžòÕ®{Ç_¹^¥§¼µí¡Öwg¡Žóêwê)oüº=å­Bi¢Î =å­Ä6pˆg¼u:²5MêüÔCÞÐä S‡£¼¯kt”7>,yãèé!oÈSÎÉy9Ý|òÖ_³»+>ò†‹ðŒ7ŽÈžñÆÂ3Þ×.Æ[ÏÖâ²v}ŽñÖŠŽÚw®<ãA‰‡¼ú:=â­qßxè¨Ü!^ÚN[…îÏyÄ[ƒ;J 6%‚ ^FŠŸÊ>=xÄ[‡k²¨Ú6¾ Þø!zÄÇ]xk—œ?„ðÖ^‡UÊ=á­« UvTè o]Iã—^„7žð†ÕÞ8:yÂ[?n< úƒ'¼5º µ?õãá A¼uD®\gåžñÖSQ»IV„ñ†…Œ0^…8¦Kã}æïë2ã «a¼ñåxÆÖåÂxëêáÁC´¢Ga¼µ?ïNHå«‹¾z)q¡¼pŽR“´Û\å)/å¨"²Nrå­ ƒ>¦xÊ{ÔòFÐÎç(/mÇfêoÜSÞÀG„ò4ä)oʼnÚ5;ÊËaöφu‚P^޳¡ùj;zªöG~Oc ¼´?mº­ùzqßSòE{-øÌø¿}€½T&ØÛZé{±»l*•Àz‰âÊØCUÖ{°?jü)äÝnJ†YèР˜@^]ë½ o› [çÈKBõÑÉòn'õj§Q ïVG•ñÊBIoµ´Hg_4(ãµK¹1ŸêjKa¼`”˜Í„P/•|.dÕ÷KÍ[§c]Ó#ŒT„gº†bw1Þ>yÙdѰ¸0ÞJvöQRB¯ÝÞ“o߯‹ûØMqðÆõ:ãå¦Ë5e«Âx)Ç&UÛ¤VÆ‹™Î} {@e¼÷Ìà_/Ö ¸bµ_o<›G¼x@Ú‡z÷áNoõ¿{†I¯õ Ó‚ƒ>•+â­˜¾"ÜÝj!û.˜T¶‹£[ý}›FØ.}×Uý&¾"ÛåÆlŒ°],Þ7›kº_ØîV “3{¶»AÞÃ,Xá.ŸXÔV¹_îâщƒuß—¸‹§RZ{™îê¶ËÅ{¯Ïn"â}ª÷Õ(#ªlwC.C5¥FH„írw4\G×Ïv¶koÓ–ÄS$l—ŠðxýöXØ.?W¥-0¶[í©÷¶Ë·a‹‚®}òhw«õË ŠwkÍó¹í«p—áÇÆT{«ü.¸oÚËx))dïoÏ{¸»ÕúeP»3ŒE½=亻թ£¦¦êxñÛn[Ðtüéé.å¬,ƘYOw±Ë;í=·m2¥»Xˆ-kd¥»TÞšÖN wuÄV¸‹)NÛ¾{¸ËÙðìF»Õ)k»Ç*^àîÆrü¶° wIw#aª/î²»f‘N_<7!ïSG–Q6OáîV‹¤Œd(…»ôìŒ{P"ßwùJ‹½­û­ã­…µlIÝmBwÃ¥{¼ËÙØÑÉCª»è..O”ˆí¢¥»2”(Ý%Qr߆ë‹Ò]ú Â[“7(ÝÝj1[ -&ºËakÛAš…îbwl‹ŽRJkóx7\§àÝê·ïccDd¼¼¼åê§x—ùh•U¼Ë£Æ;¼]‰‡»ákô"^<Ø0ë¿ÀÝ­V,=ÐTº†6¥»Œe6Yu…Ð]»ÈÓBùÝåÝXÜÖ7ãïĘ̂t—l­.8º[kÒá3´ò»è.CeRÜåëxlVì˜_à.ó"j•¾õ$^qî†YànÐ…îÒIö<¼NUÄJÁ»´(ÞeÞ·Ùw"Wxé&[Å ÕzÀ‹;÷Õ¢î¯xCGÄË$aAÈÄÐñb!z{ß[WÄËÔ‹E['\žñ†+Œ—ñœz£ ä _@^¾ã{K ä —Ò!/7Fý¢JÞ: Ù›èFqªä E(/£-×ù¦¼Xx[×¹žò a«2x³§¼˜yZÝ>(yãÔ!”—wS¶Ük%*ç• L1¯é"äÅC¾Ar…òn0Ëml* ååñCš$E…¼O­e:”A*ä%t¹líÝ–JyÃd+˜7¼nò†ÑR0/ÐÊJÙgÛļ¬Yr]}·Kñ˜—^iÿ°/(ó†K0o¸LÁ¼á+˜—SÙÀ¼Ÿù…yc·ÌÆÁ¼2éäe´ÄDz‰jò†¯M /qkÊí¯y ®íJ{1j…¼ºUÈ»ÕBl£ä†B^| tݶ@^BžËV3çõ†¼8õ÷Žñ!ozòÒ†+`—Kä Œ@^î݆Êrtpì!//ˆ$϶7*Œ—ùÅFÖŒ7ŒÍžñ†E’0Þ0þ 
ã¥Wâk×ô¸ñêd¦ˆWÇËŠxužú†ãïR}ûd^”©‰rí×`¸ïR®|#廉<çmQÃ˶!‘8®¤|×b}çË­|7UÇé˜*ßݱˆœÊo廬fÅ»l]ØÚÖ–é+âÝÀ$UÊ»ùºïnÕ!D¯ÞÝj!ˆœÏx—Õ·ÝwÛSxw«è}”xw¯è½ë¼¤¼ø¼÷.øîîë`¨–w¯fçÓ$B´¼»¯"ñÒòîÃÒó¥äM ònÇ!€7íÞã~Þ ÀË!èÜ| <ßE½¼Œø•ðrË|­ç[ÇËaT8¹[Oðˆ7%_MDolóˆ7á—k!ußÄËq×5ù¨ ^<د%²Ä´Ù‚xÀ‹BlÕzÄ›˜N›VE¼·*(ãµ¶f„ÛµºžñÖeö9Me¼7l`#ãåZl®ê Se¼´­ºÊxùÍU,Bo¼Ïxy «…2ÞøÈ<ãÕmA¼á°Žx9Ñ*¢ˆ÷ݶ/}Á¾–«ChA¼GÙÝÛ<ã ÏÃ3^(Gž5Δñ†=a¼á• ã ]]oèêBzùMjýv…¼ Þp-‚zi³Á«¯_õÚý±èÒ E½azÔÆA½´Ù ß£E½ñ8z‹¬o êq«Â ^Û×´$¬7æu¼a Ö«£Š²^}E“õêס¬—ŸczÕ³^í´Êzãqžõj/Ø«w­°—¾~_ÇP ì?éao|"^ɫߤÂ^‡ö†ãö¾Û–”7Á¨_¦ ñ:öÆßô°WÇY…½:v(í ½Ahols´WÆD…½|v”§í«k½ñ*=ì P`¯G]É.O`oxŒ{ÃãØ/Â+yão:ÚkMOIc'Gioø@÷†7*¸W‡DޱÍãÞp ‚{u(UÜ®EpoxÔ‚{ãù<î ï[Ô¼á¹îçó¸—¨ˆǃÎzà^‘WóÆÛóÀ7t/¾áöøòØP1é­¾ ^{ÚÓn c_"[På¬_ ^”ø&*gØÂºëµ„ø&òÒÄgB|¹ÄUÉLox9‚|CG=o<Î#ßxœG¾á»ä~S½ÜÂyÏý/A¾–*òåqÚÍj"ÈWÃ`E¾ñ8|Ãô#È7¼>A¾‹(òçsÈ7¼=A¾±Í#_ž…ø†áYˆ/mvsGÏAïÄ7<+!¾«®—AÑn£ç.)ñÇyâËqvonøÆ£<ð ߢ_~§ÀO²Þx•ø2mTXÚÆžø&ê¼-;ö†'&ÈW#VE¾úÚD×ûjZ²Þ:¹]©§+ðåJžY‚E/WBñ‰NùDמŠ_Ž+å´…uþ À—QÊîiêàox |)(f£åØà.E€o<®_ÆÞûš»â¢ëå½ÙjèlVm |Ã% ðå7w²Œ›\o<ŸC¾±É#_ä½-_P‰¯ÓÜ)îÕâ^²yhs÷&êwY/ëûf‚{í‘Ü÷]wK¾"î s‹à^*å8g"Á½á1 îMä¥Òs(÷‚ Û2ô­é¥íLC¤¼W1£ò^î!ÙÌù´çéy/éïËmGp/}‹j[].¸—~ÞHÛ˸+±ÑvÏ@*ðm©.òrªùÑbÞû/,æhoæžêóKÍkwŸ—xMÕ¼T0:§Ÿ€ÊymÙ—„Pä¼'¹lçˤ·6¡IìRÕ¼ü‘qÛ[B{ÃOzØ$ÇöÒt—éF)°—6ö—|õ°7\‰XôÙ(Öä/Á¢—òJ×Òùz‹Þ‹ŠÓC z/d¾Ãî)8ô²ö[r4qèµ Ú>½É/Ä¡G`g¿z‡^‹¬C?¸J~Ö[ƒ‹kîÿ‡^r¹î~)Þ¡÷l½m¨ßÄ¡×Úl ›‡jt:ô²Õh'íÀPz/Â=^Ž8ôä”©Ø ½•C4z1ÀK÷П‹C/nÔ“^/ÜËUbcG54ë zyr57¤cl1è½±.£|ûMoÐ+~CÁŸÕÃ3òÔŸW f‚?/=ö™NêÏËèg†¿úóÖ‰{dk¨?/}ÁþÜ|ƒ=/õ²ÊLŽ{^{˜‡Ћ=oÁ’5Mµ¸Úóê»óö¼<èÞš÷DvNŒ(Ö¼ÖfÏzioÍK·Þmz¸û¥xk^ÚÜp.Ö¼g-8hº8óÚWÄ东y tšè^LykŒS¨Ëö²j¨¯9W{ÝH½)¯µ]5"x3^.ãÞ§êUMy©¦æ\‹Ä”—Ù¹yS^û`±9\LyíR6"˾Ó)z^êÚÜ›ºÓ‰˜òÖmS–®¦¼ëç•ù>Lyí2nR”òSÞ“‚ŒöLtõ¶¼×^SSaëmyml°®zU½ØòÆãœ-ïE_†“·ÚòR+•A¥{vŠ-¯µm§/=Î[^lò÷ûʃ {A¯µÙ¹îÒWËbÌ+s°úòr)6— g?ñ嵟L+áIõ¼,1ËÌ'P_^û͛͋//éa·§_^ûÍæVÚÛ¼/ï»mùò^$M:ïËK[Z9îޗ׆[KO×Xñåeõc=d£âË{‘ÿ¾²LŘ—>kÁ‚/·†Ú9íËŸÚÝ*èeɸèz»5Ô{œÛwæ ïMœy㉼¢7t/ïÌËÄÊ"´»é‰3o¸gqæ?éyi³Ù†âÌoÏ;ó†o_œy/¬¤›­äW`¼,ºm¤™‚eqæ­•Ÿ}ˆ÷Ä™—n²-Ó qæÕÈW%½P‘Ü>¯ÀxkWG¸ÔWâÌËù®iz ƼÄ0çò•p’^=ë±]á¾¼\ˆýaägŠ1/m¶–î}bÌK¥K¤‚]F$Ƽ5bJáoó^І™¦Æ¼ážÅ˜—*»Û¬}Œya¬äêyCßóƼdÙÓÇg®5ycÞxõ^Ô ”ºÅ•aóÆ[ðƼb)¾¼ bÀ1h¸Hz/ë°íW^æ² Ä—×îíJÕíåØÊ*=Rù t—Ïc?W²±øòjÙ õ彈ë¦?„·å 
¥ØòB¤žcªì¼1ïE9íkºzŠ1/5>rÕó¿<êCNÓªLyi»–'‘óÒ•©ŸÓušÝ˜—Û²^×>jÌ{ñIî#Vå¼Öf£Á1˜‘ó^äm­q1æ%h¶0t:N8c^VW]L´&oÌ‹Iü2»_Þ v`Óð9øíòå ÖòâË{×ô£D„úòl‘óÚÍÙy¸u©/¯µ]O&»ï+ Ý\‹§oS(jÞ‹ª÷X«¨š„Š¡õ¸Þ–÷FÅ4fjË[K/Ÿ{QóÚbÃ"þQPMmy) iÄô«p¶¼¬…H–êã¤ØòÞìßìKéëmyoÀ…}=—Jly)¿/¤'¶¼T›¾—‡ó°åµeA¦ÚNOJS5/+´å+ƼöÂË*b¬Æ¼”…,vå€ó²êI+WÊóRÊ»,“r1æefH_̨1ï¢Ð® 'Šˆ1/eÅïmÎbbÌËŠh£àøðVjËôõ¼üG¢p}¯S Ö¼¶LÙHBbXoÍ Ñ¥à@ßÑkÞ§®JköúW¼Üìc_Æ9,}EÏkÏ+¿> ©žW?Äš×þ!ë®Q¨@¬y {:ƾ·æ}j˜6|þÔš×~²-˜;£A¯ä}„òˆrëoIï}EX“ôŠºùG ïöc„—‰¶Ë #âµöp…wñ·IA¼¶æÊ³EÚâÕt2e¼ˆú¹_]$ì¯>m-Äf¯,ó(®ãykçM«ª˜b³ã,ÔÌL ±…6)ÄÆ1sÏ.I/ÝÀ†²£§> å}(EÒ‹t<Ùœ×Ò"éµÕô‘·±¦’Þ ¨¹ï“*{Ì«9 Šy5çD$½ž®x9¯­Ë[wMoçÚž™+§r^ûMú|8TΛ pêžùõPo¸=QóZ“}€•¨š7C–í÷ïØ«yi:÷éç-j^~rËïJÕ¼´ÙýŽ …¢æÍÄŽç\ˆš—¶Ursªy¹1JÎ÷ UԼﶥ慧ìϱ—7é­§zæD¬jÞÌ®ÝyœÝ2Ì«yi*ÎJÁ«y9Ýc³æýóf–ÊsOHż±Í‹yi;€Îݳ‹y3«™ýîÕUÌ«/Gżv -@óTÌÛ¼˜W_ªŠyc›óÆëôbÞwÛóÆßôb^ÚÖ6­ˆyù²Ž¹W®jÞØæÕ¼ú‰¨š×Ënï²WñT5ol«j^þ² ^DÉËó}l‚{ëx3¡Íù]ü#:^:Ýuï¹å΋Œ7ôG/ãÚ0¿=é-ãålMy:3,oƧÉÙNxoló2^Îg‘ÂÓÁEÆÎ'2ÞŒ;É UÆK“Û>%¾KÆËµ$ç°àe¼±ÍËxÃuŠŒ7¼‘ñ¾Û–ŒWÞ«ˆxµT‡ˆxá¶×Jh¯ë@"à 7&^.^0Tµ^ÀÛº€7þµð†±@¼üöR^¨€—¶Ã™/x¯<'/ß Kä»ÚýE½› êgŒ¯ê]-Í­ê]kÛH‹iÚkw3ˆ‚0¦[$xínlóÚ]^عŸwÚ_U×>´-ínøJE»ËÝ¥•ü.Ú]Šu£^>Eµ»aþí.=&-÷ÑîjP"ÚÝð~D»Ë-%ñ*$ôŒ—¾—®’§ý‚g¼Ö×ÙIìºe¼ŒTDÛÞØ÷,{†¢0Þ‹ 6Ëw¸'ŒWå!ªä=—¶Ä†ÏWd¼È¿ìÅ]/ÂôEAÈk·s/£MòŠNoaÒê‘_Pñ›z2cç[Å»“°µ¨xí‘óz†ø×«xk¼0_ŽŠxqï›A·Šxù»Õ—úZˆ7*%EÄ‹Îx*âEKì Kñz‘¨Šx \mÐïOXD¼¶èØ/ò¹_Œ7ÆTÅ‹1Qã£óXQñ²ž|†©PñÚtksÿPªŒ÷ º6ô*ãµSØî™ó*ã%)âØ>™6¼Ú‚Œ×.<•ñâçòÌb\ªãµëšcAÇ‹öuwƒ¬ ãÕ× :ÞòÓí )ŠŽ—וfµ*ÕñBïòtpVïNƘLƒŽ—P¡Ìò¢ãµß$·|èÿ»Ž·°é??ªã%®¿Ö>“èxyÈ{A ÞÎ䕼A·-JÞÂÐüŒò¢åµK9×êGµ¼üä}Ùã~ÞZ^~ÒÆ½^oÎ+yƒŠZ”¼A*-J^Õq•‘ò J^}Ä*åÕ\¤¼ªT=ïÅ»°>ÙÓ6DÏKï²`Ì‹ž—;gªéUEÏËý9˜.z^†ÎbAÇ'=o;­;ôT¯ç ù¢ç-¢Ç1vDÏK_ylÕ¹½=‚æ^õ¼Ú¦žWðªçµ¶Û:ãÖí#EÏ«9ªçÕÚ²ªç%#o£|§èyYŠº8Zô¼ÚÁTÏ«ukUÏkmÛÙlj¿æ­aÕó²ÔF¾3Lx½ W?ôpÕÜQA¯Nt*èå±”eÝ&‚Þp"èe‚·p}”ÇA¯Î„*èÇyA¯&øˆ ÷$XzÎëæ¥ÍfÈkhA¯Ž*è-5ªîTH伡Ë69o©ë¹½ rÞpÇ"ç=yMË8Oå¼ì­/à)r^b•lq蜗&ª÷DC‘ójø¢r^‹Iök™ÃŠœ—ðg³X©‹âDΫ©P*ç%©ã^ž†"ç ljœœc_ÇÐpŠœ—€Î&”Y,ÍËy‰«Îå,rÞ“/ËkP伡T·ÈyCÇ9/1Þ½| EÏËùÎåïç½P§´lEÐOç½<–4[¢ç=ñI¶%cÓUÏËÝååý7ô¼@¬UŒZõ¼DïDzVV=/™­Ç¶âÞAÏKÄs¢ªç=ñÎ’^ÍK^ß±¼“EÍ{b);­äUÍ£¨yOü—›«¨yyk'é•oæ{¬èyëE/Ïø^^´¢èµ¶çZÎ’¢èõ /ÕóÚÝçÔ¨ž×~Ñ¢¡é 
z^Ò¯la?Dª¢èå•—e¯,Š^2/ïåzA©Š^Qϔݨ¢7ô"Qô’‚u-{Qô†¡¨+zÃ#VE/ñÔ~ 'TQôÆ_óŠÞ0(ˆ¢÷,›yQôb7}, ¯èå½5§É@|ùg•PvgUôÞÀsªv½¢—Ô=›oʰüE/Plú-E/E˜,ìèÕ&’^]!I/ЍYû9Hz%ÆUI/?bÕxÝ"é¿é%½'©}‘ý+IïùT’;vἤ×~㱉nßÞ~ <#›öW:/é½VFÛÄ­^Òkÿ°f•–éÉà$½ðmn— I¯OíúAÿDÒËyí:õ‚^dæeUAïM±,½ ——q®"^Ðk miFŽ*èÅ aÙeˆ W5è¢ç:`¯çEáL‘²áWìõ¼vÜ™ˆSÓ‹øòLlZÛ{"ç½±\›ÚUó2 V­Øpà]jÞ»†kZ׋š÷‹ÜGÏ£ÞËeUòx¾Üê¹-.5DÌËÜh×0õV1/–õ¬¶žV1ïÁƒÍÓŒBļözØCÅàîå9ÛX>F† æeÖ™,Gpo}²væ¾ß0üy5õùóþùwÛ×?ýî(ì-b´k'ÿj!j{g´¶?ýïßýó@eØ—(†ÓßP ºâØ¿î”ÜÝÏîŽëS"/ýF眫'üæWPˆ³ûŒ?ëù¯\÷„ÛŒÿøÎ+è½½ùù‰ª±mLÿ¦wÐ.`üù[Ïß{¿€ñ® ýæŸÿ…+±…8¸•4ºòVãl›É’MHvQÿþÝ ü÷»ýßaþõ‡Ÿ¿þû¿±é÷ß~ý¯?ü§ßý‡?üÒ3QöñJÉnØúÎ/>Óñg²ù†cò¦¯_~¢3žè†Ê5>ÿÆ=fŒÉß9Tº»¢Wó­}¤ ¾ÈÅ|ê#X¶Q§°cÀBÎnhÛë*‚ò—·¯Ò†å_=úëi`ºl¥Ù*ÈVt¯Ó<ã4é=O¶ˆdÞùÎsü£çÙêfÚcëÜ{ÿÎsÆóü½sØšÕã‰,`(Iº~ðñü¿z†ÿùçÿõóÏüËÿûãŸþô§?þü׿Üÿç¯ÿÐüùõ‹4^´Rä‰óŒ–9E,ƒæ“=óá’u ‹X¢•ÄÆñÑ-¶Ž§hEªÞusl¨ÀdŸg#Лc“ö×­×k”˜:Ø£X´ß1Éö'ÖËžã8«eÂØœ"éô¼÷¶%qÔ½£A¢ð7ÛçJûÎNÖÔMà¡(ßdýÆ•âr€('ë?H§Œ\¿–‚ïÎØœ²¥ömo¶aѶD+pîŒjîn¿y:ÑÊÁcaß~òú阨±ÖªÉ~ }™¨ßn¡ØGÝ7™¤õ#"Ä»£WÂA1Q?‚&ÄAm£ü@1Q¿uнt³eÄdý(SEkõI£Œ˜¬?3&æÔw2LŒRª­O ãIý¯—n!6>˜mëà ײþAÀ>uaWqº•l“Žˆ &U²4PT†•M\sàM1h?µÔù”†¥ZÄ÷µÚv;ÕÊÆùžDÖÛj%ã¡€|¾¥4ß›S­$ûÒRYÇøÊÀ•ž»¥%–š“:Æ×‚±.;©[k+N±Â;ÅâËv:½JB44­Di[z¶†Ñú¶=޲]N¯’!ö§Û”Z ~ ¯ÚÊö,½ ;Ø×ܪ(¸Ÿœ_žä¾9µJBñrÝWS×–}wj•Œë“ M`_𠘔ßn®X·«7°ç¦VÉa=cÜ*ûáÔ* Õ4Á£m©U›ÙËÊ·eÛÞ&ü1¥”Ý^”u"êã°û¨bÔÚD·¨ko¡½ÎžbT¡è$ õRvL¹îö“S«Øé¶•LÛR«$&ïDñ¸z{ˆ!&èG]`/Ë^kÛZ…Ábe—æ’oCìÑïá°ÎÔ6 †Ñ™–|UÉ®¤mnÒf÷³!lÝ„ŒÑ§_ËA,¶Y”ӺнÛ ÈõWDï½´N­Â·a²§8”t9µ ÏeU¾¥m©U¸–Ó¾á¶+ ©' ÷‰Z²}u °û³¡©Ne÷>=¯J®µmŒ%¸¨¬6öMldYF1õõ!¸iû&´-µJb“wð,99¹ oo•:oh°Žw5j=-Ä”«„ž‡bRž)ž²-GŒ6û FÅÕw--ÌvÖMZÕ§H¾­½ŒÖc1ë`\ÎýRn¼-G%^ž[Ò…œ’ÀÚqe/5sޝ|eiÇ[Äâä*áÛ?v'W _ÁA͇côXýödžÞc«jBºÕ mDŽË&Sä…53;B­ÇÚ˜jCEnrHðœ\…×@¸Ð”üœ=CXž‹Ís}áma“«Ðóîéo]ú¥õ{§ö]ëbýeJï°l˜Îu„ÝõOí(í±ìÇt`-h"lP©6=“ÕÕÞ\sŒþ\‰…†U®VÛ’“«ð¶³Û“Ó´Ô*–÷­yM•’›X…wa'ê¹MvïN¬b_×E^ÊÝ~¬8­ÊU7íï®U)¥îÀ÷‹ú—JsûhB«Ðú+˜­ÚGmb=FØ0:!×+c„ÍÄŽõ!5‚ˆÇÚqW¶Ôô\AÄŒcóYuLÝòøD1ãXú3VÂ-oîD1ãX;΂É1Oж+GMºõ2NŠnÏ8–)Q›mO|‰f›ÑÎXWmZHëç?åÇò¾Ÿ3¥&b=©?ãØÌœUç»·-É xgêâÏítŠ ÚÊPÌËéòŠšóâcf±çÜêðœ7þ¦ç¼gÃÌ=Pºç¼ì9Ây9®¬ˆV8o5™Kk“ÄsÞz>’¡š|S8/Ϭ¬9O8/îm×ÂjÂyƒë›pÞjögßÁÓa©ã¼G&÷˜—£Îç 
æåî,ðÆ‚yë•,À'˜·fW¯õÁЩ…çèAo}þûô—Ð[ß÷Õü¾è­]}Ågzëó_´½õ0»ó¦=Î[ÏöÌhá¼õ­á#Õ²{„órœuÃQW8/Ç1Dtñ²Þ×qõÖãðÀoƒ¬ ÞÚKîQ5JH/Ͻö…„ôÖïØEŠžôÖyáhGz¹È„ÏÕ/Ò‘Þ×Í9Ò>!½aØð¤·ö{ ©•ãÒËa`§žV*¬·çužõrœÝO:žNë­~‰û¬6Xo¼BÏzÃG*¬÷uŽõrœÍOÏñô¬7ô/¬·nQ;‚áY/ÇíOßjÒ[¯ÒqOzëQž;ÒËq‘vû|!½õ8Olé­ÇÙìÞ•ÛBz«µ³c¨žôrÜæ8¤'½ul°èáLÙ.ÒË‹½öY×DH/?ùP_pïÈÖ‘Þx™õÖãÖQânßR ³!TGzãìIoµ µŽÝÝ„ôò8’'Ñ‹ô9‚ÞÚ¬G žàIo}Šå²U|ûº=ée`¾Ïm˜| éoÛ“ÞØK<é­oÔæ·³t‚êHoõDÝg %!½ñÖ=é­Ÿ±£jžôÖ{'º­=…ôÖß|zMLá¼uöz«_óÖñimÅ çȃÞz•eßÐ[¿9rÛªÔƒÞú;DéAoü<<èc›'½q@é¨7~õÖwS!W}Äõ‚ˆ…Z`"¨—ÈøvœÄ±ÞøzÖ[§1j²4Ka½5ž±¹¡›f ë ñ“°ÞúÑ•Y‡MXopÜÖKÛ¶Ï*=ÂzëhŸgY2a½ÕÔÜmõxÖž¦G½5YA½q’ð¨·F•ûÎ×Ú°³C½µËž³„¦ ^F…ô†Ø^H/‹¤tä½{Œ{ÒB=!½5ˆM£0–€Þ°ÊÒ¥žiæhOzë[{f !½ÜòfõL!½u]¸t$žôò“§#qžôÖŸ$?µeŽ éå'K‹Þ^¨7X 걜G½|WÖ-î^@P/mé“¡:Ô˵ÜÇL³Ô[ãp·ëQ/Çeû®îízë²rÉyõÖgÇÓ·<ê}çPo}Öä“÷aÛ£^Úl´(=åOP/÷ŽÜ­§z êeœj?Ѻ„C½õ°í¤ˆD£žõÖ×G:pS<êå+8oÔ[÷SªÑI/ dëU‹„ôÆ÷æIoeöto!½õâí©÷z‹žô;Ô`þLBzpÒ[•Ž#Øñ¤—û²ÆNBz9ßòƒÐ[g‚{ÔÛÎËQiO- R(/×aמ;¼Ê@“P^@“}qÃ,F(/¿Y<v”—g’íozIQÁ¼ü&©1}—D0/°ì^›¤‚y¹;‹îZż¡C ä ¨ÌC^hØæ¸±‡¼f µÒµ`*¤g-B)§¶<ÈËoÞK6X¥ø;(­ ‡ŒËKüµ6t/Ú{Ù•=Ò$h³éçÙµÚË캘”Ò^1|TÚKïjóÂ×Úë¼®_´×YÅ+íŰƒ‘%w2ëi/b´š?ÞòD•ö²zšz”@{Ñíž#TÚË€´3÷Th/cÈÚ®SÚ»c®±ü¥½;Ñsx.´—/kÉ”önx‡Ï û@{7¨Ë¨ù¦´×“‘ôVu@S%½5Ë«¶¨è}ð&šÖJªè}XâL7aUôÚh-¹É¦ ×¦7»«­*è…òm‹Ñ‹ —äèÔò‰¿¢ —\i{Û=/Mö \]Üéõ¼7Ž€yÔéPE/%­°Ûû={E¯V'TE¯Ê6TÑK"xºænˆHzoØÂ6|\TÒ«5[UÒËE?¥L¹¯—ôÞDïûpíVI/ç»k½ò†W½¤WSîUÒ[dn)ª¤—Êa-2k×é%½°]^kî÷à%½7qaέ"é L$½¼>v¥›ÆB%½7x³:Jzíöžkm°ˆ¤7<².ê%ª$:䨋z©"—çÌ¥¢^{9ãa×i‹¨Û<ë©Ý;QE½œÇãԯ‰zyùöµ÷øLE½Äï,ž[¼«¢^;õô©æQ¯Þ´ŠzõöTÖ{3§ÃUWu½7¢yìÛª®—æ™N¦ªë¥Í"ø±p]/æ…ÇÜzV]/(ôöÞæ•½°yX«²÷Þë½›¨²Wkuª²ËŽ}Æ¢ìåÍÚ/öMUö†G-Ú^-Ÿ©Ú^>óó>hªíe­D˜Ø e§¶—áźÞÖŒh{)H vîë`Ñöòõ—<œèTÛËß–Ïk{YAÙL± ¹°×ö^0²}$ب¶7¼TÑöjm\ÕöÖâ3<ýTÛ«UÛK!RŠÃ÷.Ú^Îg¡ÍÐnˆ¶÷‚š€ÛzD´½ü&jú냶÷ªŽcÃ_Mµ½:©¶—NkÃsßÃSm/NWªíå|,[IÕör¶ÊîA˜j{ﺚѪh{YÛ zv¢"Ú^:´½¬m [¯íÅ'Æ¢÷!òq/=‰Ì—..mâÞ«úß-{qïE`~N÷^ÕdxX½«¸·á\O/Þ(âÞ‹È̼Tqoè^"î ¯FĽï6WZZSĽº¦}¸Ü«¸—nI,ÛAŸˆ{ÃùDÜ{U{°Qî@Ž¡;‹¸W§;÷QÀø}KDĽ¸Ä’GÓ7ºDܫӊ{¹Î”†‹¿ª{Ãø,ê^>óå¬#ê^Ì¥lìëᛪ{™Ò,žhE´½tt[}õ¿*îÅ íÖÓÙnW÷Ú! 
€ÑGRQ÷¼l3Må½Õ¯MPƒ›*ïÅ5÷ÕDß«NNªï ¯Mô½|Gaºê{Ãg*ú^¾¬4—¼ªï s™è{yþ÷ÔC©¾7çõ½ák}/±´=…nz­ú^fŸ=ó;Õ÷†ÏNô½\KžË}Õ÷Þ`²}¸Xª¾7L„¢ï ÝYô½Üß“Gq Ñ÷ª—ê{Y¤™o©úÞ0ø‰À·z,Û¨9ô³MànL¾tó‘h‹éDà¾aø†æ¾<+û¦ºÿ¹ |Ãw*_†½;“SUø†¾. ß0D‰ÂcéãÅTáËPšSJ[ÿM¯ðպΪðÅ–ºQ®¦œõ _ ×Uá†/Qø^Õ<³™¸ª¾7 ¢ïÕ…”ê{ílö‡ù«ê{Ã$"úÞÐY¼¾—'k¡ÙÑjnª¾7|X¢ï¥WZÀ7¶EE߆Ù!ð Y¾!J¯Ø²©¾7 '^ßË=Sµï銾7,^Dß«ë6Õ÷jÀ£úÞÕÿ6jѪ¾W¤ê{uu¬úÞ?À}”8V}¯N>ªï­Tbîªïå:Ó9JÁª¾—YõžrGø²,xQwâí¾wªÀ7žÏ |ý,®ò^¦dû†»:[ä½áæDÞn@ä½|ï ­m Qy¯~þgŸÅ܈÷£äw•—Þíßÿ컳:lå9^F¨R÷ž³¬6DZÖ¯ºPOm E÷þIâ»Ub™†1„g¾Z“W™¯õUÆ´§+RÅÅv'rõÈWË«ƒVXVXÈ6 ³ÕÆùlƒG×=‹ÃC-Ïé¯6Œ›RImp­?÷»gtªCµdpœ«Kñ•úò ßÎAÁS_ž7;y­ ”R_Ú(Ðf¥¾´%›ÞŽî¡à©/…Jl:ëuÖTãK!ùgZq+õå ‘e¹wg O}ãµxêË£ ê'êKÛy/½±§¾á‘9èËØuÕ›@_bf Ú^B_Úì+ñ®@ßxœ‡¾z%ƒùÒaKÊÛòlXÌ÷ݶ˜¯vte¾ŠÙlûÖøÖÃ"ß^ì@™/ÊS(#Ì—ÏòîAˆ_>¥\»¦Bˆ¯%¼—oÑ&œ_*ïÕyGy/·†Åõݵ¿ž÷Ú‡s>3ÍHy¯ú!+ïÕéOyo<ÎóÞØæy/n¤fìô_x/ÃåʺòFxoló¼×%ª©[u¼×Æ$ÐÆÑ‹ð^V^mQsEÞËeR££§7 ï #Jã½êG­¼—÷†´ô|)|ÃЫ¸W½£÷ò4²½Á냗WB­œ¦'ÚÏæi¯ÆVJ{à Ú‹ï÷n½µkê…ö†§(´7”ÃÚny¥¬—ãª[KÃÇõÆKñ¨Âs³óz¶ÓyÔ§ ÞØæQ/KÇÓÇNø=êÕ§¢^ž¦…,#{GPox*‚z‰-$ZãŽzyV笄§¨÷Á¢°õ™Hz©ž–gR%½ñ"éå0ûßpQïN€4ÊÚ(éåQ=³Ê—’Þð…ôêè…ËåYºJA/ ÀBÿ»[?‰¾.—gõO½¾éÚ¥_Í»:G‚J²·Þ"µsqØÀ÷W™õ&6m {ì/Æ‹ Ôµwý§0^®År] Œ·N ˰—¡r¯Ž7÷W ½áÔ±—ãž•ô&j^Ú.›‡o‚wìÕ7¤ž½úNÕ³Wº‰ZörØQå^ÅÙ^?é,{i"¾ì H±ì ‡‰eo<Î[ö†KËÞDxhƒ/ΆIüæXšö†Ç/†½±ÍöÒMîklî‹a/MÛLÒRÃ^ù>Ô¯7žÍûõòbî•-~½œÎ†¶èª_¯¦"©_¯~Wê×› )3Nýzãù¼c¯Þžö&ª7Ù?è E {ÃÛÃÞD"óy µ»öZU*‡Q£öÆ6oØkm›'ãÞ°7ñÁ¯ÄVoØË£½V ½öÆÃ¼a/mi&Ë©aob­3Ó¾ºc/÷lk莿İ7‘QÙ†¿@y_§ñ~½ñòœ_/Wð”©ù¿^Ú®•J-~½ñʽ_/?¯ägñë —"~½Ü¸Ûu¿ÞðBů—7c¿>\Sů—ó¹Íñë Cøõ†N"~½|€eå‹c/î|K“ Ž½Üw×ı—óåe$޽´m+ó[{qÇs{wÞ±7œN{ÃkÇÞø“Þ±—6[ªÍáØÎ;ö†ïF-{õIªeï«mJz5ØQË^ê…œËÂ@,{¹”5‹c¯6‰aoè²bØK›Eß]v †½á¡ˆaoló†½áÓÃ^în¦â×K'¡ÂEÇßâ×:—øõ†Uüzc›÷ë Óªö¦}.Û oØËVf’¶öêè&~½áÃÞ0L‰aoè|ݰ7‘X¼lİWëͨa/ÇÝÍ+ñ+`ÞØ¼ao˜3EÏ+/Tìzyˆ›ótðv½u/—(±ëÕ0NÜz™ÉíCfÉâÖâ*Qó†àDÜzC !n½|§ë SÜzÛ·Þp ^ͺ˜õ†ÀWÌz-±Ðä´ÉíåÛPƒé£¤‘k$f½ZšHÌz©>dcýÑ Š 7a³öÃŬ7ÄõbÖ›líëït{(zYsS^½öN D!Ìõ‚^;Ì–®ÛH¾·ÞÄn²}PC³;å¼Z¨I½z¹­ý•óò ¤/þ·]/¿Ù}øUÌ ?[Zóò ³M9[—*{1/7î«Ã‰WofÅÓm=UËKI̵9¥ZÞxoÿŸ·séÕäFŽö^¿â,?/Ü(^ªHn ¼ÖÎ0f1¶eÀ£þÿø2Hf2#ëH#µ0ÀÝb×[7/ƒ^Ë‹W€¥€µXLZÞ ŒA!ÖòÆ2¯åEô«tM›v±–eòävÞÁÔò"MRFÀ²áú£f½˜‘ì¨á—kÊF);Pˆ}¤^ %ccöm%鶃œ|26» È‹=S¨ökÏC^Àˆo·á+Q^é 
¤>èS^öÝcÊ+ýz•Zx=¦÷=˜÷Áv“ô©Y/–âf$šçz΋ù{úDÓˆq^y¶Ò'Òôb¦†4­/öœ÷FÏvkÖƒ^«0èÅz.Ü òxÞ /b³iaÐ[1„JyhLiza@uÐ>^Ì?l¬\A/¦ü2mTÏÛ z¡¸†/íN#Ћ1žËþ#Òû`¿}Ò¶‡Q¯<Öç²ôuF½ñ7ê•7ÜeTïOD½¾O ëaÔ+ÇIRc7F½³ ³TcÔ‹ŒB+wåcQ/âÄR_ë÷AÓûÌߨ{kƒ¦·cPvB3“^änÊCùÃ^ «3ú%M/åu2é…„U­aêEüiÕi&½¸ó1¿ÉEIÒ‹õ×K7•±¤S°í[½Ò-ÝÏ66j^™Y®™ór¶lóðóBÁœÒÞ¥ļð3¬ æE˵7h-oÿ¶TLë4$å}¾íÄ —]ì YË ©þ\½_ˆw~‚ÎFœÅ¼HlÅ=¿mzqßX§Þ|óbWĉc b^|\ÒgìI ‹yaMn˱,æ•–ðéÓ'eáf¯æ…ÐL¦^Oz‡²áÞûÉ`5oÁBæxÚÞ²Gj^9NFŽê™Ãj^Žze5/¬åŸÒwÏBj^#©&ˆÕ¼î#e5/^_µ•vVó¢²\ÇQ”Ô¼A7­j^l¤¸J¶,4¯æ ÙʤæÅ*Ó¢×¼š—3IÍ µøIe5¯ÿX˪ iyñj²4!{ŽNZ^ɳ–¯¦´±o¬åÇy1/«ô|ææÕ¼œÌj^4„ÏiЛWó†÷Mj^lºq1¤æE’pͺÈjÞðÌHÍê ©y¥™j¨Ë{»%©yÑëK÷'½É‚žNÍ+ƒ…êVHÍË 6«yq{{¢ý(o¼½¥æ¥ZBZ^l]ºÍ7>hyiËkyåŽKq^»NËËÛŒXË‹«“ÆVw“–—ÛQÖòr3kyq)ij8_©lsÃD=ΤåÅ``8{[¯å ·NZ^¬†gùcùÄ¢·Â+´š@ļèýÆq•f1/fOÇz‘Õ¼ÁëERóÒ€†Å¼QÖciNb^<²q̶IÌ+ïDúû31oøIóBFçÜ{IÌ q &«yy¯ˆ©y±NcÁ3AÍ‹ušã7ÏrÞ{z>Ëeà Uš2Å“žÏø$Ï=/€×ñÚe=/`ÕÁ§¬çX:îܤçEƒ!ÍÒ^]g=oͳÙþ³ž7Ô!ÒóÆãHÏ‹0銠àW.Û¬Ï×±±&=//¨ó'½Ø†1,ž,Êy½ê›å¼ÓÄú-ç•oI^•&j°œÚ‘W–ó&ì<;ž¹N΋ZS¹î€å¼ðÔ/1¯Ü{jRÅ·+®óJû'è±[ô æ ž¿^Ì+÷—¦Ôz[0x1/žY¥üµ#æeÂÃb^9N›Ü4d͉yvtKáE^Ì J4lDz!ýPšù2è•!­®’,-¯T¦R4æôû zúáúø—°æÚ?~þ¡Ë§´V%ð§ÕJ¡týé¿ø¯ïaÊ@_$ο þìB¾ cÿ¾Sâî~vwüw>¥|ËSÌ7oÔþòœŠð÷¿ŒµõöŸ¿ôüýžKÁëô/_z뱯7oÉg¹â‹ÞÁ¾€ýç/=ÿzìûô/Ÿ\ÁúÍŸ~åJ*’üP/2z—MþÒ¥ ÌÒÄÉ5ýÓÛ ü“ü¯ÊuüøóÇ¿ý?éºÿáãß?~ü×þùÇßz"éMdšZMhë™êï?†€Øp{úÛÏôÄ3}ASyÚç¿sÑ6ù ›J—þü¢žþ~.˜ &\•}ÍÕŸ¤ë®+ •¯Æ—}Q¯Y¾)ÿ ㋾´ZV·xþuçu„.âkëȼ®#|5_ZGV#áë]Ìguä7ô9wH½¤Ëé2—9œ»‚¨ßíòõq¯fù÷w4tØa5íÉÀ¾N3ô4ùžgŠ`'&Ó˜_9Oý£ç¹Àó`[Z{ú•ó<ñ<ãàõmd î @ÖÉ„3œÿožá?~úÏŸþÓ_ÿïOþóŸÿôóÿþõÿ§þ¡þóã7íK|0e?šð¥ç™f¥¯~noÿ?D˜®»Üa¿Üœ8Baºnp:ÛÙÿ<bUqaÞ˜àæ›vY7]77?Èè5]w’1*‚~÷nNìo4]·Lh—nªeŸ3‚Õ¹®ˆJ>-)þݕ桬›¢û‚ésViÝ8à š;üú|°c÷4SÎÈ#3Ý| šË7„,]+ÚôÁ®ÜcМbTtï+RIõƒ5sàx7u šêx÷ Î`—'ÙÌ}eæ Zi÷¨°­¸›…(Ü; â¢lXfÖò|œDœ8¾.Ÿ`iÛyô âòa‘ ©yØA¥K?G¢Öï/ 뎤›ÃŸ™6m|pG1“yðN­‚»¾ÍÐçéØlQQ˜ã˜Ò#§( û;»oµE{:¢-/ÁT\åý('¯¸µ½ìûŒË©U:|orÝò‘æüû;<‚LùÀ·ÜçKÃj/>xW'Êœ~m—ÈÇøgT¸â+Š’ÉÍ0éƒìšȇÕH#ÃúëÈW¿¥´UšÏ´ ¶ (ù&äÄÙβîÓ¥ùiÎÔÛU.ß’‹®u!ÇKCÀuõ­›iPFê—ÓÉ+U׫v͵ ³©1±{Ó\ƒ$ÎÒø¤Hk?ã.ê'_uS§Ó·QCýœ{Ü®êÃ¥)°zÏœg~TòM<ëˆÇ‡KÌ̶k¡ìHV8ZN*£t³–ÆG[òh䱞4¾œž¼= åB¥¥±4¾úm<&ÔOq±Za¡U4 6ÊŽj§»LJÝdFëÂ¥±Ê4ä-e. 
)¯©rå«ôáÒ”èØ ‹8i|”鎲áÓøN¨^ƒ,âx4S‚|ƒ,"»F–Êš­pÂWÃ>j£ý³†2—--÷“Ò³îvØíÇe6ÛâÖ°ƒÀh?ZÕd{ÇvµíïX$4ezƒ”Ûâø¸6Cz}âø¸’A‹{âøød yD+œ¥Ö¤^ÙÒ”zÚ §5Þ»î&vDÙ­p`]˳¥ÑöµÐrwÙÒRÕgrÞª³Dï—Ó‘2{ÀI„Ïãså c¬“ÇGF(;²®CDœ8>j+[qV!$Rº§ZÁqÃvY4 €<îwñ(ëÞ»ƒ.óqª6©hÐ<ïÊnÐQPŸk£æZ¯µ²\Q¤ì¨VæBîïkðˆôi|®«k5ùdi¬¸­õ©Ûb+=²mChs‘Ô‡ñØy”ÅJ¸m˜û ×ĺ^|±ÁMÞKM³ž/ý „¦rÚˆ¹Ö?§é—7, Ý­Þ4"L³uéñënXí4Í §2ÊÛsšö8AÙѬp²d›Þ>õ |Ó÷ŽêZX8Vîí«R7f…gåœf%þæí4+}ªá–ÌSê‡J»TÖ†Åè*®¤;ÅJh2îá+llƒ²#YÁºk6×1ÀO* шmùQ¤“¬ pú¾Õ¨­Aa£XiÙdU–y`ƒ("¹Ö¿<ˆ"ü(¶Bβ–QÖý(ÖO°j6î›:þ a+–ßu]½A6%b×ߛ̪ܦD °'euO?zŒ¸›mÝiӬɵ®gÐ%/ÓGJÚN¹%2”žÌÉU02,f['m·“«Ìúô<Õ2)Åœ·Ì³µâ#¥Ãùª”~°)Cžõ…·ÛGJ?0uÑ M¦V.Rúƪ¤êPæ"¥;t×– 7h"N̈4ßOUåokÝGJCâ]6|¤4¹&µ~ùHiØ­u©1kD0¿k^1=}{€5Ôû“3"MeMGèÍmË¥)ÝÁðlŸ¯†ñúÉ¡Ùa›Ã-ka!zV«È½ö1§š39y7ûãcF°g1um®{sr•އ#íÀ½¾ LOÎHúv¤þ(:1#Еwµá–ƒS« 8©™ßiÜôÄŒP¸fÃ]ž˜¹-„3¬í"m†>YÌt`¢‡2—3RñÁ¦½C¬!èäŒP¼PÃ?µ!,Gž4œâäŒàGÌ}®Aqư>ù£ÍL3⺖Ñ|Ì¥{È;½ÊÀlV^ÉòÈkÈ99#ÒÀ…mM:3NÐ%Vô+¹‰P\êÑ¢³/©&OSÉVG¬Hµ)ºGK%p‚Žcë›ðA#ÒQ©Nf)>iÒ3¹EÙJá ||ÐÈ ¿ùk 3dÔàƒF*^oÚ>}RÿÜ¾Ä •þVI¼Ì[]ÐÚ•K'û(:;9W¢C q‚F°£ §-OêCØÖD„ëÊ¿ÜL«C q‚FP™e"°6ùt¨!lkâ5‡K:WF™  ç«nkâ…”§"?S×oÞ>idöZºO¾!5"Ïöî*µ¹Kˆ‘ÞtÏXPæ¢F°«è©§î£F¤Ú'3åë2±rQ#ذU4¯¡cõ‰©ÃTôÐáºr¢F ¯ÓÜp—¹¨‘_üµy»´t.j®ÃÄuºâ¶'&È]g<'­°$X?Œ¼çdóDt R5Þœk4¨Å,Ù¾o{¢†Iÿ]‰ðbP©Ó$ëExñ.¥u[Ów&¼–\ú6×ó„·bîsÉO.,F„;åäyîLLxaþ-mûÞAÍ„·ÎчR&¼ãöoçÅ.ŠœU·Íœ»ªäÝ•5Äg΋½8˜lPå9oÅTò±?â¼òcßÐɨe¤ç¼ˆæž;cE¹†y±;57ëñóÖ9‰–IËj¿ ó¦§ãÅJ˜7lãõ”wþ$>Ì’•äå{nÔÒS^ìâ½/‡IåÅÙ¤g²hOyq\: S¢¼ÁZE)/6X·£%Ê;ÍŽIÛ³6Oy1›ºvºíG ¼8N>â²kºjÔx'6QÞ97“‘›1cGyQ&^%¤Dyqõéd÷xÊ‹ 'lÄÙûJˆòÆ›ö”ý̨U¨‰ò¢¬#x‘=åE™ŒD4l™(o¼=OyÑÕÉÀôÞe¢¼8Nº7›ØxÊ‹û“n§äº±«Ã¼ó‘˜¼³µ|"¢¼³·Æ"Û˜åÅéäîdähø¸Ïq/O”׉ïΛ¾Ì™øîìûeΰ[lÏwqróùYnûÄwgÔÄ{êïùî!â»(“‚²&ÓžîâyÈTBí£‰îâ¨GzÙ…&<ÜCø«*ðpw^Ç“·c&±Ý9•z¹ÓˆíÎ3Ód-;¶s8®©í4¡]É Ó¡¡Ý9Â5GY"»¸H™ÜêX€Èn|þžì†Á=¡Ý9yÁà&GíÆ—ãè.Cs¹ª±g»óÝ<9íÀ"b»á(GvÃ>íéËxÃuzoGëš5Á<Èxa÷ þ!AÅ‹ï9œìU¼P$[² *^X±˜—oPñ’Ç@PñÊx§hü‰xñEI“ªò3ñÂÒƒÝM÷XÄËRjñ²^ÝD¼¨„æ‚D¼´• ˆx1yd ´>PñÂ^ò2ºD"^tnR–^$ˆxá*o©íAÄ ¿d[ê "^X ™?sñ ï2$Î*^–¼±ŠˇõÙ™AÅ Ài©CAÅ ?h]O "^ø]ÚÊbñ²¨D¼¬¾c /Üi,B—5¼Ó¢ÇúAÃËrJñ¢G–¿¸'IxYHæU¼<È*^ø¬sL{©xé;öX'¨xɱc«xQû‘ð»› Vñòf/KgYÅ‹>â¬IÅ‹AT‚Ôòã/o o­f4¼»™ÝÐðb"sÿëy‘^Ä0Jgó˜ö×kxñ:‹¤²„W.RFˆªÁzIx½$›$¼ˆ>ì@¤«½c /r(°ál¼@oT޳ˆWZ 
aÎñ,âÖPr;kÎýñº,âe©7‹xù|$âÅ(þ±È:ñ6|)uz†}Ð;뫼|å…,âÅëšV¢o/{/KžYÇëýjˆóÎæbõSóÂGæÌɈ²Sñ"Ï1«{‹x@ƒtú[3Ì*^Þ{@*^x˜uزŠîaE:*E²^ÅÛš¹¬Ê‹K‘·Òü‘ˆáÍd,â ?‹x1è· YÄ[0‚‚ÖûEyq™=™Yhñb’n–AÄË•’E¼2hJf¯Í"^xm?†î²ˆò¥©ˆ¬áå…5¼ÜN’†·cاғ áåQ5¼XÞ·ÔÖðv¸Ò™ckxCoC^|4ò9HÄK;HÂU$ám˜¾Mèd oŸÞg:¿d /Tý24Ø–ðbK%\·÷ªIxçC^ޤe oèÕIÂ˽:IxÃ; /ÞÚm¦˜¬á ›HÃËã'Öð6Hcd¤“Ó‹ò††’(/ÞùÈšôJ^®®¬ám‚Ÿ½~Ñkx±_©G^¼„ýÆj11^|ÚðWnçïl(«YµåŸa†Ùäˆ:Ê^ QÞ¢L”7 '‰òÎoTþ´eóDyÞJ¢¼³•oâéVv<çxã*QÞ×oÞÞsŽ6§å÷€©@Ýz[GyÃ0ƒ(/}ãÄxñcr=jy1^<͆¦µ>JŒ7V/Ïxg…(&Ò#ÆvzÆöã»-k™«µñ¾ÊãMJ±ÔÀé-Â-É÷BÞäßÿÂ[aܸ&m‘ðb>̶÷³SCBt{Ý›CÙ©5FÇ) xêc£Ø'Ènq#Þ Ý õZ/£ù‰´­™ƒ]ü5o8¸5À ´Zl»5øeì`Ö0]ùÕê•ñ.šPøìeÈ`Ö Í²|T5›!ÃÁ»ö­då²YQ²Pá`Ö ß”ÌuÔº‘ÌQ©ñ;U'š58LÌ &~mîdÖ‹¼Yb Ö£ÇB«Þ¬>#cAk]Š7k˜M‡YÕëN³†!¥fx³WCp¶ ¯”¥¡*ÅàÕKÚ¡ÊGòjHõJÚj²W.,iœ½°G¾\5õ`À‹¡Z¹Ís¼b¯†ñm-š³MÃ@ïl)2Á¦/‘m:ȇ®F›Ô%¹çý^î"õ ©605 è/Õ¸558•0£Ú¬œÊRt 885Àù¥+ÐN p|µvjë2‡ûDË7.ÝE×I^øáVyH[”ÅN ˜'X(954¼@“j±SÃø6göÅô½Î©C5ieûKÂ;ÍOä…^{³<5À˜?-oˆxÁÒ3É ˆq,}‹"Ù¦íH3áÛ4`9Kžý©3àåýv xùðJG‚Å 3àQ ¦&¾{Ã:ĈÛ4 Säe_&ñ]ù‹|Iuû0àmKEÀ‹&]Iy xgåZ»è: €×W¼ úkM’M•RÑóªwlÓ@Ÿ\ ¼°ë²lµ@xœqMãe„·Íu/^ÌÓd¹g(Lx¹ê1áå'é /fjC^á¶[ ÂË+SðòO2áåzÉ„·Cö¢#‚@xùÅ1á ¿Éz˜ðÂm¾›|/~äïl¥7#^n¼ñò®_F¼\ù˜ñÂFó1•û4p ÀŒ÷Aç5Šzã…Sz9ŠYOyÇüË&Êžñ7 ŠŒ7žŒ/Y;ÆËÍövjÀè6="3^^OaÆ+{4¼2ãå=ÆäÔ ÝÖcÛ£åeäÍ”÷‘ùµEÊË·ÀN ˆ™¹T.œ° §è†¹àÔÀ½&a^|XX¤Üº7¼á£cÌËMc^^ÅaÌî¼ø}3æm“;˜˜0/?kƼØB•l‡{5è Šm ¾9oÂ˶[ðòƒm:4hê—/Wb&¼¡†C¹IyóÌç2÷’ò¾ËŽS7ìÔ€Y´>ŸA^Œ¾S)ŸBÞ0p!Èê[5p£F”í"sÍ%'`Êê+[5`™öÎ f¼øj“m4 Æ;æ(õ£}ÂxÑ%!!w{£0ãåwÊŒ—Ÿ 1^¹»¾&nã#2^Œñ±1tmé`Ÿ†ÓDUãEjƒ´œ†^=ãÅŒ Ë`¸Èu> ü~òbÐ&¿Ždíy'´LlÔ ÝüDÊË 6ià.Ÿ/WVvi€û•e]—àué—®.¸4ÀEÉàlÒ@3§—Iƒ<ÍG & èỪ ƒIC¸2i€Ä¯êî`Ò€Ôãlrvi Å`ÓØ‚Pêçxiª\hâ\ ¬n4T /+˜ðÎ"C}¡LOx‚[ä½ï-ùDxa²ûêHxƒÅ^VT6Uõ„Wn¡>>Ä·'•p"˜ðô–YS>˜ð2Fb‹ ü=4ÿ’ /ã.&¼€ImÚ= ^Ä«ÜO«ÊqÉ¥!¸;Kƒ4qYfj›à¯œOf,š‘ÅŒWÊ䢠¿¸?^. pËh[¢M.  
e7”¬ã †$¤ãƒƒör^pi(!©MêÔñÂý¶}k…ÿû\’òþ.+^l¸nË:éxa¿*•-•$À Íiµþ˜ïsú¡O¯T4°mfL„wöˆ¦ Œ„Åî–áÅxqÉ>á N4„x_¬–/:kì ~ù4Ìz“ªS‚ïÀÔ²Ú¥âa,ºúüx;Ú(]4{!^imÕµû…xaL¢;÷ñÒø& ^øPô±³Vâ½aÂ¥Ù§Œxý0%^jÂá­ß¦ö~?±Hx¯Þö®¢x!sg¬ö¸Dx©— „w6uËLx1•ïfÛj2áERRÒ¨[&¼ì“õ"¼£ZäU ¼ð³ÌR9׺^X3áE¬ª¯OÜx™g o†‹dÓ¸"ÖðŒÊti,hx©§˜¶ÔC‚†£Íª[#‚†—$­AÃ+÷×-D1oÐL²†—½Ó¢†×»i‘†·bâÝùÛ*kùyÝ×ɘD²ëvŸ ã¥/Ät¼Ø=;TÃt¼.¸ŸËxiø„¼ÆǹÂ+yå“ØÝ¸î˜„¼p®Kº” /†”ØÚ¸1ëxl+×ýAAÇ óz­lÏMƼÆP[ M׸š5UêÈ‹Ñϸt÷C^ŒTsÐa—^—c—†5-cßíàà)oT7{ÊLÜXÆ +¤£m{Oyñ=tÛ”O³ÞEÅDyÃý‘Ž÷ÁRÕ@ä … /C¸€ëzȯrC^š2ä¥WJˆ1Ïõ÷—SCw3âæLÁkB¼GIOx7<%»ámÞåþ’ñ.êÿUàÁx7èÑ ïr?ð.¤„wg‹ÙL0IxeÓ3ûMwñü1ÎÝÚF¢»<Œgº~ÒÃ]´N—íIb¸‹C~@A:[5d˜ÙšË3Y5S<]Êx3õb¼*$áÝqd5UØÂ»xÂÒ/¨hðîÀâšíßb¼áÝТy¼‹®¬¬mñï†f‹ð.*WëÉÎx7Ü3áÝðñÞÅd¯˜Æ‚ñ.OöÞíà¤I7B°]ƒÛ2Ý O…ènø®ˆîòü‘én°¬$ºË¦»Á|’èîs½¶ŒánØ2ã ïÀÞëÇ´®ðäL^Í€Ò‘ðù¼¼ÂüÂ/8K“A£z.xÀËóT¼³÷·‘ xßex1.6QcÀËSßxák±¬ x1%¹R7ǼX ” ¤Ô^Lrݪ»Àwivøî€—OQÂÈ^ è²Y¼áåÙXðj é{ðj/WF‚ª% ™5\ص—ÕsˆÍ°U.kôo0kH`‚–uÀf &wÙ|s½Yfå(aɬÁoöc¯† L6™êœÍ¤}YFÜëJ¶YQ6kq~Öãž”¼7PÿqË +^^xd+^øçÜR77¬"+^”õ#$+ÞPF†¼(»Ž†‘ yÁSï#@õ†¼®)GuH†¼(³²ãÍȸIj%Ãv¼ï§›«ÚñæùP5‘íxááÓ‘ìxÙ߇íxQ&37ãz;ÞyÚ¡3lÇ+eµƒ ²ã•2ªO ò¢ìêfÙI†¼y`uNG޼ñtÞ‘—™›Ž\Ø’7ÜYòÆã¼%o†SL×ðöäEYµ(Löä•·(ï‹Ȕ7–yS^”ÉÌ.iB7ååÇBž¼É›ÅfÈzaÐ]–{Q˃^ܹÔÏ\¯WâÚ'eÇ•—=ÿÙ—egìµ}yãô¾¼ñǼ//ÆÞR§>õåè|yãOz_^Î f_^¼Q¸míÜäË‹ãª4Ï+p-yWÞPGÈ•7Ô-rå u„0oüMïÊ˼+o<ŸwåeÞ•7|äʯӻò†2rå ×I®¼á-xWÞ Jÿ(¶!SÞø‹Þ”eØÁ¦Æ Þ”7Ô5å ™òâÆÎ0(y+Ä Rc®·)o<Ιò†gL¦¼áêÉ”7|ødÊž¹òʃçݾ‰ìÊó®¼áƒ$WÞXæI/:²|¶þ+oxßDzg÷u؉ô†7G¶¼ñZ<é Ï“HohTˆô†×G¤74²žô¢Ë*' ̓^œí>žbDzC$Ò›±¢/s»Æ¸Io8‘½¡Ès^ LÆ0lIœ‡Aû³uâžó†áqÞp[ÄyC¿Oœ7ÜqÞФè C½*lVëDz3Þ-þ›I/†œýø)éÅ=`Ÿ”fÃyÒ†ŽDzÃøH/Æpp,ÙKšDzù8&½8î2Ó.&½~ÌÉ 7å@¯ÉDí¾¶®ç¼S6Ùd΋çuéþ=Ƽñò7æÅÓ—)ÚödÌ‹špÛ†Ƽü„ó†«p”cÝÛ¶'0å¥^…!¯ÝÕv¾1äœ궃…!/·¯ yqºfI¤LyÑNÛ+˘¿)-õ^2fÌ›±{ÇvÚ0æçó˜7cUÎ{ —1o<ÎcÞßu”½Á¢’½á0/èmˆÈÊ&v'A/îË´[L‚^yk¦[ôC‚Þ†  ³°gA/.ó¶uZô6„v8 í½ wÒK¥·]CÃDÇ¢2ƒ%oøI¯èÅOVˆ™Ë+|mºë.4ó _‹¯œLy‘”i±Á”÷ž{/ý²¢·A•‘Ì^…½rzÝ›±Myåi-ö†Vô¶ 8m½(“ùásbذ@’Ê?Ø”—#)z1V,¦`aE¯œ.7©È×;€ e2jHû¤7úȲ-oApÄ“ú‹ô¢¨´“°I‚^ [“©T‚-/ß)zñ›ˆW…­WôÆ2¯èÅ­ßÉÖÙØ–YD'芽(“ï1©ÏƒWôÂàµÝ¶û„½ àÊ¥”‘-oÅÉkÙÍ Ùò"Sâ¶-ÚlË[e\v"“HÓ‹·w7Ýly+¨„fÆ›¦Í‚T¨k ÉXÓ‹º'ÓÞüÒôÊo›ÕQÔôÊ0ç 2æ•·¸×»?êŸw²¨Öô†fyŸ+šmö@μ`1òÕb—œy¡º”ÑË:óÞhÍ€y!)ºÉ0óÊÓ“g 
k~$éÅL+™°ŠM»&ýƼ\…HÒ}‡½i¾V¤î¨LÕ›6„ËÆ¼ˆœ©¶µŒy¥^¢"nï+ïÚ€ê•,r˜LB³H¦ x?ëM*>¦ ¡Éܦ °½O,™6 ¥ïmèJ;órçAÆ xþɘȸ¡azhf 7 ÛÌåN°7ï ¤©èž|BM ã<áÞ,œƒŒ0€ò¿¬y„»a#ã†0b ã\˰8+6nÀ¿šB‹ÂÈŒŒÆÇ£ŒØ‹–À ¤GÝ–~Á›—Ò^Þ¼fNœyÉrŸ­y±ûFÊeŠŒy”¾šÌFƼ4œƼˆ .Ï6àòμWz°~ýgïÛËÈ™w@Z/£âíÍà}Ø’è7;óòL.8ófÔÐÒ>a½3èAF2[ óræ}*ìm>ñmèhS3ˆÎ¼˜o‰_pæ%dñræuÓ§àÌ\{É™· §y>“ôÎt›{º;¿Xï´“fay‘³5/ã«`Í‹áµ-ëk^,1bþ»U»dÍ‹iä3#5?þ¬y±:'ãÄò¶mv`Ášº—Ú4u­yÁ‚m½#¯¡èv}hßçÌûÓ×Ç¿üÐgHYÿÿ"ýåüã_Ö·wµ”¯?ý÷ÿõ=X霤ο þôR¾ eÿ¾sâö~ö÷üw>çj}ï;Õ??gˆû÷?ÿÝõìóO_znìžÓ³ï?éùçó^o\ÿìæô_òì÷Ù矾ôÜóyï³ï?rþõ‹?ýÊuÜhž¤ -h²/èta"³U\Ð?ý¸]¿ÿ1Éÿª\ôÇ?üÛÿ“AÉ?|üûÇÿúÃ?ÿøσh(L•Ê@ð›ÏT¿ãL%cûò}çí·Ÿé‰gú‚¶ñ4ÈçêbðW¶þ>»U~U_0÷ªËxaŠ1ÂEPÙ×\MŤLF'X#ŒWã˾¨Ÿ,˜NÌMØáb|ÑWÌæd€_QKæÙC5áËøÚz²®‡+J¸ž/­)»­ðU…/糺òú™Ïa££ü¹¡×IXé.2÷÷»…¾>îÕ@ÿO“±Pù 3„×i†ž&ÿÑóÌu#„á´œå<õžçºa²×çž~å r§õܔś/ ì¿?~!Ý{·ÚHÅíáÁ·–`1˪ÓÖC‡òdÍ«‰l êÔûlÊNj;\—ž®¬p¤çÈ7f¦xµ4û /l±en‰)¥sñ–%M¥hŸµ±l‚2—”Ž]•—ÂÅ‘/·-ŽÓ¶‡4 gà j}–îyäìíB°&1áz Ò`açÝ«ïU-”™ ÇÜn7Å‘«Û’†—úÈ+XÛšUkcÙÖd@‹{\: \~Y³¬¹Ýíª6MH”*a*¤ù4‡t³yÕÝ60˜¼\Öcì·g\èŸï²WG¹¼3Æ% ™i3¥{:RC\JÇ6ϸ!ƒ­£šf4haÂFY÷´Çåi”ÂÞ7èÖ·i”êÔÊr™yw¥³ìö^O]3éFyÜþJTˆ,}çr€BÙðšÎVÑ+­¶¡4uœñu»ÜϯWzŽ2BÄ}*ª~““xß:rcB™³“‚vBžÚZ)¼£Br°K8&b)<¶7°QüxR’EÕ¨d7fúÔ¥…õvy¬láŒ2ç -Ÿ§ôN;¸@8o=K¶w£6çÙ¿a=äZ»±¥«s1äY&_Û'ɋўed^Ý`.° ë¥á÷v¯ä+oƇíq߸³÷H£œ³ ØÇrËf2¤[I(#Ãè ïcõ«»˜S”þ—Àìöæ‡ÎN^Oq߸›·…'Ã?pÂà*ÿÉ›šbnÕÓÖñKcÏAͨ#„ ¬¼˜7ì“FÓ;°cSMª{‡Øx²W;?sà³73£ŒüDéÇö¶%{£?rŒ“/Ùp‘¡½Œ£¼û9ׄça»àz ÚQFFž¾æ=Í™ìQ¬Á€õG9õÕןgxÃqèݪ.2ËOy§^>®]lžéï®%6¶s&2ýó©dº9¦bÆÕWi–UÂ7 ˜9î¸h7³jÈï½¹»tâcïhA™ó“›ÂI1 ¬›{›Rç 9°@|<ÑÇ?}{[Œ¹°üú¢yEÖ9 ÓÞÂÏ_jc!4=ÖJÑ“m_ýLÈuuv@µT¸«¡§%Þ1Ós]EnÔ^[G™‹Æ€UFj[ .ߟ·­¿‘Ã>¥U³ìövÏ”70 A?m,¹ù¬í6¶`4Q·3ʆóÜMØ\±25ÆŒÉ:QG‚Â¥H3),Þ¦ZnR¦µÛ,q:ûX2,—Âêƒ{(!F oŸªA)&~pK_TÜtÑ uä{W.ˆ"NÛ›¡ÝVï4)ìS~5¦‚NC²1ÐðŠˆŒù̽g H‚b x÷‘¢/ðc\i"j–—±ßDäù-ÛÒ‘Haö£\—Ó&EÅr 60õ=ÉBèJ.lï¾ß5gî±µšLß+v‹"çKÞݵt6Gùø.[†Û¤˜WŽˆ ®ûRè­ÏÉu_ʆëRr'œä¥ZE…^¯X Ýh×ÅI‘üŽ<yB5v¼R˜ýh7´Xy‘ϵLÕ`"]?Þ…ÆÝŒfáR°¥»gøLú/mi“BH´Còx¯¼,BÁ ؉&Mª™v¼’ÌÛ¯=‡[K<0 •#7ü¾M=ý7ö22 Œ“1c{ƒá†]ûÏÞrÁ`¸Âî²tE„†áÈ…í™{·-á {ͪQ †1?C¯xÔƒa˜ŠIc£3Ã=¨ùÁ2®°-þ0<õóU £ öÆ[µæÁðž~n D`8y0Œ…ƒdø„Áp…rÒxƒaÌ„§™ä2¶$0Œ2©•[ÈÊ`» 0ØÛ;g WЇ,Íæzš†1+¿kÓ¸*ÃØÕ°ƒáŠ˜{-ƒá Œe´ƒÁpÅ 8gß`ÿ/qµw †ëÜ^9ö—Ç`e2ä슮=–²§vB`S> Àp…VV¾ú5Š`0Œ-IÍ™ 
c]¥›1“a)Ã7ÓÀH†±ÃdÈÐbµÉŒ†+–3®0\m˱—ÀpüA†a¿(CþÄ`˜Ïcá ƒnÝÌX¿(Ÿûî7 cotF Í‚^„…ñ›Ò¤m‹?ÆÂØ#õV¥2–²i ˜?ÁÂX½*‡Ûæ•-ÆÂBIîyÍ* °/3¦ÞX^Ž·9g3Ž?æ±pAvWÓ-“Œ…ñ›Íbì ã¸l6ꌅAñªüüž=Ʀ'x›ïi/aaœ/[>ca¸mJšýz,Œß”׵㧠Ëqĵ,ÇX¿YDôXXŠ`¿}~ ãûÁ„„…áIyç²³å ã8ìËß‹#„…D±Ê±ZXÆÂøÍṳ̈∅¥ {ö²cáø›ž Ãs3ÍØ€Uæ¹p<Îqa”I'«aÌ…ãµx.<Í3aü’|ì›={"\ð¬Írž‘p8¿GÂñ8„ýù=ž¹gYþoi5Ïßëum %Œ£äp5Ë%üº ƒq\˨(MAñ1`á6‚Á¯ã ~]ÊÁ(z DYªp‚Áó0X˜ôý‹Þü¤ˆ¶¶QÏ+i2à[±MƒçqÃTÒƒçµÈd'ç Fε·8 ~]‹ƒÁuŽvå9”}œƒÁó¸$#ܲÏç`ð<î²”C‚ÁóZž³o<,E µr÷!£ÁH2øKë‰y<ŒFN×67a< WŒl¶^‹ð0g 2N0yÖÎ bÃHbœû¦ÖuƆ˜ü$• ÆüqšÀmqnáÐA_ˆ ÃùSFè[6Ál8lR!6bÃØ€•žŠˆ_á“̆ÑùTiyÓ'p.3ÚPE8œà:ú¨nƒá0§2–KXM»BF'‰5Û½Fpf¢ðó\þ\ ‡aÑ\zíª0öpœî{yà°” ¬À.ùÃá “âK½³ _h‡¯ýex4Œ„Òl2FáŒÐp–MÖOÐð5;Ù3FâkGˆ÷ºBø﫩c2£á M{Ñ1%£a¶»f4,×"—©2FÃjÉ{Û¡)Œ†ÃÆ)BÃ@OðÛMïFÃp±…ã×':a”µ'é(;è„á¢ù¨çzÐ ×ctÑ0òqq÷²4¡áp-„†Qƒú\ê{>þuÂ\Ãhr}ˆÄ†‘ ú°ªÄ†Ã¾bÃxÒ¡jŸIlûr“‡¾pîÔTÄíÙpxoĆ9á—Ù0G)3ÆŽ$™^&¦ýn\ô‹ÛÝX%ŒØ¹³ÄCd‡““ö&ÃØMVf¯¹ÔÌž _𴂪sÓ\O†!2“†¾oå>‘áÐý-2Œk¦b4 ²’®6Œþºínò­/ÿºG Ý~7h•¥ÑX˃ óF¹€†±ã­Üš`EÂð&¶É€†±óKN1Ê'dS;éŽöØ=al¤“Qà6· d»ÞzÓ‘z@Ã8?ªÇPüëS]!ŸDXÎ'hÖo=«gX £“ë:* liä € ÿcg_jõHk=æ-zs,|€Ã$=Y‡ÐÇj@t˜7?D•ð@?¬äófÃ@‡±¥Pîúè‹æÏ"ÐaÜD[£Ùæ$ú@‡y—F Ã¨èÒ[™Tžè0^äm€=†­@’‘PÞ“è0o+ tµGzºåzà0< mïs`Ãø”äåE³gè;*˜ c¬·öímlìØp¬Ä†ñŸ,u´}†ñÕ£š<[™Mt˜‡—_ðÎjäè0ïÿ tC„d‚°À‡±ãTz¼kè9=¾ y{Ô >ðaÜ'¨ðé>ÌYæ=¼Û‹eÀ¯[\ߡOºu’ÿ{à0L÷^Ò‡yÝ;ØH vµëÙ˜m$gÎèíå„åzÃáiØû6Ñf8œÁ{,í"ØHd¨~we.i>»­®d6 û[ªÓrÄe6̬€Ø0óÛ=Ò€†u xšç´M ó=˜HÐü6˜H`tñ诗‰„ŒŸ³L$o]îÕÏ Ш®Bÿà!‘1tÒ=;/ L4¶?rðïå¾e,¾Ö¢‡„Y"; ôÂéRÃOö /ˆ—‡DFNÇ’ |4C÷¼<$2ôÅkŠÉpšON÷¨‰ µ .±o­0ûKx.ŒâMi¬ç˜Éʇ®:Üh"ágl"!4|tSúˆX˜ç¿„…Ñï55}yHuyH@„+ ëž °‡ÄÀ†ð®ŠMöà©*{H@WZÊ¿ä Õ¬lúÃò‹M¾95h (,7ކxƒN†Â  c’ŠžßRa°Ž!ýßÞ,º$Â’ƒÄtq¯*Á%(œ0Mɘe¤H…Q³Ú¥ñáÁAæÁ2‘Pw rày;HÀøGpdÿx$Mé2ÿ@£¾õ’ìñJª,e‰ñmúÇï4bÂÀFæÄHø‚Eú­£Œ„Ã¥$Šp°ª!$LM aæP„ƒëá ²ÙœTÚè‰0ÚÓ±×’ˆ0øº›ben¨Ì/ˆ0ì{ªtP[«GPÇÉøSµË…/„ß`ed¨C„&ch"£Ž-%(̯•™0¢J&$ÜMÖÕ€‘0L¦*`$¼ÌÉ5Rp!a®„„ñeº±ÝÐ ãê­ÙŠŒ„ñÎäoÛa= ad%ÉýíEŒ„šf9$ õ«í‚c$*#ᙿ¤±™ CáZŠÊ sue$îÝ#a®? SQ€Â~MŠ 0Êæ¹^Þ³ÖÝ9›œ–˜0dãe˜´—˜0ß3a®­Ì„¹–3×BL38ù TJLL˜ë+3a~ Ê„¹Mc&Ìý1áàdFL8¬.1†´jÀð‹ [ãñbˆ “ŸÞLxVçôh,k`Â̵™ C@­¡p SSÂD ÏÞPB<K¨Mct#ûÙ}¼x0·?̃! 
MšIx0¶¹&ÍG@x†þéØöÆb¤O™‰ð oQÅS Âò›Ør±õL„!ß—î¡}¢Ž,¼±šœ›óÇÆD˜ˆH]d/G KH˜*^DÂd^‘0½  ÆƒtIa#azd‘ GÒ|ó0¶acgŸ¨…y’™0¦¶90arö L˜ªZÐ c-­v WL8ÉL˜,õ"FòÊÐ@èÈ„iU#2ajb"~à~¢¹¦‘ »O;è…¹·D˜¬ð"F»ÒoÓÄ*Ænóg‡°G î0êÂ¥áÓd%áPÍÓ½3¦¡DÄÁÔUE ‰Ó·0õœÓø%aê ÆÎ°K³¨#×ÊD¸K+)ƒ¤B‰Ë"cI @˜S÷y0Rpu3 tùrˆS;y0›J#U•yp<’xp¼ÚÑÄ÷ÙF$§þ]žÂiv$30çƒi‹ã Ë`@}Œ ÎRëÌ&x ãÖ¼3w –©±4qêJË0áÓÍ‹8Àà¡[Ûf€i0)2_4¸ fìÄö†«Î­‹Lƒá‘ÔY)z { +á` k¹E{*ä)Œ˜$i©Ô1Ùã`¨"\°b‘ኲ‚°?"†š±K¢šYƒ3ø|ê7LJá }Ÿ®÷<Û¦ÛÒF¾±ôÓÛ6ƒy°<é”t›xàÁH™•F|Á ãÁ¤² <¸­ê–ãÀƒCñ`DÅVÛBË<˜5£ÌƒÑ6%ÛüÉ<˜•‰Ìƒœ 4í6aŒ¤’íïb" þeûЈÏdª¤;n‚R!`–‚ÆHxNm+!ápw¤Fàjö>"gc¥0_%+…A²ns’$$,e¹XúnP £;Ï›¢E°¼µ%$a:†p0K§c¬jñºŒƒq®nÙˆcøƒâSRRÆÁwïøóÆê‘iò™‡kg0-Ì—I4xŽînóu$D£„ƒƒ<•5¡Ìk„‘+ÞÌ”“x°Ü8Ì@”Y¾ž|o³Ÿ ÆÌñÒ€‹ FÚ°¥r2FYêfbÈáŠÃæÈáð›¤Fô£¥y0bS5Ešq0¿s–“{|³ÈV%ÂØ“cÉ A"LKŒA"\!À¸U‹Æá ‡þ¢^Ï,®3…Vý3Y"Ìç‹a™*›Sg”ÓùH"LŽôL„ão’DÙ 4Â8%˜²F¸ÂÚPƒuY#ŒäcËD áp6Ò#¼ÙfƒFY½keù#áð¾Y&ÌO…e¯2'æë$$Ì«ÀÌ„1Pz,†™™0jàcÁA&,Ïì콿׿NØ¿—Y%ŒDÚþè>V W¨Ôæ€ì#"á𛄄å7eºQT2GH íbËŒ„qÜÚžû¶ˆ¿é‘p,óHJòayጄãù<F™TÈ{Af„Ã+ó@8Þ› J–k4ÂRrYü5ó`º ¢Á<ŒfŒ{~,Û—i°WÈ Æed gOvÿ¢Q0~³uu4eo{¡àðºÇ#×o«öÕ·<er©×ö 'Œ|µcÝëQpØ„@(8¼jBÁ¡Z ¦j@ Ø¿ÂÀ(ǽ…CÌËñ$8¼OO‚ãM{ê‘`ºxâÀ qòòǽ‚K˜ò8\"Q` Q÷öµH¥ ®¤×6" * Qàø›ïlóÏ„(0Ž“;»·Ê}Qàø=æ Œ«‡å¢‰[=FᣉÝóÆ@»>ÑÏ*R¹œ?p¢†õÜx`À¸œ47|ìBÏ€_§ô øõ<æÈ ×Z,"ð댞O8+cÛ{+‰£»ª2INŸxFpç07Ç_õ˜;Ö@€IPðër<æ]Ë…KkÓMUN8>ó€ñH«Œ¦UÜM 8¾~bÀ±:õ˜0Þ0œ+Õ™p|ýy»k€À¯Bcå /Ö3àמÇOÙ3`Þ*Ë /êjÇv›p¸BÀÜ=ÌSy 07ú¿Îé!0*ò3Ô>@àðy޵œpü舿N?ÄïEÀ×w!`練Ôéß±g,_ÒsÚ\€çg›ÕsŽ0Æ2RávØ`ìÐLOÞ{­ób`^É`ŒmI¾õñò1€§mtNû…xþ‹yù–̯×ñ_D¾?ùÒ•Xâ¿E¿±{~kb=ÿ-Ñ݈S|Ë Æ±·ÍW‰ÿPÛø+ð_$4ÊKTŸ8â¿(“ÆC=]ˆÿ–6Æ%×R_™rÒtÊp;e-;iüËåqkã¿ÒPJ—l* â¿åÁ./Ývµño¹ç÷Ò“üâGÀ¿ø±â¦=þEYζzMø'jÇ×™ð/ÎWÿšÇ¿eÎz6ëðoyƒ(¯[ÃæÎ»}ð#gÈ/›œ-;¯:Qv9k6å[‡´y2zø‹óIçX÷•ô[dJ4©p¡_#Sà#vèWæå߀¥Ñ\eý°ÚNú›C¿r±R}šm°õèWšopÒ{laG¿øÍ„ÝÿªÝu迉D6õ©ôð·ÜÒi#ŽJå¶ÿ–»5ÛÞm€ <ųT¼=ñðx>—нD奯o>÷ /™¸<2<¾N~•ÀxÈ­TÞç0‚n!ÜЊK ‰•²Mµ=þ•Ñ<`nÝŒïù/Þö… YŸ¨çqϰ=²žÿâq@?©“`Ïñøe~ÜÔ^ÔóßrËW$í¹n÷üçëY*ƒrcÇQ–Û±ùã¿¥ÊP“ ‘ÿ•ä)ÑmÇQ¤•—yÚÖO;þ‹—6°Ço“(Ïñ›r¦ºñüï຋zÏq™p©Uuç¿xš†ÏÿÅÓ,Øžª ŽÿÎÊÕÚžiRœœ¥î¿ýâ?Ïìã&|¢6n¦¤ã~ç·›ÍIØ_ÜÔ@Óµzð‹£¤³7ÉU¼…µMä#€_¼”E élŠ„}ŠÜ Vè¯ùEÙ}›9E.´þ”"W'°³ý”"‡»{.UÈpŠ>ïã¨Æ)r¨,ÏÚKøðolÿ)E§[®r¯9SŠÜ ›Â“òE)rhN.0©U‘vо;̩Š¥Èݰ¡?æÀÿbh'muÑM0ÿÆF”RäÂèŽRäPg¯“¥F)ráI’;Ä 
[+ùxv!¥ÈÝØ .ïp»-QŠÊðëÛ/ƒRän¤?œ¤NJ‘{°7ý$ø¹ó.ù€v „O‘C#ãLÏ)Ey±h'N‘»á÷îÜs^ëÙªÊ)rrw%-é÷GàÀhä#?ÉmÞ"K‘ã)§ÈaF,`?nN‘ã g0ˆÞ%=Î9€RäpF(ÏÓ–SŠºH¹šö™Aj[ñ¾ª#‡Ÿ=Ù!FîçˆeÅ„¹ ên‹;ÅÈÉ9o2ôõèÿ»Ë £¹ÜC_Qãa#'W{ŸŒ‹‘{à¸iQÔ!Fîž3/•…¹«Á2Ä3¤ëcä¤*'¯$ÄÈá‘Ko%£ˆÍ{}ŒÜ´çIMArÜ#¤'ïj×àƒänÜQÓå!Hî5už$÷À&ÉìBœ<„rÒ.BžÐÉ»à 9 ”ç¬àƒäðNC’õ"ôNåò$‡¡™s¡$¹x“”$wcæ’T¯’än$ëXÚGH’CEÖtV O’>ÙÛÙÈP’œœ³œ…$÷À´Õ9ŒP’œVì9UûmJ’Ã89„–˜x‡%Ǫò÷ñà?h!ì’Î^°Z1 Ž ¶œ˜ Æ€#{îëDÁÁY‚DÁ{ˆF×|7Ï„ ŒE¯‘ÕZ˜DÁ„p†YŒ¸h9ŸºUxQ°|¡°5Ú‹BA\ར‰‚a÷)/l{À±(øÂ&ßwÉ#óήIAÁ#B¹|B›¡“C†È·}‚C³÷—CÄqd¶,"‚ÕI‚™ç³$X†)œî"RW¹¹¢Ó/‡·¸ä"*$n'0ÎyjÊàv»0³w0:•[·+ï`ç´KíHÙ;V(3sóí‘¡S•^vg|³wð¡žmœ$Ep±­¶3Œ½ƒáw­u9X–]J¼ƒuð ¢qSƒÔAt½É…ÔI—×ã ùÞ4›ôÀè¡}ºç‚¤–_D×eBW¯ÎP³I—°éaó"­ÃV{±q°<éæ÷8‘åÀR–ûú™GzYùÜ®£E’àŒÊ›ÌÞŽDÁ°‹v”DÁR&Ãÿ®² 6–÷ØlEHƒÃÁ8X* 5ã`,Y£ÎZFÎÁÃñbvƸ£©#[p¦FUÁò™'…äì,]®t²E}wÉ9˜ö3çàpœWgLª0—ÝQs^Œ îÙAD^,EÛãö Wc8&“”=êcU0ËòC;ÛTÁ¼ÇƒUÁpë^û (%çàpœW7rRÇßôª`ÜCšKÆ ¾zU0,¨å™+C"Yp¸wÒÃKºtK¦']ðß^j륺`œIFŽy³ÒãÎzQ÷,Ö‡3‘.8œ‰tÁÁtÁ¸–3½'Yp¸L’çÙN›="É‚c™—“l’ã8xôìö—dÁ0Жª£FB^Œ4÷e ›^Œ_”wºãŒYœAÑŠŽYŒíL´l¨F²`v'U0kEg¬ Æ4¨‚Ä},—†R˜@ª`üæ–õIª`ˆd–®k1¤ Ž·àUÁðä–ªºå²*xÀ“Æ"8ïõÏ¥/µHQ–àGKó$Y0.ïê·š8‘,8\:Ë‚aí“›‚8–XXÒ'Ë‚CÉ‚ñzå P*K²`9NÞ ®m’,˜õAL6gAÌvé^Ì»ÂX,G夵 †i  IILÆ‚4#±l¹©,p±„PL»nÒ$Ƭô¶ŒSÖXý5Ûa}0èámY¬¬æ§Éúà?klXÝxc]çÖ£Ý,—:b±>˜¶É±>8´ ¤F”LzKê`ÚâÔÁ|}¬¦MyAÌFý¬ :–ÐÊúà»kË‘e‰0L8Ë%…0WRôvwß[2H!Lnû,Æ$K’ù“X¹qAáÐjD8´,æ/ŽEÂü]±Hx€¥{/‘³H˜¿+ Ë÷(¿®«},¿I"aÀïÇ2€·H¸ãKûÈo«®üQ$,/£Ͳ"aXÍåÒT®Ç*aª'Q%L¯<ª„åœr?{‘!Ê„}ˆ*áø«¤¦9zT Ë‘òÏ£¡s¤–$-{R„I*áø¼J˜6¿F•0ËáW®è“TÂRØ».#E™p<’dÂô¢£L>rùÖ§φ5”3EÂè]ä±e5A&‘püY w¬ ÕmŠE”CsD¡f°H¸ÃÓgÜj+Ì"áðÐX$$•0YÕ¤å¨îÒJß×–IE•px6¬ŽG’JvgòpT´Î*a9òiX‘ ‡ÇÆ"aô\Ù1c/Ædëˆrk–½H¸ÃN™£H˜6jG•pxè¬v\,j„!WíµšÛ.i„;¬¢¤S{ ü:ð»Ði„Ñçô³6Âáx$‰„up©…B Ó~ô(îЧa5ÕEÐæûŒƒúáúøù½ŽØ6~e*›æÿ²þ¨ ¤üƒõÇÿþ῾‡?ÃZìƒäÈùäÈŸ_ÌwAïßyRÜàÏþ¶ÿÞ'Mò}Üz¯ú7ú‚+h—þñKÏžaµjïzÿåkï>ôýâõ/ùL¿æ èùç¿ôìë¡kÅßùä Öoþô+WÒ¤’É*Ò{ýá,•¥ý’Y7.êŸ~Ü6åÿ˜äU.üãÇŸ?þíÿI{øÿþñã¿þðÏ?þÖ3•ލA¬Úa­ö·ž©~Ç™ž¹¶U;Šû™žx¦/i+Oýw¯4»UþÒ¶Òß©LÒÜòãWue e|‹8ãePá]O­à}÷Ü>øº_øUÝ'¶ˆ­8Õx9¾ìk+(Ö6Æú’º‚³ÇºB—ñÅue^O¨+|=_[WV›Au….糺ò·Ûj‘W¸S÷ÒiªáÄÔÊ€™ê»©¾>îÕRÿî΂ü.L>žŒ½¯³ 
=Kþƒ§Á­’–󯜦þÁÓ\wÃBiêÒßýÊižxš¿uŠ%ÚD¢í¦f2H¨2Ê“‰L8þß<ÃüôŸ?ÿü§¿þߟþüç?ÿéçç¯ÿû?õõ§¿i$Ú°o´§c\×·Ñè»ÏA’—̹5³i¤o3‰®´´ô0^È5²¾ q°o´ Ÿªo×=«ýʱî^Q$…gÕ @ïT’›C,]¾Èt9ݧãöÑÙX-…p¥T ±“$7*æÕ/éÚ&šY84Æj‰Þë¶ÿ–²î–ì8ưc8ý xYÑýs)!i§(½¸ñ4Ö8!úÇ/Ð>€m]Yš>аè.3gÝb›:FÆx£Ü;ND áã¡ ë¯Ð)¬Þ/‰„y.^öŸ!…¯!8z? e© €–£¤°ùU¦S ¶îÅ@´*—²¡þxžtÈÇrЮÉ ä¡‘t³ÐP³¦oOך Yƒ\ÕµxV‚æÆç¼:ë3)„jKk2ûxÈŸª_M€ˆJ^ÕÚg8 ÇáY¤’ÂÛ¯'dx'«3kÖíz÷›¬ÓLæuÛÞ¡.í[:DK^2öEi•vÛޱ̽Å#ØF¾#Þ‚ a…÷­ñÂÚ)E‰}Güü4ŠƳ°04¶KÞ{äBV”z´¢¬ î"W¿²€)éþS)D@¦9ŸRʦ>œ¢‚¥iû<䳿Ð’«;y¿Ù&h=æh )ìÐ&i=¾ [N{TáÇòÞʆN 9RÖ ³‰¾&oEíã1fÙ°ö˜ói!ÎöNÔÞ¼^Ê ´´ZŒ¯èšÊ–ПՊ!’ÂÂ5ðë†k?n)tFÔ>yyßš5ǸÖ$ïa/=ÈÃð>Ôä@žæ„Ûšc„^dÛŒ _ÊTqòÁ]·šòfY/ο×[íÊ›ó>ÔŠ!…Ø:c¾l.ݱ÷¡7"ãÏËÚc¬™È÷¿wªÏBçCÍŽÚ©ÂpÐ9øò9ö¡îí¸·T従sðÅ&å½»B »w¢¦„Y8¬=f§y)ˉšmʶFYƒÌ)R˜¼5{-'ìÒ°9ÞþÙŠÚ™ÂK¡´Ö"ëù­¼OØ[z̨ÙÀ8awÄ1£–§(u¬îTÃ4÷U8¯…ãpîæ­¨¡K”w°G(tVÔò£2ý‘·°ìlEíýÄÓ=~1œ0a·KH°×6åIÞŠIo{‡à,tVÔ”d+…Ù›Q‡ÊñoFÍfõ ÛŽ5…ç‡ӛQcGCÓ=ϳЙQSØ-,ü8Œ[º½¡Þ'éiÞŒZ>—6…ùûÈîͨã '¤‡ŒžUÁ÷sCd\Çó¨èR “Ï_qDF«° ³"c)6·3ZkÅ'°È9‹ ”wûت"ãWå¬õ ¹–¯CU5b׋ ‡€ÉÔoByÄh¹2ÞD¾>-ì>„%>îÈÈ'•Šzï3Â"wݪ{ÑN¾o—ÂRa…zÛ€£'7>ŽWÚ³Ïa)ðT©è,<ãc„ˆ^p°ÚO§Å"#k¹Ÿ¤-9…£t¥öºÁ1gËèàZy,è^æâëzK2[;£cއž….¥›‘Èxwu˜Þý(>’%ž²ºÑ1B®›Tñ{¢ãö¡,rJi¦Õ…gtëøx8ª[†ò2ªÜÍ‚ÌÖÎè5§<š­‡:ºOfáÌRyntŒ™”L¥·ù,<ñ,ÐaVÕÓÊ+½\> \j!EÉ Žå8©‹ioy—ÂìZò·éî±4àøuÃcù™~ÉXuç-äù¹¸z—‰”˜PzH5ÔÝÄD¨áÔñÈÅê^ZBÔØìpY”g@ÔÜèÌ…Q¯7k³SFÔpHrb‰PcŠŸ]ôꊠ,KX">z6,“5özÀƒXw°{@²ËM÷PFTi_ö•2 ®p[w“6ÔunákjGÀ€Î_2̼”4 ®°-ïºï-jüìp³lÔøÙ³å2jì…LÝ55 ;‚ÀõœP¿.ÈêŠé­›ûŒPÇÿL„…Ò^¨ÉêŠýnêF„…Ýy Q£0¹É+!jlgÝ¢RFÔ(Ęo ¿¢®`ÉnòJˆZ ¥{Í—b8BÔ¯B¨¥PcKŸFC\ºîd>+•ßS«SâÓ¸Ðì&!ħ㵡­«ý1~O„GVÇ ˆPÇk%D[LnÞKˆúUèuÅôåÖꀨQø¸Y!j:? 
FÔØGæ|QW@ ö_ûz–<ã»ÙAl²ëuÅL!éF΀¨G¥–¨.3êW¡gÔRè:PfÔá" Q¿®Õ#j\«ŒZm‚5 ››£~Ý¥gÔ¸V©aõÖñŒznÛ+iûv¢Ž?ê5Níº'5 êW¡GÔ¯B¨+f‹28ß½ #j)”ÙLëc“BÔ8ÒnË QcLu¹ù2!ê9àrÓpBÔ(,÷UtquÅÝ1BÔ¯«õˆúuµ QãlHS×eABÔñBÔñ&Qã:ïv÷=|`Dïõë‚<¢öÉt‰ˆúuAQÇG?{€ÁúUè!5 ¯CR£ŽC0Rcðׇ`HÂ|¸Cjü*<ˆÛ^‹&H»D©qµ2®]/ÈCêøÂRÇûœ¢åp{©ãu¤ŽÏ› õëH©_…žRWì³y†­á§® Å0§F¡Œ¯.]#NŸM†·S£r´Ã¡SÇúH˜:V+ªq9RÁ¶å#ƒêxFÕ¸Ôî7I :>ÕñÁ¨~éQõëHª¥P&ðòïôj=«OP5Ê`ó’öð“P5~õ)º > êxʪñŸKÓmÛU£0ǨwïÌ®˜UÇGC¬úu)žU‡»'TïžPu¼{BÕ¯Bª_…Uƒf<‡82ª~==‡ªÃÃ#T…ÇäÐ2Ϫù 0ªžlåQ‡€ª_¿êQõ'…Ý7É\èYuÅ”óÀzfÕáÙ1«‡YõëV<«~éYu,Üû8@œ›‚VOF¬:¼bfÕ¸{u'=Ò³ê×ÏzVýúYϪñ³åP\BÕ¯SzTß¡êø«žT¿~Õ“j^·z|0©–2dôÑ7v¤:Þ#‘j–ªöUÇË!Të#¡ê×ÏzTýºª_GzTý*ô¨úõ³GÊôJU¿Žô¨6Ÿ%«MªÃ[$P_jÞnÝ€@õ'…Ã7Ét!žSÇ—Aœ:~ÿª_…‡Éôí¨Žµƒ@u|¤Dªãò¤:èIu¼"Õáñ¨~ýháQ2_MåQ2öF—G¼y”L§|xÊhŒL¯™0õëŒSÃçžcÊM¿i#Ü*aêPaï×çý}{E¯ƒ`ÄÕBߦ7®†'T—|ïŽf\ ÆÑ\ ÑOiY %WCcz<´ÌÃ+ܳԑ:Òj¨hn"ZÍ8àjtä¾Óûb=®–OcŒ³tÅÀºL­Þ/ ¬ ¼ÊÙÉïõñX °Ÿp¯{‡7£jùôÇUŽ`˜PuÁæ{è“÷0ŠP5h³¸eT-G‚™“K{T] •:î§ŒªgWÛaçU³[d@ÕØ›U½g—yR²~ÃpysuOª¥ÅÅ¿2l¸I5Œ¸F{5ö¤[ÈïÞÁõ¤Z ‘wnÒ "Õ÷õºm©ƥÿí@ª±§]Þ§‰eˆTÃR L%˜Dª &©rVÚ©†Q2TtšM¤\¾mÁ“IuÁän^úŒÁÑ¡ÂÁïo1uA{×eXù‰˜žj5iÚS€Õ艥uÜ4žPu†‘G9ŠYBÕlPuF6©T]°öØó¶é ¨˜ø–šª+„ªQXJVë FÕ‹œ<‡€ªA˜ñFµBç88à rT>QSãjO4‚²j~jDªãDª±í£>NÚìI5\V§ô@¹±'Õ¯p²VúëQ5 ®9Zõ˜X5lúÆÊ?X…žUgøÌ@.µç¶ÄªQ(¥Û!°j\<üªªObÕóÖ®¦+̪á x’„VÇ‹õ°ZŽ«Ç?Àêøú V£P.l{AX-§”Îj¨Í ÃêXÍ Vg¸Éõ+«N`5 :¤O1é?ÁêŒ(ÙÔøÄªQ¥¢Ú¢±êøM«ÆÃ+£« ³êøemV®„Pu†‡¹ôT¡jL)jÔQu<¥'ÕðC¹Åö æ³Yî^—T£ð¾sUi#j˜XJ Æ* ªåÈr"i¨~ÓƒjÜå‰ËbP‘9"ˆvTÇ3¨O€85[ÞN«‘éší( Ný:Òsj\ÏÓ`<¶²çÔ˜:¶µ‘C Ï{Âi§F |IÓnÈÝsjv œ:#¦žÄ©aVZº©(&§ÆÖ7hÁt\Eœ:þqj¸?ˆ·ï›p{Nï€8õ뜞Sãy7 ! 
œ6ùÝ?qj<y~XÀœçO·ž2üv±½V©ÃmxD=YÍs¶6lDAymÂA€ZŠdƒIþ'€ƒ¤ ·§Bk¨ãÆOÔäNø4NÙÆQë f7ì¨Á©Nn^ÔÓF«êì|ùþ5n³$r2ŸÎ³*ÉçÓpL¾d”¯òCϧ1øÄÆÆÖ?áÓá 0 f‹î¨_…•¸1mJ;7‚ux] ¨Ùg<ê×­4vãæB¨ñ¾N^aÔdøtÆP²\: Ü|ïBš Ý2È|zZ¬IV´Ðóéðú™OÈ\F0ZY™O³ñyàÓ°"/霘OÃ>|$Ç0 ~ý¬ÔìbõëV<¡~ÝŠ'ÔØo}%Ó¡†18 R‹2VG¨QÖoÓd1¡tV1ãiöwx‘þR·1ž5ƒè4nb)o-tCcÞQÎtš-ÍŽïƒè4Á:çhȤUL§çäuØÅÓl7ø4~p4ÛÃ|šñŸŽ×I|:O³¼TeÐPÓ[$:«8Ñé×¥z:KmwÙ‘mNäþî]3ß™N¿nÒÓi˜0"îm ‰NãW‘(³üˆN³Û} Ó¯=žfïý€§ñä2Ï÷# < gþVM"Æxu£'•1žŽ‡ø4ž€ÔMng>BSW#ÛžOçy[& c>®‡ðt¬Ç÷òvw¿•aß‹§Oày’ÿ»ØtFzܧöR¨/WS—ï@¦a‰/ïDMÙ黼úPen°ú¸Ã5¯2Z}àaXTëÛêÃSË—ÕÇu"•™*<‘š‘L3 dš¹{}0ïc¯À¬ØëõÅ’$#Ÿ¦,ŸèõÑ1Âé Ã×Ç€!ªÆF¯™ïÊ Ï6³×H¥´ÿª%f¯Ž$åªÖ̧XNðeàÓqªD€”áDm@ OÖ“)l€:Ng PsNMÔ˜B˼ÌÜCPÇ© ꙽‘GWÙj ³OsÔè6eê`ž¨1,ÈYPb~sŽ<íÜU|D€:#éRžŒ™ˆx@ÍiOPc\UÎFp¨9Z‡uœ Ž3WFÔaŽÂˆ¤ÂÒÓ#¢–‡…Ý4@ˆš§n ¨ã {Ôá]2 U„õ¹]nt«°;ÁÛóØfŒ ¨ã%D“Ýí80¢¦Ì®€¨q÷cØŽFÔqÚGˆ3Mø<¨#êðQ‡©&#j ª–u‰Šu¬VL¨ÃûgB@×n+ŸL¨yÊL€:Ü#jD|ö¹ót ¾±ûx¾‚UH€‰?÷Ù€:|s ¨ybuxÑ ¨ÃaBÍqÔñb PóÛb>-/DÆu¶ÝFùô¥MçËA€:„Quø0U‡µˆˆª¥WU—¦©öÝõ‹T3:°š÷ãdz «caçQr,ìõV‡—A°:¼E‚Õ¡n0¬—a5½E†ÕÜñX¯õæQ2µ×L«ÃcgZªÓêø³D«¹æEXMKd‘VÖk¶h5ÝÝ÷y~$§¦þ}ÆÔsf°4)o5ÜZçÒ‹V#ÝZekWƒ ¥£i&Zè’ÔïE«3LQ²r¢Õ×|ê€h5ü“áÖÙ>¡ÕACŒ©eܸ¼î e»A8kȘV³XŒiuCpìñùdZ-§/ÒΫ|—©¥ð–n¤ŽOŒ?À8¥6™ñ(Óê@Ô˜VÉÓêF˜V‡iÓê0ðgZÍc@«ùC ´š¿M£Õá¦Õ<Ì´šb±#­Î°S9›÷™Vó \ Õ”Œiu<'Ñjéå1¥T%ÓjJ¤Ž´šÒ’#­–o4wÛ"h5EZË<­N0}·Ý#VSs¤Õ”òiu,,{)ˆZ-;:õ ωV_0oV§««/˜%ËxêkjÎެz6âÅÔÆÌª‘M)3^‡2«F%¼¬ UVlâ‰UsXf`ÕÑìXõ«0sö¥Kx‹îԱܩc¡gÕ(„S¤6Ī9¿7ÀêhjO°úUèa5Çû2¬æèY¦Õh5®µÂ«ißÑêx#D«ñ³òüuêK°:™2¬Ž7I°:$|2¬A¦ «_GzZý:²sþ¥Çd\MAºF«cÀÑjFeXRAV‡ìS†Õœ¤`uÈÍdXbCVs`n€Õ” ɨúõ§_j¼)cj*”zI×ïuˆýdD^êÝÉ„:$b2¡A›L¨C^(ê×9=¡¡ L¨C~'ê×yBÞ®Žbò}"Ô”Q:ùtÂd>¢I™OÇ;'>«ÉÛU·jϧ_?ëù4.¨Ý3÷/OÇZO|úu'žO‡QæÓ!F”ùtÂdS^µZ7Ÿ~Óñép#OÇ"O§q¾,IGLD§QØò œžA½21›g§ã…œ(Ħ¥Lú‡jâbÓ8¥T0Û'Alúu¤gÓ(”®ðÞ2)fÓ!¹•Ùt|>›M‡YfÓ8f´§ë¦>bÓñlĦãœÆsC\­î½ 8ïàôëj=žÆ‘#¡„§Ã»":2i3mÿ ÑéW¡§ÓH÷ºû tÑ“ØBg$8î‘á4ŽCkQÔ°ÚÃiœPQuÌd8ž+ÃiÙë´~ÔB—ÙÂ÷ÁpúuNO§êÓÁôg=~ééô'…ݷƾ`:sÊëÐ8›N‡ÅtZ ±©÷ÙfhL§_Gz<#ï,Ù­‡'<ïžð4ÞòsgÛOx:>qÂÓ¯Ÿõx:>7âÓ¯Gãùt¬ħñ³2¤ê¦$v|Ï ?E7ŸFî8ëqžOc~ÙdF²÷ö2ŸŽO–øt¼âÓñùŸ1Ş_…žO¿®Öóé×3ð|zΣ»ÌãôVŒ}7É€:t° ¨Ã8† uHÙbBÏF„:Œ QÇ7Iˆ:Þ!!j)ÌðòVgzBÔ¯B¨ñ³rϺ±õëœQÇ—Eˆ:ÞŠGÔ¸žzßšëCˆ:¾dBÔ±¾¢~Ñ#êבQÇçCˆúu¤GÔñ!DŸ,!êøðQÇÇNŒ:>=bÔñ!Ü)ÞÞ÷2êë;5ü··ñý[QÝ •´æ00ê 
æw3·gϨÃöi†ÔXÝR5RüÀMªŒ:ñ3¤~é!µtRñ›îØcHZK†Ô!ÊŽ u,#Hàî.WÕQ¤FtÖÕ~R‡ì8bÔ1ÌŒ5.GÔÑE{FÍPD¨A‡¥:Ù†5’#º³j#BãaˆP_y6ãfMJ„—#ãZÛÏI„!Õ™nB5†Ò;7Õ¡æþL¨‘'o¬»~¢§†½s-÷)•!ŸÄJϧq1)$$ÚP\&w2Ò•¯E…TžO¿nÃj\ÏÕO“Eˆ7"ý„:w{BkEe<ÊoG¨gáî@±·ÔøM\¦m=ö€&D¨ªÄjj’àÙmÚe¨qˆÌm3 jy ¨BÝÜX= ÆCk—ôÍ = ž…p^5äëõû‚ ~½E¨ñ.F—¡âý¼ãß·â5 Ÿ^º‰b= .K¹Ò4h\•Ô¸ÚŽPB“›@=/Vþjå5Ž¿·P—ÙÍÈØKÒ¨q—y.õoªëø4ÎØŠ3.ò€ÞȰÈF¨q¤üü0Ù£Ô³æ¤"L€zbùПu€z>‚‚@*µv€ö‚[”Ô¨]Hl3çU¨ç‘òÎÕ¾|j"Oá¶<*¨_µÕj<ð„½îF‹¡.“”8 &O¨qä…õTÝTä 5  ¦u9Åj4ØçUµYñ„7Ø›Ì~,zB ¶ïÎÔã:-2ú‹4Ì%×3j vóÃ÷œGJë0ÌÐÕ±jXårÌöݱêùØÑ•?†£¯Ó"‡§îYõ¼T¬ñjÇêYõûHǪQ(5ùR—GbÕxê–gÕóMcÒ¬Î.žUR>P:èþϪç³Î÷˳ê÷óq¬…Òc>ê_ïQ5N‰H™av×z¿P-ªe‰‡ÕóÖ³\¤Ú¡yX]&ñ“YÖ€D«g;†ÝF²¬ž·~Ÿ4Ájœ37gëåiõ¼»o…ÃŽVϟņÎMœ«~}àžUÏÇ"#9ËxV#oì­Ô}ZžUÏzÓd¥ƒÇªQßsßv°zÖþ[æéê×ì`5®FÚžG  V£ð¾eê¦;q<­÷ïYuül<ªžïQ¾¶DïYõlr¤ÿ³ ϪçS½§þþYǪñªdè8tÿ1±êÙÌAï¥~0žUã‚ÐM§Ï?â,T=+M.Zqª~½%ªç«hȽÞ2jvÄrè–Bª_M˜GÕñ {R=ß°‹Ÿð úÕÄ{P=ï]>Pµ%R= eö©¦¢Dª_oÊ“j\OmæÁæ8u¸RÏ©gÓ0¤5¹U#œüØ O¦íjýé95 ó8Ûx‰SÇÊHœU ö9ÍPtõƒcxÄ˼bÛs§žU`qêø–‰S¿ì~xŒÝ)Ò©ÊØsêÙ¬”##N‡#‹SË‹ü†OÖÒÄ<§~Ý çÔ¯‡ê95~oí1Évñãcœ}M~RãHÞ5õû$NÂÉОw§ž·’ßqjn©ˆRǾ(uo¥ÆËøÿĽۮ(Éq,ö>_±¥Îé[]úñË2d@ó&LIÔ84#þ|gdW^"{q¸g¸ ˜µVí¾UWgEEEFðø6ÿ…ÄR× šYê×f–Zç1™Å·ãÈ蘧Gb©u°"}ËõÌR—YšHj]<Ì­Y ‘Ô¯q“Iê:¿I]‘Ô:ËiUS ßI­`0õ¦Ò‰ú€=E&‘mTeŽºÆcâ¨õ3FÖ‚mòdŽúuÉžÁ1CC¢¨ëj(ê×YgÆÆøÉüæÜöÍØ¸#gÒ…À[ÂÆå]$†ºBb¨_êz«™ Ö踇_Ô¯2Ô¯‡Ìu®ˆ .K.â§_]æ§ë„•éé ˆž~ˆV¡Æ·±Ó¿Ôú’д=)no5W1*µDÆ +D£„‹Ž!är Žbû¡µë­"騫±é¨Ù=ø¥£–¯È犗Ž:ùX¾dÔ=Õh.2j6n`5§Ï²Œš²1ŠŠºsYEµàˆ’:¬¢n÷ñ‘ãdž"nÁ$“„º¨@YB½Ãyó¥Ô *Ûô-ÔPm¶³oRWÅÔUeBu•X-‚ºl)1A­,î ¦êtøáÞЙžÞá·ÚB€èii„Ÿ‰³=DOïÀÛ§-ãèi€éy'®H@½£@æ4 ¨÷Ko܉+P8É#{:" ¨qÚs4´“€ZM²6 ¨qIùÌ=±2 ¨qÅÜí:) ¨ÑÇô„PKgM\çÃâ‰;*+žnÂÃjtÖ5ÂçœÔ ,ϰ' õ¤a|™5ÔòÞç5cG"êe%ïPÔØ¥Â|äÅÜHD½ããÝ/«{Ì"jœvÎàQõŽYs¥‹¨Ýô &wNC'­ÈÕ~E,¢FI T}"óâºMÇòé~Ák’|ºŒ7’O¨#ä˧Ñò/Õ5‰§O$;4/âi@˜¼«§ñœ>õ4jloa¹NêiD;ND=ºÌˆ¥f¼AêiÈùgøL‘zzƒ|&Ïí¬žÆºñw(ȳzúuC|![ —k]I>U*22¬oH?½a½Õ6W™’~zƒFä>M+Iòéz'$ŸÆ%e¶ …t–OK£à®nÒMRO¿î5«§å’2ö‰›ôÓV•W¤J‘~ZNÛLÛf! 5:ïÚ’ÉuÒQc¢ÜS.³Š¤£®›É¤£–F™ÙÃÿš„ÔrVùƽd5 ©q¤ÌÜ.¦$!5ÞÈyz– ©7]ÿ%kè,¤FÿìHã¶'ÉBjÅäœÏBê =‰Ì sóÈBjìï·9o¯©˜…Ô¯kf!5:AÞ]˜'oÏ}Ü{ì²’”Z±°¼ÔÍRê r„O7I©ñGg)5zíìçÕÃÍ#Ð1VþVO’´Ôx‚¹'—묥~6k©ñ(×\®³–GÊ×ãÆf¤¥F—^GXŽ“–úÕ}IK³öc7‰©ëx$5õëÀ¬¦®_É©ëÇCrjàÓ;Н³œºŽ*ÒS×. 
=uU¤§®=KzêúõžÃK­Ü×%³œº´-55XEäÙ^©©qÈeéYM]Ç©©ë+&55†Õ¸ÂiÔÔõ3&5uípRS×ÌbjyÒá¥GjêWcVS×Ï™ÔÔ¯›MjêÒy$§./‘åÔeà°œºÄ–S¿ŽÌrj¼’Þ.“œüñÑÝ‘åÔ¸æ=f¨´³œ áØÝÁ‘åÔeÎb95&X,:׮˩ T`95Ð’Í ö‘So öp 9õ–gwÃ<–SË‘c‹‚V,§ÆiåáÌ™ˆåÔÒoÏËÌ¥³š}xD%,VSoXf\áÉMjj\R0…-’XMýºÙ¬¦–ÆC¦H3Ãb5µôá¬ßݳ:«©ñ’¬ñâ©_¯1«©ñÖMò÷¶ÀÉÒø‘'5Þß%KÄnçË:j€ œ]”oVQ|Ä*êHXE‘“ºŽÌ*j D¹!wÎ*jé—û8½Ô «¨ë(%õë´YE]¤©¨ë}’ŠúuLVQ×÷G*êúÁŠš.‹¨7¥1Ïj µŒ'0–¤Å"êWãøã™EÔ8R‚ÔÕÍe2DÎEÔ8òÆØ‘†ZšdeéžõYB›·˜9ö„º¾~’P×ñHêבYB]C*I¨y=à êú邺N¤ .‰ÔeÀ ê:´HA]FOÛ+kŠú¿ýÍþey~ùáß¾û¿”yþã¤5L‡6w ÆÔ—|Ö¾ÉÚy¿þ«–gõ¯ðYv|ùáÇ/ÿôõ÷ÿË?~ùŸÿ÷þòË?ùáï¾û뾉ÿ:Ͷrš(›.xôË)t®‹òUˆpýÕúŸ÷ÀÐEÁ[]ðÁŸzàßüŸ¿ÿþKžxÿS{üÌ{?±Îãß𼇠nd úO<ï_ÿßþëïɳ~C!N~h¥â^­ý¦—èû<ûø‰ÿ‡?üëïÿêïÿû?þÒgÿ9.ïüàñ_­°ÿúj rÄ9rtÿ÷ü›¿þ‡_6¶†U?pë½iýë7=t{ÒvÔÿøCÿîüÿïÿüñÇ_úÐ_{Â=¶÷#Ëß¾é;2qwx=ýTÌþßþ׿ÿeÏú ?èy~ôšõ¯ßZLá·Rb/T«"\ýÔ÷ü¸Ð þÁóã¿÷oßòì fxº:þØkþðf¾iëøg^øc~ì?ûEŸÖxÖëÁË|Â=LÌüv ë—O½ujò[ðß>·ž¾_#À;býðYoÂnaýò©w°úÞ¾ûíu /ˆ@ÖküT¼WÓYÕx8·† òc¼ Ä¯öKNþD\YpÄýŠ+ ÝÞ×Í×ûë¯t}Õd‚üY²ˆÛ¿þJ½^éS"ið?û Z1ûs#i~Ò3­ú?mò@‚Jˆ¶¶Ÿ¯Û ÆOºèžö­iêÈë~rãgM®'ÊÞ*m]o'·}òÝÓȧŒ\½ŽºO+z?e¬ðý|îXybºÆÊWÄjlO‚Yê'¦˜qŒóF‚Ì;To_Þÿù3B¾ 6(°‡ÑˆÒ^W¹í*Ç/¼Ì9`u2®{ÇO\æú…—ÙÚ@½]ÖKûO\¦×Ëü©K¨4LÖ×zM³(ûíP+Ûs…ùí¿þøã¯ÿŸ¿þÍo~óëûïç| ‰†lnmQÔ VÏùTM|Í9PôŒ½_|nöfÕÙ ¢Íì¯OìØÎ\t÷y\«ðιi姨RœušhL®›ì^|ªö …¦ôð´„§³j³ÙÉ Ê³_öè߯8=µ÷„:'TU'6ÖúXä.g»L…ÜvÝ\xî*«Ì|g5Î M¢Œ­07H»¦çžUGòÐçqxÁ«s‡êè2BY iëÓòј¶èJp¸ì ]äl¶W!w;°«¾ÞØÞ9‡;Ží\É´'7î™øçá•ÏÒÞ8+·tŸ¥½»£Ä Iâ–²‚j„¯M¼šD@žM–óeÎcç ÝœóxBk»r»r®þžR[Hð}Â-ý¼"ÏꄋÁ¡ÎGÏÌav¨'ªbÈP‘ý¼¿ì+ž-ÌS‹^›V÷íGçS}IÞ' À§~dåú”>½†³ûò6¿o2t–TØ}‚ëL9r³°âX¢Ô¥G[© õnaOâ™6Û ò–æy"E¤­Dƒ _CY¬<ŒóýDdð4”‰¢xQ¼í<Û÷Ûúô´ñ‚›½”³¥|Ó‰RçQÒE Δ+¿cBÐUdé’Ò4 Ù¢L‚ÄLÔ¬Zƒyb. 
šžçÙ?vöË S†Ï3’娪ܵst^[J(›ÌË<¼”#ÒŸß?YÉ*z Ÿåµ$ÇŸè*ŸýÛóÂ)VP>çŽä„ÝRwÐY¯YŸ·{xÕÈä¼ò”Ð5åc\9cçÕ"[DÚd´ÅçÁ Ù@FVX¸[œ×HIH8r„k) þñœö¸Â¶ö¼²óÃÀÀ 7QéNŒ]Y¦ æSÎ$ŽœáQzÊ¢tßVPF£|§í¢1òpÇÄêæBgCÅØ”Ñ(3—»Û-'wÈÙÃçlÐÏ]67y©³÷UšFmÊŠšt>ùxÖxŸÍ2äïaÁ >;2î Âj2Œ®G/=ìÈxÎ.Ÿ2hSç€qÈ'»ú¶á¯¨üê ;¥x÷¦™²CÇlQy@ü†ŒÒý)£4ÃRîìÈ—}B)¹Èä”ò´>hÄÈ~Fó€5•£`i¼Rúß@ÀH?~F³Êé¦ ÓŸÆ™•*qÞËMs· ,Mò³?‘¯(¥¿C¿EÒ*m('—pd1Y¡áyXm;é””5&·<®ÉgÎqH8²˜,s„l‘(×”æÖ!ºõl(Ëi!Ú[yƒç¸RŽë€€c¸E ~?šÅdù¥í‘ —ñd=ý—Ói´»ºÅ䴆٭ĺâyOÅ HX.³~ˆÃb2®1jöÕw䱩2òð¸„oiZP– ì¨ëcØ ÑÏW{ü&%jÊ—kÔl¤’<嫳ò 'ã™á¼ÜfŠÈáÆ‘-LÐxN>U÷)cx¦cær[i» ˜½ æÃf82!NóS8áétÄØûYb½ 'k#+ ‘£©‚Ñpm(—‡dh a)´@½`ÈÿŰXoæ0 ©š‡däpí^ Z3ó<‰±ö²ïº‡d$‘ÀÄÁN›½ ¤±MùmUChl†“_cJàfdøás°cž\êîc87¤^ÖCÝ‹@²|›mK j%Ù1R!g]ZGdÝöÉÈϾTFÝ£ðG:nåƒ|Ð,’yÉ'^ÛmKP=á»Fyý–ŽÆé Ϲ2äŸÆ–¬\ê;†pñL™¢„‹’Ë0Pᢇd$u$x6kŒÜz9­Œ w@*tw,¯sȲ­[µU€deÉÜ! ÖG€d¤äìî.-|@p?0ß[eàÑ8ÇóÉÝÝr[¥ß“µLÁHõ¾"c‰2Ýçà’5]ØÕž¹`RxùHæçë)Ë¿Î-DF&³å'^×Lά=ø° ’Å€ÈÐpnžÍQ¸’àÈ9eJZs‡4Þ‘œÆSî%Ü'«›:ªj)yDÆŒwaø=Ý-+ºðOz]ÅÚ<"c™=Ýt á:0ê©–Øö-EdšÌ¡P Œ,±,åʺh Êåëi3Ù¤ÔÉ\ïÀÈòÐw$ÅÊH ÿÌJ²Þ\)ìWGJ£ÇdI¼%Rà• `¹=ÉÈaާ×Gf¦DPx0Q}ajœ^4“‰ê ³ÖuY,m&ªÁ=ùB‰Xj„®³ÙŽYa©/ä\Ãìט¥Fþí%ßSÆ™¥¾ð±ßÛ*WÍ$õ… ˆ+Æ›¤V— ù‹m™¥F£,j‡Mi™¥F›Dòfs±Ôh”•›ÛìK­Hn8›HêgN9ÖˆÏERãï;öŠ­DR£? 
–S„Ži^YÄR£ìÃ±Ž ŽZ[öéÆÄQ_U¹ "1ÔhjÈnF_gåzöÄþ]a¨µQ¦««F 5M|öid†€¨«xÔhÛdU{Ù"2Ô˜ºAgœÓA­2LÚ2€ ‚ò2à†ôm¢ª_O’©j4žÈ-_öDU¿U­}Ê8³Ó&ªZ¯#¶Ü2UF±c™©jm¼·i¬q"ªßÇ%¢ZƒœâNLõ»11ÕõŠ9#ùDÎîÂû‰£®ç#ŽZê$—Ô8jmÔ-õÅfŽZ%xY!Cã¨ß\û›G­üàµÅkd†ú}±`¨õ³®q!‚º~¢DQ¿L õ«-ÔuèA]?m"¨ß{öÏã¡O õ»11Ô5Î;cnL uýhˆ¡~7&†úݘHêúEIýnL$õû´ÉÒ¯˜Ijþ0ˆ¢~7ÝW ĹéÉí(Ÿ.ÑÓïÆlÖ…F‰J&Œ zú£F÷·¨ßh&¨ß‰ Î_=‘Ó¯/;“Ó¯ï0“Ó¯ÆLN¿9ýúL39]>ïLM¿Oš½6Êq‰˜~_/Óz\›®– bºžtdëLi:—g-]ËÙôhÚº-̤ô뜙”~7&R·&ëUk‰”~]r‘Òï3&Zú}ÆDK¿-­- ðdZú}`¢¥ß&Zú5¼31ýúÜ21­§=n/“GÄ´6ÊÈ3= ÓôÕdRúõÕdRú5L3)ë ì÷R~DJWN¤´ ™í÷dRZÇðš}DJ¿L¤ô«_3)Íéw$3ÊzÆÌHÓ)oÍ ¯Ó[ÆèÏÎa}ÑÑûZh}HGŸ‡{"6Z&ÎË 1Íõ»»J‚¹èKWc»-k‰‹†/£¼ «hË\ô©Xû\æÊÌEŸ(NÕ€˜‹–*+r—åÀ»ÉcÜqÑ'L©F!ˆ‹>Q¸ ùî"sÑ'pNÔn`:ÓË1×¶“ÑÇ­^¾ì%2Ì¥ÀÇôDF³ß0“Ñ2Êûrg&£Õ̦ûK&.¦¤2¡„ö;qÑ``é&dÌEÃ>GÐÿø€‰†íÎô°DEƒc§¢áȪfa§ÿ=¨hAe뢢¼–½Û†,SÑò/)Èâ25u"£å@é·¢f2úÀ¼#“ˆ ¢£abÔG"â3}èzÃiìLFKÍœj#2úÀB¼' 7“ÑÅ4™Ùh5ÝIÙJlô¶ª'–6³Ñ¥*1³Ñ‡º!»)þˆ.EߙƑ›|e¦3#6ºTÒf6ãO^¬ r"£eÄÏ3i¾ˆŒÞaüsÄâ=“Ñ¥ö,³Ñ¥7³ÑòPÆÛ œØè¡üÛÃøØ‡Æ#Œ ±Ñõ6‰.õ¾™†U×Ù}#6ºÄd6ºc3½Ã….)W‰ÖÕQ»]Ol4bÓ*S"£É›©h,Çöæß ÍïLE«§u,‰‰.en™‰.þ×ÌD«vR=-—<¦V—5½CÖ:†oº-ºþµÙÏL4LÀç-hqÄD£ï`Ât¼ÕÒ¯ì b¢‹53ÑðG'Û'@Ltqëv&6Ʀê&&§ke&º>!1ÑÅr™˜èò»$Ý$&yg¨o‰ˆ†µÞL"T"¢aU~%ù*ÑÅt™‰hÌÝ— ˜‡F|õÜ$÷SèøiÌ<´\RÑ®ê$oj„6ih8Â!cZñÉ„toÝl¦¡a~Ü^‚ƒièûÜ¡ç`N×pã¹Løih6=)I‰†iƒ=3 ]\à™†Þ/åú¬ú%ÓÐ ‘ú⢡ÑAwÒè>44ŒÃG(“˜†ÎÆáÌAoæ ‹½;qÐx‡WˆA™ƒ.ŽòÌA¿.™9èÚÙÄAcL¡æaz‡‚hXa/¦ ÑŸ{(³˜‚ƘºBÄt}DA_y¦ á  8›éˆ‚ÆiÏÐÉ1 ÇÄsž«Û‰„~™Ièúy /à ™!ÓÐðã?}®ÚÇDEkU™wg*Ë VÖJŸ˜häkö$$#&ºp&·y'Á#1ѸMÔ®F|g&ºÞJ¦¢ñy^;—ý1QÑ»:~{aC梥qƒxf÷¹hé¹ïnz&£w5Xô‚YÌF£^¼¶±’T˜ÁÜ1£Œ·\²à4M,qѵ¿‰‹~5f.gÒæýº‹½õqKc.ºö,qÑ8íLR[â¢aN;²ÓjÙ/İ7sѺ¾^¿ˆ¸èRйèRÙhøëêªÕŽÌ|4^I›^PÎéR‚iŒ+™°MîÃŒtíb¤1t`Oy¾ôÒú|Sž¾¯QE„4Æ£ R+SÇ„4¾qy=˜úžÆLJ×+fN1ðJÚUâ¤qÉ#©:‰“®ÝCœtýxˆ“®¯˜XéR „Yit¬ÄËæd^ºâ¥®F’¶/qIßZ¶13Ññ”?^GffÝ7àí<¼1¸éÚ·™›Æh½.¯ÈÜtí=â¦k7͵Iœš´Ú’¼Ÿ¨é<ˆšÆ8_@ŸÆLM× SÓ¥B SÓuì5Ø!OdµÏ˜š.“1Óe²gfºÌÌL×@ìt©ÃìtWÄN×n'vº€f§ë #vúuÍÌN×Þ#vº&b§k€!†º`%b¨kC]çb©ë“S]Ær?ë]|+Yýs\D™©Î ¥·Åøçq+z‹ðÉl¡X|€->>$«ÉäBÀ°ª¿¼œH±øÐÓXb&QÕ(õ¤ü®[ÍTuå?‰«–FýñÕ/qÕ AQ{Ð âª%™ºU¿b®\*F×r“¨êC «»•“*dõHfLV×#‰¬æ’s…­$×l"ob«k#ÑÕ»À° Étõ†Aàªr"«ËaLW#B‚mLçLtµ|}0Üp´ÑÕt1&«å|[&0WÍÜ01Õ幉©ÆòMvÓl6 S½#ÓÁF SÍGSrÏóôÅ&qÕ¯Fâªâø]媵òš¼°+TÕÁU¿3W-²Á4ÅUâªåHçݲ< WÊí2î‹7ÎT5×M,L58ù/ 
¦'E8²×LL5¾UY´yn:QÕÀ¤ÇiDU daâìx¢ªqÖ#àæ4vPÕW÷ÈÇ ªº¢ª_Už2:MiKTõÅŠ¬1Ì*€¨ji„𨷉ª\—ºsq™ªFq½Ö‘ÿñUÛD¥\rgªúƒÆ ªqZ«ÎñUýºf檥q Ì•¿™­Æ5ŸÊÖ®¶ZeÕ7O©[>É÷…èj\s¶áF DW£ÛêÎHgºúÀ„59™®>PvFòÑÕ>ÛEb]@/¶dY¦«±vœ‘"Ct5޼/ß]Êlu™vˆ¬®a€ÈêŒã³Iú4f²•N·qX*©“Õ¸ óz[{h¼:ŸZÑÖd5"tK.tö¸ÆaûÄVcò¸/7Ä„uYˆ°Fcï^ÿˆëÚ/DX£ÕÛ-}ˆë×Y3a]',"¬1ª+k-DX#dË×â*b¬1ädæwG?b¬ë`%ƺ V"¬ëx$º~0g]0/sÖ'0gýºd&­ ¬eÒºI¤u½["­_GfÒºb&­kP"Òºv‘ÖõQˆ´.ó“Ö¯Gɤu}×DZ—ù‡IëC2g]OJœu½Wâ¬kÏg]?#­Ë„Ƥõë^2i]?"­ë§œI나´.³/“Ö¥Ûˆ³®ß‘Ö8ë1¼®“Öõi]–÷LZ—û!κÞqֵ®½Nœu_‰³®½Nœu½ÙLYúƒ)ë‚\˜².È•(뙲.•)ë:e]`¶rÖŒË¿ÍæãgT*ÒjMr:ÝQ|Ö@í#<߈°>Íp{Ñm"¬+·F„54[²¡$ÂzÇÍɱF×a-¨ã€³„-ˆ²FÉV-AJ”µ<óq%Û ¢¬7ÔhWDç†ÕAYoúnîÌ„µbäk†×d&¬®±3êëØÌXíïà mÍ”k¬¦\Á|2a­kˆ=q™°~gÂZ×QW||lHÚ̧Œ/31$Gj¬özÒÉzB¯#¸ÃÜFÉËK”$° ’µ¬KÛžD©äG-Òèì§mÙQëz5ù4g7j8©Þ%Í*쨫Wf欫ñÙQÃØó¸“¥v¶£.¦NlG­ô¾ÍUÿ™í¨áKÔ»à{Óg;ê1îўݨx(Øz슎ÂÏ#»QƒEÙÎ0Ý&7êŽuW”xf7j4Jï„Uuv£î(}ù¾™Qw¤È$' 2£~]2›Q÷ „¾ÍÔÕJ3mŒ¦Þœ5˜­ž }ÈŒº£Êq(½²µ\±CÎy~ ®VºL@KPÁÙŽºË´Õ÷p¸&;êWãcGÝa@jv²£–6âÁ’µÜÉiÂVðdGÝ¡t½ãcÌvÔà ³CÙQwÐ/§sœäF-“—D‰fÊv£–iolj_û&¬qdϾ<äF zr\ÍFɺ㋱IHnÔ =Ù‡5…5z§·ØÍ /j9N‚êQÇNÔ¨PIBäC‘Ù½_äC K³- ìCÝaŒ=ŽÃä¨äC ÂSâ´çÈu‡¹A”¢eji„Ù¬çºdêŽÕ|gêŽ\?™uÝÞ9ûP×#Ù‡Zâ¶€÷±0ê‹“(Õ\|¨eθÎ(JÁ>Ôrí[ ÛXûPC†¡0~6ûP+kaÆ‘}¨q?G›ž Ï>Ôú¸GØŠdꎲÁ§¦eXc8QË5€Y›Ù6‘µ4N`y ÈdE]OKVÔYk'Ô=om5Žì]x·Æ°¢Öç’OÙ>I²¢F@F2Tÿ@\­¤ÀfGf+j4ŽT$ƒ¬¨k$+êŽÅþÔ1YQ¿ŽÌVÔè„åãÉ‹3„¬Ëf7R6›QËÛ5àÙŒÁ<›Å‘uÇwøL¨O£ÚQ×—LvÔ˜TRam¶£F<’®wˆLvÔr$V&Fµ´m \\OvÔ]×£±…JvÔèïliGvÔe’#7ê®ë­à[ɺÃo$¦6»Q£¿Œ”'7jý ãMΜݨËýu×2>‰VÎfÔèÉŠº#U²9kÈVÔuj!+jœTâ‡çIu­É‹s¶ôÕv¯u yQwÀ÷qºn={QóìÊVÔk„s†ár¶¢ÆŒµžöÇVÔ¸×íqh{—5Έl@÷wÎVÔ˜XÍ{}¨Ë`dêÒ£äC§CT5§Uò¡FoCûhYâäD ¸bïw…§~?^v¢–#e>:ÜÚ–œ¨Ë{b'êŽÞKCv¢–ÆžÌÙŠ“Û}AVÔƒú 1ÖVÔõnÉŠZå;:l%ÀVÔxéä­¨1äŽæÊ%v£F÷Ao·…Uu¸Qã´©r»Q—њͨkç‘5žRp¯‹ ÉŒº S6£–Γ/ewÙú2£–?d÷ÈfÔxÉa’ˆj=aò|d3j¼ªp˜$/jiºžÎ_wv£®/ŠÜ¨åH™Ž7)’5†•,=Ûµ½yê8غr£®£•ܨ1éÊ­ûιQcR’IÕ·ÈsÄV¹QwÞy¸€žÜ¨AsɃ†<»Q+&`Ø”£äFÓJ¿z¹Qm&“Nv£îÈnKMv£–ÓÊYQ*èeü¡mÉn“ݨ1ôN˜Ð›Ý¨;ÌMegHnÔÀÍɳÌݨë`&?jô^W¥“5VQ‚„] O~Ôõ}d?j¬ÍZˆjÉŽúu\¶£~=C6¤V¢3Ì=ٺɵy®5R£ßŒgü]!ªõfÎz² R×—L†ÔõQȧÅBÃrȺ2¤Æ²7y¼²!5¢ÏèùS­ãõˆ¢plH™ôš.ÿcCêW÷eCjt‚„ϧ'CjôÌLž¡CŽÔe!uýBȺƒÈnç1¤([&áDî[™êíÛ˜êl÷bªØ4¦¢rÌTŸPÙ;H!¦zGi£à+Q}í©ä 
ÕtsTZb¢šó¼¹vbÉ€äÚ‰%Ç™k'–,ÖZ;1'š°´º¤S°´úuÚ¬­Æ¿ÜWIÚjä¨ìÒÅk±¶º$†°¶š3ŽXZ]’¸XZŒ<$ˆÚº™¤ÕÒ•‚o§;Ldi5Úæ}¹?(‰«‘ý%#oX®7‰«ñ®DW¸[*ÜzØÄÕåI]g8¦ü¤®Æmv™†MJúj ]¢À“¾7„jÊ/ÒW¿Ìúj9Rδ{;ÒW×—Á^ >Áã0#ÒW£Û»‘-{¼C_­€lÔêÒWo°sÞwe/Nd}õë´ä‚ÉJ†¦‘¤°®ãн@äMK¼ó½ö©Ï™Öx”–˜5RXKã æÕ¸^2)û`¤°FÔ–¨~RX#Ø¥#A ëMOôÚRXËŸ¯\ŒÖud‘ÂzÓñ0ÜT–Ö8-fJ£k³Âz—Œ ¬k¯‘ÀW”Î ?“,°®gÍúê×Y_^|u謯®/ŠôÕ¯#³¾º^“ôÕxÌ'Ý™õÕX Js ÒW×"}u­¢Ëv %˜±H‰‘lÂé¥Å„sÕØä€Ü·Ûò¨Ø”tw¶‘HÛ¢ôK±)YôÉ„=DÈ5s\£SÜ@¤M>âf“HÙ´&7lö¤ýEv©’H™—«HßeÁeªËjBƒí@P©&êF?rCì‚OASV¢úHàõ:VÅôTÔp)† Ò8¢lÐË„oˆ A°tE £ßɤ¸,TC¾¡bÂGC˜:KX A¤'w׊WCò=©† Ù¾ ‚p†¤‚ äTªŽÇ† (¬•^ž 9ë´x‚°={‚”ï‘=A RdOÒ¥ì R;‰œõ]|eýKÝ«ñÁmOâî[b ƒâë‚E“'vUg¬5Hb-YÖ!^Aˆ=Aªíy‚pº3[‚ÖÃ÷²¨I&®‹ +×gQÝܘ¸Fp‰V[†‰kþ”˜¸~±¶™¸–»› ÑÊ@&âzCÑö‰uèjÌÄ50ݼ†÷:×ÒËY{C¼õvèÒd˜n•ˆë ;U©D!×ûéÛL\b§ªWYd}C,˜JE‘Èú†Y»V©[ÞYdÅäUÇÄ5Ï'(Ê·²£NáæÖ!³¾Q|*¬öYf-g”.œ®w#™õB‡V“ÌúFºÑîu¾Yf¥ÐZðYcȬ±ˆ!eR(’YË :ßö!™5ƒ÷ˆÝ ÒYCžÿ | ³®G’Îú†«öáþI¬³–ÆcïA’‘ÎúFzÿ+vÒYß8Íå³ ­±è½{°v$´¾ñ:“W; ­e-½É­áJ2k9nÀÖÝ?²Ì«wA<¡ Ï2ki”–>¶—‰5šúCÁwk ™5øÍ—X¢G’YO Ÿ5gIf]O»dÖ·®[Û‡%ñ肤öìp:ki¼€¨ÍÈtÖ÷]U$OcÒYãàµmL é¬Ñkw2¸'¡õDøî!¢#¡õDjadm²ÐzJ|’Ûíõ“ÐZN+Aïľ†6’кž–„ÖÊû +¦{½ÅZOäíb5õ&®•jÚÛ‡Jk9ì–IÂ=eIksnÉþ‰´Öy«ÉŸ´ÖSAë=>ÔZOX %O"ÒZO°ÏÉZˆ´Öódsåi­çÄ2=ÖzÊ Üáu—XjL4ì‡IJRkiÄÂÀs Hj]ßi­¦Ä$!­u}¤µ€)g@:ÒZ×<7ÒZË`hg2ì'±u}Ç$¶Æ5w¸¾Ë)¢QpLTf ¹5à|ØÓ˜åÖ¸¡–2=In-èÊÍ*1fµµ\r?.€·Õ˜ÕÖòAÈÛ„õÅ]ë‡á×Âç,·–+ 2qÈËrë<>½nË­_KnËMéïå9Årëª÷° V\ã^z­`Åõ@¢Æ>ºùefÅ5Úz´`Å5÷(¼ÀŠëבYq iW­+®ùU±àZÚ¤$¹ít\Ë«êÑ™9, ®ªDD \K£jÏVn ®?h Áõ@.‡4™Ö”×8R^©±àw ë)’àZ1µZ ®?h Áµ4Ží¼¼ô­$¸®ã +®Ë·ÅŠkô;òå]æœ×x›H÷2Ç^R\(×duãÒêGq—,áÚ–o¬¸Æ­ ¢ÙGäõûjYq]‡d–\cjƒIÉñA}E=ðÉ+Y¬w–\Ë‘'ª)™ —$×õ%“äÑãˆú#,¹Æi»„aÅu¤¸F¦°<°µâÓ{ n„×:sÜ–ÅŠëáy„§HV\—ìdV\Ô‰Z"¬¸®½GŠëßÇÛ‹±âºtR\×ÞK‚ëú”$¸–+ªãŸÙ)àGî2 ]rœ×4ªà¡EE¸&b΂ë$á(—Âë”Hp…àaÄw\£ÃåÚ®¹'Å5ÆŽ žîz󬸖#/¬÷·쬵ñ޲/¬¸.N‚ëzV\ãfåa¶‘×u,“à8V«{±àZ·#Ê‚°àãªEA\”ïésyŸ²ÞÎívN<Ë­Ñ&wN%YnýzÊ,·ÆG9½«­ë°ÊjkŒž=*1±Ú·Àþ%e¹uí:’[×)­?‹š†tþó[ÕÖû—ß~·}ùÛïà³þ+rùSÇöγ8Ç Lã:X|‘•:JOš:dv’ú«–cþ¯ö/¿jrÏ_~øñË?ýÅö—_þùË÷Ý_ÿð•—ý7x"$¶ øéëÈUìBr_?ÿRM½Ì;²„;Ç×_ꪗú´Àñ9Ѫ|­ÿ?„È–Ëú~Ö¬V>Ešü_oƒ?é~£*@{ÝOnü¬ 5ñCâÔÛÉmŸ<@Ϥ ø”±‚«×±B·ñÉcEï§Œ¾ŸÏ+OÌ ±B·óÑXùŠéçš ÞX$ÞvÞ²tƒ†ð«·/í§¾Œ,óáÀÒúš×en»ÌñK¯óð*XwÇO\çú¥×Ùd]ÑÙ¼æþ×éõ:êðh‘ió–5/8 
l7\(d]¯ÿèþå·ÿúã¿þýþú7¿ùͯüÃïÿðøEsê—¯Ë$-SKWê®!UR#ÚH¥vìœ9h(v*v0؇|( vl!Ñ­ÿGDr?G(`°Ò·³[aC!çíeÁ¥"å …)œšÇÒ\þ¥Ùºµ£';@7טæ[ÖŽ™ÿ Z3»´èM~S?ˆ€Z$N7œ(‡|h†íUƒÁ¹pHÂ’Š¡¡p½sÞ'l×n–`íìIr ûn¯ÞΙt82&Ûy'%‡<Ö± ¹h£Œ‚k ·.söjבÔÈÇ€¯ÏÒ~ʰJ‚ tdîBß0*œ'†…ô9Þ ÛñN£ïÎHoØqw¶Ðãuñ.2„þüæ ™+’ħ-\2Z»TÅ “Á ŠûçÏ- šO| kS§ÉgîZxËÖv'¹ê„.éÏ56ú–ª’þX9b »ÐNƒÊ7€WhæÚ »ÍÎt¶Žrö¬©û“dxŸ^¤¸õ‘6ø¥Ûz—ÇX^@­Ï´‡Öê’ñll¶±¥mz,æPµÛÓ¸§­xì ´Ã¤¡mœi³ëšLJÍkÏ´2Hzµ±]ö´m8¼%„kºédãÕ1m,!fÃ^ª³y(U>·få¶K±ƒJ¾oKç/ã=íaC¡ÖnOôhóJÛÔж§+ƒÛìi'ûò9Z†]“ ›Íø˜ûtŸ6ù:Ò~2Huœ_úÎvï±eŒ¯®w/Å"7í C)(ŸËxz@×¾øÛ€¹l:ªÅæÂîÊmØ ´Ý[éP¹Ð\òTy³iaälÛ_+mÁ"ÆÂýaíYÈpN»¬¢õé{Ó}»ÒF*6Zs×ÃÉ¥Ò^©4öG»ÚžÆ‘¶CQá ¶ëöm¦Oâ’›µ\ÙŽŒJȨ¶¾ß›øéØ ‹€ t:«íL{“¸d;®¶ÿ;ê–{<> ´ÜºÉœú>Ò6£4nС¬24]Î[‰pNÇi®CêYw©NßfíèÍÈà5Ã9¢£ŠŒ‡dTƒ»áâ³;|–#$Cww Ë4î/cë}€Ú&ËÚªctFH†ixvìêxHFñŸsÞfÔ(6öØXÐu7¬‘¶Ñôæšhi¼t§ Á³Ë ¸R‹úÙÓf:qî¾ç-wû]°×Y{2¼Ò–œ«±,c(íZã®èØèˆ ÇHÕF=Ï€ =åé;ö,Ž4–-\º!m!ar‘ÉSžäéðëN»Dpxnáý)ã&mÕ>hGÚíáÈ*q)íçH› [«ÈÕ[KÛ6˜!kmä´‘vf`ñUîÚÛî ó#$C¤Û=W¶ƒ²<¡ØtQ@W;ž4”ï+|‹:ÜY#$¤ÛrO[Ú Aé¹~7ƒWÊÇ<\Òáu!yªZü³¡.á8íZ`v!ïp¶,mð0É ¯±aÛÃ/JØÂnªS¿¾u}hø{ÿS’ø‚À!öòP+I|ËBïÙ'b~áó ?BàºSyF·W/ó6‡™Ñ×uYYÈ"®[òû¸÷5|CDñì Û5g„ã{O,Fà7n(Íè„ÀoŽ<{‘8¶Ù¤[̘ø„B>ª«1ØQŠŒ FàPÛ \Þýš‚ÃRn/èÅHî¸äAVßÇÆã dá}GJà4×á x÷ä=áУö´ !~!Ž 7‰&ŽíÌg‰÷¿6T!8ÍXÁ@ø!“¼œku ð ÓÃÝ«„cŸûrÉcð*ŸÃËÁ2‡ÀY¦7ó#a .ã¨ïPé烟P»…Š18JЦ¬XÆàe Fü€TÙëf ‚Ÿ€kÇÒÙ3¯K7Bà'2šš×µe~À±8|n‚CÒ!S‡Å:†à˜Ó°¿cŽ#wlí Á±ò¹äŠkǘ!¸œiÏVކ!8@Î Ó8†àҤĆàÈ¡Žµ”¦Á±šÜ½ì#CpD ÁëY^Ÿ¯‚ðsäÚŸ&ض|s?Dá2¼¼,>Æà2TW¶DàòYasÉoÿ(¹´ô/.gtyzàöß!‡ÂxX¸Ž xE ÁwÌ-.‡f.Ï,Ýêú†à 1/K±Á P$^WßÛ}Z®Ì Ë«wûŽ‚À;Þš§…1GIS¸U/#pyö‹è¥¶bŽŽ/ýrwx!p¹û žÿ p^33¯ï˜8á¾ ÁÁO×ǽ08ê2˜ª¬`ð²Öd ^:¡bð–2ǃ¿`vÁàt·„Áqäq»ð²`ðB0/w[1xO9wƒ)å®`pA!óÜÌ)J18JÁŽ«Ïo·TÜ~~ê ŒOøÏ7‡ƒý¾YCâ&¼Ý´Ï Å/”±8½*"Cñ ÊÒé5©ŠãÈTã‚¡¸|®˜c-³†¡xɈe(~y…%#Aq­­Ò¸ 8D©[Æ~ŠC} eœiª Š«§¬Ÿ—K=AqõN”qj®Å«ÓCñy&‰%(~ã• XÞ Å5·óhžëBPÚuùwnNÊPûTòî+Sˆ¡8R…dÏ•jËP|"¨Þ^Ì… ¸,&*pÛ¢‚ xËßMhH¼˜Í3GÕ‰~”„ı.Ú÷Ýì.ŠÃV"ƒ#ߌÄåÛ—ðâ~ŒÄgýlg$ÜÙwÏÜ-l8žý±j#qØiKŸ9¡KP¼Øª30 ãxßÉ•Á8R»<Àþo*Úvr’±x©ÃX{Щ4cñ6òl‹7Ìé2/[JÆâ©ÞÏfÒ‹_È¿›Îj1¿Áª+ÀH3è<Ü8‘8æA™R‡£ã„Äå«¿’‘ #q,)MâÉ‘épýô<ƒ’8îCºÓhBâÐq!^f á4á*ðÀ@¼J*=Ç@¼aïVðÂʼc Žp(‘H ï@üD”¤âæ(ef€‘€¸,Ò$‚-¢‡¸, eRíÛJ\e,.ˆN†ƒ|Ö˜±ø %od^1¿8íÞ ÅO¬ýƒÿ`,Ž}a­)²p(añ$ÐéA—±82‹’ 
0G…¨9ÁXË|yc‹<`(~¢BB8"1GQ¿{‚šÙð ™ø^|ž ø ×ÚkZ¦$CqŒòdýeP\.&ß· ¬Å‘2/¼|ÆâHƒHŽÅŒÅ1Îuœ\׋_°.‚ ßZM0ð5ò*7… ‡ÿiT3c,~â4r²¸p0- $Ž'Q¦ ñr;„Ä¥ïdÕÉH¼9ŒÄå} *n¦Šg$~Â/Lo‰Ã@b§•Éb$Ž`uGfbaÃQw¥[Ìb ޾“èq46nF£g ~b>Äö ÄA['× â)´¨G@øì»}XÄOÄ‹pvz€86X­òÅ·âð°6ßåßÿ,Ž)dõyiR6$Œ-7¡¢H¹1ƒ7Ÿ²„#Âã½®ÇdEÊl! +â³"sÔØ‹)7>ûËóƒX2bäÚÍ’ éÿjP½àÑáH€¸“”…èð’Átx‡1üéÞL‡—:lL‡—b…‡ÊCàòBYÁ¡sèÁ÷0o$Þ± fÃA²Å Äl¸fÚ„ÍCpÍj÷2ß/üB¦¼à¡íC6¼IY!uBàð @¾¬s×D†ŽíAÎΓz!ѧ)˜yu&Ãad¨ìË|CpØû·´Â`6|ƒ!ËmN?… X-Ÿ¡=“á°-‰ÔB†3Œ(d8L$ö€ŸÁNPÝÕ6v˜ G7¥5óáðwoXÛÍ7ÿI>×ÜŒ›dŽ‘\¦ >á§uÙî0#ðP.2¶ Žá×¼bwaÃå[Ù4Ó÷~#p(½ÐɆy™ ‡YÄq»HˆÙð†ßd8“k„ˆ ¿¾ßr¬c2yz‡W¥62¥:ŽæÊ:&ÃQG*­Àˆ GÒýèVƒàE†'B§JRŽÞÜ¥àE†÷´:‘áóF%½5×1Ž43õ~»Þ¤-èÓÃE’Ò“ÛS!ÑÚbf& ^÷Í+NœaðÊŸU>\–éÒ}v$âU\¾ì9|#¶ðá3oAa¶ ‘A86îÓ²:ƒïê11lc3ÑáYSÁ‚”ïw””!xÑ£½èpl°Ëp…!8˜`ÔôZÁ"H)›L‡W Ñáyÿ…¹ð¢"Žz+IÉ\øþ¤>[háÂQ;¹jáÂñÊv·b)\8~énìÎ|‡×Óæ^ … ‡ãvv¯páp5ÙÝÖ»pá' zÜ8ñÍ…'§´Â…—ç$þ¢Ø‰ ÝtÇJ“!8ʱܻe.¼´yAðõxçÀ¾éÓárö­å…ö¯-/ôà‚ˆâá À±Š… )E ‚K€ßRPbQ8Ìmäe: ^j£Q8Àró•cð 5GÎÐK²(¼°Q„Á1OÊ?7c×" GÂtT^.¢p™ŽfÔÿ+¢põxr'ÆàrZ¹UÁîó̓£ñº¬Îg‘„ PPZ¯•App.\4ÏšpÞr-ªpu ?«ÂQL+ªp1Ç‘·o,2‡zT|YN!5á,'eŽº-q  ïÀ3²F¿ d“&ö_|ÎíM‚c>Æ2}gü7(ÑÌ5<„¿å‚G^óUE8è³™g­ü5Æ€C^—õQ¬Ÿ0“û^_ËQŠhIÒýE8vc“›fàE5Á’pždŠ$\&KÔ:]ŽxEå;òþd®n…±JgMøùý²5û€‡¥ícß·¿!x‰é/M¸Ü±ÛèMx=-iÂQ.M ýª5ÇCïÝJ/›üE^r1–*œ$!pÌÇæ&ý¬ —ñ(S…mÆ3Ç“£NËet5ÉQ )nKÌ|+Ÿô9.1ʦ2/91€1b^ù€Ãƒþ±n{Ô À%¦ØÑ| RH9BøI#áÞYð÷©’Wíbü½Ãù«y!Æß°ê½¦×*øÚ¢)Sá‡øÞ›ÇiE. þÞ±‰æ5‰ þÆfˆ|ø+ºüÍ) À±Ã Æ06pGðƒ$Háü—‚Àeª½UNM,xQè3—+Øý¼Ä‚ÀËxdÞLä¦ÊK<›×:+|â<Àdü} ZímZÆÁ/dZ¹)|àÐÊ»6=Cp˜^nNlsªÝ7X Ç"ý>¬âhà˜†N+¤WøÛ"ß|)ÂØ­~Èãã9Ò^#pšenÁߨ><O aü]¾sÆßØÑ¡ë ømQà%‡ñwy]Œ¿±gHˆ4~ü—àÑ¥Õü³—÷,ø…,V|­øùM·tû*ONø[!öõŒßø[i=h3ü†‘PNõ$øÍÑ•Ñ7Ë&1pôÐ÷†b±[Ð÷öý™ü³}óhfð ïÀäÆJà[z¶º¡ø%ð]–© ¾7¼¾zP‚ߘðÎÛëq0üÖw—äô~cWfõÓð)‰PÀUÀ#ù4’;‹P0ÝÀϯ} BA£¼0×|þ~™ð7Ï q†ì©˜âD,Úšá·6maºà7þ,]v9‡›á7gÚAËð[Û² p½Çí:œÂÏÈî‹Íøû}`Âßx× cCÑ £Q" K6 ¿O›ð7&šDÌdø³Bbî¹Î~ã@( ¶­½á7A Z©g‚ßh¼e*_]—Á7¬ Ÿm #Õø†÷aÛºïãdì ëÃ9P À¨è„½ÕQql¡kÉØ—”U‹RöÆ‘sÓeÂÞZ8)?2ôV+FxË{–g‚ÞZ·SÕ"b2ôF›ÏïzëSbîÙW {¬ p­«­$ðý꘾ñCÂÚfá/ƒo4vdt+–Á·¹í»­ |k¿È:ÈŠùøÆý€ý¶êP„¾õfWŒ}¡o}LøµÛœÑ÷ëÈŒ¾_×ÌèûuÚ ¿õM]HâZÛ ¿uäÀBÑg†ß:’Ï¢°ë¿qZY x†8Áo÷)™ïÆ¿ÑØî˜@ ~£s± ­~£ñÈòž ¿ëp%ø]Çk†ßh{D“‹¯Íð»~! 
~¿ž!ão½X¢Ò2þÖó%„@ø»'ü­/˜Ö—™«áê’HÂ߯·‘øëI2=IFàz$mé–ø«ß2½ŒÀ_w›øk¤gþºÛ ÁµûSµø ‚àè>BV¯œ ¸ô´ÅL ²êq»áh<ðZlo.ƒð2OG[]Ù=„Á5Ú_“cÅ௱L¶E$P£ÖøŒÕ Uf]Ï'Õ+*H±UÖÆpŸ~IQtôB•EŒ"ãX沕öÏ<8ȼù$|¾€øÄÚh'M@R‹;¶¯^æ(XĸByð6»/$þ2*!©™í²^ ÅOøáG僟Å,8'3Ç‚TnÜw¨ ˆ#c~¶…Y£®“hð’Î48˜¼æŒ ^¶ ˜çĬƒ³óàew—‰ðDÏpµ<áūÈðò€L„7TIêòL„.—©p¹eè€W }Qá9 ½PáÅü…©pÚé "¦*àq Ý2^î•­Q0Ñ%Y2[£¯–j‚Fø3ŽêÍK©"a´{åöB„ã4Ó 5o”Ò=̃ƒ˜ŸÍ6[_<ø¾‡8ip™fTóÿ¡5 dnW3¹w&Á7,'ÛfêL‚£ðØ¡Þde D8Ê«#W|M–L£þ‚£üÅÚÇb|ƒ±½ÄbKt#îþ£ï.Ï |þ¶,ßK»Ì,¸– ›Ýêx1 Ž_.5”¿^8› có9grnÔÄ,ø¿îÝ÷™ß`1}¡6Òi™xýXÜR`\^Ê<ݲipyè+•1b²ÉÛ&÷&ü”tšÎ4¸œVFøÜ-’hpÔ‡¸ÓJ…hp”ðkÍýº˜Ç õ½¹’ºÒà×R‡UŽÞ›WÆÔ™ßP[ºÁy»'Ž×q݆^ Žó¥½p¢Á¥íL%œ˜ß°R½ÃKƒhð 9©ažÆ4øÏq‰›Í„â™ÇÇi5 ˜ßÔ®Èi&"|ƒ×{Z¥¾aÕZ?íÃqMÎÝ–²–‰p|†‚ƒëÎD8J‹ÁF0^¢3áÏ7êžÌ„£‡Žkî‹Yz1ák·Íu!™ —’=éÌ…£@ÊØÝ|Œ¹ð ÕT I:?ТԎ'.žÓd5Ä…£n£„&ßÉ\ø†Rg÷n&XÌ…ã{žiE¾rŠ+¿Ä|] Ï/PZ0“þô;ý©)„Öç§?»ñúG7òg6µ§û1=ñŸû’¨MÖÖƒú/ç'«LqÝ€ýü©×ïÈF·°_>óV·?oÞ~Aa ¸|·ù n´O¿?7°~þÜë?ݾn`ýòÑ|…]ùD…Göä€hHG/ÔK}Ù¬þBct Ș@ÂÐ_{¥—/úW\I »É¬,k|ý•ú§9°G¬Œýg2”?3V¦§e„èµi„ð=|æÑO’FÝ˧޽!|3¯˜gºV­œ}Xm ­Ü' ÄþW:­—Ël™á÷±P|ã8­óuàƒÈ•Ã¥Ñz¹ üÏÒA~ûZ£õÿö7ûðT²ÌûáßÒUµAˆ‰¥Úøò«tÍt7ùŸh¹)0À2Ÿ¢ví˜(ÿ)#g?ò)ÿ_“êßþÍßÿÃÿþ¯û¹¾?dEýÜϟꛨp¨‰· ã¦#ÃëuÙbÿŸ¿làû'ÙÀã0YžÞ÷Ñç,fexŸwM=‡9—,:‘qÊûQ’ _W;uI,khT¿Üà5'm SdA~*ÅuÀibi$@ºÀLHbıš&Ø„}ÊÛ„ª|ºXý+tP¸`—㤡ÒÚµ ¹<ÏÚý ºý¸/s;9`§`fª»^ò¬}>m·n·l²ÔÕ¶ëÞM9¶ÙpœŒØ†ª#¥Þ6݉ÙÎÞ¿€æmmÈß”÷<°÷Ó¹Yiƒ÷èi ‘‡Ž°%2,åY{·¶„¡G;±Ã%ÇIï‚®Ö-Ô‡ÛF›ïß´¯WÅÉh@¸‘"÷Ò‘²e¹UGÓ"yДÜŠ{¿üV:ÜI6TxÿÒý>¯¹ìy½Íñp óަCE¾¢/Ø9ØNåûóäS·ƒä"r'MMG-Áøh0 ]µ qÜ8ž´n´uuÙ$4ȹ£š×§§qy¬’t» )YÏË‘?ß2tÄ¢í†Áé}:Æ®[Ï@½ #`@u(W¹·]Û4øÙ<†|{ϧ6ñT…c Ýt(Ô³H@Û|ö»¶](ÿì»Z=pÕ'Äo˜ÇzMr_#U®'+>ÿØ´Êî“hˆµ]†Œ:´ib ­‘Š]^þÒ4¢m"–<#RŽî£X‹Pd(Ý25’­‘ŠwÜ““kËyÔ·<Ü\z6k ÂlÛ‘#ÐV›ïÊá¸yéFÞÓ†½Þ¶ê…ò‰Û²–©úûk¬Øª}|v³o’X ï˜gœ"(>pÚý³ëVÞ3Nå0¥$WœZpvÓ ›H£÷ý åsàâ6N/dT#YACùœðãZÁ{Hh;tã@æ‰ïŸî×zäÒÇ=­f#_¼W1@ö׈=QcÓcä­ó› Y~†,ñmC¶As.>%vÜùT7P`)l 6Ú°Ê·g¿±Qm±UB HÝû|Þ¬Í}»1AwEV”¼±‡mÁD·ûÊwÔŒÆQ2›÷µÅuÜØÜöØzÑl$mØ¢|F¬Œ¼û8ÇÖŸycãÛc+ü8¥sŸŒy°ïÛðØŠ4k™¸.mAEÓ›@©Ôõ•‚¡Þ‡GV£É 8úµÚîé‘•LÅ@Ac³sEÖýûgÃ`Ý„ ÒÛ#+6[wëFÝ3¸WdE…×Ê5oYZ<3Xwov¬È¶œæt¶¹[d…ͲÖ`Ö»ïžê|ºÑnZÑ»W)džŒQ±ØV¸ }`v;-ÆjáW7F6]Ÿ‹‘‡:¨ÏPФ‚˃ì­)V+ ©È¸y½óN…î–YXt§ _¹mÝÃÆèÖ<ÈÒ8Ñäj+«1²™tû~l1bSÖ,½AöÒr×ë6!ùõ ›'1¨¶i1=fó´ùö/ÚN 
­[­Ä?‹±·c‰arÊM<;”êW°yˆE‘`Èœž× Ýàæ!vÂeà°!ÙÝC,îÈ 3¡ó†_ÑÅZëà™X W0¯FíñÌ:ÚÔu«yÅXèVÕn´A»à1Þ’>VÓ}yˆ%ù=ü$†Ã× rç~/,ÓŒ'^Œ'Ü”%xö3Õµ«yˆUñµ…X•ê8|•¶.Ã`I—Ðv;|mªK™ ¨©ÕF¿cÀJŸX®¼ƒÚð»aËÉÔfjÊåð~zkúãV§Ýá+bŠÌ·ËËm¾®oAëŸaǯÀǶ—ˆ6¸2ZŒ… i({.Ý…ÇØJÖx„[7Ð*àïø‚$Ãc, Ÿ|vAÛ üŠ=ÐÛàšzÅ,üzG-+x£_¿. >æ_ǯ‚ Sq ø´mŽ_;*¨PýXmÓñ+Üe Û§aøUpßž:ùRûî5fÛÉ îƒAÆ”ïà«„FVjÖ“‹Ž_»ª›ü;Åxs 4ÞñjÉãV@ð Aõã;vj ·( `Ýa2ð Z­FjQ¶C™‘TëæPVwcg£m:”•eÜ@9³ÉáP¶Á’=Ý L!=Ê^¨u Y—ÚÖ@ÎØ ¥¥„y{@Y•˜wì: <ÊrXÑEÑŠ²Z”|õšÐ»e1·bþ­$Î!¥ûãAÛ·P&h„;v[6¢ív(‹U‚ júÚ×¾Œ®r'˜ü Z‹² ©â—w?CÎUØ!XÆÎÓ™ŒEYtÇuÛÂm·#Ù†eÅôɸcw~EÙòʸoŽdë)¡­i±âB®çÊÔòÊ,j( wÎBÛí@VvÒ¤¯ZWÃal9j zÅXHïúæÓØØUR±@Á?ó¨‚éá0@Ožd­^᜺9Œ­çToT® îbžÕU 'ë{ÚDÍû€±-m=4Ú&0gÁm¸Ûnc,ÌXMÓalÇQ}­AÎM‡X a'cÎq«®cVìªÉWü, ` {ôD ]Â>â ˱”ååÇÀ¶W‡LH-RˆM4rã®™(9•ÏÓóRˆSדã„ÚÈ,Dsr·On+Ú¦Ù†Þ9l¯ß@Vm¹ÖW5GÈJðå´# û„ZÉa, n¶ce2ÄâDûC̦£yGð¨pà Ãl¹ 56‡±r/×Ýl1wÊâêt+Ÿûuɰ|¸ïc¥mž1Òï3p3ÈB?'¤Q`AÚàJôdmÕÅâ°Í9­&ë(·"¨GÂès¹¡º—EjQö(ÚnC±Jg~NY\ G±˜U÷½hÛ rX 2çY2/ï. Léý\‹Ô¸ÙÆB³)_ႿϦ œA¨þY‹ ~†.Ǧkº×ç™á,3¼]%YS†¥®c‰áE¹šÈ&†ÄŸIg\3Ë5 qG oƒ¡B³PË /Bf€)bx;è×0Â&†m{„obx¡Uƒ[¯,m”È /ÈíP—^àd(ù3d†·ëÃúú’^èñ/†·£šÓé’^'ŸïåLt¢x;Àm÷5+Q¼ÂýẜÇõ·l{^6[Å 1¢Œác} DñârŽä Å«"FYk­ÕQ¼ýl9^Å‹CäîmL/“M>ÊPˆâ…þ_¹ñp™âÅ¥ÒDH/¤™2,në‘DñBîy$j>S¼¸\²Õ$Š·ËIF©Å¢x±Õ‘2ˆãÅõžíÃðeŽWïE–þx‰ìUA«¼9û&3Ù+` #b{Ñ£†U’‚Ø^œ3q>Äö⸠5IÖ`Ïlo×ŨÃYb{qNbtÛ«ºÛÇ8å¡fÛ‹ãZÚIl¯ }cÙJl/sž‹¸^}¸Ã2s½Ð߇fšëMf®·#³3&Ÿ;F.UF\/ÎÖP5e}ߙ뎧å8q½hØ&ý€ìí¨=)cк8‘½zJ¹ñ5Ù«§l=“½8åŒÙŽÈ^§Žê«÷3Ù‹GÏd{&{»VìmFIÙ«Ý"ˆÍF^&{ц´8 Á™ìÅ9·ËQ+‘½8NýwCd¯~!n°A\¯¾Ó§ƒñÀÎõâ°D|׫ƒÙY$¢zë L\ïëj‰ëåñš™^œðŠõ11½8ã0·fVbzËx5®WO'+"[f®Wsxƒ׫o[_žuHæzÑû㎵`æzq\&׫6±™ë-ƒ„¸^— ¾ÌõêãݹžÌõjÖ@ÛŒk'®Wº¼™^\ops¦x1ñƘgŠWƒŠj®¦xÑæÈ«P¼¸…ÌMfŠ·&¢xµ÷=-’^ÜfÛcù›Þ2ÃûjJ /NyFu`¢xõë9¢xu$®0S¼8ç~tß+x„ÁÇ•wÈâÕ(ˆg'K/Gš¬‘Œ È/Ž2Ù*S¼ÚW™t ŠW“”ýLñjÈÍo’)^=¥„Uû™ã}Ýfâxqœ gî2Ç[¯—9^=çœÎ»dŽW“±¿&âx5Z«UÔ3ø3ÇÛÕÍa;œSN¯7>Ç«m‘K/Æ Ê •–9^\/…’Lñê0Ói¯Dñê]J ¶ï*S¼zF}Y‹þM¯~Ç#x‹Lñvu@¸<€fŠWßP"&ÅÛµ´àðªLñ¢Mz}í¼e‚W»£¹·5¼õæ3Á[?ÅDðj¨¾QIo ­™àÕç’¹ÒÓ™àÕÛ”9amüÁ[Ck&xÑ–¶¼‰àÕˆ¾ DðÖgÈoý 2Á[à¼z/Q^=ghˆàÅqÓ‹ê¿‹ÀÖbõGü®ŽXÈ ÉøÝÞ2¿«hYÖkZÏünAVÄï$Gü®Æ°DYîC™ 2Ã[ƒefxkdË 
oÁiÄðÖ!›Þ׃%†·¯ÌðÖà•)ÞH3Å«•öí2É[?ƒLòÖ(”I^}tÍ›|î3“¼zÎБÉ[ç³Lò*2qÅq¼¯ÛL¯~=ië1s¼¯ÛLoý²É[D&yuêÙwßÒË$o½ÍLòÖˆ™I^Mn²n$¯~ÑÖH$o—™ä­/.“¼uðe’W¯×¤O¶'F%’·N™ä­€9Ó¼J§«æˆæíjñ€jLî–bfyëd–·ÞffykDÌ,oíéÌòjäN»’™å-«8by{}fbyKÍ$¯bÜ~¯ý˜Lñ¢O’+S¼u¢ËoYUÅ‹7—¬¬ÇÚ#hc¢xËš^5úðiô´ÞØçÔ°|q½¶:xQ½ú]:3ÆTïE:5¦zψê#©^¦‹@]¨^,Ö~[!ze>}¼qŸ6"ză”e¢·i@v×—Jô‚M=u0Ñ ýF㿉èžkl3Ñ[ۈ蕹e&v•ˆÞ øîÄ2½ðžñ=$&z!}KŒ½0GµoìÅó&*žxÞK-0\lMÄðâÉe•gB /n3 3¼è̬81¼õz™áEI¥ûðxAﮅꃫM¯\N–ÎåÅËM /ò<1¼ðÛ–.01¼(·5“êv1¼å» †—ãxáx9rÇ‹*\-–õÄñâ{»M7Äñ¢q‹— ŽWí¹}“›9Þ å0°Ìoù ˆã}Œ¶]LE/b‡áÇÊñÂ¥ÿ¶‰8^<úÜžu3¼ê…î9oÌð–øJ /†‰`?Óûc“/vшáݱ]zØ:†)Þút™â•ë᦬Ç2Å«„!$Š·„s¢xnbÉ˯—ó y¦xË#,ŠWÝß½ˆ0S¼Z®š)^­¶æJ¦xÕ®…t6S¼œ³K﮾rÓ Q¼j€èÉ¢Lñ®r‹Ý¢xiÁ‹ï£{,&xw%,O…øÝºˆßÅs‡‰ùÝòé¿»ë(trŽø]L=’6ˆàE­¼#ö÷‰àE?G*6¼;òƒ}øÝ‚[2¿»+ÕâBQâw‡Z ùræwS’À—ø]­Çå4 ó»è餶7~wWÄg«dæwKG¿«•<¥˜ù]\jF¾ñ»èHAöd~OÖcC…ø] Ø“Dï–aBôn™$ˆßÝõ¶Š¿‹Ïñòø]ƒÁ©fz—gFbwadÚƒ²'vßÇ–XÕÌî–Nì®–ò5#Ó»(U²ÕOü.fÛ`b˜á-a43¼ºËâŒgl5ÃËß\ñNÑ\~“(3à ¿gy#kry\Xä2›ï~But¦wÛ®‰/³}ÃÄ&Ŭœƒo|3¿{j)M×f~WËÕoÝ´[ÄïBgÕê™ß=nJŽ%†q‘ÃÄ ï1#ÿ„ù]ìÜ';âwAîï[Ø*d~·ˆòˆß-üñ»¼{Äü.‹‡˜ßå |æw‹Žø]Þ×`~ Ç=¼Ø^IÉøDð2¯Gü.o@2Ã[zŒ^\îzëx‹‚‘^œnzê3¼åÅÇ›·€˜á­G%†—¹;"xK'2Á[ÎH"^VnÁK›[ÌïòŽù]]¹(ø]È $ƺ66ó»õ8Òð¦׳E^ofö˜á%Vax¹ËXÃ[dµÄðÞ9_Þ™ÓÜ Ã[.G /) Ë,¾H·gеÙcï!T¼ýò%iáx™ƒfŽV ɋԚ=lˆä¹B+“¼#g!’—6& É‹ü^gö^*^¢©‹Š·E>KayYÜÌ,/¤ú¾Ø(,/=fy±jõµ[ay‹@–X^V"3ËËŸk•ñ6WŽïó¯‹z7™°¹Ë-ÄíöœŒþRï&?ƒ—z7 ¨_êÝm™àÉÜî dxVïé+q»4³n·({‹z÷8ƒ~¨êÝc÷0Vïf}ôK¼K‡ñ. 
‚I¼[žœÄ»ƒòZY¼ Öbz¢6Ó»ƒ- HÅ;Øå€T¼p¼‰:«xžB*ü.}ÛEÅK!¡ÈxËq$ã¥ï‰ù]ŠuEÆKi#EÆKo¯ªxÉ#T¼”©a*^Š‚EÅKS[Qñ²zU¼ÈqŠôbVñ|f8Ä*^„ÄÄâ’Š—¢`Qñbi'+Þ¥Cdo9ŽT¼¿ØVvQñò½0ÇË£™U¼üè¬âå{a/MbEÅK“_Qñò³³Š·<©xKÉxËõ²Œ—?–ñÒäPd¼üx,ã}µ%/Mb!ãåoŠe¼3˜ä¥UNò*B^ŽÀ$äå¼<XÈKS~òÒô\„¼åVHÈ;È¥……¼ü-³—Ÿ…¼<ÀXÈ[”µ$ä%äU„¼XÈKRŠ·’7?)y9ð‘’—g Vòò7I4¯|ôp¤´ðÌJ^B^EÉËß)ySQÑñò4À:^é¬ã•3¶ÄÖ’Ž—¿ÒñòÈ:ÞrJÒñòDÍ:^é¬ã¨õòè{IÃ+ךòóÈ^lütË-Þœ.V$¼41²‚—;’¼ß›Û&‚—ÏH^Ê?+Þòl$àÍä»´B+ò]~1,ßÍ'dñîL¶fE»ËwèÚ]¹¨ÌžFš’t·AÒÝ â‹r—_%+wéU’p—ïœt»“ÜÓX·‹ï·…Þ—t» ˆØ…HKîÞ>Yîò±{Éž!¤%á.jM—:³pÆLÓ5ñDíž0–q“‚"ܽÑa®Q'jx"|S™Ú=·ì²ÂÊ]²w-Ú]X{…‚´»Ò}òØöÊY»K/¥»$ +Ò]%µŒãbb÷Ô¥­í?ÒÝ#¶|kÉïý+K~Ñ®4Ÿ+üÅêBÍå÷«;Sú,³ºH‘uΛ-x9“—-x“ £ð¢$±;Z^xŸ¶ÝÍœ.Ö¹WdÁ³oûö'®Î¼7´c·KCÉ€÷Þ²Û ðΖ“@Ø€wjæ¡g¥“/âYÒœ±/&`8È€w€ N”xGË"vàØI¶ÉÕéR²>9ðÊ •Y~ràÕ’wss"<;ð‚e̶Ùl"w̆%‰Ì ^i»†o°¯Üþ_;ðÂæ8ø2àí(44Í¥Œ x*"»… x‹“1ðv| aK¼KçdIœxõ¾BÓH¼8É—Béê»[ð³/î(騄·Cvì›dl ù0l‹ÛÌš‰=xÙ‡=x;¦Ò0s%^ikË€éKatÕ“.šìÁÛ•}óϼõjÙ‚·¼²àíiÌÉ‚·£œç¼š/nšûJ±oé²àE[6ÅͼåɃ·Ã€D‘Íð&^œr„4yðÖËe^|%‘×ͼÈÀ9&{ðv­†æ›äÁ‹s²Ïnxðâk‹$göàE±Î3’"ȃ·Ä9ž`^$ŸDZ.{ð¢MîΘxòà-ýB¼ìżX÷ ÷ÚcÞz\2áE‰ Lkj$^¶d^œ2”l‹W~HlÂËRnà 륳 ¯rd7‡lËĚ;ɆW‚Ë)oÜ\ÏȆWŽ“ÇÂ8Ùð¢#ï°Ô#^|àrrêdÃ+S‚ƒ,ì.Úä>æGì.¼ÆžUùjË6¼U8 “ /Žk—g™“ /ŽËJÛlÃ[î“lx»~´5 ¹ðâ ôtåkvᎅ"¹ðv”L.¼ÒÖ’¤“\xk[ráÅ)Q˜qIÈ…Ow„Ÿ3Ùð⥠¬ ‡Þpáå0Ë.¼˜—PO5“»uà‘ /Šø¦,`ráU0]“I.¼JA\A g^LªýF5¹ðÓ9ráåhÉ.¼S7F\ŸD6¼¸œ ;cɆ×ÛšshdÃ‹ŠŽCÆàþHɆ—ÛðÖã² ¯|ÿ=-ÞɆH%m© /^÷ ;P²áíðsË)¶áí(Ýr¹ ÙðÖ`Þb¸—]xq”|ãF1 /¦ë€áì‹Ár‡±/¹ð¨/åp“ oGeŠds°\xqHÊ»'^=·'Ñ^TŠˆ4váEVE0räÂÛñ='×ßä‹➑øÄ.¼¨â‘ühÈ…—*|^€4/=ð2á=B-ð2áÝC6Â&¼CÙW³± /!ºbÂK MxñÂ7×± ¯|=åi“ /†lÊQ!Þâ-I&¼ =wOd^é³=,µÈ„· g2á­—Ë&¼%0’ o ŒdÂ[¢Ã2áÅC?tÀÛ†±­®€#^ÁÉ-M¸dÃÛŸZ²n÷›mxñâ¢0Ùð–þ'Þ®„¢‡Y²áÅûéÙðb™qy¶á•sÈJÉĽن·8J“ /b—%«² //`Ù„uo¯ˆ3d‹{ §F6áŽM9dÂ;€9º%é² /¯Ù„WºéÉÎOg^,û®Ož\xñ:’&•\xy1Í.¼ÖGû ¯~ɘ‚9E°Ë‘†»²4tŸõºÒÉÒOaÂkz]lÉìD¯KßgÑëbcÌíŸY¯+m2 } Fz]£3ŒAI¯+ãó ×CÖëÊC§ ¬×•ÛL•fX¯;ðQ)ŽôºÐ,/'†2­.V†á;ÉZ]V×±VßWÔb­.w‹u¦­,Ö•W×’…-‰u¹è<‹uY˜Ãb]X:F fëÊ­¤m¤ÕE![Ý/û@«+â"Þ$Ö•qH¬‹½É¨¹·ÄºÜ‘OÊb]ŠZE¬+£uôÛŒ8Y¬Ëz*벺‘ĺ¬Sb±n9,‹uËèb±® ù\Úĺ¥¯H¬Ë}Ìb]š¸Y¬ËÁ®ˆuYiYźiR/bÝ;.b]Tø¾–b±.të‚Ö ßVë–çKj]Î+jÝr+¤Ö•QŸ ¾Ôº©<«uQI=”NI­{¥:‹¤Ö-ß>«uG®œQԺ螨±ZK²Uµnš;^jÝÐ)²V7gC­.T›¯œY«Ë±„µº2´³…iuQÒÅóžŠVf‰QÃŒ¤º”‘V¤ºÔŠT‹—pe©.¿:–ê–G 
©.DáNQ¥º3ÊR]ðlGTòc©®tŸ|¬çì/6W×Hé ‘T·ˆIª+¨9)Î_>R]d3îáZÌRÝÑË-$HªKYE©‹}Z—>¥.ĬÔ`ŽpvÈJÝpÙ6¤ÔmzAGRÝ"&¬ÕIª+ǵí4Q:iu*å[1¥4kueN“âuòZ·\®ªuç¹*¬ÖåDj]\`‹ÄVëž¹6<«uq(ïÃj]œ3ÊWµ.v½É©ÁÕº ÝY­+÷%×ð—Nj]p°IƒMz]ÜJ²^ÏÚfµdÐ'Ÿ¢ûd½.€ïþЬ×Ežáp‡2Òëâ¸#j*^·è;I¯ }WˆºY°«”{9^·“U; v;{“`·åLz첚ô%Ø%+^RìNò‡gÅ®LJ3 ÚX± y”ƒ%Ín½ÒìÞTIƒ4»ã&xÖìNü: !Ñ®Œ¨\]! wetòÆ-¾‘pWTRá®Ú<šÈŸ…»sË’±"Ü%.ðî*ò±¬†o3ÜýíwÛ—¿ýN>¡/]îàÇïú~éO¿ÓŸÚ¦î¾ÒúüôïßýÛ·°Ê ¿¾:øø#êàn䛈ìŸwI<Ýé‰ÿÜ—¼Oîéƒú/gŒ„?û4 íuöó§^~öËgÞÁêöçÍÛ/û¦{S<ÿ)ïà¹õóç^ÿéöuë—îà9çoâNd1Ó Á6€¿\¾2/_û.¿ÉMýÕËÚûW»üï’ÿòÃ_þé/Íþå—þòÃß}÷×?|í•dm$PWàÜ}ͯ¿Òõ W’•>v³Lº_}¡^/ô ¡2âóŸyÄXLþÌP™ž™’.úø¤™µ mÕ{HMŸ3KÁa)#ʽä¦Ï¹–(è}|p3¹íSå‘¶Î?c„èµi„ð=|æÑO’FÝ˧޽!|3?’{oHøÆJ®ILîZ‹OVör·¯˜¼}iOHþÙ‘¿\FVM ldí½÷en»Ìñ ¯#«úŽ%Ò!K…Ÿ¸ÌõK/##»Ã(JpþÄuz½ÎŸº|ïyÈìËyªT¡Ø6­×Àô ÿòÛýñÇ_ÿçïý›ßüæ×?þá÷ó?þð‹¦Î/_•xA ‘+ÍŒ>jxoXÖ{öý5„˶¯–ÉÝ jˆl/w†Ðåj#ÙËQùq4Í(­‡òã.5¸4;Ê„…l"w!" æ¶l"§ÁŠtܳçË_ðqº_þ4å×­gϦ¹àg,ÿHE~.­ní ÔñýŒ‚ã'ÊîÂfÙHT;cJ.RØN7ä€Û…k{Ñ"HE"]æÂÎcPüðÚó ÿ.£À³Æ.È œâo¸¶SØ6¬>&xEnî8“H)¨ž}#q†_íØ²¡áGá›°’½FOùFÏÝSF®1B£"‡EB5ZB¢ÒÀººYÕ5f’¨\¨˜Ñ-Ãëãçÿ…½'ȯ¹%‰ }JJ–al¤RòÒÕ‚¹/Ûi“sünÙFÄ5Ï$P¹``¬›îÚt%}JÛ³mõ„3ür±|=ÉS®;»F¢-ä)‚EyŽ 2'øå ì‘CAáÿÕs-  2#ø/$F{µ° ŽNð£›c'm!O‘§ÛC'pÝ{’§ÈÓp5º8œà¿&QOk­Œ»ˆMñë¾’B%àÃum¡PÁÎOì¢^wK tdää_² …JƒöÊyçë‘u¸©óÝ\;{ò2“D{PÛeYyh ‰Ê5re9Y’¨@0~rD’¨4\y•Yùâ“DcÙŠó4!œà—‡Këh Ê5sMœ!„ü ö»‰e„Nð×s¶l+'¡*ò€å~³­œŒ‹ÈF[²•ƒTaZe궤PA6øa;gMDT†ž½”¦mwTÌ ¼48†¨ %?¸x´…H¥Š)žO˜14*è”Øõ·’D*íÊ~µM@¸ŠT0IG=̆ʀNï×;¼’HEŽ;£®¥Dx‰2ÒUÒO[¿,¹¡bšŒ’±§«.ßnAØv >ü1Žhƒ¯´œDf½•ÛU 2ç÷ßm!RiP 9¿ß ƒp~_:y ÅdC¹ãªÈ4¸†úÖšK| =ç…tÛq~/Gb'à=Jðûh“¸¹Ioö§«Ã¼¶iM-í¤gœhæó¾ÎÙ’P¯!Œ_›Ö¬² ‹‰§¹ãEÓ²0d¥?“m!T)#Eë¨ÉóÉ„¯L"‘ WŒ¤Í¹mxë+¨4PËÄ#“›†·;ö–Ú)ÑNb Ô`úèWl‰ -”*M' ˰—;)U¤­ßÓì ˆ­épËnA¸R÷×ÜV±Aá pÄJy5+Ò£M>(²ðŒqŠ|Þ>då+²Üvް–ÕïÔ—²íÄ”°Åˆ•g&Ží¼³PÛ˜·ÊóNBl’‡*O@Kª`”Üî…Ôë‡P¥<ù…:ô—XA—@+%kÄb¤»¦m2 Ùˆ=¡uö|an6b9pÈTBŒÊ`Ö¡ úEú}maf¡ ë›ÑB8‘¸BS¶§×€Õ¾5­2¿;ÂéŠ&©K‚Ê3`¡ƒu•ƯüC°Z“¾è Kfah›ÄÞa>®ŸÒѪòYû t*Ò«aZƒç ŠÉ–o2>‹²‹i¶jp*\Q°Ã ›4äšiAÃfäík<&ÞÄ3b'6­#®É£„NebB:Ì,S1¾ƒXì[ڋΡaÙ{I}'Âb‡qºÚU}T‡EØ¡RíŒ2ßM‹¯¬‘UKÊÛâ+*uIt[|Å:ê<Ì}¿&•ŠÌ®aã¡ê3±>à óô=:ˆ-Ó¤Ù9¹w= 3¯rc±yÛ‡ÇW]O[ÉqUbœ_[z©œ]É=MžŽ`Uã¶?h ‰ Ú›ëLm¯C"ªüjZA Ø™Ó9°ÑŒ?™{—ÊA­©zŠ®òöL’€¶»[t½qJÓÐàÍKÔXƒµ`5| 
ÓÂ+¯@uŸÛlù°±_íîCEíV¤-4*x„¨#Ñ"•2ñA±Yx-ƒ 2ˆí@²\Ô¡joæá+C|cë_žï{iI¤‚—úÑ“<ßäÔU¿6ì%GÚ4ì¶¼æémÂLÅ+TKýW y´M³ÀSݼAi»UnýŒTäžÄ÷t#yÇâ*ôSáh˜¦@%‘á唦cÂJR£m®Ú¢È8b^S &;=' š€+#WéÓ!´ó˜#W"¢´r- ¨¹uÑn•½¶îâÒÙC¶C>d~]K䇴rùÖV®Šê=r…<Ù¼h»³òÖT2—~?¹Þ°Q¶©¦Ë‚ª9r v^¨Sûœ‰;YY<‡•åîÈ\Y¼ -”)˜)/G\ ìÈM=äǬL™¹ävWn6éÿ’¦°oè^‹¬ ¯¢ðÚnG®¬JEFÛ0ä 5¥ µ—{&°d/[6H}P¦@±'¡rr¥þ%fé'ß–‰hU ÿDbás¯(©ûÚ2<'L.Ã|®ÌÉ[Xo2ŸË†Äç² ó¹@°Ý‹3Ÿ éèžxàÌçBášH¡Ìç¢MžÀF'ñ¹JIGn »•®ÎÄnØÜà‰ÙíW¶µÛOŒ2·ú%j·Ëò §~ÉÔ.jjËteFåÄí‚:—(eà‘¸]™Aƒì~ÎÄíâœGpÏÜ®ÖÁ ®†¸]œüçʽ$n·T‚!nõs§YJq»(|œ9¥Åí–š^ÄíjAmycé3¹[j,¹‹‚ÚWØÏ¹‹Û˜!©ÎänG¾C˜ë»«È#ß‘Ø]Y[D)žÂîâ8ùÁ?‰ÝE&n²W"vW! y»«Ç…¥±»x ÇމßíûÿÇÛ¹ìl’Éuϧ¨¥´‘÷ËV€ @kî J¢±³ÐûCañnDzšì.‚5 0Uå_Þ"#=Nš›Kq;^5¼ýÜË€·oWÞq ¼Ún*Ÿv^mg~{ ¼ºf*åy¾!9áíÛ•›¿^mÖÛ“€ðª Q;½gºáÕvfü«íŽcT½€ðêR›¿Ï£HëŸôêã^ma.b ¼ÚÎ|AxuÒwñ^•e[‰1¯HWæðjwÃâ|7ïŒó]ílºGøn¿ÄÇЮƒïj;Sæ‚ð¢Cøî!0X¢à»ÚÊ\;Áw_{Û½ôWms ]ôú+h7î3Ю¶›îQç¶«í̾ÎÙn G°Ý¨ÁÛͳv¸«íÊ lW›Mç¨S}Ø®¾½Ã¿Ëvû&÷YÐÔØnž´³]]û¶ˆ)&\l7]°ÝÅ`»ýü¬ œëöÛl,Õ¹n^`çº}Àƒq®ÛcëÓkX7G•c]Ža‡ºyukëü§y) nûµ¼s¦Û7»F}6˜®6k¹Ûcª ¦ÛÇx5IÓÕvó9r0ݼ7Ætûî ƒ9ÓÍ3w¦«Ïö2¤Û§[¶}Élç.úXÊ‘®ŽájOÇç³®¶;çQF ¤ÛwTþ Žtsð;ÒÍ+ìLW±Õˆ‰3]]ýmßÁtµÝb+Xgºý7—ñåLW§gë.0]m×nÙãý ¦«íŽk”í‚éæïLWÛM÷h~¦›CÁ™n?Îs´[Óí#Ͼø8ÓíïÒ©‚CÝ~;ŽÂp®ö´1œ›Òqn'eœÛo\9jçæ¸üà\íênùùs¯ç*6Bpœ«—ÓútÌÍÉÇ`nNîŽsûl\ eàÜ|_;ÎÕv‡sǹ1E:ÏÕfÖÖ<·ïÎøˆóÜ>Ãh2øp$'ºùÊs¢Û-iÎñåD7Oωn?=c.Ntûþª"D·'MÅ\@t»J©ÊBAtµ]Ë«Ç×;'º|uÏ„D7_¥Žtµ]u>ÑíC¹zL=í_­ÏˆnÏ—å#ûÐíÇW¥iºy Åsû­žGï2ðÜž¯–ú½ôäfiÀºZCZçR`Ý> Ó P]…öêåTWWº-Æu¬«Íî­>;ÚÕ´Üf|ßu´«30wÃíönÙå»´ÛÜUÞ´Û²Õ h·_¬2 s´ÛϬÍ×Ožæh7¯££Ý@ @»ýÌžb»ºg5RØ‘°+xa:2€ÝN(ªkÀ®Žr©FŠ»}otÓÁn"çñy `W¿Ùªç)Ø °Û¯X»†„u°´`WBƒäC!vµÝ^Þ&vû)Të&€]1$Q¡Îöõ»Úî,ó†®ug+ö+9ü½s/Âû¨8_€W%ÿÃ0#ïi"¯:GÕ0àMf À‹¾xÏg¡G¸«T­ZîîÝaëI; w%È4R ¸»Ëjˆ‰^pw«y3à®ÈÑh©A¸«›VÍ´w[ŽÝþ¹nÀ]ûõ‚»ëzŽN¸{jþz‚w»³Þ“Rî¶'¼½@?#”pW ¶Ìrî¢{ý î^ÆØw•“/&pýÀ].²îb2¸Û®ÖhH¶«ñf8d»óæÕÚd»½{Œ!²] ŒÑO’lwÚÜ͇lWHg+%3Øî¬Ùq´ü"ÛÅï†ß ÙnGj¥¸Ûv÷Õ#Û”à ¸E¶«©r=CÈv'9&ìÏç|²Ý¶?sm ¶;y¿n²Ý¹wÀzÞ`d»:?ûfälW쯺¤‘íê-aba°ÝYÞ ºc}@@¼{ßÞû•hwÜ9žƒv§Ã›*íN—{ïíN]7zDëhwZ½"Ñîtº·Å»§[Q¼«¿Ô¢pwZ½)7ᮜ½îRoîòô(Þ½%§žn„»ÓìS)ñ.âÝépK"âÝv`Ö˜x·íÀú¸’ñN‡7h'ã÷T£|·Ÿuæ%ãUVßÁx§Åi“ñ*ÉšŸÂÊwÛpEô»íVîeñj ã{ù`¼“\;J˜ý®гZg¼³¯Áx§Õ{ì‚ñNÝ\b$óÐïÞ7`¼ÝÑ¡–8à¼Óâ]ÇÉyu¿-£‡~·;Ö‚~·§þ| 
ß•­ŸÉŠzÛý>ËÀ‚ú]™gƒ~÷–ä¨Ë@½œöÈzÛ¥¶¢Ôïj¤5„~÷îß’‡Ö ¬wš¼y5Y¯Æ³-¥]¿{KÅ^ úÝvê®™~7ÀÞI¢™Ñ•ê£ß'úÝv^‹!sèwﮤKoÀÞ_®ß½eÞ7êv¨ß½.GÿàèwU»ŒŽêd½mo³}X~·½–rj¡~·í®-»Ÿ\œú]­:«õõ»:óÏhwõ6™G[^jweÙâ¼Öµ»·ù…çí·Ó86´»²–9ÇÚ]½õL) ín;×B¼«;n_; Þ½W(û]¼«õ»};„xW‘“Oïj‘nß¶ Þ•}hu-âݸ’ïÚp7wãÂÝ[™‡tÂÝöH¤t×våœWƒ±ºÂQ¶{w›ðñ‘²ÝpîÞò"[†¤ 75wcÊ‚tW¬eôê•_«F‚Þ8?HwõÔw&(w[¨½Š—GÔëÂ]¹clîêÌáB¶«[¶ÍO•e»ºŽû0z¤l—#ª]²×§jWŽÁöyª]ÑÈ­(6T»1z Ý½OÈ_!Þ,âݾ ")ŠwãÙ‡xWÏâ×ëå‹xšx72 ¨wù¸AÀ«ÅKÏ©>¢_ðÆ“ÿx•3 ÔïjPZ‰ô»òœ1¡9ô»:Šy.|múݸ¥ÐïÊŠºÔwÔïæ¡¸~WïµúHý®&jtgÏ\¥ úd}ÔïòvS¿Ë{CÌ«%í­C¥ëwR*º=¥yòÆaò*6Þ?d¼š-ÊœWTì™!路ÌxOòŠn•×8!¯¾_ò¶ß¨ï@„¼“,˜‡„ˆ7W>ùå‰WµmãEW¶Ôf™È+øÑÜ= ïé&Þy/70È«I{Ìý„¼qÃyó8y5€i€c‡¼z;×å òúØämA¹S¿;Þ„ o÷òäòªhc¼_òžî——!¯Vˆg¹0äm§5Üfò‚ÿº~7O —Ã2!¯uJÈ{x½ ¼Óá_òÆv€¼2á­,“§÷ô2ž€¼zyOoª÷tïüòözœçSp@ÞËâòjhcÚ€¼Ü„¼|")ä%ÛÊ{ê>Ä|P^Œ*y§î 4DR¤¼xÀƒòbž%åÍ\É«£¯–+AyUKXUô¤¼ÒrÕª•”—‡5/_Tóê—O<弜óƒòâ9§ 7÷ç‚Þx¸Hy1¯å•cÄ𦡢W×þ+Š^NAyñ¾ Ê«÷â¼=J`WôÆ¡@чEo )oïÀò8’|)z9ŸSÑ›ûqEoü/Þ¸ôƧzã]ÏÛ4åXÄ{©‘‘ãß’óÆ\ 9o|dƒœ7æCÈyó7]ÎËï’”óÆ°ƒœ—g5¯æ¿² £šWßʾÞõ,ýnžëw§Ãë-©âÕŒ~·v¯ö³A„Š7Þ5PñÆ;Š*ÞÙr ïtXà Šx§Ã[/RÄ«ld6ºì2Þ©·éz<— ãÕè-Ýe¼1ïâeOoü&d¼ñ…Ž7æ[âÝÓÛ0Qǃ :^æ™w9oïÆqºŽ7^{ÐñN—µ‚£Œ7^_ñFþo¼|!ãzÓˆGí:^&†ÔñêeiNTÐñN½IÞS¯o|ÿèxó_]Ç;]Þ<„B^í¨zhPÈ›Û9ácw%¯¨Mõ礒W¡Š°©äÅ’BÞ©»w<Õ±òNêS_W!äÚG!%…¼S7Z,ñ¬ yõ›«a\òêJÏõB^õ3;ëë1„¼So¦4|o äÕªNŽòÆ´!¯bs}k„WýÓÔïô“BÉ«º‹{T>CÉ+Þö(—Wò U/2*y‰E¨äÚÙËãQò*a®-Tò*6òE*y•0·Ää™Î¡ämÛµ—Êðær%/á •¼(ÓùÔ SÉ+d8ºÓPÈ˶¸”òª]U_SÊ«‹\n”òvB9D†”ò²hŒŒ—W…Œ·Ý¸­üŸ(åÕqV EJyÕ¯¼šÈxÛza/DO)/ëˆ(åÍsw)ï,uâødA)¯„såÈ)¯–5Õ†R^™ÄÖ§ç0›n¯îm´™¢”—åU_R^Ì£a6)ïï±Ý•SÈòiøó²ÝºÎí©o&à=ý«@Þ †™xyexƒÁ9àí¿Êx5Ë,ÓЂÐvW=y ÐÁvWï$³ù…ÿ.ÛZÒWMĪß3 x{—¬RvÒw–%î7÷wc™8xå÷V3'-xi¤F ^YÖ‹ˆ¼r«¬5,x{Z¾15,x7ÝæÂWýìÌÕ¼®ëý´ñ¥ï&ƒê2~,x¥—®/ž´àm #+V¥¯:–OcJ¢ï&!rÃw[ªv¼Z¼U‰-x7 ¾Ç»—¼ZÙEaÁÛ~s)5-xÛ³¥yüëåÖÏpà•nÛž,8ð¶XUÆÒ‚wUõåö,9éÁ»*-¾¯Çž×-xÛÒú.C=zð®êzy Ëxð®»ÙðÓ‚7Ä-xµ¼7ѽ[ðj‘_¹-x[l.SIZð®z™±ƒ«xebÒž¥ÏQÂW»;Gé×ãÀ«U—6:ð®šRËä¼k_U14xuVLÞu…àÙø®BŸ¯:Ðï®mÝz”79ÌwWµH¬˜ï xÃRœæ»«ðÁý¸7Q¿»jrFU#ìwu7+ñûn YrC÷]q™c#Ó}—ã‘V2ûæ»ê‚ÚNŸï4ßU÷Ô«d„Pï®3òq¸ï®3òwß•qNeÿ0ßUÿW§˜®Þ]n²[Wïª7¬U­Á}WÛÙbêqß]n¬F¡Þ•ŸN}ð¡ûîrAÎ 
÷]µ¶-¨åæ»Ëå;Ä»Ëå0Þ»ËåÍFé½»ôÖ²ÃÉÞ»‹¨ÆÈ=¨Ý­¢-jwõ›Uê@ínìÞ»Úßfú\×îf̵»ë„u5¼w—Û=Ó{W.DUgCíîr¹¥µ»*+O*zïÆØƒ÷ÿõþÕíÞ†ZÚÝóOÐî®»w¥÷î:‘€víî*Œ4Üå¨ÝÕøßÌUÁµ»ë„Õ6¼wóàM»»Nþy™Ú]•+Ùˆwõ\“ùn\bˆwõÌP£ù.òBŠwõÌ=¾ú”-§¡ÞUÈj  ÞÕø™÷Çìâ]™fUÛ ŠwchA¼«ÝU?dŠwUðøE‡ïj»áÝénÜHw5—Ù¤»Ëí_Ü)Ýé®Þ›÷HÚ)Ý]p†Gº»j.«hHw×ÅÛ²R¾»Ê¦nþï*§2´ËwõlUå»zT8Œ·½‘Hx[ÌÚÀR«+¹•5 $¼»:¾&¬wÛ¡´üøù|O¯fœúhNïÒÛ2ŽóݶIh¾ÛN¡ý]Pð®“sU(x51Ú'Hxe`gŸ± áÕìw—^æW”ðj8¥v ¯ÎÎ 5d¼Ë…¯ñªJÓMq]Æ»t+ã‘_}d¼*´4 o»Ž-ñO>d¼›Ê’ëÅo$&n¿›!—ñj˜˜£8t¼m»»Jž©ã]e‰7!èx•É9Íto›7÷Ï8ù”W/ S‡QÇ«s0† !¯ÌÝ·/:ìB^q«Æ€WoBÙònŒ1 ämo7“W×¥:PÈ»É@¡æ/y×Ó=3)äÅò:ÞmûU»†>…=—6 oŸŸ¬0 ¯^Q—9Ÿ?BÞ˜O äå ”JÞ­wÐ`òj¡àüׄ¼²û,q…¼X&SÇÛÒ:‚SÇ«®-õ(RÇ+×Îõùˆ6¼«›o† oØâ†·¥÷þÓ†WMÚ3÷ÉìiÃ+3œy¨â…;>E¼>n2ì&¼í†Ê£†W^Áå“C ¯6› †—†ûÔðnr†}÷ðî½ an .á%±¢„—6Ù”ðÆÌåîN?weõ|~Z¼à®´çSÃI¸+“ùjL¼½9ÚÃF`Á»» U84ì®hР§óiE‹yå–À˜ »;‘…EÃòF‹†CŽQ>]¹ÛO¦ä€PîîjF^Í¡ÜU¬ ¨¨ÜÝOïwNå®<Æ«dŒÊÝýtÇ(*wó7]½»Ÿînõî;Tê݈A½›1Wï*fƤPïfì£ÞÝû·­¡Ðuñn„\¼ç ñ® Ô½ù›‹w•¯ã;4Ä»Z[•ñÅ»ûån”ïÊjÚ„èïÆSñ®´ôKµâ‚xW™y@R¼Û³öú¾ñ®œOÖò?†xWOY[Ì §\ïö'ðûíÕzì2÷ïʾjk)ÞUìø‡u$°Öˆïú'¨ïÆOB¼»_0†xW»³O-Pïîò[;Æg¨w3ö¥ÞUk‡m|ñ¦z÷+õ®b_/௣põnœ4Ô»3õ®BUfMõ®VpUGõnÆ\½»woÞ±^€z7c®ÞU¬zTR½«X}Ô¡zWs¼9¤A½»ßp3ƒz7c®ÞU¬2ªwsË\WïfÌջЙÚêÝŒ™z7BPïÆ©C½«˜5z7·sõnÆ>êݽÛò åÔ»ïX©w» ‰C½›1Sïî·[‹S½›1WïÊÒß †]¼ÛBG{Q¼ËÍ ÝU¨ m¨ÝŸ„vWG¹<‚vW±2@§v7c®ÝUÌV—Ðîf̵»síî;VÚÝ,Ðíæf®Û‹ÝnnçÂ]ÅJÊIá./…»¹ w¹? 
wq½¨ÛÕîÎ!K¦n7wçºÝw¬t»êóVÝp¨ÛÍí\·›1×íÆ)¸l÷*Õ.§¨vyï¨Úåp j7.T»<’Ïå]–!Žý1®ûOúïNåý¢»-MWZ~}Ç›¡¥®Q¥tW+¼â;ðfhoŒ£¼NéÍ ¯hÕÌ‰Þ ZûÛîœðÒùŽ„W.u-s{jº!ÝÕc|Â+Jwe¯v~¿Ãš&§µÈƒî ÝÕ´Qm=(ݽdÐ4(”î¦üØ¥»š®ç©Œ„]ºÛæëÙjÝ!ݽvjp]ºÛÞ{I1 ݽäf]Fœîª™î9>vPºÛ^o-½{Ä0”î¶íöy)ݽº#÷0 }¤»í0ê•»—þR7ÊÝkaW3Wî^‹[ÄS¹«v‡v·½î½7´»Wü–­ƒkw«oŽÔî^ÝÚ¼|\»«®ÄzB®ÑD­´»×Šfoï^³¯a)ÞÕéM|‘âݞ͘w¯‹wu˜¦ƒ†z7·sõ®Ný¸ËòÁÕ»×ìèò]³ûúº|7OÏä»m33Ÿ§|÷Z|B¦|WW³<:)ßåƒ|÷’âú4—}ä»íÂÍû(G¡|77Èw•ú¹°ËwuQÍæòÝüM“ï¶ëv–ŽÞ«{lI<4¼íþ¶ P¤Ü5¼z¼·¡¬¦†÷Ò7ÐrP„†—+5¼:=ë%¯.‹5‡ˆW¹­i!âÕþÎBÄ«w–cT¼¹«xÛvÖ’*^Ýöja ¯Òó6 i„‹x9ÉRÅ«9ÝIPñêY³¨xU¦bÜPñê}-¢Ý!^-TñòõA陵U× /§6Èxõ“å‡Bïµ t¼íj÷úÕ*Þkõzªx/µ–؆û3T¼Ú®ÚGPÅ{ÉSmÞPñ¶3Ø\Uë*^mg¦¾.âÕ[éëµôõ“.âÕšJ ¯&¶k+‹ñ^ú{@Ä{õr˜!@ƒˆWrÛñ3˜!âÕv•RÄûs/çŠxuUÌK"Þv(®ôˆW÷ÜzÉxû‹«¤0ñ^J-—·ù®¶©Þ¢ð*Ÿ©V­ð^jaP’Ax57í£ÚÞþðæur¯F¤yICÀ«Ù¢ÚSÀ{©³ÀèéK ¯Þ­‡¹×º†÷ê.i+ oÌõñòÕN/_eñê8÷jC牢-ëð߆ˆW׳ZWPÆ«|¤*É(ãÕØ²Ö8.ãÕk ògØAÆ{íÞ!„2^ÓtCÆ« ö•SCÆ«‰r膨âURd&ðà¼gExð®šÆ†æÜE¼Ú™ÉN!â½4´­œ‹x[ì6绊-WYþBÄ« d.yD¼Êø­L*Þx³@ů}¨xÛ‹ñ2¹$T¼ñØAÅÃ*^ çjôN¯†eË…G£6—ñÆã o<æñÆp†ŒW×ìºF1½Ëx5‹Þ%F„Œ7Þíñjež ã!û‘ñÆ«2Þ;Ë2^Žt¨xY? ¯Ž¯FSÅ"T¼mèYo pã]¼ 8U¼:+$‚ŠW¿i¥PñÆ[ *ÞÎPñ^»÷"¢ŠWé¡Uö@ÅË…6U¼×å-¥¨â© *ÞK=ªZ*Þ«gCo¯²ßj¨B¯¶›¶!™p¯^ ¦.‡ŒW÷5ÊS)ãá ¯ö]µ8Š7ÂU¼º¦.‡ŠWËçÈ7–Bôãm‰Ð: €)ãG:ÞvÁ󙎗×8üxÛüx¦ãò¶ß¼«DŠB^½`Ú%^.äÕÚÍÂ…¼|\)äm±õ*M1ìxÅ î²ÿu%ïÕ³ùíQÉ{«9úÀ+aÇ«†*£?PØñNL³uV+)ï%ÇÙapG)o¿C•-ï¥bÔ_„ïä…ðÔòæv°ãmo¾A¾¾Üx{†¤îÇÜxÿú‡éÛýÃщ³d¹WË€ô§¿õ?µä@;Ó¡õ?ýŸ?üïAËB`ß ^~E1ü½ù!šýûv©³ûÅÎø_¼ËS­¬žYk üë@éösŸ?ÿÔýk:ðüå§Á×eÿºóÏ_dáÓ¦Òyï(ò§ÜƒÏ|ýùçîÿë²àó—ïÁ×oþõïÉ&€Ô&Ì–½Ìß¶Uþôí<·e¾Žé?ÿécûýŸæö¿­÷·?ýòí¿ÿ‡öÚøßþÇ·?ý·?ü—?ýÖmqÐÖ­çÖÞœ¿}OÛïß“’@qéc‡ýÍ{:rO?aª¬ùù_Æžj4$ݺ5ûqtmî©®CЭ}UóÙmt÷°õT2L’î85ùÌIw\ÄyµšÄ¸øRE Mw\D©"†¦›ãü”*bhºûwà‘ÇRE Q÷z{¯ÙS¾½CÕÍqwΗÕ$®§7\S¬jãÖÍ·Õ$Æù-Zšo5^íy<—¹¼úÍ›U=—¥Í7û¯keB÷ÐvÇ=_V«IŒ¡"yèÐvçoî_5‰q•—ÃjãŽËí²9Öf¨s9­&1Ï쪚Äx¢$‰˜l’õ#”$b¼õ“Õ¨ðTž5ÞqE¤‰oÝ€²%=¥‰˜m’õ.÷ÉÅ&Yìo³ªÄŒíV•øŽUU¢bÕþð”Uãf“ì´Ž&§®üPxëEQmJO½+w›d}B\o«J|Ǫ*1fKKª*1&RYÙœ6ÉšÇð)£ØË&YÛ]ËȪ*1F‹ücn›dý²t¿š½ò‚©l>Œ6ﰪĸþÛiU‰ºÆû0ŒQ¬ªó¯ªJÌÝÝV•—qŸ¬*1æš–WUbìnŸ­*1†s[DTU"ßÅ-;µªD¾BöÍŠsw»%æaîV”ÓºŒ:F&«×{YPœ2|8mš­w<.›d}Â×êâ²IÖOà¶’DæŸU’?yÌV’SŠ´#½¼Sû©5Ðlsì= 
vN)#f›b±·ÍŠãrµ5V¯J”]Ï(T8eUíIì§%ðÙVVU‘(G¤òï;ËJ·Ý}Æ%‰LÍ[Bg%‰’UUCãv­$q½¼£úÙVV‹§°ÖDû<+Idê«X•$jÕ·­†¬$1nç¹YIâzyâ–‘ZIâÚ»ÿ~ÄR-±ŠÄ¸-V‰kw*Qçi#7“!É"*ƒín+I\/ëm|JQ ¬fˆùñ;UìöÖ‡ˆT•ÀövÉÀõìúÏÜ*«±*­9¥´®öòöѧ„…•À2OêšÊgnm¿yíû—ÉÏóñ©ýÖõôÕ¿V‘È%á©oו¾^Þ,þ”ævä¯Ûä b‰zítÅOˬ1¶Ð§é‘¼rq}¶UÕ#n’> Ññy/V¨³-#Ū QG_n.§ºÍWöŠ%è)5Äd€ ½gÆ‹Yjˆ‘½¶c¹KŠsöJ÷>LÍÎû´’DYjäY±ªIÔþJ`ª ðÈ^å•>íÏ”¥ÎÛ#yÅüÓ.¤Õ$êݾ “ìkš­(¿¨PÕ$nÊï? â6eWIb l%ö½T/4Wâ”v­(q“h¨vÛn­(Q±ªP¬—%j”YÀõÕë3TãŒO+LléÅüQYôØe…‰Ûn}².õwzÒVÞM…ª.Q‰Î×(¾zk‘gRÅE’øa$¬ºÇmd~Í—´#_e=‡kÚ%íÃÈW%8RQ…ª±ùðf½Ú*ªª·¾¤z¤]—:yŒdUiXI­Úóf创”j£˜àšO+GìiØ×?V"[_óeuˆÛá}1¯¶|ª:Dú…í]jeˆj_PåmW[=U¢ú ½§BU‡Ø6ÛJ‰w©ñ“£êW?¯kY­q;Ý>’Ÿ0äªYþÜ퉴:DI58ëCÔ“¡”}?Vˆø4‡þõ‡Ar%íÿ¨æ_ UQSݱAr•ý´ ãS›’Ûò©>–>½ÆAr½Ü1I®üžŽáeK’û•>Π$¹ª}´~é ¹»Þ_F‡äÊ0±å¸BÉÝû«t`*Üívþ ¹·­É=½¿-I®–$eˆL’Û†ËT2_’\%’ÜMe£ÜŽ$Wyi-IrµDÛ† ?H®fÿ¶»ç0ArÕF£\#Hr7凣’5H.HÉ ¹š­«Ü,H.J \,_(׎d¹âÐ`¹›Í£a –{T>öb¹ªˆõGÁr7­cqm°ÜU…¾’“|{¡\•Z ‡û@¹mßK•LåjÕ1 Ùå.®h”+ÑâHåJç;’™@¹ZËÏ_ë9‚Üåvm}€ÜESÕÔäN¦ã ŽÛÒ½c¸[€ãª{Â>,Ó‚ã*•žOç r\}úkÿÁçËQp\U €‡ãÊ”¹ºIã.}ðÜlrÜ¥ƒƒ§JŠ WfôËø8Ûˆ€4@îäŠN¢Ü¶»Bú$¹¢nÛø¤B”+Wù½Ö_@¹r^¯Úi¢ÜE}pFŸO¢\]õ #ÊU·†²á'ÊU3ŠR‚åöjªë+…"È•ù}uØ#È]¤¾7‹È],eIÛÎn«Ö¶¹:óRñ䪎ÌärãÄrÛaZ¹&A®ÚTTsr‚ÜEsç(¦ w‘4k¼;rÛ_ŽrÚ'È]d>œö rµß*4È]ºú1I'ÈUÑ]½Žru¸òüt€\Ū0” w9¼‡ wÙ¼îŒ wé5¿A®š¢U²M«¯ª’ r—ÍUð¹ñˆä*Ýl Û‡êä.*T­p r{ãPÏäêXZ~ò¬Ar•«Ücí «1P-Ur—^š8žH€ÜE…UÓcÎH»l•Ž%ÈÕ¡T¥çäærãÞä.½:iPP€Üeó2€Üeõª‚\Ū"ˆ 7¶ÈÕR ¹zÚ«V› W§WÅÌ¹š ž…#9®îLÕ±ãj«ö\=|WWÿ:LJwÙ¼fˆ$wY½œ‹$7¯¦£\=èŒ0®®¤ǸK¯{Ä$ĸ:Ä*„$ÇíÅ£œ™7指ãÆc»tÛAPsÕ}êEq„¹êŸ°Ïµ+ƒ¹ñdæê¦•Ü…0W‡YE턹Ëêl„¹qsuµ± ˇ,W7§*ÔÉr—Í‹–su¿kÍI˜«‰¹j²sõ¯|ÂÜ(€¹ñ„ƒæÆk4W—Em¡> 4W3e•QæêJWc+ÒÜd ¹1ŸƒæÆ¥þðÜåà¹:À*R#Ï]zÑþ“Ö“çæ‰ÏÕlXeª„ºJiª¡ ¡n u@ÝE¡ë ®bk}*Ô8 ®.rA@B]e$£™îÒ‹2‚I¦Y:˜®:'UÛj2]å[çP‘鯂Pw¹ ÿ%ÕUÚÜ.ç'!%ÕešN¬«hËpˆØÕ€(O‚]¾Ëv™fì2¥`WGxŽÏ'»ê¿Ó®ÁçÉ!ÙUÀõ~Rt’ݵ¯µžçhWÍtìZ9ÙåRštWFª](ù®úÃÛ€WoªG ¯ºTU»MÞîæp<\…Œ„ˆWí Ë]‹ˆWôá³D¼jW䋤WkþêDEÒ«FråØEÒ«GdÆÞD½ê`U=qˆzµB¬²E²^]²—ƒõªÝe{c}&u²^u£9Fá2Y¯Rýó)½&êm›µ«:†˜P¯’ÿjäúc凿×sîÅ|Çr÷w2_k Mæ»z*Ìwõl2ßcõÔƒÌW±ö~zÚÏ$ó½’ùšÔÏ"óg½J¡ æ«àáûCæ» ž–å ˜oÆœùÊÂïdd¾ZT¿Ù`¾m畳ó »ëƒ`¾øìÌ·=®·QS¨w)ruä;)­†—‰|s’LäÛ–$v†ÈW«_‰wÛ0?‡§C ßU«…ï_Á̪³ñ%&#ñ•‡ƒáqßE¯ô1LH|•“^ÏgD_eOõ-–Äw9Q 
â«<¡lùNޜȷ®aE@ä»,î»Cä«Xùøù2µ%òmoƒ{Ÿåˆ|—Ù½ãú.n>Cèû%ëeü1>3öÎ={,^j°w–?Éx3ö¶£3ÛÂ^½¦÷!±'ìÕîJµña½³œÄJô@Ö+/¼1îÉzÛ峿JÁz'ï8AÖ;»7"Y¯ÈFYþõжnµØìCìQ­õ Xï|{/²ÞI¾ÅÀzõnµ>ëÕ|€zçÃ[¬õÊ—­ÂÞ¯$i( {g½ý–ÂÕ{çþ꣰Wq•Iö¶kÔžúç¹ ìm±³|{g¹äÔÒ °wš­îŒ¬w:½¿3YïÔYÁ¯>¬W÷~ý-ÈzåqŒ Œ¬WžyeÖ@Ö«Ì·:2‚õÎjKZOXo{ôn# `½JÏ1‚õNÊKÙ Ö«2êj³NÖ«EÉÙš¾E»r.l7þ‘ŽõN‡w½%ë$÷3±³Þ>}Œ ¨wÒŸ¯ßD½Z}l¦½uÔ;‹ÖÔW, ^ÍråHJÔ;Ia1ü‰z'yYÖ£Ô;É”rôÛrÔ+—Õ†,Pï$OŽaÆJÔ;ɤ >íõÊ#²Œx>¨W“~õ ê•e™Tõ¶Ëê•ï‰}wqÔÛý4¢ÇŽzÛî®u~Ä©z¹† Aï¤Æwc Ð+ ›vãôêÄë;:A¯æU FÔ«‹bo Þi‚À¨·¿&‡í3Q¯Ví†QzµÌ¶ï+@½“rõaNÔ;ɳqø õv0L‰{Ûv-6ê÷êzníUú‹ãÞvê.Çîm±³\÷êrÚ'ǽ²rMûƒ{§ÙÓaâÞv•M7GÜ«Çt½~{µ+b;îmGß"ã= Ü1à^Ù¼êüg`:îí&B«Å ÷v3¥*ùî•™ÒíB[ç½ ¥fvÜ«Ÿ´ 3âÞv w›˜? x‰{¡FÿGÒÞx œöjw^KÚ+˧»^ï ½:”jµEÚÛmžíí^g}ÏíÕØ«fn ½r‘jÃu0X§½ºyÓ9Ê>@{uÓ«åi¯®X5^}hoŒJÐ^yR•E.h¯n÷×7ꯙ´7÷d´W/ˆrä&í$³¬O/N{uM…BÚ«<©Z›öN³¯]I{µ()iï$¥he6 ½:=—;î±ÜÛÎo²ï=À½b±íÜŸo‚À½:‡jv@Ü«ÅÏ1Ú÷2/îÕõlæ©àîÕ3bÅ$À½Ýj­ P÷ö±Qpo5:D¼“–4£«1ï,ïj𯫵àaÂXg¼“º6–*ŒWþ×ù|GŒW3ò2Úz“ñN§‹þ yµt64 ¯VêåüÈ›»sÈ+÷Lû¾È+Žfæ–€¼ê10™MC^¡ÀRòNšZ‹Sòj Pßž yõN,Å!ï¤Ò¾ñi7Ó!¯VÿU.HÈ+ÐöBûsÈ«Ö ×¨:&äµï<€¼bLUTBÚ«íLÜÚ«íÖ}p+§½-8𤽢`G­š@{'%«æŽà´WCÂ>óp¯(‘ V€{Õâ½ †„ã^0â^ÞTà^$ÐÞI6¸£Åi¯†×¨o!ì™5Å`¯òÒº ¤WPÆôfIzo“•€ôj_¦éŸ¨WIèhgóB½G}!êm¿y™Œ¨Wwª*'ˆzƒzõN2Õjy¢^ío-| Ô+«^+ÅsÔ«,µé#®&ê=ܦ(X¯ÜKCÖ+7ë¢ëg-Y¯£²^²^®^œõŠ…šÜ†¬÷Vâ0Žž¬w«j· ½-)3f’^€‰¹Hz÷‚vß!½f÷¤·ýÅ„$½xªzw¯¡ Ò»éñõ$½«^ôjÂ1YA/Á&A/wGа4@¯Cg×ôjF2Ý;Aoüd€^Dä¼²¬(¥ó‡órð“óîz±!9ïŠrÙä¼–óâ%üâ¼þeâ·“^ÞHzcª$é­©—”7Ž”·½D®²ö å•RÕòòáNÊk’W¸ÿ(p ȧÈË™>!o³ãÅÓEoÌå@¼ZœÕwN"^"^l(z5—ØGÐxùV$âe®BE/äd¡èm±ù©Yà=”l HÀ«ÙRÿð¶XK^çïx3ÄWÈyõÁÂ$œó*f"yòÝK_µ†Õô¼í­®1ö<6¼¸Ù仇•øß½\ÅB5/³Ó໇ÞéC7M¾Ë M¾Û†îQlj^®‚ £‚|—‡B¾Û޹¾Pï¶—é¶ì‚x÷¬YÝð®¼wJ~ ¼«ÏÙ·yG¸˜7¾ÿïÆo:Þå×[âÝKëåR­º˜7ÞBÄ»2()â],1‰wg9i-ñî,ú“yîbip—ߟwgõ‘5Ý­Ã]%¯%b ¸Ûé«>›îªÌ¸BÎvg}λÖÁo]Ê;ë˜×’五·m·šR^žR¢/ªjR^±ËjèB)¯˜g½ßc»‘ÒSÊK¾1ØnKûUò(7!åímJÐ )¯÷¨+§”WËšiˆwa4L)¯T)UN)מּã­H¶«…Ò9>›íöi]]Ê;+{‰-Ùn°K»o©Ùî,‰ÿHlÉvg-NËhZÞY©fy€íj»{в]¯eh ÈvçÞäzÜThyÅݪ•Ùî¬ù÷*jêlW#¢evŸÒq²]-öÊZ Z^â:²]O™Þ…õ´ŠY‡™án;æÑ¼¼Ky©cÿ1׆٤¼¿Ë‚w©žt/®«%_Q¯»N¯íÂ.$Èîå_3HvC H²‹Êç »m–œ~Å·aV³©̓ËP¨¿è®·g'ÝÕ£1Z©ÝUê¨R º{(·âØÄ»ödÞ „ ¼k¯ Â]Íí-)î w¿“¯;¼«v£.Y5¼«o3¯\Ç»z;Y÷àÝÄÂÀ»ž§ݽµh<Æ‚î"‘ º+[°r“%ÝÕXÛ÷¡ 6º;k-+0ûxìܽkñÿ¸Û6>çîò5p—¸p7·s¸«÷Ã( 
/Þäd»zu<¯çd»j ûdÉvõÚˈƒÞEçºÃt¶«Ø5„9d»y,Îvg}EºÊ+J^õË®¯ìTò*óªú:(yw}˜É)Ø.>¶„JRêxµž;—áüòªð¿êÇAvE.ïb1ò¶ùZèdWšÕÓÍ9Ô•·Ç“XRÂK)n*xçƒÓK¿»Žz”Ðïâ ©.ê]My¥[¤z÷t÷pjX¼;p85ˆ„ —™ïž^‰ò]ä /ùn[Žé å»j]š!ÊwUùapòݶ¿*ü¡|——LwŒÐ\ØT® ¹º:šè>c(w‘‹‘æÎj=øØææ¯9Ìw7E¢pW r(<sãàsãêç*ƒÛꣅ»êE9|¯ˆsuWZ¦ò0$à\Ý•¹œ7€s¹† ÎÅÍ7 —ä¸y|Pìž^gDˬœ 7F@nü&ÍfoMs†N, †:ÈK›‡â$W«…J0?$W3AY!‘äæNrµ£µ–ñ ¹,pÉm¡Ã<£ArcÈ䯒«Ã\ÆWf’Üx¢@rµ]Õô‘æÆq‚æjõ4—Íh®ŠL:ËÚ놢¹³’x#½Nsõ›ù3p®Ss&Îû1*€sÛ±&ˆÎÕJˬoáР¦ÏUâ ž;«'ú÷v5ßoÃW/â'çê'Í™¥EÃê ÝâA5æ¥]§EC»H× OaÑ  ø+¢]ÁWØÂ¢A)‰*iÑ ¢UbLX4ƒ¥ÿ÷ó–¦?ƒz¯–bœþ j¿\Ÿ`”ðgÐÛsÈßAá5+П©üoÝþ ‡œ}>vÀtgPÎS›èÎ@»º3Ð|‚î a¥w†6ÅÎ5—éŠàØoÚ3ìÖžD—’´pg GWº3XsèpgPŠ1’yš3ÀÌ)ÌÚÛõXGe=Í”¬Ks†£¾½ÐœA oUºÓœAyŒ©na΀ æ ±ÝcÎОåÛÄ¿0gè]‡“‚]:`èÒœ@7*ÿAtÃ&DWœª ÃËœ¡üÓ·›DWö‡O˜3Ð^‚æ ½ç¸ß4gã¸3ô†ÌE_áΠ Òá˜î í­ óÑGqìD·Å\uKw­z×2Ÿ€;ƒªË†­‰îª²Ô¡!Ñ]µdŸk_î V1Aw†M¬â¡/¿Ã^tg€fxŒ=î W-r~éN?Št×Gcðòe˜üM]Õ4“,Û`W5¡¿aÞòòâm—aà=ú2Lþ¢ /Þ»ÞtáÊ oOëcW¥ç¯8ñöªâA¬èÊ Odæ¨ W†¶]Kó0 S†Ø ¦ zh‡x˜2¼bt3KZ]Å*C#Òm±½½î>ù,‘nl¤«íŽQ´ ¤›?éH7cŽtó'éfìÃtßÿ\LW±u»Fß4gºs¦›1cºr¨«˜õÂÔíuc;@ÝŒ9ÔÁ¨›Û9Õ͘SÝŒ9Õ͘SÝŒ9Õ͘SÝw¬¨nÆœê2Fª›1§º3ª«yʲuPÝŒ9ÖUÌ2ypÝÜî‹ëæÎuß±âºùkÎu9À@ws3§»±;ÐÝØx·­ÍëÌÀË Ÿ„7Ó onçŒW1[Xƒñæo:ãû Æ›çîŒ7Åï;VŒ7cÎx#ÆçÆ›Ûã}‡ŠñfÌIoÆœôÆ¥~HoûgwéÍMõŠÔX­/P¯~³*±€zwV—f¨7b@½í'ÛÂy,¨€zß±B½:”i¤®D½qz@½sÔ— ¨7N°W±*^#òÍíùæ98òÕ¹[· ߸E@¾ŠYÕ.¯Ò·{˜¥ùfÌ/³>"߸E@¾¹#ßý†ùB¾ùŽ|5†ÜQÁ‘oŒ/ ß½;î |æÈ7CŽ|ã'|+] ‘¯ºNXÝ7oœoîÏ‘oîÏ‘oÆùÆPòáä—È7cŽ|3æÈ7ÏÝ‘oÄÀ|ã7Á|ãþ9óË æ›1g¾¹;g¾ñÔ=Ì70ßÜÄ™oÆœùæ™óÍ3ß]*Šj æ›1g¾JÌÌ7·sæÛb`·Î|s;g¾s櫘l€ùjf櫘u]óÍý9ó͘3ߨ˜ï;VÌ7~Ì7cÆ|3äÔ7cŽ}߱¾û¾ù¯Ž}ãBûævŽ}»óöÍÍûF à7c~3æà7&a€ßÜÎÁoÆüfÌÁoÆœüfÌÉïÞ¿×}ߨ!cN~3æè—û#úå-"úÍíýêzšÁ€£ßüIG¿ù“Ž~s;G¿¹Ýýæ?;úå Fö›»rö›1c¿¹;g¿±;°ßøI°ßŒ9ûýýævÎ~óXœý*VZÉ~¹"$ûÍcqö›Çâì—sÙ/_d¿±?°ßŒ9û}ÇŠýÆq:ûͳߌ9ûSüåâ´Ã_® þþ“Ö¼KùÿT½§Â,©æPZ¥ªwª"oYo}sÃ| Y/¼ÄCÖ;wnøEoU:ÿR úÛÙb?„ ôWõCÞû zÞv´W!qêy!º=¯zâLE¢CÏÛRÖïã_•›´³yשè]¶‘¢¾½[usx6Xé ½²ë½Ê\† 0¹ ÃT*†¢·½ ¬mßôÂ1$½ò¿©Îa”ôJ1:zX„¤WC²T¨0lX½wghzû«ñ™¬ˆ§®]*þðJÀ¿Ý·¨ôþaØ0£ fïÈ ü«ßliæxâhØÐRüZË„aÃä½wðv;aØ Á·|ו·³U§ZàßnˆT:›0l˜Ñ5×ñoø2ÿv?§rÇ2ú«­T›÷ùT úÛR¿ýHþö«i­¦é×@s¿/úK3'À_…–{ú| ·‘ÙJ‡ýæ/úM+@º5´ûy]óÆúí¿h^ÅŽ~³NÑtk ‘£“ß½g£%=pò«˜} ùÝûx±^kF~å§u˜úäWçh 
î;õ§¨ltûö_\ʦ°¯rw™±a_Å:öí'gBǾÚÝdÂkǾŠí&¼6ìûṴ́¯bëZÝâûöX½?€}eºX Ãö•wåWO…¯õ†c_mbí}Ó‰Õ±o÷Ã4ƒà¢¾:Ày7s£¾ýúoÕØÍ©ow,­ú ¾ŠíÃF Ð÷µ™A_ÅÊÒÌWç6íUüèÌ·_¯¥l¦ùÊ@¬]ËaLàÌ7Ò¯6ÛÛd7p°!ß–ØÕº*oú;:òí^fkµÞvä«ß<ÊùÈ7<Ѐ|ûm½Ë¨¾¼¸ãN|»KÝ<ŒàA|u$Þiʉoº.vâ« î})A®_ÅÖ*OñUl›×ò~0âÛ/u1qâ«ÍÚ3;š:ñU¬ÕãäâÛ¥º˜€øî÷Œþ4æmig7| Œø*ÖÎ÷™¹ø*dO¶óÞ<ç½ý^ïeæé¼WÀ]u¼à½ÚÝv?ºà^ýd›~ÊÍ×p¯e™ª‹¼ãÞ×Å4Ü›€ãÞ~z•ø†+o\óèúë¸÷µ™áÞ|r÷æ0¢+ïáÜ«'ÃäFÀ½}zjÉë#ŧÄWë‰2¢ÄwW{Ìçíí{+±½ÃÞ>Ý¥ntØÛ«ªQO^>rôä•UÈH—Ó—£Ëao>r4å]½C`o/‡½¯ã4ØÛ·«êÄpå]5u ³.‡½ý±–íŠÃÞ¾¿λöö¹Ëú ¼\yK_Ø«ýµ=!²ÁÞ~9¯jìà°7m“öæ)8ìÍñ÷{ó8ìմОçã{ßîgi r‡½ý(öò”1ØÛ‰¶~ÒT‡½=íÚ®²Q€+¯FõnsØ›ycºò¶›¦¼Û=,T^¦¼³£W˜ò¶ù£4D½ZþÞ&å5ÔëÚ0åmK¾É¤¼0嘣^­Ì‡ AZòžõi?,y]•aAKÞ6òæñ`ôÒÜ',y±Ì K^ÚüÒ’W/ŠÑ WçØÆ`¦%/ü‰Â’Í“}i:èýZø?ú÷óäýë¦oÿõWïT¶kÿÒnXÿãß¾þ¸O}wš ûŸþÏþ÷åvïo/¿"&þî¡üÄþ}ûÔéýâçü/Þç­ê·Ï™>^+Íý×￞½÷?ýÔ}ëõìýó矺ÿ~½¿îøçÏBíÏdøS®ýgïúÓÏÝw¿ÞŸ½ýù{ûÿúÅ¿þãØ5?µ®%“ß¶UÝÝÞ\mY¯úÏúø}ÿ§¹ýokýíO¿|ûïÿ¡å%ÿñÛÿøö§ÿö‡ÿò§ß¸Ÿc:e]¢|ídóž¶ØÓÚk¢ömm«Ñß¾§#÷ôæÆšÿÅÃeLÂ?snôó¼L`ù³Þdz­Ÿñ^R=ÍçsJ‹‡~αȔEþ³ËwÆc?yXž¦ücäkï$q?s”|=—&<šŸ:N¾Ž†%ç{#å7¼oöM<éh>õÆ9äØÞ>:Þ×ì<}Û¿&çßýˆÝ´µ†`Í:·…È{7÷³›åŸÜϪ¯"ëz‹QýÝlÿìn6­„õmûõý¹Ÿ´ÑøK혷oëºëÎK[ ½ÎEÿ¯ïáþõýòËŸÿß¿ýù/ùËŸù÷»þïöO½D¿ý¦JÅKRˆ¡³<æªÏižZ¾KRˆ’×vߊQ#|Aë¶»gÇ%)D ªõ¨–~×ì5;ŒbóëZ\?/ƒ’¯kuMk[c[£ók3ò°Ý.¹®Ýd›>!‘¼bU(³]Þ.ó’ bÈpÚõ[‡£ÝÕõŸEo Y·‚«Ë!žÕþvx‹ zˆ›Žj..=Äòm»{_ÒC Ì&w­*(—b(8·Í­ï/}G‹ìm7ÃÞKÌsHô“ë`ï—0ê+›ôÃwO±’hkލÖ×½›h²=&V¹îÃDF›¾MÝOšKØpïÿj}.µ2dnØ.òÙ)V²èmqsïKØl[ÌZ ÞBT·íE߀ÿÉ»“ gÈj»e|¼Õh(¹ôán劕9·[ ±§ö­¾ã“Ä6;p¹{OÍ1dg5÷tXÍÁ×$÷p@ņ 8ww§ÑÞÊÌã–ß±dÌ ‹ZìXöÇ-ì–4ªº‰Û·#ÅfÞê JM¤X Õú'Òñ…íž+l±¹¼"ï–\T1Ï6YÁ[NŽC1´MÞïž]ìª ÖöõÁIŠYÛvu’~”l·ì¹õÖGÞs|Yüp í§TD·LjÆ»I)4šݲks¬bÕ_S±em“µ[¼—Éêã¶.Hyü[Æ$cŠå¡Ë¬ḛj_­Åîe5ÎÇðŠU,[ÇßËf…i:”ÒVÝò¸¨6ó0åö&Í(n¾µì3l=Ò†[æc†œ$+Tµ`«ZW޹þ^¼>C_íKuÕfSKk»bÂm0‰áz{sÇ[î c†Íß\¬kUG²ñniŸÕD¬·KÚ¨p‹ŠÛ»•µÔÎt}-fÓnõèSlžÃaÅO;{!‚öTE0÷z¹!Doy÷¼¥ï¾ãa†ÑbçðǼu‹O±s•Y+V¥FqÖJ^ƻޮ—»ÛÌYbYí¼ð÷¶Zù²¶+Òß2/«(üÒp¼4+å±[›ÇyX›û;,)ã8+çê¸*Í_¼Ý›æÖçÅG¬Ö~ÊÒØ”-ViìªE×Ð@ßçli,{j´?Xë=5ÚI¹ÌåÂËûÜ,‹Í_Ü-‹å/î–Ã*Rïó°V±!ål¿c)¬¤¼àÚë)lž°W¯—©Pª v•”÷~|¨Ûа 
6c³e°ôõnÃÍ2XûµZ»^Þ9S±J`q¡.!²[ò*U]ùÔµwˆ%¯;-y]/×+6’×Üì2'¡Ë¿G·w%¯úÅy´;lWÉ’Wm×V,±þ}Ï–¼ÆîîÅ’×wÌÌÓ¯.Hþ,KîÛ}/â8%ñÛm¨ú8éÚRª>²îò×þ2\߬Ò*øÄjCïëÓfÖ’Îß·bòÐõ¹1®qVúÊ‹%¥¶%°¼ßºø–Á¾~vµ–÷®7w¤RÏãs•Á"°[úŠÀa¹ëë NK^[eíÎ[Ðûªêký§áj‹Ü•¼òEÙc–½b´æäé«=p-4{öŠqׂ‹§¯@-¸zþWrvŒ“Û<Í wÏ`í‚Jîà³+ƃ>½Zoݼ}‚å–×W{štDÖ€žÁÆUér¶UÖÑ‚³å°kg lÝc·O²ˆ-žÃòh–ÕsXd3-¸©[û¤Ïk_£bNwhy‹i½uhÄôu’¦ êÁËg[«íhA•A´-Û8âMZÌÿF­£T¬ÅnKcãå`×®ŒÚ7h‰ì©‚§ÇØ´ÛL0©ØÿþΣԖQ»O¸~ÁW™å·›¨ìõ½K/Ìn¿zU‰]~IÓæ¯'´ÝGkÙ‚»g³y°‡Ôæm°¼&ùøà¡}ŽŠ™Ó5nÎÏǧÑõ·óÇŠŸVÒÿ 61 o[q>½m_ß%/¬®C€¾-•½Îù©#ô=º$moúJlŒÐ÷Ð3ôéŸAä{tÔç©'òÕ‡iå>R- _%êŠ:‘ïÑKp Ð:òU¬LW|½õv ߣI>ŒÈWúê"rD¾Š|"ò=º~ØiùjùR ‰ÈW^Ö9È7c+]ÇÌÌWÎ|mè}ž2_ìùÒì‡Ì—î4ó¥+™o”ÿ‚ùÒ‡…Ì—–d¾Z@–Å1™ï®&gµüó•¿æk2ß3ëg2ß3›f2ß¶ö5;i2ß»mU滟^ æ«å¸Î|÷ÓŒ¾‰|wùÂVÖ ä»Ë~(<‰|UÒÜ’…ÜšÈwïbܧc‘¯~sMŒˆ|÷Ãʼn|÷Þ±áQ3’ù¶íÚ¹À¾ûî­ €}÷Ý­þ‰}Å›ÊΟØW?Y*tb_m7ÿtêÛBí§çpRß½7˜ú*–$óÕ®£ù$™o‹Y‹ 2_þ¢ß}ÿjeAÖ«ÿ¾š£õª*©äúëͽ;ëÝõ~}tÉz÷Þkç©%"ëßëÝ{ëÂç›%Y¯Ž°Ì†Ézõ›¥Æ'ëÕvÕlƒ¬7·sÖçÖ;®/(¯¶xZ‹€ñÆÅuÆ»w?ƒñÉŒwWñ•–Œwß¼¯ kõ&"ãÕvÕ‰Œ7·sÆ«í*£&ãÕ9TéonçŒ7·sÆ» ulOŠ@ƫ߬6nñªàn­Éxóœñj»*¦!ãU¬ú1’ñ¶ØÜòï2ÞÜί¶[F&LÆ+‰Cõc$ãÍíœñRAÆ»+%»¾Ïxõ›ÕÃŒW··:_ñæ‘8ãÕfÛ<2#0ÞØŒ7·sÆ«3¨zd¼m»ëSÃû-)oœ9(¯¶kóÁàÍNyu,*éý¼Áywz*OrÞÜÎA¯¶+—ñô]dlá¤7îHo\-Þ8kÞ]¦£“7Q¯Že=Ƨ ^mWÜ{s;ǽ{ï­z?osàÞ8wàÞÜÎp¯B×òø\÷î} 8>÷j³*4'îÕví˜epoœ9p¯¶«Ž¬Ä½q,À½-¶žêØðAÁŽ{cì÷j»}¨^ˆ{óX÷ævÎ{s»|ws”Hà˃à« —µp€/wÀ÷õ³|_A¾¯}:ðå©ôÕ›o”ð÷}mèìW¶µÅç«H°_^<²ß׆F÷Þéñ¡ìAûKzÔ6ýÕ«7ið_mÙÞ‰Ÿ-ø/ŸŠà¿qy€óò€ÿ憎_:~¥#àïoŸz§*¹8¯)­Íb0`Î Á€_[:~çÝÎ3ÖÏW‹ À9¬@sC‡À¯ÃqL½%Apþ¨qàÜ 8¶Î  œG üÚÒ9°‚OÞ À--¾‡—F@à]RÁ»>bÇŽý9~ý¦#`Ly~|!`þÛ•/þ^Ÿº`ÁZð²Î ^½ÿÙ‹ŸÇÈË^,Xlæ1­LÜÆñ“# Žhðê}ÚB¬ð:º¿ƒï‰=þ¤ÁZ”mi0ñh0"Ð`悤ÁZ&M×ãIHÌå#ið®:ðQðOŒÇ080˜ wÀàI9Ü£õ ,ëÀAÈÉ‚ûý {h€9,©Ö9Ëз㼆7[h€O÷ •5Àš\j1 ðVQƒcëv/#S" V‰Â] nÀàM~š£G#ip» óQ4¸íoÙÆ œ<¸ÅÖj‚H¬3¿jÌÊÐ˼äz|ÑÈ…ã‘n±öHÖv_\xëþ Ï4K.¼uÏŽO³KbáØ%Àí'¯}|@V‰ÔTkt`áÐ÷ kf«/³ÄÂÛâv¡ÄÂÝ<í_Ç(n/¨­?Mß’ o²y}L¹ˆ†C@ïpX'p&)vÕ'àpûŵº©ÇO«¤ãFEopÙU¾À.íŽÀá¨Í ¸»ç‡.Àç>B0¯ßnò%†l#ÀWyb%N!¯ÃaòŸ¢öšœÐþRiùÒþºÞ6´¿fÚ_©.î‘cPû+/³*o¡öWF£'ghã\û{¸{µ¿œ`¨ý…rÊßK¢™Áó¨ü•OèhÅÊ_å:õÉ•Ê_Y,m£&+µ¿.[¥ö×ÛD†ô·½§ì‹2Å¿Ýäp àÃ=ÄB|¸)o*€µªù5 ¬F¯N©Þݘ7Àí¡j©ôç­@x±wä[œð*ë7C°P·+_ŸMS 
,_Àý×ÔÀ2ºw–ìjུ‘X†¸[°ƒQÜMKÁH=pœÁò)\÷ÏdŠà]^ë¥R^$³:Š3RÜh)ìGE°œi—èj`ùSÏ+©–éõhWžŠ`Í%Ò¥"Øì]¼•ãá[¼¸ãeJ‚[~üùhþí- ^Ýr15Áþº ¼˜qÛ[¼¸[fŠ‚å¸:lÐR¼ºqrŠ‚g7 MQp»ÛíC¤ Mp›ÍαÂ]p<¬ÐKS9 éR¼j¸›šº`ôˆN]°Ü´‡r €°Ü®Qi”ÂàÙÝ‹S<ËæðÑk¦0xqÖ˲c«§•ÂàÙ-LS,{çãy¥28ÆùúÕT¼]¥'ÿQ,\½«çößÿ&,ã“á¾L!@„7Y¹¯8 ‹d´32YGÂa$Ì{"ab «æõ²cq$ÌJ "a%Õ€HX/ÂÒ§ ¯””ÂÙp·5/Ó ašÏÔü-‘pM©ÄÁzLÛ"m48Ö@;F;ò`5±/ÃeòàUFУ¯y°Ìã÷òîz¹GA°¦i´9$–Åý=–-ÂíÑžÎÑ]âÂ|ç µ¿=Ÿ"„u„g§/bz%*ošJ— ¼hy]5TÂíX¦çÂm_YÂg;ÂröŸM­ë@X³t{3<*aÞaýfÑçÁB_kÕå;Ž/ÝÎ}d•ÀÁíç©jáƒÕch¯ò2à`½H¦ÑÕ„8¸w;(Q4p0Gqð¢éuÈ›Aƒ¹¨ÊÒ`ÊZ¥Ê Á¹ÓàŒu¬cߪ4Xcë¬h°®•}ˆ Ží@ƒ5&תÁ Öv¿FƒµÝ^ wÐ`>¥¤ÁŠ¥é VÖc)@ƒõ¬^µà Ö_Óÿ ^:¼ÄÑi°îö>êuHƒÙùŠ4xѲf¨nÇq€w‹ªZ„ƒk>!'®äY{3³GÁêR?DÁšNÎRŠ/ÒD›Ô×Q°NÁ`=Pð"KÇÒ~Ppná(Xùe;ú‡ ‚·X»—C[,sÌ{)¶é,Xä*±4X°~s.8X°Îú܆ X°FZy‘ëXŽã1w& Îcq¼(b`gÁŒ ÆÈrWXÇ·Õ²Xçu>5äÀºKÉ©ÁÕøà2!°s`º©$ÓàÀÚ_ÙJ‘kD.Ç:Œ'œãÄAµ·v‰?o@`]ÆÑïŽ 8^˜ûýˆ€Õ†g5)µ#`Åì“pœ-80Î8Æ6(pŒCP௮@C¼ Ì“Öú¯ÍïÏû8† °®n9% °¦²©Äýó’8ŽA ŒË¬ã/ë0Bด€À¹Cà87@à¸& ÀyNuæóh H ¬íì!(pn÷…u=ì3?0pLûÀÀíEu–31pÆóYŽ ã88vçX»›J÷Ì~,„ÀqÇ0q ,—ø–<\8bÀÀ-mihHõ[ª4XéN œÈãŽÉ¸ýæe_§ÁãÇÖ ª–äÀ1 ¾8p^çÀ:é{hÒƒó*^”fÚÁµËm0îàÀ V§€äÀ‡þ2—ØÖ90oQp`m[i}ºpÖ»b:­àÀ|΃/jP4P>9°®AélÈEþ®!( œ? œW˜/ÄàÀ|¡$Þ\øX¯ýÈçËl€à¼a€Á:•’! Ö©,Înóáü ~Á`€R( ÎÓ ~g&µ–J Öµ)EQÒà8CÐà©© Ο…QpEÁq³( Ž GQpî¢àZÇ…#ÖÊÓà*TÁx÷¿TÁ•™¥(¸]Øö6.Ý)rÛyž‡©1EÁ˜ÂÓ,8u¿! 
®™:5ÁñlQוšàÄÀ·ÆO}œ!ÖÃ}¿¢ ~§†øñGðôCXþ¶Ÿôì€%Þ'¸½GÚÒæiSN¬ŒhÔFƒ¸¶kÞÎqm»ê âz~ðˆÃâücé5âõ¯Ký²†¨/gˆ­€Ám–º‡ðuøÇ¿Â'ø3¿5ØÉ†O0¶ Ÿ`ÚÐÒ'XóèäýßÌoí3¿5ÚÉÒ'øPR>ð ÆŸàøMø·ß<ÚÐ,<\~k-dåˆð Ž­`›Á(˜1G FÁÜ‚õ˜-ûãFÁˆÁ(8öFÁØ.Œ‚#£` ‡Q0bQ0þ1‚=FÁƒQ°ÇÒ(ø°ÒÇ4 F,Œ‚ £àˆ™ÑZÄÂ(øh«ë§†‘FÁ±Œ‚c;7 ŽÍÜ(ø2µˆÁ(˜1óHè18¿bæ¯Ö…¶(d^Á´Ž¦Wpü&Ü‚#ÏàøMxGìãÿ ÏàˆÁ3˜GAÏ`nGÏàˆÁ38bð æ•¤gplÏàˆÁ38~žÁƒspü¦{Çfî!xÇ/Â;˜1zó7éܲ”êÞÁ±¼ƒcðŽíà1xG ÞÁƒwpÄÞƒ‹ÞÁ|7Ò;˜1zG ÞÁ|OÓ;8¶ƒwpÄàÌœÞÁ8.ýƒsKøgðBºËØ|gIáwðþ•É8„3á˜™è œûܘõfÐL„3¸3ïe.Ây´'3_á#œÁ×$øc„øŸôž+å|qbÍ€£D781šæ'n³Ñv•8q4õQ?¤Âò¯…ã!>ë…ú’ ß0¤ÂƒT¸½ø×áïZá¶*ÛŸïôŒ@izFÀ¤/<#`yRaïAJá62—­l}¡æOR) 3¡P Þ*”«ûp&†Rx•Êeæ¹€ª.´Â³>ÈŽ–yéÑF9´ÂðP á6CŽâµp åVá"Ü6³’*º·3± TºÓ|”.¯˜¹·T{-§2º»D=L„¥´-CAšÓž—&«¬6G‡c˜«3Š ‚ÝD¸mµÞå^áEެ#_Láöt>2ô0–bp<üa"L«cšk„–²4M„a a®sØÒD.4a"<ËAr8~¤‰p¬}˜·¬ =XO#‰p{ô¬¤’&ÂN˰ ‰°o‰Ci"ܮܺ?>š·l»ö0æoÒDxjÓí#Ä£…p{_¯%1¤…0ªÂBxÖ­6ÈÀÄÚÛTÚ:ZÇvn!¬Œ­Ì…ÝA¸EÚÛtz;Fèð§ÒÑ?˜ö¨T ë[ïøÀþÁ8 …c3Ø·§á.íƒ#û`=E%ª¢PØÎ'±¼m·’´Q(|·/½C(|k¬Ž\´Ö7õ’„Q(|kôŒbv …[¬zІP8¶ƒPX“t9lP(L?\ …c;…÷Bá[ }‡3…ÂídÛ=y¤á £*‡Baš¨†ƒðj½J)¾U“3L")î@i¼=(—àpVG·§À™BaÕ£ ƒ¦P ó쨎߄V ²C+Ì;´‡ƒ0Îbá°þòqmÿͨ„Ia·Ë¥^¸ýÔV.K ‡0Ã(ÝzÙ[+é Çþ v^Ê…ÙéxVe[¡À_îÁîD pÛ5¤…P8¶ ÷`kýòž¿+ö.ô¡F [(…õnœG˲´nýÉ>C)1(…_1T¸YóP ·+g]s©Žsø(…ã×Â<ØüéB+܆lKi‡¹.´ÂXl„V¸­ã¬w/µÂ—Þ:û뷛£·y¨…1)Fíaˆ…ùÀP,Œ¾õ {Wvj…ãøÃ<ØÍ§©–[u^ VX ™B,|Ö<‘pîjaºVS-ûƒZ˜#ÕÂyT ã¡£X86û ·¼F³– Gß{Š…/9a?‹ ã–¦ypþ,ÄÂ8ž ‹ŽÏÓ)>uæÆ¯!ÆmH±0Œ­S-¬¤t|OJµp þ¦…u¨…óp FÉmª…aºžjáøYª…ãX©Ο…Zu¾©Ž{Bµ0¬ÔS-Üö¹_ûS-œµð¡‡¿lrµpþ ÔÂñƒT ÇqR-?Kµ0œÃS-|ˆÛÜ=dª…óg¡–å¹ ª…|R-ìSfŠ…Q”‡øÓ2Œ >d´ ½W¹í¿§éöŠýš¥÷Û€{™ôIwºÛ ¡­ ^{¹Ÿ½,ÿÜnÖ³Û‘·kݶúõ½lÿä^Úý×:wÛÚšûïìæÈÝü£]H qÞí5}}[Wi=ÎciëÁשèÿõ=üÏ¿þ¯_~ùóÿû·?ÿå/ùó/Ç¿ýûÿÝþ©Wé·ßT©)ì—xÏ¥p?Ê.úêøýºÑ_–ê•uÏÜÖ¶åzJ¸ƒ²Ç\\ë‚~Ó-¨N=}Ü©—dõùnу¦'W'îáÙЂ²º°6O3¤lIŠ)Ê%4ØçQ‡{"¬.LVϧ;F ž®)Ÿ…R ”µà=]ØôÇëð‹p™?Þ%4¾Ô¯Þf×.óÇ@±ö{Ü  ÎÎf0O³}>ØÄ{ *xï`oç\ì+»ö¶àÚÖÙØDj¡ß–ßö¡ýlËÉG;÷yÚÿø5 D6!’~ÍÓa_ÔIw òz°å  -׉ótÚ·v¢mÁvjëC¤>1ßÚvm¿Zi«OܰùžÛ¿·ûú æœË&»M#¸²Îlÿ×ÞMÏ`Váv?<Û²^ aó×mãëŸsø‹´A4†@;Ja6óUðDk“µ\õ½šçb˜iª¢<Ôã3˜u*æÈÙîõ0óQÁ<ëKÏ3˜)méAo«m=Š[ì†$æ6Oßy™äáÿ æµpuÎ.ŠÉ-» Ù3˜sËÕe1q¬‹ºÄÁìb!µÚ¥.¦øèÒ5ËSz- qÎå©$Öò†íµM/Ô‚§ÚC=c½€{д1rÚºêÉ[ÚÌ0ˆ–„Aõè¡ìÞöÛ]8ž¡,]Å9 û,‚ÒÇHIXæ¼Ç¼¬Vß×R¹zÁ¹Bæ8‡•[ ®ê—þ 
e¹ôUc¸Y½½u•uInAYd¡Œöâ=h:™þ5ãùÔ‚ê±1†2ÌG[ðt¥ 4;s_uyYª©vEz?w Ã3”å%TýËf-mǼÜ.Á^å»j^–ðòðzîÿñÊq8J5/£ãs ®®™É}n²¾x†²,"êÕ¤ d3íÒ WðvÓÿxÔ¼ ±V .œ"«õ5hŒeƒZðréL{~·j§à]ó²¾#·¼¿zoÇîr©ƒ9~pŸ½ý6]-¸¨ßÑ3˜]oÚcÖ{Vì1“jAiZÇXF³éܼwüênß´áZ¶sKV¬w+öìÅ#c$C|ÖƒÖ„º®ö‡Ë¾3¼®À>ÜsÇTß¶.ß f‰–/Éíœ>úÆD+îeÚÓ\ì;C^×cµï :Ë{¹[{¨í;Cžå±{;î¸>Çnß^û<ì;ÃkËÓ¾3ä>.[¾Îó¶ï ùh‰ çÝ?'Ë•ó>³åÊ}©ÏLó¹X®œÀ¹z3A¬•Ì|n–+o²mzÌÔzÌ„áºße3Ÿ»¥ÊúÕ©ì~æópi¸ìc°;-SfSø¼\U]V¦Õy[¢lÅ–tLð‘n“Fål×ìir[ÜÃ!LN–ž&{cû³,z½\i%mB¿Ü›—1½*õ³ysR_\Û`æ.TœãX{±‘ fù. Ø`Æ"´x’ayÍ"¶Y’±mÃÄ·M“û<=QöÒ -F]Í}|§ÉwÁ>¹'®ìþz_ýX…æÓÍûf&°ÞªoÚXo’"–w*€õ±±»€u[ä\ç\~­ÖR¬{§'k‰8ª”2€u ÎûhÀº­ºªá_ðêCUž³W·ànp¼úP^Q¼úPÞhø@^-9¿ÎwyµVUö¼ºª_ ^­ Yg’W¿‚ΫݫñoÉ«µ¥ÓðêCÓêlÔÙyu ®ŽxÀ«ô%5xµŠ!œ*Wkñíëí¯Î¯n‹ùÛ—…àÕ‡l´mí ^ý :¯Ög_ƒW«ÚÃW½àÕ-¸Ûº¸Z:á®–ð5,pµ‚ç0~\­ ¯S«IRl½å¸ZÇS¥yÄÕ‡¼kXVç¡‚V+hKXÀê׆«u¤¾j¬ÎÓ­VЗԠջdÐËY(Ûiu ÞóðZýÚÒiõ®¥ù(ª Z­ ­«¿`µ ˆ|ý Xíµþ–°ZA_úV¿~Öaµ8˜¯ «ó«_A‡Õ: Ë´ «{¡T™zV¿¶tX­£µ°ZÁªŒ$¬Î˜Ãê[}% X½÷WxÀj}i XÝ‚³/«åƒk™ aõkK‡Õ-89¬Ö––JVkK[«Vïê"_i8aµÎÓñ`õ+ø«wsø[ÂjÚ¬ÎÓ¬ÎÓ­Î3­~VëôUWkKËóÂW«\в{òj-)&¯~W·Øi«?ðjÅl G^ý 9¥(‡å©Á«ó,Á«_AçÕy7Á«ó,Á«óâW+h©6yõ+è¼Zçi«<òj݃ªpýâÕyÕÀ«óªWç¯~ýìJ¿kþ¬óê½WM?%¬¬u¶,$°Ö–ë&\ÿÙ§ë×>X¿ŽÖ‰u^Rë×%5dÈ¡unhMÏï€Öß V®ü :µÎ+ jý :µ~Z·àVEÄA­i{Ô:o¨õÞ‡uÉ"@­óg?ÔúõÏN­õƒíÍ;Ä" Ö9°@­´e1©už¨õkK§ÖÚòÚ÷‘ÙƒZ몪ïË"¨õkŸN­_A§ÖúÙª©jwÒ©u¨uÜ*RkmXå“A­ãpH­ã†‘Z'Xéò+èØ:dgÄÖñ[¿‚Ž­_§âØúuõ[gð [çï[¿‚Ž­óv[+Xn­óº[+­Ù—!#¶Î‹ l­Ÿ-+ÀÖ¯ ckv8lýÚÒ±unéØú3lS±u<±ÄÖñô[ÇÌDlO,±uŽ`ë×–Ž­uše"Øúu .æËsYE¶Î[ lÁ¶Î½[+X5õ­u(eýØ:Øúõ³Ž­w½á‡KE`ëXÀÖyU­_§1_æE¸˜/sË›ù2¶tlýŠÍÈ—q €­uñÊ[ °u^Y`ëœC€­_?ëØú<˜/gù2n°õkË‹ù2ƒ7óåi;žî¼Á­óžì¯û±:Òßë,˜{­Fìo€ÝV…Û>–+°Wö¢O€Îð/€}ÏcÑñØ ÀF wì ‚`gŠkb–P\s‚Ï/vÜpluÔ)›— Ø’«›u öŽn*A°Ã‰“;¼-I°iz{Wuûð7 ‚Ý‚ëy H €Ýb×¼ûÀÖ†ËQ¬[Úúê¹=¶þ¹]ÔAƒ°w’E°[pŸöÂSØ{wª¬t[Á}XúÀ–C“AOðëk7xL3ز}:Mo €ý :À~`ïWÆØ»l>F»bì]+ûøá{ï…ÿ˸° ØÚp¶Ï Ø*t؆¯^ ì]nD&ÞÂÞ%––°Ua}̈°[p]œoaKOfôŸ[BaËÎÉ8v{‰Z;»@Øy*_ »mbÚ‚a룉}N ÖÏK­‰°õͤ>`«8iÈÞȯ¥^+aµÖZÖÕGðÀ×±!¤Ö-™«&¡´– Ô¡´V•±aD*­ÛÀ´TZë„)ô ´Ø6’J¥uœ#•ÖBÐN|¡´,]ªƒJk9œ8ñ…ÒúR‚YH‡Jë BiÝö9b¢ÒZ0Ðè•Ö-¸& ÒZË•JUCi­I¥CiÝún €Jk™mÙ‹ŒJë–ø•J…ÒZœ¡2´ZÂ8¥Öz,y<ý”ZËÃ÷óê ÎÚÐX+˜«ò…"k5²+‘-5Ö-6Í5KSc}¶gø–…ƺݯûª/ßÐXkª?«\„kÙU 
Ë­ÔXk*³Ì€먨¢Æ:·„ÆZ÷†EVj¬Õ¤Œ^Bc­v~Ãl-5ÖQFõ¡«^_⩱–/xùý„ÆZk¯µäë]c­Þ(e-ë£äR375Ömœ´ix }j¬[PEC~ uþ,4Öy@YõŸ"ë]åmõV¤ÈzW=Õ1Êø¨²ŽKJ•uËL&æTYçEp™õ®Jµz×BhݧÍ`cj Ðº/‚ª…Bëv–Ç\:s*­Ûø“úãIÀ¬ûêqR¦PZk:XëõN¥u\0kºV§ÒZÙñ|š Û„Ö¹¡#ë×ñ8²ÎÒ²¦·v k[b8¦ ­£âBky Ϧˆvd­-[Ê4¤Zô;u ë×>Y«r©Â["kmÙ’îRa;²ÖEÐÚûÑÓYëh§³¾™;²VìÚ†ØÈZȤ\DÖ-x›ŒÈšæä¬u–-•’‡Z¿ƒVŽÈ‹Gd­*Ðí,ù!•Öí€lÅOd­-•‘~–DÖÚÒ$kDÖQyJd­úÑ{+‘å²Îëd¿d¥µDÖÊÀÖ³°;õÖu©é¬_‡éÈšþí©´n‹Bì…Ò:†uÁ†ÒZ.ê—iÑ¡´nû4ŒdÓ•ÖíÊ><èo‰¬¾Y¿‚ެÕááZYçõ²VQnK•v²Žrg"ë×9²fˉ@Ö2±>káNd­-ÛvÅÐYo½…@}ôø ë<} k×¥X?uî Èúµ¥#kô–|?©‰¬UímJP"ëÌ@ÖùY¿ŽÖ‘5Û&²Ž"r"kJ›xŸÜ@Öù´;²Î²Ö¨3"HdCÈ:/uI*­Õªc6M4”Öqñ€¬sš Ò:OÅ‘u¼ƒ‰¬ó5d/ƒýóoí}ûÉÌYW»ô¹ý÷¿‡WKH8Ÿ¿"¸nl×áYÀ†Cˆr¦JTÂ!¤ó•©dÓpiÃæ®bsòêM¯²Ú…C鯎e,yµÀw}ª"¯Ö°™ýá9)BZv¯_6ïé§>üAÔ¡¤ìÃDæèídÎGˆ US^™œÐ„tøƒ }Kúƒ¬ê s”¤þ Q¤M0£ ?È¢¢ásT[Òd‘ÃQgøƒ„‹ýAÂŒ‚þ Q¨ ´© ´©I劺é‚&6黤?ˆš>9X†?Hî!aÇ!0º AˆÏ9¼i_!Ö8+ B¦öˆ^û˜4‰ò÷AHìŒ!aqAƒ¨ÒOƒC¤AˆW†Ó o’Y¿-@࣠B븤ֹeøƒ  ŸÔ:†røƒŒ_ù[RëxÔÓ„îáÒ®ù9¤[¤ÖÖcèírNËQp9ÜA¬OR2ëEÓìö}Áu^t¡uH¸ƒ´ã®¯éb] Þî p€x˜uºq„;í/ÒÆá7‚´;™µºIÃȬÃsƒÜ:® Ùuž'Øu-àuLf„×áxMÓ°k¾ýAüHy€]Ç¡¦?n&Ù5ž’k´€z»ƒðG=Âè"ÝA0H®ãV¦;Œ:¹NkôÓz»ƒÌ^VBr­ï’s¡Àt¡K ÈuŒ7’ë8½´¡‘Èuø<¤?Èê…Eéâ‚Ù´Á3þ ¸²áBÓpÁ¸I‡\´áÏ\ÇÝO‹÷dI‡<ªäÖig!éÂã¸ÆcElSÀƒ­'Íþ_ÿÖ ·—"¥5ÈâEUi ²Îµî bçÖ ¿ª±Î{D`MÇ„Öq¬éC^M‹'âjε ÕQ´¶ ~UƒVóA ZAÐê—órc¯'«¦ÁÏ˄Ǯ þè«æ½ V+¨:¨·ã!Õt“ RÇAR?PÍÙ"@5ŸÎÕ¹ËÍ%}†zY‚øcó²A5_{/KT_– ¸ÞÕoÛ·ÁTgpa«Iîqeb W‚jŽãt™ 9u\rê<ž“ 'ÝÉ.8uÜirêx°È©ùNœ:~œ:Ž“œ:†+9un NsU:‚Àj†œš&‡Á©ý5”šILPjÎ „ÔÌ R§QÆ„´øX¾Ã§ã1&ŸŽ‘OÛ®tËóef éüÅnyÖ¢7Ñ4ëMs hš Æ@Óv7Sa\JRi¢‹/*m7ûÇì?f“Qÿ>Ëjµýü¬Áß@º=ÊÞ-@Z­v­Ût{D6j¤ H·d~*¢–ÕškêúHoýÐG–@Z=ôÚ }X5p4u>£©fzáh×rG«á1q4?>ŽF—íÄÑüÈ8ºå,íE=–‹ÄÑIq£Ñ§:qtGâèXÜGl#ŽÎàÉD):t>@šé-yt,]Á£Ó¾<: {È£Ùš6xt,‘G³mðh4R ÍÆš£Ùš7ptXEG³s,qôk—7za¶1ôy®£ZÛœñ¹`ѽ´)Á¢Ù›2X4»y‹Ž>ƒdÑ Â«ÄYt¬£h*>žE:P´‚m¢’] èhŠHM#?(z• ®ÅŸÇ(:(:š>EGcG¢èè=GýúYgÑkOöÖÒk;‹Vï¹m7Ší,Z Íaâ:°èhÝGýºB΢µÏ»¦]°hŬXttõ#‹Î³‹~E«Qúa–ý Ñº®±^%>4) h´.Þl~þ ÑÚçeRIÐhýl/‚Û¿C£óž€FëaR[¤:‚“jÍ…Eu\5ZTKnšaZTs$‡Eu_ —2-ªó aQ÷aQû„Eu\UZTÇU…Eu tXTÇÒ¢:N„Õ̇¢Z…V—B‹jæC¤Ó9^A§_[¢šÙ ét>zô¨fÕqžô¨fJðÿ‰{—]K’$»r_q‡ä œö~€à¤€bDÝ@ÇŒ rd%ÊHÔ þ-[ÏQ•½Ä®gFx0oEd¸‹Û±—šªèÒ­[^ÕH 
RO?©5¬¾ëw}VWq7yMqùˆû2™DÔ·¬îÆŒ¡@jÚ“HM{Ò¢š¦Íh©«Hç¨RW‘Š—·ü|¸|¸[&]>èjY]>àèH—„Ë^qùˆ#·-“-º|haД.Å(’.Å(’.õH¸|k\Ú|¨C2ÄK›b1I›zäÛæ£œ6êáL¿A›r6Ú|ÔàBï=aóçô:<ôù(¦Žôù¨GîôÞƒW$}>ê‘'½÷x$|>ꑼ÷x û|»aú|”éóQ\轇†EŸbÈLŸÜè½Nú|Ô«-VÕ¼Ú“Þ{pÚ¬VÕ<ò¢÷ì^Õtœ®^Õðpžé½‡¬^ÕüÙâUÍ ¼>êÏnôÞƒ—(Í>žAxïñgzïñÈ“Þ{<ò¢÷ƒôªÆÓƒWu9Ž^Õ5X¼ªñðèU]\é½Ç`ñªæÅnôÞã‘;½÷<轇'[½ª¼è½WzU×#ï¿¦WuùAzU×#zïñÈ…Þ{DÒkÓMA¯ãÈÛ ‰A¯Éô§tw½V“s·.ÐëÚA¯£—ÙcŒMcj§×‡àôzǽrÿ»ÓëeÏBØ.ôº¼ Àëú¯ãGW7d¼Ö|ÐÝ|¯Õ•š{(áu£×Iu-àµÎ¹›Ððzi<Å$Ò¯}¦»¼nS[Sl^/¢cffx?{ºï˜àõ¢BÞsZ}^GpïÉÀu¶,{¸^T±Å6Æ\/*9¾f/pCåí¶a׺˜ëÊà:‚«›\/ªžažb׋º&ËX®ÅÌø“àZ÷™oŽ­ë‡áØ:~ò¾]oìØZ#¾»¼[‹†XtbkU@ÖzUÛ½¦lÚ‘u}l@ÖˆðDÖz¦û¶¯Ⱥ¶ k÷>²Vsô}“@Öjî`õFÖñ×罜iÓìÈZÁh&ã ²®OÈZ_‹™âY×d­§:ícwˆõãzœX³EW×ï¼úñ`œW×7^­K¶1ÐHŽuXŒ6é^a´ºt¤Õåå“Vë|1D§ˆÓêÇ)Vëg#9[ÎA«ÍAæ¬6Z_ãy 3XuyÇdÕ/úx¥Ÿˆ³j½Çh•éší¬zÑd*«ãU×#_¬Z_E|†Ãù¬º>5°êú¼ÁªãgcJ5Rc²êÒ/’Uë‚Ìh¬ºtÔdÕúÙi]‡ñXuùäȪ\íbUk¨Ýs·Qµ&Cßó¨Ö#ˆîdˆ/U׫ªÖf˜HTXÍŠ’¨ZS¾3'DÕå{$ª^Ôá§g Qué9‰ªuéfHT½´^oúÜ£úq¤£êÞnTÁÓ-Þ¨ZÏÆ¬A‰ªËJT½´ÚjqÙŸ(­ëC©Žàtnk²_'ÕëqR­'þ-_A'Õõ«©Ö£qŸ êf|æþ¡#Íp’¤º¾+'ÕŠÅ]Ž‡ç¤º6êz9 Õ ‰^Hª—6é[Ò.ÚIuí{@ªË× P½hÄHÿ:‚êÇÅ:¨ŽàjŽaÕ\Ü ºªÕ”„H TsTù1‡ê?ý4}üóOñZ$Þ›cÛþóϯÿŒ~Iÿ©ðúÏÿýÓ¿üW=Ò(º—ï)º?½˜âð¿ñ¤ºÁ_ü¶ÿî'•ÕÆÞovüiÍ,÷ ®!òô¼„÷¾ô 6¥ãÆŸ¾ö)¼žý»ô?ÍZw‰kØ[ò5o¢_Âë_{ïgß¿ƒ÷Ÿ>»†×¯þé¯\Ë)dtšË.ߪWÆ7ñ]]Ö?þüöÇÿ‡9þo‹Kÿøù—ÿþ¢ïþÿããçÿöÓ?ýükÏt¶Å‘bóëÏ´ýÀ™î]Û—bl?×ùןé¨gú’¾3»ì¿{³y÷Ò_Ûwú®–1}Ùpó˜7½2ˆÇUX싯cŽGõj<öEWDÕ‹ÉÛ'—ãÁ/nž³¤¯Ýtv¶^Å—¶”ö}²¥àj¾¶¥´«)-…—óYKùý´ ®òñcÕÐs´©ÙºërÝôô±¿zéß>à,“|Üb7Ç_?Ïr÷³,¿ï4ëÙÖâYÇQß?Ëö;Ïï_Ó^-®å4G=Íß:E[Q¿4+ýX×]¯åXbzø¸ýO;ÃÿüÓÿúå—?üû_þðÇ?þñ¿¹þõß~×Púñë6•F¦² ¹5ôèö›Ž~׊`.˜ÅäO{˜³‚õBŸÓ­Ó޾^‰w•ˆ£—@kµç´ŠRE6v[ÞX›ˆcOU·’])â8#}ÉIï*Ç 7>](VI9öïÈ*|ÕÞÙ^«ôÑXÆ&ÎURŽg4ØmÌÐWI9âæßÙÜävçuÆ2-éà:k-dë³ðI¹×›*”|sÖæØ¸¶÷2å:ïÑÃîoÀ3Ÿ¨¿Î‡qóÎfßµÎZŠÙß´EJ’¸–®(ˆ›µ¥ÝòÔ]XºãÙ5ð1©ô9D0«d_è;³¸«tÑz^#C#ºv©b ‹ESòê«øŠfÓ‰·úûä­ÓiÔ*@²nï–ÜTM)Z—-þ~Kew¡-§µäi‚}áªeÐA"¥²2y”‚‘E¿Zò~ŸÑ!lÃ0r] ¥ì÷›h&ßâf_-y¿wìѧ‹c¾wV’X%÷8÷WKÞïõÛnE®üWG®Vo}]£ÿˆ¦ÓZò~Oñ)åF€h¸¹® #AkAÍ_÷WKŽ7¦‰«´O¶àbÍõeÜójË1犯%ß   Þæ µÊ•-:úÖ–÷kÒ̪o¿“Ãý’¹[Úu­2Rì/BG:Ï_eOùFÒq« ±Š]Ñ‚ZkÞ¯ÒÄU¶·©)è» ×æà»½Z³‚¾¾OÀO•9¿Þ+N«Ä{û»S޾èÛ«˜ÎvõàXŒUp6{ºUE´Žw§¼Ÿ*Èg\ãkú{“h¤ë¨‚×±ecFûˆ;2‘vÎ9dqŠoÛÙ{eÓLIW½ú÷⊞϶¤VzÕ]_½WÖ¯¬)ÌRp,Ú=^u|‰ñ {c–œj¢Î7“TXSCŒô½)G#3C¿è×R] 
>é2—ïytË¥“¼ö¡Xy<ï¾F¹`8›ÕW\m®ò(­1{–KʺÑ/Ç×2e SÇêaë@=Ý‹aúÜF¿¬åæ\Ñ;ȵiuÚ‡¥­wŒmûè–ewžÚܘɥÜANsª/¼ë–oS ¯·Éh =Ý-Mï—ëmFî4–ŒsáBŒµr¬ìMY%sõQÁ±¨Ù.hÍ*H«’¹{ôËñ-M©Áˆ$7ËõR¢]åÔçÖ–†=›ò”ÅÒ£ÇJ Fk]fgÐmÞ/+8d= îÓ9rš-&xÒ#µ‘Ǥýqꔊ©ó˜Í;æøß–Õúåh £}Ä äújév»ÞÙ²²+_ÔÇëö ÎË1¬°£ÑçônËJ’öÝîþJ9ˆ‚“J…¯ýF¢'Ùòã¡Þ)3jžyCmâý§uË^Lv›çT®µ>{_FµŸ-¦vËÈ–[Ÿ˜>q[LíÆ‚¯šÇ}äʬ‚×È–O‰ Óæ*n!…Ï«mûóƼÅänÈT<ÍÎkÓdodËúÚ·TL)8„OÏ«½rÚ¢à5¿ò¨mέ&íïÌ¿£?Ð<¸7dU˜Ï댙ÝX|Æ6}ÃèŽUG5WôûúL7¹ê¾Ik—i2Èm1 ¹‚ÑQæ#yf¦ÉñÔל(ØÕWŠÍ[Vؤ(Ï”±,ccǶª¨Bïùýg®YhêÁŒF¢|hš³õº¥›#O®±%õi«QuÜóu•1mÚ¥N|ýýš’Çöƒû=ÊÙD‹—Ž­/qDÐôÕqÁ©¢UpIùÍÓºidÉ¥µJê1²ä£ñ±!ÎÚZáÞGP¥©û˜»Iê1²dµÖ,’´Ié‘Irôãgú³mRzd’<Ç»Y¶‚w&Éч,9¬ìsJ0ÛX•í-ætûÈ‘u#wn'Û´ÛoäÈ‡ìœæ!‹çÛœIr½Ø-¥0-í¸³Þ‘–|ÏL’WlÜbR·Ö{N²í*f0:cuR)³t2õ{-ƒºS ¨àI²ÞlîRÛöÜ3¥SƳ›:SÝd˜I²Â]…É5ˆøÍÝÌ£G¾9ÀÅ˯¶¢ÉW\l—ÐýØÒ^5þom-¤Zäæ=8>FWÈöIªI ïeLZœTÇ ß6 ¨ŽXvRkÑÅÔ›„ÔÇL² H­ù´U×%¤Ž ëá© Â5BjQ|+ÚIHA«BF]œ!ɨ… æÜ÷FF­ÖgÕÙH©ËFzRj9CZªGJ-gH§% ÔÅ6’”º<‚S¿rêâBNÁH0²í€S+éî€TS Ô,¹1˜œzŽÌ‘Œœz×]ã;§nAmHÛ8õ.Ì2öÛSksБ+0µ‚Çõ~Ø ¶¥ŸDލ[M¬èÉ{¢D­à–%f@¨³êeÔŠEÞ˜ó*Ô-è4ÈuûU‡a¨dr@­àd%o¨[ÐêóP·íèNçP×½êÔ Æ„ˆ‰¨[pδÌõg±¨ÛbÓÔ ‰9 nÁ¸§±8öÖê©´Ù•C'õgAÛR_ƒ†¨\§ÜƒèŒZ1a QœQ· ÕÊ£nA_mqFý Âðñ JtF­`4ÃГû#iªŸeÀ¨[Ðjð€Q/ùXBQgÔÏ ÝvÒB‡Ô­}XU(@êÖ>®Ü¤ HýhY©-Ë!õ£ñì0l*ÍÎ õg1³S^²ä’¬CêgpvÃŒƒDð ©k¡>@êÖ&Á9¤~7÷ø9H. R?ZºCêÖì|9Ö!õ£Á:¤~|©ÛÏúêCêôU‡ÔïÀ!õóHƒÔ­© /î:T>§ÔÏŸÝÜÍJ“òÜ>Jý n–V>§ÔÏ#O÷ûˆþØGLPêçÏÞfV#¹dÖê¥nÁ9§™ ÔÏ Qj± ñÒ0ëo¿G©<¬(õóH£Ôû3Jýx0N©[pËMΠÔí”Vß ”úy$ìKkÐ(u»M+þJý ¦V0f©pLýYp`ê´Z^ÀÔÏ# S· •ë¦~ S·`V«¥>Úfr[qJÝ‚f‡JÝ‚£`™3êú›`ÔÏ 1jm툺HûBÔí§¬Ž¨|£B‡ÓÏcv·©Ú±Ôætº¶DÐéúe€N×ö:ý<ÒèôóÈÛ}KÐéôãgN?4:]{Ðé\“\9œ~¸¹![Ûº•„Êátí:§k8];Àéö ®k¬ N?Öý˨ë÷B]¿qPê瑯ýüF©ù¥Q×/ˆú³`ÚY–¯ÉõóÀÓR둆¨ŸAØï>ƒ÷m1‚ލk¿à„úñ-;¡~þªêLJ¾Â_S›á²Ž!µu+Φý¯Jû_Ÿî7&à¶ ! 
x´‚ó½ò8Çqm'sÝþÚ8@Ñ“9‹ö|QèÇ…;…önü¹âŒ^³/´8~þ`èçµîÒWÕô3x¹ýãÎ…;'ÐØ ´‚“U;~œÓt f)Kè篮î‡\4Ý4W2ègÐô3x¸'ey% _•èú­:~èE'JÌùó3è[A#ÐÏ`íN~lgèo¶0|èC»Á…Ÿ$z£ê—$zK¼üçJ¢Ûì±zXP´lÒPŽ8zg™ âè]Lo3™6*·P]'„ÀÑ% Ž®Sàè}ÅR5y´Œ•\EC½‘2xôa q´Öyöœ1GÏßôz’ GkÕvÏ)q´•&¨ŽŽ>iß×Q> 8:bÇäÜqô¦å¬;G NõôÏGo'ÖeŽÞ.,G7Pv|G6­àd*e§ÑÛuUÐhõó¶¶L½©ê¹a^àh ‰·#iqÄ%Ë€IÇ tN6ß’Öîø-{h2éM#†Éó¥åŸ¶f5ßAÒt±Ï%^%]í@úq¤iÍ5öH±:Äs ±ÓuáÒ«¼[çkH㤛S¦kŸHëȾ„½êѧlƒ0ZO"d‡ÑšN])ˆ"ŒÖDÌ+£#ãìp²"Œ^›mOŽY€Ñzæ/;ŒÖ9}i0Z=)ÄßF#Œ~<‡Ñqµ”Ø£ëq±É—V£ãW×Bª­¤¬u[{Œ–—ª/<¼`t±]&ŒV09#P´®t×Q´Nå Ž¢K½O¢èrBÑzœ¾Á`üb][¯ßK-ˆ`s\‚hý¬Ão€èú&¢K^‚èR ZV×>DÇ9' ì½ÔC5q*AtqÐ&‰^õ/¸;‰.EŒI¢å%uÞÙ_9‹.ÀÉ¢õ«¦©'‹Ö3©>aôªñýLeW‡Ñz“>ÿŒÖA[z£I++Ô•ãÎGw $­'>ç^5"i‰pÁ¤KEv2éÇ­8“.•“ɤõaÆ Þ3e2ésë&˜t©ÕL&]6(“I‹m^ËÈwȤ\ÓÕ­3éÇ_;”.5œ‰¥5OmGÒµm8’®ïHZe¼ã™õ}äDÒ:Ýš™>‘´ž§mq!’.þüDÒµiIË+ß¶¸I¯Ú·}e™té–ɤk£“.e¼É¤K-s’éRM€|Zç´ý&¤Ôõ›¥^5hl¹¤V$祮¯¬º~`ÕõU×VÞ‰õãtN¬uÛ5|íÉ­ebè[UÀ­ÕNyìÏOn]Brë8ð0 En]Û$¸uý&Á­kܺ^,¸umÍàÖõ-\×6 pGF‡Ú! ®õ"çto%¸~œÒÁu)VW®b­º[Ú8“X ù¹åˆµÊaLéýY\>¢¥-Y;žÄZ r7G ë6ÐÍÜÈZÅ|I­åç”–V…Zó µf!ÊB­ãçÏ9wU;µ.&©µFó~,Ô:>ê)×XI­UÑ·c“Zk.—zšB­Ùej­ZE·¹j€Z×#A­Y2²PëYü.7Þ“ZOª ™K ¤ÖÈy ´æçS 5Gíùh­’¯g — ­5{8ÍêÄ¡ui„Öå@Bk~wZ³³кž К¥? ´fvZÀ5³e‚k&§\s¤+àš¶¡\×#®9ÚpÍq¢€kôÙ…[ÇÓϪA[——Al]ž:±5’B­Ëû'µ. 
©u½PëòxH­KwGjÍ‘‰ÔšC~¡Öœ3j=ip6-2¨u¹ØA­1—.к<7Bëò‘Z—ÁÐZ/¬Y¸kÎ1 °®GXó3?Ö:Õù1«Ù”Ö¿É—zô}óÏ'¬^éPGX}ë¶Ò°°Z‹¹lQ`µºA›óTb½™#Z!ÖÅ@ĺ”y&±~0W'ÖªíŠVúR¯0.¡-uäÜKænÖÝãë4À:Ò ÊºXÏMŸDÆÔ1¥w½ ©—¿¢±V‘÷±±žš¹OZ½Ò˜ºM¾ssd±¦>ÐaM­"æ¶c¹ZSÏVÓºXSÇ{÷6¬©e2s»bMiƒ•].ÖÔ•À ÖúYóa¢5µÖEM9Ckêx«V™“ÄZ5eæÔTYOªÏ›‘õ#èÈzjD#!õÔ¼ÛÍ`Ú‘u÷å{Èzj"Ҵæ5µjeSßRB®¦LvcjÕét«tSÇÆ÷u$"vb-Óo÷Ï…1õ-Ct³h²~<7¦VmÐ8p¸!;²ÖüÑ7xÀ˜ºº—YëÙ¹¦ÆÔ·FòüBÞ¾ÔõoXë=Ö—Á˜únB×ñÙÀ—únÈÂø°S×ãÜ—ºM¹Må_ê[¨#}f@«õ–ŽsˆH«§Ù X­'ù°»R·®¦‘¬~é°zšÇJ(8u“–Q›œzÒ$Â8u}µàÔõöáH­æmÖŸ$ÕõƒTë ?lí¤ZàÁG'Õ“r ó@©.¯ ZÏÇmתåêe6<@u}4ÕºA3%¨Ö·ú‡Õz‡nóí ºö}ÕTOm‡A¢H€jñŸõ{ Zñn£*@µjÿZ‰L‚êòÄÁ©#öV$~¢®ÖmNÉ ©u—î%NcµŠ®äÔzꇣhçÔ“º¬L[ɩծ&ÛeN­þÖÅþàÔjX1´Ž}zàÔºMOSOmj˜ã58µ~5OŒ #˜œ:‚Ñùff N­/ÖÌöÞ~ÔêpÜ„ ~ÔêÝ» ~Ô#ÝZÕ¦}…sê:ăS«álɵȩՒ½~8µÛ=oc4§žTÕhpêzN€j} V“” Z4ÛJ(@uµr6(?@u}ÉÕº ,ÏL]Ç`êúS×1˜ºö‚ÀÔú`ã“IÕ¶cj}=w.é;¥.o„nÔ·f²fË7je9æRG7êúN™Á©õ©ºm(ܨã ßµBN­¹Œ£SGptaHõœS—„‹œº|äÔz·Yù‚S—~…vÔº“iMåì¨ãÕD¢v|Ê©[¾ou(ÌaG}·"ÉE`GÍDˆnÔj ›¹£ÂZ§4ð3j]«m‡¡µ^ò= ¡¼¨KázQǯúVzQß”_ô¢Ö‘f©?ê»í]šS`ì~Ô· mz¸ÑºÌhJ­;q«†·)µ©ÙÒ”Zïbï"ZR—¼‹–ÔzÁ¶© –ÔõÂ’º>QXR+x›É/,©uïʦõɨ? ¦%õÝv ¥°Œºäž´¤®? 
Kêú`aI]o–Ôõ!À’Z3šÛÉaI­6gþu´¤.#,-©ëƒ%uOó²¤%µšëä¾ÜfI]¾e8RëáGÚÃ’ºÌMèI­9–í§žÔµùÓZƒEzŠÒ’š“6:R—‰7,©ËÔš–ÔÝ’ºv+°¤VgåõàI][<©kŸOjÓv¼Ñ”ZGš !M©o•*sßi7¥®Í ¦Ôõ%Uk¬·¦DÕLÎIª§¶U9»2xR×ÏžÔ§çžÔ#Í“ºŽgð¤Ö¯š{*]©ëÃ+µ‚fäøªA?ËÊ?Šª§CÕq_‰m­ã‰ªO˜N¬ÃŠÅaÃQuuª®†´)æ‘´)¶“ÕdŸÎÍU;Ø`Ò¤øÝÒ„ž¿tMøC3S &õ±4) ä3/ó;¦H±4¦ȉ+Ť8%Ó–ºþ¬ÛR×_…-u䣮ò¦-u –ºüì°¥~þ½ÙRWGgØR—VL[ê“Õ¤Š-õå;À«/5ŠŸÑ—Z£©zS_(^S¼©c\ÞMNIoêè]-=/æÔÊŽ²ÒFq§¶©?WF-qG t§¾TúÊ;àN]Ì{èN} ?¥L“îÔõH¸S_¬òFwêK†àIéN­ªsF)èN­Îß64Àú’]|V>¢;uùUºSߪ(dXîÔ*»ví_ÃZÅÆR,TÜ©ïlf.œºO[5¢;uty¾Øwêƒ;u1®§;u1®§;õ2MźáN-m°Ñ/ºS× Ü©‹?Ý©kîÔ5wê[Pö¼t§®A¸Sß,¯Iwê„;uAÐáN]ƒp§VC€p§~ÆÌºéNÁ+‹œwêìîÔ¥`Ý©ŸAs§®A¸SkŽl.*p§®1¸S× Ü©o%–YÍŽîÔÏ ¹S× Ü©oQ¥;u ºáNý š;µ.Àa7Ü©kîÔ5wê„!ˆj:¯£;u ºáN]ƒîN-M¦ÔÐú4wê„;u ¾Ü©ëßÂZËòV›îÔÏ ¹S× »S×Ü©kîÔ¥Œ Ý©ŸAs§®A¸S— Ý©kîÔ5wêgÐÜ©uiáDsêƒ7u‰ÁšºÄàLýˆÝ^Æ1Sßê-­Ã˜ºÝšš±bM´-kêÛ ªgj¹IgêøûÓ¤Tt¦®'ƒ?µJ¯:W‡?õ3˜þÔ5ê„?u ŸºáOý š?õýíÚ\Å ê„?uy@ô§Ö9®í3\ýxìô§®¿ jÊ\Òžº{êr Ü©ëåÀºáN]ƒîNÀ¬áŠ;uy®t§~oÏ”|»S׿†=u  ºaPý Þ–)ãæiP]„Au  ºaPý Þž)#Hƒj¶GT—ü©K öÔ%wêGìò$18T—lªK ^Õ%«êGìòü17ªfˆnÕ%³êƒWõ#vŸ¿jôëô¬fŽÕ ¹_uÜž{^Õ Á©š!øT#D—êº=öª‚A5C°§fîÔ5t{öë!§Ð‡ŠNšZútÁô#è‚éO‚)˜~Ýšº 𴦮AXS× ¬©kðqªÃпӛړï'Œ¦{ѳ†…,œÝtŒl×6Ì”ŠnZ–{f]Mg£uóñ›RÑéã¦æNÕ-túˆ祷èôQÜñèôA³±âô!{ÙÔiFWÃjô׳^ð貯­}HhaNñ –Û Màôqjv”z«êôùÆòi©Dù`ÜæY>v7‚¡Ó‡ìóÍ}…Nâhit—Nñ,Îô=+N2ÛJK¤âô±(3Ìâ|túPYÏ#¥Qpúˆ7î?:}Dúä&šâô1³'­>–x¨æ@«„ÕÇŒtú˜YÙ“Nå@'Ò‹Iî!!‘^&²ée”€DZ©ªã?éO‚I¤g¡/s­‘VCrŠ"­ÔÙtA¤ç»àj#ÒÑ®/[ø'‘Ž_«·ÝBÉt±ú"?‹Å—7‘ž¬O›ui}®Õ‘žÛ$÷þÜêcn !)«u"­¹ j:‘ž5CÜͰÉô,3(7Ëv"­nÆëJH?Žt"Ý:¨a¿ ]¯JYCçÑóɪàÑÚ±âåÎÀ£ç“ šàÑq$ŒyÁ£uµ^™<úq¤óèù,…GÏJŒGëV²4pt}#ÀѺ¯Í=kZ–f§ÄÑóÂ%Gë ¯Mè4Z”¹@’F?~Ði´fÝæ’ =«~ží‚ž[I—¤ø Ñ#FϪdåÓ@£çVì;FóŽjÛ¤ÑÚìd.¤ÑÞ&œå“FëgÍ®‘4zn¼3—û@£A§Ñ šÑ#i´‚ÚËß³Ðh]­y’F+èå8@£#Š Êþ4zÖDâÈ0N£k 4ºÞ ht<½ø|sÑ4ZÛÓÌ÷M£õªÜa 4:‚«Ù*’FÏ­r—¹>;®ÍÃitüêæ–Æ ÑS:®ï4ZwbΚ¤Ñz¦^4:‚^ž4º¶eÐh6sQ%Ö­ø6LÐè¹UrIÃ|àè¹Õ€Éô<º>>é8r;I1¸éF§œ»å€¤ks“ÖÕFš•®%Τ#¨IóÈ4IëNbpHÏhgÒºØè];'‘ÖqSVÆ “ÖÔ"^X_}LzÖ¾×éHçfgÒºù)‹QIëþ"éýÜ :ø¾CÙ&ý¸gÒ Æ81Tw`Ò;t&­‹5E&ôã@GÒå@iMºŽ{è¤gõ³gŠ5¤4?0iýê|Ú)H냅Ǵi5d7’ŽsÆ8™7 i½÷‚“~œÓ™ôãjIëgÝa×™t=%˜´îdÊY<™tù^ɤÕõ¸Kê›I×+“.ý ™téèȤËe0i=™{;³v£3é²{™LZ÷àÃ`ÒjsP'Öù¶uí…F3­^Êl£ëË®Ÿ8p´~6Ùq´ö_gå,âhõ`fšJÍî 8ÚŸ Xtéh £KÿM]¾|§ÑµÇŽ®? 
]öžH?~öM¤Ëx@$ݰ™*J+h6³ÄÒ|œŽ¥Õ`º¾w iÝBz8í/ hº6BÀi]Ìbn€§ëp@=·|HÛA¨kÌu}j€ÔâwnùL=‹išw @µàÞä’hÕ³6^º'ˆƒjÙ*%¨Žàiž•Õ³êW¥]@µbfqFP­Sº¿@u\ìíî&Õhܦèxô2?fEý§Ÿ¦þ)2ª¸º¸ˆø«CÇÑ΢ÿŒéšþSÿàõŸÿû§ù$.©ç„ÙË÷„ÙŸ^ÌqøßxRÝà/~Û_qÒ÷¾þsÍöðgWÂ>Îÿþ×^A½ãMíýç+oÿ©4T›áöë‡~~óÓïçýák¯ ·óÞä?;ûë÷þôW®BFw1--ŸøPï©EÅÈÛ£Œ úǟ߆÷ÿ0üÃ×üñó/ÿý?Lÿñã|üüß~ú§ŸåitabBs v÷ò7Îgé'Šëúí§Š fÜÒÓµõüõ§Úꩾ¬ãøšÞª|­ÿ?t‘»—èýª.ÂSëÇU íþ’Kt÷­¤¨Wã±/ºšY¾r‘,Ÿ]Ž¿¸yÚ@ò5-å8÷ÚRx_ÚRÚ÷É–‚«ùÚ–ò]ØRx9Ÿµ”_1ôl×ÞVœbJ°F?7µœwÌ[âzýôô±ÿàÐSN3ÅdP«À1ÏÓÜý4Ëï<ÏzªDÆ;ûþi¶ß{!½˜rH±þ•óõ<ëÚí~Çëˆ æªÅ>•|Ž)Òã^ô?í ÿóOÿë—_þðïùÃÿøÇ?üòoù·ý·ß5œ~üºm eTYµäòÚ<Öú³Ó6s®€þ»¦ÝZp±uã}™Ü=p‚pq_ºLg¼´=ˆijYö«YüÛÏî©WÙ/íWËýšñ¥$e?Oì.Ý—+U'ûq[·täÚ¥_é¤|?´˜‘5"w)†¥H< 7Ú[¹¾A]»K­8Þ®R>Ãdo[¿‡çë.O§×1¿×ªÝØŸ¼KÔ õ¾Ïºô±·«Æo‡Ðñ²¾­Wª ömNQÆ.I3úÛUÊz¸qlªÜ²ûÚhYfI·ËÛ6Íuöí0ß9Õh_³j¥þ<$û&½Äغ„!‚Ø›È ·é,]ç°«ðµù`鯆”aWMhÛØ©ç%›7Õ‘¾!¼ÙU¦v8U¨vµmÄ× vmQ˜ƒC¼w³d•»²NPüe*œÏ¬êÓF’a­ïÔ©¶ZËPèz–e »Vއ÷ƒÎiæj„c_?kkûqåJý®BSÇ=¤#û9åb¼àb¾ÐñªsÅ}—sºYŠï1"Eõ]ŒÔ˜â~î¹z®Ÿu'ÌýcÍTçô%3YqŽeQ}1±Ù”Ž^YÌÞð ¬AÇâ¦.È×/š!Âè•å·|¦)¥ˆ`=é‚ ®¾ ðhÊ[Õ]#ÕÖWF¯¬:ˆæñÜ„/£WŽ÷Ϲ L*¦±n¨±Û7"Õ 7–ÛC˜Ú½b³ù'Å£5…Œ¹tw’d¦a‘ÖXÒ仹ˆ>YUÒ¶Õ¢}²Îx§^+qùúk[‰iõ¡G—ü8äL UƒÈzå¹n³Ò7¼ç®e‰nÓå§¶8-qd—¬³,È!÷äôêÑ7l@ºÕÌ.Y*ÎÜr-Èw¶[ÓeØœ¦:ZE²•FÕ%¼¼Ovî_˜™ã”ûØ7ó¿Q®a®ñRB§ÏžÐ”»ÅåKf6¥—kÙ³O> ×==iTvÒ ðQùì“Ù?*{Lg)­DLCÍ›+i÷–-þ™v¯-<Òî6-y÷^´Û'lL»5Q6ö’voÐÀ•¼ûT“ëÛùKÞ]òõ’vcžÄ´[:¶yÝ•´û”ðt<Òn<Ï»uJ3–¼›¨`äÝ'”Ã%ñ.Y0ïb´gâíï¢&Þ Jâ­rÍwÁÄ[2¡´S+‰wÛ!5HPI¼'øm•Ä;ÆÝÜÓRò’wsÒ´»d¹L»wÓ;•¤[{…ÒI÷6!c`Ò­<ÒL±JÒ- Y0?’îcT×+)wŒ@Çú”û1@Ê=#+)w<=GA¦Ü’‘e} ’rl¿rnÉ|ÆvÉ­.ÿ÷ž>’ï+ºÊvÍä;†Œ#§¨%ùÞ´Ÿc|L¾õ’îÜ1Áä[9½§–H¾µmÅöž2ù.îiL¾…p­`“ï½ Ic÷“o}SVñÉ÷&»åÜȆä[ÕbjC$ßê¨6Û仕½wÆåÉ·ú²9r·¤î·ïYœ`d…ä[3 |c’áÉ·ºùHï’ïyòÝö—îéo‹ì»úË#ûÖèº->¸} ewž}×=b̾¯û{®Ç*8"o&ß³±FËÃå9~íFx‘‡G–wX3æášaYƒy¸2(•–éTÙóp=6L=‡kGå>ÌĵãGz&~)MOûcfâ÷ì5예kž”UŸ:6&âñ2&«¿ÁLüÚ˜O#nWãDæÓž‰Ÿ ¿Jdâç{RfâõœÈÄ ®ÊÌĵÜÕ;¼G&~*ÁÏf€DüTkµ™Šgâ“&ãd"~´Šã¡¿òð£³䆉ø¹Nx"®Ia¤ ÓöLÃVa$|ÈÂõæãî ‹Ì£R{¾«ºJ:2 ßµ°8]»Ï,\\gO:³ðCȹ³pMQmAYxâ)S¦ßºMÛÉô»¦ßBBéÎü{S ÅÜ<Ïü{×&Ù„7Ì¿•–¬Yžùwýf<ÿÞd"œs7¤ßÚ¸p¤}WK¿…sÒññGÓït Ÿãßÿ¦Ü{Ö¶ÖWnTsoA*Ks(>A-jfÞ³æ‹é6X´'ñðŽÜ7ÌÌ[ÉJÌûQDæ]ÊQ{rLJ¾XÒ í‰6˜ú8´'—úR{Á˜øšjű÷ÙØ—)H{Gzâ[މ½ÕH1¼ì}*=ÙÆV'bï¶¥ÞÒ`ïcGeGboYM›±÷1¡þ ¹·–1liØ{oÅ 
Æb.±÷¾¢@4°wt*» ,(=‘aòn¸ü•wÇ¿ALéÉ…¢§äߪ`›üiwÜ©ö”œŽ´[wãB)âï®.Ey¯û2䎴;ŽìPŸì[d+×Ô ûžáÎŒ[jÍ©¨NHE üžá¾]T'7Ü, ý.€›ªy—ÏcT¡ßO ý>Ó‹ä‘sÇÏÞI¿íØÎ,úý¾ rï;ʋޤm¾Ì¼‚o> ø>àT‚“h†Ã ùö ê$ß„:…|ËýcÍ|ãA¾§ô´/ä[›4Ǭ €om‚¶Ô¯‚ïÅ ›?ÀwLm†ÏGß%ˆ”[æFw¶¶¾µq-­û øž$h³­¾Uíñæ\|ká; ø.È”à»^äÏVðíî)…|WF ô­ÌÇò+¢omùÍyÉw9 çÖ£µEF’o5˜sæÛr‹‰^Aê­‚l顯„»¾CPï8ðÈj…zWѨ÷ &ܺQë¤ õÖú|–†bæ½ÈóéÊdÞr_óä™·<Ô¬ørÁÞü®H½Ë3@â×ãE¢™x/ª—êÚ‡ÚÄwx3ó^´4kEm²cÏî|G³œ‡ª†à»m1ÏùRo¹˜Ùš*So½1ŸxWô=ç`ý›Ä/µ2oe|KZ$1õ^”–¤„´¥Þz,ñ‹o…Öß½Ný#ïŽÞâ5´<òî %È™xËÔ`MfÞÓdÞÚVûn©ú>'ÔK§êûJ0q3oñ«;ÅÌ[~\&"cæÍdEÊ™yÇC¸í1ó–­šÕTaæ­ ¹¢1ó.³sfÞU ‹Ì{=àæÀÌ»ˆ,™y7½-Dȼµòve`fÞ¥bê­g¢n9ÈÔ»ŒˆH½uà‘ÅÛ˜{ÇëÛ~¥Þú~¶ûsäÝ>$íµ¿g©·¾Ë‘{k°8mJ‚Ü[&‰Þè{/*æ•C(so-½ŸY¾¨¾U¾×YdúÝšufRL¿uF3¢dú½HRdÙ>ÒïE›H—Tl ýÖ@2rfß‹–}ÎødßAdßrŸœM)ì[G^É‚‘|Ë¿Ä'‰H¾õÙo¹¶Îä{‘™Üœ}’ïE®R¶^„ä[.žf«É<~6še!—IËšâ$àê·L˜€/š _¹á•€Ç!îË\É”ïAñ¼>$àrµ‚ÀLÀePs›€ ¸´z†;˜«{¶ª¿ÌÀåSzy’í¸’Í4¦g®Í·$à+Š"3àd¥]˜€ËžÇª1×Xs&àõ ¯çD.#W« Ï\GZÅ&à2‘µª[LÀãgN+Æ<ŽŒ0ºfàÍ£(·Ú” |÷¼><&á3*÷$\þ²ñdz×Y’ðfåbºKÂu;Ž…‘„ëÞ§éè3T&á ’pz׋e.-”ia„ëÑX«’„k>3uE®ŸÍ=QLÂÕì9Η$|ByÖ’„ÏÚR ƒ$\{îl¶È|Ò’ÁÐŽ2/]2sðeE¹mæà‹*U義’ƒÏ¨Ö^rðÒ½ tÙ*=“ðÚ‚„Ï(ÓZ’ðå¿_I¸Š´ù÷ß½gIÂ[£¸>ž”:‚LÂ÷t«ª¾¯õþþÞ7艿¥ö]rõ«O"¸çòižÔ#!‹W pqm™’{Ñ e7nþrAâ×Û¡Ô[©t<žJN/ ¿&Ûó@:^–+Þt\]Ý4š“7 nBjÒñ²ÀQé¸_H¡ãƒŽk›S&xèS|KL¡ãjØ&Ð$ß°¤àqù=ägY𸒬ïää­¹Zm´?lÖòÀãŽëIÇw­òÌŸïË,+ …Ž·ÊeéûA:.Þ‘ÝV¡ãeÉ‚t\ãiXIÇU#ÏLjHÇ·hº)Â|ÐqRnêSTV i6ðxÑ6t<^ä/‡iªÇ À¬x|ŠÖ:èÄãÚ¤$àÇ7sÍ*xþËÇñŸ?þÿó¯_äûžÝuŽçVÛÇ…¯ì®í.—V’ R®ùëF§KÛ©†7¯ÁB_3Rž·™xãZ<ôE£öõ­oê©×b¡/m’6„}IûhçFûà5|eûh$Ú®åKÛÇ«sðöÁky´ÖQkkëõk;ê$®)rþõïU…úk;c*ô‰où»½—Ó´³ÖLOÆ¡ßáö^Î3ÍÖëŒèÿ¬Ý;ϳ»¨¯ü æ_o÷þ½Qõb¬lÿo9?þÁÎiWãÿ¤¼”ŽÉ™f|b§$Çõ¸”5þÿ{ˆÿçÿúýßÿÏÿû¸o<ã ¨¼Øºh—Ãý³S51˜íqÚjFÿï/3úã‹ÌèuXÌ”ï{‰'rÉTqyŒ«ãÐ9ù~§EÞá*-7ùqÉ_b}—ï^´ñ-{ù¸D$®±‘zi[§uT´©ŽÛÕ—Òº6æký¸¤ žÇœ~ÙZ…ëûšŒ'úm¹‡Ö~‘3úï~\ª$š m íÕl¯I¸nížJqm hZ§è,îÉ«j*&ô|ïÑ~O•ÄMo¬¥yê¸ø:ÏSõ^‡Ta‘m¼ª½ÅÅ>~³$¸‰fy^ª?zô­U‹6Êž:.ží)óÊô Tl¬*ç6D`‹<ì_LGײ[Óßo‘êKxE„ôÔžâ±%iÙ¥¸Ž7¹nq{çúmɽU‹M}ˆÕ¾Ü<¥Ð­¦rq%‡tÃÊ|‘²DKXñ:#¦ÛÙ;³Z´6Ü ØžZt8F ôåЇ8ݹFo~hÍnÔä[ôéjΧ2ûýõ­Ú j±§, r—ÀÒâ»j¢N5*è)¶¶/©T#6«븵C(uµYýæ;‰¢ ªïimVWxÍg_òSèÖE<öˆi‡éÀ.zxÆêIšßÂrœ2úxµYýæ}Î]·W. 
R-%jZŽ;…S§·a‡¡ØíDÈ>bg¼±q$ÚÖ?ßmV–ZöÝs[v{µY•€O ¿œmËý­%®ˆi alj]Ônº¸íäÊñÍtêkuÜyŒ½iŠ]¯ÅΣ7½`]k|Ñ„^¯V¯(>оçd9¶Ž÷j´»ÄúÃm9›^îÝhcÆ4Ýï½Ë)oÏûÝf厚èR±K½Jk³òôHò¿DO8ÿt6Û4´ÄÇ»©±¼Ú¬J¸_}Ï^Ç ”…Eü›¾ÃW“-gŠØX2Ô®é,²ÈJí¤5Yʾ–KåsßÝì)-f-—–©_ݬ›rktÆm¡ñÕdËk»4L½ Ôž”Z.×ùíxÓÚˆ½´¨/ŸE&¾½F­®d›¶¾Ç"ÞQÖñ³¨zo_Ù^¢ƒŒëê-"+Å´pùj±’¡NÃl¹ÛH÷n±2J«Èè<äMÔ[l¼àhAïµ¾øí¬@ÚÚBZæ,÷–˺=+d¢Ø½ö^VN%ÑdßúºE¾¸ë¸©#-·Ö×{'«uá{îõ;—[ŠÌw'ÛúðÌ#eP¾ÒìæíŠiñôÝ`WßYŠÁÞ`¡¡ûze=­ÁJ»|M¯ë4ÓæßòÞÖ,ç³w²ô±Vìî¥iÛÏ¥Çkôšm!öÝb7ßÊ×#“—l±WnŒ× íݣŚàr¬{™z'»o¾H¿Ng®Þꉘ½¡b×Ü;Ù}uGÑÈUraWùé0”Œ#'¹Þ`õ±ŒmÓë,›¥ÑÅj‘þè …Uõ¤ÖÞÅêåöïµ…í]¬S¶áK¸ªÌêÖ»Øx˜ó9Fžµ• ]l9N„=¬ù‚Å€ÝÍèb›Ã]_FUìêiç;Úêò»É–óEfuî#/¸Óm/^¾Ì'z^pš×ƹÚt.Vþ ÃÖC±±$Ýî ­ßãýÊê)[ì‘V„oJ•ýª¥•iô°‡ë`ÖHúÇBv»åá¥Ð=¶œ'^ð2:ØMhOwÖ¶ÂÜ;ØÝÕ(«Öq–;›«Zw]GË—¦¥³mÊæj¢€èÁbX=,F±U»ô·ÑâçZµt=,$¼«öY<¶4’f\Ú{X9y熘UZã΋ó­©H¨œ‹GË‘ †™oóÈcõ<‡‘Bcy¾¶})«lnëb£uõ ÿ*“…)»Xo+«ô £‹=ÜÜx•ƒöÈcõ“×{ZyHt5ßé`#v-ÖÁªÖÙ{~3‡˜Åîo“Ëï Ö\(W9¤,öVŠ4œøVɸF{K¶|öé€b×Èb¥ÝŸ‡Ó@d·Z|}·X¾mYàŒ,¶¼ííLMA}Ä2kY,ÇoÅ®Ìbµú>| V™e=ø=ê‚ÅÇ¡GÞ[,[—ô#‹Õ㼇*@åÞ¦‘Å*ú·×8¬È5rØ8ÊJI­12‡•˰2ZÕÖF«ãÒgUþ“9l‰icîÈaWÍwû’·bwæ°È@ÖV¬ìÝÁ–w§üÓ“XÛ«²JzI¬\9‡Ñù*ÊLb±ÑG±{$±{S×õT»¯•þð8gt±ØS²ªŒr&±ñº§á²Òíô>VS¼˜$½³QÅnOb§hˆï5÷õhƒïëJÖxÿM”1rXÓõ¯rµÌ*úèM¥pê ¶iضq•ÍYèÝ`µ0?ÙU<ÍoçHb#6§ßN´%Ù?÷ËÎ$¦V[&±Ø ²žMš]¬÷ ÊiG[>Ȉ]™ÄÞš:v ÏUþ©#‰—?Ù‹‘ãÉ'/sÛ׳ʫ,“XaÎ?ë)9RïcãAléì¦ØÕÓXUÕ˜FÝ¢õ”Pi°‚Éj(Äü¦IM*P²ð6Ø"ºëd} “lf±˜+v©ÉÊêõšºÚl¹Õ1²X]En‘Z¯Vü®7Yl&кà2²ØClì´ÆßÑèdã¥J°óÚIªØu'*ØS³²ÆÜêY¬MšG­×™Z—6íM®`ð:²Xa„{¸®1¹2]æšrÅî‘ÇJ½²}?kL®Î‘ǪªÉpò[o ¯)“ÆÙºkÌ­Ö‘ÆŠŠìcSaü‡ì°ú¼+þpGKÅî‘Æºñ‰bÒkõ^6ûžîŸ*‚½4öj2£ñfcvµŽ4tk½Ï&ÇyÓ-¥Â£œˆb÷Èbãe¥ûÄ“«sd±êvÆM+¡|7Ùøª—TçrÑATLµG§¦¡ŒK%æ:)ˆ/d£n_J“~Õ@ø»Ö;"ç•®õ~OèkžÓ·+Ūä¼út¯!õ%é=…¿Æ¦jÞè_lC I¯PÑ’Œ¤W]mŽÖ$½çì 9Iïqy@^Ŭ€Hï%§Ü¬eÒ{µ>cNÒ{i„ËϤWhÜ|1@z/íÝH…+H¯D§Ñj'½W³ÜßHïÕèCI ÔÏX¾÷€ŽzŠŽ·ZõÆÏE ©P¯Ngn @½Š­i Ô{µ|-QÐõ^‚¹±¨W±4¸éA(îdô} ½W¼©3§` ½Šm9ÍrÒ{É„)»h^ßß=h®‘^ÅÚxðn'Nz¯M)^¶/'½Z(Ù²ª H¯bó:<2@z/Mcém÷pM%Ho;_ÜÌ[ïÒÛŽ‹Ï§óN'½:nÏšn ½íþ†ï;@ï¥éÇ<¼zuX< ¾… ·ÅÒóÓA¯~r˽ ½z*÷9ös‚ôêî¶45éÕq‘õݳ ½í7Ó˪‹útYû ¤÷jîÓCG Ò[oÌI¯bKn·éÕoæÎ!½º¯)«Ûô꯬cЫûšs‹@¯Ž[± ¤÷j%‰ÇÎ3^ýæ”—@zë;é­ÏØIoýBœôê«[ýôê¹ìG®Ù8ém÷åFAzuÜbë~NzKY>Pomz†zu¶;7õ¶·—+tNzõ‹gúYôê'ͤ÷{‘ÞvÏ–Ä;émï{#5H¯bsåéÕ%ÆôuPÞr×NzK³é-w 
Ò«X4½1?sÒ«X$Û#¡uÒ«'†¶k<c½­É¦U*X¯Ž‹Ûé š`½Š™­Xok iÖ[º/°Þ«mÛÔÀzÛùÒA¬·ÅÆB)P¯~Ò võ*¦ãß_£Þòõ¶Æ’uÐÁzõ“ç6¬sÁz[,—•Áz¯M­*ÀK¸¬Ç‘~‘`½-–û¹ÀzõcñÆ’ÌìÕ£²2÷€½XÂÞv}Û0 콚iü0äìm?KÛöê7mI°· !éËØÛ.s”‹ëm !r¥÷nX°ÞÚÐõ֖ଷ}<Ý9¤·DŒó^H&ÝsΫ۾ÓS ·=ÊÜÐÛΖ%Çô¶–‰Ë{÷>@omuzÛe¦Ï"P¯bódðÒP¯b>£Þösw":G½ºë+xz•ÆnéŠԫؾ½]òzëó0ÐÛ‰iØ ô–ä ÷j“7ÄzÛq)ÍèmýdºFô*¦=µ}MÌAoËsÑÈ9o½Lç¼íί±D Ò[»l'½µËvÒûøM#½í±$þvÐÛ’¨UÞiù( ·öôz[6—®Ö½õÛqÐ[{D½mpL @o™-ômú4æáäÐ[‡L½í•N ô¶O㊀ÞúŒ ô¶ŸŒYCÑzkŸà ·ŠzëPä ·æBz[Ž× ûóÖö㘷vØŽyu¶=7}ó–¶å”·† ò–!ÀoÄœñ–ôŒ·~ûÆx[C>È:ãm³ª{(‡y[+߆ñ ¯úµ#Kaò>Ž{A^J{î{wÈÛqîqäm™aîäm“’e˜ñò–I¡C^ÎÀx›òñ5¨ÁxÛMg}0ÞšJ;ãm `O 댷=•}ìkãm/'‹œò¶OàK+Îxëéœñ–™!o™©ñ^­–W.¤;ãÕ,.†„‘.;ãÕo®Y W3þQ· sÈÛËò´€¼íöR  ÈÛ†âè(ßx›Š{!{l­äÕ-D¿ÖÛJÓîkœ¸Ædåk*l>pïÙ}¸7f…—qTǽ—*d —âÞøˆÏÜD Ü+j’Å5‰{õí»¸w÷ '÷®•÷Fk…­‰{OÕ7J1-pï9µz!±÷jE.Áqïé>÷înŠOÜ{^ú‰{÷½çĽZÙÉrĽ:.­C‰{Ûè>œPˆ{Ñ& î=ÜŽ¶àÞfq“? Ü«9ûúù÷ÚWŸ¸wSõ ÞÁx¯­Ý‚÷¶òdW7þ*¼W#Œ ‚÷jŠ?M‰— ïµÁ¨ðÞëÛšÅ`È{ëµ€÷înš_x/¸Â{y•÷ÞgjXÉ{1_)¼WùÆ›°Ø 2[`¯¦{W+žFÖ«bm©Ÿ$ë}p`c½‡hiê‹Ázå‡0ü ë)[º½Ö»êoÇêDe½–Ö;AMÖ+Œ9ÌS ëUg¼A¯Ê•›B W$·»ôʺ-í> zuÓOôª"[f޽­äÔ°ç%è­?é ·IosÐKYH絛}˜MôªàÎ|H¯Ë 6$½ é+å$½riK[åAzW .Çš+I¯‡íZSÞ­MƒzGÒ+¯ª+Q"X¯Ý6@©£^?̪GD½åõ–Ž ¨·Õ3D쨗Ÿ/Hoy( ½úâb0íM ¨WÍ.µ$D½êð²ðQoyß@½jƒ&ÔêU¯S‰ÏD½µçë-CXoó‰Lì鬷bÞ]„%wÞójç^nZ æÕqiÅMÌ«1߯À¼â¬™óò2Aywq…±Ï£ˆzyç ¼ºÛ–OQ¯<ÁÆúøCÔk®¤EÔ«µ¼„~ ¼*»×³ÅyU o¬Ô?4½VWë©éÍzMò¶Ag8„ È«J\©P%äÝuÓc÷6!/oŒ¢^>B^¡ª±KÈ{x•²yËé êÝõäÕq9lòê)§yk¼å7ò*ÝÌu„¼ÚEk6€¼zÙòäÈ«kq;‡¼õ8½zµ¹¥” W›pQô––⤷µí±U…¤·eácc8I¯:£LØIzË-b$½å2ߤW7–³R’Þò@zu\æ‘$½5æ¤W¿™)H¯.Ð,f@zuXÚ “ôª#Èê$½å‚ôÖ˜“^½[¡é-m¤·t ½mŠ4ö°Y/ï¨W]@‘"êUÛ@zëe:é-=P¯b™>õ–/¨W=@¢¢^½½d D½ê³ÐYo½Y¯þ:-‹Éz¹Mƒ¬—;FÈzyk„½õ2 öê0Kó{Ù_’öê'mWh/³ Ò^ýæ÷h/; Ò^öù¤½õ8§½GH{Ëý÷²_'îå7BÜ[sÜË·ÞŸá^®ä÷ŠhØnHà^~?Ľ~ˆ{Ùu¼yoyVà½õÇœ÷–‹ïåWEÞ[ž±ó^íC7ÊÞ[/Åyo½ç½EÈ{Ùã“÷–ãÀ{9Α÷r,#ï-× ÞË‘‰¼·žÏyo=ŸóÞò¨Á|9ü“ùÖk1æ[CÎ|ë#sæ[.Ì—ƒ9™/S‘Á|µõݦ´`¾2[Ê©)ŽB+]NHiïåÖ÷¾¥5;ñ-7ˆoéA|KÇ⫘M»A|ëqŽ|Kä[¾ ßr> ßÒJ€|Ë+ò-MÈ·´X ßò…ùª·4Lä[š¥#ßò‘ùÖKqä[Žò-·ä[:õ7òÕ™ÒdÈ·Æùê.mU ÈW"¡,{Hä«®4kÁùêûXG!."_féd¾Úž:›N×™/õJd¾*¹dÙ‘/?U_=F³ñUJš…©H|¹‰–ÄWÓÖ`€|[»È-ÿ@¾­‚F Z|uw¶$ è«ë´Õ}@_Y»e…@_9CØ ¯:>[gôUI×yÙÞÙx‘øjãØ h„¾r,¹Í á 
}õsëPGú¢#ò-=3¯îÙÀ'˜ï36˜¯Ü0Ò‘Ì—}/›9ˆ¯žðò”öêŽmå°WîñÁͬÃÞ½ñ†!fìUW´§/‚³^/®Ã—•¬—_P/5~EÙ«éø(W”½ªŽÔ-ÖzK ®÷üf›ôÖs9è-_e½h¦½{û‘ zeÑŸý»…4Ы¢lirñ£•Ëç_Y¹¼p^9£¼·0?8¯:ŒkˆÁy9¿*N½Ñýnë§‚^®Ñ§ÞUCö°¤Sï &C§Þ[»I‡×«“ÞfyµÙ-À©W ¯ÔT©7š£9±Ñ©WeÕÒ…¤wjÛ¤úL–¨wjû_ÆÞ{ ÞéiÛ0)åuàhÐ{_^#•½w[eHA®;ôê›ÈºIp辸ƖC:ôŠˆDOÕy9zoU[ºÏo|)®u‡ÞûðõÎáнÎfFnЫ¢tÛ(QEƒÞ{u‹AôÞ+¬/aÐà¶ Q z#´d}Lô¾jà : ƒÞ{ó5RôÞr¾ÎÝÞ0èÕë t‰¯ôÊõÞ½éÏ«÷–¦Nôç-­þ¼¢j*^5­¯Ž;Í;þ¼B`÷ضJ^Q¼ùöXúóÊû: §ÒŸ7Žs—øóÆc™Ü%Âüyo•M\Ï„A¯bæÕƒÞxd¾½krÍœ½ñ›î¯ú6èJÜsk zùÞàÏ«ÏÔ4üyïÍÝñèÏ« ‹¹ ÁíyEÓ‹ö¼÷Ý>¯BvÛGØßl!»zñ½+ÝirAÉî4¹Õ­yoÕYN©:¬yïË‹˜“ìλҞ^P’dwn¨µïÔ¥5ï-cë´Ù„ˆr©hw’¨jì€"ÚÕý­kßþN´;Í0¶v´+ÏxÛ^sÞ[“ãêº7ï}»Â”d7Fˆë¤»Óæ‚èv'åtÇ5\ìêߘ†`W3§îÒL®«3e}DrÝiÇpçºÓ ŸàºÓŽíªàººçT“ëê’Ï5­#œëF,¾¯A`Àuã7ï3cÁuËíë–§ ®;É+…ÁŽuõ“1ò}j×0mßÜåÁ©nÜYá“êê0ÛØ ª;m¦'Ôí…Ô«ZÍèPwVŸŸéL÷U`w‘×z®pì¶²°ÙWRÄ»Á:¼Šx­b Ù®ò4¤Áv—æÃ:r,‡»ªÏžû䋊wm•gz£ÜãÎt;zTI¼^sé¢àÅV(xd8?v§»—&'¯ ëQ€mGÍ#ó¶m5Ã͓Ƽñ×áœLcÞÅlhÌOú¼Rø[ŒyÍп85(Ê™=6Ô¢SÃÖ7ˆ—† E› ß­1‡º5æP×Π[q²+'i€A»ë¿çºÝú{®ÛõcL³Ë¿NÁ®°\B)èuu–è¶ú>jèuÏ-åèTëê~\&ûV몕™0Ôºm¤2¢éjÝs÷ ·TëÖã\­«ã(Éj]…\YëjÝs÷b&Të*fËPëÊcIJPëê8*»Z÷ÜQ| j]ÅL¼ µn à³ËR]­{¶ IB]­{¾§‚j]‘Ö\J¦ZW Ãn^º®ÖUÌXÔºõ|®Ö­Ç™Z·†\­{^|jÝS½I ;¡Ö=¬ð@­«ëÊ­-]­+ñÜDµîyøî9ªukÌÕº5æjÝâgîjÝrcPë>c©ÖÕOÚÚÔº:nþŽ oýMgºŠfnëjÝò›Pë–ßÓ·ÝÄÑשÀtõöÝ ÁÕºõZœé>cÉt϶3gø’ƒéÖ˜3]ýfjÀtkÈ庥íA®[cu3} ¨®bY»¤C]^<¡.ÏD¨[~Í¡n 9Ó=U03Mgº5äL·^¤3Ýg,™®ïCªLWç3³Gº:lÔíÐ-O@WG™,@·ÆœèÖßt¢[nH—Wé@·FçÖßsž[cÆs£«õÔà¹ç‰J:ºš.˘$è¶iã•ÐÖn9îtKÐÕµ§|<·ÎªÎ½ç±g¤¨s£ó1«cªs'õƒééëê\u±c«IQçÆýœÃL´¨s{GS§8wƒ=7Ź›/ç q®tSŸBqnë¼² Ô¹­ˆü°Œ®ê\o_TçªÎä’.ÏÝ´ ÖËrQÛÌ#»/,ºlTèÆ‹‰éEùœæ6A|<ßaa4WÇÅUFI…îŽB™TèÊOl݇ºÕiîtx™ ÒÜéj¥L?WèÆ“ÝïÇæê:Õ€†Á‚Ñ\]K¼ 1Ð9ÍÕqñá ϧ¹­×;ºå¬ÃÜú9R ŸÕvµ*tuýÃíÅanQ“æ–óuš«Ÿ3+yÐÜòƒæ–¯8WŸbJösÛû^²t­áÜGÈp®~ñ0Íã\>bÐÜòFAsÛ/Yê×qnÙt [ÆFÝö©ÎwǨ†sÛ/Ƈխ*ç–N8WoÍÊ›;ÍÕóò¢ãÎs©ÚÐm›k¶”!9ÐmÝPd­ÝêÀîÔÊ]—Ýi¢½°ÝÖ‡Þ¦ù5 [úWÝz™ èÖgï@·m"‰©nßÇéD·=Ž]ãþÚÕ»ƒè¶7“Æ] º­IÆçºmë¥9oSš«’=ËS–ËÍ Žt˦­ŽtÕ—QFƒHWC´•ßt¤«_s¿NGº39Ò-Þ‘®&ÁYaHW?é–£@ºñ“ŽK)ªÜ3“72]íI™s Ó-·¦[vL€éjcÇ>ª é–&¦[v>€é–$`ºE—¦«óY‰N0ÝpãEŽÚs`ºq¾x.s*vÒN#Ò­wàHwjÅàFApÝY%.3ñ×]ää•}çº")V\W»‰î”Sƒëj[Êœm\WiÙ}y*¸®²“x}o”¹Ê§Q ’ÊÜwc¦;0²sݹÙôÌïmnÀºò›É=…¹ÑòÕP—«4[u¹EÓ ]®vîf©Pêr7͖Ǥ„w>á®ÿüñKüÏ¿ò^¿ ·Î!âïÜhû°ð•½µÝå–_Ç— 
NJbèoë=õ,ô5å©Â³¯Dª\‹‡¾hоF%èz-úÒ&¹X6÷Ï¡íƒ×ð•í£}h¸–/m¯ÎÁÛ¯åÑ>ZG-Šë×vÔËÝ:öK“Êè¨ÏIsŒM\s}ÓÇ»—þÍÃP9ft· öq|2Ýý4Ëï=OüO+ñ5Åœí¯œgû特¶ O¶ãŒ×_9ÏQÏó7αê/kœcùØ4_11^4+çÐÿ´3üÏ?ý¯_~ùÿÿûþøÇ?þá—ûËõ¯ÿö»FÑ_µßq“ ûc†×vÇÖžþ¸M°I‘Ï(Û±I‘>v ­uû¬Mf¤ÃÇnŠ9øÕ·ã)t ù¬ŸëfqÉZ|èT ¶7[³c~ *0Ý­¶È]ÒÆî’‹ëð¡ØŽùÛ4VZ²0§b×Xq¸bâ›uè¶c1»säëóóíXS1£ãÌ&2þþÛ2Ö´XŸÐoSÁ—±æ úÙ¶öØS1£XdžnmÇaŠ™3fëרeÿâÛ6ÖT{‹¯ö~=Îã2ÉLüÃ+Ã&YF_sì#hJÉÌ1»EÕ&UÆXs8ælGŠÍ&™÷¾7v“*cHfÄ–³\[t2/ÉÌ!‹A¾ô×)™Ñc½æ¾ûnÓæË!™‰Së0kÙÎÝ43’ãÄS~ùM»ãƪƒpð5–·óLÍŒ[Âa…R3sˆfŽº÷Ûy™fæØ²æ»b·iföÛ96ÍB‡fFj£Ü÷¶©üXu6ßÇqÅR3£ïl’¬íZL3S®óZM3#]Qnãß®Í43Çî‹›tC3sl^7y“2chfô5ŧÕ[‘¤C3÷`@}“4chf4óH…èv]¦™©±;53ñ“oâØ.E*§±ð ×—ÊŠ¥fF« ¦n¼gÍÈ<Íe6-³Õ}ô×ÑׯúLþX½Òò¦åð!š9´þ=”Š¥hæh+*ãëתëX}8šWYPm÷a¢™CEÊÉÝ´’ÛE3G³dÊ‹¿L5£KI»/ÅR6#iÚ9Dq›V¤†lF«)¯Uš]^nC2Ãcâ›7ÉŒÔqC:¹K”13Çþþ«ʰµíÒbôõ†SPÐö¶ú0zVõWß½¸kµcèdäÅoÿ½®¼k:-×ÌcÅ\±ÔÉmu¬«L"51ŒºÈùžÞýÏ>]¦“ÑÇ”®#qx eŽÕ­Ôâq™PFßÒ1€°b)”‘65W7â×M(S.%òÍÊ´e¥±Þ°k-á|bG{Œº)”ᘡX eyH÷b\ûÜlOïóš^ÄœµöYõr¢9NjPñ£èÄ.%ÆôþPuX¼˜žCì’b¨uĘҮÄÌÞvI1fëXïmت*–J™øM3ÛµØ1”2å,³)ebœ»–ѹìKsŒ¾u;[ìØÞúƒ]n»ÑJÎUÙ¾Î}ìW,ÞüòhÏë8ÞšÛ]ë[3Ò__çJã}ÙM'£á=ݱ¢Ë6¡ŒÖ§a °/§ eôVÓ4L±TÊè7Ó%F™$µB5-‰k¯ß¸/rªyë÷”]ÄE÷N<’…x´qw÷Ú.eÚGy™]Ë Ñ‡,‘2µ˜4)–J=Êü:ÖÅ„2xÊ/4©³¤YÝ.)ÆÉ(7Ç–å]RŒ1W®“nÂ{+¸½ÛêÙ\®º€i_Û4Úêžæ,»*lvߘú¦U`W R¯¶z«S}“ ¦™LœN‹ÞLFhŒ ýÃnõê{§*4š‚Ô=2ýÔÊD7§ŸjÜÍ·—HGmU¹U&Øû¦qÿ~·ÖSîœC¡ØÉ«µ¶uß­ÛGôí]óýõÈ"=è‰yôu¦–‘¨~cP\“©eôFï¡éÙ·ÓÔ2ZŸ>ÇöÅR-£ïã}²šÞÞMõ”6òìÓ‘¸˜o¯Ö¿¿^Á–¾A»äÑ•´¦z.¾¨¹K=É«©Fl¨b—cä¬ZÐŽ—Ú‰1FΪ¹ŽjàÑ57¥Ì)㎬¬/(•2zgw*¾öV¢°÷¬—Úç0õÛ[åÞµ*‹KûSÅbÀz5WÍÜRÄ´¿ê¼›k$xë6d¬{Ëez×—² )^<=ÊÄÛœÒPxWÆ5RVÎ_BÝù|I~ðº;%á#eÕfÿ¬»·½gÕÈý®åÐb‘G]½o•ÏA4‰„÷[ >½µÊ:òêº/Å®»÷®:.™Ï~ì)•i·çzO#w‰2FÊ*ï‘e8díÇ™Za¤,b±««ÒúQ…[¡{î}«&÷Ñm¿v5ùeô­—×]ÚÕ+­ï¾UÒíLJãk/º÷­çíµ(vµß‘°j>“dŠ¥\æld=çî /šææ%k ‘¯J¹[o÷æI3 ëéõ5w¬£{½4îÅâîßíõ¶Z» ô»ÑtªùvŸ™íIôÞUJ,{½‹ètõjvs=ãŽcR,£ÓÅ“é5¥XF_Õš~Ã1µK±Œî|µNA˜`½+nájRÒl®æ7¹KŽ1÷ÎUÿ0“ÿýjUŒ{sEû‘cä®zY!b¿Ž”Êè‰YUŒ˜y§T¦íBÉAV[0FêZ>âK6ÿ½;]¶a§Ø«·Þ_°$r‹ ö[ïëÝ¿êN‡jp×–•£w¯å¥Ÿ™+ñË.ÝýÈ\ãy-ó;+vµæŸkÒ;ym ™«ÞZN8wY•ÌõÒ@—½ŒT•#u½´9iëÆ5û­-…ïþUï"Ú²ÈÒ;tOÖ¿¶2òï¾WJŒ‘ºJ’j‹]JŒ‘º^ª;:ßCJŒ‘º^b·RbŒÔõ’"mt\Š ­Ìã7Ûˆñn°÷îbǤ©~ï`ãqÞ×øŒéGòzÉUhP CÖ¹#}½ÚøÔSņV¦·c:R+Ó„êçÊùxŽôUµ!æñõÍùt½/¹~Lmջͪ!&Sìé«6xï½€’–hÑ»‹•V:‹GÛ®¦7Ú[º 
WJ€_.2&Tsʼy!íùFg‘éÖí†ìE¨ÿÆ&H"Ý«íˆz%CD «ÇÞ2]ßÛÖàL·ÝBÓÕO/ÝzãNtu¶ÇDWÇ™ah'ºí»ÏÅ$ÝÒ–AtÛw? é/®ž¿• r¤Û>ß\ÒmMÁfAŽt[³ÌBª@º:]ä¯1@·•ÖtÛEÚDÔn}3ty6ºíli$  ÛŽŠaíý5é¶7“Ö´@ºí!¦¯j8Ò-NÇ@ºút¢clß‘n»ó´µÒ}Ä éê|¶ÛÏ‘nërEH·Ý^¦x@ººL›b€éÖKy‰~ëFuÛ3žFʪ[<°AuÇ%Õm§3®ãT·ö†NuuœU»ÕÕq¶gTWÇÅWßg ºí2×±†âP·>‡º­3܇úP÷qœAÝÖig@]YØP·—›6uÛ­Ï­ êêZ̉P·½½ÌkAvõ›ñ†ÆÄÞÉnI2œì¶ÃzSÈn½uG»mŒH§W ]]æ|}¡ÝÚ¾í¶ï4 @»úpÖ¶õBçŽvÛ“Œ«èË@†vÛMg!^ ]d»%AÙméÂ6÷B »µr²[‡'»í¸¬Š²[Ç'»í¡¤Á5È®ÆÚݘ—“ÝÇ=Ù­ÍÒÉn°R²Û^^!Úmå™ËOÎvKÖ¶Ûræ¸ÎþÛ­_–³Ý6T§Ø®Ò÷ó^ß‹‰NvëKok(Ÿ†£Ý:¢:ÚeV ²Û2ßxñÙm/4·úí>®ÏÐn‹eýt ]½4Û¾ ´Û^Ì>vèh·æOŽvõbl‡3Ðn}*ÎvkVàl÷q>c»-©üœíjêwoc‡Øn™íÖ·àp÷q)w¯×Ð6ÖŸî¶Ùkä=U2¸ËÎÆÙnM¦í*vd_°Ý2êlWý%ÿ{ζ«CÌ,lW¯Ô$i`»ËH¶«Ãl—-Øn›Kæm°Ý2ýÛÕq¶ËlWÇZÛtUx/“‚í*fõÝÀv[kÎ*Ë`»:ß ÃÛpl·u±©ÔÛmýL. í¶g–:°ÝÖœs…lW1“Eíêµ›llWǙخøŽ•t¶«GfŒÁv3Ue닼¼¯âƒî ]÷›~×Tïê…4~l#äoõº{`ÞÞ™=(ïì&'¤¼"PË0##å½T£åîS4PÞk‚J”w÷D P^”Æ(”7>»u<(¯ïK%åÕ¸1Œé å=Òʯ1g?…ò¶«·(rÞR{œœWüp|õä¼ÛåZ“Âyqóî’ïy-༛¯ò’óÎ`ûä¼3‹ä¼Z`em ç•£ýÉy­¢Og¼ëæþ_伡À¼",‘ÏubÌ»*uÝ1¯¶š¬£ö*0ïzB Ì«!Ëéó²¨1¯Ê"-#(˜×hY弫 y9¯‰C[‡½ô5 ì]ŽoY…—¬wÕœótÎzuºõŠ‘¤eY/ s‘õ.·¯}‘õηr¥âõ.jƒ‰¤Ázµ ~<…¬wÑï.Ú•Ý€æCÛê°W;ämç {á¹ö„½‹8mö€½³ÙGrAØ1ì¸wYee𢂀½Kûõ«¯Òö6l7²#ÒÞùBÚ+c)“öªŒ_¦¤½bz– ƒöF̶H{ùJI{UW/zÏNA{µ êv¤½Úºcú!Ð^ÑÀ°ö²za¯œÉ¶±JHØË](€½óå* °Þ†ç$³N{UöwËa¤½Íì,ah¯>ÕQLdÀ^}P–wöƯ-Û{ç&NëËW„½*k˜öÑ€½sS¼ ¼Ø;Ë6+[`ï¼A Øÿp2FÜ«’‡–ë÷Îj†c|#îØOù½Å„¸W¥`m}ÀW­y™Ï·)ïÜ8[Ï¢|ÕLŒø²K'ðØ[CÖ«z›Ãä’¨WO2Íåˆzåëk¨—==P¯šÙ6:z¢^Vþ$ê¥Å0Q¯bº¨÷Ëy¡ÞyóÅl¢^(=‰zË}õê£ô"Pï¬7;§H×QïÜR‘â:êåÍ2Š\õ–®¨WnÎ0D½Íñ ¬7>¾Û0°£^Y¦­Q¯ÚãšZ% ^]zM{’^}Άð@zÝšœW[ª3ÈyˇÎ[úpÞYÓìQƒó–÷ãœWÝš)ÁyY>•œ·´pÞòUuΫ‘Á¢à¼³ñÆÊyõFÇŠ(¯Ú¤m\qÊ«[¾Þ©הּ,w­€òêT¶ Ê[>P^U¶]2 ¼ti"啹“-²òÊ,'*¤¼ê!Ós’”·™uqvÊ«óÙš(¯Á´p'åUl˜òj”]sS ï¬ýŽß¼å䨕žÞ€¼qç—-Éòέ›ëY!¯N·Ì fóêíc³]Ǽ¬9LÌ«òÚ[î$æ3msî„ç-íÄ9o½xç¼ú:–¡ü#çÕx“¦Êä¼zŽÓ°x%çÕùÒ”œ·ô3à¼úŠïÏ1oú€yËhÌ[Æ{`Þÿ¯·sÙ™$I®ó~žâ_Š€ÔŒ»G€ÐF)@ëÞ Â,Fä šà‚ïùñL7;Ÿe5Ù]ƒ)`€©.«¸¦‡_>?vL}ïѾ‰yÕͤk,1¯&½iôJÌ»¸óÊ·¿é9Yæ-͘WÍÒ6Bó®— ÙzõVLÅÐË:Ú½\Šè-/ —¶u½/VÊ€^Ξœó¢‹å]åÊyÅî(¯|ðî·âŒW#~_–¾÷úÈxבÄý„.Ö¯(äñÞ¿!áÕ§˜ÖÎ$¼l=$¼|õ$¼»Ixùé“ðr\'áek%áU‹L[z^-w­bßDÙ /çO$¼º—äc ¼*jŸõÊIx×ÍÓvHxµ^Iz^ÎFáíç]Ó«û{ oV¡^û¿ÿxWöòN±û0fâ{ºQ|.­*²¾ êa¢ì&è.·€Hw¹%KºKÝ é®¶ÒÓŸt·/Z.øIw%»Só 
º«D–½&Ý•Ãs\¤»*2ž}&鮥·”T‚îâÙí«0k'»}Öö#@È®¬°ï”‚ìnú;õÉFvUª=³SIv7[’W²«Rôg*Av7­Žî=ô¬Nv·!±ž_XÐ]ÎD‡ »›ÌBSü¼«Ó=Ó+™twøÏoŒpWëæ5…‘w?C w :$ÜUžñk|}°ÝÓ>)]‘Ö'd$»åF@v7Ib"[È®v)rW„dw°+ûkŽ0»bwû¦ÃxìL4-`—?5Áîé"»j ™ŒL°{þ”K²ÂuOuSnQ¸.s¬»i.‘&!Àº2³ÄÊ º—çÒªÛ¼& ©. ®OfJ¨Ûï/³¶‰uOXÐëêmQš¹ƒëêræÃ®«Ëõ?E>àºtÔ&ו«Y‹Ý7×­é\W1[}ƒëÖ›p®«ã¶˜.‚ëöКÞöäºtõ&×-¯\—û~些RZ£l¹nyÇàº5æ`·ô½»œWìnƒ›ÅF Èn½O'»zgfå¶[ÏélWç|¢Ù®z‡ôç'ÛÝF’í=Å `»ÚóÌ„0°]}JGŠãÁvË+ÛÕêH})Øny-“í–Æ¶»|ÃuB°]m}?±G oïÜ·:ÜeO º[zRÐ]N6Iw9¡$ÝõîªÒÝ2 ƒî–ëîr’Jº[sº«ë™çè.'̤»›ò–ÌñÁé.UÐ]æ“î*Öö+Œ)œîö÷4kÐ]ÝfË„§»ª‚tìSBº«'0·#Ð]Ö ÝÝT$8“î–FIïf¥ïw93'Ü-™ÃÝþ\{K¡=àî6V )Ôu¸«SöIÚÄ‘€»ÔΨèI¸[~mÀ]%("ÞÕk ’îê·y2ÅtWö«W‹MÐÝMUVÒ©t·üÞÀ»åïJ…s‡Èˆx—B"â]Ý‹¹78ÝåRŸtWs‹,ä º»Ÿkû&Ý0O Ð] m–ÄïÀ ³naÐ]1 ³õÝ3yÒ²tW·qNmXQñö†Ñ ¡þ5º[oÞù®Êèšiø®&8™•^T¼BQAjïRˆF¾»ËóᜠxÞþnëà€W%Ðú(ñNe'à%–*^Êx5!<î9§.ÞÞM°M¼ÛϸåNñî>ÖÔìjÙçw_G°«o4ýL v÷Ýkßìöì%Á®j»·¦ã´&ZIÞv™âõò®¾½jÜ÷3¬&Ýý]v»g¦<|P]=S” ,\·7Ë3õ~àºû¨&8§ï຤Åo÷R…è 8ôÛ=Ý\w˲ômèž2Sîö%+MÃÝÓgÎ4ÜUIZKA§ánQò§,†»‚FK²Öb¸›3Äâ· Ç6úíÊÑ,ë;¿]˜ ¿Ýþ3§} ýv…þâ3*~»›«ø‹ßî⥋ßî–ÙBtÛ½FEµ¹ÈM·ÝÕÓ!ŠÛn1ý…ÛîaŵŠÙ.]Fi¶«Ÿ÷œ“÷j¶kUi¶«šùé4Û=O/.\ÌvO«^¼v¯ _¼v[f §ÝËËÓÒi÷TtˆUè´;ª§æN»ZŽäÎ"vµ‚Iáv¥Ͼ¯8íîÞEÓiWKœ×ߺǮjÝÏY©®NæŸ6=vwþè±{®PÓcWÃf¨ é±{ž^TuzìžÃ è›rÝ;tÝs÷‰»çîµ¢é±{ª„`˜‹»Ån¿÷ã49±[ìž‹—§Ån?η‡`±«wuf6lvuÎ+”Ô´Ù=å>´¥q®ûìžÓ¡†»Z6¯oˆìf»j8o2öU ®bYòŒf»ºý}KNìf»ZÙ?QK“f»*¬iæš0Û=Ƽ"0Û€ØSsèf»J½°Ìvåy³½,ã¿ Ø-2gd4ÛíÇ-æ ³]ÝJïfrú4Û=ä Ÿ‰¼0ÛÕ!æR ³ÝSÉø!çƒÙ®Jõä–Ìv ¼ f»Ç6³Ý~J§ê0Û=®Á‘âEºÙî¡ñš~D4Û=.@˜íêœÆó`¶«ûtdêv»Š‚Ýîq‚?ÁpWÎC™EGÃ]·Æ,†»Šíik Ã]%ÍCƒá® ÓÊ„áîq%»ánÿŒw3vÃÝQ—6*æÒp÷¸ÁT`¸{(é!sŸá¸[ÚìËsWß[ºkÑs·Ü;Èsëq®Ïå/yn9Ìå¹< êÜ6L SëêÜzFWç–S¾Å¹åo]›ÛF)ˆ0c‚6·´9ˆsË)M›[ºZhs³9B–[qYnBM®ɲÔäòtÔäò™(ʭǹ(—­›¢Üzœ‹r?c)Ê­Ïà²\¶DÊrëq.έ1çÖgwqn¾f×åªulmz‚Q—[Þt¹l¾Ôå²Å]ï—k>ÏßGnÿ2K]qœnwê\É×ÃÕ½¨sŸŸ,ý—¦ ·Ï¥é«MÑtA‹¥”OÒtAUë‡ éCœ+ÇÇþ5ž!–u|»IËV;Ä·t"¿•__.÷Éoåű¦”üVnTgK¡°óÛÞ’Ke¿]4Ýg]v\qˆL÷#Á]|lpÅ=®÷~ î" Æ×pòU®º…cËJsNp•’Mu1î2ŒxÓÕÀ)î²yrR\u_Û5óë‚⊓´ð($ÅUoèι®Î}4å{„cÎJ&ÝÕÁRÍ€w{ì6Ÿ àÝ~Íò€w—ñ±‡ï‡ó]Íàû”}vCôÖíßb&¤òêî³>!oiŸBÒÛoóJ‹’^ž³øëÊŒ&<‹Hzùx$½ýù×~A’Þe¥ã(DÒËWMÒË6DÒÛ¯wo1¡$éí±%’XzKîºMÓñ©¸#çbËÊTt×ÅŠ²¸ë.PÓ]wÑ€=3–Š».èâõ.fÐ×ò“lŸ»îŸÿ°|ýÏ?\ºú(‚v÷§¹ÇûÓŸÚëÙ$!úç?üÓ÷ eÁ¯/h·_ÑëF¾‹bÿ¾Kêé~±'þ+_² 
Äñ~Ðø%[Â_ÿ4Åž7ðþó½þ}ŽIÂëæüÐ;x½ö×/ÿÑr·âýïxÿù‡^ÿõÚß70ÿãwð:çŸÿƒ;9„ŽzÙ;½õ«÷lÂyêBÛ¥{ú?¿m¼ÿÛÚÿwôûþúù—¯ÿý_ú$åo¾þÏ×Ïÿëÿóo½ònúZµkïó•Žß%Í¥Õºiï~ó•.^éoÿaýZ‡áê×Ïÿô{ž°,úEÏQ8qo}°:·ë*3³Ÿþuåùúï_Çß}ýÒÿï_ù°? ·Î!â¯Ühç°ð{kÊË+„þ Ái”h¼_Sÿzú1¥ÖGoùY¹ý AûþibÕz/ú¡Mò°ÙÜxãÚh¼‡Ù>Ɖö{ù¡íãÕ9xûà½|´ÑQ‹Óß¿¹£Þdîwô‘[`êlZnÞò–ï+€bùz÷Ò¿Äãeö[è0åãÐ3/³ý¥×éke¥y©åó\çø ¯s(k·¯©ú:qýÆk‹ë\õ:ÿÉ5ví^®}ÕÚךÇõè§Y¯MËýr ý߸Âÿýó?þòËÿýßÿø§?ýé¿üÛ¿þÛ¿Ñ(úõ›r/)3²µöí¶W!­ÚÓFVs—”±ãp=¹-©Ø03y1qÏô"¾”ç;R¯äîŸbwì8œ êÑ(—5v”l.Žm5ñÌÑ~Ú׺\m3ñŒÄ:È^m7éŒê›'WìŽý%{èÉÄöËrÖ/%ÎÅfƒ »Ol¯˜¼h'Û%²h‘³;t^Q¬D;™“¹+”²™}wW«Ý&›ÙOíL»”«÷t)›‘ÔÈüý7Þb³AûFOX7_÷šw-KBsHSì™› p®½$͈͆]`6$—¤±Ù Ý­Ìg½$ÍðBÔnyyŸïbԲϋ][ýµ£îwde—îË«Q/ž‹Ýí§3ŸéÊÙ;´}î6÷ÊÛ܆iÛ§vA±'vöá™8!úõ,–ýØcÇr®Þ’¼bÉ£ÎtÖɹžÍk– Íõì^º/T`çµ—¥Ø“–ÏpT½•â1CÒû:‚z=CW1›,ª_«ÏÚcãa×^IŒã—´±ñ@oTi3bßA¿Ð²Å¬hÉ£=û í¯!f:²Éš3m“:#öz¬/G& lRg¤å³D4‘‚Þ–QH*|IíGWèŽ}¾•öÚÙ˜ºXýä M;7iùÜ;¬Þ0߸vr•ÒN5 ËV·äQ¯=7ÆžÏr_ŠŠÓ½9þt¥çs¹Ò­Úªa Û{uOý¥¥÷ŸÞš¥û‡òÓ»êôñ¼¾+Ŭl‰ü¦BAÕÖÕ«Q÷ØvM•Èb×a—óÝ39yS»t|Æçݤ̈]ÅÞ_Ï;ö¤ãó#æ¹ÒD‚cÛ}e“0#vh®Ûb“ÍÔG¿½j‰,ºBR§˜Õ-y~ÚèÒ›ª*ïÖǶ¬D×Fì#=Ê­ÎTÛÖ³áÉð;vÊï½5¯]ò¸eˆ¦»¡š)Ma3w—ñÛ¬!¦kú§Y¼„/DšŒ,^ò¸é¢b©š©çܼx ¿Gù»ç3Ž;¼xÉ#aÁ¤µýôâ%|=fÅKJìòâ%ü´öæÅK$º¦ƒ_ÛoSÍp,hšy¤í3ßõþ˜jF=bzyˆ/¥j†s†¦bEiþ¼º¹Lëӣ͔~ûØM6#ßê嘅ŠÝ¾Jôà8¼x _õ‹«%§}Ü›R6£÷qFûaµXÛ§_S¼ð2N:Ò¤Üo‰2¢xI9ìñâ%‡‘Ùn½¬ßfŸ·®®÷µÇ¬xÉã.CZ¢¤lFïx?§[Űm=¬—ÍQúe+û+¬Ì)s[NyšjFS½'¶ù‡5nÖ.açp6SÍ”¦pÞ&›áälx8åLÂxÅŸÉú3\‹ÉfÊW [¢ÇzY“ýË밙왥‡-NNeËåÓÍpB¤Ø©¬ý²×ùΨáNÐÃŒ)§²“úâ*…3ýã뫼é¹-o¡ÎŸåßc·Me—ç˜v"Ãöú´nÖüøåܓ™ѯ†^[æC)œ)ŸV_\¥‚FÇ…§¬B) É Œ2û¹|ëíN31‹Õl4kŽÊ|Ã,6=‘û½š|¦ôj#á2f±ÚݦµI“D#g±úþbØ$Ñði¬ÏÅ‘b˪”ÃÌ<§±ð´W,å3:gä=Ê=Õ3š·e ]™!…zFÍ5Ó1úZÐÔ3Zgd%Ö>ç7õ '⊥z†57ç^X™<ÞW)\b}B_\¥z†uú‰M=£»¿C­X¨g¸l첩gt¹4ønÏbêÝfš<öQÉÔ3º•,…+_ûTÏè§ÉÒߊ¥z†Õ úÜÓ+—(Õ Ôy#—"ç±Ò„:«I¡óXÅBÙ$ÐÈi,j¬ŽŒœÇ¢Vªb©žé±>žN²¢ôŒTϨþê5” ’ê™ýrW%¤zF÷Ò›ù[MñJ0V`WŠ¥zf—ˆ)tQCß8ç±=ÔçˆsÒùRnG+õï:!Ï·åDÖ’3;½rÉiP õÌ~ºŸ”ä7©žé1sîz©?b+8åã¥ð°Ê%§6 ©FÌcË)SÏìW.Ö{LÚŒ¬\¢r©šª¾Þ±ÄY¹äró¥Q0;ç±—[çâÊY»är#¨Q2;ç±—;%Iؓ꙽™ã†$V»är¥Q^ ÝkÛ¤?ÕÓ’ïË•œu©ÿ“É‚y¥|ZøØT\•°½§ÛļB‡gVÈqÌÛÄuc®DÌ« ³'Ò'‰yi FÌK3b^åÕe§IÌ+¿D+;ÐÛôóíSýHЫ)’ó\G½#16Èz/})áQDÖ«„ɬïIÖ«ÉfJ” {ûüµ¥àž°·Ï_·Ì "ì=U %´d`½:,86HïGÇçÒ{ʃ?%m@½c¾Ÿ 
Ê@½:îˆRD½Z—˜‹çD½§œó5õÊlw‹B¢D½õtŽzûêhËå@½®!h%êU-å¼*P¯\O(¾‰zû"®÷!bê‹¿˜ÆõÊ©6j:ôêŒYÅ WUÒJ› WerúFЫR ¹Ø èU]…äm½ÇÐ)ÎU)I¯¬±û§:¥ ½²±î—~ϵHzGzÃ1ë(’ôʪ>‹‘ôÊŽÞ€ˆ“^Y¹gž Iïq»â•¤÷Úî`x ½²£Oåê$½b!—&è•û¨a÷†¹z­e“ WÞïiÁÐ{ìžÑ@Ô+·ìT¬õêr³2Aï1çFÐûJ ÐÛ»³Z#Ao¿KË5#èåûçj1ñ$ç•ç}ï]F)$b^>šC^Ý~V &ä=6Ï” ä=F‚õœÔò*Ó(SyGRdò›§§òʹº½:äUBT¦b’ò‡[™“ò›ç9å=6Kc"ä=6ÏZ#ä=ÔÝE BÞC_ä3·„yÝsa yÍS yMŒj¬„¼‚ë3+Cò 5ž!E&äUÖÙ,¥EÄ;.’]Ͱ2“•d·Ð]ýZ¹—M²«ôµLò#Ù•¬/F!‚]µÓÌÌ!Ø‘í“ì¹Ó°«[Ùc“`W0iÉo`W '*ëöSôŸã!¹nÿw1Õ«\WÇ­Á„€uÕvÒ€úuõï[‚`Ý1õÝ^bÝ‘s˜ø X·ü ŽuÕ0ÖÄ`Àº¼uP]þd€ºåõêîOKW¨[Z n¿Üy½èˆnÿûÞ.çT–D·<ˆ®îqO ¢«v´Av¢[šˆî±z¾&‰nùQAtÕ6wc¢Ntõµe²#‰®~‚Þg—êD÷Ø=KDwäŸnëìÁAt³R©D—a]u›kT€ ÐíGìẇ ÐÕ UÀ Ðíoü 'B]ÝàvĨ  Û?­-+ièêwKïuÝc˜e¥ЭÇ9Ñ-=!¨®î³%ÕU›}~…ê–T·ü¨ ºeÈÕ=VÏã&Õ-à ¨®ô@™5Nª«¶ž–û¤º¥ë–oınyeÀºje»áYǺeF¬«6ëëª].³©n™‹€êê§%©î±{ž%¨® ˆeu*RÝc¤ÑÏb ¤ºÇȸ½¿Iu5RdEsR]Íú²*;©î±yÚ8©î±{>.©n™­ƒêŽA(’Áˆu%D»"ëªÐ–ID€u‘0û/ÀºZڤ㱮VYÛ=Á ±î¡ÙH #Ö=‡éÎ|Ö±®VY™b¬«éd¦)몵ìvJǺªM´}v¬«uE¦¦×=äe4_±®Ê¦e%b]eÇ?ÑÙë#w.¡´a]ç©j ÖÕ 8+ ëJ°˜Õ)‰uûqwj–ˆuḵ®êe}Jb]­ðsçƒXW«øÜ¯&×­Ïà\Wd E YrÝ~/–LI®«ëõ¥óû[&×UJ'0rÝëÍ$¬s].ãÉuÕˆ®DÚàº"1ƒÖ- XWÜÌi°cÝa/6ĺ*ÐuiÕU•Ò'Š-~_RäïuÀûÄ»/Ð÷ ¸kÝøܵá pW!+2¸{/>K#ÜU,mºwïÅG߸ûš#ÜEÝ“w7*dîÀ¥ ܽ4 lWánoSêJ¶Û4dEt²]¬YχlWª(ÉõÁvï´"ÛÕT¶ÏfU!Â]i႟¼+¶%7‰wU).‹U¼ûd©¼»§-ñîqZjCÐ]‚|‰\ vÑÛ°«…JV§Ø%äØUåÎuû¶†—j»1CP4¼[ôìr@°;–¼Ù’vAÃHv¹“\$¼”zS‹ý®Bvõgé&Jxw¯¼]4¼·ÆÔ5Žs²«²¬EPÈ.—§UÃ{ôÛš;ú »’nfÉwjx±Á[4¼Í  ïáu,Ivù:'ÙÕþí‘„hWø& ‘íJX“C Ñî®pÆo íîÊ­ØC¼D¯>‰gîí «es&Ü­j\‡»ýVú¦ÕSQñŽ"Ps~@¸Ûèþ !ØùPñRá /TÃPñʩ잻|ä»›-¨ðJª;ã9¥º&ã¥à „Wík 7^9œ¥÷R‘ñj=J€"ãm¾Xe¼.¦áÕÜ«d”ðnòeÈ6¯ÌÖœþNïéU¥‹ŽW”*A ïvŒòÐoABÑñž¾ Ä«‡¶ïªEÓ«BÇ+‹¹\ÐRÇ«g›.ÅD¼ú­ðj2Ú ‚ñ®š›¶XžRÆÛ?˜\ SÅ«N"¼ˆÈzÇÜ7j ¯2H’kRÅ«êy¾öê7Í"Ľj÷On öj®Ì„½šßá…CØÛ¯gyY ½Ã¡Ñ´N{W•Œ"e¤½ºÍcKBë¸WŸÙ–âíî•¥HîÜ÷®cB¾…ì×q諭ÜîÝ„·R¾è¸WŸ[.$‰{uÊ0A'î]¥;_¯Ù‘‚÷ö_t±=ðÞU#ž ƒ÷®ª‚ŽIä½ëmiªD¾›D®¦vä»Ê‰&´TD¾zò5©´ßu˜,åñ•1åµFˆ¯Œ¡M8 â»Ê÷-Õg ¾ºÍT*‘ø®Zb†S:‰¯†Ë5ª3ø®fƒV‰¯†“44&ñÕ°°´Ðëø*7«‰òU«ÍÖt°ø°kÙ(~ £ ñ:ËÜЯA»Šáxüá×ð¬áÞZ ðU ´D[r^yÃk²M6¨VÈ’BXç¼}A¾=é+᜗I”ä¼ý°#7Éye¥~vλ It`ì zÝí½Ò^ÒÒÞz:§½ý3]Ò4´—ëæâذk…µ‹@{· ‚nÐÞMÞɱ”*Ž }æ•ÛÖűÌ€´Wëít.Ž ÄÞÀ½BRi>MÜ»í¦èìU¿–†Ã„½›ªÊ/»IuöªÓ»"Y§6ðGìaíƒâÔêöŠ÷Ü™¾EǽBAá>NÖ+²e[äd½go-–ÈôkØå7:Çé×€’JůÏ\üzë2M 
ý6Õâ™;_ôkkIJRÀzEùÒм°^²I:6CбaÕŠ>Ü`½ztÓî‘õªÞFÔ½ ëÝT=æEξ>XïÈ\ˆeXoµWë½µ@ - Y¯´èÕ ë•mÉTwÒ°¡¿M´ôò Hz7·%*¤W -w5@zwߺ+† êîcø,¤—/Œ¤—¤W_C&R’ôò“"éµ ¤W ·¹uÒ{j’KÞòåWÒ»f¹¥Bzû[5U I¯–Q¥86°YõösÞ–)MÔËÏ€¬·ÿûò^kQÕ‹©ÍéuÒøAz·L/%éeÓé݆åý܉&éÕ31-@¯:ƒTô*Ãj™ŒÛ9ï¦í´Xô‘ó X½1oYêÒ©ûÅxUlÆå·Îx˦Ê…ÀxµÙRv5p¬¡QƒìÆûdüMÁxË ü`¼®Í…Q;s0^µ×à:ãÕˆŸ ©y5gÚZ„¼ý'[²!oÙœ£SCÿ9÷ô§å-» ¼ú¢\Ïì”·üz ¼:îLP^uŒ–~A§þ~Nyu¹'vOyË›ä-#8 oég&ä-£" ïXg¦Ø÷<‰¶òž*ΕTß!ï¦%Dnxò–9 ¯b_•îêdYœˆtW“Tƒî–ŽtWSÞ,zAº«XVƒ!Ý#àêwñ©«MÍ ÞÅT™xW?ç }ä»eß-/€wSâaKs¼›–([4dÞzN¼œµðê•%¹ åÕü;«eójNo)RoÌ». *˜—ÛÒÀ¼›²Ðc¯”WÌÕ[Å{ðrÊËõA¡¼Ü¬åU™¯Ä¤¼Û°§c P^Í.²Th¡¼tf«6 Ͷo@yËûåÕLê™Æò6…šivòjåÝ{‘9ÝäÝFvpt¯€¼º\d¶ò ïl)¯…Iƒ€@”r$äUÍ´'ö` ye­žµJye"we= ïv»ÌwS±{ö ¤¼´K—õé{bW¸4À‚”Wz¢#ïÐ!/]ÔhÒÐG—TY“8Ý“¨gŠIƒXAl擆Q;óÞÆ'Åž†5ÅžAJÎ5y+ü”Y«›âÏ Ê=‘Xüúp–[]ÅŸ¡¿ë(w†Kʲ™VÜFñ9®wŠÐ>+ÅA-i]tg(!Ç»ôD)î ª¤µ¥‰„óÝ>A·ÝÓ¡/¢g/ø}î «éxoӃׂëîöa{M C¸;†° Üp;D „»Ì ¯p×J¸«Z?¡¹+p÷ðÚl„»û=*©÷öóUÙ.³õ>Œx-Mµ°]¸Ž¶  ÂvU1G…íÂÇ«°]XP¶»Ê v o °]idÂ9…lWf„9Å-lŽpd»j9ñäínYsïíZ—ÿvm(O´+íFì´[à3Ю\mbb\Ðn¡ÏŽv{ÈšÑ.Ö®íj­ÛªíöqYAA»6°‹ñ½€Ý[ýHâT€]¾‚]÷ð,\·gYo¾]¨î Ùe"Ù•¹‰)˜AvU—:z¿Bvé  /zŠý®ß&õ»4««.¼Ïš©‹tᥩõ»¼e€é¾jÜÆêúÝþ5kÊ9ítaÃÛß‘)¬hÃ[®çL·xR¿[ž6¼|/ ºrŠÛrõ ª+¿4›ÕåŒT·Ÿry’òÓ‡·ÿ^i÷R|xéC8©.-uuÄžªÒjÃÛ×ãéÂàPלaÁ+KûíÛÒÝ]Ð6ó$t‹¼@·/Ò‚—/@·8ñÑ‚vìºÅ@—oD·üÔ ºåSÑ­^,[Š/åítà•ÁCR@:ð®"±+@Þ~Ók.)áÀÛ'7[úÎЗtàes×­FÁ/^¼bðÒ“N 4ʦ/{Cçºå£×ÕNÍS1àe'C^6 z5”s€—>›4àÝ´Ÿõ§‹/, iÀËœ—jÀ‹S®¯…¼ü®hÀKÃ{ð~ÄÌ€—¦ 4àeŸ¬+uEÕ!쥯ß& xU¨>"Å€WË ÜË¥/}WÀWô(¤¡ôߥý(í4Ì}Ú5 ö»Ã .vüh¿»«¨Å;i´¸ï¢‘Ð|w ñ%Íw?b澫!+õÅtà……> xétJ^:¤Ò€·Ü x5B½…ïá‰Å€—ß xÅOï°Ð§/Ê›^-ÙÒ–Ÿ¼LÕ‚/ª¢ÞS–™«Ù1˜ï©ãÂiƒ¼Lñš¼šÚ¯±ÓI^7@-¼¯ÜѓӀ·wÅ©§£o¹ xxŠUƒJe* x‘ŽX xe  tjØ­î|1jü)Ãi¿ÛŸf[Òö»ýË´$ 5,n]YŒú€±DR1j€h1j ?  
Q+F x>¢ÝcJzÍŸ6%¿ÝŸmÅŸAÃ`˜O™¥?œ5ŠA9 Ñí£cÂýbÐ ÝCÅåÄß0h€ ‘nŸ·ôËÍ/°4 O·4hµ)OÅ f$»‡ªC…Ò²4@Y zo°¡4\bI³§( Í‹€ƒeùD::Ñ.)‰vi„Y úIŒºížr÷Љ;Ñnñìp´«ôìÄÅx—¾@»Å‚ ý£ÈõÌË¢Aã|çR|/Ù]¾ì>é õAv›WÈAvû0j=7­w‡t5P$­wOï¹?¬wÍüÃz×¼¦‹;Ãh¬Q“œî §ûWw†‘i=×°ÅážØS• ´«ãîØ‚%ÚÕ½ä`G´«KÊÀ‰v?c‰vÛé^ÚD»ºønRgG»=fžßD»Še>9Ñîg,ЮB™&H¶[Ü‘ÁvËãíÖØ›íÖK9Ûm§×¹Ú-!'»%d`·Dœë*”Ymäº%®[cÎukÌÉn9ÙýŒ%ÙU,÷ÃHvkÌÑn9Ú­1G»5æh÷3–h·Æí2F´[cŽvkÌøîg(ù.[ùnÿà}¢¾[c/¾[ÿÖ /¿`ÞzœÞ3«{ît³pÂ[áýŒ%á­1'¼5æ„·ÆœðÖ˜ÞÏXÞsÂ[cNxkÌ o9á-?¯Ž³sÞá­1'¼5f„÷3”„—}3 o=Î o½ oýkG¼Ÿ±D¼5æˆ·Æ óÖcÞæ­1ǼŸ±Ä¼5昷ÆóÖ˜cÞsÌËY9o=Î9o9ç-œ·çœ÷3–œ·ÄÀykÌ9oç­!缟±ä¼5朷Æç-ï ·à ÷3– ·Æ ô–~ ·ü4½œ ô–ËôÖ˜ƒÞsÐ[czkÌIïg,Io9é­1'½õ½8é­Ç9é­Ç9é-ÇôÖ˜“Þò;8é-—é­§tÒËÂÓ$½õœoÒ[O稗ó_¢Þzœ£Þz)c½5䬷Ä{Ëå@{kÌio=§ÓÞsÚ[cŽ{ëõ÷Ö˜ãÞzNǽ5æ¸W0»àÞzœãÞîýŒ%î­1ǽ5f¸·†÷Ö˜ãÞÒ÷ÖØ ÷Ö³9î­1ç½5æ¼·LÇ÷ÖÜ÷–xo9ïÅºŽ¸·æ¼·ÆøÖ˜ßòt¾5æÀ·Ü¦óÞ2ßï­·â¼·Æœ÷òVÈ{yy/yÁo=ÎÀo 9ø­1¿5æà·>ÂüÖ;tð[qð[c~kÌÀïg(Á/¿)‚_®ã ~Ë9~ëq~ëq~9üÖs:ø­1¿5æà÷3–à·üD¿zg© ø-Ïð[Î ð[žà·Ü§ƒßrð[~Z€ßzœƒ_ò†~ywßG~ÿ2oÞöüôk•ך(Ý=ulEØ{{‘a {Åtgí(ÊzûäÒ`%e½peþðl°Â_Vð+²Þ‹µÜþÒ©’ð—úÂ_¥ÔÏTá¯|Öp¹úðl€…0t½P‰ýv]oÑàR×+7°Q+º^$P]/ú¨ëež>…½»R«î`Æö"íª{‘¾œÂÞUùãá~Aa/Ì”Hå3–¦«Ä¿n`úKc*â_™_Ùe½‹rgñ/í‰i`DüK—1â_Ýgï…:þ•ií:þ¥ùñ¯ mæÃVüK/â_ùîô6»|:6 ÷+ðžad¿zðüŽÉ~eN˜ý;ÙoñPtö+g-óPû]Gm¿výê.¯,èö»ª|Büôf¿4kú•+-~ëM8úíqú˜:úí¡Å@¿ò‚ʽ0¢ßòª€~ûœà2Ñ Ðo¹M _Í%šY8úí±#­hˆ~W-š[;úÿÐ qýwQ ßUE,Ì€×ѯæC÷̨ø]EX² À¯ü@3C‡àWVc‡Õ!sð+‹Á—ÆÕ‘Y¹6C¾ô-+¦¼ô+òUÌ|“|ûƒõQ 4ùêR™®Cä+M“ù®ãËJů#ß[ï¬ÌãÈwÝݧ©˜òöWor: _·EJ=h: â;æÒæSÞ1oëxòn^'“ÀWv&gðÕ)] S^XßøÊѶ¥TÀwÕÔ6½|õxYÓžž¼k–ì,޼z¯éø Ú[|x½B}xGõŸéjJ^LYëÒM®øðb1T|xOM R³ ^¸JÓ‡×WsņW“¿í)lµáMé_±áí³ì>{xÏ‚‹ /–lņW¥ƒÞÀ&¼¸Lx‡·OúA8Óå‚¿˜ð6•ÿhQ‰ &¼(:ðaÂk€LwQŒõ/™®ü?²ZaqáÕk‘¦¸ðj*3 2]ÑØþ–çÛt¦ÛÏßR_lx5èÝí½Æ%Ó¥ýRñá…WÕ`º* ’ìïóáýó–¯ÿù‡{Ô%Û¾úßôÉÀøãÿ{ý±½Þf¿þôÏø§ïaÇÚ[ú‚hxûÑð7oå»xõﻦïæ¿ò5´÷“Î?/9©ýë_ÿ¼çÕÇŸ~èµûHÏþþó½þx߯_|þ¹åúý‡¼û÷ÕÇŸ~èµÇû~_ýýço\ÿuÆ?ÿ÷qûªS[Æ_§ÖúÇè5Û¥ú?¿=¾ÿÛÚÿwô›þúù—¯ÿý_úHý7_ÿçëçÿõ‡¿ÿù7^çZšL4”&Öö7_éøŽ+ír­èSÓ¾¸h¿ýJ¯ô·ÿ°~­ÃØëëçú]OØÄ‰Ë³ìžîÖªÏr°ÎöšFöÏrÚ»v(ÓÄüé]¿9Ú<é`ùŠœ–iÁò«gi–ÿ-Ûg²¹glM…¯ÈÈœžLûY|g› VûMǨ3ÿþôíÚ¦;Šz+–ú8ÝÿÍîÏ ð/Ë‹,g÷̦MT¿å=vÅ 
¦^Dü|I2ÆO°:Ší±u;§û‰b¦Ikùi*v¹ÜÝ¿¸Ø=z#ía§jã‘x(íRú0EZŸõq›Úþ]d¥1ÅR&¿Ž,ý÷î¥íZ¿£œÖ<^eb¼¬zöȃ<­J´4wIU‹Hû«Þ*ž¨¤˜UGïÃÅCõÆâvgýËÔ¤àEb™D¦Q¹ÉËíA„±÷óÝ>Z‡ó”ö¹B=öhÊV0åÑn·“Þ1DÏûŸÖÇŸ°Ê|ä_˜–r‚ÇQ˜í‘³Xj/O™xd •N‚ÚM ý¡bæí5„°³2÷#žtÝéÝ]’òGî6éèצGEÊÓbéøI;éïý GõÒÓ½ïrÅÌX랆\~æ—5okSßðèź»°Éx}´imt¹ùч™–yr­«rÅÌÇJ‘¨åýhÚ’‰ÖéPö¨¿M÷²rÎÝkÚŸ.ßí³F÷ªso¸G¶ÛÜϹçÞb¹¤÷Ò{“w©ç¸Üñ6ÈÏðâ}Â{Ê왞a¸‘öEþ»ÉT#{XéÁÏY[L1óÊÞÝuøÑ@Ÿ6]ÐÚ>š©b0ìL¸G+†ì\µ£¢>1³©«ž-eý¡ÜU¼¿¯;ª™ôEœÍ]i9ú(Y!;×rÜIÿº+w–ûôÒæ®´Äì§rq~y-÷Ë †lýÝ”ŠÙìuÑ.f¨}Ae³×UZâéCôôUÓá&æŠÚ›œвý÷U“YCò^îÝÍÚ`@Û×N4É79?™M_)ç¾¦Me¿AŸ¾ÂXQ±Ç¼~½¡ô5U:Ók´>föXn|ú [ÅþªÜr~°Ï³º"¬pûë÷ ,lU»á|é1W\Óøéë&›ÀöÇKåBo >ÝRú¬ØåØUž‹sô Úm¶œ³ÑÊØÙ×>˜šI®å{ëñ ¬;$êsò ,LGðqÇ3+ÖKïSXyÎMý«^>M=ͲwŸÃ¢ÕöàÁ WJ«GðÁ¢?ÈéYŒà=xùLE6ŸÊ¢ïèAK¢Ÿf=6™E¿2b·÷·æAº.ÒÖzÓ ¶ÇVŸÎâûëÁÍç³05–Ï­±í›ÁÛ;]ÞÏáSZô=xúœv¼’ ù¤¶^³Ñ“ÚL~GЦµÒ “ÁôàýÓÛÑÙº˜þ×(›;^­È|fÛ_êåñzp¥t ÚÜVtë­áê¡Í§¶êt«±wŸÛògTâU1‡´AŸ<}r+UÎ3ÓÅz°w ýÅô!òú*†ê#x{lýe*#£Ùg-ÏánëZíOf=6½ ¿ë>ì*£wDý'ý¼”Îáý¯Šu¾çJ=¸ê‰ú´á9êÄF£¹Oo1žõ r¶úO¨ik1îAø–·6&ÑúñWîyØé3Üò[hEÕßÌ¡­ÖñjRù­3L){ð–®GßÊZÞÜSßÉ÷¥JÎÊÖÿI†$ùï­©E ŸûZ ­YÚ’ø·OOî'¬‰‰ï±ÂŽŠíà¿’\d…;òßöø ˆü·ÇZÖœ#ÿÕ.¹x‚ÿ6ÉÉ£Zùo0'*¶+¶Fgà&¾“Έ€kÈp¥† ÛGÌ<‘š#.àÖŒèÿŽÌ`3Àt>$î³_#AÀ!g9zð_‰ éÿöع³ ù¯²HÓwà¦aâ™Rr`y,¶ôënçw #¦Ç" pNÂSHL?D`z3’Óqƒ˜+DÀ̱&fn6pÓº<„y@À=tÛÕŒ·Q£ý½B¬¼`CUKq³Ú#ÖXgìŸÀ{#€¸ ïèiýK¬•œÑàë'I:¼Ó@ çt¬´ç#²}I€Ëä7`‰û_Y'/hë¸ Oí)ඃ€ëd†€@€ÛXŽÏgà;ϨôKÜv `ÝËõLI€ÛÌÜöÌ;­¸í`,€Àõ8‡ÀMDäHë^²T pÛÜ·¸mnN¬|ÌY$¹‘‚H¬Ë].OÜ6ó•'fÕ02`±Ž3êf’×[qÜ6/ˆC¬[IÇv2à¶yuºÉ€uû6kÅ·  ¸œ ¸m¾$î±}‹êdÀmuN\nX¹;³|°"Æ`€ËÅœÿ¶±Î‰-ð_=Ûßà¿z4CgÎë8ÿÕÓ)™ü·Ü ø¯bÎGÿêá²øùo=§óßzœó_¼fÐß¶z&Ò_ѾLå!ýÕq}Âö®ŒIúÛV¯7è¯1ŒTþêŸ×ý-/ ôWÇóý-×n«—–$ÖqYj“ ¸ž^`}Ùºš n«W?!V^[æv“×sn+ðCàzF‡Àå0@`—¤ƒ¸©´ù3çd¤À‘ë œ®:Ö[ÉTCR`Ý‹Q3Pà¶z¥RàzœSàòË‚×gQàz6xW–7鸴KPàòÔ Àå.@ËSƒ·…¤×)p[À4AÇOÜb_XÇea!Rà¶TÒk3ر MU§ÀºÍ, \r ¬;1z <À0áï>LËZÀð½ÆTð ·'ö˜†kÌÁ°b}ü Æ Þ]çTÀð.µUd †Á "FGE0Œ±¬€aLÕ ÖŽV¦Œ Ë(ã2‰¯ƒáÞ~¢oáÂø FF.ÜWí6ˆë\X«‹31´sá~“ûvìÁŒ ËVq[#™\X&&iý1¹ð%—§ô˜î±þ§ªÙ¹°Ž{Âg–\¸/áZT½&î«´-«i órÄ—œ Ù‘X¸¯4÷üvˆ…/5ú _2Ýj©Çu,|©wK– ,ÜWË—)] _šÉæìѹpõ»œ]=Áp}Ãçí¹ë$ÃÃç   2ÜÏÙ{̘¨ƒ ë­Ü¹$î׳Äv’a=y »#’aÝKŒ#¾´æÊy0Èð9ŒçF Épÿ…–+™ß‹ ÜËÜ"V¾ÃcèÚÉð%H˜L døÒH·N› ’a5Ù–¨dXͤ_k²áK+ñD9 ד Ž@†ËCƒ 
«ÉîIÅA†™EA2¬Æ×¢‹V[X’9î¡+w+I†……bL'.-`øÚÜ”`˜I'ÃJ,É I’áÒò@†euƺ…døZÜŠd¸¿ÌTÚ «Áf%F‚ayVõß`–7V"pš“ ׳9îg ç¬ †Kw0¬äâãIQ±ƒá~=m®ÍOd¸tl ú—ã™5 Ɇ/-DÞÓ4’áSÆYù“ ‹Q¦AÑp™Ã*ÐpyaކåB¶]Ae€†OÕ ï4¢á~Ê#ÔDÃB£½™êy áSKÙ;Ðp¿žù² —{V÷;?é ‡•e”SXÂáSö4³ž*Ùpÿ#Kè‘ ÷Xÿ‹€¯bçÆÁ”–ƒ ŸÃ5Xذλ·ÀÓ`çð^æ^€ ËsÎvEÁ†õˆ¶s 6¬[·\°áÒÀ†Û͞ȆûëÞr K6ÜcGo‰³£î÷y¥ë>ذ~yÛGr6ÜÏhVDdÃz+iÉC6ÌßhXÜ®pDç’¹L<ìhXwå¼ høÃuô¡@ÃÇãþnDÃJ 5?аފ ¸†•××ï`îW «æ1Ææ¯$Ãýdæ¬J2|»§©R">‡íT“‘ ÷ ݶ2,wÅþ¶ç”dø”«Õ=mÉI†õ{§Épo•fTH2¬Ž%+…“ 3™‘d¸ß‹™j‘ Ÿ² 3ükdX§´]-'Ãçp ›û³Ã§q« †Õk®±¢$>5ÌçöÀ°~„ÔÖƒ Ÿ»û“ ËÙóÞbJ.,³McÛàÂçêökäÂýVÌ+\ø\Ü‘\Xgs/.¬Vl»ºàÂz°".Ì»/\¸¯¬m_¸ð)ƒÃùí,ÌQº`a¾Ì‚…U%7©~Á§èilh,\ïÇ©ð¹¸e¡Âý¬f¿L*¬“æž+©ð9Ì¥[Êœ «J0h©SáS>¶±¥V¨ðô#3¤Pa}I¹]Z¨0«$*ÜûÕÞé͉d¡Â‡&ÃË•7äTøãQœ kJtÇl¦Pa¶EVh¡Â:­ÃØ7®¿°°‡{Ÿz›‚…Uñ8Ý? f>uáÂýš‹#|páC~ûôl+`xÌú\>ìdøP^{$·6¬O¼¨€“ ²³ß·¸GÃÊþîßÀ@4\HGÃrþÍ­I¢ae¢ß1=.l˜ùæ… ŸÃWó~… «-_/áêfð†YH»°á4wÒ†õÒÓ•³°aeÀ§éoaÃ:mº:Ì2Ü…÷wpÚö€àðX`Dˆï…ÃYP{íÿþwa/ýá}B’aÍAó¶I†…4ZˆÁŠeÄ0!œ¢o’áóq³t’áS7.Ù$Ãç¨?0ÏbqÀW“–½g;×ÙëÏÿi!kÆØŒ£eÆH‚áCU6ìá ÃÒ: +†Kév€aöÅ2¢Ä WxFÜnG^<#”DFžôŒ@á›âqšWN±ŒÐ9Ò&dšFhz¼¤qL#ú-2²‹iDÿTR[L#$M`FÓYÇæ4ž¦ZŸls´(¦»:”X×Ð4¢¿¹%e4™|fýÒ5Bƒq”·(®’ _¯’a >g*uá¡”ß\ÔÀ3B“¦Ôé‚ ë¯Ìצg„f4&ƒ…i 8@†Ë9A†õÛ¥ûjqà#€ š+G‘U’aÝËv:V³ýaÅŽ\pƒ «1/k:X 2\ÌÉð1~ÝHãþŒå¤Z ¨EÝ’aA0§±N†åŽÒ¢ª Épõ.ôA†…Èr_…dXcpJL†Y‹êÞÜ óË.MÈÁp}8ÇÒžH¬.WÖÐ'N —–2Ì^Šd¸|á ÊÙȰîeME:Èp±¨®ÏàhX]ó’DùE†Õ‚l dX-oË.Èð!Õy —A†O \ÿ¹áC/ÌT¸„Nv | ïH—VyÛjîÿpÝR $|h&ÿ+jaÝg›ù­ Âõ Œ+t¤^D¸æDX7[¬Âå”Â5æ@X§\¡¶)rŸi¯¦ìu |ÈÍ?M„Ѿw‘"âàzFçÁŠ]‰“ÀƒõËm)?_ÿ¼¿ÓUƒ÷Øn‰àÁ£üKî—s‚—[.·¬–ûD’3ypi¬äÁêšÅ/B×ÛSM ¬ãvÓ-;fcu,;ª%•£Žƒå¤N8X±,‰G\<¸´ðàCJúPB«‚H3 ®ó`=B_N÷òàÍɃu½Ì'®Ç9Ö½l&îu|¬®™@XMo11³aíIA6 #.MÉŠÂ0¢O»÷LZÖ]´´ÉVg›jáò«‹¬DEáò‹ªød`÷¢ßÙ’ÆôÂå-;VK·-bÂÅ DøX=1•DXwi{Ä Â¥€—H¸Xz óÉ„KÛ.­H¸¸Ç —W$\¾»®'s$ÌæU°Þäm@Âz%GhÑ æWR˜0_Je««Î ¦^aÂìW æ‹)P˜sƒ…õ!푳B(üñ$…Ùè ®¯P¸¾<@ázZ@á1ûrë@áz$ pyy„¨U 0G …áX¡0FÇ…9æ S¡pyq„ÂåV…uäiö'€ÂüÀ+.í•P¸> stream 
xœÅ½Aï8rí¹¿Ÿ"—6ðœ–DŠ”ðàÛ€Y¼yµ3Œ^”ÝeLW5<@óõ'ŽþãœÃ¼Ýuo¡óÁmÔÍŒ¤DQü1xâù__þß/í}=·½Öm[âÿðc+å(åm­õm«ë{_kÙÏíxûñ—/ËÛÿ÷ÿKüã§/ÿöïQà?âŸÿúe}û¾,ï­î(øÿ}Y—·ÿëËÑûû±œçÙ÷övôý½ÇÚ×r¾ý¶ã½­aÛ×-lëû~lDZ÷úösØÎ÷º…mkÇÛÑú{_Ž}‚°ËûV¶l%lû{?êÚÏv^¶õ}©ç÷=ÃVÞKm½.}»mǶ½ìa[ßk/í,Ç~Ù¶÷Ö¶ëe[Öõ¨Ëñ*WâOöÔ¶¸ùYÊÙ.K}ߎx¸c‰šìÇ{­m©õ|•Úß_¾GMöþ^Ö¾ôÞêm;ÃØËZ/ÛÖöc9Öõ²µhí°-q³½½×õ\Z/Çeêï{Y–¸ÿ|Å#5LíhW14׺l¯g;ß×=lµÆ³í{ÔYÛz·åù~¶°­g¿lñ$ÛQ^my.ï½/Ëzî岕½`iדŸx]aë ž.¨n½Æ¹lÛ{9ÃVÛþ²ñÎýuÍò¾-aÛ6Ôe{N°=¦­¶³£*ñ®¶Ozî·í@¹^¶«ØÞ—ÒË~=ú½*Ze©Çq•[ÊrÖúzug4`4 ž=lK¼ÿµ¬ñ|—-Ú}¿~»×ç…žÇû²?ÝuÇóG¿/K¹mG{ºk=ßK¼Q¿ó½õ§»âŠ-Þ ñ^§·†i[ÎE/ÛúŽNþê­õxïÛÚîžÿ¾.OoÛEãÇuú~ÛÎõé­¯·‡é|ÙJ\æé­µ¿·x}wSKôôW\ó\ë¾/¯zâ?ý5ê²Å+ß¶Ú.[ ûÓ_ãš[)û±Þ×lуîþÅÚ9>s‰ïö¸ûk˜–#ú纕Ët¼ïçÓ_ãŠñÒÛÝ'£åÞïw¶^åÖe?®×s®Ñ|ëÓ[ÃRJ;î×s®ñæ·§·Ö¨Ôs‰—í(OoE›´x–hõË_~}zkØânñè×Wu®Ñy÷§·Æ5·ýØŸ‡[ãÛoOwõûŷߟî׌{í×E^¶ÁîîŠkÆèßÕëùZ´ßÓ]ÃÖc<)[•‹¾Q´Á^öìãóò¾Žá7:êv…°cx Û±]]æê@[tû1¼†­¶r,m}Ùâ³ëÝo‹Ï~ ¯uÇ5Ÿ6ÞÊû:F×0µîÏ#Lç\ãŠ%zÈÒîšÔè?£»Æ˜ÙÛóõÆ|õ¾á5l[¡ÛE¥ÇðŠr[}U¶è½Ïð¦^÷_—<0vÜÝ5®Õóá„éëµåßþ^í’%>þ1¼âš1|.×K|úcp Kt‡è¢ûUÿ²á[yºkô¥-úkÕ$lÇ\k}Ç«¯a¾lñéÁåz\ò5­œ%>ý1ºâ´ã5æ%>ü1¶Æ{­}Aê²Å‡?ÆVÜíù^¶sŒ­¸Û²–˜^5‰/ÿU¿˜AÊ=©Ç\û¾Ñ%ö1”OspwÛö{ʈ{¼¯cl+Ö5ºõkÚ†íƒkØâ…-5|ŒËnK®ïsCÿ¹lñÑçàZã#ŠÞZ-˜tŸÎŠf+Ï8â29¸Z¹x \Ñ­ë«{¾lG®QÏóhÇúd¢;µ\kŒ´Ñe¶×cGƒkEƒ]]æ2aˆ~z+ümL1²/9¸†­¬ûíCÀvæàZâ+]Çx /‡Wm±˜X÷^Ãód‰ú²Å—ŸÃktÂðî8þü}ÍáÕîW²C]¶tÖÎ=ºN¯ñJŽ^Ÿo8|‘½Såf ß§äðZb¨ŽÉ>ÓšÃkÔ3/tÙŽ~E¾«}†ûI¾k½¦Õ1;‡‹˜¾«}¢áZ¦ïêå6ò]ѹâsŠ©h»m黆­Eçyfîps®ôYµJ®«ÛÉyEUZ,ˆÀ˜±ÂXÙ‰µw‹'òbá ÷ra˜µÙ‘Õf‰Õ9²Ö.a$O6^~-7û Ûñòd½–'»²Ö”±‚"göZ_>x'Œ+»³h‰c¹IÍe$èö,‹ãŸ;´[¼šu½È0öh­ÇB*&ƥ‹}Þm, È•J®ûú<], –h˜ÛÜmÃH¾,êTŸÅt±¸Ž’u}µsk­Öí~A±ŠJoÖ¯z’;»a¾|¨Åý9:S ;ûñ*ÖèÛÝ%Â(m´ÊÃÂB´J´Ìä«„q3—6‡é0Æ€ͳc)zyóõÌN‹)rje¿ŒñÇñò××cñI‡×x¾Œ»ºµòöcEµGûÄTr·l;×å®kW·–ÆÕ0áŠD¹õžo@KëwuN¿ÑÿþNè»|ô(†µëBôíñÀë3è)ô Ûïæþ¨ú†ÓtÄã./ôÉÈW<Räë6F¾pµâ}ßëvE¾aêò„ú¢–1Â?XT ïlK¿9lk¼¤{Hè‹bÛ a}Qj_žF¡oØ¢w,Þo}ýn }a{qbὫû=!+ó^i~¡½¨D/ÏȪ´×*(¸w¶¥ÃwÛÃUqÿecÜëµdÜ [,H^3ÀC{ñËXãb¥ðæ¼¶u€Aå½úÈL{Ù¢°W-Œza*yÏŠzaK°¯¨¶öìK(é5ƒÞŽÎUë³tЫ ¨ ×.Iœ¯$Ƈ´0çµBŒyµ}óZ‹èEýsƒDAoØêÞŸm#½~M½~M½ýBç³’Ð+ïT0/r±¢xú‚y½&ŒyõŠ䵆È‹Š×åž©•ñZÏÆk6a¼ncÆë6f¼RsA¼ZIA¼ÖL‚xýfŒxí’Lx­3á…)^àíáõ›1áu^»¤^ª”ñJk áõ+2ᵚáÅ Ö³†Âkm"Œ×ïÇŒ×ïÇŒ×˽¯ÿ–/¾¤X‰>tH ¯•Ê«ƒˆ@^/Æ×n'×L 
/‘¥ídÈë÷cÈkÕdÆ+“"^ ñڇψ×jA¼^GF¼h“¨ñƒ¢ñÚ (ˆWŸM¯cÂkUÂëUaÂë×dÂë×dÂkÕdÀkŸÀ~ÌWbÀë΀×Ê àÕßµ‡¾kÅïúÝïZ ßuó]{:á»VÆ»þ„wÝDx×MŒw­’‚w­œà]kÁ»öÝ ÞÕ‡ºë·cºë6¦»^•]U¾]S_U.ÙÕWÛñ8«áaϦšÀ]/qª³Ê¸kåîÚð#p×mE]V³‰ËÊó½À]$…íú%›ú¬<€ Ûõr]œV¾Û!>«:Íi̓¢]kgA»Ú„ìÚP.d×>!»öÜ‚v­™íú5›z­òͼVz„®N+÷¢Ùõ_žæ¶æ} ñjAC¼“qU×U›ù®Ù^Æx'cUÿ5VîÛÅ,&Æ;Ýs7V.Û̇•’ݼX1_scòêÛ5ÈëÈ«}ÆhïTrS_VEÝYi¡½“‘iïtÙ]=Z-ÙÌ¥•’]}Z7ž_yï~}ª[+¯Cx¯—Þë!¼w*¹©o«%‹:·Z!æ½:‹øõ> äwª“_E=F~§’‡8¹j;ŞߌâçÊS ùõoDÈïtY&¿“±š³kÆ$¿“‘Éïddò;»9¼Ü„ýNÆ©ã|û](à÷x+oÛ[I¼þˆ¯ÛÞ>û­1&—óî¹F€·¨×ئ±°_Lßmá»ö«|l›7¯•°_ð†²?üOÃ~÷˜Ž'´W‚~q‰ã yT ¯>ö·gma¿1/Ö£ßñlöO€ åí¶iØ/ö}[xJoöÛrŸÂ~i,ý ìwmëqïÏYØo<ÁÞ·;æËÂ~k<ÝC±°ß¼âkÙ–ò"¬ö»½ç†‰EýÆÔÑ'Œ…£*JÅTŒ·ù3Fý÷q E¡ðZž aÂaª´¸&Œzä΢RáÙ–^´þP*¶ýkTu¡•§Pá°Å_mÏ×(TØŸ©°×åE…ý·L…AékÞ±^:Å÷¦ó¶-ÞgôÊãͱ0¦ÌxÖ'’J°pØbàQå‚…wLÉûs^D±0î·fì»`aØÚ¾>`a\3>î¿Ì\'zÎ ê0åúŽŽñê` †ÃÔz±3Æ¡gnoGÁ0®¸e̼€á°…‹tÇ€ Fý÷<” X8ê0Ãe.lï@¸°µ—pa”k}ÌdÂ…ñt{³•pa¯'saüaŒöÏÇ*`Ç£¢“Ý.Á†qº)Ãnc0 [¾iáÂxe[iwlƒ‚a´ý™§ ã€VɳB†áÅ•ŒÌ2ìå˜ £Üq3F™VŸJ©°=Sá1Æc”¡0n­~ïã ö{1ÆŒF(lÏ,PX+"LxGˆñsØBˆ°½AÂ;ŽDo¹‡-A¸b̺Ëñ¾0-ûö9!¨c+EÂQ.>ëã)HدùBÂöÄL„Q÷<`%@ØF2Â6" ŽKƧ?Îo–¬ %4ßvÏ(z¡ÁzA†Áúd‚т5ÏR ¶z öRÄ‚aÚò‰°`\N5 ¶ž#,Xë/,XZH°5•`» ƒ`/Æ ¶=éÞêžað‚ýšL‚Q®fØý‹[û Ö>o$®Ðl#Áa¼V”Ëcd<]–I0.âÞ—7¬ÜH0Zz§¨ŒO%™£¶ë8Qa$X[ÛH°6©’`Œ¢±rÈb"Á¨NêF‚÷‹Y^•Ç?ûöŠÉŽ?xýó¿¾üþ{€s´íù&±ÆÛWb?®ÌwQîo¼)ð~ì¿öM×ø<öçYŸ–ìŸPƒ¾Œû_ÿüÔ»‡?•Ïÿüð¹Ï5úýâŸz.?ç <÷¿þù©w5úÓñï>¨Áëš?ý™šÄ²<æ ¶õ¨o1Y…+}¹\½¡RÿøÃ—õ*òwkü¯FÅß~øåíßþ&Æê¿}û÷·þõË?ýðkïTÂ…Õ{ï=jø«ïT¿ãNះC]5†á_§¦wúû^ßV¬PÏ·~ÿ-OØwÄ÷¿ír/Q™sߦû6¼³X^wþãÿ|ûåíÞÚÿxûCü§êó~ʈÅ_½ëÞsÃ§ŽØü¤±XLlôi“T¬\ÞÛ½Ù6Õ‚lŸ4ev ÞWD‘׆mŸ5ã¨üT²}n缄…/ïSÚ⺻ö­Å§ö“ëëÔ~"µùÜ~ò+¤Ÿhm¦~rßÀ†Ç¯¿Ûk“=îÅÞ—ÚcPæicy»Çîožô.å8ÞÏûÎíƒÉé|î²ýÆÛ, µµöXKæ6õ·Ý&ç×éãÖ—õƒ6·i~›¿p‹†íöe_÷XÆšïemvŸìøÏu‡ÿøé?ùåwúÓï~üñÇßýÒþøßøm³êÛ¯:o‰Ñìý~:®À¬ó}eüEù¦1ß#Ïõ½–(Yúzãt†KçÆÁì±V.tÿÄ¡îë+€q}/±6¤'Œss¼·²%Y8ê&Æ5%øsÜMŠ[Æ*÷¼vo&p:öeÂØi÷¢Eíö:×a<ׇIÜ/v!>ÜÁ€¾_8‹yÕ3·0çPFÊ×{Žý0‰xr¬GyÊ]—•71ŽwèEÝ—ñÜ••¿0n¼Ñðö[Þ&Œå}ïƒJàƒGƒ$Œ•÷2d“ Û2ï¯npQ‰¸Ìz>üalºÁ<Æpj,Pµ<ia캡Á((zft¯%¢;Å'ôÄFÛ¾¶4ÖèQÉòê’ñûxuOgÆ`[FÃe¤mèÙ1-'\ãJÇÓ™ëuëÑb½Æ2&qÙuÄS„±¼ßÏðÒ<#=´0V2e¼ËxæÑzð›¸Æ]µøâŽO˜BiÞ÷b͉ѕ·Æ²&ØlÙ—Ñb¹ºä{Õm€‘„MbH[† 
YcËCöz‰"m­ù­†ãTư\±±Öž(„ÿ°ÈÉÏ·­ÎŒç–MFa-«œÄx•aFaŒ·3†å©de•¯PŒcXÆfÄÚe$•“è*1æ.—ÜÇPȉߑ“Œ€´BÓx_Œ@.co£“†ë›N2ú{X{!'ÙŸ Wr’½µ{%',{kÐÊîÔƒ—N ,Ѹ`£.êótDè‡ŒÑØ;b?ÈIŽ{ÙtÓI.¦]Ú˜æcY1œdìae¬Q¸K:ÉxÃQ…"r’ dUG<0r’£dÉX¯Ë˜N2bi?0{r’ýAŽJNrLzíB¯ÇNNr”ÜiÓo=9ɨmÆ=„±““Œ{¶žSvÓIÆ=Û ãANrA˜pîÆ{#'¹\ÃÚ1¼áX½m¯z)¿¿^ÉIFŒ'mݘN2ÞU ~‡q#'9Œõ ?ÛN.2L¹5¹ž•<ä‚…bÆ ÄK ùò#2Æô£dåK¬èÒCF{oC+3Œ<äé!ò½IcÕ–2¼ž˜†W~žé!ãEéG=ÓC.WXŸs$a[ÉC¶÷´Å¢-]ä( ð;Š8l…–OÛº“‹ŒwURòn‹%[ºÈèk"ÓEc4èÐÃÛb=—.²·Ûz‹\®pŽáW„L.2ú-Û·m!9s“V¸zé!ûsl+{ÈQ«Cc/Šê;—‘œä óW~ 9&Ç_¶ÔKÜje'ÙKîì$G£ÓŠ1”ÝäðÒ£=Œä&#Œ.ÉDøïì&Û0 6r“íEëfMÜ3*£Ÿ_§íFWøLðÚùVä’ü¾ó¡7¦^ÿ±PÕᶘ{¯˜ªy‹È)_¦‚êãZñŸ#KA( ãž PwQ¶3@ q”¤á¨M{AµÊR žjË€zªÐ®-¤Oi€z2v•h‰þ˜›4¨»ˆM* î¢!§€Ú›@µŠW žJ2 î-‡ïŸPÃÈ KõT²ªD˂Ȁg8@=›J´¦@ÝwœÊ]NÔØ–fn&€º‹çÔ¶•­€O>Þ˜°POFÔxˆðwŸµ€êÉÈ„OxÒn›j|!Ô0ÆW0؆êÉÈ„GÍVÚqD £PhFÔSIBÔ|6™SO6æÔ0æA3ãÔ@šy×8µ·pjïX©ý)…TØgRTO%™TOÏɤÆ<Îj¤z*ɤÀ°¤šó"þì¤Æ x2RÆ=²©†‘‰¹jBªû®”^H5.[Fú+#Õ0.#Iš‘êéÙ™TOÀ¤ú#¹Êþ(DªÝƤz²1©ö–RíM ¤z22©ö–Rí'¤z22©îHÙ•{é'©Æ=󜴑j×3©Æe3!¤‘j¿çMªñk¦øBªý‚Bª½á„Tûã «ö'XíM.°Ú_`5j›Gë VO%Vû§'°zj†ÕÓ= V[AÕì|õ7X=dX’ð0ïUˆÒjiHiõtO¦Õ“‘iu×<ƒm´Ú:–Òjû.•VÛ«VZ=•dZmÝë¦ÕöŽ•VûÍ„VØÇçVû ­žîÉ´zº,Ójë:J«ýE ­öw%´Úß•ÐêÉÈ´ÚL«q~>W÷ ž rW{w\íO)¸z*ɸڛ@p5¤žepWáÕ“‘yõdd`=µÏ¡þ²–d`=•d`¦%Fzój„W{_^í*¼ÚUxµ7ªðêŽS’OPÃÕÓ-›zËkŠ¡®ÆÃ#>ñÙÍ^íÍ&¼z2žê-ó&»ðjkoÆÕnÚÔW–/@põd,ê+keW£Y…žïê+ëe›úÊn$_9.ûÈŬžîȰÆvö±ù&°ºWÙ·WXíor_ýÕß)Öo3œ©õˆ(œ©uxî@‹÷úШ5Üú³g´Që~V u6jÝc 0R 9µV£Qëž*qF­'#SëÉØU2œŠ¡k]­)»ÖáBÙuÕe×WLjÆTt]%TAÉuÑP%×áêìKË %ט•‰?*¹Æf¦`2r ¯vdÓ2p-¢–®£àZ’x)¸^egÐÀ5:Þ’ Ò\C3{•ëè¹ Ej*¸^dIÁu¬hk ®ÃØ Åx ¸c\6£Q\£vOâtãÖ ÛPgPnm' \C*4¾µÁl\7°è¡¶gàB¢m¨Ò)¸V‘Q×°ÕmK !ד‘ÉuýeàËäxâ ä)äºAAd¤†2rÆrc ´Ž+–N L u~%*ÐuÙ  ¶þÀ˜n8”¤!_ÁÖÓó_ØEÖ)VlE¶úȺµöçj=™ZãäÉJŽB­Q²‘:£Ö LÑ¡ÖíR-Í5³PëvIC%çjÝr¹ŽM_¥Ö@^#¥ºAë° †Ö¨ÎNkX†Ö x•P§@ëo`ÍpÖ ’¢9q+´žJ2´Æ=±Éú„ñ ´nðz(M 5žd!ê Ð%OšXZC­§×£ÐºáRÔ¡@k\–³n¤Éôs2k<ýó% ¯ö ¯î¬)_¤¼úc:à(¥¸8áÕ-~ÈüÀÆ«Ž¥†ˆòê0vJª¼:î¹e¢\ãÕ ¢*#¸ñê(¹d>lãÕ™ç[y5ž$3+¯F9ʇ¨¼úcúàa\3%®ñjÜ2ós¯Æ3J檼¹Ó!¼rLjT\¶£t‘Š«£`¥Þ˜uW£æ”CWqu7Jª¸ÚÛ¸ÚÎÖ)®†|ò^Îñ­ ®F»PzNÅÕ¸U4ÙÀb‚«QÉ‘ SauƒˆOfUXÆ’i`•U£­)°²jÜN ªÆ)Ÿ­¢jÕl6T=•$T­È Z%Æ Tã~ýÈ-BÕè”–WAµªš¨†qé9û ¨nUª¹g) ÷äS^ª£äzfH”‚j•Â6PÆBK+Õ¨-¶r> ©¾¾ø#7¥nPíUP #åPím# 
wãSLªñ´TP=ÕsW)p½'ƒjÕô6PG¡'Õ8Òš±IÊ©QÙøÊÇ(/œJ"ñŒO™9uØš…SCš¼ç™MåÔ¸c_Ǫrj êgy„PjÜ·”Rï—g¾à>¡Ô—dëš(E(5ž±ï¿™ROÒUüÌè-…ÔY}ÐMÔöŒ¨QÕX9&j]neð¿A‚¨ýV‚¨!^NGØQk=PCš=&ÿ¾*€åè £jHçRàšjM¨§’ ¨Ñ‡[®)P£¶;!8Ôh‚ÌE €:ìg.8P«à½jˆÐS8œj<å^‡Žœj•¶7@&ÈséʧQ×è$#ÔVø´ŠÔŸV1wãÓÞ·„OCZžŽG*Ÿžž„ù4ܺ{¢¹Œ7 Vá{Ôþ6PC®™‚PC>þ Pã]íÉP{ïBíO(„ÚŽû+¡öþ!„móäJùÙ 5Ò‰Pt¢j72¡öÇdDº¶ʨ½ñ„QC+?³w£öÆF­yQŒQOÂŒZ £Fã…w5Îr¥6}¥ÔÓ=™RKNƒÔø~(šô‚Ôútß ©3Ûúÿ-„š33Í E`™ˆk|lšrg_òQ•P7LbÉk 1â,û8&` ±ÞÞ(” €D“(6A8Þºå=M„·&¢B@tí¦ ¶tB 5ö%O˜(¢npŸSGXµ{̨§ËuÄÅ+FÅÆë·»zà²^:mËOÓî œÆâ%V(ƒ œvOû†Ó^Óîî œÆ‚¨’ˆÂiUp18í÷8 ®(üYüoY›(œ^Rlúç N#Û3x†Ó1LpÚ§Í_R8­®¢Àió@ N/ð[ œVÄØ4æð=Á¦°iLÞ,y lš=MÅÓðÝ–ýüPõÃß²Ði¸ßË1„…N«ã®tÚoÉpzºêy;àÒž §ãEÐW!pÚfHƒÓðD["KÓÞž §ÁpYœ£ªŽ!1cŸNëZHá´f28–ÙÏc¿ œ6Sé´æ€R:í•:mí#tÚ<^¥ÓþÝ6GZé´f2: ²¶EˆÒi´,Ÿs:m ¥ÓÞ„Ncdùªæ‡y¯žöz ¢ö¶DíwDm~™"j[G(¢ö^'ˆÚÆ#!Ôša˵w:!ÔÞ]…PkÎ,#ÔæÕ ¡ööaBí- „Úë*„z§•îÏN¨½_ ¡öBm+1%Ôš·Ðµ÷AÔû¥QžÜXµ?‚¨¯|<{ª¢öæ¢ö†D­Yâ QkÖ0CÔ¶ÐTDíG µ¿ ÔSm™R£¶,m#”Zó´¦F…X†H05îÉ’Z‚©§{¦Æ’°¯=Û TÛÒVAµ¿~Õ¶îUP§ŒªfŒ2ƒjïɪmýª ÚÛG@µ-‹Tã²5E TOFÕÆ>T{moP=ýšAµ-ˆTû·* Ú–® ªý}¨Æ=O aP=]–Aµæë3P=='ƒjï=Bª½É…T{ïaRm_žj‡8Bª5)¡‘jC”UÛH`¬:ž’Ôš”UÛÛTVì„tRYµ¦WtV­cˆÂjçQ«­eWûeo\müFpµWSqµ5àj” XÛ‹`í/R5\4ú ¢zªPÓ„–jd`=ÝS€µ¤™t`m=K€µ~[¬½ÙXËS*°ö; °ö’¬­(±æ œ¬­8°\©ÀÚVµWV€µßS€µ}[°¶î¯ÀÚêéÀZªâÀz9‰|°–~¥ÀºcÚN¡ÖÆ?X3ât^­i¼Z+k¼Z1¸ðj«¬ñjyJãÕrljWs˜x5 Ê«u ™x5ûŸ¯–-çÕìc¯¶-çÕzYãÕìÂN¼š]» X #ß§ ™ï“Y)ªúÛäªÿ­>9ù–Òj–5Zn®›ŒVCf³fJ³o Õá°”<ên´gXó|£Ñj¤CM51£ÕŠ2ÉUï$S«´Z2‹­Žu ©¤­6¶¨´Z÷áM®Z™ªUyQµjdL^SNOÕªmY¡jÕHšïËÔªmq¨jÕ2°™XuL¬ä¨bÕ: ±ê޹1ƒ„Yïp«H¢TŪ‘A•„YkòecÖȉ̇΅Y㲬½ ÐZs{´Fj]>†(ÐZsò:´–„Ê­5Ó¯CkÓ$gh}åö¥˜rfÖ;>¤ ýfDµ|l@™5e96dmø YKb6%Ö–ƒÑ˜µ&ŒThíÂê ­wh§'¸hmYä Zû=…ZKÒÏA­­&B­½e[kNÅÖ––ΰµ&’Tlm9ë [kAÅÖ–ÔѰ5 E†*¶.p,“(¶v£`kôŠlm6ÆÖ¸htþ±ÄUlíSÖ[›`¿bk¿¬`kLnEԨɱ¤QÐ¥bëðõ+*¶Fªôž@±uyßHÃϰu|h )“µ.ȶ¹|T=Uö¦Öˆ‹Í™Õ¨µUS©uAF‚ᇵžä‡Ã±¤0W¥Ö1›÷cÈ()´ö‚B­q‹pn“0ë‚a4ɉ2k»¡ ë )t‡òŸ!k72²Že ŸTdmåYÇGO²†¬ööTjVdí·dLè[ª:)²Þðí¤Æ”"ëøVèKVb·LyCÖpm(²Xµ^¶ÎÁØÂ«7|É#”yyÝça×Ì©õ…RëÍRG;m”Õ@!5tý)”X!u\µö!w®ŒÚî(ˆZÛW uØHZѵ]T5ÌyˆNùt\”t •OÛ …OÓNåMãÓ–žDùtÔ†Oæ)Ÿ¶FU>°îXf=3­òi,YRÊÑø´—>í%…O{…„O#xbMGTù´7© \VØ|:~\L» |:Œ…"³•OÛÝ”OûÝ„OcL¡({åÓÖpʧãñcù2¶Ý•OÔv,OËà¬pÚ>9…ÓwO9>…Ó6V 
œŽ«.yX@á´>…Ái,×2<ßà´æï18í—8­/Äà´ö*ƒÓ˜WI`VátTh˃Ón8­Àà´?§Ài}È¥ñ§W¼ÆPfpÚn¦p*—á§­& §í])œö{ œög8mí¦p:¦’š±U§ýž§!ÙÚÇÑnƒÓ:+œÖQát”£(ƒÓ:Ôœ–ùÃØ´Lf†¦½6‚¦e634­ˆ¡i~AÓö}(š^ßÃËGQ M[›+šÖäYMGR>6CÓÖYMÛÝM[gU4m¡h:Œ[†Ïš^ñ󨇢i¿¬°i|aÓxèaÓVPØ´Û„MÏÆ“GeyaʦãAÎŒ¼46m-«lÚÚGÙô ÷–¨…Mû=…M{û›öË ›¶!VÙtÜsM%‹›–¾ñ½lzùN6}d®¦™MwɬclZq-•¢ªžZ*EU6 )ÌýklÚÄ)M¡Z…M¡Z…'­–ü4­Bt­ƒT­S5T­7®¦½ÇÂ|ªöaÒ‹ªöaúrªöáÆjÚ{rÏ]µ÷D|Nå>L|Nå>ÜØU{O‡jï‰øÜ#÷a¡r¦×§r*,f:ÕnÜT{OÚFuªM På>¼BÕ´÷¤änÚ{R¡¦Ú{Z²›öž?£½'BÕn;E{O.ªJÕ¦ö§JÕ^rSí=-YT{Oê£JÕ^²ªöžwÓÞ“Ë6ÕÞcñÓªž¢½§÷[¶½†+ß;åæGñÕ1@pN4 °ŽÖ¡ °¶ ] °¶ N °†Úç"1ÔM"aP`íÆ®Ñ$¼ïoÖÂñDX‡7sÖ!oÖK”ÙÒãÔk|‹©§ Ö+ˆ5K€µd‚]!mP2¶VvÅyù%– Á®8¹Mgì…`Wè~°œ5l¥N ÁÆe£{Œ¨~!ØQ²Ÿœ¨’vÄ&j*˜Á®HŠ™aò_­­#üºb!±f –ðë0B{nD[ ¿Œk”~Løu…[B)Ï„_‡q;h“Cø5H]ô•'Þ›é5æ;z]1 Ò–“ÐkܱÐVŒÐë µ{&潎Yl'm`×rõêÌð—Û¾¯Q’ŽÍ)¼ãFßÂkPÓ^HøƒáµW–Ù5l¬,ì ³|]ƒ¨~U±º"#&Kh3»Žžºìt¢ƒÙ5l1_„nÌ®+.’Çê”]Wœ«¤¼ŸÂ®+V:D.ìz*Éì:~ˆ†Ìè.a×ñC#Heד+P3»ÆË,H/ìºbkðòƒÊ®Q;Öúvý1}ñŠLßE|£ëh Ö r E‡…\£ ŠPr]¡˜1(J®+¤S8ë(ÙÅ~ ¹FeY†JÈuE<-… ¹F…XAVÈ5.›Ò¼Â­ñ”m¤Ï6n c¡ù¹5jÃV̭ö²J•pk4@M¿K¹uôê²SNáÖ¸ì–A&Ê­£äÉW­ë•;Ç\áÖ¸,+ ·®ñ¤hSáÖ—§Û)#së0¬7&ܵ­¦¤ÜÚÜWcå]áÖ)ÃIùH¸5®î{¦4dnÇc%dáÖޤ­+–´¤,Ü—-2¨Üº"Ú”¢˜…[{?nV|¯é·ÆxD•[û'ÉÜÚÆ ÆÖaŠN~|Œ­ýclÏ£}-àwd‘qÁÖØEbš±5z%ËQlí¯D°uô‘V¢k>- Ø÷d¹hÁÖ¯öWR,Vdh!é8ÁÖ¨ÐÊÕk ò‚[ûG.ØOI}[ãaÉ=ÁÖþ‚­ÑÑÏŒÑSl ˆ°ÓyÁÖh8§fl¾Ìå‚­ýlí£²`k”Ü9œ°µµž`k›Ð[Ûw§ØÚæ,ÅÖVWÅÖ6g)¶¶öQlmçbk\–å[Û§§ØÚ¦4ÅÖÓe[£Öq ã¦ÖáÚ­|]¨uyI¯L©uÁ‡BGà…Z— yäi~¡Öp'ùd½PkTˆÅ‰…Z`µj}¸\˜)µžŒL­ñ(¬,ÔCè¹b Sk_…Zc\ò¼”Pkø)«ªÐÚ†z…ÖpwS T™u…´l*§*³6?Y™µ÷8aÖõƒ´Š¨#å|PT‡[òˆ™¢jLž•ƒ´UÃ3ö£c¨¨wt’Þx¡jý”T£&å92¢œÚ\gåÔ^IáÔ(I9”SÛ¢B9µ7Œpj¼ dÊ©½/ §Æ§:ǧQH‚1÷ úýÅÖÔþ>Åꟾ,oÿò%¾%¤AÜšk+×?~ý³¿º$þàõÏÿúòûïÁáH‹ú&ÑÜÛ×¢¹?¬Ìw1øo¼)ð~ì¿úM/§æyØñÓ’Þí'Ôá€àôS…û‡O­AÝ®“´Ï+~úÜVxµýÝÆO=ÇáÏzOî>µwÛ?ßÁóÓux]õ§?S—pcAÄ3æå½cšˆ‹l±šGµþñ‡[/ÿïÖø_ª¿ýðËÛ¿ýM Ýûöïo?üë—úá×Þ©_»"1-Äòå×ß©~ÇN¨aÔ˜Ù{ ý¿úNMïô÷ÿ¼¾…ƒŠ á‡ßˆGÝ—¸ïK-:¦‚3Ö|~_¨šÇå_wþÃÿ|ûåíÞÚÿxûcüçÐçý”<'Ž¿zç½çŠÏÁùI ymŸ6iáѶ{u3Õ‚lŸ4…vhI½vd¼6lû¬ =<ÞÛužjC¶Oîœ+Ñ«Oi‹ëîÚO´ŸÚO®¯Sû‰ÔæsûÉk¬~¢µ™úÉ5~Ç*y9~õø ÂŒåA+˜ ±Ô¡¢yÚXÞî±ûÛg'¹K,‡ßÏGÛ“ÓùÜeû·YrºÖ¾”rþ™ÛÔßv›Ú÷kýÜbþû ÍÆmšßæ/ÜâÀ{,2£Ä"ïemTvüçºÃüôŸ¿üò»?ýéw?þøãï~i<þðß¿iV}ûu'\c4ƒ^ÞáÓäüÚišÆüë…$I/K¸,ëžÙwX&1ñ”WXÓ1„“lûÇÁ/0’ŒD¼)Ê@T®p’ýYX¸ŒN‚ó%#³ 
ÂI^Àó¦-”a§ ¨„‘ßNŠÌ0¾2]à®Éñ¬‚¨’0  Vb‘‚¨’ð¥^ |¨agê¹'oƒIQÖ£²bÛ¸ÞT`«’{Æd¬’ä&ò+ðÿŽýNÛ"¹ùÊÚá#àƒB’ÊŠ­¡ýÆ?ñCë‰ÃA¤­!RƒñŒö»` ‹ÄV î2 pî‘›&%у^SÅ‚«¤–bÙVâtñ⣌ „èï+–´ÑÍzMEPü>±/èxMµhá÷^êÝ›—"ûe«´›ÅBé,ËQÁúê͈ë"Õÿ²5Ú§ ãJÙ0†kÿêÍ{ÑÄÏî`Ùïô³K(aÙ€wê«7w¶‘>_\%Ùa?ÑYT•èik´ÈÕ›û‰>™² 0 ÝÏ¥Z¦M)oû«7ÃȪ¨¥l¹¿ÑøHö:¿ ºþë 5TO.¥æÆY? tŽüG¥ìÅzõæ~ |j)Â8¶ealû>Rµ”ø[ öWoŽjG?ÛG H,¤fö£‚À è£æ{ Î¯Þ #gCŽéf€ñ~”è“©qÛQê«7G!IS°%øp¾~@m5cÁ ¶Ì£]½·\p?TàXD]½\@ôšá»°Ô(Œcƒ¸÷C”= "MÚ=4£$«‰Ä‹Ü\õ²Ý‰ä_6¤y¿‡æÞ{¼Ôm¤T‰['¤ïØf&-^ãšaÄY™g·©T ‘ꈨ=rc  „í¼‡æË˜’K1Öç¾bô(QGŠ~ýþÒKF_#é´À6¶«;Ù­™¦ Jq½æÙHa³a÷ÀŒö9è8ndç³y€&ˆÿò¶0žå˜aäèËOrCjjYÓÖ{`FûphfÙiŸójv’·,È¢²¿f´‰–Wަ»'÷JÒrÉ»Úþ¸ÌÚ¬mMè ‹y•+»6Ê‘6)63Тےɡ`ŒñðéÇõ[GÈti5÷Å.ã–ùw£ 4ôîÇöž ðÁü¨+¿”Ö¥ñôã"‚)Ѱ¹‘•¬™¬ Ƙýž~Œ-ç}ölâ&‚’Që¡„Q:²i=ƒ²—ìkn®tRo™¨tHíÙ“eE›<[u0"®{ Hˆ×®Ï Œö–ðbŸ×ËÁx.‹¸ögPîW€øˆÏ.èeÏžÖe¬)ëãÑžAÑ=<;C#åÙ …ÛÙB|~åëÝæ6géçØï™ ư¹Ï ìÃ8öa¤}΂1ú|Æd8QÚI[•ýíp·c¢á3 zP&‘K»exÊ×䕉6`ñN0VÊÜQci׆§l½£®kÆÑ]Æޝ+öbŸ1Y;@ÐØ~¾:ܼÛg…ñ~2zÇ^ž)9êŸ! (¸Åtý|Y±ÐŽ <#24îI ©Æºn„ÉÀÈrIuÅ»fGæó=0Žè«~¹ Ëꊇƒ¿uwe8tþ¹®yð¥#–Åp¼cÌxFä0†Ou>ÑNQï܇±“”;Œçp“!f_3^úÙ61èu uCdû3&#ɺªE¶£dt¡‘Á<&d¤Š¿{2ÎÒD[<«3Ÿ80Øâ›kåpøRÔìªføú¨ˆ>nri•W¬ô‡›üÚÈ ¥ºQØ*JƧ3V0žÃMÆ=I<=¾äŒú˜³`-òŒÊ¸')`Ç÷D0¶ô¾k¬ê¶á'£w„÷ý`ˆx1˜v$=já'‡q!%ÄŠ(”á'Ã8|Ï ¾2¼dHù“ŽjE Êð’!»ÿjÒ+údŒÆXÍeuåè“~yï)7W~òøÇ†''±b n8ÈPœ'ÄŠð“á"£$9‰ñ'ÃGÆQª–1Í0ýUè MÞŠHá#c!‘b•õ:çñŒÆÓ÷Œè…1Lû“¸¼‚‚=.2TáK.Ã6"a`ŒÁn,e*"O†‹ŒQÉ`ÏŠÈ“á"C½'*©=.2*KȰ"ôd¸È(YiŽÃi®á"£BW¸b½K® z•<3J¶Bjs¸ÈÈÕP3¢¥B~¸È8öFº›/v¸ÈÞוƀ\ðVŸlÐØƒîÃC†4ox“ÏÚ¼"EÇð§úà Ìqxn§Â*òŒ —‰qþqf`<‡‡ŒÓ}¤ã_÷<¿u :”=¡bö:h £¾ŒËˆ`D=ÙˆPöm{ÒY&Ô0"œwôS&ÔÓUw>YG€¨ê˸-eðÔWb3òtPÏÆ“ôF¢ŸÄxŸ5Œ<¹ ¾J®ÛÈ¥-€ÚOú?„ø59³¨?2’òÖ58çÇ€Ʊ%jØ*¥@}É-@}™”2 †±PVÔ³‘5Œ° tÄú2>ïíg#Ô0ÆP³<Ι êËØó,– êËH }QÏFBÔWˆ¥õ4Œ¨/c¬•Ÿ%¸ ê©g1¢ž„¨§žµ‹‚TׄõÔë˜QO=ˆ)õÔ÷S_݋ܨS_¿çMŠ&Ú0³q`ê©S2¦¾Œ;íD¦ž 6V³²vcL}õIr÷S_Æ~Œ$Ë‚©/ã½P{Oͨ(FÆÔSWgL=uuÆÔWŸ¤lY‚©g#aê©73¦ž>ÆÔsÉÆêmkÄ‘ T0õe¤L[‚©§†gL=_ö$õ ¼Ì]%˜ú2ÒÊ_0õe¤%ºpêÙxVO×#N c ÄÄEvy6N c§¤WÌ©¯‚”~K8õeÄÚt@ìÎ2>‡n’3§žKŠžª×‡HõU’ ©ž“IõtY&Õ—‘’— ©ž¢˜=†<"$¤ú2âÀéãÔ1©žK©¾Æ Êp'¤%o®2ª¾Œ´¹zŠÚ†ñuœÈbTí6AÕ³‘PµÝQHõ\ðEªñûXõ£GBªg#‘êËHáBBª½M™Tû$¤z6©ž+þéx%¤z.y² ¤fBª}¼Rퟥ êÙXXÑK¿£5+“êùª¢ü9åJé…T_FÊr(¬z.ÙYÍž„€õU2@V•õó[&°žl 
¬§«2°†±d(ˆðê¹à‹WÏ¿/¬pëFÖs=w–NÞñ°žªÙX`p“PKáÕSÁƒ…+½ áê©6Œ«§.ǸzêUŒ«ýõ3­öº2¬†m#15Õ—‘‚¡VÏF¢Õ0®”VShõl$Z= L«§±…iõÜ>œ/ÃÆ¦ÕSA9]0‰VO•mÓkú¾“­ß,¿8aëqxÆÖ«ì˜¶^EÿlØ:Ö,£ØúÀAêÜ2l͹ƒ€·`k„¿Œ¼×F­ ¾(µ6Z©µö1£Öâ¢K¥Öè…_ ¶Ž‰œãÃ[ã`@KL*غ¡êG?ÁÖ±˜Æ‹kÁÖX†‡0Vc‚­cUË5‡`k¬î;Å›¶nñI. ¼ [ƒô–½‚­ޱf&ÅÖË$á:ÅÖˆª™‹}`ën› Ã[Ø“žRë°Å…÷gßK©u± Å:SëK¤Š¯ÊØ%¦›±õtUÆÖÆ"½E× 9BG¤€k„â¬9î+¸¾XÒH9¨ä:n¸<(ä!N¤ä¢èƒ‰µ(»ÆeùSv—”BPt¶¶Pp¥°kà4ŠFRvبv­EñT©u˜–ûdz!¡THŽÝ'¡Önjø•h…AÑ…Z£6ð¿ «¾~ͧ „Z£Q8ÚN¨5ªW+m¡ÖxÀ¶&Édj¹2#þ™©5®Jâ/J­wà¥5û·Pëì?;µ†‘¤ù”Z#,f¥¡H¨5ŒqÃAr„Z‡qá 9¡Ößáø6¡ÖðZê/*µFŒ ¥ µ†‘w …ZÃ#¡*¥Öˆ‹ámD¡Ö¥>†ÈŠRküÐéŽPk\f£1—©5ât(¯RëàæbJ­aŒ1~¼N¡ÖñXÜ—hm ýRh}5’h¦‰¹qìN ´FI>×ÄÐcØN‰@ë© Cëýú$†x¯Bë'´ŽÜ…hx"Þòh!Þgh !Þ×h}ùÁtÈF 5.Ë;”­÷Ký%Öþœ­Ñ³yN 5•ø`@ktû˜y2Lœ¡õô( ­¡‚I!r ­qOÞüdhí6Öx /·Í±c̬QO†.¬§[2³F z&¬-Ó¯2kË”­Ì:Œ…2(³ÖÜÜŠ¬÷ •þì°Ú2+¬¶ÌÁ «§šTÍËÍýTaµ¥´VXmY™V›¸½ÂjK÷­°Ú’³ ¬FÁ%Å)V{ ¬¶,Ú «¡Ì\(¾þ†Õ–Ķö1K°µ…‚­§’Œ­-?½bkŸÑ[OBØÚ»—`kÿ€[ûU[£>%1Õ…­õ-~/¶Î¬÷küý·0ks™˜uƒ¸n&61YpXÂÑJô̲ ×÷˜¬0kð1>"/̺!EFêk+³nèÿy:N¡5ç±è|Ž¢(´vj#ÐÚQ‡@k€ZHA õÕ3’X˜5âí%`𙵓aÖX>s ²0ëøpädº0k¬V×XÙ~@¬}e-Äz¿FÁ5[€‰5ÖªOP0Ój\1?Z…ÕXÅu&Ä «}/°z*yÃj_ˆ ¬6¢"°ZÛDP5îµ#å j`ŠèÀ#ÚFP5ÚK‚¨Ucôâ¨uAÕ8n¶’F† j§X«Q2üþ$Ç «q,„Np)­Þiùð³Ójœyø/ƒjÔ…Í ¨F!V¬P cÌ“ã+RÆø@’ª3¨Æ§&Ø ªý#UXíõ1\½ñIfÃÕׇ>ÂWŸËW‚¬qP“$Ë WÇ"€„Ç®vÈ˸z‚¼‚«ŒÙñ,¸:Üc>*b¸:Š%hP\íÕ4\~uFë9®Ž ÖqbÂÕrOÃÕÑÇ)ÚqõI{nŽ«+I½®†LJ§PiÃÕÒ´Š«Ù+®¶Q@qµRÇÕAQ\ Õ°<«;ájå熫õ ®¶OVquÇNÀ8Ë®´Z§1…ÕH9Ô§HZÝU^Fi54Žœâ•VŽWZm”_hµæË3ZmƒœÒj,=óȵÑjkÕØ{JG(¬6£Âj½ª²jÛçPV ižúìW*©¶­%ÕH^u$ÞSRÝáÿ¤’’jû<”TÏF"Õö½*©v£jû˜…TèZ?«¤úx¿ÎË<;ÝJª‘€ä©”T»ñEª­–Jªm$SRí×Tm¯JP5²bn„U[Ã(ªv£ j°¾ÔG0TmŠªøÏ®¨Ú‚ªím(ª¶IBQuÇ@AÉ‚ªmgRQµ5Ÿ¢j´ÈAQÛ‚ªí)ª¶7¦¨Ú/+°ÈïÈs«u§Ô`5¦Ô±n¬Z?cÕVðAÕ^FPµ.d Uë{4T­ž€¢jÕx4T­s„¡êáx¥îØ&OI¥Ô³‘(µºÏF©yX5H­ÊPu‡Ã>â. 
U‹7k¤Z'N#ÕQ°fÆL#ÕöJª½>Bª½„Tgƒ+¤¶‡`F­…1jõÈQÛ3(£ö’7£¶Ú+£ÖU…1ê¸ É­(¢Ö^µŽP†¨uæ3DmŒ"êp!HGÒõl<Ù;fåNCÔádS %CÔº1Dm÷TD­lÄ52Ô¦ê’!ê¨`7D­£¢!j]²¢¶ñFµ—DmïSµTD­ã»!j]¶¢>Á R®e j]¢v£ j›2Q»‘µ?„ j7 ¢¶oDµßRµM9Ѝg#!j»¬"jëЍ½dÑtÜì:¢öË ¢v£ ju® Q[ó)¢Ö±I µ „J¨5^L µuf%Ôêì¡îù¡Øi!ÔÖx­øÕ¾Od¥ÀêoS­¦æOŸˆÐÍÐÅÓÑ"B†OãÍ’’”©VÕ0<<à1 %»Õêºg©ô ªÕš6Pñt b"_§ªÕU£±UµÞ8E~ žÆŒ%±ÆÌ§70S‚ŸªZ]”Ñ«jõú΢‘*Z½>R±TÑêU#QU´:Ú™u2T´Ü…N«hu‘ˆˆo­."”:«“H—Rj4ó¶Ì!ÕpikÍD«OÔL½.’´ÝD«c¢]I±„5«Oʃ®”½ˆ2h+¥†‡ÏR›B©ˆRÑaÔ ¾m–V½ê+¿a.ê„Q/›(*£^6É:ªŒzYTbG@õ²HzHÕ¨-ëf ¨^Šè*¨Æz+^VjG3©Ž{6&âW}^¢¹ÔR½ ‹qì4ëU#yéÃâIª­ÊdG¤ªÃ(:A"U}b¢øN‘ª>1|ÒnÃ-U}ž*$RÕç%N—­"RÕq7T©ê³¥´ ËTŸû×U@ .‡¥E¦úÜEœOeªA 8½€ÈT£$Ë®ˆLõyi¾e ½èTŸë4·¬S—a ª>·¡Ü§BÕPk_áqÝWeê‡ZFþŠN5”Üy êsc]3•©v«TŸñEuâ¢R}";2í{ˆJ5ªÞÍÁg•jp#ž:E¦Õ9ék¡jÜr!Y~Qª>àŸ›ø(U#1s[SNW”ª‘{y§>.JÕÈÔ™[ТT}`O(î¢T¶˜fÆšW•ª]_”ªÜx¤¥ê DOD©úÀøA»‚¢T} ŸoË´¢T ¸ÝSª>=ˆ$BÕaì…¶ D¨z*ÉBÕ¨a¡êhöµ}Z‘©Æ»\hC\dªýE‹Lµ'™j Õ“"ÑE¦úÀ踎LÎ"S} ÏÓ¯ÈTûCŠL54õè[™ê¨k ë¹Cÿ’©ÆoãùÆn‰ÈTá]íäŠLu+ ¼‹Lõ…•sÇœUª8^-µÆE¥úJ†N;ߢR’+Í¢R}@ ”ô4D§÷`gTtª¤ mÑ©Æ>ò£ÈT‡mc‘r‘©>ï…âóE¦™#HN\TªñFhïNDªã!O"³*R}¥ ç o©öž*"Õ¸e£ã¢Räõ5ÕmU¦O¹Ñq–©Æ‡|ÔqSeª„÷B¥ÈT {$Å+2ÕVÙ!SÛ-¹]¦2Õòõ«H5†ÜFÄSDªÃ؆û+ÕH8RîQ‘§Ö®¯êÔVLyôBÕ©­Ç¨:5Þ<¥òQuêèNåÄVujô5:©êÔ×GKÝ"OËR&gÕ§Æ4¶æ$Õ§¶QEªmªRjûRU :*T)×µ T ²$O,Õ WÊ-©@5žs¡óÞ,P*&Ý\*ÕŽöÐ1{¨þÀ˜Õ–:樞ʰ@uT%^còY¨>®|£$ÍÕ‡@5ŽÅª@µ}û*PojíëØñêéY —=Yž›ªmHQê0–û,îËÈÕ¼LúôD ÚEª½¶¢PÆF9ËU¡ú@âo û…êãÂã ŸjTc2 ïi¨›‹F5îÙY¾ƒ•ª½#ˆR5¾èFÇWX©ÓpÉÅ*Uë):Õ>2‰NõyPøõ­SͤhT›G¥"Õ'ò¬íÃjŒÆ1%ä³±HõX®EU¤:®îx~Å"R1þ d)"ÕX¬l´!"Õ'rF”¬D¤ÚV2*R WŠTÇe’>vïD¤ú„Œ(ú‘j8¢šÍ"Õ'”¡8Ò™EªáfÄÀûäqQ•êc;Š•ê()ÊP¢RËòy)Q©†ƒBr©ªR}îªDÍ*Õ'ú1mP‹J5H}T•j$RÛ3Ù ªT‡ñ¤JÐiÌì9Æ}/^¾“N™/nΩ`Ô¾’SB»¤Ô­:ÕU4þM§&ÇWü0!Zü(ŠKTñÃô}Uñ>HF‹ÞG˜N’–P½7ŠÞGU’¤zn½­ŽˆT»‰$ªwÉaÕZNªÕ$òÔjqjt§VéfÕ¦¶wmjÑsqmê˜s¿ƒµ©E~Ú´©UÐÚ´©w>ÅíÚÔª“ÍÚÔ.°-âÔXiR今SïÀ>ëÇPÚ?(¨cëãT† TƒrbB¨6q•¨Þ1â¤Ø¼JT7åªQ „¾Žód&RíF©6It©n’<ÆTª)Çëφ¥/#§¨T•ê&î©T{…X¥Úë#*ÕVU©6YxU©vã­RÝŒg‹Jõ•À5Ã1U¥Ú¢Ržˆ–¨TC¯‡S,ŠJµE¥z6’JuK®½M¥ÚäôU¥ÚŒªRíFQ©FÒ ÙU•j7ŠJµE¥½ŒÒüªJµE¥z6’J5ÒëôܹS•j7ŠJµE¥:ŒñF† ‚¨TÏ6R©6£ªTƒBqÀ³¨T»ñQ©öß‹Jõl$•ꆗC‘ТRíFV©v›¨T»QTªÝ(*ÕM¥©HµÙD£Zm*Qm6Q¨6›TO6Ò§¶$ªOíFѧv£èS»Qô©Ãx2H}j7Š>µEŸÚ¬Om6Õ§ž¤OíFѧFF: 
pëSûoEŸÚ¢O=IŸºIz5Õ§v›èS»Qô©Ý(úÔ³‘ô©Ý(úÔfT}j7Š>µEŸºinB‘§6›¨S›MÄ©Í&ÚÔfiêÉvr—#,¨.µšD–º[ZF–¥V›ÉRwÙ¸6YêÙHºÔn|t©5óéRó#˜*µšD“ÚM©H­Ñ£özˆµEÚ¢G=IÚŒªGíFÑ£v£èQ»Qô¨gãÁαEÚ¢GíFÑ£v£èQÏÆ“c5еYÚlªGˆ®zÔ³ñdçX·µÿZô¨Ý(zÔnAêŽU!oV¤îOÀЉQÓïE‡)År³Þt¨ýF¢Cí%E‡ÚŒ*DíF¢¶'W%j7еE‰z6ì«Q”¨Ý(JÔn%ê0rĽJQÏÆƒ]b5²µÙTŠÚ"EíFÑ¢žGÿ¨hQ»Q´¨Ý(ZÔnd-êÙv²K¬F£v£¨Q»QԨͨjÔ³ñd¯X¢FíF‘£v£èQ»Qô¨í R=j7Šu`«rÔ~K‘£v£ÈQÏÆ“ýb¹%«Q«IŨÍ&ZÔf)ê6!iùÕ÷éß(D݉»Ì\z•4ª5£PÛ+¢»Æ…¸¨Gôÿ<Ô«¢ à`RMáÒ~ \E=F8’Q• TMèÝœÂF5=TˆÅ4=›äx;õX°–þ ˜¦i$4 h:‡º’dªÀi ¡Gaƒ¹t…”I‚ ™®HÌû1™¤Zèdº°éŠÐH §a6]1‹Q^(ÓÙiHÑPàô…ârÛKá4‚ßË–ôíÓ@t¢êÌpúBt´a!p:Jn´!¬p·‹<átØÖ%åÃN{¾Ài”ÜR[Lát|œÖ\á4Ð&«´ œÆ¤õÇvNW4ñšÑ_§ÁaYNBàtÌð³ œ†‘ œ†oÀ§dNëض'±²épGç ›žêÃl,šq¯°iüÀtVØti’/]ØtÁj¯õ0eÓðj ›Ž«®ÑwÌlºtY ßhÏxPÐtè2š¶&2 ÷7h˜L‡m©4 ™.M7.„L—]v L—kÏ7/LOÕa0íFÓåÚNé&Ó1à(`:Œ’³SÀ4jËb¦ éÛ ˜Ž^sPô¡Ì˜`.9Ëó ˜¾üuŠÈ0G¡´ Â¥ÝÆX¶L“ÊP:,’%U 4‘’:+”¾Ö¬ÍPÚ»ø¥Ñ«Xœš™tiº)LÚÛL˜4^? &1“öö&ÃO’r‘™4š†õ›„I—¢ÙæJ—¢Iõ„J—¢yøKOFæÒG)öCÁ4*ÄIì„L—ÂÉbL—MÊ+˜Æº³¯$¤Á`:®ºðÉ(Ó׌@“²€ér¥XÉ6Ó¨ÐJ§ÃLO÷$0å5é$*˜.Å22˜¶ö.ííóâÒ^áÒ(µÍe.í'\z*I\º\ÙÛò€pére Ê=_áÒvÈc.íïB¸4š4>ÉÔßf.{’ªré0n¤Ïª\º ‰E *—†‘4;L—+cTî ™öF4ç$­TeÓþœ§ÃóÙê‘“–Ði¸E|&Oøt¹¨Dîþ ¡ö/„ u)– µ½1%Ô¥LY“P[÷„N˜ädB]ŠdÀTF]ª¤`VJуDÊ…Sc¤£,§JªQÎb(¤mÃù™„TOmäÚz–’jtQpfRÆ••@…Tã²µRD3“êr¾J­T!Õ6b)©FI–R]ðZ)¶TH5Œ¬Ò,¤½™l…T£$k ©¾Øf??Œ¢¾ÆJ¿$¤ÚŸDH5Þ˜(U3©¾¤•*¤Ú¾®‡T{Ó©žŒLª14s £j#T—¢¹Ï„Vƒër26AÖIIDVu¹ÒF¦À k›–Y{IAÖx,˜,È'¾)–"ërõú¡ªÈ:Œ|’P‘5JY*ÄÚFm%Ö6H*±.ס¿Ž5ÿ›lo_ Øþ°2ßå¿ñ¦xÀ_ø±?ã¦÷“¾þ¹dø„»Ã“÷¿øÔ<¯w¼éÏ}þ|å¯öÜrù¬Öîÿð©5xúùèòÜýu½ŸþL-b§4æ! 
ÈoáQÄäÙÂ…D…þñ‡[íþïÖ·¿Û£Îo?üòöo³üíÛ¿¿ýð¯_þé‡_y-…ûµ…‹õçïwynõúö[Åd†>dé¿þVUoõ÷ÿ¼¾ag(†ï~ÿMÏ7âØßö+Óc ûá lÓ±Í×Ýú—·xûcüÿôY?mäúœáÒ†‹ÿcôÎÙ?kZº¼£[JgªÙ>iÄìçPžöÚ°í³Æïø`îƒØSmÈöɳ_÷)mqÝ]û‰ÖâSûÉõuj?‘Ú|n?yÒO´6S?¹†îú¾-ǯºÏz\ÒFX”˜2Kq@U%ÖÓ”±¼íß9 Úm.¬¼b¿´}03Ïm¶ßzŸ¥/׫%–sæ>õ7Þ§vdÑëµõeý ÙÆ}šßç/Ü’T±@ÝbaúVÛ‰Ws;Ø¡Ýÿ¹îð?ýç/¿üîOúÝ?þø»_þûÿý‡ÿþM3ëÛ¯;ªjL´ß~ÃklëØsBïiì0îá7dì–ú´šÞ·…Âs6¨Šè§}Û(gí’ÎvßJÙ\LKÆzîÛžq40îØÕÎP™ÞÉ-»GFHL|"`!yÙ¨F„D5ð~ ï DCê¤![Áؗ݇˜ öŒãŠÏIæ=ÖÉ#B¥#yå¬ßKË ÔñJ5ï+MpËøËGðk/g†’ôkã4Èöxÿ©°×Ù“û¡{Ý2 ¤#çmxïˆôz qYŽmÙk˰ŽÞV„* ü·#$`¨z@â–ovìúáh|Ó™ãáS6G»Ô÷†¤ÃŽL‘C~9{èäùŽ4¬û\Í+ßÑ3 #<‰îP™~t2Ì}K²#‰÷Âhˆ›I-Õ¹Á‡ÚE+»³c+|Z@:™NîØóš}ŶñÚ±­=d)bœkK·£› å ˆ»R°D8é$ÀWN?Ä @kÈO‹Ñ~¤£†ÒñýŒÚFM©´þ˜ao{ßI-ž«SJûý: 4úr鯾C»YëIèyǦîgÀGw$ÆÛ•ôɼmBdè’Ý´_b7OgŽ—ËÑ6;0*uæ“’úì— Úè̃0vhK ½DOŽï;GÒÉ>èø|ŒÆ$ë¼– ó^€#•ÉuïØ±î$²RÜ·;¶$ù-$ø%¯–¶ ·OÁ0ñT$¢…Z åð½áÐ ¸9t²T¬¦ÀN[*ia!Ã3í¼Æ­Hïj/²!•#M+„êœëHÉÜ–ƒd«S“™aÛº0U "çšo°aKnŒÈ{•ýº†]·’WË‹æ%)`bÒÃmk'±(kÚ¶$ …8Ô#óÙµ˜èRúiGìa’(1ᓺ’éÐ~‹™.œöK}ã9ŽóI4A7§ßî‡]ª0!±6¥ÔlØxÊ!¹‰øFÃÞRÉØ¸H¹ÔöÚ1¸»ñ.{] c9$WÙô¿ºDðð$?ÛJ#]#$9&NßJOé"°l:ÙbšKu"ïVPOC2’UPЏ†í–’ T]bÚ«wÉJ2B0Ò¾JÃÖÉ’±ýå¾;"°ÕNb@úhÕ¢‡¦Þò4’ÌLt’ôÁ=iç¨!·sÉн;ïã^ M<dl>´”ÄkÈ65dl£PÀpÃä~ROføß. 
ÿÑ“ÑËim!½¼LÚk4r@Ž93>Øg½aR2’‹“vM‹É.…m”›ö¬c AÚ5ØYSI©µƒäiö«cјhö&aí ^º2vü÷áyè2 yÀSßôZµ!Ão´öíÊ}_`þ“eó/Åã›ó½¦GçÎwÌ–5|«á™ˆóÝãI^ï¾J˜’zß­³º§8ßžð^œoÎw¾½9Oˆó}Eã‘×ξ÷±5 ÿ«ˆêÇ9`qÀ/ék\šáËÒ46ÍÆ‰õ–Ň¼s#rõÀÏ&#xà—Þô‘cˆzàˆ#;3«»zàˆgbçL'ê‚„ƒ£wæ‚›Ó¦.¸u uÁ—÷[tÒ¼o[Ѩ÷Ý£éÊÈí=yß<,˜÷mŸ£úßÈ+±ŽT/“ÿÍàæã„Ù’-®þw•ˆ¸Éÿ–ªúß굪û¨ÐÌEiî÷&Ù&÷[–uê~#+^ų˜R÷YË¥V÷{EËö'(ÐÜoÄ·ö1ѽüïÊÜß›êþÆNŽxÌ÷/°69â8Cî´8âéeè±:â8¨À΂8âaÜIbV=ñc‘¬'ꉛjzâ8êrf¦3õÄñö‰­¨+>Ùoˆñ΃xêŠ#^°Wü:ÐEЏâèž4©+Žo‰çZvÅýP™ºâuÇ´§·M®8ÎU–Bpž]q=;+ŽxŒõïWäðïÉ?ŒLîâˆGoÁp2†0vįZÞG`GüJ Ó¸àåˆ_ù{¶L(Ž8Ò½‚úoêJŽø½çKžOSñ+±MÏ@OqÄp+Ž—8£p$7:)^ñp $¿œ:âÌM ¥~ø=¹§ "~8RôD3eß`?KCÙí?ü@8dzê‡[– õÃMõVüð+óe W?ë|îâ‡ÃÍÝɳ?üÜå—øá¸çJiÕGíhîW?üÀùŸ¥<¢Áê‡[õá(LÁëâ‡wÌgyzXýðŽû§tøí‡GEj¼ä±BGé4È G¼ƒicæWG¼#eoj‰ª#~Ô+úéYƪ#n@ñ†-­Fñ×47Î_¨#Ž]VÊ$«Ž8$÷qVpœÎd?íöÂ;²¬§²‚úáñË‘YêÄGÔCt*°¾Ù7r¢ŽxGþ Ú)G¼­2©#ÞšäxSGÜöÊÕ‡4%ÁRo¼ÁûNßC½q¸”@½q–eþ.Þ¸¡%õÆŽ´åñ?õÆ;öíRBD½qœ–øÌÞx‡ÞIJÝ«7rR’-©;®;øêÛ¶°zãd&eõÆ¡”BIºÕ÷wÍÞxƒC“Ç’ÕWí3dï^ÞøÉ´ßë§púÿ-®8Æî'ÓñR%’¤ Û#M•⊗&j×êŠCv#Ö™»âÛåuŽoW]qx׃™Åƒ+s±˜p¼õ–‹…orÂ\ƒÂÛ5' Í 7ïA¼o‹@TïÛgsñ¾m›[ƒÂyßP]o ÚR×{ƒ->¨ëq*¾$ïìzC2®Ðƒ]o|мÁ ®w=$ìãzãS^¿æz£Z쓲ë õÊ7¨®7®Ëû±b×;JrDu½aŒÑêCºÞ©cS‹G]ïŠð |‰âyCäaañÎ8¬ž7„ÿ(w¦zÞ¸%1)õ¼Ãs8…‹³ë][ªzM®7ºJM13u½+¤­hŒ×»î’BS]ï ¿ƒUØõ®;'åUÏ»’—X]ïèô1,e×»b••²8êz‡±ðÇÌ®7‚áÎí+á—Ö`J¦ÜÎwÝ5ꇑ׾ ÀÑZ÷‡1¹à0r¾pˆ•Rè–p“B­ppÓ%“Þ+‡±||,ó² ¹\¥ß0q ¨¢LFDÁ¦¡F‰C±„ž‡‚‚ž¦q(“‘ãPì–ˆâ6ŽCAþXÛÕ8K.+q(X.ÑÁC±ì£OŠåÕ8Ë;*q(–{TãP,÷¨Æ¡ôK:£‰$Åò‹jJׄ”â{ÂAñ¿=¨¢ 3iL½#ÜFQ|ô¤¥;àÓeÙG…X¢\ðËØ ÔK J×tžâ€ÃÈÙ.Å—ìî€O—•@”®-ÅŸž=ðù²‡fÈcõwñÀ=#{àž“P<𫲔³òÖ¤òG`|6¦~ÙHÂ]\p7Š >7Mǹ*Å÷çÜû²¸à³ñÐtm|xV\ðéžì‚Oµe|¾ìÎIÔ.ùÿ‘C\ðËXé)ûàS…ØŸ«f6£Lâƒ{ÊFñÁç«6Í@&W=4Çg4Ü2è±>Õ†=ðé¢/Ü3Š>;%íò›œ¡K?7qÂ-Ùøàž±O|ð©:ìƒ{»° ®¹òÄ÷bìO¶M“V‰­jZ*ºûÞžËMœo/wX).Ǿ÷lÜ4]”=o7íšÉ‰MMS5ì;³×=ÙëžêI^÷l+–:‰2–µ9OÝ'%HšÔP–z‹nNôš&têÉÕP6µàµà“˜HÛ³¤ìÓ %9Rø}:ÿ ü>ŽÜS(Bà7t‘è£3ø}H† …ßW‚DV¶à·©(ü®W€ý Œ}ÛV¾ ¢lÈz2tY'E”øMºªˆß#uD~êª"JÑÐ|Áß8Ú³qp‰(¢èQ-SDÑFÁ߸ê¾g8*¢Qö}ð·mu*þŽÉç|™~Û¥ßv ÜèwL[%sO› =Ögé¥ì»ˆª¹²oœ j;ãösÐÆ^;ÃoÛ•4ø ö“.É¿› ü6Œ¯ð‰Ï(¢GáwÇJ±å¿£çÚ¨QøÄ?tJBáwLìã8Áïx#ãE+ü¾œ—ôƒ~_ 2vÞ ~G}$HÆàwçC/¿qv˜â½~ÃÈZ27üÞd‹OÙw×1aß`ö=o\ñíÏ6ømøVá7²ÇeƒßÂÛ}C:C¤&öÝ‘Amijû_ºq„ˆ°oHu?Hc"ßLä{¡E‹‘oíoF¾ãóì¹ujè;žrüN^÷µãÀqåJ¾+¶±Æi¨ }q 
}û»Û=aJEßndô{Š/ÌèÄ#eø}¼»„~;.7ú-@™é7<+ÌýÆlx> endobj 10 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 9 0 obj << /Type /Pages /Count 1 /Kids [7 0 R ] >> endobj 11 0 obj << /Type /Catalog /Pages 9 0 R /Lang (x-unknown) >> endobj 8 0 obj << /Font << /F1 10 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 12 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000244098 00000 n 0000244120 00000 n 0000272853 00000 n 0000272874 00000 n 0000272903 00000 n 0000273335 00000 n 0000273203 00000 n 0000273097 00000 n 0000273261 00000 n trailer << /Root 11 0 R /Info 1 0 R /ID [<83CADD7B183EACD28A91ED14A014635C> <83CADD7B183EACD28A91ED14A014635C>] /Size 12 >> startxref 273417 %%EOF blis-0.6.1/docs/graphs/sup/dgemm_ccc_kbl_nt1.png000066400000000000000000006602541360743507500215420ustar00rootroot00000000000000‰PNG  IHDRâÜJ&¡ &iCCPiccH‰••gP“YÇïó<é…@B‡PC‘*%€”Z(Ò«¨@èPElˆ¸+Šˆ4EE\•"kE ‹‚tƒ,ʺqQAYpß÷?¼ÿ™{ÏoþsæÞsÏùp ˆƒeÁË{bRºÀÛÉŽÌß(ŒŸ–ÂñôtßÕ»­Ä{ºßÏù®‘iü常¼rù)‚t ìeÖÌJOYá£ËLÿÂgWX°\à2ßXáèyìKο,ú’ãëÍ]~ )úÿ†ÿsïŠT8‚ôبÈl¦OrTzV˜ ’™¶Ò —Ëô$GÅ&D~Sðÿ•ü¥Gf§¯DnrÊ&AltL:ó5204_gñÆëK!FÿÏgE_½äzØs û¾zá•tî@úÑWOm¹¯”|:îð3™ÿz¨• €è@(U  t0–À8à|AØø $ȹ`(E`8ª@-hM œà<¸®ƒÛà.L‚—@Þ‚°¢A2¤é@F²† 7È ‚B¡h( Ê€r¡PT UAuPô tºÝ„¡‡Ð84ý }„˜ÓaXÖ‡Ù0v…}áõp4œ çÀùð^¸®‡OÂðø6< á—ð"Â@”]„p$‰BÈV¤)Gê‘V¤éCî!Bdù€Â h(&Je‰rFù¡ø¨TÔVT1ª uÕêEÝC£D¨Ïh2Z­ƒ¶@óÐèhtº]ŽnD·£¯¡‡Ñ“èw †aaÌ0Θ Lf3¦sÓ†¹ŒÄL`æ°X¬ Vk…õÀ†aÓ±ØJìIì%ìvûGÄ)áŒpޏ`\.WŽkÆ]Ä á¦p xq¼:ÞïÀo—àðÝø;øIüA‚À"X| q„„ B+áaŒð†H$ª͉^ÄXâvbññqœøD%i“¸¤Ri/é8é2é!é ™LÖ Û’ƒÉéä½ä&òUòSò{1š˜žO,Bl›XµX‡ØØ+ ž¢NáP6Pr(å”3”;”Yq¼¸†8WJ)Hq¤"¥öHµJ IÍKËIÛJGJJ·IK”aÊ8ÈÄËì—é”y"‹’Õ–õ’Í’="{MvVŽ.g)Ç—+”;-÷H–×–÷–ß,L¾_~NAQÁI!E¡RáªÂ¬"CÑV1N±Lñ¢âŒMÉZ)V©Lé’Ò ¦$“ÃL`V0{™"eyegå å:åå–ŠŸJžJ›ÊU‚*[5JµLµGU¤¦¤æ®–«Ö¢öH¯ÎVQ?¤Þ§>¯ÁÒÐØ­Ñ©1Í’fñX9¬Ö˜&YÓF3U³^ó¾F‹­¯uXë®6¬m¢£]­}GÖ1Õ‰Õ9¬3¸ ½Ê|UÒªúU£º$]Žn¦n‹î¸CÏM/O¯Sš~°þ~ý>ýÏ&   
©†.†y†Ý†iñªî¯&¯v\½mu×êׯ:Æ‘ÆGŒ˜ÐLÜMv›ô˜|253˜¶šÎ˜©™…šÕ˜²élOv1û†9ÚÜÎ|›ùyó¦é§-þ²ÔµŒ·l¶œ^ÃZ¹¦aÍ„•ŠU˜U•Кij}ÔZh£lfSoóÌVÕ6¶ÑvУʼnãœä¼²3°صÛÍs-¸[¸—í{'ûBûªƒŸC•ÃSGÇhÇG‘“‰Óf§ËÎhgWçýΣ<Ÿ×Ĺ˜¹lqéu%¹ú¸V¹>sÓv¸u»Ãî.îÜÇÖª¯MZÛé—ˆÿ2Â6¢,b&Ò*²4r*Ê*ª4j:Ú*ú@ôLŒMLyÌl,7¶*öuœs\mÜ|¼Güñø¥„€„¶D\bhâ¹$jR|Ro²brvò`ŠNJAŠ0Õ"õ`ªHà*hLƒÒÖ§u¥Ó—?Åþ ÍŒ]ã™Ö™Õ™ï³ü³ÎdKd'e÷oÒÞ´gÓTŽcÎO›Q›ù›{r•swäŽoál©Û m ßÚ³Mu[þ¶ÉíNÛOì ìˆßñ[žA^iÞÛ;»óò·çOìrÚÕR V (Ým¹»öÔ±? ìY½§rÏçˆÂ[EEåE‹Åüâ[?þXñãÒÞ¨½%¦%Göaö%íÙo³ÿD©DiNéÄ÷e̲²·7¼Yn\^{ˆp(ã°Â­¢«R­r_åbULÕpµ]u[|ÍžšùÇ‡ŽØi­U¨-ªýx4öèƒ:§ºŽzúòc˜c™Çž7ø7ôýÄþ©©Q¶±¨ñÓñ¤ãÂÞ'z›Ìšššå›KZà–Œ–™“!'ïþlÿsW«nk]£­è8•qêÅ/¡¿Œœv=Ýs†}¦õ¬úÙšvZ{aÔ±©CÔÓ)ì ê<çr®§Û²»ýW½_ŸW>_}AòBÉEÂÅü‹K—r.Í]N¹<{%úÊDÏÆžÇW¯Þïõê¸æzíÆuÇëWû8}—nXÝ8Óâæ¹[ì[·Mowô›ô·ÿfò[û€é@dz;]wÍïv®¼8d3tåžý½ë÷y÷o¯ñy02*|ñ`úaÂÃ×2-<Þ>†+|"þ¤ü©üÓúßµ~oš /ŒÛ÷?óyöx‚?ñò´?'󟓟—O)M5MMŸŸqœ¹ûb݋ɗ)/f þ”ø³æ•櫳ÙþÕ/ M¾¼^ú»øÌ›ãoßöÌyÎ=}—øna¾ð½ÌûØú>|œZÈZÄ.V|ÒúÔýÙõóØRâÒÒ?B,¾“sMT cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFs¸NBIE] pHYsNNÆÊ/¥tIMEã0M†™ vpAg7O£¨u€IDATxÚì½yœuÿÿLHÈE†ÔÈ=á°†#á’3^ÑjÙtë*˜¨Ø-Y£¸ëÚ½âúE»Ýý‰ìRwAQ±K³ÀŠ.v­‚’8‘‰„)®L¦ÂrpõïO}ª«{ºgzΞ™|žó˜ÇL×ù©êwU}>¯z³J¥R …B¡P( …B¡P( …B1¡Ìnv …B¡P( …B¡P(Š))ÄÅb±f7á€Ç²,2™ –ei;®ë’Édš}8MAÙñôÃ÷}2™Ì Û?PíXÙðÌ¢;–×ÀLBÙñôd´÷ã™hàìx*0™}ceÇŠ‰d¤¶¬úÇ•(;žY4ËŽ§¤ç8N³›p@“ÉdpÓ4Ãõhñ}×u›}HMAÙñô£³³Ã0*lÿ@µceÃ3‹Fì8“ÉËåšÝÔqEÙñôd´÷ã™hàì¸ÙLvßXÙ±b¢-«þq%ÊŽgͲãƒ.»ì²Ë¦ÂÁÿð‡?Äu]t]'—ËqÙe— š~ÿý÷sÌ1Ç„ëÙ¶Í/~ñ Ž9æ4M ·uÿý÷³oß>,Ëbß¾}èº>hÙF—ªÍ÷ß? 
,àÊ+¯dÇŽœvÚiÃÎŽzûéôzíîx=Ï#“ÉpÇw ë:¦iâyoxÆ=æ;vàû>¹\MÓ8æ˜cð<;3~ðƒ€Pœ÷íÛPsùéÌxÙñdÙpÔ.ª¿Ó±Øp­cíôzímäÜÔ³¯zÇæ8wÞy'…BÓN;-Üî?øÁÆŽëÙp­y3ÝŽ'ʆ£í;jzv,ÛkÛ6®ërÿý÷3º£f´v¬ú“kÇC×hîÇš¦Í–çWõëÓh¦Õ桎YzMtßx¦Ý‹å9PýŠ2Íîfœ§úÇÓ³_Ý×Ò?Íýhš7Ý#Î÷}b±Žãày‰D"œ‹Åp]Ïóèìì¬P쉖e…ëK7Y×uI$d2|ß'‘H„Ûq]7t%mt¹z¸®K*•"‘H„_ŽlßPó†:8Ž“ëº#ž>T{9^Çq0 ÏóBµ?N7t̉D"üþä¾¢X–E*•BÓ´†–ŸNŒ§O– õŽÆ†åy˜ v<”}Õ;6]×ñ¨ 3ÕŽ‡²a8pìx¢mx$Ç<;nÔ&£v,;“ÉÏç›g„ãÀhíXõ)&ߎ‡:®ÑÜgŠ ËïOõ§ŽOVßXó`Çpàô+乘 ýãÑØ²êOÏ~ÅPßét¾G—I¿b4ý㉶ã9ã¶¥QbYº®S(ÂÏŽã`Yš¦U<ˆä—gÛ6žçÑÝÝ @2™¤³³“d2 C‘Û“d³YfÍšNkt¹z¸®K?š¦á8N… ùPójaÛvÅñÊ/ßuÝM7 £î>9^Ïóðœ<Ï«ø¢ä‰Â/6ªHú¾þ?Ü<ÒåêašfÝåPójQ}¼òíD&“Ñôñ8^ß÷éííÕgyãê¸ Ãç麎alÞ¼9¼u]¯X·Öò2gÁtc¼íx²lX¶©Öw:R®u¼Í´ãzöUïØÇÁ¶mz{{Ñu˲H$áCt¦Ûq=†ËŽ'ÆGznFbÇíSËŽs¹†aL;›­ÅhìXõ)šcÇ›7o®{\#½Ï$–ǯúÆSËŽ'£o| Ø1XýŠZÇ;Æyª<}û²MJÿx¨~E½u†êµÎxØqÓCS5€¨ñÊašfø[,›}(ãJ½¤#>Rt]¯p76 cйÉ1Èmö÷÷ ©¶v?SeÇõ·v\¯-C!´Òþ“ÉdE[fºäa| ÙñT²a¹ý±¬SËŽåÃY³f…ogÍš5-“ÖŽg² ËcNv<ÒûñL²aP}Š¡Žw&÷T;Žž‰²åƦ”ÑØ²ê«~E½cNýŠáÖi†7]ˆ3 Û¶ÃÏ2~Z×õŠ/+ú2M3T"¥ê™J¥š}(c¢úxs¹¶mxúX‘aMÒÈê娅Œþ¿|ùòð–ÏçÉårxžWwùéö–D¢ì˜šÇÛ,;†‘Û—t™Ž®}øÎt;®gÃp`ÙñT²a} µN-;.‹”J¥ð T*0v<Ól¸Ö±NE;^¾|yÝåGz?Îçó3ƆAõ)$SÅŽ'«o<“îÅ úQ¦³-«þ±êWÔ:ÖéÖ¯¨·ÎPýãáÖ+MMM&“ضMgg'š¦…'AªíÑrÉiÔíííaÉés>ÔydY)¤ÑécE×uâñ8#>·2G‹¼aW¯gÉd’L&C2™vù鄲ãÚç¡Yv ÃÛc5étÇq*ÚRk™jÇõlXÎ;Pìx*Ù0ŒÜŽ]'jÇÕ¹8¦3£±ã™fõÎÃt³ãÑÜ;>îŵÎÃLïÏ$Õ¯ê\L'[VýcÕ¯¨u¦[¿¢Ñu&ÓŽg•ä+—&UA¸ýI%Êq»ÕÉ}߯ˆÝîÔSZG:}¬Èdžž[éN_(Âøê¡Þ°Œtù邲ãÊcj–žFò}ÌD;®¶a80í¸Ù6 £³¯™h“£a4v<Ól8zLÓÍŽ«Û3“¾“‘ ú•Ç4ÓûÆ3Õ¯(3]m9ÚÕ?VýŠfÛðdõ'ÚŽ›î'©þrd™[iÈ–eÕ|‹4QxžWázZÍHÕÐF·W>žm­¡iš6¢ m¤ËOu”O-;}æû˜Iv\ë8D;ž*6 £³¯™d“£a4v<‘6 ÊŽ'ë~<“˜j} _»˜n}ŠÉêÏ4T¿bfزê«~ÅLèWL¥>õ”ñˆ«…,ÿ eOÅÔÄó<Ç «îŒ÷òÓeÇ“ÏdÙ—²cÅD2û:lr4(;ž|”MŽ/ʆ§Êö‡FÙòÌå@²}eÇ“Ïdõ'ÚŽÇUˆÛ°a}}}´µµ±jÕªpúÀÀ6l`÷îݬX±‚®®® 9…bõ)^õªWñÅ/~‘¥K—Nû8qÅÌDÙ±b&PÏŽ• +¦ ê^¬˜ (;VÌT¿B1Pv¬˜JŒKޏ¾¾>ŠÅ"7ÝtK—. 
«bôôô°mÛ6®ºê* ¬8å¶ùùÏž#Ž8¢Ùç†x€“O>¹©mؽ{7»wïæ˜cŽij;¶oßÎâÅ‹Y¼xqSÛñÀ°~ýú ÙöxÛñºuë”ýL%ûiöwðÄOð•¯|eB¶]ÏŽGcîëò³ŸýLÙOÀT±Ÿ©ÐŽÝ»wÓÖÖÆêÕ«Ç}Ûã}/–ífÛÏT±cÕŽÁí8ÿüóÇ=—ê«vLv;&»<;Õ?Ž2UîƒSÅŽ§Kÿøÿ÷Ù°agœqF³OÙ”`ªØÏT`çÎ,X°€Ï}îsÃ.;.BÜÆéèè ¯¯mÛ¶±téRÖ¬YS1OÒÕÕÅå—_>äö}ôQÞþö·7ñ îºë.V¬XÑÔ6lÙ²…-[¶4½7Üpmmm,[¶¬©í¸ë®»&lÛãmÇýýýMÿÞ”ýT2®i |ØOõìøšk®±  4ýœ)û™zíØ²e }}}²íñ¾Ë”fÛÏT±cÕŽÁí÷ºñDõU;&»Åxö+@õ£L•ûàT±ãéÒ?~ôÑG§„O¦ŠýLŠÅ"÷ßCËŽ›G\___|1lÚ´‰uëÖ±jÕ*vïÞÍÒ¥KÃeÛÚÚ†Ý^?×^{-ëÖ­«¸0&›÷¼ç=M_ºt)Ë–-kz;d[ùþ&‚žžÖ¯_ÏŽ;&lãmÇ;vìàÚk¯Ð‡Êp(û©¤Ù×´´ãþþþ ÛG=; ?ùä“Üwß}<ýôÓMµcPö3•Ú±aÃ~ýë_ÓÚÚ:!ÛŸˆ{ñ_þòX·n]ÓÎL ;Ví(³víZ{ì1–/_>îÛž¨¾±iš¹æ&›fßT;*ifÿx4v ª\«-;7ÛŽ7lØ€ã8Ó¦<{ölÂmè4Û~¦ ëׯçÞ{ïeÞ¼y -?.BÏ éêÙÓÓÃÚµkGÝQ8öØc›>èšÚÑ‘´µµ5ýæ 4ýâêèèફ®bíÚµºŸñ´ãeË–5ÝŽ•ýTÒìkzºÙñqÇÇ[ßúÖ¦w2”ýL­v¬ZµŠ¶¶66mÚ4aûÏ{ñÉ'Ÿ<%’/7{ÿªƒ¹êª«X¿~=ÇwÜ„l_õU;&šéÖ¯Õ?Ž2îƒÐ|;^µj«V­š6v|Æg°wïÞ¦÷§ ͶŸ©ÂºuëX±bEÃýãÙã±Ó¶¶¶ %¹££#tóïèè §§'œ×ÓÓCKKK³Ï“B1eÇŠ™@=;V6¬˜.¨{±b& ìX1SPý ÅL@Ù±bª1.B\WWÛ¶m ;7n ß2Hã–óÇ6‰§BÑ ”+fõìXÙ°bº îÅŠ™€²cÅLAõ+3eÇŠ©Æ¸„¦¶µµ‹Å¸à‚ Xºt)Û¶mãŸøD8oõêÕ\pÁtuuá8×]w]³[¡„²cÅL ž+VLÔ½X1Pv¬˜)¨~…b& ìX1Õ·q2í¶mÛèèè¨pé\³f ±XŒmÛ¶±nÝ:å²(;VÌêÙ±²aÅtAÝ‹3eÇŠ™‚êW(fÊŽS‰qâ`èÄ—S%)¦B1ÊŽ3z¶ªlX1]P÷bÅL@Ù±b¦ úŠ™ÀTµc›9ºénæéQL"ã*Ä) …B¡P( …B¡P(†ÆÅ%C¦ÙÍP4q)Ö P( …B¡P( …B¡žp&&EŠhhátÅÄáûàyƒ§I\ pœúÛð–¦X¯X¢À;à#~39ø?rxEÐzáô,$2Ai9à;6¯ÃŽ,ä!ôUÔuÈf!ŸŸ3Àö,XXÀ‡ ø‰ ]|Öۇ̿ŸÄž=ó:_JˆS( …B¡P( …B¡Gräp('óð01CÑ @C«øëº.^u³0BÞpd2Bh“Íôøuªëbßñ85‰ gjBÓuD££•¢èºh@!“á2Ó­ðâ“Éí²Žƒ·mñ¯|ví‚àX°,¡HÊýë•BÜHPBœBÑ$êÝ| …B¡P( …B1}qqI’¬(Ì óÀéèØØ¹â$žça®ëâº.™LÇqH&“¡¸æWä#¼âÝïÆñ<¾ýøã$ ²Ù,šiVˆvZ•Z$ )8ŽÃ[Þr>®+4¦xN?ýS8ÎC\xáÈfm’hw]ö_¹ß]ð:îNBÒb›‡Ú¤˜f µdðšrh(”Ÿä׿.âZ«0"m3t_žÊJ ”ºX ,‹ø¯~…¹e Ýt’ô2­O–d­H<ç—+IxžøÖpœPÄ+hñl–ì’%tGç[Vð»býGÜp‡>õTCö¡„8…¢I´ÓÞì&( …B¡P( …bœ‘…¢Bœ‰É­·ð…S_ ˆ ¾ïãû>7Þx#·ß~;º®ƒ®ãy©TŠ-?NÎ4ùÑúõÜö÷…Ç\ô•¯pÿYgñÿ}îstþìgüÓsÏqâÇ>†ešü¿•+yøÄqxë¹Ïrïo‹5uM:Ö'>q?~¶-œÅþøàƒ¤ ƒ-_þ2ŸØ³‡ ûúXúË_råYgѲu+ÛÍ@L’®K.È;§Ó±¡ªw’¬ï£çó"ŽUŠZ±Íu!~Ò=ÐÞ.µXLüJaQŠiÉ$xéÇ#»n˜W(ˆýú~¹4k´ɤøÕ´r Ë* }é´˜^(ˆöUãº">Ä|Ç!T1ÛÛY´e û,hÈ>TŽ8…¢Édgâ,Ù1nI¡P( 
…B¡P(ÍÆÅ%MzÐçÛž¾›Ç[?ÌþR?Þ¿˜\ýÝ÷pÎ9‡1ÿ#ó¹âŠ+ø@ìót†®‹Ó|žu}}¼ûþû¹ð™g°€Ÿ=ü0ŸÙ±âša,¼‹.BŒÇÃ:áöYð¿_È«íÛÉ|h%g~:žç‘ÍÂk^s«WŸË‹/Ãü÷>Ìgã'°»¯ÒiQäàÈ#9zåJö}îsü×7¾ìüò—á’KÄñ¸.©À#Ìu]Œ¨p•Ë q«»»»iYa•‡]±ÝçŸ_Îy'Üó ŸúYç{Ê"Y¡ –I$„–Ë•ãUe̪Ót]¬'‰&±“$…\ØÙÙIww·XF¶1˜GOOÆ­ºî`q.›¢`<Å"ýíoìß´©!ûPq E“É‘Saª …B¡P( …B1C¡¨2U~~þù'àN]³vÑ÷£sùá¯Ä0 Î9çÞÔv;_ ÀQ_øg|ùËøÀ•mmœýªWÑñÍoÒ <õàƒ†eAk«Ð°’ˆ\lI ­ël?äü¯ ¯¾èAæ_»‡tvòŸúVàÅwÒI'áhšÓéÊ’±  „8…¢ ø”cÕeÕ…B¡P( …B¡PLo|ü0ÿ›ŽN† &Âû못áe_yæÏÏYgN:æ§±‹xâ0nÿÕ!ägÅGIÔ§K IŽã ëzè0&5 ˆÓó¿õÎÏÂkžÿ1Ë—/A÷ õóùÛ'Nåu^H·ëòÀ)G°{mgüá¸À{,‰ÿû¿áжYôË_”…8YÞ4“ኃæ{? b~<ÎÒ;ùÈå—C2Éá'ŸÌ /¼Àk_ûZÇá'?ù û÷ï§««Kl¿FuبÐõío»¼l°¼;DEX×u¹è¢‹Øµk‰D‚k®¹†x<ŽëºœwÞy<òÈ#¬^½šýèG|õ«_å²Ë.CÓ4®¿þz.¼ðBÐ4žliaçé§óµ¯}»ï¾›mÛ¶Uw»wïnØF”§P4¡ø×KÒ©P( …B¡P(Šé‡‹VH••Så˜ïîg^ÃK/_vœÎ·=D&S®ð©ÂŸÙãŠl>»-:R”BœçyìÚÕ‰® ÍJ×Ed¦e•5¬ÖMq]öîÝÎÛÞö,ºwß}¥R?…BÃ0HV¬à„_ÿšä´ó<›Î=— ï»úûùþ#pçwpÉ%—‹Åøà?ȹçžË­·ÞÊ“O>Éw¾óîºë.~þóŸ³gÏþðÜs´a²H…óæÍãÓŸþ4W\qßøÆ7xâ‰'èëëÃ4Mþû¿ÿ›ûî»D"Á¥ï{ŸýùÏY¼x1ýèGùÖ·¾ÅÞ½{ê³–eqÉ%—T´a(”§P4wçâ†yã …B¡P( …B1}¨N;ôkLL`ÏŸæpjlm«oÆúL ÃŽcçä7p3ðJS8—ªÌ@ô‚›nº‚bqy˜ÒLÌ«LÉ&ó›yžWÎÅV… g}rÓ& àÙk®ëY–øunêû`ÛtìÙʵk¹ñàƒùÿººØòå/ãº.¦i²zõjn½õV,Ûæø¯ýë¹õÖ[ùÎw¾Cww7¹\]×ùÇüGþú׿ò®w½‹|>iš˜¦Ém=Æ{O<Çqp]—\.‡mÛ´··sÆg°fÍ^ùÊW’Ëå(‹$“IN>ùdŽ<òHâñ8©TŠööv=ôP:::X¾|9œsÎ9 lÛæüóÏ'NóÑ~”'Ÿ|2¬R«i¦irÇwð²—½ ]×9óÌ3ùÞ÷¾@6+ò¹þóŸÇu]b±{÷î%•J‘J¥0 ƒ£Ž:Š3Ï<³!QÅŠ&âã‹j9Ahª…¥rÆ) …B¡P( Å4Äà =âZ1yšŽm`Ç¿>ÁiG´sÙ5çŠJ£IÈóé§Üœ†4ˆf9ËåÊÛ#ÜCGÇ?Q,þfØv¸®zlÕ# XÁ2a‚x:;Åñ¸ˆwÍfY¬Ù²…3¾ñ ¾ñ¡ñÌ3ÏËåÈf³$“ɰ€C>ŸGÓ4®¾új. 
Š;˜¦‰ã8œ~úé$%1™L²íàƒi¹þz>þÉOò†w¼#¸’Éd¸íx<ŽaœqÆèºÎ¼yóÂãŒÇã¼ãTE%ŠÅ"š¦…9ï4M«,2!¿¿ˆxiY0Š›ÏçyùË_Žëº$ ;ì0 Ã`Ë–-œwÞy ÙˆòˆS(šˆ¼IKá-C +œWí—"Õì&+ …B¡P( …¢ÒÙ`+BŒûטÀì½óÑ t»°BܵÀ3õ²èæy¢†A2)œÓÚÛcÜrËõœsΊaÛ`¾ïWVþ¬Eonß%—ð“'ŸÓ4M$ŸK§Åon©ë:×,[†®ël߾矞b±VnM&“,Y²„SN9…ƒ>Û¶‰Ëʦ!ÎuÝAmÒu7~ù˼jëV^ùÊWâ8ûöí#ŸÏ“ÍfÑu=ÜÎ7¾ñ ¾øÅ/bš&;wîÊ¢Ûw܆ïF¶|P=U×õP„“퉶ON³m»Ò‹Ð­Ÿçúë¯'“ÉðæhŠaPBœBÑdŽ8y“–bœƒC‰ „8;\ÇÃᱸs…B¡P( …B¡PL.^Xœa6&Ÿõþ“ƒçÿÓqX´èq ÊBœ äWï ¦Éb–%„8©‹-YÒmÛ5½¸j‘H$/ëûbà \í Ö,ZÄSßùµ¶†ÕK1 H&q|Ÿ½§žzËÝsÏ=ض͡‡:HÄ›÷Y¹r%ŸúÔ§8á„í_ŠvµØœËñÂâÅaA…jV®\É™gž‰a¸A[eÛ¤'^£d³ÙAa»ƒÚçBM_ îüáär9 …–e±lÙ²†÷­„8…¢ øø‰<Ûi”/Î ~äÿÀŒâ†s•V( …B¡P(ŠéD4Íqçöïøo{Û)wÜ è”…8á)÷ày™®ÈNh®[Y@TÓ4<Ï«)PU“Íféïïs›…ضpµáíÖÝÍn¼‘û::¸û½ï\€ëº¸®Ë/N; +ï-ZzÛ …¦i<ôÐCƒ¦ËðÒZ¼ü£ÑøÐ0 2™Ìð^µšZgHmš&ét$;_=ÍÓ‡×ï}=ÙlÃ0èîîæ-oyKÃmWBœB1‰$H„Þps1ÑÑC·et$%§ƒVÙ‘˜˜ìƒ xuâî˜Éd°,kPešZëÕºI§R):;;Ãx}…B¡P( …B¡˜ D+<àG?zœsÎ9 €SN¹7ôˆ“#'éÃ%ljé´ÐÚ¦aYétšt:M{{;íííáüèÛ¶Ñu=ܶܿLp™H$°m;ç@<`¤ðgšføFd(7f…B¡P( …B¡˜ Hg ¡õÜÔ7‹/®ô”J#ÆrõüÛ§"JTl·O4Û+K¯.ÇTJ¨{†…qÛ®(8H$H§ÓÓäØQzàÉ ¦[¶lÁó¼ —Éñdˆœ¨j'¹À¥PÓ‡ñžK_‚ HG7©/V¯êGþÊyrÿQÁÍ ~=„7L¯˜üô«Ÿæ‰’âŠ)‰‹Ë0q‰WÜxÍÈ'—8ñ HƒàEtpéÄ"ã؆!¯{ÒÀœK,à GáºÇ¯Äd\¿|»!Ýxãñ8¾ï“ËåˆÇã$“É0ÿ€eY¡K²¦iär¹Š›t:½Ù …­­­7pß÷I§Ód2’Éä R”u¤P'+ê8ŽVÖI$ …ð¦oÛ6š¦Ý=[¡P( …B¡P(Æ8mÇŽ=ö. 
Rˆ«ÎòfbVä×õÁqCbY¢ªCԕζEA†B¡BÕ“ã59, á¼L&C:4nK&“èºÎ¹çž[‘žh¼‘û)Wð7Ú¡`å…]6tF¶!=XâÁ_éP—D|ivd»I„ I“Bl‡%–;ô‡6|œJˆS(&——UyÀ¥I‡^p’çß 3?Èw.p°‡?î¹MGu£ïÙƒµp!?ûàl;oOþñ§°óK$nZÞÄdE˲p]—l6Šmr¾®ëƒ“yRÇ_-|EE7®Äº®W¸+Ë 6µÞšH/7ß÷+r[Ñu ÃÀ²,òù<©T*<&y,–e…ÿ»®Ûp"SÅÄ-H¢P( …B¡PÌdüHeX¶iïyσ#ÞNr8©ËU@$™áŒ‡¤ã…tºOµªšÊH§êé®ë’N§éìì¬9n9YxÁ‰¬â² ÎñVó¼ Ä4™çM xz0-ätj‹zRŒî_ƃñbbÝcÿãXXÛØqªq Å$ãàptU@ºÆ`—Z—ÍÀŸðqBÜ`Ñ ‹Ù·p'û3o¹í1| ð«'Ø\x9;϶ëÚàÚyBÊfÙrƤÓi>ùÉËB.™LbšfE(j-¡,ŸÏW¼‘ˆÇãC&ÏŒŠgžç…b™ q­…®ë‹ÅaÏ[<>˲BáPæžÓ4X,ƬY³ˆÅbd2™ðØäƒ&“Éàû>©Tª2/‚bB°°Èiv3 …B¡P(ŠIÁÅ ÇvpýÕ/ŽgéD&#<àê…ìy•ŸEîÏ,¶mpÛmÏæÍ»xþùC°m3Üöxy1†Љ®ëbšfXɧ¿¿Ÿþþ~’É$‰D‚\.G&“¡³³3|¨È°Ûáˆ&(µ,‹Y³fU¸b×Jbª(ccWTýU( …B¡P(f:ð*þøÇÆ7•Žç‰œo¹œ¤éºøM&Åo¡PáÀ„ã&™†HF1Õs°ùÀ£D£§ä6¢,†IœÓ võAáÅŸF+žúU˺UÓ$&Ñò´e!.´Ç¦,ÄÉJˆD¦ù‘ϧÒ#„G ýãc JˆS(šÀ3CÌs€~`×sÏñz4^þâ‹ó¥/±áQRZÛª¡-ñ@óÙ}ìÏyiÉ>f}ìiÞúÖ‡yê©å|ùË‹82ç¦à¢CåÙEgÐÑ1Ÿü¤ðpσÎN˜7ïúúÎå™gæríµë°m1=•‚œüÍdÊbœÄ²Äo,V~ cÛbùã?ž%Kúéë;Ç)ç M¥ÄßDbxQÎ÷ ‚RôІÈ†AGÇëð<áÁ'(ò’ÉdÈçó ,Ë F³fͪxHår9lÛÆq2™ íííá|™/êE˜Édê zÍ6³¦ ~<¼ŠÊÀ …B¡P( Å€ ´GyÅE#Æq„ç[.'ÂNkl[Ë“c')ºI¯¶IÅbp©Ií°R>%ƒº¢Þo27[ŽÊÜmÅ`¹èð,zzªÃ[£íñ#û0‚õ¢E´`¾O¹€ƒ$È57gwã™ßTŽ8…b‘ 8>G¯m({ĶaðØÿÍEû{8äé§ùÁ·¾i8mÇi¸¿vYõÚOó#\¼o§àù»8qþ7¹¾å"@äá<; ÿƒ7YAnN–,ù(ß´@ÏB1×_ÿ:>÷¹Ý<ðuT¾ßFw·¸¿»®¸¿wwC±(^´´· ÑÍuËÞsñ¸X>™ÿ }ìAöî=š/|A,—ÍŠyž'žŽ3tÒLF}¥Ryš u’Ïçñ}X z{+çÉBÉd2Ìg×ßß”«³:ŽC2™¬ð”Ëd2YQÍ5‹Åð2™¨Ûn¾¹{îYÁÞ½G‹é ¶eâs.'¼ï¢Hç2-›N‹} …çiáv«Ò!Ô-@åðV)˜¹®Vé‰ÇãaÎé'óßù¾&;5M3õMT„¬Fÿ rQï»™FŠ&&2¡8×I'6“üN¡P( …B¡˜DdZøÍg¿üpå0$9`«á MSbªiÚ b{£Â†³ÏdƒÒèòV0-êýæ×˜åò²µÂY5Ê¢œô¼“Þrv°ÏêÓŸapÉÚ8µE4Ù¦jjB^XüBçpX!n`` á)Š¡1.`óÑÝ<ÏÃVÜs½; (úÙ¹'étšeË–Ð×·]‡¿æMœ4p¯8z/—,ü¦e–)îŸLÂÿ3…§nK/œ¨•_@Ä€g;np;MÈçËš$›÷üdRˆpòs5‹¿ÀÓO¿Œ––­µžhZ˜ÂÛá­Pà/Öó<1MnsÈï#ðj³mÏóÐuîîn²ÙlXÑÀ¶í°k©TÂ÷ý°J«LVšJ¥ÂíÚ¶¾q’ÉQ-Ë¢³³sFæ”óðp‚“,YttâÄÑÐÈ‘#I‹ásó) …B¡P(ÓŸg00¿þõu Ö5„ãˆ|™ßÀ¶d žháºqÇe°—›Kí¼oÒë-Ú!~EÅ9!ªUf¢—Ù~¬Æ>ÊbZ¼Fû$Yê{´U3>¡·qãFÒé4===ôôôpöÙgsöÙgóÎw¾3œ®P(ÆNuH*@{.‡Äí?ÿ!Pºšüx"ÄrÉ’%Ì ¼ß44hy n2|yÏËfábCÜ» @žò}LÊZ[·¶pÏ=+*ŠÅ •Ê@zÂÕyÃòåý¼ò•yúé‹ën#Ÿ/o'и0Írž9¶*=ð:;aÛ¶o„ëËçŠôÌÓ4ñkÛBÐkÔù,›Í†^kÕJ]×Éårèº^!ú¾_‘àTzÒù¾ëºhš–ø‘_.•Jaš&üío¥åL=¤—$† jhdÉb`&‰I’¤ª¢ªP( …B¡˜ñü èï/Mˆ“¹‚,«<`ªC&“! 
QA…ÑàQ)¤ùˆð®¨ˆæÓªsÂÅÑZ‚˜IYpó#ûÐÙZc¼T°}3² þZ )Nc!µP_´fôõõ±víZ–.]J[[ ÜMÓ䦛nbÕªUƒr Õc``€õëךvÍ5×°~ýz6nÜ81G2dÉÞZȧ¢E¤8g00p›7ç(Å´j³–š³è|_œÈçÅË$éa×Ú*Ö]»v»vµ"·]uähoooCPù s]—x<^ñà“ùç4M#sê©§Žè¼Œ–j;ï{±ôÆtq10Ââ õ0001q9° Y(FÏTîS(¢ìX1S˜è~…B1L´oÇE¸½{·|¾_¨€ðFÈf+ÂRÇ”î&N‡:ŸS}J¤÷Y´Ûï#<Öê B‹u¶×‰DF=Uò‘ýTS+¸& ÔJ n1¢ “ÉÛêÕ«Ëç¨X¤££ƒ–––pÚÒ¥K‡Íç8ΠezzzضmW]u ç 6`šc‘&G†ÌÝ$U†•e2<Ïò¬pÐÇÃ03™3jíÚçòË/mß¶…Ï–-ç±b…Çwlção¬-r›2á»Ì=eYê-Ë ÷ï8¦iâ8ù|MÓH$aÛòù<ŽǶÅu)‹§˜&<ûì<þøyÄãŸBÓà«_ÝÄ-·<É©§ÎeåÊc'íÜOG&ÞŽ“÷,Ïó¸úÉ'¡g ü{÷¾B¡„ã8|÷»­˜”ȑ㨣Žb7»Ù¹³­bkyòäÈ‘!C>¸k5RÄFÎ?¨c=wÌgNë hˆ 9ÄKY¼&Nå½Ñ æËð~âŸ@Üë¾yì+xðA và#îA}ˆañ}!˜I/¹x¼,Ä™f (2wî3€˜–J‰ª® žQ27œ¦UzËÉgj"Q.‘ˉ°Ü—½¬‹ãßÊOÚÁ¹çV µD8Ã0j~÷¾ï‡×¸Äu]|߯ð ;üðÃGa7#£ÚŽ'â^,ÃRut44Ò¤‡â È‘ÃÅ­é9§PD™ª}Љ FŒ<ù!¯Åôä@²cÅÌf2ú ÅD3v¼#xQíy°xñâÑ4R î]·,¾E¢sdœŒ`”Ñoõ ÔY=4zJä°F¡œ{):à‹ i²‹ãQW…r>·jÁLæ|«þ*ju•4j €2„k#tGËl€¶¶¶ o·žžººº*ܶmÛàÊ+¯äÒK/­˜¾qãF:::ÂÏ]]]“溜ËåBUÛó<\×%ŸÏS(Â_Y1™LbYVX4,Ë ½]cÓ¦ËI$Ä5‘Ë•=nº»á´ÓŽ¡Xì'“ÉJ¥H¥RaEE¹ÿT*Ekk+¤R).ݰ7oÝJ»móËçžãƒ—]Æ=ï{ŽãðéO_…ã8är9Ž9æ4Òé,žçñÝï>Àa‡Ãòå?Åó|¾úÕUa;d5Kϯ»î<ÒiÑ^]‡÷½ïV6o~ˆóÏÿ#íííüÝßmà«_½žX,Vá)'ÅI( —¹œ¾UXä~fiÇÒcM§ò>áy÷ ÐÒ²• P(ˆL‘¦iò±½ Ÿ­-[yàX²¤¿b»2—ƒS7_ÜP,\×@Û«EÊÈ$Ðü•÷.)´Ioc‡r˜«¼/Kß·8°ãM‹Y°`Ë—§ÉóRˆõÞ1yÀÍ” .D©ÔÎ8ã+@95ÊP^ÛÉd9—\:-ösÒIìÚµkØê¬•íÉz@뺎mÛƒ„»fg¨eÇq/vq+¼à’ _…)N<¬®jaÑNû¤ŸÅÔg*ö)Æ‚Ì8TXvõýÛÇÕ=]1u˜iv¬8p™¬~…B1‘L–¿Ö¯_Ïc=6æm ÅxÚñc=V‘ÀÅe€r˜<ù'=tw¿û*¾ùͽ¡ \~ùñ@y€öÌmÏpÎ9µ=SãÄq¹+õ³÷À¢Ãç šõ:–!ª9ÊáöFd™4‹XzÔ½°p!Ïr6ÂÙŽ×BÜWÛƒ_Yô&ìÕá>6Ëûk¥|/ðÁ‹Äß‹."qHf+Ÿ#õ*rËkWVWqm¼ùÍ/±té·yá…ÅÄã #ª‰ôb­•UN“vüÄOŒn' RËŽGcÃO>ù$®ëÊm$ñð011Fð„ÓÑÉ’%A ]匛flܸ‘n¸íÛGnÑ ãy/Þ¾};7Üp6lhÚ9Ë!òÎÆˆÕ´w7œ ù–ë)&†õë×ãº.O>ùä„l"úÆJèPDifÿx4v ƒûÇ ÅÆ§eÿ¸^¿b7Ò)Ì£«k÷È­Z× #â\h(SL­ê¨Pö*©é¤G\­ut†®BZ:%=å´Û™ ¼mcaÆ #êÏXµj}}}aBò5kÖ„Êð\@__ß ·yQ®¹æ:::†êF¢E‹X±bň]9¥7—ôHÑ4b³&’§' 4l[$r/E0žaÀWÜV.Ê º^)¾I\à§À¡‡žñðãqž~òIfÏžÖÚ&i´ž>O¦`ÁŸ~ËòÅiáU枊iÂ-Y˜›€G486;~õ5?ÿQ~–ÍŽoåÎ>΂;øjöå”Ö‚³?¶õV&…!sƧ/™ð„Ý~6/9Qýñ5¯ù{Þýî•Xüîu?äSÏ-â{€ÝŸ±yåíÏò–ö~>ßý?4—CÖ&ØpÍ~Î:ë÷ô÷ÿ3§œrk×Îᓟü mm§óÛß–xÛÛÚ‚ó‰D‚ÓN{wÜ1Ÿ•+¿Âw\È›ß|-®kÑÝÝœËÑ©‹/fÅŠÜu×]ãbcµo;niiaÅŠÓ^ŽÁ ‘Ïß¿çÝõôìçu¯ëà __icAH¦ŽN?ýàÃõןPs&樄¸ý?8Ô. 
sÍé@©æ> ‡‰Qý+üÛߎc늋ùâ^™Ff­ÁÿÝÁôeÏ»¢oÉŠé2V楀¾#߯æÍ[yòÅ3ÃuÍà7Z[>3jU¡Ö4áÝjšò9·€ vN^ˆ!F'«”ÀèƒQÚ±¼&‚ñ´ã… rôÑG²c mÄát²°ƒ††ƒ£BU§K—.eÙ²ec~YVñ¾/^¼˜¶¶¶Š7Þ“IÔ«ÍÅÅÁ ¯‰^ tÓ^@>ÚÐUoTëfÈ$9¦Y™Cr*²bÅ ¶oßÎÂ… Ç}ÛÕ7ŽŠ™Ò?VØÈûÚtëÕ¯0þvN8áÁ‘ïÀóÄ€¥™LÓ4Éåra$ÕˆÑ\}T&üŽedÕÒBÕ41H‹Sö\“LqDÞ7Y,!z(j‹k‡œ¦)‡aM:::ؽ{wÃýãÙòŸuëÖ…ÞKëÖ­#:ýÆoReÝ´i×\sMXÈ„ð%]=«Ã^«ß V³hÑ"ººº†]®ÇqÐ4 Û¶Ñ4ÓO¿’DBˆn¶-Â4S)‘*›-‹kž'æ}ù˧×ÝdeHa7™à78kè^¥—²L#Pú7á½wÐóISé\Ä5Ù‰çd®:/øœC\¯ Ñb1î) ºÿòMÚÚn®Yü¡Qt]s9:Á iÇ‹-ÝN žÏ™3gÄ6¼páBŽ9昚v,Eƒ,Ù†BR«‰…óïeå]w±÷‘s¸Þ‚Ëa޵lÊ¢¡ăóîbZ2˜gó,8Ä¢ØÞl~îq6wœ@ÇgðüÅðà¹pðÆdûö9ÌýÜ?òÏçœÃž=y,k1/¾ø!ÀçË_þ2ëÖmàßþ-Íþ°”ßÿþKìÙs!Û¶½–dRœ÷þpÛ·ÿ Ld¯iZø·YL¶k@ÏYgÁÏ÷±ê’>úÑ僖‘ÞY::³fÍ¢X,‹•‹Tnot‚Æ¡ÙʬÝpüÁÏŽê8j¥5²9æžòžA/7jI62·'ˆk±PˆTPEiYàê£*ß0Èj­ÒƒN¾GÓ){ÚiÁú°Í„gr0[‡Nsà[Ì8 ß÷qá-'ï‰Ä "EuÑu}7\3òÃճ㾾> …¸ٰ‡7È«g4ĉ#VTU(¦RŸb´Ho¶4éP@spj¾0qp01±±+æùøèèát¹qKæhÌ“²-~ð£ ËQËsM¶y´×º,΢¡…‚}† qâSÖKn¼˜ v¬PÀäõ+Љd2íxöÿžK?´SŽëB×GðÒòCð[\?ÏN@4úoÔÈÁQ56b%Ç>Q¶ BdШæd^8“ÊU»Î>j½W¬öœ“LPý‰©@(ÄEËøvttp饗ŽËÛã¶¶6V¯^Í\@WWŽãpÝu×ëA¸®K*• ó´‹Ù¬°aÝ6p®¯JÂI>hF9WUÐ xpLêéS¶1Y…×@xÔÈy’BÜ‹'Ê^¤š&îÉ$˜ÉÀ®#²¬­˜æàór[uY×™ ó÷êƒD¡ºà½ˆc²B…ô J¿­!ð: ’¦XÎ@ؼõÒKüª¥…mGI’À‹Ô€®9ÜmÀOO<‘t<Î-·¼ÈæÍ­€aÀ²e£i¦çÅßÎ'>ñ,ÿþï_gß¾\sÍŸ¹å–«¹÷Þÿ}ûÄ Äu]<ÏÃqâñxEŽ´©ÂDØñW¾òøüçá×äâ+Ϫé,¿ëxð3²°ƒ6ÂÀù§~{8ó–¿ãä0"÷¾`Á‚†ª·Z‘ut½¶7¶”رcAźò…K½J±:B ÓL{®%.öù鸸ï×}Ü¿\Ȯط a7Œ±%üˆãÅxÛ°?.i™3NæÂRbœb(&£O1ØØxx¡gb†k½0‘b—…¾T‘ëgÈT\kí´££‡¢^T «EÔ¯Þr ¡—jž|(V··ÑPSŸ ™AÇl bj‰~c ¿NL;V(†BÙ±b&0vüô /àoŸÏŸŸ~–ý×GÚ hüŒ•Å eÉ1WHÍ gõ¨"q¨dÙ”½’$q„P¢WMë Ö¯Ž8ÎÂð¨–9 $nذK/½”¶¶¶P”ËåF—$¸zð¹fÍb±Û¶mcݺu#9ŽT*E:fÞ¼×áûÂ[(“ž,RlÓ ñ}K¯I)²õÿ»AAŸ²}U“¤vq]/ o2_Z8¯jÙѨì'ïÞ gœ1jE@&ÉV.Röø3G rfQ.$`š&móæ…Ç!ÃÁñEÎ;0³Y’Iøð‡ßÍܹÙl–ÿ9õTÎþô§Ñ4¸ãŽèúþò—½Ü{ïÕ<ñÄÛxôÑ.–.ý=tšf⺙L&8—:žçEª×6ñ¶cŸG Ìwûí·Óºy3ý=ûîP£–ZÞpÑåFšok÷Ú“è߸˜§®EUŸ!H§Ó ùçéU«uX“òK’ç7µpk?%æÉçDÁ/QjUË8Á„³Âî?—åEØ»w/‡ò¶p¦+ª>W[(7^`Eþ÷¨LYTæRŒša‹5LeR©¦iâº'ð à›0ß…kõr5ÝeQ`º¾[­Æ›æÈ„¸èvdXn£ÈœrPöŠ[_( áð xÏ¡ìji%A9Œ5*¹@¶Pà¾ûî㼎¼çæ.:ƒÛnUröï'ð³)ÄyxœVñmšœxâ÷H§ß[wj Ç÷…WZù|ᑇ_<>îÇܨ‰F_˜ÄãõmûÐ}û8ôý~ ž<âö¼KƒOfË"¼¼&^SÓƒVq` =154,¬P˜“â›,Våë&ê™VÿÓ 
~ªÅî8ñÐÓ.zý™˜añùÙÆÆÇ¯õL“ÆÀÀÆ&GŽûÛOí™ÛØJŽ5¼ÜLÓDÓ44M£P(4¶­Z¸ï˜jЂéåA¾Åâ]:²lt½záHŠzÄõôô°~ýúð¨ø,§M%|ßc¤ïðáS&ÒöÓéô¨<êªilh[¦zeOV™_îà¿À®ÖVQÐr1)äIï¸ëN?%¥!¯íû»¹÷š×‘NÃþÎVvî\Ⱦ} p]—Y³f…žÓ½2gïÓˆ4\Ë[n(+éq0zî9ö¬oãè£çãÃ…wïß>LmÈR~vÏš…ëúXø²Ùd³Šð¬[=!z÷{ð¬QÞ–3:‡­ÅD _iÒƒŸ R¤HÃN%^(&È\V*$UÑL¤Y‘b(jEÃQPœ30B!®ŸþA"X=a)MºfÑ=Ü·‹KŠTè'‘B^’dè©ÖO(° E‘b¸Œl¿eEÖVZé¤3#)èRí =¦è5Ÿ#p\\,,<9ÆÊ Ò3ÙÁô•ŽNN팅èˆÍ öÑüÚ5–ÍÓg‚t† ·=Íòå/~…:¹or¹®ë†UQÇ4þ—Þ1µ¾è$•IµeWZ qµPÝíqc6ÆC÷ôô„¿]]]Ÿ{zzšÝÖºlõàÞ2ƒ_ˆæÊøxq#¥^7Ú¤œc®´d ûˆj–²’¥ü:,Êy] 1kd³øÁç}ox{N™K</Ý>ûþúO<üð‰|÷»ÿÇÊ•ï"‹…Ŧ3"|pR©‹=ΟÿüqßÏHŸ—½¥òpá8 qõ¢¤ï'Çñì×7Ï¡®9oîܹ>Ö)¢E ¤g—†Š]²BèpÞb£ÁÀ N<Ì37QäÉ£¡…לÌíæà„a±c=ޱ0ìVÞ dñ ™çN†Þ¶ÒŠ 2,ÖÆ=¥­¬6+ïY²¡§B¡PÔ#ZTl´d:GÂ"3ضL3"–#ùñ¬1N’í0‚ÿåk‡l°8åȤê`F©­ÈêEÊ}d)ö‘¶A9Q æË¨Ä4å4E.Â1#ì_þŸ@ˆ‚å¨Èe­D°¬ãΔ÷Ü0ïÞá´,1Ыvªë:¾ïãº.ÉáB…†Ã@|ÙQ–Fmtu^)#˜^¨³MŘ©Í °aÃzzzؽ{7ttt`ŽT˜\WTÚܣÉY°f°7\-ò“T…¤Ñ¼zGy$ó‚壕Š]Ê×zT|q W»/“<+îeßÀI|ùËGÓÞ~Ç{ƒ(p0Í1 <Ž“NàÍo>lÈåuWT©@2~㡞#¡ÚÛy<˜wÌ>ž»bËîüðŠaÎCã9ê^ðáy¿ìmmKLØ ,С- "¼oAx’7"Äyž7#‹5Œ¦ Èh‘9£€º^;!h(µÈ‘ CeµÓjQ.Éz'>È+,I’ ™ « š$z‘M$:z˜Ë.¶š# q£õˆ30Â<‘t’$I–, á9Ž A^+…;; S v <ád¾<•[N¡PÔÃC]„Z7eoµÀGµ¦ r){”õÛ’¹Ì]DßS°4(§éÊQéˆQ]¤R~–k&õ{eäRtÙ¨Ö"=ë,„(ÉÓÈ_7Òn³jÙläÿè¾ –ïf°žó·ãŽ—ï®D_@íøÐRÈ“{[ l5t˲H&“dGRѯRñ­&zò¥AG—‹#.€âdœ½“PˆëééaíÚµ,^¼˜X{500Àå—_Ά Èf³•U›ã8ìß:ëuÈéÓG˜ÕFªªLäM9Iù ”å·*fd^.ò¿C°sŽx†R×^<ŽæSYè¾ùÍØ¶ÍÃ?ÜìC5ÀñÁÿé´A2ùä°¡Æ£y 2ÒÅñ/¾È– 8Þz÷ý±püÖ­ÜuÑJ¾{ÿ°Ë&i<b¿­‘Æ€—:œ†—€m9H§¡Ï,?³ä›ÇZÌÄ¢#@˜^¡8ÐððÈ’ s–™˜C^Ò#«šþŠáÍøÓH†±"sÎE]C #&æ¨Û sîUŸ+)bÊíJ0*~$HP¤Xó{‘ùó²dÑÐxìˆÇ`û„ž*…B1 h2^†vÊBtO0“r{¡U$«ÖŠ]‰`©gȨ¾èx©i†G޳¤8ØH¿7ÍЂ6Ä2²M&eQr´Ošêõt`ÇÂ…£ÜZó‰¦myî¹ç[i˜žã8èº^{¼£1‘LX¤;&”0D>{TŽøÂT^¸ bÁmíÚµ¬^½š5kÖT,°nÝ:Ö®]Ëå—_>eæ' t]ǹí9ÒyØ^3iFŽ·ÉBcðMUŠ1ÒZ^ÿÕot¤+4Y±àßæ½Ö ¡Î‹ƒáöq×]O°yóDHF‹Ë^ʵ‘æû“zŽaŒXò«6mâ–÷¼J3¡´ÉèxZƒY‘ϲâùr‡ê[ˆ)à¿x­)æÕzŒJO¸™*¾+ ..qâ¡èæ?£©R=SˆæÂ‹"ÏQžñwÝ71ÃÐ_)¤U#ÅÁzâh/½ÃíF¡P@¸”Å6Ÿòx&ŽèßI®V_OGŒ_‘ÏEDø§‡ÇT¯;ž½Â¨×ˆpi*Ëzã]…UvŒó6'ñ2J0–ˆBŽêy^ýñb£bRH“á§µ¾\aÄr~3­wælØ°ŽŽŽA"@KK ¹\Žw¾óôõõ……š‰ïûÄãY~éÁ£)P}ªæÓˆ÷QT¤+RCÏ#®ùbâüçŸg_n.§þ×í¸¼ ¸çžÜv›Ï)§|«Ù‡:*||BÇnÞÙF:½Ë>¼1“ùÈ|º»Gª:“8áÁ™ûÌ3œ|òÉãº]¨òþ6/›Zbðƒ^ñlº)+¾‡ßÙàYðÚî ·ib(úbKÓ4 Ã+;Ï$dñAqàÐN;½ô†yÏdž2‰, Â+‰¯é8ÈU 
ü[k™¢Š¥Q(uð({­ÉbF0½VúêÐÓ(QM#YN¾†9Þf®†¢˜½˜ÈÖVèìïû>h"—aÃç‡ó¨4.©F VÆ,†ØŽôˆ“±ÒÑéã­¸*BfƒKíè註P[[]]]lÛ¶­Ùí„:ì8ÐgÔÌo¨hÃåãÒê¬#ÿjÁ_Ó‡¥?~ØÃêW‚‹¸/ôö.tÎ8czzÄìBÃzh;v,hhtZˆu²ºµe‰;ãÍtÊcÖvóÍœtÐAã¾Ý­U§@>‡t³ì·×Âèã6ìwa‹ 7fàvÙk1Jw÷Ì,+¤r²)¤è&=â œƒ3Mš…°x€¢L–ì„s™(‘O¡PÌl|Ê=ý௬*š‡º>¼ÕQ|µHÖYFæOSÌ|¢9âž}`ˆ\wC$šŽÅb<ÿüóXVE„¢‰%²m)ÄE ¹VeŒZF\7N1®ÌntÁJðNº®³ÉƒÓ 8[Ýݦq†Ï©#îKg¦!¿çwœ¿|9 „¸SN™ 8~ø£Í>”Q³+8ÆG=™DbVCÉþ5M„BööŠ¿ž71BÜT,ÈR¿÷=Žn¤Ñ©7l<§FÏì…À{î/lÉAÂSÎóÊÞq …bf ó¾Èª½@X3K6¬Î©˜žì™·§ÙMP(LbšBŒ+²áMæB«•^G¡ Ñq‹ßZÁ!„¸îînæÎK¾‘ªŒõÔßêîˆ[õ½LcYÔE0ÉÌèêê²"eOOϰ^s“…LZx“ WNRåÐñf:yõ<æ¢þBQ×m3⊫ó}‰“N:©Ù‡1&–DþûÛG^©T×GW¼a8Fš¯n¦ÒHpÙÓY!(¯6›¢Àâ8,ʂշk°f†G©98S6O…Ì*Æ k7hœ8Ýt+!nóÔ¡O5» …b‚È!„7Y•Ô¦½§"î¦û§q± xi{öÔyÁãº"ïк¾¾>ÜîðzÌûõ¥plØÊEdRÙ°3í˜ b .+¤V{¾mܸ‘Ë/¿œ5kÖL™ª©›®®.¶mÛF__kÖ¬aݺuÍn+ ᆱ‚}ŸnvKFÇè 7Šˆ{ÌQˆ{Ƽ£¦g‰Ô9ŽÃ®]¦™ããÉH:eíÀ ^ža©Ž[Úa‰;]:lB¤S(Ó)꺸a·n%N…B¡PL :)çž®žš)£0Ïóð<×u±mß÷éíYõBß÷ñw2¿©DVǺˀù:lñ…}>åÁ.^Ô…'\±XY¼AN› LåÐT…b¼qpBñ­€ª¥P(Ó˜®5ë¥Èf¹\×uyŽˆ²Ò4b±H&“!“ÉÍŠ$Ʋê&'™ÚIŽñåÿ¾ï 5çûß?ˆÓN{ÿ÷·…ýííÛ÷R,Ñu®® üìg'ðÑæ±mز^ñ ñâyÉøÎw>ì[ô5 2™•¼ñâÙg·aÛ:º®óë_›Æ‘d2ðãoaî܃øÏÿœM<ŸæÙ=ÓÓáÄ‹»$Iz\˜ûàE5p‡ k’缡4+²šiµ^ç½5¦E1©Ÿ'N1©Ì©žÐÑÑQ3œa¡+j³Ê½ãèÅ‘)f ¦ ßµ§¯ËòÃqpÖYw6»9Ó–fˆ˜Ñ=&)¿hzUnñÀtá×´÷‚8:Þ<ô\Wxǹ.´·C>_÷EØ´bª†âMÕv)¦'..Y²ÍnŠb‚Ù=gw³› P(FI¡-xÁ_ƒ©/Âù¾Ž«}ßE1ÏóÐ4Ã[ÁG?ú>ö±÷’N§Ñu!b Ëçw¿ëcçÎÓñ<ÈåàñÇ×ãûÐÞÞzcù¾¦iÌŸ*¯ýëñ}Ÿt:ÍW¾²ˆ-[ Ÿ30pçŸÿK–”X°àN9e.®ûâqˆÇ¡³~øCQ4nÕªUœrÊ3rˆHÃuúŠ|Fåçnø¯šç ›Ø‹a}jï½ïçÅ6û«:: }x¾Þu"ãR©TcÊ;¿Õ9âj½/¬Þ¥Ic‰±Μ±obòp‡X,…m£„¸ Ó„ç¦y¨ßÑÀ~ ¯ï¨f7E1Jih:è>tðS`‰{âp·'ÞJÏ~ؘj·Í`*çaSžzŠñÀÁÁÂBGÇÃSï gáþ…lY´E…+Ó'ò+}¦â•ìû>¹œpCŠFžIñ-™L¢iZ’ØÞo}ëåa?R×…(P(h¬_¯¡iåTc®{0Ÿÿ|!ÔydÔóDZ²Í›Ë‘o{›Ñä²OUÕ«ÉE¼¥º#IýEäCFtܦY~)]‹x$´²ÿ‘Ã8èÞxO3¿©±ÓzÚ>æ´Ý ¼·rFµBYqÊ'¢¡¾¬ü¦¨ô€“›ïDäwª·)Õ­™L;!náÂKH§•Gå†ïÃÉ”šÝ1pBˆ›)!Š:·eÀÈŠgêE—ÈÂÓ<âÀIÁóö ΰÞèÓ YARQ‰ãˆg&#¾÷ø³AKáÖ0*ß:+&ž)<<òä10”À;ÃùЯ>Ä«W¼ºÙÍP(#ÄE€Ì"ô ¡K4ÛWÃuÝÐÃMæW“b›®ë¤ÓéŠbžW)VuvŠ>„m‹çmZ!ŠU%4MŒ•:;År–%ú#ñ¸X?Ú'Is‚ÒãèN˜Cù›þéW¸Ó¸ÃãããÏÏÛÃ+O[0¢u£9ázù'…ê±……¸â”óÀ)ÑmÊ2»Ù  ÝÝKس§cF„v)FF“£¢Ç ÇI“ÉŒa™àšWñf†|cAÓàHcð‹¦8ð>à~ú’b¹:0_BœeU¾iœŽ4”»b†!«ÍÛvY ËdÄk9?—ƒÖÖò<Çëöv±žç‰\µîe™Lyzg'ÌšUÞWg'´/,/'÷SQ dšÛÔTBVI50ÐÑIOù'…B¡8ððN?eÝÁD85CÊÉår8ŽV •a¡ÝÝÝ ŠÅ"&ã8¹œxÆû¾økÛåçzw·°z{+Å·¡0 
ñ2PF`¸®X¿Pû‹ÁÉd÷á‡7» £ÆÃûsO?ÝøËÛ¶kæ)°ÉU£UTÓÁÿ>¨ìS—9}}}lÛ¶­Ùm–Ç_:c¼J#C×á¢4|uéôÌ÷ >‡ *¹p®€g½Ž‘<`åMz´7e/ØïP^}Õù jmc”×±a¤Ói:Û;ËûòGxF@šÚ/’’IؤíÁçèá¼B‡<ž†«hKŠ­æ•Å•÷½obÚ;Ì”0=Ùá¾|•bù&:“[Çoˆm»ÜÙM&Ëoz £òí´ç‰_Ó,çÌdÄ},“)/+½è4­\m7Ÿ¯Œ\ˆú-¢Sí8b†Q~Sîû´Á‹áuÙÎàä½Q2¨ŽZ€ EM’Tžp …B1E‘Ž?ãy—v]7Ì×&r’;˜¦I¡08ñ–,€ )hšF.—£P(„ÞMâYÝÝ ¶­…/c£‚ˆþÄp^j"ÓŒMG§•'î<ÝsÛݒÑ!ûÇýrûw6^èRж#FC RlÊ–h_.Ìk¶‹¨¢.s6lØÀ5×\Óì¶ Ëî݇]ÄPLKt.Öá«[šÝ’‘ãâò.o#Ëœ9ýÍ”|ó‘dä^q:µE²h™ë åªÑù²Lvô&Ÿ ¦ùÁòC¸BËãÔ‚6˸hÞ‚z¥·åqf¨H<ªiš¨•ˆ¬ïËÆ#ûIBrA{õ`9-ئÏà²Ýò¼Êœ yQˆaPéTÊœZBœ®Ã:Ìwà6îÏ‚ž…·"ŠçÁßþ6=ewÚŠ²‚—ìëtv K×Å÷™ÍŠ<*ºN˜¤Ø0À[ù,¯ì(‡¢—G9Ct@´ÏnÉtGLO$Ä~¥iV†T?㌓€[Eû³Y±¼Ìû·Ý|ám)EºXLtü}?²½¡p;h½Ž{ ÑÉ›†û‘"m\íÛ…B¡PL86ÐÍØ…8Y¡4•J¡iš¦‘L&Cîíoÿ&–eÇ+*˜š¦‰®ë¬^}%7ÝÔÁûßÿ ÿð‘Ë- “Ïà¨7ÛxŠm3•#ö »÷5»cBËŽ:ª¯ðÐØ6fó9`PB}ùêPVÙ©¦Ñ£1‚ùƒ7)vé‘íYˆ§ŽAYÌ‹#<բ–YG>,RÁ_!˜ÙÎ>ËD¤µ„åÜ2IˆNYDt‚mÈirùh[¤¸(ÅÉX°n:Øn"ØwšÁ0y¬À™Á´ÞàëðÅ)øš ,…£¾ÉψMÆýXðë³ÆfMÂÇŸv…ôNs]!Z‹Bp“Þj¾_DzÙJ¯2]}Âžê Æ¢ãc1ßûÓuÑ‹ñÀ¥R0t€ ™» ÷Q1)šdÙö‘ü Ä>%¦%“BøK¥ T¢þuÝÇpLÝㆋ‹…EžT.S( Ťá gÕšÃH‘ùÛ,ËÂ÷}²ÙlEÂ|}†›o¾„Í›?H.—ãÜs¿ÀêÕ«9öØCøÿ86ôÿÝïàÃ>„ÿùŸàÞ(¢ ‚ùWo…/kv3FŒw¹À¼½{+gFß”V¯ë0FëidPãÈ—ºf䯲Å)Í´)ÖàGâãÅ ÂÅåù »°{÷ÉcÛXÔkE†“IO4±3xÜ׸ĽxÙSMº&{)šsÂ,cSö`ËÓåz2ñgš²G<Ìà³ KŸ¥è¶‘¢òAGˆâA’Bˆq~d?²Ò‹M·Ç Ên×ÒKNŽ£åzÓ¢‚¤ôª“ÞKÑW«Fdz*ø+Û/lKs‚íj =<&ö§÷ÁQ á·`ïqLƒµ ±áø{áÙ›(Bl[üê:àh.7Y9¬X,{žézùM+‡ŠÖŒ o‡"êM*¯‰„hôÌÌû/>ÃŽºMÙ Sn'‹°U)T§!¾«Î¾ÁZ_(ŽIÓ„7ŸÜ~î2ðz \‡ÎõÛÙu`œEÙæ‡ )÷9`¼áÌàG¡P(S Ù%„1dX ¼ß\× s¸U‡zžè'¸®xa—ËýMƒßÿV¯zÊO”Ÿ³ù|9$PÑ[£dùò~~ûۧƾ¡&Ùÿøã,^º82Ó­ •@„AÇb1²ÙìØ„8}Å"åñR”ZΊ)À7ÐÕÕÕìöÔÅu]žyæµâÿf7FÑ4¶-›~oIæb°k×.Ž8âNàÜÆVŠ18›Kå+Àê2X¼1°w‚R‘'Ê@ ª£âX޲›Þ¢B[g0=MÙó¬;²¼¼éKAPþ•m’"Yšrø(”=ŒdhkôEd6²Ž꺃öu¨^=_ÉÈròÜê5–ƒ²(ÒY®@YÜÜ ÆìòþL…FópßFøâÝpæíbÛ&ð~ðiøúT•>(wxë!ó¨Ôl{px‡Ûú¬sñt±L4ך K•Œ¤ŠWš\í™™£,*˼‰vd~4œ;!üƒÀk >ß…¸VenN„ÊëLzuš•b·ÅÊùA‚0õêÐÙx\¼ˆõv_†ô“à]ñK!õèþI°­áÊQ˰¬P(S !»´ F—ØqR©Ùl×uÙ²åq\W<ÜR)QÜÀu…—|oo¹¯ å¢ýˆéTa:±gÏžf7aÔȈ‘þGãï_ZžQÝ t]§»»½‘ä÷ò¨^´V¿¬z%ÂMiflÚ´‰M›6…ׯ_ÏÚµk›Ý¶A,_Þ`éÅŒeé–i˜$.@÷<^÷ºk_¡–;q†r(”ÃD£•TOˆüŸE êeH©ìÅä‚ßXð9‹œ ˆ¹,A•EˆÄ ?*€ÔÈVsZÅI`ðƒ#ðd*…:Išr, Æ­ˆápázµÐÊë˜Ë!ûªò¬‚l0Ž?½ V< §håUÏ/€s1ñæ{Çé &g\sÄ9ލ*ÚÞ.:Áv dÅb¢ßâ8¢CÜÙ)¦ÅbpÝu×…ëú¾è ‹bù~g9W_ýU /áîù»{˜w__Ø’Õ©dRæ 
j‰¼Ò³Sz®ÉinÕ|*mSΉ˜CfRÔõÁÍ‹‚ |ØOYlÓv.½>M„QÉû€l;UÞVèÁj2¨í±X“p¤Ï«ôü${€ÝqÀ¿Lœ÷X,X±Ö}G†xL׈ …B1S±ÝUù¸í§ñwCžç‘Ëå˜5k©TŠ|>O<gٲϳy³…m‹VQ‡¥ê—uÑü²Š‰cùòåÍn˜qÙ/½ÄÑGá©2qš¦5&ÂA9J)J–Êjr* u2»Ù  +WþªÙMP4™×ÜpC³›0&¾éBå U²¥ æQ •ƒñã˜àΜ¥œ/MzœÉõ¤\}G]™k‰cÍb¨\\SùV£Ñ® ¡» vX³[:jÆR5Õ÷}:;;ñ}øæ7¯å û€…B9¯ŠôÓ4¡z‹¢SœÏÃ'?¹ƒßýîB®¾z+©”xE˜Éd‚pÔ%ײaåضmÛ´¶¶ò§ßþ‰ß~î·a´T*Åï¾ù;6üÝr¹¹\®\6>Á`1NæŒSö2•o&å5šEhѰl)jG.Œ`´;€• ¯ ÊU°ˆlGæ=”×±æ¢!ävy=óy`gPí#AØŒIx]›G/iïÿzÈïÇ—y ÄÞ™7Qü‹”óAÆÄßy}ó˜·gÞd˜Ü„ãâ’ 1îB³B¡P(ÆŽÌ 2Ò÷°Žã‹Åp]—ÞÞ^z{{1M×…¯|eïÿ\òyÂ>ˆ¿ÊÛ­yœxâ‰Ín˜9^‹ à\·f¬²ì“f2 ¼å”ù„‡ÂGôÓŠi“#“%KŽnv3MæðGmvFMß<~úÓ‹F'ó6¥kÑ0ÏhÆZ¦hÏkð0eÁ*¸AË\o0:Ÿ~ň‰:CE# oÿÏ€O4»uã‹ÐlÛ”ðØ÷…›¦¹¸®Ë§>µ“;ï|ž;× i½,™gjÖ¬YtwwÓÚÚ¾¹ÎårضV4ó}Ï»”TÊÀqr¹¯zÕFý‰d2I&“Á4MÒé4óœÏË»_Îg®ë’L&ù}î÷¬zÕ*þÝý÷ }"Qsx]”CÃeFhé*…8=Þä[v@l£“²7©AÄ€8hï…¬þ÷@?”Ú^™µ hÉ|YÊ×~r.  $Ê‹§Ÿ®©óeåEä=#ÜÚc} Rþ?ðn}%âþ#ÈN8éà“xø-7ÍÞÆoLB³B¡P(Æ—âýÒpªÉårX–E¡Póoe2³-¡§Š©…a@{ûfà­Ínʈ‰É;äÿ‡a.–®[×2ì{‡Ø¢)RªÓÉ—±SÙYA1ˆi#ÄíÜÙÆÎeëR]eÅtÁÃc6Ð÷àK,YÒ߸вh”o¬²Zg”î`ºE¹HÁO?"·è >Zà@]DŠI¥#¡L1§ŸùúÝLÇŽF=R)!´}àæöÛ\!Äuvv¢ëÝÜ~û³ œøÓŸþÄ¿üËKxžïë¡Èæºâ•_,#ŸÏ‡kñxœÞ ×lÛ6™L†t:Ž®ë$“É Ìt?]ô¦i’Íf+“/wŠìŠ0×™¥[œ“<‡s8ß÷immåñ-ó_ßø¯rõ\™“#ÒGÊ8t]gÅ­+8ñc'òݯ—ÃÏ:×uC1Îó=v\¶ƒ×ózñe)Wß•†ÐZ~GÓZ©hw4bÔcUΗ/X]„ˆŸeè܉’$¤“@Œ/BÊwt®Z³yð~ÚñØÚÇ&ÝÖ&Š8qr¡  B¡P(¦Ñú[ -<;;üìg›9óÌCqéi¯¼Ý¦2š‡>=.ÜÀe­Û‡Å}¾Üõýr¢ã¦9ÅL¾|u¨ý¢Ê/ŒÓŠPˆëééaýúõáÿÛ¶m ?KÖ­[×´†>õÔƒ¬Xû†ŠIÆÇç4 v.þ9Ÿýì¡ÀŠÆV”IØ£Upˆ÷Ûßrã7’Úµ‹çæ_ÎÚ{·pÚ'Oã‚ôœui÷]{-Žãpë÷¿Ïa<Âëß~³p4ŒA^o±g!}5pRÍþÙȈâ’ÛÊ8A®Å‘î#-6åkà?Ä!lCæò‹½~o˜(C›d\\²d±+ªm( …¢™HŸd£Ë/êÄ‹<“¯}­œ'?Ÿ/WKW(&“_ü~?Ïtqyb<^3Gœçy§+Ц%’ë,§˜VÌhkk£§§‡žžžpÆÒ¥K+>7—Ë/?¾ÙP(FÅ|4¶nÝŠ‘J5¾RÔcMzÇÉ{v½uáý2?øU4½Î´×ÜpC±záã ÝsÝò›æåË{¹ùæ[Ây7Üp`³oŸÅW0M“ËÞó/üâ-|[øŽ 0âqŒlvPR[Ã0„ê“ˉåó†Aww¹Á‡E³Žç¸§ï§å™gØ{ôÑbz"!Ö9¬¿OÁÓ&\œ׈ œàÀ÷r°ÑCg–;ÿëÏüú²Ïð²GvóÊÞxn¾ÃÂyWP2K̾ÿb>| |õ´Óxxöl:9ˆüòa•ƒã=máBÞœÉxè!>¼kv­­­\~Üqœ40ÀE[·'ч¥óqþº „8™£n$9$‡"ðx5ó]£ÛD<°ÈJ¬ò[ÏfÅ`æ}ï›Å¢»§.‰A±fÏV¡P(Í@fgi„X,†®ëtÐÝèz ¦Y®²žlTÉS(ÆÀf`Ñá»9ÿ°{ s·è0µÃ:;;éïo0JJF1u³œ²óiÇ€U«V±jÕªf·eHdÈ’B1qn> ?Ñà@Û÷Áp@saetmøª…DÎ*UªzÊ3sV'³O¥Ê^]Ï=÷KÀÃó sɤ‰¦i ߶ùì-W³ïã…È£‡£ £\*5™,—,koo³YÑ£¼âÈdÄ´x:á°.9òÒ¾—˜wî¹AÎÄÄ3à¤àýñò6K|Ý…¸ çäá~¬ñú3uþ²7;}7þÙæ=•—ÇtÚnˆžY³xå׾ƂC¡#ê–J‰öæó´ø>I×Å0 
¾”=ê¾û¸ôx°¿ÇqÄ9p]xêÌ·Ÿˆ¶³ü£Äv¿’\NLO§•œbò™½÷/|üo?äo†\®aÊŠtTh“ŽŠiMÝq7n4­«««i =øàêQ(¦³ú:†·_YG]Žzùð¿1øx\S¸SY–"’ÉÊòN²rb-d‚ éq#=Œ¤°‘Í–çÉÒ•¹œ˜fêu¢¢&¢ƒ0Aß÷Ù·o ³ví^zéy~Ø#—¨\Žƒ>X$§u]aï…‚°³LFôšíHx ®Ã·mx,]™Ø¥‰ÇÌŽÁ‹ßDës?ç裎8I‹ë%*ž½Oƒ¯gÀÓ Ÿ„WgŠYÉdÓ4Ùôž÷àŸw§½c!—ýß;xÏñÇsÈ»Þ5øÈWíÁµ“”¡´Ñ =“aïi§‘Éd(‹<¹u+ûÿ¶S{2)ø¢ðÜôªW1É`Ê¢‰ òB±ÒK+—˱èñÇyÛÇ?>)ßq&#¾ªê¼:‹¿ÀË^öô¤´a"ññ•§P(S ‘Þc(¾ÿýG¸å–¿pþùëøÝïŽÂ²”ø¦h›=¿yšçæ5~ œ,ª…8åŸ4#…¸7rùå—s饗ÒÕÕÅÚµkéêêbãÆ´´´`šæBÜÀÀ×^{- B]£vlذݻw³bÅŠ z®ë²wïÑJ„SL8aÇ>>0wö0ƒÖ@¨ PO½+€/$Ä ½HP”Áž3ž'D‹¨÷¾÷=¸à(Øî ¢dR,­Ì#ËF†iË’— þO$Êâœm‹íHQNnG–”£ôá.Ðè>J=;ë½8Z Ä×oš ë>íííø¾Áa‡ý¿ýín¾ûÝ·ò—¿¤…'m*ŸÇö<⎃oš"—›DÚUTôM§á"opÅ)rbÚ ªéî›vSâ9À×IHhí¯Œm9Sƒ÷Æá«ÉrÕẮ£ß}w˜Éñ˜~¹K—Ö>!T»Êfy=`þå/´··³òùçyhß}üæ—w°cà –éKuî¹üCg'º®c%œ·e ¯=òHNxðAfƒ¸]ÒiÒ®ËÉO=Q!N~òÇi¬m~ïþä_ºÙ§ˆââªJ©Š c²ìX¡˜h&ª_Q —ásÏ{žÇM7}Šë®û'Þõ®£ÂwÇ ÅPLtÿØÏ¿œ}«?4ä²®ëbÛöSLÙyv½-T.¸Äl€¾¾>2™ ±XŒŽŽŽpæUW]Å­·ÞJGGGÅôj¸à‚ èèèÀqÒ‘W™L†¾¾>:::Èd28ŽÃHð}Ÿ… 6û\)f8eÇ..O¾øâð †Øt½,0ÄžÀcH>ËQ±iŠì³Q‚4TìÜ)Býd’Ðb±R(“Þt DŽè±˜¦@âñò>z{Å6¢ Ï+—åÎåÄþÚÛÅ_Ë*/×Þ.D˜Xb+–U^/•Û™5 Z[çPsÝi™Wm²ÊŽÇz/®.ÖNûèºeY$“I–,éå…ÞDË obïS?%ë‹Þƒ{ØatÞy'Ži’Z½î\Z©Ú°êîLØßû’ƒßüYreDÞ°ã <Ç1=ƒ¨RêAÍú×gî±g³Ùqñ>ËfEQˆ{8‚m/ϵwœÊûOçí_übXÖu]üõ¯yã–-\ôä“t÷÷ó¥K.aÿ§>ÅÎ;ÙtÑEø†ÁEª•ærâº×†e‰ßDBL‹|ǾïcÛ#+H ë5#d'”‰îSH<!œAu©·Ê¯ÚeðõP¥É¾sÿ;ùÁAÇÝébh†³ PåÄ×T4M£··—Sçþ‰ýšÆþ×¾–ô›»H¥Rôöö†¹P?¾s'Ä™gžÉ¥ßúß™?Ÿß¸.ÿþ裿ù œ{.ßN¥ø×}Œ¤hžJ‘H$Hjf6+®%Ã×U¯(µlÛB4wOˆ õøILI7á} ‰ü¨" ‡ïûø¾®ë5«Äyž‡ã8á<ÏóBÁ8ãû>šV.îâÏ?ÇqÐuÓ4ñ}?܎ܾܯTZ6M“\.‡eY躎ëºaÕæK.¹dBŽ}²ì¸™LRyPÿ&åââá'އ‡ƒdèèëúÁ Y/3‘ýŠjd:¬¡¶àº.Ï>û,—_>µó›+¦oÇ ìmɰK6\1Ul¶L%ÄÍ æ€0Ìj·¨KfKK ôôôÔtÕ\ºt)—^ziøy÷îÝáÿ7n¬ØvWW—_~ùˆùÀ‹9õTUR1±L¤¿ì¹çxÙÒçÄ™›-“pQ2”=ájaÛC»¦¼1ò0øn˜XläëÔ"*”9N9Ãûp´|¾ÒÃ/zœ2üUm–zv<÷âj¾uÍ2~úé´íßÏ¿´´P0 6_þ,úÞ½$+›Å@$[v/ò\„C[‘öÂù °g€¶H,S’›ԜåŠ6BœË"®‘ a ÷?/9—æÎÅÿgÖkþø°&×µ¼™¯n~ñx< t]GÓ4!s“I>·?§½ã|ò³Ÿ…ÎN0 Ž?žƒî¹‡Ö÷¾x@è¾ëyät]䙓w¤³wüÕWS¸çáÚàµ.‹3L&ݧˆ"âŠá‘â•Ãä %*¶U“ËåpÃ0¿š¦aÛv(¼U m¦iŠ0í@4–ûÓ4-ô"Èf³xžG*•Â0 ŠÅb¸})ÌÉíÊßx<æoô^---üõ¯ÏóÏÿ¼ ›ñ˜§jôÎL|„'‹üýúzzzxúé‰K>ÞvüôÓO‹¢'QíÚ4…‡ T&œ¢!D—Áņ‹;øÃ¸í‘zÏÁÈ0׉¹L³ü;¤÷]*Uß»n íøÙgŸ°}Ô³ãÑØðž={Øyú<€IDAT¾};7nô&—ƒ_üh)ôƒn¿=ØüÓO'×ÚJˆQvj3"¿„q©4°Š?ã:H!ñy°ßùÐy åkB¾* 
uÀ[·oæßˆ1Ýü_Ó´rç,g¥®—•°à:miiáÜÓO§å›ß =Y÷nߎùŽw §ˆx¿½æÞ{ù¯sÏåû÷£Ëðq)ØÉÊ Ä öõõ±eË– aa<ï{ñîݻٲeKøRQâá‘'À qÒS ½Ñ4MÃ÷ýPèò}˲*Bwb±X(ž™¦ qÒÓR;‘ë;ŽƒmÛ¤ÓéÐ[Íqâñ8étß÷I§Ó5+»®K.—£P¨?ØÖub±®/÷-…íj¢ƒ)|oݺ•ÿùŸÿaÏž=ã~ž'ªo¼téÒ†–/,,,,zé%EªÂ£ÌÃCC#E Ÿ,Ùp ™#z˜ihĉc` ~²dqpÈ“¯ðDËAGÇǽÙ@DD÷Ÿ$‰…Ež<zðãà„â\ŽEŠhhXXtÓ†V‘ó4F,ÜWvðãá…ÛÐÐpƒ(½ôââ†ûõñIÛ(·•&M’$)R¤Iãà```c‡m01É“…NÙ~ƒlðµ°pqI’¬¨?ºûQ¾|ð—iÛ;q61žý (÷k9w8göZd2.7ßÜÃí·GæQ¤˜òôõõ±mÛ¶i×?Ž:36„VâÙ”N§‡¶] ›½$ø¬·iEOOψúÇs@ÄIoذ¡fhªÜh-¯¹(2ùᆠ¸ôÒKÇä–üì³Ï²iÓ&–.]JKK 'ñú·Íg9)44Cg9zà¾<=䎬6âÉP«²G(‚ ["9̯·/™X2:?*´ÙÁÿq áá"s;ûݽ{7›6mâÝß=¡§m<íx``€M›6Ñß‹W>ÏÊ¿#-ν½µsTIaÁ§vn+TEq·ß'ŸÐxCe¨‰èÔŒ ®!ä€ÍqÊ‚&ˆ×¤2¿žD’!z¦Y®›L–·åû•Ùè¥1bŸ´ã‰ìh@m;^¿~ýˆ·³gÏvìØÁ¦M›8¼ëðòé~éÂÉëîãÁÙDzå¼óˆ#.õbk+PݪϢN¹“´¯€6/˜ë䎃~ µÜ„¬ ú`$ è v`Át{/³~ÏjN—‚GE·Z-€SžyFüãy$2®X²„c,ÅŽZ÷®·œy&ù/}‰+Î=—õŽSéÍùèºCÞg¶mÛÆ–-[&ôç½X q‹/®èÇøøS"¬n<ùÒ4M#‹‘ÍfCA**´yž‡eY¡—™ •ËɼiÑük± ¿g:Æ0 t]'‘HËåH§Óaxz&“©ùLÓ$›ÍÇ…Àg!ÜcxÔ†1¤'ë`ü ƒbëÖ­"ÄÁÄô¡qÁc$Ho1)ü¸¸88ØØ)’ ~¢a›RP²±C(A"ô&“"U–l(VihèèdɆ^q1b¡ B`“"•öäüvÚ馛9Ò¤ÃöFÇ&fØVéµ&·%‰z÷¡'œüDÉAC#K–©šËFŵ8qLÌð¼HϽ ™p_Y²a¨¬…OŒI’á~uô0§¥…EB(X>°àÎúÝY_.( v]‘óÎóÄþl[l#šßN.'ãøêœË¶¶6Ö­[GOOÏ„ž¦Zv<>ì°Ã0 ƒuëÖ…Þ .ßû|xÅGúˆ_þÞ°e ÷—¿àôöVÜžä-d1 Ì»q_¾Ê8 ý´ePH€—ƒØ§Á? ÌpÞéëÄ¥bœö+Ä­BÔ7|sŒÖk0ƒ¶ËÛQ³p~-GÞ3ÀÊ£¯˜.;hÃ&ñ5 N þýÞÅc?ø X¶Œ<û,ÛuX,F©TÂu] ËAHo»ÿ·lO~âÝO2)BVƒí‡ykä²”ƒ') Lãy/>æ˜cjVAs™Þ¡îÒë,úW kÒ{M jQa-*”¥Óiâñ8­­­Äãqt]¯ðÐ! 
ËpQ9M×u2™L(ç#¢®ïûµóÊç=íl¨Qø‘N§ùÿø;ì° ÙþDöÇ /ø‘^o2„RŠM&f(Iï2)°™˜$H„ N@è}&‘ÂSµªìÇçÈÕ ñ”ëôÓ@ž<ÃÑh®7 ­âWM¶ª“^ë¤ø&·BÀ“B`’$&æ õttÒ¤I+%F÷õÎK“=ó¤ˆYœS„³a­½vÜm#Êxõ+@ôkÙr¤+0ˆO~r>+WÆ;ß9 úªŠÓÕÕEWW×´ë˪«÷ãòºR‰YgÖ\GzŽIµ 3í^6ȬZµŠ¶¶¶†ûÇs@䀻ꪫÈd2¼óï CBåŰjÕª!þ6l ¥¥¥"îZ"sË ÐÒÒR÷MýPl½|+9rtÓ¸œÃJ²ü7I$°±11ñðÂŽÀ¤yÊ¥O‘ìÎCˆ%yÄÅ% we‚Ï:e/ 1r”I–äÚŠðÑ‚6”7‚6¥({žIrÁ2”E°4„]éÈúVÐÖd0=E9\R¾Ùv ŒAó#oé¨äÇ$•û eaÏ‹ì·HùÉ›¦³&Å‹eAϲ*Ca'‘¡ìx¬÷âp[Àóqx²µ•SeŽÁ‘¦ì~Ágé¡ âÞ¥‹\m`D¢Rä-MþŠH“xE0Ovê«-»Ú1'W5_FÏÉ÷ òvþ·#ëÊ:F0?CYG÷ƒiò}‹|—²gÏïøçcŸàøãŽÇgh/€ºø>Ä㜨ë¤ÓiLÓd^$G– =´m›d2ÙXµÔ¨ÍHïOè´cé]úã´Å 3Ñ} `Hï—©€Oež6™û/Z „¸Í×&Ï™a5AK1¯š‘:ä>+*¡JE¼Þi•B\=Fâ (÷%=í“Áß\ðWêòÅf4=FŒ²­ g¿êìš¹’ÇÊdØñX}çèOu±ƒ(2Y5Y²ƒ^~'IÖª&ÎÙˆàõö«‡´u§î˜Bz"[–ÅÞ ÷òÓ¾ŸŠH£µ·+C»ÉïØ?À{ûß‹÷Re…á$IyäR_Máº.÷ë~^:á%boŒ…aççœs΄ËÉèW@íé¾·Üò$7ß|Æ„ŸâÀ`"íø0tyäNb™t#/çcÀ¿!ÆÕ!:œ‘TMŠFi’ضm[é®»î*=ýôÓÃ.{ñŇÿKÅÒÁÆmó‹¥R)=Ì6Š¥b©·Ô[Š—â¥t)]ÒKzÉ,™¥tðÓ]ê.J…RoðÓ_êþ ´ào©Tꮚ×]*•Œ`^1ø_6¶;²žLË—J¥x©TÒƒyéR©dŸ{#ëÁÿzdÛZäăùÝ¥R©LËË•J¥R6økÓKÁrÅÈöFŠl¯ Dm£ŒÔŽ‹¥bé5¥téûz©÷•o~ò;L—j7ét©T( ¹‰¢›+¥ûš‘˜a”Jý \SîîR)›ûvŬcì½½âû¨&›ó† ™v<¾ë®»JW^ye©T÷ÔtpãY/•íÜYZ‘Ï—JÅQÜDzKâR(•ïeFd^ad›ë6÷ªRº”ÕM­6é )ÃÝîzKâV+IN“‡©ÛJ—|üÞÒ7ÿnm霻ï.é‘ùÝ%q›6JÃ?ãJé´°µ|~Ьb±X2M³dF©P(”º»»K¦i–º»Åƒ%›Í–²\C†!öS,–4M+õË{D:]zfÙ²Òÿã?ŽÛ¹)#±ã+¯¼²t×]wULË?“Mwww)N‡ßEE›²ÙR2™,Åãñ’®ë%MÓJº®—âñxÉ0Œ’¦i¥l6[*‹¥|ð½÷÷÷—¿—I&ŸÏ—€RA>“ÌÒÐ}ŠZ}‡êë]/‰ ¡Ìë/•û.²ßb”Ä—,•/ªl©üÜL–J%Jå~T<ØFod[éRØßzüØÇKÝÅÁßÇd0Ú¾ñXÉ–²%£d”ôÈO¼ç'KÉR¾”ÚO±T,Q¢T¨ñ@éïï/uww‡å²ÝÑô÷÷—Òét©P(”t]/¥ÓéPÊf³¥%ßYR¢Dé“—}²T(Jš¦•âñxøW^»ñx¼´àŽ¥–Ÿ´”޾ðè’®ë%Ó4K𦕠Ã(éº^Òu½dF)N—NØzBiÞ¶y%MÓJ¦i†¿r¾a¥bð¼•cšRIÜïÓétéïxGÓÎõHì¸TªmËÝ¥RÄËäóùR2™lÚ±)&—éØ?.•J¥JféØxoéóŸ¿·æò½½½Ã?«ûK¥’Y*¥KéRñÂbù™5Â>±¢¹TÛÆPÌ«×(ÑJ%#aÓ“›8æ_¾ <4¢õäÛ¹B뺃«M lú—áô›nB÷ýлŽl–ûÞýn›ÀqÃ1Z;–ÈŠ†“E"‘ "8ŽƒeY …0l4—ËáºnèÉÇñ<îîî0÷[-·a?„á 3U!½îBOKi¨õú²e!î½”½Ød¿¤€0|™à –•é-Š”ûZÒÓ-M9­ôÊO¥j,Ø—<…ÙàQè’¿0~çqŒÕŽÁ ó‹98xx¸¸)†aŽ2œDæ"›êÈB$2DÛ4Ͱøˆô„óOEJLÓ$)ô$ïw/{ïËà"Hu 3™+QÞó|ß§P(àº.ç¼÷~ÊO™¿w>®ë†EE …Žã„!᯽íµÜýλÉêY.þÁÅC†¼¹¸œÈ‰û+û²‚q6›-WF Ï4M\×eÛ¶mMûÆÃŽeþÖ(W_½•¯}íÇüö·ÿÙ´cS8ŒÕŽ{ì1Ž8zì½)ï ÆPã—èEpå¨6ÅŒeÒ„¸Ñrï3÷Òµ»kÌÛ‘¬4iÈ~”ޱÚFžwž‹×ãa˜ZVÃ{܃« õl 6P É„@iÊá b;yÊ!U² 
I¹ƒ•a˜Ñ¯œ_­³Ds¶ÕZ¯ÑN³lçx1õûgS†ÀŸï8•cÖ2üÂrSïÆÛˆèñÌ30|®ÜJø¡VGSv–£ƒÆ¡—3Ž\2Qbšo´Âä!¹]¼fã+?ùÉámR†Ö»û–÷¬<åêÎ0.n÷;®²†‘·ññúÆ´`{‹Ï|‰ç7Ââ}û€r”ôH½B®#5™A i ëø ¾Ôjà8Éd2Ì%çy¦iV$ÝG×ù» ¸áóŸGûÃpÝé]Ü ÊdTL•ùØlÛ®-•â["‘ï‰òiÙl¶"„TN[c('N¬…ÌÉ:B!®[¶ñklO†¢ÊEÊ‚¼¬ä" 5Éð,âe_ÔðAi²ÿ%Û(SyPÕîhGæÅ-"úlQí5X÷÷·ýž£9zlçwŠâà„ùÄäg™ NŠ9Ýt“ Q^:U¶s¹\X©W nžç…¡ÛR|³mMÓÈdÄËõt:-®¹¸–‹ã;hšæ[L&“hš†eYX–Å_¯û+ÆEù|~Èk-ccó ’­ñ"®ºï³råJîâ..^qqÍùQêÝ4MÒI=O\‹?ýéO›ýu‰Z}2øö·/sud…b2XõêŸóº¶¥À;ͶHƒD^Úo þ—i¨3’)/ÄýáÑçèøÙB r¼5lV&ç100CtROåÿgïÝãäªëûÿgdáÂgA³8k‚ŠPôŒ´^£3ú+(´Õ¥ÄK«ÎT¾V¬UgÔVkZì±BÅVçÐx+ÝS%q¢\6€{’%!†œdr!„üþ8ó9sÎ\vg¯3³y?÷±ÉΙsŸ÷|Îçóú¼/í“Î!¦c–³ƒ—÷£òµiË£…|‚NŸN¥@šÉ*üòèäkO]ËÇVªx ©<{“aÇ8òÈIîdtTF50×4-6°WžjÐéºn˜ ܶíЃBåRQûTùŽúúúÂÙgåÝãy^ÝŽoÛ㺕âŽÓ±BÀahxÀÞ…ÛxÓI'5w-J9Šö£•7ŠjC§xvНW9#u•Ñœ]£¡nÁŽcŽáuY‡ÞMÏXÍᔨèPIÝé$“¡ŽM5žû(3¯†a„ÞrÉd’B¡@:¦¯¯/VÀ0 |à[Ïa?Ò©¼{¦ä—…‹OUÈØÔÕIWÖÑõÀÈ>èÃlx¡lY[ÊʃçYΉ¡ ì.–À/UÂFÏva0kÂ>.öa§—hÖÀÏÇ¿P™<$üྶ´•étmصRT!‘L&çJ¥ø¤€šü°¬à}]‡¾¾`ŸÅªb¹§ìÜ Ë–M™=´I’øøa1ôT5ºM D~õ}PÏíPè/÷KT_Cµ—‰DÃ0BZõkmÛfk¤Š³a˜a»  iF`®åB"i§d|qAµ¤¦i’ËËúû¡§'Á±ÇnE÷ÌšØÈL&0ÃÌɶA7 <ïLþ²*£‡®ëFY3ö¡”x//Kß]>§ÊºÙlí<Ö]™”JÁzª@{±¬×ÓSù*¦Ó°ß»£87’9Ãä¹çNœ±ÏuªQñ(Asæ+Ï ÂD8ƒÿùÍr>\zkÍ{ªàR3AuÈEu|v¹P¡D¡Í^ÚZˆspX°1RÉÊó‚lyv Ï ž’ÅbðÔ²í`´™ÙKA8ÑWŽ ųh¾%”åÊ遼_ã)RÉ[Tí¨NƒëÅKǾ@:/¸1úü…Bl6‹mÛ¡èËåÂÙ:×uÃÊx 5€K¥R‹Åp •¼ét:ÌuáGrüd2²Ùl8PP3‹¾ï‡L¨TdK¥R J¥Â™}:KáÀ5•J… xÔ%›Í’J¥Â|¾ï‡çR,ÃóP^V³%d±ûa“£­ <â ¡(õÓu=ÖUö¡ìáêŸü„uë^ ÷­à3T/*wŠçy¡]Eů(ê³Ëd2ô÷÷3Tž¶ÚUôˆ^¼q”ý†áL槪Þ^­¯ªªÎ¼ ÔwE ÌJô(‹±otß3›L½quŸÊßÿ=ï<Þ03g0eøø,8÷'OrÚþ&òƒ)ïš•6Vy©*‡cyàL•¨ÑD‚ú_ ôTû¤]×uÃvTµŸJˆSßÅR©î+z,奼0Òå\Œ'?õá<^ö×Ah”ŠÕ@Uí· ÅŠîsÿÚµ‹ç^¾¥Ñʯ6`.Y—€7–o±¹æF¢´º:ŽªÝÐu=¬’™J¥Âu:wÊò_ …@Hs]¼r»£< ȇú6`Ò\¡7»°ß ijR).Äe2Ai“Lú¨Q>ý'µînõ`‰ËÏáÃ6œ”„µ.œI0CŸ®ráûn ÞéZ9ìÕ€LVûpGy¿³à u¡Úì”EEA¨ˆhJÑ€àïøº…Býœœ¦Y+âåóšÍÕY~fžý‹_ðì»Þ5%öÐ.äÈáààã‡Â› =îl…eY …pâ8êõ¦ú¤ª}Sùjò.úÝ0 ƒGÝŒïW†¾Gù4†±LÝ0‚É…¯~u?¹\ðqëz`J?ýéSåýW<ͶoGq8}}•"Ѫ‹¢Š´F°~6 ©”ÆcýZMZÝj³Ëçaƒ¿ŸççÖ8³TwAÂ4‹‘¦ ú•èﯯôçãc-˜ë³Ï~tF>×é :øð‡Ù°á: ÓÀÃk›PiAhÄàÙõ'Ô}¯n4šo^¡¼Å©Z&ÌZÚZˆóñÙu//;±<ËSžåt6m‚“Nª„y%•ÎZTmö<°3ðwixJ‡=6|§ÇÁK‡ùáÈ6Œü÷[ú·‚–­‘ÌOÄOúÓèW³ÙŽí„â‡8¬… 9öØcƒó‹tj}ßg×=÷Ð}öÙX&ß÷Éd2¡ç'T(Ÿ Ô z´3¢Ö Jˆˆ.‹za(Ô¬#ŠpyjQ¡E¹økš 
'Jäèééahh(hÔy©Ü6êïèÿ@Ýœ•Ô¬¦*Å®B•Ù©þäve7™¾rø©ïyÜ;>¹ùóæÃ—_βK/åÌãŸÜ‰*±Ìqjóx Á¨?• Fï§°¯¸­VS,V<¯Un6ß/ÿ–W‡óóÃx= eáÿºY‰Œ4d\¸°œ 7IÙ}3 ÿ9ö×Fyv«~[µ¢‘LÖ÷¶Õ´øºãí”Jñí5 òy^rÓMSÖÞJ<< Œ0üZÙúT'‰¶GÊ4ú,Vi+,Ë ‹@Ð&ªö+Ú'T^íª wËȦY MÀó‚eÉd’¯ýôõË‹E%|UŠ+]Õ«ƒ¯Mü«s Â>ò‘.úúó¨6»ê¯¦5®ßT¦ÁJGÑLLY=í8JU ª›`Àùó§·èÇtâgá±>þÏû=_þr/¯zÕ ððjB§ml’$ñðâÅöÊ?Ñ×õ°±C¯QJaõQ¡†ê=õ}òñqqIä Œ¾§Ž%–˯,(:8¤I££‡ûk„^þñÊ?Ñå^¬â5ïùåŸzÏ:ulŸúÏr³ÊfÕýP“XjŸj¹K]côÕ{Zì>$I†aó–¶®èÈdy|÷˾W3ŽÐ &#@çu… ÒÖBœ‹Ë¼ËxŹû‚©/•,ᤓ‚Ô2Ž{Ê)<.ÊÂs0äÃÛÓpQ?p€ c È’ƒ¤¨%„ò|>VÎR!Éýýýan”FÔ(ê}FJàSž?3‰|õnËf³aòój{©—9*(µ\y?E…65xˆ†G÷u0„SÆæ-ûÙöíýìî^ɯ6¿a‰JžÌTdù3ª…75ðÓ4=îá;w|ç×N(̦Óéð³VÞ¶Ê.ªm¨^ž7å¹1/ÊhÈÕ¿}ìc<üå“ùÌÆ8ιárõ ˆN:Dó)Båû¬—}6—}vx-zÙ67½øbáž&à¨âÛιçâ§îÛÇ¿vXXk4‹®¶w²zÿý÷sòÉ'Oøž´’qyÄù~ð\5Í@K$x梋p/¾÷¹ç‚Ï6›eXÓøÏ/}‰çœÃ‰>Ï=ŒŽ£j‚ÏÔ³V©Ñ6Üq*ÇtœJ,ZŽxœ²ïWú=Ê6u/¶âío¸K$5pµJB›J<4T*½×ËM£¾ šx®9; gƒïðÈñÇsèŽãÜY{âãca…ÁTV>u]—D"A6› ½e•—p¡P`þünþç^ PèãôÓß‹ç}M›C±˜þ•W^ÇâÅG…AƃW¿úÁ0êX…tªÊ¶£f®ñ—Ù|m©ˆãý¨(gËN ¨֣ögß^ÙêÓ›°  þè.Ž:êhÞõ÷gòõ²WgŽ\ à¬{í‘äV¬%M +x<É1h‡¢”ðSO +R  ¨×†F’$NäÇÄ ×ÉÁÀÀÁ!E*ü¾U£r4V®Ó ·WDßW¢œ»\\Ø ìýPíi ––×½KG»C JÆ_ ÞÉþá>Æ< \Þÿy­«b466Ú. 
s¾Y÷œíòg=guíþ:ï–TD?uÞ^(PV €QÁQíÓÅ%Gýw:éiØ ó꼉jÅ+æÜO0ƒG¥‹ŠQ¹WÅûí £­…8Ÿw½ÈÑGïƒ~7èÐÖ{ëz­§x• Ù`ð÷>À†;_ÊnâG ?Oê•)îþòí¼õŽ`ðh—nÚÄà›^‰GUÄû>ìßt<‡ßþÙŸ±ôod^Ù“Myhš†þÈ#t—§ôæjºmÃßü hÙꩵ\.Ø·®Ç5Õ!úÓ+Êa1@% =¨ô.’É ÷30lS(„ùRr¹W½úÕœ}ë­ÿwåÿ¯ö—Nǧ Õ`!:P‰VÕK§ë÷Ñ}×A…æªpUÇqx¤|?;‰ç×wqøKFY!:ÀŠÜ% )êUÎj(ÂíÛ‡5ŽJ j…ÈÔLþ6õS³ñÍmH&“$‰Ï>åiªŽ«’ê~ª_P|‹Å0d·Ùäü³ Ÿ_üòyN=}'—¼¤¹Ô-2¨$cOp¨RBh¡Psÿ©¿UYø/®û"§pJ}D_ODP›jOÐyeȱöÛLn6Ú ¡vúòTÄîd9läÈ|æ3øÚMšú±ÑÊÑûgÛ6¶móÑvf(”‡GrÔaÄ (”H¦·Óä”ß1"Ülå!,\,J”xÍã÷p—~ËÍ»)R …²ô§ŸŸýŒÔóÈnºí$!†púg¿öâ~ü· +W†âÖXa­c)1›ðfÔÐH“ž˜èíL†¨Ù25!¢*N[TÚs›  ÏS‰.P¥fuàsT<“¿MeRåƒT c•gæ’+“hoÑп¦ÇQü…>…c ±¢.&f°•¾)ú¼PÉl ÁùæKùÊäK¥bw2²n®|}÷î[Ú÷4Ðaè@}GvFyóí;´¾¬Rw¼àPÉ£¬b³UEqá ¡­…8ÿ×:‰/ôÀV£~dz â!Oå/|Á)pÁpܫ㗛sú9KÎ[¿}gg¯YS •Å“tUî4RM+ð™ù¿±üuhŒè`¦QØT®Q‰q:}›ÑÈ傳ÇÒé˜hT/»¬þö¶]»Ìó*É #‘M|+‚õÕýˆˆLᬾã„y_^µt)G¿õ­8ë×sÉG>™É$?ùÉOƾÖ6bpØ’]œvÚãÀÅõWR·¡l³Õa_ªºcGÛ°¨7Øo„ 70Ò¨HIµ £ò4a²õt:aá>T¸o´ÂëlÉe8>>Ï­ßÉ¢¹›lU‡RÍöÁ¸Ë°+1ZåzSÞÙl6K•r®v.æ¹í_ZjG“Þ7Íä«çUþ¥L&†j«œvê»cš&wïÜÉæ£¦@Ð¿× ú„Y*Å›½æ55ÇSû~ýë_Ï–-[Z}+'„‹Kq¬¸hºYžèË\4Èñ{Gøv¦ …‚gX#QÊ0]ƒt½¶àt¹ñ4ã©AZ‚JµŽ•¼3éò{ê;ØÙ_±¼m£õûTÑïR´Ð`Y´3|n‹Ï)ÿíDŠ9©jتƒ-#¯6µo¨mÑ×ÊÁ@³j"Þ×::|’àxêÚ§&ÿ™Í0..Ox YÐ@ˆ«›^I‰£ê³,{¸5 =fm-ÄœqÆMœwÞp^ü =f#V‘¬Jøëº¤/« äk„ŽvxòWg|,ù|ý©Æf©'V:ÆG” 8š‡‡ 3./ïÓ×%wÛmpã“¿3ă¸ñÒÅ,YR'çzˆEf–T‚tMÓ:>!út0]•UÛ Èf³5‚h´ŠòÂò}?ô¸SÕl•×ÀlCGgË1ÇðÓM¿æÜ±V®N>¦R4A!2šÓu½&?ÛlÞzúéœvôÑÓ²ïhžO%UO Y–…ïû(A?¯|å÷ùÁŽBÓ^+Z[Oò•ÓêÛ t0ßħ«ü·aK.±árAÆ“ ¤z²Ã÷ KâªÜÙ*5N2Yë”ày•ôC¾?tƒs8Þ¬x¼€w%á(nñጼÁ€O¹ð!>gTÂF!اòîSQA.`—s|“®u!µ¬Jš¡¨àèy•Ñëõ<„‹êÑ}Fc§‰z‚]T T÷#—‹G4éz%·i>_Ù¿Êí®Î[ý¯¶M§k?«L†S£Jqø+žÅ¼ôÉšåª:ô˜c‚ÎîÖ ¤­…8ƒžœ@ç¨Ü¼ûî»9ô¬C¹!uC%ɵr¹Ú›z^¦ÉºÕ«[}fãfߪùû¢B}pß÷¹tî\ô®.²aÁ÷yòøãqÓiÞõÐCœ}öÙèÀº£Žjõ-œØ½ˆTÙS(ï+ ,‹MóNâ‹/|¯ýífþá¾̉敬ÁÒà®w hiðŽm.8xyuüGèàØ:°XZÙe1ºÞ àüçÇ!O †õ$óþp~Õ9-eÜ4ú©œ¬ò)‰Õ¢Û'Ë¿“íu;i©A…Y«dòÍ`YVèçy>éô±@0ŽþÙÏ@×ßÓêËll¾‰Å»0ñ<Ø·ï>>¸qc @U神(J ‚`¢%:ᯠèDÅ)σ{pp,ð3^•Ž{Œ%·h°Ç òyÉZf“xÃešSØv%%Qô£Êþª UäÇ0WíˆðñýÊ5ªsˆ¢„>]¯ä-UB¥JÅ3Ûv¼t°Z·X¬äWÔ‹ÄŠ~&^¯d’ù_ùÊä?ópȱû8öØZÑ0Œú}æêñ_ä£ÑÐV´fm-ÄíófÑsoÿvGïc}z=›Í$“IúßÕd­ñŒ§ƒ#Õvô>-Ú]û†z^9„‚D2™œ•žTF£¼rõð}Ÿd2›ñ2M3ôœƒJeÏN–tt~öó#xïK–}*>¬j6Õó<<Ï ïO:fhhÏó 
ÉÂÔS¯À‰Zf—ó&“É0÷a®\§X,Òµ?määÙgs×ã“;óLÖ,]Êg:,g'Bœzž«°ÆÐ;Í4yß)ß⛿<‡Óz¾Æÿñ‡ƒ"É$Éd6V_(º» Ü>ð÷‚ÿÇüf9úè§à½²ÚŸ!£ ´é¯äì)¨[T÷•3ÌET’zC †éåß)úêŒÇãLEU ­ÃÁa€¦ 3ضëºÜyç«I§‹$“.Çb]wÒµòª„¸§¥Jk¥¢ÿÜÛGSyiUûSáÌÑc©cD…êè¶Ñc£wªˆ¦ÛR<{ÅÓ|Ô©ÇÅåt>IǵùÁ9ÿÄOŸŸüäÔ$*néea¯^7[åp+™ðQ³’oüa‚Y‡!*¾ú®ŒÖ]OW~´Üá@Ì5u´1BußLy6"*ôe2•àFˆÕÞtÑ{;¯E•[¼šu¸Øìø¿ãضmɹGI`ª*òÖÃÁ *£…•h]ÜP¸së´>*Ì5O>V¨*º]´8‡0s´µ·wh—v;ðšæ6HYØz`kÍì¿ ´ŠW®XË¢EgÄ:„³këºÛ¶j&tWh;”€”J¥Bá-êÕ˜ÉdB//UõµSÙûý#Yзdì3„BÑÑ‚òÂÒu=æYØÉ÷¤Ó©çõ–/¥òQjÀ“_ÿ:ýù6tØ'˜w”˦M„ßÛ|±ˆëÀ?8`™•\eIÊi~ž†ô—A¿—JRkxX¼“Š VO0SIÍ<*ɼ£#{;²]Ô!·ÃT°rÔoè‹Ùv0Tã¼B!¾žrÐPŽ™Lå5T¢±*¿º¦UœPÔ:jžEUó¼øâ3:52µ©Jj¢ã£}M»„^xÙ,hšaL]*-%H©ï‡¤”`]ê»TøqtY4¼Y¯Z-È­¾&*ï|u@-ò¾à€šanTÐÓ‰SåUíI•$V˜2)Õ,(q_5ê¢÷­z?~äÿ/Þ~;¼á ãüÔZÏ$ÑÐØ»j¯Ø¼~ù˩۹C@•÷‰C°¨M´b¥(ç&õáLG\|» i_ýº•üŸçp]‡QóžÊ#]ÉSíòÄó"F½å” §¡…ŒfùÇÇ=<>I’¸åŸê†‡WÉk¬Dg•¢r5s¤®]G'Yž¥U¢_4¤­\×%—ËÅÂŲûÆ×¿þõ)ý¼§mpϽý‹6Œ!ĩۅm¿ßFÉ*‘N§ë—oŸ\ÜöŸÑk#!n,Tޝ(žçáf2¼ë]ï_õí6 Ú&º6ПJ¡}ík¬µ`Í ÿÌ…êdK%là‡åõÆp·*¯›*Ä ÜXt«ªÓí U¹Ú"dž E¦R ¨QÎ^e2¦YI-”ÍÂV¡PÍ¢bKT¼J$‚÷‹ÅJ hØ­ÚwtÛl¶òQ«TG…B=™Œ;Sø~ÅS Æv´¨‹¬Úê5SåD«ë°~ý ÀÓS³ÃÄÃuÀôŠW|šSO]ÀÃ߯òåoåŸÿùdÎ;oô}*¯¬`ÿ•ê¹Ñ°MƒÀL£…•Þ1Q¢= Ñ=v t¬ÿš³Žùho›Ü•+¡2Je¨º2T¦ˆÚÍ3*qª(ÎDr~O—#ŽN‘"VPö76¶30H“¦@![eɆ硄:5®ª®^¬ ¢ûScÄ$ÉI]Oµw_=T!®zë©q Îø:VÃ|}Zè]k>}ÊÓM;Ø´­ðè­—ðØÎ'F_)·œt WÏ¿šý¸Õ§,16Í{ˆÃ^}¦½¯¯F \'H‰6vL¨Šæ P fŠTØhEóD2 ‹…º ˜r‰.PÀÀ X“­vl||œØñë )ÑœEŠc6ªò;Ž•÷ÑÃàM…>ÄT£›%[·á7 c\¹æÚ9Ï.aÿæ0.kð0:—WÙú±­àVæ› :"ÁmçÂSy¦V­ZÅÚµk[}:ã¢&TãÈ#Ñn» ××YüÒµôôìÇÉP Å 5 #pðŒ¹ýhPîÏV¼)FëkzT Z+uC§f0æ¼ÎB—òdSÅö "Š%“qM7›­/\EQT„¸Ñh$z)q®Z‹!Í}Jè䍸Ñ<âlÛæw¿{§œòYn»íkFoøž ÏtÊ¿ªg ZN•;M§Rôq´Û¤œ9¢î›Í âªÕ¸‘{jtÿÐü1|\—Sÿð‡Vß™q³—ĶmüÊ÷¾ï%“Û™G0©Q$˜øìD òØÿõ ¿!Bªiõ£¡QïùD…®Ö#]þ`œ”#\KY|Í[®ÚB?}|,¬Øûjì­ô#ŠN8öR}¶hß-ýTí`‘$sèÐÑÃ1l=±å€áààã“%Ûð~©óŒ:m88tíïâÈ=Ídkk!îÅ}ÏÓµü¥Wð<\:x)»ÎÝÅ|æ·ú”¡†ãx¨JàYN¢¢ªmN5Fùg,òMôTT£ÜL.š‰ fš=fÖU4j<Çò˜ÒÑëÞ› 2dbž}ôa`ÄyŽ;]÷¬Ü¿x1Ï®èF»z”kRÉļ¦TT¯C¼¿f ÓìX1Î4M’É$kÖ¬iõ©Œ›èÄ„óøãO>‰í›üÍßìãù<Áïþ;þ’%©ÒÓ‚Nó9Tò Í!ð²°”ˆ&¾ê¾n¼ÃÁû)h¯'Œ·³Z°‹\.‡‹¯µ±Æã㾦R|êP3ŽÑÉqõže¹\Çqxè!]_.÷ Ää•´W&9+ámLÆ¡š™\·ò¾ãTÔÜè6ªÚ£BÓâÛ¥Óñõ•h¡P¼¢É«'ÅR©Ê~!p-UûSU «=Yúú⮣J1WûÈçã÷Æq‚¸mˆŸkT”«îÿ©˜oÏ 
Ž¯â´U¬võ„–Š7ÍŽâ||NÀà‚Õ?á?çöðÎø‹ñïÄ¢’„PƒÚÆ[¦Ÿu¸¹æÐõÂeº®žÿ¸Í'”`׬7YôYÔlTŠSþ`zß5"*ú©mTA#5kêLÌ1£+뉚&&kž^ÃÚ½ÍMT·­çáñüsÇpÄI‡5^©Õë6 9ˆÎÞ©0éñºÑÏÚd’Kµå-Yì½{)t©¼ñ_¢¸îtþ¿³ÏæKõ7 \~î£"º)W¡<á På[S:«ïcd観‡‚ÿ“Wëç?\;-Sû¬Í³‚Ë.ûtàD®‹[31•ËA±x:ž×¦iXDò¹íÛGò¹ç:öؠ¨m‡â–Vöª*±Ä¹\`ÐÊ£"›¶\7ˆuŽºXB%Ù`´•ëV¾¹\ 4iZ ‚årñu«+z8NE\3ŒÚ$w†œ«ª©Ä¬zëªkT_¼hÑÚ·êÈ…êxîjL3~M>±Éª˜ó¶O9·j¹ˆl1·Óhµ ¨Äu›&?¿öZþl¢ÆÔ 8ƒç¿–9Ú÷дW4¿qŽJ‘u?gÁ¤€Ð™tí;…=#scËTÁ«†L½Æ´0ýÐj!K¥šèñ5´š¼äÕbÚXûkÖ‘eªhk!î…ßÿ)½ÿ¦á:;oßÉÇ>÷1n\scÝDé‚ÐjžèlöV+&ï:i_¿õ물ye«OQFL„‹.‡à¡b`!ƒO=a£>úÐÐ8ÿÔó[})ãÂÃcËš?cîÎïÐPˆ+0îüÀ$ÓI cæEf%|)ñ3ЍB³£‚™ÚF…G«÷¢…šã¨ìè~TÈôXy3T •ÃP£èF+=¯ö§Žeb†¢¯Ê›½.Ê­¨¨¨¨ö‘$9f®©Ù‚ ep=¼ã`ïýgž9ÂÀ@‰þ·ç1{{m:$£ƒ¾<8÷‚Sö`SŽ7ÑPQõZíë9À€ˆm­æÕ¯~謲©Ñü¬Šl¶È¿þëüÑFèÓ´ ¤td„RWšeUrS Ä[)˪xhEûÒétÅ ]·Öã ‚×[·ûV"¦¯«Ý% £"àU+ή[»nôXc¹^V{ëúè"™ xf¥:DjÜZ=ÑK[JNÈàë•u”–.¯c•×É——ÊËL(. ¶1¨cÉKE@3Á¶‰–™ÕŠ xÞ¹†ùà:¯¸ïЦí§pq9‘÷Þü$½çÓÜF>ñ2³"f³›#^XÀá‡oŽ —Ù¶ÝöŽBëi[!àˆ]/òÇxWÃ÷_øŸøÄuŸhõi BCöï=‘ÃNÙ ¼<¶|ïÕ{é=»wb;ZJ3îÙC5e¹Tîƒ^øÁV_Ƹðñ9dË œzê(ù órèwØï ¿É/¢B–ªÔë,ÙPhS‚”*ê¡ØNö5ãŦ<'óä›v»WBZTÐSEN†¢@ Jåté662á=11CO¡Ž­ÊÙ;8dÉââ†÷Hݯ¨P—'ÍÏOùyÇU{V"«uÚidÏ{™¿€®®+8óL=¬lÜÒf%:NÓÁw+•?åcž ¡›Bû¡x…üëw–ðÞ÷nåöøn × B#M³º ÑL&ãáÕÂU4!áX Ç« }¼†?Þ«JƯ„4—JR~@À²„.£¼®N“É#½¢—T/µhTxÏB8'cP›ÛIå{T¤‰ç~Ì2v"½è> j«{FKɪÂ0‘ó¾ý‚ÛyC£‰²6e='lÿ-×^{bs$>ã,â'´ /n9’¹sãqù|ƒÔ?*EªäšB=ÚZˆ{ꓺ‰?þøã<´á|è5jõi BCæj;jŠYüdí\mìŸ0+11¹ø?/† [}&ͳxñ‰#yíkë„oEBDTeÔ™*ÌPM½Ü~Õù¢¢W½ŠÀEyµ5³^µ+~3ÛT“$Iž|xÍJhT×\¤HŠEŠu ²T£<8³å'&*10AŸÔÞ»æíš’{7S„9âæ‚óè)¼å-{øùÏG(•zZH08vtAgY&¡œ…¡øø±väÍ7þ½ç½Ž;?±ƒw<õÆ’%¸U,†ÛßßX‹æ9›IªÃ,•§^c.¥âkÈv>q!ƼVU¶–÷é/óª¹ðú;±ÿXëÌÄœDu9Ù//´ÇàïûÞõ®Ëüȯ*×+m„‹ËÞ39zá¶æ6P!êâ,'ÐæBÜós×µ^C›K›1Ÿ>óûš‘ó¨*$ͰãpÑ;þ8Xà)øüÛ>ÏÇþíc­>=AhŠÝÀœ}ûxÍk^^û¦Ï/~ž§~óghgŒ{ßÂĉ©'Ö)¹fÐÐè§?f]Ê{êæSY»¹sª¦ÏFGÇ!èÿzüïÿ¾—Ë.eB$OÅ£¦š*â›ÐD 5Ü~÷ÝܯÁ¿þêc¤ûr º-Y¬X¯¬íx)Pñ¼MŒŠz¦i˜yÏ"ðñ <›üªuÈúY*ÕtyyÔë-úµ­É¢ïË|gÛò ö?=F‘‹Àþ’HªÐ–øøì;´—ùGï —yžG&“¡¿¿NdHèA„8hc!ÎÅåÌKîÞZó^×Ó]°¨Õg(ÍqÆöí•:t­éâcºˆpBç°ŸÃ_=\›¨ß|xîÏñkçלqL3yâÔà U5õé#žŽ qºéày{C/ÎåM£Ë'í‚ W\÷Ð.æÿðyÒ‰a(˜ÜΕ8•ðNõ}Ð ´~*žÐ¥òò •äù*LS§"„ESmªê–zäu5ÕM‘NÛWÆÇŽ]»Øþ7ûè;ã}Wr Ú_™Ú˜CÎØÆËO.‚Š©ÅhBX›J¹ê<ñ‰á ¦m…8€îMÝu—±èxÇÌœƒ‰Y7Y· ŒÅ†¥x>ŸbÛï#Á©J œüŸãÏû¦ùžÚ7Í 
’fR\…„6GÛ6çð]üÝà|ÇÁ4ÍZq9Sþ¿:œMÚ„Šñ?Ú·Þ9¿§ôÁÇà†q-Sb™ôˆk•BýƒFåªPžq*šy¥±XV-fË ô fÃÒ ýì‹lþý.¼êäú+å„8©Å'´9»ÏØÆ’%ÏÅ–éÑœ›Iâb²ä†ÊÒêhÄî\ÊoÜÚièÇ¿ú8Û계æ8ì™xùË_¾~æâgÈd2“Ø£ Ì<Ç¢³í»OÕ¾‘†‚[À¶íVŸ¢ ŒÉ¶'ž€‘þòOF°,+^ÑÌ!!<¢á„¶ÄÇÇÀäÃ>,øäC˜;v ÝÐd •iÅ"s"ZŠ@dS¶¨V-˜Eó¬EE7ñXÆÅ¢õ‡ñ’kÊ߸´þ Õ¹ü¡ qp`ðdN:©’J˲,Üh%hÉp%4 m…8v?ʹÇÖfå|ò‹OòÂQ/L`‡‚0óìûÇ{9ÿü­áëãGŽo\IGÚ”]ýç²èí5ËÇÁ0 )<"´=k»ÖòÔ¡/áÍûX¼ø—äóù¸§<T8ªˆ B›ââb[ÎÿÛÁÖoü¶ùâ8ñÍ&ðPSá¦yÄëH˜yvîä÷»wÇ=‡¢D=.¡yñ oáùçÏ _ëº÷¶Ïµú …v¥mCSŸxóÝ\÷Ý…TâDÌ’‰3Ã.í>>V,'ÇdÑѧtÕ$IâãJ} ŒQ Q4:ÇFË«÷—$‰†«¾§*ï©ÿ54Ò¤±±ÃeÑý'I†×`v`ÖÝ£·ou2æ?·6JÚ˜gy–=‡?ëz~¼"öÞï?ƒg¿ý¬Ø´Ðü®ëþåToŽ$H§‡*oä¨äm |ßÇ÷ýÆ"@cì~9nw7Üz3úÂã¨ý«}çÈ…S¤ØöžmÓrϦ‹½7¼ýÆ ^ûÕµ,üÝBIh/t[º¶pLÏ‹œ{Î/Ë+o$`ɳKXb,iõ) ˜<HðõëWÇ=á â!$Ä🯻M6›Å0 r¹ÀÅ ŸÏãº.–eQ,q]—\.V«Ëår$“I ÃÀ²,Òé4žç…ë=úè£,[¶¬Õ·»)žÛ5ÿ]¾œ“v}M[YÅ>Ã&ð,¡Mh#^ú›—ðãM‰çí ø"Ñʨ9‚<‚ÐÎ<ÿ<§§JÆ'ày¨ëze’G€„:´­÷ìg?ͽûºP]Œ£ÿáhž]ö,\QÉ;Sä;°‡ž$z¡¶ÎXû˜Ìò4ñp‰zÂãxÅÈkoºnœ¾û6•œ°};»þóuáù¾|ÑËyáR «:í]]±¾O‘{Nº‡‹·ÎüĈ LïÅßÑýý?áK_:‘ááòhÏ"):,,ÏuÝPLô¼À{\×u|ßǶí0T1—Ë…"Vµ ¥Ä.×uÉd2 àû>}}} Þ‚–e…Û¨ÁÞdQñ-ú·¦iá¹E½Í4M‹µ!Ùl6|?™L†GÃ,«·)•*T4Ńaa…:Ã0B®z½è¾£ë½ìe/kõGÚ4>>9ƒS.ý-ojT §@¥b©„÷ mÈ©øןÀÕ×–S·¨ê¼QÄ®mðýr“¦ÅÚ{ß÷C\ß÷ñ¢Ï¨êçOõM£õ¢ÇÌf³<ùä““7‚dßÖùœxâà¥xž‡¦iñ¶ÙG„8¡!mëwú/^Â¥—ÀÆG6ŠœÐql>)øÃ·ùÉu!t$Ã'Ρ{ _Û¶ÍÚk[}Z‚0.¶þì:Ž>ê—|ø—L"Ç…Ê—q†¨‚ ¿Q …ó|>Ø” µ¶N,$ L/;€í÷žÀ7¾ñÆÚ7¢ùj×uÃïàhÞ6Ö«ö>jv=Ö ð¢<†ÆZ/êÕh=˲j<ê­WíÅý»Søù!o`ÏEeÎ¥šêxuöˆË ¨ö2[¸paøùE‹°èº”Ф¡ê‰¨ð•N§cžÁ¶‰þmšfC!Î4͆À2aÒyìxøL¼$ðkŠ~OÀžebD…¶â6ì?-lÄ>yÄ'kfIįHhw™Ë¼3¶ƒ‹Þ½H*K É OÏáGí Ú`7è,žùÎ3[}Z‚0.´»¶p†‡ßqx0Àk"\$Z“J¥ê††š¦ô¢3MÓb3A˜ Îü»bA÷`m"ËŒy]D¿Žã„Èh‚“ ŸƒZÁ)×h=×ucëEÃãÇ ÏÉ÷ýXXaôü¢[õýk$ÄÕ ©®z­ ”4ZO ,¾ï7\/Ú–¨×õÖ3 #&ubøö‹k·€VËES8åß"ι\.f_QÑjëÖ­áýI§Ó E0A˜j^9’å/ÝÄÓ9A›,ÕØ…Q˜±ÐÔ‘‘V¯^ÍŽ;X¶lË—/}ýSöážm“t“5в4«ÂL3^>jx˜£ØÂº®ã¤ÌIë+ ²ÐrÆkÇ~ûVŽ{É(¼ 0“f bt^ß_˜EŒ×ŽŸ~tW]q ÜMÅ®J¸ðyR ^ˆò&´˜ñÚð '¬ç-oÙˆ†ÆÏNh ÆkÇÏýº›E‡ìeïà^n"è\Hª¡ÅŒ×Ž÷í»÷žsz0;'ã¢a^š¦5Ìq#ÓÁxíxÃo^Â?þùâÀkÈr@p+"[µ÷XOOOèeVííx†††B#ŸÏ‡"V2™Œ}ªs:E«äV‡àÕ£Z°:ŸñÚñK^ò¾wä‘aÑœ'Ž{¢îzù|^D7aƯ¼¸e?ï|癤R)¶}m[0I­1©ôÂÁÃŒxÄ ²aÃn¼ñF ¢86‹óåëîÀz&Í_z-?¸4þžËÌ088Hooï ­>###ìØ±ƒîîî–žÇðð0 , «««¥çÑ*&bÃÝÝóùøÇß 
_§e"œØOœvøN·úúÇkÇû÷+ÞrõÙÌÛ?¯’¤w†íYì§=Ï£•×?^;þö·_Ç¢‹àár2oÏÓoœ£mªi;–óh&bÇ'ÿjtá©„M›6ák>½Ù^îÞy7?²~†FEŲhÒ÷jo¯‰Ø{»´?ríÁDìÀ°€ ~j/ÿÛ—)‹¡æL ýã8bÇã³ã]»v±cÇö>²M;;¨>Þ|Ž |ÍãìöEÙF3̈GÜš5kbÎòåËY³fͨÛ,ûú2þúûûÐ. Äåèo‰™aÕªU3t¤Æ ²zõêVŸ«W¯fpp°Õ§Ñ2&bÃ{÷îå[W|«¥žpb?qÚá;ÝJ&bÇ×\ô;~ê©§xì±ÇøÚ×~ÍðÃAHªÁAýt°ÛOeÍ0#q;vì`ñâÅáë±f~øanûým¬_û%Žzø(ê2ž‰¦sݺu\{íµ3q‹²}ûvFFFZÞH¯_¿žûî»cŽ9¦%Çî¹çX¿~=Ï=÷\KŽ?^ð\?yêOøÊ…_aݵëZrÞb?qZýVv¼ÿþ–¼v¼víZîûÑ}ìÜ»“]ú.v]»«%ç-öÓ^ç±yóf6oÞ̉'žÈÊ•+güøãµãŸýìgüèÁ±óŒœýÀÙðÀŒŸ2Ð>v,çQaݺuìÙ³ ©|@SÉDúÆ;Î{—ŸvgŸ}v¸|¦Å„V·?rq:±üÃg~ÈÏýŒ³®?«%ç Ò?®¦Õv¼yóf¶nÝÚ1ýãuëÖñÈ#pá÷/äÙ¹Ïòp÷Ãüüå?g˵[Zrþ­¦ÕöÓ.¬_¿ž­[·rÄG4µþŒkwß}w«OA&ÍOøI«OA&ÅÊ•+["´ÂTbÛöäw"-FúÆÂlAå,„NåsŸû\ìõ+xoà ­>-¡Ã˜‘ÐÔÞÞÞØŒÃàà`Ëcáa<ˆ ³±ca6 v,ÌÄŽ…ـر0;ZÁŒ q###8ŽÓå¶¡YĆ…ـر0;fbÇÂl@ìX˜ ˆ ­`î 7ÜpÃtD)ÊŸùÌgð<üàüýßÿ½(ÍBÇ 6,ÌÄŽ…ـر0;fbÇÂl@ìXhs8p`¦6<<̆ èííÃ:±aa6 v,ÌÄŽ…ـر0;fbÇÂL2£Bœ ‚ ‚ ‚ ‚ ¬ÌHŽ8AAAAA8Ø™‘qíÆÈÈ–e±víZ†‡‡éíí —ÿ×ý÷Üs‹/ŽmÓ轩8—åË—7u¬é8Õ«Wã8Nì^Œv¬éºBó4²aõ^;ÛñtƒØqç1;žÎÏ­]í¸ßi¡yÄŽk‘ö¸ó;®EÚãÎCúǵˆOÛ½i'ͤ]˜Jíæ óˆ᪫®‚ )ŽãÍfÈår¡‘år9Ç ·í½É°jÕ*n¾ùæØ²™·v¶ã™þN ãCì8Ž´Ç‰Øqi;éÇ;žZ¶{Ó.šI»0ÚÍA'ÄA hÞrË-¬^½šë¯¿Ó4c9L¦››o¾™ÞÞÞX’¿VÞ å²<88ȵ×^ËŠ+Z}ZÂÔ³á™FìX˜,bǵ÷Cì¸ó;®½bLJØqíý;î<ÄŽkï‡Ø±0QZ­™´ Óõ>èBS®½öZFFF¸ãŽ;º··—ÁÁÁp=Ö{aíÚµÜ|ó͆a†º/ÎÔytwwÇTÚÞÞÞ0î¹Ñ±¦ú„‰Qφ¡ýíx:ìGì¸s¯OÇçÖîv<“ßiabˆWö¸s;® íqç"ýã bÇSËÁxoZ­™´ Ó¥Ýtq«W¯¦«««&¾Wݨ‘‘ºººp§Æàê½7TÂ>…a¸® ®Ÿ3uË—/çæ›o÷·fÍšÐe²Ñ±¦ú„ñÓȆ¡ýíx:ìGì¸3™ˆOÇçÖîv<“ßiaüˆÇ‘ö¸3;Ž#íqg"ýã8bÇSËÁvoÚA3i¦K»9è„8•°R©™ ×u¹ú꫹ꪫX¾|9Žãpë­·ÁŒB£÷¦šÑŽ5ÕçÑÝÝM"‘ફ®bñâÅlذ|à£k&ï…PŸÑlx&íg4fÒ~ÄŽ;“‰ØñLní`Çíòê#v\{,i;±ãÚcI{ÜyHÿ¸öXbÇSÇÁvoÚ]3i&ó}šsàÀ­¾€vbxx˜ 6ÐÛÛ[ãJ9Ú{|9ÖLÞ aü´»OÇ9ˆÏ>Úåsk;n—ï´0~ÄŽ¥=ž ´ËçÖv,íqçÒ.ŸØqç"÷fìûp0Þ£‰ÜâAAAAa8(‹5‚ ‚ ‚ ‚ ÂL#Bœ ‚ ‚ ‚ ‚ Ì"Ä ‚ ‚ ‚ ‚  Bœ ‚ ‚ ‚ ‚ Ì"Ä ‚ ‚ ‚ ‚  Bœ ‚ ‚ ‚ ‚ Ì"Ä ‚ ‚ ‚ ‚  Bœ ‚ ‚ ‚ ‚ Ì"Ä ‚ ‚ ‚ ‚  Bœ ‚ ‚ ‚ ‚ Ì"Ä ‚ ‚ ‚ ‚  Bœ ‚ ‚ ‚ ‚ Ì"Ä ‚ ‚ ‚ ‚  Bœ ‚ ‚ ‚ ‚ Ì"Ä ‚ ‚ ‚ ‚  Bœ ‚ ‚ ‚ ‚ Ì"Ä ‚ ‚ ‚ ‚  жB\"‘hõ)´X–E.—ò¬IíÇu]r¹\«/§¥ˆw¾ï“Ëåb¶/6 vÜÙ4cÇÊþg3bÇA½¶ÄŽbÇ­a&ûÆbÇÂt1;–þqcÄŽ;›VÛqÛ qŽã´ú&Íœ9sZ} ã&—Ëá8¦i†õDñ}×u[}I-Eì¸sèëëÀ0ŒÐöņÄŽ;›fì8—ËQ(Z}ªÓŠØqgP¯-±cE§Ûq'ÚðL÷ÅŽ;ƒN³å‰Ú±ô#vÜÙ´ÚŽçÞpà 7´ú&¨ñÍo~×uÑuB¡€:µê÷yäN>ùäp[Û¶ùþ÷¿ÏÉ'ŸŒ¦iø¾Ï#<ž={°,‹={ö 
ëú„×ë¼yäŽ<òH¾ò•¯°qãFÎ:ë,\×Ų,.¼ðBt]óús¼ËG;DZ®ÓóEýsl—¾ñÁjÇõÞ“~ÅÔÛ²òj«¶ã /¼PúÇã@ì¸óúí¬U´…Gœïû$ ÇÁó ™L†¯‹ÅâujÅbñ€išá{Ùl6|]*•†¾744t@Ó´ýýý4M‹m“ÍfÃ×jßÍ®7ýýý€[·n _«ókfûêëËçóŠÅ⸗uŽÍ\g6›=`Æ]×$“Éš¦(‹£^£Ú·zohh(vMÓ¸èÉ4ÍP™T h&“iõ匋êë+ ض=îåSJÜ© ­Qn–jT,xôo¥«F«X,R(ðÌ~;n¦-±ãN¶ãN³á™êŒv¬Þ“~ÅôÛòDíXúÇÄŽ[oÇê|§»ÅDlrºÛ’Nc6Ù±ô^¤_QËLÛòDéW;®e¶ö§ÛŽÛÊ#®®ë†qÅÊÅSh<ÏÃqœ°êÎtm3;ž~fʶV±ã™@ÚÕéElxf›œ^ÄŽÛ±ýñ!¶<{8˜m_ìxú™©þñLÙñ” q«W¯fxx˜îînV¬X.aõêÕìØ±ƒeË–±|ùòi½(A˜ bÇÂl ž‹ „´ÅÂl@ìX˜-H¿B˜ ˆ íÂÜn¸á†©ØÑªU«Ä4Mn¹å†‡‡C#þð‡? À¹çžË'?ùI/^<ëâÅ…ÙØ±0hdÇbÃB§ m±0;f Ò¯fbÇB;1%9ↇ‡éïïçÎ;ï`ñâÅaeŒÁÁA6lØÀ7ÞTçÑÜ5?ñ‰Op '´úÞðØcñÒ—¾´¥ç°cÇvìØÁÉ'ŸÜÒóxúé§Y°` ,héy<öØc¬ZµjZö=Õv¼råJ±Ÿ2íd?­þL6oÞ̧?ýéiÙw#;žˆ »®Ëm·Ý&öS¦]ì§ÎcÇŽtwwsõÕWOù¾§º-VíVÛO»Ø±œGíy¼ýíoŸò>Ò7–ó˜éó˜éþñD줥]ÚÁv±ãNéÿÏÿü«W¯æœsÎiõ-k ÚÅ~ÚM›6qä‘GòÿþßÿsÝ)âÖ¬YCoo/ÃÃÃlذŋsÍ5×ÄÞS,_¾œÏ|æ3£îï©§žâÍo~s oaÀ}÷ÝDzeËZzëÖ­cݺu-?Ûo¿îîn–.]ÚÒó¸ï¾û¦mßSmÇ[·nmùç&ö§¾Ó@ø°ŸÙñÍ7߬\¹’eË–5Ý?nN®ƒîîÜÛÛºù÷öö288¾788HWWW«ï“ Ô v,ÌٱذÐ)H[,ÌÄŽ…Ù‚ô+„ـرÐnL‰·|ùr6lØv0Ö¬Yºu*ãVï9Ž3fOAhbÇÂl ‘‹ ‚´ÅÂl@ìX˜-H¿B˜ ˆ íÆ”„¦vww“H$¸êª«X¼x16làø@øÞÕW_ÍUW]ÅòåËq‡[o½µÕ×-5ˆ ³Fv,6,t Ò ³±ãúø¾ëºák5àõ}ß÷Ñu=¶®çy†çy¸®‹¦i2Hža¤_!ÌÄŽ…vcÊrÄ©´6l ··7æÒyÍ5×H$ذa+W®wO¡m;fìXlXè¤-fbǵ¸®K¡PÀ0 …ýýý¸®‹ã8äóùšu‹Å"™L†t:çy$ úûû[})Ò¯fbÇB;1eBJs£J#£½'í„Ø±0hd«bÃB§ m±0;®Å0ŒPpK$c®ï8†aL&ë¾ïû>…Bß÷I§Ó†ã8躎®ë8ŽÃÆÙ½{7®ëbšfÃ} ‘~…0h;vqÉ‘£™P8˜™R!NAA¡®ë’Ëåð}?ôŒd2I*•"•JÕ䔇aX– q¦i†BÜO<Á1ÇC±X$•JIˆ« 3‚‡‡VþQØØáß.A¨¾ÁØm¡0»!NAAf!Jôj%Q8]×C̲¬XθzhšF?žçá8©TŠR©æ“3 ƒB¡€iš¤Óé†ûQ^:–„ì‚ ÌI*66>~ø7ˆ×J\ H§¡ÑcÁó‚uòyдñí¿"Ä ‚ ‚ Â,Ä0Œ¶Ë§õFsß÷G]?—Ë…žp*Oœçy¡§ë:¸®K*•b`` ¶½Ú¿úßó<´©I ‚ Œ‚òxSBœ‰‰‡®£D9abär€¦D4Ï]‡Dª©Tð^dnÇ –e200P+´yôõû·mH&ƒýx^°Ÿ‰f:!NAA„Á¶í˜\>ŸÇqœØòd2 métšT*¾çº.Ùl6¶¿B¡愃 «eY8ŽƒëºœqÆ¡÷mÛ5b ÂtààļÝ\\’$)PÀà …:¡‚çb˜Ä<,+xiú@l3ŒŠˆ–Jë•JÁrlj{¹y^ð›J"š®ë‹ÁûŽkŽüæóÁúét°\[ðw"Q9¿b,hþ:EˆAAaÚ1M“¡¡¡q-B7 æª^{žË9§òé÷s¹ïyÏ{ên/‚0D½Þ..Y²áûõÖët …@‹z©U‡v:NàÍV,ËÊM6…B.Úß,·¬ÊúÑ}ú~ð«¼Ò”§öâzz‚ãFE\Óõ`{uìd2xí8Á9ø~pÊcÎ0‚_µ­®Â`ô\{¬ù{$Bœ ´;–/@AA¨Ïh…”èÖh™®ëhšÖTqA„©ÀÅ%M:æõ¦Š7D_ëèÙ}[àûw™ õýÀKÌ4ãÞhJ¸êé‰ `ù| nõõ"š®ËÓéÀË­T „®R)À øßuƒ}¨ý«¦=Vjš fYÁß–Uì” –JžtÑ}¨ms¹à8êü!¸&Ë Î‚óœ("Ä B‹H‘âZ}‚ ‚ ³šôdFK‚ ÀÅy¾©×::::&W@&“ -%º©|mÊñØu+a›Ùlmh©By–Eó· 
Áÿ*T e¾Ç0F¢¡­*ô4Š®çª<Ü .ªœpÕÛNÕãD„8Ah’@AAavâá…ùà ÿ)ï7 ­í<âl;¡ª‹d2Áÿ*¼4‘^§Óˆ¦i¨-P(4ߢD DC?]·¶‚ AÝ©ÁÅcÜ:•®.ñ¢ êÚ]78g%,F™ÊbÛ‡LÝ®Ah©Ž#‚ ‚ ‚0;ñð00ÂPT/æõ¦„9å×.¨*¡Qúú*9ÖýýÁ¯§ª #@¥hBí½¡&3ž 9Íå‚÷ô²—ÎC†àÀRüCú¥iB\èÐÆúÊûŠ¢<÷¢ÛŸžÖn?ˆ'- [AA„™À÷}Ç £Ë=Ï«YWhð<Û¶cÛ‚ tJˆ‹…¦zzè˜ÑJ8Ç©=P$³pÎØ‘uöð¡={x垨ñ”3Œ@¼2ŒZï5ETxs „­rä)PvE9ÌuÄ„à¦Á6‚¿òö…òïÔ-qQO(+>'óüò1óå÷"[¦|‰òÿ‡›°Ç€~ ¤ÊÛ+üÈyŒ·ä†q‚ÐBñŽAá Àu] …B(Ä%ʱM®ëbYVͺ¹\ÏóÈ”c¢<Ï ·AhWœPtƒÀNjp]7lÓLL Œ14¬QÞ›HÍU×uñ¼J¸i__ ‚årp‘ 'z.Þ»—÷åàów<ÇN~ùÃò§;v4ܧò>ƒ@ÄS¢˜·”ð–!¶Ôû6`0à5Ø’†¡ò:7'àK¤by_Y@ƒºòe¦Î½²ÊÛ$ ö“(3]>F‘àxÉòyËï{ÀM¸÷¯Ö£Ù6Fy? Î/ÿ­„»>à7¿ú§<úhSŸq‚Ð\\LL2d$_œ ‚  †aÏçÉW'çi€ã8†A2™$N“¬ãrQ(Èd2¡]t™ò¤³m›L&C¡œ<*üY–…ïËĨ SC´:ª sª­Qžp9* ÉR©Tø·C 0ÕÜ2Ðx|….¿üï¹ë®µ”JAεl6Èçv« }ç<“JñÂG>ÂÍ7âÜ2g¾rçÞuûöí åx’ÙÀË3ð6+§Ôù¤¨ˆVCåeˆfPñ,ó°¿Ðá&'Â(¯÷a.++n:'œª• ×¹nˆ'_ù× "ڥ˿I*bžnYËa–ÏC/¯“ûå/Ñ]ýÿ8¸9¾OÚ¶Ißu}@ßÈé]»(•›¿õV:ùd6ŸrJSŸƒq‚Ð"TâAA˜<Ï#É6mYVê8NLŒšèz*\Ôó¼†ëEQžn™L£^ò *’É$®ë’J¥( ˜UIˆ”°–ÏçC/“B¡€ïûäóy,Ëâ¶ÛnömŠÅ"®ë–½A¼P¸³m­:ÞJa‚TçƒóðBámíÚ-üüçoFG×É“Ç.KH®ëbÛv¸­òÞ*TÃî±`~¼×Ä ¿¼ÞEƒƒáz†äsÎ9—sÑE·rÕUÏqÈ!ÛyòÉÕaþ¶t:èºtØóÈ#$“IÿþïùÑ5Ïðúå‡22|'Åb‘“?õ©0¼³P>/¸4 ƒF Â)ïµ*f”—çÊÿ»®ËQëÖaAè7¸ÂvªEÇêBáÃ÷Ñî»/&N:›O æöï'Q^|Ø6i‚°ÔòÍnPä~¤ï¾ûU¯"yøáA"<5qd𤝽–!×eè˜cHßz+PÊåHßs«ßô&öΟߔHÕTAh1š*‚ Ât iZL¸2 #œt]‰O“]OÓ´˜°f6(/§ëzøžeY1/¶F×Ðßßçy8ŽC*•¢T*¡—c Ç¡¿\¦o`` \V*•Ð4d2ÉêÕ«ùÈG>ß÷}’É$¶mãy^]/;A„‰ââ’}·ÀƦD €ßþv'{ö|’ÿÁ ǘ†Q©žêû>Fð¿¦áx”ýÌ3üéºu\qñŨýݸ* «ðýp˜›Öîç¡eáôö¢¹Æô©Oíᨣ¾ÉÎ?âÐCÿŒüÇ¿"çº8ža.ð•¬~æyîû§âÆn@Ó4.¹Dã’K*×5ox˜•žpI‚PMÌ@S¢!åýFu´tù8Ià?ﻓ4 {éÒP\S½™”y®‹ñ­oánߎ}é¥å{u&ðãíÛáõ¯g@Ó‚ÄuÉdPæµÚÛ÷¹é&æÝr ûn¿¯¼’}èC|æñǹè?ÿ-™ŒÇÜB°¯T*P u=øàÄçžãw/¾Ø”ˆGœ ´å§¡Q sEöðÂ2ׂ ‚ e,!®‘p6žõ” Ö¬§ÎÉ4ÍP\.‡mÛèºN:Æ4ÍXaMÓÂ×Ê#/º_Ïó8âˆ#jökFèy"Bœ SIÔÎÄÄÇCSŸ[°<Ð …Jž6ƈ7=ñ‡üßÿ¹3 ,Çýßþ6_ž7‡ |Ò,ÀÌ@[ZaÂN<2§Ý· s›ˆ[ðš×¼›?ü¡‹¯}í8æÍ{ˆd2ÉðÊ•|í‰'XùÐCä‡ øÅ)’Ž´ëÕhšæV{çÐÓÓÃÂ… ñ<ŠçºnMÛ®WÝy'¾ïóÃþ·Ýr pþ›ßÌ®/€ª躰pa¼NÝ´òÿ?ݲ…G9†Â“ORص ï¹ç˜?8HØyÝuØ®KÿÛßÎþ¯~•õ×]ÇÝ'œÉdÌsÛÓuþæ?ÿÛóø¿¿ý[^ýêWƒç±÷¿ÀýÖ·Èd2|ùË_&—Ë‘H$øæ7¿IñÀ0 Þöøã¼ó¦›À²QNÓÀq‚ã4q‚ÐT)kÕ GÃSò ‚ Âlöm‰‰D×uCÁ.º<:PJ§Ó r¹¹\×uc‚_6›%“ÉËåÂ0Ùt:.s]—¥K—Ö=%8JXª S…_S UGsÆ=ܵŸ9ÚË¿8‚šSPÿûøÜþà•[ÂÖ4þ͇[û Pð9ü¿Ëo^úR 
TÂ-ß÷¾íxžG>i*ÁƒÞÌåžoïïüÁÉÐÐÐ;<ÌëÏ?Ÿ¿¿æ~øçÎWœÝ‹qÃ?ØðÚ4M òv¯]K:&ŸÏ‡éTH­eYôõõ•½žoxË[xaÅ >ò—É÷~þsž*{²}ë'?aã§>Åîn£*e£iAh¨ëB"Áî·¼…á;ïäùuëøø{ßËo/¸=ó{ßÃ>ê(Þ¿l…B½{÷rå•Wòï?ý)÷nÜÈR‰MW_ À#<¯ýk2™ ÇwCçŸÏåëÖÑŸÏsË=÷ð×^ËÛßþvÒé4›6m"Nã8÷Þ{/}©Ã`ÿþýlذwÝy'çîsø¾fY¼¡¼ï±ÐTAhªBNž|Lx“œq‚ ‚ ÌFÔ p<Ë!9U!¬ÕE àT*áy^(Щe®ëÖ¬}ÝlÁA„fqqcS£ÎÏ¿î¼zgå7ðg‰.'pô1Îä;tqŒîÂg{)¹s8©z †qßéïg÷+ß©ƒ™…\îsX–ÅÐÐP˜/ÓqÎ?ñXÛz€DÒýðâ³ÏRX´“ „³Üö²—qÌÀº®cYs5‘ãõÚ¢)n½õVþîïþÓ4ééé!‘Hàû>¦iR,Á¶yà‹_äÜ~”ÕÇÏ[·mcÁ%—ðÍþ~ÞÿWÅÛ/¹„¿ånÞ°w^z)ïúÂxõ§>Å ßý.ö£’Ëå"W(ð×þçø–Åí{öð÷ßý.É¡!î¸ì²à~þô§tŒ°àÆ)|þó|ñ‹_`hhˆžžÎyÍk8ì´Óøø¾À¯Ÿx‚ï}ï{|ï{ßãÊ+¯ä™gž©<-Ã`áÂ…Üzë­|å+_Á0 úúúÐ4l6‹mÛ¼÷½ïŲ,V®\ɇ>ô!zzzxâ‰'X·n—_~ySv"Bœ Ì0N¬ŠN–l˜œS½¯ÓLp¼ ‚ ÂÁÁh…ªÃbÕ²F᱂ 3…æ‡xúÉeô¸—ÅWuaž,+à)ëzæçŸáÅÍwð__ÛC’óÙíBZ*åP,¹qùìï†1Ìq²Ù,‰D"l“É${öÉïõ"ƒ‚¼m‡}âðÝï’#x]Î;æ8æ ð"Ö©_™µ•WsÓ¦MAëy\vÙeìæìùó±,‹l6Ë®áaNÏf¹ñ/þ‚?;ì0Žý·ã°TŠd2ÉyçÇÆóÎàÛwÝ…çy|û³Ÿe^>ÏMÏ>ËŠ+xÝÔ™wº€IDATë^Ç‚ ¸Îó0M“ùK–àû>w<ø PÎSw啨–ŵžÇûóy{ì±04ößÿýßñ}Ÿ ûöÑÝuaNQ˲øÒ—¾Äç>÷¹Ê…•+C¼ñoŒ2M3=Ïã-oy oûÛ¹¬,ªû÷ï|‡ãŽ;®i»!NZ€ŽŽQIʉOI’" ‚ ‚ ‚ Ì"R©ï¿é&û÷Céþðüì¹ ¨àyÐc~‹múùŒô¿€aœxÅ·¸Ø¸ÛªXëºÎùgý ?ýé |ß'“ÉP*•ÂIˆt:†Ùçr9^üýæ=xÀ]°s'y‚¿€zÁøÑi‹¾¾>ŠÅb¸o•‡Ó0 î½÷^n¹åºººøÔ‡>Ä'n¾™/½ãì^²„—|úÓtuuñõ[o%ýÕ¯rm:;N©Tª9¶G®»Ž¿ú«¿bçÎÜtÓMüó?ÿ3ï~÷»CO<Û¶¹ÙuI{…B!ð¼ƒPø»¿û; (zñ£ýÓ4Ñ4-Ì3 7´ïû\wÝuµ÷Á4cE„TånµýE]ÄÒ¥Kcyô’É$===œvÚi<ûì³MÙƒq‚Ð"´òÂ\Ž\èÊ|°‡§ú¾ïû … ‚ ‚ ‚ ´;ð×Ï?Ïÿýyþÿ~ϱ/ßÁÛ–,áï"a é4äØÊ[¶°hÑ"4Mcûc ˜þ°,+¬} ‡á8¹\Žl6 eJ„ŠÒó’ØÞBtE&ΠX,’ËåŠԞç…bàž={xòÉ'1M“ œCÞ÷¾`#MãÈnàg…ã8¤ÓéPøj–K.¹„ùóçsÙe—¡i7DòÆA Z–…mÛØ¶ŠÑÔÑJÞ£¥P"^5ªPôµºÿªÐP5ª¨Ñ5×\ÚŠõ!Nf˜F"›ª¨cbv¼çy^MòcÏóp]—d2I.—Ãó<|ß5ð–eáº.^Ùý¸X,†3"Ì ‚ Bgãû~ÌÓ‚•ïûR4A„YAõ8ÎÙ³‡Ã~s8wóÁž'X´û5l[¸÷mÛÆýá¼ÿÌ3Ø\°};ÝÝ«¸çžÀƒ,ZÅZ…MŽ%ryæ޳qja@Ð.÷÷÷“ÉdøÇ?ýSÌ+®€BÝï~7ÙE‹H$¼ó8÷ñÇá _€*ï°ÉT¢Îçóñ1ŸE¨"꺎çyÜõí»øñ™?æ³…Ï¢iZÃã9Žƒ®ëãCÆÖ÷ ’êUkz¹ø²R©Ä¶Û¶±ý×ÛaåØÇª©‚0Ã(±-ŠŽŽ‰I?ý¡çáaaMð(“<ǪrÓj™rKîéé!•J…IA!p¶,‹… ’J¥ÂØ|UÒºP(ÉdX¸pa˜ÓÀuÝØûªJšªL³páBúúúèéé‰UPSÌ™3§nyìè¹ ‚ ‚и®K¡PûŽã„Þ õú‚ FuÕTЇùÅ­·®bÛïÐsÛmüæ7¿ÁŠ´}Ï<ó x×»€JžË¨(džçÕõ€«æ=ïùÝ„D8˲ÂqÔU»vqô_ü¤R`Y<óƒðôÓOcÛ6z6¸òM±£Dõ5ãˆa‘{pÌ1Çpî_ž‹ëºa!…zŒ:Ì5yBªºE5vyåC躎ækl?n{S»8A˜A2dðË-‰am”Z(Ä¥»µÚ‡þÈ>Ç"—Ë… =°¬\}“Édذ …Ð3Í0 t]Çu]\×%›Í’Ëå°, 
MÓb³Ý–e¡ë:™L¦\Z;ÎZDeÛ6žç…Þqêj–<“ÉÄÎ=Ú¨ªš¦‘Éd(‹ø¾sãA¡=0 cÔp¡B¡€ïû†ö”ǼÊõãû~èéÐÌ€Ta&‰ q>ðÈ—Oâ¥ç.gñâ‹xðÁ0Í…hå*£ƒwÜÀÝwßÍaç }A°m:® ƒ4M“þþþ¦¼‡U›ÙŒs‚J ”Ëå*ÇÌdxí/~CC`YͲä¼óØ´iÙl–®K.K.™þ›™&–ÔN=HÂPzhôMÓé`ÐmTvPx[ŠÀ«­™´K<¶·T^–+ÿ l¾b3ë×®oê²Dˆ„Ä+ÿô[ËË«Å6ÕxG]›£ß}(ärdóy|‚¶ÄþzÝ:–Þ|3–e‘ÏçÃ…L&ƒ®ëaê*ë¨Ùçd2‰eY¡‡›¦i  ÃE2µ‚F-™L’ÉdÈårضM?‰D"Ö!ö}Ÿ|>O¡P¨É iZø€QB[´âYÔÅ8ŸÏã8Nøº§§‡l6æˆÎ† …ðÚr¹\ìø*©i¡PÀ¶íQ…:•AA:f»Ð¨t,Ô#Vù\b1Œò¶•YE}”cEC08Ìå*®êoš&¶T‘Ïf³áD›ã8xžG±X$•J1þ|víÚE©T =ðG«¨*‚ÐJ<`Ûà&.û»W“Jz©©qØüÁA~²`ëÖ­ãÄ×nW/ܲÙ~ õÇ4ÊyAµ¯j<¥œ&4Mƒ|>¨"¡iaEQÚçÑ ³,`”óá9ÏbÃè>‚jQ*žkê´5 HÅÛ­Y·ê™…Q>†[g¹A ò9À8‹tKhª Ì0=Œ€áà £ãà`´+ðÍyé“ObÛ6ïâ ,‚¾ï[6ndÍ‚¿œ‡-“ÉH$B‘-•JÑ××6°–e…!Ÿªñ-•J¤ÓéÐõÙ÷}Òé4躎mÛ¡ —ÍfCÑ/:s]MûY±Ëu]R©@xî* § O­ò,Ë"“ÉP(èéé óЩŽ~&“‰…Âôôô„Þ|Ñ¿™L&Ü‚pÜêuA¡mð õ~£/µLᎲzlzå×ÞǪzLª‰ÁzI¯Uå»hÎ5§Ä»'žx"œØS9dAÚ•G÷íã°—`šA…T…òX[62Â_ÿò—,Y²„c™y¦z|¥œ”8÷©½{!í”L&ÃÜl¡Ð§iPg’C‰uÓ‚SgYŽ@䲫ÖS¹Û¢x¢]õ²qÁN#ø’ÄŸc6µJ€§WíSk¾|>,]¹´éK8AhC4×x½ˆÎñy íp€›÷ïgKW[<Ý'žˆ*¼öÊ+Ù4w.']uÛR)Ìrè'6ª*”Tu`ûúúÂcEó ¨ÐQ˲b¥°óù|(€©ýnݺ5܇êT«õ€i†Ñ-Ó4ÃP¨äGˆî[Íö¨0Uõ`Éd2ô÷÷‡^s¹\.ìÜ ’ÉdX¦Z ê!eÛv¬l·òTÛ[– B…9òä'¿#AarèÐTs\½N3y…LâMKå=ª{ºe/ލ7‡®ë¤Óit]Çqn»í¶¿‚ ÁŽÞ¼™/.!NiX¦ib‹8‚•sçòæóúxzŠŽûéOšW½êU±e®ëâ8ùtš“­wÜÁ!?þ1†®ƒmC2 ¹\ð¿ë2¡s“%Eàѽìò²>*Ï%œT¼ÑüòëѼÀýòÿoî,Až§|y¿ÕÅT ‚]ô±e•÷cRñ·à¹¥Ï5}©"Ä Â 2žj¨ß}è¶ž}6ßÅå8’$ Ú¡Áùóéºë.Fà<À£¯x{6n¤{d„´a`ΟϿz(Åb[Ó0=/ %M$±¼,ýýýär¹ºIë%¾4 £a©g¨”ÎŽ†½6+Ä…ª¨¦(•Já9G…Á(Ѫ=*ß\2™¬ë§rੜ3Éd2–SÁ¶m²Ù,®ë’ÉdÈçó£æ\Pa±>> $IÆò ‚ Ôz“GŸ•*„špSžqj™ëºœuÖY­¾A„†¸¸aa>ØÿÛxÅ+Ù%ªmE'%rÏ?ÏQÇ9œ9Eçð‰O|"–Ö§X,’Íf1M“­Ù,[_÷:Ž=ýtN]±9¤zêû1nªÈP+dùåßêÀ¨:Ët*¢—ÊõæGög—ÿvËëö×| •TºjE–%DùoU¡’SÎ)¯k—ßWˣÜtðþú¿Zk›»-"Ä Â b`àáÕõºUØÀï€'æ=Ä;øwàð5‚ïz 8¶¿ŸÞE‹Øx×]Üõ'B×cñÜsÏqµi†Y‹²8U,rþÖ­˜sæ„ÅÂó){r)o±)»NÈ5þcáyÞ˜^eÕç8/4Ê}­<äThk¡P…¸è@A…îf³Yzzzp‡d2 UR(J!.W.Aä|-‚ l˜¦ÉÐPmrmõLw' {Šö J¥RøŽr0>gAhÔd´løÆaüíŸ]³N©T ÿ~#° 8i Žíû>7nŒ-{Ç;ÞŽ£Fž|’áw¾“3®¸Þÿþ œW)︩ÌõZS8ƒÞRÕºÕ"\t8‘&SMF–›BŸÐ gÕò,•°V£ê½h%E«÷ôV¢K¼øCuèlHŽ8A˜a†h\ÝÅ'øÞo{ñE8s„m‹ Ø–E?pÂÝwóË/æ sç0rýõ¼ùá‡cÑ·âq{ÿÂ…xšÆBjÃî}vï^ÀÉ'·n†¹Ùü.£­×LE jÔìz´h…Ú— WU¯•ÇæÊ3 #¶ž¢žiÛö¬Ïcãààá‘$ qV«OKAèÔóÕ²¬˜Gýhᬂ í†VVw<à°Í›¹æš£F]ßÖ ¦àØžçñƒü tp]—c=ò]ÿî=ïáâ+®(ŸhU8Ó ˆcµ·4è¥ÕY7IE¾ 
3Šoêö$ËËªåŠÆAcu!NZ@#9¦¼îw¿cÛÈ™°©›ãÞÍûîömr¹Ðõøã@ÐY=öØÓÑ]——üt9™L0¡áû°¶¯âQ«*6k6Üé!ÿÇ. &; ü·Ü²Çr=ðXVX g”› K­ÑõÄ9‹Zá ó.)¢yÛ¢óøÑÓÍÖY¦D4¨qij 9(ê…Äæ#ÇV禎QíS *†O’1…¸‘‘‘ÉEœØìˆõJ0{Óð0lÛƒ—qÖ¶…ìØ±#Ìãæû~èm¥ë:ÇÿcN9å_¸é¦3pÝÀ›`‰ wçà ޲aµ#9XÕ÷¸°½L€üVƒç5déÒ1Í`©d2AŠþö}°¬`ÙÂ…ˆgÛÁ¯Â÷ƒmwî<,\–ËUÂÉ0–X71¯Þìz6›¥T*…û›3gNX96ºŽ*R¡BXUá •ûF‰uŽã`Ûv˜n6âà°…88˜˜èèåìr¥;r¤HI¸ª ‚ ‚pPàÛÞÍ«{{Ç\WHG>ñj …Bà`Û̽ôRÞ¾u+ï~äþqçN^öÛßB2‰®ëaꆔJc,M­åR_ü2¨-¢+/«ž§×©T(Ta Q’Uÿ«mòå_u#Óåã¹Ô iªÐB5ñBEÊCÏ®³ÞDî†BÜš5kÈf³ 088È%—\Â%—\Âe—].aâèù½ÚK·üãÎÜ÷£qè–-ðºùùüOï~:ôör]—óÏÿ B_¼ç=KxÙËþ”lòù@ìr]8Í€µ6ì·a™ M˜ŸLðLØ×{?žE‹vsñÅ?‚ýÌ+Ÿªe¢¦¢šïÃÖ­ÁkÓ Ž— œžH$ÑíÛß> Ç1°íà\•˜gÛà8‘¯¾Q²F³7Jà;úèS&ô™TçÍ‹R,Ùºuë¨ë `šfXuuhh(<_ÇqªH³›4ilì˜{xdÉR €‰)^q‚ @8I5ZÅôfQ“”ê7º¼‘·¼ª”EUGŸ­“f‚ Ì jâÙž_y8GÙ\úŸ“€}“˜´6M]×ùô§? é4\´ˆõ/{›º»¹÷?þ#,×:Ö¸¦)ôòF›ðj!K¡Â9£M«K šéPž „=5ÜSmF}úu–Õf)¡¯ú1¦~…ïLÕk=rQ]ç89`xx˜k¯½–Å‹ÓÝÝ VÕ¸óÎ;Y±bEÝ*ƒõaÕªU5Ën¾ùfV­ZÅš5k&Ö‚0ÍL·GóCª×·mÛÆ}ÿðضÍ9 ö=÷ìbÓÞM†æ'{晋ÐõÀk8› ªïäó•PþL.2àÅ4üuþ; CEøA Nï¯~±Úý=Ûjr—YØT &DÔþúûƒ¿5-xOӂתy(•‚uî¿!O>9M3)a` "ä©| –w™ ìÛ$0z×);k”cžˆ|…Bžßýî=þ<¢yâ¢èºÞt Í#Á@ T*‘N§Ñ4mFÃS«íxºÚb—øUBX‘Ѝ +¯¸h¸¨¢@mâs¨~”˜Ôí´:ûTçE§¾À7E°zõj®¹æV®\IWWÃÃà ³råJº»»¹æškšòŠ[µj7ß|slY.—cxx˜ÞÞ^r¹œÌ4 mÏtÛqµäc[‹Åò ­Îaÿüi.¾ø>o0oÅ*âWÔÑJ d6AÏ¢:ŒKÐùíÍ‹þ(ŸÿÊÉ|wøªð¼”'o5†N¦ÄP!±º¼þù[ùÍo^™g®Ã]‹Å@,Ìfƒuúû1ÑóàÉ'ßY®´Zñ”®Åq‚u±ÎÀ÷}î¿¿€çsÅ"|ãw°{÷¢0¿]xŸý@¨ žê•^“¡ÑÌ»¦i躎aär96lØ0©ã4KµOu[ìà 2X1"O«%òäÃpl@„8aÜHŸB˜ ˆW°, Ã0Â~ª†êyŽã`Y™L&V‘<ºLyÒe2™ØÄ–Úg3UT•§ºòbWË Ã ™L’N§Ã>—gºû‚0L·«þ°œ³à3áäüX¼æ ÒD½‰=Ïã;矑I €… 6}ìQ©7/¡ªŠFƒ]”G[Ôç@…wêÄ«¦uþ®ö®ÓÊÛÖóh«ÈŒ:Ë’Tļf¨çw–§V ›"…@`»úê«Ã…ýýýôööÒÕÕ.[¼xñ˜ùâÇ©Ygpp 6pã7â¼zõj©|$´-ÓeÇNDº¶¾ïsÇᇃmãùl¿eK¯{ÝñÜ}w°Î®Þ]àëvu= œßð8J˜+QÛî˜T ʨڭ .ä¹£ç²þÊ¥XT <(cHQÉa ˆ· ½Ê´‘éÈûo{Û~û[èêzæÌ™/QÏcÇòål{à “¡ë=ïá°ï~4 ­ü ¯9ß÷'ïרIV>U‚\4dU o ›ÚÜrPë1•\n“ XšìöÓÈ¡½½½Ü|óÍ,^¼”â[o½5\iõêÕìØ±£Fœ‹’Ëå¸þúëc^t;vì÷ „9èFcóæÍ¬ZµŠ+V4µ¾0ûfõêÕ¬_¿~Z3•v¼~ýzV­ZÅÊ•+cËSÄ=Õvvû}ÈÓÝ}6»w¿)ôØš3§‡+.¾Ã0p]wRWõ¼v¶>y<§õ<Ï CfUhªòÄ›.Û˜*ZÙ?žˆCãþ±pð²fÍÖ®]ËæÍ›§õ8SÝ?^½zuÝ~ÅVMé¾o?^Ž:Ì}Ž ]çÄo|ƒ½ßú_¾ñF.ëï¯ÛVEÇ$nËTSšŠÑ9Š$ÐG%ç›V¾øµ¢ÑêáC±Áñ§ (ÂL±zõj~õ«_5½þ¡+V¬`xx8,ÈpÍ5ׄÊðUW]ÅððpÍl^”›o¾™ÞÞÞñu FᨣŽbÙ²e,X° e7r2ø¾ßPyö}×u±m›b±bqJ`QÂêC5#¨Bß 
…ýýýáz…B!Ü^£X,Ö¸è+Q ]ÂçšeÁ‚,[¶Œûî»oÚŽ1ÕvÜÕÕŲeËj–«b Û¶mã~Ïãûë×Ãð0]]—°{÷¡aî·dRgþüM,ÅÀÁ‰|Öù õÞÛ‡«ND;çùX Í/iªµ B_Km±I¥­-ˆr;w~‚ÞÞãpøCTÒ èT<ìUV*XûØ_Ôx4?þxÅbàõæº'\刖Ï!«‰D¥BìÃ/⬳6Nê>ªï{õw+úzÁ‚\~ùå±ïþT3•v<þ|-ZT׎!(Ò$9.!-I=ôŒØÊÖi»ÂÔ³xñb–.]Êððð´ìªÛâ ÐÝÝ›ñžIrä0Ë?^è êã“"E‰ZÝ0o}\‚·Ð<Ë–-ãé§ŸfþüùS¾ïéêGŒH:&—Ë‘H$BQN…‰BNõ“Â" ª{ÖYõ«VW`Íçó5•Y“Éd(ú)’Éd(ÎU‡º6[¨®•̦þ±pð¢Úµi;Ætôëõ+Ô3^#ùN:iÂåjÉåÀ0È•õ‚ôu×qõ•WŽéi\§|’Ñ.¼Ø¢]•ÓH>PB\TP+Um¯ö«BQ£¨j¨³ÞÞ^vìØÑtÿøPõÇÊ•+ëÎL¬\¹²&_\5k×®eÍš5±¤‡†apã7ÒÛÛËêÕ«Ã僃ƒ£î ‚ÎÆT5öÓ»êÅŽ{žG¡PÀuݰ1Q Õá°,+tUûò}Ÿd2I¡P ›Í’Ïç) X–…išáì¡ÚÆ0 2™ ¾ïÇÜO£Õ5ûúúÂŽK6›%›Í†ù3ÔÌ£®ë躎mÛ±/«iša"Û¨‹k«èêêbùòåsÌ1ÓvŒ©¶ãcŽ9&fÇ*Q½IÐõ‹t_v[_ÿz¸ðBº»_Íð°zi~ú±\wÝqqcIî/¸ànàâ)»î­§ŸÏÑ[vÏÖ¼×Váü©òrÕþVËÍÉò:ÛÏ8„®K. ×SûÊ•UÈ~Oùï»V¬S ¸^s>•êÖ÷ß_¶òäö3ÂðY…ën¦„¡5p»3ÍʯãU[÷ìÉàyïo9ß¿·œ®ë Õ9hô»ÓÕÕÅÒ¥K9ì°Ã¦ì󫦑¿ç=ïáÁ —5cÃóçÏçä“Oµ=.Ò¼¨X¤ ÉòO>¾ Dww7K—.3oìD™ê¶xÁ‚,]ºtÆ„8¿ÜëôñÑÑCñÍŒÄ\äÈ¡¡á–,,J”BÁNG'C†tùGís&¿+³=ŸãòåËY»ví´q[ßx<äóùpÒ¸º¸‚¿ÔrÃ0‚ÕëF_×{þš¦Y³¼:?]Tl…¸fŠ>´­ìO¤_µýcAèî»›[n¹eÚŽ1ýãzý Ÿ…ågæ|óæíšš ðý ¬'›%mhšF¡P¨iÓšF…“F»ñÁÀ«Ú±¹‘çDx#ËÿG» ªCT´S8cì¯Céííedd¤éþñ¡c­ÐLC©’*”øËôàà ###tuuÕ¸£·ÇqB*ŠrC¯>?UÍÉ÷ý0§„¦i¡Ð¦ª7¥ÓépVÍóŸ¯W wJÌÓ4l6Š ¶m‡ž|jûéœÕ˜¦ÛŽ}|zÊ-‘Üx†{{áM+9ñ¹7òö+å—¿<>¶eA:m„;×uI§DTˆ³±ÃDø0vÓvŽ»ô·À)c®›fla5iâjÏ-]ʹUï!Êš]`ÛÂ…üáÔS*¿.ˆuvy}•§sïñÇóøã/ðâ!‡„ùéThì…YØYŸ¾¶Ÿ´Ñ©ò‰a]7äþïÿŽä™g. 
=är‹ä©¦Q~™é0 ±S*•¦¤-VÂÂx« Fm³Tž>SB„9ÙP…Ž¢ÓúQœX›<À~ù'ŠŠÑj›4i$è§Ÿ’$ñÊ?Š\\||J”°Ë=éì${´>>66.nL\·±ñð01C1PhŽN¶ã™ JªPýÈjÏòzëN12ÌD¿B¦›™´ãm/, kÛ¶oßN"qÔäN\U¢Óu¶% …£òdNÈaÆ£V Ó Ä¸FÅ TÈR½f2Wµò¸°êìKÒo!.ZÆ···—믿~Jf»»»¹ú꫹ꪫX¾|yMþ¹™F‰cÉd2&T©„­*”S×u’É$–e…Âã8 àû>¹\Ž\.G>Ÿ*Ã0Â<¦i†Bšúr¨}@Åã,J´ó Q5M³nÇ Ù¤ïJ`Se=™LÖìCS4ÌU)î*$Öu]LÓ¤P(„³š©T*ë” ©rkø¾O___lVR)ø­ö´k–©²ã¡¨/ƒi~¸Ž£ô—\2—k®‰¯oÛ媟x¡ÈYm7vùÇÄ··àõCÌ9õiNë:k\ÛÅ9×\ÃÒ7¾±F^щµ1óc£ŸJZ›Jø«A ¶€Ý'žÈŽ[xñ¸CH„ʪ‰W&XŽn1`@§¼ïTEØ´‰¼¨Á¥œŸ‡ÛO8“óV’ˆ»nmáˆÑ–×£žg{÷îÒ{Ý SÙ«"²ÔzKkÀÜçŸgpp/{ÊçlLب\ iq.úH<²T<¡Ïè‡ÿÊÂv~aÁiŸ¾÷¿ÿë¤Ó•ŠªžÕ:ñx¼áF{H¾üå/ŸÖ{>SÑOT𭇎N–,6vL„јÉ>E³Tò€š¸¸¤IǪ×CC£D) -UÛúø±B Fx #æ­ÖUBœòX-P A"äÔy*¡OáãS¤Hš4 ¡(§òÓ©ó©F…ʪcªcE©·m=Áî`¤íXƋر0˜J;Þ:o+.¹HLxŸË•ûÄ屟WvÖ) M‡Î*gœ†ç BG *ɺ•T’`ÐæT­£Þ«9$ uB_ɨѵª²ØTáº.Žã„žXÉd24De¸¥R)4Ìz®ð†aŒ)VEÅ©©fªÅ¨‰„€š¦†¡ªkŒ†ÉªÐ]µÿþªÒÉJdSrÑBÅb]×k„8×uʰ¹\.ü|”7c'S3èð<}ï{©W*zV£4Kå¥ulì¡'V±r¢ÞIÓU8Àƒ¦†YZy=£jY£«Ù¾ý8ŽØ½8¨Lè(ËS&å Meàú(RŒ‰‚ý‘ŒÐ^( ³U¤Ai4`Tê=ÏóÂeÑí«'áMÓ¬»\M;ŽSrà ‚0Y2dÐÐxqÿ~Ιx¾t×%i -ËÂó<òù|X¼±Æ\¯ÚûÍ¥20Sƒ+‡`p¥SlE½äFC§~hª”=âf ¶m‡ÂŽïûa tzª( •ÒpÝÉ hÓ•ßLk«ž&“ÉXÎ;`TÁêΓëºôôô`ÛvØÂ}Jàëëë‹u’¢sj¹V;™@TðÊ.WÇ?p&ÉäPÃm” ˜˜XX¡¤B–Ô¨²ÿ‰yqÄSz­ªÂu3g¢„´f˜ÿG/püÙ›Ð>¯ðjR?ߨڿ¢D0‘3'ˆœGh(§,ë›ÜçÀ=å³YXã•S%$+¨mC3‘¼ª°ËlC‰SYIqˆ¡ÐûGUSMCä bž6*§•Z.Þ5B+ˆV=UfI’a.6õ=11CJCÃ(ÿÔÃÄ ßSÛGÃW£a>: -|V¨c@PÈ!O?ΔªÖš'†~§«¥–Ê?Nr«Âb£ç­PÏ*=&ªb-Q¯¼9,:{ÂM5᪠‰%Êù\×m8ášÉdBïÏóÈd2áßÑí£û¦©^®i‰D"\§P(T¼OA&ˆ…<×v¼”ßþöOñ¼ üvܳ\Eéa.ûIR+/7¨ š¢Ë¢œzšâ„Q =âYµjUìÍê×+W®lõùŽŠ*(ÐßßO___XÕS=\ñ‹³yö˜%|ío¡Ë„{ups° x½ÑØ+i&Q ùÛz•eǃò4T³˜Q1N‰*_ŸZÞ¢ÿ{ž‡ã8œyæ™­¾-“æ‚;ùOÇÁ0 ~ÿû7aÛc¶ÀÄ$GŽlÄ0…ù‡’?àè@)?Îé‡#ßò–i¹Öf¬G )ˆÓÿòq¶ì>©ìW¡Ø`¿Õ(ÑowYùÛT•¨ôËÀý>äMøI1Xù~`ƒ†V '™„¾¾æBU§Ãcv6¢BTU_† yò¡÷”Í«Äòj]•ð¾Ÿ)(w+Màà„9Þ °aå ¦*&IbbâãS ˜(T¡žÄc%º)ÑNy¦u«£ªêªY²¸¸dÈ„ÞÒJÜÎ’ CGóäÉ‘ s3VWJ­ç•¦„è÷\4×*¡^C R¦H…ç­BlÕ1òäI‘ +˪‰$•nA˜½¨œ·@(¤5BU0TÞkN¹ÿToR8ºßÑ–[–[–L&I$S–jF„ƒ —õOñÌ3ç4Φš­ýý¼òþû)FRBÕLì+Oµ1øÉ×ÂEÏ__õTS^qÑ&U%ÖVGI$äž\±v²×ÝÝÍàà ƒƒƒáË—/½ng”Ç… Ôÿ*¤ñ›lØðOlšÏ=ÿ}Ÿÿ[ø¢/ú°Å‚çM¸0ä„RíV1®ñŠXLÕ¡t:ݪ¯×¶m<Ï‹y*ï@ÕûÃþ0cç?]<öØc¡Wàï?º}ÏÄä~îgo óUS¢4î¤ØÇnÝÊË–My0Pu†Ñhv½#vïæÙŸ¾$xqÎØë×{FéÀ64 ¶T9Y>£d‹`¥áÜló‚kñ¼ WªãTÄ·6ÑÌgœ©ô„‹¢•›J ö•—\Ž)R¡ 
òd)Oñ¦fJ­&F”ئˆŠZÕ¡©ÅºS„m»²g=\VýÝ«žtQÅ”gÚkµm–lXˆ¡‡ž˜¸gbÖ}¶D“!SñÒÆ%K6ôúƒ`2HyÉùø1¯íhˆ¬žƒA‘bèå*BÜÔ¡R2´øXNå ö}Ì~¯mÛ¤Ói4M ûN©TŠT*U#È©ýBU¢ú“ÑåPI™Å0 Ǒʭ‚ L5™üû;(îG×ÇÝöøãø§nbá¶m 8Áʹ(FŽÀ“a Öß½žÿØøñ…qï·Á€I‹¼¯þv‰‡I¸é”q(ÀŠ+X±bE«ÏeÂX–…mÛ±‡¹‘N¹uGü ÿ°ówl9B3œëÀï pvŽÎÿæà)nµƒÜOQ[z‡z^PQ±ú»àºJÁÀ@å=ß¶Õ´àïV?ÛUNV¡ÂQ£aÂÉd’l6 tÑ÷Ô̤òŠ3 ƒûu7pD`;wîàW¿ú,Ï=wò¨ÞpQ›11¹‰'XB¥HC5QŒfYþÒ—òS_Àf:†Q›¿·ˆí®Æ¼“÷Œ¹®APe»Þ#°‹@L;ß„ŸE–÷0¯|[M`ƒÛ]بÃ&ìÒ‚ï.„ ·8z¼¥Luhj5J¤P"ÚUB{%¨¾qÂL£Š1(Ï·ÑòF‹94“_1;ÁéfåYZµ¼D)ôB­>ÎXÕê»Y ÐÐKÏ ßÛË]{õñ“$Ãj¯Q1Nå®+RÄà óóE÷¯Âd§*wålAU!oõ9D‰¦ëd¸¸ æ•U³¨Š*D»õªHCØZµãV?Tf ‡F_ŒŒŒ°zõjÙ±c½½½ôöö¶œÀq&üHƒØÓ¼$üÞ«„›)o—\®"¦‹Áþ+1>##g†ïiZ°¾e'“ÁßÊË.• òªÏáºÁvº^?ÄÍ÷ƒu&ûLWçb¼vš¦ÕÌ~ªUd#šxWÍP†A:æ[ßúÖ´\ÇtS©Vóóòù<Ÿýìé,Zô{àŒÆÛ¹á×À cfx¹oÙl¼âȱÇ;-×<“%{6ÉÞ7Âéç<œ2¡}hÀ;ÊÑ‹;}èÕ‚g“ÊOª¾«Yàrž-bnÏÁ/óAXj«…õVÍA5Sèèa"üÑrl ÂT£Â£MÌ0ô3I2 9mFü‰†‰6ÃxS Œµ]4Ïœ¹&z hêüŠÔVAmòÂĉ刻öÚkY°`A˜¯add„Ï|æ3¬^½š|>ß–e©}ßgÞ¼•|ìŸ_Éç?¸„ÿ{¼Éƒ\ð8Ô€[þå>î»ïGÜùÂIdùe7y³"ÊåòH²óÿgïÝãܸê»ÿ·c;¶sq<&Äql'élYBJ’ÙÚ¡ ¤´#ZÊÃÍ …–@Ò6HŸhK+µÚ'­Dy ÛM¡M€¶ µ.PÂï …„›mvÂ%$6$;$±ÛN²ŽãÜõûãè;3ÒJZio’vÏ{_ûÚ•4w}çÌ9Ÿó½˜ð—(ï·±1Žæ8J‰{Á|ò“ëñ<{îFK§UÎ/ôâ÷O:­BÛJ¥HŒoùxa•G]Ë¡C«õy:‰ƒÓåâUO;Åu§ÞWc¨él`šfØ©J&“är¹°òí%—\2«ûšOTe;°dõY[Ù¼y)íD8P…EÒiõ=ˆgPŠÔ”ƒ‡©*M*›lŸÝo¼œ¡ÿÜ \ÚvÙ,­+i Nrœ@4$› ¼Ö„‚ oõá3òÕ}aê~ÌåÚç÷y!!ƒìézë̤kN3ŸH3)º ¡yò¡4RX¡WHž¶ÙBÄ­fçéãS¡2éºtê ˜&Í#¡—›À(S'Ê”qq)P«×Já ‹µäþµB"Þ98á{šÞP.—ë¼àòù<®ëÖ½ŸL&'‰cÉd’ññq …B¸œçyadE7û“BñTí§Ñhf‚Ê¥ Oî~ˆ 8£ûõ¯ùG Åó¿üËX–’i*ÆPƒ•©«n“÷gh¤ C|à$¡ªºŽÍœ± ”à¶mÛ6®ºê*®¾ú꺶oßζmÛ¸þúëʤý„çyœrÊEÜö³K±’p¸ ^ö ÁõE¸6 žû 6ܼ‡"ÊÖ "¯y̿Մϖ!]Šåç2›¶÷ì¹ÏûV¯~sÝûâ {žóŠÅX%ÌÚßø¶S)8vl=_þòYœvÚjÞþv%ÚŒDžQÒ—h ¡m …©…8˲f=ÜÕ0ŒPˆQrv¼ð…/œ»žGŽYV;¿öËÙvýwœôòÜí½ŠtúWgíX,úo¶¼g?w7»½Ëyü¡“§\6žþ Ùg îÑ*çh³I(xpgî(Â¥y°bßG;Ó_¨úxΧ^Ñëýkñ¼kâM%tjiÒ=f;”¼•_¤8ãv"^ÁÕãBƒ¹Ð6žOÂV BÏ·"ŰšmŠ@X­UŠHèpöùǶmÆÆÆ:z¿1_[\”“¾§ Pã9á:ÙŸeYA€çy­=N4‹Š€Î ‡iê ˆúÐñëW®½/Z‘Ùd=‡h î²pxî®U ÿâé¯`D9e¥í+•J­^|:.Ø€$ ¬gÖl“Ùtíš9å8€;w2<<^€0Ák.—#“Éð¶÷ep¥ 7áM¯ÂŸ¦Uƒpûík¹õÖ×01¡*n6k „³mxOü¤š}`h&A®]ûSš:W¨û«•cLÜËβ`|übÖ®½_ù•5€kÆÇ•Ç\©…½ŽŒ¨u õ+ÿÇŧMÓu•ð׬b»ëN-A$6ÌÔë'ž.Þ‘²,‹±±±QÅê^r]šSWêOH¡\†sýö"\·ƒŸ$ 
Lɧ¿±‚ô0C'¿lÚÛÈS_è§U.Ò𳬠VÅòC¾¥¬¼eAyÏ6>'š'\¿¡½Y4óAŽ.n(¼Å+‚vkƒšºÐ‰‡ÀÎñ‚‹{ŠGbܳN<á¤øC‰Y²$I’%K… Y²äɇáŲœ‡‡k-¤áßâÁ²¬?g[…²j.j¼·Ê(ALþh¥X@]£‘د¼Î¡&µµ÷¨kj¡ÆÒò™|^ˆýÊò©^_ˆY¦ãæ)—Sïq'—¦û2hiü¢r4¯Îc@] qŸÉ)ì¼&Û+£Õé9d¨°Ôááá– mذ-[¶pàÀ6lØÐëcÆó<¼ZÜôTÆx­Ã>¼" – ¯o²Î£®eß¾© R˜€ŸŽÂÔD©w‰¼3+(Û]qâ‰>¼5kƧÜn§ÏyË‚øL.¼ð.¿|r,ÃPá²…Ð¥ÓJ”“{V<ï,KÝ×¾‡Gß[&yÖI¨¬ãÔç¼’Üd’ÓNBm3%6êb3ñþ±m›B¡@ BpkÅ£  -ø>Ç¿ÃèÌ.Dà‘ïk1süSOñÔ÷OeÃ÷Ïêv ¢Y»8°Ò¿¬ò”ŽÄ>»7fòŽÓ¼xC6›­«è¦™=æ²P„F„!Œ’÷Åëä´gæÜ"Õ[«º:8u`! ™NþÊré¤àD’$…x¶×g«Ñhæ©&l£Æx*B"Çäœõý†H6h‚C}îc95ùž&r¦ŠG7µÿÝØµ‘1p:vDÿ±¨÷j“tOÇÕØ .‰}¥Øç à¡3Ïìõå6’ªÁŽ{úéÎWl2Àk,BÙdgêB7Î#e™,°‰’…ú²ÒD_p|ÜÒÌ+£“ÐWÍ´YÖ邽>ÖâLÓäÿ”áEÀ]°JÍ—Ñ‹Žðàƒ'súéS{ôÅ‹†ˆ½Vˆf ’¨†£ =é$ž|r+W›µs³mxøá³Bï½N0 %ŽIþ8ßò»Éûgœq W^ù-n¿½@6«<ç¤HD2‰=¾¯Ä7ÃP⛄Fú~äõã8]¨ü]àû>ãããa•Ô…„‡ÇcÔ^µ/yÉÔë•ËõÅBJ¥¹¹ö‹"êžnÆyÈÔDny¶ YàZð»9øX^}?rÏÉý§rEêp—¹@ šù Cf’ #!—žƒ{€M–lÀK§Ã#§<ôúÌ4Í|1‚ŠL”Vš(MK²öž¤ÌjÔ*æÕMÆöGƲ¬`*é–ŒÚç²tì“H4ËÅ–¯¶9.ñr“qq\kÏž—(þ2» ·FÂ"âÍY]»„+žàÈ…«É ø öw&釆þƒsÎùø¬_»Ûo_ËÅ·nFËåÈ®Ò lŠg\:…ª6 I‰Ó&ž#n¡r7—Ã÷ýI¹ßÚaÛJ|K&uhêš5ã¼à3¯ûœ$JkjBÜÝ-·¸9{^ó<ŒƒL?x¤iD3Äs“a~²~¸;ItØÿ â8žçáû~8±>ÛE¿4Pã¹€(‡Y…úB}Ð\\’â‘-ÑMî²2Q.4Ùo®ö:Ê ±÷×ÖÞÏÖŽ«‚¨Ækï¥kÿ§‰ŠX6C¼ùæ9îÙ‚,4Nðs_skgã<×­däóyxàå׎xŒxó) r<<5¾©x^¹…ú Ë *Èpýõ×óªW½Š-[¶pàÀ<ÈÕW_ÍöíÛ{}¬5#M©TáŸÿõ¥$vÀøøÅ;¶~Êåd¶Áló¹4†eËàÝûûü/”=gjËIvŽÈ£N¼‡‘K´Y{? 
¬[wÛ¬_7å×¾Òd¥ÃŒýét}búHþñ5Ë ·ÐBTA­ó™ÄqšWâŠì4Ø1óÝao?Ed·€»lXØ…úê¶ñC åEº¢T{p>Oó‹é¡sšÙAr½ØØ$I†â¯xÄiÚ#®÷¸®K¹\ƶmíRËdšf#¹„Óé4A„O-ËÂ÷}|ß'›Íbš&®ë¶ý\³ðÉ…sÆ+ºõ=G‰hÔXΉm³YOD¿2Q.6P¢žì;Y{¿™÷ZÜ:uTà`ößÀ»Ú,¨YúrYUcDMR¤ÓiÖ­[7uhjc̱à%ôŠE‰^`ƒJ˜#nÆ ÜxãìÝ»7@lÛfË–-}Q A‚€%K†ø«/ÿŸº®êðyºyó6V­:ÔѲS—åóÓ_{‰ò×8ý`Šü9ç f9l¢dŸ"ȉçÙ~u?ˆ0÷,õŽãÏçÃqL«Ï5 5fŠûÌD~•Â"®Dc7Ia$c@ÑFÚ…q6˧¯Q,„b ¾¶ýÂ2c?6Îäwåh CæfUæDTˆ‡õ5.£ °/˜T¬axx¸orÁ5Ãó<~øÃßãK§nä’Íð;®÷ðÃ_fõêÙ[yì{öüëîXçÔ»;§Qå˜óD32ÔÃ!OÔ¨KÞDbË>sÒI<öٌ͚¨í¨¯~#3,åÚ¯¸SjëˆøøI'ñøãóðÒÍѵ¤óðp _7Í(–eÁ~´–‡~Ž»ï^çEÕW¥Z«¬3Uø»aá,¥ä Y¨9ã²YcúÂŒ(³ óÒLI¯m"M}BÜÕ¨ûïïbá«â-ºd‰šä²,õŒÍå–WœF³ÐI×~4 ›MnšùFú˜€ ç¹=M̶áÜR]ôl«ÔÂ5 …¶m“N§q'ñLÓIJ,‚ ~d9Õçš…ÇQèg…Ù«|*Q€RÔ¢œs.Ñ8Lr‰OE³|úum¿2ÀÅ||LLF«UÝûž)NÖRä˜ÓI±›$àQ˜^ãØÐ¢u:@§zî:®šjYV_äpp]—Ë.û'šíR6"É`g›‰‰ûØtÖs“ÞOÅÒK¾%j|e¶DÆæRâ»o}+gþä'ü:“El)Em¢<ëDLË¡D9¯ö¿[àž/¼„§?øAž:õ:Êµå ¨kOZ J\Ÿ }àR;òz“0ÖÏ~ö‡ÿ GޏXd2õB]“ŠÌM±m;œÁìµØ2xx,«=žg4¯Yž£uÖÖJ¯½$ã: 8«öÿsH9ÏSžæ¦5‘jà AˆóðÂJ’šY¢›YÍœ3ÄET‡Xç"Ô :&fß{pŠH†ÿ}ßÇ4MFGGñŸŸõ‚'Ÿ|2cÅÙïl»\%pµø<ÙðÿW€#/zQÓ©ñ!/y-¹âM›YÛ÷’%K¸ãèQ êFkÿ‹wDaE`ˆ ¬ì«PÛç3Ë—O<1†àÕÚX²Y%Jt¢Ø¶½ óvŒkkß^"Ñy>¾¦ÄÍ8îz¬™W à_òê~ø[àÞ4œíÀÇmøl^V»Y]WÝ¥nfúœ~âúḤš´T:žr^Á%š¾o…¸D׈UºW¤ènfJÓŸ…_ŒE‹ëºxž‡çy”J%r¹Éd2 Q•ðTÓ4t_Q3=ÖŠœíÊžÐ~Î{¬› Í"žç…¢–išär9ÆÆÆºš°ÑMr(ŠðmYk×® ½IÅÇ0 Êår]“äc4M“J¥Bd2™0Ä\ŽUþJÎFé$/Z3an¿Û—¿ühÇëLËÑ)îä"]ßx%Ô"jß*¡žüí JˆËårÓö&JÏA¹ÉåË—w”MŠ;tÊ‹_übV®\ÙÑ="Þl­¶o¡îÁýýÓÜýÐË ¶¼¤hÑ}¢{Ó¦~l”Ô²’çî£zpçUïXÅ™üœ…¯Ú¶Ê+'5☩Ôdab!w®á“ʾ?£\zäQe˜ ò•$ÔL²”ºØX†Gjnâ§'áž²}σPÿ»îÂðŠ[Œx^äáhY*W¦ë‚óH[`ý¾ßGµâ\ÊBðß¹Km'nÕ7_µ•étýÄ…ã@ð]È~2V”£3‘PiEBÊS|ŽöÉOµG^ˆ„ð98º2ê"áõß~=ÎéŸ\È éÇ‹W›ü•Ôø2¥ZgQª¬6FMHèªmÛá¶âë7û\³0ðˆÆ-EæÎ£j¶¶+U‚Óéô´3ÇqÂõâ!×™L&ã,Ë :×LÓ$‚Ð!%›ÍbÛ6‰D‚t:M:Ʋ,FFF°,‹b±X÷žçyá=•N§ë[’É$Éd’\.‡ëº8ŽC±Xlݹ\Ã0(‹är9‚ `m*ÅsÏ=Ç ãëcß¾ :^gÆí’òމïšUTLô8°O(!N¹®Ûd2@aÔsJ‡ªJ7öþÜ…r^ËY(G ˜zhÙX•ñ{Ïá¹N¨»ÿ*DÂZüþm —’¨4 8&,[ýÏ?}¿dªq^–(5¢©´Ùž§~°æÖ”õµ+«*}ÍÒF¥H7HLr'y>}™È">×kDd¾(ášü†ç÷ºàÔÄ›sòh'›¾&žÓ2—Sß[±¨þ&J “|—¹œÄLÒg€ý𯎄VéSxŸ'˜¿N¾ŸÍªvRÄ76÷(êu¹¬–3 (½üƒó®³m5™!Çc^ͯé­! 
w«Û&šÖou¯çPþ"Ð¥||ll-Ä-6=¸‰“Ï>yæÒ̈©£ýòÜ×ô)¢|Øýfâ9&ÿÇÓï …PØo4ß÷C¯3ùlhhˆb±H&“!NS©TšÞAÍfqÏó(‹uË9Žz¶A@"‘Àq²Ùl£J6› Y„4%dø~”ٶͰ/cšªïcÛ6×]·“§Ÿ~®káºêsßR³H߯4 <¯ˆU›`<묩V¿Çí·ßÎþ‹.êõW7m¤(Îѯ/ãÞtîaÝîÛ‘W¾„·I³)¡jñ~œ‰Ðë®Lß² ààÁƒ8p ×Ç2%JAŸ:ñÿ|²æ-oáŽ;nÖ¶g‰kSa98M¥•ü|6àþC‚Ÿ¾œ1–ÖmC*A儃É÷­ˆí”ún þeh GWqªAQÈ0ê=€Dˆë&å ¯¢óó?9û÷Ï`cõ.Ⱥó÷ iû^ *%.–²¯øý&BžL=Ê2’­vŽ{B2S×/rÕùˆBÏ5à¿sð VX`ðò4ü}²V˜¥Û˜zͬ!d\7òÆuõ+“A  l†ê@$^ iÒ‰¶%ixµÄ‚Æ%PŠ•äuÝH³ÎÌúvT¼ñ, Roª9»ÝîŠÈ9ŸåöQ÷n«ÛF\£[¥'‘ÕEЙóñ±°ppf¾1F£ÑÌ âÈÝosD¹\Ïó‚Û¶C‘¥R©`†aH$B/0ß,Ë =Ë⡊"Ú5æ+—£¾ˆ$ùOú2ygYir9Õð}ƒGÙñcw’LÖ{mÉ>|?šNúù|”vHºâ® »v]Ξ=ðÊW^€ï«md³ª%Ë%“Ñ1ƒÚN.–µ™lv3¾ß:q_¯¿ÂicbâO?ý4§Ÿ~5-ß@.å2ÿÎïð‰¥KÉf³$ Æba q‹ÄsKI_OÆlýtƒhêX°sçNnºé¦^Ë”üøÇ›xÍkÔêõÁ‰µk•´?tê<ÓM”áŠ5ãœú“+„Å«Ah^P%K”ÎÈÒ|jÙ2–?{&Ï—ÁO×G@ÆdÁ­\ŽfE$ýèh“óš£â½ÀÃcS­¼çžkؾ}+ oRº³!.ŠQ–J’À"—H¹Üâq* º|.B›A”ø3ƒúÂ"¥v-Ê»&n˜¢ELϪM˜œaÝ%3cl»Þɶá³Ü›… °<ؘŒNÿ¿s¡½_éuõ½vLxç’È©vG:‚,ßU>‰oé´úM¥"" ô`~­ÍN%¡&Lò3M°_F$fgÀ¾ öyÌ7€!r¤ŠN¶ÖnºüUàËLò|µíhBJn ›UZQ³WCRÜ ð¿¼Ìx ³:­XD8Ÿ,Y]¨A£Ñhú™[š«a£HÖ Émèº.étšr¹Œiš”J¥¶c˜J¥æÏÃwÔ„ßØØ™}T?3ÞldLwаÐq}Î2€íÛ·³}FêÀü𢽨±Òï‚%žo±iš‡OÚÞÇóð½¿Àñ/}ªå2ò0“¶xºËÿ¾¯žýR*î­?9wýëøaZV½S„aÔ÷o;½ífšÝ¶áèçïš­¯²gÿôÓ¬;}hònÔ±“ŸfLJ‹!žm—׬[©yäCžö­šžrÀîݻٽ{w¯eJÆÇ/^¹ÅD?éDoÌBGEé—0qô<Ž}û„ŽŽAô‘Fä˜lTC}®'pvmáFaPE$‰¹mGÞ(ÒÐû~ô€,ËZ0.É`«Wß=3!¹6À¯óÉw™$Â…á° ¢Y%rÙDb”l3O$"ص×"P(QMÖåÑ6Æäñní½<ê¡Q >Ö9A¤æ¦ˆfkFjËÄ>wkË7º‡æjï9µÿGjSµýɵ‘* ¹Úÿ#±mÉßLmrm¨ý/¯}ê]DƒØòr¼#„b¨iÀqÚý%Üô1žŠò¥žnÁW§Qi±àû°d‰j/ %*‰`/9'_þò[Ã|nâV.«eV™‡H§£âR ’çµ"¾Ÿ÷½ïq #ê™ÄC,‰·|ú–Hx¡ ÷þ齸®ËS¯|Šû®»'>úýîcÊÎD˜ÏP™±”$›5áÜûðZ`MìsP÷•ä'7eP÷]ލL¶—ˆ|9Ôñ–!³¯lŒ“ÔÿÎ?B9£[¡òC^©‘Ý—kûµÃá}ÒÄcî”ïžÂK¿ýÒ^›Ì¬"á †á4š9Âó0~ß©Í?l’$=—Ë…Å5£Q1G»p9‰Xjìºt*hzÂ2€={ö°e‹J,¸cÇöîÝË7ÞØëã ‚€U«a-÷ÊnO±“ᆣGyúƒã¾µÞ1{ÇgP«Y{?zHBôÀ'ÊP*)÷gÛŽòÈ,„­‚ÊwÌZUƒÞ°‹C‡qÎ9®˜þÆ$'›| I&Ç:> Þ¤•T?^)µLä}…”š¨†:ETC¦+±}eh_öº1äT<ˆäýxî‚ñÚßxlA<—A†¨"‰LV=•B/습å㡺•Ú9&bû–ýù±sOÄÞËÕ¶?9.ùÄCIjÚKÆ` †‡á©Ûµpñø£<ÊÊc𸧶xH/¤úu"¡î÷¿üËßbݺ …z™ ”Û_:µ¾Ï<ó û÷?ÈРͰm0 ðÈdà`òµáŒáÈȈªf¥ë¦Î‰Ùl–LÆÅ÷}^wÙë8ïóçññs>Î~í?9鲓øÉîŸðìºgùûÄßS¡B.—có'6sé—òO_ý'>óÝϰé¸MìøÄÎzà,v‚ú…bù»–óታööµä6åxðrØ:Œïû$+µåŒê˜g¯¾Hý”¾Ø–xÆUˆÂÏsDžªâ¥)â^<_ce¿""`¬„Ên¢œ’xûÁûxÙÚÀÀ$å¤Ó–"û¥½¨yæ­Þ³šŸ®úi¯ÍiV‘üpfnï¢Æ Œ‚çyär¹)Cü4‹qTÏÒ}²Vø¾нù|>ÌëGª‡ÊûA 
R?LN«c†¡£–¥Æ4¥’ä=Ì’IÕe㑉¦ÉÌ"gœ‡:³×‡0-$mK76ÎÅ¿±8»~!Ï Ýs¹¶m7µ¹¦$©OOÔH«àš¾f`ª¦zžÇ+^±WÅÁ÷ú`湸—ª§.åÔ7>ÈT¾vfÞªžBã³çŠŠÿ‰¦!ÑXâ!ëÙôä‹iÖW*´í(ù½µÀו:thæÊxÙڸ痄©À#À਼xµÅ3ÚÊwÏ1g2ùËc£Ä©N¿Ù_cŸvÄ0cDƒÿ“…Áøq‰Ç^9Îbm¿/ãךÏ(‰(/^/?ŽãÏçÙºuùÈ!>ÿùã±mÕ©õ¼¨(—SAÇÉaðüó÷°~ý%,_þeÒiƒ\®6}\#m°wïyðú73T ;Íår™½ßÞË[ªoáuÎëÂ÷Ÿú‹§(,áNüÒ‰üÒ™¿ÄÆpü×?ÿ7_z3Ͻì9̳L~á nಙÍ|nÿçøïwxÎyŽd2‰óZ‡wŸýn,Ëâý?&ØpíÒkyÍݯáoÖý 'ï<™Ón=?=îO ² ÛK¼ÎVÒ:€¼·ÑJÀv‰f?E –\z.íD,|;€éÔr¿XÜOåK ½òÀø˜+Qv/"u–Hð.áwb¿7“Ê3ýGG ’5ÍŒI&“aBúr¹L2™ '_}ß'‘HP,µ§ ) ºÀf.i‡xXzžG6›m›{Ø0 lÛ&“‰r²e³J\“BÝ#"šTWo'ªåµ—Ò$|îºë²^Æ´±°ð€ãŽûÞò–&¡c4$„¹#®S[ÑŽüÇÀq@ø@î'?©6yåûŠV?GõÈ2^xÂaàÅm—•1`·÷³\òjyÇ•[.^Í/z¶û[×þ~÷»5ýÄ£2ÄsÔ`Y½ûÀk€f‘擵BwÌ…»f;$á`Ü€šmsªçX7•eK],Ûâ\ŹÎŒå½\½_8Ì;à‡g挟‡7Ùe^>ó`íÚG)óMs¤d2™Ú`+Í­·îçÃþ |ßÇ0 ²Ùd-lÃ&—ËaYV8“Â-·ß>Šïûøþ«Èå2ضiš¼ü¶—ó—Î_òÜéϱì¬ûI¾Bu¨ÃcXÏlz†Ÿ~þ§|ý7¿Î+K¯ä¶Ûnã Mþ¶v€"8™pÕUW —þÁ¥=|”â‹aè€išä½¼²¹[à´‰ÓÈ¿ªá|3ð¿÷ó–?~ ×>~-é÷¦Éår¤Ói\×%Í=·>Â7¿ùûxÞ»'O<ØLž/±‰¼H=”q•P1:²lº¶œ„Uç©»7ÊåætŠE¨VÕ "‘óC`ŒAå "Ü ê-xv÷³t±QcZq/â„(?B©¤nžå«õÃX@uðð˜ ÉúcÇ€Õ¯(aúèÒÜ™QòÆÕ*˜Ú–{‹«gFzL<¿üÜó%¸ÿîóá½>ÂéÑ*wV.ããϳv­×ð~ŽÞÆ¿þ«ÍW¾ò+V\Éöí·‘N§Cˆ\.&â–÷J¥®ë†‚[¼rYäóy ÃàîÕwãoó9ûì{¸ÿþ/öIïÂöí¨„s–Ë9å§ð† Þ¼òÛ¯äñ>HÔØ¥‰” –e166zIœ»í\ò¥<›6ý'Ÿû“è DÌOyU¦Œò®Þtù&u_ɤe–zO4 –þç}løÔ†ú“ ‚Ž=CãÞ–e…a¹ïüîÙ|ëÐ*^ð‚+°mÛ¶q‡¡¡!\×%×Ä¥Dò(IE·¶Ä•ßøÛåYÍf£ ¬‰`*oñ hÈ+<ðÄ…8#N£™’Éd–Úês ôF.—Ë$ ‰„.ܰˆðP!Ós¹®æŒ-‹-E8×­ÏIõ…å´c¦¦O÷ý׎ùæ Åú²]$”q ¬P_XK3°,ذa{÷îeïÞ½á7n¬{Ýk|ßçC2€s€þKç€Ù>¿_©V!Ën}vÖÕ£~Ìg09¿œxÆÉãÏAµ#‡-H×NVzOÖ–1ÍÈå;V?°¸®Û6ESâ%t“Ô‡§6"Õ“Àéµ_MO‰{‹J‹kmøìƒ_~³×‡7ói§$X†‡ÿ›;¢eþñÏ‚à+|èC_åŠ+<<Î??vBâÞf¥R‰ïßÏIñÊMÝMEŽø Zúqº»² X°uëV¶nÝÚëci‹ H4Óê%óXwâ‰s³ý>/ Ú€¨DÜáMf›Ûµ\ä±b©T}U¡ABÜßrËa,« .e§5˜:9bïªoh]ÜŒõY•- çEI #ZtT¹‡F}L–Ä׋bU(ð¡?ùξè"Èå8õÔS¹éÐ!¬D‚%ÿ€‹/~ˆ•+H÷<Ø]o Q¢àxþ¸A#¨IqëÖÇTe“.š$_O¹&·Â@7Ì}„Ê­o#ZEzÏS÷e±AàòÍoþ Hs啸⊱nÊæÁ(—•û\ eQbW©%iÀ…·Ô’³d³*qÑK ÕhØ6œfï$yùƒ/§Êóµ’Ñ(ÌôS­S*)—°w'áøZÃruRU)A­K®[©¨ãwÜJ¥þØÄ+N(—U/ß¶Ãý}ÑEê³|ž Ÿçá /„l–GžÙÄ¥ëïäwßÅyº®ú?—ãuwÝ…çy|àÝïæÁ]»‚˲BÎu]‰{þñ¹ûÿ±þ:ÎÑ÷¼Ãr¤‰KÕh4ӥ̋ÙïdlЪë©T Ó4©T*8ކ.W*•P„K$T×#›<Ýr¹(N±¨=à43çn`üŽã'˜i’Ï£ÜMŽÆ0Ô´ãÅ!,Ö°{÷n®¿þz®»î:¶lÙ¶mÛØ²e »wïfõêÕØ¶Í–-[zv ÷ßÿ¢¾ M4Qy²e|"3Esq 
:¥Œz¨Ê9ªC¹]À…[žºƒœ$\’Ûß3~qwçá0Y¹éäë»ûnX~/œË°ÞøPp]õ™<ŠE5Ú–·ïG*hS;ÀZÅÏI´Ò±,%”ub‹S%\‹msãÒ²s÷˸àÙñ+¥¿Uï'“ê~4MV—Ëå2k.¾ÆÆxÛ˜®Ë¾[n!là™C‡H=ʇžxB­/|âq(Þ‡ÅâŒãmD'³zõsìßÚŒ¶Û޹ìSh柩Âp$¬Lþ÷}?Ô$©¿àû>Éd2\¾P(„¹ƒ ‹¤A€ëºáÿé´ÊMé8ÅbQ‰Ý‰ôGÄæÍ›çä¼µ×àá…"˜QW‰[ò4úøá2A¬wp¨P!GŽ€ ‡ÏÞðlÛfõêÉÕþ&&&¸òÊ+I$ ³sçNvïÞM¡æ“Ë娰a[¶l!—ËqÝu×u•ïíË_~˜uëÖöúZifˆ êÅ;~ðaeY ü^YsÔª¥\⢋Þ8gÇ7Wvü päÈ–.]:µ®”JÕôéh¦Ã’vTOÔÈX„«8é4ªÛ⯢P¾8®«Þ—ƒ‚¨F{ü Å[©ÓT½™é‰„:)ƒ ‘—„òIV9î P‚A<+¼ˆòù &ìíìx¦mq߇O|â>î¾ûÈf³˜–Å'½±þ© R@úá‡IŸx"ÞqÇa ¼âÌãŽ#µcÙ'Ÿ$³r%þ>ÎfH>©´¶Ò,SeÖRñEJ;'þw\úV>÷´úLN©EõÐŽQZ¡ÙÀ08¸jƇ¹ê;h²ÓÄ¢VU«Tâ‚‘¸à‚:!c¹mó_ÀÐÐcµ{Ôq¶îÙÃ3ÿ÷¬O¥šŠp÷]wg­^ÝÒs¯Ír ŸqÆ“¯ß-sݧhDi˜]‚ Àóð'MÒÊó«+¯¸8&Sç × ËöîÝ;Éã-®¯^½šááaöîÝÛT!Þ»w/«W¯…¼Í›7óªW½*üìÀÜxã@¤8wcÜ·ÞúCÎ:ë¥HÅTÍ`Óê¹ œ½NuátK9¼|> ï.ñánøîÜ×\Ùñ`ý±cŒŸò|ë…<¯^°‚¨Rb^Hø~g³+ÙlsO˜V‰0ºí±ˆç[>¯öÕL¨pµ]ôçóõÇoõÓ”A ~[‰‰ÄäÒš:ZÙñl´ÅqÁÂq P8 ÓTïY–…{è÷þý&^ýýG8qß>ʯ{ÙåËÃ{¾ W¬PBüÊ•*WäuP~+Œå]~%d®ïL0O€ÜGa4Á×ÀJyÅájÿW*wœŽ™ŸEOÊ.«N9oóupp;'¶*ž“Ïã¹.iÛÛ|G"fAÀÇþú¯Y¶ ëï Z­NZ¶\.³îŸà¬OTmƒxÌösݧhD‡¥*â^h­Þ‘K~Eøa­P(„ÅE€P³mÏó( †Šc†a`YVè¡–L&±,+Î Ë–ýHeá8²n3‚ `íÚµd³Y’É$©T*¬Øü…/|aN®ç|ÛñlÍlD„" +ôæòña„qÆC¯­ºâ>µpPÆÄS-.Ì1†ƒzˉ¨—!C™2*øø”(…÷­fâm4 µ°°±ÃQ Îó,~ÎñUÙf¶ö#ûb¨î810È“ÇÃÃÆÆÀÀÆ&E 3ïDH“ó•PXçF 7"+Þ…r·±mÎla.ûͧø¦Ÿ9®ë2:ZŸ§\®ïNê€ M#smÇÏxg³âùÛÔ ç*—ëú‘©TŠR ÃIfai´·@XpòÉ'Oú@ŒQ˜˜˜h¹‘7rÝu×…¯9þ¿{÷î:‘oË–-\ýõ]èé§ŸÞëk¥™c¤ªäúB™rS¡a!"¢Z.—SŒm;LtJ¨›¡,.މŠxªù¾OµZ CCEp³, Ó4CﳸŠ×ÚTyÜLÓœ$´mÃ[Û}&âŸ_>ŸÇ², Ãà‘G™“ë=Ÿv<›xx¡h&☋ C>>%JdÈ„cš#n#I2ôôòñI“…«øß9,, ŒIâÐ6¿šZê%Ú€ ŒŽr¹™˜”(µÍ1¬ý@ä-'‚`|[mUûi$þž{óÁ\ö+šáÐ<ÿ´xׯ… ™Ç–TM+æÚŽV«¬Zu(*Ñ›Ï×GA˜Ï°%ðx"‘}RSÐ/YÍŒY„î™ÍBSA)ÈÍ¼æ„ 6°aÃ@å›Ëf³\}õÕ€2ò7Ö-;û÷ïgÛ¶mlß¾ááavï>™w¿û´p–L[ßÂÄDµ5OYpª¥Ú¢,j ýÏ=Çê»ïæ¹}ûælÿ³mÇûöíãƒ7|“Þe°îÒc¼ø_byhâ³%’ß)‡R²(¥¡QK¤„¾¯„Ù,·hY‘çyêÚYVTXB¼ô\·^ˆ”â"hH%ÌyfïÞ½ìØ±ƒýû÷ÏÙ>ZÙñtløþûïç+_ù {÷îåÆoœr4>^ =Z|À=çJ> «Vµy(uÌ'jÎÅî t€÷Cú#¬‚ÂPz?”ÿr×€q;¸ƒ€ñä6¨[åðÇÀÄœÔ.‚Øÿy¢jÍ &r¨6HÞ—Ã%ªÜ–!j§ŒÚ²Fí=‘ üÚûí†gž—ä×Ú§]­!ÚÌÊæóy²Ùl(˜”Ëå04°Q˜ª«Ûo»_úÒdÎ÷£B.m„ü;wòÜsÿÉ™g®~‘Ùf¶ÛâŸüä'ìÚµ‹K.¹dR?'îQ2èÈ÷Ô\×%™L†!¢áy×ò°yž†lŠxæ×ò}Šš„RŠˆ%^qòW¼SDìjÇ”ƒ9$.nKáªmÛ¶ñÈ#pÿý÷Ïz޶¹êÛ¶]—kn¶¼c9ra^4“¹0G™‡†]š˜a˜¥ä530ppêÂH ŒÐK®Yت„b6àú™©D¸VL%v‹ô+öõ 
<;Õ?Þ¶mÛ$§ˆª¤6ZJär9ŠÅ"A)n’¸»5]°sçN\׸þ1ö+V?þ8KW®÷?Õ¸®ÉóКjì1|8X?g—B3GìØ±ƒ]»v±vmg)Õ–lݺ×uÉf³¼ë]ïª3@×u¹þúë¹úê«›æ‡$ùáÎ;gœ`Ó¦Muó9ç|œ—¾çdj¹%^O–ºTÈ‚#KMäO+ñÍ#J5²t)çÇ ÍÍdS2›v|Þyç±õ][ùûš_qGœç©|iɤò“ÆZ"ÿrÌ_5œ PÇ2ÛµÛMsn¼ò¤(„ä›/Á¸8×̃¨PPÓ¦étÔsËçah(òFÌÕŠbHøk.§¶;…0Ñ)ÃÃÃÜxãlÛ6w!$ÐÜŽwìØÑõvÎ<óL^ýêW7¤‘ãt:­©TŠd2YW-Û², …úó?gÕe—ÕoÌqÔk* ¼Tjڑܺu+6l`Ïž=Ó<꩙ͶøE/zQËäËÞ¤ŽÊÍd1X•!9Û·ogóæÍ÷—Êwã7’Ëåxýë_Ïðð0«W¯fïÞ½€º9ZyË Û¶mcxx˜Ïþóu‚xÛ ŸÝ A„ÉS-,¶à%øX˜˜ 1DštX‘È$9qÛh'ºÈôÐT¸¨N¬Œ"›M)5"‰Ùfk"2S;)pÖÅøFNÑ ÜÊéÈçœ7KÚœ¹°ã€#…€5Õ˜:nYõœP©øÍ{"ñª¢³…a(/˜†©Í ¶UYM&'çïj5È‘âA ÎslL˜ÈçÕ3“QÛ*—Õ6äZXVT@£Qà“Â’D¼8u8Ë|ÑÌŽgÚKÕ7¨yÎ'Á{Å+Xuâ‰ø=†}ø0ùNS4&šï8Pj–´‹qh1MìÚ{Åa æY§cq1ðWÔ7™ñÿ¥ÆC’¨ÙkÖü‰wœV…( ÙòRŒ" a`S€ªüfÔö)§üšßø{¾^~•Øû"Ê1µ´#NCCCضM.—Ã÷ý0DPru}ë²Ë&‹Õ•GT¢qÆÆ”0'9!ç¹ìS "Írµ9ŽS—s  Xk‡r¹œ* R['=EÛÞ.gM²ÉĆˆouž—íû2GÞîsèªß0lÚ´iζ=vìà„á£cŒ…ùÇD«å›låÑ6ÎäÔâiډǩ­…¸>g.úÍhV:G<}/¸`/Ùl4—]*õ·UÓ_Ì•ŸŠÉi/9Æ…îš{,KZ‡¶ŒÖÚÙÇQÏD¸ Y&ÿˆwðàAvïÞÍÁƒ¹êª«BQ®;wîdõêÕuqׂy˜˜˜`õêÕ¸®ÛõŒà—ü&—óÿ…ókÈóçµt§’oB‰EŠm“ÌN›NÝZå÷’ø& åbQª½n–pQn>qÇEò,ê;º>Êõ®­Ô¶[¨­ŸlØ¿„„ Õ>³ö_ eŠ(ërí·±¿î¡F™•Úr9TG:EëµKäÖÒp½,€gÀx 8U}–>ü°ëÙ“™+æÒŽŸ¿÷8ÌWÕ<®d Ûª16Q#ùf¦;WÓ~ñJ¥ýŒaDa¨Ó q2Œh=ñt³,å72‰{"®‰Çb<´5”(jÝ ˆ*¾JXÌ_޾ZÙñl´ÅÒžzÀ†<|sâüÞ§?͹§ŸNº›<žrßǓЦ›üßaÛF5]‡QžüSµú–û0jÛídB2 ¬¥^¤“0XPÍi²ví¾þ/bøî0¼î…\ÃÙaλ€(üu(¶~·P†aP,Ã0ÂL&C:sxÅC[ŸPZ ЙLäšÉÀÚµJtž#"˜û>E?”Zû ¯ÖÎIè(ΤKH)¨"%"ÄÅ…ºÆÜl³sPÔõ$l5ù¦òänÖ/jÜ~±É:9¢t ©Ú2ò•ËÍ’$ºÙr´î{x¨~ŒœGÎòüÙ½N5æÓŽ»¡L9¬Úi`àâba…E€yŸÐŽZXh´Ê‹÷^‡…K!©è+÷¿â’~@ò4Êò/{ÙËæìæ²_Ç#ÊR¿N‰D‚J¥‚ëO2©ºoŽ3Íh-siÇ'c°êС¶ËL5!VÇ{€Ëˆ¼Äõ<Å‚dYã6lè:÷„ˆwÍCoذ«®ºŠ+¯¼’-[¶àº.Ÿþô§»Ú~õø‹)ñ[á닱y7ãµ`Õ"ÅP¤“Ráò·B…9²dÃüÓʠĦñ6Ë,©}.q•Jq‰-Eâ›ñQâ—¨àFl›’y|•ùÜ@u|ãžp "“¸Ç[¡¶}»¶ŽW{¯„ý剄=I’”¤>„¬ÙñÆ…º<Ñ(ת}&YW%ÎKàQ¨J¢v>©Ú%ø˜?>¬¶SÌA΃ເ×tÿýuÀ\Ú±ùÊ}¼äŒ“ 4ÅXÓK=23Aˆák6(£s6 åõÓìZX–zÉ(1B*½šæäk&½ÃV¾kgÇ3m‹…f`¤ð(Þš5\ûøã˜Ýæ+’ËÖÌÛWÚ™.g¹EN™Í”ª‰ìäÛ”&¸Õ²ò8°úïõ¬z’õ±ååÛŠYa8_Q@5›òH°™zNHm¦iªŠ¶µôBÐÁ}äú>O½ô¥¼"/Þš½vëV8yn&FæºO§_¼pâžeår™B¡ÌEh“¹äs‹·Y LK„“ µVŒ@Ì9 ˲ÂÊ©@äÎÞê’JE¹±$$= ºßÍØ2.ª ¯}TEú(ñüK.Õä$F¼q¾F\TS„ƒ›߀»ÏõqóiÇíb A(ÂI¥N— •žG’¤û0ÝŒã8$“ÉI3™ Åb1Ì¿˜ËåÈçó8ŽC‘¼Œ†a„y¥âoÜ+&^AX„:ÏóH&“ŒŒŒÔ…±e2‚ ›ÍÖU#•Fh×®]sv-æ£_êÖŽ§Ž%d^uÕ§j“ ê=Ëê« 
Í€0×v|Çß¼€áÓ^Ñô3ÌÛ†¥Æû¿ÒA”qatU4³Muž8pà@u×®]ÕÇ{lÊe¯½öÚðÿñêxõį¬û¼R­V³ÕjµT-U©ÖŸBºš®fc?FÕ¨ÚU»š¯æ«vÕ®ZU«Z©V¢²SŒ]­VKÕju´Z­Õju¼É2•Ú2T«ÕbíתV«ùÚÿcµå¬ÚþŒÚûÕjµjƶ3V[FNp,¶O«Z­&kÇQª­g·8ždm™jì¸*µí5.Ÿ¯½W¬­'Ç)Ç5Zû¯‹;VÁ¬6GŽ;~9Ѷò±óË×~åܲê³AìxǪÕì±juãÞ½Õ^Ò­Wª•ê…ÕlõŠú§êßÿþj5;…ÁÍÂY©VªÙ) »Ëênù^Ò«c껫V«ÕññjªÕRIý?6Öt±x7ßtcûvíªÞpà ÕjµZ-V‹¡] ÛÕêš}¬úW¯{]µZ©´ßˆÝâ½J5j›âŒ5y¯ìjµº¦š­oßgÈxU5Åã.?Úá²Ç¿á©*T«سgÊí™Õ¨éÏ×^µKTépÙl¶ TÇÇÕÒÅb±šíÀžmÛ®šf“~t´úä† Õ/ýÖoÍü"O“nìø†n¨îÚµkÒûcÕ±ªÝÔ@gÃ0ªÅb±šÏç«Ùl¶jYVø}T*•ªišUÛ¶«¶mWªišUÃ0ªcccÕl6[µm»šL&«ù|¾Z,«¥Ri†GÔÀxµõ³\],u¿Æš´±±±úã Ý0Å&Û°«‘ñšÕ¨¯S©*#7jËYµý$kÛ,Õ¶i×Þ÷#ŠÕúþÒX5jC̆Ïã—Lú•juÔmjóÁtûÆ2Z­U#ìS¥ZªÎ²íô£££ÕJì™4::Þ3Ùl6¼ŸJ¥RxÿŽŽNÚN6› ïÁt:]µm;¼ÓétÕ4Íð}y-÷¶ÃXìù/¯e_²ïn¯ÛfœJ¥R½è¢‹zvÝ»±ãjµ¹-Ë!ÎØØXõì³/ªÚvµšÏ÷ìô4óÈ ö«Õju¸jWÏøÍû«ŸØzcÓå+•ÊÔý®Ú31+}Yy6ŽU5D£m´cÙL…¼N‰W*é>}ÓÏ’$'%Y¶°È ËŠ÷›‹‹O‘" äËyRšÂZ­òÍI%(FP^\â…V$ò"‹WûK ”êBm™Bm½$jR®Ï®}."¼LþÉl­$#²‰B:M¢ÄDE¢lá’ùÛ­-Û8ƒ^tjÛÊÔŽ«qÒSöŸŽ“„ÄB½ÛÅxm;^ío…(,¶õõÇã³W²ßQêU~‹ÐÌ\{ßT«<ôñÇ?L¯èÖŽþÂé¬}þspÕUíW˜%G*¯©ëb*ëõ½JÒI¨©aDyüâ…#2åI”Ï÷<¡ÉtÛb© °é]?cá%üÉú)üÏ®â9¤Z%mŸ¦ ¾…J©1›ÄCK;=†N8mÕ!?óNó}ø…_h»= i•Bñ=‰X'ç–çQóº/ oœ ñÒ¢ÉdRmòxðodÿæÖšŠéÚqÝùáÏY8œ30 # %Ëd2á÷¨…¸{žWçñ&¹ü$œ8›Íây^ûP™V)1:%ž±ùÅš”Û±®Hƒä¥•4¢{\ª¡€ê7X¨þ@†¨<:±e¤¬±TO6ƒ ë‰)ö¹I ®¤¹Ø¶¬hßÞûm6Ç+šÏ#³aÇí°±ñj?EŠÓ®ö9Ÿˆ'h¡PÀ†cé€IDAT¶í°‚¯xòJnDY=Ð Ã=ÒÄ»L–K$¤Ói<Ï ïOß÷Coa)bbF[Óni¼w§ÌÕ„vU‰ Ã`bbbŽ¿•Ö̆Ëð*N&“áCú“¦õ·4šÙfºvìâr ¸àÍßå׸vÒ2¶mOîÚ8t³™µq¡¦?™7!nºüóÿ‹§v^W4ÿ¼±Üyºö#H•§ÌSÌý&ö96¶oƒ#ƾécÏÆØl<X“`ô·F•›¾K]g ƒ(4TÂ) ¥QÅQçQ¶!BÔìäPËDG7¶ÏÆ¿²?ù;UQ›¨©×T´K½%bšd—Ž÷ti3hhÀŠ'ž˜ÁÎæÄ~ð X»üvøâ{}HÍ™…°T´KøF¼Ã˜ËåÂδä^˜”è» Òù6 £çBÖ”äó„ñ’_KBYËå™m»Çˆ·dÓƒ¼xßcJdl÷ýIâ3©ZPFµy¢û¶Ê…8 òLÝö©h)° xþ¸ãXŸù)7nœrýfM¬E¤9ŒP_Å¢Lr6Ëæyk×®ett4¼]×ÅqÆÆÆ(—ËaX«TÇô¾ï‡¯Ã0:ëÀKÞÕf¸¨û¯U8Wcϥܔ´ZâLý‚sΩ;'˲0 ƒt:M&“ ÛŽøý](B/çăk!#‡‹;É«¾[ …®ë†vÔ˜¿É²¬ÐÇqœð{³m›l6¾ž1S9AKE“f¤HÌ‹¯“B=Ţ䅲Íäà>à‚$ µìH F °‚ÚD„§’½Úv”£SÌ2•RKAy\Ö&*¢Ììåš·q:­Þ&¤’uÝ5õ£Jãª=µní~>íÍo†Í½ñˆ›KÊ”C71§—y†H¾´‘‘’É$år™J¥B*•"›Í†oñ‚¹\Žt:mÛuj•J¥eEÚ7Y¯q™ P¿«Š ¿¥öR§ Q'×ö‘Ë)SŸ¯”±/yÉKægGsD£C¯ã8À_tõ]h4½dÿ_ÊÚ_8Üô3)8R‡LPi-}-Äùø¼àépÒògf¼­ñ%ã°¨r©渉¿Ö'ùhŽ’`ÿ±M!SP…F F6*£n–"*µD”¸yžÇ½ét’Å®XH9XïÀ`3>+ÿüÏ›/ a<]ÎÖ7†–IèØÏ¾ø3n?v;þz?½……T*…mÛá6$ѰÌX‹x 
%ãElK¥R”J¥¦ƒGyÀˆw$.6 c’GŽïû¤R©P¸ƒ¨šP³m‹Žˆ}²Æ°“T*Åèè(år9<—žŠw @x¤º¤ë“ŸÂîÔJÂÓ$ѺžéMQYÇ mM“É$¾ï“Ïçë*X(ÂA ëºA&Ñq"Ûƶm ýÍDغ<9 ¸å=·ðÚ|„R©D¹‰·¤йr9´{ ܦiš$k÷Ož¨Òj–(#Á`œ}6ù|>ô,±m;ôúß÷ëè”)“&=#qÂqœºª†Ò&A@©Tªk{,˪ûMÓœú:zžRâm˜Œôe´ïûJ¤:œ…[ ‘ðoYJ!AËþgH‰(å²ÚŽ„Íµýü…G˪ ð­À÷ýh‚ñpœ ÅŠzÏîtáGi¸¢&rY5¤R>ÚG¹¬ÚCËRÇ+«…¸ð64T_É;›­oK¥ru±mCªV†Zß4Õël¶^˜K¥Ô2ɤÚg¹Ìùý(GßüæÙ5°ááQ¦Lž<&&•ŽëEw†„zJgŒŒŒP©TÈd2¡G›L²²ûD"A6›Åu]ŠÅ"žç1::¶Mžçáy†!¦œÄq”°¥Ú>µÃP_¡ï+çsËR_u˜är‘æ*™"‚@-oYõÅ -KmÛq¢zP¦©–)Õ1ˆÉ¦ÓõbžëªÏÅTã¦&rµhþØu›,Êûêµ¾æA ŽEšÇQÛO§_ˆ“,@ ¼án»í÷8ñÄóø—éõ‘i41þЙ-#ŒŠÕE$½Ô‘Fà¡…¹EFß q«v_6;‹Wé €q°[(}ÔÁ³Ï·q¨ÄÌŠLtCÄE“h´£ÑLÁùþ*–VŸnþ)"o<$9†tn³Ù,ét×u)—Ë¡×eY¡ˆjÀw'wRÈ(–L&C6› ïétºiè…„MÉ`²P(P*•ê´l‡ÂR³P¸82°‹„„¨ÊÀV<ñ à “É„¢Çèèhø¹œ¯eY …PlŒç‚éÄP£°°pî8Ê©/­rB§y4,¢N†´ÁsèîzèÐ!Êß,‡!~…Bd2‰eYa¸S:Æq|ߟ4CY©T(”ëÜ[bWÍr|Ä=ÞdÛ“®]ÍEغìØßñ’aX¿þ‚p±m±E¤Ž{wH½b± Ý‚ $S)ÌR©ÎÛG nRÀ/®XÁ-â9R[&›N‡E)å8Åk9§l6K"‘À÷}Þüæ7wÜô#ƤÔ"9ó$ÇÔÛ/½”'Ï8ƒ xýÞ½l½é&6<õ”ÂZ]#×*,‹x&£z±1ýçrp^6ºà9õ¢•ØÀÍká쬑—Ëõ®9 î»çKP´ÔçG}åMö|ÄPª­‰Ê?’+…ŠJ>XñŠûi?‡2°úð ø|zò„£iFç·©©ìÆ4•`–Í6w'2 uìq$çTÛŽ‹wµã:íÿŸNËú‹€7â||Ò¤g-¢ïû LÓ¤P(N§ëú ñv*›Í†ÏåFTê ‹LF}Íét’‘°,% ŽZ¡¾\,*SVëÕÿâ…ÌAýq̲Ôúétý×Þ8&N&Õ2AP¯í –JuÜ ¶Ý*’Õ²”^ì8QzXÏ“sV¦šËEZu|ɤZG®ƒ4 ê:Eºs>:v4Þpù—ßᬳÞÍîݳ½Ÿ#¬$ J¼–´A8é*G/Cº5ƒÃaàÉ ušŸ¦c éÇçäJ8˜¦#úZˆóðxòЋ>ënàòðý®¼”ÅíÓ­ýͶööm‚‘óÂØÍ‘†|¹&h`áâNž1Ÿ'w- šJ(ˆ—BoŽxiôFÏ¡~8æÅı»gè¤;€Wµ^Hbq›xXAÀèè(Žã„ß)ó;ÈL³„‹6æKÏòÄNš‰uˆe2¯ñÜÉd2¡€"^}q›‹{וËeÒét(º 166zÐÙ¶=i¦ª.'&ÄÀÀ½÷/znO‡Å+ˆ¼ád°0CJ$‹Åðû1€¬‹¾ùo†¶*ùŠÅ"ù|>ä*m ”ˆÀ4[mYãýú½çïá{wqñÅ“ABãÇ!ÇP*•Â0ÈÆ<ŒP ËŠ½NÍ%U€Â† 䛤°aƒïPNÞE”®ÒÎâEÌd2ðêzÝš—ðW>ö1Þvé¥üÎ{ßË…×\ƒ÷w§¾σ¿ø‹hÔ>4¤ŠáÄCÔA}.…\lÞéBʘìS©¨‘úU 8Ë‚oW&OêäójT?dÔ’;·x>-%¸“Q˜ê7Pn“>PmXÞ§þ~•Ýþœ½~¦æÙ6›|qAê&¦O’tME“gØ=]ÄÒY<ü^‘#GåY›"Õ´ Ùtp]7L!}<ÉË–ÍfÃç(Ô{Æ ž9h:Œ©ï@¢“Géªq'Èd2ܦJ +5’â¯ã¦Ó‰5ëÚÄoÅF6ÛžZóµíÈñ2¾x ¹uk]Ž:GW9ñÒƒúu¹N=tfw_lŸ ò}ŸÝ»ßÄ·¾µ:üÜÇÇÁ©+°cb†ö$‰…E™r˜ŸÙ¨5Z..Fí'î-çäÙ``P¦ŒA†Lx?M÷ÙÑwHzuÛ?ü5mqk9–=ñDÓGBô'¡Û¶…¦éˆ¾â–>¸”õçÕWæëJˆ“ü&6‘gFí>°¯±±;¬´$ƒo;­n"›9F»JÚÖÁyÅù’‡'“É„šëº¤Óé0Ô0²$¡Q’[£T*…96Ä;J:Kår™|>î¯Ù>DáI²ÿ¸h¯Rσ_F@ñÒj¤1Óbâ©§žæÁáá©”„Ø5Äc§\.366F6›%—›^:úºêv HhÝlŠls…䑉‹(é)zéqZÂ뤠„„Ôˆý …0ÿWÜ‹IrÜ Êuš+<àÙçŸgý¿ü |ýëS¯ áùâõ 
Q%éN÷ËÙæº.•J%ü.Dd•vËÃ㓟üd]G9n²^‡V^~ùå°k?þ8'5»l-lÌ4Íð٠²äWO<ãy-Ë"ð}¬ÚÿÿðãÔ„¸Q*0õѺO.û¸þúëùÙÏ~ÖÓë8]※!—Ëñã]»¸äÖ[ùØêÕœóío³öšk æýcYÖdwrY¹±$““…¸xø¤cN®$,É$8ɨúg3ÊF4™#˨PÔQ¢wùLšPØ7KašoñÏ›•b?u4†¢v úžm÷¨fêì""C–,>þ´E8™¼Bï`é׋En½u9¿ýÛ'‘L¾}OS,ÚaÞÉFÄ9SB<ãkùÊͱ6îè9Ô}hã×F¢¨§"àÖ[³×—fÚÄB~÷Ͼʹ/?Ó H Ûë$IL̰Ȏ¤€ÈÔÅ%I²îý€‡4iJ”š;X´ ~98¤HabRê—Ê/Ѹ¶€ïÊäg ü_õ ®°+*Ø#ca‰Nª­—¯}™†}$Á¿ÒÇ©ñë-‚l@°?ÀÜeªmÊÀ¼HÔß À{ÂÃ[á‘>5ÿ6y©«àáa6Ác§fÑ‘µØœ¼²yŸH _Õ!UÀ¥MŒ«ÑšEC_ qÏî+ë_õ|w+J‹.ÉÁ¥jŸ “RdÔ~" Å;°ô­KùÀ¿|€óÏ8?ÌË h*@åeû…B˲H&“¡È%¡ƒñ¼_2X”|D’ÛHö+KñA¥c<40þ¿ä²iÌŸ÷”’÷GFF(•Ju¹¾âËŠ '¡SUöD,‹aî®F¤"ÖB7–ÿÜQÎ:ë~â^uHñ’_Å4ÍP,‚¹:µã~ [AW콑ƃ¼–PÙG¼R›TbPëxÞ­…Œ‡ÇžÝÇiÆIœöâw·²I4Ó—¦íÌ«´Å2”|’SÈ0 r¹¥R)l3¥‡7eÆzE¯Û›Ã‡óÚ+8räHS!n*âa¬’¿1nÃy Š'lÜCð}¦É㵯BÒS:(‡q)z)·ÃÔ%5Ï4MŠÅ"»wïfÏ€VM‡%µ%•Óäé}ûعy3…B­YÃK†‡¹èñÇyâ󟇋.j¿¸kO3Dl›ª`O@û¢K9Ô@*@}©RÅ]ÔÖ"ª_ÔêP¦3®”ø¸bo¦ñåô¦"îø±P±b:yà$MC¹\=ä¥@B2™dõê Ù¹ó­T*ê–‚Hˆ*lÊåzO4ljB1å½ÜÍë9Êùöv`0½âʨç‡Çm…í Ÿ>LŠS±±;N!l“\+.¼M7/hºö“©ýÈ=—&¦î(SÆÂ" û#&&þAŸäÛ’_W¡¯r¬™=¼ÍÁ3ƒäê$< Ήé/¥±\ Nã6C9‰Xüa€q‰Yð«>ågʸž‹ñƒìïgq×Å}‹ÿ-V(o@ÿŸàWøu0}ëv 3oâ>ö¸†´s9˜?2 ŽðóIþ4 à)OD·äâ/SãTã4ó<ïÓ,…ü·ó¸G]|;Ç÷˜ëMFN!xU€qÔ ¸7ÀHØ%ÄW¡ÀOAðd€±Üàg¾ ‡–83NxöÙ¦SMÇYo9¨¾ñà§àÕtA_ q¼øK\|q—s•ê«z@NÍfX–Å'¶}‚ëÿßõáÀ¥YÞP“U«VqÂ+NÀþ9;¬ò$á9ñPâÂR*•"ŸÏ‡ÞHâuSäøË@'ž”^Žm&ùx¤S%Eñ}uÞmò¾gâœÌ”6%%PŽ]DÃT* EÌ“A÷B3¾ƒÏÓ§]ÀÙgß3ùCɹ“FS!tžI§ÓŒŒŒ4u›ÑêÞn¼¶ro5Úq\ô“ûQ+²ÏãE"·ž°c?÷hç!»¢ìÄ¿‚)V•6A¸R©z‚J[$bè ¶§Ÿ~:gþá²þÒKg´f‚¢\ñŒ‹qádŠçqÑ=÷`_qEè%úhm"›2J¿1QM¡P X*ÍFçžãáÕåüñkç\w5ï¹ 1?ûüëÉobûç¿Æ[ßú§¬Ýz!¯¨µ§t³SÊZ!ˆ­.n<@ FD³èQǾ–ŽÃÛ V^-ç\+ú~esRÐ"gUùë¥Ä•C°ˆ*òbš+ ±m4Ǽ†ÿ-êc½ØvÍØ~üØ2£LކÈ4ì3îTÄþšµË_Š-?ßoÚ4ðqñ½N‘þj¹\®‹¼ðŒ®‘û7ÁûžÞÇko~-ÿÓ+/©>|ÚäÉSÞ[&ÿ§y2ŸÍ;>‡…EŠ…ç ‡ ìgl¼g=‚Mö~›ÌÎŒ •= Î!‡äcIü û³6ö6æGLüG|ÜßwÉ—ò›¼¿ñpŸwñ¯ó±µ0ך¸ÿæbTkÏòŒ ³ϽÄÎÆ›âŠZLXî å è_àã\à„×WB~ǃåÀrÕž8¦†÷æÉ¬¨Eta¨m^à†ÑcÎ+œð8llÕÀÆ»8'Ä¿s?ããâR¤ˆ±ÂÀ[¯&ȶݾ­×_ñ´¸=XÎúOœ4ãAدm‹xÊzÝ„Žh–¾â||Ì °í?ôÙ”³ Ò›»·ÕåÔûN%Ožk2×ÏçðÎxåEÇqÈçód³Y.¾øbÕÐÕD5AªíÙ¶†º† —/v1k<ÛLñ¾›Mq§0Øèµ$¯ãE$L½ëpòž¥œwÞË'h£F }ç…R•p®èæ>š +^6&³—û?n¿ÝzÍ O›?»ï}˜—OQD'©OM"‘½Û‚ ¯s¼}Yh÷C`ëÖ…ÓME]!Ó¬K`š&¿¼ÝĪß'mšø¹7oߎ¿a”JŒÄŽ0ý.¡©. 
éÐ<ÿb#ï`û[_ÎÇþcÓ|9Õê9$“gw¾#ißEeb²w–èoåOAð‚z¡LüyÓÔŠþ>”ÿ7øE0Ÿ)pF-Á|AÎ H‚™lØŸ¥ö3BÔ5’s–ã!LÄ+9/¶ŽÛð¾l/Éä~™œG‰údì²n³^N³è‡¨–„Ûn‘ÎEá2J¿Œe' ÷õ½sÎáíGvþ½ö!.n×áÖ…B¯}m‚;îx™Ì+¸òÊÓùÎw6ò™Ï¬æ»ß]…m«†ccS‡GZ–JgJ¤K&£¨ejUÓ8Ÿ€o'ùÞôáeý#¨+Îg`~{’Pº­¤§QH~!‰u‹ÅÐÿ"ÿŸyìÃJŒ ²56áDpv9¸'»1×ÖîÛ?V¢™]ç-%‚Œ¿Ö'  ¸ªzÞ5Þ󕥕0ç]#r=M̺Ûfž„²ÝFoÄÆíÊ6³dI“î¸ 2k?ñïx¶ŠÊôŠ¡Çð²_þ„=%…DwL×ÇSgE&ÄÃkëå©Yôµð£]Ùôý¶·ºWûu€Q5(öñÉšYµânÂøþl6;iÀÒJ`j—ŠÅ"®ëÖy¯Ád/·n‘p€Ù¤Y’þ¹&.N ñð¨8ͪlÆsC•Ëåõ~©~âB;ïÀä$P¼á³¦9ó}}â‚q#™L¦î~Ïd2¡ž„´J˜öBåŽ?åy.¾týÔ÷b€êdäi;R–°wÉ?™H$ ’vÅß²oÏA„y@Ð+Y¼¼S©T]¾DÉK™Ïç1ˆDd.G2Ÿ'¼dÓ&N¿÷Þ^_É®i–¨q (—óù‡Ãßbxø|íGå™g¿À>ð3î¼s}Ómz`~Œ?\p÷ `XmK³¨ék!ÎÄä[Óù C¨Äõ»Ÿ»›¥þÒ0oY³Ð±fbY7‚U«Ð·~$—Ë…¹éæƒVÉ{[!ù $,MÂÿ|ßWÞ‰ó8pMV>õ§kHà fÙj“RAËåð}a H+;ž÷QÚɳ_G>äÐU©õo­â’UçO=É`£ÜUÄ}¦ÏóÈd2”J¥°xDáðƒÒ¦N›y.\cFÓ*ÈòÞØØ¹\.ÌXW ¸éŽ;¸jÙ2~˜A@É0:ã þ¨—×pš´JÔ-á“år™Ý·‘êÊo²wïŸaYjÒmýúõÜy§ZFòcË|Ÿ}¼«ÁÚ îÅ`½æqv)XÿÆ-5O®³£Ë: €;€¿…ìkbï° ™#J³ñ©ØçE"…«Ã[f6‡ií5Íü!ÕR;I ŸËÁ¿xoy˯}m~RPjˆ‹É›­®r'‘pny§6"¡Ôâ4Ö#òcËÄ‹TЧi|ê_º†ß7À]—]ÖášýÅ'Ip Ÿë¿~=ïz×SÏ8‹8¨ ÜÌ×B.¤I4¹!ïÉm%.óñ[H‡a/^|“{ïíB³h3Ô31Uî<Í‚§¯…8€³Ïþ.5ÊRLÒ…{ǼÇ(|­€5a…y×;óÞÕ­ç’ e0( ƒå»k,\0(\ò†ï°îœ†ÆÙ#|ÈKŽ sÖ ÍT¥Œê¯H|}¾¶¼ä+€èË´€ÞÔ¥Ñô9ÀÁ=ùÈÙ“>“HnÆÄ&&..^Û|qR|ÊÆ«ÇE¼<ù°€HBW!Äš¹§o…¸€€‰‰ 6l¸xêQF묯ª `ã6rŽuÎ$€AõFY,Ľ0D|K§Ó¸®æßz衇z}˜]ñ ðôSO5ÿÐŒÎÕ¶í’ÚIU€`’+»”ŸoÌáá…y/dÛ­Üà]\ Œºýw\%q–H&“˜¦朔ÐõAÏ…¸&ñ(çžÛAiyùÔ(¤úr·Õo$¹tXt¤_‘Ùq^ö²—õúpº¦YÛ‘¹ûnüsÎá„G×22âb>Å¢òÀÞðú×ã˜&.ð3à•ÀgâtPƒ»š ݀ȕQbZ™(yZLˆk8(…û+Þ¤â¢#ŠÂ, ÂW£@&¯m[ýï8êËR¦*ù¾L õ™a@©¤’ôg³Ñ²®«„°ø#-Ÿ^§R‘@jùT* eQ-™TùÆ C-/â™a¨íÇß4ÕzÒ¬†Z×4£Ï Cxmlzv츰\CU`ÁÂjæöÎw~–û·Ÿç7Óᦛ>\8í}ЧX(¬ºÑ4c)'Æ€(#G£h•"ªÔÄ–oÏ7(NNr;ÎgOªÓʽ³ÁTÓ´;Pˆ+Sf –ŸäªOA±8ÃoOD5ñjËÔþJ;B¦jË–˜”&†Áœ/Õôˆgs7ý˜÷þì 4Þ©-£œÚp20ðñÃhé·4ËØØuËÚØa貇GŠ6v¸n† EŠaAŽAÏÍ7èô­çá±ìð z¥êÙÁäÚç2řͼƇå°òM-ªf‰{i\rÉ%½>œ®8ü~+ÿ²]Ûåf[„ j?ÝTRóñÃõâtÚP$H„ƒ\Ú$1ºÌÐH™w Ød?@X^^(„³C²n@nSÎÓĬ۶äÌqc=ªøÃ,I²®ü¼<” µÀü jˆË÷iYVð&¹"¶m¬ªPb/|ÓCðp+Ô.ÝÁäçÒét˜l®=1¦Ò”çÍ|³„x w‚|7ß|3û÷ïïõ¡wE™r]nçá‡qׯÇ_»–5Ÿy–GÛ¶°ÒiR¨AvžZ±r_Y¨«¿|2èk–NÜbÒD¹㎵Vlñr¦m¼ÊÝçÑGñï¸^jÕ÷v¥áo\Þ÷U™)yß0¢é]™"N&ëKÊÆÇ{¸‚ç©Ïeº·ñ8z€:ÇøgS °‚ :6éU/¦Ä sÈ“yàÄÉÄhÅ>f›©ðµVñjJÒÀJCÒjvD<Ì ÎD°QLfZ’$'5ä".Ó4ø ØØMUT øøŒ2Z÷9¨‡SY²xxŒ0ž[’$C 
ac‡•DH”‡W'§nŽ~ÆÃãôýËyú»Ïc\Üâœ%f/æ÷ÔKŸ‚Ä^óQ0f *M-€ôóÎ;‰‰¼#ûŒø=ë=Jþî»)lÚÄc¿~€_ü½u|óC¢ ŒÒ ÔÝÔ¯¤Q# %mtHjTlÔq„Hxàã`üá€/WK³!õ<¼‚z$Û¶ŸG [Ù¬ê. Ñc&?º-KU½LSy±µ#™œ,|µºu-kr7¨‘Fl˜_àã‡Õ yòÉC¬]ûZ¾ð…{Éçó¤Ó­=þ<”ã4ÝRD¤B}X'LŽÖ£áõLòj¿ŒÅÇxl¥ÈÍ_<ÈöíO7_¨±Ñd|ôÛøÅ$< ¼×†tÌcºP¢@'kû‘ý/ ÈÍìq16‡¿}Ï\~yg+Ìóî’jÝsê*Ç%ˆC†‡VUޝ]û‰I¥¿W¦ŒW»è2v”q±Œ{e= |¿6­Î1¾/Ût óÇ]“¾â~÷t~ûتGجX›]RÎrìÄãù-ÿ·(:Tû›ÌœÈ©ëB¢VúFŽˆXÖÄ…ÔþŠâ4 J6bÛõ½LÓT`\´‹Ç[H|G‰óhF\Ä5µN‡"$¶=ùü–EJµŠCï+ö·\VÛ“qc6[ßð–Ëõ´ïGtcŒ¾fAr øÙÿ{ç¾=ª¬Þv"¾UbÊ Bš…ÅC$I’&M† *( Aš4)RáXHÖ“°X÷ÄCcpÜ$ÈøIþJ‹DP‰³C<Â)ù$a¶ uã³räÂq™¬#Ç$Û*S#Ÿä8dÛñ\zò9@…Ê”bc<—ß¾‰}}òhG׿¯…¸eÏ>Ë%‡¾ù϶]Î3Àƒ¢Sy›¿q] €¬5¿‚wùÿNç.^hͪ“6¾'Þk­â,·“°®Æudº\272P1‰%Q³Xl.¬Å=òøè£¼þÄ&Ìžúõþ¬¿Au¢Ô*5¨Ed¼X(P+ô _™L$¨äÿCý5kêG£sZ3³½}4sH³vÃu•½pÂa’É$ét:Ìç&à ñn£yQÙ?ІH“f‘'q Õ—”Éa2âëH|5D e"YÆû£"ÎH¿\&ÑE%omãûp»÷y*áïàeüm¾>ïcSËÙ‡£1í¸V™p­Ù¼ò†M¦Å<ÜaÂÊ@e{/ð]žJÃßåkË¢ìksp’Ÿôa“ ®§–WN¼*k09\ç„öbÃQ’ÉR©}Ã'Bf…æ‘?å²ÚN³(#q~($qZhœÕ¸Ô‡J¾Z;žd²ùøQª¾Ä'††ÔÿãYNœ@:Jz¦F yèÄ“aÊ9f2‘S†£¬£J×ïcd$Ú¾DEÅ#ºš%y(Ô‹²¿Æãj\G¶/É@ã®ÝÆ1àÉÃ'Ô}מç…y‘ƒQFÃ>¯ŒWŠÉŸN¢– B¯3“"ÅŽúÒâ6Õ¸³YÔK¯ÇWñã=ù±“ÙóÔžŽÖë[!ÎÃã‰û^Êó#C­òáöÿ¸ñ‹Ç±m{Npœ§Ô.ö¸T:€â‚G³sµj¿~:º…d+“Äÿ÷‰ù0½Y# àILîüŸrþù«¢ï´“FSDÊV4s„O»¸;2RÛ^zr$Õef­šj²}š‰„ŠÓJp§¹o2™è}˜|ɹˆ}ˆˆÿ/-ÛÈf[ç!hV0,~ÍEˆŽ …ÍÎ]„Ii/ 8[ ¡rnñrÙ‰D4ƒÕÃjÎÓEú—Þ¼ |_*Ì7âÊôeÕófbY¼ÜI1]û™îþ[E×¾âVî?…×ýÒó-—ñË>'|çÖ¼eM¯wú ¨ú_G§±1íªÞJG®±Ñn̶<`<÷Ä©±±É‘#A"|O\Ï‚P•<€ñõ&ŸVä.?S$1n+6A ‹>§?û2]Vß)*µKÆjB¿§2ÖÌ}Ý•~bÙÒ¶7ó-{oaûû·vÒŠò@/þíò˹¬×ÑO“ä¶ ,}ßR’IÝ /J”B#Mš€€Ï÷¹^V×Z²–më6ùÕ9ÎA~TµgïæÝ=9ÆV•¦½Ï:a6=ÅJ”ÚnOòJh€s3$<.ÇÿoW¨B¼;ãaä2øøT¨„‚¥äüd¸..9”w‹ƒÃ3›žá—ùåY»FóAX¨fõj².,Ý<ÁÃ/o§3C”±> ¥ÏEA¢ HÔ‰fáá…÷ñOxó;Ÿç”+÷cþå1”P&ž?09—¯iªÁðTù¬ºÍgÛnÀ „)±êåµí(‘Žû_ž9RUÂ"Ê‘æÖNÞˆ­Û@»qÿ '³±‚ W^Ù¡mÍö÷ÓÒËzqW£iÃc·lbÉÊcS/˜B ÉzÂNS£¯…¸ÇŽ=Ù´‘|üÎÇá}'±ýÿ¸§Yð,ù¥=¼òÌŸ¿¤ÞàûK¿Ïgþ¿Ï¿\‹°‹Fo: ‹OžúÉ^VW܃Ï3K—NÎgX}ÿQ~ü‡?æbúÓÓ¯×å×§¬®{Í¥[~3ïθ×]ü³xhA’$.nè°yÅf¾wÎ÷æçÎûNÜþï•Ê8Ÿù̟Ͷ<Ô„ºdZÍœ*4šùFBä~=Ç]ýg§^Ìçï½ãž{”‡[±k>¯<ÚZ¥(1ŒÖ"œÜY”Gcs&‰è,”€fÖ–ÉæW$][.CT1Ð&Ò,ê åÑÖ¬éLRïM’nXO3| å?¹‚õë/m½P-r“"ó[BæÁôSÓw/Z¾Ø@d2™É^qµ4зB\@€ù– ð“IŸ=pëðÚ“ú2¾Z£‰S­VëÕpáÿ» ¹°×‡¦é66/¼ÿ…½>Œ®XŽÁò•?›\tÁ81}"/øÝLkÛšÁ 1|÷¯îù+öÞ³·×‡Õ1G–©åßSýàM›vñîwŸA>ÿçõ 
Jî)%@,ÂW£é9Nx?ºÀO×ã·ÿõrn¼óÂ(—T:¹kv*'@Ý#(Õ:‰º µß<Ê[­Œò6oµ J,1¶åÖÞ/£Ä¶V[ñÇ‹Ô<ÏO¿ ¿úø‹›/ aÉ•.6:›hÑDÓÇó(g¬ª{?[—_“hrA£‰Ñ·BœÏiŸÖô³¡¥CpÍüK«!¦Î;À3ίsøp¬qvØ2óšÙã%ß| üf¯¢sVbð‚wŽbYWÔ%´uýY½>LÍûžð=Ã0ê#HziËš¾æ¸^@+&¼sÙ{Ë “?(çÍï±HE¦[ª÷\È™g¾,|}ôÏŽ’‘*IÍ€ðXpGöÜÝô3§ìP(ºÜ¢F3¿\qî½—pë¶ \×ÅŽ'˜Î¡T %@ vþhÍÇÄä/]¸äï¾Éëüc•~*N*y ¡ŠŒ ¼ÜÄU4 T‰Ä3 3Õhf™eÏžÌÿú¹"¿ôކÌÏ9Ô¤u³R¿M²zÏ0+W®_»®‹¯N«½ê5-è[¸‰}OðKKn‡†œC;·íäéoÿ/àøy9›z©™+>ø9^¹gkøúøóŽ'¯ thˆGNy„§¼±ä«‰IŸÝûÖ{yñ[_ÌÏ'¾×‡©Ñ´åàŠƒ¬Ú¿ûØCÜvêO(‹ºç¢-:húUÍve«¬{è}|ýëÿÜYUùJp+Õ~]¢ülÚî5óÌs­ç™¯~µ>å…²K=¢ ÿ«‹9á-¢Bt¨½¦%}ë·ì²[¸æ¥Ÿžôþ¯¿í×9þÜùá4š™²&¢ÁžË—/^k4Àëæ¸gŸæÅg>4é³Uw­bÓÏoÒ6­b\³†§Ÿþƒú°™kK¢ÃP5}‰Iù£eè·ðÏÿü¾É¹;ãx¨J} ÄfÉÏfÕ^ÛhN3ïÏóê–\\̃+ÈÿÆopöwÿº>¼:NÕNKÐa~š¾á©žbÕ^ÌY[ލ7ʵ_fÀ8ôä]\øº›Ã¾pÓ°Tݿд oCSO½þw¹/81|½ê†U<~æãðÆù?›‰°T¼ scŒ1ÄI’xxa5«x‘ + sµ°ÂmÙØxx“–•}˜ºç4Ð<ño¿ µ ¿M?ÞÄ’s—ôú4šîyöÙ:¢ª]åß.ÿ7Þö¹·õúÈ4šŽ¸{Éí<ó÷òÏÿüþç²Q5G¨O:?€Äóݹ®‹eY†ŠY¶mãyår9LH$(•J†A&“!™LbÛ6ŽãàyÅbÏóÈårT*•pÛ’„Úu]LÓÄ4ÍP”«ó2¬OZmšfè=Éd”Ý?™L†ŸÅÅ%˲(•¢2›r,@]šÛ¶ëÖkµ\:V=Ô²,*• ;vìèõWØ1½€%ù#\þú×7_¨LTá´ˆöòÔôÏžpOýèçøµß~^½Q u5Ý^‘ù&¾ï‡"”P¦i†“6 =b"xö!^xøÌ7EB\üÙF€nƒ5-é[¸'ï?“%[~-|}ê¹§òÚ¼˜ÿœ‡yòT¨P¤È(£ák ‹ll*µr(EŠ¡¨fÇ~ H¨¿‡‹K™2 (àÖ~Ê”ÉÕ~R¤H`ˆ!F!C†DÃCŽ\øz¤öS ÀZÖ†ï­e->> ¤H‘#ÇC$H!S·\ÍM Uû‘ýº¸áqÉ_?\ÎÁ!EªÎƒÐÃ#G.sä…‡áªãÎáÄs_¯üæJÎù§sz}XMW<±â ]õg<ùäá{ϾìYŽ›N Ž,YÂÉ·¬eÆÃJê“p‘ Bo2ß÷ë¼É2™LøY.— ?s]·n9ñö’mĽÖddšfÝà ž#¯X,†"V:¦X,‘P%äóùPTK§Óá:¶m× \qá+.°™¦®oÆ$Á-¾œö´mއÇOÿûÍ<36VwÍŠªž&»Ø¸F3O<ò³TÇN㕯¼@½1JO=6=|s¹\8Aã8Nئv³=Y§Ñ8—Ëáºn§o.— Ûúr¹\׾ǽvS©T8ÁÒn¹øöâ^ÀÍ–“c“‰—n—‹{"—Ëåºç™x,An/î½ÜîØ…G¾?ÌýþÕðuü9¢gšô­GÜSÏfý¥* µ\.cgíЋ¬Ä=Ó¬Zï]„¸qp(TÌ?2ü‘¨ªY~ˆ8O¯ÙöÔ/±ÙÞæ8ãM½õfr^Y²už{íÈ“§ÔB<é Ò¤±°01)R$Ož$Éðo… ÷mœÕë2—ÜýÄ Vœ;e8÷¾sΣO£xöÞS9þ„£àò¤i™ \£éSîÙ» ÏyŒ•ÿ]+øä3ehj¼A¼H@¡PÃÒétè-ИϬeè F3M}ê×XûâNöÚ™Ç|Vbû ¼ýã¡Ð¾ÖfJ¥êBïZ…¹­]»6\®P(t6A]¨v­^¸´ÔÅ Äd2™ð{nôð ÿ‡Í7.§ÑÌ6Õà^±Q¥!Êçóõýc)š£Ñ´`ÞBS'&&عs'GŽaóæÍlÙ²¥ýò§.Å|×ÅöìI ݬ.lš…ð¼ðþö옺µá•?ý)âûßÿ>çßu>Ëíå:O€¦çtkÇüèͬ=ã1úd€´U2p­1hzH·vüÜî‡xÕµ«á˨J’“2ˆO§Óa~Öòù|8އéh43¡[;¾ûÁM\øK 9våeý‹x2uß÷q]7Œ×®]Ëèè(¦iÖ…R7†àÅ‹ƒ@¶ÏIؘ0.PÇ…¥Æ0·ñññðÿVan0½°¹¸0nšfÝëVáÝù ãçÔ*/b»ðF!®ÕrÓÙ^/…¸níxãÆ}d2—GoÔÚâx „Ea èûŽ´À¦™ ºî§Ü~“±6¹Œ)LTºm¦š6Ì›G\.—ãàÁƒ ‡‰*ÛñÜ 
ŽãC>ÊÑíG;š¹Öhæšnmø´Óöó²—íæ…÷¿£·ÕUs4}A·vüèÞ!N_ò$Oí}ŠmßßUšÔhzH·v¼nÝM¼ãŒó•€ì£þZ“J ¦i¶4šÙ¢[;¾ïŽuüýoŸ©¼,\ÀA Ë ^q±‘‘‘ÐûËuÝðÿF1c||<zòù|˜*îõ)Ÿ ñ|V¶m‡ë4 qqZåáÒ .ÝÚñ _øBeàÀ÷—~?ü,ÞçóyÏR3otkÇV×ðÖ·žK*•âà*/81á~«¬é;æÅ#nïÞ½8p€o¼ˆçváM×g]v<{¿òûo ×ày±ðëmjúéØð† 'pã #ÀÏõú 4šéÙñ“»‚ýöõ¬x`ïý÷÷F³|M˜ŽßpÃU¬?² p³.¾ë“N§U*†Úz^2Í\2;^¿ÌW˜`Á£k`½†Ïý<_Ê|)ôŠ‹e£££áÿí xh4Óa:v (.{oØËg¾ü.|Õ…†Qç ©ÑÌÓµãêmGCM#g¨>ݬj:d^<âvïÞÍððpøzË–-ìÞ½»í:¯ÿô+øÃ/>‹Wë3˜(»¶QE¡æƒ~¨Ü²{÷nvìØÑëÃ`ÇŽS~g ™éØðSO=ÅgßùÙž·¶Ÿˆ~¸§{Étìøê7ìãíw®+3ÏÎ Ú~úó8zÅtì˜2{Ç10êóΧÑ/v¬£?˜Ž¿éð³<ê> %(]\bô-£…7\ý†ºÐ½¹´é~iôqôÓ±ãÕ¯f܇ _9Ü“pÝ?®GÛqwv¼oß><Ïãæ›ÿ›Gú(†k(/åd;[€,vû‰#¶Ñ óâwäÈ6nŒ’ìoذ¡íòwÞy'ŹÿÔû1ö¨ß÷–Ù9ǽoß¾žÖc=ÆÄÄ{÷îíéqìß¿Ÿ]»vqÊ)§ôdÿGeÿþý=z´'ûïÖ†øÎ¬9¸†.¿}Ûöõ丵ýÔÓë{Zìø¹çžëÉþ»µã={ö0^çŽýÅ­EžØöDOŽ[ÛOǃ>ȃ>ȺuëØ¾}û¼ï¿[;¾í¶ÛøÏÝÿÉñŽçœûÎÏû!ýcÇú8"öíÛÇ“O> ÐQ> Ùd:}ã{ßôî:ç^üâðýïŸ;ç£G\Ít;Ø?Ç1ˆýãÊX…ï¿ðû˜Ÿ0á=9lÝ?n ×vüàƒ2>>>0ýã}ûöq×]wqÅ^Áý'ÜÏž {¸ýâÛ¹Ûý=9þ^ÓkûéöïßÏøø8+W®ìhùy+ÖÐ ·Þzk¯A£™1_¼ë‹ü*¿ÚëCÑh¦ÅöíÛaþufV‰WDÔhÝ7Ö,:õÑhú•¿þë¿®{ýóü<¯æÕ½>,Í€1/¡©ÃÃÃu3{÷î¨2Û¶aÍB@Û±f! íX³Ðv¬Yh;Ö,´kzÁ¼ qÀäÒçM¿£mX³Ðv¬Yh;Ö,´kÚŽ5 mÇš^°ôøÀæz'¢(_ýõø¾Ï-·ÜÂ_üÅ_h¥Y30hÖ,´kÚŽ5 mÇš…€¶cÍB@Û±¦,©V«ÕùÚÙÁƒ9pàÃÃÃÚ°5‰¶aÍB@Û±f! 
íX³Ðv¬Yh;Ö,´kæ“yâ4F£Ñh4F£Ñh4šÅʼäˆÓh4F£Ñh4F£Ñh;ó’#®ß˜˜˜ÀqöìÙÃÁƒßÿ÷ÿw¾ño°qãÆºuZ}6DzeË–Žö5DZsçN\×­»íö5W×BÓ9­lX>ëg;ž«cÐvÍôhfÃÐÿv<ö£íxpéÖŽçâ{ëw;žÏ{Z3=´GèöxpÑv¡ÛãÁE÷#´Ï.‹ñÚôZ3éæJ»Ytq;wîdõêÕ“â{åBMLL°zõj\×dpÍ>›’°O°, Ïóåú9_DZeËnºé¦p{»wï]&[ík¶AÓ=­lúߎçÂ~´&Ó±ã¹øÞúÝŽçóžÖt¶ãzt{<˜h;®G·Çƒ‰î×£íxvYlצ4“~a®´›E'ÄIÂJQ3Ïó¸êª«¸òÊ+Ù²e ®ëòéOP3 ­>›mÚík¶cÆ $ ®¼òJ6nÜÈx×»ÞÕv_óy-4ÍigÃói?í˜OûÑv<˜LÇŽçû{ë;î—{ZÓmÇ“÷¥ÛãÁCÛñä}éöxðÐýãÉûÒv<{,¶kÓïšI¿0“ûiIµZ­öúú‰ƒràÀ†‡‡'¹R¶ûlc:ûšÏk¡éž~·ã¹8mÇ ~ùÞúÁŽûåžÖt¶cÝ/úå{ë;ÖíñàÒ/ß¶ãÁE_›©¯Ãb¼FÓ¹ZˆÓh4F£Ñh4F£ÑhæEY¬A£Ñh4F£Ñh4F£™o´§Ñh4F£Ñh4F£ÑÌZˆÓh4F£Ñh4F£Ñhæ-Äi4F£Ñh4F£Ñh4ó€â4F£Ñh4F£Ñh4šy@ qF£Ñh4F£Ñh4Í< …8F£Ñh4F£Ñh4fÐBœF£Ñh4F£Ñh4F3h!N£Ñh4F£Ñh4F£™´§Ñh4F£Ñh4F£ÑÌZˆÓh4F£Ñh4F£Ñhæ-Äi4F£Ñh4F£Ñh4ó€â4F£Ñh4F£Ñh4šy@ qF£Ñh4F£Ñh4Í< …8F£Ñh4F£Ñh4fÐBœF£Ñh4F£Ñh4F3h!N£Ñh4F£Ñh4F£™´§Ñh4F£Ñh4F£ÑÌ}+Ä%‰^¢Åqr¹ŽãÌh;žç‘Ëåz}:=EÛñà¹\®Îöµ +´6رØÿBFÛñ`Ь-mÇ‚¶ãÞ0Ÿ}cmÇš¹b:v¬ûÇ­Ñv<ØôÚŽûVˆs]·×‡Ð1K–,éõ!̹\×u±m;l¬§Kxž×ëSê)ÚŽ‡‘‘,Ë m_Û°bPìx±Ûp+:±ã\.G¡Pèõ¡Î)ÚŽƒfm1h;´Ï?óÝ7ÖvÜ?h;Öýãvh;lzmÇK?ð| ×A.ħ>õ)<ÏÃ4M …rhŸÝu×]œqÆáºår™/~ñ‹œqƆAÜu×]<ùä“8ŽÃ“O>‰išÓ^®žçá8—]v†ap×]w±jÕ*n¸á:ÄùçŸßÑù·Úg·ï·º¶œ§ïûär9¾ùÍobš&¶mãû>çŸ~Ëó ‚€C‡…BÃ0ÂïÆ÷}¾õ­oñŽw¼#¼VO>ù$@ËuéÚq³ïSÛñäk;ÕyBkÛ’õ›—ëº|ë[ߢT*qþùç‡Û½ì²Ë ˵4;ŽÛ°išm¿ï©˜+;îæZtkÇÓi‹åXËå2žçq×]wÑ']‚Y¡•wÛ§èö»›-;žI[Üê<¦ó~³ë:Wvܪ-~Ç;Þ¡íx†v¬û“¯k?õ«7ûL÷+fß–Å«­ÑŽ/»ì2Ý?îmǃׯèg­¢/<â‚ ‘Hàº.¾ï“J¥ê>O$xž‡ïûŒŒŒÔ)ø©T ÇqÂm8Žƒçy¤R©P±O¥Rá6<Ï ÝH;]®¢¢º®‹çyd2R©TøÅuâ~>22‚ëºá9xž×õûSc'çéº.–eáû~¨ðg³Ù¶ç%Û–ïLöÓˆã8d2 ÃèxAcºvÜ̆»ùÞ´GçÙζڗišø¾_wLÓ¬Ûÿb°aù.ÑŽãß¼î7;îöZtcÇÓi‹åœs¹Åb±§v7Û´³ãnûÓùîfÃŽ§cÃrîƒjÇ´Å ítßx¡õå¼›ƒîẆ-·²ã©ÎK÷ë¿Kmǃׯèk­¢Úäóùj2™ _‹ÅªZ±X¬Ú¶~–ÍfÃ×¥R©jYVøÙØØXÕ0Œj¥R©†Q·N6› _˶;]n*âÛªãããáëø±7£ñüòù|µX,vý~;:=Ïl6[µ,«jšf5™LV è‹Å¶ç%Û–ÏÆÆÆê®‡mÛÕb±Xµ,«nýVë 2Ó±ãV6ÜÍ÷¦í8:þ©ì±Ýye³Ù*Pª†aTÇÆÆ W«ƒmÇñeúÑŽ»½ÝØñtÚâjµ®Ûé5ZÙñtúñï$¾Þ\ÛñtlXÎuPíX¶×ØÇ—Ñv<=;Ö}Šzú­o\­.>;–ÿu¿bîm¹•wr^º}ÚŽ£k1(ýŠ~Ö*–;´×=®ë’L&Ã×¶m‡ÿû¾eYuŸ‰¢êy†aÔ)”AÔ­ÓŽN—ëÛ¶§t!ŽÓx~2;‘Ëåºz¶Î3ÆÆÆBºT*µ=/˲ÂÏLÓIJ¬:Õ]\tãë·Z'þÝÓ±ãv6,ש´×/ÓÊ[—뺔ËeÆÆÆ0MÇqH¥RäóùEeÃr-´+æÂŽ»¹ÝÚq·mq¡PÀ²¬·Ùf´²ãéö)äúvÂlÚq·6ÜìÉŽ[µÅ£££€¶ã™Ú±n‹§wžóÑ7^Œv º_Ñéû³qžÍì8N·=/Ý?®¿ÚŽƒÔ¯˜j^Úq_„¦vcqã• 
cÛvø[©Tz}:3¢UÒÀnߟ¦iÖ¹[–5ézwzñmŽ´M<;ýôÓµã…fÃr~½²ãfÛž y¸Šý§Óéðx“ ƒ¶ãÆó$;n·N3;v]—B¡À’%KÂD¾K–,˜äÃíèÔŽzŸBÎqPì¸][ ÚŽ[±Ðíx1ôµ7¿N ÉŽåüzaËÓµcÝ?ŽÐv\~ƒÒ¯˜j^Úq_q–eQ.—Ã×ñ’ʦiÖ}qñ’mÛ¡2) h&“éõétEãù Êår×ïÏ’¸S ­Un–F$<þ¿¨ÅÒh‹E …¾ïO¹Î 2;^6ÜìüziÇÓ±-É߆Øîb²aÐv<ÈvÜm[\©T¨V«á/@µZ]Ðv¼ÐûÍÎqì¸][,Ÿƒ¶ã…nÇýdÃóÕ7^Œv º_Ñéû3eºv¬ûÇÚŽ{oÇ0?ýãéî§[ú"45NS.—Á0ŒºN—(ïñÒÉ‚õÐÐP˜L2ŸÏÏûñ‹Ëét¾œÆs—‡±T éôýÙÀ4M’É$###]]OÃ0H$acÝl˲H§Óär9ÒétGë Ó±ã~±aX8v<ÛÊf³¸®[w<ë-†Á¶c±áéî{Ðí¸Û¶¸T*ÍíÒCZÙñBïS4;÷A²ãNÚbÐv¼Ðí¸Ÿlx¾úƋюå3ݯ˜{[ž®ëþq„¶ãÞÛ±œË\÷çËŽ—TeÚ¥ˆ«ÇB¡2 Qo¼¢¨”ñXÞùÄ÷}|ߟ‘JÚJiíöýÙ<ŸN®§¸Ò—J¥0¾zª–é¬3HLÇŽ{mð0ìx¦¶Õé÷°ÐmXÎËŽgÆãç1Hv¼lr:4ÚñbéSÄÏcì¸ñXzùLì'«÷Ú†›Ï\õº_Ñ{[îÆŽ›îk;´~E?÷©ûÂ#Nhö%IÉ[1dÇqš*ñs…ïûu®§äóù)¿œN¶Ñê:y¿“íwÃtŒÍ0Œ®o²é¬3LÇŽçÒ†aævÜ©õ‹O×¶ºýª Ã`ÛñLÖowƒ`Ç Ù&§CãµX,}Švç1Õû³Ý§˜ŽMÎu[2h,V;î—¶æ¯o¼ÑýŠÉÌ·-OWPÐýãmÇ“Y¨ýã¹¶ã¾òˆk…çya\±¸xjúß÷q]7¬º3Wë,´Ï=óe[‹Õ†AÛñ| ÛÕ¹EÛðü mrnÑvÜ¿hÛïmË ‡ÅlûÚŽçžùêÏ—Ϫ·sçN<Ȇ غukøþÄÄ;wîäÈ‘#lÞ¼™-[¶ÌéIi43AÛ±f!ÐÌŽµ k ÝkÚŽ5 ݯÐ,´kú…¥øÀ>0Ú±c{÷îŶmn¾ùæÿŸ½÷³£ªïÿŸ "‰ Á f³ 5AD©v®´Z…FïÕϧ(Xõ^ÍÇXíï­ø£­Ú{k­Jjìíç þî›ú£ÔÊNkµbIÜATdØ!kÂw`„`¿œyÏÌýµ{w³»÷îæÎŒ122ž={رc(ÎS™k~èCâYÏzV·¯ ·ß~;gŸ}vWë°ÿ~öïßϳŸýì®ÖãW¿úÇ<Ç|Wëqûí·³}ûöy){®Ûñ¶mÛtû‰è¥öÓíßàþûïç£ý輔ݮϦ û¾Ï×¾ö5Ý~"z¥ýôB=öïßO__—_~ùœ—=×Ïbélw»ýôJ;Öõh®ÇÞð†9á£ûƺ ]…îϦƒî§é•ç`¯´ãÅÒ?þ÷ÿwvîÜɹçžÛíKÖôJûéöíÛǪU«øà?8í¶s"ÄíÚµ‹ÆÆÆØ³gëׯçŠ+®¨['lÞ¼™«®ºjÊòî¹ç~÷w·‹—Pqã7²iÓ¦®Öá¶Ûnã¶Ûnëz=¾þõ¯Ó××ÇÆ»Zo¼qÞÊžëv<>>ÞõßM·Ÿzzážâ—ý|Ю_sÍ53nÃ]¿fºýô^=n»í6ÆÆÆæ¥ì¹~‹ J·ÛO¯´c]æzȳn.Ñ}c]…®Ç|1—ý Ðýã4½òì•v¼XúÇ÷ÜsOO´ã^¡WÚO/044Ä­·ÞÚѶsf766Æ»Þõ.ؽ{7Û¶mcË–-ìß¿Ÿõë×ÇÛöõõM[Þøø8×^{-Û¶m«»1š×¿þõ]÷_¿~=7nìz=¤.ü~óÁÈÈÛ·ogïÞ½óvŒ¹nÇ{÷îåÚk¯×—ÊtèöSO·ïiiÇãããóvŒvíx6møÁä–[náá‡îj;Ý~z©;wîäúë¯gíÚµóRþ|<‹þóŸ311Á¶mÛºvÝ 7Ú±®GÂÖ­[¹÷Þ{yÑ‹^4çeÏWߨ¶íºXs M·Ÿ?ºõt³<›v ºܪ.Ý~w»ïܹÏóMÿxùòåLLLÄeét»ýô Û·oç§?ý)+W®ìhû9â@YNˆ©çÈÈ[·nuGá´ÓNëú èjGGèëëëúÃèúÍ500ÀŽ;غuë¼g.Ûñƻގuû©§Û÷ôbkǧŸ~:¯zÕ«ºÞÉÐí§·ê±eËúúúؽ{÷¼c.ŸÅgŸ}vO_îöñu=šÙ±cÛ·oçôÓOŸ—òußX×c¾Ylý Ðýã4½ð„î·ã-[¶°eË–EÓŽÏ=÷\8Ðõþq¯ÐíöÓ+lÛ¶M›6uÜ?^>íëë«S’b3ÿFFFâu###¬Y³¦Û×I£iB·cÍR ];ÖmX³XÐÏbÍR@·cÍRA÷+4KÝŽ5½Æœq›7ofÏž=qc×®]ñ,ƒ4nYçyÞ´A<5šn Û±f)Юë6¬Y,èg±f) Û±f© ûš¥€nÇš^cN\SûúúÈd2\vÙe¬_¿ž={öðž÷¼'^wùå—sÙe—±yóf<Ïã‹_üb·Ï[£iB·cÍR 
];ÖmX³XÐÏbÍR@·cÍRA÷+4KÝŽ5½ÆœÅˆ“´{öìa`` Î¤óŠ+® “ɰg϶mÛ¦Í=5=‹nÇš¥@»v¬Û°f± ŸÅš¥€nÇš¥‚îWh–ºkz‰9â`êÀ—½S£™ÝŽ5KvmU·aÍbA?‹5KÝŽ5KݯÐ,t;Öô s#N£Ñh4F£Ñh4F£Yl„Ó¬æøxZˆÓh4F£Ñh4F£Ñ,z|*§P×…µ~ 8@(¥Ö…À`Ã2ßo.slåJ[¹²£:j!N£Ñh4F£Ñh4FÓS8@%Ž9(íWQßE<‹·wàKðç!œš³*P‰D3ÏSZ %¼…@¨ýŸ[‚U!üЇg–Ôq¼¨Ì rð:þ±«×Âà úˆÊ8ÁWõ+ŸqŽ=¶£óÒBœF£Ñh4F£Ñh4æ°ñI,Î<”X&VfF Nè‡ß.)+³gäà¸~¨¸j{Éäﻬ-Á+Ufø^ þÆTe›$‚(±í¥eøÃ2ÜZƒ-ÀgRÖk…Š*'¸(áîž ¼Ôƒ^h€•û‹ð ¼‡ü0Ôjª¼¿õabj!l¿í6žùðÃ]#-Äi4F£Ñh4F£Ñh¦$^ïÃ'#·Ï´æ–—À*)Q­d¿ Á/À%!‡jÀ)>T‡à¯<%ÖýÐ|FQà v:`{0\W†wgÕþÅè¸â…Z,–:¦iB¥'åU½mÂ,”QBœ|ǃ½6|8–¥,æln-ÁöAx« —YŽÃ5UuüÈÇQǺ¼¨>_yåY_G-Äi4F£Ñh4F£Ñ¤ã¥¥ã¡‰¸U‰¾ÿy/Ì(Aí#987€ÿå(AK,ßÎòàš,üv7–”p…oËPbÖLø÷¼:°i›M¸» ð¬rõ<õJ\«Õ PPßMJ¥d}eÝ`Z‘ ˜:'YïVVYÑü(„Û?3T™¦©„8Ë‚÷äa`~–UbŸ`Då9(7Öï»ð‰¢ªÏñÇ?Ùñ5×BœF£™1‘)/Ƀ7@= ç:£ŒF£Ñh4F£ÑhZ3ÝøË!±óÖ…(K´Jôùy%¸8råÌEËDäúOþ" wÃ+ ¸gþÔTû^Q€†ðÕ²²(ûJîu•pP­B¹œªs$¶Y‘bf ,Õ CmkÛj}1RÁLFG“ýM³Yˆûó ¼ÝWßÓd£ó+ ,á‚28¾/³Õ2Pqßl;)ߎö5Z”ç¢ »ÔRu.áÒKïïø7[1ç­@£Ñ,IÔìƒzYÑ÷õ`õʡ̄Í”›°i øYêg24F£Ñh4æHÇA‰G2.ƒÄjËE§ªÑ¿AjPã«j¬µ&€ Îr €kÂ3|x0TbX8e-üѸÉbÅ øÿªJв,eõ¶Ê„o—àÒj²ˆiùËìI+BÔË@,ê" ãXؓϹh›Âe‰ˆ'&Ô²íZsêRô=ý¢Òh4F£Ñh4šÅF5FêGo†£¿Jô] "ÄŠÍA‰ts­B¦燰=? ”›è ~éÀwª‰8VVçÕþž§„*Á0Ѫ |µØ,ª‰XÖ ËRVd¶Í¬xn¥?>wßÍU<^tùd›‘£Ö/7ÞÿÊù3e=R.§DCßg*†ID8r¹×_ q¦ ÈúQ¦Ê&êa3‰š]i´vë5%ÄÙ¨‡Ž$^€üePbžOò‚(GeäQú,‰ qz»Võ”ÔÒv´­ÌI5䓸Ζhv£õi6ÕÖh4F£Ñh4šnà¡ÆNbdP@_Ä(b5Î1¢Ïò=K’ñÓ¥ÞU³RÓxo®‹v;Êð'Yê„)˪L×m¯MY´ödÊç“xnõ'æaY­-å¦$] »æ;TŸ|Æ~K‰a•Šò9 R£<ÇeË`p°^´muR ‚Ϲ©§ç©ý¦ª‡”åºÍ2S´§ÑaLCÀ%I-=„š5¦>@e;†SŸ¨,±z·V %º‰5ÝT˜(q¬H³@æ¢^,~ê_ %ÆUPBœyâÛï’¤¡q/ýIÌ»ôq´0§Ñh4F£Ñh‘€*$Ölj¼3­³¨]fFß+\»xyg•ÀMiJ®«,ÑÄ"MÈfë¿7bYõÖkaâOc5~ý{ØÏ¹#YxË[(¥Õ¹LF‰aË–Õ‹hiJ¥z1Í4)¿éfìoý! 
© +u/]F¨õ££õ'ÍF&u¥ä{zè/§Tâîÿ˜P,ë$[„â4Mš´ˆRo Ö qݬãtã­Vtü"JàK7±–ë;ªƒÈì'qyÍ¥>Ñ¿òr2£?±“Ù™$½5Ô»ÁŠ{+$"XÈÉ1Ä¢N£Ñh4F£Ñhæƒõc65Α±T#iã,ÇQŸGGáñ ¬,ÁWЉ!™ã(ÍÈq*A{óŒ bÁéÙÏ~ …ØvP'ž9ŽC&“©Ôâ:E¬T(>ãÓØûÛP©ðèk^™ ÿ´aŸüä'“ŠE|Îsø‹ /LR£¦ñ¼DP‹,ׂ HEÓLTÄ|ž»úûyá _˜ìùк®Ÿ·ãy<úýï«òÊeÞô¦7ñ¾¯RQDzmÈåðÅÁƒ×qË-¥R‰ÿó;¿Ã¿ýýß3::J.—Ã÷},Ïã©¿ú+Žzøa~ü±‘ûÑV>Sÿôþ÷ó¢Ý»)ޱµZ壅¿ýÛ¿ã8|ùË_æþûïçàÁƒŒóg]Ä1ŽSï·êyJA´m(®VyÇ?ü?þñ[^—}èCÜ|óÍ\ûÉO’}ì1¶;Û·ogllŒZ­Ÿçï¼ãË–ñ÷¥·Þz+[¶lQje$ò=øàƒ|üÿüœ»îâ Ï>0 ²Ù,¾ïó­ï~—CË–1¹w/¯ôEþ¤“0"aêÁ·¿&&øÚÚµ¼1—ãí¦ÉÛ¿úU@YÎ…gžIù«_ebÿ~¾öµ¯±~ýzþîïþŽþþ~òù<ÿò/ÿ‚ad2|ä#,»ä¶Ýtÿðÿ *ïyÇ+7làa`÷ƒrÝÉ'³üK_â†ã8Ø‘{ë 7Ü@öª«ø«¿ú+®¾ôRùÎw¾£…8f©"A7%S© ZiÑ«Õ,Šd5•@žO`*±l¶(Ñj*f’('‹¾ì†ýÚ•‘6ßN»¨š©}$³jZ„K#Öp"ÔA"ê5Æ•Óh4F£Ñh4šN‘¤s9ÔØmº1Y©¤¬Ü2eéfu0ˆ+ <üð‹yôÑïÇ‚SµZÅ÷}òm2&ضçyÔ,‹÷>}'gŸ}2¦i244Äg?›ÇúSUÏcí=÷ðè;ßÉg¾ðÞèºå2¡ïóÙÏ~–¡¡!²Ù,ïz×»°m;ßLÓdtt3¥Z–Å—o¼‘×½àÿ•¯À?üƒŠEW.sI$˜ýõ~ÄCýý¼tÕ*.¾øbÎ;W*lºðB–¿âÍ*ϪJ…¿ÿû¿gÏž=A@>Ÿ'ŸÏ†!žçñ¾÷½¯ãß] qÍ"Ã%qAõ™^èjDâ®É¿ó…Ôk*·T1ÞM¹3A² Abí–^ç“›i =å$î\˜Ú^£Ñh4F£Ñh‡´ yãûq³V¡Òš‚€ÁÁA†‡‡1MÏó‚Ë2xå+/¬³~³Ú¨x¿÷½üññÇóî—¼‡üëžÉ>nüÍ«9óU¯"8åΊŽU«)+¹¿úÎw0Ï?Ÿ7?ãø×^ËAÇaùòå”Ëe²©dµZ->®i6?ó™Ï0ùªWñËë®ãõ¯=?üáyÝë^Ç{ßû^¶lÙÂ¥—^ʲeËøà?H¸®K­VÃu]²Ù,™L&.ÿÂO} L“L.GµZ墋.âÀ¼óïÄ0Œ&Ò4M^üÎwò¡Ï#(~å+jßlÓ4c·Ôl6›$m€Ø2ðË_þr[wÙF´§Ñ,r$™@ó$ÉfÃl÷› ${ð™™EÜl±H^ní& Á-H-3H8‰-'ï=±Œ+EËçêP£Ñh4F£Ñ,M|’xØ­p]e*ÁP§U¢Î0 Éåräóy*• ÕjÇq(‹±Kesüz³º àñ/|ÕÇÃÙÇÇÝ_ûŸ|á ÃÃuyúýïç·¾5øŠÅbleýýý‹E²Ù,ŽãÔ‰pªÞíMALÓäE/zŸýÿà‘ÀW.—ë³cŽ9˲bëºË/¿œ0 ãr«Õj"’Y¹H„ÑïÜsÏåî»ïny= ÃàÍo~3o~ó›ãeCCI¢|>O?†a´1/½ôR~ô£uô»k!N£éaÒn˜nô7ĈW ÁádfÒ™QgKÚb.ON\vëAæ >«•ªGÚU£Ñh4F£ÑhÏSyZÅÊö}•tAb¿•S±†††êc¿…aˆëºxžG†±»d±X$“ÉP(ðYtRiPÕg 8'™ãpÍ)§ðÞnàá»ïæ ïxÃÃÆÁC™ '>ü0þ0F³_“išT«U²Ù,†aÄ]§HÒˆÇ{ Ó4oÚFÊ6MÓ4Y¹r%žçÅB\£•]¹\®[öÌg>“oûÛìØ±£m=DÌl%Öe³Y*• ££Í~Z7ndbb¢£sÕBœFÓ£8$–WJ ’ §K…¹«f“`¢ñ¸"¢‰Ë®ºþVôÝj8NZŒ#ÚÞö× 4F£Ñh4M;¶–à=5xÔlH %bˆ’ƒbšPL¹2™fbuV.—q]Û¶)‹MZår™ ZZ }ó›<ëÛßæè£Nü\SÛ½÷Ö[8Á0NÒFœxÆòyªFûX»˜s`¾¯‚µs›-§”ÉF·×V4 sÇ<«V­jé+ˆ€(VésÊf³¸®;åþ …¸Y†!aöÅ×hÚQ@ A5”'qÍæ3¦ÛbE„²™ÐîÕ!wt™ÄEÕ"DÓÇK'i[|ÎМÝ5ƒè4F£Ñh4š#•Ïø0aÂ×\8¿Ø<¾+—•×ß_/ÂU*•8æ[¹\Ž-´¾šbÀ¥”½¾½{Áq”ògÛ`Û”J%%rEB—ã8„aH±XÔh¦Vn3AêAGÛO'ÂjÐæûÿ¾óïäÌ3ÏT–åÖ»H=Ó4Y÷غ$£FTÇ´89[´7\×…ll˜Í\à“ÑgÉè©ãÍ 
­®c+KÃ|›ýD¬k´Šk…ƒú-Ël«Ñh4F£Ñh–6ÿä‹,ø…ÿÕ %Fb˜–vC ÃÇq0 cf.ŸâçZ,*Ø”;e`Û˜¦‰ïû±›g¡P ‚Ž,Íæ‹ æÖàɣΠ#(—Ñléâ¢Ü›"Î4Me™QBYXDÛm,OøÑ œö‹Ó:ªÖò¸–=Md2<Ïk»ã88ŽC©TÂuÝØÔóÏ#Rˆ+ 8Žƒçy¸®K†uBœ¸ž‚ê*• •J˲(‹±J\­VãÆ«ÑÌêþO[ci¶ÎÕŽ¥í9ŽÓ´¾T*ulÖ«™±˜³I8ˆ8'Bœ¼$^\úõ!Ïxýkh4F£Ñh4G.¯‹b¿‰Mm+K¸V†`¢UÌØ/”\µ ÃÔ‚ ¶~³m›¡¡¡Xßd]OQ¢Y0“ Ü¢™Cë—×°­à5º¦Êà.]†qFÃr7ZWH­¾m5'u–¬áˆâ|ß§T*Q((•Jض]'ÆU*r¹™L†L&´m›l6gȵ6N»Hö“þþ~-²,A*Ôgß$ú,ÏÏóâ¬8"A@¡Pˆ×W*J®÷}×­:…aH©T¢¿¿¿Û§Zw~ I:Cj§4¾]YÓÏëÆ§¾K5F£Ñh4M¬Ž†g¦©Ä7×M\R ÐÂ0$“ÉÏç§·N+”²W*%–pb=¹cŠ¥›išuÆG½„eYÓ‹I–½4.õ¼A‰nf‹åéõB 5ÈóÖ‡ÑñÒ¶/"Ð¥ªz쯎åág>ÜÑyQBœXÀ‰ÏñÐÐÅb‘r¹Œã8är¹¸1zž‡eY ‘Ífëü¤Ó7øUw‚l'>ÞíÃ0S‚  T*Q*•ên˲èïï×VOK ‰'Öv½çQ(X»vmœ:YÜ£E¤«T*qúj±ü”¶íû~ìwŸÉd¨T*K¢ýÌdÞ¦ýÍ%í¢x¨÷„Gûg¿F£Ñh4F£YÚ”Jàøpo*²U¹\—¬¨7¬—Ôi3‘†¡²~s]p]¾qÌ1aXgb'Ù>eìØÊh£(—ËõBœOó@* qWJÓj[P‚Yã%”} Ô[R4 q%!N.—G2L ,öãŸç•¬Áq\×¥Z­R«Õ€$ý­aضM?†a044T×Ú+4M³IÈ›§Ñ;“ÉP«ÕbqMTi×u1 #Û,ËÂó<|ßÇ÷}²Ùlì"kYV\ç0 ) d2†‡‡çÅÛ÷ý¶éƒ5sG£;c;${eYäóy2™ –eÅ<³Ù,™LÓ4)‹är9@=Èí((§|ñ®£|ÒÍ–i¢žÕ VÉuu“÷Áâ—;5F£Ñh4ÍlxÌ€/z­ÝO…R©„ïû„aˆiš¸®Û:Cg&µZbB纉o«ãð÷_úåsÏV¹ø¾O¹\Ž5€ÞsAŠ%‚¥%qQ.¤Ùµ @$™ôòô©gIz‹UŒXÁ‰´b¦ö.]^Em{°ï`ǧtD q¾ïÇBE#˜°T*Å‚E'†[º‰µ‘¸¹®]»6ÛDü( ±9èàà ÃÃñ“+¾ïS,ñ}? 
}ßgpp0á䨵Z-´(f«s%¨8ŽƒmÛ 244gSÑÌZ' ð}ŸB¡€mÛø¾ÏèèhàjFlÕé8“““±«ªmÛq›÷}?n×bò[«Õbwçnq‹¹5¥'aä<|’‰’´»jÚfV[Æi4F£Ñh4Gß Àq•v¶}ŠÁ€çy ƒƒƒ‹ÅfCßO²¡z 'fu†ù÷$^P9Z6Ô°ºÌ´˜—>v6µ½Ù°Ü€ûºÆ:»G„'í;É42Mq5M“l6K¡P`xx8ÏÄ}U„ØlÛ&—ËÅn†Ó!Öq­”Ãù|žþþþø&n%~tr½Â0¤V«Å›ÉŠ $>ë¦i¶ðw¤ãÒ>6œ´Ý ZfèM'©V«@½ˆ+ i7jqÅ–Ï@Åœ|o, ”H;U[, T«ÕX„¶m;މX.—[¶Gg”Ž;¢D8¨ôàëN£Ñh4F£Ñ,0ð‚Ö¡4´FÒÞxi½¢­åšï«”«ù|KN’QJ¹¶mw7ÌT+!­13©`£Æéíå2¤-ÚÅ1—zñ¬•%ÂÑ>rý/›‡²ˆk”‹ÚÙge€ÑÔwCÕãஃG¶W*•Èf³X–¦¯Õj7À™ŠqCCCô÷÷S,c!NŽ%>­,}D8in¶äóù:DÄ™Nð}ŸJ¥?ªÕjìújYV,ÎxžÇððpìs.1ÉÄ2K33¦Š'&Ê"„NÅTmIî‡é–»®‹ëº±è*¿§¸K»®‹ã8M¿³´Ù?-¸QºlÛ<ÏÃ4Mî^¾œŸüä'<}—mæúž˜É&;“{¥>Ê2:öTÞ3ZœÓh4F£ÑhŽ,|àØDbHkgâAÔ‘1B©¤²;‹Ífu$ã-ɰ*ãÇ®Çz/¡®´Ä"1~¼Ôr‰ËÖ(VеJÚ¢-mÁ’r$™Tã¥cĉ°ÖhS’ê™–lÜhYã°4dN2ÿM›5ubbâð²ÀHvTH ÌgL[IܶN™Á¡X,R«ÕCtŠ(èi÷Ø´ÅTT«ÕØâ¯\.344g••xw®ë288Hô÷÷·t÷±çH'ýüiÄqœ¸}‰Ð9×äóùºvؘYX’¸®‹dòû¦]—å·—ö1::§Æ‚€ñññXÜËår”J%¾÷½ïñÜç>7ÎNœÉdÈårñ‹äpÉd2 NYV£ ùtLuO‰U4(áM,¤ÍÔ¿ºÅk4F£Ñh4G>°*Øv½†V(¦öÚs](”œN´ŠG[c8 ®‹pÇOkº(&õ¢[H2Pje-—ÎZš^ŸÖÝʵ”†mÛ ÆÆ[Ϥy nгnÄq»víbçÎ\qÅ 022ÂÖ­[™˜˜ ¯¯J¥ÂÀÀÀáq÷O âʺ&-^-´5ÏT ‘Éd(•J¹¶sá•øaí„3¹ÉEL×G‰K&ñëd¹XOµõ}?‚˜Nˆaw¾\€Et Êt’PBˆO†aP­V1M3vö}ÇqšSM£ÚÅÚµk›„>ɼaÃ6nܨžq)³i±*M[äÍ„tZî|>ëºqÙ.·•J%N.ß%FC+¤-çóù:K×´µs&‡äyŸNÈ£Ñh4F£Ñh–>Aô?ZȸJÆGSŽ…]W‰pa˜€26¨Õj±öáyù|žr¹ûÇi›¨rA©ý Ë$n[ÚH¬ WS‰%W%±rpQƒ¬´aã)6fÌ˦þŒþídX=ó¨I³`llŒ­[·²~ýzúúú€$±À7¿ùM¶lÙÒQ|5PtÛ·ooZvÍ5×°}ûvvíÚ5o'S(Èd2±;¦Xf-„Ð#ñ{‘r¹[3MÅT¦«+n: 6±Œ«Õj”ËeªÕjlkšfOT{Ïó⬒é³XÈvÜNˆó…F3Wèv¬Y*è~…f)°íXÂ¥o$~ý´Öp ÜP«ÕºÄ AÔy­s5†Äjôzê*1áÄm(-†ÑviË7Y.Ø(®qØ™§^Œó£íü†ýó¨Øn=Þ~9[ÂmÛ¶5kÖ066ÆØØÛ¶m£¯¯+®¸€‘‘‘i ܾ};×\sMݲR©ÄØØ”J¥Y ¬;A²…ŠˆX­V[§üf“a¡1b:!.(²q NÌ0éknÛv,°ˆHW,ãïé¶0_íb6,T;QφV-'«o¡˜J¸6 ƒÑÑѸ=†gÓê!Ÿ%†`6›% CÞ{ð SÉ×bU9“k-\Z4!Xê+ÉF ÞjÖu]ŠÅblE—NVâ8N,ŽÊ3¦T*ÅåüÛ—¾ÄyO=ÕúÈùÐö`>ilÇ õ,ÖhæŠnö)4š¹B·cÍRA÷+4K…lÇâ9žr{Íb||¼½Á X–²„® .'c+ÇqbO@aAâµ;(«¶4^‹e*±AÚuê…° U¦d#K4›æxpÍ®§"â ÇëAV€Ø.¿üòxáÐЬY³&^¶~ýúiãÅyž×´ÍÈÈ{öìaÇŽ€RœwîÜ9çVjbÉÓXn¯fó‰À`j×Ĺb:‘PENuãŠÅÐá"¿‰eY †††bÁ¥•EÖB³íØ£9Iƒ¸z†a¸ n»bA:“Ì¥rÏuzŸ5n÷ŒC‡Ú&1(—Ëäóùºøt¼XDtANÃb±'cÃÁÁA€¦L±"‹…¦d«­T* ÅíÓ¶íØ…6—PmÚ™í¦`*·R‰Û5ÙlvÎ…ÍtŒ8˲º>“¶Ðí¸Õ3Df6æÃ”¸]BÓ4c¡k&Bh+ñ»S¦ŠÃ&ë%¦ ã8ÓZt AÄbšaMŒA ÓC!Ýþär=$i„ã8‹Åº2²Ù,ÃÃñõ\¥Rá‚ .ˆEÔ0 )Ò:Vè§Ÿ>«kÖ)­ÚñB=‹5š¹ Û} f.ÐíX³TÐý ÍR 
íxw…þDWë(Q¡çµLÈ H\|Ó4Æ+¯Bsò„ Í™Gź¨Õp¾q¹DøWÔ°Å1Di¾–©·~kGfjÁ®‹,`çÎxžÿ¥È;wîdÿþýMâ\šR©Ä•W^YgE°ÿ~Ö¯_—tSqï½÷²uëÖŽ\a娒]r!ñIÚ“O"¢ehn/a´ÞAµ‰¡ÔŸÄ!,£Úh6Ún¦’”G{O,„Zîçy8Ž£ÜSÇM–o%f4{6¤-œ²Ùlì3ßh,ÉCn»í¶Y©3æ²ßvÛmlݺuÊmÒÖ¹\.ŽM6::º0fÅâ>ÁAz}­a_¡LëXM>Jœk<†I{‹¹.²}ûöõWlÙ²…±±±x.™S.»ì2ÆÆÆšfóÒ\sÍ5 L)ÔÍ„ÓN;-6íß÷;v[ OU൩ÔÁÉoŸŽç'Ãv—ä÷w£íó©å£(‹¶"‰{r•Ü#D Ævjßjª>éx„iû§*JÔ7h9f:+o˜ÚÇ#±KÌJª|±ðiÅ¿Þp/ùú×Á4)Dåº$Y_~î¹Üñ¶·Å F”°\K»B½{­Ä>› ’%3“ÉÏçcsÛ|>ÏÀÀ;vì˜×÷\·ã7NÙŽëÚïÇÔ™¸†vE4ëA€IDAT›Ã±œJÀó}¿.í¶dníqoN‹„í’^L%<Ê=Óé,“Ar€²lWXáÚk¯åUå2·Îc‡y.Ûñ駟Ϋ^õ*¶mÛ6oõÕ,>¶lÙB__»wïž—òçúY|öÙg³iÓ¦9+O³tرcÛ·oçôy°RîvßXsd°ûÇš#-[¶°eË–EÓŽ;íûÀ›ò‰·%L#ÄI¨ €)ÆI2¾ØÕóNc:H¬×2(«³ô²Æ‹`¦Ê%X-¶k<•JdX$lÛ¶M›6uÜ?^žÞQ“§Õ¶mÛøÆ7¾1¥r»{÷n®¹æ,ËŠ-§,ËŠM=Ý^g±¢šªŽ>êwì®vá ¦ú>HâZAµ¥`…jÙ Jàò¢¿"ª­‰ÈeEûfI’uXÑg‡ÄšRD3ešš6îI‹ZA Ö»%µm‰D½Û‹êY–E߃¨üjÃùt¢‘ÿçòåìÙ¸1¶ä"É<|gËÖÞtFtywî‰å\¬Î¹ŸÖ÷ât Ç–YCCCq ü…`¡Û±K"^J&Îùá:îÜlH_{¡Z­Æ–’Ó‘N !çÚi¢‘ô1Ó/¶N­ñd?Ù>-Šåé|Ó®¯X±bÞŸÅÍ\Ðí>…F3èv¬Y*è~…f)ÐvìQ¯_…aˆïûíÃY–JÒÐÆ-Uâ[ ¶mÏh.¿á_A†IIì·5¸ ¨—¾í†r.ÍbŸdZínøøycÅtt¢7ÎjˆeÀØØ###LLL°fÍšÎÌ1g€p­ü­=ö?·á‹>„><5”OJT«¡~Û Pòà& Þ_€> >QnoÕ%Vgílð,T;“vã’,ÅóÔ=6:Z¿}¡ Ö‰a_™ÄZ.•õ7ŒÛpQÿÇH,î$¢:À3+€Ïݰ/~q„ /ãwÏéÃ÷aÌ€—!÷>ëY¼ûè£ 4À2TYðâ»ïfùW¿ o|c\W‘äÞ—Vƒzk:3úói+«Ã0bDâƒU*•Ž…”Ãa¡Û±KʪÐóÄn&‚R·h7³cš&ŽãLi…çy^ÜNÒ/¹FË´Vض]w}fÛÞLÓÄuÝØ¢NÊñ<0 y衇æõúµkÇcccÔjµy{k4sE7ûÍ\¡Û±f© ûš¥@7Úq|7ùhˆçºîÔe›¦òcmC­VÃu]|ߟŸ„”aô×8Ê¢¬oRÚE,’¤"ˆcÒÞ( q¯KS¤Ùê-ê–±—Nã;00À•W^Y¸p¶ôõõqùå—sÙe—±yóf<Ïã‹_ü✀$ihÕK%¸¸ ?€¬³auöû°®ÅHh«Tà)N¬À¯«Q0E_\Õ=‘ÖÂ>ÂES4Š¡ ‡ B‘ßÊ)TA þ ÈfÕ±²YUŸjU­³L0%Ð9Lpwd$fUUÝÄfìù¦Z†ªœã=ø¼ ÇVÎäÀƒ;øý'V³¢«< àŒKñì Ïç×Âÿ«Â×JÐ_„G=°ŠªÞ£òÈ#@’ùE6qc•ä%²L.Mu_‰X(.¶"™UÃ0ìjÜùjÇ&‰ÕÔ‚˜/r¦ pÚ.>\'ضÝ/N2¦Î” êÜ^Å.ŸÏ³oß¾yOÖЊù~k4 nÇš¥€nÇš¥€nǨï{K¬ùN&À{…ùjDZÇXÊÂuÝé¯Md‘#ã†4b”0'Vp­,Íš“0X$ƒx?õYÈ’X¹ 7,wiζZ¦µ×*Fôl¢…¹`¨l!;wîäÊ+¯¤¯¯/åfëØhusÅWÉdسgÛ¶m›S³åV>W:ðWl«Âƒü9`¦ÚL (”0æº04g*.âð0ôGÙM\W [ù¼¶\W}ö}¸hŠûhY8°Ê…QÀ7×TÇ‘*:Þè¨ú,:S¹¬D·R j5ÈåÔv¶­ÄÃM9wõ÷´¯Ú}G¥¢ênšðã!ð øÇ<Œì;‰}ìâoí—ó¯;È}ûFîúôEœZXËÛËêxr®ÐÚ5ýPn¬rODp¹7+ÔÇ’óRÛ‰;f§R‰eY‹ÅX ¹ñÆç¬íÌ´mÍe;öHž+ 5ƒ¸Ø…¾jµJ¡Ph»^D³¹l%‘$k˜-®ëâ8CCCA€óýïspÆ»név<ŸÏbf¾XÈ>…F3_èv¬Y*è~Å‘E±á‹ã8±;d¡Pˆã[û¾‹C²}ãÄxS»e.0óÝŽe<ìG¾R©-*’Ä„k"§R©`ù|~î S (Q,-tµŠ'¤4ì'Áð³ ey 
Ë`æÖm‹'túŒYÊgÚ¶m¶lÙÀš5k¸ì²Ëæô@}}}gÓéß÷[Ƽú„ ÿ’…?6”X$zYúw/—•À†ê³iÖ OÙ¬¦òyej[ÛN„*Yê¾q]µa(AìÆ ›…WØð¹<­äØ4Þ“r );½Ï†ÈÐ&‹:Þ>6áwòª©»ìc,s%ls¹lûËy̹–SîøŽ}7ß³‚ça€$AK>¯ŽÉ¨rN:i×g`Ü€ï¹pS6D××ðàû!xYÔM]M’Yˆ«M’øD¬ä:‘žÄpp0 Ù³gÏœ¶¡™2WíX¬K¥¾ïÏ8[él˜÷óÌt–nŽã†áœŸ§eY'ŠHcžçÅùû÷¼…u (Ä52Ïbf¡ÑíX³ÐíX³Ðíxi!}^IX&ýk1p‡l6K†ŒFq–‚ Àu]ŠÅ"•J…ÁÁALÓ¤X,’Ífq]7ŽóÜ+B\#sÝŽ=à³óIR¾áááæ ]WYàF]ܪôuê$!å¬h294 g%Ô€¿HbuÓ(ØI–È€$•XÊEÚ@Ýsrë)b×Ô´ò;.© A.—«âT’€“B¸Ô€/O±¯i*k³vcv±JWí r­Õê·+T–¥mÃPŸ ~'²° ¯Ø™ð”F,Xóyu¼ô3MÎM==¸·Ý[èÔj5îvàfB³^` ‚ÄúÏ0 žóœ§È¼ þ)€¢ ¿öaƒ~äZ{(:Ð/}è“›4ù“(®\j0ˆº§º>¶mãyœáUìM$–eÁu ‚€ZcÃ:™JLk5Ëê%7×1Ôu&u—”âAÔÅÿ{ùË_Î|p>/ŸF£Ñh4F3cljã £)È÷}*•Je–ˆn D#Ó4±,‹L&ƒïûñX§T*µíÇ/%Äóé×h~{ï'ßWîx©ëíy^,lš¦9?^M!Íî©’¹ÔhX.àJT3©w#W¸F"Iä°x<•”å‡_Dw³Wâ\TÏ*ðœh›2íw̦B‰P곸Ês£TRB•ï'1}_ `"œ‹‘HÇÌâ¢MGº‹ÜtHâ…jµÏH˜¦É #“µÆç¡doµmCƒoC*;E&£\`Õ6/8ׂ•¸‡ïÏûÕöçIÚÚAN€Gs–VˆÙóR1y÷£n¾˲Èçó‹Þmt.±m{Ê„ SY§õ‹=pD¾§yÍk^ÃO<Ñíjj4F£ÑhÚàºî”!Q ß÷ã±ÛB¯\.S«Õmšä¶,‹Z­Ö¶ÏmFœd¯X,âû>Õj5óˆ!ÍR'Ýû—ä’-iá–jY†a¹\n~’í…(ñ¤®.QÅÓÑÉÒÖ2â‚êQ'T\¹FÁÃF‹pS[ÄŒŒ°}ûöº•ß·mÛÖíúƈy¬ÄsPíÄ/%âÙ\ɳÙzË4ËRB”Ü3ýýJjõL2h¶ò<|fn]»€›&Ùl6lNiS9I&Q©¨åæ*—•Y.«ëb꼟Â,åòëp¬ çÛpoŸ+¬ÉÃCøLìb"µ’VJ$ñäÄz©àwìÞiš]MB±Ø˜J°ì¥Œ°é8u>õÔSyæ™gv»ŠF£Ñh4šJ¥¶mS*•º>Q†!¹\€b±H.—öí9 iã8®ëR«Õ0 £ÉzëpB¾äóù:·Jqi- KÚ(ßF›ëú­ÝK}¿¥eañø°e\¹¹À$I¼ÐèfÚN–ì©âÚ•Þ¯U²…yò¨]*,bè‘‘‘øoóæÍußGFFº]×:$+a6›{–$¦Ú|¾D€J'6XH=e¦§˜Þ¾\.ÇpË‚}-žƒ¶­Ä6P.¹2q!e±P¶¬D|üÏÈMvtTYØ~†çD7ä à 6U2ü*TÏž½¡r%ˆ2&G1÷rÑq*¨ÏÝ~Î5ÓÌŽáÌFœìµx¶m·M`⤓º]=F£Ñh4MŠ ¨T*d2ŠÅbWÇ…B!®‡iš”J¥8aÂL-ÊÒqß q}, T*•Ø4“ÉÌûØ$ŸÏãû>™L†ÁÁÁî\ÜyFtª|^â®·qÇõ¼–Â…….šw7Þ2Í®© ªò9mŒi¦þí¾Ò¢gÀ–-[âD ‹yÕj5 $g )®§Â>Õ}^JQœ»b±þ>lEãýx£§DµW{ð!¸02{3m¸È†R±àžÊ+ëØã\Xç[S°€$[²çyô÷÷wû’6’E¶•µ”F1SQ­WMÜ[%Ðï-F£Ñh4šÞÃqªÕ*¾¯,˜<Ï‹½°æ ‰¡–ÍfñØS‰pêB$®|)d\388HµZŸ­š•úWÝ5ÚdQJãÒù¹ºÆŠô—‰‰ vîÜÉÈÈû÷ïg``€ž³Ú‘tÉÕj5Ž1V!“[º‚Xš…j÷ãNhu/NE¥¢Ê¸ÙƒÀ†Ï›JT« îߟðh¯Í—PÖ«_òàäè CR9˲xúé§èìçð£TàšÎ‘L¤Ï$ß÷{æž¶„Ó¿¯F£Ñh4Mo†!ab¥R ×uÝgÛ·ô}Ú> du'îßzžG±XlrcL‹nÅb‘L&C?CCC-ë(iÆAÿ3™ år9ŽßÖÈBŽ÷å|䜗T¨ž»Bx{ž×|Íðu`ù™Ð·mûðÇ7>°–úlÊâ "7­è³4Y©e?‹Ikk:ÍŒYÊnëÖ­\~ùå\qÅulÛ¶­[·rÕUWÅn#±µ$î‚aÍ_l¸#ü,-FÖÔø~½HjäaI–d¸n¾gÀ>yÿ­Š*©ƒ 
Üùö·côH<à˜Ÿý¬ç,MD6M†±…l¯1UXF£Ñh4͆!¥R‰Z­ÇEk'€Iò‚R©D†”ËeJ¥¾ïcƒƒƒq@}Ó4Éårq4I*X(°m;ÞV²‰¦HfÑN°m;>V+±Æ÷}²Ùì¢kH6Õ³Î:«ÛU™&¢¡ÊÐ a³7Q©TŠÝ›0úKg{!Î&qCmlþy’,ª,ŽfÕ󬨹s'M"Àš5k¨T*¼îu¯cll,NìÐMDÝ×åL&}4sOÚRx6s(í&8†H¹ºÖà…À¯/F‹^: o1Õýï',!¥uï­·rfÍH-¤£‘6ùw]w~b'Ì‹¥¤Ñh4Fs¤P©TÈf³X–…išmûk¶mãyŽãĆ "¦¥ã·‰õ$n—¾ïS*•0M3v •㚦ÙÔwήô …B]ìt˲ð}¿''¨ÛaA,!nO˜µLÎç·öäº.aÎJ,)’o ×òÙF ´=”ð&Í0ŸÚV3/¬å–:00Ðv£¾¾>6oÞÌž={zFˆ³,‹PEµÍüÓ˜¥¸SZy`Š[¹Hkâšn¢Ä9Xeªg@©¡^wç®_ßíË0'ìÝ»—7ÎRX”̲bi¨Òb«Eô4Í¥k!jYVÜáw€9}ai4F£Ñh–“-‚XH+‹SNœJ,·áááXpK[8µsõ´,«©*¡Kæ Ið ÿA€ã8=®¥SzÉeöpYG’ƒ¡Ñ“‡0l9H ÊŽ Jtæ"j Ô…Ô²ôáíhAó@ßéÖ<2XÑ醽#N³òwJSºX/Yºeg3›KmYÍ¿QãsÃlóoDâ’ O<¶’“Ï:¹Kg>wøÀøÞ½Øï}oGÛçrI<Ï(‘R,¼e³É2ÀBA‰sž—d»ëwq30zÁ˜¦Šù'"`¹¬¬TËåöÇ–mE¤µ,õÙu“öÒ‰G§XÃÉËÛ÷ëGæóA£Ñh4F“ î¨"¨ S ânZ,ã}zÍ C,ßÂ0ŒÅœÑÑÑÃ,µ;<ðÀݮœ†a³(Ú8X‰+IÃ0pgz I ”?õqà q9•Û@2¡66m—f—UÍœ±`óæÍ±{+FFF¦µš[(¤A{À‹=%Ì6†™fa˜My6ˆ5äÁÀÊ•+»}:sÂ]Ÿý¬J+Û€ç%"•ïCµªÄ6yççój¹d»6 õY„«ññd_ßOĬäö6Ø»w D/ÃHöM lí„°RIUÛ4•Èçº*þA¥Ò,zY–*S„B™ªVÕòB!Y>:ª¶ó}UŽa¨ò}Rá3šôòa¨öÉå”›z¡ ö·uÓl~ïÉ>F£Ñh4𥏅Î$v¯¸•.,ËŠãÓ-F,ËâÖ[oív5è˃e´Èž[*©AY 1×0 lÛŽ³øvDÚÝ”›Ù°, Îþu¢e²]%ì5¶ÈÂe‰<YÊÿ}çÎ\uÕUlÛ¶5kÖÄìÚµ‹íÛ·sÅWÔ-ï6 Âë«-Ûò’f©kâ›~ííÃÏC¸ëôßèvõæ„V&ãA „eȤ]§Û·m7Çöleu&k UV¥Ÿÿü*À¤X|?ÿ¹Zf‰¸†õjÖt¾¯ê–Ï'©X²U«­cŽ–JõeˆE¾‰¶­¶-Tyž§ÊwZ˜DKŠw˲ð<|¾J&“ÔK„DÓL¬óD” Â0„þ~%"Jý2™äZ›fóù”JªþZ¼Óh4F£YAg0^ÂAÅÅro± qK…¸0ŠqÞ” ¶Xl;˜+¸t¼Á)1iÉ šÅ9!K"ÄùÔ[¿´¶®Ó¡®ç•$d¸êª«xÅ+^ÁæÍ›سgccc\qÅlÛ¶­Ûu”²üÐÚµ@ñà„#A˜®s]·•Hr¢ Ýû¬nWí°ùåÄç5<`Ã0£ ± ;\Ä -›…~ô\ ä?Pq¦©/¢D°VËÅbMD·B!²Ù·]hV¡Û*•zA1-¸‰E”[,6»½Z–Å×¾vïz×?G/¬ä&ûËäe6«®ï×'t1 U·\.9§j59VÖŸS©¤DH)+ŸW®«¾Ë5ÒVvF£Ñh4½ƒÄN‘c)³X¬÷–:7;@¾E¢†6í/‚fÑn:ZY«¤2!6Ш›,¾0‚KŽ8F\__;vì`dd$vSµm›Í›7÷D‚! 
CÖ¼ö-ìɵÃ.nQ2Û¤ ‹“f«?Ó„Ó²pÿªÅ/ÄMuTS@Ò ¨w±ž«¸fâÖ™ÏÃ{Þ³Ÿ;îÈÇe‹˜‰%˜X¬ÉòR)q[+ËSßOŽ#ç:]®ïÄbïŸþéí¬^ý?üõ_¿3ÎPeˆå[é„0„ÁAU–Xñ9N’ôBú/ýý‰ð&×¢ZU×C®[©¤®I¡W–Æì\²5F£Ñh4sƒëºT*jµšÎd¿X 1â|àQl5ß÷;Š)8+è´…›P¡>œT(]…MД¬a`` e,8I…Ü |÷û§ðÜ¥¬DMÃRdi«U0lxãM«º]½ÃÂŒ ¨â*%ÞÌ·%ù_üEÛ·wž­)-ÔÍ%­b¾M•PBúM"Ö=òÈÑœxâ8Ë—¿‡3ÎHLô:Ékêøi+@Ç©·¦%Þ‰eÔ‹jrM‚ ± Uf¹œ”)(®v„Nh4F£ÑtIêÕk 4ÍX–Åõ×_ßíj6!pi6±Äœ.ƒ­çyqR$@eNM3X¨At£{ªTÁ‰öÓb\OÐqÖÔ^Á÷}ú®;žÓíšhæ‹VB£œ~z·«wX¸ÀÉ?ùIü°-.ሲ†^Lvû2ÌÓLÜJK%¸ûîçpì±?aýúúV”4jZÒïCËJ2µ¦i÷{d³IR ÓTûNN6—]*)O’X¤9F£Ñh4ÍüŽãàû>5=º(XJnÃg¿âJÿ«g´ÚfK Ã0œ¡2æ“dCMSFYÅù´W@ q=ÂònW`6üì˜c´[óˆïéo¸³ÛÕ8¼sž¾þúØ<>”¥×z÷Ì òÞ²,Ø·¯Ûn»mf3Gm„U,Öo+}» Hg¦U‚ç%"Ÿ{ÅÎ 5F£Ñh43Äu]2™ ®ë24]ìfŽ €Ûv?…mÛõîй\Ëí³Ùì´VsM(®•‚E½{jcR†Íñâ4]aÑ q¿8tˆGs‡_ŽfñaYðèè‰Ý®Æa³÷†âÏI4S“ÍÂùç s"ÄI¬¸Ù`šJ†$iCº\Ë‚ññÄ]tì¸ÅD:f®$éðý$ó¼ï«¸„âžìºjyz]¥’”%ë Ãúci4F£™¾ïãDÀlÛž“¾¢fᘘ˜èv›ø·??¶yEC[,DÈ'i%¶µòÞÉ ,áÒÿ­¶Óc‘ž`ÀØØ{öìév]¦Åu]Nü­ßbÿ÷»]M·8ÿü¯£â'>`Ebí¦8;l*Ÿl¶»æei!®ÑÒͲæ.á†faLņ¡&-ÃPY«†¡ÕÄk@2ôJL@™Ä4M%ʆ²€,“u¾ŸdnÞƒë · jQãL&©iªãf/€ü_$Ëaúxˆu´Ke/DÙ½ÚÒ X£Ñh4šÃó<2™ Ùl–jµº¤\,ËâàÁƒÝ®Æœðä“OÖ[Ã9NÝÀ w¿P(ÌÞr³„²ŒËSŸ-5=Ñ}¸žeÀÎ;¹æškº]—iq‡ã>ÿ-Öh!îˆåä“ïév‹ç8$2G¤5œ¼|”Ðh‘æ L¦Cš3ý ®Y¹œíú,§i¶ÿýDÐÑôa¨,ÖÄ 9”µšç%‰:|_}®ÑJ%µŸ¸§›e¡D®¬(¹I¡ \–ãß?€¬Ù!•·TR^¨~YÙ†ÚÙI™Òóýè± ü·‚qµ:ÇI„ß8ÜHHsªé49`|ŠõÓ qÁ€5F£é1<Ï£\.“Ïçµ·1 ƒóÏ?¿ÛÕ8l~îÁÊô -¯£s•"³áĘÃFõCAõEpóQÉËè>\³`Û¶mlÛ¶­Ûu™–§žÊ¡ŸËojUW³ p¡eÅ5—|Ì0—$ƒ†£ "¢›M"&8Ñ~ùè³IÓLNqª 'HÛõÁä…ÕNuQN‡£CêM¸£zJ†XĘnÆ)ˆÎ%„ o½p!®þ몾¸•Š8†J Ëf·äJE‰c"²¥­ÍªI,ŽV¤‚åf³ªìºþ€jG‘»²XÖÅž6XšÚŽe%BŸuTR¾$~ó¼D”3~ ¸i +ÌÃuAH·sF£Ñhz Ïóp‡ÑÑQ-Â-bN>ùänWá°ypÔ±?V«Ž£:o©v)ÆžçÕ[ÎÍ+ú+µYWH}Öô$ËvíÚÅ®]»º]—iùÉþýã~9M7ðSࡇÖâºMÏäÞÃA íðHD-·Å¶!ê%àF'/™‰6ÄÚÍþ2Ô‹h"\EG”H‚Ê¿%š3ÿ”Pb$fÚ~´,ŒŽ% ¦–Ë‹LΩÕOÒ|/‹®‰\—tJQY.­ ¢ó‹Ê?iâ¤yùÉŽTJ%ÕÏqœ$^ŸX¼…¡´ªUuωÛp6eiÙ˜„£“zsÿFÊÄ•a¨¸€uxÄm´\V¼¸ŸÖcª! 
V¡¾¯ÜgM o÷apþR­«T Àºã¤:j…ªKÝ|¦öø—û@£Ñh4šDÜûŽ$Î=ü"¦ìÞhfÇñÀã?žlž—¸UDø¾O†”J¥:7ÕaPotÐØO“dÁ:\ϲ`÷îÝìÞ½;^¸}ûv¶nÝÚíº5ñÌMoâ'Î cãh4=į3G}I,,4&fçon @ Úð‰ésäm.â”O"0RûÈ2õ’eÅÔçjô½•ïÄ'ý©ã¹QY"†I†H\_e¹l“޲Hª»&>õ{5«8âìÔùSëE « „9¹†²>üîYä.Öó‰$4h×/I'<¡Í0”…™Äy³,åZ«)ñ«Ñ€²Z¡>]š÷LG¥`Y™ø¼.úWÚvãýå·YN}\ºâÀz#øcJˆ”xt¯Îû:`@©þ•Të½/‘â"ާ;Ók Ñh4Mp]Û¶›2OzL=Ÿ õÝÄn½æœYîWbê.½‰dî­¬Eu[d.Z¾Kw½”ús£ýJ©r2Ñ~éí¤‹ É`Y´¬?ú[Kóœz%*?m¿Øi9g)ASˆøV«Õ?ÔN*LJõ›ŽUÝó¬èvfÂ2¾9>uÌi¦W‘‡ó¡C§u-6\H˜¸ƒvRa™À‘^Bõö­’Äq« ü"ªÉŸÚ_,ÜrÑ2yû‘ÌØdI„8ä³ ŒRt^ʲ¢Ïõ=.ŸD0+Eû‹ `)*7]GRõ U/©G1ÚVʰRÛVSÛ‰ðf¢z9C©í£roúÇ›æé^ÿu0þ€æxr-F2+ÇVrÚ/N›£‹¬Ñh4ÍÌÃ×uñ}¿É½OæMÓHÔéZÑ2qÎn©¬Kw•+Ѿsmo—¡¾»ØiùÒÕaKº‚GâØÉœµK}¸0+ZîEu°Î[OŠQÝ Ñg õ,õÈEëצΡ•=DÒ=®D¿K™$Á§OÒMš8iq{ŒÀi!ÜvÿÀÛÏçç0Ö‡ô!¡ÙBNÆ,Ú*®'Y4B\Lœu–w5‹–XvÓM<òÈ™]«CH˜<ŒÅòÅBMUÉ›2Ë,þ.Ÿ}’)5)G2öï-@½ÍÓ=±F³ŽŸÞ·ñ&Jm+ûÊqÓ½±LK‹h›|T®ô>ŒèF[\¤©âöÔ tH}¤wçÑ›Áþ±³ßk)’Ë)·ÐBAY«‰p&n¥ù|’4A,Ý CmëyêßJ$ ¥Ý»[L8Î iËÐ4éØ€Òžå>I×ÃLmŸ.ë,à'ÀïGû¨û°±o–'¹Ÿ|âûjH,5£YPs%¸†âj0ª``Þ Ö‡€_‚ýf°ŸX``—”˜Ù_;£®½;…F|ð_¿ ±f•éî¢:—ƒ¿y{Ÿ{ï<\pF£Ñh¦Çu]\WÙ„Iàû‰&!sY’¹W±î'™c% ÉÜšKû^D&¤Dà“ùßÍš¼¶­Ôw‰„2D•Eœ@Äú¬•<“~ýK÷Û éRç¢: “aâtÒØm¥Ç˜©²½«Ô‡ˆMw»ÛÙ¤ç£ÓÑõˤ֧óµyÀ­‹\ˆؘ…G[¢‹dÏf>›DÝL#á}´'aϲ¨„¸Ç~z>á$ºAi-cßy·íîã_ÿuž âO•¤ç!.‘@9ý”–i´Q’i¸ô쉼ùÅu-=må¼QE“}ó´ŸyI/o'xµS¬iÊš.Bc™Rïù@^z6:KQ $yT­Vß77ÓjC]ÓL\KÅëD¶I»œš¦óš&E°í{£LâZí“$nH÷L¥',îÕ2Õ[#éYK¯>€Ò`ïþo´½ôl‡Hb+ŠÛh‰¤—Ÿ‹¶—¤&’ Äkì;€?Iêd”ÀúAªž)—V±B •(šÍ‚õ)µ­}xcÀNàOUYÁ߀ùâgÃi??Ý/NBZh4F³8ŽSçÚ'ÝÖÑèsÚ2+Ì77vj¨Wn•dÎ9ˆ–gQchb%'Vmt—Å’L*Ä’,D‰eÙè8²_6*G^íA¥Hâx"Ý{éš‹x&V|N´ýlŸÚu¿«3,§QÌk\g“XÃ-5:uoîïïgtt´Ã­[ Ô×îG_Šy  q###lß¾=þ¼gÏžø»Ðí̪+qˆð]­‚F3k`ì¦~8Œçn’·¸ È%nZ•ú ý$i$›©ØåCò¶—PYŸv¹”ž„}nts=Ò…rNZd{=‘n’DÀ²”Ëdãaµšd år›¸rÒ[m‡ˆÊév,~rŸd£ñ µ®L2Õ.÷„Üo"b˹HO]Ê Áä? ö©O’ ½`Id’¾¥ì´?‡Œl([©å"Ö G˦i‡ž§®¿ý«ä0ÖUêsÿ5!ó>£o£?ÿ5<<ûßK£Ñh4šÙâû>†a`šflp.ÝR˜:]UeÈyðº"¬³ ’•&œdAÁƒ·•³¾ç¼ LÌ´ —¦Úæ³tÂdz¿±7/A\D(oW~µZûä1Æz³ž¥’rc}Á NᕯœƒF¢Ñh4Í q]—l6ÇBÞt®G‰®˜eQÌL-oG™ú¨*Smß*òI§ó¹RÏÅ"nµFÕá"îmeÔ;G2—¶ü!’ eüì`× Ô[\QeŸtœ3¨y5½»¶Ó¤¨Tk¸Z ŠùP)9iÿS×U™¤Îd” –Í&dY¨m$8\Â9ÈEÛ‰ø–Í&äNµ• —…îLþ݃Û"!îǸ߅?á•¶ö #¹—\"? 
KÝ/g„ðÑ’Šñöáš:Ž…Ë¬$£Ä×=xN¯·`"¸Cá=þÏÏ_£b»™©kP޶›®'Ýî>“û3Kg Y¤¸)î[¹ìR5ÓTÛÿìg«ùß8aAÚ‘F£Ñh4‚$hÈçóqœ7˜{®³±8[q,l±b×w\·«qØüñ‹?ÆQ»žh»>Ç¡\îЉX<#Ä’@‡itB†EGÛq»víjZ¶yóæ®Uô–uëXñ͸¨kUÐh‹Û?žc~ú¿¯±=|¸ÓÓP=qg“ñçø&3á!x>/:Ó€£=ø¦ßvEb„ôÄ5µS îƒ@‰ âwØè{(ªŒç%£ù0lêU(¨åa˜/¶­>C¢¤‘íç%Z¿¦¢{µJþ$ÚZÜ ‚ 1§J qÒ${ƒ,“]7ñ;ª7›«Õ”ÏÇó<n Qކ†Ô¾áB©¨,Fï¬A9€?uáf”ð|´ wçá]ø²­&"Üó|Xµ×Z*5î'MxVï·Õ_ú¤M.ªEiŤ݊y`>OxË~ÿÌýù“vªõrMr¹$Xž\D ¬—Ï·oã’Õ˜ۡˆiªjˆX—Ï«¿w¿{œÿXOŒi4fá‚€\.dž¾AÓÄD;",–‚Ï›\¼{7wüï©Ç{öL8ˆA…„L‘i=±·k×.®ºê*®¼òJ6oÞÌÖ­[Ù¼y3»víbÍš5ضÝU!nbüDÝp˜ÖDM9é—¿äÁŸ3ý†Ž£F¾"RIªHP‚YèïW6ø†ð›œkÁ!xT‹jÔ| €”`YÎÉÃn¾mÂ}ôEeÆS‰%ð‹êø^õ²ê£âƒT¤Ži̲’T—i¶ŽO<±Û×K³„™ÏvìÇýìÑú…A qé4M5Ø‹ bQ‰u–_©?P®¢ÿjÀ•(WJ¹„Ên鬋é[=ÿóy5òѬ•X6l»¹œVå¶ri$â¿ÄkIJ1qh(±´J¿èLS ÃÃj›¡!U®¸3ÊïR.«ëß߯¬ÿ@ ƒƒJ ‘e=ÈTíx.žÅ—<›MŒµ¾{ë^>ðÖû({.•w¿›ì¡C8¨¦š·m£pÕU,CŸMk]?ˆ$‰§ê¬èHÒ˜¥—åQm¿V…«"k¹Æí¤ x…ÿSž›{ uц^øþÖëÒB|ú>l CCêù³Ùd¹”!‚u¡X¯Ê¿þÌü £Ù¨t¾Yˆ>…F3ßèv¬Y*,d¿¢.ð†§Ÿæ®e˹ä>pÒIüÉ'ÆùÄ4K 881â¢?z ÿÀ”®§AÌ D’ !iŠÞ†hfÀ €‘‘‘&‹·´¼fÍi©ŒŒ°fͶmÛÀ¦M›xÅ+^¯Û³g;vìÅy&¾Ñ!ðÔ]a¿õøn_/Íf¾Ûñ¯ýÿõô>p'ÔàÙuëSHv2د¢„qŸK?˜EdõОÍÀÙ²êãwõr}jµÃ+Ge¡QlLÏç•W.«ßK„ÂB!q Ë.±¼×Áyˆ×Õ íÚñ\´á4’̶“Ó|ŸŸ=n-/yçͼ¥úuœB!’ÄŸýD‰=7mb<ú.É}sÑ¿MW,ÃÔ&ø~ô—/ªBj¨^»›ú%.“’¸”S„Ó—À ÷æ kÍ”>w&6w7qžê÷h7{*?„ã$íÑq”˜ìºê³¸oƒjÏùÞM¥5ßÏbf!ÐíX³TX¨~E;à·††8÷¶Ûàœsº}94‡‰ãÔçÜ‚¨Ë \ôå/Ã_þå¼wAÚñî§øÁý_¶7­ Ãß÷çæþÐ1â–ËŽ?¾Yà’Æ(´³†X¿~=W^yeü}ÿþýñç]»vÕ‰|wn&øÀêûž˜Ñ>ÍL™ÏvìüG?+ŸóœúXO³q“™‘FÝÎN-K[ÁiqW÷UÛVbÇèh"z”JI’‰\NYÎuɺ¡];ž‹gqÇQ§š6vüª÷þÇ6‘/§¾™¡šoÕ<‡Q"]%ÈPº[Ý•›NŸ.E;ø(ÑM&¨@!O"Æ 6­S ÍsÎgÓç`Ý:xÍkêWLe:†Qÿ<Éç“6+BX€JœÄµkÕØcÌwŸB£Yt;Ö,ª_ÑŠjî9¨TxËk_ÛíKÑ3„açÛ•JÉöî7ßȼ ç©Ïkת®óÊ±Çæí˜óÝŽàÐ#Gcµ1¬‚†^ êb¥>Ð"Üb9(óÌ¡¡ö&###-­æ„¾¾¾xÝØØÅb‘+®¸P|ýúõuÛNǽ÷ÞËÖ­[`ïÞ½0T_ü­[9xàyÎqŒ/[ÖRË2[|Ï£ÜVkÑç"*.mÕô3åæp†I%¢‹¨P1u1œl¥kuÙ—ÅÝý8öX8õÔ¹-¸•›|ikÏJEY̆êýFm´vîÜÉÕW_Íí·ß>/×g®ŸÅ·ß~;W_}5Û·oŸv[Í‘ÅÖ­[¹þúë¹çž{æ¼ìùêïܹ³Û—MÓCt³<›v õýãép§Ÿ~š£&&fS«‡ ‚ÖñuÉ'&ŸK%õ—É('\.™Có}µ\„-™ ^»Vm+!AÈKd1”—2$šü+x^rüBA•™É$ÃPëÒóϲ].—|N×%ŸWb@u•‡‡ásŸÛÉÑGï}ïüy»îóÑ?N÷+ओ÷·ÝDz¬ºð“Þ,t¬¸fûöí3ê¯Ø²eKì'ýž÷¼§®zžÇUW]ÅW\1e y ~¸sçN®¼òÊÃ2»<í´Óê,òþùá‡xÇ8†qJ·¯oo㡬@,”`#f'S¢ž±ËR}š­U 
(—RTvÚË1$q3“ãÚ©u>jôMÕeH-0رcGÇ/îÙ2—íxãÆõ–¥§|Ëz»²JqÝé(QA®•Kò0.ÑZ`X„ø¾ßvö¨'̪SÊB®UÏÊuU¯ÃqCv¬YÃÖÓN›×*·jdz!N?ýt^õªWÅfüÔj°xó–ãùÇôíÃbæÙ­ïëZȼ œ•`yP¹86õH Q÷C–ÄBT’1@½É]"žÎ Fú9&Aß$‹±iªy©¤Ú0$=qi÷ íË–-ôõõ±{÷îy«ò\>‹Ï>ûl _Ó’;v°}ûvN?ýôy)>ûÆ (cŠnõg;¹ÑÔ?nƒtuÍ}ì°’-2ëûIô™|> Ó*߃@ TQ"õØA¦XlÎT.Éæƒ@mcÛê³^ó˺}m燀zAj*$þ¨›PöÑÍA=†PŽˆZ9ÚÇ<Á«ý[AÝä"Œ¥ ,¼¨®²>}¶I‚ÿKü2ŸD@2Q]õ0£ ³¡ìB´Þ‹¶•cK<´6îÙ8¯?Ç|µãpr°atŸIDMH„R¡ÑÅ®ÇS¶ïaÆAOóÑy{ž§ïv‡r¹Œçyø¾oã8†a:_†a†áÂψN7p’`i z&é°|/“ô“®ËéããóZåVíx.žÅM§…ÊSO1²âlN)Àÿ-¯›“ú?„¡ ® Pø>˜}0ˆz¬XÇ‚q ˜÷A`EÈCðUðŸ~_=Ž$œ‹ºm$û»üªÒ•G–E²_z.#­‹Ó°fæÍjYŸÍév-n¬éìÂ2Ýn‰@·€ÌgŸB£Y(t;Ö,¢_‘Fæžk@Á÷ggI4¦iRz=žç‘Íf ‚€J¥»Ë;Žƒišqy²¯eYM¿C†ŸGs#ê…aýtcé}AÒ+±mî™Ç™ëvíx.žÅiŠÅè‘òƒpÁ9GÓwÁ\1› ë ê!™N#7SãúÄ¡í(‘m//ÿÏ!¼üïq<äú <=¾žìKa<²Òó¢mË#Pæä¶Ë“Ð>c꜉p!:øaEゾ¾>¶lÙ2£BD¼k|Hú¾O__—_~9—]v›7oÆó<¾øÅ/Ψü†Ïà´'ŸÕíkÕ9"˜5†Ý«Ä1ʧ¶•qZp“AZ&µÎ$ÇÊQyFT–”WEýTY5’ØbyÔÛM‚ý»$®¤²LF’wIêaDÛ:$y’ųñ"c5q K?0ª$>eàMªŒô8ÏL]Ça¸~ëõ\Ê¥óò³Íg;~àŒI^·â¿ÀüÃÎ+4Jò[Ëõ›á‹Ú÷ýX(bë2ß÷ Ã×uÉf³† k­Ê°,‹R© k¶mÇžeYX–E†A aå[õl6['ú•J% È_XÕÔ4dú%–ÏçÉd2 áy^,Ô‰iš”J%ŠÅ"®ëÖ•Y©Tš„>P"œX⥩T*3Ÿum̲Ú(.°ßTíøpŸÅBùHÈÚ³‡pÝFìÙzÈ$@Zˆ“åéŸÇ³L‚½ x d? 
Ü^9IðPNŽÄ5Ý} H¬çdûJà“å9ca1N–Ç­ûg7qçäÜò«`ðÚØ¹'‡á鉾¯½˜&|úÓóvÈùîSh4 nÇš¥ÂBô+Ò„ÑŸ†ä js8$qÌ'ßÊåz÷JÛN梠ÙBM‹h‹ƒùnÇßõá±/?>0‡•n•÷­÷½´52¹@ìÙ³gòÆoœ|øá‡§Ýö]ïzWÝ÷ã¶LŽŽ.TMƒjê_&''G'''‹“““åh¹199™–Évfôy(Zg6lcNNNŽONNZÑò4Ù¨ÌFjѱӔSõ˜ CÑ9u™Æ¶±Ð̶Ÿ†“wlØ0óš©ÏÅÉÉI»ý¦¶mOŽONNNNŽO‹ÅÉr¹<9<<öØ$ÑqÌh;yô×¢2»N¹¬þÒäó“A©Ô²m,3iÇW_}õä7ÞØµºjz›n¶Ãék4iK¿¢“ºÖ&£×{±÷ËfËøøäd6;99<<99::9iÛê{­6994¤>ÏcwO3CK;nÕ?>óÃN>ý[n_­V'‡‡‡§-·‰¡É¤c¨Y´;µbÅáKyÑ×××q6F]²s~c›wF«äãM¬ËÄ%´H½{©lS&q7?%H\Ló(‹6±3 Ip¦FçJùá†V°i¶ð;™m;^á‡ÜvÒYœ9“Ä YÜŽ%È•¬.•0M“l6K.—#ŸÏ“ËåbwKÓ4Éçó†Q7û“¶<Ä%4ˆ"«úQ<±43M³c·‰ý63q…•òòù|ËÔßfàb¥—F¬ßlj-ú$朸©Ê6r=Ä‚o*u±,Ìw÷¯8œg±ð±Ü]ï„§©._>û‚‚Ô¿ik`õ¼K] ò&feb,^¿$ç¹´*³QñNŸõƼb¨\ˆþ•e ð¢Ù{ücdï»`Æ8*@!uª’9ÖŒ–‰²¸Ò.˜e6«LÒ‹<ûå/¯÷«Y`æ¢k4ÝF·cÍáêñÜm‹¬¹nÇò>¯x^¦¤ã}ý$š‚$ .œiÖ'6€éciŽ·¼ä žûñðûMë$´Î´H¼ÙTBJi–$ &ÄÇþëãð®nׂÖI<’â6*n— n(<ÊJÆëi·+#µO•fÁ/½>ÍBÞœúA0+`üóÇrï)¯žÙN>JŒ·àšŠoð ó ®ëÆn¢¹\ŽjµZ'€™¦9#¡KĺFñª³™vZ'×u ðI ‡0 ãXw®ëÖ ”rmý(6‰mÛ¸®‹ã8 á8aR,1 ƒ p]Û¶gtÝ+ûlàa®„Oœ}AªË³Å&‰e ‰àf‘$/ÉDŸ:Ó‰®7Wˆ÷ýá<úª ÿÚÀøsŸÅW¿r2_xÝØ6ê5!n­v³†z½xÑzI”å ¼þ%?μbšÍ)ÑL“ÇŸýlV>6¿A•5f>˜I¸Þ™†ê¯UÙQ®&LS‰E2§(ñÅÒÛÉ܇$¤µZëXc• <ãvû²Î°îÚk±,«£>U©”dè ‚$bˆìºHæI5‹œç}/ÏnëPZ§|’ÉiÍ’ç0L†xôªù Ð?-iãá`-IÒ*‰é‚¬7P7‘X§…tîÓòÚ Y(Å‚©— §ˆJ.qÅ–2°nß>ž8çœÎw’ö<öšÇp=—ÀXóŠ5qŒ¶r¹ÿ‰'bÚ‘ M‡ã8S¾øÄbв¬–V‚žç‘Ïçãu+O,êòù<…BÇq(‹u…¥’ Êè6æ†_"¼ñ[“ß¹“òáˆpd5HLÏR–näiŽÙ¦C]æð [‘¥þq,Ù}å·v×u[ZjBó3. 
CNxäöÝ~|ÝvrÚV´­œv1:uI!!>]’×ôÛ’G7näY÷Þ»ÀGÕh4E¡V¥’Ê+“Ë©Ï"ˆ© õÝ÷ÕçLÖ®Uÿö÷«å®«–Ék;“IþJ%Uv:¾˜$–öR$‚šd̮Քu–ˆD®›n""Žªìšãã‰çûI]LS&W«°gÏÆnÿ$s‚¼IxÝuSz„aýµÍçÕßð°ºZ|Ó,$pß·ŸÁ «žß¼.âqû´¤õêô-Í¡„†E Äݰw/+×>Ô¼ÂuÕ[níÚúå•JGåN‰G2ª)À}_2 `â»üÓëÿ‰‰‹'øÎSßIÌ&ò¨‘R+;= œ†ô +ÁöÅz§q½ã8qû\Ô+Èår±%ã8ñr9ÆÂ0Œ2¸”rK¥®ëâº.•èšA@FÞz¨4ÌRV&“©ˆ:Ž×wpp0úÉ’'‰l+Aóeۥʡ‰£8ØØV[!QàS 0ö½nžçQ*•‚€r¹\—P ­Öz¡¡¡i¯MZ<›Žr¹[ÏIRŠjµJ>ŸÇ‰FaÆîÁâ²*÷H©TšR”^L„?ú¼æ5³/@²E‹k¿X¾zþ¶Ò‘å§ÌÒÒMÞ¢­F7uURBZãï#ÏÄ0 éïï­$%ÙH†d³ÙØbRžk²­Ü·¥R©î™wÜø8 Á-þ0ŽãP©Tâõ¥R‰jP …®ëÆÇ2€5>È ï¼“ ðêGÅEE3$I’½Üé¥ÜZ/ÄŒÐô4b$ŠI»ÁH&¦@A ¾‹‚ÒøŒö¼$Òz¥’¬÷ýÖ)‰ÅOMÌ\7Ù·ÓÁ‘¦'‚äç6Íú®XoÉ T³år™0 ë2].VB ?‘÷¿¿ËNÕMØ{Ã^þüŽmÛ±ÅÕ\¥…×(¦šmmµÎ4Í–ñJÒÂhÚE¸V«Å÷ÖRùí¼¯?Âõ_;ƒê×^AëMLð呠ĸYšå‹Å¢üy×ĺ±Z­†alA)“£££ñE¹\ŽŸcò[7fqK?3e[Ó4ë~÷L&CµZåϸÜ{ǽ¼÷½ïÛ®ë Õj5ÇÁ÷ýøyk†â ƒ‹O:‰wžtîÓOó«üãâ‹9o÷n6Ÿ}6_Ú·wãÆ8Á5$s5T_®T*5e ž ûú886vØåhÚ ¾s"J5HebË4U¿Ç0Ôg1 ÂP‰Yù|b‰9Mº‹©M6[/n*wx¸u] µ]ãúL&©—çÕ›üT«õç$æ-†‘(ÅbË>Ul$"Z¡P¿®Ph>¯Æ8‡R®iÂÈH·í# ßOšj#"®Ø¶j*"¦¥µUùiÓBUº‹/\6¤»é$ç­öm·\ö‘²»ßÓûvâ̲Úß^­¶m¥Oþ×WuVH"oÂ;¾úÕÖ}ÿ0¤ò‘Ç(^ÝG#ƒùzbZÓE–=ù$çž»ºi¹xàt„tÏ$Pp«pUš%CÏ qÿuË:VÿdÒõ&t]Õ‘Kn³Ù¤™É$o_ÇIbܤ×üko±à7²ðK l8ðóì½u/¿éböïø/ú¼>Øgñ§_úSÞÕw'oømþðÏÿˆëѬê¡oäþÓNcÅŠ51ÁÎ|€ â8R¦ãðá•+!“á–7¿€w¾ñððÃqP|¡T*ñ¦‹/&õXlÓL¦Ë: 6¢R×Äu±¢rVláSÖv²6U§éMNµ>-V”Ëå:RöËf³u/_ß÷Éf³±¹ñzˆ‡~}BÇÛøÁÝý(·ëvŒ‹”h™Íf; ò©™w¦›¥mü$~Ü\ˆ]' ás×òøsÞ6‹}£?ß$¹Mc_[‚¥uHú9ê¹å8†aÍfqÓ4)‹ql?Èä™bFüû¤ãüç8AËDZÎú] ¹Íäóù:ë¼ô³Oä…/_Nñâ‹q= W­býÝwóæuëß»—/_Né”Sâ‰TÉù24>Οžw^,êifH:°“¼çr¢ÈuUŸ&m©%ïüZ-ÝW*I„vÏS#}©Wã³IVe2õÊE«Q¿¨Ž“œWº_ÕH©¤úV­D8¨W D8Ëç[÷a$aÚÌ©†1³D!¢ìˆÇD“Œ,|?šƒÎÖÇ[“&ÑH¥’ˆUÏ+=W.óåi—R™S‹³™2¯\ýØœ_$BEÅqš&»¼ŸÒ߯Çzì¿aßuõb»ëªFV,jŸTMW€§W´–U¦7Ô‘Ç⢠4jíªY|ô¼wਣXÔ/ëí»Å~¼yCZ–²o…iÂk‡áÆ ÷½áåœ0üB~5úúþùF^óö¿y›îÞDP 0¿Zaô0<—Ïì5ØwÑ$ðƸÈBð™;ïäþ·¾•ƒï}/ûÂg._Ž} † ú\j5V9 ²A:Ùù6Æq@Ö0tB®Nw”ã¨?1çI¿}¿Þ÷­ÏK,ÒDq€zQH¬ÂÂP [íÚ@±8µßZ¸:ñ €ö"W«}ÃPõ':`´3ãIs¸¢¦$9jWŒàæ„0T]NËR]Ë\NuI¥¹.[¦~J×U—¼\Vû8Žú.º±ˆqië¸ô-¥®# Ÿ(7Sä9OPù>™W-ƒGQÍ]‡uÍ6àòúóyÕ3õï|L\W*ªaëIqM à×£'¶4š(•JeÿM{çu8Í’¤ç…¸ññqNzè¡$@Ãl‘tsE¸ï?Ÿà”e~¶Ñã׿µ‡ßÜuã¯{œóþß+ylàQ0" „ô©ZÝ*Ö5ô²Ù,ËóyúÚÙÚ éÎsµ:uÚ&ÓdÕ·¾•ô\$w;«°uÓ-¨ë*ËÌx±˜ Zcªm$­ï'n‰Ï’î„{dZTÜoäÚÈ¿¾GÝ-‡!c=ÆüãÙ·.ñxG¯ÿ ³@ºÞ¾þ 
Æ^©Ý¾z‘™ˆpˆ´uëÖnŸÆ¬ðoZƽ€7¿xáE}ÔC%ýøÎ£¸~’˜ˆmHÇØËçó Å]bi(î¤âFÚH¯¹·¿ò;ß9l W9£ô,k|æŸø•J…Ð0p¢óŸŠ†A^fqÒoñˆ·hïÑ. ñ’&²²ÏyÁ¶O`É;βԠ,ýN:HÞiéèê"àåÄ54íJ*ƒÉX¾wÌ| -K½£ç@1‘ÇÄ´ù´))…gp»ûozÄÏG{‰p>ŸtMå¶€$VW&£æË¥Ù‹õF•貚# —(4–ãÔ½³+—üóôÍT/þÖÔfŽ2éP(¨Ï-’pÍ ñ‘6ŒÄpÁ²æÇär.H‡Тá±og¶ìÛu$ÂzéŸìˆ¢÷…¸sŽãwÞ|*¼¥vxSîþÜÝüÈüïßÈ'^ý ÊV9‰ WúL~„㤣׊6ƒ¹Y»LN÷€LÏLÏee&fÏël(=83ÍfÁ°ñÚÙ¶šñOçiOG¿•Ÿ dÂ>×å¢'Ÿœ»ë°@¬³ ÿå·u´íÄæ þêE¹\æEoxQ·«®ÑÔqÇË_αOÀ_ÌäqTB%W¤ XÁ‰j‹G{¡P ›ÍÆñ$Åu?*• år9Ž¡&ÿöšàÖŽùt3/•Jq²ÍÂ0d̓blڤ;>‚ ô<†òù8 $‚\ %ìy¹µZ Û¶—L²‘Žð¼8`•ÞV*%2“ÁWc[“¸´¶­Þe鎷eu>Y6mÃ#É‹’NQ¶Ñïqu[Êçô¶RCã o äÞýnõ=U–m'=ƒ©¿I« €5a_R–¬3Îp£*×h¨— „—‚ùìúœG Ú± X>„_„Ì'’u>ðŒ7¾÷®»æíº/VDc­T’n$=HwMÓÝSÃPÛMNÖ—•vªÐhZáÃ@Æóbá¢T‚ð5ÛÔcר4}!âJ_($ñ.mûðÆ2@ç´õ³ If‚@m›žh™­ou§¤³—HLдŸ·Ô5Ÿo~o‰‘‡l?]=Óç(ÞÐQbô5f!‘ò;y·'-BÏ'Ö­ã¾5Oû¯-ÄQô¼·â´CØOíãù³+@F!l¸s¬ °:²"pÔò¸—S›‰†õOG˜fó¬’ø=äóõî°ù<,BK"X±ÿ´Ûår9>çc‹Ñì®§Ñôû‹ì$p H¬SJ$£óè9ÃXTâLÏéX‘’•Tht=]Œˆ‹ò|œG6›­wëA¹¯þ³iÖýn"j‚zÜH[—$|_eÅY©TâX§Gâ‚:4¦©òç\<Í>éßr†‚°ˆM’0XÂ(†¨ßED¥,SwOBT,@Ù7ˆ¶QË& Õ(ÇiD.‹ÄMÊd™³v-ƒå26‰¦n§ê\Bµ!ÿªoóóSœÀ ê"„å¤.uÜÒúHÛÍ ü9*¸µ¡*á›P < ‚ç‚âw ÿ]u,ÿðéŸúÐIVó%L:1B¹œäÕ‚ä»¶bÓÌòL £˜Öò®™õ+R¼˜Ä;GÞ]AƒƒõÆ"¬‰ÕxÐF³ÏV7Ä.—T¹"T‰û~­Á˜¤¿_ý+f¥isQýÒd2êßtRŠ´—RZ(l¯ÓóTÃÃõõ—]ÊÓUQÎ*$Nªˆ}†{.aš‰I¬ÔU Éù‰0(&½³|Ñ qpüÀcY¯n¸ô*éWG1‡oäÍ,éy!î×÷> ¸{öøÊ²â½#ïåŒ?<ƒc9ÌÈrÂ%éÔéÎEï /Ä%âuÛÛàY7?ívÅb‘5áš9¯ÑÌ“6l˜Éb# ÅÕ&>kF,LÙ¶eYu \–"óéÞÙ. 
p««™7þìgœµf o{ê)^ºr%ß½ê*VŸz*·>DÞ÷ñ€¿½ùf.Y½š7uV—¯àÓž—P"IF:1çJŽ;bäà8‰©)$‚X#ba=UH£©/¤ÆóH‹Žž—Ô' “8ã­®i' ©$j…”ß8¹—ÍrÏ¿ïšùùõ!Ðÿè5Æï×-—>mÇ…è ½G½-ć®<ó‹3|è8€Žï°þ’õd7fYõ®UøÈŽ5ŽU³¥  ö2­’q,RVýgM5p €XÃZ Öô6wðÛÓ$S¨CTŸÄÜÇLsÌó¼Øzk±[¹Í‰Öås‚ß÷ Ãû|_eŸµ,þÂqÈïØïûl˜`à²ËÈœ|2;N:‰ï‡Î8ƒWîÞÝí+97¤],‹ XÅ©(cù‰$Ä[«H‰%Z3µÞ&‰á×Îm:ÒZˆX¿ wZÖtñÕ:a-ʃe¤‹ I&7ÛÍ%(5ÑB‰v5ó¹l´NN¬LòœqSëŒh½¨™ƒÑ¾fô½]lƒÄ¬/-«DÛæS·àçßú9Ï幇qab "ZC¹œrx-ÿ9CÜ­EO»D Ò[9ç[$÷¾¬—fm¦ÖËg9¦ÓP¾Mâ>.»Eòê”1úR§Ë¹M|å+ð¾÷Qø·KæÏúÒ0Ú7òV«Xs­ŽÓXöt¢Ÿ:23“~NBðÕˆ>‹¹»`©}äù\B=¥Á96„v’DËD={}’$[Ì(ÉzyæÊäˆ|vIntxÊDÏoÊYõYÞQ=/|Ö…s½€ül÷SMË%icKäA ¿ÃýC£èy!nõÉÇÍÜ2"<ÈæÕƒÖ(ðkê§5šâøw?ʆ/ÞÙv½xÜw1gŒüa’4šùå{ï}ó=Çv>Î%ü<}ÂӜəuî§Gž§FÄ]â<Ï‹-MÓŒ­éÄbϲ,Ö‰êkûúÈ¡~ÖcV¯æ7Nè ôb@’Ù6äó8‘Qœ„(…z ª;!‰€!‰p!ñËæƒZ¾‰ÿæle!$®‘2‰àW!ÚŒÔ~yAMÄ5;µß0Ê¢ÍIíSE e ˜6µ+‘qò8I‡‘DTŽF,xø‡Öo±„¼¶­;Ò­îæ#HæhLý QK [VÃòt³’mÒÛIsOo×’´.æaj¹‘Ú/M+‰$Gb¹Úxk$BšG}]‹$úƒ¸rËwÙNšlZƒ6SËDì ÿûÏëï5ŸÀ¡ïŸÍû÷sÒ>@Ñ?BÝ K$â™Cb8"V¾b6- !C2#"ëD óHıt|AÄ:yyÉsX^ly’84d‹ä] ‚$œÜÈLéÃN}ÏG瘺q¯ßz=—ri·‰3þ¯³vm¡i¹„[iã$CjúwÐBÜGO qÞÚµœU¸ xõŒöûä’?8þ0\C5ð*©trÍÂ2>Í`uÕÞUœù­3yÙC/ëvU5š)yÜyƒ}jç;HÇ-(‡aˆûL•}³{ÒýÔó<,Ë:"â¡uâ"k›¦Š}_ùßÿÍš•+yé©§òã3Ïìö)>A ¶j5ÕS©Ôg| [°ËK\1ÅøJÆ$]oÉ2Šo§m‹Y^;»ô¤¥X+ÈÀ*í jРD·bTF†¤Ï•C%kÉ‘ AõËSŸÓÿJž Q' nÀ'Þ[¡ˆx2 ‰Ö‰¾'oòg‡0R6L3 wÔ×·ð?×B“É(Ϲ¹˜ûíStÑL!ï‹!Œh§i±KÆž²\bº©}»áß4žE7ïvóîSéȳeºØÓ1 ·ÆŸO4–øÛ/.\œÖDá¡C¼ø-o¡vù{ÙÊáÒ1 BÔó2m¥›!ÂQÏd™Ô€äY_N•'ŠmÚ¹1IV#émÓæÍi#½tƒ›Î00m®9Õ˜;]æ 5ò¼cyißuÀ›ÖµìOz¨ß'}­td¢#ŽžâVŒÀ«;â …¶mó®ï¾‹C‡qè†CýÿŽV+—þ˜JÓ‹¾} Ö»Z¿e<Ïcý?¯géÔ‚%ð2Ò,Q|Ÿ3ÿäL:âR³›w¬¹ƒ_z¿dÛ¶m}ôÑÜ{ï½|ô£%“ÉÄY8Ó‚ùMù¾'ií ÃÀuÝ8q€¸UB’½Úq Ã Ãø_ Î$*„aØR4 CeMí¤b¶ÈwÓ4ãºJL;×uÉçóu®0 Ã˲âL¦kzˆSç+u“sö}ß÷Éf³˜¦‰ã8óWÎ÷ý¶qå dÊ}áejÂÀó}~pçpÁóR¯#J )1¼GGá_† 4È^Œê^·_,H÷"=xkE£ÕA+Ä:Ut?{%˜Eêˆ÷q0ÿÌCð¼ÂûÀ¿Ì—CPƒòZðÀŽâÃQ†Ÿþê>ó¯Øó¦c0VŒüÁqœÿª£¹ôäû9ŸgÅ1ÈìQÕþþão–ð¹ÏÉÞ½«8þø'9õÔœuÖõWOú&™Œ‘ªÀOz/ûØröï_ÁñÇ?ÿ ð¢“Íž‹ãÀßþí;v(÷ék¯]‰ï›¼à‡¸ë®‡¸ýöÓ¹è¢ûyÆ3~Á±Çþz!~ÁG⢇áÌD8ñFAËC5 Ñ;Å‚MD ;µ>mÉe‘ÄȺvMo6Lj°ó!˜/d·­]‚•Å>Ä öïç†U«¸öæ°y±YÃÉL•X6}PBZÕH2(A-JŠÂ(IÂÀ´€å’ø1·ŠgÐ(ˆ¥ÓSÓbfAxåÊÿæÌ篊¿A@íûn2‘F;Eqô´wÓ—î㮟ÿüñÔÛ•J%²Ù,Uñµw`eqe}Äc¦‡ÝÔÖºÆ4Mžà õe©ÿÐ, 
\—_üï¶\åû>•J˲p‡[_v+»NßÅþëö³ãöü씟ñþ÷¿Ÿ—¼ä%xžÇ‡?üajµZè$÷H>Ÿ…+?8YDµjµJ¥R‰E-ÆJ¥a2444§–v¢[+ÑJÁZ­»{J½,ËŠ;d ²#ç·^t/¬ !yŸÉ¶R¾eY”J%Êå2^¤dˆP—Þ×¶íº,³Óÿ¬î”B\;}çø'Ÿä™/r—¾R)ÎÈmã“J(ø²™sæQã%p»è6òè¶GY}É꺎º´«0 ùï[cÃ?¾_œwˆ_þò—œtÒQ,[¶–ß<úIž:ÿVáî:¸Œç}÷xxìýɧxdðhÂ_À=Íêg>ÁäòIÞý’Gyßÿ¬¦ü¢qþåÃ+Ø»ïjÎsvsà Ÿç”S¾ÏC_{ˆ?~ü÷øëÇ¿ËäÓg°ïà.¾}à 8á±_³þŽ¿ç®‡ ÜwßËyÎsF8î¸rªy*¸¡;ŽçÆ}·sô¿?Âw¾‰‰³˜œTÉY&&ÞÅþý'óœç|•óÎóêÚôà ¹MA’<à%/QâvœsŽjÃÿöoÿó?ûöÇóž·Ï;PVobùöª¸”£ò«_ý:¿ó;¿Ó½wĆår},ø4’ßBBE¥Ý'm7ÉQ”60“HYO ´·ðRCž÷B»ÉôÄQz‚'½¿¼kd")½\hìON7™Òë„=„92Ÿºˆ“Ö¬é]. I#Q‚™Øõ?ÝðM7zq“êrjyã öرØy(0š&aƒ`šx‹]A×6½-Äõõq觇¦Ý®\.sË7oaï­{9õͧ&n M— >šƒc+[¯ô!$Ä:¦W{MB¸ï ¾g%üW²L8‰3&Ú›þùM¼fà5œ¶â4>øÁríØµ¼ûÝΠlYVÛAD»å‰LÓ¤X,¶µr;¤¼©:éu­D0Y•J×¶1‹ElÏ›6IE¡P ›Í’Éd°m›L¤Dˆžˆq†aày^%¡³1Xp†±•Ý•$#ï«l ÃÃqÀz×Jæï㯙fëdv3?¬_gý(õåÿ¹œ;—?Mð¤G†ÜyçÛy衇xúéX»v«ö¼–Óÿãf~ñ×À™g>“Ý»¯âСC¼ç=J”ú}^yïö=õ3~þn^=Á“gÜÎò‘óìàqž<ê)þã¹Gó/{.’?OÿžËŸÜô ¾¾âžu7­½‰UÏ_Åwƒ€‰·(ëÕçX–E6{UTûÄåæƒñçM©³Û’ú|Iô7ã ß_F£I@6›±ÈЩ0!‚wßõMNpIâ²¥“Œ§Ö Q?6M`Þ 5³@Ç[ºr¥²8йŸëBDËEàÁë‡Û·Ó·ocëÖqûpþÑGCpëóŸÏ_|úÓ¼èŒ3xúÎ;ybõj¾ýŒgpÎÁƒ¼qùr>96†oñ‘eYžÇÅ›6±áÎ;ùêSOqÎ9ç@°ñ˜cسj?<î8¬Œ¿ýÎ;9ãÄùÏGáÀK_ÊØØ:ÄıÇ2¶n•±±øy-çpì¯~AÀ3N;ÝO=Ÿ§ÔE&‰žxâ vìØ1_¿Ô¼áîÚņGeì˜ß§ØK®yâò)îùé„SU3ÏQ’À~Ým‘vD>ôÁ§OƒW&˦êÛ‰¨«9¢éi!îŽÿYͳéhÛ{ÿî^κó,ø<ª­UfMà?òËjù0>ô{‡¨œ_¡Ö´%œ¦çùìãÏçô/ˆ…ÇQÖ*ù|¾^€*Axú·žæÌWŸÉK¬—Ì{ýCâÉë8Îô3¥ÕTVµ|>O©T"ŸÏcYVlX*•0 ƒr¹L©TÂu]|ß¿‹¥ (aÎó<Ç! Ã#WˆËå Và <ܲWÓºûÐNßQTaM7×u1 ƒññ÷³rå-¬Zu!'žxð >ñ‰Géëbpð!žüÖ§ùÚSËøûÏ>ŸßýÝçáºa¨\´‚Ìÿo×y|妛0 p÷ÝÁ ׿ŠPvÿ¨\¯‚“áý'Gýß^›8€ÿÄÞeÖ{»ýk´e6–>‹Ù:èp‘ÇJœd„$gŽD ©Ïw‘Ð&Á÷Ú]?ßW‹Eð}üêWúö·Y³kW¼‰çy <ö÷ÝvÇýð‡ºñFÆÏ8ƒ?ü0¯ë룉L¾ïcyÏcÝÊ•Üèž~š½«V±wÕ*V­ZÅØ5×ÄV‹{¾ÿ}þd÷nÖ=öO>ù$apèÐ!nòIŽ9æ(ù³eÊÅÙ0 ^½q#[ò^³n+×­cÕÏ~Ʋµkùõ­·²bóf~çöÛY•]*©@„_øÂxòßÿ_ýj®‘вxMrË0rð ¶m+kç PBàyçñ~¹ðoy wþúל}ÑEŒ­[‹ƒ¶m++\Ód“çÁØX’%3ºÙ·¥ßc¾¯žS¦ ¯}m², yì–[øòò娶M¹\Æ4M¶nÝÚíæ7+¼ /äà>ÇøøÅÌÀ˜{îHgø@…’yÔK­«RoÒ™þ<“VGîãiÉâËŸÚÐ4 :íİn G<=-Äqð gœ51å&•JÓ4ÉžŸ…£I"¾j?kM/ðÈ#¬ÿrµÀƒ¬öKh +“{-±Ó†ë7DçBSHN𳝵êõ¾v­™!•Jd4f*9ÑL v÷Ý¿þu"´‰Ð“ÍÆ–e€ZoYqJÕW^É«?ñ ž8÷\¾uà›ßðnxüqNÞ¿ŸÏ.[ÆÑ¹žçű. 
=ÄûÇÇùâË^ƺK.á¿øÿkÅ ¾²rel%R,•µn­Æ_V*|àâ‹á¼óøU4xu]?zÇAÀ%ï{߉\±mÛÆˆ¬Ó^„²€|…iRèàÖj‹ºwØ¥—¶Ý÷œèO]¢èµ™’46Ñ¿fcšÚNb~Z– ZÙˆçqðvÓl?C°ˆðW¯füóÇR©¬ž‡ÂQn¤5’ñ¡¸”:$oòð·IÌC³Ì05u‡è‰÷%É#¿xFÝ÷B¡@­Vkß·š {Íâ§§…¸ûÖ¬áœuë¦Ü&N l¤LÒhz„G^õFûèÞ–ëF^6ÂË—%Ñ‹5š&|h¿ßÇ¡V«5Íö•J%ƾ9Æóï~>/å¥:[uÒÉ$æ‚|>[Ê RÊ·, Û¶Éårø¾O­¦F"ÎU*•8®Äì…=a±Ç!ª#ºV¥Œÿ`O­I‰£¸‹píµëé# Áù pÌ6Ê—Âïÿ~ÿï”áéÿ´Ê«HÒ¬¶Bs…¨Ì£U’þŒ¨)bÆTL-ŠöñÊÓ}¡#ŽÁA0Lð³*î[ùg?£81yꩪ±KªÙHÜñ;Þö6nþÇd⨣ذlC/zo{Å+øî/IÉÄ.Ô»ßóžxò`ÄIiN‰\#«ÕjýäBpYÃûá½ õ•÷GZ“ýg’œ¦'žKbz(ÌtæŠ0úî’ÄöH’§¤¿C’C²\H?É–aÙê{cJÚEˆ˜aÈ}‡.“Œ¾1rý=ê3“Êu• ÓªŸ"~ÚSežž D×,-nº‰ãO?¥®O,^mÑCz\ˆ¿k-öÅ­ãkA@¡P`(?¤¬ºAkz›|`íCÐiÒ4Lòv^½üÇgQ°F³ÀÜþÈsX±ÇùJ]é4år9ThÚ"™PçÊ-´1\«r[u %¹Æèè(†aËå0 ƒL& sÕj×uÉård³Ùf7äňaðMþ·MÕæq—¸ª‰U¡mÛd³YÖ­û0‡m+MãàJl }È XÙ¿C `¨øA’I¯U–4Hb‰õ†dØ · ê¾jŒŸ”þz@‹Ðt±„sËIŒCë‰'”K£a€ir÷™gò£×¿7—‹-:-ËÂŽ¬Ó~+›eÔqØyóÍq›Ÿî^OÇAªÄzÍRK²PˆÈå7,—¬ÄEÔýëDŸå}V IOšÙ²¨ñG%µ\ÒÊ:¨C,_ÒeÞ͆eaêÏM-7IDТcDu¿ôéK»}•gõ³pí\zéƒÆt1%§A,Ùä÷+a“ÄÚ-A³ö|b6ü«Y2ÜrÌ)<¹lÙÌb÷RžçQ,)•Jô÷÷×­w‹óÏ?Ÿ·¾õ­Ý¾”³æƒ\`ÃÿKõ›Ç¡R©ÔÅLgá 8î8µÜ4¡zÊôÈ^–ì&ÙñD—ÈGßÛg1 VPP쪳®Pi¦ á>ö0}zÙê]”ÿæo`xX¹6Žsí'?ɾƒU\Èë¯cLÚ¶ÝdMÖêÙÞ“$™U ±|*‘f&ʲÔCÝO"ÊȽ‰°f`Yê…muŠ`×j¬]nñ]–I £¡Ìt9Ìmä©7:(FQܱ¯oý:¯âU ýKá¾}<ñãÿä}ï{õì ‘ß¤’ºNeÚ'Pèy’v«-–—7µ‰§–* Q†T*•ö“­ò¬é•¶©é=-Äýµ‡1ŒS›–g2åå›êA»Hú š#‡zˆU{ë]SMÓdÍÉk`b–…j4 Lœòâåq€VV|îYuÿ*5ÒÊS’Ïçgäz5_ضÍðp½I@ÚÂ¥\.Çõ4M3NFÉd¸êª«8ꨣº} ³Â6ëÃÿ …8©EËYm¿EX' 5 rhŽ%T¥>N›…º7¤Ï „6ɸg¥¶ui+7Í4„Àoo{”¹‡Xwâ$ùõç@¹ÌÝwßÍo bÛ6ž§L”‹ÅbO£éq~òñýœtL}ÆHÓ4•§Û®f‘o,ƒŸñã¬úÖ*Õù.â½fQÑ(J‰8÷ä“O²{÷înWofø>_²³|(j£",ŠnK<—ÑZô¹H"X´v{j´€"±ÞH:áí2½ëw„fœñqÖ?˜·]¸ 븓0L“ܧ>…çy”Ëe*• µZ­7â¨ù¨v/ÖhKê2‰Ëeúþ©ÒlUÖh%šÐöÀ©j:À÷ùéÄ&N8úü™í ÚÌ0‰• CïO¦]‹5KŠcûâ¤ßØlˆãóN‰¶†ÓÐÃB\pï½ýÿ=.šb#mÞ«éqžÞõOœ›Ä9t‡0 )RÔmW³h€Ç¼ÎQ@ö± ¾n×Ké`îÚµ«ÛU™9žG¸w/ÿ^‚¿?ôÉårÓ[ ¥g®+¨T:ãY§Á½­t}Ïhf‰»v-µË0O;ÁÁAÂ0Ä4M†††°,k~-àĽJܳÅõÎ!‰·&qEDìäh´ i´`-®-AÂ}û0^x?/{å¯;ÛAŒ/1Kž‹EÜË=Í’bäàAÎ]¿>þ.Y¤›¬ëeB[Dj"zVˆ»á‹?ä转iùàà _ûìרpçÕ9Ön©šfÙ[qácâïo:ëM¬¼v%ü7½?{§ÑDüЇÿþ£‡›Líwf7oÿ³·sèÒCQš¾n×T£™ßçÏ~ÿC¼ðFH&“ëÌZ( 9n”FÓM‚€×‡&O9@ñPì~Z­Vç&™Š$ÜI»ÿÕ¢ï9Ô€Rb´¥ˆ%›TÁB¹“j4 øÇêÉQÞøÆgN½aH’üb”Å+b´˜5K‚‘ÿû N}`lSßÃ0$ S±)$ùŠ$Ñ} MDÏ q+WÜÃiY |(Ôj56”6ÔÇÐhz”£N’‹¬‰¿¯yp \\Óíši4óø¿üËŽ~FÓï®›îâôKOgÝÀ:=Ë«é}<¡j•á—Cf0C±Xl/Âù$Öo: 
†¦—CœíÛ¹uÅßr]ñ¨8ËñÐÐÐìD8ÝüÔ瀤-I„2íŸ÷‹Ç:IÓUþuÝw~êö¿]Ú~£I<¸¥´&³DÎC“àû• ¢gÄ4‹Ž§/xŒågÿ¿¦AÞŸùFÖ]°N ¼:uÏÓhºÀŠýûñ/¾˜8•J{·½I§YÜè´EOAøæ¦gþ}ß×…aˆï'ií+•J¼­¸´ƒrã‹® (•JuûÈv®ëÆÛyž‡ã8ñq*•J¼O&“‰ë8N¼ï+Wd!—ËÕÕoÑàûxð|ñcGáy¶m39993N,Ú ‰]%ɆPm¾][Oºhæ€~1Çï;®ý" 1µø»˜X„Í4„!Ïûð-¼âÊè"‚zk8HÂ[hÝBÓ@Ï qÿ~Ë<þ½£ãïAÄ)m §Y œ41Á#ŸN¬ˆLÓäÕFŠv¦KüúYÏ⼬[–—BçX³äY}Ûm|öe—sÏûâ8NÓ”»È2T&F‰?d³¨]­ëúL´¾|ß§P(ÄÛ ÆÛ …¸ Çqâ{> ú}Ò‚Vm…¸0 ã“éØ9鸓†aÔ J–eÅëÓ°ÓÙ}Óå‚J("å§3›¦Y÷»—ËåÞHb0CÂnàwõSÊd( mè´Eâ¶ÉX±œúÓñ 5 ÀÆ={~u*›N õòèfiµÉðð‹Ðô>pôÄ¼î/ùêƒ_%¨ $ËÒjÇš9£g…¸ñûVó«Ó~ÊZÖ’!ƒk¹Ø9›ðú)¦Yœ41ÁÑ«Ÿ D‰~úÉþèÒ?"GŽP¿5‹ˆ;®~6kׯß% ¸Ž›¥Y,¬¾í6þ}l€§ØE±X¬O: ¬.L<î¬XûƒÈ\×UU ‚&+/Ù.—ËÅBZZ K[|™¦YgµÔ‰`2h0M³Î2pxx8^W­Vãuù|>± ÃH²×Cœ”KNzò)]vZ³m;>ŽeYñ>BœmÛq½ÓŸÓ¢\ãqÒâ]ãù§À¦@Ú‹0ä3ûOæÉ+" CFGG§>å—än´w¦K»?Ï™çüKÎ˶áĽÚ'  ÿjNÓe~ö¿Áã×>Å·Þþ)žäw¨P!˜àB(žP$ÙÌ$P¾išñD‡ˆö¥R)ž(•J±ˆïû>®ëÆë\×}×uñ}Ÿ|> I²ã8T«jöF2p[–U·LªÔj5(Cà«0KéI©·çyu­,ˆ-˪›IOpd³ÙXä›Êº8=I“žÔêÊnœàH¯“ó–uÂøøxËmËMb>ÔY¨/2®{ä¹zòx xÍc¯á½Ç½·~ƒµ(‹{TŸÂÁ¡@,YLLlì:ë9Fï6)ÃÔ÷EGÏ q÷í—›ËùÿÙ{÷8IêòÞÿ½°¸,—ZQg¹X †Q4H»^£Äš˜Ÿ$bV{b" ǘž°q=š‹Ý'ÄèQ‰Ý'ž° ¦ûh &Úl”D£vÅû…Å)DQf§Ø‘‹8µÌ²,+bÿþøöSUÝÓ=Ó3Ó·™}Þûš×v×½ªŸúÖ÷û©çba” nÆÀ?Á§XûDÇV88ØØ88‘`çj¢.¥ËÜÿÓ-pÌ}¤IsÒã'qæégb½Ò"M—2e<¼hp$6*„„QãÜÊŽ“Ÿ¥ïÅœqêf›—ºñK–sÞ ×>‡éÀ·â8ÅÇ'Ož¥ºçD+,Mš,Yräæ]‹ÆuBBòä£ÁD»ÈväÙ¶ÜgXHˆ‡GªÉ)¹÷8cÅ›é EËâÁOëÿôuÿ]?3 ®ZZXµRcsÄKi±¼zËù¾ $¾ïG^–IÁ§Õt .×uãrår9+ŠÅbô9¹œäŸj&,9r¤ß—µm~p̹TÏÚÇ¿÷ïq[3ÁP@ž<›ØDHX×¶„„Ѳ?þ8‡殡»ppøþǾϥ—^Ê3&Ÿû·ÏqÆgp}MôûÉõJþÞIñ'êõRr¹F1)¹\©Tj¹\4Ï´ÏKOcè|2á»Þ»–eÕ |B\³ý6 qÊÒØÿÀOY÷´[pp˜zû^ʛ߮ÔqníŸ<=<&˜ÀÆŽžƒyòXXm?Wå9ŸDD¹äv“Ž2OÅ»þ3°BÜã¬ãןüb Ñ ÖÞâ–;â'þ1P1Z;;b íx#‰q/& .…Vo¥“ƒõlîzä©üôù{I“Ær-“ó¢f66™Zi±\›þ÷2x»Pr¾x$;,À’uYn¡åe{AË·6N˜Ø¸Æ–lW,~°a37ý˜¢S4¶lY(`“mT‚•zµ,‡ &Èi»ÍM¾aôñë¼€¤£ÒÎ9Ê6[-›Ü§ÜÏÉïr.^ÔYkŒ,t~ÉNW³ó°jÿ–B³õ^tæ‹xÖ·žµ¤mõ“ÿv Ÿø7½äÑH@ÊŒd¼É\6›Åu]\×eüMã&3åbŸ·—îy.W^w¥Yù$xõo¾:ÞX„‡R“¤ºVË—‘¥Ix›­³\¬âY'·3uïÔªâ>ó“søù±·qý—®Á·üØ#ñ~ðïóá ó»é`Þs5Ù/6P°°êîuY×ņ`ìüعc2‰Ézè!+à´cOãðáÃ<þøã<1ô!!Î<Ê Ã'`a1=;ͳ6>‹7F3›6mbnnŽ™™FFFxtæQÎ:ù,¡‡†Xw`Çœ{ '9Gî}„sÏ=—€€{ï½—OÛÈ•Ç^Ia¶Àúõë¹pèBæææØ÷À>¶g<o¿ývÎ>ûl†††˜™™`xx˜ÃÇf朎Á¼ð¿ë®»8ÿôóç-7wìžó çr.3›gØtü&ã1î>ïnžrð) c¶7»y–a†y`ËwÜqàg_u¾ 
η¸•§ót,,f6Ïp`Ó^Ì‹¹ù¤›9é’“¢³ßÿåïGŸg6ϰqãF6±‰ÙSg9¼á0Ã5Å~ö³ÑóæG¯þQ¿Í³mÞõ0ëñÞb&´±¯BíÔ Ÿù|˲Œ—Ù½žç‘N§ ùèEå´ãOÃÇçœ7ŸCHÈcø¶O˜6}e\¢~óBÏË$ÉßÝ")’èØnuq×ýÃ<|Éõdø ¬B“ßm‚y)/äyÚm§ y>µò}¡þ¬<d|—|YÖj¬¶”1f;$ïé ­"Õ–{ßöLˆ›››cÏž=@šÙk«LRà´°¢mÊuhöÆVÖIz}$åñ?¾¢ó_ KµãÞ}‡ù?±°gÜù÷yrÖ+J”ðjÿÚE iÒѽ"/idðÙê/¡ä !b_†æOR lg{ƒJHÈ;Ÿx'‡?Ü·cXªÏ ^~wü"/`^ˆ^£WJ6›BqR©8 0éù°*óŒ)ÃRíø3žÏ“.ßÍžùy\Üø¥ÖAHmJáltÈ’ÅÅÂûDÐI‘"‹ÉåT¡Âß<ð7ì½s/Ã/ÆÂâËõe.zãE¬Û´Ž»÷ÜÍSžò‚—¼nÏëøîáïò‚׿€•>Ä›ßüfîýî½|è¤á8ÞWk¹¯R°afÿ…¿ç¯'G8Â%›/ÁÚh¨=7Ù„O„ø3>îˆ Ç †ƒ¸sšùóñ¹ýg·sé“/Úéâ™æÅPž<åMåèÅEq¨Hf(Q¼âÙ‰‹7œø¼8×´eEŠdÎË4_n¨ö×0¯L{ÈÆrâíÉz§›ÿBBŠ‹|…¯Äë_÷ñKÃÆSµL™ÌqœáDŸæÜÇÝè%›ø¾ó¿v¯wÜDÛb©v|ðŒÏrîp­íô¨«&š Ï,‹QNÏdœÌ iaqåyæH»â|;ã¼ä‹;s˜ËíÕ8&\îËÌä‹Â…úãÒ_M¾8_(œ±ÛžRýíÕÚ?þöq'6s—ûv*@ï¢ÿ:uoÈ})ïÞþRhör{!ç“$Ÿÿ0/ûáËÚÛQµG\yå•Õ÷½ï}ÕJ¥R}ÙË^V­T* .»î—>W­º®[­ªÕj¦WGª 2W^ye_÷Ý® W«Õê¶7½®:ú®l5—ËU|õÕ†•ˆÕbÇ7ß|sÕþƒ¯nú«ÓÓÓÕT*U­VªjËJõæ›o®^{íµ}ÛÿRìøÚk¯­Z¿•«~âöOÄ3ÕjµT­NOOW …B49—Ëõ휔þpíµ×Vo¾ùæ¾ì{©}ãõ¿øíêåÿþ¶E·›ÉÄt:®NNNV«ÕjµP(Dö>==Modrr²:==Ý—k¢,ÕÒ¯¨V«Õ7½éM±}ªÕÃÿópµZ­Vggg«étºoç¡ôŸÕbÇÒ:öâÛ«¹é\5•JÍo3g«Õª6£GKé÷Ä#njjŠýû÷sÝu×±â¼Pn†g>õz,ëó6z: (Ëb96ìl°¸îÝï7_²hb{¥ï,ÇŽÃé#ÐI;O€IDATüâèÃX–eò—”Ñ2ìJ_YŽèµçñª ^‹-û¶p¾{þ¼ü8™LEé˱ckîï¸þÕ $’Òû6wßv7|×GÞšÉm¤R©ÈÆ“¹«òæÔœQJ»,ÇŽ7lØÙßý_»Ÿkn¿Æ„@[V]Þ5Eé˱c€êCdì A.¨oSÅë^ûËÊÓ‹ìÝ»—‘‘‘èûöíÛÙ»wï‚ëüíëßdª>mÅT~êCxðÎ;{¿ÓöîÝËîÝ»û}ìÞ½{Ñßl-³>räýó!¹L_„8µŸzážî'˱ã]gÝÁ‡ïýU¬ {®ËuØ+Ô~ó8úÅrìø©ß~*³Î,ŒÁ¶«¶qÞçϧ·‰ªÅŽõ8ƒåØñÝoü¿ìqƒ‡·=̉¯>²pö޳ëB¦“ƒÇÅ’KePÚ=ŽÁ`¹ýãýîÇ`6y3yoñ´;Ý8níǨ/ÍŽ÷íÛ‡ïû¼wçæ…H`Ýb2>+̳ª8Úí'‰ØF;ôÄ#îàÁƒlÙ²%ú><<¼àòßýîw9æïá&û&¾ô²/ñè†G¡¿ï¾}ûúnX?ü0sssLMMõõ8î½÷^n¾ùfN9唾ìÿСCÜ{ï½:t¨/û_ª <ðõ¸à#ðÏÏÿg¾òНÀzÜj?õôûž;~â‰'ú²ÿ¥Úñ-·ÜÂ=ÿ~Ÿ™û ¿ôqö·ÎöÔþz„ÚÏ`ǃ>ȃ>È駟ή]»z¾ÿ¥Úñ—¾ô%þmï¿qÚi§ñÔóžj&:ÀU½=îA±c=Ž˜}ûöñØc´•¨“,§oœú÷'y›F7™¶Xøpí¯G×LÛÁÁ9ŽÕØ?þñÞsÊ=§ð¡|ˆ[_qkÏÛbÐþq#ý¶ã|ÙÙÙUÓ?Þ·owÜqÛÿ};áB¾{Æw¹ç)÷°oË>zÝCf¡>ŒûúE¿ígP¸÷Þ{™åøãokù¬šúÅ/~1úü*^ÕïÃQ”eñŸwü'p¿Ío÷ûpeÉìÚµ z¯³(JG)—ûœ1YQ:@²o¬(«™›¾sS¿AQVÄûßÿþºï«µø–Ò_zš:22R÷Æajj**}®(«µae- v¬¬ÔŽ•µ€Ú±²P;VÖjÇJ?è©777Ô—¦V”Õ€Ú°²P;VÖjÇÊZ@íXY ¨+kµc¥ûîw¿ûÝÝÞ‰(ÊW_}5AðÉO~’÷¼ç=ª4+«µae- v¬¬ÔŽ•µ€Ú±²P;VÖjÇJ?XW­V«½ÚÙÌÌ û÷ïgddD [Y•¨ +kµce- v¬¬ÔŽ•µ€Ú±²P;VzIO…8EQEQEQEQE9ZéIŽ8EQEQEQEQE9ÚéIޏAcnnŽb±È-·ÜÂÌÌ 
###Ñôù—áË_þ2[¶l©[§Õ¼NËöíÛÛÚW7ŽcÏž=xžWw-ÚW·®…Ò>­lXæ ²wëÔŽW˱ãnþnƒjÇý¸§•öQ;ž¶Ç«µãùh{¼úÐþñ|ÔŽ;ÇÑvmI3:©Ýuqsss\vÙe€©ây™L€l6Y6›Åó¼h½…æ­„Ý»wsýõ××MëåqìÞ½›½{÷²mÛ6öìÙÃîÝ»ÝW·®…Ò Ù0 ¾wãÔŽW˵ãnþnƒjÇý¸§•öP;n¾mWjÇÍ÷¯íñêBûÇÍ÷¯vÜ9ަk3hšÉ ÐIíf}¿O¦×LMM144Ä®]»Ø¶m_|1SSSìß¿Ÿë®»0Æ·gÏ\×]pÞJð`\?{uÛ·oçú믶·wïÞÈe²Õ¾:} ÊÒieÃ0øvÜ ûQ;^,ÇŽ»ñ» º÷òžV–ŽÚq=Ú¯NÔŽëÑöxu¢ýãzÔŽ;ËÑvmA3º¥ÝuBœ$¬5Sð}ŸË/¿œË.»ŒíÛ·ãy7Þx#`Þ(´š×iÚW§cxx˜±±1.»ì2¶lÙÂþýûyë[ߺà¾zy-”æ,dý´Ÿ…è¥ý¨¯N–cǽþÝÁŽåžVš£v<_Ú¯>ÔŽçïKÛãÕ‡öçïKí¸sm×fÐ5“Aa%÷ÓºjµZí÷ 333ìß¿Ÿ‘‘‘y®” Í[ÍDZœ}õòZ(KgÐí¸Ç v¼ö”ßmìxPîieé¨k{¼”ßmìXÛãÕË üvjÇ«½6‹_‡£ñ-çz¨§(Š¢(Š¢(Š¢(Š¢(=à¨,Ö (Š¢(Š¢(Š¢(Š¢(½F…8EQEQEQEQEé*Ä)Š¢(Š¢(Š¢(Š¢(JP!NQEQEQEQEQz€ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ” Bœ¢(Š¢(Š¢(Š¢(Š¢ôâEQEQEQEQ¥¨§(Š¢(Š¢(Š¢(Š¢(=@…8EQEQEQEQEé*Ä)Š¢(Š¢(Š¢(Š¢(JP!NQEQEQEQEQz€ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ” Bœ¢(Š¢(Š¢(Š¢(Š¢ôâEQEQEQEQ¥¨§(Š¢(Š¢(Š¢(Š¢(=@…8EQEQEQEQEé-ÄõûŽŠÅ"Ùl–b±¸¢íø¾O6›í÷é jǃK†d³Ùy¶¯v<µãÕG;v,÷ÀÑ‚ÚñàÒ¬=VžÚpïèeßøh³cP[î%Kµeí·ÚñêcPìx …8Ïóú}m³nݺ¦ŸWÙlÏóp]7j¨—K†ø¾ßïS(V‹7Úíj³ãå0:: €ã8u¶¯v<ŸÕhÇGƒ /D;vœÍfÉçóý>Ôž¡v<¸4kÕ†ç³ZlV·÷ºo|´Ù1¬[^íýã娲öÛg5Úñj³áN3(v¼¾ß$ CÊå2©TjÁyAà8år™ H¥Rض-–eQ.—q×uç-Ûîr !?¤çyX–}v]wÑsnµŸ¥Nov-Û9¯ (—ËLOOP*•¢ë,Û°m›b±ˆmÛ¤R)Â0$ CÀ¼aI¥RÑoÑìÚX–…eYm¯³ÚYvœ´a×uç}_Œ~ÛñB6ÙÊŽå~ÍårX–Õ²¬vœj9=iÃ08v<(mñRÏ¿™}µ²aÙöRÛc9Þr¹­»Vé”í} ¹fݲãVíqã9« ·ß§ë£v\½ºÕ7–yí´ÇG£ËõÑþqoÛã娲öFûÇË›ÞêZRÿ¸×v<0qa266†çyAÀøøxÝü±±1|ß'FGG#e~||œb±­Ÿ gÞ¨ŽGÛð}?r#mw¹…Hwòóbç;::Šçyѱû¾¿äé S;çåyŽãAtÌ™L&ÚÆÄÄããã„aH>Ÿ'›ÍFÛ–ßHöÓH±Xdbb˲Ú^gµ³Zí¸ÑnW›/d_­ìX:(ÉsmöÀP;®·ãV6,×cPìxPlx9çßh_­lx¡uIÚ±œw6›¥P(ôÍκM'íøhïS,åÜ–cÇí´ÇjÃí÷)享Ï?¦nõ“Û_¨=>íXÎóhïþóŸïy{¼[Öþqk´¼zú«B¯¨¹\®šJ¥¢ï…B¡*‡W(ª®ëFó2™LÕuÝj©Tª:ŽMŸžž®Z–U­V«ÕJ¥}–u2™Lô]¶Ýîr‹‘\®uÏ)—ËU …Â’§·¢ÝóÊd2UÇqª¶mWS©TÕ²¬h»•J¥ Tggg£ï®ëFÛ–éÓÓÓu×ÓuÝj¡P¨:ŽS·n«uÖ«ÙŽ—YMv¼˜M6³cÙPª–eU§§§ë–Q;6ˆ·²ájµ:v<6¼œóo´¯…lx9íqµZÚòv¯Ój¤“v|´÷)–rn˱cÙ^c{¬6¼ô>EµªvÜŠnö“ÛofûG³W«Ú?ÎårÕË.»¬çíñrmYûÇÍÑþñêéW¬½b`BS=Ï«sïLºJ6ºvŠk°¸&•ʤkw»®„ýpm<'y;‘Íf—4½ç†aä²,ês:̵–·vÛ–é¶mã8NÝÛ"ß÷±m»nÝVë´ã»ZP;îŸ/d“ÍìØó¼È]_\šÇÇÇ™œœÔŽ›Ùq+–ëµí¸6¼Ôóo´¯[o½µe[Üj…Úã|>¹ÿ¯e:mǃjÃÍÎiµÙq«ö8—Ë© /±OjÇ8¯åôeûÍlÿh¶cÐþq&“!›Í2<<\7 
ºß/Õ–µÜí¯ž~E«uI¯˜ÐÔV°Iãµm×u£¿J¥ÒïSY­’.uúR±m»ÎÝXâ­—süÉmÎÎÎ,˜xv-æÃP;î·:–…‡©Ø:®;µãÅi<÷µ`ǃdòý•¬ÓÌŽ=Ï#ŸÏ³nݺ(aïºuëVMÂávQ;^=v¼P{¬6Üjǃ×7–c’m­v Ú?Ãûï¿Þ´n·Ç˱eí·Fû«§_±Ø:ƒ`Ç#Ä9Ž%8êJ%Û¶]÷C&½\D¡5tbb¢ß§Òç”Ïç)—ËKž¾R\×%‚ÈÈZåhDbÁ“ŸE)–FªP(Ïç ‚`ÑuÖ jÇý±cXº}I.€äúɬÚñ|;neðzíxlšÛ×E]´äuj+• Õj5ú¨V«jǨwŠ¥ÚñBí±ÚðÒú v¼R–Û7†Ö¶4Û1hÿ8ŸÏsÿý÷÷¼=^Ž-kÿ¸5Ú¯X=ýŠVë ’^10¡©étšr¹ÌèèhTµ"9Ï÷ýºRÊ@dÐ[·nKJ…—^#.§¹\®îs»ç+dÛ¶—4}¥Hu‘ÑÑÑ%]C˲‹ëfë8ŽC:&›Í’N§ÛZgµ³ší¸ÑnW“Ëñ.ž2™ žçÕ‹Ú±¡•·²aL;^m6,Ç¿TûZj{\*•zþ»ôµãÕcÇí´ÇjÃíõ)@íx¥,·o,ç¼ív Ú?Ûœ˜˜èi{¼[Öþqk´_±zúí®ÓW;îZö¹eR©T¢¤¥Âììlurr2ú+ Õt:ÍŸœœ¬V*•ºä§½F’ 7~^ 9ö•NïÔñ·s %ÁáììlµR©D <;½Îjf5Úq£Ý®&;^‰}-åºív¼˜ W«ƒeǫɆ«Um[»…ÚñàÛqãñô³?7ˆ,§OQ­öÿz®f;Ns»×OÛãÅÑþqïí8yÌK¹†Ú?^ø|µ_1ØýŠÕЧ^W­Ö|¡),¥½óù<¹\®gî®AÔ¹P7ÒL)]Î:½8®N 9-–¢r/gµ†ÚqgŽ©½²¯£ÝŽûmÃr jÇ+[çh§ßv¼\{YËv¬,ÕhÇk¹o jûËEm¹3ÇÔOŽvÛ_6¼’õº}\ôªÜk;^B˜]‰3O¥?A€çyQÅn­³Q;î½²/µcµán¢mkïP;îj“½Cíx°PÛ_>jË«µ}µánÒ«þq¯í¸£BÜž={˜™™axx˜;vDÓçææØ³gdÛ¶mlß¾½''§(ËAíXY 4³cµae5¡m±²P;VÖ Ú¯PÖjÇÊ pì»ßýîwwbC»wïfjj ×u¹á†˜™™‰Œøío;^x!ïz׻زeKÛÕ‡¥—¨+kVv¬6¬¬´-VÖjÇÊZAûÊZ@íX$:R5uff†J¥ÂM7ÝÀ–-[¢ØÚ©©)öïßÏu×]ÄŠóBîšï|ç;yêSŸÚïkÃwÞÉ3žñŒ¾ÃÁƒ9xð O{ÚÓúz?úÑ8ùä“9ùä“ûzwÞy'»wïîʶ;mÇ»víRû©1HöÓïßàÁä½ï}oW¶ÝÊŽ—cþïó¯ÿú¯j?5Å~á8<Èðð0—_~yÇ·Ýé¶X:Ûý¶ŸA±c=ŽùÇñÚ×¾¶®Ê]'о±G¯£×ýãåØ1hÿ8É ´ƒƒbÇ«¥üéOš={öðìg?»ß—l ûî¿ÿ~6nÜÈŸþéŸ.ºlG„¸½{÷222ÂÌÌ û÷ïgË–-\qÅuó„íÛ·sõÕW/¸½{_ûµ_ëã%4Ü|óÍlÛ¶­¯Ç°oß>öíÛ×÷ãøøÇ?Îðð0çŸ~_ãæ›oîÚ¶;mdz³³}ÿÝÔ~ê„{ˆöÝ •_ýõK¶á¹¹9€¾_3µŸÁ;Ž}ûö133Ó•mwº-–”~ÛϠرÇüã¶®“hßX£×ÇÑ-:Ù¯í'”vpPìxµôï¹çž°ãAaPìg¨T*ÜqÇm-Û1¸™™®¼òJFFF¸å–[صk;vìààÁƒlÙ²%ZvxxxÑíÍÎÎrà 7°k×®º£×<ÿùÏï{ŒøÐÐ'Ÿ|rßcff†‘‘‘¾ýSSSìÞ½›û«çØI;¾ï¾û¸á†ºúPY µŸzú}O‹ÏÎÎvm­ìx96üÐCñï|‡‡~¸¯v¬ö3XDZgÏ>ûÙϲiÓ¦®l¿mñw¿û]æææØµkW߮۠رGÌÎ;¹÷Þ{¹è¢‹ºr~Ýè»®[—k®×ô»ýÑ㨧ŸýãåØ1hÿ8É ´ƒÐ;Þ³gžç­šþñ‘#G˜¶q´ÓoûvïÞÍ7¿ùMŽ9昶–ïˆÆsB\=§¦¦Ø¹sç²; gžyf_gan¬Ahœ¾vúä:\wÝuìܹ³«û餟þù}·cµŸzú}O¯6;>묳ø•_ù•¾_7µŸÁ:Ž;v0<<Ì-·ÜÒµ}t²-~Æ3ž1É—ÅŽõ8b®»î:vïÞÍYgÕ•íkßX£Û¬¶~hÿ8É ´ƒÐ;Þ±c;vìX5v,Þ_ý¾nƒ‚^î]»Ø¶m[ÛýãöäºE®S’GFF"7ÿ‘‘¦¦¦¢ySSS õû:)Ê<ÔŽ•µ@+;VVV Ú+kµce­ ý e- v¬ â¶oßÎþýû£ÆÞ½{#·N1n™çyÞ¢I<¥¨+kVv¬6¬¬´-VÖjÇÊZAûÊZ@íX4:š:<<ÌØØ—]v[¶laÿþý¼õ­oæ]~ùå\vÙelß¾Ïó¸ñÆû}ÞŠ2µce-ÐÊŽÕ†•Õ‚¶ÅÊZ@íXY+h¿BY 
¨+ƒFÇrÄIÚýû÷322RçÒyÅW066ÆþýûÙµk—º{*‹Ú±²heÇjÃÊjAÛbe- v¬¬´_¡¬ÔŽ•A¢cB¥¹U¥‘…æ)Ê ¡v¬¬ZÙªÚ°²ZжXY ¨+kíW(kµcePèHŽ8EQEQEQEQEQF…8EQEQEQEQå¨" Á÷!ï@øÕbýrÙÚ_°¤­·F…8EQEQEQEQeàñ|í³ç™¿FÆÆŒÀ–\G#ªåóàŽÁÖ¬™&۽ˇ?Kl÷/·Â]ykØV€™6ì:ÿ|î}êSÛ:‡ŽæˆSEQEQEQEQ”•òOüA‡ðñi#|½?„ØxeØ`Ã#¼o޳á½9ÈdàŠ ¸ ·Ø`cÖ•ÿSÀgB¸ßƒË§á‹yãñöP¾™†Ðÿðà=.”“ðÖЈx〠ä0ÂݱY¸ضíÌ<ø`[ç¥qŠ¢(Š¢(Š¢(Š¢(ÊŠCãe&^j>ðÜ xAÍãLB<'0Ó¶øpI`æ¶®H„Šþ^~«¿–‚ßÍšé¯ÈÃ3jÂÙ?xp>Âi.üAÎLw]¸? ß{ ‚­ð÷”€iÀ…£°! _±às9 €"X@.SeøAÿÀ‹,°mpjëÛÀE¡9ž«SfCCO´}TˆSEQEQEQEQæ16Vþ™%ÏL†þ „sGá¯BxÕ„Õ&0ÖäŒØöí<œíËø‰ûøæ¼/„ç†ðenÉÂö"l÷á©\ëÀ3ð+¡ñBû»|¼•^íqíïløtÚxºÙµcÜ‘†oW R\þ«&²LN¦áßÒÆ»ÍÞoÁ‡rf¾eÁÓ¸h>í™eäZ”Gá–¡8ŽâN>ùgm_S MUEQEQEQE9ÊÉça[.´bÑêY85%ªBø¡çxfùÉI°ø¥ž†¿ÏÀDŸò â‚•1릀¢¡Wfà “fúS@ŽØ°!€*Á'<ø‡Þ•6ËX õÇZ,¬‘&|t29-eþÀxíYµ“sj²××Ù†|a*¾¶ f¿oK×ï÷oÿöLž÷¼[ÚºÎê§(JGèTEQEQEQ¥3ø ÍKÌ ø`^1 ÏΛ|hßÍÁ(ðLÞ² 6ÁÞZUƒw–àÞÐx¾]ïÀ2Æ#-“‚ÛÇ!l$º.”ËõÓ2ãq÷å¢Ë, ®HÁ]ó+ÊåzáLHa<çšhtø>ŒŽ¶wÝ\*©zAÏu˜ÉÌ_þàÁõ|ó›ç¶µmõˆS¥m|ÌÛñL0 \91Ï24oø ˆß.äkëµÏ9 ÝÆ6Zm+×ï‹¥(Š¢(Š¢(ŠÒGŠ˜0ÍflåÕ>¼u~fÁ…Ž™öçeøY ¾—†Ôl5Þn`ò¢سfÜ5om>l(‚›¼¹.ÌÎÎ?Û6ÿ™Q0Â[¥b¼ëœv‘5¦§[ÏË,°^nÅTÊxæÉy$yùËgÙ·ïH[ÛQ!NQ”¶0.¾‚i°}LÌ|©6ÝÃ4Ì“‹lK7yI’Â<( RÛ†”•®‹lÛ™¨}ö1—2æíMP›&ÑÚ1…Ä‚¡¢(Š¢(Š¢(Ê ãç(k6Ú|#º›Õj0wå1c çÖ<Â6Yð›3þfCØ“‚§[0Y2ÓÆˆE-;ñXû³1n*Ó¶…ëï»$–µ2¬]gib_3Ò-ÎóyÏ{˜jõÞ¶¶¡Bœ¢(uxµ¿ q+¢Všø!Ъý’d—¦áw1 t¹¶Žˆkví¯Òb;¥Ú>=b±¯‘"õowÆ1ofD䛯uÒÎçÇ!ǯžsŠ¢(Š¢(Š¢ "y̘Ç€ØÃM"<­Úg3†’ÏfœdyŠExSŸoÍ÷á̬ñJK:+jûj¥¯ù˜1Õkmxm¶ÉdLž¶5C±ØZkæˆS%bÓØCüDÊKKjRÄjEº¶¾U[WÒXOb µ×âq×ÌNºäÛ!S‰cL>H$ì5Ýd[É4ÙÚrkéÙ (Š¢(Š¢(ÊêB<Ú&1BÚxí/MùSÁŒ«,`Whf¸˜qRx^ãã±ð•ÏÏÏoæ8ð|†drIñ¯q¼°0ašöÂ~øÃoò™Ï|°ß—·ž0äÐ%—pèúëÍ÷rÙü¹h­e–ˆzÄ)ÊQ„GìBœ§ÞÃ- &”ÓÁ4ìYÌC ]?UÛ^ãY·Ò0Ð,±06SÄå©Ĩ֮Û"&EÁ2±çµÏÉVEQEQEQ”^“ÇŒ¥,ÌXeš8ÍNc ƒP,ÃXÖäuól³ÜÇjyפJh.×\4[jX¨Œ£–àQ(øô§?Í•W^‰ïûX–…½%¯ ‚ À²,¬r •âͯy Ÿûæ7£ùv±ÈCCC–Ŷ|€¯=ýé\øÖ·râW°ëúëùËC‡8î©OÏãÐé§sâWÄ;Èf™{Å+òýeźªG\Ÿ×”/¦2huÄ9¨}AmÓ˜ç1¹äuK¶¿âÞª-çÒ>Ò(¯´I•|¹Ú¶$UD¾f4{89µsO9¿až„ÀÊYƼqÊ6Ù~¶áÚ*Š¢(Š¢(Š¢´C;J@€o4:$£…jSäñÆÛ-›5c(#°%=àR©ÅöÜ>1ÕH±X$hð ÃlÖŒ¬*• çwA°çW•¿}Ó›ÃB&³øNG>þq¾þÒ—Ök*a›6ç‘ÏçÉçó”Ëef§§¡\fÓºuüË]wá×ÊÄŽmÝʾ}ûxâ3Ÿá¹ïx”Jü¯xóÐÙl–¯?ðwß}·Ùîä$ǽå-üùk^íî_/¾þõP.óûÿþï¼ímoãoÿöoùÞ÷¾×Ö9¨G\‚ß÷I5±ò Èçóø¾Ïäää2¶®(óï.ˆó£ejÓE¼*c¼Û’e€ñ. 
1žpIÑ,i½+ÌgÙìÚqˆ·\äá%¹ïBb÷n71O<˵ÿ=Œh%æ+Š¢(Š¢(Šbð< "o$Jd\<11eYT*••ìfU#Z])FPËÙšdIÊ«yäóFXs]#¾IÞ·‰ Ø?/¢.àbÆIIaÐ÷}œšwX±X$“ÉDvðŒg<ƒO<ÏóxôÑGù³Îâ¦g>“?ܸ‘¾ñ *©oüêW§Ùm·ñèÞÀæO|‚°æ)g‹Ñ ?|à ì:r„ü sss¤‚€Íßùßä~áÆ±/¹„_ù•_á¹Ï}.Ï~ö³¹î‰'8â8:óL>xÕUüÑý¶ërÉí·S*•8Ýq"áð…»vñ×ý×|ä#áÍ—_ÎÖŸýŒXÜ{/Ç‹”Ëe£é|á ëÖñÀi§qçŒÿéOóÒ—¾”g=ëYm]?õˆ[AàyÍýa|ßgbb‚b±å×ó<²Ù,a266¹]ʲccclݺ5R‰ó‰ä0 #£(—Ë”Ëe&&&غuë¼}{ž‡çy‹ÅHéUÖ&~âÉå¶)ñ9‡i¥Ò©É-Ð(¨å0"Ôbá£ýã¤qwjǺTÏ<ÜdÉÜq2O$É÷6ví{2ùi¾vÍ¥xE–zÏ:EQEQEY«AY·nÝʦM›%›Í2::‰"¾HÈáÄÄããã …ºñíÑÈ—}”¯>Üt^ˆ»e0iŒ—¼\Ù¬™–Ìõ–N×_(à—ØbÑ‚ àCúP<¦ C^óš×066F†¤S)~ýYÏ¢\.síð0¿ñ‹¿ÈÕ^ÈtòÄ]wñ¶·½Ã‡ó øØÿ1OÿÝßåÕ7ßÌ‹6o&C>z×]üÏÇa|œ|ä#üËïüx¯}îsùï~—kŸõ,ÜL†Ûn»¯|å+üÕw¾ÃýGŽð¿ýÛ¬ÿÂp‡—¿üå„aÈ%—\Â=Ïxnrñu×áû>žç‘N§yÉK^Âøø8¾ïS.—I¥Rd2þñÿ‘‡zˆßþÓ?å‰³ÏÆ²¬èÜòù<ï}ï{Éår¼ó¸ãø—ŸýŒ\.Çìì,oxÃøÉO~ÒÖ5T¸%’Ífñ<Çqp]#ˆØ6>>ŽmÛ¤ÓiŠÅ"ù|>ÇlÛÆuÝèÇ ‚€ññqÒé4aâº.Ùl6RŽFGG£Lb§Ã0$]«ÈQ.—q]˲¢ýA€mÛ„aH*•Âqœè/ÉÄÄ™LÛ¶#åZ\He=AŒ5•JÕ-¯ô -µ09ŠÄyÜ‚ÚüIbñj¢öy¡\gíäAsXšøÕI’™Mœ®]D8³ˆs,$EG‹XhLæKþ‘˜ŸJl×#.ßÀÄÄ?úßÿ›“ÿè8箻⒭¶MÎÏÚ¶íº}¼ñoä#ù³³³¼óïäù—œyì±¼çv>L.—‹t€R)%ʸù—ÿþïyÕ·¾E¡P ›Í’J¥Ìuªi®ëR.—뜦§§#ç©$ò½X,FâœeY‘=‹³ŽºÐTß÷#OµfóÄ5Q.èØØcccLLLDBU©TŠr½ñL›œœ$NG?V&“arr2jXR©TÝ %oiÀ\×E˲p‡J¥R·m òZ“X¹‘äft]—ééiJ¥ÓÓÓѺãã〩T" j©TŠŒ4“ÉDo<ÆÇÇ#NS(H§ÓѵI†¾Ê5•ÐÙ Ø´iSÝ2£££.»D&&&æM“0Ê0ËQZàšA@†þy²u’N?šmŒ§[2ÁhRòt–“Ò܇³JN9ñWëVEQEQ0 £ôH2.Ãu]&''©T*  …™L&ǺngF’#nzzšB¡@±Xdtt´ß—¥g<ú裺ýöºþ!‰Èœ0ä—ùS„Á÷͸îÏþì26n¼–o|ã³K[7ŽÝå¯"ïû¼ð…/dll €ç|ï{|ýþ .ÂîÌï}7òú׿ß÷¹if†¯¿ýí¦\«m7-ÕÚø»Û¶Í)§œÂïþîïòÃþ<ðç{,?üÍßä¾ýíH‹É,àîwì¶mÜxÿý‘ÓS£xœN§yýë_?O@keß©TŠ|>ßT„NFI.•£Ê#. 
CFGGë¼á$¬Ôó¼HõÉïyÏýÌÍ}®æœcã80::EÙIÔÜBâèèèh]aÈl6‹mÛ‘ð)âjªE Õ‰‰ >ÿùÏsÝu×±aÃ^õªWaYûöíã¿ímüÉÜ—|ë[Üê8|êoþ†¿8ï<3›ÉàÖœŠ‚ XP0š¯L§Óq$à»Þãã¼ü]ïâE¿ökm]c'Qx¡YQÛ¶yÏ{ÞÓöo&TíDS.…5)ĉ$+Ù%Õ÷ý¨x‚ïûض£¸¦ÓéhZ†Ñ6 …BS¥…èFÈfÒs)ض]gøbð@$ž‰[rãzÒ;Ž5èét:Êw'žƒrå÷€Ø#Oâ­C/#ì\è@RíÆx·ÝÆÄÿq$ì&à±±±¨A•på…h±ñN7*«…4¦C²Ëa×þ’á¬YârIO¹ ¶<õ¢žzÆ)Š¢(Š¢($›ÍR.—£P¾…BAeüD¡¦`DdÊ%AÆu­h7v‹#GŽôêrö 8òµ¯qÆáÃÜ=7CC”f¼‘²nÀ¶ËÜtÓŸE….D—(‹xžG¡Pˆ~ãF’oò»Š˜*Ï8㌖!•ó7Ãç?ÿy†††¸æšk˜šš¢X,òª¯}ÿìgüå_ÿ5G~ó79÷ç?'¾óïð»wÝeVvR‰|‚Ë 9®Ó, jZN»Î%©Tª£Å@Äj’¬=qŠë®çÌïÅ9â¤àmÛ‘‡V:ŽTYÛ¶Ùºukäyæû~$$‰w›xx%EФ¢›¬|ºZh<Öä[F‘°ÕM#ËÉºÉ ¯`nœ|>y ÊÛQðEô”dÓ¦MQî:Qøåúwò­Ëjâ·öìá›Ï{›òyÞ3<̇>ô!¾ýÅ/òóW½Š»jo¬2™LäÉ ño+×L#” Á/Hi˜²Ù,™LfÑ7k…"÷Ûµ2#¼y‰õ’¦W[ÆO|VEQEQ”vñ}?ò^Êd2”Ëå(ϸx°‰˜Dó’(’G\¢Ÿ’ãÙA_ýøÇ?î÷!t8ôŸqßÿ#·ý9¸Ø8HœX¹LT ãæ›ç‘G¾Äcðd˜pr†a^ DÅežmÛär¹¨h$Ä2Âk^óî¾ûnÎ>ûlþ×ïýg>ç9ø¾Ï7ÞÈ{ŸóÞwòÉ÷ŸÿÉå—^Êæ?ÿsœr™‰‰ ‚ à‡†à¼ó¢muʃr||œR*eÊÆ&G|Ì…kâçÔ¬P%!ó}ë€YÚ ÎÛ¦Äm‰HŒûÙÉ?ãÞ_¸·­Õלçy^TdŒ—xx‰àÖ˜k- ÃyoÚõ^k|£Ð DTé´·X»‰Çi¹l³*¹¬Äüužpâ"*Éå7‘0Õt:å—+‹”J¥£2¤2¾µ};/X¿ž¿þõ<þýïó™'žàx×å܇ŽÞ„‘Ð,׺ѵ8é±èyù|Çq¢{Dl¿R裕Ëòj£yuXñt[‡XÈKa<àlL»\Ì3¡ÙM…PEQEQ”$ò‚=éí4::J:fvv¶n æº.uyÍeìêy^ä`²š°m›û߇ÑU&&&øÆ?È£ß:È©§Î²ÿ“Cd/† }Ï›*§ùŸÄ;Iµ•ì$òbïÇ0 9rä_üâyßûÞG.—ãsŸû¿üË¿Ì×>ùIÊ·ÞÊ1?øA4|Ëë_—\ŽÃæýûØó¬T*ql±h¼×šçJ(•Jf Õ(A„‰¿Å„´#˜5:µ5& _*±‚ZcÓo‚“Û[}Í q_žl˜ÄÅ#HèDþ¬nxĉxÕÉÁ¥(Ò…$;ÿÆk\WÄϤ*.á´ò6'Y…DŠO­Œ‡!v¹ÌÇßö6¾øÅ/òö·¿<ê† üÅä$oª]Oˆ«Ìˆ«z£Ë¬xÊuáβ,fgg£Š¸I|ßD;áZ q"œ®ºVml;Ýqû.íuòŠ8‰ïaÃÿåÚç2¦Rž…½óEQEQ”µä%/—ËQ¡mÛT*•H,it DdñMÆ¿Åb‘|>¿ìÔEýIJ¬5+Äù˜T7žïóì;ï„“žÃöí{øöÛÿ?¾î sÏüf FG¡P€|>¨ãnâÒKO©Ç'£ÆljòÅKè¦8]H¾{0¶–܆äos]—þçæðáÃŒóÍW¼‚3¿÷=^qÇ<ýéuçà4 …® ‘nvÔ¡ÆÃ À’~Rf€%ÕóBr%=#$|©q€(¢_;ÒÉf€‘ ø´zìlï´Ö”'‚)¢\c(æJ=}ò4–"ê-%,4™h³IoÃÆª"ÉØmù’e„6þü¡‡øéOðªûïÇžsæ™<ïÜs9výzþèÄq^úR¾Ws3J¥›6mŠ f4"~“ëˆÍZ–…Ë )fR(‚ .ôUÖr¹…u¯†Rãq8érhÖF'¯t³ŒÉä«ò*Â)Š¢(Š¢ÍÈKñdÞ¯Vã¨Vã7)pиÝb±H¥R9jó?*!f,bår|ÿcãN}>ÏxÆœ³{7–•Ã-ãÀM7½ ŽÆW×^{®û»-·+ã¿äxLlC" ÀŒãÎ?ÿ|²Ù,¹\Ž0 ¹àk_ã×妙#äQS)NÍçaŠ*º˜|lI$¬©|@YLhTãmÑ(ΑØORô 1¢[.±¯°a(ÀìËgi—5#Ä%s½Y–E>ŸÚKN¸¤‘vÊ‹g¡j#+!™ÛkYH°kL’ØLà“øu‡¦§§)‹u Çj}³³T®š›ãuþp]èiãy7³ÛJ¥²h‚Õ$Éß T*‘ÍfI¥RQˆ°çyuEK¤¤y*•ÂuÝH¸“$œ‚üŽ’ëo-„´F׬á{jåŶzO¹ÁÍÌ¡(Š¢(Š¢tÉá%är¹Žy ‰gT©TRnñ€ o¹…Û\—áÝ»9pàõœwÞ|÷»±#Y>Ÿç‚ 
ŽÔ9?,6–ZÈ~òù<‡޾Ÿzê©Q±†éÉIgdóÎf<—ÍB:m(•ªÏÇÖoŠÌÏça´ÆzeŒX–p‰ÿå´üÚ2˜J2½™èç5l[Ö—áwØ „ð£7üniï´Žé÷uí’ÈR‚ià$$Rb¥»A§¼FϱNÒÉ "‹Ñ¨Ð—J¥ºï"Öˆ÷•eYQ± 9ΤH711ÁþZLúZã›? AÀGÿüÏ—üÛ¯Ä+MÖoÒ\.W—1•JEv0 a(_…üNR”cëÖ­ø¾ x2?™·Q‚ Ú^·é¤ßÞ4­ó½É~äL“ÞpAíoõ”wQEQEQ:¤â™œœdrr²£c=Û¶›zÉ)ƒÃÐg>î™þì’K8ùä“9ãŒÃuŽ'Åb‘B¡@¥Ri;WýB î¸ãŽhû/}éKIYßxíkùþ;Þé4C¿ó;\þªW™r¹XìtÄS–ö@cM¦I!âXÐdÙFq.Ähyb‘Mr ¥¨÷¢Ê{ëgh ¢Dâf6´}YÖŒGœ ô…žèÚ,ëjÐÈår]«ÆÚO!®Õ|ñ®ASbÛÁqI±çœsÎéÙñ÷’¿ç.|â þ¤Oo²äÞi,ÎæR©cccóªAÀÄÄ`<öÄ«.›Íâû>¥R) e•s“ñ^Uîd·¤Õ¯#ûv:yfæ9¡qŠ¢(Š¢(GR(­d2™U“.éhÄîù÷çüýï$•KS.ƒëžN>ŸªÜ–J¥ŽU´ ‚€ ¢qø×¿þu³mÇaS6ˆÛo‡©©Þy½‰÷YR(bÄ´v‚Þ‚Ú²>±gœñDs¨÷N[(è1M¦ê&¦%)6Ù¦,#"_ºvÖ—ëÖMÊÀ9ßøo9ýô¾ì_B—c•Ä“$ùʶÒét]¹\Žªúˆ'$˜‡$¯#¤E¹ÅmjbbbžèÞH¯¤M‡úÐÔ€¸Ðƒƒi·mL»¿­GǤ(Š¢(Š¢ôÉ ç8Ϊq²P:Ï©³³lÝ:†ç!Nü.¤zébc™¥b[öμÿ©OeûÞÅ‹_÷ºx¦ãt&ô4‹Ø´#¹„ÌÏñõ¹Ù Îá6N}5<©bêga/E,ÄùÔ |2Mžn 3(+cò ‘X>][GŠ8RUª¯úÌ Jphï¡¶sÄ-*ÄÍÍÍ144Ôæ/ÒÄ“¦×o ½T¦‚Î(¡“kIˆ“j¦|è!¬¿ø œû·¾‡äPì4™LfÞ=™ôø![„ô\.]“d˜ìÄÄD]XkòúI>…AÃÆÛ6a©2„¡¶SHq½Û/» n¼‘§e2lïV>îVEæ l9L¡¨Çl⪥`ĹF/»€X˜Ka<ñ`~UT?±l£÷\"8úR Ó¤Zjªö]òÒ¥εƒÁ…ÇÌḬ̀sçN¶lÙÂðð°¹F™ ®ërÓM7±cÇŽ¶•ü¹¹9vïÞ=oÚõ×_Ïîݻٻwo玢$ÿɹ^Ñé›g5’Éd˜íøv“Z¹\ŽL&ÓSÁµWvü‡3<5Õ÷‡O2,´—ˆÈÚ¬}‘ßÛq …¶m׉oRi·X,R,£P±XŒBY{´íâA]w®‰éûÏ?¿'ÇÓhÇÝl‹¥ôºO¡(Ý@íXY+¬õ~…ïûd³ÙH€“œÇB†LLL°uëV<Ï£P(Ëå(•J‘çÛääd”.©YtˆÒœ~ŽóºiÇðÈ·¿ Ø„!¸®ñ†ƒ8·öÊvàñØÛÞåÒþÎM7qþ¡CP‹.êZ^Âbí/©óya­™¯D2œê½Ì’y×ì†ùõœx54 iYê«â%CVi&߸µc—c´1âa³Ô€X\Ç‘'Ü®]»bff†™™víÚÅðð0W\qSSS‹np÷îÝ\ßàþ˜Íf™™™add„l6ÛQO–r¹¬á¡k y³±²ßkºmÇòPÿìO~B¿­·¢ÝÜw»…Tlj ?€¹†’gN*´nݺ•b±…ˆ8Í+wºjkÓŽghí±ÝËŒ’vÜͶXQºA¯ûŠÒ ÔŽ•µÂZîWäóy&&&°,‹ÉÉÉ(¸ô‹Å"ããã‘·[¡P¨ƒº®ÛÕb Jçè¥ß~èv21Ì´sûvó¡\æ§ù÷¾å-LÏÎòÅßÿýî_ÄrÃÿ`D,›z!ÎÈVõ9zdè—IlCª ‚Á¤ðBrð”f~Ø‘UÛŽ$è†Ø#.O½Ð'óà©Úôä¾JÔWOMõ`¶Ë/¿<šX©T©Ë ·eËæææܘçy󖙚šbÿþý\wÝu€Qœ÷ìÙÓ‘†JªÏí^ií²Üª“½&ùPkˆŽ9Òõý÷ÂŽEºï±Çø¥ï¿ëç´«AÌÎåróŒ’ßÎuÝ(@ÂÕ%¤ÕqlÛ&›Í299‰o"ì5v¤:Áb²â‘Nèúõj´ãn¶ÅŠÒ zݧP”n v¬¬Ör¿B^ôV*•º—ÃÒWÌçó¸®EŒ)«—^Û±\14Ng •^uÏ=Üòš×°í¶ÛºóN^Z‹º´æ@ÕQ¤XA’Ø‹LHaD«‰6¶)á¨I8»a[EbÑKæ9ÄUñº|Ã>Rµií¦oe Ý,»ÒšÕ"Ä Í t[ˆë¥ûÙ®]£NÚñYgůüʯ°k×®®¯²úرcÃÃÃÜrË-]Ù~§Ûâg<ãlÛ¶­cÛSÖ×]w»wïæ¬.x)÷£o¬}Hÿ¸›ÂV¯ûǽ@^º6†¢*ýaÇŽ ñŽ.މzÝ?1ZÓñ_ù ß{Vš|V€4>¶[s4È‹½‰jšÅˆlÍqM$«aZ±öÝ!eñÎmXÇi²íÆTèmÂí%°k×.¶mÛÖvÿø˜äоïãû~QíÚµ‹O|â 
Do¹å®¿þzÇ©«t(®ža¯o—ŠïûQ¸Y¿½‰”µC/í8îüÅ_d׆ ý>íU…T¯j§ÓäºnXç8AN§£D¼òð*—Ë„aH:®ûˆlùß÷}ÆÇÇ—}Ý MmeÇëׯïx[¬(Ý ×} EéjÇÊZa-õ+$ªBª¢ªwôÐK;€QŒÎtßW¿J¹ +ö Cð}H§ëÆ6ËŽÌ2ßMr¯5"B™\/éý&ë&ÃACW%ä´Ñ{­Ä|/·¤·Ýdýb ´£7¾ÕDé333LMM177ÇÐÐPG¼K4$uy¬¦ëAÏEÖ^Ùq¹\&´m^~ꩼûþÏžžãZ@Ä´å0==]÷]òÉ5&ßÍårø¾yÉŽŽF6™ô. ‚˲–Ô‘³€-ûöÁùçwåú´²ã™™J¥RGÛbEé½îS(J7P;VÖ k©_!‘¾ï¯šüȃ„¤ìêÖ•ëfhj/í8ÀhH9à+g¼ˆÇ[A¡†rÙp–Åí/x)£Ô<+Š ˜_´\›žŒÆökÉÏd± #¢A½§š“X?9lá®ëa5Ë#â’e|GFF¸êª«ê.—ááa.¿ür.»ì2¶oߎçyÜxã+Þ®ïû¤Óé¾'º_M¬¦b ý⢓vìû>v*µheM¥9a’Éd:bÏ–eQ*•êÚ‘B¡P7?—Ë1::J.—#еà¦M›p'ñ²ÙlÛoWŸûÙÏ«_ÝÓëÖ­¶XQz‰Ú±²P;VÖ«ÉŽËå2¾ïGá¨ccc5ÎXˆf‘yíb4·ašG쬔t6r›¬ŸìÑ–1NTNíÿnè#Ý.Ê׌nÚ±çy<þø«—å åºö}óçy|íï —Ë166¶l§„ˆ4æGMÒlhÕÌK.YÅšhX¯±jžö {°µý±Lµ={öpÕUW1<<‰r’‹i©È?áŠ+®`llŒýû÷³k×®¹{ŠW—„©KñÒX-×+Ø/ºeǾï³5›íV–5ä|³,«c"|ãväÁ–ô^"¾ï“Ïç)•J‹Eòù|.+íå %õMÚq'ÛbEéÝìS(J¯P;VÖ «±_‘ÍfI¥R”J%lÛž!ÑOò=B´š­@…¸øä(ÆáH<Ñd$×èÌ”DÖs0)¹rÄú‡§”œúRø2K˜Â¤›®M+×Ö-%¶¹µ6¯q.èµ7e7íXô§0 9昧²œ!KäpË™°Ôb‘ñÚ¥R©¬ü¸ÔW)ò‘ND2'ñy˜$®Š*ÓŒQ4 kbTèŒÑ¬rÖƒ‰™v]—;v044Äe—]ÖÑ ·]Mg!$?T†ê · VËÛŸ\.×w!®°ã N=•÷÷ûdV!ŽãôEL–}J›“¬Öêû>ÅZ²Tù,¢Ü uô„NµÅŠÒOÔŽ•µ€Ú±²d;‚ áúý¢TŠEŠàVƈk!q>{qPÅèAmúXm½qâøâÙ&~Qvmš,Ÿ!vzòÛq­Ùñ‰È6†ÑiF‰…:Áè(~m{ãµe%ÿj¥“v,¿›ïû:ôöe…¥&í5¬Lƈ¢å2A¬<´z4¹ñÄç4õ9ݬÄ|ù?_û,‡`3_¶kó'šì[% Q¬!©üv"$µ›lyûÛ—µžçA@±h<<•ÁE’ì¯EN{Å+pÐËAr´õrT«Õºcñ}ŸT* sÅb‘ÉÉIÇ¡X,Ö½q“‚8Š¢(Š¢(JwÇqœ®ˆp^í¯Xû.½»râ³x¦mÅVybï2ñ„+`´ Œ€S©,UŒÒ$q.ûÉÄö}Œh6ZûóðÒ…8Ïã‹/xžçccqÉR×uW–Nå49,‘üpI¡Ì#vñ«4¬›d’ùñÊ)Tt[€E‹5 "¾úÕpÍ5-çû¾IˆØè8“¬XR.³,7QEY)³]ÄŽ~„²(É¢ -#aò¶mGnä"$Ë[+ñ¤Çu]Ž=öØ~Ÿž¢(Š¢(Êš%ŸÏG¹|;Mˆ½ÄqHÄ·F`“È=É5ï$¾oÅXÉÎ ó 8M>‹ö‘J¬7–8ž¤'XAm¡PÖF’Dz†$ç—Ãh5ãÄ)Å–#}>å)OYÆZƒÉ­·ÞŠïûLNž³äuoûÀ¸÷¸ã¸¤ 499Í[j‘¸–ZLoÜ´GÃ,4Ó[Ê`eì("!njjŠÝ»w×Ílü¾k×®~/år™¹¿þk.úÝßm¹ÌÄd2Ft ÿQ†·;FxsÝØ3LC1¼$„sŠðn- £t‘0 ùÙÉ'ëËU€TTmy &Ž–eáy^”kNÄ9Ïó8å”Sú}zŠ¢(Š¢(k’‰‰‰(B¡„±-…Ñ)ò‰ÏnbþF`+b±fýýi–W|!Ir¸Z©í?KëÐÐÆÜúÝ@ö!Q‹ë0ZÃÑçU™…}ŒÑezý\øÄ\xýõ󽊺ÉVŒ¡B}Å#œ”hߠ޶ ¬=55ÅÔÔT4cûöíußß÷9|üñüÒ/ýÒ¼yù¼Ø|þ܇†pšŸÊÂ4üBÆØNÁ†ÏûFµO/ñáccðX1"^7C#v›TYø¾Ïág>SÛ¤U@»oœlÛŽ<Þ’ËË4ÏóÈçód2Ç!•Jñ¥/}©ß§§(Š¢(Š²æ ©’Jd9Hþ3#,¹ç ùì낈r­ ;‚˜µÈ>{EŽz‡ª1̵¿£)-ÏØºu¤3ÆÆ íÚ å²ñ À+;#(7ºCJ\uó#Éñ‰‰‘»‰ïç‹“uÖf©ž±`ÇŽQ¡†AÆó<ŽÝ¶ €K.¹dÞüLƈq™ |Ô†ÿÈÃ+'áù.|7€ïû0☰ÕÛ}¸=Ç8p¿g¸3+pcÒéætšéÙ¬ù¿Q°›˜0B`.gþ·m³l&cî)ß7Ëø¾Y¦× 
bñƒ£‘#ÃÃGÕƒh­S*•ê¾' :Hx«ëºQA‡t:Ím·ÝÖïÃVEQEYS„aH6›]qUI£]HIãda<Ïš ãŽFϯFu™ñu„X¨ ¥C$«7'?Û¶M¥Raݺu‘ §•žEQEQ:O†Œ‘Éd–]ð-™ûMò¸5¢)¯ÚGÂUL´£xºÌÏe·–xà§?̘~I]ÿL‡“§¦°m;*·"‚Ä_³ÛBŒÜǨÌŒØ6Š1öF?â”UMššâÒK/eÏž=lÙ²…‘‘æææ¸úê«Ù¹s'sssý>VîoáM”ÍÂóòÆS2À(íibñ÷ß+ð93ÝŽ gÃsðºÚ ’þÌß÷à“²àýyð]xÜ1÷†eaM¼ï ~Z¥IïÑTÊÌó}³¼ëš2›µË2óFGa|Ü,æ»ì£XŒDresë¦MFˆSÖ,ᬺº”œsŠ¢(Š¢(Jû”Ëe\×]²xác†ÆjÆiC_v£éȹˆæ‚eoq°ùÉ)§,Ïù%Çaä²ËSpD Á­o‘ù!±Ñ[˜M¼›’X˜r¾ÊŠ9Œ'ÜÎ;¹üò˹馛صk»ví⪫®âŸøD$Èõ›0 9r 8Ô߸žŸ à i£°Kü>ÄÂï«]x®eì+K«>M½{l ønþЂm¼¼—fà©9°Sq¬;q šÇ} õá­"ÒÉ8<—ƒéšJèºf^¥b¦—ËfÛétþ*‚^¹lº…¹04¢]õÄ ¦gg9þðáe­+9•Á¥YIqÛ¶±m›R©¤Bœ¢(Š¢(J(—Ëóú`‹!pVâ{Ž£/ŸY/kZÂrŒ·#Ì­%6ÏÌf<îûí¯çû>aR¬‰ ®ë®¼BªÉý–‚xÔøF¸FY°gÏFFF¸âŠ+æ-044D>ŸçU¯z333 ÷í`}ßçá]»x)õBÜD˜„w{õ’êK;7‹-à›°-8wŒ‡fc‹!q¾Ã…ÆÔº—mÏ_>y_Y–ùK -Éå-Ës m­BƳY#N:N\¢ÕM<1{ó)ÝçÛÇÇs|Éë…¡gËåzKe°höök¥yJEQEQ”Öd³ÙºâYí0Ž×5ê á§½ q€¹ÖBUïºkßÇï-ñ<#¤ÓQ^iñ‚ëˆ#Mó‹›Ü´ÏüÐÕF!Õ¬V]ã0a©###-fûöíìß¿¿¯{`Ó&lÚTg7úÚF,®±bKq]ÀRÅ^öçaìX¼4¿Q¹½Ï– |ÏCÄ3ßo-ôe2æž-—Íÿ`„ñ’³,ãYW,Æó[áyf¹fvAP¿~ÄçêyKSøÃc7²u™ùáJ%cŵöÚHQEQEQ–ïûxžG®ÍJxYLô”/ªèÖ?¤çZJ;|éIìܹmÁñ|D¹–>Èd28ŽÓ¶-·}@âIÔ*²*…Q¦“c{ðÊ(]â˜v„qŸÙ±ƒ7ÿûuÓþ5€mv÷ wˆXœÆrRÍwSÔÁ÷á6êC¨GY¾‹­e±% ¾qK¥XDËfÍý»uk\‘5•2Åb,ܤR)Â0 hÙ¬ ‹M¥ê‹Où|½@ñ÷äv•ö9°iÏ8x°íåÅûÑ÷c‘VÃSEQEQ”£0 Éçó¤ÓiBË"ÛlŒð6Š ƒ="‡ “TúOŽZ÷Nè÷¡¬˜86˜˜8@©ó}¨TÈ—ËQhjØÉ¤ðEÌÅõˆEµõžnR¸q·+ó0Rd=ÀöíÛñpšššZÔk®>|˜_zàxö³£iwûðë=H–ôè»ÝìQä3ÄîÍâj±i›¥åÈf_Æq懦¦Rõá®étš P*•3Û—h9©ê*¡RÙ5 ÐW(ÄE"\7Þ§ïk8ër9©ÍÐÔ ˆÅNñL–‚ÌO§Íg©Ü†úû(Š¢(Š¢(k IM§Ód1šC“Ó;…ÑÆ0c±q(¤fì<\àŽ³Îê÷at„ãN?Äk_v*‹ÖkH¸ÌINiÏó°,kñbYÚsç!"Ek8‹æ‰ôfé"Ç€Ék$=ßöîÝËÕW_ÍW\ÁÐÐP_vöéOŸ+}w—öØ@rµ¿ã¦g0žrbçeLãßÌqLæM˜·4I‘Ú÷iOAo Y8É=76fÄñ¨Êfàæ8qÕפ¸æyqˆ¬xc‰$‚O'**mADa|FÌÇŒÉÊÄ9èKµi6ª+(ÝgýSY\„ƒ8©;&rͲ,R©T{W›y°yÌ÷`K j²ÙfN?9æ'Ù—ê©JWXqA†«¯¾š‹/¾˜íÛ·°ÿ~fff¸âŠ+صkW¿•wܵm‡:?Ì/ŒÐ+6¹`ù°94 ¼MlçeŒ—ÁÜâœbÄ9I*¡×â&"®šÄ«m/Çòîljò@’Ï7Ïï–ô°ó<#9Žñžó<# ¹.QÃ"|†Ø»N#-Ù@`¿èE-g{žÑÄ›1›oß–ããæÚJø±eÅ^Ž•J\]7•2¿«çÅ‚«ˆz"èÉïTKO€ç™Š¾ù¼9ßsÊï*xIÏé\Î,kYqÁÛŽßr9.H"q*eÎ%“1ËŽz|¼¹×g;$ìm»탢(Š¢(ŠÒÆÇÇ)5x.H.ï4f¼4É÷­ >°ÿüóû}+ÆÝ{aØÜI0ƒ¡‰ 3pšœÄó¼È#nI̧>ÄT[}’ù┾³^> sÝu×155…©º®ËöíÛûZ)µpK7ŒÁëúXÉc“ 
÷yp’‹cÉœr"ÄùñMª¯¦jçPÆxr?XÔÞâÔ<ͼÚ÷dˆ«„ySÛ^»HÎGñ€](ä¦MõW!ßäÿtÚˆ&™Œ™&Y¥’ù\*™öEæ+æwÛ¸qcËùëÖ™ë&a¥år,l%i¬šêºõ‚§ãÄ‹æ÷˜˜ˆ+ïŠ÷£e±-ùûˆW¤ˆo²Œˆp¾{U–˱ðjYæÿ‰ c[¥Rì•76f޹\Ž—µ¬x¿²?Ç1Ç7:jþ—ó8—«wËåzO×…f99 =Ï,#‚`±h毴2¸¢(Š¢(ŠÒŠÅ"®ëF‘Reê~¨}žî÷*mãë÷At€x¸þ…‹Œƒd¿\^šÆèêÞg~¸ªˆ2¿Ù2J_Xß8add¤i.8ÇqðûX&óÆ©)Ö¯7‡ë·ùð|®ï£!=­!<­ÉM–ÌCPÂÜR¡Gœ|$²3éôSÄÜWæ-N#ÞIµ×­Äo{FkÓ$ V IÄ¡à"àI¡‰ÿ‘…ƒ,ì%4Ýâ©Õ˜sLrÒ‰À!¹ål;öäÊç2=‹€G«èñß·ÞÊY?þñ¼é¾oD¥ééØË XøMJ‚f¿¥ˆ]•Š›Ä[N<ãÒi³^.g„¯É&¯ åá!/ÅkN„¾ÆuD@”õDìñ¬ÒB9vœø丒b±xø%E½±±Ø;SD= •–ã/>ñ ‚Ø °\6¶™J™"'®Ÿ§„ckèµ¢(Š¢(Ê`ù|žJ­c)Ž šyõ³Š5l^l¼kÛu^™å$övhžóm¡H íç–SºNÛUSûÍ®¯6é§úlDOŒ¨õÜ6Bßr…|±C–[²L,æI‚Q ˜%á–(⅚ň~ÓÄá±.û›y#ÂÌÔ¶ñWµÄu!æÎ‡™‹ˆ3ï^ÂÅIÂ)ÅûÈuÍ>E€i{´ðÈ#püñÇÏ›îyqg’ÆbË%ŽE+©¨›ï+Ì“|V4 ›$Ô´Ùúccí¯ä-LnW<ù\7ár¹ØV›‰fŠ+Ï6 ÓoÁéi³ÙY3tÔü/¡¾,t:YÜHQEQEiN6›%—ËEDqºeõâ}´ß‡±bBà´_ÜÛvjœ –W%µÂ|!.ÄxõXÏÁx)}gÕq·Þz+6lj!,­i78Ïã-ó·ó+7Ã&ν(ˈ½ë„4æÍi9ê‹Èvrµu‡B¸«6«l¼ë¤˜D™8Ü»ë0^y †FÌhÌå%UW“ÞIG{¾¸JµÊÓBSE„k*lY ‡w ×5"i·Hæ\)©T,Âu’R)Œ …8äUªÒJ ˆÃe×­3ß·n5"ÞÖ­Æt|ÜÌ;šEç£-R¢(Š¢(ÝAD‹TíÍoPû“B Êêæùÿx¿aÅÀ¦[om{yÏó–uhÑ\ˆsˆ½t`~>8·a¾Ò7Ö¯|½áþáa6oÞ Ôr¹ô_‰^ÚaAÀnsšE\’Ì=—Ô:œ†Ë³ø¢g<øþ&ÿc Þ‰¹7¥ÔwÒ».¹OE ›G6;ϳ6BrŒårñr–8ZÙ¾~þ-—ôìRÈ£›Ûï”—,Òi½à$?^;žœŒmݲŒ§xÏIN?0Þv¹\¢íû±P}´‹Ñ«™|>¶c ñ–Pg #—BíûP¾ÜQp^i–Ÿg¿>æMHÍ“Xr2Šgqº–pÔwâ܇Z€DQE9Éçó¤Óé(-O–8¢ 3>Ê·.ìá†!”J%ÒËÉ…#ƒöRÃô fÀžœÞ8öHÑ<¤Ué9ëfffØ¿¿eAŽœpÍÎÂ9ç´;kµ"÷F;·e3 ÌC)9V;Û…ûŠp2zp:ñC+ļIÚ„¹o“!\âÜvá«Ä¡§ÍpÝ8$Ðuã¼\G³÷ØcÁÉ'Ï»Nýá@s¡µBD!)~Há)$‘J™¿±±8ŒÌçl6ä‹ (ƒ‡þ±MW©,m˜üžîXCøL³~2¿¡ë‚÷7ÿß`©¶|¢í Cðï׎+ g2FŒa×û'ß ÅöcYæ˜äjê6ð.Žó.Ÿ…sŠŒaGa»­(Š¢ žçQ(Ç蓘qоŸR…ëÖñ¤'Þ|f­Ci9Î¼Š¿Ë¢Ñð[‰ß5GÜ@°`Ïž=\ýõý>–i&b  c7ü¿­Jr»MÖÿqCŽ쉨"iMæ–ó0÷¶”þ†8—\@œëñÒ žgQ¯7VZ•£ p¶„ø÷mÜÈ3žñŒºiÙloÂO»FÀšŽÂͰmn‡ŠO\&ùñÚò5aÃÁ¾Ü 8Oƒâð¿ 'YÉhWâ½&¿iÄ•tE,µ,^,ù.a~;UwÏæ1o(ÞOJŠ­¹W‚'Œ–ÍzårürB*O“†tço”—® þ!}²ñˆƒÚ´š'ÅX²_ƒÉÄ1I;+žu‹ m’¸w¡et”£(Š¢ô‘r¹Œëºd1µZŠ_: g(JÇ8ãðaî8°©ùÌ|Þ„ÉÔâa2>>i›0ñ3˜÷‰éòV@ûoÌ1»víÂ÷ýEÿúÉ=Oy Ï;÷\`p¼á,ß,—NxŒ&õƒgU 66r ËÛÄ÷·L“0U9ß,p pgmÁ<óó>&½å‚À ep{4æQzü¤“xõI'Eß³ÙUžä¿ˆ)Ý+ :ÏÅÇ M¦7"ÛÏ %M’gᛥÙþ‹Äêr±a[ Fœ¶[xo–Íß䄌Aöcà}‘8Ù¢lÿoë»Î‚ÿYxñ/^ÙµW–„T&Eªl6®¤ ¦mÊdŒWÙìl"ïe‘æv$„4·i! 
ÎÓþÙ¶Ù±h>OO£ ¶¿ IVv΃܄9žL&®¬œ g¿dÖ¯ sY3]Ú—ñ‚Sü}^NH‡ù÷o#òÜUEQŽNþfß>^üÆ7J6E8¤;¸yóÌü™AUž+–ËŒŽŽR,É-Ç#CvdaÆ$PßMæ‰Ó›e`9`ïÞ½ìÝ»·ßDz( E ƒ@·fñDë$'Ùð’t{a¡RqUu1ãQ©äš.±`ƒäL¢^‘ð*0ƒ¿‰šð!m$½?š8|øpôYBÛ–S­ºëx,<(ßJ}ìr€¶dñÚÿ>q\³,F¼“2½’à#_Û®¸V6 &vZ6Eâ·@òj4)ª×Ö‘‡—X¦œ˜—Ol¿&®EçV†T¶¸&qÍ~‹E( à^î9Äå»j×Àyäµ*¯Á ìÛ²¯§?éZF„5ùìû±¸Fœ7mTöM†‹·[cØhDÀÂB4ÙLŠw AWªS«Ò‹‡±;•ÅcoœøÞ!®<è>“i0ç(bbê§ýSSTdl¬Öö6VÕZè“ DEQ¥Ç8³³Üöº×ñ×/zQ4Q”A#žrÏ=Íg&ª×¥Ói*• ©T g¹‰“U¥HC2‚Ãañþ«ÒWޏå–[¸å–[¢‰»wïfçÎý>¶:ŸqptTßÌaò±u’‹\xrÎh-t5ê#3‰i`îûp¥§åâÕÄHRÄåxCŒ!I‘Tmšô¨RÄ›8 ¡xí‰õÅ@Eð1MÖÉ÷ÒìÚv«µÿKµ?»ö'¢Y¡ö9CœlDJ§kû*`Ä„JžN׋ó^jÕ*ÆN¶Jæ¨Ô‘ •â`«d8åøx¼¼„“JÓT*.Zà8Æ«‘šðÝxÏy^!òb§!F»“û(G,@{µeìÚtñÍaì²v/ø>„×BðÔ»‹ø&o6sµébW2_ö/ ¬Ž<“¢x­Š–ûS(½‚ßÿ.7BðõÚ2÷škœÍÖ~ƒ|âåž}»ª(Š¢ô”ŸùüÒ(EQEi“Ûâþ¯}´T}S”'ôÀ»Ðž×¿Mz™xž·2q9K}ÿ±Õ¦Ôn Y0<<ÌÔÔSSSÑŒ-[¶Ô}ï'_9|˜_üÙÏxgnu‹1«R“i¥å&>§G-x•'?`›1'Íh(¦kŽ$^œÎ(ŽCÕÏ[[ž¹NÙ±\§dþ(ФbiÒ“GÄë$·›lÇÆ¨§ÓÄ¡n)ÛT”ÈfxS*Å…:RtGªžø¤kžB¾hQÚÊÀ>¼Ð2ÓòyØ’†/ÙTò·Bå"ØÀÀ³ko×4N"ÎI"Ky›ù÷>g³µª)s®å2œþ.‚'6ÖŸ_±hÜÛ!yýšE7$s-6ôÚj‡jó“•f%=D*®{—^ªJœ¢(ŠÒyŠ˜÷È?ÿØÇxÏ;ÞÑïÃQ”¶Øð補±õ¿I¥._p¹B¡Ðæ©Ïá-¬±±²@ÕÔ½{÷ÎûëßÿùÏyÍ)§ÌË'¦ I'GsÁÎqàYµc²-ÙüØë¼Z~ôZöä Ñ÷Y¿ýæ™&&ڨҸɢ µJˆØÄù§âJ‰iê«läˆc„K-¶ßXµ´ºA—Ò…8¡˜²êSŒ! ëÓ¢$‚çÁ訿&&š—P.—Íô±1ó'¤Ó¦ÒÂq.¼¨Ö¢È?éÅ÷ÉÃã°9Û2à9ðÊZç'üC Ϊ•µÝdÜN ž“‚æÌ[†ß âê*’hWÔÄt¦§cw?™V(˜iɤwr.33¤Ó®Ì…ÊçãR¯É’±ÉÒ±$™g¯ÝßÓóâ;ssÇò“ŸœÒñcSEQ¸lf†‹*Mù«¬ B`ãcñâ?­þ…óJûq>&%Ir3Ꜵ&ˆrÄíÝ»—«¯¾š«®ºŠíÛ·³sçN¶oßÎÞ½{Âu]¶o_ÜÕ²[X–E±8?ŸÒ_r´'Êçó0kƒŸŽµ!Ÿšf”ƒû1~6þkE);O­ÏÜÿýˆcýŠCò’îÈ’¯J’tŠ÷Û¾™ö,<+þ‘ʘÏr Ùl,,v‘}?öHLÄ ñJ*¨ªcò³m±"y1ÆÆâï’ D<„²ÙØcH<:ߨ¬ÑÕEqJ‹LJ<ÁòµÄ†r“‡aüçóæûøx,J‰ˆ •tã—mðÀªV››c: )Zédyd×5¿KãïwKÇG6£XÜíÀM>”Rà§â|‰mÓ‘ Á¦x‰„rËñŸŸØ¦˜KèÃ'³óEã•`Y¤‡?‰wë/a¯ÛŠÝ¬DtòÞAÿÒéXÔ”¶!—«ëä~jeÿK¤X4·fòP‡‡03s¤3פ ÝìS(J¯P;VÖ ½ìW”±˜¶,RÍž‘вLºiljþ¢Œ7¸®‹³Ô~YŠú"y*Ä­Žسg®ë²k×.†††ê"—˱{÷næææšndnnŽË.» €‘‘<Ï«{ƒ‘Íf™™™add„l6‹· Í3Î8c-Ž—R)SœÁ£>½‘CÜŽ¸À¨îá¦"üå&“Cîñc·íxˉ'¶7–ìöID»Ø‚.™õɼ5 ÷:Æ+. 
ü,„ÍØÓð–ióæ§Ù¸D¢EìÁæáɘ¨\†ÙY³N3ñLBî*3ðÏåÌg Ák|k)¡zÍh¼R˜ÜWr{““±!Çì¬ÙWcÒÓbÑ"dlÝjþ²Yãiµu«ù_®…xm5–“±c•°w¢-NbÛó½”/ËæÍR&Ž;ÿ…/dëç>Çèç>G‡†ÈZí8Ÿ\Yª9¹¬[$ošäGts0•è°‡ÄeœÁ„c;‰y`îéØ4«ã8ƾ:9pü¹óðnÝDp`S{ë4ÞGއ³Š—h’r9¶ýññ®ÜV½|Ö‹>…¢tµce­ÐË~˜wdg}úÓK+eºmÇ>ð£ý„¾pCbâü<Ⱦïc©w‘BÍ#njjjžÇ[R bdd„©©©¦ ñÔÔCCCìÚµ €mÛ¶qñÅGóöïßÏu×]ÄŠóR*…l¼ï>àŒ~_+e…¸˜q±øzH>q 3VÎaÆÄ.XÎΈ<ÞþÏøÿáµÁk±ëì¥ÉɈ‡d\p³ß3™×Ìb³‹/\©ÌWÁR©†Øß’¿!“‰;’gN~çbÑœc›!Žcn—^šy·ûŠÒ ÔŽ•µB/û!f\p÷ç?¯ÞpJGé…‡{ÆÈxMˆ“¼×‰1U±X$¨õ3ÛšÓÄ^,c@›u½”Ág=ÀÉ'Ÿ<ÂGØñŠîW7íøøÃ‡¹ï¾Í³Å¢,Kõ…þ±Ú…ó²Y…äÅmôò±,ó`XœT*ÂòŒ']ò¢‹0‘N×_—dÕÊäò"PJ¼ôøxüûH>³B¡=!±G´²ãN´ÅI<¯>\.üð±üÂÛ¦¸ù‚Q>ò¯ÿ‡Üùç×­c‡!Ùl–J¡`J·û>ù|ž ZeÛÜ£CCœ05ÅK†‡¹ù¿þ‹}¿ñ|böIüù[?Æ_Œ_‰eYAÀ—þìKÜwß}<ùÜ'ãZ.‡ßx˜#áI×<‰¿ûÅüËCÿÂoù¿ÅëŽ}ÿõÑÿbº:eYضmþOÙ”³åÚÏbÛ6aâû>aâºî¼ÜA¶mG(™n%ÂgÂ0Œ¶™ünYV4ݺ ÷¿ \´ø…nöàsœæÊ˜ÂH§c›L¥âÐV×5ó¶n]Rµ†^çfívŸBQzÚ±²VèU¿âîmÙó–VYRQ¡Ûvì~rîÌ 0ñ5óB4$}Ƕsà Éaœ$PTÖëÁ¸gîÙ³'Rˆ™ššjê5' 3<< ˜|s™L†+®¸0F¾eË–ºeãÁd÷îÝìØ±ƒjËÁêÔ ”#è/¦§:À»‹`•àYe˜ `ýïV9õû¹÷Ã÷víø:mÇ÷Þ{/»wïf×®];7Ç#7?¼ZÔŒR©u>5ˆ+@:˜F8á!TG;}£l»÷žo+%Y(¢š‰ÍŠ­Vo§Ä+1Y0@„SIªŸÜV>oD‘†0ß™™öìÙÃ>صKÔÊŽ—cÃ=ô¾ïGvœD´ÉJnôàé>Î#;…Ôþ_ª¾ ëå/' CFGG™œœdbbÛ¶)‹¤Ói²Ù,–eá8n6K±X䙿ök|÷+_á{³§ò’ .à®Í›yÅ7Oá¯Þy —¿÷>NþÂ<ð©¿ášk®áu¯ø &Нàñ3Ïä½Óÿƒ‘C#üà‰ð’·½“ó'ãº.W®æw‚ßabb‚‹^ûZÞÿwÇ#ÿßÿÇ/üÇpîóžÇðð0Ö-·àû>OùËùÓßÿ}n¿ýv®úÀ(oÝJ†¤Ói<ÏãÈð0g_|1Ÿÿä'yáÈN̓ À¯ Ša’J¥(‹‘ðçû>®ëF¡arÝ©CÁorÓM7pÿýS‘ÀgYétš pÏó¢uÛ~sZ.×+g"¾%ÅbÉÇ—œ&v,÷¹ öîÝËÇ?þñžÛ0,¯-þÑ~ÄÇ?þqfffêrt)½Á÷ýHì¶—‘c$¬¥h åñ< H¥Rø¾}w]7º‡‚ ˆîcÙw6›% CÇá†nà¸ãŽ[Öq-F·úÆšONI"ýŠ{ïí}ÿx9v õýãF$á½z‡6G¢ƺiÉïR0^Æî’ù%*ÈW+"/©ª÷íÛÇCݲêúÇ{öì©ëWzðLœ©«Ìø*®WضMz9o6‹ÄN“ÌÏó¤ {öìáßøFÛ˯رcG'ýÖ·¾µÎ=ÏãꫯæŠ+®˜—?.‰$?ܳgW]uÕŠÐO<‘mÛ¶qòÉ's 0ôÄ8Î@9(Ë¤ÔÆ2ðœ ÌxFËØäÂßóSn?t"þäÕ]=¾NÚñÐÐÛ¶m#Àxu¾ýíO¯xJ8d+ª baÍÈq9Œ×ê°–2–°Ê¶©¬Ð'»Y¾7×]¸ €P*Í_o|<1äØÌ·ís9ö‘Ï1{úÉ|àÄÙøþk9ñÄùæïü><N;/Ï: eÛ„• ÿ œüêWóøì,/Á¨u þøo <óL^ñÊWò¥C‡xìØcñý×yñW¾ÂWŽ?ž €¨‘I\‹3æ¢ÙY†o¼‘w<¯øÚ×À¶9ýIO¢¼ìe/ã…ÿëñço~3ßþâùÀûÞÇ“ÿä‡Üu׌oŠÚ™ غuk$8ˆ`†”J¥:A"²ãy–eqÁ°affþÙhÛŽc¼ä$Ä5—3â¼´ žgDäâÄ–-[8ÿüó™i¶¯.Úðr9ùä“^´è”RÀ"¤•ËeR©Ùl–t:‰Ãr_‹EÂ04Þ®®Ù¥Ð"TË|×u”xž…ñ„a‰l@$´Y–eYu"œï·ÇqÖ.~ë·~‹øÃœp ]¹†Ýè'ŒÊÑEò…ŽÜ‡âG?úÑ‚c´NЩ~Äýã¦çlý¯ÿ:*…¸dÆ“04]Kù.ÙU²Y“ºv11.YT]¶#ÅßGGÍÿr‰kMå¼.®Ì7Û CÓnm>o¶-4FGë» 
ÒMQ.¹?â$#ÌSž2Ì7¾q)[·ÞÜÕkÜéþq²_ñýŸÿœïk9N‘gé’óÕ©~‚¥÷”ž122ÂÁƒÛî¯Ó ^wÝud³Y^õªW122ÂÐÐSSS€êZyË ;wîddd„O|âuñ¶$>{!N<ñĺ·}ﻯç¡1Ê2ñ‰Ã#›ÐÎcÕ†8èÃe¸Í†‘ ¸hfïúÁhW¿“v|Ê)§°}ûv<`è®»°.jC³í…ÅŸ"q•GI¤Gí³ÃÑëš¼RÏ…R 9x9ŒÄdJ¢y¸µJŸÙlœ@ 8ùÓŸÆ N<ñÄ®^¢fv¼>á„xÚÓžÖÔû"•ªuÄ€»Ï\ù:>32Bþ›ß$&&&¢·mÛär9r¹år™b± IétšT*…Z|åÝ_áø_;«hÁÓ!(f÷Aæ›Æä?ðêmìù÷ñkßï+gãlÿ]:ÜËÿ»ÿ¬oÏ;•÷7IÀé§ž~:£˜Û©Dœ“ΕªÀ5;(Ÿx"E S©0A}ø<Ô7uù)sÁÛÞÆ±ÿýß¼àüóq0bÝðÔ¿ÿ{ž~:c/})÷_y.O?p"Ǿæ5X#Öû˜Û=—˯¸í6.¾ðBòù<ŽãD^v"hd³qÒLþäØcyÑñÇs{Í“®%©T\F˜ž6!«â)[.›ÿóùyž³ÃÃÜþù ¦¬è† /׎O>ùdÎ?ÿü£Bˆk nD<ÇlÛ&ŸÏãºn$–‰ %á32è—ðkÇq(—Ë‘°Õj•l6k<[kâZ¡zžÉdæ :òù<…B0 #‘­\ŽÃÄS©Täá–܆ˆ~IqMh¾–«]×ett˲潩V«]âºÙ7Vz‡¤hFÒcY>‹MË}(¡år¹î…Š$Š÷}Ÿ±±1ljÒˆ­W*•èÏç#êT*=g-ËâÀ]½êW@Ü?nF…ïïò Ë^!iY3#T I!L"GGÍrÉ¢çâÀ.Þf"ÂY¾Ù;3I9-Ý]ljÅ60:‘ç™å¤è–˜·D;H*i߃v²Y#Ê9ŽYGê¶™—J5Ïz!ïûÇiÖ7i²vîìîïÑéþq²_±éÀ.zÙ§i•v¤X,.=,êÓ é´ª.2ŒŒŒ077×vÿx½|1nff†½{÷233Ãå—_‰r ±gφ††ê⮓455ÅÜÜCCCQ¨ÍRøÙƒ'5+:rtÖþl K,Ì4.caž^û™tc]ŒqÚsU[Œ2Æ‹«R;Æríx—8µº~íôÿ3€ï:»3×µ ݶã±1¨8Mò“-vMå‚ä0‰é'k×u•„jçóy2™L]~­ä Qæ/)ün%tzÍÂ{ÅË1—«ß_*ezN¯è^²ÃVvÜ N’NaˆÿÓŸ²þë_>òSŠ_6bÑøø8©T*t'WhG¬kÚ‚ÎøÇ3à ÐJí)DdjnlÎÿ=¦Á-O3MÞÈ%—PžÆ²Á] .ƒ…¹…’…SÁœ ù|”-…¹í¤ýi$iE™Ú~Š6ðíW¾²nyðmRÀùCC<öØc<þŒ§ÔÖMßðB ™Œiâ]— G#2þãî»ùlÍËв,r¹ù|>DyµžJ™žsém‹·®¼~O§MÖ£Q·ÛâÕˆˆa2èn¦éû>¾ï×…gZ–…çy‘ÀæyŽãÍfÉår‘Ðæû>•J% ‚ ˆŠ¢4÷r¹ccc€Å* ­’¬'«†J!ûu‡|>U««T*óÎs¹Ï˲(•J=­Â¨v…B¡ÎNÊårÔ®ï좂ѫ~…l››ã¡åx u‰vër‰[Í\ê§™ŒyìNOÇËÊrI]FÖzñªÙ% ³¼m›Çt±h–ÁNæ5n â.@³fÚ¶ë‹È'ßQ'…·FZ¥®$ºiÇrÇŸw^kfÙ9¯«zíÖ7N^rï;8¾ï3<<Ìå—_Îe—]f¼ƒ<o¼±ímßwß}ºùLü ƒƒ/‹ ö×x¿ûĹÀ€hV¦þ†ô0BZ¦¶N†Ø[Êk±]‡¸Ò Ý°?ñ¶ jûÊ`\:JÄá‘ÍÒŠ¡0ƒãd”›ªMwkÛ¯K€ +µi.uÂa¦¶H‚_6¢Ü\xÓûºçIÔM;þÑ7~ί 1Oév߆È5KæèD¿¤Ö¹ùÐ-_(TÏIDAT‡>Ä÷å¹—ò–F:°ÒQJæ²*—Ë‘ç“x[H‡S¹t:ÍÄÄDäæM‘„*&;¹QŽ1 c’¤äñ˜S³êþO&ØT*Õt ÛQäÞø ¯yÇÖEO¢…ìx%6܈ïƒú–Å»ý®Ú×î%NGž,"þ,ˆ¼DH ‘åÚüÚet¤í’¶¢Ö†9À‰[¶À–-´ó˜h™«RÊÀÖ¨^mk›ÍS46ë?ýìg?ãg‡ñÒŸ~Ooº¬4“c˜f4À4­iâT‘Wv¢Ö³3D…'ÆÇÇëÄñPÚºn¿kYÜ䊞H‘©œ´'Ïã”}ûÚ¸*Ë£›mñj¡X,F”eY”ËeÒétÚ"m°ˆ@Ô¶IžBÃ*•J>š )m—…Úˤà°TD(N’ébÞÒ^Šp vÜk’/ù$_ L ˆòoЏ•Ìš´uù,‚5˜ßN³¤·§ˆyRô'¹ŽÌofIñ\ú7I 
N366F6›„¸$½ª*Ú«~…¬ÿÖ·ú"J'ó›y^\ûHš¸¤·—¤YN§cQ-â:hÉŸ©Y×Z‚)YJ•¼D¹\|,+mæ$œu©¬†<îÝ´ã8ã;òÞÛù_Šs²º¨öˆýû÷Wo¾ùæêÃ?¼è²W^yeôù7ðƒêÖ÷£:=Ý«#íÓÕjµ´ÀüÙjµZH,›J|&«Õª]›7™XÏi²­RmºØN¦Z­æ–ÍÕ–sªÕª›˜.맪Õj¥a_éÄqdZœ—œKrÝRmùÙÚ¾’Û˜­“,o×þä¸å˜¦«ÕÉÉj5óüÚ±ÌV«O>å‰.ýhí±;ÎT«ÕÍ×__­ Õj&³èzòej×Ê®Öÿnm0==]­V«Õj¡P¨NOOW+•Jµ Õ;þ韢å&''ëÖ›­V*•êììlÕqœêììlµT*Uggg«³³³ÕB¡PÍårÕL&S- ÕT*mG¶•Éd¢}7£ÕÅÎ%¬Þ®}N×>‹k´tOGÞeÓ丠º¢+5¿¹8éÑG™ýá“i'+•‹q ´1Í·A¯½%—ʲòJõNÙ±P.ãtð|¥P8ÛJ%ω‰8µo©T_\\Ì!iÉfβb¯7ËZ^ŠaeðX‰Àñgæ ë¿€H×Ó¶÷ªŒÍs‰ïúˆ]³ôLˆ[ L H'%YÅ29- ­*Ôgñ¶kÓS˜£ib¡ªH<¢¢¶q8–Œºˆ 倭µÏ"¦0£5_UÇ6šØWúPU E•n:±Ù~cî8 —Í'æ·òn1?Ùd‰·’ølmº¤0ª=@7þ&ú)Ä-—ö¾Œüí·“)´)Rˆ(Q&ΗeEê:žn$á ò]:²¹\n^)0¯ÜRQ2xÛ¶›ø–Ur»C4wš þFGG™œœŒx",Ê@W:Ûr.’,Ù÷}òù<¥R)J.•÷ N0.B¤Tù”A³\·6O†‡º\ݬۼ8ùÙYÎü—opüÏ~¶ôðyq;·"º%ÛŒEÚi:W뇰þñŸ²åá‡]VÞáñã%‡i$|Uúo>¦i¶XD$ ê¥KYvvv6š611Áx>ÏyÛ¶qe­â­çy¼öøã9ó{ßë÷¥8¤QaëÖ­Q^Äb±åR“‚"HQ‚±±1ŠÅâ‚¢Û  ùç]ÄPúC²Ú­„O‹à,ó=Ï‹žÑò‘â>^†!®ëÎ{© Û* QÛÕ¯¼bA‡+J’{ÏkžÛKiNœ67ÇÙhO²Y#¸U*æ7˜˜ˆÃ6eó’®YBOõ·RVBl9ön^zòÞ¦óÛn›¤?œ¡³i‰”dà…¸û6näö{üß>ˆŒpƒ!ñPr …ÁKò¡Á|Ï·4±@'ómâA¨,›ô„Ib/¼†\Ûuˆh—ªm¯q<ÖJ[HOHÕŽâF¢S¯fÏ^ñ¦[ÅÜÿýwèI8Yڵȋ¤˜ÏE»ˆ—õ¢·½"år9lj<¹’ÂHÓxƒp"¸Vû€ª1мe—œtB³à¥D 'ñ¨K^_¹Þ’ô[Ľä5¶,+ò¢›˜˜¨Ûf’Ï>÷¹¼ºßkÜäƒçlÂ=ç®Ü´‰Ìùç/mÉdgÒ6‰Wm2'⺕rïqÇqÒòD|•„øRyR‹’ãPÄãdµ=Œžqä~üË3øõZc»PE¾ÆfZrÉö]âw;^íO/Ò|´JíÙNB™t:yÒJå?Ïó¸¼\fÇw¬j;îA0>>0—"AÉd°,‹t:=ï7n,pP*•"ÏŸAG^Jh‘£iËòù<¹\.zéåû~䉟ÌÏ*ë$…¸T*õ]¤=MÚýBý‘yÞ½mwœ¬Sø¾Ù^*eÞq‹q¥L©Û¤,Žl¼ãŽ÷C=/á öxkDl`•w{•bèKû¹ÿ´óØÔ0]"ŠÚòl­`4…< ;°(k‚âBà”u€ÓúsYâB ’8¿ñ¡šôFË×Ö‘þµˆwcÔ{š%½ÜäD“¢ÛB†db¡›4éYÖ)$dµY¦ðnÐ2«úêá±ÇîVᢋ_X<à$ì€ìx6ê°J2ü0 çW l—&Þm«]„kv¶mGÞoK©V$¢ Ä… d@!IÔ“ÛôtÌ1¼Ùu£T*EÛ•ý6zkÊ1‡aýÛ‰YL³!õu2˜ÇEœM@;r¹-ˆGŽ Ð(R§ÓiR©ŸÝ똙™éܵ ‚€ÑÑѨ"©”ñyß³ø«_ý÷ÝwŸ*ëÄ?i×÷› =—°GÏóêD¼à—ËØ >©çÿüíoó§ÃÃQÊS©·“Aó/†äÒL{ï3²y3zÓ›8ûâ‹¡&2´…ç™ÑärE;q²m¼Í›±n¹ÿÐ!œ{îÁOᑚJþßýÖw¾Cøæ7æM8·ßNêŸþ ç;ß!üÎw§þ·q."¥!)˜‰v9ÆŸ{.A¥$Å)®kŽÁu î¾›üÙgGë )âÔ³Ö+^aêJ½ä%”ßñÂ'=‰ôºu˜®ZGç…b]xa]:U ãö\·î|`ìä“qžx硇¾û]8p€òæÍX>ŠS*ñ¦^Ѐ“ {Ïf³Q®T 1ÍåruíÒ¿þësàÀ&.¼ð‰¨}›—cFù¼iÃn¸aWäé8FM9•Š)ù¾Ón²‰t]˜œŒE)qÄL&ß—Û3ùø–R¡Q’Çv˜ãcçËÅnaÉ'ç*"ZÄ]¹l¦‹P(⟤‰´íX`.é*×òê«í¹Ít’«ïº‹ÏoØÐr~Æâìôt¤§“JÁl§£e‰øÀñ?Žõ¢ùþpò"~Q굉’teÙ ¼wâ~ÄñÇoììFÅ«M’çä‰Ã¡¤r¨C\ªN„: ãÙV¦ÞÛ­CÈ29È—ÏÅb1 ²,+úžìøg³Ù(jll,zC?666/ñ³„×9Ž 
lå-¿Ò¡’_Q] ‹”ãHC:ƶí¸BçQÎìì,sÕ§/fÈÁáwæñÿzœÏ<ç3<< q* K­^Öè·uëV^ñŠWôû4–Mð¥ý<òga¯³ÛÊÄU”ó‰é%Ìèz‰ÞµÒÎI§FŠŒ„aH.—‹rl¥R©¨ýI¶Q þ~]ªL×xï¾èÀÿá ›‹§^ô«ä^ýê¶·“Ì­†!étšl6å&+ d›tø$‡\6›å â¹Wû9’ŵ]âú@J ß7#¿tš²ç‘Ïç™áªg?›lÚÄÙwÝý¨%KR¨¤+‹e5÷^k¸wŠŸù ©¿û;¼ßÿ}ÜC‡(ßr áæÍøg…ý‹¿çœu]ì?ùü¡¡ÈqßœÈ ‘:æ˜:'S ÈlÚ„õK¿í+xéK¬‘‘ºeEX ÇL³í:'ÖÆbì<þ8é{ïÅÿà#n¼a9IY<ûì¦U}[‘kKW¬Us!Ù[¦­=öXÆ_÷º¨+goÞlÏO8¿=ýô®˜ÎjBDædè»x¶e2™ºv³\6b‘e³…ú7Å¢1÷'žØÁË^v2ÿò/õó“ÍUò¶XNÆ×5a‰IÑ̲VÞ´7v¿ÚÕÜm»Þ¯citzN§¯ÊiÛ°aÃêâBà'·Þº`Ÿb|Ü\wy÷´ÚÃq•µGøÐPÓ†Ë÷ýö„¸äÛ&0³"ú–t 3ðBÜã!lÞ¼¹³MV¢ÌÀ¡Oâ®ÞÅé¯?{o¹— ¾|Gì#|ý—¾ÎW¿úU.ö.ÆÇ'E +´ð-Ë·–呿Õ:ä®ë²uëV&''„2É5äû~$¼Á|oš¤g„ïû …()tr¾TÀ”„Ê"°%}||œB¡‰nçûÈd2QÅH™—|ã)Ó$„!é$9É%"b’¼]=š8真·½ì­¸ó˜;ùæ“¿I.­™:×ÓÓÓìܹ³ß‡±<‚€¯ïû)Oy윷Ùþe1#è2ft'¡mã-ßøøx]uÛt:Éãe!¡¥B8à=ômÛ¶qÞûß¿ìõ“m°´¥R@$éå'í}24×÷ý(]©¶*õ†Ò¨(GÀø8žë26:ŠeYQá–óÓi¢%’-é΄¯{ùÏ|†Ì_üù /$53C~x8êS‹ÅZ€õ¼ç‘ÙœdbtÔE~ß>rŽƒsî¹@ëâÆ+}’-ÖTØ{ݶØY)ÄD>4¤}î÷¿T‡¦zžÇÄÄDiaY®ë’ÉäàÎíX4žHé´1y隆¡Ñ›ÇšViÞGmãÊ+»>GY÷rM'~úÓüÞ[Þ2o^¹lD8×­1»ôÞLQ–Íië6âÚ~ߘªÅ"Î9¯¬MªÈ•W^Y­V«ÕÉjµzì~¯Z*u`£“ÕêOÏüiõáÏ=\Íd2ÕCתVíjµZ0³¿ðü/T«³Õêõ×\_ý“×ýIµR©T«Õjuzzº:;;[œœ¬¦Óéêääd5“ÉT§§§«ÓÓÓÕjµZ­–jY*•¢ud³³³UÛ¶ëæ'iüžJ¥¢u;M©TªNNNvt›™L¦Z©Tª©TjÞ¼éééj.—«f2™h¿©TjÞ9·‹ØÆj@Žõœ»îjû÷|xÛÃÕÙÙÙju¶ºìk¤ >«ÅŽo¾ùæêµ×^}Ÿ|øá*vµzöÕW·¿‘tµZ-U«U·Z­¦ªÕj¦ö¹Év$“ɘýÖÚŽJ¥bîeAä9•J¥ª…B¡jÛvÝ3m±¶e¶j~&«j~>yþ%i´AæÚk¯­Þ|óÍK[©Tª>ú¢Uÿé%/©:ŽÓòšMV«Õéª1íjíótíÚÙµëgÕæ»ÕjõÿoïÜããºêÿãGdÅ×ÎÊ”ÄÇvfi ø +“2J`Ó Ôì˜L“.騨u(]ڙⶰÙõvDÙ. 
n@³°kÓˆ§ˆ$û1%ÕÐá5ª¯ó"‘cÇ7&ÖØŽãhlÉñ#±£ýãιsçÎC3ÒhæŽôûê£4÷yæÜß9÷w~çw~¿h¦nÃãV=ç¼FFÆw=÷\½«¬ŽïÚeý_Š‘‘ñqŸo.êðì¡Q䨭ÆÇÇÛš“S»èŽñÒï@¡!¨D?žSoC`)ÒÀ•ï|bÒaTìë¤Óœzü/;ȹ çðù|¹ãç›Ï3øè ‘H„õ¿XÜõ'wñ™û?c/'SYÁT|]׉F£öÒKåá¦þvp[§™Šã ó¼ÁÜŸwìØ1-ÝUl¤jÇ–Sè W^*ÑhÔ®ëÌ0 "‘===¤Ói{V¶½½=ç¦#nS#røŠ+Ê>vÞÓó,oYç58[0 ƒçž{®ÞŘéæfn‚æèïJŸ€åª¢2¤ªutŽÉÁ®®.ëP‡W­Ê.ªÚ€ê³TVÒF'‹ÙYv§ŸÏg'y…B¶ÇµòFvz?»1 ÃŽµL$øùÙ³ø|>®¿ï>îß½ÒëLaYÎù‰ މeŽ)Fš¬a1º#™ÿ#d½qµÌ=Ô¾Üò[ªV…B$±cgZýDšæC׃tuùì¸fápþc.„¦Yq»‚AË3nåJëWÅV„R<{útѱÉÀ€x9 Þgðüy\ötÞv5¶-Hëó$µ§—¦ž»NU¬5@¯46—WhúÚ«<—º²´"k€©™h‡4=‰‘YS(Œ¦i\zé¥õ.ƤHï½kˆSéUå ÖÔ%°¬àˆðn¡úN5QFéêê" å%¿˜)L÷÷*”¡Ë0Œœ*£¯;c«Ê· ¤Óœz4`ÉÈ,ÇÝ_\ïjœL“kz{ù_o¼Á7GGÙ• ùÐÞŸÚ_öY†³™lœ™ÓÂ䉶MCŒ!Xƒ‚R*ÂDëW;3ç;f#V…Û¯ÖD#š)C„l\_už2¶©lÊ §‚Æu‘¿ãTÇ2ã£s‹ÏMw {Ó4yßûþ#çÏ7ó2ÙO-ƒ™Šõ¦2s*õÍç›8FYî=¬8g*$¢ LÄüT*o,‘ Í)2$4מ8Aç‡_ÈÛ éT(w(u1ÄÍ*äëË ¯?—òF3±Œ]Å uvŠVŠÚT¬H-s­r+MeŒó‘ P­T5uÜY£›J§ª2 `<ó¿²/·gÎés\Sw”1 Ïö?Ë"ÕµêkÅÍ7/âØ±ÿÇœ9ËÐõ@~r€¾¾œÒ£Ôá:«³ 8»¨1eØæ8àŠ]-ž”5E¥Ÿ.8TeóT}´šyrfûq&?Äq¬ê¯ÓX}º¯H|°èlcÊñùãpæLÉ% %}lì`Â$ð´!nì ~KÅçÅãqÞyí;ùâG¾ÈG–}„+›®„>8?’Ío_%ÉhROc\¥õ´qãFÂápŽÁ4ðmwº­ õöåÅS²cÂé–ÓÜ2x ëÒë²Að?ýÆ\¼vÁào•w‚ð*¥ Ëó­½½]»v …l×YµÛ4³™4k€;A˜Ýé­ìì¯UÒ KOŽÅbB!ºæÏç£Ö¯«I: ™läú§¿à=ïù[âñ \€3á}D4HmÔþ9†ÕK%2×È.ßÌ«#¬A‚Ê‚l’?hó9ŽI`¿"™{+cX‚¬gœ*—ê#Ôù]Xky;ÉøÔ>µ®×‰™¹¶ó»©ï ²]3Ýs¦AøÕ‘_±Ž™¬áî»{yæ™ñÚkóÜsø| Š[ ©oÍQÙmÕãÇñ·…l*î¹Yâx%¶8Ž‹fö)gîRA”íÙ9ž6û ›ÀÄí1›Àj*>×9n§M\çZ/‘Þ¼õÖ*Ô|íIÇwïFÿó?··õõ‰!®lÔä†2’)6bõ‰Êðµ’L̉Ì9J°!D|XëËÓ‰5a¢îÓE¾MÃê¯UØõŽìÊ”Çíé¬î#›¸+ˆí•}ëŠÆ”ã£ÿtÉä{ó¶G"‚Á`ñ1¸J"TÌ@)Ìh~èW=Z³ï0md2£²k†a0oÞÿàÑG—²> χào4ë{›zMm§•£ÊV,ªÛ{ÌM€¬ÑJÅÀ2¤éXƒ„>¬Á•2´íÂHidc@†ÈºP*·>¬AáŽÌöÇ}c™íê:N˯ó¼bƒr§Q®(;¿ï,Ë>§B{|éK³}»Ÿ¦¦Oð½ïýÑ´Mz¨HÎÿÎ2*ÄŸsE±3tŸ2¾zÔÊYÇyMe VF5góô9®]¨I ìóe®ÑNv…³N®aNý–ûÆ2È kè4 úÈKà;%ºzÐg×8dc Îz#\œŒ:Ù¥öÎ%÷*@OfÛ²ÂffþwÇK £S'¿_U Rïžô˜h¡‚{’£œãç„­ïüÐ7âVKŽM€×^#4ç_õ9û@aWyh°ž‰8bÌBmÍVoÜhá”F d4-/ˆÀMwÝd x„‰;¡¡PAÇXüµ7I¯-®X´ Æ¼µóÄ-Yð.†ÁÉ{/Ãÿ­SåŸ3­©WÓ4‹zg A4%‘HðG‹3XïÂL•Hzz ïÕŠKÚÇ;"pc¾Gî ßós—Q±¿d±×o¢x|ß_ŒÇ!}Љ0èŸâÔ!ჴ `fÜ€xøèÙXa!0z!ô×–º[¿ÑOYŸ™s´ˆkÞu9D3sÖ=U¨ÛHæØtÚúU8UÅövëŸÏ:Æ0²Uõwþü[X·îµz?½ª‰DìeçÁ`M»¬ìø³j•™Û¥P6V§×™²Ÿ*”1KýÙ0¸¶{·mÀI1#a)œ« …|ÒÀOOŸæ?œ?XC½; èËŦÓÖ>¯éª±¨ï¡‚ð+o4ƒ¬¥ØéŠ;#’jʪ ë5¥¾n˜ìdƒNé1¯s‚¥Ð»@£þªߨs& 
´¿=É-Ï?ooS!Eû\§ÑÓùl„Y…§ qG=ÏÈ#~ØÞiÛœÁ§C!èêMãìÍ7C ÀÜ=¿æÅ¡Ý¬š{ŒìÈÎÊ‚¥l+mÍç³R@¥Ó–&¦ë”šÕ4³S6ªãO§-Pm÷ù¬_d£P¤[¥*íÑùÖQ纷Ïg[Ž2•Ngÿªë9¿³û†Qøefkéõ§Ñ qûž^S´úLÓdÎgç }G+¤Zêapù;ÞQžm%Öl+°sçN|·ûò’µK"‘àw~çwê]Œ©·¬:1ÓäO¯üStþ‚•€/ZÕÕ!‚5s¨ ¬ÿð$Ìì6È.ñŠÒwfÖÞ ‰˜o)ðÝìüþ¼¥6Åÿ ¾Bߣ Ì£À Т`¤!ÂàÓ,µÂ™]SÓÀ÷ 0~¤3!ˆüYUB×3ž/™ïãóYj˜ifçV#Ðþ{€20»ÌñB©)nu%ΪNJýQªO8lݳ³ól½ŸnÕI§ÓìÜy„ãÇï㡇ÞR´ÿU7gŽ UÅjµ˜:KýÕÉ®F›ˆ@™Û„ÙIXýÔSø|>b1×ÐD…pèéÉvαT:mYÚÕ±»vMAÎ4­2ƒùY¢{°Œ_ÊRsìS+²XÆ7Õ`”g²³A9-Ýî—Œ ‚…'N0¶zµýÙ4M Ã(C>]æ……§ q-¯œƒ÷²é›œdzGæÞ§0÷gXôOük&УóeŒÒ4«SU¹Ù5-«™9;pðŽQ8µ?e´SÇ+ã›jl‰„u¾:®ÐàSMã"·¦„œåQ^jæ§§'{uœ;#ªúž*…•®[/­`°ðËJ}_§ÑùÙY&um°¾w,–5 êtT9MÓú.“|YÞÚ€n÷ãÅv$`Ù²e¼’|¥ÞE„ IÌ™ƒù‡+¡g÷Äg¼jö\´‡ÝÚn>¶ùcb„s7l*¸X,6õ‹xX Œ@€žD‚×?ù Þþ¾qþ"Kã´â¯ìê”kÀ– ÃÈÎç¥Ó`||zÆ-aÍC†ÃÖë6óA0_‚àŸƒö3ÐÂÀhV剅Ð:À€ð<_LÖû¡lÆS¬r„È}•‡†ó+FÍKêzî+Ýù¹Ø§{¾±Ðüc¡Ð²Åšê^ÜÝŒóþ>_ákÞqÇÏaňSq8¯½öù£?º1Çv  oj™¨ -¥þ/µ XªÌ{úit]§«Ë²¥Ö8(±:¡Â _Ó@MÎ'ÖI9+D£…Ç“áãpø—X‡U}–±Ly¦Ebp5pŸ-&¼Õí\“™iHû`ÀQþBKä'ãn9ÊáC­õuŽc½r†ñ£/tò¿Éf¡WI±ŠbRD¡îxÚ÷êK—²xÿþ ;ÓÏ}éKÿ˜Ö;ZàŽ¹µ¯‰:h]/lØ‚Â/çõÊéÜJÝ?Ê×&5ÍzCóZ+õ #û‚Òõâ™ú* ¢ n†a­Qi¶âñ|­¼³Ó:.È–_2Âáì “òZTFL]ÏflÐÆ?<ñ¯½²¢ðÎN8tÓ!üsüõ.¦ Lȃû¯aÁ‚CåÔvÁßõcÖÿr=5òW2²s£B*4 jv7™LÖ»(S#Çü?ÿ‡ÿ°÷oþôÿø·sí<…Ðu{î¯4*;œÛí'³lT9´ÇãÖ_M³^}Fv„hÖÉÃ9¨§°ŒuA¨ñgƪâ Aô(ðwÀ%;@Œ‚/;^Ï”)M®g†2TMôú/P1Åœ÷…úaš&þð>‚Á Ñè¶cÎFr=Ý‚X¶w:¸´¹ƒ†Åãùç µ¦Ùí.ªÂ؃¹ºµiZû”f<ž=^­œÉñ€2²íj¿3p¤ûøtÚ^¹“w_µÍ­Õ€3“Ü#¬Òà’Ì9Ã4kì³úå®x8 G Ö›[Ï'9)L õøãhzÈòèÉ„ …¬·Ì¥ÔV§‰^Ûªf/lG®"Ô³Ð|Žì¡iKþ³.3à·Bpmú ¸-sMµœ?žqèø‡@¶sW³(‰DáQ‰D®û®sÖÅíyV¨ÕL åHùc;¾IÍ–¨:rÖ‹» õôä¶/çX®ØòßX,{ýP(?•+tB&–‹®ºªjòU+ÒÀ…o¾IÓ‡>TÙ‰¢Ïzp‚w·<[rY¤Š9¤™-Á–é-P­4ÎR÷™ŒAJ×­ÔTòÑB)N£¢z‘8QëI #¸Å¹LX Yt8l½0Õ6/Æ(“9¯¿Îây'Éw%€#waŃ+à÷ê]JA˜˜}/]ÊœyÑõ”,C›;}cˆÜlñÎöX%‡Á;|ìcY°`7þå_Ú k!~ rEÀþ_—”þæ\á\ÅàÎX¢„Xý‹Ç³¿ÊàN¾¥ŒSJ/T×1Í܉×tÚšÈu¯;Vºc4šo(S†gà@]/ìÎ꼿ŠCmš¹ ³Ð9ÎÕ4šÏøàn‡‡‘Êò iв–}·'1àv`-ðEÞcÂ7}Ù¬–×ûàÕ>«¼+Óp ødæyüµ©r8^ÒZ!óî—ÕDƪ‰ ,RÊÖš÷‘‘ɇÃ)æh  u‹VÂÎ.8ž€·éð6üáGyâ‰Vv콚O75•q´ étÖÆéI¦–ü©ð&Ê`›ŒÜUʰ00`]GürëpÊãG­|ˆF­ób±|5µ¤\£L(T™ÂíÁÙ5»*XžÕv"XË ûú¬ÿ{•j{°²we>«ª™€?Î\S­ÖQë}ä.IÀ ¸˜Ö »*>àœ¸vl[ ývwsùßÞœ:uŠÕm×cÊQ­ÊöT·V?» öÖ}TÆæ¹™Bpx+~­ÊCÕj 'åxþ¹ÇQÓ5>*Ô.ŠQ‰Ráv¬PÞ¥ ÎÕ‹÷o·?L´ÊLÆ~6Ä¥3Ï7ñ®/vJ§Ólܸ‘c ›yF˜9¸—·6(G/ÎøzzyAïÁ#¼¼¯™7 
}VÆ´V3ÛÁ2Ìì'ëišÆjO ¬5¼*Õ©3s¥Ïq}µ®×^Æè¸‡ÛŽuœãÆWäótÄ ó &páØ×]w9õWUº 2`vb=SÈIrÃ5\“y¨*C‰¨âÂ9óÔKœ›3§²“ªlsÏâ à‚×_Ç·þÊ‚û5M£¯¯/ûâj¼dšÂ,àÐܹ¼mí/¬gg,Ãÿ¸Ÿ›Zoò`j>AÈ' Ì{óM6m*Ï0¶1F(¢ãÒN_wºÞE„¢ô‘ï¿Ì§ö\Äæÿ]ž3ã$”€ÀRï­øÙ÷εN6›Ë ¦–”¦±‡Nã\K1Wƒÿ–1a?Å—ÊRJÃïþÞi–~é4íù¾w¿;»SÓ²q˜Ô:gl*÷²ºz£–\j®mq¬v¤Kþ`I½‹,1€Ž#òñílØÐ\Þ05øèÀMy^ô1ÁÚ¿ ꕞ2)Ì.r—©©&eâ ‚üþgΘÛOêsïÆ÷å/ÃÒ¥Ù8»jyçÀ@6v›®{#ˆ3Ü\«uK±â¥µ“õúL“õhÀ2rû°Ú‰Zƒ«Ú²Znˆã˜0–áN­Õ-†èe5eÉ3Ã<ö?/¯üD÷¤XÏx«O•¥øB Ù³`‹/›—³­¤ÂÀÒ„Yg=â,XPp{0D÷éÖ‹Ûér,cÎþq®¸â=9Û|>,`V,=f—~æQtýÖ¢û›?Ñœô‚G‰ç}”¯}í‚ÁëË;I¥ž4ÈÇâXºG¥¡mBduu­bmƶÁÛ¤yæùæ}âÖÒR•° –…0±äÚ©—otlW†´ VÈØ s ÏÊ;´PÑÝmd"O'ÁóÀÂÇæÒü¶·U~r„¬—£s ?H¿)ÔœÅëß 3ù= ÃÞ*3OérFêƒg=â àtÿº‚ûtM·^ð±Š.)5çØÃ—³o_³ý9‘H0øŽÁ¬W„ 4püøq4w¼Fô¶i»d`¼OC6¶d½ÕTDgêÖ>òc$ù61‹H?ÎÛÏî$ü7KÊ;¡‡¬öaÉ“èÑ‚ØsÑE9Ÿ à íÎÔìôâÔ¯Mð°!îño=ÏòÁóyÛ#‘§~û”%Ä#ÈŒ˜ài¼û4«VÚŸýøyÛú·C"äEæÿßüÌ’»¸› Ž_ÀÇ¿üñzQ&$6w.J¥8uÊÏ5×,)ã„ÌßB½U>A¨›ãðìm'ø_×__¬ƒj ¨–b+O7§,Õ. ,ãôVCQy¼‘9'”Ù&ë½$ÀØ®]œ¹báÄIq ²ÙhÅP+x…{MËöÁ†a`šŽìLÙ¬è*±ŒÈ²€‡—¦.]t„ßú#à7r¶¿ëºw±(±H–? Á[[ž¥åÍ7íÏ­‹ZsS¥ BpèágXüÆyÛ_þþË\qÓÌÙæÙ9A°1æÏç’_ýŠÍ›Ÿ¢¨¡‚«¥#*¼ÑÓÛËÿöa:Z{ÑõÞ©],‚%ç*XzŠ’{gS "Ë©„ªð³C-4=õ»¥R¸âu/x’4ðæ.ä‚[—ÚÛò–¥ª,Îâ'¸ðìèéÔ[^ãÌê\;a:æƒ7PYh^Ù¿=³DÄ0 âz\ŒpBÃ1ú›§ø÷þ·¼í¸ñ,ß´¼ÞÅ„²0®¾š}ßûAw–H•q/e”€l†FgöSaJ8—똦™ë1€º!‘ÈFðïéé±ÿÅböñÎãLÓ$‰ØÇE"û¸xþ‡ìÜ9I#\»ºXæïŽÌo”Ü%¤‚0Mœ~zm×\Sx§‰µ$¬$b„<Šqò$k×ÿ’µkGë”·,5ô©BA' ÙF.]װωF£9ÇÛøÖÙÙi½#KzœÆ2·!N•¥”!ÎYFuBÿ7̹–þÏùó=; Ñ\€å]Ïl Qw9f7—œ>Í›ÿö&úPSîŽÖŸrº˜áÆ §gp:¶ 8yƒeðqz« B÷p^Ëi$ržã¼^"‘ÈÙçž„q¾‹Š‰Dì}îò9'hÜßcxx¸Þ¥"ŒyóØÿÝkíÏÑh4çD™äŠâYCÜÙùóy|Í“tÒIÇh͘?5g|§,Ìæ¿þ:_y”:F;hm£}[;ÆÔ/.5äÄÞŬ[—õˆóÅ}„´(Bðwî\|¿{ïuxE°&÷¢d=àª: ‘HØ ç`Ãí5ÖÕÕeŸÇíÿƒ/§L×uÛ`¥iû÷ï·s¢Ñ(ápØ>ßiäêëëË1¤2Äiš–ó)ƒ—ÓK­R¢ÑhŽÑM(Ž18È«_œË;Ú˜;ØsVbßF€]ˆñMð /øý¼1üÒ¤Id~Ì-™þ®Šã<§q rûW·AÉiDZ¹r¥ÝwoܸÑ>¯˜Ç0X†'u¯RžÁíííöqÅ qÎw‡óçuu]Ï韓 nCW1Cœó㼞®ë9×sö5Îã|>_Ñã¢ÑhÎ{Êyœs‚Æy/MÓhnn®žÔ€]?8ÇøÁyĈ±’•tf~–²”•¬¤ý–vÚ¿ÒNŒiYF"¸ðl²†±½òä¿Án~ÈÆo$8¤ó›ø\Z²ŽžcØÐÐðá#ù„zrjÉ1Ò¤Ù:¸ÕZšª§‰±eÖ‡-óãî ut|®A¨5ÚÁƒœ{z^N¬Ã÷åÇ™çÎ`` @‚DÉkSV@ µ‘©Wè< ä¹>|˜·lÕnÍ2Ü´Õyî¿åœã¦œú.t¾»^œûK}/Ý.s:ó£¡ñâe/²dþ’Šë¹^<}ø -býú­ i²Ù˦€ÊŽ I$D"víÚXƒ>50qzt9 bº®Û^g@ÎÿÎã܆®©¾¼H#{¨Õ’øéÓœ}b›ÿÇüüά’>¬ØZ8Û³ÒE&Ûo:©Æ5jÅÉ“'Ù»w/k×®ÍÛwøða/^LSSSÞqûöí£¥¥…¦¦¦‚çµd– 
ïÞ½›Õ«WÓÔÔÄîÝ»Y¼x1w®ºÒ–ñCÉûCÐteMMMìÛ·¦¦&ZZZ8yò$‡fÕªU|ýë_çž{î`çδ´´°víZ>ÌîÝ»¹ýöÛyåªWê]­eóóW®bnëqÚ¹™`&xV|kÜþßI!¹ºñä>|˜O®ú$`ÁÂá0š¦ÑÙÙI0$ aš&ñx•&ýFß<‰P¢âgæÔsË­£#Gް|ùrÒ¤yöô³\{âZ–gÂÊ8p€+Vðä“O¢ù4®n¾šãǰdÉŽ?ÎSO=ÅúõëøñÌš5kX¾|9O>ù$/¿ü2·ÝvGŽá©§žâ¶Ûn`ûöí¬~ÿjÞuù»xòÉ'¸á†8pà>ø Ý6Ý×SÇ?wœä+IFÉ?îå'9óë3Üxã?wœ§Æž²{òµ'YqnKX’s½#GŽðüÞçYßb}_¾þKÌs&KX‘+ŽpæÌúéçô‚Ó¤.Nq-ÖR¶ôü4íçÚYÂöœß³ XÁ ŽŸ;Î×p7p¼ý¸ÝFO¿û4&&#ŒpF;ÃqÝÚ×H†¸9ƒÇ˜¯?J”(A3DƒÑ¼I&ÕoðàƒÒÔÔ„ÿv?|ˆ§ÒOÑO?Ï~îY~yá/™Ç< Û.21?cÄ,ùŽjĈYmCÏêß…PºZvüèŽOé6"UƒöövvìØQt²E&-¼ÅСN¬}€Hÿu:_"à‹úˆ–á’¬Þ»!Beé•`éfÎvc`Øö¥ç©wAœxžQÛ©;”Ò•> p¾_JMO¤WÏv.¯ÅFGGéïïgllŒuëÖÑÑÑQôØîîn¾öÏÁ«/6A4C³ŒUÊšcº~Ü8P¼£61ÙÁ:éœPÈÜžn£žz٨ᴈMF)î£zÊòà( 0a"DŠîWJú)féWun`&l?u¾‰ÉÑoåGýQÕÊ_ •È0Àºßú,{W]Å?¯åÆ“7ÒôÏMe üÊÅ9H.D9^3•÷Jg²Ý²Û  nœmÁùl!ë㼦:§cŠ—q¶…3_9Ã#÷Ö'S_%rœL&ù»¯à{¯Ÿç·Ï³½4!™L288ÈæÍ›ërÿJ丷·—mÛ®ãÜÝ?à²ÍOç›Ô_õsúôiÛÈ<88È%ë¬`âóSóY¼x1ÍÍÍ%= ïqwÿ¦ŽWýØD†ieLuŸÕ{Ôý^UÕ±N¶*ÓÀçÔEÜFYçwS×u“8¯ã,ƒúîj»ó;8‰###\xá…477çÕçT¼¬Üõ[ìZdû¡í¾Ó§ƒJuãéO1ÿºÿÇùÏ}• Att‚IìNT.À4M‰„í…³qãFÂá0º®‹ÅìX|*û¬šïìì´ÿD"ƒAt]·ã*O¤X,f{܆aÇìSKû€½¯ä2[¡,º»»¹ÿþûërïJõã¡.~ú™g9´ú§V ­öÒiåµ–¬E£Qt]Ç4Ͳ<ݺq¹º°óüRc•éBÝwdd„Ó§OÓÚÚj—ß©K»uï™Æßz£!ÆyJúü3:«~ëûüòÎÿ–§³Ž™×ãÄm;B`Ž,¹í å®^)dq“áå ôe°l§}Â1ŸsÂÑy ¨žåìÔ6u¾sRz"|d«"9õqâÄ Þûƒ÷ò…|aÂëÔÌ#.‰ÐÚÚJGG‘H„-[¶”|ñÎi{Ò×ÓÞÞÎþàþªÆm©–×OŒí´³ƒÝ©'‹ò‚*Çz^IY7²Ñò0ÄR Ê™õt*ÝJw*{è!B„(ѯB§‡¢2**%ØméW÷pÎÈwÑ…†–W}/UÑ’U!•Êðßa–-¾˜D"Áõm×Ó¤5Up·‰Q^FÅ×ä©á6,º—ç•Ó‡8&ÕÁ›˜l;¶­nß«R9~bQ3‹æ¼ŒnÞh ¬‚QK¹¼#B©TŽ_h{õ8Æ—è#F íŸ5—8¹ú$»wï&xs¹OÎåï¿ÿ÷|÷³ßµNZç¸@k½¿ñ fi}oßûPoî³®!•ÊqêÉÓ|æïÎeÎöÀÚ@Þu•Ìi,‹D"yñʹK©ƒÁ`NbˆbËޜǸ—í[¾æŽ=(4&ëÇ¿q˜ß[ý냽|ZpÕrP§¬•»ß­7œ.\çþÐ=YSëÉ×îŸvÃGëóÝ+•c€/4³á3WÓÕÕE8ΕS•t¤Ž3?…pÛCêÙVÒ¤é¤Óvr弈s©q€€µú-CrO’Á£ƒe]§&†¸¡¡!†‡‡í™eq.%Ü¿»â‚lâ.ªjˆ«¡ÌOµÑ3?Õ.«Z{®(Ç»ÈÙ@ËyîFRèx÷9Åîáîߟ“©Y¨ÎqÜlZ19àÙfþ|õ`„Öæ‹5h4;p½ÐÐØÁ{y¸ZË §‰jÙ€j’55™Lâ÷ûíÏ$“É’çüçÿ´b@'ÖÀ¯‚ÜÝÝ]û›ºH&“ôööVåZ“^Ú×ÛÛ;á3›ÉLF†~²í'–[rœº“«)?SÁ+òã…6]O&#ÇÁÖÿåÜ;Ñz4ô¸nÉrûc‘o–£^LFŽ;žêàø‡,ûÛe´ü´|õš¼"ÇRo09þþÛ~ߚ؋Àßõc†n‚¬ZµÊ6ÂA~põjůïô?Ro09>{ö,ßùïß,d9÷ÿ öKjE?ÎEä¸29Þ³g†að©M?%N£kºe·ˆ`Ù.V2«&­«!?j¹§×p¡d£jâ766F[[›ýY­{/Æþýû™ûþ¹<Þþ8/v¾È‰e' }åÁƒëÞI:tˆÃ‡×½†apèÐ!Ësµ¬6cccìÝ»—_|±n÷¯D†ö&÷òÙo|ãfƒg?þ,'~~~^Ûr‹üäRï6­äøðáÃu»%rü 
/кó,£¯ýšÞ`rvÑYX‡ÂPíÊ-òã­r:tˆ}ûö±ti}ÖÐT*ÇÏ<ó ?ú—±æ²5¬[·.äG“Ýy´¦åöŠK9rËpäÈ–.]Zóq“Ñ7õoâÚk¯å†n°ƒÍ?Ïó5í“ëÝÿH9riDýø\ò+v¬à±ß~Œ}á}œÝy¶æåý8—z˱z¢§R)öïßÏM;Osþëç9Ør£Wåàšƒ µôåuù*u¡ÞòãöîÝ‹i–oõdÖÔ`0Èîîæª«®bÍ¢5u+Çòå˹ꪫêZ§NâÔ©S\rÉ%u-‡ÏçcÑ¢E,Z´¨nõÐÚÚÊ[ßúÖºÖC%Üvçmüð²rÝu×±†úȱÈO.õnÓJŽßxãºÖC¹´··ójøU_|˜ë®»ÎÞÞBKMË!òã­r;vŒk®¹ÆuQ6là¥ö—Ð4–«²²»‚5-‡WäXÊ‘¥¹¹™'NÐÞÞ^׺(‡`0ȼÎã’K.É{§×²O®wÿ#åÈ¥õã߸ë7xþ²ç¹îºëx;o¯KD?Î¥Þr|ìØ1Ž;Ö0úñûßÿ~šïmfÿUÙpJ-™ŸÙF½åÇ+,_¾œÕ«W³lÙ²²Ž¯‰!Îï÷Óߟ‰444DsssÑãï¹çžZkBê‘AKð&•Ê0ÀŸýٟջ؂‹ÙÞ¦+•ã5kÖ°fMý&C¼†WäÇ+娕ʱR˜ÁM=Û’èÆRŽ™€èÇ3‘ãÊäxíÚµ¬]»¶ÞÅö ³]~•ÖCMbÄùý~††† ‘HH–$¡¡f"ÇÂL@äX˜ ˆ 3‘ca& r,Ôƒ¹Ÿÿüç??Ý7Qå­[·bš&;wîä¾ûî›pÆD¼‚Ȱ09f"ÇÂL@äX˜ ˆ 3‘c¡\0>>>^«›¥R)†‡‡ñûý"ØBC"2,ÌDŽ…™€È±09f"ÇÂL@äX¨%55Ä ‚ ‚ ‚ ‚ Âl¥&1âAAAAa¶S“q^ctt”X,Æàà ©T ¿ßooÿö·¿Íã?@[[[Î9ÅöU£,Î,µ.G?‰D"§.JÝkºêB(Ÿb2¬öyYާ« "ÇÇdäx:Ÿ›Wå¸mZ(‘ã|¤?n/ËñtÈÈqc29žÎçæU9®G›ÊGä8é‘ã\¤?nLD?ÎE为̶ºñ’ÍÄ+TÛv3ë<âÚÚÚØ²e‹ýyll €d2™ã¾ÜÑÑA2™œpßdå+_ùJNYj]u½T*e_çî»ï.y¯é¨ ¡2ŠÉ0x_ާ£ "ÇÉdäxºž›—å¸ÖmZ¨ ‘ã\¤?nLDŽs‘þ¸1ý8‘ãê2ÛêÆ+6¯0¶›Yç×ÚÚJkk+`Í„Ãaî¾ûnÆÆÆrÖìªc€’û&K$aË–-y©‘kYŽT*E*•ⓟü$~¿ŸÁÁA6oÞ̆ ŠÞk:êB¨Œb2 µ•¨\ާ£ "ÇÉdäxºž›—å¸ÖmZ¨ ‘ã\¤?nLDŽs‘þ¸1ý8‘ãê2ÛêÆ+6¯0¶›YgˆË¢¹}ûvúûûÙ²e @ '†Ét³mÛ6ü~N¿zÖ…rY¢»»› 6Ô»XÂ’áZ#r,L‘ãüú9nDŽ…ÉRo›‰W˜®6=ë–¦tww3::ÊÃ?lwÐ~¿Ÿ¡¡!ûµ.z¢}“appmÛ¶¡ë:º® ëºí¾X«r´¶¶æXiý~¿½î¹Ø½ª]ar’að¾O‡üˆ7.•Êñt<7¯Ëq-Û´09D޳Hܸˆg‘þ¸qý8‹Èqu™uSo›‰W˜.Ûͬóˆëïï§¹¹9o}¯ª¨ÑÑQš››I$yWhßdPûº®c`¹~Öªl۶;^2™´]&‹Ý«Úe*§˜ ƒ÷åx:äGä¸1™ŒOÇsóºײM •#rœ‹ôljÈq.Ò7&¢ç"r\]f[ÝxÁfâ¦Ëv3ë q*`¥²f* Ãà®»îbÓ¦MtttH$xàkF¡Ø¾jSê^Õ.Gkk+lÚ´‰¶¶6†‡‡¹÷Þ{KÞ«–u!¦” ×R~JQKù9nL&#ǵ~n^c¯´i¡0"Çù÷’þ¸ñ9ο—ôLJèÇù÷9®³­n¼n3ñ SiOŒ×û x‰T*Åðð0~¿?Ï•²Ô¾F.ÇdîU˺*Çër<e9žyxå¹yAŽ½Ò¦…Ê9–þx&à•çæ9–þ¸qñʳ9n\¤n&®‡ÙXG“©1Ä ‚ ‚ ‚ ‚ B ˜•ÉAAAA¡Öˆ!NAAAAj€âAAAA¡ˆ!NAAAAj€âAAAA¡ˆ!®†¤R)¶mÛF?£££u-G½«Ch’É$½½½$‰z%‘ã™C­že"‘ ™LÖûë ´Si¿-mPð“Ñ™k!Ç"ÃÞ Qt@¯ŒýœxU¯f'Ò–§1ÄÕˆD"Á¦M›#‘HÐÝÝ]·yxxX:_¡,¶nÝÊÖ­[ضm›6mªw‘lDŽgµx–©TŠ­[·288Xï¯+ÓÊdúmiƒ‚—˜¬Î<Ýr,2ìAôÒØOáe½^˜4B[ž.ÄW#¾ò•¯FÙ¼y3÷ß?À¬:¡1P3_ýêWÙ¼y3<𣣣2,4$áp¸ÞE„iÇËý¶´A¡\¼ª3‹ •à59öòûAf#Ö»õ¦»»¿ß϶mÛØ¼y3cccöç-[¶°aÆ)Ý#™L266FGG‡½í°ÿïïï'•J±yóæ‚å ôöö2::JGG‡Ý™W‹ÞÞ^û» Étȉšµ[¼xqÎö±±1`b¹îvåFäxæPígÙÛÛKGGCCC9Û½ 
õèã…êQ‰\ÔãÝ>Q¿].µjƒB}(%õî'Ò™+¡šr,2ìmÜϺ‘åØëïÁ{ÔB¦k¥¯Î¦ñܬ÷ˆK&“,^¼Ã0èéé¡··×þ¼yóf[¦B*•Âï÷ÓÛÛKww7ÝÝÝ93"­­­´¶¶-ßÐÐ=ö=öÃÃÃU¹Øºu+Éd’»îºkú+[˜6¦CNü~?6l »»›ÞÞ^Âá0mmm¬[·˜Xn§»]99ž9TûY&“I ^Ï 2<Ý}¼P]*‘‹z¼Û'ê·Ë¡–mP¨¥ä¯ÞýâD:s¹TSŽE†½M¡gÝÈrìå÷ƒàMj!ÓµÐWgÛxnÖ{ÄÜ}÷ÝœÏ~¿¿àZþRA YoS©Éd’ÖÖVîºë.;ÆÄâÅ‹éèèÈ™-)U¾ææfÚÚÚ Si™ÀöD"ÁÃ?Lsssm+]¨:ÉÉdd²3hî³rå¶Üv5Ù2ŠÏÊy–•ÈÈèè([·n¥§§§àõ¼"Ãåôñ‚w(W.êõn‡âýöDLÔ+-ÏDmP¨Åä¯ÞýâD:s9TSŽE†½M±g]m9®öØo"¼ø~¼ÍtË´óšÓ!—³q<'†¸I°xñâŠgZ[[Ù²e‹ý9•J‘H$ÊꌋÍèL¥LÉdÒvQM$U_&(Ôž‰ä¤Réïï'™LòÀØb8fûöíÓæ.,r<{)÷YV"#½½½¶’L&ett”¡¡!ü~ÿ´|ɾ„ÙÇt¼Û§Òo—Ó+-O=Ú Pµìw*•›©èÌÕ–c‘aïRKp&Œýê¡× ÞeºôÕÉ\w¶ŽçÄWÖ­[WÐu3•JÕ­L~¿Ÿ-[¶ÐÑÑÁÖ­[ ³Æú,”G*•¢­­-G.ÚÚÚ<Eäxæ0ϲ¹¹™T*ÅöíÛK¦GGGinn–”0#™J¿-mPðSÕ™«-Ç"ÃÞÅË: Ç~ × ³/·åéD q“`ll¬hêòB³~¿ŸT*eÏžŽŽ200`[{ÕK}*/ôJˤ„;ÐßßOoooά0ó˜ŒÜö÷÷Û §š V.ÏÕÛ©–QäxæPDFÜ3¼*˜­Úîf.õx·OÔo ÑÜÜ\pV»œ6Xiy&jƒ‚÷¨w¿8‘Î µ•c‘aïRêYW[Ž«=ö+%ÃÓY¦Rïav1]úêd®;[Çsbˆ›­­­½€›››Ù¼y3›6m¢££ƒááaü~¿½Îº¿¿Ÿ¡¡¡)e©´LN¶lÙ¦M›2PœÁT*#@€¡¡!>øÁâ÷û¦­­-'#àTåvªet"r> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœ¼½I(Éu¥¹Ï_KiÁhŸ‡­UB7z¨z!\´$ -%Áâ¦ÿ~ßã6ÝïxÌ|‰|,¡2^X¸»¹¹ ×>;vìþ0Lñÿ~§ÿ\ËõñÿüôÃô9/ûóÛñSüúþp|Î÷²o[ýþy¬ëµ®ó¶},ÛüyÎÛºßõ6¿›>§çÇòñÓóµÏøÕ•_Ý÷ôy?ÿ[¯øÝôþ/úþïþÙþC¤ü÷”‘z›ùãÏÿöñï?üŸ¿8§¿Å½â÷Ë:Ç{oÓòüô_ÏOç?(ñùá?~ø–gÄgÙî'¿øáŸÿ%þþ_X>þ)ýÿFÒ±íºîÿûaž>þ·/³ñ?~ûGÆËý4^÷·~à5ÆMËk¶,÷çz=uèþís°.ógË@ýùû>ÿˆK{ê?¾kj±?ß½ý¼óÓj¿C¥+¥þ<¾þø]Ÿ^м<¾þüzþÿòßæxÎç2]?þ{ä¦Üÿ-W×wÙ×)²µ|Ìçt|×z×µ*ƒÿðãè¼7oqóúøç¿[¦éï?þåãÇúá|Þûo?i™×íóž–û<ýç?iû†'­×ö9ïKÜù:þ“Òwè1{/ýמÖ3Ç3½ã2êéwæûú\îù8çh)– ¤}ŸÜû§~·\Ë;79íûäf]>¯/Š¥þúûäaŠ Žøß6/¯Œä´ïÚ6¦Ïõž¦y;öïSSõl«©ÈÄ÷­©OnXS™›ïZSŸÜôšÊŒ|¯šúä5•ùª¦þŒQ÷˜â¦k¿×Sÿ:×ÏéˆYÁ¶íïjúØËõ‹B{Ìv*ç‘×c¹ÿÊcö_ù˜å8?Ïe‹!w¿Þ¹Ûcæ_û:SÔÆ)FùyÈå¯<çW¾Ïu,äNO˜ûW³àuž@,«(hb}š¹œ¿KL™ÉulŽHf‹Ê{GEœ¯ˆdö}»–í•è®~W#™ÿþßþ×ÿýÿø¯üäÀðoÀ<ër_W¼à±Ÿ×‘ÛyœþTýçyä¿þáß~úé÷üãïÿüç?ÿþ§ãúÏ?ýª`ŠÍsúËI|¨í¾—ãŠÙȦ’Ûæ9>”‡Óö¹ŸÑЦ}}&ÇëgjÌ[âÍ÷ôyѬÓþ¤E9E|;Í‹zÃÈèÅv=\`Ù¢Ê-Û3ß™£Xâm¦©]¶^«.»ôJËçvÏË÷“ßm˶]£l„ÁÑ8¢N•´ósÛô¹£⑟ûýý½–´èý·¸nŸõõïÏ-^ÿŽ1áI»£öOåñ³¾×E³_-íÚuÝÓN®ÏcŠ6‘·Òö(ÄC×G¤-Ññ¬g\ö¼Ã>ëw|™ÈËiÓ9]ÓôÜs_>—Så«zŹîkŒûS,{ô_§®»Õ%ŸqÕºÖ¼DÚuéºøç¯½ÐÕ.Û"0ËÎý‹¬ìQ‚qY¼ÆÇE4G©DM|’Ž÷) 
rÕ¯õî…²ŸŸí“ßñ=6]t5åVM¹ÎK÷›îø¨Çy?IWd8Zw´¶{™UT±(…'éþÜç½ôõ÷u-Šo/IGt¦ª&çº|ÜQÕ¶µ× 5Ú¨ç1v‡S/˜?ïå.uõÖ¤+öý.,ŸgÔk_㪧®Ìê:”²~îªñ9•·¨‡ÇY?äß`ÛK=¼éÕ¢î—ûíêožjYÛÏøñ‚5厸ŽýÔEËùOÙEU?U1b‚ö\5+ð(%=ú~쥎FRÔ“¥}§ãú\»TÑëŽ;Dou—š}ÜÑvâYgs´±cŽr=[ÊubV¦â¢hªQÍŽ'ƒ1F×^ª§^x‰GÅÇz’bÒ­nXµóŽÂ¼î^áÏè£ïx”žWmK”Å]êmŒ­µ5ϺjŽæujÄP’:Œ½VM¾Ö©}—ª©þa‹1¾~¬gÚ¥;-«n=Š}z>É©.a*53Þ+2º·º~žÑ«ì¥fNŸ›^±ÌÛ—óŠ>å.5óºT0½­FÒ¥~ã©™QßâI5ÒˆU®F!ñ¨M]é½<Û÷WŨ5s^tÃõù›ðc\ÑðöÚ‡Æû,úš$Ô¤K½Œ‡ÜG4§»|Œkʽ׊9E4vîOõPÒö¹µµR¸ö¨r¥ÿ´Rˆ¡m:ïR1ã¢uÒýžOI—þò©˜“º‚ý:J½ÎÏã®g”]4ø½5·øçvo_VLU¹§À£bÆŸêÉöý)ñ¸ÿUâ©™VÇ„ŒçÚqÆÏQ¤k|ðçïgôª5S#Æ]~MRKºkÍŒþnRR©HQQ£í5óTc,­>þp^[Ÿ}ò¤þt^kÒ½Ö>Ó.Ú£¹{½|®¹îrѽHë1gõJ{-öè ×úüšK‘Gí0Ùn")ºÜ^-S]ºŸ‚®ýåòyk ˆZIñ:ѵþ2†í]Môá†ë­èÜ­]c@˜Ï»×K\·¾Z©êû „ó“øÝ:Ìè0”…­Þ0b€»u˜ö¬ýs­S_=âŠc­ÙŽÁfj¦%Å02· GI—Ö3øªÑx®ý¥Úͬ®ïY„ˆ.:zÖ_F¯¥Ê±?)sT·µö—ì¨Öù§µõ—¯¤kýeÊĬ·oÝ¥ÂÛ¥ óQË>ë òÔˆ}U“‚ŸUä>ºË4@¬³Bñ»×ÊÔl”¤&ùEw·þŒˆ¬VK–ÅCþY»ËKß~8|R.Zµ^FCÞ£Š•ŽoiºZwi¯qB›´Y—¨Á÷è.ók=˜W­ìV[_Ÿ‡Dˆ¶¶Ó²¬ŸúD¥ZÚCÖè<ö^-Ó·ˆ¨õlᦚà©Z¹–ÇÇt»F›V•—#ºŽÚ[ª_Ú”ç'Љ|F[Ý{­Ì0’îj²µG#1®ö–3â9Ÿ¥8_ÙX5&Þ2½Õ=F‹2Õ6¦m?·R™#énA¦•mLëÏ£u–ŠE£³<˧X#:Gg™µi~Ö;Ë\€kÄA×è,sYDÒ}Þ27Þ5¡{ô–¸aŒ7-¾ÄHÝtë.Y€kÔÒ_²l1Ù›G™[o$]Ëè/s±Ç¬çXv¯¬šsôøÒî¥Þ¤ö—ñäø‡ E½,‚ a*mUÉÖn1Ò®bšßlsíübºs´áù“­‡˜–xô1zÌü}cª3õs}Þ¯üöjÑ¥uÙ1Ç9ztÉv¸+´˜¾Äטá,×è.Sh¼Ægêá¥Ý0â ûëþ2¦7g/9Åôfïá¥e#Æ›éëþ2f7s /ÓØ¯¤»…—ʆŠ<¦$O’˜Kí/—Ñí+åŠNäîy8kýe.ŒCÄbô—yð=Ôßì_M|êºTü2fyÛ3ÛÕ/ãÎ)¾Ä­"ÚG‡™ß5f7ó1:Ì<îFÒ}ŽWE tn_ã1»Ù[|i]AÌnÖ_²MÅìfnñ¥•x$Ý-¾ŒQ=È|ÆLéI‹0¨˜sD$÷Y,œ5&8G0ÙQÄg›G§™{Ƙà,óè4söçÔi¦â85fno1Á9Ö¯{͘àl)ÄÌýULp–c¦ p ÎÔcLÖ³Hj‘¾5‘SÑwë5WÅòÛ¼m%ï ícVž¦tÑÛGi^3ßðÒ«ŒYyþ`‘$flÙŽ)Α¢ÌOÇ„*z‘mŒæ©2Å gIAfŒb‚3¥3×¥HjkåV>—ˆÉˆ1óíbâ=ÝaÆÄeM!fÎxÌ»{ˆÉï§I\1Ùïh–»ŽN37Ó§Â}cêM¾Ž1ã›Í=ÆdñEÒbÌ\cczs¶S­*^kºjŒœ½™sŠØ•ñP2Y¸1:Íçè4se‰¤ûì&.Šp輿Í#æÜS9+6- +|å-¦8s21šk‰As­øÖû }Ó’Fã¥óÇ:ébÈôELWÅ0ñ1UÚNL#"ܶ¨bˆFL—?ç=*viª ¦ñM?oaà #1]"?D„ôl#¦‹æSÌ–×½¤%bº<5¤å™˜ ìžQü5,1} oÌ·÷2Õ0ÕŒ8z“6í0j—Ç5˜i4 xñÖ LÕjX_JÀôRø¯WÖñL£ù÷y•Ó9:ÈíTÔµ<ïbÚ½ž/Z: kÅ÷8ï­Ï„K_÷¼tŽÑgN3ÓÈuÔ¿[õ¾¼v"¦³`Lkcä*i ™ÎârK׺¼˜é³˜8/W¡é,6˜xG¦¦³hè.ŽSTƦ³VGÌà4*NôˆGÇñ™œŠð«5ŸÇTž7ÐiL`¢¾Æ´&vªÜomrfð4šF »Q¦ùEOõ WôGg%GŸÎ*òR?Õ+Ä8z·Ê• ª®›ÎÞ<2Aõ5QïêÛe„ªŒ]¬;*î‡1T]j‹füd3CÔù©PÛ¼·´DQUdKLâíaíuŽJúLk%ÊÕ‹,ƒÔhRñ®‘Ïû*Ï$U—Íêl*Ì(U—ÅØî•Jf–:+(¾¢®W–Ÿaª®‹ 
ôÖšj¦©ºNc_Ä@OÓ*8uV‹UþøÃ€ª¾Í¡°¶€LT_×%¤ªB¾®èóêJPfªó3Q\wÞ; ª.‹7žÎŠ[TÕÓ¢¨×‡‚ªê²¨™‘6½°ªž¶*¶YɇqU]7U†ùa`õ¹nŠ2®.“Õ§*LqϺ—Ñê}êÝ%c«þÝ2\Õ=M;jלéªÒ"Ö\—Z-3_}=/ÖW>bÕû-1 G{{1V]7)JRßOʪÇBüµEfÌúÔôµæÚ 2gÕu"Ü­EfÒªëÖ'Ô }L¨U×ÕÉy!…µê’íXPí¬UWlBâeͰU•èÖe?@[_OJ¸U›ÔÏ-%MÑ›½€«wŠm•Äõõ¸„\­ ºêºÜz¨—©«žwh%£Ô/p׸wôÎ{3x}]—È«ç3³W]·]s[|}ž§Jz–è7Ó××ó~õòÌüU×uüaVÏ‹Éfú}1ð]†x Øçõb$ÞK«&!U™aõ¸C_iZ °zž©s+ìTÖ‹?cاCÔDá6Ã>ouß}ê‘9¬—F&±^KŠ}]–X¬´ c½3}=.áX]÷Øæ‚ÇúÇÎ@ö©\Zš)ko@²ÏuËÒMf²žÏ e½oÈTÖó™¹ìÓÉž1¾—á `ÖA&³Þx2š}=o°Y]võUG°YïR2œµ¡tÖK3ãYÕ¿ó˜ÛÊ|ã³ÞJ3 }†6¬9¡—;¢2—†@«DÂ’Z÷2 UÚ!ÅC}³Dh…lScª‰ÏêšéˆÑ¨´´ÌgJ€Ï>Ãp5ä¥Õ£õÑeئÕu}ZmœYÌ”VmÒJÌkc±Óaù°Ìi-š¨}Òb4¯“Z/ÆŒjŸLžQ+£Ê¬ö a¥*)a*`í랃Ö>e"éN kŸà1&·U^ëÅ’­®“–ð,¡ ˆí2·‡wd«+b”ˆ7+=uf¶ºb‹o}–©9 ­rýHSô€Ú>ÕxŠî£ÌCm_× n«ËútÉÀí+›‰ÜjêÓÞ{.Ë`™Üê–Ëzªãß>ÝF¶.éŽJ¨Ft«,•µLn5‹Ÿ¯ýÝ>óÆ(û³ÈdÁnŸyÜ\ŸÀ­&\Q÷ŽL¹K‰ÇŸ”OÑ-î—¸­²5u©³€[]s¬÷^‡Ë n•tŠÔNäV9<¯®MºU;0vûÜR«ÌVo•Ëè}÷Š*@oçg–]S­vßjž¿ÌEü xkyàÛWZâ·šÉGM¹êÜW‹¨â^ÊÜ'#\S‚áê²C£]-ÅLquÝ*ù_™uãŠ}Š®Kü«{Þšn”©'H®®“वЌrç§!Ü5ËÕu—èp©ß™åê²K!X 3ÍÕ2M8×o™qnÔ˜ø°m²Nœ«Ùù±ôšy®A$]A«!(Ì@WW‰—Ô2]%E¸¼_þè žmúÂ=)öVƒÝ¦€éŠMêìJ-ztãüæ!Yß‚u%°?þ¯Êv¯õcùE|w¹GÄñâ»)Ú}ñݘ*©B%ðÝùôÕùnÔïZgŒïÆ0zô˜Þøî­¨Utæ ¾-p¡¡21ðݘfÅð×gàð.1Ü%£lˆ:^¥M’h]ó[+µÉÜG¼Ñ®ƒ>ðFÚ¡ÉS-² x—Iñà`w¼jÿÛÕ•d¼Ò?nkW òJ#§ÊW  ¯ÖhäE$äžù¯Zg@^õ¾“>m@€¼Ñ+ÞKojyÏQ­ä¦qn]› Æ{Jt´ˆŸŒ7†£>1^I'‡žŒ÷ÔÒâÖèïùlìë`¼1Œg4ƒJ”Àx ’ñFX5öŒ±áïºã$ˆãÎ, -þUê¥3Þ̆ y£Þξ¸È«²UG^Ù0/I;¯8®&^s‘ “ñîúˆEgïŒ×=¯êB·‚ñF¹OšAjf¼ŒŒÈxõîkÿB`¼QM=êÖÏf¼ˆ#;âEIÄï^Œ ´Æx¹®HÆ«8ujb02^Â~2ÞIÝ÷Þ`!/_™Œ—ÅHÆËÐ/¬ñF]=#®C&¯æÎñ /„¹^?ˆxYx}ñÆÓ}7ðÄk]!/—/ÁxU«Z[MÁx­›äµj È«ºsìC®š!¯­Çôúó2è{nšV0Ðk](`¯}=À^å@{­\2íõÇeÚkhokRÀ¼Öl€y­kj˜—Ê ’^ ¸@zýv™ôÚÒH¯l™ôúã2ée„ Ы¾lÖý*°Í ×‚y€^«X½ôZ%èÒÔ”êøôª•nn-Q/@¯EŒ½¶j Òk… ÒkÃH¯µ7^ë 3êµ ¬×¢°^–&Y/_¬—2‡õrÐøVÖ;uÖ;ÇßÿÐÍCò9½‰ƒÞ˜âÜ× J½ñÊÑô¹/¬$ç¾zgëƒhU×5wamv>x0ËÙè ŒÔõÜÇÑ‘l6>(ªaæ‡a^!ºEûFœ³Ž÷Ô&Ó½$ÔñFÚ<6ó µÝRÌ¿e¼‘ÙèB+P^®|PÆ{jOðÙ-¤¼BmC»OÊikÚ= ÊkP”—+4 ¼‡ )Ì×·”7Ò¶´Û¥¼N‚òÆ@|MÄ“ò^Zü8«*aHy±deR޸ѕCÉËÜ;åÍbc*y ­f%ï+))y Qƒò 8Ïm–ñ¢¼g’òÚ›CÉû"ÀƒòFÚ¢¾¿t¸Fyy)¯¥óbÐ0¯¥ó^r ZºF˜×žÌËOÌëï—1¯]Ìk—óÚuóFÚ™¶éóòºÊyí¥Éy_iƒóz8/sOÎ{IŸÝP9¯e>s^õ ٜ׮˜— ÅÆy--sÞCÆ-] AÎËEkr^[b 
çeŸÎk}08o4ŠxÑéüJËëyçµ¼dÎë÷Ìœ×úõÌy•$`P.Áyßiƒójû†Èymqœ—Yéœ×Aï©©÷ݧ—½‘¶+f«Ó)€^fŸ¤W÷Ì>‰ôrõޤ÷Ôuì éUO'ÁtU‘õú=3êeE!êUš¶´V…P¯§eÖûN¬—_ެ×Þ¬Wïí2ëµwëõçeÖË ;d½þ‰õžóˆþõFšÚN›õõjÀÑæð*ŽëµÇõÚ«ÖkïÖkïÖi‹öTàÖkåÖkõ+³^\f½VV`½ž•Ìz½<2ëõ{fÖK9 P¯]Ôkêµ[fÒkoÒ«´éî“^^»e½J’ÙX…g½vY½–”8¯¿Zæ¼^$™óòŽÀ¼öÚÀ¼Væµ»eÊkÕ”ׄF ¼Ò'ÍkŸë‚òÚKgÊëË”×: P^¿e¦¼~ÏLy½@2åµ Êëi™ò²4y_IƒñZãµ·äµ~ ¬×ªX¯e%£^¿,¡^Oʨ×ʨ×Ê ¨×®êµ·«¨×¯È¨×Ÿ”Q¯§eÖkcX¯§%ÖKQ Y¯.ÓJseO`½~ËÌzýž™õFÚu¬ƒrgÖkÏë´¸I_ë- 구õZiõzV2êõ{fÔËZ Òk%ÒëK¤×“2éõ[fÒk¹ðe÷ªœcRÒ€\ã½þëÌ{íÓ€÷ ò”Ш7ý:S^üº^ë¦xÓî’-î¦K2×UµÙ÷ÛÁuívບÌÛGr]OË`7Ò¶}ij x£?¸–ÁR3×õ4(xã&ÚþÐx0¼2í¹„5o¢t¦à}¥e oŸJ:×5y/$¼¸ãK›ÞÀ$¼–öúÕ·93ÌIÂûKìl#ÀÞµÿæ ËxW”ÂÎÖðílµÁIE8¿˜.¿ u#RèSuÚÙFžïa@;Ûè/¤@ªn´³}Æ–¦Y2;[u][7|ÕÕÔI¹z9ÒÎ6zØ{ߺð5c]îx#ÖUijƒ‰f¬Kù ±nÄ)nÝA¬åÏŽëÆ·¼ÔßV䬻É?#òY‚Z`݈Te¹Û5¿ëFð»]IØ›±îšÐ¤c݈µ¯m@y`ݸ.žuwÂY±nDîËyt­&°î*™÷Ô•oຑ6­ƒãƒëÆübÚ“¹næº1/Y޳owÏ\·ûÀé¦_gš¿ž÷>“æF5^§ÎBHs£ú_Û C ¹Ö4@s#m_¦®Ñͦqɬº JAsÕlÎAð@seÙ²G¯ÑÌ2Í]…Þºµ'`®¦’×c€¹ñ8yß5ð˜×{Òg˜»êì®±p‘anÜ2ª×`Çæ.RØ %N-mÀ\OË0W†4÷0øÌ]tLÂ^C˜ÆrõËuz°ÜÈà4'&›Y®e,×”Q®ÀQ«]æ¸ø}†¸q¯ù €¸Ëlh·ÁâFÚªqð\Ê=3ÄÕuik ®®ÓNÃåz‹ußiâŠzÌ}Ç!nŒ<ò…i€¸‘v©àš 7CÜåД)ʱšCdˆ»ƒ5:ÄÑÿš'o†¸Ë.y)# ׾Á—V7®º-ÜGó¶ 7ÒfmL)¦¤d¸‘¶IÞ\LÓÈp™F†i«Â²jjÕî²éÞûUÂ_ ÜHа¸YÆáÚe™àFÒ¡¼F“àFÚ¬¹tDAp•¤ 캽µºÊ‰¦Íý!ÜHÛb¢Y-ÈIpåÌÓѦÁ}^Tªi+e• ®e×ò ‚»¨ªÅ¨a1î²>Ää.»ýp#i_ÆæG\»,óÛHZ¤ã-6åä·‹Nö(“°[¿&³Û|Mâ¶‹võmä¶‘vjôÿÂWg ˆì´ 1¸­¥Ûò,‚Êmý—™ÛJ„ j¾¿ÌpŸ¤iký&±­ Ã$kž»ÛÆu“„íÕ•2cÛHÚ$w)½ ±­²¥åªé|Yâ*mVÀÓÌ%2¶õ¬dl»hÖ¯"ÞߨVÙ¼úNrÛE2Š«k@n»ˆÖH¹?½¹m¤]W ²ÜZ>nýžÜzZ&·žÏLn•&…u¥õ@·‘¶iºQm3»¤õè;9ÈnuÙýŸQ^/³[K¼´åÆ5€·J“œ­zf6xë—dzë—dzKaé-DIo­D2½]ôõµ«ä$ÃÛE˜|¦—UnT»¨ju_À[¿.Ã[¿.Ã[¥M}ëá­§exi§fGÕ,ôö6ðí¢A0š]õݾUšdu¼¾õë2¾õë2¾U^Nù#)>ø­§%~»œ9Áz=-³^õ’Ëðͬ×Þ;£^ÏdF½‘¶&Ôk½!P¯å¤¡^ëszýQõÚ×ΨWZ›5­‘dÔ«u¤Å„„z•´ÏÝn ¬×Ûhf½OÛN¬¬—ã,Y¯NBN°¬WÍñˆ¿nëµf Öë×eÖûN¬×Ó2ëõçeÖkí¬WgGÝ‘Œ¬—1Y¯§%ÖkýX/[P/ã¢^ÝòìçXõZ%«¨×›Q/H¯_–I¯Õt^kŒ™ôZãéõ[fÒkí¨÷6Po¤M ôõÚÀÔk­¨×jP¯Õ. 
^«]@½JK«!@½ž–Q¯†—³ƒôZR½V)3éµÊÔkãP/ë2H¯Õe^Ö¾z­âôúÝ2èµq ×ž”9/sp^ÏIæ¼vÇŒy-)S^¿c¦¼Vø ¼öÞ ¼v(/ç1¤¼œ€òú=3åõë2åµÚš)¯Æ+­™gÊûŒei};S^çå}%%ÊkΠ¼–Ê«´uîn ¼OÚX2åµ,|›/ÞãÕ¶èQ–Uö Ǽ=?ý×óÓ3SPâóÃüðïßB’½> ^þ‚Fø«l|¼þeŒ—ûi¼îoý@~½××lÿX„ËjÓÿÍs°+Ê®¨?ßçÇtö Ô|×Ôb¾{ûy+ßé <¯?~×§—"/¯?ñürÇ?ü•|¬ÏvÆpSVí´‹Ù¿¤æ×µ*Kÿðcõ}ùÝÿ'=åÇ?}üóßE¿õ÷ÿòñã?ýð?þÜ'm›BÛ%^w»~þ“¶oxÒ!lª›ÞƒÐwè"{·üW—ÖÇ.2½cÄ£ ¼|·ñóË„Í1¾Gnt–dün){e˜›œö}rS·xFú®„ï‘a@!Í…g$§}×¶±¤@î{ÔT=Ûj*2ñ}kê“ÖTææ»ÖÔ'7½¦2#ß«¦>y`MeF¾ª©{€Ò9"ŸÇ¢Ã7¦Ç†J‚êkבHïjúØËõ‹ÇA{ÌvjžyÕ&ƒ¿ü˜ýW>FëÙOÄ»_ïÇÜí1ó¯}éÐÇ¢—¿öœ_ù>—0º¬øE©õÇ,þ:ãÚ°/kIÞÎlÚ$7¾'8œy{¼¼îã‰k›áۊʶžGW·(ézlÚtŠÁЯñ8êGB·ét°¾¢2ŸŸË65?ÌM§Hô•ˆÏsÕ†gv®cúŠÊ´.re.BR¥]}EE+¤ª3Eºé𪶤rKŽ Ç—š´AÐ-³Á¥™Øo:hª­¨h™9ž[wªlªçmAÅt¤®²SÛc,÷ßò!QiM%÷2†«¤ïº>/¥ZXÇ5¤@×-êe~–«î®z§t!Ð¥uØ~h¶ºœ®ºtÆUÇ|›´&m)%’îqXõö¨T)ŸÔ{ßb³iÃsÔðK—]¨ßv ’Æq›NriË(RQÈ©i-ü%êe×I71ÎÈØÎcH€¤Y4ì•S•γ+€¤•:ûRˆRºè’IêÀ@Òî´”Hº¾Qc;Óqr×üyŒãB¶kòŸKž]F¿Å?»úGç¬_}ÀVþY*§xé8«n‹Gwí?kÒŸK ¿ï†Ù¤ähK'q‡eë[¶GtQÉÞ%sà¾Ac“À¤-œXiH_ÒÖM<çÐýø+_C÷#ÛÙ·0l—ÔE“¸h•½ýö¬}n÷4T?—ÔM}¥EI]ôc”µh[2‘dgœD¼E ìš}ÿ©/WW !à¼ÈwoCñ£šûv%uÁò¶tÅ´È¡÷±r•z¬­–X ©»©‹%V¬ZÊhk%Vk0_ûN¿êZfpm %—Î\í»*¢I¥Ïõà϶#aWWÙ–Iüªä¥À×Ú%%i‹$ü†»”$m„_~—¤-‘H©§‘¤(}÷è黯Gbª³oÅSR—ø°“Ù§s(|Ø+íÓ5>Ò/M:Øð,™¿»ÀGK ãìÂ}ž†¾G}ÌÝ%ÌJêòµ¾U[Ì}Ì>ÏCÞcyÈ´«{” ¦ç³¶Ïë#îa/·ÏÛÐö(sSÛ?¨”®ì‘pìè £ÏÖ1‹÷yMë+Ic7ùzÙI8R*Òó1¦qÓ.áˆêDŒ¯b¸‡¨Çòw öÏ»D#mQÄ èÙøÔ:MÜO›ç\LÛé™ÐnǨç:oþ¾ÏŽÇÈÄq¯GmФ¦¨üëë†ûÐóX•XŽ¡ç¹$D“£ôYnx9$ysßp«¤®æ±r_"0RXyV¯~ß—ýÙÀ¯“è÷[7[ö¾gv_#$Šnp¹_ù^c,‰Žb™ã /9•«ÿ}iöÇccê•2ç{]††Ç¯6Õ¸3Î)Ý¥iñ¦ÿù>‹îjuÇzØcyŽ mõ2gþÐù(£³D’TË£³Ì}„Ž8¾÷^7ï­ÏÞvé?¦ÔYŽ–-ùÇ´9ˆKýÑãKéÏûFö]âyô•¹+žÅµjÚKÛÝúK&É£víýe÷ÇÖ_ò…e§ÕãËÑ|e ÕcK¼lYâ±²9·!ű?߇ÇjÃy<¦­Næb‹¤û¨}eÊT„=çè'sý>u¢ÝÔ‡nÜëû1«éú{£Hêò«Š {Ti7|6ï~2'E'8~òÎýÄø»Jl}eî‰Ô¶–ÑWæW–¾c}%’Ž¡»Iå3š-E”¹ù]×ÜX y¦Ø_÷“‘TúÝÒOæ,Üréý$ßöÖ^¤©ÞšH„]‡±ö€Òn·µ [:úõôuClcŸ6f4]k㉨§G”öQ’­UÉœkm>šz­Ì#d= ´¤{èl˜õcJ»–™‰CŽQbTRWÙhÌç1¡9{H‰Aðˆ ÍÞCJ»ê9T¼×JªØ¹MT¦Í”ÆGÉÏ[a|t‘ƒÀ­Ý0—óÑ%f[ª g©¥à£ÚgyÜÝþ|ty¶QhöP¯»óiÖñ9Êæ²ÑGùÕ¾!½<»QÙ¨ðâ97ë.°QÛ{8ªãœ'íIhœ6ÑQÓív©‡yÚ¡ à£æ# @jg €êìÙõhz<R={éèšerDÔ†ÒÖ‡€‘òO0R«)6Só˜ésZª ™‹u ©LqíÊÃ+%õC‚2&µ³µÀIå 8ë¥ê6æ JuþÙY'NFJŸ“ØÆ&ÞŒJe)Ao; ¬Tg%jQjii –*mÖÙJ$ÑRù ª·h˸T«óÔ·Ÿ—êõnÑÃùÅKÍnÀT~a‹<ÏÊè 
bª´cœHdª{®:5ª~¸ÌLå¦xž£¢ghú8-ÆpZíAMåq8IN[9{Ʀ*sT1¸écÑx·ã27•ÃáyêXö­ÈNeÞ¸o}¿'Èécì¨(KÔ@§ºç²]íÈ/°S™7¹ÊNõËEöMe–xj6‹ §*Žkì9>Uñ著ÌüôYHžº³n¨ºå& tÙ‚úEÚØ/·œ–n° †*‡Ém‘öò‚¨J»tlëQˆb‚¨ò~<îú˜Vj»^iÀ¨¯´ÄQÍO UŸ-jR;ý $Ui»v’¡=PªyM‚¥êºwÚYf€©ú²÷Ò]lASÍü1ãT]v.ÝG<Õ* €ª¹¾‚¨ê:EUµWRÕ—=·nóܘªn·kl,ƒ ª.‰ Ú˜¬ª´xãf®ª{®[?v'ƒU«_ «J[®n¶´*7Çsïg`­ª2œG?™tõ©|k7ß^5ËKðÕÇìó쇾€°êžËÚ}bÕ=£rµ#­ÀXŸ ¦˜½ì‚d5ãQPVs¤f}\BG¨Îj©­ºç)(P'X™´šó(p«?/ñV3¹p}üX­¹n/âú´‘u®±%ˆëë–ëçþj™¹zmÎÐU7‹Žœo]½Â&쪤cêçÀ»ÊöRÑmí·^_·¼³Iž½ÁMò`Ý öjºÀ×WZ¢¯ºç¡—(>ð«*ì¾÷ƒçÀ`«Öqx4 ìcZ^¾äsÏ&yÚÁÙÏg†}=ïÌ&yŠíûB:@ìëºDb½!gûX“Êï± L ÅfçX€XsމÕíf_V«buÃønñYË@WYìë×[6ȃ©*hì+ Ç~‘Öx¬nyn]¨ ûÊJ"²æ‰ $«{F‰¯K]œÌLÖk´>rfJë ¼pZo2¨}º™«OQjŸ®òèÇæÕúx“`­÷ؚٙW7­—~f¶>ÞdhëCX¦¶fj lë±\æ¶æapë=s&·>Ôft«4 µgt«l*Ëõ`I°ÛÇ‹\‹Oe‡#®"—˜G¶£å@p_Å’®Y»ƒázô• ®’¦­ŸrŠë±eæ¸fëAT%¹ºdÝûQ*@¹ÏíºPî+‰å>iãdß sgk¯e[Ë8ן–pî3û˜§vP7x®Í"tŸ ¹ÍÕèêžÒïVï5 ]›ƒé>SŒ©_¦k“$@]¥]ýx@]Ýñ#¬(3c]Íåí3-_ \WÓÑuëZB€ÝgZ)/ˆ†í¾ÒÛÕ=wñÑöî>ÓØ>­Ìp×æá »š‡Ÿ¢àe®m‚n>-óÝçtÉ ËÐRdá8øá[O³þeÞx&…þCn1ú/Ô«¥œf e¨WZ®©­ÚõjF¶\]|Ô£ÓÝOU"êåbßO´ ê5W¼#TÝNAÔ«´¾ m¸7Û£íÍ;à {i@CØeÔÏv‚É ö^²ù©°WzC}†Êl3ë•+êÓR1YoÄ4Ë€Y`½<µ‡¬W#µÂ­J {EA³b7Ã^­üžÍ—аW¡×XŸ í• ÝXÄ'îåºÄ½QÝN‰­«’¶ÑÞè/"ã5v3Ú{éo®:¡ í}‚ØîÜô¢½I2IØ«&°´s|{÷u˜t8ì\îÓÝ:ƒ½:¥[¶ì•/Q7P4Ø‹óq{wE'{;¯–´—gÙí•JwY3Ú›Ú­ÓÞí¯úioDk·Ô{ÑÞ¦Æ A{å÷f㨤½ª Ñþë2 h¯ºÜºMÖ«®fk_d½Ûjºø¬wSèÛ#õÆ÷ÑÞý¦{5Ö{Éü²Ö° {y\a¯œqtVL9nÍ`¯lrT¯ÁÞE®s%Š1Ø+Ó´«yëöFV¢OÒ`À^ºÐëY/x®±^äÄXï=På ö(ì†ÐVƒû¥™Õó•e¥“„½øÞ{‰ö"è0Ø‹áÜ`ofûÆzAÜÉzÅep|UUmf½ ãD½§Âõ¶fFÔ+L¶ôs7 õ2–$êÅQPõêd)ù¹WhNÔ;ë[`g¨wU»?ºP7£^…­W u>ÔK~OÔ+Ë Áï’”IïªVÚë3ÒYÖ ]ß‚ôru¤×ÞŽ¤ñ:Aï:b½VÑzm¦HЫ5ê~Ü6A¯Õ.‚ÞÇ‘ðê:á zyÚA/N­2ÎKOÎˬ€óQ€óÚì œ×&‘½þ¸zÓä w‚Ò9?¥±ôÚ×éå×è%`$èeôÚÔ ×æÆ½äQz—³2HÐkót€^›ßô’Üôò2‚^-µS‹@z­ƒrÐ xœA¯}=€^VXp^«]ä¼Ä\½üzd½¬c`½ì¤õ–6Ô«ðåhȆz ýzíňzOMð·òÖ"I¯=-“^‚%‚^¾A¯T€^œHg W–t{›½@o—zñ<€^25‚^òD‚^¬‘ó’;èMgÖèµÇôªÒ«RVe¯‘ÞŒs3èå·éµ–CÒËç¤7ƒÍFzæé–éeY‘ôÆ£“9·¯@o^Jèµ@Ћ@ÎËVΫNtžÛ´Ê8/ë$9/-ÀyùÑóâqóZÌkdÜ9oîOÀyýÕ3çµoÎkk+à¼üvÀ¼¶ž™1¯½0¯ÕI`^{`^+͆y-²æµçµÁœ×ú’Ìymtçõ7Ë —Õœ—k£ä¼öÉy¹(CÎËÆMÎË&NÐËIЋž’ —3A/WÒzô²ôú=3èåHÐË ÒËq‡¤—I/’^O+‹ùI ð­¤wê¤wŽ¿ÿ%˜7*ÐV9Oawp+Êž;€ÝÔÞ[?7v#æÜ<iwpêøˆ$õÍv§Léûv<+Œ×ø" ¯†çµ×3RÞÖ¯©w¤¼‡?û¸KÊ×M:‹«Š3ÓUõ𴫏¨ø2Óµ¦kŸL×j  .kscº ·ÉtH¬®¨éÆ 
vœ‰g¦1û¹$꜠®Îá“øªnXÔõ´ u ’ñкaTWtN²ØLuí@uucC ª«ç]gó·#Õõë2Õµ|‚êúu™êúu™êz^2Öµ8X÷H‡L9Öõë2Ö•“è°‰#Öµ¼d®«[jÇOcœ™ëZVÀußiƒëzZáºþÛÌu5_<î×ÕÔ{`|pÝçà‘¡9Í`÷Ð):ì*mJòã v¹ß•d×Ò@víy »ö »jvW¢™ìZ“ÚÕÇ¿-Îh×ï™Ñî;m ]¢]«{@»Ö€v­-íZÛÛ=´å¼KÜ3ÚµœízZF»VÕvýºŠvíÅ€v-ó@»~]F»ž„v­‰íú-3Úµö ¶ËëÈv=-³]Þ“l×Ó2Ûõ´ÌvYdd»¬'„»ÑyF“;ªH‡pWi’:–å^Â]2Mæ b Üe×A¸«Î:âè³2áÌvYnÞóyLo¶{è•pÛUÒÔ%Od»~ü¡ù8°]OÛüVßæÓ0'ï/±²Õl­©³^ö𫊩ÉnÌËöÔŒ©zH¸›8P» ºÎ~0,Á®ÉÜv)¾"Øå"&ÁnL–«3‚Ý%5(»ò“Ðd°ºSìÎ:Ô/Z}s|Í`wÖVŠyø1d°ë÷Ì`wÖá´ç£Nþp°Õª?ÀÉnTÇY vÏLv£†/ãtn’ÝhP‡DpÓËÏVÖ¿QtmOÉ®¸J9ëüíÖ0‰ å2Ðnô3›†„º%h7ú§SX§¹äV´ÝÝ9NïÛÕ™bçPlf´«Ó³®±g>“]åª5ÀúÊîš5qb»’që}«·jF»:t©jåQ‰îꬦ®ßÕ O¶ÖÌxW‡ín’ýWG…DxßIðꨩdä‘ù® ¤HW±ñÈt×L•3ܽ5RÞ«;mb»:A^µ3Þ•íɦ°ljä·Ó]¹4/Ó'¸ëI‰í>'oͽ^'´«sŽoEVk3®íd÷ÖhÊEµ M`÷½w/ýNæº P®¶aTWGƒÉÿ¢6Êji«3Z{D¦kwJHW¹Ô \ <ÝHZF¤HG[{Tr´Fyôõl:ÚÒÄ޶¢·}à̽Ôn† ޶ª¾Ñ¤ª“muìú˜”ÂѶØt´UÐ>äp´Õl"‰e³£­]•míYÙÑVÇD9m£ÁÎuÃ.ìlíAÙÎ6š¿òVzbØÙêcï6ìl_IÃÎV_Tßê|ÛÙê¸Ö¢ìü0rIOÏU<»­Ý+ÛÙFsÔf°å%Æ¥C9íluîùØšœílá]N;Ûî±³v¶§ü :5€í!6²Õ‘~¶L‚Ÿ­Îh\ •¤ìg«`Jß©zÂfC[MBõ5Ö—ׯʆ¶Â-c醶Šp}Ü/V{iŠK™ÃÐÖ’hh{ÈY£ØëÓÐÖ^9ÚÚ{ÑÔöLX5yÚòIð´šÛ†zÚ2ë𴵤ÇÓÖ’=mE PÍž¶š‹¨Ý/Ñ­{ô&S[Kɦ¶üJ0µ~I48›ÚÆÇÜD¿ç&Ʀ¶:¢Y!Ô\e6µ¥)LméH S[ÚrÃÔ–æ¡0¶…Mméè c[šsÂØÞ™ðµ¥_£ùÚf{CóµE}ma›|my|mí¢ìkK—GøÚÂ/±ÙÚÚßg[[Z9š­m6e„­-ì akK»`ØÚš»o¶µ¥ÿ0lmiï [[Þ¶¶tþ…­­Ý0ÛÚj¥jzNÀþ0þªŽårÚÚÚ³²­-]†ak«åí­B_[ËFöµµ«²¯­e#ûÚâpúÚòYÙ×–Á×–E_[©@o[a5·µ[es[¾+Ümq˜Ýmí)ÉÝÖ^(;Üâ°:ÜÚ£²Ã--¡ápËÂá–Ù€Ã-Ÿ‡[»*;ܲœàp‹s;èp+iK?˜·vUv¸µ«²Ë­]•]n-)»ÜÚ{e—[–av¹ÅÑ't¹?q†Ë­]”]nyòDs¹Å¹t¹µ ²Ë­=&»ÜÚ “Ë-OÅ€Í-Ï·€Í­Ý/ÛÜâHÚÜâ*ÚÜÆ³–èAö­% ›[TMÚÜZR¶¹µld›Ûü¡èr‹ïî6·xT¶¹E+ Í-ú8úÜ¢· Ïí+iøÜ¢áÃçÖR²Ï-aúÜZRö¹…åïìÞÙ߆O¥ BŒö #eÑí ÙYÒÒÎE§ BÆ66(ÒA§µi4\TÕ¶ø¤ŠÝ㔀T¨NŽªö¥¬z\š œãÎQÕ˜áÅê6q÷ ¶rÏÌQo±ÙÒ»~8G¡R ]¡ 8ªºœl«¥0ªäƉöfŠ*P³v›@TKÊ 5:½SÆ}Í 73Ô[DmÈ)\61ÔwÒ`¨‘¶ªk©2B0Ô1ÅÇl^³™¡zZf¨žVªà BÁºUúXOËúX½õÜëä±¶‰,Ë1+dõβ¯¾¨PÈFÚ¡Õž*Y“C)«áE ®£••²‘6Éò§,IQ-÷Ü5ù©K PËòÅ!–Õð§¦Y d±¬}Rˆeíõ –µjÁ¬½:³ú2öov Y0×m¥K~AÕ×=“`6’TÉ›s ³Vb̪Ëò¨ •³ÊŠÜ2›‘@ÌÆuó>ü9ª`֯ȂYë ˜õ´¬˜Uw±\k³FbV7§´C1{?QáUÏw¥bö6³ª–ç5¬²bVùÒ¾äºÁŠÙ[Ôr Y1«õ„mËVC1«eˆäX³l!Ìúã²`ö–ç_?’‚ÙH»ÔDÍ‚Y½,˜õë²`֯˂Y]'¤T·tfÁ¬.“óÌUi·F‚:u†`Vc’.lưY0«b¹¯áŸ³‘vTCø.—åØG½¬jZ{¨q×WZÖËÚ§ÉzY~lÈe-)«eýaY-{Ë tnßTËÞzòÔ]- –åã –UÒ½µ :ËÚã 
–ÕãÔ7UÓ¨e­œ¡–õë²ZVÏÅ®ÆPËúëe¹¬ç3ëes=€XÖ/ÊbYûY-›ï©l$ÜóÙœ½¨•ÅEY)koU”²þˬ”µÏ¥¬_—•²ž–”²VêPÊZ!A)‹—Ê2YºiQ&kiɪGk†JÖ/Ë*YË?T²ï´¡’µÇe‘¬%e¬ß1kds‰@ ëÙÏY«7ÈZñC k÷ÌÙœ¨c­@kuêXO«òXËä±ÖÎ!…Õ±JÒ ¡)n“:ÖŸ–Õ±þbYë™ÏêX»êØwÚPÇÚ=¡Žõë²:ÖÓ²:ÖJ êX{w¨có׆4Ö–¥±¸(ëb‘E±~·,ŠÅEYkùΊX»±~YVľӆ"ÖïY±Öd¡ˆõ+²"ÖÓ²"6¿q–Ãú5YkrXOËrXKƒövç 5çrXOËrXOËrX«øÃz^²ÖÆ-Èaýº,‡õçe9¬§e9lþ>ÐÂúòÖ^<‹aí2ˆaó³ „EB–ÁZn2Xü}ÖÀúÓ³e,’úÕß2«_=cYýê×eõ+Ó¨~Mù ôÕ„,zõÇdÑ+.ÊŠW¯~ìxõ´¬xe—BÅ+ËŠW¿g–¼z>³äծ˒WOÊ’WOË’WË&$¯¹,­l¿ÍÈàÏÁd1ÍÿXÙê3‘å™1é§âC‰Ïÿñÿ ¿ú€žvù zÚ¯²ñMú—=òÜꟷüÍVŒ¦¼fû‡|\@ù­s °¹e üü}Ÿ/%MÏ@ýÇ÷ÍA)öç»·Ÿ÷˜•wâïòÊãËßõé¥ÈËãëÏ_<¿Üñ%› f ÒwÄP¤Å€õ‘Žrô?VŸ”ßÍñr¸ýøñ§þ»ˆ<þþã_>~ü§þñÇŸû 3bü]$mV'øsŸ´ýò')²ûÔisÛùHóæ“Òwè"{·üW—Ö¿.2¿ã‘Öÿ¾×x Õ=A¹Ç‚É2´ï“ml,½¾s“Ó¾Ond úE±Ô_ŸFëë«DR¿ÿåÇì¿ò1ËqŠQÚ)ô~ÌÝ3ÿÚ×™ž¦Ë5KŒôWžó+ßçÒö½ë9%æ‹RëYüuþÆ#d¯V=Ç÷ˆ=b{§?Bÿyð¯ø·Ÿ~úýÿøû?ÿùÏ¿ÿéOüÓn¿*\ùøY»# G²°çÐ ·tÏ>°Ï[„àk[[>$ië#ˤúÝÝIIGúúÖïCŽô éiOöf•c”võõ‘Sl¬{ÂÇͳõÝ¥uí­îΓÖwצ;GÇsÉÖwQq·a‡¨V9œDdµ×·ã(íN"ܽY;úÉ¡¨Žh¨ïAú6œDà¢rœGT‰¾Ã}êºÖvÏ3I{üžgÒöDÚ½wÄœWö¾Ó²ðÛÉYpœf}|fÑ¥]SŒ;}‡»DzÍ ýÐÜÃHdŠ{h_Ù(-I['áÑ"‡$}©dOꥭùÀÞ´‚Ú{1N8ÑÊp¥-ב7Ñ´ êiÝy|Ÿ{_(Ù%Øé@5fŸk[(ÙÕpHRí %Rºí£2kkZ_(±¦ u}¡$Šê<ïvîá}¡dW/×…*‡N\ï %‘v¬}Â}HÇ4Î7Ñþ£.”8±R#}»4dG?ÚJ;1ûB ?¤·è %qÏ}ï „ãÁÎ öíø]aqH?Ò×J¬¨% ék%‘¶î]ñ ´!îÑšWïÒôˆ~J>Ò×KâŽ÷v´hI}ÉDËa:çºpʯ³-™ìBZ[jÊÕWLôÍ5µ.Û±£R$eÏ­‰n'){ì–¤°ÂÐìt{XˆJ W×BœÚ¶ÜLØ.¢‡Žr«läo=»æçÔNè¾f¢¶qvJÍS޶­¾ªÇiª% ]ådž“®‡ÍãÔ†í¾d’šâ9¯IÔ£Îçî*›SÒ‘¾^Â-åH_.±,Ê‹§u­‘tË(¸tå§„#}µd—ÉItõag’ô ‡WÒó<˨] ©´¡ç±:ßIÏðˆ€l£š¦÷\æÏµ¯•°QœËsÀC­¦,àEÛ#z·ªµtÉVŸÞø\Ö$çÙÕ¿u¨Z’óXý^ö¢èÑwž»:èÔ ­}Ñäçvm×ùøñöžÕ²x&E¤ì£ïѳæÚø˜µ÷ž•\š‘¾tbFº‘¾tâ×ÍIÓcýÄsîvïYÙO¬kõøu[õ°×=ŸóbzÏj÷Ü“¨Ç*ƒôÙRÏš[GDŽCÔó½ÎõJ¢+OöÓ—QØ“+mˆz¬òmSÒõXÓ rèzrãÑiQ×èXó{okÒõìÚÀÛ×P”6t=l;Û–”=žEñ×WÞŽ¤ìá€~J?2¥Ž5—¯ô#sêXs®!ì±”z¤®–¶OIØc¹Üç¤íáØ«´¡í±/½/IÛ#µÆÑU.ç¾&q;ÉQ‡¶Ç*Ⱦ'm}=ýH+ÒŽ$î±ÂT¯s¦Þ×]IÝcJÝ•:ØÜ)ÄÍqkN‹xt|ìž7pêasÿ%I\í–kÒøX;}ÌòöQ_ó«KF‚À5ò Z^U\^8Ëýu(s“!ñ9Kë$Ú¹\we< ìû‰3Jë«–ñ—CãÃjÓ©!ñ±ò8ç$ñyêE×<Ÿ‘±!ñ±6§™×‘ú×\1:räšß<¦SCâcýù¹'‰½Þy$‰ ;ç‘$>ž—3©|ì›JU2¥þ5šR•ŒÈÅ)QIŽ\sc•¨$‡®©>HS2"×h-〥S»™zä“<(PvYŸÚa;BW¾Üµ%±µGm öwR½7Y]aÒæÐ1¥Gýí‘´>z-mÉ)tN93ŒÐÕÒ®¤õѾ›[]ù3‘WZ—ûèqýÔÇ!’ÚÇÊþž’ÚÇzؘM%ó;‰M£NÖhX¶+#teNäñ2B×[žTu¯Ð©½wê`s¨)iÉ^£¾Ì1;.^{§¤%S"—Ž 
)Æþ§Ô%9|Í&É_£~êìƒâ–«´ä}—gê§68 ë;…È~Òú°nEA&­Ïþè}›P”M¶¾Ó»ÊÉ|­Ip¾KNŽWL§ºÚŸ5ê@v¾S[ï^ Q†Ùù~‡×´g绘Õlýô¥ç;x_ZÎwÙøšÎl|§éS·”¹b6•Œïd7sÒÂPbªšŒïà0­¤d|—P/)Lzø}Øt´# /)Lzô*‰ÝÑ"¼¤0ɶwÉfï’ÂdØÞÉ£û¬(mÈ=]?ŸñŠ ÕPIîwê\®©ds§í]²¶»æ#‰€"ŸÚT-j/KÜ£×xÞ>&pJ" O»’H÷ü+LäŠYÕòê'Ã]r/ëÑ+áߥ“:zô]´LmáJ" y1&œemãZ–!ÒãÖ~„Òµ¬ItiãÊÕ6œrq ҹÔ,nœD@§lÀÚ)áÏ2ƒ‚a ömû6Ûi&c»&©®Î¨“)¼¢N§º’tÈD°” ©®Îé‘ÃÍ›ê.Ï ²{ªê.*>M¦¾ º:ëz/C]mUÄs5à› îòP³î€º:ëzM‡§g¨»L÷(ƒºJ›ÇÉ}€ºâÙÛÞí“u—è³"Xk î=¤¢šj˜A¨+ºuDÁTK@Ý[QtïŸ uo ¦ºgù €º1„r|¨çôêrש®t–j—„gª#í=øHu%÷Ü—»½¨® É:ÔªûðÚ~„s§ºR–ÖjóáTWE×Þq<Àî•v¶8Ø}bœ£›…f°«hfn^äº×üp›»2Qp]…FÑ9tΚ¹®Ä½ò¨­ß\WÁu¶Mý¦àº<ùœ\×nTŒ[ûûj½×ÕÎxõ<'p]ub2Sªu\—¹î)}o4óÒ1’ëʱ@Ǹ–(Œ\7*·VÀÚÙR»—¶TœÍé…d—g!‘ìž²xì&‰ »Q,×Ñ÷œíêõ´ÂÞ€pF»š%¨ÿiØ7£Ý¸nÚ¦Ö\Û'-ÃŽ‘l÷Ô¶É~Ø5Ù®Ò´Ù¶tRd»Úé«E·²l÷Ô&¤~î3ٮꂯ"Ó©/é’ñj\ÓÚLˆƒñzãÕ6yïói0^žXFÆ«=ù}«=¯}7 Þxƒkï‡ÏñF.÷£Ÿ3MÄk߈W/ç$¼B1jÖe? ¯6ðÈE÷zC^ù,L=Ì"äµ/È«Ç-w³k åå´¢R^ûl ¼ü4€¼òˆ¸úQ넼vY‚¼œ„òžò”í´òzZ†¼R.\ý yBÞÇÉB¶©¥ûäµ Èk¥ÊkïÊiš¹T>R^EÃã¼hRÞSJ4g-(¯ç3SÞ'úîg­“òR™AÊki ¼QžÓ²4W R^ÍvUزè•)¯Up`nþ;ļlÄ ¼¹€òZxÌËÀ”—Ý )¯"uk*R^µ†¸g% ļB àK´Ì‹ 1/2IÊË)%)ï³o÷hªNyå!wîªòª•Ë¢´àŒyeU¨£'–š–1o¤©ÑU+2`^5.ÑáR—‰yOm+lüŠ˜Wε‹LÑ®Â:3çÕFß»#1r^Ñ.HçåÔœWÍö'(¥Î{i&§ 2åíÀy$Ò©9ïõ80¯š»œþ+væ´¨ÍØ˜D†”—8‰”W÷jq…ÿòÊ”)þ¿³ÌaAyeN>Έ 彟IEs¦3‰¾‚ŠMÇUpœ)¯¨Ž¦°%(jüê[O®þeö|¦âÕú‹¬¶Ž×Ûábo¸WÍtégâdÜ+-¦Wa)pïüøÓ´9ƒá^c³™÷:›ï"×îÚÊ5É{5,¶áޏWöFSc³º§$'Î^᩾¯Î`ï,‡±ê àËîE‚«òâöêºcéÖÍ{gÑݽß°fn/Ø»¯{ > öF‡;Gä]ê(a¯0­ðéÂ{…Njäø‚½šÀÆeÅÀ`o4UE•ÇörÎÚa¯–fÚí°÷”1ÎÝÜ öÊhoj'èöb¬WYuÒöŠçhå»AÂÞSÑɨ耽\Ÿ$ì夰—ó3Â^ðNØ‹L²^žDGÖ«Óç(òÆ^3ë•H©­À¼<嘗' óÚ‰WÀ¼§ŽÎ™¹ßÞCí¹·*b^®óòGb^žjo ^èAAyy.g£¼<"“”—'è‘òÊ*tX_“òb)—guòr¡×¼Åóòä\b^(LÌ˳Ö^"ެɣˆ—Z`Þg[pWyóz>!ã=Gä{LÊ«£d–ÎÓÁyy|üKÊ Å+´¼2<• ªÔjy--q^±nZÞèÎ?ðº."^*}ºˆWh`oëá&âÅ~ñRÙå"Þ¬jÎ^¹ÁôýЦáÞk½p5 /t³Ðð2'ÔðÊæg=køhÞUÞÂ6QÛw˜„7F¤Þ›‚—Ò-*xu®Á48-¼:v¤›d¾¼Y³í ^< ^¹[õáÞ¼ü®Tðb?ÉKÁ›¥EñÎÃ1Óùn«ªTïê/zœkò] Vw®E½+¦§Úq•¿‡zwѦ¦.§£zWç ôÐÜÔ»:™¤{¨R¾+ÑC_«6ù.+å»2ÀòV¨w7µæŠ¨Þ•ÜÑ$$¦ÞÕsGÓTïòqPï²9Q½;iÓD‡>TïÊ=½Ç—¦Þµ¬@½«CO毹nÓ'ÒÇ-Ò-Ôa—Ê]»'”»²xÒVš¥»:Aeé°H7:—[\]¬¢vWÆq{;ÝÚ]~WjwÙR©ÝE‹Õµ®ºiwí·Ðîò³Q»;)Þìzrjw…÷.IÎÒ]W!‹ð¨P»Ë‘ T×JTW¥¨(®‘Thwõ‡gŸNP»Ë^†âÝI:í½ƒïÊKöè‹qïÖ!ª]yTíâ´Wò\9…jßqƒ¶™ç²ˆ©ÚÅ{‘çF9N‰çª“‘¦ÌÌs5Ömó·fÙ.rI ë·ÌD—5œD÷vÕº:^š@7žtî]F 
ËQ@—Õ@W-C>SK@w“j"¥™èr ÑU`°tB¢ËfM¢ë÷ÌDWÕkéª<]¶`ÝI£2µ•N]ö $º ˆt‘éÊÀV­ý+¤«Ð-¡0]{w0]•gc–Ît9Ø‚ér`!Óå8F¦Ë•L׊z9¼q+Ó:Óãï Ð&%^¢ÎöeË Ѿzi¶ 1 ÌëÙ¶–Ö^ó´ex:×¥‰hË ãôZèç¶ Û—¿²eˆ¡t?$Ìz+x•6+/<ÖÂ,Ú2pí‡L—kœ´e¨QK(êr¹ž ^*gu)D Ô¥ƒP7 P§b Ff¨{ê´èeëªÔ uUðãUc¯A ™®jÔsÓôðFZ”}÷ýÓõëÔõË2Ô•ÙòÚ‡BÝH;DZÃ/¨«¾¹ê uuÊM?ã˜LWTôÔ’ÞéÚU º¥qVñå é&®øBº`°é©üUw ×վѓÕB®k\÷TìÞEr]á层d\÷”ÿȺ~Éu‘r]ë×MÙ$×U6ë©ÆFu‘ÿNuOé$î&o%ÕU˜ÑÏ„6ªkoª{~޳.Iu“ó‹ê2 ºq]tLmYר.߀T—¹$ÕÕ>@m})*)P]åóÖ ®Õë!SÝC‡)G€Üº º|?R]ÖP]¶R]‡Lu5ýÖb~#· º:ÍX¡_™ƒêêºqŽ5©®½¨®üÞ¹î/ƒ:êÒ)ˆl×Ó2Û´IaycÏ•íÚ‹í*M †¦´Íl×_:³]Eí+ƒ»~Ë w-÷€»ª'wÛAH¶kUl×7Àv­@Àví wµ”5Ž'Ýåré®_—é®_—é.óHwýž™îr·é®]ºkÏÝÕú˜AóaHt—+|¤»~Ë y¹Ë“×^¡P^ÿe¦¼ž‰Lymá ”×3‘(¯.º« ”×Þ ”×n Ìk¯Ì{h(‰i_•ÌfÊkYåõ´Œyýqóz63æõע—{G‰y=/óÚëeÌë˘÷Ð1DKÛýJÌk¯Ìk=c^¿eƼž–1¯•8¯½yã¼Ö¼Áy­Ê‚óRjDΫ¤1x«w­çµ4p^ëz­Œz­3éõ´Lzm±¤×: ^‡Œz­ãêõ¼dÔëyɨ×Ë%£^Ë'P¯}u ^ûê@½~]B½ž”Q¯e¨×^6 ’+ëµBëµQ¬×Ÿ”Y¯Åöz&2ìµP°Wñª¶û6+ŒöZ¼ Øk±%`¯§eØË9a/ÝÈ|#m—¾8æ«©ÞsΪܠSÃ-éïϰjÀD–Ð×ý âÕ4gíULÃÛ”=&ßÅ~ÊwÇvÛL»‹Ýôrç“iw--£^êЉz¹!¨—˜L»+^#‰úô…GönšIC´’¨âu†MÔ«\uˆgY„2ñ®ÎË;šTÂL°‘×Ä»ð 0ñ®ÎÌ‹!¬¢PŠw±±ÖĻجkêÝYû•â>[yÔ»³”½å÷PîR( Ìëió¾Óæ½x6Ý •»Ðš?ƒb)½õ\DÄðgÐÁS›Û’òRÞX B]múùm sRîþ"çÝh™Ke§/Ä«sÕ#̈×Uš@¼Ô™Òy÷”®õnzQ:ïÎên[\cˆ—Éé¼{K>œiá¼»IèÒ=!ˆx «ñªÍˆW:6µ¯ÂàHxDÝ߀„[ˆHxYá%%áÅv9ÞCC½Žž¯@Æ»;Ἃ„—úD^zÛðrL2Â+(¤q¼€ÐNx)zýùˆw•Xmi¢W"^¢N ^âX"^˜ýâ¿3Ä›^æÒ/ (¯Qc@ÞìdŒ¬íÅxÛ3ÆKi2^»§iw!ãµ{‚ñ’ ’ñ’~¹vWÃHHñ2L$ãµ[‚ñê¹­)½0oæ®æ5q10/€)/ø–iwñƦÝÍQ3)/qR^tFy9K"å5" Ê»}ª 7Ñ£SÞ<5qÊ›?)ïÓElm##)¯Y£¼y%€”—36R^#Ç ¼pv7Ê ¨AÈË (/È>!¯T@^Ö>B^Ny:äe] äe.Àx™dˆ7ó ^"oZOq¾ P ÀËðòK;àÍ­ƒ€—k ¼\qÀ h À‹â"ßåú“óÝ\!Éwù­Éw¹ D¾ËÙ;ù.óB¾Ë),ø.Û)ù.»Iò]~=ò]âŒÊwífà»l„ä»üÜ们&à»Và»6Èwñµ‰wù4â]V<ò]ù.gx7·nâ]Œ+¤»¬?¤»üؤ»ìcHw™KÐ]»%è.¿éî¥gÚîN£»ü> »ü¤»`hÙ@»—Û]mH´ËïIc“ИÁ|%2ÛÍÙØ…:ÁÀ.³N°Ë<ìâk’ëò‹‘ëò«ëfUƒa]Ë%°.+±.æ†u‰ëëæ!‡X—mñnjRNuó¨HªË‚ÕeGªËÖFª‹ïC¨Ëï3 nþ2„ºv .w¤9ÔvÎP×n ¨Ë¯M¨Ë]B]~mB]¶‡º€³€ºÜÙG¨Ëî‡P—¡.D3u-=¡Èõñ‚ºh dºªéâqdº¸¥¹ïÚe°ßEϰ˺lþ»x¹—ÿn*LóßE#þ»h‰æ¿‹hàeÀ› Ë xÍœ7ðF/P+é‡Ó]{kÐ]U!ÕÑé ^õßÚ€ó¥/:(³àµë2Ýe•%Ýe—Aº+œ6ü;èÀ ÂD¸k•¼´¶¥/Q?xgAï³Ah^-_tãTò]+ið]öS¼$I¼§Nàk.Uv\!×Jè¿ 5ãÃwQ¼ß†w¥1ÃõÌ:Bú2fP ÔÉ8u¼šZoKÛ&HgumIˆÆ jÇP˜AØFÂÍå­âÕ‰”êߎ͈×TÃ@¼ò¢]ç/}xå “ôÎj3âG¤«cãŒxcœ»äÎß| 2ãÕ8 
?W0Þ;#'ÝžŒ7FÛi6`¼®nÎŒ—¶0d¼¢¬êÄKg@Æ«€!ÆÙNBaÍ-PޤöaX3è´ù½y™5ŒmÈxel¿iBdúðšÍmóáÅùðÎ2ÇÚº0¬°e>¼“†Í­îס/Y͈Wý̹×-@fÄ;É¿å®z~3â…K·ñNšzvŸñFçrÎã2ñÂÖŒxÍ7S^xòÚ ½nyÒµ½/^\—!ï;-ùðÂO× Ìk×|x“Óˇ7å“>¼·£Ú™`¼þ4ØðÚÓ2ãusÜjÃ{«V·m¿tá5›`¸ðƒäíÂ;Š‘.¼–?¸ðòq´áåãèÃk×Á‡÷Is“#Ї÷æÃkiðáeùÓ‡‹Ñ„¼l „¼ž#^{ñ¾Ò’/Ô͈>efÄ 7dúðÚeÙ‡—®Æôáµ4øðbiß|xiFÝ}x-ðá}¥%^°™/|sèÃkIðáå›Ñ‡§+˜/³I#^»'ŒxišM#^»F¼–8ñÆ=u¬ló…€/ßN¼s˜/½ªéÄk÷„¯ÝN¼¼'xY.tâeYÉגàÄKGQ:ñ2›tâµl'^û%œxíf°âµLÀŠ×®Ë^¼v¼x¡Ý1/^f“^¼v¼x- ^¼–/^l;3/^¾½x_iɋמ/^Kƒï©ícK.ôâµ4xñZ™Á‹—Ÿˆ^¼Ì ½xY.ðâeVèÅ{Ž®Ây¯_/Þ¸îÚ›«S·âµLÀŠ—W$à«VÌJº1J¾OZÂØÙ‰—Ãx™F'^öétâµëàÄûJKN¼ì,éÄËA•N¼vO8ñr(£/ƒ:ñ^Ò)vg*:ñZ^àÄk×Á‰—Ï£/:ñÚu°âµ´lÅˬЋ×.ƒ/‹Œn¼vÏêÆkWÀמ7^D<4ãµË²¯Üx¯Ò×ÒàÆË{Ò—±Ýxa¶gn¼ ÇéÆËØ™n¼(šñÚepãµ×ƒ/&4ãåÌ€n¼ù2ÓñšÏ-„¼È‰©yy¼Ô¼x9“ô2'æÆkŽ»tãM—5A¯=‚^^=¯=z^{å¬ç¥®{ñ¦§QÎ˧QÎkióò–fÅˤ$æ5—^œ¸†y—á^KïEý·×^i‰÷Ò^Ùż“\U …3A/x¸†ÿëÄ5KK'®eÀטKò^ô ¾~½À5øâVßfÛð‡çÔµK޵«¤¹sÏÿU~,>‘üüð?üû·påø¸÷TÃË_P ™‘obÙ¿ì™ñv?¥þŸFL@Ê{ÖŸuP] mûçïW{ú~}ïgÇÓß½üü}Ÿÿ”÷ó½ë{пCÉ—gë‡ïú䧬˳Ë_<½Üï%ꙵ‚±>=µ†Ç+f/WÌ+";ÿðc5‚ùÝÿCÀöñãOÿüw‡üýÇ¿|üøO?üã?ó9Ç!÷sbʾ\?ÿIÛ7b¨Uëá{dwVÓÇ^«_<(Úc¶s’xêØŽåþ+Ùåc‰ªd±_ïÇÜí1ó¯}é1Xt$Êòמó+ß'¦áúÞϮϿò˜Å_ço’ª‹ö—”'{y’4Tõ\*iNÆî–û)VüÒœdñybJsò4¡÷4å}²ÐßÒœdc‡¡¥Ž?çÆ’´eCiIñ ϰ;Ú٢Ʈ°{ÚòþÔMeÕªä=í´K‚ê;æBi;‡]w$±µj×tUw„{:“ö»âò¦ÐMkhmã{4Œ¼[k“—;oìž':䤭y·D‚CߌŸJ»²-Rò ºç%ïÄÄ1%·|GÇ.©Y²Ï*'¼ç-oaÐv‰éª[)îªbì9VÚâê‘Æ7×"Ü’¯r#n[¡¶=øž¯¼c`Q}mbº¨ YÈ»HÍzV‹ ¥¥ÎTa¯Ç¼ÞË”·â(‘{™ó~ %š×ÚDx÷²d¹þ¢ã'–ªä½—5ËggZÑuJKûŠåb|7 ‡{Ùò†¿EúÙvB^ŒUyÎ,[ÿf'ùÍyËË9Ü44ÖÒ>ÞI§`Ô%,¥¤vöÒØÃL¬SZžØoí­r¼]Òqòè˜èRòZj¥Ý(pÂ`Äs¯kÞu"‰V÷à¿×-‹Áñvëž÷ÇOÚ.qÔîð^¼kuŠ ¹¦JK[ÊpLÔö²×CVçW³ÃŠò8tvUéþãÕó¦ôIÖËMËOÉ{E'mGl´[ié ÞS˰HÇÙ=÷¶ U²ÒÎcn‹%÷¶&‘ ßg¬àLœ{ÛÓª=á³ Œ•–ìÉ¥œŸš¶åÞŽ$Ž´³äºÜó =žJsoW:×$Šv¾ššáÞî|ôÀ­ñ¡­§)m¬.ñô¡[Öãsª±‹Žp-K¶·N>ð,My¸³EPÓuôG>£*s£´äÅý,+¶5Ô[‡«ôîg[ÝÏY3Ý{Ýnyäó‹Ö¿PS%Q‡h±ZÉJiô«‡¼¶ÛÂQ×x^Ú¥í5eÕ&Bµ|¨Ñ) ñV7§)mD­»¢Ö© á]>@z¦½˜7Ka+_OÞG£[ÍçyEöòIB¬®þi"mœña»’ï>Ÿu§˜UI:[½l2ˆrÊ'„OZý"4χ÷ðišhæ£àòç¹—³îÌô=)1ïÈ'ÿáì­èÏó¹\x…{O1+Oa»c²”N´ˆº»ö:•6bVûr1‘1+O—++LϹaÓÞŽ©Œš“Ï©Á{©Ôò1øjOâˆZ#qªJì’8ó”»YG~–ÐH¥ŸO£òg®ù Í'ût<·|~^ïI¡+x‹Ä=Ÿ5§ÿÜœÞ"ñHÁ+K;Ï|F‹¿g2¿Ú%»¾š]¤Ý#†}ÁyÃAustaù &ËÎ<ç8ÖŠ fO#’å 
q‘¸æ“£šNw;œûILç¬Y±K^’û\>sÏg”X±ÏGŠgYW#ñÌG(Ú7‘Ädûº&GâUÎò²ÉòÚ£/é•Ù^_“Ñû¢‰<‰)¬Ý5å8Ë$>Ò–ÖÚÛÇd*ŵè"Q‡wÇ˯w9„q’ÍbÙ ‰û_:=0ÒÄ¢Älÿý©"1…¶VTqå6—·ŒéÑ]'쑘¶ŸxyÇÄjD·è©æ¨âŸÂ©1/ZÞ/‰)¾µ—\uvr\9ÝÛûûÇä*E¸ÖìbváGÄ—Óþ*žGZù]%ŽT˜K¹¦c #qÏQn>O0ÒdÕ½¹`û«Ð±†‰KÕ×GF@qÉÝn{ÌXV$Xݾm“h;äåoì %^x^¹q,ýG:E x‘ä{P×̵WdœFþ»HPÖwV‚ÿ.ªÓÍõ1Ã_Cs­ãÈùL•6ËA³ŠË2þÜŽ"k>Ïà¿J›T]«ohæ¿Ú+¹vuÿËX~‰*­Š•2~%%¼<ú´¨»ÕÁ4àˆ½cÆ77¬ë¦]*érW&Àºnסšõˆ¥Œ€upú¡…ås `¥ÛÕ+m[úÖ* àe~$Ó]™°ÒŽuˆ3^ ›;æ=òë§Î~oû*Öo÷«ozVÚzWÔŒ€—GÉØíVÚ¹v‡ `¥mçÙg3kd“h ó2^pxõ33`¥íÚ¶[¶ƒëèúS[sË×ÖuêK:èMxyvÏ­ÖyLwg÷™ëªmë+ ™+)æ„kÇÉ놋ΞªÜ53`¥êS ÊÊXIqÇvD,pT,Õ¼v"9°®‹ªÖŽåÖu—\dë\?#`¥Éß©neVÚ¶ô…0`¥é”ðzZ&°6·GMo‡*+íº»Åg ú/–‹ªbŒ”Öuk}߄ӯ3ùÕÄ„«Žäן’Éï“vuQ_óxùUÚ.Ùw£Â‰ü¾Òù]¢ªì²1ª.“ßçyÓÜίËäW—]g?Z*Ó_%­ÛÙÎAþUÚqöõ"à_OËøw‰Ê~‰kW`–ù¯®[®î^þûEZ™uÏcéF!࿺N-¡®]ÿ¾ÒÖ=— ‡VÚ8{üWIûÑýÃ+ÿÕ²íµuËð_]1Ý=ü×3‘ù¯îyžÝüWiñá×ÎUÿ}¥%þ«´uœÔþ«4é¦Ëü÷ÉËÜM\ÀmÉüWiûÜaÀŸ¼ÌÝ?$ó_Y]œk÷¡ÉüWWiW_ÙÀ•6k) šÛ‡ñߥLC›ã øïó¸»/Meüë/—ñ¯.‹¹9ÿú-þUÒvQد¦³;1þ>MÝ›ð÷õj þ.1ÎÝ[·~}ÔÏžíD•¶œÝ@ôW·:®n ú«ëæDqç¬Z`2ýUÚ¶ŒµÒL•ƒSŒ›[µ‰þ*MÙ-ÈXi¼6ƒ>à×+$¬´}ÀJ‹Ú×lZ3~½Â ÀJZ÷n¸¬£å×Ù# ðó÷X“ÎXiñÚÍ¿X÷<Ö~ °¿A†À¯{&¬{žSwÐVZ};Vø•–(ðòBŒÅóL_åòPàWîÖZ£okãû›e ì­-c`¯'¿®KøI›ûÞR``¯_{]ÏøI›ÀŸÊwuW=€à×u{óI Øë^&Áþ™û-3 övI°7ŸL‚ýí2 öú•Iðëy ûûeüÔ½(§6}È(Ø{•Œ‚½–•=j8G?¸(Ø2Olu(XWîg?a‰(ø}Û„‚-?DÁÖ"‰‚ßW&üNL(Øš&Q°µt¢à÷{&‹,mxÜ^*=J(ø}Ý#Z{ËŒ‚_wÍ(ø¹íÔÄE$Áï»®9¨‰þÖE=$ÁïGn9¬ÝF…q,% ša)$ÁÏ)Áýho‚à/ÒRd»~>_¼îÛ¡ÃäÀ¯ÄÌuÃcïj'rà×+düN\r|ë·]s€++2)nê…‰¿ïºçwUO×üÚ‚_w=rˆ“‘ëH9q`%®w÷~~_x§(×Ò2VbŒM¹Eü\¹tK|ràç%û`N üdué:2bà÷#·ëÚ]VÚrô£-ˆ_ež9ð;1o<ˆÄY‡wÍ 9𫂬·}Áo=ðû—yš ˜ XÓÔ±E<8šèttË a9;鬜z€0™°º(ÍoHÎLx.@­3ÚÌ„çÇ;0 y{I‚ fm®§³ƒ «‡¾Ú±ÒDÂd(@ =„9"VÕ¸ÏæÝG$̘ˆHX;óGÏ7ޤU¾”õl!a Ïri´8aŽF$Â&؆(8¢‹Y³Çjµ QpZøïRÝL„=/Y¬¨ÛnïPDÁñŸU5¡Ì˜) –9äÙ5%ËPaí’Š‚oyŠ -DÁòv˜ûR-EÁ·Æ¦¨]A|+(ê‹mTËÈOÖQA,ЏyíD( ~âÉs€Ø, V<9ì( ¾ey·ó ö¤$ ¾3¬eBü|à!k€,X‘´ú:}†,øÖÎ}­±]û‰Å-_ʸeR@|kFïZ§7GÚ¬&RõßIK[·¢.Øï™uÁñîÒ_•Ë@¬ö© »*¡ ö´Gì¿ÌºàÇCòj.·ÔûuY¬¾E£TÍ tÁvtÁê@4O¬³cè‚/øÜÍP˜ºàKÓ‡­£/è‚=-ë‚ýžY]ç­\6¹nÖÇ=̨þ VÒ<íµempáÃèðüÌÜ÷>¬ê`ÿmVk½ûš(ÕÁ´¶¥:ø’áÝ"Kªƒ­´ Žç »Šƒy˜ÅÁ:²Ofⵓ…88M§ÏÅ!Žë®¹/SÏ;o5QqëºknÛ9²6˜B¬ýEëÖ·4@,ǧóhOQÌó© ¶‡68§[ÞßµÆBìiY|=^Ê}ÄÁV kºtþa¡ðPËCX#rmâPÛ»C¬OÒí®.îÇrﯣäÁòÔÒÊrãáYL÷cʃ-ëÐË y鋸Ô ½)ª.ý 
äÁW)îjwNyð¥?]¨âÛ¬¾4Å×®#1¢€‰ÛC!l% …0œ!–C™v•À a5S"Ì>©–'´g«‚*aOË*aüº2óáWâBñ3~%f> =‚ñáWf3 ~Ýõ`°‹r ~%&@üJK€Ø² @ìY!ö¬‚û]3 ö´Ì‡_7Ýò2;™{/>ì>üJ<ö21óáWâ«ü6 ‰9i‡‰pÞHþ‚ÃØbü l„ó€6‡áĆudeM•ŠB4¬á$&‚UAѰijS÷²¤h8ƨ˜šìaC4<éÚº£Áakݼ‘ªa3BΪaíóžú™H„ÃBkÝlÑàpLg¤‹jv€Ã·ºâ~Vá0ïI:|iWd;~ŠpX¨§á8,J°¶“à _Ú.ÕVþL5üÿóö.½žäØ‘ç¾>E,¥EÅø‹¤ûV€za€n`r'µ‘TTY óý1Çè|˜oTEFV^AÝʼ'é¯?~ø£ÑŽÅ+ØWÕð i_†!Õ°¨E5¼Ä¾S5,1S [LTÃ’ÏᩪaQ7›jX–,L5líD5l1Q Û1E5líX5\°hXSŠjX”শ#ŠjXcªÖ«TÕð#Õ0æ[ÓëPUÃ[k†MŸª† TÃcs°ª†õÖU5¼ÄfZÕd›Kè°=³—ûŠjØbL‡±zuO§:¡ÃxÈ{Œ'* ¶v‚‡íâÛÃ!ÄcBì1&Ä~>&Ä6 !Æ+ŒÝ‚ ±ý:LˆýtDˆý©0!^c²ü !öbë'Bˆí|Bˆ×Ø“¾Ñg…Û¯ „Øž˜b?&b¿–µ‹ !ö[fBlí„ÛcBl—'„Ø.O±•Bˆý|—ªä|Lˆ=Æ„ØÏÇ„ØÛ!öÇB„Øž¦bûA…ÛSBì1&ÄcBì1&Äö4…Ûí !^c7²ü4…{Œ ±_çÝSYþ´!¶ßF±Ý™bíšÊòU!¶_U±=!Ä~ÌëÛ©¬beMeåЦ²ü\˜ÛãdBl¿b;¢b»!Ä;4•åó !¶'&„Ø~!ÄÞ.i*ËOSñ»y•cMe%V ±Œ ±>H#ÄŸo ´FˆõY!^Zž–ÏJðúfBk„x 2!^‚Lˆ— b}zJˆ—†äµz#&¶ô;!^‚»¦¶<4·•s !þ xcà5B¼“æ·òS !ö$„x9lÑW[Þä¸FˆýÙ!ö–BˆýGB¼Mtõ°§fºrBˆ—–Is] 2"öG.ˆxiɈx Þ’ïêQŸo%¼Æˆý.…/AfÄËaMzå± %^Z^šöÊ] %^Z¦o&¾F‰—–ES_='Sâå!,Gû1Jü+&bò~7UÜâ8|Àmµïà2!1ü!Ç&!s˜ˆÛÞÇns˜@V6Š@˜ÃÄ… qªwÍa‚I‡ 2ü\&ÈŠsq˜ÈPáΊtdÞ¦îÌŠÅ}QY±˜!šÃDb»« Ía"ÃÁuTKS‡‰ ë†a·¬êL¡âYµXLç“YL°k‘zë€X‹›‹ùKhˆ ‡-ôú oð¥ÿâNŒX†´²Óc¿áˆÝpDn½Uü†#vA[Ù+û þBaØA*û Gì¼ÊØd*~Ãbý¡vÃÊB5¨Ø £ÙqMŸb7±ëÜ»Á´Ú ëŠˆÚ ãšÏQyí†ÕÜCì†BI¶¦ž»áˆÝX+î~Él7±tÏ*^â7ŒØ>Ëq‰ß°·c¿a]dQ¿áˆ¡Š]ߎ(~ÃÈTïYAKü†½PbÙYAK)ñUÿØ ¨”X–:%–•£ÄJi 6ž·]îJ‰14 ¡¹BbY 2Hl1ÄHü|=§¼AbY¬2H,¾2F‰XŽý J‰í˜L‰£Ù>‹ %–5.£ÄJ|ÃfpTøRJ|ClÑ7T%¶vB‰ñ¡œõêkH±†Ë"!b‹ "^b/Äi`ëˆX–î [ AÄ7LefÉ6AÄKŒRgY4D ›c*õ&ˆØŽ)ˆØÚ "¾á…Ø·f"^b’4?yúRDlíÛí)"¾áß óò¤"b=›#bnçˆXb†ˆùZK;CÄÖÝ’Â1/£:"–˜!b†ñŽˆå|ü熈%fˆ˜åˆ˜× KÌ1ŸÏ1CTGÄrLCÄÒαð\CÄD,°×1SnEÄ2DÌGü»!bF¶†ˆ¤"–Ø/@ÄÒα\§!b9¦!bÁÇÞ¾(VDÌÀÓ1Ç  z5D,í  {5D,í Ëù Ë1"–vŠˆy41D,­ sL±.G;"–v‚ˆ­ bY4WB¬!Ä>¬!Áá‡kèþº^ÚóM•ƒ£á5H¬w:˜ÁѰ {Pа {0©ÚA›¿©wp4ìÁ[c4ìg4l  ËÞhGÃÞRа·4l¤hÉPîú_áÂ(ì(ì—"Px RRëÁ…å:„ã·åJÛÑ”{K!ÂÞRˆ0_â`‰ ö³ –f‚-B ­P(°4cì­‹K§#`k¨Ø[ ö–‚€=(؃‚€ýœ‚€½¥ `o)XLQ{KAÀüÌÏåû1‰?Öbs{ü_Ž6Á(èSÿñOï?¶ê ¯ÿô_¿ûÏáÌ(L÷EÄÈÇ7ÄÈ_ÊÁí_xÒ¸¿ŸéžëSîñj¤~§í_P¥o"Ùßü Ê6Îüܳ׺Sã—~ÿå“ï¿>ô÷goÿœæðsž;;þéSÏý>ðÖåßþàüïÿøW®£Äôt¼œ× 
S,FÜ1É»#÷Kú§ŸšSÍï÷øßø®/?ýüå_ÿ!Æçüòo_~ú—ßýóOß{¦3’ó£äRJµsúÎ3]?p¦È»#U¾îý©>ßy¦ìgú”1rŒÌ¿yiƒñgŽ‘|Ÿ0¢‹‹ŸöQ¨…ú°tè—!ÁOºžIòÝj˜,×ÃÁOºÀëMûû']f–ؘtíÇz)üÜ÷ä¢ïÙçôYœÝû¬\Æ'÷Ùz=Ögõz>·ÏÖë™}V/åÓúl½ ë³z)õÙ¿ýåÊ1€òà>KÌírÁÞº;ÅÕ¦õõ}Iïwëõ,W‰ñ“sÞ|û,é×åƒ>®ø§{=ËÓϲÿÊ›Ù2jˆpJ;þÚi~ÝÝÜùÀO ðÁ#g9üfþÆ2T[ÚSÌc¦.¼!aòSàÿÔüûÿãçŸÿðç?ÿá/ùË~ÎþŸÿ¾~Uóå»¶›bÄþz¡Ýµz7øF3W_¾óZŠ&‚Ð8D˳¼ªžçȳÞ;ê]p HR·D0ޅ׬ç-æBEójðæ¥îkö¾!Žet‚‚¦t3è&^Š‘ÒÌ(LÜI‹êˆ`aG©ÀPƒÏ>`Ë/õ^h7‚7yºˆ<È<™ºHy†cÊyŽmÙâÁm]ÄÚ¨Ÿ4¶e‹)y6v9±RÕm #ˆÆj¢¹£àØt÷ñ¦¯o7¨Æþð½ìP3­Ë”ÿú‹ŒÔ½w^pÝ»{9´*«i@‡-COÁ˜ËœÃ¸óúzÄýôG»£Z0n"FˆmÚÞ÷º'ºuæ½àYt£›‹3q¼„ª¤Íº6þ'¦­/ï.ðÃÒtG¹ëé?ÿ3Âv‘ÑíÛ D ‹lû¨ Á‹–g¢áþò"üý¬0G G‰Ü&Z›ÁÒ<¡r;`<¢A sþzæ»{;E°ÐâL4ÜÎQM9‚u•½õá¾ã©v‘Z NïïxJûØU±g®Ïx»: Þ‡3¼^·îšÁVh¢eºº­~ÄbpÈ0CI‡hµÅöƒæ VI¯¡}«Á{ Ã8jÞ÷î© ­Ò,-Ó×k C¿žL 5ÈöYÒp?0ìõGÝ˨WYƒs±&_ÑÃf5¼ý¸£Sõ¬©UchrL„ýĆøÞ£E)³ä~î´b“ã)=CÏVƒOŒÑ2ªá;‚\j®ØDË&ÁïºwcTH£ _0ÈE÷ó¢› $;«Ùíg‚]dïÇvóœ+6¸Ú3Û³ 'Ê#Ž|EO™õÚö³ÐŠM¨ï‚¸½Ž1×¥÷Yën?Ÿ¹`g¼Ž«×؈Wn› 6ˆ=C—Xc÷‹ãÃ}]øÚ@ßÕ¹`ãOý:*@nùÀW­ëú"xÒ‚ žú1t‹ŒoÅ‹3Îqsl®×,7’¾æ1ç¸òm˜$D0Óª=º«ÖÛîýxgÃtî×zÓ¢MÜHÁ÷¹¿W|ÆH }Å6~|ê¢M†ü—^©+ýÑ“÷š•æ>§Ömp)ÇüŠï騸­'cÕ|Èûjp®ÛÄu¨øßS s0ŽGS†D/Zº‰snÛPF0ÑêMÆRÈÐE0ÓêýT©ÐúMŠTê:¹œK8|ÒØaÄ£%œÆÇpøRãßÇ*Á>”Š;þ}¬âÄWòØíõ¾æˆlýaŽÈq“qWÍðüýÉGG¶§“OZÄI’uáÛŽwp®áxßÀ84dÜcbµœk8þÌcœk8~ÊBK8xRÏPöíõ34d»É‡Vp /:‡Ì°R_?Qýö>ŒÅÕç‘$Lªá÷VŽ™ã*®{æ°qR(E7ˆøHª‘˜ŒÔx霩qô®ç™» Ô‡½¯!eÛ‘\ÍÔx9g¡Ô8Ή§RzÒXnJ—sÞ”Çañ)J.ôá8bO À½VÚ8g'–'ÉçL£á•†òk¯ ïŽS=ýÌaQti ǸÖcnᨳÆp-Kº¼½–ÅÃq´,ÏÐŒìÕ=mŒÇ¸Ë{(ìöj ;Æc>(¼6ÆãhÉi æwÍÌ8Ƨl,AWê1'‘†ªl‡éõÌŒd¢÷L›Ÿ­fÆ8àÖ(k1À1cTÊscP5#Ÿƒq³j hl+@.¬zmI¬A 3à «^[«®W»OA³ê¥!±jÄNÔ™n AXu½$6 «®-÷gVV¯§$X½<Þ´áA†ÕËJK—ûÑÒî¿Ì”ÑÕ°Úx·Ø@­•µöà¡ÆŒ æ[c¬‡åXK•[%Pn »6Àðö¾(·¶‰·rkÌKà>9$ÞŒ® ù(ºÞñÍ>®ÉÊoIÉ9³tí“KAמ’ ºvÄ"èÚFLE×–)ºÆ—!“Uе¥ Š®ÕÅÑÐõk`AÊqF×ÐéQ]Ã>ð9¶Á5]«1à@×ZÊе·t]½I)’jõ63Mµ‰ü]«Ù ±ë V¤Õeµš1™²ZlùLY­nqÆ®ÕdÒ”ÕOµ4œlŠ•Õñ.Ýe®¹‹²:^Â'Í¥?UVû刲º¾ø÷xcUZ­†f&­Æë›I #Òj©OeÊêÀvŸ¼L”Õq'%%´‹H«¸Bzt¯èzù¹DZo»çz¼J«Íž¤I« í$Œmµº9š¶ºú¡Î…aÕV«‘i«ÍtYµÕj£hÚêê0T&Ÿm5Œ„î¹ø«ÚjX ů:À¾h«ÍYµÕʶ½fYµwF–Uãw8æê¶Èª¸ÂL¶&ªêÓÈHš›Ù²ªª½[ˆª¾µñ›>Í ·ˆªá¡›°bz ¹àøõ°ª¤yF‰ªÚÌbTV­CGäÿCβêå‚XVýÔiÍ5†Ö&«ÆuâkÔ?‘UߨíÿŒ¤YeÕpºBêÛõجª~°ßè™XNTÕñÀcB:¿Ž¢ª^Nɪj8oçh¢ªöëaQ5l‹ÓE¨œUÕZ|ÛTÕ0X´ìK¢¢ª¾aàTFê+ªj¬ªŽv‘ 
Î@TÕqƲÒþ!Uµ?‘Uûo)²jX¤¥4·Ïˆ®:.(F¿‘Ûª®w I^äEW ³è˜N:Ÿµˆ&f6aµ?<Vßð}Æ\E•ÕÏV»ÚÖÇj(«ïêh¸·F¤Õð€»¶™8‰´ºZoßc©ÒêxkâY§J«ïêRu}j"­†]Ý€¾¤!ÒêžXÙîãŠH««-Ý>H«ol,Äœ¦=5‘VÖb›AÃYZ F{G:À>K«AZwJWXZmE†EZ7rœyîiµ?‘VÃÕüœz|QVÇMÞ' 8DY J¦¬^„ÕxmÉaµ?WVc‡düÌcg«­ºµ*«½“‹²Ú¶zª².‰)žÏ€Á¯²ú†i~â.,fqµV"®¶g*âj<Ãü̽U"®^ŽÊâj£§N½2‹«½'‹¸–ýI[D\}ù‹3"®öm§"®¶’¿ª®Æaq›ýÃËêjØý?1x™ÔÕè€÷Ô©¼Ú6ת¼:î²`#`û쪼ú†™þT>¨¼ú†#!‰E^çÌÂò¼:ºë¹cÛ’ê«ã§ÎçG©ÀÚ~Xk¡eÕWGÃ\ÒØ}¦k–šÀ¡H‚Íeµ½«*°ÆUb²«ÒE`mÇP¿µ½*°ög*këç*°Ž1/a‰r€wXûcuüÈèE=ŸSµuHXãwÄu—‹Àú†_;¼.›¨›ÖxAž§×Pu<Ì.ú”MÖ¸K¤úÛ‹ÀÚ»²¬½+‹ÀÚ_X_U>L`½–ÖËaY`}Ãq*U`í=RÖöQõ]¡r¯1Öönˆ¾ÚFX/Çc5n6ï³À¥K°£i°pXdZÏÜ, ëå°,°Ž`"¯M"T`m%XûC5*L.Ú¢è«}$`}u5ŽÐUÛ¬¯öq@ôÕ7µÏ±åUõÕþŠÀZsÕWûs}5| °eº+óE_}ƒc£©ê«o$çzrãÙ¬¯FÝ›;†ž!1g}µ¿ ¢¯öÛ}µ}ó«¾:ºêjñ.£ÿ(²Þ²Þã¿ÿE¼ú€:W½ÓG åžž‹ˆâáC%$ rÅD—@•WoÀã5Ο×ôø`\½Á»ŒlBqµ–¦1\ XŒá¡ 슫µf½€U§?b¢–þf¢víf"ŽÏæÿ2”ç®3õÿˆgU®¹=Hý?ð ÷¹g@ý?عéDý?`‡}ÎêÿOÒ/‹ýGÄVä{,ö°æŽ2d}Íþ$Û‡wÛ ½æ­Úâÿa4BYõº%Sí¦¬V혎¶óbâàH @"x¦2…Rʪë”wJ½”Uƒ¤]SB¤°Ú¦Àbbn5«+ŠÕ2VoÈ_¦jG`5€Ì>÷Z(¬^‚« ¬Æäð˜b…Õ˜W=÷\•Z½´dZ½´dZ3Å»ìC# ´ziÉ´úÙ+ÓFhõ¬´Úÿ*´sÖ’†æ@iõAÊÅ=È´z9'Ój>lYì;¶…V;ZýArq2­^‚L« ³vÆÌÚc̬¬ ´^2´v.%Ðz9,Skà lÌ죖Pkç€B­— Së8çg¢¾$Ô„Źay"ÔÚƒL­qµÇÜʤÔzáÝ/µö;TjíA¦Ö¼¢³ô¢ŒŠ­—–Œ­Q½ÛÜ)lm¼R°µß„`ë%ÈÜÚéŠrë5Hɸߧpk£RÌ­qÔH€Æš®pk£}Ê­Ù2·ökn’‰(¹7ÌÆ­åF”[ûa…[¿k¬Ú ·^‚­Ý*·6£5çÖzAÌ­qX–@¿Ø:–gìÒQlí”X°µý‚­ýî[¯dú{±5JœÝ·C˜µÝš kûyY;·7dÍ̵\ ójû„Ws#‡Õr! «Í"Oi5ÆÁù+)¬ökZm?¾Òj¹RAÕv>!ÕöÄ”TÛ¡¤Ú/FHu4ê•E™TK%Õv)Jªí*ªö³ ª¶ßIQõJ£ UÛo¡¨Ú^'EÕsŽëùUÛ»¦¨Zß %ÕÞIµ´SPm¡ïçÔúäŒS{P8µ^驹§¦^lZƒÔÚS R¯Aʌ儂¨§7DmSD­Ñõ¤´ØŠ"j¿AÔdD½œSõ|x¦Aõ7Bm PÛµ  F¿tO ÔkìæXjo)€Ú[  öke@í½@µŸSµPë³S>­# ñi?ªði v>­«­¨=(€Ú.EµŸMµ–õ@í‡@¡9+Ã=Fµ. ÖEnÔBmAµ]j]s7BmgTBíg<5'ÖS2¢^Z ¢ŽsÓÆÁµ_ jo)ˆÚÏykN¬AAÔvXEÔvAiy0?f²“ªú9T×z`meuåÓº½SùôVKÿ]3(Õ*{5>-5ÇÝ¡Z•‰¨­>¤;T{Kq¨>PWyìŽU‡ê¥¥8T»ñ5;T?ñ¿ÆÕS«öÛôÔ¶¯ZõÔ¶ZôÔ¾%YôÔÂøºOJEO½ã{¦FOôÔB¡Ú§û¢§öm¾¢§FÈ|¨¢YOí»™EOí;–EOà6¡›œÚ›¨ˆ65µo¹V#“¾«ˆÅĶ1«ÈŽM‚¯É ăbâ‡+o)V +ýkŸb‰ ý™ @l£µ€xP @l£µ€xK1Yƒ7ï6× €xP @l#·€xP @Ö y€ø9ÅÄÂkbMÔÄš¨ þåœ+%jâ-Åăbb7¨& 5H& Š ˆÅăìb1qYcäâAqñ ¸€xP\@ìS5H6 ~X±ñ Ø€øaÅăbâ‡ëAÝÄÚ¨ ˆÅăb²Žå&Ô$f]XñèˆX\@ü¨æ¢Aqñ ¹€xðæÑXž›»€hP]@$f. êŽa. êŽa. 
zTsÑ ¹€èaÍD[š ˆÅ¤yÜDO÷ý 4 Våoj2뮉±6ïj$¶ôwqü¿ß<ø~ŸåïbñAwú»{øMˆ±5bOùg±ó°|Eí<,%Q;o)v;²Ç;fÍuõj‹æº”\w<›[Ó\yÎâäAš‰ÿe×ì–þþíÌV¬;–à©™­¼šbÝáW-Ö%³Õ`ÖÌVƒE3[½ [3[mùXfkAIo)&ÞKlÿvŽ«ÞKðüvŽ«Þ¿™ãªwÇÌšãrßTïŽÙyÔ·Cÿ®™íT™ÿþuøY÷]óY šÏò[¨F|žSSÙñ÷K³X=SÒ,v4ÊšÀj£¬ ¬‹&°¼5Õ࣠l¿våà?ï’¶Ê.†ÜæÐŒUš±ŽF—&«rÑbÃñAP’ÕqĬyêø{ÑuüýÖìtüýÑÄTOþhbÚ5· þË®éèøû¡™¨^<6¸Ñ©I¨6º4 Õ`Ò,t1kªŠ& ¼5õàý- Îdg%¶Kª±CPJªÁSPÄYci™4íUL5ôï¥j¥Á¿-ãì,Ùlï þûr×?x­mŠw4ÝÚj›‘0k¦f›qÂÀj»»Ÿ³x=c°¿Gm'sÍPw]sÍp峸f¨#ëâšeoÓ²›iÆùõœ›WÍ3CM Í3C‡LñÌ0ƒ3a¼îo&Œ×|”ñšŽyfd˜‹¦^RÁ<3b¾õjÏAÕ3#Á¡`îÎSÏŒ¥N*™æžLø‡¡†xf¨0Ü<3`̹Ýt‹g†²~óÌÀ†ù›ª-6ÏŒHZ*æžèÈÛ=/A¦¼(;{S½÷ÌÕ!GWÞ¢cõ­”ªC~°²0öQ™ù†HyVRò ³‘ô ÓŒ˜= À¨Ù­8D‡¬‹$ªC¶þ¢CVé»zfÀ4e–M1ÏŒÈþaÝÞ÷Ú« 9C/LÆÌæ™±Ÿ³ü‰yf˜ƒšf`F<˘i†iñÕ4#ÁpdZ"«i†ûb˜i†¶Ó ì—šûºi†mþWÓ Óö«i†™8¨i†mPÓŒkÍÓíWM3tË€{fˆÛŠzfè2˜yfhýWóÌ07õ̰-ꜹìúîoqÎ8°Ÿ2qÑÁéœaÛ~‘u†œQ­3ló¿zgD§‡_Sï ÝáÖUò1ë Û;¡Ö;6cøòr=ba×#Î~=Í9ÃöbˆÙ¯D3l/†Zg˜‰‡ZgØÑ /A±Î°m%j±c_ô”©©w†íÿ òÒ’5È 5OšV¢AFKìë#xgذyñÕ` ²odqï ŠwƇ¬éø+d¿Ñ c'K¾§‹®hÝuE½3¼%k—–¬AŽ–¨“: (²y¹OóÎH%†»¾™"d?šˆ— ‹}§ŽzgØí‰×ʃ}˜ˆ}W‘ˆ—s²y9, ‘—òy9, ‘—‡pkÅîøôLÛ^Ö"{ŒµÈ¾IäÈ~­"GöU*G¶=P¢GÆ9wDÕòy ²ÙŸJ’Í%G4ÉËaY“¼E“ì‡Mr$¨(ZèîþgÑ$ÛE“쿇h’½c‰&Ù‡HÑ$û@'šdvD“ìC€h’ý-M²’¢I^‚,J^nå‘={ÜPTÉv#*K¶‡§²dëé*K¶^§²ä¥%Ë’í•UY²½@ªL^‚¬L^ËÊä¥%+“— ‹“mœèâd}•…%¯MXœüAÄÉ~X'ë+bâdŠ8Ù+âäž[i&B&Nö ¨“=(êdd ñŠt³Q'ëH êd±:Y©âdo'âduî2qòƒ]üÓ/[ÅÉq²VÄɺûÔÄÉâdÚd?¥ˆ“­¡h“íñˆ4ùÊqÚ‰wi²ªMšìA‘&?˜òNn•&ëY“&{P¤ÉzªL¶˜“ý "Lökaò Ÿ†áˆ®ºdÝ&kºd;ªè’­¡è’í±ª.ÙŠ.ÙÏ(ºd댪KösŠ.Ùžê’õ¡«,Y·›,ÙzœÊ’ýND–¬§TU²]kÚý…ù1ãŒ?Öâ„ñ._â%x¾òYÿñOï?¾Ó(Äë?ý×ïþóGàx<Øç‹ˆŸo‰Ÿ?¼”"ò¿ð¤åêç¬wú›Ÿ2º8Pê{«ýßtˆÁ¨~ók€Ô¸„÷_>÷ "‘ªk‚íoÿöÉOá}öïïßÿ%±ýàçüíÞþÔó·çÞÞ€ö/\Á{Ì?þ•+¹¯ Oìÿ"ÛBiå3g|äâ¢þé§fÖóû=þ7FïëËO?ù׈û¿üÛ—ŸþåwÿüÓ÷ž©ÔÅ‘øÄäåûÏtýÀ™žtÁöêSö3}ʘ9Fê߼ϴÁùSÇL¾Ï“Öì?í#‰+¶YTÿ ~Òõäô<Úî ½~ÒõœXjüàRÚß?é*°ó­k?ÖKáà'¿';­õ~JŸÅÙ½ÏÊe|rŸ­×c}V¯çsûl½žÙgõR>­ÏÖ«°>«—òQŸýŽ/7¦%ùŒqÆ¿ä;ÅÕ¦õõ}Iïwë—å,WA¿œë„çÛgI¿î,GFÍdTbM÷z–§Ÿeÿ•7³EoÜÒeµã¯æ×ÝÍLkÆûWÎrøÍü3Üû×çÀ :zWNµ4óv”\üø?õÿþÇÿøùç?üùÏøË_þò‡ŸóŸïÿþŸ_•Á|ù¾í®'ä)[ê‚>,öç×ûoùÎoiòË:-Ä,¦á1«2àŒÿxâ5µô<¡yçá{uƒÅ»ÕM¸œØ6ºs¾g•dzjgR›ìç»®9tÐ)~XëÛ§©â íÌ uã •î¯'ÊØ4ÕÍ Í š)2ð<×”¼ÓÕ¨N]í á„‚fÀ¬[c™waòy5ȨrM‹ÞsçÕñ Uß§ó¹cuüjÄãÌ•Uu-‚“#Ÿñl¡ iàâD¡ì;5˜v¢òÐÜc{¢Ð÷@b qT 
òDa¤'5´u:ͲDçÎë_4+OgéÁ'ž_MÖß"°oí6ÂEJ}ÄQ_ÝÖ Mt w…*Œüy×ÈÏ*žëèæÀ2xy:øï+ªŽCŸ³ú!Û;\짘ç<¶¯–‡@”r{Üq]sµò:ôÈ~qGgôœÚ“wôdÔÆi½üÈ´‡æS;‚XÜKëÈ(kjkM tL­€ãi5>>¿_Sô›·#CôÔ¼íZ¢ØÐE¼÷Œßt8ñMƒ\lZp!8!;Êg’ÑXŠÃÖ‘±RA®_‘Ðò ª‘|t¯JÂÞŽ¼/ZDü¶Ïè»[{ï¾@m—P•i;Gp.;ÇPsGñ¯Dü´­#/ÁBŒv FÆ¿ÑÛ‘1öÅTù*­œÏäþ<“£÷´…:Äjɨڑ7軪àmxm´œ}„ºå|” úÖ«õdhú kK1x ŸP ¸½”¢mPF,5ÏðëßOô†=³Öè¨Ðé½=²<èVûï¥ÁŠ!Ò,ñš>[;•mD†ì'Ïö^e®> tTWÍh±ûnãñƒEï_ÅÑy‘ü Ë‘‰†°F¯§ÆPò]yªŠá}.—¨Bœ£=­´WaWí¨6!NÿÒ>Wá¡ÕúåÖc>Žo~@éa°µc’¶c‰AÚÑÇaÿRšë ÐG^àÇmèˆ9Û0Œ‡¶Å«Ö¤XgdcÍhºm„±ØZ¶QNëL´l‹›‡|©)mâ³Ñ;cNqßöZ 4äXùÅ1žöfˆ2Úì¿{Þ';G=µsoo ¦’ìo´ã?hÆBLwýÍ×\Û«zdt ö±Çdäy^ÔzÚè+—ó\2F¬ÔWú}"9ûoý5¾T[­jØ.óžJ¨ ︑þµŽØ½·QÅÓ®)£ŽwÊ\°(ÏïÙ¾E¯nsQ9 «aïL!bû\zêGf˜OÄ5Ç`ÓFܪïF5Éö>Fl¬2BéˆÜày×ýÏ‚Ò]m¼Eí,Ü^L$ߨ5W®#v츿–X•4}T I¸¦ÅYò”E@v‰–íÊ=†wúí®1.ÈÚÏ^ÊTÛ@˜§ÙYà®ÚFZTÕº§CÆYž±4å—yG.v·qÖ/eYú¬ ‚˄԰½U(Hñ´QÖg¥º¸t¿ Ç'Ìë{Þ ùF| ¯þZÜ$`~ª.l(+{zÖ ‘å–†$ãŒD·k} ŒÞ©B»­ªóm=6ž|éúÈŸºžòF»h†Qæ}R°Yí/d„7dø-¿I—Δ±NÞó]3!Rè£=Ý…Ü?Sÿ`W±]aíZpŽžìúcÁ£è¹.Ô…™2ˇ4G4>e¦3x+z¦‹q¨ò:‰DŸšësæÀ©'WPF÷4—ŒéEOتP»¯¯¡&eË*06õ$"•kV»>’@رSþ[%îm|­Oï:bŒÐQ}Ïp±oä•®¯øØo=Á…ž(ÍzĈ Ý“]ɵÁ?ùE\1z:È+ò¬?{!ÏéÉmFÍ’·v=ôôlyòÓcwËnñ“¼mo~µ÷äVš Û3znk?Í…}=µµt¡LxÏl­— 6ÔWÐ:¡¶{˳ã ûš{^k½ùÚ÷)ê³ç¿ŸžÔZ/¹P? 
ç´ã`_D¶»{Jk6.}J* B%¨tµv [´>ì±qô©ÖÁp̪Öñ±†ì­õX{,{™0<–ceûôÄm}-=™…Œ8cSÌÛ½ö¹ ’3\rng;¶šô+w­± ïWr@ÅÜGX»’wÙÊûøi}6Ólý:HZ¿\EdV#‹—àžÛ„ë4,裂uû2^|ûú«=b—‘ÄÚ+­KOb±¬Sô½´Wî µ,TT½gÄžžÄB"€-­#ÄTkÈKüÆOLÚë§‹‰ÖP,ùó¬£ç°Ðù ”GûF_' á–óÕò¢o‡Åù6<¢Òbiê+}Ô€Ô¥§°ËùÊïú^….mˆÅW•˜Û‡øb¡Ër>ˆ©Þ!¶{n T.=……þe^ÚLú‚Ê¥§°ðl¿çÞ¯ *—žÂú£¾^¾¼œêœ2ÏåT‘ÝsŒ•§“«¡†W;>-¿bvµµDÖoÒ–žÈ.§Ã§6È¢¡œGK .[z"»œsÂ6ÈÂkįåÛt-=‘õ×'bOOdñ$÷»õ’´O¥)<á±Ápë°½Öc#ãKe8‡^éœf¹Dì6»ˆ¯Ç×½g±~ãéšz˜í¢ÌKcXá.=‹]Η§ yÓ¯ùÑîFæmˆµ—5æUC™èÏ9bOOb—ÓÍÍaÞ‹26‡µ!ÖÎÆ+ þËe”ñj#¬ÿrù´{ú±²½NÏßÚ«\8ði:×… ×ñsH§ CdˆÜ¤oÓc,ü*ËØÆ)X¸²¿•Oˆ§ÎÔ”e‚…!ÂLyV¥, ùf¯ڕUD…¡Mu®x5Zº}{»®Pá*Be»ó.ò¸¶„²mír C÷:rž?ö}¾…ÜðÔ;Kc(ìç Cß‹¥›³©ý CŒÝ]ãá1†à˜} »9 Cáª_F º&(\7—íšl—¨0‚;öÜõGËTø¨¿ìøÆw( …öSfv%PÁ vc‚Â~g ÍËP¸0‚[ÉÃi–¹0–Fâ‘¥Á·˜ W!:äƒm‡´€aÏ÷!¶»ç ÷¨q_Yt›¡0FðÁm‰‘€áã}{G•9â¤tßâ'`Ø7 FðDJض£ Æ€³˜>f0ì[ ?Hý÷ E × å1ð6Å·‚áøª·$\Ø Ù+Ö•wãÂVK¸°•ÇR.lû‡• Û&aå XÏ5êÌu.l5µ Û&_þWȰïÆe4\·Ý¾ß¼ÖØð»—BŽv…·qêX¡ÃµàÖ>'(‚‡}¬ðaß®*€¸n˵l[½…{ lAľ‘SqÝ toÃlC ±oÊTJl|„ûÁÄ^K8ñ$Pì{•„ûF FÅu7 !ŒŠ}K€°b—D ,öÒI{¡sÆ^ýH¨±— lìeŠ˜{=Ç^LFȱ—€tì•„»·¼Àc÷+zlñBÝT[𱛫 ?Æò;VæÇ0ÁyY›g‚ì•®!#Ýøc3d1AÞûˆÎÙ+6 EF0¿sÄvNÂÈV©“1²WúŽ\K=Ã¥E8²W¼VÉ8”èÌšXr­ŠZZIË?Lö®B“½4,ãd/á*<ÙKŸ PöŠÃB”ÌH]a—r[Z¤T˜2‚Gž™43e/G,PÙk” Uö’•kÍéÜ›}©`e/Å*\Áø­ffÂdy Z®¥X‘+ ÌySQäMÐô‹á2‚ù%Ÿ-h…ž©ìªàe¯,|Áƒ–…0{pAÌî[ºûg 3fìO¾!SjsÌk(³×ŽÌìå³™3¯1Íâ#׿,‚šD`žÛþé F½ÄR¯AÀäSG*œzmyZ¯ùѪò¿ÐÑшuüã/À:>_W›ÖìKˆ5¾ÄHz[r"Ä:>Ä‘±÷O‰5¬îùþ)±^ŽÊÄz×1J¬€ ²Þ°‡?M¼.È:n+rç9{dí P˜u<­=&Æc~#̺Z1ÂC±MŒEÈ ÜØ·h:fÌ@¥ÛÎmÕ1›eŸê˜$ˆ]¬+"æˆÜ˜uZ)"f3ÞS3<âdmˆ˜ÍéMEÌf_¢*fˆŒ0䀫ݠdˆ˜Í»EEÌæÀ¢"f,rÇ$xkCÕ1keH‘1[q^•1;ˆ³•gV³Õ¨V³USVsÌt³È†YÇìäCtÌÎDÇ ³£íœ|KtÌ>O3¦ÞÑÓzš¡¸:¦·¹÷UÁÕŽEWc†'Ý9–ðêåj™W;8^Ok¬÷0¯¾‘#Üe[áÕ~'«AIHÛ3^íÛxuçí6xÊ«ñÄwZ™^½œyµO¦™Wûü]xµÏß…WG0a(í,Nx5N‰Š_ý5^íówáÕ6 W^i89L*¯¶¹¯òj›û*¯®.cÓ¿ByµAåÕÑòtìã ðj›Û)¯Æ9Ë=l_•Wc»Ñ²§ðjÌDEͼº@ö<æÔ‚«—GÀ¼º@‚]ÆRàjÄ+­¶iñ Õ6WZmsM¥Õª¸z ®.Øžñ©n0—qõd\íSyÁÕ˜å>ûÔ³\}‚Ž?ûÐ= ®¶y³âj ®ÆN0|wŸÁ²'®V ´úDb2®Vû ­>‘ gg£ÕØ!0Kœ®6>¢¸Úè€òj›Ç+¯ÖÛT\}à›þe@dQY£FÄÀm7’ʬ½¥È¬× ɬmJeÖvµ*³öÊÌÚƒ"³^ƒ$³ösŠÌÚž­Ê¬ý°¬³Fa«s” 4µU…Öö€Thí‡}…ÖþWZÛUªÐzC~:ÃThíWÂBk[ÚS¡µŸR„Ö¶î§Bko)Bk?§­­¥*­íœª´¶§§Jk•›ÒÚ[ŠÔÚ­J­=(Rk?¬H­]mRk½Z—Zó×É¥ÖÚR¥ÖÜp‘Zó˜ÔZ_“Z{°±kû*»¶¼NÙµåàÊ®usƒ 
k[ßWvmÚe×KKf×KÙµí7Pvàîî´ÁìÚòpe×KKf×–û+¼^Z2¼^Z2¼ö'$ðÚïSàõrN†×Ëa^/-^/Oˆà5bH†ü™áõdxíGz­û<:½¶ù”ÒkÛþ¡ôÚæSJ¯ý×`z½4dz½œ’éµßÃk1¼^ÊðÚ/Gàµ?×KáõrX†×v±Â®a8{vÎÜz9"skÛo£ÜZÉÌZÌ«—[“Â66Â1¯æ# «¶¶²j›µ*«þ x—U¢¤¬Ú¯QXõdVíO’YõÒYõdV½Ü³j›$+¬öà ­ö‹\íŸ)ÁÕ~NÁÕË9W[çZí#ÐjÿÚ­Æ—¸®o¤Zm{•VhRZm[ñ”VÜZm ?ÅÕ~TÁÕKKæÕ~±ù=IÌd'•õ/sŠŽ/liëÌk)†U‹§ÈMœ¢µÒ’E[ª«FÑȦMPu<«ÐßÔ'9F™9² jôØöÉ úD£÷,¬¨úB!À¹YWm¢1۠ͨBª1îi4pª6Ñ°Ê Qê³Ö‹Šµ‰Ž¯l™ËÂf2Šc-Ä\¢!??GeV%Õñ±ÄË5±úßÇ%:ÞZóqTq‰Æ~ôžÖ?d]ëù]Ó®÷ïb ‚|‚ò±Kt 3Ðxvlóý.ÑðÇ&ݹºD_uŠñ”l³gÁÔKCÆÔðó~È _05ZbOKw2L½´dL½áÄ>ïn´Ì˜Ú[ ¦Þ`,@»ëSû} ¦FKVí ¦Æ‡)ïÖ+–ÑòÁš¢ùó ¦^Z2§F·Ð §^n…9õvÖc÷¹Lí†çŒ©a6”±ž×½ISouÎ<—ÈS»Uº`ê­¦1ÎtL=Fq#G·©LÝIœ¦ŽàÙ¼„Þ cj8³“zœ)5Ú5om]U(µ›¶ ¥FEûB©?NJívïB©ðQîoPê%È”Á×úçJ ZŒà„Ro@õ”~ ¥^‚L©Ýþ\(õ¢Š_¹-é ¥þ 8)5‚mÆJšÒbøÁ”~SP ôw–)5bXaï&ÝB©?Þ\ìç`Ù¿PêÊ£3]í«ÿRj3ïHí<F½Æ¦³ôƒ¡>€}!Ôn /€:á[°ÄZÚꈵô›ÖÒ°kÀ*T·akió½kiø1Ì_¬¥Í_¬¥Í_¬¥±û(Û¨!ÖÒcki«“ ÞÒpqÀûÒÝJØ[ú†‹ïÙ=2Õ[:žÑ í2ÙZÚ¼ôÅZ:b9M0¶–&K}õ•–»JƒWAJ¿¯ÚžÇp•ÖúB¡—Ah« ®Òò(ÄTZŸ„šJku•ö»Jkñu•ÖJê* ”ŠQóYáó3 <¬F®ÄUºÀ0xx-ª­´4P[éÿîj›ÙcÓVº,ÅËñ¬ÜÙïAl¥aŒA¶ b+í÷À¶ÒöËŠ­´ßÛJc/™1°¯tÆíÜ=‰S_i»uñ•Ž4‘ýÄWZk@t_éZÆ<¦¶çêìa%;ÔWi/Y"ˆ¯ôë¨Ùª‡¨¯´ÇØX[s#­¾Ò0LðP¹‡z§â(˜9YØ^‚)³ÿl ™p\¼>y5õ°¢‚˜—!f»=†Ìþ1c¶Z‚˜­v‡úJk ÍVbD8³}Ê3ʓɠf ?k5ï Ûe:Ù j‡Á—tè”jÞ1ܧ¯Œ æjÆ{|:5ƒPÀuþCÔ B±×šiíB 5GWIsÜ3¦€{ÇâšãŽ/Üw±µ€fÜò–î°% 9î*ÕYisÐwõ@Þ,@sÜÌqñ»ñc ù×Ûyàq|`@LìÔÏ#^³cNÅÎ#F­BcÕÎ#¾íCu;A¿ÿô»ÕΣk7ˆíè'ûˆ›ò‘DådÔÍã©#ñ°ãT71uG5sóp“qó0?dq ¦!O=1 F윖çj@Á8ò¤[b@½b§iV)Ô*ŸÃí¤[E³5~¼cšÁ«5òQ ޱõb…"Ô¬GŽÔÅÏâ?p|Kú†JõŸ>¿âkÑ-ަµÉñÕ€:oX™öù®P{P ¨ã§‰wlÒa6 v3h1 FéÙ2½#Ô€úüŠóô43 Æ‡l#1¶PCœ8H¶ùO»Å´øO?¨"Õw¸ÿô´²|d? 
³ßg ÇÕ~õs µÕ~:ú½‰œëm"K!=ór›ŒœÑr,e0n^îÿÅÍ‹Ù7ãæÚæ  õŒ›Ñr¨îãf½Œ‰šk£L[5/?.£æõt„šŒÌx*Û5¯- 5/÷ϨÁ µ¨úà@Íõœ”½ j†:(äР3j^[j^ž£æå 1j–gΘY„˜—ÇÆˆy½JbÌëUNƼ>Rb̰"þ1/eÄ\Ÿ„ñQmˆymDˆ¹þ øø±3!æå×eļü€Ä˜×£c^c^ƒÄ˜,0BpbÌkKbÌ‹)?3æå6™1¯-‰1Œy c^n…3‚éÞI²LŒ¹¶ÄÒïÐmcþ(8ózNbÌæ+O=3æõ‚&cFìĬsHz‰1/×ÃŒy9%3æåéUƼ\CæõdD™×“f^[NÎ\O‰ýˆ]›Ï y=*æµ%‘æõN5#ˆQf ȉ5/‡eؼ´dÚ¼¶$ܼüŽÌ›k5ø†¶˜€óRÔ‚‰ózXBÎk˜óGÁg&È~Ÿ„׫%î¼'xö;øŒà ±&ÛíG½¶|ôÚˆôÚˆ ô$ àFúæÐë) D¯·O$zmI(z ‹öR`ôr'L£— b][B–Ô!)óèå‚H¼¹D‹ I¯ç$&½Þ Aéµ%Qé‚K/?'sé58Áôc2½ M/wÂlz¹ØN¯$:½ O¯d-4Öù@Ê:Uc-´ÌQ-ôÒõz=,†FËc*åT ýApª¡ýœ¢†69 ª¡ý°¢†^K¨z¹‘C/-Y½Y½Y½\-Ë¢— ëå·i´ÿžÌ¬=&Ìz 2³öëfýA°2ëåï̬—Ó1³öŸY˜õ$f½•™õÒ™õÒ’™õr'̬ý°Â¬ý°Â¬—–̬—– ­ÑòÜf ÖK¡õrX¦Öæð Ôz9,SkuëUh½œ’¡õò Zûsgjí …Z/A¦ÖK©µ_l^Nõcv¬÷“à#æHñ§Œÿ.á_n€xý§ÿúÝþ_úù"*ìã[*ì/å‡ü/§Ïæ’–>+—ñÉ}¶^õY½žÏí³õzfŸÕKù´>[¯Âú¬^ÊG}ö;>Æ× ›î‚™Q\%¹å;a‚º~¹¶/é?Ævš«lqåq­˜ |û4éWžæÈU–”!Ã\OóôÓì¿öv¶è‘[:î=&ší<¿ò~n¬ÝÝ[ÍtÿÊi¿¿qЧjßÇsôœjÑõí(¹ø)ðê þýÿñóÏøóŸÿð—¿üå?ÿÏŸÿç¿ÿçW¥1_¾o­~Ϩ š¥a¿ËÔ.ÙÂMŠS&“È4DéÚ§æ®kÃ"2^ž)ubÓôGGšJ–8ÜýXÛ×’®<µ*7D÷!`M:tÒŽJ/ðëìOb†ÎÒ±üž+½)íCQâ‡LÇÔŒ`Ï>–‹·Þì‚2Ï1µy7Á¤”¦ôŽnç\ÜK© Çz¦„ÃŽˆÛ;à ›dšÎ1a€èdWP$§;§Î&+ç\{Iø¢v: · üN¯’9Å:¤XmÍs$å{Š"àn­¼'ŒðáÆä}nZNeŸÊ¸-@ ßÄØ©zt÷®zBð>V{ŠtÜê·€¨úZë­ô¾ŠG4ÁxÂÂt§¢¨÷L%¿SõÊëõ@÷-álÆÏ°aÃNÛ;‘âßÇ‚? 
°£¥íeH¨qÝéä]5ƒcc‚kç°‹H?FÿŸ«òün ¢zã‡þ$Q¾g°A–©öòVr?™*«»*Ï…ñzwçØ ™°ÚF×å4Ï\܆yižÐ3Ô}lŃ/“ñçí˜ Ôvº ‘`ž}•;]ÆnÔ>²Â¢¥‚o¬Ìeä¥Ý=ŠaŒ‘¦(&ïÛ\ ÖŸ±~؇U?$Vǰª£AÞùWa!Rå77‹`™ ³¨¥¾OùeÆ–û± ¨.â_ñ±«$í\]µÎš}®Ÿ¢<ý†5¯w´Ëñ©+¤¨ú’Ö!chžk ¨}V÷,o­]ž«œ¨ÜþJiZìžë˜öMÉXþ£+¾SÛØæ›ã;Õ×"Q ‹¶ï&r„¹Úãaø-öçyÂøÅ£o«Éñ+†ø4`‘¸í‰Ègk‚öÊñ«~¨©û~íñ…ëzþ¬°4Æ×85Tÿùýþf,÷Œñ5^rL™öW7š± 3Æ×Ïm NóUæú´ ÏTÿÆåÎ4}…£‹Ì%2\&ê¶ý9s —‚-Æm§@ޝÔXç‚håí÷­]šËY°h¾Ð-[׃Î}Œ¯w|.ޱ?c…cŒ¯£¡b{7gf,bŒñ5~‘ÝäÏ3–)Æø 9þô>é|Î…#CršKCþ+ä< ž¹§ ;ç{.ï@Ë3 ÑEœ 8~weK4þ yæ7Wj|¦ øÇöt¿ô¿µ%Àh6N”×ݳãƒ_³X‘ìø€…rãøö-ÙqUöï³&…dǘ_p˜h2"Mñ’A>qöÜ™Óc¤xM»­éqÆFÙ¹-GÒc¸×lØåÕËù1v–d´[Q[I±y_°nŠ#òŽÃà36ZRŠŒm9Ç1·KŽŒMDW9G•GÉ’w”]½æ«(i2v<Å㛓%Q†]̶QM‰²ûaH¶\ý[°K¡OK8]®%ÐŒôŠóåŸÄ¶ácI˜aQqAàÖ%̘ëìHÚžVɘmCt˘Ý(Dr潪ŸÊ’0»adÌØ”²Oûý8ev7 É™üÐ(ÿÜKÎìN’4Û6iÉšm?ždÍнQ%'Éš}÷¿¤Í‘÷] ·¡[ÒfÛê*ióƒ‹Æþ›5k~Pvm»z’"YósâÛ:êHÖü@Àxw³Iš#ÙarÔßNšãt…*5IÒ§Û° ¢§Ÿœ4?èPÓ‘I’æ8ßS?¼wY²fŸ/sæy¦àí8qÆoñéè?zÍœQ”9Móḻ&7Z2g›ysæü $ Ê ÜkâlŸ?Nœã&Äæ=)“ÄùÁnÜQ×Aç§&vݶ@gËÊ8q~ŽºÍaf¹œ8[‚È™s<•K²jΜã}Øï–©rÚìßoËš1Gí;Ð ñž[ì5>°¥;Nïü’@GÊý„Ã÷Eh´,Ïô•Úö Jþ\7?n„79öB÷’?óÞ(Í z«¨Ú‘¾äΑûeÚ·®¹sFz27ñjòœ1¤c÷¹&ÏVKR“ç ²Çél¥Ésü—ØýwÌš“gŒOÇ,ã$Ù3„„Gœ¼m•—ìÙí&{úq» öKúÃ9ÊK¶íéšBûb§Ð˜rœÏä/šCŸØ÷ÁV½WrhØ!Ä hø"j}Am¹èQ–$ö1¢\¨ªµdÑñ¨‘dÅ/Ö“zJ£LÜ‘%ŽŽ{â°ùr]ç9¨®Öû'ÒûŽ_œi›õŸÒjuSZí1¦ÕNÁƒ©Lβ#vS•b…Õ¨s‚ÛeͱH§Ù‚ÀjT7†sSÙL« Ì ­Þ‘O¥¡EZ}@R5=Wï8Æ=–“W›4Jp5~®}¼B«wØq–n§«´J«7ìÐß{r)´³]šÐŠÎÃù =lµ¼{f¡BU ®Ž'š¦O• = ¯Æ[s°ÐÃÖV…WÇ Ï¿!a^mlIxuÁ¢ÍSTè¡÷ ¼ÚÛEé!+Š« DZòiÄú*ÓŸKio¾æÌ^‰µÝ‚ëx9é§ÄÚ~!Ö~L&Ö®abíÇdbm‰µýBF¬2É&±á…é<„k2±6Î(ÄZ„ˆ.ók0`ͺáÕ&F^í ÖX˜`ír Ö¬‹t`-×òÝÀÚÛ)°Žêµ÷‰¨ëŒµ¬§cÖ¶þbÀZîA€u|¾öéï¦ÀÚ¯å{uBŽ•ÚZ¨ðj±¯¶;(Éþóß¼ØeÏÄ”-{ޱƊ'Ó3*Éž£K¦^WÌTÐñî³Xº¦Î¨kƒC§Î& p4¶È µ£ª A¨ Z'rš8{v'2hõ×–¼ÙVN5m¶)µ¤Íl7.)³QɘÍÖY2æøFg$Ó]ÄË s\%ÜzJ'¢œ/Û,OÒeO;D ­ I–m‚+RhËD Ÿã+Í_¯I¡m½[´Ðú±-´êøT ­XC«Ò\´Ð IßÐ_ªZGU ­ïºŠ¡£Ã¼eÇVY‡œ*†¶cŠZeº¢…öf¬…¾ð`'-´Ýºh¡u v-´´-´O´Ð²P«Rh{b&…}¾äÉ~)¬…Ž¡ê¸ üRžlêjÉ“U„ÙòdI«:Àõ‘„Óä IÚ³æ4ùÂë;ÊÆhš_¾kújšÇD§K08MV±¹ë¡Yh¯iòÅÐ0Æv=ô]¦o¦¦É¦Í–4ùÂø>Œ.5O¶û“<þÌiÔ·Ñ<ÙÉy²þ¤š'Û=Hžl_É“õ§Õ<ÙÉyòj6ŒA5OŽñB·>=’<ùÄÌn’xΓíö$O¶ÇÙòdûÁ%O¶‹ç<Ù›qžlÚxI”í"$QÖMš)ãÕ›†xš)ûù8SFÁéM¨™²nÑLÙ®E2eÝ¡™2˜ê6×3$S¶½ ’)£Äâ…ôX3eÛ÷ ™²]‹dÊ€Û(º¤™²“3e?&gʺIG3eÝ࢙²=N•mO„¤Êº×¦¦ÊruŸd—n óÎþú;nÆ¢n‚’¢¬ 
sµß¾ç4Vs憛¯EÝš3_À »VµÑÌU‡q³&ÍËo°o¿¯5i¶ÌŠ›ë{뎱š4ãCmÃIisA‰¡YeÓäºÎbš6Ã:ÃzÜ ›ë]Û3ç*Á{>ÔØ › ÖjâzººEhódÚÁƒ•·ªîH(4ž¶æf§´¹`Êñ Õ¸–ü¹o¢üB›½%ÓæÉë\êQÚ ÇV*lÓióÒ†Õ~¬î@‚øÌz¨ªîð ¨;"¸å»|”H×XdæÝó]Åjæšœ§%•F0QõwàzNTàiRľcWÄÌËcM§kÅ.úÛ#⎞LºEÝáÚQwà‚ö¹XÄ‹ñsgwøQEÜQ@ÂÇÒ©j;–£²¶£¯C~º­yõrT–v8\’ú\¤~T–vøåˆ´c Vi|ŽE~ÍÒ\­ø‹´£þf1ó Úcùñ/Çœb~Òoð^@ÿçO=ÿûØÛôùà ¾Ã;ñÙŽÏq\JLä·„43¾{GY ˆ~¥Kã±c²ºA½ä»Ï´˜4~Ç™j‘‰Y_Ì-¾ûLùÓì çX9è߸ËôAù3ÇJºËc:•~Ú§uŽZóz–‹àا\M­•r×iËîW#±Ï¹šH'_G=½öçϹT[¨Û6×ßGbŸúrlóKö)=µž[{ª^ħöT\õT¹šÏí©õjFOÕ ù¬žZ¯A{ª^ÈG=õ;¾»¹–à»!9ãòóÒóxþ®ÆzX$bÇăëïiüh§<‰I[„óÌ¿ÂøÑ΃¢5˜îÅ´ïïkühçAÍÐçÎGÙý»ÿ¯ÿSß>öå§ÿ¤³þ~ûºÕÿ9Ê—ßÓ9éjø?©¥@Ý/Ü䵂y)ýRÎø-™ù¿ÿ×ÿó¿ÿÏÿ»\–Øïv=ã `[@L‚Kε¼ Šü\‘èïùoSþ¯1eþ$cJ4»¡8òýEFÎ^&Ù¿ùGùŠ}Ã1±çÆ'0òSW=PP¨Ê[ß%ñãºà2³U²2V74Nï‚Ãq¡˜ë+±@M²èè÷õ‚A„ 0­xõØR‰á»”÷ty.˜ÖjvØ—*ü>®‚R®ï&‚šÏcåñÕb×=Ëð¢âuƒâWRW1-jUÅr–\*¸@ Ex+¶GÙI”óÜßÓ¡®snÐ å6÷YDôH{­À[×|QùˆhKÇÛî¨Kcu÷(Ê”–§;+ŒŽY ðÖjgÝk±u ŒØX5«U‡&.zÃ×ü¢«¸, ¡Àî»,rÄ ]Ýäµ_wxÊ·'–r­½‹âž_PŠv;ßµ®‹ÿ´çÇ5 ޼¡gk Œµö-ÊÞ¼ËGºë*܃µ ï^¢OmïÝ=³ðîY /íñŸ|=ÑWʉ/ÅýîNx¯¯î‘ê_¹¦‡??Ðñ¢ÇÖ*ÂçüIó1kî¢]ÜÖxþù¬+z ³¢4ñ…ûfùz5`µÇ"gŽèezÞ‹`®g!„r»wNñ‹¢8ô¤ÜÑ?ç* 3Ïe0öMT¯Íî¹>ˆJÐØ&^™ß‘añEòÔvºAN¥Åžè$X•¨±cÒÏètµÒîÛaëŽØAïbP®«Šo‡Ýî¯sÁQ NŒÓA(U+VŸX©z{þû‚ïµY©ÎCoÇâM/ †XÝžõþ¢ºû†¨Z>;N·—K³Ân-»=6%%×5Ê·»b£MÜMïÊuõõ]Ò­•¼i<)7êF·îºU³¢¶!TÎy{k4KÛÙ7îå™ ›g-4÷ôÚðǽÅP“ZoõXe)øku xëÿ7–ÒÛøŠúå0[i뫈åPÄàxá?D쬕ußÞêí°¦ØÇ×ÊÁ"†ø6¾Âwª;×EÔ·»ÖRêÛx뫬»U C,žB|Åß~={·rB}ö)T< Åï¥Âªè·û`[@}øè­ØÀúŒjÛ¬•¢‹¼½U¤¾¤JñÐ ¶%žã©Ÿ¸ÞYÏ*6ïã6‹îOë­[u|Jû+Ô8°»ñH­·FO~h>°¹±?=„RŠôÝÇvTC­>ºb×X|naèSci®ñ¢Ö;4›Òçxr­¨ÛºkÝTÕ÷ïuKn]·çÝßÚ¾ŒØ+Õå5¶cÔû‹?w]nýµ:‘ŨQÞKyf9]\æ5—’Nl¹.ctݾ¾®'êê–TDÛ0ñ¸÷2FÖ {ŽVDZQE±¸“ç•gÔðë^·YmGeüèJñDûÀ*ãÙûJ¶‘K°y»êYËÌ‘•1¼ÿcMº^ä/Æ›% v÷í„8$¶óÆ'ú½’{.W×±g:á9`[ë«òkÖ{=ûÈ ·7lGz;]„Ï>²â—ަ÷­Alí …Îq÷5Vûuï¬<êÆÿÿµ}ˆÞPŒìï‡Ø5‹æ.‡„‹ûZyøDhTÌÕŸA,š·®zWÝu~ß|Z¯ÒGV«ß/òYï*F/ï»o?Šå¶®ZíãbnVpKíx^cdŒØø1´Ö=…‘\¾¥Rbü‘6#9·1²B¡“îë*oƒs.ÐÛÓ8Î9>×C¢*$Nйc`ÍÕ3ü|¿°1€|íwê#?€¤ïÙr]Õokþ: ×ÄëãjæÍoˆ=#oÝÒ»¹ì- dn”ÇE»s«ågßvÏ×wœ(ï¥Ð¦9䇑ÊöÞšÙ yå>×­‚ëxêu¬«9çH\ñbF*Õ’Óøb€C«Å"©*}hÝotCŽ;´˵@¯ÒÇV?fš²Ä š/ï#Ã呸âË÷¼^•QóÎ>¦Èᑏêó„%æH\í®mÖÃõ«¼0>ö¡Ž92ŠR•\˜ä#-Ý+æS×È\­7ÃÏfd®ñÄ7ÈzßïÞó©md®(ú9½-»gæz¢3ý¶/Èî{êzòvÛ&?œº>Sk‚éÎ1S×½îþ¿ÞýIç ÎÈ]å3¡¡¢ÀËý ïà¦\¾1¾&ÞñU>‰g]{ 
ñõ¬Ïä}ÓQ¥­ÇÊ'±û¡ñ•F¼˜MÎ\y8O×,}kÃr̦ΆWJA1­Ü9s% EÄI]a$ûNV΄}€•1*a§é_íˆÏkÜÑS×N°ïÄûÌÛÔøÈa/Iã+î]ó14Cqôñ5ŸS4â½<_µÖí`á6YÞé8bO¡V.pn¾@3üd ÆÏ_¥&m€-ðå¼áSceÖ¹µÀ û¬™¹ÚeÞ³Êír>T˜#¬þjîÄc„Õk‰ÙÔ5“Wl›ªÊÔ:ÂBÄ0“W !©˜Ékž=ôÝ#yÕ©Ê ‰àI#,] „Tc„ÍuîÓÞbhKgöj×RjmÛÖc3lpžW‹Ð=’W;â=µÇËF.b'6˜¾¼é^(ktÄU¾£ƒ ]ëh7ø3Ý ƒlÌ{;˜b¦{½³È>¿¦[»F ¤1"½íˆé^ö÷1V‘8BoÆL›€§ó™2]hšã¹vÒ-L÷©¨;n Ó…ˆšÈ‚0Ýxç¡dOX3ÓÅ@8ç<Êtcø!i¨2Ý» –]|]#EÛ_Ù²"]@Â>q¤[°9%´ß~.H· @ùö-Aº,ò¨´ÝrÏ)¦ÝRó´>)Ò-˜›Ç[Ñ€£ ]¨1¯î©DWŸ—Ý85ÖdÚ%ºØ!r…xEºXÏÙæ‚#ݺ×µ¥òŠt lns—Ž+ÓEfo~ûR+ÓŃ…±{{…éâV÷xlMÚ˜.œV¿8Ì­òý½oÐQ˜ "ù$Q`.’½øœm!M`. Ftç^ƒ¹PóÆ—é¾^e˜ . Ès ®yn{OÇ07Ú۵ýoJsíÇœ›ëFŒûhï´àÜC¶1wUž›Ë|ËçxÏŸ}ª<9>ìYÚ ðÜ‚·-¦ o²©@6jeŽät‘ÇÇ̤/¼ÐE;l?k͘ç–Zµc¬­0ÏÍxìãK*87~WlíЙi.æ6©Û‘)ÌÍp-ˆGÒ’<¹ÒlÝ ûÝ)µÝhëÞ>Ç «ýN‰®·c¢‹­Np¹ú„èF³ ûóßÏš]í¯Št1Ç ôùâHCiäííÛ¥H7vÄoöŽŠtëRŠt±? ,mº%H7EE׿£Êt!´/Ã&V™.J~>îV™.Úí‘f¿ßX…º~>¦ºÑ.^œ>áb¨›ñÏÇ· uñ…ÜË˜Þ Ô•·C™.æ÷ñ­i«èÊtµÙ‹tñßÏn'D7£S\#Í¢›¯Ù?éÊ[¨D7_Ã4Fp.¾ìi"5Á¹Ñ&§gL1…çf”§Ú&tfž+çb˜›1—Kñ s‘²Oý]v˜‹‹ôø4+̵$0WGpa¹öú2ËÕ¤OY.FPxp·…a¹øhÃÆðÝ®,7£2Í>Ö…åjR«,ó´³—T”k©“ \|—cHêKÉ‚rqîÎVåÚ-ÊÍO-âÖ××åZÞ!(gÛÆA¹u:yô!¹˜2ÂÖ©½àŒr‘fæÁ4åb6 çÐÆïåÆ!ýé³~e¹˜ÁO¬½Âr‘éǘ··¾Òanývû5æê$Fa.öX§xßÚÚ›À\쬅–«%seJ-,Sáçê6Êruâ¯,“Z´^)0·à³£Ú›¯ ËśĀ–[€ Ï®}S˜«Sj…¹‹^ñÀ:èe˜[æœÅY.ˆÇÎ(˽a³3ŽÎd™åÞu7fÇpÊr…¢(Ê…ENÌëÒ‹.åÞH~žî,"(·NÇ—¤¢\eGŠrïº]új3ïEšO˜JQ.¦èSÒõªü±=ÿé¹ÅçTù1¦›gþ±0]¼¹K êâ­þˆÆu±Ÿ9Ú"`·Žk©¥˜ ØE ÙäÂu#Y‹{bJåºÀ‹C‰¥X7&(ט¨`]¤Ùû(ç§X7ÚÍBgJu1óIظܸ'Sݘ0]Eª‹²%ûðRª–¸ÌN™ê^˜·ÆE=ï¨$T÷ÚëfÃAå˜êL#ënßm¡ºØÌr¡XXC+Du/ØCDÊÓ¡S]°çH$ûj—PÝ 2À˜$5i‰P]ì€S[úWªûl#3¨îƒ\{Ô÷•ê‚z´Å&ºxÕž+çô‘L–Ž$œe¢ Ïî}öUFº6ÓwH˜.οOé8L ®cl†ºàQñ)›¡.l¹§ìB¡.Æ;̈›Q ®ŽcJuÁâ¢Gt µP]¸YNÕ›R]¸™ Åé4˜©î]Mc†`U°. 
§ÁóÍ‚•ëÞÕià>áºèÓ2H¹.*f•QUW¸®a|áºÀ—¤f¬ gq I ­ ÖyPÚ£ueÁº°œ«ÎêÖº¾{‡ wñ>:ó\·nkÊuõ²ëVS›!g¬ sÒœ ÖÕ¯ªb]û-ëâÜs™Z±®_ cÝR«N¤ò‘JÙÝôªT¬{#óÈùnǬ{W‹£±ò%XÇŒ!a [ƺxìý]w¬{WC™¾Š­X{J¦"F±n•÷]R?&sÝeQ‡©¦p]„··¾*`¦‚S0¦dnZpwêííX뤫+h™ì"ÇŽÏm[ h÷ÆFØ@¾©¬ ]˜8åckE»qõqÇ}URÑ®¥t‚v-7c´[òíÚ1Ù-p52SÝ’áaã­TW×c”ê¬jý5JuKõÅë_t¥º:P(Õ-TÛø2Õ-5 ¹û Q¨.¬å¬D¿³@¡º7då©WÿPª‹)áoSc+Buq-0Çî _¦ºúª ÖÕ7U±.XP”â]žW¬ ³¶Hfú^¸®veåºÖù^°[®Z£¦óc!»°#‹Ïe× Ùµû²KýG°®.r Ù-•šõup%»ðt»Ç¦5%»:’Ô7­)Ùµ9¥ÀÝêö6$ wÛš_È.æé@´/V°kÓh»6ý°[üFAv»˜úOcK»Õ)ºÄ»AYÁ®MSìp­»ïžS²k?7“]›Ö Ùµ7_È®½5Bv ¼ÙJߦdîg}ø ²‹ÎÚ?Nv±=‹Z*ÙÕ‰´€Ýjmô4—=»vc va±×oÒÉ®ý¤Bvá]xSnËd6ipꌙɮ"»E}#¹iw»Eb¢Ñ2v%»Õ˜0®¤©Á„ìꢼ’]å#ví…°«w \×:‰p]¼û1uŒ–¹®ŸŽÐ®ß8£]8_EÿoE»ÖÁí&¢¦Oƒ‚vq ûðeíhWïYÈ®Ñ)!» š¹×|U°«R%»~ Dvm²«2%»¢.Q°‹§¸í½Ö€‚]ÕÎ(ص_FÈ.Þ*hÄ:ßd²ë1&»ø ö/§“]ûÕ„ìêï#`×BÂuõs$\W7<ØÕ1Á®}=ìÒ×H©.ì™ïn’ºP]F¦uuTPW‡…º¢0Y îbª-ûT¨kxV .vJbgoê±Iuérª{ µx:¿Tª+£º:¸*Ö =ÆÁÜöL)ÖÕ”E±®ÅëêwV±.6ïG ­F ×µ×J¸®=NẖÏ×-(ïéwÇÏ v-°k%ƒ]ëDv-W°«]Á®~ìê¸\Á.F”2}'ŸPOR©n¤9ûÖj/ ‘Šä¡D6†4'TNu!ðG§iŠ©n­¯¾÷½·bÀ€ðÙÈûªÃsVãê®’ԡ뙂a]Ç7bÀp?lÙ© x¯â<š’W\ÏÊ &2”N‰Ô`¾ìÀ€*]s{¨Êuo<ˆ!?W¹.ª¿Ìmë¢Ö5à%jÝ»ºK÷ÕiUë*ε®ÇL­K…ãM­+0LźŠÄ†Xwù3‰u6©X÷ªeL»[Ðî]͸Ǯ{UëªÖ˜Ù®Ý˜êuítÂv•Ü©`×bÌv=Ælר–²]yÈŠv/,Åvm•¡]p×±ÕSÑ®·3´+1A»z)Bv­“]¿!»zH»®k'c°‹ò:1¢v=¾€]»»Ú‹&ÙeÜ+`WO¤t×&tW»¤â];&ã] ÞµÓÞå;º«o€Ò];$Ó]»¥»z™JwåRîJ3»¸kVà.¶à¦.@ù~¸kfB wu€¸k!»µCW™(ܽkŃ®\Q¸‹¬“v ÝõX§»üåP¸{×¢ Cø§p׎&p×Ú Ü¼™;Xîzˆá®~Ä”î®1"¼cÂë1&¼ˆÑi!¼cÂkE¯·c«A ¯ö=#¼vL&¼Õ!¶ ´^œ/¾ ¹2áõó1áµC2áõfLxר$¼úÚ)áÕW«^ÿ#^Œ6€ áÕáK ¯·cÊëH”WG=¥¼cÊë—”×/…)¯Œˆ yuÔSΫ°‚^»L½ÞŽA¯^Šp^oÆœ×~<á¼vHƼúWÌk“1¯‡ój¥˜•䦕†P^½H¼jŒ×.]¯&Êx­Û äµM(¯u¦¼bÊë‡dÊë—É”×Û1åõc^ë$‚yígÌk¯‡`Þ561¯=Ná¼Ö)…óú1ôú=0èõ{`Ðë1½bÐkS@¯]Š€^;¦€^o÷‚^;“^1éõ£^ë|‚z=F¨×ɨכ1êõvÌz½³^ Ìzí˜Âzí˜Âzu`ÔëÍõZ3&½v•‚z=Ƭ×cÌzí Öë—I¬×ɬ×^Á½ÖNp¯½Â{í2;ïõ?3ïµìJx¯eBÂ{QB•|B„÷ú1‰÷Z–$¼×›1ïõÓ1ïõËdÞëçcÞk¹¸ð^K€…÷zŒy¯%êªã]b¤ã•™‹éxn™Ž×b"äÅx0­ÃTÈ›ÁÖ‡Œye‚eB^*™WNgB^¹LòÖ>Ðý~^!¯`Àߺü¹Á^ìFøëýkn»\bøš*ëÅp1vŠÛîSë8SV…½1ÆÙ7 ì½aÑÝë¬)ë­5§­£°^•…(ëÕEue½ºT¡¬Ûhã—Ù‡Ö«{4”õê^Me½º%GYo|U#­Ùǵ0ë­l”ÜöflŽGÛ¯“ao-îtô²°{ x>C'°7ŽQ¢Ãt´Ì¬7N]ÞM&¯ö—YoBNš‡†ÀÞ¸Õ¼M©h‡½ Iêk7 {Q kTTÖ?F¼D}ùIYoüˆØµû¡3CB9Ýë.õŠy‘Ü>ÓÿR0ï…bTó½Ì‹Êt7ùŸ`^Ôȼ†‰¡b^Ø?÷lÀ1o¼)Ñ"ÎkçÎ{¡Hà\aαøÄõá¼q¾{›ØI8oÄâ:¦3.ƒ^”N»É—A/Ї^“ª 
èżåøô"r±eÒ±ãœkBz=ƤÕß®á觤7bÑ¿ú6½Nzã¯çTà éõƒ1éñôNd¨Ë¤7bÏ1UËBz#6 š‘Þ n3(¼u†Iz#¶ŸX}VÒ±Vqõg@AØkޝ‚zÑî!¿`F½WM»ööš é…§Ïö±Û.êò‘—Ž^1鵫Ò‹yöNN¼Lz#v¦a¯" 7B%~¼ÜôZ3æ¼JyçÐöLOἈµ¯¤cÞÇЦ*æÅ·üîÄŠy=Ö0¯ÿ™9oÄR då¼ ç +çØu~Cıí kἓç¼Wt‹2%¤ÂyýÌy=Æœ÷ÂÐi`›»ç¿ÝìžÌyÑnïÕ×ó.¡IyílBy#–îøÀuo¦¼>£ì€R^1å=ã£Ü&{Êíög º…òz;¢¼Ø¨yL“²åõS^p²&ì‹SÃ^H /jVŸCµýÞó™×é€×Å€7b7Ï»“-ÞˆE~0dÒx#¯[ÏOðú¥0àµKg¾ëGd¾{â‘Í ’Âw!A>šN/0jN¯Çðâ|ûT àõv xOŒFcÛš^ذõïˆÞˆ•º±ú]Q»Æ&âņ½ã<»‹µ ^€[8Zvu0!^„ža&¥ˆ×cŒxO$a(MÚli™ñF,†î%§×còúá˜òz1ܵS^”ñØXå;!o„¶sªÕò¢rî67î ä}«êŽÍ y#ÃÁÕwó äXt ³»p äÅqÍ yqÌ2æÂx1-s0^1ã=1 ì.`Šxí*ñú!ñÚâµÓ1áõC2áõCáE³}ªÒ…ðF [";±Âk¿Þ³Ó¾:²Âk—Ò¯ ¯]…Þˆí÷Üi.„wMÂëÇ$Âë!&¼~H&¼ÞŽ ï‰ÊOcê^<¬©XSÂkR¯]‹Þˆm)~«&Äë1F¼qwš{„ñâ|û¦ âõC2âµ[Äk‡d»„àõ“1àÅÉbbÖqŸÞ3Ïží€×c x{ÒÙíY;à=Ÿæ6¼~8¼~8¼cÀ CÑYZJo„Ž ãÅ•ÀR­™Iã}ð;žcƒ~c¼Öò¢Kb¢Ó)(C^ë@y#V¨¦ @Þˆõ/˜1^{9„ñšéˆ0^oÇŒ7ÚåYóP¯=bA¼ÚLrÖ=ÆkÍñÚ#ˆ÷Ï:†óŒ ^ë$‚xíýÆ«ÝN/Bñ{µÌG¯’¯_%3^oÆç~âÝF¯qÈ«—¢WÛ ÈëMòÖÛõ*òúáòbLÀZRÀ yu¼È«Þò yë_‡¼”OúЄ×/…!¯¾r y¥ë)ãÕ.¤WG5¥¼x‹÷s ¡¼ú>*åõó1åÕÑF)¯ÝžP^?&SÞÕFÓ˜Ï2åõfLyå QÈëÍôÚÙô>yNÏéÅ#[ì!½úÞ(鵎.¤×Û1éÕûbÐë­ôêÛ­¤wMÒk¯œ^{å„ôbr×1<˜ôZ;!½kl’^»!½~>&½ö® é•ÌKAïšœ×BŒyíÍaÌk/€`^{qóêÙ„òÚ!…òZ/ï”×ú«P^1åµÎ/”×z`^{ Œyý ózŒ1¯½£‚yõõÊkoœP^m&”×nN(¯½rBy½c^ëæ‚y­› æõvŒyíÓ"œ×^á¼ÞŽ8¯7cÎë͘óÚ—S8¯ýxÂyíGoœ×z¥p^}7óú‰󮱉yµ+0åµ_[(¯Ç˜òzŒ)¯uJ¡¼v% y-mÈkO ¯u.¼ÞŽ!¯Ÿ!¯“)¯õ¡¼Ö™…òz;Ƽގ8¯7cÎë—Éœ×bÂyí%Îk©^ç¼þgæ¼cΫ•‡”ó"†¹òd¹'±ûuN˜Kœ×~8á¼Ö÷„óÚCÎkŸá¼ö çµvÂyí%Îko«p^1絪p^»Ná¼:TÐkµ€^oÇ Wg±zu¦ª¤×ɤW‰’¢^=²^%X•õ*åû1ã†?ÖkWL€¯ ¼6ïWý§?ÕzÝ}ÿé¿~÷Ÿ?B”A¿¾ˆZøø†Zø£ ù!ˆýËN‰»û™îø·>etê8è{£ã_òì ¿ù$äÚíú?êù«¯N¿€þ/Ÿyí±¿¿üø—jSü´ñóS~ƒ÷ú?êùßÇÞ. 
ÿËWðóåJb6ޕиÄ50‘nÇ×ìˆÁ*.êŸ~j1¿ß㯸ð/?ýüå_ÿ!²Ùüòo_~úÿy{›^=r$Ks¿â.«©ñïe' LN zQÚ ±èÌŠjd(&'&g~ÿØqw’ö^)¤«Š $2¤×D'N§ýù‡?}üÚš”ÙqŸ¦¸áåøúš–7Ô´]Ttšž„¦_WÑæ½ÃTÙæçßyÄ”9ù=§Êt—KšïöeÌPÜèíŠy#²í]Z#§T¿]¡ÓÖØÞ§5ÏioH=ÄðmuVg{õùÀö®/ÇÔ>dï2R¯º9RÙˆw©jT´æ}GêÕš:RÙ÷©W8RÙ×Fêo¢´ìÿ°M‡–¶ÊÍWÙöYñkÿ^ÖûõÍ_BV3Ú Æs%áªY¿³š«ƒâžÎXõÕœ¥šñ{oGI6Ì‹%ê—êùÞû™†íÊ‹Æñ õL~?¿Q‡ù¤!wÈ ”ëa_æõ:ôŸ«†¿üô×OŸ~üå?þúë¯?~úû/Çßþþ].ËËW]„Ò‚„ÆóôÄÊøWuZ?(göM¡”-…æ„YÔí¡´-ì¤/ëÞÎRqC¦£ibOJW2YD“?l9ÛaÊc·¬g:ÿpë6ü² é (øÅÃø0Ô=–è†U}yGÉVS@©Ü¤\D÷!Êe›R˜#-ۜ„Y¿·”ðÚ¦*÷ý­Œä%lN)Á«®l-Lhù^„ ¥kÑôá Ÿ5 ñ¢äju—åÚÀªYØ¥&«Û,“0LÍ-·(¥l³LZ€ íöÎ'4 ´ås±(¥n³Lc ùmLqBÓóÔ^Ž- F©û,ÊqӰ髟+Nh’\ãVNyèç'4žº³r.a‘0iÝg%YÓˆ/R¶­û,£Pä­â¾Hó¹n²Œ×îT‘_”øªl²èz1˜žãs2µ(¡QÙ¥¶’þw‘Ýd‡ø‡%ÁвŸ)HhP—Öô*‹’MÕM–Q™jØÎrŒ)HhTJŽs}iÉÖ‚„F%d¯ –‹2JÕ]–A§¡ëyÏEéÈë.‹òì¬õœè¢Ìu—Eí¬±dKr¸6×µü00£ÔM–°­:ÀuàÈÖ„b򯮵d-]ŒR7YeYÊññEÑ(u“%lk;)¿(¥l² J%^OÁÆk’â„WФdkqBƒ$ìK@ߢ¬ºÇ2HM¡ê..J±S÷X®‡:RnZ¡•pûIPµœKŠR bK–"[‹¤XQ3x,l.{,ƒ¶¦ë™ÓE[•ueXb€Öó—‹heÙdQûbDnwØÝ¢è½ºÉ2Hr¶&“­… )ª£¥Üˆ‹¥0¡AÝuÆ‹)N(lCË¿/|Š Û‚'nU0JÝeQÒ¨ýØYiÙZ Ð þrÊr½RH• 6þÝÜò¡¬juÝdQ3›"ÀzõD™`G½dsɰ¸J–¶n²ÈíXê™nÙZ ÂAzwg3[5Øê&K”Ó›úZ‡ðØ]ól‘B˜Öø[ ŠVLn[ Î6¨dS .²Õ´dñ\R PØîykLeWŒ_iI1B|±ek1Bn“>HtÓ1úòží–¦„²N‹<1e²›/ÛÔdûVV×Í•A‰`ê¡øuÚRˆgÐUZë볫l-DH±ÔMyeÔq{BGªïlª3뤃’’—_Ïîšs8FñÑ o·»½YÁ¨ñ"Çzä5[‹6W×yJ1BÃ^Ô2n«×&º´ÊYª1B +kÚj«BP*u”³ÊȯR°k ’MÑ\ç3`¥†¿—œu«NkÝ©qÇÞ¦1”›? 
«VVÿ•“¹l-Fh R‘Xux¸º¯ƒÒžÄ'뎽^uJ»º¯Q,FR‘X¯“éK¯ø®¬’ˆAò Ø}ÛΘGž˦Hõ`(ãUž{U&ˆ‰.E rÀjN¬U»Õ{ñ^åÖeO!BJ¸WÙÙZŒP؆F&ÖEoçRÆ«'œJìyßz8×+°Þߥš½åVE¢Ä„rW|áVaä˜Ožáª½Íiy4hV¢TçUߟµ¨ÈÔb„¬‘ë|Çéë2Ž%ækª#dh]SŒÐ(‡«2XcQ5Ìe‚ÕL¼L%†lñNÖ 6ùpë¥ä’&ØüŠµÒ²¦ vljEÑ{)Fˆ3ž4ešûÊ™$l-FH³E•’K!BšðšœTt¨T”ñªlZU¢*ÒÒnl¾ä")…6¿:|Þ&lÇ™æW´rM!BƒÛÆ·ë> µ*eHó+š²§!¿æ‘B„4g7½²õÊÌœæ×<©)eªóëÖ¶XRÅû XN{WV“:¿Z±)…õ¶3»¯gSLöš|À*ÙÉš&ع)?®J÷“ý×½‰ž¯J´Õ 6þÒ–ú²Å¿z¬r¸UÝ·XÐÄ<“&X40œª#M°É•^•“«ù¯Ù")FȦú°µ!ÝÁx“ÇUYîÎ4½æ[1(Cš^sƒ2Ôé•mT ʘ¦×d%Ã]m%ÄöL`»:9ºííÆÁvyK¶«ƒe‰:‚íj*/©³](Ií*,°­÷‰v©ëN´Ë<ƒD»L& ´»'§ÄÑîvÊ’æ†h÷¬%ÑÉ®2€­£AvÕ Š®/Tõ!»»3ÄÜpýIv½™ì*Ô9&ˆ'•)ÉîÕs{Ñù$ÜÝä•ÌåýÜÕÖÁQO&îÆ³ßzëwÝ–éîªy°š’în:TØ88ènØb<”Ô¤»ñâŽáèîkwãm)9›œí®Â"õ<3ÙnL±©›-€»zqc¥P³îFÍ:Gukî®:mõ=pW!óå¥v¸«µÌ^cWwW}üö³¼1©=’æä»\-“ïê„¿€ëõÉw™J|W_æ˜Ïï…xW,TaŽ÷>,ñ®®¨ý©Û—&Þ]õÚVõ¯ ï^ßøºñÿÖ´Ýߦôgœ7ž‚%õ8:ΫCýkùövœ·°°ŽóÆP[Xfæ¼Zw´—œw¯v–QpÞðëÇPÎ;iF%ü ÈyµúÖ þÀyG}4¦rΚ 7V¨K“AèUüòü$.y1ЫœåѪµ°ª zmPÌÓòèíÊeÒËSð$½:t-·£ÀêLz)6GÒ+ÖtÄsx@j&½ñ¡Û¶šú€¤W‰sZ4I/ó%õƇç×øQc$ꌫjC½5UL¤7å“bËzãËVErr^5Vá…gÎ+1†{]Ûñî—°E™>Èy•HÃ{—Ý8¯Ô+ƈEλ Ë­åSoœW~ÍRBÞÈy/?oÜž4 ä¼—ÇP¶¿z%k^bLAz·´XsÒ{ÉeŒ[%¶ ½ñ%:"‰OÒ+eÆp3Ÿ5I¯^¼g+¤W‹ú>>x•¤·‰£8è=•ܧ(IôêÅ‹d'9¯5„œ÷jñXƒÁyãÛªxWá¼ZUQTb^i©Ä;:•‹ó毌Q^¢mPÞhCx6E ˜×z˜7Úºo%G1¯ðõñ¸“ñÆÅ‡*tdŒw;- d¼üîã•ItÉý Äk—̈w•XcÕ5Ä ·Â¯ÈO]ðF/oí[EÂ6-¹K°! ¯‘ïLx¥âª=Ðg9› /¿÷$¼V/•ðÆÏ[%â富—éµÈxUÕÐbŒ3â5üÄk¸?#ÞhÄ©ø¦ç±ñ¡%╊nyíœòÚãå]µäºç´—ŽòžrFÖ2„@yy⇔—ž0)¯=R^®`ˆyO¡¸ñ9§BÌKš˜WžpŒ‹‚`Èy¥~ÔóÈy±üæÕÆ–˜Ù3.yí©ór]JÌË' Ì+ö¶Õ#'ļ¶Wó`Þ]»˜Å?2ÌË.&æ=E«¦ç+EÎëÈœ—´œœw×DYΓè½ä9k¼:A/÷‹zIRz¹¿@Ð+¹úUzyë½ä½ìN‚^ŽX€^®#ô’Áô’Qôr ‘ô’ôrר—˜…¨—¨—€€¬—X­²^« ¬W*Dí8Y/ïš°—´°¹ù örã‹°—›z¤½:ÆÙâHI{y{¤½fíåëhoB¥{ãc[O=ö/‘õòÎ;Ö›Ù2`¯±eÀ^¾>„½| ¤½9+¡Á^å§h±Ã€½*„½dL{ãÖJb_ƒ½1n¶v4£ÀÞC‹rŒÎ`¯Ù{9J{ù*ìÍ3›Ã^Ü´Ñ^Tg´7ϤN{sÔi¯‘RÐ^£¶ ½|õI{ùÑrÚ«™r| öb”õ“ëå#uÖ›{š°¯Y¯Ý`/߃½ örú"ìå a/_ {örÃ’´—ýÚk´—°—Q€½^[†½ô4 ö—#ìÕgy¨1Ó¤½@„½Œ!ìÅÝ‘õò ëÅø!êåÓ&êå Q¯TlçzÐ…¨—s6Q/3Âõò±õr?Ь×mÁ\CÖëÕÝ»ëiûð­¬w¨¬wŒÿ  Y»M¬AŸõ÷ Ϋӯñ‡BW©Ö3ïÙðEVk`fnª5èí-Æ@¯Ô'û«xÛ²Zƒ¦­v”j J­>T½ ˆ50Á=Å©©Öpô’ ­¯-zÞª#”½JæÐ‚Û- ×à*"zÇJqº€^Ä wê‰Iæe2ó. 
÷\Ϙ·35Ìkñ¼:f\“Z@¯ŽÈÖ,™¤¼Ê—Qšœ)ïÙæÉóJ  æ-5Î{(/OŒ&è•(jMäi 7æl½–ϳèµêz¯A%A¯W—A¯EMôZoô†7Ë¥ ^3è•zî2·ÐÜ z­-½v½r¯¦š9” ×º 7lB"¯ê5x;3èU¿%ù)9¯ßB½ÖJ€Þ]BSAƒ$½êM9%–6“^Kñ ç%é [,ÌêæÑCzåRj¹ôlõÚpêåKÔ«{>jZZ¢^­IŸ]œŒz튙ôF©á¬™‰Iz­Xf½,FÖ«ñ3Ôܽd½šŸŽšË™¬—=LØ«)O>S¡¨ö^ùržÁä°WyoÒ±<À^og¦½º¿¹¦W&íås%íõr™örFíå+@ÚkÍíµê@{ñô{9ú*ìU£ù\Ðm¬—sa¯Û2ìUMûP>ü€½Ö¾Ìz½}™õº-³^-ðb˜rÖk6°^{2`½jæ Ùë{MÖkwÖk ¬×Û’Yoئ¥&Ì&ëõú2ëõú2ëµáÖëmɰ—Ó a/_f½:”äSÀz/eö¦ÈÖ«ÛRsÇ“õÒ yX¯M&`½œ’Ézyz‰¬×ŸX¯ˬ7lá+lOŒ Y/'f²ÞÞÖX¯]¬×n¬×n¬—þ!Y¯×—Y¯—ˬ׺ ¬×žX¯ÆÂX³9’õZ_ƒõúýeÖë¶Ìzí’Y¯ K°^ëj°^»°Þ°ÍÂøÏ–Ga½rÑÎV Ö+mlAž`½Tµ#ë=4‚ôȬ÷ÌQ ÙëêÎ$Ö¶%‹UdÖkÍëímõÚ5Ázi.5 °^Ù¦˜ÄØë¶ {%<:¦háL{½-™öz[2í ÛÔ„:{óBØÛÛìõ¦dÚkMÉ´—Ë9Ò^·eÚkMíuÛM{yÏ€½^ Ã^·eØk>[†½Ç•­³ˆ«€õ^°5ÖîÅëõ+fÖk6°^»&X¯u#X¯=°^*Þ’õêÙÏx\wì+X¯½<€½6J@{m ƒöòáöÚ»Øë·i¯7%Ó^ë²L{Ý”q¯Û2îå€öÚÚk%2ìõöZ±Ìzm,gÖëMϰ×k˰×`/«#ìE+Éz;SC½l%Y¯˨—š¬WC¾ ’õò³CØ{(eY•Á ì=¤‘”hu†½zš¢ a¯rŸ–!ï°W¯ß\xöš-ÃÞ0ÍM„°W¶¦Qbr×f˰׫[ü§·é6Œ)°÷[tysüm§v¯íÉÆI(Ì«ü•C9DÔ«‰© ¯õîrS†Ÿ Ô»)çÜPötˆzã/çV3ߘ0ï¤(¶íQ‘éåÖ2Q¯ëëfÔNø±5°ÔË”õ¦ËÆm¯ÒÔåU¯i)Ȳ¼ƒÞà*AÒ;) ÐR6vˆz'IL Uòª¼‹ò‹eSåäBÔgUÞp!ÖvΟª¼CËîå¨7Öççœ2êÕ0j¹ŸL•W Sç. Wál'á©Ê«žK2På]Ôü£oª¼zËj˜ ó L¯ÆôŽrÖýñý¿I˜w_ëA¢^³õºjoF½âE)4¨W¶+Ü÷~à®Ì YÛŒzG-Ýê ’N™õAš:–&ÍkõAš÷hï„£^*Å™4/Ô]š7›¾^šŠ=¦Í 5Óæ…®[ç…Ú˜‰óBùÒÄy­Äy¡7dâ¼›Æì³sKm^k;´y!Áfڼи4m^èžQš—ºg”楄¥y©êFi^H›6/•èÍkå2ìU¹æ ˜6/%"{MŒÚ¼æ¢6/Ä©{ÍDq^Ê Sœ— É€½Ò×J+Ð^ér%'´*ôÚÏPè•âNƒTè….´)ôB½”è¤@¯Ýz)ïL^H8S &êóÒy^‘ø=q8”ç45×3×Ôç•G÷]bi¡Ï;Õ䤎zMÖšò¼Únôˆò¼Wð J)ÏKíjÊóZ}çÕ×@åyq Pç½–uˆê¼l Õyµ Otê¼VîVç¹ãs¨Wˆs“:ïØÀ¡£^·eu^»/¨ójmv ä^¿aùÔ;hG¿mQžW;úm›„ú¼áN¥­6êóŠ[¶ êó*=gÛ¢>¯°lÛ >¯µ½£¾Ñe‡ˆú¼Ñ”´ÿ@}^ÑÜûå¦6//i^uAÛ¡4¯ˆlÛ– 4ïÐ܃¼ZE·ýiÓæ¤”]öØM›×. 
q^+WÄy5§W¥ jóÒ”¯*ª›Ý$¼© ój'v*‹#Þ¨Sßu“&ÞΖ¯l·í£‡ e^9Xuo€×¯™¯l{‹à•miÁ¼²Õ• ^Ùf®ØœowÍxe›4ë?Ÿ° x»{O„·«/Þ«\‹á•mˆuäü áõúá•iTTe‰¤M„×›’ ïU.ÞÙ¢“Ÿ ¯wKÉÉɈ·³%Ä; ÏŠŠ¯ßtB¼Ý%ÆÛÙãUu%´!^oeF¼~ÉŒx½û3â½în\ë ³Œx»úâõ^Ɉ·³%Ä+Û.ðøAñú3͈·kgb¼];ãí®™oW.1^/—ïu Dzî¯0^{zñúdÆ+[4»£,ŒW?G=%q4 ïU¤ ã•i–ÈãªeÈ+›ò•°ïy»Úäí.™ ïe;k®&@^Ùê´j×ÊòZ3yeÛÖz˜—ýÌËåµ»åµÁ Êk”×^HP^  ¼]}‰òÚtÊk“e¦¼^,S^ûœòÚtÊk#v{¾b©7ÞFy¿S¾!\“rdÚYï,È­Oëtœ£ —P¿aÒ´î;A¿aÔskú £â÷ë¦~èØçvRž:½Rš÷’vˆú ³\ô–ø ú W–ãz2Ÿú ñPÙ‡>®wÑ`ýµ$l²im6¿’„M¶ :Œö*Júç]ðk¢½²Å¨žÚÏ´×£«3î•-n½æìʸ·³5Ü{]²8²†{%m|žI!á^•Ûâ—'ޏW¶xíʉSà^uË o̽ý¯öº)±Þ«‹µHy`af½W´y$22ëíÚÞX¯L»¡žÓ÷™õv—L¬WM‰…Ìô °^ucL°•gÖ«k.ŠDoZ¼•õúSˬ×[f½>º2ëõ”Y¯wgf½>Ò3ëõ{Ϭ×ß‚Ìz»r‰õ榀 nEó¢±^¿df½×%sta½2m±ê®‚¶ õv—L¨×ßÿõ^W;cÉ·u:½šLD–Š0M&½ÝÅé•혖šÿ/“^Ùöð£ªânC½WuGKf™QïÕ‹g*ɨW¶Sôå™¶3꽚R>rÆzíöÀz­-`½6Ç‚õZ[Àz—ë£zÌUŽ!±ÞõÚ|Ö+[|r‹ìX¯lñj— $°Þõv¶Æze:ã> ’άW·7´ýB°^ûÔõvåëíl7ëõžÌ¬W¶t ?£Þ® õz±†zeYÊê½JÅb´dðȨ·+—P¯lÛZRÌ%Ðë…2é•mŸÆ*^‘Q¯ù @½ÞÈŒz»úêíl‰õâ2èí™@¯l‡Àã+1½]¹z/›”êK¼o½-^ï”Dz½)™ôʫֲa Ò‹ûΘ×ÛqaÞîÇ„yý†3æíZ0o×ò†y;S¼-aÞ®º„y/[{Ó2åõb™òZ± y»b òÊ]އÕTsäõK&Æ«b1Âö’‹)3^¿ñ y»¦$ÈÛ•K ×Ÿ]&½]¹Dz»r‰ôz[éíL‰ô^c\;lE7‘^ñ3éíÊݤ·+’P¯ßYf½Zi­W%ì•mlH´·»d¢½>ýdÚëov¦½^.Ó^/—iogK´·³%Úë=™i/¦‹Œz}fȨ—…çõá“9/ %ÈKC"¼]»áíjJ„×û"^ïúLx½ºLx}ÔeÂÛÙ.‹»Êx·kB»-áÝî®ÞíZðnwÉ„w»;N|×m™ï¾b;Ï̲-ñ]¿…Ìw;[â»-ñ]¿¿ÌwÍeÊx·+–ð®Kt×{:ÓÝ®•‰îvåÝõr‰îv¦Dw;[»~wïvån¼ëÏ-ãÝîr‰ïv—K|·³5¾ûŠéÌ>+l‰ïvMI|×:|×n|×® ¾ÛÙßµvðvåáíl‰ð¾b;³çŠúáµù„·»÷Dx»úáå{ÀëÅàµR™ïúÊ|·»bâ»mñû}›dÃOW¶Mµ/Ç˧Žhòr¡gýé>¾]ìYúÏþã-Y¼ë¡ÂÓgB…_kÈ›Àõ·U©»û”îøw®rWÞÈr£õ/[ ¿ äa—<~×ú•®6 üå][pwûýäë_¦¶AñNÏàiÀóçw­ÿîö§å/¯´à¾æO_hÉ"X¤­Ú0ãK´ûåã§—û§ðQþùåß_>þù‡?}üÚŠöX \ŠÒãô÷ëjZ¾½&ù€á¶…ï¹_q±_YÓæ5½ÃTÙæçßyÄ”9ù§Ê|—[§/ƒtØõ%¿B½½Ùö.­Á 9kž{Ö,çå7{CžŸß§ ãþA™8_{>°½ë˱$§î=FêU7G*ñ®#U­±‘ŠÖ¼ïH½ZSG*ò^#õjG*òÚHýНá4Š ŽÏïqæî±`u,jí¿QÃËz¢¾ý£‹j¦Cç¡%¼˜–/T³~g5×Ò(îéܶ¹¯æ,ÕŒß{;Ú/ßÖcšçsúR=ß{?“¶ýŽmÚÇXÇ~¾žÉïç7êPŽŸu;”äEÐG;+áI›×¡ÿ\5üå§¿~úôã/ÿøñ×_ýñÓßùûß–ïrY^¾ê覨“tŒóŸÜ¿ªLв)ê¤î«LZ›?Û¸²¦²¯¢£‰:Á7Þ¡f›¤Ô œDÒj:ÙŽº¯2 âöEônÛ‡œÀ[aÇ>ßÞð¶90HÉ•j¸þv%é¨ðYõçx's`Pg;ÚΠοÆåãÙV¨´W5\n_?,mgErŽ5rÛõ/+ÿÛâÖøÎM‰KÚΊ Ö^{zßs\⥶ýÈqAaÛ[xž”~ÚÎÊÚâ#Âv ñá©×”IÛYY…“Ÿ´)2ucEz›sÉ;½]z®Ê°^ÒPlŠ=i+øæmŠ=i+f[Ÿ¸ vˆ$NÚÆJŒ¯èü;}it{ ²«I˲ò?³溯"ì]“o‡rƒUþ'Õ©fc 
ÛÙöU”õ®iÞÎ!™mLA£²OV‰ýxYr`P4sÚ¦¢ªÌ=m_…·¶³í«hOäŸs†Û©<•ÿñþ”)¨m­(AîRòXo§–þ•ÿÅE¶šsgSôIÛ]±úŽd׈eÞr|_®yÏ!BÖÎ#‡Ùý9DÈ®yæ!¶erˆ¯¹Œ9Dˆ×¼’üuÐæ§®4×m«¥³yAœ»S ŒÛV oýÆãþã–£„ì¾ö%Äqr©Ï§y8R”]òÌQB´]Ùqê<‹0Ž9Jˆ]¶&ÄÛ[§&”=Ÿ]Ù,›3+Mózdd_—&ÄѬ4Wkšfs¯„-Å Ùím9NHJûõ Æ®LcÙ›Å5'dm9sœ§î°Çg¦ÙmÈqBülcŽ2Û”â„ØE¢4w6îõܦíÞÙŒ’ÝÙ|{ FÉî,lë'¤HõHxì9NH¡RõLÞ®¤ÍŸ5Û‘ã„XµÈÖâ„´Š+g}öíÌaBX¡ÄóÌaB| b‘•„8,c‘•„âî¦z"L¶&¤lU¢zEÖ–ÝÙüL÷%‡ q,Ä*+… ñÎc••"…´¨‡„dK‘Bv{âƒSdJsg¯éxy2ßìŠLiî,ۢȔìÎæ±§È”ìÎæÁ§È”æÎâñSŠâ¤"±©ìÍæÞ<–1d-YrÄÒÄ’ü–¤+[€£RÔ»»tó³7‹¶ï9hˆÏ-VY)hˆoÁq¤ !¬·Ã—ÈAClü9ä !³9hH‘Gõ¬r<£4¤r[¦3ÄUA,²RÌR»âR†„ ò%×2´·fɶå!®k–’}YÜÝžc†ŽvBE¶#Ç i[U‹÷ëÔošdÛ7äÐ öìÊnRe¹ÏÀ:Óß\ÙC‡ŠÖ€l)fH„§j¨XÛ›:ìBÈ–AnÉ’B†˜«÷Ö2¤\FU½@¶;dâˆ%V 2ÛžB†&Í€UöFQõdåÌŸU†#æ°2´67ó6µ!©e UJ÷P¦)… ™iN1CWPÍûóiÍ1Cø‚\›D“2IÅ„õæŒÞ÷Ž¿qÔ€ïsVxzÝ^]Ÿ9o8æëy”¼j¾ŠIuÂsÌ…Àwé9~ |c Žh£¥øÆÒin©u |guÄXNøjI9Û£ÁBà«Ó-%#¯AkßSÇxŠü ‰¯ÁÆÝ>¡2F| 0ƒø"܈/‰/±"ˆ¯öOâÆŒ_ñ…±dx%ñUœ’ìÞŸ _M‡:‰þ¤mòÕ»nµ;|cŠÕz¯žòÕ÷a¨)¨+òens"_é@kIÍKæ«(ȹ&h&ó•êN<‹ç4!˜¯$šìÌwÖÇ4–-K³5æ«ókÓ^…AÁ|%M£÷µpÖÄ|•éhòŠ`¾²qoz™¯G•š÷þ˜€ùÎçÖ\c¾WÒ¥ræÚ˜ïŽÄ¦ùôvÊÁ|e›å¢?/Wf¾² KMº æ«>[›D$˜¯ÒÈ/ʨ÷dÇÊÌW冣&kóµûóUÚúóœËZ8!ß˯@%Å ùêևмòU+‡ù*ïU¡qXëšâ潪fˆ'T€aæ½óåý-™7Ñ¿óŠ¿_©=ð^ݰ<îg_&ó^eÚc¹¶ßîeƽ~Ã÷^7uÖÎÀ½¸áÌzUWÓ€êU]“ò;¾‚z•Ìkˆ÷ï> Ò«bcûéUmkSõéUG¥»=^•;µ_|ï%‚ôv·H¯r‡-å<µ‘Þ®¾Dzu{ñûüÌ[ ½²GÍbÒ+›Î±?A™ôú­gÒ;ß3K9± Òkƒ!ƒ^˨V@¯núl¨½W2µ¡æäèín:^µp–DáÔ ôªØÑÔXzUݪlö÷¦%@¯?› ze›Æ£oèU}ëT˜ôª™“ÒÁ?c/ƒ^6ôª¾aª¹Ûz»¶$Ыú޳æèµ¼u½óµè-p^z™ó^Õ)!ã³&ÍœW¶¥)Ø‚ó^¶yã+œ×Þä„yÕÈz|Û0ïõðb%p+6€òv•%Ê+[¬pǽVQ^[™òúƒÉ”Wí{f´ xu¹pЏF¼×Å2áñšxUll*¿¼óul¦¦à½ZÝ~ÎßM ÌhW5Å'¹h>€ív5%¶«rZWìÙî+¶¦óçõm9¥Ë!‚²Å”ÑnWìÈ]0~3Ù•i=ê9 ]Ù–˜Ôo@€]쪑G¬Ž-ƒÝkì 55<À®×¶ät.ldæºíæºþªg®«"óTƒëâË›¡®Ï› êú¨ÏT÷z,å}2ª«rú`ßÒ€ºÞQêªX4·h¹êú(ÈP׆~fº>Se¦ÛU—˜®ãÌtUn‡¢O¦ëMIH×;3#]¨éúü‘®O¶éÚCHH×?óéz§d¤ëw‘®FQøáE5äAº]M éú&#Ý® éú·úiÛʰî¤ÍÔòÚvX7ßJë€ucpEûJúáBÅ@,©ˆu! 
Õò¦ Iú»6$ªH%±n,ý—Fìu%Š5Ï%ÆTwod  äåÑçLu/V3V .TWYeö"Žª«é]¯è“|Ô¨®¤À¦’9T× M¦º×‡÷8‹"ŽQ]|ïŒêªã¶²õª«•†¬ Æ'Õ…#cTWóùºTRœ¨îE=µMÑlgNxýµÌ…Îfª}ßù¡ô2¨î%QS¨ÕÍ zB]y•k ÉÔõKf¨+¯Rq%ÀP7æÂ½îÂê22¡.Ó\‚êâ_CÙ ÕÑ1µ„4¤º»”«Ú,Áî¡9¤n€°«lb-ÉÁîå‡Cø8™»‡$þëù‚]¦Ø•€HË…B²«W¢eÌ!ÙÝ¥I5NóHd—‰W w™Ôt7ži¬† ›énLÆñé­‹ à]&'Þeúaâ]¦C$ÞÝ÷*¤çtW·Þ¤yIw½)™î2Û#é®FõE\vÐ]ÉÛ4ýgÒ]Mñ{ ŸÉt—)gIwõITÀK ’Ít÷Ò:mP8Ó]}À¶ªmUé®–½M`˜twW6—êQ‘îîÊ»–"3Ýu[¢»^]¦»}Ý«Ü&é®Z?Tý*Ò]ëÐÝkÅß‚Aw¥štîÒfº+?`¬zˆ¤»a[×ú5&Ý•ÔÑÐÂ|@wãþbJ¬ËLÐÝýV©; »—ÓT=èî~éý”,i¤»ûØ–INw%üýUÞÐݨnšªš%éî®W0Eø&¼^Ü4W˜x7†ì9ŸÓf¾6Is¿Êw…ŽæêÛ<|WøJÜg(€ïŠQ U—|÷ÉÈÍ×i†¼›Ï\t¬ ymòZSywM^U•”WJË Yyxg&½»‘Ô]z’^ëb^=í¡îà“ôÊùŽ…Æø¼< ½Rh>«*0I¯ßzF½{z“õj!°Ô°²Þ]º‘kÝ!ëµaØkÝ Ø»Éý‹–=­‰õêÁn5"Y¯ fÀ^{¯{7içÅüûlæØkC°W6‡ç#Ø«0Uiiò^»±„{í‘÷rtö껢 Ç‚t3î½$Å÷â<÷ªÅñŒàCÜ»)­ÃPò÷îRaÏëÙÍïÝ´2:×2mƒ÷j•©ÀÀ‡‚÷JwOñ¥Ï&=x¯ôó´ày"_Í_’Y+ḠùªñK8í•.'äk<#_ó €|çm¹dÈצ6 ßëy1ÄŸ`ÏŒ|ùŠ€øZ?fàë}•¯½¾Þ™øê¶í(kf_]3Þ÷õ‰Éðõ;H¼÷úºïsÙòUg΢µOGgäëÃ$3_{yÀ|íûž™¯î.f¯éµ ^û¼SþC=]èùúX™îN\†ùVæ;Tæ;Æ¿ÿà;)†øÞ@èĈR)Þ „%Q'€¯)7ø2E¯R Ƭà6_‹`ÌÀ×9X¾ó=Q– i_‹ïðµ 60_Û(óå-¯ÅùÚF,ïÐ2,ùZ„ ¯0Óv¬åòŽœç³ Q _•ÓæÛã9eä«bƒ°û=êÁ|U,\ý±ddÊÌW¶¸µò’ùÊ(¼‡«0_Zh2_/’˜¯l§²=é«À|U.¦Ò"ú¯—ÊÈ—< È×[Bä+®ZSGù2Å=‘¯3ÑŒ|y$¾ÀžF|W}P‹wdÄw‡©» ¾9‹Ë+Èå€|W-ªùä»´ÔPŽ|Ã_äå|Îù.b°åKjÈ÷Gä‹fò%©òõê€|µAP³RWä»è._'C¾Ö _tˆ!ßE¡"O’n_‘“±Bn_¤ð# ,ÍÀ×*#ð%D&ð•Bõ­®e´wùIi†½êm¨=~)`ook°7n-Z_¢ê ö¢CÀzý¶3ìåþa¯m öãöÊÖòöÚ-öz¹ {­X†½šPã3¾÷š Ý3ìµMƒ{½H†½R*WoÐ^Ÿ±&I´¸^¨/’öÚíöz}™öú= ®W¹Õ÷èAÚ{(ã| x°À^»&"{­_ÙËr ½VŽ¡½ì³Œ{­[ÚËîdl/»Œ±½f{p¯M¥Œíå‹ Í%[ÛV4z[å½ö¹ïµyÀ×møÚ-øÚ¼àk·àëå2ðµùÀ×Û™¯—ËÀ—Ÿð^¿dæ½ò±b½V"7Á{­«Á{íóÞkßcð^ëêÌ{­™à½~ÉÌ{{Û™#PÝÍ{í[ÞëW˼×ú¼—ÎjƽæO€÷Z7‚÷š Þk®;x¯­0À{é[÷jU:‡›[„2îU‚–!œªg¯ ¸7zo‰7¾ìÊ÷jÙ:,ug¸WëѸ¹²“Ü{H]v*Û= ½\Æ’öjA­>/p3ÓÞãr Âiœ‹0C£½z$ã¼?:¤½<ÑÚËó&á‹X|‹ð5"|sR ðE µøÆ´¿ÕSÞ¤½ »"íu[¢½<]AÚËФ½§ÖÄçZƒ\3íÕ˜‰±Pƒj3í½Ò|V1aâÞÞÖxï©õg=üNÞ«ä4ëVÂ~È{y––¼WyÃã£Zræ½~ Þ«A:Ê¥ëy/-w!¾9Ìš!¾fCŒ/ñã{qƒð_žb)Ä×,ˆð5 >¬“¾<n¾1…(Œåy­àËŸÞ&Ù0¦ßo’éõé>®¨š×HïVLôZ\é×¢^%1Yç™8€zíœÅ7 ÞS9ÜJÜQ¯áXe<:Ô+˜;ݳQ‡zs$(P/ä†z1ïëU&»­d'&ëM(¥½iÊ$èŹ8ã¼Ñ ÊËv‡eóîíãÝaÞxÏÇ©ì0óÆˆ“ÓSâmyÍIÌ‹¯–a^ı5ÌK$k”w~œ—Žò"LÒ(/Á$0/Ñ*1¯àÂ\uˆyñùï0/X'0¯¸OÕþé0ïÿºjØówçµk‚óuœ7Ñ=ã¼Zì-rœ—¯9o††y£Â#*š-ļ ÌË¥<1¯âm޽ª³È^ÄÆó&vIÆÓÎQ5Yº°Þìp·°^ü 
Æ;Ö¼Úâµa‹êEgdÆ;jÑZŽ äåB•—ktB^ sƒ¼ìDr^#¡½šsbÎ~Ež×ƽ佃âð–rv›¤—Ýé¤7b€^-‚^z‚^ nr^ÆA;ç5˜Û8/k#ç%Ô èåÆA/[Ù8o^•‘órDÎËu9/‘ 8/¡,A/‘?A¯T½y©GЋ§íœ7¯nÉy­)à¼VsÞÌVÁy¹ý@ÎË# ÎyóKàœ7r^»ã¼`¥à¼X‹órXæÍÓ 1/[BÌ›æ^2^C¯7ã% ãµZŒñâ~Áx­\f¼¹q¼œæ xÙI¼¼;¼àSä»|£œïæ |—]I¾Ë>ò]k&ø._ ò]N&ä»Ä}ä»éî²/ wyAÀ]>Â]Nj„»œ­ wã£ÿàY=4¸K(äp7¿¹n0i h7Pgp—›a„»¼kÂ]N>wóWp—£„p×lwŒwù#ÜM#d—_D’]v Ñ®5h×® ´Ër_vù:í¦›×åkC®›ËêÒp"0!½ èæ·…D+ #º|XNtÁŽ3Òå[F¤‹ÅR‡të]‘çZ!繉ÙÎÍo­óÜ›Óóܶ03ž '‹8—-Í]uÔlªª  ¹Vp.ˆ†á\¼ñ†sñÆÎÉKZÓj$Î5[ƹ Ĺìhâ\†Øçò=«<—ã‡<—½Ež‹gCœËp+à\.â\ëcà\ë+à\†#粕 ¹‡èR=ðNškýšËïi.„HsXHš«Ó°S¼ŸEJ7Ó\l@ÍÅ™]FïjŽ8—¦Îè]R%Fï2Ñ»‡àCU•ÈÑ»¶ßè]¢%Kg¸Ií¤ˆ#z—'wï̈‡¸V)}ÐýNņxÞáI_}×aÝp¦ÆRugṪÃìkávœG9°Òe_Ëêˆà•±¥%˜Ü‹Ùר ÀìkúFÔÃz”l4ö^\¬aÂèY#Q¡×0¨õxõ†¥ÍË®×0Á-•¬A¯Aó¤.žèVè5Pò…z &,K^j @¯a¶Ôù€&“kˆÎRÞÐdÈ5hcí¬îuã˲žMh73]}tbQ)å ­S™îÅc¬ÙÈ5ä 2]3A¬áи­"Èk8¤³ÔHfkHÛ\DºÜæ"ÒÕZª$O˜3Å(A±³A¬a—Vò[˜/´aM‚WRcI"ÒIð¢œIðF3ŠdÁ7HðnrbŸó¦À ñ%k€­SàMbO¦À‹¦P‚7·ÄxíŠPàÝøSÏyS„×Ê="¼B5û# o¼°2 ^ÓÙ5 Þø×ÃþÐàµöA„—Å(ÂËráÝêáoêðnzcŸTŒ¦ÃKdxc1»Ž%)¤éð²dx­!áÝ%mWE(Ë·Ãdx±‘m2¼p¦ ¯ C…wÕW=›OÞU‚Z5uUx­\VáíLI…WÙÚò¦A…×lá5[‘áÅ®¿ÉðZÈðš 2¼Öü,Ëà“áEPƒÉðR¿š2¼íȹ™”á¥~exÙNÊðZ;!ët“*¥ /µô(ÃkmÉP×®©Ãk6èðÚ5¡ÃKùkêð²uxWIÕL;™êZWSˆwÕþpMCG%^«J¼f»•x­íPâ5[»~1ÈñZ¹ví‘RŽ——Ûµ{¦/‡ØnÔwŒUâÌDy­¾ w­å5Dy;[å5[†»ÞN¨òZ¹ w½_2Üµá ¸k¯uyÙŸà»Öèòš)ó]»=ð]»ð]·a^«*ó]·åàÝYxªú:Þ [¢òæyg«‰²Mš—6Jóò;Gm^³A›ŸNJóòHi^»$´y­™ÐæejóÒk 6/¿þÔæ5Äyy{Ðæe1jóv¶3û±°A›—Ýq^´„ڼл3m^« Ú¼f{´yíWhó²û©ÍËBš×Šei^>4Jó²%”æå=Sšž%•y­”yáÀS˜—<…yYtyéÛS——Å ËË>¡,/=jÊòš ²¼hŠ)óæ¦˜0/.Ù óâ’æÍWDÔ. 
./¤~µ‹EM ÛÍÍ¢v­¢v͆¨]SÎQ»V Q»h ƒvy_ Úe3h×lÚeu³›»˜1»fCÌ.ïŽ1»¬!»ì/†ìÊo8j3R^ª'å%;!å5ÕgP^«”מB¦¼nÊ”×n”—Ü”×Ëuèãm ?]ÙÖ±Ðåx‰_Â+¸þøóýÇ{«XöûOÿùü…&Çó=_<}&:øÕ¦¼‰`[º½Oùžç:Oå!}î´üykÞíï_ÿz”Ú×ã½ëVJ­Rûóçw­ÿêïû‰—?§ý»ôýSûõ§w­ûêï§öçϯÔ_ñ§/´C[{ÚØ5i±òœŽøæMñ*EƒþøñÑùÃÿ[¢Ñ/?½üÛ?…SòÏ/ÿþòñÏ?üéãWÖ³ »¾[{x„q³_]Óò†šæi¿]¥X„~}M›×ôsc›çáR'á÷œó}ml¾Û·é‰½L]ü­A˜£µÆB ߣ5Ëy¹ÊÞçç÷iƒÆ›½!ÙöίÇÞ¾aï2VïÚ9X­ï:Z¯öØpe{Þw¼Þí©Öšò^#ön‡¬5åµ1û_àõJp!­»]ß`¥LˆÏäìsM¼u‰:YzìC(-=ö.–TU&ÃV7̯ÔÙc=~èdP˳¾Ö0<…—¬|ÙÆNw… ž> §ÖèþCñ5ÆRÓ+¥>Ù–£ž‹9ˆRƒŸ† (¹ÃkŽ+å9!–QûÒO2Ÿ+ ¥ä>r[ŠCQXÔ¸Š^ŽÅV÷†Un>—ñ o:‡Ò’Äœä!?çªÅ¡5Ô dŽ+iM:Äú”¿«0sËœsH±æ™b®qÙê‰l:dzý¾6t©ßEç=@à«…u…M}úœV Êzý:K¤à‘ÀTôI=ñíW;Ú®„lãÐB'~RˆáeS·Ý‰£O…Ÿ´Ô7(w*ü¤àa¥¾‰÷¯$±ŽßöeÛ%{GÉV·.›$–﨣S§ºkö,}abØ<ú‰ç°¤\âã¦Äß%Îá¼²ë”᪠‡BÂeÛÚ†ßuÍ©&ˆ­px™âí(Àíö”´ÊZr´Ø!µä|îÚÒ®¾—‡¶Çæ-dž¿e{Ç?É–B7… –tñZ¶îšsÛI÷vŽKÛÞêÊ%ê¬rû¹•¸)ÙR%kËÖÂed;¶šêåÁE~±£í&É6 5¯Ç©w¯-`6AÚ‚ ekéèuÍy(Á[ç4´øÙpßø8f·¶gì Œ‰¬îä\5”t'gÌMAÍ”äîÅIeªág2KÍŒq*ïÖ‘l¾»im»´]¹­m tåö¤À':5äK¦EJÖ7 –©†bx_NiÔk»Ä ê€¾Ô ‹ûKvÎcεI¢ ÐãpçZ¼•uX˜Ž6¿®Ú‹{¾›ç<·-Iï“yIiü¼%k ¥ìl[ rZî±[v•d;—4\QNÚ­Ýø™$ª¡«•} ÙÎÀèå–¡EÙM‡éÌÉŲilûonšRl²5d™[È l›ŽÜˆç’byºrkÚ^W¹xb÷V¦LG›^Ù’-'q³ÛÞk´Þ•7-<£gKû\Ž<ãumC[¦uËn°l- $3±ëò¦±ëØ"äÜ4µ¨™æµfš‰Ýv‘½#×9¥]´±³.-Ö¢µÏumai~wëÆE0êÛÛ®÷åzôÕ¤pÓ$Cù–·!¹®Ö¼mL®«5o›’ëjf›’ëê¶9¹®ÖÎ-EDØÙÖä¹z3·ä¹úímÉsõêöæ¹z+溺éL®«Ùö!»®l¥R0li´æVîcv]ãV•~ãv‡Oåý:ÒxÍw°ÏÙuµú–ìºZ}kv];[r]fÎ;SR¯¸÷=»¯xxûq{¯v±3{¯v±3{¯ìIO쟱’}Èsë(]û;Dï”ØKž\sG^GwÓˆÅ5çì½Z[rîW¿æš½W»æ–½×xwË® 3Bk²ù­“xš_ód~É{µÚÎì½²W”5lºƒ+t²ùéIP,ϰf;ò›ï÷ª5çZâNOe\ÓÍ=­X¹<Åæ^QÌLžbópÖÆyžbQßž½×%º¶fåz6ü¼CÎì½âbz²ûŠ^¾ŒgžfÓUG‘=ϳ¼ì”=X7ÎÙ…õ:—ìÄ¢[.ã™çÚôö…qÍ~¬·ìÈzƒöìÉbX„1Ë_Å Y±|wlgØÎäËbÈ\¶cyÝAcËÞ,bÇìÎÚec!5åY—Æ9;´½.-Kvj½µköj­Æ-»µÖíZ±å¹7½—19¶~Ùãöl½'][<Ç+ø¤fëÕ;EÌu ?ÉSpû„mÊþ­_u¦‡‹WdRF÷¸ùù\û»ŸÖìãÚÝÇÚJÙºã;¹½Rr£›K£Ž¥DÉe즎0ôtó£RjæW]Ý1†x «˜”Öã•GF8»x\±úб.ĹôM5ü]<‘Y9ÉãQnÃÚwϼÐååe9©A0î}Ä",9½ò€QƘ$¢{–mzåIûÞ&¦Xq‡N»¾Ú §Oþo;6Zò½üÆiQãÁ {Òòt›¾›‚µJr0áXKÍ-³x𢬛-í;x°Vn)xð¢ô’õL%yð¢¯o“UÛ¬ˆú'¿x0ó“gýCçÁ:M1ÔÓàÁ'• …‡¿Ïû¼­7þÌPXóÐ6nú¡p”Ó³xÆ%¡°ÑTsî kâ«-¡p”›t~¿Ü …gU^Õf …•°þ>I¹½žÇv„× p˜4=,Ag(s÷*1ÖÇÖ¼>UIKBaíŬú¼=„ÂñÙï­–ãÅ¡ðu^l+g» …g̉qy¯¾À„ã’óZ3)‘ {u™ ‹Ž5[™ð¤xì*†@&<éü{ÑOy°ý–‰p\j¾y`ðtO(‰ 
ƒ­vÀ`n©G¹qœKö2Â`UþN7CÎ0x’6•d•¤Á×±Àø|Þá¤ÁÞ”Lƒ'Ep×üO¤Á“ÎsÖ¼Q ÁnJ4X—‹Îap”Ò¸)û6€Áf ž´d­¢„Áºø¬È¶˜ÎJ¢;ÒàIèjWëö*£AÌÛ Ö%§y•Ë”˜ÉÃ¥3 –i¬™Þ VlóVUdƒ'9æB²3 VÈatlÜÕu1°àI뮪£J<‰Ç{Yò‚‡-柩 ÓLƒ5ÎÆ*~Bì—Ì4Øm™+âû¨Â(ÄÁÜp&ö¶$Lõ ð`nS÷¶æ#ë8pÕÍ&Ö ŒUî†@8lãQUwH„ã’ËPU±H„£Ü°VyaÝÝT…ƒH„ýš™Û-d ¬ÑV%©„uHz©êcV-Ûñ †!£à¸ÈÑ™‰‚s!p`† ë‚cã#Ö$ºíDRàIŽA=ÙG W\×ö±¶Ú2Ö%÷¨ëÙÔ¾fú*Ó ¦í¨Ò\ ÀZº.U×8lÃYu(I-A ,ÛT¥;Iu@~«*k¤À^.S`·e ¬¥ùV…#I½¾L­Ç@'-K§µ¸K Àê—©jK ìWËد–1°bR¦ªG lo0° /``z™«Ž1ð$W¿ÊÛhöró-¶± lŶy C`¿`†ÀÖ~@`·el£XõU™ØÚlÏØëËئ@`Æ/{¹ ½¾ ÛkÌ×Í 0ïÌ p\5ÖcEÝÙ 0ßFƒÀœn ë Újò&ƒÀ]É ;c†ÀqÙm®ºØ»:3Ö}Î5––XùPô÷É€9ö Á€§K«° cÀ~`ÀjÎ>†›>Ý„ Xj&{8ðw€…1`» à®ÊŒ€Ã­.BV†€ýª™wWÍX€vŒ%úíÍVÜÝB&ÀÞàInjMk`X—-/DG€»Ëf¬ËŽ5!à®dFÀ1#à¸ìÞގİw ð$þ£õ3F€µ²<ª,? 02:û5A€' Ü*SoØ. ì=¬0̳æ90Ü•ÌøcÀÝe3¶>ÿõÑþ;I‘¡f·0þ۵紷â­9¿¿M5xƼÃ@»°à–ÒÔ}8)00׌•…Åcµ¸`xðŒ·X+ô¹%ib\ð©oOŒú­çÀ#¼ë!¯°",Xè-¦ªã•¸`,Ó,.Ø›3Û±TZ‹ Æg—˜Ž10§b`¾¨]\0b†ŒHX‹ >?´Üë¤ÀœR-48ï(ÕÈ`lµZd06n,2…<ë}?ŠSÍÈàkJy²—Y`ð"Qê­20»OoéC¡„Õ"0Øl –Æàt>ã܃Ãi?VF\ð ˜áexb<´°!à¨ìÔ™‰Õ1.ø‚2 Fg,¯gº{úŰd¢çžy•XnÏ6Qs‹ †¼'ð°·´3`%‰qöl¯YP‹ë½î´ý¤ZðìŠ9ý®¹±èÖ>ø—I8É%.¡L0Ïø¾Ž¦Ì2› X ‚ÓTanfÀaš×½ÌYdÀºäxTn N•(G¹y+Vö¶d ×ÖºÑI ,]ËyªØm7–(Ë4rr`­¾¦¡®À‚)oK¬rÇZ±@°ê W³RÔ ‚¥æy%^Ú{LÉ\’`/—I03ñ‘:»5T¸,Û°TÀìå V·Äǹ¬ø3 –ŠéX7¡I‚‘J X™p\Ê´ ì¶L‚©#L¬yL­å»|}ð¦â—’K/èÿ” 8L1ßG˜ X¹û´Íùì¾ Ä·cz-4ØFºH°ÔŒæ¡8+„Á’9Žïjùì>J‚+ò`·e$,a¥¥îå Ûm 3]!¡°Ýò±4 åÅ@x&¨ …/=æi+ΩðäÊœ sp%(|%N òñ‡3–I «ã™Õ2¾nZÙi–+.eùžf(ì7—¡°eü6EiBaÝxt]Ù½ÍPø’œÞk@ °ß_†Â¦nM(l6CáK«:î ì»ÞPX¿†GPN¶ «¦I˱½ðâ$·Çg“™°lzLSa»‰ «ºu©Ñ `Â~c™ ûèÊLøÊðZO ›š6° tƒ Ëv>ûá™Ï³¬A)ËTçaoà™å°9x2öÆg|5dª‘3àÁ&/LÁrà`SÈ6dà`ïÇŒƒ}ŒgljæÀÁÔѾip×>È#BÁ‹4ØäÈIƒMß‹4Ø$žIƒ­=¤Á&«Nlè¤Á}k×,yŒ¾& f§SQ ,¸oê™ôÌÃoÞærª’0ØäÚ ƒM…‘0ؔ̃»ÞÉ0Ø´ÓƒMÁŽ4Ø´èHƒ»i0õIƒûöìYb×ïä¸5/½ß ESÒ`Sì$ î:5Óà®)™›6i°éô‘û°Ê0ئ(Ò`SÙ% 6]=Òàî9NY§Þz6á`*†’wO1ÓànTeÜ óŒƒMF™8Ø$®‰ƒû%Ü7hÍy  0MܨŒP”<Øß¹ÓJ+*ãß N±£Nƒ¥öpÔŒt NqdFƒcPl{qw¶tÔ.Ÿa& æ9Â`f"' ÖÄ&ð'H0Xyâ& ,ðsBIz•i°K4˜˜4˜ÔÊT"ôˆ¢£ËÝe<\ycÖª¤i°¾1ñ·cïa0±a0±a°`a,"o‰0˜ä‹08>¡»âšŸÐàAþ]Œ€­3 &ÿ)8ØÍ8øúXUK88jšÇX9Þq°‘&à`ƒ^àÁËÀƒ +ÅÈæ¶eìmÉ<ذx°®W/À„Iñ/5b,aã1 …×tç®#Ös¯#&פW?•„eƃ£œ2>‹ð`»ò`ò7ò`ÒEòàEžW •ö¶€ Ïš j` 
©0ÛC…‘%ʨ0Ÿ(©°u$¨0H*<ëã^#†ˆ…Í,6KÇqH—3¥yþ8äz³üqÈd÷ÕùãîÊÒÇå¬U–>ÎLȸ‘“¨‘[‚µù6.mdºœ½Š˜Ù]<\J,DÌ\-„ÀvÉÕmär–?ÎlgÞeË)‹ ¶_-làÀLwãùãrJO ‡r–A.§úñr(g)ä`³r9§C9Ë!‡r Áf# ÎYyŒÃd$ØlŸK¬Ñ‘༿è$x—nùó‘sO¤èìw$å Ãf,6ƒÁ° †ÍhðsŽöŽ‹î·pãŸÎ6çÁùšÎƒóN‚ó`³ÃL*ä@m1 ŒkÎŒÐ0l™’“£‘ƃùþ—ðàdúzœMFƒÑ£Á È ÁVÎpp®Îh0¸o£Á$¾Ãg°Ä7Ñ`ØŒÃf46ÃÁÀ©†ƒóu<8ÛŒÃf<6ÂhËà'¦Épp¾"q0wŒÉƒyIà`š–Ï0Ó´}6~,˜›òdÁf»Y0¯Œï,}ñ †0Ø€Á^'`°—4Ì’ Án\?Èà4Ø;ch<ÌÎôðœ4ÜÛ’'kFÒ`7ŽŒhHg¤[[Iƒý² Á~YÐàéOâxGÁ°€ÃìU»ñÀ¸° v#p¾&0, ¿°ýÂî ¨¯7ØÅÀ|að…%Ó^(hí5ioo<ó£myŽw:íõ’ ½^´× ½½1Ñ^¯¸×À½nïu#€oîó¹ Æx›ÄOWj¸1žÃË.Iü4­ãõÇŸï?NˆHüƒûÿùü…++‘Ü B‹§Ï„¿Þ˜7Áìo¬tº-ÒmÿÞ•Žñz¬å^Ë_ÒÊæZ°µþëïZû$A×ú¬Ÿ¿¼ïý_þ<øò—©-úÞç ”ú¯?¾kíw§—ÿüå•Ü×üé -Ùcõ‹Ð˜–cy™‡5á#&¯)¦ÄhÔ?>ª3ãK4üåã§—û§˜©ÿùåß_>þù‡?}üÚšæpͧ}ÛÃu›¾¾¦å 5…ßýA´r<¥Åóµ5m^ӻ̕mŠþÝÍ3+¿ë\™ï4Ö_)é{} 9Þ5ƒzäïÑžùâxtò½=0¾S{–Xy„Ý5åùýZ¡5€ÒD¾ö”`|ß%:¡9zï2f¯Úm̲ï;fÕ³hÏ;Ù«=m̲)ï6f¯VؘeS^³¿ýíÚbm®˜‰ÄǧK[©û|Æâgí?]ÃËz¹¾ù ‰Z¦C; ±òÚcÝö…ZÖï«e¯ˆ°ns_ËYj¿óf.ý5QÜùœ¾TÍwÞÍt)ùoSø2㪙ün~£Š]¯v,s—!†§˜Ù²/á[›×¡ÿ\5üå§¿~úôã/ÿøñ×_ýñÓöËßÿ¶|—óòUI5g8ÑŽ'íöþøþËìÓaP4¬ó>^QBÃ’ô;Ï)‡æ1úä9|JØæz»æ+w𠼦E„óœóiÒC„½äsã¯QMÉ´+×tQÌ ãš£ˆ6EzLUKS*gÍÉ„œ¨aÜ™Êñˆ1^cÛ$G<ÖœLn<Ì1¥ÆšG6Çœël”`õ\“21»ØÃ%E!'—lçš’2¥H£dZ@‚qÂ8X÷”“)ç@¥%–SÏäH£äáΔ“‰%7ÅEŠ ÷e ß½ MéCmãž÷eÎF•/c,fæŠ^´sµÑí0ž%°¨óŒñó‡a+™A@—‘EzÿÁí0ê€yÈ2NÃòdš Û”wgŽèý¥è“‡qþðÜÂmŒy¤è‡qaxQŠ!ºŒgC‡ 2ŠDyWFÅ T’̇Qù+=DQw‹1jÑBa<®Èº‡!"ðç2æ0#Vy¦mšœ½£œ0•""Ð(Œ£Eí{‘ç£ô0*EtãÌX£ÚtÙŽí6® ½lDcÌ "îÊó[µ³GÍ "z{”K«ŽdÄE]Æ´kã%%ZG2¢¦ sò_•N½NÉ<Æ1ïÜ Òê2žuJ–ãŸÏG[3ŒãŽb]•ŸÇ9¦‡:'{×ÌKÞ¹AW×k>#¡X—ÁG¬SáTu$»qÏ;7~Y駑̈@J[1ž„ ¡Æ°uNfܘ¶:óÎ_uҤ؆2º]1¹ù´R c¸"uVîê\òΕó»ŽeD²…qË›7n¼ÂìÊXöy ÇëTz€:–ý²ç½…ƒ`·˜Êb‚hÓ²Õ¶ŽyÇKNJ"Qstù¼µ“ãúÂv¦Í»è6¤Í»æ6æÍšxÙÒÞŽ_tÊ{;Ö;Ûœ÷v¼ä’÷v¼=kÞÛAâeL{;^rË{;ìmÏ[;ˆ( ã‘—Úˆ‰ ã™·vxUøî§ä$ûïcæPvoû”œd¿êœÃ”̶$™Ñ——"Ü)l3Œkò‘»’[ŽTrãž|äî.«äwy$¹»ì™¢•¬à1¤x%{€Ç˜|d¿Ëc¢ö KNÉIöæ3ÕWxÙ%yÉ]c׸ä%·ä%wÆ=Ç.õÆ#ÏÇ4Ô`i‘¶a;³“l}p·“ì?ÙKæõ†Dè6mK´ÙQ¸aœ³“l·p.ÙIFŒn×ì${c×ì$»qËN²ÝÉž}d„þ†ñÈ>²ßÉ™}dJ•”fãô¾F+“‹Ì®›”ê)OÆù6¦K¹0 d–œ³ìÆ%ûÈ~Ù%;Én\³“Ì'" ’œdö]ôcòÙ #;Éx"²|¶ j9èÙE¦íÞ•ôÇì [ûcÁ6æm´?ŒÉA泟§‘çcô™B7×4ŠóÍ]ñÒŸ™'õR›­à–Ýc³íÙ;6Û‘c¿3;Çfœ†ä[ÏMcrŽÀ~Ù’sÌÖèìRЇmή±7fa„>k\³kÜ£‡lgvŠíé/CòŠÝ6&¯Øžý2e¯˜† 
!ûèðeÎ^±—ì»qÍ^±=ªeË^±ßÈ–½b7îÙ+¶>eÚšçaÖi;P(¹Ù-¶Þ[GŸÜÞvĵä½ù“­Ž¦E¶î—ýt¼ð:RÒuL«“·š*ÄÈô""TózšÖY“u›Jê$éxèó\3t™¾Võ5Ù­‘éEa°5#‚‘é0O>ÃÒ1>•ÉåI&`X:ŒÓR5Ή¥)I:—K‡M]^4i‰¥SkRŸÄÒ¯›Ë-Ê2jgs¸<€éx³ã¿ŸÓb7Ê‚ó¤q"˜æ fÓ1™ÄðiÜ`úšiÆ ìÀ¥5C­[ƒàÒWàã7‚IÏ:3´ŸÓó8 “ž¦ÙÕJÏið³Cé0Æ\И ô|‘ÆM¥µ/s´Dv„Òp[˨E(=_§NÛúPz^ÛሟJ_—-¡´íJë(œµ<3¡t|ž¶µæÖ"”VèѲµJÏ:*:•³AF¥ý¢ ÒóÜžÁÏN¥Å=•Ùà>ÙdT:Jnû\5ˆ¥£ä eËX:JN’ɽ-™Iw×ÌLzÖª¶ p i^2óhÅãn1ôË@Žbº›³l<Ü<úò=¢¦òº€GóÈ¿ñhñç$ó«V9iËÛݵ&Ãè0ÆRp«s8`tW¥oŸ®Œ¶]Âè®AFˉ֚'7 at×  £'¹ 1bËÎã£Õ”ø†<ɧ F3[žÁh]0:çI{g0:Œó*ã™n£Ã/ξ”é0šâ£õö)mÙŒžtÜtY÷G†0ºkP†Ñ:[CµnwFë„@Þ\ŒŽ’û0=éê£UNY`„Ñ*¦om™Å£uÓYçÀè(¸ÆÃyÄ Fë6fÅK=ó`´nÏgÚîzÒÞéQ²ã†VSö˜PË~-0´²+Å\iIÆÐÝ=d í3…Ö9ŽeXêö±0´fòxɪ;mÁäÐ:/^fõOÀ¡uç1ÃÖýf€èI¢ñé*›¸ Ñ]ƒ2‰–:‹>$:Œñù^ëN5H´…rDw·’ItWg&Ñ]ÉD¢™“$Ú›ÝÌ$º+™It|ÍϘ÷êÎ H´ßH4µmŒDw Ê$º+™I´´6½þÏÆ3H´(J¼uƒ $º»•Œ¢¥¡µäóqxH´‰žä?Îu‘Fí÷=éTê>T(º+™Q´œ«#^ÞòÁŠö»ŠîêÌ(º»•Ì¢'yÜáÏ,º»lfÑá'Æ·zm<>±h+­Ãf: ü,Ô£-JŠ0Új$Œ¶Î#Œ;b¦.0Z£cGëñ£'91"Ç‚0º»• £Ur ²bì £»[É4:ŒrW*©Î4Zw‹çF/-i”-ª¯T9óè®HæÑ”›2m ŽîªÌ8Ú f­çqªè<Óh/—itØbpTúݵ&óh¥•ü ˜5í%3ö‚™G{ÇGë´e¼F•rHw%3‘f #ÒÞ ÒŠ!Œ¿=9ŒHw%3‘în3i{\Òª>J~WÒªrÐe‡W€´t~âSSÒŽ íÍVù=m!‘ö{‘Ž’Gü_Yp“Hw%3‘îêÌHÚ¢6ɤëÀ’²Õ tW2Cé®d¦Òþ”¥½dÆÒÞãKS<ΰ´w¸ô”~v.í.ÝÕ™¹tWgæÒ]™Kw·™¹´êŒ/ù£vb\ºkmæÒêÙXðB.íÏd}T½5)û·I/~Sì4a µ- I¨/Ì‹û‡`PÛljÁÓ|p<=ÈQ=϶;=µ¯ãÏN¨-R—zÖ$¸—Mq2jƒ`ÔFœÀ¨g)EMUÏ€ŒÚQõ|}@«„©ÅNÛ"•ŒÚȵ°/O×¢öö všÒb§¹4¢6—ɵ(r¬à‹cL»¸‘üPjó•H©íÅ$¥–C½Ç"µ²æL©cÜÄ8^*l¥NÊ—†¨%P7m[}L@Ô]…Q+Þ<†t…4@Ô8B6…)WÖBm>A6ÐØL¨=äÞ¦9B-®¼o%´ÇMgaJ›>uæwΡÑ)$D~W¼¸ÑP«wö±&y³¸iFÓ0lÚB86­¾Qèä€ÛÐðiÄwgBíø&Ôœ½¨©¤êÓäô€i;¡@-G5>•jPwuZÀ4ƒ´0m0íÆL¨© ÛL3&ú«¦ÓÌt:n?V²g%©,Í b –¦ñ냥¡òêÁÒ`¶`in—[°´“3XÚÆƒ¥‡x8ÑE%XŒÁÒP‹¶Xi ö¤Xi†C#Vú^§œe¨2VšbØ+ º>V1mŒ•ŽÓÅôz¬4Á-VÚëD¬´õ8c¥­S+­õñÖv¯+³]Lw{©òDTHŽ]`¬t¼8ñîeÌ1Všº3,$+¬u+=*…ár¾+îÖ.¿z~R›šS+Í¨æ ©ÃаmÁÒL[Ý3b¬ôØÞ¤ŸR›n×,}yçmó…ÑÒÒbš+Åhi­|b:.|ŽÑÒIqÇb¥ã=‰Þ¯ZK˵ž†’ºÓ‚¥íÙ3XÚ8µ´–‡9fKÇL©ïe™5,=J"ºn32VZðxlÛŒ™PKI,œûº?…Xi‰ˆNÕe¨´õ8C¥'qÍ[æêgÔ>d*-elûoÔ][*= PŸå`$µ”¯ÏC@—ßë¡Òƒ¾m£¡Ò6“PZ^¬m®ÄJMØìg'ÔêÕdõT$ƒ¥ã/ñ™>*óÍ„z?³¬OþR–¶w‡ÁÒZqÆSf1KsPÇš$Þ¥F5PÇH^‡½íPG׌ú†?€ºë· ¨%9¾Îm;-jXÔÓÓ¼öÌ(ÔÃ5§×]Oêëèö¼—cXÔÖêáJŒP·Î,ZzЉ»ºiL@-Âè»ç@>­)"fÔ';ñi-½Â{)Û—äÓƒäÂ÷ºóK>=\²Ùu‹€Z½ïè³:xø´=D‹—æˆ#Ÿ¾V–{Ý"ŸŽ†Ìë^·ð¨µìŒA[ö-^šo µVÞs 
€ZgFÔƒt›·¶ÏF=hXOmߌچ9u<Óøæ&Ÿõ Ÿo¨;U`ÔQåຑõp‰þìtz¸ØJâ½™NûýNw•e:íOtzôö IÓWâ¥-Ù–¶=:=H­5†ôãNwÆŒ§½Ã§-¢Çº«L<íÆOû+ <íOxZ3ÃØb>‰§ÕÎèî†G<Ýž!ÀtLJÑÙ%]†i=¦él»KÓš–µ ‚éîÆ3˜Ö0Ü[T+Á´äIc¾¯Àt””¦x‰w˜Ölr&œ¸ttiø)mo\Úüri¿Ipé®5 Ó£cÁ¥5óîkÛÓ—ög .íŸpé+ÇѶÀ¥5§ÄÚøÙ¼&–ö ,íU KGMáß–öV,=T,=Æ¿ÿ&=K¨v¿PϤãS¦y÷áC¦çá²ÿEL¹qœIOŠ)ª“–AiªR˜ ‡Ñì)â³ò¡ô¨oëYãu¥mu(ígÜ¥;c†Ò¾2”f"uƒÒN<¥ÇKYµE?/ô¿ó²L:>ð±fMÀ:3i_ RÏC"ûkâÎ;Ýo,x©çïü )«ÄŽŸÅýÎ-õÝ•Ì|:uÐtwE„M»1£éΘÑtw‹‰M{;:ÝÛÒÑE7f6íÍœîJf8Ý3œî.›á´ß%àtgÌpº»,¤=ܘátwYH{¸ñ†ÓéáSÕÃ*¢ª‡]‹ª½ñ̳n­‚éwhy¤ß!ãá•@Æ#2ôû‘'×ú»év°’,Ü‘”2¨Ù"Ôì°‰ËE;êM¯£þnR¼˜éuÔB¦ÕÁB®ÕÉ Óê ÑÄ:ju¦ÓQÿMŽî7jtÐkškt°$4:¼$4:¼äÂŒ*4B£Ã/ 7B£ÃÐèègžUiÌÖXhtX9jt¸½Çá]Q£ÃÛ¿,4:Ü¿ìN– ÚéÏò²ýY–<éÏ¢dé°2éèphá S¤ÃKÎthqûého"õ9ð;ÜØúûF¶þ¾Óy­¿_ð[©Åa“%´8ê)Ñ~é·æzL#šé·²ÐB¿Õð[i\é·Ò¸}Þy5ñÔЃÎ+ t^i<é¼Âøoø¯#W':¯n¤óZ”,fú­õ÷….+/¶Òe¥q£ËêFø­4îô[ó k’n<é·â²ÜpÛ¿µÜ=Õ6¼ÌD—•Æ™.+ ]VÜÕ6¼äJ—•%7º¬,¹ÓeeɃ.+']V^ö¤Ë c‘Û° RnÃýV^p¦ßÊ’3ýV–\è¼Ò¸Òƒ¥q£KãN7–ƃ¾¬Ïͺ&·aFÈm¸m„k‹þ¡Ü†œèÚÒ8Ó¿åe:¹ùƒgr~Ùžny먴á…v:¹4tri<ééÂH¥ »Åµûímhø{•6’CÛkAOÑóCÊÂ-h¹3ÑøªAB“æ¹Ö£¿Fˆw9UIÁ±^±æƒ2B¼J·°¸5BL5\#ÄTµ5D<êCmñ$­µ xd© *¢Rjc”¸AŠ÷…ÔI·Im˜ (±”;÷©csB åN'ÄÏ_ˆ×".>é°I’»tØdÿ ¶Ÿ‰‡M|xxW&ÅwJ<¬\XѽEµ›xØ´Bˆ‡M›Î¥6 MGS»-cj»(Yµ«v#XµÁªÝV=JCe(b@ƪÝVMQFcÕ^¬Ú`Õ½ñÌ3Œ…UûÏ`ÕvûdÕvdÕå4Tm6j¿(Hõ(†Y©ž‘j»*@µÙÀ©;[r•½B`j¥¦ì()µÛ@©­BRj7‚R[k©ÍFm6 jo*8µÁ©½©àÔn§¶@NÍÆ®Ý»-Lý¿ý÷ñEk4òãüð_ôùóàzŸ®„‹´_&a‹éÊZN÷ÿüôÃ?>ÂЖ—Ÿ^þíŸþø/ÿû¿¾ü?ÿïßÿùåß_>þù‡?}üžìˆ¿1-†z±ˆåœ^æ+à\¯axŸX ž€±ûo¹w½¤Ã¼É?¾|ïÿóÿúåÿû/¹ùßL ÉÛ¿†Ü%8²ô·Ÿo¹ýø–]çëp쟽ý?ý¯ŸþúË÷ÜúÔÇÙóõžÖi;û>ÈÆ·ôÁvÏÿ\¾Ðÿãïýåÿòßþõ{»á[ÔnØë"_t_öù•× ßÒk|bI)¾ˆŸ þÛ¿þ÷?ýï{ ¾áàïû¤rËÖß6¾åþ7åíŒ 7ËŸ¿ÿÿóÿø—ï»÷ï| ´’U:×øþõ=ocú隤µÒNã¡:–m¾þøóýÇ{–½þÁýÇÿüá?ÞÒñ¹=ù-˜>7^mÌ›6f¿±ÒçsÓnûw¯Tyn×r³õo[ïІcúКðüå][pÁØÚ„ú·÷í…»ïŸPÿ6åƒÎïó$Jž¿¼k ž¾/ïAù[׆ËõÔ÷çx\Ï«Ž/ͽDzŠ=Ç'[ŸÜyXµRwÚ»ðÆ%.~Ï¿áÚsþýŠšökß<¾ñcÌ‹_]Óò†šÎUªåáQí—_ú•5m^ӻ̤mÿÝÑ3g¿ïLšïtNkéwûxHëN‚u×n{׌l|Ÿö\Èñ+oŒïÔžE™Ö¾)ÏïïÔ %hPÒÝמŒïü¢Œi³ã]ÆìU»Y6ã}ǬÚãcíyç1{µ§Y6åÝÆìÕ ³lÊkcö+¾]ÚÃÚf}Žã"Û>Ÿƒí?]ÃËz¹¾ý ™k™©ã¹Óò…ZÖï«e§sÜѹE×uµœ¥–ñ;oF‚™b™óë´/Tów3]j‡Û´Óø…j&¿›ß¨âÚ"Ü×覞ñL>,û÷øÊJx¸køËOýôéÇ_þñ㯿þúã§í—ãoïÂcÎþp kDHj Ý'ÖÂýÄ ¥ïô$´Ø•1q/ÂÔó% ÔòRdMþY±•·zž”#¤CÛÂoe¬»|Q ±à˨Ëõþû|ÊŒ*¾9Xjí>-pqV€å½¨/wô•ðæ8_Ù8žƒy?>h/ 
„YÊx(—6¢¶X·lUyV˜eù1Ù›(ÿ¬0˘5."/ctHë„1RÍÛ‘S çQ±TË Èç-¦åƒ,cÝvœ7%ž;×¢V;_‰ŒÖ{Û%ƒ.[ƒ×æqk»Úóª]¦è ZR=¶Þ› óª•É<·:S¸D ŽkÒ“ñŒþÓ®„ NIÁbV eÙ[˜…öSöàY‘–%|x¾Ð~Óˆ¾8lÙ®š…ã¶ü޳¶Ç'RBÒqzÍõ{ÝÕ"ó¸[ýT§Ð¥xEÚñ§:zYöræE™Bö³¹ÿÿ̽KÏ&»•9¯_‘C{P§ã~™°0èZ3ÃÐÄ.°ŽPêÿ£¹øFëYÌ”ÎÅúÜUªRæ»3" cóáÚ{¯ª{ZFF󪽛iëúy->£¹¼?©ÛàQ ë2x>£¹¼]÷²¨Üg,6ºe,#ézSl­’Í—áðÍ*…¸žMؽJæü«2<>C[Ƶ4þܟѼVÁîþJ‰dl³«¶á¶òÿŸÌÛëZÞ‹2x>£YÂËòº·§¼.}Ë_ÆË2(—­|1÷g4—¦_ót•‰ä9íÖµ$júÝó«ñÓóîÕWKÊ*cS*ɸXþØÚae®ÿ fõ³¥6Àn;zzzeúmzJ=¤.½žö(c²g/[kôÏ›lÇ^Õ ók»4£ÔÁ,eðy5Ù¤ÆLS È(Šø*K–…ìö fí)—OÁ[Àê!ãÏ 'xffÝ2ï6íŒM3¥WEùh_Õ{ªUzú˃qoû‹Õvm­`š^³§Ì£.9•oØq¼ñz{ÛžµÞëY%ÿÁŒŒe@æõ¦vª‰Ä_Y¬£ÌÓo=—ORüw6VeÏòéZÚMG­9ÈñךEÍAÖ{¥®Ç'¯…Ìšƒ¯ë§ˆÇ;Ç{W«ä5Y%1Ëšä Tª…Hšƒ,¤>²Ïç¡Vƒl²&¹ò!y=ÇZ]´9È1nŸ—íËå¦ïòN~ÞuÕii’¾Á¶+zöÉeqwôÊæµœgóE§í->R åÎ6—!ÿ–š­‹õæ×ȼž±–¿iÞñÐu=øs¬òúšwS¤äUMVê¦ÇÉ©ÂÚæK‘ºõÔŠÏ&YN5¤©yÇÅx*zí E©kÍ;Œ*×ÝæäÒkGq‡Þ\Œ¯ :_ÿFÙ¼c;n=d¶†nž6'ûdUH/›“§©'–¬)›w¬±0÷zð2ÞÝCŽ>P%÷É&åk-à‘µÖ,”ÝCŽi ,æ¶î!s¶R¤ðÒ=äUÞj«Ú£ã&Ñ®Èr´j¤2^ÍCމN Ìæ «„ív·¾5 º9ÈåÔóÝ9…R·ÈY­j0mNæ˜SG–Ù¤­Y¾}ì}R¶É|“³¹Èj·$=Æâ²5¹FËž­x¬¢É›`\W,W›_×SÆëüÌä*ÏþdòÜÊZ®E"èje-Ó8‘b×·æ!ëk5_mÁ^þ]qÛy(ÜùÊéöÀ|Ùš´®¥çØ„iš‡œ_ašæ!çó¹È*uôÔ¢›˜ÍGÖ|÷42ÞÝIžÅbZ÷âó÷È—¹§äÜÊznïNrôxYÏ­ÝIŽodYÏÍÝK.ßá2[?\B¶»;Éú&”=ÔO[ígw’ã™”åÜÞ½äèÚ]Îæfî… ºýê‚úáÍ*Æ»;É1èöÍœîàv(š¹ÍÊ1 p£#Þô² ›»—L?é³m‚Yî·evø¥NrêM"ø§°Ó¸#-篼? 
L]ƒT{g`ji¤¥x{½z`j5\Ißu’cjMÇÔkS×,š—Ÿ×˜ZÔ®‡ûøçN©5í•!·½µ3A©3¯1(õgª-Ž@ƒ°F©5G—;ÙÞÐsPjMý唯£H-›Ö[¯3H­Ê|tgZŸ¢²špH™ ©k²‡³Ï>€Ô‘ÕŒZ_\ÏgéŒZŸê2ñ·ú¸`ÔúÈßrPŸx%0jìÙC*XÇðeÔ™ŒZžÊö‰¹zÈ®1ê{^†Î[;Œ:ÖQgt)u†Qgp u-©xÙ¾‰3êZRQ3ú“bŒZ~ã]Fd#4Ψ·Õ³Üïô F-cqM¯7=3u–j£–qvPïŒZ.ð±ü9£–±ÌasÛŠpH]«<–™Žf³Úõâ§¥ƒÒâˆZžüõ<„ѵŒå¡moš ê,ò舺®|gÀµŒebho³ê¨ñ@-[y”WGÛݤæ{c€ZK£KÕßÙʵŒ›*ß½+{ÔYÒÞuVe ®Kµò~¾ŸAê¬6 @]+G–õE«¨kéÈò’¯ï³pH½ÕIøj9Ñ©µê<Ë|ÚP»CêaÔ!uV‡¤Þ>q†Íy¤îõ‡§£¼#èt–ÎJŒ ÓYÝtz4¢ÂN]޶4¨³¾u­CWü§F/Pg}]êZûní~Àç+ª˜Ž7K ðtÖž›—daÇÓYÞx:ë0OgàidÞ…;ø´8Ïí;¨«QI¤ÞíM'Ô2î¾u儺îÖ;PtB-£Pz#N¨G£!jѪ[{ÑOz j·Ó\/GÔ£ñðÚV‹jÝ·°C0êÑxzÑ4¥OÚúÖ±3êx³ yƒÔ²I°Ú¦‡Ôõ¬å†Û„åZÆòV¶zÅ€Ô¢„¥ûÇõ##ŸtüýV1£–m/Ÿ¢¶C{¡íhl˜ZÆÒSÇ›`Ë1u¥•[OyFL­`rßM¦.Æë­ÒñçÄÔß1vN}k^®Þ°¹sjE·û8u1ªmV§¾Å˧å}CÀ©e,´ À©ïò¢ùö&8õ­R¾ÅÝ&^`ê§u‘ 0u1.¶ÙJ}+=ÐÖªN“Rç©e+ïysuA©Ë_æÝN©CyCL}‹Q–®{|dbê[Kܾ‰EL­4¶ÝÔ0õpcêbœõÑ}¾ÄÔÃSë´¶ÛL}×D7­Z71uéàËv”‰©‹qwL}×È·éý$SãV|‹7ñ1µRùÙ¾1u1N{ÏûGL]ÓJ\­D81õp¤cêrdY–Lï7˜º/Û#§Ö5Ä©omä§Ö /ÿï-áGP}ëƒÞ¶ÿªoncüÕJ’o úÖö™m+T+_‡í Tëȹ'T$¨¾…ˆÊ“~)Õªo»»%Ž"¨V¢GñàÔÃΩ5ì[«åN=蜺5½kHrê[@¶§N=SÇØ9uÁ©5[:q§ŒÎ©oñxçàÔƒÑ9õ]¼ëõ/È©‡#Tãj»™ÕÃiTËè$ z8ÒAµòˆúˆƒj}ˆl§“¤ZDûo}ŒNªó¬ Õ·ð^ß—m¤ú®Zß¾©T­ƒ–ž”¨z0:ªNk¨Z)V`U:ªÖü¾÷WËBT­\@gÏFJT­#רˆªoquƒ¦@ÕƒÑQõ-ŽmÛ@ÕÙ  êb<| ¨ZjºƒUË(¢8UËÿñ] êÁè¨:´¶dÕ2‚€;«ŒÆªÓV=U+íÓÒ3Ä’UßJ4»´Õ}eÕìÐßËû«cZkú¬€h½jæ];ɵ.ÆCãè™àA­£¸©u"IPka¶Å¤ÖºyÓ0{­¾Lw_ ['ѶŽT›ÄÖ J­#ý!±µ„SåyµU¸µðî;+ÿ9¹µ`å¡w-³së„„àÖR$®s—G‚[kÏù¸¾/®Ö‘el¶%¸µìZÚIpë¤hàÖbve0·Ý pkír+3tCÐέL½ÜZLIUÖQZ-›Rã4ž l­M8I _Öl­ŒZ½`%¨õ¤h¦ðµN€j=ITlÊ{PëIpokÅ7H­å»/ÇÑVV Ö“–űza8¨uâ.PëÀo€Ö"Z÷Ü7­­Ê7ºmÝZë’óÔ÷&­£Ü4¡µ>çÜò“ZGUSbkMDå¾^† j-T$IÞ{—­@9´ÖÌ'œònÎZ ˆ-WËLj= 9_g§ÌŽ­£9µV¹÷ øç¤Öu)yôùÔZiÇËkÝö´@­‡–µNÎj­%oicgáN­•#p§ÖåH9ÚmoÔZ§-Þ[SÖZ—ÓjÜ6 ©u4ˆÔZ+ƒZF"µ–—ríóBj­øùMÀè:Ï v=)"|j‚d×Êy\戦É»¤Gv|ìZÈYš¯‚ÜÙu<0²ë8+Ùu4âêûÖT¸¿eP;¼ž´V)ÿy&rˆ«Åß\éqõpFW‹¢­æ}¸¸z8«‹«uÖëì[wW'›ƒ¸:8ÄÕ‰¡®}ó°È«uVÓ%R^}KÑX¾˜2»¼Zõ<úòê@lW'bƒ¼z¸¤Ë«Ëm.Gßü‡¼:¯èêꌊƒºú–úòî{—PW7iòê[Ê뽇.A^ òê¼IÈ«ó’yuÀ7Ê«¡Q^-ôé PX3…õpI kÉ5÷¶©Kr͹.nÉÙ+y¹Í p­ýÔ·ƒ ®ÓpM6àº\ò2éÁ5ÛCnM¨àZ`£|$Þ/Á5bkå]œ»çIrMnè:ûèº\³¸m¿èZ*Ù'èZñ5Å¿~EiD×ÁQ‰®ƒ†6t•è:"Ñ5*Éu0=ë@–$×;I®ƒ½’\g×€\k]4·À× ×’ü»†äZ~i×¹Výz׃\Gç‘\}MrMT r­ tî#º&½%¹Îöürthè:Ñn k 
ô_…®ÛõÏ@×dŮٔ_î7Uty‚t“]ˆºf+]/.[OtMZèzv}¢këÛï$×q'$×ÑX’kvÁu¶à:ú‡àºù0µÎ–‚ZgKZ‡Ô:ZšÔ¤ÖÞÒ†¬£¯‰¬³!@Öå_–Oÿ“ø4‘µÊLõ,"ëèN"ke×íq$Ö Í´ ë8À:šJ`{ÖqÖ’ì[¨u< ë¸ël-€u9MqCÎWë `½V0Ðâo¬ó6¬cS‡À:oÀ:ëÀ:lÖ‹V- ϰN#€ulÍk¶ñ·ë_“5R¨®Ôã~ýºT Qø´:加Õ³[éÝÆ€V+.bš@««7Rœ¼·‚VKѦïò´zÒëXÞ†7r´:ahuÊÁ@«UwNß»Wµ Zh ´ºOØZ½ÒêD5 Õ©Í­–ŸëQÙ Õ y@«“~WË8•õðë‡;®NŽ\]ŒšÛ®–É㹑 $áÀ› DK€cêBa¤IúT )B*X¤{&Ô!Hb dŒž $âÈ2=H‘ $Õ>È2=ÈpZÏ2é™@’ H’ ¤ùޱ§Œž dh­§Œž d8­¥É0RHå¡ùH’gE*ÁøI2œÐSdS $´RL’ÇS„¦‹©@†³z*ᬞ $À 2¨àb4^"‹l q ’ÜŠç?›Ì¹@È HÈ̘ $Õy ¬‹ë;]/æõT ‰¤ $ŸRd¿"Hr'¤É‚l ƒÑ²|ÇÖ³ä£D6PÚ°Žn³ g°Îƒ¬£¡Öó¬c$XÇã °ŽáA`ÏŠÄ: ÖqZë8’Ä:ÞëÑhÄ::ˆÈ:úÈš/‰ud\2™5Æ™u¼@dÖÑGÖÉ‚Y“÷YGßYÇü2dTÈš²æ\bH6‰5OšÈGY+ã•åƒIdæ²Æ‰¬1[%²&\dF koÏ€¬qI ë¤ä@Öy$uÞ&uÈšGYÇ8±NÍ0ˆ5%±ּ࠵ö ÔµÖTo‡ÖÚwPXßòí:’pb[“$Ö”Ó±æ6akîL±N#ˆ5õýA¬ÃHb­âQ‘VÇmVGcH«óHÐêèÒjo Hµ@©¹, L ¦ÆžlPê<«SjGH}U¯å-ƒ:£Ûƒ¨õiè)-Qs÷7u6ˆºÜªå#¢¦_Œ:Ï FF0ê¼0j.œ‚Qco8uØ@¨¹Sˆšë¸@Ôy$u\„:l Ôå-÷^êl5ÃaRÇ%Á¨£[Á¨ãŠdÔÑdÔi£ÎÓ¾˜:F1ut 1u©¹À%¦Ž¹˜:º†œ:àÔ*DP%¨ŽÆTÇ5 ªÅôÊGñ•ÁTsSÇ+IN³9uL€ÕäKÕב šÑmª•ž²'= PMlEP}KcØzœ:NJNSS ôÁÔhþoËò+êWýštÕå³'ÇéÄ1]µVÄsÏ& Fý©êÙÔ‰dÔ³OÏìFªO"êEÕž’ÙªS…lÕéU"[uìs0[uñÁÏž‰ÉªC^ÏdÕ—°Ô´µµ7’U—eÈú|?FOV­¢ÒåÞºðÙ“U_J¶eYœ‘¬Zë©£—hg²ê²;6ËYdÕZÙIÀÚt㞬ºWRy²êK ñÍ’g{²êÌŽdÕZ¾ª¢{K“á„ú¬Eƒ{nÜ—P9¡.Æâ÷õu¦¡VùôÃ$㎨˲ÿzðÇæˆúT…Z—Œ;¢V9wÅ¥·ÛwD­ úëÒˆ¨ÏEk4“;¢ÞPKßä·@Ô*>¯t-E†#j•­Ÿ,óu1ž’ƾIv€¨‹q—Œÿv@ÔŸ*cwK>âˆ:“¶Qeª|ílúPºS›ƒMc™¤zV°éb<â’¾rI°éãÒ ÞÓ28›>N €.›>äs– ¢‰˜M˨ÏVÓ";›.§=Uzicšêòs™Qz¶°i'Ëà 6]ŒÚm,ØÑtÚŒLªk»p ÓƒÑÉt1–ѯßSSËx>8åct4=Mc™R{T Øt¦òœÖ‘‹Éð§‹q>,‡9àô!?®L×M÷쉪âŠL–`tz8Òét9rÙººpZмy§£ÃéÁèpº\r¾,§…Ãia\OÏ8NF‡Ó:î™28=äpú;Ƨ‡3:œ.Yo op8=èpº7Ï@8=épúÐn½gq8=épúÐ×ÛêoNgƒ§ó6§¿cìpz0:œVkÛœ.áš,êtz8Òét1Ϋg.q:=4Èét¹æ.IØ+dx:/ <Oç%§uäi™T>xz¸˜ãéá|ާ‡#OFãÓå¬GÝ¥ÕÖΧ£óéÁè|z¸¤óé£Rµ^€º'{iÔºæi8õp¤êï; Ö´P†Éùfßp@=è€zhêb\<Ý u™¾Äšƒ@­k^´@­ÞSØ^Ëêí€z¸¦jÙÊôÑ¢¹A¨£#êÒØk9·Î Díiˆ:Æ+u "êr¹cmEÔT—×½‡HP—ã¶µG‘PWtB­+Þ]{zD(ªUqi»zÖÅÌÿá%¾ ¨Îì™þã*ß²ž'é?Náà« Ç‘þC…—®{eú³tÈÙ…mÌÿ1{þâ]Wà1ÿÇ!bÖõ.Ìÿ‘FÏÿQokk âzþñwK²ITß‚#ˆ|¯eè @”Ei»[)GË ‹‘DuAÎeê™C<ˆŒ“rO©Ž‘ÿc8Y«U›ôniw˜´zëcþÏÁªår¨ºˆ¤ÕÂÏ[‹£‰¤ÕsÇV=äefÒêâɆú™´š]À¤Õ‘¹™Y«# ³VË'[{5'f­^•YçlùÉ‘µ:DÖjö:³VG÷0muôú›¶:óK#muôÓVÏÊ}ÒRt i«™Óši«/åImŒi«³=H[]&D/ÉÉ´ÕÂ@k+9ŬÕÙVd­ŽK"ÿG¹dT}×ù?TŠCð{¥‡ÍPäÿP 
ò$k€œ:£y$nù?†öxþÁèù?£çÿÈÞCþ5V¶¯ÜÜÒŒ¦žýC½S^ÀöÅBöÁ虫õDÊw¹B|²d¯!wõpFÏ]­Ò'åF¯ïTVl–¹zhˆg®Î$êÈ\==sõwŒ=sõ`ôÌÕ9V‘¹z0zæê|±¹:¯‰ÌÕƒÑ3WFÏ\­Lò×|´Ð)d®ÐÙƎ™«‡#=su :$®Îꉫ3³½'®VF/1®–õÙWçY‘¸:ÛŠÄÕyÉOâêòëê僑¹z8Ä3WGzæêÁh™«›ÁêÑh°º¾Þ}9‡Õã‘«‡b«‡wX=äWsX=i°z¸¦ÃêñH£Õã‘F«k†µÛ6ØVG­r³9­Ok´zˆêtZ=^³Óê¼$hõh4Zí­®åî®#xiõø»Ñê¬ Z= Aµî¨ç5Z=ÞƒÑêì6ÐêñH£Õã‘F«Ç¶­Ö_Ž©«(@«G£ÑêÑh´zè<§ÕõÈò§õ{@Æ#VG®d¸z4®¯i¸ZÆâ÷²eŽ«ÇÓv\ÃÀiõp#Ž«‡“:¯Î¤¯ÖÏ¢SMÇè¼z4¯ƫǫu^=Ú X÷`Àz<Ò€õØÖCŸ¯–m9×uk¹1 Xgëáù;²ž¿3ëá6Y·éÌz¸M‡Öy›Î¬ÇK´4f=^јõ0 Y6gÖÃM:³.éÌ:Ûú ëñCÖÃÃpf=6Å õ0 Zg5h= ZFƒÖu@ªºÜ÷ G:´ëÐz¸M§ÖÃ5ZF£Öã5 [×ÊY õ¾¶F­›QëñŠWÇ­Ç›4h=¼é­Gc§ÖƒÍ©õÐV§Ö£Ñ¨õ0dáÛùÛÒ€ü©ÖYœ÷¹´®¼%å§CoÈç…ÑŸt&úŸ?þú—ßÂÇË“¾¿AȽüHÈýÝÆü&(ÿ+/º|JFöÛþŠ‹>wúùãÑÇÃ\]^|»þó—/mÁûxÛ“þÚûïüóÇ¥o¹|Uï¿×þò¥-xÇyòß¹úç|ú­(St™›æ¥:ßÊR]åŠ7µ”ïSiÐøÃ“lèŸçoÿ¼—6ûÃÏßþË¿›þý·ÿúíÿùŸþã~áeÊ'O›!ó|Í¥¡ç:å*ï…J»~ý¥vU›Ÿk"Àó—_jËK}ÙÄñ5³U¼­ÿ¦ÈÝKù~ÕW¡fÈ×¶¸8ÎÐ 7~M{*'“Èþ mB{`ü¢ölwu«‡¦<¿Q+´Ü~3^Mqã¿(«¹x_2fëÕc̲_;fÕž³hÏÙÚž>fÙ”/³µ1fÙ”ïÙ_ð9Þ®]j­’ÖòíR’²ÄQ)ÔñÛ5}Ûã瘗Y.Õ!. ¹³¬hÿÆeößy™c®5žŠ¹¬†ËÜïeæß{;uiU–¬eéºü­ëüÞûYTÞö:–s^æ¿q%ïçï8dª‡²wYìŸß„Š´ÍS|ëùÈkè¿êþÛŸþûÏ?ÿñ/ÿöÇ¿þõ¯üù_ÿò¯ÿó_—+óí—…ìÆ];OÖÄ:ƒŸ(e´M½nÓ^«ú´]˜å§Ó8ϾL?Ë(ó}Y,DV»@W/+´×ü”MIZüZ•hxòaìËn‘®Å8m½Pò.QT“èk¯wëY;öå²xÕ]:§^ŠlW•¦&´×Ö¼Ò½¼§•:¤‰éËòh·"H» M/¿©$O¿Íu³R¡6+w¾+8ºIßW%«ì©5Ë,t-½5÷˜Ì}½-ÔSºëöm¶`ÎEÁâ­–OéE ×\T±×.ÃÊ"2…ïo÷›ògß º”€Ì ¶ì’@4½ø¢îèµwwõe“„«ìýÚUˆû>÷ ÉE©3»švß‹ƒ\ª²§%Ù÷­†:?ïͨ*N–½ïgYÔ*ÆR•§nQ‰š­-±}ùѵ(±üã{UA¿#Yûo¾ã]ûûM=K–Ó3×ïÚÄo¢çºÐé©À÷ã´@阬òß~\å7W.Ú5»‚e›>y0άW^×»—¡+£ÙÂñ´c•÷s·°;iœ¦µU|ßÏÃ"ë´ÿcõvín7Y°.ieñví`7å¯N{\¯¦d¿f ÓÔbÅíÊ+dQnR#Mg+2_Fƒ²éSµôâkeZ³X5É‘¬ ]é( G+ÆÝ Ø•WÃ"ÎæÒ“÷Õv÷Z}ïǪkߣ†våîÞÄ\7^€øUcy-®x×îlwšçŸ®Òï~à® Ø7¾KŠ™mjÀ~N<£˜7p¨ÖM›¥7*Ÿ¨wËü˜ ÄšU³œök–IËb­¢GË¥,žª4è\϶?y¨V›uÍ⽕Žé²¨(IЦ£mÍÒ´ùX´”äQÞÊéÁMÒ2–ùø•±Úeœm$_–‚ºŒj‹SÊûœO‹E’^rê<åv-Þh©ˆ¬åI8j‘ºw$/j]—LËlQCõóUžØ£<Êez`Ð"!E7:To¯OÉJµÝcœJ_Yx¤ŸVËéÐN[Ÿ’9ÓÚMkS²*OÝkËr”o] Ä™LØUm«ÛH-­ã3”þ¯5Ú[׺æ¹ùò‘ë!3ª×¥”Ëï\Ï£lÍÒ&>bç£|ãzä‹’{Zªõ£|äzpË$ñêÜ‚øm0µYUíUëî3ÍÚGjs² IÈøLV‡¶ŠÚœ¬7¢L)í)n§›¨ ›¥Â(‹ (Qcµåø|Ê˨±˜‘rdY ´¨¡2ÿX\ˆúçÜ^~ñz,òCZ†2ö^ϱïá‘q?-Œ£êÔ?ûc¼,TCù¡·³%Í9´SÒæäì m†´Iy’xSýòÙc)‹ ª(ÆòÉkUÀÊäfq“†ØÒ2ê5EÀ;’ÕA}ªÜ(ÿ0éKÜK:– ÙBô¨.@/ÿª‡1dc±ÆSc-=I]ªÊjù™©[0›ØýïÅ „® 
å64Ÿ¥^†odWó"à€—7fžzÞm8àrÝ%Ýx5[pÀS×øÄcÁ ×’aÌyr˜Ð<¤ðÅ%“W¯¼%.á‹ë ÷Åu¤Ic¿´÷µ¶ªäôÅåZ§óͱG_üRÎÔ!¹»â¬HO¼ø‰“Ôç/ë„'^F¿|Êgª¥#~jÀ·ð9øáçùÓ'mòã¨Â—:ôêtíuÄw6÷ú¦tĵòYzqOx⑉ˆž¸ä¯Zj¿OžxiVy-쌞x¹~™Œ[D*=ñC {(zâ O½Ë'y{O랸b;7;«;âå…Ü÷îÑ/ݦôŠm] G\ƒZ¹xÞ•!<ñUƒg«(KO¼4¶4µtÅÇ{¡'®XÓåh¥îé‰ëE·Ø!zâÒ€õ›ô.·rW¾ìž¸Âf§öi£#.=ÿÝÒÒW ‚ÙvØà‡¯ ”í±UôÃå\XŒ9üpI“á‡/ò<Êòðr7¼ÌL÷67‡™nøªAÔ’á†+ÌáèÉÊè†óRôÂcÕK/\”Dòu#HÁï2ãöäàô•뉘è…+d¾œ÷§è…/Jþ¶”ÈAÁ%pnx‡N¸Ž²§®Ð‘^‰>8]ðò]›{AŒ à³ö–º‘Ü·ò~¯=m#=ðX‹ÓW¸Î.iþŽ®< VL#8{¸Âà{f8à±×G\»„Jfø|‘_|Qq+GGÿ[ûjW™üÞ˜;àÚ ìµ¦éÏ vî¥Rèç“€ÿ«súßJ­`U-ék+ÓŠ¾ÒÿÎΆÿ­Ð²§¡p½co¾wúÞ•kÇwô½cŒ¾wlžÒ÷Ö‹`…hé{Wk/ùÎwÛ6‚η¶²$hþµ;ß±•IçÛ·Ãó.^Sï—‘ž·Ë·«E;ÀóDâ&n<ï2Áçä©AðeMÃíV¨g›+ýny¤S_ÜÒñV"ÿ @ré³¾½I )?QDxy^Í ¦ü$œ<ÊO¦ôŽ·|¤ínóJ¨O&¥þ¸Þ%j¨Oôˆz*õPŸ(‘Îj®µûÝ«òÏ8S‡ü¤¼Ý–ž%ä'³¼»ùÝ ù‰žX¢ß= gèO}µ;Y#ß4ã5}m{fÎ`àzå{mÒ`àÚ¯X8 ë-ø,uÓëF¸ï=kÕÒ‹–Ç·ðu½g¹u½ªXèOBÕ.Ì„àŠºê¥šC¢Jõm'4¸ u/Úïüž%°2¸ƒØò’ |Ñ,nÞ~0p—;S”I*¢þO¢oÈoÎPáx+|z*ãìafÀ9‹ÒñV”š>‡ïkG®(ì»ë¡àxOõ}·Š\+:Û#'é ~(IÐõ–.^Ûý=J â@àqÖ+Ãõ<|òï]¢mjâòo§»Á¿cCŸü[3ÓÚ’+ÿhLþ¸‘ü;ˆéÀ¿•×êªÁ¿Cê@þ½+ÌþýzþªNü%þæNÅ€¿¯ëœ^}bàoe¸Ûîaà︓Äßx(‰¿ÑAÄßyZào}Bw®G‘«ÄEéÊgÊJþ}è‹õº'äßã_þ=Nþ{ ày>ðU¤¶>p™¼ôcƒhG{À“Ž€çiÀƒÄ&Ç ¸Ê†}áFNM¾hÚmÛäÀãNÀº€Ë_˜—î-‘OÎ#AÀ“ƒ€s“„nxlJ€ÇŸ?Þé5ºáÃÿ‡—m ¼¸ÎobÛôÁg¹‘G[„þ,žZxðÁ•–é:šNø®T³éN¸\Õrö±ƒ¾*½@§{ðÁ×ò¼,§på¤ì•´èƒ‡L>xêàƒ§v>x úà¡U¤žQøà³öMÌ<\)¸àÊ(zÍËöRj÷À•¿gÕ$óª¼Ý/–w«“B¼|*T_çÍŸH\ŸäÒÀ&‹p\ßò˶#àÇÎÔë‚ç<\ð“Áéƒ.x|–è‚Ç|F|Ò¨Þ[Ú[úà13Ó9‚>¸6F¦^>xl÷Ñ µ]èPʰzõêpÀc“0<ðr”rª½Ï‰¸v>zðÒPQÜæ ÃW~‹8Ã/_\åÀ;¿£ÏŽ£ÿMéhøßvûô½¹¡¾wœ¾w™ñ®½ ŸèzϼôeëãzÏJw;_ý­Ói€¼"+xßÑ_ô¾c_™ÞwÈ(é}ãù…óÂùV¡.Ó9ÐùŽMy:ßè´ð½'éoZyÎp¾ûó Ç»xÄ×ÝÖòáx§Ž7G8Þ|áxÇýÓñŽû§ã ¢ãÍΡßO™~w6ÖüîEõ0ž™~wœ•~·Êõ­}çããwë|÷Ñ*‡ß-1¿{<Ðüî¡•ô»£¿ÝïÖ‘×mÃØýîzZ«~·ü€žT~·ŽÜ­r ün—­,®‚¿»6h[mWøÝ2nkO ¿{<­9ÞµAÓÖÔ”p¼eœÌƒã-ãlÅáxËXüµ¶¹ Ç{QakÛ¤†ã-£R|=uâàwg5¿{<«9Þ2–y¯ï€¸ã]O;õ”ÜîxguÇ»žõêÛ©Ç3Ëe¦ûw„Aÿn削¬Ý)ó‚YÜËϦÍwB0åQ+ªç•[B{¢À2å¼E€¨=Ñ\]¾vM¨ ÷[t:Ú¬O÷ûÞ…Ýû9Ä'zEËbáMgIÿ;ªÓ×WÐcðà€ßR ï[ÃfŸO¥Ì´o­=:à·ißÕ¢~‹žû« ¤~¯5vÓÑÀ¿kžß«£lwÀ‹±Üãt~/SþÁÞk„ø¤ÆYoÍã…žgu¼Ø6ÈpÀó’î€+©G —±¼ƒ].òqÀoí­.}\Á/ÆòZLM&ä¸nï6½ð<+ðÁèx^ø`t\9\Á¹Ý¿µ«w´ÔÎtÀóH8ày$ð[Ræ„aF`úày »àCsÜ/]0yü&|ðÁè>ø­½D´t'|0º®Æ"òÓpõ…áÑÏ[#~kÈ¢MÝÏ΃'^ÿ¡”~<ñòëìñ 
ðÄ£yâÅ6y¬(<ñ8žø­ÅùÑR3ÓŒîŠÇ-Ð^£+>4È]ñ<-|qÝçÕŠ•Ðϳ¢;#‡îxéƒÉw÷áŽ_Ú†/^Ó9Ý£;~é!XÐ(ÜñK›=÷~~7 ³ À«|ß›Ìîø¥÷¾¬5Ÿ”|ôÇ‹q7fB\:CÛš"WkïÀ¿EÑwÊèÆ~v|0lÎÁÓ|0:ϧ>éþø`t>ƒãÖÉ1xö70ø`t >ƒFçàƒÑ9ø%EÙ¶÷ HçàƒÑ9x1ÞùD>ƒFçàåÍÛ,Æ ü–ÿm€ðK›ìS„+ýz/=‚)ˆâÅ;÷|ß~O~Íòû·rÿÅ¥.w\ÿôçú§½Î²~þôÏ­ù½†üƒ3½w÷³Ýñ?ú’Åw('ýÜhûËú…¹%«NäiÀûç/½¾Š®µ¼ùÊ<Ýþyòï_¤Ïû²ü–Ÿ~ÿ4àùó×^ÿÓíOž¿|¯¿ #åuÕ½†²z(ÞñVW¾w™»Eæþ׿¾,n[Y´– ó<ý—_iH}ù ®T‹wìÒH^ç/¿ÒñeI6û\Ù'èðy'寜+í.­†>íË> ‡Jœ W¯?~Íõ§i¸zùék>‹e™?Üûçǯ¹þ:^ý+fFæSÿ&}ɘ«×~Ưþ5c®^¿Ž9^ý+Æ\Tž1‡«ј«×_Ç«ÿºoáQ릖µÀz*QE”O1óíoežüõI y™Y‚ëûZ÷²äû_™à’—)ÁÊJçž3ñä5’ýIeà}{ÿW&¸Œëì*³·ïkY¦þò—ÿÇš¿iW¢,Åþð/vÕ–t]ÿ³œßþÙ®i­ñRË+ˆoÊâYnôRµ«ZjhÊZþïq0þÏÿôýßÿÏÿ;´gû©,çŸöüX$¹$U’0~×ø<–âR —ô›ÿö—OúÍã‹Òoê°²Tÿ莅¦­,»?áãs|þ´mmKdÙVà[q8ß„„,¡ü E}~6‚ßå·LûOŠÃÛ_ñ1IÂYºmý&Øü‘\ŸÕññÓ!¤½íú8Þ½lÖ0áÿVgV¶«Wa•í­$+Ó]7 ¦u*³Â¦_Îø±©ì° ƒÛÚËÛ>Õ½©š2¡\¥ÜB9n­=¨rQX®JaiGüé©ýü©ÿ•F´tè|ŸéžžM?©Œçn»êÆXÝ—Úkåܽ ‘:=5zv¯¡Tl‡ÍÛU·ÜÆÛ‹zeµR\•oj7«²å©jâÒ“€.åл6ëÚ6ŸlkÝd«ûjjâexleH<[–jb«…(›vò>eÉÕÂíè=rÔןk:¬íÓÉvô97-~uÑ'öí mUJ§õ>çãê›v»*"¯Ö#ÚÞ>CIûŸþÚ[Kk©SçTë Æj´ó¬‘NÏXU¤Œ½Ü§öÆžü«º‡¶c(ÛúÓò>•¼‡së5…u\+Ñô±µ!¯{¸l•·½•Ö9Ï^9yQ¦®W½¡~i»{²•K”§YíÒ+-É¢Œ¿fSGµC[ÛKUKN{3λo9ê¸OÃç ¾¦2Ñì}Ì– l*­¾ÁÜ`#®¥ÇtÈtœ÷à![»5Ârc-×Zë?#–]|Ië>½#6ŽÓÎø3¹æåŽº»ùŒXöâeG½ƒµM×—å¢ÕaG™ÝÊlþ±]½‚ðÐÊ»OßÙ‚·o!Ú|õõIxS+éœåDízwýÀ=v­Ç­ûµ±:?29Ei«ú}åõþϽ,÷Ö÷XsÀÞÚ¹'×b{ •É´÷Ý×*9°¹ü>jÝàg¼žýe‘MêÓ©Wï±[»ýï$«Sžûó³vkÛXõ7ÿ#OzÇ*æÞš+å´ùµÕ2•­fÖȾ¨:ÆwÄÔ‰kjókÕ³¿Іӭuï·V ê­ñw›_ã¸]áûm´~n¸fûšÚÔzöêæ²}¯8¦ Ù®¹M­x9!Õ¶‘‡ã$NhSkù‡=/­4ÈË:ÙH}v\eškyàg ¦I[ÏÏÄŠPsþnmb½ún‘lÅ£ÚÛÄÊþ—ë¹ÛÄê—Ûµ—؇© aÙš_f}<uû¢VÎ]¶S±4°T7è™ùkFëóS•%ðj/™¢-ôŒS|1j¶‚¶Î‹A°L½ÇÕ!æù<¼6Øb룴í©É¶öŸÞšansjضŸÎ¥Í©x/%Åß—ïΩ5 ïÒæTŸ7kXêjSª÷°z§ù«šã–­ßÁÕgÌ5›úÖæÔSâœ÷³U“hìmNmºxBóáÓ©]ªØîæ«z{e+îÔo˜w]¼²ýÜû0õá½n}£?_´Õ¾UùäF_6ŸJÃðÞözÔ’¿m°¶BŒ²)YiŸOí¬ÚínÓ)§ƒõ®Â6VýEÛ¦^í7ߦmêÉH†ãæšu GkYAm‹Í©8ÝÚ+ýV±×;EɶUÂg´[ÓV|l—û¬¶ÞQâ—£ù¬å6çö‰TžÑ­û¬’ÛÙ• Ñ‘›ä0?˜W·«kr(lRÊ´y•CaŸº'‡e­³µ÷!ëCOyÜeõ÷t_zÉ€ø$¯»-$ò U–P{wYãœ{ø—mµÉp?zeßêDÚ*¶»»¬»òÛ´sYLÍeå€(k©½û¬éû]Ëú>cÞºòÌÝgåí~·ù•OýÙŽŠ›>¤2§ÕÆ‚6Ýgm Ù¶ZÏ·yæ¹Év»Ïj÷zì-ÞÞsUúñ ؽÏ:²}Mž¯bYHͷͯ>‚Ž«¯B%Í\l~Vh>«Ó=ñÐzjËÎæW4§¥+먥¹¬1ÐË:jj.ktʹöUê`ÛºE¶¶$‘M¦­¯±|à•uÔÒ}V,87m6Çúè*¶Ë|VïÍóêjàX"¯ç]-m’õ~Mý¯Ù›×Ü¿ 1 Šé:Ç‘WVRG÷[wÌ£×ÚOžÃ¿¬¤÷[}Ä*[÷[ùà.mR¾s¬ 
U÷9¶¬¤ÎÉæXœòì­Î»ú“Š$1Kw]9ÍH?³ØëÓSYH«O±ÖéW›b}¾¨¢_›b­)Ro6ÃúGúV(ñÔ¬-S¤knõZô^Í“5sïáÀ²µ“ÈvôB½é0”…Ôü#VÂî׃í®Äö§M®Öý b¸lnµO÷(p˜’ηM­Öu“ÿúšÂ÷Ø*™­O{ØÑ]º7!îNBˆ»“×Å=öµ»8Aq%–[û, Š{(Ìà[àÛCYílyîøv°¿•^Ñ9›ó[ÙÌU¿­ÂKëç·Ò]6uð[‰DQ9¿• t_;¶p~+Íêf,Íù­T©ŽIœßJ$;õ°yð[³ya¹GMeщ†ƒÜCÕè?¿ÂÕÎÍblÝî¡TË{Ge@¸Š£ZÛåE¸Ò {?áÞöpáªMD˜WºhÃPŽpµÙÐô¹pË4´ª½o - \E3ø9Â-·`¡ˆpk¼‚5Óî}ÇáV½~Ÿ€pï£ÃŠD¸UÝï÷Þ›Â6 nlÂ€àŠ€ÙŒ‚{Oþ¡%Á½•ZdëÄØ ®´‚àÖˆ¡XG¸úþá ð½³_"Üëz²Á‘Þ*„ðãCoí;¹ýDŠ´kƒÜ ‚Úìrû  i÷r{¾+æà6ž?À­ŽêK:‚[}ñmLÜ^'¶@n%Ž7díàV‡u± Áí=¹Fp{}~r^ϼVç5Š ^{Oîà’×–æY)òÚ<§Û<έ†§í9°UoÛ< `«PÛ¯µÕq~ëNm¥Ÿ^¬Lh« •>4hG8³Í#œÙJÛÜ7†Èl5Äœ–:³Õq¶rf›‡9¸ÍÃÜ^»Oš·×Õ™H‚[÷0bg¶>˜“ÙVW½óc@Ûö²Øæ!l¯Ów{ l¯½?»¶×§ÄöúøKïêÄ“©mï ¶Š]ê®­Ê\:v`«µ an¶å¸Ù¡§[M2ýsû[õ„-Çl9£Øö[«åœGV{í®I«å”GVkWrN½N›WrNËù„œ6zœ6z°6†`í¥ZCÖæõÖ^FùÖÆýÖ–Ý|;ÂÚòQ» Ö–ëM¶%å°Vµ™ €Öj5ÜÅ$¤µ×Ñ©bÒZ½}ûž´öÒò¬Íš/¬÷°V“©íaÖ*Œw™ÈvX{¯ØÃ¬åG°öž]kõ±t¦ì°VsÆ&@k³)Nkïù]Ô–C>¸Ñ -ýZÝRÿ(’ÐÞ ˆ0í½bï „6sB]BK§Ž„V aî³-ýNZùöŠ‚Ð–ÃN{@hïݾ$´ å¶9„Vø­SBË¥ ­¼žî€ÑêiÚ D[®d"Ú{Å6¨#Ú{s¥í;xgÕñW<€³*o°uÞëlVù*ìU›ʱ/Ø,WHd³z(æÎrQI8{ïý;ŸpV+9Cr€³Êб„³åü# 8{Ÿ®m œ-ǶQçp¶¶zv6+ôGl6ZâlVËiÿb€ÍrÍüÂÙCiúŒH;œ=” ªS ÀÙ—l³·ÃYÙü[îpö¨%;úø78«Ãv›+ÎÖHç.øœ­§4Ò鬀†cVdzíÛJWÇ9J5:¬tö¨Å{úçÕélžÒèlå>6ƒ:NitöPe3û 9U(¶éÎÖïþ";œ4ò9È଎2½­3êD0,$ô§O‡`´º{¯>"üÛ§‹¯)ô´ví]>ÐÚ¹Oô­¥*´–B-ÐÚå[BÚò­wˆH+xé×Y­\"“gÕ*‘ˆ©ãÈjohfÁj·¹%èPí PT[\`'G@µÒˆØ-ÕJajja ZURp›£ZíuªÍãÕæqŽjïþáNT›dØ­Ò6úí9°-ë8ûŽØÖYçî*ãl/ÿÖت‰>l¶v°Õ¼Ø°=ÝŸs`3€­Ò;Ln°•[Ýq[MíŠ ÀöîŒ(€­ÚÙ0€­¤ÈßTë_ã µ–£(H­‹¿ºDµ+AT{9¶T{Bé¨6>ÉDµ5mÊö]±-ýT¢Ze@²>&ª½|Y; Z—ü“×j'ràµúæõÕ`@Û£££„¶å3jbN2[v0[ºÚ„¶7†? 
í=ù>|@["o@[.‘ڞطúåÔ–Ô¶æ^ùÜ6n=ñm{ˆ¾Åòø–˼À·Ø°Hz‹Ûsz›No“W;½å¢2è-GÐÛ«ûN¾-8º»ôâÛ{ÐŽü–ü ü–<ø-¶TÈocMüÖ˜ñ-pvà[òVàÛ Ä·ëOߣ·TAoÉHoo—°Â%Âå¾@ \ uÜšîª?Í$¸¾$K‚ë«"\‚Mç¸Ƕ$Ç]»#2rÜo¾=°L#¾•B°/ |«ì7?ÐÛ*Á-àˆo9àÁpïâØ@¸ÀËŽp£é@¸Ü| ǽ};8îÀBŽà:8îâbVp\|¸‚ãný%þU—L™—w; Ç=¿ ø–”“ø–[FÀ·‹»Xo9‹ßž^Å!ð-ŸZâ[sÍ:¿ÕØïl‰ü–C•üßòÛáà·1 ß^´„à·}§ èöB©V¢ÛàÕNnu¥®þ½åf4!nÌR€¸±ëˆ{Ibß–ö`¸¥û\€áj·éG 7Æ=î5Škõ”„nó2†nc†º9è6Æ/ÐmlòÝÆ†Óƒn¹iLt@·ñÝrp€Ürp8¸½€[D—ò:¿mCðÛS¸ñaÀ`ÜKÞKoà·—’ÏöS‚ß^rû^øm™­>Yº‰nˇôví¬£[áG×Ü:º1t[&Ž´€nuNGņnã+vßU°Û˜u «v«1îdôÖÆ‡ÂÚØ„°6/åÂZÎÐÕ*›qu×ÕÆË ]m6Þuµ±Å amxëPÖ†« ememŸÚ!ª¥Š„¢ZÊA¨ªªÚp%¡ªUZRÛ½€ª–SDµ|6ÐÔÆÞ´kjuÏ}}OamìïCY˯•µt¡(­…[^•µôZ¾ HddI0|"[qX[Ô2KÂ0d+ÝzGNl•EÛ¢ÒnËZÃe’@·5á¶¥4pt«âw?B·Aù€nË‚î3“’ÚäÛN‡ï’ÛªRœ+WÛªT˜d`ÛÑÖ±-·>ˆmU ìѼB\»|€ØªF–Ńخ®Æqí &B‰íúðBŠk’‚Õ®^°³Úá«ÅþVˆkWH7“Õºnެ6šá¬vAD9X­jIµ÷¨vùÕÆªZ Tûv¯CÚØ›ªÍþpV«V@® VËþpV;g¬v¸ž±ÚÞzRÚÑ)íp¶NisL9¥Ni”V6geNióœNióœOÒÍvKÎgó q>kGšÍGèh6[lhv¸¡ÙïØÚŠ·¾Þ?ÐÓÖ­WÓÌK“Ùš@ÛÚfcW`6öœf E´f5½z"³Ãõ ÌV›Cb³ÃqFfZ0Ñ ±‘Ù 02«Ûc7œÌµìãîÔ¶‘Ùá8#³a™=*µ¤2[6×ɬZ®idVÇyÜ›“ÙÁfd¶~ľŸ á¨EžKfãÓ2;ܘ‘ÙájFf‡sŸÍãœÏæÝ9ŸŽ3>;ØŒÏæ=8ŸŽ3@;´ÅíwlWW.„Ííp½h³ËÐÊÙ¦.]ˆS©M›“Út|² çFj³Ôæ=;©ÍÇQí`ë¨vhŠ¡ÚÍNj븩®f¤v¸#µq9µùŠ;¨P)€Úá8µùÔÔæ-8¨lj‡¶±ÚbÄ6»Óˆíp9#¶ƒÍˆmö´Û¼œÛêjâyˆmÌç ¶y×Nlky ƒžl«‚ÀxÛ Æk³׿»ãÐ6·CÛãS?²3`£¶Ù£¶Ç§œrO€`Ô6OéÔ6úÄ¡m\Í¡meÌ6ü0Û<£¼4ºšoèv¸íŽnóŒFnsrur›Wsr›Ç9¹Õ}{¤'Szô†“ÛÁfäv°¹Í{vtÆÐíÐvC·Ã=ºntr;fä¶^ζOÜfW9¹­6Óæ;ºÍNqt›×st;gü6»ÅùíÐNã·ÃqÆoã!8¾Í.3|;\ÍðíÐ+†o³%”Þò8Ç·ß±]çwô¨%¤·ÇG·MÍ-˜ÔÜÆÉ\s&hnÃÍmØ ¹û‚æ6Žƒæ–6ŠnC_ ÑmÑmØ ºsBu6WÝN'õ@u«JǶ·œªÛ¥‡‡Qu{"n!T·0…êÖ#!¨ºÅwvPÝö–Pt+6Ø÷>¢[üô..ì¶V®“Û®ÞÍĶ~À¶©ßòì¶60»­ÊàØŠžÙm÷0˜ÝV‚`*!»m9‰‡Ó3½íå{š‘Þö´ýÈn{ »m„#»mÜ:²Ûr¯‘ÙmËŒe±"Ìn{Mв"»íy3Ãg·Õ<íùj-Ím199CšÛòÅ€¬ÖÓÜ Øõ<ÂÈr[Lþ€"Aèm½YnÏ… ú†}F–[ÙLë† å» ¥®g¹=n*|-E‚»Aqu˜g €â¶wô-J¦H(AúWÜ'R$è}XBp{@üÝ–6{2Nèn‹ Éj=C/Ùm¼ÝÛú#Ùm±Ùòˆt ž×e·êvÏÅk²Û˜7 »=6¦ÔuÙí±#7®«nËaÀœ®º•ïêée?ª[U´t‰¬«ne³ðy¨n5B èCv[l¶óLÙmõžÓØe·irÙíhë²ÛêÌ÷QÙ­Š›Ù`†ìV6KlÙ­^\ÛƒìVB-?:d·q=ÈnË“ò.Èn¯‰™l]vß Ène³od·Åm­ën5_uíu·Åær|×ݪf²&(„DÝmÚ\wKu·ÇäÛžMw{LLVàº[Ùœºîv¿!{€îv¿!îV_Ц»-&W˜Ax«Ã<í® oµÖ}´®¹U?ÓŽ@s+›ÉV¡¼By«K½òVËñ.Ðð¶˜\´áí~A¼ámœÒ…·Åd± Þ›‹t ¼Í¦¸ðv¿üË áí~3I‚ o5M»ðVM1m¸aƒWÔã#n­ÚÛòW Z 
ö¶ÿSWÝæ®ºÝO)¢ì¶V†´T»&»U‘d¯@v+›IÍ!»Ýiš}ªnËa¾AÕ­Zb*B¨nå­™®ªÛ8'T·l D·arÍm˜\r[LÅ É­ä .†uÉ­JŸ¸T×µ·yœko‹mýöv? í­šiRIhoã”ÐÞ Ú-cŽ[ûùQÜê~\¤ëŠÛbsi5ù튔€¸ƒ­+n‹›o)·år“ |]q«* ¶j†ì6m.»ÕåL@ Ý­ ­šÂ[­FL›á­Ž3á%„·yœ o‹ÍÒÈQy«ãLpåmÚ\y›6WÞ¢: ¸ÙLWàf3M[LÆ' À•ɸû@î¾öC€›¶7unÚ\€+›© !Àm]›­7 î¾yü%¸ÅæÉ$ ÁÍã\‚;Úº7^Hpû« õín5®lÉ ùmçòÛ<Î5¸ÅæX \÷-àíp2ߦÍÅ·1„]|O âÛrJÇ}ß¾íƒê6€ê6nöUÝFßAu›6WÝ›§Pê¶Ø€L]v+›'n0Ùmæ²Û´¹ì6Oé²Ûl¦ËnÕ[¦……ìVU¯ ýCv׃ö6símçÚÛÑÖµ·e‚uPí­jfY´·BJ&1‡øVç´m¨oEÅlãêÛ4¹ú6›éê[>ªoÓæêÛ2Åv ËM›ËrËã‰å ËåüÓd¹OEfêqóߺWÕß̇WUã̇W%2ÍOu=næ‚\Á%ó¹ ÈÍã\».p}!Èe×SËTGä¦Í¹åœ¾f… w]±‡ 7º‚ܼž rË9}UA@«áã !\«vZ>\Óã†Å帪8ÑçÊq׃Ùå¸qä¸å ½z Ã#ÇÜ3grÜõÀ"r\¬GTPŽ›r9nÚLŽ[L?B¸²ÙÞ0õ¸:Žx·ëqó8×ãÛ®lVZ—‚Ü8‚Üh ¹eÆê® õ¸2}â¦ÉÕ¸y1Wãf#]«è€ÚÕ¸ºqÔ¦ÆÍSº7Zéb\Þ6´¸ëFøêZÜhÈ«Åå§„Z\Ùœó:Ëe#Y.—C㪤’¥Nt5®ó,¶®Æ-6Kk 1n1ùFĸñjCŒ¯6Ô¸ñC«·Øâ䡯Íã\Ž›Ç¹7Þ~ró8ä–ã>ÛAÐâÆœ1n¼Àã¦ÍĸôF¨ÆÕ8¶]$¨qãÕ€7Î 5n¼7f¨qc|C›Ç¹—¯ĸñ:¹7^¨qãµ7s9n¼Rã¦Íå¸1È!Ç•ÍÓ ¸7^ÈqcÐAŽ;Ú.÷Z} CŽ[lž³rÜ6!Äí¿º7†©kpã“ nÚ\ƒ6hpc˜BƒCÿÕàÆè…7OçÜx[ Å1 )îhkRÜlŠKqór.ÅÍSº7s)îhëR\~Ô ÄËA‰§„7”¸qëPâæq®ÄÁ%n|Ú ÄÕ7Êꉹ·˜\ÀàJ\}¾L£%nÚ\‰ËSR‰KG“RÜ´-yªß–IáOµPÙV¦™£òØ£LÃGXô§½NM²~þô?þé_~ 1ßú½ïò½ï÷ò› õ¯»¤îîg»ãô%ï#ý¹Ñö—Õå[ÿàìò­Ÿ¼þÒëŸ5¡àÓ€÷/_Ù‚§Û?OþýË mÚ—<ƒOž?íõ?Ýþ4àùË÷Zð9çŸþFKÊ⻬‚Ê*`/ËÕo›æÞâ$oó\þVõþðälùç¹üg+ ÿö‡Ÿ¿ý—W<Øÿí¿~ûÃþ§ÿø‡_z¥m“{»”Þ®_~¥í7\é(_‰»\JßÜ_|¡#/ôSeŸŸÿÁ#æ“¿rª´»ÜúÐü²/ÃaæÕ®ùן¦áê姯ù*j)¯þùñk®¿ŽWÿŠ‰Ñ‡ùbîÙWŒ¹zígÌñê_3æêõë˜ãÕ¿bÌÕIås¸ú¹zýu¼ú¯ú–•IY//—›{ùnÚ‘¿×]m¾Ó·ýóÙøÕ_§¸Ì,¥þ}­û¾_ã2ûï»Lùj«ý,{.ãeî÷2óï¼õܯŸæµ,pÊâöo]ç÷ÞO•Äîûz\eáùãë,y?ç‚¥ëYÆV™ÄÖµj2ŽE;Åy ýW½ÂûÓÿùç?þÛ_þø×¿þõ?ÿë_®ÿù¯¿Ëøö‹¢+7É><3^«F6|ðN—,l’}´mŽyvùÄVOÓB, Ú$ønÁ•“ïžÉt5:ƒ¤¶Ý*&U±VX›‡bìûì©Þ6 Ÿzpåâ;V›â]šH½tœ Çd»šH}½úþƒl‹GWn©²©LS©—súýÛOK“©¯—gÙÚÛÕtêâ–~œ19ÙzŠÁí8,¸²˜LäuXÀ¾ÎŽD¶ã²ØÊò=q  íÇ«R_QºF¦[¹\¾¥¸IúÑTêÅfê½MÒ¦R_nßÂÜ$ýhRu߯’mýW®ó3xÎÕÂ*%¾ê n;7 «\.ßÓÜÎÝÂ*—˵'ÛyXX¥öú€wSlé«Tgíz™zXe6Åβ¯“ñéM±`M¨.AN=@/B“¨/»gHÛ®Ùâ)G[¨”š¨£éíZ, ²Ü—É>·kµ€J•Ö±|mP¹ž˜l»,AlME#›JÊöÉѸÕD©¯’rÙ=SÛ&ÍG“§kçÓ†aÍ3ú*)ó¶ïL)ÍVß6ØîÉ‚)—ÍsNËÖƒ)UÞÞ?Õknút ¬:¬ßîÅ¢)—ÃÓ¿½¼@Â<ƒy*EÑôéËæÑ”²õhʺÜ禢7MŸ¾躩¦IÓ§ëœ}§s«õëžIu9,J´ü ¦L=–rÙ=HtSY¸¦N—­G'•#,–rÙ}º-}f±”¼¹]Ú&Q_Nþ•­ÇR–fZ–¹]Ú&Q}ïhh—ö£IÔþß»´M¢ÎǶ+@“¨¿¯¡~íQ”s»R84ºOß%Ù§Ë¢(ùhöZjjcÕR} ±ô(ÊÖUhkêt)7Ûä±—lŸ”  
K_Yø¤†NWîóú ŸŒN˜MG(›• •­‡OÊÖ¡ù®|µï_Õn‹<ÞçÎÐslÌVífh½ÆL°KëÑ4êÑóÒzÌ9îRy4uú‚2jû2[ôd´bYú†CÞ´„sï‚ ™k^jy¸æ¥ÖXÈW0(›¹×Ÿ°Ìw^e‡ì–ä=g;Iï{­°ÝO©Žìš‡LïnªßžçÈ^.˦ÃUØ–žï={Y™#®6»ò­:¶ôî^M—q·é•÷wX…‡ÁvX<¥¤ý]&"ÆÔã)ù¦JîÑÖÍ3aÔ´+óÝÇ,NyweTŽueyy¦Æ|²çÜ÷us =ó[¨Ìs~¶wây+ã;¬Þǧ ›\Λ>{ ö`:»Â&ºø¼ºÄY_V“Ä× AÝcå=+‘Æmó«·²Øz<åz¢)e Õã)ãë&ÝÇÔ&XŽ Ë®±`¨5Bæ½XŸ¢.‹:ýŽ­UrñR«u5l–ghËe±• ÁÖš<›M³¸¿»¯ ë#jÂZ*pßÛˆõžV…µcê#Ö{Z•»÷Ї®÷ÝyåÍ©^æ9aYªóMT¥ïî»ÆÙ̫ˮ*«¨©ù®«W5‘éî¾ëúcÔæ“M¯ÞI·EVÂT aw§¤Jž³»®ö’ÊÖ+ÑHÁ_o¹G Ô¬êëöÝ‘Z“’oÓwGjÍòºÙÜŠsîWÉ.®µ"šëºÍN‚”4®ÇUJúÚ‰ŠÊRô¸J>R•³˜ïO®²µ”=i«%2úä “eЉ9™ÔŸ=]ÖR=¬2O¹Å£þma•o=“¿MIh{t.8l‘ήé#³ej0[ î·âà¶<%N¸UñgÇnkÑèUpª-€Ûcù°\ãµå'¯V^[ƒœ~ÀkËóû˜VõWllÓj$8S5Je/@ik¥áŽ)€i£9-‹“ÓJYÑ£8ÁiY‹˜œ–)þÈiUW´ß0­€X2$¦½V—ê6LËW„µÙ ‡µRyú¥Ö^+î °VÁkÝ¿¬Uâ¿þ!'¬eÙ8ÂZ$­•¶ÇÂÙ–haàd¶ òëÄÈV’]ÛM²UÜàƒPÖ2K"iíyâÖª¦™½/ µ _l«sÛsÁ ±-‡e"±Uë{¸,ˆ­ôÆ=´‹Ä¶:†}sÄV‘==)€­DÇýCI`«Söî l˯N›lÏݽNÛóvnI`{^DÃló¦ ØÆð±mÙ–SúÎíy½:²Õcë\™ÈVïeëc[.[¹ú¶b±Õ;Ô_7Û2à-›‰­$ã¶B±eNr[æ%·U;Í:·=Wßt·=8¹à¶êi2ÝNoÕ̾§H|{ðKo9œ¾=W¯ŽH|{îðWoµH37øVötßò5 ¾åüJ|«ÏÁÞ™µÓ[-%{EÒ[e÷ü d¸ZÐöúd¸yœ3ܸ0Ü<§CÜss?…Wa=?!î¹x°?!î¹{ÖIB\Ùú—“7ž n¶Ó!®&õ¶Ù†½†gÄ•­BHqË[Ňâ2.)nžÌ)nžÌ).¿Â ¸š¼z,)nžÒ)n9Î*Ù’âæ9â–sZ bܸ=`\N² ¸1±âž»×÷%ÅM›SÜ” ¸Ñ› ¸1Àr•”¡9ŽD¹È«L’¯H.g ¹ì.pÜsïÄ49nÌ–à¸ñt^Ž{îžP–W©&zZ*r\EÂCǽgl19Ç×7>à¸ñþ‚ã–sZÅqrܘ´Áqãœà¸çæåXÉq™a›WŸ¸¾®"ÇÕ½÷¤mä¸yNç¸JÏa<Ö1n°ËµÀn™eN$»èÙ »‹;!Av @vu½>{ì·{ AvQG)È.AÈn™aÝ%Ù=.¸Ä »³¿¸Av[/ê‡QB]ÍåU€éjéø=¦;{~2]•êûãdºåJ–ôœL·\Ê2’ébh“é²Ò™îqb…¨«ºQFuuÛÓÞê;Ç€ºÇá1 „ºìd0]}/;ÓÕå\ÏëLWßtÓ“é ôi˜H÷Ø~¬ËžÒUOÛžn¼ÄÎtYä‰L—¥¨ÈtÃ÷ÿÉtÕÑÎ>+ÓÕ…l{L÷Ø!;Óe¹/2]¡šÃÎiL÷XADÀtå©™¨Ø‘îqx ‘.Š„‘è+6z@tã‚è;@. éÊÉ?aëHW.êsQŒ,—EÓÈrå'Û‹–] ˜{lT;ÍÍ›vš×sœ[ÝòÖýÀ¹q5à\Q@[™ç–ôÎp®ÒˆØw87Þà\V˜#Îeí0àܼœãÜcCØp®Ò>­ÎU?~FHî±bó$÷ب¬u’ ’«eVçö¹1Lrórr•ŒÍtç¹,´F’›·ç$W£Õö[Ar£—Ar~ms'H. »äÆCÉ1’Ël$¹jŠùFÉUFq׺:Éå;Ë"q¹q˜Üxj ¹ñÔ@r£Ar•ÂS'¹¬H’{l@Ñ ¹ÊhŸ \^ ¹Ù'¹1‚@rËõ|«$·\Ï7›@rY¦$—O W£ÄDþ¹ñÒä²Ü#hnxU@ºÑJ Ýè Ýøô½H7<* ]½Â¶9¤¯)n^Ê.#ÂÉtãà ¦{löÓåX'Óåg€L—‡L—UAÉtå{\RéÖi®½“dº|ªdºú÷%™.[2]ºôdºtäÈtù !Ó¥K@¦K÷L—)™. ¢F6>!0]ú›ŸŒ"˜ô~+ÔÔË¿ÿD·–ÖxÞ¾!­ÂíŸ ]æ'Ñ-‹4«\ ¢;ÒèjeêÙ‰®/h“èN—ï ‘è>õç‰r5ø öåjð4Ê-6Ç3@¹©»u”;-R¯;ƒåN. 
–6°Ü‰*d°Ü zQ°\2Y²ÜJA²Ü φ‚ݰ9˽ïîè9Ë~6–{9l ˜{Q%ë0Wh«ïçæÞ§çŽÌÍS:ÍÍS:ͽOÏçIš«b&žͽOß"&Í-¶¾wH˜ÍÌ }6`î}Qèë0·<Ëè–0÷¾Á<sc 9ÌÕ°4J˜ûIî«3Y*Ü> ¹²Ù~’“Ü|iœä-É­ß^Û¸r’û[¯v2MÀÐrÓd·¶Ï¢ŒãÍ3Ž+›ù‡à¸²™¯ šÇéÆ ®l¾Æv¤+›{zŽtcŽÒ•ÓáÞ£#]9Îä鯠Ò=<³m ]Ù\äHw8§!ÝzNsœ é~Æ0HnþkǹÙ:ÿúÕ1b«ç¹Ùº‡çÖ²å˜óÜáã¹ñ‘Ïlçrç7f87»Ýq®l.tsœ›é87ïÜq®lÆœææÍ9ÎNi8W6_©9ÎÍ[wž;g<7Ÿ·óÜ¡Æs‡ëÏÍîtž›o·ñÜúÃwž[_ür”«Ÿ] æ(ש9~5‘8I6Šs(nÇŒ´9Å®ß1ëp˜QÜÁf7§FǸ²9´qŽ›6ç¸ÙLç¸ÃqÆqc ÇÍ.sŽ;Ølù4œÓpnNïŽs‡ã ç÷g87ûÚqîpœáÜl‹áܨÎsmÔ9ÊÕÈñž£Üü8=(W?{̼£Üˆ§ÊÕªÖvŽreó-JC¹:%è°¡ÜXšåª™®ût”[mžÕÀæ…<§óܼu繃Íx®lÈx`@w8΀nރݡt‡û3 ;\Ï€îÐNºÃqt‡vÐÍã èæåèÆj@W6'˜tcEúf1ϳÐZaD·^é)†sv¤;fHw°ÒnÌnçHW6ÇšFts˜8ÑÍWˉnµÙÞ¢Ýá8#ºƒÍˆîÐ#ºƒÍˆn!'º9Ôé÷`Hw¸ž!ÝΆtó0GºÙGºƒÍîpÎÒ­LÆñ«!ÝœgéËÒ• ¸·#Ýá0Cº²y"-Gº²y’$GºaÒ­Ë›N €te3!:®là¡ö¾6CºZ2Ò•muy¯!ÝÊpûÊHWœ Œéê“fåúÀtõ+€éVvç²`cº²™LW6Û¸w¦;˜ŒéÖSšB™ù¥Ùgºõk~ ÞêOV§ò·å[˜M¨ûkòä>e›ëSJ « ¨&µû5yr‘«Á³åîÀYÈ–{^Pe"[nùú™XŸÙrŽ7fËÝoÀ'dËeÉwfË•¯×ñ’凸ԓåî+"«‘,7“úz²Ü}BO#Ynq~s#[®œmk¦gaØVÈ@‘…a›™ÙÁ¨.sê–å‰Ë+Au#½0¨n±9Õ]Oä­hÙrw_áFÎÜÕƒ^Iu×Õ¥~¤ºªê2\§º‘?Ø©îÊÌ#™3÷üN¶Üƒ•éro,‰¤¹7R;3iîí³b$Íe¢àLšëƒ<“æºR@·¬ã§¾’Ф¹·ÏW‘4—•™9÷òé42ç^>eFæÜ ›GÈœ{bsO¦9@æÜÓUÞLœ‡Eâ\LxçF:]$Îe6h&ÎEŽØHœ»3ãç¾)—‘29©ÎÍ“!i.²Q£›Éo#iî“pö§ËeÎ8¦Ëã\¢»Ý+A¢»]d¯H—ËÄkL—‹TP‘4—µ˜4w%“FÒÜ–R ùrW`æË]Èf‘5wEÖ\š4w"©%Í]ÈJ‘4w!cEÒÜ…ÐIsÃæIs‘GÒܹ¯áé–ã¾ÉÕ©~Dru*OMÁŒ¹3‚Й1wîO;P®l¶«sgïfÆÜ‰I1—‡!aîÄü H˜;AµÎ„¹20a~&Ì › s5Paš07MH˜;½ s&Ë: +›Å¼ÕÂŽ‹šÜ´¹&WS&à&w¾½t!4¹óíå©ÉÍÃ\”[l»éu]“;˜º&7®MnÚ\“;߈j€&7Z M®Žc"Ý®Éõ×+X®ÞFóu(Í/ÏmFiî|yô¥¹²Ø€47ÏéÒÜ<§Ksã8—æÊäøÒµ¹ÅæñÖÐæÎ¢  Í// Ú´¹š»<-®ks%jqØëÚ\9h·]›{-VžÒÜ<£KsÕϦëÒÜùäÕ\š;Ÿˆb>·Ø¬%çÆ)!ÏODK@ž›6×çΧ¯W¨ÏÕĸ¡ÏM›ësÓæúÜùD€ ô¹is}n7M®Ï¦@Ÿ›Ç¹>wF¼k+ˆG¸>w>Ýã§@7sn^ɺÙxWèÎâN Ðͦ¸BW—ûxYçÆé Îo¯éNq.'mhsË)‘½Àµ¹¦ÙÞ#´¹:ÎüUhsçÃ3ØQ››çtmn¶Åµ¹:Îö€ ÎÍã\œ›ítqnØ\œ;ºmn˜\š&WæÊd^ð«ÌOäUƒ2W“Å”¹y:Wæ¦Í”¹ó`4(só0Wæê8Ûƒ2w>ˆ2]™Ë‘@e®|iKe.:’ÂÜ<Ì…¹z?ú2„Â\¾Áæf3]˜;‹ ¼ô‰Â\ùÔßP˜K÷›Âܼœ sG[ærÆ€0—s…¹å”UIa.g s9þŽçûe-øm÷wf[˜ÚÌ;$[X —ÉÝ0ð)ÍÝ& U—æ²9¥¹"t¹˜>wa½4$[˜‘ˆÉf&[˜\ñyto°(t—Ûwu¨ÐU­)t²¶é»(W6&bè]°²’Lè HY~ t—ÓU 覭£\™5Èró”.Ë­]Úy3d¹qd¹Ñå›G…B–›Ç¹,·Ø<ï$d¹ë„\害¸,W6K^Yn±!;ƒËró8—å¦Íe¹q.Ëež4Êr‹Í“c@›«jµ–+ÚÜ<ç£ÍÍÓ¹6—+js×ÙÅQÔæê]´] ׿ÆÃ67:ÚÜì,׿Æ9¡ÍsB›Ú\w޲ÜèDÈre³„?åª'mK 
²\õäô}Ž;Ø\–›çtYn<9ÈrG[—å2ye¹asYnš\–«¥­eP,W*’žÌªÜ<ì£ÊÍ_]•Ë54U¹ñb@•›Ç™*Wí³-[¨rÓæª\}F,Ø ªÜb³b#TåÆ9!ËÍã\–C²ÜBåæq.Ëg Yn !Èrcb€,7d¹y=—åÆ#‚,·Ø,ûT¹q˜«rÕ›–9 ªÜxzPå® …‹öȶòAŸ¯»|7”Äñ¼Ëw±´mønLßöÏgã×y™Y¤ì.Ÿß}¿þÆeößw™òÔ2ó,{.ãeî÷2óï¼õܯŸæõ(ÿ{ë:¿÷~Ä:Ëjn=®iþ×Yò~þÎ56¡=®mZÏoëºk‹¶‹óú¯z…ÿö§ÿþóÏü·¿üñ¯ýëþ׿üëÿÜ~—ñíEX5M ÉrÚ6R~rÚÒ·C©>Ú–Çzy ÄQYß ÷­‹ã¸{qÅcÙ;oýØZê‡Ø^ûqNž1oõÔ0Ç9{Ƽ*)/ø+náµØUè±g"AT¾l-Ôt°Yµ«( }¨XeS¯3yÙQK̽âÊûp…ô¡òM½~o³&[°,>¨IÓë:Îù€Š;ôB×oª¢US¯ßÇ'2ðP}¬¦[/W2M£l÷«[Wk{ÜÒ€4ÝzéC¾VO™··sÈ´yƼ i'{[3æ­ ,ågT?A+Ϙ7ùÞÝq]mȪ)Çu5a ,¶p\ǘ¥•Tú¨ÉÖK+e©¼QO˜·"œûž½üÉí²žã^›nýº]ƒqܧ§Ìc-'éAzʼ¸ž•f«÷nù¶x]=ƒÜçdÅÕÖAö9Ù%*î»X§òÞ¼ºõb²'™ÚpÓ)mCíœV‹°äí“å¶­—ëØ/@@%«SÙšl=OvX€ežììÙlK윺G§4)Ké°®éÑ}L/¯hm1Ôeëa–ºƒ©}Ζ‹(»q¶”ÐÃ)W‹µäûxJ Ҵ뺻óý¹éÎô³íž’4Ùºb²²UÒß&ÕÙ÷‘O…Ð4Åziݲ?¿öØJV=gË!šý°L=î=À2÷´‘ù{øJÕãj‹UKVûLÄr*X´‰ÕÕëý#s.›EX¦mÿDXòù*"¸‰Õ£Ó%¾kbõ¼g˪Zb‘³Š“ú¤j»K§$ M¬^Ni›|§ôM¬În\­pLÜ€6šT=³v'f›RmÚëm–¬&[´ Û6Y eÚf ´ŒnÙ–hÉƹ٦F6s³¯¸l^F”E[òÎ?@4¯sX°eôþfµ\ê´ë¸dëÁ–q«ÇZÆ‘ü£ÅZj÷Ä åOk-ÙM£1gƒ>J(že¡Ð£-ãíØW‹¶Œ‡¦ü -Ú2>3»Õ»È1¹[$~ø`çn ­sßO‹¶Œ'º_mý¹›¶o8çmÑ–qeaÒ£-ãzªÈнV>uIAš×ÊG$%Èl3¬?")Aº×ú~¤q‡Õü³S™Ø—Áñf›Ný/ÕGwP×0œ*ØÔÕ÷ÑOU?\m:õmXÉþUùÆîŸ®?}šþÙ×)µ]üS¥p»gJOA%tÝ3uïó²àãXíÉÖ*ïÍ‚¿ÎËʱå5ì®L¤#Ý1ÝËfyñP®öš7Ï‹w=/@q†=%Rþ]5òÕà0Wò¥ØJO‰gsºl]ƒ£èFoÆå)ñFðRÌrO‰w¸ÐéRÞ¦ÁQcï¯ÅR5WLŸe³”x§’^–oæ­q-«‰o”Fàüüì› ÷ïÊÒòbz¼Ë™[Ý™o—[å[ÕäïN¡µPÉaCt}Ò›ÎJ%Ûàì~{T,à쎄[„³J×CègwFÓÎGûêÎáì†Xa°Ù²|ðŒ`³Zv´¥ ЬÊa[þ> YkK4[nųrÍJ‚åaËŽfC„4›ê:G³Šp³€W Ùˆú Õq6Q8 ­d½«6hÑËÉé€VÇÍûw­F¼ƒîÐêl–Œ€ö±Îr@«Ó¹ÄÏ­Šì˜nÖ ­Nis8­ÊŠÛ‡ „V—…5B«W߸­f¥åÃ'ÍêJà¡Æfu6¯‘älVóØäœØØlÿœ¿›Õfˆ΂ÍÊfѱ`³²Y´)ØìpÆf¥V´ Q`³:Θ5جîÏF$ØlÜ»³Ùï˜ÚD·`a¹`³Ò[ÞŽ{ÍÖý£ÕlݼxÀYÙzo°Yõ‡¥¾›Í(g³jŸ½`³å°od#M)¬lVëLV6›çÀduS¦å“•3a†ÀduÜäèØÀ¬nk³Õ¥ƒÙáŒÎê¸ËyªÑYgɾ@gÕNËTF«ã¬T­Ž³Q`´áHÒÊf ŠÒæ-8¤­^‚½i«2ØVži¹ÙÑiõógáé|V¿Ú· |¶>[99Ÿ­¹÷íu0>›Óùlv½óYÝoϹ>[MmŸ|Vg´ [à³á ƒÏÇm^ÒUÒÀgó©8Ÿ |¶vtPŸ"´ù4Ðvl6Ö,`³‘Dl6›hlV‡ÙÚÛѬai´€fëèß  ÙAÁ0o°ÙðšÁf_ÏP6£QÙá:·×39=Õ ¨ì`›½žÉåûÛ ²jŸ£§²õ)ö.¨lô»CÙÜeóöÊêr–«P6ÖS€²9rÊf3o¯f‚”ß`²ºœmƃÉf3Éfo“ ¹˜l¾fÎd³™Nf‡ãö§¤ Ç£“ÙáÎœÌÆ§ d¶F|ôg|V—³üqà³is>›ˆóÙì-ç³Ãq‹—4¹<·ølÎh‡vî^ÒäöÜr´ùŠ8 Í¯«Úáz——4‰vÞ^Òß'µÕ °ý'µùmpR«'kÙ,Ô¦ßä¤6?˜Nju¹¶ T«ËR|‚!ÂIq`[}KCõl‡{6`®½[™Vç¼Flu9ØæHpb[ýȾ±­¾¢éÛÖþè;¼NmÓÝuj+gÜ’Å€ÚVW¸/Ám‡¦Ø 
’>°­C6§ÛŠÂuÕ°m ùê¨ØVmYúr”ܶœÓ¥Û”3n›ü Üö]á‚Øj½lS6‰­’6HЈ­Ò“÷sb[™[Y$¶Ê±l˜ÄÖ—$$¶j„á)[5Å6óHl•£Ù05ˆ­lý‰‘ØÎ»kIl•T¤+Hl‹«ïOÄV©Š»çMlËBÃÄ¶Ë Z l«dÛ*qy+&¶]f÷†ˆmµ²`[B(b[]ÏúØV@Æ‘®a[&è"½Uþ,@¸HuD‚ËÌc•à2»Ço-NýëràÊ]ûãH”»ßr‚ç÷е๫wîÆ”„º›çÏ%Ï-޾ë9És7Ôü&ÏÝ ¥$Ð]|O"€.C’è2™Dwó¯kݥϯÑ=|ÕL¢ËL¢ËœUAtwdÑe^]¥´6‚ ¢»y†õ ºž“èvªE¢«%µí ¼Dwž¼ô‰.‰[ݽûîIt¹xÑ¥ÇD¢ËEE]|Ht§Ù¡!‰nùÌ}öˆt§Ý÷‚é¤Ü6tÜ@º¬kL¤Ë _"]å#sI-ä¶74ó@ºÜ9$ÒåV‘®*x¢ÜvóxôÛBßN¹­—ô&ÑemËPÛRõAµ-OùˆmQœ@WåMQAµ-dô¡¶=àßCm{±èrO=Ô¶+êTÛîXœQn{Àµ¦Üö‚Ï ¹­Ò{˜ ‡rÛÕ·¢Bsë’ÜîQw{Ý>†î–º!ênO.êÛ4êÛëöìq¡¾¥ 9å·®Ìuõ-‚¨¾-ó)X1Ô·¯6 º[%´•^ÓÝ¢w¡»½Nìœ@wÁÔݺ ºÛÕ«cQw{}«Ëmß`ÃzÛš ³/¹Rpër'n¯»Ü^'^n³-!¸ua ·×áË* n¯Û¼Cêm†‚[Êì ¸· ‚[uÙ î^ä†YCpë5Ü*×KS]p{ØØ‚àV4ÁØÜ^§•çy·å:Æ" ·-§r½íuØÞ1å¶×…M<×Û^+pô¶×M(èm㦠·Uÿ:j…ÞvŦô¶yN×Ûª-6UAoËêmåq¹¦Öõ¶×îë1êm¯³;ô¶y=×ÛêþŒý@oË‚†ÔÛÆõ ·ÕäätÔõ¶Åõ=×Ûj¼º„×õ¶Æ‚íª) ·×Š­¯Wp{maÜnø\;Ö­™¬lßÖ±®.e ^Èn¯ º@ÈnÃÙmô/d·¥Cv«+}Ú‚êVwnÛËPÝÆ£†ìöZÁ(!»ÍsºìöZŸIncCp[0¾¯Á­°©ójWÝ›­L(½)ÒÛò—àÕ5·×†½W×ÜÆ—š[%¡¶hhn[O@n›ú¨nó<®º-6÷^¡ºeáªnãÍrÕmÞ’ËnãóÙí5Ãχì6æhÈn¯  hoc΄ö6æ)ho•¸Î6õ¡½÷â[õ­y<߯ÀvЛӄSÜáþ\|«¥±3Hß–¡h‚#P\Ý»-0@q£_ ¾mó"®ÊåLØ…·¼k`Üèå—ãÆGÊ[MÁÝïÇ­h0Ó9n|¡¼ÕDÕW|TÞŠ˜˜×9nžÒ9®là£Æq5F,R7¦pÜ:îúZ7Æ$8nuÌœÇÇ 8nwÝ­Ç ¿WŸ5‚ãÖ”VãfŸ9Ç Ç7ïÝ8nÞžsܼ=äÚà+;\nèÅß r§rçòïÅÝ-¥õ2eÐHq· qÚ¿4e‚ƒ!e¶!"eÂ…XyP\-óûôNŠëc9)®h„É4AqÓæ—;¹¤¸¯ê‰ü6“:8¿åî#ùmÔ/¿mb¢Ûà€@·Ô1ÝN;äÒŽn§Ý–ý$·Q¯ä¶ØŽhqËoñ@ÜNѲ“Ûb³1p«ýCàvC?Àíæ¬.Àí‚!p›tàvõÔÿnXôv}eºÎmõ™ì¾¥¸Ób8 °-`Û"Ê%°íâTŠJÜH!BlËÎ"¶]˜!Øvq/•¸¸= qó0âÆQ ¶¸9êp¬Cî´…=:\úÀ¶ `"„¸ÊJkPBÜ<΄¸ºÛ²ñP䪆"wZ°õEnܹ|¢¡È ÈU[–!KB=ÄÐĸÑ#ãæm»W6'ÒãÎÝid[ó ×¢wö|>!ÆåmSŒËGçbܸuŠq'+|D-®ZitˆZÜÉ u-n4Š\lòP‘›ÇA‘ËíWäN“C9 r'¯Ú‚\v#¹|l”äò¦)ÉE ZHrÃInØ ÉëA’÷I.{Œ’ÜÉÝÞä2¶’’ÜÉ£•B—Ëã¨ËåýQ—;y¥+êr± ÂÜÁfÂ\neQ˜Ë[„¹ñ#„¹q_Hš€`ÇÐ粯 Ï¶CŸË¶SŸ;}ûsÊrãŸ;¼-­»\ïŠd l9“%Ü›ý9u¹Ø4 ].o‹º\ö0“%D;‘,!Ú‰d ÑNs£_ Ì¥Â\¶“ÉØsã”æ²[(Ìl–2!.÷ sù^S˜Ë÷ ÂÜeG/C˜[l¾óa.é)ÌeS(ÌŬL].[I].Çu¹qt¹ø‚Q–­„,—“+e¹|ñ)Ë d¹ÑLÈrÙ+PåÂDUnšL”Ë/D¹qãåÆaåÆaåÆq.ʾ|U¹q6WåÆ}A”ËaQîh»Íuõ½ˆrÃFQns¡Ç zÜp®¡Ç W‚\¹øFª!ÈåZŠ‚\.)ÈÅr•z\™LŒ, ’â0‹BØ\Ž;mþ†·¬?L;i4 ‡×›€7MH£6ågD…Í–GñbTåRÇNU.åå‘GquPåF~ªr©yÎ< 6]S•K”ªÜÿ¸·éµG¯5çõ+bh*[E} Ñ¾  t3è‰í2àÌ‚QðÿGsio‰ëYŒ“é<}˱ߠHIõêáâ"4Ý¡Ê]üõ@U®o«‘ªÜ¹ºŒ”¢\ʯ!Ê͘‹r[Ì6V 
/¬Ã¥(÷ÑfCŽÛþ‰©fÂERdº(P2M9îáJNÈq33`ÅãN¤ÍÐãN¾8é¥Çœñ}Ÿ£ÂlzÜ_c{«äíN$†[áí †K7.…ÊqðqÉé¾Ùöv^¦ÙíÍÕ·G*sM}{ºÕ·ýåúgQ.ñ;Q.?ëå‚`åò˜D¹üÚM”ûþ$Å%GOŠëyÀ¸›‰qÙúã…ãF‘À¸þa”ס0nðip\~‡‘ãËüfŽËf~;Ç}qTpÜçž%¦Âå!ÂåU®Ýl"Ü8&n3.P,.O›Ó|D¸l .›B„Ë~žDö…py«‰py«é­À{C„w„Ë™×A.gÈrŸÞCw…@®pWàYÓ]MLŒ ×P).¹Ýsh®-¹BÄââBâ>‹ü–“q4VàÈ~ËVß²›’ßrvŒÆ œº|ø-'”Éo9Òò–~´¼¥9øm´ü–Ï-ù-¯.{.o.Û’”;‘ÁvÁuPH‚å@p9^$Áõñ‚—åÈpŸþüöË€my•€mÉÛúÌ*±-ob[æ'7¶å-&¶eÇ&·‚Ûœ%·Å9ƒÛ²ºoä¶>]OnlÜ–7¹­Q©Ûú5$· ?ZpÛ85€[¾In¡ r Ó ·á: r‹sr‹ó pï/Ž·x× þ·åî%žq p‹+ÖÁ-Fˆ·áÐ \t×·¸á·à n9Üò¾….Ïš¸zûÝú[e0SØî%ßôQ˜å8¼mɹ¯Ã¼mß–ÂÞ¶ï lêåô6½ œÞÊåõ#_Üö%j(ðV7¬ÛªÞ¶í»µA€ÛúŽÙ,ÞŠÌ÷!Žð–¦0”Ꮁo9ñ@x{ »®Åm„©NoçݧóÂFaÿ€ÞjBŵ§ ·Å:·î`;{Û( ?›Býá†ÑLã®P;Ä Xã1Ø(ØK2ŒqÃZƸaI {ÜÕ“þðRÛØã®n]–ûÒ·xB2Øã¢¾°ÇE}°Ç-ôn€=nñÌ>ìq‹k¦;®ü¥;®}k†9®gÁ4Ç#Â7|iaŽËÚÞÞ¸ßôƃÁ7baŽëß@0Ç-@3aŽ‹CÒ%wˆ™K.ý‡é’Ëúè’‹é`jr3—ÜB‚ —܈Á%O¸ä®ÔÃ%·6‚ær+‚pÉ}²C\ÎŽâ¾þ9ØmûçNÐá‰;„:ÀFÑ7Ê9À ¯¶à†›1nθq²p³œ;ãFup#€›1¸cìp5ƒM¨à†s5n‹ùºGÜ,çWŽ×}öž7Ë9Å¥’ƒ7cNq3æ7ÏÝ)n”Å͘SÜxtâfÈ)n4—&းÑÉ^7þ½3Ü9 ÷m ÜŒÂ808ÁR ¸áÑ‚›å㆕$0n–sŒ· 7ÛâwŒuŽË‹ Œæíà¸QÌ1nsŒ0. ”Ëû’!¹Q@n\€Ü,ç 7copŽ›59Ç­Àqë”æ¹Ç ×p\V’ãΣþv©mÔ6ÞJ ¶ñŠµÍ˜SÛP@m#µ”Ô6ësj›Çtj1PÛ¨è6ËÁ·|(½Ì 7²50Ü1vz†Šê^ 7k‚!nÄœáf †¸Ñ 7Ä q™-ƒáF9âF9âòúÓ7œmáˆ[dÃm#nÄàˆí„#î;mõ,mL]KËQ޳½³æ60µýÃoƒ–?ÎíÿÖÖè/?þüåŸÿ®½¢ÿþË¿|ùñŸþð?~c=ÛÔrïµî­Šv²ß\Óú5¹˜·ì´´Îo¯iËš>alìòïÜ]žAø3ÇF?ÏÃxÚg½ 4ï7Ö~ýø9õOÓPû%ÑùŒ÷à¶çþúñsê/cíßß|wGß *}F¯{ÕþîvQÿçô»W ®ŽõFÏ{-ï®Çú?©ï½ZP¾R¿õ¾ox+ÖUkkÞõ^Ü$íiïHµnx‡L_êëò«_UQͼMíUu”Zëñ ÕÔßVM{!¶ªyo—w_Æjλšù7žNÔËÖþwì¿TÏo=Ÿ*æ\kÙŽiþ…z–<Ÿ¿Q‡Hð¾-‹¾XKÑÞÊíoíËðÌ:ôŸ«†ýó¿ýüóŸþû/úë_ÿú§Ÿÿë/Ç®¿)¥øòMë)ݶøÑ%Ï—ÏzÂùO‚‡2k£g‹Ê ÐsØÆŸ¡»Q¬OOS[s‹MŶ0²·|`/÷ð8[>®œæ9wÈIó!ùÜŒC1›>!C“îã™»ÙØÌÝø*$Çq8ûÀlñAÙw}’ì£3®Ù—!nª›•’}ø^_n‘)ÙGG¾g×qWZøöf-ö-›ï+­˜ Î÷À=ÎêªP.†×j³¾Àf|‡Di¨wœ‡¯!©¾i€b¦ñ.îHíÑ•˜ð¤;%÷èz~xÙn€’ÊSbϾ~"SÅL\ Uñ)·\2š{ÊÕ¯Ë鱬òœªoŠpúŒô9m¾r2MźÀêìsÚMsˆÅŸ§¶Ù{L¤³¶4·êàõÔ¶+î9cùÔ¾¿·ï¹ûrzßÎ3×ô{_îèËû΋†?Ë<±Lï»gbbãsvSÝ,W» n(·ùJÃÝåzoÀÝFNm Ö”š†A±Ó|lë»`Z±Gzö¬j=¯åù¶Nº?ºç2»wÒîÆ°gúºGu;#3ï=¥9|Lc¹²Y1[­{¸õð©m–ú޼™Ku¿¢§ñ¶—U¬à<õá}ï.À¥Æí•fkd7ÉêûHh0éÊåsq“w­4îcÒY&³bÖ)]~–¹‹'{ÛËâ+S±nY±¾oÃYߥg±Â†¬æ||îî@–ʥ涃òY6߈ku3Ó­ê›yž-ŸÝëØŠÃ|†ãª{ëÅ‚òV‹m‚Âpëûœ«ï^v*KéëmÙV“Øh{½œža蘶ÉÚ¹ú$šDë+åεÚöÜI±îßÝoÚº¹_Bn·EÍ¥þ }ǽ(û;®= æÆ/å¤ 
÷«g·Øu,ÍÌ?[.h¢ž|´ÿØ>q°–gö6â Vç]©Å\ª[9Ûáï¬kW–ÆÆ?­jÛJgÝ͉^ Gr]4T³¤}½ýä+ò—> )f‰6a1ñ{ËšlO‰¸]Ûl¦ïqÊ-yŸú8ºtué+fËá×›·tÜ6‹Ñ¦A]«qn¾“ùXFsnÕ,×u…»çÜl÷¦¼ú-fkÐy·Ûó³U‹®X×EÛÑ7RP?°d`³ý t1_êûvw–(.W¬ï¸×qŸÝ[×c_lƒhánsÊ{3ï«›:ðÞìÕö>оÐbfh‡ë¿o¶o€¶Eê&æíH==½v9ëvS­¹//Ì7²KhæÙlm9ÐÇÒÅ]{ÎcvWv7máÒÇÒÙ·j=âæØ9L±¾õÆ9ûZ’óp|îêvÊýX?íæ-§ïP[!r¶ïã)vRìÙCr KX´÷Ð3žúþ_íÝây)®~ûê›eÅ©©Y}PÅöP§îHTg_¢ Øáƒª-::O¸pØÜ¼b«ç¥<¹³z^õmž—N¾nêôä,¯¥rˆ>¨²«¼&yt{¶xúÎ-Zôy^Š y-1|¡R ΞŸâ!ÔÕ÷ íoÁâ)*N¼WÏQ1&\AKRqk[°z–ê=©Å6ÏRñX¶àîÉ*nn –­bëº;-]ÜËüŠY¾:ùÚ°yš'OX'' -8{Æ:ùÚÊ\e˜`-÷nE@¼—´Ùð¨#Þm†±$ï6Ã߈w›ü •ˆ·žn+HÄ[{§L«Y3–áUÌÌ*Ax¥.·­yœðÖÍQÞºùBGÞºù’`Þºúž ¼RÎ÷¯}Þºb¹0o-N xýó. o‹9ˆâmŸ“F&‰xµjÀ4@¼YÎoá&úF¼u†(o}E oûPöÝÀxë¤ÆËrd¼Zѳ$ Þ:›ñF1'¼ëá{ƒ‘ð¶bÅI´^Åœ‚ð®•ÞõsÂ+HÑGL^í%ï„wÝ»@x[ÌV%ƒðjSx'5 ¼ëî;P‚ð*ö¦1Nwu8ç*NwcÿyÐ]ÅÊ#Ü•M¨²:ÜUÌéÊK¡—?ÜUÌ¿î^~¤_‚êêGg;Îvµ˜ÝI€ÃݬÈá®Ê9 q¸«˜­ÚÜU̾¤œí^M1ðàlWsþQì„WåÌÕÖ ïÐ’Nxc6„W1ÿªw«ØÞÝAx³/H¼Yc^Åü³Í1ï¶.ø„tÖ{U×G½ Í ^ÒR ^Å|ÊÊQï»Pï¶ÎþšêU Ëó€z³¨WÇ´¨7ësÔ›gæ¨WåºÿH¯B–‚ô^ÕÙ0ç¤W1ŸÿrÒ;œ‚áÞ!Öq¯ö}3'Ç½× <†ž ½ ™» h¯ŽhËí}¯0ÜîÐW1óÀôÕ1} Ï¡ožgC}}UÎÌç}{Ù÷úφz¥8;¿þ¥Ù;õ*fÓü@½—Ç“‚ô^×À^:Nzóú8éÕ!mm Ho¶ÜIo^'½¡~êU¬/déͳ3Ы-XsÐ'z³.ã¼y@‡½×%±™]‡½CÌ`o‹} Æ›WÂï3Æ»µaËRC0^•óIbg¼×i9‹6È«cZJ È;ó‚¼ÃEò%ÐYÂP¯b¶¨7ŸG½ÙßõfvÔ›}ÑQot*'½±©%Hï3Ô›=ÎQïP®£ÞëìlÚÁP¯B®†pÖØYoœ›£Þì§Žzó‘wÔ›çæ¨7o«£Þ¡>C½CÌPïpLC½ù€8êÊ]k)ÊáfÌ`½Ñ—Ézãhd½qT²Þx Èzãi$ëGœ¬WAÓs‘õŽAc½ š½aïx*{¯àdìhòM^ó¨öŽå ö^gÙña¯‚öYLØ;–4Ø;\<§½ öE턽1öŽí1Ú«àä“N{ÇÃík´w+»¯Ü¿iïu‡‘E¬êË:íè´W%»ˆˆ°7/›³^ÔöÙ&ëj¨÷ºjý{ލw<ª¡Þáz;êOÒPïuØ>›Ô;ìTM1³ˆ êU°út‡£Þ«¤OÍ8êk¨÷:¬O¿8êNÄQïX§¡^WŸ‚rÖ;\vg½ãa¡ÈßݪŸ¬w<ì™7ñ{·»þuN{¡î]ûø—Ôžz!îÝIiü®+ö˜ùc‡3 Äœü¶oCóz#ùmrëGâ^±ógúÅ÷8Éo+*lä—]ä—3Éo©ÄáŽ~ùý¶ÎçÛµý¶cúžO@¿¥8d#úm‚;¢ßR xúmO¦ogô«A¤o´òÛ†S!üÊщñ‹üŠ9+vò»ìPƒü.»{Ü‘ü.›{eüÊ{ÊyßeÃ'Яö²Äè—’s²_q4C``¿ËêƒÙo;¦Ù[‘ýj1y‡YŽ~eÆå\ÛЯ3ÂD¿îá•è7Êý¶˜Ë~³œ£_•3tô›1ç¿4«$ÿmo~—2ÿÒ8“˜Û]+æøúÀÜðŠx™¡®žO‚cçÀú‡ö‘ ¼L€ÞàÀËE#8p\,pàù [†›cÄ·c:-ž¨š‚åjg/Á2Ês²`]“ìËÔÄ6Áí®[ž7Ÿ£% n­4ÌeÁó^ ¬îäØI0÷|# ¦Ó(Ip;¦Íü“»÷F'Á´H% Ö*£‚ Ázˆ>ý†ŸIp«Ïy4H°ÌIßB[‡Àúú°Ù P`š\ËŸ±k$HéKB LëR`zö€sM(0}䈳%aðâo`àh%00¡ˆ£-ÀÀ4#ŽvÓ‘˜¦kdÁÙÂtôº€0W8g5'-##Ž&€ËlÔ&¢@„[Ì_Þ Â-öÒIÓ–’0xžº·eÂ`Ue¹h°ª2m'ppÜÇÁ󄎃ç +TÀƒ[möeCLwIòàyòô•<8êÆò3rálÊÊ¥iKÿL ž'« s«Srá¬Ï¹pžÞ‹ 
‹ðÌ`¿¶8-jr.|Áã»÷wõ.àÂqOÁ…¹¿+¹0/2°ðeDeªbXóŸÐ € ·˜'xàÂQcá<;ÃÂY›aá¬Ì±0) +˵y@pa-R´i4páÉRäÂÜb—\8ës.œõ9ŽÓžN$ÙàÂqïÀ…ãj¾¸pV„McN—”æ>ÅÁ…§Ã¥Á…¹%Ppá¡Nç­¤‘‡àÂÃa ·`ßb1°0¯Y`á¡JǪ²¿ü‰…¹)±ðP£ca}£õä&°p Ú~»…< A ç…ÎÎ .<¾fpá¼xàÂܱ6¸ð´»x'¸ðåLî’åÖ…ëgÁ…ó Á…uÀî›\˜[€nAûö 2Ì=rƒ u:Î>4<œŠ£áiw†hx8GÃ-Ø5Q$ÃÉÎóæÎÃA†ýQÈpv:a.52<Ô‰í¦âL@†‡Sq2<Ôédx8x¥l¾.ÈpÞ’ræÝÿ^2<=dxnÿþ×`áÒ?ÓòÖ4}°e+aú°ß"bú=øÏ&®¨?¼u\VQÉe±î Îß^9Àe¢C…à2»x2Ùð„=x€—^àåtþ@¼ìðw^lSÌ=Ë €“¡:¾‹nüàüWÅúóüwƒÏ*0v½øoqȇÿ þ üws£áà¿«Cö‘ÿ:ãÿ­î+@ü[}žqÀ¿ˆÿVÌÃ;þSþ])lvþ[ñ þŒü· ÿ)“ÿ®ø'ÿ]ûKuà¿+¾¹É£œó_žéPÉ£øï Ý6ùoq×”à¿Ø¨éæ¿+>É£€ó_¥öÉþÛÊÁXü·àÛ™ü—ÇÿÍrÎãbÿf9ç¿ú‡F0À³œóß,gü7OÁøo–rþ›µ9ÿrà¿Ü_‹üw)ø¸Žú€5cj_ç¿K5ÿ] ¤ÿà¿YÎùo–sþ›å^ü7uþ›1ç¿qÒà¿Ú/†*áž&G Á£>@à¬Ï!p«ÏI pÖçX匊·r“­®VÌ>óGrœµ9Î#:nåÜä8ÊG}€ÀºÒFÈ[9·ÞÎúÇ]޼, >€Àq9ëëŸïï¿X¦· ø÷)ðMøí%ùÆÉùÆM÷ÍrÎ}³\¥%™_Ppß,çÜ7ë3î×Ú¹o^ç¾Q Ü÷¹V ¾q4ߌ9ñÎäÛkrØÍ€½½„cÞ<ǼñÐófÛ^˜7K8æ‰n`Þ80oÆó>çÀW€·—p¶=l·—pªÛuž›Çq çï@7‹#ø4?€n\Ý(¢Ïˆn´D7ë+Ë@tã~€èf}Nt³>'ºyL'ºq9_D7èD— ¢ËÑeUAtÙþ ºCЉîP§]¹3úrwÝáTéu:Ò¥éc Ýá°†t‡ÆÒåU¤›5ér` ¤«ýùœ²éf@ºÙX Ý!èH7/;îÐZGºCkéæ=ҮРéít¤›íÒÍk¤;”t¤;”t¤›íÒÍÖéæµÒêt¤;”t¤;”t¤»,œgp¦;œÉ‰¤5‚éf`ºCIgºY%˜îPÒ™îPreêšÁÎtóž€éêúÀ£Â™îpž;ÓWÖéL7»W9óô¾ÏÙa6µï¯rñ?4wXlFNž‹¥CÁs!³ ®ñ »›ûÇï†Jxw…,ñîêF„AyWŸ‡ Ê[|*€”wõbÞõ½=$¾Å§ÞBâAâ[|â $¾‹O _ß—žß…F¼ø2^H|íñ–†ÇÔøF̯I_s3ÞüÕ¯FŒŽÉx3挗Ç$ãe92ÞŒ9äc¶xnî›S'ä-3“€¼sÈ›1‡¼YŸAÞ ä-³(ò u¹Ù!/§@y㘀¼jŠÛæ:äÍc:äÍry³>‡¼YÎ!¯¶ë¥½¯­Ÿ‹ry㺼 ¯Þræ3È›'æWpZîWâøÑÃ×v´§´K™=Ñ®NÇf€vó˜Žv£å@»­>wVÚµæÕÍ£ÕC¶FnúØÞ!NT·7@×~v–‹ŸmA¯8n47®8nœ8nÜ)pÜ,÷â¸ÖpG¸Ù8G¸qÕ€p£á@¸e‚­®b6„Û›z›U9½Í&:½g ô6îèmÖgô6®–ÓÛ 9½µspÛ³µŸ×ÚÏNjíg‡´qaiãi£óÒÆ)Òf}°nèMüÊ¿tJ›µÃµá9 0m4 ˜6cplàE c›XkÍ€YCf |èÖå`×ÍØ!.@耺!ø5°6ú5LæÙ{úðˆ"°jèE`×ÀËN»†^N Q œ†Øác$b”í)rã–ÜN Ñp85ð²Ò©1 Úhž‹V›EœÕÆøI“†ˆ9±µªÙÆu AC/âÌ?÷Ä2.Û2ÄéÀ–1ú2<Ñ“M¦'Cž Qž ƒ'Coì¢ì¢°cˆì"v¼ÓIü;†‰ ´cÀ-I;5í2;† –È.÷¯ðÙ¡œ&ºœÒ‰a àíÃä22èN Y¥;1d¹“Y& Ò‰!ƒ3óL—¯e›iÂ…àÂ`…`À…`À0Ó©˜ qê4`ÈÃîL:é«{ÜYç‡v» šf0Dï¥Ãô±ÝîXraþ‰3¤CáÁuÖÈC÷¯Ù/äa¿”ö yn¿Îí¢™²ÙY}¦$¦ýB–\˜š2û…lÐÊ4•%a¿ׇö „ýBw¦ªlì2xæ­ø> ûÛì¶©/Bì6hiÁeý\¶ÞȵçÄuriWØ.œp íÂþN8.lzíÒr0\øPoÚI, Ab[Èüœ‰cé^G+·FÁÀ°«/S ËÑÅ_o@6ÊÈ_J@sEd L‹ d㘲Q@6bNe#äT¶ÐITF1Ae#•Ðlé]wà³l !íâË}Ò8eÒbv- mÄi1»«PH»`,’Ú(RËIí³T9b µl 
Im”®pík}MpÚ(N1pZLB’ÓFÈ9m„Àil5FXˉmÄ€m#v˶à.°u!À¹îã܈mÜ&Ü'E p—Üfmh(î ÇR\6!Q®3Q.6"ÊcåÎn³6ÄÀs£-à¹ÑðÜ8æÆÅb8¦óÜù£}Ö®™å¾[q.HœË†éâDºQ,¸.bàº3v2ÛåÔ8Ù.[´ËÈ.C»lÇÃu?Øfm AB`!B]4€L—¥ê|Ξ\w†M>¹n”[©ðêjÈü[È<¶S>à“è ¼Qì€|<)ð#’òFl¦|À«#êå´Öÿ)½ïeîÞ‡ú?«÷]-(_©ßzßß~ŸlG]5§s”}k¯“M_Þg©jÜð:i©Àëmò«ßZ¬eÞ¦öÒ:J­õø…Zêoª¥ˆ¨×yo×v_ÆZλ–ù·LÙµéqÙÚÿŽý—ªùgSë&ûî²Óü Õ,y6£Š½}ŸëësÖ/¥TÝýmiŸÇgÖ¡ÿ\5üëŸÿíçŸÿôßùÓ_ÿú×?ý¼ýå¿þsýM™Å—oZ©±ö‡Cüëxo4ýLà oŸ:¹ET Î?¬¥•,ûka3.nÁÅ7[Á¾-X~xá–Ümí >= ÑsýaÛ(Rè£Ùr6³.9¹ú¹µ;ò¬T>¸?ý¹»yÉü çw윟…Ê•ûd‡¹—Ìt=O³/™á3ÛŽ÷C)7™wˆíæivãêÓ-^Áö¾™Íý[pqëjlЂ出ßLD6ývKæiu÷êŠíéç©þðêóË¢úèÛ´àæþÕ öôR°eÔo&B#êÜé`í›:Ͳ¥-f>é.«³‚_ÞÐæž4Ï28Þî¾£ öÙÚìβR?î®Ï¢ ïD¦àyÂj¹ï·7óôWÉ—‡iû½ÝÉgÖXd>´³dN÷\¯Ñï¶` îXÕùV„ >S,UóP>6I¢wÀã)î}ŠEuš[Ö|ц{¾ÎÂKmûøUpñ‡ií&¨*ç›´)vÜ#p½^ý}Ðî[ŸbQÉÉŸBí2}À*é6,ójÖej© [°%÷\ÛÛÊׯà3Å¢’Ù´Ùû=WŸá»‚[.ÇÖ¶#Ý#°êt¯ïy=ú‹ëÎ9 ž÷¬’/¹š¤”š]šØÒƒº=]¸Ây®s‹©¤™åµàòÃ|<]xƒo‹‚ÏìÊÕ óéžå|¾UÊS2[š÷< îµÏ®¨$^/5/O?®¾S î}vE%ÝÿZÁçÝq]g[võÌ®ŒužÏìŠbîלּþž]jÜæ>»¢w«·à“¨¤ ×Zpé³+c•¥Ï®Ô–’!ÙÖ>»2\Ÿ­öÙ•±ÎÚgW†K°m}ve¬sï³+*iàóvØgš„ËÅžÈíì“+W{ë´Â°­ÀêÃý>õŒX½÷ÙpÄ%Ÿ¶3ß—žë¨Hvk–‚&wjÁµ'Ä Öû×£ÄqZí cëqq·ŒÜz&Öž «N$RGíïC¶kmZlë™°b®j˜½g "‘jÁ£Ã‹éfZì艰b>9ØúnO„DnÚ>ЖW+MþÒ~ž{"|Ðß|-xô±¸"VÃúP\ýChÖsÒGâ¬qíyðuE=i8m§ÊëŠÚ®¸ ž}$ž1ÉÛ>q{:œW­½Fjˆgw5oAóýQyôyölXÁâß3Êþ¶Þý^,–˵³ÇçÕ2Í=ÎûÔ>ý{6¬ûžž-Vz2œç±L6€*è9¶‚G‡y›Û—VO†UÒ?1—iëÉpöãEVÿ}æåY´F‡ù¸*xô¸µ¶ûS·àÙ³â¼Ñïù¨sòU4í×ÙÓâ8‡ö]fiñ„ï¨åÚŸëéÈì‹6 ëcñäݵàÚsãëV=k@Z¬zjÌÛ¨ÍívëǸ0úïò„ÏÄeÞ=3Ž£žG—ÓF‰³õctŽk‡Å»ó Ëlyq´t™=/ž|ÁS .žÇA‹§ÅqQe=¾[7f•ÕÓâkD{ÿl qëöM°ÈEÿˆµ4ÊЖöMf qô íGÐâ eYNOˆóìOOˆŸvjáØ¤ìCðäû*´àâé0û˜Öô!xò¯ eÃ1VhmNƒ{ûªçÁqÛW™åÁÙÌÝóฎÚH¸¿qµŸ¼¿lçiy°¤šfm·¬“åÁÑXqD~ÑV­²;¬ß:©PÐòàmÚÜ‚>qBÚÜ‚ôÿ§„Îu!tp±§…júЍ€Îuñíu:×Ù·Ä!tÖQÕ :WånÎx:×Ó1„Îuö=W: HyOtÖò*ÿvt6“ÒŸ’9·‚«¾€9׉3Μ5vúSæ,´æO˜³pÀ»Ãg®©¿…‡1Yú¬ æwœ>+èçôYAàN§ÏÛzZA 2­’îÇ ÆDÐ*éθ@Ð ž>íàZAàBGÐ Q‚–3¾ZTlZå@îœ@_­ñù8'Ð*Y}èq­’þÂVI@Z'Ð ÚBoèn¹@ø¬ßÁ>ËÁw}|K|¾JÚ,ð›@« &U@« €¦èá€N ô­÷@ ‡“pMË hÅHŠ @?+FPlüYAðU_'b¹ø³6Èóm?_ͱ½þœ?_å:¾šãxÕù³J¾‹xÖ¯¾»&ÀsìâGð<¶ÄÀ³‚Åß ž·«/Û£ïàù*ùx¶s0ä'Ï×awšœ<+H@näYAäšNžÃ¡…äYA瀞"u̇eeæëày,¸¸K´ÆÁóv ©Îk <WÀÁóUÒ_ëžÇ’FžeRó6|tÏ óUÈgN:«$òw§Î* *ÿú¾jóI\§ÎÞDÎÃI;q+2ä¬ Ù°!gÝ9 Èm1Ú|mj„´y¸\Ž›/— O×7_mñ¤Ëq³‚áfÅœS;nV ÐØhsx‘6_ 
/9mΫÚ–F¤Í±‡*ióP¥Áæ±­»gÁY¥Áæá ž/ã5U¾VŸ¬KãÙ“`ö °æ¡•ΚÃ#ЬYA—º€5çà°ylíæIpÖ¹y¼…9m{x¼?= ^ˆ® 7+f {À›‡7_må]< Î`ñ,xy°ñêùoœ¼£æ±™Õó߸UœÃÒ‹Ày¼2‡'Á¶Xô§ÎWƒ‚F[%_Øy8 çÎWGˆÎU2Ù²%ÁY²xü\lGÎc «ç¿YÛæùïL‚êày¸*žUÒ%`ÏÃmrð<”4ð<œ½ç1¶xgéÏ´Juσãâ9xKVσçñóx–›çÁy*»çÁ34²ÀÏÃs……½{8x†Í:üö½;ˆÿ:sÃQóüœô¨yÞ¹²: 41QHžS,]huÈ h}E~(yŽO)h~bBa =AP /PèÌïA¡×ÂézPèüÆ…-9)t´‡:ÒRhK Ë²dÐëD< -žäßÐëD)4™­•醖ÀŸÇ˜9Él”p?«  ¡>Óë/è3íÅ‚>ÓV1ès{«Aï úLG³ Ï´ úLÁ ÏÊ0:}^N¼%@Ÿ±+|Àç0í |æ6èŸ[º°z|Ö&õÎPŸéIô™›“}æöñAŸ¹Ã}Ðç0$ }^*ÿ Ï¢ÍŒ_ôY.|>§úL+ª ÏËÁgô¹³A ÏËù~%‘;‡0¹³_¡Ÿ’;ËœÐq¸s âéw¦UQpgpÜ uä<´Å³nžs0gÎj¦O9·{ç3@Îòñ9D çeåüs»ÔXä»s9ËñÄ?𜹅rpçXìCî̽¹ƒ;s÷àÎrYqbôæÎyQÁïmhƒ8g= ÎÜG:ˆs6Ĺ9ÔY;rn×óüƒ6ë#È{ psö&àæØ§‘¸™'nŽÅQÀÍÙ)7ËÈÔg7€›—ùc¹snJê¼¾AÛa©hvê¬ïU)‚:k阽MuÕ}ètnïi‡ÎC•Ξ/X׃ŸÜàq¿ óU ù°Sgõ$«ƒ:_÷ö;sóâÀÎzE;îv–!“2ÀÎz C_ìØ9v®$vn/w¼˜—‰‚n§ÎZkçÔYP¾öÀ±s®àsìÜ*Ät°s6Ø9ï;°s®îvãŒÚ±sÞL`çL”€çïs°çvñ 3{nçIHîìy¸BΞ³½Ùs; ´ÀŽžóìÁŸs@n%‰u?˾ÙæàÏóA-8øs^q@hYSûÛº]q$5€Ð„Þ?¡[I¼†¡s§Cèv È„ B·©¯S踤ЭƵŒ¤ÐñôB+S·„:z)t»°®!†ž7b3`è¹R? ’ºÕyØû˜ZWÖæÞZ÷Â9 8t^Spèë“ËZã9t ŒCëŠÛ”9´,ûü#1t;Ê;y%ÖZcû2#ž·@âN uDsI £ûƒ@«»õïahY³û$ ô\ý@Ûù=çù@gGÖ§¸}±‘@gÇÎÛ ­j7Ôás;ÿî$|nÕ%™î™°î3”Á/ø¬aÝ×>ëZÚG%áóp@‡Ïzþw>çh­GÊEØN ³§@ggŽeö$гÀ ]ôá£0ff@ £“:€Î ë:ï"t^:h]tÐiÐíºú¤øóe9z~ÀŸsœÖæ ¶Ê•ü9ðg XöÒ$ëü9aPè|ªêëÁ¶ïõï¥ÐÓC¡çöï‚ÞzÂ2"苬ÃvcGR¶Y0èW04g Âv¾ƒÖ£aß–dÐÜ6% 4÷1 Íí<¡ß¾H¤Ï´îúLËö Ïð´øû3>—‰€Ïˉ×áórâ¹$Ö†®;N"ÝÞ ¶ÿRè„ Ð-¿†7Nn ½˜ÙüOÉ “*€Aëŧ‡Ðí•HݰCèd#€ÐËvÿv~ˆ>Àž‡ ³çü|{‚`Ï-ðϰçŒ9{^C»ëðy>Jx Ÿ³$às–tøœ 𹄠ð9ƒ€ÏA¿Ÿ‡:Ÿó°€Ï£Þð¹„^ØásVFøÇ#|Î àsAA è‡ß>Ú"|ΠÃg; °s ²ç^Æ©óBU6¨ó3êœA`ç…ÒjbçoˆÀÎ µÞÄÎ3UÙ ÎYÔ9.&©sÐPRç, êà’Ô™ñ†Î •Ü„Î3羈žyò$ÏyTçÅ·KòœAç€,ÄÏ ‘7ne‰¥ÚÄÏRíÀÏ~>¸ø9 ?Çû•ø9Þ¯ÄÏyTàçLÄÏ;Ò×ÀÏ~ÎÖ‚?ïø–&Nü9K‚?çy‚?GkE¡µw—÷ Rè, é)tœ;(´JºÞZ`w7ð;ØóL°çœç {N„ìðy™¸&a€Ïw[Œ;/-oÀ{ çla"ç öì7›äìÕmfA›cò´yh‹Óf•ôô´y(´êÐælí›6?;nÎGÜínÖžfcÚœó1 Ív± šn;h‚šsÎ  9gš³W4§ï¡ƒæxŽš‡˜ƒæ¸½ÍÑ’æ!è¤ù+ÁçýÞWœyh‹Cæ!è9FUBæ¯-íåhFȯ–7dŽq•¹‹È—ù»%»Ñ8 eH?g å!èhÙ«Û(µï[©ÙrÞQ°åxlÉ–ã±[ŽA p9 ¸£ár >{Ãý”„yhŽæÐH˜‡Ã:aä„Y%}v„y:f.cæ¡AŽ™³AoÌœe€™õYìø˜9Û Ì<Ö1sž0óPÒ9óP§sæ!èœy8¬sfÍu_!gëœ9ë 9OÓAsÖМSoÍÁšDsV&¿xžAš‡’Nš¿DòKF½3ùeÐI³°ÎÓIsž'HsHÄ.ÒÌÎú}F³é»³ ˜¹4ñ«cær€³3« Ã"fæÔ™f`fJ3ûˆiîLÖæÎY%ÜOLªÑݹØnà¡t¦(;”ξAé å•βC 
¹`˜:çˆAæ+{Ašõ89‡¥Ö9Vƒ4ç"^jc.HóPòEš³ Hs.]i^+eC ÍCÐIótÒ¼VÎ'€4çzb 硤#ç!èÈY‹]ä<9?Ë”5%Œ5?™³ s®kd‚™{eN—û¯Ž•‡9VÚGXÓ ¬<ŽYÒ±roæËJ#D+ ÂJ#j¡•Fa¥ Äi¥‘%a¥‘ ‚•Æ<}m  + ÂJ#ƒ°ÒÈ [iD ^ÑVzid^yTxiÄB~zidIxiŒA[K˜Axidka¨‘%a¨‘­…¡F†yØ·¡FüLC¨†Q 5ò°0ÔÈÃÂP#ƒpÔÈÃÂQ#ƒ°ÔÈ ,5òT`©‘Á°Ô`Ð-5ÒÉ‚–tœ€¥Æã8ñM^¬$¼4ÞEÂDãëîäÉ)¾Þ¹+6Dò”'X/òä<¬k”£ 4ÊQ#…Ê„P9 ¡r¨Ù)TŽ«NµrèÏ©VÎÃB­¼Ó Žjå,¹s™KB­W–jå(y«•©² µrˆù©VŽkCµrÜÊ•ã¡\98j–_’å<*$Ë÷i¡d9žJJ–3ÉrÖ%Ë,HÉ2G,ã$C±Ì‡9˼¡XÎ’P,tÜ€j9šÑrÄ YŽcB²Ì¡.$ËÙRH–ð‡xI–¹°$$Ë쨡[ƒ§Èh ÅË„x9› ñr¶ æ1³ ¡`Î’Íâ(ꆂ9tàP0sÜ ‚9ÄÝT0gA(˜³$ÌÙ(˜ó°P0Çõ¡‚9´áT0gƒ `îjsŠ—C¢Nñ2Gã/GÉ[¼M¤x9ƒ/‡–âåÞ~ê–cquË„n9ƒÐ-‡º–ºå B·<-%Î:¡[-tË©0vÝr”ºå<è¯Ñ-£;¥n7Xz‡n™ Ý2ºe*ž¡[æúþA·Œ+”ºeŠš½·}Ÿ9ÆŸ¯ýÛs¤Mýª¨òº•ë?½þX_iœþÁëÿñ‡ÿöÝ®ëùÚèå#môWó]ÀýWVªüÙOûw¯ôeÈý>Ùço°:úÝÛpÈ¥ãnÂû/ŸÚ‚õµŽý¾å÷ß>÷*¼®ý»Ü“»N·#ùœ;q7áõ—ÏmÁûÚßÏÁûo_kÃë¨þ…¶kNXjýö/zWµì åó²©YÿðãÛšçsû¿µ5ýË?ùç¿kãößù—/?þÓþñÇo­i¿f@Ú;¡}»|{MëwÔtV9á¶÷zK¦¿½¦-kú”±³Ù¿{·yÒŸ;vú™Ë–>íuñò7ê»}J ¦i¬¿ýöI¯KQ¿¡þׯŸÔ‚ò•ú?g¤ô.?Šúœ7æVzïcýŸÔû®¼zëÿ”Þw2wïCýŸÕû®”¯ÔÿëÞ‘ÂÎúdØŠ^‘[Ë:ö³T5nxL_êëmòëßZ¨EsÇy”Zëñ µÔßTK{3jÖdo×v_ÆZλ–ù·LÙ«&Ƕö¿–cüB5¿ñlª|k-Û1Í¿PÍ’gó7ª¸Pì^Kë:_J©ºûÛÒ¾”ϬCÿ¹jø×?ÿÛÏ?ÿé¿ÿò§¿þõ¯úyûËñŸÿõ›2‹/ß¶Zµµ?S½ÅD}ñåwÀ±L-_škŸƒó%ž¥¥mÝö„äªH·rOJ§©¤‚§N;C—K·ò†om8ÂÎ×Åu+õÜà±T$`¹§ˆÛ%¦”E2–›8Öc¿{LÙê±Â£¬H½r“ºŸî]X$^i5_à£î;&Ël3ÓUóRh†f¦×„¨šî2÷Ïrùk¾/bÝ'ž]»ˆÛQ_x«n´E¬7©ª[Å t‘¹ôY_¼©n+¤Ie¶¹¨ºÍØ/\Á[²Ònô`Eú•áT ÔLS$`¹yžž>¾™žÜTP1·ùféÓ­V(ÁôûƒšÛ“ ÝB‘]ÝýtGô²¬}ö¢jîÜæ ÊRŸ¥ÀŠ9†.‹­Ú­šÅ÷+£Õë턯.\µ7µ‰IÊb0©ŠI™µeÎo÷õÕ…µÒàðÓ<;¦TÝ£hÏñúîÂZIäÝUÊŠ}+軉­ên}çÕ—µ7¥¡ï¢”ã¦xU²6[úPJ黥ŽÁµÏÓU‰ÓL†ÒêïjqǤ¾ºçÆ’[7­åÀÒñöéÖái{ÜZ·h5ý½¹äëŽÅÅþiuÌ;v´û÷êÊ\§>¿£ ÏEµ Ú¾'×w_–ŠÍPåM!t:/ \´Óf}Ã×ï†=|&óª–÷û¥ÖÖ¼Û{ VÐw2*k÷;®e…|¯haúÝ«¯ éˆJë¸Ï|@Õ­Þëýû3¢BnZV“^©mm²vs÷ªže· =`ÏÔ¥b»mZôVºáh•U¬i/|æÄtOÅr©XïÁXJ)ÖjR 5ÖŠmü¡ ïþ{QyÖ>?1\8±‘rÆRïÙöEÙÆ}Á†«§‘r½ã!h“©jíáÏLKÉî÷ö³]°{féôåJ¹Ѧ®ûóvº‚sgÛÞZ2«?ê'ß;¨¥j}®d¸O[éÃÙpŸÚ›ç™îÓVŸ®¢&NþBÝLäyM%T6m$xÄe‚WKøú‘ĸׂÇ|ÄšS²©Í6w%Š‚¾_Hi¯æ–¡Ýyº ¬Ú%és7úo¶]¾Ž÷,‘˜Í0*øLVÍê›J²ì6`+ˆ1x_ûsÕŽk>0ìµ·AA—P–}ëê…z½•ìfµà£UÐõ•eß»*flíÑ¥£ úÎ8¥åQ÷l’b¾:·-C;îQx9ÝkC±gnR1dŒGë…ç=+hÊÕ7‡U;̘ºeÔm(º⼤‡)ŒÇVj»÷{ Ö‹¾ëÎÛ}þª+A˱õ—œ‚®çl_?}RKAP–ãè;ˆ×–w¼õŠúý™#U!d=ÇÙå’W3= =§>û® ^ÍçܳGUç/ØsézÅ|+gÏ')–WŒç §)…tEc9¥ºÇamÓç£×YûüÚÕ{£è 
àɉ‡öl}övl^ä÷0¬öxvš *Eôaì<©F]¸ô¢]a¢ ë×ɤý únÍ >Â%]z¸¶Ï7eÙ”µ}º=Џ«:5×öéöLŽm©}"y¬®%gwR¬9K{¿¯“YŠç­Zõ)÷$ÅR ØËvŽ®w¹º±í5Ñç6ÝcqÜ+ŸÒì;«f7ž¤8Ô¾áž×v½òù>l¬ÚqåIŠÕyLÒ»¶¸gj[ÈE¶ >o_]ðº^[«œ½3Ûó³j‰''Ú³uÎØž½E\¬½ý|¤]CŸœžhÅó³Î} Íp¿ä¡ù$ÅòƒÍ)ËúïI‡óNÉùI‡ó¼§£¢ ²zRâ¥Bv¼.¦uÛ'%Ö¿´WôzU~Å+t²«ö}RâeEº¿Joð¤ÄC•GmUšöªÓä· >^‚ãªüâIŒe#oiʪëIŒÕ S:¯ZÆð$ÆË 5àz­™X{F·(¦w.‚´-Ob¬M)M)·[IWµjÄ™UYô“kßÉþá¿JÚòäÅ-æLh½¤-µ÷a‡«K[®*Mñ´–ÎfsMÓ*eË“k«Èw/^mÃõ»}ˆ­’µ<)±V¾†Qð¸ºñŠ/ÁU_ÕO^|ujÞ´íÉ‹‡êj— +èê’uíOËÐáVÛªh¬²½àž¼8/¶-O^q¸´õèÕ蹚²õÓGózÅŒEµ|°}ñ¯½»Žˆºìö Ô:ñsOŽã9ß^ï/Û²ìûÆÞÛóü­õ°Ñ[é»ÝŽÓÀ Vö‘D‹S‚ü‰®'\[I¢k|¬€D·ûê ºnØ  Z+p=»ˆ†9äO¢µÓ½ËA¤µ<˜Ç‰´(>Ð#im-t0Ãnë$st$­ ï$­…½î9 $­…½Õ!¿#é4k’NïD i­Ûöåvޤµz×w|’ÖÆó¾Ô HZA|+8“Öê]ý7“Î%¿`Ò ºJLúZòëŸûΤôQË™´Öíú m0i}çq0iÍäTÛ£ LZAÛ HúZ¨ ~nHúZŒë}Ç‘t¬â‘ε¸ Òée "­ ï‡"}íMïØÆ‰t.œ‘Ö‚RL08‘¾–¿:ap"­)zRp#Ò¹=tnåL:w/“Î5•`Ò¹¦L:?ÞÅXS0n­Ó×Z;燦sÍœƒé\â0k¦s}ètªØA§SY :=O§²xZF¸»Á)§Ói’ :»ÍƒN¿·Õ”ÖþWDËÕEó•3r¥sÏ-@imUE´lPúÚ„Þ÷Jç¾Z¥¯½¿}pp(ƒJ_À†uœIçÆÉ7˜Î}릯 ²5-7J˜Î °L_1û˜Î}{¦cë!péÜ>\:÷k—¶½€¤Óí\:-ïÁ¥Óî\:vÕ–VÌ=€¥sÇq`éÜ«XúÚ8Üé«céÜUXúÚ“ÝÇZÇÒ¹¯ciÅÀtJo×*!{¨K_ÛXûœŒc髤Ïq¾„Ú±)°´ö ŸîXÚv¢‘ζIçÔ@Ò¹5´‚Žª€¤sÇ\pék·l]8—ÎmÏÁ¥sWYpéÜü`ZÁ÷SáH:÷f’¾6éõÄ‘tn[ $=I+èHL:wi“ƒØ¾95Ƥµç*>HI+è ÂÀ¤¯ýo'¶³o4ÿìÑ|ãèÜD8úÚ}Ýò+àèÜš8ZûÓú~—Ž£ÇØæÛH~§;޾v½íïlÐhÅüC 4:w®‚N£Ç ÑèÜÄ4ZÁÉÆDÐè¼{ ´v÷¯A@h'A¡t½(´‚þ ­ oW ­ }aB+æ›z:…ÖÆÝ>± }-m‹K‹VÐsš{‘JûÙ¿u£Ç Ûµ(h#7`´‚Åg7 F+æ `ô4­ OëFA÷eh½Ò3Àh}¯OÀèÍyÄO£Ç’‹o" ÃÁ¢Ç‚¾özž ]‹VÐÕ `ÑcIcÑ :ØŒk0ºu=H£·k½µÍŒVÌ-¦£¯ Ïå9‘ŽêDZAÿZ¾‰ôø»i=u‘VÐbi}WI'ÒWÌr^é1hDz ‘«4"½iù»Sp'Ò ú~ Ò ÖˆôU4ÖôXÒô4$­` ^Ý¡ "‚’K’ƒ†¤œAº I+èPHZZ\ßeË‘ô3T $}UùθœÀ~7ßK^üïÝ'þWš3†*ÚD%Œ®ÏÌ9t%ß'‡žCJíZ tÛ9´ä¤Ý(‚Z”Õ?¹çˬZÌÈq8t2HpèØ—ž:6)"‡.?ÃÁ¡¥ô¯BphšåC‡‹1t8°C úùý†1bhÉŠükÄ1t,Q'†“&bèäeÀÐÊ À~ßZèþÁ ­©e`_ÇК”vŒ ­ÉZðrÃÐIv€¡§C'L†ŽmÁÉ¡cArè$àЉi¢TDdžnÑ?À¡[Ì7q"‡žvIpè$àÐ-è.õäйä:I8ô|À¾:‘8tìJG‡ªòÆÐF1@ ç=¨µh!°k'ÐI*œ@Ï;ÄlÐÃA@«™–Ï@çQ?·ÁÇ…CäÏB#¦å NpþŸøÐ2ÈñÐI\ ãL¢;S‚N¦úúÎt·#h}¡:K‚nAS•€@Ë9È…º Ð`>'À}ž«›6=ð9+|öŠœ;Ï…š~pg}ÎÛèâØY/?ê€e‡ëC°³^©>‹î,…Ü9 ¹s€ÂçÀ„ÏóH;sLB昄Ðyh 3J@g‚€Î —ó:/œt!t&fèœ rèí!u&/ ê<óNê¼põÙ›:/O–úÓI :Ï\ìAòL"BòL<äy~:ÇÅ$tÎv:g;ã‚:ÏL‚yÝÈœûI:G; ÉA:“ftž¹¨ŒÐ™¸' ó„Iz2çˆ9g[9Oøš äT†È9À sò“‡;£™ó;w"vžð­BìœçàØyh¦cç!î¬ëdjNrç<pç 
™spç¸äÎqšàΉºÈã^@Ç©@Ç©@g:Oz pè ‹'ƒ@ç©8Ž3!ŽÆ’@AhÚ7Žf’@G·#ž0¡NÝ:OÐ tbIè!u=]VÖ?%{詳ç»{^>BgŸð™ ØásžaÂgÜ«o‡ÏÃy:|ž,µú)áópžŸ5ž™Šm€ÏàËŸóÉ|·#á3nÊ ŸcL{ÎÁ*Ù3!y°g=ÇÐsÆœ<çƒò<<ç½ yŽ»HòçAòwŠä9ƒNžs,'y~92çèŠdÎqwÉœó Áœó 9gwsÎîæÌ9OÌù9A"ç8rç8û­ä€ð½ðyzàóÜþý¯"ÏŠòÜ$p àgmÉàÕñ³ÆgkÀÏeÁFÇÄÏZ¸êÚXðçä àÏËÆï1ðç”Û?kuНZÖò+H€?Kïî¨ü9¹%ø37ž'~NýøsbKðç„àÏ).Vº`¨øyæÚeàçˆ?w‰É󄙞 Ï36•éä9¤c$ÏVì°„Ô9Ê:g;:·3u²CêœA®Ú&u¾OìbCbç8,±s–Ø™—‡Ô9z©sè?){¶À~°s¶ð9Úö1 ç<(Ðs\;¢çx‚=³ƒ=ÇAÉŸYøù ðÿáÏñ “?ÇÁ ãî“AÇ  t :O :ú84otÄ@ ã’@GsH 3Í*I¢³ H4¯AtÜC‚èƒî ÑÑÝ¢£­àÐyÔ?ó¨?£¤ú9”Ñß.~æQoñ3}.À¡3²ipè8wpèƒSüäÐÙJÀè#ü5£ó䣳NÀèh,aôï…€ÑÑZÒè1hH:ZK$uIça¤³$t–’Î ˜t`s>@Î1¤‚8‡Ä9VF’8çÛÄ9€‰³‚¾ˆÈ‰sqŽÕ‘äÎyTpçlì6œÀ÷oÌ&zþU&Э_>êÊÑ:d²tŽõ¸p^h“ è X ôB 'h}A 'è i]áÌVÐý0a½ô*€ô¶,x¾ ]CL+è3©°‚^¢µßj]zˆÒ ú¤øæ·[A" ZAWz‡Ð zyä¥t^¸H.Ð a­!Ô›4>9¿Aè“æ²4>Ã&Ð'%|4>8áBèƒrCš@oWÑz£M 7J¤h½b0L 7,‹ 賋4N›l˜@ËÆ5¨p¾ø®×i6Ð-v+p€ÖÎu~³à­ ;öÀZÛ™ûCóv€~vh]Æ è!è6Ð×¶}Ó×m eGãbCØ@ËÛ l ¿ì^ÐCн ‡ {Aë:¹¨fÐYfÐCI˜AŸœ¥tÝ :¯Ì ‡Á ú¤:fÐCI7ƒ‚0ƒÎúôÐZ7ƒÎ‚0ƒ>9ÖÑ úà€F3èôÇúà’~:Bçá½s- ¡3èŽÐ;¬ÒÂz§ÌœŽÐ„#ôF÷-:Bï!ñ†#ô : ¡»ß; ¡wHÉé½a]>ý µŠÊ_ðƒ‚îÝ‚xÃÃZAíÀº}ù>ý Ó~ÐCÐý ï£™ tû÷n¦èRépè,è.Ð*è#î˺ý ½\ K¥Ò.Ð¥R£ètqwh½ÒÃ:cn­õo>èÁº¬Ô Ã :7€túØÃ zº´ryà úÂiÝ ZAÏûaÝ Õ8¬ ô<VÐr÷r5¶[Asü)póü7Æú°‚®YAGAZAkÅ"Ý :öi×籂ŽKC+èØ”VÐÜ”Nб)œ ‡˜;A+eµ;AË0Í•ªp‚ÎÞ EÐ\9'hˆ{+»´vz0}< cA ϹŒ ó4aûÐZA×ÇÁ :ONб ó¸´Üæ ¬v#è¼›î18Aç%€tž%ü ó,ß~Ðy~0„ŽÝh'Cè¯Ch} ´YE¨`v+èáÔÜ :v³ ´œýÜ]VÐÈÿºtl¯A+h-WvbXAëCÐ—Ý :v¡t™éË'èBOV:Açi ºÐ°•Vб½­ óLÜ Z±žQÁ:ÏFÐ Â_ÂÝ ó<à·ëvƒÎS€tœÌ åÏgf7ƒ‚f=œƒ›AÇn&4ƒVÐ=2`=œˆ›A÷­Nè­ßa8á>ÐyðÎÛ#èØ$…FÐr°„w†A{CÝ:ö‡¡ôpÐ:v–¡ôpî=œ…y@ÛIÀþ9O‚ûWF;aÿ<Kv•ï£Ð¿Ýzãá£õFl’þX ÿ%ÄÑðž‘zÓz¢¢˜þw<¢ÿÆÎ}‚迱sËúolaLáúgÑ/yÐ?kL€I´ëŸ5šÀ$>Ð+5dô^Éè½ ‡ôJq} Kÿù)x´‚¸Õô.4/„t‰ ÃŽô^°GÅàÕûôB»oú@§y4| ªç齄ˆ ¡s: !ôW‚]-ÓjŸ¯€z¨Ò…ÐCÐ…Ð\É!´^™ ².„™ôåöB· ˜„Ð-"=ô:c— ꡇ ë¡ÓÖzhXw-†:¯-ôЖ†{ ‡NÓäÐYÎÕÐyÕ¡†ÎË5t^õ·z9?9t´rèuÆ6ÔC« Ñ1—C·Ó®ÙåÐÊÃÂè¹Ë¡ÓCrh5Ç 1!‡Jº:KBÝJËA—z衤ë¡ó"@­…:ïôк|NI!ˆJº"zº"zº"z¸|¦ˆ.K¢3IôtMt^½[=rMt^Uˆ¢„Á†‹¢‡ ‰¢ó.C=]n\PE%ØeÑ Þî".‰Îê ‰‚.‰¶#B Íß»z8˜+¡µ™«O¸A íGt´ÿîèá`.€šá 衤+ õ1èÜßЊùäÐÖN¨Ÿ³)P?çÝ|©Ÿ‡_]ýìõ¸ð™¿wÍsž­kž½ŒË‡2.wÎ+¹óW‚]îìÕ¹Ò9«ƒÒ9/:”ÎCЕÎÃa]é<”t¥ót¥ót¥sž?”ÎCIW:AW:krÐ=Í tJšÒ¹Åè¦ìJç!èJgaÒìJç¡ä[éí¤ày(ä‚ç!è‚ç° 
„ày¨ÒÏCÐÏqÝ(xJºày(é‚ç<ž‡  ž³Nž‡’.xþJ° žóNBð<”tÁóPÒÏCÐÏyÇ xJºâY%]HçŠç<*ÏYŠç¡¤+ž³=oÅóð³+ž‡ºâ9/*ÏCIS<1W<AW<gïŠç¡¤+ž‡’®xÎ Ï-èóËT<ç5€â9áótáópX>çƒðyºðy¨Ó…ÏCI>WÈ…ÏCÐôσ :ÛôPÒeÐ_ ïÞŒ›ôt5ôt5ô45t;[ìœ5ôÐNWCAWCAWCgEgc!Š‚.Šë¢è!èÚèá°®Jº6zº8z8¬‹£‡ ‹£óòAýâ衤‰£³ ÄÑÙˆ£ó¨G%‡ø>SŽ?_;Îun­kh?mzH^Ÿ‡úc}}´ê¼þøø÷ïàífŸ_ Â^>Raµ1ßEÝe¥:ÁŸý´?£Ò÷™¾þ›Çß½v¥óOýï¿|j îÛûÜéÏ=ÿ~˯?*Aíî Ÿsõïú_ùÜÜýüîò_«ýu¼?ÿB+Ú Ý2Óy¹Ò‹/=[ÊÝÞló²©AÿðãÛúçó—?ÖÖæ/?þüåŸÿnúû/ÿòåÇúÃ?þøÕ´·žæ:æù˜—sùõ´ZîŠZ»~}UõòžÛ§\Ù¿½ª5«ú´ãsF«xZÿ"«o~ûYC„œ ¾Rÿõë'µ`šÆúÛoŸ4DnûWÎÿõë'µ |¥þO"Ÿ.o/§Ïé}WíwïcýŸÔû®¼zëÿ”Þw2wïCýŸÕû®”¯Ôÿë^‘ëQ8ç]Ÿ.¥½O6©ËÎRÕºá}2}©ßùŠŒjæMøý(µÖ㪩¿­šörÔëo—w_Æjλšù7žNÙõ}V¶ö¿cÿ¥z~ëùh]ûÌ+Û1Í¿PÏ’çó7’¤iÿ¡¥笉Ǣ©Îeß–öÉxfúÏUÿþùß~þùOÿý—?ýõ¯ýÓÏÿõ—ÿúÏÿúMéÅ—o[oYSf]Cðnö‰$öU¶ÏìÈTa-Vµô·/J±z¬j_ØGb¯™<£¿u)¶´Tsá&Ž©KµÕ£±`¯.›-2¥J][J¡¼_}•ç"ðÚ?½/eìd2I4_ò÷G˜.jjƒ×‚…Gñ¸arK¯.2w1|½6#”‹”¼jùR_EÙZëZ'­ ë %CÜv-Ô{$ß‚³&‘¹Ö >ªîvµÜ¹N—º¯h”§¥‰Å«d6[¿B×2Ï[ø—W¨Î}é¡6‚6µV>Bõ_¸Yëz­" }¢–¾ö…‚yµ/6N¡ j}¹Ÿvê òö›-è[v¸ähÁp_³—§§õGÜ”¯š44ÆÚ Üçè}××i¢.:ÑÓ—Ï)Öu%ZPÝÈ]„½+¹ê>Û"8­Y·»Ú^w}›[js-â~´»âï¶xEË¿û’5mn‹0ª&™ nk/“«‡yÙꪻNƒF_\¦=“LåÓF0[E¦žìȱÚJ±e…Z©›-ÓîO¦VÒêù¾Þkyv8kÏ…­æÊ~Î}åV+ãSYm0¶ÕY u›o ÝÑýÙ×Ôé“õjï*ÓÕUÍŽÞ먲ñçi‹¥tïM—ÞNÊÖCIcgʧmZlÉ“ª4µ^ë³¶ªIuš0LÃ~_¸Ä~ªÝñúÒ$yÒš(FÛÛõåGqw_^ÕO/.8¯-'žñ8žãk˘>o9^×ôñ˜K/®~žñXòÅÉKÚ¢òáÒÊˣǶÒ_G_ž}ñòèãqI¾v«ïãq7[H#¦- ÔþjÅÇcÊ­¸¯‡‘ðÓžVí÷ö,yiUºÇmЄEƒWl2µm&£UÕ mDoûª…¼ííÖ&èR÷e©Û¶ÙúÙ\›þwÛ[b Ç7{…·1ØV´ÆºbtÛç¾P ;/>ær,z÷\» ß§ù¿ÍÎÿ–Ô?Rîµÿ%Sî Á˜r×Ím9qÃË&3n=]ö”"áN‹`$Üi<ˆ„ûzOØhË„û\}£@&Ü—l«mL¸K˜p·NæWùö¾FAÏ·Ã †ùö6a‡(æÛu…Çóím eæÛRä›h’ùv-pA`¾­ùc[˜o¯•]ù¶†J¤À–o¯†æÛ±îùηÃ:„ù¶äÜc¦]6<‰Ì´³¤ÚÒ±yocª}Â”Š©¶^-ž!ÕŽÅàLµc'sí¾úˆyv,ï<{ƒáóì¬yv¬’bž+ž"ÏŽôyvY°eËgû˜Ê<;ÂEž½sà`žŸ ̳™O1Ù΋Éö 3Ï>°Ä§çÙŸ˜goØàyvæûȳ,DŠ<;^í̳ù%yv&á‘g3 G¢] »D{‡·n$ÚËWòëÚ…1¿.P3¿Ž¦×Ò‡!½v»”!½vä3¤×^ef×.¨ˆì:î³knZ=d×ïô,ójw>`^½bíÀW»öÅòjþŒÄzÁ&Š™XûH‰õ ŠH¬#IÊÄíkËö˜WÇÅd‚Ÿ*™`ã+`H°½±™`#ÑÏ]ñWeÙwfùµSÒ!¿Æwók²ƒ!¿Æ³Æüº@¨7$ØB‡Û3–H°I™`¯X—4$ØÎÁ^ vqUú'íg?fÚ·èiÓã‚™öÆõÚLµ¯dDz3¤Ú5ާȵ+‡jæÚ5¸ríØa¹¶8½;Ï ÙVÉ÷¦œÈ³St‡<;7rC¢{6#ÑÎl‘hçÞ9L´Ûë¤ÝímšÑá‘h+è m½eßþ‡Ì±[ÿeêî9ö1Á7–9ö1ñ‹Àsìsbjƒ[Ë»L¾97É÷é^æ‘{Ÿ°†eî³Þ‘{ï¾—Sïâî#wK$îîræÜÐCG¬™¼ÛYñ»ãZ'î† 
¸;ÚbÐî8hÒnö¤Ýî‘=àn|Åw/üdÆ%™qµeÊ=ó3¸;N…)w–îÝHBoÓÓóŽ3aÊS̹CýÁœ;K‚yϘãœ;¤:̹£µÈ¹áÈ;•*5þýï¾™Q¤Û&ŒËt[®ëB=ÝÖûïj¹÷%)ä^ïl‹î™ð”îimùæôë nº¤2Ï.*¸©û wh¢©à¦þ àzCº-u¯K-n·Ö•!ÝŽÏÎPp‡(– î=2j(¸ï¹«Ðncø¤Û˜É€t{…ËÚ Ý¦Îä•m§R–Òmp”nOL¨)ݦÃih·1J1ÙÖF…NG)ÝŽæPºÍ¹^&ÛÚÁnuÂívœ ’mm-ù!íÖê. F¶=”ôl{(éI·JB«‚¤›Ã&³î¬Y·YÙŒIw\=&Ý,ë™tç™0é¦a-“ny¡2[Ò­±Ðá;“î8ê{w\5$ÝÚrÐ?âtO0ÝgÒ͋Ɯ[[:ÆDÎgÇœ;ú#sniÿ\ŠÉœ›eέ]!¦öœ[6Gu̶µy"ÔÙžmëæºþÙ¶î‘>dÛñ 3ÛÎ3D¶ÝÛ‰<;î ól)"¡®ö<[»J»@yvÞ äÙ³½ê†d[;<;Ýód»ŸÒì¡È+ÏnÎóìl½çÙóî.ó̳[Ì)ól˜î ÏžéÍ<[mýlg[‘g ò<{Þ9C<[[b»\yv\¤Ùó,{¦]6³l]r,[†Pg{š­1ÁÖÖ0ÍÎ'ivï3H°³ß#ÁΊ;kC‚Ó'Øùx"ÃÎf¬Ëö>É©=D%}ï˜Ì³+?ü‰µÃôŽX[Ù¬½1!)Ñ_lñNÈ·w„z»à—ŠYÜù{Š’vÂH4¡(™WNçBV¢AÝ3q¨JF–½Í3“ ŠIÚ°lêEŠIæ…(Õsl½ðІ˜Äå3Ç~½Éú @K2O!½¶[A|x¢=–´L{“«¢ÏLX¦­Ö‚z¦=¯L[?øz¦}•>ï™¶bÀÈži+ˆÏÏ´ô÷ Rmñéæ©öÐXOµ‡ôTûjíGÂe+rm1Oä©¶‚¾‘Rm©Â¶T{ Zª}ÕéŸLžj%=Õ¾‚Žj=Õ¾ÎÓåÒžkA˵„Ãsí±¤åÚ b­‰çÚÃa-×VÌwú@®=\ƒ—_Õ´s%¹çÚ*‚&˵U«Ì=×΂H¶³J$ÛWБ±'ÛcIK¶Ç %ÛÙZ$ÛCIO¶ôùG¤ÜcIK¹Ç’–rgwEÊ÷)÷pXO¹ô5ÑH¼¯+b‹ =ïV 3îžw+ŒçÝÛµ¯‰‹Y,ïVÐA ò–w+¶€[ö=¯ì{üÙ²ï1Ø3ʼ0ž}gÌ“oÅ\¨á¹÷³Ô{ˆYæ}ŰŠÒ2ï1h™wÕï¡ 'Þý÷Ì{,i©÷XÒRï1h©÷ôÔ{ Zê=-õ¾zª‹§<ƒ–€+è.HÀ‡Îã øðtX~=§çØðŠ'gýf¾¿Å“²ýþåh§ýóÖi¹þôÓõ§}ÑTôõ§ßÝòk ù}‡î³ûÙÎø÷®²¥ í ¯}þ2}¢£6ºpÿùSëß®-ß ¸ÿò™-x_ö×þ²¢'ä뺿pÿùSë]öwî¿|¥ßàâxm¨o/äÖ”E˵#¾öÿi¿ÈeÖ'k0÷]\â[kì"¿¡¦k«‹ö-~Èë›kÚXÓÿö¿æ/Bíõøã¿ÿš3,2Ç’}Ó&±·×w-¿Ú{»¶Ã¿jþùËÿñeûß¿ü¥ýç??ɳ×ýñ;÷Úû½ð™ÃµeûäWê1móç½ZjvÙò±öëÇϩއÚçïµÁþµoæcÏýõãçÔ_‡û®Ÿ>µ›ÛkñSúÜU÷»Ï±öÏésWýWŸcíŸÑç®AåÝçPû'õ¹«þ:Ü÷Þ箊ö»<¾õ…rQ“£}Íœe/RP =¯{ûj+ÿ“Ž˜QM´©KÙÛ{óÔ3ªÑþ«í£hßåÖ3ê™ÛßöunÕ”¯¼þƒ#&ëYÏIt|ÚvÓ¿Õó£ã-s¸þß²ù£Uiñrm‘ <ß>U¯uùåµè{ÞÆXÚÿ'<ÿçÿú¿þïÿçÿÚã]ôo\9%ÎÓ¶Lí yÝ´öôZ‡jÓ®ó¿_vÛ'ÙuªØ#Ã;´ ófiùv”uPó-k¹qgiwèÐΔØ.Ú»ö­úrHÈÔIß"{™EåZ±Kò°…43ÍGùr8¾TL ­[kkÊT\:¸¬{_µŸ‡Ôe=úbý\\ÄÚZqÍ‚MejƒÑ9»Q1Í᜵uàý8|Oö¥e¨š ›Ê¶µXun²Ôùڸݙõ:¦™š-õµ)ü´¶Ž¹§ëT–Z®ÏíÔ°Œ%­Š=ójí!rþ½hU÷=ƒ¡¶˜‡Mûg-»oåÚy´Øò^§¼ÔíÚX›^o¿«uÿ¡'$ÅWI(t¾Ä*jÅüÒ{-õ¸&çÎö µ_'÷Y.cßz¿j&Ÿò[6[°ï§ë¤=²úrÕŠš¾ŠG??°Y1[F¹´Îù@a5Î\!—­\S~m8Z¯˜-”]6­š{¡æ+fúøvx9WOUë½oõÚý· 
hûÕz[û²l[†{é›Nßý«ìnðÓN¨Ï*fÄ`ÙÎæ[?ú•Ø©™›vZl÷5O­£][þ¾;éì2ÄE+ÂuŸ®Nš±¥UÑêkƒöuÚ&ì^v[Ù|Åú´Ñ²Û–Ôy¹öµ/gÐ%1iûÒ†œg³ßìûvÍb¾zê~ºÒ~ÑêôÖ;¯žº>/¾ìò+8ß=uŒi«ßWÝW™/ºY7co1ýÔNäé:+sP¸ùÊÅéËaû2çõÓ3xù!V®Í}ß½4brµz§º6Ï¿š‘_Ÿ^ŠbÛ5½úÕ^z˜‹ÖuKú”ÊÒ†Ÿg4W̤ó‹†Âû)æãéaw6ËÚk¨¾»ª®´SLóµOWõqÿ¼ÞjwW-î6´œÚü¼»* Ï–³tm­®‹IMÛÐÚgy3kÅe¦bÝ wÑê¿{²!CÛµ—ï{Håƒq¶ç²ÞC*–/§ôÏÊgFï±z¬íbÚ”v‘»iÑõüö9ìv^ÝÖ&.fQ¶¥ÈÒ•x×Þ§á{t˜1¾—öÖüóÓeÍV)l=Ÿuò•%íuûƒ®ãÝeQl»öï}uYÞš¢y†{莫¥Ø#qÊ™*î@qÑÁuµºN¢HpQ¦Þe›¯=|¿6º–KU⣫]êYòŠgtr¥ÏzÆë²(õ¬6º"VûdstuÅžyß¡ÜvM«?¶/×/s˨ö÷àz¿ê‹ÄkûÙ{«wÉ+{Xåã­ØÑ¿8ñþm)@ŸîÉküÂÇùãÒÅÿC¬ô¹û¼ð²;šmxµ!»\&JSï«ÞÇå›¶ôáÕ»±|–—³÷UÄö×] Ÿt•}]|ä1E>ò·XøŠu5PÑ2Ëjë?õ×âm^-ÇQì|ÒÕcvc§¢¡ÛÙû*Ê•~‹ó6ȉ §«ì Åœµ³ÿ—Ú¯¼bfÝUÊvmâûî«QNx¿°~÷Š÷g€]LÊVd Ò×ÙmuÊjïÑ<ƒÕ¶*WÌÜìJû–Ú–:\ÈöµöÄ•Àj«ã”¿Û·KY×Kñˆ;¬æž¿òY=eÚPŸþŠÖKœû °«»Þ´¾6Û}÷WƉóÁ^Û‡ÕÜÓØÕM¼îÉS:ý®f𺇭[N »cî»wÝ6žµ}€î>ßè µuÉ/L @÷hï_[+  {\.6Ï­Ð=ÖÝGaÝc=ˆ6 è+Ôûº‡2¹ÞËt²úH  {Q‚ç—»Øù„ çe¶pœ{ÈçɈ€3Ý@7«w {´/jÓÚèZ¨µXuèí#ݺÇ\}-€î¡µ~bFuó êÅ êsíw¨{HíÞób@]¹©¤u5{bù& ®NÝDÛ€º‡æQ­Ë9Ô=´æ¥V¾O_R9À`ƒº*7w@Ýcª77ž««eï0ðÜöXº¯,x®®–¥½à¹*ç}ÁÁîu•{u‹[ [ž°«ëkÉ-Àn+÷%x®þ¹Ï,8ÏÕd–©ÓœçæÝpž«b>9ÏU9óRÏ=43k ÕynÞEç¹*gnbà¹v媈9l媈#gG¹‡óÈ@¹*g‹Ã€r¯r=CÊÊË=ÚÛÛý檘—Ã\³õÆ€¹*fV,€¹*gëscf0WålØ s¯þÓ/À\±%퀹*gæ,€¹Ç4ùºIÀÜèxs¯ê 68ÌÕͱ…x€¹ªnu®l0WÕ™ù4`îPÎ`nÖç0÷*çðÒ`®ÊÙ†€¹YŸÃ\•3sNÀ\•³W+`®Êûvp˜«r¶20w(ç0÷jéêÎÚzG"Ý(çH7ž‘7Ò½²ìþ©áHw(àH÷<}®Êõe•$ºY¬Ýë‘rÂêDW ºz¾ ¢›"ˆn+g©3ˆn<ú$º­œMìƒèf9ݨω®:ÐádÖ‰îyú5Ý|þAt5–õôDW嶈î‰mYAt¯rëèfuFt‡f:Ò¥H7/ n–{!ÝÚ€tã8ÒÍþìH7‡GºyjŽtUÝëëØinöIÐÜè[Nsóͽ5vÔã47ž8ÐÜxtœææqš›ãÓÜ«:ƒÇsUÌhÀÜ¡:ƒ¹*g&€¹9P:ÌÍ×€ÁÜkœüæÅ æfus³·:ÌU¹Å±çkщRG¯Fs#)ÎD8w(g8w(×qî•-t÷LàÜë¶¿À±¡ÜÌåçkk&BŽr¯,¨˜£ÜHñ€rUÎPîPÎPnž¶£\¥¢I€ruL³›ÊUvîS׎r¯ô¶«-€r¯oA{À å^M±ÁÕQn|z媜 ?€rã[(W§à³¤o”›U9Êo: \}'‚(ʽÊÙ“è(W_Îþê3”«C:Gt”«çÍ.²£\}ä¾ê1†+ `( W3Ý ®ˆ€¿Fá*æ/ g¸ªÏˆ®bþs†+àaßv`¸ŠùÈä ÷¸’ÎþÄ9ÃUÌŸ¹â2&cÈU¹Ã‰l¹*v:26«XwšÇ=²ç-ä÷‡ïâŽ+XeÓÚ/ÿéÈŸ³çP]s €îá¯Ïº ‰@wv îœâ]ºù±Ý6P:ÐMØë@7ctÛ‹Ï逮 «pèj­·«RtOóºï…!$¹ÚÜÅ&P®|^ŒTƒå®äi`¹JüŒŒƒåjëã$s l2s \OsËIÐë0Wv4&²Ó½ ¹;l¿™îeÁÓ…Ó`º¥¼eÂÀ¹ú,0tœÛ>—\ œ+§Í®¹sš[fb§¹íCñ%h®\ûë–4÷ÚÁ±ë:sõ1h8W›zúνÜÝLÃê8×?uç^¦Î†lç^ûdôtœ+½~Å@sµu¸1ÐÜËôë]Ý ‡ã ¹rM3iª#]íbg³*@º—WrÇ•@ºÚËóA$ºóæ‹D÷2í3rú"º­6eL¢»ø†Âºq×@våäi³L »2 
¶!'»—_ª)gìêÍÙ§ov/ÇþìÊKÒy€Ýeq{‚Ý‹b™&ÕÁnkŠh »bGÖI@wåíxX9§»òþê“Þ¤»²üˆîÊÎõ±Nw£wîʥ̮ èî¼¢;ݽ|üL£ìtW·Á^˜ »ÌëHw#Ýåeyè.Ò]¥tÑ]Îa‘îjƒ½î‘LºË©6ÐÝy ÝmM1>Ò]™eé »b“ö Eº{ú°@ºË¹/Òݹ¼ìÉu£àºí/ö’%×¼\7rcpݸÈ产?ŽäººÊÆNœëêÌÀ­ ìê~Û繃]} ¹ `wžmØ×½æ í;\So³yrëbÝyq"í¯ºü ÕWàǺùñçXwžÍ T7;1¨nÜj§ºº†®ÄÕ•ùžÍ¤8ÕUÌ2 Rݹ¸o¨îuv6€êòôêæ=ÔÓs¨󙄺š¿í‰  ®b–¯êÎ ¸»AÝ¡:@]ÞW‡ºùu¨±C]Ý=׊ÝPW=¶»sêêpf–D¨ËWf@]âB]Ìk;ÔÍ‹ ²¸ÂÉ®ÎÌÄ™»Ç{ŘîÐgºêY}± n>€ºÚÏÕ>Úuu!ûÄ;¡n\HRݺkR]>¤º'Dg ºy~ ºïíNÉsˆþÈs¨þÀsoD{@¾ Ž{BE÷æ¸1°€ã FõT)8îÑ79._ä¸|ôœã>'€»añ)î…5).»(qƒâââ²Wâ2‘ÄAˆ»c]!.ñ"!.G@ÜÁÄÝ| .)(./&n\0\H„‚áP“áž)Õ=öñíO†Ë I†Ë‹• 10\¨ŠÈp7¬·Ãåx „$WF±ÏÎ!¹yäÆXËC’ãNN¥ÈqãÕ Ž× Wö§qÜ}Àq©%"ÇåÅÆ•3èÔeüŽquQ\iì7F.p\>ä¸Ï»Ž—‰ö…p§Í®}¶•á°°ö7×à°0û[ü6 &ømpGç·‹ä·J pß¶Ö?ŒÁo—nà·Ä5À·œ€oµ˜‰ó€oõ±`òAàۥнÀñ­lÍ Tâ^[Èq… —7 +¨È¥¶•ŠÜ ‹š!Én'ç·×ö'Ƭßè¿ 2AM.áÑÍo¯­6ž[FIî‰åÊÔäF šÜ“^ÐäžXDM.[Iîñø(8¿½vè\”jÜk¦ÁoípPâžX Nt{BSI%.r:\Þ«·~•¨Ã=ñÜ‚Ü÷¤—w…ä6ʹÍêœÜޱNnóô€nKt ?…ºMòéè6vGsv›åœÝf}Æny·‰nãˆÁn½˜£ÛùôwÑmè–w”èÕ‘Ü¢OÜF¿#¸åÛÆ”ÜÖOÜ6†9r[vIrÛˆ·¡ÜvŒun«áÛf,Ám9LnËc>ÜV´Ì{ඬŠÜö~ØfœØ.€ƒÛ 9±½¶ÂêŸì ¶Ä÷$¶½Îj³„³Zv²ÚŒ9±+ bõ‘Ø¢S±ýÿx{—]Knì꺯§8Mø,ï¸GÀpÇ€Ýp[=ÃPÇ.(e¡ ÔûãçŠ}á3ò¨òò딹W2È`0äàääaÛÛ׉ÕòWPÚã]+…Kþ ´YlRÚ¬ QÚÈN˜VïB`ÚˆÝ9­/fN{ØŽ@ V=Q€ÚȈ 6ó#¨KŠÔFQDj/±Nj3&Rù‰Ô²{ PëV#PwgP«HÚˆ‰Ôº6Ej³,"µ¾=“Zm‚ÔºÊHjýý4©ÍK’Ôzx¤Ö·.R›±'©å'+@­KaPëʨ›¨ÍRÔª™˜Ó:DLå'¦$VµoVë‡-V×4«õ5Íj/±ÃU]“¬6Ó‘ÕF=‹ÕF=›ÕF:²Ú, ˆmV ‰mÆDl¸bŠÜfº¹US0¸‹ÜfŒà6.InëÖ%l$½ ½Ìo/¡ã±ªÙmÄo£ùÞF) oÝO ÞF¼ÍámÆo£,„·®ÁÛLx›!ÂÛŒÞÆs¼Z¼½Æöí:$¼q‰àm^Žð6c„·1ž%¼1«àmÆHoc<+z;ÞD…o+†e&áÛÈOø6®)~׿ÍùmLÄo3?òÛóœu°]ò[KcÌo¬ p¯±Àa_òÛóõ× ðm–Ü%"Æ·çÁæ¯Å¢áƾ÷Ÿî´÷>=Ø-t²ÉnÛw†èÈî¸áD w\fev{³#-ÙmÅnC…'v;L<Øìv¸Ñ·Âð¶vÃYRð¶ÍT%{%¼õ~ ÃÛšÅÂUð¶v›¼o½ÝÇî¸ÞŠewÜÖ„/> [½Ø-oÜöõv.kÜc’5n:GgäTêöÇ=nÚ.ÜÃØî{h¿³ýq÷÷ÜÊ]˜mÀö¸r {Ü•zÛã.bû²Ç­ŽŸ& ´Ç­F_ µGnÆè‘[–.]")‹Üp-–E®™Â"W>6a‘ëkÚ"×¾£¶È}y.ÛWžLvǕŕÍqmšesܰ5¦9®mÉlŽk“D›ã¾,že‹ÏöǬ@}ùãJŠþ¸AuåK½§üqËŠù/õÇ {áôÇ¥ô4ýqå+ÜÒ†=î.« Ûãî²£=nY÷^B9n˜øÊ7LeŽ»í2)—9n‹a;šÍqË‹šYºãn»íh[Ãß®’=n a?‘íq7âö¸ÓrÇm!¹Ðw[¹×àåŽ[cwˆ&äŽ[G®R¥KwÜJG½*Ýq3Ýq«X§;î¶jc–ÜqköÅ~¹ã^cÝ7ctÇÝV~Q쎻­V˜Ò·bPË·ú)ªOéŽÛbÐÒ7ctÇ­s 7–;î¶Øª€î¸Û"¶Üq·…’Vù)T¨jiŽ[ÁÎEù)l‹6VèVŒ;õt+ÆÝ»²ÈÍØé§P‡ÓU€¹‘‘üª€t1 En‹Q[.C…ŒÁ"7/IC…LF‹Ü¼/*lóCÏ-sÜújÒû—V -ÆÝ2Çm±ù–[1J‘eŽ[ ‚PZ)ä5iŽ[éȇi¥ùÑ·F 
ï°Ü­ 2ŽÎ{ä»M\è¤Iè‹[›ûŽ;)l£ÕÉôج2FcÜÈOƸ-ƽÊ2Æm1:qÈ·ÒAV*c܌ѷŸYƸ-F«GãÖXºï? /nDh‹›¡-nÄd‹•)[Üz°Ø ÿ°ÅÍôÅÍqã–eŒ›%¤1n<6ãf2ãFíË7ÓÑ7‹IgÜLGgÜH'gܸ9ãfŒÎ¸ÛÈsíŒÛbÔË×íDƸuý¾QB¾¸gÖÐýÒ7s£/n¦ƒ/n†è‹H¾¸‘N¾¸£/nTØÓ·žMw6´/nÝ65Èw¸OÖ¾¸u¤3v±È7j‹¾¸¢/nfG_Üã®ùâV ÚXùâÖ¤½3ûâF:ùâfŒ¾¸qëòÅû“/®Ûžlqã¡Ê7c´ÅÍìèO]Þ¸yMxãÆS7nÜ­q}E[ãê}´3®»”Ó×=·ñÜïsT¨s…ãþ‹¡ÂdÇBÝêG_òm3ÝuJ"Ó­olGÅt—C73Ýy‡XÜH·.ÓF!ÝVâ8Ô þ¸7ÕtküGÄJ¤;m>dŒH7öÚÛwIÍ-üqWnú°·¦¥« ‡\jË¤Ç OéqkôÎû£«‚Í ìª°r¿X¸*,rx•*·Åˆ+)Êš©wSW–MfYyØZaÖFh©r§™‹,a­0uËZ!ìd­0 -JšÛªXJdY+X]nk…]ÛåϭtØÆ.}î¸É¶ÙÖ ¾¦­6n1 k›QH¡[M T Ýh²RèVMðV»n,Ð}9iPš×¢27¶”¹-†åB+s#”¹Îî!ÌÍæÖ aBÂÜiâ s£€æF%Q˜ÛB²w 27^)s󒔿æ-Pš;-Ú¯!in¼ˆ’æF9¥Í-q|r$Îm±Çr‚t¹ÑÚ$Ìõ#“.÷ê²Üz6Xi,7s£,7úÊr‹‰`B²Üº$¥·”åfv”庘–åºy½d¹.¡e¹Ä6Áv£çf:Šs3ĹYŠs³ô窫¶67Bèúƒbî´hû—ºÓ¢ÍZè¶v°IŸ;-Úx(}n$£<—NWÉ,ÓUº5´èËã§Jw:¸*þmN ¿œŸÍmö´ïãÛ§Ú\üüӯ矶ñÄÏ-zÿÓÿýð§oȻޤßÑ® ßĬ¿.˺»O¸ã?:Ëã„M÷}ýåFÏæ?¸K ¶xþùCóßNÁGžùÈ<ªýþä_‘îCžÁ½Ï?hþ÷jàù—Ï”à~Í_~§$m"Þ>ÛØÞ¥vµe›ë˜Â+nkêß~zxºüÓÐþ›[Áß~úôö_ÿоRÿøößo?ýçÿþÓ—æ4Ï5ÎÛ Ïû—ç4CNkûB-«úà~qF«3úçÿÞÊü}<Þ~úÓWÝàÖFcËÛR¦Ü·6Àše¼ä»”gذÜsþËÛ¿¾íÿòö©ýïϾ×è­û'ân´ÏÏÂGöָ˹¿öqºû¤gîóôÈØ.¹Û}˜Û@úrï÷?&ÿåòÜë§mæ#ˆÑæÎ¼mιL›;ó?Ûœsÿˆ6wv*6§Ü?¨Íù/—çÞÛÜùA)‹³ýK?(ãq~€öšú¶ÊVg^îsQØéú!»½=¾&_ý¹Œl–[ùc·þ|Û'›å;³©Å 6+ܶ}¼~‡ã™Íð½·3Ô©…óв™>óùïù|çýÌG­£­·­Íò'›1oçïdQ*•â:c›(Î5¬Ù÷a‡ùòdêgÿóËÿ~úôóßþöóo¿ýöó§¿þeÿó_¿kLñöE»Açe²“ßk½#¿?%Dï`ýõµS #^º”n•²ó\¥yÙºrèaÀZ¿íýŒÂ™û·Zaûv‘ÚkˆFórtÍPl[˜Û nìÂzm[˜×¡oÃØJýÑá`Åö—°~]°j^Ç.ÚZ¹° 7¯S— •Î;Ûï?Ž/]ýºrßUëiú‚mݹ7»b}h+,ÿçv™¾ ´$1}«Ê¼nÝWq[6.’ÎëŽÍ UÝÌ¥Ky ë—•ú õÝ që%Ky ë Àã’%Ky ë—… ÇóÛòm™èz7?”ê[«rø{×Ï};è²qã\[*^Âúâèä͵Ùå%¬_V*1Z·Œý yk[ߺÌÔWU¨ïÍKîØºLzà%Í{©ëëš L;,§·eàÆ±¹¶5¾ÔõËÄÓ;*Ö7„.7 |æ}ĆÐv{¯9aCh{:P3ÌíãÛ7„¶ûƒºcÞ!É'^ •—º¾–\pÜT)T^êúvØÓ8—B奮_õ¥Py©ëK€·¢œGߺÌܰ>Ÿ[ŒŸÊÏeàÉ*ëÛB—‰²ºùÀîÒJ‡pó1bsè²P¬ñ$ËÄM‹sëÏúæÐªä®«¨Xßr,غ,܆:·ïIߺŒØ?Ù:«¾74ëcÇÞÐZaë][dúRןRÅÞe¶†.#{à¥Î|©ë—‘{C—:„ó¥®/!h_\^J¤òR×/ U#ë›CÛ-`ÇìR"•—º~9°·”Få%®_vš·×Ȩoõ‹¼Ô„/u½»¡ŠõÍ¡Y” ›CóÖwl­ ¯L-eü”×»-/åÿü’×»ßXΓê_½,ùR§¬èdá0· 8âò¼ƒî÷¹ Ó}s¨ßà¥uûæÐZlí_ï…‡WT:(,60f…Ô¹FaV%ƒÖ2Àz÷|wú ØR"•§ŠÊ-v)ÊK\ßZì*Ö÷†:ÙˆÓ|+Nå\ʕ㥭_fj!—qìkñ^-#μˆ.½b¯U‰º»»EëRÆÁÏ:8î«öMîВ÷%Ï¥ðzéêýXÆ »B3ÖÊÎgÓG<˸÷eæŠaôrªS±†'䵉ÀëõkSnÿ_&4‘ÍdÂFˆ|7¦ûB£Üqm&˜±/4Új‰T@Ø (ˆÎ6W•‡^+{» 
#ŽŠá˜Ÿ¥–Öž¢²³a¡û™`/Ÿ5Üb}[¨¿ùmȂ͡Ñ5ÍðÇ©kb¿ûÒ†ã/¡f¾ômbòÒ¶¢AÎÐF÷2Ï}Ø–52ã{–ÉVì m!lž_æ »B+Ö…ië»B³BöÞuŸEé–f\Ã&¥$*Ålý NƒÌbÖUúèUw·Àå|¤}eyÔj|ÐÛè»B³ v…ÖP¯Û>-mðõz•âbÏfƒ¦¥ _mâìÍúºå²ì½íf/X_Õ+&vË ûžìbZ¬o ue­xŠÙk­cŸVf­¬8©8÷:wGƒ;«©Ÿ^#ß|=WE–¯çŠþùI.mJ²º5–6e@§Ê6PÚ”áÕ©úóTÚ”çL5{ŸíöÚ¹“ßÐ '`G×¶¹>bõ×&Ð>bõëûXºWß6c#hþÃAã…-ƒ…õÕ™j˜]±çêzŒ¤—­Žp~u¦®¡ zƒŠ<,bjØ1ëËR&/¯‘êºrˆ¾´—ªï5XvìïÊ;/9ÊíÕ—ú5*9Ê€•ý íƒrd\r”>Tõ­ïðn§´nîåܰ4^Û6ê;A3vôo¾-öÚÿ“ͪ¼–Þ¥ò“v`¯Ø£m#6€Fç|LØ¥k±ýÞBõVØÝ¥Í›úÐxcµûµUlê»›–c{íÈgyàà|&EùœKƒÂ1*žózƒ]`¼më €¸õŠõ  yMœ~5¹Þ`ïÕÒÚwÏž÷×72­u {©êÞ+Öw€.6®u^Mߺ)´qÿ'mnÖlÞ¢1®7Å3_Ï3QæÞP{Ÿ°Öau{ïPÑ„×:ÖrG‡ª˜—ôêt¿>JÕl}ŒªðÓ·íÿ|žÄòw¶}ßÖö¥Ge&½Ý4ÑÛÖñc{ é­Ïѽ]7šàâîµUœƒw_6Á+BÜ:æ ½yã'‚¸çR·6…Â.F76ÅõÑÌ‚¸µilŽ7¼EqëLÈáqm3)†g'‰áÆ d¸u8¢¨7îiT’N†[' aÛ§nŰ·S ·L qžÏ“áî_ÀµÈpÃ/Ñ —y0Üý6ÐwG ·Š/@1ÜcäL[ ·ì±¡R ÷´nä]“ᶯx$n ÉbÉp¯1˜ú}Ô™ ·¸ ȼ®ÍpKÐÙ§f¸EÑÄp‹Qv#3Ü6°ÀFS3Ü–Ž‹b¸µ3¯ïÃmã .3‰áʼÒ·}¢±9Ç·]½j„{Ì÷=¨OxYÝVº®S6»mÉàÂhvÛþB#X±ÛãÆ™ˆàí±p·¥ám¼¢·6Ö4½­}”W¢·íšØMjz[Û91<½m×$ð½Íéí±ò,+ãÛcçÎã[·cãÛã¦1±ðmù›‚r ßf9‰oÝ …oOý4H&ñí1ÑéÚø¶.9 F~k«Uó[7俵ͪùm»–6Ìo³øä·nEâ·îDÍo³„ä·yIÜš`B&€[ù=¨)ámMNzÿixÛ’ ÞF Þæ5 o[—ÂÙ¸n^“·]~‡&¸-]PÚNp[Œü_·6 ôÝ­&¸UNÒ]ÜcÖâ nd'‚ÅÁÍ n+&>©‚ë5|Ü*é. 
n^ŒWÄ"¸-BîK€[», p³¸‘ùm”QüÖÉ„o]HÑÛêÑ^ó#ÃÛHE€E·¶í¿&c¸Ç`¢J‚Å'À[7*R7Þ(\ß7ùm}ÚÁºÅoOk×Pßü6:]Üxgž·z%°9Üø,àÆñ©¸q´î1˜>àF§+€[[|È™ pÛ5ÉZp£—ÀÍ n»&qnï¬oãS#x›—#Åï‚(n^“7Z²(nuÖ}yÛ·ÄG@ñDqã@ŠŸDQÜhw¸5 ź0®5ROŒÏåH6yn|Äs£’És[2¶dñ\þÍs³®ÈsÛ çèÆ•@·æ|¯¯xÝš+âlWÝÌ@·ÙݶÄ@ד0Ý‚Œ}Fn [÷ÐM~ tkb€º€nm{èÖ=lW–{lô_0Ë=­Jz•å>N0Ä­Ù/iî¹Ñ®·oÑ\Ï+Ÿ4·žÖ½Àä¸'>ÀÀ^÷8¸qQ·ÒÁq–·B+ÞrÜóÈŒùÉq«É¬}Ê&Ž[1è ÅqÏX¼â¸ÅàC-Ž[1ŒçÄq+ÏhqÜ:¦B<÷Œ½Ãq+†/¶8nÅÀ¾rë¨ ˜‹äîÃÈýªB¹€„r+†îX(·b@…d¹uz¦²b¹ÃÜË’öÊ®÷>b¹õmF¯uŠâ}x÷·­ýu^~¡É=:¾¼PÝ•ƃêNb¦º7޳Mu¯Lukïß++¨»®r†º“b‚º | u—;§ u˾w ¨[ö”¼ êj;¼©nʯýãoª[΂d©¢º£Nd7Õ=¸Èd¬k 7c]»%ë®ôò7ÖCLb]Ÿ¡c¬û<4€î&ùæ è’ èúÀÝê=¸ŸÚ@7x´€î"ð [o:´ºŒ—¬É=vZˆçÆPB<÷J`)<·î Hæ¹ÉeÅsõž{HHNž}xð\ cƒç.t3ÏM žßÒà¹æêâ¹³•«â¹+ÍæèîwϽé2Ï-å}P݉K9¦ºÂ¦ºžÎÕ r+ª«É“©nT©nÐtQ]Ê ºž$™êŽRÛˆê–ME'ûAu=§7Õõ„ÍT—5m¨ÛZÓHÝ-¡î¾kÀõåP×SfAÝ}åCÝjÙê¶šÆfŠ€º7É u÷™+†º>‚ÊP·Ý:Vf_P·|B0ÃÔÝgîR0Ôݺ¢êÖ”#lBÝx‚º:™ÊL·É¢H™L·åÆ9™˜nM`úç4ÀnËÏ[`·&Z˜× ìî+MQ캒v_'Z™é¶J†ÅZ0Ý…ß3ݺm¬3ˆéî;¼`ºqÛdºÕ¹ƒk‰éV3¿âÜ}ÔdY8×ÏS4×›Í]8Ø|Ð\íe˜[§ˆ‘¯ æê 4ÃÜ–ÜšDsÛGUüZ8wÔT^8wŸ¨p0έ÷ö==nËC<·jSyÝzÎ@"ºåüŠy¹nlpk‚˜®J3Óõ!p†ºûÄÕ|CݲÂ} è uëô5ÐA]Z4%Ôõ™s†ºûMJpRÝÈNT7²ÕF&ª›Ù=¨nÊkIuÝœ­ÊÕ[cQîMˆL73#Ó'*¦Û;%áÜÈI8ׇÒç‘ÎÝG.4çÖȘb]2Ý:è˜næG¦SLׇà™éî7­¨Šéú@@3Ý:šOL·|¢¸š8·¸#©'pnaG|C…së®ñUÎÝë|‰s[Œ‹´œ÷+œë³sëˆ>BeâÜèЈs«é¿”¦¹õÄú”Á4·Ü߈©Is}°¢in5~RYÊs£;“<7ZˆhnÅ@–Es£kÍç&šë£Ms㹉æÆÛ!šOO47ó#ÒÍ:#Ò·˜H·¦”˜ìFsÙìDv[vØñúÒéÆG:Ýúxwš`¾[^€˜ŠH§›wnË$Â:Ýø¢H§›wF®Ÿuºþ®[§ëü¬Ó­ñ9-uºÌ[§Ûbµ¿ÔéfY¨ÓõWÑ:Ý‚}âj¡®Ç¼VêºÛ¶T×­ÁR]7 Ku}­´º~êÖêfL¦!~|Òêfvs¾úßÊwo/¾;´ÿ5pwìÙÆp˜Ã† »d˜‚»>i@pwºq«°Øî¸syÂp7c„»m^Äö‚»m>†#¶ë£ßÍv‡C;íÅv[:ÌÌv}´½Ùn›µâÓ`¶;ìÚï,¶;\‘n›cI=”º+ØBÝûQ’èBÆDw%ý¢»X–J¢ÛbTg‹ëA~rÝr™‡"Õ\wS7×]d?!®;Ì\3×͸n–ž\·Å@F,Ô-ÒÇz»ÃbQ-Áî0ã4TsÝ*%|EÈuÏKnwé+n–žH7Ÿ™..š[%À10¢¹Y¤¹—¬HsãY“æ^Šßin®/æžEA/Aš›uOœûxÅHqó]yØu¶f+ä ”Z)(ä'Æ­tP¯ ã^® Œ{–²{`Üó=ÇaaĸÑâVˆ¦ €¸¢b…÷ĭ΋ó BÜŠQÓGˆ¶<‚¸µa Å­Å.Lƒ qϺĮ)BÜûC»}þdOCÈö|ÂØædý§m^’È6†môòB¶y£dû¸!’ÚKÉ@j/¹ƒÔ^bÔFK©½¨6ë¨v?Ϫ}Ü mfCB¯®m„ö3±¡ÍZ$¦½Ä€ióÙÓ^î˜6Ÿ/1íåšÀ´YÄ´•e‹`µù “Õž1¬íÖ^ÒÖÆbïÃoxØ$¸$­½Ä@këbÒø‚Ö^ ØiíÙýPÍ Z{É´öRU µîAkÏK‚{ÖfQk/é0¹ÈbÖºŸ«½\¬örI°Úì°Éj/·Xû™ØÁ!©ÊX{IX›­ °¶B´¢ ¬Íö@X[1‰Xkó{ô€µ5àŽÂÚ˜'X‚[ý-(Ágë@\{^¢0âÚŠ‘è×^b`¶g ë`d¶™™m^“̶b÷D¶YcD¶—¢Ùæ%Al/É0Ìd¶—d¶#6$°=/Ù×Ék370ÛÙf^D¶™ Äö’ 
Ä6+ùᦞ¿‚Ø^b ¶gNp–"±Ê°­ˆ 5ˆí%b{‰ØfIHl÷óT°¾ö ým´céok4u ÛK:Û|½Il/×±½\ÄörMÛ|ßHl/1ÛK Ķbdø$¶ùz€Øæ%Il£ÅØf¼6šÑSˆÅ“7Ë\{):帑Ô¸yEªqã¡I›¥¤××´7cTã^c]ëü¬ÆÍtTã—;¬Æmãg¬Zk¦e5nq2Reªq5³wœ y)Æ­¡|_+³÷ëbÜñFF/1n†(Æm1°v‹qkÏûkÍÉZÜ‚åä©Å­yF×G}›¯Â-î×8ãâhì §µ¼UœÖÍÅœ¶=H4Äiy†g€ÚXȲ?n¹<ôzøã& ôÇ]Ga3ûã®%Ò°Æå§io\ۚʷ¾Ù°ýroÜ•ÃÑðƳì·òÆÝy¢½qÏ¡_w°5îÌÊ’5®}:mkÓV[ãÚûÓÖ¸22zYãÚÿ'¬qñpƒÖnu`:­h[GÒ:Ö¸ó!J[…:ÉÒPYãÎwßX†;[¾.[…:ü@O¶ 5ŸÂ­ËVa^¸SÆ2Üy¶åe¸óDùe¸ó¤­ø²U¨#í!D— w^Ø [†;ÏÒ¾J†Ûò“Ý.mæY®Ç’áÖ5áBnO‰§'ny•wø-î<©­K†û°8rNªpË|N~{ù•2ÜÙ»$ÃŒ¨ÂŽ­Â­¾½U¸óø¾ 7“Q…[ÜÚp©p[ŒÃj©pkGF0Rá΃} ¨Â­CJ!v w8Ø“7Ü8¥Âuÿk.ÏD €[1 ‰©Â-ÔB–Lî|ˆ’ 7¯ îtÈá–"Ü:ƒ Fdá¶Øôž·. ¥D¸-ÆmÊOn]Ú@‰pëÜ;²VŠp3Fná,ò[ˆpë,:R_Špë8@zFP…Ûb2€  ÷ë*ÜiçVp‰pkŸ ôßáÖ%‰9)­“ ©b¥·`_²w’™”•¸™ŽJÜ:2“ŠZ*q+”ÜRâÖÁÄ¡I‰;m\¬²·b±0E¸Ó!:*q«iañFJÜ:VÙ¶¹]Š[$• ^Jq3vJqëlç.à’·Ž‹Æ‚ޤ¸Óª]’âÖÞÐÉPŠ[§EÓ ˜RÜŒQŠ[1h­$Å­£Ò—·àº•bèŸ ‹p«´èR„[G¨Cz)î+'ÉoùkÞæu(¼Í…·ÓÂÍ’Þ¶÷BHy[G×C’%åm¦£ò¶b˜BKy;-¥Sy—”ò6“Qy[ß4)o+øÀSy›—£òvâÞ°SðænIoƒ§~Jz;͈˽Ä(½-ý eÃÔßFLúÛiæXëo3Fým^“úÛLGýmƨ¿ÍkR;ÍÏ ’ÞN³¶OHz‹$TÝâg n£eHpTªÛiæÞL©n§‰göZuÛbÄùRÝVì=Õm•žv wÕm¿' n³Üf(¸Í{‚à¶.‰M$RÜæ%©¸Í{¢â6bRܶf*VÜN+ô/ܶÔ'ÜN“¶jHp[Ùa-K‚ÛLGÁí4á uëm3õ¶òšô¶õ}Äú¥ô¶Y+ÔÛÖ7”zbêm+FGèm§‰çRHn[!ŒA%·dTÛ^B»›ªt¶Qjél§ÉÆÔÙNµu²µ©³½†ºÎví Aí4i»–t¶® ëlýP¬³u»²ÎÖå´Î¶ÊB]êl3Fm^“:Ûk¬ël«^úTÄ:Û:O›š_êlëÌlzúRgë^È:[wÖÙ¾ZŠ$¶î€,±uçh‰­?<–Øjº>>ShŽßFm¿ÓA=kÂÛUµmx»ŽÂ7‚·±Û›ð¶x˜„}qoèÜ,²-ˆÖ£4¶ó¦-ÛvÅÝÌS劻ʅ׮¸«¶ˆÛ@ay—ßV +„ÙÎóû"ÛãÆl‰l‹iÑ”jÛbZ8MIjÛŠ‘ SnÛb´¸ Ü¶ðDºTÛ¶TèRÛ!ìÏNbÛ²&¦{Ŷa[üÛÆÕ¨µ-†¶%­­Ë.©m–RÛ¨)JmëŠ2Jj¹Qií@JÛ¸e)m£ª$µuvRÚ‚…SrÛ¼$å¶5¿ë²2ËmóšÔÜÆ3æ6ó£æ6ÓQs·GÉm’[§’âÖmH‚ÛHE½mxeKxÛÚ‰)róšTä¶7åI‘;oÜœ*An&£ ·%ã} r[ŒÛ‚)È“ 7îN‚ܪ²M r«˜Øì}äF5JT‚Ü̈‚Ü, ¹QUä‚$"¦ wÞdò@=®Œô¸IŽ[ÏüZrÜãvOÉq3å¸ñ<¥ÇõÚzÜŒQÛb]ø’âzÌm).’P…[?cMA*Ü*í¨Âm1î³¥ ×{­Âí¥wÞßuK¨÷Ò>¸ígZYH€Ûbôz´Í¬(À­’ca†ܨ pqSÔÞf=P{[¥Çò´·Q io3µ·QQßf:Šo«F`..ñmÔ–Ô·QËRßfŒòÛã¶oÉo+†å=Éo5µü¶šmßü#ùm…°ðGým†¨¿YI€;<­ØÜŠaYó!Àwù¼H€[1#FnT£¸óÑ;eÜK!¨À͸YL*p#;)p[Œ®9Ràf:*pKÖÓw[ëG*n<7 p3FnfGn< p3Fn^“Üh'àÆc7ÒQ€µ)nƨÀ­vI‚{ÜHaÑ_ÜLB î|h‡ƒ4¸#“…·d\¨£³w>l)@n¦£w¹‘T[„»Üˆ•-Â-é±2E¸•Ž˜—"Ük¬‹póšáÚˆÔ*ÜLGîù%·¬Â]nÀýá.7.eH„5&n•Ð\"ÜŒQ…[¥ì3ÿS…[ç¼ß*ÀÞ~97[+ã}|ûôÃ>®çŸ~=ÿ´gMe>ÿô?üé[€qÑ­7©|ÇwT¾Ÿ+È71ê¯Ë²îîîøβ½Æ'È9oôõ—µRt 
j\ý,ÀãÏšÿ~j;xþåCKp¯öû“ýe£:ëCžÁ£?hþ÷jàù—Ï”à~Í_~§$s¢Öaµ™îð¶”p>{°m­2ýÛOW–Ús+÷ÛOŸÞþëZÿþoÿýöÓþðï?}iFíƒØf`mœ0´þð‹sš¿>§¶1[xnËüå9­ÎéŸÿcx+¯ŽñxûéO_s‡cû,·'ÚFd¥¶ÙÚwj/ùÖ×§]þžóŸßþõmþ—·OíñÍ~@oÝ?p£}~>°·æ]®X«ü¨Ó9.»ä~þø1ùÛ%÷Ó…à#>Ìm–s¹÷û“ÿryî÷#”?°™Ï~D›;ó~´9çþ1mîÌÿlsÎý#ÚÜÙ©<Úœrÿ 6wæ¿\ž{osç¥öªì_üAÇýǹM–a?Ú‡¬M—ö½}eŽ6I¹|Èno¯É×™M-ˆ-íÃÕ¾›ûïd³|g6mæß¦?ã¶íãõó8Ïl†ï½¡&¼mرÓg>ÿ=Ÿï¼Ÿùh“î©ÍZ÷öÐßÏfÌÛù;Y,Ka–iŸ×ñm^jÃ:óåÉÔÿÎþç—ÿýôéç¿ýíçß~ûíçOýË_ÿ<טâí‹öx®%C¡N赊•ߟöì\K†òZqY6JMÖ’¡¼V\6¹®GßUq.¾b+ñ ‡ÿ:þ <ëvƒPhÚH¶B¡iº/®ÛH¾M'm}ø.±—Øó›û&‚Ú{LWÇméc„%àºAñ_é¥×Vå89{àj]ÅpÆÊÀUšuÛyÆJ\ó !Ÿ|pÖr«}­»øÜ–uÇNÑØ[}ú….¯½ÎÜ‘]J”ײ‹³ÖR¢ô#VFyrî3 ùäN¸¶©Ñ©ª Á(×ù×ÚËxãúq»uòíÔZ­ûÖ™Qnn¯3 žk/±G½N èæ'²ªØKø˜w}Ü ŠbÖ¹|Ýü„^>ë1ÒON1kB×½O|w¼UÃk=°Qô¼½¾Ö½Ko5a?°òlîÜò_j”×úKlÝ?vZóÉk¢b°æ£wÐZjºŸ@e°•…ö']m7´0ÐÙnÝÎùtî袙 ½Vˆ_'ÿl·‰ž|ªÿí6Óœ/ q§Iå Ô•Cõч9ŸlÎA÷>‰œ¶ÞU{ Á­Ï-n!ò@ä4Θѯò)/3Ýú| mÒåAyÍ…v}~`ËJ»>¿ÖËF»>?›e§]Ÿ>Ëa·>¾ÜËA·>6ÖÝú|ëë@·>?…¥¼Üúü–&ex§[-Mʰ|¾©®3Ýú"¶<Üú|gmÒ·>nm·¾¥¢+¶Ó­oånŠѭO¥?èÖçoP›AÉ­Ï­M¡àÖçÞµM¤äÖÇ÷­ÅäÖÇž·M¤äÖ§t³íú8ÜÛÛõáõÙVºõ¹3o1Ùõ±l›íúØÔK–Â+ÛPÉRè×Ǫ.YÊkÄê1þy¤G¬|%÷ÁŽ}вìã:Ï3ø¬e_a2YöñîZL–}Š-OË>Ý×jË>>š}ƒbÈÿ:_L–}|Üu¬<û8€¨ß¶¶i”<ûø‘ªøèÙÇkó(yö±ëtÄ>nÕt»Nî늡q ì°N-\9z½=Ž%ÜúXŠb¡¨¬’£pܪ»ÆIӗرP4º*š˜¢Ÿ'=¿Æ­NW'zÊ­®brëÃû]]rëc²Én}xëÐ.ò`¦Ž9•_øyjñ¶¤™ÚyÐñ"1ñy˜ñkàšYí ùËXǦÞ0pŨûДëà×.òéû€ón¢1Ô±Ñ],·WR”×ðµ4íÀU¬‹…2¿ÙŽ}x‹OSü H€ eXíØ§Øö®cßi±ûŽcŸ>‰Å€ 3<r·ì)É<³xDzïœÒ²ei±ƒãWLO¨õÀê /Ã@XöE2.ŠDUŸ¬ïåÙ'(x.«´KA÷üm»?ŸG«üMŸ†¹u"Øs0|YH½=L\Ìq *‚‘ãn«tP⸛\ÅÌq·÷Ý™ã®ò12ÈõqÚÆ¹«vÁç®7Ò#œ[ÛFßùm Ã8ãÜ;.Åm£õN—¢¸!ŵðÕ×:HSÜÚ‡½Ô¢¸Þ'aŠëz¸e†³€„q[ dmãÖ~„.D5Æ­½8®é‰q}v¸1î´I>(Œ;A›·\{`ðIŒ;m’ ãN£X¦0n=t…§1®Ï2Æ-Ã2PrÜׂ„î4«•ŠàN7Zg™àÖ¦™Þ÷˜àNÃæÒð¶ý̵ÁÛi –4¼-rL.x;îÏîCàv9ã·nÜkn;݈¬Åm+Yo¾æ¶…}0/½­óšzýšÞ¶kb/ç“Þ˜ê/¦éíx¼úÁ„·ãÁ—Èðv’¸Nðv\i3ix;ÝÞ’ÙŽ;}¼ÌlÏÅÌ>³õ±Um>Kt;®´) t늺7˜¨¹[¹Ý{žä¶Åpì·ÉmMš1¹ÍüHnÇ~Z&·ãzwÀ2³ç%f»ÑÁÌÌÖë€f¶D§ÉlkW ¦Oh[?ÌvhLÐvÜhbfhÛò…†¡íF3£Û(¡Ðm<¡ÛàB·UfE¡ÛVL|ònÇ•æ«F·UuDÅD·Ü—™è¶Ò¡sºÍ{'ºmå„0?ÐíJ[ £ÛqyhÊMmãéÛfLØv¥ƒš¹íLI¾¹m<9qÛqæwÚÜÖçI<¸m%Àš—¸mËhä·mo †AÁmºz‰Þfv¤·ÑzDo3Fz-DøÖãoýh„o«…à“&|[@¸¾½Ä:¾­ü°` |[;šgàbâÛ,'ñm–“üöëü¶vI÷A”ùmoåB·Q•D·Õ²úôÁèÖ'œݶ|‰Œn£ôOt¯¼ÐmMû˜,Э¿PF·Ì€Dnø’ÜÆgCä6:‘Û¨,‘Ûq» Áí¸Ð¡Íà6>+·Qa·™Žä¶sïŠ“Ûø1ÉmôN"·Q"·ñ 
0¹õ‡]ä6Þ‘Ûê`IFAn£Ó¹õS¹.Qä6jåAn£™ˆÜ–w@Ÿ(¹]^“À·Y·1h2¸õp[à6j_à6Æ®·1v5¸õ+gpëÑ€ÀmŒÈ…oc<,|;Ý*Œocö"|[CôÙ·-F¹ˆðm ‹z¾ÝèVk|[°Wó2½ÕÜLð¶fÄïÁ[߸á­ç¢†·ÓÈ…”¼=‡f¯©WÀ[N²ÌnkÔÖÇòf·ÓÈå ±Û6h×Ïìv ëÍnk°×Ÿ¨Ùí´ñÛav[N̽û5»b0³Û2¡ï™Ø¶Æ‡x.¶*ƶÅú‹cl[íýE5¶vÉÛÎ0£Il[v}Û;ØVþL¦¶¶T2µµ‘¨mÝîNÔÖn3!¤—ÖÔÔÖf"w1> þ­‡be_ÐÛ[ÿò%½ÝtjD \ª`„{³J—7ªn«Ié*„+»Õ@¸2 „»v¾zA¸“ù©îbî*„{sYŒpé‚va¹«ËÝDÈÌrå¨ua¹¤“f¹²Á0Ë-“®0Ë­þþ–b¹“TŸf¹7‚†`¹³u²b¹êû_,·\=û,6XîàˉånòÚË5Ø0Ë5 5Ë5¾4ËyDHr-{Ë­á–@Är‹£ ÑŠèŽOE MîòÂ¥„¹!DN9.>Ê&ºæ›èúÄë‹÷}¢kå³°nŒ3­Ç4ÚJ=.¾yÖãZcb=î@6ªÜñ9´ w3i}ru n ]/È[;‹{Y»i$(A®Îá½r)–³ wÔÔÅ‚\S‚\`n¸ê$ÁÝò¿ÜmA÷ºÜQóÑÝa%!Ü­ûÁ³±,wÔÜÀ²ÜA³/Ër—þ J¸[#Àþ„,wÐÙ²ÜA|…ˆ·nµ~Œx[;„y¨ïmçÒ¯oYat“¯®Ëõ] ñ²‘&â­ÖÆ"ñfñÖìâì„xëÀdPˆ7îÌêÜŸv#ÞÛ¬—Xˆ÷¶X LÄ{Ûá²fÂ[ûÉ»G o^’„7o„·Å`cgÂ;ܸ^iÂ{[Ës&¼5Ñ¡Œ–„÷¶Iý#œ„·zÌn(`ÂÛ*™‹_"¼Õ¥¢oá½m’ß ïmâ[/À[Î2¤±¼-Æ™¯ïm£vD€÷üH€ðÖ[øÚMb¾[»"A·Äw[n÷G"²{¹øm²u$²[#ADvkªK¡,ÉîmâiB&»íš}b°{[$'ؽ­’“ëÞÐ{%×m¹a°e®w'®[Ùu¢$¶{8fÛ½«˜íF)Åv㉊í¶ì¸ªöd»­Šq®”Ùîm¢žÁl·,7Þ½ Ü“n¼›×$Þ­§ß¡‡ñn5KRmàݪNÌHŒwoô>Æ»Q+»ѾˆwoƒV7„wë%Øîò_’]#Lvã ×lqߊvo/´;´ÿ\—:Ú‹Å‚Î, ‹…•Û}m± Wö°X0à³Å†£XÂaÁJH[,hqÊX·Mè°ÓÊX7Hž¾_a´`"g£ƒC-hy:Œ$Å £…YÕ"Ÿ…EtVT·µbø³†ÏÂDÛÚ‹Ï‚\h´0J,l£ éÃhA’SÝ4hèF wgé‹Çµе~>€näBao {õ_€Ç‚vF\<ÊW¡\Þ(WÛAåj¯A \m‚¹ð\’ñ/繯ҋäFŠ4V YƒŒ„V/Î ï tcÛ¥A®÷P…±¯HŽ;½Ò]Aü7ÜäÕðrWÚWî ¦é®À]†¹‘Ž07Ø5anÞ0×û?Ó]E1Ìõí%Ì•‘‚Lö'U2ÇåÏ@¸áï „ë½f¸ûÃu-šáz“»­l—‘Ö œ‘˜án΋á:;[+l½Ž’áÊ¢%œhaÒ ®wùÚYA2á¬÷,g7.)ǾÆ_€Î7®)…MsSû+DLþ ^T±¿‚êßö ®Û+Dv²WØÄ"Lp½’d{;®Ø^ÁùÙ_Á·'ŒëÛ#ÅåK–7îÀþ Wy®=ž¶ q Ù*DÙ*lo m³¨ôTˆ+ÉSÁõiO¯ÝÙSÁÙÙTaæ»;¶©B¤“©‚ïAì6Ë)S…(§Lî·,/…(ž¼®¬6¯.…í}Vµn…•³x{(Ä%å¡àJ°‡Bd'…¸æÓCaÓ’š=ü. 
Ö¶·G¥'¬m12.y(ø£auñ¶PðGà .¥-ÜSØB!b²PPOaW˜¢˜²Pˆt²Ppçj …H' S ׯWÅ ÎÍ ¾$ý"•ü"&?(ìP½ ÔFìŸàoº`m<û'DuÐ?!Ê@X1û'Ø.#ý8âNÿ„ˆ˜bð/Xf ‚µ‘L"\{x¥‡&€.ÊÚCA‚á¡òP¸Ž"܉' ˜Õ¶å£bµÞ!XëÝ †µ“‡Â 3L³ZÓ‘n¡ …t¨p#& íÏ …Ûµm¡ á¸e¸ÞL ‡²-q=[(böPˆkJ†{ãW,´¸£¼/k­¬¶w” U¬Ö:ºâFŒ¬¶0Dzw¡(À¬¶Å&BWÂÚŠõ5IÃÚLX›ÙÖN -IMl-d4±õÞí“ØÖI|ß{~ö1îW9âÎý5¼àÚEQ¸6ö®ÕÁQƵˀ­¦µó [‰Lkc·¾h­OÙ Z»ê€vÓZ©ºMk}V¯ií´ˆKšÖÎöŠÖ®ï‹pýMZ;É"À¸v’mq¥¬ \KÓ» µ'ŠÖZ”fZ›ž³¢µ²u [Ü…zý­ )dk·—°Å±^®¸ö›%±µ}Z¸âNü&˜Øz„cbk®¸“ ¾mqÃÞ6lqéÐkn;u†qá¶šõ·uÓ³-îl9°èm”Sôv³ÁƒèíÄ XAoCå*|»Ù(Aøö)?6¹õüÑè6¼e…nÙ†‚Ü®Zï2\[ävÑš¾ÉíjG†ábÌ-p;KdpëyªÁí*Q‚Áí,Gªp)T2¸õÌÒàvæ2nÐ[ÏAŒpýhŒp#] \Z¹¥;.íòŒp=Lw\©ŒÓ—~B¸žá†s.nxÏÂ!ÂK†;.ç)/ˆk´ñéŽ+kß`¸|8Áp×ÏÙãºÜÆ·Á7…o]´ÇÅÜ+é-îÙð6 wÃW¹¼U)EoÃŒ7ÌqeâôVÔWôv‡ÆàÖ‹+ nq׿¶Fäæ¶aJkpK¤+nû*ßÅ;’ݲc°Û~a[7˜À¶ºOá[? áÛ¸¢ð­iƒñ­Œñ­¯i|ë‡k|ëÅ+ãÛÈ/ð­0sà[öÐÆ·QNã[é…qÃÛV7¬tƒçª(â¹¾=ó\?=ó\¿á‰+çXñ\ß¹yn˜Ø¦'.= žÆ ¶¬NO\e%c»ZØXÁ{’Äs_/‡QnXІ.+#Y.f¸á*™P®›A¢\>j³\¯S¦®vÃWÒá°Ãev¸|ff¹^¾M;\~ê’æòÞ“ær4hS7”°ÃUv¹D…sýæ¤.WµŸ8׋Ÿé‡«â¹~8ÉsÙu…®nL<7e“ç>LhåâQ'Êå£þR”k¿Ë Êå^2ûáºê“åÒ‹\,7jD,7Z–XnÖYnÔ’åº –{ñÊËýpŸçb…‹Aƒ1nÚäÊ Wä‹ãº¢Ìq3FŽ+ûè‹.w‡îbÂ+'\ÞX:áÒ\÷â„‹*Æuk ŒûPL]M›ßÚ~ùa~Û÷æ}¹ý>…b~ÏÝÅFaæ¾HóÛ6 |¿­QlZÍo7a-Ü6˜%*3Á=hWw³k- nMÆ+¼mÓh½ÞÊ@#àíñ¾ÔÖö †·v>1¼ WÁÛiíø.ám›,®/é¼Ùí4ëL9±Û{ Yå0ëd5rÛêmÔØ¶M‚%%¶­ 2ɧ¬ž¦¯0M”•iB*È4A Ha€+ß›&Œìý/¸!„iÂhu« p-í¶n·ž•÷­Z÷'‘ùí`.,ó[9&]Ìoi3¸J¸<ϸáœKN›1Ùà>ì+|pe>+ÜHGÜA³.ûàÞ$$¬n⤲ÁKÞam–°¶] 3!±Ú¼-ÙàÆ%k³è¶ÁeÑÅj/¡Žj+i‡Pm‹ eՆѰPmdGR›Ù‘Ô†i˜Hí«õÒâgòÙÌ…|¶,Í€ñÅg½ãÂ|öî2,,ëpcÙp9#–\„e¹Ææ•ä{+•}\…63•Ýí!-Ÿ(l¦…Íìn]0¡Ø{éìr+™A¸ÜJº.·ÚcûÈFæ¶R:„¹­mšmn{ô§žè5‹ sÛH'sÛˆÉÜÖÖzv¸uÌ·¾¦nýD`ï5ec[׆míµ±í¡½ácÛøQƶ“±md$cÛHGcÛ{Ã×VÆä¶µuf¶µ½Ä`k«ÜìjëJ´«m\R®¶nñvµ½Äàjë:±«m”E®¶n6vµkÊÕ6îA®¶~Õìjë²ØÚÖÍ[Ö¶.Š­mõ`ílëRÚÙÖÉžüÕ¿Ê×öxßû`Ý´6+üz½ðk|Ä`£ïƒîW 6ÓÂF~‚°ñE„Ít„°á*›1BØk¬CؼBØúž`‰B6¯I#AØÈO6®)#!BØLF›1BبjAظõ„Í„°™!lÞ4!l^6†j‚°éK—‰/Db3I¬G›"±1ÊþbkÛ´¯ k[3¬mULYÛF:YÛºYÚÚ6eIb}Ͱ¶tò¶U“µ¹m„Âܶ×txÛrªÖ¶Œwk[ݳal&‘·mÄämëRÐÚV-!¬mu[v¶ufv¶Uf6¶d2¶un!¨e!-¨u2éi5) A­ 
)=­¼–CO1éiå™l9­‘-§µ¡¯å´šZ‡œ6b”ÓŠ)„œV“èÓÆ%%§߸Ëi…C¾Íá—óP²½ÚÍ>¾µ_ZõžüõþÇ6!«ìZüþ§ÿûáOßÛã=Þ¤ÚßQí~¶(ßœ¿.Ϻ½O¼ç?8Ï£–ôwúü³¬,ÿèü—ý™û²tÞí#óº÷ÇŸ?4ÿ³¾ïOüùçÀüêþ‘ûù§Íû¬ïGî?&ÿûùr,Õ?­Ë1µ>{©éþ|öjÛZú·Ÿæ*ÿ4´ÿæVè·Ÿ>½ý×?´/ø?¾ý÷ÛOÿùÿÿô…ù¬·rZYÚäri7ûÅ9ÍßÓT%µñéÔ¦¡_žÓêœþù?†·RÇÛOúª;lÓš\êtÃÛÔæƒí†/ù¶ØÒ.ÏùÓ¿¼ýåí_ßöÿ÷öçö¿Ù÷û=tÿ,üÁöõ)øÈš÷‰ã4?ì‹tn=¹ä~þø1ùÛ%÷sûòG|ÛÔòrï÷?&ÿåòÜë§nèx×G´º{îfùL»»—àlx‘ÿG´¼{ßòhzÎÿƒÚÞ½Ëõù÷Öw~_ÊØ|ÿÒïKû¢ƒ[ÛŸ·úVׂß>om2]¿k··ÇÇå«?Ÿ‘ÍRî®íÚgtÿl–ïÌf*ÇÎqܶ6»fs<³¾÷v†r‘œ‡–Íô™Ñ@Ïç;ïg>Ê8¯ö6!|?›1oçïd±–•⾭뼾͵¨¾·éø8Ì—'Sÿ;søŸ_þ÷Ó§Ÿÿö·Ÿûí·Ÿ?ýõ/ûŸ¿o|ñöE{6÷R•¼¨è6÷µŽü­«ü&KVò‚á…Ò±Éw'šªEƨmZŒ»u÷ ãµ€rÿuÄ–¥@ûkÆê1¡—io€·Žîû“ž„T“œ2ŽR˜¼6£Ž\+Ý÷~^]i‹ µÜwÔ@ž#$I‹Ñݱ&¹ít8ܨÐ*NÝK`ÒA+­ ÷™ôƒ}nÜ…·Õ›”vÇŒ5Úé ëpÅú:Ê´¼j¨B Ù¦Ž«ÞÛ;gDJ–öc»+*\ô뢱Gº&}é‡P•À¤óD¼u”À¤k£´'»º&ÛµÅÉJ×Wt*†å©Ž2Õì o¢fû¸ÍV(Ajt”—pg¶E†Gû¸ôEAŸV±Nê'=· øLâ¥ã¶S4QšØ^ ¬íÇÝ 7¬ÃùìcHÇ]Êa µ¦ÄaÇ0R“£CrŽa {–eö"(ì1Äyk|U ";IªŽÚ¤Ù"„@)3Ú¹â$ìy” þõÂqjÃÏi‡ŽRŠ’ú3;ÚJÚ<Ê+k=çO“Á+hgź²Ìªcœ© ‰lpVX,¬uð#VxW[‰Ž;·…c?£"–‹+eÆN/ÛcŒÅSdVë!ÝDz“£n§k3Ïj⫘UQDª!›x§K>ÍväP$uþ´Z¶„¥žŠA‚°Óxû(qéå€öcÚ©šŽ>¥h¹ðÐìËØë+þ^;>æ["ší#ºRåÿcž þó¹•Çü¾˜ç˜îv’J¢b؆÷·Ò:7ʲáú¸õ'Dï<¡ uG<»UçÝW f.nóu[ïHµÌÛž wm.£e,¬•e≩:A½bÐßûö–™n±:±ýX*Ðtê豬ԎDlói¥08Ùx¯ë¶EÇmítì8–ƒ"0ÇÖ›õ]ÊqÔÁ|Ý’Èw¼Ž¡ŽÜc}„ÚŠ Kô6§Ëwµêôß­7´Š-ÜFâ÷q]é ··bœÝ_©L^ªÎ^;Ṳ́¸»sàØ† Ü¿±õ/}‹Õˆ ÚëÝ¡~îcTJ{leGyÐíc„vCÜ5á·†g4f«Úê/1h}äÆtl+m­\ÇÛ†AªŽ¤nÏë¤ÆÃܤÞók¶<:Ú¬M™Vv®˜#û@³l_³MŽú(µ>£«µÉ$tªãÂiÍ‘Œ«Í‘l¡Sµ›ÿ¾bŒmnß(\Ó1¼;ÞéZ÷Ûn⾌Q£¾ÚÜúS§ks#hÅô@Ûܶh:ºb}ˆɨ·c×qÌÔ€º+9ž?îºls£;Vœ`q+=|ÜS¶‰ÓÊŽ•oÜ}Ù/è´o¢îžCÔõ>?†Í¹t ŸºaUµS÷¨Ó‚ÚViÁ™·6’ˆ3x¼Ó¹¶à‚Ñꥴ+‡rËoÁözY lkº$<0dÕC?c;»ØÞ†Û!L4À8j]9oÁÑ~P0ÂiÁ‰Xõ}g#Øx$¥;é½m<ÌRž ð‘”ô¤÷·\nA˜É\I©Oz›¥Ý|Øðùµ0žÆ©=yµb™íµàÀ1­Œ´Î d¬Q”’ŸôŽWv-8qX-²¼¼ZÝÌÓñ™†^ç$°óUÝT[Ý´¯ãµñT°ûí½v‹Õ¦’–nöü®· v•\bÆ¶Ñ KuÚjg]öñzÓ£ÛLÙúˆV;­†>óˆK94¢)ëYNeŸÖþºÞ.¦Š-Hád=‘¾¥ù Þ•;Ãg^Ø‚V½+^éwÕ‚­hõ3×úòå%ЊI¾•u:y«¡6÷ú\%ä‡äÛöx>Rù;[;y÷á¥T¿¬µÞž'Åðn†oK/$KÀ[ƒ$ìÍàÝ0 HÌÛÆd<:G˜w[yà¶1oçÑ]T˜·´ÈØ,Ì»ÍĖļ¥|Æ©éļ§((—˜w›¸kÒ˜·}¹X˜wµ¯[˜·§rWbÞí¦sßÅy·A|[œ7Ó‘óR€žœ·Å¨Pèm1Šzk]£Ò»Þ/BÂÛþ!}JExÛì“+ÞÛc Â[³¢÷o›Mqœ(ÂÛÒq2!ÂÛbœB‰ð® ¿S&¼>ÆÝ„·Å8Šá­Ù"‰2oíZè»êEx×¥ËÀ“ðúì$Þ£’ð®37 
x׉þ3¼ëLORÞÃ)A¼ë„ÕóÝu¢+ªùî:õ-SÉwk6ßUÚæ»µ©ä%³}àÝøt×[ÛMw3ÒÝu¤Ñ¦ñî:ð#ãÝ»…ï¶::sÞvMøÕšóf~ä¼#ç]oÜTnλ45èÍ¢ô¹gè-tƒ9¢@o‹Ý0Åé]o40é]oìÉ|K†\Èw9h¬mä[ù¡CòͲùÑÂË/ö[ù¡Óû=El¯Æù@¿-Áx¿ ¡oû•k:‚¾-ÆÕ%AßeçÈз¥ãº /÷Š%ô]v,°šù.‡ÈÄ|ó’d¾qI"߂ӑoÆ€|—M OD¾ËN¥»‘ï5ÖÅËF#ß–ŽkRB¾us½×ñµ…†‰oKÆ%7ßÊ®û˜ø¶krÕJÄ7jEÄ·n½¼ˆoýKgË~-:‰oÆH|—•+;¾-ìÀL|ã’"¾µY‹j"¾Ë¦Õ.ߌ‘ø¶krÍFÄ7jCÄ7nÀwY¹«GÀ·î ¦È{óÞÈ{}L¢yï²r/¡ï²r”hèÛš ¶úVsÁê ¨oæGê»,Z.õÍk’úV _aQß,'©ï²j5ë}ójľy5bß(¡°o´Jaß–ãmcßhé¾×Xºæ5É}£5 üfŒà×mOÜ·%ÃV"qßxÈ}£y‰û.+÷;™ûFnâ¾Ñ¼~ãøÍÉo4‘ßLGò÷'ò›×$ù†$ò}ÃüV;_gä×9ÿ]>ƒ}Ý“ö½‰}ýæöõ‹Ø×wØ·]òŠÀ¾Ï; í}þÈ[_¾ˆ÷RlBÞǵw[¬.ÜmAÌ|î^‚„»ËÂ=éw³w+اåw—…ûæî^n…p÷3Á>`­Ë¾˜Ë‹í^’íV°ÏB‚í. ÃífÕˆí^‚d»uÙ‚힎 K‡°D»q"»mŒ9Cݼ(ÉnÆHv/ÙÍt»-ÄvÏÁàÒ_~ÝKJ‚ݬ7Ý–'ÀnÝGŸØÍ›ØmA­¶ì^Rì^‚»åÉÑWàì^*áÈ7æ[OÊþ:÷¾PòÂJã¢äÕé+%ïÜÇô½m¸Ëfz=+%ï!¦)ÐÛ†×8íÅ ·†×Œ ô¶r ôfYz+†ƒwHz³˜ ½E™ôúnÒÛæ1qšôº£3éõ`Ò»ŒDr&½mZÇ£ãEzËÇ5&Ò»ÜÈëLz—›žºHo+ËoP‘Þyç©ÞOÒ;ïäAF¾¶k4òméà8iäÛfñð4ò7â{#_ðcäë²ùγF«B¾-Æi½¯eáF¾óÂCŒ|‹—@Jä;’°ù’²$ò-Ì‚¢oëˆ(SòÍ™ïqäËä½-6@ñ'Þ[,ñõ¶k}u‚œy¯ÌWŸ¼7êJÔ÷4Àt&õ}¢¾v` ©ïD3dSßiâ2§©omF8ö­± ºeaߨ ìk7XsßPù‹ûf9Á}mc"îûÄ}«9aEMÜ7ö>Xêëš÷z­üÚâÄà7¶üÆ._kæÅ}íNcîÛ"Ä}ËÌ‘ÈûâïcîkG*s_;ÝüF~í5dðkµúZL-ð›e‘ÔW^‹!õõ~ß,ËjwTÖ‹¤¾´ ´ÒWÞw¡ôµ¶^ä7 b¹¯w­¤Ü—úgË}]–ÔûRwo½ohCïK9­õ¾Qé}£,Òûv•óCëÖ72‘Ö×™XëºVi}­à´Ö×Òmk}ý`RìKhŠ}¥è µ¯½RûZlj_åGµo­ö¥ø<Õ¾|Ô©ö•þVj߇‚5„¾Òþÿ#ô}e$‰od$‰¯+7%¾ÊIßWNwqodâ^ÊSÛk«¾]ø+mïJˆpÕöZ¡+moæ&mo()­í µ½+%.Wm/&ª©ííwhU¯3£ª7¤‡©ê•ÆÔ²ÞƒûTõ.¯wãs¢^iSSÔ+±£E½ §!)ê]z¿|õfžõfžõfžõ.ÔQÔ«çgQïBÑPŠz2«õ.iQêy—nÝ|Õó.<Â7õ¼Q~ëyuºVêyó.¤çÍ<%è9?JEïÌ3$BÑ›YRÑ;sšŠÞ™šªTôÆ]ZÑ7bEo܈½3$^)è]8MAïÒíC¯‚ÞÌR‚^¿sÖóÎ”Ž¥ž7³<ÐÖ¾•øÞ^Äwhÿþÿ/Ü+iZàÞ±…^Œfn… ã.'\|Vª[ýÆ—»7,·î»zqoØxt¸7윓۾a{W×[Ó<2bÙ7¬:¹Èö ÛãƒÞy¥Œ)œVË} zçEš¶nX¨U2è=mØß½-sè§÷%½-Å2èeú½3Å z'Z˜šóÎ:°Ëœw’ŠÆœ—— Ì;i cÌ;kælÌ;I%`Ì«º Ì«3Þóº˜Ay9å+šòR “”—“]S^3eS^ggÊ$W”wÒÄÔ”Wç*å5Iå­Ó :Œ3ä5û6ä96ã;?ïytÎb¼éb¼ó¨¡o”OŒ·õý29 äüy«)Ã%éÍüHzë[CÑ,IoM‘ßKÊŃXI~/"ù½TÉï%x'¿— ’üfPä7/(ò{IIü{ ÿ^‚Ä¿— ño>Iáßl¿ùv ÿ^ò$þ½T®a­šño¦þ̈́¿YáßKJâßKJâßKJò߬ñß×w(Ðïå‰~/ÙmÜê û½¤<òÓûmf$¿_cÛ»®½n¾F싳/ð÷†­ÎÁ~/\x'ŸÀ$Ñð7˜eÂß;8%õ-‘U·¢¾7R|™g¯OÔ¶ÄwàJXH|‹¢%ñ½q‘ $¾7®¡„Ä÷f7\I|»-ñ½õ£D¸ÐÞH'Ú{HgÚ1ÑÞˆ‰öFYD{#&Ü+CjãÞHFÜ»Ó÷8poÄ„{}Iã^§3îu){wY÷FL¸w‡×sÐ^‡D{%Ú¥îõ%ï´×¿ 
öî2%0ìÝû²Øë˜a¯Ö“öF:ÁÞ=½0jÞí‚ Øëª2ì˜`ï.µ©a¯VÒ {ã’„½‘J°7b‚½ŽöºV {µK `o¤ìumöFL°w_×›1ÁÞ(§`oÜûƒöƯ¢½íuÛ3íuÛ3íu M{㚢½®eã^7ãÞH'ÜëviÜëZ6îtĽ®2áÞ¸;áÞÈM¸w“ Šqï.ݸq¯kÚ¸×e1îõ-÷úÎ{#?á^ߟqo\S¸7®ù™.Üÿ\¸×ÄÌ7bb¾®ƒ_—Åà×Ulð×øõ=üFLà7®¹Z® ²üúÉüF*_çfðëKüFLà×ùüF~¿‘Nà×/ˆÁ¯kÓà×/éo\sóæ4•enNÓ¯BÀQ#‡7§1°Koìk»&€ãšBÀ‘N8Ò-Ç*¶zËš4¾Äú8sÔäÀ‘JØÏÔØéÌ}æÀqMqàˆMÇ*¿ÙãØÛ{~¾[=’eodeÙ<’Õ5ï8Ë¢y%¾5šEU&V9’gJq`69p¦œ=¤už‹Ç´N¹zPë”Û»£ÚàÀYØCãZç( Í38xh«²šÇ3~JÏg¢Ù#[Ý…0®¸zXë2Šþ¶/VGCIqÅý9¨õµ~û?7óDf¾qKf¾Q÷f¾óÍ ˜oHÌW}`2_Ü¢p/~éÍ‹íѺ$½Q|“Þž™!odfÈ‹Dâ»™“øn^Q|WÚû2ßÍ+ŠïfYÄw3(¾­hºô#߯w¿ÓÒaëŠõ¤¼5˜‚dÕÞ½7I*mép‡rrð9o»ëÂáä0SPqrx}‹/Ng¡áäp{_Ûk/Ätr âV˜w÷Yo¼òµ Ì+‡ªÀ¼ò Ì»R|˜w±Å­0¯ÜSÂÉa¦r œf.M„“ä'k'‡‰ÞƼ-†}ZOÌ'ñ ó†\´·bPP ú¶˜lj }Û¤ÛâÅ|mCaæɈ|õ|[lzÏÇ¡œ4î;íE{m¢aÚëUÑÞÏ; Úë5Ó^Ûr˜öFL´·Åð*šözm!hokñ°Œ0í=lpKÚ뵌À½Z¯Þ{‰a”|¼ïÛëuŽ'ð=ìÎ+â«åŠ ¾÷•‰@½^ 0ê=lË+Ôë• £Þ£7Ù ê½Ä®›)¨—Ì?Q¯ì|õ*Q/Ày^"ø$½\`HÒ;½+ëuº$½As¿˜ô!]H/¥»AzÅX¿‚ô¾ëàù}éU ‘Þçêy9k4äõäÑWCù€¼‘N7bó»ƒá€¼‘n‰ñ0Ó òF9 y#´kDü¬,ñÝM>ÐÉwé$d¾ëœÌw#ø®ó3ßÝä;m¾»ÉÛ|7bâ»mŒ Þæ»›üªÍw£,÷Ž¡YðÝÈF|×1ó݈‰ï:]øn¤ßµ‘“ùn\S|7bâ»[½-bà»®bñÝ(&ùîÖ.ßM€…wÊx÷ÞÕ%Mw]~Ó]'Ü]åLn¸—Üt‚»ÜÝè~c¶ëÐí®2œ1Ú|„v3Úµï˜Ñn¤í;¦˜Ð®Ëi´1¡Ýˆ í®2é1ÚtB»qD»«Lê…v#•ЮcF».¥Ñn¤ÚtB»‘Nh7bB»« òv#&´{‰aкÊXßh7bw´ÚUý'ÚÕÕífJ¡ÝL)´›A¡Ý ífžB»+­§ífJ¡ÝL)´‹oÛífB¢Ýk #Ùí®ô¦J´›A¡Ý íÆ˜ïfJñÝ Šïæey3¥ o¦ä͔қ?‹ô®?ŽŸ½Ú 7.hÐ%4èÍ @oæ)Л)z3(ÐÛïPœ7sìÍ aoÆ{_yóF³Þ Šõö õf¡ÞžFW»ç“ôæEz3¥HoÏMŒ7/(ÆÏk:²·ù6‡_ÎCÙ†Vño{«÷öÓ¸ ç½ÿqïwÐþÁýÿ÷ß¾…&·j=Þ$ß ¾0ß„°¿2ÓºÁO¼í?:Ó¡Îô~Þëó/7>é?¼Ûí•ÿùÇͽ ™úý?ÿò±÷VúãÁ?ÿ²ÄxæþñCs¿Wú³á?þò™ܯùËï”dZ‡5oû6µùüÛ²•äýQmkêß~zXÆüÓÐþ›[Áß~úôö_ÿÐ:è|ûï·Ÿþó‡ÿéKsšÚ(}ÜÖmÛZ ¿8§ùrjýk/Ïûкá/ÏiuNÿüÃ[ÌTßÿÓŸ¾æ·ešÚçi¹–Õ s,ã%ß:ð«]þžó_þåíÓÛ¿¾­ÿïíÏí³ï÷Czìþ¡øÃ›îãÛð¡=6ï´ÍqéG}¤î”ÿ’ÿƒýH †íšÿ°}ÔGzŸ?sÿ÷_?¨Ëõù×oÛäkíæ5vüÖwæþl}ÎÿƒZßY‚{ësþÒúÎ^æÙú”ÿGµ¾³Ëõù÷Öw~kŠfî_ú­Ykþ¶·|ÚGmÙnæ­MV¦ë'îööøÎ|õ—Ô¹,·6·o_´öAÝ'—åûr™ŠêŒí3º×ÏæpƒVuN:†g5´l^`qÚz+{_ë:Ó†mß-tôuBkhc{{_p1ó,ìø´l=Ä .N Å&-¥ Oâ8cû‹0N jÁk;YsåüâŒÓüTšãÊef.Ô·`ë^ˆÑ'xœÁ¾´SJ¶áò‚Æi‘˜ô¥²øSë^½q^¯N4èGÌÒXWðxõÆuoÄÓȃe&®;¶`ë^½ñå²3Ï–É˶ŽáÕŸš¥7p>ÒJ 5B ®­~µá,ÐÆCftÔÇpÂWw\EÇrÓPÅk“?Fi¯Û ÇÌD²2•xõưA„‡2ÔzõÆ-xGÌC ¾:âKš‰‡ÌLbÖC¥¾:bŸrrqÎÌØùÏ`ûP¼:âz¦pÞ¸(užXr ]•4âÕ·àŒ>ìT½ÚpÞIëz?œwrÜO›©utÏ¥;ìýp³•½Ž,gï‡ –Ï 
NœÉ<[§Ð;â‡c·ØÌ#gtL .XÚÉ„+ÏœÑ))-¸aeÇǹœÁ÷NiÁë;>®¦œ;Su…®h½éàåXgÞ÷Ž84ÖÉ<-¶`dì³oZpÅÈ8Ô¶ah|)ìÎãhZðÑým;FÅÓŽÌ-xÀk%вßú¨8Ÿm›4õQ±O,jÁÑçÑðæ[¬ŠëIÁë~Øq8Õ%áŒAñ¥¨ ÅUáûŠAñ¥¬Å—ËngyvŒ‰/ މ£á´©Úx9ç©ý¶éÙÄŽX•Ú¦gÇ= #GÅq÷mò†QqgŽŠã) GÅqƒÃÊQqÞÊÊQq<å6yè8«fç¨8ó<8*ŽËŽ7ŒŠo÷ŸŒ†ãÑŽGÃќڬme/¬:'މ³3ÇÄqsãÂ1qÜ\#s  «­Õ1B½~ÜÝÆãkt¾Z î ?ç]û ¯6½Ý‡Àq³õec·ëàÈSkä>žcìg‹­sã0­ Ž®‰;ggÏëçZpá8*xZ9¾WI!€~bM–cç‰5YŽcßÌêà‰5:N¯ÍÝoûÞ‹Q~z¯ƒjòŸòFFuŽúÚûoϧ‰b×Iý|š¸áyáù4YŽ•ÔdÊ•ÔèD¶Üx@M´„bìbõÂh•&S¶ ØÀ.V/Ì2d¯ôm»XŸ'ÔüÍ«A˜kˆ÷l×µÞ²ƒ0ï7ÛfŠ0oGàgæRyS”*Â\Âpž4&Â\£ß~öJæ7ó("!æ|ž7%ºÜFâ k†ËÃðLp¹ ï±uÍp¹Å¸•T¹Mú&­€Ê-F¢&¨Ü¦'O.&žÜ~çVióä„¶?xrí€g§yrí, ‹OÞF3%ñä6;p2›yò6Uˆ'oƒ±Ãƒ'óô¤_“'×dó?ñä *õ_“'·”’‹'· æ[Êëáa°€r]–£kå–’ßtåõÐY<¹Å„÷Å“[PK âÉmîÍNHÂÉ•ÏNÞJœ| '—;æ+ÆÉ—”ÄÉ-%åÆÉyoÂÉ•úså¬fqåspÛe‘æÊYÙâÊ™’\9;ó²ó¨åÌõF²[æe§¤Æ 9ëN yÙÅœ šóy 4g–äÌY¯âÌ—Ú!gÎÇ%ΜAqæÞZ—K“þփпÎ'ñkÄ̲ŒšƒØ…˜9•Î3{’mÔìI˜IsÌ&BËì6kÒ|ÑO7+ÃH5x³!Í›=EnN’"꜓baç¨:sçq >¯“Ū‚Ï5oÄÚˆás Áç²Tð9^MÃç>ç„Mðy™Ÿ‡T=¹óƒ¹7/«¥ÂÍ9n¶½càf{Tn~–ƒ”¹ð Ž(³í ƒ2·rHb.Ì\AVŸ0³ 83‡c«0ó2š(’7gYÅ›Û3™yQñföoŽÝæÍ>L*x³7(7‡²q³Ü÷‚6/7¿'¢Í6ð ÚlƒÆ ÍËèWþN›ëW²HÑær.å´\´YÇŠl.§N QÁæv7Œ ›[Ž#¦óbÍ6H Öœ¥!j¶h æeÕ¢¬s6çx†äÍ™ŽÀ9±€³æ8Û‘2ˆó¼›!‰8W ˆá’8Ûœ(ˆ³ýØ‚8Û1ˆs¶7ç–ÇS¨&Øûo ›Ã˜è ›c?‰s€5"Î/³³f{»kö!jÁšË"‘½žXó¼x­S¬9»D±ælN‚Í5†Åª²asÕ3—&›çÙßÂæ¬Òæy’@Ë´¹*\B`Òæy 1¡³1:Wsã%tÎtîY¸Ù΢›s'“ps>dáæ rì$Üìc︙nœ¿&nÎÍaÂÍYñællâÍó¨MwæÍ—‘7·ËJ/ÞœïšxsîVo¾ˆÀ¹†;xsìÞ]óÁc@Òýæ&Qšysæ¹ yßJo/ê<´ÿUÈyz-ð]‰ó®QSØgnp6Mü÷ ÌpÏXµ3%ˆó"{Øg˜0†}ÆbkgŸÐbâ\° c9!çä¶Ï8,²}FLퟱ[äbÿ *þ› ¬ý3ìVþk€ãÕ6è\Õ ÿ [•†FL…œs^ýàÎóì Ð5[“¤™“±çº"wÕŠ=çìýõB¡3(óŒÃ›L¡=€ íÙG˜gxæ™çê¡·ƒ›‡ÞÛg(tLKÂ6ãÐîn›ñºŽ 3â:6Ìèid•ÍÄVñ@m•(ÌVžØ„WFÔ®½2<{¯Œ¬ yeäee˜†f2ÌW3fÄ]Ú0#ƒ2̰o™ 32GfdPŽQ=v̈›4}Žº³cÆá…e;fÄÈ1#³<ÂEN´ôá˜^pvÌ8¼iÇŒ(¦m32(ÛŒhç¶ÍˆÎɶñV™>gÊ/ ÏÙEØ1£§ îì4ôÊx&±MFP0Ûdd0ˆ³m2 >Â&#™m2ò²BÎyYÙdÒ´MFe“ÀÇ6™§l2ü>m2ü…MFzÛd\ƒ;añÛddž²ÉÈ ðs?»@²ÉÈ l2ò²Ÿ”MF\V6‘P~î?gPø9jÖø9ž±ý2²<âÏyÙàÏ.PðgvRæÏ—ú þì”rÐÈÒ’?gžù³øsEüù’’ü9Ë)þ| Î û²rÐÈ”rÐÐÊ úBÒÉ cÂFY"èH((©taE> 5|öÓˆüÈŸ3ðsÜý?«4ÄÏ-Ha­ñó\ ?gžÂÏ$~Πm5|›Ë¥­}›«ÆÍó×Y7½äŸcÏ`Kž7ÍÝ¿Fì¼è»bgíp±³5¹FÏ–Øy–Qnˆ½ Øè¹„ÉP¼¤ØÙþÔDÏ!rzöÑáWô¬­¥BÏuÆ6wò =_‚Bχ÷t =ûXïDÏ!?3z=žÑs¨žßQ;×¹ÃÜ|øDÏ»÷ž=ï66zÞ¼UÐü9nÂü94bæÏ:Ç6ùóæM’Ð)H^|çN@a:9è¼Oè] 
@‡êRZ‡Á&€~‰ÝÌž£fÌž7™ì=Ç2ˆÑójÏ_£çX#1z^ìüjôõmþœyn>—S«+†Ð>ðñ ¡CSj":CèL)×zÑV±€ÐQLCè¸ACè¨TCèWÃ0~Žçkü†÷ÆÏYÎC‡rÚ€˜øy±q±ô«†ÏQ_†ÏK¸$ >/6æMݳK)økq†Ï©R^ã4N¹A >ç­„øÙ)ŸóVð9ÎHñóû¾Íq cÀg?%³çhV@G1S­geöœ—]ãHNVÙs¦Ü|0çSã-êœbljŸã!IûéRûì„¡}VÍXûmÑÔ9ž”©sèC­}Ž`t,T[—•:+A 謄P@«Nt$°:Ú·ÐÑžRm±µÐÑWZíÆot´5  _mÍÒç¨KKŸ³$’>ç=HúœE¡ô9Júœ±Áç˜Ð=Ýòçx–?Ç“µü9ƒ’?ÇSLù³ KjtËŸó²’?G­[þu õsTúSý¼h×q¨Ÿãù§üY7hùs^Vòç JþwoùsY í6.X,úrÑÉcÝ£µÐQ;bÑ—”‹ÆjŽbÑ—”ÒBǃ‹¾¥…ÎJ8<0V%<µÐ~PBÑ—$’BGÕX Å´:/;{`¬ª±:ó”:ƒÒCG›³:ž•õЙòððXï€ôÐîËE£3Gë¡ýH,‡Ž›´:Š*W]<8 ßõÞÈknûwŒuÑÃcÕ›•ÐÑÄ—!›Ì·±èïõߨû"÷õ0ÁVePІÿÆŠgY=Ocaõ<Æ|²zXÁáôlÞ@Ò‡6­’>´»4ô.5h i;ÎIÛÖ[¤'Ž•;¢.þ’…Ù#Œ\í¿©ößX,¬Hÿ É+D¤/æ$Ò-h¸N" á¿1…\œDúâPr'Òe­ÉÉ"ÒŸ ÂÃnžÒeù"érÊ|W½Öé=AtY}B»ÞÏa‡bïç Êü9Ì0mþ<Ê<ÌŸGËìeþœ éN)vnÍ»ÙÂ:|/í=ä‰0½ ¿H›@GBy@‡_¦= ÃÒЃ÷‹Û:,!톙ö€§É‡tüjè ʺ[7Úþ9Ì íÿ|‹3åÿÖ“öFv²~cF[?‡¤­Ÿ3(è Ê:l0en;r€¾yqÄЙPДtØOÚÚ–Ža)e)eU` èL) h>†ô5 h"¾ ý«] Ã^Ó.БP&Ðrˆ èH' èH7ÛøN JbÒ—  ãªDÒi-%$} L_²<ä{§„Ó™N`ºµ¤&0} L_‚Ó— Áô%('h?-qéË]’K_®J.F’æÒ”<û°ñ‚Å¥¯¿a`N¦æÒ— ¹ô%H.}É“\ú’’`:,; ¦/ÁÕÎt¾,ñô%%ñô%%ñô%%ðtx… Og:áé|ÂâÓáai>} ’O_‚äÓ—Â’O_R’O_‚äÓŸ ÊšNëAâÓ— u€:ì ¨/i¨³œÔ—”ÔÙ<¨/Aê¬rêKH¨³õˆP_.KD} Q_ò¢N³\2ê´U¤Ž—Ýú$¤NÇ\AêKJBê‹3!õåN©/)ÃÚÁ°„öe¿Ø:ƒ_æ í$é ­”á í`zBËÚ™”ú’òkL¡3(ShÃÚÁ0…vÒZFÍ6…v̦ж?Sh¿ÆÚ)ÃÚ)ÓZÖa íˆ5´S†5´ƒaí`DOaèq¼×1à ZiÒ&ÚiÂ&ږͲ‰ÎˆM´ƒ²‰Î`ØD;¸z´üôºƒh§Ù=P~¦ kèçχÇãõ˜Aü:h@üüuôP¸ÿ¬A°-¥'‚õìþœÁŃ`rÛý¹%ŒŸfóø×åÜ=þÞ5âÈ`?ë²Ë¥óý6#Ž_ÎCÛR'žy^§ó¿Þÿ¸giêÜÿø?üé[@w«ÖãMšëñ=Íõg óMtý+3­üÄÛþÃ3mM¼÷›}ýíûŽüš2ìuÂг¿|h æö-Ú^Exýícká^÷ðúÛÆ•…yÏ"<þò¡%xÔýó=xþí3e¸_õ—ß)Ë>/ Æ¥uÛÛ²Õá„xÛZÅú·Ÿ6@ÿ4´ÿæVô·Ÿ>½ý×?´nûßþûí§ÿüáßúÒœ¶s½£}ÚåËsš¿!§ã4±nëm¾<§Õ9ýó omZ_„Ÿþô5w8µ9è­å{'[Û:ËxÉ·c»ü=ç?ÿËÛ§·}[ÿßÛ_Úÿvßï‡ôàýÃñ‡7ÞÇ·âc{pÞ鄨‡}´jkÈrÍÿüõƒJ0l×ü‡o5ûêv± Kþ÷_?¨Ëõù×oÜäP¯i}gîÏÖçü?¨õ%¸·>çÿ!­ïìež­OùTë;K°\Ÿo}ç·¦MØoûk†Ó`oÛÖ©>Û%Øç­Ma¦ë'îööøÎ|ý—T¹,íó¶´/Zû î¿“Ëò}¹L·:õtܶ}¼~6‡ã™Ëð7sŠ:æ¡å2}fTгù¾»™[­Êݶ½=ï÷sóf~?‡b£K­%ÏÓñÖæïõð‡uæËc©ÿ9üÏ/ÿûéÓÏûÛÏ¿ýöÛÏŸÖ¿ìþëw 3Þ¾lcnëxÜoËSÌ„SÅ/Ÿ¢8³µšÁÜæÒ¯ã%¨,šn 4¡;… S gžÚ„ô‘¯ Œ/h30™å¹Wz•c’pfšÄŒ¦Î<×é÷&@¨0•|æÅ8‹ŸTðe§Úš¢ì§Òϼ Í©êè ©ô3mlyÇ1%$îh¸J>¬< dj‘|~P’ó/¨õö÷•[< yªáð¾< Ú0hgîÔrékàýÇÓP‹aËpUmAæÑÆËX »2¢¬àsý^¬ü‘äWª‡ÊçXÊ™ç¶é½ú,ÞÃ8CRžuÇóÀå½V 
Pc#ùöm’Tm±›zoo;Ýq§qƺÉmÛØ4./µÔvÒ†M­¼Vä*еνo%£ƒ8j¹¶c×èi,Ì5?q{5i\8ØÙ\wIÆij_k'÷FÜn„â¹ vÜ^A¬›LSÝõroÄu'´'˜¦ 9%ÞãsloÙK+T)éz0Ms_!¼^véÚžªµÅié Ð×àÚ¥1Û±j÷u›OvL[S í]Ýq ¯€ÏÅöրζ¼ËS 4•!ÞVQ¨VšNSäùÞŒ+¥7O.R?CW3ÕÙË£/Þj¼µ‚¯—¨‚\/˜Êät}ôÅg]ĺã²ÑT[¹žÚÛ­ä©°3˜jÇôs¢‚êÁÊsöyÚÐÄþ…i†ö«‚Ô<·ªìΫÛ1öËTp¹õÅÓJI ò´ ý‰JyƒB_«ò•’"äiëóxoÇ—-{Ôeé4;ÕhãülÇ‹dÓ²ôe‘ËS.p3-Ïv¼P 2Õ™iÏE·z(\Éœ–ZÌ]žÍx²ž,æ^IÁï‡yÆYyP“Wì%¨„Üß5•íÙå_žI© Ÿð½‚<‰oªÓ·åÚxV,ÓœAˆg+ø² ­+NÀ¶?÷õ¿Ëã(#ãÙ':xn¢H%|øþNkû,Ý^ýñ ôTÊæáÕòý­`û?Ûñ ûÞÖw-ÌöxæÛ­wy6ß›Œy[Môå¢KV[5çW7|£ JÅ^K£N{*‘ùüê…oòж¹/mWÖ¼í±µnèÕ ßüAÙÖ®œ8ó„‹p_ªåó²Ð¾¶ùG—ä\/»wyrGŽ%¶ãµ®u©¼½N¯{õ£̛+øZ=Ÿ0>{¥¯®8âgKÚKÞüê‰ù=O;äÍ—ÇØ‚Çðê‰y,¯pÝœ§}íJÝ3ï’ißúÛå¹ï{×Û¶a‹ì‘+øZ’½<÷úÇó«ŽgPl¹õ¬ç^µ÷>Už4žŽ±+I*¨AvuÏÑp5²9 RºÜÊù9{vÀµkòäÖÄzXÁ‡åïÿWÜÙóØn¤G8ׯ˜ÐVæW“<vàÌ€2ÃPb¯Ø{‚°ÿîâ’õÔ;#i®àq°Xi^‘l~œfñéêêù¡¹ägßÛÿîÉÄ*^ƒÄÇY˜ïw~È}pv¾å,Ì}°í;µd9çlÕ"Yg.œ÷KÖ™Kg?%Í%ƒU´Ïïå0дûæ50MÝíãZ²´<ûl¼dpîu± .uËé6û•'H+4lo´SK^28­AqÉàû·¦ ‘—ΧQÉ ÏßJ‹Åèj±ïÿTÁb ¾(Á"Íå “{Ó~Ž²×Œ”`MîšK÷3pv¦âãýè |Kÿ\»¾'--£eéŸkíÃyÕŽµ¯.8w»Ü““UôÙ~*^þ#fhœ}»ÔpÙízÛÕjk·ûæèÿl‚ÂÒ¿×.dþXµòüfý0j÷Ô³ò›Òàôn1~A‰§G“ØÖs·åçòm“qω~k.ás×ð×;ºÀçí· á³›õ¿|ö/•/IŸûÐjíô¹:ٺDŽÏÛŒ H„Ï1±—ðY{ ‚=ëãÑ¿jÀžûצ¯åHö¬¹»®ÓÁž×‚ìy])bŸ{ñ}ø¬©´Î|Ÿ#“ðyï÷×—„Ï1_”ðY/ÎÓàsÌ\$‚Ž©DÐ1ï‹:ì§‚n;ytÛ‘‚@Ý»‹“L?÷¿ûZ­ŽŸ5¾‚ÏLÇÏûšlÌÂù³ŠøŸ`³\ø9þ,ò…kæüYÏü¹îEüZÁŸÛ„îüYEïwÉŸ#ßü9×äV%ΟDêq«àϱà:ðs¦¦?Ÿc÷äÎG:©þwξÁ÷eÇìZÀçÌõ|ÎÍÓY™€Ï™—øœ!n€Ïåð9#%Ÿ3ð93%Ÿ5†x½¸¿|Îe¾ŸsiiÀçXÌì9—{>–ùòÌÙs.ÖöœË|=«Pãì9WÝ{ÎÅ$Ÿsa-ÀçZDØ|røœ«P>çJB€Ï¹6às®²xÂç\¤ð9×2|Î@zÀçZ¼ás&n>ç2 йš5´Š“?ÌN kñáëÔlT:Ž¢s±g h-$Z5ïÿœD—šè£æDÕA´ÆîŸìÚôñW[Zz?>#íçºNEôªN ‹QÀ ´ÖÃõh'Ъ¡?v­¢Ç7ƒ@«)⺯¸ÞHŽ!@k)TÏVÑ3@ UÄ£¡èZ3}ýõà(Zö Ÿº ­¢OÚŠ>Š>Ôå(ZEŒj:Š®ECÑ*>•Cè]éþûu­¢ÇãB×- B«èéX€Ð*â%ç$ú­¢-ñ0ñ è$ZEŒ =°vÈD¹i$zW<†×D×¢‘èã£sÚÑ—6O{Bhüiö•²Fä?c£æëKŒx5;z>jÖ™=ïÒš6úô¬¢­Î ò|Ô¬Çy.>|m¥CJ Ï*úgȳŠîšyVq°îäù­âõŠï-†‘@ž¢õ• Ï*úw$Ð³Š€úΟUt4ïø¹ÔŒÖ©†!£ÏªÙ((tÙδ^ úΠUÄðÃs¢L¸Â¡ë=FM> àZœ׀>þìðÕt-€VäÎt-zøÃ´‘é:€VÃN U$™5}_@«HlZE 2'Ðo/]‹F kÑôQôá 'Ð*‚–9V‘ÔÛ'r–âM e)ô´2hOêkì9mˆ`ÏGÑùã“=û¾Œ:ëÏ[pê\÷eìYEðSgϪÙ’ìYxÂÞdÏÓ \Gö¬¢½! 
Ÿs·NŸwûy} ø\öêðYúb>€Ï*º­ ð¹niðYE—ù€Ï*Îf±|®EƒÏ*zÔೊþø¬¢çÐ>«è„Äñó±áóíä¹ÿáÍãÜíÃä¹nùzlU¹o›‡ûáÈð?®þçÇ­2¾$‚N: =ëA¯oøž¹ɳ¢¾0/0-ÚNž#ÿ‹èYÉn0/;z>¾úM˜=+òi² ç$“@Ïʵòv ç¶Ð–ô,.ò4z€:'=uÖoÞ¨s›ÐG’:·_ÖN{çõ>uN¢ê¬ÎËŸ'PçXâ¢ÎIí@cERçX¨†èyYn¸õ%Ðs¯Ážô,öw²l§ÎÚ!X6¢e§àÕNû«kt=¨s2PgLç Î±6©s¢5PçX?žÔy™`ÔtN’è+[:—-A¹¬3©s,$Jøëv>'”"| ñŸc¹;ÂçdoOøÜ·y¾QÈtèæRpçyã°¸s/>Ày;'üwŽõɃŒ;«9þˆ;«9n[vV{ÞÅÎ ºÀƒ¸;'>vÖCâžH`g2UvÖ–î—vxsÃ"°s‚7`gOÊü’عo9’c¡ß†qY§ÎåÚ9uîÛaâ ¨s¶ÔÙ¸_œ:çA*’:/?&uºê¼ Q:¨ó9¡sðÂËÜÇ €:È.(tß«ó íwŠZÖÞDÄÐIvÁ¡{ÑÇ€ ‡&0v­-Ýt÷:[›šÀ884[—/9´Ïq)š ͉fƒH¢qE£=Dѵh(:Ús¢èh Qt\¢è æ Ññ G?hž'ŽÎVŽvoqô<¼þ :/(tÜ=Rèh)t\Rh^Bèò „Îö8„Î>‡:†Y¡ãBGcÁ ã÷OW úIÑÁ £9$ÑqƒI¢ÙÖ DGKˆ£³)ÀÑ1ÐBÝ®©ˆ£ãé&ŽŽ±âè¾!ŽÞ9Oƒ8šãZ¤Ñq"ÄÑqM‰£{qvn %ŽŽÆGGFW–8š7™4:®,itô*¤Ñ1 GWÏit´•H:®,‘t\Y"é,ÎÙŒoEÒÃ…¤Çþß„G÷ï°óÊWÍeJ šãh}úÛ'ã8v, B,Ë“K'AäÊêÁ¥e—sh .3pé^ôÑNré —NÀ.˜ \:]àÒiOœî}õ PîpzÞ‘ 6hl:A“³é¾¡O‘M'›N÷Øt@±M÷7ŒÛ@Ó"˜ ºŽ¦“\M÷£1"ÃÐt"8 é„AàÓsC& ð´Ð”Ïgž–) é"ާc¯ Óé4N:xt:Mj Óe·Ž§ƒ£N— N§õ tº´tº±{!Žö8œËàtÞÂéh+átã × §÷‘P·ðaƒPçA¨ãäA¨NO ¡Î½‚Pgc¨³DŽC"êåŠ9!Ž=’N7æOÇ ž> ¡äÒÑñ‘KGóÉ¥Ã0I.O¹tVré¸ùäÒvÉœIÇÝ#“Nœ &ØL:\ŸÆ¤qȤ£ýdÒñ|’IÇÕ“Îv‚Ig1 4æ-$”Æo‚P:w 4—œl:®Ùt“l:ŽI8§B8EÀé<&àtn 8§8wŒt:ž;âé<¦ãé¸ žŽ3!žN3ðôýÛy’é<ÎGìÐ4.‡š^iÚ¡yȰCóÔ€¦ó@Ó¹%Ðt ¼MGƒˆ¦Ã¼M4§I4Wh:‹@ÓqžDÓÙZ°é,‚MgkÁ¦ã ‘Mgk§sK§Ó˜ãèr0ÇÑ÷&ΡcL:kàÐy,pè7Š7‡Žñö“C—?;‡.EçÐ1!‡ ºœ„財ƒè²¡ƒèÁÍO(é^À·HgDºHgƒ@¤ãkHºìÖ‘tÙHšßmDÒ¥èH:¯tȤKkI—-Iók™H:ÏH:OHºlùdÒåïΤóÀ¤³ƒ“æW+tžŸé¬9.Çs _»ÒyµA¤ó€Hç D:Î@:¯€töÒù.¾ºFh-’k‘DÑz(á`v‰(úâ¢UôáOGѽæN¢èÜ(: ôí@ÑüÛ·årŒæŽþP.´>½ÎŽ·€he¸‰ :§­#zBˆ/@tZ«˜ þ€hÊCßFv"hQDY8‚.ÑÍŽ »€!Zÿ%È6¡¦ÌA÷3‚“ºŸô2t¿À@í@Ð =Œ˜‘ÐýËHè!’@n]ò{A+i×´uŒœA+Nâþ ¡K¤®Chñ¹àº¤æ:„V~)[H†i­a2tdQ2z ×5¢¡#UÓ)tÉ‚t ]ÒC«H´aèÈac2t^Bg¶“¡#ŠÉÐZ+ÖýnH†Ö'82=<º»Ãñ‹dè^„kÉÐBžÞ€dè皌„î|?zo,A.ô>“¤#zŸ]¿æBïs_Ï…Þ'¢äB÷-Ét=ºli¹Ð¢1ïæBïL™ˆ…î2lÚc¡÷‹u3º1±ÐZ ©Ç ½´—"zØ]#z™N‚Xè²¥ÇB÷¢û9‘ ½·¬ù(ºëé¸öTè}ˆ˜j…ÞLZF,t/‰Xh¥e!ÃÚb¡·}”ˆ…ÖÊÖÞ]!ZÐ݈…ÞLã>c¡·=2š=Zmñ±ÐÛs!R¡·Ö¤Bo{<{*ôAAm, ÑÐ}KLâB4tßozDC—öx4ô¶E&³gDkKEFô¶E`³gDgƒ½m˜ÍhíÍ­eÑM?!#Z{õþ Ñ¢Ï>~€Œèþ/nÚ`Ft/b®2¢Ën-#z[Ï‹Œh-þîÑq–ˆˆîŽÀËGDt¿ž¥ÉˆhMÇõˆè²?ˆÞÜ]vêQѺ/Òð¨èÜ©'E—½zR´n(¸'EÇ^½-TŠîELJBPôÖè™@PôÖÁÉ è²[ŠVzŸ‹E—ÖzZô¶0e iÑÛ 
Ÿ¢·³“]öjaÑÚ©Í\FVô6#fˆYѽèS™­ŒBŸ|fEo3'#+º1õYÑÛÌÙÅÈŠ.‡³¬èrHÏŠ.'áYÑe¯žÝ‹ç¼dÄD÷ž`Üs¢ó$­Ãyä r¢µl”äDë˜>£9ÑÛDnœèmÂl0æDo.̉.Çô h¥PÚkiÑ嘞­cºuiÑåT,-Z¯''ÈŒÎ3Ahô6ù¼6fFo!Ë33ºÌC£ËÁ<4zaTbjt9š¥Fëm§à¡ÑyÑ7 ¡ÑÛ„ñdFë…oŸúÌŒÎó@ft^;„FçY"4Zgéh ¡ÑZ>ÌÇ]Žé¡Ñqñ™Ñåž½Á´=3zñˆÌèmä@2£¥Ðj"3:/2£•ý ò33ºÎ3£Ëá<3Z§ÿúWŒÎS3(]~§ÒýÕ`ݰtm½aéZ4,}Ü Ahô6@öÐh5¦eÎc"4z…{ht>5ÞÒnÏŒ.íñÌèmÀî™ÑÛ@nÌh5Öä12£µW`{ Î+€Ðè8"2£ã 2:¸–}šþ£ÁëÝ/Öìè™3q™=ð;Úuÿ8HcpÇ V–˜Ï™hŠ #]¢™à~ LÝo=RY3ÁÄ„ ;£å˜à±%0Ác%.`‚G, ÉF€É…Ÿ|ŒñXø5Å%–vDŒGä»UúXÜX¥gì‹1YDxtDœ\áÑÓ3©)b£'‚tÆFO *`vôÈ'ÙÑ‘ŒÌìèHEavôȼQfGg{=ë2;z`³£3ÇÙÑQ+³£‡X*ÙÑmäÌŽÎhdG’#;úݵ 5;ZÌrz#À£dJ»EZÔ˜[¤Ëù¹E:/8ãϠjx£óJŸ©Ñ×ì¦èò#.úÁŒÆE?8/qÑúÞýàlÆE?ˆë‡D\ô¸qÑqo½³çf^ô#-ÍõqqÑŽ/ÂÏ ìÑ={t<µpG— Ý­GÐ3êáŽÎçaÑq/'ɰ踗 ‹ŽÝžé¼°Hë÷¹(°Hgz;,ÒùйEZ2âEÜ"Ï,ÒeK·HkK¹°Hç3‹ta‘ÎcÂ"­…E:/,Ò¥èéü±Ã"—é²[·H—ót‹tÙÒ-Ò¥èé|JÜ"­.ÃçÁ"Ï,Òù4Ã"Oó«OºìÏ}Òeî“~£xû¤ËnÍ']6tŸt)ºOºìÕ}ÒÚÒ‡Pà“¾^;0Hç&0H_›Àý)œÑoogt)º3:8£Ë–îŒ.[º3:%8£óц3ºléÎèrLsFGcéŒ.EwFÇ^éŒ.ŧ3ºüÝÑq†tF—¢[££75:~»´FŸO =Ñe÷D—¢{¢ËsOtÜ$z¢KÑ=ѹ[x¢¯s€ºìÏ=ÑÑÁÒ¯z¢ãu@OtÙ­[£Ë–n.EwHÇkél,Ò¹W8¤³±pH—â«CºìÐÒÙL8¤Ë–î- ‡´Ôâ»QeCwH—-Ý!]¶t‡tþJáÎÓ„C:0é.[ºC:[ ‡tÙÒÒoo‡t^w8¤ó"À!]Šî.­u‡tÙÒÒ¥hé<X¤ùÙ@‡tÖÜ ­ï72;òÏnΚ»£³ææè¼ûîŽÎíÜÍwwtÖÜûtotÞ!x£cCX£ã€pFç º3:èÆèZ»}ÑYswtiŒ»£—nJº£ËµqwtÒÍÑeC3Ggs`ŽŽkotÖ謕Æ[HÇOǪ…cë»Ø÷!¯úMìÇÛUÿ¸MÇIè?xýÇ¿|÷çoÝý6?^`ÁžÞ³`¿Ù˜o¢ë<¨Nð«Ÿögôy¦¯ÿˆ|´ÿó£K¬_Çþ˧¶à¼½×þÜó¿oùë?nŽæ?çêŸÇþ˧¶à|ίGþ£¿îï§_iE §CJ¼ô?­å)½¸­jпüðŒúÓøò§ÖÛüòÃ×—ÿü‡á_þëå‡ÿî_ø‡é¯:iŒã>Né7ŽÓr¨·ëã‡j£ü×cÿP›·ß¨…‡ú§_4ºÓ{ïþü¡sœÚºïí¥iÜk˜·þ¶jS9poT¿œíõÐ__þùåoýå¹~ZÏõ9Ýetÿ}´ Þ}Þkéõ»¶ÿùµû)-·züñ[£Á>ÜG3òøç|…OiA«÷_ûäGÞ–×üœ§oÝÚýôñøŸôô-x}úxüOyúŽ^æ|úpüÏzúŽ´zÿï§ïxÍ,ßOÃþ{_3E3ˆÆM_Qs½ÉÁ±/[ÿNžëëmxißøÆŽÃ´AôlÞúÛtÿ•ô?x™\ÆiÚ¶}ªïÌñqfü£§3Ó÷Û2öÃÌoˆ‚û8ð|–Ç Ü°íý¦¿˜)Oç·$Û,¯FÿPîÁËúÐÐÕθ”;£ÿ;Žðß?ýÏׯ?þýï?þòË/?~ýùo?ÿõç?¤5^~ßt\¾r4NuÒ‚£?Þ,ÝqÃüÏÖ•ÔíKŠ´Î6 f=Š)žmšnw‘æ1ú4ž6Í·h[wC6­Bu¹ô×–ŸnZ í2â¯#<$­7ðrúl•¶¼ÿ¦ëËO/ËqÞ&'Èešo3ˆlólSG—†å¦D—Ëÿ.€aN¦yè—Ž­ˆòiZÅä²±·³ÇÚlƒ]›|@æ!i‹9>Kkµ<Îe:·±…‰ú“eÓ.ûEð™EmYmf¥€¿_>™ .w¸‚>ýfë?¾ àbqfjiZ=ýôx÷ 
Ý¢Ôú™Ý“{ãœe¶~M¦£“æÃÓoì=eQ;7cd“áì4\kÌÓŸ~w²9§Mî¸Ë7-÷ëêiØÿ²E/+<ÓMÃû—óY{´95M#ø—¹Yk¬ß3?ÚºÙ¿eåSÓ÷4¾<ä6ØL=ÙVlNA;¬»çcÜŸTLmšp™‰óºmÍ&ÖÉÃa³|›¦_]öà~š>“©idû²ç5ÐèõåòPµé8MÔ—‘WSýmúKÓ,ÖÉcŸ«ÓöÅ&«‰ïš­ðÙ|´¸îûnSÎzÍW´ë? ›U–¿œÞ…]Çd%ò'ò1Ùܰƒ Û {…¼¶ySíañ.Çomõým÷4.A|µ<|NU?1›ŒÅ3_‡Éæ[i<ÊfM­zÝ_ý±ˆh{ýs³ùRËŽ\Ûþ6µ)QËá:˜cÔv86§iÙh»jòkV7Þ}0Áµ¿Þî9H*ZÁÚ_o÷4#­ôn–ÓuÜm*‘Æ2ìù\'[¾aÓŒý`ú{Þf-#&øwA`.à·j­Ä«Vƒ¬k_µ æÕ÷¢Ï ]5äv÷ÁÓsaUãiWï«`^Kp\û›íša³ŒXv¥+ ›D£7Ž=Ô«BΞ¾ý\«B”®ÎwÞ‘š¿*Rìê|G¬D³*ùøê|ÕH›Ï¹*§ùê|e/|žÙ±`ÛùÐæÕÐ8ÒÕù*¯Ùü-«ÆŠ®ÎWI 6üº.¡Ãù<éÕj¨›~×f&ýãÆš/rÕ*€Wï+w¥ë¯ýevOå˜_¯‰lÌW§+§yˆ×¶ÙœŒ…>óUÃ(W§«3ë[V •\n>`¹:] %ÚÈìÚßg÷‰g#×fSæ Þêu]mzƒŸ½ø×u· ²¯ÚR¾«–µ¼:ÛüYnã=aÞ±h?àò™Ø^. qÿ¶ gâúoÍ#˜eª2•µf¸ÛôTÖ1Ç€Êz]`£†²–Mþ]eQPPÖèe½Ë÷à/\WÖÊžq#”µr`lb„õ®×Œ+kEAø‚°~ xP)¬èà¯jë˜>Oa­éL.æ!¬· ½…uÿùLW kN¡®^g,1@]½N”OÐÕm‡ºZk¾¢æºZ;fgºtõó\CW7D]@W÷/ï1©«eÀ·¾6tõ†à"®Ý)Oq­]m4Åuˆ…P×!®R]ã@]k8ÞÉ¡®ã' u-k¡uvT×Z„Õ/Õ5EQ¨ëÆ' Õ5&Õõˆ<¡®)ÞB]7xáB]/>꺋 ¿T×&fS]ó]GuÝ›ã^¢S]DZ¨®ãNA]X/Ôuû¾U]­©·3>tõ‚èÐÕúÔP×lH¨k9SM…P]/ÈH£ÆN…Ýäá;¡±CQcˆP+B25¶'…ÆÆ›»jl‹Ð¡Ä.*:$¶+X*ì(vHØÔÙþü!í/AÓÙžS„¶/…“BÛ—×.Bú5…¶“£PÛ;Þ(UmÛ{³¨mã†ÚŽKCµ½b¡à¨íÝJ±Jª;6 Ñí«0†èÞð™_D·‡ºÑ/IŠî¸ì©¾=_¥¨oÏW*êW6Õ·÷h!¾q/)¾ãëakyrߺæÑgóV~¾±ÞPáB…‡€ ×t‹?£ _T Páš­ã*|ãƒ*\fAëf©Âû£Š×$Txơ ÃË–³/Ï3"‰ 2üX&×õ©Ëð _† ×ÂG>£2|wKxÊð]U½A.Ã÷i m2<ב£ì>O 2|ïïtÿ‡ W¸&®Éð]Cˆ•!Ã#Ëï”á c„:wþ˜ñ r®Ï(¢ ß'„íS†ïq*døþ@'C©š”á‘¶CÞ¿¦ð…®ì<€ ßWò]ÈðuãwdxDÑP†kŠ˜5B†+ozƒoë” •]÷†ù„"*ð¢W›1*ðuDöøÚð¦¡×ûÁ¿4 Á#\s/½=àë„wÉS‚¯ {HpÝlÿ͸?¦rÚ@‚¯†p¨ÄŸ \Ó­›¦×,VûÅP·3ä¨Àµ\žÝ=*ðuAZx“”{ÇCÞëNo¨Àeyöï(põ,ö‹¡o+FT©Àëöý£ Wº„Àr“‡LjpÍ;v Þ/‚S*jð3‚×jvà‡t¼ÇS‚kÄxuî<>é!Áûþ ‡!ÁÛLt ÞVdrP‚÷÷•w_”àš©´¼¼»pö*%¸4½¡¾5÷¿©¾9ÈFõ}ð@x÷[}á'GåÍñ>*ï¤ò³ñÏ((ïeA¾-•·õì3Â[TÆh…w øQx/”ÞKãÏß•woŽwÏTÞ18(oIŸ»úŒÕFÃV²Þó÷‹­¤!‰”²›‹WRukVŸ3A¨îq@MÕÝßÐ8p•<–á¡«D1Ö÷ *M%ýÝ‹w˜J"¬–¦’¤É`ß q³ñ²ï3¥‹Ð{Ý»Cè½®¡ðz¯¿G½{äÐ[ÄØt“N†ÚÖ¯ÛF%I½ÛŠHè½í;ôní9¸; 1'½â péž0¼¤;©s8Hö÷IwxkÒF‚;@ÉÎÏ(’î0×$é†É$}$§$Ý+‚9‚t¯ˆJ ÒÝ(#IºŸt5÷‚Õq/Ï"âžÏEà‚n/|ôH·g,t{ÆË9ðöD×ñö|ÍTLiÝ·s=t›tô¤Û#¿ŽH·í—ÒZ° ”t{¼žó¢¬—ážX$¶ ¼€Üav äÎÝäß#äG#IpLBî,ºÄ.v—Ø¥—›— ÛÜ ‰·qUÓB‚‘uˆë¼ªi!¡òàÎÝp‡ñÃùvܪäÛÃßFCÈ·{ƒ¸.`|;Ì×y’oƒü“o'üþÝ|»4|;¶„ÂN“Jòm0cîH[ ü@ÐÙeË 
Ü!áÞ==w’€›©ˆp?}—”Ø¥)`Ûaœ!ÛŽ-·7~%%ÜÆ¥9áö“ØZE)ôõr£·¢¯GHhÈëáIÖáÚžÂÒ ×ör÷Ý©¯Z0äš®íó] iÍÜÐðk¯Ü!ýÚ;¾M⦅Fí(ðioˆS->mhú´ ¨Ÿ6ü-ôiü¢O{ ÈíÊ:­±ôi‡›€>m~ð†O;Ô|Ú %áÓÉñØ]ôøâITÙ:[—¼ðik¡+k%}Úá\€Êæ‹–";†¡Cd_oáÐ×”/Ô×1˜N}¯)êë^tàD}="õu¼O¨¯{ÑWâ ¾Ž— evøæ(³c\2;€ÔÚy… µ£G¥Ö޵v¼«©µ³A.¶5HëƒvÛñšzŠí×§Ø#Äv9–‹í¾!LÉt”¬p}Qm§¹j[ý–û¿]lËüg(ÛéÆ„Ø–4s4%í2s»ÌÎfÊ앸Š2{ŘYÈì•ü2[‹Ž¾ï%i÷C[Äv^qšI®3¤¤‘¸Sf7ŒÅQfk¡u¸Ã!³¬Ð‡¤Î^àï?eö‚1¸ÙÑÈìØTö‚wL¨ìƒÔ¡²£9TÙËb;7‚À^ˆb)°ï=R[Ç©­L m=cúMhë¸.ÔÖ¹[hë°,´uœ"Åu!®ã<)®ãFQaÇ©PaÇ1©°ãчÂnÀU¡°Æ5_6žøO ƒý¸{²bá‡/…¶1Ò…vDdëüM”&n7ÓÃÍ¥LéQª™ `˜Gb‘zšG”|íÎ0ô¢¸Šyò æu{î€y¤iXqóH9`ÑyÁ¬óHLY¥yd§–§ydã(·‹nIÉÝ=R¶t÷È´Ñáíî‘R3Ñ-§ Æ3^E·þŒéa.ºë6æÑëë=÷ˆŠ$É&ºkÑTw1ä¸ê.Çté­Õu=¹Ò[[žó1Mb–£¹èVÑ'¬At×¢‰î£kˆ‰îZ4Ñ}œ¡¹è.ÎE·Šœÿh¢ûØÒg»è®Eݺª./ ºë1MtkK˜Ï\t—¢‰î£æ~Ý¥=¯Yyï]tEPì[t«†¯_ݹWˆnñNsÑ]‹&ºU¤ÄTwÝÒTwÝÒTw)ºê¶¥§!»ëF&»±‘)nÝv¼x]q§ÝŠ{Ÿf, Å­¢O ‚âVÑcn¡¸Uôy ÐÝ*–àº[EçÐÝ»ÇßÑÝåTLwë @Φ»kñÞõÏÖ5Õâ-¼kÍ”w)ºò.§àÊ»My×Ýšü®[šü®E“ߥè¼Mƒ«ó•kðZ4 ^‹¦ÁkÑ4x)º?Š>Šá¼ni¼Mƒ×¢ipÝ8PL‚—ö¸?~\©½õGÇ{ þÉÕöú7ó~I{ÿÇwÿ ³ͬ endstream endobj 3 0 obj 200498 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1469 828] /CropBox [0 0 1469 828] /BleedBox [0 0 1469 828] /TrimBox [0 0 1469 828] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000200717 00000 n 0000200739 00000 n 0000200762 00000 n 0000201192 00000 n 0000201061 00000 n 0000200956 00000 n 0000201119 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<1842CAA22A38B08ED1D7F47F4F4CAB77> <1842CAA22A38B08ED1D7F47F4F4CAB77>] /Size 10 >> startxref 201273 %%EOF blis-0.6.1/docs/graphs/sup/dgemm_rrr_epyc_nt1.png000066400000000000000000005411571360743507500220070ustar00rootroot00000000000000‰PNG  IHDRâÜJ&¡ 
&iCCPiccH‰••gP“YÇïó<é…@B‡PC‘*%€”Z(Ò«¨@èPElˆ¸+Šˆ4EE\•"kE ‹‚tƒ,ʺqQAYpß÷?¼ÿ™{ÏoþsæÞsÏùp ˆƒeÁË{bRºÀÛÉŽÌß(ŒŸ–ÂñôtßÕ»­Ä{ºßÏù®‘iü常¼rù)‚t ìeÖÌJOYá£ËLÿÂgWX°\à2ßXáèyìKο,ú’ãëÍ]~ )úÿ†ÿsïŠT8‚ôبÈl¦OrTzV˜ ’™¶Ò —Ëô$GÅ&D~Sðÿ•ü¥Gf§¯DnrÊ&AltL:ó5204_gñÆëK!FÿÏgE_½äzØs û¾zá•tî@úÑWOm¹¯”|:îð3™ÿz¨• €è@(U  t0–À8à|AØø $ȹ`(E`8ª@-hM œà<¸®ƒÛà.L‚—@Þ‚°¢A2¤é@F²† 7È ‚B¡h( Ê€r¡PT UAuPô tºÝ„¡‡Ð84ý }„˜ÓaXÖ‡Ù0v…}áõp4œ çÀùð^¸®‡OÂðø6< á—ð"Â@”]„p$‰BÈV¤)Gê‘V¤éCî!Bdù€Â h(&Je‰rFù¡ø¨TÔVT1ª uÕêEÝC£D¨Ïh2Z­ƒ¶@óÐèhtº]ŽnD·£¯¡‡Ñ“èw †aaÌ0Θ Lf3¦sÓ†¹ŒÄL`æ°X¬ Vk…õÀ†aÓ±ØJìIì%ìvûGÄ)áŒpޏ`\.WŽkÆ]Ä á¦p xq¼:ÞïÀo—àðÝø;øIüA‚À"X| q„„ B+áaŒð†H$ª͉^ÄXâvbññqœøD%i“¸¤Ri/é8é2é!é ™LÖ Û’ƒÉéä½ä&òUòSò{1š˜žO,Bl›XµX‡ØØ+ ž¢NáP6Pr(å”3”;”Yq¼¸†8WJ)Hq¤"¥öHµJ IÍKËIÛJGJJ·IK”aÊ8ÈÄËì—é”y"‹’Õ–õ’Í’="{MvVŽ.g)Ç—+”;-÷H–×–÷–ß,L¾_~NAQÁI!E¡RáªÂ¬"CÑV1N±Lñ¢âŒMÉZ)V©Lé’Ò ¦$“ÃL`V0{™"eyegå å:åå–ŠŸJžJ›ÊU‚*[5JµLµGU¤¦¤æ®–«Ö¢öH¯ÎVQ?¤Þ§>¯ÁÒÐØ­Ñ©1Í’fñX9¬Ö˜&YÓF3U³^ó¾F‹­¯uXë®6¬m¢£]­}GÖ1Õ‰Õ9¬3¸ ½Ê|UÒªúU£º$]Žn¦n‹î¸CÏM/O¯Sš~°þ~ý>ýÏ&   ©†.†y†Ý†iñªî¯&¯v\½mu×êׯ:Æ‘ÆGŒ˜ÐLÜMv›ô˜|253˜¶šÎ˜©™…šÕ˜²élOv1û†9ÚÜÎ|›ùyó¦é§-þ²ÔµŒ·l¶œ^ÃZ¹¦aÍ„•ŠU˜U•Кij}ÔZh£lfSoóÌVÕ6¶ÑvУʼnãœä¼²3°صÛÍs-¸[¸—í{'ûBûªƒŸC•ÃSGÇhÇG‘“‰Óf§ËÎhgWçýΣ<Ÿ×Ĺ˜¹lqéu%¹ú¸V¹>sÓv¸u»Ãî.îÜÇÖª¯MZÛé—ˆÿ2Â6¢,b&Ò*²4r*Ê*ª4j:Ú*ú@ôLŒMLyÌl,7¶*öuœs\mÜ|¼Güñø¥„€„¶D\bhâ¹$jR|Ro²brvò`ŠNJAŠ0Õ"õ`ªHà*hLƒÒÖ§u¥Ó—?Åþ ÍŒ]ã™Ö™Õ™ï³ü³ÎdKd'e÷oÒÞ´gÓTŽcÎO›Q›ù›{r•swäŽoál©Û m ßÚ³Mu[þ¶ÉíNÛOì ìˆßñ[žA^iÞÛ;»óò·çOìrÚÕR V (Ým¹»öÔ±? 
ìY½§rÏçˆÂ[EEåE‹Åüâ[?þXñãÒÞ¨½%¦%Göaö%íÙo³ÿD©DiNéÄ÷e̲²·7¼Yn\^{ˆp(ã°Â­¢«R­r_åbULÕpµ]u[|ÍžšùÇ‡ŽØi­U¨-ªýx4öèƒ:§ºŽzúòc˜c™Çž7ø7ôýÄþ©©Q¶±¨ñÓñ¤ãÂÞ'z›Ìšššå›KZà–Œ–™“!'ïþlÿsW«nk]£­è8•qêÅ/¡¿Œœv=Ýs†}¦õ¬úÙšvZ{aÔ±©CÔÓ)ì ê<çr®§Û²»ýW½_ŸW>_}AòBÉEÂÅü‹K—r.Í]N¹<{%úÊDÏÆžÇW¯Þïõê¸æzíÆuÇëWû8}—nXÝ8Óâæ¹[ì[·Mowô›ô·ÿfò[û€é@dz;]wÍïv®¼8d3tåžý½ë÷y÷o¯ñy02*|ñ`úaÂÃ×2-<Þ>†+|"þ¤ü©üÓúßµ~oš /ŒÛ÷?óyöx‚?ñò´?'󟓟—O)M5MMŸŸqœ¹ûb݋ɗ)/f þ”ø³æ•櫳ÙþÕ/ M¾¼^ú»øÌ›ãoßöÌyÎ=}—øna¾ð½ÌûØú>|œZÈZÄ.V|ÒúÔýÙõóØRâÒÒ?B,¾“sMT cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFs¸NBIE] pHYsNNÆÊ/¥tIMEã1:© vpAg7O£¨u€IDATxÚìý{œõ}ï?ñcc¯=Æá¶v€1&l 0‹M ‰C;›¤%êT"'¥…¶ùJ[Ÿ49I¤–ä¤IK*µçüšâÖ¦IKrV©KÒÜwÒ”\6;Í…u0;Ü¼Øæâ1»ø†ÿþøè=3ÒjwµZíJÚý<ý؇¥ÑhnzÏg>Ÿ×ç}9åäÉ“'Ñh4F£Ñh4F£Ñh4Óʼf€F£Ñh4F£Ñh4F3hi!®§§§Ù‡0§p‡l6‹ã8SÚŽçyd³ÙfŸNË í¸µ ‚€l6;Êöµ—£í¸ý¨Å†Åþç ÚŽ[—©´ÅsÉŽµ Ï,3Ù7žKv Ú–gšÉÚrµ6Y÷G£í¸ýh;ni!ÎuÝfœ!›Íâº.¶m‡ u½A€çyÍ>¥–AÛqkÓÝÝ €eYe¶¯í¸mÇíG-6œÍfÉçóÍ>ÔCÛqë2•¶x.Ù±¶á™c¦ûÆsÉŽAÛòLR-Wk“ußx4ÚŽÛV±ãùögögÍ>!>ÿùÏãy¦i’Ïç‘ëü졇âÜsÏ X,òÕ¯~•sÏ=Ã0Âõzè!Ž;†ã8;v Ó4G­[ëz÷C=ÄâÅ‹¹ãŽ;Ø·o—\rɘËkÙÖ±cÇ(‹á¹ŽõÞ²¬š¶5Þ¹ù¾O6›å?ø¦ibÛ6¾ïsÉ%—ŒyüA°oß>‚ ŸÏcFø{ø¾ÏøC~çw~Pªó±cÇÆüÎl¢íx<[m;Ë&Ç;~×uùáHoo/—\rI¸Ýßùß©jÇàèÑ£³Þ†åºW³ãñl´ײ­±Î Æn#':·jß«-–kX,ñ<‡zˆê 4”FÙq+ô)&úl¢íMÕŽ§Úwü“m‹ç’×Û§kÓjv\ Ç·WÍn?÷¹Ï±páÂé_sÍ5ãžÛdûÆsÅŽåÚëþq{ŒóÆj“¯¹æš9=Æ“ë®ûǭ߯Ï&Ó?†™µã–ñˆ ‚€žž\×Å÷}’ÉdÙç===xž‡ïûtww‡J~2™Äqœðûq×Ùd2ª÷Éd2܆çy¡i­ë‡çy¤Ói’ÉdøÃe³Ù1—ײ­žž|ߟðýDÛªåÜ\×Ų,|ßUýL&3îñ˶åw’ýTâ8étÃ0jþN;Ó®v<ÑoÝv<–}wü¦i†Çê^0MsÔ1ˆ?þøã³Þ†a|;ˆåzh;{[ÛxmäTÛãx[,¿q6›¥P(4ÛܦFÚq+ô)&úl¢íMÕŽ§Úwü“m‹çŠ×Û§ëÞŠv\ Ç·WÍnï¿ÿþëOtn“íËï<›íXÎQ÷ÛgœWK›<ׯx ûÇ•Ûkå~ÅDßšjÇ'[„\.w2‘H„ï …ÂI9¼B¡pÒ¶íð³L&sҶ퓽½½'-Ë —ž4 ãäÉ“'Oöõõ…¯å;™L&|/Û®u½ñèëë; œ£j¹®íJ#í¸ú}6Ñö¦jÇSm‹':þÉ´ÅÂl·ãzú'O¶¶×cÃñíU³Û™ìOtn“íŸ<9ûíøäIÝ?®Ü^«óä{•mò\ã<©ûÇ•Ûkõ~ÅdûÇ3mÇ ¦Oâ›®ë’H$Â÷¶m‡¯}ß/si´m;TJ Ã(S*ƒ _çΧÖõÆÃ¶íª.¡c-Ÿèxâ³½oĹAÀàà @¨@÷ööŽ{ü–e…Ÿ™¦‰eYáL‹ü>¦i–}¬ïÄïv¦íx¼ßº]츚}íÚµkÌãw]—b±Èàà ¦iâ8Éd’þþ~ ºÏv–ëR͎Dza¹VÚŽ§~n㵫“mwíÚUÕ†óù<–eÍ*›­F£í¸ú}6Þ15ÂŽ§ÒgÇõ´ÅsÁŽëéSÈõje;®Ç†å˜ªÙ­\‹™è§R©qÏa²}ã¹`Ç ûÇ•ÇÓêã¼±Úä\.7gÇxr]tÿ8:žVîWŒ÷½±úÇãíg:ì¸eBS'óãÇWbÝ寯¯¯Ù§Òðk1Ñû©bšæ¨'~k¥òw9xð À¸‰gëÙO+£íxìk1Ýv\‰ìK¨bÿ©Tª,yg-v<Ûljÿm*Ï]Ûqã©×¾ä{ÕlØu]òù<§œr §œr 
§œrʬK8¬íxìkÑjv\O[<ì¸Þ>Ì=;n×¾ñ\°cÐýãñ®E+ŽóÆk“çêt¿b¼kÑjýЉ¾× vÜ2BœeY‹Åð}¼´²iše²¸ú) ¥(¡étºÙ§ÒvHÒN1´±r³Tây^øy-j±Ü|…B|>Ƈ÷Ù€¶ãæR;®¸âŠ1×—|ñïWëØ‹ïÛ·oÖÛ0ŒmÇcÙ0h;nõÚ×X¶_­-îëëãäÉ“áÀÉ“'µ£í¸QLÖŽ'ÛÏ;®§OÚŽÁLõ炃î7“zly¼6y®Žñ@÷+šÉLô§²Ÿzh™ÐÔT*E±X¤»»Ã0Ê:`¢ÂÇË(¡A¯]»6tMÏårÍ>•¶Ã4M‰ÝÝÝ“ºŽ†aÐÓÓ6ÖÕ¾cY©TŠl6K*•ªé;팶ãæ2YûÊd2¸®þ^c]{±ãÏ|æ3³Þ†al;ˆAÛq£¨×¾&ú^¼-îíímöiÎÚŽ›Çt·ÅsÅŽëéS€¶ãF0S}ã¹`Ç ûÇͤ[®¥Mžkc<ÐýŠf2ýã™¶ãSNÊôK‹Ÿ‚ («¸!1½R]HÔÊxL¯fòø¾ƸOtÅ•¾··7Œ¯žhv¥žï´+ÚŽgž©ØW­×~.Ù°œ/Dv<‘ Ë2mÇõQ¯}Í5»œ,ÚŽg–™h‹çõô)d™¾žScºûÆs Ý?n“±eA÷Ç>_ÐýŠ™b¦úÇ3mÇ-ã'Tsý“R¿b̎㔩“H(;¾ï—¹PW2Y¥´QÛkôqÕcl†aLÚ]³žï´ÚŽ›gÇõØ×d¯ý\°amÇÙ0´—·ê¶êµ¯¹b—“¥Õì¸UÛâFok&Úâ¹B=} ˜›vÜ®}㹂î·—-ëþqut¿¢}úõ|o¦ì¸å<âÆÂó¼0&[\<5ÍÃ÷}\× «GM×wfÚŽ§™°/mÃÚ†§“zíKÛåäÑv<}h{œ9´·ÚöëGÛr{£m_Ûðt2Sýã™¶ã¶â4F£Ñh4F£Ñh4šv¦eª¦j4F£Ñh4F£Ñh4³™–ËðÑ~”3Ï<³Ù‡ÁÃ?ÌE]ÔÔcadd„sÏ=·©ÇñÔSO±lÙ2–-[ÖÔãxøá‡ÙºukS¡V¶lÙ¢í§D+ÙO³€§Ÿ~š?ÿó?oöaLˆçy|éK_ÒöS¢Uì§Žcdd„ÎÎNn¹å–f_Ž ùÜç>ÇÐÐPÓí§UìXÇèãøÍßüÍ–ÏO§ûÆú8&:Ý?®Ý?.§Uì¸]úÇßüæ7Ù¾};—^zi³¥%hûiöïßÏâÅ‹ùÈG>2áº-)Ä=ñļýíooöapÿý÷³~ýú¦Ãîݻٽ{wÓãž{³“‹/¾¸©Çqÿý÷7uÿ“áàÁƒMÿÝ´ý”Ó ÷4À¶mÛš}51<< Ðôk¦í§õŽc÷îÝ 5ûRÔÄÐÐPKØO«Ø±>ŽÑÇ!m]+£ûÆú8&:ŽvA÷#Z¥l;n—þñO<ÑvÜ*´Šý´}}}<ôÐC5­Û’BÜ駟Ά š}¼óïlúq¬^½š‹/¾¸éÇ!ÇÒÙÙÙÔcX¾|y³/Ã¤ŽµÙ¿›¶ŸrZážå¡Ó,Y²„sÏ=·%®™¶ŸÖ;Žv/–-[¦ÛA}c²sçN–,YÒìK1!ºo¬cã8xž‡ëºÍ>­¶ÃÜØŸSZ4ûÀ4F£Ñh4Íœ%N—5­‡Î§Ñ´ Ž£¤ž|>a˜¦‰ã8$ ŠÅ"†a„ÿ›¦YUdK$†ïû‹E|ß§¯¯Ó4›}šUñ}ß÷1 ˲FŽÀ ”(&Tž‰ÌÌØ÷ "ñÌ<ÊÅ4«´ÌmC¾gÇö PŒ­—(­“ˆm×nö…Ôh4F£Ñh4mM6›%•J…c½øÉu]<Ï#™LbÛ6®ë’H$‚Ó4I$tww“H$Èd2ø¾mÛáX2‘HÍf±,‹D"ïû˜¦I>Ÿ§X,Ò××GOO¶m“ËåÈçó$‰ðóL&¨±h*•ÂuÝpŒ)ûJ§ÓeÇ•Édftê—þdT)’¥ŒóÜØ{š¥õƒØçj|'ãÇÎ=—5‹Õt ZˆÓhZÇq°m›|>mÛd2R© aß÷I$$‰°á*lÛÆ¶m‚ À0Œ°Ž“Ífñ<¯a  ˆfq<Ã0BP‚eY¸®ІçǼ+¯dß¾}¬Z·Ž‘‹.bÅðX?ݦ †Q&¢™¥×OŸ8ÁùÏ=Çi§ÆóóæáwtðÓÇ9åÐ!žY¶Œßïèàˇ³ì™g¸hd„/[Æyð.ݺ•ù_ý*GŽð§óæñz %ç3YDŽ"zù¥eÊáAÖ§´NÜI"á̈½Q-]ZߎYÚ8xX@( Ê(½7JÇ!Ëlààèi§Õôk!N£iQÇÁ÷}Ç E¬\.W¶Nå{Á4ÍQ¢ZÜS®˲H&“X–…iš‹Å²Æ_Ä>ñ²Ëf³eû¡ Ô,Šˆ‡ÒpËâ÷þ×ÿâ'÷ÞK*•²,]»–Ÿ,\È·žzŠU™ ‰B"0 ¼qÿ~Þ¸b§vªÛc|á}ïc߈išAA¦©f!‚€#¦ÉPàû>畼w‹Ø¶Í’ ïy†Á¥¶Íî’`ø¥Ï7Z†aðóÿø~xÙ,¶m«ó¶íP`t]7›ûn¹·«‹#GŽ0ÿ›ßÄß´‰ßG‰XÝ€ñÏÿz‰ñÇÌAà½(ÌÞ5<ÌïwtΦMœ¸÷^®8x“çŸO²t.â™÷D+ÙØ{Ë÷)š&&‘S†x6p÷ÝwsåM7a¢„3ˆ„6‚€îînúûû«ŽAs±kl3:ºI„C<,‹eO=ÅÎ矯éwÖBœFÓ‚ˆgÛ6}}}Óê®kYVYƒ](ð}¿LŒs]—Ÿ 
qßð0ýýýüìÈ–¼öµìÚµ‹_úÜçØ}àgŸ}6w=ú(ï}ï{Ù><Œ¹z5¦aà5|2» 3 «K/½TíÈ”Öáì³G§½nïý—gvÄ›..ŠÉ5”¼y™LfÌFϽQˆ+¶_>ŸW¿ßsÏaš&ÿ÷ýè´ýžF£Ñh4¦ýðÎl6K$‰P0ЇЯqÛ0èëëã«Çޱwoïê¶…ŽŠk1¥÷ñ˜üz‘ÜxÀ(NΩÙ9ïä¸*=¿ûÝï6õ¸4F£Ñh4š™¦2äR£ÐSÉÕ688ˆWŠâ‰g,ËRNDÞYY”¨–@9ÞÒr€~¢0J/ö¾rôäþ¢Eܹf Ÿªø¼2+AÒ9Y$ÊÈ0Œ0¨ˆ°T*Å}÷ÝÇý÷ß¾¥óO§Ó|ãßà¿þë¿èííåÜsÏåw÷wɘ&ßûò—y~Þ<6>ý4_]º”yÇóà«^Åwßõ.þ´ôý×¾öµ|ò“ŸäøûßÏ[_z‰kÞðžýÞ÷øÊÏ~Æ#Ï=ÇkÎ<“yçžË¿þë¿’äÚk¯åío;wÜq_øÂ° ƒ]‡ñôÒ¥Üþ¶·ñ{ýý¬Y±‚Ÿüä'ÊÄu¹çž{X²dIM×@ quAÕ<[ÍT‘¼i'Ožœ‘ýDÕ>Å[Í!r —™Ö`<ë¯(A\”Óh4F£Ñh4퉤k ‚ |o—Ò¹8Ž~&y«%™ä¶–ÏŠÅ"O?ýt³O§id³Ù0‡›Lä—åˆ#ò:Ë£D6Ÿ¹(qM%úJËû+ö1‘pÖÈ¢rÙl–äÊ•\qð ‹¡˜vôèQ_s OÞ{/‹Î8ã«_eß¾}:tH}1æþûïç~×å¿~í×øæ?ÈÙßÿ>‹-¢çèQ~éæ›Ù²e GŽák_ûŸ¸ôRö-^ÌM¿ú«,î9N½í6þì}ïãôÓOç=?ÿ9ß{ÿû¹ãæ›Ëœ6²?Î;¾þu~òýï‡) >sðx ›åK[·ò³ŸýŒ5kÖP,ùç{ïÅ÷}>jY\qÅÜöÙÏrÇwð¿ÿ÷ÿæÒK/eÑ¢Eüüç?'‘H°¦$dNÄœâ<Ï ˆÊˆ®ë’ÍfI$a‚{I6Žãày^è"Úèp6ÍÜfíÚµ˜¦ÙP/¸¸h&qù"¼I2ËTi"QrÊÔ8ÛAN£Ñh4F£Ñ4—x>ézˆwóù|˜Ü_¼–$/³¼Íq‰é´Jy/b(Ϭ\.Šq’ì_RîHØ¥eYø¾O¡P೟ýl³/åŒ Âc*•"›Í†ÚD.—+»†‚‹ÙúQmE`°´Ì`lámZ(y¨ ¼ÿý °éÚk!ŸWB[oo(¸e2æ=öGî½—§¿ô%Î_¾œëÓiþýª«ØhÛ¸žÇׇ—_~™;vðÉŽ¾r×]ܾ`ÖEñÀßýǯ¿žM›7«ýÛ6Ÿ8ã ‰\ptï½£<,ߺzµºŽ†Á–;ïu ¹\.ŒVªŒ“c·m› .¸€O}êSárqÂúÎw¾C>Ÿç·û·¹úê«ÃJ°;vì`g©0ÅDÌ)!.(%åuåÆ÷}?¬ "î ¾ïãº.Žã„ß—äì¢VK‚?AJóÆ+DŽuR®·qˬj'ù®â³ •Ÿ†çy8ŽC¡P7/Öt!^]¦i–í¿Z<»F! 
8Ç*¾0Y$'[‘¨jŒE4KBl™F£Ñh4F£im\×ÅuÝp¬%až¦iây^˜›X>“Jš®ë’ËåðN§‚o.Qþ6)­¬½Ø4F£Ñh4šÖDƇâA&c¦žžžp<(Aooo8ö”q”ã8¤Óé²1f|\˜L&ñ}?Óù¾ŽÑz{{ñ®xdçÌ1–Ð!Ûh†#H;ñ¥]»¸ê¿þ‹üoüFUÇ™¸\©$r¦¤’×õ’ÍfÃÂr!ž§þ ƒ÷wüÍÞPæ$òÔ§>ÅïýÅ_P$ŸÏSøô§ÃÏÄKRlØ0Œ1£¼DøÁNB9¦4̱'Ôˆæ•~>ʽXŠ¥?QF•ßYÉkúZØ2ñfg¥—N§C1KÞ‹&¥~{zzB¯4Û¶G¹% …qÝ{ …Éd24´ƒŽ:ñŒÑIJ [–E:&N‡ûƒ* ¡@%ß—upp0Ü~Ü«..ÎÅ?O$¡p–L&ÃcÌçó¡w Ìœ$“ÉЋ϶m<Ïã”SN 29V¹1$îßu]2™ ®ëŽ*ÿ›ÍfCñÎ÷ýPÔô<|>eYá9ÉìŽ\«øqÆU~ûÛßn¶y5”j3!µâ lñRνL­aÖh4F£Ñh4#îïó‹sˆ8WÈXHÆO©T*kŰJ1,‘HÏçI$áØBœP„Z¢“´ˆ6ýô¯XÁñ«®¥E”>”Dé䬷0B5BÌuUh©a¨°SÛßçƒÏ<Ã%ÛÉf³X–E"‘ oãF@ibÓ¹\.Ô4¤ªëxÌT1·ô×÷¢j™Šu+š{¥÷yÔ@;¾Ì,}§ôãœñŸg°ëŠ]5Ö¬âÖ®]ÆéZ–EwwwÓ>886>â!wá3QŒ½¸çJ‚ÊJr¹\YCVÙXŠV6†o/ÞXÇ}­Ö`ÊŒ‰ìWóøù»®;¦–mÛãÞ$rþ===áú•Ç!›ì'îÉWy^ÕŽU~Yîºî¬â$z2=¨Ýf®fªÑh4F£Ñ´’âHÆ^"¤uww‡©dŒ#ázãœ(ÜÎ0ŒÐÁAÆY•c9"¨5± 4þ½FÎHHx§„f2™ºzN†D"1éýȱzžWö]Ó4yæ™g¦õxgÚŽÅ °æõQ³#"¼Qmƒá4qƲã™h‹5šFÐì>…FÓ´kf º_Q‰”ÏçâÒ×§ƒL&ŽqÄãMho4qjGšiÇ>-åÞ{åW~%,šg¢Æs*ÚI¼á`jpÙl6ò¾,E›ù¥(;I±¹’l1ŒÐÉHtŽ FU{zzBñ¸Q6+¡­“Êïæ¡±‰¾ D¸¸p'†\!¤á—¶çU/ô*Öë¡\ÔƒPì{é/^ªùðç5â sóÍ7ÐÕÕ†+ Ùl–¡¡!ºººÂÊ Fò½‰è&9ÉR©¹\nZf¤1mô¶ED¬æiW/r“Ä+à4É_×Hâ×3~ ¦{g¦íXòÞÕò;¨¶#.Âj gعWÓâŒgÇ3Ñk4S¥úÍTÑv¬™-ÌÅ~…¤ªuÝd2IOOO˜š(‘HÐÛÛK&“¡···,ò'µ4™ÉøFãéY””½'¢¥,³–ä!kßj4ÚË,ÎD•H&ƒÜ¤3%Ľå-o™¶mϤËlD-¢hÕ~äÐ"œfbƲãén‹5šFÑÌ>…FÓ(´kf ³¹_!yÜV®\IOOO˜R'“ÉÐ××G"‘½£|ß'™Lâº.ét:,tW(&Âù(!Ì#JUå¢u’(}amìÿli¹[zŸGiñüñ}À j2_Š>ö–þúˆÆ”‘CiòY/QîzÑ-ââ]»ÐL;ö€…O>‰™JñÓ³ÎâêÓN …PqÂuý§<Ö3 ö]r ~é¥8–…oÛ¤Óéðãb±H2™wÓáh4’>«ê¾eœ&r#”„é}ïÍÒ•# ÜÔÍR©³¦P†oVYwšd™†xÄuvvÒÙÙ ÀÐЙL†[o½PF¾zõê²u'âé§ŸfëÖ­lÚ´iÂõ¥ògeÂûx…ÑvCJR7©ÀÓxžWæ<44ÄöíÛ9vìØ´í³Ñvüä“O²uëÖpæ%Žq!ÕrÚiÆI36bÇO?][ÏzËŽë±ágŸ}ÏóÆ´cÍÜdÇŽÜsÏ=Ó¶ýF·ÅO=õ÷ÜsCCCe9º4š­[·N›Wÿtõu.MéW<ùä“Ó¶Fö+`üþñL!yÜ<Ï+ó ªÔ,Ë¢§§Û¶ñ<\.7iÑMy›(匌ŠDé©DžR20P:¬o”Ö—F‹ò‚0vêšFùí¥¨-=W-ìØ±ƒ;w¶]ÿxûöí5õ+<ßç¡!Ü5kpQ¢§eð)¯ P+Ùlv´sMÉÁã<”=d³Y Ã(K‹ËåÈf³µïh:×Mbžw§ ”ãÊDz]:©”øÍ¡Œ1M¤&Kï3¥‹¿TÊÝÇ#UZ·Cß¾};<ð@ͧßðb Û·oç¶Ûn›’;çé§ŸÎúõëY¶lÙ¸ë‰{°eY )¾Ð*Hî€F2^…ש2Q‰âzŽ5ŸÏ—]ƒeË–±~ýzî¿ÿþ†œFÚqGGëׯ¯ú™ëºvü³D¶šÙØq=ÜÚ©fÇ[·nôv–,YÂ9çœ3¦kæ&«W¯æâ‹/fh¨Fßû:hd[¼lÙ2:;;Ëf¼5P¡IO=õK–,™–íOGß8>`ÔhšÙ?®§_ã÷§Ïó(‹¡çMooï¸ÑS]5^K‘( Ôªò™x¦AäéQ’~™pŸì¤»ˆsÍÂbt*®z‘v­Ýúǵö+~zø0Žap‘@jœç±å+_σR¾6q¸ ‚ èäÿJodt\mÇK¤( 
R™"–nîúÎë¡«b½xIYeô£ÃEmÔà9žH]\+o”ÉÜ8“HÆÞÕÕÅÈÈHÍýㆠq›7o¦««‹/ùËttt”ÐöíÛÃ÷Ÿ=§Ÿ~ú¸³}===ôöö†•5šá>©Q˜¦‰ã8Ó.„vtt°aÖ/_>­ûi¤/_¾|L;y,²¨6«ÍlBìX*6MÕì¸^²d çž{®ö¾Ð”ÑÙÙÉÅ_<­9;Ù/[¶Œ‹/¾X qšQlذ;wN›7“}cÍܤ™ýãzìÆïO'žç……ý¤àÞDTŠp"¶9Da™’£-Ey¨¦äekß­ñ‘|õ@¼ÕÚ­\K¿"žyõ«¹~ùrLÊ5 “Iz–<ÙŽík¸õV:?üᲉA„UM]×U™·aøŒ.b0ÉÒzý߯¨zúŽÅïP¯ãÊveQ…Ê+®•:nŠU«»ºº®¹Ü!nûöíttt”Å]Çh``€ááa:::p]wÊÞ^~©¢G¼ê‡fbí¹‘ÂÞ®¹øâ̤»®;¦x)³gÓ;'¤™­ŒeÇÓÑk4ÓÁL÷)4šé@Û±f¶Ðîý 3æóùIUˆ¢SD pRÞ ô¤y»Ð,;þÜW¾Â«¯¾–/%ÊíE9&®«<àR)† ƒ¾“'9‹v‹³x¼™¦9}yÜ]F‡|ŽE"¶¾Ž»y¸åø-øGýò„yF+•FôÉx«µ" ↆ†Ø±cÇ(Ïóèììä–[náæ›ofÆ ¸®Ë]wÝ5¥ýù¶M±XÄqœ™/ŸÛÆHâF3ÛlÆÃ|¦ìxÌD”%²”yéj4“b<;nt[¬ÑL3ݧÐh¦mÇšÙB;÷+¤j"‘ —ËM8¶pJâè#yÞ%nÈQ§ý]¦Žˆ”3S²oj4ËŽÝ à’Å‹«~V³—Í‚ã¨0TËRá̾ã8aê)ñÜô}?k6L„«ö#ÛŒ®Ö᡼ß*ÇJ8Ëå\’„ˆ¨ÿWþîJ:^· Ê;¥Z~¶Y’·©!BÜ–-[ÆM¸yë­·ÒÓÓÃÞ½{Ù²eKMnËc!ÕDwîÙ3ת½1 Ïó.nIu¡F“J¥f\ˆ›);vgÌssPmM;<Ð4­ÉxvÜȶX£™.f²O¡ÑLÚŽ5³…vîWd³Y2™Ì¸^pÝ”‹n’ZÊ ÜÙG¶Ðz>ÍÃFi%íà˜4“v,¶d¢ô¦·œqÆÔ>—S…b"¢išcæzk¸\õ#Ç•CQ¨+O\BN+UÆ *×[ucÅÑ„Sï9•oZQþñ¤›¥4,GÜDÄ+•LÏóÀ¶y°”Ì_燫éºNÓµÝV-¾Ñ;+,UòIjwÍtÒ¨¶X£i&ÚŽ5³mÇšÙ@+Ú±DNù¾_µÏEï ”˜ä—þ·™„—’†QaÊv§‘v,E9ú€}‹óöÓN›úF- ×u ‚€b±8¦ÓÊ´„£Je‘øÍуʣ0:‡[|]·ô—Cy²­ELÅ€×0 øi•}K™ÙYÈŒ qB<â@‰5³!7ÙLÑßß?}qâššë70xý i4F£Ñh4“Ç÷}²Ù,ýýýá8±HTáSò½õ¡ÃK§Šx|iFã¢lqÞ•WN^ÜÍçyòª«X³qc¸È÷ýО{{{K«Í€c’GT‘$ŽR\mq%»»´LŠ.Äo´ꆬ5ÅÿlPyÇ`^³`²xž¾Ö¢Òä°,K —Mf¬‚íãÚ­Ñh4F£Ñ´étšd2I.— ÇfÒH¥ßJЍ6Nw 4F£Ñh4šÉQ,ñ<þþþp™ò„K CN§ qŒÒî1âuyçYgñÚãÇkÿbO$ |üãüÁ‘#eI>8×uq]wÌüp5“E‰\ñÎ'ŠÓ,¢ê$I”š-'Hi] G•Š&qñ,¨²ïl‘6ôˆ;ë⋸袋š}(MCIY,øk4F£Ñh4Ó‚„£ |Túªµ(GÐ'ӉɬΧ_76°ûÒKY?~m_p°mÈåXôË¿FþyžG±X W“ЍSÆC‰g•Œå©–@ VÓŒ×ìÒö*ë7&)í„Zˆ£Í„8×uCO¸|¾VF¦uˆç8]ñ†Ó3IF£Ñh4Míø¾O2™ Ãôò¨~õ Êy'‡Ž8™N¤rª¦œã_û¿Þyµ}¡X„Œ’ŒMÓ$Ç!‚²ÔR™L†D"QÛ6Ç# Œ Õ⌥² (!®—ÑUQ­Ò²jŠweŽ8MH[…¦~æÂ yqáÂf†FS7žçaš&>*åITÛ¦gê4F£Ñh4šÉqÇç?•ËáÚ6JÐQ&3‡‰ã\´““ð£`×.¾|ùå˜Ubõ}H§Á4Õ婌¦-'~5!NªNôcV«.˜)m3.ºåPª´•GÜO.äÁõëi€¬Ñ4 Ó4Ù£"£'4F£Ñh4ÍØA@:æÓï|'‹¯¹†Ðš}`s“ÑšÎ\æ…… yÃáÃ\?‘àºJA¡[ä¦i–ªl(ƒ¥¿8’P1k\9PõJŸW q©*ËÌ*ûЄ´•GÜÑ£GY°`áÓ´=j2!r›×h4F£Ñh4µá8Þô&nyíkùT³fŽc¡ÃSã=z´ö•S©*‹Ô²t:aSË '¹àrã|êGL ¨ÙÒÿâ÷r3'Øž¦fÚÊ#ng'õã7û04šº‘ÐTâ ´7œF£Ñh4FS+:Ä'Ï;]ɤCm$W¿F@C *†1õê¨ÕBP=”Ø&$Kÿ§PƒÓÞØg•9ã ”ë©fÊ´•GÜ©{÷òÖU«pš} šQT«–c¡î}É×`P½-ÏâÛÈ îû"ÕÅøv% Ó ó)øSÝ 
F£Ñh4F3Gð<_Ù³‡«Ï9‡¯ëÜá-ƒ‰óiaŽ-^\V`a²ôôôÐ××7uÊÚ2¨*>—ünRá4®!V;Ù0(o&ↇ‡éèèhöq*‚sýúfŬÀ%Æ@ÝwqQHЦT›Ýð‰r˜¨{7OyNG?ö—ªØŽ´0Z˜“Ê;ÅØrè<`]³/Ü ‚ µ·Ðy,4F£Ñh4šZ)‹ü __µªÙ‡¢‰‘C¥ÜÑB¬8xpü|?Ê WAôõ50q‘„_I8”‡‚úÑÜÒ_\„ëF‹nÓH(ÄíØ±ƒíÛ·së­·ÒÕÕÅÀÀ›7ofxx˜ÎÎNòù<]]]M;PÏóX¼xq³¯WÓÁJ¼ÌD4‘ªRT«öú ªŒWŽßkv•}UË»'‰á2PwjßsKÇo†V;?ñ ¶MßežV¤›è6M£Ñh4F£™ :Äïx>ýôfЦqâÐ(G¦q«:TxÌù¾O:Ʋ¬Æx "Œ÷ãŒÞ¦«dN †††Ø¼y3·Þz+¥ê™LÛ¶¹õÖ[éëë#“Éð•¯|¥iêº.‹ßùÎf_¯†#‚—Oä¥f¢î‡"Ñ=ã%÷7‰9«âs¨žw¬'¶^-aݵÞs¹Òq6òo»ÊöÂv?û,´é ˜çyX–E€.ï­Ñh4F£ÑÔŠüÊK/ñÎW^áÏæ°sF+#c¸¹œÿZ"ºÆâ\ªx½% R©Ùl¶qbœ„¤Æ«iHÞ¨ø•gt¨–ÝšV¡'Ü–-[%Ì ±eË:::¸õÖ[Ù¾};MóŠûñg°råÊð}»)îjiy{ÉyTz§IîÄ­/BKhj£·÷ž/î‰'àâ‹›}ÚuáûêW÷hü5Óh4F£Ñhf+I`Ùïÿ>Ÿÿò—›}(š1ðÔ¹œË?:öì+®¨¾B±–5Ê#Î4MR©¾ï/âM%.Hž)A¼x¦i·šê,à–[n öõõÑÕÕU–nõêÕ 7õ`× CIŒkU!.@ ÊB±´LB±]õCuÏ(] X…¤V†oZÀcO<ÑìC«ß÷±m›":4µQˆ—¡F£Ñh4fvâ«|ËÏ>»Ù‡¢É^©óÌ5öïß_ýƒ €l¶ª7œŒiÇÁ¶§!vª@äŒñù\þÑšÀ<€ÎÎNÂ…lذ¡lŽ{÷6ûX[¥þ¯$Êi(ÂÛ *7[/*ßZ®ôeŽDM9ÕDù°v×®fZÝø¾Ï ê6.†çy£ÞA0ê}2™Ä÷}ŠEõñ}Ÿ Èçóø¾O6›-ÛN±X ?×h4F£Ñ´/·{7¾þõÆæÎÒL ÕÓÍ%ÎK0N§!“ 7ß÷q€T*U¿“AµÜN’œÜB ¢”Vî"^MQ3#,èêêâÎ;ïdõêÕ€ÊÇv×]w…+mß¾‘‘‘Qâ\5¶oßΆ Â\s²lhh(|ßÙÙɦM›&u {öìáÒ·¿ˆ ÌRýÓ#jX”­ö¡<àr´~õ\o'ÃtØq,ݸqÖæMð}ÏóH$T@µçyxžGªTÈó<ŠÅb؉aÍ0ŒÐ;N‡nÙ¹\Žb±H*•"“ÉApàïðÅï~‘uïZG¯ÙË¡Sñûçý>{þaóß:³h²ækðM?¼îù|ž\&‡ø˜"…š¨™!Qĥꨛ%Qú\’2fQ7º$tL•>Õ8Yú¾Rçe]IæØ]úŽï¾÷Ý3ò›TÚq#Úbf&™®>…F3“h;ÖÌšÕ¯øþ‘#lÿí߯0´RÐêT¦#kE¦ÓŽ]`ÁÈÈè‚<z{G}dš&…B!|]7rá+õêøàÓ)ý¯o¥¦³`Ó¦M ‘ɨQ¨TN¸ùæ›â¶Ûn›pclݺ•ÎÎβΆëºSÎ-wbéÒP]ž.»ñQ¶)¢Dá¤NÝK$"ŸRZǤõs¹ijgºìØ÷ýYQ1U1yP¸®‹iš†z³ù¾Ï±‡ŽqùË—ÐÝÝMoo/©TŠÏýÏÏqßñûø“?ørÅïßÿ~Î9íúûûÁßöù7ÿÿþkÿ% ,Ãbûg·ó¡‡>ÄÂw-VdV°âÁ¸¸ báGrÕï^Yµþ ÷¾À.~C"øZÀ‰5'XøäB†?3LGÐÁðæa\Óåê]Íâ·-¦ãò^þâËŒt°Â[Í,9D®Üñ„ŽýD‰“¥ÏDÜK%CM>üžxÕô‡XW³ãF´ÅÍL1} f¦Ðv¬™-4«_ÑŒ¼êUl\³¦Ù—@S6Q¾óVdºíøÐ¡C*G\%® ‰ÑŠŒ&-2§íTË@Ócf=š4c²@^lÙ²%,ÖgË–-£òÅUcëÖ­ TÍ#·cǶmÛ6¥¹è¢†…ô¨ñr€Ö$g—8Ä$ˆ*™zŒ¶qƒ·Sh©‡¬é´cÓ4Û¢bª•d¡ò¿„‰Jþ‚B¡–Ù6=“®zT*Å^÷>;ÿ³|톯±xñbÖ®]‹×ã±"½zÁøšÁñÌq–Ÿ¿2ð7Î߀G~T@ÌA“÷¿«'yuƒö«×›ú7)á+MäbÛ²ÕÍüÒò,`ÁÒ_,å†â ÐK:¸o€^ùC6Þ¸ÿŸâw‹lÚ´‰Ã¿˜o^ýMnÙs ·½÷62+2x /éG iyT¼9±ý›D E–H™§€È¨ãălÿïfú¼âƲãF´ÅÍL0Ý} f&Ðv¬™-4«_á»âÂÇ-ĵ ½ÖjQ@3aLJbݺu£?0MU¤¡ I‡¨šqPc‹ÊA~¥Ã]¢Ê{&×,˜h…ZÂQPÄ«Œi¦³³3tù¬ÇÕ3 <ùk¿6e¯³"jŒl¢ÆÃQ´˜Çè*¥&³GÀjw/¬™b:íZ»bªã8álŒo†aàyë¾»ŽÄÕÝñʯPx´ÀÚµk) 
ضÍ×às~Žoü0o~ó›ù­ã¿ûúR}¬°W¨°Œ„AŠTÓíEXœZ¬nJ¹ ]”ø%¡ž&ÑÃ%Aù jÅ–Ë÷ãnÙ±e]¥Ëáò½<\ÍÜg–VË…×$—Ë‘ÍfÉd2äÉó»þ€óóË·o(Fç_’Û§÷·¬fDzaf&˜î¶X£™ ´kf ÍêW¸ÀâÏžÞ|s³/fXDÙ[Z‰™°ã=\zôh•‹íÓu]òù<}}}“àâx”AªaHU¼·-ÖišB(Ämݺ•;ï¼P9ãn»í¶†¸h 044¶y5ï;áðáÃìØ±#ôÄ €…ÿüÏï{_]ÇPçSD‘ãöšªk‹Ú³sff``€çŸ¾)û¯ÇŽŸþyvìØQ&fû´Ž(êºj:Ä÷}|ßó°IÈ©åY8i‡L†ßö Ã3Ì¥oº”yÍO…‘yL8ëðY|øÓ óŽNGàý`ö˜Q¥’¢Xn©££\4“×ñ‡„<£âOôÉ<Ý«)ê5(ûñ| †a`Yç¿þüðÚɵ’JGåu;>|øpÃ~ÇZ©Ç†9ÂSO=5ÊŽ5s›¡¡!vïÞÍHµ(ÓL=v<22ÂîÝ»éèèС€š2vìØÁSO=Å‘#Gft¿Sé¯^½º,ÄU3·ifÿ¸;†êýã±Ø><̆‘®Í°Q+µ244ÄÞ½{Û®<000ª_qâÄ ÎŽå›«z}l»þ‚ ‚¤¾)Û0J½.EêTsMl5…t–0000©þñP‘íÛ·sÛm·ÑÙÙŠr¨8ØÕÕÅ]wÝé† ¸þúë'ìlìܹ“Õ«W‡!±×|õ«P‡'‘d ”×èf< ½¼æÚÕudd„;wV ï˜ ê±ãááavîÜɆ "ᦠǞÏçÃ2Ø–aá}\ÃŶm|ß'‘H`8gþäLŽ¿é8»ùcüï³ÿ7ï~Û:T'ÈAÝLE#*R 'ÖÒz‰¼ØÄ« ʽÅÚ¨ð•œBqN*¿ÖRj\ì¸zløÈ‘#ìÛ·/´cTõÝ»w7eßõرqË–-ÓBœ¦Œ;w²oß¾â¦Ò7´§ ifÿ¸;†òþñxä|ŸõóçÏø¹i¦† ¤ÎÙDìÝ»·-ûÇÕ„¸'.¬©àB:&“ÉÍféëë›pýQØŒXæPâD‘¨²d–ñ£u4 C„¸ZYª#bÛvè†ÙÑÑÁÍ rîèè(Ë/WKçáÌ3Ïœ°¯…<Êã:@£™ É÷ÛÎÎN¶lÙÂÀÀ@Sö_¯Y³¦ÌŽ;¯¿ž™íê+‰†aÍfùàåäÌ?=k§Åƒ›ä­ïz+7æo¤¸¶H>™Ç6m:ÍN‰ÇÍãw)ñÍ´MeèÊ»ÍÖƒ(7?yJ¯ "/¶YxƒÈÖ4ÍPœ+‹xž~&â]œfÚq=6¼jÕ*,ËjH{¬™=ÈàID™¤;>÷ÜsY¿~½“5£Ø²e [·neÕªU3ºßfö5³‹vëWÀèþq5Ô|nÇ>€ý×=ã禙:½Dóó±aÃ6lØÐ6v,ýãj!¬+-Äļ:‹Å"‰D˲ÈåêôLÈ¡nnHXªAy‚{íý6clÚ´‰ÎÎΚûÇóäEÜ9c|çw²yóæðýŽ;&=“WW¥L‡œ`Q=»•™…šÈŒÐ;>¶xñ´zÄI1…l6‹ëº8Žƒã8aXe.—ã¡;bù¯,Ç¢ëæ.Nÿúé V|z¹¾¶aÓµ¥ ˲00Ô ”'*u-"D³+9¢‡€Åœ52˲H¥RáƒU~€l6Áh°a¦Ùh;ÖÌ´kfÓiÇyàòo}‹õëÖM=„OÓ¤–Y«§qj¤¨ÐÔ2<Ò庆a„áÖS²o—H(ñˆÒþÈgm4ט°XÃTÙ´i®ëróÍ7ÓÑÑÁÞ½{'òzÎÑ£“2R夣½0G£eõÑ;~|Ý:^?Ç(Ådfå[¯ÿ‹ß±˜l6Ëu_¹ŽUºŠn¸A itY]ð10=3r_öˆò äQ7QüFŠG_¶SLö Ÿý², Ã0Â’ä¦ibš&Ùl¶ª§ÜLÐÖhš¶cÍl@Û±f60vìüÉŸ°ë;ßiöij¦@•Áf°Ù2´c8>0€÷”+¡"}¤³I&“är¹šBYË.^çA”Ïøû"*¥Ðu’huB!n``€­[·–}Xù¾—xÉ—$tttp×]w…¥‚¥ÃdXUcÞƒu³»hN35¦ÃŽ¡±Bh6›%—Ë‘L&éíí ž»/¸››½‰·þõ[ž?Ì9»Ïá¬egqúÿ=Ö¡c5K"~ã™ÒÁùDb›4ÚZp«‹øCU~äg¦K·ãFÙ°F3“LW[¬ÑÌ$ÚŽ5³…™êW ?Î/_x¡.ÒÐæØ¥?уZ…é´ã•‰ú=*&á}ßÇ0ŒšòKKe»Ê÷â%§½âZ’ b¡Êâ²§=•pב‘‘š<â²(››INˆ4sƒzíØó<ýú¯7tBB<ߤ쵄AÞtåMlÞ°™m;¶ÑA/wÀµ(AÍGÝ$âÆl¡çQ5SbŸiŠrk×®mÚ1èdõšÙ€¶cÍl@Û±f6Ðh;Î+š¼—¦%iE!®°ã»y„W/[V¾Ð÷¡Â–Ç!‘HÔ%# § y»«­§iY€rɬ–l°•˜hFÄAyÄjÚZã]C£ 8eåʆ؊ã8ض]ÖAÙ·dÅß.rã/n„«à¶³oS7…êÑXDyÜ ¼ÏÔ¸cF£Ñh4fšPŽWíÞ­sÃÍDofûcϳÏòºxŽ8×– L®@CPúO—–"zÊÅ7ÙÔ”åˆfûöí 022BWW]]]Sw›œ"ÃÃㄸxÚó€(ÕL"FM-yúÄ ~}ݺhçò†›4Q:!Á¡<ÕŽ-‹Óœ´Ô𠫦 
pà 7°}ûvV¯^MWWÃÃÃÜ~ûílÞ¼™áó´M•³"ñj¥iÔ=Ó‚¯˜5“á¢Êœuâº.¦i²eÑ~ïÕ¿ÇùËÏW ­TÆ‘Ù1ÐÚX5F£Ñh4-ìÞ½Û¶u~¸Y„‰Òˆ²Í>ifhÑ"þ¿+®ˆx^U¸îîîÚ7ê£b{+—ÅʼnûÕ·L[1”ÇÙæÍ›¹å–[øÊW¾Â–-[زe ·Ýv_þò—CA®Läqæ¢ì°‚¯ûÓhÆãØâÅu×q@gð}ŸL&ƒïûtïäÒ/åŠ_¡”è¸!ö%æL¡…8F£Ñh4MKãOþó?“H´zF1ÍdÉ1ZOšm_²¤\ð}¨b=©ÐT5– &X¯•KÓjª2`ûöítuuqë­·ŽZ¡££ƒ|>ëº ÍøúÀÑ£GË–Åuå"Íóº´€ƒ´OŽ8wê›ÐÔÉ#Ë—³nŠ^¥A°ø‡‹yÍ“¯Qâœd?•¼½´1j4F£Ñh41\àÅ’Gœfva †.³Õ+Î-ZT¾p ÁmÒö ÜáÂc´'vºh;æ K¯RHgg'6l`ïÞ½M9Èå<2æg.­_…E£Ù·o+V¬¨ë»RQ§P(pÊwNaÑo/¢+¨„œT‚Díš©Ñh4F£iK|T”Ö/Ÿ}v³E3MäP’×왲À…ŸùLùÂ17‰vª I9Tùí|ÑöÌ«uÅfæˆ;í´Óª./¢«òÖŠÉ›Ëðºu\P§—ÍfÉçóø¾Ï±cÇ8±æDäÛ-®ÊºjˆF£‹‰z¼ã‡<ô ]ª5F3­¸€éû:7Ü,Æ@ ]²LiÙNH7kå®]߸q>³×pŽ2`Æ Œ¹ÒÀÀÀ„^s3‡²Å¶-2‘^ËÀ©—HÉR&Î4\ˆ«L©——–-«kâÂó<|ßçâŸ^̃Ÿ}ó_9Ÿ%·,Qö#q =+¢Ñ´+ÕÛóÉLO”t¥›ñ…´" mZˆÓh4Í4â+úûuXê,'Áì+Üû÷×¼¾D;Õ„L–fÐH³Œy â”¥ C¥çÛŽ;¸ýöÛ¹õÖ[éèè˜ñ|á…ª.há”Xµx(å'ø~µÖ©XÚ®òPÈWì«H$ðMt õøÇ·™G•«­Ü‡l×EÜb;vlÒßI§Ó˜¦Io®—¾p›^Þ¤á Q!P7ž<ÔhšÃXmm>ö¹LZ¤‰Úödé%”y¨6¼§´~Ol]™XñˆÚU‡ò¶_&b*E9É#"± ùØ6“¥÷&ã·!ÚëV£Ñh4ÓŠìÙ³ÓÔq<³ªÛ2›Š7;v K 3øÕ;M¾ï‡N5#}@I²*=‘¦í™QA†¡¡!®½öZ6oÞÌæÍ›¹þúëÙ¼y36l`Ë–-M9À‡~¸jhª”AnI< Å¢j^<ÏÃó”Båº.®ë‚ ÷Þs/_üäùÙW~~Õu]>ó^_þüŸö}~Åb‘~€Ý7îfç‡v*/¨¢ÚŽJä¸päSGªz8x;Õú$Q7vX[©‡hÀ&‚  ø"±/[_‹.Ñ Ò º×Ÿus,œòù /œ´WbÊn¤ZŽGdô6åbœF£™dò”h%m˜[פ½Ì¢ÚÄ<Ñ$Šô:ã1J\O¢îo»ô™”K—Þ;±÷.QÛ,eÃÓ¥?ÚâûL•öQDµÍAéµ;^»t,"Æ9H$Æ•Úùå,oö/£Ñh4šYDúÒ—´7Gècö…¨†8GËŒ¾ï“Íf•P+âBÑx\ß"³‚ò¢³³“mÛ¶100‡mÛlذÎÎΦdµ$÷ͬÐ+¢ZOOŒ>p!ëf¹nè:®üá•x–Z/ŸÏ“J¥8üåà ­bEÿ ^üÑ‹,:±ˆsŽŸÃ“ß~’»î»‹ï¿‘…KrÎkλáž_¿‡×¼æ5,½)Þ{t¬ëàÊÇ®äÜŸžKñ]E.?ïrÖm^Çgnú ¯¹ú5\qð ž¼êIîýø½ôt÷°ê³«øîÞïò¶oã3×|®~œ‰º‰¨ÁÙZÔÍ,Õ7”w†YºÈÅÒº²,^{º5ðƒh€j „½>ÔÀO|¢à÷ u28í§Üã”Ò~¥}2)sÃýÄ>Ûšøã×ÉâÅ‹'Ýfööö’Íf9|ñanÈ܉žZ€ÓhjÇ!ʯQï÷-"¯±,ªMtKËEœrQ÷¦„šÚ¨öÑ*­/§^Tû×Û~?‘—)-—ónT{*í …j;Å;6Ûnº´-™D)”ÖÍŶi } Ê«‰è×SúÜ éä¡kª¿—¾ÿ4/u¬F£Ñhf÷Ÿ8Á»uXêœÁDuAò´ÿÐf”o‰ïC¢¼œ¤çy8ŽC__ßä6b ˆ" Úý¢i"!Nèêꪚ β¬P€šIv­\ ##M¹8žç….¦¾ïcš&k×®åŽëïà’}—„"\±X$Ns08Hî`Žl>Ë9gÃ'v‚§nzŠþ\?Å È›Òoâ KßÀÉ“'ùÆšopÕÁ«8ëÿw¾és¯s/—í¸ŒùoÏ©‹N…Ü’¸…¡·ÑÙÙIÎÊ…7ÜðEü¼çe>;üYné¼…W~å.;å2x+ܹûNr÷äð<ãw粯^Æ)—œÂË® ï(˜]”߸"!L„±x~¹>"WØFh ¨F!Ê Ñ!Ê@6…á Üu L¥mˆè—"ªZ ŸzâUO°ŠUM±‡™¢§§‡B¡@d2¥Q¹Qº†³±ÌfN#më´Qdüd¢qOÓ$ªMó‰D­¸·˜Ü‡éÒ6%ä³ÕnJ¾Ù§l·$^…ÞÂñ6¸/öy%Véâîß•â™LV˜¥u!ÚìØ2bû0bۈ﷟(D6MÔ6‹×^l;‡Ÿ9¬<¬5F£™">pöÐö†›cȰOæ 
ÛXöÒKÑÏ«ÿz N)?ÙøUDñdƒ¨U´“Ë„Êe| (^¢¯òÙº^ly®ôº§ô¿l§”Õó‰W=Á\1£ö0U‚ `䢋j^?—Ëóo›±ÍP×J<^&QíZ£i5‚ N‡Þž‰D‚îînNž<~¾råÊð½,‹WP‹OLšÔ}$a›9"/Sɯf—þDx JïåÞ¯`é5ŠøV)x‰`P=ÛD§2ÑçÕDƱ„½Z°ˆ:z&J(Ôhš€_ªž(÷APu îû>¾ïcÛ6¾ïS,I¥Rø¾_Ö^HªÏó0 Ó49räH³OU3‡ñ<Ó4 ‚€l6K&“)³Ù¸ÝK„ëºX–…ã8áçñçf;⫼þ绦-1ˆœûÛ>í™8,Œ!(×%4ËÄp¸´7Ü,¡å…¸Çœó޶í{žG±XÄ4MLÓÄó<Îä|^÷ÔëèëëÃu]LÓĶmV¿n5‡‡y|ûãfÀò_àæ_¾™L*£Z’…éð·@/œeœÅ¦ü&ðaÍ_¬ÿ*í´›È#AÂ<}¢—„5DdNlÉ$æ/µjï±ÞCþ‘ Ö•›z¬c=H{Ÿê0ˆ”Äߌ­Ké¼ûàÛ›¿ÍÜØ ƒ›<ÏãÌÎN¨±êp6›UnË{ˆÂÎÄã¥mËkæ ®ë–U<áL¿Åbß÷q‡|>išd³QuÃ0(‹¸®K"‘=DeÝÓÓCØY¸k€®›£{+V(28ùƒÿüžëyŽ3ÿðLüp?ù•ŸÐq^^<Àšî5¬\ÁÖÏoåCŸþß|ë7±n³8ð­<øÖ¹äó—p郗ò­â·P!Ó7yöƒÏ2 cxžç¸‘÷jv$xž¦{°,+mÛöʹÊóÆu]R©–e•‰Óî=G…4%äžÈþ$/®Øv¼_ÊÛ;•J…Û+‹Ø¶NhJòjÃ0Â{!‚𾉋òr[–NÈ{ž†•Åm+.»®Ú´ã8 …p=)Â%†Ú6¨{'™L†ûÛ,ËbíڵضM6›Å0Œð^{ñÅ›}ɦv½¿þuÌtzÊÛÒ´)¢Ôºó›}0S`dd„·XVUo8Pã»T*5ù~\e4”DXhÚž–â­\Éï¼îu ݦã8X–…eYäóyr9ÊÙõö.Òil߯:?ši ÝHß§§b\nÀ°öZ,\¸°æuS©Žã2R‘· kÚ×uÉçóa»âyÉd’D"A±X ÒŽãJ¥ÂAy±X ™L†d2I&“!N“ÉdÂA¹ã8d2zzzBo–Üû¶~b+›:0Mß÷éødóÏšwžÇ/–þ‚÷ÿ˜«ßu5ƒ ²pÏB>ûàg12·Üu N?À~þnzÛMìyÞ¹ìzþªw¾Ë³;Ÿ…Ãåâžçy$ ŒÇpÐ/bš ždà$m§išáMΔ0!b›\7]×%N‡Ûìíí¥§§‡ÞÞÞÐ"“ÉÏçq'L• m»´é2 Ôá>š8b³ò¼ÁA¼n …ù¼ªü!¶éy¹\.ü‹,d}Y2™L(LH$ã(WÓT*zýÄ=bäÂå²ïT*¾—×ÕÎSغuk³Í m#¨q€Øg.— '‹Ä^Dx›4M3œ´_»VuèÅÆåsiÛEK¥Ra[.÷Òàà`x,ò}y¦ÄíÞu] …Bè¹ï³g2™²ö\žK…B{ûyÀÁ¾>Ì|~ÊÛÒ´9”ŸÊÒk®iö¡L Ã0”gŒöh‘¶bÒT>¾Dc˜ÃãêÙ€¡¡!öÎpøg­[¼˜u4t›o¹ò-|ë‹ß"™L†Åã÷gÕªUô÷÷c$U8`˜Ÿ ¢J¢’ 1§Oç g½!Êc&!šâY&!> nÉ£Ö[å ¦ÙÅtÏž=Ÿoû°%Íøˆ,â¼îëë#ŸÏ‡âBÜ[Mlò,R©Ùl– °m›•+W†iz{{CÁ¢X,ây¶m‡Ÿ‹87Òï(ëWÁ„ùrª Zd"5þ~¼ïkzvãû>étº,ÜØuÝP°r]·¬±WIJl6Koooè‰÷N«·*Ã¥k¡Z?9>ISm\¾#ÿÇm¼ÒžE Û¾}{³Š)ágÌ›×ìÃÐ4‘~`ó~ï~w³ejT)ÔŒ9a4iT©¡iGlß¾;ï¼³ÙÇR•g–-kÈv¤cšÍfÉY9n9~ & g‚OÿÑé|ôæ²(Xå Š'0ˆÜ@ƒhù<^$ãËóºZŸ0CSÅ”uëÖ±òàA.¿ðBR—_./ †v6dàœÍfÉårô÷÷‡îø2ƒ'^•!hšêìZ¹’Ój¯v§Ì#¦,\Y£iAâí€ã8aHŽã8¡—Wåì½¼kðlFùÀÄAµŸ±±‡åZà‚•±à;°1»‘‰*Ô?K”߬Ԇ‡íT© 6r¥cô‰ÂPW:¯Ÿ¸jxµÔXLf'Û–? 
KèïïgíÚµ¡×[:& …B8÷Ô“¼Y•Bˆ¦5![B–ãaÊÉd2ôd‹{âÄï3¹/×®]Š …B!Ì&[Ü[²·W)Þ¦iŽé]Soþ&÷I3ÒÏÌçó†N Kˆ§m Út™l÷2¹#ÞaŽã„™L&ün¥•N§Câz„aÍÔñ€Õp±n'4³߇Šñ±LŽMø,tPBÛx]Å©äÖ´ ¶lÙ–-[²ÁíÛ·³aÃ:;£ÁËðð0Û·ogdd„õë׳aÆš··hh¨!bÏã>ηïþ6†aðâ^äë‹¿Næ›æ¿m>ÿþ?þ[~ë%|t£$ùJpóˆnŠ j 'Ïéx"뉜 Zà†Y¹rå¨e• ‚t2âƒEÓ4ËT|YǶmÒétØa/‘r»ÑH;Þµk+Ö­«i]É‘è|pš)SiÇSi‹«Q9@’¶Úq«ŠN".Ô,H™DÇ"ND#R!ItÏøDéê)0”Ýë5 mµ„Q†ÁÁƒËÚ]i§åÚ÷ôô„¿Èh§6ºÑ}ŠV#žCQ„ÖL&þ^âý(¹Ò$§T:=ѪåœA¡²¸IeªšH®i<³ÝŽ'Bž¹\®ÌkSRň§¥ˆÀò\‘œ¢â9fÛvî)}OIaï›ÂÄÞ–‚„ÖUÍp2ý xq÷n-ØkfŒé°ã={öÀùçWÍŸ—"Qnpͬ§¡9âغu+el6Kgg'6l ›ÍrÛm·Õ$®5"¡„I½cñ;øðà‡¡†~iˆÕ·®Æ(°n¹ýrj¡ry”w›„¡J¹Qˆj>+&s=kíËOfïeSšvÌMÔh;8ï¼ó&\§,Üæ³p¡ó´ñ}p•w XTyl[-·¬È Úu¡§R)¯† ·ŠÏ¤ ‚Ñù|_ýÉkÛnÜþ4“¦šOņ«!Þ²A„¡§2 Oh›P„áMòhJ¾Í,j¢$Þ<¢É—¨ã"f×Âít½mc¥÷]<‘Pö$yˆâû•ð«ÉzèÍ$ÓÑÏ4"˜%“É0Ä3zzzBo7É%í¿T﹟D¬èíí­ùwÓÛæ3ì¸VdrVò–InPɽ&ùEX–b=â)ñëÏÕ&vo§š-^A3>ÓÕ¯ÈéKX·ÞÚìSÔ̦Î¥›jÛö(o8Ra\âýÙ6Õ4µ±`ÇŽSšÛºu+ —-`ïÞ½lÛ¶ ˆçZŒÛŽ|ÿûpÅu—T«ë=­—빎ç½ç±:-:ïŒîùß@»Sñ>…*¶”ÞçiKo¥éÈë(!„KX«„—IX@»t.§ÃŽ-^Ìi¬óüÌ·>ùI‚õë±?U„ómøÛ$œZP‚Dâ›ïC.Ù¬zmšå‚—mÃÉ“°v­úŽãDÙà`ùŽÇ¨ìƒçA2©>ó<%¼z/"\_ßèïÈ1‚ûú¢c6Œrñ.”h(B¢ëªÿ%Ñi@&S5á©f|ªÙñTlx,î;~œSï¹ûë_—U E’SúiÔ ¹EÉ%hYV™h¯š+÷Q¼ã.!Êšöa6Øq5ÄS-ž;0žž@rŠm{žö¥Hˆ ¢ÇbºÛÉʨMu¦³_a¢=â43ÃtÚñ²‡nÌAÆ&âùæ5³Ž;wî"!NŒT ²$´µ²ݱc]]]áû 6pûí·×´Í8ûì³ë:1™ýWóžžþàÄp†y†ZÁ$RœÅ«BòÁ qãO”(Žª ;]ò»K¾9hÊLe6›m‹ÎÎtØñÕ«yíK/»ÎòW^!ûïðüƒ»2ð¹üiR±ã¨ÌíÔß?þŽS)%h‰˜U)håóJ¤³måMçû‘ègYJD ‚ÑB]5o8PÛˆ-K‰q Ä¸Ba´WºOqÝH€õz*ïJÀ’ T©ª 7[Ð믚Oņ+ PÍç³/¿Ì«.ºûÅÃå"´%QŽj2o‘DÕ\pJË{€ƒ@ò£jùZTÑéµÀ`’?‚^ºm8¸ò÷B΄ôÈ}zrÐ{¸ïœ8F«1Ó^ÂñÊ©â¯/ÀÓ*ÌÓÑOb,mRðHžyâ'9­$ì´ZÂwéô·úsRS;íbǵ"í…xC¡×›xàJ1žø VÂNa´}7[|Ñ÷[mLW¿ÂE=¿]éÿi4ÓÈtö¾ð˜ŸõôôÐ×Wƒ€ÐGä ¤™õ444µ###¬^½:|wË‹'Ÿ|’Í›7sâÏÿ¼nuY: 2ƒÜWèƒXd, «ž’G…9‰'pÕî“6O1]C+ÉWSIeÇ*ž{£ž\râF¼{÷î¹^•ÔcÇ»wïfóæÍpÓM¬«˜ ¯rÁ˜÷¥/aHÇõ+ÀíSìœN””=“QÉ$¤ÓÊË.ÎX¿O­j•Â\%¦9zŸ‘ͪ︮z Jà‹Û›ï«ó‘×¥ë‹i*=ñÒ›×-í\PQR$CªJ(°xºÅ=Øäz¶ŠØ©Í›7óä“OröÙgÏh޶©ômÛfÓ¦MÓ~Œ"Úa˜i¼ÍÈd2¡}{ž ZqO8¡ÙbÛl¦™ýãzì¢þ±ô+L´Ìu¶oߎëºm×?†HÜ«6já„ø­à–þÚ\‡˜+lݺ•ûï¿¿æþñ´ qõ°fͶmÛÆ;6®º<R±OB"}ßÇúà›àÆ:ñ9ÇU!rÚÉd »»%=»F!á¯"ÈU†å‚z?ÞÏu#Ï¿ñÖ©âÄKp²tw‡âe° ø'N4ûJÖÄ«_ýjÞò–·T/yä–-]Z&²Ù¥×âU/3ÝRÛNrÐ:¥ÿ‹€ùÜ+dϘGÈŸû^ÈÛ9Îë!qàùÓ“øö0ÎÆW‘ÙþMxi…µ—“_¾œ J”K;FþèQüyóH}ó›¸o{Áƒâ]~9öÊ•¸çœCª´oð&q×]xozmã/YBb`÷üó1_y/.» }€óÿV­Â*-K 
nìˆ(™)p …ð8Íl–‹-â¾þ~N¹è" &Pvv’˜7ôG?JîG?"}õÕØ<‚»næ×¿ŽùüóCCä;;I¡¼ û]7!K¡Ý££sûûûñÓq¸ïøq~õw~ï}ï ]òp%.­ï{qgL!nÓ¦Mtvv†Þô­ÎE]Ô°døâ5X,ÃÜVâa(•[9œmÛ¶±uëV^ýêW7ûP&DúÆÓTâç—Œ Í &T%?aeøkmÿ3K;÷…Ç{LOPÌq6mÚĦM›ÚÆŽ«õ=Ï‹ÒòÔK–¨Ù,Ö%f+[¶laýúõ5÷§]ˆëêêbûöíáû:::júî÷añ¾}“ÚŸçy¾ó0ïºàAK_ŽrM}; ç›p« à§À2•ÁçPÆ.6ò00Œ(ÄͶÇõtÑLŒ$à•ðÕ|>?#y’ÁTìàŠxžCñ¬ (…ƒ% ÕÙM£lÑ ø2·ßßCÒ¼ò:Ê®êûÞGvÓ&Ë¿é&r€¿q£º….½”Dé¸DK¼ðÎïü,^L¡tÜÙË/'ATŸGDŪO,^Œ¿xqø™Iyšº¾Ò>d_RÌUng°ßÿ~‚7¼3¾õ-ìßû=Dµ)X´ˆä{Þƒ ¤m[ï¾ôRU°{Ó¦p¿Vl¿=¶1xô½ôö0:cíá >¹%Gaýi¥Ù@Þׇù6ƒÜFîÝ8þA zî—YÍYgQiE½c­;Þn+Þ[ßaOÄI笳ø¯+xaéRR;w’X¿¾¬5–þ^ùäÉ újÀèèÀïèÀíìÄ{Ík¼â œ}ŒÌ±cpÝuØ%ÏÅ^à;—]ÆC›6ñðþ'oøa¾ÿÑröÐŽiÒùÕ¯²ïï`õÑ£ì±,Ò¥ãO¡æ$x‚ì”ÓJ#í¸V|ßÇq‚  P(è‚ š)Ó ;®Ä÷ýÐëMB«¥H*•"™LR(´@¢“FÙñÐ÷¿­Å]M“hh{\e?>a7!9J3¿Í¾*š™`¨XèÂV¯^]ö¾^:;;¹å–[¸ùæ›Ù°a®ër×]wMýÈGýF™·Ioo/ôÀ6k[4z°ˆ Ú3[`*ÇÙrÓÕzóyž¼‹Ç\:Ý€–yÅ)–‚´ C}Çu£|CžWî•J(…/âyÕCñj<Ö>Ëj9/W™Èd2aõ­l6;ª|}³™Š=çœHS“ß2VÞº¿¿Ÿt:¶P¦ÙgÜÂL”ûn²ˆM‡7K y{6²-þI,e€šÀ“PÓœïË/’û›¿Áìê¾ï>çÍ+o ÏJÀ¯'0ãns'M¸xº´ÁoëW/ÒŠ‰uå€Ó®¼’WïÚ…½göúõUןÇ3,X§Ÿÿøä‚~ô#¸òJXº4ÜnbÅ >ýŸÿ À×_ÏŸÿ<çŸ>÷݇ïû\œqÓMºZ®#Jg¡Üç J²6‹9ç©§XÕÕ…±wïôì@Ú“ &3LÓäSû®ëòµRÕOÏó8{hˆCûW ±¤³“/%“|îç?ç_\—ß§ÿöÛyë駳ࠧýZM{Ÿb\× «Ÿö÷÷·EMkÒL;®F6›¥X,bFYÑ×uuØ©f\ݯ¸ÿĉ Ñh ñv|òäI.Q9â*ü±"ùP¤ë–Gõ“u“\† ¥[±KæûðÀË9rdQMë™#nÇŽ£–M%ÑqgggÍÕtÆ$”Ûö¨œKPQ‰L’ôH&m¡Ñ±ÖÕ¼hƳŽÉV‰¬ðø« Ë‚ÁÁê×®Zk²Çe]@äé%baµýˆ×_%"àYäráïè8oúú×9~ð ?9q‚57ÞÈ«ŸyfòÇÚ@ê±ã‘‘‘è›1,Ë"A¢¼¬¤F3ML¥-–j©‡ ¸ J€ë}ì1ÜóÏÇø·«.¼I’´Té‹Q. 
ƒ(ÆÓdfòë@öKA)B# ÌÅÃU 9ŽC__®Lx¼ò ?zÛÛ°>þñ™=p™¤éí3'¤aüËßþ-¿þì³\òÚײæÆ)Þy'GK¿©išÀS—\Âo7Ú;u4¤O1RÙ[~çvÉeªi?¦ÓŽ«ù|žb±FÄ…·VŠDдS±ã‘‘º/nö)ÌòDIm«Um‚òŠZ£Eq^‘ä·õ×ä»â‚_ùXÌSR×ô…NxDNYµÞ‰§O0ÿñù\qÑ4‹©Ø±,­LE$—ÔójââŽC ¢Iê6¦2ãWÜ×'îûSv)J’ê_ü{Ä7)•Rß‘ eÕ0Mµž¤f/É @U½4DjmˆÏ‘¼wœ(°¢n´dH“@Çï|g%—]vZM×&âvìØÁí·ßÎm·ÝƆ ؼy36l`ÇŽttt`ÛöŒ–vxxÙ2Ê®S>¯Î²B8’ÁOX>]nru£ûDY¦ç*†ý ÌìSÙY‹¿«#—HÔ”ÛË÷ý(©eéÎ|õÞ½ô ¼zQm s+±¸²ƒëôš¦©x³¢±ÕÌn^xá_|‘cÇŽaRâ~÷w1®¾ãïTyÑÆò\•c)T‡ÌEÙ¼äEœ&CÄ”±>+‹X%o\Ó4I&“är¹²ŠÎÙl–\.‡ã8€šÝìëë ³B±X +h£Dñê½l÷n.444=?X P,Ãߺ¯¯OçÈÒ´=vZ,Éçó$ f­i ö-^¬Ûر±¬¼–ŠSªT€êCù@7åѪ¿UD‰]¢ SRYK„µ‘€çR.¦õU­’íKß7‰R@¥ˆ&Zcø¦ÏÂ'²Æ\ƒïû v˜Ëo¿\mº§‡¾D˜ðéû>MÇ<ÑÿD³¯zÝ\2† œ«Ç᪋SDÄ/ñ›ÈÃLüD¤v¥v>­¯E¶²Y%RÉ_Óï±`hhˆl6˦M›èêê ?ܶmÃÃÃd³Ù²å3Å ñ8ë P¿P¥§j”*UŸTWÕHCb£…¸6#ÝÒÝu:°ñÖ[ÙÜ„j“Se8žª¢ã+6k¦Lõ°Ó}M óðÃóóSOeEW)ÀúéO1öî%óú×-À‰gr %ÄÉl_ƒ<ßÇ©Z‰J<š’É$ƒ¥çFxž‡çyd2Òé4}¥§±jH²”‡@iÚñ[»vm8€ngVîÚWTwµv]—Ë_¾s©[]%GÓÂì[¼˜ã]],~äUUý _ˆòdŽE–h&µ€šÅÍ : u8醽â©åû>ù|>¬Påû>ƒƒƒ¬]»¶ÌÃL²x'0.¢eb!—™L¦Ì£­!ù“’I.O$Âë&^or ²Ùo|Ÿrîò¿äF•v:Ï]D4ñE¥ÁAÕ£*Cßÿx[_W<Í%ÌVD*€'Ÿ|rê×¢…ʨÅbQç„ÓÌ $U&7Ož<ÙìCÒhÊXøÂ ³3$ZígP’"¦ÙD›„{ÚDñ"¤eˆ"ª1VWĬqYü³ ÏÉiF$ª¡&ç@õ—‚ àûG¾OWN9êÄûMñ>Z|¢´a}¨å¹åËG©3žç‘ÏçGõ[«üœ‡!(¥Â—ÇDÌÃ,.²‰çše)™$“)ß}eÍÈZƒã*oGùÉâÛ–š“d:¶Ùê,åñCíèèKûÎdxêÑ£GkZÏó¼Èƒ ¢E<.$ø¦mð*†³•T*¥Èó)k4-Ⱦ˜Û½‘ͪ)µ‰*ëIßL8‰i…šì½§§§L0‹wö’É$…Bl6K&“ ŧLEî2iK2ãä4‹?B$¥a(1¬2ÿ„t—‹Üî%/†ä¯döÑóÊ _WËvÝÑ3|AÞùï!WZ(³œ©”z&IâÙÊmK¸mG‚Ÿäô‚(y­8ºÉ>MÓ¦¿¿Ÿ €÷¾÷3\w]¦iòÝï>ÈW¾2Äu×]‡‘é…’`hVø]«J‡Ò0 ²Ù,Ë–-ã¹çžk¶I6ÉÑ7gž=­BÕ6XËŠ(/ZŸ¨rœ”j6ˆétZ5ñYÎ,£ÝyKªòÚµQ„kOϸÅã4Md¶ „BOM1¶Ø­§§' ÓhÚãÇcd[Ç8O’§¢îÿñ?çÙg/Çó`ýúa^zén~ë·pøðI6oþw6m:Ûî'Nóü9†að[¿õQ\Î>ûlî¿?¦2uX¹\yî ƒŽ7ñR™”µ2!l5*ǶµôÁ H8Ÿ!n =åâۑij‰D$V†ˆ¸'ïƒ@ý­]-/¾¸Xwþùïaß¾]$“IÞñŽ÷ñðÃoæÈ‘èû¦ žçc¯~‘Ü=ãûJÄìíU‚hw·ÅºuÀÅÒ¥çàûß(Ój*ŽãÍfk«j6—‘è³bY<¿‘¸¸d‰Ä²¸ ÖCT=OÊ0KBrÙv/j "^ªÙF‰mâ¡*^äâqBiÉÒÿ9”ÇI$z¤(÷Á=[ïa=±\®³Ïóp]w\O`MÄ«VCÒ1HiqÅ‹Mr€ÕÜÔâ&6K„¸ðtZÉ+"$*ü'…zP¿«rñ¾ÃdëŸÄòÒi‰ _•Tq±víÚÅE]ÄÒo ó¹J¾ô¸G[\Š l’ V3uÆÊ~°zõqþõ_÷pýõ¯Ö÷ wÉtŠ`J[ ù“5s†›6m Öÿóþϲҽ®ërûí·së­·Îh~¸2’I5¨ žDš"êá–cÜië3ÕÖ‹ZˆÓLÿøè£ê…d¸Œ†ÛUb\£iA®|þy>Tã ið8÷ƒw2GÀ²U›ëû"|¼üò‡øë¿þkþá>I"‘ó¥R]‹/«ä¾Aº´EÕ‘yï{Gïß«è ÅÛõVêãÇWòÄ©L[MÜ«ô²“„·ã抒â^ô–…üf³YìR¨l2™%Цea>}#Χ^Gê-Nžß–lÖáU¯zÎŽœR˜aÎ 
"ˆð%ù)c/¶'ö¿%R|vl»"È™(‘Ì"¼Šgœl3þ\,ÄÖ«|^ÆÑ |"v„ïMÏóp‡B¡€ïûeÕŸ5D•'§b=¨Äøc=cé èF%ÇOÌÜûŽ{j(1o¬\N>Q8d¬’å'¾ð ØF[áQò´o$ñr†A•^Œþ.ŽÿÀüNy°Ê„Ûrð^KýŸ÷!ï€áC¶Th 2&Ðq¢¼²oÈþ|tžsq‡—|ÉoaÑq—ŠF‰÷Ú“ôGlܸ|Ÿ…?ý)óÏ8þ×ÿ"—H„“£•[SòèûѹÌrήRIÞqà—ùõWkyãU¬Ð.zÆ»QÚnαT¸mÛ¶‘Íf¹þúëéêꢣ£ƒ@ ucyËM7 _x¡ŸÇó<ÌRåT c Ëcù>9îFTÓ4éííeÇŽìÜy¸Ù—²!Ì™J’.ªÓ/·U5¨—pO¤Ÿ$‚@%>XD^f²JÁ,Ãèª{qâÊùUí8e|ߣE|ß§¯¯oöÛõdˆ<*Çú|-‘­Dâ°„”Bä)塯ñPhYߤ6†xPõ ìޯضìØûjÇÖMä™%t«í<ñª'XŪf_áI¿ôÔÃÂ}_u:D€3Í(¿Œî?oÀ“6ü2ðê·¼Ù„‡Xen‡Ž–rEd2Q^ŠJ¤hÕd:"c”•Œ•’B+¹\N‰qþçá,Ü¥ñ/e³ÐÝ].öI1¦±wù8"ÆólÈkIœ[)(‹åÉqãˆ9™2˜mÊ¡C‡èü¯ÿ*›%U… Òiõó”¥Ä’I(“ÈKæLþRb¼1nhhˆ;v044Ä-·ÜŠrÍbÙÃÙ°9ŽÃ÷¾÷V¯>]µ5Rž¹²ÌòÄ=5šéb9Œ²aÏóð|T.¥Âm4š6àØâÅlüâá‚ ªvªH¨‰É)”ÍFmJ2©<èDì—vXІú\÷s4ã±òùçG- ÃE,´§iŽ:Ä¥ÿþïpûíU?7MÕæZY"w{Éy‘SÏB¡€ã8:pß>X×새ÂX$‰³ëºanKÓ4U;•ËÍJ!.ŸÏã8NkÛ¡ îå²û(éÀÇs¡y¥×"&x¨N½A.wfˆç3Š âI¯’“åUo’çH¾b˃Äu£A¡„nÅ©¬eù‘Ê^ Õ<<¤úIU9:æX:–vFŠ3€ªÈžËÍQÅ4Íè\]=”'È(-{¤¥uä>*¢î¯‘fÝK=¥å}Œí ûŽë"ì™Dyåâ?™„­‰'\%VÛ~~pû¸‘›}õ'ÅOO=•+«ŒñF!³|¶=:CµÒ‘RÀÅS×&œxþ¶\ËZjJSÄ÷}‚ À²¬ÐkUÒCÄ+Ã×]X¥r0Ú¨íL™ŒHW°y3ïžþ#h8Æñã,\¸0|ŸÍª°Ô1'ùâ¹ŤgEaÍdX0õML>pôèQ%Ľç=eŸy¼ímüóÝüöoŸ€õ߬-`ìV},Éïû*W~"1º}‰·ç–½'È–ïIŸOrñ‹:Çœ¦F*¯¦ib:¦j€OÖ·If¦9õå—Yrýõ£–g³ªÏ'!@èuð /°”¥ø†“W®ç|Rü³ÎRÿOâ;“È ]~›¸ W׌w›ÐÛÛÛ\¯ÌÊ’·ÃÀ{p½­ÜJ¥’šü¶¦ ¯5a?°Û‡¬ ÷ŸL€iE"„Ÿ‡tI„’ò¿®y;Æ;=B6«:6â1ÑdYeÎÞ|>ê É9HCÏWßOãˆWg¼SçyÑ{ÀR–vÇó<|ß'•Jašæì¬vX,ý_Ù‰G[–rQ-8¬%¶~pk¹ÙÇ“_OÔ/‘{¨2ܽZ<û8šÆ²gŒ°nß÷YsÿæNr‹ÍÌã¢úý‹^yeôg¥ô)emd†0ΛÞÿ&Þrÿ[Èåreyàæ4*Ùw‹"¡šÉd’þþþ©o°Eñ<¯ñÞp"òTŠL’Œ»· + K¹º.0ÁMB`À‹À㬴à~¾˜‹:çp½¯¾pvfI50k+©*AMÔY‰{ ÅDU?ÆúÎd©Ç[£Úo5‘ˆ±ë;³@ˆo©œØÒ¡Öñ{£òõšðI;–—-6“îyJ¤~pM5ÒÍd ‘QcÔÃê–,üQðGœaÀiìñ k¨ï<’R"M)\4¾ /Z°ßÇD²À«m UîEÕÝ]â×Sqž¶=ÚfÓéHd®vVKloð§%ÑX¶²_I†ß¦<ÛÑQ}"Dò›¹®º†µö|Ôo+bQ~-ë8ÈI¶oétšD"Šo2¡5ÓpÌ%V܇ög?Äq~>¶‡cå⢜qš9CË q{öÐñì³£:M*|½Tj¾HØàNÔ¯óktù,UÛ_K?Qç˜ÓŒÇÈ‚\öÓŸÂ9ç”-w]—7¾çtÑÕìCÔhjÂuvŽ$Øv•Ij(€où|é;_ si"Œ#-YÍÔ Þu]úûûÃdÑ2ÀW[mw¤ªä¤Ü62`ö<ÕY~J)40€‚@ý< ž%O[° ð5+ª¸x¯`éÿ²óI + ¬U N3eÖ®]‹eY N}cµ2–è,c‚ã¬NÃ/—î %Œ,1à8pž÷ÙQ˜fe³ï…” 
×à÷MøË¾ëÁ'2ÐDÕ}Ôà6•ƒ³så9Õ$Ç¡T©÷cë{€Y€Bé¾ö}0J÷³´Õ<±ê™¤(&µºïV"ºucã!ßWÚ};Gåœvôhõ“–‰‹Z®qåÙ8Q¡˜idåÊ•,yöb¿±ßæpÑÂ…eâúÕWŸœ3ö—*Ǫo0ƒM¸¦ù´´”=à=OBA£Š2¤©©Ò–a¨ïÖ2©Ÿ„Û»PŒF#,:vlTO©îM“pk×®²ŽF±¨ÆZ£´Œ,ø)Ÿ=ÚÃ𻆵½WÁêØƒÕ¢—Ŷíp!a4ÂlÉ;UW†dRõI‰H`³íòÉÂÊëÏEU4¢Dñ.êõÜ[¿]5M3)ÄãæÈÍ7—EâÕ„ D@õa*E$ââaëžå…©æ©u¤ Š‚H¾®A”ðæ£D35õ‰*ùbÁÇ,õ<ð¨Ý6ü8›È *:*œ$À}LŒ x%cçë¡€IDATÜH¨ýw€yã‡8×xÉÔïi‘¾¯šÓŒ RŠ>Y,ª&ERßÄŸ£…‚Z.š”|Çq¢Hp‰¬–¦èÜs/žê/?㜬–Z«ÊvR̸‡¹ïûeã¶°‹»fÚ €å‡‡m‡Ê=?Aÿ"æHDÅb4sŠÖâ*}òœÌçóØ–­¬?Í„Æ;™þ¶e¨åY¬#­4ã±ÿ~v¾îu£•ŠÕ·è@\£©Æ¥?×\¨Aå,ÄSmtþUyV$V4û°[Ž\Ž(M‹c—y`(2¿V÷òÆuݱ+JJÞ6Ǹ‡Q&yÀE@TQª)J~7»´LĶjÎxZ„ÓLÏó°m›Çn¼‘ºƒ=O©B‚ˆÍ…BTÜ¢’j÷A\”®æm*Õ+å)¢î‘nð¾Á0r`Áÿ8ŸÓ€Ôà? γàõ€q:‚ÿ,pz¯/¥S,B1[ŠXMDâ—i‚ÿ/j?f)b.§Ñ= ù´êÏ[–úŽ4q’gZòAK¸tZ]²¸`wüŽ×(‘ÿS©ÑÅâNN#¸Rç¯ÜFœÍ›w×ûë7•‚hR^cb;’7Sìgð}Ÿ|>V7Íi4“炟þ4ŠE°m7*xU ‡ŠM3h}!.ÖÐI®NÃH¨„ÒòГã Brk4Såøñã,[¶¬l™ïûò=ùfžFS2¸»öà}ïÔ¬ü˜Š9(X“ Á™KXžCö‘÷@¶¾ôZÍ ’É$™L† ¸ì²Ëš}HS:×u«{jJ±í¯òbŒ¿ñ"Q54ÜÔ2.Σi8_;~œK®¹¯Öâ/R±6nÿ¦©ô©†/g)UÈŽ¥OsÀßÎ 0„T<Ò‹ ø-0ÿð")0ºÁ¾¬>0ýW`œX`Ìs1Ø×©~z¼ö†¼ÎdÊoÛñ„«ø:¶ fU.N‰ÄhgÀÊ÷•‚™fbT "®¸B-¢5ãQ$ꌮŽÛ`ÇÁó< …†a´v…mMÓ8tè—9†¶G®ëŽ_˜¬ÒÈBõ4sŠâ¶oßÎÐÐPø¾³³“M›6ûC‡…¯'zP:Ž£:Ð6‘ÑNæ¥V·É’Lj¯¸¹B=6|lñbÖ­[W¶Ì4Mòù|$$—ˆçÑvݨÈ]<õeUï'W+0§ÑT£;ö€÷ßÏõßø†zï)»,³9ýÕ!žýÖ³<ÿ¥ç±´kϘ8¤0ÿ¤½ þ†Qæ°cÇŽ¦O=v,äóyLÓ,÷Ä0< ­›l‡!K4@LU^„¨’¢FSÁTìX(š&¿öš×Œï çû‘—§m<*‹xˆ˜l…ÿUô1âl]ܯ‚÷·ÀJeÏ‚ý»PÀx7Ø€¯‚@a=X硼› ¨Ô×z‰ØÄñZH];úÔÑ·?Ë´ 6yêµãýû÷Goòù‰…¸ jbÃfÚòÁe³ÙPtK$á„a³³±&¤^;>tè÷Û6ïµ,~ê©€oi1_S+3æ·cǶmÛ6©ïÄ…8×\ÁC•ÙEu jȳ%ù3|& TišK=6<ï•WèþÒ—àê«eYÞ× ùm°K¹¾+Ã$4BoxßWýiÓ,÷Äìë+Ÿ%–HhˆëªA¿ä2‘ÃP¢]¼¯!ÛI¥´—Ýl¤;|Î: #M–d`±³˜Zmöi¶4:ºeêÔkÇ@õw“y˜§QÂD†¨ÿ!CýÛj&ÁTì8 <¸|9¿ü³Ÿ±ðª«HP¦•©6©þ‰ R}²þSªp¨ÿ]pçkCð$΃ÌO %‘­ ‰?*mã×QžmóQ÷‚Oyõßj·8ÚjÍ£m©ÇŽ=`ñ¾}ÊN%áXHÎÍ~–[YŠœˆ§›ˆn“.⣙5L¥=®´Õ9/)÷ „·ý6<ì+ïû¥u×KÇ-x8Å„sRð3[]÷§M¸¨gp,Pû>ÀA^!Ò†ÖöÁ *ºxéÓOóØ;—säÈ~fšzúÆ;vì`õêÕtvv]ßÿ>뎯¾ƒIˆ Ù, Ðw˜>ô¾ %²½¡´‚Edìbävé"Šzl£ŒÄD u <æôdôҮýãgžzJåP~à±ó4HîM©>=E²Ù,–e‘H$(tA©Vbhhˆ½{÷røðá¦ì¿ÞþñÀÀCóç“^²(«196“Ð/4íÅÀÀÀ¤úÇ3"Äuuuq×]wÑÕÕÀ† ¸þúëÇíl|mÿ~º^yˆÞ¾ï«ËŽ]}j¹ x^ä#}ûDB ú-+V5Šêÿø +ŸK©ô¸Ð â…ô«ÒiµL¼£Dô‹ïϲ¢C-c×僌Z&ë*ïQ6Ç£rƒè¹Zùÿl½ûn–{Gš°±ž¾ñÎ;•D óŸÀ®µc' 
Þ:®Û…Ô!Èu¿Tú\¼>Å2~ÍDîžñËV­ù¯æ^©i(íÚ?Þ}úéüÆÓO—çŠã–þú˜RÙ÷ý0ô4‘H`µóÃw³wï^vîÜÙvýã­[Ç›ÿßÿƒw¿»ä8àã8ÎØjy¾iÚâjeF„¸ŽŽ:::Â÷Ò‹3Ï<“ëÖqñw¿ ø‡ô¬­Ç“¾óZ¢©è&‘H”'fŒ?K £¾dÜ’·K*ÌËÀ7.LT–SŸÈ»G Pƒ@y* #—ÄK¹WžW5$\O¼¤DHkУËÊK9FˆDØTª“-[¶ðoÿvÆÔ~°:™¬ ¬Y³†_¯¸‚óKçR(¼ÃxcSÎa6Ðî¹µ:;• 4eÿ“µãU«VaYg[W¿ø"PÅ ¸$àåó^nÊ9ifž 6„¢ÀL3Y;>÷ÜsY¿~=6l ÖÏŸ–…kY¡sh8â™Ö Ͽތr¡ª¡¯ÛŽO`Åû •!Éñ „Z(ÜûÓŸâIgÎóàoÿþå_ÆÝ†vÆãYx`¦P6.vYù›È]S§Ri:íÖ¯Õ?~|Å Þöío¼ÛE ÿSœ´v' ? N"3$JÚ™ –4,0z|$ëHª"I_$EØ*'·Å"žER¸@ô ’q™ïG¢ßKšIÁ’ÍFûö¼Ø„P¥¨q]5—%3’Ž2î "ÄÇ–’¦Eö-Q.j½ ÀNœhqžô7mÚÄG~úSN,]^7ÓT¢ï˜Äófjf›6m¢³³³æþñŒqwÞy';wî  îØ±£¦†úÕO<Db–išåI4Å+®‰†< †¡<å*«ÉÇ©§ã- ³iªFp¼¼½“Ý~åà¡^ï#ùNo¯zÈÃCèèxvò'Þêµá=]]ðÿž’_¬Í%M{R¯{Ày‹ãyåÞ­@˜˜è¬R!fº©×ŽA¥·¸iÞ<œL†¹ ©ŸzíXê,xâ åqjDûÍoŽÿ=²i0û¡·”×8HXt¡f´ó³¦D½v<22“ï~÷Ø©)ˆ¼ù¼‹i$‰Ð+I1Ï‹R´HTSo¯Z&ų¡ u¿T­$5ШÃå–‰â‰dŠçLŽG5ÅŸÕtÊe}}åé_2™h²ŸJñ¯rü–ËMÄ‘‹/¦H_ 瞤(DÕrJt“<¯2ÐO¨žR{ªiÆ£^;N£Æu‡÷î%“ÉD•_Ö­]¬¡„ëBú÷ õ$d¤z‡YÚXeå_fÔkÇÇ/®^¡4KT•ºFâD¶ ?úÑjþò/…DBµÛ’B(¾»TJ"U¢å}}å9€Y&©‡D8«Çƒ¹2 K5ª93Ôú<©L颩©ô+xé%_s ©7ˆ'¡JˆŠqy”øÖKÔÙÐí­† M½ë®»`xx˜®®®2÷Ïj<þøã;Nßøæ7áK_RŠAX5·–ý&ÿ—¨ÒHøaéfª©ƒzíøÐÊ•Õ?êÔ ¹²E´:p`7ÿýß øÎw.äž{.׳m•ÞÈ÷G{ƒUz”UËuG§—›½L¥=>ëÉ'9ëâ‹9«džç•{ÅIÁé?ë¶VSbF„8!L’\#Ë::Èf£ª£aÒÃ&U›Ôh&kÃM+R=í4§¥¶c&ªMÏv&kÇAðƒM›øñ)§ŒþÐ% ò€ó-è+yRÌVñ-‚ À4Ͳ¤æ®ëâº.¹\ÇqÂDçq+ŸÏcšæ¨\8žç•…åH¡­bQeøO$eɬ}ß'™LÒ_Šmêéé¡··Ã0BQͲ¬ð8e›¾ïcY–e)ϰ‰D"\¯/æ¦OœH$ÆÏá3ƒLÚŽÛó^xÒI‚aŒšq]È~únãÀUŒöÂHÐÖ…£4­ÃdíxÉÀ@(²‡H€*cñ"rñQ!”óFå&å½Ñ4µPOÿxþ+¯àï[ŒëHþó 7HŸr!N£)1¯Ù0§•ªMJ,¿ëºá,©6dM»°gÙ2\WòD¸ ýÒP³I£™4ÇJe¸‹ÅòYâ+Vpþ{Îoöáµz@о¸g+Ÿ}ãüóË?(‰pÁ¹à_«%þŸh¿ÐÖäµP,qJÂELi a4Åb1Ë ÃÅ­x¥ÁD"z‘¥R©py:¿ëºnøÚqœðµˆi²}ÂLÓ E8€þþþPHëíí-Û·¼¶m»l°q³gyLp€+7ž8Qžˆ°σB Œ÷ÝT÷\Ρ«šjšÂü“'±*U³<¥2ÐQNA9d¨×QŽ7îRb7™DÐhf’Xpâþ²ËFçPŽ£MSS…–➸àF~åWBWbÏóØÿµýlö‘i4“C’wš¦É²eËš}8ͤYpô(g^uÕ¨NÆþýûyA¼245“ËE•¡53ÇÝÏ=Ç%¥I¾2\p^†îÿ·T<³Õ8ß÷CÍ÷ý0—çydKÆU,Ã×–e…—eY¡p•ÉdÂå™L&·,Ë µ†aTÍå_^(ʶ#¯S©T(èIX¨|w&*ÎFÖœ8eYüÚUWUMU,*#“ëq”wQ?:1x‹âǬn,Ó¾çy¡píy^™`ÒkÎcÕbtœ8• /`«çcOê? 
qo|Á²¬2¯Wf¦ñ€e‡áýbiè}¹víÚò•üIoV3GhY!î•yó8wÅŠ°ÑÍXº>Ö¥siÚ†ãK–022ºÐ›¦Ys¾¦ÕxdQרNð/~ñ‹fVÛ`hf†Ç×­ã-óFw{Òwû²ÊãÙ*ÅŒdpîû~èQæû>étPÞnñA»ˆf¶m—y¬I8¦išá:£*ÐkÚ†UmÒ”Ê9¿ãù<¿[˜@{½ÕIü>«¼ç¼X#.‚w$“ÊcÀ÷}V–ò yžGOOOøZÖ ‚ ¼§A q²Ý¸øæU<0äþM¥R-b=ö^|1+öï/ww x£á‚@µÇc¥pèîîÛHí§i6ó Çzƒƒƒå+ÌÒÔš©Ó²B܉'س§#Šõ—zíº3¡ižyõ«Y°`ž§çí·moö!i4uñÐ%—°ë•׫N†K8»·qãF–.]ÚìÃk;zztEÍfð̲eœsôhÙ²äR`zï[h&¯— ÂÁ¼äLƒ¨`¼a-ž-îi¦™ÝxÀÒ'ªŠŽCèûG0–ÎÑÐÓ Bñ*‚0,Û÷ýª÷(qGî5Í€ªB\Ü 5ž»0Òmš&Ê=·,Ë¢··7\¿2¡xЦR©0Ì:Ï(ûiGÎÞ»·¼Ñu {\‰p¹ÜøíqoooÛž·fö1¸lYýTÝßÓŒÁŒk˜ GŒÐ¨ïþË»¹‰›´Gœ¦­¸xpøš ð98xpNvˆ5³€çŸgßð:~õW,`CÑ*’ȵßL|+`š:OÜLóÀò圻cöõׇËx‚ú·["¾Ù¶M±X î2èŽçL3 £,œS£9åÑGË8\©<áV~ Œ÷DëŒ.žÇC¥42‰æÁ!Js4÷ °ïØ1Þ~Úi|ï{ßcdd„ë®»Ž¯}ík\wÝuÜ}÷ÝÜtÓMìyöYîýñyeñÂÂ…<°|¹>˜&+“I•†Ì4¹ööÛqKçÙíºx+V`÷õQDÝsv.®êíU¯ 3•"ô•³¬èõ òÌ«_Ý„½ÖÏ«^~9R.|ðƒ·GyÂU£X,âû~XY£iöíÛ¾6M..E„ÕxºèÔšª´¬0<¼.œ¹éK%N÷C5mÄeAÀ?÷˜˜ï4UïV£i3ïÛÇÁƒW`.*-(Â}ûï#a%ôäH £Sãh¦ÿðaÞây B\Á z™¶~…xÍ …²j£A„¢Z¼’çl/2 ™:püøñr[ñ ýwº²Bö7£ue (º¥©[ìK8»ô^º0ñîødoñ¾}÷Ý\wÝu<üðÃìÛ·/Óžyé%~ë·~ ×uùÉ…ܸq#÷Ü{/Λ‡ûÆ7òØ«_ÍŠ+p£W_¾>㦛 t¾¬[ÇùëÖ©×K—rÆM7EâØWD¯×­ãÁÒyZ¥íÄ.k[ÑnBÜé/½ q.˜×Aÿ8ÕÄ-ËÒm¤¦åøÚñ㜹pa˜CÙ¶íòI³øL†FSAË qÇ×®%sÐâw´÷€¦ñ}ŸÕ_XÍ©=µÙ‡¢ÑÔ…mƒ5€Ê7Àß$þFUâ;Øì#kOŠÅÖÉG6:í4ÎùÉO¢iÈØL‹—ÍfÉd2Ê›¦Ô‡I$a.'=˜ÔÔ‹\ûýï³ø¼óP.[Œùy\™s0¬»Á¸©ñ]çZ} ã^ŸÙl6,²víZ …¶mã?öOœ`ãàŸ<‰Ø%1 »O27FW=.y°i`ó~ï~w³£&Ÿ?Ÿ«}TyÂÅïAæÕ×ugT8®FÓJ\pð ,‰È£ìÔD‡¦jƤesÄ-?t(|L&A{hÚ§~5Ù¬êH<ðδ ¬i;ž[¾œƒF¢Qü”¯lY‹puá8ºXÃLÓqü8oxðA@‰ Eƒ)‡ŠA& _»v-žçáû~Y¾¨vL¤®im^óÀœsõÕ*M€ ©?…Â* ¡L:ó ØWÎLw#ž7­X,†…\× ½BâUtC!ZÄj]Awîñ«½½¼ó+ÿ kÁùp)cz×KÞ>¦Y\<8H"Qž2D qšqhY!nÍc‘.FIJiÏêÜš9Ì™{÷òÜ“gb*üèêß¾ºÙ‡¤ÑLšg;:x÷àº@0ËYk&ï«„Ôš™ã$4 Ù÷ƒ•™ú6Ç ÂbYVX8AçsÓL>°ø™g”ØÛ ^Æ¥DXN1-)â… º»»òÊž‰D"¬Ø›ÉdBqMß šJNñEöŸ¹!±¥ºw¸p"Øj4­ÈËCCìß½ÏC4/§fÎÓ²BÜË/¿¬~Ø®–’5íÇ9ßÿ>Ýó2”¬=`4mʳ{ÏÂý*až‹Q¥Ù5“Â0šS¡s.sÊ¡C¬:÷]àCß÷Æ©n6ÕDˆ¸§F3<¾w/kÀ b¹&¨PÕ IñAMº‹jî놟RLÄ¶í°ØˆFS+ƒ'O²ã²Ë T¸¨òyXYÑV£iUvŸu¿Ù€ïQÁÜAÿ4cÒ²BÜü“'„Ã7fxû°N®iKV”ò—øóÚ†5mÉ«ž|’Ó^5‚} ª:Ÿvåš2™ Ôa½3N×`äjá‚ í½X,ây^™¡ÑÌ4ϟω5k ­¼jC8ԟÇ!ŸBODˆëëë Åæ\.§ótiŠ+®àÃ+>Mö “ÜÇ1M“¾¾¾IoW£iÉ ±,Bïà]¨A3.-)Ä_²„SO98zõQv¿w³I£© ç‰Íìû}ºâ¯¦-Ù=> V¿ˆÝC¿<¤ÃR5mÉü‡gÿ‹g„×P6› =á …‚Îc¥i:ËŸžUëÞ %.¬g`PódŸˆlžç…Þñb"‰DBÑL;ÁýPüò‰ÏóFçØÒhZœ ëÇ4UŸP¡¨RfÚEçˆÓŒIK 
q‹Ž!ýS„UºŠõï]zbDÓ†ô?%—_{y³E£©‹gÏ:‹U?z‘ ×wjo M[òªðg_3¡7œïû¡gP¡PС§š–âËz8çGWãLrb/î©‘Íf ‚˲ÂöÜ0 íí¦™QŠÄ<:cè M;qê±cï<^óšýlQ³E£©› wïæÈñ%޹Ýk4mÆ=/þ¿téѪE2|ß§§§‡ 0ÿÿìý{œ$u}ï¿–]Ø;P+Ë,Š5 ;'„wTâ½Z!Y­&Ñ%»'jµìq<šó;§*¢&QIºNüdÍžtÅ%‰v™ jHŒ]ç(j„Y§”DdV`J`w–a f¸ìKÿøô».}™éîéëÌûùxÌcúRUýéêw}êóy}ÞYf/8¦g™Å¥°_¬^e²Ó4ÃE@ªfÊ0½ÀsgÀ„8˲9 ¦_PöMàÕ¯^åÙ”dãt»…L/³¦Û ¨ÆñãÇñÄoÆ™ÛNF6›]ú¦ ?>ˆuG×áŒO­ÃòÝnÃ4Åe_ú¾ýÜïX `ú–/ú¸úµ—'^ó? Ò¨ ÿúª±ç×Öðƒé)µãm§¾/}Ũ*ûÚ3½C£v¼wïAH’á8Ó+4jÇ®+òjA€L&@¤ àqÓMµãÇdÀ0 ¶]¦ghÔŽàu¯ûM¼þõkñÎÛß)ª¤2LtÄ#njj ‡ƾ}ûDŠóB»ì`÷Wö¦¦¦0<<ÜÕ6ÌÍÍa~~ƒƒƒ]mÇÌÌ 6mÚ„®¶£[4cÃ/)žù7ºÛn¶Ÿ$½pMwûû7jÇg?»ê›¥‹ùáØ~z³ÝüþÚ1¡( òù|WÚÝ+vÌíè š±cÊ 'IR×Gz¥ÿávôÍØññãÇá8NWóÎòø8 ÛqcvüÌ3Ï`~~’È– ìÀŠÎ ·Òí'ÙF=tÄ#nbb"ñ㌎ŽbbbbÁ}þyÍg:Ñ´Ù»wo·›€©©)Œw»ÇÔÔT·›Ñ5š±á¬ý2A¦«ífûIÒ ×t7iÆŽÏÛò<¸©»6ÄöÓ›íèÍØñ_þå_Â¶í®¶»Wì˜ÛÑ4cǶmÂó<Ý+žÓ+ý·£7hÆŽ?Vôí<>NÂvܘ?ôÐC¸÷Þ{ñ_ÝŽ¹¹¹d¥ÔÈJ·Ÿ8dõиùùylݺ5|¾ØêÃÏþsüxíqÿ®û±qãÆN4±*Äž={ºöùðä“Obnn®ëô¡C‡pçwâ´ÓNëÊç?ýôÓ8tèž~úé®|~£6 ˆ ññÇïª ±ý$éö5Mv|âĉ®|~£v|àÀ|ñ/â”#§À wÑol?½ÕŽG}>ú(Î:ë,Œuüóµãïÿû˜™™A¸ë®»:Þ^¢Wì˜ÛqðàA;v êÊÔJš?óÌ3¸ÿþûqæ™gvþd•èvÿÃíHÒããûî»GŽÁƒ>Ø•6<>.§Ûvüè£bvv¶oÆÇÄ=÷܃[ÜŠo]ø-<³ç™®´»Wè¶ýô ‡Âìì,Ö­[W×ö+Öзß~{·›À0Kf±A†éuÆÆÆº"´0L+¡Š’ ÓÏðؘY.ðø˜éwþìÏþ¬ÛM`– MN¬8LMMu=žam˜Y°3˶cf9ÀvÌ,ØŽ™åÛ1Ó :*ÄÍÍÍ¥Ö¹ÓO° 3˶cf9ÀvÌ,ØŽ™åÛ1³`;fºÁê?þã?þãv)Ê×_=|ßÇm·Ý†OúÓ¬43}Û0³`;f–lÇÌr€í˜Y°3˶c¦¬*‹ÅN}ØÌÌ >Œááa6l¦/af–lÇÌr€í˜Y°3˶cf9ÀvÌt’Ž q Ã0 Ã0 Ã0 Ã0 ³RéHŽ8†a†a†a†a†Yét$G\¯177Û¶qàÀÌÌÌ`xx8|ýïÿþïñƒü°uëÖÄ>µÞkE[FGGëú¬v´c||®ë&ÎÅBŸÕ®sÁÔO-¦÷zÙŽÛÕ¶ãþ£;nçïÖ«vÜkš©¶ãJ¸?î?ØŽ+áþ¸ÿàñq%lÇ­c¥›^ÒLz…Vj7+Î#nnn;wî *¤¸® Ã0¦i†Ffš&\× ÷[è½¥°wï^ìß¿?ñZ'Û±wï^LLL`ûöíÇÞ½{ý¬v ¦>²a ÷í¸m`;î?šµãvþn½jÇݸ¦™ú`;®þùÜ÷lÇÕ?Ÿûãþ‚ÇÇÕ?Ÿí¸u¬¤sÓkšI¯ÐJífM·¿L§™ššÂÀÀÆÆÆÛ·oÇ[ÞòLMMáðáÃØ·oa|ãããPUuÁ÷–‚ëºa™äxû:ÕŽ™™ |ó›ß ÚB¡°`;Ûr.˜ú©eÃô^/Ûq;ì‡í¸?iÆŽÛù»õªwãšfê‡í8 ÷Çý Ûqîû'a;n-+íÜô’fÒ+´Z»Yqq[·nÅu×]>ŸŸŸLLL$Ü—GGG111±è{Í277‡›nº)Ñ–N·ƒŽ733g÷îÝ ~V;ÎÓµlè};nGØŽû“fì¸]¿[/Ûq§¯i¦1ØŽ“pÜŸ°'áþ¸?áñq¶ãÖ²ÒÎM¯h&½B;´›ç788ˆÁÁAb¥À0 ìÞ½óóó‰˜]ÚÀ‚ï5‹iš¸îºë*J#w²333˜™™Áµ×^‹ááa8pcccرcGÍÏjǹ`£– µ q;nGØŽû“fì¸]¿[/Ûq§¯i¦1ØŽ“pÜŸ°'áþ¸?áñq¶ãÖ²ÒÎM¯h&½B;´›'ÄBѼùæ›1>>Žë®»ªª&r˜´›ýû÷cxx8‘䯛ç‚\–§¦¦°gÏìØ±£ÛÍb¡š 
w¶cf©°Wž¶ãþƒí¸ò|°÷lÇ•çƒí¸ÿ`;®<lÇL³t[3éÚuM¯¸ÐTسgæææðo|#ì ‡‡‡155nCqÑ‹½× Àþýû¡( E(Šº/vªƒƒƒ •vxx8Œ{®õY­nÓÕlè};n‡ý°÷/Úq;~·^·ãN^ÓLs°GpÜ¿°GpÜ¿ðø8‚í¸µ¬ÄsÓmͤWh—v³â<âÆÇÇ100PßK'jnnp]·Âàª½× ”°Pžç®ŸjÇèè(öïßobb"t™¬õY­nÓ8µlè};n‡ý°÷'ÍØq;~·^·ãN^ÓLã°'áþ¸?a;NÂýqÂãã$lÇ­e¥›^ÐLz…vi7+Nˆ£„•¤fžça×®]عs'FGGáº.n¹åbE¡Ö{­f¡Ïju;‘J¥°sçNlݺ‡ÆG>ò‘?«“ç‚©ÎB6ÜIûYˆNÚÛqÒŒwúwë;î•kš©ÛqågqܰW~÷Çý+?‹í¸u¬´sÓëšI¯°”ëiU±X,vû ô3338|ø0†‡‡+\)z¯ŸÛÑÌguò\0ÓëvÜŽ6°/?zåwë;î•kši¶cî—½ò»õ‚sÜ¿ôÊoÇvÜ¿ð¹Yü<¬ÄsÔÌù`!Ža†a†a†a†a:ÀŠ,ÖÀ0 Ã0 Ã0 Ã0 Ã0†…8†a†a†a†a†é,Ä1 Ã0 Ã0 Ã0 Ã0L`!Ža†a†a†a†a: q Ã0 Ã0 Ã0 Ã0 ÓXˆc†a†a†a†a˜ÀBÃ0 Ã0 Ã0 Ã0 Ãtâ†a†a†a†a¦°Ç0 Ã0 Ã0 Ã0 Ã0€…8†a†a†a†a†é,Ä1 Ã0 Ã0 Ã0 Ã0L`!Ža†a†a†a†a: q Ã0 Ã0 Ã0 Ã0 ÓXˆc†a†a†a†a˜ÀBÃ0 Ã0 Ã0 Ã0 Ãtâ†a†a†a†a¦°Ç0 Ã0 Ã0 Ã0 Ã0 /„¸T*Õí&,klÛ†iš°m{IÇñ<¦ivûëô,lǽE0M³ÂöÙŽkÃ6Ü?ÔcÇt ¬4ØŽ{jý1Ûð°·—NŽW²lËí¦Q[æñqã° ÷½dÇ}!Ĺ®Ûí&,™U«Vu» U1M®ëBUÕ°£n– ày^·¿RÏÂvÜ[ŒŒŒEIØ>Ûqm؆û‡zìØ4MX–Õí¦v¶ãÞ£ZÌ6¼0lÇí£Ócã•lÇÛr;iÆ–y|Ü8lÃýC/Ùñšn7 AÀq€¦i ¾çû>E8Žß÷¡idY·÷}’$Áq(ŠUU+¶­w»…Úìû>dY†mÛe9l;ýØÔ.v I’àºnøýj=×u}Ñã,ô]|߇ã8˜žžäóùðÜÖú>A b…EÓ´ðü—ãy$I‚$Iuﳜ¨eÇ Ù0мK’´$ŽN?ÙñB6Yëû¸® I’Íf’$Õ¯d;n¶/Ðt»Ô¾˜ÚVíwï´ —«;^èšl¦?¦sç8N¸ïr§ÇñÏZîv\«?.ÿn+Ù†é7è7;îÇ1E3ccz¯žþx¥Û1«•>>þæ7¿‰7¼á mí›±e×{{\Ñ/zEÏyÄA€T*×uáû>ÒétâýT*Ïóàû>FFFB•>NöípÿxhC:WWÓétx ÏóBWÒz·«…çyÈd2H§Ó‚–e…m‹w=ÇH¥Rð}Ñç §žïâº.EïûaÛ ÃXðûбéw¡Ï)Ƕmd2H’T÷>ˉ…츖 Ó¹iÖŽ—jÃô9ýfÇ ÙW­ïC”øwªvóZÉvÜl_Lç¦ÜŽ;ÕÓ1jõ_@çl¸‘ïS˾»&íé·5M¹\®KÖÕ9úuLAÇX v\O¼’m˜¾o?Úq?Ž)šÇ¿P¼Ò혾3S¸óÎ;ÛÞ7cË<>^÷þ¸¢oôŠb‘Íf‹š¦…Ïs¹\‘š™Ë努ª†ï†QTUµ˜Ï狊¢„¯OOO%I*‹Åb¡PÓ>†a„ÏéØõnW‹B¡PPœ ŸÇÛZÏ©¦cLOO×õ|¡ãÔó] Ã(*ŠR”e¹¨iZQ’¤b.—[ðûбéõéééÄ9TUµ˜Ë劊¢$ö­µÏr¥–ײáb±¸d;^ª Óçô›/f“µ¾aEEEI’íc;n®/.kÛq§úb:F­ß½“6ÜÈ÷©e_ }—fúãb±öåõž~¦_ÇtŒ•`Çt¼òþ˜m8¢_í¸ÇÍŒãǯfûlÇ<>FØ¿µ»?nÖ–y|¼0<>îýqE¿è=šJnDÜ=²ÜµSUÕP5•$)¡ZÆÝ¼ëu+\ªû¡ªªá*W³(Š’XyXìùR¿K¡Ë2©ÏäFZëû(о.Ë2E tú=dYNì[kŸ…Ü_û™Zv\ˆéÜ-ÕŽ[áBÛv¼MVû>®ë†îúäÒœN§199 €í˜ÎQ£}1»ZvÜ©¾˜Ú´;n• 7ú½Ëíë'?ùɂߥÑþز¬Ðý%ÐÏc jÓr·ãZýq6›eŽ£~µã~S436¦ãW³}¶ã+¡×Y'úãFm™ÇÇ‹ÃããÞWÔÚ§×ôŠž MmÄ0â°,ËPU5ü+ Ýþ*-ùþ‹=_ ²,W\8Íä©(ÿfgg`ÁijË=F½¿Sùy`;n ‹ÙÝDÉþu]O$îd;n¾/–‡wÛ†æìk±þØu]X–…U«V…‰yW­Zµ, WƒÇ½oÇ õÇlöãþÑoÁvÁãc©êãFÎM½4cË<>^÷þ¸b±}zÅŽ{NˆS%Lp Q6Y–åDg÷x!µ’”ÑL&Óí¯Òó¨ª ß÷C#«• 
ÏóÂ}è1)Åtñår9X–Ƈ/´Ïr¤–ײa€íx)4j_” ¾µÁÑJ¶ãfúb€íx)T³¯K/½´á}ê …ŠÅbøÅb‘í<¦hÚñBý1Û°€í¸s4;6jÛ>Ûq;G3¶ÌããÅáñqçéÄøx±}ÚAÏ…¦êºÇq022V°ˆ¿çy^¢¬2€Ð ‡††Bw_ªöÒ++j/µ‹ª‹ŒŒŒ4tÞ$IB*• ;ëjû(Š]×aš&t]¯kŸåD-;®eÃÛñRÛÕˆ}†×uÃß§Ö¹^ÉvÜL_ ô¾÷ª SÛµ¯Fûã|>ßí¯ÙQxLѶµº?^É6 °w’fÇÆô}²ý•nÇ;I3¶ÌããÅáñqwÚÖîñq7ìxU‘–azŒøŠAej Šï¥jC¤\Æã{{ß÷áû~O®PÛê9oäJŸÏçÃØêÅVWšÙg¹PnÇ‹Ù0½Æv\?K±¯FÎõJµãfúbz­í¸mྵÝð˜¢3tª?^©°w¾mõž7îƒÇÇo[#çÇÇ‹ÃããÎЩñq7ì¸ç<âˆjF@¥~ɘmÛN(•­H`¸¾ï'ÜOËYH5¥r¡2¸¨®KiKµ¶5jh’$5|¡6³O¿Sþ}³a ½v¼T»i¥·Ò†æì«™s½Ò츙¾è];¦þ®Uö× v¼Òl²–ã˜b¹Ùq»Ï÷r`9Ú1°|ÆÆ÷ÇõÂããöµ¥ZÛµe/Û×–jtj|ÜI;îY¸ZxžÆe“‹'Ó9|߇ëºaÅví³œan-²/¶ã$lÇ­…ûÖîÀvÜZØ&»Ûq÷aÛ_:lÇý Û~¶ãÖÒ©ñq7ì¸ï„8†a†a†a†a†éGz®j*Ã0 Ã0 Ã0 Ã0 Ã,Gz2GÜ'?ùIœyæ™Ýnî½÷^¼ò•¯ìjæçç1??sÎ9§«í8rä6mÚ„M›6uµ÷Þ{/öîÝÛÕ6ÔËØØÛO‰^²Ÿnÿ&ðè£â3ŸùL·›±(žçáë_ÿ:ÛO‰^±Ÿ^hÇüü<±k×®nŸŽE¹ùæ›133Óuûé;ævT¶ã=ïyOÏç¨ã±1·c±vðø¸~x|œ¤Wì¸_ÆÇßþö·1>>Ž‹/¾¸ÛMé zÅ~z£GbýúõøÄ'>±è¶=)Ä=ôÐCø­ßú­n7wÞy'¶oßÞÕ6Žè•~°Wì¸_ÆÇ=ôPOØq¯Ð+öÓ  ÜsÏ=umÛ“BÜÆ1::Úíf൯}m×Û100€M›6u½333ÆððpWÛqÚi§uõómk·7¶Ÿ$½pMÂC§ذaÎ9眮Ÿ3¶ŸÞlG?ˆ°iÓ&lÛ¶­ëç¬Wì˜Û‘äÀذaCWÛP<6æv,ƒÇÇIzÅŽûe|,IÎ>ûìž8g½@¯ØO/ðÐCáèÑ£umÛ“B\¯066Öí&ôDç ;vìèv˜&`ûIÒ ×4Ó8l?½Ù¦1zÅŽ¹ÌRè•þ‡ÛÁ,'a;nŒ—½ìexÕ«^Õífô l?Øk`†a†a†a†a˜ÀBÃ0 Ã0 Ã0 Ã0 ÃtMe†©“€WzìKÿ­ôz»þÃ0 Ã0 Ã0 Ó Xˆc¦/!QL-ý÷‰`‰eå¯eï˱}鿌HLsÛPe[@*öXŠmGU&€ó®ºªÛ§a†a†afYBs7zLóF€[ú¯”¶qËö%Ç ¥´_zM‹mOÇ–Kïκè"¼ëêj q ÃtÆâ k^Ù¶ „0&—¶³JÿIP£ãI¥mƒÒëô’ÞjÔ¥çq!>[G¥ ·^¬ô=lyÿïo{·O;Ã0 ôÏó`šfâ5EQÍfáyÇA6›­Ø/•J¡P(ÇeYð<²,Ã0 èºÞí¯Æ0 Ó—xˆæCñÇ mOó@ÌiDs-»ô¾†H”¢¹˜ÈyA…˜{‘ðEÑD~•ýƒÒs§´-Í÷üØk"‘-ˆ½ŸûÑë)G-µÍErIßÓðƒ»ïƆ‹. 
¿bŸ“pË-·àÜ7½ §ž{nø¾{ð ¼'Ÿ¬ë7`!Ža˜¶ ¦ë  `D Z• Ç*DG_Á ÑLª§u VyM^à½z‰ßШs'oºñgžiQë†a¦· 1"QÍ÷}¤R)hš† ày^Õý\× ·Ïd2˜œœ„,Ë‚###PŠÂ‰ †aʉ‹k.„FÞ\´,wVd g6Ä#_jµK-=¶9M±í©ô½ ®ãÀ×4(Â}o‚™LÙ|¾ª3Å/ùK¼õ­oÅO~òHR4« ªªâ‰'žÀ•_Œl6 ÃîAÀ±,¨ªŠ@QÉdðÁ~7Þxc¸ÿ_~ö³xÙË^V×oÄB\¸® ¥tòu]‡¢(‰az àº.ÇïûÐ4-ìdêV1â+$¼¹ˆÂEË;>º!ˆV"$,.ª‘ &£qÏ4†a†azY–CA­|ß÷I’Ëåªîoš&<σ$I0 Š¢Àó …,Ë‚mÛ¡`Gã @LÖTU…ïû°m…B’$!NC’¤žk0 Ô@ˆdļ- !¦eJï«Bš]zNâVù2CyôN¾ìýr%¡Ü!BBR8‹‡pš¦ Y–k¦°, ’$U¾ŸÉÙ, Ió'š#†íÛ·ã¥/})~÷w׬[×uñ’—¼ï:çÜñ·‹Á'ŸÄ¿Ÿ}6®ýð‡aY^óš×ˆs iØ|õÕ¸qÍœ±};~<1¹»ïÆ~ãñÑÁA@Uñÿÿ«¿Âôßÿ=xà|úӟƆ °ý/þÛ}e|äÆ1™Ëá¶£GñŒïãOo¼7Ýt$IÂÄÄ8P×ïÈB\Žã„«a¦i†7iº1—¯–™¦Y÷Š^+q]7(8¨( |߇ëºáê­V˽ÑI,ËBa›eY†çy¡ÀIí%aHÓ´°íLsø¾ŠoäýFäóy(Šƒ퉑\úâ‹xò¤“Âu•yÎZBbÙ*ýþA@–å0Ì„V¼=ÏC6›E&“A.—kKŠïûá5dYdY_ ‚ ¼Ðs²M6eY¿ ý¹®^§Ý¾þ†a˜•ƒmÛ‰{~7Èf³‹Þ¯ëáˆÉÉÉÐÛÆç…B!ñ’$…c]×ëZ£Èz_3 Ãô q¯5*@hñÚÔ+v,‹feým6›R)ÀóêëeÏÝqNyÝëð‘gŸEñ=ï ·7MÙl·:„sS)ŠC’pt÷n¬ýÁ I²šF¸°òÞ÷¾oÿó?Çóÿé?áKþ0àû€¦áCúÏ;/ÿÒ—ðŽ/Ÿù½ßKÜ+ì @ÞóËå°Õ÷ùñõ˜Ö 7Ü}©Òü鍊n‚·k$I‚¢(øÊW¾ÒÔic!.†ïûájy±äóùÄD¼ryï„+; <ȇÜêieÐ4M(Š’pŧýú A"cµ}I8£í2™L(PÐgÇ÷“eŽã„‹¢(áw o'Ïó`Û6,ËB.—[¬yL lÛÆgÿîïðú;áø>ÎØ¾¯{ßûð¦M8:8ˆôÉ'‡›`³ëâ²gŸÅ5—]–8N|(ZüUÍ΂ W©Iì¢mâ¡%µìtmI’Ó4Ã3‹AŸG!*®ë&„µ¸Ø¦( dY=`i„©Ãoô:‚©T*éú˜žžnñ/Î0 Ã0]×{®,ˉE)Çq`Ûv]í$ÁŽ<ù1¶%·ø÷¦±&…Ÿ6²ÆÞë Ãô"„ø–C”o»€Já­íjDaMQpð¿üüâ¿À•W^ 8žÃÑ+®H8ÒÜ=6†ÕssøAÉY(›ÍâS¦‰¯=.X%G ZùOû·xòÚkñÔüð™ÏàØ¿ˆÇn¸ÒW¡ bž{Ê)§Î»ï¾ çRêEÁ«2¿Œßwª9þ,äÉG´Â1dE q”s@èÂNîéå+lµhÅM›rÎQ­èQÈ›_Rj)<<Ë,ËJ ¶&''Ç%ƒÖ4 ###uMþãžS’$…)¡nÜ‹‰¼¨mqƒ‡ÄÃôª_yT¹\ét###Èçó¡ˆ#Iî¿ÿþýúËÛ¶ñÌΰƒSºŽó>ð<~ÒI‰0ÒÀÿFeN6ovëŽiêsI<¥²¦i°m²,'lƒV§›í¸ …V­ZUñÙŽã„™ã8áçRø8µ|üãâÜô4n±mH±yÓñ…ªª‰(=Çqð£RÞNÒès$I‚ôÕ¯â¼R;΋÷û±cPªƒj}v¿ñY±BœëºÈd2až7]×Q(¾Ó„@(<Õúñ)o K$\ E âÇSU5!¾Å©w•Ï0ŒEÝÿ©‚…†Òw£çñJ.—«û<5{!G¿Q.—C*•Â=÷ÜÓÔñ–#ß›ÃÀ×_ó|~Ý:|åíoÇúï}|îs˜ý‡hH$®æÙ¨Æ=#㢅½f³Yär¹ÐÃL×õ„çY« Û¤ j€XÕ0JnÊ4ø&!®Ö*µ½TóLسgOG>›a†az×u‹iªª"ŸÏ‡ãåïŽß)ëÐÐPx‚ ÂC^Q¤ÓièºF[QD€Šq:y®ÓxžSI0 Óml8BË•O.å e˜¦z‡¸nFúß¾üeœó{¿æÓô]j.‡t):éãÔVÛEJsðâvÊ!§ `ió±®§p%No‚'Ä‘|©c˜eŽ_%S‰zš†ÈóÖŽž)œC9`‹²ÇÅÚ±1 ðåŸÿ<‘÷Í—$@Q 4ŒSOÁ›~ñX[” –¤Ž®(!ŽÄ2ÇqA˜°~©Ph^6› B]© SÈår0M³B`ªE«óÎÅÝKÉ›ˆòÉõŠ¢ààÁƒÝnFW¹ð?€¯(¸Æqðä?üÔnÀÜÉ'cÿ—¾´¤ã’pL¹ù2™L˜ÑuݪùúºUYŒ˼jÍ0 Ã0+‡zûjM³tj‘aZ Í5;‘«|1H”¡ ÉT`-•J…9—© …*öB»{Â.á 
§–þ&‘¬RÚ*‚ Àú?ù¬ûáÅ ù¾eBÜÜÜn¾ùfÀàà vìØ‘xo||óóóؾ};FGGÛzN«±yóæ°øyÿ´JD˜­x-.¶‘˜¡iZ×’çÆ/˲ /‹!´Û#®—íx(pÊì,¾è"Œ\y%EÁMƒƒ-96•‚&›ß{í&I9™ÚÔ²ãnÛ0ÃÔK/÷Å S/lǽM¯´èex\єʣ¼ˆ«S„—RJÀO9Î㕚 …2™LB$¤‚k–e…ÅÑâí.÷¬j½jÇ6D”£UzL>gKᨠ)n*˰†aˆ”ëÖÁ)…øK’„Á+®@áŠ+Â4YcccØ´i¾öµ¯áôÓO=}ßÇÌÌ ¦¦¦§-ËJä™§?ÊO“E©¯(mW3çó’ð¹ˆä{jÙvJéGJ#àÔØûi“Bˆ;¾çx]ÒÒZ/˜››ÃÎ;ÃÃÃp]7qRMÓÄÌÌ †‡‡ašfCåÊ[‰nù|>¬¸ÔIoš^Y©v˜]UŸ›¤ÝâK¯Úñuãã0pàönÜI’0==|>ß‘Ïï%('»´×f!;îv_Ì0õЫ}1Ã4ÛqïÞpõÁãŠÎAáה˜œ92™ Òé4R©Òé4V­Z…t: MÓàû~ËD,*Œ’N§ÃÔE¦ibhh### ·u–eA’¤0UM6› #Ît]çÜÙl³³³a1•\.Fªe2ŒŒŒÀ4Müô§?mÛ¹íe;v ´ BjE¯$Ë2øéOtZ„ž–~O Êù­i>ýéO#Nchh©T ®ëbvvûØÇð|“““a?Ié·î¹çär¹0¶,ËÈår( ( Ðu=!À*Š‚éééð/ŸÏC×u¨ªŠw¦i†"o=œúü©­:Ï”Y:É~ì5QŽ·Tì=•±ÀÙÒ“+½—.;ž^úž|Í“u‡–xÄMMMa``ccc€íÛ·ã-oyKøÞáDZoß>‘âÜÉ6˲ÍfÙ‹"$Õ0Œ¾xH’ÔV¸^´c@öÒKñ_o¾·}úÓPë¨|»Ü¡Â&LujÙq/ôÅ S½Ø3L£°3ËW´Ÿ 044”ȹ,IRø‰Æ”‘ŠØ‘à‘Éd09Y_²*ß÷Ã0Qòp"Ï$%I =£4MÃääd(N•v«5§¬å­D*$àQx¢,Ëm-fÖKv@ˆnY=G…Ѓ ¥ÿMãy€ïž‡k¾üe̬]‹cW^‰¯^r >SJKõµ¯} 'Nœ½$Òk‰Ãx-)¢°Ø9$o9*ÚÓˆ£Å¦6U)¥ØÞ¸Yúˆ·|é1y¹i¥!Qšv²´}|Û8ziû €xP¤Àfß6 ÌÕ÷Z"Ämݺ×]w]ø|~~>|<11áááðùèè(®¿þúV|l]d2‘Àžo !ýêŠo|ðÁ¶¿×옼\OýýßÇ-wÝÕ·¿[«©•³ŽÔ²ãn÷Å S/½Ö3L3°3ËW´ß÷‘J¥A8_¥üj²,WDhN[Kxó<–eA×u8Žß÷Ãû###á‚vüØÅb®ëÂqœp[*ÐFâHµè®¥8vtz~ÞKvL‘:„nD³š%‰p¾çû·qò{ß‹MM!sÒIØÿõ¯ã À_üÁ@×õPl%/µZ6Ô)‡UUáº.dYF¡PHx\&pJ''fnÿöð¿ %³œ tRãïQ™„(´4þ¾ágCü(riãܲís?`9à鉧õ}ÿ–qƒƒƒ,媚™™aؽ{7aä[·nMl»>ú(öîÝ‹;vÔµ}5HYõ<¯î‚•¹¤ö333Ç¡C‡Úö­¶ãC‡aïÞ½áÊK#®<~×ýû¿ã»gœìä$‡cÆèGN ²ãG}´mŸQËŽ›±áÇ{ žç5mÇÌòdbb·ÞzkÛŽßê¾øÈ‘#¸õÖ[133“ÈÑÅ0{÷î =2ZM»ÆÆý˜‡Ëó¼Š# Yó<ŽãTM“J¥BQ€Âàâ4¼@ÙÝñq3v ,m|Ü)âIê[Ù:y°AI’BÛW%,`@cþz4È»<([:†,Ëan® *r´• cŒ>™˜˜Àún|<>>ÞÔ¸ÂEáX¦/ÕM"È2þÇŽ¸ïg?ý÷Þ‹w¥Óáïþº×½. 
Ã-/¼Ðmâß¡æõf—NU±ðúµ¯Þó!<Ò¤ÒÿZ¨ˆD´r Ø0‚¨ƒ!Â¥‘,Ј¬ŠOÈøø8~üã×ýÝ[^¬a||×]wÝ’î7bûöíØ´iSSûSÅÒ ú2Z;ñ}éé›6mÂöíÛqçw¶õsZiÇؾ}{Sû¦<ñ¡anÛ6Îa£[•Z[Ùq»ªÙñÞ½{>Ά °eË–¦í˜YžlݺÛ¶mÃÌÌLÛ>£•}ñ¦M›088˜Xñf@„&9r6lhËñÛ16ŽOûZ&Q¼€4M +-VƒBá|ßCïdYF¢(+>UE7ÇÇÍŒ+€¥;‰¾š¯ù¾Ši$˜ÐÜ”<Þh»rO³fÞeY†ã8p]“e‹÷†aÀuÝ0'Z/Î)¨_ë·ñq³ã BÇIcaݨ&AÝu߇W²)êg5MÃm·Ý@x:¾ò•¯Äµ×^›Ø½WçR’$áßyç}õ¼ä$´Å´=ò1ñ@‡Èg"J°'—N²ƒ(ô”ô>µtœ4"O9”ö)¯¿©"ò«ƒááaÌÏÏ×=>n™·gÏ ãßø ŸS|öBlܸ±éÕ>Çqà8òù<ç’*ƒ,ö#Åi§ÖÖÏi¥ŸvÚiMÙ±`ãsÏáéýûaÅVʘ¨BS¿BvL›ÚE5;nƆ7lØ€sÎ9§ï¼/˜ö288ˆmÛ¶an®Î$MÐʾxÓ¦Mضm qL£££8pà@Û„¸^÷´ÀXo”å¡"A’$är¹ªû;ŽÛ¶Ãjð4î¥×„yµ<σçya²{òÒËd2a¢y ·ó+ws|ÜŒÍÛ‰mÛ¡(fš&òù<,Ë‚ëºM‰©T*ZJá§Óe¹ž[5/#/»l6[UÈëõ4Mä­ÖoããfÇ>„¶Ó°ìhš@ž‡_œz*¾ùÍobïÞ½aÁ„òêµccc½Q”þbÝê«Ïy5ÎûüyÀ[‘ôB“‘ð†€-Ïn‰žd !R8áÑV«öCÉ ÑÀ4|xxsssu["Äc`` woÐÔÔæææ000Ðt‡VäzË+dÕég£ô‚û¢ý'î¸O—Žß+w{ºÉ0µ©eÇì‹f)ôB_Ì0K…í8 å%ŠCát‹Açfhhš¦Õ\×…eY( ð<étÓÓÓáëTm>•JAQA¾nFFF iZè]Dy´Ú¾Ü/,÷q…sÒï¯úišæ¢^Z4¿"7š‡æóy8ŽÚy;ç¦$<ó¹6ݶcßèø®(Æ iøåÐ2_ø^uÛm0 ’$…öE©¦§§{§ßÊ@ˆhqót ¼ObÚô[Þ‚Ÿmû.¶.Ž„8É¢ >ðÀÇÀìúYlALŒ#¹øef ÊóGÁòµ†–q333˜˜˜¨è`<ÏÃàà víÚ…;wbtt®ëâ–[niË—¡j,ÂURï`g%Ó vœ ù>þûïÿ>ç?©‡é.ÎBvÜ©¾˜a–B/ôÅ ³TºiÇqÏ/š”QX¨išÐ4 Š¢„^@äyÏÉÖÈv”sh¡|oÕhd\:99ß4Í0}üü:ŽNJUU …7×uC @è5B9¹è䥧iÒétxÌ•>[®ãŠx¥Òl6[!b©ªZÓ›ˆr‘†VÄ5çû~h3tÌNdA³0Ý´cbŽ7]zÜRá8€m‹j¨ù< (¸ü ñž÷¼ñQ±y5[î::„Ho)’&B¯³—Ýû2üð¥?Äů¿OþôI<~Úã9©\ÊÀàmƒxð¢+?£ük7ZÚqŠâðáÃÅ;ï¼³øä“O.ºíµ×^ÛÔgèº^ÌçóÅéééN}­¾¡P(UU-jšÖí¦,‰fm£U´ÓŽsÅbQ-ªªZ4 £8;;ÛÕïÊ´nÚq#6|çwoºé¦®µ•é]ºmØñM7ÝT¼óÎ;»ÖV¦·é¦}tblÜ Ð4N>Ÿ/jšVõ=‚¦)…B¡X(ï†QÔu=ñšªªÛѶù|>ñÜ0ŒŠÏŽï¯(Jqzzº(Ër_ŒÇúe\Ñí¶‹Åâììl@QQ”¢a n«ªjqrr2ñZ>Ÿ/ʲ\Ôu½¨ªjQ×õâôô4Ï?[@¿Øq£c |±XD±Xœ,‹jéCÌÎoÚµ«888΋ÅbQÓ´þ˜ÛçŠÅ¢R,ã—ˆZz®•Þ/Šç×\vØ%—+ʲ\,ÑûÓÓÓE]׋ÿñ¾ÿ(Þrå-ÝþVUiÄ6Z–#n1â•JÚA:†çyaòK¦:ìM´4ÚiÇ€mŸúž¯j1í£Ý}1Ãt¶cf9°’í¸‘pÏ `ÛvE¸X¹·EÄCY§§§!IRèGŸ½˜—›¦i°,kA¯(FÐovLž“äý¸ÂLPµS _e–í´cÂ1+eÙˆåA€L&I’pþùç#ŸÏ‡}’®ëÝ럨ƎRåõòרh‚(çyª•Lø·‡ÿ ¾ïömì_»ÿrç¿àgOü ù±¡ªª¸Ê½ª÷¼þš cB\;q]A°Çô-6„÷¬ûÏÿ\‘È•a†a¦Ÿq]«V­ ŸSè(…ø•¿G¡¯€Å\×ÅÐÐP(ÞAØ“ÒT*† ÒD­üuI’ÂcÖBÓ4 U|ÓŸxž‡ yëÒ(÷›eYaî7†i„QñÎz 4X–Ã0Âü…q' JÐÕœŒ&Dxi|ÚêBäd›E”×¾¼`‘(§U9ž$œQ¨ØÉ¯â×ñ¿ó˜}íl"WãrÉÛ¹l„8JÞÊ0ýˆ ଱±eÑ©0 Ã0 ꪢX,6ü^üõ\.ß÷C!­šˆ"I&''+rFÓëž'\8èõò¢qÑM–åšíbúÊùˆß¹Þñ¶ªªH¥Rp¹\nÅç 
dšÃ‡p¸˜]tCð<ÌüÑÁœšBp“““aõèt:Ýy1x’â ¼ÞdñºQ¿´…ÈóMBTx!!Ô)±“Bh@S5ض MÓpêU§b×±]ÍY.bxß qd ¼bµ8žç±XÙƒx}Ô#wܱl:†a†a˜VRoÁ¦Zc]%\y'›¦i¡‡d#‹Þ²,‡@X„cšÅ¯wÃT Ð4ìDᦛN§‘ÍfÃðÔB¡ÚcÇq j€ÚœÒ_\ˆË•^+ÿòré¯{?ÞUǺçÅtå’. ï…8RLÙ“hq¨¤6Ó[8.ššÂ3¥ê] Ã0 Ã0 Ã4iša[6›mjò^È0ÍP×ìζU…¯ëpÓiÀuñùÏ×\s €¤8ešfçì’ª›ÆÕÄ ô§Bx”Œ йU ;µyÏÑɈ?_Áô½ç8{ÕÝ|Xˆë=càè2Q÷†a†a¦›Ø¶ I’8jŠé*.êÔœ, O~ýëH§ÓøÍßüMlݺ×\s \×…¢(á\^’¤ÎF¸ùÂZ\÷ó &°¹Òë™ÒólÙ~$Þ•Oq9@pR·°({-]ÀËÅ•s¹àØ>7‡Ûn¹…Ɔa†af‰PÄ@cºM€J*ç##8xñŸô=ïa¸òÊ+C}£¼¨Œ,Ëí›3¨Œ£•)‰¢j©ñ/¥¢zü-mÏRMUúÚ#Î4M®”ڜ۠÷p¬þîw¡( q Ã0 Ã0 ³R©pH)ÓxH¤?«Ä¶1õ†7àšü ¢H[CPMT†‘:‚Zü#•ØP!¼ßT$Å5 »$á%‡Òû:Dh* qUé[!.jÙ|Cê=\Êßþ-tþm†a†Y¦8GÓ4(ŠÏóx±˜i -Åi‹˜^ÁC²ÆAºŽmÙl6áǪªíÕ9”RãâBœ…UC¢òi-q­ÜõOƒ瘪ô­GÊ0ýŠ`pj*,GÍ0 Ã0 ³q]Åb±âõ 8m Ó2,Ë‚aÝnÄP­ƒšïK\×MDùyžUUáy‚ hÏB…!ªUÃE”ÇÍ,=¦ç2*<”mÿòì3U“¾â|ßg!®Oð BÎãPâJz©X$~~☹ùfLrY†a†aV ¾ï‡q¶mÃqœÐ „<æÇmÛDšMÓày<Ï ÷W…#?V0¾ïò,àù!Óu‚Øßb:”mÛâ1õemíÓ‚*¯y“r ‘¨ RI\4Þ¶„Œd˜+“`Q!nnnÝng®ëò ·IDy!••‰i[ÊÑè¢òš¥B*^l_:ý¯V¡XF2¤Uúœ ¶=…ŸS[—Ûmõ_žgº.$¶c†a†aZE¼Z]92¢A˜1ø¢ •[cŸø6´ºª–=/GAÅ «iš‰çº®#ø¾×uaÛ6 …<ÏC*•‚ªªð}¦i†•/S©dYF°, ù|†a`dd$…ëºÈd20 ƒs‡3=‘©aq½ÊqLNNFFFÂÇÁ…3%Ï8êÛãBAµKªî’k qÇîÝ»1<<Œ©©)ìÙ³sss„eYîv{DùáV2q¡Ë½æ#©ÂWÛHz£É±÷(d\‰Ljo/Ãhg`im$ò‘ðæCô .D'@thëÞûÞîœðãxÕ¡CPxÕŽa†a˜V⣶¨F«œ¢ð„Å„¸ø64HSËžWÛ§¡ëºÐu’$%r#Ù¶ r€ðv¢ô4²,‡Û‘8Ǭ<ÇA6›…¦iK?ôˆI+iŠ; ªjžO¹ S©T¸øÐ\ˆþY…Q5!<ר ],º›Åµ–°fff°gÏìÞ½ƒƒƒÃ0 ª*vïÞB¡Ã0ðÍo~³Ûí nÖËÑí¸|ü_d ®ÕcÍãÅLÍ)©÷VF×ðR‰c¡k{Ïßýð¿Ñ‚Oì..€ÕwÝÅ+· Ã0 ô–j¹zÊ©2Tƒ¾¾Èó¨ÉB€D¾ÜxÞ¸ àºnøúrœ0C^‘žç±'ÓSK<ÕZÇÁW_ ûƒLØ®$IíáHh‹CÝmÀPéý¸jèAˆó€cêf €Ðnll €æfff066†ìÞ½ãã㘚šê ¯8×uûnÕÃFrÁ0îñIÂZ¹$S.råEpjÿþưõ[ß‚ô¾÷u») Ã0 Ã0]E*%,WUAÀóÄH™<ÝHÄ+oeV.ŽãÀ÷ýΆò1LPj¦ªk¶¹íÛñ¶÷¼Ùl6tÊ0MÙl¶õ"œ‹(ç[­5 •ª¡ ÚȘššÂ®]»Â …†‡‡¹á¶nÝŠ¹¹¹n·@ïjp á$±Œr ‘gZ\XÓKÏ çdÿ©þ‡låéÇ{ÊŽ{J¸LPEÙò׉åêË0 Ã0ËU«V%žSô òÅ¥R)d2ø¾zÄ麎‘‘d2bœÍfC9feÇaO8¦g©:ÇÀ²ðÏï?´—¿<áXD}¢ii¥TIBS Qh^|Z¥¢v¾Qfɬ€ÁÁALMMatt‰Ã‡w»­!q7ö¶"A­Ü«Ä65ö\B”/E3¦TÆé ÷4hŠßXÊ“’f2d³YAMÓN§aFxÍK’Y–aš& ÃXTˆ«%à1 Ã0 Ó9ŠÅbÍ÷(\>Ÿ‡ïûP%,Ö“““¡‡ÝÓUUMÜÿÛšO‰é9LÓ„¢(‰pf†éyLÐu|×÷+„6ªœJs–ABqáMG¥g¢2O qmd cÿþýغu+áerË-·„c~~¾Bœëíît)šr´‘G‰Éõ”"f˜ZPnä~ZUr' kéÊùŽ—f†a¦&étªªÂuÝŠE4^Tc⸮‹ééén7ƒaªâ¡FúLצ§á %¼9-Ë …8Y–[;g!¯!Q±žjÉæuT&‰ç 
£¶²vìØ™™™Ð¨r*ìܹ333¸îºëê:àøø8FGGâôÚÌÌLø|pp;vìhªÁ4Io%NJo „Ýih¸è³Lh§NÜw_"q/Câ—ã8 ·i]×C4Û¶A(¸Ñ~Ùl6ÌûbÛ6 Àçyp®ëÂ0 (Šºa+ŠEQªV>ÓuA`õ]«1pÉ@xaÚ¶Á£ƒ¸äÀ%ÿDƪ?\…»~z¼ÿ²R<ðÌ?3ÃJDtlI’ÄE¯ ¶²n"Ráã8ˆnjD"C¢Ä/ Úè”Þ×KÛw0½e¹·²/f˜NÐî1Ãt¶ãÅ‘e9ôˆÀi'z”^WX–ÅöÁ,‰NØq…”満ªv/ZGBTµ|nSO±¦¥¬¡ccca±†8cccùâj155…½{÷bpp01Øp]·eE‚ X²ñÒÜ[‚˜g{scbnÝòÓ.ÚmÇíÁñ`xÄ‘ÐFBšçyeŽã@UÕð?•׎‹mÙl6ÌÛ¢ë:2™Lè÷ÿåð¥O~ w¾ê81pB± D•žI@‘(j麶I­Rͤ¬üˆ Và–ª°™6ˆçȯ¼ñ•¸D»ÎsÏC>›Çáׯ?ïþg¼î†×áÐ=„[7âµ{_‹ù‡æqâúxñ´ñì‘gqøéÃ䊭`ÃÔ¬Ú¼ ÇÖÖk¶ºŸ‘Ò"™iQ²Ó¼h/lÓ‘ÎØvFÛ×jvÜʾ˜aÚM'Æ ÓnØŽë§åž LKé…q¥9áPd¦YºfÇ® (J8¯ŠCÎPm…<Œ,T☎³f± ê GÝ»w/¦¦¦ªt˜˜˜À¾}ûZÒà¥xÄ9scѼ¸¼R/³²é„èlžÃrâIƒ €ëº¡×š®ëaUU!Ë2²Ùlâé8F:44„ééi(Šæ9˜œœV˜¾ô¾/‰‹NÎý»sÅÏCX“ˆ’‚š7 ¢<¶±Ÿ^:Y¢©Ò>"ÖQo::æ†n¦Kûd€u;Ö ‘Ð’pñ‡/†?â#P<³óÈŸ’qóΛñÊ5¯Ä™î™ØzëVœ¸ï6ïÞ (À¬:‹©US°Û¸Á¿AtJéó5Džqñ›Z¶ôž„h•Iíy¬­¿q-;ne_Ì0í¤Sc †i'lÇÌr¡Æžç!•J%R0L#tÊŽ«ª¥t:N*…ééé°J*9@´Ä¦ÒÿxôM#ϲ¸—A1ä$ÊU !E´ßמ!Ž[J)"OÆVÝ `v%÷»>z¸9ØŒUÁ*\~öå€ ¡P(ÀölÈ8Ž&kÊ{”.¥´©fǭtjLÁ0í„í8‚rÄ:Žd¢ô­Âó¼0ï,¡(J8Ùu'ó”ãû>2™ t]OT2ĸɲ¬pÂlFk+ö½0® t(+íÜ3­£Sv\«W‹ç‚#ê—†ÜÇ»°rçQr`ºÎI€PÇÇÇqÝu×…j0‰rKejj 333¡acïÞ½ îsèÐ!ìÙ³SSSKþ|B„Ë!rb¢Ê¦L055…={öààÁƒ]ûüFíøàÁƒØ³gOâ5Àê»îªš­]ض @tþtP®ë"›Í†EÈÝ(’pþñ ¼AÈ Õ. 
ˆ"÷fIO6òvS Â6eD!…pÒ§ŒÒvùÒ¶ùÒû“¥çˆíOÇ&áo¡•©´ÿ/xI’ÜÈ[P×uxž†÷Ò9¬Ùñ¡C‡ZÿC/B36üÐCá;ßùN…3+›ññqÜtÓM¸÷Þ{;þÙÍØñ½÷Þ‹›nºiÑ형Ǟ={ðï|=ôPG?w)cãñññnž²¦I¥R$ ³³³(‹Ðu©Tª¥¹ŸilU(P(Ëåà8<Ï LÕ‚rÝ–ßÃI Ëçó(‹˜œœ E¹^ ›ããfì¨>>®‡  ë:‡//CÆÇÇûr|ÜȸÂ÷ý0ÇvÜ!£e" „¸Å§ š§1-eïÞ½‹Åbñ¦›n*~ö³Ÿ-÷ÜsOñÒK/-6Ã¥—^Z¼óÎ;ÃçO>ùdñž{î Ÿ>|xÑc_{íµ5ßSUµ8;;[W[rÅbQ-‹ÓM}¦YÈ6ZI»ìØ(‹Ðõ¢,Ëmk{¡P(NOO …Bqrr²hFqzzº899Y,‹ÅÙÙÙ¢¦iácÃ0Âm*«”OÅÅ”/‹z±XÌÅFÛ¨¥m¦‹Åâlé±^Ú7_Úv™CçÜ0Œâììl1ŸÏ×ܶvÜŒ ßyçÅ›nº©#meú‹NÙF+úâ›nº)q †‰Ó ûh÷ظ×ÉçóEUU+^ÏårEMÓŠ¹\®˜Íf‹š¦UUMÜ?ißøë“““Å\.W4 £¨ªjÑ0Œb±(Æ?埣ªj±P(T}/Ž,ËÅééé¢$I‰ñP¡P(*Š’Ø–Ž×Kô˸¢Ù¶f³Ù¢,ËuÏ™þ¤_ìx¡1ÐtQL‹ÊÑu½˜Ë‰ Ñôôt1—Ë…s³¦ª¼¦ÖùÚ>–Y˜FÆÇahj¼C+“ $ŽOTÛ,õÄPS4[ìùÆ,VÙñƒss@)[«¡B …—RY–¡ilÛÝú%I+1Ú ˆÕ ‘{³‡0×2+-”ÏM†ØWBÒ[AäÅÖÁ*¡Ý„Vºhe‹Ây«¹¡wƒvôÅ ÓiØŽ™åÀJ³ãZU5Mƒiše¶m£P(@’$¤ÓiH’Y–ašf˜˜?•JA–eA˲ÏçaFFFÛËÃSUU…ëº ¶>OÓ48ŽÞ·Ékehhš¦…Å«˜ÎÙ1…SÑ0†i%­¶cʲbÛxò5¯ëºÈåÄäˆ ÓØ¶ÍÅ¥Zü«('×tƒû2gÑb Keÿþý8pà@ò:11±$¯Ç<a_“íþrÌŠ¡Uv|ï /ˆÔiZëÔ©t: Ã0ÂÎݶm¨ªŠ\.XãÕ¥.ûã˰^] h"±„7"V"AŽD8€«î”(œÓM•Be‚ €ïûÝ)YŽÖ÷Å Ó ØŽ™å@ÛíØuÅßBè:P¾(X&`U ªâ/ŽmÕ PU;~”ÒAl®‡÷G]×CáLUÕ07m¼’»,Ëáö4Ö©}:>Žã„);$IJq€HGAbišð}…B¡k÷ó^¡Sý±ã8ÐusÃ1m¡ívì8ø·çžKÌýh>ïß&€˜ËÅ/ QÎlIǦç…8*å§ü9%8l„;vÀu]ìܹ8|ø0,Ëjø8€˜È.vÓ3!쯅ÑÖ Ó2;žY»©X•Ñ¥@Æ\.I’BO7ª„J+À” ŽXÿêõBP#áMƒÈÅæ"Y¡4žçr±M×ß>F@çÞ÷}X–…l6‹_ýêWoG+ûb†élÇÌr ív,Ë•‚Y9Õ<‹Û§š°¦(Õ__–åª é®ëV×ǽžhLC¯/æFù[ Çq‘Õ°mŠ¢„¿ïûaù¸ Hm5M¶m‡  +•NõÇ”W‹aÚA«í¸bIÀ÷ñýÇ}9MТBSHˆªOäwÄᔊ=Ë@¸`NMM%Š#ŒŽŽ6U,¡üF;00€[n¹%,<<<œpÿlÓ4¼ÛˆÂQf)´ËŽ×?üð’ÃRƒ H¬ØÒJ!…cd³Y˜¦‰Wü*¨Š UQ£²Á€è°ÕšJûˆBQ aŽî8¾»(Š‚|>ßQÑ nǭ싦S´sLÁ0¢ãv,Ë‹z£U¥™Ë:¼Â(5îõFUTiü?G®ëB–åÐcŽ„5s1½*P¥ÓZ8ŽY–‘™L&cANšã¬ÔÉnŒ+ü6¥taV.í´c1í+€ÒBDÜË6Þ§5–JÎ e¾¡¹°bÒõk¡·»lz+òÎyžWsõɃÐ&ÁšÓ>–bÇäx¼ÔÁÄÈȦ§§Ã|&€4ær9ÜõÝ»¿ÔÁ§ „5êß-DD}ˆ»„†¨3Ï—¶§ heG\´ Ã0ºZ…´•9@¦[°3Ë•bÇ’$¡P( N‡ãòrÒu¦i"‚°²ªïûÈf³$ ###Èd2á>ÙlvÁPS×u±jժ𹪪Èçóð<¯ê{”.Žªª0M†a@Ó4¸®‹¡¡¡°íå)?V:í´cŠð`˜vÓJ;gzž( |Ç©*Þ/©b*… bΗ…Dè5ÂéB q=J"GÜÜÜÆÇÇ155…ùùy cxx¸/:À „ŽÀ"Ó«øàyÎ?¿©ý)/ yÁe³Ùp ûÏ_øgÀ%.>Bª RÿŒÒã "ÆÜ93 Ã0 ÓFEÁôô4<Ï SÍÄ'¦ªª†Þqñyåg£cжñmâ¢X±X¬úùªªÖ|¯MÓâ\.— ÃU%IZñ¹á:I­ðe†é \SÇWh)M{ÂÕ"€H9B4ˆyÓ³$rÄíÙ³›6mB*• „¹ë¯¿ãããÈf³] ÿ ŠFÕ0‘Œ¤c˜^„4±f^¡iZ˜”˜rŽýæ§~ø0¢Ä¢ê¦."¯¸&ó2 Ã0 Ã,•…Æ@TI°‘}:E­¶1íÅqö>dúЏS|î† P^ÿúÄ6K.Ú§–þâÏ)Ò)ñA`/¥f 
·={ö`×®]ؽ{wbƒ±±1ìÙ³×_}W"×*Ôà–þ¸B*ÓëøN¾ývàê«Ú¯<ɰmÛaÉë|>/.ꌩø r:¢U \Å„a†a˜ž‚ñ3Õ Ü|,€ö>’YldD)¦éW²e¾¡zo´{NxˆR™=¼m[·¿bKˆ‡¦þ@Q —ÙpËdˆ“[.º`!®‡Yããã®á‘Àв,\qŘ™™iKYêz Ä­åØ`méO=õTÃûQg­iZèW‘Ó„. "FÛ.=6¼k2 Ã0 Ãô,´0Õ°,‹½áÖâI si2F•mi;ÐH»¡?bnM2Q|["ú‘ž“ ¦Ä^’õàœØg±¶‘'#éȵÿ±Çº}J—L|"?\Y¸=€E+9/ ýHôC©ˆòÄÅ᮵§Yˆ°Ô…btt‡îšTÞ¨ýÒ_ïg°ë>žç-¨¾S¨ãJ­Õ |¯œŸox¿x…±L&#¼àqÇ‹ÿ¤iˆB ñ»À9à†a†a˜¾*¥ö£HK"ͨÂbmcè=bïA ÕØ1È«Œ¶ñ‘Ì.Câ– ‘ŒŽGû† dJé5µ´- ijì¸$ªQ9Å„§£½ÚÎø2âÂó#I@>ŒŒ´þCȵ0þ¡A“ÇbºÆšz7œ››ëv[+0‹Á^!P‰wJ‚ “™J’¾/÷íº.&''á8N¸¿¢(‚Žã„ÞU¤ÚSÂÜ~¼ö*¿xþy¼ª ¸L&I’ÂÊ_!&DÁªzª"r-ÏÀ0 S‹j#n†a†é‹9t’¸×˜ûOZˆ(ðD+="]$ˆí¯ ÍTDCx*n)CrÓˆÄ2‘&×h_«gkÕÎÇA‰}è⦠šqN×õÐÃ-›ÍÂq(Š’ìè½x{]× Û‘Ëå*¼ålÛnm¥—ºgŸmhP‘N§‘ÍfÃßÛ÷Ë*-Ð(Àð„ã;$Ã,OÊ( ½&Îá.½_Ë{Ö]öú —a˜žÄó<˜¦™xMQ”pìJ•à«áû>2™ t]O¤äp–e…]Ã0Ây‹“Íf¡(JÕ÷4Mƒ®ë‚¦i†ãmMÓ`FÏB+‰n q”óÌE2|SEšI‚š‡(  j’·Ór¹8Õn»jÙ Ã.½Cùp­šGN4Krvá}Y°%½ÇÇÇqýõ×cll,Qubb{÷îÅîÝ»»Z5H†¦:XxÞÑ \×…ªªp]7ÂlÛ†¦iPŽãÀ4Mär9d2hšz¸‘Èåû~8Xð<¶m‡«,Ë *èñVÕ®»h•–øg¨ªŠ…Žã@–e(ŠY–aš&EeY˜œäRõ°é…ÚžÎuÅà4aüôÓè€a˜þÈ+É!­S®ݧ°ðJçb,D3‘(Ÿ,tkÈÄÞ/%¤Y3¿kŸYÛí³Ç0L@ º”ïË÷}¤R©° ¼çy5÷¥qpyôF&“Áää$dYF ›]×E±X¬z¼…ÞK¥RPU³³³á¶©T …BŸSî€Ñj⡞”ÇŒDÊð¢#*F”þË}_mnÊIV.ÕòÃyž×uëwj!ãŒÓlðâè2` d¸þúëñ–·¼£££€Ã‡cff»wïÆØØXWJ7q ß È3‰D5Ó4¡ª*|ߪª"“É„\:F¡PWÛÊWöÈ#Ñ0¢uEQËåZÖîfŽE7ÁZžªª(˲’!” |³³³ ­~­†åî³!ŒŸ’LÈàj% ÓË8×kùzÅæÒk ¢‘½…h°Eݵ 1ú§XâšOAÌ D"-ß—÷ ˆez*æBí’"õ/Jé±[ÚW)=6Km,gÞz&.:tQ·Ï.Ã0}-.ÇÇöµp…B###aÊ”òüa’$%"8šÁqH’”˜«ªŠl6ËãÚ.ÐŽô86ÄmŒò¢Å=Ï82Ó (Ô® ”¢áâ‚2é E–ùˆb— ŠWfúš0GÜàà öíÛ‡©©©PRU£££]-Ð@Äo®”À²ÓPn5º9ÐÍ™Â< ÃÀÈÈH˜_MQ”Ðë-~ÁU»™—«åý…Â’W}g×uÃp\.K/úгgfêTAzT:ŽCòf‰5ûÓt¦ÿ¨"JKéFm)®%…H£åö4’1,Db˜!Dâœ!´9¥ãi¥}),=€èD›i¥ŠºBô£òjA©ñ$4(½¯•ÚJ‚ßHi» tü‚8Æ‘MGàððz¼¾Û¿ Ã0}€ïûá©ER [R¥xÇq`F8f‚¦iPUµbÿ¬òˆ’òvèº^3RÓ´Ší™öBs¨Và Zcò n…\èi' €¹¿ù|þÈø‡‡s¿ BÝ áÆ«n0ˆŠb ÃÃÃ]ÍW‹ò°ÔvCÚ¶ UU!IR6 D7m Ï$ñnîét’$U Ó\îîìå EQÉd`dYÆÈÈòù|¥ÕŽì¢=ÌÑ£G!¿æ5‹nG6®˜7 !É;…a˜öAý“‰¨Ÿ¢ AÙÒcÚ¨û er¶å]3JÏÉûÌ…¸‰yH®rÒŽH“Kÿãë^é¹!Ú©ˆ29K7IØ(œ‚ä̃>+[å5 É2kô˜B¦/°‘¬ÀØ ²X|θ ÌmL‹»$ÄÀääd˜_Î4Mø¾ß–RZpf:¥ïi2{ˆn]t›+_£b˜Vöž‡§wìÀtÌ'‚Ða¥!(}-ȸ™¾§îª©”³¬”‡¥Ò\£ÕPn˲Â|rJyÛâ•FÉk @ç]Ö-,ž½³ïÔZ¢„ jÙ¶qÞè hZÞ—”¶Íår€ȪŠp©T ΃ÂÛ Ð|MÜ)'‘ü1‡ <.jÜ%_ö«—uö<·À±cÇêòˆA¡PˆVoˆß›ˆÄ1”^£Õ¢ß!¡KC”»Ä¹¢Ù.-dPQ…ò…Šv%6¥ÌÒ 
Ã0}Dµj‚q¨HX¡Pÿ(<•B¼Êi8ä+;.ÆÅó:3¡™°Tʶ`A¬=åÀ"Ó=¤ÉIü|Ë– ;¦¾«ahQ—æ‚û– u{Äõ ¢ÖÍõLÓ„$IaQY–áº.<Ï WÔ:*¶QØS¼þuüã3ˆ’sëˆBŸDáŠ4©4)èÄä|›ˆ¼D™%irIjgQ®"%Ö¾!Dn‰fìóÉsÎ/µ•ò ©€r@Áï8Ѐ\ÒÀ»Õw‹ Z÷.—ÚIKZ³HN 7·Äoà7:÷›´ÓkTé*§"œ—ÕnV(a˜^e1*8« e^é=êCiÁ   „9êGã^j¹*ãó4ï$°GÃ0 „gÙªU«Â窪"ŸÏ‡Ëߣ¼pqTU…iš˜žž†ëºJä_¢ª¬‹ÿ,@JËf³( H§Óá1)Ï1 qÃu݆æ^´^o º8ÃtŠÐÆuñøi§U, xž×\È5izÏÕ™Y}!ÄÑꕎnF‹ Wv˲ ( TU )ÐÛ¶+\çÛF\ #(·U¬«6‰£0+¼( i<û¨Uz?!šÅ'ˆ»…€R…>§ô—G$âeKjì³g‰{@Ô9×U$o’·ÆÒÉï1 ”¨Œ½lÄ~Õx®#ê¯râù§Ø—BܱcÇêÚÎó…ú,+ˆÂL©ŒšÆ½Où:e† QUÅ‹‘ ½WަiáD6—Ë…c{*F,t¼…ÞSÓÓÓði•”(Þ°Ù$ PB3ù¼œÅÞgVåvܪ¾¸§ 4A ÄÂÅ4¢<•ñ²q±Ž¼ãtD}u|uq6ö˜/¡®ÒÎ1Ãt ¶cf¹ÐÎqÅb"å‚£[<òfš¥ÕvLécTt$|¿äxÒ”Wœƒä¿ÑbLÏrŒ…!q ý-ÆÔÔöîÝ[á]gš&fff0<< Ó4-W^%t]ªGœeYð<™ŒÈüoFs±Ú”쓌hRg!š(æ\ ÁŽr´@Lø D¹Þ4DõßóˆÄ9 ùDÙ YÈ©¯ÇãʳÙ,‚ 艀vØñ|ÞpTÌ÷}À4Kb„=qLF¼žJ‰ÿ™ `ÛɃ˜&04X–ø«Ee7‰ž'þ×M>¯v<ËÛ¹®xÜ¥*ËŒ š/µ/î f×â\¼z)J©J)u)yD… Hh㼋=M»Æ ÓIØŽ™åB»Ç‹U¨¥¬=:’A8 Óí²ãp;9Y1ŸUUµùÐÔòéZ<ÚƒékÖÀÄÄ,icïÞ½˜ššÂÜÜ\âõ©©)>|ûöí)Îz=‘—P£2M*•B.—ÃÈÈr¹\}Âiµ„+¿´M¼˜B¹›(LPÅrÓ„ÑF$Ôi±Ï‰_TT•”ù”²|€IDATTïp·QU¶mwÕ+®]v¼hßG.›Åç?ÿyœûû¿œy&àªQžª³BÜ’e@×ÅÛÿãhZôþȈ朒[Ðt,­ëŠ÷ ClÇóı=Ol²,¼ëh?E²Ù¤·]ˆ¿ø+òùÚ^yÕÚ¯(â³HÄc¾¦¨fÇ­ê‹;‚‹p á;€\­Jp)O¦½ÐÞe Q¶\ ÑÂÓw´{LÁ0€í˜Y.tb\áy £º»1}*€5¦y:5>îǦ÷Y qd¤dõ066 2¯ÂÄƇ‡Ã磣£¸þúëj¤ïûaÊŸF¡J¨³³³õïä/ò> *¦@”h‘ÇUÕ $äå…£ýª‰ã“M|Ñe‚¢(‚Žã@UUø¾V­êí²ãS}´ö*ŸiâÅ¿ú+çvpŠæ{°÷‰îÖ:HÏæ€tpײ(9q<]5ª…DÝuâ—q]øë×Czáøgž ÌÏ#X³F<õ«ÃÊñ8p˜˜€23#¾óúõ{ ÒSO!†ôðÃþà Ÿ{nt|²×¸À¼Àj)]þåÿC|¿Ò›´ƒLLLàÖ[oíÊg7cÇGŽÁ­·ÞŠ™™ìر£+ífz“½{÷†éO:ÉRÆÆý˜OÎó<˜¦ Ã0^&®ë²¬0dËqœDø–išeŠ¢T¼Çh\Ññq3v TŽ‘B¨ÚuHµæØny311ôÝøx||;vì@| @…-³·óÊ`||?þñëÞ¾íB\3lܸÛ·oǦM›pàÀá%…úóÃù¾×u1uË-øƒ/|Aܸ)Ì|µ²7÷!fŠ“³Ç¡ÒëñSÊé@Ì UD~Ò¢|CäÁ$ÅöÍ‚+œ4@>/N" ssssØ»w/Þô¦7u»iu300€í۷ðîÙg“oz^ò©ª8n¿]¨.:›ÏÏ0D^¹^w£VÕ(÷\:]ò¬"f³•B[=ç@×£Xºa.$x(J2Ü—!êe2"4—Ϋm㬿ÿ{üÞÓOãÔÕ«»}6ëbÆ زe ¶oß^õýÂ\5ˆ.”j‹±ÿñÇäÿh—ý7QŠæ?È«ûr ï™-€~°¶ÙoÏÂ9o3²_¹ú¾ÓØOcä·_ãßVAy' Ÿ ¤žÎ3ÏÃøéßÁ~/üÉIx¯z´[o…ó[¿ý{߃ý¾÷A*µUâ ßþ608ï¼ó NNÂ=÷\``@|¿õë!­Y“ -.¾ÚƒÇŽÁ_·î@zî9€üì³6o†ŠÈ лðBdßÿ~ø'N@Éfá¥ÓP/» þÿú_ÀÌ ì}û †E³-úm·Á¾ür(SSðQxë[¼ímð·lù àºH«*&Kߥ }©:ÿÖ­[±mÛ6Ì”ÄÈ^gÓ¦ML¬x3 lß¾GŽÁ† 
ºÝ”E¡±q|ÂØ/AÏóÂhÂqœ°P€D>jÊç¬ë:\×­+WõJdÓ¦Mؾ};î¼óÎn7¥nh|§–7-î±·¼¡~mr²?ÂÂh|W(`ÛUëšÊIÔ‘Ûˆé%†‡‡1??_÷ø¸íBÜðð0ÆÇÇÃçSSS(Mrj±qãÆpµªŽx¨¿HˆeY¸öÁ±eóflúøÇ;ïÞ4äíòÕ °:¨Šˆ“ÊCôð÷¥€ß°ÀïýL^+yÀy:`j¥ÄŒé.°eà»Þ*A O‡…î ¾/þTµvµKÛ®L®O;D¹øÉˆã‘ (Éú”Ol±Ï)ok¾¬N‘ç /$@´=îD碊w‚ ¤YÀáDZgÏž:­ µ4cǧvFGGá8vÏ=ÀK^}ït:á-æû>$I‚äK®ʗVÚ …•vÑæn4Mœ»j¶»TÈ[©üÜ4s]¯lŸ®c®ã4?Þ³ÿ¥£'®9Þ°aÎ9眚Þ¢äÉZJb Ö-äÒ6$0iˆ @Y²ôE¯z)´ïÝûM*ôwm‡òÖƒb>徫‘[}&à¹0¶HÀ‹ ðléË2fß»89ú´™x'Ÿ ó@nj Þ…BY½^& €ý¾÷AEäÀlŸ~:ð;¿Vºò¡”ÚFuuDuTÞI'ë×Ãß¶-ΦáQÀY¿>LSGÇÁúõHmß@ðÕ¯Bžš‚4;‹Ô¾}¡ƒu¼†O€{ùåÈÿìgÀÅ ‘rr’ÒâARUhëG.íðaÀ÷!‹°Þô&èHzf‹%§pªiAÎÚ&¢ûl|ÿr±mÛ¶Š¼W ;Þ´i¶mÛÆBSÁèè(8Ðq!n©cã~DQ” 1­¦øRášåZA `tt§vZÇ?»;¢ñq¿Ê܃ê0õp­9¦E bpp7ß|sÇ?{)ããŠq…ãÀW”D¿F‹ uyÅÑÀ2îÔ¤†ÇtžááaÌÍÍÕ=>…8ª B>>'ê MªÖ JŠ800×urÏô}_¸¤£Æ¤€„£’eÛ6dYÆ%tó&O8ÈD!ÀÒþ4’|¼)ÏÙ’@´Ù¯¯ë$19¤Š©*Ym8ÀH[Qâz z2üT*þ#!Š’ñWËE!rñ×u=ͪ ñ %Ò'E¾–÷S¢XÕ}r9ñŸÎ7}úœjŸ—NGâ  m)´¯G¼´–jÇ@Ì-Ù4ÅïPún©T Š¢@UU¨P…Yèì*ˆÑ'µ°Ék­Ù•¥L+l˜ðB’ ;Þ‹/B¹÷^¸ç‡|@Þ²:å_þšeA“$(«/ô£_ÎS‘Û—œ. š|ðŠÿddá¥ü-ñzïZFö¼óâ_¸òý²ç Yõ´åï*Û/t%¼'`nÚÿ…`Ü~;²5¼|U¸øâðy¡ì8”õÀ.}/oëVH'Ÿ ïá‡1ûÛ¿ ç¯ÿþgÀƒXk2!º–Âñ›~'ªä":¬Tù})ƒ`xãFtƒVÚ1Ãt‹•jǪªÂqhšzÇ•‹s™LŽã`ºšÇ9ÓS´ÒŽ}ßOìkAÜXŠeÚM;úãx:¢†Òˆl@”‹Y–¬„ =55…©©©ð­[·&ž7Ëàà víÚ…;w ï ×Å-·ÜÒÐ1N>ó̤C™ï 1Ã÷ùž<σmÛ˜œœÄÐи‰×ÊyE/SL¹uPèiV)]ª¸üQ‚‚¸CXµ$õqOµjÞe幦ê¡V¾¯z/lIªO¼h¦ã‰çàj$ÇX­óB”Œ{Ô•¸ªK®÷K±ã„¸®°‰ØwW²,‹Nߢs€úc±W¼BÞ­è‹ 7¥üe";Wææàn܈ì~çôÓ‘ýÄ'à¿ë]PßýÍ;ß ¼óEIôµ@¨XTa(¹eÉQ˜-u¬O¨§žŠàÔSá?åðá¦C=# Î: ÚYg¯}-ôßýÝÈ:—C¡Jÿ/##é8ä¹èAÜúˆŸe¤´-åö˾âØý£uü¶Òަ[´ÛŽm;*˜^9”S0CŒ±±±ºÜ–ãÜ»iS”Ï&ÄH¡ÌÓŒò†ß÷_IËBôô&¢Yd•â oDÁbZS|äÑá~½†$ÕÌñuëk_‹·w  ­´cÀú‡Ol»Âû,Qª¼àŠ c˜:)·ã¥öÅÇG4F u ãÿüH]ݶ¡Ë2°?äZž­Ó%±Ëí>Þ}öI‘ªôLù@‚ ¨HB-?ü0üÁA¨å¹#[…aDýŒç‰…¼HÉéÐC)„BŒ‹÷\O<ú(~/HÑ&Ú1¦`˜NÓi;®–¡E©\­g¸|xÖHJV O¥œqåa©Š¢„á¨###¡÷Ó;´k\·J_ÁÃ`¦]´ÚŽ˜Ÿë6_ˆ*›QñG•!+ š“$ñض£ ÁxÀc/àºÀ—¿|.dùum_3GÜÄÄDÅkKÉMA±ßâyÎûÀðñ$JÒ^6Â$ ’$A–åpE-%íQw<@þVéÇÓÅRIˆ2Šˆîì„ày;žºí6à£6ó:ô08xë׫뻆BÜÄÄ®¿þz\wÝuÅž={0::Љ‰ @UÕ®%‰}ÉùçC}øaq¶óù _wÏóàû>dYËœ‡„eë ”åI ó¤<LtZ*mK÷üRX™þæÒK/­Z“Äã^øeú YÄÑ£W­‚uöÙ˜þêWüÎïÔª:@Ý5å}SÐÖ•?Ê# ˆ|E¶mW,ÜF&“U»ƒš¦Á4Mäóyèº^»š¦AÓ´„'\ùñèžD±ÿöôÓ˜üùÏqûí·ã\ßG&“A¡P€eY‰¶I’„  IÒét"LËó¼Êþ£^Ê ýT†Œ>JéPÛ÷“0 ³‚Ð4 ###‹VGT†a NÃ0 
¸®‹U«V…﫪Zÿä–éiâÞpñÔÌòÄ÷}AþæË%G¦òÝïâØ±c㲦û)|µ`nH^eä?ݔ޼ÎÚ?L·ï‹ý(­»ï ¬$žB ‚èy\ £Tó@ô9å)Ìã©ìkyc—{„Wóþ®F5q×]xßOfLÒu`bâQ8p¼®ße ÌÌÌÀ4MìØ±#Qýcß¾}˜››ƒiš]­6æÐ\|Ó*“×uCõY’¤¤g"Êíþ"%ÕÙ Øwe­f˜v/HQ‚ÄãìzÌô>4@&¼' üû¿#PUȈp€è›ãŽZä‚O€V´C8Ž“X±mš¦…b‰h$zù%!ŒÄ3ÇqBáÍ0ŒÐÛ@(„ѱ゛,ËÈÖ£µ{ãF\µa~ô¾÷A–åpFít]žçAUUd2†Rãž$4µ, ¹\Žã@×õP¼«I:UJ^D“Á"Ã0KCUÕ°ßRÅb1|/>I-ŸŒ†¦ïˆïÃ,/ha ~}švÅ’RcãÓ4¡iEišP%\¸¤bŒ@Rˆëë"-A€{Î9ʺuMžÈäSÿÕ€ó9@Sjgº ‚hmU’*}>|?ªH5!ñ8^g1îuFeôZ÷}?Lg²è9¾pFIXsŽÁÿ@ÞcŽS)Ž‘—˜$‰÷]7Ì|_ ïJµY.&Õ“c´üý…„¸8å§¡–(·R8 ¦¦¦*¶xêÀÀ@XÚ·Ó¸‹øú>ðÆ7N%“.RåSÊ '‰ÿÒ9î¥MåkõšÒ>¼ü´‰ÙÙYlþÉODOXæ}⺮˜<@Ømº©`˜Ž£Ü?¤'ž€zé¥õ…¸vîC¸XÑ͸ìÆk–%r°K¾î”›-þ¾iš( aÿŸÉdt]…¨l6z\P¸g9Qþ·êM'·}Â÷£çž'=ôÜ÷£Pü}rÇ€ÔªU-ßé»Àää$$IJs~¼‚7•@Ìç…ï}­x†a†i3´(í€3´,‰ø”™JϺ±ÿTa S6ù¾G¾ðæFçpÿ+îY øÉK~‚»¿y7à×üñ5ùú@÷ud,° 32ö>·°)-‰ñ– (š.^I9Ï(½?žÉ Ü4ºŽÉb1± +IRÍ‚3Ž ›7##€ÿ0`ŒFSEò _(žk0=-†m““â¡y¶Ñ>…Bt Um¨VÓaÖÀ¦M›*ÞØ·o_âùÜÜ\WxÙ5×`ýï›&°aÃ<ï±dH*Uqï6@>(|aˆ*áºBÅU”¤º\/äFI¬ò˜çò’ðX#Ê6MþÈí4®€ëº˜ðY–8ÎBÊ5ÅpWsî¨ÓM“QJrH“N:µ‡þÇÝ`)œªš¨jôܲ’Ÿ«ªÑw¡ãúþâ祹võꪯëº.n†tSbA˜éqhmCûÿ@îmo«GJ,GÉg¨HN µ¬˜I\@"ïQîE>_àæ›ï†,+ o1Ûý õ/¦õm¾õ5A¥u±­$ZV$ •÷Ÿ¹\à8‘ÎNy3‚@ìï{éóf¿ø^ñA¶¼gÄóh¸nn@í¥SCÿésª­"zð•¯|^¸eáü$’$:û¡!ñ¿‘ˆ Ó hÌVk|f"ºw:î4¢{jük–û+ÑÿÓóíÝþ’ ³r¡4 ñK™AT’ž0%nõ•—Í"‘ ?1ôÖz=Öî\ øÀ=€í?ÝÏó°ñ¿lĶOnƒëºŸ–qVî,Ÿ,âùŸ?œ-Ž{ipi˜¬o½¶ëiFNeÔkõÉ$¬ÅŸË¨ï-'|eyöY\SQv¯ŒÏƒ¨ø+ &]†UÝPU1¢ ñ‹Žæ£tlYó&º€')ªÅã½i2jYâ³Hd£ãÄÿÓ¶„ëŠ?z<<èüÅÛ@ûQãŠ|ù9¡øòx :ůg³ÑD”&ÁýÜý§_T–16MSx°Xö7(fy@NÆ2mÇŽÆëÙÄ<’}àÆoÄQ÷£±TXðêWÃßþ탘˜Ðaš&' YVJý³öù¢cßö©´2¨(ÉjG¤/ù¾xm¡*H„a,±©iÉÕÆj}]×ì¹ßîE}:íW¾]ü?•moK÷ zžJ•/(ý~þsqoP‘«å3Ÿ¹ßûÞüáþ¿Rž®›E€,dpW‰ãy^t¿ ‚è¾AƒÇ#Gê/ÏÎ0xˆ&”$ŽiŒRzM‚˜ì¥c¯“G‡1át­¸¥ç¥ÒðžJ¢\®t¼D^¹6°nu“ù|†i ¾ï'Æm#îÙ@7ÕxÒªò1ݤiÕ¬<ù¹ÃÇ'A´âG“´8ÑÀEàyÀqøwgúÀz x:/úÂ,Dßõ×°Þž €gœ)‰Üçk\(wHð<çŸt>NËŸ˲ Iô‚ŽioÞ¥^˜;ö.ù.l/­:<ý¥§ÐcÒçi8 §ýúi‹ŸGΦSég¾ï‡ Ê®+ÆeßøÆ ®¸b0̽Fc«Š‚öb9³˜•Àرc\×…aøÈG>’(Ýëº.®¿þzìÞ½»+ùážÝ²¸ùf`×®Äëé409¹££Spœ»áûZØÊæ@»È_ (Èqe(J¤F“Ð34 kԇʲ¸pÈóÊê’UÎB1Eâ^ùöñRõVøˆNù„°å˸F-ÊcÅ›hÖsª½/ËÉNI–E'&Ë‘×Éã_µø÷O;†ÙÓO¯x]–eX–CâlLܬk¼<€£€²"_gPm «ÉáÇÆÈH²ìã©§žÂsÏÝŽßú­Oâ·~k$I„šf«‚JE2Wò(‹{ŸU£MißêF–uóO€/Þ ¡œ ²?¬Ç1­¼ï^èžáº*EÁ_ü…ÈWBâZ6»ˆjèII+á}C–E[œÃÓO×WžY†Ôš1“§™ 1æR!¢¦!&•¢’¼"o6@ ‘—E€È 
_z?€ÓHÄÄ„U+{\ІHìOHHzj¨ÀÝ{ï'§ ÃtY–—–w{{ÄßwœhE ˆ2Ü;NåM8„ŠBÛ—gÁ÷b«i¾D+}pW ¨• ±ÿ¿ø™ ü€œ|ÄnðYUô™ñ)A"6<σçya‘¨t:l6 Y’àÚ6$MÂiòiaÑ'ñõ”0ZL’¤p_¥ÜËÃ/Sân[@åŠd¹ ‡ö)PÎM[–ÉÀèŒÆ£8LxÇ;îÂøøÙ¸â qÊò‹…ßru¬Å@ä€Û·oLÓÄW\ááa „9ávìØQÓ[®¸®‹g·lzûí !ŽBвÙUå·”\š£…wÈP~TÚAA2n¾Œ¸gÄB ãý–¢ô·‡V?@žqqvîü!€·w»i qè´ÓpÊYgU¼V\¤ Ãôäx²¾ØP®äRŸiZ& …,€¶ã@’$är åt¸î,EÁ§?ýÆÄ'Ö“O­_"*eÐoƒ‹«;ú¹´BÛHuWZ™ÖU@qJÈ»iÓ“u—ggú ßÈÛ,€ʴضTËD$œ“Ï¢P)¾ŒÒþ41¬ÝÄ…öò6)Xøž©‚=̦Op]WT GEâÉY){=y0Ä“¸ÆÉf(P8‘e õ¤\<Ò4@ÖD_Dýž !¤d ú¿øÇY jeßôO%åå*ñ/,â)Ì‘KÕÞãšaa~Û°h¡ïCÎdÊOp$8JR¥Dž}@2G‰ŠÕD3Ur;•Ï £zȦE¿W&#¶# ñ%»¿RÚâE¾òy@–/ÕŸXÔühaªÿOS'kè‰q333˜˜˜ÀÌÌ víÚŠrÝbÛúõÀ–-‰×¢ÐE¾Ÿì˜¢3¼À :<bÙ'5¦6u» sþ½÷âô28*ÝæÃb7o¦OPff€˜×t9ñ°~] §rÀ—€®]}-Ž~z 4óG0 ’$…u®’V=Gs/év+¨ ˆ<=¶mGƒz]ƒç‘®ªº PòR£1QâDžfzéÏEäÁ÷F³ô*3¹Ë*ˆ<ÞT&|â Ã01‚ =âê^“–¤(oM¡pZ)ælW änOŽN”sÒ†˜oN"Z¡ úCÉÅ`ÁÅ0U D*ÞTU ïÑåUÜ•jÞ ”·‘ìüõ†9Å©'œª´ï oCIc yÄc>ÿæ7'Ÿœ8M€8½äŒBÞãñÓèçƒ+Œ5å/ bÇŽÝnWÈ‘sÏ…ôàƒásÛ¦ð#žçU¯DBƒÀx‚ß•1¿cz“_xó¯|eâµ ’U ¦xà_ù ÔO~²æ&ñ‚4Ä@µÀ$UÂ}[ïù§ž‹ü ý^«yÌû?ã…¯t÷ÆdÛ6dY޼sQepOù¨â ôßüæ®¶©Qµ< ¢Êyñ9]®´Ý‡|àqYxsd ráñ<ÀPÄD“B€R.*³‡x>&ºßÅÓ¾qï Š™Ž‡ÑûBUÍs‚*£Ôº¯V›l–B¥^ïûxa;‡¦2=‚RE¤ï{¥¿eRÕÀóèBUUlÁ–ú?s9òðÃÀkNïjhÀïû>2™ …BU¯DGQ Ñä „‡ q­#.h¶%¯—.—$ ï¿4¢B€ø±üB6ùÀdà.¸PïÉ&$3`³“<¾«DB–—¯¬EåàãUª¼XæH*7LÄE5j;•&âUh:füØqª…QŽ&öâ笮 ¨*Ý~ûJï˜Nbcñðê…&ã”q™qAà”sÏ­<´øÍy_5‚áé«C,DÐÂE€[º÷eÑðxÝó<AUUá8|ßÇÿùXˆ¨ÖˆÓsøð ˜‹ÍõD^Táâ;Úˆ M‚Yaô¼w|Ý:à%"†‡<TÏqãÄþÀ!Lסé–-<ÌgúÕ‹3™ê‹Ó÷¸Gþß¼éú7á±O<ß÷¡ëz㫼ËùáƒúžÓ»Ý ÑYF¡ts…‚p‘Ëó<˜%F£ÙÓÓÀÄD·›Ý{x^Ò{«ÜÛ‹Ä5Ï‹D&â@l7îc¥}Þ(Só€7(b☆˜8ª\ øŸ¹¨Ô8ä´¨Må³Ü\NüÅs-Dyûꡯ•f=]šÙ§ä¶{èèQâ–3 •ÊnäqZëã¨Òo­Kn1Í)@²rR­œh}‚ïû.¾89e#ÑŸVö–êé ÊÛFaö*€!DÞ¿ñ\–qühÓÊEAaŽ[Û–/>éùçñàƒB~ë[ïÿ}°íï&"* /õ„Ç¥…%2Ëžâ<ÏÃSW_ œwÞâS…¬4–Xv‡aZ‡àÌ_ý 8÷ÜÄë¦iBUU¨Š*:ß\S‡g˜Îò P«ôÇ"!mì…RÒâ æ.À¦Wm¹È>.C_.KøKåüó!Ÿ ÷` Ç,ËP%é˜*8Ž‹,‰T”ÊÊ}ª*TkQmJüÿß.p­*Æ0CòÐInATA†ñç%×J¯+±²¶q›ÀÆKÆ3L§!HŒ«MZéúR”äuæyQÒ¥ø~¾/®³øs ‹&±·ÜãÓugd]`w,,:’¡Í&ÄdÙ÷"Áˆ„0I×ëyN*•H,¿¾þ-ü ¢’Qùõ¥¤öFÒû*^ôgŠ>0"«%`€¦áª;¯êö¯Ù¾ï‹‚¢ôýÞù|kÄSÑÜ0d%Ó9m˜¦æ¼Õ4mÅå½eù—¿Ä3k׆¿ý;Þ±¾¿€q¹ˆr¶Ò,Ä­0zZˆ;öðÃ8÷á‡C!nd˜œmÛÈåbꅋȘو™âÁ#G€«®J¼–Íf…;rÿ.b2LeVáàF®ö*ð«—ý ø°\œÏŸ‹ÀïÝ"a’$!(M.«ýv333Ýnbgðý¨I<Œ’<°4- Ó*C/ãWyÌøö¨À¢äÞ2"Ï 
rA­6—ã0¦ÓT³mª‚è8IPYŽ*öÄûÉȈÄ4Jà^.ükZ$ÖÑçû¾ð–rÝÊÄô²,Dò4­ g~À›õèš#ñËuK׿ ãšRÝKt3€«!Ž‘BT-˜r]V×o<§˜ !MCñri[:ÝÙlTÁØðà JÛ~€l8þö›@;e9é ¨(Ânáâç ¥sc"ªhj ò4Œ›K“kñtô¢æóÂÆŠäøqL‹x¨¬ U]¤ÿ‰‡¨ëhÚ™þ¥§…¸gO=/”ÂRm;ºÿº®[ÙÑ‘§ vüÃt²á ,ˆ‰{p2ý³φƒ@äPžœ¬2^¨ÀÓ7‰?6Q]Hs ®b2NÞn]O|Ù0Ý€„gêØ)Ü™®8–%þF´]<`ùÍA’*½ÑCTh«^ ²üÞBו²@(ê’Uãír%î7bÇO‰²ž4ϼvIÊ"ª@ì ŠÚ!VÚ‡ÂËɉ`âú§Q?@ÉÝãé(¹»üðÂâ7ðÍþê]ÁóåB¼H«&Ò÷èâm³vìy^|1”'žN?½þÜŒè|Çók‘·GTþ£\oäíÖgS ü²t4ö/‡œ@}?ÊŠ ëÉô‡”¾ Ô£^)u!=€Í›£mU5ª_¢(ÑÜšŠÔÒ¸ŽÒRjÆxÍrŒ-ßDzj |q>~yë­ÀÕW‡¯­^½éÏÒ<`ÜP)Ž©*0Pèzt£ª6yÏf“…Ä(¯1mŸÉDvŠ"nü}¥ë⹈DLj ƒÌò¢;ö ²¨'Ÿ,žW„᪥bnûnûØm¸ü†ËÅ$ˆ…‡ V­aÍD;2‚fì¸&4È0ŒÆ&‚äï@LèS¥ç垢ì¹ÌÔ ¥v\‹T*šÍ Ýu¥Ï@ˆXñ~¼IɃ-.®ˆ Ð> ¢ê‚DyÖ€(„3…(L1ˆ;ƒ(<œÆ8R•65‚„¤¿iÖŽ­_éµk¡lØPŸ]RÞM Šžž­­8<ŠÆÅŽŒÝ+®ðôÓgA–Õp¬L*ñ}i,].BÕãL:9™¬Q²Ø8›„¸84g ÓÅ‘1­c)ýñ†©)Ȳ Ç¿óÐЦ§“³¢éˆ777‡ÁÁÁÐå³^WÏmO> åĉPõDnß÷“É¿Mp8*ÓVšµá“Ÿz O¼ô¥ásI’pàÀèÛ->x«“j7\Z¡#ªE›Ä)¿ù“u¨obÐáy‘È72’x;zÜ5žVïhГɈ×(¥#-ÈS.çòüæ’$>¾"ôý¨’= úÈÄD3hÀTëœézå ¦ï‹ç²­^ÆÛ_Á*Ýõ«A9¨m[´©ü9qÒI—6ö÷ˆfí8@”»ßÅ(¯N]J@¿ãÄà™Òk,æV…Eî¥Ñ¬W`š¢qÝäêÇBP2w S+@ÙÒcOè @Tøt Ñ<=Þ…Ðð¥|MQ®ò8qW¾ ×5ÈѧÚvt]—#/p\x*îõ€·©€DUE‚Rxß¿úÀ8Wîñ-±þõ)wxÀœ­ƒŠØænxN.×KQ¾&ð€cÎ.ý'óPv’R¾¡ødÛuÅ~O”¶]à—>ðß àŠRLy‡UxfíÚú~ûÓ2;^²ñ^‰‡§‚ ÄØ=‡Hd“y°Y×…Š’@GÏ·8„(4Ñ-3>Á­æÝ&!)ì1M³;þÎi§A›˜¨oEÊA”ލŠá$IDŠP¿dÛB±,iB…vï¾ûFÌͽÓÓ—@–wH6£õúÑ ~¥ÐŠþX’¤p.4½Ø˜‚û⦦¦0336ùØØXÕíŸ~úiLLL`ÃÚµÀ©§&:à eª› â–9sss˜ššÂ“O>Ù•ÏoÔ†àÉ'ŸÄ±X%‘ÄÔE.'Cî¡eÓjãó¸P Ë•níñ}¢jÆÉyÛÅãºÑ¼7žƒ?q¡ˆÂvI8K§ÅqE䨈'­5¸¡öÑq⫈ñ|ÔµŽQ¾êIaqÊ=u=N@^sss8pà>ìÛw°½?j µãgžyGŽÁªƒ¡oÛÎ[âç>$ ì¿|?ÞyÇ;±ñcñ—Ö_BÕTÀ^Ì®ë"8ŽUUáº.4Mƒçye9ªžæyÐužçAQø¾ŸÈÍâºn¸½¢(á~º®‡Ç'QöK£sI’AXx@–åð½øç׃,ËáŸmÛagÇqÂcÐçÐýжÕuù¼†»® ß÷¡ë:\×…ªªð}®ëB×uø¾ ÂsHá¾ï‡ç‡Þ§Ï’$ ¾ïW$“¦ï¿TfffpðàAÌÏÏ·Ò<ë¦Q;žŸŸÇÁƒ100…PÆhIªéʪÜ'œø[`?€üïÒ¼ðþHhƒ=ö=öÍ!K›ÕÌO¯—‹uôË=ë‹¿M p²$ÿÄi“N€Ó!D¨ç!tmÝýþÙ)¸ßNQ€ÁR É·½ˆ'„<âg©À³÷¿ˆÍWŸ„ó³â3§ßö"Nœ8ÑïŒÁãÇqà£Ç0wp#೟-bÕªU8~ÒIØ,›7ÏáŒS7â—žÄZOÂeƒ/à?ŠOãô_ž„Ÿ}ãd̯Y ÀêÕ«qâ̧ðÜ¿¯Ææ—[¶>ƒM‡ŸÇäÃgàÜŸFñØFüà™gðÈ+€ÖÆõëñ̳Ïâ…‡ÖàÌ G±qãF|î®y<³v-¦Ï<'Ö­Æ?<ð0·ñyîˆGÜÀÀÂç‹­äyæ™Ãßþßÿ+Š¥Ê¼R,ˆ›9U;ªVœYV bllL¬ãŒ3 ( f®º ¸ùf`×.XV$„Z–Ïó°zõjüýËÿ«ÿt5.üÂ…øö;¾ ®½`Ñöh5b4óùÆJJʲ\óXôþB¨u„ÅÐ6ñm©ªZ6›  UUáy^XÔ“€t:\.‡T*…[nÙ…'àœsîA>Ÿ‡ã80Mù|étªªB×u˜¦ kžç!ŸÏc¤”ºP( NC’$hšŠxŽã 
d³ÙPT¶m‡D2âLMÓ`Y`rr2}ßG:F¡P€eYð}ÓÓÓ¸õÖ[qÎ9ç,zÞÚA£v|Î9ç$Å ËJ” †ñ"ŒbÊg¼À©n¿§S:ŸfÉ ²|¢÷ßÿ<ðÀX¿þ7pòÉ'c~þ•8çœ+ð ӸòJ©T \ðçxíkåÐKtjj/.½ô8.¿üõ¡xÙïŽÒo%•DíOVL܃àõU…µz·~A/‹£kßhb…Fˆ¡{qF©M'ivl\“tzÉI'Ÿøå8ý§‡ÏiÑâü/žû?pâµrâø>ïXýüሇÖ<„Ï­þžzíSØò®-¸ìÙËð¿†þÞÿò÷ãÇÿòc?‹Ë/ÿ$ÞøÆGöìy®¼r/>üáïCUUÜzëS8õÔSË´‘Ó (÷®°­Žd‚å^µX."ÜrbÉvDzvw]7ô&€u?\‡Ëž½ =ôׄmúü©øóÿŸô8ÜÇ]ø’‹TIÅæ‘Í(¾­ˆÇ„‘7 AÂO<õW¬ÇÚ±µ€œëž‹Ë¼ËÄÅUÊù–ÉfÚz^ëYX©EQj^7Õ^‹Õ¨v}Æ·_ª—ÖÚ.å:\Š?ôk¿”¼Gªa~Pδ, ¢0ÔÅ /GY–^ØcV&K±ãà…0ø­oákg~»w‹kyAÑ—ª33+žŽq;vì€ëºØ¹s'pøðápå}!6ÌÍ…¹ €˜+7¤¤7ô™fmxõ‹/â’ÁAxž<üö›ÿø‰d1Žé ÍÚ1`˰Ï3øë?þ2²Ùl4‰HøDi;¶í‘$ ›7—^Z^©¸òfVMˆ‰ðâï“ÐW.ÒäðƒˆIáÆ€ª^\ú=èCÄEpÕU§võ<1½Í’ì8Îí>ðUøÕÝ+(œ×ó¼Ðà ˆÂ(i@vdÌÝ5‡þËqÒÕ'EU?Þ@Oa‚”ÆðA@ùªü+ 8]>ƒ˜hê¥ÿäØF.§Ì²¢Y;v]¯>唚^œžøÿßðëâµz͇™šñ’eV&KêŸ}÷nÚ„}ûJà‚ªaä SNÇBSo¹åLMMannÃÃà ÷Ϫø>žx"ñ…Þ¨¾š¬~Ä0m¦)°æ…0÷ÊQ|ÆáÕÿxæ?ŠÁ(‹LhÖŽqô(pê©øŸû&Þø¦7BUU<ðÓðćž$à\Òí¯×Wt³3—‹Q’Wåéæ}“e¦i†tÇqùî(ä•rlu[ˆkÚŽ “÷禬RJß7]úþTøC×õ¨ZÂ0¹î<¼ä쨶C·ó߯dⓜòðÈxþ9òü ‚¦i†×ƒiš0 ’$!“É`Ë–-]ùK²câC>ði`gT”ò‘,åØSU5Ñ'D'4ú?pÉð~WÑiˆB 9.ƒ¨Z°_z<!ÌÕBŒÒë„{ž§åX†4kÇ?Ù¼Oœ|rÕ÷œ«íÃ@þuE¸Å ïN]×Y€cf©ýñÃÇŽ…Þšñ±SU¸À$S¢#BV+«ƒïýÓ?áEA&¯’XZ»¦j8ˆ‹L‡hĆà© pÏ‘õ‘+}€¨Ãt‰FíÇŽ[¶àþÏïà%o9Žì_ŠÁÅÚ»×â’—àÙËže¹Oˆ{ÑÅÅ9*ÒÌñãIíIvžç…9-Ë mtóæÍ˜œœ„,˰,+L¸N•ŠË'<”¯‹Âµã!€ñdžaà¶ÛnëêùnÊŽ!ç2ÀÇuàzñýMÓ ½_gggÃ0öšÕ’}DL!º©ãkzˆÅobü"C$Ï7 r„ª½Ð= IçQq^ g¥ÂTe–BܶoßÞíf7…«(Àý€þUྭ÷!ý¹tXt#€à«€|—ßòùÒwÍílÛs_e2™0Ï UÑ¥Âô˜D3ªPJP"}ß÷aYV¢B/%³O§Ó˜œœ€0‡!UM&!.žŸB$1¡ŽçvŠçNŒW¢‹{¾ÄszQ•ø±‰Væåê*¶œ%ï×Ãs ˆ0u+×ÌYfBLI|š†H1  viј›ÄKBïþûµ´Ÿç!,R%I‘ÐU­! 
I¶-Ä­¸ˆ•N‹ýU5* +Ëb{]¯ÑQë+ǧÙlEÁd(JuO×(Oµ86í§ª‘VËŒ¥¬Ø*ŸW£¼Ø¢aôoµøZxì1œuᅉװþ (œ„º¼'}ßG&“A¡P`ŽéÊÁƒ8|Æ;B!®*îCô£>Ø;˜ÐÃBÜüàWóóx´4H°m;¤Ñ‚Õd¦Ç9¶~=€9È2 I26oÞÜí&1LÃxkÖ uÒIxáØËpÉÄ@èý6óÍ n“37L¯j$ZdËfƒ4Ñq]·kS—Š {>pð⃸醛Ïç¡( Üæ?úÇ=ß?k|ñÐLòSU–e…ÕsÓé´HëQš!øD9%IJ„,ÆÇWöã]z ƒÐqãŸA"\ù1k‰bñ¶1up\þ<‹ûþõ>ìÿÔ~Hï—*®ÙRJ9Þ4ˆ”xß-AxµÅ~ÇB™B*½Gùoñ_•¢×}_ì'Iâ=òZ#Ѭ\øÒõdȾ¢DbµåJѪžÅr3«'Uyá…§JjMˆz¼Z*y£AÇqÂk+^]•æC’$…ù<ûñºyò%/ÁöçŸO¼f}(œÈï­ï²,W·f˜òâ‹/âWÏ ‡ýÃÈÈHâþzýw¹2mâ¤n7 O=õ^˜_5…ÉÀE´‚Ç0=Ά©)Üwß@iWÁú‡×w»I ÓÝq†^ºJ JƒŠ»p·˜ÄqÎΆ(Ä_?ÍTU ½¢h²ÿøãw»Y a= ¨_” ÀÄÄ>ö± °Ó²{z¯*3…nQ‘ ò„F‰É©Ú+ <Åèw"G@hä-ÕŠ |¹§{¥tÀÝ ð[2n¸á|ðm\X„ÄØÙ‚ð|#ï6¶ ±-ý3!²¥RÂð¼¤àDýW¡ÌÎFã¯ëI¯3E‰<È(´³ßò,R!@D8ÐõHžÃ´ Áñ}CCCáãTIÁ ‚ |Òét(¢ÓµL^È„ëºáóøëäY ÷ÜsO·OQCl8tƒ'N„¹8- PÞPáé‡3™LX@¨EHfùxñ¹çpöÙg‡ý]E¥^vbjгB\¡XÄI÷nªœ¨(`cfúŠÍ›7C–£*N ÓwlÞŒ/ޱʀ$<%Jó€w¼ãì^¿B ª¬”okÛ¶mxì±ÇºÝ¬†ðŸÔyQöŒß9\pL°- ðA@ýŸo‰l¶m‡KJô ˆœ_’$%·xN´¸0¯¨Ë,s$9À´L\zé¥8ï¼ójoëA¸‚úbÏ(í/©`hLˆmú-B<+DX6‘ÍÆ+÷TÜHŠf¦i†×Z&“ _O§Óáë¦i†×ÀÿÇÞßÇÉU×wÿø²p’à†‚'D² Šœ5QQ©íY½”½¢3Ú…Z:SÓÆêÕÚ™Š¶V/¼fz]×5-íŽö÷EÅVwt+^E+{ªõž{"(º°§@²$ ä„ !‰Üä÷Ç™÷9gfgvgwçv÷ýÌ#9sn>sæ}>çóy÷ åX<ÄÚ0ŒÐãÓ4M&J.}¦i†“sÃ0Âåccc¡ ÷îŠ «¹\.<ž„“C¹Çê‹*Â<;#çŸO¿iBÜB“sŸ'˜ãÍ¢­e2ý•ŽÀV]pôÆBÕUVê£cCS^²bEøÄl``€‘¡‘Àµ~‘åIP/½GbðØÇãÄ'æœxVQ:‚5k¸ë‘UXëO s9Žƒ­Õæ…äBª'WP§2çꎀ{äA>rúwI‹ä&rd³àÿÆz¿nC{\—l6;-¿Q¼0F\OäuºÄ±‚>¸X,r÷ÿ½žþ)`’—Ú xn<L$ Ï?Ø$Ägú³–R=“2­Ä¬øÜ¤p䟡é3Ù 6„•YIJ¬Ph‹Úg 1T–6Þa„IêÏðÎ"Ì/ýýžw}žWü¸§‚5|6Pe·sžX„…á±™ÃûcÇŽqèÐ!Ö­[Ç¡C‡xà¸ì²Ë8tèwß}7W^y%`tt”7¿ùÍ<ðÀÜëû =†Á³6àÿ50ÀcÏ>K0Þþvö<ýtðÝJ• Ò±×¼ã|øŸ0Mõ1Ï8£¡Ý€œŸøy•ß§ì¸L-+Û!¿©Û>¾_xü-oi`ë›Ïñ3»el˜šyýjÕ‘¥8~ü8«W¯¨^¥ÚŸÇN•%AÇ q§žtö{ä\±8gêœÀÇЉŸÒìÛ´)œ¨9í«ÜU ë8Bé2~¾|9öÅObJ4ƒ ¢qó#•ê~!®ÛøÙOpùøŽãá_äv<¥nàÉÓfYù|>œ<ÆCÑFºÙRi?ŸÍ2¼lc·Ñ4Éž îšÈËêlîþ¿°r¼Û†ëþÌŸäöl Æ[o´j>Ÿ'“Éàû>Éd’‘‘ü£GÉþõ_“Ä?q‚Â7¿Iæ²ËðOœÀ}úéàÖ²n¼ùÍÁN.¼0øpÎ9Á€ÓN+?Ø©§¶å'é¶}õ«ðú×·»uó˳Ï ðm°f Û,‹Ø¶­^ÀJG""úUWM=‘Нš…2›#îàªU¼äèQyË#Á£Ž¼ŠÒIœöOSd³ð6þŸ I²N¾•.dõ/É›Þtó–àý_~)w‰Pê&¨¢ÜîV,-Ž=q+=G.—Ãq pÁC‘ˆÉñÜSýýýa2wÉ/•H$Ê*„*JC8Ùå_ü"Üx#ýÂÐÇ`È3 ÷¿vL€û‡ðÑ 0O#*ªÓ`<Ï«šçP®‡8ñ꼕¯3%¯5ÉA©,nV?ÎúÄû#Èÿ`®¢æGŸZ)Œw쌎rÍ5O…ÕŽ)Dñe ÆÉª](5èX8DZ±m8ñ¶A­C”.âü_þ’ö¿ ëjH$þþõ?BV”¢÷á‡Ù·ï•¡7çåo»<ððÉêÜ3g'OUZǽ§¯dãÏîÄ|ͯ“L–òó-pp,IÜ3™ 
cccáry"®yŒ”†ãû<ô¢p䦛¸ëœs£Æø!ð>Ö8 ®$˜3”çT^ 7¾ëºX–E±XÄ÷}R©Åb1ôúŒ0ˆ_™Xh¨^ Àªƒ9~òÉ›`èÕÀ5µ×°P”ŽãÀ¬Àx›…ç¹;_&ˆàë/­£^•t¬GÜÃ\@±`ݺuÕ“&(J‡ó‚·‘0‹*&+]ÇS§ŸÎ³÷?Ç÷¿ö¢Ð? ³SÇ…yáºàkΖòäs'8êa#cc0W=Àý`A†­L&S&2(JÓù7—?úÀG¹âá‡"».ø&X×Bj]iáúæ¸'[6› ==óù<@Y˜`&“ óv%‰é¹‘¥ çŸÏù?Žñ>°ªˆpžçÑßß?çý*JËyðA¾sÏ«pÝà!\:™"F íB:•èX!ús˜úö÷?~àæ©(]Ê7¿ùÍÀN+]ƱãÇY~è\ÊôŒÛªAÌ‹Llw+–®}gßù@ýaÁ’ï ñMB¤4Ç›Ò6¾ërϦ¼ç”S€@„K&Áÿ‚}C^ÊsâÇ =<‹Åb(º‰¸fš¦VíUÆŠgžaõáÃjÌíâö¦(Œ·f =aš¥‡Qÿ›!H­¥­•t¬wÊ¡CXo€#o:½gÜ$™U”. <ál8ç3ç´»9Š2oÎ~vöµðÀÙèSj¥+ù·«ÞÍÄyýÌÅ|‹Åb˜ïmllLsW)mçŸÏ]Ë‹¿µ'´EÀ¡!0ÿs~`íy^èÝišf6÷ô¬ZPQÈžç=ŸOöáÞP¾ÜuݰÏÕ0f¥øå²eôÿê X|ä])÷€³ ¼ãô¹…RƒŽÍw|ÿ œçÀ~çy\uÞUðT»[¤(sãСC|ëÉßЧÇJWsøáóð.4.,Ëý£(ÝÂñÓ®æ?O{9©ßŸy=I¶œJ¥Â;Eé>ýúñëwÄ÷ƒðöP«˜ƒ‡í† ²¬Ð»3.Ä)J³9ëðazùÂiú„çy*ü*]ʼn©)N?ãJ>÷§Ÿ „¸ø3»"š\©IÇzÄY×ïÄû6Üõí»"EYQºˆ»Ÿ÷<Î9ÿQxÇÓª‡)J·0~晌ÿ븭Ý-Q”ùó§¯ÿ9÷Ý÷Rê)bêk?¥Cùbòݼ±g ÙlPô¥^ÒétXÑtbbB*måùÇŽñè7z°/._žH$ÔóXé*Nœ8AñžR¢C“é¢[¶Ý-T:™Žâî6 Ö ?E± 1©Jw³æ?Öè>¥{yî9?p6Æ‚ÄÝŠÒ<ùä¹\xás5óÃmذ!ôÆÐ J§âûKN}ÅÅ8Ìæ°™Ífüoƒƒƒ*¾)…7 æç‚×q[U”nâ»O¾‚‹.:™b±HÁ-LCÕ e:64õÁã—pÉ‘•\ý/7¶»)Š2/žxâ"N}úTz.ëÑŠ¿JWÓ»ö ½$êq'R”ä'?9›7¼¡üÙ£ëºa"ú°°¢t0«Ž¾–›þñLjiÅÅbß÷I¥Rär¹v7WQªrÊÃ1W0Ó‡J·ò;7þ Ï­}%–eqîOϾ‚Îÿ”èX¸žÿBÌW´»Š2V®ìžÀàІCínŠ¢Ì›ÞÞÎÅOœ[s*Ý˺u¯âmo;«l™ˆŠÒ-üðªU½á$Á½mÛúÀDéxNÛ· ë…Aß÷1 C#G”®äÑŸáÕ¯^išœñgg´»9J—Ñ2!njjŠ[n¹…;v0:::ëú÷î9ƒ?ÜÑÖ“£(qæjçžz*'=rŸ?öùv7]QBæjÇÿi˜ô=³Ž¬§a©Jç0W;Þ¸q#¢#KÎÎ\.§â²ÒVæjÇ?¹øåÓ¼áŠÅb˜ÿM ¥ÌÕŽÙô#R_l7ŸÏ·»ùŠÌÝŽîYöBÎ:ë _ÿ»¯‡žŠR/-â²Ù,“““ôõõ‘ÍfqfÉ2k­†­7om÷ùQ”¹Úðúõ»ùû«Ù~Ëöv7]QBæjÇç'æ¥hE=¥£˜«ÿñïÅ0.™L¶»ùŠÌÝŽ‹EH$O¢t:  î5 Ui'sµãžžÇ0M4„Zé(æjÇSS§pIïq¬¢U^-UQê %9âÆÇÇÙ·o7ß|3(ÎÃÃÃ3VÆI½W÷¶ûü( 0?Þ´o“vÊJG1;~ñ²lé_†5[fpEió±cÁ4MÆÆ4{²Ò~æcÇ##`àûhuI¥#˜o\,5ŒZéækÇ¿õ[WpÙÇ7šêP™#-ñˆ¥¯¯/|¿eË–Y]>ýõí¯ž³mÛ¶v7ÑÑQvìhˆî\Üt#ó±áïþ=Ò¤ÛÞnµŸˆN¸¦ÛÉ|ì¸÷¥·3¶b°­íVûéÌv´‹ùØñßÿýß·½*_§Ø±¶£3˜ Ù°ºo»DŒNé´Á|ìø—¿üe˜" íÖñq„ÚñÜìx÷îݸ®ËÿxôÎ øá–ºýÄÛ¨‡–xÄ>|˜õëׇï{{götûùÏÎOú1?¸ö¬ZµªM¬ÊîÝ»ÛnXO<ñSSSŒ·µ{÷îåÎ;ïäÌ3ÏlËñ9ÂÞ½{9räH[Ž?Wx衇طo_[mHí§œv_ÓbÇÏ>ûl[Ž?W;Þ¹s'ƒcƒ¬\¹’»·ÝÝ–6ƒÚO§µãÑGåÑGeíÚµlßÞúÐû¹Úñw¿û]|ðA|ßç®»îjy{…N±cmGÄîÝ»9vìL¼ZÉ|ÆÆSSSÜÿýœ{î¹³í¾i´»ÿÑv”Ó­ããÇœ|°-mWÒn;~ôÑG9xð`׌wïÞÍ/~ñ 
îñîá˾ÌSÛžjK»;…vÛO§°wï^<ÈòåËëZ¿%BÜ\ùÎw¾Óî&(Ê‚é„'\в¶oßÞ¡EQ‰T”T”nFÇÆÊbAÇÇJ·ó¿þ×ÿjw”E@KBSûúúÊž8ŒÓÓÓÓîï®(u£6¬,ÔŽ•Å€Ú±²P;VjÇÊb@íXi-⦦¦pG“Ì*]…Ú°²P;VjÇÊb@íXY ¨+‹µc¥œò‘|ä#Í>ˆ(Ê7Þx#žçqûí·óÑ~T•f¥kPVjÇÊb@íXY ¨+‹µce1 v¬´ƒ“Nœ8q¢U›œœdß¾}ôõõ©a+]‰Ú°²P;VjÇÊb@íXY ¨+‹µc¥•´TˆSEQEQEQEQ”¥JKrÄ)Š¢(Š¢(Š¢(Š¢(ÊR§%9â:©©) …;wîdrr’¾¾¾pù¿üË¿ð½ï}€õë×—mSë³F´eË–-u«íÆqœ²s1Ó±šu.”ú©eÃòY'Ûq³Ú vÜ}ÌÇŽ›ù»uª·ãšVêGíx:ÚwjÇÓÑþ¸ûÐññtÔŽÇR;7¤™t Ôn–œGÜÔÔ×\s THq‡L&@6› ,›Íâ8N¸ÝLŸ-„;vpË-·”-ke;vìØÁèè(›7ofxx˜;vÌz¬f ¥>f²aè|;nFÔŽ»ùÚq3·Nµãv\ÓJ}¨W?¾öÇÝ…ÚqõãkÜ]èø¸úñÕŽÇR:7¦™t Ôn–µûË´šññqzzzؾ};›7oæu¯{ãããìÛ·›o¾ŒoxxÛ¶gül!8Ž–IŽ·¯U혜œddd„¯}ík@ ÐŽŒŒÌØŽÞÞÞ¦œ ¥~jÙ°|ÖÉvÜ ûQ;îNæcÇÍüÝ:ÕŽÛqM+õ£v\ŽöÇ݉Úq9Úw':>.Gí¸±,µsÓIšI§ÐhífÉyÄ­_¿žn¸!|øðaFGGËÜ—·lÙÂèè謟͗©©)>õ©O•µ¥ÕíýMNN†û¹îºëfìܹ“[n¹˲°, ˲B÷ÅVµ£···L¥íëë ãžk«ÑmPæG5†Î·ãfØÚq÷2W;nÆïÖévÜÊkZ™jÇÚw/jÇÚw/:>ŽP;n,KñÜ´[3隥Ý,9¸ááazzz¦Å÷Ê‰ššš¢§§Çq¦\µÏæƒ$ì,ËÂu] pýlU;¶lÙÂ-·Üîott4t™¬u¬F·A™;µl:ߎ›a?jÇÝÉ|츿[§Ûq+¯ie£ýqw¢v\ŽöÇ݉ŽËQ;n,KíÜt‚fÒ)4K»YrBœ$¬5Sp]—k¯½–k®¹†-[¶à8·Þz+ëævÌçX­<ÊÜét;nFÔŽò»u‚wÊ5­Ìµcíò»u‚kܽtÊo§vܽ蹙ý<,Ås4Ÿó¡Bœ¢(Š¢(Š¢(Š¢(Š¢´€%Y¬AQEQEQEQEQZ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ” Bœ¢(Š¢(Š¢(Š¢(Š¢´âEQEQEQEQ¥¨§(Š¢(Š¢(Š¢(Š¢(-@…8EQEQEQEQEi*Ä)Š¢(Š¢(Š¢(Š¢(J P!NQEQEQEQEQZ€ qŠ¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ” Bœ¢(Š¢(Š¢(Š¢(Š¢´âEQEQEQEQ¥¨§(Š¢(Š¢(Š¢(Š¢(-@…8EQEQEQEQEi*Ä)Š¢(Š¢(Š¢(Š¢(J P!NQEQEQEQEQZ€ qŠ¢(Š¢(Š¢(Š¢(ŠÒºBˆhw5…Bl6K¡PXÐ~\×%›Í¶ûët,jÇ…ïûd³Ùi¶¯v\µáî¡;–k`©¡vÜyTëÕ†gFí¸¹´rl¼”íÔ–›Í\mYÇÇsGm¸{è$;î !Îqœv7¡.N:é¤v7aÎd³YÇÁ¶í°£ž/¾ïãºn»¿RÇ¢vÜYô÷÷`YV™í«צ[l–Ž×¢;Îf³äóùv7µå¨wÕúcµá™é;îFnõØx)Û1¨-7“ùزŽçN·Ø0t§7’N²ãeín@5|ß§X,H$füÌó<,Ë X,ây‰DÓ4Ãõ=ÏÃ0 ŠÅ"–eaÛö´uë]¯òƒJg'û3M“B¡€išÓ¾Kµï-mp'ü~µÞ§R©Y÷3Ówñt|ÜÙãŠnÑ+:Î#Î÷}pÏóH&“eŸ àº.žçÑßߪôÉd’B¡nmH&“áÓÕd2îÃuÝЕ´Þõj7lyŸN§I&“ø¾O>ŸŸõ)„l300€çy³¾Ÿi?õ|—øE(íÎd23¶_ö-¿‹§’B¡@:Æ0Œº·YLÌdǵlXÎÍ|íx¡6,ÇÀ6æcòVÚñLöUë;È%þ}«Ý¼–²Ï·/–sSiÇ­ê‹e0;n” ÏåûÔ²¯™Ú?ŸþX~Ûl6Ëàà`›¬«utë˜Bö‹ßŽëé—² Ë÷íF;^*cãøþgê—ºËwÖññwÞygÓûãùزŽgGÇÇ?®è½âD‡‘ËåN$‰ðýààà iæààà Û¶ÃÏ2™Ì Û¶O °,+\>11qÂ0Œ'Nœ8122¾–m2™Lø^ö]ïz3_gddäpâàÁƒáûxÛ«!ÛLLLÔõ~¦ýÔó]2™Ì ˲N˜¦y"‘Hœ0 ãÄàààŒí—}Ëò‰‰‰²shÛö‰ÁÁÁ–e•m[k›ÅJ-;®eÃ'NœX°7†ãëÍdžã۵ʎg³ÉZß!“ÉœN' 
Ã(kŸÚñüúâ'jÛq+ûâÊýµ«/žË÷©e_3µ>ýñ‰'¾|.ç³[éæ1Eåþ«Ëþ*ûcµáˆn¶ã¥06Žï¿ší«Gèø˜°kv<_[ÖññÌèø¸óÇÝ¢Wt\hª¸5 ñЄJ×NÛ¶CÕÔ0Œ2Õ2îæ]¯[a£ÝmÛŸzÕ‹eYeOf{¿Ðïâû~è²,곸‘Öj¿eYárÓ4±,«ìi§ëº˜¦Y¶m­m*CO µì¸– ˹[¨w‚ K;ZiÇ3Ùdµïà8Nè®/.ÍÉd’±±1@íXÎÑ\ûb9wµìx)öÅsýÞ•öµk×®Û?×þ8ŸÏ‡îÿKSt¾×ês¹œÚpì©wöØXö_ÍöÕŽ#t|l…^g­èçjË:>žwþ¸¢Ö6¦Wt\hê\Œ!Þ ›¦‰mÛáÿ‘‘‘v•†|ÿÙÞ/Ó4§]8óÉSQù;ž·žVŒÛaÇ'Ç0Fü‰ž ej‰ï•jC¢\Æã{[‰çyxž×U†ow=çM\釆†ÂØêÙž®Ìg›ÅB¥ÏfòLí¸~b_s9×KÕŽçÓË2µãúѾµ¹t㘖† p¾;n´ãn³áÊv×{Þ´?ž:>n}»çrÞt|<;:>n ­·ÃŽ;Î#N¨fRêWŒ¹P(”)•N`X‰çyeî§•är¹°z¶od[êe>†fÆœ/àùlÓíT~ßÙlškÇõÚp+츑6 ó³¯ùœë¥fÇóé‹Aíx®û´omÝ<¦X*vÜìó½èf;^ÈölK½Ìw¦ýq}èøxam™ ó±eÏŽŽÞ–¹Ðªñq+í¸c=âjáºn—-.žJëð<ÇqŠ;ÍÚf1£6ÜXZe_jÇå¨7í[ÛƒÚqcQ›ljÇíGmá¨w'jûå¨7–VÛaÇ]'Ä)Š¢(Š¢(Š¢(Š¢(J7ÒqUSEQEQEQEQe1Ò‘9â>üásî¹ç¶»Üwß}¼ð…/lk>ÌáÇyþóŸßÖv<üðì^½šÕ«W·µ÷Ýw;vìhkêeûöíj?%:É~Úý›<úè£|ìckw3fÅu]¾üå/«ý”èûé„v>|˜ÞÞ^®½öÚvŸŽYùìg?ËäädÛí§SìXÛ1½o{ÛÛ:>G޵³µCÇÇõ£ããr:ÅŽ»e|üïÿþï s饗¶»)A§ØO'pàÀV¬XÁ‡>ô¡Y×íH!â·~ë·ÚÝ î¼óN6oÞÜÖ6ìÞ½›Ý»w·½_ýêWéííeÓ¦MmmÇwÞÙÖãÏ…ƒ¶ýwSû)§®i€›o¾¹ÝM¨‹©©)€¶Ÿ3µŸÎkÇîÝ»™œœl÷©¨‹ÉÉÉŽ°ŸN±cmÇôvH_×ÉèØXÛ1[;ºGtJ?Ø)vÜ-ãã‡z¨#ì¸SèûéFFFøÅ/~Q׺)Ä­ZµŠ-[¶´»¼õ­om{;Ö¯_ϦM›ÚÞiKooo[Ûpæ™g¶û4Ì©­íþÝÔ~Êé„kn`åÊ•<ÿùÏïˆs¦öÓyíèñ`õêÕÚj;j²sçNV®\ÙîS1+:6ÖvÌ„Žç†ŽËé;î–ññúõëÙ¸qcGœ³N Sì§xüñÇ9räH]ëv¤×)lݺµÝM ···í3 W—¢öSN'\ÓÊÜQûéÌv(s£SìXÛ¡,„Né´ÊBÐñq9jÇsãì³Ïn{Xs'¡ö1ÛÐb Š¢(Š¢(Š¢(Š¢(ŠÒTˆSEQEQEQEQ” ¡©Š¢(%¼Ò£ô?¾Ì+½Oné¿|nVéu ð'¶.¥ÏEQEQEQ”¥ qŠ¢t%àEéo\,ƒ@H³bé½[±½_úo‰Ò²"`æÇö-š]Ú²ôÙ`E[dÿý±å6.CÚh{_ÿúvŸ>EQEQEQ”EK4os æ^vé¯ÌÓd>W(½O8T´âEé\/2?¶L1‡H0ˈañÎV:P;¶mž “ÍQ.¶™D‚x±‰X6Äì±IÔ±W#Wú›% Lj<ç Àè÷¿oyK»O¹¢(Š¢(Š¢(Êœ¨Œ š ‡`$ó¬xÄP<")þÞ#˜¿bë'JËØö"º”Ïñ ‚9Dó¶liÝLéý@é¯ÌïÌØëbi½D¬=vìøg”ÞÏ5úI…8EQšJ¼3u:K3öD–Euk “ˆ­?1Ëqí>‹w”ÒÙŠP6vërÓ!ö7ÜõÔS <«Š¢(Š¢(Š¢(ÓɉM.Á*Uú_(ý{%Jë‹Ó€C0ç‘õóDŽ q!,GäL!ÑHñ>"”ÉßTl[ˆ0ìØ>D€ƒ Ò(QÚŽX»enXda^m©Š÷"êÉwˈyr®’DœK %Uã—]Æõ÷Þ[×±UˆSeNÄ=ѠܽW-:/y‚!âU†ÎÌ•6DýOtêÅ&è¼;ñû*Š¢(Š¢(ŠÒý¸ž^9A*]Z&¹«“Àh•&˜£$Äl‚9›8<ˆð•%˜ËØDÑ=ÅÒçƒDs@‰*Næ{*ó@Y?G4?ªt„Ðó<\×%—H'Ü—çqý>ÀW¾ò•ªçÄ÷}’É$###5ÏÛ{ßû^>õ©O•-«œ·å<|\l;øëû`š$‡Ôÿ1œ~ø¾}çÜíûÔƒ qŠ¢Ìˆt¢ò_\oãùØä‰‹IÔ™KëÎæ¹Ö 4#7€AùÍGQEQEQe¡üÑ¡Cü¯³Î¢Häý–,}–"Æ„·!‚¹[¥ÈŸ£¤6 Ï„@¼‹¿¯Ü‡I”’GÞÇ÷iT¼¯\¦ÏÁdýw¿ûݸ®K"&Äù¾Ï'?ùI®½öZ 
…·ÝvŽã`ÛÁVßþö·ùûw¿›|ò“Ü>>ÎØØù|Û¶q‡­;w²âßþ56ðÀÙgó7ßÿ>“ŽCïñãÁ2ІišX–E2™ä³?ÌŠç?Ÿ+W²®XÓÃÏã3_ù }/~1W¼óÑw~üqÖ¨§(Ê\'.ˆV$r·ÝAg.O+j‘ pÕm´Àå-|åûóÉ©§žJï²eüÕŠüê÷Ÿu§†ã8¼ímo㦛nâºë®ãk÷ÜÃOl›“Ÿx‚MçžË)¹ûØÇø“?ù>ýéOsé?ÿ3{÷îå¹çžãï|'ÿñÿÁ W®ä­ýý|úÓŸæšk®áßø¯zÕ«¸þúëùÐ-·°ì/ÿ’}%!Î÷}rCC¬^½šwÕñ».9!.ŸÏãy¶m—)¬Lª …©TªêäÚu]<Ï n!xEY(Éd’ÁÁÁivšÏçÉçó$ ‰¦i–Ý$}ÃÀžšœä¹3Ïä%gœGäŽü¦#GxþþýlÿÁ¸¤·wZ' ¥ø7ˆþ¯Ö.Ë•WˆçyáZÞ§R©iÛ;Žƒçy8Ž~Gß÷ñJßõ‘G!{ÓMxž~îy…B¡ì8¾ï“H$Â×rÓð<ß÷q]Û¶Ãk>›ÍÇñÉÏ÷}lÛÆ4Íðú7M³l[¹!ÉÍÉ/=ñ}ŸƒR((‹á¹r]—“O>¹Ýæ£(Š¢(Š¢(!Ùl£ä$‚Z±X ÇêÅb‘|>išáØ×¶í2QÆó¼Px3M“p.s‘±±1Òé4ŽãÏç±,‹Ö®]ÛîSÐ|ß'ŸÏ“Ëåj®O”Þuìÿí‘GxíªUüðøqÌtš|îsüÖW„bY|Þ•ÍfËö_(p]7@š¦‡2W2 #ü}óù<¾ï“ÉdøèG?J__ß´ÐO×u±<£ú§8¾Ïo¿úÕ<öº×±ûsŸãž{îá‘_ýŠKﺋ?åÅ/~q(Ì&,‹O¾èEܶs'ÿþ–·ð7§ÆÀªUüûð0›Ï9‡›î¿?´£—¾ô¥üë²e¼çÀölÝÊ¿qŸüÜç8¯Ô†W¿úÕLMMñSËⓟü$ßúÖ·‚sQ:|kt”}_ù ?úÑøñL¡Pà_ÿõ_Ù¼y3…BO|âüÓ?ýwÜq·Ür †að»¿û»¼ãïÀu]>ô¡ñå/™-[¶ðÐCqÉ%—pöÙgÓÛÛ[—,)!ÎuÝЀ à Ïu]ŠÅb8‰/ d2‰Éd2ìT\×Å0 ÃÀ¶íšÊ}\-n4õì[.œš ué;×ú\ÎS*•*aäÜ ‰D"܇œŸøÿT*v¾ï“Íf)‹LLL” "hX–…ã8a§.S\¼ùå/ÙðsÚm$“Éð)“œ;~9™L†b±È½>ʃ>ÈŠ¿˜{?ö1.¾ç<ÓdÅ¿ýO­[Çe\@Ïe—‘.ûæ79冨íû\šHp»ër;„ö/ÿ!Ê Ã`hhˆb±þ–"dŒŒ€aP,Ùƒl¿Áá>åûáµiY6lˆ„ñRgÿý÷cY†aL&Ãs#£ mŠ.×°ØW¡PÀqœpPQyS”vÆ=i” ’q»@Ú,‹a_Q,ÉårÜtÓMí6#EQEQ¥ƒ‰{-É<Ô÷ý23YÇu]†††Êæn2&4Û¶I&“¤R)²Ù,™L&Ü8œ$“Éplš&Édß÷I¥Ró™vŸò–Pp]>LRK†ûüþý¼ë¼óø×'ŸäÎ3Îà¢àÿýæorÅG?ÊU_ÿ:‰D‚‡¿ÿ}Œ+®·“HL-Ë"‘H„Âßàà`ètÉdB±5¾½h¶mÓß”"G‚›ÿàøµÑQ>ýwÇ­«VqÚÈÔ _ÈÉû·üÏ;ïä–[náøñã¼åÊ+zâ ^¾e ïó<Œ’cijozïúßáãÿ8©—¾”C‡‘Íf¹à‚ ‘à•¿÷{Y»–üÝßñ†K/eõW¾Âe_ü"¼ç=a ÃàCúPhÿÛ†‡§¿¿ø‹¿`rr€«¯¾š«¯¾ºìóíÛ·³{÷n¾úÕ¯†Ëeøÿð ÞùÎwrÅWóFß•ÎÏþó\sÍ5\ÿõüæõ¿É7û&W_}5¦i2::ÊÎ;벃%#ÄI§588Y±X  “É„.aL±lŸËåBãv]— 6011vrÅb1ìÄLÓdll ÇqÊ:69–ad2™P4“²X,bYÖ4/¡|>O±X … :DDá*.–ˆp„]2™ /BC\H°m›t:Š "쎯H¥R¤R)úûûC/ÃT*…aô÷÷‡ß»P(ËåÂår3A$›Í–£Êåâ9µ”„¸J±´X,’H$p]—‰‰‰ð·»ÁÇ4ñ+“!qð ç-_Ρ“NâŸù \tÛî¾›ßK&) üýM7ñèg>î©)Ž;ÆøØXYÒé4ýýý¡&ÞdCCCáï“N§±m۶ðˆ¶"ÐÉÓ—j^¨Òv˲¦Ý”²›úI¥¿+V¬÷5ÛM¿Ö“AòĽ]+?Ÿ‰ø6•¹ˆt²\ÚÚÓÓ³pQEQEQ "˜ˆðá8NèŒF¶ø~Y´ˆÌŠÅb8w‹GiÈ\Ö‹“I:瞦i†â¬"ãÛøÜU¨³Ë<`!$ ¾ýío·û'h ßžšâà 3Ÿ>tˆ5Ÿÿ<¿³};k{ ãÝïfhh(ø½N; oýú𷃨0A<2'—Ë…B›DüÙ¶ÍÈÈHèå¸aÆpûB¡@¡Pmcll,šã$“<ýè£<ñÅ/rÎ{ÞÇ׭Ûæ½Ÿø·ß~;ϽÿýüÑýßøÆ7xÙË^F±Xä]ú§¼îu¯#›Í–Ù‡ïûœ}öÙär9<ÏcÆ 
†ÁÆC#ñïs'Ã`cL„ªESũު}~õÕWÕ(2L«Øo¯išQlð`Ÿ8pèËöÑ7Ön{æÏäü{ϯË'áiñD<ˆD(J$œtÒIaÀjÂW&“ ÃÕ*‘'"`‰Ë­eYd2™ðIš5kÂåqOš¸÷X¥—PÜûNö+"Ôàà`ø …étš\.*݆gÛ6žç‘ÍfÃÎ8ŸÏsðàA t†††Âó$ie¾,ñL«àâçA„²¸÷›044 ###a'?“.P&¾É÷‘vmÛ¶­Ý&ÖtD$–ßLl ™L211ž‹›7³móf.}ä¾²q#÷œ~:^ooX Çœ5kÂÜoÖöíeÇQèo§¦8묳¦µe¶| 3ý–"PWzVÆ‘ÁA½¬Ùµ .¿¼i羚ϹöDhTEQEQ–&2ß“¨#qî0 ƒB¡P6nÎçó†ÁÈÈH]cSÃ0¹l*•š1òIÆÞ3¿ãŸ5rl¬ÜûôÓPÒ*ϯìZ³†¯^~9é¿ùzvîä[_ùJ¸^ÜÁ@æÌ¡vL¶B¡z/Š­d³Yúúú`åø8}×\î׿ž•ãã 8ÄœB††¸÷k_ã‡ú@I?ù”msm•TEñÜqqMFŽ/óGÓ49qâDèä„ÚCCqJÿãÓVŸ ÊËÒgq?“@°KSžxÏ&ˆN—Ö·áܯžËÝ—ß]W³'ªªt0¢ôŠ`šf(Uc6±hhhˆðIEe|t&“ ŵjÇO¡ÊÎ2W/zÕŒQ<ÑjašfÙ÷¯;yªR-&¼²³ƒ¸‡_­Ïçs!ÉS O—Z.¾t: æ%Ëçóáy) A.3 xÙe¼íøñe—‘'*¶£¼ï˜éÈd2üí®]ìÙ³Ö­kØw;1·“‘¶6ºÓ׋¢(Š¢(ÊÒe`` l®$sÔ0ÿD â1—Ï827œiL;›7‘Ò(§Á#׊¥erYXm"O9£´®_ú,CùÄÛ!,O{dÓ=ÿѺšÔ0!njjŠÏ~ö³ôöö²uëֲφ‡‡9|ø0›7ofË–- ?·â’kYVø„AÜ+%¤4ÎØØØ‚'Êâæ[ÍÀf ‡‹^© ‹w[#aùþÙl¶áUifãæC¼ªN«EœvÛ±¸ž …9ßâyØòù<wÜAšR…œ /$IÐ?ŒÌó˜‡æðáÃ\ÐÀï#×F3pÖ¬ih[¦y®µCH®eÇ­°aEiíî‹¥¨+‹WÌŽD´HÈ x;ŒŒL{à+Ž•ÞBê|/ ïýÒ2™M;Ú„[·XZ–(ý•‘v|·´X±iSS¿G;íXR_% Vf2œùøãL”<ÏŠÀ¦}ûør?õôð¾gŸf·™'W›/g³YR©T1'mð}³Pà÷^ùÊ ŠéùçS(9Ù¶¨¢©ˆHü˜³ÍÏ«¥š‰Ê¼ðÓpŒ¤RIˆb³É%6AÁ, M³JÿSžq¥u 0Ø<‘§œˆx9#Nˆp"Ö%J³ððÿ|êKGCJ÷MMMqÍ5×Ð×ׇã8ejf6›err’¾¾>²ÙlS&èª)žbââX™¼]h„·Š¸7ºÓŒÇw7ƒFŠ[ñ¼>­¦ÝvœN§Ã8zˆ<àlÛÆö¼îuüìÃæ SSäˆn|C@nžÇX½zuÓÎi³Ä­fäXëï½z˜ÉŽ[Ñ+ÊBiw_¬(@íXY,,ÅqE6› ÃEg¢P(„Î¥â¨Ì?‡††Bç‡JÇŠxN¶Vàh.Ó1·ô_¢÷ò¥¿ÙÒÿÒÿ~ýa ´¼Xz¿¡ôY¡´?¿ôÚ!Ð9‰Ä¶ ž1TúLô‘TéïéO=Õ´sÐN;v€—=û,‰D‚ÿwô(<ö'ÄOŸ|øÆÓOó«·¼…“_ÿzþ­·wÖ¨4!—Ë͸®iš$|b¹ä …BP=tp/•Â/¥ÕªœËË{ÉýÞlfŒ‚ó .Ž(¼Ù*ëWN?ÝÒº&ÁÆ?7 Œ/W:F‘ÀP]‡(„UÖÏ”¶‰_¾6å!iuиññqzzzØ^ÊAµyóf^÷º×…ŸíÛ·›o¾ˆçFNxÅíW<ÈäGkê“Û¶C¯»F"!‰õ^„õ/äÐ(ä†ÓÈßS Ä«]¶‚vÚ±|_)vÁM{ddÓ²ìsÎáö7¿™fHE.pUCS…ftÜ®ëÂ…6tŸÕr44j¿­¦–·¢/V”FÐî1…¢4µce±°”Æñj“R,¡Ò»Gòxçr¹0ß¶$Åotاϴ<ò@ #‰RTˆt ™•:º€E UØDºz‚WÚ¿Yú+žia~i"gÑ"l¢(>bÇšéW¯‹U™‚ ày=ÔÐó§vìžsWmßÎIë׳é´Ó8ýž{8°f xì1ŽÞt™L†Ä©§6怮ËÇþþïyâ‹_„sΑ‘0·|.—#N“H$¢â±H=É9711Ѵߢ3žïÓE®‘GÚ„^*¢V˜(­c]0ñ¯hLˆeŸ#¥íòÂ[e{ª‘FëÿÎ âÖ¯_Ï 7ܾ?|øpøztt”¾¾¨’Ä–-[¸ñÆqؘ$±=P37[#±,«)FjYV'ÝÈ}6ÃsMDÃFïSÚÜJZmÇâ¦.yà‰DÙÓrÃ0p-+ôºmVF‡ã½½Ü?2ÂyË—7lŸ"Ð6ZÜZ±bESÄ­ZSz$÷G+©eÇ­è‹¥´{L¡(@íXY,,öq…çyäóù0·±T§èïï/ —K§Ó¡³D±X Ä“ÄÝ`ª ^d"ˆA %æü‘¦ʜز¡ØgÅÒû"QŽz›é¹ê=fêfJlÔ Òk;íøÇޱê[ßâ‡oz#Àÿظ‘?;vŒo<ý4ßÛºµ1óð<ž~úiýÂè½ä’ 
ü´$¶‰Ï”¦Ê4Í–‹p0KÈ«ÄF .Ë,"CÏ©Æ"q®RijKÿ«É•“íAwÏ&ކ„¦ööö†<99I&“áºë®#_¿~}Ùº³±wï^¶mÛÆøøø¬ëJµ)é,´Ã€E­pÚ…Ð,—RÓ4Ãõ©Oqß}÷5eÿî‹ï»ï>>õ©O±cÇŽvŸ:¥ÃضmwÜq5Á £YcãááávŸ6¥ƒhçøx>v ³[‰ã8 `¶m“ËåÊÆm¶mÓßßO2™$™LbÛ6¤R)FFFæ$ÂIt8Ù(ÿôcËE`!Ð&´…!‚yÁÁÒß‘Òÿ1Ê=ØÄ(C$ÂUR-åV»îÊñq½ãŠ;œßzøaòDZ‘a<÷øã\ø¥/ÍkÞT(ÊäóÜ55Åg·oçã7¸ýÀ0MdF-"—ã8aaÀ–!1Ëq<"CsÓCK%üÔ (Š …D~ËPÜ5F¶´nµ Ãz’«Ï1 ûŽ;æ4>nx±†ááan¸á†¹sžþù¡{h5¤:ˆ¼N¥R /@ÐN,ËjJÅZ…%B"‘ N7|¿•¡‚}}}Ü|óÍM¿q7ÒŽ7mÚTÓŽ%÷€xÅUû½ó}I³£ò¯8z”þ+¯lø~ÅM¿Ñì~䑦œ‡Vx`¶ÓŽç#BüÚ¯ý¯ýëC7~Eغu+½½½ìÜYg6ÚyÐȾø…/|á’O"®Tçæ›ofÇŽüÚ¯ýZSößʱ±²4é¶qÌ<>nù|ß÷Cï¶ZãUIy42Rßl\Æîò™½–”V å¹ÞëÉ7¿XÙºu+[·ní;žëøø¡eËøüË^Æ™”G9šù<ï{ÍkæþE|ŸWÞtØ6ˆóN*ÅèyH€IDATe™ —ÅV8qò©§ØBÃÍ0þå‹¥åqß)ñ|3*–É{»´q_4)aO¼WD‰Î3³;çlÌÁ7jûöílÞ¼¹îñqĸmÛ¶Ñ××Çm·ÝV–H½¯¯¯ìéÄg/é8!‚š1Ùo7Î1 !¤¾ð ÃhŠ:44ÔòqÐ:;–°Íb±XÕûÑ%èŸÆZðW¬XAã‚RËi†¸µ÷Üs^`B¼j Õì¸}±¢4‹VŽ)¥Y¨+‹…Å0®ð}Ÿ|>æË6M“¡¡¡£zjTˆ‡uJaˆRTÉ:9šÕ¢ÌvرïûœòàƒX›7OÓƒþæê«ç¦cø~jšNsð/ à8¤R©Ð;®R?ˆ pmE<ßâBœxÈIERYO\ýØßøWH\€r!®ÚiL•Ö-‰a›hˆ7<Q(©:È0炉J‹i—ÿÅw°éâ‹«~6g®¿L“c/~1ç¿÷½œ_ú(nÏétÏó‚‚Íá¤H½§Ê£¼úˆKàq"굨ÛrJâ•JŠ× êŸ$wp¶²†q“““ŒŽŽNN\×¥··—k¯½–k®¹†-[¶à8·ÞzëÂhY8Žƒã8 ¯,º˜k…U÷ÒJ;–°ÍJ/EŸH„딼 󥞒˖-cÏž=\Þî/W'ÍŸ™ì¸á}±¢4–)¥ ¨+‹…nW¸®K±X 72™Ì4Î'ÊÛ.•C‹Da¦Á˜\ÂI%=U‚¹;PÚK»ìø'O<Á%ëÖ;ŽüO$ —ã“ù<”Š…@¹XœÉd_ØNBFãHÌuåzR¡4NލrH¢Êö6ÁØ' #Í\„~•c/’Œd â¶oß>cŒôu×]ÇÀÀûöícûöí éÃ4É P‘GL™=W3Ó*;–'-¶m‡îñ‚|Ñ›zuäœoœš‚5kÚÝœ‘§SÍ𠜉™ì¸‘}±¢4‹VŽ)¥Y¨+‹…nW‹Eòù<¦i2888mäiRˆÑ!ÜFJŸ§ˆ¢ã¤:©ìIÇëÝC+íXÄ]˜8q‚ÜB„8Ç×ß§hÛXž pÙlÇqÊœmšâ—%¿â»*-—úõJÿ+…;Éû&®¤©Òöa£ .¸þŠãJõÓEßݰq³ÑÛÛ[w5™ð}, ,‹‘L¦åÞ&ÊÒ¦vœÏçÉ”l7îÑY è»æPœ¥aÈÍ¢ÓY¶l7n䬳ÎjwSêâàÁƒínÂ4Õ+J;Q;VjÇÊb íXŠ¡¡¡i„¤€± æÿ9@„6‡È Gg™K‡FÚ±KàÌeù>'¿üå ³£D"øP,– Ê’ç°éتtµZ’¢VËOLjÅk͉ícŒ@lKQý⪼'ç"ääv7`®Äø«§t#¾ï‡ÞRñÁÁB‹º,‡îèãzzz¸ùæ›»FˆSEQEQZ‡ïû‹Åi9²68×ä œq‰BLe-I?µHp”âèÏ]xá¼r~K†8‰DÃ0Èfƒò ©TªùÅÄíX±\¸ē‚ '{/'BÞa«‘@7‹8 Y× q;~øÃðµ†Z*ÝH5»ÞvˆafÅ_EQEQEéFòù|ß0(è’W~¤ô_]9”f"ºÓ‡O=•Kydî;Èf«,ʆçTä¡_F&‰~ì¯L^%|Tâ·%Ì4Nå$³^n‘Ó²ÐÔFpëø8k7mbظqc»›£(sÆqœªB\ö…†ªœ­(Š¢(Š¢t+R”¡X,†ázyï7çjÉ:¥UüâØ1®züqn¿òJ>øàƒõoX(@±†Aª¢ ¥ÌMÓlmT xŠX”‡•Џ– ÜcNÜI=ʽå☨]æwÓÚµ<÷Ö·ðþ÷¿¿ÝÍQ”yQ­ º‹ºÁ׋·ð](Š¢(Š¢(‹„l6‹eYLLL082B¿a„y—å¿Îû•VqàÀÆJBÚÖ .¨o#ß„¸Lpt:ãD®cR Á¶íªa«s&IT.Xävq ‚‹'A 
fû±å‚„¨ÆÃWÍÒ{ŸéÞ&)ô‚¤Ë„8€»6o&zÄ)‹‡"ÝQ(¡SP!NQEQEH§Ó@?Ë#(Ä#Èg£E”öðë/})æ`{…Ø6ØvAeÛ6ù|¾,G~Ãð™>©ò™ž ®2Ç[Ž@°‹ qWYÌaˆH¸S¦ÑUBÜ¡“NâÍ_ø‚ ¨J×M]#Ëú®vaP½Ž¢(Š¢(Š¢t*®ërï£bŒ°(|;ÇÕŠòÓSOeùÑ£õ×pÝ@ˆKñQ‰D‚\ɣζí²h*Ó4I¥G%NãˆrãâBœ e…ãd˜îY¢ øŒt•÷ðòå¼éÈ‘v7CQ„eYait‡(×e»È°¨ Ò(Š¢(Š¢(‹ ÏóH&“¬ÿèG jnMõ¢´Ÿ£GrÕºuõ­œÍB: ™ T¤/’먬æPgÕXϦ¼˜B¥÷[éá«31¸°¦/fºJˆ[79Ùü½ŠÒ‚Ðø<:`˜ ú`EQEQE)‹üÆÇ?Î.½”Z|Lé,êÏ  ½á à$5ž'n^¸ùઑ¥Üû-Mà%爼øÌ&˜¼Ö›WI'o5é*!NQº×u±,‹"Aæ¡Bœ¢(Š¢(Š¢Ô‹ëºüõUWñäߨ…”ŽãК5õ¯lYP!Ú% ,˶í…{ÃI.8¯b™äu‹{ˉ¸6L”Ö©Ìñf¢^n bY»0Ž=Úî&(%*rÑêå:· ®ÓxÎG3¶nüš6™zž+­Wˆg14ð}Ï0°¸úOQEQEQêÁÞ²~=[€ééiwseÏ=þø‚4Ë fضÝÀÙ¯Cäý!E Üc­Z®"-¶Ð4f⦦¦èé€Nîk÷ÜÃÔÔ¶m³@Í%Gp=Éÿj!㒳̭xï\·‘(&‰="¡MÖ.UÚÞ)}–Šm#m²ˆ)©~ì•w ¼gÿëúëÛ} Œ[úN*Â)Š¢(Š¢(J½Üzü8Ï<ó ¿×ÛÛî¦(JU–>ŠiUñKêV…X×ßßÏØØù|žL¦A¥ôl‚ŠñI¿Lê…ÁäTþ ¨»i …¸ÑÑQ†‡‡¹îºëèëëc||œmÛ¶155Eoo/ù|ž¾¾¾¶5ôŸï¼“KûûÛ}¾:—éâ´G”KÑ‹}.bW‚@äN—Ö‘k*.€ "²¥˜ýÚÑl¨Ê~L)cVüD´‹‹{" .;眖çFãy†a„ƒŠ¢(Š¢(Š¢ÔÇ?ìÜÉ?Ÿw_¿÷^lâ”n¥PŠùªÛ†††šs<‹òIy@,ˆ{Àe &¨ñeâ9¢4…e“““lÛ¶ë®»ŽÞR§–Éd°m›ë®»Ž‘‘2™ _ûÚ×ÚÚØœuV»ÏWÓï4Ƥ¢f‘(´[<ÃìØëø5%ˆŠåå9y‰§¨ãl¾¹Et«&Ðí~ê©¶¾µxž‡eYxhŸ¦(Š¢(Š¢(õòµ{î! |ù䓲§( Ä¡ŽtZ…BP ¡Ó4) ó†ƒ@t³( ʽBÞ:•‡Õ\pMezÂmß¾„¹ÉÉI¶oßNOO×]wÃÃÃŒ·Õ+.N·…+ÇÃ@ã9Ó ±×AÍxÈ¢x¥-Ä+´ÅìÓ…<xÞCÁ¦MínÞ¼ð¼À?Qòc*Š¢(Š¢(Š23>ðæK.ᜩ)®î€”IŠ2gÞ?\qEõ]7(Ð`–Ïî³Ù,©T Çq°mÓlPL¨xóøbÛÓ½hÄ‹§Ûƒ.çd€ññq6oÞ.¡¯¯¯,7Üúõ뙚šjw{CÜ…ï¢i8¥ÿY‚Ðê i²\DiŸ d{‚àšÈ•þ¥ï7Häý¶ÔHTùÞ&ÐóØcínÚ¼‰{Ä)ó§ÛDxEQEQeþ8€á8üñ©§¶»)Š2+Ë—/¯þïC: ‰Ä´D|K¥R$tµäˆ…™<ÜQO‘³  ··—ññq¶lÙPöZØ·o_[êWÔ2ê6“'òx“ü_’‡-ÃÜÅeõ­Nøó]»àw·ÝM™7ÿµaƒæ‡[ RÜcNHu’x‰ÞJ¥7^¥¤Hpã*°tÕpEQEQ”à¯~ö3,Ïã#’ªt83: äó`Û5…8ù[WèueŽªzÝxÿÄQ®å,èëëã–[naýúõ8ŽÃ­·Þ®4<<Ìáǧ‰sÕfË–-a®9Y699¾ïííeëÖ­sjè3«WsÕºu@ù\ºHq™ŸKA“À†eÎ>Ñ­•t²a=Àé-Ê× ;v]— Ù¬ös³QíÆÇÖ––g‰â·óaºôÙ ÐO$¤y”W,©L–˜.­+{¶´^¡´]¼\p—PiÇè‹¥•4kL¡(­DíXY,´c\‘þùÏùañBQæC3íø+<ÀŠýû«X,ÂÄÄ´ÅÙl–\.ÇœÈ2]LsKÿSëÅw-…”¶s2ÀÖ­[ “ÉÉdÂÊ©×\s ;vìà†n˜ugãããìØ±cš÷œã8 nèÑóΫYe³ÄCK³¥÷ØûA8©,“|‡,Â^sõÒL;ž8ë¬nÓrƒx¤AäB'O$ Å+‹x`‘àÆñpé3‰õˆí?K$â ”^ËÍ'AyÕ“|iûtéõHiE‚˜ñLì¿WÚ_¾ôy5SvÔ¢Å1ÉÕì¸6¬(­¢™}±¢´ µce±ÐŽq…Ü~ü8/ûÒ—0ŒNŸi)Ý@+ìø%gœ1}¡ä†«Â¼l[æAÕ–—·â½G÷{ç,–É‹íÛ·‡Åâlß¾}Z¾¸jìØ±ƒñññªyäFGG¹ùæ›çÝH—@ˆk¤'QÌ•ÌQNJ‹xFi½Qu“‰Òz§‚½§´®Äqo mñí¡üÉÐ`ìXÒñ~$ªdÄþJËÒ÷#åÆ(O™¨hG²tüZžxM 
–/Ô†¥U4³/V”V¡v¬,Ú5®È¿öçÎ›ßøÆvŸeÐÖñ±ça©U˜w•ÔJ€Jï·jب×!,›m…zÂQPij*”Þ©©)z{{C—Ïù¸zf#GŽ4ìK牪•Ì¥%-C ,¶ðÁÅö}šE3íØ²¬îí÷ “Q±L„,y-‚Ó Ñ™Bi¹¸–¦ˆ<ÙDŒq*K¹R)ÂU!vl‰ì#* ,B[|Û™nDFi{!ž˜±šR*ßÝ è0LÊsÇÉëbi]9ƒ±ïž*mßç¼éœ¦þ\Õì¸6¬(­¢™}±¢´ µce±Ð®qÅýÏ=ÇÙ?ú©;Ú} ”E@+ìø§§žÊëca®!¡ÕétšÁÁAŠÅ"¶mÏÍ+N’ÒWFkûTOïǦåQ:JuN–;vìÀ²,,Ëâšk®a||¼!grr24ìááavÌÒ™>úè£ìر£,V{ý׿¾à¶RFIµRqp±æÈc¥¿s­ºÍŽ»5v¼wïÞiët• *m©×ñd‰r€èb*”¾d²ôw$¶m®´,SZ.ŠwÜC,G$ªåÄ-‹êLZYf[?ˆnzåwÒ^y%ëÛ„ÎäU“2~qì-jxÄ|lø±ÇÃuÝY×S–£££|õ«_åá‡nù±çcÇ?ü0_ýêWn×)S:”;vàº.µ¸"ûBÆÆ£££íö¾Úä3…Ò†‡‡ç4>^ñsà 7ÐÛÛËŽ;¸å–[Èçóuíd&úúú¸õÖ[Üs[¶láꫯ®+¬ZµŠÍ›7³zõêpÙÆo>øÁyµAB¡óóa1õFØ Gç熋ӭY«W¯fóæÍÜyçm9þ|츧§‡Í›7‡ïžw+ÚÒú:ˆHHˆe6Q¬¶¬#bœT-$¸°äu?°&O[LQJ O¼Ê`v×én$Wãu‰Õ«WóÒ_)?øÅZÞ´ùØðÊ•+9ï¼óÊìXQÖ¯_ϦM›Ê–µŠùØñêÕ«éíí ·QaóæÍ<üðì\¹²¥Ç]ÈØX «) ´w|<;†éããZ€Ó~ö³i¥ÊâCúµ±±±îiî,d|\m\qlÅ LsæDD®ëây;Ïœ‹4¥(›²™c9󳉹îTY}}}>|¸îññ2€;wbÛvè†ÙÓÓÃ5×\Óõôô”å—‹WŒªÅªU«ÊBbçSW(¥Š¢ñ¢ÙL‚´Ò8zzzزe gžyfÛŽ?W;>óÌ3C;v‡S6nl¿GœGäÉ–&¸0l‚0Q©>*OR$|S.@ µKïE¤3ˆ<×ÄÓ-~¡%˜î:½D;þìg?Û–cÏÕ†W®\ÉóŸÿüºS(KƒÞÞ^6mÚT5ïU³™¯^½šM›6©§LcË–-ìܹ³åB\#ÆÆŠíÏÇŽ¡||\ øÒsÏqö‡?L¢ âŒÒZz{{éíííºñqµqŪG™u{ñòtg~Ÿ2÷ŠãÄ–P^mRi }}}LMMÕ=>CSãØÈÁê-·Ü¶mÛÂ÷£££uwÔÂò£GgU–«!"ÜQT\#Í¢›h»Ô¥4ÂŽÏ›ÅM¹áxD±ÈkJïãvâ¡¥Q¸©„™cëÉå'±Û ¢§,©Ø~ZP>¸Û®¹N¡6¬(íFíXY ¨+‹fÚq¸ø«_%‘Ð'¹Jsi¤;žWîÅ™ÍB±X}}Ç!›ÍÖµï2$¯–AùÄȬxí²8# ³kX([·nÅq®¹æzzzØ·oßœC^Ï™ššsìt?‘O³tn Iíæ6w°ã7näeÍnh¼’©x¼¥Ä5)š@é3 —›ä?“Á"´I8i‡U/Q!n~4†¥Ý¨+‹µce1ÐL;þùÑ£ìÿÀø÷ ­SšK£íøÈ¾}庅çM+ÔP(°,kþa©B‚ ª)Át9‹(¾Ò‘„BÜøøø´Ä„•ïg‹ù‡ æ9NOO·ÞzkX*¸¯¯¯Ìû®><§ü’/~°î-¥œfØ14¶/t]7º. 
P @ÊOš@ñ-E NK/^nRÑÓ¦ü‰I&¿7ÉJ%†ÜLÇ çÅžö‹E<Ï Koû¾ã8eO1Óé4¶m—­Wù<Ï£P(Édæž´T©JÜŽeÊÒJšÕ+J+Q;V ­WüüèQò±ñ¢¢4’fÙ± ¬úÕ¯ÊzTèR%µl.7dîŸãUFp[¥9D‘KJG± ‚Xèñññ²J©[¶liXåTXX¸ëÓO?]÷ļ@àÎ<Òœó5 Í·´˜¯{žÇ±Ë.kH¤Ó`ddË´ §ßtzTHÁ'2Î|ð~¼wœç½õyLýï) ÂôLNX'0ÆŒp¿¦i†‚›{܉žç…U} àX,’ÏçÉd2$ |ß'›Íbš&©T ×uÃí Ã+2[–E2™ E9ß÷Éd2$“I†††H¥R  (-nFXYÈ÷}˜GˆºRŽæÉRjÇÊb@íXY 4ÚŽ]àô{îѰT¥¥4ÂŽ÷;ÆÚ}ûf]ORn‹ÅÙ…8·ô¦*“ÕE‰*Äu0Ë pÉ”B ÈÑ£GëJd˜'°ÁZ'ŽÅÓl)J-<ÏãøUW5ÄVDˆ j0áô§Ngç7wÒ»®—ÞboíH†šƒ3Þxçüú9l;e¶m3vù!• zöb±ˆmÛø¾aá(™Lâº.K9r¹žç‘ÍfÉår˜¦‰eY¡`'®Ö"ÖÉ1Ãï!×õàà`xS¹l6‹mÛ8Žƒišd³Y&^ð(ísओÁ°$ú‰ž$>õ}Ó4êD†a„ë¨Ç¢(Š¢(JçQž¾óNŒ×¾¶ÝMQ”9ñ“'žàÒxek×­êDP(H¥Rõ…¦J¡½8•› xÉU.7P¡¢ƒ)Ë755Åðð0ããã>|˜¾¾>úúúæWÍ£A¸sX¯@à•ÙÊ)vIQ”YY±bECö#¢–W{öìáüß>ŸÍïØŒùA“{¾v÷üõ=¼!õH€oûa¸hêD 3k– cB.— =Ó’É$™L˲¿@ä™V:¾ˆpÀ´'—®ë†¢[ÁâýJµ§Ars’õ†††øÛ]»Ø\ýõ¼á”SÂýÅÿû~ð}EXO?i·ïû$“É2ñOQEQEi?_{ê)^ÓÀ¨,Ei>ðÊÉÉhç-<šG<â|f9ª‰&*Âu4e9â¶mÛÆêÕ«aîÆodxx˜\.×–ü>pòÀå——-w+ÖIÒÜ հ ¼ð¥ö/Pˆ[³f ###xž‡çy¼â«¯àÞß¿—sn;‡#Áû'þ.¹úN™:…o¾ì›<øžÃ<¦iR(Bo¶b±X Lû å"Y¥p5“•N§kpÉùËKýÀÆC\Ú â¢xÔI»«=H ‚×ãß]QEQEiE`õ}÷ñÊM›ÚÝE™3{O=µÜ¹Àu§å‡ª:DÌŠCP”ï·åÞ@ZµëXà¶mÛ6®½öZ®»îº²¶oßζmÛ¸ñÆÛVÑéɽ{güŸghh('¥ÈD6›%‘HtÍùQ”ª¸èMQEéx\ÀpÌ>V”VPN»å¬oŒÚvU¸l6FçÔ…IŠç ã¹EÄÉ„¥ÎT)¤··—-[¶°¯Ž* ­ .Äåv·6 Âaõz¨‚ƒ*1ž8ÁËΗÜ$Š¢(ŠÒ$\`Õ#èÃO¥ëð†IÏîÜYîWósN"\|üåP}¥ã9¹Þ§¦¦ÚÝÖixö¸$—稛´<%½ƒNÜbLmÜÈòåËç¼ïûô÷÷S,ÃBŸýÄg7P‹àéÈÓBJ-ËZÔa•ÍðŒ‹¥$tÕqÇ¡P(„9÷¥.¤’ÐLdKëÅ+cbïM¦÷§Ê´ZÇð)ZTíÉ3‹qY‚>GQEQš„üÚ}÷ie{¥ëð¦dMyXÏ¿e\(³=@Uº‚“¶lÙÂø ÕiÆÇÇgõšk÷<õTÍϲ´ÏnAøÌ>1›Ù&EkÊßšÕ¶od_á—ö9@4©\¼:Мyfõê9Ÿñrûßïøß ƒär9þÇ7þ¿ñ²ß(·Ÿ¨¬•¹Ò:'›åíŸù na¡P9ÅbÇqjþ2™ ‰D‚\.W&p†žˆJçR)X5ØPzí‰[Õ<ÇŠ¥ú•ôÑ>AŽƒ$úK‹¥¿òÚ-ý“ËÇŽ›£ú=Á# KÍÆ¶wJÓ¥ãçúrï;7ö×8}òtÎ||~^¾Š¢(ŠRIXõ£-êÉÊâÆ¬£Bªëºe9©gÅ"ʃUíÁ¬Òµœ AUA©Zéù6::Ê7ÞÈu×]×–ª©w>ú(«ŸyfÚr™ït¤7œG4Yª†;Ëç²½Oç8N¹;ê Qø§\ËY"O9ƒi®«þp´ôf6:ŸèBwbeò÷ÖÈpq÷D¯Ô†tëN{'óôgÌ{Û­[·‚ØÂÑ£G9ÿÊ󃤈K0}F.—ã‹×_Ï'þú¯º_Û¶ëºyB?N8îŠÅ¢†¬v*E&ÄI¼ý_ü!†ôɲéƒey©Ǥ¼ßO ¬|áÍ 9!¸¶3¹D¨óbí(m/Ûˆéö vôÓrÌT©mƒý²C4 “ýËòBi¹¼·ˆú{/ØÏ³=϶ðGTEQ3r[ª{<¦(ÝÈœ‹ÁÅ5;›òqŸÒÕœ QA†ÉÉI^÷º×±mÛ6¶mÛÆÕW_ͶmÛØ²e Û·oo[#_tôè´eò¿#‰MÜDDó< ÍøqðáŸÿ¾ïãº.6làöõ·ó…·|=ßÜyp²_ÿý¯sï»ïÅq¶mÛÆAû ÷ÿáýx†Ç÷>û½Ð[¢X,òÝÓ¾ËÁÜA²²ùð‘iÞ£À.‚‰™œ<‡iÞs@ä‰E 
˜Ð‰\#˜8Ú¥¿™Ø6"Úù”{XÌ&FVyXðk¿üµvÿºóâÑ3Θs.AÓ4Éçóx†žÏSzj»¿Ê¢DrÁÕ»®”OBÉ700 ¡«­FÄ®™˜é'‰ mùزþز~N*ôX×d²´,þ‚>1.†ÉúñðyÂ)Þh]–Û¥¿c¥uR¥c•Ö•>\<‰ ·ˆç[®´)q?Vú|¨´,C4˜3JíÏôÛÅÒ>½ÒqGJëÛ=³úž8û‰üÀŠ¢(ÊbÇúêó*R”NÂ8-Èf¡Š×[¡P ŸÏÏÍÆ+#d¬¦t=ËäEoo/7ß|3ããã÷—È–-[ÚR)u6JE"ÛÂÞïìÅð ^úg/eb"ª×z[ßm¼yüÍ<¶ó1nÿÚíŒã$×$ùÜ×>Ç«ßõjRé½ý"¾g~ãZƒ¾÷öqÊúSøä‡?ÉØØn?À/ïû%íyŒc›ŽñÈ#ðÛ+~›žŸõðÆÿ|#^Êãño>ÎÚ¿ZËÐá!žu×x¯!kfé›ì£çu=Ü÷Ýûøøƒç[| ;oó³Û~ÆÃ/z˜»oxÏr'_xr乿i‘—‡ˆfÁ¤Ð!˜¬‰06F0+”ö/ŸïSdb:RÚ·x€DÇ/–^½Ti¿véo¾´íóú;®oÓ¯¾0V¬X1§êº¾ïcÛv(⚦‰ÑoÀ«ÛýMÚ‹ähK¥RmKæ¯8+ˆ 'ÉW³Ù,¦i†‚R±ïy#a“3=\¡-MäMœ'蟲ýO¢ôÚ&º¹øýÓHi}ùÙ¥“¾Ó.íÛ&² "O8ÙNþ&bí^Õ¾œjfÿ®C±×’ûw0ös5öY¹_éó¡ÜÕ\sf+Š¢(MÆNŒiXªÒ•¸À±cÇÊç&c[‰r“œßuçB¬'š¨GÜ"aY傾¾¾ª¹à¤r`«ÙÓÓÃËœ¶|¬…mð<×uq]Çqøà9äEË_ŠpÙl˲HìNð®W¿‹?xöøíwý6׿¯eß+÷ñ¹×~޼Ÿgü5ã\ÿãëqÎtX÷WëxâOŸàð›ó¹K>¯2軦ÇÞú+Ó+¹ô}—^°Ì”Lª†KÕ#mà¬R¢þ<ðMà?4¼rí+™¼m’Kß|)+^½‚_äŽòLæ^Ù{Z$ %ˆ<"$Ç›L8ųC$¾LÔr0£Â$ž¦:D”Ÿh‚(_Q‘(G‘xÑ ‰t¥Iî?_ùϼ—÷¶ð×o Ë–-«{Ý|>eYxž9âñ’`ÉWÊ‘ÒNZZY<Ãó<Òéôܪ$-R …©T*¬P»aÃNœ81íóº™M(Ê…Vʃ(,t„(”_¼ÂÄ#m A?ïÇ èF®I?v!>¨ö“7ûI¦sRþEQ¥¸À†]»TˆSº–>̯äëBEþmÃ0¦Íê¢rHlÌÕÕ+®ë©_hGå¬*B\£ñ}?LØþ'›þ„>ôB¾öª¯áû~ø_BÒžÿÏÏgßûXë¯Å0 ‰étš·žùVþòþ’ã·gÍÀ9ÿŒÿiÀ#Ë„‚’íØ‚3­39óê3ƒ»ÏI„aGç Ÿï)5lˆh)ùÖ$ ¢‹Pr•>_UúÇ \”¸ˆK¿(¶.ÛC0Ù4&”•¡^F鸕ԫ‡HH–„vUzkHx˜ˆ}EÊŸ”¼>v¯ßÝt[h‡_øÂº×u˲Hy©ÈY*¤.ñ4dÝ Â òÔ«X,’J¥ —/U\×%›Í’H$èïï·b±H¡P —Ë‘N§I$5ŸNêDD³€~p‡\6Nm¤çÙ°¡àH~9ÉY#g Çwçô[N2 ¿—pN ÿ êïÄû­dvžçÁH:S €éÔÎ÷!O@åûÇï%B¥W ëºmóøT”NB{ÏóÂkLŒÊý P(àû>©T*ô4Èçóáƒ×uÃ\ò+“Éàû>–eñøã·ûk*K±_±i¹×yž.“×–e…‘–eašfµ$ëú¾ã8<÷Üsíþj ÂŽÁå—·»)Š2/ÎxúiBÅÂó b;ïã Qº(¥«éx!nÿŠŸx—æÁ®Lv$O[Ïc=¤R)NûŸ§ñµï|Â=…ðضmóø¦˜âø³®°Žá‡9û‰³¹òcW2–ƒ"ô½ª>ìퟬ…[(žFˆÂA!˜ì¹ʶ|͸ç…\xq!¬Úé0¨î"s½·—/. 
Ó'»vl_Í𦈷¯rÿ•ßi‘9­¾ï¾º¡×ÔÓŸv¨— étšL&Óñ‚œª"ÂH{³Ù,§vZ»›×<Ï 5D¨t:M.—c``€T*>ôH$ø¾O&“!™L’J¥èïï'œL&ÃÉH*• skÈ>Öýõ:žùõgXûüµœçG²?Éö«¶ó;ÿù;ìêß…û/ã±µg+§­=÷}ë}¼÷†÷²óÙ˜žÉé¿u:·¿ævÞ¼ûÍ<¸ñAL?Èï>ù»|£ð .zà"®¸õ >á~"| #ùMÓ 'G2¸’ß:v ‚¬išá÷÷¡]ŒŒŒÍfCñÖ¶íP,ð}ŸD"A2™$‘HHDO)Ddšû¦(óÀqœªEDür'¼V,Ë"ŸÏ—yÁ:ŽC"‘Åf±÷b±ˆa¸®K"‘±¹öÇ ÅŠB!§É8¯53\×ell ×uÃû‡ôGÉd’sÏ=—7¾ñí>­J“‘¾¸ÖCéÛ-Ë¢P(„ë‹èëy^ØÿJ¦¸Ý‰­‰¸&ÛI/œaá:Ùl6¼Väz’ëHîáû~ø Jî-ÒæT*Õõ÷>QZEéFvíÚÅ…ò¦Ê%›Í’Édæþ 5Kù¼X[”®§ã…¸åG6|Â=ù‰IÎ>ùlòçà ÕóW<Ÿ¿þù_ã¾ÁÅxÒ e§H¤"Ú¦Àð 8ÈÀ+_üJžû‹çUZk‹g—û›!JžaºÈdQ.¸Ts5] 8¶gÏ&W®äÅgŸ Ë—‡ƒÇqÂI³ ,4¯Ucq]N:©®ue`—L&ùÞßcEf…Vž­ •Ju¼'í¦Ýp‡††Ø¶m[»›×¤O‘×"ºÉÄ'>Q·,+Ì­'¿i&“ ½ædâóãƒ?fWjƒƒƒd³ÙpòlÛ6Ùl–ïò]n}ìV&>1ÁOþ ·ÿöíܲéöÿr?¿9ö›\±ù Þ꾕Ík73¹n’W\ö z>ÞƒŸò¹øCsd߬#›2›Xë¯e3›¹Ó¼Û < ¾ö²¯ax¼Õžç‘ÏçÉår8Ž r~+'2"\(KñÀ·P(„¢y6›%—˅׬ÀLLL„6&v&צˆÍòÞ0ŒP“þ/›Í211A2™Êm^ìzhh(²Ù,###¡›išáEÚ—ËåÂ>E¼Üâb‰ëºaß'~Êç¹\Ž;v´ûçQæI<§u%ÒWÆ=ËDˆ‹÷ÓbƒÒ/‹]‹,bÛàà …B!¬š.Ç”þ]r×J‘¶¡¡¡²Ô6ò>~VcN¹£bÜu×]íþ9„ ZìJézlÛ¼áªÐ°¦š~dѰ `rr’}ûöµ»-UY^¥bê|£P(°õ{[YµeCCCì¸nßó¿‡išÜµû®`Ò4hFâGšraÌ+ý‚óŒóà“De„%ÉwܳL¶M0{.µ&rÞyç±æ”S¸í¶ÛxçE…ƒ9'ñP 60222íÉ¡ FtB77~qì+Ö¬©k]2FFFX‘\,”Ð9 «Bö¤ßéáp¾ßO¼[ ¹q]—þþþpy±Xdll,œ„¤Óé2Á_ÎxÙH[Ã0à$°  ÂðÞÊb)?È¥øà{M°‰œŸ úÞ$œfÆHfº§€‹ú±7ñ¦òÏæØÇUæüˆ÷›õüöƒƒƒÓ¼<ã‚Z±X$™L244D>Ÿ§P(022Š©TŠB¡zOˆw‘xväóù²<|ÊâA&®r=ˆ‡eÜ`Y‰D" •ñ…Ø´ˆÎâ$}…xêW›°Tëûe;¹v丵˜À¡´¸%žh²\þV»ÏŠÇXÜëR„ظ§ºŒkåaEÜŽãSdì+¡ 3Ý×eÝÓÄ&ãŸÕb)Ú¨œväHWï¥&ž²eÕ¦5ˆ‡‡¹å–[²Ãááái•V§¦¦æðáÃlÞ¼™-[¶Ôµ/¯´m#nJr£5M“µoZËØÇÇàm°íîmüöÿï·¹ûî»Y¶m¯8óœox³e‰â°%Y·4LªZ!£ˆ*ÎUóhkó<üŒ3Î`ðö·¿½LÓ©°†ÁÈÈHYBu´Ë (>ð_¬¢\#íxÿŠ<ïðáºÖµm;È!ïxãU•®Áq …¶m·ÍË´ÒŽçkõp]—B¡€eYd³ÙÐû+ŸÏ“ÉdB¯­x‘ ™ÀÔê;ÀöJŸÅ‹ÅİL+(´¯ÚãÁƒ¯¥?6©^MµÜVæ œI°K$Œ…“8YWÄ qÊd2¡—N§ ûoñ˜›W¢à6ÒȾ¸‰‡·Åù%Ô3î™&ýxðHžJñ +q—rVN~ÅÛTóQgŒEÌ2€íÛ·³}ûöïl||œ;vÐÛÛ[6ØÈf³ôöö²e˲Ù,7ÜpC]ŽôìÙƒõö·ÏºîL 088vð¤.à‚u²\yÿ•\øÜ…\ø« áv‚ªz6ÁÎ!ðdËLܤ ‚\6Ñ$O *,Ç—x(ˆ \$§\|b—Nnƒ¶m“L&Éd2Ó>ÝH£íêóì\³f ?øÔèË÷v$v61ë¦JbY¹\®m×C5;^ˆ ×âÇ?zÛȾ …a7Ó4É<ËH#¶Fý' k±AjÄdO-ée%obÿ.0>Nðà#Oô°Ä$*#ÈC“Õœ*sÕCÜÃBI£ì/•JQ,ÃI¥„#Jȯ¤Ý2‰lF_ÜÉÈC.ÇqBNB»ÇÆÆBoH ™“°å‘‘‘²ûtÜFª úã“Ýj_Û¶ Tîn¿ŸwKÍŽ!ò7 #Ìí—J¥BoKåá¤+ÏÄû×óUQf š/´/Žãt‰gïßOÏK^†y@1•"C^Ó*yÂÉPF²P‰C[±ô¾p¼³‡d£ôÞr!m€ßùÝÀ(Cþ4Èïd(~òo 
ºìÝã½ßÌ>Q¼ßlÛ&N‡^)ñ"’gI…’¿“hÖ˜¢ø¾OXtCιCCC‹E6lØVÏÍårÓ„4¹®D(ˆ¿‡rVéLºÙŽk!)KÄ#SŠnÄ«âÆ+ŒJ8©ˆÆ©TjZŽÐvŠ`’§P™™fŽ+<àЗ¿Œw¥‰4ÓŽ{öì©ù™¤|š•Á!·öZ)V”EAÓ«¦>|˜õëׇïãnùµ8r䣣£üð²ËX·nݼŽ+ùp‰®ëòþëÞÛˆÜ1¥×Ð&â[-c—0U¥ wŠ'V––tr’z!O맦¦ç‰'žhË÷œ?ñÄŒŽŽòðòå¼äŒ3f=®ë’z*;=øbNó ß‡D&&‚䟦9-ïÙ,‹ÁzR­Çq‚×óì\wa‰F¥­JbÇGŽiù±çcÃO=õ?ü0£££5=¦ Ó$͆oÒ•Jiˆ8‰ü/ÆÞ'¾ Å—ƒ} + s &  ¼èR;!¿2ß„ìÀ~òçÏCòoû™à˜‚¨Õ¦wÓ’Û­ZU,Ó¬n÷±Dør]UzªÍ„„ñÖCeÕTɧ”/MpD¤‹{+×Ê[R«zßää$»wïæp¹/É|ìøðáÃìÞ½›žžž²Áv#¯´T*zøH¶5kÖ„ÅlÛ.»Jèz=´[¨XÌŒŽŽòðÃóÔSOµô¸ ¯_¿¾®õ‰äuóÌÎ;§…wt2SSS·éK^†“Nª½¢ç±aÍ Û:ß÷˜ðOCðI.0¢‰}­s•Ëÿ …ài¢e\:¼®ÜÎ÷¡­šhNGIFå†âû‘ø78|‚ãÇô¼h=Ó ö‘Jíª†”¨_b/±ãv 4æÃSO=ÅþýûÙ¹sç4!Îö=ïyvïÞÝnó¬âV¯^ÝP!Îó<¤ò%ž)+œ t&;wîdÿþý-âæƒŒ¡~Ác¾HÑñn“0w±é\.G"‘=á*«ÕªpÜ:ºy|¼eË|`Í¡Cj3Kœ}ûöuåø8.ÄÍDÝcÀÉO Dj¨jW B\½4]ˆëëëcxx¸¬===3nsî¹ç²}ûvÞwìØ¼Ž™ÉdÂÐÈD"¿çA Œ»PZÑ#Ê—bQy3¿Úl7O‰‹ïïïÇ÷ý0—¿¸7]-z{{Ù¾};ããã­:eeÌÇŽÏ?ÿ|¶oßÎ?ç;µWrÈf9khˆ³L3J’5ÄÜEªTªÜnp0ð”«ÜO>ˆfŽ|–ÉDëÄžX³{ÆY EBç¯ ‚cUzB ”¯åï«…Þf³Á²f ÔD$nA®¬vÚñ|løœsÎÁ²¬iÅ|ö9‚»jÆúh)"m…ŠÊC½4QiÈ}ì !u°ÌcäDð»Z¿ÜFèf_OðÛ;yE¢0í´E*— ·ðAü“O&ÝÛ‹ñÀ$Ï;kï^ÜsÎ »|9‘@è,_Žÿ–·àŸ~:æñã˜'„Úi˜D· à(mSرƒ,U öJÇ–±ÓÀÆáÃKƒ@|äƒ ÏSâÏþŒ=ßù'ýã?ò׿ó;8¯|eYQØ€mãÛ6ÆÑ£8ýýاœÂÀ¿þ+>`NMá®X‰‰¥í¬Ý»ñOœÀìé!ÛÛ‹ |æ'?ᬃùÇ /äP:Íwÿøáü¾rÑEx¶Í=ëÖqüöÛY“Ïóí»îâ'¯=güó?Qá€jˆ(+¢@+™?ÿùÏohUJß÷I&“a¡¹¿e2ÆÆÆtBÙElß¾;vpN©h 7 Ïó(‹d2²ÙlXìE¢\×-+ö"S++â*­¥ÛÆ!¸Ý¯Ú½[½$—8[¶laË–-]cÇ2>Þºuk¸Ì4Mèï‡R±#!^ñ|VdÓ$QˆG¼8¤ÒÑlݺ•ÞÞÞºÇÇ¡'Däõ¾}ûÂ÷Â|}}}aRÄžžÇ©;Ã|òÃI±€ÁÁA}6'¾âÂã.<¬z ¸ß‚´ïÈÁe¥ÉwŽ@(q!^z8¨y$æÍX¬c’'ƒƒƒaÈCÝTXˆOÃq¬$B}lÕ*^éy÷qèýÖ€FÛvu{{¸8$“Aèk5fUŠWñßod¤ú6µ–Ï„i^5‘.ŽBÿäø¾œ_Ë ½‡¦mw!÷¼ ´7þ½*ÄB! –ãF×xõ5Ò†ÿý—¿„U«Â÷qÉÆ"ˆ‰è‰'¤>œ‡‹ xƒo¡ß„ïyÐ ¼m¤›JôYVäq92Rî±cð©§ ¯/Ès»vmð¢¯/Æd yï¯[ˆ[§Ÿ>m.%¯=¢b7&Zé'&ï%ÛA5\ »f O¾àüêŒ3¸ûå/'ÓÓ¦·ÏÌ+Âöå)=$íéÁ*µkPÖŠçž ?ûE`Ìq𮽖ë{z0zzH—¾öµ¬÷<þã¯þŠûo¿¾k®áG«Vqð}ïcÓÏ~Æï]tœu§¾â¬V?ÎSÛ·—yÝ ”ÚÐÎiwCûây ¹gmÛf$Ö—éDR©›vÛq5¤z©„®KUSÛ”Z4ŽúdZ8HYü4´?®bÇò£®t2Œ˜ t™,jG¡¥Î2ž¦Œ—©Ðëׯoˆ*ÝÛÛ˵×^Ë5×\Ö-[p‡[o½µ®m÷ôô°f¦>&gù|01. 
p_zÁ—øúê¯óáÿ6x0‘ &v6p« 8`[ðº–á$Œ"oŸZF¦&&•òuÙt^˜¹a$“ɰzÜØØÅbqZõ¬v³;¾oõêòŽ =¶Í›\Çq‚ï;ÌC§š7"ÖIN¹N%• °þþéB\¥_=ùéªmvªÙ^*œ£B¡)æêRQ<[lÛ&ŸÏóÈk_Ëš‹/æÑ×½ŽG/¾˜=òÎM7áärX?“ üwÚC#ìx¾ …0÷leÎSᔹÐN; …BXHD<Þ\×¥¿¿?´q ­Vf¢vì¿úú×1Óéve‰Ò;ž|ê)j% ¨;“T1“n7Gy胲èX]ܵr!T g¹îºë`ß¾}lß¾½.·e¡¦í¹nàRòPñlÏq0M“ÿþÒÿÎi›N+ÍjÌòr¡×X°ÎªþH¦I_-òù(V)Ì0ò,kú„?›-Ϲåá|0½ ù|ð=¡|[ÛŽUM˜ö7"ëT®+ž€Òñ$rÝHœ©iD•d† –ñ}Ô!JŠWxÌÍ%‰y³h´ÿÅå—ÇwØEéÜÌ%W^SH${9q¢}m¨¹¦$¤¶–P;!¼V’ýjëUë#Dë0*íx!6 Qþ·ãÇsÞÑ£8+V”•.v>ÏDéœò[ůiÛߎÊÉi—ÓW¿)¿:õTÖ¿ìeØ{öâY#¯AÀr]¬b¾ûÝi÷"É'¡g’'níw¿Ë+¯¼’O'¬™˜à§—\Â=kÖð‡D?QȽà¼âî»›~®ÝÏÏó( ‹EFFF:Ös[é\:ÁŽ+Û#p"¶¹®Ú’J§ŠRI£Ç°ìðaµ7¥¥4ÚŽÇW®äí/~1üüç hÁ€Z¦éK`<¼Ô©™#ntttÚ²…äWéíím\rY©™Ë^E†QJ"›N§ù“ÿƒ ×^¬›!JP³6Œ â 7‰Dù¤´Úþ*OËŠÄ5Ó ‰Zy³FFÊs_ù~àM$û«D„7ñì“Êq‚eÕ„8ß …h’´¿V‘€þþòöÆ¿OéüÈ Ð÷}žxà~þ¹Ï±zõj^ûþ÷7ð‡› ±ã˜Ç–LðÞø7råÇ®l­7œ`Ûó m–„•¦RÓCGÛI5!Oòð¹n¹HgY¬<~¼mMm„ 8p€óŸ~šý+VÁØaHÝ~{ Ð'Ñÿjýb²´”:íç×Y‘b¶ Þ¬ñ ¤¾ï‡ŸKÂóLÉͲ,žwÁ:÷\ø¯ÿjnc¥–>:\l…é¤ÝŽã`Y¾ïó£}Œg¾ò®Ø»—»÷ïçÁTŠKÆÇñ×­#›Í288ÈŸ?ÎÑåËÛö;4tLQß÷q'üU„ë d%}‡äû.d ŠáΗ–¥uÚìÞ ;®DÆÊžç‘H$H$ض]w_E©d!vì{¾øE¸ùæv e‰³;Þ85EïUçäe©,fD‡3R,Sª…Ü®âAP&ÈtN"ãñõâËãéÔ³YxøáóyùËç˜#ntt”o¼‘n¸-[¶°mÛ6¶lÙÂèè(===ضݰDÇ "™ ¾ýØX(ZIޱT*ˆ8.¥ ÙDñ;Eš3Ù›«Ûþ|<Ÿâ¿vüŸMø‹‹zÕòd busÉbšsB £v.²R¡Ëó<ÖÿêW\qô(þë¿ ™ä-?>÷ó×iT$ñì[×Ç•w^I͈­ “ÃRã$‘'f7ÍmÍå"¡ºPàÕ¿øE»[6o|àÉSOåzzøçbo÷nŒµk±¯º ¬¾¡„œ&ˆˆHÒ±šŸSòœAEıðû•êX–E6ÜHLÓ +ŠèÖßßÏàà`èQÒßßÏÐÐP™°AÕhÓ4CQÇ4Mžžšâ—]ÆÈý÷‡B]SCÀ*ömÛvèñbF– AºÏóø—;q~ö3öOM±bÿ~Ö]r ÙO|‚B¡Ààà ï|øav>ñDë~¸6P,Ãó2QǽK™'ñªp’„Ñ%ò˜•DqqÍ!ç™Dqñ~鵈üNiŸ'ÛÊëÒ¼èÿßy|ó"[ÌB¡Pþ™¦ÉÈȈ†T+mÇECû—*ñ\l®ëvõƒ®ÕÏ<¼¨âPÓI9|?ò¹©wýZ?"OTd ‚éO-ätH¡ÔüAK||$ØÈ²"ñõ ñ1ªæÇ$ÛÈqd*æ8ѾKC¼pZ)ÆÄ—Â4« qžˆ‚â'ß~÷ƒÌ–LNN’ÍfÙºukY Þ›o¾™©©)²Ùl]¥y›N>œéŠ›B¡†3–²ñMò-õ~¾Knt2aw\gtÒ_ý§âõínà9ü¼ç•/ˆõ¦ib¾Ï ’ÔKzŒÎé¯;Ó¬-ôtòÀ@^Kûs9îØ¶·´»}ó@æÁ+Ö®%˜žÙ,Cž7»We‘`r×û›0æ* $‰ª9&å>!^k¹\Žt:Še¶m‡y“DT«•¯RBée1V!°Ç…5Ù^ú´?†¿û]¾óïð’RBÏó`bb‚d2I.—Ã4ÍiBa#Èù>‹üï~ .» ·”§2‘Hày^èÑç8={öгg\r žç…ƒæn¼ñÆÆÿ€Dü!ŸRy‚NBâÕ½Òë Dã2ù\Bn‚{ßHl’NDJË/MÐgØýH¢´n Ò\<Ô=CyÂÆ!¢Š'—ó7~ÿlfs»Ï^Óñ<|>öu*~(ÀÆ©©v7A™>e}i\TËf³a…ål6‹išaÊxÑ‚J!®“rƒÏ•§Ÿ~ºægu÷µ^}«Å‰‹C"%Á´C'—ŠÅ@”’ ºx–©âuéR©HªF\¤Š¿†úü;dh%m(+ËŠ‚jjm#T ½«R©ƒÖ3¤«–‰¨=Õ¶aõê'¨³hj Ä 
cÛvÕª¨===är9Þüæ7cÛvKsX^V9+–UA.—ãö÷ßÎÙ?9;ðZŠ{I‚A—EÆ t<µr¦åóyN;í´v7¯¡ˆ— ‚I‚²¸èâAE5䡨Š+‚ù®„Þærµ¿«a€`2JÖ€y`5¡ªX,R,$ŸÏcY©TŠt:M&“ '4y+•›%³*š®Ëë×þ€aáqÄë*—Ë…Ò|>O*•"›Í266†8H~“b±ˆaÓŠÌH"ŽÃe¥êͽï{_ЮÒ#Rž ØVEQÖ9|øpSÎM'ÍfÃó:22ÒÕ“ƒ#âZ‚ H•ΈcñÉCü’ñKŸM”öc–>/]û&'Úår™ŠÏ*1)/Y<”Í Õd³ÙÐ#7(íÄžyæµÇz(Pž3¬HÐKÿ—,ýõˆÊ¼›D}¨< Íy;±uåaI¢´~2x×ÿ¾‹ .»cÀàȾ#‰D"­†báùñ™†1çЈÊ"½âö£ú6OþÉŸÀßý]ÍÉG¼ Òfñ¸“>QBhå½’"*ÆÅ£l6[!Õ’s¹@ˆûoÿ­´¸üw”öe2™p/¿ÁbEØçr¹Ð#nQ"ÚÄÓ ",MTy€HÈJy© åá—f5!.‚Å;Ú5bÎò¹R7®ë’L&C]óv‚ûd­ ©<[Ú•œÿè£ínFk‘|•rdÞ*;D o_£ôWÂêåÁEeŽKéo%\_9¹…Yýµ[°Xpè/qä³GèííÅ+z¤tÓÌ…ç?N&“Á³øzñëüæØo¸¹"iæT\‡ëqW. ضM2›dHfubhñÖ.I¥,ÊP tzZXu.— <„ÔcSéz`²·7x'çjáå}’‰¿ 6k F×uÃ0u˲( d2òù|èÕäj`dd¨¬Ï(9z‘H%qÉ&ž>ñ¼ Õ¥eEÅ©epãºÁÈL&*ŽmÁK6Á_‰Ê•÷ñýË@JÖéï{â:r%‘K`õîq,.šyž “étº¬ê¡mÛ3{ö™&=\Àè#15r‰Æ>â_ 9;cø¾O6›í~±B®;¹öÄ b€àúËL’’¥uDá¡Á=é`é3›éaž•fè”ö"ùv¥jWÛu·%p˜ï©6™9D­Xú»H„¸Gy„ß[ìqÆéôµ‘'!ü”>áDžw¹}n¥ˆR9Mª¼Í›L{¨M:É@`3,ÆÞtê½v/&&#™Ò€Å*€™H$ÂãåÇŒ{ë-ÑjŸ“W]E_…-×JoáïßÓ‹òùGaä_À¼²ÝßDiËúúú®š Ç\5¯¹f&>œ™p¼jÝ«àÍ3¬¸Ä\•#^¢%\T óª’³FQ:•ã?ü!×¾êUÁ¨¡ž*PŽ&¡×eöþüç¿‹?ýÓϱÿ1vïÞÍ~ô+.½ô<Ž3¸æš{ð}Ÿ¯|ååx^À‘;L¼Xgáû)<¯üÉ_ü!d­Z4ñ:8™Lyý©±‘ÍÛŠÞŸÇV~ýzNÇØ0ðe$)¤çzf<‡GE‘Sd˜œ|ª–Õ~òù|˜/°ëb Â" ‚ëO¼ &ˆ&ø9¢{ˆqòµS±u”®F¼„kåÍTD<œZŠ„ÄÅk‡(Zg‘ .Ä=¡f¿ö$ß¡·KEX»o†xÙw#.Áí\ ÊH.LÉQž'òd³Dµ,AD@å%9Hô[ÎÓ!ŸÏ‡âOeN¶ø¼¸ÀfwKÁ¶e†gË\uÕþîïžæ/8+Zß…ä6Ƚ¹2 ØRÈLYR,غu+ŽãÉdxï{ß[Vº×qn¼ñF®»îº–æ‡8(IÙ¥,E•A„$¹¾ä‡—´ñ4*JuÂÎ9^Û¸ÄÛ.7õÝÄ•_¿R…8¥+xàØòÐCܲuk}ªS‰‚ ©Òêé| < —ÄG>ò9|N?ý)Î:kŒ\î%ìÛ÷ ¶nÝŠe™ÀµÓöW-‡D2ˆf¹Üô$¯õz›U¯ÔÔhœÏïÇq rŸX^Ù[+Ame»*Ô<%ê ¾ë àBúûû±í!<ÏeÇH$. 
·I¥`×®]\~ùåxÞ+¦§ÀûÎçž{^…çy¤Ó«Ùµkwßý<Ààøñ£Í99m Nãy^ŽÓ‰Ä'úI"o 5$ÐÇ·Ì:^+‹)Â288¨"\½øBÙL5¡òDž@`"OrËQŽ.ùïåH•Áá&.űu<‚kY~>¯´|(Ä1¬“ý×,”3u{úSŒdrá;j4•ý§C$”H~6)<“£zzz¡®–·\K(ªæ ’pÃ0HÙ¥¯0Ç}+JÙû䓬ؿ¿ª·ë/wƒ²,Ú£tOžz*gœqF]ë‹PÈÿ0XWJ #ËÄD®T¹ÔÀ¶ÍÒƒ p}ië¹W0šG¾¹v‘½ÿzìÛo'Õ_ذýVc a®ë’H$ÊòÅ™¦G:‹DH.™´é4ãË¿ÏÚ³NãýÀþá (FøÒ—þM›6ÑÓó›í>• A Qtœ'9‚Ä£4^¼ .Â¥(ÏÏ&v âÚ’¥P(„…j4~X —`R,Ü>åa݉Ҳ~‚ë1E Àˆ'”\£C¥uâ9¹FJû±‰„4JËr¥õN"º¶%UîKÞF(÷– Ö½ãewp}xí>ÜY6*ö!¹ú±×ÁùÏEñ|³™Y¼'Žã„¡å@X¬ Ê‹IÅï]]éѽ8ö‹_`\tQÙ2Ë‚ÑÑzÊR™ðà "W„ú†(Í%,K*bÜää$£££LNNríµ×†¢\»°m»¼,H–eE ¿Mô¤XQ:€ûî»SŸ|2J>GnèITˆSºxíÁÚ£MÏ ò©I¨eî¿õ,áÓ_)žày^Y¥Ò¥æ­áNmÄfWKŽ%ƒrߤZ¬ïûø¾O.—  ÑßßO"‘ˆ ; ñøç?ϦO~ŒT*…aá„©··—{キݧ²!‹Å¦UÈâ•#5IÊ+‹æbëÉmEÇ=J ×uÉf³¤R©ò"-‹™3‡òIõn§ôzŒàúqˆ®3ÓDÔNPîÁ&¡ÞÑØÍ*mW$ò’Ëgc¥ÏÝÒ2»´¤Mo:«Ô~y˜T,-Ï•Ž•'ʱ:Ät‡ƒR>®]ÛZsOi$.°¬]¸Å )îÙ&y^GΡô_>çsnà¸]rá†A6›Å4MR©Ô´ÂKqÁM½Û:«ÂéB^ÊÏfVz¡K"}PéÑJkXV¹ ··—­[·¶»]!+öï¯ST(ÊE8y‚wá®B”Whšƒ’¢4…M+VD  J¸®‹ë¹ÁS.½—*]Âs‡ñ¾þ°æç2 ë7T$œ6 #T.™ b-öì­¿ø‰…R:ÏóÂÄÍxÏUz&\ñÎwÂ'?É ãÀ† \ùôÓ|úíoçÑ—¼„oûÛí>‹ ÁqœöyÉçTS oݻڸ&3ëÞ•%F±X$™ ’°/™>Ö!¯ŠDÉïÅc "ï³ø=É#ÓrDIô3D¢šˆw2*á‰q(›°úd²å‰ë姈‡:ÊÄ;žT_ößVÄ7Yw=¸=xð —?Þø Sa ØgÀ;Íà÷ÏÆø.òàbÀ³ÀñàÅ><ìA¶Tn2Ø^©8Ul¿R¹)þGžDŠp&ž ø+Õ¡lß÷Éçóä2àò‰ ÎX½ž|’œaÇÈf1 cº»¿çIb%y¬eúuRÛ~²ÙÐéB‚ù’Éþ°ê}âA+–h‹¥Î²…¬Ø¿¿jçbšf˜ìÞ0ŒòH&ƒÝÉ„1—›_î E©—ÇŽMóè4M“u“ë‚7ꆬt '?÷Ë—/Ÿ¶|` xâŽIÅ;¡Ô·JõS ‘¬UEjÉqÖY-?¤TQÌår¸n0c”j¡q¯9Ïó¢]±A䩞ÇõÙ,ŸííåœsÎi÷\0…B¡L n8”‡§ Rͱ@&óñP§í>;J·áº.¶m/^o)„`ˆh’ _¦ Î-Âv±ô?CT¡>Cê=AI#Ý\eW ›T®¦oÊñ%|µ’jóŒD•ÏkåˆZ¤·Ë£G.<”ÒuK%'ýHüš"H“úsž5áECQ¡x!ð;>\m—o÷[U®Ç Ö‘ŽL"+ÛmšQ~ê ¬åÜo†lÃ(Ut7 cÍ\¾¯i–'¯-£ÉmµUVªvŽi–3Íé;Åbä¹âûÑwŽSùð*› ¶3Í`›xŬxµ©ÅLL¯È窕7¥@Ô·@ä=ÛAÎùJóéX!N.÷5»vÁoüÆ´ÏmÛ¦P(ðì?< ,-¬£/—á\$…Ââï”ö±Å V?>í†mÆ®nw ¥~N­RɺP¨ò0£”OeêÛSü˯ÿ 6àû>®ë†áKž]»`ଖÖ4ÍPpËçó b—&ŽãP(Èår¤Óé0wœP(uhˆM££LMMµû,.˜|>_ýiu#dëI¢I~–(Œä~гH'ÜJóq‡b±ÈØØØâ÷¯–Ë­Hä¡–(­#a¡Yà¸ØãCÖ‡KMXÜ <áý$F¢_&°²Ò·’„Ëø~ \XV”]Ý$ ­‡b1ˆl»¼,7^M"I¨Žx ¸n°M|/âFûçõwÜÑî_lÎüך5sËW,F¿‡À£~@¹p*Um_gç*ª$ÕÂó‚Ïãâv-¡»”zCîäóù0„My8iÃÄrñÌËf›Ê妧s.nkr«õ–,7ÍHÄK¥Êm´’\nnjßžâÆïy¼úüós^ẔcÇÊÞßtÓ$®;^ûw6)ýEˆS–+ĉŽ~ê“OV½éÈ„Ž P¸ì5õ¥H‰÷‰D Æ)J³Ø¿b†ïÃW”-w‡þ ýœÅYín¢¢Ô͉§žâ¹—½¬lYÕ*Ÿ%¢Çïᔜ‚iš‹×Cc>\~9¼¼Ý­({R+^æ™LÓ4ÃPÍø¤"(°á“É,Ž˜Èb±Š 'K"'•¥rb<¥ÕÔšDUC„ß/÷ óU¹Ÿ¸¨ 
¯e]™ÌVŽçD¨/ Ï+÷¾¨æ­Q(L÷¸ˆcÛÓ'ÊÙìôõJ!f¯r]žÙ<÷1D:sp¶\„“ß ‚ó.vSÙŽl68ç³%¶x?ṗýcÁC1 M\C9Ês²¥ˆª§ĵü¹ O‡ó€åÀ•¥×ÿÓö¿ÿ*§$à›ùà{8Nd»q;Œßtzö’Úò¤ß²Ê¯apŽ1¦âùUc»sºðÁÈYÎn¯Åbð›HòÙÊ>Ä0À´ƒßÔ%fgªL:—g®;]D­ |H{mFk\tÅÏi=¥Ø+©,¡ß¾Q!°†ØrÅþ¾¿m¿Ûº³Ö|àhÅý¨·÷@Í[0} `RÛf•EKÇ që&'Y}ß}ÓT¾ïS(¤ð*p† 1½jp(úÇß{^´;ÓŒîgŽ|V-ZJBñ‡†‚~#ŽîÏ©ÔÜû7eiá¯\9íFãyÆXš@é¼ÒÿGÎ?³dËÍQU_3F&&ô]ÕVãT"ì5»°;hÐešfè©X9©0šŽ«*º@$Œ¯!HÒw— Ô$ªfPšÜÛ0äGb“ â„,sÝòЧJ/lj„¥¸X ¤RåÛx^ày’p#¡ò·Íçƒýʵ+žªdÛÓ'dÎ$ž²­ Æ*eòÝãBN<ì«ò< 2É”vÈ1¤Õ<†ª-/µgï“Or^c¬ m Î{ç;–Î'ì·ÃüSÖV9Ð(Ÿð‹`›ÏGç:a BA4áu>üëA8Óƒ ,ø'þØ„?“ý”ÖMxŸ9‚ÜQËTi9×Ö Qx×-Uì%~‰ŸOõÐS x]ç®^î¸ý6 ܶk{ø>O~úÂÑbN™I<”ÊOžLÖj —’ûMzÌö3ÆóôÕC•ûžÌCEh“âG†a´77c·…x-’¼v.°é¬³Âë¼P€Tjq|7¥¹t´W‹B¡ÀSO]ƒûZ ©_÷òу8ÇÊxT$I˜zy&ýxðʃ§TªüÁn&Q\W…8e¹ ¥xs¡ç?ø`¸Læa*‰g%ÇMŽ¿ù8/ú­-Ž0©’ËUãë0$?œäÄuÝòœ¬‹„г~n'(øÀûÝ ñ÷wlÈX7Æà¤K¹…ìÈñ¢2±¶^¥"e¢‘ZñóŠ(ï¡fuyL*ÂŒge>žó™V;ÿ³ ªæ3èšáwÞ;>ÞÕBœTB~ú-o©Í9+òTåK.ü[VÆÂ+Ãñô𠍋€• Ä’ ×Ê7à ÂFM"O¦xø§xÀÙ„)ÈQ.®ÍånÔÜØ­#†¬)ªyÖ•ÚóÐóžÇ¦Æ±iø9⦅¦zô÷×¹ôsµÎe‘à^:HT…¶Yíõ}ŠÅbÕ{ÆbñWæÏ…;w†÷à|lÛ›9ÿìš#\é|!îgþ0WT,óý ãã`¾ø6`‚Qº7‰h–Hc×f³µ¦˜fùøQŠžÔŠ¾ˆ‘ ¥&ÇŽcy•åûÿm?çýüüò—¿Öî³2gŽìÛ½)ÌáÉÔÁ˜£ÉZ˜S ‘’"SZí]©Æ±cÇ!βÂñ± µJ=´Lˆfrr2|ßÛÛËÖ­[gÜæØ±c°jUÙ²d>ûÙó®@0Xˆ—ÿ­A.W/yUë?ë{é0>´f ¿õè£eË<Ï D8£4!÷¢Šß’Š"ŸlK扅B4(óýò¿2דqŸ¤i‰ r’®EÒúÈ@¯šNR™KQY\ÌÇŽ®šœd])Ÿ‹Øb{ð¸—þ_ÈF… …`ÕL&XU ˜JŠCÂL3ZÊëLÈX(ðn‰ FÚ6X¯º £PÀyp#Î)o uû›1ͨ¦€÷£×â{ D5)‰ }ù|°¿T*ø/m­L‹X™ÖÐuÁ~ßûàáÏa¯ú…µWc]öl%»o_ûSçkÇË—ÇbFòù™'i>°&f; Ô9âE‰ …BX”H…·¥Å|íøÀ»à°mÜ|Ú€SÇ:Ê¢§eBœã8ôõõÍy»¸Kgs]KH~‹ºÝ‘gëã¹9ÖÃ"‰ÐQê`¾6ü¦¯|>øA rsw€ü‚{d¤|À¯X/ïÅK®Z1²¸½šfùö’QÒÀHE—g2ÑÜ3^Õ<_* &©…Òé`‘‘¨šyeq»Ù¼Ne]°ÛÇ|ìØžÜ»—ý/99T”Q€“ÿãdv}v»îÙŵöµíþª‹L»˲ð}Ÿñññ¶µa¾ý1DÞpfµäýtr23±wí»”²;.çîÝËäé§óôùç‘›Åbp#7ŒéƒŒª!Ë\ϵ°&E¼ÒzâÍ+9Ü2¡¥•câDi_T|VQ—ÀûÎá®»0ÿèõxûWyǃ¦ŽOr9° ƒ'(þÓôôÀÅ`õÀºj]ùaÁ%O1cš3=¤àd¼.‰ˆw–9Ë:˜©k1Þw-¦¶Ç4KÇM¬ [I&SÁSÏœ#r®=È”XÝsÛ¶;€·Ì͈Ä|Çg<ýtð¦Xœýé¯OSË"°Í"÷Û¶U|[Â,¤?^þ‚„#©ô÷÷36S¸¼:;(´Pˆåæ›o®{}XõÌ3eN…|æ3wqûû'¹ê{Wƒ…:'3Ùlwz(Ã\mXX}ß}áë ÙyŠìw ÷~°ÿ¢þýÌW¼2Íéƒ<Û.÷ª“tF3Ó+s,ºîôÐlÙ‡8šô÷[ó\7ÅcOžJǽ}?¹E€”H/ù<þ½jbQ¦3_;>kb¢$Àï§ÙJ<Ûãí'¿å½Ëç¼ÿ¥D.WnÞmX–Åèèh[Û0_;†àHvw•7Íéy¦Rþ ÀùN@âhK¤ÀðLIöe+¶­{m 
‡DK©ÜÞ›á+8sy@/ëyïãðåxnì»I[¤=‘cl¼­µÒ|™ŸU®gT¼®Œì>VÚ`¹'tè7nŒ†Ô@9Dá¶:ŽƒaX–æ}SXX¼§§‡K"=ÙVÏ¥Z"Ä399¶ùöíÛ«®¿wï^&︃_¿ï>¸è¢p¹iúø>\ôÄEѺ6«%¬IØŸeEaƒƒÁúÉdy[ÄÛGÖŸÍ3i)2>>ÎŽ;ؽ»õƒe9þ\l`÷îÝ<þøãáû|ø!6pÕ®«4‡V bùáú¿+×<¡²€…xÏÅÃI*·°™Rˆ¯ç7Zq8¯> e1ÍàZ7Í œWÖ7 ø›¿ÙÏž=ÿÀOìm˹«?ôÐCÜqÇL^|1'¥ Ç.a¾Îþãr)}XÙlç‹¶31<<ÌW¾òÖ¬Y³ð̓¹Úñ}÷ÝÇwÞÉ+^ñ ¶oߎçyئ¸ìVˆpѼÏ 0Ì`È÷H3 ž6úV-} ”ú–ÒréãÊÂôJ…¥œBÔ÷8Dýš¬ãx±Äïf *\ØPÊ ê82KžiF©Eé»ûF Ð ÿ‰Âô #­ÁòñK¦Ž.!1Hy¨YŠÙ²ZâQ|³:ŠÚN7ÃSÛx妽<ôлزeKƒÍ™™ÏØxÛ¶mضÍÖ­[1€+ÿûÿÿ³÷îqnœõ½ÿÛ±ßâuÆâdscoH d¶v(4tÚBB1•Bi ¦¥Ò‰{L¡§§RIÛ”¦•Îi¹¹?ŸJ…†¤´ÕÀžp 4ìJHÀvw\ !k’ìĉ½Ø ÉŽ³Nb;N¢ß£gf¤•vµZ­.»ß÷뵯•4·G£¯=Ïçù^ø›Ï~òÎÕ§ªÎJôÞ*êÞFO ÃÜR®µQ5Ù|{äR­ãó–ñmséÖññÏÆÆxøH-o8?äÂô³Àó¼ ˆ”ˆpÅàà ¶msð`wNÅã¼²4‰Pc»|>?u˜s¿WB÷±cÇvïÞ]ÿø¸Øž~úéâ<UÚO×[¿„¹gªïÕðpùöBÁ·ƒk®ù§¶´u¦v¼{÷îâg>ó™bªX,þÏx¼X,VômÃÅb1S,>½ééâmoº­-ï©Q¿CCínIã(Ûh3µãÏ|æ3ÅÝ»wÏu]/S©âè?ÿs1U,)‹©¢?´ˆ‹ÅÌ¡b1w X,‹Åûìáaÿ3Îåüÿ©”ÿ»ŸH„û˜¦ß(R)»Ú¿Pðïf;é*í£U426VŒ‹Å‹zÈ·åb±X/9NqŠ¡§oäFÅóLéÿh±XÔ‚S‹E߆K_•b"áÛo&ÓòÛ$ÔI·û*¦R©òN0ŠQ,Åb1W,ÍÆÛ—J¥Š¹\®-÷F˜9ÝbÇÑ1PßO~R¹üòâÐ÷ŠÍ-`f2>n‰G\OO==aAooo]Ç=ÿüó@¸g†ï‘ÑaÞž*¤M幚n‘¥2™~%±XùЏ ÕsÝ0_–Ê]áºa©L&LÂþãh&Óœ>”P_yé©k$“aˆžza[*=’Ta,VyWy¼ÜŠø›²•ýÒª}§åJmÔ†êëcbÃÖP*r ’u˜ 3Õ÷½ÒëI%Þ¾½=+×Ú1À ú ð ú=Z7Z7¶å= ÙØ1ÀÎ9 HÇã$ð‹ðyD<ܶâ缚¦ŸŽVN§Ã"&†1¹ bµPùÊ´’Rfa1;vÇOîyþ@ôc›ú R'Ì çàÛºnâýá˜Î0j{Š ‚¢Q;>~ü8¦ÊÿQI)ôúµ7Ü Ók ó—–q»víbïÞ½AÄ={öLkààM/½ø¢ŒÍÚÛa!=jÀÒ,ÔÀ>*ªAyIs…¦ùapj›ªÀ©„´JTn,%äY–ÿu~"¨®¡^&Ö?º§¿ß¬R’¨$½Ñ$ûª=¦Š}• x£ù·²Ù0'—"—-{ÏÜ}SЈ ¼,âb­ëð¯¯üWnøÓÚò„Î@Ó §çɶ\»Q;ð–-CÓjˆùá!´ÙرmÛœmš¤ÿè:í´Ðt] ¿¡JP?éØpA̶CaüÇQ!MD5a:fcÇpüž{Ðn¸Á_5MxÛÛjÂüI_È‚‡¬ÞÀÍÀP ôðEiA˜ ³±ãåçž[}²¢3«Ð½|>¤ÒN¨‡ÙØñS/9Îa=ÏÃó¼ÚáÏjQDXð´DˆÛ²e ¶msã7ÒÓÓáC‡ÈV–[¬àÑGåØ9çÏ5 ’É$ï{ãûxÃñ7t”Šëºd2<Ï Ä7×uÉf³Jy?¢^0š¦Ïc‘•Gõn¿ýö¶ÜëFí8 |ôða¾~8Bi²gâÏ÷ʰR*RПõ¸éy¡^µã5ûöÂyq|g‹Ø´§(#ZˆAfÊlÆ‹K|*45‹•ýÖ&=|»N"Å EBœ¢¯¯~Â[»ˆV,ô„gæÏ”j“BÛ˜‰ ¼¸hŽ«ù•ël›Þ;{éû¤ˆqB{™©/~þyžY¾œl¶\ˆ[·nëÌuš:CTÎNavÌÔŽóÀi÷ÞË{·l™¼Q ÿTZõMçÞ‰Dµ¨§˜KY–¬ØGÅ-å1¦ë:ù|>À¢“ÜD"<ŽŠgñxœT*ˆg†a‹Åj iª Ã0Ê®¡DÀL$Î7šw'ÈŒ_±I¾Ý/:!І uʤ}¦v °ìž{¸¾·×W‡§ kp]ðL0,?‚¡CÞ¶0ÏhÄŽ'}U…Þ:E8%Àçr¹²þ ÓPivTºžhîÐÉïir.mEúž3¬Ò99êI¥RÝ¡ÚGu©êy5Tº ÛS1F˜â¨*í‘ãøûFxíx¼<ÍTô£Š¶çСmý¬±ãE'NpD3Ù•¯1¶«|Í"ôT4-âfÂK§Æ…]D¡ÔÑäóyâkâhy ÄùBè"Ô#À…w]ØîæÂŒyî¹ç€…áÔ 
Ta3Ì[@³m¨âT¢0çk§çxS˜ã8xž‡iš‡˜iše‚Z2™dxxÇq¯±¨G["‘À-ÍÆ¢¢WôqÔû-*f©ëjšV&„Åb±`R\KHÓ4­Ì›¶RdjóÿþÿÁ]/½ñSîgÉs óz0Þ!"\·õJUD¿çó ߥsY.4M+÷<Â{Tžj]S D…+%@yžÿ_ÓB‘¬Pð_[¿Þ÷UÝ×À@f»aQ“tÚ¿¶ò8UÏ+1 ¿-šæ·Å²ük©y¶]}Œ <éÕ{T©{T; #|ïá=š|íÊvDï:FûÔ¶Z¦Í%®Šõ)”°-BXí¾Ü}w{r(7Š]úÆ+¯ ÞÿÀÀCQ·ãJÑM¼á„+Ä:ýtƯ¼¯Ty,‘Èø†œ¢£òà Ât¨SÓ4afÅý¡#XòÜsZööIBÜ¿þë¿ò¶«ÞÆ™œÙî&vé´„‡µš¯?Îu/¾8éuÇñóÝ«‰B'aYV ¨9Žƒ®ëhš ô5Mò¬@«&¨ —¹†Q5t¨Tzb5¼±DH›;ÀxÕ«XêÇ@UÝ'ÿr0þÍŸ0þ¼Ý-žŸ(oNMÓp]7Ž~OmÛŸ¦›¦‰mÛØ¶M&“Áu]Òé4…B|>ã8är9lÛÆ²,r¹®ë’L&ÂqÒét0¹·mÓ4ËÎÓMüâ‚ 8óé§ËÕ¦,þünš®Ãó Ïsíµ[øÈGÖbš&ëׇۣ^cJç·íPìò?ÿyÔëÊ‹â‹å×­«EåN= :‰Dy¤ò*› ]¯}­©d+SþÔÓMWËñ=]ûªí£žGźJ £}ÅÌfÃkÿë¿°\¼§Lå‡gRîYA8­Ý ¨ÅÄÄà÷Ѧ‰Sb”Y„NçÐÆŒãº°víwÿÅÝb»BWòâi§ñÓsß®nºþß §ÝÀ™ç‹7S** sÎþ•+Yùøã¾àä„@Å×AömþMåƒk%J8S^.àO²Ó‘’éjÛ¶ƒ}†‡‡Ñ4 ]׃A¿ ÿæ7&&|!5•*7X ûÈ?ú×/2Ïp"nIÊ‹‚à‹ÞJ8K§ÓX–££áßÐ/š ¡Ð‹•?×õÐÍ4ý×§¥:m‘Fè~vò$ÚȤ…¨æ;+ÄoÈê·,•K‰ˆ!tÚ·σžž‡8ø“ƒ’ßPèJžxÅ+p_¼Ð¨:@?ì½k¯ŸxV_fŒª-´ŽcK–ð¶;îàÚžkýDÉäßÚQ(¬œ¾zx3Q“tÏóˆÇã€/¨‰t‹ł´T*5ÿÂÑ„ó /T÷`4Áù]H¾rïí£4-•‹Wús#ÏUõV7²½r_;òØì¯;‘ÿP^õµ2r-*¦¹®ÔŽã/%­J§ÓU[–UöXKy£ç)øâ˜ú®e2™@øR¹ÈÔëjŸB¡Pv¬zlšfYÃèçÖ¨j%]í}½‘Üó|ámýzßsÍ0à²ËFÈf³è:ìÚµNò¬ ã]„ã„©´œÊxdï 5èØÐT×Õ‚>:žË„Oè:Ž.]Zrg70Î6Ć…®ä©Õ«9vl‘¿xrpKþ†r[Ù]E4/xþñÇyzÑ"oZ 7ã qi(ü#péÜ]WåxŠÇãA%µl6äh‹†F'ì‚P XZÊÙYIú·Á{ ­ïÊR>ð’X¡Ä²(JLÓi|[­ÍÔB§¼’«N¨·hÀ§NqÕé§³ÿùçyòÅY¾|9cccôööòðódÉ.ºè"^½šåccôöö2¶t©¿0þÊW²dÃ,àÔÇ?ŽsúéX€—ÉàQr~ÍdpJmÑ"Éd°JmA×õÒøº‘ÏE‰›êþU[{ê7³ÝM®›S§N¡•*MDòª©ê“Ñâ"Žó}}¢¾ …êÿý´/å€ è 4†ëJ±†Vsä…Ð_¦?á+¥c›*•¦iŒŽú%Ñ¢IØ¡ÙØÀÅ÷ß U& Cbh9ƒÕ…Тy U…O…6 ×R©Tð]Èd2A¶n,. 
4—õG2~ÖYþÒ¯¬]‰3—ËI¸¾Ð±,õ<.úéÏq]_„³**þ_ªÐ±BÜ /¼€mû+# 319k« t8Ú÷¿ÏÀÏúžDÕ´BðdO¯½óÙôÃJt„náK–ðËß_„óÁuÝ@ˆH$R¥Th)£ûöqÖOø Ô1HZ~>-24=ôÉq²Y?Ûª ªi±ReÓ4' m ‹çÂôœ:uŠãËJñÓ)(|¯\ˆs]7ð¬„Næ/¾ˆûãKÈçý¾¯êBœqB :Vˆ[‚_UîÉ¿~’'ÿç“ínŽ Ì˜eÇŽqíiÐuxü5‹'t%§Ÿ<É+Nb.¦”õZ˜-™ ‹ínÅÂ"}ë?òôÙO3ᢓBMÓ‚¶xi­fÿâżjÉ:ˆû^µŽS0tfU JUu]7ðxÖu½jUÐX+K ó’Þ+xõu×á~¯ºw¸®ëey5¡Sy¢Xdù¢Eµ«øÊÜO˜‚ŽâÎX´À“GNâ⊚,t%_â78ô;‡Ä†…®äŒXúŠgH¼ÆúÆd•ZèJÞx×n~ºñ2J)«ê"NcYº®ÕM5MNhÏ/]JߣÁô«I–̲!,Ë"]JÌ¥¼Ýt]<ž5Mï6aN¹ì2Ö¾û#$¯çáë®ë¡}ÒÏ Ý¾õQL&ot™Õ"‰0¿éH!î䊼ó–ÏaÛÐû™^6}v“$ºŽÇW®ä›ÏþFNza¡;ÙwñÅœúÅ*H@ïõ½²J-t%Ï,ûu¾vêUuí«’-' ñþ:Šg–/§çÐ…¸¿ÔXžÉhhµiš}' >„–â¾¼³Á|oøš#UŒ„.cñK/‡††ü.~‘I˜›*9¼¡#…¸¥Ï=DzŸÇþø¯¾K¡Ë8²b<}1ûþ×¾ ‰± t#?þüUA•I™¬ Ýȇ{G¸ûÁ³™NW³,K<2„Žå¶ë¯géË߯ÀoÍLˆ‹†›ªÐjñxÚ‰5±ß+-‹Éâ‡ÐUü|Ý:®ûÄ”¿˜&ô‚“µka :Rˆ¸ç¥7³úÞ§¸çïiwS¡!zzzxõ«NpÑß\Ôî¦BÃô?Žç½=OÆ$ÝÆ[þÛŸrñŧ¡ÕX™Àu]b±˜T=:–Ë~ê’_áç…›N'N§Óض „VƒÌBgàœN°0bYVPDº‰ ïzˆ ÏÀ²,ò*÷E‰äêbI»P“'Oòƒ—¿œ÷¿ô2Ì]7¶»9‚ÐK—®C{RcMÖèkÚÝAhˆ÷|ìcìaLÐ]™À ݉çiT:ÿ8ŽäÅ BJ¡ƒ¹GûÖ“©%bÛ6®ë}•è¦i%tmäÓÇC1Ù0Œ 0ˆ t¿ÿáŒl71 ƒÓž>Í/Î Ý­P'é·téR.¾øŒ—÷B×ò‹_\ÀE/½„kJXªÐ½üüĹœ}Ú"0üúQÿ±ª™ËåÚý6„ÎLí8!$®-ͶmlÛÆ0 Éù&´…™ÚñÄ+å†ß†l6‹çy2F:‚Fæy…åçrõòÃÜ{›Ë¬7Àh»ß…ÐM´Ì#.N366F___YÞŠZèËá‚w^Ðîû#3µá³—=ËÛú–ÊCè(fjÇ=+ßÚ —ã5R¢O戙ÚñüÁRÙöE€IDATAtݯ900Ðîæ 0s;¶¬0¯–Ê©eš¦Œ3„¶2S;>ý¢Ÿ`¾R©”Ø®Ð1ÌÔŽ6mz7ç÷8ç|úµja†´Ä#ndd„C‡±sçN Tœ§Ê;½êm÷ý 1¾üÄJL™ï D#v|Qq5oý¥Å2X:„FìxãÆg?|$š¸^ÚE#v\(€¦! 
#BÇЈƒN-œB§Ð¨k\ûâ~^8ÉÞ"Ì–xÄíÙ³‡¾¾¾àùæÍ›Ù³gÏ”Ç\¨ÝÕöüpÛ¶mkoðïÝŽ;ÚÝ vìØ1íg6ŸiĆ÷-ÚǧîÿTÛÛ-öÒ ßévÒˆÿÊÙâõM¿*8—ˆýtf;ÚE#v|Ûm·µ½*_§Ø±´£3hÄŽ-+ëºhšÖ¶TÒÿH;:ƒFìø¿ø–ÕÞdZ2>.Gìxfv¼ÿ~Ç!aؼôû/Á_«^èöEÙF=´Ä#îØ±cœwÞyÁóÞÞ©=Ý~úÓŸr÷©»ùÒM_båÊ•­hbUöïßßvÃzúé§™˜˜`dd¤­í8xð »wïfõêÕm¹þ³Ï>ËÁƒyöÙgÛrý™Ú0ÀÈ‘~vÚÏxpÛƒmi3ˆýTÒîï´²ã_|±-ן©ïÝ»—¿ðïYqh¯´}ù:Å~:«O<ñO<ñgŸ}6Û·ooùõgjÇwß}7>ú(?ü0>úhËÛ«è;–v„ìß¿Ÿ'NÔ•¨™426~üñÇyàÊŽk5íî¤åtãøø±Çã©§žjë}“ñq9í¶ã'žx‚ñññ®ïß¿Ÿx€Ñ¯Žòƒ«À‘Ïϵ¥éA»í§S8xð ããã,[¶¬®ý[V¬a&|ï{ßkwaÖt — ̆íÛ··Eh„fÒnÏ Ah26æ 2>º¿ù›¿iw„y@KBSûúúÊVFFFèééi÷{„ºæbÇÂ|@ìX˜ˆ ó±ca> v,´ƒ– q€_j}ºä‡‚ÐIˆ ó±ca> v,ÌÄŽ…ù€Ø±0;ÚÁâ~ô£ë‹(EùÖ[oÅu]î¼óNþò/ÿR”f¡kæbÇÂ|@ìX˜ˆ ó±ca> v,´ƒEÅb±Øª‹qèÐ!úúúİ…®DlX˜ˆ ó±ca> v,ÌÄŽ…ù€Ø±ÐJZ*Ä ‚ ‚ ‚ ‚ ÂB¥%9âAAAAa¡Ó’qÆÄÄù|ž½{÷266F___ðú?ÿó?óýï€óÎ;¯ì˜Zۚі͛7×u­¹hÇàà ¶m—Ý‹©®5W÷B¨ŸZ6¬¶u²ÏUÄŽ»Fìx.?·Nµãv|§…ú;žŒôÇ݇Øñd¤?î>d|<±ãæ±ÐîM'i&B3µ›ç711Á7ÞøRlÛ&•JN§#K§Óض7Õ¶Ù°cÇvíÚUöZ+Û±cÇöìÙæM›dÇŽÓ^k®î…PSÙ0t¾ÏEÄŽ»Fíx.?·Nµãv|§…ú;®~}é» ±ãê×—þ¸»ñqõë‹7…to:M3éš©Ý,i÷›i5###ôôô°}ûv6mÚĵ×^ËÈȇbç΀o|ƒƒƒ˜¦9å¶Ù`ÛvP&9Ú¾VµcllŒ¡¡!¾öµ¯¾B;444e;z{{çä^õSˆնN¶ã¹°±ãî¤;žËÏ­Sí¸ßi¡~ÄŽË‘þ¸;;.GúãîDÆÇåˆ7—…vo:I3éš­Ý,8¸óÎ;[n¹%x~ìØ1öìÙSæ¾¼yóföìÙ3í¶F™˜˜à3ŸùLY[ZÝu¾±±±à<[·nòZsq/„™Qˆ¡óíx.Ú vÜ4bÇsõ¹u²·ú;-Ì ±ãr¤?îNÄŽË‘þ¸;‘ñq9bÇÍe¡Ý›NÑL:…¹ÐnœG\oo/½½½€¿RJ¥Øºu+ÇŽ+‹ÙUûSnk”t:Í-·Ü2©4r+Û166ÆØØ7ß|3}}}ìÝ»—íÛ·³eË–š×š‹{!ÌŒZ6 ­µ˜¹ÏEÄŽ»“Fìx®>·N¶ãV§…™!v\ŽôÇ݉Øq9Òw'2>.G츹,´{Ó)šI§0ÚÍ‚âÀW4¿ð…/088È-·Ü‚iše9Læš]»vÑ××W–ä¯÷B¹,ŒŒ°mÛ6¶lÙÒîf ÓP͆[ر0[ÄŽ'ß±ãîCìxòý;î>ÄŽ'ß±ãîCìxòý;¥ÝšI§0Wßéš °mÛ6&&&øêW¿tÐ}}}ŒŒŒû¨¸èé¶5ÂÞ½{Ùµk†a`†aî‹­jGooo™JÛ××Ä=׺V³Û 4F5†Î·ã¹°±ãîe¦v<Ÿ[§Ûq+¿ÓBcˆ‡Hܽˆ‡HÜ½Èø8D츹,Ä{ÓnͤS˜+ífÁyÄ ÒÓÓ3)¾Wݨ‰‰ zzz°m{’ÁUÛÖ*aŸÂ0 Ç|×ÏVµcóæÍìÚµ+8ßž={—ÉZ×jv„™Sˆ¡óíx.ìGì¸;iÄŽçâsët;nåwZ˜9bÇåH܈—#ýqw"ããrÄŽ›ËB»7 ™t s¥Ý,8!N%¬Tj¦Âqnºé&n¼ñF6oÞŒmÛÜ~û퀿¢Pk[³™êZÍnGoo/ÜxãœwÞy:tˆ~ðƒS^«•÷B¨ÎT6ÜJû™ŠVÚØqwÒˆ·úsë;î”ï´P±ãÉ×’þ¸û;ž|-é»O¾–ØqóXh÷¦Ó5“Na6ß§EÅb±Øî7ÐIŒqèÐ!úúú&¹RNµ­›ÛÑȵZy/„™Óév<m;žtÊçÖ vÜ)ßiaæˆK<è”Ï­ìXúãî¥S>;±ãîEîÍô÷a!Þ£Fî‡q‚ ‚ ‚ ‚ ‚Ðd±AAAAAh5"Ä ‚ ‚ ‚ ‚ B !NAAAAZ€q‚ ‚ ‚ ‚ ‚ÐDˆAAAA„ Bœ ‚ ‚ ‚ ‚ ´âAAAA¡ˆ'‚ ‚ ‚ ‚ -@„8AAAAAh"Ä ‚ ‚ ‚ ‚ B !NAAAAZ€q‚ ‚ ‚ ‚ ‚ÐDˆAAAA„ Bœ ‚ ‚ ‚ ‚ ´âAAAA¡ˆ'‚ ‚ ‚ ‚ -@„8AAAAAh"Ä ‚ ‚ ‚ ‚ B 
è!n`` ÝM˜wäóyÒé4ù|~Vçq‡t:Ýî·Óˆ·ÏóH§Óe¶/6\?bÃK=v¬ì¡#vÜ~ªõÅ v<ÄŽ›O+ÇÆbÇ>bÇͧ;–ññì;î\:ÕŽ»Fˆ³m»ÝM¨‹E‹U}Üi¤ÓilÛÆ4Í ³nÏóp§Ýo©+;n?ýýý†ؾØpýt‹ Ãü¶ãjÔcÇétšl6Ûn±ãJ»Ov\­/±ã™ vÜ\Z=6;öé;†îW4jÇ2>žbÇK§Úñ’v7 žçaY±X¬æë®ëbF°Ý²,\×%‹¡ëzpŒëºhš†eY†iš“ö­w¿Z¨ضm4M ›¦9åûT×´m;xOµž'‰ºÎ5Uû]×Ų,FGG( X–¯ë:ù|]׃{ïyžçþ*K,+»ï•÷AÓ44M«û˜ùJ³ì¸U6 Ýcǵlr*;Vï)“É iÙlvÒ{©eÃÕ¶‰O>O#6=ÇlúceÇê^©~~!Ò¨wʘÂ4ÍIϧz¯nǵúâZ÷AìØ§ÛÇÆõÚñ|‹˸b.mYÝ£J;Ž/ããæ vÜYã µm&ããvÛqGzÄyžÇÀÀ¶mãº.ñx<Ø600€ã8¸®K™ÊÇÉçóÁñQ·Úx<(ûñx<8ã8+i½ûÕ"jØÑÇÓ“L&ÀuÝiŸOw®zÚoÛ6†aàºnоT*\+ãyÙl6¸¿êÜê³Pש$ŸÏ“L&Ñ4­îcæ+Í´ãVÙ°:t¾ײ¯©ìX ¶£ï«òKl8d*±ã©Î3޶¥Ñþ8jÇê³L§Óär¹¶ÙS»hÔŽ;iLQíùTÇuº×ÓƒØq”ù06®ö¼Ö1óul¬>K±cWÌ…-ײãh[d|<{ÄŽ;o\1ÝqŠŽ²ãb’ÉdбX,xžËåŠ@1—ËMÓ ^O¥RÁóB¡P4 #Ø6::ZÔ4­X,‹CCCÁcu\*• ž«ÛPï~Sݧžý‡††Š@qtt´®çÓ«žö§R©¢aE]׋±X¬¨iZ1—Ë×Χî¯:·Ú6::ZvßLÓ,ær¹¢aeÇ×:f!ÐL;n¥ Wîשv\˾¦²cu> 5M+ŽŽŽŠ × – «ÇbǵÏ3Ž^k&ýq-;.‹EÓ4‹CCC3ºó…Fì¸ÇõÓ-v\­/Žî#v\Î|×sÌ|‹bÇ2®˜;[®eÇÑkÉøxöˆwæ¸b¦ããvÛqG†¦*G…r•¬t댺¸+èŠuù®×Ͱnµ†a”­:L÷¼í÷ ±ã¹µãjLg_ê‡SÙ"‘~ņ'3“ÏLì¸94j_ê¸jvlÛ6Ùl–E‹Iy-ZÔU‰…gC£vÜ­6\í=wšOÕƒØq5dL1?ÆÆbÇ2®˜K[nÔŽe|<3ÄŽ;o\1ÝqhÇ)Ć$8‚|º®— Ô¢?Z¦iê¥RI“Éd»ßJGbš&®ë†V+7K%ŽãǨÇJ-V_¾\.G6› âç:f¾#v<÷T³¯+¯¼²æþ*@ôxe»bÓ©eà vÜ,µ¯Z¶_ÍŽ‡††(‹Á@±X;¦¶‹ ÏŒ™ÚñT}±ÚbÇQdL1·´jl,v,㊹¤Q;–ññÌ;ž{Z1>žÍušAG q‰DÏóèïï/Kú—H$Ðuþþþ Ä²BôúõëÜi[MÔÝ´Òõ´SPÕEÔýM&“uÝ+MÓþªc‰D¢ìLwÌ|Eì¸5휉}©„µê3©vņCjÙ°Ú&vÜœ66b_ÓWiÇ ™Fì¸m¸ÚóNa.úb;Ž2_ÆÕžw­/dd\1·4jÇ2>žbÇ­ic+ÆÇí´ãEEµÓDW¦!,W«P±¾ÑªCJÉŒÆû¶×uq]7X‘P;Õ¾zî•r¥/ A|õt+,3;žfc_õÞ_±áð>@y ±ãÙÓ¨}‰]6F#vÜI6\íy'Њ¾Xéö1EµçÄ\W´¦3½O2>žbÇsC«ÆÇí¶ãŽ,Ö ¨4UæWr>Ÿ¯ªrή떹žV’ÉdÊ>@õ¸žãšuý™Ðˆ±iš6ã/j#ÇÌ'æƒS®–ÌÄöšmÇØ×Lï¯Øðä÷.vÜ<;nÔ¾º]ΔFìx®“"×kÇŠùfÇíJ:ÝÍtÚ˜ÚkÇÝ:6^èȸ¢±ë×K£‚‚Œg†Øqcׯ—VÛeÇíW Çq‚˜låÞ)Ì=®ëbÛvPug®ŽY(ˆÏžVØ—ØðԈώFíKì²¹ˆÏ±Çö#6ÜÄö›Ørw ¶?5bdz£UããvÛq× q‚ ‚ ‚ ‚ ‚Ðtd±AAAAA˜otdޏ?ÿó?ç¯xE»›Áƒ>È%—\ÒÖ6;vŒcÇŽqî¹ç¶µ?ÿùÏYµj«V­jk;|ðAvìØÑÖ6ÔËöíÛÅ~Jt’ý´û3xâ‰'øøÇ?ÞîfL‹ã8|ùË_û)Ñ)öÓ í8vì½½½ÜtÓMí¾Óò…/|±±±¶ÛO§Ø±´cr;~ë·~«ãóÕÉØXÚ1];d|\?2>.§Sì¸[ÆÇÿöoÿÆàà —_~y»›ÒtŠýtGŽaùòåüÙŸýÙ´ûv¤÷Øcñö·¿½ÝÍ`÷îÝlÚ´©­mØ¿?û÷ïo{;î¸ãz{{Ù¸qc[Û±{÷î¶^&Œ·ýsû)§¾Ó;wîlwêbbb í÷Lì§óÚ±ÿ~ÆÆÆÚ}+êbll¬#ì§SìXÚ1¹ª¯ëddl,혮Ý‚ŒC:¥ì;î–ññc=ÖvÜ)tŠýtCCC<ðÀuíÛ‘BÜÊ•+Ù¼ys»›ÁUW]Õövôôô°jÕª¶·cllŒ¾¾>úúúÚÚŽÕ«W·õú3mk»?7±Ÿr:á; ¾‡N7°bÅ 
Î=÷ܶß3±ŸÎlG7ˆ«V­bãÆm¿gbÇÒŽröîÝËŠ+ÚÚ†z±±´c*d|<3d|\N§Øq·Œ5McݺuqÏ:N±ŸNà±ÇãÈ‘#uíÛ‘B\§°}ûöv7¡#:g€-[¶´» Bˆý”Ó ßiaæˆýtf;„™Ñ)v,ífC§ô?Òa6Èø¸±ã™qÁðªW½ªÝÍèÄ~BfbR¬AAAAAZ€q‚ ‚ ‚ ‚ ‚Ð$4Uð§â5 0»ôgfé±U:F/½nÙÒs§tl¡ÝoJAAAè(Dˆ¡+±ñ…0/òš¸„Y H—^ÓJnd½´¯YÚf•G¯¡„µXé¶tL!r>«ô—(½¦®ëùÒ6£t®ð‹ .h÷íAAAèzÜÒŸšÇ9¥?=òÜ-=×"û„ÎêxµÝ‹ì“爩Èö™"Bœ mA‰\J¤r;Bê› kðŮʎO‹¼ž.;‡ßqFÏAŹJmÉUÙ®ÚíȵÈsòŽY¡:v/½–/]ëœ7¾±Ý·_AAaÎQó:5wSŽzý•¦!”†ÉœÈñJ`Ódéu塿mZä\•‚r”PóRJûª¨(Õ^Õ"çì/Kmÿ¥ù•êº'"Ä ‚0g¸øâ“ùS+ JðRœ†ß‘ŽÔ*ˆ“›æÚCÏU˜)U^¿#®¶½²Õö©†N(â)øuë÷íãAAAèb<üùøó$-¤æsÑô?J8£bÿès¡¤qÂ4@JXKÎÇ”W/e^m¶MÊ,ŸéÅ"-Ë"‹ÕŸç³ï~7õôóRûá‡qž~º®¶ˆ'BCX„«*7šZ%ðð…²âA• UTÅ»GÝaúP«ˆ6Û®Ù^ks‰F˜NAAa®qðÓù¨"rùÒcBZ+Å*`0“µD8Û¶Ñu]Ó Ÿ‡’WÚ“wÝÅÚM›Hº.±X S…†æóÍòÓŸÿœs†‡ùñÏ~Æo_r ù|žóžžO~õ«ìÝ»— .¸€3Î8ƒßþñùÊgð“·½ í…xtñb^sêôº×ñ³o|ƒ½Ï>ËÐпÿû¿Ïã?Î?øA,ÓäWÿ÷1ãqŽ?ÎO~˜+¯¼’Ó\—L&ÃÛy„?ù“?áÒK/åÓŸþ4ããã¼ãïà±à+_ù žçaš&étšL6Ëøø8ú§J6›å“Ÿüdày722R×½!NÑd“JhËâwÄ:þЇÜ ÔŸ ­™9xÞìO!‚ ‚ ‚0/ˆ o~TÒ¡“ÅõEäÌ$B(›õýéR¡£~ƒúo¸3Î>Nž„D\Ì-¯{ï¾ùæ Dõ à¿¿ñŒñÿš&ú§ŠaF8³M$ÜqÇhšÆ#<2©ùßøÆ7°mMÓ¸ûî»'mÏd2†¦iüÃ?üCÙ¶={öÔý¹‰'ó¯V=T…•43…/ÌeJû¤ð]çB<AAAf‡_­3ƒ?ÏKâ;V¨\n9Âè¤FQBTTÀ²À¶Á0ø£o}‹Óž~ÚnšPäòù<‰D‚³žz ÇqÐ4l)ïZ"‘à‹¹ÐÏ4M<Ï ®aÛ6f,Æ÷܃ñž÷ðé/~‘¿þë¿ææ›o¼Ít]çCúPðü;ÿõ_“ÚþÅ/~¯”Ó­Z¹T*U]@,¡ëú”¹ç€)‹8Ô‹q‚Ð¥¸ø¯‰¿¢D·$¾;±‡/°©B •¹ÚTè©Uçõ:zݤAAAæ6aÊŸ4a7*f" MWÀu]_$³,Èf™X¼˜Ï-^̶“'ÙsõÕ\½u+xžŸ|`€¡¡!\× ÂR]×w ÉåÊaÓé4¦i†a©‘ëéºN*• „¶|ä#“Ú•ÉLí:¢ë-ÊÈýP@„8Aè2Âj5‰Ò•€3‹ßG5ü©ôün+0ÛpYAAA„nÁÆO!#,š`&“ çêÉÜ]2!Î.%0ÊfÁuynûvì#GÁ+ëº$††Ð4ŽãðCÛæy×ÅqR©CCC€/’e³Ù *i5‰DÕJ¦®ëbšf¹Þ\¢ªÖKeeŠ$0ÚøåOkÍ»¡x„ñaˆ)øîÉCL-¼ ‚ ‚ ‚ íÁu]Òéô”ûD Ôeñ#›”ô{ÿò/äóù ür¦üè»ßåußþ6»~ù—! %Hà sò¦›0M×uñÒétð! 
B'Ñ©v솣ÚLöxS¥©¦¶cé‹…n SûbA˜ bÇÂ|AÆÂ| “ì8*jUîùÖ·°ðç€ï»÷^úûûI$xž‡®ë˜U|>ÏÞðâñ8·_}5¿ñÉOòå/MÓæ×ÿàØó+¿ÂÇ–/Ç+%ˆÅb˜¦Éèè(ý×ÍÀÀŽã`Yù|ž|>œß¾} …@LS(¡Íó<úûûY¿~}p|:¦¿¿Ÿl6K?ét:¨¼ª˜‘çâ‹k3!]q¼†/¦õzÆõãOº£MSåhcøâ]’r—E_Œ‹ÃSo~ªîæ4Å#ndd„žž¶oßÀ¦M›¸öÚkƒm‡bçÎ@¨8×Ro¡]t¢»ø»*¾ #¢›05µìXúb¡[èľXfŠØ±0_q…0è$;Þý³Ÿ±n×®šÛóž‡û;¿C!—Ãó<>ÏS(ÊÂ@£bžçyd³Y,ËÂ0 Nþà žw‡Nž¤ç¡‡èÁÃÏß600À訟ØìÒK/åýï?þçŽã8$“I4Mãþûïç¶Ûn Î]fªrÇÅb1Òé4š¦á8NàÑfÛ6¹\.ðÆS¡* BP£žzªJkݸµn¾·ZTÓ‹ã‹iéÒöDd¿Déÿ@i_£ôšš€'*Î¥¹`<òz °aü—Ça¢¾·Ð¸óÎ;[n¹%x~ìØ±àñž={èëë žoÞ¼™={ö4㲂ÐT:юㄹ߆KÿÑ•‰jX–Uwî·®½æ®ëNZ…‰âyÞ´«aS¹”۶ݶUáZv,}±Ð-tb_,3EìX˜/ȸB˜t’?¸jÇ~íׂ¹‚ ¬Áw²rñÅÀL*E2™$ŸÏ3444)›aAˆëÀ€¯"ýôºëø_ÿõ_üãÓO³£·—¯¿ý휼÷^× BL•pýõ×sß}÷‘Ífq]—B¡ÀÐЇªšN•ëM k†aËåehhˆb±H"‘À0 …™LÓ4¡Í0 4M#‘HN§±,+8_M*'«J0³(ÏÛ–er5B_lÂà¬È9(í+]#‡/À©}³Lö¼KQ7¬ÈÁ³Ÿ­Ûšâ×ÛÛKoo/ccc¤R)¶nÝ øF~Þyç•í;dÛ¶mlß¾½ì‹!,\FFFرcû÷k4ÛŽ÷ïß϶mÛ‚–™’Æï3*»?×uI&“8Žƒae³ëº¸®K<aV[ÑQ®Æ¦iâšæ¤¾-›ÍÖ¬h Fu]'‘HT=ÿT‰8-Ë €ª*;žç%¯Õªˆzo*)(—åt:M>ŸgttMÓÊÄ5un]׃ÜŽã«.–e¡iétšL&üȨ• µ¢“L&ƒû`ÛvpÏÇ)ÛOýY–¸U;ŽÃøø8žç‘Ïç1 ƒ{ï½—û￟'N4dõPËŽ±áÇ{ŒoûÛŒŒŒ4lÇÂücpp¯|å+¬Y³fNÎßì¾øÁd÷îÝ\uÕUÁJ¸ lÛ¶ƒ²nݺ¦ç𫱱iše¹æ„…M;ÇÇØ1Ì~|,Ì?±m›ƒÎÙ5æb| Ìh\±~ýzb±«ßõ.Ÿ}6Ï”¾·6¡“•,þÛ¿%µcG0ª6ŸRsšd2éGp]\ÛÆùë¿æªW¼‚wŸ~07Êæ‹ñxœB¡@*•"‘H4œ—MÓ´À;®t]g|||úÓø*e rLéFi¥›7@XÍ¢PÚ EŽQyá*«Vj€jß~&ç…Ò¨š }ÇŽìÞ½»îñqÓ‹5 rË-·ÌÊóüóÏ—ÎY(£¯¯;w²mÛ¶9½N3íxãÆ Ù±‡/¾[žÇhEÇkY–e•¹"+òù<®ëUt”à¥.åÁ¥D*u¬[zgÜwÙo~3è°•H•J¥÷gu^õƒà8‰D×uqM¹&+êd2‰®ë—™¦iÄb1‰Ùl–5kÖJ¥‚Ã0Èd2ضëú¡z|òßÀ)‰ó–e‘ÉdX³f †aày^ šÅb± ­ýýý ©~„†‡‡!- ÃV€Ô½J&“†üPEÅ7u”]eR«SÉd28—¦iìÛ·W¿úÕsgÄT·ã;vÌø<\poyË[D¼Êزe ½½½ìÝ»wήÑ̾ø’K.‘$âBUvîÜÉŽ;¸à‚ æäü26æšvŽW@ããcaþ²e˶lÙÒ5vÜÈøX…Ú¶MÏ»ßͺ±1îeŒôÖ­[àСClß¾]Ü–…ޤ•v\­Þf>Ïšd’ñ¿ø bããeÅìRg¾T§;U’ËéVVt]gùÑ£ìÿÏÿdÛõ×7í^š¦xƒ5Sˆ[µjËnÚ9U~¹fýAl%SÙ±ôÅB7 c a> v,Ìd\!ÌÚiÇ=ôç¿éMÄ—/'F¨ -ýÚ×8~ï½ }ô£µÎç}Îó`x!Ç!›ÍR(„._ÍtT˜5uªÇ³L‰gnäÍ«â *·¥í•žs•Ó(rñ.J5ý±ZuÓ¡i9â¦#Z©Dº•fÙ±Wíy*ÅxPTUêªz¼iš6­ÇZ=ôôôpbÙ2Ωâ·¨,<ÑlZ-ÄM‡ôÅÂ|@ìX˜ˆ ó±ca>0—vüÃçŸçOzzÐñõ'%•­ÿÇÄ4Í©£hR)ÿèïïg¸$ÆAŽí¶-ý:òyøžo•Ó©4aè¨Êÿf”ŽM—ö±ðŵ¨˜æŠsQ:$âv¶œÖîÂBä™gžBAî“÷ÝDz¯Ó0‚ÎX¹«É¯|÷»]³†]x!¬YõããínZóßëLVêdùòåpìXÓÛ:U‡ à ›ÍÎþD‚ ‚ ‚ tŽãpÆoüSΆÒi_ˆËd $¾™¦‰ëº 
Ëåš>Ÿšª(ƒ‰ïÉæá‹k•ùà’øâ[·´Ÿø&TE<â¡…¸€æº¼mÝ:–?Žƒ>O=§æBˆ;ÿ‰'Úý¶fL[<AAAhŽã0NcY6l˜Z‡ŠÅ P(ó†sMÓæV„KS^hÒóÊ× BÁMT°˜\á”*ÇÖÚODˆ„–³ìøñ0lÑqmk{æJ&ÊårmË{×IHˆª ‚ ‚ , þ}Ñ"îþó?çä7¾Á=çœ3µg'œÂ²ü¥¶mÏ]19 ?Ü4Š‹_Í4Z!Uy¿)TηÊ7•crA‡aت0 MÚŽƒ/-¹âĉ°x1o~é%ž²¬ 1g»H0¹ßlÊyÛü¾AAA˜=^éOÇ×bÔüMåò·ñçr |-Gá”^÷"ê¹B9<ó–·´ûm6+‡‡ùø5×gæu 2¿‚as· ¯¼×¢¡£¢|RžÆÙ´Èþa±†©ÐëÜo2­711AOOO»Û)4Þ=Ýë:a稶™LÐU§\Ù¹F¿³P. «Ž[«Ø^é¡¥^3€_\pA»oݬ9zô(æ 7HEÍ9zôèœTMAA„ù„‡¯¹¥Ç*×¾ÒœÈ~JhSñ,Q§©hŽ~5Tóº4¾çqÂùæ¢NUÑeúÊyß¶o~ó7Û}Ëf;}ûXzÙeAjµF™ÓÈ¢þ`~(“½3 Ê?$õ†$óά „¸={ö088ÈÖ­[éëëcdd„mÛ¶111Aoo/Ùl–¾¾¾v·W˜—ÉU‚UžÄJq¬ÖWÛˆìcSžgQuÈJ<Ó#×HPÞ9Cع«×ëñ|S²zêZ°»Ë…¸Ê0y Ýœ9'Nœ`éÒ¥ín† ‚ ‚ ´Œè¾šUæÃ× # Õkʉ"ºŸÈT~}?Š0ê”aã;4Eg+ÊÃm&:Œ>Ã×»G7lವkkïσëúJxžÇÀÀCCCÄãñ9)$W†RNð?`¥Ô*ÒøQéѦíͪ4oX066ƶmÛØºu+½½½¤R)LÓdëÖ­ ‘J¥øÚ×¾Öîö.*Å(·b»MèlR^8šSQѨßÈqÑcŒ§B¯øa‡ÿÝ{î÷¼§ÁwÓ~àñýûýûa’È¿AÖ­[×î&Ô…*[.‚ ‚ SõËâë"Ê™A `:GøZ‰ÚGé(*º°Z¤Q”èœMœÛ¬²]!£ÚúØ01Q{£ã”g?§t¡P ™L’ËåhÊ Ò¨xMyÔð݇)7ß•²ZSdúÚ–'ÜöíÛ_˜cûöíôôô°uëV¯¸Y­öëáwœþwCu®¡×Xeg óTßùtßûÞ÷xóêÕ€ßáJ"ÿ™sôèÑv7¡.4MÃó¤T ‚ ‚¢B:³„sÃJÑÌ$ôn‹•žOå QM “yb‡bÛPElÓuB¡ÐÜk9øB[†0ìÔ¡ÜÝ1G¹WøF—Æ÷˜›cç¼…Ê€‘‘nºé¦àÅ¡¡!úúúÊrÃwÞyLL¥ìv®ëΙâzª)wa-ò?ÙO‰nªs_\æ/GŽaÛ 7´»]ÍÑE‹ÚÝ„ºO8AA„…E4š)ê€aæÔ6JÛS„ÞkFä8/ò\JŸuGŽáœK.©¾Ñ¶Áœ,©ª°TÇqp]—X¬IåFUÁ„Ê\pQLÊsNU¾.Ì Kz{{aóæÍe‡jw[gã8X–E>Ÿ'•J‘J¥Ê¶»®‹ëºØ¶®ëU«>ªÎQ‰Æ*gšÞ Ìi˜¢¼Ø œsÎ9ínB×röÙgéSínFÝ ‰ '‚ ‚0ÏP‚[žÐÁ"ŠŽ¯c¨PÏJÿ§J¤èqBw3±a—V›ïY¤Óe¹áªRjÓ…t|.Oht*q{”<¾xE% æ„%}}}ìÚµ‹óÎ;Û¶¹ýöÛƒ9vìØ$q®ƒƒƒlÞ¼9È5§^ ž÷öö²eË–9{S–eMÊ¿eYétšL&C&“Áq<ÏÃó<òù<®ëâ8¦iây^ Dï'?áSK—òèÚµ¸øbzzˆž'Ÿdý¢EXW]…‡ß¹Æ…7×õ»fÉÿս̅çâ 7+–-[V}ƒZj¬\+Ôº+øÃ:“P|¦ö°Nè>ZeÇËÏ=wò‹žTJõ+á,*|)/;µ_:„ÉÑÑQÇ!›Í"œ®ëAXn,Ãó<’Éd¦«¼ô\× öSž†‰D"ø THo:<Õ_ÔëÏóµLÏóèïïGÓ4†‡‡ñ<û￟ÞÞ^,Ë¢4bÇ===lÚ´©-ím:*œTeªUa›Êœ¢S‘}† =Çrø9ß” ªªãž! 
ñŒŠgÕ~“ÖµûFDÞ§Z:­†ºWnù>«V­bÓ¦M ·>C#6¼bÅ Î9çœùcÇBS8ï¼óظqcYbãVш¯ZµŠÞÞÞy—ÇH˜=›6mâç?ÿ9+V¬héug36V…ÕÂqÅîÝ»[~íFìZ?>v3—¨!Y®ñÓ s€ê׺m|\m\á8§Ÿ}ö”×t]—xúútyÙš€3ûS,HšÝ B;;æbÇÂ| íXs¨Ž KçöýD ïÙ¶<Ïf³A‘<`Rnô(ù|ðçét:˜G«\ߺ®“Ífq]—T*ÌU\×%“Éà8ëׯ'‹aYVP\OØ‹ÅbÁãD"A</â,Ëb||Û¶ï/UPÏ4M,ËâÔ©S-¿ÇͶãÇsÖ™gF?€ ZªÂu]t]§¿¿ŸT*Õ˜Ê0“´*¯‰èÖ±BÜÈÈ;vì(ÛXù¼ž¤šJWôôôpûí·¥‚ûúúʼïfŠmÛ$ ²ÙlU£U"œJ=%0Wv<—bh:~0ÕjUÀ¾_¾MJê[P"œFà»à†¼ä¾„^ÐA#¿MÓÄõÜà‡ÚvüA*• ÐN;Vu±²øëÈí CUcÜ|>Œg£f•b¼šÇªðNÛ¶ƒqv¡Pæ¼¹\®¦ð¶b±±X,XàVÇTŽŸE×õÆ*{vͲãË—³lÙ²ð×…ŠÏ`ÖÅ] ¡ðõ~Óñ•iÉ×±,ß%³²ôn'õªš´’ œHE„:‘G}”77ÁÛ+N344D.—+û¤R)þä 3ãB’øô`ÂÝÞÝ» †.âkßý_þö—Y¹z¥z GZ&“)ûÑVîèêG#ŸÏ«Xét:XÕ[¿~=££a<×uI&“d2™@T‹£êÚ %Æ)wyE?Ž®C¡À÷¾÷=Ö–çóy<Ï+$£ÅIªz ‚ ‚ ] òPÙTE-r«|gñxœ¡¡!,˪ê=±,+Ú‰…B!ßjå1¢A4M#‹ÕÜ·^ ز€¡Ð~væ™ œ~ºÿÄq`6ÅjáâG9¥“âWb–¶‰¿BÇR–#nbb‚ÁÁAFFF8vì}}}ôõõÍ®šGq]7è +ܑۤ¥û:8pàÀ¬¼·òù<‰D‚\.‡ëºÁwÀq^ñÌ+ðÎô¸æ5×pêWNñ¥?úñ/Æ9í†ÓxñÒyþêçqÎrˆcû®ílg{Ÿàáw=Ìű‹1 ƒx85XH=z”ëU±Û†*N*PÃø ´†‹ þÂÁO9“‹ç GYޏmÛ¶±jÕ*_˜»õÖ[$“É´=E¥Ey]ÊtZèD\àÌS§f%ø¨B%Žã^^ŠD2Á½·Þ‹·ÉãôÞÓYõo«xàžüœlëu×ûdŒÃ¯>Ì2oY$uhhˆ<ý2Z×vO·¨û¹Z©ƒ0ÌTQ+Ÿ›aUs?ÌꞪ‘ø(a0š«N…ÅêºN:Ú¯~ E˜AAè\lüh¼4¡1ãsØvfÅqœ@«tôÈf³X–p†a‹Å‚|mQfëÑ6_¨•#nA⺓ 5³O«£Q.z8ø_Œjxˆ@Ò¡,_pÛ¶m7Ýt[·n-ÛaûöílÛ¶[o½µí•pjMúÓø^—’â]èdzz¨¡ãÔª‰úÑWžqOÞõ$ßúÙ·ø>øÿ@Þ=ñn¿£ý{|Wdõ»—‚c7‚y'î蘦 : …ŽãL)tw Ÿýìgáð=ÛfØ©=5 PB¡ò Sùë²Ù,…B!¸Ï‚ ‚ Bû±ðS©|à3•xòù|ú$•JM+ˆ˜¦TiaáaÛ6‹7m íÒ¶¡ŠÝÌZœ´ñ¿JüH⻆VŠ!&þ—F¦3É€ÁÁAúúú&‰pàWÉf³\ýõŒÑ«\-[Œòb©DUÉnK«aîQÞZ*‡[,C×uôAMÆ&ÐáGïþW|à ÿ ‘ƒ“'Oâ~Ø¥ï“}e.ɵ„%Ux¡Ó¹æškøWüÐÔfõŒzò)ºl6+ƒ/AA„6¢¼à†˜™ó…ëºô÷÷£ë:žçÕUùSaFYêA˜ ÇqxöÊ+C´.f½Øoã‹lê‹`RÝó-xÃu0§–:U¥ÞÞ^6oÞÌ¡C‡ÚÖPÏóªvšY|XlLèdŽ92«ã=Ï#›ÍúeÈõÒ*J_…v`ÿ÷‡Ë‚:MÑùãñx»oQ]lذ¡ì3Ðu¡¡!âñxY.‹L&ƒ®ëÁji:ƶí®ðAA˜O$ñu…Jβ,-ZD>ŸŸtŒZPU9GGG%ýH Pù™Ú˜ydÅ ^sêTøB ±mF÷E•ž ½Æ>*œÐ‘,©wlj‰‰¶6´2'„9:£”DóÉçóÁ…ã8GøÂL´¤¢û†T¦Ô4 ]׃£º®/ì˜ýsâĉ†Å£E‹aš&†a|$ÉÄæ zìñ…·‡w§ÞíJýúÙÿýlÎæìº¯1<Ü>¥ûöíƒ+¯dß¾}˜W^ÙÔs×Êi§ò…˜¦$í…îñ"ºå0—N™SÇ÷ àÏ~¤ò– ‚ÐÔœ¯Ò¿È¶í@hSâF´’©š?Í:'—P7†a” q i¾¹÷ÅyËêÕÓî7£B JˆKM±ª*tK6oÞ>ˆ§GÿÛQNæt¿CÖð;ÞçyeE**©ì<ÏÃó<,ËAN˜9¡°•§vþ§´=C㫚kðó7ÔŠåéÇàÕêêcø³ Ó‹v‚ ‚ÐDÒø?AÕÖ£’É$™LÆ_¬N&q]7˜_†D7­£Òãp!y >²ho_¶¬¹'U*´ G…Éó=Y,íB–€?ÁäÖ[oeûöíeÕQ÷ìÙÃŽ;غuk[«¦F…¸'Ã0pþÞ 'òêwmu¼®ë6ÝÖfs.UÈÁ¶íÀƒ4× uº µê¨¯þl & 
Ä1—ò’òjÓÀ÷:3 …73rL¦t>uU Ë+½®f ÉÒþJ0Sᢡ§š…?`³"Û„8uÞRaâ¥m™Òu)µÕÃï[ÜÒëC¥k–ÎsîmçòðÆ™õi‚ ‚0²„?» ÇqˆÇã$‰²H$ÅÚ‹aŒ·»máø9çø¶šNƒiúÔ\¡)s™rw=AޏÞÞ^vîÜÉÈÈH `›¦ÉæÍ›ÛV)U§òøsŸ©"yf‹ 9Sá€æH$8ŽÃÀÀ…B˲‚‰¸a$‰²Iøðð0Žã”Mô§RÀ;1޾RôP?z*TV½Ÿþþ~ùñ«ÁÓN›Ñþžçá8žçñÖMo &ý} Evî<“™3ÒétfÝißÓ41{ªW¡Ã¨,_‰òKà Q*IKP,%¨¤<Þr¥miü’*¬b j*ãt?¾à•# ÿ„P8SžnJ,à{¦©ý,B¶ti_»ô8VúŸŠœO}m2‘ó¹–9Þ(½7¯âàÙÏrðUÛýé ‚ óå §ðxÛÛÀu¡b‘ß¶m,Ëš¹§Æ^*-‘äì7L*ÖÐ××W5\4ñb«‰ qªCž+û‹ÇãeIÙ Ã*%ª/Žã8Œ (MÓIJ¬Àk¬ò Öi‚A³ˆ†°ŒŽŽbY®ë!¬*ŸÝB÷–ëyòɺí@ œžçù"§šk„Þ3 ”n 5P9MÓô+Ý–B'\ו\%­fºUµ]Ê% áT!šê{©HÆÐ(M ]Ñ­LÅ13%Ú•D…´hø©^e_AAèÔ:–Bånô”‚0güxÉÞpüxóO¬R‘¨¨ÂYó„™Å˵ U¨Á¥yÅTUþþ~Òé4ñxœ¡¡!\×eýúõäóy4M#“ɹܔ÷[Wžf{ÿKÅŽ—$ GJG¶Ç#ǧ 3•s¨ßÿù‡/ç…=/`YüГР„×R9޲„å¢ÛìÒùJç/µç=ß{O»ïìŒñ€³f+¡P(ø"Îá($ƒ?±ÏÕ}šy‰ëº tEèÊ%õ¸v ú(%š©PO%¼åñû®Dé¹A(¸©c2„Z¬´O‚0´3*ÚeèŒJB‚ ‚Ð…¤ ë’šÍfe1SèXêéaÝØXÕ°TðãÂaò˜rºy¾Ðt…§h–ø«<Ûb±XPT… ª$ŸÊ nÎ&ÍÓEùzÓlW!Pnäq:r^+²o:rŒJÂÅÕÂIªM(Œÿ¥Ï–¶«Éh"rÝ¡—V4ÑxÒ?ïÆŸl¤*°vb-¤á+_4ŒkãŒÜ9â{»Ù‘¶:‘÷ÇŠz˜XðÙ·|vn>9d¦’‘*| £“Í3t]ghh¨+Ä,•ç¯L€šO?åýø}ˆyl-Päûõºè”·\´€AtLeDžËФ ‚ 4 U)U ÿ•óD,ëŠñŸ°ðps”7\•°T˜Eþè¨öÍá+t=KÆÆÆ8tèP»ÛR•›®r5¢ UΦ¨w[YØäœ¿!|¬²®J®­’yáÿyøÄ4þÄ0š$\‰e‰Òs>¥&”á/šúUS¹ŽR¥ë¨-·tÍ<¡ç•šZ„yˆ”k¢ß9h¥ëX‘H‰rFø>zÖ÷À­ðì¯?ËšûÖ€ÿì çŸ~ø>”WÉåáU%O”Ç~ôXë>§&á‹öíƒË/Ÿv_e§‰D"üìTb÷’œ³‹p'ès„9Dõ{QTÿ¨ú a¨i´ÿ5 û­LißhŸ*ß9AAh)j:¤FOù¼ïqÐÒ¹š Ì8ÿ‰'ü'¶ Uã†óV¦#q˜”ºIèN– ²k×®¦œppppR¥Õ‰‰ 9vì›6mbóæÍ3:gP¹”Æ£} Ã@Ó4âñ8º®ELÓœ›Ž}ªÆz¥í•û¨ÜD¡§’q½>”ádS w*œ3Q:6]z=Nyµ¿ÊIªòj‹ÆŸC˜°;Fèy§vªý*›ªyE#×Î×Îgà¯0Ágcj&ZL ;›6G7ÛŽO9‚±uë”ûäóy‰D(E{t³NÊÚ*íx¶}qG¢rµ©~UyWö¿.a>¸\äXÕçD7)þÜ1ÌŘBZر0_hŸÂÁŸ~xžG2™ ¢•rÑ7¡¹Ì…?þøãü†®ûB\Eîx×м@9â@èh#Ì Nؾ};ŽãLû7###ìØ±c’w]:fllŒ¾¾>Òé4¶mO{®(š¦5–ª®§ªšêºŽiš …Æ]œU³©¨ž$à´Ò9T®4(¯x§š¥B7-üÉ£ªÔ§áO*sø‚׌Òëj‚9Z:—Ê1Fé¹zœ#Ld>•º®Î1Ó}@‘p®¡¡!2™ ™LÇqð<·#‹Ï¥ÏìÂÏE\»’t:ݶ|vÕìx¶6ÜLê¾+뫼¦rHFO–%ÌY©Š2DßžòxƒPèW"žÐ‘ÌU_,­DìX˜/´b\aN²Ù,º®“ËåD„šÆ\Ú±®ë`šUCS&_ñܤºÎ tKöìÙ0«UŒ;v022ÂÄÄDÙë###:tˆ;w¡â0ª<¯¸yÁ€½{÷¡§ŒTd=lß¾˜ÿ¼gÏúúú‚ç›7oæÖ[o­û¼Ê›D¥>«åÅ—Éd°m×u›‚êR=oòr‹N„á¡ziŸ¨Ö/=(³Òå4Z…Ï.¯šçZ—†.F…7å§ i´ƒ¹²ã«žyfÊížç…CÖ^ _ÍÂ)Óïl- òyH¥ü?…mûàoO$ü×õ_O¥üU¡mt’ÏÖ†gŠrÐUéÖúñ».¯êètnýø¡(y 
õ(Øk!aÀú0\ïg`lƒõÃ0ô,$·#ÿ,.Ý+]Ð$ìUø½ÐuÌU_,­DìX˜/´j\¡¦P–eaš¦ˆpBS™+;v¥Ï=§Ÿ^u»ã8õ别'0ÆôE…®`Ϋ¦;vŒóÎ;/xÍQ®ëúÞlÌl>åy®ëÞp3áTøh%Ñ –êw!Nè2ª<ØT§òj5Jû¨‹ê<*g[Œr·j¡ˆ1ºVl› ê×qÒéΩÍ<;>®ªèLã8~‘/@ïu[á”N²Y¿öèh¹~"PMóÿû-Ëw!ŠÅ&' õ¦éµmÛñ'ùÇÿ«Ä²ü}Ã7£Ž:ŠÙöÅõ j‹¸Å“ÉÖSP£ºF Ð^ »Äìãþÿük|¡-}8 ò«¡ÄS¥s¿ßïnó‚sa)}fŠIÅ]ZâQêyá÷&úÚtß7¡!ZaÇ‚0׈ ó¹²c¿x™xˆ ­ Yv|ì?þ£æ¶ºç´i¦O¥Ñx˜ ÐQ,iwªqðàA¶mÛ¨Öš¦iÕêÁqt]Çó< ëPÖ‹Ö*£$•‡„9ŠÔÜKU*Í kQo8•N¹ˆ@X…Âäá° =8¢®»*/a6›eýúõ|ç;ßaÿþýínbÝìß¿ŸmÛ¶áþÝßñ¾iVALÓÄÔuxGnÈÀ/›°kÚ›U.ÎMW»¿ß÷S«‹š v¶ Éd(î©?Çñ÷©Öþt:¬ dþþ®ë?W¯)<¯\¨pÿµèq¦YµÊÐ|Bå¥8xð`»›R=ößþö·§õö=ÝT„½*ú«<ß(íÓ{ò$ÖÒ¥¼õG?"Å|ôÏNðé¿ZFê>p΄ü>ÙÞDâNȾ ÿÙ+aç^äsZÌE_ÿ6ÅW eÙQÐ./¥‡Ëø]­ÄŽõ=ñ~úSì5k0¿þuœ+¯Ä<ýt¬'0/Æ5Mt¢ÔÎ'?‰¾d ÞÈÚØÞêÕhgž ==06†{Ûm(§;ý«_…OÚ¯½€ü¢E¤>þq¼+¯ÄÜ·ïÿþß †D |a]%ñM$ ›Å;qç¹ç0V¬Àýõ_ÇØ´©láɽ÷^²§ŸNnp§·clÌÿΨï—úµÁÁA¾ò•¯°fÍš–^·Q|ðAvïÞÍUW]Œ+`Û¶m®6®°ñ×Õ’¶M.—›ñ¹…îdppÛ¶»n| ”+j‰ÇCC3(s=…„ v;vì`÷îÝuç\ˆëëëcpp0x>22BOOϔǜþùAç<66ÆÑ5kf$ü¦Ói6­]ËïlØÀÕê 10ŠïKÁÛÍreÏ%ÌÍáÌ1OX@A½î”ö‹ævSâ] &¨Â£ÌÈqêMª4z.ô%’(¡¥ËV–”xªi¿ú«¿ÊoýÖo±mÛ¶¶´¥;Þ¸q#;wîäŽ:<âÿsºoØ·¯$¡¸Û,FGÃPVðÇãaRÑX êÍÑ‹Mª4%žç_Oy )»ŒŠsÕÄ„ðÓ¬Ý>å4•çºå^|Ñc+…Ã9¢¯¯;w¶Åޱá .¸€·¼å-ÓŠjMâÕ‡ãžs~÷šVœ<‰½t)—ïßÏÞ x!FËdøáýg¿úÕ¼ã[·õ#ôýr'ßx„8üÞv럣YæsX_µØú™çys)§ß¹·ÿü>V-¾ûeêû=^\ñ—ýñï’½â òÀ_ž<ÉÏäs\À§NñãÕ«¹ëï份½˜##üð’K8¶f ÏŸ8ÁÙ‡qbùrÿÞïÁK/á=ó «4•˗éSè'Oâöôpüøq–.]ÊãÏ?Ïò'¸àíoçÙk¯ebñbÖ,ZÄ‘+Hïúò—Y~âÇŸ|’EkÖðŠgžáã?ÿ9'n¾™vïæÜ{îáM¯|%Ÿ|ÙËxôÒKù÷_þe.ûÅ/XÚÓÃsÏ=‡ ü·ÓNã-¢ÿ¬³¸oÃŒU«Höõñ?»—/ç>Šõš×:q"ÿ5(ÏŠ'\úÓøØ¹çò‰3ÎRê”w1õzžoÙ²…ÞÞÞ ­E§Ûñ%—\"U)…ªìܹ“;vpÁ´ôº³ ‚¢ÛÆŽ+ üÈKÅõ„…Ö-[زeKרqÍñq:]ÿ\ªn•çòUè¶oßΦM›êBœZQQ:<ž|¦ôõõI{zzfìjìyG׬©ÛIÌq†††xø•¯äâ /ô½€ Ì—åºð·.œC)¶ ¦²#Ÿ²à.ϺЯÃ\8O‡—i`–Ôl%jÿ^þ6  øp9àé0§$*¨É½úNæóåb€¦…¡MêG§ò œL†¢šãøÿ]×?·çùç¨TÚãñÐÓÈ¿‘á¾Ê›¢ò:é´r¨‘Ja£Ú1ù|øž”7øÏÕµ2™r‘Cå0«&ªg‘ý• Ç)‹3°¾æ1;>§Rˆó<ÿ^ë:Xv&ƒëº†á¾¸-òÑL¢mVׯ÷?£¹Ìg¦ëý0)»ö<ß›h`À­¿¿ü»S)¢i*oewêqÔ¶Óiÿ•Çäóá6…ã„Ï]×?¦Ò¾-«ùU“fÉlûâép€•O?Í¢å˱Ï:‹«Ÿžo-ZÄsŸúN*Eo*Å©+¯déW¿Jßý÷ó{¦ÉµCñòG.à™­Ïp X䢗^"¾üüóÇ¿Èâ/n ™Lbkßq-«µ >pÅì{ê]üô¶{8Ðw€ïþ<£'ǹó›·síÿ1ßüð‡yÙw¾Ã?­ZÅ«zˆÏß}7ƒßý.gŸqÿõÚ×rj×.zî»Õ«WÓû±±rt”Ï>Ëèø8ËŽçØ%—ð›oæŒ 
.à©×½Ž¾?þcV¼÷½ôÆbüÄ'xÃ[Þ“{÷ò‹U«0M“±±1\ËbÃ[ߊû䓬:y’‹V¯fù¹çrå+_I.—ãÍï~7G׬áèš5üÕÿý¿lþÞ÷àÈî|ë[Yþƒðî[nÁõ<ž¼ë.N~èC¼úÐ!rçžË[×®åÞ`õk^ÃÍ¿ök\öŽwðù7¼—8À'ßúVöÿïÿÍÿò/yéccxÝ:^øÝßå*×åÇ+Vðéµk¹bï^þþõ¯ç¬ÑQÞ0>΢ãǹ¤·—e÷ÜÃÛ/½”¡³ÎâMwÞ‰£ëšÆ\r ¿yêûy„¿zᆋEÞ¹j¯»è¢ _«®ë¤ÆÇ¹tåÊyiǂРĎ…ù@³íx€’g½m×—OKš@Sí¸JjžlÖ…KM¹¤PUÍ \5æ%KÀ…add$ØpÞyç•=o”ÞÞ^nºé&n¼ñF6oÞŒmÛÜ~ûíuï8§¶l©–š/¹¢%8ŽÃÀÀÃÃÃ\üÈ#þë•“]/¯zꔿ-ŸJÀGðŹ¾²x+aÑ„è÷ò# ÿÏqàß=ø}£$dÙu«OÄ•x¦þ;Îd!ª’F\³ …™“ÉÌ\,I$ªV‡™%*‘0ŸÅEåý”Ë• %ù<·­XAn||æï« ÌÆŽ+«¥aYþσT*Ö4-teÉQ,ölI$& S†t•°><<ós˜fm¯ÐZvߨ}kš/ çrþÿj‚t‹™m_<÷?Ž·|9?þ8ÅC‡Ø}Í5<ó¶·±ä+_áƒwÝŵ¹¿výõü|Ù2¬d˲øµßþmÎúA†çoøÏñJ´Ñ{ù»žá¦ÛoâæeÇY[ÎÀ¿pÚ §Ñsmß7Wã7¼’÷šf }ÈóˆÇã\¼g÷}õ«A»<ÏÃó<ôw¿»¬½j_+ ´cY†A>Ÿ'}ùåär9¿ðÆ7~éhE\' *sç*ΛM&ËÎío[½š?ÿ<ïøú×ÉmÛ7Ü´(;Þ²,.þ¥_â¢Õ«ÑJïÍó<òù<‰RãƒÄó<Þþ»¿Ë—]Æ};vð³K.á¼Å‹yèÉ'Ù}íµÿÄ'üL®‹óì³$¯¼’ë?ö1îûßáš±1ž|àνõV¾úÆ7B,Fú‹_äðûÞǧ¿ým¾óÔS¼þ™g@×é}øaß{/˯»Žß9ÿüYoÈ\Ú± ´ ±ca>ÐL;VÅÑ3øa©M+°'ÓÐ,;>|øpÕ×c±XýEGtüÈ:5-ÑhÝ|Ph=ÅqèСâîÝ»‹O?ýô´ûÞ|óÍÁãT*U¼á‘GŠC•;¥RÅ¢¦‹±X±8>^,‹Åñññb¡P(¦R©éd”þëÅb1W,G‹Åb¦ôú°ºF±X,‹E³X 0Z,c­ºkBÀðp±X(ïzÝëÚÚŒFìø¢‡*f2™pC*U, U?(×Ö·×ù$µï]§‘Ëùý“iNÚíãZÍLlx÷îÝÅÏ|æ35·ŽŽS©TñŠû·âÊ#GŠŒŽ_ÿ‰O¯ýË¿,îÙ²¥øwW]U|êÊ+‹Çÿð‹ÅL¦X-‹~_],ý~5Uú¯~bÅb1Q ·×÷¾FKçî6þ×í«_-þñM=o¡P( …ð…¡!¿¿I$ê:~||¼8<<\*úƒY,Ïzê©âÚ|¤ø¦(ŽŽŽ3™Lqxx¸ßÿþ÷·íÎÄŽ?ó™ÏwïÞݶ¶ M;í£Ñ±± TÒ-ãŠZmUÃñññ¢auG˜t‹WŽÍb±hÆãþ¸6˜E_kPŒ‹Å.™ö´’ááÙŸc®˜nî¥fޏ={öLzm6ùUz{{®BòPOOè•éy~xš®CÄ;ʶm²Ù,…B¡~wfU@A…¦J •a{¨ôXeõ–èÖc`ÜñÝïò–66£Q;6*CKîÉ®ëò/ÿó_øgƒ+þíŠQwV¨ðìná‰Å|oϹ ùm€ÙôÅ•¸®K>Ÿ§øÊWòú·¾øðyçqäÎ;Ù´a›Ö®õm½ÂÛRKjå¶®WÐ ;V©´³Ù¬„i ma¶v¬?—^:éõ|>_¿‡geM _X`_ ÇñÿT°œë†ÙŠâqÿ5õÜž¢Ê¬êJªÕtkÂPiÌ£uÕy¢×R¶ÔyTpÕ»ÞuïyϺÞg ÄíÙ³‡[o½•[n¹…Í›7³mÛ66oÞÌž={èééÁ4Ͷ$:v]—cK–øó4×õE¸TjR¸˜šðxS%øì†ñ“$ð{ý~É=ßÈU†ñþ,D¹ƒfñE9U„AA,(¹(kšÆ_ýFιã{½ÙÒ*±X¾ÜéhZc!´]„mÛœ¼é&^zßûHÝ}7©+¯Ä¬§‡AE(âŒÒ¶é—ù|]׃:&V 5­J ÄÇL&ƒU 3Ïf³ “N§ƒÖ¡¡!<Ïöí@ˆ³mÏó¸xñböþñsÝ’%¸®;©²wå ­Rè›1ÃÃST*ЕÓY¶ŒeÀÖÖ|‚Ð*®-ú•G^óðÇn:þªß×xðŠƒ¯€Mí~‚ è@Ö²mwS„bÛv°ØÍœ56VUˆ›1áj¨Aׇ¦*Ñ+š‚¾R‹Öð‹Çýíº `QӨ̾U¯MgM^í%Ä©”ö•׊¦(¯ÌDö•¯üˆ½{Ÿ¨ëÞ,¿2i:fË–-ôõõwîÜÉÄÄétºìõVây/ªÊ%étM.‘H“œšè„À¾07Šoàq|ZÃ7~5`ST>„:˜dù|YÎ?MÓ¸úÃWÃ?•^Ú“¯;èäWçŽ>®Ä”øeY“kàMG=éí+?‚iD€êugS·K1M³jUÔžž2™ ïxÇ;0M³®²ÔÍäÔ™g†úWÔ"ª0¥¡[„+£jp¦®Š‘mCUŽ—™ˆÐ“úº 
?ø‘ÒJ½Œ?„.bâ»ßåü_ú%2?ìÿ:M%U®ðÕª <ÃÄ´•ÞbñxœBé{–L&Éd2äóy\×Å4Mâñ8š¦144„¦ihš†®ë E*O«ó)¡«ùÞf"¿Â•†z“ñÞüùϳÁ¶ÑK |+…º\I¬/TôJè„»X,†eYdJ…€Ia=5=êÖ¯÷WUUqaAãº.º®ãºn°¨©Äfm ¼>•÷‚¦i8Ž,„š¦ÉóûŸGs5^öëA‡“[Oò¿úÿϾþY>vçÇXúËK!G9Êxlœ“#'9ûŒ³Y²d Ͻí9¾ï}ŸT"Åm'oãì³Ïæ³7|–Ý_ÚÍåÏ_Îkåµ¼øû/rÑCñãþsßÝ÷qçš;ývŸpÑ4‰zÊ÷hµm;x_¶fãö»Á÷Öë÷ߣ®ëüÕîÛ/ øö¿þ«„¥v(Žã‘ª_M$¸®K<$•G´ øã’ègs©±çy“ Üu ÇŽãåK–TKÕÕàá pr—ƒw×ú/§Ó“²¨w–¸ lŠ®‡ëäÊË«¶íå•wZ”hÁ×zëÞÕ#ZU bͺ:%###“<Þ¢a¨===AißV‡§þüœsØ¢žÔøtò¥Ê©…©*…ª0(’# ¥!“Ÿêø!«‚0œ5E¥WÏóXÿÈú°óír÷caa`Û6Éd’OœqÕUd{{áâ‹§>H'T¦UW®’ÂÔ)@«PO]×;ö©r„&“ɰÒ)¡€åy…B!ƦñÓ4mN*·©8•BÞtµèv]×ÁÎu],Ë DÅJòù|°o££þòc>/B\QkЮ1%«¿èþ®ë¢³b`` LLSc±†aÍfƒ×ÌR5b%ÄÅb1ƾ6ÆÈŠ6-ÞDßXùlž›7ÝÌüCÞ·ü}üÉKÂ[nx š¦ñÕO~•ì¿gùÚÒ¯ñàɹéäMüå»ÿ’+¶\ã8œ3v7ÞÌÿ±þ­¾ˆÿ±êðì'Ÿå´ïœÆù?;Ÿ¿û[ Ç oå9Îq6Å7óbüÇcÿø®lxMÓøÌß|†Û´Û‚÷jY®ë¢´®ë†ëºÁD±ò>ÿîïþn»?rAXШ¬,*­ƒÐZÔoŠã88ŽŒ{Ö¯_„ «P•c]×ý¾Õ´p!tK]Ô‡} a¤ ¡qùy—·ûv4Ì‹^8iAÚq,˪KˆóÖC~<ÿgW÷}`Þ qº^î-VÏ×%:Œ­æå5Ý1 CĹb ÀªU«&mعsgÙóv)Ô‹O¹=™Lù|¢“»E!Oî¹ }ôåHí€ç@L¼„9dÙñã5·Åb1ßý8ˆpB×àº.ï»î:ÞqÏ=üýš5õ™®…ß7GQah‘>8N—y©ðÑxÌæ7³V_‹—óxî½Ï±xÓb>µîSÜtÙMôÝßÇvk;\×-¿Žñâ8û¾·/ÌÇv§ÿ>78‚÷|«q+àÿÞ©vþ±öÇå9}K&üVÞZv¿†††Ê&0™ifÓ‰ëQÔy_÷º×µÛ,„.¥šh®<0Õ÷˜TÌG}‡!ÒëzÙùTÿà•ÀnG\Z”×j7ãçž8W%…„P'a>UŒ|Á+Þ}€‹.º2~äÀgV|†ucë@»LãÒŸ_JïŽ~‡ÑT)WŸ ©|ÊÏÝë‚îê¾ÈÃ_Xµñç2aîõ<~Ä«†¦‚ï1WÍk®,ã§Œ U»2²ø“;åFYú½J~ rŸ€Ô« DÇ鸢†Â<ãÈ‘#ôŒOZNPIá ÓðÒín© Ô‡ëºü·#GüÎsùò:ˆ¥ñGÝC@ Þÿñ÷óyóó|íkcÜÿ:’I‡ûî»›÷¾÷ |÷»66l"󄹜ßa'“*ù«Ê׿?÷“¦ V(øš’JÔZ‰ÊK¡Gj© ëaáÛZ¿™Lùêb&ã_K×Ãä´ê|q϶ÏçÌ#‹‰ÿ¯Kþ©¿­RÐËç}aP¡D=µšUêTÛ†¿ÿûw°cÇ gõê—xÙËNãøñãÜv[/šæ²k׳<þøå¥ðÏoÏ€»÷úup×Ïàó?k®©šC#¬4U³èºAyg™¦I>ŸÂ8£áœj‚í8ãããd³Ùà÷¶e³YÆÇǃI|>Ÿrª‰y"‘&²Éd’¿û³¿ã’c—pÎ¥ç@ô„ŽŽ‹ WÈù©<\ž¾<,XµÈùÞ ÚMÄ MèXzx:§s6g‡¯E´ÛZèx­žPînOÞ-Ì-J€RžJtVéR©TPT'º`£t€ ÄNyQ:Žƒçy`¦cåÕ¢ª_ÁwQ¥ˆ””è}-* ©ïoÔÛ;Šò~î§^W¯ýû¿ÿ{»?ކq€õ<À2ù¾—åÆ Rx¸ø‚–/¶ —þŠaßq–Þ¿”ÓÞrG×å;¾Ãm xpï‰{¹(vPŠ +9­ôê½¶\[Îr­4¦Ëá‹däâÄ%Lõ4ÙnRY+í£z޳h[Åö{¶ÝÃ{xO»?…óP©»Ò[Á¾? 
Í19JDDç5K¶lÙ‚mÛ¤R)>øÁ–•îµm›[o½•­[·¶÷9¸ùæ²×t]'™L†á2â€"t ®ëòT*ÅE†3ÝÎqüAb¢ô×ùMà¦}Aéów|ž?ÑíOº‡_ýÕßäé§áê«?ÌÏ~v‚L&ƒ®ë<ôP¹wY,Šož:t©y~¥ãL=«ˆ•s€z]øk§ÚœBÓ€ï}ïÂåÄ®~M»(hoôZ‰Äôm®S瘴¨_(ʾßʳ8êiªžg³Ù 5B«Q‚ý¶mÛZ~ífaz)oë¼F…a*3Qžby|áKÇ3øýtºtLÉ«‚¿O6›õSn A2›$áøöÿ™ë>ƒö>?¥FÑ+r…wEÐ7ÿοSÞ–¨¹WK¢þf(aP¯8.Eufrd¾pñU_ߺu7ÞØô/Á­æW óãÈΡˆ),–€ŸnçΤÓi®¿þzúúúèééaddð…ºZÞrs‰ã8LlØ€¶mT„Ê‚?ÁøÛ¿`ÿþ«¹óN?ÝM@_ŒK€å}'ÄÞR~¼ªÖá8M‰B 5~¶íÐBz„(‰DÂ)S«_â™)t QÏ‚©º5+ öÀ};x+üU¿ûξwÝåÈä ~5P×½]¿(r†e¨I­|ª_IÁÖvb`œùWØÜÐôsWK°kšf0õ'À6žgáºzKÏ0´À#ŸÏ–åèQ¡¹JóQ^r{ö쯻<û|"ê “N§ƒ¤Õ@^æF\/u]ÂL£“ô©&ìÊû¥&þüßUiX… ãmú EðèäOEÆ'BPa–žç•yˆY–|7”÷˜¦iA%&†ì›J¥N…U«<˜ª?‹ {0949•JÕ×ðT+Pù]Ö4mÚi¡6ÊY>zÛÕxø}q-A%O(È©¾9êQ\Û\×…!Ð5Çõ=¢ ‰Š\èºoãê;µ÷iG„¦ò /Tý]øáðo,Â4}½!÷óºi.˜¯"¬µ‹˜ ó– ¶D‰qcccìÙ³‡±±1nºé¦@”kžçÑsÑEwÝUuûË^öK<ðÀ ~üãÓ‰ÇËã¬Ü$÷÷$˜/Bê““ÏQ(ø_ M =’Iÿy&ãO@”PgYaê×õ÷­ 'R%Õ8\½åáDÊ›C˜ÿœ}üø¤;š8›$þª’šL B‡â 5ù2¡¡åfÛ`öÙ±ÿØþÀòhÿQÞ¤ÝI>ÿ}ÀŸx©IŒä†™¢r5‰M§Ó$ ’É$àWb­¬ræ CZŒÂŒ¯;ÉçóÀ iÃÃÃÁsUä P(^3MX*÷Ó$þï…Zıð;R”Oþ¢^oòÛ"ÌåÕ©Äeó*‡g6›E×õ2ï5eûJTSÇ[–ÅÐÐPÐ×TŠX•bZô<Šh&t7*­·òZl •ÈJ'ÌQmò6@P z ÿqÌÃï‡Jû]ËU Û÷ðüþý÷sÁpáµ×)ÞtÓ/8S0Mp¥i¤Ô@«JÑ'¡}\öƒLÊcrÏ=Ëéï÷µÇ\®dj.þïxeäƒ|œ ŠII^z{{Ù²eK#皎­^]µÓK§Óüä'¿ÎüÁ5h¦i¾(fë¾CœömÈý ¯Æ¸VY0 ß“.›õ…5Ïó¿Cjìlš¡×œR³m;,ã[T-§N%¦é!•G:]¾Í0$Oö|⌗^šôª¼AA’õy°(, ž¼ë.¶^q排ÍúƒŒÑQü¶Êi’=£ãÞä²N[GJO-ØÕÚ}YR—Ý ç´þÚjЮ¼Þ”H—ÏçI$Aø•* ‹ÅüÆð$©T)ò4Qy¢ü nrªŠÔª¬[«‚íÌ.Ž/²eJJU’ëjÈ ^hÑ<†ªš¢ªJ­DûX,VV‘vºÐ̹ªDÝU(á¼Öw5š#¬ÖñÊ‹jào<|×0(×m÷gÕ]Äóy?U Ê«*©„«e ,¹1¹.l2as>cøžmi ‘† –ëÐiOÞG‰pQ/sûvíâêçŸ×eðsŸãÍïzg½ò•lX¼˜3^x\³²꽨D¶š¦’ΆÞ#Íöðð¼É"dô3ò<ÐWùzôÞª‰íàì»ï«|ºŽ“ex8å/N×Ò”¥°àèøl˽‡MêH=\7ÃoüÆ7¹üòƒÀùÁw¼Ó iyÊ ÊÕÚ^ë{^fÞ帀þÅ/66&V¥Í³Ù°ò’çù¯»®®E×ÃI]•jâiüû؆äáò _hSiÔ5`Ð2Ó&“UÅDR©žçñÍçŸçêÒ1["Ç®›ê$SMî* |X–?qTT–p‡òßuðÛU8„âš íÒ4JUž&_߉d Ž vJÄ‹âyþ"—óW´kíÄóe‚ºî¿ÿIŸ[ñ8 Ó']æ-ÄÝóÓŸrú/ÿò$ëUý-âüó7–mËd;Ш~×Á+GÑ·§œ¢}“£}Y¥Xháïø}ßÀ€>õ[ÓßîEÓÂß*åô úgÛöû˨ªÄ9µO<î_W¹ÛFEÂJ‰p!JõÕÊ3;º5oJXhøìgé_³†ÿü³?ãC„…¯âñê ÏÝÿÏ}žÿý‡ÙyÎÎÀëJB)@ß÷eÌ©¤>vi‰D‚²|q@)ÉÁe[·ràºëÚÝôYQ*‡C)Ѝôº*°Ïç¯7U¡½)¶«ªÎQº¸¯˜ø‰º Ôž„O5Á„ ªøFGTÈi*•ª¹`Ò¶ðÁ–ݪWzT,ŠÓ¯W9‡ÊÿäQÝ<©Ö¢U÷v«ìƒS©é= ÿ\j~¢¼øj½7õ_ySG°Ôÿèï†ò¶¶m½²}LØÀÓO?=éõt:Íu¯¾Ž7¼ý ín¢ ̈5¬¿â QZ 
™øÑ;îÜÁëo}=ËËÉtòŠH+Y¾ÝÐÐ;̳)2fYV¦šN§‰Åb¡ÇB¡À#_øüô§ínò¬È397r<<7UxéÐL''SaN²c„j âÆ?µ L… 5…P$H$˜¦ÙÅ¢ƒÖ|¾<1Š*ó¬V¦•ø`šð5Ôàß“1M¸?¿M‚p^áÚ“Ãm@sBwð(ÿ ¼@÷½¨ "ÅUòp± ß’‘Õö£À`SiU{}ä|¶í{ (ö‹=Ÿãœ¯ÃÎ ïùÞ{Úý 5„AXU· U]……&¾§Wt?Ãh¼R“rPŸµÊ½©z"ÿe žOA¦öbºJYH$H§ÓèºN"‘˜T8¡¥ÔZ0ê/V•(½Š(¸¿ «ÿzÀYcchÛ|ÃŽãºÇ§>0ú1™È"Û¤£…8ýÀغCÕý‘ŠNQýFY„nÂ$¢}s3´¢çª––¡Úþ•¿¥ÑçÕDB]‡“'W´þ¦Í’eK—Nz-“Éø?úÇñt‚ЬçŽßùNR<»5¾ï.àÁºuëøïŸøï,{{ûC0;ŠÓOÇû³¿Ãs§ï+Û…Jή&H•¿¹ÏnÜÈ]¸r]‰…!¦ÖŽTN,Ã0ÊÅÇFQÞ5.~ŸoãOò”+ÎÒõ1]2uaQ1DÇqm_hi:†B@(š©Ø\®|r¾~}ùê,„+¸ù|ù ²<¯©˜¦VžmÐàþ_º8K÷Åð7–^Û¼W‡á8zI€£”û4"žÐà0° ¸øïÀ×4ÿ;ŸÃwH{ x›Ï¿|¸8 ü¿Ò1„žsþ¤'ù¡]_Yµ# Äá¹ÕßnÏç9 àøñã“=ãñp…ß4§#U¥Tåá2¨>W¬•·¯ŠH­ ó¨Šäê»¤Š’u0Ïúóe‘ñ¿ cšS¼G‚ÝEÆ ŽŽâ–D<âl[…›;“;meÌ¢(ϪÍ}t.½ô »Výôûîƒë¯¯²ÿR¢C6žIDAT‚6O „ùÏýºÎŸÞt1àí8q8\`òàµîqâOpÎÛÏ‘>¹ ΃«°w×MÓr¹éRžhêž ]ŒrV1 „(“É”U›­§ô—À÷¸PéoTº%©զÊdØP=GC>®X©ä3Pžd<ŠJâ] %hT£ò ©÷õ(2\Ù®rQ%ú>\×7ðÊð¤òŠ*ïd††Ê¯£¼bj¹òC˜T·ò:•ï!zB¡¹Iu;ŒhÁ•x<Žiš—k]”Uu, À+½CÓéÐ6£b©ºÇ†19OW,&Ì×4ÿ8Ó Å¸JFGÃm*ì³ÖÇ–œ UØ\Ö ½œŽ09ÏZ"áÿ¥Kç·ñ¿Ã6þï„qì µ-çï?TÚG}ÝT~7ø¾çà ?¶À7Jûž_:vii»…_”E…©«˜yK÷¯o•®­U\Ëoïü6oá-³7œâç=þ8U~§UXålÄšR‘(\ª¥jžX-ZpšŸµ¨°⢠U"Â-\Î>q"°×|ÆÇÇI§Õ¶‰¨7¼°`éh!nϪU\´"ô€Rã/•TÚ0Œòð;Rõ*Àžµk' &ÊB«gñ»­ÆºólaIèP–޳úùç±Ï8ƒÓJêÅ{«ØÞÉ›N²ô K¹ùÀÍ|^û|»›Ýy\|1Þ+A?«Ý ™ÕG9ŽC2™$—Ë9亙,aÔ™ê~üqÎð¼Iù|jR)%Zpº9`Ù°V‡']XïúáSÑy:=¹W¹ ºØ”Ï—ç—˜*ßO4±¶º†Jj *DT´Š^§ÚÍÐP˜OB‰„QQ¦ÚuÉCÔHøY3CŠ»åÕjÛ6¹\®>»VÉÚ£ö­’ª„›d¦OT?‰Êˆ(µRËâpäµh¥A%¶xLTBA-VúKG+U‘5ª*qž0¿´Êë¦5ðçÊ @]3I8±VmŠ´;Aù{0*Úk–ÚªÚ¥—þ«j2µnµÏ-}nfŸCà>ú(o®ì[jõkõœÐÆ¿*ô4GÃ^GÑ"&ª¨‘®ëõ}—„ųÏ> ¯}-àwŸ/¾ø8ΦڶCD8¡³…8ó=¡÷“eù¿÷Éd6\ÕÀ¯Z­R4]TAh3º®—­È*‡UÈ"› øUEv奯æDj¾”LúcÚL&,hXYìH{E°fÃSçÇ/q6p·7Õ˜¿}ìíüó­ÿÌç×~^VüªÐmú€*Þ`Ûv™w\·¢æÔÊÙÄöïßÏÇK&à…â„i7¦àc©°ÚÃoÙ8ôÇ`SéðÔ‰VC pœpP%ó=%Û¶J) „3C¼U¹[Ú7_zM§¢•†ãøÂ•*ˆ¢áÏr^dAžF›0|1UÚWÝšè-ª +Ûj9j‘ÿSÝZ-òž£_ùYDfv2ð²ÿú/^uÑE¾=VV¦«çê³×½•\(ÁMy2©¬ùðÛ'Ì=§Ÿå”gzܪ·-–ªE^O”öw#û;¥ýÕýSS©Š¨zÀ/.¸ )Ÿ]#4jÇ.ð‹dópüx¸Š\/¡gd ­ßòù<¦ibš¦ˆo ŒFíØœu˜fýéL§«Î,,Z&ÄÙ¶M__ߌŽ9~Î9Ác%D@¤PƒN¸œý•ï ahĆ9Â9;xàÃ$¿ö0ä¬ÉΕx4•O=c”hòüjcÓô+TÄQ2é¿r]ÿšÊ#/™ôÅ¿DÂÇG¯¥„¼jÕ}® Ž"ôµ—FìÀó<î^´ˆíÀ2«Êçè‚i˜Üÿ©ûyü‡sö÷ΖÂ95ðïŸivWŠ*Ó4q‡t:Í›ßüfzè¡¶µ¥Q;V¨I±{Ë-ܰk·¿éMÙ¬ßá©üU‰„ÿ?+ïLU…ö»‘ñFѨV(Oe§ë¡Saô·@¥²SßÇñÿTê0Ç 
Ï«>•O}‡”ƒW5Ô>®ëkQ.f¥Óð裗±iS{î×lí¸ò\Éd’X,Špª º¡££å«o¼ÈŸR+H­u[„‘’*ŠR'œS*ñ*K(b©s&çºRdæx1àYðLÐ/ïd)m"À“ÀCà®sØÁ/¢pxC©QKA_¶ßýUà4¿=n)wšŠì ¢Aÿ$\# D±„aê.åŽ**%]4â4ŠV±=KxM%ªAXãAµE9à DΡ—îoô¾+QPMuÔý¬zÀ¡›bGÐð¸xò®»X6Óðó¨Áe¨­RÖ‰J£iZwTæ„FíØ¼Ra>•A¥ó¨‰q-âöìÙÃÎ;gtÌÄâÅAßjÛ¾0àº.®ë†+ד~eanhĆÞì–»h?þÿ Ÿ£ªTú hFhitü]ÏØÇ0ü±»×§Rá˜^çÕs% øû*ï<>«&TjRÍã¬ê•è˜Ï‡¹ž»I¬ètµc˲ØXè¿Ü‚X¥Ý”*“Ýpà œýÓ³Ûý6;šnöÏ”öY¥…¢Æ…&ŒìXJ»˜W’N§) å9U^‹D"ø!uKÛUQN™Žtº¦ÜRža1B1ü"žÑHÒѧ ÐÏ/‰I·€û§[Œ€q)xŸípx2÷¾ÿ¿½ór£ºòü×ø·Û-\ÆØ$j ” XËWÇMfCÎQorbafzb’ͪ‡>ÓY2"|f“pÆi²™d­HkæÌ¶'žÃÒÚ(Ά®]'a’X.1 c£Û-Û`Ó…%ÿnâÚ?ž^©Ô-µZj©~´îçœ>­Ò¯zªúÖ«ûî»ï^@»Àf@úo`Nëy} ÿÅ¥§+Ê€ÅSk04Šz]3µÆSM7.¯ö]}/¼Ü{o~}uêÕñøOmmµß°Ž…«¨ƒp8lTÇ¢YC¢~Ÿ;w+_z xà( Ÿ·«rÅÒê=9âr¹¼^¯ò9ÓPÏË/\0F÷^² "ŸfuårèNvŒM޼.F>L^>[M+Mãpÿe4Z,˜g^ºkôñ&wòvò¶›#øX;,Ýwüñ6ûýÅœ|üûÜÎltü|[nÚÀw"eއŒŸ°ûW:ŸááÊŽ¢:õêØœÞ)à7ï¼StZäóÅœ“…XÈ øù)@¼¶X€G¸L†;6ÌËôÓW.Q½°©t0| ‹U`¼Ðï,à€› ãÄ_­cõ VáãrÑ'q¬ð Øs¿*ì猋y̽…ñëEà÷pQe0g‹A@ 2ge/¡°Í1! E’¼LŠéõ•…¾¿E&?#(äÇ/ôßüOöFˆ’“Ž­P¸Œ¢É$žC©3F{ˆÂýÌ)¸ÅÈ©7®¿8r¤ª~ÍlúãÉðeãf'œ @$$ N¹pløÜµ`:Ôò^ÀÿÏ€ðÝâܶ€Â‡˜âš<Ì,©Áû4Œ0:ÑÀˆÿÅ\g |¶pÖ±ˆë´ØŒ’hj/€KQV—Îo¸šÙèxÅø8-¦ ¦£j.v¦ Sî€M.Ÿ$æ¶g-Àü3Üf4Û’l¿•ëÝ4’™¤5O¢óô4æxs²$1ûžG Åt9<ºØ\(›7s2··Ò5æ%œæIøãÇíYb=›W?ƒªˆ:+9âÒé4²Ù¬!l.òþþþ²ï?|ø0úúúp.öïD@1.°™iìÎD±:1'I§ÓˆÅbØ·oŸmû¯EÃKþ}òÅí-[€[nù_Èå^F×8â꥖ Îrνj«x><3|ÛlH¥F†¹—ª nxáÑðàkÚòÛÜ„$¹Û—L&ñÃþ+W®´eÿµêxÿþýسgÞý«¿2:¦õbõîÝÅ@ë׳?󂯲ÇBLȼâd¢ð¡rZQ ¯sO\Å5 ˜‡5æpã-Íó“ü~Ri—9,&Þ§àVû8‰ ÿý¸êÄ‰Ú Æn°¨È*ý–Ù9ÆmÂ;Ïà‘G–!EÂÀ³Í¶l)~ŽÛ‡££EûÏјP<²GôF"EGœ¹ˆšª² e h+rÇ\ P:‘<\ü3?LJBìO×u}tT×]Eö÷ÑþÌ–vתã={öè;vìЇu]ÿÄO~¢C×õ©ïÖ\}€muê«00À´ÁõâF¸6ì VïØ±Cß³gÒuº®‹º®¿þÈ#úÏo½Õx_×õHá¿hÚÖ‡ eÈdŠý~vms]*žçáa]× Âõa5õÚÆ“ …Bz0ÔõLF¿ã]¼pAè¬Õ‹frE"ü‹tf›„tvø º®è¬o5½7XxÌmšáÂg2:»Ð8Âþ}DSp›}<¬ëú­?ÿ¹þ^•÷uÖgôQg2ÌÆe}l PÜ]_±â=}Ù²s:P´ÉÆÇu=)ýúÑ 6ùÐPÑîÉdØçÌö_½ýz&SúÙÉÛ­Š[tl¶Ú³GÿN¿>>>É_1~[~&aµØÇ–DÄy<x<cÛëõÎès+ ëÕâÝÌ“/"ó.‡ÁfÞÂ(†%SnM¢‰Ô«áñU«°|ùr¶„†çÔÒÀôKaÉ®Å\£ÜsæÊ¶@i.<èéù7ÿÁòv׫c¸yß>¹ë.D£¦U$…¬Þ7í» KW²F…Pór‘V#‘poŽ8'0ûœ8 ŸyñÆó<÷Õ'Íò^è(FÊ&ÅH~]OŽÎåÏSÑ=¢³Ñ±™D"ÌÐÐÝ-¿þ5B Ö–<ŒbTZ¤°­Euƒõé~°È6 EÛ›GÉq›œW*è¦ïç÷>–GÌ)f£ãób^µÈl”€’¨ÉxœEr™ E"¬Ï=|øn¾ù]Œ‹Ð´y„Å%¹YÍK!9•îÉæ>Ü\ùo¼ñHò?ð 
Â4ùELÇlúãW,ÀÂsçð7Z1Ÿ=ÇœŽDĤ¤šD«bIDÜæÍ›!Ë2zzzàñx066†(_0_|é%ü»€Ûo7•™# Â,ÜñÉ&,¢ s–Οînf(<¶û1xBž}Ž M½:VÏÃÕÙ,¡4§rn~K—/Å;kÞAG ÃîŸç*ªå@$*S¯Žd7ær¸úÉ'Ku— ê¿ZG!ˆ§Pp†"ܪªÐ4 ’$AÓ4¨ªj1‡Ãˆ.¾D"QZaÔbfcWp‰ú³Y–ÄJ’¦8ãÉy TÞ15ÿZ¹þ)czM»8xžf€ª¶(õêXpþ¹ç€(ÿ†€( n:·0ÛƒçBZ”@Q$ D"–/‘ j`6ýñ%/â߯_…¹½h4MÓŒ{QššÃ'6ˆ–Dz¥©»víB:F.—ƒÏç+ ÿ¬ÄÂ>0¡³% Ï_2BQ¯†œ9ƒc‚ß0<E ¶Q¯Ž/>Œ5ccåÒ¤¼<pêÆSØôö&à“vÿ:¢U¨WÇ|ôQœY¼Wþô§åßÔkîo0ßк‘ªªÎJMӌȒp8ŒP(dl׋¢(¢(+DQ„,ËP¡Pªª"‘H  AÓ4tvv"“a¡ÞÞ^ƒAH’„p8 QdËÌŽ8þyîp+­’¨Íú7̆Ù蘷ÿôà ¼7Ý4óa°È"^BUEù"Õ4œô˜Z²–h f£c^ir ¼ÄóÿeÊT"-G€ru2Ûþ(¦¬˜âN€õ‹2Š«Fxå'¢¥±di*Ççó¡««kFÂ>û‡?`KAX>ÓTUH%l¢ À™ pzÅÆB#rˆöS«Žµñqœ_¹“'%IÂíÿz;–þ÷¥¥ùˆªðjgDýÔªcÀÀv-[65N O°¸™ú6܈,ˈÊÿ…ÃacMoo/”‚0ãñ¸ñ8ÍΫîînÌ)Ä¿OUU„ å¨5MúuëŒ÷wvvß#˲aÛ©ª ¹PºO’$ø _c‚ N80l‘HÄÈÉã÷û¹$IŨ $oO0´ÕÇ©KÇv|ó›øæùóFxmÕîDEÑq¦¡˜çë|¶Ey¤Ñ’ÔªãcçÏW|M; ôfxÁd®TEZÓ4ôöö`׺®ÄÜ žþøÍë®ÃÙ}…ÛáTx:­^«JÇíþ¥„°ÔW o_¼ˆ….@ÓxIæ¸a¤Q4áö}ä#% bKÊá.¹p‹-2’"„ÁwLŸˆ˜‚¢gø ëHT8è½ $.*úœˆªª%Î,N8†ªªPì·'†ƒLÃÙå÷û G\(*qn•{lކãN.st™ †#L ›*YŒŽŽ–|¹ ÜI&‚ñAJ“^ï<ÿ<–|ý놡¡J0ZEû™/2Í6Ý‹Šøoæ|SŠ¢×¹æòÈkãã¸{b¢Ìïºÿn,M…Q ÿ\ž1!\AÇÁƒP!v¿ÏÁúZ?ŠÉhé¶FÀ¢¥©õpvÍœ]¶ o(,9¸ßoJXA¹u—°ôèQhÚµ…ˆ J=/á>N=‹ÃK?<Å9ñôÓOã“7|‡êûâ%`cgš¼·žh4Š-[&=hó¡Ñ:¿ÔÌK7¹ƒ- N/þ˜G†ð¨¿ÑBÉî@ P)Æ1dÅ=æ%Ÿü³fGZ=ßIÌŒËî~„EµUZ‘§šþæ|‹£¹f6IâÏàË»8|^’¦Fsé* {_ À>›H°çøÄ‘Ùg ²÷È2ûóûy‚u RJ(þþžO}*‡;î`3>ø6n¾y9r¹Uxÿý÷±bÅ Àûï¿o}K‡ ¸ï¾Ÿáôé5xê©[ Ë ìÝ».Ü„·ß~ÇŽí·ÞŠ÷ßO?ý4¥ª*ãñÇañâÜxã1œ9ó=ƒèï_ƒÕ«OaÑ"à·¿} œÄÕW_¶¶}8thݧ®&¾û.ÖOšœV`Ëí@èï§wÂñœá¢(ÒrTÂv–kÚÛ%曢I¾dŸOXÓP(àXGܱcÇpP¡¡X. 
\À‚|ªÊŒæN«D‚¥— Øs²ÌœX“ms²qùöö–nóïŽF§F«šq@q¼ÀßÇp~?pêÔ©ÂkËûö=¿ÿN\vÙI„ÃßB0Çs?ýéð•¯|o½õ‰Gyª ôövc``¸ÐÏU$ çÎÃÈÈ9Üyç*<ÿüsX¶LÄw~ÿôOÿŠr5ü~?¶n½/^Zø­V¯^ˆ‰ àÔ©U8zt×^ +pÿý½æçí—$øþñÇ;Œß'Ië1’ l@_ß­ÍZÓÈ_r Ó$€¦[üÀÐ €TåÈÁO8 , Ño,7úŸ)K¤ynN‚˜„cqçÏŸÇÊöµÆ ···Ò[_­‚¢âWÏç¡(,š!bˆJý.ä=A@nAÖ¿£ÚžªªU2„ëAUÙ • »YËkˆ¢á°ˆÇíy`øã®±¾=æ¢ÑhÔp¤qç›yY¦ßï/YÆI´6†#ŽON«€,šük[P\åÓ|¢ðZ…~‡GS”âÒy^°¤»»¸-I¬3;ßD±è +F¡¡â6<«Æð¤k¥ Óš¦AQXEÜ@@C4… D 9 ·`ttÔÈW AQÞ@<ǽ÷@UUœ>½~ÿPÕ÷ ¤Àc­Âc}¥°‡¸ÿþÆïòû‡MíÙl´ã™g6ƒÁ?3ûý_2ß}·×x,¢É™¸Àµe_9?Óäãçf_ÔÉK/Å¢ ÅcýXH_-ÿ~sUTÊG8€zò$4]Ÿ²,O‰ 'ˆJ86GÜ1¯ ¯5"0‚Á 3$Èþ$\ÄÚùó1>Ζæ EÈ G¸—|~¤ý0ÌvwwÏ )QŽxœk ¬cq6‹K^}7z½,¸$ Ä£@dfŸ« š¦9c¢Ñ¨ñØ\¬`hhÈXÞÉr¼¢(A” ie™E=J`¶1_¾Éß“sÆi`vsƶ¢°‰N8Ìm,WÇ6U££Ååô~?sHñ¿`°èhãŽ7Qœ”StD£QhšUU±eË–B{ã±,ËÆcæˆSŒÇÜA-Š¢±[E ± \’$ <‹¢(• §[ÞH×_ó¸úÍ7KŽo`>ù)*/³EúΤýøq(Šy2ÂÔñQ>b·pb·.Ãå~6[BžeÂmŒmØ€U…꿌¥Pá6r¹n¾y-Ä_ƒE_¬2‘ [Eɹk¦ž*1;–9‚óW] ‘»*BÀh7žêÂ\´ "”ÌŽ‡LaT„€¨‡‰Ï}Þóç!ä dÄ2Y ¸Å ¦Q@˜sG–q'œ$±¾(ÒÄ&æë!‘HÀï÷ClÙ²‘H¢(bݺu.É1(‚á–$Ép¦™#CEQ4®!sŽBÂ\˜?¿äœI/£ìÄžªªaD8š%K–ý©(–‰"éplDÜåãã¸þäïà÷ª„Á¼Ê~PTáVÿäH!ú%Œß<ÿrZ®$Ýuxì±Uÿl€—ë“IÏu R¡«9uqŸ\ ¯× Y4LÇ X¬iâq*F-d|çNsQ‚˜-g_{ þ ó|aµ©€Ñ‡é6[Y °î s™}T<Š­~+sÅ^~ ˜«ó𛫆B!à “ÉdŒÇfG59Öæ.ùµk±ôÄ ,B³÷OP±/æQ’áDþíìYÅ erE‚˜ÇFÄiË—ãõ×ð¾—ú`Â…Ù¿Úå,òaÍ/×mv·ˆ j§ýàA(çn€T04:;;1¥Â9„køï5ظ;…;¾z:;ÙR;a–ã|ž§‹Gl¥(‚ƒh/|ìcøî¶#úX\x²`#+¾`üjã¢ÝÌU{y4Àò7g 4‘¾9êÓü˜®ÖÆsú4<Çâ_¤*Ú|91A8‘·öíÃg_@1¡$Il”ò_ÓâØˆ¸ìUWaçN4 ˆ#, ƒ"á±úäI\}ËVALÑv¡mÚÜáT¼/çJrš n„Ã¥‰Î‰æóéý_|nç[¡PýÑ@²,C.œ‰ø…y˜'ö:¨¬ BèáTiŽ·zPňr‹Çã†3ŽG³‰¢ˆáBe…ÉQŸäh#¦cÉÄ„ñx`+ì)}]UUtvvÚÝL‚¨Ê¢ñq\øýj„ìß3&ãâ0r*ÓÒT¢ŽuÄù^y’¬œ÷>°Ì«L.BxñEÜþ‹ÿ AöüÍZÆG¸mñbègÿl9*h5[B!–øœ°éWYœ½’-!©55›¢(%Ëë8<ÇAXE|ùrÜô£Áo*´©(@Ø@œyeÒÉ$ £øAww·ñ<ïë#‘ˆ‘›úb¶üvÍŒ­ZÅ6˜2±g.²AŽæäI¼‘¿njÞßXT\„ATıޏö‰ D"ÀÑ_EnU΄[X’ÏãàØ€Wº^¡ˆN•èçÏCüÃB÷§n=eT¬#êG(GœÕ,ÒöâèâvÄã3ÿL"‘€ªª$ÉXV ‡AX;׎ááŸ}˜ ð ôöoÖþ]ñx‰BåY–„ø<ÊM’$rºMaÅîÝüëë±nJ´¬iš¡Išä ÜÀ﮼÷Þ¹¡Š“\Ó(ˆ˜Ç:â´yó¾PÈ«àAÅXÚJEBTÀ±Ž¸î¯þ„ó* 'áJN}ÐŽ³Ÿ8‹“¿;iwS¢.¯_‘ÿqFÄA¸tÞ€}Ûª.Û‹Çㆣ"‰Ž8‚p'ÛæãƒÏ|²Â"áf‚¦iX·nVÉ—ç0 ¤oÂ.¿ürF v ÀéÌiüÝKÿ€¯ákv7Ÿ Ô®ã_úQlÒÚ‘PH4µG8ƒZu¼o_¾ð…Ò熆hÍa/µêøàß‚_a•—9ñxªª"‰  >š°žZu<â9ûîcé.4M3ò„Ô3ÎûÎüUˆßp/ýÝQl6–!ˆjX‡‘Ífáóù‡!ËUJˆäå7/·ûø„A­^³ä4îYІ¯}Ÿœp„s¨UÇKW°rÍR#rˆ œ@­:þó??Ib‘Cv7Ÿ 
Ô®cs4Ï© É‘AØJ­:^·î%ƒ@("펡f_€›oþ::ÞÁúo¯§|öDÍX—N§166†'žx@Ñã<ÝRñ< EhMá êÑð:}üŸ·»åQ¤_­_ŠOÝ6Ë„C¨GÇ6œÀ–ëQ$áêÑq Àòjišf,G%;©GÇ ( Ua'C½:>qƒ;Fn6ƒ¢áˆš±$".•JÁçóÛ]]]H¥RÓ~æžOþki²Cèëë³·`Ç.‹ÙÝ Äb±ªçl.S†_]ü*ÂÛÞnÒO'\ÓvRŽ»W¾…Ek÷ÚÚnÒ3Ûaõèø™gžaÍ`_î,§è˜Úá êѱ $ ª*A°-’È)ýµÃÔ£ããÇÛ^%•ìãRHǵéxß¾}PA¿±å£áZ]?f¸6f‚%qù|ƶ×ëöý™L/ç_ÆÁ¿=ˆööv+šX–ÇÛÞI9rGµ½Š¢àÈ‘#±eÿù|û÷ïÇ[o½eÛþkÑ0¼õÖ[8{ö¬­çŽôSŠÝ×4×ñÑ£GmÛ-:~óÍ7ñíÃ߯Šü ܻϖ6¤§µãÈ‘#8pàV®´§t­:Þ»w/2™ Þ{ï=;vÌ–6ÎÑ1µ£´ ÇŽÃÊ•+g”¨‘ÔcŒŒà¶Ûn³µƒÝýµ£7ÚÇï¾û.ÚÚÚl¿‘}\Änóóáû8›Í"“Éààsñƒ?ùr»r¶´Û)Ø­§°ÿþš¢Õ-+ÖP @ .ÄÚµk±lÙ2ÛÚqÅW`íÚµ¶‹3gÎàÌ™3Xµj•­íEË–-³í|œ9s^¯7Üpƒ­Ç¡î½÷^¬^½6l°­ ¤ŸR쾦¹Ž'&&l=3¥³³_üâq饗ڪcÒ³ÚqâÄ \sÍ5Ž83aóæÍ8tèA°µÍNÑ1µ£ˆÇãÁÉ“']‘7ÛÆ«V­²õžnwÿCí(Åöñ]wÝEöq'ôƒ€ý:>qâNœ8áûøÓŸþ4Àóe#ú!»±[?NáŠ+®Àu×]‡Ë.»lFï·ÄçóùL&ít: ÇSñý_úÒ—¬hVU¬ž%œK­€G}Ôîf“hõkºV_ýõ¸þúëín¶cpŠ~œÒ»¨UÇÜ`&ˆÉØy-‘mLí˜ }<7 צã7bãÆv7Û1´º~8µKrÄù|>¤Óiär,lS–åªÉ ÂI†‰¹é˜˜ މ¹é˜˜ މ¹é˜°ƒùßøÆ7¾ÑìpòöíÛ¡ª*ž{î9<þøãUgLÂ)†‰¹é˜˜ މ¹é˜˜ މ¹é˜°ƒyº®ëVí,›Íbll >Ÿ„M¸Ò01 sÒ11 sÒ11 Vb©#Ž ‚ ‚ ‚ ‚ ZKrÄAAAAD«cIŽ8§‘ËåÇ122‚l6 ŸÏg<ÿÔSOáÅ_ttt”|¦Òkh‹¹Ê†ÕíH&“e¹äXL·¯f bæTÒ0ÍÉ:nVHÇî£7ó¼9UÇv\ÓÄÌ!O…úc÷A:ž õÇîƒì㩎G«'ùLœB#}7-—ËåÐÓÓ€UH‘e¡P‡ ‘…ÃaȲl|nº×fC,Ãàà`ÉsV¶#‹!•JaÓ¦MH&“ˆÅbU÷Õ¬cAÌŒé4 8_ÇÍhéØ}Ô«ãfž7§êØŽkš˜¤ãòû§þØ]ŽËïŸúcwAöqùý“ŽG+§ùLœB#}7 ìþ1V“N§áñxÐßߨ´iî¸ã¤ÓiŒá‰'žÀÄ—L&á÷û§}m6Ȳl”I6·Ïªvd³Y ãÇ?þ1桞¶^¯·)Ç‚˜9•4Ì_s²Ž›¡Ò±;©GÇÍ.…tÜXZíØ8Égâí»i¹ˆ¸ŽŽlÛ¶ÍØÎçó€T*U¾ÜÕÕ…T*UõµzÉårرcGI[¬nÿ¾l6k|ÏC=4í¾šq,ˆÚ¨¤aÀù:nFHÇî¤7ë¼9YÇV_ÓDmŽK¡þØŽK¡þØ}\ 鸱´Ú±qŠÏÄ)4ÃwÓrq^¯^¯›)…Bx衇ÏçKÖìò÷˜öµz ‡ÃضmÛ”ÒÈV¶#›Í"›ÍâᇆÏçÃÈÈúûû±yóæŠûjƱ j£’†kõÔ®ãf´tìNêÑq³Î›“ulõ5MÔé¸êÝ é¸êÝ ÙÇ¥ŽK«§øLœB3|7-爘GsçÎH&“ضmü~I“f388ŸÏW’äÏÎcÁC–Óé4úúú°yóf»›ET¡œ†­†tLÌÒñÔãA:v¤ã©Çƒtì>HÇSéØ}ާÒ1Q/vûLœB³®é–[š }}}ÈårxöÙgÚçó!Nïá뢫½V###„$I$ I’¾hU;¼^o‰—Öçóëž+í«Ñm ꣜†çë¸ú!»—ZuÜŒóæt[yMõA:.Bý±{!¡þؽ}\„tÜXZñØØí3q ÍòÝ´\D\2™„Ç㙲¾—¨\.ÇY–§®ÜkõÀöq$I‚¢(Xè§UíèêêÂàà ñ}©TÊ™¬´¯F·¨Jœ¯ãfè‡tìNêÑq3ΛÓulå5MÔé¸êÝ é¸êÝ ÙÇ¥ŽK«'øLœB³|7-çˆã +¹7“£( ¶nÝŠžžtuuA–eìÚµ ›Q¨ôZ£™n_n‡×ëEww7zzzÐÑѱ±1|ùË_žv_V ¢<ÓiØJýL‡•ú!»“ztlõys‚ŽrMå!OÝõÇîƒtŸ‡,Ëèëë³­C£Î—˜Û·oÇöíÛƒƒƒèéé±»I¤ã¹ƒç2›Íbûöí±ûçDS©§ß¦kpõÚÌÍÖ1iØ9¸ÁtÒØãd»žhMÜp-7 rÄYÄŽ;‰DÐßß'žxZVt„;à3O>ù$úûû±k×.är9š &\I(²» 
ÑtœÜoÓ5ḨṲ́a¢œ¦c'ߢY`w즯¯>Ÿƒƒƒ€þþ~äóyc{Û¶mؼyó¬ö‘J¥ÏçÑÕÕe<·k×.ãq2™D6›EÙöùý~Äb1är9tuuy£ˆÅbÆo'ÜI3tÂgíÚÛÛKžÏçóªë¶Ù×ÕdHÇs‡FŸËX,†®®.¤Óé’ç a+úx¢qÔ¢ ;îíÕúí™bÕ5HØÃtú³»_¬f3×B#uLv6“ϵ›uìôûá<¬Ð´Uöj+çZ>".•J¡½½Š¢ "‹Ûýýý†fC6›…ÏçC,C__úúúJfD¼^/¼^oÅö¥ÓiìÞ½»wïÆØØXCg.¶oߎT*…­[·6ÿ`M£:ñù|ؼy3úúú‹Å …ÐÑÑM›6¨®Ûf_WfHÇs‡FŸËT*…ááá²ßç 7»'K-º°ãÞ^­ßž V^ƒ„=L§?»ûÅj6óLi¤ŽIÃΦܹv³Ž| œ‰š¶Â^mµñ\ËGÄÀC=ðûý%Û>Ÿ¯ìZþé’ –óÞf³Y¤R)x½^lݺÕÈ1ÑÞÞŽ®®®’Ù’éÚçñxÐÑÑQö=µ¶ `b—eÏ>û,<µh8ÕtRF€â Úä³™êv¦×U½m$Ïfr.kÑH.—ÃöíÛFË~ŸS4<“>žp3Õ…]÷v r¿]j×`­í©v öQIv÷‹Õlæ™ÐH“†M¥sÝh7zìW 'ÞgÓlM›¿³ºlÅñ9âê ½½½æÙ¯×‹mÛ¶ÛÙl²,Ϩ3®4£3›6¥R)#DU–å†/$¬§šNjÕH2™D*•®]»Œ1 açÎM &·.3=—µh$‹H*•B.—C.—C:†ÏçkÊï¨÷þ@´͸·Ï¦ßžÉ5Xk{츉™ae¿S«nfc37ZǤaçb¥ 8Æ~vØõ„si–½ZÏ÷¶êxŽq°iÓ¦²¡›ÙlÖ¶6ù|>lÛ¶ ]]]ؾ};ü~ËxŸ‰™‘ÍfÑÑÑQ¢‹ŽŽGåG!Ïšq.=²Ù,vîÜ €i:—ËÁãñЊ˜“̦ߦkp³µ™­cÒ°sq² èıŸìz¢5qòµÜLÈWù|¾béòr³>ŸÙlÖ˜=Ëår6¼½ü¦>›z­mââöûýH&“ˆÅb%³6ÄÜ£Ý&“IÃàä³Á<乺mIÇs‡™žËZ42y†—'³åÏ;AÃÄÜÅŽ{{µ~;NÃãñ”ÕžÉ5Xk{ª]ƒ„ó°»_¬f3Öê˜4ì\¦;×Öq£Ç~Ói¸™mšîþ@´ͲWëùÞVÏ‘#®¼^oM7`ǃþþ~ôôô «« cccðù|Æ:ëd2‰t:=«Ê#µ¶É̶mÛÐÓÓ¿ßOÅ9L­ñûýH§Ó¸çž{àóù066†ŽŽŽ’Š€³ÕílÛh†t ¢*Nè§³=HÇÄLp²ŽíÔðt×AÖAqR©”6¯ªCN„tKÌUHÃD3±S_•úíd2IÑÄ´8¥_¬¤a€tLTÇé:¶SÃÓ][AXEÄAAAA„\bw‚ ‚ ‚ ‚ ¢øÿ Sëªj%tEXtdate:create2019-08-28T17:00:49-05:00jµ°*%tEXtdate:modify2019-08-28T17:00:49-05:00è–-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1469x828+0+0‹QÊtEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/docs/graphs/sup/dgemm_rrr_has_nt1.pdf000066400000000000000000006335641360743507500216130ustar00rootroot00000000000000%PDF-1.4 %ª«¬­ 1 0 obj << /Producer (Apache FOP Version 2.3.0-SNAPSHOT: PDFDocumentGraphics2D) /CreationDate (D:20190828165854-05'00') >> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœ¼Ë®&7–çùÿ°{PÇ ^§ ´ lø&ÀƒBAW• n¥PÖįïµx‹½vµ2Sб¨T2ãƒAn~\\ûÿ~:_þÿø?5Ô×ÿþüéx;Cê{ÿ 
ý?å·³…ãü;þg¾®z]¯3ÆWˆç[9ã•Úü™×Ž·£ÿ¿^Ÿ?5òW?Ž¿jíxkýÿ]w<ÿB/úû§ÿõé'Tû”ü§O¬Èü™óõó__ûôß¿º¦¿Çoáï_á:ñÜñýO?ö?•ˆ?°°ÿáåx-±õúþðéOÆ¿ÿ˧ðú#nýP”câuÿïÓy¼þË»ÕøŸ¿ÿ-ñpŸïÇý½oXÏ7üèxÌõ¡½]µ÷¡öû×à çÛªÀüóÇÞ?ãÒ]ùZƒÙìý½¯?§·|ö¯ö:Ýhõ~ûùǽûhòqûùçÇýÿÃ!a‚ M'¤n‡òúƒ¹¥©Œý'x%'FtÜýj|m#zJ¨J|>1ºòæˆþŸþãþ¯ÿí>êc'È_i€ó|ËÚ·âsÊo蛨O.þ®üŸ~Ë¿üð×ÏŸ¿ÿé§ïþùçï?çŸþñoÿøM“Švæã—‡¼¨ØZÈQY};|Hg«á÷l¸´äÚjè‹„ë Šø /(àaSH-¥Q„fÂ4œ-ÞÚÛyfTêìEéí±G}g;ßÐC ¾Ù4‹êÅ«0מ%¿¡WbÚmã2¼µ —Å4†àxÅpÅ<îVÞbäËF#œhèt¦TZeõ ÝZ:Q“t¾µ”SDÑËÚzþq\¾Ùˆvi=¶ ,«‰×ñ#¹ð3š=ô`)$üÓÌërFžáºBŽg¿·ˆס²ãº#x%µ—…·Pp]d¯DC×€J^} _{áu u¹.tõ\B­‰²Zy]­¸.áKjx)­E„'¸¬$T%”7¼·|Ö0~2½E Iݾñ¼Ðíû££•BKþ‚cÞÖ…@¼Ôñåíž_Ê[)µ¢2q5v•ZX|V:*úL/«oåÀ¯ æ¸?YØÒyüdãKY£#^ecG>ú[Ï9ÙWÊůßLlõ,·ã*±£˜ûðeÍîŠ6¼Øh1­²Ê.Ò»+ßgCÓ‡Ñ%kÁ'»F؃-Šðd¼ÎZ1ʬÚþ€;jÙÞFÓ÷ÞŠ²Æñ¯7pCÑCFge{¸ }d–µs¯ìäxIÐ{MZŸãÖøšù#©¤Ô_gC¿Æl3z+ÆÐƉ,7Ó®þ°»·VÞ­ŒÞÓ°â»ÖøªO‡¢vÅ»³ÚkøÜ9löΊ§Ã†à¢ôw×0òÄ5¾â'ñöÌ24Dšã+.C—hèkãé*#•ÕYñ“üVºÆì¬ø¼Ï5î³ ŸQ^ã+ª‰OЧ<Ø[*k|ÅofÎdµˆ×q¾]%õâý¢Q{€ƒQf °¸‹#ÄQÖê`ÎeŽÚË®·ÒÖË*"Ì„\zYD­Öëï—Þ®I‚·ÃDu¬öà€Y%βòvœk€Åose¸õ걥˅¨oÔ7?÷øLj3æ>4î°ÆWtÌR˜%údt¨~¯ RÁ´:ž€ŸîµÇWÏYV¯5¾²/°…y⫸XÞ/%,#úØpa ›3ÒÙ|èêµ?g½´XÜüäx~ëÐɰ˜Áñç,«y°hêvá›óry€½ðaÇ2X†™‰u`¯cjY,âłϪ•YÆ× °¹p¦Ži–Õ{ńߌÇZ¾!@ÿØc,ÆÏÈÙ¸õ×Ðñ™+¯€‰jŰýÇÊ‘2†³^†ùçØc¬ l,kçcµ¹f fŒ%JqôLõiE±|mÁû3̼a±œ::ãÑú‚í¼öË爟XÖvËÛF'e´b«Å¢=jåœÙ§Üëm ½Ëb̈¼uŸz0`¨Ù£,º:ZaP¿×sG±'CÌÈ@vµÄòµøÆó¸Š³äcñè»ýº^† »ì1VßÚ…øjǰèicP=úȆ8åÌxf¾ŠÅZ›e­®A–³YâÌ2X;ˆÅuø*P“Ô[åÂ@·ƒXÔ=’‘D/ªœJg‡eÁ˜¦ã0cÝA¬‰.¬Ž;„Å@‡µ]BÕfY½CXÌ4'»r}/|-¹Çè11"tâ8ËêŽa9a4Œçc±´Ê;†eÅGÀ0Â2,­âŽau‹ž±ÆXŽ‘Í2Æ',­Ž;†EgN×ÎQO”ÕĺçKüÏ=Æb à’lvu¬­’ b9£–Q‚ùj‡°Œ7C¬¦^‰í¾GX†bìè)Ͳ¶CX¾tL‚1ΚàËÝ!¬öXY¥Âjĉq~¼W]¬.´?xîqáÝesdT¯YÖî(e_Ʀ^Öw|s`e•v‹Á_èZc ‹qf±Œiù‚Gƒ`euÞa¬Äô,kw«ã–Ve‡±…a •UÚa,/Ãbä¨ã#È A÷‹Q§±l>XåŠuõX ý‘¡Äè•™AÚbñµD6$F°^†ItDZüÊ0g bëu!x¼Yí XYÅÈrÕ•8MŒ©K«°Y\‡xËÞшþv ËßÄ$š0¤Í²z²²´B”qd²Dæ¯ «x™!Ö¶VaDZ\g\ìBñ\XX;Ž=ûÛBl–VY]q,ÍÎ}aa•wëF7,­bjï÷ØÊÕµdm_ÁÚê(fµ †²ZÆ ËÈwÅœ•Çdñ-Fvò1Bauw{rχ jô¬®ÂŽc9z|êÑ’X];Žu£ÊÖŽæ£sÕŽcQŒ¸ÇêBX]¥ÇžŒ:ZDÔ8ªR1›ìAwak…þ…T‚µ=È2†ÆGs1eíd . 
­Ù뫲YF}ÉyÖWéd{Ï8V¼õÕµYÎ/hÀ pküçdÑ'þUõDY»YŽHûZC†š5Ærb@¤‰ø¬7 ÖWiDZ‘k¯†YzÄ{X_]Çâ ˜‹(ô3¬^ö(Û£±ð@³¬­8–OÎ…É !ÆÚqìÙaÃYÇüŽåUºÃX‰šøà„59Æ›=Ⱥ²ñWå1®õã›6-ã=_×yƒÃ¼Ê{1¾æl}Åå œžÑ!Ú ByC_ì#Èš,×`^LøŒEq°Ãy"i4^ìT—΋>Á{ã[žLÖp^ü6'©–ëà~–ó†3߆㼡÷÷€Q:ŒûÎ0Àí‰ÏqÞÀIž-ÿÃqÞÀU#Gˆ6Y®á¼Ñ:>óËã–ó†ƒ!·¦ÆJÝrÞ€Ejb@u„U¶9oÀ:;$Œ¨%>@/Äx =ÊxyÎÛø *c #œ—?1®ã!Æ#ÎK&°f¡DËyñ‚1”àKÏOÌK’oØn1//Ã*“¼<æm\sK`‚Ù‰yt‹Òý½9/GƒŠÏ"×ñs–óº‡Îˆ1|îÁ®r^|ò±¢Xì¼çmä;|VÎKÚ‚w·¿pÞF0ê´'çÅí2©¾·â9/[òHaÒ1Á¼}0à1šyÌ‹÷vðƒl“Ÿ̋ዱ{ê¹Ç¼º'c1//C¸ŒæJã2‹y]C[ÌËëp+Läï`^÷b-æåuøüµ)#˜-¶)Žb^^†QÓA^(÷Ƽ¬&:Ë©ó>ªiA/®ÃTq­ÝKzûu¸ÝÜàš*.ôñƒZ‘-„ôúv´¤—×a˜í6¤·ß¨ iã÷¤÷q¿›ôöÛ‘ì¥Ñ–ôöË0âÖxž/Gzyþ΀Àô¢9pIÚðØÞÞ¹°~Ä\4Ê,éåÛ6Cõú·-¨—ÓÒÜ‚³¨ó÷.ŽôrX»§®ñä–ôòÉ3‰á`UJzÙ¹ Ÿsô‹zy½¢öjQ/¯‹¼ýÄÀõöAÐ3ŒI‹zÇF;¦û ê}\fQ¯ûõ²Ã2Ì c‰&¨—£ÍÅ8EH¯¾S%½×÷åI/®Ãôz`ëѳ^\‡ØðZ±™^ý¤„ôº ZЫㅂ^_K zÙ8éhcAA/+y )aü¤½:Â*èu× èå( ÿÝ*»Ao#AW;[ zù~3Ú¨ŒÆЋ.ƒÇ^{Æ z7üòÅUÅ˃^^¹S;(¤€^? z¹BÁëõ¼AoÿÆ>ÂX[Ð놽ú(èu·ÐËúVÂ˳^WOa½:ì êe5 9Ôë¾A½îñõºÇØËëŒPi¼!½þ~öºøÅÂ^×WöºðE`¯»À^¾ìõ×MØëº¥À^7‹ ìå·˜¸´£¶À^7~ ìeOU£•-ì%r 1 ƒŒŸ´°×}ú{yå µ=a¯¯¦…½þ: {]à#°WqÂ^^×Ê‚f {ÝýöºçØëÚÅÒ^Îhx_º'$6´—e'&Ÿ¹Ñ(°×}{Ýð`a¯ë_{]ÿÚ«oAi¯v¥½þºG´ð-´—Záóõ?&ò­/,8¿ ûÆXTÔöž¸·¦>ž=±oß[LJP‡@T°ïÁ½üŒ³£rÁ¾\ëãk ±>å½èz{ȼ×!;‘÷V²…Ìw3ʬ¼·Rÿz–ÙÛTÞ‹Ï Cµ­¼·õY«¿×—þXia â6Ç‚Š¼·±Ž46UÝ‹ÇãBŸÝSÝ‹ÛÕûà›ª{q],è6«ˆ¼_dCêœ÷}]™•÷¢š!4,úëb»7öåTŽ2"N•÷ò#ÏèyPå¾ѾëñDÞ‹²Ôb p*Ø—“úß9.ÛØ7²uðÕøz`_ÔDÜ=H±`_W&Ø­Úðϧº·‘žãmLíœP_ – cÍÄ>J}e…÷ ¾7+¦¥ñ`B}ñFÄ[yÈX•újïRì›8û¯M;‡} ¦¼üT÷ò:¾€ôõEQ͈’g™U÷cUBí!RTì+ÿª{ùè‘  „×ûrdW8Ø× ûÊÀ*ê^üdå‚üŠ}QÂunáï-îõœ\¨o¦:0ä N­¼—sâÁ-¡9‡º×ý{«îÕùNÕ½hü£ôÝìóu«BU÷ò©ø¢8®3ê^ÑTÝ«ó¤ª{ù^ðɯ­:…¾Úö"ïuO.Ì—ŸúzˆYä½/:æ[ˆ8 °ë˜/ÿ<¯cO¾JДóÛWèK úr«Cúk oßV«\+_õõ€¾ºAL#×CßÛ×ß•üsʉúrðÅ›-cü2Ð÷ñ}/6-ÇÂú² CàvþËA_÷è}=²›Ô×Ñz¡¾} Å%ó±Ô× K}×êëö>,õÕ}¡¾_4Ô—e9¡ ¥¾2ƒ}¿i°o¯åÁý¸¡¬±Ø·ÿ&O6Q[°o/«œÞGxj±/[ìêJÖq™¡¾Ÿ4Ô—exÝhü!F²Ô×m õåNð†ŽúößÌCña‰¯o.!¾è·lg…øRŽƒþXÏEu7ñ}´ˆ!¾ý ÌXS‰d‰¯o’aMÀ&Ä÷‹€åA|ý,ñ}ÜÈ_ÿª-ñíí„__šeC|mZÚë:±…½ý“¡Ø?)‘…½¾3ZØëö%öòµàS*T]¿íõ×YÚÛŸúŠaž#Úë[ÒÒ^ûØ–ô>®1¤×·¢E½ò{ó>®1˜×!óúl1¯{3–òú§¥¼þÉ,å}” ÊÛ7 Fýc¨Š-åõã­R^™e”—Rñ¥W´”×woCyû“E<»”×b–òº=t¡¼¾¯ZÊû¸ÎP^?’YÊû(3”·77 §ØP^?Kyûî/&Ã¥ ´”×?ƒ¥¼ß4”÷QOCy}ç³”÷qÝMy{ÑxÖç!é}TÓP^@åíMƨgH™„òúßœž+²Ù/·GáVÊÆûø-y}¯´×Þòúæ°·¿ž‰ª/¼v$±€÷Q 
¼:©YÀëTVxí½,ÜõÍaé®ï–îöæ±³%Ÿ5t÷ñ›†î>~ÓÐ]ß-Ýõmlé®Ó ÝõS¥¥»æ2t×WÓÒ]_Kw}/žt×EÇBwýœgéîãV†îÚ7jÉ®o Cv?gÈ®o|Kv}ß·d×É–ìúÎjÉ®¿Ÿ%»¾õ-Ùõ‡%»~³d÷²–~ahýr²K¥5õ?SkÐîã Ú}<ÃvU1h׿>‹vû² ‹ˆ ¸ÔÓDSЮ{µÝE—kߊvvOüû¯áº™+Ϋã§×å÷{.ÍÈôt,… çº™Žß‘óòÌdd£^«è–ó²Qq»)öW9¯¾C•ó¢õ˹9©œ—ÑÖžø\÷ñ›VΫa³“ó:-¬•óò<(—™ðVä¼®Lä¼î~NÎ;HØôX9¯*þ,Øuß¼ÓóªäÏ€]3Ò9-¯»•º,C×åz b^U#[ªû(T7tÿtÊ8“–êvš’j}Ï´Á}–BuûobI€µÕë:Va±®§0ëºe­`]~…F £}-ÕåeQR¼®‡–—e䝸ÕAŠ Õe~º®½Ku{Y²è×^Ì+ }Áº”˜§Xj7×ùÑ«y#¢JÌëëößD?ng}`Ý~þ«\e)}ëaÆðDG¶2­«š·Äç‘|‹u{Inì½~tj^w™Áºývé\§¾š—'å®:Ïp×e³à뮋v«š—{0ˆ Œr^­…Êy#‹ñäÊí>®9¯ 4~ôr^FËXŸ—i—`å¼\?' &c“H弉§•®y|ÖÉyÝuí>®=¯+3l·a8ñ“q4¤êy¹sªÜܷݲ»NØ®öYe»±Â:³îØn¦Jž;‘Šv#}âf©½heÊ ÚM¯Ö]¯Ö]¯Œ'ðºëðºJXÀë~R ¯{.¡¼2CyÝo å•Ó`Žòêo*åÅg–ó:Aç(/ÑB¢i㡔ו åÕº(éu× éuõÒë®ÚëÊ„ö&žN<¥úò´× íue‚{µLq¯6§â^W¶p/¢òÊS&ã©â^”af™&]÷ÚÒÑ^™çí-œ€¥àÚ›¹ÖNsÇÝÑÞ¾ÞªÓšEa¯ ëõEõfžYks¿Ý¡^Y":Ô+qC½î:A½z?E½œ™cÀ_ô*¨­@ÛcH.õººêu¿)¨÷QfP/OórŸïðî Ëõº2A½úŠzù’Ñ›ÛÒ)êÕß\¨W+¨¬×ýšÀ^Fì4ÇjÃ…A`¯«¡À^Wfa¯»À^töºŸàëO€¯~ |ËÛÐ>OÉ«_ùT•ùjU”ùj‹)óÕÇSæ«Í¢Ì×ÝO˜¯»N˜¯»Ÿ0_m2a¾®H˜¯«Š0_}te¾ZMe¾²ZÞ¹D˜¯»•0_w0_­¾r_m-á¾Ú"Ê}1äܶúV¸/Í-Ó9Õ:Žûf¶ÛõL¹¯ûMá¾tÀà•ŸŠÞ>O L[5”û"ð£IFšê[á¾üÐZ^çP”ûr0;Ë’Œ+÷­Œ uØG¹o¥ãïöÈRîË©ù$½ï”mîÛmÖò’t*÷e(Â9Árßnå…A ¤‡‰(µÓ©)÷펡£ÌôšÜ·û½¤åˆòm§‘ô~•S/¦Mv^¼¨ôõðÖ@ß³òðÛvÒP«ÞJv0[^̋ɓËÖ­L1/gÔFÐ7–»"æM´OéêÇw¼z¹K3¦qÞZļÇ™¼Nˆ¨˜÷â!Ö¶N¸©˜—±\Lûp¾zõ"0Ëey ©˜7ón¨-j^LtŒMEŠªyYví]=UófzM,»Šy3ÏAsdØÏŠ˜—3?Íë®Ñœ"æ%•]q›2ß3ñ<ó>Ø£b^”Ñ*kIðDÌ›¸ÞÆÓ…òó2xÉû̃ªyy]¹ÅŽKÍ‹GÁRô\”OÔ¼‰¾øtP5oâ¹£¡µj^T#´}œCå¼¼îºõ‘VÎë¼ EΛ9&lå¼(CgŒKï.r^F5ÌÇÓ¬eyÇ‚]弉'êÎsÎQ*çåãå[Z,r^¾ašI†ñx¢çM f(ÑÐNô¼|sÇ>€ z^¼b´ï:XªzÞÄ ˆ›Båiã8ÓÝ"OÑóºŽ)z^v›ó>Øoõ¼üDPë"zÑófº[Ý:Nôfò÷{—@½|µuï(«¢7ÓêìÖjNI/ÿö¼÷DÓËZФgj­EÓËÈù¸÷1DÓëka5½ìÀùÞ!°š^ößz{Ñôâ2L+XÝ_ËãáÖôæ¸ÃqßÇݬ¤7sÑYI™Æí¬¦7srGÁ˜ DÒ‹¢L5s~º8¸"ë׋Í¡AüzYFã±0Ýn­_oæúšó̺8ð£n›F¨ao&d¾¹¼¸8°¬"l {1TZ Íïʺ8`,@5búò'óaÈ®uqÀOž¥à+žöÇÖÅcªk”a¢¾¦SØrqàêhÝn8.Xmuq਎íš*.…éò`V‡>èœË7_L ¦ÀAaÔݺ8Ц¿ñ‡Ïœ¸8pQ6¬äâË1_>]ä†gœO`mðtµlmx?*W¦·…Ø8ö^ž³­‹ëÞ6™ç½´‡L„r¯ÄÆÁµ¦Ø8°,Ýñ®Ø8d®Žk¦6P–­¾í˜/Ÿ!ôòà‡Ì£Á²†0~½ø²0 a¢žØÚúõ.ÓyÌ$–[ºx80غ6„ übmèÈóиX8°,s+5Ï8¸.+®*bàà~S \“M¾ð„hÙ&X‡[ÎçÓÀe¥žXùŽ3qpp )¾†ÆÁÁõqp@Y:¯m+¾A¬ƒCîÁÿæNâÖ[ùžÎ8 ŽÆsòS5l X–·Ï¥8ð73ZåÌO.’9/ç„ià€ÿ-Ä…#\ÿ†þOÊ-»µþ 
.ìÿFª‰ç?Þ1ëÅobÖCÇ̳^_dýêõvŸÖVûüË2AìP(c›“ŽØ7(‚Sû׃ľ‘Rá.üžÖ½¡p%Öpuo`£ð´ç|:qopÁ—¸70V ¦½ëuïN¬z¹z¦!Ä”7‹{CÇCÚR÷ÞZ÷|l‘÷$¾,;ÉÊ–*Úº70ú:¶i«º7Èœ«æ ìØt ‰nÞ€çÅ(¾¼ ¾ôþFó†Â„a£Ó¼cÞpDÜy-X¯By—™MMN-î­ åææk™Ôl¨'3S¤áæ)¸—k·x‹î÷’áVÜn¶Ìz2ÊÛòNY/_wå¡£ºxîÍzOJÛaOYo`À-¢Ñ+…õ‚HªM-¬—ץܾëuó€°^έXŸæ üÉ|µ:ók(ë œ„Œ®e½°±5ű¿† `ǦÂz5› ²^܉k{\X/2ñÓÌ¿fao`Àÿ˜ÈÙÀÞÀýLc¦`a/9¿ûŸ°Wß·Â^f•ä³ÎjZØ‹ÇøÖ8òK ìí¹nCb½îñö&*â3Œã×{ÙxˆkæØø#ôS™ÙË$9±Ûí '°¿8'Í„*{µ_*ìÕï@a/ŸÏÊZØKBzìÝ*…½[ø±®”\{ùxm ¯—g½:p(ìÅÓᓟÞì°—#ÛQV  °W“ƺÜlúÚöfò!@Î}¯j÷ê'¬¸7p”ã)Ò¶œnÜ‹ß?ÍFÁ½¼]£ þ1´¿÷ölV–]å½l,,›¹¿„÷¢ÏF‘jo`ÈIJSgl/»(óíôª |™|¸´nS€¯äŒVÞ‹"Š'ðVý›Ët±x‡÷j>c—› ](ò†Ç2å½y/?xsöYx¯»„÷º~"¼× ÛÂ{Ý,(¼—¿i\:„÷ò:rðZ†½™i2_ÛCßëÇá½ Øî+/šå½.,Þ‹ßLÄ›#éÎâ½L‰m<Ì…÷ºQAx¯¿“彬=7;[^Ú_“ “/~ãhcô²¼×µ±ð^—§Ox¯¿dg#¹A0t=õ½~JÞËhµ2ZjC_ir6R·³/æ^ž÷º1]x/ßM ýñõò¸×MÒš›MŸNp¯›^÷Æ„ ^Ú’ðšìl’á]³³iü%¼×}<Â{5(Üë:º&gÓ€aá^÷`Â{Í´g°ð^Fˆ‘sãè±Â{pÜ Q”÷ºÑòÞ>PR®¼,1óíÜ,à‹²ÈEþH¥À—óD¸w¤ø2HÜ‘Oà¸/{Ãq¾;uèmËË×_Ìèd µ>íz¯Ð[ܳ^ÌógÄg“ã3-~ö¡Sk¥¬—“2ÖÓÍZX/¦òD„¦ã®e½ªàTÖ‹²½–r¬—³E“Ó¨CX/ %{ÿ8!­¬—,Ûqe½‘.ƒ´Ë;N½Ä{ü¼Ç©ý{©Ke :òª~›Ã=7úí #Îëó§Œà4ô’êâ:ö?üýÓß¾(|½D9~A9ü^5¾‰aÝ-KœwìOù{ß>·i>æú|\×½{ó;× 1Èž˜þØû—«ý˜ÿñ¡5˜ÍÞßûúsº÷)>è ôÛÏ?~èÝG“ÛÏ?¿sÿñ‹?ü;õ¸z.‰bGŠ5sO>ž¹–C•þå»ió‡ÿÇ•×wŸ_ú'¯ÿüúóë»?~ú×ï¾ôN12² xÜX¿üNñî”»{lSøe7ÊþF0Dîaùwî.k(þÀ!Ò<#UºëÌßGÍ}ƒáš«„”}Lm3,9_ʘ¢íÁœEúˆ·Â{»·"•øØ·Òk#oE+óÎ[ùõ'´ƒ0 ÇÑÅ€=—xBUÓsä9^i <_=¾¹Û„‘îïXE<îÒÖ]Âo¼MãB&ž±Û¨üòm¢¿Í¯Ü¿ö Ω'Ú=w¼Ý-ø?ýùᯟ?ÿÓOßÿüóÏßþÇOõßþñ›†íׇ‹Ü}·úˆÊ8Mï8î¶¼R”Eî¾ßÎ< ÆãÓuÄäÙ $ÈÛÒDž·Ù j¡ÎµßÄ¢z ‰¹9°Ug¨ó[Þd™Ûí<ç0d'©éq8ü@™“˜{®Òfm1ÄÜSk̤¬YT7Wf¢g*`FÂV¬¬›„åשq°Ú@¹û¦©1leýé˜zcÌ£svv³¼NÆE&eÙ@Ù½6f°Ù@™.rÌR9È£ŒèN‘‰¬e\—2¢ïQ74¯+·4¢[·0•üpædš‹ ”©Lá¾èµËni ôxÊ$ŽÓÛ#ñ-ä9±®ÂÊŠe¸ÂH#¨z +¥îÞÝîËÓ¬sƒ$ ÿÇÕcûNC¹ŸP—>…Óh#XÆѱï&'î—069bée…[a¼î㡟`Òˆãîp'õ}¼ƒn{ãÅa&èêÆ^Ö¿ì¾!×›ó¢YÆØCH¤ä·6‚;ü”Û¨K6Ú>;»åÐp¤PŒ6âä®7Ƈ!…cÙ­pßO¨}c­rŸøì®ÌÝÆóaZ£¡Lâ½R*¥‹ê<_»:Ï¡÷°þxµŽ=z/=¾Š3æ,»Å¤LøªCìc)æC#ŽÀ?FR]·:€Ù|â162÷Þ·8ââ¶õÊyÔ/qÄEÒ^•Y†jvYn=Ô}J.qïý¤wYÔU?óÑçðÔy}š]V÷É1:`Ä:f—%6j\׬8‚r—-õg™GÈŽ+Žè+úDôû!è7âFƒÛ¿-ÅÀi³ËêFNŠýK›]6ð<(]fûáF–¡ñf— éQ£5†Ë»²~tYîýfžR£CLVÁw²kSÌVÁcÖÇšÑRßXˆ÷^³Ùf™‘GPÊÉ-ËÛ§ˆ8‹AJﲨ0ï©)Òò¤Í.K! 
­.Î1Îrž«Êvù}±Ä-øó˜]V÷ÚR Vqq¨X>¿,3òÑ¡ 2úîÌóPHž>ÂÕ>%£à§rî#‰þ×gù©d|ýÃ]œe6ôNºj|MtxHkœ¥($ì³Ùè»k’é´eˆKÝæà¸;-ÖÌeú! ´ú|Ëú ƒeF Á›¯S¦)ŸVÑW1ú")aqËf}½ÿ×0‹oä*4§)ãvtYÃ,«ÂT.q–Õ¶†Y\W9<›ˆÄ-ø[ÁóÔûPbêÄ·>‚ûw”òöO™[ð·<¢R·1O[&îÀßêQذ¬k¥ê6cJÜkUÕfp¬wç ËŸ<é±Ò=0R9»6yöX.JèÂÚƒàÄS·:BÞA V¡5ÔÝ·b‰VÙýq& ÅWoÅøëÀþ—…Ù_Qê>4Å2./Gå!ßklÀ ö6&žÏ²ÊåÛûƒliV‘¹Zƈ#ºß;õTcÐC™G$xCxÕûéÜÚ†Û!;qþØc,>iñŸ!7à=Æòæ²±IÜ?Ík?~=ÄŠ|ˆe·8‚×£h£ÌœzïÒ¢ùucqe„¢ZÃÚÁ #(³Þ®X"ØÃ«kÉn´:k¦Ð+†ÎRã’<íΊÏ}í'fæÌ‡é¬¦×auu+#8‰ÇcEª=Ãã]ElŲ!ŒY⩜[¡CaKVAIÍÜGcuvxè[…¤‘ÿpöV1¤ž z®(»-È7ßwëÞ'7ß­0Â(3À­ð•é;÷5ò]1#ÜVâyXÐß!Aeüº^'37ÒŽbµZÛq3s-¹£ØÐUë¨hæútG±&׆e<<²‡×GÙ-ào†kEœy8 .yÆ£k@C 9}ÄR(°¬Z}µ©û#:ó­Î>‹‰30%êX ±¬ÝQ¬Lœùä’Åv°ºN ežÆ¸£Øƒ#Og®?:®~Qâq¹tw†Ìe€±ôk­z a¿í,ÜJtñ+GàñòÊ̪ë/ÇÙ“xÄÛ5•iMÆ‚xCJ¹¦kÿÞˆ-c½uF¯ž¸ÂËŒ‘I¼ž„7Pá]HÛ†Å^ÚÒÕv«*á»4¥Cð°––xé‡m}>¯ËÓ!€—–µG¦Í“^çý¯€#B¾u_x{X–{œ^‚°°Ù^žñ8æ>xýý àué·ð2½4fŠ™ÄH/ÆPòœÅŒðrìÅ›ëÄ«ð2õt=Kæ¾×Mx{b¢ÐÉÍñR•Åac¬ñòðE‡‰Ñ-ãõû–ò² ‹w|¼ƒÞÌË3ÇèƒÓöD0ïã*ËyyWY>ÂyYKüõFTÐÛxÐ÷,3½ ‚^>AźxàVÐ[«E(èí·º7½¬gÎó ÌËX€ÓÖüóbNçÙy X£Ìb^¢ÖÔgòUvãn%Qºr^t…‹#äò+ç%Ø¥-Þ8Qm9o?\<ï€^ü;º$Nà ½øýÊeÙÙž uŽmËåôòÞŒ†rx^üZ£úqŒOÂyùc×­“ÎË6ïwTÂr^öåÈ(ç­ KÒòzÎëŸËr^–2‡óå1oe¶í¶ ójßRÌË×ÄÜS#0P̫ͨ˜—OGEh ñå1oõu);ó²bí;Šyù]Q£=Æ5żìç×ÞTÌ[ûiÝ ó2éü}0S1/ Þiᥘ¿Yhm>›ÌP^öBZGMN¥¼<H3·±û ”—ßG÷”›PÓR^W¡¼­c· ÀæÅÏa…J·¶†-æeÈN/Ãc’a‹yYÅ‹kòs\g1oe¼Àny>0oëÛð7‡¶˜×—YÌËUÇ–X1/¿õÚâ9ÌR^ª,HÇÔ¨”W"”òò:KH-åå\…çœ[wJyñ›SåQM¡¼|qX9 ‡ …¼:æ)äõ?i!¯¿ÎB^+òrÀåÂpÀWÈ˯ü>Þ®—uA¨7Ý‹òVñ:–1¸B^ÞòVžü,{é/WÇô y},ä­]ë¼É@^ ÑÉÙæºR ¯šäåOÒçtìü*äÅOb}N…¶B^61³––ñònašxÆËZ2“Ùð;SÆ‹ëJëìå/•ôÜ0žàE/³¸¶ôD¼„eÙ®ˆ·v?„kšÆ*âÅož´6‚ŠxùÖ86Ž4ŠxýoZÄËÉŒìt„ÍŠxýuñò2wÃñº7d¯Æ Šxù“L˜;Â_E¼œÍð•M»=E¼®9âuoG/kHÊ“Æh"ˆ—×ñlß0 WÄ«ýË^7¥ áe|‚eË4±SÂË!–{s§C¯ûè„ðrÅü>ݪ”ðr ¯mYž)âew¦he¨:ñúë,âÕ€H/_3„·'áå¸倥„×MBxù¨Œ;gU,âõÍb¯¯¦e¼=àÅ,^æu–ñr°¤bØU ãu·ÆËÖ¤ÏØÂxýOZÈës@^üÚÅ„‘eÒd y݈-×MVyݸ`!¯¯¼…¼nlÈë&¼j¥×Ey9\–2³ )äu­"×}úyÝ .”×MÅByݸ!”·Ÿ«*•n!/Oy9Z2•ãØyÐëz³€^Yâ*çu¿h1/‹xz|d^ÌË5u(ÃØS1¯kKÁ¼îÕ çuϽ8/.ž+ǃr^ÿs–óêÛÌë&‹yÝKÌëkb1¯ÚByuHPÊ«/M)¯Ž¢Jy5ÐPÊ«3ªR^y”òò ?ò:R§”W])¯~ÆJyyžâša†¦”×?Ÿ¥¼ª*åå"»ÃÜS)¯ÍByu‚TÊ«¼F1¯†¸Šyù%qob8¥u̫˙oMjüu–gNÒ‹`‚ÛÇÌÏòôÒZ ‘؈¤W9IoeøØæ{I/¿ýSÐÈ3¬’^TÒ‹±2]qå´VM¯³’Mo¡þßcÌCØj5½Ó©G™LD½ lé«} ”j5½jÛ¦šÞJ› :¸VM/nGóû™ Z‘oå 
K×z-|kzQv Š‹‰ ò¥Í™ÆÑE¾,ãAÿ4¯³È—¬i©GpA¾¬JëŽ)õå‘/‘æÅíåáõ'È—eÇ>ŠªÈ×½A¾< MÔ´Õ»øöé=¯ãÔJ|i]eįB|Õ_V‰/k¸ŽÀzâËóÚe{Ü ñ-ŒØ(Ìl Ë3r¾ºÑ¹ _ž§ÛûèéB| ÕÔlN–j‰oÿ &kˆ‹êÞÄ·pÁë(aÉ~oâëZEˆ/„~-yt=!¾ýynQµ0__Ë|y?fŽûøÊ|ù°c¦º^žù¢ì´raË| ƒŠ^4ÚL˜/^Vþ+)ˆ0_¾-fœÖÚÊ|Ÿe7óíñ%B–±®RæËˆIb:É:æKó©seã^Ì—}M&ÆèËþ‹é#[}yèËÃ2Ç>®Ð—nD‘#é½B_úÌ>Y}@_õi{“Ýú–>Š·™AA©/RR<’«(õeû3 ÌØÄSêËO‰ºà ôõ—YèËeõBc$Uè«®ù }ùeçÊ´£Ð·Pøƒ/j çúâ;+Bãüô•aO¡/¿3¦¬*#"èKç…û(ƒƒ¾ÖÀU /¯º½ úrž`«šW™¾t‹À?±¹@_zéEÎKƒ ôÕù冾UÄqîl*ô¥+£ðòÈWG‡|±Í”õÍ3È·TÅåÍ¢È×— ò=9"f[äë¯æ+v²Ê|9Þ’Ç|1r&:•N@k™o¡@ ŸÖH5 Ì—^üL{ Ì·;*†:›_˜o¡$âZÞŽùÊԯ̷t‹Ö°€½2_ŠU·Ç¥c¾ÚºÂýõ`¾î:Ë|uWæ+˜c¾¥ö¿c¾4źfò‡|9Ñ>{Þ¬_  _–áÛŸù‡•øº¡Dˆ¯Î*B|K¾ýq=ñ-Ýè䘉o•øÒ¬“ÒÝÉ/ùºO/ʰ`ºÖŠV/]hx|ed¶RæËIÓý\“ ò-Ä`m¥sÈW¿bA¾DO~û£› òå¼ÏAfò3A¾*òõeù¢,`,\¸H¯ÐùÒZ”¡ÙT[äKóÓ“Ï7ÖßùÊ'—ÙŠ_†L1¹ˆ_÷„øòå­¸Â_ŽVF“Ü ñÅ fuãçZâËþÅ-ú©§â[¸œff¾ºhðF¾ìz…¯p²l‹|ÝD,È—m¹%2Þ© _m,%¾Ú…„øjüäˆoèŸĘ̈ÄWGJ¾ “ñ#C~ä€/gP.£9J|y*Œ‡ó °B|»Õj›VüŽøÒ!Œn7cQâK«J„guŒ)J|]³ñ¥õ?£íë©êåD‡édF|y oY ö)À— -,ò¨‰_Yr:àkç÷‰{ÝH)¸—K)î)ç!wÜÛÿcŸ¥TÜ«1¥à^7 î-Ýø0Ïd^Š{Ý ¸×]'¸—>_ƒñ0NWÜë&á½î„÷ò³B¤¸¶„÷ºAAx¯›%„÷êŠSyoéËÿe¹¬¼—mx˜tòe |¹:°–‚¾¸ßyŸjUâë†QA¾¼®íÓŠ|5$ä««_E¾\‡Ç}Š\‘¯›Aòuc¬ _]¤*òu­,ÌW×rÂ|ýe–ù*EPæË^JçúaDë ¯„± }ùxXþÌí8…¾ZM…¾z?…¾ U™Ùñš× ôe×´\`ô¥Í$ï7¬0_É&â˜ïIƒîÁ‚…÷Ò?¬äÚêø=á½2;:Þ+‘½ã½²Zp¼W¨—ò^ŠIOާªOÇ Sƒçxox­:ZKyoxðtþfô_è·òÞcóÞÿþk`¯±ô}ÀÞÀǹ–ë)îÕ,×öröÆÇÝ®‡C7+ç9Èúì•ís{EYã`/új&ñ¼ž•ç28¢¦åÒ °Wˆ§À^&óàÞû8n®´—V ˜ÉK~:8pýrŸiv´“ -ä¯I_…ö¾qeÀö´×9Žöbþ6'ô•öRCN„ž=ì½ñ…‡½æ TØËZf̤áÉz#Ö먺Ê{j3YèL ¾`/‡s,^`¯~`NÞKªv¬¤« {;ÿ ËèVa/³œ#ŸñËË{#m ûõåao%!´êX½ìBô`½Žwô½ÚM„öâ:„ë¤Ò^WMÕ÷Ò”†ÛhC¯´WD9J{+C™ce.u _U‘ íå§Eʱ¾VÚ«âš§Â÷Þsu _Ug«ÂW¶AUá+J§ð•Ýw§ð•D§ðuâZ¡½NA»¾FKá$¾"#yH|2â!ñ•'‰¯«¡•øâƒ®×J?ôøJUDãËðäX'ñœÆWt^ã{ˆė¿®t›Nâ+Õ|H|ER,_Ù6u_L ·§£½riaÂ;´—çêÊLìhobŒÚýž´7¸½píåN*Ó¢ÍûYÜ‹þë(•ý¬W\y†î5Àï{3î:Tãp/zNFÑú±®÷ŠÈAi¯Ýït¸W¯ÚK»Á¶rš*íU±…Ò^w™Ð^Uø)íU™Ò^ÕȨ—>Íi˜WÚ«‚uUøªðÜ+|­2M¾¢p_UMy‰¯ü¦H|¹ì CÓ{_UÞ)îUù™—øšø!ñµò_Á½öpƒ£½ªô _«9TÜ‹ñý¼VÊà[ákY¾"ëp _T‚ãò„ôªðñµ|ÝÝDà+S€øêÐ¥ _÷›¢ðÕGV…¯ê¹UâÛýî·?](â7fÊ{U®_÷›"ñÕƒ*ñÅ]ËQÜ)|I¶ÊJî¾î'EáK¯4P:Úë.¯œ¯}¯ÔQå½z*@`o#À8Î;y¯þäd½¼¢Pù>]a½D#¾‹å#aY¯Jb”õ>ˬ¼×\^Þ+Ò_‘÷Ê9®‡¼W´ªNÞkïçå½v QØë®sò^ ÎÛåä!ï½v&CG{upòú^ûÍy}¯Qn9Ú«§…¼¾W4ÃNß+× 
íÕÓ*J{5¬qú^;R*îœÃ½:ð)î•oÉ{igGØ0î#ê^ý~UÝ«Q—ª{uèu¯+u/³ø¤uPÛ©{uTSu¯–©ºWÏq©ºWÆ U÷ê0£ê^­¦ª{õ'EÜ«Š{vç¶öUUÜ«/AŽîÉEÝ«Uq/.u³cðRu¯žvRy¯ŒyõêO§î¥7#”¹-£ê^­‰ ^ã)ûk(5êÅ_ãùW^JE½îÔ˜ª{µò¢îu54¤×WÂ’^öôŒÂ;%½¾Ì’^V”ô6Šù2_‹æÞ¤W'%½ZO%½l±Û,TI¯NJJz}],êõ×YÔ«'÷òä!w‰ÂDº÷ò\e˜¶Ük{³£½òñ8Ú‹Ën“9¥½vÜs°W>+{å3VØ«g$óc@ø6 ‡Óh{¿Æ¨÷dæxNÉ1>@ïÁc’ô=F½q*–†½€€^N5ƒþXÆ›itynv'Œ71Vl+©°óèå¢èZÉb•ñ"<ÇÄÃÂád¾æóöV“^ITì\zÉ]0Ú”épkïE8XÎ}?Ëx/æ Åj Æ§¢·'uã¨r<½ÐN®Œ÷bî3 W}2^ú„„³mÁ²e¼êA§7ô ˜+DÈËìy=!oà^ÚÇå…ò҇˅‰è…òÒ¦2Þ‹OõÚÙ”Wy”òjB ¥¼ô¨!6:&vµ”7PeP6ïÊËG;⢠ByÕ‰I)¯fUÊË•<ÞÄ4vTÊËÕ:ƒ¯é',”—¿yäm¨*”WÝõ”òººåU+¥¼’ þ3O»^*¥ÚÓ­—¦i1¯ãP yiÇT™^i|?yÕXP!¯Zœ)äU-¼lÍ5ÆxÈË2’žºŠL"cñšRÆË»±# ãe¼|‚ÀÏ X»^þ-×¢z ã%di×87á/ýn¸ä™‡„ñÒa‹ð$Ìß´Œ×}8–ñªù–2^ᨴn{"^÷™ âåOÞ4Šx‰¥ Ó5„0^MÏ®Œ×Ùl ã¥]O•§‹Cè™3 ¨§‹ƒ:£ á ÔP°OÖ¥Ù½ ¯Kxùù‡=g)áÕܵJxE£%¼ZK¼šíS¯ó‚Àë<$ðÒ6 ³'áÎË^Í©„70Õö¹RÒ+âåuüWmHÑ„ñ2Ña9o©²a¼¾ö–ñj&Fe¼þv–ñòºƒYaw™Éc,Ih•ñjŽie¼=;nY&ÞÊx57²2^M櫌?§¤±nÆ«ó”ñú2ËxYiŠ^ç‚C¯æFVÆ« •ñúë,ãeÚëÊÓxñáÕ«) ò:_W¼þº ‰Œc¨Kß6¯M «„7ôlæÇò«)‘•ð²vÜ©Œó7 â՜Ψ÷äYÇ}6@zñYöú|š8°0;ß<7 F½’àÔõ:#[1ê=©$_.ýΨWë¢F½Ú'Õ¨×ý¦õjQ£^ÉŒìŒz]™3êE<½b=Œzdl ºõºç£^ç¸kzië\¶®RzõÕz£^,;±ªOM/óèòåMeµ1êµ¾šÞ¨—V‹Óp_9/m†é=‡+¸õrƒ°¯x_ó^ÌUyÒûpTB|z¹Üå†ê;˜—·˜—£ÌËsßbˆ‚y¹ÂúzípªO¯ÞO0¯fAQÌ{õXxyd)æe]¸Á–Æó©Q¯»ŸÅ¼®]óúûYÌË4ÔCš¿i1¯o‹yÕ-R1ïE­OØ$K0¯k‹yÝ+ÌËÛ1^çx<Á¼j%©˜—U¡\ó¡Íä¼l,Œ•ÓøLA/ó óskÃÅT@¯/³ —SÂqmû# zé!‰÷´|ŸôúË,èeÙumL* ×µ£€^±ÝTÎË4­¯­Ni«½ÌòÎÓ”ËèÀ‚Þ‹ç˜vN.½®5…ôºÒËÇcŠˆ‰…ôº.$¤WML•ô^\”çÅ÷õ^´bÚ§ò„ôº· ¤×=žE½|:LcïªzYÆ3«_/S8›ü êÕ¼ðõ2ª(q¦ø^P/_Ί<êu%¬×}û–õª«²^u|UÖëÞ·°^~§5l)¬—GIlOUïE]Y©q•õòÑ&ˆZŸ¬—yh³:踲^:&÷³¢O ëåwÐÏ"†QfY/Ë(ù½Ú“õªóüð·Nůxõr….y†§ª×ÿœóêÍ$»K ì¼zí£‰W¯t"çÕ«?©f½Î×™õÚ7wF›o#½¿ÙÅ!§iTõörßÝ­ßIÌÆ|P´·ŽOÞëÌ(-ôÕ.1׋ýÐø;‰Ù¥UyÙ#(ôe’Ô- r™ÙÐmxÚfØë;è+“½B_‡úbEJâˆ@tZ }Ož«ëP‡KÍÆà,¬“†ÔlñÞ p©Ùè0ÌæœðÖ¥f»7†ø„¾šTI¡¯¿Î¦fës|ž_Ú#5[mÇ1%5›¦¨ÒÔl}á¼ÖÚšxGH&GÞ©Ù¨ñ^ml‘¯Ënæ2³äÎ ÛÌl‰æ}uÃY›š‡€Ékz1wzMÍÌÆMq^3ëgX/~.Òx8£kb643Ÿ{¤r‰Ùýf¹&-ªqµµÎ몡YÙð®.<Ø1Óù¬lF†m1/)à>êr²„ÑÛ¨Cs²Ñ8”‡¯‡”—.Ñ™›$ÃÝXs²¡VÌ—™Ï§W/ —µH”œlÜ C¼†þWs²ñs, ¹œl}8Æô4§ädë£?Âìwl~1'>p„&3¡™ädsy×$'j8µ<ò±•+- ÿG>¶bÄì.[‹×ÞÑ|l®1\>¶ŒŸ Øèó±Õ”w ÍÇ1L—µ•áò±]ðh>6´åý#j>6tsWó±aœ¤@¨å'âÕ̃Žñ2Æ «ÿ;Æ+[Êx5ˆrùØ2G”¶r{(ãen+|ù‘Žèìuæ™TÆëcMÇÆœeÅ<.Ó<Õ}‚N/ýc¼Žn~3^È£¼àxõž 
÷®½ËÇFš×€æ/úlºVîAe¼”>Õm§ùØž}<æÇæò±%FƒkwŒ7r…Viîÿz0^M`¥ùØBŒ-ßzäc»-âãð¨ˆW¶jñ}ý¹5sTÑa9¿ƒxñÝöWñÒ:†XìÄ«IXñ2ék>wŽ1MÇF%&eœÒ`A¼‘ßÄNŸhïÁ5Îm`*ˆ—A4wVâ’øÞ„—=£ÑÒ« áeLŃyÍõò“%!=ž® î… ã=¹QVWÖTe¼L›˜éà™ŒMãe¼x¡ÑzøZÆëòù ãíßJ>g¾A¼Ö¸xM#™‹ ^Ü.ü‚O/>æ’éQŸ"^—R/ƒÁÌãã/Ïw]ç¾ËTŒL>0÷T„ïrŒ áÁzËw9“áÎïTø.îr]ûŸÊwýý,ßå“3P¿hé®{s–îrXÛó„îös‹Tù½“† 7Cx振tÇ·ˆ,*Þ>t3‰C{zôò£§Äœ§„îjÆf¥»|7ÜSœŸ…Å».K¦à]—šRð.Êš‹Í¼D‚wFÝ÷QÁ»LiÄ‚w]ÞPÁ»n¼‹Ç‹w®Å»\¡p° )¢à]—ÒRð.îW9îÏpAð.{C¨;™à]])ÞezMnÔc\gñ.?ƒx”5 Þ=øYÒž*^w?‹w9aG¯_«à]—eVð.ÆeœYé¨ï21ýŸx×åE¼ë:Šà]E÷?Å»þNïò¦åÁH¬x—ýä`^…чïº Gð®K7*x—ßÏÄÎÝ>á»nؾËïÀzÜZ¾ËûåÿøMËw]š_á»|qqŽjZ¼‹ÇË™ ëq¦LðîÁ ë>õ#x—It)È™–‚w]S Þåý.\ŽåÌpó]fÇ—ç»ì-•rÑ1äX¾ëÆKá»îñ„ï2!,ÓOÓ’.¼{r @'Çsïºn)x×}Âw9Ð’tÊ,Þ=¹JâÛ¨OÓÎdGÙq¶à] t #M©à]6ÓO޽>Å»*Þ=ˆ8ä·'Þ=¹¢Çà‘¸Gñ®ùŠwù¨ÚÅ»øÍ‹–:éiÔËH?s3¹0ÜÈ/x¹vav®ÁñžÜq¥ì;Tk¯¾=E¼'føèfV¶£Ùû{_NöÄ䇼qûñǽûhòqûùçwî?~ñ‡§‘Üãde°Œ¹‰»WÎN¡FÿòÝô…ùÉÿãfÊë»Ï¯?ýb“~ýùõÝ?ýëw_z£’y„ã,´žøò;ů¿C?ò[Ý~ᲿÓ ‘{Xþ»ËŠ?nˆ´Ï˜Í¦áGÍxíoyíθJHÙÇÔ†@ŠRÑ|=*cŠ>´Ä;hù·Â{»·"•øØ·Òk#oE+óÎ[ù‚1.0];‚W,2x„‰Ü+Žˆ¨û1ò¯4ž¯Jõ6¡oyöØôy—¶î~ãmY~Ðà´‰\§½YV7^æBõVd~C/÷]ò¼ÝøÊiTX/G³„)ÁÚi>öâÛ%Bri°¬Þ.Ö¿#—ø–7_Ö|™^ñ·I„ØQešiÞ&TVÕu¦0Ó)÷6&¦¿ÍXØÑ¼Ýš—Ø\ªE¨¯c¦gám p¿rùZ¦Ùäí¡U醫۰R«Ò=á–'0OňVéOÇ=øÛXlus½lj‹roP±,ÚÔWÔ¬Øi:ÔMyì‰/Ê9ÿÚä¶çþÜ3FmÃʈǤëOÍãyÛ°ò Hë±²®ÝÐwVF !¥Ö:ÖøÝ@`Vr¹|®D¡,k·)°x¤ãµkn “4"3£ðÜBòdæg¾]yvÏžGßc^ç;¹…Xx³¬Ý®À<1r.òžƸ˜irMP›ué&€ËeU\]1cu‚é²Ê¡¢Æi…–¹'·àa>ú,̲j“[tÚ±D-,3É-xÆ)óülæn¼Mnq¢%¨RGYánüFÌt‘=èzÜFÙɈ«ÓŠgx9¾ì ?T‡Å¢º³º¥–®ØS¸Xî¿oÄÌæÊŒÝÙA2]¦†¯ðüêm ,îð…§Mv‹Û¼»t­Èvã¡kEYǺ ÷àoW`ÉIT¸ ¿³:í—®5®Àùdµä.¼uFY[‘庌LB³M`n±É-Ä,¹\É&·à©á|M+4–™äÚO®l³[H@QFÞ: uý/WÕì¶Yš‘I¨3Ën™„ë ñ02 ¾ôóàɵ^tZ³3qï,1³3tQE\ç¸ì2* ×›QvË$4SB #“ÐÈf`)߸g·Ð·Æø{‘¬ÍÁø;»…|Ü܇ßÉ-ཧqQ³¹-´vé°¹-´’é´¹-ø#áœêz–™ÜÛ¹ ÔÝ2K ÖìL¿ítÙܧÁ–±¤hÍΆóÇÜY‹ÔäÆfŸe&¹…t-¬eLn }i©ØÜzYµ©-Ü“7›ÚB^ŠLf mè|™œ>$ãFɧÍl!‰f ÷â{|5IZ ·âï(V‡ nÅÛmwÖUÉÏfï–f.cIMƒ¸ÑI¸·†µÕÖI„ƒ›«mé’K®·N‚eWD_9ϲÛíLÇÐÜ¬Û™ŽØå°ng’ò© Þ5™-ô«ÂšÂ¸‰ 3ˌۙvØrY·3ñ½/ˆ…ÛYb$,þú;e,ls[˜M…™ln ¹_ÖÜùjTmŒç+šÛÂäÄ(\kýBn‹Â±ñv;ãù2jhdzógn·3±U.=}F»³±p3zÌÝÚŽ¯…"æs”ãuv1’Üýÿi¼Î.9@¿ê»í1­×™dibÙ­’pŸñàî.yaé ··ÛÞý-`.L`z»‰³}¡gÝív&y¼Xv»YCîB/ ÛìLü¿K;¬Ù™\F[¤ÛëlÛèjÚoŸ3kÀw>ØYf|Î0*"8Ÿö5ˆH­Ñ: zàÌW¸Q4Ïy»‹ ÷à3¸‡ïÒÑFº30 '5 
÷àïð•'ºuFnÅ:™g«Öå̦PCÕ­É™dá«”µß.gÑ…/¥—ÖåÌ]wZ—3Ó…+‡ªÏ‚â‘sj©]‘}ÜùLÒO´Ÿ59#¨Àâ-ãºdSKê8–Æâ†^¹-rÛô^ŒÓ\æWúòÛ´lÆæ½RÖ`Ó²Íæ­TÅì ÃÚ­og‘É_¬?Ç=÷Û¤—:åsº]Tî¹ß½’,¡vWº;ªm î¹ß½âóÎ2“¿ø¢sÐuµÑÀg´ù‹yÐŒ®Ë±Tjæn^Ɉ‡FÓT&Q=‹ÍeaïYdRYØ +)co&ætõl6›…d4Àäjíͤ1ûÙx3¢^]ö8®{UÄ»«ÚIÛ³šúÓ¥Ãæ²¸;¸tÓEèÔb³XžØ“rçñ¬qdHЫümçÝV‹_9æ¦@—GSñ¢ûÈóرêzüºXŽ…¸ èRN_–™¹òÜÀÔ¡wúkËs‰™3n%ËsY–(þçE…ç†.Eß~ÿÂs_}¸S¨XžzoÚñð\–…ºãºîðŸ]¦wn×Êì%<7t¹çͱ,Ï ]&ÊEð*Ú87ô¯ýžçò2D?+•©à\’ó.‰Ÿ¹j,Ïåo"J^J1á¹êKsdÔðÜÐÅû÷á[Ës;¨¿}Ô„çòºó>”/<Íœ4˪}ñ\þÜÁW:ÖÆÂsyI>êJ-<—e!ÑÂʆç²ìâô‘`Ðò\fÞ·£–ð\FОëÚZx.Ÿ¯ÕºÒ  Ï}”žËÛÝ@ÂsùèÎ×´‚VžKÍ"úß\Í[œË»ÕÛûhá\ÍЮ8·ñØB=æ 8—7b ¸áL¬8—âÈÛñEp.Ã@tçÍ” Îív”·!’òÜÆƒ„Á²ÞèòxÎ.MÀj®¿Î]¾ÃÛ|Fn£\<ÍQy.Ïhr%5‹Âs)'eÏ6ÖÂsùx™Ic†_¥ðÜÇ£[žÛp¦ „ç²Û“|ŒpÊs{ôÌ5Óçöã†ic`á¹oÝd¤HPžëÞžå¹GhÎ½É <—ÇxIÎGjUå¹iI®-[žÛú1“µ¿²y.kxô¸ïåq.¯ giQp.ßMn+µ¥â\Ö0”½ëbq.Œª×Õ–ç²=˜ã8”ÁB-Ïe1ðµ™íÍsq;ž‚bǗ繃E}Ñ’ZŽ‘8G©/»%ÁÎ\³«ˆ×ÝÉR_¸UÄ+³•SñrŠEç;’êëTfB}ù…,@O}k_~­DWJ}ñ›‰ÑØ5ö)„úºOD¨¯fjWêËù…èddnSêë^ª`_¶K®+·‘b_¯n¶Ø·ÒÒ'_ÓdA±/ËŽ}K±/Ÿ=ò B¬Õb_Môìt¼4¼c0!¨ÕñÒO–#w~b_~"¥¬dCŠ}ù“‰"1” öeÔÀ5Ùܿؗ)Â#“8·§Ž×õ/á¾þ™-÷u}Ïr_N^/³î–ûv–s¬]Vå¾N&Ü× Â}ñAÒÁ|HsJ^=äÙ…1œ÷uõT%ïÉl6ÇL 蔼Úõü² å>9¿<÷u¢=ᾜo«Må¾¾š–ûªM°¯FDŠ}ù“c‹ü‰}õ'-õå³q‡¼±¨`_×Íûª6G¹/GÌz-1Â-åe¶0zêŒ=$á¾äsçy³]Ã}ûßæÚ³²Ü÷²[Ê«½K¥¼´c¤[ozGÊË <ôuíËs_N…du0Z‘òbäÄz{!h•òŠÖÎIy9D࣫׀­"åu÷)/yÊÒK8)/*6²:)¯Æ–*åue"åue"ååqBŽcVU)¯¾Z•ò:Á®óš)Ùiym‰“òÚaC¥¼ Â|(÷e OL”Ƈ5¹/~­ÒR.Î;‰”72l/}+÷¥ð,.-J^YP«’WG_•ò¦À™Ùôœ’—dwÛâ9)/¾#š¼-œ*r^T™'ʈUUÎïµµ§¾<9Fo¬¹…¯r^­¦¨y5lV5¯Ž ªæÕÕ„ªyyÔ‘Ùj§ŠY½…I‘ò<:ê$½…IA1Rt ¨Š^ŠñxЉ WÅ zy6±–„­¤Wx'é•J:I¯t'é•1ê–ôʬâ$½ÒVNÒ+ïÆIzS Õy.Ce½Z9Y/Y•¨u¨TEÖ+£ÓõÚ³“õÊ÷ëd½Z•õF.™Î¡¨qª^mGUõjû«ª7Ò€6cyGÕ+‘œ“õŠ^©oeR„~ˆxaÑõÊ|åt½è'Ck‰w°Wë¢Â^í,"ìu—‰²W[Zµ½‚8}¯»]öÿü[©ï±©ï‰ÿ5È—£Àô‚| ߃)6!SäËÃ×uíg(ò¥åêÎlú@¾è‚«¯9ä+²‡|e'çaÛy>~žäÛ˜ÐaທG¾4cdVÑ)häëÒg+òu8ø‹‘¯ÃÏùjs‡|唲C¾rJY‘¯‚)‡|eQë¯zOò•5¦C¾x Ô‹·ü4n`'gr¾B_!nâ+ÌP€¯ÆY |{¬CwøÀøÊx%ÀWC0¾zâG¯ò%¾nGA€/'—¾øz_”µ ;Þ+£±ßÊ´-Ä"Ç»ÀWn瀯 ïð• åøâ7ÙËõtm಼ðÌÕè&J|Ã8â+£¿#¾Šo”øêá*!¾"C|‰A§wˆ/Wè±W~È|{5׌eˆ/÷x8&,gC|4sY4xƒ_îèóÀÍ´ªpÄ×."…øâ2*°§E‰¯ßðÕ»)ðÕË„÷:X*¼×±`á½ô al–3ƒð^¹N€/ÅwÛ„\/ƒ{¸¯O•¯GÀ÷¼MuðuÍ"ÀWâyå½ñ:Þ«\yo&|XîÌÊ{µU”÷j‡UÞ«+å½¢Œí—彺:Pàë~N€¯.¶øê²O€¯ä¸rÀW8øjíøR¯—ÓRí(ðÕ¾§ÀW[ |éA›ñò´mP«'|Õ1D¯»N€¯6§_JœiöütmÐíŽ 
„÷êjXq/×J%Oõ£ÒÞž®iePÚK+ n^ x®´W?¥½Ú&J{¹Âí l†Ä ½Ì/±=ÆíÕU°ª|Ã[ºý¯í- ˆä§:ØÒ^”öʘ °WÂ<{™CÐØèu°W‘‚Â^­ŠÂ^eô {©¬ßNïöbü«tÚÏKl`¯î·)íÕÏJi¯~âªòez¨çQ`G{-‹p´WZZa/îÆÃKÓèBa¯¾W¡½Ú·”ö²oìcˆŽöJ@çhoe*î¶ì•6íu-´W Nå‹è-‡½a *_Þ÷"O{õ#UÚ[­®£½tÁÊzÇ(í¥1Î:³é`¯,vì¥S S¤ …½ ¹Î`V`¯{Ùe®urÑÁÞÆç¹nKÃz™C‚&éuè…õêe‚zihx­dÉõŠ C½<ÆVWæY§ñå!$ õscLQ¯¶Š°ÞövÝù!ìmL¨µr°:Ø+O§¬·ñlìJ¼Y¯û1a½Ìv–Û²;QØÛ˜Ã“G¡^ÖË%iZg<•õ¶¾ÖXÖ\Êzyrlé;ØË eåïu°WŸ@a¯>³J|F|è†ç;_É4𛳰Â^ib…½¨JÉ+·¯“øÒº«ÍVß‘øöÜ|+¿™ƒ½h£Ê8|¨…õº'ÖK*¹se:…¯„ÌNâË¥¢¹œ^Ø+Ý\¾ÜªÝi(œÂ—ÔnçÖr°WÞ«SøºZ.…oO¤Õwy_…/s—p ‰ãV¢ðåðw–iåâ¾}6oS   _é NáËtŽyÙ;:…ïAEßÈÎ÷PøÀ1·¿'ñE×î™âŽXEâ+Ë 'ñÕGW¯($Æ—;¹ŽF;a/íN¢Â^â@|W]öò¤25¦£éUØËSÚ”;Oü+Â^îHbõpÎßÌý¯"Ó©ÓøßfápaïWÙòvr™=ße¢–«­7!|·ŸêÅò£Öð×½ù®;Ï,|—­†p%V¾«‹^廂ïrùw'?S¼K+-|Ë@@½vÇà zÑç³1”ºëÜu+¯aõŽîÊ~ŽôÚ½ZwO^K¡ªp—Bý¥ÙÁ]1Qzèy-îS¸«˜PàîIwY©É½ž×8¿8¸ëà©À]§õ]tWüTÞ@‡w]5ïŠ]â]â£ó6x¼+ʇwílëè.]NâÊ„íèn&å:¶¡­Ð]ÉBeÆ)‡weõäð®»Nð.ͶúYàQ&xWbf‡wu³Bð®ÎáŠwEœLsŸ‚^Î.ŒTã50´Å»:Ü*Þ嬹k4@§¥»ŒÅJXžÊêá@xÍ-œ!ÿºÛ(}Ábh‘_ñpm!¥»râf{8èIõpІêá ç’ÕÂA[‹…ƒ¶ër©…ƒžÖT =^¨zDO-ôà®Z8è‘Xõpг‡êá ã”îêQG¥»…‰¶·‰©z8 ÷"¤Ùá/öpÐóÓÞÄÁš¨‰ƒ¾5qÐg}ëÞÄáàñ·¹¯ &¤ÉášKõpÐSÛÃAxª‡ƒ"Tmïá`ÛQ<ôì¡z8hS©‡ƒv¯¯ñp°g$ÕÃÁÝÏy8Xû õpÐ.ä=¤žÞÃÁ4§7q°'~ÕÄÁý¦3qo 1q(Lò›˜8˜“¸ÞÄAÄÄA½-ÄÄAo§&úxjâà®s&R•aâàÜ"ÄÅAß·ò]9ˆúpq0ÇsÅÄAõzÛŠjâ ¾jâ ½KMäü¤3qЦR÷›bâ ”º8ˆ¿Žš8¨³ˆš8èh¢&ú“BwÝOЇƒvõpÐ2õpÐ>©ÚWÄÃU¦Oè„ìjâ€ï/ "™ÊC5qÐÆTí~ÛÅÁÕP\ôŠš×‰še¨ß¹8 ¸8”{íãù®s}QmuqÐî¥.:«‹ƒûMqqPó#uqpõmNµqp¿)6Uó5·ÔÅÁý¤¸8¸Ÿç¸ .²6v„—ħ¦~¸8غˆ‹êÌåÜÜwPy:5qЗ .ürËJ²]Üs‰‹ƒ†WêâÀDÉœfÆ÷­.x.üqmÞŠ‹ƒ+ýIuqÐÊ«‹ƒþ¦º8h;*â·.%¼j¦.•8vgXPñÉò&‡üÒxmJxQFOâ¹-¤& ¸ðJ|"^ßbbâÀÅ#ÁžCÚ*ˆWÁ!^ñìr.ROe¼ÒWã•Gw.•izÐ í)èUk·›ñÒÏõ\§‰ãåÞÎQ6ØÆ+œÊ1ÞzÇ’žñ6¦(^ CãíYÅ–Ùµ0Þn0W¯…:„ñ:c:a¼Ýž‘™Ë‡ Qo7¼hϘÿ5ŒWæ Çx%Ë•c¼•CÒJ^â/zÂû¹èH/F½V† Ç{ÅPxï£]„÷ê«UÞ+žƒŽ÷Vv• Âñ^´ sËÍ2å½®Hx/ßó:Uèp/ªŒî1w¾÷òé ³M²Ï\‚ýój²ñ§¾ ÷þFî£Ý˜à!ê Ü Z‡Cœ¨WLj¦·]yÉšÞ~¤zœÏqš^,¼ü4Ì€UÓËAALºžš^ŸÿLlT×*ÐW19ê+÷.[¦MÅZz+õ%“ÅçÊLïf©o!Ö@5ã"»šŠÍ¤íÒTlh2R¹™²\S±ñ¨šeZïâpuNaTlö$½OÅ&yÓl*6<)AË?Ù§b3‰²\*¶H÷Ô::MJ¶S±QÌØrˆGx=L8zkãÇ™8`ÔkTj·wR±%Ä(ljE5qHwvÒq5qˆLH¾¸¡3q¨\–­Û|X£OÊáL\:2—ŠÍ>ºOÅ–¸o˜w™qq ÊÅ YbY—ŠMÒÂI*6DÇŒ Î!Q×Tl’ˉzÈz)—çù½ø´îÕ3ÙNÔ«çÞ5›äyñ.6˜hz1MÐ¥hnž«¦×95ˆ¦×ÕD4½'!Ñ´·¦—õXs³Óôº_M¯²UM¯+SÔ5-¡jzOücâ¡ 
PQ¯fàQQ/nG2vȪÅïtGVU¯&žð.æ$­ŠzOò‚sêNÔ‹Û5NíÝTlV ¨.ª·VQ¯žäVQ¯æªQQ¯«‹¨z5Ï‹ªze“Ì©zÕ™@T½ÎüÁ¹8Èí\.6qxU¯¦œÙª^mUõªÏÏÅ&Õp6x ×ôô÷¹ØðùnÏå/NÆÆÉ‹VÙçWi2¶Æi;,ñªz5Y·q°É{TÕ«õTU¯:Æxëe ª^÷|¢êuuq66Ÿ“Êz½…èz]›9Û.ªëÕdH*ìÕï\„½'£µ4÷òœ°×9(ˆ°Wó2yÛdSØ«c½ {µ[zñ„a/*È…t8ŸÂÞƒ ·0_ü²zWYôNØ«UWa¯Ë&Â^­¥ {uÄWa¯¶ŠÏÇfÍETØûÈÕf„½:ž¨°×]'Â^¸'”{œþ÷Òïî^ŒVãcß̨¼WwÞÍŒ©Xù¼1U4Yàëì[ÌóPÙôŽWÐWtî6a¨K–>ïÒÖ]Âo¼ ÖôšŽ]´ÿË·‰þ6¿r‹Ä½¬à®òÊ)yÜø?ý÷ÿòÃ_?þþ§Ÿ¾ÿù矿ÿüŸê¿Åß4z¿¾èl\å.ü"=ô>‹<A-’å÷°·Élå.üÎ3Ë ¡´ˆ`­fÁ¸ÿt”% ¬õ´i…3õ; ]scaâ]LäyæE­Ýsjå7å–qú̯Þ[íÜ®í6 ãàu¥gÝ8‘ª¸NU:ä-„ÊŒ· ÿjÚy³lïÒÑ_€˜nyÛq+~«@¸ß=R¼ô]øc¦ÿåIÔsm!Ô¾ ¿“ÈžÔ,oà*»ðšµöÅÇ-ÇÁú}í!TîÂße+ä«Ü„·'`îümµ«âÑì<í²§$gWíGT·@kïi)Æ2# [ŸÓ1µ2‡Ì-Ö£%!~qø4TÖL²”ܧ¤*÷àÇaæÊákPªÊÝ÷[)%†t,3D–ª±…ÑËšå$bÎÒ¸û~xqhÜ~¿©˜µ‚iL÷{ïºÈar–*–B^n6!&Xíˆöðž8µ#©YÒÈ^Ú…íÈv¯C¼ YfÝÌ ½vƒÄï£æä2çRt»ñ´óPããЂ·ÔO òY&‡bkb¶ô6Ê‚Õ+hƒ—ÝB3»vFKöå”$®·¨MŽB²ÌœEíDÃxe¤ (/ ŸÇBÍBmü£‡¢tË –ÉñO“p¦a¬ý¥ùN»[¦‚%Ø’p? DÉàZ–5»ea¶OúÌyƒDЍè¬=9$Ý¡2<Ïeѱì¤5.uNCÌï](ÌlFòbMY"›Ð†Q¡!íÞðßvÖ2É >ÿy¡]§=Ì,¸ƒ´›ØÝSÝ¿…–)me¬n×e¶c(õÁ¿š.,튖’b,¾R™C±”µb²ÆÙd©xÛ•UàavVY&»®fß¾!(,ãvUË$µÇ^ÍÛÕ·#ªPÁȨ̈Xq7F-{ˆñS‹ái:åJ‹—j¦ðï–~·E'd0Û×-&s®Dõb,»y?e“^ô;qR};Ñ:ñœ®ú%ñû¬[Ùп`FGQ¿íàÜÊ‚ñ+çå_”zM›½Çv¥ï±vöÿÁ\BÓEz¨‚„SŸ·Ã£’àÀÕÐmt($˜Úúº¡œ’`vËk›9) <ù°AŸ¢`›äã`”¥º]mNPö?ùZŒo«ÄeË+4Ø•X›lïiÁ!ðˆÔ+ÇÂàпòí.0˜×®]'I50˜EW؉O…?~ÒÐ`^Ç_+±º¥Á,Û2RGƒ{ÙEÑ‘=ÝÒà^—´=Á„s¯¡wb›Aƒûß¶Th0Ë*qVBƒY–èwxN&mhp/£qáQº54˜e˜T–ç°Ð`·"8˜ehÒ•Dpp M[BE‡ƒCŸv&#ÁÁ,ã,?x ~\gp0ËB¤Œm^wãàþuçø³8øQKƒƒw38Ø×Òòà~]ÞX…³ìºVÆÁÁ,Š\AÉOpp¿,ïr‚ƒû©Þ¶…ó2IJËñOh°¿ÌÀ`QŠ>Óê vgˆ—`*ÐfyÙ fFðkús î7Ê;e Àà~]Úž~ƒYv†›r[ì+hap¯gÝ)³,q<ò÷zL50ËÂà^ví´€ƒYvû îÕÄgFP–÷«°<šž¡–ûûÿ¼½MïæÊqå¹÷§¨e{á2ߘÜ6à 1À 0w×0´±-­+Â|Lœ|sò‘T·dýn¨êfña’L&#yℇÁÇ s0mêÝŒxlc.ß¹û¾ Fœ\G™wbÁí°jèîéN0m(b?R£ÍVN³p&Áàã8ƒÑËFr·Ë‹¡—‚ÁG[ƒÁ­eïixÜ®ú^U·«Î˽’`°öÞÃ`=Ÿ‡Áí8¬$: >a¿ Y –£ >Žr$m!-5<‘àv̧ûf3‘àv÷/ÐȨ$MwX%`= ng»WÉo"Áí°¼7Q< Æ.ûû¬Ä4BÁ¡¿ìv¥ýyz¬WîQ°^GÁ­ 3TžMovÃ5¾+ý†H°^¹'ÁíÒŸ½?æI°^º'Áhƒ GϯíŠPù×ÕË@Ïðüz²×±(¾Œ6 ÷fzà£-øè³ðª5O ¸Vú/àvÜ»ŠªF›™3áˆ@p»öÚùª‡Àíº_Tæ­ñ›@à£ÿ‹\ùv9 ܆\rhüõ±+_µ§Àm8âÁÞ½'ž·6[³\O>0°ŽóE¼"ýXæ›@à&M +Û›(°ÞcOÛquoýz Œ6û’NÂÀÇùj]忾>t½Pf¦Öâ9°ÞÏõÊ<nmÏÞÃõ ø8.úÐàtïøyÜÚÞ•ÊF Xï–Ázížç{\èz¡°ÐÌró$¸•h|÷Nº'Áh‹÷JV#|´Ý>t½ðUY D‚ÛäMê= nmaå_ F›Ek—Ý“`–SÖ+÷ 
¸]BÚ»åëÍô ¸gaûXÐOÅ»ôýõÁ+Ý+ÁòH·YÈ.ÍbÅA:o¿Ò÷A°ô‡Ap»™×ÊXa,OAðÙ˜}«?[|Kµ@·#Ó’  ¶ÿE%·©Ù ,‚IðqÛ= –qÏ$øx&žŸGFÏÊUz|<O‚ÏF‡‚qìÌœ9FÁÇ3ñ(XÞFÁígï¥Ò˜,øxžÙ³àö¬òJOd|œÍ³à³Ñ±`4ÚÕO ³àãÆy|ÜÏ‚eöf,Ÿ fÁÇò,ø¼ ›·ËLko–`ðñ= n· ¬äN¦ÁÇ+âipk + •ipûÙkIV·ÝãàO‹£±t7wøHløjîga”Ûb6lm)"ǧC)¢Ãú›žã.£dïØø!:¬mžk›£ÃX½>in3y8¬îá0:ymÑ;Áay܇åâËo–!Kp@…§o˜mb‰cxÁç¢ô €è° ¢Ãx°é^µL‡õú?OÍý ÂÃz;<–ÛOxXºGxø‚ɳö„ã’Qº=äìñ0úùÖµgGxX®ð°µYl»ö™Ë-&D¬}ñˆXÞ|ˆÅ`‚í7ßb½Oˆ¥“Dˆq\i‚øúM ±t„1nôóL™ b=Îb™ˆË5#–KŒX¯Ï3bí§gÄ8.”µ]6±~Öb¤:×{n Ö[@€øhô€YÒ¨Ú·wï¬kÃè)D«yc×ê%v6ÌéØÂ†å÷ ëý$4Ìï™ aýUO†1Ñ¿åÙZ_O†±³(¤ãÛA†õW3G·ÎdSÀðÑW†õÙæWÕcáã7ÖçNX˜‰Œ`aÀlÅ¡X&,¬=%,¬Ã‰°°Î„…å¶Ö«$*|4z*|4z*ŒS¦85=B…y&îT˜'…Ÿ¥Âע·ýûß‚„±À‡ÂÇÎu aÚX$lóš-ȦUá„ü§g¬."œ  ÃwA\#¢=® Æ€XðJ0NjO‘c"|cé‰0uø5¶Ë¹Ã(‡Ë@vôH¤ì» „Q`ÀÆx¯8Å@ØÞ! ˜Qn†0¢›¸uÉ„cÿHMXì¢i8ñÆÏZá«Ùš¦Q.K€pþiõÈ:€0RFÝË['ó(þxaoÁ¡HøÎ9Ž\¬ W¤tsD&ÂÂpn%6ì•èuÓ™ _"üÂì ë" Ä7Ò×»-/N©0æSÓøã@8Ùx½·Œ˜€°]´EÉK,DØ=Ât¯\ª]ÅG L½d l3Ã3ê†DØîU5……Ã$9Ûš¶óC%Â[bïü ö€‰0ßh&Â| L„#ª<Ô‘ð#DØžˆ†ñíQ%„v óRô@ÂÕ $LDH‰0Ý!Âô[B„=åc"ÌýS"ì '#aFBÌ„é 3R$ìo##aé¦ a‚Ú óS#$,=!$̯#a>#a^3fØÄHXð­ a ?Ž„]´KHøhs¡3¿X Kï óŽˆ"aB­‚„=‰9°ÇÅ‚„ý“c$̯#aî'#aÞYc$,X[0ïÇ‘°[‰ K1a~Y™ &Ll—˜0ßfÂöÉF¡®žLs0aÿd&,ÇæíTfÂ|ǘ ó&&Ì÷,ŸÛtÌ„icS‘°ÿä0æw€‘0?FÂüF)öo0#až+ S? 
Óo –¶×ϱ~,æ»IL˜n¦"aÿ+öC’‘0FÂü s/ Ëo–ß$$,¿IH˜#aþ„(öŸ¥„ý£a$,]°ß‘c$̽g$Ì/”"aÿ8°ÛYc$Œú9ÏÊa$l3KB$Þq¤"aj$ìw8˜ K_˜ û¯„@aå…ùÊ ó§Ÿ¡0o0æ-†ÂÒYÙàÁzGa½'ÞQvÕ6»- fï(lǽpРݵ,…ñìB©0Ü  †a'Y ã¸Ú2cOKaÜè—¡$Y [[BîÁðÐ$Ka=Ÿ·Æ Ä»ÓsØR¸[Úç!ÏgKa~A†¥°à-…µÍ[ ŸmÛMÚÈRow@lÛoY cÆ(aÙ&’¥0"}¤MŒí²~Ñ1lLƒ‰íĆ&8 =},£0º’ ÕC,¬÷˜…Q îygµ\rÖ^:Ga½8o)Œ×½\³v-[ cª¼ðù-‡X¸®"(‡XX»B–Âú›ÞSmñݾÁÞSXÛˆ _ð \^Ñä),×GžÂgÛö^Ó~vOaš”ÙR?¯Yè—-…Ñfò†ž L–Âr"²æï[ ëqÞSX† y Ë$Oaw×±oÍ–Âú“ÞR#=!Êì—N–ÂÚMÏ…û¤™}G–Â8[z—ÿ/Y kO¼¥°\y óÅ‘¥°t’,…e “¥°t…±0r\~Üd)Œa»šûÀ­Í`#sŠ-…eJ!KaýÍúá?9Ka\V°u\èC™,…y”3¶EK³è `a„׬“+XúÎòó‚…ñ#×L,|#tØÞQXî9 Ë$Ga¹-ä(Ìu*ÈR˜Çw–GFŽÂÒIr–ãÈQXžY Ë¥°\8Y £ÐÆs/ÿV²Ö¾xOá³­úù•®¯pKmÞTX¯¡» sήÂg[õ¬õoÙîzSa9™ #Z(yΓ©°D ä*,F®ÂÒMr–‡C®ÂÚæ]…¹B Û K(ám…yÉB¶ÂzuÞVX®€l…9Þ!Wa=,p ë{B®ÂòÊ‘«°ÜhrÖßÌÃR[á–~Ó» ë5<ÃÒoÖ1,¹ óë&®Ât#ÅT˜éÉ„-ŽKÓÞ—ˆ°ýH®ÓÙE‰0ÍÀb)ÌÃA,…ÆÌ‘lµg‡**ß” ŸGzKa~ùØS˜ï5{ £-P~òÖÉSX$Oá£1pDKWIžÂÇ9½§ðñ³ÞS˜G˜ ëÍ#Sa¦ b*|ü¬7>:Tgd»gP1æ·BL…µ'd*¬Ï˜L…µ›d*¬7•L…Ÿõ¦ÂÇ‘™#\™ä)|üª÷Æe>yV–Oáã9OáãW©°> 2æU”"aÑ|«`¸„0MßÄUøøYo+,·‡\…ÑfaÀ(T"¶Â:ב­0‡×â+|ùpÀë@J„iýÐ}…ÑI{S‡ËÁÏá¿ÑFõ8ìsfâÔ Û´Þ)¼94Ãé*÷òg‰àÃñLa° G[<¼«f‹†=Ûê¤Gä†m@Gøa-a;¼Þcç’ †)ZÀ09 nžæ\ ôp˜©Ÿ°a”̺²sv‘µ]ÀX7|;à°]ÀZØÃaHê56Sßø²]iT©#6Œý¢²ÄÙ̆ñ¦ÂU³Îšr.°FІ=×nB'l+­÷ZUñˆ cIsåxÖ›« Áö© Ù¸ÃaÔk¢Úû¬7gG`wñ½Ãtvå:¢^[ºLõæg £0‚Ô›ƒ3qÛêmTo.Ú·±œ……I9é°€£‚sÙ¾Îј ÎYxú2rå¹àœ û´­o¹àœ…üÏv²¥‚sø¦XCÊj$ѪµÕÏÃ8¬ØÀË×ó¡àœ]6NÒa$Ñ:ù8»]*8ǧã‚sÀºafÕHÁ¹Œê•Ë[€ ÎÙ·ÈcÊr©àœ/§%Åà‚s¨ß‚™¯N8Î&í<¦))8Gƒlœ³¾0+N:l¬À<©ºáã†PÁ9®©Ãç€êtZ‘‚søìÀ4¾|(8ÇUö¸à Ëïw7á‚s\ñ ÎÙ ]ìõÞ'®ø~¥‘0 õæì/­ŒRPTo®Àj¦Jp½9š0¤àŸŒêÍÙÄvvO†`8Œ£Ä×›ãAÂõæžFÇw‡á0® .Óx†Ãr \pîÑä3öˆ sG.7÷ ÂÒ;‹…q¹¹†Ý¦gÐ*7G•9¥Ü—ÑärsqP²ÏN̆iž6|Á­¼ÌZ€Ä†+ï2ó—zs¶Œ@Æ\ì#ذÄäzsXž=ÉÎÐY´‡Ãµ­>l>ÎzseÐÂt_b8,³=Õ›{PÎ"ÚWEí'iÔòU8,£’à0FPÁSÙ<¶ÓÅgDmJ‡¥Þ'Ña¼Å¾ŽB¿L‡1†RžÛùRpήÊ´ØÓ\ˆãŽAi9¶3‰ëoz:Œú¢ÐsŒ*jD‡¹Æk®ÇüCp¸¢*B˜yD ‡1É\ðMïU³¸Þß)ª7'w˜êÍáE|ìn_é Ãh³·ðšeרÞÆ2O³N§Ã˜ƒPü‡D»Ãï¬[Áp3ÞµŠ7H¹9“ËLïé0¢`d…–xÒa©…KtÃZ«¼Úv +Ü谼à\pއ+œã‡Gt˜¿\pŽÇÁa½<‡µ›Ë7uÀa¹‘‡q¦¼juHÉ9{ rÛ‹CXyÆK`·ûª½€Áa9Áa¹h‚Ãz\âšÉÔæápÅ^¾½½’”œ›²W{l<–‡ÃˆwòªBpØšÒø¸~S8Œ»‚|ãÜÁa o‰k[à¢ÉþNsÍ9|yíëöôPˆè°Ý¾j±\z£áö`Ë5Í ËC'8,=Áa¬w@^ó©Ö®46lÿÍFY¶[s ó|,h˜×U†9n—‚sxpÃé8pÕd·D’‚sXÆß°¶Ÿ~‰ÃX7„?ëé0¡ÃX<÷v :ŒŠçwÕÃFBn,ÓaÖB‡õ*‰cÉZÒTÑ 
ÆU{§sþ@‡õ*‰ãBÞU3Gè0 Ã\ö[àðñ«ãJbÚæ‡Ñh‘]¸ãi3Üîl-ïª8à0Ç…B‡9ì:ÌË>¡ÃڢüÐ:|ü¬§Ã˜q!ë¨÷©nkb( ßYÎÓa}c~¯Z,‚‡yÉ,x˜!ãa%Œ‡+Ê4!„YŽ›ëh¥šsz{¨æOcB‡±²ªí~B‡Ñø¾¯«+wû¹˜î+Ña¬»ŸU=Iè0~µ¼uU%$: Vt¯²9B‡+ê<ÍâJZtŽÂ‹‡yÂù97‰ß·Âs7V¯¢³ÿ„Ê¡#hüñéÏÃÚÛŸþãþýg ´ÝÙ÷©’ßQ%îÊO‘ïßxR»¾_Ý5ÿ½Oi<Òpú•Ž¿„w‡/èÁs­óã_{vo¬ëùâëo7½?öñçì‹sÅýgÇŸ¾ôÜý†!ßÿüáüýÿúñÜ6M¡"SL@±ÕRc±GD—þû/ñæŸnû?ûéÛ/¿~û_ÿÍ&èüö/ß~ùŸÿðÏ¿üè™">Û6×ÙÂ4üø™ÒOœÉBò¾§ò¶Š§?x¦¢gú’9rÍÌ÷3&㯜#ýu®Ç/û(ØÓ‡3q7…×nPãõ;)Ø(/ñìŽkûÚñܼý5Ïg×gCÝøâgÓúÃφ»óáÙüõ™¨ÔÜjV‹ñ¾Ýå‰ÈÌ­èÌ1]ßrŸ‡~ó|Çg ]Ç‚…ây’wž$ümgiûCÙÖpÕ¿p–¤gù+g(Ø_½²E×é[Éø…O†žÿÓNð¯¿ÿ·_ýÝÿø»?ýéO¿ûµüñ?ÿwú›æðo?”y‡7ù{[¨­¦‹úpý»ßzÎt¶ »Ôk¸×–ñ¶øÍñée\Qaj¥„¾ÁÂ[Åç0ù}”þZ~ª5:(°ýú3ÖÆoBÆ‘Ÿz£ôÄ=š¬1{k6Ð?[Ã÷GÈ Ÿù©ØEÆVkxä*-Å ¡_ˆ£Ô_k|ïUÔ%‘l¥5 !÷æž’ûjk«µTGg_rhse9ìý³«Ž+?ÕW™øÎoŸŠG~j­ñÍKŒÄÖŽwOfÝÛÑËÚš–e›zgÃ^*>ÕÚg ÞpE{Wz*å Zcñ÷÷¶Ó0J Ý­ˆ#9¥³5>,gv’lkD‰î¹Þ ŒO{ÖøÎ¤<'m¹1«]eŒc¨T¬Oe‘nNÒl£áFuð>ŽoÔ}©cÛÍûn¯l£­-xU3,ì ßÇ9m ± …‹]H&Ö˜¼®¹Ý¤Áò[ã»uuö/¡Fì¦gÖ˜½²šìŸßãH[ÄoaÉ\¬ññÚfäõ>÷L}¿ï Z?…uvG›LFa04nu3¼XÐÙÎcnÔ#Zº x.ÏbIw«y¸„u^ȃóz„ Hk'+Øü°À‰ý¨Í6>ûäq75×'´¡×ë–Õ55Ç,íO±m¡êScÎ&°´É‰]0Œ‡Êl,dûýkkCªã'3›ŠËl¬~§Ðm5Xcµ±µÈ mØŽ…¹žU`,ë{œ²"À;Þgc³"ÏLâÖøÎ ù8Y g'›ôö°Æø=ÏIY6 Μý<³¦‰5⣰ ©+Z£ÃÙ$*ýûó.h3ݼÓl'¶¹Û2ï^Ä,Öø:šñeÖ»ŒµÔ¢Ù Û¡Lq-Új\ ¿vùoÿfÙÃcšíÄRÖˆjÑ ÚìùÌÐÖ=Ìfg_mèÍ'ä4:˜m?SVEmkD%¤5ïïPÕ…^úEù<ÍfzjßK\k$Û‘iÕ~µÆ*4Û¡n4¾kN®(Ç—‡\fÛDv¯âê6]Ø+šCåÄãþ°äÛÓìˆÏç´Þ²Æ€ôÛ9”Ðö²MF££Ù¨D¾ªRcoó{]srÅ1¢Å|ŽdëOfO²QìzŒÔ\<Ɇ¬fV8†ƒÙôÏÕ¢Ñl;å½ÊnCIëA¶M•y£µ¦×al‹ëòª´‹]~‡±íþ¦]ξöžc£ û*”ÝǶ_­³8²µ±õŒÑcl‹ã.³w—ä16¤]«ô¬5fϱåæX£ãؤ\´Æâ9¶=oè<‡{… vϱQÀx‡w©žc£Pcž©£H ñ {Ò»º!È— ›”l2Ž]7Ž¥[Ý“1Šî*‹6Aú8vnÂL~±Æ°£c4Ú(¸ºé/úêä8%¦¢üú“v€Œ#sµÝãÛ‰5Cá8‡¦àð“ñ³ ™ß-¸Ý8¾Óª€Þw€|4V/é°I'¯’ë7¾^;@¶sŒ³¾±ÔB¢s¯±ÙŽ+v¨Ë´wi.>vB¹Ö\|,–ˆw|¬ãM.>–‘ófc<^ðZHi6îðãc.,–Þá1• ÌÇ—ök>~°=[ÞÉÀ€aÍÇÖhC´ÁÔGòÆÖ| ÑÌU§EkÜú~X­ðø6 %\{_ÛR«º¡l‘Öt¹»ç“›‘ý<Ö ×”ü`)|å!¹º[ªÀš’ñò{™kžšmfpCÙÖæuèTîVDqMÉ8Ò&—¡»›í÷š’íHHä‡4÷näkJ–;‹%îŽqg3.å ³qGÈ8gµ/ý“ŸÞøºùö£C)5¶0u•¾áØOf ¶!ãIaÙõôÇËö!?¨ÚV÷lÜò*c‡:Œ.@~'U»¤4“ õ†ÛzmÈúm½¶d;å›1÷u)w€ŒÎ<ÿqgÀ?Ö¤loOÁb&ŽS]€Œ·Ç®hh˜î–ð²&eLt×½’ÝZ©à5)cùŽW²¯gÁiV€Œ;`+§®íjM;>¶ãìŹG´n¡’ 1l¾ª=ìÚá±>É\||4f㔯w]=Øw|ü`–-(«2:[\|Œßó3zû¸øç„Bx”Ûññ±œ Õk6¾.>~ ¸‰‹" qÇÇøÛ1åvöcÄÙ¥á¡ýçÛÅÇ8›E² 
àµÆàâcœ-Æk.õ,Vpñ1”åO„„Æë]ÉÅÇ:]Ù‚mÇÈv¤EoöUìQ©½”.F¶sÚùžj){ÿ\ŒŒŒ+Îæ$ãûiJOH³qÉOÓp®œ>‹’\lçÄBmª“Bºv˜Œ;^±éËàÐ&Ç&ã­«á¾ÇÐIÁEÉO“Œ¬<Ê"‡Éa‘ì‰Ãd*†”]˜ü@&g+¶!C³0Í…ÉÒlÊÒ4î0G”(ÃÀ–s;LÆÌƒ*±G-!U&ãyÅUC7(°4gâªS¸0Yß‚*o]™0 ©qäu¯$†Ô¡%9×!5þóó”QÍš5Úàž6?÷ĨÛï]Ž#zFÝ>Œu«"‰Q£1Ä¥åfFï­}íìK88›gÔh|_‡Ë<£F#’æÌCŒºýì½4Ą́Ñë–±£F£ÍQ³v 3ê`ߤ\£n¿š·Z×3j„/Vwí}õŒÚœ¿$®©[¥ú¼5ƒD©q¤ÍüyJÝö¿JœÉ»L©ÑXŸ-8%JÆœ‹õ”ºu(l}Qj4Ú£”Œ©ÛÏÞŽšzLÆp»Å¡ÇÔ-b [=<a˜·~‘0uû={,£¤ cj„‡/6)Â@fSãH¬Ëõ˜GÚ2aºŸ3¦n?û¾£tSê‘–­(%JFÔ‚ª5zH6›Ç–9+Aê³;ŽR£1" ζD©ÏþlJÝŒ®?ŽR·®Þ‹zJÆ”óò§%JÝ~õÍËÙ–(5áíºP¬ÇÔíHÔ[šÛFScð¢PÓ°Ú%L#aE¸8ÇÔm¸f˜û¦n²ðdú&¦Æ‘)-çlÂÔ8ò²9zº4¦nç ŽÆ7JÝ®ßÖ2®£Ôh´ï÷°÷&F­¿æuèI­«Ä!êvñZÞ„¨Ñh“÷r"'L}é0u;§ÅXÃ61õÙ!Ç©Ñhk‰“<§>;ä854vïìÐ&Õí@ ïfMî™'Ô8²¢Vh!‚'Ô­C¯Û iÒX; çé鯄ºÝ°àð£'Ôíù·çá u/èÔ=OY¿ãÞóð„ú<0ùè)aiZS0¡nW‚zky{O¨Û‘øvåù³m9oâPi¡FcÂfqÈ _»ðX;ôºðËI ý®±™îõѨσõÈàÃc$ymާï5íA§¹¯ç u£8öe˜«"ÔÇð„º™±+4Ʊ'Ôgë‘ÕGÇzäë£ã Q*cXvDÝÔ?Ý4ÙQ·³·Ùâ5M O/¢ÇÓçO&c«ÈþešGfÛ¼øfaãù4p›ÂŒϧۑֹ©">ÝŽLÍQi4Vë9Ÿ>|]hÌ—éù´G|ZñéóHǧõŸF#6hGYKæÓzÛ‰OãÈuâ}òé³CŽO·#ᥓç=p|ºu¨î=LâÓç9Ÿ>Žì¹7¼[¥½ù:>Ý~ïÝ{´Ä§nz>}ôÄêvNûPMá êóH¨CK1Û»t¨ÛkŒUL7ÕjUë n³Up8ÌêÖÛ{ïý¡n½½ö~£'Ôí–2ý‰P?êu»ÊåáÀˆúx^Q‹[#êãW¡>t„ú¸éžP£1Âc<-¨Û¯¾{—u;Ðb ŸPŸG:@}ÙõqŒÔh´Ønš:3 >”Ôç‘P·®¤½ÅM€ºÖ>5ñéã†{>ÝŠH¼i#`>}žÒñév$¶Þ±aâùôy¤ãÓ­81ŠY毟>n¸ãÓÇ=Ÿ>®Òêã*= >&¨—Îj½ëžOŸéøtëOIÓ%šù4×nf<}Œ’ñËžN·¯’cæÆX>>U?[Ûú·YÕ©Úá SHÍë Ró:C85Ú™ScÆEy˜i¤ÈbêÅû5|-TLííK×zˆ©£­§f&5k©C‹IVåÖRÃ+^«~Žj©TCJœæz¯…Ìëþ8N}5îæÔø>v'jd-5¼¬ìñ\bï@ÜúZܧx—85¬¿ì¸ÁXJ ¡ñ½D©ELΔbòl¡^êE¥Ô„a¥Ó3¥¶ yv=(52öQXýƒ’úüÕ©a`×9JV¥Fñhk‰4ÄD©¹â¡Pj®Û-”Ø>ʦÔR>‰)5—òJmwóâtŸ(5—J;ü”it%”u±ì½¢Ô„aÉ•ió”šk 1¥FIq[«o¡Ô\@H(µ™Ó³1˜Rãgó³7ˆRs™@¡Ô¸?­êOžRÃù1œ¥ÆK˜Ó^¥–²gL©¥*‹©¹VPj¤"@†;VRc­“ž-Ý$J /(GXÌ”FŠý6-„½Cq6ÒJÍõ„R_€y/} SkƉ©¹¶ˆpjT ÀŒ=BXæÔ°o}Óú&2§F‡.T_ŠyâÔšßá9µ^‰çÔhƒÛÆ\«§ÖûCœúhôœ·^3ާ֧IrjñfN g—;.œEjj1ñeLÝÌd⊨SsKQS‹/«©õIN­s©©õ‰T‹/£jûÙUüäJ¤¦f?|QSs±AÕ WØè‘P5&Ñ®¢Z/ŸP5z û¯‰@ Uëó I5× aQ5—aTW«Æ-%QµÞU³ó<‹ªÅo™EÕzHT}ôÕ‹ªÙâ_DÕì9ÍšjqÂgb­ã‡ˆµN=¤©CꦩfçhÑTëÕ‘¦š ˆ¦Z‰ª_ªE'¢jM7#Q5†]êRF{MõÑÙÂES莱ÖAEšêãJ¼¨ZÐIT­CÜ‹ªÅTœEÕÚWUëó"QµN¬$ªæJ"ªÖ¯9‰ª_ªñ$¢jñgd­c•õÑX9ëÆ‰ª¥.ÁTUk?IU­÷†5U—`h-M^QÍe—YKMTFÖv²Þ#WÛ RœåÔúÞœú,\l©Ã\­3£WScá—í³9ÔL¤¦Æ JØÂVRSKÝVSsYCÜJ´Z>rL«%ÔcZÍ7…VKlÅjjÜulv†©7÷jjI‘jj.)jj½zRSëM%5õ 
¹{åL°œš+Šœúè—S¿ÍXqˉIN}éåÔú¬HNF¸HO-ó ë©¥¶é©å =µž‘Õú¢ñ†Bê¿êÕ8j–!Ã%Aõñ«™#ä«ÛnÖÙèj«Ø$ŠÒq¼x$¨Æ“Nwœiµ,¨æòž"¨æŸ"¨æJª"¨ÖŸ‚j”5EªÚP铞¥RcÞ9¤§¶F¤€Ï\ÑS'ózj®À*zjð¯+Îr¢§–(y5—^ÍeEO-! ë©ñ” †ÃHÅ =µ^§×SS Z–Sã¦#wph:YNÝ>6ÓZÆzêfìùÔQAôÔsöÝ·ùÄÕú IN­šäÔÇ5z9µ}ú¬/;…äÔ:INÍ…‚EN­C„äÔÚ¡|ë¯ý,°¾°¾íßÿ&Zm!@ê®Í'¬î„•ø° UXí„!̪é «†ÛàÎ’9Xu¾vZÒÁªmܯ¬„ƒUD`Ýdö4þ@ 3…–«#-ß)RX O ›s†Dau@Þg^í VÛC*¶›Ú†Õ7eêváð°ºaVû²M ÃjÄöH&ªbXª+ å®tFÖöòï ç* ~âꄬ)þ \ b|žÞŸquAÚŒ¤W[ht¯ójaÙ W[Ì^Öj€qõÛB¹e3¸Ú>ÏÐ Ï|YÆÕìúëJT\Ít˜pµMÕÑ&®aEsàêˆ"5s÷@q5I¸ºØ9Â42Zè©–£`µ^ÁjóH“ a5nX wèBÇ€`µ}« V«…º3¬ÆXȳ¦ƒÂjÙ`XMµ’VÛ9Ÿ: a(¬Ös¬ÆzÇæ¥1é ¬†‘zúhüÑîAÅ@~?(ªS«ÖÆÎªù3ª–èCÁÂV£CÕ²WÁ¨Z®Qµž“PµuÖžÄÊ~ Eµ^£j='¡j{Œ8¿§ïG»ˆ¦Gršj”(ðqÛ“jé‘j9ŽIµ½VPªçü‰TóãbPmc¨¥ëŒkdP-·ŽÕú«Ä©íNAàžTŸ¿J ZFƒjy¯HOI Þï2*"=õqΪñ¸¯üB‚j¹@æÔvÛ &›Ò©íÀf•‘Tà ÝbŸ•Êe(¥¶ÆrÏâA"¨ÖI× ªq ¸ß4ª"J-÷œDÕhLײhfQµv‡„ÕG£Vk_IY­‡”ÕzHY}ôÖ+«uƒ9µ<æÔ¨ëÝíì­WV·rd÷JÌ‚°ZŒ„ÕØr½d”ú‹¤«F/J]ÆF$«>ôºêËÕøúƒ@êÐK¤™×κê£Ñëªq!öUŸá³PêÌ-¢«>~Öëªñ áÃ5 ×¼®ZÕ몶›­ñ*rFæ6ëª#½®Zf’U㉠`&È‘®úøU¯«Ö;@ºj}˜ ©õœ©í¥ì¼;ÒU?ëuÕØë·ïê¨&´tÕ2XIV ù3Uµ|6˜RË׈¤Õú¨˜Sã»U·Ãsjíqj;§}†  jû’¿Ï,{$Êj‰ãHX­?êuÕl@?Þ›YW-=%]µǺjTõ@ À°£d]µy!—aZ®§DŠÄÌj']5sŽ+1›tÕøKŽ³ÜŠ’jë­ ¨™*¤Z{K¤ÅÒÊ×R­Gz]µ5ŽlùºjÜ„Q«5v]µÞÒU·R)iÕra]µv“tÕÇÏz]õѯ«Ö $]5î¸=§Y…uÕú8HW}ôÖëªy]õqN¯«Ö;îuÕz ×Uë #]µÞVgôÂj½$¬Æ‘%¬$aVV·—Ç+«q$’]FýVVëÍ#iõq¤—V£Îi½Wî1K«yiµÞÚ!­ÖcHZm %£&K«E¶ÇÒê.JÖszi5.;ÃÉ‘µÕ¢V-7ŽYµÜ8ÒVëè!m5ŽD~Ñ´é mµ^§×Vëezm5Çû,­¨ÁÒjY~°´Z'’VëüBÒj½w¤­Æ€¼÷™ö^[­ïi«õÍ#qµ>iWë“&uµÎLùÖ+ø9ïÛ©«›A5 nã²U³NœI5ø.àßPi ª#±0²¾UT{Ä—yƒ¸@\˜õkU_-"ÉÓ…Qõq¤GÕòž0ª>Žô¨ZÖ°ìQb†qåÚ2ª†#@y–#£j”LÛ˜Qu€é[™5˜]u¥›i茪±P²iræ‹3ªæÛ.Êj¤¡£Äú2ªF~wß ê„ªÏÆ‰ŽÎÔgBÕ€ùžN#£šPõÑØY5~л+å–X5aœ2õ}Īuw€XõD,xº ­±jœž#Ó¡“Yµ½L¨:eh̪QÛqÕ)PVÁÃÒ1«ÖŸ%Xµ  •±çÀ°Z´ì «eC‚`µì*¬Fh 1ÿ”#¬†ê'¦)ŠcVmá¼¶§žŠYµ6«–î0«–[Ǭй¸ „„Uë‘εC«¡¶ e •VCä µÆtÍö°Z:¬ÆƒûÍ’ ­FئϑÀêÆmö’O3Áj¹>†Õr} «yP«¾‘s p<õÏĪep0«Fn{ˆS‰Ç¬Z;K°Zn±ê®ïÖ{Vm×»¼)à$VÍ×ȨZÞUFÕú£Äªµ‘Xµt•Yµ3æm¶Î°Zô°Z7V¿pÅÛ–ò «õg V¿(ªxoñ8Áj™=¬Ö®¬>=­ÖaÅ´ZžÓjé'Ójmô¸úèñj¹BâÕèmD­ß©ä&^-·œyµ6¯Ös:^Φ’·lØój9ŽxµÞYâÕú±c^-ÃxµÞ<âÕú™d^-O“xõñ³Ä«õHÏ«#=¯>îñj$ XË©Xëã'b­§bb-·”‘µŒ FÖÚ!BÖ2y²ÖÀÈZôÈZ Œ¬µC•3éa²–ÑJÈZî#k9##k=’µIÌZûCÌZó_u0³ÖÆç/ˆ:˜YË8`f-Ñ 3kyš“YË2´–~´ÆüðÔ²}2‚DÊ>l!hm1u@­—™ 
CÐI¨Ž^Ÿü@$CUü@n”)_UPZëkÉÐZïAåH™ïÁË‘2 ÂÖ|Ã[ó[ËlÍQ„Øðë,ØZôØà7¿uZˆç ‹H«8§ ØÀœâ-Û\›ì@ôH²ÑÆ—Cåþå–q}ÿp l¢‡ÈÙèì@.Œë<]}ÄD$;¾þ™Þ~–ì@¼ KïÎv ò”ÉD~•ì@äI±ˆô•ý@´‘ü@¤¯ì¢}%C=’ AôÈÂá2 WvÑŸ%GýYra Ÿ8‚ð4ºAäÙD~A䮲#ì¢j]Eì¢äÂEØÄD;D– HŠ%ˆ^'Y‚0KÆ8b ‚ ÕFë@nl "—I– r•l "}eK^¾‹%ˆÜ¶Ñ#3‡Ë|$y‚ðWVºãù5• û® èMµÁL%öüZâ×Ö[óåC[{C¿ŠŠ`³VlY_¿>ôüZ×ûį5©†ùµìƿƑ0)X.%äõ½§Ú’è5ºS!·ûd]O¨ÍŸ~ø ‹íç"&œé²AôZ.Aéµõâ^Õîˆ^#i(a::-A+‚ײ@<àµ_Æ0¼>ú#ðÚ>«’Ãëãg=¼>ŽôðZVÉ ¯ý²Šàµ,;^Û@FîË®Áè‚qy;ˆ]ëÀìZ~•е¼W„®õ@!ׄʈ\Ú¼¤OÖ²ä8È51X"×ÊÙA®5™LÉ5!»'׸‚µÅ´çðäú8ò‡Éõ˜…\óå)¹&ZNbkEú•ësoIl-›æ¿\ÓqJ®ýfã®éG\ípM»­¿ \sg\Ó>-‹­•¤ ¸¦ËnM[ñÊ­½nÀak¢Ù‚­i§U±5ÍEŠ­iŠc­5o5³ÔZî7K­åúXjÅ5B!Öwð‘‚­©?B­£Å ÓÚô ÖÜY¦ÖþŒJ­Ý Ek­Ç ´ö×(Zkîê­½œB´Özdf}‡Ïp­µù°¾õ[âöý yÇ…¼â2Ù|yý*I­¥?CjÍy³"µ† ²£§{G`}‡6’¾ÃeðŠÒ ]äÍÆÄòþUAÖþKu kfË‚¬‰œ)²&ÄÅJk»m]ß?_xøË$¡uÅTYærž…ÖrXh]ƒÕé}%Bk{Ü(zy}r°Ö±ÁBkyÐ,´æÔjZËE’ÎÚ:‹êPõ¯¶¶ç©i•ôc™µ^%ɬåWIe-¿:UÖò{¬²®˜WíZQYë’Êš{Â"kýUYƒ_a!÷ÉÀú8’DÖÚH"kÎd‘5ˆdSŸ ¬FYW”­Ìd­‘DÖrHd-WÉ*kýQRYˈc•µ¼;¬²–W€UÖÚYRYk‡HeÍtRTÖÚ[RYëÏ’ÊÚÖÓ©ÄÜ ¤ˆ¬åWó1¼Îä÷­Èâ‰ÐÞgëAûãú»²ííOÿñÿþ3P¤ïi¹ÃŸÓrìÊO‘øßxÒ'Ís>?{?Ó)ShÑa¿Ôù·€ñNùwïƒé» ý/_ÛƒÔꥭ>þöÅw¡ßûþüç_l¾—Åã×<‡Ñþç/=ÿ¸ïã ùЃþ›¿ÿ =Ï*§e›÷Pn9–5–‚uê¿ÿ2\ˆþé¶ÿKÖño¿üúíý7›±ÿñÛ¿|ûåþÃ?ÿò£gzÚ–ˆ} lñòãgJ?q¦7£ö­}ѱÔúá3=Ó—Ì™k¦þ»™19éœé¯3º(éË>ÀPñ8ÐnPãõà »öŽî¸¶/·ƒ4_òlpv}6Ô/~6­?ül¸;žÍÌDÀ•3±- Œi«} Ó¡ž9&¢ë[îóÐoŸïè,¡›Y^Ø2=NòΓ„¿í,/„ é¶rþ,IÏòWÎPoì€äÅo%—¶X (Y+§Àÿ´üëïÿí×_÷Ç?þîOúÓï~-¬ÿû?ÿ¦9üÛå/Ú›lá}Þ¥›Q9xçL'¥Â"²‚ï¼÷¡ß²Ë°Ç–ÿ÷.¼Þ2B3ò6ðÖhtèŠScÓ ä©6¨ßv·GMðÈšdKN8hŽ_}QûuºkÞ8ÿ’âEÐìe÷ZT \µ»ÑX{©R;eÅ×5û¾x„p`ùUâ‘¿»fÄqIÃxòA…­°¼Âãí¶AåíÙ…a÷±áŠ&Mpñ@qÊ MhÛöºjáîíĽLÍÃ`ô±ð³!m¼‹3n.(N¶—ïñúÏ÷¶¥å–oï^"ÊÝ®¢*hlï(¬7K@yÃU¿#¶*NÓ@³`[h×¾ˆÐ ØðésRÜ^ÀÎK9’«ÊmX[9^ô æó5 ð߷ᯃ5Ü÷Ìs(UÓøiïòaB¿]¡R{ˆ6%¹iÙîñ=Q~|Š«(qtqîš–åÖ>+Ub÷6oðî6§Pk/Û½>¢È1N:/³ æP–ξ—«°$صù, A¥µ9”m@º­ÜøWrçth|W°|œÓWq9Î ÍÆš˜/X¦§÷ª£1;ãsL’°Çcî…%䜗ñ9,ø ¦4·©~DÑË3~ßaˆþºyùry÷ñõµ î¬éZQ ªÌÌhlïX&¸è¯`·ÇÆ€­ŽJo„vnMËBðåY€Æ-³–kk™ï%ôƒ‚ÛKP¯`9 [¸3u4&¦™c9A,žF™à´be¼ŽPµð35£½9+ãS²÷¤à劕e('ØBg7+[`ÿÌ¥BB È+Û‘©;¡Ÿ}ágä†2Ý·wkZp•—«Í˜ qY±²|ÎTl+XÆ>­“Q¥¦ð³²DR ^>+Z¶#£“¡±®hY/þð+Z¶0ëqn¿ Šö-#²{1\ŸqdqURð³ÁnH¨£.ÆsZÆ£.p£ê¡—Êç´•§ý·×Øå¡¤Êœ–õRv‚C 
ﶸhM(Žƒåâ‹oûý±µÝ.€€OÈec¹KûÑöîX¹`ù²dúcwçÊÎ$<¡€–©“Áé–Û«úZä0JG%àŽ,cå”Q—|¼ˆ9]°lãÂ~GQ‘`ËòòàÙø`¹8óñyÁŽ–¡ÇßVÇ)øZef£íÝÁrÅ`Ù¶'Ä\;Xnßbh¿|Rv° ªmú”ìUÛÅcðø13Ô16©ìhÑû˜X¢/K$“2늖­Cˆélù=:”]æG[$[wbGZ¸¶¢eÈê·=Z‚È`ËzóšÈ`MÊò0©êˆ„ö "ƒ,Ë@N—+fƒ»^ñQïæÜ ƒ+ƒ@C—Ç`…È`ÅÊ-±EézðŠÆúôƒ ¼ ¯)ì³UvGW,f[¡2n©£fÉVvKºyÜ™“â1)£Í•½EÛ.€¢#ê‚*”6†]§¬®®Nh!Œ ÒñŽC]°Beëì #»{<È V¨le[£í]‘²ÞS[Ú-Ñ£³ÕK¶´Ë+R–Õd²¥Ý./gGV¬¨K_‡&Р*ã…¶F‰»qWd‘vŸìæd¾ÌâÊý`$;oòdK»¸BeëP†Ìpä%¯*Û‘ÁYì¢ñ]¡²¾Zy'ê¿ZoÌI÷ B¤Ú¥ÿ¼CÀ+jû ”qŸCYºSúwì®¶ÄêµÎ~.wqÖ†ùk)‹ ¬FÄÜד°¶á\ž{©”Xc",Ï’L°ÆÔ[âR!1°nOÒË(ÜÅÀw'à™LÚëVÇ‹ôñá"`F” ›æÆD¬Áå‹E& äxbMÐþB¬5m“ˆu“ÞÙš®•D¬!ÚCý:{¯"ö‰,¡å½½3•‹5´€Å/†™!1k4æ„K©'³Fã ƒÄ Ý<³†þ°`¬uÄM˜{¦‘k4Ú¨]¥_=±†Ò"þU÷ŽˆuJâ;1—D¬±bsó**MÄZS^'±n²ÌXVÙF"Öh´©9̰žˆuË“-¡,Úâ‰5 XwšGnbÝd¢öïfB"Öh (e>*±FcòKÿ@™Ñvd÷ÝkÈVmfYµÛX·ÒJHZëÐ"Õ{Õî"bݤ²óõñÈGZ=5ðqðY‰@>ážnBÖ­1b>»ÆÏ:dF› ga/"Ög²OÁ»nwö§tÄ ¬:lD¬[y)Ô¸,ãW°n:cÁ°n;p™wqÁì¬Ñøäž,Ü=°n~ä“Ë:`ÝjV¡äâøÌNµ˜ä±nZi›;¦]2kÍu%bÝr]=QrÄZ“R‰X£±X‡ì‘Ž ||f§¬²Fc„}Â}Ⱥå³özUã2²nG¶|Ⱥ Ío$ˆ Lî‘uË-EE–4¼¹œ7:f­ù¬Ä¬ÕΔ˜uKz¾qÙÏɬ[2=–EeÜwϬ[êòÝñ'/>‹TgÖZðŒ˜5jCk_Ϭáå™uk$Nì˜õÙxû<1ûà%Ôu›D·3ë6´FM·þßÉ $bU^ñfãbÖmhN^²¯›Yk241ëcØyfÝ2¥[щ¬µN!ëÖˆ¢‰eìwxd­YÔ„¬!é‘uËhF™Â”Od­µéY·¬g¿Ç晵¦X³nG&°ÅÙ!Ǭ[>TqL¡žYk21ëóÈÇçÒµ-ï‡Á3kÍP÷Ìúxg=³ÖLbÖjÐK̺%ÁŒH×ÏJ0ájIK&Zý¡íÝ›·ì›h‘Ç€ÃVkj1ÁjÍ %X­‰¹«5‹˜`õy¤ƒÕj²D°ZÓd VsJ;¡jIY%R­‡E®tèó2‰S·<`W‘8õÙè@µzǨVÿ6Õš´I ZÝÔ<¨Ö_%R­¿J¤Z³$‰Tkfæ$ÕçA‘íù Gª5g•Pµ¦ÂzT­yù„ªÝ{F”ºå9G0¯küêÝ#°Ø ¢Ôú¢¥–—‘ µ¦Ý¤ÖLx‚Ôj HZßc‚Ôš²NZÓÙR«ÅAjÍu#H­¹n©5› õy¤7‘çW‡(µfí¥>–ÇÔgcXN¾þÅð˜úŒŠrâ1ajMö˜úSÙcjµD"L-IÉD©[ò¦s'L­ŸÂÔ:˦ÖTXÂÔ-ÙÑïyL­Ù¥„©5»”0µ$´¥–ìB‚ÔGÛÆ´>/“ µ&X&΄ãLH©õŒRknAêÖ€ŸŸAƒOÚÅþA†à=ëHZôés©5Õ(uûÅ÷‰cÙEúhÛŒZ3D‰QÐ3êó*gzÚ³}§Æµ&Ö£nnƒœ õq<¤>Îé!õÙ½á,nðI;cRŸH©[޹ßåòú©«o(ÕFÑQWôx¥§HÚ««m‚ý«ù¬–ÀFÔÕ<91¬–ù‡a5ÊF»0†Õ(âRPSÝH_ö’aµÔ©eX˜Ìí\1¬– ž «¥úÃj©•İámaIoó¬‘'¶¼¦Æ‹Xµ5B]<=Û˜V# õ¢D¢ÕRk‹hµø¦g»gZÍN «aº0ªÀ3¬¶Æœj{Âê ZçüL_-–W‹aÃê “gtøÛË«_LàÏÖ¢yy5Vw%’X]Ý“Aòü ±ºúÅ×0”Q¤®>½º?k#y³s¯®F_!½†*ˆÔÕÖøØ•,áÁê +Ñ^¹ª7zXmpe[ú&‚ÕðfÿI‚ÕÖxÙÚ†. «ñ³XÝÆ)¾ö´®Ž`ÎcMI°î%ÉIó VãWm\Še‚ÕÇ‘žVì2 ÒØ:zZmƒë¾ì;ôŒd ¢ÕHûÀ%‚ÕpǼ ÕX`5~µØvOQs‡ÕÉ)Ï#BÕ0`y1 fB¨³w±'» £j{ßì_W›¤G£CÕÖfK$A¨Ú1êi#TE0²6&õ"T}t–ª¸ÃBÐùTMªFã6Q? 
ê£Ñ£jÌÙ¦œI½U를ºùåXD=‚–WãµÃ¶ÂˆDY^ žçÙÉ2$¯Ö·™äÕ0dSßB"Õ˜•¼ç¯zuµ˜‰²º6÷,©§'ÕøU[Ç-Í)‘j¼®ºÑW"Õh¬6aû-VWëä2ÕÕ˜wí÷–¨’HµÎDª1#…²w3‰TcdEÔ]TÍ“êæQdËæ0^c"ÕW³ŒÛz"ÕxcÙy$„ª­?¶"ˆ dªÖ/©«_䑨MÆL¬®Æ«0ÖKƒ){T þ„]Â<ù®GÕ˜%¦yÕÕX½I¯0ëíØÎ!T}ÁðÎ ðªÆ<ñ@T5Ú<©,KÆH5˜œIH˜Tcâ…Lo Œ‰T£?sêuu{g‘à67¦<©ÆUîJŒ ª¯vÅi³Ñ¹ï4¸q¯M4Õ8òÉ[,Ðiµ~ˆV7ŸcTF”— ÁÑtN´ÇA·Q':v´F\¶À{Ÿû­Ö‘A´úè§Õ8Ò.ÔBš4Ð%ï3‰ ÓjiŸ¨•hF´ú†ƒæÈ""Z­ç$^ý¡qëãgÉϱÛ= !#kܾ™iG¸ú¸ «Å£šq5?H‚Õú2¬Ö‡åaµŒ†Õ‘¬æàA`5%«–O˪F/«>=¬–Áj¹z†ÕÇzb-=k‰ó˜XK8ËÄG ‡o'#kÄÙú;1!kù0²–ï##kq0gd-1"#k4âou‚g¬eBfd}œÓ#k™­YòÈú¸CÞ8Ðá­…€5¾\Ñ'Ï%`ּ͋N˜ÀZßÖ2y °†º­¹F†™kžŽXëh%`}ôÇk} XË8gb­çôÄZ_s"Öz%L¬µ‘ˆµ zìŸ .ÏÀšgdÖ2pXót-Äš« ²æhF˜µ=êˆ7}ª®‰Yë "f-…˜õÄ­•/ÐZFCkéCë³±>ÆCk¶h}c‡qi …ZËH'l­WHØZ†ckm$l-ƒ™±5³ÁÖg£ÃÖ¼®lÍ»`k™x[ËW„±5ÓÁÖÑ ¶¶Ñ߉OصêÊHl-ó2sk^ ¸Æ»ÿ”iËàZ>y ®9üp-7ˆÁµLÛ%ú¸ëg¡õµ õmÿþ7kLTWŸ'Nb ¬vâãö©‘k_̈‰µl7ÄÚ». ±VœMÄšö¢X[Œ„S̼ZÖ\ë\€5‹`-5 »yf¯VºN¼ÚnLO®ú XÛ|T`Ãò~Wë)‰W£Œ "†¡6b^}6n^J%ikÚ™W,.V®™k‹ií½[ÊkâÕ°vEÒLZ"êΫ & { Ǫ—ÄÕÚæÕ˜©±U?‰«õžzq5NYíc•´ÕGw¼¶úh$O;ãS>»œ·Í‹«õBH\ý¡ÑñjVÌ«µ‘xµ s[€Û”3È2ñjÄl;åRxµ¼=Ì«åía`­¬õœ¬í/ÞÙ‚µ^ŠÖú«¬›áøÊªbm¿êS‹™XË=˜ÄE#¬_õ#³¶ÁnûÜ¥OÌÚÖ1¶ù¢7³~`&´¼˜Y믳Ööe¾|’33k;Òå› ³¶ùÀ¶âýĬ+–WaŒf-?ËÌZî3k»N8½cÂ"y5.% ‡kÈÆð`fmýAfSŸfÖÚY/¯Öï=É«¯fHá\ЉYÃt¿Ö™‚ÂÈi @Ï×'d­„¬åJYë‘Ĭ¥?„¬5µølèoYë) Yë‘„¬å™²–¾2³æ¾2²Ö YËÈZ>;Œ¬õ÷YËäÊÐZ>­õ”­µ‘ µ|ÌZë) ZË׌¡µ42´ÖF‚ÖÒ[†ÖÒ[†Ör÷Zk#Akù¼2¸Ö¸–σkm$t-_F×Ú[®ù£/èštÍýtÍ—¹uÖú‹¤³Ö_$5%]ó‚‰Ñ5‡‚®õW ]‹¬“Ñ5+7™\ëD®yx¹–[Àäš_ !×ú³D®yx¹æ×@ȵu(·|Ãĉ\s@$äZ/…ȵ ;&×ÚHäÚz‹,à‘íÌäZF³kž^…]s¤-ìZ®d²k¹ãÌ®µ‘صÜqf×zž]kOˆ]ë5»–çÈìZ³kí,±kidv-÷€Ù5‡á®µ‘àµ\ Ãk='Áë6’ÛR…áµ f†×rã^ËÛÅðZfB†×ú³^KÃk.$ðZ¤ÚÄ®et-v-’Ùµ¼sÌ®µ‘صŒ,b×Úb×ú«Ä®õHb×2<˜]ˑ̮¥³Ì®åíbv-½evÍËa×2+1»>_0ó9‰]ëu»–gÍìZ^ f×gãë#fnôìZ3³k¹µÌ®åJ˜]˳.Q_¸Ÿ3¹èú7™Y#wßÞ¿f uÀkU{x YªØÉ™d{Ãë»…´vŸÇö1ÁkøC—`¿ê´Ø ^£Ö዆±gÀfÖÌcœE™YÛúËB¦ÐîÉ^Ã<ÚîgžµÅÙÍÚ–u¶æº¬?ܬÔuÎuäf]°ZƒÜꢽ›õáíݬ ³Ã{gä’“µ­àQ¸iª¡ÈÎåGß%!3ë‚'ùb3kä.>uÚ‹³—5ŠnV»þ¡3$/ë )2¶OvÝL§‡…Toó^ÖomD6ð¿äeÒ„M¤ýÉË:ƒ`ìŒøåeÉ8ìýõ²}5{lg}ü¬·³N0%¿÷®ÙYë9ÉÎ:!–¨5]c+Žü¬†Ã3 ñXoS"QwlF‘ÜE¿‘ÂF_In3Ú-ž¶,·Æ‘Mm=rrHn;93šþ è¾õû±ÓzôÖøÙ sè)’ÞG[…ÙèܬqÊ®§‰w³NÒêrYc7k˜ì£rÁÜÿ$½µ^æÔ[ãtH·ã3HnÖhœïü„]7Û~ä¶åSn}<of-˜¼¬µÍ[YñVÖ8©Ïy|°ÉÊZKØZ‡#‰­ñ ÀbVf +ë¶Hy{+k¬ìd¯ë»·8ÙÊj-ùlš¬¬íÅBÂñÜg!+k½de}tÖ[Yk¹²²ÎÈ»² 
úß;²²ÎèúãìOœ•5VȘ¦3de¡Ò†Ý4¤öVÖÖY¬5Öî YYÛe¢ÐÌ,&Ò¬1öñy|&œöNÖ:Ë‘“µÎrädŽÀÊ|âUïd­“9Yã”,ÜONÖøÕëÙt•œ¬±ø„³ö ÜÉÉ“wFp9Þbr²–—‡Œ¬í”!…% k½äc3¢Nöµ·µGò±>~ÖûXcò(q§I’µNddd­+YëÍ##k<OÌ]2²Ö;ë¬ñáùlmåGNÖ8%L †ÂŠŒ¬ežg#k™–â75ï=¬¸ÆAÎE‘×øç&,Qõv²`À7ðƒâZ?æìd¡Ór•&^÷NÖG£w²>®Ä;YhÜNÖòc'k ÙØÉZ†;YËÀb'kyŸÙÉ“Ì.F‚;Y½õNÖac} ±9YÛ‘·ó½d'뛀ê@ºw²–)’¬åƒGNÖÚFVÖò a+k|BRZ€ìe-_çéeÍs[Y#†;Öð’“µLÙìdÈ’Çw˱—“uFù&{#¥‘¬Úm´`ì´ѹƒ¬å-'#k™:ØÉZ¾»ìd‘|½KXÂNÖxüµ:Fì¬1%OÄéâ¬Å_yC@r²–Y¬;êÞ¢"'k}×ÉÉZf{¶²>:ä­¬uÌ‘•5ÆÈ}m;oe­/,yYã”( 39yYË×™½¬ñf½ÏÞ˜^Ö²þd/kYر™µNXdf-+;2³–ðƒÍ¬e™ÍfÖ8¥}«§¶ˆÍ¬3Ò(Ó®M@fÖ¶»àB·È6³j€ùà̬ 7káìfͨͬõæ‘™õñ«ÞÌç/\‡U™YãÈËÖ%SÃLfÖ\ñéyÈlf‘-úns+2³Æe%X'OÙ´7³ÆJ7ÝËfŒÍ¬[ýÞ{ùL’™5j½Yü=mÂÉË:ã±×k. ÙË}µeaÆpñfÖ¡Ü3+‚4/댼‰2= Yÿ­>!öñ„‡#Œ»?©®ßj‘á,2ǪkÌîW»øÞèT×(WìF <ÄEacYÅòØ&/’½ØËF›Š0b%›ó=qÆ{áíJܺfUGŸ© cA‡½@Ö*ŒáÅÄ6zËUQÎÞÇ ô¸ #ê&<ìsÆŒ;¹Ñ,—aDÁ¢œK†T‡.+ΓÑ5"åí$èb›M®ëtµ+/(üK-»†ŠÑ¾~Ó…|±k î{’2º†1‹M¥i\ £k…Ì„®# q¶%á2Œ~bt­„€ÑµbF×ÈÃ)ÛxQË0Ú[ð¬M-Ã$ýL ªÂ± qÌã2Œ¨Sêìйc@–É–i‡Ôa¼ê³í¸£Ôôâ:ŒxG- {OÙu+1æ|½¹ £ ±„‰þƒê5ØRÙ5L¹#Ì ì‰,¨ME V÷Û`šŠ0âéØÝ™’‹0Úá=‹¹£T`ã"Œ ÙµyÙáÌ"ŒÑ^¡»}wû–*ŒT~¸5Ž„ùƪ‰è¹5ªYÙx\û~ž[k1âÖ¸Àà¶ýˆ[#ôÌ».,ak—ȞВ°uDízggLØÚ.${[ÂÖ(Ú®¸Œ [ãÈU~Ñá2Œ\XFÊ0r)ÈoÒ¥&Õö௠W÷¸–ú ®íH›Í×ú€Á5êT&ôvŒ×Go=¸†ÃfÈÉÊ·–ŠMÌ­1·Ù]]îåÄ­åG [KÝ•…­Q¤ÂV&OުꭑՅº»®Æ­µ(&qkÌÞÈ(|ÞÃÐ>©‘cê pïjê–éçáɵ5>/¢«é’âÉ5¾&v‰3~crm—ŸX“Ää6oCä:à#—{ "רQóþ<&3"×vª÷Ù!t/-âÛ: -ztàjÏyJœ=¹>:ëÉ5JïÀBä¾–È&×7>Ù(tƒ1¨ÁˆU dEËöÚ£ëVÇe1º¦GEອq×>X…`h¬=î)Ôöàu°¿¦­·× ­øK¥·\#G7¡ÍÜ ðàZž1ƒëæ¡ÿ¬"Æ ®›Ynœ©Ì­aìŒé0M²çÖ­$„Ó˜ÃskT¨XÜÔCÜ£6¦V­<âÖ0·îl‰[7%!  
–AÜúÂÒ±ìzhž[7m^Úψ[£³¨?÷žÞÖw“ înÆÖH"Ë»0ýÄÖÍž9;ý¶çÖ0ƒM¸nIÈ(þ2k:zp}\ƒ×àéOGM×X ^IÜ~WÖ—<«™·†©=×Y†Á5’ta™¦ýŠ×p!Ï(.6•Ï\ã=ƒ#AeH \_X¬bŸ`Ò#pÝò”í#0Š¢0¸nbbLeïbÓ\Û‘ö]\Æ¡ ®A‰.ûN–éâÁõÑ[®›7¥©fcpm·/×½ôgp TÔòƒë&§Ýœ®1è0$ß÷¸nV*%Ë`טPQ‘çšÆ\[gb…7¸nìï ÷\·ZøFN³×Í]Ÿå{ \ãÛÃv›÷àºÉ1{<Àu3a«‹ì0¸nÎ-“g¸n.¸»dƒk»|XÃ+¸5¸nv[v`Ϋqƒë£Ñƒkë{žV!w3ôE¹ñ¹nv÷–«¶'×ð„bm5mnÚ¾¬Ë垸5ò¯p#ù³7znÝì¦p!ÓüÚsk®6ï`-ëÍ­­±jÏÍ8âÖø[ô5å!Ä­q=é0Œ³\Û•T¼…ÓL‡Àõ……×&C ®[¦í»K|–ñ]€©?¦Ÿ3 ù}«ÄˆÅÄ·`_aûO ýƒŒ?ö‹öö§ÿø‡ÿBnúýFšîðç4Ý»òSXþ7žÔ®ïWwÍ_qÊqí`ˆ~»ãßýìˆá×ùû_¾¶óáÎçüÅ׿xû“}1©öݸ¯¹÷ãìýÏ_zþ1Äç`ÿpîþk¿ÿ }€‹}ˆšÃ6DRÔhQ…ÍlÖÿþË0"ú§ûÛ?aûõÛ/¿~û_ÿíúÇoÿòí—ÿùÿüËž¦©ð0MW,‡ÿÊyî´Ndýúí§Ê¨Äh ½`‘ÀŸ*驾lÊø’iJ^Ó¯Ÿ³Û ý²oReP@ ©Ã´ÔøEýq ž£;´úÒñ]®Ø—<œ]Ÿ u㋟Më?î·gó“kª¶¾„ºØÝµ_¼JÍ­®Ö1]ßòON®ršðŽåû=Ïòγ„¿ñ4/¶ lRÃUÿÂi’žæ¯œâm¿ZC´%Š­zA .¨¢õøŸv‚ýý¿ýúëïþøÇßýéOúݯÿùÇÿüßÿù7Mãß~,c‘ç3Ð[ùµÝÀöZ?Î4ïïx¦Ü-£ ôÂÎ/Ô\i-¥r¸|ö H‰­-†–"‡à„µÆvòrÆ~ÛÄ*d[ç历rÈN.‘ ´ÈË1?£¢ü½ØðsvkÕËEÑmgïŸãåt Àë¨{4HEƆøB²ØAÆd€¾Œ=ïÛ­ïoX㿫䘜¡•ã}lå\Ç9‹“´ZK[5lÓ…“0[ÊØË^ TPW¶Q°5‚s {ßÿÅ.V]û26®ÜÎ~3‡«K“Sq›÷XÆ;ñKÆÆïæŽÖmõ;6frOíškXìEյ헡d›ô[.ã5Û*~北\u숰7¦¶]Ž*îöàã¶ît;â ì6ìMo!17uÆiÝík·þ-ùÍÈü\D®•©+«ò[Æv§‡n7’oÇ-Å®æÂjØ$Fº½1»=æVÜ./ô}¢ŒÔ× Ç CGJïìOu;ÅÀàºÏ¸¥öµÙ›Á7²6Ã*,ŸÛíø³·géöt!Jªí[÷d·wÛ ß¦FÕzcáíÙ€-æ‡26ôö¬ªù`Ón‘#í+öå$÷ªdh˜Û+Å>šÚçéÚø\“Û Ⱦ…t¬Œ#‹ÛòÜŸkõ»šJ­•fm/‡Û¸ÄÚÐYRØô´÷&¶8`ßÁ¢ý¿ýˆ:Zu‡î«-\|E]ƒ–3v¥v4…míwöÍØxZû„ö!¶ÐÂîÖh{ýV`ª5 ì\àò½7û®¶1ÝH‹½-´Ÿ÷„TÀQ÷žŽ‚7~oÌ¡Ôvºð»4Ó±9!™Áo¯¡(;´ôãœØ0XSr³ÐA÷º(¬`ïeï’ñÖdÁöÊš’[IR8c—½`«ÒOÉþùäëï=-7ä_g^ð/w5 ŽÂ¡….Û“”£pXu`šKŠÂ¡=†rdxä0œeü…gì™»àÎGáÉÚ¼¶*zpž!1¨«&2‡áq^i™œpž`ɱ7„8 Ç$÷`§0­iiéÃpä¤u­d8Ãðˆ„†¼¾Æ†ã… ò=Æc¡òÞÏS®+# ‘ë—Ü<@A¸5¦œ—uGáxñmξêb) ·#ï×gœ£pÉÙcޏÓr›®C$¹0\ù8 GºìòŒù( ·N"³aš pŽ„´+L£ŽÂ±¶{0鈑¢ph¹ãÎæ(< #²nâDQ8Òø¬ s<£p;òF`”Þ4é͏[|Œ¢ð€Ä˶Ü,gÞÐN¤ç(üøY…£ÑQŽÂµ·‡ãCt•%Âæ8rXÅŒíBŽÆ±L-aq Ç#¾Œ˜¤ï÷ Ç‘“ØA¯4\¶`8G~!fŒ±eÆáx@bp^V1Žäöír@Ž#+3<;Žá8òì·6A÷Ḡ ÇCm1™ýýC8Ž…l²‰+)Ç=EÂ{Èh8Ä“Y™Ãqd;\(ž1…㸸¬aœÁá8¨¤³Kçp<‚ÖÙ²z|™8ÇÀ‚`||î9øm/hÇÁ\ ”Ǩ£pïHË®‹âñ€¥|\éãœÎf›r»”ìŠøq@nìqEb8 Ç,R̹5†ú®È1qEkžû"ãuÏqeˆp@®çô¹°.Èaƒp#Y6nÎW÷• %Gâ(¨¥Ýú#ˆ¸¯7Añ–5‡érFÆß0ðÉ"ñ£iZp ùStŸ|Ä#Š. 
+ Äáf€Ò¿ƒS Ž!ñîÙY"qÅÏгá3‡â@ºåq?K¡8»0I(Ž .$a„<ÜfŽˆbJ9Ö¡¸}•Qctª(‡n@<(„Cq¤'¬r‰ÛÝ»Z¾Ä‡@ÞæÈg™>GtãJ§å@/¼+ĸÄoˆc¢ÅþÅ;å'=Ç'ÄUVä@жâ²Ò‰Ã1\yLÄ1qC4pâ*vbnwûXeÜn đؤƘ„‡Û e%¤G&} ëŒï‰‡_ð×HÓc™qè@Þk¥Ïˆ, k¼ç…)˜› žT9qQP Ž#ÂÏý- ÄÁ<ñ«#N„)ÈIGÎ|9#q`?2ÈS#BºÞé“' áò!îwèR,@Yf‰K6:q{³†áÒ‰KîïˆÄa—„8sÞoâðªÙNZ‰£nØÜÄUâCqݦP¼ÕÐÝwŠƒMÅwÍ­Šsò.GâÇ>Çf4D†%}ˆÄ‘! ™2È'Eâ­Àœß3o;ª¹%Ê8R«®eeÈq8² mÁ±‚æáBL)Çvöó®-*ŽÃ[Á×7Mã3ŽÃíH{\öáŸäÛÇáØr·I`înr.¡‡+õcU ÓGåcp½ ŠÁ‘@÷àë|ŽÑ ±ßQ‚KÎ…àW³ÂÙá0 R°½|3¡·ÁY: !8.íÅþþ@Å‚KʪR"Llê’Q.²L!âpÚCö`ù¤H±fÍ)ã`"Þl‹ìœï‡ü8’ˆ8 ¤kƒvR¤pž¬0qh¸m›V¤ ;òã'E |@ Ú¿åÂÄ1ŽvÜ!LÙôï Š"žaölçŽ3q‹à±<t…‰ÃûáE²Ð‚ÛœPµiäR —îØ@Æ¿‡[—âüÝKÙHôÍŽ€>ú¶ÿA¶4>pùˆ¾!¸°÷äù, ÏxÃlˆ„A²(OÀcëmæ§°aVïÁ,)×}hÖ…CöjOîž«KÆô¡ ¨-{p"pÝf]¸Í8@½Sôɺð ›å )¿a‚ ™yN㛆­¬<Ô‹éˆÃQúl›%s¯˜ÍÄq‡Xn׉­±E*Êp˜}àWʇ©Ïð`ž]„á7çTû7lsSžBp(Ê-È[aÔ†CLü.»"†³Þ…áx5}V¤ hx1ÌÂË–£èÂùÊ8í9×3’,œ¥‚CgYZ)à \5ìcʰÓ?ãeæøfŒÐŠŽ]†S‚ÅßHEx°ðÔñ7lܰ±:±4ÅßvÎ÷áÙèpQ JΛÖ€‹A à²)˸øQp.M ÀÅÿ€p}%GÎÑ0àbãÁ8û;p~µhóžZ{ ÀåËÌ8rsa­wÝpøÚ4eÙû!åuáCŽÀá`—dO"pž$‡åFKxÂ!¸„D>a·o7oT•‘\OéCp܃‚[;D0‚K8·Ž%óø¼HɤáñSp+Ÿ'§ü!ˆª‡’àvÅ/f²”«ÅF®R ^<îm ŽÁE!0ƒpY`p.z ÂQÌ·.A ÂQzú„sáˆ_`»>pðVH0íí U…û5±á"QYJ}v1 ÂÙrF‚p~›%g“ Sî—g„# 2î=•…_ º%A8ÛîHž!EX¨J‚pøãÝËbO‚p„iY?JÎÞ2„CRÐÞ—18G¬KP÷g\%áluÓu) R‹‡óºsK,Žøzh+~›.¥`Yº¼%Ióþ>¦¸OIšp)C·XÜæ {h«FÇâð€*ÏNëâXîE©¾Ôá7¶]-ˆŸŸvŠÅ[¥îUbHBñܬ®V~"‡â¼XãP¿Zñµ½>Àp|Q6nࡸ~’¢ÉãV"q‰T)G,–vEŽÄ!VÄfÈ= !EâmC"·qƒ(GNTÝu 8¿ah—9J!pŽf‚Ø¥N8!9šld7CqE¤Š#…"¾Iô}(~7¨ûSÏâCqIw’M΢âXA¡-×z”“49•L’4¥C‹ÌQ¢q ¸%÷HŽÆNb ªùƒ:\>6·Öe¹ýJ4.a%EãXX¼¨/1¶Å(—Dã’ÿLÑ8GŒK+ãønÛ…”0ÑM¾ÖöºÄâì|űø-à½)±8»bÍXÜ‚I$ 8Ä¯ï© ×å4Gâ7|9_»´á­ì²\Þª¹‰Äñù+ÏR*øHâIJô~HÑ?ƒ@h8rR ŽŽcWâG¼ L±m…ã|qËË(‡Mj²A:†7ÇàÖö`'cDKƒL ü…GbÎKÂë#ðÁc¨koŸp8Ö[0Þo Àµ£>þ~a|n3ÜâØ.üÆÅ¿p˜ª¿Qã[‹éCüýfØ l±¹ ¿mà%¤Û,*í¢ïï˜}l'©ïÁ· ß7ïÝqн߿ògžˆ½&y·¼®²¶¯(ð¶ÓÕ Ž¡C_ããn\WB6ÐX"ú°[»é£n¼ØqãI¾_¸X_;»ÕÇÜ/6©ßõÅ¢û…ó¦-I¯|*Oôú|ÀmmWãuc±êãmkËM—>"Fn£ z•´P´mÓOÆòx€G ¶qÜkËü¹ ÷±6 “\Ä#Ð~±(ÜÕ(ζ_¼ïEa¶÷ ÀaEÙ/äƒ÷ÎUðQö ªö^ÓÇ”@7+q¢W÷<ÿAûïßR‚“‰­óÚŸþÐþš*­ýOwÁOù;;Í«ûÕ]ñßû” Øö ])ûaÿÝ{€j1³óÏ_z~䞮̿|eÆmïO~ý%4G½;ÿt.Åo~½óÏ_zþ~ÛGæ_>ôàøj…Ô‚ƒdkô+#€´É¡Ì­×_hÒu›- y~üL‡Õßœ©U(hµYì^üð™Ê—™ î¹rOÐç!3'寜+ÝU†íwùeŸ†ØÒ–gu‚Ú¾¦7ÀÆQ-S{ãÛ¾t(\{Þþ’ç‚sËs¡N|ísi½áç½ùô\~`N-m=oá$0ÑÂL[±GØHÿ—Z 
òi‚Åž¨±ôø/µ”ÓØêñ»­`^[V|¸š?ã%øü(!¾ciñË¿»³þlðÿÂóíŸÜ9]oü?iæÝ ‹ŽÛ$& 1ÜŽ®Dûÿcbÿ?ÿÇÿõÿ?ÿïѤCÕÑŸ¿r$TÈ€²uÖkÈy± ç.ÍêðÿëV‡å‹¬qXj„uJsÊ%uŽI8Ú*=£«¡Ž`c'NÁê…Âá!Z€@EÁ怶wnÈ;ÖÁðF%‘§é!¯Ù´а„:> Vw©WDéšç}R[©£íÁʨ-¿¡vJ>ÆžÄZýøQu‹ê“íÇï¾ÐyÊ¿ìß~k˜ý_òhBYËËQ+ éģܛݜFù¯XÊ7ÓÚµ ƒE H1ƒ¨åªwÁÛ±>mñàZòŒ Ô[µ Ð¿=¥ÜÚÒ ¯ÀXGÛÚ@Å3¤Új¶—lÝîÖ•Œº‹ …ÜÀâÕjé¡t›«å 0‚e>6QóÍY‘„ŒœÈñiuÇc&4½)¨Ý¶¦¨í³n@õJlDØûö-6èŽhªíËÚÛ•,{•½%ç¶'ÿ=b¤<öOcKÖl)­+xs kÄôÿúN«ñ‚§¦l %ì*–hCÙª”µ ö<°ma³’=œø|cŸ¢µ¡ÀvîãUï×–ö½öŸÝRÌ<íì8 >—o¸ÒhBKxc|‹­^¢ ¯úéÊÞ숭BŸ=ÓÒŸ[AÊ8{kÃÆùú%Ô½‚Â’¹÷¸Yos¾hC çsÖhk6,ö¾¡¬d#T=³ÙÆ]+^Ù‡¬]ƒfmÛŠ¨‘ –ÙÆ,RW›ëF¿²-í|¯ oÔx,Ý©±ÿfü>ÞðÞášv9fs“kØ*°ßä·}éÆ˜ÍM4n—Ò³Õc67·Ó’J#Ûö ›…Z³9'õZÇ¥½oÕ*/>óÛç,‹Y=MíYh o@ULÜð„y3ßÒªSŽIetžüÖfÓO“¬5!—|üÁÝ{}À–Ë—¨mÎÏyN²¹ ÅŸwöîÊ”XÞÃUÆ¡š}æ‹IÖ:Ý73BŽüàf[hg‡›iæ$›]-›Þ¶ÊRâ¸ô¢âT{mbÓ:ÎI¶`g¦ù=ÄÖÖRßÇ€--MbxBAì8ŠJà†ØŸ²'Ô¼FIÊ>`KKó˜»Æ(y½6á>´Õ9Åãa_È.ãêÞŸCWì/öd®~ö}ç$‹C«þØÂ‰Ø=‡Ç€-ÍÆ¢6é¡Ú5ŠQޏ¼oÓŒ6lëÀ 4·†+¡vIs’«¶  tÔÁŸ¥ÞfqVŒ]ЄJ׫%Úì²_TEhm9ÂsÈb ›g4­Ð£•íNÁí)À¿ó™slÁž»ÝÍ^v°Ó3æØÒÖáÒ¯¼"ÀÛCvÍ/h{[ Ê>dKólEõœ4ÚÖîb{m"Ǿ>ÚàEýÎ9¶´RqqÞ½ ø¬Õ[ +çÕ­ã c–a'b|ÇPÜzíJ¢6êS×þ0ÚP|rŒØ–b”C¯Ë[vÙœb [TšRÿMl‚)M!N#‘Ø*ÜÎ)–&ŒØòzç ;J¸¶S÷¶wűò°‘fºâX´!…*æÞK ¯Òœa 0®㢦µMÈsÀb7Þ"›ØB6”´¾w Ûï0+@Û»#Y{±‚ý«Øvñ”^ŸV¿Êw´ÕÆÚÇ<ŽJ|­ ûzc†Å79Á#­)zZX´ÂØÚŠIçz_½›ÏÞ'Æ*…å…Ж+Œ­@ÊB¡2ÚÖòÑ9œaíòª}!Jß FD’VûÀ¦…mo06(;gØõKW¸Ý¾ô+Œ…äòØöLÛ g…±ˆ¡ß™¸Ó‚£Å¢ø1*vÓA«¬$C·¬ý'›v}ŽWž5luµŠJbTVg#ŠBÛ»¢ØÒ\˜séþ³ø ?3ŠÅ¬ñ,bû¯(¶´$ÉÇ‚ˆ~WÞVPrŒÙŒ×LGÇÖ.¸N`8ûŠbefîûÇÅ÷|M±4¯ÙßÖæy‹ fÀ„¶ÔJIŽ–çµv[Ý —’ûíïÑ b‹[Q ©´M÷þí X<ÅUGRæ‹ÒVvs†…hºú’M«†d‹‘ìó=ÒV0dž¶R¹“‚ xuY!¬<5hW+“¶-­Âí&X\tîvÜ×í&XË=øÔäÜaɇQ¸ºøö‚ åzúùò~&ÇùÊ–èGâyZåÈ>`Û š06޶:£Ø§y#¼c„7·ì(>ËØàý°·) ú€}|-+\¥;ŒmžêX•µGTá x¹À]žµÕ'3}ÅÐX“,Ôp6¿õ5&ºäÃXGjj…ÆÞÄhãTÐñæ=d}¼fm³6š~jiÕ"×¥¶gØííQÍÛçíZ7©8ÚÞ­yÐ/q…nf…±ü·µÕ³ÃXŽüß–¸­#¡žða¬¿ePOø0Öyé¢íÝa,¯ P&»YŃŸ^¥ /ï06}ïI¼}°¿e׈ÄÛ›žˆêÊý¸§)0Æ,‹Ü{EÆê¥%1ÌY¶îu$šì¯;Ž}šU›}•Ûí|›œwGnéÈü¿4¿ÏÔþ!Ú°ÜÊ;2x¯ÀýýÖ5øO+`ú;Ûi éE¾N÷f>vÛ° Xu%ôß½ˆž±õ 75a#檈èmo­OôÂI+ÿúöŸt¨·®µ{dŸå¶€÷¨÷ÿçí\v6ɱª=ï«x‡0 çÉ ãš!Ô ‘ø«Õjqÿú÷ò)ö³âkº2K•AV:ã ‡Ãao?^^{+†XW³ 
éUŽÝ˜Ú'íæ~Œô*[prEêÝŠ]†ÒßVx”Q¯K^Þî‚õ–ÄÆ=ú0Ö«ÔÌ5ô!¬W)˜÷ÇɬWI¤[0Ö«ßlPËëUÂëK~¼ËYËëÕo^GÜïºêó=¬·dØNø2³^eÂ>çè·Õ´¬w+çÛŽ½E`½[É9Ã\urëúߟDíõ*—÷’d†½ºd*[(¥!3ëÕeÓ¡ý–Ú2ë-wŠñ¦! °Þ­8CDD»nõ7Ö»W̬ؑw+¦š"[åë-IÎﳟ’ëUC®J…Såg`½[Ihý¤õ…Ìz•m| hÆzÕÈ1éžZ|Œõêº#õÌzËuEX¹f½ºNÞÌý³Ë¬·\'Ë¡µ¢×˜é²UI«Êʨ·ô…ˆò;ͨ·ÜîÔÿ7êÝJ’£©§³Ê¨·&h¿š¤w+öØleÅÒ[jyÅŸµ¯dÒ[>žUš`¤w+.ßÅϽBϪäR~ù„Ó3é-W$,žI¯·G&½[e-{Gæ™ô–®—v.é}]–H¯n+VÌõãɤWe÷±¬Í夷| ÇpWé-×­QV|AzuÝ¡ Có]Ée"½ÞÈ™ôú™Io¹.Æ!ùz}ŒôúóeÒ«Þ³•,'Ò«ëöã5¡^]# ö ˸‘Y¯_–P¯¿…„zõpɨ·\¶éhOýä2ê-×)åtµŠëµ—Ö[^ªÅP›õ–+¦¾Õ Ò[.Ð{¸BM¤·|ˆm²0Ð[mù–Ö³N½z®Yîse ôÚcôÚgЫët¸±ºôúÃ%Îk_8¯ß.s^ëÊà¼eœyLÀym’ç}Ý/q^¼ÌyËuzC÷Õaîà¼þò2èÕuÑÊË\ízËý¶Xì ? ×Z3s^¿Ày9Eó–ï;þfª0À¼e(}’>4¥ª=UƼ6…óÚÀÌkõË”·4FŒÑ+òÚÊ«Ë"øP棎ræ--õ¤Uæ-µœæ~$˜·|kw å-·ÓŸº‘ Ê[ªy•=‹åõ§Ë”×?LyKx¥Ã1ëT1i¢¼*;÷ˆE«A.(¯bõ—ù¸:Ê”×?«ŒyËuÚ­FYà¼å~ò;œv缯fIœ×{y&½ºÖªƒØ&ÒëÕ̤÷u]%½ºd{væAzK”'/æº2é-Ý2Ö¥Éôz+gÒëŸA"½þÔ™ôúœI¯Mà ½Ö/3è-AÀÑ!ë$Ðk‹€ÞÒb{„µgÅ–ô–þ¼ &Õ¾—A¯äôúãeÒûº_"½¯ëé}]—H¯±™ôúTHïëv‰ôúÛˤכ3“^ŸA2éõI·©ñí}gÒëãWF½^ÃŒzýÅeÔë ™P¯7HF½>~eÔûz°Äz_·K°·Ì:WÌ” ôdØë¿™a¯Oöúý2ìµÉ'³^o™õú°žY¯¿…Ìz½ŸdÖ[Æ6Llu0ʬז•™õúX”Y¯w–Ìz}(ͬ×;Kf½þxõ–NgÖÆeëõ—YïëºÄz=ÚȬׇçÄz½E2ëõ—Yïëv‰õ¾n—Xo¹ßc$ ÖëžY¯ß/³^ï(™õúý2ë}Ý/±^s™õz´—YoY^Ége.ú"°^[Úƒõz›%Öë‘sf½Þ×3ë岇êwŽT@½å²GDXôóœ¦Lîc¾šs«ä%îU–G¼Dqo„‚{,ïïùÍ|‹ · î—¸WàóéUÈ|ù6È|çr̽;$’ù*“’ œ Þfè«,Eñ‹1 V œ¡¯œžTÒ„¾%iR<^Ýf"ôTb¼ª2_%O{tÙd¾6E˜]7‹Œù’Xƒùb½bÌãÕ‹ùŽiîÅ|­*`¾Z•ÆJ¡îwóÝ”6})y–^ÌWÆWÓÐ<’ùJêªJc¾Ú Ž•éñ’÷–ª Cg#¾ÚÒÚ{’ C¾:¾vC¾üI_î6øÊÂ%:ùQEÖ ¾ºu¼¸ê¥fÈWR=ù!ÖQŠÈW&•#±¡!_üÛ¶.H'ò•Œ/Æš­jê;òÕö_Ä*UdaÈ×®òݤvŽh¯×ùÆoîGŒôuP ò•4.Ú®æÆ"òÅÂÜ/÷Dˆ|µ™³ÒÖÊ€|íñ€|Ùˆ|7WïQ¾|Û¾|Û¾‹Fˆ‘®‚ÀW§JŠÅôò¾Êé+­z;àec=çÄׯËÄWe÷ÜDi$¾ªæ~7% €¯]”y¯Šj²åã¼w)¾MµBÜ«êG¤ÑÀ½l’Á{ùľ¬oÌß׬„±õ² |ý'3ð¶“•P=ÓàË“@¾*›c1Rs×øúCgàËwFâ«ëÎé'gâkâëe™ø.¬Ìò˜é4ø!¾2‹;w} ˆ¯µ4ˆ/º ¯’ÒNwl Áf૲'Ç4¯5 €¯áÉwàë?™‰¯_–‰ï»ìA¾Öb@¾²šY”!§ 7*òÕ_ʤ·)|‹9Ú<Ä^@¾ÖV@¾QvŸÛ#!ÎÌW×=Ç;À|­=À|…AËІƒùÊlyGÍÈ|ýñ2ó•»Î"Ë×»^—¡¯UÐW‚€þ:ôUÙ±  ¯ÕÐך ÐwÑP§å%í}•eèKᡯÚìŠIú¬# ¯~3êØâ@@_ô }½*úÚ+ô-]î*E@_«J‡¾~I†¾~I†¾Z%:€7¿ä½*ÛϘ¿›Ð×-C_UeUb©:ìúªì8†xÐ×^ ¯?^¦¾Q¶]g?éHꫲø¸—­ŽC ¾úMù2µC  ¾Å­|Ú?P_{tP_¿_¦¾ö  ¾Ö@}ý~™ú.’KïC±êë×%ê»èpÅ£²õ-ùe¶¡ºõÕíž,:¤¾öÚA}åﮑ]û[©¯UÔ×: ¨¯®;¡eõ]´ÙµH}ý~‰úÚgêkýÔ×ú ¨¯_—©¯W%S_+õµûúÚý@}Õ/÷3¢¡»£ÝûÚ‹öõßÌØ×^°¯½>`_v 
¾öIfê뿘©¯ÞÞ“®ŽÔWá‹ï\=‹êkS(¨o”]O*¶A})§#õµÔWý2~¹ÁR_¿.S_Uñ\ºžÔ×>bP_ºA}U6Ÿ]:諟œ§qN Ðן—e›?/æ«D¡÷8ÌJæ/m¿ú™Oc¾M‚ùj²vfC¾—¾ðžOͯ„ßë°² òU65Ù(\_ _ÓÏù*Ž—V¹i€|oÅ:b ß?/ä«D{Zžõ: ß±z69C¾² \êï™ÆwŽ >ƇzM¾—¢½ýj– à½Wµôª)ýŒ÷Z-À{yy¯rx,1:žìVÞ{=TõÅ{©Í$ïå‹!ïU¦Õkl_÷ŠœÊ¶úzi|£»Æ”Ú»÷j*ïåvL¼w)ß\ HÛ[ã˦¨T?ìFÞkð ¼—«Wò^+ïeDBÞ«å]Œ™Çq¿5¾ÿH|‘øj¸•AhÛJñeø@äËÙ’È—Ó/çm"_¿,#_¿]F¾œe‰|E”&º~Åø*bê¬_» _ÆÂ¾Š+æøŸ¶“àkÏœ/#&_Õd™ût_ýä=¯}“Àמ ó^¯eæ½öt ¾vˆ¯U…ÄwUŠ™~ ܈¯Y„õLojÙð·x :|[ øjl}ü!@|úˆ|­*`¾ H#„ü ùZMÀ|ñ“Æ|¹þÌWÆ^2ÉŸ*MóÕê*Ïïõób¾T·á[@è»*ƒ@?ÞcÐ×î—¡/‘¡/ÞA_» onHc¾v;0_®¬É|Ùd¾¸#ß|;"_6&‘/ŸœÌ—·òå»#òµÛù²1|­&@¾|­D¾v; _^Gäkeùò¹ˆ|Ñ$¾ö[ ¾y¸0àkwËÀ—OåÀe¾¼x¯1]ã½ù½÷²·’÷âvĽ|8â^¶q¯•÷âöòéH{y`¯Ý °——eÖk7ëe“õòfD½æ õ*™sLˆû }¯õ¢^~MD½vP/_6Q¯]—Q¯õb.5ÔKªAÔk-ÔËÎEÔËÖ"êåýˆzñâHzÑzÙz­&½¼8/»‰s^+Kœ×ª’9/ïf˜¿h˜7=7)/ß)/;_§¼ö· ¼¬:)¯ÝÉ(o® /‹2ãe ¯Ý ˆ—}œˆ÷UvçØ570/ˈxù›D¼v/Þ5 ¯]9tÅÝx­ €I¾Ë"ð]3à»V¾kße“8ßÍ¿é|7GµƒïrF$ßeOv¾‹ëÀw­Æwñ›™ïò©Éwívà»VÀke¼¯2ÞÜZ¼”ðòñxù|¼¬ /›“€—a´^”ð’öð¾Êî¼â'x­ÌïÓC•¦½ù®’]ͤ|——UºDLÜQ¿wúé—œw¾¦E©é^hWyFŽ¡[%Úä“W¬b?ŽvM —Ñ®ÒX-Ŭ÷vKN¦}pQwë=ž¬KtëU‘ä*M0·Þ€6å[^rÞµ$ X®.{ƒ]ï¥ÞòPØõjÙ®îƒB¿ÞKŒ"Úõ^^lWÞ-Ö½yëf¿^™TEké×{–‡V²ðëÕ}m<ýzO¥=x^üz5Cf4ûõŠá)uÃvðë=•þmž; „_¯’«Ñ­kÆKúõ*cM,i# ¬7ûõž >¢½®ªì~½§(î÷0å͆½1 :0Û(9 {O­¹ïS?ù1Àk>A4ì=Ëògo¾¨Ù¯7⫤k=^%VЉ“¦\‡_ï)xø7üzí+,qÕú⻊[¦Šç^|Weç| 3eøõÚuðëUź%·ñÝâsXŽUÿiøõ–ê~%ðë•_μfOÞǯ7*¶M÷6̲_¯<ìtdjÛì׫ëŽûê§ðëUC4³â]•¬óu 3†l×kov½§1f8+…Îv½§ì) ¯âÖl×{ èÎsKÆ×ízÿ)óßѶٮ÷LA‹^]·/J5ÿ…]¯¾‚i¼u³]¯Õ>Ûõ*{T±F莼[o´Õ~ןüßUÙ­ô%msn½Š|·¹§í¢[¯µ?Üze¤S‹mdƒ[¯ªyGO¨~‘të-][ ½Ï—¤·¼Sškþìpë=Îg 0ÂkbæÖËv¡_¯œS/#ö^Jø5Gßxöª.÷H2 ÃÞ(:¢Å¶®X†a¯r6¯ÃІ½ºNGÛd@Ç^0ç1ìàØewÍRùyï믳c¯™WѱWç§–µŸŸ¦c¯6Ö¥í¥a/-*³a¯Á°W95«o×ǯÊîºÍÔ%¿É°WÇ®”¶ž¤aï¡U^÷ 2Ã^ÙM”-ÿ HaØ+ß‘å† 0ì=Ÿà×oiMióÛAöÒ”†½‡È9Œ`Ø«)zæú6r`R3ìe“Ѱ÷Ô¨µ G†lØ«¤$ûÞOˆÓ°Wgûâ=/õì" {ÅŒ”œtïü7ö²UhØ×MûÚ ª_/û$ýzOÝôègøõò·`׫ðê¸[:³ëµŸÌv½:|!z;ÍH»^zD¶µ›5$¿^»~½%ÙÜOüÒ¯·Üàꇫé׫ÓÀ:-Ò3Ã^v:ö²çѱWgã]oӗ޽ÑõûªÝ{ùeѱמ/;öÊŠb=Ú!-sìµÛ™coÌ×Rs¸šcoö¦¦_/Jš[¯èK­™[¯Œ9”Ñî%æ}]·^5Ú:dwtëµë²[¯F‡eH>éÖK‹OºõÒv½vüze£xu¼ »^kzÚõÒ·™v½›÷NÌ®wS°Û(¹õÊ÷›n½tS¦[¯•Á­×ÊàÖ ¾ÙÐÜzý’[¯_·^+Ën½ö‚èÖ«ƒ|רJ [ï&)i'ÒìíH¯ÞMÙ¢¿$½ö¶iÕËûd§Þ’^7Fª&ÀÎN½B#ûpz¤So”Åßý€ œzE 6¡¾¦ÿÍN½~»ìÔ«ß”¼äS¯®›¶¥«¬áÔ«%ü6‡Ó©WKx-‚Z2 8õZ]àÔ{ªeÇÁx:õž 
~g8õž:¯:÷lStê•ýú6‰Ò©÷ÔnALÂX÷qêõgON½Q¤A²«·áÔ{ §ÒŒó–ÛÅÇÓDntêåíèÔËj§^þÔ÷‘Þßhß  _mæ}_­"XlëÕ_­å5ßD$gS,:͈:ðW¼Ä¥Üø*OÜú¥–W}vºbšX_ö ú äâKË«¯u›çžRÁxoÔK¹¾sÞï¦{bïÕ‰Ÿq8Ôxï¥)dX?’÷JÆˬ©Ë|{Óc¸W çõj‹dý1;ßÏhâ^­¦‡•qï¦jiº=ÒÞøÏçÈ8i¯žô<ÚiZ£½px7ÚËëíÅgDÚ{k¤WÒûý%çÕÐvD¸ô%íUY`Í€Ÿ´7~s>ŸôY÷ZîõªdÜ+ %¹ÿW¸— Ú¯æZ‡³i¯ÆÇe8-ö*`8â—ZBÀÞSç»Î¹Í!ë=5?F¤q´œg™õøë=¥ÞKb­³Þ³ ²£Åpd½tô'ë=åøX €õ*1GÔ|>ïì=•².YgØ{j±»Œ¤Ž„½×³ ÁÞCkµaÅ Ø{‰1ŒÃÂö^šbân>1€½"¸òShV7€½qÝ“hK×AØÿ0:E?áIØ×Å#¯ê‰ƒ½—fíGî ÚëÕÌ´WAÏ4¤°€½òbIÔSžö^Ê™2΢öª–'K'`o\kÈ‘7°÷R4ñH¾{µMs†}öê~÷8ÓOØ«ûmk„½ÖÒ€½6öj¸W:Áæ7Ø×íJ=×¶63ìUÊ›ˆ;úWØ«ø2ñn”Ø«ðwz6Y{ãíÅ÷?"lÀÞ¸n߯š{K”›t¹ö^%ÁíH„Ø«v=J(À^ÝêŒ!o»^®½%ú–4‰ö^Ê®{ŒLQ ½ÑJòšô׫™i¯ß.Ó^¶$i¯®›©hoù®¿6qx]—i¯¾»cS"í½4°>§8A{£ž«,¦Z¶+Ð^{  ½Úw>{VGÒ^5gôµåœ-nÛ^žÔX¯ºÊú¨‡3êÕ+XÆq¢^n›õ^:¿>ü)@zuÙü÷­¨÷|¼<€zõLS„”-ZêeHFÖµ.¬—cX¯µ+`½ÖŒ`½KÈzí:°^{Ù`½œ°ÈzùÚ€zu»mœÁ êU¶.Ù‚U5Q¯½n ^»]&½ê“GRû&ÒˤWïüJ èLz9ïôjúíÇI¯0Ãô8^tÔ—\rå¼_–½ºÓt%;…Œz5sÔÅ{uaȨWÐBgŒ¦/X¯n7=‡¢‘œÍ ÉÙb¶“WiƒŒžœ-â§a×ÁälŠG)“³Y53ëÕuÛ£,ëµñ¬Wù1N˜õ2&#êµa¨WÝÔM–Hz•²nŽvƒôªS*1î|¾4½Œ¢zí“Ë ×:%@¯u€^ŽËà¼ê“ë8¦GÒ«è~ŽÈ7ÖËTæd½|,°^5þýØô€õÚeõ*æZ“‘BB½ªÞZ“}õ2Ä#굨×>Q ^çË#$ëåÔ‹™ WMr<îD½ìƽ6HôZèõÛeЫ/ñ|L:z­Côúu ôZ“dÎk38¯ž.–`#åYæ¼v7p^û¸;èµ°¤× ¤÷Ò¢ ™(dÔk ¬o;“^"½|×à¼6oƒóÚ¼ Îka8¯Z¿w ç¼v?p^›€Áy­õÁyÑ$ ¼Š‚ÎÇ‚ ”7Zü<ÇIR^ÍSg²€È”W+œ=ÙQdÊk_~¦¼qY\5v,@y­/€òªšÛ×v v;R^M§ûïÊËñìûìþTr²mñö¶øÊùƒfŠ­|ïúÓRÖ*­úï?ü×÷°dQ¯DÃËß U‘ïÂ×ßvK=Ý/é‰ï[Þk9ª]tüÇñô„ß½»"ìVþçzÿ{úüÈ´f¯o~üGݦ˜÷ÂÈ;¨èþ¡÷¯ÍÞ*Ðÿã‹ÔßüÓÿQIßb¦]6iõ?ëTì˜ÉD¢Rÿüs3†ù§9þw‹Š~þåóoÿ!ì?~þýóó¿þá_~þµwR2Hi5ôþú;mßq§˜Ô¤¬^Ê”ûkotø~ÀPùŒÏ¿sécò*ÓSjüøûQ3ƒ¢+Ù–•ÍJ«Ê~LméšLÓk“Ë~hWXÒ‰¤ñ^to{/¨Ä}/¥6|/¬ÍWïåï?ñuíEƒÁhüH¬b!¢s§÷þ¦Ï^‡Ÿoåx›%–ÔZLl[,_w¹û]–ßzm2jϤÀ‡¿}ŸÍïówî±j“󌷣1š?¦¤­$aõ{èÿ•;üÇŸþó—_þøçÿýã_ÿú×?þò—?_ÿó—ß4|~ÕѸM;ñTbWÞ†*Õõa.ž(fâž1qÓNüc¡44ÓÒv(7íÄ?R ­bbS lû™ÎÆÑ7‡^Eנ̱ÀYæ£çã:KÙ7žgy#®ó2Õ5Á¤B„ëY%~Û1%¥„¶[un³ª:·’!þ~ôCSÛHke×£'†.u;–¤”8t<ììS›²Õ?zb­×¾®Œ¿—d¹­+w¥‘Þz¦–Mâ±™+ˆë»Ë*{¤»rE]sµI¨: ³¤àzunÛ¤ò9âÙe[u[9ŒÕ9ˆ¢˜iî*ÙMûñ2GXžƒ›ÚŽy›¤!íûY›¶ãdÞô~®ž2~Ó~ü€Ìʘ¢ˆ©æß´? 
ózjÛ®oJoçZ¥ñ×÷“yHýH%V}çÕä±ñ‘%©Äª.:Žüm瞤%Õõ17|»Eßx¤«¸âØúß$Oé”Y›s?3·qJ'!«ìAž{“f@æåÖÙÑãXêjú¼“Rb‘‘Ý8·Õ}´Öe•&æ"åMQ2«ìG6Töh%Ê~ÍH¾‰qȬ¤ÛÓ8·‰p ʼ\ñîÇá„MÌvPæUÄéžšܦýøA™Wí$rýÍ#‰%V¡—¡ÛPÙ#–Xµ1¥bæ­L–†h“äIß°iG~pæU€Gƒ¦ùΙWq™há«Â‰{Jb‰Uœy$ÏVÙ#–XEІºp»ç$–(Ç‚Ç Ëø¿I,¡Î¢s:Õ·®­Þ´I5é9j&èíÞ’XB¿¶ßǼN½ìK¨ŰØvm·{Ob‰UŸØ8«¶ÝGKDÙ=vn?y>Z‰Uü|(ãe&­D<³F ë¨](ʭĪ“ìCÝ?–Ä«ìÈ£l.‚ˆ¸"‰%Võ#iüÿ$–ˆºœóµ5«Ò]Ûñ5¯Ú3rD•=b‰µˆœºšt×vü€Íe¢™QwmÇÚ]h²¬ìÚŽ´9F½YçëX³OGKhóè •=b mE´†ût?Z‰MÛ c')Bž¤•ؤ…\MeV"â£óñ1Þç9i%´kö˜ÑÄ‹IZ‰¸îzÜw¥,mòØÆžÜ®#íƒ6+.x¬ Töh%´ ÷xÌî%Ïh´Õ5­åV÷ãw»+?™FþéüÔ=¿á‡¸k+¾v¯¹\6=¹0ö’ˆ:ž,&˜ÚoÆ^Ü-û0«§~¬{Töh%ìŠMefí7•bçh–1£¨LÙ¤âåDÝße "âñV…á[ƤëUÙ­Ygêh’»f`ßeu«~rïµ ­u/¬VeOR /;’TbÓ»Ìïª$¥DÔ2Z¨Ÿ%Úë¼ñôØ!¯¦c^¦1þ¾´žVÙ¤DDõÄãóTŒÅñ“{k•¨o;¿·Çð#J„½¯®·Jº*?Óû¬O„Æí0·ÊµDÜnbÈ½Ž«½Ëª^gŸ#+¡ò/X3úKÄ¡Æ1ײ=É%ÔT±¶C£*‹ï©wØCÝ©Ÿ¶Ùµ/ñPé°1z©uWwéøû&÷Ê»wÖxg±žÓÆz+»c(iõTj²¹%iŒpö'yp·Þº+b_–µ¾ëmOZ ½3mQÜíޤ•лÊΤ•P—¬[·G+{´*‹iHÖî¥Lzã­u×}R*Ìn‘ýSýJw½•h2–HekqßËóÚ]w o‡.v[‹¯¥v×}‘~zn¾»6äG»k§7¯Š%Töˆ%vébüjÍA{KìÚl»Zb¯}ß’Xb—Tw3cüKœ‹»âS¸¯²¶ÇâJÛµ¿rm¢²tj•eÄ«Škª·‹Øj﬎~ÇZ!>ƒú“åki6Ê®©‡Ñzù\Ü-ðxèh«Êbu•ÎÅÁ‹Ae8—¼â%ñ\\:O·Ç¢i;ûøÊC^1•Çíûøª%ZÄ-r¿.eÊÌÕÇ×ø‘X{w¥‘Ê®»°»Ìœ¯­ Ú”al4ûy/= Ù®]ùÆîe;±Eþ»6åGEñít?½œOì¤Ï§.:UvÏi€³Þµ)¿Œ–³Ö>ÀÚ4u~}CgÐõVj·X¢ÖÌÎ{¬®¹„ÊbpmŽ2{9†3XŽõ•À—¿•¨¯žÜÝ%mQ¬*?OýXè.ùòžFØü`Ò(c„½”£< ×ÕÊ"i#¬´72±n•Pß#ì%Õå ¨ú“rC¬Äz1ÕV2±ÇêêLØ}MI0¡Eñµô#‹*{j¬mëŒwíìßi”ÿèÖ½è.¶'$õN“xïÚ—ŸÆ(»+Ò‘§‚ïÚ—Ÿó(›^€öå—1Êò…kgþ c%%›úqÍø’h‚‹þX¢$ÑĦ …£ÄËH¢‰­h¦»ù@¬RµQV¹â¯ž&QeuàVŸÝuÐæè‡›vFØû(^îîܱß%ÍVe%?=cî®d¬®Õ„Mü÷šT*ÓÙƒµö²{­ª ›cyõ¨&vMˆSwì‰GLª ü±Xhzóˆ~Zž8VñðõŠÙ\i[§=¤]ëg[Ut?q¬:þ8³kcþ‰cÙQ´1ÿıè`‡6æs»éôDÍoyhcþ‰c—w­”ŠáûÞ1•Ä6Odp®CÃfŒ5ÛÓgS´ý\)åzŸEß;¦r~í‰ ðég^p$éĦ“"ƒ—Ç I't¶©£ •]I:(â‚n)sLrιŸÈ u>•Ý#Õ[hT#F´X½L#2Xu0³®ÓY§JÆ8cuÌ´Íí‡îP"Ó'½¥5‰Ékêºëâßw@®'¹ø;çâ öFÛ*çxÄ»ï=­ã NönêÃ'°W‡ë¢][Ø›l׌õ -GYŸ3ëݺLjÒr׃õ*Ãòq,UeÖ»ÅBf’pêì8w ^ÅÖ-q€zűGûêe¶A^3Æéµ† ½–¤WØüˆq°Í“ ½%—wÖÍJ¤WÇcÈêñ,H¯î§Ý›dÒkùDz•!:†JYŒUê9å$1ZGÈݬè€z•Í9ÂΫéÒz71•éꥣ^Õ~Kü+£Þ’¨:B©v¨w›th{8!õn1NDlЃ] ^ýf|ÑgËÑ‘Q¯~ò˜­A‡w8oiÆiØ$€óêº1%çUÙùl%ƒóª–rMkâApÞò¥žÃÙœWee›§Ñ—Ìyõ›«Ì«:œ×¾~p^û®Ày_Ï—8¯®;Ÿ-p^ûüzË{x¶e2èÕeÛ>5L½å'u^yøIr¡×óH'0ôz53èõGo W?w]³`ô ô–ïí¾.é} 
ôêç–mx»ô–Þ0Uüò!èÕOÏž@¯Ê"¶h&Uà¼úÅõ.Gà¼åó>‡¯8¯·æ¼Þ÷2ç-ÃÂùð‹ÌyKÙ6œÓÀy˾Óp^=ƒÜcÚÁJ€^•ÅJµûôê7—}x‰ô–ë¶{¬ð3é-÷»î±ÂϤ׆K^]àX§dÒ«ëÖsøÕdÒ«O2:ZOÛÒ«ËvIMj 9ÞÒdQ§&éÕoŽgí¤×Û#“ÞR¿yî‹ ÞMƒË¾vñ3HoùÍk¸ØdÒûº]"½úIÙdõM—ŒzU÷S¡j'ß öª¬²uˆÊ´·T3Bß¾’ioym=’1Ú[~3Ö ít+hoy7ñÍv´·Ôó–3 ½eèŽåIʃöªžQÍ~À´·Lv1ßö­—L{_e‰öê7çX·mWÐ^]§·ÐòTö–g8†choyGËÔÏ:gÚks2ho)›Ï~d´×{R¦½^•F{Ë4.ªyUú’q¯9÷–½Å«©&ÆÀ½þU%Ü[ƶyxl÷–1cnyÀ½>fdÞë·Ë¼×çêÌ{½.™÷ªL»µí°xïë7ï-ñ£3Õ¼×?„Ì{ËoîÃÏ ¼÷u¿Ä{-.ð-ír w,ß׳'àëcp¾ÞÖ ø¾Šðõjfà«2-Ó;§ÌÀ×Çà&{ú‰jõíæ |} ÍÀ·Œ˜rÉnT0߸m{Ï(—o‰f¯{ì»fà[úÉÑ]®À{_¿˜x¯à™÷ú8”yokb mdzÀ{ýϼ·Œ—Óà½4ΘTúF{æ½å7clh¬Á{¿({ò^Äo^ñêûŽzæ½"gÞëóHæ½>çfÞ[Ê®}ìDfÞë÷K¼·T3ºV;ñÞûº,ñ^.À€{}Ìl¸×¿©Œ{}¬É¸·¼ð{k×@{½ìõ~ža¯Ïªö¾®K°×Þ `¯õÀ^ `@{˃OÃõ´×¾Ð^›{@{mÄíµgí}ýf¢½6‚ö–A}^v ½–T´÷‹²;³i¡˜i¯Åê ½ÆC¨2× ;KÐ^‹Õ‹NËšïÍkümvh†}'9vÕ$¬_h|/u…Jì+ÍÝí]ågÀ¾LÞ ì;«]‡A¹ïTØØÜòû’û2.!÷þ,ùL3ž"ù•-Lôô¦ÐùÔ{?ûJò«%Ðý@m_~iD¿\:ýrì$ú´}} šœÉo<]|¨ë´Tó=_Ž«$¿ºîLZäL~ùуüÆeiôD¸F~' ö®&ô#ù$òºɯžN}¸é:I~¥…_{2‘A~• ¥¬ö/Èï4Î"¿Ào<ëÙÍH üÎ2XßåÜòy߈ü§¹»øüÚƒüF̰ŽM h|EÎâS˜¿>¸ï¤¤gSøE¿Z ÞÝyÆÀ¯LæŽîE¯ß½y¾ß_/Ë_ù-ÅÔ×I?Áo,£Îø›–à—ý„à—/ߘ@c•Ýí©ñµYã+«Ä#Èš ß(Ûåd?Ooï]œPzFnj|£,.»ú¶Ù¯¤E1&[ÖøÆ·†é¦r‡ÆWþZûÒÝܨñ½5,C²oüǺÜòÔøÞpE‡¿V‡¿*Š ¶'Å¡ÈWv^1R¶ÅE¾:V8?Ç ò廡È÷–~ëÑ@äË÷F‘¯ÊbЧþüµºPäË—C‘o”ÅëŽE{Î cP:⢻óÝÄ~¥çÙš§¡ßEê¨îÛaè7†²èÍ.ÆÐï"±îÚŒf‰~1ÈúÅlfèW{ÐkŸX ýÆLJE¾|~%fX–¦‚Ðï,.Û­ ýÎJóˆn~\Ž£,†~1ýÄýFCFpËÚúd`¿Md¿|d¿¬ 4¾Ú4€¬uA ¯¾ä£2¢±_•]JMRóÑûÅÐmìW6UÇÞ„Æ~1û”Ò$^øôù–²õì¹¶Œýjß÷޵~«'د•ýæ¹ÕÐožìH~1/ùÅÌjäWJ9çµQH~ÙÕ«ÆWë²xg_j|oI´JÈþ1ðû.z¾·ÄTíM¹œ¾¶˜¸×·ÀWWIïQSfPák4¾~]ùÊ/qê(ò½µ$›™õ-ò½uê<¦è¦‰‚ÈW3£r¼, g‘oôä5–‰1lö²Gä{küšZ. Š|u]4 ßf‘¯fTåe¨z*h|5.kDZ¦3¢ÆWžñÖ»8_•Å»êÚTh|5œžÇ‹ú–¸æŒÏf¯#zÖø2& ÆWnËÖ=‹©ñUK÷øÎ¨¯¿…®ñµá"_á‚yêGß(ò½eª³.]2 ‘¯u¢,òUš¢vmpùªLÜu9^ÔWO½(§Oû¶¡òU™$+K« òµ~•/£+£¾Å ©LrŸõE€hÔ—#©¯xS´ÄùVù–EƼvŸ)£¾Vê;+UŒÆµ©I}1úr^%ôåe`¾2ÐŒV¹*ÌóŠʘ¯úèÙíÈŒùbhÌ—3|ùô5í}Ÿ"ß[R„»™¥È—1'E¾r5RŠ‚òUÉVwYÞ_ÚóÒ}Ž©ñÕ÷¡1ÖKå …³©|óÉWùâ:SùB©l*ß'¦s‘oÒ]䛿Vùæi‰À—gZºÈÁ­‰|Ù›)òå¸@‘¯N–^=eE¾)òåpB•/§ª|9ªSå˪På+¯ªiØQåËoŸ*_»ˆ/‘QåËcQTùÚý òå!ª|Ù.TùÚuPùZ=¡òÕ”9£ª|U1ÍpN|}x£È—ON‘/;U¾|CCå+;N:K…Ê×.Ê— B™¯"±Øö­sÝGç»k•?0¨óe[QçkO /m¦óEKšÎu1/ašÎ‡>Mç‹Át¾8Òj:_œi1/Ž­šÎWÿUkR^È|µ»¶v–ý%óÅã™Ì7½u“ù²Ìd¾iùø’ù¦·g2_tÀ—Ì7õˆ.óMÿüûLæ$óý_åÛ|ßEFC? 
â«uoÌ0 ¾ƒøÚnw&¾r¸™ÏýeìP‹œ5’ _A½è‘MœOßC™¶öfG_%µÆßCk¹GZ ß]Û –fàën¼ÙÅw/k¹µ­jÍÅWÁÑÜ=+ÍÅW°;f»ºïk.¾ÙKêeâ› BÌÄWÊÍkX¸‰orÏ£‰/,ÌÅ7{Й‰/<èˆ{eÞxjc¾ ÷ÒóqàÞM3Àñˆq3îÕE¾âQgD!Õ¶Ê||eYÿjn¨5ûø®Ú«“Ûxòf_Ñ‚zȾ«’8žCa_ÿ¹ìáËÁ‹¾ö›Ð÷j@Œ€ôºßò^Ý6§RÞ+zwE7Ÿæ—µƒÊ®%†’jÒB _Ž¿ôðqûx2ÐÃWãý±õVôð2°ÝV&¾š –Qö­Û><&¾œ{ÌÄwR4?”–4ñUFâ£m_ÂÃWw“,ªûíÂÃ÷yáôïU–Ó}ÈKáßk-Ùü{•À{ïÀæß›n’…½jöujJ³îUúºˆë–·s/{6„½öþ!ìUw;®!„„°Wï*ÂÂsmÞYØËy›ÂÞµÐÍ­›BØ«ÔÇ1w/{í@Ø«ë&¹c^ÝÙ÷öÆuË5÷M{ {£,Ë¡¼‚°W}8>ÂknþYÙë¿™•½Q>´ªPöªì¸»¾Â^­Y"„ì:V{íea¯½{YQØk3 „½åUÎçð nÂ^u¯˜QººÂ^u°¸ëga¯FðùìŠ {5‘F”kùCÎëó „½š+£"_º÷Ú—aïZ²j©3„½jÉyïª, {£,šhœ‚}¯_—í{­_¾WZ,úI Ø÷Z‚}¯:ʱ Ñ<ì{ýºlß»JÊ}ö°ïÕxÐŽ¡~ŒóªL3G—¾Wé¸åŸØœ†`ß«ÛE°Ù ®³}oÍšÏÊ‚}¯Ýö½ºÝ ßåêN}o”Å¿éâÅ&ìU6•>uçõ ²®W÷‰Õ^3£®W—ÝÇPÌfao-1Lt“*{uY¬…»8ëzË`\²8Wœu½1†GS_ƒÖf]¯N*Ë$¼É ëµ~]¯ÊÖ{Ö€®WoF©¦ßW·;"~P9Ëzí…BÖ«GxN?S׫)ꌶlüº^%[*Ç?šü8ëzuzȺ×Ç˺^ÕEã»­oÖõz“e]¯ÊgHèzõb#hïÎÄÐõ²Å ëU‹Å±ûwAÖkO×e½Ö õ’8PÖ«[Eß74 ë]¥7¿ûJèz­{ѹ÷Òœ¶ôá‰Î½úH—3͸W9¤cú®½ËŒ{µôÎ_gT3îÕ#"±{}1ÞRëÒ–$Ëœ{í7áÜ«`9Z³F…æÜ+v-È6ç^8m›s¯¹ó¹7¦iÕªÁ9u½rç•@kkŽ¿Y×+ªµ•yïååÐÒÜ/PÙ«ë$¯®Ñœ{ZD«´°Àœ{%îŒñ¾ª͹—ÍIç^±ËrZνÏ_}äý^«pGÙÙ|Y9èß=YĨíç‹G8ªW$µ½ô ͤWǨ"ZïÌ ¤÷Ò¸z GT^ ÜËy ›Þ…ŽGJ Ô» °='Õz=IPo<«VÆkûÍŒz•P'ªõœI/-²Iz ôÆ*¤­’ÞXÔÜË}uÔ«17zoÛ›"êuÚœÜ`zW5罌fI¨W—=.’D½‹ô¬·,mÇ¡Z²^åÓ[¸¦—‹ïZ<‘·eXò6Ö»(K{2eȬW«ïMY 3Àz£_ÞI Ö·šcŽÎZ 2ëõ²Äzg-¢÷> ‘õÎÞì_¹øÆ‚ù§ë9XGÖ¿o¦O¤d½Qv=gŠÈzÅ@îuä±ë•MÊ^â‹/2¶mŠ,Ïx„—¯ÊæÇ¬7žï~Ž6’õúofÖ+(‘”±`½qÝ”ÜBÀz•åNïµ¥öëõ¬s‰õÆí¶GÝ@Ö+^1m}¢²Œm–+-³Þè´±Îí£¼elf¬WÎ G¬ëE¦ËØsg¬º#/P¯¦¤s8‚õj”¸õqF½÷O×[Ø« Œð¢¼³Nÿ R^¤w$å-‹âXoÔ‰À’µéÜ=Îs‘òîz}C¥Ê˹ž”w/k>åuÿÐî×çEyu†&† ³®;@y•À'âøâgä”×Òê€ò®²é¯5ý¼(o<¬rômë‹òîòЉç^{F¶‡òî2¡¡ ‘òZÔNÊ«Qâîþ"FyE?¦½C¨Ay5´ÇšµÓZP^¥A‹‡ìšß‡ñŠhMeAþÒõ–mBes©ú2^°X4p@λÊe༗d¸ëpö\m9Crµ1{9oôµ|û¦…ƒÞ 옫 )º˜ª ‘¤qÞMÝðî>ªä¼§,äÆéXæj‹©0>fâd¹ÚÎCH΋t’ÆyùE’ó.Z½ì-µ¥qÞS îî² ÎßÿÑÐ~½|a8ëÍ;$ÌÕÆ]—Êz5Jn1½•½ºO|uSÏn󽉲“÷β3æø*—d„@]¾Nà» L(1V-ð%…d¶6‚tßUãfÌE-e€/‘‰ßª—æÁoÀ÷–!Y? 
iÀ—96 |ã»íÓç øro…ÀWg™î~Í€ï*;ÔøÆß4¡§ß ëH|±_Cà»*½sÏAà«”3m7KÓ_ |…EcÌí™Ë|#Z•}ì^ñT¾Ê:G?UàËÊ“÷J {#à½ÖÕ3ïecò^=ØT(=ƒ÷^Ò¶>&à½ñ%FÓ óðÞC>ó©‚÷òË'ï%_&ïEÊXã½»Œ¦ž}ðÅ{“#ïåŒEÞË©€¼Wý>BÊ–‚À¯â…‘w Àט5€/Çf_î— _÷i™¯ [p佨È{±“Òyo4ë=\NÈ{ùÚ|£ûZm¶2¾¹©|9`ø²I|stBÞ LLÜ¿°,bpo‡’X(Æ´–è¸klýZB^W?AÞË­1ò^nÓ‘÷F£ßç1ìr÷®çÝýÛŒ÷²ïå¶0y¯^ýH¿c¼Wä.–N÷ü&ÞË-ð^Ž6ä½LÒìÀ7(¾Ü»À—Ûw¾6ÇøjÀ)`jPÀ—»2¾J»«÷[÷-|ýÉ2ðu.‰/÷óI|ùÑ‘øj<Œ¦9Þæ½Æé |¹ÝfÀW Ú—îFà«™'B›¡¤ÍÀ—-fÀº¾úøª]k?cnÀw~:°_IÂbYÒŒæ øB£àK]‹_­æ~Ò߀o ª#§—ñ^軎¶Ïþˆ¾ÏÈáO%YÛ!Eñ&?†+Ϻ¥¡?-Õ>9JëŸþûÿõ=PYôëáðò7„Ã_Uä»8ö·ÝRO÷Kzâßù–§Ž™ôÿq<ýà÷¯Bí^öçzÿ˜ˆÎQþ?´µÙë›ÿ±(‡lDéeýCÞA«@ûó½möVþ_Ô þæŸþšlÂGñCW ÌŸuÒ*èŠ PþQ§þ¹9ÆüÓÿ»E½??ÿòù·ˆ å?ÿþùù_ÿð/?ÿÚE0&½°ÈÕòëï´}ûêÈıB§¿öN‡ßé •Ïøü;÷˜>&ÿÀ¡2?å‘óþ ™a- m‡Ù*²S›åüié¼6¹ì‡v…-KúïE÷¶÷‚JüØ÷RjÃ÷ÂÚ|õ^~ÅH·,—ï1´^kNJ¿ªŸXîý=þLŸ½?ß> â6Ë!Å¿Ž”OÛû.w¿Ëò[o³·6·î-¦ÿã>›ßçïÜC‰Ž%º‰ø£°ó¦=4¿‡þ_¹Ãüé?ùåþß?þõ¯ýã/ùó_þgûMÃ÷çW;´%ÿh&”ÿ³¼ s0K,[ËjG;òÃHBD`ö²‡v仑D¬åtÖ°IߎCâš~ÂY2Œõ¾zÖɆ‘Ä*WÏÔç”ÓÏ@Ï}quÎ9í±Ô«G§öǹäÄÇÂÆc{*þQÎ|,QÁÔÓm¨ìFÚ0Õ&õ ‹?Å«í'œã‹ª9Aæz¿]ÚÁvZTê ¶5¥ý„³6/’éºÒ‰>嬾ŸÍ²#ã}Iøu×Ok7’ЮS’y]E½Õ:­Ò`'™W”ÝÃHÂZäžRêc•]CtÜs2D³jFàû¤À°jÞk2DóëŠçDï´ÚóÙZâàÞ”“ýé³kR•Ý»‡½ÏJ;ÎQrþ>þxgòCóë®dˆæ×]ÉÍšZ»óÃH‚oýÔîü0’PÙ“þÔîü0’ð²E²Œ§ÏÊ,¶6ËYRÆìOŸ]Ÿ­Î³dK›ž>‹ŸÜ’!šúó}oMƒß×ÐúÛøL[ŽòS'À‡“_€ÊC4¿®¿}ºlúrNÉѺ“„?ØýÓ2œ$4Ö<çfã#ýiNï²ÇM‰âÏuª;9Ñ“#š†6¿U wÎKrDó²59¢i¸”9Håð§¶æ§4ÌN2´©š*»§<Ìæëve+ê]V@ýžê>Á©½ùá#±ÇÍ­ìL†hþäWrD㈨²ÇM×ÝóÝØþ9ßÉÍ®[´@L£ìõ¢Çÿ´ni”Í¿¹ÄT»?£l~òEû:i”ÕiÑe¨lMŽh|y>†hV´W?4ý­¶æ Á‹Š$;4{3v†‹„½mµÀ•†Ø49žñO‡!šÕáN~h¬ˆ&ô) °¹Ìi€Í ¥=ùá!a Íª9 °ù½DPòø¡±–ë–ìÐü'÷d‡fO°'74k.ɳ‡ƒ„—É Í^µŽ ‰¸nÑ‘ò»Œ¯§ö`†…„~SÕ-TeŸ®àì1¼²*ÛœìÐ:EÛ$T$㟥^µ&74ÿÅ5¹¡yÙ–Üм¬BÿË#¹¡ÙkCþ±x°ëÎ䆿×]š5°6äGk—íSrC³ÛísrC[J¼ÑÍ€Uö¸¡é~JjP‘ý¹/É ÍËÖ䆿e[rCSY„›Mo2¹¡©.ñ&Zü®²;±ùÙ÷#¹¡ùëz4åÀ¹ŸÉ ÍŸïJnh~¿;¹¡y=ïä†fÏwLÉ M‹ºu_ÛÁ¯ó˜“šÊAØ©ùÄÚekrCó²-¹¡i,[ö»’SÙㆦ²â”R6cH®nh~É‘ÜÐü’3¹¡Iþ°Žçq%7´²±8ìýU6ÜÐô“×±6Wå ’šäQ)´sÐñB“šÊHFÌfÉ ÍŠ–ä…¦¢-"¤zPeš]¶&+4¿lKVhLÖ³ËßÏXZ=Vh~Ý‘¬ÐÞešUåLNh»´-?¥67¥6æ§4Àæ¦ÔÎüœØüÚ™ŸÓ‹²99¡-:ÐzMMB!õc…¦¢ëZÚÁà3VŸ ÖUšµÉµ%'4/«; þ\GrBóºŸÉ ÍË®ä„æ¿y=Nh~ٜЬ,–Uš—ÍÉ Mm¥,mõœè˪9°¹‰ï%9¡‘œÊ1q'N°,ñýÕ,€§¶ås‹ºìÉ ÍËŽä„fïTÛò9‚µ²Ç 
YFÿkaq»;¡±Y®’¨+°in¿”`K#¬•=Fh*»£.õ€û¥äfûþELpÉÐöHlªå%_Ä¿ân{2Bóëöj„æ•8’š—ÉÍou%#4/»#4ÿÉ;¡ÙeÚŸÒ›Û_òsaS·¼´!ŸƒØ|?íÈÏiˆÅo®É]èš·d„æ÷Û“_Ü|$4+:“ G•=6hvÙ•\ÐVß×–ÚíŠUÕcƒVõÄäX-bžM6hÖÐ:í’ƒX+»ĦÆ\–ÇÍ‹Ödƒæey¿ÁËödƒfí¼þWßwB®§½ø;ã òj‰vÕ¨íy‹|u DPÞeßýÜ'(oDåóúxôòiÊ0A'åÕ¹†ud…$å•x½®¦˜4Ê«,{ÉïøqÊ«¥Ñùä^å•|¨:'-§¼’ Í#)ïªSÃÌŒ”×(6(¯SìLyW]ãl†QÞ[9¢¢·Îµž ¼Ë‹kB­'(o92\úHy%Š«ùŒò–³wãÕ‚òÚMïvFy%ï[»Ý¦Q^¯aºf”W‹aØôPÞ)FrRÞxóÙwycŒJ” œä#â(ë³7ä]¥ï_› –WÃÏãÆ1õ²y£â l™Z ò Ÿ Ç%ƒ¼1Љ>U.È[†»«§Ë5È[%ÅTPëÈ[NÛ= ;R^IÃã)ÛN(¯€ÖcÇDÊ;+úé²[B^Ø5!¯?B†¼1wD;O 7®‹¦ìI yµÑø¸›òFÙùy5=C„¼ñë£;$äKÑjòêvñ´D-òƃMC!¯&Ìǯ‚Wüò1„ äUcSô«þf‚¼Â¥Jdв×gȜכ,sÞYŸ­^× œWõ\¦î9LÎ;‹ðŒÓjà¼ö5‚óúí2çU“Å’užÞ˜W—õ(-a^k`^¯`ƼŒóˆyuø9Ï Ìkݘ׾6`^uËçd9oÁQáš…‰ ×f?€^Azmúé²éˆ½&#éÕˆPËý@Òk}¤×f ^öÌzùÙõÚe@½VM ^ë@½šxbbn)*€zmŒêµ±¬—£=P¯Í; õZêE(Ðk/ ×[*^«]æ¼€óZÁy­K‚ój°_–’ÎòãœWÑçsfœœW”ìºz¢_r^µâcjCÎk±8¯ê9ÅDÐ!c^kL`^¶0¯õ,`^¾kb^Eë…‹-µ,c^¿.c^NÀ¼|åä¼ ¡Èyýv™óò¥Ϋv\÷½%˜!çU5Ö¹Lèç¼jÈaGBÌ«†Œ¦‰y¥2ñ„y58=‡¸‰y­‚À¼ºnþļœˆyÙc‰y­šÀ¼ÖÀ¼(‰yÙõÈyu]¬[B‚^NĽ~]&½³,m®îôCÒ«ÅècÑGÒ˰¤—‹$’^ë|™ôZ³€ôZéU˜øx(ôrÔ.¤—·ùÞ,Çßæ…fÈw”< k|!ߘùcÈê>röÊjþ˜;àƒ°wR(Ô|ùÊNëT Ü'¾Zew“)'¾³ÎüÏ=G0‰¯V>1N·V$¾â ÓÒ7?H|£,¢2¥5Y?N|§bòè3ñµÔ+ ¾ñ™w‰¯¸KY%o•gâ«ÓªÅþêzëz…‡¢Í}ÎÄ—–@|•/÷žÖ‡TfâËd$¾Å²è,çë?F|™”‘Ä·ð²ø´êÀE⫤™ÒtœëK×köxƒøª‰¯hߤ%GÍí“‘¯§sòe6S _&ËòÕOꩺ=%/iùzVóŒ|•f]ÏîñAäu‰5îÚŒ<ˆ|c®Ö ¾×Nƒñ-O~Æš|ÿ‚øÆegŒ¬ÍéŠÄ—9—A|õ›{ÍØV‰öÉ<ÇósæutínJt¬“ÙÇ8œ‘¯=>üîåMä;C±ÒòdqöƒÚ@¾%Év,gšeo‰D†Õ‘¯2߯EËù’õêÉc8XÚæ‰ï¤Õci°ZV‰¯náEË'â«Nö éL|UÁ¥¥šwâ;iú¿ûvH&¾úÉx®ÎÄ×*âËWâ[zlÜOžj'¾r+‹!(Fö·®—)}A|õj®h?Î|•ß¶0ʺèóÕÛˆµY hÀ|-a9™/ÓM“ù*Aï_r]²gä[Ò­kï¬K òEFo_&:ñÕ“G Ó/ˆ|™éÈW/h³.oéçË>Ð:¯úßvvï,"_vôF|m"ñµnâ›óá‚÷–_ÔÂ~9^¼WiØx›×\‹næ9N¹ÏÁ{ËOÞg÷0!ïÕcÝ›¼Äîó^èdž[ýt2ïõ¼WcPÛ$ðeGȼ·üäòÈíÀ{9 “÷râïÕož‰¶-ð^-ÌbåÐ<É{™ñÀ×ë â«=¦ˆ–lŠÄWeñ1µŒ8™øê'ýš_k/_ÍÂñ{ÍS’ÀWÓî±ô}lß2]=KïT¾éž´ÀWco„P=A ¯ÞÏQý@?|U¦ºî›øê7×X›÷óøÆeûõ¼i¾úÉh½šc¼W¿MÞ2øÚ˜Ýï¤vê§°|­±|•êwªúÇ/S øÚXàË¡ ¼×j’y¯fÚk[Ç|æ½kqô‰ùí®5ɼ·”Å궯ɼWek,Jšм7êQS÷ôï- šcëŸÀ{KDðx ƒ÷ªì”mîÚÀmâ½£ƒ÷z4àkmà«®^ÖVõÉÀ·Ô%BŒ.ÍÀ×l¾Ö,¾*‹Éª÷Y_Us‹8nmì9_võ|§ô |íSòÕ¢4§o"_}oËÕÝùÚ(ækÌ—ë2_,‘ˆ|m`òÕLµ4âÈW‡´7g×xÈW×EA3½%òµVòµÌ×F(0_["úZsú²ž„¾lNB_ÞÐWSœÓñb¾œéÈ|‹_ò²Odg¾~]f¾j•=»séºß«uÙç}ùê¹â-7¡‘/º?‘¯W"!_ö"_†D¾¼ˆ/?_{d_{¡ 
¾l_¯e¾vYæ½*š”áéìâÝ9ǰ¨Iæ½útbОÿÇy/—ä½ZÍ(ÉYµY"ïe7'ï%½"ïå'Þkï¼×zx¯µ&x¯V›Ç¶5OÓÊ{£5¤d®þßË{§Á{çø÷ß{cMxµö{K>žaëo°WÆÞ±ú¿ÏZ–`o4¢>©áÓPï¬p³­9 õªCÆ×p5¼ Ô›÷´ŒôN2Ÿ»UHï¬N¯¢.÷Œôž1É´FzOùÜ2$½L¤ ÒËÜܽ1 Ý3¿^­ïæq¨ 7§Œ7΋••½ÎkÁy ¼‚óÎ1îlÈœ(Ë8/#Æy±Ú3Î{Ëg±Ûú?œw’güÐk“ó"²¡´Weêg-7N漊Ôcæì™ 3ç-ñlüH—O“óÞêå=¨q^mi- •½*ÚGjm*{9~æEù¼¹‘¡ì%ç1ÎËàœ7~Töcçeÿ"çUÌ¿ô ”ör7Î[ò—l]AΛ鄽ŠâËiû­¤¼¤…¤¼JˆrÍÍ–Â^åKXÛˆøÂ¼-×­‹:æåÚ Â^…+1â6¯Zü›¬Ž¶Me˜w“õÕ¶ÿ³®×¯åÝåK¾5Ïr£¼RWF<]s…Q׫²&»`“”w{BÍå•S|M± ]¯¬Qca܆åµë@yí²²Wö§×ù¸>€òŠ;,Ä”—ï Ê^½¡i91Ì{(™rÛ›4Ì»ËeonÉê(ìU÷šïú緲טW©LÆÁ*{e&{ƪ´&3Ìe1cuÝ21/{zWöƃ-2˺Q絟çÝ㣚֖êÊ^¿.“^…ÊØq¾¢LP¦m(QÙkLÒ‹N#½‡LŸ®Î¡ìÕ;½ÊŠPAzñ¾ zóTLa¯³&K {íÁy•„!½&‡®×ËÀy¥ù?ΪÁy%W=ZÎI {í7Éy­ œW2Ž»)ÏÊ^…<͵}Pã¼lgrÞ(‹Î|4qöúÝ*æ=”&ãŽþT«Ì«Äñ§­nôAÙ«73GàÜŽ\óJ*´OÇ›òJÜ×ýçeMÈyñTļl(èz¹›oœW|póò†°×œ7Ç©†yAÀ–àÄ(ïñ$ÄzQÞó>^”·$eé§Å(ëµa”—=™”—OY¯Õ”—]‹”—d½ñ“ÚégFˆy!ýx0¯Õ˜W\'¢®ª\1]¯ÎÆ4Ü0o”mOî&bÞC!÷Ù·…ˆyÙ…ÈyÙ"ÐõzYÖõņ®×Z ºÞ(»{šÀç¹d_‡£t½*{¢Q×1Á´ï=¯u½Q6Ç{î¶MÐõêºeïæxÔõ*[—nBfœë5ã¼J)p { {U-ØLü(ìµvçU¼m†‘óÚO‚óòñÈyív ô^½Ï–DÇ@//‚Þ(“^§¹ ôÆlÇð¸è=*z&€ èe;ôF,<ŸÝ/‰œW HbÍr¼¥½Öä¼—VøÃ)†œWÝâê¶ŒÆyùpä¼ñ1½vó)r^}¹Êê1w–›8oÔåÉIiœ×~œ×êΛ[Ì0/ng˜÷U–0¯\Q¯æEÌ7OÝÇ0o™d»Ía^<€a^…’w·z0o,}bêçäÉy/%$èî!ÆyE?/Ä‹7MÄ«n;wKC¼·rQö“Ò†x¿æÇÞŒ—$ãˆ#^kÛ‚2Æ«D/[3°3Ä i£1Þ[ðni™=ñF”xËÛ«¡h@Þø,·cm'u òÆ4«ÙFN òj *]¼–AÔ;?_¢Þøžç—öFO!êåìVœ+ºp7A^¡™r¼à$êÕa꫾‹z'eÃÞšÒÍ /¢^»¢^mÕ,kßYª4ž·›R|Ÿ}ÜD½ßdÓ+à'6¿å¼ZüÌg—s“ð.%_Í@ ¼V”ï\™ž<Œ—á8!¯ër3åÅfr^¥&Œ¯v«ÒAB^¬@ ò‘MW°0f¿½ 7yí~ ¼ÐÃåŦ‡Q^å÷ÚG†b^h_rÞ´kn˜›3/Ì›vÛ2æÕ£oO¦[`^ÛøÿÌ Ñ[Ǽ_Ðß'·1÷ÙMΫ¬m[O—e˜Wƒñ=¬ ó&i‚aÞó·_Èy“Ó8/4&ç…ÜáÅy3Å&çÞ3λj’¨Ÿò‹ójè:çvnÀ8/µÝä¼ü&Éy±v3Îû*KœW¹ÊIÎÊœÁy1_š…ƒØø2÷ÍxZ8¨C¯SW<Á¡D-Ç|ô’äà°(ˆíÛTæà€7spˆëbu¥^ûy@/ÏwÒÀ§ ià`•€eÁÀÇ’ià€C%fàeg‘ü̽ªJ¬ÎÚ©3pàP8ðÈ Ð*tp°ËààÀ£ÎtpÀ {8Ð[€;J3…0sw€ƒC:xéépŸ8Ø£›®ƒ»¬;8¤c/ ‡üháÀ#®°pà©YZ8ð ¡…[Ú-òiÛæá`~ðpÈÇ‹éàÀNâéË€Û× r=hà@³ø7ä*Ò½OK÷ž´¦}[ÐírEàÞ ¼°¼l£,¹7ðeÒ½? 
ó» æ 8ekæ LhÞÀnLó†øÍ#n¿VLBóTÞ v¼bŠ’›w{1Íp{x7ð?½Øü`¼òlŠ_n´n8Ìöse´nÀ™F³n Ó ­ÌÖ ì[nÝKX7hæJ_Ö ¹ýݺe¿Úº!U¿Þ¸Á~Æ V87°¥éÜ ì©1u·ýO:7èû‹_¸s²sb7ð«¢qŸÀòûéÎ pg0߆Cæ°Û2^m=Év͹ßœ¬È¬òOÒº/Ô­r5iÝý˜Y7ÐxÖ mÑÀm›É­`ùë¾P·nÈÞdônˆßŒ{Çjòþ8ß5{š7˜ÅÌÐñ̼¡„Üò7økæ éù̼aÓ»<šzƒæ Q•ýZšó¢™7X̹›yï1oP&ò‘ÍÙÌb¼}ÒÊ›yC Ñ«:N}0˜7Àäæ »@È߯üy™7àéÝs;ónPžï¥Ÿ§3wƒ’aD::NÖ |Z7ì"WS‰˜uƒ¢Õ}nûäfÝ@+Z7Àoά¤•’ qþºv“FyõIŒ,äFya¬hÖ ûý¿(o|ÉOjlRÞø:§ýîFdaÝ  ÜrÖ¨–ò²û‘ò*Hôºe(¯º(“óúý&½¿Ñºa—OVÝPY7hðHªyW-AûDCÖ«rê[˦ç¸k9ÇQw z…ùâ'·ûíÖKßc½›’´ßm$0Ø“Jô‘áž›`o9üseÝ‘2ÖE¼ª{jb_°^Åw÷nëµ3¡/Eï$¾HoÒéÅqJ^€^V2ƒÞr˜z™Ö‘X-ÞˆÖ•RdˆªzW½Öîoh W&ÅÛý•oCŒ?Ôv¼ó±Åwó¬î2è&>Õyzõ®1½ÇÔqö> Î{‰' bp^™|Ï}k†œwÒ-}¼œzËe{Å0ã¼1*¡W_ šmƒÎUç‰eWßÈœWO.<«öîÆ;?GÞE6çÙÄðômXaï˜H_z^Õe·ß 72çUYĦ,½|F´dœ·­U~Á»qìÄy‹OAMÕöâ¼ê{›\É[rúÌyý´eæ¼å:mã5‰}âæ±”6¾ç-"îè=ÅJ½~“Æ x€Þ×uôºC7ÄäyöúLzË7,«ô–Ã'“Þ—;"F{ü‚éÕ>BDÊÝ¡¤×Û˜Æ —„jSWedÒk»< ½Þ 4nˆûEÐô™ô–=³Mzf£Lzý0/½YÚM=/vÅzý3mhaAï«– ôÚ&ù6°¡3èõ›@¯¿W÷mˆÁ·‡G½¯ëè}94<Æ Ê8ØsfÒë­•IïËžÁ¬¦c˜ÜgÒ[j8Í{O<˜I¯×ðeÝn—Iïe#Ö+­ß]ºiÝѪ\ì¯í•“Í6 Az½YèÝ ÕÄ5 ²3é}Õ3¡ÞòVïûh 8Í»Á®K¨×p€z_×Á»ÏQ¯ÛddÔëC)¼¢(ºZÏÅ•Qo÷q" ¨×«Iï»N¨×Ÿ9£^7J¡wƒ]—`¯EC™ö–¦emšÅL{ýcÌ´·”)cd4÷¾l àÞ`×%àë3R¾¯º˜}Ã-Sý–Ó3_‡2ñu³¡L|½93ò-Ï/0F‹ú ùú;¥}ƒÕ3!_Eùú°Bû†šbד–&æ[溘r{6ÇÌ|},uÿ†õˆÅØÔÜwó}•Uèë_G†¾>»úƽíÀÔ™2õ-Дc»§zKÔ·D×5.ËÔ׃ 8pö§C^„˜&Ú7°›dæëŸí¬ ö t_¢}Ã%ÖsôT¡´oˆ®¾/ÛþŸ]¦¾Þe3õõ£}Ÿö h²}½£gêë=6S_‚L}}¨Ô×ÚÌ}_uOÜ×CàÌ}ËuÛÚÙGæ¾Þü™ûzgîë£I澯²Ä}ýñ2÷-j Xõ”g ûÚÁN`_h2öõù=c_§û¾~3aß×#$ìûº_¾ö¾}möµ˜Ø×ÞlƾöAûÚ£ûZd ìksOǾ6]û¾ª‘°¯Mྯ²‡ûZ°îëµÏà×ú%Àïëº~ý2øõ—Éo©ç\j]!t"¿þx™üÚ òkß$ȯw°L~_×ÁÄA[Ý£6÷ x8Àà׆€_¿]¿Þ[øµ¡à×ûl¿¯Û%ðk“À¯Õàû<þT2´]%™Ùõ‰¿‰ø üñÿÕ?.•OGyýÓÿ῾0Çë½?P/CEüeU¾ jÛ=õx¿ägþïyG¿ÞÛ“ö?Oœûûß¿úÝËŸ~è½9÷»·?ÿÐû—ö®o¼ÿ9-íHÛ·»—?ýÐ{—önwoþâþõÿôÔC˜X;«Æì)Ö ² ¸¥¨‰ ýóÏÍæŸæøß-*ýùù—Ï¿ýCD&ÿøù÷ÏÏÿú‡ùùWÞçˆh0"‡s½åkñ«ï´}ÇÖå‰%V_¿úN‡ßéŒÏ€ü;w—1ÿȱ1?ç•Ä•?j.XcIrH#¬«Ê~LmÔ+»û×&—ýàÎp&!Þx3åîöjXûnj}ør¬>_½_1²îÊwËwñ쇔bR?±Üû{š>{†¾y¸ãm–œµÐÑþ÷]î~—å·Þf9îXÄêKgþö}6¿Ïß¹‡¸Üy,‹V Zþ :—c¿‡þ_¹Ãüé?ùåþß?þõ¯ýã/ùóõ?ÛoÆ?¿êÜ¥ù¡ùSNâ»y—¼»E)#»ÊáÒÎüÐy ƃ<ו•W«òW»£Z6çÔ§„ c©eÏi‘ueÖ°x¾®%©·W-릡PºÖ$®\7YJwÁÅumIô¤´Ãroé^®=‹²s¾»TUeÏµà”‘šEûó#¥GÔ+Ö‰ÝÊ®lÏ7¹]Ii´*ÿ®¸HiKäÑ]çGgPËžcQvêcu½¿´;Ÿó&ëÌ’¼—²9É W¡ºXÚ¯Uô¤M÷¡îñëÖtò‰ K. 
†¦+Ê´-Ùκ©ìQ GÙ+Îæ‰}Ý{’ð©,¯ù$_Útª—å~@žÊÎrÞÈÿöJÞe>7ÊÎ;ÖÚGqý½´/ÿä¶¿%jÙšÇØ­}ù'#t,Õ«xo+es:åÃLÒw±ë]Ve…Ɖ‡ÊUì"QÀr57·[^ØOBù[ÆYí,ó-“¦ÇìB‰…§ùÚ÷ZÍ=­Q™€YukÅNï±Q¶,1T7]• I*ódßò”YÜoáx¡¼»]éðœ—åC-*»ïžæž§$6g í{ž“ TyïÏã®dREnŠ™Þoå š_•I7QÏÈÞå$Nï±Ì_Kìûdšæí²ðÒ/ÛsbñK[,s3Í»kbæÞa5Býøz[7ûWRUGÙZ´À·rFÁÕ/¹sïqÉ2¥Yº¤-/es:-a?§lßc`]„·nóp/kêVÑÆMõ ²çÔ¥ÿæ–Cé751Ô£Â1K¥3 Q6Éê³(o㉒z8ŠJžñºSt+:z;«åõœsô›]éø‘—åãú ï/±Ø­TòGê¤Ñº¯Ç­!ñL4Õ?î爡ýâ’Nü,R® C–½™[³Ë”úh9nYå1÷º%¥¬ÊÖµÛ/Äb6§€×T=w—{=Ò¹>¿ß‘ÎܨL¹jê¾à½žE/ƒr¡õÇ®„'ý‚;ÚU%$’ªf+q“t–nQîñ©{Шì9ââ×ÍI}®2§®,ñÞ–¤×TY| ÓßÛšÊzY>Àf ²íÙBÈê¹'Å·5òv$ÛS/;ŸSªjÍY^wýÉë94¦» Wî|ow:ÒáOµÖvÝ>%¯QkÍ}NŠ!]§¬u;+^d:³e»^I¸âŸ§£~ÝšÎÅct\¯»ÖeKÇ¿5vFðÐN›ßûžÎcÚè¿'ª¦ûm’sV®~ïg:Çàõ¼¾ø«·ê§¢â bßûâV]·ÄäTáwü!Å­*Ûïn¾ss \­ù%®Q¶nW7bRÙ¸úo®)p]JYÚ–Ç}äSAö ÇžWÿÍ#®ºNàÒ«ì \ý7Ï'põŸ¼žÀÕky§ÀÕZóœRäªV‰˜¼m ©ì‰\í7Ï9E®VËsI‘«u’sM‘«=ù¹¥ÈÕËö¹z]ö¹z]޹j~RÔQ7ºâ§räjírÕÈÕZ$]õ_»säÊëbAuäA6×0M[dqÝ’#×(kû_¥lÍ‘+ûóµæÈÕÊòA /Ûsèªàýìæa÷uäÐÕêræÐõUv§A·»RèjWÝ9teY¬š¶<Èæ·pÏ9t2IÔÝêhþºÚo.Œ]÷+›OÒ}¯Œ]cez*´-e[Ž]5žÝº5ºBŽ]Ùš’Ÿ©Ï¢žG^ã7mÁ®õ~g^í~eãc‘ˆÿè¾n÷ꂵ¤c¬ûq%'Fê³1t¿ðRxçvVÒµºÃ= ˆœy¤MõPóçPÖï¹æX•Â-³þ³[Žf½pÏá¬×öÈñ¬ž9¢ Å2» £0!ñ²;Å´þ”wjíŽÅ$u`Î9®µŸ•—FvÑx:Ž’Ç]/Dp‹'Ñ|yÑ´Ÿ‡^þìÁøïD»ðyð壜ŒpyÏ«‡¸l››A. Ë&üßêÌÚ…?ó‚lÛº[)Lq®. 
tñ±ÀB¤ËBÀ§_ï¯ wƺ|”¢mÊùP íò9¥D‘À|½_ór!ÞÅ‹pHc1~t•X?†¥ýZÞ?*4•Gc^ÃD% _¼äuaÐËÂ&¢}äñÿÙŠÔû˜›l: 7†½¼Rb2uƒù‹o6Öc)ðŸÕ \ÕÑQÃD´Ïv,÷…y{Bï ‹|K¡¤)qå\Ákûúµï;S×3cü£tF„¥ƒ=ëRðE„•[tXW’o:Ã4÷.I$e[‰”ê±G áMæÁÃ1–HXzékxá GÙ¶“3"aQíc¸: çÄŽ„ßeOd­µæ>ìÒ„7‘d¥Ž®v‡ «H‰3Zv a­lÏááO&¼IVG2a)ÓÏ!3Þ$¶>öžâHXZ÷{À¤¦LÖO6eéÇ‘°ª²ƒ-"áM‡Àž¼.@Âòßë TGÂ^—Œ„7ÖÎo ëo×9zWíz@›NLŒÔDÂRÿË;qi5ÌHxS*ÐKGÖ+ÚÍHxSÎÌ‘êHX˜ãØczØ*.ÎHxÓIÇø*çóþ8Öu]ÅèH8Êös$¬#Öé†{äÜ"öë2ÞÊÞXËa$¼)MQ¥ûñBÂ^ÉŒ„7µÒNÔ{e"luVÙºNmSD8']w$¼i‚Ɉ„ýºŒ„×[ÞÔ=g‘ð¦ôqÃLX×M#E ™ð*g¾ãœ¯Ær3޲sÉÖ÷È7G&\øÚH-G0¼Ê¯oä &ÖuwLmŸtXGgâk9ÜI‡µ[7tu¤Ãªü2³‘¯:Œ;’{’ëù¦ø:ê^ é°yi¢H‡ý~Û‹ËxX—]#»ð°×$ãaµÊ=¼‰‡U¶Ž„äÃÚÄ<âå5ü>eÑ Ý¤‘€XeÊ¡´vvüÄЮ1a×yŽ|8ŠäBÐ÷7À‡uÙ6¬ɇWÏ'!ȇõtj„¶‘>¼ îTë™×‘Jš€Xe÷pu$ æ3ƒ¯’\ËtïeOܬ‡ž‡‹'ù°=4ø°:Â…¾ öûe>¬ëŽq¤„|x•^¶¢õã|XçÕ¶qdŒ|XaÜ>NÅ€¯-#±àúÛ:|VɇU6³4äÃzOÇ8îC>¬'_†°Ÿ|Øž|Øž|XO~Ž#¸äÃ*›Gb)òa=ú©]²ŠÀ‡ÕbË8fK>¬ºã„1ù°ž}G_ Öm¶qÖ‡|ØZ|x•še™É‡×r@aîûBàÃÖüàëXõ8(K>ì÷Ë|Xe¼4¬>L±ù°=;ø°5?ø°žá§£Á‡õèW?<ì’ñ°}ÀÃ*»#2oX xØú$ð°üÇsˆ‡­Á€‡í%k5v©:ñ°ß/ãa/Ëxx†jyâaÝo üއuÅ5N@3ë ñ°u!àakàaÕ~' ˆ‡­µ€‡UóCK‰@<¬'›Çéâak-àaëÎÀÃÖ’ÀÃ~¿„‡½(áakMàaÝm' ˆ‡uÝ=ÎÄk=ÇÙâa»ðð*t2ζÇoFÈÖx[Ÿö²Œ‡U—cœ—!¶<ìíRð°W>ãa6–áaû¾Ö@Ûè0û³Ña6¥ÑaUgÊŒ¯Å“bzˆt¦Ãº²ò/:ÌWkt˜ïÈè°îyHF‡_…‰ëW§a"B:Ì7htØÛtØt˜ïÒèð«0ÓaÐa' ÃÞì ÃþÂ@‡õ(ó8$gtøõœ™«iqä}Ðá×Cd:¬Âe8 .^ÃRÀè°î¶Ï£Ã^ÐañÃåí8ì 8¬i|G s`78ìm8ì¯pøõ$ ¿~5Áaÿz‡½a‡_Wf8ìÕÖÏÎÓÁàðëÊ ‡ý«Öû’YXõL38üºg†Ã¯çÌpxÕxyö¦]o¾7wò·®‘çðE‰çŸb+ä*òuÝð~kOIBá°´ó±üŸ¿ Ä«úðzµŒi&Þ”½·[KšpX/~8X™p8EC/á°ð‰–ëûÂa-®îÈOJ,%鲌”2O:m;2¼dJ,mªèkeE¤ÄRožÃò‹”˜šIRb‰td‡_—×ÄÄTKפÌ(-015ÄÄÜU!&ŽA&>¸ž3˜x‘_ÊÜÅkÌ›Ë$ýy ‡ãmE_ifµC8w½Îêœd²áøÈç³å•5Õð*Ç‹©¹››jXÿ±^jXGÕpÄxÑCæ&þ…hXk¼xM×þ&Ä‹`ƵÄÝÎb îçÚŒÓL4¬7qÜ-5ƒ©†#mkó☎®ˆå7Õ°]—UÃñÇÿi‰¯©Ž¢ˆ=[ÚeS Ï ¦÷&.7Õ0Òç˜j8š/Ö-)…©†g‡ËZ9UÃñ“w1¬ø8!¶« Ž¿×ëz4ÃúæžÛÄDÃñ#¦m &Î9ÞM3ÌJVɰÍ<ö (V4¼Œ½ꆣìÙݶ|ëM+¡Iõ(4ñ0ê@í0‹ f' t˜½€ÒaÖ’Òa60¥Ã|rj‡ÙW¡†'µÃìÔÏO£;ŽÆ‹îpÛù…xØÊ ŽŽ+÷ƶBõptO¥w¯ö†&^¿ö7&^ž~æxXñdLêÛQùåòçý¬ü…òá¨K,˜[Ò“kA¼ |ßåÃj‚ˆšj’ÓËâèiL?Ìë¨Ö1•³gd2ýp @Æõà û}lµ_R?l×A?<Å„yö ,ʇuòå(™?އõ“óÞsЛ|˜NùðmQi¥!P+Ú2v‡ ŽaqD&Ž5ɹõ¼õ&f“P<ŒzP;,¿Ï£;³švx’?âÕº™vX±øÒSÚ›v˜Õ¤vxR"ž¾'Iép<øµŽí7J‡£×ÄHÓw(f[ kX¼æ±%Dép1*Œi¡jÁ)^dÝ:·lF&–_ÔÖÃZ“+ÓÞ2v†U—}î!/Ñp”Å_oýhÐpÚ•°dÿB:Ìæ 
t8šq¯AïÇÑpü¦ÃôCI@Ã*»§±ñé0[Òá(ŠÀªï‹R:ÌËÀ†µö¾ª+ïÇÙ°j³Cßv¤t˜- 6<—W¹ô£Z`Ã~¿Ì†­UÀ†³U¦³a¯Kfúnºû I²aÕsÓò®1óʆý©!ÖÚá»ü`Ãök`ÃÖ¿À†½,³a{j°a]w\cKlxÖQ³sl,‚ «ì<dž$Øp!1»ÔEÙ°_—Ù°®[®#ÖõóÇØ°%6l}lxÖ9ºãlùBɆˆIÐ6…Á†U¦°eíêà”ú@1ð9ö™†í2¤i+®âûqlµ&ØIS8¬ônÓÐv€ {U2ö²L†½¬a½˜íÜûqDaÙþ{?ldXm\ÄëP?A,ÙÐ0û‰¡av>CÃìE††ÙÝ]8,£ÆˆtëÑVCï{ŒfŸ%놱Þ1ݰ:V5óÚž2‘avk#Ã|#F†½aA†Ñ û3 s400¬½])a`øU˜ÁðëI2~Ý3ƒaFÜÐt0†9V0ìÊœ èû†9”¸l«— O?Õ£âS»ra|›æw— çhÎÀ0ƒ W c>vÕpšê H(,üéÑÿN׿ªTøPá€Aa+&¦r(£IOŠC(l A@a²@a.ö… …¹ˆ"&wæ†méO0Ìï`˜¡·aRÃþ gžQû‹ÁsF2†Á@3680l¿ 0¬pC§ì`8ƒÕÌ…í+¶Ç¶¯`Øz3À°Á‰L†­3g2lÍ4lhØj 4l 6lÝ‹l¸8/‚Õ2cÃy~$æ¾Ù0÷j‡­Å‡ÙEipœá°5#à0?n°aë?`ÃVsgÙ{9ÎÕt6Œ2$›e53¶êh8kކQ–а‘¡Œ†½’ +;ѵñ¿£á _h8½N aG®††rz¡a\4lõ44Œzf4lÍâh8¿tGøn aÜ Ñ‘`ã…†sŒäh8Ó»oAÃyîp4œ?ÄÎ×Ð0®34œº³“a<ž‘a£Æ™ ã²L†•˜pjv'Ãs4{?ãd(ÔÉpš’ ƒ(;Î× _:V:”õ`ÃÖYȆ¹ßJS‰WÙߌb ã*ÖZjk)(^h˜€×ÑðÓ¾@ÃÏx£aNméhøR®~Øè†Y¡Œ†_?khx’l=ÔehØx+аƒÑŒ†ÙA Çs(Ú¨–vo4Œæq4Œ‡|¡a\ 6ïY$kù G'ÝÖ~ÄㆱAàhMçh¨dXsùÖOA½É0dؤaÿ=襃aÜ `ØßÀ0tïŸÍ`ØZÆÁ0¾:ÃéÓƒáG3á\Ø>pakRÃÂlºQ.z°°_,ìÏH,Œ!Ó±°=°ð«BÀÂy {Sa^*ÌÖ~]x2ÚEÿ~ÞþuŸ•ÄœÔÂßâ/œd¾ˆð*Wæ¹ùëTû=5`D {Fò`õÛˆÁãÕô²Äƒ•±7j²V•'y°’×Ë”ÊgÁƒ±˜!f„KÌuy0û"x0¾aà`%ûÔ¸[îϲ³¼û0â`•Åb¾çmVrÑžnʉ°%¶á ÂÊO?_Cª "<ë=á.™°²ÐËr¡׌„-½;°ÒŸoçÜ=B€„•5Þc‡ë +y{<‡¡ ëŠ5l©ÌHx^‘äñ 0³~ ÏBëñ-/•è ‹1ö숎„ý73Žëä]/°–e&<+£Ï~+ƒéÇ™°Ê⣺šœ9#aUeÞ®–dܰÝ.!aµJ ›ëѸoBÂv³L„õ²å@Q÷6H„ 4;æa/ÞäI~mÇöv¯-[IÎHXí¼Çx[]_ˆ„ÕóâñŠ?ú +•ôÚO  ÇG×ê5^H8>j‚l‡„õÑKÏ!Ûð¬õ§8„g­÷q5|Æ^ûÉ2ãÁìÊàÁÖ*™ÛUàÁʸôgƃ㠎­)3Ìëȃ•'sçŸÁƒíë~цí 'y°_—y°×<˜o<ØÊÀƒ­ìzW.ÇÁhNÇÁ^¡‘l¨ÇϪ»WœóàøŠ"¨nTÃx°?IæÁš#Šœ×ÁН<ܦUºó`kò`»%y0º©áÜóç¡Âq°•G¬Z6[!p°5+q°·p°uŸG( ¼FœcMÇÁÖoˆƒãënŸG/¼óØ‹g ¶g ö{‚[»s¶2ŒAÊyp¼©éŒ‰cúÂ@âUŸ „½,aka»@ؾaŒš„­“ûÏ[Ã’Ûû"öGö+„ù瀥a4è÷áßf‘mq_öpö4. 
K Çj7zi7øõBሢíÚ&žá¸ß.X3×ë†#ö[Ö#^l½Baœ#4¡0NU†L`±ÉpW c$70œñœqáEÊgS÷ž•üI.¬„µÇ×içbúYãn½Á…5¥E«7B.ì×e0óä1 ‚áøÍM¦§uŸ\8æÞ5>Ê6Lu.sùµG'Ù¦7D½Ö­1Urሶ;™.g.<©W(»{KÙ–¹°‹x×Vy¸ð$áùQÌf.µ_døV!:¹ð$#ça¬L.¿yi¿å~K…­Ó8 .¬{¯ç8Ë.eñ擈ąÕ*l¯w=J›¹p\µÆŠ¼%5$Ö­#bÐ;ƒáh½eÚ‡ø`8ÊÔ°í\1¸pÜn«ræ·R8.Ûé°°ÞÕ– |3ަÜ뉎Z“Œ…£ìØãûÞ‡ø‰ª£Q–}xb +|Ýâ•Ì×Ûax’õ2¤Ü «Ã1i×¼ŠÀÂñb£©#T`a¯`ÆÂñÐÇ•x3Vdóa?& ,{Œ“ GÙyÎòlØË2V-ãÏý:Øð¤àðP½7ößÌl8ÊŽk¨“ ÇóEˆ=¨9!g6¬®óI{< aE#÷Ã@Ã;ŽJÄh8® Ç—Èî>Þ“ ë±ö­ á²Ûïø÷£aÎDÃZ€ÉBõn>™ Û ¶Î6ì×e6×)ˆîž@`ÃZ€ÆºB.ögÃ6f6l³KfÃÖ›Á†ÕbthçyȆãºaï–ù–lXOp–m•·„~s®ýdÃzºùÿ÷vMšäÖµÞ=E]Jì“2óò0‚:aY9Bs§PÌ…EŽNhzLeÿ~ï…ÄÇZ+ßât÷pÊCì*2‘H䯯ƒµóÐ66õÎe$V6l=-lŸÁV†„°aû „ £ IUŽÛÊ F[öm¨/åûϯø]k6Œy¥}_oΆmÎ6ŒIy9ªÔà›³a»¦°a¿ Û´/hOuÁ¶ÜWd2Œá3sõê Û$-dØ:QÈ0Æä6²ZÆ%Ó5bÆ™ Ûä'dت Ö‡2l†YȰù-B†môƈŒGMgzªHàÕíCöNÑ0^]Lˆý¼¾ a*‚†Ñ-Ñå="^Ð0RŒ¯û8ÂÕÐ0,l>‡” aëdAÃA§kÚD‚†áŒ®3ƒ a›l câ?g* AÃÏ2rbõÍ ¶ž6Œ·šŽ¡H%lÏwìCïHذ:þ†+ü‹O I†1Œ—ŸïS`Ø:Zа½6AÃðèö‘_CÐ0ú+ì|×G6 ç+æŒ&D'hþí2SS¶»eõayù!dxÁ”søšŸæ§ Æ5µë(42¼Ô/vù¬ÌdCy o3ådcÒŽV/ÖÈ0V ËÈbdXid5ËÕWîF†á‘æ¥42\×vÛ °2¬Ÿ¬‘a]…–E‚a¼øý˜çç £¯‹¾ fLî¬|ëÆ…½Ó… ë¸0.¬Ÿ´qá¨Ä-0Œ8Ž)… d/:Œ-††ëxYV ãžg™§ç £fÊ󬿠a¬B÷<õÖ)EÑ0–cåÚ¦„†°á)iR?]clÞ_XðXw¥^Hî­¬ºŒ ËÒÖÐ0ž>”)††u™mhXí¯¡a]!ÆeËHlcl؇³áÇU‰ ûE… ûƒöw,lؾFÃ1Å—Ûò(Æ‹NSñ·„†ka˜b$høY“Ñ𣱄†QóLû”Ca4ü,d6ìû~ùÜõm?Ôäs+\ÀËÔø‚­ÒÉÿÜs?¸ÿù¿ûó·`hä·z“°äí°ä×ù&öý•7Å~æÇþ­oºB„¸?kÿÖ@Ђc÷¯ÿüл‡?5Ÿ¿ÿð±Ï_;½½øþÃ6—‡óúýë??ôîw§÷ß~xÑ‚ûš?ü•–+N@:%Ö\o±4ÇCد-¬b4êß5Ášß¯ñß ûîóÛ¿þ]ë¿û··ïþñwüîKï´‡ ¿A;:&ñ/¿Sú†;äG; %Fø‹ïTüNb+§‰þÍM³Êj+ùI¯…w!?jzØOÈÞ‡{ƒÀo†~P{ÀÄ÷×û³=\ø±C"]Dñ>äíàîþv¤üvj{ìíh{^½_¶GåÌÍ{Ū'c>\a‹µÀÃ-où¶F_mõä.áÇîX©aóy“«ßdû•wÙ*G,W•ïß&ùm~áG,ˆàï£ñXÄ•ÂÃX‹ßÿSïðï?üéóçïú¯ïþùçï?—ŸþòŸéWÙò·/:Їï™qk¦šy;cQ™ö燈Œ5–¨ÀÕ3¿ªŠè’÷cõ”¼Q¸qrçx+ç\+]û§–õw³T™µPòâQZ¹(LØên,ùÏ•9sxTšñiW‰22ÜÔÍëÔ’?GáÁ¹;¶¬âÕõ!OàÈp³Få–¬9 OÊÞ- äÅhwb]5óãmÑ=ñ²îšuÅbvnêNÈvê([9AÞë¯è¨{O®^y$¸Y?…]Y;Z+—î^W )­‡¤Ã‰B0Œ±‹üƒçÖNyôµövÔ>l‰nã¯ÆÐ=€Ï–·/ WNîá5‘ô| ]{ú0”Þ‰㟹£Û¸cèZ§†$!7¯™ë1ã6t‘2Þé}¨’”›(eF!Òu±ë5FöB„jì=t‘2²UÄBjè¹Ys¢ìVøQqå\ö‘Õ>ÓPÖËîœíÃkb‡œÆr¬ë–ž§…$ëæ—ÍŸÊEcY/[XØÍ^XÂÓX¦ M·,íûq¦žU…×Nc™4JÎï¬öë¢Ð`–‡ˆ•,É»ESââíÈg"y f½ìÆÒÈÖμ:Ì[,Ó{@k|,ñ†í|œÊ8Ú=3qmb—µ¹1²†_ö` ¿ìÁ @°sP·Ãï÷‘OÖF¶1/Ê‚„‹åè™Ý„“62¦È¼wȼ–•µ‘6 &¬‚‹í²Ö܈kûtSvNÓ@ 
µ–šZ°¬\v´ö\|ÑM,øæO’)æ5ñ®[zÄphXòÍk¬ŒÔ‘eï)×r²6²_öbmd‰Û½!¤_íXÈ7ö«+g)ØóØzò¾õØÈ7FakÀ(ÜY«Â 9ÇË&ŽJöerŽ……ã’ã²ñð=ƒázä?trd²×<É9ŽÂ̪ÓY‹DŒíªçB*Æö”çJÎ1.šjÆà{Èë# *†ëvì½pzǨµ®çÎZÆQqÛÏX ´Š‰¼ãÇU3«K¢º(,ä?{¼›â®NïxÃv ŽçÖ?'k{ƒ.öŽí²×r{ÇVçZÙ;Ö®¹VvŽíÕ=i(káÎαß2±sl«5rŽíuD!9ǺÛÙNØEaaïØ/{°wŒ#:ûÖφ­×ÉÞ1öÐ÷ÒSjÆ›`ç´µ¼yä×E~²¶gîö9‡rŠO®_ÚþxÒHæ‡ÜjTd~Ž Áy äø>Ö~ÔhC`ÉFC9Åÿk káÉ6™rCe¢¡ÌÝësv–±`F Cn­=ØYö.8ÙYöì,c‡}½ú¸xFv–µ×ÛÆŒ_/Vk;[å=þ¾5Úp*²VÜØW¶Š»ºÊ¼Ø `òž«¼A+†]å#ì_Lý­°°«ŒõFE$©^l“åãä>Ûd-<ÙUÖ1!>6ÉR·d’c~>úa¢­ ~Ïq,¯ yÙ$káÆž²=ä¶³§ì5{ÊÞÖÌž2N8Æâ¢nBáÉ6™ß%>46ÉÚžƒeï‚“åúîzôo^ì(ûe/v”­±à"/žGŽØ$‡¹Xû‰± Pˆm²4%Ön+Ûd/¼Ø&K;caw²M–ǵ[a›¬­-ì({kv”Ítì';Ê^ódG4éê§n¢ðbG9Z»Bêv·´£Œó‘ËÞŽ·†nd’å!ÓÆ~ò³ðb›,MÅ‘4¶ÉÒ=±tËÛ{C9eõ“¥­…ÝdoOQ7Y^t,ë6ÉÚž“Ýddψ·ÓNêN‰_6/ì&›AËß}ÛáÀž=äÎ:ŸF@Ð}`뉧œUvÙð4hN+bÃÓH轎†òi,¬tÓ³} ŸÆ»‹Ö',åÓQ¸,G·JЧ±–Åõ8*x: sŒÄqØWðt®{,RËŒ%x…ÏY[Œãé„¶áh Md<2 Éû ¢ái_ˆµdËûa|: ÷p#Û‘JãÓñÁœeï9<ŒO'Äñ§®¶g€{?%JÇÒj2 ŽÂ²!$öl5Pc—"ï¾5è(€:!kO0@o‹öíg»'ê„àçF¥Õd@ì>Î_ @_§qÀÅuÂù§˜§€:áÈ8f€:Áljïû8^ê„=N# Ž¯ð<Æ©ÔØÁ)#bß5`Øvõœ%F¨qÙx -ç±ê„xßcd¸RBpöõlÃJ`5j–³L@ΰ –ð3Ç®¦ÀjÔ ¯ë«+¬FMè™_mÓB`5e 2Ð:Ãêò56KVãQbܰ±1ðÏŽi™Uû£ «ÆAÛ+/cçUXµ?€°jÔ\R¤4VšKé ˜ÆªwD ¥®0l¬O’óÞ'AÕ°²ç^Ææª j QTý¨É¨O²§.‘©¨zǮԾŒ½PFÕÞ‚ªUÄ"¬oK ªF –©ïÍ ªÆ-AûL(¬’£±í9Uã–i[Ǧ¥ j¯È¤úQ‘IµwjÍÒh¤úÑwLª½f#ÕþðBª+XX»®¡j‹‚ªña©šÒ©¡j<>þ¾ï< ªÞ±ã´ìcçQPõ£&£êû‡K{y‚ªñ(áÒ4ADCÕ{2ªÆeëiÓ¶¾T½WŠØÅU[EAÕVOYµ=ˆ²j ¼RV½×SR]ÖÒXõŽ“€W—ë4V½ÓüÑYµ½MeÕf]”UÛðRVýh³j¸åJeʪ½ûnV¿-8÷Õ¶¹”UûÍV£f:ºªªÁꪛpuQ£ÕšL«÷dZša§›ú¥ÑêGM¦ÕšL«Qs;»êµÑêGkW{3® ¿ïs ®F½åìú¥†«Q±Ä2¼mK)®öæ®FMhJôÁÕ8`…@À¶×§¸zÇÁâ¶s¯ö:«Q'ú¾ë ¸ÚŸ^p5ôA®ÔÅá”V£â‘¯± &´ÚïȰÚß”ÀjÜ1&ÞÉ£Vãª9¯c“H`uÕ2IC®Qaõ£×Vû×ðÚMÓêG=¦ÕhN,¯&VfZˆ×pÂ&fZí¯Rhµu¬Àj/cXíý*°Ú»N`µfs6XíoD`µ×Xío$¯~µoÍwýuJvF­±bˆµx ¬|`k˜‘!?ãÔgL‡‚Qk’˜d[rÒgT5/-ª:nySOÕ£ªÙŠXTu†Ëþ[«ºžGUïÓoûñETuAÖ쳇\kTµ–qTµ@‹ªÖ¶ZXu¼â-ìÅÙ\Q «NÔèÙ<¬:Çèâhx‹h–ëmŽ¡Öj-ÄÚ %ÄÚF†XGÍX¼6ub±Æ< ¿c}¬}÷kµºbmwÓksamFX«u´ku-ÂZaíO"ÖPïÍ](Ú#¬½µa­R¬¡c»G °6ã)ÖzM °K#Ó¬Íj€µN`ok»·~ïB °öËJ„µ7H"¬õ[Ökï °öÆJ€µ-¯¶æäÕ'¿oEÖË@ÖküýWEY#AÓ­}òj ½S`m¬±ßf’*Ö·X+‘4`-:Æ«•p¯Vp¦¼éN"œz!¹áÊq„WÛÉ[áÕ.# ¼ÚϹ ¯öƒ…«휓òj? 
'¼Ú1Žðj?='¼X!Æ@Ë©í¼ZiŒàj÷WÃ\¦K¼Jª2 ¶„í2 È.#÷Ü_ɀآHqµùÒ*b •wxì-¶ãjc5*b.ºÊ€¨6\-VÖd@l%¢2 ¶S\­Ž¶ ‚¨‹¢‚ VOyµZgSñš›ºálºWÛƒ(¯¶åòj{%Ê«Õi4^­ëãÕ6 ”Wë4l¼Z}›Î«q(¯¶T`mªÀZqœòj+\meB«íÑ•V×Ì!ØÉ‚!£ù”V“úøZíÝ#´ÚÌ€Ðj{ýB« Æ)­6¤´Úšã´Zˆ‹Òj¿¬àjç܆«…+*¯6”©¼Ú@Žój}”SýpÁrΫèM^-MQ^mO¨¼Úø—ój­i¼Zš¢¼Úd¼Zf Ö@Xûe XkkXûeXGá“ýÞ¶,XËñAåÕÕÖ=,ãÕ6×+¯¶‹nzÒQÊv=è¨uZÍÝú ÕüˆN«ÅPZíÐÙpµPYÅÕŽÏ WËÛª¸Ú¯ájy÷Š«3’ˆßãëǸZžÝqµÞSpµù@Š«½Ðp5#ÃÕ֥Ы V*®v /¸Ú¶H WK{ W ÎW\mU\mmU\mÛ@Š«mëIqµáQÅÕJU WÛ;q\-ó€âj{'Š«m°*®¶wÒqµseÁÕÖ«Ž«eÇÒqõ‘N¤µ½ËŒV+ZmîœÒj‡ÃB«Í{RZ­ët£Õ~ÙSe¦*Z­¢´ZÀ’Ñjá8Ž«µ¢ñj­i¼Z˜›ój!@ΫU8¯°ôàÕRh¼ZˆŒój}NçÕrÙ/âÕú[ãÕ²wà¼Z(ŽkéTÖ¼Z|k½§!ka•Ьu?ܵîj²Öïù¬õž†¬¥ûŒY˾ƒ1kyJeÖ^Q˜µßQ˜µí½(³¶Ñ¡ÌúY¨ÌZà²1ky›Î¬õ9Y3íTdýäÙ‚¬eèMd-DÖˆµRn#ÖN¥YËW®ÈÚA¸ k/díàÙµ> !kyL#ÖÚØ¯ ÖŠ]•X+þrbÍÓ’k]ö±Žq»ŸCîÀˆµ®AÄZÙ²kyÏN¬e¨:²{¦ÈÚ¶WYËÒßµÆ tdMwú6I•¢¬¿Nµ”÷ÎRöÄÕguãºì®áêï9a——6\ Úw¡àj 6\•Õ‘[†Æ'®Îá¯ÖWûã ^¶ÄÙK^ÿˆ•k—þ`^¬_WLXË3¾·ÛqŽ3ÍÊ«b¯¼Ep(®FšÑxó{Wè\mšÁŠ«½Ppu¥l[¯Ø¸–O[GáGŽØz¸¦PaÇÖÑ”¥D›·IMª Xækü ¶Ž‘Vd„J›&ˆª‰˜&HL‰yŠ`3´†`öý^JYGaXªk*d‹&ˆeÔhŒš{>¦®´h‚Ä=·X³^=M£h‚X.NÖ8£C²£pŠ‚Äšò:²Y$>ú ¡é…*ˆ~u& “ԞΖ‹K µ'øjí™8…ZGá…NÙ[, PkÏþ)Ôlx¯¹‰1 ´FÅ·ßZ‚Ÿ¡5´V¢§¦@6Cëv°Š¸¶Ðm†Öp‰Ò1bohm fmú,*dméKYGaÌr×ÔÇd­Д¬½¦ 먹†™M×+I'e?szdí¢0ЬMF‘u\¶¤»PµebdméXY‡/gêQ ²¶Qdí(WµC`AÖñºs8 K3<ެ²¾¿•®ûaÈZdÈZ„šYóNëYkƒ YKbÙ‰¬e{V‘µAyEÖÆÝYÛC(³ÆÔÛµº•Y_pY¡­ÌÚ$ÌÚ/+Ì*ß×2Ô³ Y_áh‘oEÖÖoЬ-•¯!ëã:ÊÐÀfmõ¼‹×ÃüX^HYã}ñƺ&·2k}HgÖ’;X¡µ©c)µ¶îQjíOYÔg?ݰµ™4åÖf^”[›y©ÜÚ®¦ÜÚ …[‡q*9cÄÝ…®í \Cå¼äT:Õpý,$pí—p­ðÈÀµ¥ÖTpí÷<ÕOæüÐÊ­½.9ŒÈ1‚­MJ±õL÷¬ÄÚ©Ä:k1Ô†»k˪ÄÚ:F‰µ7Tˆµš(±ŽšW>†R»kzDÕ¦{¦°zVêœÚ­*§¶¦+§63«œÚ ´rjÛýSNm}©œÚkfMB.»Ê©m÷O9µ×4N-™Ê©u{T0µ^Ô1µ€?ÇÔ¼n|`jÉgå˜Z`³Qj½¥Sj¹ªajåÛEs’ó–”ajƒpÊ©Ÿ…š“œŸä‹0µâ[ÃÔZÅ0µfK4L­ Ûó, 6L-É1µÖ4L-@1µ|ÅÔ:]¦Ö‰ÿ©e¦VÚ¬˜Zy²ajEƆ©µp?˜ã©µ­†©6¦Ö˦Ö˦šè˜ZÀ±cj­i˜š“„¦Vؼ¾Ÿ’ü©Y¿ñ©…Ò;¦Vúm˜Z/k˜Z/k˜šãÖ˜šã‹˜Zx©cjm­aj•rL­XX1µ<¥cj/L­$Ú0µôcj¥Â†©ex8¦–ÎSLí5 éÐG1L-/Ì1µô¬cj±ù1䀩ÿÛ?¬o0±ÑÊïþü»ÿ«Òç÷ÁõÏpß œÞ6P‹zn?. 
ì?|פB~ÿIoß}~û׿ûÃ?ý/ÿòöÿ?ùû·{ûî÷Çï~M‚Ç_äE­§ €8§þ„¯0Þß ^1A¼Ü˜¤W-ìÀþkž_k8[p»Ï¿þìÿÇÿùÓÿû7yø_Ìn©¶o,ˆéùø\ø-¿×õšèàx÷ñÿø?øÓO¿æÑ¿A+]û &'Z6H¢?û€ ¿¥ªòy¼ÿ˜ãßïƒþËŸ~úÃ?ý÷ùµÝð5ú;ÚJבŽýÅgÀ…ßÒY¨òé*å3øïÿòüç_÷|Åy}þXÿ‡%…ôHy>?~Ëó…Å5òûÏÿ¿ý¯ÿôëžýW~XÆÆ-1>{€ ¿Uê‡j¤ÂÉÂNcÝ}Lá½§{›ÿ¼­lýƒûŸÿñ»?KOÄ|{é\°½7^6æ›6f¿ò¦mº™ý›ßYZsØñS™ÔãÚpnŸfÚÚl}£ ã§í…»ïÛ?mH¸¬¹nH}Ì›èMh?|h Zß÷ï ÿôhCu=« Qs=ë=þší=Sq†LX\e_2V ÐM?ðï׿íox÷j¿àNGÝ9G.á°‹_|§ô wºbsÜê—~áŠßéC,é4à¿ù j6ûc-)?éNkê›<0C§‚ófHᵋÓ}¹…LíáÂ+gpÿˆ·ƒ»ûÛ‘f|ðÛ©í±·£íyõv¾Àag `I~öeE2DÈ^ùiŽ–·|[£¯·z|—­$„J-Hhú¼ÉÕo²ýÊ»Ä:ï$‚jNú÷o“ü6¿p‹º_räèªx¡Ñ÷ŸÒ‘Ñ?×Ë}‡ÿáOŸ?ÿÓ}ÿóÏ?ÿ¹ütþç‡Ð‘øž?±boA؇?o-ñ‡½ãcÕoGJ‡<3s-±hìÒ1eIØÆk£žd|GˆÙ–çÑW΄BÚôg)嫇`í5Ä,7D†§˜óû.é®!fá,9÷½!f÷–HXsÄ€]ûˆÏÚh66â/¡–ÔIPXG* ­ سÛj6˜æh¸Õ‚·)©±Éèç}½ä-½ )$Z8´-·¯$I7'ܯci¹Îýê…cfGäž×®Ò´Çs—3ßz‡{)‰ko­-s‹o¿ÖøTâ9ϵÝ[ÅùFÂñN?Õ¦¶0‹}¥­âýDÁ<ˆÂ; Æ0úàšb;BÍ:eÝÏó˜@ÿ;bͺPA¼ŽO[Ánî}GÀ¨Nï÷AŠëHo¿Ç¸ÀáìýÀ^ø¹õýü~ì íÎx#¡é·C£ë¥¡p[âãoiö[ÛñÌ{Ý©GjZõ;„»@[c0£,†òÞ”–wäêû%û±!Wø‰Ca8r÷XŽ&!¯óH÷K

÷HÎØ}£©áîi™»±{«Iiè¢C×Om(GáBâm=ˆ_#–¶ì÷¡Ñ=!¤º™åx¿ŸvÒì@áA̓"Qc Výœ{$#$‹eö”Ç6ËŽ/:³à§&2¿ÖŠ ‰}ìé˜x{:‘rlРð<›YFa «›rûL⇅4 Іt5³ŒÂƒvKÂÐÏ ´.“ÊÎAÊXÑ“V Gô ‰‹¢7c¦lC9:d¥Üèͱñƒ®Œ·¸tQp€Ñ0m({¿ç~}9>É>^ÇhmxJýDBÃú6$jæ“Ê’ïÙlû8Fôu,_šp5¦Í{TGÕžæ4y@«e˜dLŒéê“ûq̶gkO8È} û=¯±ý[WLf]ÙŽÁr£&"Üú1§Gáˆ+@Í0ág—ÕÚOÓw£Œ±› ,×f%du&;ÒC‘á¡ìÝUFáŽé°¬'¨…¥L% ¯uØä0stz&tDÓ¡•'¥~ucØ‘a“í5žÇ܇~ ôÑT™Ær|¹]ì…#À5ã3Þº$Ü»£»ÊµÃcÐTºàŽð™Çh rïž2n¹Pª^ø¢#&«F×wõU^ÝSFáYÄ«ëÌä‡Â0yL‚ü†I¶™>œã±/þ胫~Âs(Ÿ±Fï92P8Â.ês"Ó@Ÿ#.ì›\÷ûö.ÍClÆó °\”gk-ôFºM7ä:KŸ'ÃdÍ01XèXÂ]C#Å rx ­ù^Úì½pÖÂø~?zb €JÇ5Æâ$ÅÒnıÂÖ†Ï4²a%3¤…Ñ–0™©Ç:ÅÌtyø†±ª¸Ï, ðì®r4%– ¹ŸÖHK™±E°ý£¹MŸXY¥á+ÃÁŒ§=÷µ]õœ!k¸ìV¥OtX°-ÓWŽ//Œ|_k¢ðœ¾2èÎ^ÝÑì)–veøÊˆùÆS•Öqë:ãjë[ Õµ–±‚܆¯ òˆÖ¶,–)–v#&¥Zí wõž_Qx_9ÃîÍ4íñ3Ø …#¸©BÆsfx¸±Xη`PZËŒ¥«_PtH×ô ›åÝi˜K ×®©£ðš¾r†yíúÅXcïùÊõØð•õëÂY®½å ¾gSªÇQ¶5‘aF–¾®˜ÂkzË#,])²íøQOÕMoYgž–°fuÜ«(ñô–uB«Hº·ü(ƒ^U·Ì1°À!ÂYnÍX?Ïrlð0²®Ê á¶ÆƒµŠ„¬«pT”×ÜÈóDÖ® ȺŽ0ž{iü‹‘õc/‘uÕ@ˆ£gí…Y»@‚ ë:$ã]ô¬ ëzrþ€•ê´vÕ“óZHÈÚ  ²öcr‚¬ýè¼ k;«,ÄÚG ±¶Ãʬý˜­ëZˆ„µ=öõ³ð¢óJã8¯°j?&+¬ÚÏ¥ «~Ö¸ZÝVí'…Uû!HaÕ~ÊYõ³ŒXµ¨V퇛„Uû9-aÕ~fJXõ£&Ãêz,*:#z¡á_‚Õ(dÉoÕ¯ ¬®…$Ü-°º ”‡ǘ»`µ«Ì ¬v‘uÕõ²aÂÇ$›ÎzM‚ÕÏB9|…ñÂÂòÜs(Áj×’XíY|V×ÌÁ8„Öð̪ý¬ggÕ~rPXµ~Vý,ÌvZ‘”G™UûraÕ~ÚWXõ³æ©Ç«ùãVý¼ìõ¾Æ¬°j?-¬Ú× «ö€ÂªýVý,Lz0—¿haÕþ«®…+±ÍgVí߈°jÿ „Uû§Ç¬ÚS «¶t ‚ª=/‚ êÇWÐ<êªöce¦è¥-¤ëz’êZ1æ¸r5lH¤ºFÔ-Ð8j› Lª=ÜNHµ§ÚRBHLJM4W$§€jO* º†=ûG ª=¸ êšå;^þ¾4RɨºÆÌ1v]vÑYÐüઃ•YuD(ƒ°j\¶´É]x°Œ«®9ØJ|ígCŽzŒZ³·1¬~”1¬öŒ’«=Q¥ÀjÏ,ÚaµçLXíéV{¾HÕÏ «_•Iª,Nð)°Ús× ­vF¡Õž;Uhµ'\íYEW×Âøxb±ÔX6áêgÍÝ%)‰§ðjO-¼Ú“] ¯~Þ³he-$^]?Ì¢í@3°~Uxd—ã/¯Ž¹˜X?*2±~|îìon¡k—¨'´ßvÀñ«`*º×I±j‚rÏ3Öúp1™ôø[µÆü™sÏÝ#ìzUi{Á®MRÙµ¯I…]›ÌÐîY¨ØUQxm3•ÂkMìªðÚ&#…×UÆÔ°¯ ê2¼¶1¦ðÚRé*¼¶ÄÈ ¯-ÕÂkËZ¨ð: W‰ô0lf×PÆDàNGŒÂ®7P›ëÜžäÕ¢£ÏrÕîý‹ó¹65X%×pZ0$;¶ìèÊŸWõËú¯I§Êƒ¡68ÍäÚ„i•\C‰f‹EëÖïFäzƒ¼h8ªMATÉ5 aöí¹†Ëv†¥h¯IÀõ†ìwñämçWÁ5DsbyÓû3ÎúQSÀu•ÛAq ®QóŒi 3)×þ2,W§Y®½g\ßb¬#ÐGÁuæxåÑY3¸†’ptÙ0®1b•v\= šÁu†k´P,àÚŠ\ß®ò<þÀàúEÙרŽî±æ®M0YÁµ7¶ëG×>\…\? ™\? 
‰\û ¹~2ºö0Ê®!e|¤ù-ì:j†UÈÊ®¡GTEb v—/râ*»¶×!ì…çáü.dv!¹aô÷xi‘Ÿ¬Ò#ûDÅÌ®ÑÚXÝŒa×QC׃@”]{ »Vã*ä*XoÑ,J®ÝL0¹®êÉ1ƒzÏä† fæW‘Ö‹ ¸†¹ fÆM7pí6RÀµ {+¹Faüf"o&×(,5í^k ‘kØ«°{=¦ÏȵÙH%×f•\{c…\›uUr uõx¡m Qpm@Áµ:×6\›YVpmUp­S“‚k›Ó\›±Wrí·rmKɵµ‡Áõ ¯}ëñ*F¯­9J¯­{”^뼆àj•Ä|¯ÕõPv QÆð‘ûö“²ë˜Ê6âD•]ÛûUvíÍv5o­õ‹°kpÊ®ãA¢ûÆé0e×vOe×VSÙµe×ö¢”]['(»¶·¨ìÚžSٵ̈́ʮ×OËôMjeטLÐ-2^Ùµ½ke׿ï»Ö7fìZ»Öص;ƒ×Û\üÈðZýkƒ×a¹—2Rn¼Ö^5x­N›ÂkMœ`ðZ­¹Ák±žÆ®ýé…]«Í1v-¹¡k,ÆÂÐõÈTE×øðc­¶÷mA×|Û RWt­¢ä:ÊîÏV(äZçV#×ö”J®½¦k¯)äÙÎÒs¹Ö¯RɵØB×Ö%×ÖJ®µï:¸¶×¯àZ—-F®uÊ1r­æJɵ·RȵúF®­c\« 4p­^€‘k«©äZõ®±PÛÇÑZ×ZŽæ½±~|Ê­‰q.ýŒ¨qk5Æ­ýA„[{¡pk{]Ê­­ó”[›ÙnmWUní…­Í^+·ÖrktŒ[GaŽåZʯ¸µÎHÆ­q¢{Ëý(¯rk¿ªpk­Š­í=*¶öBÁÖöôŠ­­¦bkëTÅÖê¶¶/V±µÚ¥Öf]”ZëjÀ¨µ ¥Öf<•Z{¡Pë ‡x¢–QHÔ:¾Q·é±ÔB­­=J­µ Z#Hîgá ZëZv_ |+³þÍH‹µŽ‹T*<5²eÍ3P¬ã2€t=€Y€µfæ1qM°bâ š[€µŠž?€µD‘8°æ”ó¬u“÷A¬iÔ€5ú£2Á»Lxu|ë1ÃWc8öqlUqµCgÃÕZh¸:F6NÖ4Ðm¸ú8s8í WK™Ðj¿¨òjžWÃj„é'~•WGaÌd#@Wyõþ©†³¬—ñêðÿÉ(°ÖIÖ€µ·E€µ·…µÚÖñ2â¥“æ ¬½±B¬£0n×y±¶w¬ÄÚî©ÄzwSr?XgÄÚ^¤kkkÊ ¬uè(¯ÿñ¼J¸Cúƒxuu.Ãiét]x5VPg™å«õ«S\y>šÖÃÔW릪âj»¨ÐjkÒjðâužQZ…;òîd¥Õºk´kòå§Å•Vë·Ñjëo¡Õ~K¡Õ~K¡Õ¶}¦´Úk ­ö§Z­çF«uoÜhµõ¸Òj»§Òj -0Zí…B«íûPZ溺/⬭HHµMʪµžjx\ç¤Rm»™Jªm+ªŽš×±öž†ªÕ RíR#a¦†F‡ê¸àqnCñAIµ_–Iµuš’j3)Jª­Û”TKw+§ö‹ §¶/NAµ\T1uCMCXM1µö¶Rj{ ¥Ô8†SýˆWáÕ^S0u´§ÄW}µÉS1µ½ÅÔ6nSû=S{k™S×DeÛPbRNm§œÚ†rj{’ªí +¨öë ©öšBª¡Äµìã8•j³ÆJª½PHµ ©¶á¨¤Zg8Õ"]e8ú ª½P@u<ÕŸbh~¬ìËëWNí…©åÓQJm ”ºúu9šý”y *¥ÔÖuJ©mÆQJí÷dJ­îˆQjy!©-ÄK!µ¶uBj«©ín¨-4P5£Ü‡(zádÔ~GaÔþð¨õ#6H­æÈ µÎ©õ¥¶~SJm÷TJí…B©Ÿ…„©í9S{MÁÔöœŠ©õë1Lí ˜Ú$˜š¿;…ÔÖ¥Ô2c¤~”]ìËStHmÝ¢Ú®'ŒZš¯„گɄZ͸j3ŒP{¡ j1F¨5׸ê˜Fbí“W •PÛ'§ˆÚÚ£ˆÚ†…2j5Ѝí»QDm–Jµõ¬"j¿¬ j¯)ˆÚSµ½iAÔvUEÔvUEÔvUEÔöN¢¶ñ­ˆÚ¾(EÔö®QûíQ{;-´Z_µßÒB«õ–Z-—õÐjyL­–Ëzhµ4ÈC«µ¦…Vk¡…V{¡„VK÷yh5ÏôÏÐj HmF×C«)^ÖµyFmmUFm§Úº@!µ×|X€oÓùŠ´>˜ê£¸å•~5øõT}QDí.8#jLnx•ýˆ« ê9¦·¡ ¦ˆÚ…¦U¿:ÕÇ6Oz ¢Ž¿ŒfœQRD½ yí9Ž6˜~utÈõŽ Huÿ–N¨òÕ1߆1*_ ‰è“´•”ÊWƒ«g¹ÊWç£òÔgL5\ò‚ãkÛùjèÆ mÇbL¾:c¦Ÿú*_ø§°§ÈW_Ð %yN–¯Ž²%‘¨«ÈW_P\¢ðy‘¯Ž¥NÁÐ[^„U£~ðˆ}ëúÕHå Åãmy¡_•UtÚ8ê-úÕN—“†ÀÆúÕQš¡MÉúÕtšH%@¬qÕs‹€u¦“4 DÀúQÈÖ'ÏãŠ*`”É k¤FÎó¤·èWŸ×§~vV¤«q·k$¦Uåj,sÓL½¬ÊÕçY'†q Y”«±zÎ3 ¤*WCØ|¥Ó΢\íO/ÊÕ¸ç1S¬ªrõ‰#éxê¶¥ ÊÕ¸gš‡dE¹}zÒagѮƓl¬ÞÌÚÕÔ©"[íwk²Õ'2ÃÓ±s‘­FÒë4Ï·ªlõã‚,[?éÀ:ËVŸø–ç_•­ŽæoëÌT®²Õhì9L«lõ !áËíéY¶: 
ó3oi/Bd«!‡¥sÈ–‹l5„ô¯xey![&OÍ4•­¾pÚûÙj†¥(¥«a³l5>ñ2…èT¶²ÿaû†x“ÈVÃmS§Me«Ãüíy*ã©lõ㞬[‹6D·úÂ|–ºVaÝj7ª"\ £F|†"³ru\õH$”#ÒÕQ¸`×°ÇSvéê >eLELj˜žÚÕhJY׳¥ÚÕÞ5¢]]!XxI#¶™´«‘¾¡J ´·!ÚÕ¦QÒ-íj®C2N¥«ãª6EÚE•®Fjø¯”«£ì¼HÖ^”«ýª¢\Á¬šz(1+Wc®ŽåëðWD¹Ùãw’<åê(ŽçïïB–«vc&rÕþQ‰^µt iUc¢_³lZgªU˜4ÚÒY¥ªÍoP©jˆ.UíoO¤ª1ê·²v=3•ªFû¯&uð£!êçeIªÚ?‘ªFÅ©ê é”⧦u§RÕþ¦Dª·„Nq›ªDª™ábÒè‰ÁTªó¶ä²5|‘ªF{r¬ÉÚ‚VùtÌ€µ‡Ž5óé(ŒÎ©MTªúqYÖªŽÂðJ_íªX5jBòè’ܨ½=¨H²§‘†IõåäÔNñ*žÆ4ß瞆¿Ò4‘ß¾OÿJá¸^¸ UÙ⤎I1‘GçqÔ+åÐ$‹*CfqÔê¦qÔvjד,Æ’cÌf–d’På ûû"ŽYœû?:¤?åœ)¸RÇ’v|©”:ž3LÅÈ!¨”žk¼Ì.w,¡Ôðó:ó$”: ã>ÇH‚#¡Ôá«ñYŽƒ’J#*÷˜ º‰Pê0‹XÙõkJ©mi§”¯(F÷?aJ­Î»Bê°g]—×2eÔ†.@JeÔ1 ¡´Ô3þ F õdÒUFŽ:R$í½’0jˆï[ÏScŒòãñô©­N˜Q;‹sF}]uF-Ð̵@3eÔÀ§H¹}¾bÔ•IÅ„q=µS<…ÔÎw™T{–MGÕ%Fd÷¤¨Z2i:ªN…ôxUCoRPReûŒÐRP'ïúñ¨p(+¨–Ȫaî$7|JTÒ[HÏSA5Nlgóí V#/¬k†4X½#³C‰âVï1¬×½Ü VÇ_61¢»P`uxò1·D`uX§<ˆ¬Þa>§ ‡Âj=ZPX½!Aé<„®°ê?tPZauXÄ1VW¶t”qG`5s'i Õ¸ìÅ* «OœR@j”£Ì°]{’V‘Àê›áá{ìCÛc²jtíFgÐ…UGa¬¥æIraÕè …‹ «ÆeOÇV}B@‹d*˜UŸj™:‚ªOØR:i/¨:.þ×8½­¨úÄYÚ½ô¼1Uãn©©«@ÌÛLÁ*¬W {_y»"³êÇò¶™òŽYõ‰…, ç«>qþkÊâ)«>H¡ÿ«cÛ´°…V£÷ÂD Ix¡Õþ²„VGaÔ©þ"´ú„B}MòüT­*ΘzÒ6ô=&®Föãe݇Ҽàj\¶~ùÍq\—M@¯d«kÍý™\WÇÖíó¡ͼúº#IáÕ>¼„W£æ1‹6^í_•ëÇ+ãa{fFæÕÞ1«ñ…¬,·A¼úÄéÊ·}ö^íoQ€5FN¸òÇ`}b^# ˜ëâì”®\€µ¿EÖxÌðF €kŸ XãQ–°µ=Œ™yõ O„Tc„WÇ(‚øË”˜f^Q„¡Ü·n…WGÍøaB{áÕü‚œ‡ð·ðꨉõúäòÌ«1’!@Ý÷HXGÍ/~Ùd`} MCž9ÿXãt Ö6VXã{=Ó\Ïw`mKuT —vì½h–EL¯Hß±¼H²hˆðê9*ÊÌä$¼½Ö<Ì»y5,E8ö}7Cy5^F\jz3¯> öm]Ï‘qòj:Ê«ñ6®<‰…ðjËæ®¼5Ãçè§}•WÃ\Å 2Û«1\¯ƒH7óê°Â籌}KÖº÷¹-!Ä…Ë66f”XÛÇ¥ÄÚ ŒkóÞ„X£ß3(ZDzL¬8*en"±Æ»ç¬o1)±ŽÂ5¬ûÐdmÄ‹™±c#ÄÚë:AM. 
Ä# ¡‘¢1±Æná{ ujN²ëõ±Á¦IýUI’EØäâ=gÅI¬1^c²JáB¬qOtÈPƒfb} ue¦öjí÷j}à™×±!®Ôú€Ïužý‰bkx aλC¤Ü5Ë9Si ¸Æ=÷eÌ)¸ŽG¹ó7=q×$÷1 Üú8Ì3'¨pk42ƒ#ŒœØõ!ò}¦!v}àøí62 +¼>ê’uäÞVxËÆÂžu­+¼ÆÓ_XDwi†×Ÿã&í,¤ÂëžmL=”WàõyêÜÓ# »~MtíoQе÷š°ë(ŒõJÇ;Š®Q‹‚%/èúÀ ¾$ÆÊ®Ñk8×Þî»öIDØuø&× ¾ ¯±šˆïifodx~]‘&Lá5Fò~î ¯ˆäYô-ðú°ÜF8‰Âk s„O·cø ¯ýáL‹N½]#0F3-¢bøy#¯dZt; ™½±åž®;ïí·FW¯o?Ôl‹qÓôc ºàÂmUñÏížñ÷?ÿãwþPŽæ7 ãÞÞ ã~Ù˜o¢ó_yS<àg~ì¸i{ÒûŸeއ¸;ùqÿöǶ ¿Þñ¦?öùç+¿ÿ¹!­d8ýç·jê|}ï÷û·>´}œ!ÿâî÷õ~ø+­@|ÌI[u-Þb…¹"ûËuag%ô‡ïšÒÐï×·ßçhóÛwŸßþõï–¿û··ïþñwüî oƒ†ÁÿÂFmLiý>q—~£h××ß*‡sŸ .Á—ß*ù­>Ìp|Œµ²¯õÿ™9¡ïGÍ ²gáͰ i«öͼ?ÛÃ…<$vÚ‰ÿ·SŽüx;ÒŒ~;µ=öv´=¯ÞΘ؄ ·õ€ë~ß²ëBA+<Õ‡=ZÞò7šX½ÍVbY”‘óeIÏ»\ý.Û¯½ÍV.°Š«nú¼Ÿä÷ù…I gævy½Ž7,˜«ã a-¿þ§ÞáßøÓçÏßÿô_ßÿüóÏßþËOùÏ¿ü*sþöe‡ͪÅâ áß õÛ>H;ýÇ€9æC ÜPuû¼-›o¥\#‹h†,q¦X;bËUGl@ãX­S(Còa€á³.Óc9¼·ÂB§ùÈæ_#pFÖ˜wË ˆÕoƒBùfÁ- YV‡¶oÆ6ù ´H/¿Ï˜õŒðAbcÁyPºä¼' OÈñ˜± ‰¹Uð¹Æª:­­æIQuE>“¨åý¢@^8toYå3V Õ}!ž–ÓFáØ‰&¥æèÚñÇ–ó½Ùw¿²ThS?UÖ7ˆsÆ~ð`‘hú>¡{NmΣ°Ä@hxr^çþ;Òª§‰Ír¬íç;QXÑž*çT·Ñx;­Å]dzÒNùŽäƒ“‰å|ÌÍpTÜ&À‹OûÝhI\1å{»&~I[Ú;Â8Žð’± :(¢Î;ç.Üi_z‰œkèŒýÌÁÒjŠóœ­ÞA›ËH£´ì[Ïž‘ËIûÇ^x,´EŒ4å väÏV¸Ò>pþˆÑÐ6fó±ÓV/Fîv]@0™¶tk²ñcíÛ™ìqP(ÄTC×ÊNژݑêcq6;yƒ&Õ”á×8bš±Y7ˆQUæŒûåµî´ƒ  Bu¶' £17I·J¥F8H> íƒBÓóŒ‡K7ŠÎ5Ê£ä(ܶ}î‹oƒv3köÞk$|Ì!ê FgÙ(ÛhO²æÝG¿{å…À!èWwŽ˜±S5|*X'0ñÿ±ÕwÑŽA¦ÇÛ¥ BdhOËH£OF{€5â>ʲÑ6Òe‡å8®;)¾;ÚɃØiZFMÜŠvë¢ðÚm_qYÚCÖSЉ+ËI{n€ÝëÑ£hb)H»j[:=ºjkY7Ú9C{bZê›;.Ã&ךÇÐä+8#?lrͶ‚²´ÓUSw¥!7Pb¾›»Y‘=JÏ*_¶…ö«¶ ¥Gίýåa’‘¥<º¼ï®¨†“Œ!wîã`oAÞÀ“FòF·ÛºqØa’r™s¥*Ør˜¸kÏ4]ÐÏà ×LšûP)HËÖ rM9޵aIiIžc÷˜Àø¬ëFMÍ?} ÅÚ‚ÒÌ‹ƒhTܽÕ9æv =æÄ~(¥ìí¨Ô|ŽÇÐ3-i¥MÌìË9tp 0û°ÈUA÷© pú°ÈHkŠžiØ·™‹\SVV,Ý ÚÀÀ~OTïâb%]´GQÅEÏðS0¹›ì£*o´×àÃ>ðÉê8ó]*9Ó¶AÕm½FŠ”xfÚxÜó$þ_Ó:¡á7Á. 
ÆÃ(CD-®rl·ÓR…‡QÆ>¾(e'R…½0 =óF‰ÙnÂxÄm&ùÞR¥âíH$Š9£m¸”rR¯I+®±©™¨9îIçHʱN0Ž4Ji©'l]„ʈ`ºRˆÃ9zøÑ·Egwuë_ Ê6'üÀø¹ó±»^åÔâP'2atßÝ=pºçÃ×Â/öÀ­.8Þèý*xÂ&øÒ¼!wÀ‘T>¾’ý¥žÛÑ xFî롚üpÀKäž±ÂÜß–c¤"xxàᦠsýðÀ1Þ#xøº]¿ñëp„@t=®¿¡‹—‘Êøá€ßší1xXÝž:Õð 1—‘Uáé€s28à1Ÿ —îá€×áØÎ+<pv†ÕG‚ˆyŠìá€t8íé€Çý[ ´ùß ÎÄHÈêþw̵õxøKÿ{‰QÓWÓ÷û²¢›ŽÓ]ùN)õ†¯×—0ŽõöˆtV/|ÁQŒjòÓï²8ì»çëá‰×#çÌåg(¼à‹ÛºÈˆ¢p V~`긮ÎGÔ?Ò„¦nñÄ$c"‡@±‹'Ž0ëXPO< Ã$ŽÃ£æ‰cÅ5eXÔ‡gšÇ |uÄ3–ce.ÄÆn×91²;âÑÐ)Žx4'œ ®R£~xu…÷!9üpÈ ,#šÝüp„{Ï3¯~¸Ý‹Ýðz‘}„šžÁ ¶~JUÝð¼#Êöê)ÇÔ Ç±ª#FHK3¯n8¼ûkʪ…W˜õ±©#n8z4©Ž(ÂužÔT7¼'Æ[l’êˆgLf5lïé‡'œ¹šrzê‡g2FcÛØ?Ë©ã]0@ýpDòŠ\üpœÌ[ÊÑ£ÿÔGƒ¶yÄSñhP¸Ëã(«:âqÙøC|yåéˆc[TbÕG˜åqì}¢G3ï1EÕÇÛŒNKœÛTaGuÅý1Ňݧ “êŠãÈoôeßiW<*fJ‘§®xÂqúÝG8ø6Œ©+Uް28PöpÅq¯rN>*®x‚ÀßHÝ®žx””P=ñ˜I¯5Q(õÄá<Å×ßöÒÕ¿oŸºx¦zâ ‚âã__yâ;BEâhÌM=ñh\8áƒ7©'^3ü…§Ô?ñÄ1íß[ŸžøŽ#aó|‰zâè­4·©'¬rÎÃdê‰C*cŸ^”zâØŽaÞâõôÄ,Ƴõ»xâ '1»w?õÄñœ1m[²#î·Oo,&ç~`_¼ØNÄ1A–.‡bD|ÅYîã8Ïõᆣ·ñ~›bŽºáë1³Œˆãè1ìµñøúcNî NFÄWøBCÖ¸×$n„Uq’ôÖ"h‰Ãÿ:‡â‡:âû"Ñ_Câˆì>ê!B∣&Éì_@ †²£:âk8MGÌØ-_¬:âqÙ4•}ÕŠá{³šê‡{EvÃWh ̽xuÃñªIHTýp¿¥øáøxIm\üðoö'}Õ‡KbãÕ_ëéô¥åIþÍÓÙ˜¢wÝ7'Òvá)®m:c'®ô’¶q¶Vð™âAýoc…FgNâ©PÿìlÙÆÁcõ¿ú ^Ñ—ÅÿƼNQõ¿´­Ð19;àXÊoÑœ\^plbãtZ“ðQ|‡½Z'–Ü·¿ÅÇ&Í¹Ž¤Ùê€Û6Ÿzàˆéñ}Âm¤«n±WêW‡)Ô©x –L:öêC20:gá²n3¥zà1Ì6LGu2°bS¾Q¼Ž¾ÅÞ·Ù*õ¾¡J-Š.½žÞ·ÍUê~¯Ø¯˜âïê~oU«hÈÁ¨ûíý)îwtÙ9EÕûF{JRJê}ã˜FL]îC½ïGMö¾«ã¹õXbõ¾1Q®«°÷ͽ[ßj}zß𮘴züºxß5Ö>DòÌûŽa’ ýÖ¾+ñ¾±U‹oâjë ×ía _¡P7­€ºß8¢ºÎè" ßÒeBu¿±WF»6ê~[×6÷[ÝKu¿1“»ºßˆ•ÂÀê}CÞå˜á â|û”#Îwܰ »:ߘX¡&°¾r¾×*˜¹ðñ¾W(Q¤±û¨Þ7$nàî7e õ¾Wˆåœ-tJ}o³6ê{WØëš$›}oaÍ÷FßÁÃé¡Íâ{£Ò1¨ï 9ž4w Õ÷^Ö=w Õ÷Æ™ãXïöE¸úÞ+z2Ö<=œZœoo­:ßayÂ^u‚eÎ÷¢¡­Î÷ «.Yìη6ï[ÍŠ¹ßÐ LC2ÌÝoŽ ÐxÌZáÔï=Û¼o /Vï¦ëÜPU[ãUD8<ßpX^(Þ·C{@Š,2ÕûöB HÓTô5ïa ‰`¶¤Øsº÷ÍS¤,Б ãVžÎ7Œ ‰‡[ sö¥wzH~ÁjzƒŒÌNçñåw*&B8må4пñéFù#m%=å­YÓHØÔïÎï½|Ô´”÷¾³Í}àk^¦Mþ>¯÷^ž÷þˆ>¯ƒ:=îÍ}þ¶°Ôƒá "Â>n–£é…Œß·‹êmÂoŦŵ@ òo)h·)uWåÄéÈ/ üoÿ°¾aýK„ïþLwý=dðŸíxû=Ý“ZÃRåºÃCÏü8‘@%Þ_Ü×› Éß7ƒü?þáŸþùÿ—G{Ìêlíù…À¶ÇŠ­ôšG¬§cñ R†ÿõÓ-eX>HÊÕNl&þÕ…PÇnÀòu³N ·zf§4â a‰¹Çð’¨eÑQ[“±)û6—e(ËãtG©AC:Eˆ@ðÛPÉXv¡¬Ä:5ª¥ [Šm£‘2gK5!Å #%_¦²s&±&“zWEöËv"WŒ_áÐÒ•cà‚’e8*UÚÒ0“ò_ny!¦ùØÊBÑVñþ’b8‚žÞ7ŠÞDùUb…‡‹¥ø“}­çÄQ6˜?0îã(K±ëµ¶3Ó‡†³=ïîÇÚQuÛ EÒF|ZëYwPo£A‹Ô WuÐâ@õYÍ4“›C;K(Âþ;jrɳ槼-ŒÃJÝrͱˆ×Îf ²©-÷ 
E¾ŸÙ-m!Å&¡æõj†Æ_q”Mº 5—O[M¶˜û˜=æÈ@Yªïã³Ð)¥ÖŸyäYÃ%yì…ÝÃVÐ=f¥ »ËFZJtÖqα%æþÅø¸DŽÒžeÇ;ù¼>mý+Äþ?aŸ×'mÌfù$q ¨ÃIoçU§·naW™o® $½Y¤$+U³ßæ>fÓ§£`”ÑÊõלŒ†wOs̲Y‰eÝØ¬B=~ W©¹(Û˜Ý?µÕûšvÏÛ˜¥¤„(ÂVœ#éÎs7´Ø ¥Oëºf"ÊjfWÇ“A£ÙmP”­5 %Z¸Íiy®×¾ˆ[ݼ»l$¡¬½5Ÿ )°±]Ö­Dþ!öØ^~41a ÜîšÆ)®‘€² Ú•g,d¸v[Ç䘠ì\‡¡]ø€³Ží7Ô£Á@;dj·Ì$uÛÉÐ’g¢ŠÔ“mЮüßxvÚunÈ£ [ƒÃЦ¹“²}ö.ž¡Lë†-“‘v²ï9ØëÞO&C[fÐVÝŠ*ÃÐæù…¢¬Ô]À1hÉèßAÞÍÐb‚›ßy%í2ð–ÊNM¹jÂÉ6h‹Œ2DbêB©î…^þæ9pìœ+‰ÓwÇÕóšæ^ÊöiQëˆã‹í5Åd©I^Ä"·a^)0ʰµÝÍk’ÑEÌÌ+9P5¨x'óJNR\Jd^w~„s&—¬sÑœ¾«oº¦K0öã£lǦÈ0¯'ûÂ5Ra8°©°I«š[õþþ(שü•îû”,±)‘Ã_ôQµçÂö¸&ùݵ¯ÉAŠmø±^GG±X7?–7ÄPvÕ}×6Ru$¥e&”´ÙªF·O?6‹=ÄÑÃæð· «9üØ”ïµ@=Ð?9hˆËeûtü}ÆŠ)O¶ÌÀ!”åi|r]íÎ1’Ë̉Р[QvMöœ“+ÊÂ'ìÅN1ÎûæéÁžì @YgŸìl¶cª-ËôÑPF‹”]Óƒ=d¾i ßFkAðy°rµ}6êQ/Õl‘ÃåqYÒT¸õÁPr1=X 1ñäéÁZµcfŠDG× eÑåÓƒ½ä;(ç̉žûc,×´‹©%i”U³²V}°ƒ6Ö½õÇV³C¶ÑzŠ#™Ôá»bABF9ÊÎ黪1G†Åé»2!!3ßô]e qçW¼æz‹‡ÁqÔÄ ¬z?„vH°h5‘ H°È€ÜƒÓw‰ ¿DÎÁé´ž3üe뜮ë3oÓߨ}s½w=rŧ+yš»b552AfY@YªÑ÷8UPó–O§õ”ù÷Ä®N·­VTjÈF æ[{ÌWþ¸‚ZºU…àyÇ8è5ÜÕ ôù©Ÿ5ø·Ó¼Ë ‰uÔ1ÜÕJšf‡\kMþxSr¯7ÝÆ)¸Õ\+!»ô:ÜÕÜ5œk<ÏÓWšù3²„ÓPºòüe´T…ÀÄÈö˜«pÕ«´úð\d{©Ê›+eˆµ¡Œ íUÓ@w†µ²Ï 8?/LIKzî*ÞÚ|£uC@_Öoªƒm7M¤úØþÚ9ÎS nÑUº\ˆË1Š%‚{=·8Kø¸Ö›K ·½O„Á-Êh‚p‹&êj!¸%Áí»jîè7#¸¥N·N#ñh&·%-‚טß"6‹;ˆùm©Né|,æ·ˆ;0¿-»bGæ·¸&câ·ˆfÛ´hð[\’>'á·ˆcËüApL[˜ßÖ°;bäߢ<ԘߢJ¦AÀü¶>5Nâ·µ—¿}ÜoòÛ² ~‹Û1»a~‹}‹Ìl—ø-z$3ž&~‹zÜËÌo7ÈÀžù-®I““ð[¼p²ïÂoñ #ü¶ìâk¿Å5‰Ä¿E=~«ÌoQ–è˜ßb@ Û%~ëñ[Tëaì†ok-—Œok0å5Ç%ã[Â4z‹j¼ÑB©PLÓÛÚ>-dÀoDðÕØ,0¼E5¦”ok'Òàbx[¿|¾$ÁÛ’tƒá-êñ€ex‹JÛUÌnkµ9© »E5â=ÂnQ±5³[»£ÛútŒg ÝúÐbtë½ÂèõxZatë½ÂèÖ?B·¥Ê½´}T hëß=C[xmQV ÚÖv3 %h‹*‰/Gжš&¥m«9`>LÐÖ+CÛúd´Òah‹Yæ÷)жì²&жÖ#w‘¡m­—éñÚ¢3†¶õQ'ÌÖoÇÌÖ«²EµVšŒlkGÓª€‘í£!ÛZo!ÔKȶÖ{Ùú‹%dkæZm5®Óßd[;“`#ÛúyO‡@Ø­¿ô;Nt?Þe·þÖ˜Ý>nDì¶Ôsß‚lëà  CÈwá¥#[{ÉLl}ä3±}܈m­Ç4—ˆ­a&¶Õ’Ñjˆ‰í£ÛjDs1±µÇc`k†S€­ùlÍ$`û¨GÀ¶vË\E1°­Õ˜9°5'@€­Û-¶>¾Ø>êÝÀÖ\k¶µ mà0¶õ¯“±­Mk‚mëçÒN¦(µõo†©­÷?S[ï¦¶æƒ µ…%gªÆÔV¶nÈÚúðbhë­¿7†¶þx m½[Ú>ÚIÐöq?‚¶zÚúÌÍÐÖ< ¶ÞL†¶æé ´õîlðfÚúÈchû¨GÐöÑB‚¶Ãûa\k„i­5Lk½ã™Ö>ê­u³ÌÌÖ{ž™­f¶~?f¶zÄl} 3³}Ô#fë‹™­f¶n¸˜Ù>î7™íãvÄlÍÍfë·ctë’Ñmu ió¨¡[YŒn·"të=ÂèÖ¼kA·îQºuוѭ;T p}Þg€û¸\7° p}Ð2Àõv2Àu‰®ßÜZö›˜äÚŠIH®ù r}€1Èõï‡@®÷&ƒÜG¯È&J£Áñ\ ­5Î> ×prmÖǤR1”»Îqö"—Ñ‹›?Ý•.‚·¦±„ *ȱ»Ìrá·2f–‹©D_„åÖLŸ½;ån›` A¹Èű»Œr­LP.Ô 8–™îvIl 0Ýšei’ 
aºHìDágÂt­[„én§rbfºÛ!äI˜.PÝè•h.”H3Í…~7‚i.ò%škeJs]æ"?-±¥¹v9¦¹H]K\Fiî©¡¬Ls!eA£•i.F ¥¹ ï½+͵W&4YȘ3Í­©yéšLs·K¾¡¹PÆ 8j¡¹®i(4× ·ÒÜE¢:•æ.l´æ.©K4w‘xo¥¹«Œ>¥¹¨4Wƒ3ç®òÖçnÅ+8wU\*<÷mwÕVÁ¸ë»A¸¸9‡\*ÇÝhf×.É·fâ˜ÐX0®DO*ÆÝ5Ž[1®ÈbÆ] ǵxYá¸+lŸ›l­)ÆÔ.oT(Á- +·ÈN˜\™‰— ÜÑBe· ¯W™¼\݇p‚Ë´O ®®â‰à²¿§×.g—|'¸®û¥W‰‡\»\öä•àêÂÀ .»Jpµ-Npß »}Ô3„ËÌI®•Ãåµ³2\«g Wê ÃÕgw†+ϧ —Cd”á*oU†«Ò.ßN®¾ÚÁpi ÃÕgv†Ëk|e¸JPæJæZ‘Á\ Ns­Lh®4ó+h® P£¹\Ïi.“¯¹ÜÓNsù#wš+¤×h.?ºÓ\î²Íåû)Í•jFsͰi4×Ê„æò5‰æ²)ušË‹K§¹-4W8î©ñ¿Âq/‰–2˽‚ÜÂ{½r³œkP+G äZ=¹ZOI®†E+ɵ2!¹Ò+$WŸOI®µEHnžô ¹ ¯$×î'$×ʘä–wÃo·’«eJrµË”ä õï Wß›‚\9#f W¯ ×! ×®É8·h­ð\k¦ð\íbå¹Öá¹…Y‚ñ\ÝWž«®ƒò\+žk÷žkÏ Žg<×.)„æ."Š¥47ÊÚÎ…u~G÷N3[Òq®_Žqî’¥ñ‚s [œ»h„˜à\{4ƹ~;ƹa¯yGDpn”‰Â/ãÜ婲æÉ–P\ù”ââË ˆL¡¸vM¡¸‹dwPŠ«iT”â.*H.wQd¡¸KR͉`µlR\¤s¥7Ãw‘¤JqývLqíÑ…âZ=¡¸v¿Fq½S\û…â.ª´.ׯÉÞÉ;ѹæ¸ÅõjLq•-Š»¨Ì¶P\¤&/yŠ©“…â.Iq%S\Œ/ JŠkmŠkãR(®—1ÅEynBqÑgLx™âÚ Š‹kr¤0Q\/bŠëÍdŠkÝ"עââS&÷ S\@¤x¤w-Zƒ!îb  q=Ó"×C\|ßôÄ GGnÇ7¼ŽDˆ‹\æd÷ân«ìÛ ÄÅ'I׈k3„@\LäÄ]Tü€®Ùa¸f„áÂef¸ñöXG®7“®:<ŒpS `„kþŽ \í0Ï¿§‚ºŒpÃé'·¬"Üuç‡ú6„û«%…z!¹ œTÂs79}- ׄ$ÿÙò¾ôÂ"9,ÿ™¬gçÆ×ó~þ³]ë ÎÝõÎÕ´SŠs³âUƹ(£Ã¶‚s1Ãs~2ɇ–¤;5ZÖˆZƹY ˇ¶ht¯äC[{V6¹IÂæäBÞ€\ÍS¢ W“uX&´M¸xǹõE̾ø\E†sMe—ãsJàøÜ({GI—ä‘(ÝÅx9JwÙÞÕ^ÀSSð…eBSꦙÐV‘ ÕLhªW­™Ð$ âÜý”Ärš MÓ5i&´Eve4Ú.Ï ™ÐVUà•Lhªê™ÐøœºdBÓøvÏ„&’œ ~:'WãLhñÙÞD”An|[”£ƒÜ¼it+ƒ\|”KN@n|á<%ôÄ9_ãÜ,3†¤@ Ã@s¦@Ãѧ&)Ðâ’€ýœ ¡w<’-ì‹zH ´h 8‚sÑ-ôV$¦–ßåhaõr ´|JWK 4ìURܾ¤@‹ûq7I–‹jór ´x>–³höÚ9ZÖéHpnTãí0Á¹9‰Â¶¤@‹fÒ7¬)ÐtHŒhùÐ<`œMŸZS åCÒÜH 4LK|M†ºXl’ÿÏP7^ï£K 4”1 æh¹¨°/§@ÜFŒER é·¥)ТžèÍr 4«'pëó7§ºèG&Åœû ý?¿8Í}¦vIsŸ¡C˜s&'‘Üg˜­ùÁ8÷YÔcê'¹ÏVÌœû,&yNF!¹ÏðºY ˜sŸE½í™þ 5h­z§?˧p&IV±Ç‰’þ ¯_åu'×-‹BeæºjC$ýYÖõš¤?‹jŒý%ýYÔ‰N†QCù–$ýYÔ㳤?C39ø•ÓŸ…?$éÁ8ýY¼MfQ’þ ÏGRÒŸeÙ*ÖôgãmJâ3ä}Gyáq'N|¦ùcìâ!a»HIËò»œø Ëù’ø —$6-‰ÏŠìÕkâ3Œ½'Ù­C‹s€qâ3¤4¦¾—ôgáÈq²+IÎá{d÷{‡ìÎ×"‰Ïüy9ñ™?'>à ~GB·~£Œ³9ñYÜ·($ñ™ pI|6Z/)ÏíöMRžY÷JÊ3Œ’òÌZ/)Ïl®“”gHVÏ©Ù8å>3V䥔gÖÉ’ò Í|뺖”ge‘½IyfVµ¥<‹ß.dw$åYü ÛIyfÃ@RžeåR’ò Ÿ'M0œòL3}kʳ|J\ž¤<3ë/)ÏБ̲9åY<+“³q)‰ÏŠ4iþ³(“PTΆÁM“~Ï#‘{KòŸÅ[}?ÿÙ% [óŸïŠè>šHùÏò¥™óŸyë% šZÉ‚VV1n’-®Éh% ^S΂†¶p`+gAÃ5ïœÿ¬,oFr‘ügÚ·’ÿ¬d^Wiþ³"a›šÿ,Þ¥èßrþ3§wòŸ¡Ú;* Õ Ï]òŸ OÔügåþÊ›§¢ùÏÂLö]ÓŸÅ•Èmÿ6‘…j 
´í,рϿ+kªÿú±þ+ß"¾Qzÿë?~÷çoáÅ€[oñ»½ñûª!ß„¨¿î–xºÏôÄ¿õ-cLcc¾>èøaçYõ7nA†KÝÐÿý¡÷¹ç è?|d Z·ßo¾ÿ°Ê¹ŠywÚ¿?öþw··´^µà¾æ¥%±ÞÆBv‹o)Œo‚í O9­küúÃwMÎå÷kü7EÃß¾ûüö¯ùß¿ýÛÛwÿø»?~÷¥wJ ÞíœÎ/¿Sú†;• txÛ0Ë~ñŠßèLå´Ï¿ñˆé6ù#M%=%]|ØÌ€è"¿÷òQ³RzÜ¿úÐ×¼ÑFóGôy½÷ò¼÷GôyÔéqoîó_6±^ËŸÊvbyÃn`Ý~\{FÛvcyË·Ùøj뤷 sù6nó¼ËÕï²ýÚÛ”ò;±:Zc,¾Ÿä÷ù…{ÔœG¼‹ôû^7°Ë²ì÷ÀÿÔ;üûúüùûÿúéûŸþùûÏùéüÏ¿ü*³ûöEçÑöÈ9Œal¬¸‰Èræ,a{†e»$ì‘Ï0†“ÁEª€½…1¬E/ -Ö.®û¯‘+²óŠðhˆ:§|iôED$¬›ça´wØCÉŒ=Þ¹¬AÙ91ð)Ëšö3zA©z ?O¢(ê!~‹öÎ+4ê!áOâ´2CÙÉ0˜vßR)½pr¨S*$¥^7V)’¦œ½°3ïKØ&08ñ6Š$†öwzpÆ0œòаK>c.y<ì’φúTá°õÐbúøõ<‰–y)G¢Ð…$ânéÈt-IøY: DózÇ ]H‰·,P4O¢ÉØ‘Ž“N¢éÒö¸4rûï\8rA‚¹Ó¹jäíJ lF.ìr¶'G.Hôfª±´ Ø5Û=OÍ ã_(Œïyj‡t/´JwsŠ4™éDZÏW—š0„ü©Ü]jÒ³ëÇR¢\]jÔ¾|jEü-ýºØÕžU}èçh5’—±nšŠ…œ0²VùÒã:¸jˆÇól®ÚjžÒãžcµß=lÒõO,±¦n!æWÒkÄÞ¸NTµaV‰¸ôx÷.º…Œ£¸‘vokiú¼} W›qÀ0–ž×¼å˜ŒÎ]²X=¤t÷°EÏÄjvê8´]'ªÚ Ož3¾ªÑµOÝBÆ‘³¦n!ã˜tLºÖzw±8i×â#ƒÒåµïß]lA—×úì¨ÌK›m»èFËÜw\#º…Œ#Ô‘6u ¡5–lv5Å_p]›¼»XÂÃ÷©6¿Ø_ïŽÁ0â˾KðXrJûŠñ9k-ª™)½Šn!ã${`©[ð¼ˆWµWmÇæ»£ÝÑ?ô}öóÇé™AT-WGÛ1VìëÜ<Ž4A.‡×Ìœ»2›á[2ç®åüé+d˜ ‡±Î”+”öX8M¹Bùšvqˆ´¦¹ŽXWÿ ¯öÃÆdNY_Öqj½ÝsÕÐ~Ig¦NGÏqù0ظg¬|¯Ã)a*kqÜä8Ž?•µÐ:Æñˆ{F=©4š8VQî©kS™ù<ÎüÝSב&ê¦ãäÕœº6%‡sÁ=uÍ0Ù ‚)[_è7â„Ø”-ô|üÔE­@hž"뜱Âé8Òö{Æša>R×å6K?& Ço1ù+³QJGj@¼?%Q'Œ.`á é/g|ì ÙþáwΗ˜ÍïƒÙƒ=Þ¨ \¶Âf…\6TN“P)–-ˆ6 ,[¥¿ [á³AˆÓ;|v|ü:MŸ=bTLUøl¤Q Ÿ­1™o xö°tUì)xöðˆœå¥tÖâ®Î1-•Àµèl\·+ñ:q„§¿?àìCUœêÎaYߌʎù(ªZ©¬#x¥²¹T6læp]:£úVý¼g#²ÚžS‹ÇE£ú¦Z)˜­c!­û fã ¯1Ûy¦l+º^˜4@[!³vžd6Êâ\™l}µXE±ñ–ÚNÅ—L+е¶ $6 G0HlÜR@³‚Ø(=E~ b㘳(³b«¢ ±GV O·ToE±qp¥€Øx Piøñ 2툴Éý.ÉÑøQæGà°Q…ZõÊaã:Ð`á°Çƒæ ö¸§Ô¨pXÿ²”Æ>ÞKhlÜR–V ±G6ï¾ 06’d‡0¶nê —éV‰¬5 ÙãíÞ!²Ñß()P$kß&¬5< Ùã:Yp*˜=:Y›(˜}¼Ã³ö5ÌrºÌF6/Ì>®0{µÌÁO0k_7ÀìÑôÞŒÈÚ"Gl'”ÈmN³-+oJdã–ºLV.{tÛwï,{üìaÀg#MN €ÏÚƒÌÆ¯ŠÎ”ÌÚG¡è¨â9µ˜=tw¸à²ñZ Õ”ËzCT.{4D}gá²ñÁhCT.{¿¬•ËÆu2•Q.C†®¤”Ë·”uŽrÙ£ÀfÏ.{¼º”ØKHi-W¹l¤ÉyRpÙ¸™.•Ëú×'\ÖjF±l$iûV,kí@©¬·o¥²Ö†ÊŸŒ¬Êz™(”õÂW(ëMRѬÙŠf‘M±­ ÙG>Íã‚€lE³ñ꺠höhB(ÍZ)™µJ0ëu `Ö?³64ÌÎÎâD²G?!ŒM‘¬— "ÙÃlCjS‘¬q(’õ>U‘¬W´"Y›BÉrF"—)ùR"{ ²£DÖÛ‡Ù#›Ò×)‘=sE"k³"Yï#•ÈF¨«Ù{ë=Œ²ØãIBô•Åzå(‹õHXìÑ„”© ‹õ[Y¬Õ›¢ØÇe/q¸2Šb}¡(öh‚ÅÅz÷-(Ö¾L%±>wTk3jXH”ÄmGè®’ØxÐH!±‘¦TXI¬çEI¬½ž‚XïÄzCP;›£"X[”Áz“S{4cÙ³R{ÜS>E°>ƒë}´"Xœ"Øc¢-e¬öXªIa,öh²é¡,öøb ‹õÙ­²X¯Na±>X(‹õ P${T§t¬Šd­Ç’µÞHÖæž@²lu 
²ö)‚ÈZ‰ÈZ‘}ÜSˆ¬ÕˆlÕÙ¦Y£C@³ÖÍÚhöèäåy‚f TÐZ?D¹tÔú\8ÕÚV¯iÖü4>®÷Ç<ÁÈlÇ ~dÎló‰èCL+ôŠbÚ]?xPÛø˜¿€Ún /Sj;ÖzŠÚŽÕ£XôÚ®Yµ_„¶ëF±ªBÛqt/¤¶aØåžJmc,¯l+iÏÀ¶k†2Ø6ò2RbÛpÑç)¶]7€5pÛ^1à¶al¢eÅ·a•9kôv-÷‚Þ® Ê=ÐÛuÓ1妷‘ •è*½ ”1$àm AÜ€·fÉ„»"’îò>Â]ý„—º á®ÐzáÚ=rÇuÚœAt×õ]qíÑë ÈÑσØU‘îÚH‰é®Ôeƒéúû)Ó]7å“éÒa“L7ìuUÌ+L7|sĤL7ÒTÖªLwÅ ™îštÿHwää…m_07jÈ·˜;ú&„¹áÌ£Xaîštd"Ìo^6¢æFΤùæFÙÊ®`î Ë)Âܵ`úš»vÈ-@sW˜\çÆódÅœ»fà"àÜu'BUœ»îD¯ŠsGQëú 87ªAtÀ¹+Ü…ˆsýžŠsW—×úS@q׆ÒÅe"Å]áuFŠ»ˆ.Š»VÌAqÃ2W¸6`îZ ¡æò| `îZ ‡Ì¥ÿ+aîŠýTÂÜ(Iá# ¹ñl¥—Js×ÊšK·`2ÝH“U˜n‹X…ºW„+âÜøu.¿sÃôVá±âÜȼ¢WŹ+,Q‰s×J²¬87ÒdYœ»Â 8דçFk•ÕpîȦŠÀ€sãTÕ{àÜ×Gв"a¥¸1©üW).-¾AqÙeãŽO@6¤@q#I ×ßF1î Û9r\NôÈqWx’ãÚ—ŽûL›w-è½Áq­×Ç]áfCŽ»v^§—£;9®Õ8n|BKr­‡Q’¯'Hntnòá‚ä®-$wíÔ½ž<÷ÕTq£§4FŒ £2ø+:nŸö1ãzšbÜèvµŒãÆ'/Že«dR9®§)ǵA׊7ÄÊx•ãÞ½)®Ï ¸ñÎ× ×F:`\TqW*¢qm8SŒ»Â9’w}Ò[ë`oùÂ'½µ^ôöœÇÛb‰Gj»V™ÚFŠl~€Ú®;¦ ¶Ç©faËJmׄ™¨m¬D*µ¥m¾Q[è»HmíKµår™Ø6æ¤ÒÍÛ®0'%¶]v;oÇuzd ð6jmV3Ø-ëè6:CÙ󺵙¬¢Û–‰n©m"º]q˜èÖ&iºµžèÖÓÝ‚ ‘ÜŽÜËž©‚Û¡ nW¸ÔÜÚ¸pKöCpËø/·$'·±P r»æ÷'³ †ÈGÁl9ù ³å…Ì–Snc¶p4f»ê®™-g­d¶ìÖÉlIˆ*Y™­½:˜í ç[2[+0۵ꪲž%, Ô2Ûåf¶ëø÷?l%"Ù×€­€6Û˜ú‰v À6!P€mâ9iÛ„sX¶ ñÌHl‚î‘Ø&ž½±Ý:I¯Ûí ‚HV»íä¸ÊjNµ’ÕŽ'A쫬6JjÔ ¬vëªsªÝ4$™ l;”  µã2¡Jj·Îb„ÐÖNýChÛT’HíKó yíyÔu kw d!¬í€‚Öîòĵ]iÐlZˆÍ&„i%šM+-Í&ª&ši27'šM 6/€fíž@³ ñN‰fÓŠºšM8J4›ÌŸAÑlÂH¢Y¿NÑlÚPC@³)½fC<._‹¢Ù„¸+D³ Á1ˆfSÒ“h6!^ ÙlÊ¢ù¹­_ „6™‰‚Ú„³þ$´qO5XPB›Ì.Am¢Í­gE mÊ΃ÐFVd} BË6!€ mB/Z¾ mj¥€Ð&ž\¡áB´¿ hü\Ðm‚­mÜR ­_'¤6]vÚ„È…´i£¹€Ú„‘´žvÚt™NÍòÃ$šMé±¢Y~a@³üf‰fOyÍ&žËšõ{*šM `h–Ñl|ÐL•̦ü>™þC ÈlàÅÙÖ€gý2峉”ÁgLPÈgã:iöà³ìEÈgÏmƒÏ&„³ŸM×qzÙ(Æë×ÉdãKÖÛ(“EOz"YË.Ø,{(²ÙÄsÊ`³¬`E³Ö¯ÍZ¿6k•6k}Ø,û_²Ù„ d³ ‘ Éf,’ÍZÿ 6ëÏS6›x"lö®gPY{kPYÖ3 ¬U7 ¬–BYÞR™¬½2˜¬#˜¬}ê`²ör“µ’œMñJ8›hàlœ¼SC³žC…³žC…³Ç‡åT6½ïwi•ÄvRÙ¸î=u-{=@YÎíeýq e­‡š¹“l<ÍZ/ 6ëiÊf¢°‘ÍZ{›õ4e³‰– ÊfãToªlÖ> @Zëði­o:)-û@@ZkW`µv™²ZÎ3kmjXk8`­§)¬µ{ÖÚw X;º{€\…µÉLÖZ÷X›t”°6!¶0a­`­?Oa­ E€µž¦´ÖÓ×&z×&šš(®MõK\k¯\kã3p­_wâÚ„ Ðĵ‰^ ÀµÖRÀk=l= lmP°µŠ°Mù]ó»ŽÀ– šÀ–¯G`ëiŠmù~ĶžŶ¬qb[¿§bÛX§ ¶iÛën lÙãØrM`ËY€-û^[{€­€-'Gu},Õ?懰ŠÈöG¼jGÒ:lUHK&¡,€´ÚN¨‚@ÚhÖªý£EíÂ4xÕ.Ê$iU»~ê¶S­ «Ú N«Ú†±Vµ,Vµ Vµaa*æêT;{uªmDVpª­ñ„G-ÃsÑ£¶Vʌţ¶Ò†µµÀ=µã%5óð¨­8v@ÚÊ3Û—Ym½ 
Ô¦6BêªÚÔúÔ¦¶Z.¨MmÅ).ØÔÖM£â˜M-‚Ц6*\H=|j»‚>µ§eºïЧv\§_!|ja†>µ5Ã>µÕlÔ§¶Ò$>µ5Ó7V}jGû3úÔÖB©°úÔŽöÙßáµ1ÉU(«>µmS°@ŸÚV(—UŸÚQ—Ú”áSÛ`2DŸÚ÷©Ë§6‚YI…ç¶5Tõ©mtFOmÌs•D«O­ßS|j[‡RnµÑ€E#¬fµ­žau©ey—Úhðs.“ÚF·0˜ÔŽ[ÂOA­jÍrVj;Epªø½ÊFÕª6ŠšÕª6FYIÁª6¢1«g‚Ö¶Žãa0¬m0O‚am£í kf74¬m\Ãð¶/Téªam§fù2¬¥ŽÚ¨am‡´ž†µÝd¬jXÛ©ORÃÚF#MÖ6šßÁ°6VAЩնv¤.«mmÛq¬þµQ9‚ à_Û!Чm?uG0®Æª*V5®mÜ8€qm/4%PãÚHS𫯵1ˆÈ·ãÚxi±jPßÚ^éM«¾µ*øÖöLR|kí[„om§0¾µ²HøÖŽ–;Ù÷6~”ößÚN=|k­á[kÕ¦¾µ­‚ÖÀ·¶SºßZó…om_!Ÿ…o­'·6î)yom/xøÖvˆ­`[k%óÚNQ1Ìk;Îaмv¼‚n Á¼¶¯ÐOü¶ZïªymÌ4%À¼¶U¦©y­UŸš×Úã`^;Ò`_ æµ}ƒtæµ·y­åæµþ(˜×R¥NóZÈÂÔ»¶S… ïZšz×öé>¼k- Þµex×vJÍà]{ÌVgS‡wmÇ:z×v*¾á]Û!_£wmÇ!*z×öDE¯z×ú=Õ»¶çËEA]k÷ò.Æ4x2¨km‡P®µ] øhZ;ò®¬¦µG°hZ{çý´«µvµ£>„î¶o$ÅjWÛ3¶øÔ®6§ðYíj­ª`WëijW÷” vµÇBf~L°«Ý;h$ìjÇäòWµ«µ×ƒ]mÏ”¿ª]­5ìj»IjÕ®öˆ+tWíj­\`Wy‘ »ÚhÞJ½Õ®¶ã1íjíyjWk¯»Z«ØÕvž]mÏ8rùÖŽÚÑÜƒàŽ¥ <Ô·–c"lk»É…ŶvñÇ Br1þøƒíÒÓßâ-´X¶kÁ™ŒF¼Œ?Ö¨’Eü1¨Œ,þBûÌ@d”7‚ðZˆl%ŽE 2M²@d–E!¼áh,Ä„·Ðã„·¬4ëU›¡G"ÓO „7"I…{$2õÔ`$26vF"KT#Y¢±"‘U¾á ßóòæl÷ô'Ô-‰ba…º…ö" u#r¼êXêÖ uk¡  îž ¨;¾;ýìO¨+UÌ*Ô­…^ uk¡ßBÝÚÞ5½4YÀêúãêÆ‚I6©@uGa+Ü­tšÜ­ Ht7:0EœJwG‘iDÐÝJw2"£D€È(-ÞâÔphŠwëÃíÖJ˜TWûe§º‘uuóª[-„–Rݪ'(uc¨¡ÁêVšïêZá_P7ÊáÍi®ü81níïcÜqb™ãêʈ—Ò"`ÜúʦÒÛ›ß :6rɪÒÛÈ:Ä*½µâ½­EL!nÝ!Ä4YâÖž€¸‘%Ð qÇuº¨ÄšŠè@Luâ6œ±àcŒï‡àcر"Äm‹§MˆÛw ·qö‚¸§hq+µâV®£q7:ô*Ä]  âFÞÕíA!nÌi”5+Äm 0ˆÛô”€¸ã:ýÖqŽ›â6F4Åm<¯ŠÛx€·­Ð#â6é!Åmë»JÜÇ»+Åm+ªw£§(.C¥)ŵ*ÅmQÐ\ë~As­M‚æF•* Ušk¯šÛ4¾ ¹­QÇ«4׺5Ð\/'ÍÊQê¬4×zÐÜÆ°¸ ¹VÔ ¹ ñ…Is) #Í¥½ÞSincä\¥¹V, ¹V} ¹#MÏ.€æ¶•ß“æ6†›C²f1Дæz6ç6ƯTœkcpnc(Mà\§Œ¬áHƒ‘µä& 3YƒNŸ8—c8q.óIœÛg“8—µCœûL›87ºuÁ²JsCÌòÍiòýçræFœÛ6X6qnÛÞsQ8f²saOœŸ(Šs©#ÎmY§ÎmÐé~ÌEá×#PY Iðøÿ/‰ek=Aü©¼Þ-òvüéÿò?Æ·Þ üÝÞQþ~-#âÔ?öÈrD[›oü'?r|ËÇ¡êãEï¿@…ùgç fÛWÎ?êóûqÄòÌÀõ—OÍÁ«Ø_5ý…†³ŸRg^þÜ翊ýÌÀù—¯åàuÏ_¿‘“¸hô˜=•õ-§=¤Ic[Ç5òô¯?Ÿv-ÿ²Žÿä‘ï·Ÿ¿¼ýÇ?>òŸßþóíçÿË¿ýü½j±ƒ”×–×Ñ)~÷“ò?)¦cæ6¦Ÿ­äïRõ'}BW9ûç?¹Å\}ò'v•ú–Uca~ÒÈ0¦g/Ÿ5*ådz_qs?±šE/ñ)e~<{y>û3ÊühÔùñl-óïè¡¶ð Ê£C_û>úºEèàÑ/޼=úå­¼ºïñ˜1­½’}y>e¿ž²ýÑǼÜÇbimñýçdÎï<#ïcÞ[{^R{K©D‰Õ-ö×üñ?Çþë×ÿþòå—üí—ß~ûí—/ÿÛßÿ/ÿ¡n÷í»Î¥ÕØë˜Z±ñð.¢tľˆòÛ­¡l°‚«Óï*‚àNVc³ævkÈi·˜ä!+«°XõO·=³^›XÍêä `@”-!²³®ZŽøŸò±vœÑI¶6‘ß®ërè³…Õâm WØÚd{ëp雥r èÛJͪŽ4Y…<‚>èiËP«ß– K“ãpµG,ÛÛªÁÚ½dé}ˆq/ ÈO‹­òi£» 5ÄVù´Ñ]±òî+Ö¾zð3„G‡î‚Ì1[€0F©‹±ØÂ 
æ]Œ¦•´¬X{4íóHñÂÈÙ}Ÿ&Öí²Ëiû´ÓM*NŦA6¸!î«8‹Å‚búºKêȦ~{g±1•SõØ.ꋃ/LqJÝco÷v‚\Qd»hÒ£¤ñ±í:R¼àdD}ó«%.±Ø8× íúuzŠ’Ð+v ÷°éNB‹-óé§»èÎl‹-óé§»HOÓ–iu4º»Ãˆ”>Ýt¤-Iƒ=,Ú‚Ú’5Ú¨\ Þ &óûlKÕ ˆàiô_E‹zŸþºÛ±ÁÜ–éhzÃäI#£k{rÙFo«èV,¸M;⛈³®à¦ßþÑ·ÌN¢E{žÎºˆÌÐ֤ѠHo±q>u)¤­ƒy -ámÓQ£iœëÆ[¸‚NgݤԵ…ŸëtÖ]•ëEšD{@`%¦µ.›ê ¹»cUQ]ÛÖ¹£kýAo´ÞÖº›êE"éV2ÝŸR‹ØnÓTwÓ^³mYã=l*Âl¾ê´–¡¼E@Çiª»é¶O¤I¼ÅÖ4Þ†¦¶u‰÷r‹Íó«w:üœ¥có|Úên:€¶´j¼‡M¶#IÂ=¤×AíÛæÓOºÉ–’z@ßÝRÖ@›öÏ- ô°é6}¤I }ªîŸ–šÆ{ØÐ„S×xGsI»†y`«N»†yÀä¤åEÃ<@/Ùòªaà ßò&a`|Ûb7Ít“Ž‘&a®:ÉY<° xA;kœ±O®‘dfÑbŸ\#=h¡Æ>ùŒôd Ñòw ô€c5‘&’n‰k:CýJ««z¸ÚUì”ßsSœlh5iˆf0vÊçÜÔ®Ëâ¾ó-¤‚ÛëY´Z5փ廩¦º-\$¦ŽØEÚŒõé‹Íï{rºâH ÍÅœœâT mœœ¢ÕÅÎØ õ€µAÓ­£ª"¸HKêS–5ÔK¥ õEáÐâàòœš²ÑÜË\LÉt·µ¦ñ‘¥ÅVùœšâÀb‹­òð'"Zl•Ï *ŒÈ#M> \R‹­òMúU9LÕDuo&å­'|àã$VâùAF 8z”¥wÕoøÜ‘°)`¯úǘÛX2IÄ!jq’RC@XÚŒQø¸]c@ ¨FÛQt{³%“Ä€à=Ç’ 1 ôãi»Æ%å\ÛÅwÆ¢×´Ø(Ÿ3Uò^4¢š´Ø(Ÿ1 pΡí1 ôkݛƀhøÊ÷þn ˆ?s¦ÚeÆ9JRc@t… }Y5ÆôHC™õe“ øBF;Ð pEÿDƒ@T‹ûR4*=Ò^1 àÄß—ª1 0 Ç¢_ÓIÝô€žL NÕô=]µËv£ÀýX6Ù[äsºj×m¡úš4 š^¤Iˆ¦§ztžsÖŠhýˆr÷²Á×µÇ:bcÌʉžgNZÛkõ¿Jˆû×®á_i×ð›NÇ ªápÈ¿ërŽaL"I¢? CèÛ&ÑŠ.µzDtžV»Lá<€õ#>q™ÍôRXôª?»">üα3Cµ"Øì$ÁÙ’¨¶ÂU¨6Œ‘$ÀXm] ãNT[§€¨¶tבՖª'Èj‹Æ£'ª-ÕœÒ ªÍ;³©¨¶,ÅÕæ®³²Ú±ÈÀ+(«Í•hXYmF„@²Ú @X›+©«ÂÚŒÅ`m,Ó„`Öf¸/ÖfÌkÇÂT£y_°öhÜwÅÖ¦×Ù%RÚ'RÚk;PÚDõ(mª¨0PÚDE(mRß&BÚD– H›ÔO‰Œ65€p0Z³‹£M0¬"£S±mx£*UUFà³b€h“!hµŒ¸@P›`êGPË×#¨¥#H-=y‰jé-JTKß|¢ZÜ Õ&ˆÖˆjÃyþ=T{YñÒ²˜ÀjĬdµ4'«M¹ª²ÚUY-ƒÕÒš™¬vä󵔥=¨‰g“ÊåIgiCK:KÃkÒY M ˜(è,ãÎŽlêt–û‹Ä³IÇ:Ðل㻤³#—Ú Àh©ŒÖžv"Ú£8U­]QöA+„–qq@hý½”Ð2, ­µmZ†·!¢M8³CP»é),rZFÅ!§M‹âv`Z"¦µË”Ò¦KyPÚmÇJËø<ĵi¶VZkåZK“mÒÚh[RJkŽT‘Ö2ií2Û(°·Ö&œ÷%¬M8@BX»áha-›·ÂZkÁ€µQ²²§X›p’°–škÓ‹°Ö> ÐZ§"­HRJO•Ö&û#­µæZñÌäó­M8gAZ»áð%i­Õ,hm´Ù­|Îi-c}ÙÚëÙúã”ÙF‘)—Ufk=òÅlýQÊl7X&³mzäÔ˜-¬%ÁlãƒQª¬ÌÖ ÌÖ ÌÖ Ð6*|ž‰ ´µ’´µQÐ6Á–Ж¡ÑHm£›”ÔÖßO©­Õ¨­çS©­åÔÖ:d [ëS€l9æƒØ&ä#²=Z²Z~ÜdµŒ\ VËš!«Å|‹¨9'ªÝvuܨ–å@TËÑ›¨–!QmÂaS Z~ÓDµ„‰jQË$µ¬e’Z6*’Ú„s‰$µ Ç´IjýyJj9ƒ"©õëÕrÌ'°e÷Il˨>Ķ\hÛržIlË6lkY±­½°-»b[kcÀ¶ G~zËÂøhÐÞs#ÆùØU¬ÅmÑÆÍØ¢âö!«7ÁîÜ$·‹š3›äƒüCs«,ŽšÛÌ4hn3X5·I]{ rGz] w+gt°Ûñ…eÔÙ2ö3u¶ˆÐn:ÛŠ³œ`·&ô¤Ð¶œø [lÚSakð [8„šÂŠSØvH o…-´Q¤¶!:õ-¶;Iµ²[*uÀn)&4…í®1SØ"'Ø.¦ÊnCú¡JmW8jÛRdÒÚbSS[PÔÔvêf•ךœ›šÚBR Mm½^IQ­5jjáªhšZ<É4µ•STÛ°¡ª¶A­AU-PTÕ¶ ®¾ m ;}ô´ ò%êi;U½ÐÓvŽAUKi Uµ«*ªj; 
ªZ*o¨ªEXSÕîä¨PÕ²tÁk·ÈØv[ÈJ•Û®û7TµÕ¸Êm7d¢Z ’(ªíߊmcú h¢ÚL U-ï U-kˆªZ(L[»£ùS[۱µpL6m펂¤¶ÖnmmÓÑÚÚNê m-õ'ÔÖ"òƒik;˜(´µ‹o*l)Ÿ£Âv×…Ž)lw,Ì©°µlBaËʡ–N…m‚œ‡ ÛŽ~’R[ʇ(µ¥< ðv[±Ü¤Ô¶C®©-‡”Úvª™Ô¶ÿô¥ma^JÛr@¥m¦¤ÒÖ^J[8ìSjÛAa)µåwM©-Û¥¶Ì&¥¶ðÚ6©m'iUzݤRQhmO‹wSÙv²Y¨lYUTÙ¦AeËÖF•-ÅjTÙBBc*[HLeÛ R¡²¥¨Z[¾µ¶i)µ…¤¶jÜØvƒ•‚aÛÛ¿À¶aa(¹PjIÛ ´Ý t¡´Ýq Ôv[ a´]0a¡Ò¶¡­Ri ù›)m«îÄRhÛP&ÚRáO¥-ÇC0Ûm¡èJ[Ë&”¶”·RiËQŠJÛŽn‹J[>Ï•¶SìàB[%Ò.´ÕNˆB[(MhÛ±-u m©â¤ÐÖÞ B[~øÚÚ“ThË‘„B[œD5¡m‡Ð™B[N£)´Å$Áu¶Ú'PgËÁž:[¾u¶¦Ý…ΖYQf»­ÏÁlm\³ Æ ·Tf»­Ä‘&³•NÔd¶˜à™Ì¶ëªÌu`èvÕYÙm¼ coÓWá­é‡!³í ÁPÙb(%º²š-*ÛN­¯¢Û ÁIMeûšÞ˜¼¶CåIy-ë„òÚî@W䵬/@[.QòZÀWÈk1]1}-¦¬¦²Ý!æÖ6Þ­­œò"´µÆh»màê ¶±æ’òTj;’t•ÚžÔŽ¸6>OÙ²®ÝVªsýCÿ(­]nZ»Žÿ#Š[‘äý=”kGñrmI6.4¼ t OèÚš@7>SØ Ñåj @w$Áš@nP®­õrÍ6âB¹1í‘Êå$7ÞK¥»Jr=óBr· ˆ$7r"ëuÜXÆVɵ(÷œ™øv%ßU†k- ×ê —‹2\NÉp­¬¨¿µç)Ã=f«rOe¸Ñ›IO†k÷ün†»mµÂp#I>z0\+j p-+Šr£Ó•~ü…r­ʵ–O.š1H®?GH®?MI® ¹Ö‚ärºo:ÜF Jrí#%ÉeƒäræAžkŸ"x.×8¦Ãµ¼(ÏI¨Œ Ôárϵ²¦—ïž»mjyAîMâ\z™ —9ϵ·ϵœ\8×®ÎÝàÇBîŽîŽ*Ü”¨<ת<ת›w±#~š«8çylˆ_j–Šƒ½«§‚0àkRß¾kPFÄíû¢Šß3’gßW òTݳo*›a¸ž=i|Õg?÷LE¿ž'ÝáŽ)ys 9Ì ¥|ß«ž¬*ˆè±^ü M]ð¹ê­ê#ibIÀóȱ ®ˆ¥ÆöEHÞ‘67÷eÕs2›"¿ñ?ª]OÚè" ÆR"û’ôÔ.ö%‹œ‚v £#ÔÃ)›ý=äÓícyµƒøužÇ‡iʾ4¸ÊÞó¾tU/ÀÞt_°«¸+ìÜý››UYÚª”–‘&gIwݱÙ×MEpõÚפûyG1„‘÷dî°[ÛW ±3.CüÊ›Eì㯲AßtWà\ØžO튶«ô}Ý@„Fšœ„áܾ-º Ž˜ìû¶êžUÓÏfß6¥Êè%÷ðºÐèóÒ-DšK,*îÜ·¬ÐU{ú=NÏÝ"¶=ÔcéÚ=ÛT®•îqŒ-MÒ4…“ûq@än™àâû†m¸¯ïiQŠštaO«õͨ䴩²£)ôŠ4ÙvE=%ÚH‹,bO™6¯¢ÂØSCâñSU¸q¤ÁþÕ¢S{íDljµ'1î;NBKG–v=ÊšD—0n¯’D]4Ù`L:’ìyUþ¿¡Ocð;2ÆËž“J’¶f,XU/Ð7f1M8ô2Si‚ÞY,¹ªÔÍ77È¢î¹Ë}V‰ÊžwÝNÛ0ÒdPo¨!Æ·¡*?D¼ÿD%8éµÁ»—MwÈ!ÑÿP·±ØâGš礛¿{ÉzzÓÓ;û˜ã‰â%¡Ÿ.U·§Ìi/M· ”9÷TÇOïŠÙö²«Èä¸{]¸,‚›½®ºYƒ]ä}ÌÏlüˇU7YZRYǪ;+{µ-X™.ïµè>‰êðöZUŠˆ[‘&¢-l°»Ÿ{í²û áÒø¸u£²³q+=DÏ—k …Rú©´•ªý4Û¦;Õ×Òí(ö–U ‰}ÕñXŠ”Dµi¢(€&moUwý ·Rq3KºõKƒXõ SÉÔ6ÒdŸ­¸/ºÓÆúî«j‹Y’}£ðO„ûXAœ£¥5Ò°…®}·í.KôbñϤ”{Uµ„†#ƒÔÆè 1ÒæöµŠKöÞçŒ4¶¥—î;õ´¢ß= êܦ„hßeAwÄ9›3‰]4‡‘¤ó]ôXnßUÄ¢P½Ç*è}Ù(QjA´DöªóÒGšÌNQ\»DÃ8tËsyzî<œ‘ÝÆídzÊ÷‰oHç§(Ú#QDáP¥Dñ ž©Œ×kŒw$Êâæf¼%nVÆë×€ñ Âaˆ÷q¡"Þ’j•ðD:1ÂëÅÄë ˆ×ˈ÷q¥"ÞG¢"ÞÙPîz5(ååDÈ0¯ (¯×(oAØ£¼3“à»%q#|×êx· àŒáÝ’¸Í¼ûHT¾[žÇø®iÚý«þhäÞ3#éÕÅóC©[ThJÝЄ@÷!“çÜ)a5®*=)Ð-¼]Øb=ºÜźrN|7cŸ”7cs”7¯—‚òfì¯öf¨{óò¾@7¯”¸B¦ûÚ­%åÍpà0}.vIyÇuêçÊä`—{*åMØÎ!åMåxRÞøŒä4+po®(ÒUÜiš{Ž ]Ľ©b¾ 
Ü›°½CÜ›*fXÀ½©€÷Бe¨o*—ÎVyo*Ђ÷޾LE"Ê{ƒ*©ØWxoÊDÜÊ{ÃãBßKyoôœs™Ü›2Џ7ÔçoÎyé˜HÎë7SÎyLÎ×)¯…¿h‚Ôœ7%,¢ÁyéWyrÞ´\‚óNC7+æ=FžÙÖ€yS†@˜w¤i;æµÖÌ›òûrÝUÚ*ì¥1&aoÂFaoÜS!±ÂÞ€£ú{S‰TØK‹WÒÞpì}ö¦Mõp€½‘Iº {SÂ*°×n©¬7’¦6¨×^¨7m+êµfÔkSÒ¶¥‚˜NÔ›6²èN?V¨‰õÆûË7 ÔÖ]²–êµê¥-Q¯}V@½ôg&êµ÷ê| <êµö ÔK_x°ÞøTò+¬7JLÐ X/ìú‰zí2 ^:Ñõò– ½i£XQïh¡Šê€zÓ† °^¸õÆeBø€zý õZwRú£ õ¦=(P¯Õ&P¯§)êµêêµ–ÔK×w¢^+E ^+* ^V(H/­IzGšN”ôzR‡É¼n€ô¦ ½Ï49N¶b—¤×†#Þº¥Ëéæ*¤×†|Þðœ=6p¯µ#à^ë1€{y¾í½6÷ŽÌ+cîµ~¸—-´×ʸ—ÚĽéIy­*Ay­Œ@y­@ycDà /ôy£1J«7-Ø‘äµa—Þõ„¼Ì$¯}`¼Ö6ÀxÏ(ÚµbÚ܉rh׳ h×ÓíÚ[d×Êd—%dd—aŒì²[1²ûHܵ¦C,«d—fd÷q[%»cöâ²Ë…‘ÝÇ3•ìÒŸd7­Ú÷ï"¼ƒá]Ö¼á]~‘†w‰Šw½¾€wÄð.OæÞeûŒó`x—! ïFÁ‚à6FD’/þÆ»Þ\XÀjx×›ø.Cxß}ÜV/C`àõ—àe<¼^¼^“¼ ¸j€7Â2@ª,˜—ËqbÞ”¸©Ìk ˜7–ÂàÊyO*`|×[(/»2£¼Ñ/*æe¨2üþóz æ½B™àõ¶ÎûÌ.裀w¹ï:þýÐÝ}γ¿Bwç¨ap7Ã8À ¯ŠIÝ… ÷»]ºnwä­2!2ƋȭÆx …È€¼…z]…¼†”òÒo7òÂÈŽŒ7ëÒ/Ækp›¨‡4‰zs’ÖH¯]¦¤7Ã7ž¤7oÔ+éuzü"½y¥YIo†0Io†.I/ù6I/í8ŒôîtpPÒ›y^¤7ÞLÙ¬’Þ ël’ÞÌ£‚ཙGÉ{¢‚¼wÇ* ¼×ÞNy¯¿xïÉ €o†´_k2]Æ}w•±ûît:öÅIMÔ‹ç)ï}É|óò#~hyU\ Ø‹²w)/.3-¯æÍµ¼HKœœ" ¬—Ÿ a/û/Â^¶aÂÞ×­žiUŒ—Ÿ”‹yá«IÆ»Ñù“Œ÷¾š†·«’hw£¨kx¥ìí⨣ÝDÁ(Ñn¦Kxùê&á•¶ûð2^¾‡ixñDò]+òÝ”št¦­Äº˜>>¥»ÌGæ¤fªÄºřĺöîĺ›ì6NLeÉ¡Ò]4'—î"‘\מF®kùté.”æäºVG亞!p]«,—î¾o¿{Ý 8×/Îõ7Pœë*Î5¹³+vñrĹV}Ĺ–â\k‰®ÜEf]¹Ë ç&çZoâÊ]æÖ”»²>tœ{æc÷÷1ŠûÇ|ªhÌÊݦ°Ð•»éîªòЇ?˜&¤»¼ÊÝŒÐYnÏ ˤ» Ó ¥j;K˜ +h‡¹@Ç„¹‚Œ0—”Ôa®†<#ÌÅ™ ƒ¹ ‰­ÒÜñ-Ã4×B¬)Íë(Е0jí}š»50n†QˤÇ£–¾F-Sî‹0jéaÔø43ŒÚ 'è­?=9nD0WxŠøi ´(ŒŸ¶ÐƒÁâ§A%¬ 7bi\3¹#í]ÕîH‹{ÄŠ”Ç ÈW½Œê?­ñ½?­SíŠøiÖ[ü4èç-~š]‡øi;1-ã§1¼©uŠeHm‡™áÔ:=Nm§÷Ã+œZ‡”“qÕîˆmˆ¨Ö kaD5†™cD5óbDµ™!#ªuêQ­S'‹ˆjýÔñ"”cë1”ë‘¡ÔXL¥fùÓPjZtÆRëˆÂÅXjÒeÆRãçl¹TTŠXj; kKÍb¢!–Úõ1ŠÚ®œ†AÔ:¤jŒ¦Ö©4F45~bWLµƘjüÐSi ªÖÐ)2¨ÚÕæNÍ®@8µFŠpjŒ0ÈpjÖ §V6N­aÍpjò` §V±Å†pjM¥YŒ¦ViWŒhj ÝÈhj >¦ÆÆÆj|o†TãÇÄjMå{Œ¨ÆêaDµŠo‚Õ´bŒ¨VÍj÷U¼*AFDµ³FT«t®Eh5Æodh5†8dhµBZ„Vc3`hµtÂÐj….º­Ö`òÍØjh ­ÆP™­ÆˆžJmcf¢–½Šo9u²Ðj;} ߯,Iõ°­Vµ§cdµ‚ƒŒ¬Vé–‹Èj¬sFV+ Yí‘ÕÊû¼k¥ÄY­Ð‘ÕØ‚r£äÃȵ𹌬Æ{2² ™ÕØðX­èÆU+^W­`Äf\5– ãª÷j\5FFUžI²uÎÀj'ÒXOд¹ž»â4'yn\¦^¹ÊsW›,²_¡Õ® ¨ ª–ñ%2¨Za 7EºkÆ ‰ÒGˆtX‰nDMÛˆîHtI¯LU=qµè¿ ¥ŠvãJ„$S´³xÈaí2Ö­¡ÝøòxMÑîšMI«lwÅùWc»¬ ²ÝRa[¼cîÊì으Îögh—ÁY 
ízÙñ2„°1^É5Ä!‹jñŽ¿PF«ˆ×彩7îjWf²ÏD†Àò$¼Ñ·LN`„w=åÂI,*–aÕºöTv9JØe\r»ã_‚÷ìŽI¬‚ÝÍl—v7³•áݪɋ•ð±Q©„wüÆ$Jx·Ýd·JxG"¾'Þm7{]^³ÂáÝvSó‚ð.ªit»Ðy‚„waá¹7¼Rœðsá%ráÍæ$ñ’Wʻ̡á×#ÎÚ:ªâ­¼vÝÊzüñ¯¯?–³`Ç?xýñÿò?Ê£p÷7(ƒ·w”Á_Ï̇Hö>´1ãäµÿ쇮Ñð¯w½þÑ쟞ƒ¶ÜÏ?þø©Oß"¾ï]×ç_>÷ýB?+þüKŒŸ0váÖÀõüøãç>ýUèWÃýåk9xÝó×o䤭ÇA§ÞRîù-¨1%ëºÕÈÔ¿þ|úÁüË:þ“GÆß~þòöÿ4:ë~ûÏ·Ÿÿý/ÿöó÷>)YûÖjkmä𻟔?ð¤—ÛDîë膿ÿIÕŸô)}åì¢ÿôFsöÊŸÚWê›î‹Fõü¬á!¦ §/Ÿ68åçÓã·Ï­î,Až?§ä§/_yú§”üѾóóéZò¿ßÔ~$wnut#5–Y{*‘¹G72†€W/òýž’|1þk<åùýzÈöŸòÚé­¬cµòþc²?æwÑÆJ&æéyÉo)•(¯±ˆ3rFüÏñ„ÿúõ¿¿|ùåûå·ß~ûåKýÛßÿ/ÿ¡>øí»N±Å—ùS:м¢ÑyôÅV@ûX÷é{I-Blt>î›Ê âö±/G;L£êH”èn+j{Ä »c¶¬veQ DÿIcËïßòZ=ÆF†ªXöFÿ2„ÞÙ#Vëµz o05šÛç[;¯üNŒ!6¥oë¸ßO)]«Çû$ôXë}™þ¼9â©kͱJ¥!„höÆ2ë§ÒîcÁ©ÒuÉ*€h‚LGZùéUïëKß!»•#±ªP¸*Dâ˜m\ëEĉM¥Âù~¿1ñJS ¤alF1^§Ù”@ŒßZêÕlcGSJ>E)¼lŒÿû©ö«ÙŽ ³Òœ11R­°ßvÌì—«Ù»¯R(kV-Ä®ç°#mŸÐ£#2úºCtTCìiLæÑa©¸ÆžÍd;`×zìÜßÌQ›Ä5o =ÒöÉš·†lÝÆôz2®>”ë&'È‚ÀèñÜñùþ´ÝÈcC€ã‘˜„7oZz$öy”GÚÚ&Á8 iW1F¾ñÉ]M8væ?®‘Áyl÷8Gwp#XéEÚDÏÛ ¨¹Ž"©7ñàæÅµ:Þ 5Kw÷e(ÏJlæÐ㔌Äýî}ƒä —Yãšôyg»9œGïflÌ ŸwО1Bþ´ÞoðAÁ‘(ð»c#±þÔîÞwC°¢‘Ø>﬋ÃGànÇp¹‰»Àç_N¬{î=¾ ËHëwç;•w­HwùÔáwnü¿9µèðsð¯«s«òHúÜX<‡éîŠC£U’«ÒçÎ&2†¹twÅ‘[¡Åë!¤Ð~‘¸ß2…E#qág(GF76:‡»Gæ6ùH\•Cw€ëuLÖ»GާiGV6Ê)”„¯G ¸«)Sæ2³Ê* ‰E847¹FbUaEEÃŠÐæw—¼b’i"¬ÀþHìB¡cE?²‹´[”c X ­@]ÔU(t4í;ëªòŠ ”µnB¡cƒ\¤­5©Â¢ 8âZ³Phn­Ä¢ ¨ŽÄI¡ãUtS«Ê,*œcÖÚ„B¯0T‰]…ÜIÓ¡Ð×îâ‹Æ?•Úk‹Ì„mlm«ª,¸A´¶Mf¶'µ¶¤:‹Ä*jYf¶Õ‰¢´H*i‰EfÂØÒiU¥‰\k2Ž]BÙdY[W±Eáü§u™ Û¦Ôè ßÓ[ŒéÆ2gÂkÆô®¯2^7l†¬}SÅE¾~›sác~$%Ö“J-’å"Ëdøñ¨¢j‹ÄvÛ«Ì†× ß|o*·H*àHœ“á¸P[fï*¸Ø°'3š¬L‰WD¹ÝørI.tìë±9%¦`áHÍEâ§7k2%N¬Ø=©ê"aÒ·g™ÇækÕ»Õ]l8U‰sJ%£ßÓX¬‰ôbS¡ÖHl2'^WÎß÷®â‹ !mGEÈœx]¹ Øw‘_ ŠG6E~±â˶¬2%ŽqD^r,øU€ot[’L‰)·‰Y%“©HœSb¿kQ Ɔ6°-UfÄQ!2æmci&*Œ:ˆm,ÐæŒ8ZcÕ+;Õºâ¯(SbO|m˜¬+æQ[ÔÌì‡WôõÛºQ‘¡f$Î9ñºB:±¦+sbXLÄ,sâjKsÜ¢vf?¼àßB™5ûa¸h‰2)Þ0íÙBe8;bƒmí:)Þ0o1£Ÿ1|ÀÇ¿YdR|!„-–*i¶aýh¶ãÀ݆L–·mÓùðŠÁu;úŠ»Ãür$f[%ÇškvÆ B>D¢Ì‡W¥·ØÜŸ}ñ‚Õ¶5/˜+o±»?;c¸pÄ]§Ã ¦C‘¸kg¬h‹Ýô×;èRa‹S*³7¶F4cN‡íS·`&wg<>ô()élØ <"Ijo¬ò€m¬Ïd6|·‹ˆÆy÷ÃËËlyüÚt¼°ÿ Æ4;`«ºÔu ¼B,´…kô쀭îó³Ý.ôþ ¸yO—Cøàwi½¨Öðë_¤õêh çýÙ³#K2™#ÈÀH,2^v cE6'À î"Q&À sÜm,×dÌùÉ8³ÊLB—k¶­ÐÙcŽuÙœ/;[E9›ŸæÇNÜ]12~ç ãæ<{áç¶Xfàæjràæš°¸¹&ra¥Í•¸’´¹&ê‘@›ëFÉpsÝ(Dn®v,¸¹âä;qsµ•Š›+•7W!s® 
ÍDò\_‡ä¹.\µ=Wc°@ÏDeϱøÅ›({>VÆ25{.;W‰`Ï‘¨3jèb,õDÐeÇÊz¤9žžÓíBu tÙ¹^¶-èò-ý¸­"è@:1‚.‹•A—Î5tq²­ z$bª ºPwKZ`Ð…¸ :`ŒV ôã®Ê KC'I·Õ…4Ž‚. º4(„‰ ý®Š G¾ èÒ!_:ΞhíA—á t©üjÀ  …•dÐãJð.0h{?0h?0h?0èÔ ƒŽÛjÏýxMeÐ ƒŽ?ºU]lïz$±*„‰:‹"„.õú‘…Ð#= tPíx¡KÁ–tä˜Yt)p, ƒŽü€^+ƒ.ÚÒ‹A[N€ ã­( èRØwA—Âîúq¥"è8¦= t)0!‚Ž¿h›‚.…Ô Ú6× ¡RaÈd@SíÙQm—AÇuº×] ‡ 葈1ºv×@Ð+AÇ3u7:®ŸVýx¦"è¸-hº"è’ à+‚öÛˆŽ ô ‰.+,’è’9ÈD—Bˆ§ 6C“Á )‰.ÔÞ“D?2¤(:>í¬€¢ãJí¬€¢#,ˆ»¢èÈŽ¬Š¢O$Ús=1´EÇ•:A“Ž+¥O’ŽÜ+’!ŒöŽ@ÒþJ¤MAC$íÙQ"j_"O|ýª(Ú3y¢h/.°è’9d‚EGorX¥ÐÅöäA¡ý†ö† íŸ0t\ F­ÚË úP”½‡¡½z€¡K~0ê9-ÎF……CÛÉ¡­ÀÉ¡KÂì† ºd ¢ãJi¢RW*“‰.<èCëL›H¢¯¢$z$¶ó%”Aû5/ÿXètdAùô| ೿0àóH|~?®Tøüx¦ÂçGn>Ç•2Ô>Ï×WììÙTø,W(vöü;G¢LˆSìü¸R±sI;?®Tî\`¸cÜù‘!åÎq%Ø»rç¸\¹sI„'wŽk£‚;îå‘;—Œ¥Á³ÎÏ^pÏþ†Ï+?—„E)ôãU”B?®T ý¸R)t\©Û`J¡ýB¥Ð%KV íU ­¡"þêz~ÐÞ OS]wl@  Ï_’@?®T]7×@ ã ™•@{nËê•ðÑPÍ?æùf(ZÄSøÌ•² Ÿ‹aØÍ&Ùï’h+}¢è¼óÜ,…Ï ½E]øLÉ4”ÏK[S>W Rù\xhÊç³ÃP>ï\óSù̼šþÙô`ѹsÆ%8`ÑŒ›l,:®3À¢sÁ(z´GŠ«E›jœ(:gÓsC ½›ôú…¢½hÀ¢³ ~¨†ÞMÓ¬,:›‚,:˜”®¤Á¢Ã^IyXt^¿Á¢¯7(ˆ‘÷úMÉ3£-y΋•‡’çˆ5£®äÙƒ<3⯑gšzÔ¨  ç0RÔ}  gúzŽsÚª¸Vò2|… @ÏáÎ¥íè9n]=GÙégögÕµå‚=?n{°ç°ŠÔÏì9Žš›ÄYü”1Øsj×"Ô9êAI¨sªFº;?;ÓïÒ°süðX±sÜö]ì {£Îó¬„òæÈ²Vºòæð О¼9eSD+o¶£úäÍáѯ€¼9Z¯¾x3o¦×4qóã®Ê›Í­–¼™¡¤Œ73&”ñæ”ØrNÞœ &èÎVàÍQl€´Ê›SáwÞlÝäÍ)g€7§¬fkÆ›½6À›£h ÐVÞü¸­òfZ:oN‰]¸go® œ£èÇâ<•o8‚…”À™Q 8§Ç4œ•ÎŒ fÀÙG çèN¡íVàì­Àùè×”c+pö pFÌ3ãÍ^ùàÍþ-‚7§dVy3c©oŽÕ!¼ùñLåÍ>`‚7ÛQ2àf/3àæH|7{%*nö¼*o~¤)oŽ›j¿ Þm\'màÍþu8[ €7?., oG¦¬Àù‘ÙÆwØ;qŽK;Pgÿ"ž£Y*­?Á³·V€ç˜÷èž+ÀsÊÔ9?G ŵâgoÊÀÏþ??^Bñ³9ð³w¬ÀψÏhôÙÐçT8§Wúœ2¦ô Ï^9#£tNÔG:3N‡AçhÁ7‹–¹ðFâÜÌ‘˜´9ª]Òæ”±ÍLÚlqH›í|*™3½\̈Œ9§è Ì™±oŒ<_dX³Å"!p~d®X¬;-ç´‘‚8?8?2¤À9m¹8û»+vfœbgÏà³7@Àg‹j@øÌOŸý-Ÿ½dŸÀÒà3ã|Ž ïÂçÇ3>‡õs¹~–™ðJuòrEAçêl¡H#ƒZ Î {iÔ™!8:ÇmÁ £‹èì›Ôa* :sžDæls2gë®ÉœmFæO|ý¸b,s\²æyÁƨ"(e@fo]@ÍæÿFÔl¤™¡N4›ÍI3“i~¼‰’æ´ `šý²zú(h^nмŽÿC‚çeŽÌ_Ã̺ g˜™êÃÌÛ·ü5–s}fU0gr’/7z¤:_¦hÛœ5tý÷àËç¯Ëï[jdz ,'B6Ë_3ÖÈ0ï4¢œ¨£"QNôÎP¶CÊÄ$Ê< eD™g‰Œ(óô‰r^EŸDy# Q¶‚(D,²I”yäâA”ú(>”Á2VÊ”-?J–ã¶(+bÎ<¥ó@ÌX¹1g;†¨ˆy\‘™"fçÖ@̈ñn„9‚ 芄y7¥1ónúe%ÌþH%Ìy!âaŽ€ºçÂìwUÀüL›sêxIµÞP¼œóì8ðrdCq>ðr^ÈW•.{þA—½Á˜‰Ê˜í퀘=M s^ÎsfÍNF˜½–@˜ƒëˆù‘¨ y§¢QA³Õ8säF8s^ %+gö/œ9/F¨Á™=¯Ê™ÏTÐì Ðì@³7€fkwàÌÖ.̼óUÌ/æ Îl_79óNe9óNlCÎìW*gö—'göÛ‚3[U‘3Á™=Q9³ªbfï 
3{—BÌlïAÎì‰Ê™½±‚3çÅ´ËàÌÖƒ3?r Îl›¥äÌ~[pfåÌV°ÄÌV—f¶v Îì͆œÙê—œÙòHÎl¯Îl/@Ìì/ ˜ÙGbf¿R9³äÌVàÌÖo3[ (fö¬3Û=A™³9"€2Û\ 9–Hç%Ê—}R¾ì³4ðeïˆÁ—ý&_æyiãËž¸SiñU¾l=ù²U<øræ‰wòelÉ—wúz/›~ƒxy76ðòn¤xÙ"ñ²?|y7ïðe{ÅË×YspeÊ] 0ãfÆ—wl_Þ¿!jŽDV€2ï¦Àef±fög3Ûvbf;¿M̼c‹Ä0³%ž˜™Â8³½ 9óŽ}ãÌVUÎö‚Î;öÁ 8û•Îþ‚ÎV‘ÎÝTßÎ;4œ;UÕÎN(ÎvSçÒ[g{Kg{gÏ€³¿€³;³ßÀÙŠÀy§Š˜À¹SÚOìlÅwqg»!¹³• ¸³ÍßÉmY@îlÌÀ¸³• ¹s×ÝWÃÎ~aãÄM€àÙJœàÙËàÙžû­<:7îS’GW#ÙàÑfLm%Kí6ÑàÑ•Â7ió>‰´EL0"í‰J¤#X‰t”Í»D:Þra%ÒÛ‚H[Ù€Hg3%‘¾v¡‰¢ãÃ燢h/1¢èjl\YôÝÕ‚B[»…¶®Ú_šÚ.ƒ¶> Ú»=2hë§À ý‘Š ý‚ö2#‚öDeÐöÈ@Ðþ‹y`(‚öQÚ;= h׈ í‹ƒÎæÚK]9჎+ÌU(—NQáu§(ƒ‰;´tPVm/Im0!t1F»q:Lì MsYcÑ6ô‘EjÈ¢ýJ°hëÀ¢‰§þ "‹ŽDU¯œ,Ú¯‹ÎŒ÷k,úêÖ¡Í?Z´wC€ÐÙ᡽ B{õB?BûLÚ¿r@èl^òJ¡¯‚…¶>ú‘¸q* å#0ôãÊ̹ðWTÎ6)#¶öEþ÷ƒXYù³ÉªÈŸs1”®üùq¥ògÏí‹?çbYù³}]äÏþ‚àÏÖÉŸýÀŸís'Î…düùñ*•ó`¾ŠòçÇm•?G"€øÎy0ɬðg¿«ò篤u„¶ÊŸ‰ÊŸ½ðÀŸW*þJb×.í üùñ*ÊŸå£üÙzLòg£VäÏþž'ÎŒ—@þœ3q/ð3~~fÈÃÏžÅϬŒøù‘YåÏ+•?{¹??n«ü9#×ÊŸmŒöG*…~¤)…¶I)´×1(tL‹TÍœ9)VU8ô=q'}¶Q›ô9evNúü¸Réóãå•>{]€>ûmËã·Ñç?hµQå“AwúAW†¡§:=8³ˆ ­ê!…FTsBèñÐ ¡«‰¨¡…æðeZhš›zÃ8CI4õ$€Ð¾Å (ôHÄ"–=Ä"¶o(£#&½®ór°á2æ ëdÌÁÄ)&cn$Œ9¸}CmÇc$ô ‚ùCï¦'‡¶èaäл¡]phcûäÐÝ‚Cû•àÐÆïÉ¡;§Dwy$ˆ¶xLÑ Úó£ šÇ G7J‹È£Íý…@ÚhH[Ô2Ò4´p ­Øx4šÕHë…Æ£‰œˆG?Ÿ}ÃÀ4a¬i4VÒ8kA mKvi¿@Ú0iúˆ€H¹!‘6æC"ÍU“iã/ Ò¶\v" ‹æ…3q,kI¤¹Þ0"Í5Žió1"‘æ͈´Á'ÒP‚‘H ‘æD̈t1‡Î™8Ü."mŒ„DÚ0ˆ´ÁGi#^$ÒÒDÚ˜‰´µWi+é;d‘t¶0‰@Ò^6@ÒH:›Å‚I8ºU‚éÌ-’iË+Étf?O4M퀡錳·†¦ýM 'ãp7'›Î[Ȧ=·`Ó~åÎÉ8ƒû-¯ Чíõ§¿ÐKpúdZ €„ÓVj„Ó™[`„Ó~[ÀiOÐi ÑØ‹›e pÚÒ§yÎl:qà$›ö›‚MŸ$žÄŽ"‘´•‘t¢b€HÚo $meM$9åv$Í+ ISÍ|#iÆß’ÎÜÂw$ndšñ,I¦Ù$¦YµäÒV¤äÒÉôØàÒVÞäÒ~[pi¿\ú™8gƼ+ñ4›¡ái¾ˆáéĈhÄÓ @ÃÓ {ÆF©“QoPjÆ-5Jp>Ì(5MR?enì¥Jm$î¤Ô†àH© ÞRûý@©…¥¤ÔžJͤF©¯ÌƧÓ#6¡ÌŠý†àÓˆëix:Q9KþeÿÉ#ëo?yû÷?¿ýçÛÏÿþ—ûù{ŸÔŽ}10Œ¥Ë÷?)àIûµg îcäüþ'UÒ§ô³ËþÓ›ÍÙKnß©o*+¦Ï.bñxúòiƒU~>=~ûäê^5,í§=}ùÊÓ?¥äöŸO×’ÿŽ~$cÌÛjŠ®±†%âžJdîÑ,oåÕ‹üxo¥OÓÎ1Sÿ5žò|È~=dûƒO©kl¼÷VÖÑßLöÇüÎ#dÕJ…ý–R‰òªÛXXìþŒøŸã ÿõëùòË?þöËo¿ýöË—ú·þÿC}ðÛ÷_æXÚ•©øæ¿žÂ‚x¥%t¹h»®hÓà6µ •SìóÅÂôU‘ͱH”³ùjÃtìóñÒ–…KÂ>ÿvþ6rõBQ£÷ÛÒ/Å.ÿÍbbê ûS‘ØK>—¦ë~.ÉSìïÏðöÜHJ±¿? 
â Sϵ\Zuïn]UF6µ?--Ÿ«µ¥c[#'ö%  ó4²P{9YÀ’í‘U˜ê²ÙmЗ×Â|”#övÒ*€¾ìÝžÙÇb·™/¡†²Ÿb{ÿZëF¢·ûû£Á¼úŸóÕ·u2“ñ6KÒ¨”5¦µûrýõÆn¥wl¦-°[>‡•ÞØäFÕß8·tŠvÒÏG[4•Ì¥MÓ–žáp‰c~õj«ã£xM›,²K_ÀÓ‹ìüj«¥¯,ÃmŸÕXšµ†qʫіÖÀV#ñn¥U~Y‡Ÿky5ÝÒh›ŸâcºèF\©[ä)\«Ç­Ž Æ•ÊBSè.PVCG¤ˆùp~c#‘†©‘xo…•Fs×4>ýºæW –Æ ð鈯rB¥#Q6ÉG—øÓ¶å³íÖóQû%KíØN‡Q>›ìHT¼šÔ¥­Ô85ZÑÚ^í9 nFÃyµÝÊàç׊lüŒ÷ RÎî6nˆÖ–E²VjÅ–q |]Ïî6ñi‰Î¼3ë6´Œíìo» J¡‡¹é£hBØÓÏþ6ÑØ²¨Q¢^Í~ŒyÏWãµ>§,s#'ucÌ&~Z—|5Þ ;Øé÷-W㥈+†×1ôÍÆ‹¶RmGdbl6ç«ñZËÖ¸ÂѲWdhôý©\·±Õ JWʆUŠ(dùêw[ÆVN*²³o’¸,œýîÈ«nWDÚ½OÙѱ¸.£¸®n·VI]'î‹'Êi‘˜æ¥öÊ%ÚpŒ|Œ¶Äùê‡c¤šç†DT†ŠfR AËÙ7žÃOUÔ‘M ä5âµ_ýp£¿{Šð‚Ž–3ûzõѨ ¼ŠÔòqåhðc&v5eøK§øëvwôENÑìÓÝ /ÌGâ½E‰˜ýDyå«™Ã×=ï|#QwrG%޾èê†Ç;c ¥yïçF„Ac$özuÃ㶨F;»ÕÏgŽž´Ýݰu«m¿Aûã›cåÒßëc ½—0µsTiÝ¥?VÐ~â©F_î1}=ÒÕ7†I]T—Gk•îHܯÙïÑÑ íN}FkˆÆÁ\ŽÑu»»ã¹|—›øG"ÝóÝ7¸XD⽇‰*ÿJ}LË®yð1hß1ÖY÷žd¡jÓŽqW¯\ëÜ9·¹×ÍŸóD®^y\©Þüi EäVS¦=4W¯\)„u÷ŠGñé’dÌ8×k:\*£DGâݧ<ß3¶J¯n¹Vìl¤]öJ㶨•˜;_ÓáÊ êã3›»ïñHí óžèW¿<Ç’z¯}«Wâ­éˆD&ò˜Ë×crÁ`²y,×n‰ç3/± Ësr¡ó‡ñs³->eõŽÄ~MÇg¥ê™¼H_‰¯uRŽuÊ=5¶ZÊc…sÏØâÝnmΑ8:¶{jÝ>ÊsŸCt$"›}况ÍsòXqÝ+œ£ò¥ËÉcá¶ÝSãÊðöy,ÜîF·m÷ÏýžG#•N~d~îû,o¬™*÷¤¸6Ì^óX³Ý¢„£7–eY^Ã0ãîý Û»]£ˆ9rPŽ9)ögÎóGÓ—•Ó˜f7˳ßßû‘¸Î-ÈãJé"qŸ³ãйÕEêcÈ×–±…tXfÇ:ÌçM¤t¢Ù"k÷ì¸c6¯)XtS¼09^ÝqŒ¿Èf›j®h:˱Ý?gÇ„#yaàñqosá‰û=I®ŒWŸ#ŽÂ=I¶ñ/GÜ{’ÍXëñ8±Ìf¬‹¸§-îIrjV›Z‰£tÄ:‰ÉGÔñ»#fàî1Øü´ÎÉqá‰Ä}NŽ3ñÆ®_›“ã²+¹2'Ç•ÃíXÊ¥{rܺ&¥Ü­ÚŠ~…%4FÑ99Îó³=癣Ǖy®qDæîŽ3tüäû5MRy$Ž¹Úœ[ÔWW#}ÂÇŽ ^1S~ï´ ©säæúzž»c¶Ju®‰E±så!1bçj“r`çŠ8¤Îuá\Ô¹§~–jý«Óçb++Ðçbà ºtäº4N€ G"ò ]l‰=Á—À Õ}ð¯Î ‹Mº•A×㼄T´2èHÔKtìK …(ƒ®…- :±üV}lw¿`ôqU__©t$âõ•J‰ºrP*~*ò2©ôüG‹F©t$‚±(•ŽC±h®J¥#+¥Ò‘È‚*‰Úó€JG4"4¥Ò‘°§Tú™(T:±T*‰À!J¥#;EJ¥c㊠½è‚iPéHT5#¨ôqHÏ*}X7êjXðt¤a]¯xú°uÔ×T<íG-§ÓGm^§t+wÂsÅÓÇ¡OÀpœ¨nç¼ `ÚOf*˜v›4€éãBcÏ"Ýõl ˜vA€i?™ >íjàÓ~R|ÚOŸO›éð´[“Oû±5àé£aho®xúHÔ /ÅÓf :í§~§Ý)x:Ñ? 
ž~^¸ë1kÄʧÍ(xÚö^|ú0ŠÓ¹&úÏyâͧŸŒòéã%´uŸ>^^;åÓÇw£;7ʧݚ|Úá‚O»óøô£A*Ÿ6§ ðéGšàéG{T<ýxÅÓ~&xÚOJO»C𴟳ž¾ý•À¥Ý³\Úý”K»% ¸´¡—vßpi?u Xí8'¸´ŸM—>Φh‡¢\Ú*—>ÊtY¸´Ÿ/—öƒ—àÒ® —>Î’iÍ+—ö³dàÒ‰ri?..m‡É€¥ý<°ôqrIgÈŠ¥DWKûa`é¯%òà…¹Š¥ý°´ŸùU,퇀¥çyi;¢í§1.í'À£DeNÊ£Ÿ‰ElÜ”G{ vPégb׃iðy”>JZ×ä ¥½¥Ÿ‰+íO²,4ÚσFû‰M0iž &í'SÁ¤ýs“>ÁEËoï·öo@ ´Ÿ`öƒj Ð~¼úyåv®Òö£úy Ì+Ø ‚@û±,%ÐÇ×£íG ´Ÿe~|_J Ÿ‰8šþLÜç4˜çƒ@ W*~^¹ñÜ Xšh‹ ý¼­è#.ª6]%ÐGW!£´Cöáúñ:ÇÌÓga£`èHSÈ }\¨N1ôóÊMOÆnÜÀ91ô#+Š¡Ÿ †>A†‹ß_ÉüF±fuÇDaôóBÑñHì˜(Œ>®ÔKaôq¥~ç £­NaôÆLºueÑ7QýLLê(pG…æÏroåÖ«òçãu?j¿¡øùÑ©(~~–‰àçg¢:4Yšâg?9Hq»MŸƒZ=gïs*öѸÝ?èZgêg9`þT?¼sõsyC›‘9´¹Ú’Cû„ Úª† Ú¾-‚èÃHfŠ Ñ…$ÚÂ$‘D[i¢èlØÕb½‚²EGØ:øE}í{rh‹ÀEm>zDÑ£/F›%¶L¡Pmñ·H¢£)¼J¢ÇP„Å4Ht8÷AZ|’èÔ¨I‰#ɯ’èà˜ºŽ‰NT*ˆ~\§ z$’+ˆ÷. l„fªÑ©²¢ã™Ú®¢-ÐAtÄÒO :®T}x)VíÅšcÑÉ„«Ñ^ðÑ~L :¹˜[AtÌ€ž:Ù! èÔ¶ Ã€R{ èÇ•/m±šˆ 7Dd):ZD§NIž‚葆­€è( ­{€èøˆ¡­V‰zŠ Ú?€h¿-@´ç :Ùù€hï@¢í3ˆN./Vn³¢#ñ=t$¢€@¢G~ÈŒ•DGè Su(É…DG(¾’hï#ˆ¶ o$Ñ‹ô&Ñ÷’$Ú« $ú‘˜ÚmGI´÷F Ñññ¿~U]Ÿã@Ð#äÚ{T èd*x èÔ°Œ$ƒN&è„~\©úq¥Bèdz@èhþb Díý? ´÷c€ÐDeÑDeщª}EÑ©²¹EûªqjEÑ^x/™ÔÉP´O9€¢çh} GʶB{›„~$6F‰Äˆí§ ¡-æ-!´ƒ&„¶Ê„Ð>…„ö)(´ù+“B';ã §Õ Ð1!·J¡Ç•ªã%…öN:nûž8úÑ+…¶üBÛ#É¢í®„Ñ6^ß0úêûH¡ÙåBÛ(Lºi­B[GCmó:Bh›3BÇð ¶Rh›ßBG™(1…Nݨ±RèÇ• £m"Œ¶ˆ0:ñ0"a´ 9„ÑÖýFÇlºk¥ÒÖ‰OGë¢:}·ÅÒÖKs.E*wÓÆ *íy<©´­"H¥½¸@¥7T*ý¸­Piû0¥-1¡´E '”N;ŠLÚVödÒÖÕ’IG‡ A°2éHT:&m42i‹”N&mC ™´­˜È¤m™B&Ý;2¤LÚ2éÇm#ÚKH ´_(m£¡´Å ”¶Ñú†Ò^€Ò¼(”¶Å¡´¡@éX°j;”~$*”ŽZÖo PÚÆLBi/8@é¸Rú0i¿+ ´_¨LÚ˜™´÷ Ó6ñ#Ÿ>Ђ¢nåÓÞƒP[÷¦Š6•UÑÅ7 ŠöÂÆŒd$ÕPE› ªè +EѦ¤(šY;LÍ̧(Ú Qt½áj$wE§4ÚdFûF3Ù¬I£ ™¥Ñ~'¹a_Di´],¥Ñvµ”F[7i´@.J£½Êíe²6SFÛ@e´} ¨Œ6-¥ÑöS½½vîp+uJ£)G¤2ÚÞq*£Ÿþ+sUÀtáTDÛxP-!D˜TDg“KC)ÎVA´&DÓyÝÑçðD)´éZ)…¾ŽÙ™2rWŠ Mbë*hD .ƒ~rM@ã›D´)¹]@ÉÐ,›£«  ‰´¿FM«ê qV×A!›š cÓA³0ýø©C ƒöÊ2s{¡è íá:h€dè ý¬ÐA#¹²É ½FËwÄ*!ƒ¶BÊ ™ôÉdÐö)  Ú.ˆ:èbÔÛ²nãJ4S„›ÚO ´Y™•W ´Z¦]<1è Ù@¦ƒöB¨¡y±&‡.æúqÈ¡yû&‡æGÝäÐü4›ºšÊZåЇˆ˜:htT“A#²24s¥™ ú¬jgÎ@ êŸùlnúgBÍTx&ƒ®fç´Ýeмyª ýÀʤּMÈ¡ù’š.º•†.ÚO«ºh;+uÑÅX*tÑV%uÑVå¡‹ö_Cí…™éµ_h% ‹ö¡‹öB袽ºh†•¦‹¶·ºh{é©‹¶á‚ºèò9]´ÕI]´×iä]4-ºhkê¢ý´ÐEÛCq]4„.ú^ˆ<Ñ,Ta´ ™FÛmRÝ4~6]4·l^ºhNILmÏŠºèj^…Ù1JC}/]´ß tÑÖlÔEûÅBmuRmý•ºh«“Âh[(Œö#!Œ¶ŽEy´ ‘´ ‘´B%m=‹*iC©’.¾¡’ö#U%meTIÛg*i/„JÚžX½%ý6“ŽUTÒ_ç½~A?Ã-çÍ õ=Ç.ó`çñ "eV½6*Ì€ WÛ± ½nô¨V½˜¢ 
zÁb1ôbŽ‚Ð‘)[…У:ÐáÈŒ™½è2‚ïðî8aDÑ8ú/ ï… Ç\žóZDÀ!Ú,1à½q_?¢éÿãÑPç¹St3h³¤€Ytä,T³èð™•V³èQHC5‹Þ¹”fÑûkð< AÏam{Ëim;ÖaÝmK6Ì¢û'<¢»axDwf»¥Gt§Ç,¢ã2 ÊV‹èŽï7¢»Í4áÝCÀ!º3ß1¢{1eµ:D'Ð7B­¢;yœ¢;vˆÒ(ºaƒQt7ö4ŠîÿaÝm} FÑ{9iÝÊT0Êò+è|¿5Šî¶oFÑ ViÝ Â/º;TU¿èn»ŸáÝ™h“~ÑÝ3àíí¿è`AàÃêݳ©™Õ/Ú½âáÝ –Â/º„at7PÃènüG £ý€at7žÇh·Ë‡c´w¡Ã2Úo–ÑþÀ2Ú»:,£;ÓLÃ2Ú{,£;—Ó2Ú;,,£½«Ã2úv±j= UAËèÎÝÙôŒ¶q–ÑÝ6…Á2ºsuŽÑÞ>pŒîL×GÇèn« pŒîfÇhOŠÇènŽpŒ¾Õ)ŽÑÝÖ(àí¯£ûÎðŽÑháy1£»9Á1º›?£ûf•©ct·•wuŒî¶f ÇèÎ,¦tŒž—ücѳ.ýÀ*úV¨VÑ^¬¢o…jÝM¯«èîæjÝÝC­¢ýjaݹý™VÑþù€UtçÌNÑÝD$pŠî¦h€St7„:E÷D“U8EÇc|%†žUB¥¬NÑÝ8žNÑ=‘«Qt·=ó0ŠŽBPq5ŠîL £èxÆJlam]‡vÑ=½ÖDÏ‘(vÑ·‹U»èhTPYµ‹öÓÂ.úV¨vÑ£ 4ØEwÛVÓèÛiÕ4:öjÝÓgL£;·Ã3z”QÀ­žÑÞ±àÝO—u‹îÜóK·è¾R·èQŸZ¸EwÛ"ÿt‹î+.ºEws €[t\ç+¯Žò AU·h›ÊÐ-úv ºEGVnu‹öN·è[»©[´w¸Es£YtßLh¬fÑã¬p’…Y´¿0‹ŽgòJ}~ŽèÝO_øCÇ5@ϬþÐñáB¢þУØþЖz .Ñ}%KV“hU`ÝWÀ$:ž<¼§Ÿ&ÑûîiÝW%§0‰¶)­¢ûMÿ'ƒ»¢ûJ/b8Dû›‡è£&5†îȵjÆÐ><Àz i³úBÇY!ùU_h?Pm¡ãië° [èÛYÕº/ÔüÂÚ»l¡ýzÔÚŸ\¡ ×кW»q…î4Ò¤+t¿sæÙ!mà º/Tõ×[ˆýmœù÷»qœ²ßO žA (x6OK(žMõËd„!&#Ì ™Œp=$B_’†N¬LC¸¾&Í«n2aå^IÏE—æ"tÇ/OFh;¶=!…Ù–Œ[Ú!u¡0ø§'#„®Ê’bÂMÔ\ØDÍŸ1‚HMoŽƒ8WãÆ ÎÅ/ˆ³%a#q^‰*”8ù×½5ëô…¨¹¿v€ŽBÖÔlÓl¢få5›Z›Ìy5{h0çdÌy1û ä%ÜíHä%tG ä%4÷æ%¬ìŒÌKXNe52"ðæfv&Ê›[;Ž 5·dôWYsä€o²²æ`“y°ÐΗ°¹ØêC}d¿3óàúöóÌ>¸–5¿Õ:åÉ5”ÿZâÇò¸ÇøƒÇÿóß¾kßß žN¯ÔÓŸ¼˜obé_YiÜàG½í÷¨ôy§‘ô»×üYÿóïzÇã=ŸôûÞÿõÈç+ÖäÞ§õúÿxß+8úùÑå?Uûã|?æ*Æ =Æç5͸â-FÏe/Û:ÀqAÿúáiàó‡õíe\óÛ‡oÿñË?¾ýçÛ‡ÿáß>|a5c0ŽuíkÚÓoÔ3j9*×õõU•ÙoϘ½míË«Ê^Õ» ï3ZÙÛúaˆ,šàö½†ˆÄnµ/ï6@å{íñ»w~Ü20¿OËÏÚ—OÔþ.-?ûw¾×®-ÿCcž ñ!ë6Æ‘ÞJ\ÝmYÞÊ7¬f„´cæ4þOôž[-ûQKú½Õĺz[Ç\j=òu=ÙëùÉÒ~Ãì¾EÚb‰(µšFh½{ñŸYÃýüß?þô·_~úõ×_úø×_þúç¿þ®aøíËöøÙh´]Øl¾®—+õLC,’”2ò]œ-iÑwœh”Øàwí®Ë þ%Iæò™9GÙ$×`»ZR•mp•Kú%uÙéX¦Le“ydLÓt «ÄµRU«–Xƒ>Áéxª1(£M.E@aŠÖ¾°%ÿ+áÑ{qNîz(›,“Ä„êAñ¢ök}¾PaX²T0ªŽ*aÞsÒÆL×Ä2SÔœSX®•—XŸ=Éa@áFq¦kQ<3û„/'ÿ££ÅD(â#ȵÌËÔE˜ø³k½:Ó85°å¹&W%<1éµìœW>&»×s@½ºX;YÙtZ’žT…,‡&A—ÆcÑñZnôK…ÎkÑ7Ê‚¹L}«ha}u½q> Y) eÀµ0VwÚbÕ^Ù’‚°)`8QR¦Ñj¼t×2ªw³XwS"¤ë‹¡ß¸VC3ýt§òðä:sGµtÐ.W_2­Âç>›“ÎdæB™2º ÀõsåIWæNt¹ý¾ëÊ"HOyÎÉH¸0•BŠAxàœ­öR Yë[°lUö+rÜõŽw]º#9 Õµ:—éâ+Y€;††¯ÕµÌíУY@³².²DmÜPØuŒwÚ³k¥+3;H’sÜÍ´ IÛµ`•iv[ÇM¦Ècÿøµö”»ÕÙu}‰°v|¾e )Òz ] 垬ÕñI»‚2]ÔÇäLÖzn…U–s¢ÿ 
“«±p¾$š5xÿ5ú¬±…^ñ\}ñÇ9>i×Kfò†øÌ·Ç¯µçOûkô­@çjb÷§ßói~¾Þ"¥Øµž‘ii=íI¯a˜¯ð#ñÙ™9šLSsÎÔ)"}܉~dÂJ÷Z>ˆOª6.aDºmÆN:« ŽOÚú£P>±5V]¯Á8‡+ð…îÃOd‘!#¢4_ðÜk0¶Fd{ Æ>À" Gß@‚«jŽâjum²†ÎéŒ Kkí¼céC¾H5ôXçœ3ð|mWVǸMtLNz¾ÉÑ¢ÍýmJçÃÒù·Îjçk´÷P{´1>cµë°W#íXר‘v¬ ‰F‘¶[i#Ҷ숴cBÔ«‘ö´òÐ/¹FÚuåÊ?"íÄu¹‘ö^q=hLJLׄhïT¶1Пh]ºc Ý¿nŸÂ\Ú#VBìŠ@;”9òÒ1бvãÓ@œÝ2–Ëgx³ ÄÙµ3’}ÆÙÙ¾g›„qvÈÝ4úCœÚ<-DÀmZ8Üuahˆ€Û† ¸Ç_ªZ†wåä[ϰۄH »k~,À2Þ.ÜpÎx;–äSÍx»Ð8Äâí’‹·C)z…Œ·ËΩâmÓ'0Þ.ÜæÆx»lvZ·ËfGjÔ]¸sÌ¢nŠ©uû•Q7SÝ_Q7õ/Žºw>Ưˆº™ÖðukW¼ÇÞ]´Øc½Uíù•±·Æ‡{?ç·¨[€£n[¿EÝ 4,êæBø=êuÂ-ꮈ×=êÖHߣnÕsÜ¢nŠoQ7¦uëb÷-êFmQ7&WÔ­ïÓ=ê–•þ[Ô­sƒ[Ô­ îQ7 -êÖoÃ=êÖ‡|‹ºeD½EÝå“á¶B]¸ŽúUávÓ ×-ÜÖ'èá6bT·Ñ¨n/ˆÅnÛœãïn¯,ÜæÎ·ÑQoá¶ŽG¸}M}Þ)Í7ãîø†ÉãîLc.CÜ«n³bÜ–{¸m›ç¶Ã’LÃ[€í»EH¶÷Õu%ÛfåC²ÒkDÆJ¶ûÎ/'âmÛÇJ²Ý‘Fƒñ¶Ùç0Þn™Ñ âí–ÔÇ€áöì‚ʨt/Ø1 ÛöR1èö•A77Z1êGâ6u/|ÒŒºwb½#ê^ˆkÔ];”¦Œº¹«É¢n5êŽB±,ê.9e¼½"ÌnÏx˜ñuáC|]+猯íH„Ù|kÈË0»BäÊ0{‰¹ÂìØÆ„:5ÌŽí@\5Ìöe'„Ùuåê ÂìJ/D†Ù±UJ =ÂìjC ÂìØ¬¢ÓV„Ù¶avÈõõf‚Ð6x†Ù;'Q ³i¤„0»&ö=†Ù;?¥³mÓŠ…Ù;C^„ÙãH»2Ì.œì3Ì…ø 2Ìîx ³ íDf‡¶Q±Û¥!/§!n/Ô`»t‹ÄlïØÃ`»ð[bÁ¶Ý ‚íÛ‘lÛn6Û£§Õ`;6| ¶ ý„lzØ1ضm-G°]˜/„Á¶ß Û‡"›QvœN; ¢ìBç>FÙ¥rþ(»ä+¦¸EÙ6‹f”]hÑÇ`Û¶N0ضÝ3 ¶Çi1µE°m;YlûCB°]6¶ £Û…–X ¶mÓƒmÓÉ3Ø.Ì5Ã`ÛDô ¶mGƒíBk'ÛÎlz­ÔçNùj¼GÎB#ÜëÏÝ"mH{Ǫ‚…ÚC&ÄÜëÎEp îÕÄ@ÜëÆÈ Œ{ab2î¥Æ^hXàXCîñ d°Ž»ëž{‹¸‘ÒÑ"îd'Õˆ»‰“,ÖæÎÛæGeÁvbèÆ`›úÄÚme¸ÃXÛ&$Ü •ˆµiF`±6w®±öÁñøkû1Œµî0ÖΔû0Ön숌µWÃ߈¸Úìh·ÀÐ{#fèMk ½z#Ütœ1EÉŽÞw5‹nž„»2N$áî±A¸³±qî£nÖ Â]àÉc„›cùpc¢] w.á¦Õ3ôÎÔÔBoCí¦+ø…¡7…AF¸‹Al ½8’p×Ïnµ$Ü•Ëͽ];AÂM·}#Ü2ɹ7¼¬Æ¹™ÝÚ8wcЂÐÛI>9·ñQ„ޑℜ;SîBÎmÍGÎ]¸:NÎmŒœ;qþÎÁŒso …Îñ’s'²zpnJIsçÞ|’s›v‚œ;}&w>JÎ>Ç~Sí¯ÄÝ‹AkÀ·ý3xÜŠöWâî…râî•‹ˆÀ7jo„cŒÀg^m j#¡B¶J:™ î^±ÎÏ|ë|Gû­h¾UCáÇYµÊèÉau=âÅ@7ô~o§ÀTïˆVi4ðޤˆ4ðž.‚ŠÌ»]à Ø]7®èSƽ`šEØíÒTÞ¶GÕd܇­ ¸™Ü«¡lºcoï=ܶu<ÝþåEÐ=3Ó½"ÜáS¨qõÛÛçôÛ¦ÂFÔ=NËúí…Í©Qw|94¸BÔ†dº®pè·®(P¿½0x†~{³¡ß®Gx¬áv¨©õ±RÁÍï Ãíh)u_râ›v»½Š·]kŒxÛ¶y›‚{#b¼íšhWpc¶xûº DÚ¾ ‚‘6÷†›vÛ^JFÚôgµH›® ¦ÝÞ!*µH›îi¯ø@E¤x'ÏH›ÒhFÚ¡úx¹?içÆÈÅ#mèiWL‡-Ò¦¡EÚô9³H›ï´EÚÔ1ÒNŸ°M&Ê;™¶)A`óU³›&£`"X€m+Þ °¥Æ °W¢QØÌf¶é`o˜f2Àv‘ l/|ØÖ-\H¢/ °mŽàB’T>Z›ò€¡µiyZo|H.!a8Ðz#Ÿ§’Ä ­m áJÌWZ'øFYh½}.´¶)ÍM¸ =”$&P¡’Ä:#•$+äž7% 
^†ÖŒ!LIBÅ«)I6J”LI‚T’,©Zñvy'‹jãÛé¼þÛ^É…”†x›’ÄÛK¥äQöÞm—¥FÙ!_Ð{%9G¿EÙ,ÄfI朷͒”¤Y”ùÅc¬½Øi5Ö¶nd±¶1g†Ü;¾Y-Ä;çrˤÅm¾eZ†Üý3¢Û*c!÷¡»d°mñƒmê#e³$çm³$ô)l5Y°m•1ænŸ‘“˜ÜÔbînÑ:bnªo‘·.ÙÞ"o<îÌŸ!ݦ«³È{3¡7"o*à,ò^?yÛü†»&ù1`ämàÇvMšT‘÷8ÓBDÞ[3:®‘÷Ü¢1»FÞ£PõȈ¼7z 3ò2,Ÿ‘w\ˆ® !òÞ ” ˆ¼11òÖØçyoÜ.ÂÈ;%jäu¦hä½m†Î5ò¾Õ©‘wøâéÕ"òÞ6F{ÁG!ô Á#S¯.’#÷[Am«!Bð-óƒ|ËÀ5u€k!®SÁÃt\Cð¸O½5ߢ‚o‹I¨%ß©‚ÇYTgô ÁÃM_ã/„à‰Û<‚2ŠÀ5O&jB ¾-÷ø{\:æ^ˆ¿ÓnÜ[ãï´³›"þNÔÑ2þöBÄßã‚EküºID4þN¦Bü˜ñwêÜ‹ø;1Ù ãïD·eÆß©qeñw¢ó1ãïdª6Äß~µˆ¿} m/¼ ¿£[é ñwêÔ'=;°@ ßãÚ7~ÿÖÇmü!/iþô—ùS›gŽÒÇOßݱïSòzŽ»û(wü½«ì3ðxÜèùåë"¿ÊqÇÏïZ®Ï 8þñžWðlöÇ“?ÿÑÞÑ5ïÑî 8~~×úÍþ¼€ãŸ¸‚/ð¹ë=$œÛ2.%TGyúFÁ³ý½õÒ³Ô1`¶6s¯|aM7C½/¨ifoßzûòš*kú§?®oA,Æçñ߾淭”:SŽÙÇÖÆ÷»¤» âøŽÓ?jþøö/oõŸß~ÿùó;¹^ÃõõøÎ½öø.¼çp-w9¦úz,u}¿¯SO÷ºÓ{}ë­îøÕ»>fù,¼K›ÏºÓ½î÷hóÙ©ë­î«Í瀒Ǭ²é€2çÌ}ÁéÖæf¨à޹èvû{zZ5‘Äk]r®cæüw5d=y­ü£ÁG=Ÿ3)Ì_4*ÿaŒµóÿ¥öö©S®FÿdÚnÃÁ}Ì"ÁÜZûíR¶ñÿŸŸˆÿ÷ÿëÿÿÿízô¡þF <¦hc2?棅ãÆì=ߪukÿ=¬ ë;YÆa=Vzyô&Z"ÏÆdG¶§¹N1ŽÛÆêUw7¦Xãzò¾·^°7Í%Ó8l•A•R~lNYÖ1çîv¤0ŠYm.1²î*‹O±ýú˜Ï÷´éÖ¢`ùȲÓ#aÈ%|N±µŒã¶eŒ‘ERëÛgRɽŒ<~¯ü.•Ç qÙješI:…ˆ4<çǵŽ"˜¦iosÿÑ-ûÒ4¥ØÑâ¸Ñ[ú»ˆ(;!¢LhCŠý),—’5wn —›˜¡ÛxÏM„ò)2ÆDr”-ÇÝÁƒ-Å®®ã‘/ ¤¹§k’Õ>ÎXTñžÂ¹1Úo¼p£¬ª‡N k#«NÛŸZŠT%ï]Û±²›â¥h‡_Wüúlµ¸¸k§qŠe¦#•N<aM)ÄSÑAÆeI[¢†#烲ÏËL—"ýÙ¸Ûèªq}Âw£(²GöZÚ(Ëh¥*fxÑJ‹ž²ýXžäo”AÀ“ªðÜ9ž¤;Ö}¼{ù´ÛÔe{PÀñçomnz¸î<2ÿE&”èªqŒS{l—œ}5ÊDЗÂQtõíË:ï]ÖS˜ÇÉŠdjùÊOZÜ=¢ì|Ãâ8ñHa säϹzHlˆÎÝ´íPï¤6þh¼‰nº)–I­Ï…g7íú~¶>SFÎ^=A;W¸=Hk‹ÄÇÑM¢¯FuzíO8°`7Xê±"y «öFëFñ¾ ³MêÛLùè«+¶,¤þÈä÷WWìYM=Vnžãê¯d4õ:×£fgí r÷DÙ™;ÒÛ±·k qÞ´Œ½_)$㜫^Šäð‹F–5ý4úü::Èì¬1 P²ÀrquÅ~Ýqµ3}äc`-JÜKã}(qKÑY£LT7)ÞÛÑMf Îv‘`N”É¢Q”íÛ9°r ïôùÏ[Yé#Ÿ#kSa똖þ©â]¶¢í±:ûYãRô1Œ²Xf{öYÐæÑWêÈy)×âݨøZy‰2Ùå¸î‰#ã«w½‘ZúüÂG‘è{¢ìÌýRÔ*‘u:Vìž}6éÈI§ËSŸ2ÊO3rNGÓÍ>»b+Ð6øŸlT·éa²ÏìeýÙg_W+²NŸ]1ÊdàŠìÓ9=ØÙ¼® ¦Óö`g™œ3ºéVŽ>[ôáDY¬ž}VV8"õtÍùê³’'xÓ·~öç]ŽËWžÈ[}ò­±w$ÊÎ,‘q¢~¬Ó±ùè³+RŒFÒéÜžÃìºë2Hd•Ní9Ìöã‡È*y"}6R9lRÝ~u/K#´Úq6*˜ü Ÿk×@22IogÜ)®U©H$}>$o‰Ø¹žlSQBä˜ngàºÂ"RL—#p]!Ž\Ò±Túì¬]¿‘JúL-(h”}"ŽÓΫ WàºëI¦Ï±jÖwE&Û\‚;ظNé ÛÃoçÙYyÛ:SC>:käý>c¤È/SÑG_M ú,¶c|Ë”A&ÜÎÏÀuÅþ†9õ<×N.Qv®åF™èr#µt;#ׄ )òK×#rõ;ñÀ1¾®»ê5"t:"׸=‰u"‹ô™2Ê$Œ²óA÷½W$˜Žlq…²©¥ÏA'Êt ÄÀ&l§Š\ÒËÀ†œI:CŽèó`cyX/q €Ï¶'øÆF–é|D°qg:ø†Ü¹.WŸ•eøH,½Ôs€eK†x÷øÓÙQd 
Ås;Ø+Ÿ÷(šÉéé²×w8rJ§#€;—«‘/`éçøÊ×'v»ïçøŠ¤Â›:RÇqò!Ž”ÓgòÇÙ÷¤YbärޝØ9¥Ïs¤’¦Ž<¢IÆ×džô1°6tºÒ§ãÙY±›&Io› ¬(Dé³Åçày}Ùc‡H–qUoù¹´³B5é+Ê¿®0ÌÙÔšqÞ¬tãéír¯]½E¶i'#ë6R8Ø´kx•8-RK—3~ö“>03_ŸÃ+‡Â*›”|Xeg¢Çy{ò=¬²`¶ãI6"©t]dt•8rJçõÅèÂÂ+|åcká½p†¯ü¶˜TœÃ+»BoŠY&­ÙB𒝮ªOo̪N™ÁüÈIxÑÚÌîøìª·²^®áUl =Ë1¼&X9E:é|†¯ö&öåÊì8Êæ¯dOÚl)>c“ý쪻*1#{t½ÂVŽc:•¯°µ>´‚±l”®€•±Ã˜H-WÀj×¹¡Žñ&Ê‘Bº]뎠´Ë<ÇÇÌâšcòEŸiç§@ã˜?•+Neh³×+ic|²à˜9­g„š°í7Êö#BMI™Täˆn¡jÛ;#T~½}IAl‘z=#Ô„¬E‰,ãÅwuÊ&¹íÒo«`‹NþHnÃÕBæà@·­©Šè6RÒ_ ä6<é„–‚ÜŽ2EB ·‰ºIn+´>$·•Hä6œ ¤>Û Y¢ÛjLTÑmÁv¢Û¥7ÑmIàç@·Û?€n{Æ‹Hv[’˜\ÝæªÃ Ðm­izç‚n{¼…×(ºí3¿ÄE\…ßöØÿ/0öo{f¾Í‰„Sùm†î•ü6z½"a¸q"šS€÷õ àÆaú´àöØ$,Mn§=On'+"Êoãê£Q~w® _ùm'#øm§/òÛxܯøm"]cƒ@º"ݬ ›$ºÇKB–˧J–›÷ϰÜEá>Y.G²\„·†rÛK®1" \¥$—_v#¹Ä2$¹˜ ÊÅx²Üx©ô°\R=²\ˆ;Œåb…ÑXn{%ƵÀ÷Ær»å’’å„’åVÌíäÀ¹°.7˜¬H@¯Â\~@s£>yM sIªsý8À\¼RÆrßn"&C¸o7rË!€ä–}äÖºÈmÑS¸-+©'À-f nËJ5¯‚Û8N…´ n§ÝÌ5F)¸-PýÜ,ãÜú=+¸êÎ*¸-ði#¸å§—à¶`ù—àÖSp[ÚkýmiºFnY_¤ã€ÛFÙ5ÆÛZ‘RÛÒ!<¶-Ø& lkgTj[šÊ4ImKÅðj[žšÄµñËKætÐÚñ[ ”@k#¨Êt•ÙÆçB"c0Û‚ d¶ç¥ ¬µ+SV[`­EVË «-pÄ#«-HNV—®ÂXeµV`­°¶Àa’°ÖëSX[*ÁªÂÚ({k#Ï‚c…µ~ ký8µaÔ¯E kã0 ûk $ ¤µVhmCæÚdõ$´õÚ$°'´-ÈBh[ÚKÉmØÈTÐv&¢ºz? 
­W§Ð6üM®/˜mÜ ƒ•ÙZu€¶Œ¢ mG™Î”mã2•n*´µ§h[`tFt[}@·~Ne·ám&M vëÇ ¼-ðq&¼õÃÞF³Ð^ð6`š’X…·åéüvb[?‘bÛ¢Û€HmÕÈÆ©mAþR[¯N¨môiyP[»…¶ES–“ÙZß³½Bi-OXkï`mT$°ö¬˜Ö*RJkíJ[ø“”–mHk H{]ŸàYëBÀ³× fY;¸lé$¢Êe­åž\ö¬D–m kÝ@¶ '4¬Rx¬5x¬¥8Ö8öº+±ÖpÊa­'€ÃÚ‹{VëgS{/»lyî,$ƒ-HßLKAléXƒƒõs ƒ-ȆB[`3I˨›6Ê”p*„µúN‹'OËñ –½ Öšž öVv2بNU¯`°¼J X¾(D°Ld¯‘ÌR"²W6<Ø«5< ,–s #°ìW$°êZX;öe^T ^uDîjîŠIœqW\¸qWvµÃJRÞ!)ß ½žsG¯¶ƒ7åôAØiL èµêž*C¯™Õ½n A^ÇzE^ÃPE¸J^ËSrMæZ¸Ë›2Z<y¥õU´ü‹ä»ŸLE‹=@¦¢…W?Ñk÷æÐµÀ;ЕmKùì¦Z10ײQY«Ìµ {™kIŸ’ÏF;år-fH È•âê§Tâj­¡Äõì`­%¿ÖÍz™²Öb¦ÊZ£LìµFXHj/ÒZ›Œ¤•¼ÅHkë¦l–& ­d1&›­.©½HkA†0¢V0"W?L+qÙkA8ÙkÑ´“†^Ar ½â"ôÊÖ zå ¯|0$¯heÈÕ.L‘+Ù‘«ÕäzÔÖÊ6%k¥GY++"k5xÖÊžFÖZñÂ’µVX@¶6Šr[iÌAÚ ¶d¸ÕÊ ·¢>Å­VÞZ‰™·¢:ã­Êgˆ]‰|„»Â|À¸+¬Œ»B¼jÜUi›sWhl‰_QñWœÒø«•¿¢:㯪ütþ `hüUŸóWmòWÖçüðÒø+®å+ø«bs篸Nç¯z-Ê_ëk±¬_ŠñWgüÈÔø«÷䯬Èù+å}‘¿Ú9Á_ÙŽà¯ÆŽÁ_­ üÕª%nw‹sÀ²Œ–õÀÞÊÀZ,›šÖÊ`y°v,»,¬§Öª€e €e,/…–—rbXÍİVk—k—ËA–…¶¾ö<°÷€$d#±vNðX|¬ Èr-‹@–ç$e§%–µãÀeíZÀeyä²vN€Y;`¶ÑœAñl£ÿð,Á !-rÁ¥%$!¥%°9(­ ¬ÖÎXÛèÀXË;#¬åÖÚa µVp­¸öV&ÐÖÊ@mYFjÛ^»X#“ÝòþÈn›;#€ÝâœFp•˜9Á}é~àt×®n|0‚«TÍ .ªs‚+·îW«s‚«õ×Â5 „ËÖ"µ2 \;§"\cÓ@¸v®U†k·”kÇÊó4”‹²¯@¹ÚÙåZÙ£\\ P®ÒRCº@£w¤Ë²/cº þcÎt•Ûé*@¾!]=*yß;ɳy#ˆÞü†s˜p®É¿€sgÞÀ‹­(Îñš*Ñhk»À(®¶Èfl®¶0¶0W[jQéj‹}ž„º+¶ê®…Ž·JuÃUOì AuÇŒMÉ ¨.úHuWúB€ê†3 ŒAuýœ wÇŒ4Xàî(`Vº;ÊÆï†°߀wõQŽÞ]:€ìï.4µ ¹mþiºÛ6즻mC'ßeªÔV¾»Úï*å]ªú­òÔ·”w)0ÚUÈ»úB(åõS*æ|EP®bÞ%C¿Ì;Ê´›óŽ2Ýà Ì;®pU1oøcêqŠy”¨ã‚bÞñÚéÓç]±Ùœ–iļ´"#æ¥%9/}ÀÈyWóx€Þ•‚f€^šf’ô®ÔrÓãÖj‚Ç-EÒ |éÚD໚‚ß*É}é‹Iî»r«?¸ïŠ oä¾ôý4[lw5[úP¸Ç­~—À}W3jPî»Òä@±/ý;‰}×c[€/½,|ÃþJÁ­ß0UÓ¾ Û’|Í.À׬ëNs[$l4w[ú³ÑÝÖNwÛÓ”¾¶ø$Ò×–¡ékkðµå^búÚ>þÜìlU|G;ÛŒÙ>íl7È2hg›©”…í…'íl‘yÐìl“¶t³MÀöt³] qáf»ò2áf»Âº@øîh=ݵ ¾kîv4³]ñ\hf»B:M3Û› Ÿ–¶ í`i»@µEKÛ…f °´M¯õµüfÑÒv¡Å,,mÊyai»¨Šxwé¤ÐŠwGÙë.»zVëFÿ¥«µKÖxè•®6r0¨¬bÝèê«X7ÞSqÇUªKÇoR]†{¤º~œP]Fm¤ºÑ*ªðUªË ¤ºãR^:ÙZ‹Twi´UªUÉûªy5T «T7BíWêÚ¥A +P—!.¡®_¥BÝhÈW– òWêzƒ(Ô]k“P7ÂtÏ€ºñ¼_©l­/êF’  .Ýô uíõÔ]š+j/¨O¡¼9Îå8×+Rœk œk/p®Ýp®5ÖçÆ"RÎõšçZSçú9çúq‚s½:ŹKƒ^Vi®uÐ\¿¥¹KÇ·47ÆÜkÜÌEé̵·0×ú?`® N€¹qNµ«U˜kC/`îñÙŵ‘×úŽRܘbËZ(î‚4¹¤¸ö^ƒârÚNŠkãÖAq9ÐâzMð²]¼[ÙZϽµ¡Cé-û!à­½Ït²]_{ €xÝZÛ‚ÝÚø vkýìÖÒ[€ÝÚv» 4Ù­Eˆ`· ò:›£m"iUx»tº(¼µvQxË7ì6Þ\‰9Án#š¿î€ð–µ‘ÞFu×Ônâ[þêÛðíï´C“î›Y 5¹ësöJ5.w#+YÆ,”ø¶‚ßn„Lä·…æ ÊoG„.;foÇ 
B…aÀ·1»ññ-y ñ-9Í ßŠ²Õè­òÒÛôôv¥Äôv¡#.èíMJ¶ƒGÞ./“’q 5v‹—ÁØ-“ðv¼åªË¼M_ÞŽ2uú¼ã®Žð6%jfÞŽž  ðÖSxK×kƒ·L‡Dz[a xËgCxÛ°Y›ða‹ÁÛöxËLW„·VŸÂÛ€õªŒUxÓ;µ¨¼]© Vx«<ÆáíêBÜ‹Þú¥€ÞndÚ ·ëkƒ„xïÖ¶–þô–H‘ôvÃB‘Ó[8+€Þ6Úå*½-4O½Ý˜M ô¶ [’Þæ×ª]oÐÛÄ”Z ·0ô2z›ñÚ‘Þî<ôÖÈ®Ñ[HoËkÕn”‰­/ñí·0\Ú*ƒáÂÑ.,.S|Ã5X0\¡ —‡d¸L©E†KÿC2Ü‚ýÛ$¹ $¹ªG ÉeF-’\X É¥‘.-ïLMÆ~ì©É´?273Pyn2¨e‘›Œ÷ç¹É`N€Üd|O™›Ìò!7Ù(ê¹ÉÌCaQõ¾7ÈM¶Ó–A¹®%3´ÜdoŽs­5˜›lǺœä&{…sWMŸc4—ÍÀüdìüžŸLk¦ëGYz²×Pw ¶ìdô09Sc‚鎀ŒÉÉ>ik;Φó„¯IK*‹´dLGÉüd+:(hn¤Æ”ÞÄüd+Ó®!KÙk[[KüÉ,e´Ff–²õ1>[~2\ å'KÚ­ùÉÖß«Ã]ß~žIÊòèø½‡¢¶ŽsÅO™?µ4©ñ(}üô??üé[pqð­7ˆ}Ó ±ï§.ä›õ×Uw÷Qîø{W9ºsnóFÏ,:Ìç+(Q?/àøù]ë_œv^Àñ÷¼‚g³?žüùlnz—gð¸€ãçw­ÿÑìÏ 8þñ‰+xœóçÏ\ɘm|Kã]g+mÌZòŒZ‹ú×O—?¬ãy\øÛ‡oÿñ#ùÇ·ÿ|ûðï?üÛ‡/­)‡ßqJã†sÿòšò7ÔT÷Øá˜R|g¿¸¢ÊŠþéë[( ÓþöáO_uƒm^å­DúÂeÄRy/éVït4_Ë£æ_Þþå­ÿóÛÇñŸ?ó^ßa´¾>ß¹ÓŸ…÷­å.óõv¼ÛÇ)Ò°zÝé½>ŒõVwüê]s’é=Ú|Öîu¿G›ÏN]ou_m>”XÕî_: Œij @=Âé1 ´%ar@¶í>-oÏÑ䫇K«f¢·%ç:âù{5ûQMúõä=(ÁhðQOþL=Ùëù:òc¡eIc:ãCÐûZÇÔº{ñŸYÃýüß?þô·¿ýô믿þôñ¯¿ô?ÿõwÂo_´9.Çâ½ê*Nà#FÃ}ŽÅûM„‘H,Þ_›ãž$)‡Ë©«Péwž;#r’घ§!äANLÆt]dº1ÊÆgO¶Å%Ôå*Z˜¾¬Ï_õK`Œ,ÂyÜÝ)§h{Ñ©~®›È)–ªZññûÓ¢ÛˆoÒ ÓÓS·*Àˆ²SOõá¶ê¥§ˆs  ÌU|&ÛÎÜ&µ_zжc{~ŽUû'‘ŽÃ.uQ”œzŠ6B\Y˱féÖ‡«õŠn;æÄ¹ÉöÊÖ±Ú•GôRжÃ.~}J)ZÆÈ-_RŠ[Uå’RDUš™»ÕKJçô#ÏÊE·Ž%Å(:¥£te37¡ºm‡¯EޤÚŠŽ6TVLXÕú6àj™ß¨Ü×KKÑ:v*EÙ©¥ÇÍ_¥KBOÕl—„"ZC–ŠrÏ—„Â[#Vé?KÐŽËôŽÇ¯¿Ë^–hFutŒeúƒ@Çq›Ö×/ E›lë$¡9–éŸ:nAsóÄÝz&-µ/—„b¾KÒvA=qNíÝ{º$Ñœú¾<çz™VsôÜDGCê=ÊÎ Žšä×t*(fû_@k|›.ŬNÚco§‚b¾Eë—‚b>îkÉ-ÊNÅlÇ‹­“]<ÚQæqãˆKA1»zÿxÛ¯u„è&r-%é×)Ÿ(;ó:¯¥È‹ô—²¸üø¨(‹tbÉÚÄen-G_Å£)‹lžý¸IEõ’NÜŽ“¿8NÛ"ȃ@Çqbi^B‰ô$Ðq˜¼ReîO9FÔ{Ù©ˆö—Wª„Âç@ÑöÚ”­´s\ÅHQboV{tºKÍWFx>b!e9Xô<›4ˆn`kcäÑQ™›0ó¬a¶£^D»î%N)+|%š¢ƒDôNÜú8MT'ûh£ì”NÄq²V]œÆ°Hµö9Õ+¸Ê°*À¨„ï胷sF@—ÝÍ›%ÉÆ¹Ûq9"®Çwnö½k*I¾íq±šv^g½ó]_¾’D•6;Ê%ñˆ²SAq«OÖïâ8Y1U×"?†—x·$+bT0c#y [ìn.¿Œ÷[s”Ä¿"[ºmOýëÓ¸ð W³ª¢ì\1JÆÎ1:•6I*cu~rãœ2+5\’qu‡It¨sس»«!-<†ÕxÌòîVyµFÆ%éϰÕo¯]£JÌ­d»Ä"ý¶rNeçü"ιKk†IH>:*œÖÆ@tÞPœQ¿2fQÛvŒª±)Ó1‹:/c"Jš(Û¯¨5ãcúäóÖcuÊ'¢¦U/°\ú‰9¶\a|ia'|«VÓ¥ Ÿó9½ø ê&Ûð£ìÚþ[Æê”OD3ÊÌqr}Xf[Éh5ÊD>‘t÷}é«Ê'V<šXª_ÎQµ 't,g«È(kõë9ª¼Å11Iç¨ZLÎ9Ò9ªÂ¿ºhÜ9_,Q"ª;ƒW¯¯_qVôYw/1aÈçØŠ|TÁËÊoÇ·A¿à·qúÞ+¿s*$T~÷.ßyðÛ¨O&¿à·Q–õ:/~§ýøí¼e­‹æNè³Êo£©%–¿ÛÓáð©—ÊØˆ~gÓ å·ólר¥ü6ŠD±~§ÔPømÜ×%¦¾3Ñ ¾3Š|øv¾ ®+¾µs߯95 
U|k/ð­5&ñmΘ³‚âflxŵq7º‰¢S¥¸q\ÕkŠk]7îA'Bqã0 @qç "Ñ©RÜùÈlK)î<§ÌU”âÚpPÜ8Noãr´ŵ×w¾7ZvQÜÙO$´VŠ;{ºà¥¸ö*‚âÆ´ƒí^[ßb‘§¢üv¾Š‚w”߯q:UR~;ÛJæÊom\¿õ6V~eâÑ~;¿2UR~{»Ná·6fƒßÚ·ü6ŽV~Ëëô”ßFÏ4£üvœòͰídQlw ,:U ¶0ÛÞÊÛÆÉ„B)µõj;‹ä=QjgœOIqí|òÒÓ×Î~+ãâÚy¯R‘âÚù$®7Oiíl>%²BkíCZ;;š¼zJkŸOI!­÷=…´ÇŸ ›µØlœJ>a ´³ñ„u+¡õF@‡é@'€Ö›\í,À œövJá´ÞeŸ Ö:µ³ýd˜¨ÍXP&¨Í…—qÚÛU¨OW>ÀJjg˜&6%µóýQt*¨Ö_EµQ¦,^Q­¯ŠjoÇ ªõ7OQ­EO`µÞ³•ÕZìV;Ã'!¤Êj½?+«C¯6eµÞÃÖÎ{—÷@`íóRF;¯®Ê_ £õ«SFëãÃSǾ턦ÂhoG£õûQFkSe´þ5TFëÝC­ iç—K¡¢@Úy{LÒZÜ Hñˆ|}Òú¨Ö¦€´óÞeF!ííÒž¥³>Œ(õwBéìü¸È7Yé¬÷¥³qØçEgmÖ:ëÕ)½• í(¼þIg}TR:;ïZ‰ªÐÙy‰&*½]Æiý©(¤Oåú)¤õ7TQm”=9§@Zk]@ZiíÛH;£œk,¤µù(­Å  ´3x¾º0X휫j}Êjs%0VV› ¯°ÚÛu «µ)X­Ñ(eµœÕÎxU®‚ê ] Ú9Ñ“Vy6°<´oM$üu¾e¦¯©3ÛŠý¦¯U±$˜m8si™ªlÉÁlc-R30Û­‚³€ÙFô-¨ÐvÌP¦ÐvKÀH€¶~œB[:©ÚnH±Lh»@VhªáUh;Ê)¶ 3Š‹û“Ûn E¿Êmc*%¬Ü6í®Á½¸møb¨(U¹møiUl;šJà2©m†Bð ¶iIµ“MK‚Ú¦LPÛ´«Û& –ÛšìØv[^cÛQ·bÛôÈüN^›:™¥òÚ´S7«¼6a©¼65r^åµ HÉkGÿPú ^»­:I ¯vÔã”צN ¯òÚ+'àµö€׮ÀÊàµ[¦dUyíÆa‹¼vÅe‚×nbî'¯Ý6HÃÉk9€×F™€o[ëȶֳØÆÉZˆ­µ1‰íŠÕ"ÛQ¦Ú4Û-S{ b›Al7ä3!±åC%±Ýªª/l¹¼B`»mà¶Q&6€í(Ó€­ŸSm|7T3,À6>arç¶[;¤¢Zv:¢Ú­`¦T»(kŸ¤–yŸHj£u/)Hí(ÒY.I-Pë•)¨eâÔV’kµQ&è¸6Î)8âZÄĵqœÊf×n(…¸wZËž X»ÌÆk­Laí¦«Òdµ~s`µõ©¥½~«|Öf¤Êg7¸o“ÏFõ2c"¨­d«µ•÷jù"Ôfè÷jùÒÔn+DJj9d©]¨½°ExeÀvÃbí¦»wl£QUM b»«ƒØFƒËÄÖîÄ6>óò~Ý®*¹ ºÚ[0ÜŒÏ. 
c¸«xºáÆ—Kz7.ËÀpW\%î ÚG†[€Èp Zód¸è=n¢W.¹.×@pnÄ" …•àZ¯$ÁÝð–‚àn Kj ¸Q& Ü,‚3¥ÂJpãþ„’àÚqJp7îÁÝ6’_\~ÉIpÛ‘àú½ƒà&õ"ÁeÜ‚ËpÑPîF ”ËþG”[ðø”»"#ÊExl(wŶ¢ÜåùMÄ]ð-'ĵ[ÄåÓ$Ä]ôãJ†‹).§d¸Q&… —Õá®X‰ Â}ü9ÈíHNr› ~ À]ôÓO~› p ¿åƒ$¿eû’ß®”Â*¿- à·«Ðä·Þ‰oWÞ>ð-ò¾EBT÷ü¨ßZ™âÛB0 |‹,!Æo¹Ýü6ë÷Î(.¸“Q\¤—4Š‹)—Q\¤ž$ÅÐéZ#ÅéØ5|âÆ×ð ÇHq·Mñ(!.–Óq7ä´"ÄÑõñ%Ľ—]7¢¿+<Ä(ó‹IqùB’âò{IŠw~vç qG;ýÎäÏ…â®ãïÿ^·hbw‚“w53„àÎXJK„ßÂd‘ü6s£<ømnä¢Êos#U~›±m“ü6?mvIns}Mn37¡ƒÜfx4›Ü¶¾tJˆã¤±(»åV~ên)„¥î6ÎÔÝbÛ¥én Í »ÍtQ€î{~Ip]¯{ènùX¨»µC”àæJ Ý­]„·• BpãR¤÷Px ÿrÞ²%ApíäŽ254ÈÍÜõk}Â[3$€ð–o…·ŸÐÛv,jPo»S«ü6«é&ñm¡ðí(Sì®ø¶ÀÓô¶,PõƒÞ–•N Jo RÄ’ÞrÐzÂÛQ@«Â[¯Hám\»ô7ÀÛHe#Ôð¶¬Ps+¼-ëKµmY ó»-+º)ØmÜLâÁnãœÊg•ÝF3ª‡²ÛG²Ûòtÿ$µ#¸(¶˜ÄŒÔÙª­Él;åÀÙîPÈPf«®Î¦²…%/U¶; ²Å÷ÎT¶ô• ÊN¦²…Á÷¥²ÕM&²Ý]€+*[˜wÝæW¨lwÌ©²Ýi¡ð¶ ámÁ:ámAbeÂ[„·ÖÇo9Þø’Þòí&¾õãß–åµÖÖ¯Sù-ßoò[{OAq9ð’å–?e¹£HûX.FI¢ÜhÍV sR:Q® #@¹íHrý\Jrý„äZãƒäŽÃ@¨•äÚ€’ëw $÷ìÀpy_@¸ví@¸£L.óÀáŽG©ƒn” yµËT‚Ué®\ëÄ ¸ñb¨6Xnë5.{œ\ë ¸¼ÜóÁÝÚ%èÖè6z*}ÝÆêµ)»yêü ðÖ.[Ù­õ5°Û‚“d·QöiŸÇ€n£L>j@·ÓÕ™ms™íy€ÂÚ²*¬-tC¬õ+SXëçTXëÇ)¬µQ°ÖÀÚx÷”È ¬=»0-JbZ»i`Z¾OJk/(­& ´öÝ¥õs «õ›UVëÕ)«½ZB)­½YÀ´Q&0mÙ$Ó–„ ˜ÖëSNo²à´£LW!l \÷ l yl­‘Al M‚@l9<ØÚ' ÀÖ.S­}<lÙçÀk9« °åæ¶‹^Ûhcß¶öllí™*°µÎ `ë7­ÀÖº€-C[>R[<6òZ6y-/…¼–€¼–·N^ëÇ)¯e³Ø–¬CmQÓòZŽ«äµ Ékñ‚×Úaàµ~˜òZãäµ^–½šo3HXEuûU~¶²£Óiíˆ2М6ä*}UN›Š®»Ô®¯•¶áô¥ðQIí4QÒS © g18Í ©mm{ú­*£m{ó•Ѷ} =mk%Rk^dæi‹(¤6¼¯TßGO[Ä{ô´C¸n‹‡§m ý"{„§-­ái;¾Bº+¦¶AñÔH¦¶iæÚÖöªµm$EÕã`m›f§9{ZÛFÆ 5€µ-6«ÒÚ¶cKŒYÛ& 3ZÛ®Øe¯Ö¶ 1imÛ××Ö¶6I´¶m;îÖ¶ ™ãimÛ°‹ˆÖ¶~œzÜÒg‘·­ÍÞ½åõQ[l3[ÚJÐã6Í<’OFó¸Ešwó¸5L Û†.[$í¢ÇmüCá&xÜÒÂŽ·TÒé–[IátkFbpºÝ1¬Óé–.Gtº Û,»êtKW%8ÝÒœN·á?¨~êt»gºÙ*½]¨Æ½](¸9œnÃLA±ZÝš!ðíB ð-²œÃé6L+UB«N·§Í%¸í‡"rÛQ¦ ·aN¦>êq»L×Àmgï¼âppÛ…*#xÜþ˜0·µÞ `»$i[/SsÛ´LÍmwþØ.Ô½Ø.Òܶ_ª¹m¥UíBɈ­UGs[x=ÍmÃÑNɰÛ…% Û‰ÌÜv£rXÍm›ö_[¿t%¶Q›²Ý•&5`»k l—ÙÈÉv™šl7aYŽp—_UÂÝÂlÂ]/S¸ËO'áî‚}Û„»‹*¬Íý£¥¹ßVXÇîÆ¥ˆ¨p—_qÂݘ ªrVà.ã,Â]ÆY„»üÊ´9x… ß·ÁÝßi©Ð®hþ&È…žœ ·dÈó˜² 7 лmØŠÎÌḛ I®e'ƒ$w)į zw2gà^ǯŠ{#EŒ:(î ç`Ñ~÷×îí Õ°Š{[§âÞ¶Ó!Aqo3[ŽãÕ¶¸7"I–¥¸wüÝÒQtáÞÏÔÔ@qoENÜ[êDºŠ{k‚¡À{GÓ­Â {+;po]Á{+U”À½uu¦{âÞÂÜVÀ½íñ×JyÛBù²RÞí_QÞrÝ^”·txƒòD+¤¼µ+å> 
Ž¶Jy+œºIy[8[VÊVÖzJy[¢ÄV)oÛ±VÊë[(o&—ÂLfLZÊ;ÚA׆˜É¬¢?‚òöBˆúÌdÆTkÌdVHr‘É,Óߩ̪âSKeVÐÊ,»WR™Á+ÖR™å×”·2Ë/NeÖ•-/Wì™ÄŒ‚R%¼“ÉÔA ¯Ó ^j2™Ë =ŒðBUm¹Ì,r™Aý ÀKeoré®Þ»&=•™¶&S™YÙ‘Ê̽¼”-!—ÙþÔN1‰ÙÎldŠv vfí­#‰Y<0uÔÕ$fó¿f¼*Ó7¾ËÃÃ,èLš‘ÃŒ ­Æw€Ç ó¾ÐåÎÜ[BªÃl?J¼‰N¼¼v6Í^s)y6È^fé¬ðN¸¥éÄ.Àk ˜½ÌÙË ó{ö2(=/ßÏ^ŸYË^¦¯ð*˜àÍ„ÌÈ^fÙÕ4{™—ð2;o¦ÄÙË*Öe˜½¬b¿¶#^¸$ñªë•%/cº&/ã o† ”„w#5Fò2f4b 3&P"áE$o„×Áîñ$,+ÊÛˆsÙĹsü3oóæ0oY…'Ò–m¸C¦-ËΚ¶Œ"2¦-ã@šK"i.ûî—'.ƒ‹‚\Lîo0Õ!os1oŸ;i.×#Is™Œˆ‰Ë*V˜¸ì™ØˆËvjŠ‘±lGggÊ2¾$HYÆ!‡)Ëøý`ʲ®±HY†ò+crœãf,.ã’Ì3cÙ†o42–mh*f,ã¸ÀŒeìªLYƯS–m%”ãÎaïÍ®/0W™iz‘«ŒëƒÌUÆÆ\e‰vÈU†¹¯å*ƒðâ–« À¹Ê˜]¹Ê˜#’¹ÊÒ+7OÒÈ\e”ÌUÆ]sÌU¶‘¸¹Êv;™ªlÇǩʸÆÇTe›§%»2–AƒcËøL™²,SvŒÄe¦PEâ²BW$.Ó i–·,S,‹¼e™Â]ä-ÓdŠ–·,S&Œ¼e·2É^ùí²òî”ßr À²—eÊ™‘½ÌnÙËØ,È^F4³—!ª·ìe”%3{Ù¦˜ìe25þ67…Ÿg³ô¸§·?¸ê“½ÅOmb¬(}üô??üé[(qÀ¬7(€Ó ð§.ä›Àô×Uw÷Qîø;WÙ7zþf ßû "š>.àùó»Öß§€ûyÇ?Þõ Íþxòç?šê/ßå"ÕÛ@¶¼=G“¯™YÍ6Áh³©ñ¯[5ûQMúõäñë1ãÏ#dïù3õd¯ç7ê(sµjëy4ü¸‡h²µ¦5w¯#þ3kø¯ŸÿûãÇŸþö·Ÿ~ýõן>þõ—¿þ9ÿ®Qøí‹vÌÕ¹ÄzÈ)ºØùˆÑ ’4ÇZýéÑ4ýW¥úÃ.¢fèê˜ÎŸâã ÕR”˜$¶'kⶈš"Ä"˜i’ßsn•”'!0Pƒ3U¾´M Î fU-¦þgª¶ªšŸ&V„3‰ÈˆZ˜Y™Ú¶Ç€ÚÄËr;ÖÖÔß ÒŒ(3¤í¬±oê28C‚ÁÚvMPÑt‘°öHvfm[¡.ê¢(±l\Q¶Ÿ–¿È†WcÅþ²üE¢©+öš¶MŸM¬Ø_–¿ ©XwNÅÓƯ/ƒ³H@¥‡T&V©Yo½õ,<¡•9M’éÄBéeú[±¸‡;Ó¶±!cç28ÃF¼:¦è’Y˜çÜ“fnðv Dt™þ6aQvª ¦“ýµZXƒ…_¦¿È(YwqSœÍ"oc¬Õ\¦¿]TÝ›fªèPçÅšýeú‹DNQ&™…›®LÔX³¿Lq--Öì/Ó_ä¡jAf.ÓߦŸê¹ÓôÍÙæ6‹ruZ¹½¶lš©¢j§mKÖL]û1‹¸f|.%SESŽ4?¥—éoÓçѬš¿MÐs[.ÛêÛžaI‚)¯ÈxK¯û´75Ê$SEÓ•³¶®š©¢ëjt[“fª€}} Éà©<ŽÛ»¤ß-î5› £Qv¶ù´…½ˆZ¬ßŸÊcfµn«tý™¨JëkšªÂÎÙ5UE“oYI¦ ¼vã› ™*š.ù¶s°Ó¸é"PK²ÏÔ¾–ÂM¨œ]V†õ(:iûì–ò†¤MóUtÕ´”5_F¸›ÚÛ-Û˜i ¤+ˆ2ÉXÑt &ÿ’±¢é§¿¨ëJYÑõëÞÒ®)+ %l±~Ùó…ÛVMYÁ—8ð5™ŽKš²‚ýrš|K27ÑQ¶p÷Õdn²Ó³þ²æØŠæ¬à»51È8«M­#‰w¢xbEÇYm³]³V¨û}IÒŠ®ñF I]Ù-RÉ]¹Ü´Iþ6˳Úò¦ ,R/Ê$E×åŠ>©šiXG±môª&°PWÿ– k Çúý•¿É[¬ßŸù+ºF_-–ï‘iXZ±,š¿¢ë*X›ŽAç ‹qScoý’4¨6"ed–µˆV²æ¯@ÊÐ]Jþ $íŽ2É_a÷P™zX_ºÒ4lBŠÄ" kegbYv‹2É_€"H—$°xæq -,ÒËf…)–ôÃ:†Æš>ÒK?Ž5}cµ±bM_Óã¸òØ>A ´E•{–£6t­âG²)n¾üÏ`Äeçö9dƒ3uÍ> ‡P¼Jöadõmcb…ìÃ2é 5ì«ìÃQö*ûpX—\z‹(ÓkÉ–}XË ³kÈÓêÈ>,ÓÔ(Cöa™Ó„2Ù‡õû‹û—a¯Ýƒ|¼,fК}Xï¡/Ì>,œ¬Ç%Ë>àkßœÝÕ˜V]ûæìÉ„¾öŒ^3¶ÿk(Ï ªx åEµ/!š½<ÐâQÈËÚ»l—ã¬(ÊÎírñ>èÝï²]Ϊ aä´r>8ÅCgÐj½wL¦®írÌç<… WÒaìb™ë¤gÐÊìª{|õPí…± ¿ÈxªCK,è_A+ß÷Xп‚V»÷&Ûåò‚É æüY† 
aS:p}gb+åµ].ÚìOç¶´üi:ðس*t@‚˜Ç6òrôP½…Øä{m—ó²,Ûåò"‚Øsí–cKGYtUcûþµ[.jº¢º¾4Ù-ç§“ ¬5rlÚ>·Ë±ŸDѵ].»‚ÁØum—Ëpþé«lLõ6ŽÅü3h%8˜Ú»3h ît}f¢ìÚ.çõeÙ.gM¶Í@ŒwrngÓ Ä8g³ ÄÒÖ¦ˆñþô1›’䛾sçà•;Æ€®ˆyieb½Î´21Ê’d æcHSËÔˆDßn/MAŒ|åsM`"Ús¤þ¶ýrGâŠßØ&g\w½¢’Û*,'Éu;Äì»m°ØmÏ­Dºã4«H7ö9 ´ÒmØ.¼ž pi.L€[°O‡—ž¸¹«Ø—v¨¸v_¸t‘&Àã„ÙàZ{àF:…¦ pi›K€Ë”¸%‘¨*Àk‘¦À¥­(np­O .“àêž '¸Ìà@‚ˬ ¸‘9@¦$ ¸ã2u‚k½—)-N‚kO7Îèé”àZOÁ|8zN!¸È*C€Ë\¸¾†¸‘ˆD¸.’rßæÎ~ko$ø-“›ßf˜¯’߯eÊÔüÙ{ˆoã2¥ç߯aöPp›±išä–9¾Hnã¸k)›ä–Æî ·LèBrkäÖ†/ÛèãJ['¹Í]¾¯·Öî ·‘€I0ÈmÆ>N[ëà ·ö ‚ÜÚq ·ÖT ·¹KƒÜúqJnÙ*·ÑSå¸ÍØ%Kpkà6.ó͉­½ ¶ÖAlý"”Øò¾lgÚ,ÒZ{ÈJkŸ­6wqÌ&£KR쫌Ö;íãü ³ Ã7ÉlÖíèFf±‡‚d¶Š͵ÃÐ, Ð,S“šE2C³—I4ËWƒhiÒ Íršµæ$šµëšåO4km4Ëãˆfy@³&Ô†fí8E³èVD´Ï ±ÁÙ†¯<álÇBÎg9¸Ò²g‘Òvã¥åe+¤å+OHË‘¶a‰–Á!m×=…i9ÂÒjº1c´ü8цL^)2Ú®³[c´MDÑò‘’чÅm×õ0c´MB-C´èý†h›.YÑâÁ¢mÀVD´ø&£m2gºmW®cˆÖ®ˆ¶©8Å-†"Úç{fpöüg1–œÅÇÈà,:ÁY;pŸJƒ³Mƒ3ƒ³Ë Îâ}38‹oƒÁY«Oá,sžœÅdÈà,¯“p™l–— 6Ë»#›åS ›mºäjl#̓͢1¾5³ð×y™ÒŽïèù>|B|»¨ŒV!m…ÅÕ·²dAH[`‚ySßB) õ-ú€j9æ˜úvÓi²©o´ÅTß®¤«Jl©ã2ùm"]5Y‰í¶‘+±¥4ÄÖ´£á•ÄÖôšà¶‘2\ÀD¸‰À"\ØÔ™׎Sn›àý{rÛ ¾ &µ2å¶©«*œÜœ .<›‰mS× ±m"¬·Mï¸mR+k“àÂiØ$¸tf¿Mº ܸ›~èM[Á¹©À%Á§7ëZ§)p ©À…½)p %±Pà" »)paðKnÅ”Œ Üâ î‚™¸…4ö)À-TC€‹Žgܪ‘0õ·p§þv¤þ¶"J¦þ¶â¾¨¿…/¢éo©q¦þ–beêo±ÛÂô·MñšËoU­zfG*VzkŠ9—ß*N¢ü6QB ýmÇS þ–]úÛ HýíŽIô·ÔR›)n5ý-¨/ô·“ŽS«Î3&¿­„°ßÚB~› }U~Ë„òÛ‚ö§üvE×£ü¶’oB~[(O…üwNõmV¢ú–­BõíJÈ õm%-†ú¶PÑ õíãWÝ–ïÐV~):[¦ÔÙÀÐZk7èlPhêl G¨³µSBg»’ò>t¶l0êl ½Ú®D²Ú¬9@h Ó^Úª‹½él ^êl©m§Îv}’w*lílPØb±ß¶H„` [v.*l‹.¦P`›Àö(°-•R`Ë.G­[¾ÆØrˆ¥ÌiK(³}¾Ô×â–)¯ÝÀl(¯­XÛ8ØíÖÐð€¸[Áàˆk2k@Ü©¥“*ĵ w«P6âÚC¡¾–ªg@Üð”ÆÄa^Æd@ÜQ¦ë €¸[ĵOõµ˜ân*D@ܽR+ÄõûSˆ»a Kˆ{/}m¦*V!îQ nI¸Šß| Àrã³)ü,7ÞS‰Ÿ,wœM>|@¹ÜH”ë'S”k#²\ÒÀr­ ,—ó²\ #ÀrmˆËÝŠnÀ$ËÝ*Ö$smÞAÁ-ÇBÀ\²s£L–qsí™æÆwTº3`n”) T˜»Áí4—ç$Í彃æò1æò«CšËQŒ4—½ìŵIsÙȤ¹|8¤¹ì| ¹$#dºÖŽ`ºÖ `º|¦dºì dºöLÁt㔣*ÓõëT¦k·¦Ë½dºŒáÈtùþ˜àvys”ßS¹< Üé>+UQ.#?²\k~e¹ÖÃÁr­Ár­åÚSKWþ­w9!î:þþk®,nÝî M$ ®&À&.´C€[ôËO€KDO€ -ñÛŠ"3OÐ-àÄ·ý€oM½kî ×pr£·*-$½ÅV³P°ã`¡@Y0àíNûsPt¼…¹¿Á[á-_µ'¼EÎÙ›I‚>wIx©µõŠÞî`Jî’ v3î’ ðÐ]ô½%½5sIб€ —½Ô]®¨ín’ /._2\Žtn’ã7IPœ †»»& z{n’ ºB·JPÑ›Y%¨Š×œ v5§ˆdÍ)AO”k~ @¹f¼=n%æ…iĨÇåþuêq"K·Jqœz\³'0«}õÝ*á%ʦÒ,n• ·çV ŠH©Ç]qQKà êq±1ùf•ðXPq“UP‰ ¯ qWÊ]!Ä]U1.C&Ä5ßóJÐæp¯]ä?ä¸fp9îJ 9®]ä¸ 
vgAk=.Ÿõ¸+Y,ô¸PBÝLô¦©ÇMª• —Æ[î™?èqÙ­¨Ç]‰[¡Ç…k¢éqW¬fSkM=îúã|BˆËÝn–> %..JÜ•ÈJ\#Sâ®d¾Pâ®b^RÜô)nÒoºIq‘óǤ¸›ª’(Å÷1Aî& Æô¸<ŒzÜ ¾ Ôãn:š—ÕAŽk§„wƒß帙XrÜLÈq7ÆMŽ»é›oè¶@èM9nQ!‘Éq3Y,丙àzÜü’áÆaú©ÇÍX6 7«@œrÜŒUŠôhDéÄßf•°Š÷k\p›LæãÖªûTˆq “}€ãn.¶ÇÕÜ"ä¸ðÁ0Ž›é$kB\øÒšW7±»¶ ¹E%7Frá¯C”Ë•f¢ÜXP+_ââZ Ä}~ƒ¸XW¼)pµ! q7)Hn7=*½'à™ÀsÒ3á¡6¿5ïå·Ü\N«„JÝ+¬øÑ*÷é”Puc+ØxtJà©`”P­`”PA­h”À›¢QOI£ôú$ØaðI¨oôI°ã`”P¡ë£Q­LŒ8ØÐôK¨0è¦__&ú%phq¿yxôK`u4N`³À8¡âKN+ƒ…‡aZ(Ø¥ ‰˜™ÀÍæVnÚ\nu• ì–ì¶!: »ÅäÈØm%»å¸Jv[¡ %»µãÀn­ ì¶’ë‚ÝV—è »­ƒµ­$Á ¶v6°[^¡»(hLä. ºàCx[©²UzËnBzke ·ì‘¤·RÛ'¼­t®¼e[Þ²‡ÞÚE(¼µ"À[vdÂ[ ¡d·l²[fV ðÂ5+FÈn+q0Ø­] ØmÅÒ…{)àÎÁnÙbd·G'µÅKoð¶‚®Þ6…à7¸à*¼Åhw³QPHxke€·•&³¼Ås1x‹ˆÿæ£ üû棠RXÀ[óŽ07¨]ÍM! i¼…+•Á[¼…ÏèÍLA&Foa r3S@} ·˜[½5³Ð[:x¸™‚ÊW¿ÆLê\Ð[Þé-ÞpÒÛ]CP£·0g3z»kÐM|.kêž[>c×Sû6|û;ݤÏÞ´¸öÔâR{7…Ê@BܪRV2\|Æáb:K†Ë•p2\K䆻m"‚á¨f †›vš7€á®ÈF† ˜1ÜBm W"Qî\‚á¦Fd¸+­ ”áŽãU…ᦪ’+2ÜÔÐþ`¸ã8èjᢙ(ìÉpÇé´÷€á¦¢Êdš(pQ$71ž’ÜQ¤{û颰@›‹üà¢Ðà¨A…å/xn4ä+%nêª8w$NÏ]°¦ž›vÜyîBá/xîj*r±ƒÜ¹+ŒM¨ÈM˜jP‘‹wêËm° –‚ÜQ¹ëkžëWùäÚÙ ÈåNW r”äÚ}ÑGáu³¤êqi¿à6 :¥Ë¦ÇM Òn£¹+¹ ”›‚\¶}˜E‹> ÌGYîŠ:e¹+|!ËMØ€GY.Ÿ:e¹+=f!Ë]1¥ƒ,—²\Z P–‹žNU.w—‰ÂúÚ×Ú‘¢Ü•|[E¹|¤å®d°f£ OQ.|^L”»bræ6 p¬…(71D¹Xļû((L…(we62ˆr­Œ> ¼?ˆrW̺)ʵûƒ4—íBi.£½B–|Ø+$*m¡Ð…™“)t×ׄׯ¾§B—ý„ ÝüÇípº|ߠеÃÌ^A»º<Ž ]ŽÀsB¡ËÇM….oýË]0P¡kÕA¡ËGJ….?ŽTèÚ¥@¡kÍ…î º4«¡B—ç„B7}Êe!a¡–ÚÜæ i.=xNi.}#(Íå Cº»ÑÔxwƒè. ÜtYØ ©¦Ë/…. 
03—4MØh²€Ø„ lGz,0Ž Ç»=ìœðXH¯…¹Öè±Àƒ Œ“è±ÀîO~tà±À0‚ liz,$ïŠÇŒÃca}í±À¶¢Ç_3z,Ø}©ÇBzÔÌ)=82Ñc÷E…Õù¯x,láÂc!ÌÓcá0Ä¡»ÂúïZ' »Ÿ4ÝΚી˜¶ +sŽÁVáy23T@œk† ‰dT q0 Vbd* 6CL¿.C…Dº Gê=ç:÷5Ò<<îôøyQ•Þ÷®¿ô£öùÓ»Ö½÷ëÞŸ?¿ký³½Oüøùw{LeÛ?kŸ?½kݳ½Ÿµ?þDý3þü™ë(,—}ƒu‰™}ž£e«qAÿúái×ò‡uü/‹~ûðñí?þa aÿøöŸoþý‡ûð…õÔ¥…¢m{7ûÅ5åo¨i-‘Fºiæ—×TYÓ?ýq}[cZ¶¿}øÓWÝᘼŒÂ‹YË6¦}ã†oõFn¿qúGÍÿùí—·yëÿÏÛŸÇ2ï÷Fèë³ð;íù)xÏZïSÌßí‹4b‰[Ýé½¾†õVwüê´dy—VÔž>Qû{´û£g×{íWËϱ%ö·õ/[Æh¥ŽŸ[ŒÓ±Ôs‘ívÓ–·çÀòÕC§U³Moœë˜hÜ«ÙjÒï¬'_ÿ¸&õäÏÔ“½žß¨£F¾žÞjÍõ-Ç*eÓ–´æîuÄf ÿõóüøÓßþöÓ¯¿þúÓÇ¿þÒÿüûFã·/Ú×›ØõL‹³cÚìãFƒš>lÜlØuòÞtcKä#F&_M°Üen(5d[g—t³èšâtÙÊ2òÊ ³g™9׌3Í ûÀ«ñËkM¯bëUï—ÌpjE$éz¿RŸD‘¬Ú÷.¢¢ð{SÕsÖîØxÙ%gì!Ô»Šeûn—¦ëÚ=–íOôÄ„¶=–íÏybäÁ½Zcß4alѳ¾gYÁ è.3ÿ]ÁrÙ`¸!?%«º ï¢°ˆT š•=–í)ØdãAûs^V1¬ˆ¢ æ–„$ô±^2&ìÜc½þJ»¼j;îËÊܨrÓã?²T¹J¯›Ž2É"íپȶÁ™7U˲(?˜gj_ÄqŠ©õ:«¬0Ûg”ô’m¼/×+4³¦69J·º.÷D™.‚2óë¾.²4Ç´}_ņÀärQ&™iøu_ÓHô+K¼û*[ÉM£¶Ç÷M5ßB7÷ÕÒZ &Œ2ñÄ®ˆ}­º[ ÉÅžson±Ø×N™µŒt£‡¨¢ì’Ūà5èîiѽYØz¶§U÷Q )Öž%ÎÚic§EÏ©Šî!ÛSÖmQÈÏ”n^(ºÒ³‡¸ ûàšÔw J·Ãºìz¬ª~Ø5ÿ‹í›-©ÛØbÛ¢’Þ¬ëJ»®,ÏM… Ûšqã{Ɇ ÞAlнôúØK¿k‰yœ¼j¶<·aÊCP;C“F™lȱÛk?>wÈáÆºJW±©s§¦ª]V®>ö¼^‚åŒwJ­mlwæ®[Í a׽궉tÝ«× Ó…=c_[V,±GÃ^ƒ, vÝ?6ª ²:„Ìú.$»Kf¼yÊ*ÕuÙT–´îšáןM63 Áâ»&nëz;Q¶êFÑŒQv ÿ²§+«„n×EZ¿…Q&"h>…"úD³Ø#„hÒgµ¯—ª©6|—FT"»2£òøs MƯtçõ†¾ìº1rS5Þègºu [÷ºê¾ì“Þk¢/îäïñ[Ùï<¯uÄŽ²ý0ãëT±C{-*áGT½WQ÷z¯¯U7g„IÏö“¡eÌR®­9Ö »Êæ7|¾Ú"«]e[t?ï¦Z¡½­ºõnÃ÷uLd[ LOÆm©V]·MîM¬Zl÷®iõlp”É~·¤ÑJ«º…ïߘ-‰ GÒP´÷c£ü¦’²ñùÔ ¬IâÖ(’-fyNAFûëæ£‹©ÑªÚ;:æFIGQýÉ‘ìÝdÞE©Â>hL…锣¯h/©&|rzÕ=à|ø½é~Í„Ao²• ŽD{ï²½þ £ÿK¤Ê$ƒã̺ FFû¾êIvÃ=éþ¥„ÏãžÌ»EF”î ¼»=ëÞg~æö¢¡ê¦]{¯©=a¢„³w&ÝÎ2~Ðú±Dâ7„€h÷xo4F…©ì,” Í; WR“Îy£Å4Lõ#7Sám1 ³ª~dÖHí? ‹†ªxp㧪±ªßgÓ`õy6Ù4rûû]¢T¼Š³LÂT„¢ë§§Ê¼q®¨Ǥ‘*ÞÇQ¸i¨šìHì^OºƒkfVuÜeE£U¼x£°j¸êgm¯¢÷ÎÂ]WÙV0 û‡ÏÙG_c·/s®ÍkФÁÓª1«]fZ5hõÂD;¼ £Œ²M£V?pŒ£eò¶? 
Ó£_É’D¬~ÌF›ŒÏ_õQzJÌjÏaîçGæµß/SV¬EÓ.Q«õ˜-´ùc*=}²PâÖUÆ(cÂh˜Ñ8ùþ,¶dÕò·À³ãÖ¥ø`bpRµYøL­ÏvÕWj+¼"­À(CÂhž pÓp“öÔ?4C92ªÉk~¼5‚”+4Cû¶­sGúŠßØ1G´Û Þ–£V}’D»©¢vÇ?ÔGl·/Ø> ¶;*(”p·óî6î|Ým0ô ÞmÈ÷NÊÛº¢D…¼Y«ò6&)ämíÉ’•î6ì®&ÝmÜh ºÛt£7án:!ÜõS*ÝmذHºÛ€†HwÌ$HwŒYHw¨ÄAwÛ£r…º |‰T7¶+Jûê6Xòê6XE‘ê¶LʪT·q&ªëÇ)Õm„& º ~\¤ºÌNªÛ8]WªËfÕmE÷)’ê6a‘êÚƒêÚ•ë6Nu[&.U¬Û2)²bÝÆi'°n#PÖmŒ±u'HÀºí²@·(è6ò݆\èºm#|U Ûþƒç6Â*ð܆ü<ä¹^ò\k ð\»ðÜÆ™xnã\Qy®µ˜ò\»qʳAà\šÛ85ÍeJ@s'TÀ¹ 3â܆°8·!U=qnãŒ8·qÖ œÛ·ü ¸ “R\¿!¥¸SfP܆¨×n×SŠÛ†$PÜFŠk Š‹ÕeBÜ–ÔÑ”×/E nì‘×B!®U¦ ·!&Ãmˆ ÉpíÞÀp­MÀpÛsú@zÛ0i"½mIoL½IoíÎ@o½Lémà `ÒÛ¨æ½õ?WzÛ`€IzÇɈzk z{6 èm#¹Æm+V–€q»‰q­qqfĸ~ׂq­±ãF‘kŸqF¹qìωq•Û:ÆmjðOŠë‡)ÅõKQŒk ŒËD{ĸv)ŠqÛJH­·i®ƒâú5(Å=g2þãÚ͵G¦ÛÖ'þU˜Û0»$Ìm°¤'͵»Ím°«'Íeæ6$< ̵¶R˜Û­0·!!anCNÂ\»ÀÜ†Ô „¹öÔs£L¾ €¹m!°U˜ãŸË€¹öÚƒæúµ(Óµ&Óõv™P×P—eP—WaP·!û…A݆ä uù ê6d0¨{;R¡nC. ƒº É3 ê6dW1¨{+ºÛžéNÈuuËéëz›‚ëz›‚ëò1×½*×e5®ëm ®{+T°KužÝÛ­(ؽ]‚݆|6'Øõްëd—ÑÝç¸jH7¬‚b%º£¬( ÒõN¤ë·°ëU*×õ–×½*×mšô”\·!7¹®?p݆$FÆuí6€u½L©îí¤Šuí.@uãÀknT7žÕ*¯"¨nêú ¨îí´JuR5Õõ¾¸íÞ-¾53ñ×¢™rWæyŸPîʌƔ»X02å[»æû r·ÂcÒ]&Åví®ªU©Ý5-®‰wÕp‘^î8…†wÓé!5¼+uÉŠwëÂSÊ[á?EÊ[TG /¼ÙLûc'35¼–~ñîØæLo£"Þ¦¬‰xÛgD¼ k‡ˆ·`k+E¼E_ASñVlU¥Š·è*€©x 4ཥ€wQÅ ¥4y/3¸˜Šmä½cö­Q8U¼Õ¿Pñn¤³ªâݳÈwå»âaÊwn‹òÝêFýîmõ»”ý*çE ›(ß]1ëç2eÀÊyG™¶>8oYAxÀyË‚éóƒöš´7Ê„à€öš¨´·,?Þu»V8¯  ÁyM1Î{/»gzl“óz™rÞ²€¬€ó–S{å¼£ïHlÎëgTÐëe zË nÒëeJzíœ ½Óäèšfô–õµp—Þ§$½ñ¸H+è…:9¯ŸQ9o´³ŒKOàk'ë4œUmxo(ï_©v –rÉ{£s©¢w¥¥´.>Pµ»S)«¼·$ ¸à½ÜÂAÞ£ ޾1È,žªÝýµjw®ú@¯k÷½.–¯M¯»CNF½îŽå,êuwê|•õÚî߸g! 
ÔëîV›\fÀ&×ÝÑÄ”ë²Y(×Ý©¿}ü¹~r)×=šBÝp uyÑêîºD.´M­ËÇEÙî'Ôº_A¤Û±šD‘î¡Ûm®]ŒjsaÞLm®A›‹;§4¾â&Íå))͵2Hsw„ ”æîX±¢6×Ê ÍíØ{EmîãÒ¡ÉÝ©°…&×Êžš\îͽdåÍ9®õCªrÙi¨ÍÝÿOqgÓc9Ž^é}ÿŠXÚÀtYâ§Ãö»jgµ±» te£Qèÿá«{%ç0ntf*fàAg\–DŠ¢ÈLJ/i–…7Wÿ0k.œ-æÍeoþ[xs­˜ŠsÛ‚5IàÜHS0+8·-XÝSœ«_ªãܸ£bYŹœ3çzšâܶ¼öæÚÓçzšâ\OSœkµœå”8ׯSœëiε:Îe…Îe) çrúi8—å0œ;å©8wÊSqî;‰·*méœËc87vØ‚i+Îå+$Ω»úMê²9Ôõ›êzýêzµêzýêzuã¶`¨ u§D…ºm¡ƒP·-´ù*Ó»*NÓL×Û˜®'‚éúÓƒézn@»ì‡Œíú»Ûõ×¶kU´;=‰²Ý¶Ð% ¶ëwU´;=ˆ°Ý©¨Âv½vÀv½m€í¶õ\îÖõ6 ®ë/ \wÊMÁ®W À®7S€Ý)O»Ó• v§+ìrSÿìò ¾ì.Ø]Çÿ-TW¦0Õå⤺‹ý¤ºŽ`Au›®YÖE €^«b€Þë±€xý E¼Ö/ñrAŸˆ×煮W¬³^ÏOY¯Uä“õz)vZt=Ä7ÆPÅ¥J|M߀ø2"‰¯•È—Ö"_³#ùZ£òµgòµ ÈׯäkMK‘ïHëVäiBÜ|í €|í òõëùR?€øÚw âëŬ®Ú¼@|-»FÝŠR*ðµ¯à|ket ¾!µt5€ï9C3ÔËÏÉP/[–¡^ºg õŽ!ç^½0Òw½¿#½l&Fz§+¥+C5(é…ôÒ"DÒ;GI¯WH¯_ ÒëuÒ;]™©aQ ½ÇùGÊk•ôÆmïé“‘Þ)ÏF[´]õNW*êµfð$½S½íÔ²H饻ÊHï;‰»v¼0‚ôÖ'ŠUÀë­€wJTÂ/Wc€ðÆ•7_0ÂËÎÇï”§ Þ)M/{ C¼œêâå g —½Þbz½•ôú“ôNWVÛ·‰ïz…‚ïÖb”ïNYíÖš¿/Ã*¾Ýoе{o;r´{œ¯uc9 ÝÀy¯Ðnà¼{ð$Ú­;NËÚ­ NL ]Z‰v#à«b_E»ŒxI´kgšíV‹7!h×NdW´ËCÈ í&¸du7á¢]^GÇnR›¡Ý•.Y ÝU ðZï MÀ ÿ˜^øÇ ðÚuOÌ«6ý M¹à½ ´…ÝÕ™9ï‚鰇ݕ)x¯yu÷4ôšÁŠñva”è5ç@oxíD+è5•ò^s÷š{¼×¼7î©xSyoÙÝË+AÌè¨òx»ˆp ¼·ì˜Ò3Þ.-bŒ·kÏ€x»vOÄÛ¥sîo÷|ÏŒ´»3ö"íÚm̪«ô˜‘vÏœc—Q¡c—vCÆØ¥E“1vÑäb—æV]%9 ±kÅTÆKç)BìÚUfÕEUñ²$ ±ËïÅ­º ty§4ÑÇ yíV]-§[u¹WifÖU ïf]àˬ«ø””×ÒÌ­«=£»u_ÆØµ§v·îË»îæ]‰•˜×Ò€yù|ļ´žó²–yÙÀ€y—×1výŽÀ¼L#æå[ æå[pÏ.î ÌËr’ó&Éy“:=‰y¡G óò2P^Ëmn़VpP^^GÊË&âÆ]m’¤¼¬`R^¶RÞAHyí:P^v4d½ö š÷TØË¯ °×’{YÂ^æFØki€½|{îêf5WïKäkÃ]½v]½ìÝÞ ˜Ý©Yñ|Û©Yu òe7Cä˽#D¾ˆ‰gÈwaT _vyD¾vïÂx…gE?ù²­ùb? 
‰/«Ä—W)ðå®_v”¾ÏmP$½|`’^V"I/wªõZZራÔkùõb³ Q/+‘¨—Ûòž¨—ûêˆzqÔ†£ÞåƒP ,†_<¶ßÅBdî7+ SH|Ñìœøz*·œ½cêJÒ¹ÛŒ%Ù aïÖj¤wJ¢†ÕüzÑÎô.j'rÐkµMл0PA¯_YLÉ¢´•R1H zYä¼øó¢O½A¯s79‹àµ õì‰Jöø þ¼D»úA:âµRñÚ…•*6Hx†É áEßá„×Z.¯U%o² ¼‰4š„× /k€€×j€€“ ¼ cðZõðÚ{&é]d†¤×K Òk5›§Iä÷¡Þߢ¡ßx2ó⸎ÉÌ«§XÓÌ»á:7óÊvH3ófD½…—w}íå­;½Ã |y x/—Ý÷šk…÷r%ï]¼aã†8=çMqoéÄÄŠ{ã,(¥›Š{yxqoœÞ¢Uq/Oâ î´*íe˜~ÒÞˆº­Q”ö£BØ[[xÀ^… N{óŽ` ½#MÔ ½ã3ßTÚ;¾RXq•öŽŽA÷Ø÷Ž{¾Ä½q »L }Ç=$Xi︱÷Ž襯w¤YŒÞK2¤4{ã®ôFG§1‹ôf˜ð zs…âè3_¬6ºdå½9#d@ï¸A"ôÆ¡ÀVA¯2:½# åTÐGÔkáôFµB÷æ„ !pïHÓÙpoNDŸŠ{óŠ“€¾¡a}G9q ›B_…}3Øúæ-Ôwä§a3@}s†O©oN4 õ͈‰Cê› C +õÍØÇAêä…³×Þ6 ïœvkå¼22®Bßñª¥Ð7¾½N¡¯S™ïHÒ'óÍØ¥r2_{k`¾™eÁ|- ÌwT¸B0߬ûȈ|3÷¦ùŽ[j]ùæ•ùfœqJä›G©’õF ʺXo|Qʤ…õFåÊGª¬×‹§¬7Òd• ¬7¸ž7¦¬—µÔ›z_õŽ["tƒ¢^¿NQoÔ¢€z Þ(Ê™!mô+åM¼ÃJy­åÔGcBHb¥¼# X)¯õ ¼öåòê§í”7¯<›M)ï( bî*åz— ”7m0z‚òÚGÊ› pIy£,¢”òŽ[j0¥¼yaDa¥¼ Û¶Hyí òÆR:¬”Wûl§¼öt ¼ig`]¥¼iÊ%(¯½PÞ‘†XJyeØqÈ›Ö센ž‘B^¿B!oƾEB^k²€¼Q@² yãÅÉàÈ;†Z tÈkÏÈMV#+äù©ÊäeEòZ[PÈë¥Ê;’äA)︣w ¼©3z‚ÂÞÈNá«ÂÞôÜÊ))UÊ›¸) ”מ ”w¤©ºåM0ÚóîQ±bÞ„°wOÌ›hæMØòf˜7uÓʛڔ—9å x¤MAy£WSÖ ÊO®¤ ”—ïÛ(oj„’€½ãJ‰¯e°7a_Ø›°}ƒ´7Z…ÒLàÞÔ,ЭâÞdFRàÞÔµk7Ü›šARŽ 6 ÷¦ján÷²îMº{Öhït¡âÞdÆPàބݤî¥â0Ü›v’pàÞÐèZPß÷à» cΩmðw$2°ÒßÜ¥+4ø;48Ù´Òß‘¸+nå©k }£zÂûùÏ?>÷ùJ¾øó|ÁŸñÎü~jîJ?þówJð¸çÏ”¤¯£Ã*}ë¹lå­ö1°”£sê- õ¯?>ƒÅüqÿWFÁß~üòöŸÿ0:Ù|û¯·ÿãÿöã׿”·×z_SùŽœ†,z­lë膿>§Æœþéß×·!rbøñOßò„½æ<ªz„}É£0{MS¾¿eÜþ‘ó_þùíËÛ¿¼µÿóöçñ?…Ïû)=ö=PüîM÷96|j­Oº/zÊèg R[z'÷ôiCd›sß>÷u‡³ÿÒMŸRóGîéÜ?¥æöÝæÜïš?ú˜ ZÛ×ö1-§—mä3:³!gƒ ”>”jž»¶åíÙ¿|sÊ\r¬â-eLFSšrÙÏ\Òo˦IÍ£¾G6åƒlŠgów²8Ú¶–-¿'ˆúZ[zñ?Gÿýóÿ|ùòÓßþöÓ¯¿þúÓ—ö—¿þù·õÈo_µ/¾Ó¶ ÛqŠìt™úÞ9‡ÛÇ#ŽùIÍýq,f{Òã32OÁÙÇŒ¯­2ûY`G¢¸3v†¿Ü˘h_‘7N‚öªÁÖ:O3ï íW(ÂJZ±w ·V9™ßcêtÅ",Üt¹op-q‹ú¾KÄ5¦ûýó‹ð 7¸.«E[7Q$îUbêaåcÞÌxkQpLÇ$ôr0mxãÓmy8,c$Žp¿Lº²2ÒOÒHõüyh ÓºÔƒq]:OÑuŸ‘8Da¾¬Kˆ5÷‡Ã±WÖñûK;Ûo%¢ŠDñ8nÿ÷CÛÎö˘9#-1°…îK_×Xä=Ûïó$ŠñkQƒ3âP‰ûm¿C䛑XÕ⌸î#1 —ÿ.úŒ·Â£4´U¯kX¢.ÿBɉ·Ï9á[Yׇj µ¤hw/üwcR ^ç³bÒè nã]BJYÍÎØ»q$n·ñnå× ÏÔÐX—cü¡¨ñNAÑšOÕàmGWpïp¬Æ‘ϳÀð‘¸ötµÞ!4ŸóuÅ1ÚÑè®Î—»FâJÛ32‹µâ«óå #1Ñö¬üy oÆÕùÆY$‚Ò×\Ôö¼"¼ðòÕùN·­´=£·ËÁT¯6¼"Hì6)u?+}=æ‰Wç;Uß.îçAbטŒå» k\‚HÛ²´a=k¼=úŸñ áG«â!@7_;pÉq÷Àö2GâÍÃaÚi(EZ²®|Œ è~F‡YFß°¾jÉeSûs:ÇŸÌ~÷ÀnÖõ±ùóèô[‹j¸{àÌF¬öÄ©Š÷r÷À4ú®Ú)¹{xýöw /ï:Ôlк²Fð†ôª 
k¿l[MFb§U£Ñ3½]ŒQ6hŒ3Œ@ß®QB"»›{ŠA\ÐöŒzh×tÓ¤4|g¿ß²¹ Ez­¡|n¾Ÿ-f¨%øŸÕ‰âÆ#±©ÿyå°>ä›pl¤‰âÀîJ ßjÉMÇñ›`+OäÀÈG$_}07ÄÌ­}½eíEðtÛÂc9X *Øë¥7œÐÁÁ4äûÕGÅè—Ø7¥À—Ø7ÀÓ•ûºÅÿv¶Ïm¤­"€ý=m‰gwxâ-ƒy®ÊHÌ<½C#µ¬[ì²Uîôãm›È`›[ç^?=N1o5<Ývãn?ŒRcfv«ažÂ1tÁrç/xÌÉn5ÌxG¢¨áÌ1hLØD {bæÁÐÑcNv«a½öª¡*pDÇ‘xkâXDÓuÌײŸÈ˜’ÝšØÛǾiÔŠ ÈQ4ÑÄe£ÀÙwÄ­ÐÈB£œ¸bCðô´¬¢‰y€ÉHL ]¡‚9-Y¤qÙ oÒ˜˜IðŠ x$ÞÚØj6Y[ÓåIZŸ–&Ú˜q(Fbg }ÓiÌÏnmyʉÐƺŒ<ž‘ÚX—ߟk9'Ë_WÕÆ”ÓiM f‰·6ÆQ3#-3š/,"£åˆ©#­U£Z°Y¥1I»¥1ñ9%®Å†Q0­]¤1Dz´ŠwÏ;ûÑøD{ëHË­ŒýýIÚÜŠšVQÆ8Oi¤‰Qü¸©ŒK)eÆñ-Ë8’xçç,«(c[JDQÆü\S8kîNÙšcx·ôX%hi_³ÌRx ïNÙ>õ0 ßòÆV™S²§W<æj·@ö–3æj·@öW5æj·@öº‰·@öº wÈ"MYÌTiLÕšvÊHk¢ãáetIYàT3c¦&N«ð¼©SÄ*<öÐhŸüàH©,"7µúìLÒ#)©EdÃÄ8EÛ…™ÛÕ–“b‹ödëû-U•qÇÄ/•¦N‘Žq5Å)â…íêÙ`KQÁº?Pg\t0Ÿâo݈N·>êE¨Ü÷í<ùø;Û ?orlá¼hÆÉ?ñó¶ÒGü¼q&Gü¼q[0èóf[+AŸ7Ûë úÜ©IŸ;U és§h#}ž…>wö: Ï–FúÜÙ•“A÷Íø­2èS‘1èÞx+ƒîSV0èNWtLcT³B÷N>Ý›qoEѽsê­‡Ïür£èñ³Î­Ðçèˆô}n ×H¡½_„ŽÏ¥U=ÝV!´w©€Ðþ}(„ö BÇ× #5(4:4 íÕíý`tÝ©s£}!`´?%`ôÕÍB{7 í} (´÷  Ð~å“Bûè ]5Pœû¢w2xú¢­G„®5 ôTL…Е‘Ç¡ëNÙíUíC tôœ*b¡k§Q =ÒàßT ]»Y•B×'. ´¿8Š®ÝìÖŠ¢£¨j|Š¶ç Š¶­©DÑ#6MiÛ·J ]•ÌlÑ~!lÑn˧-š[ZÍm¥›®L°éÊÀsdÓQeÿ`ÓS¢²i„¾74ªíhzJT4…ÔÞ¨Œõƒ%š¶hºnç*€Bi{}`Òž¦HÚ+HÚ/T"íO"]D:Ô ³é)Q‰´}ŠÒþ}H[G ­¾ç¶ÌÀ£=3ðèÚÉÁ£MÁL×'þU=e¥<ºvÇÊ£§r(ŽOP›x´wNàÑS¢òhoÊ£­BG{Q£½¨ÀÑÞ5Ñý,\ÐÖ÷C{Ç ].“Úö÷C×n.h{ÁtA[i邆8p4'+ß‹£— G¯ã¿ÿ&-6×™EžC½ÒЬ(z §¡(ºõ)J‡èì†Æj8*³‰. 
áH8•$:ã0x Åá`ñ¢EŽ9à¢q(›ð`0ºz0F¹ÈTÙôWƒD{b¥Ê†O†Ñ8vZa“Ã&ÍŒÉqŠwãØhY8ƒqìØ&aÁ8Œ10‡ÉlãxBkÄà0IÊ›qi„âèæBF(ŽÎYCqØ´‹¡86#ÚÅáϾQ]Zk(ŽE„â0ºÂPÔŠƒ#³Eä`wl!98HYH‹ÅÂì«-$†0‹ÈÁ1Ì"rp 6òl30FäàèwFä°·Hò¼Ñ¼R]# ‡½)‚g{ŠȱaÍrø•Ï›y”ãÔºŠc3zä|_ƒ †¢„ÃP`³Í„øƒprØŒžÉa3&¹ŒÂá·5ÚÌUjkrsàfÒJ§ÍÌr£¶æ“7ã»q³fFÚl™1ÇNO ¡³1CBg~ΜØ3ã†sæ•Æœ=Ú˜3okÌŽÌÙH§1g¼b@gcü„ÎvS‡Î/ƒqLüÌÙè©3g0>gÎ|cÎú.9“*+rn ¤Oäܳn9ÛâB g_rö»9‡¶ÒmBθÈÙÚ8³78 g=@Îw§ Øìí°Ùµ`sE—w ›­Ë@0¶5cÍúÚ‰ší鈚m部ٺGÍ´+jž 5ÓNÝ‘@âl©ÀyºÐ€3Ú€óÄ€—S ëœÌ€³ÃV#ÎÐêNœñ¹8›Äq¶é°ç9Q4±ƒlg`$ΔuΞ¥gÄ!3àLˆ»@Ã%êÀ™qJpâìP™’øe$‡®Nœ>œ8óJ#Î:e˜ˆ3¸Ž#g†ér~ÂÖ“4£F¾‰4{"H3ãhiurÒŒ¢;iö @Í|CÍXkpÔŒ—KÔì‰@Ív[ f®á8³Û4âÌ©º!gvÕrf° cÎxaΜ=Qt±‘c g¯GÎÀÊÆœ¾ÌÙ²tä †z!g4+gÎL4æÌD0g+&™³#dƒÏ¼Òà3²Ág^ið™dÚá3®tøŒç4øLzKøLÃ3é3]‰Âvu§Ïd·¤ÏôìC“ÑLš…5 Œ ýN£ÄdÏ–ôL0¡g `uõGú¾«X¡¿)ô& yÏ f>ƒcá‘K$Ï =ò×Çà0Ëéƒã#ðŒC§žãÂǯFœÓõó­²=EÍ8îuBÍÌÔP3#r¬tt`׸³æôe®;÷ŒzÈg`ÈgGN™aÚò˜ÏpÉxägH:ü Æç‘Ÿ¡°OØÌ#Ø&ØŒi·G~F  ü CgLs +­¬„ΕðÅ¡3X-¡³›t®ö˜€Îí£øÏÕâ(t®d€ÎóABçbtй“:W2 Bçòß¹V£à€ÎöNý¶ Î• ˜Ô¹Ò'Gê\è2#u®ü¸žÔ¹q²Hê\8«#v¶û;Ws-;rbçÂîˆØÙš±sá'Bì\¸ôBølM€ð¹2ht£• ºÓ-mõC]Í ]¹ÀD]8%&‚nøkû ´7"h¿-´}ydÐæ7"h¯ èzE‰~Âgë§IŸIŸëëYTváÀAúl]ñ³µ(âg«kâçn‘1€Ÿ›Å´~¶‡ø¹ÒÔIü쉊Ÿí“~¶VLülÝñsá*ù³5òg«vòg¶(âçBêCü\¹JJþì……åÙ–gë­ÉŸ Õáx¶—Žg«:ž­—"~¶2?[…?ÛÛ'~¶J#~öÒBûm¡+5'!´ç m/ ºr9šªŒÚ>9RhÓV¤Ð¦H¡=¤Ð6ªB›$…öÛ‚B[ë"†¶6B mã 14ésuG³HcÛ Ÿí[!|¶·GølÚð¹L¡žEû•€Ï~e£6†ö#|¶‘ôÙ+a§6ƺ:ès}ÚnjôÙ/}.°ô¹|DŸ‹9¥AŸ‹y³AŸ©S>×â@Û×hô™-Èè³'îÔÆ¨¡'†.Y¡Ù†fÿfºiÎÔÆ¨qbh«Tbh¿ºZ|éFmŒ&@ ] }C†´….Pèb`w})B[ B ÙœM£8 Ð…+¤Ð…P”Úo í m­‹ÚjÚo ],Þñ“C[9É¡Cö8‡†Ü9´£¡_åðO„ÚK m}9´5rhŒM†¡ ×µˆ¡‹YCs+†.“Ïyy…, C[÷B íWCÛƒC[Ó!†¶Ž‰Ú¯~þí…‰.ÜHCm¿Næ÷‘èß”CšËÌ£íÈv¡3” åX-´3œÐû«HwœO#t‡ˆž€´Nz''´Ž—^€²á„æŽzâéÅüÞ‚§ÝË<ífIâi3ŒO‡V€‡šÛY R7  Hm’ÚÀ!µiFBêl·¤ÎÜäHHH„nH ë£rt‹ÞŒ¨µÊ¨v„Q9ÚÖh,¨íkt\ *Œ¨Õ`<¢r4¾fFå¨v2"¢rTzˆ•£r¯#¢rÔ¢rTr*Få¨Eå¨[Q9C†A9ªÅ`FPn‰° •߃rXÌåðÛ*¤ö°. ÊQh†zå(gY!u¬®ÁŒ ~%‚rø• ©[±@ ÊQ,|3‚r³2+¤Žè3:gP MàÅœÜ ©Ûóñ‹£˜G[cqÌHÚCß0‡•Œ!821)tl¬ÑþHÚÃ1Gf_ $·…‡[‘tË\ï’ŽD]*’ŽuWµL+’žîªHz*ÏLÛxI0K¶j¥e,–\z¤¡?—öç—Žò€+—ö“Á¥-ô°ôTÅÒ㮈n,=]©X: ‹p$‚¥» –öç–n‰>°tK4@KÇm•ÊK·ÄEo`iÔ.=]©\: ¤# ¸t„¹€L[´ß¶h/bi»ëáŠN®h+"]ÑÜVG,mJ“X:¾~°\âÈÂË‚I¿“(RxexY0iO|0i/&˜´? 
˜ôteb|:A DÒÓ] ÃÓ1Q‘´"i@ÒSaI[0="éx p$í/C‘´Õ€éøÐeöM"=%*‘탨ÉJ¤£v¬Z‰t$*u‘¶ˆˆ$Òñ ïÄ…?”SYô”¨,:âZY´¿ °è¸Rí²Oí/,ÚŸ ,Ú« ,Ú,ÚË íOío,zÊSYôt[ˆöºQm±$-@ôʘŠ£½<½Z$g…Ñ^ˆ¶ŒöšŒžÊ£0zº-"DûS*ŒŽúÑøàŒí‰J¥-¾§…Š^éà•öªëý­}_TŽŸã Ç·g çÒòñÏ_ÿìéèTã?xüóÿð§ï!ߣZ÷7X°Ó+ ö»…ù.Üþ™Æ~ÑÇþÝ3-ÇÑDχ½þBÌúß½ [ÌÎ"<ÿøÔ”1^õ«×_Ÿ[ º¶€ë/lþùœ7qáùǧ–àY÷çwpþõNwýùƒ²l%œI#at~ý­ö"ŽŽ¶·(Ö¿þøŒ ôÇuü_EûñËÛþÃè¶ÿñí¿Þ~ü?üÛ_›S?Ö?Æ0f0_ŸSùŽœöX4,cXÂáësjÌéŸþ}}5Æ‚ÿô-O˜Ç$uù>6œ÷Vöš¦|#lϸý#ç?ÿóÛ—·ykÿçí/ã6>ï§ôà÷Àñ»7ÞçXñ¹=¸>©ð×Ï´¶ôNîéÓ†Ì6ç¿}òë­ú95äžÞÉýSjþhßmÎý®ù£“¹eûê>f=ÎÊì½åè®cax+}ˆÖïˆÃ—ÃypÍÄóÎÃz0ÚÏ£oÌ\ ÏiÄ“¹›‡ø\c6”‰¸ã÷›f’S@ÃòlËù¤PãÛ ×0§pY”G+Î$Ô95YßÈ ó‰C>[q¦¹('Å™3þœ”g+Îß9íB›F¢.zçÇÕg+Ž"z÷åùÍDÚ4#ñ^ÀKÝž¤ýÐFÿhʼn>©!¾…~%î¬ÊÑSy¶âDÿQM÷"ª,µ§Ë[¦<›qâ6…ÑÞÓ'º-G•Ž9Ay6ãDïë9§°z#á\Â`wvÈ#MWË#ñ^I #`¸ÑRGgtvÈ#Q5¹ÔÃçÕ社Šêg‡Ömç¥ Ú_XF–a9;䑨4-µÎŒDõMêó£³C*­_]d9j$*rÍ5N"=;äøäeÅ)ïθ­°¿\cUþìSâ·Uu½|ôMº˜žÃa›Î9ÑÃ8wçT°&‰{>{äÑèta1ÇæÄk!#ÑŸP6gœèÜËU×Ç­„9FÆúì’½ãù8íd¿Û2êà8Yçì’7ìå ͨô¯ –Lûã"õßå¦Hg¢ |HíÑ)IÇ 9·©[4š‡Ž#q_¥cF…oõv]xÓÙÚ(˜t̺”—·.‹SÍ„5¬ÞM™Wª5ÌÚù6ÆÿK(Ç ¢½Î¾ÈÚª¿¨=”L¹›2ʳ'Y²‰»{tâW¿ÌíÇyW7Hêð\å=Ü W¿Ü`õMMO¦Ä!ß.¥<¤.Gâ½6‰ú6÷Xì­wS–uÙ¼ëZï±Öªu!BÎn×¼ÅBu×$mÏ|uÊ ýk$ÞÖ”¥³+cn×Òã"UŠeLën›SZñe”%, ånÉ*ˆGñeÐZr$n§ZŽVõø±É‚³µ§2æså’ÉöʘÏÝ.ëàFÛ=Q¹µ…ö'‘x›dL!”1Ÿk—L¶×°ÊzàÑJEx–5|:õ–ªuʘÎÝ«•¦’"q»e2ýðã dܾ©1¿þ¡Þ29aå°ŒùÜm®˜®SÊ[&[É·i'Z‡,¼ŽÇ3Ã"mXäLYecDTÞ-\‡îÅ»:ã}P’l#‰&§*¹—»«‘ø\ó WƒŒÕ%Ø-“W̰JRÃsz,X†„¸Õ±ß­Þ¦¡‘¦£A=G¿ÕñŠA¦„OáVÇ+úý>…[¯Ø¤U’ºÓÂO)| ªŽ‘ŒOWÌQ±ú‚rT…¨c•MeLæn»I<³Ì-J ­ôÂj+YmL1?ÒG ·Â­ŽŸYÉ>Ù©b¤p‹â¼&…[Û›=L W绲w‚I!Õ”0)\¢Øú¼X¬¹EñŠiZ “Â-ŠW8‚K¸nQ¼‚Dâv´]Î&ʘÅÝ>½´êô¯й5ñÀQJÇg\(}טÄ-—$^Ø[†;á–Ä v¤”p'Ü’xn+E‚˜‰ÚÃpkâʶT!‚GÍŽ‹ÄýÖÄ ,ØeÌán× ?ʘÃÕ[û•Y ©S†D»5ñsh9¶I^½0☕#žK½›2ZG„ź%ñ ~‰¸ÿ·$^±Á¨áî®NØÞt„ݽ%ñúŒdVªlå¹u½[[Us ÂGqË`û*ÚãS ó}Ï“]þÞ~F²è0Ÿr`^Bc« ‹ÞŒÃ*‹³ LAÁ¢Ã­¢,z$jÿM=¦7 f`ÑzDÍ/΢ǬéI"A¡ÇÏ@ Ð#³aPè^ÉË@¡cv'¬ Z]ê¿8„î³0èð¾ƒ{+„îYEôHS‡ôHäC*ƒîN¯B‡‰_çm€Ð1¥Ö¹0 t]¯“V@è¾àAN=~†Èƒ‰ $`Ðmç$ÚÎ¥‰n;¡ HtÛ1@‘DÄl†1m#š‰‰˜ÏD7#û ÑUZ•D·NðmçËE·ÎªŠn]wÒ“D·Fèû<›•D+XúÅItœZ¯ÝHtìóýVû<Áñ…DÛvM’hÛçI+R‚DÇ€?I´íÿ$ŠwýŒ°£ê?%„¶˜¤€Ð¶gÚvB·‚· mÛÉ m³´E(%ƒ¶]kdÐÜ Gm;Úˆ Ã-JŒÚösA{»‚‰øV ½]Aëί_AÛ¶Y"èH‡-G ´mU%¶M®$Б¨•wèéw˜° Âz@ǺxmÛ¶@ m+( 
´m½$¶M›$ÐÑ"A™AÛÞ"hO‚¶}‹DÐS¢"ho¯@ж¥Ú¶èAÛæ0"è)±qw”ªx"èØo¤Ú¶AÛf$ hÛBm›bˆ Í‰Nm{?žÚ,èDжzºRtlîSä[±ïY ´¿EèfËÑ Ðqš ¾Eh;jÚâ?AO‰Š #Q‡ èøñAÇ| d;ó”B|•@Ðv´N¾M(€ ív"h;ù„ :AÓ…AÛ©æ„ÐvWRèéÊ•§&ª,¹(´Þ&…ŽÓu² mÛ“B¿“xQhÛCmŸa´mÄ Œ¶½„Ñm5B¯0º=™¯2h¿:6‡Ë€E‰Zi€ÐgVÊžG]{‚=Û7Jö| †WìÙ¾²çˆî.²ìÙB¿“=Û÷öÌCFÉž¯Àï¤ÎþJAm'ÐIýÝ€:ÛÙ^¤ÎvŽ=ñ³cüìßð³TOülç#?OYÚYcø4ŸãÓÔ&ü<%*~–ˆ¿8}¶“^HŸ§»"°#MH¤ÏvH 1´.E m'ÈCÛAWÄÐfD$†žò íYC{–ÀÐv–1ô;‰[£dŸÓ;h;Z®øÌóÔÈžíÜK²gCFôì7Uò ývAž#Q{yg;0…äÙL!y¶“'Hž§D%ÏÓm ÏÄâÐótÛÆC,1=û÷ ôìÝÐs݉ÂõÀ»ÚŽÃ …öÁÚåCË~Á÷2þáõÌÝn7Ôl.4¢ÒmfZà蟤Y£wÚ"iÞ@£ÇD4ÚÁh´Ã10éý\«LÚƒ‘IÇ•Z?`ÒŽ+¥WJG0Õé ÒÈ ^d¥Ò‹XÚ °t`+ž‰¥(–v2,ÝžTUi´_à@yˉ£7GÛ~ÃÑ ºÃp´'*ŽNÆ¿èHn8Ú¯ì „É&q4£úŽf„ÃÑibÕ‚£øÅp´‘âèd¶iàhƒ*äÑhd8Új8Ú£½°ÀÑV{ÄÑöª£íG3Ô„áh{ÕÄÑVžGûÏÀÑ ]fPšq» JÛ®@é̉6¡´bBéóH¬‰GëìtâÑ,¦óhMΣÕÁï85CíTÙx4óhz—Á£“QeðhÆ=2í‰àÑF£È£­ý“G{i•G§É.`TON ÍxnF¢­ ‰¶î–$Ú7I´}g$Ñì0¢mÏ A´Õ4AôõØa¡A@_—=[ˆž­ó%zv^k虤×Ð3^¶£g&:z–ºtòÌÂyvg4È3îÚ-Âx¶gºÃw„ Äs<[­<û•ÏVž'x¶&Mðì¸0>˜'ð>Oåy¶D’gOy¶!”äùülˆœ­;Ø-z"gO¬ ¥C9‘³½d"竘ʚ½‚5O÷ÖlÍŒ¬™ëDÍö5[³¿P³…p"jf.#ÍìfŒ4ó›wÒ Ÿ¬“fP'Í(Žf-4Ó/MÐÌDgFÿ3âÌ0}q†ÓšÄÙ¯z¦B0ôÌ¡ÉÐ3ÇCÏ~%Ð3£øzöD gv™DÏö$dÏžm­‹Ú¯|h{zh+& 4F!è銠‰øâ ý€¶®ˆzJ´¨®N§omÝ)ô”¨Úó¶^Úú9hë«  §ÄÊð ,è)Q´Wô;‰›*aVhO€¶±ƒº%ó +€¶àÐÓï™ñJõu€BOE©ùwmj„0zº«Âho Ñþ’£½<ÀÑž'p´ß8Úê4Ú{AÐhï–A£½¶‰i´è ÑS(öž4Ú‡tÐhïšGûcGûˆí]lSí^›¦¥ß‹£— G¯ã¿ÿ&]î×=±èpŽêì,:N‡ÃYÃtT³)ƒE›ÅÉÃt@eF'scF3«Áh.¾Œ^,D`ôB»a´y4£Ý/mK³„ѶfIm+„ÑnàŒ¶Å-ƒÑæ Œ¶•/Àh;Ó†0Ú–fI¥#a5”JÛÒìE¥ßù}eý •ö*•>ÀÆÿP*½ •¶&*=ÝTš¡3¥if0(mÖ° J+ú˜ 4¢c”&­5(M÷0 ´aP‡Òh;¤ÒæKt* ¸æTš^p£Ò¯ÃuL•@*]ÊN¥•Å8”«˜ ´ö>7”Vï™ô ¬·r|BÒOv\q†Ü`Ñ^:°h.ik1$Òž"m‰$ÒDˆ¤Q8’æm IãQˆ¤ ®I›åHz……œDzµ8ÑÆáh'‘^éZ¯Ä ‡ ÒŒìkDÚîJ0mw%˜öÄLÛ¨Û,N5lªÓ šo`Ú¯T2m¶Y’é+™4Ȉ‘iÏ dÁÒOÛOóü‚‰O30ø4CðO|š~bãÓX¬w>ÍDÔî~æÁ é¨ÉÄP“{¡ÆÚªjƒÐ Ô¸«jBh'Ô°Ÿ„šPÛ5jÆ 5ïg„c‰jÞÕ5Á5ïj„šOb„š·5B­CŠj§œO㺯ÀÓÙŒÖÀÓž<̓r&<‚Æ“fУÓhF§É‘Në\Öñ4¹¶ái&žx:Á1À¥¯_HÓ‡mDæ6#ÒÚãNDVÀo!ÒH6\à@z‡ Ø4BmÆm ¤ ©:æm H‚&6 Í< HQ8&7 M|n@š¥%Æc:r MÔë@‰7ÆÏ¤A¸ H3щô«ˆS–N¤L¤¿> ʈ4·‘&ð˜ˆ4ŒÍ¤ŒH?ãX‰ÆíœD+¥6}EçÆ ‘füéü™‰ÆŸùÈÆŸéÈ&ƅΟŸæbÏZWw>½€³2eã͸‘ñf"eçÍ_ š¯A˜Ÿ¿~ ZF‘,¿ ±aÏã`ùÐ÷¯Æ’Á™ %ëJÆ…F’Q8€dš& $?á²ócåµÎ.ƒcÙËø1Ú©ócô@Îy¥ñcH‹6eõ}A5V±3[Œçå[gxlÛ«Å/Ux–c…Ç…ŒG€+g³gSˆ€Ç™ÛŽãP_€Ç©­{@VÀãä¡£['Âã@¬ç¯75ŽA^}£ 
ÆCÀjÚfc¥Æk³§Wj¼VN…Æk%n4^-:8 ñš¹ËÐxT!ã`<¡ñøa“ÇÁr:4ŽD0j…Æ«Åñf<’ÀKÀŒCÍ"GeÆË†ÙñB§>ñ²¥Vd¼˜È8»® /4ÈxéŒØd¼Øñø_íÑŠŒãDB} ã¥që+ñòQ\8–cEÆK5×µ"㥾FÆ‹í92ŽSµ)ƒ/<œÌ8õIžÌxÁèdÆ‹GºPf¼Xàs@ãe#TUvià¸ÊŽ£±>~Uh¼lü¢‹ðh¼lÆ¢Çq“0Û*4öG4^< ‡Bãżµ€Æ‹-mO·Uh¼|äc^ a/4\‘/ä€ÆË‰2@‹§GZ¼M-^Œ€/Žo•/ŽhŸ´xq°ÒâeµøJ‹ÚðH‹wM -^ŒÃ‚ûk3öh¼p“/¡ñb¶yPãÅ(¨ñø„ñ˜ Æ£§y'˜óbñd€‹žJM\<±¼ïÁœ‰¨g‹Ô\\lÈ.=ËöDK.‰¨TÅÅ¥°.ŽžNa€‹K1«¸8x|üzpâ8Cá$8q)aÁ‰ãIQBÀÄÅ"—)&.™¶`â8Y\1q$¾ÄÄ~`0ñHD«&.É"V('.‰ÛLŠG«@¼0ⲚXq ‹@± Œ½±D†•V`| ¥ÉJŒËÊ· b\ ñ¬à¸,Üñp\¬*7æQ(ÄÆyg¤X`ããÄT·ŠmFtaãø]yèñt‘BäQ3ˆ. ˆSK5*D¶wLˆlï˜yH1@IPä¼M¤øÆÈCá=w• Ÿ_ÇqÎÔ´È!qYòHdPeÈC‰êû?Î ó/ò㑈—~<%*?ΔÑäÇ¡ŒA•DýÌÉCp#´ðãL‰M~œ«9s•DE>ÀǹšÅø§ŸgîÙ%>Ž …6àãÌ1ø8S•g†¨#>Ž!SʉãJ„ Q|œ¹‡˜ø8æMj?ΈnG|œ¹ÒFŠœKŒyüø-àÈ™›‰‰“󹱑$9®Ñõ äÌDÉ™Ò(9S…%gwH JÎY™³ûu•(ÇcÁ¬H9Zø{Væl‹5@Ë™ò•lyJT¶œm‰"ælkv Íyµ8 œ³í?pž®Tâœ-ˆsfÜ."ç)QÉó”¨:["èéJ%ÐÙ"AÇmÁ~•Agõ"ƒÎWEÑñA¼>lÐξ;ä‰ Óiƒ< ÐNDO76>¬§ >ÿ{2˜HÄqÆàB0È3W®zùƒXöàƒv–Ïäy0ÓƒtîÚƒè2üŒAÄD僫̧ zt“ n¡ zmĸёˆ0Í ¢×f”ZIôzEÝù+½vón+ŠŽñKãòº1&PôºaÉì$Ñ^) ÑñÜ`ÆJ¢Ç•€J Ñ‘¨Í^IôjëÚ ÑS–J¢G‡àå›DÇ{ÐQ$z5Ü =1ƒDDÐEèX¾EPhÒq¥~Òñ"q¢¡éµp ö%ci[j²åÙA¢×îA—ÑÚ‚^-6›"è5s1: Á!A‡:BÌ EÐkeoò Ñq? 
Ñ^JèÕŽôŠžE¯ÅÜÊ¢½%EGÍàðCEÑ#‘‘¬EDt@Ñ^  èp àp?EÑ«2½:áV½¾Äyµ˜| Ðkù€@¯‰+ Ыaxè5Yøg%Ð#c9ôj늠íB2h«42hk8dÐ,AhŸ zÍtBt4¸Š•A[ßD=úCÅTmÃt<þK½v; PôÊvÈ Ç•tn+‰$ «$zå2 Iôº…U}üqƒ?€èÕ=Hôº~Z#¼0ˆd¡$:š¤RSèuýàDÁÕV@¢W[‰^“n!Ñþ˜ Ñæ"‰Ž/O[:Pôšì\¾Š¹íEG1•oE['@í-@QôºàV½òl"¢è¸òåÙ‚Ó•Š¢ÇcÒ®(:Þ£Bl è¨ W(z]È¢£°8 PQ´·H èpgI `èÈRŸÎ\먀m;"5·ø@¶ãÕÆ?{:ò‹ÿàñÏÿýß¾‡|7½¿Á^ù¯ß-ÌwáöoÌ4ð‹>ögdú|ÒÇ?ÑX÷ÜCÅ_ù?ÿøÔœ¯÷zÓŸûü÷+ü³ký9µæÿüãSKp¶ó«É¿“ûã~?PŠÑC‡í>ÂâmÌ ÇÀr¨§Þ¢@ÿúã3xÏ×·?ÖQæ·¿¼ýç?,ÿøö_o?þÇþíǯÌf y[G¯·§¿“ÏÈåÌh”ëÛ³ªÇ0¿Ž\î_ŸUaVÿôïë[Œú£÷þñOßôŒ©¶m«oõqbJÃUMSÆ%¶@¬õ‘õ—·yûËøÿæ³~ZÏõ9Ý¥uÿúèªgê~Ö°+SîéÓÅ6ç¿}òëÎ"”>¥æÜÓ;¹JÍí»Í¹ß5t/eÈÿík»—½laè!ŸóèÖŽí)¥›çnmy«ßÙS[6±`¸.c¾³oûœÍ~f“~c>CaÇ‚[S÷­|Oñ|þÞà–c½{Ì)Æ|a«3/Zt$J”îÂÈ’j Gÿ™MïÜ9QK»þ{§?ªÆÒñÉ3{a-„ Æêð‰,#O¥ÁµÞ¢¢Þu廎&uó¸PqFÓüñD1Ñ&«øo{_íš~;#35Ô8ì„€q!J§Xžœ¯wž–[cÕôDyñðŠ j¬Žž/ Çz’ÖL“-}ã‰Úú½ÝO7:ï^ãêq¸·,0Œú^UŽGA“Œ:9Yo±N5þØž“õQ›×âp´]p¬ãïk8*!!±Ý ½ñRvmu±N¸\™ÒºI[Š<•YÖXî[¯ÆÌmE£»‰ÉÑ^õ]oå^X<Ÿ |¯5$¸³nÛ½>-]w±ïã^<†×}½¸Êq¡öj»¸q£öо³¸ÎÍO5–¼nåÔ€Vk¬j=;æ!ôáp¯÷^aŒ©Ømdp/"F‡¥UÚŽÍbW[îè°Zl¡¼:æV°JÐt‹h4Ķôûmö–ÑÖÚ²Ýkvñt ¿šr»áÕ7 puõÄñÔÒì[¬Õ 'ðׯ w­ŸMecÂÕljOÒxÛ*».¾M†£6F¹k‰+u!»%ÙèO¨>Ê! îš? 
$½Ø˜?ÞkRG¢t‹-öÍ_=q”VšE‹åŠ»'n<[¬HÜ=ñ °Üò½qŒ zaøÌïžØ3Ö÷ã´ø»£iae¬·îGOÜŽM&wëÕf߯èv-Çôð²È‚e‹½GWOì% L÷ÄôµÀñWOÜèykAܯž8"êÓ—~­Ç“4½í~/qôÚ*½ÿh7÷{+.LéVlœÅA™²ôrLŸ*”ã÷ùµÏˆÓϦmê{½×î]}qÔ÷º AõݹLõݸãê;üÚºŒ õ‰ŽU}»Cï%MˆïØ•&‹ ÐÞ‡áX˪Ú;¦ºŸÚÛ­&ÐÞáu¤¤oŒT¡«VÐÞÅAý¦ÐÞÇ.è¤RwÕ°¥…öRAÛ´wì=V»µ÷ÞP{Þ{KƒÒ{§¥Ò{¯0hQzé²~@§ôÞiפô KÁ ¼cö­ Ê{7Ƥ¼·Æ™"”wï,Ž+ouÖ™òæ’&•÷Ю:ÆPy‡•NT•·ßÊ;ƒz[*ï¦Iyc¦HåýÜ?KnÍÌ%7:HîÆsrMrsƒ%wÛMæ«änŒáNÉݰœà’›q!¹cJe6É3(ýT’»5ì)˜$7: “Ü겚$·¾ÜYrëÀê’c•KnV™ä6•èÚšÖµ·NŽ&í½@G›ÇàùY :Ú¸j¾IC ›‡ tåæ ¼¾#½Ù¡~“ôVº0Ioe=—ô&$›¤÷¦³8—Þ*,¿Iz«/w’Þ:òMÒ¯þ“¤·NÔ'é­}À$½™hÒïj’Þª‘)½;Æ£Izk¿:Io'éÃ¥7¦ó.½•sLÒ—Þº¹j’Þõ]ÑfwŠîûû¤£Ç©¾»˜Q]}wn÷3öÝ 5«ú.pßS|‡%oW}­â;™D„øNì—)¾ÃAª@êÛ<•”ßæÑ£ü^L–@~/çöu ïÅ„÷¡H½÷M½mdÞû†é™÷ÞMCwW É¼ÃFr ÝMFc»=uw¦Ì1Å uÅ=!»NÅ]1¤Rqï™S@•Ü»©Hîq!Á³Jî=³)ArïÖ!¹cô¸JnŸ;@rûÜ’;v2ÈÄŠ{¤)c£âŽÍZ?PÜ;%*îûÿ¨¸·k Þ±'£Î’{#>¡äŽ™“´{(îØ¢_ ÷Fo?÷Ö®’¨ÖÞ¸Y€Z{ãæ)híq!V€ µ½˜­½ÑpK­½%C颵·låP­½eÒ&hí‘ ­Ý7R5hí(¬ŽÁÐÚ±£H‡•v¬ƒ«@{*mÏ J{[°Ô¥=.¤DW¥=jFýÍTÚãmèX@¥Ý;w ´;sRi”ul*íó7Øð;@ˆòWl[}£À¶\ ìžóŽ»3ôv¨PmOØá(°Çˆ‚I-vçébØç’Q`OyªÀA­sìÎóp °ÛΩ2dvçFk",|Æ‘Š¦±eŸïä/a ÓØýŽilìƒ0Ý8²@cgs@c[Pjl‹AMš¦iìÂq;6ÞjžÔØfõ Òî$а—ì<ÝÐ7ÙԾ”-46{Sc‡$‚uÛ%4ö¶™é{Õ qTÚ[§MJ{±4°íŒ®÷TÚ.}ȶó$°7në6¦mÚ†L›œ'¦bisvß9Ë#ÓÞ©vÉ´9™›dçŒL›!Si7ú3È´ÍlD¦Måch›Ð&´ï…nÆ©šÐ6¡8Ü$‹©p¸IØ[ÚÞí¶@ÛѶ-émo†ÑmW~¾“›DU€£m F¢mómg<»»IˆÙmSÅmW‚x¢íß¡íP²"ˆ¶Lm'´ ÖH´P·†¶«i` í•ðŸh;óQˆ¶96´m®¢m¥`„›pƒºÛA$÷ g”n{)'á^)¡»c¾+n«6èîè¥_êîö<*‚‚;ü j:‚à>–öÁíN¢í ÆÐö9w§æž«õÍ]ˆ…š»r~nP›;U©¹Ã„¡žP›L€š;éKÍ]‹‰c@íÆ'¡v5ˈjîQtÌðµîÀ 6ãij3؃¡my mLÀ÷chLp·{à‚;â©Ù‚;"A7«à¬c÷IºÕÇÎõ‹Âǽ% èã6Û4}ܶL÷‚NÃ|Ü «[“›Fõq7Æ4÷Ñv+ÐUhÛÈJ½¶i½-}Ü Uô¶­¨Qo‡áJôv5·2÷ø(—WhÛ:& îÍ¥nÛ§àŽÙº<Á]8 Ê»2‚#•w4p5ëCyWÛ åûÏaòVåíå‰p°¨òާEžª¼½Â¡¼cÿ¹.?@yG/¨ZÊ»2v•·TÞG2•媼«o(ï¨h½Ê{bÊÛFB3•p5–Ê»ÚäÊ»îô«CyǸý tWóAy‡ÌÑ7öPÞÞþ¡¼m`†òŽñJ×/ ¼+ƒÑQyÇx¥+´PÞ>¢CyW+Aå]³ÙÃUyWr *oO„ò® Ÿ%•·½PÞµ˜5D•·u-TÞ^CPÞ!N tUyG éxå=®€…ò®õ[wå±TÞ·äƒæváFÍÍHÔÜ å 4÷¤Ÿš»ž‹Wæ'1/Ô6CJšŸdåpµM³ ü$ j~FÚ4?I2>n~õ‹QtOö øIÏ'+7|ÞÞ61r? 
æ.½aš¬Üº¶BémÎiJo¿Ò¤÷³ PtwNÒ(º7θ ºƒ(?§èî¬UˆnFÛœD7ú“þè.ÓoÝßtÛ¤÷~»§'ÖÍSÀŒu› ƒô‘5öRrí¥ìæÁ^ÊΕ`jp³ìBƒo+V’Lƒç×~î ‰†ÔàÕ\ÙªÁ»ÛÄ¡Á¹e{)¹0d{)÷ÃQƒï' ‡ø¦7ÐÄ·í¤ø¶ÍwÜDYh‚å&ÊÆý *¾›Í鸉’´çß'qR|ÏRÁ¢â;(™V&ÄwÌv^n¢l dl›(Ù¥S|ûf7ˆoë_l%m—¶‰r%0‡øn¶÷‚âÛæßîøÞÌ>ñm“ߘB|G^:ºk5wµŠoÓAß1ÉЖEñMK„‰ïb^pˆo›õ@|g.úQ|›§ÿ)¾3'4߉»9!¾³ }ˆïdÖkˆïÕv6B|ç°weÀQß‹] ñͰ^&¾WSÉßæl ø^(O(¾ýJˆïÄyÅ7’3ñÍ~&¾í9)¾­ú(¾—YsÞeš{á4‰Ê{¡ÍÊ{ôÜ/i÷HƒÊ»ð ×Sy®•QDz¶EGÚË­”Åí ª¿K7˶êïˆ+¨èï‘'‰¸êï‘çk?ww¹ê¨¿½´Ðß…Z¨¿#û,U{ ¿KåôwáISÔßž'ôwái‘Ôßq<‰ö,Pá¥\â\õw©&ÎU{9¡¿ O~ƒþް“/&…Û Ú³ K³ýýÿðÿÁÃ@ endstream endobj 3 0 obj 209665 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1469 828] /CropBox [0 0 1469 828] /BleedBox [0 0 1469 828] /TrimBox [0 0 1469 828] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000209884 00000 n 0000209906 00000 n 0000209929 00000 n 0000210359 00000 n 0000210228 00000 n 0000210123 00000 n 0000210286 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [ ] /Size 10 >> startxref 210440 %%EOF blis-0.6.1/docs/graphs/sup/dgemm_rrr_has_nt1.png000066400000000000000000005410471360743507500216200ustar00rootroot00000000000000‰PNG  IHDRâÜJ&¡ &iCCPiccH‰••gP“YÇïó<é…@B‡PC‘*%€”Z(Ò«¨@èPElˆ¸+Šˆ4EE\•"kE ‹‚tƒ,ʺqQAYpß÷?¼ÿ™{ÏoþsæÞsÏùp ˆƒeÁË{bRºÀÛÉŽÌß(ŒŸ–ÂñôtßÕ»­Ä{ºßÏù®‘iü常¼rù)‚t ìeÖÌJOYá£ËLÿÂgWX°\à2ßXáèyìKο,ú’ãëÍ]~ )úÿ†ÿsïŠT8‚ôبÈl¦OrTzV˜ ’™¶Ò —Ëô$GÅ&D~Sðÿ•ü¥Gf§¯DnrÊ&AltL:ó5204_gñÆëK!FÿÏgE_½äzØs û¾zá•tî@úÑWOm¹¯”|:îð3™ÿz¨• €è@(U  t0–À8à|AØø $ȹ`(E`8ª@-hM œà<¸®ƒÛà.L‚—@Þ‚°¢A2¤é@F²† 7È ‚B¡h( Ê€r¡PT UAuPô tºÝ„¡‡Ð84ý }„˜ÓaXÖ‡Ù0v…}áõp4œ çÀùð^¸®‡OÂðø6< á—ð"Â@”]„p$‰BÈV¤)Gê‘V¤éCî!Bdù€Â h(&Je‰rFù¡ø¨TÔVT1ª uÕêEÝC£D¨Ïh2Z­ƒ¶@óÐèhtº]ŽnD·£¯¡‡Ñ“èw †aaÌ0Θ Lf3¦sÓ†¹ŒÄL`æ°X¬ 
Vk…õÀ†aÓ±ØJìIì%ìvûGÄ)áŒpޏ`\.WŽkÆ]Ä á¦p xq¼:ÞïÀo—àðÝø;øIüA‚À"X| q„„ B+áaŒð†H$ª͉^ÄXâvbññqœøD%i“¸¤Ri/é8é2é!é ™LÖ Û’ƒÉéä½ä&òUòSò{1š˜žO,Bl›XµX‡ØØ+ ž¢NáP6Pr(å”3”;”Yq¼¸†8WJ)Hq¤"¥öHµJ IÍKËIÛJGJJ·IK”aÊ8ÈÄËì—é”y"‹’Õ–õ’Í’="{MvVŽ.g)Ç—+”;-÷H–×–÷–ß,L¾_~NAQÁI!E¡RáªÂ¬"CÑV1N±Lñ¢âŒMÉZ)V©Lé’Ò ¦$“ÃL`V0{™"eyegå å:åå–ŠŸJžJ›ÊU‚*[5JµLµGU¤¦¤æ®–«Ö¢öH¯ÎVQ?¤Þ§>¯ÁÒÐØ­Ñ©1Í’fñX9¬Ö˜&YÓF3U³^ó¾F‹­¯uXë®6¬m¢£]­}GÖ1Õ‰Õ9¬3¸ ½Ê|UÒªúU£º$]Žn¦n‹î¸CÏM/O¯Sš~°þ~ý>ýÏ&   ©†.†y†Ý†iñªî¯&¯v\½mu×êׯ:Æ‘ÆGŒ˜ÐLÜMv›ô˜|253˜¶šÎ˜©™…šÕ˜²élOv1û†9ÚÜÎ|›ùyó¦é§-þ²ÔµŒ·l¶œ^ÃZ¹¦aÍ„•ŠU˜U•Кij}ÔZh£lfSoóÌVÕ6¶ÑvУʼnãœä¼²3°صÛÍs-¸[¸—í{'ûBûªƒŸC•ÃSGÇhÇG‘“‰Óf§ËÎhgWçýΣ<Ÿ×Ĺ˜¹lqéu%¹ú¸V¹>sÓv¸u»Ãî.îÜÇÖª¯MZÛé—ˆÿ2Â6¢,b&Ò*²4r*Ê*ª4j:Ú*ú@ôLŒMLyÌl,7¶*öuœs\mÜ|¼Güñø¥„€„¶D\bhâ¹$jR|Ro²brvò`ŠNJAŠ0Õ"õ`ªHà*hLƒÒÖ§u¥Ó—?Åþ ÍŒ]ã™Ö™Õ™ï³ü³ÎdKd'e÷oÒÞ´gÓTŽcÎO›Q›ù›{r•swäŽoál©Û m ßÚ³Mu[þ¶ÉíNÛOì ìˆßñ[žA^iÞÛ;»óò·çOìrÚÕR V (Ým¹»öÔ±? ìY½§rÏçˆÂ[EEåE‹Åüâ[?þXñãÒÞ¨½%¦%Göaö%íÙo³ÿD©DiNéÄ÷e̲²·7¼Yn\^{ˆp(ã°Â­¢«R­r_åbULÕpµ]u[|ÍžšùÇ‡ŽØi­U¨-ªýx4öèƒ:§ºŽzúòc˜c™Çž7ø7ôýÄþ©©Q¶±¨ñÓñ¤ãÂÞ'z›Ìšššå›KZà–Œ–™“!'ïþlÿsW«nk]£­è8•qêÅ/¡¿Œœv=Ýs†}¦õ¬úÙšvZ{aÔ±©CÔÓ)ì ê<çr®§Û²»ýW½_ŸW>_}AòBÉEÂÅü‹K—r.Í]N¹<{%úÊDÏÆžÇW¯Þïõê¸æzíÆuÇëWû8}—nXÝ8Óâæ¹[ì[·Mowô›ô·ÿfò[û€é@dz;]wÍïv®¼8d3tåžý½ë÷y÷o¯ñy02*|ñ`úaÂÃ×2-<Þ>†+|"þ¤ü©üÓúßµ~oš /ŒÛ÷?óyöx‚?ñò´?'󟓟—O)M5MMŸŸqœ¹ûb݋ɗ)/f þ”ø³æ•櫳ÙþÕ/ M¾¼^ú»øÌ›ãoßöÌyÎ=}—øna¾ð½ÌûØú>|œZÈZÄ.V|ÒúÔýÙõóØRâÒÒ?B,¾“sMT cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFs¸NBIE] pHYsNNÆÊ/¥tIMEã1:© vpAg7O£¨u€IDATxÚì½{œU™ÿÿI€\* ¨5QH‰ ’U«e¿¢€ÑnØ…Ÿb·²ÆËîwí^ð‚ºøíÞu½5»Ýâ.¬¸+]˜Å ^èREMâ°¢ SaHÀ0E&„ôïÓOUuO÷ÜgºgrÞ¯W'ÓÕUÕ§ªŸ:uΧžËAÕjµŠF£Ñh4F£Ñh4F£™Vnw4F£Ñh4F£Ñh4šŽâ‰D»›p@Q*•Èår”J¥IíÇóLÓ¶îX×­Ý÷Ýw .äºë®cûöíœ~úé-—e_ûöíÃqœðX[½·,kLûéØ|ß'—Ëqçwbš&¶mãû>§Ÿ~zËöAÀöíÛ ‚€B¡€aáïáû>?ûÙÏxç;ß (Õyß¾}-·™KÌF;ÉVg‹·²É‘Úïº.?ûÙÏ(—Ëœ~úéá~ßùÎw6µãÇ{Œ½{÷Îy–óÞÌŽG²aÐv<–}µ:6hÝGŽvlͶkÕË9tÏó¸ï¾ûè ¡À”2UvÜ cŠÑ>m“µãÉöÅ#µ¼}ñdÇSȹé4;žˆ Ç÷×Ìno¼ñF=ôПsÎ9#ÛxÇÆŠ˹×ããÙ1ÏkÕ'ŸsÎ9ôOλwþ¸B>ÏøfÖŽ;Æ#.‰®ëâû>©TªîóD"çyø¾OOOO¨ä§R)J¥R¸}Üu6•J…ê}*• ÷áy^èF:ÖõFÂó<2™ ©T*üár¹\ËåcÙW"‘À÷ýQß¶¯±›ëºX–…ïû¡ªŸÍfGl¿ì[~'ùžFJ¥™LÃ0ƼÍlf¶Úñh¿õl°ãVö5RûMÓ ÛêZ0MsXÄŽzè¡9oÃ0²·²a9ÚŽ[ïk´c©œlï‹å7Îår‹Åv›Û´1•vÜ 
cŠÑ>m“µãÉöÅ#µ¼}ñbÇSÈyïD;žˆ Ç÷×Ìn7oÞÐæx ÇÇûëäqÅhÛ mµãj‡Ïç«Éd2|_,«Ò¼b±Xµm;ü,›ÍVmÛ®–ËåªeYáòþþþªaÕjµZ­T*áß²M6› ß˾ǺÞHT*•*P ßÛ¶ÝrùXöÕßß?¦÷£ík,Ç–Íf«–eUMÓ¬&“ɪaÕb±8bûeßòYÝ9µm»Z,«–eÕmßj›¹ÂlµãÑ~ëÙ`Ç­ìk´ög³Ù*Pª†aÔµ±ÑŽ®V[Ûq+®V«ÚŽGa,Ç6Z¿:Þþ¸U_\­VÃ{ÔXÎële*í¸Æ£}6Úþ&kÇ“í‹Gkÿxúba®ÛñDÆÕjgÛñDl8¾¿fv;“cãÑŽm¼cãjuîÛqµªÇÇûëôyžlר'Ès¼jU÷×éãŠñŽgÚŽçMŸÄ7>\×%™L†ïmÛÿö}¿Î¥Ñ¶íP)5 £N© ‚ ü{$wÞ8c]o$lÛnêÚjùhí‰?uíýT[ô÷÷„ t¹\±ý–e…Ÿ™¦‰eYá“ù}LÓ¬Û¾Õ6ñß{63›íx¤ßz¶Øq3ûºë®»Z¶ßu]Ç¡¿¿Ó4)•J¤R)z{{æv<×mXÎK3;neÃr®´OþØFêWÇÛßu×]Mm¸P(`YÖœ²ÙfLµw˜b´ÏFjÓTØñdúâ‘ìx"}ñ`ÇSÈùêd;žˆ K›šÙ­œ‹™§Óéa¼cãÁŽAÛÓéó¼V}r>Ÿ?`çxr^ôø8jO'+FÚ®Õøx¤ï™;î˜ÐÔñüøqã•XwyU*•vÊ”Ÿ‹ÑÞOÓ4‡]8ñsfÇé‹;žè˜<;ž­cãÁŽAG:8Ï©O>Pçx Ç#‹NWŒ¶]'ØqÇq–eá8Nø>^ZÙ4ͺY\ý…R”ÐL&ÓîC™uHÒN1´V¹YñPìx"c Ðv<ÌÔØø@°cÐããv2[©O>Pçx Çíd&ÆÇ“ùž‰Ð1¡©étÇqèééÁ0Œº˜¨ðñ2Ê@hÐÝÝÝ¡kz>Ÿo÷¡Ì:LÓ$™LÒÓÓ3®óh‰D"쬛mcYétš\.G:Ó6³mÇíe¼ö•Ífq]7ü½Z{±ã믿~ÎÛ0´¶ãV6 ÚŽ§Š‰Ú×hÛÅûâr¹ÜîÜ´·éî‹;žÈ˜´O356>ìôø¸LÄ–ÇÒ'hs<ÐãŠv2ã㙶リòø¥Cˆ?‚ ¨«¸!1½R]HÔÊxL¯füø¾ƸvÅ•¾\.‡ñÕ£=]™È6³mÇ3Ïdìk¬çþ@²a9^ˆìx4–eÚŽ'ÆDíë@³Ëñ¢íxf™‰¾ø@c"c Y¦Ïçä˜î±ñ†·ñز ÇÇ­ô¸b¦˜©ññLÛqÇxÄ Í\ÿ¤Ô¯s©TªS'§"¡ìHø¾_çBÝÈx•Ò©ÚßT·k"ÆfƸÝ5'²ÍlCÛqûìx"ö5Þs Ø0 ·ãÑlf—wê¾&j_Š]Ž—N³ãNí‹§z_3Ñ(LdL¦ÏÖ±ñ‚Ï.[ÖããæèqÅìWLd»™²ãŽóˆk…çyaL¶¸xjÚ‡ïû¸®Vš®mæÚާ™°/mÃÚ†§“‰Ú—¶Ëñ£íxúÐö8sh;î,´íOm˳mûÚ†§“™Ï´Ï!N£Ñh4F£Ñh4F£™ÍtLÕTF£Ñh4F£Ñh4f.Óq9â>ö±qÌ1Ç´»üîw¿ãOÿôOÛچݻw³{÷nŽ?þø¶¶ãÑGeÑ¢E,Z´¨­íøÝï~dž ÚÚ†±²~ýzm?5:É~Úý›<þøã|úÓŸnw3FÅóôø¸žN±ãÙ2>>á„8í´Ó:âœub?ÀO<Áž={Æ´nG qºuëÚݺººÚÞ9ú⚥hû©§®iÍøÑöÓ™íÐŒN±cÝÍdè”þG·C3ôø¸mÇã㨣Žj{Xs'¡í'b<¶¡‹5h4F£Ñh4F£Ñh43€â4F£Ñh4F£Ñh4š@ qF£Ñh4F£Ñh4Í  …8F£Ñh4F£Ñh4fÐBœF£Ñh4F£Ñh4F3h!N£Ñh4F£Ñh4F£™´§Ñh4F£Ñh4F£ÑÌZˆÓh4F£Ñh4F£Ñhf-Äi4F£Ñh4F£Ñh43€â4F£Ñh4F£Ñh4š@ qF£Ñh4F£Ñh4Í  …8f–àyínF£Ñh4F£Ñh4AýíyHÔ//ZˆÓhf ©Ôä.vF£Ñh4F£Ñh4¾¹\ô¾PPB€ëBOš‹ww«ÿS)ÈçÁ0&þZˆÓhf ¾¯½â4F£Ñh4F£iE©¤Ä²VärJl µn"–}P.«¿-Ký]©¨W6 ýýjy|nžÉÀÕWŸÆO,Sçµû$i4š‘ u‘›f»[¢Ñh4F£Ñh4Mûð<%¤•ËÊc-—SN+"¦Í„ž5—qͶ#O7ÀÞÞzï¶|>úÛ0¢Ïç㥒ÚW2©¶ùÖ·eÇŽ}c:-Äi4Îw¾ó3>þñW±jU/ûØ^l{m»›¤Ñh4F£Ñh4ÍŒãûN«¿ C l#9­ôöÖ¿·íH@3͉‡˜¦ÓQê(À+ö04ô̘¶Õ¡©M‡"®®×\óNŽ:êzúú6°mÛ6J¥v·L£Ñh4F£Ñh4šñãyÔÍiÀa}ÇQpB2©„4P^p³¬Éåy³¬¨ãå€âJ%å¾è8*ážFÓ‰ôôÀg<ÃÒ¥o# ¼æ5‡óÌ3]Üwß' ‚ 
êäqÉM$¸®;ñ/Öh4F£Ñh4M[ÈÅ+Ì'òs•¿MBC»»áŒ\î)A®‘TJmãûjÝŽ<5µ¹÷üæ?ýô˜69 CSEM&•êû:ÿ–¦³ð}8ôÐ;x≯ðÎweå1 ƒ (ñÅ/:,X°ÇÉ“ÏÃßüÍ~þ÷¥P€'ž¸×M²xñVì‰ÊóF£Ñh4F£™q|ß§P(%*+év7l8Žú?™T™ç)ϱ PšU¥¢ôßÄ`öàé‚ú,ŸWëÆó½y^}Q…iG’µ;Žj¨‰qñ(• ¥•?ÿ9¿ý˿Ӯç¤G\¡Ðººd©%ßÎ÷ÛÝb&Âó<~ò“=tuíà‹_|3étšd2‰mÛ˜¦É…ÞÄW¾²šùóŸf`àivïþ7zzrÌŸÿknºié´Å/y;k×>Æ 7ìi÷áh4F£Ñh4š9ˆ.c!“Qÿ»®žäÕå×NŽD8¹@†‘C7ÛM¡…š õa¤é´Ò¤èA±® 9”S@‰pYàÞ—ôšRI­/L©W*EB›4ÞqBï6@¹ìårªá==ª¼ª²P,ª$t¶Mßu×ñø‰'Žéëç¤õç/¾,—Sç¹·WýÀ† ›M'°zõQ|ñ‹¿áå/Žd2‰SÜÓé4—_~!A`34ôúú>L&ó{’I¸è¢Í¼ó¤X´X·îµüä'˹á†{Û}8F£Ñh4f\šÑÝ­´ PsnÇQsqðZ9ÏÌuD«pÝẅëºt]w^í䀩Ÿ,ó¼ÈP4šv ~’{îy—]6Øç¦ibšpæ™%<ï”JÐßߊuòtåä“ÿ–+®xoì"TÿpÑE_ã”SVñóŸw±nÝV<ϯ‰x‰fF£Ñh4F£ ÏkžÞ©PPsÛŽ>O¥Ôü;—ƒlV­“L*'£tºÞëi®ê€Ò,uˆ]]+Iàѽ{y`ñâ1Û)â6lØÀ 7ÜP·,—Ë100ÀÊ•+ÉårÓVÑÑóÔ¹-ï›çyaìµtS‰ïû81—<ß÷)•JA@.—£P(誖N;ì8™„|`%;vtašæˆO5Òé4Ùl–â(¾ÿýuÜ|ó ýýý<õÔ¹øâ»y÷»Ÿçßÿý^üÏÈd2xž7£"X¡ ®O×­¿ÉKŸ™H¨kY¼…5§ÑŽgª/Öh¦ŠvŽ)4š©BÛ±f® Çsƒ hí´"a£ñôY­'ᇠmB\ß)—ÕœÜuÕzÉ$ ªy·iªåÙ¬ÒDâÞT3E»íxÇŽüçÝw“Ë)=G´£LFÍ}¶ÿäU|páBV¾ã™ =Ë–á8Ùd’û[J@Xöå/ÓwÁ”?ò\"/4Ÿ(äS‚€B¡j ÈÜ êC‡MSýN–¥~SѬrD9Þä»”èV@‰ie”PUiu"ä ¥B 0úû±Þô&*¹é5kB—¼t¡@ºvl™uë(mÜ)„ þ'ÛVË%N6Ÿª@Œ@©vþ¾ûµ¯±ih( -eßç 'ŸD¦¨9`åÀF&CP*±õu¯ã©;î“Lihªëº Õ-ëëëã‘GaãÆ€Rœ7mÚ4mm[ k™Lóϯ¿þz^xávlÛ&N‡USs¹±w¾ïãûþ°cND8Ó4q'Tž …Ùl–t:MOOÏ´¿fò´ËŽO?ýg|ö³GðÕ¯¾zÔuÍq¸rÆÛ¯¾óŽwÜE©TÂq¬i.?ãûQ‚Ψ]Ñ |¾^tÏbßD;lj*æÌñvKˆïl¡ÑŽgº/Öh&K'Œ)4šÉ¢íX3WÐ㊹ƒ$ÖïïÆ¶¥’ZQtžDñId_±‰rù<¡dYÍs®‹6G"¡>2mºd¬ý Ø|>ЇðüÎ~øáüÓ/Éß|üã,þÉO(‹*ªÑó(•J˜¦Éþï~—cÿú¯y2—£ëÜsÉf³8ŽÃ«_ýjÎ:ë¬1ÙÁ”yÄ qÝu×qõÕW×-ß²e +W® ߯Y³fZ\>Å»f$}âÉ'Ÿäæ›oÆqœ:58Þqär9‚ª7$ J¥Rèáà8===twwS*• ·§§Çqêª^¦kîw†aAøÒtí´ãÏ|f'ÇóâiÅâ¤ÓiÇ!™œºØT1çx>xè©„â7"—¤x¶Ê¾äÆ$Ë|øÍ¾ñÉžü-´Š(+‹½½ð6C‰S ­Jà’0Í<õoÉÚÿ•Úz”Jõ¢Z&£^–U"…È*V¬D"š|‰ñ‰QV*užSF­ ½µW:š@å™»zhˆLm=å½ö¡}ûÔ1¹._øõ¯yë~À…Ï<ÃÁ×_ÝÝóþ÷C"Á~éK\sÍ5ÜrË-œuÖYüíßþ-ApË-·pï½÷òè?ÈŸù ç>ö‰D"ŒV»ä’KÆlS&Äår9®¾új7ÄÄîÞ½›N8!|ß5†J{öìaË–-Þ Ž„T‘ß»Z>¡k៑É$Èf³á2eŒ.ɤz¯5*ÅAàyÝÝݸ®‹çyxžV(q]Ã0Èçóá˲,òù<•J…|>O6›­óF²,‹T*E"‘óÅ•J%|ßoªTè ±eËvíÚ5­ß3•v¼k×®1wäï}ï“ìÜù<§Ÿ¾pZ¯–eMÚCSnâ«V½À5×|‘¡¡¡Pä ‚ú‚tüÝÝÑ Û²¢Ÿ¥’êƒ%lß÷£ÜŽ™L$žtP$މxæyQß.ž±âZ AõOà|? 
ŸUç'úÛqÔBö#Ç1VÄŽ÷ìÙ3¿hs;žˆ ?ýôÓ<úè£z`­©c``€û￟ݻwOÛwLe_¼{÷nî¿ÿ~úúúÚwÒ4É–-[xôÑGyúé§§eÿÓ16hïIÓtíOÄŽa|ãcM„ã ÝZårâ¢=O½2™(\T¸LFi’»^r»Å+z&“‘à&óíxz­R)ÊÓ>^g²^ùʇٱãði=‡S=>︢TRçüÉùÌúiö¿ì{|÷»_ãS·ÝÆß±—ÏmâÖCå½þßM£pLÓ u‚ ê"¤LÓä¦O}ŠƒÏ;3ßúVåäºô¢ò³®K~éÒp›d2òŒ¬T `@PK£æ¢„+%ÀejûHÖþ¯¢Ä6Kö]kƒáûX2‰’XVÕ¸¨8BMåu £>g~6«D·l¶µºsV ‚`X¾; %ÈU€²ç±uÉоO0=d¡@±XÄä­gžÉÊõëq}Ÿ^ÿzÞºd ½½½¼õ¬³xÃ+^Áw¿û]^ò’—ðÎw¾“«®ºŠK.¹„|>ÏYgE©Tâ%>ÊÛ;ŒBw7ùÈGBÛÙ½{÷˜ÇÇSšzà 7°råJÖ¬Y3»cÏž=lݺ•N8aØà¥òV*u¿OìóÉd’ùóÊùçÛØ¶Í²eË€z%Ù÷ý0›x°ù¾ëº8ŽƒmÛaÞ.Y”×ßß_÷¹»š…†A2™¤T*áº.™LÛ¶±,+ôŠò<×uÉår¡§xÓUf:N¯ìÞ½›­[·ŽKœ/SmÇCCClݺuÔýy|ó›{8ï¼C¸ðÂi;¼–äóyî½÷íüÃ?ÜÅG>²ªåz’[`óæ‹xê©÷óÑþ›7{X¼x'ŽóY,xÿú¯rÎ9[I&íð.îéâ’nÛê& •‹ãî뎣úl _-ê¯m8d³Ñº"˜ÅulI j?ò¹xÕ‹QâYñ¼ƒh°!ï¥Ð‹¸èËr9ˆ>“ö5ë‹ÄާSˆ›J;~úé§Ù¾}û˜ìXsàðÈ#pÿý÷OÛþ§º/!nÑ¢EuO½5š­[·²}ûöiâ¦kl c<4sŸ¹<>ÖÔ#­ããåÑÒ*INyÇQÛ÷÷+±,>m•@!™Œ„¶|¾ùx6¾­ ¡¬Ú:yä^üâ­ìرmÚÎßtŒûúúÆ<®ïB×…CNzžÕ¿úÿ³{7Gû>o»óN¶lÙÂoz_öY.*ÔçØ ©NjÆöçÕEQ•J`ÛJ[¸ùæÛÙ¹ó»|ð\v\ýÙ‡xõ?å7ç¯gÛYŸaÝÆ?ò•+_D±W‰hQ>9¢)…J(ak˜’Q(D2)„ !CŽÃƒgÅ6×B|ãUŠEnºür~ô£‘Ífq]Ó4±, /Ææ8™L†d2ê3"Ή¾²pá°ª¬çy¼ô¥/eͶm<õÕ¯w÷÷ãû>ë¿ó,ËÂ0 ²5Ï·¼å-ušLœ .¸€ .¨Uœ?–,T˜óxÆÇSâ·uëVn¸á†ºÆZ–ºzÆÕâ¾¾¾QŵcŽ9†õë×y !oÛVžÉd¸üò¡ÈðÐC‡ðÔS‡†?”xŸ¹®‹ë&ñ}åÉæÕâs¹\†šN§Éçó$“I\×­ÚÆ“¯KDûét˲( u?tôôôH$ðƒƒ‡„?`lÞü2ÞøÆ?”ÇšY¶msç{9òÈó°må1'ê©a”Ëe’5…D.\ñ\›(ñŠ˜¦iR*•Èf³”J¥ðüÙ¶M6›mZ=Ó0 <Ï#™L’Éd(—Ëahk¥R!“ÉÔåxn:9ò=AL&ÃB"¨•c=³xZ–~Š“ rçɱø¾aX–E±X =©,Ë êV s;i‡û>¼ðƒlØpîˆUR§›“N:‰/|á={ì¯Ã'žça¥R‰;î8†7¼a-ŸÿüEœ}öϰm›ï}o'‡z*®ër×]Ëxýë¿A:]¦T‚åËïåæ›rÔQ‘š$tµm%t‰¼x/ƒêÓÅôââ˜ú=Ô C¼Øâa¯Rú;^BòʕˑÀõ…ø=D<¡e`#9'Eø“v'‘èW­*—yº˜H¨uš=¡”ÊÙÓý3·²ãÊåò´öÅÍTÐî1…F3h;ÖÌô¸bæ‘hljƨ"ÄI(D³EŒ“¢…2¶vœ¨¨‚L§súU,Î|Ô±ÒN;ö’,À„£öÜÅÙÇK×püÒ¥ØDá‚x‚²…ÈaÁ$“ßþv?÷ÞûöîUçݶÅÓq)_þòRN8áLó ;ì›X–ÅY&|ò]¿Ãóþ §[Mxö¿ô¥üú]ï"] ËÊ£´”D¡Àq ¨ùºë’«é¼æ5|þÒK•!IÞ žiRÊå(–Ë”^ô".\êAà8gžy&‰D‚7¿ùÍd³Yzzz¨T*”J% Ãà•¯|%7Þx#;vìP•b³Y2™ žçaš&ozÓ›H¥Rìܹ“ÿûÿ/ýèG9õÔSUÑ…Ú=ö¤“NbÛ¶mlÚ´‰Ûo¿=v^ Ãà7¿ùÍÌaSZ5µ]]]\vÙe\z饬Y³×u¹é¦›¦ô; #òvÌd2¸®K:ýw¤RA( yžÇÂ…Çò’—(I¡J¥‚ã8 '‘NçCßqœP¨jd"^p­Wȸ[óY»mÛ”J¥0œU9¸*•JV+Æ/K.—Ã÷ý0ÜÕuݰ ¬Tz•ud¹t†aP©T0M“îîn<Ï#›Íbš&år¹®½===¤R©PT¡O<E˜› 7íé²ã_ýê)–/„dòâ¶ßÒ¥K±í$ž§îì…B¯|å!.¹äs|êSç’HÀ¬å‰'àì³OƲöò¯ÿúkÖüš\î_Y¶ìŠÅb(Šß|óBvïžÇ+^U@•ŠëF˜Äå½Pˆúr 
ëAN’ÅZV}ºrÁV×~*¥Ê¢Ûv48Éå"/8tH5¨|>ª‚*I_ ë&“jFóクW\D¡´Rª]HqÑÏåÔ²v Pf¢/Öh¦mÇš¹€¶cÍ\@ÛñÔïööªqc±=L–1u:­hËX¢è‰ú‡ÑPŠ:ݴѯ`BÌ„{@Ö„«wîä”Kyr‡yî¹Ì_¹’kPE‘y‘œOÉÇýØcçbpî¹ÿÌ™g¾›óÎ[Ýò{çÏÿÅâù¤R~¨gˆ“…”—Z,?R¡PàGG _úÜwø>O?M¥RáGçßú½ï}<-Å[ͨå~;óÌ3éîî&“ÉðÐCñw÷w\|ñÅ|ñ‹_ä—¿ü%¯zÕ«0MÃ0Âÿ% X¥Ráþûïç‹_ü"¯ýë9ýôÓù\Ž#<’N8÷¿ÿý …)Ĥ-’ÓଳÎjZÅ´0!Õâ‘G©nÞ¼¹ºk×®Q×}ßûÞ7®}Ûvô7*`Õ0Œ*PÍçóÕJ¥RÍf³U;¾bµZ5M³n™mW«¦}~òÉ_¯Ú¶]¬VûûÕ²L&¨‚Y[ß®û2‹Åj¥R©Õl6[ÍçóÕl6;â6¦iV-ËŠµßnºM6›­ÊO]©Tªfí {{{«ÕjµjFÝ1T*•j2™¬Û—´­X,6œ{;\¿Ù1•ËåjíäI{ÃmeY¥R©VËårÕ²¬°m¼ýíoŸô¹ž SmÇW\ñ@uíÚï¶õ˜ªÕj5Ÿ¯V³YõÕt:]½ì²íU1lV½*u‹ÕêᇨzÕUߨæóùpaéÒ'ª•J%|ßVþ–Þ§R‰¶•묖]ïåòðõã×j¹¬Þ7~Þ`“&¾¿r9:?étÔ§ô÷«¶›¦:Þ7¿ù‹Óñ3މñØðæÍ›«×]w]ÛÚªé\Úmã±ãë®»®ºyóæ¶µUÓÙ´Ó>¦sl¬9°h§}ŒÇŽÛÝÖN£·W e¼*ãD˘·XTËòyõyµªÞË4жÕßñ÷³‘ÙbÇãe«Õêº?SÅ0ª¯¸ô‘ªyú„Ë{Ǹ± Ó¬T?ö±_IuâzAµZ6o¿vݺºyÛÚO¬V-«úŽãŽSFÛÇEk×V«¶]ýý]wUËårµR©T3™Lõ¥/}iµ···zÜqÇ…šÁÆ«‹/®^rÉ%¡P­V«6l¨–Ëå–í¶,«zæ™gVóù|uíÚµUß÷[jÕjµnßíf<¶1íqBWW×´$—•˜yˆª—Z–…ëšX–òÄ’ÜeACQ_…¸ç (Åyÿþ#I¥¢ª¹åòRŽ8âÜ)?ŽÆÒÄÁjQKRèýVSO½¸«f¡PÃNãñññœrqE¹ÙwŽuyã1åóùº‚RlBŠQÈw›¦º¯ÆClƒ `I-b»˜j;¾øâm¸îÀÿiëqIxf¥ÒO©¹±Çs±ÅÃü}öìù|˜{¢|i¦ /ù\÷ŽšÇ¥ÚŸäŠ“'wòtNÂUaôœñ‡ÍòéÅ=ÕZ}>Þ²£ߟxç•ËQ.‡l6 m¹?þñO¦¶ã`ºúbf&Ñv¬™ h;ÖÌ´×Uk2¥¦YŸNráÃðô+å.‚ [a©i`:íØ~ûí½X¦É’—³ä8€w0ž ™GìØÑǹçÂ×¾6z.v‰tk$LAUËÉsÕ¦M|÷µ¯ÛÆó<>pÜq°nÛ]— ÆsÝP/øÑÝwlÛÆ¿ a4ßòåËyýë_eYÜyç\uÕUœvÚi<ÿüóäóyÞ÷¾÷Õ}ÿûßÿþÛ--ã^{§œrʈÇ9™’b íD:0Ïóp‡t:]+¤¤X,Ö U÷Ý÷ź|‚PÿÃIÈYO!R©[8?ŒÍõÿé§0ìল jCtI޲)ùß„l6Û4Œ¶ÙvqD¼ ùžÉz¼ªIã¾mÛ¦R©ÏçÃï"Õj•b±H?¯|å+§êT·Ï‹ÄãNÁ÷ë71 ç´m%2ÅÃ9%ÿ[>¯¹üå_>ÌæÍkñ<5Ä­ñËe"¦Ô$AÇÑÛ…Ë›f”«Cn ¶ ÝÝwµ»™F£Ñh4šB„5â‰(Šh%’:E¦w2æ•aq #Z/ž¦IÓy<ÀKºH:Æ­‰ZåØc( cšó†VmJO xé4'~éK¸®ËªÁAŠƒƒtuuÕŠZFBÜŠ+B'™_ÿú×uÔQÜtÓM|á _”Vðº×½ŽÅ‹cÛö0n,4js•Y/Ä©˯Uv,…qÆP/8AÀÂ… GMP)‰Ù“IeèÙlŽb±ÞËfëÖ3 :mÊ„8逽ÓF>ö|‘JòÁñÒè8Úºñ$ùƒƒ«xòÉeá:)ØšÏçGlC§v˜ ¤ÂQ'›"õ»Jqé·%7¨kA*ó¤Ó‘Ç—eE.<ßþöeuHÕ±NÌ>âßÝ(¦wòsf³Ñ±ÇŸZÚ6,^¼³ÝÍÔh4F£ÑL#ñib©§¥2©ãD"Z<ÂDäŽU\“œÉŽ£¾c–:Í< {ª;w~fÒ¹Ñs90 »–ôŸl6KèU6Œš‘É$óvï†Z>÷ÿå_èI¥8ÿüóq‡›o¾˲H§Ó,^¼˜‡zˆd2É=÷Üþ}ûšì6=©ùmÜ1g.3jhªTéD‚@½â¥måG_¾|y¸L*yBýÓ„‘ D…êA&Ä´,e¯Rññþû?Ì…þ /{Ù¡áv"D†ªì(•l zÊáºj™ãDÞ3’Dþç?)=v.°‡OúŒÐó®TR êÕ81¯ V_\¼Œ#a†Boo/¹\ÔÇQBÜm·uqãgS­F!xñ› D¢M§VÒi'ž¸yÌéL/p Å,+ú ›¹»W*Qq‚l®¿¾‹;ºZ:˜LÛf 
†1½•©4F£Ñh4K&£ÆÇ"´Ùv4¿ŠÏçʱlý–Í¿Æ:f¶í¨˜ƒã¨šöníuHü¯ç˜Gr9Q‘ɲ`þücÃB‰£1ÌÉ¥TŠ\.M34@H¬\I%Æëé©E* åg?ûO=õT¨›œ~úé\sÍ5¼öµ¯åé§ŸæcûX»O÷¬%â¶lÙ¦M›¸üòËY¹r%}}}\yå• ÑÕÕE¡P`åÊ•ínoŽ=ip]·.ôñŒ3ÎדœcÍhã$P§§ÓQ§(Þ-o{Û]<ùäQìØÑ–—–Î6ŽD‹B!òò}å %ûsÝH¸ÊåÀ¶eÙ²»€³víã¤RËëÚ$¬„½IUœ±Ðê¤Óéð3ߞΠJ(”Ú…‚ED7 %2^Ïm·}ƒË.Û@¹üòù¨B¤a¨ý©Ücê¸ã%´Gê?Æ’#o¶óðÃo§···#BSEn|òæ8‘·\*=¥+• Ò«žø¬OÁ…eøEÍ[í¡,<õÆåÌÿé>| ?µiª›Sõ¿Wûß®ý틟ž¾¾>ººº¸þC¸ÿ±Ç¸ä´ÓhÿYÒh4F£Ñhê ‚hžã8Qê$ßWó&ßÒºˆÿˆi6Ÿ MÄiAæ[ùüäzk¦p€=%¨»CO¸É8`¨yÚK¹à‚1Iw·š€‹‘‰(P*ñá×¼†ó/½´®êi†ïûø¾ëº¼ä%/áüc¸Û „â›ïûÙ5[™000À•W^Éå—_&)Ìf³Ø¶Íå—_N¥R!›Íò­o}«Ýí Éå”-•ËP*)·«r¹Ür}ÏóèêZX·¬YβVÞf6fYpÞy±cÇ÷X±â^ O6u„¾íǶUç\(¨¿{{£äø¹œº.’Iõ²mX³æ8¶m;•K.$™¬J’N«ý$õ}©T_šºP÷i8ŽÚNryõöFÉCÓiõ• üœ6nÜŠë28-“rÛ’+~C ‚áÞvÍ~¯©Î¿×iŒ'$xº!Xnþ¥’²­×&ádžz¢c–Á@ý}_A¹\ÀÝ.Tsðé<¬ÈBÒ€‡ÝÊÅݯ ݲM”Èf o6¬-s€“x€¡M›0 Ïóøñ!‡ð®¿þk}”oîÙÃÆ3Ï$fª»÷$^pcŽ4¸òJ|ßÇ0 òù<~¡€ÞyüÖóxÝ~ÀŠ£Žâˆ#Ž ´lÓ‘Ö÷}r¹\h{¾ï3W—[ ;F£Ñh4š¹…ã8Üpùí¶#Z®“H¨yÌǤ€‚ÌÕdN%Ž¦Í‹l{ry“ã´ò®Ó´q:Øìûÿû=ÿ4s_.É»…R)ò’1 ÛÆÍ_$UÒ<À_üÅ_088ØrncÛ*GïûíËã&²Ù°¬Ì¢°ìƒúúúX½zu¸°R©°råʺÜp'œpCCCínoˆTp|à0 c˜7\3·Þ³Ï^0íí{l ñWáfÇÑèµjU–“O~°n™„ºÊ6"²åóQ§.!°­¼ú~ò“åaqA<÷LÇ߉»®;!Á,—S…ZšTT®»ðçjÅ”;îx˜¥K—ޏŽG ÊmJ8+Ô^cÅoò>”POmÒ@Ñ€7å`ðÕ4<] pp2`:àuƒåA·åŠk¥A-X°wØ2ƒz..zòj•D¦år™r¹Ì)K—òû‹/æþýûYø¡1Ec–:J¥‰D‚d2I>Ÿ+» •!\×ÅóÖ¬YS·â#<2ÁM=¹œêü*¸òÊ?mZâ¶Ñ.òù<7Üp쌴¯ZúØ|©T ÍKXÛ¶Ôd=߯¯tùä“ËøüçW00p>A `ñаk—1áN6›­oãÄöí 9é¤Ö•3} …ê ä%Œ¹µÏÚß‚W[–¨m“«½T´ Õ%ˆÂJ ŒòhûbÎ.^[»~$|"» e72xÈf›$^ñŠ«F=…BîînR©TXaGÜ  èëàMÓä¸ãŽcªå®\.G©T¢··—d2‰eY$“I’É$‰D‚\.ÄD°s]7L”Úßßaôôôà8Žä4F£Ñhf!ñ¬wß}=Ï>{QXd¯TRÿK†D"r|o¸d²µãÅ\ žp]—\.‡ëº$ …H‘íñÏÅ‹J( A@¡P —ËÕm?—xúé§§/rÆó¢¼o2/•Àu¹ñ׿¶º¸” ¨2ç*ÆÂÕLÓ$?B¢BÓ4ñ<×uÇŸbÉ%*)›"š”Š Ä4‘’(o·öY®¶~2¶L°,õb›ô¢&ÐùÚw¶<—D“ìfB_3 ±ï«MÊ—ürɘOÇÁ+W®dÓ¦M¸®¾âÂÖ¦M›Ø½{÷0q®]HÁ€šA©Tÿ>;Må§Kéï~ã@‰&rÓÈå”wÒÆ[yì±èé©/§=¤cï…˜ÏG'æx*¸¦ìÛ·¯©Ç¨ë8ê/ä'- Ä2 èGyÝÚDžqñ>­ˆê³LTÿ“©ígÕYµu²¨~ÉBõgpiíi^.•?SŒßCd°!7{È3AÊu]òù<•J…äj´/ܾßìÝK*•Âq<Ï#“Éàû~xÃÇ!—Ë…7wù;Þ.ÏóðžŒ~*™®ðÉÉ\´â-Çìû{9@J¥8åy&ªôK5Ù-¯Áÿþï<Þóž#‡-/Õ^⥪HÕ–Å!΋mc ú øé4©mu œ 7•¤³¦…ƒzò7–ŸÚ¶íŸlAÞÔÇS=èœãç»CCìó} …Bè5'¡¡†a„)˲0M3Ìe ¹$3™ Ùl6¼ù4+Ú"ˆÈ–N§C÷ít:Ý´Í–e‘ÏçI¥RÓ&ük4F£Ñh¦?Š·Voo/†aàºj,|úéÛy晽˜æBÒéúàí¨N:ž¼Äžçašf8>.  
lÛ&‘HP.—ñr.— ½ ‚ •J…¹pÎí8½½½d2™plíº.Åb‘ (•JáøºX,†û´m›t:MOOO(ÞÉ÷ A°yóæ™=¡Óˆ…ÒhV¯Þ4uZï+£¡#ž£J„¸t,‹dmN”L&)•J¡×›eèïï«£Ž×u)d ˜ñ§äS*¢Ä°$jÒOrÞl7gË?—°ÓtìsÑÝÚ+_û,ƒšgPb_㾜ØwúDá®ÒöºóMMQEMÆ“±ŭõkù—~Ÿû=ûöìÓyœ'¬_¿>,ÖgýúõÃòŵÛ†®®ŸWbc4[<[ÂԥܬzžÍF×äwül{?¦év”èeš‘è#Upç:¾ûö-dõê£ë–çPýH…H¼Õ§4+`¡ú€~Ff¬… ¤rªx*Š(×”&+dKiìL&S†:vïÞÍÚË.cÛ©§ÒõàƒØë×óÈ7¿ÉÇà gŦ¿þk^÷™ÏðÞ‹/ûÙø 0•J‘J¥Âëk,m¸þSN‰Ž…á\¥®¸àvLe\F£Ñh4B’Í'“I‚ ¨ Ï“¹É '<˪Uò©O1lû©žOÅRAP÷PÙu]’É$™L†r¹L.—öm ÃÀ÷}\×¥···.÷²¤}‘mELK¥RضM*•¢\.cš&…B˲BÁl6K:Æó¼0 Q&“!Ncš&Ùl6Œ(±.N“L&Caϲ¬¦¢¡äkü̶m¾ýío·Û,¦„!rŽš7 | ÃxõÔì8“Q6É<âU"M+Pžxò{ ’~hù5E<Õl5wúÐýRÞaâ=RDåBÊ1Ü#d2ˆGœâšµK–›DžrÍŠ6$‰¥K¨Y¾¶ßbßâ0˜eäIum޼ë»xfë3c:´y£­Ð)á¨qV­z‹/þ…Bë$Íœl*•©o‹äŠšJ¤$ðTÐ߯n&óç? Ú¾ê&-ˆÿNJj-߇®®Û0ŒÈv3µÿE¸‘0]„¶… '“Ãû'wÌj°#îíÅbq\žp‚išª?˜ê>k¼†*ÂJˆCʶ1Q*•Â'cñPω`YÕÞ^ÌU«(…ç÷6Y7þp$î½&}¯\eê^â „7¹ç¸DA*À|ß~ï{<ú‰OGõÝœ$z$©ö|ìcÜë­ÜñoÿÆO>˜›«UμöZ~ºhãlœ¢ßM£Ñh4F3:>™Ëå0M“K.ù…ÂðtFž'Îf8çL¥RA@¥R ‹|%‰ q’ßC6mÛ¦·WhÅCͶíЭ§§'¿犥R Çq°m› ‚bår9㢌xJh¢mÛd³Ùq9‚LD\²,‹B¡ÐÒ‰eþüùSóC·™§Pb\.¿ýíîÉí¬TR¡¨é´š¬Å='ïH±IÉ[=®y—xŒ‰ƒLžÈã-lj¢%^eÔ$H–Mµþ2•Õ…MšO[1 œ0`Ë–-lÚ´‰«¯¾š®®®P”ëÔê%Oü§xŸôBwÝfˆ ïO¦:GÜlâf ¾ÝÝpñŧrì±?kws¦ÆüFúg’©.£.IÅõ}²¶mYg<ö¯9üpõu/hFü›ÓÄ1PçßnØÆFyÕIá‹÷­^ «W×m+^uñnEþ¾û¹ç8ûu¯cÙ°á±ÇÈårœ—L’Ïç¹ôþû§öDk4F£ÑhF$•JaYÿþïOñµ¯Á׿®\7Ò7J¥(µiÚa±/Ã0BALÆ´’‡Ø¶í°B>Ÿ'—Ë…aœ‚(H&“¡W˜ïûMóÇßKÈloooKñKÂJ+^¶Z_r,Kn<ý¦9æ¹>×=Ñ€s’ÊŽ^þòEÛ‰x¹n½·J:Ý4ŒL¼,ãŒY€“œjqñ­• &¯q1Ë%êæXuàéä`€­[·bÛ6ëÖ­cÍš5\}õÕ¸\fßé§ :¤ñ\ÈÓ•#n®w&3…iBo/üä'˹óÎ㦭"m»yÇ;Ž m&Cë÷ÙŽ”¶®T*ض=e×ÉòåËyÉ‚áû©èïEl‹c4üß s„ï?kɹòJNúú×±m;|âtTÎMF£Ñh4š¹Žã8adƳÏA>¯"AâúS¨ÔxÎêl69_"p‰˜JqÇqÂÊ¡q$W›ëºažäñ¤7ÉMÂ\Çã¥fYVè-7SX–EùH¾ØR¶õÑÞÁé§/ÿr9¥âårÊ@Uœ´ú̲À¶é‘ÕpU 5N‡¢p]…P¿á}j2#ULûÙ Ì@%/Oœ*L>ÜÈÁòG|bØ©!©×\³|{l¥g€©4êgýЇúxâ‰%3~s˜)öîÝ‹mÛ8DÅæ"r3˜T^„&HeX˜ïç8^r˜£|þʧžâµ‹‡ÅWtáF£Ñh4š™ç³Ÿý1ï{ßU@XX2Œž’w–¥R³Ä‡®òUþ‡pŠ'¨ÐÙl6 ltj‘Ïel,uSxßwNêy‰DBO§ 5 8òÈý;·â’ÙÞC˜‰>xÕŠW©……Ú ”ðâ*Œ/DS3åŒZ¬¡Óø§šÕîV(,˪«°ÓéL—°%.ÙSE"ÑM¡0±Ü³SO}‡¾øß(0wE8POç’S•h.Fü9Út qqѯéQ>Ûp>ŸŸ³6­Ñh4FÓ‰”J%|ßçþû?ÅWM>?<²aÐ4Wœų́+LÛŠ ÖÝÝM2™¬ ,ŪžÅÓÒÄ«VNRœa¼H;f",õ@Áv¬ßÛ~ÿe^úÒ,”à+5¸Eõxn¿¿p#oÞþf–å–!Õè.üî…p9QQI–=C°f1¡×××dž ê>l|¿~ýúv·——¼b[9šU«Vµ»)uÕa¦šé žŽPOÓ4ën4SÁYg-aïÞãæìÓ™ øÒ‰'Ö ˜‹ 
NûwŒæ‘6Y¤¢u+F“Õ¤´<0-¢¤F£Ñh4¦9AËå8÷Ü 1MƒóÎk]Œ¬Ri]ˆr$¡ªX,Á°y‹xËÅó·u³É¡d6±àø}ÜöÏ7ÑÕeïww¸×€>TŸ$JÙ“]$Ÿ¹˜ÅÎb>üò³òÕ+9x×Á*±u¥¶˜«lW Ók6Ìèêꢯ¯¾¾¾ðƒ5kÖÔ½ïÞëCµ œvÔiínδ2[D(içTß\V­ÊòØcçÒÓ£òÆÍ<>ò¯1˜Ö",š)b²V­Ÿ0j4F£Ñ´‡7¾ñWÌŸ/Ë—w±bÅÈEû&3•i6o3M3âây‚5sŸî¿ØMÿ?ú”JÞ¸òrƒ?ìרÑD±D”˨¶ìhïh޾ähVlZÁâÏ/®÷tó¨ïÖö1—ðf)óÖ­[ǺuëÚÝ–Qù¥Oš@®àŠ×5Í(§¡0›RŽMu›iša™ìé`ª…¸… ·3oÞnþò/§­Ém!`ï‹ÖhÏ`F£Ñh4fð}ø‹¿ØÄ 'ÜÏÆg²dÉÌ·!N“H$(•JaÞdÍÜGâÏÒé4…Baô9²D–¥Óp¿ —%ëEŒ<Ê+Î ØDàÝ=°›Ý?ÝM×]êŠÿJƒÑ“^kÚB]ޏ¡¡!6mÚD__»wïfåÊ•¬\¹²cü}'=ÏŠ?üg3º²Ü¬¯é)È\gº…¸éàÐCŸb.Þ³ö._®½á4F£Ñh4šiࢋc÷îýlÙrUÛÚ`š&•J… ð}¿#CS5SOÜqÑœüçÐßß?úž® fZ fµpáx^ÁF-—ËQ©T¸í¥·anÒEMˆK0<äÊtrGVMíëëã /dÓ¦Mœp ¬\¹’¡¡!®½öZ®¼òJ†††ÚÝVþpò œmúNÎÞ!zâ¨è¤î¾?»<GãË›bÞž=ín†F£Ñh4F3çøøÇÂC=ÄøŠv7Ó4±,Kç >À¨>ûlXÈcTýésùÒ—i4M[)ë¸ò½'“pO¡® Bc(³„8ˆÍè•ä4ÃÁ›6mbåÊ•\~ùåÃVX¼x1…B×uhkcwþÇvìèšðöŽÓÖæÏY¦óFgYÉÀ²esCŒ{lÁ–T§¿š¨F£Ñh4Fs ðÕ¯nç{ßû-½½—^º²ÝÍÑ øÀ½{G_QdŠEå·£? A ‚€R©T—VÊ÷}‚ ÀóN¤ã‘‡ Õêðⳑ§æÍãÈÝ»ÛÝ F£Ñh4fÎðo\Å?üîiIïã  Oˆ’ñûµå…ØûéÊÆíÆ¾Wbß=ü&ïgWñÎÆ¶m;uä•\‰š—‰=»8ŽC¸®‹ïûäóyLÓÄ4ÍŽÉ߯™óƺb»sÄùÀ¡O?ͪU«DuFã ž‹‰ÿÇ˸ÜdÛ°?A’ššæÜ*²1ÿoþÈš½ø?ínŠF£Ñh4F3«)À0JìÙóïÿ'´U\ÒGÍ1]”Ð%Ž2ç4€\l¹]û;Q{ïÇÖ5ö“&,z (a¯„*ŠiÖ¶µ€L­ ñB—µºšá~dß^íe Òƒåˆ¢Kµå^l}io¥I»ûkëSÛWû®Æù¶S¡öfl;:{–ß;òJA ÂQ C©¹ Å’Éd˜W°±òªaZˆ›#ÌX³f ncBµ}}}£zÍ͵^O:«‘(•TX£U—ô°þ½¦³QI.#5‘€ÓNkw«&ÇsÏ=§;OF£Ñh4f Èåà´ÓîáoüvËu<”È%^lní}û ” $¢–{5"ÛM¾'ƒ¼\@‚±|”¸â µÏK±ý‰ø•¯µSÈÆ¾¯@$²õ zní%˜µuÅŸ!ÞÌÚÿ~­rŒÝµuÒµï.‰‚Ô–DbµýÈ9(ÖÖ-¡Ä;³áx}†ÿœsˆ>±ÿþ‘=3ã¢Ä'¾ï“Õ!}s–y¶m³iÓ&®½öZÖ¯_ÏâŋölÙ† ¸üòËë–·ƒ]žÁqIŸã]ˆó}0‡•ú…Z‘M‡“Íf‡ Vs!GÜ£"\º­ÝÍÐh4F£Ñhf5’fëOÿôj,XPÿ‘ð&b’’ÊDa½øÞVÔ­û³ˆ¼Ú@ Y"Tµ Øj%¿ˆà%¤‰„¾dÃvIês÷÷×Ö –ûÔ‡¨š±c¯—Èk/~~“µ¿µ¶ôEZzµöºµÏû™Ûb\x~Ve1ŒD‡TLuòš(½‰D‚J¥þ¯ ÌMæATáÚk¯åµ¯}-kÖ¬à‘Ga``€Ë/¿œõë×·µ¡ÿR‚ƒÚÅÙg7æ8v_¼ÏZšUíí…?û³sÚÝ´ pÈÏ·»F£Ñh4ͬÇu}ººúøä'Ïç)C‰k *žnI”ðÖÈLg¾‘0Q‹z/¼‰Ð¸m–ú°ØVëAs!̬í#Ãpg›HxŒïÏŠ½·€x):#3@ªv¼Y”× -Õ¾ke {®½{}HØ-cz‹E«šÏçqG{ÅÍQÂq]]]lܸ‘¾¾¾0LÕ¶mÖ¬YCW×Ä+•Nwù`.¿ÓÌj!îÅó ¿U»›1a¶ ?ðè´åÕÓh4F£Ñh¤²ä>²”È“& á,Nn÷SŽIÒé2µžaÍÅÆ‰´/îõ6™öPk“« KÔ禓0Ü pΊSxVf0þ{Y{ÿEüç΢B³ íï¤È¢æísEv2PéßGÈiî3Ì}Sò¤ÓiOüaÀÀÀ<òH»ÛÒñãIˆó©÷v«p¶¹*{À‘yõ¢„ê=TŒº$Žìä§“Ŷ;Ýa{nã{7ïçä“—´»)F£Ñh4ͬÁC‰p¼e?‡¾ðÊåNͰ¦Q…ÕÙŽxõ9N !ίý“®g†¡múdÀ¦M›¸á†ÚÝ––ø@£|÷`sQ‚Z%ºµ 
úmõ¹[[×"Ü‚Øvâ57S®¸šƒž{ŽÓNjw34F£Ñh4šYA©özYþ¸àÛ¼ûSgh!MÇ!Õfÿû'yùAÏ)o rìÃs\XëÀÕ»þHîÀcÀúõëY¿~}»ÛÒ x0ö>n¦âÕV&Ñ ÀõÁuAÁàŽ*Kfl=¹>ÊD\4š©Æž}öÙ1ç9Ôh4F£Ñhd2µÿßÀ·>Ï!‡|†tZÏÖ4‡_ûÿÉ'Ÿdé‘ÕáIé_åÁ™ÃÅ6×u1 C q ³&aÕ¶m§†O?âf*!¥ò†iF"œº8°ÙÞgî’ÍfuòÇ6Oýfâ4F£Ñh4šQPMEà±,Xp§Nµ£éXÄ#àeϽ¬>|ÏN 6ô<µB:Öó󔃶lÙ–-[ÚÝ–Y¶ì®a"†‡ê ›uÉq!ŽÚzo«0"I”i©¶~UÐÁenÏçµÔFŽ<óëúi‡F£Ñh4F3 âIü}>A6;ÛÊ2h$ à³ðEÛ뽄\ÀóꄸL&3ÞÝkælݺ•­[·† 7lØÀ•W^Ùî¶‘Wçòå?©[.yáZ¥5 UµDÈ3zElm~mÿfm»\í»µW®ö*µûähf ðÒÓ~Üîfh4F£Ñh48[ÈÜÍórüÛ¿}Tç†Ót,’k~ûö…<ôªGé°”ªìûP³_ÏóH§Ó$‰v7]Ó&æMÕŽ†††¸ñÆèêêbݺuuŸmÚ´‰Ý»w³zõjÖ¬Y3æýz(u™®Û€…Ëm”hÖª;ö<•#.ŸWï³@.½o…ˆoqzQÂ[¾öyu¡e€4Jœ3Q×Y±¶nº¶ŽWû,ÞNúÒSuò5SÆtÙñþöÿöÈvžæ¡•OƆ5š™dºúbf&Ñv¬™+Ìô¸BòÜKÊÍ›§\Öa©šÉ1v ìõÍo¾¯þƒÏ»ð`^yÙ¦ŠÊùÑÄ ÍœeJrÄ q饗°råJ\×­sÎår °råJr¹®;¾`Ïãøå/Ÿ¨[V¡µÊóÓ÷¡Pˆ–y&ÖŒÚßI"Ó% š(Q.@‰q^í}£ç\*¶ÌA sNmÝÆu²½iø;ŽK$ºko½ñ3vìyðÐC—´û5#Ùñdûbf&˜î1…F3h;ÖÌfz\‘¡~ž•˼îugµû4hf93edžáðÞo¼WMÆsÀ-¼% årlƒ Èår:mÔÌ”xÄõõõ±xñâ°òêêÕ«yík_~öÈ#°qãF RœÇ“ls› †1¾œ†Å"ôô@6«¼ã¦ uÃH¢Ä0 èA‰kIêŵxÉC—Œ-{Ê9µÿEˆsjßUª­Ÿ$*\!×z:¶žº™‰÷`¼ ñ„’cÁ§¾HF#qÏ?YWtO«v\Æ8¿s¦™N;î>æ)ž]vpn»S3ÇieÇSÑk43Át)4š™@Û±f®0“ã 5ˆ×E=ãŒïrþù¯n÷iÐÌr¦ÓŽÅÎuaß¾…úâCÕdØžLÂçê÷%•R|ß×9ÄPB¸¾¾>6lØÀ† Bƒ”÷òjÅ 'œÀÕW_¾ß½{wø÷–-[X¹reø~Íš5ã. 
±/PÅÆ‹a(ϸÉxÂF‰zÏ<›H˜+ ¼çâ_/Å%$ÄU„3Ƀࢮ٠¶<.ÊkÛµå¢0×ÊKÐA‰]ùZÛòµõ—yËõÔÖ?ˆÈ{î Úÿ)¢Ráݱ—­(ÕÞS[–ªµAö™¨ý-^™Úñ§ˆ„Åfx±ã¡±ñ<´ýT0v|È ÏrÁ;¦ù4šÖv<}±F3L÷˜B£™ ´kæ 39®hV„ï‘G>¯Ejͤ™N;–üp® ;vtÑ÷¡>eÈðæá¶+Sµw`3TŒt__}}}á'œpBÝû‘èêꢫ« €²Ù,—_~9 Œü„N¨[w4üq6lØÀºu뺺83 ž3~!TåÔTJyÇM5­=Å$|Õ¨}ž¡–§忠ĥ2‘èd}Jµ £öy–Hì±Nž™@üØcµõœÚºò¾@ä]WŠýOm’gR*ÉŠXæÅÚí£„Á vN$7žxÝÉ~©íCDÅ8 "RŽß­µ¯¸ö¹çX¸};§=üðÔÿ5¦ÚŽ~øa6lØÀÊõë9rÿþik·fö000À¦M›xüñǧí;ZÙñDlxçÎxžÇ† Â'ˆÍ–-[¸õÖ[§mÿSÝ?úè£Üzë­ ÔåèÒh6lØ€çyÓ2š®±±Î'§‰#㊇Û0>žˆC4>n6®ð¨Ï¡}É%Û0 Ci˜ãlÙ²…­[·Îºññ¦M›êƦ ;vü8Y-h¦,C«ÃRç›6mâ—¿üå˜×Ÿ°nݺIN%ùá¦M›¸úê«'õäâˆ#Ž`õêÕ,Z´ضmÛ„IÉdä7Ó}¸\Ze¢Š«’kN¼Ö ¾8„x½At#JÆ>ßœÄëNÂBGCBdå4ˆ`f >i_³}‰wm@äÅ'*dŸ"®åˆJöÒDJµï‹´EnÀ²ß$JÄ\÷üóülñb/^<]?0µv¼xñbV¯^Ínà×,`sßÚim»¦óY´h«W¯¦··wò;fv<’Ws+?üpŽ;î8V¯^=emËår:1í,ç„N`ÅŠ LÛwLe_¼hÑ"ºººêžxk4 B“}ôQ?üðiÙÿtŒãFFÆ›7ožÖqDããf¸Dó Çþð.þë¿Æ—žH3û~m¶Çé4Üö){ÀŽ<â’ãÞ½f–²råJvïÞ=æñqËb [¶lö‰+¯¼’¡¡!¾ùÍoÖ 4V®\YçY'ñÙ#qÄG°fÍšp½».9jÂOB C½2¥RO5’ûl4*(An´kQ¼ìüQ÷‰WcÑÒm"ñ¬îüyîÉzPN QÎ7ñrk¶¯x»Üؾ+Ô猳[ŸIäå,`ýüù¼°d K–,ÃÑNœ©´ã%K–Ô=µ>öØé›´ŽJaò»ÐLžÅ‹³fÍŽ8âˆiýžfv<>üðÃ9þøãÇì}áû>©TªnY<ám©T¢Tª/%ã8Ót®™jºººX±b‹-š¶ï˜Ê¾xÑ¢E¬X±B qša¬Y³†ã?~Ú„¸éÕëHs` ãŠvŒ'bÇ0||,ÔÏ-6lèãÌ3wê°Ô€®®®Y9>–q…Ìqüã!V,_§P*Õ{ÐÄX¶lÙÌœXÍŒ²råÊqÃb [¶láÚk¯åꫯfÍš5\yå•aŒôâÅ‹±m»ådlÓ¦M,^¼¸.î:Þ ¾¾>†††X¼x1®ëŽ»C}èÞÃ0ß5qÍ4UÁ†tz»h¾_Ô…7Vߎ±:ŸD•XG£—áùÔZµUòÐű‰Š>ÄOˆo/7HyÅÃXãHØk\t˱‹—\\ˆ´cça,Ç9¦ËŽ]à gŸæÖ@¥œ:¨“©‘æ4­ìx*úâ‘Èå`Û¶ƒ ‚èJýÖ·~Í_üÅm|èCCÜxãŸ@®è\.G:&•JQ­VÛ}Ú4Ät)4…ïû†ëº˜¦†êÈryžçáûj„àº.Éd2\×ó< ÃÀ²¬ºåòY2™Äó¼PüÏf³Ój;ÚŽ53Au÷Ýé`¦Æqç‚ €»îZÆwèÐ=ÍÔ0v,é™>øÁ½|à%§Á=%¸ÓQž@MƸƒƒƒí>š`¨8é\.Ǻuëêžoܸ‘¡¡!r¹ÜˆO’زe˰8gÏóèêêâ²Ë.ãÒK/eÍš5¸®ËM7Ý4æúÀ¢£vM:ÇtDBMWzE£Æ‚TGË>û›¬Û ª#ivK·jû±ˆrljÀÇŽ­× áB£¼ouìÓÎr:í¸úè£,:aú¼GFD’JµŽõ?¼”ú™N$± ¨¸íÂ'JBwCŒdÇ“±á‘ÈdT €N¸õë£á£}O?=À¾°’—¾ÔÒ rë­wP(8¸®ËâůÅqœpžÉd(NGROͬa:ûbÍÔÁ°¨… Èd2d³YÇ!™LbA„Õâ\×%N“ɨQò¹ˆh"˜õööb¥R Çq ’iš8ŽŠl–e…}ˆçyaî7óÇÁqŠÅ"AP*•¦UüÒv¬ •åú‰‹ÎŽãN§q]Çq‚€r¹ ¨k#“ÉP©TÈd2AÀªU«¦­31®âsò Ÿüä>=ôV,+3¡ýi4L·¨üŸçì;¼~Ržžp<ÍÜ¡Z­V¯»îºêßÿýßWã¬Zµ*ü{×®]Õ?û³?«îÚµ«:Qyä‘êæÍ›Ç´÷½ï}áßvµZ]¶Ì«Nø»§“Ê4ì³·Z­ÛxLÙØk4ìÚzc9ví•oØw¥¶<þ>]­V­&ÛÇm£LÄŽ³ÕjuùÊGª7n™þöV«ÕròþØßƒUõÈr³:¶z²d«êG5cßm×Ú;Öã’ã(W«ÕdubŸ]UW¶¤´!;7• 
ӎÇcÛ7o®^wÝu£®Õj¹\­~ðƒ×TÁ¬–ËÕêºu߯ž}ö;ª¶mW¯ùà5ÕÁV«½½ÕêYÆÎê™g~¿úÊWþO5ŸÏW/þc¬ªeYUÛ¶«@5NWMÓ¬&“Éjoo½‘4¾×´‡±ÚÆt1;¾îºëª›7on[[;‘ÁÁÁj¥R©VËårµ¿¿¿îÚ¬f³êæ_ÞÛÛ[µm»šN§«¶mWûûû«@5™LV-˪š¦YÍf³U˲ªÉd² „ïmÛ·M&“á+›ÍÖñd?ÕjµšÍf«•JeØß‚aÕd2Y·,›ÍVº}öööV éZ­†ßÑNû˜èØXÓ9‹Åj>Ÿ¯³­|>_-—Õ`,™LVÓét5ŸÏWóù|5NW³Ùlx}†Q÷*‹Ã®¹fòy5H±m»Z,«¦iVmÛ®–ËåêYgÕ¶s0;®V›Û²Y­V©ªajµZ­{ì#Õ~ðš¶“¦=ÌÖñq¶Z­*ÕªaTªû.¿¯Z=Íj¹mooo]¡™[Œg|<T,t£Ç[< uñâÅ¡ÛæD+5Å+•L„N­–3ÏR¥˜C;qhÖ>a¤pŒäkŸŸ€:ŽƒeYºdü,g*íx. ž5b×¹\€|>†­ù¾O¡P ›Íâºn˜¿QB:MÓ$‚ÐËFÖ ‚€þþþp¿étš Âý‰«eYxžzšåóy<Ïò,zzzð}?ܨÐÒD"A¹\6Æ“ã°, Çq0 ÏóÂJs‚´»Ù²ø>-Ë"‚ºu-Ë¢P(L[~¸± í¸óð¾þõç¹ðÂûùú×ÿ–^XÂòåŸbÛ¶Eôöfyökfyiyd¯&<ü‘;¸äÃ}\ô¹wÂÇàß—®À-«ý5V˲m;œ¸€,Ë '1@]ø™ïûär9<Ï Cà$ÄGÈåráþâ“*ÉGåy^8!ŠOÄâ!u2Ñ¢ •äë1M³®mS…ä¸Ò•g Ć þd¡PÀ²,lÛÅ/Çqèíí¥T*ÕåRqZ&íñ‚*†a„¡Ÿ†aÉdB! ¿¿? 1õ}Ó4ñ}¿.ÿZ¡P`pp0l›išd2™ðúQB„…8ñm☦Y'Ä …0ü®™`×f$!ª4 ðÅb‘žžLÓÔbØ@/±±9¹DnÕwú¾ÚºÜâ×RO¡P N“H$H$X–E¹\&—ËaÛvøŠ˜å>˜Éd( áµ'mlÜ®™­Ï6ÔN†RA°rå;0Œé­ ©ÑL°Xºt)<äÀÇ[Û®Œ/5šy ’nÚ´‰õë×7]©¯¯¯©×ÜLpš[ûéË} ©g›Í’L&) ¡ø×LlI§Óć\.G_ßád³õ¢„ï«.$Àó >Æ“Ï úܲÔÿ†¡þ–fÊ>T[Úmisñk¶\D[¿$šˆ A„â´Ø©T áb± Ñýýý¡à&"\6› 2Ñâ¢(-•J‹ÅPœ1Z„p¡™ç™,kœdÈ~âÄEºÆåÒ&Ó4éíí µË—5›ä4g¶þö·¿ÕB\‡/ÐÑ(¼Êµãû>®ë†HäÚ‘÷B<ïšf®ëÖ [q‰Åb±ÎF:è ²Ù,¥R‰t:=Ì#³²ŽìGþïÃ9¦øw&“Ir¹\xl" Îv\ꟓÆsAj4³…£0žyÞd8€jŒÜиÌX·n®ë’ÍfùÀ>P7 q]—k¯½–Ë/¿|L%©§’XlmGMÐLq‹fa¢tñyIRÖV·Ùfú… üÛÞÐîÓ3!ÄËÈß>P€ë]xIï.µ.Ü+PÊùæ^\JØHyr5b3r(fœ$JPò‰¼ìQOSDžM>ê‡/Öþ10S,‚ÚX„8«ÉÿÍ<ÍÓFR¼àD‘Aа''ç}" ÇÅ:)áÕ0/ ‡V‹Zçðgf§ ,À“>œpú>ú>úJö¾KMÜ1àÏ~œG?²œÌ3Þ‰[`ðaB{0ln@‰·¹è\õ!0eàeÀ¬={q>¥_(qȶÁû±Åe—íãØcç“N«"7Ýt8}}.ûöÅ%—œ†ë®%—ƒJ,Ç⦗_Æ«ÿ|>;v<Ã௪|{ë®8í ²öQøŸ…ÿüþ¾ Ì*üääOqÚþCXpôïù¿?Ç®GH>kààÿ¬UµÀ¼åð¿ÿ{5<0ÈŠ¿á¤“nN¦\VW©¤^Å¢zÿ™Ï|ÿøï†¢Üý÷_ȲekI&Æ0 Ö¯ßþ}§³ÿ~r9“O>™SO=߇þ~5Q]´èVN?}wÞ©Ô³|`>;vtaÛJÌ\¾|®{ù<Ü}÷.Þóž%ض*ZjL&ë|_ÓéÚóR)Õf©6ÞÝ­Ö§ÅË/)¯ýöv›â¬¡P(L&CÏ󤧧'Ê€0NDdñ¤/ÐF¶mz{{Â"®abwñ˜Ëf³u^hR$%Žˆ`!œò²OPC£‡Ž|oã~%œ®‘f…Yš‰¦i62Æ"ŒÍD ˲øío;å¿·f|AÚ¹üN…Bþþ~J¥¥R‰  ½ÖD¬«T*uû‰ v“…B¡îZÑOļFñG®‡VâîHLÖk¹™m§Óizz¢s‰xñéOŸÁ­·Î~O?ÍÅÏÞǾ¥ƒP.¸žN¢æÊ·qãFr¹\p+W®dñâÅôõõJ¨kå-7ˆ^1Ö ¢š©c¼·ø±t)ñÜwy”CU+&ÙdŸ6ð†ë¯‡O}ªÝ§g\Àïï*`Û« x» ÿ¯[ð§i8´æZö霔ŒNŠçA.§Ü…@ÍâOhÿ_,ÐX¸ˆ§]ªöõ#ˆ—(£ðjËâaœjÐZ½©Ð\H,3:qïµÆ}ÅÅ9ŸHPœjâ^{r¾{jÿKµZ‡È{ц?<ò‡ihÈÌáƒ&üÓùìÙË¢°™ÃÛ¯;†´w 
¥3«C!gBÒ¬™‡¸cI |¶v~2`Ôæù*áofÌo‚ù#ðl°+à-ŸËJB>yßvžÜk°{÷‹Øÿñ—QüXŸ¨5ÖŸdç×:µù€Á ²¬ç(Ì0ø9B±ÖÌÀ7‡@¾cîÅÙ²€Oœ<Ó„òŠÚoéùŒ·†ÒÑ`­UûÁ½i ìÓÀ;4›mÛT:5Á{;¦ ¶ýJúú"48–žžl;ze¨|@Q(ü5™LäIõüó‹Ù¼ùzèÏùÅ/ÞÂg¤éîvÂï‹\N yŽãP.—) ¡G¡mÛ ŽcÖr¹8Ž‹ã$©TÒ¤ÓjÝDB…irÊ)}ô9ÀEí6ÇŽGB¦%Ÿšˆ ’‡J<:MÓ¤§§'ôæDHhœ˜tÐA¡‡œ\ƒÉdr˜à%¡šOß“Éä° mÛþDz,R©Ô°08˲š iÍ<þDðh¤Ý‚‚aœxâ‰mmÃ\F<ÉDLó}ŸJ¥>ˆe"À%“ÉÐJD'É7èº.…‚ЧA¸Ur«ÏD —j»’§-‚¦ë‹×n;·§R©Ì‰pT¡ñ¡ü­·>ÅÐÐCXÖ%ínšF3.Î?38$JeÓ€ä]Õ©E4Â<ùCÄ8)í;00Àe—]Šríb— ç·ûp&v¥Þ U¸Òç(•JT*J¥½½½aRý8Éd’T*UWÄs"I.£r¹Ìò忪åPyéDøùÜç>ÇÂ… ÛmŠm'”w¡ïGÞ‚¢5üÙŸ}‡~(&H^·B¡þVâV£çM«ð³f<­<Äó/-=|EúfãSü\Ny‚ …‚ºµÊg§ŸþNÎ:ëáv7sN"9ׂ  T*ÕМk¶m×yµÅómf³Ùº°ýT*5)!Ê4Ͱ`Htww­¯…ry,Og–¹$ÂA¬ <þøò¾÷ù€â4³yüÕµ£«¥‘.¦id^ã‚®®.Ö­[×îvj~³ïá§Ø»÷¸v7EÓ)b9âóϹœ Âü¥I VxPpàäAðÈk«Ù жիTR³<ljâÄ|_ t•ÊÔ$r’x5Û®ßßhBÞhãAÉe—FG—Ï!nJ(Ï4÷ŸOäÌü€böm|c;$|’ü–Ûàío'{4ä}®¼Û0vîdÙ~¤vàûpˆ÷Öl.oB®fÏRˆãþئ¼ÊDƒ›ªó¨M°Ü$«Wq÷³ËxÉé ÔBΊ(aLSVeDN܆}”X«6|Òµ'©ì»Ní»º.,gjßQ$ ï–‚#ľ£æ%iåá”§Náw¿û=ÿû¿k”j\;ÉÇÚä€íÙ°þ'ù?jY.¶®ƒ¶O~zöO1_ª<8Šf1 ®øÕª*WÓUõ½.QAŠb,y»ºvð7õ8/yø Ø¥sÂózí®kùþáßo³%ίf{==Ѭ%î1ׄx>”axžš½d2ê½e©—ˆ%ɸ8 «Ò7«ÎÄvJ{õ½÷ò‡‚ýÕCÈÍçµSô}lQ+LS)(m' ¸{ö€çQظ‘ôêÕ‘¦eÛõ³äý>\“QnFÙÜcÂeœk€›V;ód§pË]2yÀ¥Ã=¸´¶O)^J4«y½a£D2ÅËSÜrã1õÉÚßbÓÙØ:Ô¾3ž?P¾KÚ䨶.Þ¿˜E,bÞs{ë½';Óf×O¼Ï¨í^0“g_ô,]ÙZŽ×‘'h)võž¯R8K½ X©}v;l®É/Z ói¸ÿ÷óøÖK¼ð<•;¯¿_u¹A\ÐG_ßøõ¯,kÇø‡]tw¯âüó?Îi§ýŽïÿoxâ õY6«.‹ßüæ8^üâÿ&‘P¹øDˆÏÓr•K]jãí.k&ÃK.^ÀC–9N”¡\.S*Õ‹h¶­Ž§PPçBr ‚ZæyáåO*¥Â¸Ux´Zfšê–NG^kŽ3<_¡‘gOÒî|^­ë8µ|r©ÛÑñ‹ê%mŠßN“Iõz×»~Í¢EËÚdU³ ÏóÂlRø@<Ü Ã VkÊå2®ë†¡Ëñ܃MÂ߬àÇdI§Óa’¹æi6[ˆn¡¾?øÁ:Êeý[hf&àÿz>—¿åÑ–Q@™L¦i:ÍËÁínÀHˆ›g8!?PT›™ÎõŸuȃ§Ÿ¾N[¦f 岚œ3âaXŸtZí+™„l–gÎ=7ž\ŠD"V•t]ÇqÂêzÃð<µŸÁA5ËÌ磙a#©”š5©`W¿K/|Ås" £“¤Ê"¼IµI Ù’§í²¾ëºá6ã¥YBqˆD¿L&>—s$m‘í35‘Ò÷ýð8$· û†‹Ò¬ŽÞ ¿~ÇOylíZ%¤!»ã`¬7¿¹¥*P’GA!™¤´z5 ”sZÃs Š0W;×<àÂ'€×J¼*×^iîIpê¡wóÿq¤Z·¼Â‡OÔlÕMA)AF½7\5P*£Ä§¢§>óRPJ€[§‡,SžyBbvÊ<¯&†_7R$$aèÐ!Ž»í8ì[0ñ#G$,Öª®¸u6¶ò²‹W0.ñ‰D9IÎÓJ0·áå/޾‡ó«ÕwDÂÞY!•fEHóò6 Ãà¶Û>ŵ׾›Db¯xÅ©a€K/ÏæÍkCaÔsÉDà8êý²eêÈå"O¯8™ŒZ79ù[Âf‰Èé9ó¨L¥¢cëé‰Ö•u¯–—Ë‘›8¿˜¦Z.:¡npp¸G[\¬‹/oöü*›mÞ5uÖƒí3®YB©T"•JÑÓÓC¡P ‘HËåH§Ó$“I²Ù,•J…|>O6› «úš¦9¬Zm§¡¸ö¿[þ¿ÿ÷ +VÖî&i4â#ç|’ÄæûZ~®ûM#íç/üËböï¯=áO1zÒwͬAOæ²çûþøÇüñ8x!ˆÜ Æ1‘•dÇbRâ¹üÓ¿ü ºñFv\p¾ïS.—™ã®ËªŸý 
»§‡=o;7wo>þx¾vÆÜøë_ÓÛÛËõwßÍŸùŒÚ±Äï´ºa”Ëàû ]}5ó7l`ûé§ó'[¶ðä²e¼ðÖ·ò•C!™Lâºn˜OꨃÆð<þê/ÿ<·v‡ÞqÞ㇠“¥¢_<‰©ä5’œE¾ï×UDó$ç¢i*EÂ4᯲ð½¬mH2%qrqÄ›Tâ+õ·”Åéí?ø¸sá,ÚÓŽ?<$|,d‰òÆÃe¥È¸QåÝFí„×ÅQïýׄ+îeÏžçŸßÉÊÛADzð-·lãæ›îºëxòÉ[ÂÄó¹\.L/ÉÝ£ê¥Ã÷iÛÊãúD¥yrÉ:r ˆZ2 wžG-Ÿzõ÷«ÿS)åH*æ Ê ã•e©v‰7]¥¢.‘b1zž¿ìdýx÷¿Ôe]y>cYõ&ÝêžlÎê\nrNÛšÖH1É „önšfhëØ¶MwwwÇ8 Ã0ªƚ™'>"êíäolw‹4š‰³äþ%-?›-}¢fæèh!à®»–ñŠWìf€š9E…(Ÿÿ\$»{/z¡æ©Þ—ˆ?¹\Žl6K.— EÏó¸jÛ6åùV(`¶msÏ÷¿ÏÀÊ•œÿÕ¯‚ïsD.ÇyçÇ‚3ÏämGÅ«kÕŸ8ë,õ;Aè*O³‘§à¥“Nbݹçò«;îà¼nà¿þë¿Hÿáœô'‚aQŽ×ÅÈd ™dm-îè Û†ï}´ëª™]:]? × ÓËRµš@(ë…žq¶Í«LL“…ÿñìÿÑÀuyÛÒ¥qË-ð“ŸÔ%\.—ËP*‘v5[Œ¹ˆ@¾–ʬU1ìZ°aÛµø,Ó4ën¬"è‰Ôt\ŽÇ7oféÒ¥ð±qôÐÐÌãà'yð¯ÿGÙßàªCކS|Øn¨Y|\ÜjÔ5HHIËÖêê,N\oF¢&¢­q8fœòõÂRÍ6êfëí4ŒúÏÅ^ĨqÝx5c¡R‰Ä¶\Nm—N+u$¼ðƒXÖÙín’F3.$Ïá<ó2æ¿úЦ눧ÑÄéh!Ζ>¹ŒËž¹LÉÍ’T,O”ˆÚ«ý?ÒüÏA]%ú©©¦ ô œ´û^X5¾Á…mòЇ:IraÛ¶É7xnMñ“ÉdWœo3¾ÑÊECMÅ 9&6Œyýñ~¯ï«mâÇ‘Nsë•Wò†é;{Ó†˜¬xú0^±ýîúììct¹o¬ež”WÜ2T‚q?7ÉfgF‘‹ÍHâªÜxø‡xѱø°=-NR¬¼cÜJޝY⡵£µQ2ìKlb©T/|ÇÔq’ê8êTäórZ=:¨‡d2 íPwzAA=ט}¤R©Pˆ“‚ Åbñ€˜8êÔöáÝë¿ò•‡8é¤ín’F3aN<ø±–ŸÍaÍÌÒÑBœ ,ØËòŸ,‡/ÕÚ Q®åF!ó/›(îÉ JLí%&rP`ô½W3<³k{¬Žª7Ž‚äS‰¿m Ã6P”|j0õÕEãßÕzÕvÒé1 >A'¹ªL>àßWêñ¢?üî¸cJ÷_AuÏ9&à7Sv1–ßTrεàÕ·QèþÎhGÛ`xÈïhDÉúáë˜TŠùK—Nõ´ PƒœNC¡à†Iè5š¹‚o–e…žVÍÛ\ç2ÒÌÑôÍuÎ9g_»›¤ÑL˜ÃžÛk‡ÏÉZ¥úÑh:ºjª<ûìcñìQ“…ê¹ó(.ª g×>+ D6áRD%y\¢ø§¹WÐPÓ¸ÀÞCáùSÞ6¦õ}ßgÙ²eA‡Eï²€IDAT@2™ Ã@Ñ94í`© öZèÞõ~Å+¦|ÿ’ÀGUgíAuáϬ8 Œ5pÑfùòŸÐµcG»dl”J*Ó¿x„–ËP©0`€{÷¶»uSN.F¿ãº.ÝÝÝ”J¥Ž®ú¨ÑL„R©D©T¢§§‡t:M?ÅbQ{ˆifŒø½ý¤“>Ã{ß{j»›¤ÑLˆ'}8ä y°z¸ÓB©TÂqœv7QÓt´··& ÍÿÉüÈ="‹J(”¤>¹˜][V¡®šÜªVqó7óØwS‚”꫎‘ãzM ^jzú©_º^®–·ipp°. 
U£é<€çàˆ§þÀÇÓ¼äQ!z¾ œ S(aÛ«-“ÿ%ó@§àyAËåð}Çq(Ôò¼•J%|ßgÑ¢ÿâ†¾Ëøï}çA“œs²Ÿf£}q›„ÅÆ¿§P(A˜{R¶iÌ{(Uª·òy\ß§T+°²ù¢‹ØuÔQí>ÝSŽëFªã8äóy¬X‘f.P(p'¬.Þž¦iê}šÃ'òˆkU…W£ét\”wÚ¡½‘ãPŒ|>¯phšÒÑBÜ<.üd(ª #MÓLÿ¢<' nùá-¼âï”çF.—ãžÓîQ¢žDâ8(9ù;‡òš“W/ö"¶Ü!ò´ÓŒ ?–h\&~2)lü¼Õv³ ã¤yäþ×ñ<ß÷1 C‚5Iñ³§Ùvëý ¬\9#ß™G=w±PÏYr(A®'ö H rÝA®—þdÿ~Îß³'ô°{ÑCñ§CC¬"ùÝïòÖ­[yÕw–|ýëüù=÷pÑý÷ó¡}ûøñÐoúÂøp_ï}òIþì{ßãUwÜÁ;~úSrÀQÿð¼uëVÞsóÍxÀ_|å+Ü8>§¿ùÍø¦ÉÑçÇ‹ÿ꯸{×.~tðÁ<|è¡ünÏ îÙ³‚?=À¶]»øðM7á8ŽòìéáîmÛp‡m»vá8™LP\|ß§P(„âž~¹\.|âê8Aày^¸m.— :ég…8YÖÓÓm_{(`šf8QÚ¸q#O<ñÄLšà´ÐÓ#u6”'œx&—GÔ \;Ùkf¾ïÓÝݦ¾Ða¨Äx‡¸>³¾ã‘GJ·ÞúüwínŽF3aVîÝ˽Kow34³ŒŽâàPÞ¾|…šQ‚x,[¶ Ã0´T•\¾b9Éd’×½õuôdz¢'ÿ>õÞqQH«„À:(— ˆ95Ë›Ì16x5ĽäsñŠ(•JA &kµ”¼‚€îîn š”¥R©Ð#—Ë ótõ婨çy¡GVOOAP(ÂI\OOOØÙW.— ·ñròÉòyÞsòÉ8É$‹$¿R¡Û4qòyÜl–pç'>ÁëO>™ÍŸþ4{®zFT,R0 r–Å3×]G0ŠEœd’à×&Þ¶m‡“p)úbšf(<–EºX E¾¸wíµ×rÔóˆËfUô­x•Ëåp®›"w»k¯žÚÿ.jHÐzN—#òÞôÑÏì4íÇóà _¸‘?ÿóÏ’Íf)‹áK{!‚7ÊçnìÕ4ºjÇ÷!ýåéP7ªc)5|.¤Ö•9‘Aä<0˹í¶tuuµ»Í„™w¨OêO–6ýLÏí4­èØb p˜í>­©×[#Žã`•J%«Òé4AN§)y%ÒVšJ¥‚aaØŸg+Wè À®åÞJyÌA½7^üÁaMˆ“$Œ’`_þÎçóø¾.qP¾;‘HÐßßOww7½½½¡'“Ú’Édè%¦iR©TTû=d2I? 
&V¦iR,CAT'N§ÃWoooÝúq/,9Gét:\^.—ëŽÑ²¬º‚–e…“·x…=iW2™ Ýrã•cä8lÛsðÄbK;e_W^yåŒÛâdyä^“cW<=â:â¡¢]—5ˆÌ- §l'Ë([Ó–ÚH.— ûMñ“¾¨\.S*•p]—ÞÞ^Õ­[G.—ãýk×âÙ6A Ïïešõ^ K—ªpÕÑGÃÑG«åk×oвeêU# °paËö盼_~ôŸófnÙµ‹³–,ùøQ·ÀÆâRH\¾Û?ø`Ë—c ¼}ÔíÊ?ƒúçM’*UÂ{©í߬-+™&ní½üÛ_ýâ‰8µï´æuìðaB†ª¯áû>ß<ì0‚š0*éfóµãK’¹o|Hà¡~¯$êš ˆ„83ö?µý¦©wh±bŸKøµE Îùõ©p—¨ˆ|³àåFçñ4•ï“à€$J ˆ#í§Ér©%6fľK†OR7Kì*¾,@‰í&ÔóÆcsи¬q]9?ñuÓ±ãJÆŽe®'€U ùë_ßÉa‡máÑG?mï™øÃŽgÄ ±n ~H1.ˆ K:¡u!Jñ5¿a}¹à¤T·C}ÙîxžƒÆ”ž%ÔÅ,gÜX{cëùµõ¤ê€¬ïÕ¾Û"RæóDùäš ù¤­Ò£á;…VËgÒguÔõœ~ú8=íãqªöÿT¦»ÑS:|±1ùu Ëϲ]‡ñ’Cš‚žÛiZÑÑ#é'¿ÿGîÛ¿hÔŽ5‘HP©TH$a‚ûÆ2Á²#B”xÊD@ÍE_¼µ,Ëâˆ_ÁÝ7Rp ¡˜gYV¸?×uCACK¥R˜"±JÖ3 ƒ\.G>Ÿ—I»¥}"æ5§L8+jŠ8–J¥(—Ëuš 7«Æ)ߟІëº8Ž SI¡P˜õUê<àìCo‡÷´®ØÓÓƒmÛäóy\Ó±d]µL¦0#ƒ ñª5M“B¡@±X$²Ùl]¿U,ÃþV<¶ûÍN£çîñÝ;VóÄi¿µðC«Û_ã‘™(Áø{¨ŠâŸePs@›ÈãK¤$‘ð—Þr≡P'Î/9î8¦¾\ÇÌã_ÊÁåYȶ‡¡íÛy].¦£…æÚC³ój¡ò6ÒLô‘y~¼À»yqANœl‚ØrÑ5¤¼ sEˆ´¡P;¹KûÔ o¥†}$©KÄDØØyñP¶éJDÂZ¼VVа®Ø¡ÔÙJé6ÃEÎø1ˆN“k²®S[¿B$\JCÜ(¨½ö_|1ÎïÏ\#à“ŸÜÇ~ðÏ>û\ý;ñýåŒ;ý¬$ãL£:Š"‘ $+"W\ É5¬ç]TAm]õÊÁeQž_Y¢5Ed¸êìe¸R+ïŰEÐÊÇ>—‹Ù‰µYŒÄ Æl"‘«Ùù•×bt1¬±-Xž¦È±T˜“Ô‰ò¾?þqE¼£Êýþ>‘@nåéÔx€’ ©‡z{{7¶‘NFrk>Ó£þ§jßåÀðB¤å©Ït.Ÿ65{ú2‹øõC‡òäQ;hòø¶cÇ¢šöÓÑBÜaK^`ÍšÝj4‘É4Oîº|µ« n^µ £æ!ÖH£`U(B/­L&C±X¤T*…á“âI'¡˜²ãy^è)f|ΣÞLH§Ó;°ÃŽÇtL ›ÏÖ`@­S{\kd Þ|ú›£§qñ'z.ÑãqÙÊcwAb¹@uÄ6agüùc?U ˆnðrsHPÓPyhÀÙÇÍêKVG£yÛD¿ вµ¿e”-7Ky*èFë^9t%‹_»X½‘cœ¥®? O†mŸÖ–ï7©ŸÛ¥‰æ£õ]~žæâo™7‡9¦-íŸ*Q/¥ šŒ73DbG£òJmýê ¢¦&P—ªíSöµ)¶Æ!½LÖE€Dpiu¤þ–÷%”íˆJ,¢ÄH'ZƲÓ=¿îŸü.:6]©¼žryœ'Ó#,ç-q/FqK6bÛ‰ø;íOÖiÕiCýÅg¢l[æ4ÅÚò­íSTI“ÈÓSܦ¥½ñã”ÿm†‡=˾ä†÷æì®í³X{snæ©)"t™Øy—ï‘Ïr ÛÆ¯ù=äz7bÇ$ÇœSû¾hÉEc6NÁv~ï1zšý•‡ÄºŸ¦-Ä-?{/—}¹ >\€F5Ùó —ã™®.Ž]¹’;n½•µÿßÿ޹œJ.ÏÓêÑ_<ù²i2ÙkEDØ3M³~ “†‡ïx˜{NTtl :éDsD7 é ¤³ÎEû’NÚ0 ξóìh€ãÍš TGØK4P…ú±´Ažø%¡ï¦>žþÄÓXXpÑT"ƒ¥ÆGÛ™ÚþâOBe¤ÀBw! 
?³P}.u|Æ y2¤3÷ÎM}ÈB-¦jcfctì"üUà¢ÍM‰mÍ$O=÷¦Ã9t­ét$Ì쪮bÿ‘°zJöëû~X@<ÛÄsx¼OÇg“ëÿKNü-—=ðWÀ—ÚÝ”±ÎI„ÜïÏÖÇow³'L< ÓÛÎýgæß¸÷¼ÿýX "ܘ(•ÀÊÖ—’‡i+=õùiI¸À‚çkï“IkYêá&¾\€Ã 8†c=Ìj÷°, _­­_BÝ7°À•Àq&<¼Â߇IXoEã…ÜíÁaÆSƒO¥á‚Îôá6îµ!a+1D\ôöúêµx ØÀ™ž:ŽÞkõVôp-¶à¡,1á)^b©²ræá°dô€ðê|¾@y™ ‡À«‡€}:_Åb4¡´ÛJðžà Cý‡Yr¡Ë†ó+Ñäòë=doÉç_Ž}ÇG •†;|Xj“>gu){ÞÚn‹œ<¢-Ø5“t]8üð3øÙÏzÃ1†ï+Ó1db/[)8µßQÆ.jl&^q2\i–¡ >”±ˆ„¤^¢I¹0Xû<.8ÖMõóX¤xÛháf‹c׌ ™jüâ±´–bLH¾‘¼ÛF ¦‘¹Y<Ï€ˆÈâN+bÞX½K¨ë¤B½ˆ,}h‘H„k{Dó!™Kɼ "×g ÍŽ‡dÛDs'ˆÜŠ}¢|ñã®6¼ŸÌ[Å1¢i ‹DNÃy–ù_œ˜ƒ ®­¸àNì{\`n÷í¼7ŒñÄwWîü=Ë.®[¯X¯Ñ4ÒÑBÜS˜Ç±ýÇÁCÜÐðXȲðòyR©ÅK/Ån atÝú-¨¯ã¨A¯iB‹0Ô±ºzž‡ó=‡|9_Ÿ/@\™å !4ÌÐÄ;`!ËpaK?ž4¨‘ØÌjå¥+y`÷p Í1²/élŒËä"Þiw#ñí¥ r3ŠwÂFÃöñÐI#í©¹~ï\¼sL¿M'Qxîo9¯…ÎV*•°,K»-k:Xû¹ã¸7MxR¥S&†a„$¬t6 jeõ²»øëç~F' qr›ÞéÁ©Ü{焹Q›5Ã"L”w› ßñà O}~ˆ'™Êý+Ÿ¼ÞÀ©|¿ÅzL8ß‚‚{€B.0¢ð½þ–%áöyp¬Ç×n"÷—ÀϪqŒÜ“_íç\ø>° æ^°+PaÏ7¤à™ ü—©îÅÛØçÂ#œl£9%Ü=o«M%fÅ 5Qs}¸Å…¥ çÅί}ØQ;)qOx? Fº~æ8j–5¢ìkÓê%4zUñ÷¦ —ZÃÐÊÓ¯‡Ø½˜Ÿõ0Ÿ  ·WýVo‹>þ¹çñr& Àvp´™$“9N;­{ï¬[§T{Øg…hÊoO(! "ÔM¶[–Ÿ*>þë4Æû4B3)Ä´þÙ xùËŸk½¢„—B” N„܉Ð8Ï‘¼ÃÅ«±’a¹ÿ“9 GTÍGŽ!.ì6溋OUÇ2U˜hž¼fâe³}¥‰<ëâm³Ç°¿VHˆ1ðôü§Ç±açð¦ÇwñÔ+_9üЀ1­fât¬çw«ìxü°¡ùÕlY•J%¬2ZG£ ¾ç©Œµå²TçrQvæø:‰Dôt:®`›¦Zï,ÏÃJ§Áw!SFA¥±çr!7Jq;_SPD˜äØ#]C•x«[æªc0Œ¦ž€§½¯EXÂH{|`2™$¨­’ç´"Oý“•ZÓï|Éü%9‰†Ì,Cɾ¦IB?[Âé46Ûý4îó«9f‚ö*"œމƒ9Ð8çÍo†ûîkw3X2¨9Âçصx']µb9!"¼Y–\WÝO„‹kžh ^]‚ m0“jÌê,ã¦DØjtáêR,P<åË#…ÆöaÅþ¸ …‹Ži‚eDíÁ„wŒófnÛ­s¶jj3/ïdRŸ‘<À%íÆHs—VÛ‡miø< $­–Û?Ó×7¾óÑaˆ'œüÉcÏòŠWü¾ïsï½ßT+4«,")B Ô5 W(éD<¯^ õÉÆä:5ŒhŒÝl²ìû꺗¿M3r)ô}µMãý\Níêçò·e)‘¾qǩ߿´ÔwÄ·I¥ÔzA­kš\4JáŸNåÿwó7-Æ™ÚÿbŸIÆŸçm4¦ºÀC#]ól|㞬SÉ,öGð牕¬»ðôºå‰D‚l6«ókZÒ±B\¼°í`æUÉ Ã>Ç\)¬O<ää†].«eq, £¯Ü`e½ÆI£z*ÁAë¿iŽš{¹œºÑ¶*ÑlkYª­2‰ˆãyÃ÷çûõëÉß2ñ±ñ»âe˜MS FD„æƒö¸¨(È:;#Þ3‹ùÍ`•¿ªþxͰϤ°ˆF3¸ë‹ý_/º•JêÕ(xAÔ®ÆûM-­MØvÓŒÚé8ÊcRöQ(D4B\Pk6O&#wݨ½q¾Ùqv]ÄI§£óãÖ+¯œU}ùØcGðÆ7®P %ìTòÆ»æ €t“s.s™Ñ~fùÄóWò7-®[V©ÌÑJ+š)£c…8€Ã}3/iÚ¡ú¾O©TVA¯%ÉdýMk„ÐÔaË[Ý<³YèîVŸ$6¹aŽÊxnÊ‚a´ÞNrØÔŸÄáb¤,—ÿMsøù—ãq/•ŠÄÆÞxuƒy".íiõ”p0ÿÙgyîEM?Ó"œf6±ýw/ðŠEk©2N§éí­´œF3ý¸Ôò_{pÌÂûøðÒ¥˜âù"÷ dRåÛà«ìæ"ƒfúзÆ1á¥pr|XêÀàkå³Yåá)!}qm«W-¶?·Ëlø©°¯gŸ¯)Âbö® s95^á*þ€;N&£Ö‰‹RqO²fãJÛVãêf"ŠxÅI§[ã[aYêÁyœdr¸*7áx‰Ÿ‹øÜa*ñN×'ø˜æÙJ„sÏ{°ÑBÌ B<ËåúߨQX5 % Bdks éï›GqDhæL 
s–l¶þžà8Ãï‰D½‡ccްfÞ”ÍâíjæÔ!ó8™GɵÙJŒŒ;`ˆg¨œc±ÝÆïˆ¯×ˆëªõãß'¢º´[~¿8"&Ç©õ-+y„Ù† œÂ½ÀÙá2Éw| F~hÆFG qóž{ž…ǼÐô3 m3ÃÆBãºNÅhÊ:R¸ÉHÈy϶Í'£MnDœÅóäÓpÐðå®ëb†ã4³ ÿy‚êsãÚÎ0 LÓÄ÷}]DÓvÔ<¯dJðì³_ã÷]Wö9àÕò½fM8øœà4K5Z®HÁKSwQÙ·ûÂ Õ Q®âZA.ûMà>¼Ñ„[«õÕÃ<…ñŠ]0\Äm¼8Ú}:'³–5îq¼h¢{ÈáÉsfin\÷!­çE¾¾¥®¶íHßl† Ùã:’lS(ÀãŸ3}çhšøÎw¶³|ù#ÊVÿÓ‡g`ê%+£ÿîÍ„UAœ‡FG‰&’0n"úú>ôôÔŠóX‘×cã|¦\ŽŒAÒŒ2 Ñ ѺÑKqâÞ€®‰Ðü¼4:`ȹˆ/o–Ú)¾M044Ä K–°téRö}ç;ܺu+—\uWå+|øŒ38úè£ùæç?Ïš5k8öÕ¯æúýˆãŽ;ŽóÏ?¿ùïZ»8žž?ŸYÇþý<Сu‹ÇÁ²,–ª‘Îâžyž}¯9‡Æ¬’k¨# vBæ"A=Q’›Ü,­žùÃBv³¶Éáú)‰fVñžø~w‘¿ç=-ב" žçáyù9è媙½ÔŠˆð½ïíçÂ3ö°àö³.:U¦Ó]³fà¡Rïzœy³Ü[NòªOÝ)t¿õá-%ø«˜Pð!À?VEíÍÈaΨ±¤À}üRˆËi¢´sÕ“íìÚßfìÿ‰ÒèЪ˜î6W½=¶VŒ÷K0¿ÖØgbÎG/Nªuî)ÁC.¼< 'Ûjû-5Íâ$ ÞhC>¥´–l^¥Î+vÊ$¬Í«ã¸=¡Ö?µ¶oÓŒ¢We®-Q®•ŠÒ.2™úiB£žâû*Ý–h.A‡˜°¸v,’Þñ)`{í·ºµÎv³VŸÄ‚×fá7<;Y“šQ< {Üo¸øU»”P|'N¡sC+Õ0&&.ƒú¡*•zïÎVß!F!?îhcþ1¶ÉóãøÀà·öÑup½×EV§ÑŒŽâ|ÀßuËÿöUÃ>“ W3‡1 •ÛC¦Rê Ó,ãùÏsÐéw'†ËJ¥Òø¼95šà…={F}²çº.¹\Žb±¨!šŽCr9³ä—¼ûùàðð£rT(A£™D¼ŽÝùCî>ènÞºøqîôUåÜßÚPrj@ ž1Uz¸4J4K×ö0zz^uÅ‹V–ˆïàŽŒö›pTÿíÂïjóô<8ÍRÿ÷¤ÁNÂÏKpsÞœ+ ?Ê©Ï|x²«LØé«"–·9°Ï‡5&<ãóÿ³÷îq’Tåýÿ{aaaajEn³Üj@4Ô¸‹W$©ÖÄɚM¾Ú­›lÔĤ;£Ñ`ºsÓ°_1]1þ ’»Ì $h°+*Þ®SÈWÅY‘)ÝA@ÝÂY¹Ée~œ:UÕ=ÝsŸ¾Ì<ï×k^ÓÝu;]ýôés>ç¹p‘ ? 
a8džS€cS õø<~fÁ±Ñslx‹­®ó/.\UÍuõæ3DýDSz9«žØÔ£Öü2D‘ÀexÔ‡Àœ6 ¾‘Ü[®A  º¤Ÿ©sû(ÇLtÿ%žyØ›:ŸiÁÁŸœÚm³œQ¾ïWšð—€Ù'É¡õüs‰¼üc,šd2™8·X.—£\.ãEÂW±XŒÅ3qžOh.Èç Ã˲ð}Çq¨F^ªéó{ž‡eYqÄ‚vð"8˲ð£g…¸xöÿ ¿°uÚ6Û¶{ÃNX~LS…ÔÚ6ärœ{Þy‹?g9ìõ?dÓƒÿ \¿´s1„æ›O½ˆ+í'Ûn×tªÕ*–e‰Ç§Ðsè°Ô{ξ‡þàVÎ{ÖãÊ[@ö„>Ã!‘,®¿þ ÞuúÏúa0 ÿÇÂ{º€wê…øÑÐCFÅRujÌ]òH§g *J(ÊE‘jqñÑ(—ïÁÔ°-Ÿ‡ „ŸûðFK­«¾¥¨þ0¯Ž3€l>”Z›ÌGss×§i=¬X»ys.‘_í íZðv+ [×…9Õ#胗à'°€rË3{±Ð7ܯê§r pË-—ð7søŠ] ‚€ °mß÷©T*Ôj5Â0Äu]òù|œKL“¼Êå2¦i6Ìy›…7añ6}]-²¥IH/˜¦µ´0W]H®ÄUÆäQO3a6†¦¦OAhGÏ q´Ãó0 ) ”°Jˆò@\øú×w»%sæG§ŸÎÀ¡^^c›%\Oè7‚gà÷×ÿxÆùI0 Æ7¢DÖ«3%¬é|fZ€Ó)µÒsðf‡ŽtV ™« å‰õë±€+Ÿzÿ»®Îckšæ´Í4i‡-Â9ŽƒeYX–W„×kiá+}¾vé<ôy„þÇNr¯|üè†×åóæBÏ qý÷SüÆ1?àv“6ŠJ/ôkŸ¾Ÿµç&•K¥¶mK¡¯8-?l½­y5Xz‘}Çñ€÷Ç·ðàä>LSVG„þDûÒ”J`Ü·›3ЉÇLY]àô®ºc­&¢šÐyÝ}7G>ë.xGçÒX8އ`Z–Õ°€Øn11핦¿gÙl¶!\TÆô«—àkûyzh ~MçØ»f£g…¸»{†ßÞ¸Hò”J% }úŠWýâ-œõ‚Ä^E¬ú‘Ÿ~îN|æ÷¦½îG•²d°!ô:“‡ίþð‡¼òÎ2áï<§ÛÍ„c„!Ÿúä[[aô#£ÉÆ9 µ³i.¼x¤ ,†ÉNàŸ9Ľ›.â%Ë Ãé±Is±‚f±m>Î’›\hÇ~îÙ\¸³±j±äIæÂaÝn@;†Nz€·=ó¹†×Ä•Wè7&ØÿÐë<‚ÐO|ï´Ó8þà]\xÌO§mó}Ÿ0 Eˆzžû: ãkûyð¨MRµZè[<"!îO+¼ÿോE™ð }Çó¾úUÎøÀ]<|äâÄ­0 ã|k®ëR*•âmÚ+É4MY–•s×}‡sΙŒŸÛ¶-z…0'zVˆxà§yD’#ÎqYúŠÿ˜ÏüÙÖXˆ1NèKFžòÙü¢éÕ~óù¼ˆpB_pêãs÷|…«~¾YÈ}ÂB~/Û£«ê}´ÇL¿Ež~ÒåÞuc"*wtâý™ì©R©Äûi1h‰lj·¹®‹ã8ñ¹2ºBªh€>w©TŠ©T*ñ1¾ï7œ»_øÞ#&gm?sÞÇé¤=óµÇ[:ì4R*KMðàƒ¼óä/Äcá úö÷Eè<=+ÄÝ{ÍÉì<3~ž®0#ýÀÉœòøâçããã2ú’5ONbüZcG¡Pèv³aÎüI½Ž÷Ðsx2sb·›ÒÒBA3éI‚çyñä>=Á ÃJ¥ï¯'ý™L&>‡ã8ñulj Š) mEÝ—4{´är¹øqZ„p§¡mÍÂ…Þodd$n§çyñã´pùÈGºýqÍ™àq8ï=_äÙÅÏÞrD·›ÓQÒÞOa6ˆ\iÒ6”jší&m_zŸfQ+mÃŽãÌIˆÓAÐÐFÃ0âcÒ>ÍÞ3iï­Z­oËçóñÂn6›EØæ\gýÂ=ÏœÄ/¿m˼«T*ñ},—˱“†nB7Ÿ<‚o"~îyžhÂÜ™êAÞþö·O=ûâŸLýÓoüo·›"ôoûۻ݄9ó¯ýÔÅkF»Ý ¡é;¾í¶Û¦.û¦Î8ç‡SŸùÌ—â×<8U­V»Ý<¡‹ÜvÛmSW_}u·›1'®¾úê©ñ÷”§^xؽSõz½ÛÍ™Õj5nk½^Ÿ*‹SSSSSããã ß=Û¶§ÆÇÇãǵZmÚ1£££S¦iÆÇd³Ù©ÑÑÑø:éÇú˜ƒN•Ëåøqú¾5?>xð`ü\Ÿk>èöw‹«¯¾zê¶ÛnëjæÂÛßþö)ë‡SSÿ¬ÿoêEëÿµá¾÷*£££ñç;>>ÛÎøøxƒ­Ù¶“¶Ïr¹Û{­VkøèÇSSSS†aÄ×IÛäèèè¬÷)ý]égúe\1555õW¿þëS¯\sûœ÷OÛ„°²é;Öc  ÿù©WñÕn7Gè!æ3>îY¸3îø!¿øAµr^‰„~aíä©L­Qå¬K¥’¸* }ÉOã?<’‹/¾ ~Í0 ‰úе7ýˆC<ݱPê´ÇLÚË&6nÜ?N{ܤ'lÛŽ½_šÃ¬ÒÞ3Õj5Þ¯^¯ÇÞ!¶mÇž5–e1>>Óìe“~¬1 £ášéûÖü8í…²°ßù$L_íØŸ}˜ÿ÷ø:^ð¦Ã;îý“öB ðÁs,í%™ö^ô}?~œí4 £Á“©Z­ÆçJÛg±XŒk²ÙllŸÍy¿<ÛQÚ&-Ëšõ>¥¿+Bgxlü6öÔŒûxžÛK¹\–”BOòôÏ~Άcû8A˜ 
=+Ä÷Ó‡1MSÜ;…¾åGÇч«9›ÍÊdCèK6>púI<™q]×u»Ý,A˜ߺ'°4‹!éµ´x–¿L q¦iÆ¢–iš„^åÿÝ %Ĺ®Û6d^Z±¶Û h‡Ïù€ê|ÅóBèG~ø$<[9ÄÉJžÐ·œì}—5O=?~n† Š…¾ã_Ž·Ø0O³ Ã0Îë”ËåsÖ¦=iÒ^=i‘b®žKRøD˜¿¹ë.|î­XÖ¶ïy^lg…B!öˆ ‚ ¶Ó´f³ÙXø“<ç´uÓ^¯T*X–…mÛ ý© ô*¿þéÿá”W> З¹…îÒ1!nrr’Ý»wsèÐ!6oÞÌ–-3'è|ÎwÇãû¾TKz‚ùÚðþ OñÌÇR*•’ì B7™¯oøúmœ}ìáÀ…€ˆBo0_;þêϞ˯»wÆ}ôjvµZÅu]|ß§\.cšf,ÂÆ&,óµã;x*—ì8vÖóAG• êõ: „8ím™:Ò Þ2VæË|íø±WÞIáÝ¿7íõt Aè4óµc€08žM´!^¸„ùбÐÔR©ÄÄÄÃÃà ù#Úq÷†…•®„åb¾6¼~èëüÚ;÷`Y– l…ža¾vüíǞǺk‚€‘‘‘n7_€ùÛ±mWø­ßzñ´×]׫vf³ÙXœH磄åb¾vüù© )§/†AÐP!V‡–š¦‹p@,, ÂR2_; 2C×uãj¶Í9'¡“,ÄŽŸáa.»ì …‚äæMG<âÆÆÆ8pà×\s (Î3yV|þóS]_cxx¸k×u¯:Äàà`WÛ111Á† èj;ºÅBløÙÏ~v×êÅ~é…ït·ßÿ|íøö Oqù›NÄ4MjµZWÚ-öÓ›íèæûŸ¯ÿÞïý^<Á«T*„aH¹\n½[nzÅŽ¥½ÁBìø«·ý8P†Õ9 uXßrz.÷Jÿ#íè bÇi´Ív7"vìY†Ñµñq¯°Úí'¶¹Ð¸={ö4|8[¶laÏž=3Ó ÉÀwíÚÕí&066ÆîÝ»»Ý vïÞÍØØX·›Ñ5bÃO<ñDìiÑ-Ä~é…ït7YˆçrsÒI€î…,‰ýôf;ºÅBìøßøF¼º],»âíÖ+v,íè bÇÍÒ•B;µð×+ý´£7XˆONNÆãcÓ4»â'ããFÄŽçgÇ÷ÝwwÝuŸþô=RX±Ÿ4Ú6æBG<â:ĦM›âç³­>ÜyçLLLðÃþcŽ9f¶Ó/ûöícÇŽ]»>ÀOúS&''»ÞIïß¿ŸÛn»ãŽ;®+×ä‘GØ¿?<òHW®?_¸ãŽ;xâ‰'ºjCb?tû;­íøé§ŸîÊõçkÇ{÷î哟ü$kÖ¬aïÞ½]i3ˆýôZ;zè!zè!N:é$vîÜÙñëÏ׎¿üå/ó½ï}ã?¾«¯^±ciG¾}ûxüqUñn.ù€–’…Œo¾ùfž÷¼çqÖYgðÅ/~±Ãw¬ûý´£‘~qä‘GÊø˜Þè¡ûvüÐCqðàÁ¾ïÛ·ï~÷»ìÝ»—óÎ;g?ûÙ]iw¯ÐmûéöïßÏÁƒ9ꨣæ´OVM½õÖ[»ÝAX4’+@èwvîÜÙ¡E–’^ð°„Å"cca¥ ãc¡ßù«¿ú«n7AXt$4uxx¸aÅall¬ë±ð‚0Ć…•€Ø±°;VbÇÂJ@ìXX ˆ Ý £BÜää$ *:u#1§ ,±aa% v,¬ÄŽ…•€Ø±°;VbÇB78ü}ï{ßû–û"ZQ¾êª«‚€›o¾™÷¿ÿý¢4 }ƒØ°°;VbÇÂJ@ìXX ˆ +±c¡¬™šššêÔÅ&&&8pàÃÃÃbØB_"6,¬ÄŽ…•€Ø±°;VbÇÂJ@ìXè$âAAAAaµÒ‘q‚ ‚ ‚ ‚ ‚°ÚéHޏ^crrÇqØ»w/ ǯò“Ÿä+_ù ›6mj8¦Ý¶¥hË–-[æt­åhÇîÝ»ñ<¯á^Ìt­åºÂÜigÃz[/ÛñrµAì¸ÿXˆ/ççÖ«vÜï´0wÄŽ§#ýqÿ!v<éûOGìxéXm÷¦—4“^a)µ›Uç799ɶmÛU!Åó<ŠÅ"¥R)6²R©„çyñq3m[ »víâÚk¯mx­“íØµk{öìaóæÍìÞ½›]»vÍz­åºÂܘɆ¡÷íx9Ú vÜ,ÔŽ—ósëU;îÆwZ˜bÇ­¯/ýq!vÜúúÒ÷2>n}}±ã¥c5Ý›^ÓLz…¥ÔnÖvûÍtš±±1عs'›7oæ’K.allŒpÍ5×ÊøvïÞmÛ3n[ žçÅe’ÓíëT;&&&¨×ëÜtÓM€RhëõúŒí\–{!Ìv6¬·õ²/‡ýˆ÷' ±ãåüÜzÕŽ»ñæŽØq#Ò÷'bÇHÜŸÈø¸±ã¥eµÝ›^ÒLz…¥ÖnVGܦM›¸òÊ+ãç‡`Ïž= îË[¶laÏž=³n[(“““\}õÕ mét;ôù&&&âólß¾}Æk-ǽæG;†Þ·ãåhƒØq²;^®Ï­—í¸Óßia~ˆ7"ýq"v܈ôÇý‰Œ;^ZVÛ½éͤWXífÕyÄ 288¨•‚b±ÈöíÛ9tèPC̮ޘqÛB)•J\yå•ÓJ#w²LLLðö·¿ááaöîÝËÎ;ÙºukÛk-ǽæG;†ÎÚÌߎ—£ bÇýÉBìx¹>·^¶ãN§…ù!v܈ôÇý‰Øq#Ò÷'2>nDìxiYm÷¦W4“^a9´›U'ÄR4¯»î:vïÞÍ•W^‰mÛ 
9L–›k¯½–ááá†$ݼÚeyllŒ;v°uëÖn7K˜…V6ÜiÄŽ…Å"v<ý~ˆ÷bÇÓï‡Øqÿ!v<ý~ˆ÷bÇÓï‡Ø±°Pº­™ô Ëõ^u¡©;vì`rr’o¼1î ‡‡‡‹÷Ñqѳm[{÷îåÚk¯Å²,,ËÀ²¬Ø}±SílPi‡‡‡ã¸çv×Zê6 £• CïÛñrØØqÿ2_;^ŽÏ­×í¸“ßiaaˆ'HÜ¿ˆ'HÜ¿Èø8AìxiY÷¦ÛšI¯°\ÚͪóˆÛ½{7Óâ{õšœœd``Ïó¦\«m A'ìÓX–…ïû€rýìT;¶lÙµ×^ŸoÏž=±Ëd»k-u„ùÓΆ¡÷íx9ìGì¸?Yˆ/ÇçÖëvÜÉï´0ÄŽ‘þ¸?;nDúãþDÆÇˆ/-«íÞô‚fÒ+,—v³ê„8°R«™ß÷¹üòËÙ¶m[¶lÁó<®¿þz@­(´Û¶ÔÌt­¥nÇàà ™L†mÛ¶±iÓ&8ÀüÁÌx­NÞ ¡53Ùp'íg&:i?bÇýÉBì¸ÓŸ[/Øq¯|§…ÖˆO¿–ôÇý‡ØñôkIÜÈøxúµÄŽ—ŽÕvoz]3éó}Z3555Õí7ÐKLLLpàÀ†‡‡§¹Rδ­ŸÛ±kuò^ó§×íx9Ú v¼òè•Ï­ì¸W¾ÓÂü;–þx%Ð+Ÿ[/رôÇýK¯|vbÇý‹Ü›ÙïÃj¼G ¹"Ä ‚ ‚ ‚ ‚ BX•ÅAAAA¡Óˆ'‚ ‚ ‚ ‚ @„8AAAAAè"Ä ‚ ‚ ‚ ‚ B!NAAAA:€q‚ ‚ ‚ ‚ ‚ÐDˆAAAA„ Bœ ‚ ‚ ‚ ‚ tâAAAA¡ˆ'‚ ‚ ‚ ‚ @„8AAAAAè"Ä ‚ ‚ ‚ ‚ B!NAAAA:€q‚ ‚ ‚ ‚ ‚ÐDˆAAAA„ Bœ ‚ ‚ ‚ ‚ tâAAAA¡ô…—ÉdºÝ„ã8”J%ÇYÔy|ß§T*uûíô,bǽE†”J¥i¶/vܱáþa.v¬¿« ±ãÞ£U,6<3bÇËK'ÇÆ«ÙŽAly¹™¯-Ëøxþˆ ÷½dÇ}!Äyž×í&,š5kÖt» -)•Jxž‡mÛqG½PÂ0Ä÷ýn¿¥žE츷À²¬Û;nØpÿ0;.•JT*•n7µãˆ÷­úc±á™;^>:=6^Ív bËËÉBlYÆÇóGl¸è%;^Ûí´" C\× ›Íθ-,ËÀu]‚ ›Íbšf¼†ëºX–…mÛÓöë~3µ9LÓÄqLӌۮ?lÝÎvÃ0ðf%ÑÎŽg²aX¸†±(N_§Ÿìx&›l÷~<ÏÃ0 Êå2†a´¯f;^h_ ,¸]l_¬ÛÖêsï´ 7Ÿk!v<Ówr!ý±¾w®ëÆÇ®túqL‘¾ÖJ·ãvýqó{[Í6¬?ƒ~³ã~S,dl¬·Í¥?^ív¬ïÕjßtÓM¼üå/_Öþx!¶,ãã¹!ããÞWô‹^Ñsqa’Édð< Èår Û3™ ¾ï###±JŸËåp'>>ÚËåâÕÕ\.ŸÃ÷ýØ•t®ûµÃ÷} …¹\Ž0 ©T*qÛÒÆ=—sd2‚ ˜õùLç™Ë{ñ<˲‚ n[±XœñýèsëÏE_§Çq( †1çcV3Ùq;Ö÷f¡v¼XÖ×é7;žÉ¾Ú½=@I¿§V?^«ÙŽÚë{ÓlÇê‹õ9Úõ_Ð9žÏûig_³}'çÛë϶T*Q­V»d]£_Çú«ÁŽçÒ¯fÖï·í¸Ç §Ï?S¼ÚíX¿gg¸í¶Û–½?^ˆ-Ëøxvd|Üû㊾Ñ+¦zŒr¹<•ÍfãçÕjuJ7³Z­NÙ¶o+‹S¶mOÕjµ)˲â×ÇÇǧ Ã˜ššššª×ëñc}L±XŒŸësÏu¿vÔëõ)`êàÁƒñót[çr«õ9ÆÇÇçô|¦óÌ彋Å)˲¦LÓœÊf³S†aLU«Õß>·~}||¼áÚ¶=U­V§,Ëj8¶Ý1+•vvÜΆ§¦¦mÇ‹µa}~³ãÙl²Ýû)‹SÀ0eFCûÄŽÖOMµ·ãNõÅúí>÷NÚð|ÞO;ûšé½,¤?žššŠûò¹Þ~¦_Çú«ÁŽõùšûc±á„~µã~S,dlœ>+Û;Nñ1qÿ¶ÜýñBmYÆÇ3#ããÞWô‹^Ñs¡©Ú­Q“vlví´m;VM ÃhP-ÓnÞsu+\¬û¡mÛñ*×B±,«aåa¶ç‹}/aÆ.ËZ}Ön¤íÞeYñë¦ibYV¬ ëÏÃ4͆cÛ3“ûk?ÓÎŽÛÙ°¾w‹µã¥p¡íG;žÉ&[½Ïóbw}íÒœËåÄŽõ=šo_¬ï];;îT_¬Û´;^*žïûn¶¯Ûo¿}Æ÷2ßþ¸R©Äîÿ«~Sè6­t;n×—Ëe±áÔ=êW;îÇ1ÅBÆÆúü­l_ì8AÆÇVìuÖ‰þx¾¶,ããÙ‘ñqï+ÚÓkzEÏ…¦ÎÇ0Ò°išØ¶ÿÕëõn¿•%yÿ³=_ ¦iNûâ,$OEóçpðàA€Ï®ô|sýœšïƒØñÒ0›}éQmÿù|¾!q§ØñÂûbXvÜm†…Ù×lý±çyT*Ö¬Y'æ]³fÍŠH4Ü Sô¾ÏÔ‹ +ÄŽûol Ég!vœ ãc£åãùÜ›¹²[–ññìÈø¸÷dzÓ+vÜsBœeYq‚C ¡l²iš AÚãE«•Z- Ý~+=mÛAY»<Íø¾£k¥XùªÕ*•J%ŽŸé˜•H;;ngà v¼æk_:@úøVƒ£ÕlÇ é‹Aìx1´²¯ 
/¼pÞÇÌÔ×ëu¦¦¦â?€©©)±cdL±TÌ׎gêņbÇc¡cchoûbÇ 2>î ±eÏŽŒ;O'ÆÇ³³ô\hj>ŸÇu]FFFâ ém¾ï7”UbƒŠÝ}uµ—^A»¢öR»tu‘‘‘‘yÝ7Ã0Èd2qgÝê˲Èçó”J%òùüœŽYI´³ãv6 bÇ‹m×|ì«X,ây^üù´»×«ÙŽÒCïÛq¯Ú°nÛ|ík¾ýq­VëöÛì(2¦èNÛ–º?^Í6 bÇd¡ccý~f²ýÕnÇ ããN²[–ññìÈø¸;m[îñq7ìxÍ”^†é1Ò+z]¦V£ã{uµ!­\¦ã{{… ‚ 'WtÛærß´+}­V‹c«g[]YÈ1+…f;žÍ†õkbÇsg1ö5Ÿ{½Zíx!}±~­í¸m¤o]ndLÑ:Õ¯VÄŽ;ß¶¹Þ7é燌;ß¶ùÜ7ÏŽŒ;C§ÆÇݰãžóˆÓ´2]êW³ã8 JåR$0œ‰ ÜO›™I5ÕäLep磺.¦-­Ú6_C3 cÞ_Ô…Óï4¿ßÙl–׎k7KiÇKið0ûZȽ^mv¼¾z׎u·Tö× v¼Úlr!¬Ä1ÅJ³ãå¾ß+•hǰrÆÆ ýñ\‘ññòµ¥UÛækË2>ž/_[ZÑ©ñq'í¸g=âÚáû~—­]<…ΞçÅw–똕ŒØðÒÒ)û;nDìxi‘¾µ;ˆ/-b“ÝAì¸ûˆí/±ãþDl¿±ã¥¥SããnØqß q‚ ‚ ‚ ‚ ‚Ðô\ÕTAAAAAX‰ôdޏ÷¼ç=œxâ‰ÝnwÝuÏyÎsºÚ†C‡qèÐ!N=õÔ®¶ãþûïgÆ lذ¡«í¸ë®»ØµkWWÛ0WvîÜ)öÑKöÓíÏࡇâø@·›1+¾ïóéOZì'¢Wì§ÚqèÐ!¹üòË»};fåºë®cbb¢ëöÓ+v,í˜ÞŽ_ÿõ_ïùu26–vÌÖÏ7Ò+vÜ/ããÿþïÿf÷îÝœþùÝnJOÐ+öÓ <ðÀ}ôÑüÙŸýÙ¬ûö¤wß}÷ñº×½®ÛÍà¶ÛncóæÍ]mþ}ûØ·o_×Ûqà 7088ȹçžÛÕvÜvÛm]½þ|8xð`×?7±ŸFzá; pÍ5×t» sbrr ë÷Lì§÷Ú±oß>&&&º}+æÄÄÄDOØO¯Ø±´cz;t_×ËÈØXÚ1[;ú'ôJ?Ø+vÜ/ããû'ì¸Wèûéêõ:ßýîwç´oO qÇs [¶lév3¸è¢‹ºÞŽ6lØÐõvLLL0<<ÌððpWÛqÜqÇuõúómk·?7±ŸFzá; ÊC§X¿~=§žzj×ï™ØOo¶£Ä € 6pî¹çvýžõŠK;Ù»w/ëׯïj悌¥3!ããù!ããFzÅŽûe|l'Ÿ|rOܳ^ W째ï¾ûxàæ´oO q½ÂÎ;»Ý„žèœ¶nÝÚí& @ì§‘^øN óGì§7Û!Ì^±ci‡°z¥ÿ‘v‹AÆÇˆÏÓO?ç>÷¹ÝnFÏ ö“0Ûb ‚ ‚ ‚ ‚ ‚ÐDˆAAAA„ Bœ ‚ ‚ ‚ ‚ tâAAAA¡ˆ'‚ ‚ ²àû>™L¦á¯T*ÅÛôãf2™LüØu]FFFX³f CCC8ŽÓí·%‚  Fª¦ ‚ ‚ ‚°,„a@½^ 2™ Ùl–0 ñ}¿åqžçÅû FGG1M“0 Á²,,ËêöÛA„y#Bœ ‚ ‚ Á4ÍXP› AÄdžAµZmy|©TÂ÷} àX,bY¾ïãû>ù|>Þ'›Íâû~,†aH>Ÿ'›Ívûö‚ «âAAAX6‚ ˜‚jÛvìõ6¶m044D6›Å¶íøµ4•J… ¨×ëT*lj» âý|ßǶm‚ Àqêõ:†aËå0 £å¹Aa)!NAAVŽãàºnWÛP.—g ‹§Å÷}\×¥T*Å‚[ú†a®ë’Ïç1 cÖóæóùøù|ÏóDˆA–âAAa…Ïçã0Ì^Á4MÊårüÜu]Ç™S;µ`gÛv,š•J¥Øã-ý¾Mӌźl6ÛpÍÙ˜‹p'‚ KTMAA¡cø¾ç|›0 ©T*Ó^oÎô>Õj•ÑÑÑ–ÇÓÂT5žç‰'‚ tñˆAAaÙð<5kÖÄÏmÛ¦V«áû~ËmºÂ*@6›Åó<†††bñ. 
Æ},Ë"—Ë‘Ïçñ}Ÿb±{ÉáÒá¬a’Édâ°ÖùxÐ ‚ +߇t†…ôó0„BÊe˜ãšÒ4DˆAAaY°m›©©©yoK¿^­V ‚€ 0 £eþ9Û¶+¤jÑÎ4Í8ÇœeY±×›Î—Ïç ‚@rà ‚ ¬RÂr9°m%¶y8Žz^­‚a€ëª}ÆÇÕ1†¡öYèú„¦ ‚ ‚ ‚ÐÓ˜¦Ù'®ºêisØ«~½Uè©>¯ ‚°ræ ®«Ä3Pâ›ÎN`9QãyêùÁƒJˆ Cõz¹ ££Éþå²:Ÿ>—æÑG×Í©}â'‚ ‚ ª¢× Z‚ sÃ÷•wZ­¦BCµwšëB6 ù<ÔëP*ÁÐÚ'Ô6½î†É(7ýzóšLóÏDz-Ç0Ôþ¹œ:÷è(LL¬ã±ÇŽšÓ{!NAA„UÅ\‹E‚ ½…ã(ñ,“Qžiù|ò§1 %²µÃ4•×ÛbÈç•]­¦ž>ÁÄÄOçt¬„¦ ‚ ‚ ‚ ‚ ËŽã¨b:¤Ó÷•WY+ÂPí¯÷Õ!¥å²ÒºéÜlYIHë|!Nú”B¡Û-AAA„öüf¶d”€æŸ5€(ݧ \éÃK2­Å¸J¥ñõRiziW Ã8ÝqßøÇýä's:L„8AèCôÊ€çu»%‚ ‚ íñ}ŸL&ÓðW*•âmúq3™L&~ìº.###¬Y³†¡¡!m[a,þ¥â*áÍuUø¨„Ex…à5YðóP2Ôówç2¼1§„¶MCpòàÊeÈ—!ˆò³U« ¯T:/ ¥:ŽJ†ªAÚ5/—S!FFÔ~CCœö‘°îÑGçtzÉ'}‚ï«XwÓœ^FA¡ £IK½^ 2™ Ùl–0 ñuÙº&¼hµ1 …£££˜¦I†ŒŒŒ`YÖŒTAV0ÔíVfÒ¢ Ñvƒ¥'”XfšJ õ8“Q¡švœ,<ü« ëŠðìéíÍ¢D¸zÔÎ÷gÁ5áÈŽ(³×Pï_ÿJس¼ï%#-¶†JH72¢×jɽŸa€ï³/ yhllN—8Aè‚@unZp/Õ_ºT² ‚ B¯cšf,¨Í… âc àZ­¶<Þu]2™ ¹\®AàÓ¯g2\×”7žã8”J¥/½B¡Ðpl¡P úÍE„~&Ýã¸@%¾…€ƒ°œÔö(¯3¢ÿ^´¿Ægáh§0PÎ ™ŒÒ¥¯«LFMt§ýd6(%’™û·ØaHñ9ωÝóœr™ `?ü0\x!~¡W\Apì±pØaØQ»MˆÝú´‡›ÝŸJt³aÈi_ÌŸÿùŸó'ò'qS.»ì2†††È_ußgÍš5ØŸý,G¿ã\|ñÅ|ç;ßáMozÓœnÁ’{ÄíÚµ‹k¯½¶áµR©ÄÄÄÃÃÔJ¥y­€ B7è¦{ž U×ýÑLå™GõIA ‹gœ¦ÙŽ¥/ú S+±ãéÌ'ÜS‡6c4y84‡º Åû¥_÷}Ö"Ùl–J¥‚mÛÓ®³š‘q…°èu;P‚–öÀZl`|³‡Z x³ gï‰ÑL`mN…jßIèk8HâfÒ(µ$ “ükž§ÏÊe•‡M/¨¤½œ]††’Š¥Íçô<墽ÚÊeòQ;íµk±_ô"Ê@}`€ñÃc<º™è=½òØc9ü›ßäõ=FT°•¯}â¸7V*|ô£åÅ/~1ŸúÔ§¸é¦›øÍßüM†††xüñǹîºë¸ó}ïãðW¿š3þýßq]—;3K.¹„«¯¾šÓN;mNŸË’zÄyžÇäädÃkccc8p€k®¹Hg{&uAºH·íØuO¸™¼ÜLSm·,ÕoiANÒ˜0ÝŽ¥/ún÷Å‚°ˆ'÷aÍš5ñs:êû~Ëmi¯;*:44‹wa6ìÊs/“ÉÄÅ²Ñ ªùuÃ0ÂO[‘ÍfšvÕŒŒ+„•@¯Ú±öZs™‡WÙÐOkµÆÜãy |+ oÍ“ˆp ZŽC%¢åó*Ô ”èf|ðƒä¿ÿû¿ùò—¿ÜðzŒ-u]ÕØô·RI&ÉM‹*À±Çòà ;2ÒàÑœ CBÇað o`llŒ§þþïY»v-ýéOq?Ó4)‹8ŽÃÄÄßýîw) \~ùå”J%>ùÉOF—¯ðž÷¼‡¿¼ürÞúîwc¼ûÝ‹EöìÙÃÄÄÄœnÏ’ q“““\}õÕ\ýõ ?l{öìaxx8~¾eË®ºêª¥º¬ ,)½`Çž—TIž‰B¡…tXèq¢a¨Î5ŸAn5ÓÊŽ¥/ú‰^è‹a±ˆ+lÛfjjjÞÛÒ¯W«U‚ ˆ…´Vm†a0::Šçy ûè×uN9ýzsч´èfšfÛv­Fd\!¬zÕŽ}Tε"Jøª-ît@✆‰¦†fÊ ‹,4?nIª*®ŽC±XTu]ÈçyÛŸþ)O>ôÿlÛJP³í†¤çwÜqßøÆ7Ã0î³}ôQÞõ®w1®˜Ï·nl¹Œïû¼ô¨£xüñÇ[6íÿñùîw¿‹ã8X–E¥Rá†nàw÷wù÷7½‰‡~˜ÿùŸÿáꫯæŽ;îàK_úRr, Ïó0M3Nöà.‹*UÂYg-ê3Z2!®T*qå•W200Ððú¡C‡Ø´iSü|pppÖsíß¿Ÿ;v°sçΆ/†°zc×®]ìÛ·oY¯³”v¼oß>vìØ¯°Ì]´ÅóZ‡¥ê ÈårÔjµxPé8I>ÊbqYo•°´ïß¿Y¯ÓÊŽbÃ÷ÝwŸÿüç›· 
+—Ý»wóÿñlܸqÙ®±”}ñ]wÝÅm·ÝÆE]Ô£KvìØÁþýû9ùä“—%'ÐrŒmÛfë֭ݾu]A[˜vž,³…£ö#Ý/ÄŽaáãcaå²{÷n<Ïë»ñ1°$ã Î ©¾9ïs¤æz>8:ªÒ™¦Ú–.º‚ àŽ-[ðàùë×3qÝuêÞ…!¸./øñyÿOÀîÝê ÛV‚að…/|/~ñ‹¼ï}ï#“Ɇ!ëÖ­ãî»ïæÌ3Ïäί~•3ë·ø¬ëƞ͚7¾ñ|ìcÃu]Ö­[ f:§ç§>õ)þôOÿ”ÿùŸÿᦛnbÛ¶m¬]»–k¯½6.à³mÛ6î»ï>LÓäÃþð´÷—^¤i÷›Óê÷d×®]ÜvÛms/‰wíµ×2<<¼dƒ˜ÓN;M:g¡ááa®¹ævìØ±l×Xj;>÷ÜsçeǺ5Íö¥®HfYµZ Ïóp]7î *åÙ뺪Öçzƒ~³ãÓO?W½êU"^ lݺ•ÁÁAöîÝ»,ç_ê¾ø9ÏyNO$_zk®¹†]»vqúé§/ù¹el,t‚~WÀüÇÇÂÊgëÖ­lݺµoìx©ÇÇ>*GY»)[&‘š æŒ–•Ì õZ6›„›j_Óœ‚:_ÒÝzîù­ÃgëpJ˜û\Àë×óM›øø­·ò_ëÖñüóÏ'*• Ÿþô§9묳xÁ ^Àk^óÎ;ï<òù<»víâŸø—\r o}ë[ù0pÖý÷ó·û·Ø‘€÷Ío~“»ï¾›Ÿþô§üéŸþ)ŸúÔ§øØÇ>F¥RallŒ×¾öµüïÿþ/ŸùÌgøßùÎ9çlÛæ¢‹.âïxG|~€ë¯¿~I>·fvîÜÉæÍ›ç<>^’b {÷îåÚk¯m¨–dYVìê966ï;666mePznÛ±ë&¹(M³µçº.¶mS.—1 Û¶ãP P!ôÙ¬ò VnÈAœàØó¼«„ +ƒvv¼víZé‹…¾ Û}± ,bÇÂJAÆÂJ —ìXçiÓøÌ\¡TRB›žß•Jêq.—œg|<Üòùd¹Pîñw¾\—B¡€ã8¸®Ë=ûï¹öZÆ«Už¾ä … ¼Æ>ôâóK?ù ArÉ ^Àý§œÂK_úR.½ôR Ãà'?ù £££|éK_bpp¿û»¿Ã0 8Ð ’½ímoã§?ý)ýèG9ãŒ3ø­ßú->ô¡qÁP«ÕØ»w/Ÿýìg9ÿüóÉf³ŒŽŽrùå—S­VùÖ·¾Å%—\Âu×]ÇŸýÙŸpà 74œ¿—X¸æU ˲bq`bb‚±±1&''ÀóÎWY,ÂÙgßC¥RÁuÝØ]·Z­NsñVíìxbb‚Z­&}±Ðót»/„¥@ìXX)ȸBX ô’kA­\U4ã$éÔG½¦§Ùl"´ú[HM™vÕ£u ¤ß»æÖ:ozÁ ¸ø_ÿ5 ±*• P 29Iö“Ÿ$‚8—šaüÓ?ýÅb‘Z­Æ‰'žÈ?üÃ?¨üq)Òš±,‹¯ýëœ|òɸ®Ë…^ˆã8 ó×üãq5ëVN&_|ñ²~~KÅ’VMmÅàà —_~9Û¶mcË–-xž·lî€sAçÔ„ù°Üvj¥£]òL×uÉårX–w<šr¹L©T"›ÍbŽã`š&¿ò+7òàƒP­ª„–Õj5îtG¢*2sɵ2_tˆ­ï+/¿ÅºCÏD.§¼ÛáûÓŠé¬Zz­/„… v,¬ÄŽ…•€Ø±°è†û‘vdy0d«ê¤iôœ0“QÏÓºUzêV«)An¾yß|ßgdd„jµJ>ŸÇq|ß'Â0äòç?Ÿ™ O<ÿù\ü?ÿ_ þè¶Ûø pÕ]waÉd¨V«ñœòOþäO8xð ¶mcÆ‚¢±6lØÀ/ýÒ/qÖYgÅzÒ¬ÁY„¸t¨ÀöíÛÉd28p€;wvÅmY'¶×F¶„°²è¤ëv”J¥¶«Ùlß÷Ù¸q#µZJ¥‚iš¼ò•¿ÁQG½Ž|þd@­är9*•Jo¯WƒÊ PËÅ_¬D¯J…o?õŸÝ¸‘bT‰”ö™TuQh¬6­Yȼ2Íõ×_k5ÆRT—èQ–Ý#N3888çj:Ëã8qŒL&C±X”}aÞ,‡û~RNºZ8›©Ú—vùÍårñê†Ê4†aP,ãÎqdd$Î7[‡ÉŠ‹ã¨Uš´@å8IEH®«ãÒÇg2J$³¬Dhó}õW­ª„£££I.íY×,Ä…a"Üékf³êOçKXˆXæyêœÅ¢z_–¥Ú›É$+OÚÏóÔó~Šöív_,Kر°XMvìySSSÓ^ÃpEO´V«ÉŽ…•K'íx0—3=$Ôüf±ùÝ@Íóù|쌔ÍfcO²j”É,•xúsŸcíoþ&ä#ð½ïA±Èã?ü!_Z»6ž_†aØàÄT*•(‹ËÖw¯Ä ×­è˜×MÂ0Äu]FGGã|Yþ\]ua™©T” Ó*Gœëº¸®Ûrµ!Ù\×m˜µ—ö2ËfËT*JdÒv™L†7rðàÁ¶ç×X-Kµ7›mt…6ŒÆ²ÙÚSN÷£:×AµªÚR©¨}µ§[³&®Û^Ò¯éÊA–Õ(ø¥EG}ýºi6V”õ}Õ>}Œm«ÇG˜jß杖ø¨ŸT(²,x{>X†­yøtu7â …dåJA„ÙÑcr•®¯=æ\×ʼn%åóù8@G¹è´‹õÂAè>­ÃQƒ@ÍqfH¡–Ú¿}.;Ïóp'v¸¨Õjª_MM d(“á˜}û8âÚk•wD4ézì½ïåäÔĪ9¿¸;ǬBœN\د”J%‚ ŸÏc–e‘Ífq‡ 
f8a¹©Õ’È4•JÇq¨×ësêì Ã`<‡iÛºrªòàª×•ÈäyJ\*•Ô>z@یϴåûêxÓL¼Ð*u^-¦Ú_…uŸïûê\ öÏå:ßO¼Ý´ç\¡l7Muný{P*%%¼óyÕ†|>ÙÇ÷ÁÌÃu>œ`Â{~t¬Ýô¼+:.E)]&%ª…aHñı^¯ãû>™LÛ¶ ‚€R©×3™ ¦iÆ ÅkµÅb‘‘‘‘X¸Aèe¾–³žÌ¢hP`nÅûÇ¡P(Ä›ïû±X†!…B/\v×>ò§<ïyI¿˜Ë©ÿ–Ep <ýÖ·&ÛÆÇñ<·Pˆ‹ûyž‡çyÓ9VJŽ6¯ëû[*5†äÊB½ ‚°ìØÌ¾Êo2}ð0—ÈÎü,Ïg Õx% ‹êj¨ßS=‡BýVê}CïxÁÜßÂZ ö„Û¹s' „¹‰‰‰¸jÈöíÛÙ½{7ccc}áW*•b£š)_D¹\îCV,ºŸLS(ð}Ú@3ƒêÒE@µ˜ßÎʵ÷›ÎI(迸ͫ¾æSoy ¿tûí¼Ý·NŒ<üoF€l þižíÃå>Lð$ð”ïáÍ\Žé¾ÀD>`À{3pI]õMßrà~`ƒE…—Sy¶§ÞœÔQžvÀÿª¯ PmÊGÛMÕ؉šÒG‰kÕj"Š™&¤õw~«Å4hü\ÒÅ+²°/zíÌèu-\ŒŒ¨çÍyó\WýI AAhÄ0Œ8çQ†ñø\{ºéñ|sx« B¯ïòáÖ~À{RÛ|æüÒºhŸž¦,Ò¹Ûtô &?Ù¬šøèêÕj,ÖeßùN,Tá¾ÑÑÑižoÕ¹ÄÇ.%-Ó´¾!(á¬B"¦é¤{ÚÓ»Ù³ÐEy³¹Ñöbô¸ŽšHiôr#z=]§µM‹nÙÔ5šoÕž¹¿íÃÆÆÆØ¼ysüb½^gxx¸!7ܦM›˜œœìÄG±h‚  R©Ló(j…þá„nÐÊ Ù÷}êõ:!*ÜÜ!ùÞךŽ×}Áª¿p›¶ë‚ÚóàAàÚnÎϨ¾ëâÏ}Žœ>ßÎ4 ê«~åuYÕ÷”³ xU'GäÀogá6.²tÛÕÿo×á>ÞiÀYøýè|°•7Ü|‚Á àMu¼Y‚gê>™Ý“ÔýÕ©ó £u˜hºðÄ\HGè@rNÓT¢ži&9ö´ðVŒ¼í<8ww@AúŸ5kÖ4ü¥E5UíÝ£P(Ä!Xúuß÷)  …” ‚ ô">øp1pn†“l³¬ÖÞp•(iœëº"[3A„óèð|ßçûj’šà˜¦‡÷ëðÖ‘‘‘îÞíUÒ,Çø(,M5)ÖÞpÕè5=!!ÉYª/¡<Út®S=™®¡&’ÅÔsýÓR$™›$á§’I`+BX{hîµPׂ*×;66Æ–-[k8БÏb©hU"½f‹Y¹æä‡^è4žçaY¦iRB}÷uÔî{¯¬T_ô)ºÏ¾ÀÏ,¸ø¤g¡ÄþÿüÔq¬ÿúïùå-Ü–U? ¯|å“|æ3?ÃÀ “Q«57ßücc_çÇ?ÞnHW8…Dxr%4¦;–â+õÁüÌ}`3KÒ&éï[µ£˜*Þ ¯­‹qd³*$öE/Z¿´A„f¦q¹·ªÕjA€eYq±PUÞµ‡œÎ ×ìÁ!Å×AèUßB)ò}õ§¹r™Øk Vk8.—ËÅs27òžÓzIOô«6IÑVC‰`iñ--¤éÇyZ‡Zé$êuæB5jR>“SIžÚðÔœO¹`ëÖ­LLLÄnºr*À¶mÛ˜˜˜àÊ+¯\ŠÛ¼ì¸®;/ƒ2M³¡ä/¨ µÎOÑÆ)¬H2™FQIÛܾ÷¿Ÿ2‰¸Ÿgî¹ TŸ£þ. 
×û`›ªXB©/Í'ÕGÓ¡—^ú¯F™[o½x7_ùÊá¼ç=çöÛ×ðÕ¯þ¾ïðÛ¿}?Ï}îK¸÷ÞÆ÷}ŠÅ¯S.×ðýÄûëúëÇ8ùäa~X…èÄŸ@^I‚fÇq0 #d·üÁ‰X®ê£aÆíÈf³± ßÊcvdͦ¦¦Ø¸q#Ϻývn¸÷^>ò—‡$ä½V«aFC^¸ñqرã¾åy#‚ ‚Ї˜¦{ÄD¥Ý'—ËI.qaI¿¢ŠÂÍ6—©T*±\ÛTÏKNT.« ¥ëÆ _†¾ϯFG“„Ü3ͳ:N5ɵH„8³MK0½$ÅÌ!Ï÷þßÛ{çvº8ˆuçÎq±†4;w/®W ‚ÇqŒm6LÓ$‚ ˆÕbmøóõa>XVcâþ¡¡!ž¾æ.;é$ãôÕp¬ _V`´)g³*lT?×NkÚ;íá‡?ÏÙgߎeÁ>¥RÀá‡ð‚ÜÍ?ýÓQÔjçpÙeeòyÈå®ÇqT%ÏóxÍkNâÍo¾˜w¼ãÀ{ãŒþ^•J¥8w£çy «17nÄuÝ8OŒeYóönM£CÌ=ÏÃ~ŒLÓ¤X,Æç Ñ‘Â0Œ8ÇI7ض­ªE?vé™aûÐC|೟¥R,bY¾ï7$˜ö<¯AäA¡5¦i¶\„NR*•¢ÃÂ0\ÔxTt²¶ø¾ëºZF¥¢„·tø}$9pô<#µ]kžçQŒæ(Kò& (¯Ú"Ï¥ñ˜^ì`>(}άÙäú!”Á•J%òùü‚:Kíý–ÍfãxlqG–‹ h,àû>kJ%N=ï<–+R?¦W°Öù=7lØÀ«_ýMþñ—íÛ/åÃV{õ:‹fœ†àÓŸVíõ»0::ÊÐP‰~ôûüó?—‚€b1<Æå—?À£Z¬Y³¦ap­6Í«ÝõzJ¥ÂÐÐP\Š»¼ˆ£®ëÆÕgõ*»çy ygÂ0¤X,6¬iÔ`LçUÐç4 ÇqT.¿Í›ñ6oÆ!úͰmòѹ?ôå/ãýë¿ÆBœöR”¡œ ‚ Bo¡Ó3¹®ËxTå«P(ÄcIAX>°Éƒ“fXc‚ .š £i’øjâ¥C™r9õZ qM‹ÆÚ^³Ù,…B¡!:iΔhô+ ¼=*ó?U[Ò9Ýôx•ˆpåˆصkW¶mÛ6ÆÆÆºÝ¶y¡ãçën©½V …¶mS,ŸxSd³mË®ŽS*•¼:g$@‰bDÿµù‡¨ÉM"ÈéÌjzNôf­è8‡$ÇÛHtŽ<Ê“.Œ¶åPÞvù¦¶T`Qù™úŒµ{÷îŶm¶nÝ ÀÀÀÛ¶mëvÛæÅbD2­7¯ÃãtŽ)Ïób£Ã0Î/§=ga!x¾Ï%¯yͲ_K{±ù¾êÏÓ‹$¦©úzËJúz˜½J©iš‹ !mG¹\Ž‹(Ì÷»P(,Ižý¯T*±«¸mÛT*•¶âbºùкu¼lŒ¯ SŽ<mÛfÇŽK~¿A¡ÑiÒcÛ¶)—Ëñï©ÊÂ0$ŸÏÇb˜ëº±š~]çc ‚ N-Ój¢ç"¤¸®K½^gdd¤!%…~¬Ÿ†AµZqf… óëȉfZõ Ã0N]ROçíZe¤¿ºÒ±išñ÷=-l;ŽC6›#RlÛ^Ñù }Tdç(p«5Ã4ÆqœéEôD,OÏ…"[, äóy,Ëj˜+åóùöE\’SåíÒXY´¹XB³>X$ñ¢«Òè5JHs£ÿZ ´i¬6Újn¹”¹çú„8G\ºƒ®˜Ú/xž b ¥UBõl6K6›Å²Tž+lÞ0 ‚  V«‘Ëåâäó‚0W´ÇY±ï{ì1ž{ë­Ð!N“.‘ËÁèhâ§^Üp.‹är¹y¯<ë°Ô¥@'<Õ××Å\æÒ0†‡)£‘€ ‚ ¬&´XÖL6›¥T*aš&ŽãP¯×1 ƒ\.X*•J±Ø‘ÉdbaM/‹EFFFâq¼ÎFˆš©}úz:W´žÈêñýÐÐPœfCÆüý^äÕÞYÕjµí˜N/ÈBRüKÏ9µ·¤CVžçá8år™\. 
™®ë†aýÐÅÜÆÇÇÉår‹Åøþ¯ôTP>J£zÔ‡óÛ85è9Æ´>E pmô¢Eºè¤&8µ—Z…DÌFJxÓU³&êÒ¾Z©òz3H¹Iì…ê««.‘ö¬ÅzsbAIg!Ý‘d³Yòù<•J%vm—²ÖÂb°,Õw=ù½ï‘Û¸±£×6Œ$ôÔ¶U¸l6nT]sèl·Ð9ÜôŠe»Á|št±…ÅbÛö4ïºùŠ‚,[AA˜†^Y› (6Í 9Ô5`hž0:Në0©Vço"‡5-fèêéê’v<É´m×uco½ÿl^oÞ,÷B1t5÷´*…®f˜ö„Z-âËJC HZL›és4 ƒJ¥Òà}éyŒmÂóù|ž|>{ e2™!Â0 FFF( ñ1åryÆPSÏóX³fMüÜŽ %é¢kÍÛZ ¶mS*•(‹q‚ù¡¡¡¸íºÂ¦Ð?¸®K¥RatttÉDÔôøPçN!Ð"sZlÓ‚˜öÓxž{ž¥ç¶:-’›Îf³˜¦I¡PÀ¶í†÷¢…í´êý†††âï >N/õ9ô÷ }ÎNÍeƒ6U@û—ùà¶œk;W1M¨Tà Y>ü¼óÎï¾S½^ìÈNÌ,p瀙òL PyÛ–J@“©iGhÈ799ÉîÝ»ãСC 3<<ܳBçy=Ù6ÏóªÄ˜¦‰mÛ] …Þ# °U§¢ÂÒЫ}‚ ‚ ¬f,Ëb||¨Õ‡‘‘ŠÅb¬”ëD¶-ËC|Û¶Á¶íøü®ë.¸®’£Û•¾žÎ}çû~\λX,âû~\Òݲ, …ù|>n·ÎS ꤦ:Œ’<`–eÅ•†VA¢¼ˆu‚`aîÌ4(( RÅLAú  KžC™¦Ù³öeYV\Ñw!m´m;Χ½þFFF5ÕyçKs(l'Ðáå;vìèèu—‹Ç¨¸JˆK§“Ôsé†ùÌI!|¢¬Š4D†iš±óJ³Wp>ŸW¢Ð׬–:<<Üv§ÁÁA¶lÙÂzFˆë¥|kí**éNL«ÞZ`ˆ+éèœzeD [z[ºlµ.±®ËZëª:šæJ8 ñÔmÐùí´;~¡PhðzÓ×ö<\.ÿèãÓžo###äóùXhÔbœÎs`šfœ¬×uÝ8QïJ#àpKyR¹„ţ˟ ‚ ‚ «íÑiAi>ضM½^_°P¨ç•Ú Ã0 lÛfãÆäóyÉ™ÜE‚@éj†¡þ4ZxU Ã#W¹—[j§¦ù¼žƒk[^‰*«µsݱ—b¶{-!{†3–‘N †¦iÆ+¹\Ž0 c:]ª]Ÿ³P(Än‚ “ÉÄÉ9³Ùl|\}_´¨§“Î×uq‡z½‹o]pþå_¸ðµ¯¥úÎwòý=þ8þùçã¦móƽ{áì³q+Ÿç›÷Üý÷ÞKøÁÃó°ë\x!9 ´mnüö·yÃóŸßpí´÷ÜÆ ‚ ~ú^sÎ9Ýþ¸…eÁOL0"!¹—츟Ѣo¯®z ‚ ‚ ËO.—‹«˜ö:‹·‹E†††â¹^¹\¦X,Êx¸Ë‹šó¥}J´“L½^W*Ýßûpª —MŸ jgûZD¸•ÉZ€-[¶Ì˜willlV¯¹N²EBÀgiªóÎ× Gÿ0è|:›þŠ…Àg·og< uÕa«Õj5®äô­Oļèx}7üè<§¾þõüìÄyó‡?̧y—é*ÇéëÜzÑElð}rǦÉ3·ÜÂE‡Ƴ;Œ“O>0Î9‡óS×tëì³±Pž^!ðåC‡¸ÿ~BàÄýûyóþ'¯»ðB T¡‚õM"\Ã0Êzk?€ÏþóKð‰uÀ€‹ \)Ò°P|ßo0ÓåÙAAè-tú’4–eQ.—ãÜGí¢ ‚ ˆÓ¤¼]×ñ›¦I±XŒÇÕ™LfÚyÊå2–eµÜ–ÍfÉçóq8£ëºqέ^Í/&´Fç¨ívS:FÚFu„•Ð]Žj‘]Wµ5 þLJXðÙl"¤Ðýa&“i¨ -¬,Ö‚ò¢Ú½{7W]u;wîd`` ÞaÏž=ìÚµ‹íÛ·7¼ÞM|ß_ð*G!úï¡ì¾J"(-at^-k$•…íèúQ¢þè¿ý×Ç„GIÅ41€þã?È=ó ÞñÇS0MìèxU}SSŒ®u/½øb.ß²ç«_Å@‰dnt 7µo5ºþG¿ÿ}Œ+¯¤V«ÅÅ8ûìYßs+ùÃ<æþ÷ºë(oÛ†÷àƒx=ïçÎá>Z–5-ÿ^>ŸçÝï~÷~ZçF6š½ZÝO¤Ó‚º:,ºV>Aaµ¡Sõ„RGyd³YÂ0œ±8˜ã8qÚ-Äiqntt4Σ422çQò<©©©–ç›i[&“Á¶m<ï«'Â2fëÇéz!¿N#…Nz ò©.#¿€o†ðz£¥§÷× ú±°òX IA†«®ºŠK.¹„-[¶pàÀ&&&ؾ};;wîìv[ctü\ÑâX%zlã(¬€¢´(6×óéóŒeÏ4í‘f¥Î7Šã ÑuM”fDû›®íEÇÇÃ’øšsÉ(–*˜GÅ»x€_wÞå—c¡„³"Ó¿ûwüí߯^E‹ýº§; ß÷>/-TÎæ¿dš&…B!vã5 ƒuëÖ-²eÝ¥îAƆÃPV¬€aq‘‹|>ã8Óò$ ‚ ‚лèÜGé\Âíp]—z½ÎÈÈHJ8³H„:_­Nch),€¶”è¢3¹¶.‡â®C ¦­Ì‘^.9¾PŽ´á À»t¥aÞè¼2AHÞAAhÂani@–“2³§~™I„5)Õ¡¦Ú#^OfGGGãür¥R),u©a‹ò´:C¥R‘|~BÏp¯Ý(ÄŶyC§L·S¢/6¼z˜sÕT˲fÌáÐ)fÊ­¢8ŸD 
ËäBk%éT*P,&qf´¿ q¶VÎè锲½&¼µb6‘-‚%ÿâ‡a |iAͤ±@ÄjḃРWœÀØiÄ›PAZ“þz Ó4Â>]×ÅqœjÇÁ²,*• $â¢ÅLÛ¶ã±k©TÂqœØK¾]ñ‡vÛLÓl9ÏYŠqÂòâ8βDõÂB9.å çºnã¼åvF¦÷)zΆá¬}£°2˜³×+´[• €KøÎŒŽ‚㪒ÁÚ„½hPƒ=\‚@•N?zq³ÜËà¡¥óx褷ñë(!N‡ë|{ZøÔa¿êsË’„ÿèôÓ»}«L£é*ª2!î]´h!B¯à/Ì'@:ÍR̦~>ý8ïz.ÑÂÊà°n7`¾´òˆ ßôaMAï£<ÝÒØ$žm–¥¸ZM‰uÞ\–õ9†aÌ,SβvÂiåµXD‰žYTî½ Ê«ÑAåöóQ1\ú_ˆóG}µº*q‚ ‚ ¬<ÏcÍš5ñ_:gRó¶L&ƒçyÓ 1Ù¶WÅ4M“¡¡!2™L¼ÿ\«§§¯µfÍJ¥†aP¯×Éårñ9‡††°m[„¸Å÷ý¸Ò­Ýc¶€¹Õ0ÏNg¤îI¬]x¨‰n>¯ˆžçÅsææH2aåÒwq͉ÿA 5çðð÷uå ÚjaÄóÔ6ÓT"¨Ça˜xÉÍàÉÞ×è­ÐU†–ºòäL½Å"É!¢k ‘xÂ5;™ïøêWáMoêè½[*àd«µ ‚ ‚ ¬DlÛfjjjÞÛšÉf³ñXµZ­Æaª†a4,ÒÏt¾™¶Y–Åøø8¾ï†!–e‰ÀÓÃ迹 °ÂâÐŽ.ù¼rhñ}5‡v]5ï¶,QQf:Jxd$q~ÑþA ŽÑÇ‹ÍÂJúª}¬o¯ë÷åhÏ1}b1-|UX¬˜˜˜àÀÝnËœh.8PÁ ã(å&¢•ìyªO¶Fòç8ªsÐQš«åûàyåryÉÃ%*ìéÂ+M €(/AYéAAX8ËUÔKÒ‡ô>ºÜøøx·›ÒW„átÁ+’¹p«ýG‰e–¥D¸0TsäRIý/—“ãëõÆ9øèh’¦Šp߀IDATJÏ« CÃ÷Õk:UÔJq† Û#ß—†H¾ÑöÇH~ÃÕÉZ€Ý»wsíµ×v»-s"ýƒë×ûpi ÁÌqTg‘Æ÷Õ—¼Õo¶a(µÞ÷!—SÇ®4!Î÷ý–¡½óqçïEZÖèw¾èÃàˆ•´ä#‚ ‚ ¤T*­º¼p7*1̲”ÕN»ñ}µï«ù­Žþ*•”èU­*´J%Â\W9ªd2êu혢Żæ¹qS p õÛ¶¡¥*•Ô\¼•èæûpë­ýõ”& ¹O ¹0Û(žzñ¾]}°sçÎX¤™é¯Û¤sk¹.¼<×pm‹ŽÈmS·}6½É²Tç° µ ºŠMu›nŒïû†ÑS_~›•)ÄíáÌÁºÝ AAAèKt_?F—riÎe>Û1A ¼Íl[Ûî­k‡’ôã\N…ˆêÔMZ$Ò)›Êå$¥“Þ'›U*Å¢òj[lF- ¶›Ÿë}.¾øß:óA,#i !ÎÁ>„úàRzJ©Tjø/¬>سg{öìév[fEçø Ï5áù3ìߦNÀ¬h·[uÍäõÅj‘­:Ýt(ìrbFËrí¥R©§¼áV2§`¡„<‚ ‚ ÂhUÈ£× C5QÞaí© PÂÙÆI‘ÇI¼ÜŠEåÑ–Ï«íA Î«=ÎÒžrù|’½ZMÂKµï…vЂFaÏ4•XgK–Í&¹äV:i×ß÷Ù|øf•sIÇâFh!¹^¯÷”CŒÐ9Ø»w/{÷î_ܵk;vìèvÛ¦–eñfÆ<øBµýŠ,N8 å>[(¨N3“QãÌ.ðù¾:VŸÇóÔkzÄq’}Ó«"a8³0§;Ü4ú}-ݶô54¶m7x6jï¸VÉ!ÓçZèýÓíò*Ç,üœýŒœaÁk_;Ñí¦‚ ‚ B߆!žç-yb{ fžU*j{¡ æ† æ‰­œš‚††’ma¨¼ËFG•0¥ÏW(¨m™LZ¯«ãè ñlk¸¤ç‰år" ÎÅãL{§­F–ÛŽ}`xI>ýןV¶úAþ¡ǤöóýxN†¡ôm«”Öâ$“““lÛ¶ €áááiÉÿK¥ S*•ð´¯í< ÃÀ²øµ:|xö:Ÿ~;ŸW›Ž× )õ ‰ï+·a×U´ï«ŽY{¿…¡Ziö K—n.Ôÿb1©Øš®£“g¦ÝˆMS§6ý¾ô1ZH«ÕT[††Ôþ…‚z~ï½çpÌ1OpÇã~ˆÁÁçS­&%©uj5uî5kTÛôŠÎÈH"ì¹n"2Úv"$A"z^"FR¢:ŸÀ²=µG¡öJÌåTû[yù-%ËiÇpk î¹çl M–•™ìx©úbAXN:5¦„åDì8! 
C|ߟ–§Øu]|ß' ÃxM¡P Còùü´mBgé¥q…ïûKâI¤ç4Ú ›U"–á4MµO&“ÌÕFGÕ>õz²4†pjQOÏýthh+´c‚iªs4ÏSua†™Ð¹Ûš=áfBçC_mÚO'ì8NŽªËþæÇ“B®O°=ÛðÛ¶Ûq~± ø„¾eI<âÆÆÆ`çÎlÞ¼™K.¹$ÞvàÀ®¹æ QœÒ‰AÀßúpÍuŒùÚ¹î8u'˜Íª×*Õyi·^ë¯cêu%íÕ5:šT­±¬ÄãM·)½*¢E=Ý)ë1[&ÞnZ˜Óçз²¹×Çjï´Bžyæ6.¸à6Ž=ö´h•1y££¾i*AN·E£½íl,£Å»v4o×+=ºôußOÄI}O?|aŸý\XN;€“€5k~œ¹|oBXõ´³ã¥ì‹a9éÔ˜B–±ãF,Ëš&¦ù¾ßrq²…]TÛ•…:J¯Œ+†††0MsIr[—ËÉÜ"=ßJ7=-VÍR©sµé4B³Í‰–š>K™×:aÇ>*Ê÷} àÒê¥pJkãI‹qÂê$âÆÆÆØµkWüøÀñs6Üf6mÚÄ•W^??tèPüxÏž= ÇÏ·lÙÂUW]5aˆñ‚_ÃÏ€1ʬe5m{ú*‚ë\(zÅCÇê×jJ”ò¼d5EŸ¦ßˆVmHwð­¶§…·ùÄ×§Û¡<Ïž…a|„|>m7ž¨ÕugëØ—+Ö¿Ý*Ír¦.\n;þý2T2{±íùÛ¿ Ì•vv¼T}± ,7SÂr#v<Û¶q]W¥™‰*_6‹s…B×uïvs…ˆ^WA@y-Ì7Å‹žÿé¼Úz¾³”*i§ |éM:eÇߨÀ7>}&lùºzáÓÓã… …Ùl6N»%i‹V'‡Š‘%ÀÊXõóôë­Œ xbb‚b±ÈöíÛeä›6mjØw6zè!víÚÅÄD’ØÞ÷}ŽûÕ‹ù£Ñ¹‰?Z0K³TÕÓ¸öæ*—{¿ãU}f<øé'&&&صkû÷ï_¶k,µïß¿?³u %r…žEÛñC=´l×hgÇ ±áÿøÇø¾?mQFXÝìÙ³‡n¸ûï¿YοÔ}ñý÷ßÏ 7ÜÀîÝ»»}ë„c×®]ø¾Ïüã%?÷r÷ìÙ3뾎“$~oþK§ÉdÇÆíŽIï£ÃõšŸ7ÿµŠ$Õ´Îõ¥ÃÁòù|ì'ÌL7ÇÇ ±chÏ=~6g™6²ÓJÓUK—:ÝÎ×ësÁ^eÏž=}9>nWøÀYlú郘† eTÅÔ&ŠÅbì ,"ÜÊa÷îÝó¯غukCÙ… “îÞ½›+¯¼rQBÏ1ÇÃæÍ›Ù°aCüZÜÿâ“›£VN«,n»¨ÕÈ~ÌC²aÃ6oÞÌm·Ý¶¬×YJ;`óæÍ€ÊPÉÌ>€V6ÚŽGGG—õ:­ìx!bÚúõë9å”Sb;P uçž{nÃbÙR³”}ñ† à‚ Ù¼y3÷ß?ëׯ_–ó/ÇØ8=alG>?{:ËšžØ}.‰Þ›#æ“gJ‡§êœoÍa©–eÅá¨###±÷œÐžnŽºH—ϕ拭ØâãœçÀQE˜0àÛ6lÊÂ.ÜÂUp pa]‰v~ô—þ´$"ÒHgÑýZ¿[+®ÈÃÝu$^x|Ûãõ|Pòí,†‡‡9tèМÇÇm‹5ìÙ³gÚßLìØ±ƒÉÉIn¼ñƆÆððpƒ7ŽÏž‰cŽ9†-[¶4ì®f® ºx$I3ÅÖ•׃š¶lÙÂqÇ·¬×YJ;>î¸ãªî<öØc]º{B¯ íø˜cŽYüÉf •/Ćׯ_Ï©§žº¤UÐ$ßvÿ388ȹçžÛ°X¶Ô,e_¼aÃÎ=÷\â„ilÙ²…SO=uÙ„¸åÏÕë¨WÉf³ …–¢dÚ3¤Z­Æ„öts|¼;†éããÙÃ×uÉçó Õ%=Ôüî¦@=¾¿2°ÛÚ²0 Üš…{ªpOvŽBÁVŽJU ŽZ0ÏD#$Ðûr|Ü<®Ð™O<<Æóþå}I•Ç&Äãwe2<<<¯ñqœ#nÏž=\uÕU\yå•lÙ²…;v°eËöìÙÃÀÀ¶m·í4wïÞÍÀÀ@CÜuºAcccLNN200€çy Z¼ë‡ç±æÞ©9﯋lܨžë2Ó«ÝeØ0 )‘܆å´ã1Nzì1^ö²Un€Â²ÓÎŽ—ª/žA Â?òyÕ' ©×%õ0SÂr#vÜšl6ËÈÈȬž/–eQ,Éår‹E<ÏcÍš5ñvÛ¶©ÏÅ…OX4ÝWär9ÊårìEä”×ڨτ°¹ _‹6ÍSçH{¸]Ýb(^E‰(JÔ+¡Ä93úÓïÊŠ^7Rç šž ½ÉrÛ±‹²“‘øúä9Ò0Tì~‹¢3Öj$ â&&&(•JlݺµAÙ½æšk˜œœ¤T*͸’<11Áž={¦•ïû rùå—³mÛ6¶lÙ‚çy\ýõónè׿w/ÿƒ¹ï¯ËEÛ¶£GFTü¾`Ú±œv¼>„ îêö[V3ÙñRôÅóÁqT?œÉ¨§ñqõ8 §çðM'Æ‚°Üˆ'¤«Z–ÅÔT²°žÓš'¿Åb1GL#t–nŽ+ÇÁ0 ²Ù,>C‰U”(æ—•P)ôc¦N¡OS‰þg¢ë(!΢1]•Lè]–ÛŽ}àüž˜º™#ËŸk›Ø^BRˆ„8]¢·UUÔÊå2—^z)¶m·tÕܹsgÛŠªÛ·o'“ÉpàÀvîÜ9'·åf?é$žsâó>NW±©×Ûz‡ °¼v¼xâ‰'VÍŠw_ãÓ¸ ÚgÌdÇKÑÏ… P‹€¾¯úÞ 
HÆ"–¥&k¯åB–9%ˆÐgtbL!ËØ±°Rèæ¸Âó<Êår,ÂÕß‚£ÆËíW¤³ÒÙ¨kåP"`%¼™(O¨þ˾½úXn;ö€mX›çðÃo+Â…aHâ'(!nlllšÇ[: u`` vÛ\h® ÁÁÁEåµ8ôÐiüö"&ÅbëÂR°;Çxôá‡1ŒÙ“,/9éå;av|Ô=óP#¯eXâüÕÛ~µkoo±}ñ\ÐÕȪU5IGÛ¦éêõ¤ò´ïK-ÌNر ,7bÇÂJ`¹ìXõðM“J„ \•û»Vëì{ÔC}ÙôÐ0 eÝíg <Öô‹±cí9æÃ+žø7êgOqi»}ƒ e±aõ±h™Pîšk®ix>99ÙÕ†vØaüBz§‚¬ àé#¾…e]ºèóÍ›µt –îÒ>÷lôG´·¸Îܫ۞%)¯eNôÞ²¨û;G‡Ç.ºWñªn¿ÓeÁ÷Õ yt´uøis5?-Ò‰'‚ ‚棷܉ÿ8%TA§¤ ñ•Ëmº†º(Nç˜ RÛÌè¹ë4j8©ÏÓ¼½™0ºŽ7öúsöö>Íß0NRŽJß 'ú«ÒÚ›,@2tÉ*íÕ F)y’sÁEÅXQ{4Vtž 3ǘ4.AZQÛu?:‡öœÓ×KãÀé?:}™oxgðýF“µ,%™¦zû&j XõÀ÷Àø)΃º¶Um‹e¨þF~ kÖ¨ãê‡JÙÃÁ|·ÒtÃP™}E’{®ò~(_ö£Pú˜ üA(ýøÁzFJP|5pp<ä oë\°Oþ *·€ʧCåÀS¿ òë tøÈf!Bá:pÆ’v…!˜§AöU¿"ñ´íÖÝD9ÉqœFÞ0ÔótÁ¡\N½g}ÌÐú ÷0{" Ÿä×:-D£¶éÑIÚkªytâFÏËÑŸöî¡Qxœ©½3…2X,Þ×_ zDm˧Qû}XôúE\¤7(•àzŽÎC>º÷†™è¦ |œÀ ¸_|ëÀð^: >8Ÿ€¼Ý/WÝ—úŸC¸Â×»”ç^a–á»OAé°_á:` œ÷Bõ/ÔW¦z2Ø—ÿ ÚeݧÎ_þ)ðê:åKIF¡.p2Ô7Žú¨jÇ€÷2È]ÅMPücbA¸Y[Í_Õ"SF^}æ.0v“nk˜ÏR_{ßOD;ÓŒ<ï þžFTùûhðïk ÙFçÙr9Ð@y ŒŒ(ÑìàÁÆö©s‚ÒýµP×>œ^?°,uË‚={ºm‚0?‚@yìj;AYz“{â ŠúÇ’Æ”ýˆþ)5Pk…è}ê¡azX®‡³3uQ:[J–dM[ÿ’ «Óà ?unY{ÜéPZ=Ü®¦Îg †³Aê=\×ë㉸˜ncúý©?-8öqm´tÈq¹¥Ìç¸ìE—MÛÇ󼏸ˆ @$ÄA"ÆéÒ¾\~ùå±(×Mnûè|ø5÷vû^ ‚yè«ñÂÓ£Ÿ$ßW³€lVÍ ¦¦wÖ—üY~'¹¸X°7TÊÇÑO–ödÓ &´ ¥E5ƒÙ6½´6_šýëõµô`I‡ïÙÌ,n¥µu[«möm%(êRZ³ÕŽO@Za1»X7š3úê÷5Dœ½wßä¾E^¤ûضò@;6*¸àºð¿OÀ‡¶%ƒÃÒF(Þªöýï/<ëäÂlŒ› Œ8‹øóÈêhò\þ ’,ãêx.ø%ðB~=g/Û¾lSçjXNP5z4Ží/ÀÁÇ¢Ï.‹²3;º¶9Ú`­WÇ™:£¾n$rÛµèØ×€÷-pþA½σry;²’:6[Î=éÖ‰°n«ö™50õH;ò€;Xö-ÑÆt¡¨ïÈËÔÖîŠVê>¤ršÑ÷Ù÷ñ°lG_¬Û¼®Û&سh¯MÇQ“Çt*ÏK< [†p¯`GÝ }?t^H]ÜÅ4“¿JäqjšÊþtȹëª{–Ï«{¬ÅhÏSÏK¥äõ0LÖµFFÔkú8í9šít¾TAfÅ޼ñF²Ù,•Jò_)X(± ZEæKžd-»F2¬Öžk0=è$ßô?ÍPôßÚÙ<,n>×LÃk1'hzM_ÃB »FQâä“/{Ùboo×Ð÷Ýǧ|Éô ‰eYÓ££„UÍÚæÙºuk·ÛÕÀ#ß;–MÙcºÝ AX0¿²s‚£¿¹ ]ï×k‰«‰eÔ“lŸÈ£Í€'mØxž á^À—£™ƒM4£ ág@ÕžþK8çe¦ÎÊÁ˲ðüP¹ ”H¼éôR©ó¹(q@ÿòhwMZˆ›ËâOzÄЊvƒ0fªÛ§ózYÑuuVÛ¹æñÒË{K‰ ³ÑûܱÄçï?Œþ¯µàð¸ä xóøÏýµ|óC¢xoНÁ|'xu#Î9’ÿÖz5ø´€ |ørÆjpE¨fçz‰6O^푈p:ü7c”BøpôµÒKÍioÐ"I|Hz”Y$YvΓxFêx RŸ›n$^Ž:\ÙHÚÓ`“ZÜ­©÷aP™¾\¼!ÕÆæïÉhÓs=b×írIl[ß z?ú{©…E½ô®¿—:&EÿOǘè{ E’10¾Aâ9kƒOÀD§¬­·Ð¡¾Å¢ú¯…%ý_;3çó깬^s%ü8N"Ô‹J Êå’°`vÖ¹ Z)0 ÕùšóCk‘µYÒbbóþa¨Ä.- ês‹‰È˜Ï«ía¨<дH–Ï'“µwZ}ÝMSí§'Û®›<Ö¡ÚºR¢a$Þœ™Œ:_¥’x˜æó‰ªUûè¶éãÃþâ/~››oîÿÅAX „À_>ù$'ìÞyùå]©ÚIfúΕ<ñÏó¢Ñëss϶Æ=[ÀŠÎu›¿úUxÓ›–à]tžïûø°ùŽÍ-·†!!©BkŠå%Ö¿u\d¡oñÌð£] ò 
ŒìÙEMŒuBzr©.-Ðå 0Z8© I¢§b¬.ì®ÂËKp“‘§}Øž…À€ŠVQyõÔQ¿ªi¦ýZ‡ÅH• ‰“’ú¾axžG>ŸÿA€išxž‡eY¸®Û²ªÐŒå¾Ó 1škÏh.?óynyúž·ÿy<>Dd³Y‚h6œn_†-Pƒ hÿC»‚Vs=”é]> 'P)ÁÑ ßÎ=ÂÿwîK0ÉÁ¿„r<²ëÈMκægx'(ÞúYFv¼ï°ÿ±¬fÎ< áÝ£XJÔU—Ë‘äøË£·H³Î÷K$ÉNtÖãlê5hëM¯AëQ£Þ¦Å, %£×ôgÛì]“þNhqKÛâ­À™ÌM¤Ö4Øu‰6âÝŠü,Ï[‘ºÆà}ÊòTÔïq‡¦zž2ÏJE=N‡Ûv"®•ˉ@¥_õº.l¢½¾tšPPÞ[io/ËJŽ/•’ýÓíÑ¢ža$žf†¡®Q޾^Z\Óû¤Å1-†é‚)†¡Û¶uX´» )h¢'ɺ½ZœÓ¹ LBBu]0ÓTâX©Ô:%êhJ„ÖB¦mO µ0gš4LØ[yÏ´ E5 øã?þð‚n›• ¬zt€Ãó¿þu.ŽªNŽŽ.ꔫ†æ°ÐÅ`£†WšyëëeÉ]s984~ƒ·L’{æ-·W*Š’AHÑóBœl\ûÿ0ÍßèvSaÁ|áŠ{øï[®ƒ3?ÆU^³Qˆk²ë ¨Ú[%ýë7SŸ­G(Ú# ùñÓOsÂWÄ»”J%Þ{ÔQýú×C$rm¾ã¾ôìgsùå—sÝu×qù;ß ¿`ÂÏš–u1 ÃP¿”:÷T$®é8 Ë"CNüÖ·¸úK_ây/}i,dÙ‘ ‚iš\öêWc¦Ä*Çq°m;ê´Ø•Χàû>ŽãPÕ³Â&±ë±÷½/ûÛ¼úíoçSŸú¯xÅ+8å¥/Å ì²M”J%jµZ|Þb±H©T²,²Ù,“WLrÚŧñdðd|ÞVBœïûø¾O¹\nh£ëº†A>ŸÇq²ÙìŠ\ý ©,LXðçÀõƒpå×áœgþÃvànþ/p q8.d·œDöw@µŠyD¢¥ÕòyL­ T"eºÍÐk(«E¢³NJ§C8sP¼Ìo…ÊíLoO£«í.4=‡ô´Ø;ßêóz™ùIàSÀ;Xüȹ¾ÈãgÁÞÎM`þ: ýÒi9½/Ú»¼ï1*õgš‰à£E$ÝUêâ%z»ÎBÐŒàÒyûôyjµÆc´'ö2KWÎå’ðMíafêž—l¾¯1­…:Pâ˜n‡ïlNŠ>2¢„¬|>´taMZür]ÕnZm“n`®©rÚ…¦éõ”bQ½Ç…¬Ù®_ÿÄüaÉqP?Ï•?ú#ŽÙ~s¼ˆ!tÓ)tøçn¿ùâå·óôÄçùÑé¿Ôr©”*4ÓBÜÄ¿£’l BŸrßç¨ÏŠªDyž(“Ln›+rF”J%Êår¼’244Äøø8…BjµŠã8äóy5Ó)•øÑQ —]†ãyAÀ 8úÞ{™Ü±ƒG'&xÝ3ÏpøæÍ¼ö‹_䙸ÂóÎÃùËUL³Q8j1 ÃÃ4ùÚûÞ‡õµ¯ñ“÷¾— /x_¼ã~ùÐ!â ~ºe ïýÄ'ŲJ+‘% ÉGn åhvi7/}–Jؾ…ßÿ÷§úÍoBpû‹^Äñßø†a022Âèè(œ}6§Þ}7¸.[Âð}žüçæÌ{ï…g= Ó¶ã©EíZR*)M&jWÀu1}?öµS÷ œÏƒïÇ9Ðð}Îûá9ï¼óÀó(6År…aÈ7ßÌ×þë¿xË[ÞÂí·ßÎ|ó›œpøáÝ6ÉEá—“¨ËË`ý~÷Ýß~ÚIgn&U:T×áðHోNÚö‚@)Ùlª,©ÙP ÔÈçÉßP‚ßuàØš:&ÕÀ0ÔÃÔ9uüÛ\ÅÒ´è5_’ðÖ³!Ü ÊÍØal{zñ€cö/êvë:‡ö~«×•ù¥s˜éû©Âs •so«IgZ€²m%¾éÐ0l³Òá¬iQª^oô~:§]óW"-ÖitVx^jÛ ÚU$¡ð€bðôÓ|èC'5ô[ÂÊFûô«`y'Íc?9ªåv)Ò 4ÓóBܘG}ë¹Ýn† ,˜Û<8ïðIÂä1~)š)è¾x‰¢³‘k„^IÄ*-A@†8žGhY”ëuJ¥†aÄ.ÐÇÁCŠ/|!'—Ë`œÿ(wiß÷Éf³˜¦‰eYT*•ø9$᜹\ŽZ­ÆÞy'Ï{ï{¹ùyÏ#ŸÏsit¾3Ú½£U,NŸÑAkW“äû\fYqÙ3ßò6FûG®ƶm°m "þ4g§7\A‹:êÍ%¯ëÇ­Ú“ÎP©pJz&†±+L^Ïžs9†£ìàþøÇüÛ“OÒè"£_w ›sß>øö·±¶eáï*¦ƒÙÏ—Žè‰þû¦I¾9!ŒŽoÓèÏ@ÇêÏà_ÍþuÌŸŽ¯3Mõz:v#“QûŒŒ$®FĹ5·ah¨1:¾O·%ί‘ ˆªZ„ðÒ—âÿp ìÛ¼°Ûë‚Øÿ{ûû;†dž‹qÐ&çd'ÆÙÅbúÙŸ4-Ô•Ëå¶¹Ó9 2™ µZ Ã00M3öÀ«T*Ôj5ªÕ*†aÄ+;ùù”²jç²0Óh+YÛ² RaãRäXHÏ6çêJ±·‹ N¾ºcý˜ŠV×½xØ…Ÿ{+Æk ^8 V§†ð±…3(m:*§xkKú3håö“Žëkg+:ö®•rÒÊ­©^W¯W«I|b6›$ kFÇê ö¦'ø²ysÍéX'…iBœN؉gÛIâ¯Vö‰Õq¦}_ÒîQKÌ#ç>« 
2U‡œ6‡—v³‚Ÿ6i×]ÜyAz•M¢âûÜu×›ùøÇ»Ý"A˜ÁOŸÅÎ †6NÛæº®ä‡¦ÑóBÜí‡qlŸzO<ÁÀ#°ñü—͹4R†är¹†Âår9½tAíÅfÛvCeZp3Ms^îÐõHÌÐ×ÒxµÈC¨«…S,ke—Ïꎌ¿ü¼º÷š*Ñ[¹¶°pÍî[!©?Ð5Ú¹iÛO¦ÚÅÞi;m‘ ¬ JáñZ”µ¬DÔe9]W‰}i;ï'ü …Æöê’”ÍîI™LT™6 ßÕ•tLb«ØÄ&ŽûÆq÷“ã:ÿÙtßWzk±˜„¡öâu&ÂJAWè¼ùæxÙËŽo8¡¯ðîÚ€õäÎzëYÓ¶KXªÐŠžâ½ãpN;òHõÄE âÕ)ô“>eY˜¦Ùàí&ÝäâN¾ácp¶©ª™Â’VîÔ5K†P¡ª+1ú®T‚ÒÇÎÆ»çìÅŸ¬3¹˜¦ ÉÕ%5ËeåW«)‘®ùØB!)ÿ•Í<ò¡YûÔ¡nßÊ%' ÇBévA–%dxžÇ 'œÓ“È‚0n9ã–i¯U*¼t~iAˆèy!nêîgxÎsž£’Y¨lž’š=Nñ×oûk¾ø·_drÇ$;ž³ƒë>|],rB§ÙbÛθhš6S©TÃ˲p§ÛÍ„–ÜõÙ1_>ß]>7¡"0Žêâ ¨¯N.z®ÿWPB÷bÑ95^Tm¸ý¢·éÿžç5 °‚  y¹®K† çWßqàÞ{—íþ-9ù¼ëtU`Ëâ»}_ü>zs¤TRš£„} ‚ tµçyõº9çBæ‚ÐKø¡ÉóŽzÞ´×Ëå²8T-éy!në~Ê+^ñs%¸¨žº=Î’”Ý‹t¶Ï½òs¼í›ocë-[1,ƒkθ†Ü§sœ}ØÙñ>T¢?܋Χ £¿f¦ýaœiÃð3ëf ©NWÓÉçó’K@èIàg“‡c¾òôŽ\¯ŽŠ|µS‰{¨î¸lD‰súµÉúL.Ú^ˆþrÀ/ìÝË>Èe÷ÜÃÛ~˜ßýÞ÷ø×Ÿÿœ_Þ½›w>þ8{ üóá‡ó¼8ÖÎoÝ¿Ÿ_þ2 ã8¼klŒ_ý×Å®}äö>oüàùó/™Ð49|ófàëë×sóðËW^‰ïûÜôïð‹o|#—Úå‹?‡ý–3Éår ÜZô}?ør¹\¼Í2ôk±>‚x»¦T*Å¡ÞÏqœø|…B Ž­T*ñ¾¹\.>¾T*Å®aŽãÄ"ã'>ñ ~ò“ŸtÄ:…®yQ.Kµ>A„Nࡲ¾Ï¦MÛ»ÝA˜7Z|ÖÿçéÿÓíæ}DOçˆó#í&P3¬,IvZX.+/ßóÙÿ¦ý äÈår˜O›dËYŒ’Áºÿo¹é#\qÏJÑYAKÑ9uvð<‰»E-Ú¦;-ЩötX/ÑÞ¦iAœ´ßóùdÎýò—ÙtÄÜþðÃü™g2zíµ\þÞ÷ò ?ù 7Üy'O¿ð…\ô+¿ÂÀ½¶ÍeO?Í)/|!!𪷽žx‚Ó;Œçžv›Þõ.å˜].³xÍ{ÞƒüüÒKù6pøWðsÔÏ…±u+Àzík¹8ãškâ4zw|á ¿ðþð”Sø5Ûfà’KxÎ#°nýzN|Óä”Çãøáa~ôÔSl&¯¸‚_xúi8ã ôâ«°]Óä±ë®# |ðƒ\vÅ|ñŠ+xàÑG™ž¼ànøaÈåøâa‡ák*ŒãÇ ©˜œÍfã~§X,ªB1ÀÔYgÅ¿+é}6lØÀÑGÝ 3\6‚ uQ]-8ˆþBÔ@Û§‘ÚV&Ydz´¶‚ 3á¡úÊZm”ññ·v»9‚0o´¤ðºWÝÉ·ŽøsqÃöR©$BKzZˆ Ÿ;!ÖŸ[³V›t' lj'Aö¾e/àb*• Åb1ö@°ò‘²—þ~äI<íô6W5¨à°m;>>äD.×u1M3®f™n[†äóùXH+•Jd³Ù†Ð%¤ßó<²Ù,•J…|>O¥RÁ²,,Ë¢P(P¯×ã÷A,Ä9ŽƒmÛñùkµZüšö´(‹d2Êå2aâ8µZB¡@>Ÿo¨Æ©ÛžÍf)•Jq‘}ný~-ËŠ·†A¥R¡\.ÇaZµZ ß÷ ‚ >—.FËå(—˘¦I.—køìô½Õï¯ßÛ¯<œüKÏ›q?Ã0„TAèUÞiÓ5Ï`íÅåû>®ëR­VùHÒ=Z¯sãcqþ‰'â¼÷½äËeüŸÿœðÁù-ÓÄVå/Ö¯çâÓNk<ùÀ@ãó£ŽR^j7ß̵;w.éûØø·›Ùþƒ-Tμ‘?¹ôRuíôõSWxá _˜üD~8þùçcF…yôÑøÀŽßÿ}îj„€ÑE@"…€wüñquZ3Üʦ‰½æD¯„…÷]y%÷ÿÁpÒI'a¡Ö ~õW•½{÷vè“ï QÔm,´yÑí€o’ Et/.¥#€ŽjõQkzuVž§Iýþýè=¦í,}Ÿô¦â¶IyÂÐØ¥˜L;ÓǤ¯¥¯ï¤^óQŸ—Kã磿/ú¼zMõ'Ç­Üê¿‚Ðëø¨E¶×~åQvî|´ÛÍ„a?<í¿ZÎU³’ôPhCO qŸòà‘[ÎÀ6…±&´¤®´¡='ì{”ÈT”0¤Ež\.G6›½*• †a`šf,`i¡¬X,baÅ_2º£Ã õc]éÒó<|ßÏ)Êçód³ÙÏ6Ó4ã|CéœB†aÄâ¡išÔ£nÕT]M3­¸×jµi¯éð§zª \º§eYø¾__ bOß Þ×¶íøÞëNÇ0ŒøúéûdÛvC›õëºÍ 
Â§ªÕjüYX³UìA|೿ɛ?óføpë}t¨W¹\Ž…NAèEîõÀ7ÀZÂ…½V"»^è0 #³ÇqœXÀ×}ù+zÅѲâ¾q±â½išK.¼âgßâÆ}/Q"Ü,´KUf5ý·OFâ›Ùb¿46J *£ %øÙÑãjê%À¹ê*ŠÑv%6½î´ÓxÑ â|_hMâm’Â!³®B´ÿÁÔóltÿ*$"“ô¹µ³=s¼†F‹LaÓùõp?-LµB‹_ú3×Ç{$Ù?š3t¤³v˜©çfêxP£Îáh¤®§ß£n3¨”¿ú}k[u£ãÒš€ -ÏÒ(¨¹4Š}Aê¼YóH:ÑkI¦’¸í—™_ùÁæq÷AX*ÔwòÁÿ/Å⻺ÝAXðpýì–?äý8:CO q‡›pÔàõð> BRâa¨þ€ß:pÓu»ùfë[á”SZ–;3M3öÒõÅÈçóø¾O¡Pˆ=±´¦E(-¸éü]Û³ùÙE?£ôo¥x_-:ia)­‚ëc›ÝSÓ_ÐfÕ\O6µ·ßbi¢ªÏ«Û¡ïAsÛô¤Vß›æö¦'½z{ºÝ ÷.µo»°Ù´X˜èúËŽ¼–u?ÿ`Ëmi[„^çÁ¹Êkh©p]7îÂ0¤PP^ÇžçQ«ÕúËv®ý–eõUEª“¾õ-à%K~Þ¹~,Nà*_‰DTÑiÌ QŽÍuûO<±£÷m¹ø6a j ]kº‹IìPD‰Bú¾z©ÿº$§tNB-r¥Å°2Ézd)Ú¦÷×b­ö"#ÚW{|¥Å³4z[6úŸÉŠL÷ökw|úõLô¾\”M¥G-áw`œqq&ö P›Æ6±mçéÇóÔ¾©h(ºF³×a¹Åñ ½‡Cš&»>õ)ؼyŸrP©¨Û'ë}B¯à£ú™ë¯ãÜs_Óíæ‚y8€¾u¶}lÃëét*‚ÐLO qã&œtòwàÛ>XM#‡LF ²L“óm¯RÁ.—áÎ;•׆´ˆ‰Çœ”ÒÛšªV{ì±}- ËÏ8‹_9µu.¥´x+½ŒXÇÞŽõφí§-ø<Ú#”]­V‰ÃíMÓ$ŸÏÇûÌuð’öÒíu^úìûøÚÚoévShÕLø àªïŸ½=Ôí&/ %}£¬Ä¨–êÄa¨Ê¯–ËAÀ¨ï' è´:àa­[§¶iûsΉ ¬XUÈf Q^ˆÁ=÷À½÷âž>£»vaÜygrÞHaiXÊÓ%`õw£Rë\¸zT}¨:¹Ý·<øœ Ÿó0ž—¥üt÷ú‘PeÃö,æÛ£Å3¸Ã‡[ wÇZðð°ëLÊç€óÎ+(¾ÜR¢ÛF"÷?ÃuÕ9_V‡Oäà©H"ü¨"¯V,óÀë‡àhàH ~ù¹]n©ã¿ÂÆ"ø¶òÛ †ïÃ×Jª-A_­«qâ &ØY¸*ß×úW98€7Ýy7wæß»mŽËJ©®«Šˆ'ô Zàßµë Î9§ÿÒϨŸ–3~p+ç¬9üZötú)Ah¦§…¸Çxë­ëaíÓ=ܪUÃP¹Î, ³^oï¦Q©¨ˆe©ÈR¹s¤g-Ú·:ýU %½?,n©]è Üoþ:Û~q’lœ¶-]\Czxù±·cž·8ñH{B’À¶V«5„±¯t~ûÏðÛö™Ýnƪ§ ú0jÍ3‡›çA.:=„e%eV]ò¸À†_ÎÁ]>dlX?ýÇÞúÊWuD/üýû¿+ñ)S‡#ÔkÚƒ1¼ývÌñƒÿ茟F~êÃn` zy”ºXÞ`ƒ[‚á,\nÀ#¼fTm·¢}3ÀÇM%æ­)ÂŽóp²ü¿TÛ=À¶àªQÏl¢TÌ(ÎÕ  jÜÐ8V7¯ÚûÁücF%›lþ¾×I„²SÇ“âYcí?Š2À%À[ÖÕ±'TÔç“ÉÂÁ2œí¬ã¯þº'™Ûû»Ÿkñë¼rÐ]îø8ŒŒt»5‚ =âþï§pÍ5O/ì$:Î>翜C íÊœ§17€Îcà$mNx©ãêÓ 6›iž/2Ã~úÜé˜\tîæµI?un«éuý¼B²H3ÛT9í‚=—ýõ1ºòQ¶Å1œ0yÂ|?‘ž sï7Y»ñÇ4 q’N˜‰žâîúèÓ<n‚×¶0bËÂ$É4ch_±¨þ\W- êPÃPŸi&ƒè…P Öt,ˆ>Nªã¡â]‚hÿ4eTgT‰ö­FÇéj­ZÜÓÚOwž:¦Ewø ÷‰Æ:ÝáêH.Ý™êd1:9‹¾^sg›®«“¸8Ñc;u}î¹.r¥EÌ©kžyš£Ž:jÚ6N缄^Åœ'Ÿä/®ÿîôÁÝ,èÜZl.—ËŒŒŒ0::ºº½‰}¿e ¡38ÀA¨€QoÚ†Ó?ÇQa¨”b¾æÃQÀ‰–ú ¶|¬¬ú=×ÕôïáJl%€yÜ“tœ©þ¾ø#àÀŒ~Æ·nUû¼Q·°ìÆ *8eC% ¿1çXð[y(ª]Q*ÊÀÅÑ Ñ†é«ƒMîSÕ6ͦÿ­°”áÍ>Ûüæé—õ¤v.jƒdR—Â<øQå =¡Õ÷µEÒÅ»án6³²BS+eÂå²ÞjÍÓ0Ôë2äzx¾àcYsM I’=º¨¾O‹:zÎ’¶íL´}ºqø¬o4 ˆ¿ôwr‹Â{|¨ê\(¨Ž)k$eÜôÐÛ8ч7àª(ÏÉ ·Ïî~Ù‚ÆŽ4ëÁ9À/j"qkªgØðiÎ2á­ü¹®÷™ptû€g™°&Pm3̨Ô©¬Êä£'LjÚõÚèùo9 
>l¨½z3éÁ†èï2ൖú±Ñ«NÀ˾û²e±µå⡎ùÙ“œ<|ò´m:Oúì׿Î{¿û>¶ÍÓy³R©ÄÅtµät¡™U‰e©ß¡kT€×{ðÒæ.¸TR¿a:¿X­–Œ2Y¸"„SßT“Ž£~Ÿò$“ “FÁ']¦SÓ<6/¢~ßu H&n$CeºØ5“ž[,ªqOz±Ñ$™tvÒ+ß¾¤gª³0ߟƴ`§YÓèÏh•ͪ[Ðìl¯…¹‹ý­•®ÐS#I|©ôg§=á4º{³i¿ˆ`“yо"SšrÓùÒÇ;4z¦…5-Ài¡±¹lõ;4í3H;“:‡ÕâxÜ4]Mj<µ=ýÛ£«éä¢sFïqߎ}s¸!½ÇîÑ-¼è¤õÉ[MW„vô´wìãóÔÏn9PtÏóâ óFwÚ­0Œd´âû­÷iuM=Xѽìx.p'pŠ=}ó~¤V/¼Ö×Ò+=Eà+!|Á‡_$Yi'€3Ÿ£þ¿Â3õ+o/ÞUyÉûõ½èž¤Úgßña0P/Ô' ì"ï´×]&׸xØÿÛÀK¢YIºRÖ»prÔæë|øpð9à›À9Y^õý­óÿ¬»È¡=Oñ¼'Öôxá„~!ðÁ:ê10æul­V‹óc¬–ÐÓYI/ G;pÕƒ÷¶šx¸®Šãs]µÀV¬E Èðn’ò¢L¾Ú1—!JÚ#C“.ŒÏáíXÍâ‰f9ŸF5(Z]GF—uñ ÛVãÐVÞÊ##‰È© gh¥¯Z~QíAjYðÑ.3àfþ<Ÿì«½›JÀk}ØÂ.ˆÎ­£4Ò‹Ê`E^«¾O°9c*©}ÃÔ±Y’•¿ àj.Á3Õkï àÂK‹IÙèðÑ üpJšžmÂQ†úQürÞ™ËùÀÿÏÞ»ÇÉQ•ùÿï@B !@.Ö2ÜjLTDjtvãW£d]·[²/»ß¥û+º".üºw½-qÑîÕ5(êÒ…QP\qÊ à%ÉN!:œH†„ÛLnóûãô©®îé™é™LOwÏ<ï×k^Ó]]—SÕOŸ:çSÏ¥; R°øa–ZpO „»/åT›’¨~ÁÌ÷~=§¸ÿsŠûÒ©…çàm‘ñ|Öƒž8,2ag Æý‹ þîÙmzâž{ž>¥J&•͵¡ß‡. rcˆ•¦²µl¶ü3ýCˆÚ²…zØ%»7ÍbžÎ¢ÍYFÅýÙ7^ŒžBÙKº¢ÐL±b{Ù±£•í2ŠÇ¨œäó¥XrËRëd³êZT[_Ÿ z8d%Ôúz…BIÀ³Q¿™p›¨<<Èg‹%ÓÍòJAž§Ú¤¯—öHìêÃàùcŽá¹ßÿž£: ƒ'žx‚ÃÎ>›ƒ>/ôLŒn¾†¸º¦§lÛ6I–5uø@ÿóÇó®ö»å0"©‡„Zhj!nïŸ_fû‡ŽX Ø4Íú…ôéýŽ'th"“Ì/SŒ1€K,õ7^Fk_µI¡ Ø‹*Ÿ¦ŽuYÂpÛ± ]¿òŸ¹†kÆß¦F±gOÍéfÕáz‚ÐÌøÀ¾/a~Ô¸ƒÁD)%‹|1 sL¸Ú‡S%î}Ì€?plN‰(Õÿ¯x27Ýuµ”ñª|á"âXh•¼QƒÒípª¥OÛé§sñųåᇹsûv>|Ýuàûüèë_çbËbÉ©§†ÕDc±üÛ#ñ¾÷±`Áž^·®öï²I9ìÙœyÁ™á²l6+ã_aLšVˆó€}œpñ¬aŸ¹®‹abÜ3§>Ýè&ŒyóØzä‘Ã{EOKÉ '´ />z Ö1£¯óË_þ×uY¸p!ùË_øÈG>Â3Ï<ƒëº¡øìû>–e…E|ß'•Jáy†a”U—Òï£}ý˜9Aa\Ô|ù»Ùâ&™T¢èD˶˓J‹#gA`¾ï«j¬×zyáoV‹õ@?ØòFŠbˆ´]ÿ×mõÄÙöímüqɱ]Øö-åýòСñâ%´ÍÛ6£îZÇñÿøÌ™7:a{[w_ì„^&þÍê°2r/ÐiR´J¾ÏÀÁ÷|G}ï†m‡¶«yÝåï mûvnþ¥¿tX²g Ûº†{åh;Ö¶½ŸéûW¡P ÖîÝË–9sx¤³sØC× dƒð7²gÏfÏÞÀœ9;9餓xxáá ·gÏu]æ•<¾öìYY¾«¯þgž¹ Ñ&8.tcÿ’oaY—¨…×§ákœÔ­ÒÙÔ£ï­tÐLtN9Òv•ÂvôýH"XѾŒ¢`vÏâÅœS\W‹;¦i’Ïçñ}¿i`Û¥B#]sí¥eša_lÛ6žç‘N§ÃPʨ°v÷—¾ÀÛæIß'›Í’ÉdxÔóÂsŠæûFûœdÛœtÅáù8r­R‘ïàk'œ@âÌ3ùÏãÇúS+¹[(^z‰{^>‰Á“6…‹f| ¡&šVˆ €ƒ÷<Ï \2ì3=˜“ɘÐì<|ļwÏ«J5ÚQaÕ–*´ûæ†òežçá8jräº.‡rO?ý4Gq\yå•eý´ž$g³Ùp¹Q|šª‰®ïû>A„œèAjåƒÛ¶ñ}'’{­¯S-*DÑïƒ •J‘Ïç‡ „–e…ž•û‹î£²Bcq€÷ð·ï%ç,Q2»JÚëf’ºj=Q Q´w€žÜW®¢ÿPتyX×Íd2X–E{{{.£…‘ÆR“é±­'Œét»(†L&zLÁ0qp$lÛ&z{{kZ¿™I§Á:eö?¾ïã'øÔ§p,³—è¼ô{p _žþÜ;¹óÎ;yÅ¢c¹ý¯`ÇA³yãmO1û‘|aí©¼ÀNK¥b~)Xàäá߸7¦ ¾ì½m7ËÞÿ2CKfÎûã…=€ùŽŠ bàåa¡£­üιy`ìòáeSMÊî\sbptžtao'`~ öz°' 
/̓COSYcî^xáþðôý-å§£Ž÷î]‹i|\ä—3êOçÉžÆDçét˲ˆÅbáƒËL1ERÁuÃhåhßG$“I,Ë"‘H„Û§R)|ß'ÓSô€Íçóa¸h?iYVY>³¨°·˜¦ L“9ž‰¦-9»øo%¼CáØ¹÷ÈO7M+Ä,88ªl™~ºšJ³tŸ 4€£OÞÁ'ž_Î7ÒDLš ­-’þR pŽãÉdˆÅb¤R)LÓ …­ñL´'ÒŸG=k‚ Y êéé)yMLð·¦CÃ0hoo'‘H„“v½Oí™]«‡ƒ Ìñx¼&Ñ ê}Qm}í©aš&žç…b„g¢ÞKºýZ¼ÑŸU £ë Òét™7âsÏ=ÇßøÆ ]ßF⡜,r1Ž.2ŠEŠt·nJafÕöQ¼ÆAà8N™÷WÔ>´8 %1}ý*¿ËJn¤½Ðô~¢¶Z)zG×éìTyJs¹\C9ŽC>Ÿ§»»»f¡l¼û÷/žpBËpèK/áÚ.t„ˤcZè:øÈCS¢W"‘ §§'ð%“ÉPŒ› *ÈÑ«¾GL´-Ñ}×ã¡OWWA‹Å&µ?ÐŒ&ó;H§Ótuu‘ÉdÊž˜oܸ‘M›6íÇž§5¿3 °¿t1p±šfQ“À²ÜMJÀÒBk>Ÿ'W†a„×8‹•‰Î•Ÿ×›‘&ŠÑq’eY MÙu®Fåä²Õö߬ÀG˜—žÅŠ7¼†ïÍŸ_ú°èòöôWŸæ_û/ºûºyúé}|tîgøÄÑŸà°¿?Œ¹Ìeñ¬ÅJÕ1; z7Áºþ¢•U¿n.‘P¢T—SL­hªÚ]Éx)õ–a(ñëÝÅ ”&°Ð€96|ׂWË"&l¥TZ,Û߿ӼX}ð¹Ò:Éd©¨±iªôYQt¬‘ÞW£²išašJŸO/S0Âþ …R°zu•º^«á>=‡ÿsñ¹0«ñ'£ç˜úž<š}ÀâyÙl6ô"K&“¡rt{˲ÊúåzÍšQ`›îì ^3g px¸l&Þ„ñÓ´Bœìݳ%^R¶Ü¶mqýZ†þká7ÏÏcMñýH-‚Ðìd΄mó¶ñºÎבJ¥†yÕH>ŒÚÑOÛ';<¯÷ÆL&3m¸ÀkÈæ‹uJpx¦‡“vp]7ô*Ô§¨è,Í€‡òˆº$™HEí}ûú›^ÏÖ][yÓËoâͯ}3e."¸Õ"›‡¿~ øaqƒ4ìÁK+QÊ4Áy@ I:ǼN7§¦YžZѲ†‹bÃÐIèŠè"¬Qôû‘†JÍp«ÑM­G¨1þŒ%Þ5ø Þ}ÂápÅÔ7N“J¥0 ƒt:iš$ ÃÇ5†a„áÒéô°}ÑT¦i–Ý#£¬d¼?ýñ€yg¾Èƒ¯ÿÑ'yâ4$ÔBÓ qÏúðÄ'/€—–éOZ…ÙO?ÍYýg…壘ŽÐ²äàßÓÿ>L„Óá2è¨Ö(ýÀÔâö_‡à”÷«ÐMvëû>‰D‚B¡ c ¡éIãÁaÈ¥àzoê¥ceçß}>‹b‹Ô‡Ç£\BH@ú•àv*¡H…pBÌË/‰GÓDšX÷ä¿Ðvàóõ=N±À€~XM¡ÐFJ©¡EºhDVt 4ÞTÂô"Žîïç°È˜!ZCF£y…¸;·Ðuè¾²e’ä^h%ž:î8::7“ˆäV¯!¡ÙàBÒ×uÊòèнÊüTÂè艀0ulž?k×.¬§ïÀºìý†¼™'W…BA&SBËÜ{/|û4¬9sÀ‚G¯z”­_Úʉ—œHæè¢¸ðJT¸µE)Y bŸ{%ÄŒòLM¦‚czŠMsºx—·ÿû‚òpQÇQžÍ¹\.ôxÓŒw©ÊHnra$6ÿ¿£xçŸ?Ðèf-HÓ qOœ»“«¹8%\&¡•Ø{È!œôà_xqÁnŠ ì§>Î+~ë«v¨ÂËS¿ñ¡Ÿ®O—ÏV î\̧žÂ|Ý1p3Ü{ï½d¬„œ -IzÇ^}Ø3dþùhòù<_ûî׸å3·p¾¾ZÁ¥T™$Š™un½ €ÿ¯³¸÷€¿å]û1|ˆV)w',ZT™3R õÂß½›y{å€Ã—ɘX¨•öõaßÞ}œôÔÁáû|>/O#„–cÓús8g€ÎÎN“…–dá1ÛyíÞG‡=IÖÕD…ñ¡Ÿ°‹4ulž?Ÿ…ÿó î3góÙç?Ë?íû'º»»å;hq¢÷Tß÷Ã*²Q*—G+;ŽÃîÝ»}ã8apþôÁŽüôâ£ñ;}n_{;?¿çç˜)³T‘¤‡V* ÙòDmJ瘄R…e(ЯÓét¸M6› =º²Ù,ŽãÊ>³ÙløZoãy^Ùö]]]¾ãbë)§ðÒC/qîœsÇ®; Ùl6¼ö©TJÆ#”ã?ù$}äm¼ýÍJ³ˆV?„±hZ!λëD~yè¡¥÷Þ$ù. 
Ââýâ :֍Щây!´"{,ÆöíüÏà†²§ÊÑI€0>2™LëXÎ4vïÝË}ÿñ"¼ò"¾õ£oMûl‹s¯äþ/ÝÏç?Ϻ»Ö•Æ ÷‚›¦Dí8újáJ¿ŽÚ¡¶‰¨U)е··‡ûŽÇãá6•¶¯m:*°A¹}yžWfŸú{ª¬¬Í3‹ÅÂ{­mÛ¡g—mÛ¡C‚išeÎ ­˜öäå¿À™ç¾8îíâñxøf2©N)4œW<~0C©1뺢Yµ3Ô„\qÅCǾ¯è¿Î{¬ÑMšŒ+®¸¢ÑM¨™ÿû‰O ½j©Ø°0œV±ã 6 ýÝ~0tü«¶ }êÍ?(û,“É4ºyBÙ°aÃÐõ×_ßèfÔÄõ×_?ôù‡†ì%ýê°Ç†r¹\£›4&ÑßW*•êîîÊåráëžžž¡T*544444000dÛvÙ6}}}CCCCC±Xl¨P( uww‡¯ûúúÂíõ¾ÂãëõzzzÂå}}}áë°-z½ñ¢ÛØH®¿þú¡ 64ºcrÅW rCCÿÓÝCï}ïsC¦i6Åõhû¢öRi{±X,\7—Ë…¶µÝ\.nSiûÑ}e2™p_…B¡Ìvõòðá1Z™VW ½ÿú[†Žj{fhç7wÖ´~,›Pÿ"´­bÇz ”øÍï‡:ŽøK£›#4ã7­GÜÎÍ XôW*¤/ú¤IZ…ó~·£_=i>µ„–âÅáe8ñ­Ï•-–TB+ñÙÿwþüÔ‘<|äSRô)ž–Ïç˼r´G×h^9Q*=d´'MÔ+Æ0 º»»Ãm2™L¸^¡P(óªÑ¯MÓ,ËS¨«‚ú}ëõ,Ë —›¦YæÕ ›H¦éì•XòïàˆïÈËwþoÙw<TzžEíUW†roD×u˼Т õ£¶“J¥ªÚXÔv‰Dh¯–e…¯+«_¦R©ðºÄb±²ýFÓh{5 C¼ª¦˜ö[ïà¸maþ¥óG\'ê•›Éd$ϛДÜÛw8s÷> ¨û¸¤ ÆCÓ qí}æÕï?” $ÖZhIzXÀóíÈ B«±û¥ãÙõÄ¡œþúÓ5±ŠNÆ¡øùKKxóK;éÌwî×~*Ã9;;;Ã1J<¯ÚiÛv8‰L$¡`V)&T šJ!,*&Hºƒ™…½q#÷=erþ¼Ç'U<ŠŠd®ë†6 Á¬â èjã¹\.»‰DøÚ¶í2¼RÄÕ¶›…éÉ£»æ€#ï‡Qº¯|>ڗ؃Ьìî›Å¡‹”ÍÊøXM+Ä=4h†OŤ²œÐŠôÍ^Àá þ0¬Ú¤ ´ >ǹ„“¬ dP,´¿}ù•ëTà4é…¦`¼v|Ûg°ü’_ޏN>ŸÇ÷}2™ étÓ4I$Äb±ÐîG{8(¡SÂx¯ÿò©ãXrê°íOºžïû˜¦†?k/L×uC¡­2„S#ca¼Œ×Ž—.ÝL2yÁ°åÑaA˜jÆkÇG>ø$gt£›.´(Sæ—N§éïï§££ƒt:=¦ëæ–E‹$ÖZh*ÆkËõ±|ùjò†„©b¼v<Ø7Äᇀïûaˆ’ 4šñÚñÑGßÈÊ•Öçóù°Šg4U&“™’\rÂÌf¼v|ë®×òÅ/¾mØò¨G›ëºaåШG”ç”q‰0YŒ×Ž<òÈЋ2ZÍ6šÓO¦šñÚ1Àw÷sé¥ó‰Çã’ \7Sâ×ÛÛËÖ­[¹á†€’â<š+û?þãCØö{}}˜˜ ~ø¼ç=çHȆÐ4LÄŽxÀ‰¼ó- K„F1;>÷Ü9áçÙlÃ0H$ÃòV ÂT1;¾òÊ–õ1 ”‡Ð¶íðµ¶éè>DØêÉDì8Фnš‰Úñ=˜XÖ±S^ Éd"cã /¼SO=•W¾ò•áò©¶éF÷?ÒŽrZq|ÜÛÛËË/¿ÜÐë&ããrmÇO>ù$-3>Þ¼y3úÓŸøõ¯Í­·ÞÊ‘GÙv7 ¶ŸfaË–- pðÁ×´þ”kwÞyg£› ûͽ÷ÞÛè&Â~±fÍš†-‚0™8ŽÓè&Â~#ccaº ¹´„Vçÿûÿþ¿F7A˜LIhjGGGÙ‡ÞÞ^.\¸{„©ElX˜ˆ Ó±ca: v,LÄŽ…逨±Ð¦TˆÓ!z®ëJ{¡¥¦bÇÂt@ìX˜ˆ Ó±ca: v,4‚¯¾úê«ë}­(_{íµø¾Ïí·ßÎ5×\#J³Ð2ˆ Ó±ca: v,LÄŽ…逨±0;Á¬¡¡¡¡©:X?[·n¥££C [hIĆ…逨±0;¦bÇÂt@ìX˜ˆ SÉ” q‚ ‚ ‚ ‚ ‚0S™’q‚ ‚ ‚ ‚ ‚0Ó™’qÍÆàà ù|žM›6ÑßßOGGG¸ü{ßûwß}7K—.-Ûf¤Ï&£-Ë—/¯éXõhÇúõëq]·ìZŒv¬z] ¡vF²aýY3Ûq½Ú vÜzLÄŽëù½5«7â7-ÔŽØñp¤?n=ÄŽ‡#ýqë!ããáˆO3íÚ4“fÒ,L¦v3ã<âY¹r% *¤¸®K*• N‡F–N§q]7Ün´Ïö‡µkײnݺ²eSÙŽµkײqãF–-[ÆúõëY»ví˜Çª×µjc4†æ·ãz´Aì¸õ˜¨×ó{kV;nÄoZ¨ ±ãêÇ—þ¸µ;®~|é[ W?¾Øñä1“®M³i&ÍÂdj7³}2SMoo/ .dÍš5,[¶Œ /¼ÞÞ^¶nÝÊ 
7Ü(ã[¿~=¶múÙþàºnX&9Ú¾©jG?ÝÝÝÜvÛm€Rh»»»GmG[[[]®…P;#Ù°þ¬™í¸ö#vÜšLÄŽëù½5«7â7-ÔŽØq9Ò·&bÇåHÜšÈø¸±ãÉe¦]›fÒLš…ÉÖnfœGÜÒ¥K¹êª«Â÷;vì`ãÆeîËË—/gãÆc~6Q¹þúëËÚ2ÕíÐûëïï÷³jÕªQUk!Œ‘lšßŽëѱãÖd"v\¯ï­™íxªÓÂø;.GúãÖDì¸é[—#v<¹Ì´kÓ,šI³PífÆyĵµµÑÖÖ¨'©TŠU«V±cÇŽ²˜]½0êg%NsÕUW +<•íèïï§¿¿Ÿ+®¸‚ŽŽ6mÚÄš5kX±bňǪǵÆÇH6 Sk?0~;®GÄŽ[“‰Øq½¾·f¶ã©þM ãCì¸é[±ãr¤?nMd|\ŽØñä2Ó®M³h&ÍB=´›'ÄR4o¼ñFÖ¯_ÏUW]…mÛe9Lêͺuëèèè(Kò×Èk¡]–{{{Y½z5+V¬ht³„1¨fÃSر°¿ˆ¿bÇ­‡Øñðë!vÜzˆ¿bÇ­‡Øñðë!v,L”Fk&ÍB½~Ó3.4`õêÕ rë­·†tGG½½½á::.z¬Ï&¦M›X·n–eaY–e…î‹SÕŽ¶¶¶2•¶££#Œ{éX“ÝabT³ah~;®‡ýˆ·.ãµãz|oÍnÇSù›&†Øq é[±ãÒ·.2>.!v<¹ÌÄkÓhͤY¨—v3ã<âÖ¯_ÏÂ… ‡Å÷ê 588ÈÂ… q]w˜ÁUûl"è„}˲ð<P®ŸSÕŽåË—³nݺp7n ]&G:Öd·A?#Ù04¿×Ã~ÄŽ[“‰Øq=¾·f·ã©üM ãGì¸é[±ãr¤?nMd|\ŽØñä2Ó®M3h&ÍB½´›'Äé„•ZÍÔxžÇå—_ÎÊ•+Y¾|9®ërÓM7ê‰ÂHŸM6£k²ÛÑÖÖFWW+W®déÒ¥lݺ•~ô££k*¯…PÑlx*íg4¦Ò~ÄŽ[“‰ØñToÍ`ÇÍò›ª#v<üXÒ·bÇÃ%ýqë!ããáÇ;žïêêÂó<|ß§³³3Téãñ8ù|>Ü>ÚÇç«ñx<܇çy¡+i­ëDÔ°õûd2I<'²Ùì˜O!ô6]]]ø¾?æûÑöS˹D„ºÝ©TjÔöë}ëïE§’|>O2™Ä0Œš·™NŒfÇ#Ù°¾6µãýµa}P¶1Öû˜J;;F:=@‰žoµ›×L¶ã‰öÅúÚTÚñTõÅz0q;ž,ÏùŒd_£µ"ý±þnÓé4¹\®AÖ5u´ê˜B樂×ÒÏdÖçÛŠvÚöjèmúúújz?Ú~j9—T*5dYÖišC±XlÈ0Œ¡\.7jûõ¾õò¾¾¾²khÛöP.—²,«lÛ‘¶™®ŒdÇ#ÙðÐÐÐ~ÛñdØpt½‰Øpt»©²ã±lr¤sH¥RCÀ0dFYûÄŽ'Ö lÇSÙWî¯Q}ñxÎg$û­ý釆†Â¾|<׳Uiå1Eåþ¦«ëýUöÇbÃ%ZÙŽgÂØ8ºÿj¶/v\BÆÇ„ý[½ûã‰Ú²ŒGGÆÇÍ?®h½¢éBSµ[£&šPéÚiÛv¨š†Q¦ZFݼku+œl÷C۶ç^µbYVÙ“‡±Þïï¹Aº,kõY»‘ŽÔ~˲Âå¦ibYVÙÓNÏó0M³lÛ‘¶© =™.ŒdÇ#Ù°¾vûkÇÍ`úSiÇ£Ùdµsp]7t××.Íñxœžž@ìX_£ñöÅúÚdÇ3±/ïyWÚ×=÷Ü3jûÇÛg³ÙÐý& cŠæ·ã‘úãL¹FbÇÍ=6Öû¯fûbÇ%d|l…^gSÑ×–e|<62>nþqÅHÛ4›^Ñt¡©ã1†h'lš&¶m‡ÝÝÝ>•I9ÿ±Þï¦iûáL$OEå÷0000jâÙ鞣Öï©ò:ˆOcÙ—¾‰jûO$e‰;ÅŽ'ÞÃô°ãFÛ0L̾Æê]×%›Í2kÖ¬0aï¬Y³Z*Ñðx1EóÛñhý±Ø°Bì¸õÆÆPú.ÄŽKÈøØ¨úz<צV&bË2>7ÿ¸b¬mšÅŽ›Nˆ³,+Lp”•M6M³¬3ˆz¼hµR+£Éd²Ñ§Òôضïû¡‘” ÏóÂmôk­ë_.—#›Í†ñá£m3ÉŽG²a;ÞÆk_:@tûjƒ£™lÇé‹Aìx¨f_çœsθ·­?îîîfhh(ü;FÆ“Åxíx´þXlX!vÑÓè2µß«« iå2ß;•ø¾ïû-eØÑv×rÝ´+}¡Pc«Çzº2‘m¦ •v<– ëebǵ³?ö5žk=Síx"}±^&v\;Ò·Ö—VSÀ̰cM3\ïf§í¸Õl¸²Ýµ^7éLJŒ§¾Ýã¹n2>O S5>n„7Gœ¦šqèR¿Ú˜óù|™R9Ù +ñ}¿Ìý´’L&3êVËö“Ù–Z™ˆ¡†1îðD¶iu*Ïw,†úÚq­6<v<™6 ³¯‰\ë™fÇé‹Aìx¼ûÑHßZZyL1Sì¸Þ×{:ÐÊv¼?ÛOf[je¢“0ékCÆÇû×–ñ0[–ññØÈøxÿÛ2¦j|<•vÜ´q#áy^—­]<…©Ã÷}\× +îÔk›éŒØðä2Uö%v\ŽØñä"}kc;ž\Ä&ƒØqãÛßÄŽ[±ýrÄŽ'—©7ÂŽ[NˆAAAA„V¤éª¦ ‚ ‚ ‚ ‚ 
Ât¤)sÄ}úÓŸæ¯xE£›ÁC=ÄÉ'ŸÜÐ6ìØ±ƒ;vpÌ1Ç4´?þ8‡z(‡zhCÛñÐC±víÚ†¶¡VÖ¬Y#öS¤™ì§Ñß À“O>Éç>÷¹F7cL<Ïã–[nû)Ò,öÓ íØ±cmmm\~ùå¾crã7Òßßßpûi;–v oÇ{Þóž¦ÏQ'cciÇXíñqíÈø¸œf±ãVÿô§?eýúõœqÆnJSÐ,öÓ lß¾yóæñ©O}jÌu›Rˆ{ì±ÇxûÛßÞèf°aÖ-[ÖÐ6lÞ¼™Í›77¼?üáikkã”SNih;6lØÐÐ㇆ob?å4Ãoà†nhtjbpp á×Lì§ùÚ±yófúûû})j¢¿¿¿)ì§YìXÚ1¼º¯kfdl,í«­‚ŒK4K?Ø,vÜ*ããÇ{¬)ì¸Yhûiº»»ùÓŸþTÓºM)ÄÍŸ?ŸåË—7º¼ûÝïnx;–.]Ê)§œÒðvè¶´µµ5´ ‡vX£/øÚÚèïMì§œføMƒòÐi9äŽ9昦¸fb?Í׎V/=ôPé¥#²iÓ&9äF_Š1‘±±´c4d|<>d|\N³Øq«Œ—.]ÊI'Ô׬hûižyævíÚUÓºM)Ä5 +V¬hthkkkxç È«Eû)§~ÓÂøûiÎvã£YìXÚ!ìÍÒÿH;„ýAÆÇåˆÃ?¼áaÍÍ„ØO‰ñ؆kAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA˜DˆAAAA„)@„8AAAAA¨ †/Ëf«/¯âAAAA„içÕ¶^(Q-ßWÛuuA>¯>O§¡³³$º¥Ó«Ïõ:A„8AAAA¡åp]õJ0ëê*Ó@‰l0\h‹ÇÕ6–¥D¶l õ‹A_†zoÛJAOOùþ»º`ÕªÓÙ½{nMmžÝè‹&‚ ‚ ‚ ‚ cJK¥Ô×U¯õg‰„Ì´çZ¨Ï %¸uw—ö•ɨe D·JôgÛ.í8j»înX¿þaúû÷Öt"Ä ‚ ‚ ‚ ‚ MO6[òP‹z¯˜¦ú%ŽiO8½¬’J¡m–¥¼Þt¡„ZE¸fF„8AAAAaRÐÅ\W…’Bé&¥uÓéR•S]¡Ô÷U-ÒÙ¶ 5í˭Õq‚ ‚ ‚ ‚ „‰æKK§Uèf@.Wž‹MçwÓ¯ å馋)†ú³íÖÝæö÷3w÷îšÖ8Ah1ôÓ‚F¾Î~)‚ ‚ ‚ L;´'(a­³Sý9´·«ù©ëª×®«D´žBÚÓ3vAËRë¦Óåa§M‚êyå g›NÓñÑrØ3ÏÔ´Û-Äåón ŒŸ¨ën=q]/r  ‚€|>ÏâÅ‹éìì$›ÍâyÙlV„9AAA„ý¤2eZ&²£Éh‹S 3õ<%ÀéJ¥Ž£¼Ù,Ky¯ ”{°ÙvíÇI$`h¨~…ªžØh$“*6VW‰ˆÇÕÉG“Ýi·¿0 ¾öZž<öØš?éBÜàà k×®¶lݺu¬]»–7Öÿ¢Žï+%w4a³^¸®K6›%ý…¦£í8*{œ}ö›øÊWž˜ôãø¾ëº$“Iòù|ø>›Í’Ífñ}Ÿ\.G¡PÀó<òù<ŽãÐÞÞ.vÝdTÚq£mXÆK3öÅ‚0^ÄŽ…邌+„é@3Øñh¾ åÂ[ð.¦NŒK§KsOËRb(‘¬§§$Äi®åp]%´¥ÓJ«& årÊ%¯PP¦©’ÛEÉdJËR)vrJÍM˜t!níÚµ¬[·®lY:¦¿¿ŸŽŽÒé4®–T§˜ P×ûšk¶ó¦7íÃ4•°™ÏO®‡Qá9ú¾ã8¤Ói‚ ™L’ÍfqŠ ¬ã8äóyºººrM„ê4ƒWÚd2©þ».\yåËÜ{ï£\wݲöÕâ‘–ÏçI§Ó¡Ð¦Éf³$“Iºººp]—L& rÙl6Ü<Ïöm,ËÂ4M\×¥»»›T*…çyxž7b[ôï¡««‹t:=¢pç5Küm‹SiÇÍÒ B­4C_,û‹Ø±0]q…0h´,_>8âç.0gËô,-»øZ/ëÿO&ɤj£Îãfš¥×:w[½É“æÞ¡+?TÒÝ­&Öù¼ú</óv»©·W¹ôY>Ídl{¸\½gï|áþæ©§¸oË–šš5©Å\×ep°Ü¨z{{Ùºu+7Üp çõë×cÇOqœ¤ÓJ¡­tkÔñʾÿQ®¸ÂÆóø~é{©UÍÕ!xVqß÷ñ<×uI¥RÄãq‰Žãàº.¦iâ8‰D"\§½½Ã0$¤¯Éh´»®²ÅÎNõÄ!›-ÅÇû>œxâ#¼üòŸ9üp˜?ÿQ²Ù­$Žã`šfh›±ŠÌ–Žã„¡¥¦i†¢Y"‘—çr9‰DhÛ†aàû>‰D" Aõ<£Ø ‚ À0ŒPˆ‹ÇãX–Ú´ ƒböNÓ4‡ýNt{âñx¸oß÷1 ƒL&¶Óœ2_åÖ§ÒŽÑ ÂþÐè¾X&±caº ã a:Ð vìy./¼ß_X5 ó'O<Á‹ÿøü÷÷¾‡yà€ ,ýÉOÈ]t±¹sq€\qyJ¼‹¶Øj©qà8Êù#“Qz”ÒJ”FUOòÅ6§P€nñ\2ÅÏò€…ãìâùx@7`€×òù‘“Èyž 
+-”8*fV‹húûíéâñ{ïåÒ—_fÉsÏ‘¢{î\>ûÈ#¼é„pw<ÿ<'ìÜÉÒßý޽ãxÉ$Gq›W­âŽM›Øó­o±ì½ï¥íôÓkº“æ788Èõ×_ÏUW]U¶|ãÆttt„ï—/_^w—O-jVû>Ö¯_ã8ø¾O,Ë–­Ç0²”ôå‘q]—®®.ºººÊ¼Úòù<ñxœ|>O{{;©¢Ad‹.Ž===X–…mÛágZ(ééé ­À;#Ä+ëAíÍT‰I„‰Ó v¬Ë4›¦úJœK$Tçx K¡`Ëå8æ˜?qÝuOÓÕÕ…ïûäóùÐ#MõRÓÞl¹\ŽL&ƒa¡—›iš¡§›Æ¶m‰ l¶P(PоÉEŒÈc‘B¡@___(˜åóy:;;I$$ zzzèîîÆ¶mÈårÄãq‚ N“Ëåèéé¡§§‡ …Bø;Ò¿1uJuA„v¯Ceg:Õì¸}± L”fè‹a;¦ 2®¦Íbǘµ˜½—ÎÇu•fQ9uypÁpxî9ò(ÁÍu]Ö\|1{÷î%‰Ù´†—¢•>*´u$UÀq൯½“;ï܂㔴,ÇQšU.7ùçÛE±]Ù½{±=4àoßNjp·¸^%¸eŠÛhQ.³@òÑGißû>ñ 5yîê^²5•R^n±þ=÷àž}vØ–?ÿ<³‚€ÿwË-\÷ôÓ¼ËvïØÁ%‡ι;vððu×qÁ‡>ĶÏ~–'ÿçðgÍâG§Æû¾øEû¯ÿâŸwíâ·Û·sý¶müøùç¹ó”SØwÇ|ùÔS9ü¹çjº&“æ—N§¹êª«X¸paÙò;v°téÒð}[[Û˜ûÚµkWø£¨ÜßXtv¿`oxrÀ矞;w†ž;¦™çºë’˜¦É±ÇžÉ7¾q/Çÿ4¹\®L\Ðý®®®0Ïó<‚ ‹á8…B!ôFŠÅb†a¡¨‹ÅʼyRõÖ¶mLÓ$ŸÏ“ÏçI$Äb±ÐËNçé2M3<¦ÎÙ¥÷›L&C!F‹|•mheéííå¹ {¢L¦?÷Üslܸ‘åË—×||-¸Ù6ìÛ÷0ßøÆIärª_Y»öRr¹[%ŠAÀ¦Mí,\x;§œòi‰'Bϱt: `Ùl˲BoL¶‘l6K*•ªÙNÆkOZx«†¶ó|>OÃÖ³,‹žâ“ Æ­=ø´7i§þÑChïíííÄb12ŧz}í LÈ»N –Æ}²µïÚµkBÛ×J5;žˆ ïÞ½›Ç|Üv,LoúûûÙ¼y3;vì¨Û1&³/Þ±c›7ofáÂ…eƒmAظq#?þ8»wï®Ëþë16^ºtiMë 3ƒFŽ'bÇ0±ñ±0½éïïgëÖ­-7>îííиâ¿n8”½ËŽÅqT”ã¨ù(áé¥/íRóùÿüOR©T]”J¥¸ï÷¿çÁ . 
ÙŸ§´\X|Ï=8çœCtÖöñ–Y³:ùáßÊ#|ˆ'ŸÌP(” hn²\@»ÙÅ÷…/|ëóŸ'‘J©ð¯D;“¡«xFQ¡,¤RJd+&¥³ŠÛ[ŽƒñéO“}ç;ÉœwþüùˆøÏ=ÇuW]Å–Å¿¤Rè@ã#ßö6.ííå°NàøM›xÓí·óù¿ÿ{ܾ‹2Ú–.åç÷w|ó _àîÛnãW}}$“I¼OšpþwX:Í®¼’W_}5ÛÛÛYý½ïa%“ôööŽk|<)Bܺuëèè蘴u×®]lÚ´‰¥K—Ž[ˆ+”W.ˆ¦‰Åblß~8oxÃÈd>̬Y³Êò¸ù~°iSÓÌbÛ6¶m‡¡¥ù|žL& _ñx<ô\3M3 Ô"E¥ P&¥»»› BE·I‡éår¹aâA>Ÿ½ât`ww7®ëAØn(‰Ú“)N“Ïç‰Åb¡@£swi¡¯ÙÄ»;v°iÓ¦aá“ÉdÛñàà ›6m×þÉ‚¶ßrñù?âÄ•cQtNàÒétèÜuhÐZÁX$Qžo ¹w/…¹s1üž=ôÌ›Ç=?øï;òH¾›Í†á£Ú³¯¥$þ‹« ¤#´Ëuyzófþêu¯Ãêé säÅÏÝÃ㬽{9ðä“ùþO~ÂÉ=Æ_ù ¿ûóŸyõÛßζßü†û|­[·òßýŽ¥óçóΛn Û~Á—¾„¿f @Õ9t*•âùë¯ç[gŸMÈ5-ÄÕʤq›6mbãÆeI-Ëâ†n ££ƒõëׇË{{{Ç×^ñŠW°¦xòµâû*lO% ÊŸClõñ@…ý „×F‡;ê¡öÌK¥Ráñ¢?V½®eYöª¤­­5kÖÐÛÛ;i×¼’ɶãc=¶f;hoWö‹ÁG>ò,Að_ÿõk‚`A1ôÒVÀ!‹‹•¶m‡w.—ò¬°ÿÐÞ†ú¦¥½dmÛ&“É”yØUæ‘Ô4ÒŽ?üásÿý÷‡Ëj±á#Ž8˲ÆÝ Ó=ˆÕ¢Àd3Ù}ñ1ÇòeËdÒ' cÍš5¬]»–#Ž8bÒ÷Ý ccaúÓjã ßøX˜,_¾œåË—·ŒëññŠ+&Ô–¹—>{c¥ÓIâ¢E€­,à.œ<÷£|é¿Öqå•ÊQæÎÃqbÜpÃ&ÿã•W™«"¨‚ÞñŽ=Ì›7­y¸3oÚù[n|¶‹­iøD þ+‹Úz¸õ±‹‰{ øNçéîî.›:(o5ƒRáI­d³ÙpžcFÕ9—N¬+¾þÅ#ÿÎwˆ_y%Ýo}+Ö²e€ £%“!÷ÊW*Í£§‡l6Ëû>õ).†Šûyé{ßãOßúgf2e‰üóù<‰XŒköîå þ¯ÿJê=ïÁ÷}º»»ùüò%KøÑÕWóã›o&ùž÷Pز…'žx"Ô%Çáç?ÿ9†að÷o{[Õïh´È)Ã0øòg>Ãù”çå[±bmmm5'EˆÓÉ 5zò ÊÕ´··—ÁÁA.\ˆëºuI~˜Ï+¯F•Ü>I"‘À¶í°‚š|¿Š%K”—ÉdÊ„8ý¥Üyç[yôÑ·ãy?Ʋ¬P­fpN‹ÅH&“#z;i@‹{ú …BèQ¤ˆèùA@WW±X, [´m; ÃÕ8Ž^cÇqÂ?-@èÊ›Ú6šo¬i¤k/Nσ+þ›]»Ú0 ƒÜÀ?þ£ 7nkÛËyçõóÝï~¸¬lûŠ´m˜¦Ùpûj.BR뵬EØÓX–EWWW™`¬¯Mô¸Éd2,.aFè9ª?Ú#6ŸÏS(B¯ûØ8üðÿ¤P×]VÌ«­ögÛpþùwó—¿ÜÃ~t·ÜróöžÆCù)†á°äÙ,ýö"¾oC¶ëËX\Ì‹gžÉåÿ8¶mó~ùKÒ¨ÜkITιJs|K¼¯¯/L“ÕYéýÓ‘×°äŸÿ™#»»øÔ§”@“L2øè£´·¼õ­xžG&“áüóϧ½½U?ûyç;ùÄ'>Á—V®äžY³xã[ßJ?®ãðo|ß÷™;w.ßýîwùãÿÈç?ÿy\×åSŸú«W¯æñÇçáL†w¿ûÝwÜqeÎQç åÈ2±è¦(û»‡I­šZ¶¶6.¿ürV®\ÉòåËq]—›"®“…ç)1Â4Õäÿþá î»ÐkÍ÷},p8çœR8›FO¸•x¦y®›/ó€›jj-›¬=ׯCå9U 6ZpÓ¢]Ô«'j´ÉdÃ0ÂwZ”Т†ö,Ò^{:¿—ëº$“*7Ÿ›z۱1|ãW…n¿ù|> -ÇãrH/[¶Ü9lû&Ð܆¡mx2ÜÇ#.êßïhâ¯öŒþÞ¢íµ,+,ÑÝݾ×áÝ:¬Õuݲ·Î%ÊÃ4‚0üÜ÷}¶mÛ65_B„©ê‹¡žˆ Ó±ca: v,ÔŠ~.ߌ󕩰ã•×ì«_½Ãx ‰³9úã?âî… ÉwpnŽJÀ)6ôyêBíÚõC^|ñXÖ­›‹e)¯½Ýó‰å+?aÏžüèG§…ήû€¢SRvÑg+^|ô¾9s0¿ô%²À «Vá§Ï›ÇmŒ‹*òðø¯ÍÛ¾øEì#ŽSù–E@õ¹XòÞ{ɽü2üð‡üqÁz :ónjkãCßý.|øÃÌݽ›£Ž>šw¿ûݼç=ïáÄO$‹ñ•¯|…ýèG\pÁtuuq÷Ýwó%KXÒI|à-o ‹Y¾ÿýïçÒK/å¯þê¯xík_ËŠ+øû¿ÿ{¾ô¥/qõÕWóÉO~2lÏ/~ñ‹Ðyc2·zQ!®2|lÕªUtuu±uëVÖ¬Y3î¼ocÑÕ¥Ü3»»•ð¦ÛûéêZV+ÕŒ¥pëé¹¥CJ'[ ªµ¢ªÎ7º¢¥®´0tέhÒüÎÎNfÍš 
pºÊ¬3€°z¦­Ôâ†×J¦ÒŽ}_ÙžmxžæÔ!¿Çs_ÿú‰œsΕö×Õvl»ùnr“iÇãâjµcmgÕö¯CÆ£¹çô>£¹‰Éd’¾¾>€ð©‘¦§§'´åt:=¥B\ÔŽëÝ B=˜ê1… Ô±caº ㊙‰ï—æ+žW-8*‹+GPÿ{zÔ~LSE%“jŸSí—1•vìqؾǞ½‡N4YrðÁaEÐßX6ÁIB*åÓWÜîškÎÀ²Ê¯µÊM¯^ßzë—øùÏÕ|DGáxž‰,ß”W'Ê î¯¿û]n¼ñFæ}ä#pÎ9dl›Oôöòóœ×qqT(©I©*ë «VñË{ï%oš<º~=‹Ÿ}–—Ž;Ž¥ü 'ïÝ˾cåÏ×]Ç‘GÅ_½ýí¼íӟ椓N #Žúúúøä'?‰aÜwß}|ó›ß Ó¥²Yå‘Épä‘GòÓŸþ´lNwÓM7…óK=÷Š¢ V6;u÷ˆÓ´µµÕ-¹¬eéÎÁÃË|¿ ß' Ks]—?ÿùÃcvA þjï'T®¶s±ª¾ž &»­9!¡v€IDATcåÓ×^ÿðµ÷[˜¦æíjoo=ïÒét˜ƒ+›Í†¢œö0úãÿ8©ç0^êeǾ¯žrhV_³hŽÀíÛÛ¸ùæ8ãŒö%Æù~yÂÏFQY¡U¨&ôU†~WËÛ¨í<‚²< •ëhê™Ty,êÙ ÂT!v,LÄŽ…逨ñô#:_ÎfÕ{“̶!‡TJÏ]Ô:±˜úŸÉ@:]Ý,K9ËD÷ÝÕªZ((1Î4áo<¤aç\O;ö£~ü<ÁßËžð™»· ˲Xþ«_ñ½+0PK|ðèíýY¨Œä¤õ=§Ž¢ctH¦,Jyؼ·¿óO=5,DÙÝÝÍý(7§RØ(.M©€Âá>Ê›æÍãMgÅ%ŽCïë_Ïœ9søŸãŽc÷Í7sÄ«_ÍüãŽãûø—\y%§Ÿ~:Ùl–ýèGeméŽDE».»ì2–/_Ža¼ôÒKÃÖmX¼J¢Õø”ÒZlÒÜþ¹5ïnÊ„¸z¢rø9tu)ÑF{éj¦Z€z≥Œ¥Eyžê r¹rÅß÷UBübåÜP¨«ÕˬVê¥Þ6"_X¥Wa4É£&‹ÑÓÓæÎÒßÉÐÉ Ã`õêÕS~SïC<Þ^“ûìâÅ÷ [–J){M§KO™„©Å4Íð)T.—#™LVõâ´,‹Ÿýìgn® ‚ ‚0Íñ}%x# ÒüV?È÷<5wp]5¯vµ,•RËb1õZ‹s==j[½žvdÑó-ÆU›wG…9=¯ÖB_OÏäÎi|€Ÿ=ÃÙÿ´;}#‡Ÿ´€³6mÂÝ·/ÔæÌù"çÕG§€ÒEæö‡¨ø•ÉdˆÇãaD‘v¨ÈÙvX„îÇaÃü7ßvïþóŸé4M^|ç;1^z óæ›X $ï¾›ÔW`{öìw»¾ùÍoòú׿~Ê¿'\Ê«/ø¨äyZ•,F ‘*~fQJŠç8ìãØÇÖt¸¦þ 'ŸhA]yºéëë C<=Ïcñâ­ÉÖ÷UgÍ*ž«Ku$ºÓÐa€ƒƒ':”Ú¦««ôy…ÝPšÙ=S‡www‡qè{®™Û¿?rÙV¯ƒ0'\5-`É’=Ì™³³êçÑêøÛ1¹Þ’3m£ÑüpÕ„¸éjË‚ ‚ BcЂ˜ï«ùh{»ZfjYt~ÐÙ©D4Ã(‰hù¼Ðl[9¤ôô¨íµXC†î µn&Sžï:+ {Õ| *çã–¥D¼#|¬Ñ—±.øÀ³ýsùîÇrÈ yå“O†ð]×%›Í’L&Ãb’cEÅA€çyc qãIë¤ctäÏé7ÝÄÜo ÷óÿüOp]v_|1óßÿÍ/8€Ç;xjÅŠa?¹\n¿æ;—\rI}çKÊÍOO³(Á-‰rL?3%q.…Ý’@J˜³‹ë¦ž|ç“lyÕ–šš1¦Gœ® Ò¬$“ð³Ÿý/'ž¨<Ó´!DÛ¬¹£c÷˜ñç†Qê”tç庥CDz».üå/Æ÷{xä‘ùßÿ-u~Ú+ÉóJb6;Ü-Wµ­ÑW°9È4CýéûY¿~äýårÊÖÆËd{vÖ“fn«-†Ñ½Pš*‚ ‚ ´:âÅqÔ<´"s ŽSšê°Q=ï­\wh¨ô:“)Ía+祱XmsUÛVóã(–Uåf:û0{ß&Lóm\¸ì6Ž8â@Ls  ŠÛõôôà8NMi©,Ë"ŸÏãûþ°ô9QTª.?<Æxð}Ÿy…¯ÿò—Co½+Ï9.ºˆl›_8ß?à~Pï/×G‰aúÿH¸(1-J‚—/þùÅ÷‰â{%˜¥‹û‹êBЇJ gE>3+ÖÉ ¼àú(•Š ¡GÜÆI¥RôööÐÛÛË…^È…^È%—\.o6LN<ñ븮Kwww¨ /[¶lغýý¹?ËRR.§„³¾>õD z<ßWŸ/\ø0úÓ6î¼ó­ jY_ŸêüR)µ½~¡Žòù’—]<®:R­+4±¾0åLua†FašêU-Æ¿'4º€£½âƃ¶ZÅS«Y½÷ì¢ë¶&ê!W¹ÞÞ½{Ý\AA¡Ép5_Œj'ù|)ÒJ{šµ·«ù$PÌ‹®^ww—<ÓjÅ0ª nãq©Ì™¯µÆ3lÿÓŸ^WŸ‹:Åè꣚û¿÷¦©.Äg?ûzþùŸ•&-ÐXk¡HK¼ZžµÊõF{¿þ¯þŠß|ûÛ%”ñuuaº.ó>øA|Û&/žI"ª»ïZ°€¿ûÔ§Æa¢¶ +UŒDh/^ÈxŶPJ^J,Ë D7ý¾  Dµ,% 
”[4/ù¬§øùHÄŠëc¬Wô÷÷³zõj–.]&)Ô‰öo»í6V¬X1,d°ÑèðÑTª4ÑÍ=Óu].ªiß•¿ƒòJ%¥§—]ö{6o>ƒNøsÙ:©Tyçç8¥œ]úiƒûGý%“%qî‰'Þ0ì\uøâX×D»vFB{NÄ£Jo?‘°Èêçâ7µçÓd CžAÙp-BX-O4ÆûpB‹G“]¤ž´ŠhÕ…ÃsÏ=·ÑÍAA¦˜¨îQ‰ï«y˜ï«¹¡^Ï4KÅØ,«4ÕËÒi5‡+FέE;½n½±ír‡“Z8õÔ_׿aS„>íØwÇ>Î;¯ ¬(ŸišãÎó¦ îoþ÷7üîw¼æûßWWœ_ÞqÝu<½p!¤Ó,úÛ¿QðûÅ/~1v»ý⟦ΩÃ?»ŠI_¨|q=mrP^g  ›rï5¥vjÍÆD n›’'[%Ê5aðÝl€õë׳jÕ*Ö¬Q.’ýýýô÷÷‡å{W­ZÅúõëéíí¥£££ÑmJBœiÖ>)Å+^1)ÇŽF>úèI,_þ+à‚ªëÆbÃÝ£hÖU3U§{ o{›²JÇ) cííjX¬”3Ú)ëY«¿LµÂ õf¦ó#g|nïyñœaëLDˆ› úo»½'Ä»vãŽÅH'}ÿûa¬ÿ«¿bÅ#†¬ƒÀôGÊ“MçX+ Ä1íS’¢\\KDÞëé£IIPËSÞò”¼ÒZ˜Ù ÂP/¿üòpaww7eyÖ–.]ÚTyôÚ²rtvŽmÄ}}}$“GMj6l¸€G9‘K/ýΘëºn©ÍHçcšª³=òÈ{¹ë®£Ø»·þþRÂ˨Wm—:m×Uâ`¡Pòhs%p)¡r¹+±í{8úè/ÑÕUª ›Ï«Î8“Qb\¡ ¼òtÝYk§žh®kO¹|^ m¦Yz23oÞ6}ô$^÷º?•µQ‹‚®[îU‹•ÊTG½£û<é¤A6l8‰ Ê]²§wÞ¹ðpœ`ÜBÍXâ¤âÆë7ÙAÐBªnãdäžk…óAAê‹Û¢Îº‚i&£Ä7ý¾§Êtr²Çò©T)¢I¨? ”^tkvÞÿGŒË.›Òãïݾ?þZ…úº®Ë¼çŸçÃÿû¿pÝu¸¾eY_ÌŸ«˜h§R©òœlY”àVˆ¼÷(y¹éü€q”(—A n|«Ùh¢x,íy7 ˜ ÐÑÑÁºuëXºt) ¾¬›nº)\iýúõìØ±c˜8×H<æÎÝÁÞðBÕÉo FûÍ 'ü™gž¹ÃÛÆÛéµµÝÎþçÇG]G{¼UvÒú·} qÊ)÷³cÇ¡ìØqr™¨5’nJ ¿ªf9ꨭlßÞÅe—=Qµ}Ñý»nIl‹:0VžÃ’%{Â×ù¼úP7 Þ9¾ Ù¤<ôÐC€ ‰M$öP}˜ |¹£ƒ,à‘O¤õ@ Çð>.‘9÷¨ö²¬'¾ï×Ŭ^¹'Ã{O¼ßAAf6¾¯„6PsÒJ϶XL-Ÿê¨ÄÉòP ‚€|>O*•Âqb‘‰n6› S¾èBétšL&3£X›(±}_âÂöLùñ7îs¼û¹çxþùçy×þ±˜Š"uLÓ$‹‘L&‡W_ PùÚ@M4mÊ=Ъ{¤eP"]=ìZïÓ¡¼ÐB 3`ÅŠô÷÷‡^e«V­ sÁ­\¹’þþ~®ºêªF·µŒíÛ÷²sç–/_RöãoìTÆÛ¡zèCÀ9c®W«HÓÖv;oyË£5íSçër|ò¶ooã¯8‰L¦tŽž7}ªéÜsÏb vóŠõôM··µqÂ{ßËÆK.ác(‘® Õ/EÍ`´°T—BWþ­õð@ÕÔ³jêþ¶[:AA„™‰J T}Σ‡ý“%Â¥Óib±Ø¤Ž?uѼèÅ÷}òŪ|ºxœïû8ŽC2™$‹‘ÉdÈçóa:½~"‘ ‡Ql•mM§ÓÓîa¶žQüýšÛØ·irÓc…çy<õ®wñ¾ë¯‡tš57ÞHÿÑGC*…[áLÓ$‚êÕZ“(´Jt«œ¤?˜Ô?d4Fya†æýbÍš5xž‡çyaѽüÖ[omH"Á‘ð}xúé§yä‘ïÓÞ^½œ¨.ñe¦»ânÜøÒ¤ïsÞ¼m,Z4PÓº™Lmß‚/²}{>úÞ²å¾çžûé¸TuçùçŸçœs†ð}ØoKWkîBõe.ê‰Êe<ž%KxËúõd(yCé¡EôZEÇôk‹¢ÞÕ’ê%ÄÕû&½¿íÖÕ¦AA„éC:­œ ô”rbGƒ*tUÒÉ~.ëû>ñxß÷éìì$Nã8Žã×îwE¢Ñ#étzĨ-”U’L&I§Ó¤Ói‚  ³³“®®.‚ À÷}º»»éîîÆó< …xž:  …ÝÝÝôôôJ¥H$X–E<']œ¨§Ói/^ŠvÍ”~ÐMÀ¾M›¦nnPLÆnY™X†ÛÛÚ¸éòËyaíZ0 ‰);‹Áp’(꣼٠ä¢Ã²š‰i¤ç0Ö Ë—//+ÚÐ \”„W¿z#æÂ «·­šÐPœYú‰@³óªWÝÇ®][9çœÆùrZVíÉA/½ô;œrÊ—Ê–Æô)k}Ê)ëˆÇ“ÉdÊÄŸ4ªìAUjŽ¡·ðÞGeþ_ÿ5§Ü¿º”ª1ÛÀbJ u¡ ýº««TÜòÔS¸x\yÇiî¹g1ù¼ZWÿ~ººJE7šzzÚM†a”‰…õôÞAA&N5BÓÅòl»”ëÍqÔx:ªsyÞþ§ ‚€Å‹“N§q]ÏóÂÏÎÎNb±¶m‡Â—eYáœÔu]:;;q‡l6z¬9zbP$›Í’N§‰Çã,^¼¸L¬Ó›çy$“IlÛ¦P(Ëå( 
áx·§§'™,Ë"™L’H$0MÃ0Âÿ V§R)úúúp'<^*•"—ˋŚ.'ýþ`¡ìâæ›Ï®Ïü¥Š×ÑË_û¤Ó<ñŸ?NøÉg Ö2˜³sNq;JÕLÓ(o5±tP“Éi$v53¡·víÚ0¬jåÊ•ôöö6ºm#òìË/sÖY›F]§ZÈæd{ÕÓ;g²ó`utìóš5'œðgæÌÙY¶Ì0`pðˆF7mÒÐîÙºsN£¼{(a¡„¶/¾8â­ êÁ…î[Ù»ï«×¹\yAŽL¦¼®ïû,Y²‡dR­§:]DC6¢¢œã”Ä>¡:õÊg'‚ ‚ ŒŸ|>z—µ··“L&ioo¯*ÈiÏ7íá¦ÿl[}–J©<ÖQ‡§îî‰ qžçÇÉf³tvv’Édp]—xO__¦ibÛv™wœÖb±Žã„Þl£aY¶m×$:% ÇÁó¼Pȳm{ÚxÄiJE'Yˆ e€QŠ|žÿyñE—-ãÐÕÿ—g“Ïò¡—>Ä’ƒ—ð¾o¾Å÷£ Tq…JŒK¡&ÚÃcš„}¶lܸ‘õë×sÕUWqà 7°nݺF·mDž?î)þãÁ) #ºzVë+ª…«îõÊeYÖ¤{ÒX–Åw¾•±s¹5AT1¦Kޏ;ì}%¢e¹ÀÌXnÍ6%8ÓT¢™@èèQ“µ,5€èï¿×uY²d==J¬ÓOþt9ýÄO q¾_rÑ×ÁèE"\·ü~!‚ ‚ SA>ŸgñâÅäóyâñ8¦i200€ëº††^z^iLë8¥×Ñ1´a¨±µ~¨=ÚsWß÷ÃL×uÉf³äóùP ÓèyOôôôH$‚Ã0ÈårtwwcšfÕù§mÛ¸®K2™Ä(†F½×r¹\车E¹\.‡ad2r¹\è…ƒZ–æ‹X,6<éÿDçºú|,Ëjº(¼‰¢¯ çÁà࣓¿c×U˜Íª ä—H&¹øíwqÈ_>Ëœó?Å¢¯-â‹s¾ÈÇÞñ1þûøÿæŒkÎPÛFƒãlʽߚ;ðhÚqÀ¦bìòŠ+X¾|9W]uUS‡\κîevîÜ9j‘>J½r¨7{¸œæÙg³xñ=nFMø¾?LŒ´,˜;ww£›¶ß¸.<üð¾2aÍE=„ËÇr4[³PBœ6{Û®Íæyä2‚Àâ׿>?\_{ÐiR)åE§À´·«e©HgÞÙYÚ*$ù¼î¦ZˆÓU«öÇ1-Ÿ/yŽÛ¶'¥« ‚ ‚0qthgOOOø—)†OÙ¶Í¿ýÛ׉Åb|â7ÑեƴAPаÊç‡ q:DÔ7ÔvÚñCç=ëêê “É„"œã8†Q.êû>‰D‚L&F]i¯´Äù•¢QZþR…¨@‡h±.‘H”N°,‹žžžš®±E­˲ð<¯lœl§žzêä~ñ BO¶m{žNxfÿwèPªdÚ|ÑãÏͳ]]ð!ŸŸÌþÁÀ|Íä ïúß|ŸŠ*;pÙ`Á]GÝÕèK"T! 
M*кbj3°çñ%c®ç8Ã…¸É*ÙE?)˜Lêòzé¥ß©Ë~󓬮è'#•`‰ÄôÈ·u뭼㿔h ´Z¬h,¯8]5”½×R ä°Ã^lŽ9f^ä8Ãóù%¥'‚…‚Úwtÿ}}%±.‡žž>žxâ ZOßÇõï2.•vRÌ3âyJøko‡;ï|+Apï½Ë‚ꡱ®[žçβÔq*=¸ƒ $°¹®Ëž=KÂm²Ù’W`6;1!NCrÄ ‚ ‚ 4Št:M2™ ó2û~帮ÀW¿z{ö|”ï~÷áW‡ †ÃŽ4=3 õ×Þ®¶³mp'ÌÓ¦ÃG‰ÝÝÝX–…ïû¤R©ayÙ*ç”ã™gênµ0’3‹ö¬‹³óRÃ0ð®»"ôÊÓ$“¥Rñ‰„Ð µŽ©?=`ŠÇUâ•¥¿ÿMa1 ËRûK$Ô¹˜¦jG{»j»iª¶™fi¾¯@}N‡~"íí]aƒ ö‚%‚ ‚ Âþá8}}}á{íݦµ(í@Ïã¼óÀqþÃ0Èf RÅæXcj=Vô¼8ŽãcšfJZ‰išáÜI®½Ã*×O¥j/²7Ž!SVÝ5§™®¹”_zé%|ÿ1¶m39é¤qä½óQ9•¿­8+GxPÈ¥&ÍÛʧ6ý0œÖCêO(Äõöö²víÚ²+߯Y³¦ÑíeΜ-`Ë/_0êzÕú¢®®èÄ{ÿ©5f~"Ô£#¬‡wŽ>ÿ©ì¸[X‰oÛ8¨Ðü,åáú£Q‹Hk£<˜k-x£„¸Ú¾¿îîÚJ²+³°xÍkÁ0Jy £"¢&£óoèjT-¬i.»ìëxœxâ#<üð áòtº$pi!P‡§ëÜx–¥„>×Už{ÝÝ¥eGõ~=ôÎðØ¶­ÚšN—ú -ºi!/2¦Ãu•ȧÏ/‘€ÿ¸Ÿo} î¸cExŽúiéÖøå‚ ‚ "Z @£ šé‡³Ú‰¬Pߎ£„2¦i,1Ìó<þùŸ·òÉO.Ãq¼2ѯѱ¼iš¤ÓéÐs®¨¥0ÃdJ¥¦¥ç³— âû]|ìc¬ýZ@jR ï@!ŸsÁõÀHŠg §'ô*tgL!®™SŽÍdf´µµÑÛÛ[V)uùòåMW9Õ÷ásÿöüøX.ã²Q×­æuÒ*6XÏœs­’Ïnºâûð g„9áL”7ÜdV‰¶Qýt­û\´hÔ:6ºàÂXcefF±Råå¤ÓjP”ÏÊGºwU&s­vÿñ}µÏ¨¸}­÷mÛð⋠صk–õépûD¢ä™^Ûâ¶¥P[(‰‹Ñõ—,ÙÀöíma®ºBAyÚvÚ)5~;‚ ‚ Âx ‚€x<Š:‡[OOénåøÑ4ÍPxkoo'NãºnÄ3Î ½Òé4Ùl˲˜??ÁÏþ¯£æ/×Äb±PÌd2äóy2™Ì”ˆ_µ0/¼ýÁ0ŒiéÈáKöìáßçü5oûùµmØ…šŽÅP›ç)Aî~O©Å]]¡@6ŸÇ¶m²Ùì˜s{×u§åužÌX±b+V¬Øß}Õ߇Ÿßû(pz£›RWê)–M×XüVÁ÷aþáàsN˜n²£ø£á©Úr´oýU¯º ú©%@ö.à’î϶ GñN;më˜ëj®³¿óη²`AmOÏôþj1ù={–000özÚs¯Ò¹´2_Àí··18xGÝižH¨ë’É@¡Ðú!Ö‚ ‚ ͆®R JT2ŒX˜S8‘( pºàÂHèù˜ëºA@añ×uq]—°èB<þeºk ½Òó1Ã0¦Lø¦Ž'¾u0+qÝ›jótôP“5=—ð<5‘ÑClº»É»n˜¦È4Í1÷Ý,â®P²qƒƒƒ¬_¿žÞÞ^vìØAGGM£¢®Ù½›.y=/7º!S@³¸' “ËÃ?Ìî㎫1tâè×<ª_Ö;J¢<æúP‚Ýö¿óîwSéW-¼õw6üXYæ÷“£¼àÄ…ÏÁ_¶}¶xìè0EåÔîýÖUÜ_4íìöímœx"d³÷0V8­iÖ^9vÞ¼mìÝ[[Eátº6ÑpçÎ9<ùd)„^G*ÄbðË_>VÓ±AA„ÚÑ"œÊc .išZŠ™%‰PŒëì줻»›|>O6›-î#VV]Ô4ͦ™3 噇÷±ìÌ 8ýô½µ9Áø”ûCè§÷¦N:ËÂw]<Ï“b Ó„²q«W¯æÐC Ë,ríµ×²~ýz2™Ì°p°©fï3/3ç ƒØ[úÕÁ·’(,žkÓ“;vðü«^5é^p•ÄP¢W%¨µ£¼ãLJ\ºŠ¯÷u}}}¸ÅÏÍâ6ñâö•c•J4·øçS.ÄUÓ’ƒâºQtÖj¿ÏÊu/»ìëäóµyÄ¥RµF8꨻X¸ða¨!äÝukÀ]|q?¿ùÍ6;¬8±¶†‚ ‚ "™Lb¹\×-åúÈ´*fšJ¥°m;¬*jư\ocå†f°wï^–,Y—¾T£cž€üå/¹|Ã~v à “ÉÐÙÙI.—«ÙÛÍqIMÕ¤Ì%¸­^½šË/¿œU«V•­°fÍV¯^͵×^>hÏÞ7‡W²-œ0æºÕHÌtm«Å,˪¹TöxÛ:ÅÈ8’N¢ÞÝ¡MÉ_Ì¢äëæ¡úù%ÔeP‚›K©HÔR ÷Šó(=°É_‘cùÅmFr²×ž×Ñý@õß«nk%A`rûímcVoÏ=§­íöâ«ËÆ\7“©Mˆ[°àEfÏÞQ{#AA„ ¡CHuTQ>?¾‡²£M†/^oÂhÀ‹÷ÍaÉ–Ô¾Q…¦¶àÉ'9÷# çÃŽã„^—ÝÝÝanÂZp]—žhbk¡i8`ýúõttt 
á@%EÏf³¸®KC»sölÚ÷öb¡&ü£Q-ÓL§•ib±X]Üc§c€íÌÒÃiØñ-”¦C>£c2ÚMI¼óP‚Û,ÔEwù:U{¬™”ļJ¹>(.׿Ù¥Üuµ´5ʆ ï ¶m›×°k˜JÕ.òõõ ðªWÝ×°¶ ‚ ‚ Lw|ßÇqœÐ‹-ŸWËkyp*L á‘+P>GÏ×Ë£Ôç)ŸD·i‘šŠ5qÀ¾}œsN ɧ5ùÈkÏã€O,æ7T3!ß÷Ëò ŽU%U‹Å( ÓÒ¹e:0TXjGGLj+µµµ±|ùr¶nÝJ[[[Ã{Èà gò2s[ˆ«¬–Ã+2 ÍÍtt£5.ßË¢Íg6º)!•¿%“’…º1Z”BR-Jžrz™×¢bF‹yÚO£Cc£NÛ.ê^”«Ø>(®»cÇÉ€ÁÅ÷CÄ3V·q¢·˲ðý±z•ñ³{÷—9úhIÂ+‚ ‚P/²Ù,žç…E>_=EÊtA¹k™)”Réqv¢¸L|uJ 5¦O×7‹ïuÄ‹{‘ã')EÇÄŠïÈ_²x¼è½=“)øù9Ç×èK»ßLÈH)É$œx"Ïx^X”a¢=¤XCs3»ÖÝVæ¿u'°€'[ˆ3M EÒjnÔ–eMËÎãÁã^⢠it3†1ÒïÉD‰h)ÊsÃ麃ºqê›»G±ê6¥pU—Ò _ßÀ]Jƒ(:×\–’çžGé&¾dÉ `É’=8Åu¨ðZ%êù”nþ•èÜå‚a­O–ÆÃt´_AA„fÃu]úúúU(Œ/EI#ÑcS(¥pÑM7PãÛ8%‘LçfŠËœâúVñ}œò1påk-´zðí—9”Æïz|Ÿ/î3OiܬŹÊx(·xìLñ³Ê44QR(ñM qZ¨ûá)§4úëØo|à€]»F_Iép„¯$úéîfÃW¿Ê ç…i¥¤²îôd6ÀòåËqÝ‘B{{{Çôš› :}/ y‘Œ-ÄU‹–ÔUs„ægºÆ²7ÂE¯ze£›Q†Éðb=uóÕ´×™ƒºiꪬ¥û‰1Â1t1+²OýDOoãRò¢a°:_Ýk¼˜ÜsÏbrvi ’(~®Ÿª%(yõ(ï;}>>Ê-^lJ9Rƒâ2-&( vô9$¨í ¤¸ ‚ ‚ Ô—|>eY¸.$“Ë1jaÒXJùŽ£‰£Þaú}åØOk=JbXÙ_Ôs,YbR>–ÔëÄ(åeÖ^Uùâ_5ÎÖÇÌEŽˆ¬›,®;Ö£`ÝÖÊˤ½ÞtŠš€’8ÖÚz"ç5ZôÜAzýoq­Ž{ô—1Íÿ;òJÚó f9PÈÃÜ„* ƒ÷~ò“áªÎÑ/ÔÙ ¼¥Ö¯_ϵ×^Ëš5kʪ£nܸ‘µkײjÕª†VMõ€oσ×ïÇ>¦gÚ1¡…¾qÆ6§@S­UúéZ4¬TçxÓ.ëÚͬõˆó(Uo5"Ëô`C?eÓû·)õ²¨¼upŸ¡ž®ûK;¥§zÝÅí;Qƒ“,jàB‰pz0áE>×OÓ”Š[$‹ÿu;Û) u>å·j^wz¿zPÕÝÝͯÏ??ܯþAA„‰ãû~’ÚÝÝï«4DQO¸€Rž²JáM÷ôô0EyA2]ÄLcG¶ÕcB-(™”3'rl=f®,@Ö…»š‘Ϫ£Ë c|>_“‘Ž¥Ç±ú:ÅvÖÊxœõ¹ëïÂNýõ¯áu¯Ç^š`áã§XÒ¡Ðé*Μ¢Änú2J®½öZ.¼ðB–/_ÀÖ­[éïïgÕªU¬Y³¦¡ ý¹ C¿yû³vMÉÓiÉWÉt̹ÖjþO6º ã 䶮E$ƒÒÍ>Zx!AI´ÒáŸzà£*ÚÝÞ¢®êR´dŠŸEŸæY‘ý¾váBŽêXÉŽùóÃbúI M)·\ 5Èу JÙè`E‡Õjo·Ôë”AÉ0º®Þ‡>‡.ÔCŽ,¥P-X ‚ ‚ 'ŸÏcFEcY¥ÂaZçÐÞlÚ­ÚL¨Z=‡Tq›Å@¥ÈhÞâž*ÛèÐÏZŽ¡ÇÍóv*‹&háO{ùØc¾û…{ûû/.ÿ@Oˆô‰º”O"*D‹Å‹300@WW¹\NæðÓ”0G\[[7Üp½½½a˜ªmÛ,_¾¼¡4‡0tz/ÊGeÿ-‡ÔtFõÆ3ÿ­;á‘F·¢íu6Ú ÀD‰c•ëDÑ :”Ô£ô$-úy!²ß±žÚ‘uÍY³øC[X^Ÿßo-YÝt8må=To̤݇"ïEˆAA˜8Aà8N(Â}Û…ïøðpB¹tôC´ÙxÑb›–@¢¡£#­_+ú-”æ¥Í@†òp×z¢´–K®ÕЩ~îyâ åèØe(y0(/¸HNiÇq0 ƒUqµ{¦{Ms†kèè訚 β,¼Æv.8fG¼v׸¶ *Ê–äóN”¼r*+6Nw‰©èV…‘ø¶;o~óÿ4×S Ýš±žÆ§|Ô#m2Ï6“€tÞZÈ%Z j,¢yêFC‡0è´£Íq§Ñnø'Mâ5AA˜Id³Yb±†að·.ܘ„3rJ8Óã¹ÉH ^/È¢T0a¤<Í`,±q2©Œ,™è1ÿ|8§ôƒt>žÐ3®\ˆ3M3,Ð Lhtjå'®Ïó—Ö¼þóÀxªs[ŒrS¾ÆQ ÊKG‡Û ÂTp‡{~7F¸ëPÔÉX\`ÂÀ‚ÛPËàFç««âu-ý‡9ÂkAAa|ø¾ã8Ø™ ¯óá{I¸¥<»y<ËÆB§VÑŽ!Âô@‡ïÛ÷ÕÒB­ËÐZ”ÈX, Iõ}Ÿ 0 ƒ|>ßèS¦€–âîúéqµå5­7{Q¬)”ÝÏ¡¿¥DîµNÂa¸ÝƒÃ¶ÞÕèf 
#Åø±ÖJŽÉ÷8õýÚ‹®ÿG¢ÒS¼VÌ·©<¾<çAA˜ÿ½i{ó®öÁï‚ofà’|Ò©óÐIðáôÁ àæ‡”øÊN'»®tÓ´¬°ºHø¾išØv³d êIËqOí:Š×ìÙ3ê:i”÷['ð¦¢ç̪£N¤|R£”ËJêI,xé%Ž>ðÑF7e#%–mFü¢j^«SáxD¯ñ†èê°šjú`5‘_„8AA„ñ æl·t± àj>SNE‚Ð À>ßúö’§«ÝùŸL«’jY‰DÃ0fDô”PÌ×ßßÏÖ­[Ý–q€ƒ·=Ï«ÍÒ¶rÒ«ãìû(æo2ÁõÇö†‰ñâëé§.4ðü¦}¼æ5‡7º)-iB*U›§KÉ'3b-áÑB º:¬.g_†ZMˆ3 ï|çÔ_pDçÏ%æ ‰¡BÕke"”k­œ*‚ ‚ ÔŽ‹ò†K–ïã8L>K,¦¸Z‹w ÂTà¢æ/tnV“„N†'‡ö}U¤¡H<Ç÷ýÐáH˜9°iÓ&6mÚ.\»v-«W¯ntÛB\`Þ¼meËåQ"Ÿþs‹ËTH¬þ?’ˆ–eø<´W,óQBy–±ÑO…j™Ø§YûyÍ…Ée{söœÚèf# «Š×‚Oõ¢ ^•} ‚ ‚ ã#NÉÃÈó<\×åòË/it³aD´Þvè¡/qÁ¶cÕ›ÊÐiǾ>–~âxž'f8³'kGƒƒƒÜxã´µµ±bÅŠ²ÏÖ¯_ÏŽ;X¶lË—/÷þ½ ÿã#‹p <âÒiå çûª´u-Ÿ TçoS ;‹¥¯Ç)•Ðî¡$jï˜Ma’±ŠÍrKÉŽ÷׆aª¨g_,S…ر0]˜Êq…CyDïûÌûQ.¸`<å·a8õ´c5'w]øÃ–óžÏ½XÓvù|žÌhÕ$…iÍ \3Ahoo/K—. ßG—WcéÒ¥\uÕUáû;v„¯7nÜHGGGø~ùòåãªÐêgYÐÖvû„OÒ0 »»>0K¹ W(þ7Q"šMIH3Q7“’ØÍ/¥=ޢ瞠<ìÍ«²\¯›B‰f± :—UT„òüwFqÝlñ}>²_¯¸?ýPòôs‹Û8‘ëݯSeY­B…>ÖTSO;6ƒ@\ê§)&ÊK®Yn£#ÙñþÚ° Lõì‹aª;¦ S9®Ð?‰âÿý×[Ê‹/®Ëþ'»/~衇ذaçž{ýÂÌdõêÕlÙ²…£>zÒs´ÕkllÛö~Ù…é…WlnÀøx"v åãã(ú!¼~°°nÝûI¥æOÙõÃúõëq]·åÆÇP1˜HÀßyˆã6^WD6‚²dõºHƒˆÌÓ‹µkײaÆšÇÇ#k¨¦6HÑÉׯ_ÏUW]µ_nÉÇ{lØ9÷¿Zy«^ל5-šU†Š¯íâ:º€B†’ gUü™(!LfÐWQ#ÙŸö|Ór:¿¾RC‘ö(!â:ÑÿDŽõÊÓž>DŽ]IWñ¿E¹ÈV-‡ŸöˆéšŠû‹×Ñ"gè;ø`’çœÃI§œ²ÿ_Ú(L¦ŸrÊ)Üpà ¸ÀK÷ÌbîÜÝum»Ðütttpà 7°zõ꺧šW¦¨…ãŽ;Ž7¿ùÍ/‚®¹¦—•+;pˆÅ<^z©HrÕUëyÏ{.⢋:¨ëõ&—+VÐÖÖÆ¦M›êvŒÉì‹O>ùdI†/Tå†n`íÚµwÜquÙ½ÆÆ‚ iµq”ÆÇ•T>°¿ñÆÝ†Ë+_yy]ÏMh<Ú)¨Uìx´ññÈѯ=º´Àó W‰ë‹b\:¦»^ázBÃX³f Ë–-«y| q7näÚk¯åª«®bùòå¬^½:tÍ\¸p!¶m:ˆ]½z5Üzë­,\Xòvèèè`ýúõá{Ÿ=žþù¦ôˆÓžd…1Ö‹æg‹ŠcÚí:S±®ö Ë«² ”nVÑ›ÖHr¥Î/7V‘ }^õÀGyü%©^MRÇО:ÜÖ‹,;©NmÓÔËŽ¼ôGÝ_çÖ ‚¢šOF_<ù<<ð@?½½«ÂÁÅç?ÿ(_þrwßíãyiòy—gŸ øýïßËu×]ÈW¿ú¿A@ ¡ÛBõS­I”’Vxžzdèû~⾯Rfçóù²>Å÷ý°2ëºX–®kš&Ë–-«K›ÅŽ…éÂTŒ+*…¸\®·¼e°Ñ§.L#êmǯ{ÝþiÞiácyÈdÊ<âD„ (Äõ÷÷“N§Y±bEYŒô 7ÜÀàà étºly%ëׯgáÂ…eq×šŽŽz{{dáÂ…ã.Ñë»:KW^h"t‹&{ú¨ó¿M¦ ÁørXéӱЂÙXè0^²ªCm+É nĺúLŒ’xXo ¨§ ÂT1’×Û†}n¹e1Gµ PøõnÞÆ.~ù§m äp‡»¿|7Ÿýì6^ÿú6l¸Ë²ðŸ'‘Hìgk„VFúâé…çyA€çy$‰P ‹ŠiZŠi:l=ú™Æ¶m|ßDz,ºººèë룫«+\'‘H„¿eY†A.—Ãó< …Bx|Ã0ø—ù—ºœ·Ø±°?èÐ5P"t>ŸÃØ<Ï#Ó×ׇïûäóùº¶eªÆ%ç… €¾¾Eüö·+ëznÂÌ¡žv¬ïN,Z´¨üC×…\n\ûf³°2H5÷Ê… ’Éd¸ôÒK±m»ªBÜßßÏÆ‡‰ežçÑÖÖÆå—_ÎÊ•+Y¾|9®ërÓM7ÕÜÀ˜sèSMé-¡«‘N¶@Ôèhq³®!P lc]ƒè·§Ãn5µf0€­u M­—»ÀÎgž)–F™ Õây}J‰þjquf£ÙñþôÅ£C¡6ü;ï<¯~õi¾ýíßlÿ 'µÄY¯ÿ-†w> +Á 
¯âÚ'ž"{Öaœú¡Cùà7ÿ~å#tuu‘ËåH&“X–…ã8¤R©2ï•ɘ¨f³¥ŠÚEÛV*‹ólÒiµ^,¦ÆP¾¯rh‘Q¯çyà8êµï—öêhô˜Õšïº¥ýY–j‡®ôíy¥÷z]ßWû™Îiõê9¦†ãº.¦iâº.‰DbXÞF-hÁL A„Bºiš8ŽnŸL&Ãíƒ »¡¸¦Çuúó¨hÉdp˲Âí}ßÇ0ŒaÞnŽã`==£×€ö!Ú¾Î>ûìº\W±ã™‡çyxžG, m4ŸÏ‡v­=3µ`±X Çq0M3¬ ¨Å5mûù|>üÝd2²Ùl(N»®‹ã8vØau;¯©W誓š+¯ü3gŸ}†!ù…É¡žvì‡*ôõŒygDËþV°¥Ói©–*(!®··w˜Ç[4 uáÂ…¡Z\-ÝbLÃPëèqW¨?-Ðe³ê3ÛV¯]·$Ðé}h!ÎuÕºQ! « úúeaõ§ÞcŠéLÔk,‚a…P\× Ã6Ý¢AiÎ0ŒPhBQM‹–eáº.±XŒ®®.|ß'•J‘ÍfÃߣý4M³l’¡?O$˜¦Yæí3•‰¬«vÑF³ÅŽ[—tZ•63 £Ì‹R£…_Û¶I&“$‰P3M“l6K__AN§C±Yÿ>õ¶¦i’N§±, ˲èéé!===8ŽîË0 :;;ñ<ß÷I$¡ð—ËåÊBë&›©Wx”?Ôÿñç±fÍ u;'aæQo;~ƃ;þÀâ¹ï-Ml£Ë±˜xPE!îÐCöAeÍÁÁý‹ÑV*pà ÒO5•7ı֭EÿÑÂZ‚ê"›ÆFåÞ«<~ øÎë^×Ðë2Q;ÞùÔ¡œtÒ3£¯äDN:@ nÝÅ×Q×ËJÔÐ¥vƒâsQÉ÷šÏyTh"&jÃ#aÛ°{÷,^ü‚—Oà ¶ŸpÿÖó8ïzŸË®–QRÖ=xøÄ‡ùïŸý7Óý7åEâcû¨™„^ØNGÇNöìùÇ9?þñkyË[.ç„þL_ß•lÞü;àbÎ;o+sæ¼ÀÀÀ"¶m;œ#ÜÅ5×Ìçž{ž¥·÷&þîïæÒÖv;»w{üö·6‡¢&=¾Ÿ¥PH„žžç10 ~\†aÍzAÛ¶I$Lâqå §Å2ÃÈdTøœï«ž+“aÄʯ†QòŒÓï§4.Ë媎Ñ0Íòíôq‚ à—¿< XH:ŠO“mÇ­‚q¹ ‚Çq ¾öXB‘M‹S‰D×uCSÃ0èëë+Ä<Ï£««+Ì]“L&‰ÅbôUߨëvÄãqr¹‰D‚t:zçd2™ª‘ z“)šÅb±Ð“¨Õ˜©v\O*½-µxì8Nø;ѱX,{“ÉdøÒvªE¯(Ùl6«{zzÂûˆ~¯÷ÕÞÞ†D6é6 Ó4Ë~zY*•*ó®‹Åbez6›Å÷ýº qc1vwAÀ®çŠ+$,U˜:&ÃŽ/žUÃ*h¶GBc˜ ¥$…#©Ä½½½U½æ¦‚Øûì³¼F3¥éÔ"k¡4 Z"ßu—c¢rÐ 8ûg?ƒ:WNlàÿð?,Yr‚Z =ÜúPb›‰ê´ýâ…Ñs #Ç›‘ÏôC‰yÑ}¥(•룾¨FyÍe‹ÿÅ׺mSIe&`Ÿê,ʼî `ïCñ//?Ãû{+™L†?|$ßì>sÁ'Éž“ÒõÎt&;IœPƒ$90LÀ…ûn=ã‹1>óqX ÁÀÁΊȠæµÜºî›¼ó¯Ï$•JqóÍ?£­m![¶¼ŠõëßÂóÏÃe—Î3ÏÜL¡P( Ò“ ô\H$t.î¤w{/ïÿû÷í³ÚI'Ó˜–‰ïûÒlÚx‹>ôÒámzòj‚”ÉdH&“a¨^4üHO®†¤”š~ha-ŸÏcÛö0ÐÞ2 &â¶m—yâhOíy¦E-Fض]æeÝ¿eYeŒG óÔö]Ƕmâñ8±XlÒ'APò¼uÝ’'n"1¼ÝÂôF‹k:]MÖhñ J¢™öô4M3¹´÷¦F{–éÏ5Éd2Ì¥CK»»»Éçód³ÙPn£EáZÒêT¦\¨”£ûµ,‹t:¶_ß[¦.¥[½ã8¬\Ùß”i‰aLô-6ԓ׊ߴ~ yA…Ù J»®K*•â£ýh™ìº.×^{-«V­jˆÛ|,YòA”j!L¹¥×š®(êÄ5ÕR¢,ݼ¹WfbøÀiýýp j‰ͺ"+èH %˜i/¸ZHRªj‘¥$ÄAéB:(A$O¹/þ«ôïd]]‰#ê©'Ký㽓 ïJÒ¨›fôºtQ*áë0¶*í£®uši³O\ølo:žÁ¾‚Û{3\tõÕºñ`æ~eˆ…[/U‚{F]>0S*tg‘¹ð,uÍô5O«•Ì^à`B¥Þp õ:FØ!¼mÝÛøÞµß㽟|o8aÒav7ßüa¾øô{8çÆ+{Š–òJØ µC…}úþO3ßž¯Ö{?Ê6M`\öÉË8~ßñÜôÛ›XúÛÍôÝy8Þ@©T6„Õoú?ªª!gîϰúäÕp¡ §Óù®tîPƒ/Ï»ƒGéà—¿ÜËå—œ-[î = Ãõà—J¥Ê&Zžçqæ™ïå«_}Û>Ï+å³ËdÔë|¾”óN‡Î àyi¶ooãÖ`Ûj}σիéïŸËÀ«^5ÏSÛ%êàâ‹wñØc³9í´m|õ«‡ñ?ÿó {öÔh“lY´'šÞtxZ2™ ÃÖòù|è„ð\.*ʸë5mS“YøD‡vFõ|‡¸ŒL&C"‘ÀqÔú©TɳÅߎã”B¸ µ¯|^ýõô¨íºº”Mww—lº¯Omçyj×Uù)¥øÜôB{nf2™Ðæ ýÊ<Ï 
s‰f³Ù0¬Ó0ŒÐë-‡ë¦R)ÒétUÏ´lÑ(u8u5QË0 Òét(|Åb±pÝd2öÑQÑh¤Ñ¶­lCeèv«¢çz8¸nÝ.Ö®¦)aÚ²uë.æì›SJ´î8*Ñp- …¸… rà 7N§¹ä’Kèèè`áÂ…ôööJ¨-ÏE=1azg¥nB,”–PË0"Vãz½®…š{g¨.ÊÕR¢•¸å?ã¼ýü~%*øÅ¯ 4Þ$*$J"–ÙÀºQ"B@I˜ (}:ïœnO€¤üÈ~µpæ׳(yßiÑ/A¹7ž&WÜÎ)¶Å:‹ËÓÅ}å#Çj/®£EB-(&(‰&^q‹rcr"mJÛâÿ{ÅÏõ*=ŽÕžŠzÿVñ3}­trÃlä3}ýò”êcƒi&¬Oß=èŸ Íåé£VðïŒy$ïEö‚#ÉPÒÂt¸9Åÿ ¾?]&Y{t&( š6¥ìÍú{æœ;‡÷.{/¸J¤À ,ùuÇÀÜ’­•(y›&ŠÚn¢¢‹¿Íã9ºàÄNd÷Ý»9ïèóJ'£XÅßrÛ!m`ÿýøßxnçseUÊÛè@"W.„¸®Ë-·ìáÿÚÚ>Ëc]Pö¹ö†‚€l6K:&‹…ù€,Ëâ¼óvðoü ]]»ÃP+•†$Ç[Þòàº2¯Š €øU|°ï†D¢‡t:çy<ý´OG‡Š<ª%±% »»‡cýþ³AggÀùçŸÏ1ÇœÖh“lt.C¾éy™L&ôÖÑÞlÚÃLWÕÛémtØh5ÁÀ²,r¹ž°Ð‡i>yžÄb1%n)2µ<+=À·¬òél¶$ð꼇¦©„³dRYHw·²÷hžDPëÄbjY.Wn_z}-¨Y @{»zŸÉ”;híAUÉfKË„æCب–³PãºnæœN§CQÍqœð7 «ÜêÐçj"s*• =äòù<±XŒX, i•Äb1âñxèAZ¨¨Ã@A‰b…Ba˜Woú¦a‚P=ÌÒôô¼Ë:¶ÑÍ„šñ€ÔÒßñoó,=À¯ÒG%“Éð>.³õ -ÆéŠ"ýýý\~ùå¡(×\`¡½ý7ú:Í( jwƲ¨]0ËQÒI2”ªk˜nÃŒ={ö¨A^DPÑók¼in¢û©&âi/8#²o-Æu£Ä0¿øÞA}QÙâ_%>ó{•‰vQ¯=]8"IiD•¥$X¤()2Ñ6j«;Ò.-xh#p"Ç÷Šç›Œì#z?sQ^~ J¡½YJª¯¾Vy”Z‘6å‹×H ¤nq›htV²x.Ú‹Î-¶)úe‹ûÖb£öpÔ×ÉEåMkQ´ö8ƒãLŸÇ Þ#³ ׆ŽhêRb]wÒ]‹³iHß?…ï|εÁ°Õw¥ÅË%{³‹×Œâ2-ü:Åõèìêä’oŸÆÂ…3ï‰KáÆzÑÄ”Eï»2ÏQ›’ýF޳ïê},úà"Ž8üˆ’Hž.·(èv|ðÁ¼õà’CqI×+ŠïvÂîaÉ’=üÃÀê\²‘óMadR±Kÿa)7ÿ7óèࣼÿ ïçÒ3.åã¿\5µ«½H˱À‡X!Æwÿã»|nÁçxëoåÎWÜÉsÙl–k6_Ãºï® '¥‰D‚„•(‰„¾º¾ÝF7nN zÙl–Ë¿Œ-ÚÃ-·œÃÑGÝhókz‚@yqü>|ðI/~–Ãû'Ló'¼á ÛùÛ¿`åÊ’Ixà€5aeÞ PbR"¡Ä0σ÷½ïïð¼GÝëš|ë[mÃÄ©X¬ù‹•D9Ó,åTÓr:ÔÓ0JU5:Åšç’EQÚóÔ_µ:ºº¯aÔ'pÁ¶•`hYJ諆aè°kâšhkÇqÂ:§š~` ½ã¢yc±Ùl6«»»»Ë„®L&C*•Âqœšò¢E=ÜF ÑÖy߯òjÓý§0¹è[žþ¶\×eåJ‰„Z ¸ýö6‚£{Y!÷"¡Ffïÿ.ê‡,>ô¡bõ»ËÝa’Ñ…j­ºÚªXó¶œønœè\&æþÌ|*ËÔF+¶Rlçd…úŒçËչG¥ZL‘•[=¢«í%e¡– xÎÕ&|:ü´2œu4T‡«F‹j´¸¡k§²;’p¼ ^vWÌšEÊ·ÔyR§ä"‰qKÙ¯üË/Ó}óÍ8ÿçÿÿàñ￟Ô7¾Q~0ß/&ŠÁ¾âG{9j¯EýyðÅ]÷sï—ZwQ{ƒê0cí®J8Öžp:ÜØ dƒY8!uœ]|Íi˜Š´AÛGôûÚ‘^ÇSí:ñį³ãwŸç˜‡N+W7¡$Úia/GpDi_QADÛTñ|~ðw?PÛý\Ðuüä•C( ¼ž’ YÜ&†ã´_lllÓ…Ì‹oÛÆG_Ø]òÞÊйÏT០è䤓ÞË>°M›þ†Ï|æTÞùζ žwPòNÓÞfÚ³L j¶­~:gžy,±¸®Éå—Wâég*ÃI5QKë£9ò輂é´z=RÁGý­—ÍQ8#·P_|ßs¸aø´išd2\×%—Ë…bš—×ù‘ª¨éîîßWz›i!l²ó¢Õ£’¯P;•õ²n»íh~üã, 3’»ïžË‰'Îñót:æhr!à„9;YøºcÝ ¡Ø”jLçô.ƒ<ðÛÓUèbnÓé4™L†ÎÎNzzzÂ|+#¡óHÕL+\|}:Q!aÏ#*úéB#]V-ŽÃnšy¡[ÀvN:î)î?þx>±p!ó2¿”˜6ŸWñb#ʼnþ~b÷܃yÙeašÂ®eË0—-+×`µkãÀciøÅpênH÷­¶“ó¿è1ÖÌ]SÊ gnüÈoÁ6ÀóÁ,ºé ”DDÓ¿¨D|Ù…X¦do•‚Dà•\F›7hÏMmÀ/þóDv™—œ£Tz 
Ž¶ßèzQ›6)Ñ^¡º\µ%æsŒ ]„"àÏ×þœ¿º¯é9e3¦cÿãX4œˆ µ,ž§<´R)í%pÍ5×ðÎw~Žï~÷Cäó&™Lª,y¼¦šX¤"•?•hn·ZŨÉ&S?Ì(©t>¹fÉÏ–Í÷ô&Bª …8Žz·é°ÓB¡P,Jã…BœFÿFŸ4j‚ 9¦–¨çyðØc'ï߃fAh¶'åó%T©:®ÃåAÓÔBœXwÏåÙg7º)BˆEþ¦ó-÷¥^uØ['«Ÿ"[–…ëºaÉk]­KXtþ’t:M¡P˜ž}€4{˜Üxæ“_­µV1¥ÅH±´§¿@ÛŽË1œŽbÈâEO¸Ñ&ZŸṵ̀ú$”ö£µBl[ÅÈuw¼O'ÇçxrmK€¡c劥Aý×1tÚ'2IÄ÷Õ9¸néóÓ§Xf"ª68Žj—N¢¥ËB&J (Ê݈tIÈÓm¸~³ã,n’Ž]w½³¨ºDÎo2:D]#êm§Ÿ~Å‹nR=œ<òÀàóàrÏâ¨o¿Ö?óá°ÿ=Œç^óÜ$4´uÐùÙ>ùÉ{qœMÜvÛìÞÝö·‰DbZô½™r•êŸP³_º»èéÙÿ} ÃÉçóau_ZÚÝÝïûd³Ù0÷‘ïûtvv¶Lþ4˲Â|uÂÔ£³Üxc?¦)¢¨Ðš,~v1³üÕˆãUñº*ij!à‘ßÍDˆ›®öMå;êù£Æ-åóù0¯‰ïû¡;sôÏ÷ýðIn2™$‹ Kª÷£s 8Ž3j…0MeymÇQ ¶R©TxÌzL8}ß'ŸÏ—U} ±ž®Œt~ÑÜ435_Ìœn»ûLÏ£ÛÉ”<þ´;Ê&ª/ÐÑ€þ ²\b5 ˆÅXƒÿU0þoé3Û®®T–l„ò¶ëcêdY•´hb.íIáûj¿¥ø¿èú¶­Ö¹ÑãÔÙ÷0x೜òä“¥2–¹\ùµÓîW: ½ütR. 9üÜG­“H¨<{†.o\ü׃º¥Öø5Y–ÅÚ¶µ\õ™«”0YüÂX÷Ì Z Úû«»<ÏáMoЇUf=Ï›±á&ͤµX–ˆpõ$ŸÏS(œnßMcš¦ÊCÙWÎ AˇӀÒíèG?ÚC<¾³ÑM„q¡Ó ÿò®·pÅËYˆ}hø:ŇÂÒÏQhtÆâOšÇÑG÷«7]”” AhÀxðE¿°xÜÞQŽãL&I§Ó¤Ó*«ºÿ0 ƒL&ƒmÛ¡@V(ÂÜ)ÉdÏóB1-™L†ù[´ktWWétšlVyüDÃN²Ù,¾ïã8A„O‹u%³ H³y;Ž&eŽÇã#ž‹ëºäóù²mô1õ²t:aÄb±0ÿŒ&* VŠ„žç {¯·ÕÇhoo/ÛG´Zšçytvvꆩ·O§Óa» ïGø¾^‡ Âýéë=Ýð€m䯇kŸ¾Lþ5­úæýŒQ³PÎV Tª?žµGZ­"`­!Ó,†ÂŽ2¡Ô¢˜e©ó¯&ÆhO<Û†™—T®µ}¦©D¿žžámת‚NØ¥?×m÷«\¡tZýyžªà8Jä[¼X…J€rAì)þO§¡«Kyë¥Ój}]v³H,ㆇn í’¶éíº< ºzï{¥««=|PP(Âʳ3Q„kf’Éê?aüè0S-¸ÕâÕ¡«1·‰DB*6 O>ù"ét|ÿw$SŒü´÷ÕÌ{ú7#Ž1[¥O¦Ž¦öˆó€Ãvì`É’ƒÕ¬L‡Ùˆg§ÐB<þÀnÎ?ð€ší6S(Â\+†a„·þLã8NUϱ¨÷\"‘÷,êÉ£i𡍤…:Ó4±m;|rxë¶n‡mÛa˜J4³ÒzzzÂíµ8}¯iY¶m‡bcô µ®´¦×Óá·étšD"ŠyAàºnÙ5Ñ^ƒ†aÐ×ׇçyd³Y …B˜¿F·©§èN‹ÅÂ'üÑ}U»z=Ã0ÂýEó⸮‹mÛtvv†Ë7lØ0åv8Y<ê±Û_äĹ'BÛ7à6œîÃÖXujœÄ(ÏiQãO'‘h.÷œ‘0 /®ÑËÛ0ª‹›#§.»9Ö`O_ÐT*’Ï,Å^ê0ÞX¬$BêpÝZáy°yó øþýìÛç„ýc+yûÌD,«äh*Œ}ßt‡l6‹aÓV¬ÚïâW„хоýím†at4ºY‚0!Θ÷³ÛO®ú™x Õhj!ÎßÞ¦&½>jV¦íØA¹%^)Q‹u:7Ž<¨ÌY//s-ؤR©P8«|ò\)ºé e•ëU*õ{]øJb’ïûaî9ýÙh¹R*øLÓ ½ã,Ë …'}üÊjgZð«ÖfÃ0ÂÏ´(¨=æ´¨½û¢×¥2!tå1«µ+ŸÏ ¯©•èÍ5zÚkO¯=‘˜©sÏ=wÜÇi^,¦PûÀS»Xð–ÿ€›×¼ùÍê‘~„ÊN|¼ƒãj7=Øž¬¶>‡zÜp*ÛX-D¦–°™Ê0²zL2´÷ßtšÀ<îÁ Îý‘ƒ¶Ý¥ŠÔ ]T?kq(uí¥¼2z½ÊoÝ¥T¬Ô ô|Ƨt[°(Ý*´Eøïë7«“Sv>ÊÂcwÕñ(“L&S o!(-ݧ¿ÿFr¹œ<Ñn1zz”ÉÎ@GÎq¡S-$ ‚  ³³3|0•›a»0µx”îµ{÷þ–÷¿¿}v' eÏÐ ¼nøÍFW—Oz¡’¦âöìÙƒmÚjVUÒ”fbzžŸ£”;N'£Î¢Ü(tŽÈ—éÙœ.Ù)uÂ޼¸ŸÇæþ€PˆÓUu(˜ïƒirå{ÞŠboyË[Ôz‹—òOM‚èQí&0Ù7†z1H§Óea³ÍÊt®ÀvKç-$‹v[O*%LÝkQ-K)kö˜‹Szf£;í­ýfd?Ú±Ú¡üÙ 
Å×}¨ÛŽ^׎/¾_&:{žWV]íIhÿ¼y|ô¼Û8ê‚ êzýFB{¬Npcp]fOceC¹ÝºuO<ÑŽmÛÓú÷<]1 uیǕW\ 9ÍÖ|>æ~ÓÞç:‡j&“ C°¡žè{*@ÿm¬Y3}\ 3‹G\Ø9çÁªŸÉøA‰¦â^ `Ñ¢äÏ)ͦ´×[e_]9'(¸|"“$ý`OëIJ¥NlD”êÂ’í¿åüóO--°måçy¥L¦Éf³øÅЦe•"]W v‹«ÄñÓx\ µT|&ÚÏÝö·öÝ —^2åm0+þë.ÝG p>êÖíʳÙlè¥ å"”~¦Ó…zÎcC½µp–F‰}wïÞÍé«Va üýÎ<zøa:?üavíÚEÛÀÝÝpÙeœ1>ÁÒ¥\ÓÛ‹¼ú¸ãØðì³| Xsÿýœµo»,`íÖS8óCØñï°è¢‹xðÁYpÁì¼óNŽ9í4¶ÞuKN=•·x w­[Çß\{-ÿ²ao|õ«ùÕþÀÛ:ˆsÎáá;îàçŸÏwÞÉÅsçbÚ6±T '™ä7óæqÏ=÷pñÅsÓ'>Áqïz—¾á \wÇ|Ý4ÃôT*Uv]ü¢¸¨?מ2÷Í™Ãé4϶·sô—¿ÌaÅB%Ó‰ €Ï~ùó7sÆ÷²vm¡æAtôy y=#³cT¦ ™è:ÕÖ÷)Å´E]Y£ÅŠŠ»e©Û¦®“|;ø{•@g̸/GZÒ tÑ'&Bò¥µ>åŽ -†‡º_û>Üÿ)@$ Dßï?ì5¯„ij!nŽ†áª™UôIæý´Îm¡“ͦR)•'Ê®Ø^{Ι(—‡ê×u¯PžtfñµKÉ›Np޵‡|áW<|âÉœ6a©£Q,‡…>ëO?~åâ?ýi+­££ƒL&T·íéé <*ãñ¸ª€›Íb›&v*E±\<$›ÍÒÓÓÉ>ÈU«V‘:ì0Råã=¸dÍÖ¼öµÊËîŒ3TF„K/¥pV®T·˜c ¼ç¼³Î¢Èœq>ð“ØÓüö†Oqdi/§ïÛÇéûöñ§§žÊ€=ôPÎYºwíZ,Ô­‰k®¡ 8~ÿ~ž3‡×<Û÷ïç]ÀN<‘¾ü2}çžË£ððÊç>GlÞ<Îß·{Î9‡=ÀÃ_ø 9„,p÷ßý.pïÛßÎÞ½{ùÔÞ½Üý¯ÿÊÿò/ÜþÛßòïÏ<٦ɮYÃYq kìûêWYÓÙÉïóÃçžãÀ¢EÌ6Òi5îxðÁ+xä°âê7D]ú¹BJÝCš‘ùñòôîÈrq*=.ÍÈ6!2´yüeÃ5"=ߨÜÎ`ð0Ø÷§§»Ôg{y¸}ýß| ¼ß‚{{ÀþW0 p®/oç àùòJU©›<Œ•?²[Àð,Øw‚»ü6à1` ð#uL\ öBàå,ç,°~® æBˆ_X`¸¼EÀb0ãeð:Ê'm.˜ƒàïßnóÇà]Æ+à¿Ü ¼J|Ï3é‡aÎ…Ðq‘š^Ü ÿÖ‡ü͹¬zùå†ÚÝLà8ð­oý8(¬T}/mv/󆡟 ª…ãèwýVÂ@çOÐóó„ N;"Óµ›¶z»d¡\ÔsJ”®òò‰ò>tÞ†v$³€Tn€cŽiÍܹ‚ð–E°û£k:KtttH~Y¡&M+Ä9À^Žz£z©ó&‹Å0 ƒx<TsÔ ÞMÓž_ |Vä¿ÎGJ¨ÓÿòòR¹U¨›ß<´˜½oyjÔetÑQ© ´øæ8J”ëê Ž‘ª-Ž…ëV~ކšfó&j×¹öämê´à£Æú¯:áÎ}î­ä¾ã´ÉntD+ù‹E …BàñVŠÇÉ?ü0ŽãÏç) ”J¥àaS{5•J%òù|pOˆV'ÖaÚ‰D×u¹û†ˆßp©ˆÝ_r„R$‚¢ „Ž9µ*¼ê÷9IÂg°/8ÀËÀ'¸¸½½bù9çÕÞò÷Šw«çŸ¯þGÖ‰¯\|ÎïŒyóp€Ì‚°`šyÈ!ÁrJ ²-Â\´ˆ_Ö7’ŠkÖBÖ0xC?··³§³“.À3 Ö|éKüÉ·¾Õ+˜^-z‘]‹®#70xKêßÀ(O/ ]\p~ \Q+[~†¿^Íç3(ìðŽsøG€ûðÏö¯k_y¿çxà§ÀÀBð_ ÞÕ~{˜f·CñTÕ[_!t¿zîT›Jv@üoÅeÁŽÎ,ïëGÀ…ª}þ½]† ‰mீâÇÁXª îX‚ñ¤Ú—ø_«ÝKÁèwAÙ«Õƒì¥[^)€j™È;IýéüþpׄZ†ÖL ÀØ^>—ÛQb¢ Ž©4‘üÇ€.8íþýÐ1gæmŠ©N¥œ_|ßaçÎøú×Ïã¾ûþxôZú„ëÎLÇÿ'ŤzÑ7xÕ6G:€àGŽ,Ÿ'¼`ô´Tä;TŽÙµè\ú».âæ^Ônù/A˜ó ЇºˆÌÈ÷\yý*w‹êPõÀ@uø”—Õ/û£ÉGû"û×íŠæMȕϛÞgTÁ¯Õu‹¢¾—^úçžÛßèæ„™·sÇ̾æ¼h±6AˆÒ´BÀ!¿z’½ýo¯¼€ã88ŽC.—6ÀÈår¤Ói,ËÂ÷ýÚ®Ïz]ôA3š8P8xyñ´sÜ}߇âEÃ\–}ß§»»›\.7±¼jÚ[”ð–Í*ÎóÔçj1.›U‚(ÑÊ÷Õòñ¸z2­°€Ží‰R,ªéz^ôÚK¥*«8:Ž~"§éüw†¡¦W¿‘÷<õç8j¹Z.ßétèɧ—+W, –¯>.íýçûªÍÚ«Ðój‹yºÑó¦¿ëc¨^^ ƒº}: ¹ÑÏ.Ço>”ã_3íá#¾ïâX>ŸÇó< …ù|>ȘJ¥‚\l¾ïÓ‹Q*•*~ë‘6«ó"ÖZβ,2™ ¾_Ç 
ˆÑ3¨ç.}…¿îž{ØzéÑþÄë+DµÉ½ÚG{6³«ÚãSö¼+o#z,õ÷óÈÖ­dÖ®­ØÆŸsÌ”µ½Ñ|ÏÔq»xè£û8áåëƒç㺟s£ÂA–ð!ZÇJ'ÁŽvCùò²¿W_m€ol*/ã>Ü{‘ߥZm¹¬ü µÅ„¿D ¶Z&°èT³»t¼ìµ72‹#ëî>^þ¬ æBTÊSÜUeo¬“tìóË—AÇ€äìåãÓF¼þráùÓF&ʱ(˜qø·´ Åßco͉ÖÈ´iè[s6 ‡v+ ÜD¡pÏØ^å.ažå"®¹ZˆÓîž#‘EÙn‰0Á¦Èr„êKªX7òYç}Öóµ2ݶ‰ÉÌÈqék·XžºŒò÷¨@˜-¯«N—ú‘Ô{«ê½}-òyTvò³m‰ üœ‹.jaEQ8è¹càyæ]ø—Ö˜7y³…Ö¦©…¸CÛp¹Õuã±mÏó‚Ü6 ’ºÇãq,Ë¢P+ò}¿ÂbR”:TUî¸ÀªcoaÙïî…/~qØ|í…S,Uøôd0ŒÑ…4߯ž´xV,Ö—Ó VØŠiªuµÇë†ÛÎfk¯cÛ¡§]9ñ;®«D«ZBœã„…-Òi¨vï阵xVëwœPHÓˆjGõ›+×UûÕÛI§ÕyÔçÌ4‡ïC¯S(¨}e2Ó^Ø`º±€Ÿ~e)õ–÷N©—Íf+l?™LbYV»L{§Áðê¶Úûê«–;^¦²ðHô×ÿ¿÷~„~wÖ7¦¼Íu[Õ瑞/noçâ* ýûß³í©Ñ=}›-–ö¥Á\{;ߊE˜ëB—L¨t8¯M8@é",9áÇ>ݺ8F¹ší+Ÿ…¿`éß-åéÏ=ÍOÁÂǪm”b~ª>Ï”fã]ß;þî´s­ÍX5j¡NÛ¦ë†BØx©^GÇ+=ßôþª¯‡±®è:‰„*W,ªýŒ%liÁ±Õ7ÄXL‰qµÜÇu¶oö¾mÿúø³ÙÚc‹¡É÷¾ø_\`M]—özëêêªXôK“±h¥$Ï+Oz…øŽÖìÏf Z °€س‡/ZO°ç†«ˆ×Êã ÕÓ¹ ,*s3Õr«'˜«±M"ÛhµBª+ÐÇ8ïì1uÉ1Ê3Ы^õÜ Øô‘$ŒÀtÛßo­\¹šO}Je°,+x¯5j—ª½Èt쯋úQ´—œQþ¬½Ï¢¹Ñz¨<×Úvjy™éé£ßL¤‚©ez|+L+ÊN{ì˜ñ¿ôÒ‚q´OΠžOŠ„ÏÚFµ-gÈ/꯫_žDÑÞ£&•!Ü¡e"2¿XÞÖT¼ë«öTÕ¢yô:¯j±#ËBxm·ˆÎxО±0«"Ê|à5ÏïãÍ¿½•_"dXJ,Aˆ0·Ñ çÏq¼¾\ Cç¢ÄbÍòÌ3Ïð¯}MyŸ”JÁß’d²²c÷<â¦I.—#•JÇ)‹c¶Cç%ÊçóA˜T±X :ª§?÷4^Ñ£ÿíý¼òêWxâò' ‡\­ròüÉQÂK—¼Ä–·l ríxힸü v\²ÒpKç-†Þ z)‹<÷ëçÔ¼¼òì¸ýœÛÙqÉ~ôúñâG_dÇ%;¸ýœÛ¡¨rã9ŽÃÓŸ{š›–ߨõܤ˾Ë÷ñËÃ~ ܲùî¹ì:¼¼œG:ÆñûêctxP‚|=ާÕg ËíÂ+y|óÌorížk! 
Ïžü,±X pýß^Ï ónàÖÃnåñ_<&$Í$_zû— ¨¶ÝŠÉ.sƒ—(råóùÀÛ§a$¡wØTowª±,U bªÏY©T[„© õx·i1®ÅE8ǰWȳ»ãèIm+ŸÏS,ñ<|>$×Þm“ö mžÖˆ {‚ÆE½+ÒÏmO|æ3ü¹‘9K¥ÚÅÊ+êd&þÀ"¹f›†£žy¦ÑM˜:…˜î9³Yøå/_dÑ¢ë‚<˜ú…_Ô<@‹ÂiÔ}eÛÑf2g×yÐ ¨‡úÆ=4ë5`Ö…6zwýõsƇ׿bÕk¯Eí …8¡Ð«Åºxy¹<°ÕCh÷#‘%|ybF]O˜K\·Q_úF£Ã¢c„âž^¾›°€GõvuùvýÝ©j‡ÞW‘0ìÛ Là:PèÏZÀÔç+zÝêåF;ÿúž¨ÿÒåÿùÚŸ'Hõo¦ÏTðNÂy÷G«áÿçþÎ_tß°yºµ Ô¢i=â<à·×Ïå}w˜ðàG`sÀ™R‰t:M"“ákµŒ¼zš)+‡ÅýnÙ2üóÏ'™LbÛ6Ùl–ÿïCxæOþ„‡žy†÷¿á <øàƒlß¾s/ºˆÞ­[Ù³`‰D‚l6‹išd³Yþêýïå¸]»øÃ-·0÷}ïcûg¿Í›®XËšO¬à§Ïý\¸°g)‡/Ñ¿gG}f¿'¾Àë/y ‡þ‹ÔEû¿îÿrèkeå²CéüÝ|ßç׾СåG0o·ÉËË_få+Yºm)¸P(”èhÀºUj¿™LFuÖ>¼á¢7@ Þpà q‰J6®E-kéxóx•c •CÉ+Öâñ8¾íÓ Ã ÍhS>M¸à3p†F°-M4ŒLçî3ÿÎ ~àÅ,¦d†ûÓ¬ûè:øhy=}üdȰ~ýúF›æ¸ØëÁü¹C#ΟÊз ÓJ¥¶ %„µÂÛ%èô¤kqþÈ‡ß¼ÜÆþ#Ž˜ð6tEk˲‚lQ–ÁÊ)§œÒè&Ôh­!ŽÒõï¾{o|ãÿÕî‹õÃÞX^ž¯Æ™L¥'´ö¦õË.KQ‘¾–—m4/§eU®_+7¥*ù¶A/jÜS"¯óWšfeê‚ZiŠE(¿X –qÝ0ü¾ú>¢ÃóG#“©<½Žï«£Ž£ökšªï¯~¹ÓѶ§:ï«å£Â¾.j¤·œŸô-ÿùŸüþ­o¯5­{(ÝÁ)ÿÅãðîwŸË5×ü%‰Ä5#o *è8Aí±2–UíÍ2[D+}G UE¯ÃZ×]uîÛêiÚ~£DsëjFʃ *@ç±Õ/ù¢ÛoÁ~:êøoî{‘7¾ñÀÈ ê°íñåŠÃµ°ª>G=تOST8ÓžÑ"•kú> ¯íí–Û¤—­öÕnÇ#½ŸŽ¶1ZÜD [Ú,´§·©Ïöô ½RµIj/Cm¯ëj—è¡G]‚°ÐÅDªsDFˇk¯qý{ù„åÄõy‰®›&Œ&+¿øÅi¿à½¼—VãÉþ~Ž9gYÅáé(¼ƒá³01šZˆ{ùéùò¯á²xÍïb±X¿‘Ï«ÝP(Šœïûœoø¾¸ŽóÛß²èÓŸf÷£òÂÎt´·Ãm·±Ê÷ƒ\X¹òÿB¡ Fògž ÷ÞËEk;ᬪ<îî€l±œ“x#çÜöKN0Mìòà×¶mÕ9Åb¬u.}„[jYÉ`èKß*k¬ÊÓÏó P`©k sËŽžÃàAXçÚó< ÏÃ(Ÿÿ胲xP™°_bÊËn¹Õßh Qe‚ìõàuÜæk‡Í+‹*Q£…¸VÂ0ZrÀ9xàŸ·rÛœÿÃGÆ)–¹®x'‰@Ì?¨I$ZKŸe8('Ç;ïÜÇž=?Â47…3Ë)ÔB@Áƒ¼3²—o:­î»µŠ¶D§‹a¸º.ä¢)ßË+ÖÍf+¿GCþk ¶Þc #÷¢Ÿ«—æìÔ÷îlv¸PhYJtÓEq4±rŽ‹j{ŽÿÑûןµøWÝè4žnw`”<Šãõ7ŒÊuÊBÞ÷þê¯xÝø¶ÔèšúWÝîÀ£§ï#Ÿÿ0o~ó˜“Èߌ²ë•¾*ònpà—.ô§Âw×U¶‡^èÚF vº ¨Ì«S_è{wõòúwÖ³FE²¬ÊëÎóÔ˜[ Óº=ÚÆtñ©‘Æ•ž§®U]>¯¼î´¬Û¤ÑöZëxGЬ‰‚i~´Ú»îgx…úT*ÊóùʶÔtsl~‚B¸žÁ7žΨ.8£ iè.fk¡EÀh¾ÕÛ¦ð\Î ëý5üôˆ¯WL3MSƻ¨4­ðÊÃsé|þÛúÙ°yÅb×u‰Çãõy­hy½ Ê4á’KX ,®§‘ÝÝê†^Ï€°Öݱ˜ˆGM­ý8޼$`TÍ‹¾é‰ê7× Ö‰>Hè¾ÔÛví7üº²¥þnÛá «Ö:-Ìv` »k“ïû¸®ÛÜy¡Ìö_,dÅásÇ嵦ÃPu~"¡Ì,êãZ JÔí産¶>õ¡wP‘ðÁÇ)Nç×yP)ši/ûžž°P®’\ý;g2£Ô‰bšã¿ÿ$ŒŒ¶|­ýÖSEE¯7Ú¾ŒQ^²Õèc¯³bñ„)•À0ØwóÍÓ»ŸiB{ÄéÝ×Ó°û_þÀ{ £ÒCçÚ̓q¸}w÷?/¹p²YÎ5áw>8EÈ”¯-‚åóê·ö}õ9“ E¡(®Šd±X8ž‹ÇÕuQË®´Gs¯F«”WÛ˜i*‘Êó´ZLÖëT·«»;¼\W-ŸËÕî‡'2æ®÷šŽRëÚÉdF>OÑk¦zz[[¶Œ¿ D;œ½üòÂñÎ' ¡ Twéê)ÆcÞt99eh|þ4‹á‚çX¢Z”‰µã ¨—´f¢,TEo 
-(Ä9Àkç<ÍIïzWÅôƒ%ÊC˜8M-ÄÍ?p€«=//ɽ{¦=Èhvl[µÓó*ÃU@MŸÈ1&R9•*’͆o[£á0,¼å’ËårÂÀ>àÙaÓuu_á„VÀ^õ´Ï¡‡=Q÷:žçaY–äÇš ‡ðùiË–AÒû²êÅU*gPœ0¼H?éðH?N‹l輎W kFšA4Ÿ‰>£Åû¥hqÒ|ìéç©y„^ÇyàŽ,œ‡&¸`=Î1/ÂÊTÙ3³†ñÖ¢ÚžG ±²¬ð:©öè뚈ÚÜXÕ$¢ó£ÿGZ'“ =àR©æ½>gCß1NnÿÞó,]úø§«Üi6JÄɶ E/ ÖÏ-ÑÏ´âû>¾ï«òù|ày‹Å‚4IétÓ4I$A®ðD"AwwwªÈu]<Ï›¹‚uÓˆ ìg ¢J§Óض-Ïy¨4µwè‹C¼´ø ©1O u ˜J&ò&¬Q4cŒz­\4Qª*V~oýzÞÜè6ƒÃö`ϧ›îO÷›~A˜bžy¢;ÿŸ 2ÇuÝ` –L&ƒœ“‚Ð D‹ë98ð Gþó˰ÀƒŸ¹¡'F‡Á Ãü¡¸„!©ú}æþýïÃZvæy1XcÃXeÃûL•HÝ®9;âqÓÚÁÑTiÑÏjs¾aâ`‚ ñ²cŸ~÷«ƒ1´3«ÖxtðF"ÎËçÃË_¿ÓŽfÐA'–¥–M&Ã6D#½õ:é´Òø3õ]™hÇ=Ý>Pó µ|:FÔº®r>Ìåà¿þëÃlÚÔè_¤~<`÷îßpÞy»Â*¼©ò‰tp­ð‹•ÏqQq#ʇ޸S)Øåó%‡£û³ñäÑyu ÃÒ|èbƒ:½M2™$‘H×HâëºA¸¤ã8xžG"‘Àu]Òé4¥R‰X,D1‹E|ß'•JáyŽã`ÛvPd.ú,ïEóÿÆãñ`¹è2ÑÐùX,ÖÒãÅýÌye/Xo¨ya$šVˆó'wËü÷®®9?‹µdMáàbÏSCüáÌáÓ%LOh5ìÛÇÙz̨Ëè·¦†a´ô J˜DÓ }â/^¡kð'ÄãŸP~Å"l.?9k¯]pEÄ7¡ÉÈFs‹pÌ1·³æˆC(z(ÞÆíwlÆ8#%S…û•ãÍ ÁžÂtb:êµÖ¢¯hú·¨ØU­¸.|$ ÇE–×éßâqõ.¹XT—«®ðý0M³Öi´°¦#c}_¥¿k³”á.®ÏàÏfáyÎ4áP>‘5eí&zÙGë-<\hÀN>ƒ¥6ìðà9|«\÷"¥ˆ“aõkp°Ë!ˆºp§Ÿ+¡¼k¢#D]# ˜q艇i¸°àÍH$ḵOO¡…M?ðøã¿àõ¯¯:AñøpÊÑÒú˜¦J¤ Îu•¡e³ažÎêB5ãE“V`uš‚hn¾ê1O4waDýݵkËßþv0 ²ÙlFžÏçƒgƒd2TñÖŽ':—¹F‹bæ-w]7î´'[T|+‹Ç¬išC‹eYÁ˜m$±LÙÒmÒݘ¿s¹Üõ>‹Å";wîœøïÔ|`÷ðG‡ôo˜ì愃Œ¦â<àÄãv°à‹— ›×ÑÑ!"œÐ,]·Sž¼ ¸0˜&¹á„VÄ{ñ$Þ3ŠÍzž¼e­0 B3à qøÃ‹¼taåPã-ÊtyÒ Â$Ð…|S¨çûk>ý"_ñÿ {Þ±x¯Aò]Gá¾ë# t ÂS TÂv³\̰:µaÎùV—•€&ô`… –­Ú¤ó-ÝÎ8$âêá7 ¸&ø¥0—.ÞaÊGß7£®}§üýœBXó<´àCåcÆVó»ÊmH™7ÃZ.”—;¹>˜V˜Þ¬ºÞ@5úÜêb‘Ñ¢Õû¼x X½^¢ÜÖ+ú —ùpÞŒØàT`½½‡ñÁKªŠ™MtlFå†b±¼%„ffßa‡1´sˆ%Ï.©˜îyžqBKáí‡Õ š ›Í†=AhB\Tþiß÷ùÞ’?ãû—~>þñÐNZ—07œçÁ™Ë·óûË.bnG–—8$OçÝ`'*×Kwa©¥Ä-Aب]Z²€Ûòª˜ê/2°È„ߥá™"¼â«õŸ5ál Þê+±*×–¯üB’¸…p›ï%¬â¨fò° w¹J`JðE i‡B£¶¢ùû£w¡8¡hU-§$ ZŒ&ŠÙ5nk^ù/^>gZ+1zÿ© zdüϵMTµeË£Ns˦ŽÁ¥K¸å›ïã†ß—LãÎj½ÔU uå]ÓTžgÀƒ?ú~à ô”Ã>G÷xž‡çyÁx^Gxž‡sÖY$|ŸGö3^œ7#—ceÕúÑñRç¿_üâøO{°i¯;ÛæXàØòìUå¿€\n¸ÇáXŒUl0ZI;ºŸêªÆc°ô;ßá'ë×ó®1—l.<€ý§.«˜6rß ÓOS q«î^5lZ´âŠ 438á>w8GùgÓ£î݂РsaÁ™ƶ¿6â2¹ñìaé_° xhݰáÎ~ïÙ´=ýtkåyBq àUsæ_{$ãy]iÖ9­Þméu›-ÉBf{7ƒK—b‹†ng^Û7¨üÕ§ß÷qG‰$†A,›UÅPÉõS©Ö•WÒså•€²Ùx<^!¸¹®K6› ¼¼²Gã88ŽC&“©xѾr¼œ(µâ½gŠ‘*pGÿÏbÚyXfeh¸qB=4­7ðCرûcæ'õÖN„ÆsXÿ£¼tÞp  r@Ô›WAšcür×/kΫØ B“òøÂ…*¼í*¸oËkøÌïÎ8(„ÙE4[±;ñ3ÞðŽÓXe¬ ãSJHÓ9µç”ïܲ7\2F¢éÜöQ¦ê™¾Öfd$¼øâ‹äæ½nüÝ´îG‘J$Aj=^‰æC-ŒPçDõ2]{²éÊ 
€TÈÓL7ºI‚0n‚ÜcžGöoÛ9bÅ-¼ó«„ ô¨LFöäÒN âÄ,4Š}‡ÆÎÿù=w&˧á™Îu]òù|੯ŸMÓœPúŒjMžCìøü^Nÿ㳃i’+Y¨—¹nÀHœôº§øÈiŸ¬˜æº®ˆBK±pÏPÅw]¹HZ…û?žE<ͽòì°yÚÃS<•…VÀÜoÜÿKbëç4º)‚0!\ý!™d]/‡ò|¥( Ï}B pô£²èÃOð»9—M~ce²Ù,é´zÁbYV ÂY–%žj´rbÿ^Î:+|ÞK$%"ÔEÓ qGíúÏÙ|/‹d21„–aÁž=ÜÿõÓ‚7"]]]A. Ah%ΙsÝgývØôL&#ƒ ¡%Øqøá¼êÓ÷qëÜÓÅfg)õÜ_µ¯þìºî˜ë4>`ïÚžÏчš×u/ñ¨½é4š|>_s|>?,ï˜^F¯ãy^ :Ÿ[¶§Ÿ¦×,ÇÉ“ÚN:ÎI*•’bQÂÌó"¬oÿOŽ?þ@]çò¬'ÔKÓ qøæÑ¼xÈÛ‚ïÅHÙdAhŽ~ôQ¶Þ}V —J%qgZ“ýÏòë×TÞ.¢‚ÐìlܱƒþÇ|oa—ôÃeªE©l6[1O£=_¡ò!£X,Ÿ]× ÆiÕ"Awww xåóù@Xp§b¹ŽŽ<Ï£X,VLÅbÁçt:]±­èrÉd²æ>ÓétÐÎèþu¾¨VÂùêÏØøôÕLÓ$WLz}e2™`[‰DbÄå¢ûŒÇãAÛ2™LðànYVË=Ä»Cp×gÃ}q)çúüF7g|md®ëVˆUÚ®}ßlT$¶·îîî`¹j6úàë8N°Ã0{«ÎÙ¤mÔ÷}:::¨E4Ä,jo–eÑÓÓ,}ÑšJ¥*ÖÑ6fšfE¡®L&¬“H$‚öDíÕ4Í`ºišB\­ci–ÒÆ‹u‹£ýV´*©Œ«…F³÷þW¸ú®ðºN$RO¨›¦âŽýÁËßr< žBkÒõÐ^™³P6,…Њì>ähæ>S9à5 #È¿"­Àâ_öñ˹gréÿ}ãŒì/*žE½r¢‚YµçØ’%Kjn+›ÍV„·i,Ë ®ËhX–eYÁËKÃ0F²´``F…(]'‘HTzx<^³b`µ¥·¢ç©ÕÉäaëÜsØÿæ­ IÇDů¨½G…´¨õ ôðãz'öÓœíh±ã¶¶¶@¸ˆÇãÁƒŸiš3æíÖ,v,íh&bÇétw06ŽV!Vnš¥ÿ‘v4ëJÝ"ããJÄŽÇgÇ;vìÀu]ò_½ß÷zî`·Ÿ(Ú6êaF<âvïÞÍŠ+‚ïííí£.ï½÷rË-·`Û6‡~øL4±&;vìh¸a=÷Üs ÒÛÛÛÐv<öØcÜu×]uÔQ Ùÿ /¼Àc=Æ /¼Ðý׆A…‡Ü~ûí<òÈ# i3ˆýTÓèkZÛñ&¿± 0^;Þ¶mßøÆ7¸ñƹ袋Òfûi¶v<õÔS<õÔS{ì±lذaÆ÷?^;¾ãŽ;øâ¿È±ÇËë_ÿúo¯¦YìXÚ²cÇ^|ñE€ºòM%Ÿy晜vÚi¼úÕ¯à‘GaË–-3~Τlžv´âøøî»ïfß¾} =o2>®¤ÑvüÔSO100Ð2ãã;vpß}÷qÛm·ñý>º!ínm?ÍÂc=ÆÀÀ .¬kù+Ö0n¿ýöF7A&ÍÝwßÝè&¤ذaCC„A˜J¢UN¡U‘±±0[\ZB«óùϾÑMf3šÚÙÙYñÆ¡··—¶¶¶F» ÔØ°0;fbÇÂl@ìX˜ ˆ ³±c¡̨788¨ 9’õ©OM÷N´¢|Í5×ày·Þz+ŸùÌgDiZ±aa6 v,ÌÄŽ…ـر0;fbÇB#˜34444S;ëïïgçÎtvvŠa -‰Ø°0;fbÇÂl@ìX˜ ˆ ³±ca&™Q!NAAAAVf$Gœ ‚ ‚ ‚ ‚ ìÌHޏfcpp|>϶mÛèïï§³³3˜þo|ƒŸÿüç¬X±¢b‘æME[V¯^]×¾¦£[¶lÁqœŠs1Ú¾¦ë\õ3’ ëyÍlÇÓÕ±ãÖc"v<¿[³Úq#®i¡~ÄŽ‡#ýqë!v<é[Gìxê8ØÎM3i&ÍÂTj7GÜàà —]v *¤8ŽC*• NF–N§q'Xo´y“aãÆlÞ¼¹bÚL¶cãÆlݺ•U«V±eË6nÜ8澦ë\õ1š CóÛñt´Aì¸õ˜¨OçïÖ¬v܈kZ¨±ãÚû—þ¸µ;®½é[ ×Þ¿ØñÔq0›fÓLš…©Ônæ7ú`fšÞÞ^ÚÚÚØ°a«V­âMoz½½½ìܹ“M›6Êø¶lÙ‚mۣΛ Žãe’£í›©vô÷÷S*•øþ÷¿(…¶T*ÚŽöööi9BýŒdÃz^3ÛñtØØqk2;žÎß­Yí¸×´P?bÇ•HÜšˆW"ýqk"ããJÄŽ§–ƒíÜ4“fÒ,LµvsÐyÄ­X±‚«®º*ø¾{÷n¶nÝZá¾¼zõj¶nÝ:漉288Èu×]WÑ–™n‡Þ^°Ë/¿|Ô}MǹÆÇH6 ÍoÇÓѱãÖd"v<]¿[3ÛñL_ÓÂø;®DúãÖDì¸é[W"v<µlç¦Y4“fa:´›ƒÎ#®½½ööv@½)H¥R\~ùåìÞ½»"fW/Œ:o¢¤Ói®ºêªa¥‘g²ýýýô÷÷så•WÒÙÙɶmÛØ°ak×®q_Óq.„ñ1’ ÃÌÚŒßާ£ bÇ­ÉDìxº~·f¶ã™¾¦…ñ!v\‰ôÇ­‰Øq%Ò·&2>®Dìxj9ØÎM³h&ÍÂth7JѼ馛زe 
W]u¶mWä0™n6oÞLgggE’¿Fž í²ÜÛÛËúõëY»vm£›%ŒA-žiÄŽ…É"v<ü|ˆ·bÇÃχØqë!v<ü|ˆ·bÇÃχر0Q­™4 ÓuMt¡©ëׯgpp[n¹%è ;;;éíí –ÑqÑc͛۶mcóæÍX–…eYX–¸/ÎT;ÚÛÛ+TÚÎÎÎ îy¤}Mu„‰Qˆ¡ùíx:ìGì¸u¯OÇïÖìv<“×´01ÄŽC¤?n]ÄŽC¤?n]d|"v<µŒç¦ÑšI³0]ÚÍAç·eËÚÚÚ†Å÷ê588H[[Žã 3¸Zó&‚Nا±, ×uåú9SíX½z5›7o¶·uëÖÀer¤}Mu„ñ3’ CóÛñtØØqk2;žŽß­Ùíx&¯iaüˆW"ýqk"v\‰ôÇ­‰Œ+;žZ¶sÓ šI³0]ÚÍA'Äé„•ZÍÔ¸®Ëºuë¸ì²ËX½z5ŽãpóÍ7êÂHó¦šÑö5Õíhoo'‹qÙe—±bÅ vîÜɇ?üáQ÷5“çB¨Íh6<“ö33i?bÇ­ÉDìx¦·f°ãf¹¦…ڈߗôÇ­‡Øñð}IÜzÈøxø¾ÄŽ§ŽƒíÜ4»fÒ,Læzš3444Ôèh&úûûÙ¹s'Ã\)G›×Êí˜È¾fò\ã§Ùíx:Ú v<ûh–ß­ì¸Y®iaüˆK<h–ß­ìXúãÖ¥Y~;±ãÖEÎÍØçá`žbÇÍK³ÚñüF7`$|ß§X,ÇGœîy–eó‹Å"žçÇ1M3XÇó< àX,bY¶m[¶ÞåFBÿÀŽã`FðÙ¶íQSïÓqœà˜FúžH$êÚÖhí÷¾ïê-K<¯8ïÕçÁ0 è{ÙÊTÙñLÙ0´Žd“£Ù±>¦L&€ad³ÙaÇ&62’ ך'v<|;±áè6&Ók;ÖçJ÷ó#µãfSض=ìûhÇÚìvÖ¶êi¿ã8X–…çyAûR©T°¯îîn|ß'›ÍçWo[ÿz?Õäóy’É$†aÔ½Île*íx¦lXošßŽG²¯ÑìX¶£ÇU}Ã͆Aìx´íLƆ£m™hµcý[¦Óir¹\Ãì©QLÔŽ›iLQëûhë5»×ÓƒØq”Ù06®õ}¤ufëØXÿ–bÇ2®˜[ÉŽ£m‘ññä;n¾qÅXëišÊއšL&3ǃï¹\nÊårC¶mÓS©Tð½P( Y–Ìëëë2 chhhh¨T*Ÿõz©T*ø®OC½ËFt™z–/•JCÀP___]ßÇÚV=íO¥RC–e ™¦9LJ ÃÊårÁ¾‚íéó«·­çõõõUœ7Û¶‡r¹ÜeYë´ÎÁÀTÚñLÚpõrÍjÇ#Ù×hv¬· C†a õõõ‰ ÀH6¬?‹¼ÉØpt_ãéG²ã¡¡¡!Û¶‡J¥Ò¸Îßla"vÜŒcŠz×i;®ÕG—;®d¶ŒëYg6‡†ÄŽe\1}¶<’G÷%ããÉ#vÜœãŠñŽmÇMšª]5ÚU²Ú­3êâ®] £*fÔå»^7ÃF¸ÕZ–UñÖa¬ïSÑ~ß÷·e­@ lÛÞØÕÚ¶žgš&–eU¼át]Ó4+ÖiÑÜ_g SmÇÍlÃz¿3mǵìkûöí#Ú±ã8˾viîîî&“Ɉ ×`$±ã©jÿhýêxûãíÛ·×´ãl6¸ÿŒLÄŽ[yL¡÷ÛÌv” ïXß'‹išÃ.œ‰ä©ˆ®cš&£&ž=˜òaˆO¯×b,ûÒ7Nmÿ‰D"¸9Š g<¿™ØñÔ0QûÒëÕ²cÇqÈf³Ì™3'HÊ;gΜ–J,<&jÇ­jÃµŽ¹Ùìx´¾ÄŽk!cŠÙ16;–qÅtÚòDíXÆÇãCì¸ùÆc­×ŒvÜ”BœeYA‚C ÈgašfÅ@-zÓ²m;P/µJšL&}(M‰mÛxžÚH¹Yªq]7XGÖj±¾ør¹Ùl6ˆmÙŽØñôS˾Î:묗×ù¢ëkÛÎH6 bÇSÅDík$Û¯eÇ¥R‰¡¡¡à`hhH옑íXlx|Œ×ŽGë‹õ|;Ž"cŠée¦ÆÆbÇ2®˜N&jÇ2>bÇÓÏLŒ'³Ÿ© )…¸D"ïûtuuU$ýK$˜¦IWWWPbY£ º££ƒX,¸ÓÎ4QwÓj×ÓfAWÑç7™LÖu® à ‹µÖ±,‹D"QqÆZg¶"v<3í}鄵ú7©u~ņCF²a=OìxjÚ8ûk½j;>˜™ˆ7£ ×úÞ,LG_ bÇQf˘¢Ö÷f`&ÇÆ32®˜^&jÇ2>bÇ3ÓÆ™7ÒŽç é×1MHôÍ4„åj5:Ö7ZuH+™Ñxß™Äó<<Ï ÞHèÏ͈n_=çJ»Ò … ¾z¬7,Yg6"v<=Lƾê=¿bÃáy€ÊbÇ“g¢ö%v91&bÇÍdõ¾73Ñ !­>¦¨õ½™˜î±± qÅÌ´s¼çIÆÇãCìxz˜©ñq£í¸)‹5hª B—ùÕ†œÏçkªœÓ…çy®§Õd2™ŠP®g½©Úÿx˜ˆ±†1î u"ëÌ&fƒ£¾-íMµOľÆ{~ņ‡»ØñÔÙñDíë`·Ëñ2;žî¤ÈõÚ±f¶Ùq£’N·2Í6¦€ÆÚq«Žvd\1±ý×ËD±ã‰í¿^fj|Ü(;nj¸Z¸®Ädk÷Naúñ<Çq‚ª;ÓµÎÁ‚Øñä™ û±ãÉ1Qû»œZÄŽ'‡Øcãn bûSØrk ¶?:bÇ“c¦ÆÇ¶ã–âAAAA¡iÊb ‚ ‚ ‚ ‚ 
‚0ÛhÊqW_}5ÇsL£›Á<À©§žÚÐ6ìÞ½›Ý»wsÜqÇ5´?þ8Gy$GydCÛñÀ°qãÆ†¶¡^6lØ öS¦™ì§Ñ¿ ÀSO=Åg?ûÙF7cL\×åÛßþ¶ØO™f±ŸfhÇîÝ»ioogݺu>crÓM7Ñßßßpûi;–v oÇ»ßýî¦ÏW'cciÇXíñqýÈø¸’f±ãVÿð‡?dË–-œqÆnJSÐ,öÓ <ñÄ,Z´ˆ¿ÿû¿sÙ¦â}ôQÞþö·7ºÜu×]¬Zµª¡mرc;vìhx;¾÷½ïÑÞÞÎÊ•+ÚŽ»îº«¡û ÿÝÄ~*i†k`Ó¦MnB] 4üœ‰ý4_;vìØA£OE]ô÷÷7…ý4‹K;†·C÷uÍŒŒ¥cµ£UñqH³ôƒÍbÇ­2>~ôÑG›ÂŽ›…f±Ÿf T*qß}÷ÕµlS q‡~8«W¯nt38ûì³Þ޶¶6Ž<òȆ·£¿¿ŸÎÎN:;;ÚŽ£Ž:ª¡ûo[ý»‰ýTÒ ×4(Và°Ãã¸ãŽkø9ûiÎv´‚xpä‘G²råʆŸ³f±ciG%Û¶mã°ÃkhêAÆÆÒŽÑññøñq%ÍbÇ­2>6 ƒeË–5Å9kšÅ~šG}”'žx¢®e›Rˆk6lØÐè&4Eç °víÚF7A˜b?•4Ã5-Œ±Ÿæl‡0>šÅŽ¥Âdh–þGÚ!LW"v<>N8áþèþ¨ÑÍhÄ~BÆcR¬AAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAa!NAAAAfâAAAAðýÊï®[9Íu'·}âAAAA„YA½B™ïƒã¨¿¨ÐV,B,Ns]èê‚dÒiõ9ŸŸxûDˆAAAAZ-¾%“JD‹ ežÝÝá4ÏSË'“Jts%®¥Ój~"¡þ´—H@_Ø6”Jj½­jº±˜ßè“%‚ ‚ ‚ ‚ õâyÍB.§¾g³J³,%šuu©e25?‘PB¨åº»Õw½¾^Nßgtšm+/VÛ8òÈúÛ.Bœ ‚ ‚ ‚ ‚Ðôø¾òJËçCa  P¨\®§G q¦©þ4Z¬›,¶­öc°ukýë‰'‚ ‚ ‚ ‚ Ì(ŽS)¦UãºÊëÌ÷!•R‚WW—Ѫ=ت1 %¸M'†1±õDˆAAAA¦× Å1] Á¶GÕ²Yêûjy¨2Újˆ'‚ ‚ ‚ ‚ Œ›hø§Ì¢ òy6ÚÝ­æçrjÙRI­×ÕUé—N+/¸R©2Ü4‘hô‘NR5UAAA¡pƘŸ¯k+ãØŸV$…áUBÓi%²Ð %´uu©ª¤—¥T› …²8gB:²ýÈÁåÉp™ŒZ.šÓm¶!q‚ ‚ ‚ ‚ Mˆ˜‘Ï1 $è €UžŸ,ÿÏ”ç'Ëë§€‰¤43 %´¥Rê{6«þg2*Lt$¯5ÇËVí3QZ,·'[n§cÁa9%Ü ¨ýEE8¿¼Ü4§{«ï+å±:Ù\±¨ê8ês*Åüq(‡â'-J~ª_u‚ ‚ ‚ Mƒt <È|”ˆ•By½¹(1˺QbU%À9åeµGyÊÓÜ1öëyêySçt+Ô4ÏSÞj==#BˆQßlõ9”Êí÷Q¢aŽP\ë¶Ú` AÖPÇ”,·?_ÞV¬Ž6OÝI÷aÉå΋)²X¬œŸN+ PÀõ<~÷‡?°gÁ‚ºv1åqƒƒƒÜtÓMlذ¡bÚ–-[ؽ{7«V­bõêÕ3u aB4³çóaÇhYÓ_ Fh]ªí¸YlX꥙ûbA¨±ca¶ ã a6Ðjvì „,%P9(!+N(¬õ DªXä»U^Ö*¯Oyù.`¡IS-eÏ5K9xåóÊ«ÍuÕŸÎÛ}îÔm–¶‘ò¾ÒzJLKzÂU×cÈE>(±.[>V›PxÓ"^7¡çߤI§•‹Ÿï«V€ÈfÕI0 °müXŒäâÅX(/DCWÈdD,‹¶¤ýÎ;ëÚý”{Ämܸ‘Í›7Wcšþþ~:;;I§Ó8ÎXQÍ‚ÐXšÑŽ“Iõ?ŸW‚¼iN¼\²ppPmǶaA/ÍØ Âx;f 2®f­fÇ.JxÊ SZèêA SJ¸* Ä.ý=C(ÂQÞF8. 
‡¤•h–N«¿lV=c‹jº‹*x¨?P‚Xš²ø„ÎÒåÿå}ë°ÙB®Œr›3åõôg}¼”—¦2ÇܸÉç•ú‹…•%4¾38ÈÉgœÁÕï}/ÙxœXY„Ûõâ‹ÄÿÇ?æ÷ïçS÷ÞËŸ¾ò ¸æÞ·y3¿Z³†G¶l©« Sêç8ƒƒƒÓz{{Ù¹s'›6mBÅÙÖÒª 4ÍjÇÅ¢ê =O‰÷–5»X “£ÚŽ›Á†a<4k_,ãAìX˜-ȸB˜ ´¢k8Ã95•Þa£{‹z[å0Ó$`eÔ:ßìRÏ›Ÿ*Á—ÊûÕžhÚÓΦ2×[Gy%&–{n¼è}y@Þ÷ÉqÂzÁ£±ÍdÔçèCs¹$ë7ÿÏÿá=§œ¦‰oYäî¸ƒí¼œH°ëÚkùá²e¼ûøãù?Û¶±è¾û¸:‘àì¯~•?Ûº•‡^ýjÞô&NÊdXüØcÜ}æ™|þóŸvω)óˆäºë®ãª«®ª˜¾uëV:;;ƒï«W¯fëÖ­3ð3 ÂøiF;Žæ‡t•À2•*¿ð&¿}aöQËŽ¥/Z‰fì‹a¼ˆ ³W³V²ã$*7š.LªE.%†MíêYR×X°+dàS¦òBKæfË ò¸Yå6¥Ê9B¼qẕ9ׯ Øù<¹Ž\Ï#‰ c€ûÎwâ\=ùÛnã¶yóp>ô!²÷ßOåM蕞·žpúÇä²Cå‚ã}×.>yíµ|åÙgùÕ7òè{ßËæ‹.âé+®àŸ>š»o¸;vð«_ýŠ=×^Ë[·rÞ§>Å’XŒëÖ­ãg?û×^{-+W®¬û8¦LˆK§Ó\uÕU´µµULß½{7+V¬¾···¹­^x­[·Ö­& ³ŸÁÁA¶nÝÊsÏ=7­û™J;~î¹ç¦¤#O&U~Èê\p®+Z mÇ/¼ð´OĆ÷ìÙÃã?Þ¡yèïïgÇŽìÞ½{Úö1•}ñîݻٱc½½½;iBS²uëVüqöìÙ3-ÛŸŽ±qcOšÐT4r|<;†© ³‡þþþ–Of\á2¯Xžï¢<ÎÆ+¼yÌ™£ž!tëèPÓ¢Îa:4|Ç‚ž¸×l”×Gexi5ß ^:­póyU*uØ òèÁÞ¯}oüÙŸQÈç)•¡t÷»d_ûZzwîäOÖ®eËu×á¿õ­üô׿fíðúmÛxíK/ñÁ}ŒE?Îw~˜{¿ô%ú.äÅ~”]_̧;Œ ÇùÊW¾ÂÚµkÉård2,Ëâ#ùƒ7ÜÀÖ}ˆË,¼'{{{Ç5>ž!nóæÍtvvNYbÃ^xmÛ¶Më _h-vïÞͶmÛ¦Uœj;dÛ¶mã^Oç‚Ó}O¡ ªÒhÒå€xÓ”B ­†¶ãéhL¥ïÙ³‡]»vMÈŽ…ÙËÎ;§Uˆ›ê¾X„8a$¶mÛÆ®]»¦Eˆ›®±ñÎ;gú4 MÌÁ4>f/;wîlÉññDÇ· rÖC8ßE _9`€áªÑB›_V÷LúúÔs¢ç…)Ðúúœo#QK\›HÈé­ûßú‹¿(ÐpAmþÏ~Æ;.„d’¾ô_øÂÔ íe’Ï«†G½NŠEUÉ4æé䯾úU¼D"˜mïþÊWøö!‡ðL¡€Í²íÏþ ¯»›ÇÞ÷>Þsà |ìïäÑSOåÉ;xfñbî}Ç;xèóŸÇˆäˆûë¿þk~ýë_`Û6¹\®¢íïÿûyþÆ™;wnŹjˆ·mÛ66oÞŒeYXeeÀ²¬ÀÕ3j¤½½½ÃÞ VsÌ1ǰaƺ߬³Ÿööv6lØÀñÇ?mû˜j;>þøã+*¤Õ‹7íê E·è<Ý—YVeq¡ùÑv|Ì1ÇLÛ>F²ãùóçÛ†—.]ŠeY²caö²zõj.¹äŽ;î¸iÙþT÷ÅÇw—\r k×®mô©šŒ 6`YK—.òmOר¸Ùªù ¥‘ã㉌+`âãcaö²zõê–Ot\ñ¿=ÄÃ'Ÿ\1­Xõy´G<ÏSÎdšdR‰p]]ªà(±Í÷Õr©”ú›I^ؼ™soº),ˆNó®w½ Ã0¼í6î:âŽûÀ8éÄy°­/ùˬ»øbœd’=×^˾IFË´ÿ°y3gìÛdzÏ>Ëÿ¼öµ$ ¾ð…/N§yõ«_Ͳe˸ÿþ‡ÞøF~¹k¥R‰gžyÓ4¹ï¾ûÈf³|jÑ"~é¥üï»ß]>Of…7JTŠ–k×®×øxJŠ5èä†˲pËjA?½½½ ÒÖÖ†ã8M•üP4ÍbÇ™ŒàâñÐ+ײTeÏ«|‹Qý]F²ãþþ~ …‚ôÅBÓÓ,}± L±ca¶ ã a6ÐLv¼}ÉæÞñî‡c¡ò›u£¼ß<`¥oÿp?ÿýßÊ)©XT‚Ú-·ôsñÅí8N茑N+o7]Ð/Šã¨çÇépÜpÓ41#¢×^{-Ÿüä'yî¹çøßW^áñ¹sÙð®wñ¹3Ï䢯|…?,Zĺuëøâe—Ñ~á…är9’É$™L†b±Èµ×^Ë1k×bìcl;óLòŽCÊ0¸êÏÿœo~ó›œrÊ)¼÷ãç£?Ìí=ÄöBåË—súé§ó“Ÿüß÷±,«"üø÷¿ÿý°¶†1añ-ÊdÁ§´jj-ÚÛÛY·n—]v«W¯Æqn¾ùæéÞ­ L)ÓaÇi*]³YÕYZ–òµ¬Ê·º8C´ßˆÅTØêô%Â,Gúba6 v,ÌÄŽ…ـر0h„÷ü÷‹¼’>çy!ÎBgp•»^äßoûgà‹¸®Û x÷»_!Ÿ¿…Dâttt}8N˜Â¨Ú9c4®X,Ÿ 
…Î÷¹çÜsé½øbøîw¹õðÃéîîÆxÏ{Xjš¼ã_þ…=sæpõ3ÏpGy•£n¼‘kÎ; ûL$$ ¾ÿýïó‰íÛÙþðÃär9Î;ï<æÍ›ÇÐÐP°Ë£>š3Ï<Ã0øú׿ŽmÛÂÚ5×\3­¿ßT1-Bœ[|ùå—‹Åعs'6l¨ËmYÍtÛq–J!.Vo.r9õWç©7"QÑ-WëÕZ^ ÒŽ¥/ZS³±ca¶ ã a6ÐH;~¾­ ^8”Ÿ;²•ø–AyÅY@bï/ø—=×òë_„îî“( XLsòÉýìØÑŽë®Àó<Ž9æ1J¥úÃÒ‹Å"†a`Û6Éd2ø<‚õÒi Ã`Õ 7ðŽCåéo|ƒâßÈÆyÛÛÞÆ¿ÿû¿óÓo¬Xw´â2_|1‰D‚O=•D"çy¤ªâj7mÚoS"&6ˆi÷ˆÓ´··KÎ7¡å™.;ö<°íÑß\xžú‹hÈdT> QêEúba6 v,ÌÄŽ…ـر0˜I;~þÂy~øùßÒWñí9ø„UJ/õᛥ> ÃàÓŸ~±åR,)•JÊÛ¬ZyÿýÿÚ5‰šûèîî¦P(ßóù<Ùl¶"”ÔuÝ…¸t:M*• /ß÷I&“ <ÏÃó<o»?ùÊWXñÌ3Ü´|9ë>òŽ_³† /¼‹.ºˆW¿úÕ5CCÇâòË/Ú•É /UÑÊâ[”))Ö Âøpª¾k-‘YP«ÊW`Û*\u^AAA¡yXÐßϻ߽¿0Dš0áøV¶n=D"ÁÊ•›±,7Õ´ˆæ8©T o„‡?Ïó(‹øºŒ*Ê®§§ß÷Éçó$‰ŠùÕT1ð} ÃàŸçÏçÙW½Šl6ËŽ;xýW0ôÐCø¾ÏÆöö §ÒøC^ýêWOøüd2™ÖÈ7é>*±ßõý˜ïÃñ÷×ç¥(Bœ 45ª:Ã0”P—¨ñâ£XTsAAA„æÀ’ÀTŽð>½0¹¬û÷ÄQžp Šô‹yâñ8®ë’ÍfÉårA¥WÛ¶q]—D"1,åçyø¾ã(—è|×u1 ƒD"A±X$•J [?ŠmÛ|ô£%›Ír÷Ÿÿ9O®YÜo›_¾ø"ÿt÷Ýlûæ7Éår†1ªgݬ ˆÚÒ@+O÷ÊŸMT Õ,ùÉž:þ©º6;¦788ØèC„YO"Q_YiÓ¬í1çy*¿Ü(/6AAA„DË]É/|<0÷÷KX¶¬Ÿ·vmÇ|æÌQEø xì±ßbYV ¬EE.Ó4ƒâžç‘Ïçéèè ™LÒÕÕE6›­)Œi!/‘HÕJ«=ê|ßÂNÍkøéOJgg'vÿýܳs'û?ûYœxœ—vì`÷üù•jTPm Š‘$]þì ”ÒŠ“t Ä6 è)Π„·4ôó{y™w°ï°}u5%È·uëV¶lÙÂå—_Ngg'½½½¬_¿žÁÁAÚÛÛÉf³tvv6úÔ BË“ª‹œN¦ê©M-•¤zª ‚ ‚ B³à¡žý²©}Ík¸íø­˜¦YöH‹ãºªðži‚eù8޵LÓ–-‘H"›eYäóyJ¥ŽãÉdˆÅbø¾xÔÙ¶=LpK$œßÛËß<ú(žçaŽãpdž œ¾o ßð–¾êUœsÎ9\~ùå”J¥@ÈÛày}ÓM”þþï)‹8Žƒã8ùèš% Š¶PœäºQ"[¢ü=Uþ®Ñ‡¨KÝ‚ã4&ð‡ú›3 ¿¿Ÿõë׳bÅŠ Ia*•¶m¾ÿýï³víÚaÕ*A˜ùÓÒéqo&@¿ðh•‚ ‚ ‚ °dûv–-ÛÌMï?o9ã¥Àƒ,›UÅúâqí\áóJ¥‰9‰ô|íÝfš&‰DÃ0Èd2ÄãqlÛrÀyžišS¿úU>xÿýlüÌgèêêâ;ßùŸÝ¿Ÿ5û÷óúÿ˜>ô!r¹뚦IOO¶mcÛ6±X Û¶ë÷ˆÓÑ[.¡(VïI,Nò‡H£ÄµauŒJTëA l6J5Æ W£çO’¹@à §Ëõö÷÷Óßßφ hooçòË/ ··wò{aÅIt,¾¯þ\wr‚ž ‚ ‚ ‚0uxÀÏ¿ù \ļyWr÷Ý«‚y©”Šh2 H&“$“Éa¢ÙHض=lYÛ¶kVÒ{óÍ<}晼eþ|®øÖ·øö·¿Í߬YËïxËþsæ,YŸù̈ëë}Æãq†††ÆÜ_€Êµ–G‰^>µ=Tj‘% )ùòú‰ò¾µæ n)B!n˜J`[·n]0±T*ÑÙÙI[[[0mÅŠ’/N¦ˆ,ªpËÿs¹‰oën/ìÀAAA„æÀV<ðCR©³X¿þöìéÅ4àº.ZÃrÏóèëë›’}†¡¯a7SîŠgžáå׽޶ù¾ùf<ó ö•W•WpÆOLωÐB˜¦Þç_§¼nÏ8ö¥E>Ôi  * ;Q‘ef˜¹íííÞn½½½¬^½ºbÁ;w6¦…‚0‹Ðáä&ª?ðQžlã)4ã ^&èâ-?²BwfÓ¯8AAA„fÀî¹ç®¼ò, Î=·3šš¥««‹d2In2žÕûÌfY¾woðý™‡bÑ/‰mÛœüÜs´¯[GÖqXpÑEÓWñ´ÚãÍAy T‡|zcl§ˆËta…"µÃT³‘ýÆÊßu(lœót«—ùlÞ¼™+V¨sä8Ü|óÍÁB[¶la÷îÝÃÄ9AÆUþ3€]ĺ “»jjÕ‡d û"Õ¿ô”;´|^…©Öë!,‚ ‚ ‚ L=°|ï^žœû·€rœˆF1åóyr¹¾ïOiåÑŸÿüç¼ù‘G Xäê/~‘Ç‹ñÒKØ×^ËïöìátË"nÓRåP)²y Ï­æ¢n 
cl'G(Âåÿ&꡺z[ñòŸOèM×„ÏÆóÖ®]KPAWN¸ì²Ëèïï窫®jt[aV sCæ;Š£‹piÂ>¦Õ‡ø(!.êƒ<Â×BA‰qÅ¢ò’AAAfXúÈ#ÜûôZ ÒYBW5´‹ÿû¿ÙwÆNóñ3ÏdÓÚµP.üpów’¹øâ)þjâ |ƒ¦R8ƒ0W“O¥ÇšW^߈lÃ.OO „¶nT¸jõœ+Ï×á¦EB!® ™¯?lذ 6 [`Æ ÃòÅ ‚09Là[1ØçÁ/Jµ—I–ÿç Å7í ª_òPáîݨ – –Ž#Bœ ‚ ‚ ÂLâ¡&´nôXi€ÎÎ#P‰…B,Ëš¶°PÓ4¹öÄy´³“-¿ü%›"^uW¨—<µ+‰jO¶J(i·:ä+SµMPÍÑíê°V õPÜ™oP)ôÙ¨bê"~§”ùc- ᨂ0=,6á,•×-Š‹áL”¸æ£:q]àE÷/¿¾þz¸â ìòrÝ(Q.Wyâ®úð!Ö‹ÇãÄ'ê9‘NËEôð{ÞÃÂ… É-^ÌÞþvnmoç¥Õ«Yî•„m’˜ ÍG>gÊóGâR(±.ï ÔJª+ ŽTdÖ@‰pÓ”o²ÌÕ6n܈eYX–Åe—]VQ¼A„©Á‰|¾0WTu2J„ˆÊGÅ}ý?‹që­·I8µH§=oK¥P„ó}èîžšöû¾Êx©Ëk§#•!|ßÇ›îŽ^AA¡‰1Àžyødþú¯¶L"‘˜XŽ6߇lV…@åŠE޽ãüüç|ò¿þ Û¶yþùç¹ï¾ûXºtéô¬¼ôƒj7arsÊÓGÖ ”ç0Ü«n,¬1æ7©e¸­[·²eË®ºê*ÚÛÛÙ¸q#›7o&›ÍNvû‚ Ta£úÏg}Ô›ƒ2êe@ŠÚ}†çy˜¦‰ã88åŽwÍÀ´·`xÿñÒK¸›7ãy…‚’ò Cí/V}v¡¾Ÿ'‘H0Žãàû~ð¦F‹n¶må°µøfžçáy^°|<DzÆêAAAfQ·„lÖãøã=àø©Û몪ù¼úoÛ‹±¨Tb‘a¨ùÀ?øA\׺j¬Ñ0­nB/hÈŒ^xa$t…ƒˆùÛ¶mömÖ®UIÛÚÚ¸ì²ËÝ6A˜µäó°ð#Et1ݹ®[!dÅb1úúú*¶sþÍ7ã'¤Ói~ü¿ÿKÿ-·pK±ž‡vVËdTÿœÍB.O>ù$6ùÇüOþþïßrZx×ofÒé4¶mŸïû†A>Ÿ§T*aYÝÝÝçy‰D×uqgúJa ‚ ‚ ‚ 4!Z«¸í¶AÎ8ãÉ©Ýã°eÞ<Ö– tÖYdmß÷É‹d`L§‹QÑ¡¦&ÊcÍE=¬¦ó³u—çMb73Aޏh1]1U„éÁ²à÷fø!‹ò‚Ó"œçyÄb1ð}Ÿ|>„}ú¾O*•¶mº»»)‹¸®‹aìÿ·cé?ýO¿ë]<ñÄFÖ­;×=À¯~õ¯lÞüôöÞL:¦³ó‹ôöF6›Åqr¹\Å>,Ë¢T*ÑÕÕ…eY$ R©]]]¤R©@ Ô^wÕDCV«EAAA„ÙŠ8]¹òÞøÆ7Né¶_¸çßçiß'ŠV²Ý°4è„óÎi|T‘} ”øÖ90É¡"ÂK¨Ÿ1‹5‚0õضz[¢]—uQP!¡étZ½ÑÈfƒÿzžëºA¹iß÷q]7Ç|ßgãEñšK.¡¿ÿû\qŵ¸vË-›(•JضͿþë“lØÄ4Íò6S|úÓç³lY?†a`Y'žøžyf ‰Ä\<þá6sñŧ×}œŽã‹ÅÈår˜¦)^r‚ ‚ ‚ Ü{ï/xÿûß>ñ d³*Ô´P*ñ¾c×ôô”ggIE*¢êg¸±·‹ÒŒòç¡È–&ô|ó…¶êtv)ºpÒ©$âz{{Ù¸qcÅÌêï6lht{'…N4ŸÏç±m[¼t„Ç.õÁõÀ´B!Μ²]º®ä`ÓÞnñx¼¢‚µtÙëD"ˆs{õÕ •Ý“‰–e¡£z¹¿þëcÙµëan¸áž~ˆžžwKÈå:ƒB×_¿ÏSÅ|lûtN?]å›3 %(ŽôÒEç±3M“d2ìÛ4MÒé4–eMþ ‚ ‚ B“Í÷»ß]61ÍÁuUU±gð<Èåxä=ï¡ %ºi}£î†y(§ü9…ò€óQB[ºüY‡›ŽÖtƒ±‹%#2 ½½ÞÞÞŠJ©«W¯žu•Sužïûø¾/Bœ0£è¾ïW¹±­Êðûd2I*•"ŸÏb™9Íd2äóùaÛì)¿ ‰’>yì±ì2™LÅ[’j~ô£Ù¿ÿxV®¼ÃXƒa@Gôô(¡ TÿošJtK§Õ}Á÷ÕŸ¾Otw‡•ZW®<‡;~…ïûÁ±ÜtÓ¹|ýë»xÿû³¸®\ƒQ±^º»UÛ&“ú äóá9AAavá£ô)߇çž;yüp%Àé" ©$“P*ðåˆ{Åb±ösž-­®ÑЭ”s¼E—­º©0¥ÌX»vmP¨a6£=‹\×Åó¼@AžPÙ`A']¨¾ÏqB1Æ’¨þÏAyÀù¾O.—Ã÷}Òé4¥R©B¬r]wÌ䛋h{Ï{°S)ҌܧöôÀa‡-vÊë”ÀU,†B—îã‹ÀÝÀÿ³Êᵞí …°xÀï~×Åé§ÿŠ»ïžÇ‚×ñä“KøèGåž{6°sç¡$=¤ÓKpÃ0( ø¾O±X¤X,Ç9ômo£kÉ.>ýôÀKÐq …ž¾ÊL↡EDç…âb<®¼þÒéÉí#Šç©ßº«ˆ ‚ ‚ ‚Ð à×?Ìgüð†ñ­ìyê!ÔÚžVöÈ”L¢ÑR€z°t ÃN!ôúÐ9Þ¢”}–næF¿ 
²yófR©ëׯgãÆAxY+Q,+ 3N‰èS©…B×u‰ÅbIåa:ÑoG»„þþ‹¸è¢N~ûÛÈdà”ïü{–}€ï|秬Zõ<Ï#›ÍÒÝÝa¼ó7óÌ3WðÏóæñžçž£»»;¸VµxþûßoçðÃ?Ë—¿ü W]µ%Öc±Ùl6í%(&“êó‡?|·Ý6ˆïûœvZ?ïÿ–,Qb[>]]‹©ß(S‚œÞTtšú-”gžGC p™ŒZoß¾Ãl‚ ‚ ‚ L5û÷ï§X,òÌ3ñ¦7µ{ý‡úSbåüàš¢eqû9·“x<®ŠŠ¨°«ê¡ÏB…HËß%§) „¸ÞÞ^ÞùÎw²eËV¬XAgg'ƒƒƒ\sÍ5¬_¿žÁÁÁF·µn¢ÕA sÙl¶BЈÇã¤R© 1~±Xlt³…YŽîµ–áºäPbœŽ,Ëb`` øõ|ÓÅê!6g÷\}5NyŸ>a•i§ü9‹ê³_Z<ÄÊ•›yë[¬ï¢¼ê`Øý>¯ ¯Ôi¢-ûèGUXûÞ½ËI¥„Ã… rÔ[æ“HÀƒžÉ¶m×P*•(‹ôôôJ¥¸úêsøõ¯OæØÿz/ïl{쯸暭ôõ /Î:+Å÷¾÷>Ö­»“¯ý:ÎûÔ§xÍk>žèîîÆó<ÞóžëÉçá¼óv‘Ífùö·Â%—ì ««‹ÓOÿûöÁÛÞv÷Ý÷"çž{-¥’K*å³bÅ #ô 7 X° ŸŸÿ|é´:‰„ú=c1%äuw—ÏŸ«<5ɤò´åi÷‡?œÐHSAAapQŽ@ßúÖ3<ûì’q¯Ò3Ïðïÿü¿êKÙ‹âô}§söóg³á¢ Äb1õ¦ý¥|TèUåå¦#yâTV:šŠù <áÖ¯_Ϻuë¸üòË+ذaëׯçšk® *76+Ú»Íqœ ‰½çy˜¦I.W$uéL§ÓÄãqI/L+å|pe«««‹¡¡¡@ ®®(ª=ã¢ß]×­+·áå+V°»·—k;;ɾ(ÿwP¢\¸ô+óÄ/Ûù·¿üK ÀFÝ@lBqÍE ‰'g ­¼ûîÈ2ú¿£—=k€N>ùz`SÐ.WéDB§@Pâ£Î1—ɨP×?ÍÃÊUóø›UkÈçá‘G¨ð4M“ë®39ýôu$]X¼H‰fé4üêW¿âíoÿ$øÃ ÌŸøÀåÄãq¾öµÛ·ÏçŸèSíq]ººÖpÒIÛø¿ÿ÷î¾ûvb±Ïç¸X´I¥Rd³Yòù<‹¿›o}ëm<ÿüÙ˜æMd³Y2™ –¯Hå`Y•E,Ê)ð}ŸeËðã¯l¤) ‚ ‚ ‚0 ,ß»—=xÉä)ãßÀÝ^|^üï™ûð\:óP€v»%åí¦ÃN„Å„–a>À–-[èìì&´µµ‘Íf¹øâ‹éïï§½½½Ñm×uƒªŒÏóFMoÛv7®^‘C¦’hႱò:Ž3LT®…išl®¥¶7²M(ÊlûÁ¬Yû^{;1”èf¦H}À6K­«_Âèm›åeõ_øÁçž{²¸lªÈSâL$Th§ÎG§µHÓ„We”x'µn»íB|zèŠ`ǘ0/ ‹á6wî|Šÿþï{yÏ{6ràÀxÕ«”˜V(ˆê–eQ*¸úê~v·¾õl~ö³ïªvzÉd’b±ˆišôôôpûí‡pé¥?áÄ?†çµQ*•ˆÅbضCO,#‹EªÞB©4€W®l¤ oø,]zÃ$-JAA„fc÷/sÊ)KY´èëõ9ùè*§I _äð .ž%R©”z>ÑmÖ1·Þ›9GœдÁêh'®ž À²¬ L5šè]¦ •g,Z3›ÍR,ÉårcÚ©öö«PC”zûìs“?©X§‡ÐËMõds9}lZ<3¾Å‹Y³æÏhk{—0OâœÈ6-7òŸœP¬Óâ^ÔÓÎN:é[Ì›w7'Ÿ|=Ý@°ß€!á}̰,âq%CZVèùªó˜Ö*Asÿý¯6Ͳ¬a!ëÑšL&C©T"‘HÐ××G"‘``à,>ò‘½´µ=H.—£P(aò«Wϯû7AA¡ùq}/½Ä±ÇJÿ‡Ç^Áó`}ù‰*ô”XüùÏi¶„ÙË\€Õ«WÓÛÛ;âB½½½czÍ5×uI&“BF*•"™Láªõ ¼b±(bœ0åh¿Lmc†¥UoFÁ4M …z|ÜÆÇ¢]»( TL³"ÿ]Ô}Á«šf–É«1Í9äy ߢó‹T m™O2™lô! 
ÓÄ\PÞ"ºBjµçÛÖ­[¹æšk¸üòËikùÝé@WDÔxžÇÀÀ@P€ªMŽGˆ3 Ã0‚Äì£áɶ0²Yø»¿›K>Ÿ„ãheÔ±˜Ž†‡rCC¼ë¤“†Í34-MÇ!” eDþôrÚãM‹Ù:ŒU_1vy¾A(ŠÙT }QO;í}g÷\}5»v-â–7|+²ÞEñ°ÈD©ü¿»ü¿ƒ0‚‡ª«÷a¿>ùd¶¯:…=ηN:©¢ ì’òg&«½õ<*«ÄBè—FyÝmýæÇ9å”A>«ty=- ‚ ‚ ÍG8®Ó¹ñÓ¨1¦ŽÌè(NŽ…ƒ›ǽÈYg Œ½ üØUÕÞ"é±t #ÇqêJ[$´&ó!,ÈpÍ5×ð¦7½‰Õ«W°sçNúûû¹üòËÙ°aC£Ûàû>žçFÚ …˜‚ZÓ4I$ø¾ã8äóy|ß.îª/ލð'õ`pÌ1·—?X–UWñ…édÑ¢Eª=5æéS5©– s‘õ2„â\ðm›çÿê¯8¢<_ç—3Q¢˜çt5YíëgE¦Ç G˜ÀÊO¼žûè>†þg?¹¨åãzç(‘ ”×]n{­BBÑ+×lkã.à7óçó޽{+æU_áéòÿ|¹}Ñ[£¡M•ÛôîvrõÕsé’·ƒ·82@AAhfb„/]Så?5γËß}ÂÈýa£ÆŠ¥òt]ØLÒ{\ì{|!Ï/ªcÁ,°ÀƒË•Ë@,£T*PÓ %4A¢¢ööv6mÚDooo 4Ù¶ÍêÕ«›®Rªã8$‰@dÉKÈ0Œš*rÕ‘ú¨m-$è°2˲ÈçóèfÛ6–e•«#Úø¾ï³Š  *z¦Ó½är¹@Ln4 ÷î­kI(‚AV%*G¯´ øÓ?å™×¿ž”§åªæëõíȾ4z@cP¹ÿ…O<Ác¿XÈq>,ñ˜êp¯ÉÅuôu:×SuÌú¢Ù¶/YLׂ¦q‚ ‚ 3G(œeËÿûFX¶0z*ÇwQªGÐ ”§£Dòˆw0áýß;†,ô… àB¼Ð‚2™ ®ëNK”Ð| ËÞÙÙY3œeYM!h\×Åóu¥oß¾E»va Ú3NAA˜Ѝç¿…ºJÀJ€Ë Æ¨:q•1„ŽÍãÆ"ÌK_·•5kŽy ѵqº(COOˆp ó'¿‰™Åó<âñ8Åb±fØi6 ŸË±x“­„Ÿ åDí¤Ó`š*D0™TÓ~RÃTÓk¡+[FÅ7Ó4ƒ°ÕpßÙ æd\JÓ¨‡öÑ<‰|¢ћˆN(o¢n@6êd£•$᛫ü]¯çU­ïEÚP«-zY7A‡ìQÞv1²¬Þžž§oJ9œ Ñ3¦]¸‹‘¶ºÀN8aÂ絑èßÇ÷•=Åãq,Ëj ×c«ê5ú·oªÐÉdPÍö÷îÝKGdzcn#Zða4Ìò6÷._ÎxÐùóFâÄOd×w0÷KAA?zÌCÍ,Ô¸u,9C?ÿhç “ÐYiªhü+wa&ñ€}Þ#þÀ-3†1ÌáG88h9!”Ç+„8ßWTοƅ\UÏW,B<®Ä7ÛVô>Ÿ,'7 TRÿ5¶mã8Î0!β¬@ˆÓœiš”J%b±Žã¡³õVm ÚÉèÞDá”@5ùîŠ*Ùò¼,¡àÍ… =†tÈUc_FÕ|?²@?;wîlt[ê"›Í’H$†U,õ}8/ÇöÀUq¨›éûJ€Óú]¡ßóá{,+*¡®£zz†{Ç9ŽSá&jÛ6é´ª¡˜ÏçÉårw\"‘òåóyR©†QŸŒ¡“ˆj´Ð¥+H¦ ‹Lè ŒÚó¬V.¯TdZšá¢‹5ÂgYc~4~­.êZ¶Öþôÿtù8 *=¨ô›)-æ-A‰¥[~ñ xï{ë:—͆ÉÈ^—¤ºúg30Z{9äy–/ŸW×vÆ ½dÉ’ºÅF]5KXÙ5šÐ`ùÞ½ü⬳8¤j]âAA&‡zŽpÔ8l¼Þg:dU×ÇQÑd†¡>+§‚‘×ñ<õç8êù»\ÃPÏÙÅ¢ú¬Ë<¯Ò¦ÕÙ½{„”7I”á>U„ª§hM§‹F ó¶lÙÂæÍ›Ý–1Ñ^gQCýpžpÁ.ÀÜü p¢ Éùµj:¬1à-è´”]:zÎiѤX,Ëåeš&®ë•M¢óµ§žïû$“É@Œ íõ¥õCí-f¡Ä®<Ê›¬D¥`Vo.-Ú5Ú=ºZØé­“6 Šl`KƒÛ?v•¨ 4T3™0Ò™dÍšÿáˆ#\–/¯O:Ô•JÇbଳX²}û˜ËiÏP]íU_WÑÊ®šååp×£*¶a?¾âŠÆžHAA„E{ÁEÃO›ð]wÓNC>¯œP@¥r²mõgšê{&£–q]%Š‹Jls]õ\œLªõGýõô(-ŸWÿ}_‰uñ¸ú¬Å¶x´/‹N£î8¡§óg{œuVëF>AI¶pá½À*gv—g€ŽðYp´|÷Âìf>À† ذaC£Û2&Õ9Ù>æÀ¿;ðÙŒò »Ã*wÂFmÑÍó†{$iO,Õ! 
ª³¾jáy^…»h±¨¶¯½ÞôÅíT@]X™L†ŽŽÇ¡T*á8NPd”PP$,m]*N£n,z¯&µCGÇC3¤}¬ö ëa9›Ê䦭"Æ.ÚÛŸÀ÷gÑ+Ÿ°wïržþº–­7Aî’íÛYtÜqc.Wíéi3za”½{÷²ðå—‡M_±cœuÖL2AA„–G{Áy¨Ô;­‚ÏÄR¼è|æ–¥þâqèêRÏ›™:Ž’I%‚¥RáòN¹P§ã( Âh1×UŸ #ôPËd*ŸŸ£ûÕÏÖñ¸ZW;³XV(¾EÑ^tÑy¥R¸ïÿû[_ˆ³€í‹UÎÐIÏså¸6Cá>¡1Ìغu+[·nmt[ÆDjøµ¹,œ‘‚´¥ì:ª±$,ך®E.=¿PPŠ~w7tu¬[w–eÑÑ¡®lVwŠù|¾ì§¦ߟI&“Á÷}²Ù,Éd’l6‹Æc¨‹6GX ¡–wÉÔ&mvlšC8œj¶nÝ:¦w¤0:¿úÕgxà#Ç\΢þAÏgœÁžeËÆÝ–Ñ*§._¾œ½{÷›ž:êð¾AAÐ…z&jµpÒÛ}åù庌øÌjz¹x&±˜Ë2™p=P"V4X2®ã8¡C‰…êüçj›¥’zæíë ·—Ë©gaÃP‚\*5zZÛVëZ–ú¯÷§¾ñ`YpÚi¿hÜ48À2„ARi¥^vww*Å•pp2`Û¶mlÛ¶-˜¸qãFÖ¯_ßè¶ Ã/»¹yÀÚ,¼Ù‚´­¼¥ªûíze¤7ÑŠžíJëypÑEÊ[0Wb<®:›üàr~µb÷<µ†în5*;AÛNqø‰×pÓï~G©Tâ?ﺋ.ßÇÎïí%îºÓé FQŸ7Y½ï˜9亽M£÷WªZ‹åË—sÈÙgsì±ÇVL ö왹“&‚ ‚ÐÂäQ Ÿ×¸XÎ+>Ž£µtZåo/uÈ&¨çËŽåÝÖÑnÏóÂçÕRI…Z–ú¬§[–Ç|_í#š-*šiOºÑ˜ŽÚq:§ÜÁˆ¼ä¢E»*g8¨€X òy<ñÄ :î`|Æ-U5Õ÷}Š(—äýÜXŸ›ïhzË •Ìd”Òïºpn7|"+pŽ©n¿_}*óÞw_ù?Gr_ùÕŒàt¼|¹*1wß}!/Zò¡<<÷ºÿ?¹ÞsßøâNþûÈ×ññ›¬[w7Üp7ËÎ<Š`õémåI&}#}ÐîÀ¦º/OÝ1Œ'nÝ÷}<ϱ*¨ïûäóy‰DÝ,AcYÖ¸ì±ÞË@Wû­—JïÔh%áê}ï?âÎY¹²1'KA¡ÅñPűzhÌKúhj£|>,X 6¢aœù¼ú³m5-“ׂ#€K"^l¹\èåfYa>¶êpÎz•2%Êuw+G”L&Ìã6Ó©ÇtÞ¸ƒµ¨œÿD/mmVÎ( ~$߇žæyñSNV€R8x˜2!npp›nº €öövÖ®][1oË–-ìÞ½›U«V±zõê íÃ4MÒ”ó¨Ù£wNÝݪˆ†]çó#w =(!N{ŽêŽ ç¸p·*w´w ÞƘ_™ËcGrZŠ&Øå7™LåÛ‰D–öÁÿçÀ_æa¥±ˆþ?œÀM7-àŸþéï¸÷ÞåÄãÐ×7ÀÆ/pòã; ·’/|a;žgò£ý;_â´Óöpþùê  冬]‡»»ÕgׅÉo<”‰8^‹E ÃÀqœ 8F©Tª¹œišÃ·X,†ïûUd£¤ÓiÇiÚ2ÍÓeÇ.ðLOiô!¶<õ q:Ô»ÞeëÅfxµaÕgD[f ò—4BnÉŽ§ª/„éf&Æ‚0݈ ³…FŽ+tQ†F‰p±˜˜ # ½Ô^où¼»R©0O^p6ª#K<¶¬áBÝhnÄ °c(A®‹0§âRpmJÝúP¢v0<8Ô‡/ÛJÀ3-°3ê|Iܸ è(/‰ O`kî¸c_ÿúGyÿû—^sÙìúûOã….bÍwqç¹å–[¸ôÒ7ó·{2'ô¥RާžúÎ;ïÌ™óJ¥ÙlšžŸx<‹m§Y¹ò\nºéZæÌQÂ\>¾qœ0Ç@±ɤC:æoüN;íç\qʼn$“ðÈ#óèéécÛ¶Mtttp oäÔS׳bÅF^õª/ðÿp(oyËÒé4 ÜÂêÕëéèèà…ndÅŠ;‰Åü±ÖI”ÿ<*/¤‘ªò˜fø¦ÁC‰w60•—]%ÞÕZ·†a`Û6ÎûÞÇÿW(Txœ½ÿýËñý×–½ÖÞïû|þóAWW®ëbš&…B†XÌáÖ[?Æš5·ãºoÅó9ë Èç“üüç—±gÏ6^~¹8Ÿ#\Ì?x .IJl V­:…L¦/pyÖ?›a€ižey\xá*~ó›ýè&ŠEõ¶èØc•·Û~t ú§oæ/þâäó—ðÉO¾Ë2)•ŽáôÓwñªW=Ì¿ýÛ‘,]ºŠ¯}í1\7O<žà?ø^xagMo¼éb:íØrH§_5cÇ3±,kİçFa£ÞÖÖ[¡uºÉާª/„éf&Æ‚0݈ ³…F+Ò¨ñsÉ¿ØìîVÏ^‰„×ê­›æy•ÑU†1r  ÑœD& NŠF‹#¡£¢1–Ÿn;vPŽo×_2/¾ø¤š8‚ã8X–E___½›f!óA¹föööÒÛÛÌX±bEÅ÷ÑX±bW]uUð}÷îÝÁç­[·ÒÙÙ|_½z5×\sÍ„;ðêWs Hý‰ŸÊ¤Óa5—Ñ0«þ«¼^ÕÙê0µ‰t3ñx<¸@«©Ò©«²êé¶mDZ,‹l6K"aɨù*f߇ë®Û‡i¾‡dò6<ïÃôöï ¢:ØÀ0EÒ ÎVyl9LìôýyÀO0 ƒ¿û»ðì~ðƒÿˆëº8p 
ï|ç9,]šâÄ¿É/~±—žž^xa#‡òÝÝÉx|ó›mÜvÛRN™ÆÈÎé´cŸéIˆ*4]½«£Ñ )3’Oe_,ÓÉL)a:;f Wx(ac*"+Óé0¯›áŠÅZÏ,ì͛ï!•:cÒm¨Î/<Õ¸®;)qKG"†çy¸®8y†xŽé\ßz_:‘öêÒëëÏPYŒÀ¶m\×Ŷ튼â“m=L·;(Ñø§ÀÂ…{ËLM¹Ó3 3Ï|€µk×Vä­/ííí´··ÐßßO*•âòË/”‘¯X±¢bÙ±xì±ÇX¿~=6l.ŒíK–pÒC‘ɬª«Mºr©m«ÏÅ¢ê\§ëecŽð:3˜Ü›Û¶Ç¥W‡êéB†aàû>ÝÝÝX–…a¤Ói à “ÉJÿtv|#dH¥RAç»aÃ-ÌŸÿ>ò‘K9ê¨gH&/夓¾L©ô]âñNb±‹-bÏžàÀÓÖÖ©¶ã;v°~ýz6mÚ„ÉÄJy ­AmÏ×j´çñc=6mmÉŽ'bÃ>ú(?þñéíí Þ –-[øÎw¾Ã’%K¦eûSÝ?ðÀÜu×]œ}öÙ#zö 'ëׯç±ÇcÙ²eSžÛjºÆÆ¶mOjÌ.Ì.ô¸bÇŽ™OÄŽ¡r|<iT”ÒDèîstC詖χ•K“Iõ?‘PŸŸ|rO>ù$O<ñ—^úkv츜ßüf>O>ù$ž÷Ò¤ž•l”H3hÁË÷} à ›Íâû>¾ïÏŽZÔò<¯âÏ4Í@ó<Û¶)‹¸®x©ù¾ˆo‰D"x†õì–L&1MÇq‚ýZ–E¡P¶ £?[ú¾O6›Å¶í 'žx"Ÿüä'9ãŒÉ‹ž#1ãcPƒÚãÑ^ÿúoqXïaSS-´7nä®»îª{|Ó×ׇëºA6ý&¢Öò3ÙF½ß›o~gyêਠµ»ûn\7M<'“N§9å”é­@9•v¼råJ6mÚ„‹²n¤­múÏ­Ðêé:;;Ù´iëׯŸÖ¶Ô²ãêTõp 'ðæ7¿YÄ ¡‚µk×ÒÞÞζmÛ¦mSÙŸzê©’ _¨É¦M›Ø¸q#'œp´lºÇÆ‚Ðjã ÇÇ#‘-ÿ¯—͆ÅÂ1´0§‹¼ûݯðÉOîÂ÷Ž?~?ÿôO§pýõça³hÑ×9ë¬Ãùã?¾×µëâ´Ãƒ´|ßç…sÎá߇_ýŠb9ay<¯¨–©½Ê´À¦‹çyžG&“)G5©g9Ýäóy2™ ñxœd2ä‹ dúYKG[éí'‰ GÚhýÒÀÀ@PÀÔ3¤ÝjQÏyÒ!Ó4±m›Gy„_ÿú×ãüÕÇÇt]´V¾ïó¶—ߺAfˉ‡îèè°ÔYȆ XµjUÝãã@ˆÛºu+×\s W]u«W¯fýúõ¬^½š­[·ÒÖÖ†mÛ£bׯ_Ogg'·Ür mµ¡³³“-[¶ßu|öxÙqÆœö©øÔ·¼®`#„¢—ö’ËW¹f5kµÛ¶I&“ôõõoq~ÿûßOë>§ÃŽ}à8ú^~¹éò› ³“Zv﹪ÎÀhÒŒž‰D¢"†Þ²,¶oß>mû›.;ö€'\8âˆý3~…ƒ‘ìx*úâñ’Ï«¿D‚ :t2©ªŠe³cW¿N¦{L!4Úƒ¥Ú9ßqÔ¼bQ‰÷:Ñz« v,Ìfr\QdÄ´Z#¢=Þj9`è~Åó<òù<Åb‘x{Ú¶?vìÛ‚§žú–®R^kˆpžçá8Žl`hhhèÊ+¯ºîºë†¢\yå•þßu×]CeçÎCwÝu×ÐsÏ=7æ²ÕûN -:çö¡Ri»f Õ¶1ÓLÄŽí¡¡¡¯•††/þ÷¡©kLnhh(^þ<044$×GËÐH; ßu×]Ãî õÐ×74”J õô õ )ûL©y©3‡†`h¨ïmCC…ÂÐÐT^ÂÌ1QÛ˜*ÆcÇ×]wݤÆ/­H_ßÐP©¤®¯¾¾ú–/jÏëé©ünÛêúÖ”Jê{<.oÛCC–54dês_Ÿj‹^FïS·/‘·3VŸ=¦‘Ú<i“ B”VWŒÖÖ¾!5f =¾ªì+jõ¹\nȲ¬¡x<>ÔSÝ‘Í0c— h;®¥†ÔcØš5/ ~úÅCCfyF¡PyÃêëë*‰¨1kÏøx>À‘G9L «N¢9888)Á/Z©d¼øÀüÇ—‹…Ðp&bÇ>p¯‹/ûí™[þ¯_Öx@7*]…zåâ•ÿ ÔkC¯¼^ÈPéûm„[Þ®\G5“é‹ëÅ4Ëo¦} F˜nÅ{.KÀÜN?t =‰‡œ0>fÂŽ›™tZ]g¶­®êë'Ÿ=Ò,K…tÆbÊ‹LW Œ²Ò^¬ñ¸úìûÊ‹­œÂˆx\yª†Ú†ž®ÛÒÓ棆‘ÃÄ¢iq´“€a¨6ŽæÑ%z¼®[3ýNËp°Û±0;˜*;Î2z‘IßWý˜e©ÿž§úð+Æ×žçEr¹œ„6).•ž‚y๳ÎjX{¦ÂŽ{z†øë¿î Ÿç\w˜ëv´`¡pp3Tlti”Ò¢½½½ôööŽš'n:é íå—›:AF#‡öö[G^À’(Ÿ|ý0£E·J\3Ëßm •ÉÖD qTˆêHõѽòv} t•U·m¦ñ"ÿçér{’>9­…®Få±0çJüÍ£lÕÛ…Ô3@û ç|õPͪðÒ¡„×CDïGÚTÕvg„kƋڈ_{»‚0ÝT\ eÒie·Å¢ ï,§ª@‹p¾¯D¸|^}7ŒPÓ6íûJ8ÓÃ@Ç vK%õ «?“¡çóêºÕÛÓ›e¿XV<>±üo™Œj±8þuAh|Ôv¤n@‡³§RJxÏd —sÈfÓtttLª\:&‹aÛvS‰pëÂC ±£I 
5¼+–ÿÔ£ÌÑ>Úè&OŠ£~TÙœ~©¤FÐÕka>ÀÚµkƒÆþð‡+Ô`Çq¸æšk¸üòËVÑéá¹syßÚm@c„@A˜ `‹í>³ˆ(º¡'\’PĈG6T«ÈŽ^Ç.ogIy9»j­µÇPwF]5Ûlc&È–Û¦ËzçËíɔυ_> 5"›ÌK#§ê<ä GCFyÿ™È9HT­ë”§kA”rû’åÿúœjŃ̫ËuÃ`Ûwûsø¥i ä/ƒÄgÄà|¦|šË¹ òyß±<¤=0~þN0Ï„’~7¤3jñ¸¼ûÀX®ú=OýwÝÐSȘ§êÖ›”¨`JTp]õÝ¿öµã1Íûm‚-C6«ìMÛkW—:—Åbí\gZ\ËçCQKç\¬öëêR¿‰Î¯–J…lšj‘˶Õ:ù¼²•L&Ô‰ÚËWãû¡à7YjyõÕK&£®é¨X(Bk‘gto8P×zxuI&“†äÚZ²dɸ 1Ì$Í!N/ú}¿Rè¡q­¡üHNÌ·=ýt£cR<ùä“aAd²æIWàm6fžù  2lÚ´‰t:ÍÅ_Lgg'mmmôööJ¨«UÈa¦8ä±ÇX·îôFŸ+A˜0?úÑÓ´·—m8M(ø¤ ¡è]Ú‹L -Ι‘íjqË&ôõ·#˧ Eª$ÊÃÎ)ïÓ§vxëT‘"ôäsQB˜ÞwYtBh=BÏ9ƒá5íõqø„wùDä¼¥ËûÓß³¨»¿Uþ¬¶®ªs­×Ó£ §ª ZœÓžnc=^ Y8lßì(<£ÇÓ†÷¿¼;á^À9ÅÏBúÈ–À‚Ìw#毯òol®„ÿ3Àj°¶”ôÁûIàI ¾ vœ‹¡øMpç…BAnxçBü60¨i==(Û*‚Wû±ÊžnôƒsØ)·Ã„¿tBù¼åƒåå3àþ¸ 8G‚ÕApÍflÈÌàAà,p¾ü•j‹ö‹ŠfZñ<õ§ÇpÚóH j®†jL3å÷f MZ–ë¯?™óÏ_¬îZúNÕUþ_Kl3QÂA¡î](ôÝÏ¥2l5Šö´Ë¢T³Ü¦|d—úD¥ñà—Y{âiË"þ´ð–£R Ôb›þÓ⢇ºûkaÂ-¯¯¹D(¦e«öUýðèæÚ³ËÛÔç ]>?úÞ™*/§·Gyñ™TÆVhoG-v«uÎ:¾q90¦ŠbQý¥Rðìb¸à68r¤¾ÙK!µ’7@j®:]Å9нŒ½{q-¢€ð÷´À,¢Ÿ)ŸS- ?Ø©¦¥lõ›ØïzÀ+€ÿvÈžñ{ þ/@ouƒÕ>0ÁŠŠÜi°-BÓ×'jàß·¸2òû`]Œ°ïK4µ—¤‰»"/·#¹QŠ畷ßW^.­Õ΄Û3L°S£Ý¸]>†ˆk4͈i†^lZ˜4Ía»*ˆÎ‹ÞcõúŲ8gðþ÷?ζmûfÂÔZ’|^ýÅãêÜE"ý¹zL£E´lV]Wú7ˆ×óB†Ñçj, &wŒ¶ CCÓwÇC*zJêAh=o¸‘ÈÞ=kù„kKßMµ›’?FK]0R(«SÞÏH£ ƒr)ïò2µ^Å#Ëi¢wö¡÷™Åp¡Òªj[t[Õ# èçxy»º}Õç^—¾±êíêœ~}T¾Ô9›´ˆByZA-³òÒ•cý‚M±‹Çáoï§Þýÿ wÞ ©o‚ñUŸüˇcÿÃ?@&CrÒ®K6•Âòù<þ»ßMêˆg!n†IBº ‘h1S+Ñ^‰ÚcÑÓƒÔ•` ÞõòVùwH 3·üYÛK¼üûhÏÊ8¡°f¢äa²3²m¯ü»jaÖ!Ìߘ(¯«!v\ eÏ6E‹ïZÖ×"„ÉstñÔqáÜ.¡w­5z´¬OŸWQ{w–EÄÀCµlËñ7…ž§ídú¦YBüËÁyÊç•€ÙÓ3z¨dT0Šz•ÆûFÀ0ÔƒzÔûS„Ö :‹HżòýL÷¥Ùl¶©r¿Õ‹¾Õ6K÷¤ß;jÑM¿¿×í3©?CŒ~·°ûyÝvÛ ýý©1Ò'MÂQ…(ó'¿‰éÅ^xáé®F7EÆâ.º¨Ÿ‡¯?YÝõô×hcˆ:^)uttÐ××7òÚû«ÖÍÀ L$ •p:×™â(¬ÖÁéka¾»J,Яϴ(`ì»tµVk¾N81^tåÙ‘æA(|TŸÿ\d]3²L6²nô§H”Ï_4dV{ZYpý›¯g›hetXÖ€Ü`ÃX÷¼üèVŽzbYJ«Vqعçë<ûío“(•H¦R»aɰÎþ(vßk WOÑ}åó–„¼ôÞ–z[ð]ç–ÂÑ{ЇBüôoèçåÁ—9À{â0–u-cï'÷²ÈX<é+o#M(fiqZÛ_ŽÐ‹Ò'´w3ò½§Ü&-, _k´¨âæ(,û€»"Ó‡¨´ý¨ šT^¯ÅÈv´—\pT]  ££êèçTy=1z}XåãJAâH¿¬7Š|ð‡†˜]S ó±õõ©ÿ:ÿžëª”0©TÙÓQ˜QìƒÙCZŽ·¶]ym»®‹Ý¢{£Òóëa†~Ï®Å7ýî[&Š~¯Ùš¿ÊÔñýï÷³`AçˆÏ(:œZ4M/Äœ¶âIyÃ)´,póͽ|ð‘×N(™>Ÿ'‘H 
ÞF«t.TçNª—ÓwíÒ(Û‰zÎUû£§P"‡G(¾MÅËK-.LÄ#§ž>Ä@‰+U¸¾‹i˜Õ?àhçW‹qz¿³ÌËÅu!—‚X?{#¬»NÝø]Ž|p­R%ʃ·ww“[¶Œîîn¾|ÔQœüðÃlúò—yË~ÀáÏ?ÏœgŸeÝ׿ÎU÷w|ð¼óØþì³üׇ?L±\â¨o=Ãá{öð­×¿žå{÷â}ÐãŠ/™{äÕÛþ”oßóm6:ùWã%æ|ìn¼ÇßÊêÝ[ØÞµ7Üûî?ä~º>ßÅ]çÜEß%}œ´ë$wà‰'nåçÙ=ÄãqLˤèq²¶k“Éd†‡gÛ*ÇGçžNÚíö Ô5 ®p#!2ÑùåÁYú°oÎ% —·_Úc¶#ÜfMºÆ¾£ÇÔ¶a}àÿ'¸7‚ýv*óP¤B\6«<ÞL3Ýt±ZE„™Å÷%Wœ ´µ†u:¯f´Oõ<³EµóúL8”ëwuQO·¨P6Õ‚Yu ËÁÊYg °wïoà´«à¾á­æÅ)L?sÝ€±pç|R£›!“âöÛßÊÊCWNÈ'Ýó<<Ï£T*áû>®ë’N§q]—|>Lì–N§ñ<ß÷ñ<¯ævê.—yu“`®ëR,Õ¯†Î•EºêrÝîH¥Ëèe}ß'].!øØÙÕ|¥Xëx£8åØ×uƒýº®¬§÷åyY'|Öb>6ß÷ƒsžL&ƒí&“É`ùèoR‘¯n–Ý{z¼pë`­…¾%9º¯P +™$Nãû>Éd’T*ÅÃù—ì\¹’Ûo¿x<Îó7ÞÈߟ}6/\pW}êS¼É4q |ß§§§‡Ÿ}õ«üü oàÞû^6mÛFøÄYgqø?ȳ}Û¹ÿƒ÷sÁåL$yý;ÿ³žûgþbñbÖ¬yŠÝù1V=¼Š5/­aîÂ¿ãø§þ‘³ï¿‘×-ûüç‡>DÇ_þ%ë«=”t:Mww7–eñÿÖ®åü›oæÖ+`Î~÷ýïsDzeô/\H,#ŸÏsÁÇ. Ãë gl¼s?zýëyrÍ~ð¯ÿÊ“kÖÐcÛl¼ürb±X`øî¡û1-°/#¼NFaDûŽæGœjôvτĉâm¤Ñ9ßJ¥0[½9Ü„éGW ÖUgAh~je<ÉfÕKŽ(ét:¬HÙbL—|èf®Ðè|m ÔP¾DÅp\˜\`áÂÇy«÷[¬=àOÈ›:¡Š¦÷ˆó€g]Ýèf„y±,µ=Ø6!1&S~µ¯… x<ŽmÛÃÞ f2Ç ¯¨û³çy8Žƒišu»ôW?ük±Í4M,Ë"›Í’H$jn×4Mˆ«6»‹ òù<¹\.pÍN§Ó ’É$ñx×uÉd2tttÐÓÓC2™$‘HT$7}ã‡ÞH__ét:h‡aÁ¶òù|P9+N“J¥ð}Ÿb±ˆmÛiYVp,¾ï“Íf1M³â& £ÓõyÉD\.âñxð{ëéב³L€Sç \¥ó÷`^Ö5júÓx§Ï<“®®.r¹Åb‘DYXK•Ë8þǯ~ÅSwÝ…õ®wç6ã.YBö¿àqB'JãøãáŒ38<“/~‘c ƒcËóÎŒ´çÜ øÅG>³/ rÈ–-kÖ”7 þ-zîK´çóê)Ý4ùnwwP.ò ß'W¶‰ wǼy`Yüü¦›Ø÷ÎwÒÙÙI{o/¥òü ì¼ïÓÿÉOrÛÝws÷à V:Í‹ù‡ÿô§ìyøa ßý.Åb‘îînÚ S>q/¿ðŸ˜›–af³üèã?â®ûïgå±Çrk{;GìßÏ7|0°y-tëö™¦Øp<Ç+zøÇñ}ÇqÔ¼²÷€çy¤R© Ú²,Ç Ž9‘HŸõµaÆ ¾¼Ñ×Xïót‡ÃضÍå—_N{{;‚ ÔU4\¡rÃéš["‘˜ùµô‰HåñÁ 1ï¥X¿i ß®Ïåóö•„IJâR¡ âòÀü-Õ‹p ^#æÐ¾CÌyÛØ³îÙl–p8ì=Ø(Á ŠâU©ð¤Ä³r‚T¹A‹ò˜ÓI§ÓÞƒ£&㺩Et:퉒A”%“IÏSQ¿ù6{”Öæa¥éó‡pÎ]Éãÿö¼{íR’“$d†=¸ý[aŸj jP~öÀܼ™f©ê:õWœu6œrhFe’Õª@–å›o˜H¸O> Ãp݇¿8z”÷g2þâ0¶ÍwÜÁ¦M›}(k†ã¸‡%i¼Y)ïE],Sž¶*eérõ^…á«Ôª¯×ÿC1¤^…ó«ÏÕöÕ¤M,ó<¤ ࣣÃû¾až§<´Ujå]ÙÖÖæyd‡ÃaOŒS“Cù|Þó¼Tûëëëó6%6«ö†Ãa/%ƒòÚÖC>©Ôéî†tºè=®î#Žãpýõ׋Å{’ašRZ¨!•ò{«èŽ© d(ÏuwV^n™ÂŸ…¿’i5ñS2JSqdÏN=õŒ±ÖÕLv Ó‹@ qû{*9Xñú*G”mÛÄãqR©”/tOê‰ <Ÿ…í‡ͬ\[~¥| 3ù–e±ï¹çþã?fÑ'?ÉYoxCqÚoiˆédP‹òòêá­-•³O÷î˜ lµáø2ˆüèû¤n}/ŸÿÓ?å]£…ƒžÅÁy×KÎ)üÅ Ë,í¯RµDÏ©Ò(Ž9±sú8vùÒ·¤@2©â÷ Éÿ¯4h»m³­§Ç|Ói.}è!ž{Ç;ÝòšÉ¸ƒ¦Y»pÔÒê*4^÷êR9-ÕkÕ§ªIFµ S óVâ–ÚŽš”Q^cªŸW^jªï×Å:ýAõcjÂÆqÏ£ÕqÏã×—cq ª‹)¯b•f¡ÜwËyG—Þ#:: §'éÝFÕvÔzÃÃõ9Ñ‚ 
ŒIi*]Û.æáÔE¸f÷‡‰;¹¨r¼™¸c‹ñë£ ÁE ª»÷¯`Æd¦Ói/½ èZˆ³}CCÌšõJÅßѓҫ›гÕhŸ›ÚûÒäÈJ0,öÜêï¶ þ™çXæÝlü¹cí{jv\ÏcãzªíªßªöŸ-¼¶µ6T„º¾ôÜ|åú‡înžèïçwSÐ#NUÜœÌnV Vj\¢ ï¨{c*•ò öjœ¢ÝQÕ‡UîE5Ã>ÖýµTì+¥4eÁh †aÐÙÙ9"ÍišU{©T;þŠF£Äb±û®åÕØÙé>à—6áŽ;î`Þ¼yÚ¶ “‹r¾V}åæÍ›§Ì*Œ›ÇMU0¦jbø·³ž„‹G~>¼9…Úh!.œ}ÊóÜðÖ» û„Ò£­”LØ9ÀùÑóùÐ;?Äùo9ŸX"Æ–?Ù‘!}_š»>q‹ÿq1<ð‹»3ïÜy$»“#.%\†áåÝ|¹EôÊsúLMi•$_‰^ê¡Ü\• L%vWVgg§÷¿¯¯D"áU›ìëëó*Ý©0ŒT*å  Un¯ÑÂáÔ̲òT9ÔC€Õgªý*YOOoö]½Va*¥ÔÍS…ŽLÇÎhUÛ£üÑ}¯/>]«êª*i4J>÷ìcTa,›uŸ ô‰ÚF“ TÓ…©fïÿœ…¥mûxõÇ?&ùÉOÖu€¬ö¤ŽªÈêUÄâ…åJÈÊ—¬§WhS9X”hÅ/ˆ).…+È)AÌжe¶ïà ` qÖŽüþФ Û‰kûÉgüÓ‡Øõg­üÓ›ÎaîN¯|fR S‰CCC\×ÒÂÁƒ˜5k…Ïõö8…÷?xæ.¸àZ" Ìœ9“9‡ñÄÌ™ÞwÝ·ÿ5k¹ðÜsœ7¾ލDÒ×jògË–-u;ïµB ¹J´Œæ]Ï©Ò$áã1–׫ªš«B/•÷–ªÖlYÉdÒç©6›7o®ê÷MÖÄÅXyka¾Â=Ųü9§é4<õÔ9HŠ8¡™P¹kyPOÔdRñw‡³j¢ªˆpàÞkªëÅ…f"ð üƒx½ •h!`ùÕ[8ë¬óÜ7I\1NM'€ûä‡ó¿{>Ûÿa;§|åÞÿìû!+>°‚Ãÿr˜ø§âì ïáš;¯á²žËØ{í^fÍžÅÛ¾ö6œ‡ó~qÿ|×?óÈðñ§>ÎÑeGqþÂõ¦Ëd2d2/ÔðB<Ô‰àôU,𠼕×Õj°¬Þ—þW¹yJ«L*ôqµ-] ÊòõJ“¥¨Ïôm«eú6ÊåÓûëûŸÎ¡ÁWm¿ª8Ú(ãAdg³ÞCû¬PM‘HÄ«ô8a2ÿÓg“ð„Ég k¢°nùãüõí·“l€·k*•òú%}¯D5%˜AÁk.Æ)„3ø=ÞÆË©¢Bb×äräwîÄ´,ò™ †i’-TìÍtvbÄãd 7—ÕÖ­®0³?_\²„µ¶’Z¾œmÛ¶‘J¥ˆäóüãéŸàè¾—xí³?äq®§­­dO7”L&ikk#ÚÓC 8yûv’×\Cì£õ¼¼ mBÅ0 Ž^q_lmeNk+w=öáÓO'ŸÏ³$&™LÒyÓMÜ_˜4z÷¦M|£½ûÐ!–?ðù|€k_óÞÒI\µ?Ÿ3 ïx>¾bÜÛnnÉ¿ü2—_ýêW¹ñÆë~þ'%ŒªsmÛn7Z©ãU&“ñ&ÆJïËžW£mÓÓÓÜå>™Õ–Y”‘RÊ3Ý;õÏíûFÏýç8®GÝ,aP“æ¡õñ¾ê[U¤‰ÎÕ5Éd|±Õä¼ÊO¨žô│”úŽ^hK-×—é…Mº»»I¥R˜¦I,#F¬äîìßú0—9—¹Û¶Àv\/¶ñ.=A0Œ> ­„ºRA*°ôIDŸUáµJÐTâd$!‘HŒ8žÍÎÝ·¶²ÄÜÄ\0ê:årÜTDO;¥ŸÍB[›û©Ž]5Âg!O÷:Ÿw“$åóEq.õ?&éZg6VIDAT¡ªõ‚p®‚Ô–)ÌV.†ïÿÞïâ¯ì¤aŵõ‡Ý#9²|ùr’ɤ¦¯òŠF£Q¯_RZv:Í-—^Ê̇†‚¦¶©úíx<ŽeYe'*¾ŠûÑ °8ŽÃçß{œcŽÉŸ™®WYOA„Ó½Œ½‰ð캜‡v$ñ¼­zóyÂï)º@¤ßÇ>ßÞN ø«™3‰|àD-" l’á°çI˜¹ì2.ßµ‹KfÏfÙ›ÞÄßàj.ç|íkðoÔÕ&åÕ˜ÆÕŽÆë‚•V5¸?_Úµ‹_/^Ì«‘3.¾˜Ÿ¾ò ³gÏfóñã,=|˜Óöïç·ÃÃì{ÛÛxýë_ï}W‰Áy€ƒÀN`û§ ÅêuDö2ðuà"Èç ÀìbrþßÀy¬G ë^¼¬ÿãV œ(8óSp.ës`žÙ+!?ŒA° plÈ?ù6à à$`†Û ó+`-ë pºÁÙ á®[©Ul¿®óéáÒúqPÚž&®ç*}­Â€ôïª}è¡Úà×Õú‡Âðé4|¬ÂIxù?·gÕÑòš](ª„Js5«)ÙlÖ7± x±XÌKêÊ¡èªÄ3(îPÿª˜ˆiš^Š*²|ùr¯p‰eY¤R)_*˜åË—ÅIæD"ámSQ@’ɤ7ÆÕÓ΀{OPíÕƒ—¶ ä8g2/”½»»Û¶I$†ÁÏþóF›EÕø ØÅ!f,›rÂÔ'qx¡;»W2G=S4 ch!.üî»oļ×tËç©L–qÜxÀr=‰D1t"Yø|ÿ_Ó410!ñ¿ˆûîTÜN ,ÓŠhƒ†ÒÐØ‚gÞTj‰,¨‡~ÜÔ Eètvvz¹M+X9r„+ÏúØg—ÇTˆò„‰FÝ?Ûv“à¨\r}}~aJå¦+ jÉçÝ'OÓt¿3Ú Çq\1®4< 
_ˆã ‡Ýí†ûÚqÜÏâqÿ“m*U ÉF‹UU;JÃnÓ颸fÛîç¥6Nû«=ª}[VñéŒj&S<ª-ã]ÃêØYVñ79N1Ž¢ôû¶]ü]‰Dñ|49{xÙ€¯·|¸Y›Ei^+õ¦ _(1mÛ¶m^Ÿ’Q¹- §ð`¥BêÕCŽB¥P æÕÃL¹°põÞ¸áoYéúw” Ÿñ¬¿d¢æ-ÜÄxžvš·¬tŸc¡{Tè©ú* á1)|%ËÊíÙ¶oÙÂß¾îuœ¯-¿ùÈ»`ôÉ…f@yP&p‹ê8Ú¥¬Âš3Úë+vïæ¥gža×®]lùÒ—x_{;×-[†“ÍrßÀuà Ù,¶m“+1µö©uÔ}ÛªéÎmiîÄ™a¸4\V»ànÚ¿¿×|¨êL&Cww7é´p¯r«\‡*¡ÀÂá0™L†X,æ¥LQc:½³mÛ ÝVý°ªî®ç#†bj%B©{ƒ.Zé9Y3™Œ'Œ†Ì”à¥ú÷RO¬Òéd2éý>%èé¡ãápØ—²e¼ñëhe¹‰{Û¶½ß¤„ÀT*Åk_ûÚ†ÙÉDÑó²ªC¤¢‰šuÌ/L?`×sûxâÔ¼·Lw3#„± ´÷Š< ¬ô‡IDðÕº.½V„_õ#/$îÍÁݼîÜ)sm½ #³‰ £¢?ÀêÇ\Ïi§˜TÁõàÁlôϨš÷¯Épþ?m†®òbOww7‰DâÄgÿFK‚£P‚•Õ £X,b¬ÜC£‰T¦ézäå󮨥{Õ9ŽûZ+¯5ÀXÌ/J¦Ó®¨ –UlŸaKêmU"d9òùb½ÒãTx˜À0Šâ¥:&¥3WÅÏÒi·íê÷[–û¿´ ¶í®ÛÝí¶{¬óÒDüWÞpJõÞcQjû‰DÂóð²,‹t:M$ñ=)áZˆQb›*ÐS*ŠÖß”û\§V¹i~p{;×ÜùsrÐZ“=Lð@™ª¸wlßΦgžitó&D^ûoݼ6ßÞ sðRÏzíú :ÊÃÅ0 ¶õõùãXŒd<ÉëŽ¼Ž»çßÍò«—³âñðp=„gJìXük±Map½ývz&DZ ^QøŠÉõõ–¸ÿœSܹL=úôD‡.á%ÌB6ìnË»RlŠÕDÆ A±r-«æq;Àh½p׃2’Ä–RVy+3w]×ËL l*43bÛ6Û¶mó<•÷’šPµmÛóÚÚ³g7ÙÏçéèèðS©ÊM@”.3M“t!Ô¾ÜøD¢MTv=‡s¥è÷šr÷ŠZMÒ—Þ¿Ôûµk×ÖdµDåèTs£†áO?!Í€l¿÷[™|®d8©ÒWˆ'”#ÐBÜoLxã¿ Þ tB:âàuötwsy?¦ò´±í¢LNëèèFaŠFï¿¢Ñb^» cšµø@Akà¥ÿû5NÓܽaÂÛRUäâñ¸—ÓGyTL—p”w_5~bŸø†„ ¡"'M\]?™„H¡KQâ„ÊEeÛ6›7ofIn güËnœ£zÂO2ºà–H¸}•aø¢Ò\˜PÌAišîwòyW=Q‹úz*7%CýU^P=‡¥"UÈž¦¾7Z>PÇ)†áC±Ýê¿iŽÌ·©òfª|žÊø :‘Q Ù¬Úï%â³ÝuTåïrÐ*G©‡©œ¡à~Wë©ã–*Éç8î„SéöÓi°m.yâ ö¿ûݵ3¸ ;¹;êT„!‡T6Ëý?Î.Ã`÷u×1ïÅɆÃ>±É9óL¾€;\]ˆkîJHR—€lÜ\‹±Âß6`9EQ¬§°n†bxt¿Ø/,‹á*ëùÿ"ó*!J gj}[ûžÊñ¨,0^XÞIÑñàä .àÉOšYÚ6Õv”Шڭ„ʘ¶o‹‘9•x¦Ž™^”@µWÿŸÖÖí,üV%úEµ})QÏÒÚªºvïn€¥MuT—¸ãØ^>AAh6vÿ×É_´Ò·,‘HÔÌ“W˜VˆË‡ž?…ŽÃÁu#Gº¶m“Édª é³,wÊ%›-æËJ$ŠŸG"#¼ÎÎâ°”<ü2 á’ï$uà%vÙn•1u¤gðV6&Ý»­ÁdÓðWØ\€ûÿU 8’=p…Q•ì´á¹„WÅÌc—öúÃe~“J¯( K+q5¨ËÔtj©˜ûöÐÚö¦Ë“ÉdÓ„Ò ¤5†-[èûÓ7þ“ŸT婋żåù¦*ÌMám‘Èäyf ¤XÛEïRÒé´WÜ#Ò×ׇ‰éÞ¯•¹†÷>Ú‰¿˜L1æ¯(š©B6J,+ ÅÜñˆè”P5†—/ϧ¾¬\ߨD,%tuvºTi¡ÓtÇG¥y5õñ^E[ýÎÎNwÛjüdš`e]õ ´Ÿ‡Ý|™º¸ÖÑá¾.7æ¨Ö3¦œXîñíìt?Ó¯»ÂØèÁ®.¦š?œ:Å==ðï}ŽçŸž÷ÿr>OŸ{.¯>ÌÑ}ˆY³f±ì?þƒÇ×­£w8º wˆé2ÅšRNoá½ò:Sßí ¨QgqE+ ëo¦èEfàU¿La[“éCž¥èu×F!#‹ÕŒ‰RÑ”j†+œÅ7¹EJŠÕRðO´+M)rº‹^‰ Y8nVḕeµ>?§°¼Ð¦·l™Ä#W{”ªæT>XAh6àW?\Å‘y?ö-¯¤R´0½ ¬g3öïçâ—ƒŸñyF¯”X-*„µÆÊU¨®ê¡„µžHaê.f´X)R%ŸWߊ7öh"…[ïrÜ»ð2®2áJÃAÅžS7ï%Ìì+NI*úBzozMýW}A<î ëÓjÄ¢þ«©Å:áp‰ ¹˜Wxý¾ÛíU’%¬MÃÌ„Lø¥ÇLØ“…ùœŒÛ50VÇn°p!œT¾ãúË ញŸúz³' ËØo?±° 
•,°÷±},;e.K¯^ZÕw#‘mmmlÞ¼Ù›8©KNÄ #×~CÐooàÏçó,_¾Ó4¹é¬³ø‚u³®¼ž¹¶ ŸNÁï›-(q… <Ótïíá°ÂKMúUBwwyñ¨åª@GéD\<îþðrÂÝhy5G#/Ÿïó¤ðØá²j¿j PëôHdlqs ‘ÉÀnÞ‡µÝ!Ù}O>Éîë®cÎΜû¹]üzÍõì¹ë¯q~ò¬çŸÇ¾÷^¬PˆHalìõÎ*±œ®V;q•¶t,™É7 ¬… ±L,ðj§^ñ$ Ó‚¸æ"f=› ÛQ›/T7Ãþ ]%`©‹yœ¡|X­bCÔ*ðPª 2c½{ØbÃi&+€žƒÝ®PÖ[ø¾•‚ïgáyÜÒâ³ ÷o‡ Ÿþ2ãÅßpk¾ž¹…1ñ s†AϪvŸù‘/ü=¼í?ŠÅâ¢ÀMiøZ^cÁa æ;ðjÞ­žÜ\e1çМF›_U(Éß¶]SêìÌN(OŸ,ôb^ªŒ*¼gÙlÇq*Î Ù äcWž£¯½Î·\žó„ñ¬pòÐ'Í<»lÒâ@<ÈYø«³ªêªåbOÕÅX*–KÜÒWXnhÛé¦ÞªOuRœ…KáŠ{êÐd)NªíÄ ÿ”`J[GÍì©¶©&\ÖSLŒ¡üðõø€C¸Óyê÷„§£`hm p@áB»{öø×Ñ’a|~íÚfÒáX~ü)0ßâ[&Ut„fcö×wsÙŒ¹¼rÖ+U}/{ƒk±w¡‘ê*x·µl¶èˆ•-$ϲ,‹nË‚9ð‹/ÁÜ‚ÇxWI—fµåj&"'J5Õ êåÚ¬ç«JÒi ðp(ÇÎô÷Xðž÷ppÎ0M>6B}›1ž|ëóŸwòê,fG aÖ==¡è v&È~‡]i8Ë€yŒùœcÂë-è³Üq³º7vB‹ƒµÇÁ:׀߿a>pföšpÄe…q¤šPgá ¾8¤–Ã~`… ?Öð<ð‰ÏC/˜ç:˜ §ÊÃbÎï ¿3Œ;fí‰*¸/Î(¢¼Ã3Š í~ãÙi¿‹ÛÆ$¬Kº¿G‹Oµ( ˯~ƒ»žþ¯FÝ?/l=ê=þ™ÃGþü¢ÆÝQ¢hÌÛ.ã —ÍíRõ©ÔHxš’Íf½GÛ¶}ùuO]m´}¤ðOªDeªªMjøØ7?w3[¾×\a}åH¥`ðÔafÌÍñ›ísyÏ—¾D׈­yž|$ †ÑFÉ@0.z‡…ñ'1SB[`›6Ý•2¸Q!Üs£B”ãÀÛ éWî,œxa{ï)¬›Ä=w]=ňŒ0Å hmP¸ê²ëH¹aÀ`Á;Ïèv÷£ØQøŸL‚™…+€öp1ñ›ž\nþ$v–QŒ£…‰_ïží—,[%ëUŠaŒôr5M0M’¬åž&šªV§)wÇ‘Hĉb‘a¸ç:]õ)ºªrOBÑkx²Ç×*š©Î©QÔQ8öª«gßt:íâLÓ!Ä©å†aàÒ˜¦éÛ–î­–J¥0 ƒh4êåHUã·p8ìm_O+‡=ï6UiY¡œfô1 î„Ï罂1ÍüLdWàÀYW{˦mê¡*+ÄÙÀ±£GYx©9â&¥TxAhþkñ!ß{Û¶e†Dh:¶îlåÙe_àoŒkÇ\/N{}tÏX¡ý‚Pgl\ÍÀbéB手ÚXþìS Xw±jx§½ï4øäÔ(²Ò0ÔóT¿wœEyÑAO¡2å')¦Û0(fê·ðOÎEJö.YW x*gmOÉ÷ûà_ïûWÚ›_}‘AhY°°õqß2Ým[š8õøIÌ~á7c®gYÑhT*Ÿ E=Ó‡Ûöb~0¡msØ:ã.ß})ü?šÚ;pXã¼/GœâƒZ%)„ÆÛ‡^ªFžß)r¾?³öy®»ä»üúgûùŸŸù +G2œ¿ƒðé…Ê(®à¦„J“bÎ4]XËâʪtø° ißtG'åh¤ê‡)D ñUm1]ŒRÛqSZ®ÅÏóùâúªh…^sD‰jêut3•ÊN¯Y¢ž§uÇ8UY_Šî/ͺmñœãþï”ÓJÓLNÅacÞŸüú'E‘G?1ã‘ŃU…øNæ¦àÊN7eÎ&á.Ó]'Ü™‚W20÷À/±Üœ~¯8°Ð„ÅÜ‘ä¡ÇâêÏ^ ÂpYÖE¯ÍÅ~*Sx_¸nòVžƒýgsÖ#ÀEßèù?„/ sú/ö±px§œ| ½žsÚ?Χ|`&MÞñâlø½_˜}„S»º « ëQ`f‡¡M*ì·¯P,g¿ ³ŒÃ‚ƒׇáõa˜gð‡Z^š¢v«Œ!Xƒœ{±ÇŸæjàêÿ~þ» ü®,ž¼#ßisóg…ý"§y·àIh±ŠÞ³°¨Ã* }˜˜<Ÿ‡í\“„Ý…þr 7´}!¼ñÕWm–U³çÛ8mÎK¾e‰DB´ a\+ÄXoý>ä/÷î°Žã‹Åè«¶’– 4ˆcGñ±Áwú–‰'4%þä¥òaˆ·²T¼gñ|žd÷BöÆb¬ûÝC|ò“§À¦Œ(ÓÔèÞq˜(àæøÙuËxrå©üí›ÿˆð…Žúçï ûS¦XXˤ\:.ÿZ‰UJTËfÝ?åE¦„¬lÖMûkÅç OPSžcÙìH+™,Št0RŒJ¥ ­Íý^w÷ÈÏ+yN&‹í--j\ŽrZÑxÚ‘iú½F¤‹ž†óV6nDé¢E?`õê1 Êw­„%¼Y¸^›ºçä6pâÂXåË6¼Ïð—øµ’@Ò M~ág/pÖ¬³Øù½<õÔS\sÕ5`ïvþŠ«¹šl»ƒù>Ó4Šûéq'Õßûïå¼óÎ#I»9ÐÂqòNžìªD¿â 
MïÕ\?gk?k0ƒÞûз¾”y0ÏçÝe¥JÜ+­‘®ê"Ò —¼)g¬£Ù|xC²|Hð˜TóœnùÖÿéÚµüq•{k$ptÑ)ì;÷iàzo¹LF •h!îʧ¯ôå‹0MSÔe¡©x1{”'/~…«O|S‚ÐPþÇå_çôÕWŒú¹Jâ+AE q¶cpÎ?>ÀŠÏ}€îînB‘P£›&U“ÀÕfß¹ :–óºŸ¿Ž u®V ›¢§á÷ ¿øQá³÷9?_pN1q³LÆï¥fY®XÕÓã¾ÎdŠ^\ª¯þ|¯Ö×½ÈJ‡ï¥©ÎF³ÆJÑ6ZáÞj°¬Ú§}l@z±Àcæ¡Vþ¿{ˆü{I…‡b¾G«ðçÄ+ WÓÛGžX;_È›†A"pC0oŠ23<“Ë):¬c÷U¸ ££ƒmñË0 ŽÍ?¾ÎÒÎÆ0*w‡œì´7†1nzá+Î~Ï|½oùTª +ÔŽÀ qóðÐÖKG¤ßB³ðâ¹çòrv??9k;·–I¡É2Do9°PÉvE„‚މ+*tÞz½ù£âÁ)4%Ê9à‰?–3㪫ü+eðºpÀúÄÞò¶+†©üø*.¼±˜@¼%…J ¬GÜÉ{_æíƒïñÞÛ¶-†-4Oÿ*Ä‘yGèììN¡éxiþ|æ]ð,ëξÄgÉdRúe¡)Ø2w.KÎao¾”§¾ÿ”ÏCB˜>äµò™Ê£·™Èv.À¡ÛŸgø-+yêì×`‹/ˆØVstÛÉf³žme2ï3Çq|¯•÷¸ã8žÇ¸“Zêûº'M"‘ [¨<™N§É*^èßw‡ÎÎÎFŽª84gwæçƒ³ŠÂq•Äb1ï˜D£QO|3Ä}S¨#½¤‹k Ïz*]‹ TB`…¸™ÓùQËùÞûT*Õè& BUÌÙ¶í©–/_î­W*8¨‡„L&㉠ú ¾ž_©´-º¨Ïç}‚‡Ú†a¾P°îînLÓ$ŸÏ³|ùrÀt¯Äd2é}'™Lú<[ôí>-®²§§ÇûÌ0 ï«{êZ–å?öôôxû‰F£Mw_v~{î?Y86þÈ,šF„ÓmI·qÝFÓé´g£¶m{ö£ÖÓ½ÈtÏ3E©-êç[¹"‘ˆ'Œuww{ö£Ûu8öÙ¥n‹ÊÆÁŸ X/Îeš¦oÿz?Ç=/.ýZPEÔ÷Õ> Ãð yÍ6Ék:¹Kxó3o®:$UÆB°O;¹‘–ëZ÷Z{¡R›#nøÅ×°ð-®†mÛß/4ç¾ø"sOzp“áp¸éK‚ðÒüùì<‹³>ò´oy³ÍÀ Ó›×vÊž¡yì}“ÿþT]÷Íf=A@ ÑhÔÕÔëL&Cw·[&0“ÉøÚu›rà–eù„½º_2™ô=ä+"‘ˆ·˲¼q–þºô;ºx ¿ÖÛZ*~èâÁž={¼å•ŒëJ·5Í&¨M˜Çöòè‹—”Md_ïQr6›õŽ{*•ò<¶‰¦iF=ï®d2‰mÛÄb16oÞ ¸Þ›7oöα²Q]”Òm̲,Ÿ]ëö^‰-êb­Ú^¹×:ÓÆ®êÌœ—ŽqÖ©»9ríl¨ Š4‹aYÑh´)&R„éÃîÿ<Ä/7ºB³X¸½Ç[8åòù¼„@ MÉiƒWrìä“Ý A8!Žï?‰—ž_È5ïºð‡…B³`³“óåÌŸyÂÛÒ½Çô°KýºÐ—ë:úƒ¤î!‡=ð½Ö=lôz¨ÚX˜¦9jÎ$É¥Ôd\¸ÅOÕô!%ËNÖ™, íÔÄu¯KÝ;S÷ÖÒ=#‘ˆ'äZ–å‰pàŠÅÊþt!Nlrêóä«—cfÑûºN*•òžÿt/EA‡wã´¹3×KW4 ¡+ÄÍ~ò¾ïB ÃÏ ¡)Ùq`!¡S·’Nh^^9x‹çmõÞë¡:‚Ð,ü`ïöŸñÓ =Ì鹡Js›õõõy}»žp]_® i:R ^¨–~u6„Aת&jA©Tªl8¨ž×¬4™î…¦ªS‚+¸‰€&TÊðs¸’×A™ônz(°x$ AgÛïÎ`ñ²ç×›WlV¨†º…¦ ÑÛÛËðð0ííí¬Zµj솪”†aB£¨Ö†ûŒÝ`\á…É UÕÚñ5Ÿÿ{_½ ¸@#  Z;þùÑs0ß±Ìuô0:•³JåkRBZ©¡#¹l…j©ÖŽ_úÉ ¸©†ª²÷E)_xR+PÚ^>3ÝSRŸð. 
í”±‹0ÕÚñœùßÃXý p›o¹Ö,)‰„zS­Üt÷±èï.Ä›W¨žºyÄ% r¹¡PÈWŽ{4/8ŠmÛRyD ÕÚðܹ»X¶l«„ñ ¢Z;Þ¾ssZ猨'¤Z;ÞѲ•}è‚Ëuo7=Œ.ú³Ë[¨ÕÚñ®_ÁÊ·¸yöt™B·N½RliÝSS¼Ø„É¢Z;>ùô“yóß¾píU…óéaÍ‚Poªµc€§y  }•—¡Rêâ700Ààà ÷ÜsPTœÇrß<ýûÄ-Y ±áeËŽ²~ý›1ÍÛ*Ý Ô”‰Øqß±ó¸¶ý@W Á`"vœH,ô<,ô‚ –e‰×½Ð&bdzþåe.\ˆìÝ»,À~ùË_²ûðahoÇ4MŸw¦ôÛB-™ˆ·´´xýn>Ÿ—>Xh8±c€Ÿdqå•-;/N¡jêâ×ßßO(òÞ¯ZµŠþþþQ×?4goºâh£ k×®mtèïï§«««ÑÍ ««kÌs6Õ©Ö†fΜÙð¤b?~‚pM7’‰ØñÊë¶òÖ÷,À0Œ†…¥Šý³b"vÜÚÚêå½Ò'ù èÛC`PìXÚ &bÇ·½asƶ<ò+÷ìÁV_qØÞÔ><*(ý´#LÄŽ:D*•*/:S‹vËø¸ˆØquv¼eËlÛæŸøù|~Ú‹pÓÝ~t”mTB]<ↇ‡Yºt©÷¾µµuÌõœ§¯»ŽO­YÃâÅ‹ëÑIJlß¾½áôŽ;xá…ÞÛ¶Ù±c›6mjÈþ‡‡‡yúé§yöÙg¶ÿjlÜcöðÃ3gΜ†´Ä~Jiô5­ìø…^hØþ«±ãgžy†Gý&=ö-ž}¶qá"b?ÁjÇŽ;غu+ .lÈþ«µãÇœt:ÍÒ¥K¹þúëÒfŽK;ümعs' .¬(ÐdR­oÛ¶§C!®½öZæ¬\É»žy€C@+P^¡Ñý´ÃO3ŽŸxâ ¶mÛ&ãc‚ÑBãíXfçr9¶mÛÆæÍ›Éçm.¾øâ†´;(4Ú~‚ÂÓO?íMúVBÝŠ5TC$aÆŒœ{î¹ í¤—,Y¹çžÛÐcqàÀ8À¢E‹N|c'€išÌ™3§açãÀ´¶¶rÉ%—4ô8TÃ{Þó/^ÌŠ+Ö±?¾¦•¿úê« =•ÒÖÖF4eþüù µc±Ÿ`µc÷îÝœþù8•°zõjÚÚÚ0 £¡mŠK;Š´´´°oß>ÚÚÚz,*A-ZÔÐ{z£ûi‡O ûi´ïÞ½›Ý»w7Íøø­o}+^xa ú¡FÓhû K–,ᢋ.âôÓO¯hýºq¡PˆÞÞ^ïýÀÀ---£®ÛmÁÈ©UïÙQ!¸TkÃù—Ùèf %L÷kºZ;¾øâ‹§ý,ŸNPì'(íhÕÚ±0 B)¼–dl,í˜ Èøxj v\¯\¹’•+W6ºÙaºÛ¢ÚãP—q¡Pˆ†††·J™$š ±aa* v,LÄŽ…©€Ø±0;¦bÇB#8å3ŸùÌgj½¥(oذÇqøÞ÷¾Çg?ûÙqgL!(ˆ S±ca* v,LÄŽ…©€Ø±0;ÁIÇ?^¯år9 …BbØBS"6,LÄŽ…©€Ø±0;¦bÇÂT@ìX¨'uâAAAAaºR—q‚ ‚ ‚ ‚ ‚0Ý©Kޏ 144D:fÓ¦Mär9B¡·üßÿýßyøá‡Xºt©ï;£}6mÑ«lÔ»½½½d³Yß±k_µ:BåŒfÃê³ Ûq­Ú vÜ|LÄŽkyÞ‚jǸ¦…Ê;‰ôÇ͇ØñH¤?n>d|<±ãÉcº› i&Aa2µ›iç744Äš5k·BJ6›%H$<#K$d³Yï{c}v"tuu±qãFß²z¶£««‹þþ~ÚÛÛéíí¥««kÜ}ÕêX•1– Cðí¸m;n>&jǵö#v<¹L·c$Í$(L¶v3í<â–.]Êúõë½÷ÃÃÃô÷÷ûÜ—W­ZEÿ¸ŸM”¡¡!î¾ûn_[êݵ½\.çmçÖ[os_µ8BuŒfÃ|;®EÄŽ›“‰Øq­Î[í¸Þ×´PbÇ~¤?nNÄŽýHÜœÈøØØñä2ÝŽMP4“ P ífÚyĵ¶¶ÒÚÚ ¸3ñxœ[o½•ááa_Ì®Z󳉒H$X¿~ýˆÒÈõlG.—#—Ëqûí· …Ø´iëÖ­cõêÕ£î«ÇB¨ŽÑlêk?P½×¢ bÇÍÉDì¸Vç-Èv\ïkZ¨±c?Ò7'bÇ~¤?nNd|ìGìxr™nÇ&(šIP¨…v3í„8pÍ{ï½—ÞÞ^Ö¯_O8öå0©57n$ ù’ü5òX(—åÖ®]ËêÕ«Ý,aÊÙp½;N±ã‘ÇCì¸ù;y<ÄŽ›±ã‘ÇCì¸ù;y<ÄŽ…‰ÒhÍ$(Ôêšžv¡©k×®ehhˆûï¿ßë C¡Þ:*.z¼Ï&¦M›Ø¸q#–eaY–eyî‹õjGkk«O¥ …B^Üóhûšì6£œ Cðí¸ö#vܼTkǵ8oA·ãz^ÓÂÄ;."ýqó"v\DúãæEÆÇEÄŽ'—éxl­™…Zi7ÓÎ#®··—–––ñ½ê@ ÑÒÒB6›apå>›*aŸÂ²,lÛ\×ÏzµcÕªUlܸÑÛ^¿ç29Ú¾&» BõŒfÃ|;®…ýˆ7'±ãZœ· Ûq=¯i¡zÄŽýHÜœˆû‘þ¸9‘ñ±±ãÉeº› h&A¡VÚÍ´âTÂJ¥f*lÛæ–[naÍš5¬ZµŠl6Ë}÷ݸ3 
£}6ÙŒµ¯ÉnGkk+¬Y³†¥K—288ÈG>ò‘1÷UÏc!”g,®§ýŒE=íGì¸9™ˆ×û¼ÁŽƒrM å;¹/é›±ã‘û’þ¸ùññÈ}‰OÓíØ]3 'r=tüøñãþA"—Ë188H(áJ9ÖgÍÜŽ‰ì«žÇB¨ž Ûq-Ú v<õÊy ‚嚪GìXúã©@PÎ[ìXúãæ%(çNì¸y‘c3þq˜ŽÇh"ÇC„8AAAAA¨Ó²Xƒ ‚ ‚ ‚ ‚ ÔâAAAA¡ˆ'‚ ‚ ‚ ‚ u@„8AAAAA¨"Ä ‚ ‚ ‚ ‚ B!®Žär96nÜHoo/CCC mGooo£‡Ð$ô÷÷ÓÕÕE6›mtS|ˆOêu.³Ù,ýýýþ¹‚Psªí·å‚ÆDÆÌõ°c±á`Ð,cÀ <ûéu\/LOšåZ®"ÄÕ‰l6Ëš5k&›Í²víÚ†uȃƒƒÒù ±aÃ6lØÀÆY³fM£›ä!vYtuuy¿]hNja'jÖnÞ¼y¾åÃÃÃÀøv[ë몱ã©ÃdŸË®®.V­ZÅÀÀ€oyl¸}¼0yTc¸·×oWJ½®A¡1ŒeîÇ3WÃdÚ±Øp°)=×ÍlÇA¿?Á£6]¯ñêtzž›öqýýýÌ›7Û¶I¥Rtuuyï×­[çÉËå…Btuu±víZÖ®]ë›imm¥µµuÔö ðàƒòàƒ2888©36l ¿¿Ÿ[n¹¥ö[¨µ°“P(ÄêÕ«Y»v-]]]Äãq–.]J{{;0¾ÝÖúºÒ;ž:Lö¹ìï璘¯¯ìö‚`õîã…É¥»hĽ}¼~»êy a,ûkt¿8Þ˜¹R&ӎņƒM¹sÝÌväûƒLêaÓõ¯N·ç¹iïpë­·‡}ïC¡PÙXþ±’ –Sos¹ýýý´¶¶rË-·x9&æÍ›ÇªU«|³%cµ¯¥¥…¥K—–]§Ú6kìÙl–û￟–––útaÒÏN&b#PœA+1«Ôn+½®&ÚF±ã©C%ç²bÆ ¤R©²Û Š WÒÇ Á¡R»hÔ½Fï·Çc¼k°ÚöŒw c4ûkt¿8Þ˜¹&ӎņƒÍhçz²íx²ŸýÆ#ˆ÷!ØÔÚ¦õmÖÂ.§ãóœq`Þ¼yUÏ´¶¶²~ýzï}.—#›ÍVÔ6£s"mêïï÷\T³Ù줇 õg<;©ÖFz{{éïïç¾ûîó:Äx<ν÷Þ[3wa±ãéK¥ç²éêêò ýýý 144ÄÀÀ¡P¨&¿c¢÷aúQ‹{û‰ôÛ•\ƒÕ¶§× Põìwªµ›3O¶‹ —zŽ§Â³_#ÆõBp©Õxu"Û®Ïs"ÄÕööö²®›¹\®am …B¬_¿žU«V±aÃÂáð´QŸ…ÊÈår,]ºÔgK—. T~±ã©C-ÎeKK ¹\Ž{ï½pmzhhˆ––y€¦$'ÒoË5(3O¶‹ — ƒøì× ãzazäk¹–ˆ7†‡‡G-]^n–# ‘Ëå¼Ù³¡¡!úúú<µWÝÔOä†^m›”q‡Ãaz{{éêêòÍÚS‰Ømoo¯7àT³ÁÊåy2ìöDÛ(v> endobj 2 0 obj << /Length 3 0 R /Filter /FlateDecode >> stream xœ¼½K¯-¹‘¥9_±‡™v§óåÓ² ªÑ  BBƒÎÌ:+•&õ÷{-ÒH·eû¤÷âT% ׯo‘FãÇÅeÿó‡óuàÿÿŽÿÓSý¿?ÿp|œ©Œ¿}þ„¿þŸ?ÔóN%gû;þg½®~]¯3çWÊçG;óUnû™×ïŽcü¿š^?ÿpörÊ_ýùW÷}|Üãÿ]w¼ÿ…žô§þŸþŒÛþ Gþë¼û™óõ×{ýûÿ×7ßéoñ[øûWºNÒøÓÿjàÁñ‡?ýð=×ÀgÉ÷¸ßŸ~øÃ¿àßÿëéõ{\úÿáš Ïû_?œÇëÿô6þûoI<ÜÏÏãþÖìç~t>æút\}´¡û·¿ƒ+ëìÏ_{ýŠS÷ Ø|éØkß}ý¹|ÔsôÚ/htó­ËÛ¿ôêó•ÏËÛŸß®ÿ¿ý—×ùHGýøï¸›ùû?ý­»ê¿R®·•^g;êGíW­½_¼Áúñ Þ¿;3~üõãϯ?üC:Ž|ýËëÇßÿðÏ?ŽçþûWJç•?î#Ý­Õò˯”¿ãJWÏgIøåÞ~ù•j¼ÒDÌ¥ãÖ³"óFL÷Œéi§_6<œ ±©”ÔĦprìkî&5œ~”TÊÛ+‘c_3tÖ£–Ö{jñfü¡/m•ÇÇuÇ™kùš6Âk‡6"7ñµmdܶ½›/m##FH‘›ù¬ü‚‘¦é£½ÜWãµ ¿‚L8çò•W™Aù›ƒ¸ÌUxŒ:µÜï—¹×eÒ¯½Îqá:å¼{>û߸Nþ•×i'.“¯vÞÇ'om_¦ÊeFV€Q>Ýšì9¾ãïÜ%ÝÍø‚obXÍhS7ZëÙ1¬–’{Êïo=øw6¬þ×ÿòßþÿó¿¿ÝÏRþÎ 8Ïz¥»w<`-õ£W¤­¶xUþϸä¿þôo?ÿüÇ?ÿùýë_ÿøsýó_þã/¿jd×^süç‘*ßwª©qG{¨)¿>é¼?î”ÒyÔ{LÔ®¼SäÐø> ÏzÖžSýŽ”Gî}öûã䋚ÓYêOCÆs¶ô‘þY³y!>Û…Ó2®pÖúÑK_ù¬ãXûÈ™_oá,çÇÑ­¥»Œcý}ßåÄ­à?zϽñò؇ÅyZ%ºÜÇq—†kw;Ö 
ÏÝñúh­ãÿŽq¬àVžW+BžµÕ3—{ü&.Ÿ+ÎÃÍâ¼ôqáuñÖÆ1>þ#³Y&;¯ZÒ1ÏCi<ï.ã7ÏŒ~ÍWc½ó¼ÞÑ€ñÙù6ç›.I"Nk¥²mÜ=¤Œ×8V>2¯ÇÀ±ãO^Òxahx=^ }¬Ü7ÛŽµ=Ä7_z¯¹ŸvèfKé÷q2´64¶ñžKÿh¦Tèn8¯à¥—£^m¾¯û£œÅ†žwáŸó¼ŠͦÒ.|ŸOSpCW·ÂŽ›x š®{çi}§{6X´FüÇU’}îš>šIÇOàXÿ¨úHíóç®Âf‚ïÊÈ÷‘®ãÆDbÜbÍîx¶Xüf¿[½[ž¿Yyf‹ET+GëwÍÉÝx½ã n“Çp÷öEÙHÙJÐç­L€šÝ úD-ÖbñÔø…žë=³à»[‹Å–+^r=Æ'­è0h%hŠó^ðJ.¼®ÓŽÝh%øh÷8-óvhu­k±GC‡G_À™ã&…œ ‹×’SÐvÆ!\áÆåîãµrÅ3Žcx³‹ŸãVð±ñjírñ·XƒÅO"ðau¯c e´XÜfɅׇ83Ä›>ÒšÐÉpiÜÚ8V?2®7›lx+­!Ük²ÞfB¡ó5 °¸óÔæ’y|3Œ}è±³¹Þ/´á«ÏnC-d¶VtÊŠvÀ‘„ÇÚÇ bh­ˆæ¹!"1aà!üÊmῈ<â\mÁ}¾ùöÞoü'ˆµUD»z`2?Œó´èŠŸ¼ð`wžAù#œ5UÄ-øêãÍ߉ÉÚ*"Æ…—€¹ó¼ÿ³¬¶Š_ tÂÅ/‹­¸> FlGîËB+ïCPµhqD™{6T¼”Ñ.qñq¨²EÏvŠsá3gŒãPãÃŒfŠ?"1¸Wä…‹Vþ+ÄæÒj·Cw±¸Šßæéèiããç[µ°Ê?#Y¼î .¶ØfQ¿€†sÔ6Þ"ÆÇÕÊøü‘ÙùÆß&‹©z<„üÐZ(ÎÂuxãÆÅ{GTt ×Ù~0cT)»bˆE˜Îu‡3ú†KLï2/…qé°pÊCzC·C_m¶O¼¼ÓŽUíP?-˜âFM¶Îya÷´XІÆ8}Î÷p£ý[(å+º8Ôàï1Pb`˜M“ßÃÀ…4¡å\EÙ ÞÇy¨_Dq¡Œ×Í+CèðÙ‚( Rz´õq{ØŠÅP¼$|ŽÑk^ iE±Š‡Â¸›ó9Æu´¾4k˜h‰H‡ÎËîTÍ»a"^t¼Àù\H šP6R pó,öi‹Ÿ|F¼¥ÜGH¾R0 ³ø‰F†×ŽçŸ áÑ-~òîñEqóÉõ5ïâ}pø,cÈÄ``bчÆ1 ¨0ñ¤Á‹iÒÊNù™zºÇøs%ÞcNä,FУ';tŸ8q¨1w˜á™ãŠ›x# ó䙉 @|KL5È!B¢÷¯°‰oŒ9ã<Ä+[Ød¿9[Aníн³R ÄÒì¶²Röäã}~‹tÌH0›%òÌ«¦4~ð:8µ_A“wA/°cüÏ'h¢÷¢q;r¯„T^ ’Sä8kpÇù7"_=ÇDÂ;úÑ0û0ÀáÏ£Í^È”V2Ê÷ŽÌÒ’¯ )ô¹’QÂàŠ1i>0q$ß ³ŽÖfü'­dTãâu¡};bú tõ‘Z»éÌ< ÏJEÙƒï‚Q½Ž×”‘œOÈÄt7Xn;ÔW"n3¤šJ1§AØy("îaÒ< ¢ÈÊC™÷W4ŒëšçávwйÄÁl¾|…Çú“‡’—£ç÷71=ª;=”™³ÏÓWz3G¬ýäOðCÎNC9Ë(hÀç|Häi¢Ì›Þ;ÖWÊA½¤Í€†©Q]I({+¦+$àiòÊAĉQzRPZ9ðVg”Á¼¨­T“ ó¢²2ÐxVøÔw…yѹPD‘zä†A¿Ø±{% 8-W$»gš†³óÏĹü|W˜••ò… °Þs¦ˆy:†ª'|b*‡|{~éJÖq|:²ãÐý$ŸxJL´1ŽÛŸë+ÌïKF8;Ç<áãÿŠ k0!ºvî©#{eN¶rOÆå1Ϙ1Çî{¢epdJiž†áÈ垈\½Ìü Ÿ!åÞMÔ¿?̆®'÷ä µ WÙ]íÿbNUyì^É'çÅ$h£;W¾¶•}âØÑ òv1ªOöÉl=ð(ãb6”]ö‰©#2Â1ï¼H$vöÉ4¾÷5}¿ˆwöypˆvuÏØ‡cýÉ>õ0ª+ýÇpaÃ!ÈÒ>òJ@ù›øÇøÑ1A@F|¹×TIæ5H_Öìþ“^ôŠ–íX/kvßRѰ®Ñ§¨;¤Êt âËžÝc¬à›nó a:”VÊ©s½Ñ ëHy‘, ÞìÙ=ZuÁ”h&t8ÔGkE6ƒyÏçÂt¨67·GƆ/8^¦C¹ï©½¾~L‡Ò“ˆ"aEêÜxþä;RIŽãÝ5;¶‚yZ¾ÈffL„¨{jOªƒh¯¢²’Q’•ÝMc|m̈®sÇTô‡Š£©ÍG ïÚSû1Ï/+/ïŒRkjÏùËqÖlŸS¢¶rRÎ{n$|yN𑾠wÝqõæÐr¯1 s¢ëÚq-(ñæè…9ѹÒÒ‘õ!µ¸×o"*{nÏdV–O`ZÔVbJÀUÏÊ‘|<;²Ñ²2S LŒ¤Ï´‰ùÊLÉÚîŒ6yÌ8Î4b¥¦„XèpèöŠWnBºgÛ¹)òåŠðµFΉWnʧ«$aã›ë’s̵ßã³ \ÙTpFõôš D@>òÌѺ¿k9cAÙóu=\6.û†e/G´N¦1‘Ë"cDÅ¿ɯrÙÄ.ŽxÒæ$ßsY¤shh‘Êâíb"GR=§­žÊ¢A u#)Íužæ¨,~™³ØŠ©õ$¯ŽÊ&RË´“H¡²‰ éOË•%Xæl5لφ¶ƒÎ_& 
u@6aø;9!¯mBWd1aû (é}‚ dÃF©÷1Ó²‰ù Aõ kd£ï3: ‘åorþ•渮D–sAæbÅîÓ3Y6_ôU|·y=Çd‰%Ú}Ï ×>L–Ùnê+…S&‹>Í€Þf˜R&‹Î™N´‘‰ÚÉÞLÍðZ®K,À¥ õÉ2Z!ü–ëxG²7Q.‚q A²dI9ãªvÌ#Ù›]¸¢•Ìöã‘lç$”Ý0¨G²¸\;Ñt®ù`žÈâ.+!¢§žÈÞ\1B‚’&ž"ÛÇÊ1Á±'²7±>áÝÚ+YÜæŒµÎ™ºYœ—0E˜2¡¥'²8¯!lÖé‘,ÎC“-|½¯ˆd;‡ìa®Ÿ’ Ï.H¶s¨!ÄM/LçaH:0Å}g²ñ<ÇdCƒ&ËÇÃÌ!>½"“EÛþ—]„Éò71ûBO™¬Ô3Y¶Ù1`Í×bLöf?Å‹kùÊN@ˆ·Ý¯eù’I²ª….¡²`3"l›‹žÊ¾Ý¡Ã²7»Úe‹ËòrL×ÐÒû+rYœ‡a©`Ôš ÚqÙñM/=ï`6¶!³ü¦Ç3|™½íO&¶¯fãÛ2˱žÑ¹Õw2‹I &$ó› ™åm^˜Ÿƒ÷h–×»Ò³¬çálŒQžÎƦîñll'ŽÏ¾æ-OëœäÙ <¡çÝÀ†Ç=¢o“ËF=£åyø¨+Y2Fâ‚@ZöodŽeªèÒŽ–w¶Ób¥´ þÈ©çyÓò»¡™#;žØÃqZþ$cW6¶åAm¼wjÙ»1ÏG¶Gv¤vÜ%ŒPó<‡jCTVË'èÏ£ðÚqb F¹yÌ[ž‡a¢]eNU=± C [ž—ÚCh<³åyèMmÁJmÇ}>ù£P[žwejHîulcÛñ…ðûõœ36ÏmÇõð®1áŸhÙ[ö2D”f]À-{ ÆèÜ옷¼¡}¾æ4Ä“ÛsÎeêBBÝÆó»ïGÎGÞÆçòüöíBàÆïæ .ÏÃ;­«>w|¶›£Î+ÜñÆöw\ yýžG¸¼G„§»˜vw¼Œµº.w¼´1[øR†ËNEuT/“¬:†nÄCÜÑßZîx!ï7¼dÏqãO:ŽËC™öY­Ý ——Ãû®‹þx’Ëó0£Çø7Wb<ʃՅ¦2A˜C¹±G Ëe¯yäÂrãÃy˜;Ä»ÏNXî¸z®¯R¤ËG;m¦ÿ ]M9éòŒþ¬u Ôe¶u`úiSy¥º:ª2*ÁºÌ]¹ºt vX—?‰ws™’E¹.ÎKè„×d©‚uyÏ›û„r‚uo~¿k/ÅÖåÓ‘¥Z ¬^¿Çºœyc–ï™~+×eÒÙ7LÎê¹.'½[z%X7!cCKºWn'X§q`»m¤¬{3½ÇË«³ Ö%iãú¼…BÁºœ±£•të9ë’ñsµÉXž`]ÍÌëò1"ý›oZ°.—ì0ç½Ó'Xa/qõÍ>ú⺌”©`tš—°‹c¼…Öæp/d—Y8’»µ"d— ÿì´%Ovãðd—+˜ˆ,æ­8²KBS©’]®£aÔ2—•=Ùååcg“' Ù%ÂÛÏÜËñ d—çu. [Ïòd—ça‚fÇ[öd— ÉÌqž†LÙåob WÖHàÉ.¾ ×nºÍ¯ãÉnBr:o±Æ“]R´rŒ ñ䩎ìò<ªZ×¢G»¼¼@Sx°Ë7ÝÃ' ð`w¼L®X͉Ÿ€]>@²Y’äÁ._ ÀRçB/Æ'ÍÇŸòl±BvyZã–ž99?'4¿N¨N!ïùú¿ ïöžø[o›‹âŸì>áýáQ‚xq 9)¾çÄ Šx9Gܳ6…¼èú肽nU®ƒ¼7Ÿ¹Ã¤« y1¼trÓŸ¾AÞãQk)äå’:žm®‰È‹ñï~½‘^\€ýÌ&ÛJz32'4’EI…ô¢“Ì€ÒdJz©x¥Š³-y­#½ 1ø ‹Øzé-Û%—- á)é¥Ú¿aŠ3OzÙ¯3W±ê„ žô²bôƽ\Kb{º4¹?kUôfÞ3¿Â|ô@z]–H/ǵDŠz9À,uȽ«FèÙÖNôâÒ‰Õôl© ÷ä4í²´OA/p­ûýÐëæmô*™Ò;Üc%AJz;×9quCUJz©p.k@TÒKd‡ÿÊ? 
©F£jºzq+ç#9RЋ± tëK¹êA/o“‹b3R+èÅo6¶ØnàÕƒÞðõôrEïûœDM@/_u%Æ6bëAocnÕfaz1ˆ>K·® =éYÀVÐËÜôòGtÏÜßA/þ1;ó«¼ÐËÿÁím‚êA/î"ÍÄüŠ ·Q7µ0 zq¥Æ šIôâG0L#ƒ3*ë@/þFŒ{¾)Á¼|¥u¦¯ˆyñƒaŽk=æÅ1´„L{óò®0d3&¾"æ /E0oø4Ây;õÐ=ìÓçå½p£Y3&ë9ïH-ðg­œ7Þ‹ç¼ ¸½3^<ÛѦç¸#ŒM°>² e¼b¤f†›âÅ“Ý×ü\BÛñvN¢„¬ý âe DŠr,‚ío§úcÖRºzÄÛ9ÏG@Ÿ …xÙôǾ€ò¦Ãï¢ßÆd„ðâþ Zc7^©„—ó x‹zÂÛ9mÀdêÂË×Ñ1ü-ë oç|íßtWŠx…C[&%ˆ _¢,’¢ˆ—ð oØ´ƒŠxe€ˆÇðºmž®„÷¢¾Ú*N ¼„Ô÷m9M ¼uÚkå7^*1ê±æ‘Jx¹PŽ`¾H­'¼ÕÌÅæûõFx‘¨p3§ÿ xelÉ–¼ðrwøµ3/¼ªÐ’'RÀ‹Ÿ¬ }`Î’ð&~-®ÙOÞ)€×£XÇ&px×–)à•NïÛ1x1*%j7æ=ßeR|câX&dö|7Q–š»©ð&&ûÍÖuླྀ ¯j¾)%¼T°Ÿh°¦íÂKn…gØC/ž)d=ã ®ŒW"B^¼¼cdBFVòê—QÈË]:yO—”òJÜUÊ"ŠR^ÂK´‹TÞ)oèâ‚y1Vp;Ú æå—¬ý˜ùZÀ¼nÐTÈ‹³ò³Ù+`^íü ó†ø* ¢’Â$ÊÂy(ñ.ÒRþ çÕ–'˜W"¶‡¼<ÀµY{ äíÜBTÐùæ´_ oç#b ½ÎrŸ»ÚP©€—ÍñÙ¥€74G¼!ˆáeÞ„©p>ŒªzÂK´ƒIQ)Ÿ^œ‡9tOsë”^>Û³M /ãLÝ:#%¼8†óKžS;%¼|—ܹw~Bxy=¼[›ÄËAìÙ£¦W?·0ÞðV„ñjÆb‰1Þ0ÒãíÜ€zônÊsa¼’á(âåÛxöS*â ÂG¼a¤īïÞüâe!‚3a¢ ^¦vbÒaÈÛ#^†r4ìcñQx5ýTÄËÅ©zèNoñ’lrÎD*Ýe»k×FwÂxu Œ—­äHkНŒ7 pÂx™uuäh¦„ôŒW'ZÊxÙVΡk¶‘î²s»¥‘va¼Œäç|lv9a¼!ÓYŒ74a¼ŒþøTkÓƒ0Þxžñò«b†W³AW¯Þå—Û¾M‚xãÕ¼x—ß-7|“üŽxgîô`\/ÞåðDcXØØ‹wCûñnˆ±"Þ Ã¬ˆwuú¦âÝÓE¼bžˆwã1/Þ _OÄ»á‰z—χî“Mô ê]¶õŽDÁÖ9½zW?žˆwÃ"â]6|×6gåªÞÕ·©ò]6gÂô‰¢‡|WçÕß y yOüûo ¼È^o´°¡}3WàŽ¼k­)á=ÈCÏÕzÂËmËÈ_lÿŸ^Êí™a¦ öæ \›2 ”½¹¾u»H†&Esnm¤ÞÛX“˜+àÒøÛ¦Åâ­ÀvÈÔLê®Þ lZ7›ŒxoÞÊÑŒHг‘©s2þétmL¸«ôä2Dzò޵8nb7ù™X+pÛÚÐÆÉ"ä­\*ÙÚfòâó·ã|òÊÊfÀ»ª S¼‹ßÇÇKs·tÔñfŽ&fáB^Õø./o6¯®AÇË9Õ±öί¾Á»¼ ²²‹ƒŽ·’†"xÔ%ÖÝx74Å»ÜÜsžÝÌ@<Þ%A?1\, ¦ yGtÓöt—?YSOÙTÇ+댊wÃ1Õñ6’æµÑYñnèrªã çy¼Ëc¥îMäªãmÜæ^—Ô]ð®.Ù/™m^»uïò7ŸýªãÕÅÁ»ñµˆŽ7<žèxuMEu¼8ï BªˆŽ7ü­èxõ%«ŽWïBu¼h€RyÇ»A§/:^¤¸x¡çq.­®Óñê·Qo¸œèx·( Èš¤Xt¼áÑEÇ«ßTu¼ú*äÕï­B^Y‹À;¶K`^·©¼\ ³TÖŠ—­†Ûô®uÌ y™J$º~LÛò†c"äå–Š¡uþ¤×ñâH:læ#ªãerBî·5¾NÇËDü²¯AÆë/T¼ª]Ø2ÞF˜PZ¶¿öŒ÷ýØí¥a. 
Ê«-=èx1³Åäù´äTt¼8Ä|ׄDªãÅO"÷± JyãmŠ—"Ì´ÜX”òê;VÊËc˜Ç¯­By¹}œó>›¹ æ ¿)˜7(KóŽ »D·ó‹`ÞxÌc^ÿ^‚WÁ¼ñ<Ïyãy^ÈK½bª[ÿë8/sºŠYÙ‚ôÂyy^Í—møPÎËË!û³­—‚yÃLÌÿÒc^íÀŠyß=˜7¼~á¼áUyÎËÉSKÛ^) —ÇÐál;—‚^ÆX2åû)//Çõ ›… è ¯ÊsÞð“ªåU‘pÞ U1/ÓiCç×óœ—ÇŽÞ·®Øs^}ó†Ûô˜—Û¹´hb}óêWð˜7\ÍSÞx5Oyù‹ý~ÌDÌËÉC³ýÅByãYKÌ« Aå¼úXórã”zM<=æåÔŠöSÇ'Î CåÔûƒsÒ;¦]øÃÑg„õ¤—Ç®–7íñ¤wˆ¿¸ ²•79ïøÍœöZ™§½Q4æioÔUyÚ;ÎC(µ}?B{ã½xÚ;žqz‰Ö=íå±r\au´w„.ô´>E>B{ã{ñ´wüf'?– æ½1zÚ;D7ÜqºèòC{yˆ[Çø÷´7j÷<î·Â…*ÛWãqo¼œç½ñzæBÍñÞ·cŽ÷Ž;D–½0½¾ã…ô¼ñ¥¾oç=À—íâͪï÷Ž ¦0ᬾ<ÖjZ®o|‡®é87xòÀ7^ÎñÞ!Äû\lßóÞ·cŽ÷ŽÛ¤ï€-myÞ;.×ÊBÖ÷Í+U.éXj_‘ô¢ïwÓ»{Ú;ò[úÍTS;Úe´žö¾=£½ 7#‡šsO{G´¡x2M²£½|σ­\DEo*˜·góŽöŽY3¸lIßÓÞøVŒöާ¦IÛb¬Žö *¨µÀÓÞ(Oõ´wÜ>½ç⊠z·ËuëÛöŽ×ˆÑ{]Ñó♀ÖÖ{ãëz^y8Ñó†ëE=¯ûMÕó’à¶½ ¢z^y:•óŠ$0ÈyONãmQż˜ÏbÂ{B-¯è$ƒ–—CW2![òfº Ë®oœ÷“c˜÷í‡Þ\Â>ú 潸)v­j5ïÅ=2í˜ûúÞÔ¼÷ŽÍ#öÜ\79ú÷¹4œNËûMþ¹H¨î#WÅ7ÆËÉAß\R¯ Á@W6óˆ.µxÜrõ…Æ‹‚ZßT¼'›v,…D0Ðåô ™´]]m âÁ@—mÛ,}ƒ.ÝÑÝ&®õ˜—Có¹g²Šy˘@lÃ^Á¼8FègÁT1oI#|.caÁ¼¸iÜñôQÈ›ÉaZIë <äÍÌ;LÏæ!/¡Mì 7ÓÀ²!¨·w·4î¢!)}EÊ›‰Ú—2A©§¼L7iµ83©y37õ•”*æÍ\ÛËþŠyyûmË4óâØ1Ö¾wÌËv­Äm¶yóH1uiË\÷Á¼4CËuñ^¾~<¤ñ…¼œ8ß¡¼h”¹—‚¼Ê‹ØÒ#_Ê›©ÉX‹þ y3]àh¿3_¿@^\ïqÍ5òf.âO¶ô 7s!ë^»òò.gåë!ïE¿Ÿm¦7“™ä^ͳD /cÒóÒì7äÍ\à\…òòXÛj…¼øB'ÒÞ €=ä _O oæJãÒI,ÆË¿Ä|Ç2e¼lwíÑ4 ã ßÂx3“hn¶2 ë/[à(Œ7ô6a¼|f„ßÅ\…ñ¢c,F‡»Þ(ÑYJÅx9ÜÕme¯Œ×*Jx w)Þm k=áÇ<á娇vЧ¯“^ŒL4-´Í4Jx9Y¿žmBxuXT«àù.#6©¼i„ï&ÏÆTá»8†ãXi³ðݹZ mZ¾KLÁ±ÃÈ„ð]þ&õ ¦¾Ë{yE°‹ •ú˜Á ØåM¤g—´€ÝQœ‰À2»õ`—7Q¶§ˆ€] hæPö.à­\å;ϵTÀ.2îl\æìÂâÝ¡a`›gwõ$»L¡Ö6³Hv {x½lN"`·pB·kì†÷.`74TvãOz°[(½9®%¯°‹c7ÝùGö`—;H.ãÖìÆWåÉ.•ÞlW…’ÝBG‚«l·Ov+áLß]Â#Øåå0†-“€]vû\ÊR} Øå+;¶!’’]ô{ŒCÛ³XÈ._¦•[„ìÉ.²\äiç;²Ë´ùFØ5]¼ ]>þ°D«‚vyŒ\Èô‚vñ…hñc6d w PhS]mwǸËK]ÏÆxÑðêF,Õð6~Å{›Ïˆˆ—o‹£–®“ñ†v"2^>Fÿå¯(2^(\N1· ¯â%ìÄkEP^Ü÷Qò†Ó¼w”yöÅ‹·pI7-kò†Ë‰—“L‘s3ñ¬òrXaЏ0¬ò ›FyŸyÌ y$ŧ´æ%B^Ü'Þº©3TÇËXÉ«†cDÇ[©ÓAŠøîÔP¹“W£"ã-\óy¶°‹ŽwÐZºßÀîøÍóYV!o%|ø³ y S½J¯B^Ì ÎkcÑñò?îg'ºy+-(Þ™Ÿ‡y W®µAN„¼•ê°²Õx"ä­Ä&u›ÔŠ’·’T¡s×òîÁËpnY™*y¿î¹}†DÉË­™ïîIh½’ws˜rØ+y+“¢t¶OØ.7í]´Å¨ù]ÉËlù¸ÏØ.¿Cç–I[NõJ^Îd)!_¢b¯äÅ}MHÓߨ.ã=ü'õJ^6îf7¨'JÞÆeŸ¼ýu–’—¯‡Å–ÖÕ+y9¥Ë«­ï‹’·R½—Ê»/›EʶWH”¼f›TŸšËƒ“ò6NªöŽo•òòëcÆ·¶‰”-cDÚ.^ÊËÓë2ÎS)/'Í®x€Hyqž•…oãÜ1¯’Iªâå\a³YÐ/Û çl1ET¼lh®°€¨x9wG ^ÛöTÅ‹h˜mˆJxññÌÇ’âz ocmˆkiˆ„74r‘ðê¦$Ñðâþ»«~ ^Ùx 
²¼þjÞfÐðÊå©áåt/x¶ß¿Î¨áo”HK¬òpµíÅJ¤!98·s®Þ›ÖbkÂK÷­ô¹O_yaõ”óoíµlõ¯^ƦGô)„—égÛnTðÒámɤ^¼‡vïJx/®‘¤åGøV"!"uR"#âöJ %Ò ±Lâôæ /']¤@ÇToi4ŠŽqj¬‘†ùršlB‘´k/ÊÏ^ñløt¯Ûßñ†êdZ$Mj1(â¥Û eû÷, åo(*´/=ž-ºJx ë¶ …õ<Å^&+‡3„ð„Ws7%¼…Fhiתò„‡jÛº.%¼ãv÷J¿2ÞJkœû±…pŒ—ÚòŒG«ïB^šèqåÄjÍyÂKµ:¦œõ9´/íDŸÁB/M¬¼ƒã»tУ-3¦/¯€wé¦užô&áCÀµ˜…À]’8¼—UDϳ]ºñNÓè:´K¤ÆU4+ËçÉ.F.ƒÓáà¥`—h³²åLê¹.ù#e:¦6÷X—# òh›’ Õ¥AW>û;Ô"†½Ë* ¨Kƒ`Êöj^Üv3Ý›yêÑ­êœ'º‰S¬#x K[6|¯fÔžçÒvÖ 7xœËQ>?—áf_•ã¶—½À\zgsù±Ô7ß]Ð>ÏX#õИÔà6v$÷æÖ5úä´7K·0\©,=®¯†yŒ›Ê(rúi54š¶¶ýÍ‹E±8WI»æÙS ËÐø¢VÓOÊ¡Ý :[£,åÐ.¦]窇'åФî”CKDƒ{°’rhä—\|ŸkrR1ÿzì{}946Ö†Ÿ“=)‡–á²™ñìrhøV4E¼ú¾e÷¥‡» ž¤çhˆJËûÀá[ºò!ø˜o†–CëõœÂ_ }œNo»´ûálΔ¥7äßÛ Tj¡qú@ÑÑõf°«!Ik¡qÐM»X‘”C“h¥åИ46ú+,žû”C㜠nK)‡&a]Ë¡q°Á”ä0Ÿ_>Eܪ8 RH94|7´€fEã¤Zx._â•cW¿ðÌ6TØÑrhZžIÊ¡1„ך@K94&¸­ï–º¡5Íjhdµš~C©‡F•ÈÑãõåÐÀ‘²,ê+°Zv'¤K,èË¡eÂi¼²YNÊ¡]ôŒjKí$ 6”¥ñ 6T”rhZnLê¡5nPÏÉ:ˆ§´¾‚ŸÔBc ×Ï%Ë}-¥Bô¶Ϯõд*ZÖk¥zÍ´Z UNN_çx@KçQ·¦Z*¢I™K­ˆ†a™ ·å¾ϲ‡¤]CéìIÝÄÞæ¦ÑnV9³4ýÃÛ8¬ªŽÀY%R`dMz×D#WÉ\´›òS©‰F$’ëqÏšmg¥¢í£.uígÙ]ží1 gO¢lòý¤&g™}£F³|!{#˜²Ù“›øù”v'R©ì½éŒÀÙD±Â9ÞÈ+ÒÙPHèl¨¢(t–…ªèÑl-\èìÉLèÙìÉ›u—á6y6{Þ#^›,„Íòp±ªËHA*£¹y™ÂY<¥Wi¿Eëb°*m¾0¡³]¾0øÎ´Aé,bgm¢‰ŸΞô˜F¶*œZD Ýê_>ÅÑ´’³YÉ4gZÞ;ç…ΆV"töÌcbqÏPé,† œ9Å,o6 ú·› ¸ÀY®Í\Ï.=á³eÌ"×–r峕“Œki•ŸeÍô'ã÷|ëfQXÛ$|–µËÚSBø, )žîÊg¹Âz÷3US>[™Ëï¾3ø,»9wwO[™ïóXøi”IÃ_é¢.ç´iL×ø§ypüáO?üû÷0`Òª—è{Ó¢ïýì6¾ ;Û%[¶+ާü­/HæUì1×$Ö Ý†¿ñfØvö篽>ÚmÛ7`ÿñ¥w`¯}|÷õçò¬-|Ñ—·?~éÕç+Ÿ—·?rýù‹?ýû¸Æ^À†èOõ‚%]%¨ÂÁÜ·ôO?š‡ËïNüG ×?¿þð­ÿñõ/¯ÿÃ?ÿøK¯”3“Û„ÇÍý—_)Ç•*íGp©¹ú /Tã…¾ Dî°ü7—Š¿0DºgDJºHË—ÜêÁ‚Ù4<‹7!ǾænK#˜–1Þ?ö5c%ë–q[ Ýp3þЗ¶Êäv/}EáµC‘›øÚ62îFÛˆÞÍ—¶‘#¤ÈÍ|ÖFþ~PN7&±¬X6P.õ“u{Êǫ̠üͱ?\æÚcŠû0ƒy»Ì½.“~íuŽ‹y<Ygÿ×É¿ò:ëêC9x|òÖöej¼Ìß¹+—D/¯ôªC)‰i÷º„KðÆþõ§ûùç?þùÏüë_ÿúÇŸÿòçþùUcçëmÌT-<ºªŒ ÆQ†8ˆëSÀ›©ZXDT°aUì)4Δ-<Â)—)üØ[)ùrââ¡þȯY~øª}náÌ\Ðä×2½Îø§°$g®ãâžc>Huô–\ܨœºm9ÂU?Ž-¿Nu,r"¿)v¬où5—(¸,;Adæ”s KN:çµÂ•1ý}„%ŠŸð÷iÓýóü¸Ÿš§ˆTbkõ‰sïNE‚Ï¥ ãAT.lˆÏ•nn/™ÅýÃçR%·zŒ2°<²E$áçF-o{é³}ï}䙵ÀÇý%v¶2;÷œërwæºøÂ÷S°qÕ)˜á¡GEB¡ù.ø“¹Î¾U$tÉÌÃrª¢"¡‘Ã1«ïæa¢6ƒ'ù&=ù¦P>Ó)‹,óͲgç™ìÐV‘P›Ó{5utK±9µ±®>.nÏk1°L]eá"×Âöl\€ž …¢…­"9p‡é4ŸÚ*~÷ án*ù E ‹Ù“é²ÄÜUŽüˆHø=O®ó6ŠÓàÇiõ1U­eT[’ÊÕt™±vÀC†¤ 
F|Íå³ÂÅ¡­!A`½)ì¾æ¡î4$•²3¾§ùƒ÷£!©¬OûéqÒXsóQÿò×4|Û£VÎÓiH¡ä4$è±íAT÷}Q!ŒGºÇïµt®Âm›©j9èÉ4ôP´4­×:ôÈGôí!«zô#´mÉkãí„xßPê;e¡…Z…­¡Uó½*jœ~äOx}Åš$íUÊ*Á[¨T@cMòä~’›­k¡Ø«X“yW¹ú°Z™MR£=$V¦Éñ¤aˆœû×y葈 9†“P®YŽ)—(ܰ¾Õ#TzõÌŒp¢c{žM²O%³m9(Üü1š$+;ÓUo¦‘<„©—5Ƀ»:ºU¸Ää€ýÖ$ÑIèß4å#øX|$þ`}Ô#¼ƒ“çgÏíQp¦æäœÑ-·G<ÂCõä{-‚‹ˆÌ&Ø.™¹°l@ý=ß³™£Yr´§Ä₈BÂQf³¼ÆhJçBySs6K.Ïq³ãì'¬ä2ÜÕ +¿äà_÷òq)³³2qè7×ì‚Üù·òK¾ܼy~”±?qÅJÑ‹á>¸Þ&Å H)ïá"ÂCÔdކ©½½ŒbQŸÆJ®Ö•'V:ý^Ù£¡Ñ"Z@š” b[§¬Ý­^$ 'ÜÑÕž`ÉÁ­Y­£¢’ËD{Üà ½JeŸ]Á2³0«÷Œ#™.*;VúÈ‚Cý~b¥Ë”#8™²Ó¤Êv~IQ5¦ƒÖf)GØ2e Æ[1cúB5–)#npò9%>ò³¥­Œz×[sÇtösª4 ±fÞT̯]Z<Ô­y¢ñ§)Ò+LvšÉíË·pÿÃVƒPîÜ uÕ+Ϥ\­!D[ƒ¦é3v²åöŠø›f¾ÍϺMÒvÓðb~cª V¦9Ԃ܇pÍpLµÁù„Ow“•bƒ­Q¹me¿ÙzÞÊÅ:Í=r:¦=•‹*ole›üÉŒ9eŸ®Æõàfr‹ «áß!¥Ëó<Æ|‹¡<ïzŠô Hp[þl¦<–žJ7<öÈAZÉôË6‚ÔQ‡¶8ýRëgºËüÍæä $#0Ÿ3QDSwr“„qî§¨ÔØ­Ôsh6¹ËåÍÇî•{²ïâ“$“V~fK>Çi¬«;?P=-mbkåi¬øH€5ŽypOùvÙ¥ˆ‘j:9ˆªÁæ@¦oôr ÿ¾íz«ÚÆßÙ¥§4õ¢m¨™_DšÊu} Ò–Y*M¥‚™Û†Ë;M¥¹[ª«è«‡©)±Ž^¦{Ý+°ÔÄ çS>ãQê¨mL¹íÔ_ JÅ+üÁÙЗG©,r" ÈVÚÄ£Tcî°‰G©¬mŒr™;³ T5b’Í÷B}6iå±›Ï>÷ë j ;Ì…µ#I6ôéãd°5Ô^ÚÊ_;ØÊ±®2ãZ]E·Že'4Q3ÞJí FÆËªÜyà:6¬³)wûÉîK)sÆ`+ž·r›>¿lWr¸•_¬ì-X‚[ÇǤÈœxÜ:ŒXàþHo¼uÜ 7¯Î&.À•¯ê¦kÕ´Íâ:Œž x‚\ÃN|a®c9¿ã;O ®£’8­ÒîW`®< ­ïZÈÂCW;Úö.êÊ_¤»ºíšöØõâ\›5„»ŽÖˆîdOÀë¸IÎE¦I×q+O&íÉë(ÒÍ–0žzå¯õg°×á0Àl©Nàíá+!çAÆßèëøl~ιSÐã×Ñ1fœ?éøëˆZÏÊØQ«R2Àé ìè˜ßX(;š8ÞÕFLW„ÂŽß<úªå%vlúß.ôvü&k­c8;ÚwàÌ`!±Ã¨¢÷åJ$<–½§"I²‚dcûòD6ôUOdù“·›·z$ËÓX݉^î¯ÀdCwL–GèôÓ>²<¶½•–“É™ì– 6.“Ë×<ð}ö70;|Ìhgþ dv<Ûùl\Íòc#ÜܶYɳٷ«98¬_„ÎÓž–h¿ó†gÇp‚ó5'ÝÏŽ@~¥eò%|vü¤›‘{B;¾g]+œBhGì}6¢ ¢ wâí¥òÞM"Œ–wY`ñvœwç²VÂ<¥ÇhR[ç]:JËŸäþÛ(˜v4nǘFRÂiã{ö¤6ŽÕÆöïY-a|(¶²*´–?y1È¥½®åy׳k}ñÚ1–ºål‡æŸ‰­­^zb_¤G¶o·ÿ0Ûÿ´Á×SÛà”#Øv¼d.+Î>ýBàòà6'Onƒy Û·ßtìvÄÌtmñ]èíÛ}:|;âS#$¨“";~;B½õºsw´„sî‚ËÔ#ت."w Œ—fO …yÙÉöü8@^ŠÓòVÈ{²ªÁ^"UÈK£²ÇPí òrÅßj}ª^–Þayû£Ê+”Þ(¯«i$˜7”ª ˜#øy.ETÀ¼¼ékI˜óÒôîÙç0ïpýXbÁ¼l¦\f=ê:ä0/C¥ËKyǼZÝ^9o焽¥Ùyžóê„ps^ò¹+­ê‚Êy9ÂÞÌ»ç›T΋̾îMÔÊyñêèU‹‘WÏyu‚  W ½2¯VÎ[¹ÿ¿ÍÎ+r^&zH9/Þ\z„ÊyyìQ2(è-úOCÌßô —ŸÆ½Ç»Ž–ÇÈd+)@/…k‡»rÞBµ%Fô9ÅQÎ[.[^hÊy+Ƕ~ÇSÎKSb"îf,×s^†‘v<|Øq^ú³^ÐazVÏyÑ•òSÒI9ohÂyéãq§Mf=æ­Ãtmæ]˜·RšRo›+æ­s¢gÕœ”óZ+iRÎË™×µ 5*ç­‘=!ÃsÞʱ‹.*çU_nE½´#dÄXàa/Ã*sÉYþJi/ÍÝi¡9…¦ör<¹·q‡Â^z<¦]‹XaoåüÏa¨À^në?v©r…½œ„÷–Y 
ì¥Ãʽ+Ú+ì¥ÃJ>7õÜ˧ãv…Iõ÷Ò€ug­bŽÚ÷rfŒ¨aAJq/­Yêð»|ǽ…ªøêÆv÷ªï¸ò^›žó<ã½ZJy/‹?q™íÎob[þs”Å„÷Ò¡˜ÁŽÐ^Žõ÷ªø¬°·ÐžµUröj)…½jî°WíÔöòzT‹DØKî2gP“”zØ«öí {Y9 {F(°7sù*m£À^V Ažj޶ {i†O)¯éÏö’ÑiÑVöÒE‰"~›` íÕ‚(J{Yº¢˜òN{‡wQÙ“4{iö„¹+²ïÉ>=î -VpoaPíÓWaoèVö²´MKËmTa/ï|H—LæZþÓªJ{™ Ͻ'óöí͘î²V „öj¥½….ó÷r|TÚKS©›ÀÙq÷²OÙÛ©÷âØ}MjñЏ—WÊæ†Â{ã1Ï{iÃu!Ã2Ñœ_~5ú©Ûš™_6 š¾NÇ4¾!hðeõ¡ìç7â+-Ax¯–mQÞ+…÷N]0²/(êp/«›L•òDº÷òûÐßÈ$k‚{ KQÙ ©Â^Z8s÷’ƒ{Yãˆb‰fÐÖÃÞðX{IwÏsÓ½ZE`¯ RØËþDÕãz“èÆo&°—’qC`{ %è‡iV=ìå1nöš.@ {ùZ0~_sÖËjR}›º)ë ™„°^ÆId¯¦­QÖ[¸Q6/´¡¬—…´ÒÕת«°ÞÐ\…õ*ZÒYÕ ëÕÒ@J{Yâ²"#˜B{ÙD1]²\{‡Ó÷É(ìÕ:7 {µØ¢Â^­8µ`/¯Ä-fÆ ö²/ ÄZ19…½™{3Ë,Íö†žèa/.W† °—cð4èVØ›)’aqFÁörc-g¨¶C`/£!ç“)ì ˆÀ^-r¦°wDÌfêÌÖöje,Ž!¢ îÍ\jGFbÏ.¸7¤J‚{ù‰äBsWhoè¬B{óhÚk²¦´—ƒYJ[9/´—A{T>Ÿ¿éi¯úß+íå×;¶«£Ò^6@jñ-‰ÚËNW)ÕßǺµY?Ü í¥_ÿ‰˜kz¡½áà íåPŒ/x,°ìh//Çu½ÍØË^0 uMºìa/ìNŒó…xØËn€)}ÿ öê8¡°çKSØ«O§°—EÙá¦Â^6¡º´zÊzóh3K\§°Wëö)ìåmb´…1Ž|ÑÇÆPŠ{Ù,;òé9â*ïe*È ¯Ù^‹ç½¼ä¶éZyï°-fé+¸—ÙëUƒ³÷j-FŽ,ÑÀlrÎÞøSÒÍAðåCÕsUdú^à{là{âßí¥é>×øßÌX±ts÷Ö:ö›7•ö×onµî;ÁTo„>éÈõ™7B§Ëw[uŒÔ!è#Å0llx²cÞA[ÔðRÙY)QYo â@úûÚÝ/Þ²V¡ŠÞ Ô«Z5E½º¬¨WËå(ê¥8ëÙ†¡¨·ŒDiñ/A½ªSÔ«‹rŠzõÉ#éõ ÏMzU訤é±-*•ôj‰#%½^¼8o§áóµ•ÊžóÊJQ༜- g݉Ž=èÕÕ§ èÕ¯¦ WUœ zéÒ}róÍ.!Èíô²ƒ¬JFŠyÃ(æ%L­ø†åóªPD9¯êƒžW”Êy™–îŽho® n›’«ª7ˆ_EÕ¤»AÖ›9ÏrB¬—Ý«,¬d½¸ ®ei̋֌[mk:¨²Þð›*ëe#·ÔAÖK«Ì< :©¬—ÎÐhzGYÒ]'땨d½ÚbUÖËÁL±oæ ¡³*ée¨Á¬T¶’^žwpao6•õj[WY¯,¬Y/Ià}Ú–œ ë½kZ3h•õJpºÞp=ÑõjßÒ«ê!½¾h.ÎzÒKƒnNvMº¥Â^鮪ë•¶t½úU×KÏed£Ë™G½Š¨õ²==±õêžZE½:º(êåþ^ºNù£¢^>ßÑúÆÎžõò7‘¢›²^~UšÂvc¯žõ²Ø žÊ|l”õêöee½ì/42šû0•õâ7/oKáY¯ŽÊzÃyövº¯r­=OkÑöê#ìåm^hƒ3ÛVØËßd¨9×[ê^¤ÁK¨ô}¶ §S÷~“. 
¨ <‘]DÔ{P®u®jÈ‚z¹ÿ¿œ[þ¢¬W7R¨.DÇÍðA|pY¼é²^Á÷â½-ÝU\Vé;¶)‚¸àróäÞ× ¤—¹¹ÛM0Á¥Ð‘Å?½œ©]»¸¼‚^Î éÚ¸Lw=èe±êÜVæ ¤—åÒ¸n0)©€^ST´ñrOÝ®½'×g¬tb^!ð¼ú»qk•Y1ë0ïAóŒß¹^¯ˆyÑE «Q›HñÆ%Ⱥ‘žâËvÅôtBPo«†ub+ũ՗®ðÈ2Ó´ÊôÖ¸Rú9Zã–‘WœŸYã:3ÅàŒË"j×'¶ £È6Cóõθ™»~òñfÊ@·G™ï‹3®”¦Vg\©Oq-€ÌjMœqÕxUìq¹ ”>þsCg°Ç=-lž,ö¸#É;mË7b˱¹§y%Š=.š ǦѸÚãJ½fµÇåþXÒéòæ¾@whÚu%³˜uV‹º £ >©â« Süq¥·Ú`•íùlðIõx–·iYgnžÎÇdgçhe&èwV̱Öyãf*É—ÔCYÄÊêöå{&˪ÕÞÊÀ#Ùã}á¶öºÌq¹d”YàëÍ÷f·ÂP•fÛ {°Î(^Ð,C«@–eîñîé (@–ÿpWÕº“øãrïÕyõÛ¯÷Çít‘ïK8)þ¸RÍZýqÕ”[`,Ë…S bÞb«ÝZ réýÁzÛó¡Å · ¥fq¨¹œ†]øSz·X`jCÉêdeâ+ù‹:ä².÷IÆ7‡\¦ó7’¦‡¯Ç°t‹¤¬«~â‘Ëýy‰ݯaÃżE.{ ^M2\a:¦ ¾¦Õ¬ƒ°Œ SŽ’^Á†gž–Ö´˜áíοôVj¤¨C.ó¢;Y%á¯!8{üÊÏQÑÏ/Óûz‡\šÅ!7se¥˜S:ärgûãÃ"ð•f¡wMg1»_‰’¸iŲ5É%DÀ+³ÔKà+ç›T*ïôì••ü΋@ròUï’KùÖ£ôÊ´íIl}ôʇ›Å‚ßD¶¡mx•é¡r׃k±òÊix+ày_â }Ìv#àUkÉ xeâÅa^ÈëI(›…ü…¼žœÑ<Êj±Êõ³˜Å]1°Þn9I°ëI…ñΖŸÕ*—êêZÍ’×a×ø“Žºâî{ïLZ­rIOi†\Þ­rµ†¹RWv&ŠÝ§ S­rƒÏ¬‡®Ü~Ñ}s^¹>¼)s¥rÚ,ƒU._6nüœÞ®j•Kp…x×ÍF×3ד`¬]œ¤½Þ¬r…«\n]¬k-Xår#èµre®‰£uj÷L”•¹òµXÌé¢2W-%%Ìõde`×›‰]DJ°«jI»rè?¹M"Â]+Gн£ý——+tœíÆéßÀk£ãÚÇòÊ) &2h‡ož¹lO7åÒòcKnÍvB„òcÜØS–3Q(?ÆÆ¼÷î…òc•Ó•6M(?NóåÇÈX‚d–¯RôZ‰ èóIý±Î ?W}¯?FÿÑm*¨õÇn~¹Ö¯ú昫@FK]´NYew 2©q£%ÈdÒ!ЕÆ‘·ª[JÄò7­~_¹j$%Ȥȇ– SÒ%%Èdê#¼Uç7‚[™87Ì.æÞ3¡­œ“ Læ7ïÜÅÌ4~ÒJÑûÚv" Uû•³Ò§áÞ¤^8ëÉFkO¬@æŠ f•23ŠY}†+”U3;OY¥I¬?†Èt/§]¬ô(Á‹“y-@†ûáæµéç¯Ètñ’æ„Ø «–ŠÑdZBN ‘¿ÞËŠÙCVVþºùUî÷úc¡j™c¬­Íð;³ …úc¾ŸÔ#­a%"»?ÏX*SjVAAêIÙ¹X ã*†§¾l6cÅY!¿ €¤þÞFEsŠõÇN*;–©­c¬4º~Ì0”±ÒyÉÙäzÆ*=JU³Ì˜îã^õ€B2A³²J„Í,s)  »È—§¬ Ú¨%÷Š5àJ…¬wºN[³UÈÊ›}ܹd­s‹æ¿˜²²³<Ïœÿ‡dHüîõ=eå$a&þÊÊYL¿ ó_ï”ÕÃ"Á¬Z‚D0«ò„€Y}Íš€Y}‰4á¬:ÎJ¯Ö{¥‘³2”.ç‹Pì¸ÎUA9«–Ϊp/pVËb²“¥`m*!œ•ÛQhýo&´êzÐzÐ;îZ8­ZfK@«ò¶P†ì Œ³"E Ž%¿ibà få*úTÙ½"f„î)ë˜Ì³¼ð¼cOYZ7.¯ ¥¬Z[N(+ F%›Ø{Êʲ¥TH,òé)«V_ UÈ ÂY;ÊÎòUÈ$ªGÊ*¼ÔQVŽ-,þ{•wÊÊù Ò›â†*d®:©V!c™¥Ö}Ê*Cì/­B&ÕI=iå¨Ãiãžj2÷sR… ÷ýնz'UÈп¤ZeXÖ"dtdï,–"d±=få¨ÇõßU¯Ì_›óˆU×±*eÄŠnÃ(j=Jk"nº–tMJ!Y©T@NTê«0Àª=T«Æ¬œÓÌï¯XCDÀzÒÈ™âìùX°rNxô%õÈth“ d¤•y;Ô]Õr…‚Wm*/‡Wu‰D*I²­t•yü‰èzS¸†wat•ÙócêØXž&tõ¤¯+:^^n]UŒï˱" 6æÝ pÕWЏªå)®† ^¾ü˜ô)?¦ëUž¬råÚÙ~Kõ1™†ÆêcNM¹ê¶BQ¨*Uµ„©º¹W$ª¾N­ÔÃ;Áäo!øºcº/@ êA‡ËI¨ržý(¶P ɘT åÊN}C¦‡Ë.š¢ÝÅ«X™Ãž_{bJS#Þt054a©¢/Ѳc¬*ˆ©%ož¥0®UÇ¡ ®b«ŠRg.ŠÚú›W.ª½UãL mýÍ—V»U¨:FOí½{+ Ô2’e 
[ó˜C©|ÇèusvûVsìx$™¤"ø<úîRé*yžý¬Ÿ€Tº¨µõ.¤ò¶èÝr•wÊš€”Îÿ­æ®½ `+H¥Þ½ü˜ÐÓsT#†’cÜhr®ÈöVr¬ Qø|Ï|½£‚ŸFÙ1LÒ_˜€½~þÒ4";ÿ4ppüáO?üû÷ÀZ¦—ÈcÓ"ýì6¾‹Û%[¶+ާü/ˆ>Lº2sýë´1¿¼Ç^ºßú˜¯˜þÚëcÈiûì?¾öækß}ý™†äF„¿è ÌËÏ?~éÕç+Ÿ—·?rýù‹?ýû c¹²‚aCKLOÛ3$7¸£úÑ|P~wâÿ¸hñúñç×þyÈ?¾þåõãïøçé…Z宥‘e¤_~¥üíWbrGû¿šÛØóÿ ¯T㕾 Dî°ü7—Š¿.Dúg¬Þëü‹ÆÙÚn"l{þŠ»I˜«œ†DâÝøc_3V²@+æHÄ)áfü¡/m•ùI¡¾¤ðÚ¡ÈM|mw£mDïæKÛȈÒFäf>k#¿ þ§Ô)®À€ÓiÍ2L•Û£0£{‹Êǫ̠üíÃŒ^æ*¼ ‰²ý·ËÜë2é×^SÌBé¥uö¿qü+¯ÓNê.h¶||òÖöej¼Ìß¹ÄÅÅ:VÆÅ¤®V̪¸ð.Áÿø×ŸþíçŸÿøç?ÿñ¯ýëþËŸÿòùW¯_´ó®R_ð@8Ç«uØgÄQ&QsS/[Ÿ©,Ï:Ƙ6v«+\ë0yyz³h™æ)Zëýq, Ÿ8S¤äfºòáPßÜQ·‘lå½­!Ò`m×¹ÏST§QO·Ú’7Y+ƒÜ˜wô–­é\çAÂ61SöæéüÑiÂ|ôyE&®òÒ¢†|q¼¢«nó´Û]Û42ße¼ðxËн²òÊcP!ÅÙê¨+³ *¸ìrõ²ž½{—5âë½\X‡ûÇñ‘¤²Zî\"­ôÕðµç\ù¦Ê͵A××6ëæ±{מãšÎÞ[P)o³Së?oóºœÅƒ(ãò´Þ@Gqk•åö–©]Å9¬QËI7¹0ËcÃZåž¼Ÿiñ…æÖX÷†µŸæÅšóWãYh5æ)‹Áßù«±Ã±Z¶;¹¿ÃkïÕVyìñW ¿™ç¯&w’Oç®VYÉì^æÞˆë»švaJ™7Ù÷c¹›2×L§¶Q¿»É~¥$“f*i#Œ_š²…í®ª9wµÊœ q}ª¶xìqW«C"šîn7Øw5éŸ.loµÊ Ç7÷ßñâwî"äpäÞ‘ã²Õ—h¥¾sW¶ÆR‹ŽcÉ™«U*Yëã˜×»œ¹ZxÁ£¢@~RßçKqæj¡ã³êÌÎ]Ùñi%2—ÀÑžœ¹È£úk¥9sµø|Ý™«ñƒbNtMcoÜ•3W‹¿y;s5ùõpÖj‘‚º³Vc,AC±¬¶QѰ2W>\Ÿ51Ç¡Ëy«±áïmsPãèº3Wä\™ò¦õÏ2×VË4VcC@ ÃKœ·^±Zå¿i¨3Ž5·‹ü}×ÿiìv;u¥d–½ÂÂ'—ºJ*‰ŒÌ«1êrÿíT¸ã[:c5>Öu/×ñÆ:m>u½h¢oclKÎXÁ‡û¡æ\„Çž½|¬¢öè•Ëb?©kçVæ%8kœ¶<©«ä‹Å=wêUg¬V)ôêÜ乎=ûù(]g-éÃŽ5ç¬ï¥;g5JB!tfj;¸vJ3©@ÏG™Ã“»rÅ3­j8<æŠ&Óìl«¿•OîJ'7º89h<+¼R?Ž&;u± ó)ç­†´më*óa>åŠ&‹ã*=ÞjlzޅĹâ‡g̨¤h²«$Û†xkó_©/Ýz÷E“/V=ÃÈ5»iï®hr㈟¯d—»µh2]…Û¬±Ñ(0{Š&K)ßFaúc®Æñ¸r|<õ’EðP^­­<<æŠ&K­Äv_¾hòEMÜ*/ÕÆ–âm`Yé'SWR?6¤ÞOZŸ‡Rý𘫱Ëvyi”?xs5g Æc®l2wÞ–²ÚÅ Þ%XžïöæjâéÔ¹[xç¯,6:ÇÀÎ=bOÙdqíc¿ÏvWC|d#:FÕ1©zÊ&sŒ*Ük>/wiÙdWî¯Ñ—Mf–„8Çý~_6YŠ óØ´Wÿ©iÍ$;W§{5ô‡+­¼±ÍíóC“ÂC­fC½q rý2¾f­Î^•`1)éöBno¯6PßÚ‡Ó©~xìÕ¸#2­€Ù)kxìÕ2uê{µN Åšë÷óòöj´Ï§YÃi‡ž~TôÔ] ¬Í}Û]}p‹Bq¼»ÚžÙ~±zo5îíÊkŠŠ˜ê½Õ¨ÊÕ9o5JIò*êØ¹ŸîñVã‹Ý¨:fUÎ[úð]|ƒ­÷Vc—ÈkßDO§÷Vã£nVÉcÎ[ ÝcœÄiç­ÆûÊ+÷t9}ÊØ2R×1]I ÞêÜ…{ÐQ>…@çÞUŽÆ¢Y #çS}ßF¿UCãïìïS¼Ë‚Éœ€×÷3;wq¥»˜¢¥¼wä ÝÅÄîdÆ‘Á}à.Õì´…^¿èè.Þ*AÁ't—Ç0¯è-t7Qß÷îl¡»,ÁŒTâºQ½|¹dú×o‹Q¡»,ÏœËVÕ ÝMì2Ükdej<ÝÅM°üÞ®eîénb=¶oÐ]–|>Ûƒ?=ÝeYçNkb«Wîé.AúÁÝ*ÍîÓÑÝÄm-Yéùwù“…ÓÍòÐÃ]ò÷3íäÇÃ]‘ƾ“ž:¸Ë«¡‹Ø\OØ.O;Y!É*§{¶ËJ,ˆøXó¥ÞMŒáyc/Á»o§8¼Ëc—Ûyëùn:=ÏkQrÇwßNó|—²ìÊ­Âwù“ùN«Ü¹ð]VÀN¬d67*ß屓«lABø. 
ÃÈ…§‡–ðÝñ›í\n®Êwu |—çe†¾)CVÀË}4nKµÞ›z÷¶¼¡ðR}‰öneXðÒ»æ@È4ûK¼Üæ6lz§*Q/µ“Ü :çÏÂwé—BgÌ2•ïÆcŽïr·S˜bŒÔóÝxšç»Ü“Ä*!§ádÏw©ÄÆÔÚª ,ÂKÉ~îu%˜Bx©{ïçÚ‚¥„—2gú³ÂËÕv V!¼”£³lÚLƒðrë_ñÜÕ£€7<—^nÑ:L­x—Zßm¸¬t—–LöÖ1Ow™d_3ÚW¤»Ì•ihGð.óï m–˜Q¼Þ¤àÝø›ïò èµ7•¤Šw9O¸²Ú>öà]æû'w¾Î§ótWËØ+Ýe>G›ŸˆÔã]Jò™ýOé¸^>9ëk\Ÿ^*»ï]ËP/ƒ ‹!}¼ÜŽÑJ?gî¹/ï‚{gy,¼Lkî¾îÏã]¶:ÎjŒå ÞÕUVÁ»ÜHrìb„ŠwÇ, =gÊÔïÆcïjš®x7<œà]ªçïcmOQ¼‹ó:ëÙÞ†Œ=Þå–êŽyç´ŒV¾Ëý`iW¼S¾Ë*%sÕSù.wUm íJx¹ÏøÜEÚ”ðr0¡yÔ4ÊRÂËûwR!¼Ü"G6QŒ2{ÆËceW"TÈË͘cF?a­‡¼ÜiXvuC…¼¸ËÒvÝV…¼|ºs—‰TÈ«sN…¼úa„ñÆÓ<ã ½X/§B©󊌗GÓÍ©kPÆËfþÈñ•ñòuîÌK/ßfÛ¥Aò²%_×Úª—ĺ & äe@©Év)ã½u[6Êxi(ðì5PÆÚʼ:ÅS̸b^:!œÕfHo˜w¯ÁxÆËwÄ/-4ì/oïÚåj•ñx†¨?½¿”ñªI‹2Þp=a¼l?¸c3ÎVÆË×xîÄÊxù±Ë.a¯Œ7þ¦g¼œ¹—]}Uo<Ï3ÞЄñò>ï]PToèÆÂxC8ÆËߤqå¬&¤Œ—¨àÞ%–…ñ²©<ÆWÊx•>(ãå¶:ê/æ¾fe¼a0ÆËH‰È*Ò(ãefXòô”RÂË&tí’ Jx9š§PÏwÙ ÏU”Uñ.÷©vj¼f¿¼Ë|¥‚«aLwÉIYñ{š=+Þ¥íʱ‹Æ+Þ àÝðl‚wéIOÃò‚w¹Ž#MJ x7dû‚wù#>>Ã»Š‡ï2§>93k¼Ë/ÇÝòs¹Bñ.Ÿi†sÅ»œã »59â]Î/X‹Ä€ŸÇ» ¢ï`—#mÞÅ‚ìò­ÞËL_¹®æì‹ë†É¬r]z]à:ynO \—6F÷e÷ç©.·&„žnPÚQ]!—ÛÜS]nËcM»)ÕåMöë^'”êr(ãEŠ=ÕåyoÀ¼ó”êÞÃà´çëÞ,¯¹ …)Öå,¾ß«¶’b]n”äîVƒÏžëò´z­úšÊué4‚‹›ßr]‚´°c:Ç+×%l8ú*­¤\—í'æäG¹nhrÂu¹0J>63áº4‰n ´:®¾¬çºilÚÌÅf *šçËcŽ…¯ÀuÉ‘0E[N5CvÏmu¯§oäo3p{¼Õ’÷ÏïSýð2 Ú„S /¹íNâeÞù‰VÄËŠO{./ü=¿RÄ{2IÜ5šÞoÂͤ¹ /KÄn?ú€xYi-hÎâ¥r›ŒÄK˘Güªˆ—_nïŽC¼ÖcÒ+"Þc$›3³SÀ;¼‰Ô7ƒÔx)¿äfÜôNxŠüfäxEùnJ áe:S³eLðÒ†75]“âEzˆ?N UïÒ¢ oò·€W“ð*=Q¯hªàeÎÂÞnÕs¼‚7þ¤Þ“õîUH¼#{¥ aRU¯àí̶õ«*xùÔ˜+mi¬'¼ºÌ¦ Þ±K¿l /„—ÍÉðœÊwû°,iÖ´¨xƒ`UT¼ô­+§•:*^fíÈ5fAmżt =Êç˜7Š`EÅKšQž§*^Z¼äµ d¼´&Ù®ŒAÆ«_Ne¼ùÊ’!/íB S¤ù|óâUŸhÎVD9yÑ4–<>vB^Ýœ B^®á6“†%¯6"Oy í·Ùqò’¢ÕÔfqï7)ïÅùµA•òêö“òÒMî^ öAÊ‹>uï…ß åeж ƒ”WÙ+yIPÎUjðMËë_°Šyi.Å¢l×b¹NÌÛøükµ[9/-¾è lK+*æÕ{Q1/­pw1·71¯ß–¡b^N;Ó*mļú6£˜w™ŠFÎÚ–jy©qì{4V-/ Èr[P$jy1µÄ™¦Ø jÞ„Xv\&Ëõ ·ŒÄ°o¥¯Wó2ò!K²Æ¥j^.K" ç­Øuj^Ýê¡j^æ‰{õýôâK±ÕÜn½,ÇRÑL}ëIoP„ é-œœî©¾ zYC¨,¤ è%.ÀÅM‹¯‚^f,8é3Ø[FV’m³Vôr>]WIÊ è¥˜lK*‚ W7¤¨ ÷¤Ã^]K/*èÕ¶®‚^fãèô¶=I½Ã/›6hóÙEÐKT˜ÛõDÐKâ8âê:‹ —_u ß½a3ŽÀ^¼Oüy‹?UЫ‘J½ª½ØÆ½aÿƒ zu’ôºP`o‘÷†F$¸7lcóÀW#°ß à[h–Vé:¾l^×Xžx¾ì™7"â1C¢ß°;@€¯îàÞ˯M­ˆ­7 ï%Qi´Ô<ßy//#»¶|\x//Ç2Š?¸7DYÁ½Tôsl[ 
h{©x\:÷"Tólžâ^|Öë3Ü‹ct²:—‚{u›¡B_ÏúÊ“+ôÕaw‹y¹Md{ÿ+óÕ`¢ÌWã¯RßpúÊ®De¾…ùY¿îITÉ«÷(ÈWwµª—͇5¦\G‘¯&Š|µ/*òÕÍ,Š|ÙF0‰YŒ\˜o¼žg¾º÷M™¯fÍÊ|uW„2ßÂùÛ­)óÕ€®Ì—»'zß¾!ßø:=òÕ˜¡R^¾ÍÇ V¥¼ìüèS¶Ÿ_¥¼:wR^6á+5Ó¸/ò=6ò=ñï¿÷&Š¿Ï©ÒŒ¼—sÛ{´~ ¬ú±«p ï=tÞà×@Ã>–4˜¤K èÃw"ö™ñ‚6Ðö^¥7Ô°#D=×ÄG tQ76pJpîMçjØ@?hV­¼æ1oØ …݃aƒ–8WÃÒ-.æõwÆPÆ\ X=‰e©gmm1l`’ñˆíÔ°u&æøO h¦?u•jØP¸Mv-‘¨aƒìS † ÒY X]c×ËRäÛXçb×o|<Gª^é+)È—Sy„DÅ)òeZÉ0pÛ“yäËý$TIÍáBoãh~"5߯ òe†›vmE¾áE ò¥ë.•ï– eÖK¾l£Ü”´dÂø¶<`G>Lb+À—îÁu) ƒe)[šqVÏ|Õ'A™/¿ “qŠÀ|ùñà—%…g¾|¾§6£2ß`,®´•æùr­s—mÈ—ÙÉ.z*Ì—T„…a¦n_™/{§wÚðs(¾ P¹-]†_.sqùØ$r|+gç-Sà{3«]{*ð¥1}]KÒJ|ùÕ..MÐ%Ä7 uJ|uI‰¯U)ñeÖs°GΘ"Ä—ý1ïýnJ|µ+ñEgAê´ÄžB|Ã+âÂÍ"¾ñ¯=ñ I'¾”yä²f­|yˆ3¾´Îø†TC€oÈø†‘E€¯f |YÙb ZÊßøjSÞ½ð^“Ì…{á½ ÛO] å½!¤ ï×ó¼Wg彜L>Eä”÷2: !®U5pàvònBÅàßP‰H÷:„àÞp+÷†õcõo`±¨]´+ø7ð7ØÎí'=îm”‚´­=5Ü«²àßÀ"Ëô‰yEØ;êŽp.º ļ‹ð¶çVÍdÚÌ÷X£º½Ã^N(ŸÅ {é À’Ç6hpæ ,2u=fÞ@LÎé‰ Þ ÛºÓ³âÝ &Á»¡0A5P^öò63ëEÜïÞ jn¼Âã‰wKµÏŽìO;¥½ê…¼ –²ùÿ(íUT Þ Tr±ÙåÄ»µØ÷ÆçàÝ ½ÿÍ»¡?UÜïÖÞ•>”÷²lfߢ>å½èé{e=ÞU¾¤3\šÏåpoc‰IÌärö¼Ljž–(Ux/g-\ò?>Ñø²åQoXP€/­3¨´YZ]|y¬".Ã|•6)ð¿)Ö '3Àlƒf0oÚÌXÅIfÞ`gÞ@á(«†ÛybÞ0ØéT3ë†ÂX³VQ‚u3³±Aþ÷ŽÒ¦÷ù íe¬%¶µ. 
Æ ¢ŠSÚ‹cüõcnáUÚKyÙÿ„J{O¯kYß Ú«¶ßgÜp:ï7Yó"ˆÑ—ª¼+{Y±íש¤—¶ý_æžš@!½ÕvUÒ«»Ï#éí,–S?³æ•oÖ¼nï^°æeA̧~M´æ=i/znÛ^GzY÷fâ‹W$½è³hÑK ÿfÍ[Ó±wì é¥({¯N#ÙbÍËw»J ½ÞB" ‹3o§©R=×+ЋyŽÏ÷zã¼,?±«XÎK·Ð”–»­Ç¼Ë.Û—€yÊÌKùÅ.)0¯À¶ó– .ËÁU1/'×a`óú+PÞpžò6.¾®Müòâu QòlÌByYa™Rmë*ìeV„ËåA+œWÆÿÀyY*}WÜ œW‰bÞÊ"óË£ `ÞÑ71^所óÒç©&¥˜—%nóÚ„0/ë&×ÓHUÀ¼\ BÜ:¶m¯Ã¼ÌKt.+`ἬI†ôÍvÅ*ç%.8—Ä›´—‹½kf´½®ûm/gøhzKIì9/Ž­¯1/O£•¨áœGÚ[¹¼W¶ÿ®“öŽ¢-[G$œÇ:ƉµÿN¥½¬àÉá%½qÞx"íEjO7Y›— ç ?)œ—ùwؤF8¯ŽgAÚ˘Š4kIf=çõƒ®bÞQNü®k;¿`ÞΠ-K”`Þ {„Õ¼A·9 æíÌé·ÃbÞN%÷Y-ÌË·I‰«©óò7'`™(×c^.ˆ_Û†ÁcÞÎIÆ·6§²ó†'˜—É+‡æO„½áñóöYI|{ææiBÒÞÝøu¸9ÇD`‚yU¯˜—mÚÆ•<æUC7żŒQ5á¼<–÷¼"Tb;F{º" èåyý,nîAo¡+Û…7=ŸA@/ÎcÆÞ—õ —Lj–(Ö×b;ø¬uË€=êÅiéÚqŠzq+Ñj•¤Ô‹cxGÍ ­êå½·¼Êþ*êåîItv3¥Ò[Ð@óvîSÒKâˆ;o¶‚åIo¡CýÍ﬷С>- ޲^Þ:E3/¬—e‘1/iXtpÀ˜²&ZÊz M1ºT[úÖ[8¤oè<äQ//W¶—¢Þðx‚z «i­y’^nœ&éŸcãOÞ^ŒÏ éåÇÎÛÖOI/·j›˜*çåÏw\»|âÓ@íúèî ›yÎ[8QLi1.á¼á&…ôò]»z´’Þx¹eß ¡Kݘ+2’˜¯€Þ1µ9×h¤¤WI¸7P½öcKÝè¬so;(!½ÚsÔ¼$¯M zÃÇVó:°í zÙ*êãB, —ߦm·L½Á'FH/åZ“ÌËŠðRlõM0ot¦ð˜7>œÇ¼l“uû—*æåM¶ÇGVÂ1yýxÌËžBµÎªüæ9o<Ís^\îž5BÞmzà `œ7D unÐö*œ—Ž"ÅÜO•òò{ÒÐÒHº§¼8©²![«Æ »YÆ`¹%xÊ˃‚h[ÝQã¼î{×E Æ š¨qó¯m?Œ4‹Î në}pnPounà(ƒïf]NhôŒ$ÉtéjÝ  ›Z7ˆÍB°nH”T_÷ÜŸ¬œW½ æ•âïÖ “¨mÃEæÒ7Û"en­7ý¯Ø6 D`nëÖo¾ ®–êãÛ)êÚbJò ÞÇæÉѸáqy3nØ¥?Õ·'Mïèwã†`Î ¾ 2Û}3npåIƒqšÒy®¢TÁ¸A ã?~ómp #ø6ˆ;LðmzÆÁ·!œ'¾ GTÆKÙ6뛬ºn^ÈKjÆŽjÒ`õm ®­ñÜ~¼Î·A*Qã™3ªqƒ˜¹)åå.Š^ÍL<ø609‘*ÌC^ÈÛf÷²ÙÅòRõ×n…ï³møiTa#nAä~áo¸u-—;þ8)5?üé‡ÿ”Ìe²—¨…Ó¢þôF¾ _Û5ñt?»þ¯x£I{NûsºŸ¬ö·¿~éëêüÓ×^ãË~öù篽þxßã{ÛË3‡ÿ’7?¯Í?|é•Ç»ž×žüäêó÷~úwQ•*]C³Ëѱsa¥_¼úÑŒ`~wâÿ0ä×?¿þðHCþñõ/¯ÿÃ?ÿø ¯CúÁò7‘õ/¾RþŽ+]ôŸÁ8„T³ýò+Õx¥/ˆ‰;ÿÆmeÇÞ/Œ‰þ)ûÓ2¿l¢ïRÒØ»nBŽ}ÍÝžVy<Þ?ö5£#ëóL´R‹7ã}q»lϨñ%­d\=4½¯m'ó~´¡„ûùÒ–2C…4½ÏÚÊ/q0¢1þÜFfM ÏÎÄòŸW™áù›‡p™ÿŸ·³éÙÜF®ö~~E/ß,bè‹’¸ MÖÞ7I&@bcàÿ¿xë¢XdÒ=3vóA¦ÝlÝú¢ÈâÅS§l~³ÓP+Ý"ø×iªŸfû£ç±õç`˜–Mþ×ÏsüÁó°¶Ø ·bIõ×OsæÓüSêNÙ¿¾¥• _6v0ÓøŸöûÿùçÿúùçŸ~ùå§_ýõ§ŸÿòËý¿ÇšD¿ý¦Ä탃6R ‰q¬šòd³±÷=+n£}U|7\ögrépe;68Èã̈ôaVô@zymSÚÆ6m+¢¼íC)×ò?«W<¦xÅâU0oô&4ÂÜ×òhàï¦0® ‰;=Ëj*ÊÏ:ÉÅ>ÅÃUÀ´Q5z¸›óœ‹nò²ÎËù­é 1×5jÚ‡¡áB€3ÀØ­å+ˆ°†KêÝŠË Å8Û.ó‘ƒÜˆÆ&ú»¦>}ttˆî ñA³ÒÓ+ïVÌn™R¿e?˜T›E!0×Ñ%æw=bÁN©—JÛdó±âè]KTF†"b7·‹&’3ÐW÷èz³"l`@ñ0S+n;ú¸zþmAõŒ ¨¸¢ãFñ0j@Géû•÷45î|È 
W+’‡™@E­Úá'kÿ>&6Ps`9z.=mAjLò§=ág'².{D¤Ž`Þù@ằü‰$÷T:¨¤-]‹[l×åŒùÈ|æï§ÄWò+êr…Ôj‘kÕåŽG$ûÖ¾‰˜+D°%O’ëåû’ÞV©l>xkvß°¦mÊÑìêYÕô=Ýjƒ€¤ڌ൪Úsß–LàuóÚÖu=¢=J¥òÛò¼›5bN»ô4½ºm!Ù—ò»ïcÔõŒIx’[ü,•›ñ€Í¼×“³S×;Z‘ˆ·ƒuŽhÀÆéíÛ{´… [IÒ¬6Ìk3ÉO­Û“Q$Ù¿n[4‘,æŠÞpÚK1VŸ^¼n’Øz£­óýằŒ„˜.Ø,]~¦®Ïn*šo°w¹~e¥9Œ¤€‰]j0 ¬ŒP{·«¢)˜x¥_s <¬ûÝ.к-w·°[ž ú‹û­úNö"|ö¢-gX{eÀºïÑÏ1ÈíCnÝè4¡w·—˜¡-¥Fê~ÆŠ(è¸ìjŸ¬|Ú‚mˆýèôÚ‡ÇEæ Ðƒ×ã«ûðAfˆ×¯{iÑìÀØó~ö<í,Ñ“ŽRBW¯¯MÓÜVn2ÊÝkÅU‚•éÞbO  ¯ÚzlS›¼iê±ÇTähiS¨¹p`ò͈z”°Ÿ«Euh ~)üü¹ö=‘zœQ†ºÿÀv•“Ç5µaô­¥ûÖoàÎkâÍdÝ!"‰3 mÁª$‚ðZU~†j;öV¢¿¢Tͱ‘4¨$´L’ýóèB$Õ}h›Û $»?µ ZÓT¥fK-%H Ô ¶ZÖ‚;⦭çÝÖæ°\ÖBIX`ò“­EòØÅ1¨²ÉMƒ“^m"¤ÑWqÛàÊ&àZÅUª¢U‹fnÁs®6]Ùè¬+Úöí>³;lshE‹e}®ó§8j‰A\mbοÒ]ÙŠšC+iÏ£zm¡&’§Ý¶ç'¯àš—~ñ«P©b$c~ªÑ×þÃòÚ^aðÄ´¿XúVo½Öµª·mÅ +:¡Ú”éåâí®BÔšæ>Ʋ9¼ŠÙlm棿Šó*mÁN ¼*§˜Ã«xwV¾ˆ9¼Šk[eš|º{¼=w¨^Éû18'Òv‹ëÄ,ëU‰4æðªO9¾Š‹]%‘¾†.ÜŒ+ ñs|¥¡O~9m3tÅ„dÅ>‰ÝbQ$^ˆ•ÜÅ9²{xÕñJ¾`t¤Zjc×Ï=\ÑïP?­û >d²/^©]>¶£”¿’[6•Öâ"X[•ÂÑeůRíyް ‘¦ç¼ÙÓÖ_²×HÛŒ^­V¤w~œœ‘ll^ÿ¼’l4GXÙ;¯¬å¦ª½Rz)}r¤vêܦ'<´‰ O ”ùi b)[©DÍ‚CfijåÑœýíÈ^¶¾V±Ò[E>§¸;.Æž­1䟕ž2imk*ÇÛ¶X$éf3ùì*AkÜcá’&s»zlŽávŒa¥÷µÆ ÙçOk,1Š•ŠgÖx†0–g}€yã¥'RXÎgfÎûbkPËK=´Ö4¬õ’MŽës#ëåe×Q.Ô×(ûârÖ¸ÅV?2Ý£Z]ê]µÆPÿ‹%;u‰–5±*ËŸ{ëËk,Qx!Y"XÑÆŒ”›DÝíx† k¼¢N<_Ð"[-×À«yB[ ‚Xë„ØV•@L=1„eqK÷yÞr‡)Ц¤±Å¾O(×Åu£Ôóƒ*ptŠŠ&»ý}­½ž¹5î!Âeþ]ìnýÁÙÂÊ"£Åæîòîu¶² A®xÇZãÉÌ·`–ðÜçasîãcЃ”aÜr÷Â'Öˆªmy RIkŒYé]m5ˆ’¥:ŽõkÒ lP"toO[…G߃œ']ÎŽ©Ži3ENt²ÆMÓrë·FŒw{>ËÀ”„”4–YÊG¢Ö«Íéª)[pí¤HÍ3²(eëªaØÎ¥YOm5¢iË!Ñ;E|ìȵMH伯…ûl`0øof~_¢¢Wù;ù‰Šíå^V}íöµÓ“U#k mÆYn·VŒŒ¡Žtp¥ÀâUž‚° If8°û–ðÖ€ªOaŠ€­7ۈBàeÛ64Ri#]­×:lmwùØ'xÙ^ž#Øšð&裗Pà­± JãoÙ¢#»R(0m•ôæn’)0mç½OoŽ˜6ôÁžÔ)ðÖF[*<-·+¹‡Ã‚0`Ú,ÚqGaÀ´ÙèãInBÛq¬Î;ˆغºÝÝ0¥è ˜#–}¨~@0m¶ðÚO‘ÓTž¤­‡½ÜC_ø”Yœ¶A„sÜRö«'Ì nm¨L{l90¿yºšZ(0-6^x˜PàtT`À´0î9ªžøusÐ}ÙìÖƒåˆi#ï£ï¦í@üÈŠ·¶òäO 1ðÖò¬lÈwd»ÆÒòä`»"ÁÀ´-Û„ßó›6Ú迾00m7ÿN "¦í:7¨ ¦ ü½tØ@p;ß2|š\®†OÖ¤úÓ¶œÅ¹‚iÛ®áµ$ ˜6[æ×î- ØnÜ^¢M!KnׂCãÝÁlÁ´¡ÐtØAp»–c8+ Î×Apk«Ã‚N@0×Y×aá*,¸=—Õýµ" nGÝÃ-¢`š0É}R…Ód“›»· nÏË.²§ ¦í¸†°àöÖa†',øõ›Ó¶ŸsŸ$²`ÚÊu´gžPpn $xk*ÜQÅJHp~Ê‘·†eß@p»< x–Ç[Z@p{ëpÖœo+‚à|‘Óf×à…·6„øpDÌs½‡K¤àv>Öˆk?_@Á´-÷0vÌo^ð×GÁ,,øu¾À‚i»Á¥?LS!§¬3ÀÀ‚ÛÙ Á¾Pp3ø=æ¾WDÁ´íddÝÍÜnnŽÉÂ‚Û ”aM),8ßxdÁíæljtHYpk³›ëe·{ gæÙuü:_€Á¯¶ƒ›°`µÜE®>ªƒÛÃ_G™6Á´Yø6¶# 
Χ‰08iÓfê€y‹±«AÏÝ¡î£WÊÞ!ŠèmSղ雒çGap»N»â¦Ï½¦ó]!zM‡Ý!zei¼öaÁíÆ)õødƒÛa¶üîž1ƒó…DÜ~Q‘ƒÛ-¯é¸=¯©íˆá+¥ÀV—a nçkSq‡Ï%†¯,^6ÏIÌ ²˜Õ-e·óÑ÷Í‹¿žV€Áí×lØÝh§Ypîy‘狈,8÷ÊÈ‚óMGÜz%›}ƒ1²àüâ" ν9²àV¿ÒVFƒü:î 1lº½É‚SELÁíA_sÓ>Âàv¤¡‚À`ÚXêP`ð븃_ç 0˜¶BUœ¾Àˆ08?éƒswŽ08¿…ƒÛoZü².ýZ ÎðI;à¦o^ïÓõ" N( n¤G®½EÜ~µ¹/oœzƒâàvä¹{1zÅÁíižK·Rü¾Ø@ƒi¤¶lW¢( nû0HSü>2Ðàv=T‰¹:Î 4˜6ì|ûN’Ðà÷48u5¥ÁíŒìµžŽP ~=õHƒÓ‹VÜî²mÕt¢iðûœ¿A¤Áí ÁªÎt ~ÿl Áé»QüþÙFƒß?ý͸Ãe˜Ã+ ¦qÛìŸOŸ‹0øu²ƒßÓh³•›«+ ~Ý`„ÁiDQüz‘¿zV„Á¯žaðû‚& n·iaCßüêç¿A„Á¯‡ap{%e¹{¹P…Áígaw¯0¸I êê:À`×Ãù—_P€ÁX+.\Õ6Ààvä0³Püj‹¢üÁÚzƒöÜE£uók­ù~oêßçZ—ÄÁLLëSÛë%FÒz¹hãÍ…éÏTþâÂ×9ª‰¼¸ðµÛj¼ûZ*Nm‚†SÛ®Áu˜Ì kQ4L8FeEÃø!bè6• c£´ÏbI £}>gB® aŒ™öYÒIÐðÊG¿G¡‚†1{b¨ïmAà Ïe–‚4¼b|²›PAÃp¹³œdQ6LJv;­Ê†WüäF"¯ áF‡ÀGÑ0‡Ý6T-Ïa‘ Û,ºocóÄÉ0˜rX„(^Ø[×îw `˜ÃîQ"QÉ0(vÔ׿B†a©ÔzŠ€ašö±Ç©`xÁWlsŠ®`xi6[1 †­ªã=)]Ñð‚âÈW4¼0Ý3…ÃØX×kT Fé>—‚ãp W1…ÃÖ¶`’Ý¥‡¼^Ê„®§ 8Œ¥œ…b®;8ÌC¹‡„Âaê×w".`ÃëtÎ3 Û1Z %`Ø®ñÞ ·#æMíüâÓO “k°˺†yxÒ?¾" †í¦ìãÛ†¹øãú †ùMûS·¹W0¼à· `)`˜® yªÛ)¦[—m@üâˆÞË s¾óô" †é@h¤î.Í`ØÎgÿxé¹ †—æßp¿$Â[[}Q³DøCÓ £ík^  }a#`·­•ç{D» /蹆ٛ‚a#„3}GBÀp>.‚aÎg+÷ºwØÁpz&B†yÎÇpõP4Ìójʇhxž|” R4Œ I†uΆóÕßÉxw9Eä‰F_HzåpEÚ4£hØÚnÌDëùBÃÍ ùÑóÞ„ ³þ<¶‰c£H¸²ö²è9B†qlÁ6¿kô„ «°ª„ÕÂXTÂj|,*a¬›í t™‡ª„+ÃÄ=•ÓbumgCÓEe"VËg• ³È_mÒtým” W,ßö³O*VÛm• 3‡–á@¥:aæ^÷zpÕ ç¶¨æ|ë}=YA*®8Mo§&Vl• ƒCÎmùE&̦œÏÚéo” ÓfÁ««âD&\É–[x¥ 2a\„Öaƒ¥2a.ÝÞt©L˜7cAv×.¨L¸6éÕ'‘ Wô5ÃÙOe¸U·«S•0—2ã'‘ Û³yÒ®D&lM«Å„OU –mó§‹æ¶• ·±ç{•°zž«J˜6&ÍžL$*áæ¬{y8¬*áÔED%LÔI€]úqQ%Ì×á^¨*a\¥ì«Êã¨ÖüU ãàoÙãWQ kG‘0·î¡š(„S/…0m-èû4Q!¬UD!ŒKeX{ŒH„©ãÂæa­Q \qÇ]†ŠBœÌ»­oˆ@˜ŒÊRF‚Œ„Y]‡WST0>gT®{U Ìo>5e¶ÂéîD ,:TLr§…#Ôvü–pëB¤ýõ¡üѧ¾#ú`.ˆ»A.£>XË?$}°æ˜&0™¹&G…°çIaÚö{ß\)aÖ©öç^v( „+u‡6¯ÐœÂúŠ’@˜¥1jT\Š@˜á¦a‡¥?  Ö’'*ÆpÜú. IáJÞúêÙJI LããcÿA \Û<õÜ"ÆùJ]ßH˜F‹Î®®ÚIá×ÏF°}'QŸë8E ¬5;’@8? óª×­çœ%}°MCÌgw·â`o"œ†Ï¤棴€¸k¤’>˜#©àélRôÁ|ùöñ\ÎCEÌÏRFùî7(ú`nânöÝF}0ßI§]:ÕÁù©‰:˜Áí²s¬{¦‚2stKê`zTÌ0e³ã~¼xðkq°V¾Iâ`¾+[ºt'·$ægI<=ýg£88¿Jç]ÄÁùñˆ8˜#m Ù³#“8X‹È$q0ãÉÙç·@†š«{ÿêD¬ƒõ£ ޝþ{qð2pðjÿþ÷°`ÒÉ·‡Ád¬›°Ê‚ùÏ‘|œŒ"ö6«.Û§@Qð‚»§=†ý-¦âš³êMÛQ6ÅxÝIÁ¦s˜+…ÕÕµ` ‚™`Ãõš‰ QL¹û€¼<• à¥1?×ò( æÝ'æ’X0/Û6ÈsgÁ¸¥ž.ÞK0x±Ço{÷âT,¼(Á` n¡¸Q¬¼Kì"ðø°±m} y$¼bmâ:0…ÁxÌßCö 0XÑ›Â`%. 
ƒWºC'nZUIa°”…ÇЂbž©4XA’8Fp¶ÅÝ)”ëc¿¸É½¸q”úEPùiÞÄê!/â3&L}€kˆ%Ä/‚«<×~­ÛQ"§Ë=Ä/˜´œ^[ý"Òózü"S¬„Iü"8µao/¬oLÜ"ˆáÏQL@Ý"å«ÍîÏœ¥n‰0) –ÚZꑾ¸E¤—&nù:£a/†ì¬½;Wþç­DWˆIƒñ†°®c”mPÃJ˜±ÑÖÅübÁqÛÖë?«cD‚Šâ‘ 8FÐGXmn]A#*1– ¼OͦƒmİÙÈåBâ‘`¤8F¤J#xy¤òô,‡ÁJgÅ1‚‡E­æ¾xÍ48¸$%ŒeúaƒoçF˺ ;¤î4-#¤¾\‚ÁT3¸†^K#2:aúÉ:üÖÕ3B¾9qŒH½$³à9Ü ¶goÐå_B‚õÆ‹mÔ ÏSe ßeÆÀ¡‚_ÂÀñ–3&1ɳŽëûÌXŽKø˜Z‰>îQÐ$Q`ý¢:¶ó+ ¥À醅“56ú_øêæõß^XG,¥ÀÊË•+­Î8lh(¾ZäÒ _$LG²¡º¼ÔÁ¯»œî B`ª§Œò u!Sàrâ)‰ë`¦˜Š£ÜL¢À÷Ýs项BɵkÖ»S œ~S(°ÞºR`ݯQ ¬}E)0Ø÷~NNµÇ*Ö}¥À7#éÕS|Ö+T ¬³žR`=ŸR`Ý'Q œÎ'8]§``}J‚uçKI°¾!Á„£mSü±´ˆ$X¿%ÁzÊ‚YJŒª‰ëSQLÑ¢uøi) Ö%eÁº›£,Xû²`ý”ëÎ’Âàô›ƒµwvL§Q­%Á`Ø2 –¾a°¼‚ ƒåYf,óG¦Á2ªd,)Ó`»Í:ªe,óY¦ÁùœBƒ¥Ë$,¯0ÑàüðªÆ²r9JƒãÌa°Ì(KÏÏ0Xºi†ÁùHÁö2픋[m( –/#Ã`ï¨õ•a0eئiˆÂ`(æN€ÓOÙa°tý ƒÓW,ÃS†Áé2Ë'•a°ŒQ§Þ¡08=TÅÁé QœúŽâàô:ãì1J‚%¬NppꊃÓ}(NÀâ` kY`qô÷¥88õVÅÁ2eœÏ)88Ÿ³¤˜WîSppê]ʃSwUœ^æþ¨Rl­à5¿Ï-b áßå Ø®OÕ×—4XÒ¬^8xµGÜCû„ƒm%[ÖQ„^qðÆšcõ‚Bj¼P:v¿ý¸Èƒ“$Ty°…ĸo¬oeðßt¶xÜž6ÿr¶wäÙûŠƒ[ÏRD‚ƒ©²ä‘2 FÃÒK,) ¦žêÚS{ ¦äaí‰Ä‰‹:*±àC¦+,˜Ò^«-{Ÿwª,XPdr Pr KØä,hD}ƒSÉøn¬£¯ë䫯ÁíðÅ}GÕ8˜5Í>*ó©q0êëÆWWL* V¥‰³··X¨Õ™ƒsÜeïwïü5çߌÆÁ- \½¹³y·þM%,›íb¬›ZŠ‚ÓQ·j+ævW"Áqó^­ƒÕ‘:¡`-( æ‹]Ü?'¡`|@ÖaÇ*ÖÁºœP0Hs¹z115Öðƒä¿›( ¶ãÊåõÞ«0D̃ãF_BÁTÅ6ûzÁà¦r8lí$Fa°=F|N{ Ví‡ÂàŠ ÛN±¶¶ƒ‚Î}±¦08µª©û° ë£R /Yîâ 8ÂàV{wëìèL¿#Îè@Fa0…ÈlvìœIa°Þ¹Â`âˆâ“Q¢ÁÚI”§ã6URÈù„ë‹0;éöæz&G¢Áø}ìKp†3uN¢Jƒ-ˆ² ¥çtû`þÒ~èÚû Fw~»u¿Úë) Æsdé™& Sêt¶¶Š‚7[@üO›°`&­2Ýw…Û@¼Ûƒyü5TlÉÎE÷Ql7·­?_ªÂ` IlVt ÁOáÚËõ²NÏRp°>.ÁÁµ°·*Q¬¯Zp«‚ººB\0ýÓÔ®|T ,—¢DØÆaªm?Åþ¶·JlÜw-•§ã„c=r «V%ÂXl=K(á»ÌË7;Ëó¯Ëz•áƒ!@ÂX†¥ªa;Ë~N3\Â8©ØŠå©ï™€0e‘Ë0¼U ,…†Æ´Ýþ¿›;&%Ã>Ëû–úÄ ·êÍÅ3ú•S%{™fÅB„Ó­G"œî átB„µM‰0¾(Û°iV"¬/O‰°Þ¹aÂûÞ®òÛ-`–Þ7Y”Ûƒ¾‹+?Æõeñ•E"ÂTd?§ °a*¾—éH,DØnö*Ó'¸áôkB„2<<2a}oJ„õΔë)NÇ ¦ªõ°üKD˜y¤a&"Œ§ Ê›ó¶žbkò»Ûê+†ZQ†þ7į¬IF ªát)‘ã3PÖ«T"Œ)€îš Dx!¡ìì…Ö¾ DXߺá…LœâÎJ„S›áÔ&D˜ý{[Æ=Ä átB„Óý=D8=! 
_Ø9}„³µñòîM‰°¼¹L„[¥§eú,&´8Üß9áÓÖ Y2ÆOf¯#^‰0<»ûlg",ŸC&Âù!¶Fœ¹†¿E$Âìð×+ÚBŒx–…>db˜T¶•8þ nR!D¸!‚{ÎЄӓU$,ZFÂòÍd$,sGFÂø­=c8aæ×ݽÙ3ŽÑ@Â2xe"œÎذŒ1§þ¨@8=©o(F{€±Ëq§—¡@8_aùÀ3–1?áô(–±!apN½\ˆpz>J„Ó·£DØþ%ž}C¬+D˜âî×Õ†™§ÎªDÓ&âŽO@8=XÂé@áÁé]*NC–ò`fŸ'pÿöæÁiTÚk¥¿ÿAû9ÔZë»r)&ž¸»÷d~…ÂÉ£A 0åNOÍJP˜já§Ë½_Õäìš¼¾I‚Â8åÓ›8U“‹>J…O@Õ(þÛ©0KžÒ^ß2&¢Eãö´\¢¶ˆÔRëÉÉFTª''ÙA©žœ$¥¤zr‡-6 ºy‚Ô“³ÞŽëzWNçzröTë±¼DÂi­•À°® ËŠ;¡amRI9<ˆ–Úe¡¦œ}Cµo×¼jÊ­ûqljʱ*·8À%ÕRSŽù>]V¤5åXÆ]#A_kÊ]HhÜ:Õ”Kuꤦ+–i¹›kÊ-Çt`Õšr©$ÃŽWkÊQ€t˜2§šr,aYj]Þ6ëÊA¦ñ©Ô•Óúp¹®\¬§uåf;­)Çšqx5½jÊÝ×tØÓšrðS[N8Ä•šrôùé%'`øÆÉs£ ¦ÀÚ9 #µª ozõ N…Ù´ª¿ö˜{Šò†Å–{wÐ `ø¦*ÍÒ¦o ßLŠÃBèUUîÂ]|}{FäBzZUN/0W•³ocê}¥ªÜ‰â«Iß2fÂÀ½Ûí}¥ªœ–hÒªrçds×ã¶66×=W Ëi-4),Ç×>ö"Sa¹B è9X©°Üaá˜]H—ÐçÒrë1¦¤TZ®´Évé«K--§EÖ^¥åžõ'¥å´™ aºsÎ|©´œ–ÿÒÒr©Pœ”–ÃÂeõ$ÃQZÎ:ÆÝ˜þs…©´ÕÜ|ÒÒröeR¡Ò‚†oÛ™Zo>NKm9µ‡ªKkË‘8dCs—Kjm9æë`…Z[NKùhq9Êo^c;C«Ëa mÿü|ú¥V—Kµà¤ºœ½âmJÊ¥ºƒD©ã©ºÜ;ÛdÀ¡ºŸõ2Vþ¹ºÜn©CZ^N •iy9¶’û·¥ð«n›”—Ûˆ&sNååbŸÕúrú4µ¾œMÓúrZ)KëËi³r(9ëËiQª\_.–Ëõå,ê\|þÖúröø™Û{:½bj3ásÒc‰\_.–BÌquHÇ2"©ºœÍGuˆ%Ru9-ΦÕå&ÇØG“òr Sc/ç!|8ÕÓêrÌô,fžùJø0¥¬ÌöOÕåæÇÁè„ke°Wq9ûì£zãáTgSðpª¸¥Åå´¢˜àa-ô¤xXÊ`)¦´ÖÊz¯÷Ða­Ã¤t8•ÓÚr…êð¾/p8õ:ÃÜ3‰]ü*p˜û"åèþ`A›ÅݼE+ËÝ8GÖ®«T6¬…‹” S@§=µî_!•åÈyléOÏ-D6¬5¸„ SÍgÙFÍg©,gÿÌ~ãsi9)w¥l˜6û¾]¢/lXK)NwžK˰ž^¦¥åÐËí6 ¼Ý„S°TZéëVÜ¿FKË]6CØŠ¬»hi9)¥l˜‘`qUGÃ'€ÑÄïoçî Xr¹ƒT–“n’+Ë¥Z]¹´SþQjm¹x¹´œÅŽe,êÖ‡’KËIoy—– Åcsi9y¹´œ<¼TZ.•©’Òr˜ßØ]œ~­RZ.ý¨––ËUÕRi¹Ð·si¹ý‡V@swSi¹sʹŃ”–³«eÕZïd8¿i--Çbt±/»§ÆçÒr«}4çðœH¥å¤ŽW.-§÷¨šlñϰ+É¥åÊ}íCóKË-·MË¥Uhi9ç¾x9{ ¸RZN&¤‡™E×VÓôÛ‹ ¿ Ö-Ÿ,µÈ´²\¾ØTYNªje¹TÔQ*ˑƗÛ]YRe¹\õ•¿?RÛbâ@í|ïW[ã—x»Öò¨—oå§÷| gÙ gRÙÂáu–êgÙþàiX¬,,‰XuýõÓì4ËûfL¸|xdã,g>Ëß9ùµ–+ú«³`ÛlË`Ì`Ó)øŸv‚ÿüóýüóO¿üòÓ¯¿þúÓÏç/ùßãÍ¥ß~S$#œx)w«¾‡Žàl°ó5áhåek\ÙÝ£RðS>—êÃÁ½|,íj}Æm!±ô,­Ë÷"o$º_ 9=SîráÇz^µë¥ÈGºF! 
IìE^õY„£™õQö¨Ä=¯»ãQEºÞkÏçm¶Ò.xY¡þ««’[c]G¡…$½s^ë$/—¾Ü#Ïœ‘ yá\î§ô.>Øw/"´`·9²!Ññ%o¼R¼vk´׋!À¹6—HXãS"wVμžãi´÷sy!õ÷Æ#&EŠœ5Úk¯C&ö`ÖxF‹l§³+:G·Æ¹§€¾ñ¥T­eËèÃùÈ+ì)pä2ê|®m¥½Ž>,Bƈ ©ìæx¥+ûÞ– 6§ÿà ´lÞvûxÌž(ûþ‡½¶¨6ßÚ6J/$mž”Þ‡Ög±@›‹ÃžÂÍÏì‡=‚§€øxÌÅÞÇ,$GãÜS¸Ùµ‘­‘,~ïÄlšBhý.Ϩ”±Éâžb68Œák±oÊç‡ã[ åð‹éD4Ö1«ÇkÛRˆ=£,­¢ÄÓ/ê­U÷Ô³Æ5l'°{[G•PkܨÛ»0ﳊ­_†5Îí.cµW Iï1_`’ËýÔ¬ñÛ 7¥‚í1–>*”öá°Ov.ýgϰŸ€m™eûÖr…ýž(~ÊÞ­1hf(®¶»•5Þa?áºl¦¸¯õî£x©A5ƒ٦꾩fxÍðjÖ¶†ý„69ïªÖ„3ÅFÔQEÍ·¸ŸÐ>@¯}a{”Îäsa? ÌÏæÓT¢tÆÆL’ÜzùJçn:{¸#Ìa÷rŠgmO—é®mKk Ū>[û¾/7­‘9ï\m'ì&¤#¯åQuáçCªŒ1狼ÖHAt˰)šÆPÌÍQ=+~åM>ç1£ãv$iu>_ é˜2šGyܦWþñˆŽ›ôŽô³.ÂoâÅ+(¿È›wÇýÖWÆxŒØïÚfètÝQLƒ©Ç:£#kñqf¬,¨û)kPÓ0È×aÆß¾Þ¡XL¿É¸5†ã ÅûåÝkKìÆ*þ%ßÛ › ïÄ&¢ßǽM Ÿ½Ž@Îæ¼2GcÑï­mºŸª{tuÔ\‰"ftÜÞúqŸKÇ y¦®&y…èX•^+ñÐTÖ°¦ßî«4¯-vÃ1š4j~®=> {®sÛË>$’Î!Yôv­1(Äí9Õ‘Ó°¶œú1$ß„€ýu`‰0wxEF¹>ó£+ã2½xNöúx† c «ûæg+Ò1‡d»:È{·•ð}†ÇiAÏíƒö&sHÝÝÚ2 ¢Œq³;“\óK]™DmäÛ<&®Þ—YØ<ß÷Ô[±1$‹ØlmuÌîЗãKnÆÛ¢eœbºµUÄ›CòЇçb¯Í[gÉ( î‘èלÑç¼2ÑŒX¾yÍAya89{u°•åæŒS'a;ãcŽÜ÷‘æD‚ÉŒ¹ZlÔ—u÷Æ«ÒmmþõsPNWÛve8äÀ½gFa‡?ãcÄ„õyn¤ÈÌø-Û6rbZãŒyWÔŸëù_ ìA­°·º~Î#„È*‘[§ V°µœ½œb†ÉœÓfdOH£±F}cµÞá…Á3LF.¸N2çTÞ é²P¸óŽVzËé‰oKÞˆÜJP’§ç³­!LÎwi«¹ %·çcm¾Í0ùl£×Ñ ¥1 Eé õê°->g˜Ó ““bƒÆ ''ª»=]¤­%#µÁ§ôªÛv…0ùu'w“_§¬QRž¬!L&ذ>Ùí;m¬XZ˜ŒÔæþsûe”[¹â¶oQw´¯d•…09éuhÕ­eFÖ­Å‘!LNB».UÝÈÝíg”Ýäs^!LæZ)è°õqÎmAve¢­-DÉXüïî„`5FÉi˜;–¿ªº…NÕè>­m‹ªX’…Þ¸5Šê&ˆ#‘€Æ(ŠyŒ:@vDŒ’7ê¹<_¢à¦²Dô<Z$Ó[Ó‡•i¤Óíg-ºðÉZèt‹k‚²>TE:Í‘ö§sð‡H§9ÒB’õ±zS8ýDË6¯tmm„Ó¯¶§_'Œpš/ö}µátûU nr#œ¦ñÀ]ÂGÕ§ßN·%A©» §iÜ)ÿTÚR8ÝŽ\Ϋ§Ù)¦q#õÊ¡g¤Ó,Qª½÷n—¥tZw“…Msœ õÿo é×uD<Í!— cÛ.âéö{6æôñ4¯lƒD<ÝŽ´1wñy#âiYÇuW/ÅÓígí±<æ J§ÛëÈ×V:ÝD®çÛ¯‘N·EßmÑÈÚ§ùH§i´UÎêq·ÐévärºI®Ðé×õ:Ý “‘ˆôå£Ðév­væé4ö‘Ïí¢H§ÛµÖ°«ét»Ö¶‹"~ÝHäÓ¬˜)Îâ@Cøtò~V>ÝŽ<Â~HäÓMÞPV/6¤|ºU$‡ÛgÞȧ·–ên]w™|ºYŠœ»{+Ÿ~ÝAäÓ4ÚC8ûêXðôë2"žn$¡Õís|ÄÓízîê5©OÓ¸o#u_ñt;²½ø‘gToYeß\u^~Eõva—-âé÷‘w”oØ­PyÈ7Ÿ¦Í‚¯eéÃXÀÓí7íÒzÕ3ÅÓ4®Ð×k{ãé× #ž¦q9†Ó€âév9%l4E> ù±Ë)Õ7ý"Ÿ~=ȧۑGØ|ú}+Osä}‡­¸È§Û­Ô°}ùt;r¿Î!4xøôëö#Ÿ~øô«D>Ýnß‚CçG§ÛíãS²ô-±È§[IÏýðB¤ÓírN\—ï7¦ÑF¸©ˆ‰túõØ"nÏÖ]Ζ„N§ £J§Û‘p’Ní"ÎF:MÛQ//§tºÝȽ]0E6ý>.°éü„MÓÈx5€GdÓù6„M·#·ud&²éüè„M¿ lºÝÊuzé,eÓ4’ÜèÉd¦ÛÏ"·ÚûÏ>émårz9eÓïC›nG‹žR6ý>2°év$› ·Øôëî#›N… ”M¿ lú}µM·ç†dlïÜ: iþc±[ø)¢é×õ4Í`+‡4ýºšˆ¦ÛlÆÖ~ƈ¦_ï+¢év¤…œ^ 
EÐôûZš~Ðt{ç0£Q6ý>2°é÷‘M·W°]£Ì°ií¨QFØôëȇM¿Ž‰tšÆ¦ï5Òévd™ÚáÓï³>ýz‘Osd©ÃEùtûYòIû Tøt;Òž½—:>ÝŽ¼îQÙIøôûœP¿ÏõëœP·§wÔQG)jÚpô÷\dÔ¯ÇõûȨ_ƒYÔ¯»Œ”úÝxÆ@9Ÿ3Pê÷m^1P¾¨íŶ”R·sZä1@´¤/нìÍy…¡Ô¯q¢¬ù€ï-+þûL߸z@ç·”šÊ™5(‰EJÍÏlèíÐ9ájJÞMrÂÕ×9\€ß¸Úœi±©¸:7 ®¦pIð´T`Xã@oÝÑSÉ•Xo¤ò¯»žŠÄ:…jJ¬ÓòPˆ5Ñ!»ÖÎ$•X³KPK÷ÚIÄ:õz%Ör„á.´bÆ0%Ö98b9¾~b>&…Ö69oeØ %h½¢Ãnd Z¯l˜ÁH‡Ö~à^áÖ ­1›«AÙ!Ðz%?†ZÛXŒ¿@$ÐÚ¢š ,Ý…Öö/÷en°)´¶‰³WUOКK_N÷pNК X.,û9#´^¤Ãf+Ak.èÞ¼BC‚Ö˜Ú`p÷/Z 5–ËÜdj½ b·‘ÃqŸPk^ e7Jï;B­ñÞ¸¦3†Rë…MϧŸ=‘Z?ñåó¼bëæ¡±ÖÁ¦[¾_|#A°µTžJØš¾»O åÖx”a‡“¸õÒì,NŸr•[s±Û0spMé¬køK%p½"§µõȶ½õÔ¯ž#à§Fv­\Â+àúÕÁ5q?öŒƒMGp½°?ëŠÅÖ¯‰ØzÝuÈX[³ 9†SYÂÖÍLs›»/­sgÜza’®s;0rë ¥ûˆ‚ë’ÓWí´]*ÔšØåÂeÖô‹mî\+³Æks I?‘Y/Ø*NceÖÜÅSt­ë¢#³¦£ŒõKÈzaÕ1ЬYg³–]>k´oÎ ³~5>Кõ. »Sàˆ¬Õ•7!ë_4›q{ô È3‹I¶ž#²^ð ÚæÆ« k „0Úp$#Èš\~ü!³^ðqB}Ó¯'"k;厥‡ÏE"¨¦ÚðâGFd ‚ K§/òYÛÔ}[7¹ºpCõ‚—¶Îk"²¶xàFhæO@5ö´Kiå%½qFâ9ÝJu5YSƒÎÈ÷'AuûY ön©YƒhÐg-þ³Y/¼¼ê¹IRMv”} ‚¬0ºÓÈZœ±&šBlçc<ÄZ«ï%bM¾ªGùB¬y,JOY«÷tRTSKo¿íóî!‡ ë… °eHíOTuú†…X/m@¿·Ë•ÏQP­Õ¿’ Z«&A5[—É$…X¿PPT绌Šêæê|‘5ŽÈ±¸;"kÜÇXÝyÖ€(ªciä§&»Ï†«Û‡$T¿‹Ö`¼ðòh^Õ®³Ë½XÕ¤ñ”º¢XÕ© « ªm¥A°>ÒCDPÍÏ®lè8™^¼ºy,«‚êT}YÕZÛ1 ª+FJçÖ‹jjî€OÓ{£¨©SXåÕ˜ÏYD>RcDM­5È’ššÿØ÷™‰"jj\Éù}R5µcOjjª+Ú'°ùÖTTS§º³¢¦Neg•W§¡EÕÔŒpDN}FR55wuã‡ßår¢¦®ÍÓr,¬TMMafV»§K¿£š:EP5u[3’é§1ª©ñ¿ìwÊLO5µ=¼BrØî!ª©ÉVÝËâ©lª¦Æàžèxh˜›ššßcý7$ÈQM»£J©k3¾´ð:ë)¥V+ý$¥Î·.RêTµ[¥Ô)ŸT¥ÔÀ ›\_£Rêü\DJC$6gÙ·kܯӳÃUIMMÏRÜÏZ•ÔZ\T•Ô¹‹’Z ø%!µàLBêüDHÝJØ$×S¼UHM©[sœ§ßGR×<©’š" 訿µ¤¤Ö‚•II­õ°’’úuΨ¤¦$+„ˡ룤Æ´%ZôÎ!Zj­Ó–´ÔœÆ…¶¢¦æý#‰<ª©ÁbÇfÁC¿NQSkI†¤¦æÙXG»>ÑêTúGÅÔ¹'‹˜Z« &5õëN¢šš´òRo¯DM¿æ¨¦¦ÜNµ·ß§kUSk½Æ¤¦fN´)Ð?ª¦¦Þ†}sÂTMM= ÊRÜÎÀ£¤šŸµÕl/ê$ÕZŠ,IªµJd’T[HpÚªÌÑ‹Jªs_I5ß3k2OI5Ep­Û¹šð‘TK «ï…ÕË€Õ«ýûßCª›+&³ãN­Z=åÔI¡œšÑx³0Ç­%…S3ÈÛãê5¡³éÂQJ8úÓfÕ~lÃD\?°ƒÿ ©Æ·c¿gY!AÔ6c–cfœ(¢^ðõŸÎ£bøAÈ·î÷ ×Ñð#65üh¤vÈü“áLSŸ ?°©ß‡àuû ü;éŸ!nBfNs³ïÒÓäÕë¯Þ6Ù4¿‚öÍ…Êjõ‘æ—FÊ@ÉMó×÷µŒZaÓ™†›Î?(l2\Mƒ NdOOSÓ±°}ÂÒT¶§èÀ ½KØöJ±4ëuRG]Ò!Xšèõ°Ûpð$XzÁÎæ¶±¢~ÂÒ‰K–¦Èòzût­X:ý¨bi†õ2¼Š¥ÙG¨S5¬X:]Žbi‚[µ\Nz#–Î+~ÅÒ;)SŒ¬X:7 —NI¹t>R¸tB3Â¥ñâ&m.ÍßÚÛë€'.½Ú3Å?ÜUß‚¥s‡T,M|¿Ž­;ÓùþL—âÐïH;‚éŒQLï”Èš©Ç ¦)h¶ïc³HÀ4…lÔµ ¦õñ(—N]Étêʦ­‘ Ê>ü'6¾WeÓéÙ)›Î?+l:½L…Óõ°‹µ N§ž¬t:}±J§í»CTÄíãuŒòétʧÓW®|š²ŸÃï@ùt²ÑR@M øm&ˆ¦zaCÐF„O’j\´žæ~›‰OëmF>½h–±Ë(|: 
‚ʧ7Ê€î½\}æÓ™ ŸNÝUùtzʧS§Ë|º¬—Ë|:ßf”T¿Ž>‘€Í°î† |:=vÔég N¯BuÚGP@|Ò^€:öUÑT¿Î)€š÷²¦àúŽƒ²¼ %Ô©ˆ¦šFjù¤hªsïMõëœB¨õˆ’êü´£¦:¿CÔéRPo¬xÏè"ŽxììŒ ÑT§KI5Z”ï ×D¨ÓÓIu~]"©^Xdï#?HÕùE5¢Ps? QTÓܽÜ,ãQTçÇ&ŠêüØDQM(ƒÜÚ‹DQƒQTçç&Šjê@Ú oZs£ÎW5Õ鹉¤:·EE5m 9€o»v©¸ xΕªslj‚jýQÑS§—Ÿø´Fɪ¨NÏFÕl}mÉílFÕéRUQÍöîi¢ŠêV—ó6sª¨^pÞ(×À(¢¨¦±®#ËKÕœóؽ>fRT§ý$UTçŸ}ÕùDQÍ!vl‡š¢§Î×!zjÞŒ-57ž~ݹ¨©ÓqQKš¢’úu)QIýºë¨¤ÖAJ•Ôœp ù¨¢Î¿UÔúÀ¢†:ß·h¨ã¹D?­w-êétª¨NMQ9Ÿ•(§9[Ûªr㎨œN—uÓé|Q5-wÓéç¢^¿Tôt}çËõÒùêD/ÂJÕKçOQôÒyT½ô‚E†MÇ»;ˆD½ôë:£^š#ﲸîOõÒ¹3ˆ^š#qq÷!ÑKëcµt|Ô¢”Η•Ò¹3G¥t¾yQJsó”åë9£ª”΃Š(¥i¤fðî,<*¥ó&Jé×E¥ôëN¢RZ›è¤_—uÒÚ³E%·(¤ó'V^ýßçã±…ôïs™¶®Ç>ˆüs¾Ï2V#/æòrÖœ™„œ-†°nÔ$ÊÈù¸B%EÎÜö4.{M/ØzދܙÉî˜R/îÜvXžº¦oîŒ åª~A< ÃCÀ3j[¶QÜU¥Ñ{¶'7\Ÿ#x¦ÑÖYW™Am?sLW³$¨«4•F+zOì™Þ‹G_d•>Sèu– Kô9)ø™~0Š9füL äô&Jø™üº=Ïmàg Û‚†?“7 %üœp¾âç$ýUþœÏ)š¢^³\bЬ@f•±Ä Óæƒ2h "Ž‚œ™AŸ$¢ì›w-eÐÔœuïƒ&ΜE½”A'³0èÓ¦m›ÏÜøVô‰¤ä8wo]ð`¾}ƒ?1è“}ãQi/1hëeÞU =Ò–‡èÔ&:‰.³0š­'G$ïC4åwëAŽè@à mÒv_Ð'yìõµ¿TþŒ,äÆ?Ÿ ú6ÝVo ±u!añMñ “ºU4õ–,,¾Ð Óý)€¦Üy_>:(€&¾<ö£­ @'¿ý¬ŒÆAi8Ú(€NÛZ Ó®–èV’p6´J ¹«ëÒ(%Ðér”@SaÚ¦Ï.W¯GtÚSÄáJ íœ6ðº¢(hò@îËÅ*‰@çs ¦,ùF¡žY\ìš¾bñvµrRHËKV.E 4…ݯi[¤:íC¾Òö\ÒIuüRH‡}HÐÈŠ–cÛ>XN·‹ÝÚçô¶œ~ŸR´]¹EY#Ftº!Ðé»Sm WŸ[•@'yVHk£è´Á«:ŸS´ìÔ*Ö&¡Ïùb„>§Í-¥ÏY[.ôyo…ª‡T£Ïé{RúœnMéóÑ¢ó{膅>§‹TúÜ–“Ó'Késº¥ÏIŸõÑgøâ>Ëfº¢çü›‚žó¥ z¶bõàŠSaÏigSس~mŠžÓí+zÖD%Ïv ~&î¦ä9íí+zNÝBÑsÚ×Tôœ€¢çô\•=§mOeÏiVÙsúYgÏúò=§w¡è9}ÛŠž,û/ß›Ðsz¨Šžíœ„Mîè¦è9=qEÏùjOiÈ£Qøœ:«Òç$ Qú¬Kà³uÜÒMé³^M¢Ïò› >+ÒOðYw½|Η#ðƒó:*Ÿ$ø¬»Þ >矽T¢!wr«B#î\&öœ¯§ªB#þjGϺûžÐsºH…ÏÚ|N÷®øY?ñ„Ÿó‘ uÛ&!hÝOš‚æÖêùôŠ ó}ÞªÐÐÆª é:¡ÑÛKÝ>¸y$UP"Ѫ³I4:]Žòèü³‡J4¢ø)1é|dQ‰†<»pê|¤pêŠô2‹8 §ÎG §¶ù|›Eo§†¬ò/œ6 §Öä£Ä©uû8qj6¯c¸Ò*§ÖíãÄ©ó §NŽCÊ©s£€êäÇ¢ šÉiÖ’I º¶¹d¸(¨®øVïÃ>K@µ8禦ÎJÕ7¦Æ?·Î¢¹Š©­‘<ÍŽ·Rc°ÃªA!u%z«ÃŽA!5ž¬³ŠP‚Ôb€£:'˜š {ÙGƽbê´Í­˜š…Ь•0uúÙ†©Ûx=J6%LLÜ„S§ívÅÔùHÁÔDB«BI”:µ ¤¶¶eeJ”:ŸQ8µi¾G1Š©“à@1u2 Lãª}¥µ.’C¸ܼ—Oœ~QJ˜úÆzÝ3NLÍâyùË S'/&ÅÔI¬ ˜:%C*¦¶§ƒÙ’çØ(¦Î$˜úÂ#zVŒVL­É™S§TNýn œ:½ åÔò+¥NW©”ïƒY™*QjžÄ:’ž¥¶Õ8©Ë£Ò¢Pj»‘g›ú]ñ}¤Pê|AB©ÓKLÛÊv ÁÔ‘³Š–bjV’¡Ä”rj}‰S«’%qjý°§¾ð¸(wñ „©¯מՑ8µ~‰S«9ZÕö³Û1ʦ'P/H@uúÙNªå;N Z󨶟# ¿øÏ ¨ÎçPÞ±‚j•%P­†k Tk‡L 
ZAÕé™*¨NÝCAujP:€€êôðT§ËQPÍ\R–……Sç_N}!j´HÖU8µ*ȧÆ%·êÿµÑ#jí»Äÿðk°H}^Âó_{ãü¿¾ø)<ÏþyÿþèD1Œ9¿¤ûµGß/àùó—ž¿?÷þôÿøpÏoþùo\ …vmT²IcJ‹¹Njž']Ô¿ü؃þyµÿ;ì¿ýøó·ÿ6dÿÓ·ÿøöã¿ýé_ü­gºÚ†H~©¿ýLÇwœÉæmôä»Åëo?Ó™Ïô%cæ©ÿá}¦Î_:fÆûÜC”ôe“0ÌËæÓë2¤ñ‹®ÇÂèËs ^׿hÅèzxÛõºœØöÅýs èèKú gÏ}E.ã‹ûJ»žÔWôz¾¶¯´!CûŠ\Χ¾òFêV# ÜåüÇi [X¼êå[yÆéß?ÈYöÒÔ'›-eêû,ÕϲýÁÓ,;b Öûoœæøc§¹Ø>l9[—lœåÌgù;g¸ñéf5goý,'”sÙØºK§àÚ þóÏÿõóÏ?ýòËO¿þúëO?Ÿ¿Üÿû—?4—~ûm9£{³_)®¥)èè@÷{ÆI…1÷¥U¦öT[ÞãvÝë[îäã„ýÐXðaoù_e¦‘Å$*Åj!Ê:öÆ”Š;¡ÚҪш»*7Ô tg‹ëaŒ[3ƒ¼ÏYT~o»§ÃaÔÖÛ,¾·ç£…Þ‡­P!wÛÙËvŸ7…qgÒ‰Õw›mçÅþEÛ ¿ŸÆ°CÛöOlñ]{yÓ}m^ÅÝBó²7T§˜€ÆéM|-§Ùº½‘_*Ý • ©m¢Æ}=ƒõõIRÜ,¦²¯YyÌRO2`wK?2Ú©ŸÜ´½´óX½Ñ>žÇºôÄ0zú°ïh8†i©Ö®Qûlo%qK Àa=H—y×`l[èNë¸ìÔšÆ0¼¤IQºÆßOÃä‚7«õÌÎvê“ÛOÆ­ÇEZÏ^´{™ÛBl#ýñ\;-’î&ªGÛìÉãûPÃîu”ß7ÊÑ{òÁøT‡ãÿÎJq¸ì­sŽRœû¾P¥÷äÓ¼ù Ü® J}rß)ÙRzOÞ/û´ëp“Þ÷mn'иì¨^ûMîM\Ý{2°n=y?‚Óü^Ø5<<ég·ÿ^ú‡÷mmÿrŠxhûŸE–Páh·Óœö–žž °\ΆϞÆ+Øþîø•^»—ϵ“í…Þ“y"a›~ßëô“FñR×em7¢õd´2aww?–àQŽò&1{¤ö½'[ãvÙÚ=ûâ›Xd»¬›Û…>ÍÜׇeB-ϰž·7Ž­X¶ö–ëy4֗γ˶ò¸ž?e:[¯xýYô´v }XnZ§`´[¯ŸÖÖÓÂtÖ ñ¾}XÞÃÖQs|?‚ ñ• zõöŽeÚQ}X¦Ò{°Ô·+ füll6[uµ‰}ó¸{ôÎÌþÁÖ¤•Þ6ö†_módïÊLi…ªë¥Ÿ1¨ hÜlœ[¬S>¨ |PF(f âëè–`´½ñ+ÖåüJ o¯wdkÛƒíÐ^Î`âŽ0¨Ý‡À¦¢Élg„ÊÖBeDm¡ÀƒM‰öu÷!™Çz"ÒµÍr¹ÑÆo›Š½#/h@/DOãü™©My½õ|Á¹5c`þÞ¾W×çRÎhüµÄvŒrk4’úôdªê>€ºÿâüäy&X‡ìJ*u’ÙwÚ(^ŸÇ¦¶Y¥Ç t†Ý•ľÊ¶ÅÇdæ5ûîö.€ÜÏ;Ô¾ XU¬º¿9÷êÃ2åßßü~'u*2ÐQÖ3LWÌÒ›Ë+ÂÕ Oççœ×œÈQ`2»Üûóôlò_v–W¬ñ¯‚žÃ§Å½ý,Â÷ר-ƵXÏûrÁ݆ó<ÚëepæÀžçÜŸ\ó§3s+ö8l¤{ž{[¦ø°Œ ”­(ëÞH`õôfT ltö={{ÆS$ÂÏâG°×­ß'Zl–‰ìü6>ýîªÓµÝ›ü´YÐ…ÃËÓ›¹“`:OãôÜg_ó°(ñêcÏuŒËÜIðÜäÚvu÷á¬aÁ’H>*ÞaÖà‘ëD·{­ë̘׫˹˜u‰ð~—Yrmåu[ÿè±Àw´Ê‹r;¨•žÆ+˜´SÈìDÃröëAUç£2šDæ. 
§qšÿ£ÇcûnïóËM±z•Q’Þ'¦º„¨Y½Câ±?beôdywÌŽgûˆ•9%©(}_‘Æ:be4›Ô´ï ‹{UpPa‡Y¼ËÈÇì_ÒÓŸÆLã—¶«‡ |¿ ÝF¬¼¶¥øU{Â:³$jؽ¸EùNa˜*£ØÝ÷!½´R(t‘ß&µM×z *¡2ýÇzËÑ'˜ƒšP#TnËF^?r EyøxØ"÷1É|º2ú1×yHøG¤Œ>€¸×Ù:~ØF¤Ì ÞXi>ÃÃóÈ”Ó{¤ñöH…ŽJéOÛªnð2¨!ÚÇìƒý*¿ïPÈ…NG­³®;PŽhÉ&OPÐ8‹ñ…Øï*º£ÉËìË‹E¼vñÏÕ"ŽÑ2Ròmú¾ê±6OFïËèÛ·Qï€(Œh™Gk—uÉâ÷ˆ–Û}›…CÏÕ’0¢e6]ËfëÝ'þ8ZÞ‰Ë<®:W£ÿˆ–_m+µ÷eÞÓgYª7ÎÂElãµÕ“,ŽgJîyi_ ž¸öXg¢ müÈÝe«G«ëÃ2 ~[kûbsu(þr)£j%uDË ¹5×HKë5dÜdJÏ—|qh·"ÅýY¡YЪÝpÇø,i³22ïÌ'éWÑ£mTPS°èmç×–Ñ/•7¹çoW(ÌE¿:n‹2žô@è1‚åüùlA%Ú:½®‡Ù4Ö,§‘ξåP%i.3èñ ;¶è"`÷¾\PĺcßBõ¥©[ú"õØÉµ;fOfްPñy{¬è„ÉB9Fê÷–!EPß®M:{Œhóšã–ãG[Aú¨Ì‹¾Æ¥7^¡¹‡Lz(p4½‡Ë$ÙgjÝdõÆYµ…G` 'OÙ;Ð{x¸ŒÆíÞ‡‡éÜc„Ë$:'ê±çWÑ{Œp™Ô:œ¨ì[{·PaªéêÊpΣñ¾žƒ¬#¯î|vØÚn.#1²>fj>,sƒg«êÖ°„rxHüð©ëÅ?Žƒ/°Ë?{žº@Û, ƒe]™¥”#Z^š°Øºw(=F´L‚Sãí×SaÞ•ÓsCé1¢e¦("ôýY1ÐXG´Üæ6{Œ>KÚÚn–RÃâ‘ê•KŸ' ¥Tư|bÔh¡`?å>µ½ùÁÚÒn±rî­Ö8ëÙà(LÂPéWC¸>Ædò—ì|õ1¨²Š%åÇS(Ô>Ædò Ïê¸}¶¡ÖVçJe·ÑXg¨Ü2·1–™3ÕÆ,¶JŸOr¦Æ˜lwYÏaž­;#y´;yÎaL®6û¥í³ìmÚ÷b—ß—Aê•uþ^â¨"kHŒ­¤€fod½!ò ±™ ë Øc§#¥H¬£-<÷œ%Ö»kbs%ÖÖh“Ë™ ±nêÀuFB¬i¤¯-ýj¯ËäË=’޼%cµ/ÇÍ¿„WÓ¸ÛÚÊMC„Wo-Ÿmö6«iÜÑö¼áÕÈ.Ѻ%€ðjíC¼îê<ðjíñláÕH=O¦´^6Zx5k«ÅØ‘[äÕˆK¯á[¿Í˜9°õÙ²ôxãjÚV0§_kÀÕmÃDVG‡W·ÂLÖ!ÆBÄÕ¹½ãjÔ³v¦e¬Â"®¦qÃë~ï`9ÐjÚìj>b.&øm X}Òêæ×‰G~ Ws Íê¥8´:{y ¯¦Ñ^Ũ *ÀšÆhÈ`Ö4’6êÆ€¬4WâÄž«'ÀšÆ“$NßuˆÀšF‹±®rŸo`M#£û@5X³qU-(¾PÀºeä.Ô¦Ú“ÀºeäÎ1Ou.°%Àº%‰ã…ÚMX·ÌÚ‘y.¸ºåÕ¶4Î~©5äî¬÷ZÝ2y ã¼DZÝ2kmÖr£¡Õ9íÖ5{¯¿´µ;&^™DhuN¸Zóã#­nm”ð­oXÝu-¸òM`uÎâXÝ2Y©šêPõHI³%n:DX³UVç¤SÕ­q¯£ž ÐêVÞBûv:¸:çj ®~7\-t…WjÀºå.e»t\€uî‘W·¼ën`ñ4Þš!n‘Á(ty5Ú ½ÅímÁ%wo$À„W¿^uäÕ9ÚyuKÇ&âõýÞÈ«[(A9lмºåHÛ˜ìÆÔ«C¼ˆª_Ý&¢êœt-¨š÷[O‹‚_FT™U·\åÍB]9#ªn¶¦oñÓÿ%TÝz#~{iGAÕ-­ø¼ª×¸TÝz£]ÚÚ-;U7A•¼}“0¢êwãsçI"±+ð`'¢êÖ4òÉ<¢êœx-¨º%ÖÛ+ö23‚ª[ÎqY¡{ˆ¨º5ÆíÅ€ª³»² ê–}<ÔÎ%ïÚ"ÌQ;VPuËJ®»—²tsÊÆT“ŠU¿ ¨ú}d‘Ìʘã+¨ºåênÅF¤¾íQuNŽTó UçBAÕ9ÍEXuNsV=¤…UçL&aÕï#%Û3Ÿ3°êœ&¬:gH «NI0‚ªŸt9±}ã6²êœä!¬úõ«UçôcAÕ-çÄ:GŠ‚ªi¼íЫ¯û#©n&ë îÏàê¨:›¯ ªÎ.Ø‚ªÛ‘¶ôp펠ê×¥LRÝú#ܯ»™©ÎUHuË?°*éX'’êwc Õ9ï\HuN-RÓõ…Tgû!Õ9ÃVHuÎiR“º…T·”Íë´uÆÑ©q ÕùsR?!ÕùsR“Z…Tç¤ÖHªsŠ¥êœG+¤:§ ©N_§u¤ü0ÕíœKyƒê×ïNÝç#¨~8uÎNs¼„S7Ó™¿ÅÍÀ>¬Å¾G˜‘S¿>æÈ©[cç~¹ÅìÒBcë¤þQFR ï…Tk1^áÔZßW(u¶ÂJMô…R·§C•ô5„Rç1I“óýOHýj‹ºU°Ð©_.òêÜáD^Ç#ÑWÓo¶mnŠŠ¾šYi½‡0@õÕyæ}5Ÿ‡…€coKôÕ|Xv—nÈ+úê<ˆ¾:ÏÌ¢¯Îñƒè«oŸ:˜‚иó?„ÆÂ¬3–f½Q 
†:9?Of½F{‚̬kL£ÏÌZ¯%ʬ_ס5êÔ{$ %h½±¢¬ca¨Ðšîpn—ã:ÑYCßm Øü Qh-U»’Î:UWSu*g¦:ëT%-Ak­j™ unŒ:ëWcÔY§*›ª³N%XUgêÝ©Î:Õ6UuúôDh½´º4÷OQi­E-Uh­%ZUgÊö©Î:•|uu¾uÑY§Òª³–Rª²æ¸ƒt7è(Rx/ÖæU™uª#«2k­#œ=ZP™õëR£ÌZª ªÈ:ÿ¨ˆ¬SufYh”ªÆ±h¡Š¬_ç” $º[¯"ëTESEÖ©J Š¬SUY§º”ª²N(Ueý:gPY§2Šª²ÎÓ€¨¬¹X|cz®*ë|=®²ÎŸÿ™Ê]Êç/*ëWã¡uTe$‹RëTÒR¥Ö¯_RëTÞN¥Öyæ©u~‘"µÎN¤Ö©D­J­s( RëT¿V¥Ö¯#£Ô: e¢´ÎÏ@”Öù6EiýºÍ¨´N#¤ ­µgZ§!Y„Öé ¨Ðz§ªÞ´@R¡uºZ§>Û…Ö鉪ÐZoOuÖ鹨ÎZg#‘Y§;W™uš8Tf-“€Š¬µxwY§IUEÖZ¼;‰¬ÓϪÈ:ý¬Š¬Ó¨È:MKõoT~O"ë$zP‘un‘u>§¨¬Ó„¦*ëñ×TÜYÏeÖZ93ɬ­@6dÈ"³~7J%úøR¦ÎZ/%é¬uŽ|é¬E—¦:k*ìØ¥õ­ZkD–„ÖùzDhïðÖòåz¤­ãWdÖðžåðMÍ$³Î"³>¸¶a¡*냱#ñ\EÖ¯¶ ±¶6˜óî'u« W‡ùª±¶îüp§냎â–ª°Ž¡WÒWç‹}µnòô¨¯¦d-w|ãDõÕéBU_M”ºoÑW[ãMºÀ¥ë–õÈ~Nëüƒ"°.¬«;&…5c3^„®ÛŽ ë¤õT‰µ­Ã!M}Ï-I¬ó)EbmÏ Ÿ%ûÆžF‘XkwS…µTIa­£uRX§ñFÖ:Š%…µô)ÕWçË…uúŠUcm7i}È%«*±ÖEK’XK7Vuêpª°N£‘(¬õCUµ^¨ê«©Ó{ø>w¨úêôÀ‡¾:]¥ê«åY«ºZ†6ÕV§'-ÚêôŠT[Šj«uÞKÚj½QV§Ueuz*ª¬–w«ºjy0ªªÎ?)ªj]$UuêöªªÖ5|RU§§ªªêô:TU­÷(¢jíjª©>Ðò\wOÜSMµ®b“¦:Ílª©NX5ÕúUœ{þ÷ßgý±Iõï3¬¶(ÑæÏ¢¿¹´ö ˜pi¸p¹Æ¯pé—ítÓ/ñ]DÓMw„:ȉ“V7@1ópŰڢúŠQƒ»&¨au‹wçâP «)ç·Mh)†Õ8KÛõpõVÓxï5¬¦tXpíVÃjrgñ[5¬>©m5óšÔ°š…vzþdXÝÂÁs8nªa54c ŠÕ¯6kïÚsê³_µÝÆ5²˜£_5upëµôMõ«¾©S<ÝJ³_õÆ¢³–Nuůú²óŸ¶¢.‚¦9ÈþáåþjɯÚÅvq’_õñÎce%zjF0lu?8€´ºÄFML «KÃÂvÕ÷M·ZÈu¡ˆX¿Ø¨§.ÌkS ¥–ÕìVÍùdYM½Ë¹Q—,«ñÖ æŽjYMunOª¿j›Àn„WÇ,ïö~í[Š–šiq½§b^´ÔêØ-JêS¦é…§JjJ²Õii¦Jj.êp™uTR[c9[6F¿œ¨¤&*°xd©G%5¿ïun±ˆ’š0ÂٻW\V%5ؘ -÷i%µýÇ-pº”úÀÿ#õˆ”ú`‡Ûní/X¤Ôn™ÁŒG¤Ô¯Æ ¥n—uO¹¼H©‰À/|”)5¥ÙÙÔ9êhœRêŽöãN¢”šHÛáÕ¥Ô;ÛãWG)uþY‘Rï$÷Ú7ºMõ”R㮾4'Ît£”šFûÃÚ‹”z'Þ°ù“Gb›á•*¥ÞAÁ=G´Ô;…×2̧DKMÎ0í¶h©wr&×){15-ì==Š©wrêËÔÛ‹˜š—ÒŒQûK1µý*©†CN.bj>Cì¬}ðu15¶ôLV>i‹˜ÚkLV5õN±Ãé]ÔÔ\ Y݃OÔÔ¼H`†_GTS·žô©bꤶ}¨ÞEKÍ;¶qÌŽÛ½qj©m`³Éd›À;j©Ú¢‹h©YÀ“‘Rïl‹/Sø-Rê´Xoí@¤Ô¬¡«þˆ”š#ÜÌýÈ(¥Þé×ÁF¤Ô;áû¶Œ|%‘Rs'‡N†¥Ô¯sF)õf°Û^]O¥Ô<=Ì—œ0G)õÎÂý©ËÛÚDJ½ƒ¦ƒß‘H©yaLLG)5·IÚ•o}?Rê6³‹‘RçÇ&Rê¦qÜvŠ”š—l#Ë$ÞAJ½³M°k}°«ÞÛð4}™DJýúÕ¨¥~=™¨¥æ;Ž~^¢¥Þ!ðv‰|Š–z'öyf·§1j©©1a{Ì’¢¥ægcæ¡h©[¡¢à$Zj*æ’åßh©i´h`qéŒh©-й¢¿h©_GF-õŽÅ'˜i}›€ÐX¢9•h©ó0µÔ;:{ðž6,bêô TLÍc#S?Ôi[Lîbjûû-Ø©œ:Q9uúTNޤȩ_§Œrj®s?=âU55ÑhpR5µ5òµx@¤jê×Crjh¾Â}¡rêô’UN½³à8W¥TNMùçr[LÕS[ãA[¥TOýjŒ‚j*¦¬sQ¨‚jŽÜî¡àWAµ5q‡þ]Õ¯#£ z'ò«#uBÕ¯#ƒ šÊØÁØHÕ;¾W»+TEN ] nI*§æGíé Éd—Ss{vÃ^ˆGåÔÜÁq³’|Ú¢šÚ~Ï>Ó{uÜ&jjrÅ£iMTS“* {DM½“\Z¦XÔÔÜ:…³Þ& 
«-ãJ¨J®ZjôMÊUõ¶)¥&å —ÙŠ”:߆H©_GF)õÞ†vûªú RêI¸…v”Ró¢1h©÷V…zªéEKÍ‘§-À¶OZjn:šäˆ–𠲕×^ý‚¢–š ýºM™©h©yîÑ'j©Y S8û¦~}æ"¦fIJiu¤)ˆ˜šŠJ-kbNÝ&t²RYG1u’EL&tSón‚U ˆ©­­žçé(SCq©Ê©Û{»ê°¤=µ5ò¯ö¡üŽzêTáJõÔ;‹‘ÅftgÀQPý:2 ªw,Uª–7±nÁûe]Ù fDPm•œøýPz ªÓrRÕ4Ú A½ªxÖ}L…wTS ,xÕª úà3°ó)vTó„:»ôÆ)¨¦+á±â½$ ª!Øyøˆ Úº˜ÅãWTÓlhÚ|(A5ÕÕÎ24IMPmÏÞ¾Òí>¿¿Šï5±ñÚÆÒmÿh²¶¸ë&ª©Æ¤.“¤Ú·™€©šjëFð¬{6J­E‘·ªªš¡ÖVäîõ¨µ3ôŽèš Õ&à¥% ]ß°¦1£¤Z‹š ’j-®ŒõÇH_t}S?h#xÕZ´'wϺ‡Rk‘ÊÁõ8×Z¬kpXtÍu³ßt]Ço¯µx1‡"tÊ®7¾ÂËó>´Ö"‹ÓvíUkñÀkéQ¾°kˆøÊ…)»Æa$äNvmÓPÇ_þ÷®­C®¾Vt}Â:‘–¢k®k&’(º^¹ù¹¡µ0Ɖ¢k2|Cê³V[Äiý:G­Re×$|X0æÕ6•]g+ìšô¤2m"•]gÎ,øún³ÄîVñu®â(øìloît[Á×ùHØ™4 Àfaå&ݯ±/>î‘U¬ÕòÓj‹Êµ¤Øb¢lZl1±KÁ×}¶à8]ðu&_©ð¼¯©œžø:—š|Mt-O_ófž±îi‹µ-ž³Ag:vDzMÄ›µÖ"›iÖå¶^òKé5^9Ó 2ÕZÔp=ÕZ$Š ÛŸZkä;ýÁR­Eìp-®ra¨[d'»òåuäé55.×fRþ^óù’Pvy)F©¶ˆS¨=Ð^‚*ÑkpíKÕî^'©õ 8hµà¹_Ô[<™~xšý>…^ë*P鵆ݩÞ"ʹ“ˆ5K½E–aoTè5olG>´v·i§×ÅsfÜ ½fýfQÇØ‰zÍ8¼Øá#B¯7ð†}«÷ù¦×Áó5*€(½¶iÁ>_zþ|½¡‘°Ìï>âë Ö^³—Î|MéŒë:‡i‹àk›¾Ê~Ú*НÛ4µSäó¿¦>*%¯u¸}L~ÍgKSk¹ÅTsRË-2£Ø¡}T|«œ ¾¦GÚsìµ¢R±ÅÚþcïR±Å›ô«â–—©Ø"éöm­ý«zM}TŒ¦¼øc¬µÈnµÅÈ>h­Å «Íë k©E#Øn<û}h©E–Zw[><^jñ¼¡G½¿i©E"ÌÍ~ÿ`²2rØŒ½ÖQ‡1”ZÄö+¨5¤ÔâÁË?Fv•–Z´«s)µX-¾?°ëü*„]ã}wÚ—VübS©E)卑!œëô(×Z‹eÍæ&°kŠ,ÁþOØuƒ‘3A^Ùu.8*ìóÿÍb!­„]ÃCJrªµ˜”Z‹©À§[¨U>ÔZdÙ±­ÃbRÐuºËTkQ~4•Zäk­¼Yo›àb›|ï­×ûrp ^ŸLÉçï¥Ô¢ÖÚ|•ZÔ’\ã`¿=ÎxÞK-îÑ®BÀ5ù¹ö2v¿œ®)ü ~G¥E ˆAŠTZD—µ2™w,+•íÓ²qÃgˆTi‘ê1%¨®#¸¦†ƒ5XW~ûWó³7¡»»gH¡E0Õk9§B‹PÍÆŸºZd¬´1ÐM]´Ð"&:å´ùíƒý‘‡ÐGÝW¡E›ÁÝO£Z¼Yž^•X¸uê‚­s}Ë\gÑ&ûb+!½uIH¿§Moש¤lª³ÈúܺßÒ­”µÎ"|ïùU©Îâ»qZD;t,Ùž¶H®1µXÂVÈ5u¢ë@.´h=aàʮӣð:we-´˜Þ”ZXÐZ¶ðúoZÄ;£© êPhÑ"( eÆÞ–ZLŵÐâE2±ª—ÙÓB‹Æ£þ/£k2L±¦ˆ:Zd}´°”ô6©³X–ôðJë,l@“­IË,”«¶(±~°Áu”ÕÛÙ«–YDzâ(³([‹,¦ ƒZeÑ&Òö~ý÷¤Êâ 2[m)ܯ$VYD‰²ÞWõrˆReèQ‡eª²Èt}1u}ðá>ußëwÝÂTcqAuç—šk,Z¬sœ^¿4×X,„3›Wg”‹ä4r¯R‘E6Rl~t— ­²ˆÖk*ËR•Eûûù§©ÌbÁdkñ $•Y´iGëx*³hç<± ó'$eS=M¡Öyð”2‹©¥–YwÛèгTfñ JchÞ´–YLë|b#2»–àûl@þÜJ-"èúf¡äù$á!$üñé[´·?ýÏŸþû{踽êúM$ÝÛ_“t¼”ïBò¿ó¤v?‡{þŠSöûl-ö>ù·ÿð³¼ó?ÿñµWà/×ßóßÿxáíOXýà¯yöýìÏŸ¿ôü½‹{gÿpîç×þü7®ãS›ˆ¶N°kqR[õ<™AìrþåÇn1ôÏë·fGîÛ?û÷ÿ·üÓ·ÿøöã¿ýé_ü§!u*dAçV·¿sžõ'²ëúý§*ͺh¥PòõÛOuäS}Ùñ%ÃTúL¿~h,± ÂWÍëUYx>®êù2¤ñ‹®‡-+ë¡­„Âëzbã  ÈrÝ÷v½.'¶}qÿÜCæÚ—ôΞûŠ\Æ÷•v=©¯èõ|m_iC†ö¹œO}å7L>Ç]~xX 
‹~Ñ–3…õØ{¤^¾•ïœ|ÒiØ¡ZNêCÛàušê§Ùþèy–&žµEkª¿~žãžç—ÙÊ~­Ë‡§6NsæÓüSX<Ó U°©c«+(ಡ,H§àÚ þóÏÿõóÏ?ýòËO¿þúëO?ÿå—¿üï_þÐ|úí·eŽêĪÝr¹gWÑàîj‹°¾ÅYÖ;¦xâ^º^.I,­¼¸“$ ­V|ûŽI¡Üù ø)»±l{´$¹agjjÈeç¼lgP¥lhŠŽÍù”ýxž¬Hƒl¿– ›…˜c¹|UO³)h"?ƒGzi›Û²€Ý‹EK×Ãï ½ ¸Ë‡ÅU]î^ÈJ{¡(®uÇõêg¼ƒ LÎ.ÕÑOX§d”²Ëù ýr¬A”‘èT9¶©»¨¤]ϽáBfÞÆ°(K¯Å[Ð…L(¼üP b¤çjØvwì[)Š43 ëv«õ±ºøð¶ÚK™" ?“¾z<[f Úij'Jö¢ž+-RÊ€ì'=ä|þ¶L1ߎ ëìL½ŒÌz’}êý¸:%•"SNa7E”A¡Òå³RØcvÖir?ÏÁA [Ƀf"Ü–¾…½â ,q„ßJ„>×Ü»¯MÌ0jº—óž»óÔð¸ëeës{¤O°˜žÖµ†=v¸e,zÎM¹ö¹Nå!ÊîviXáƒuX) êz¶B‹MUs?œQÁÙ¼°‡:y²±uŽ+l”Nd·PVeó}›Âf¨s9*œÕXA¹6ÉÛn§|êj>Gئ|г÷ôÜÈ}Î=æü}¢¤sBF¡ö5ýÉaõ>7Šy'k çRë6ƒÑŒÝöXU¨¿á“º=ÕV- + ž%®ýåJàž’ZØlâb…]¾uvÞ휲Ôˆ;†_<]l¹ææÊ'ßé~3;»¬6ylžNs>ôÍA&ß)ÚÒgëõlõóF^ÿJžÆkîtÖ:G”Öv‡½L;ÿuZ^ž®Ã[~©¨Äž\ß¶>[ûÑ…*8ôêûÜwäaoe”>:ÙŒÞÌf×¢‰~Æ+ì Ú},¤îúOûÌã.áŠTg³Ùx…(ÆÙd Ù(Uf,ícÇ5wXŽk F×>t[ü¯H»­ŠÎW?gb㥠µçœI¬MB3»ÒÝoIcí¼dzbí–‰ ½K°:ñ æ¾m[/[+3ø3tH´½ní£>¬×í¯p;k%àF·cóëõòޏÑv°Ìêò¹ùÎlfôúÈs#ùÁÃj¯[}ÝM·ulÕ^χ¨Û~öDtÕÝSÔ­ò‰ºÛ9É*쑇„ÝY¼ôŠ»ÏP3*ÅÝ,ÔmÐð¥ŒÄÝДPþJâî¤Zи›\Ònæ%îFÈ?3$îÖÝy »›nã²ë:Žûw³?º³Ýh(Wüˆ=$æn'#vòîÚ‚n^Àâ¦ú!êÖè'GÝqC>EÝp}8ìJÔâüWÐ}Rÿ¡§…¿‚nû‡·FåCÐmc º¼‚î‘ß39íu“¾‰LìSÔWœ¯¨{%3¾/9sÔ-w™£nJ.WKÐgþÃ)è¶ï«¹.LA·j6RÐMfœùÑË¿‚JA÷I):÷p{ÅÜQ{òйíÓrÌ}Ùï{vk޹wÞyÏþO17É@ä}ôÅÃŒ¹åB4æŽb¸mÐAGÕ•¥q¯èóòŠN¯Û:çH–‘›ÆƒTû¿rÛ’ªú´(!7´Án‚±çCÄm¯u3JÄýzƒr§ï&†Ü p䈛ºµkÇ4âÎѸDܤ€RöE·FÜ+Ò>tÜŸ"îµIÍkÏwˆ»­Gíå[\°½Bî†MÈd\Ê;ææb­gÜ¥¿, ¹ÉF¸†s­„Üð­Ó"Öî5‘"î´èwV$é UŽmäC´-í‰_Tiþ}ßøõš+‰¾mÀÙpé¦-}ŸrnWín}“ WxÀëù޾‘.³œ¯FßX5bÕÜSž5ún™Y$•÷/D¢o„µç1ªakô}0oÏp@£ï™áa¿{ïèmåÖܶÞÁ7©u—ÝϾïà×#맯´%øÞï–Ä<–™|,ê(­ 7{,PÏRÞÁ÷NÀHªß“u¥Á÷NØ:|55ú>X*\ÃW£ï”é¥Ñ÷F'YQþŸïø;YWhü½·ûó‰rï‹­lR÷w Žæ=Óê18Ùž6–1*½cp2É ëQ”ÄàdÀ|ÆuÄœÌhjN É7C65ûØ¡A8&d+³ózáˆßCñi ÂS²šá6ž[,1ÊyjžìÔ$ç>w ?®ëûn‘ì4"(œ‡pâpõ`CÂpÒHyö×4 g?“LH¨4 Ç“tŸ@MÂpV÷t@)% ·W}á ºìû‡0\³N4 ÇÄ4Q'õ‡“@g³ìx´‡Óé(ì!^ŒÃsò‘â-ëå\ÆšAqÒÖ(S¶+üf^Æ,áòˆ:â$¬…òÜН'%/X ¡øŠ[L¹e ÅI uÝR(Ž©ó1œ½5'³(ÔîVü/H‚qì ¹çþ!¿ÉŸÜ-ª]ßÁ¸=ƒ‹LY?Rø7Ü#)pc4ηïÇ‚çØ%f%¤÷(ñ8:p[ÅÚ8±¿ãñ•½ @úÖˆÄã„rÌÝl<Åã˜ÚZ,8c<ž’‹”§TJ ÈÑs²CPúRá³Ù»çÑjDN¤îôp=DääåØÒÊwG5"''ÔBò½ÞCÖÞž¨]p÷ýŠšIqrñå<P/Å ÕJùŽ­*NìùµÂ8ž©âS!œ¡{Ð(Š¡ëðSÁÉÙòFMeœXã [òIJhx‹¡žðâã|]¡€‡“éHà 퇰FhxrÁÐpœÌ!r K<Žoð9Í`4O“ŸÆã;IþÖ{¯~+³˜©-ßíC<ž<Ý4×4w ÇíQn\ìÖ¥ŽÛ«=qÕî+I ÇÓĨá8.8ÊýHÇ3+–pœIÁ¾@{ìw 
Ç3ë•pÜÆû.lò:_Ѹ}yª¯Â>Y[`Û?ìÑ’ã\ô}íá¤ã)·7‰Oàd›Ý_'…“ScÓôáa¼ p÷u]¥* _Ìæv€ P4ó3¡pÇQUL£q|•(.ÐË5±1 ùËhœ édu]‹ pRäë(Κ$($¦Ù áZEáñÌá™÷Iƒr v³)óZßÑ8j;ÉþsŸŒ=÷u¿£qbW6E·óƒ Ý‹}ànPª@ ‚…Dη %óÛ.C±¡ô´ûÛ]M’d(6Πê+J€8"þ/õ·HŽ…Ïúˆ³ËDÙ͉WòØîË­A OS³HPNêË®‡fn« ÑÊ; ÏÊ•„³ÿ^§U’pJaÇÐih ÁÉäG5ØmŽ~»ü¤2¾[îG"ðʪõÏÒ¢©M}r¿âïÊ…mý@ÂoÞBÍ»#Ô}³G°Osn ¾í¸ ·üž|ª üà÷//Ä¡ <ÁÞzÛq|Rv]{Œ¼3ŽŒ‘7=»4 ¯;Þv…Žw—͈èÇ—éÂ(a7OyF‹ª9a1‡#ªÃñt?¶ƒ¢)ÿf!l]vu‘K¹óNŒNð æå¬×+à®BÒ£{h'ð›€õvW¿¿ %9îkñ€Zá÷ŽÏÅÙß@ ¶i[lœé†`ksÛºye µ“Ì2kMDü#Z‚»ÈnðâÞq*†Ùù=öf {ìcG,Ù¼w$×ÄD XùbÓ%(°Û­ï›Î„u©!ÏÜÿ¯ôŠ®mÜ®^Ñ5‰È× $º¾ñ|^G±®ÏŠþv htMäÆânû»Y¤_Olq¼Ãë ûÉ®4•ðšàªÚýøN±„× yùÖ–{}‡×É;AÃk,(0´v‘‚º'Çêæ[IÐ PõÚsIЭ«ñ$èÆz?îËσkdâëú!¶&"Á´«”±ušy_’î¸Õ(Á5#þ.>I7¢õÛF͉¤;=œY'™šÖiœš‚n/mÀþ(èŽó@ «Ù7x"¿ò «™Êpzì6ªç¦´z¸þ°DÐmXíÓs «ò’»¹î„‰;†ý͉ž›èð¶Q¦/ ’ž{ÎùÒ&†Óh½m!åTO¢iB ªf­÷;˜æiRŽ»³Us³ÍmϨ«6%”æÖ©Òxôw#ifxÔ®sŽ4¯}çÃñs”rï?ôi‰ + ŸéE/4ûHÿŸ·wé¹%G®,çù+ΰj hwúƒôiU  t3Aȉ¤P !¡ÿ¶EÒHnó/3ã¡øªT¥{ïña\ܶ͆ÆsÈWV wbÿp8Ê«†ã¼Z‡ï=Çó–àù¡¢—ƒNÖEÁ°™[~-tƘp˜KKäl‡ðúöýù5p“¢ÄÍœf1™ÛÚjؼ[œæé6£Î³‘m»×¸t—vG7?`q´e=<”°yÃxø)IàÌ“Ûu“ï!¯qs}Û68ë’¸y£Ñcãaþ*n–0OâfüÚmä,®é^âf;öëÐÍÆ4lÏ'as<¶„ÍØŽQ'çpངͅ®?]%l.x‘ØRÛõ¼kØl¿¹Kyëµí¼”'™_£fŒÕ®Y.xš €4=¾X 1ó†V·ìé1s!Þ|ˆXÞ!s!ÞŠ¯®–ˆ™ ò½DÌËäé¨3î›§Íyéëˆ9ÏÒ'1"Cí\Ù+3ƒ71ÞõEÄŒCs‚6˜Cƒ]#f†\<Ò^#fÊ.Ë%b.D%sûW"f> \À5=1c;… Ò[žýðq-|qQ×0Ó¯pƒèw032Zët»v ˜ ¯áÚm{ÌvìD(ÞV$`ŽŸv ˜ã±5`.xímÏ~ºeÊ ˜íй5«þWÀ\ª­óæ}5`Þé9¥ß0ÿF[•Wµfû" ’ú%H¡ôóðØÂ PššrøÊ¹€DÒ mœÇËÕ1–¦A2ALÿò©•kB$=‡Ô¥Ch$†OÅ­yCÔò5jF³’¯©Â^£æ\iÔœð‚Êî@-a³-ÿðšqºDͬGŽbÿø¤²‘¨9r. 
šÕ+Í™EÎ°Þ y!QKˆt~)‘2L!ÒAm§3¾x‰«>$^K€tHÑ‘Ðð“ÝÙLCg†œcX‘¾R!aÜݸ^Cg*­ÙòÍ›ð:³¶gµä!pˆœ×äP ÃSKèŽIèÚ’„Î1ÌÕÐy`:+&ÒÐÙu³€Ü—Ë:Sý‚t†ó­yHq¿ŸÝ“Ÿ%t+¬?o÷0ÕŽá󖟵FϘüR­àø"ÒÚÕ.¤‘èyGè86sCô¼•<Œ;côlßpjâ—§:{]køO[Âçpš„Ï/¸t)ïCÂg¦O,Û;nð9F*!|ÞÙ"ìû5>s^9ì{w¦$á3j³mÏ0ð™øíÌÃlSÂg¦]ë1gß<’ð™ë!Oí³õ=óÊÒ³ãgþŠžã£KôÌ^/âzÏ1 Á3…VÝö<#zèf‘Êœ´/ú žqþ¶È®{+s>x³£Xˆž›gÀö–]—<ç\ Ÿ¹›ëv9Ç>‡Ï¶ÆÏáËHü|£ò·WøÌë8òHÞð¹´_w«xÎÔ8@癊Kølçý‡5|¶[9ÎYƒ^ÂçØ`Cø|!x}ºîJÂg›œYûEÂg:ÒÙÝÍoŽÑú=C’õ}ío­uasÕF‡ž £á³ö‰žŠÇàÉþÖupŒÌÇÛ=g2¾józGÏ7¾¸x–<ïèÙæ<’Á|sLp3å÷ì5ûbK£gùè‚›5Ö"zæŸ?X<¿ÖׯÍí¿~úù¥O øSª¿ÌÑö§ßÝ ñ«ù {üé~Zžø÷¾$Å{®þ ã/÷üØ¿ûPðÆoÀÿü­×·5P7àùÎ;诽}ùñ—4W†ßô Ú øŸ¿õúíµ÷ð¿|q?ÃH°Øˆ‡¿~ÍdÁ|Á¢F›æ­ÿµ–…©Öóђ…Ÿ¥—cáϸR-²`Ñza¾üÙWº¿ÍqŽ•s€þ›ŒÊß9V.O™¦mç·M ÔfhY†`†›cßs7$ì¸&¿îf=öMÓæQ½ìžj!&7³úÖf¹Í9ä[Ú×mDnâ{ÛH½m#z7ßÚFÚ ±¶¹™¯ÚÈϘkîZDÒÂì#6.£Ó̇­ŸÿR'D½ æ - -ì¥b¸N­[D‰™œÒ©b¸Îv¡Â|¨¶ÿl+Äÿã‘3FñÏÿ¶\õеñRþüÃrÍånÖRMàa·¶²oNX4Æ[9ìÿõ™õÿü_ÿ×ÿýÿü¿¯ûAx_úýü7@q¶½l¶ú"1²eæZÔ¹ßÏ©ñ?›SãýMNœfkS‹5îò¡âÂΪö=ÈR¤‚íÁ§õætb È‚Û>5å«<¦íjr¬´½À»þä1½[3¯Fù½xH­/;k[ë¬[]­¹ÚyÓRÃVH6Ö\­µòQçþ¡õ…Zy´µV @—D"ÅÙQó´5WJ"¡×Lm¬ÉÏÜ«¥|O¤)Ø$\½µRëð.$Ö¯Ó–õüW\i¬ñ×§*lÆžÞXàN»ù½{j|2rzéL{ϵÔio«u«ÓÊ­]¨–žñ¶ª ²0œ½­ê;,wÝŠëm•=ãÍéçêÇFSÚjbÑ›Z…^?>ºb¡D•V=5à½vS}:Î{†]*µ"×l­Ä3>*³am¤·V\“÷Ò“˜8Æö^o­Ô¢±‘änÃÝS§8o­6Úc÷²è©j±o®¤ï>WßM'ø;0æ­í•¢IÖîRs KÏ9÷ëxw[ÝÚ°ÀžÞáÃ+%tíE¹îC&¤¼N´)è{ ìh>‰ì‚Ó‡Wv XÛ÷þT N^ Êé—ckz ¯t¿V•µc«±7Ø•9wµ¡ãs6U[Ñžs`R˜Ç»ÿP«ƒ·L×£:^õ6(=I2týÏö ÙX|-(öÞÊkrlÔ4åRu—RÝ ¦:;—­ÑÖ ´§íhç±'î,·Q¥„O»EÊYûûÔru=»Œ¢éÔ3mö`>ȾSJÍô±áÉ1kP§=uîÇÊîC,;+Üd.í6ËÜ ­‡ Êëjç±§îC,fiˆnRýàÔK·fÓíqT»°î«@¹tJ™¶FKc°žÐSã9Æ.jk´œg-·w<*©cÞÜíAÆVË…¨Ç,º|娕/ôÚyç¬cZÙ{4Sœ£Ö‘ôA›5†Ø+ŸýØ(cʳŸööþ^öê 7­èbQCÝÖ>Ê>µÚ¤ÝK{¼ª6óQ¶Ðð3Òzì©5L{P^Ù3·tk \K“EÓÜBcwlg‡H³0ö –¤½Ð­†{[Ýq¤:î*œîM6·tέ5õVȼ³6eâ8ÐG•ê@±„±oåê—{êyo²¤Z¬ß(ÔBßf [V·Ž•È’—»•^*œµr­¥~±PªqÒÏÈrŒñ«ÍžB•J úŸÝ¥Á`•mıøDbbRž~¨Œ0–zϹ:E·_ÄÌ¡²ñÐ]‹”¶&{tÖ•W{æ<·é©hïl ÐöqPìMúø>®Ì=|ŽíÖ¾¨Ó[UQoo²Ô…;FøÈÂèìE†Ûv4oñ£RqdO²z.rpë#\©ný·˵§®ce²öžYíGû4¶¸Ê#’Å*e˵Àw=vÎÒ¤µ·ú=sìÂWg¶Ø‡-Øq×£0iÆía{ÆO}¢ÉÒ]ìiHý®Ç2ûÛ½ÅZ4s÷*ç%]#”¥³Ú³ÚØÜá©UI{‹½HY±5P Jë‹¡l˜íØ3‚Ù㪶•böýÙ^›E5a|P–b—ÔÞÛ¡cjÂOµéˆ J«›œú±g²X˜X_»ú÷¼ç~~X2RýìÁ$¹c†×ž7Ï2¤õr´ä»õ^kôûd‰¼¦oÇFÒº°¥ó–~=ÜöÆ[š'NŸ¿3½Öؓ䮣ÎpõØ^e}€µ‘ qO Æ©nžF «Á#ÅÍ·Èò = ìÇÊ 
dQÝ6€ºç÷Œc­)QïólcvF¾3ÆWªÛ[*­Eæ{*3êOZ;óžs­=Ú[+u1‡ÚØkÇʈcs•ƒÚ:¢¾±ŒìgŒ¯¨6,¦¾ûåž*èh­õ¤•S7´ÝfÙfåQƲ}8÷PÔ|a¬FâËÖ07ÇXY3>\©ÆÓõ.l}uÏ0Vû…­¯ÎÆZ'V†õËYõ!½Éê¤RÈk¼fLPmLÚRc^`¯N”Íl‡bçy„±'Z.»’!è™a,½½üól·‰0i„±ôoöS¿Ü35' +'%3û¼ˆi†±”™(vz]ùÏò cA`ö z Rãc„Õ‘ÒÖWÇ cmÌ!M¢wIRÏ¿2¢aa,³œÅû¥­õ(q>Jòùž{äÄRáüa,lÓ^³/\Øça,³#~á-œæûca`öýÙ3îÇc™$~5rDuó<ÂXªÀ6#äv›O-2ÚÚìU» v |†°ãAÂò°ù8kм°¿bý±Ñ¹ºyRGuæÓþ«7~®WxÀ¼´êó¨}òµÙ ²7ßפÂyÏj`g ŠëÍyO†­›4Õç=‰à±Pê¼vá¼µ¼1ïº3®•ó¶ºÈÉçcá¼g C‡bX8o=¶cw”Úy 祊5µ3lUv}祈µ­ªÉo¯¾r^ªq'›çú¨) —bÜ76sPÐ þ¶97a ù  —ªàÛs †ß?ôR=Í C@/õËY µ O8/åÝË\H ç¥ûcMqkkÝ•óR‰žéÎÁÂy) oq¬Ûè=©bÞSÌ„ôÖ2ôÛ“Rz“Þ“²ÂÖPºËŽ“^n#ûŒ÷ÄÍÙâÑ«‘Ú…ðòï÷ƒfÖðÜJx9k³–`ó^»üBxÏZ+bCŒÞΛ„—>{¸Ý­ð]î™|ˆ¾!±â]NÚf©¼'f¦¤É;½^/ƒ‡½ú×O¼œ·§ÌRúÀËy'FWûþ¼ñ²‰CiñôWÄ{Ö‚;[îk2E¼ñ>'ã=)ãF¸0^^ ¦ß=0ÆËy¶jLˆv?‘ñ†½¦•òr^B"Úüòbîk3ìÑr„òÖ32/˜÷©Õ[|þÌK#²Æe=°ÿæŠyŸZ¥þtà·`^N³ð©M¶ y9ÉîÜnûÞra¼µáMg^e¼äpXàÓSQ…ñrÞMjX‹O”ñ>ÕðmÏ-æÆ[¿(‰ë{~1Þú¾Ø^+¹.Œ·vþ},T…ñÖߤlQcÂxk+)—Kµ…ñ¾îea¼œwàR:ÈŒ·~²M[°*Œ÷¬ÕÌ«°ÿùÆdž•ñò›¶¦¼x]/ù+×ñôhg¼µßäË3?•ñrÊØ÷Ä[ÛIaÆjk@A¼d·××-+â­ç‘ Ѓ¹ñjßÄËCÛœ€w{°ñÆË-ˆ·FZ6ÖïW¿ÜŠx9îΨVÄ[¯gq`Õñ>5ÇbÄé+âåz6Ü?5 HïS½­‘V¼ËµöYLKñîSg…³'ìÞå<BGÔ‚wq8†µ|+Å»µÅÁÕ@Žà]ú*6ÎOGÛïÖ€¶vÁ¶$¼Ëˆ‚ª¡¯åïÖRI>E(Ýåj4§V±Mé.SË^l”jT¸Ñ];Á&ؽ[K*ÝÕÆ*p÷©aËíÜGà.·7-»îêˆ'p·–·"Å­Üîr9»¯}Ü w¹Kr°;Î[á.Q‹ÝïÑš¸ ]!$w8Õ^Ñ.¢ A}i(h7t)A»d;ÏÝýXí†ï)h—uÏÝB;E»µ(cKQA»X eû¿}]%h×Î;wk®‚v¹UÛ”¤h—®X³ŠÚê}E»¤³Y ¼õqMÐ.½_ܶqªh·šní®lP´[­ûûž¯‚Ýðpv9¥kGäviü6';¨\Án.„ìÖá":>‚vÃT$hWC'A»\Îywƶ¢]fÌCÚþiíjø§h7ŒÊ+Úå6/ºU2+Ú'l7\OØîSs„¾Œ¶Âba»õz¤[µ5š°Ý2x·ÓV´ËåHço‰¾‚vëäñܹ/píÖQtîç¯h·š•=í^ÙW‹‚vùzutqœ¹ Ý³Î¯Õý…vëyWõbnÇZ3f±óö7Ü­aIÝ|j3ÇJwã7]ùîY}×¶ÃwgV¾[C²ÍÎ6Æ.|·OSŒ |7DOÂwyh6ÖÎ ïÖ§³Öc_íü¼bCÁ»a±&x7ÞæŠwCü'x·._³Eß}pXñîë7¼Ëyyª„ï²€µnx9–\ùn½^Ûxo0vá»õ7ïi„ïò:7êÜõ¹ð]N;/kA[;kÁ»õ#{ྱâݳln°žÆp¼[ϳϼ÷Ç•ï¢ Ä³µ¯ÇœïÖ´`¬cá»g5ÝdJø.ç]6ð 8ºð]n‘"”½J‹ð]æi¸ÇÕÓÊwùI›ž^õVøný6ù<†¯|·ÞJJ©iæïžÄ³xíçpÞ…êÜ‹˜bÅ»\nŸ5»ïÆ[Yñ.Çl&óMÅ•îòþwÒOúvÝJw9óz:Á]è.`­Þ« Ý­¿ikï³)y„îr¬î¿öAv¥»| {JçƒBw9/“†ÝcÇ…îÖkÙ´^éîë´…îÖ[iµÜ¸œJü÷j[î ¡»@²}Šj’ÀÎŒ=6–¿§(Í ó6ï¤wÇì[Z«lØF1o¦3úÞjÀ¼ÔÉ;¶†*æáPœ±QÁ¼ä Ó_¤WÌ‹yÆ15lŠy-F)*(æÅµ}H?…ò=ž#ºSÊk§ÙLék¥¼T9´†ç‹u¥¼Ð¹‹:ví¼•ò´P^~ód%8ž”×V66®ºMºR^–( 3ÿÍó¢tÄCÓ%Â+æM€ ’á×쎎áÌÜ^ 
˜—Z¦(¤†­óâwݤ/A/€¬½ËXæ•d`Þº{»¸Â^ë5§ÍùìÇVÜ»4Ødî˜Upoõ÷wu‹â^»^†‰ôÆ·âÞʺowçg|1=ا¶[‰oªfþá”øÚ?´°.oý©…ø>€¼!]âkcˆ5¨rôí‘ôbL‘‡‚âE|—@Aˆ/Nž¦þø<‡Š0_[¥Y7é˜Uˆ¯L~B|™n*ìµ½T%¾öŽí>ÓKÒ[Yuéz"€ðÞƒ]Æçäóâ½lG‘á±»Þwá½ì‘Œê÷Ò%NßÉ~ñÞ¼Õi«©X[Â4ìŸËô–§¼WfDὑ{*ï%åórý•÷ÖÏÆ"®Õ{RÞ «Ã^$¾¥nìw5°_dc3‰aE¾ÜÊyÍO@¾Ú‚ùj+Qä»n¤¬À·.H¬Ç<}Rà[½£Æ\ ¢^ÅHø¢l=/kp¤ ð=™Ž!Îà+ 'õBGú®óç|„ ðE~Q/Ö‹Öyõ*ôRQoX®À·BÐÊ}ëØ<¤…û† Vá¾ut>òîŠ@å¾ ÑWî[¡Ñ] ¤ÎOà¾Ç`nûtÉòÂ}ë»Îx£¶¡M¸/s”Mnçý–ôÒ’Ž)E]¹od„+÷°IɯL+ù]·Vò!”’_„ÚÅkÊ«¦—ñë±vÙ$*/òKöªë×ò+î‹û.;Ò*é (?rß­;¿¸olH2éŠ,å¾2î«Ýgå¾u§kK¹¯¢„ûÖÁÙš«+é•ûêp"š^¦Ø™öâ¾+F_Áo—øUŽ.šÞ°aÀ/…C{¦€_ÙÅa_¡£«¢7ÀwQôªöb`_Ú´}³V*`_¼87_婨7lЬ7Ì©Š}­ õ]›«hzéÚçÝåM*é ô_™¯î((óÕ/-’Þ°Ï+’^Ù A¯†¾ø2×î*è ›†"èU•‡*z+¥²Ð©+•øjÃE/‘é¾yý0UôÊ^õªç­u­«ú¨à‹ …ˆZK=oèl¢ç  À—,¥{ëîøø®"ÕóU³ª>Î/x¯Ž‘Ê{õ'÷êÆ÷*ç}j1¨ûèóÈy«å鑊š7„GŠ{uÊ5/?i£n/x«jÞ%)îU5ƒâ^Ý5/ÃlìiÙF¢æ [¯¢æ }@y¯îr‰š—]Ô­®ÏÚy«š—¥ÉsÙ¤¹¿Õ¼Õš5Meñ¢æ c¨yU`¤j^:x/û‰j^}ÕÊ{uaߌY2õd‹š÷–< °½£Å5g-@¼XvÚ*«g´«w96§ç)ì%!Üz£Í»/Mo­`º¹òDaï^)ÚÙ·$öÖUζ{®µÀÞ=f¾²ìÝk=>¯ð£´ˆg×êKO¥½v/ûLâTÚKÁP̨»4ShïN]ÔÝžƒ¦—’ä#ƒBi/¿ù´ÛþDÚ»×Z²);bM/)j#ñX5½ŠD£¦75*ù†½Dî%» [a/·5RTöÆóDÓ{¬Fä {ùzØ\vƒ{dý»c.…½d·sÜomïNH|°Eø‰¬—¢=‹|Y7Ô\ò°…ô¢"¶ö’‡æw!½8õè[´Jz9†ô•ßÚÞ ‘>¡¤—ólÅÒ·aé­Î¥‡§µ‹¶7ܧ^Îë©*ŸHzÃo é n%½±u é ­KH/õ­)?/ÒK¶ •X:!^Yï^+k{ªŠ°Þ½øöl e½£Na×è ëÕ¾³¢Þ:4Œ<5%½áÁ;é }TH/gP„¥Ë™…ôÆc+éå7q™½;!^Ioø¤+éÕÇZAoý0i~UÐÞ”€Þx'+è…1ÍÊ·zcãÐF_A½œ‡¡}× êÕýQÕöêgÒOHoø¤BzÇiohxBzwVŒÓ4h%½a¬ÒÚƒH{yºY‹Z¥½òÑôr'K~ºƒ^}!zCÇWЭ Wï^A¯¾äôÆ›_9¯¾~å¼ñ VΫC€Þ×y«²W?›‚^† %ÁS@ox++è ½GA¯6!½Ú[ôJKÎö&…ó†7-˜—CT¹¯—¶7\lÁ¼¯û_0oüv‚yÃ;ÌÞeóþÝ©j´ù•bÞðAóR²OôãíÜžk¥¼ñ%®”Wß¡@Þð6òÆ_\¥½¡ßä·BÞð†ò†Ö³BÞø†ò†gȆ¼:: ä c½BÞx/+ä mR /ó¢‹.ëZ ol\+ä\ /mÅÖY.<ÈË msÐíbÖòÖ)ЖÎÓ;ä_U o¼Ô yÃ8#ò^;f/dï;¼ y©„…|wY!oø8‚yCÃyoh`+æår7¾úÍ _1oøp‚yÃ×Ì«ab^*F,9ô‚yC#ÌžAä½HC(fÑóMóò‰l¦Ü»ÞK0/“Ï\¾(˜7^oÕ÷’KYkôvï‚yuXʦNá¼,T¯ãéb•÷†6+ò^î¤ÞpÇžójú¨r^š—}ÒâRÜUÞÏ[å½Õ+çÝaŽÓp`½<Ø–,ëÚäUÞ>›È{í')KÚ­•õÆß\YoèY¢ï Ÿ[ô½œ‡Ãe߉}/D#5 ÈO¤½È„n6&v×ðN}/¿I§4HðÔ÷Úóí«iªïå½ô'úDÚkÝg§ìaßí}/ŸL”_þ ëåú(ïeN.O[¡Â{¹ÍÅA¾­€•¶Ä\¾;¥±ŸtõÝὬ›©ys‡Ê{í”Í¢‹Ôp“ |y²hØöUø†7"À7^j%¾v,M:D^¾aŒïN‘ÔËÃv¹Uà­ _Σ¬|—ǪÂ÷³^ 
¾jNIoàK·xkð娕Æ~³|yºg<[¾;:ä}¨'DàËÓQe­¤·Àw§’å9ÒìDà»#±®m)"ð­Qh›"ð­±k~î–´+ßø6ò\DàËiÉ&å­ẻÀ—Ûܳ§J¯Ü¦_œ,íÃu!Gø&|†‹oüÞ%¸_´·V{K_9õÚ`ÅnWßíµvZ#§^l“nߺŒN½÷´ÞSÚË–bï6öÖ’ÊX««Qofã9õ±Cz­wf!‡Ë«Q¯ @žyŽœõ2NÞcžT£ÞL¸0gÅ©—o+}/N½@=»hÇúêÔ _܇æIzñÊÚ[éO€½LÒLÀžJ,N½›3Õ[œz/rÉZÇý(í%:)d™ÙͦSïUÃí³çZ¨U¯]Û"Á”s{+«UïYe³ø~Á^‚¯Tìÿï ÏnÕkÿ¹9EtyëjÕ{±šK¹[ÈŠSïE;±±ÑÅ»«S/.Êy·éñzù8rÚ½tÐjÔ{Q~yTÕS£^‚ÃZª«›<¬F½4§/\z­E< mÂõ2q¨—Bg׸ô^$xO=¹Hz¹Ô9ª‰©Ko<¶š8\TG³¥^o·`¨ŽWAï…øœb+Ÿ€wëk¦ªöÕ¥¾«?/N®ØuÏëáÏK&Ë(Û¤z^ÜÃÓôKÞ‹$i[Bwï}ñ罉l(ºß‚ÞÚ–®œ^½j⪂^zCåVTÑ›‰¾æ>€øóBY¨9Ò5æ¢è½ŸúÂÝ I½™5ÃPRª?ïMt9²QÕŸ×F¾B–ÅÞœ¶EÑ{ñ¬û0®E¯uÍûrKôR6“D‡ª«=/‹ˆ ã¥ÓXÇî•oñ†Æk_þìkæOd¼7Þ®„ðÞpT’3Ÿ—’7Ä Bx™ÌŽé7-„—n–‡]—^¾çâ·.„—À¼ì}Ý«€—˱Ó7ðr¹}©Œ¶^Ž•ðÚyø|{Æ€Þ°E&„÷Æ|kZÄË'irJx©Bcs† •ñÞÔbjk÷OD¼7êòYPm%¼7®FK½¸…ðòpÛâ±Þ›V(áÕ X ¯†Yx¬ÑÎoZ†M{¯^Æþm8° ßeà»g¸ð]ëN¶úøÒ°¡FJ×°Ï[ñ.g3%[ð.k>¶ˆ½˜ÚŠwé¼­ÐÄù‰x÷&VÕïÒsÒôž¼Ëà݇áOĻږïÒH‘^úDº{3¡#sß^&½a4WºkçÙø6 îJw™H·Q¨PéîMfÖâ»°Ò]€•îÖ%»…'­P¡Ð]¾98:ºËÌZ†ÿˆÒ]Î+³fŒÐ]:A†óJwoàÓ¨ Ùé.ïê•îàxèénxÇ‚wi&÷4Ö¼K3¹†àÝåÐ3ÉôŠwuP¼ËìÖrY_6½¯Ë­x—Á—tJw ^ñîÝŒ}ОÁ(x×®gózö–à]zÝcc¯©"|·"“ä~_Êw‰–¶aø¯|WãLå»|o§¯çmØP¿w[ ö;ù.Ï`ßÄw„ïáX¨â9Ø+ß%Ò:†ß à];kÙMºK8…àaÓ]‹˜lXµî„îf e߯t7“¢6Kî ݵŸ³:º£ÒÝŒÓ(Œ¡›F¬t·BYíQè.€é˜µ8Wº‹Åb?_ Ý…YÙ‹q§p¡»HU,xvWo¡»œwÏâ†Bw‰!í`îÂ4¡»¹š»§y*ÝÍüeš% ÝÕðDñ.ÓßVÓÏOÄ»7ëü{à|Á»a„¼Á»¡— Þe.x¦/¥àÝÐ{V¼Ë#èšâï2 Ú]ú. 
Þu§„7¼!¼Ú?:à%Ü~}ì x3[SHÒÒ»›Âw5žQ¾«!’ð]¾Þ‡}¾f#ị*ß…ã.Ú{©Þ“ôÓëôÄ‹ ÄîÒ E¼Üç4¡TÄ[WtÇ׈÷Æìõû—‚xízËuWÄ›©ð9 º â¥Ýá0äÞ +ã½±zßwÏñÆ{“"cgöZƒyyIešÒ®÷F9 | äµwTöYçV /Õ ¯ãq+tż¡®˜÷Æ+õñÚ“Ny±÷iô*”·éŽíq¼Ðôz4ªŒW_¿0ÞLÉ—½Ü½FïÊx9횥o•ñ†c+ã¬ve¼a4ŒŒ×"÷áš/™·fa¼ö›U'û¦Š0^š!•É{È)Œ—ýUñf|¨ÒéÛ³Âx6ÊT6 㥅î3™Eof-5%éÂx3õÒžað&ŒÒðŒÜ¬ñfRûlÒqkßñ†™Zo&ÁÂVC-ìWÄËo¦a ˆ×ÚÑV—4Æ{.xÿ:φ?Õ*lÖ?ýÁÉõOÿ_ýSÓ&r´ýéßÿðo¿†$½>¢NE'üÕü*xýË.ÉÓý´<ñï}IkÓ¤fÕ¹gKøÝïà"Ðî7àþÖëAŽð¿|çô×Þ¾üøKBÔõoúíüÏßzýöÚû ø_¾¸ƒö›úwbaBµu´¾dÓ·…(Ú6›ØÉMý»;Ì?ìö?§ÝøçÇŸ>ÿôß,ŽýïŸþüøøŸ?þÜ+U_ð”2 þ•Î_q¥Ûf‰§Fùç_èŽú†¡rŽÏ¿s‹ñ1ù;‡Êå)ÏÙ4¿mf¨¡Âu¥ÊæÃMȱï¹dNûvÍ_ïf=öM³æÁú½= 7³úÖf™–Œ¨ïh#\;´¹‰ïm#õn´èÝ|kiƒÄÚFäf¾j#X¶QìYXÉÙm»-ÑO£®~ËÛçjÃò/ýõ2馀™-#©aö¾Ìã—I¿õ:,p앜Ì;ã:ço½ÎvA/7”ýo\çŽ×ù;× ø<ÉN…ÑaÃD9Ÿ'â5ø_õ ÿò§ýé§?þù?ÿø—¿üå?ýÇŸËÿþß4~~V:â‰b*UÈÇî`+Î3¥y®bkôªºA1Œ;Rý°Ÿ×½*UÄBà´×=•* ÆF‡Ê¬ÊÇŽæíKt»çîûm˜“+¨ÏëY„*lz£ jÞ³'z´sÙOÝÓ(£cã‡m¥ý ?<«ÊYToŒëd»|Ð~`©]½K¦N¶Î¦štp;8Ùû›jîÛVÉ·“ù“ý£Uͽ?g·\äЪœ‰Í ¯ÇyV´é(êÀÊïHÃucÜQÔAA¬auwBÉì?`¬Ã‘弟)U90uv6Á‘©TIÀ»±…~²c0h¢œ¥oÚH"íOlc=æ3§E©’Øö­ð½°?!fõøÏS¨’Ø$[rÖšлgõ³IÑ€ývŒ¬KN Õ€ývìØìý6Â~æ<…* ÓQäœCSª’Q —è3—E¬’(Éuù\sæg«P“ez´že[Ô*lÅMS} ½J"]qslêUvv[x 5Qô,iÑ«H’Õ^…‘*í=©ºœ‹^÷ 4ÙM÷s"‹°_‹Ë·UNdƒõã¸`ãBßPåØÔ«`yQH‘j}YÄ`ýñVÊ¢WÁÐ¥z“è"œõchìîÒ@D¦^G>’¤ò86õ*Q@U[¾M‹^ÅΣ;ôm•óI‹^Åfñbã–õ¶xæ¿æ<Æö½Ÿsѫر“"<½ã?ç¢WÁ²M|Kÿ:Ÿk‘¬/Pû¬*ÃN‹¥¦b…côm|úDÇç¸ßΔ½'®žØ?Ü¿[¿LCÇ{¶¢#½Åî[ýý®¤?±e¸Ÿýyl&šRú‰}àþP7rp.ì°îßÉ „ºˆûÙ¼¶]Õ˜ÛGXªþPç©)—®V%Ç,y²{é†ôºˆAûíØU\a}¡‹°ŸýòY7ã¢ôÚ€ý Ãp šÀcS±b}5í©Ù×cyQ¬06f]®v¬,’L¦Ÿòµ=S²²cï=<‚¯}[$+:ÜplJVðM°!ìniv/iÀþDÙÝ}.^Ä€ý{a¨óYüÂìÚÂ,@|U(dM†÷U;íc©…<Š!\ØØ`R ·WÁ‚¯ÝöB°jÍ$oùõ‹”0«m«½k”_ÕžÖžËZ@òfi‚«^` ±;E½² Ǧf%|6ƒöói¦/²…)‹f…¿Ì𯗵ŒDx²ùÓeîµ|%¿=ݱ·Ù%OŸ~Ž{@*}|PB¤Y™üJUœ¹ÙØC3a"vѓŋfÅþòÌ‚3W­æC,>¹¯”ÍJb7Ýó³÷CS²±³aÇ_ë¡ix}%Š ÛÃÙ‡©Ç’EJ] |Õ³ÜÙö‹—»¬_we=™i”N•gEŽM½ oÙÔ¾oriÑ«$Ä»ýzÕÍ74XgÌY÷B 1ô*©ªüº:òB 1 @hh!ö³·ÕTEÄ5Ò©Ç,8ÛŸÖVû8lúLWo¬¡×èç¶ÞXu¶½ÈUôE=‹X…úúÛÿ^Ä*ìXL¶ˆUÂÕ,L<ˆLZSÕöq¤{SåG†}9Çlîô¦Ê¾*Qc¿væ7oª;åBÝ3å"7y„¯4q{-¥?°ì¿Òp-ëéÙ™We]×Mb͈_¹O[Rø$pV“o­|,øh9{W̓y¼µDÚ`ØrN.$6œ´ÖŠRnè¢/¨¦&½½JÀväÏö®»b]7ñ©WI¯õwÙÚªêUFð.¼p˜,ZÐ.2¸ÈAš©ˆ ›.Oư€iTo¯v,ÛïUx9†Ð¯¶W„+ûá•·.V·—®šg|ÙšŠÒŠ­½žø‡ç­Ëêíå-bMB¾lQµ¤"ڊ繋ENO?¶¤"ÚÛ/ÙÃì«Ú¥ŒTDFÌä¢c›Ê(´Þ[,«ö=›½×…`¯øàZ; 
ý^jjxo±ãÛpDãXy|p=¨ôwyöÑ…&bį¯º¸ÇUsO|tE÷•Æ®õUuct¥0ç剩›ì#€Mh´†¾›c¬•F‹µÅ%b¢ˆ4ÆWàÄï]ì*>¾ZLñ Á¬­°0iñK?öäm&\ä*¡ƒä´ÈURݼ÷/}ï£Þ yÍóÃzÈ¢WáŒYòƺý¢WaíDšY{WÏbe½È1[ô!ñäHˆ¸r5à÷!ö¬aC×°^u‡Ú‡X‡³ÄÑ•ŸE¯Â3—!@·s½ “õ‰f¶õ|;6õ*©IszÎÛe˪©W‰¿ÉÑd±âBÉ MÄöÌx@®G²ödubG‘|=H{9î^ëœcS¯Bt`z 6˽¨U×fÉ1ûR‹Z%‘Ba}§yC[¸¨Utê,TL#,*îQnšcmÐîíuŸµ¤.[UÝ—°7¥B¼µ5@2Gæ›gY»ëI‹XÅÚù¹¼° 8š×–’Ù-kˆcU­bÿ¹ÌúÖbµÊb·™·ÿ½ˆUx®sktÜ"pe|€µßÛìvå•õºZܪµVdaCÍ¡gD¯|[]õUë…bF¯$Π½o_)ÄŒ^‘Œ[¬Ü¦¾{Û­ A[b®)ÄŒ^mͶ ÉǦV…:·³„ç k›Ñ+ƒèH¼©w6£×‡ŠInÁsSpD¯˜jÔ¸7’x|€µxäAfÛBŽM±Êl©ø²ð¦²ê_ºÊ¨åroy«l5¦ÒËàÝ[YÄ*&-¯Ç(Líì¦{g8öŒö@ù>¬I-à .fk°’Ë{³>ì‚oØsêVBS£…îRå$1s¶àÝ“!‘¶ßÁÝŠw©€rNÿ%Á»kT(}Ý¥>3Ãx/ª"t·ZéÙ<Ø­™„îbØìÝÚ#¬t7˜c Ý¥<°5õ U/­,²Ë Þå<ŽŠÃòïRLÖÚ»½µŽpËZZüZg‡ö ï’)úÌ Føn­Ix ¯ð].—ï!lÀKÝgsxC16'¼µxëý„ð†ò§Jxµ˜²ÞZ'‰öÔBxQ“ÝsQ „vV-0Ò'ÞPáT/å:Ÿ;íMú®€÷!Klw¯Z¼ZµO¯ÖµTÀ«†Œ xµ€£^)r¥|×&Æê5rt¨ºòÝ:DZànH/5µžQ¢]/pR{÷Û\¯VVÀKN—5KŒVÀûPžööz> xXsrÇ`¼á+à}pÜFÏÀ^K¼n‹ÕCÉ!^,Øì{xkþ›M)Ž\WÀKÌBZR›—”ð’¦w õn4|%¼ìª“?øæ»ñW¾Ën¶èh>ÏÊwŸšnzz)|מÿžœ\ùî³Ïp?ò݇eÆ@ý x‘¼â.Õ‘ŸÞ‡Ý‘Á×ðÆ{Y /o¯œ-õSù. È.çë+á»ZúKù.á!Î9~'+ß%å‘\öçÍwŸš%v§–p¯|—›Ün÷IV¾nSø.Ÿ¼£ŸÈw¹Þžžn 3ø.ßÔþ“Ãá»$^ZÐÒ“‹ð.SS¬s¼ä€Úö¤D¼ñ´•ð>W³»a˜Þªb>ݪ] ¯6g%¼f1Ùë(á}ØŒ9S¯+¬„÷av©µu:] o]Z܇ƒ!¼¤ÆÃ6U oxÂ잤”—Á+¸¾VÊËKÂCê$w¥¼œg-½[ÿuÊ˨`¯¢‡ðJyù±œ.‡è‚y1žìŠyù͹‡)˜÷I•9´Ìû0Ê=~ż´»õž7¢˜éÌbVÐû av¸ zi±ìmv® 7´f½Ljq§†]WÌË$DFoó¶UÌË|5wóÚ/n¸«¶˜G1¯ÎsŠyã±óÆß\1ïC®ôðçUÌïsżL=ûðÒÌ YÈ—//ór¹dëç¶ VÌKÖüܧUÌk—³NrwYRÊûPyîa ç7¸r^Ž5;“ë8/¯ã²qKRÎËÈ\.OJTΆ ἡ1 ç ç çe,±¯{[*ç…ÞØúµg€(ç%¡%¿hå¼a|ÎKþÌÜåWÎûÔü¹âèR8o˜±„ó2°ÁÍKçÃ+çe¼±×Þüë•ó2¦ØR¢‘VΚ¬pÞ0ଜ—q£”³¯]”ó†!]8oxeÂyù´—µ£V*À9o,…óòk8t6ÿGå¼ñ×VÐB¶ô†Q@/ñZ„½CàôÒ±ÒmÓtk'zÃ. —v’˽u`. 
—‰±_CÕÂyi&3%K9/ã/%¥3Ù•ó2þb~¶}ÁyÃÛÎkp>ÃzN9/C”…NÛ…ô†QHo„ô†¹XPo|öõÒdQÈõðxE½¡5 êÇVÔfdA½!.uÔ¢_A½,¨Ž”›]aoˆ\÷ÒˆÈhlÆ!‚{ -Ù5Þ÷O¤½²šVØ‹H¯ÐÞLöò ø¡µÜ6…½§+ì%d¨ñÐñ†½::+ì¥×Us“»Ööòt¶Ž<Ì®°WY…½êø«Þý¼ör6‘mG@aoýZ·3SØÏ[ao¼Ïö>d}¡‰ÂÞºúÎÔö²ˆ>ŽÔ]ä”öêú%ÈØ¥Ô´ÐÞZTôÒ*…5E<±z:¯Öm9é_f?÷¾™2d´›ö%ÇbÛJ¥(ö%‹|³±`ûûB&Û^ò'`_,fæ¶bß‘–l×KÔk½D/ý°¯"aÁ¾,Ðy6ãžs´¨ôùDî{0L¡p_mÓcß+Pß‚G¾‰Ô—•Á0(QY/„“×,Lê›ØŸ:]ª©Ôw§ÑÜië„v¡¾œwÎ]_Õõ2%åÓÍ?T×K¯rƒóM}eÚP]/ÞS6´ºh\°oa jw‹ž€}QBº¢1P_¥yJ}Ì õåž—«””ú’Ÿ¯¶þQæ[(á»Ê| sœ]¿¤Ì7W¯FwrUæ[ê¼ê2e¾üåÎÛÊ| 2†»¶2_»ž}Õ­—'æ[°@¶?5se¾|‚òì=üVæËåöQLD™/Ù%‡WãRä[PÚæÒW…Ž|íB[óš}kz &ÑÃ:[‘o® ¬±$È·F6ä§NaWä[Ø&:ÜWPos|…¾8Û"¯ja´B_!ö{HÍúâ´—F],…¾XBœ¼’+ ôÍÕŠhÂâúV·@ëã©…û}éù¥êå›R‹³Kgúb(ö±蛫ω†Š}3% F Ož5Óh‚S쫞:Š}YÁ:&[±o†jÖœ—ÆFWìËk±²[”(öU[ ž¬Î6×1ô­cà(`= /FŽyxÄ*ôe¸µà­»b)ôÍÕ-Í]•úæêòå´qe¾ Òçî;3Ê|qaí‹ra¾xE^{¹žGîö2RÐ3îŽJWÜKLJ<ÓuÉ‚{íùn¿}Å-¸×®g¹W¢RÜ  *ç@˜‚{±’{þŠª³¦=ùœ¯¸ Ö 7ÅïŠ{ÕHq¯z­ îe¤±MîœEp¯–TÜÛ q½¶žâ^Bg»ÿ^žsàÞL»¸µ¨û¯Z;ðU0¾jW¨ÀW«`*ðUSQ¾á= ðe ²æ”ú®Ÿ_õ0Uà&[¾,eïs¤¦ ð Ÿ_1aVà{×Ç9}“^€/s…Írà ð 1ˆ_LÏ)à«– |µvƒ_5GTàKí†4åÜ|±P³eç¦ ð­®ê‡+êøÒìí{]Ð!ÀÓçgÏ®À‡×ûòµœ_õÙÀ—IÞV¡Oëøò¶¤ö|¾â¥¯¼W݈…÷† D€¯Z­*ñ Ã¥_ñ„Tà‹Gà„l |ÁG0¨Öšøª?¢_mz |õ»)ðå…ÙO͉U¯Ú¬+ð¥Ç>$ |ãó­ÀW½>øjݾü&Õß¿R÷òìTo$M€¯xŠ+ï WÞ«6åªîÍS¸=ÿôî±×âAþkyï6xïnÿþ—ÀÞ½qƒö¢@ß6ïêÁÁ¡‚“Òwö²X¶`ÖY–880’ÈOa/œÀÞji³“ÂÞ xh_­c.±p 7’à>«…Ýj³øæÌ.ä9Ü„å%ò…âÝøÐw*%¢  S­¤öB½aG 0\´åsÏFP´bKb»88°«R¨ Ý8’88è•:8 ú:ìn&±88¨¶F-T¶¦º­ºÙ`¯ýf«”%&˜àš-DMT $¾²Ï`/…ζœ ¨gáÔÕñŠze#é…zƒ¢ ð•¤€zõ³)ëµÎÎT¿ß_°ÞZv«¸/¾ÕÇ}Uø.âÆ ï½¬»7aêç%ð•ò>Aàk­×¾S/c¾Ø¼[xÖ~AàKšÌb&!_ì™›»Ëç%ð•'Xå½²)ä½²ñû’÷f4E]Ò­ò^ÜÛîJC¢"Ã!ïM­ˆ{Ãý‰¸W AÜ[ìÏ©qo&«¸·Ìlý ïeë®ì×ñ…}·bó±ý»/ä½ø¾·Ž ï•󂼸2R6‚¼o ߺ ê^Ìg­ŸuÈ¥ê^i"AÝ›qÝ({ªº—vmëÁnõ½Ë~½Ê{ÙÕ«AÙò^Î_òÞeÌPy¯ OUÞKAV?ÛÛ¾ŠâÍs'È{¥½¾ä½«ÀaÈ{õ›ª¼W¶Ü_ò^ €¶¾Ïä½Ôe(Jí7UÞ»6…(ïå=AOå½A,ò^2‚òdØ"ïÅèºz`ä½ÜsÞ=ñRå½,(ìÿºÙ‚È{eÄò^;fïÓóˆUÞ«ò•÷²¶ #ï_È{ÃoмWöƒ¼W& ï•¨&è{󋾷Ϲ;Ž}¯„½Œ‹ç‚øôŠíB^ñhTÆ‹»Å 7ãÝ)›:†©`Ô‹›ó˜Î‚QopΣ^ÔZÅ‹„+åØím]P½R^¨Ãc°ë¾…òÚ±²•]t)”—'›bý`Ô‹¶ßÆ"w¤])ïÎFüBkWÊ‹眷‚Q/–%Ó@z©•´$ˆQ/¥\'W£^P{>}"çÝÙœ pÁ©—˜u@–àÔK¼>2‚S/[2#Š N½ZøªýæêÔË:ù¯9`þ;Ⱦúôê7WŸ^œ(|Qlz‰æöÛô±Îmµé%ãz$Û›^ÈîÜ7Q›^V Ž>½3eS}z €lͳ¯†Å§÷ 
Üs££àÓK¾i_Hz9oITŸ^f¬úôRŸÛÞ·C`ñé=Èf±†0例Q/釸Ét*+F½¸hÚ´¹:õ’œ¼¢r^¬ÍW/„ôîT¹ÈC_ÙPïN^µç}?@P¯:+ê í_`oçmñÒwOVØËåÎ}*‚WÚ+6õÁ­W»ŽÀÞà[.´7ŒkB{wx¤½¹nX¢~½8S“ºd¡½á‹ í¥¦º-¶ÝÚ[hoèWB{i 8Úî_9Ðòl0pM ÐÞ årùŽÒÞ L›\j£´—g°‰ÂUTB{Õ®^i¯wwÿ„ô2@ímºgÁ z—3VÆKöÛâ¦"Œ—ÉÖî­;]:â%N[• ^¹ñîpÕé÷"ˆw¯-s쯭ˆ—"Ï5Rñ2Ì®zE¼L{öëØN"âÕêŠxyO×ÿ(âµ{I¶˜={ËÄ«A’"^&JÂt™¿ ^ŽazìètE¼,Õ“5â·˜—êupù½«öžˆ p—¶f/ØÅÏwC߸ËcSMÊ_87X9Mu„í2:Ú0ÑùáŠvÿê»$_Ю‘þ¼Ø`\žªh7Œ©í-Òã5ÓíjݵçÅ=qØÔ^0ÿЭ Ùe ;m´êûæBvu%üyY«oÙí6„ìòmÉê¶ê¢â¥Çb5¢þ¼[hnž©d7ôDA»a0´Kw›¾qÁŸí[5­xËxc¬õ—¾ç(l—À|qÈP‡Þ“DÚ^ß'ô&ÜmÜß*^úöÄ·âÐûÀuû&¿ ]L%·¤íîìèÎý"uè…ÂÔlߦð]Ñ®®Ž‚C/²?k½8Ú%ÝÚ®ç=[ÐnB~9ê©^ªÛ£t(A»LlÓSØîÁ¶Î-l×ûžãÛ¨Kº».—Þ³.?wÆ]Ù.θ|nͰxô®_[Á®½ Îê»^ûXÖ~{£d—˜ùòx%»'±v-~Ü£—>»¡ê¯c»¿ÑµFjKÅô.)ùrÈ ¥Ø62³;]TÆûsx*2^œ¤gi+e¼”,6þñÙw¯T`1Þ'ò­ÃSa¼´Ù!ï µØp,*ž3 / ½2*w)ä•’òfœ †o¨B^ÊDZC襴"ä=Wû\)Æ–ê ¢»a¿ /‰·ãzy±ÞR ye: ”Wj|Ê»±\ô}’PŽMŠ¢ÌKÑß!4˜˜×î~ NóÆc+æÝϹþ‰˜WƒÚ€yµJ˜`^ æEõ³}ó’cóx¹xż””ÊÖöÚ„0oBÃåãRÀ¼tÐá0/~qÉ3«^˜7“õïo9o”óZiN€¼Râ)@^dR-MûóäX+7T‹—<ÿ>tä%”n…ò¢È€yõ›GÌ›hÈs弸6Œ?Yä¼¼b¢{¦²r^-á8/žS´®œwæa8ïCâûóÂy©žt!s<\´»p^{áSø8/ZŽ!˜ œ—f?U‡ÊyYÛÏ u异±•ó&Œsί½R½H(/ÛP¶ñüy¡¼!°zÕcÛsvZ õØ@€#Æ7æ Ѳb^{òE õØ´ÕŠyCA$­Ç¶ãs $+˜W—ìŠy ^äC7©˜WÚæÄ¼lRßÃ:C1¯íŠy Ú-Ôì_@1/©X#¯&d“jr‚y¥Ÿ*æÝɘYŠye;F)/#Ô"Ù]!/k¸iöªÇ¶ˆRÈ‹“ºƒÝêO /.þS)¯7¼L¼#{xk¼¼•™b¢×®g­rz>¬˜wg™½„b^¾ÖâB§˜Wà€y¹ÀTè æµÙrá)æÕ¦§EÙ¤ºhÀ¼#=w|wÌ[FOp˜W!żÌGv ‚yé—C«²÷ænAóf Q]k~^œWË*çÕnªœ—L…ÉŽ•ój ÇX—míXÊyµ çÕ¹@9oèã±.ÛºøÎïeå¼´“¿¦êåÓ®JáP— j>Ü+„󲮆[ó², öf–mö%ËÛ°—½ûãö…h ¾ÚC”øÚ£OµÓD¾³ ò•-}E¾¡¢ž"_ÄS—­Ÿ¿@¾²åï];mÏ! ÈY5¥ý¾ppШ: _ý¨Š|% ÈWùÚ¬13ÅòÅQa,KCi¶,ñ!Q˜/Ã:É.=ÑD˜oØ=Uî«ûÆÊ}GÊÕúûjWVì{€6‡³r_V½¯€_lS£®…Ù m½¹o@ëÊ}EU1¸/>Sn©uÙ´\£€_ó’”#à7|m¿„ù3?K ³!kI›jðË*c¦ †Âl¡Œõ¹ü¿ZU¸¯ ?÷ÕNà¾ZOVë²1@mcÓX¹/jÏNØ•ûÒœ¦à_ë²±³%7wî⿲0•Ùvöç‹›‚)øÕÒÂZší øóp#‘Òl‰¤Îç¾ß’^¢‰™¼*³:*Üw`[/÷eȨ̦å…û¬"Û•ûbݹM³­Î&š6­Î¦¥ê´:¹M6G9‚î˪rš˜÷…ì‘IÒ¨F¨Îv=à‰ì/ò[òpŸ ÕÙ¤ŽÝ ýRÙ¯ó¯€~¥6ž¢_­îÐ/v0#‡? 
_»€u ^= _Ö<Õ{»ô{òÜÓ: _ê1ÛÓÜÛ»:›½Ï$¿OA¿Z nE¿¨y1[ô»Ôø ÅÙ6ЪµØç~/BÃñ¢{yÑÛ†Ø>Îü:û†?Õm7¾¨'òÜb1q+2ÈŸÚÆ!GÛŸþýÿökð2ì#ÊáôW”Ã_Ýȯ"Ú¿ì’<ÝOËÿΗÌ,ó¹gKøýï€hÛo ÿù[¯_¨lè7àùÖ;h¯½}ùñ—4w.¾éôèþÖë·×ÞoÀÿòÅ´ßüÓ߸“ˆd?T,òÿë b3`²ÉÄîéüØ}bþa·ÿ9í¾??þôù§ÿfAÊÿüóçÇüÃÿüñç^Èi[DìÙÖ:éç_éüåW"„vßHŸ~þ•îx¥o*çøü;·“¿q¨\Ÿò^Íà¿if£•pÁ„å;o­Ø{¼›õØ7Íš+ø§…›Y}k³<çò-m„k‡6"7ñ½m¤Þ¶½›om#mXÛˆÜÌWmägÌ©Š}p`*ˆ©¡8F‘´ú—·ÏÕ†å_>ÑÈeÒMÖæ}݉€þu™Ç/“~ëuš¿7+Œ”þÆuÎßz  ˜=Ëþ7®sÇëükœ¨(ð­;òø·°n¿ã5ø_õ ÿò§ýé§?þù?ÿø—¿üå?ýÇŸÿ㟿iþüü¬ÅmÄ”¯Ôk4õØkž¡ºCqkûm„3ÿóÀÕèÆCµ­’ P«.íÉk|¶aßsc íÌŸB(ü[ž‡Š#ÿ3ñõw÷©¾q96t¬ú©lÛ³n1Ï5GX…³gÔ,îœÖRÓTsÊ{/\f}e-5 C,wj&Ê+Žü9ÖlªbùÎuu’ê¶§â+ß|‘ßVöÔa{É•gù®¶èueÏyöj¯^3ïÎy­9bËçc¿ºZŸc³æTé,^ùÎe©9bç=h\ë²8?KÅ6^õ¶½»›’üùÅÇFÂ^ó.û›Ž×é=8ötà¬oÔü9-*Ç+¨#FÍ‘DY‚ýé:òuĨ4 ¡·çn¯ÙÂðZq$ÞõTáB'Ê&q£œÆ¨8ÂÛ(“­»àæÙQÇî©+º ûK EqÕl—Ýåùawܲ:sN9ö8î…Åïg›ÊŽÕ›w;´Oá ?ùu/¼~jŸ9íç´í8}çú®U·No¯6бÉÙ±J«É×Ûk¸Íjtwz{'žûÞ¶¨nlBœö×c³)3];í¯oåy¼Œö]«²l³¹]÷Wý.o®á¼2•+õ­X7õ²K¨#÷×óò(l‘QG8îo:£ŽpÜ__™}Ù¾+‘kéòm4Øåq¨8îçØ•m¸k>ây;¦r¥žôk›±y;§r%ô+çñŒ{*WÂCsl(Wê¯=¨À«HÃZ£M´çl±ò›¦ö–&4µªÖѸô«¹ã¹&©l³Å†cC¹o“̼aáÞ?âÿçœMvýnh­Ÿg6ÙdÓF‡óuÄ6†XF¤k9ölÏl²|Ïî¥÷:Â{“%¯Ü=Y=³—•Ö!¶ dfß&-CìÚNØpàÏûDJÖwGsݽY†Xy†gjWêˆXn×§ä*¸ñA–o4áiNT[ôA6|? ÿ5Yë©÷Ýœ!sUÎø «#Gf»¯Ùf×ÛD̲bêÓ!XÃØe ±g c—5HF1ÃØ*’L½^q¶ÅÕ5ÂX›¹lz²‘¿>x9¦j…"—˜Xuó׌ËÏ cíkÕĪÍ=3ŒE¯9R0|ŠØ ‘UM¸µµñT­Ô£bcÓ(XÓ›ª•×û(SµÂêü>ìQ{×-e¨VXdÖåÏvà™š•ºl·{íâK»¥©Yáb§uÕž%—©b=cXÖðw/’)Éü,”_›®–ȵ´òZÙ¡>\úñ9Ÿ!쎙Oî{îYÄ a‘õ©Ëg2²ˆ²©Ÿ{Q‹Œ*bF°úlLêý¾šŽ‘cHkþ½‰T?¾1²bfúxA•Œ׈`‚dü±fk‘©?­I,ÝfK´tÕ…Ù–‹ IhX”·žÃ±¡X©?‰l«ý`z•ÚŸäÞG…¦#|EfQŠËìNÁ Ç(ù²5;€²]‹R¤ÆmÉű’[«ËÔDo6FÅÖU÷ _É„ºJ¯Ój½q Vx®‹à®[§‡¾_§×Á)Û3+õuØ¿ëâ%=3|ÅÊçöZ0eߦ`åÄE F×â„‚b†¯vÏ5 ¼þ䞦^¥~ìVÌâ¬ÇŽ©Wá´DÒjºS?öÌèµüÐr¯ . 
#pÕù¤ nkª‘’§ê´â3pE—|]]_P*ŽÀ5ñvìë7£µRÓÆÐJ®ÔðM(HLgàŽ=‹ÿ¾ZûpÂ(U><†ÖLÔÓS9 óÁ \íÆîQB´TÂ5†VÒ†‡7B¡ÝŒÀU¦Ò‚^i®ø:ÏÓÝ•;‰ãéA }®È…Ÿ»ån×í$xz›ó×%)z•‘¿“›à®Ív^Êêwù”£Ì’Â]‹«Ó9EH+Ü=YòŒj\ wQ?Û¢»§k)Ý=I Ê>)Ý]k¡Dº{°o`?‘î¢&†{¨Ò]LßÎá,¦t÷ fðêVºËòFMJw —îÓubBwë’µäºËh<\ŽÝEÕ9<}ÝEÁ±jü¼ð.Õå.ï²+T“tú¼kŸÓ•ï‚ ž‘t¬x—û`MÞð®ÔÙxW0¼öÛö:N¿C¼²>z^Ú+à%Aáöáð’ÑM´àµÙ€m2VÀ««o¼•-ž¶/»£p¬^FWJE6Ó!%¼ÈÜ)¨ÛLcáµ|PbµÕ`‹ðŸÒ”-VUÂKò5˜:÷ÄË,ð$÷ÞVÄ‹ Ûfã/)âEçnkúü¼Ø•ùãDÀËØðÑý›ðbV}ÚºJ/Zoë=½R^Ì:ŠuÉ+9û€ß&åfx¡€wg1TÜtF/—Û,N½ú±xɳÆä‹¼øVÙÞó}ð2§'V*࣢ùïaó x±óIäk§vh¼\®^¬UJŸx1?³•˜¯Lðò‡àYù.î+öî»ãò]Ԣ肚•ò] Q,rèÛVÊw žöä¦VÊwyÓÖ<»‹žò]„VWÕª6f¼ò]²üÎVmûù.1Þå•iïòäÓÂ@ñnÃî}¢x—\DûïÝkEñ.wŒ‚wÉSƒÐ]rk÷QWOéîF©VíØJw7"ôOàº$ñî÷Zº¿r]lß·£.(×ç­\7¼Æuy»£ ¯bÝðâë2 Si÷¸ýØÄºác®Xá|ñ2Ju¹ó{”HTªË/ÎL¥º|’kTUªkSˆu¯}©T—ÏuŽê¯Juõ6ê†,P—Óî}àºñ6W¨Ëm–Ù+Ô¥åXÔØÝêên0M»zçyué‰6Dß: ÔÝÈ;Ü~Q¡.×ÛFù[…ºvŸÍ"¶m8®Tw«Œ' )ÕeZ>FMV¥º[uñ=]¡ºáéœêrƒ¶ÂîéÊu7`ϨD«\'ŒTBC»a¶†Xa»!¶»Õ+u÷~a»º%žÏö“+Ý%ŠÛ޽ÅiÂv9ë%»•í†Hà.n´÷æþ¼ wÃà ÜÝXÕìî~«pçÏ郦pWC$»,I,î8“P¸‹Õb©†ßç'Â]¸à ê…íî•øîž°]~›S£¦+ÙÅÒ¥-W]Ñ.î­àÇù‰h[ƒöä`»D'EŸˆvujT´‹µcºö¾g«h—cÓ~ ]ªÓáÙÄŠv«óFrK;E»œw>nJ¡h—EÆ™=S_Ð.IÈx»ïý´íBQŸäåîíâa?Ø]”í2Ð_‡WåS¶‹ÑUŒ[ò¿²]ÖÝÖÆzePe»º–WÀ‹IAÁϧ}¼x`ØðÐga¼h¦ÓÓçð*sPÀË›;Í+à%­¸›aE¾ ªh™ˆõ˜ð]EÊwY>Ööß%Q|º× ßE }¤g ß•ïr_¶"êæÄÊwq"IÖLÛÚIù®j@àÅ¡ÔVž-°þµµ¤™ Ý‹ô^¥/£¾"½itÎéµpÐÇi%½Ä‘›£ÿ@zíØ6âÈ@zY}0 ÞpLH/)ÑÛ*é¥dErÝG ½ýÃa–H/p*{¯¤×æáÍí^ 7ßu¨nÐY@¯MËÊÛºGA/],èì(€^L²qôbl6|Çé­ í T!½P+õBQôª¤8Þ­q¡/„¼ö;5JšbZI¯u[½{Ýy!½øy_»[ÒËÐj ÛÍ(éÕ~«¤³ë`½|‘‚^vÑŸÔ&z±§ÛÎÛåÞ ze  *zœ}CMAï$}|÷QA/qÇVØWÐkÃ×µÛwl÷¢ ×‹h7¶ è¥|Qž& z·º.O˵WÐkÇžÅ3T@¯.µônŒ+Ã@AAïгŸhà¢^­C¤¨7@õÖ¢Um‡R1oåºÃ+@0ïÆ\C¦ý'BÞÞ·÷ Ta¼ZúSïÆÐ’Æž~G¼\…]¡N”ñ=O×A¼µÔý4™QÄ‹•ÀÖÌu>ñ‚÷†‚"^²þ·§›îá …Ü„ðÖŠYÓ‚M ï†×IyzsTÄ»ÖR4 /ˆ÷(lÛº±Ü.ÓÝ×2|þ)W¼+e¹îÖêi[å²í¬îR½ÊÖ™éH] ›×BÒRtXàn­Q9ÝY„îÍ›¯8ç\é®Mö@OG„îRešÒNÝÌXèn-%=]~Vºª‡ ݵ™ŸÊ¾…&t÷¨n c']èîÁ"ˆ¢2quºj} ÝåçR+W¿Ý­O=Ûïò›Ç´¥_ñîц(Yï˜SÎ:‚wkó™¾î‚w¹•ËðWâ]žš!éñß\ð.׳‘¾ÆÝŸ€wmð ÍßÎWV¼{`”9+wÞå>ï¶„¼?ïR: kDßIZñnýÍTV^ðîëÙËZIº²×ŽèV¸[Û‰ÍÎÙÏZàn}ÓöòF­p—;±¹hCÜÿ p—7ÍPê‹øñ’sg¯Õ § âåVlÅí*A¼¼”›€¥kîVÄ[?гï݈¬!^̉Ó1ÕŽ+ã Åé„ñÖ/º³ûÔÙðÂxyfÙ'ОŒ÷($+}-Ýå¹®Î!?òr¹ZLmŠÈËs=³ÐŒ@^ÞãÔ( ã­¶ÌûÔ“®Œ—[aFp™á y¹œ†î¶.—ßÄ¡lë¤s…¼õÓ\¹VÈøÈûÇ yc?^!¯ 
ç6ÉÜ/¼õu¦±›/7~½òr½£÷êO€¼¼Î -mß=\ /n˦¡;]!o}Õe&‡¬—[a'?w øJyy<·~—³P>}«b…¼¼äÆVÈ˃Q×ázº¸x¼‡Åö6‘m½€ÍŠy¹y{gÜýK¾Ë­¤}§·¿0oŸöÔÈç=ª óˆl…óòÐö߇{å¼±·®œ7vŸ•óÖyNôÊyëë¤"N£NÂyëë|(³Òïsá¼q8Y9/×ãҾ˼r^žÁæ¬!©]9oí#ÛPŠ®˜—vò´}äó0ïÁ}mö6zµb^~ÒÆ»Û+¯˜7~¡ór9‹Ï¯^zF0/oìf!Úì•óƸa^~-%ÂÑí…yëx2¬…òòYk5‘úÊxã`²0^~Ïâ<˜Úi ãeî;Xlg.”——˜¯.ÆËófvGچŊxëÛÝÓÞÑž Þú`×"š]ol+â­cýÞ<?ñÖÑâ…H…ñ2íØ|íÛE+â­mÜÎOÛó „·¾Éí« VÄ[‡ÉcHcñr5‹=|?O oíÞ3÷I oíÂÛP· äåX~°Z!oms×vwM`ÞÚLž!ÝÌ[_ôf«³ÿä‚yk qŒtǼ¼kC|¼P^®´Í-9¡¼á“ åÕ÷¿BÞ:ÂÀŸâüwÎá¥hcyß½È[žÚO3üÈ>©@ÞÚ¯1x:X] oAÏ)]^!/¿‰£Ð¹wX»@Þœä¥Øm¸˜@ /×;èÛV’@^®÷ì—/6òÖg°n¼u ì yC´ ”—÷ÉÔØÖÞyk_µgè*0¼³ÁÕWÙy¹•b+ÓBÞØbWÈËiÇ6$úê¢]\ o½þ_5áw¶á¯E¼Û@¼»ýû_ÂwmZð-§ÈwYX8q\]v+6 XSÙ[êòÆ•ïJê½Ð]\H§9‰ÒݽN‡Ûp*Xé®î*ÝÕM^¥»¢þP¸ ª±âÞw7JL‡(ÜÝ(Ð`Ü'ÐÝZtü\dà ÝeOTØ·J„îÖròÖJúf­Ð]λ°¿o#ˆÐÝPô^è.Kî{ß·> Ýl\Ó.\è.ëqª»æVs¥»ð‰­L½Ò].g‹ûk8?,t·2ô¿ùMwY@òŠœ¿wºË*–ý“žT&taMÑâË&ê^én@/‚w9ÏÒÖÍW¼ËiÖ~\G/x—€#õ’Ÿ€wy2ûW{žïò¥}ƆF¼[¡À,Ë)x—Yù9&Î_ñnÅ9öp.E¼«‘àÝøè+Þå¼g–ð¼ð‘à]Ž]£ žÐ]ÕVo(+ÝårHûÖ¾Ð]ÎÛ*jA°ÐÝÀYäw³°^"}jµv%éÊz U\÷!NUÖKÜùØtÐ}!„õb›OÖ¬7^®³^žlš ¬6Ÿ”õòd§§—)ê%z»ÒÝ ­+êÕv)¨—Ñò¶vÞq‚¢^}‚z NÏ‘‚¨¨·Ö(é*õô!® UÒ«ï1’ÞµY*é­³ÆÈžVÒkþ¬¾H¯v-%½Ö,ÓHÖSÒ[WÓ6rßý„õRãôbw/Öûl#1°^R3g¶§²^¶ž¡ƒ_YoØJPÖ«_HYo¸œ°^™wõj£TÖ«”QY¯¾Ge½úÝ„õꤣ¬WŸKY/¹8ä6d·bXX¯Í”³h²²Þ0Z*ì¥^Cöjœö$ö¬YE½ô¹”²›dê%ê±H¬l_¡^m$Šzu0QÔ«›ŠzµQ*êÕñWQ/»~3Ó[Q/¡óãE$õÙô¦Ÿˆz)S7RuêÕ«¨W‡E½Úë-€RëÖù%èù5ˆ;[0j°ÖÀž“õRæn¤ýÖ+¡†¢^í‚zCÌ ¨—š÷—’Þú²ìKõÂvŠz‰½öûvSE½ºi©¨W¿·¢ÞBu•Ü KÔKŠcä¬(ê­P¬äæ×P¯ÎýŠzI7.Sݺ²Þ‡Â4¹n£~"ëe‹.aVøë}¨ 22ƒVÖËivî,¢¬ýQ$:°Þð“+ë¥æ²›f&à¨O›£ôÌXE½,cìŽÛ ¤—/úœ¾~WÖZÐÊz©Þ7 «+ëåîf‘ke½zç{¹Ú1ò«•örÚuM¥ìJ{YͺJ{ˆ¤=CƒgJ{Y¾Ù[ÚZå1¥½<Üåa¥½ÛC𥸗2„…¿ãe×V–Ê{ÃN»ð^»Þ=‹e*ïåzcñ¢¸—•äe£^ÏÆÜû,H%à^;혵Ž÷r'$ »LxŽÿðØ&^qïSëÏxÍÅ{a+³¦—ªzy”ô¸ûò^ûRv?[ß!QÞû ̱ÅÉÝ®´ð^΢#vv)¼W}‰•÷ò‹e½SÞkçí³Ú—òÞ‡žäÒuŽ:†WªâÞ•e9‡—Ê{Ñúì£ö®â^Vëö¡öë ÜûÔ"^HVq/àhêOÔôRòIW¯m¥š^q*ê¥(ÊVÿiSÙ®¼×üWîNQ/ŽW”‹ ~§¨YÖÛ‹÷r(m)wÕ£Šz³b¤Šz7 ¾]n ¤¢ÞR$Þ Šz+„¸}Ëõ×¹6싨÷¹ò2™öôœî}È^ŸŠ{k¡µg8ö®¸—i¾› v'îÅõçn ÅÏ ÷ž”€ñ‘Š{©²k¡g¯¤«¸wg¥\hTÜkS¢}’¾«x/ò÷²U®ð^îÅ>z)/SÞcÞK[]DÀÂ{¥«Þ+ÃXà½@¡t¬¦¸—xc ¸°R<—=à^JíÝ{OÉ ¸n7ªŒEÜ{Qzo¿(´—Õ™ Mç+´—wçº_…Ò^û‡÷ô¹´W€ŽÒ^~n–© ´—èo3¬°·N¦v©îü!°—(ØÁ¶á«°÷!Cm °—v·ïÐV`¯~½ÄUð×66ö>•yyj…½¢3 R^}ÿÂz9Íz^¯-®¬—éirTÖKØ2‹}+ëç­°— 
ŠY‘Qa/“ ©ÜYx…½¼ê{l.)/y‰£Ö›Â^æµgø¯ìå4‹¶‚A/åÕløk>PöÖÒ´ç0EXaïCÍÜQÚa/½›ÎèöênÎKÒkK—Û3ê…öê„&´—Ó¼àr`½ä(Ø¢®¯]”õR \Ån”½ÖîNÏrÖKÐhŸhïa½|ÂÍ¢Ï.YÖ‹j—¯\¨¢—5Ýž<‹_Mì¦gÅe5m‰{bÚ±U׫/Su½”Î}rß@ º^Õ\«kƒDA×K±éê-ñ2åïlum ’ÜvOãÝ•õ>xY”Ü7¥”õrì©)ÁµåÏ>=€]×»N,Á´a§Psé¨eƒöõRM¶‘Ò’^~Ð~gxBªgù£ ’^Þ¾5‰-}AzyäYÙ=x6P@Ú‹ËêwïÓèV,6ÆÆáL­– :܆¶Z6lÌ[‡Ûp¨eƒõ¤t l£– ÚÐ…ôÒ`!3W<ÑôR_xXUˆð|+é%ãcÖVÒ«Ù Á²As¸Å¸AsùÕ¸¡–N¹+›Ô¸A¥7h#ëÆ Ül>ÑêÜ ½FÂMˆsi²÷—†¼L#wßjÝ [þÁºb¦#fÖ  Þ†)ºZ7l?äY«5X7àc«NÓÕºA–Á»!UÐkDïYsïn: _jõn Äý>uÅ»aƒÓ ƒvõn8kÏVVïé­jÝ@õêkØ´ªuƒö,µnØYsT±n°á˜ÐöªÔ¹A_¦:7P™,÷)PX mGîøÁ>¡­ÏÜ™VÈ2ÙÜ„27ltî{¸ûŠqƒd¤©uC"Â:mµn¨fئªu°Ê¢z÷(ïýMõn°!ÝBµ£CõnÐd$õnД(õnÐÌ,õnd2ºÁ»!ý@±R÷¾RïªÕÖÜ‚ö|âÝ(¿›=H½`ÓKÒïH`òb/Á»°ysß»àÝ`“†Cr¿C´)Þ 'Ùß›ç‰w‰sDQMɼ„²ïkÛÖeœ_ºwƒ¤öïuQQï°Á=¶,Õ»tÈg¿òön ê´}f'Ø«wv( ªo_^näB¸ØB+ñnÀq#Ý#óF½Ô¬I¼¢ÃêÝ<Ü¢wƒ…Ûc;M½2‹·â 1oÀТìÏÞýäļ!˜]¨yƒšˆ‰{C0ªŠî ¤:­ÿÄth ®Çíw·Õ|ÌÆ…PÁ¿A<ÒĿ˥tô$upP{—ƒÃÆ;y†|WNÐÿ½¢Þ  l8)»o%eƒCxY«ƒ´…`àØJ@ÐðÒöžu=uôjTÁ¿áàž=%)ø7XW?h½ü2y!­)ÿ†§Ì£çVÿ†ˆ»cœ`ßp= 7Ø7k±o@ü8â ö ¤r?®- ö … Î+¿ÿ[îZ·vï^õo¸ ðºû°º7¨qEtoXr¯Õ½á¢¿Ÿ=A(¸78•ïêÞ ÞœÁ½aM,¯œ—ç³öorçýæ æ•«bÁ·yƒ ylŒÚ{B‰ÝªEi/ѾJÏ>WÞ+ê÷‚½.›B;·Þ+ÃG÷Ð×`Þ{²Ud£ÿÐðÞ1ØÊ{Cª³ò^•êÞ»l~ªwƒî¼hïc1fÚK÷¸s—(¼Ô½Ú8W(GÜkCYÏ9xáÞU1¬¸×&ÁÓ^ð=|&îÅŸØÚâÙýj…÷RéØFm4á½ëF¬â^LÜKœ}”qWqï qurŽª â^m ï-˜é“†z¼¼jƽ˜R¬¼— îv¿Lzëo¦YâOŽ’AĽ’wĽÖéžšUã,x÷J’@÷âŸÝ4*ˆ{Oº‹×;â^&†Ý}‚¸ÛÜÃB–~Ÿ¢îE!nŽ[ñŠº÷€ÆúÊ#¨{íÙ1êKBU÷l¯ÛÚ_uØêoÚP[¶NnWu/UmˆîêKQ÷†Ä?¾š6Ô½á6»º×^Öî%%TÛ{«¦»ç]ª¶7q¯¦í‹¸Wó^ø†ì<¾!•MŽk"žJ{™sŽ/ jvîq ¡Ò^§³d•J{‘=騴ÓìQRP¥½$nÙmÚ˜Þî*í Iu*í¥²Ž—iØË°isºwöZ(‘p¡Ú_°—»Üðýï‹)½éû3è¿ {Ɉ›R"ìµÓå9\¼»Â^Ž#üQØË#<Çpßa¯ÍöJö.érÖkÿ53I¥ëeÕKTéŠåU×Ï[q¯ÛÎá=.ÂÞ/ÂÞL\wïÃÞvŽšÚ.´7ãXnq°SâU×ËšþÈcçHt½™I•n?oö2»ÚÈìz4ö·öj.¤ {ãõVaof:Üß÷f|ÀÚO®²ÞŒ'‡/—TÕ^‡¨z胃þä*êyC¡¢ÞŒÞðYTY/ox&€*ëå6-°_,|§¬×nåÁºïÙˆ¬7ÜæªêÍWíb²DÕ:¸¨zãåVU/cu7G"¢ê H lò%ßïUz-6;õÒŠòp#TQ/-å«AõÚy ·'wQ/A<Õˆº?ˆˆz9fÓü°^aoà@{ñ¶º“«sWM/ƒL!ç ›O¬š^{ö ®%QQ/Ù²Ï,Œ þ òEÓ¼DÓ˪g?†'’hz9ÏFeN‹¦7ƒݹQ%½ö‹ûP ¾%½´Ibœ’Ф— é)ÅsDÒ‘ôæºÞpB¡’^ϺÎ0³]%½:[©{CaGÃÍ'UÑf²UÑK3ß·«ÛÞ©¢—!ûdC½|×s”×PE/OW†Í§KzãâÞ ñ/û†56Qo´¬XD½™9ë¬åY>ôÆùF 0PN—›@‹ª7DO¢êÍäM ÿàá€Íë ªƒƒ,ð‚ƒƒ®DÓËØçy`‹Ï-ŠÞL톚¼éjß©èå¬_~ö•Î_q¥ƒšœÛgWþùWºã•¾alœòïÜ\Æ ücãúœe‘U~×\ 
Uá&B¶ÕwÜÍy²+v[MŸÑ»Y}ÓV¬MEoç«¶ò3æœë'Ýöç̬CA L£ b¯zû\m€þÅ^†ì´nVóì¿.óøeÒo½N# ,0Rú×9ëu¶Ë®c ¬r–ýo\çŽ×ù;׀Т;a¡€÷[\·ßñü¯z…ùÓ¿þôÓÿüŸüË_þòÇŸþãÏ埿i&ýü¬ÜÄ+2K*@ƒ`¯ù†b¡C][ØêÚãaÿmd{–Uìv0gc5¥r¥Ž3{¥= ”íÑ!ot\œw ;PŽM2õXöÁhtÌCh¿i-ÄíàÄÃÓ_–4¬;ŸíZjЂPrj53½#Ç K¾ú!ûÕª0Õî\ˆ§ÕÆŸZµn4U ÜðUµ.±$ñBlö<[jÛƒ,÷¯šyª¸l4Vt&C!Á±©PãöŸ¡•y¨8=¤Õüæ1 àC9°¡xäEnÃÌû¡<ÚPÔsÞÝe#ϾÊ[Ãë·cKEŸôÃuµÏCÕ«¡jN¬Vì¸âµ³÷§ê,ÿâާ瓹ÖÛϧ¨¥´P9™L&3—w€¹€ew¡Þ Â,,idhz<ËþüŽÃdãœ|»§«Úz š®bå/ÿ1ÉàÃ'¢¯îP]Ýý©Œt£œWŒªì]!åA[*—Nb¾{Tý‰¾ ÛÏ)æ³a9e¨àZl8‹Vßûžtãö‡}ƒÝÏ V½£¹àm«F¹Üº|Œ¬(‘µÖµ9*¥…ì½tO>°ûJrmýÍž4ŽcÜUL6¥­Š1zçWJýâb]÷žS2ð“PÉ=ê‰Û†õ¥Fš|±|F»Ϊkt›/îŽp# ®0É ¡ÚV¾^Évîù®5eAXÓ±ù^Õ¹n­ÜjKu˜p6l?‰#7¦Ïš:,ÝÝ™2ðĆÄlë#ó@ÿöJR`y3õNŬ[lëž$l;KJcÄs6ÔM+¯Øš§F݈VîÔaók³Hc‰då'kª,/÷|ääÁ¡ú)ž r#°ÉÞM¨º5­®Ñ¶$öð|B>ë#ý¸GQ³è¯ÒÖWÅýÉkåë툮ҟ’±ÖRÍ>¶2rôÊ7s«<¾nØí/ó¸3ǯй^mÖØB[Š_a,Ù½¨æ=,tR‡ÅïÍì¹å¤7“/yFÁÇ”Xz’£ñÎclºD„9„µŸ‘à“„€ÇŸcXºDÁ)ˆå2­Öxä(vƒpÔ%£ñÎ-_PËqì.äå­ñL,Þઠkì9’¥®f9]‡·µÝ)–ÕGpç`V‘@—G\º[G¥„?êmÖ¸çx6LÖVS<«7Y²žúÞÝ›ËÒêõ´Ô"gs›éæX ç –:^Å\QíëFzkåW¯'¬¥©%.s\Kó,`sUOÈ|BC Ùê• ¿~ª'#ˆÏÃoè>{Üyü¥Î taæ°Gò~¦`y¶·äöÖx ƂÜíüàÈ3g‘Â\rø÷´§@w.3ÇÏöä[c*kùúÕ›¢\º‘Ь“@À^ŸÚJâ”˱ŗõE‹îC¦QkÛ9Ð¥‹ ½öxڹͷ7¨°Æœ¦­Ï®B¾‡^Pú»›ÛŒâÝí M †¿¶‘¶ßï/6bt@«Ð¡Ú‘e~Íù›¬·^ã÷¥.z=’¿’±( ÎO6-žy1`H“zðIbÀ\ÿ™0 ¨¤8‰ç IÅÀrÿDø¤¤†Þüx¿(> zî!U"lmG 'æÀÞwö!lmvÑžUO$ø€¦éðbG£õ¿ž€šAð'{T-"lK`h1Fr±¦u'¡à±t®³Ê“`á÷D‚Œ‹gB1 Æé¶ÓË.F¦ÂÙ½M@›A0Úîê‰a ‚­ }Ò5&Á£îêê´‰‚íoÁç'ôa|@›nw}EÛŠ§ÇVO8…0 >P6·GŒN(ØÚ¿y—‰m3 > Œ ìÀfün[ñ4òAŽí‘Á2æD`ÁÂÖ°¢a|l#Kׄ‚+ºLØÛ >€®X`gÌטY°ž+³`´]a_Ã,øù±”&lmö‡:‰k[fÁ¸·# D™WX}=*Ó/Ê‚Qj¾†ûÓà Û±°GbŒ2ô}· æMDƒaÙ ëá>4?Ö‰ip…JÛ}ÃW8V†ßÃ`$!•p‹bl?yÚÇq>…üËõ æm;†Á8ߎ| ƒq÷ÑæÎÃ`w‡ÍÃ`»Nø‡M¿2†Áx,W8e ¶&dÂOG8‚ÁùͥͲ: ƒñTú5eÛÌ‚Ñd§š†ÜÌ‚­­–0dŒÓÝáõÇ,m%ìF™c²S곂%<gˆH0þ¾F}k&ÁuH×Ý‘I0@îq{R&Ázý FîÛcRøpåL‚ñ4î°YdŒãj¸ 2 Æ®/¬_æv ¡`ÜÙY<€Q°—Q0îYëÏÄÇ,×RjîùÌ‚õ|™ã7»õž¹ D,×RÃF’Y°w£úÚ6)xbÁ‹ís& –×F(ï¢[ðöXƒ3 ¶¶fw0Ëû0 –',ç#Œ«<š'š2 Öã2 Æu>6!nÍ4X¯3Ó`œÏ™wÅ4ǽOÿ¦Á¢í'grÛ ÁøçGX½2 FŠg˜U2 ® MО™`0βÙ>¥NóO ÆO¢øöS1 ¶¶ý 7nfÁ8nG©”ù›™WQÙ<–Y0..þsgX°<|bÁ¸Ô9çù –ç›Y0š¬CÎhŸip…¿Æ(ëÄw¯rçDƒqçvŸåžm™c¨ÂËÜÏ!,/h°Ñ`¼½&Åó| ÆwØâ2æŒƒå® Kß#Œ«üŸ{¤„ƒq×{X_3~·­ðUîp°cïtçüúÙL‚ñ³%ÜÁ„s4ð `þ»ï­Oým.v„O?Æ\übÂ' ƒ¼ à‹ ÛÊÑ닾˜pßìùN¤ÇLøÕ–˜0¦êºÍùQ˜0æÀâµÐ„ 
ÃúÛ¢†YœµÁÄkDŒåE”m0J¸£;!aŽe‰ Ó ËH˜#gFÂÓ1¶n{Âëy^?!áŠÕ*gMpfÂU 7w`&lmö9ÕiÅL¸Âóäg¹ÄPP™Õ³?†P Ó<†Âvœ-Ây“ 0Œ‰ì[žÚ‡Âh5 Ÿ‹ÈL\a¥ÇL¸¢øª MisfÂv†y0 N·y0˜IëÓ JäÁö%ÕHdy0<^¢x­ÈƒaÄ7 ‚<Ç‘<†)åô’åÁÔowÁð ó'7÷#*,´ƒ4Â\}ú,L"a^Ýçty|i„íÛu‡å—FøÚÂð¥ÎbkÖ#GüªÇã+a[³FØBó¦r¢–ãD%|”Hê•°ýC»·ºOÑ/©„ÁëºMó|J¤´ùÞ“¨„íÇìSœ¶!¢ÆÕ†ñš¨„ù!³J¸ÂB}Ìš_ k›¨„7Øên€a£õp@x©„O»Ø¹!ôR g¥3«„Q£ŽáÏ:WUÂönØÈ*á,Y$0>ñ0¨0Ë Y#̲DÖÛ5B÷=Ék„ñð.ë—-`0˯X#ÌO Û¸5¾¨„YÛÇ*a–f±J˜õv¬–¶‡ ‹p‰U¬b•0éD%Ìâ+bÃ,Â`6\`$î'̆ \p»WÌ0 Áˆ Û'8i˜ ãïë™™qP\ÈîYŒ:sa|Ò5|dˆ [“­ _'0<Ö2{PÃp5F1Üò–+!0Œt@Æœù.X®†eƒax7¢ðцqœ½ g2†õ73.˜gŠ3Ãh»ÂKÁ°—ÁpAAé0Y`x¸ª__1.ðJŠK&Ü÷ûc2 3jÄ¢ç”Äf2Œß´eÍÄŒ†­m‡©Ëü| ³ù5£a2±f2 «ícš© Îdkì;쥘 ü>1sT#2Œß< ŒÈ0;{.0âj‹Qg2\`&–ZL†1³6lñMR›É0Jïpd2\ õK-&øL Øž"m †qÒâ¶)Î`Äâ~Œ¬¿(Æq[ØØ1Ö[È`Ø~Óf¯fÊ`GƒÕþT?`¸@pbÁÌñNXÏ”Á0j7&›¹pN¥ZGca4ÙÇ^æ>1aa´áóÆXÏêkEÆÂzß“»=ca˜ðÛ¸æqaaûͶ]žáÉXXï!aaòÙ'*Œ“ù0OLX:1a.ÀL˜2!a=,#a®. á¥3¯éÅLÕºksÿ˜˜0¸4rs‡–˜°^KfÂp.¶®yÄ„9ïõšz É„õ/3Fv•Ÿ`R 0bô/¶ 0˜ãôŸúò‚Â|JÂ܃T"¼½¢Ö¨0áwÛŠ`½¼ö± a~E*ÞGùŒ ã%Ù2‹¿0æ*Ì„q6öÌŒHa¸V{°sI#L]f·õô¨@ØÕ  õð«‘Âò.Y!,¯‹¨py€Ý4*œ>PaÂúXYLß“0a®aL˜¿%aÂvŒ…ÛôýV°trÛ‘µî3éR¨0F&¬hOg»™ ãÁØÙgš§Paþº… ãœ×qÕ5À™ ãû†Ýø6ÌTx¸ˆ}Ê UŒÕ^›9ô…µËe(üz< ëq… –=Ý>*ç¬×x¡·HPX?‚ÂútX Ã?€„©s&*üºZÒë‘™ ãjÛ~Le£Paq… ëÀCTøÕ8ÍâËYKþ{©ðT¸Ø¿ÿ$œ’Š„y£O¬"Pð*ªºÞQ¿|à9 ýâõY„u{fÛŒo™ïˆÁ¬ó<݇‰0`X-^C‹‰0ôŒf’ÁDX| È/¢ àJhQÉ/‚æÅ/‚Ä/¢¢»y-2ñ‹ µ¾8FPî/3a^ùŠc¥81Æ8jÃa™úhvŒ ´6fÂv¾óŽòâAfÂX´ÁËå¾³P ¹W+3áÓú°Éf&Ìh™0.eۦ̘0(-0®’3–•#1aYð–E61aYÔæzQÌ„ÁÚ27bÂXd׿Unˆ Ûél‘ºÌ„µ)3a½ÊÌ„ålÄ„º—R`$ ¶º7ßUd$,—BHXËHX^!a×Âiž‘° BÂ\»Š‘0®³†ïòDÂÂ, ø˜‡©5#a½±Œ„ñË6»\GäJ^ „q%g÷½YÂÒ·—AdC”B@¸ ¹csß/ˆäJ¸´3=©’ÌDuÂÖf‘·½Š/‚ƒåÙg ,ô‡€°à¸7dîO!aÜ› ¯ÕámÂx;öÚ·©¨Í&L¯L˜¤2aêzÊ„µ‘˜0}âÊ„i{F¡0ݺRam$*L»yJ…é S*l“ëu.% Qaš2„ ÓÞ¢RáV(¦5ƒ2a𱕠£šÏžôµÄ„å0¦€Q™0E%Ê„-¨}pâGXX¶sX›&ÅÂùs[T˜>z¥Â4(¦ÙH©pÞœT(,}•¡0  …ù˜ £^€Éõi„öF†Â´½¬PX/–¨°\O†ÂòæÃ˜ Óü¨LX‰ ˯F¾sI‰ÓêE‰0­•£ÎݱTæL„i}¯D˜ÖmJ„)´P"Œ¢Gû9ù}}-™¿Ï0¢$•ð7ù£béýˆë^0x”¥qD&0˜2Ä7%a«wd¡Á;¦’½îAŠSÞŒ6–Í,Ó`” ŒÒÒBƒQÚ®èë`hù¶Uù†a0Öè¥Ì=Á¤+dL ƒÉ Q`0 óuóµ=8Ã`Îàç8ËV &©Ö ÛhçY¢ ƒA™®âUµËqƒjÙÓj÷„º‡ÓIQÃ0÷`“ÎLœÇÙ`Á4Ú ¦•œÀ`{³¤aðDƒmš} –ãWO:Ý—a0íü æ y†ÁÒF08ï ¦½HfÁÒ”Y0¤‘‘M·VfÁ´a,,¹l¬vc\bÁ6U6®¦¶Ša0Œqmì›é† ƒwüÈîf´ ƒ15÷:¿Á°yæzoßT«ÙV×§pº@ºêÒtÁÛ _0+Ú²oO ÃàbÍùx 
.P˜zýwAÁ68_È+š§!¼Á)´¸£`¨µpxÒ †ÁÞªå‘Ì0 Þ¾>5]ŸÉ(xC•G{#ü·az®'UíSÉ/$Yû°':aŒ¯o+.ì$,÷X°uAŒuálA,Xn€X0Hl8ŸËIbÁèÖ¨„ù”“c¼!”|–_£­Û€qoŒß´ÎºïÇoƒ­ö™”"0Äê:_4fÁX(Ô£¹‘ ±`œnU6fŒGfwzLAÖdÁö·ýI {EfÁª|o‘A,o#õ6¥pÄ‚å8bÁ6Ûì>ÑÁ`h?lä-oeððWÎp©È,‡•ã ÕpfÁ¸’kø[< 9³`=.³`; ”٣͢Èp©H4xë!Á¸˜îôyí™rõ¶zd:L‚1à gÎ(D‚ñ<¬Lf›9p\`y÷D€qž0ñ¹"À¸>Ö<]†°— 0Ž+£$èC‡3–çA…}‘Ã5<ÚóÏw8†L«jBÀhC©Ü)«&Œ:Àqü#…'<&7{O[ÝŒ€¹¶0#à kÐ;2}oˆLlHšû|„€õ¸Œ€q¨ÿ:S#ã7-øœQ#`¹wBÀòfŒ)ß„{7d¼¢0GadŒÃàt†èwŬ8ÎVÝÉFbŬr•„€qwÈà˜f„€ñ›O¥·UžæŠaˆËÝÖ[È—Ùî¹n`,gËüM;–”-»ëßfþ‹Î…¤Û¹[MüWú>ñß …²Ã³ù/îë¼ú á™ÿÊùˆÿâñ£Ðô!!þ+ñ_é&ÄŸ*²ž+Êü—$ñ_éA™ÿêÙÿ•MüW.„øï†ÒÌvIÛ>4°ýÝÆ{¼Üv‰è¯Œ'„åÕþ•^NøwƒŸÎŽÂÊ‰ÎøWï ã_Üy‹´[Æ¿ÒUü«*ã_\„­ög>ˆàß ûê6öý×`4¢ Îu} Æ3AUnWØþåoXð/Ž´ðfs%!á_î+‚ùÞÿòP$ø—Ÿ¨à_~…Œ_§Lø—Ÿºà_ýQÀ´V ­S2MاA¿èÖû¨ótwËc-=pB¿ÜÕýj÷ ôû:2£_4^‘ä-è—‡A¿8gTõ`¿¯¿ÎìW/…Ø/Ž,6u»+±_kÜFëwyùÈ õÙüÅY$6!Dñ>J/›Ë³‰þr(!ô—‡t¡¿Ú©ˆþ"Z²¥Ïbâ‹¢Ú1þåVø/‹ÂqF›pgŠšðßבëÃ#¬ƒàב™ë# ŒŸ…¯aõ#3a¿^8güº L€ñNîð%{0bßÇ€¥SDE ¡k¤î¼œ"°\oží*$˜Ê^$¸Û[˜ûè/lë|Ò¾•wXQú¨ýÒ[ܹÍÍ.AÁ”¶þÒÛÒ ÙŸï*rì@ßÈmíÀø½ªÉÙ9ìí† ‘àŠ•Åýìš}Œ¶Çùcš'Œ6t-·´Î$Øþ¶á7<¾ ϸ—ÐúdŒë¬¨˜è•æ ÆùŒ˜?¨&‡ó¡Ì†WÒÊ,x´m}÷U×1XEÁà:ãÇc±¹Åí FW+S “að¸øïoa0^¾TÿÄ3 Ö;È0mv²íìS#›`0Ú[n‡sC‚ÁõBU‚}ÑÙƒÇSÁò´!É0o¯Ø`ïÊÃLƒqÜ—ŒùgŒóaúð¡!Ó`gkÝmªlñ}¾J0M7¼²æ7Ã`´ÁÆ"  ?ynÁï2 ÖO$Ó`=_ÆÁr™™£É†ÊÍ?áLƒñk:ÆÕ_ÅB¼¹—Ù0ÚTéIfÃh³Eåí]!³a´!ž ӇĆÇqÅ‹ñf2Œ;°ecÔ^Îdm;Öx³Ü@&ø;ì{I‡Œ†Çs¬íìmÓŠrãÆ-.Ÿ¦x £i?V⌆G›M3S™Ð° ®äê\WÆq[]^[£ ynh‘á0>GXkÔ©.Ïpç³è£O Áa´õ-V¢‡ÑvîË-Ãa´Yx>\£m?k+m‚ÃãZ®½ÍÜþãÆV!bÃ23'º`7ù]ĸÂzxfÃr̆ÑXÏ(ÛÌl¸‚ <dW$ËO“Ùðxí3*ù¢lãâ ?±óÈñm÷¬ÄhX:¡á÷,4<>°3Üê 7ßljó,òŒ†ñF^¯['2,]ùð_žÔ®p„X!í™[”,f>üz]™ãHû@mBNÒ™‹µ‘ât?è̇Ç9÷âõ€˜ãœ÷-ÜÜ"ñáqÎűK|Ç|Ç\{(ò˜ÛAè.Éc><úy C9æÃòµ1½.•Å‹´Õࢠ{øå1¼F]mæÃz1Æ`z•ð:d>,ã"óáWŸL|x ~5Ì ‰¿ÞFæÃãùÔÈÁg>·¯‰ívWõÜfýKk<²?ž…´ûé¹&ÖØ¾>"1l;—"1;CvÈÃøÓ]%3¯…å;^AˆSË9ú…rû£”yá …åQ§ËºÖÌø/H\™Ž‰€ôÏ~)Hm:½'ïŒÛ8½q bvln^‘ÝË×óò®löPOç/ÈÓ[åS€šŽH¾.ðnݼ/£Âȱž&^J¹ ¨@dÕÚtžw(íHŽªÛAÈ¢V I$°ö4¢lŠ÷eÕ(åÞëÓØs t—u¦'3»Àš.;ˆÖüØgcˆÎ¹>‰µÝTG·ékmÇÖO¡xÔkE ‹ÐÇàÈÛEÖh£D(÷A¢|3…¹–Êe=÷qªmWw°ëîè­ñÈÅT°OPm<ž}ÙÞ‚G®Šbg.§‚|ÖØBî?,½/“ÕåhLUªu=û@¦Ÿo±Oæ Á#Žûìzÿ@“zzézëU6&Ä Œß«5^ë#¹¨ óÂáh¼cTF%“ØeGQÕ\Ue‡„ËË×[cE‚Ÿ÷d*Wb×UáS§%z²þlûÉÊ*Öˆ"?Ñ“‘»ùæ7V\[ÅâÆ ÉcÙÃ2ºÊU¦,¶¶©¸Š¼ 
,m#Eš¬kTu÷¶+FåWcÉVH#‰„QÚ;2XúâÚ6's”_ñm›¹m©=¬UÕnoKEV@ƒ×^ŠMŠ_Ï5(ËÓ9Î\f¢’Ø)‡ kLFа۷4Ç€ãÊuVÈêu4ÞkLƦp𣠷 k¨Ðåd :ÁèÇÀÒCîø<˜Vr©d'ï[÷1¾í(±ëýiͲïÓ—CL³Û™Å Kƒ¸5:²Üƒ…~K„ne¶.­%:Ü^‘s9öL"t.Íc=‰ÐsAŸÑ´$èøQ{l³¢—5^I‚Î5}õ¶$èøÍ{¸Á<öÜ–Ïw–$@çò;£q Ð_¿¹'º^ÌY“]Îx$ù9޳rø¤|¶$?ÿ qÉÏ_g’ŸCè‡À'å³'ùùëZ¯´úeC k¼“ü|tS)òÐ`.!„¿K2°èfÊ?­±¤™ !AT›"ä!U,mj欱fÓh =OâþÓx¤Ø)s ³Æd­çl)DÆ­Xì\Š_í™âzŸ=…ȯse‰¸¼ k\1òëœwò ‘_½¶d²acç¾ç¼x•"kÛΆ!t¾kÏ2¤MÅË%ZcͲ|þPÁôÔéã¸ZŽ©ÿ_gŽ¥_=ÇÇÖ-jèw^=ÇÇò&‡Æï§z1¶ÆjêÅô> LxÞ~ª¯e]r|ünLñ1•ʃIMŽ!·±!ø‘â`<Ïñ±<¶ûÈñ±]×yB3û<›»åø˜ê[ÆC|í® ·Æ3ÇÇdýb=ÇÇÒî+ÇÇPl”kênTåøxÃ@á÷Ѹâc²%‡„6ùˆðµî[Éñ±¸çøx,`\wsޱql­·ÿì‘ãc$‰ |ôóИd=gËn"7ò=Ïéag _·Ò³Ÿˆ5ÖâåF­ñJ2W\ÉQ’ζO1ìCR€¬ôl½þ¶¤Y¯¤ìÙWäݸd¬kúY|}G‡å,r#ÏðDmѧñH2—x³Æ–ÝE —íýºýœgŠ_tf½Ïž"d\ÐŽS=àb‡Éý²Ñ ºS„Œ#! žaÅëWŒ¬× ÔÞ¯ä(Ët6¤†åz„¶°8f.ª)ç ™P4¦ ™ªZ㑃d=²å yƒ)ÉLj±¶3ÇÈüØþ–GeþÑ+»ôݰöèS|?“K”õu›V3Öxg—>xÔ ãY—íHÇ;WO¦ç}”dÓ'g<öìÓG…Fcò飒ÖX³OŸþì‘údt9ZvêCtzþ‘5žÙ©OïòÌN}7*G÷9z6êÓë¹rœ,Síɯ¶-ûôÉ#hE{ñ÷eizÑ–¿’œ©zˆ6Ë(ñ¦Ô5æW";Sê™èJž'LÍåºScEÝRâ,aêçÕõ¨kO˜ú µ%‰û*¦Æ*Þ:Tx§¦¶ÆfÓ_x™§©ßFv䜉 ?<¯'ajë 'ü„]—15Úl"žæÅ‚©‘HrÙÛn‹D¯  2÷çÙ1¦> —<¼î¯`êWcÆÔHl±÷îØ(õ^eW3enL©”7º]þ,”z™HlJ-»L©1.œÇéåȘR൅Å&¥FÞN;úÕJ€è…[£½ˆĈR£ñ(‹¥¶o ŠýcöV¦ÔD˜5TuL©lTlûLiJOÔ>ïYSG(õ5Y_´ˆ(5~ö‘]O„)õ|ǵ/@£t=Ú¬d.Ú†w;lŒ ©ñ£­.“!õW¸s-ÃRãGíŽæòõ.¿y"AÔhV]Ë7BÔ ‹ÈZ¼&ö#FŸ½vOœFmó›ý»ˆ!˜QÛ‘ªxáeaÔw|ye!aÔ¯ŸÍŒÚíJ#>gFýúÙÁ¨+2ÝÏ0™Qcކ¯ÁˆQÈÚ5ùÜÂŒW²Ùˆõ8­ ¤¶Æ‚Lü›2¤ÆÁjN¯ ©+:J:efÔ¸ž}dÈ5o\8B‘ëðt\aÔ‰j‘4,Œ±È?½f$FëÁã÷q.Cjk³Wzº4› 5.雳Z#Cjä'Zø{|©ñ«ð](“|gFmmõ¼öæC 1jh#Ëùär£Æ~íY¼°–@ê!†+Í» ©q$Ò|çƒ 5.¨œ^?K(µ5Ú¬xmóG3£záÞ¼aÅÞ>9íŽ}Êɧ…%Oœñ42@mXp?ÆÓÖxÃÄË7hOWl{/n-xbãæ áiô»šê{„§_”ñ´5Ë7ƒé4®â=Ÿ‰ˆNC~¬ŸUˆOãzÎO#¼¿ž¢šOcâÓ8^¾=‘ùtE:Óá…©…PË} ®×FNÑ„¾Pë} F¦o¯—›á0¢Æõ up¦,0¢®H¯Û¼(´ êבQc °UmŸz}FÔ¸ »òif%ˆGÚt‡FÔ8Ò–Öî“4µýrõªÏµ„¨õQC™^‹\µÈ-QãÈ ÛÌÏ­¡ÆõÀãk&«0¡f{l!ÔzëD¨+cG‹3¡Æõ@‡àŠ"Ô8r‚YIÅ¡G&Bmm¶ô‰âD¨õFQ#ïüèQ[€5ެÅÅ]¨qärÞgDûx2îž>Eˆº>Io³Î– jÜ€`ñ#3¤Æ„w†=„@jœs « Ô¯#3¤~3Cj‘ë8¤ÖçFº"]§Di †Ôèt6—tc Rë»"Hw·ð¤gH øa=§ÏúuR#%f»¢öCj}p©_GfH‡p…_‡@j\-LH=$Í¿jËýYõŠ!uÅ*ôôÚQ©qƳº ¿@ꊼÞÕ.Rã”õ˜e;Qã¸~{53AÔrŒ¨a÷p‡ûƒ ê×ÏfD]‡ /ŠA0¢~ÝHFÔ8ò¼½Nš j=烨q™+ndB#5Î-<&Ô8—ýØ\­3 Æ{zÊ\Í3 Ö‹$@­—“ù´¨ã˜O¿®5óik„׎%a>­›ø4m¨5Ñ„Oë‘™OëÅf>ÇÓ¶aäím):¶­yµæÓú`‰O¿ŽÌ|ºŽpÁ+$ 
ŸÆó±kõ"+̧_w’ù4ˆl;½r”ðé×Ïf@ g±~EÕÔøÙËîn®åPë­L@Ä?d^þ×P³™jœ­†'‘ê ,Ö¢þj½NÔ8ò¸£Zêב™PãH¼Fßj"D­œu$7 ?1¢ÖGNˆ5?,fÜû&D÷pêaD­g$D­õëŒQëó!D­ï„µ>vBÔXmß6ù!ê×ÏfF#mâ¹ÓgF‡p_^xM5ŽôÏû ©õÈVô©}oyño³|Óê­LÔ7­>¡û\™L«Çêb¹k~­¶¨pó%À‹VÛÓßg¥Û7­æF¢ÕÚH´Æ ÉДiµÍwö¡…ÓjA`u;’Ϊj¦CŒ«…ñ1®¶¤ßK¨Ã¸zd „ž„hµ’¢Õ"egZ­˜‹pµ7ÂÕº~"\ °Ãû`)®S<¿%ëcÓð”q5Æ3ë˜×‡¸™ß-ì‹Wcì·HÔKv1®–é–q5ä[xÇ ®ÆXßö3Ô½„«%â`\-‘*ãjDò¶(š…dWWÈ{·Â`ÂÕ ‰7XëŒä Wc<µEl™;ÛÌ«eìg^]Á=Ú8ójäè¯ò{„«á G[Ç„«a¡vì{ì °†g›õåŇ3°†e@ ÷:Öu3Ú«t&5Ò lq( ¬õœXWh¼j kÅWŸ«íÊ-^ ý/ójx­„”òêéºé¯–¿e^ ÃÂÓk*¯®Èu,åÕÜoWHã™Wc“ ¼öC^­ç$^Ÿ¹î¥¸&`]a@²Å^kØi]Í7ɘWëõ¦à·Èƒ^ }¯WÕ̼ï¨àõUë³c^ M-œ"t¯†ËQÜID€5 m˜ˆkéÉ ¬aaÔßzb^-7ɼÚÞ:l¥|+ŒyõŽ×sÅ«ªQÿËþÊ?;&ÖÛ×Q•ëü@T­©@­ œsé”IU K"ôÆûU5>F8𸮔±µýìY½ ”bkØçµ{}¬ª†ÐÂV1Þ“[ÃX±•ò1¶ÆGXÜW¹µõ•ýzÊ5©¦Åk‰Á“UÕØ•¹¼Æªª!oÚ¯êûÖ¤ª¶@ÍÂqܱªnZË/GtÕvÆ#Œ%UW-o„±5‚”~xèÁ²jxP‡%œ «mUg·H,¬†Ýo;}¯‡uÕØ&ëWìv±®Zž+ëª%‚uÕ’ëźjû—sWfHªå‡XRKª¢z€°»M¬¨¶7tº_Ÿ ª1Û“tL‚jØùõðûA5yj« Z’êXPÔÁ{©XPmƒm&­ó úDWœ² Žo%|âYP ?é~E@F‚jLÃGw‚eRT?^^:D%ÕðB.^ˆE%Õpì.îZ­’j>~>O#Iªe¾aIuGXå DT-)5,ª±2‹ªù.·ÅVQµÑWÜ \EÕØ<ê%¶€XT݀Ϋs5UÃfÑ&öË5Ì$ª–÷Á¢jøÊœ!>b^½Ãµ{[ZU7W|y/¢j€Fû@<+•EÕØ?p¿zÕTK^!iªA(Ãh’q5›uîàJ¬©îØ’ŽÊ¢©†UZ ™hª9›S4Õ¬sM5vå®Ðƒ1°†ÔôèÈîi<9éð<îP1°FVäGvoÌÀÁÞ\à*Ö’ËÈÀá LÖ’ÈÉÄZ>&ÖXGXW¨§ƒçŒ¬¹¸€ kdyb•3å Œ¬e2Mõ…êU—ûº1³ µ¾á*2fÖÈÜúîá!3k|[÷åFû¬‘’XÃIR˜µž33k}™YKÚ23kœñ\vz̬ñ«ý:}ÍÊÌZ;1k}²Ä¬µ»³†Ÿ__>„¢©FÜC„&šj8<6wUcIuA} ‰¤šŠ ±–DY'Ö’-’jNUI5&>›S'*I5çUФ…6 Öù6TRMR[–TÃ.¯Ý³b¬jªáb‡NDÕ2š±–Ô#&ÖÃñ~í1±ÖûÌĶö-||™XSójpˆ}ù2¯¢bE; pæÕ8á&ºoI5bY×X1¯–RæÕúЉWKŠ5ójI’Mµd^-©éƒWã6w`ü^\½®.ö￉Uö·}ì­‚i1}õ…³jÙocŒ~þuŠ1A¡ëj!žÎ¬ ›$YgVH݆ŸRüÈ̪±vÙO/+)¬Z fÕ°•ØvjóœãØÁœÛ̪1ÍÝæ¥I3ªVç6açöÿçöÿà™Vü?d©Âþ<»‹ÿO‰âÿ0´üLÅÿC’ÿÙÿC2¼ÙÿÃïåkþkß\¨Âþ’øÍþ’¾ÍþöEË–Wü?xtÿ€ TŸ¦ƒ³m6X2^vÿà XÜ?$¿•Ý?ôÈÎÙ)sGÌ?$ó‘Ì?ìGﻣ!óÉfóm,œÜHé”lþÁiTbþ¡GVNn¤‹e÷yêìþÁIÅlþU]·?»m™HŽ+›Pj0[H‚ï´þ°fkKqÎÖ…Á0V~ «Ö»cëI5fïê¨ìü!¶ìü!ÉÍìü!ÏŒ?ôgÉùC\ ˜RoÀ°A¾É÷£`Ç ÌŒÙøC®†Œ?$÷™?ä-±ñ] Û~Pm4µý/ƒm?Þ)þ–”yöýÀ¾bÀˆZ’ÿÙøCo…Œ?¤c¹ñ‡Ø0£–³±ó‡d¯³ó‡<vþ¡‘µ )ìüÁéâü±a?ÚÂg—OŸœÖÈ?K”ZÜ™RK&:SjȉRËÅ¥–keJÍé¢B©eàd÷IÆgL­?K˜Zî’1µÜ%sj=§øГeNÍÚð¯Šý]ì´ÿ Ôþƒ.Cí?Ž^WÚИôZ+µÿà#¹ý‡ ×jÿAß1Ój‘Xú‘D«õjÅþƒLØþƒM,ØþƒFõÿ 9R @èŒjBÃÓjuÜ Zm—üÍ/–hµŒ­L«õH±á« xÕ„ÍSÄ„.Èi5"°=öhÔ„ 
ÃjšèUK¯bT-C²ú°OIFÕÈ1~Ô#ÍÉÿ#{0ª–LjFÕ’ΨúuÎ̪%)œXµ&Œ‹ÿ%K«ÖülbÕr9̪5_œX5’·¡:ŽÄÕš‰¯¬:תÅo€Yµ6«æRXµ˜0«æÍYµX£¨ÿ=TõÿÐFòÿ ÷ȬZÏI¬´·•âYbÕjǬZž³ê {ò‘zùòÿ ·¬þäš þÔyÄÿƒ~Uü?´ü?èɪÿŸQü?èZÕÿã@E8ß–PÿìR öôáª~]OFÕ¯ëɰZûÁj‰E’ƒg†Õj×þôP VË «Ö¶Œª_¦Uë™Tëƒ!R­fP­Ï…@µ(æôÌ^æÔ(æôäÄüƒó>NÍ?ȈCÌ?øH1ÿà#…T“­ÎËüƒ GÄüƒ GÔüƒ.HÜ?Ø6$“ê—5ˆ¸ÐÕªýÓí?–…Ï÷Ù”¤¬þ6›êja÷öìg¾5ÕÄU7‰95Šaµvý#£jΛNÍiq©tÄí 6NFÕ$Пê˜òŒ"XìSmK²bñ^\UƒaZü¦#dTÍrIÆÔl)­FÕOD¸Ä©/äøG9&áÔKñÃŒºÛ\_ùû_Œš¥?jQm¡ðq»Èõ‰ìî³¹p5|AÔÈq}"no$µÈe~ŸP\ ª3øToÝ^¢¬ePÝ®(v©€uå¾,þÔy'Aø4JZ[ÈH™O«‘´øSw»;ÏBRÄ£‚wàkò§f?h"ÔÊÜj›5«„PCÅ_öÔúÒ¬:È„Z6>ˆPÛon½£PîhcB-7É„Zö=˜P‹C³ÚSÓvŠúSó‘âOMÞÎL¨eÇ„5Tá«d£ jñoVjëçæ‚2†ÔbPþÔk/_5o€¾Ü©i·Hݩ뵪`¾ì©ë¶ª5¾ì©mn€/HM»EjO·¥_öÔY ð‚Ô7JV×¥Ô,hL­ç̘šŒ©å.S£„õ*;)˜ZƒjÙ6cP­ª1 áS_$±qåº]LÇ Z}¦Å š¬ŸÕ¡š/Hªi×ð—:TçKQ‹jöuþ‹jûâÏË3QÔ¢špšZTÓZTg€£ÕÔ&Õl4-Õ|­…"Y'ñ²¨æŸe‹jvaf‹jÚ©ù&“jz#Š©‰´)¦fãg±©ÎÖ©úzYT¯}3Ôê^-€šP›j¶¶&BMg|Zþ†é4û2 ¦ÇñMæÔÜ(túL_Ó N“qµxSÓ¦ùFÄšš6.^ÖÔ™+›NoV©ó6“’i‚²ßB¦i{NÉ4yZ ˜æ_0Í&Ò¦iBÁ4=9ÓüLs£€iÚS0_󲦦 Ót™ ¦‰Ÿ3š–G£hš.ó…¦ †“ŠZ698¦À*j½aÓÔE”Ms£°irÓW6MµÚ«ŽÂ÷±é_ëúqØøxmÃFá#Bm× {qýØçQÁ‹Pg+Å¡ÎÞu/Bí^„ÚNw3 ÆH»Ò¹P³ÛÁKGÝàZЗȚ’sºjNVaõ£°äBM€¸ßng$€Zjá1¥–¨’)µ (ÆÔ"bL-a.aj›%»žðC%L-›5Œ©C½–»%jL¾õ\­ªmNßìÓpÀý`jÙoËE»ŽÀŒlù±ã3êAðØòCòùÙòC2ïÙò£`Í0ºÀÓH–’˜Î–b0À–>»€‚-?Æ ¤…@•-? 
F½+¿jùa£áº²üØ0Óo(ÈóCòäTÛ`b7ìsæËóƒlØóÅÛms‰ƒjIÜgP­ç$P}È{ < ª±”îÝañüÀ”1¢´Ôè{`GTYLá8f‹Ð]õàš³?TË!Œª9KžIµ¤×3©E° ïE0©¶_µçíLªñ‰Ù‹r“j$3YÌVÓDªígmŠXnÒDª¥ò+“j½Z"Õ°Œ¶E‡ÄdRmö„-4‘ê NLe94gRmaé 5 ªÁ ªuPWM©¾Ð¯mÉî¿J¤ú‚û·ýÃ9*3©Ös©Æj« p!!R}ÙÏX—ûÀóÕm«½wߎaNm}Óæ—ê›*Ì©aó|µð€fN­?KœüÊîw÷»œœzL—%lŸ˜SÃKûì÷Ö?âÔXï¶§>Æ_œš·\SKQjžÍ…RKi_ÆÔÝFŠE7„Sw”3áÔïÆŠwxïËš8µN§–½câÔóc˜˜ §V; âÔ'̺·0VfNmGÖ=ŒÓ„Sëõ§†Ó³½fŸí˜TëÏ«–n¦Õö³chñF¢Õ0î{¸‘1­>á~„#1ÓêsLña0±–ÛcbÍF ¬í2¾õ2ÿ®í J'`}ÂpÜúâm XËÍ1°Ök%b-¿*ÖêîAÖt“'Z ‡»k>NŒ?léÞ›o(1°k5Ä `­GŠñ‡=âÛ-ƒ_Æ|A¥ðKe&Öö³¶¤[Hš„ôÄõî~á͸Zʆ3®–Ku^-·Ç¼ZΦ®³–ÂdÇÄ«Õ/„xµž“xõ /þº,¬—·•ž—E`Z-7Ï´Z^Ój½¢ÕRPiµ>ÓL«ó VËüT…V¯QIh5wÔ—åG¶KZœ°j;î´õaøL«faÕ,VÍÅæ_†Y?$¬ºÁæwŸraÕr—“U7„õ¬n„!†¶l°×Y>2ü»gXÍ–8«³¤JPµG¨ZÏH¨Z^?£j½±û GʨZÏI¨šôZLªOP™Ý½Ý™TŸ(S±¹¿½jþ2ÄíãaÛ5÷ÚÅíƒ?8!Õö`ç»{‰T[lvíײÚ&R-O€Ý>¤—3©ÖŸ%¿êÌ©Åg†9µ4:§–«d·¹Jvû€ißÂ195¿a6ûGʘZ•Ì>ôÈìCNI”Z•¼>¤3¥fù x}Èõׇ äõ!O€Ý>Ä»‰)µ|L©¥—3¥–‘œ)µ6§–/‹95M:ìõ!ƒ5{}èÉëC¼¤ØëCÆãöšüA©ÿ§ÿ^¾`dµküáß~óøüÓܺã¬ö¨±-½—a•Ñ ´½A°ûÃôù;üÏñ凿üÓßüöþçüòýßþÛ/ÿü凿ÿÍï~ø5UÿŠb 5¶ ªÍ¥ø‡Yß“Ø2ËææÂÎë¿åÞñ©Â²ÏÞèõó÷þ¿ýúþ?¹ù¿Z²’o”ßO£×íçÆï¹ý:‰û†Ïú'oÿwÿûþõO¿æÖ¿ÃŸAóðÞöó~?ƒÜø=Ï`õyö0~úü?ÿëŸ~ûÿíícøƒ~í@(Ú^?ø rã÷<ƒ†:i †¦÷ù3ŸÁûÇÿþ»ÿñë>‚oHÜàûïPYp§Ö×ýçÆï¹ÿsT<´åMù™ûÿ_ÿ—øu÷þ+¿¬a­ÚÈÿÁW¿×ÿéc² ÎqœõË”nã?§_*þÁóŸÿþ›ûž' }ž öŸê^ÌwíË~ãIçt³nû¿ü¤(ÚÚüfãOçŸp ×þu]Âüç^Áðð‹Kˆ?}îSxžýìñ§}…æŸõ&üæ>õ æ³÷ïÀÿôº†zbþ¹fè9Îñscïu4°$›²1åÖ­a­`ïÞ_ðß•Ã~ü-¶çñ÷œ©}sl±Ú¸ø‹Ït|Ç™n›ƒ‘|^ûˆKá™N=Ó§Œ¤kÿ/ïDsÌþÜ‘4ßiM êO›<êú“m$I/ƒ?éz **[Ø3òõäÆÏš\+6džˆ^NnûäZr‘øÏè+8»öºŒOî+ãz¤¯ðõ|n_yÆ ê+t9õ•_0VcÏÐæ¬˜~ ÊÔº ÷P½}iÏHýí3B> ªŸ=Ïvîв¼ÎrûYö_yšÚ; m·­o~æ4ǯ<ÍÖ:¬ÈËu\ågNsêiþÊ)Æ>UoöJ¬ÛØ+þŠÄ7˜Z¿×dÛs†ùÿþøãïÿôŸ¿ÿË_þòûÏ?]ÿñ)„ÊÆ°¯×Ö\w‰Ðèz2q_s• ¯:JsáW™² !m ÜØ\j^¼½n<:%KX´]Y›sëPø5×^_«=]¯E^Yá×z¨¾Ÿíö ß³1ñ๡€«PúÅv ~æ.¡)A£-Z'ŒÞPˆøˆÝ¸ ¥_ e{޶·eë³Y¡ô³Ž5а]ò-]¦UKÒðÔûBBJ¤0Ö ÏñPÚz£æå(ÎØ¼1v¿ìCÿŠÜp¯T öÑÚ³P!¶Eu¢™rP˹¶Uë¾ÔQµ`«¾=< 𽬠ðZÒV}½T¸»îY·=>ÐñÛ—e[!ôsÆ]/û®•\T¡ô³þóŒÅò#ö»ŸÛó«xõ¾q‚ [ꕄꓜioŸrʧvûq£ñêQ,´"é½3L€PéS{\QÊÁwìè¯ëVϬÀúÖyЗAgì&vW¤ÖAõ'⯧õÓ¦”©K®Oµ“§3£Ñ®uAç&à^O sn¹VˆÌ¬çŒ®\!gUºË¼{íäÔ×½‡Ý}­C6ýte;J±Gâ<c°ž;¼ "‡µBŠj]gtåjó’Ýb$–TÔló]4Ú*áŠ"iw›}ù„íÈÒmÊ†Ž¡Bœ” ÇWÔs_ž5ÞHŽuT½žÛYµaCb¯n¿^Gé÷cöå†ê«¨4(jì-'Ú£/žæ_A÷cöe°Æ†¬ÞxÇ–e…&ì©ռ͞ÖìË 
^Þ#‹ì¹Xaô]—jë¥ÃžG<=˜mèŸ}¹çbñ¹"·¶ÑëÜê¬nWáN0»2´pvÅ®‘Fc¨uêa.•ÔATc“óìÊØÕIÚ™z´Øæª¨™Š­àÏ{ŸÃr›:!‚ÃïÄÞ)þ¡}Ä›GDã#)GOÆ’7?./6äÑX’Ùû¸ë{Ëã“ „ ôKéFû×c|‹|sXÆó(çØVjÞò¡ñôSRÞ«M“³/ãÈ”Ë_[’¤áÈ3eP®´û¸|@¢°DRØ®í\<»˜ê9h¼«Ëxîi‹ };„øÙ’äUèÛíðqùÀÄÙ/üŠï)Ä'x¶÷>Š5Ϋ½mdšã2ÞX*ï¶P4½ÞعٰÑVŒl]Õ>§çrβ6Gül#ì=…©cHémüàõŒƒÏ ži{Á?Â߯‰íåc-ÞNxíódÇ’,àMÕdr€a±ÝsP†\Ö¦—c›ÁÇy. KJ?ÆP»o>(£èjÊÍÆ Ú*4^9ü8!qöaù€CˆMJç1oä^š½×ÏbÖØ}X>`×°¬ý0ÝÄv7[Jª}·Æ‡eÜgdn£)4¯»ì¨¯ãƒ2“g¦…0K˜ƒÆ™ôöþ¾î͇d4%/L©¡÷WÊNG£…bÞåé ”wÝÊëuõ Á±wcäÑÚìçáE¿cómå.Q”AƒÅ£Þ¥ƒXãµ–zÐ÷­ÌmÄ"çíC2ÕÉ"ã!”cN–ö9Û;yGU±s:‹ &* AlÙ…¹ï²<Ò«…˜mÛJ…µ+BЇ÷b›»“§B´ÐŒ!.eÕ ªë¡B´…²Ö”S†¨°{˜<&C b¶¾·¥ZCÐ?CÛ~©¥ñøNå{ê½/ÜxO¾e3Ò³ÆûLãñ™Œ`à†ºÃÃŽ)yÖ¤«7Ô•1C‰½Ê!nU©’ùHD‰8Yç€û\Š—×º±Cã1 fVº+ùQn`O#"ì}<Þ¡°Tz¬–6OúÖ± § ïLj(—¡÷ĬS“ ’sG‡­éBBŒ_ÄDŠ—þ4ðL[òÞWÙP»þ%¾Aãí!+Z.‘üHVö VM¡çÂܼ§ šÃÖtGÄÈmä±]eqXŒ…Hp4âí×|h7VFÞ“ ®Bmh õ)&¤šÜ·l¡Š Õ‡cë[yvX† yÌËšË_dúyG†Fçî^ø(uIøáXËòš}6ûÓt,iY’ UÈV×_[ÄÇú’O’ØB¶8ÆÛTŒ¹q%âcD“{ù[4†ö}Πщ÷ayÿeå ¢óß)«oä~G|,ãícýƒ1=W2ïcƒq 3òËç^ÍøíöYixŸµ4§sØ´­ð~<ËMxð†ÓÛz‚ÌP¾zx ¹˜­8lY6ψp9†c¬eWbõcÓçÃ1ŠœÛJÖW2#GçJÃqkµÌì«‘pá1.ç*Ý3}Ðx¯ð˜›Q&v…ÇH±¼¯¹©=R#:¶å _Q|Ô-ŽèØ–C Ü`¦]€ãì[£ Å{ 5©„шCO»@ãÑ1ò©.ØaÌA ZÀˆŽ‘¾{XL€ãœö"=÷¹¾‘Òðn„ÐGcûCÙ[$þY 8®ö°Ùlššöã@»¶™µ€ÔãP˜âZímÝ®0EÒòÁ1nä>wž…lùõØMö§w숧£ãpñŒè—Õ[y”ùÓGDÇXÖÚÇæõáí ®ìŠq­÷û1zxŒ’ô­Díx´…,qHŽPŒsJýޱbôñ uµ^kñ€0Âc<6ûœX2FxŒ‡¿×Í“–è#<Ö`w„ÇPÇžÁ$,Ì[?ªýÄóPm1×"<1@ùÐYAêhue|àgŸñp å¦|L¶#!›:g™,4†N»…]sÀ¶¥\_rA®y³[ZÜ,@y4ÔÛ1N;ŒÅµ^KØ=^®¹øFÛãc{“×ÚʤE æN¯ÊkCº-Ý|@F¼¶Y¤=ËÊóFH¼©{ª“õ߯eEÇH«].Lc[™[þØ¿×Zà—J…O#ºÄï=Q 4®g¦!j|ÈÀô5y&ÔîRj&ÔÙÄàŠ¨1CÚÊí§ÌˆúÀ$u­•5!j¬:lèXÄ<#jĀܸ²›õŸ+º&B‰ νu¶e@5Ä~„áê1êÁ5Á—s…JËß_‡ÍóƒL¨Ñx`OhNÍD¨1ßÖ]Üš5Ï~ù\H€m6ºÍ~NxS½=îøOcÎ$Ã(yg˜¾ðô3—X`: <žÆ,Ô|ßÛOÛã‡tLB›ð4&7{ÍÅÛŽ§Ç¤g’É„§Ñ8„Ê3S€ð4-’KìúÈ9úx¾»ÝÈì O£mŽ×A W^› $<‰ß®óòBy„§ÇîÎÕ//òI€z4¢f¦C½ ¨lXϨ}fá Özû‹Ì€zX.l÷«½ ¨‡«Â¢Þsù™5·*è6ypÔìýoî‹M€z¸I?ÿýÔh¬ÈŠš+÷̧‡5‚]ê¢é‰OHÍ"P7E$>}ŒdŽít³6âÓjqùô÷´½“ùô«1óiu8 >=¼ø×<µ{â(@|zT™´À5˜MæÓRŸžð´šd<­Yø„§5ó›ø´–§'>=Rí‘„ÑA“›gÓŸÖlzâÓ#O½Ú÷rœo>= mn>[2ž~µ%:­9ãD§ß‰N7û™À™NÔtzøTXå^dD§ÕUƒè´Ú_¶”ôr™N¶ ƒÜäo™N¿63Ö¾•éô0\¸lÔjNo:­D§“;‘éñ÷ÐL;BÓ¯wŸÐ´ºšVsBÓh´G·Ór†Ð´öšL¦ÕkžÈ´&â™6Ø\Óâ‡ÈôÈ`ß*°ó„Á‰LÏT²6¡2›~™Ø´æ“›Ö}¢Óš¦Mtzä°åÜgáS¢ÓšŠEtZsЉNsÎTfÓšRLlZËdZs¢ˆLä· 
W:¿ÔGŬ—ŸÉ´žêà„èœ-I`š“~3–Ö8FëÕ]N3¬M„Œ¦ß‡õìArŒut?ü¹$4#íFÛ9]¨ˆMkqbÓ¯ÆÌ¦ÇÏÆVH&ÓZ‚ƒÈô¨ ’6ôˆLk‘"Ó£ìG¿`iâm¦µ^éÑØ-4êó›Ë`ú}dÏ^;Ø UqéÑIÒ¶åÁIê°XL;,‰K£ «ì>½L¿˜X÷NÐA`Z«‘8˜_VPI`ú}P"ÓZ¾ƒÈ´ÖïÈdZ=%‰Lk"Óã³´E^ud™Éô»1‘éQ-…‘¦­"‘éQ#yݽí^ѱ´•ìS@ÑCøH\Z †—I»D\zÔÚ°‘e"ÂÒZ„ÀôøÕín^‹Àôûz˜_«}Üu®tLËLž¹´–ðÉdúõ)g2=&l»¶YŒ¤¦éW@rVÚ¿/uô›S[_„ú´Ðúº>HO!|ýǵMÜw.ˆPŸP݆%›jÐÉ&gB¨7HKÃ%Ÿ 5€–¾ô&Bîªͯ'êᵑÓ:yðbªJŠLBÔu¤±÷å`F V׬sVWXgF­"sfÔfmÅ0‹Œ¨÷¯ÃÓÒ¹/jvïBïhïÓ‰P³£§0jÄrÀ…sÑDŒZVïQK½7FÔ8΢…Þ]ØD2¢Öeš#j]¡¢Öu&!j]„¢–Øžµ„fŒ¨Åž˜µDKŒ¨eöbD½#–ÓK2¢Æ‘Y­DˆÊÞróïŽ59„3 ~—µŒÎ ¨íÈ ²}ÿ°PKUXÔâ³Ì„Zìç™PK!&Ô°ùÝm‰ãÒd"ÔvA%«œ2¡.àz×莵8º ¡f_v&Ôø$Ò¦kj¹&ÔìÉ΄Zï5n`Dׂͮ¨íã´ß<·:ð…­íÚ«eDýºžŒ¨¥Ú*#jøx¹’“ð4’e’²„ñt/à€ðÅŸ–ª§ ¨qûÖÝÔ–5Çl´v¯TÔöÇkË™uƒÇ~4%@]`qhQˆKï Pø³&Y"j$ÇÙÅÆöê±(=×ö_ÔøÕ¬„$@ms™d' ¨­Ñæ²Ý=8™PãÈ$ B‰ûA.‚$B Gn[Ÿž¹B˜ÆæI­Â˜º@?bØénÆÔv Í€»[02¦†}¸½ª˜<S[£ý³ËKm2¦#°)Ä­è™Sx¥Ýg@â԰ȳ+òR›Ì©Ñh¿æÔ˜¿“¸„95lÒm¹4Ž85X¾ußÈ$Nm¶P<<.fN kv[ã:#L]à¾6Œ©í@зؒ%L ';½cÆÔ¸ûö|±Î˜à&ihTˆsÏ@µ5Ú+,ï¼1U—SªÆ;ÙìYúÌô êÁž®¨Ý˨º`Ép¯ì‚ÕˆílÔs±j¼(”ÓòíèL«-`´·»GÑêöX×¶1Ñjë8{;1­~Ý{¦Õ0‰´õµÛø3­›)a³‘hµ>ÂÕa!w Ž›quÁj´GácÆÕˆšís¹giÆÕh,çÚ'\ ²{ñJÌŒ«©ß5vÀ‰VãWáÞí£ Ñê×mf\­}Žpµ­n[o‚Ù? 
WãUïœ @ÂÕ#­­ OçÀWÛ)(-öœquÔÛ +¬íïm.¹¼Ä+k¼elšÎ¨uA•Ò{’AÀµœQ¬€uÈè¾ü``|¦Ìˆ5ÍV£vkä¿Ú4áh=ój܆Í`{HÅ3¯Ö%^m)K‘q5V¡fš˜;Ãjkºv[+Ì}:†Õã_ÚdêrO‚Õú:VÛÏZ'Vk¿!XŸµÆ+Ê2¬F£uÌ@d«õ…¬Æ}BãÌ%ÃjyËĪõ.‰Uã!_½¸ ‹Yµ^ÎdÕ苈7]¶K¬Z††Õx¦ÇKZ†Õ¸[š˰Z?EÂÕzó™VóxðZÛ2«ÖNE¬ZkfÕUV½|/ãj™ZWã&ïmqÜL«e8fZ+öëê[G=žŽÁ˼0®~]OÆÕ¸Mû¯` „«õ¯Öσx5ÇýŒŒsâÕ˜#7[J®AμZ;ñjí=Ä«1iÛütu!O^á>zPEâÕ:V¯–IŽy5¬G1vÌõñj Y™Wë(O¼Z"æÕ:æ¯Ö‹%^-Áë2|ëWŸ$b­S kÔ!B07k½O"ÖzŸ„¬õ; d=Ì´ë>ý?Y£÷Ø2Ê+‹3²Œp%ó3²Æi H¯hÄÐßÁSïjþl‚Ö:…´Ö!¨5b{VžýÉÔº W±^4?Pë‚ÅÍu<I¿Z‹'kd$X@0RdÞž¢ %bƒˆKQÉõC`¥º~dÛv±ýàMJ&ÖØ‚ŠÈ¹Ä%b9ÎúÆçÌÄzÔ‡Z™NL¬QYÊ> [¶To\ÄZ*ß2±VîDÄš«›2°VzDÄZ*%2²–R‰Œ¬ RH,À*Þ˜™5ÂŽón!ã$f ´”sL‰Y+ÌËк +ó•ñJÐÿÐÌR\¡5~5¥A0´ÆÏÚ4ÕOh­—Bм SäT0´–o ­qº†º`~d‚ÖJÁZ¿®3Cë×ÅJ5È–’ÓZK5H†ÖR R 5Üeh­?ËØšû+Sk®6,Ôš‹|3µþL'j]ÆØæ©‹/j˵ µö±ñfê`jý:2SëEÃÉÙYW ê^|h]5‚µ}ëû˜Z#µõ^ù¬«¦­Ü€Ö#÷ÅŸ@kLÉw w–UË€Åк ìò0–¡µÞAk½ÒUÃIÃîfûPWÍÛÑ¢«–›'Yµ\+˪ù@VUóF¾Èªy[tÕòúYWMÛüL­¥œ0Sk®+ÎÐZ*53´ÖÏ•UÕÛךÝ"š”G§Í RUsutQUË «ª¹Ì½ÈªåzBV<†~”æIíã\Ž[´Õzºƒ‹jÓ]¶Z$ÖVk#i«eo†µÕò®X\Íå¯E\-G²¸Zî„ÅÕ °¸Z9‹«¹×±¶Z.–µÕz=¤­æBÞ¢­æÊ⢭֋%mµ6’¶ZÏ™ÕÕr ««á˜~ùYäÕò£,¯æÙnª«e>bu5ÜIlŠr±ë«åçH^-mY_-M$¯–6RWËGÅêj°¹dùÂêj™¬Y]ÌÔdÓÆêj¾VWK‡buõ»ñÎQ2]««å[euõ«HNuµ6’ºZæ+VWë‘$¯–ñŠåÕzµY^ÍïDäÕÚHòêñÂUMäÕÜ'—ÀšÞ•è«yt}5¢¯æ¸ƒõÕ<ÞˆÀš?aQXs(' k€Gp×{“ÂµŽ¯-Ö÷¬°æ‰^ÖY½!újD_N»‰¾Z$}µ]«}Vž%(újŽE_­G’¾ZI_Íš¢¯Ö#I_­Y_-’õÕ<抾š{%Ë«yòjý=’WKOf}µtHÖWóœËújþŒEa­§ì\ OI kš©D`-=‡Öò³Àš)K¬åE±Äš/‡Ö< ˆÂš‡@‘Xë)Ib-ÏŽ%Öú³$±–»$‰µÜ)¬yra…µ\ +¬¥°ÂZ†kVXë‘®°FaÇe™& k¹NVXó„$ k½–¬°æ'ÃkyÅ,°†mØ.Ÿ"°¶Æs;c+ˆÖr9,°–ÎÁ kôêe›'ky>,±æÛd…µž’ÖÒåXbÍk‘Xïˆß÷Ø+{I¬IÕMk’Yb-3!‹¬å6³ÆZzk¬å.Yd-–EÖÒGι(_+öïsÿø†ÚI"¯ÆæõõÄmoXÝàuçé´lP½ƒ5uï¶² DɨÓûrÀ$R­Bµo0¨T‹|ƒHuÁù±4Ù Ú‚†#y•°AõX».Š85ÕYûü'4ΠÚâT›Íb‰IœzÛǺÂÅbO}ƒ¸+•¸SƒäYÈà †Ü©QǦµnßMîÔ7a2Ç wê?sxwö¦¾Qþº/ySߘ|WrfÔXªô;ÙI5õØÄâç°nM­þÚdMm‹±ëLîdM}Áº¡µ°³55–q%ydkê „s•‚gkê •¹¯lo\ÖÔ(¸µõv^.ôÎÖÔh¼VéV6§¶ŸµÖ|åFæÔ¶†¬Ô͚؜úB¾qJ·%sê I¼6ýøN™S_°eÍélN}!«í¾QÈæÔ¶”oG2$sê šõD[ÉœúBRÝi—T¢q™S_б-g>6§F}pûe1Í©/˜Ó%¬AæÔ¯ŸMæÔ$ â’9õ…ko¾ÌæÔläû$sê ‹©ͦ9õ5ŠÕ'9v6§Æeæl2§Féô¾&I6§~]J2§F[I[ndN $SVî»SÛÅÚˆ´è¹S£cí+•Ý©ñÜ Æôa—Ü©/ð‚±;õ…Œël]Ý©ñ®1ìeN}Èðv5›S_x®V5dN'»•u?ÙœZ?2§¾Ê—Ñx6§FGßW $›S_Èò]éXlNÆ}å²9µ=÷­¬\]2§¾ä›2ÒÉœZ•Ì©õÛ"wê »Èc³b’ãéN 
dwöe™OîÔö‹÷v]aWDöÔ¸–n£‹«ÝÉŸZ?ƒìO}AÈi=ÈŽäOëÖ6šìSÓÊNìäS¯¹$ôE>Õ˜²Q5†È;måQµ flT}áµ=R×§1UcüÜ%£ê “Fò'£jêȨúuº6SÉØ¨£ë{KdS-3ÛTãŒ[&àɦ7Ù×*ûT륒O5ŽÜÓžùTË4|ªêíiË|ªÑ“Û,æýGÕãtwÚÆ!£j¼-ÑØlT"`u©ìب“Ò¹´.lT׸/E$U¿N™ª%d£jR{26!£jDØ-™—Q5G5ìS@©ržÓA>Õ²êgŸj„»ÝBái^É>Õ²tc§jY‚°SµErPÔº#;U¿ŽÌNÕ:§‘U5†—sh'±OfÕøzÚ*YÂfÕèçÊrg³j¼ê-yn“Y5>ƒ&fÕ7†•â V߇«­'HAÊúÓÉÞÐú„óØHž‘ckH†š~®›DažÉΊ­³uŠ(¬E:ÂܰK.·ùåÜ“‰¦‰Z_¨Fèz¦ÖZs‘¨µ ž=Y¦(·îG_<‡¹õ>Æò…Ñ)¸Fغo©Ä“kD|gYDäºlôÓá “kÐR[„Ç6‘ëŠÚ‰+–Ø5x¨-QBÓMìz¹cõÈðý'3¼¶?ØÇžß¯oÄ~vb_"¼¾¡þJ…¾^£ Y²Èx½Ý;•NLðœé¾¼š„Àkxdž°YŸ§$x­¯í]-wM…× æýÛáµuÐ+•“bx-t…á5@¯9rx/&*ƒ1ºj¥è:ƒ &×ʘ\ÃþeÕ”y‘k ñúu{ùÈL®õµÑõ KëD®a±|·ph`r-ô‘ÈuÇÄcñ‰ØI®ÁgF —U9q‘뎚i÷ŽÈuï–ÙÄç4‹È5]d÷Êš™\w˜ú^ˉÈu‡YĹF"×5’A‘kÀ-xh¹ô˜ÈuG¾¿u¹x4™\ÛÏ6à‡(D®;Vй1 ‘뎗Úm¼¯ k< {‡at=J7ßkN#tm·b¡]”Matmv=Ë‚Ð5Îyf“ŽŒ®íÈÖ›¸Nè3º¶û¼-T¿C)Ñu¬²›òñŽÐuG…‡•(Gèz°Èdz@èZ кîÀ?É¡Ðu‡[Sª^ìèw8ßëó÷]ãkrÌ tÝ(¢R•[º¶yî…n_Ñ5ÞUß}9Ëäg<í{õŠ“D®;(Õµöw‰\ã¹mgí"ׯ»ÌäÝnqÓ.#ƒëÎЗ¯ëŽphåB2¹ÆM"rÑ&‘kýðˆ\ã.íê£ 4‘kô9{ħo›¹–54ÃkÜÊ1Äio[×Cðd#Œ{ŸdtÝá9dŸ‹WÂÍìÚÛ¥])ÎìÑbJ#vm¿ÚÄÂÄ#³ëŽ•Öž¹u‡íÓ¾ï.4`n í\ª’MÜiO>;Ä­Q!ºÀ-Ò]?·¶¶­$âÖ¸BŒ›^€¸5_=F~ÖÓ˜¹õë‘ep­kë$½¤šÀ5®;°¸î(Õz¯ò®Ñˆ˜Á_k Fç^–gv×:8¸î#Î8cK€À5ú{ë—/H\¿Î™Áõ«1“k‰\c,Û1‰º-J"ט_±XpJDäßF…'? ×r=L®q›Ûé†[cø+Ë…ˆ±5¾àó²ÏDÈ™ZË„ÌÔº£ÄöئØ5Sk 0À !t¦Ö2¦2µîXú¤„n¢ÖhîÐA?1kôÕ³Û¸z—NÐ6vóÝ­¯3´î7°úZë÷AÐç„õ@8Qgh­DÐ=gg­ÑzöÙÈкcgkC1´¦‰‘µDHŒ¬_7’¡u‡"q¹­³Ö—™™u‡?E üÀÌZ:1kY“· 1kœIo^´Ô™5.³ô̬ñ9Ë1‹™µ|Ç„¬íEÙˆ\=•˜µµÙ%®J'bÝìÝÖA€uÇ·—ýI2°Fäzaã=k<˜+ùŰ1v‚Ü'#ë×—ähAÀ_ÝeKxO•!b­ßk4¶äQCÄZ;9kšV W#–ñØåŠ«µS®–ùŠhµ~TD«±†~ŠÍ¼¼@ÆXv$ß'¢Õt„ª±tlËÙŒQµÝ"Ö‘ný7PuGąú}N µñA~Q·ýՉ܃Êâ?§ÿàùÏÿÍ¿}·K½¿„{ÿ) ÷‡ó]$þOŠü1ßögœtÞéóŸçêŸpvDîqþù‡O½½ñ¦?÷þ×+þs_û,ŸõôýüóŸzÞÏ£Ëpöç÷þð3W+.›öQ|±P Ó‘ÅQ»ÍMvA¿ýaú ý]ùòwÍ®ùË?~ù§¿ÙþöË?ùáïó»~áià (T æßÿÊyì,~"»®o?•ÅË_Qy·Xà—ŸêÐS}ÚÀñ9£•|­ÿ? 
‘-Gø¬Y$¢Ú²uØÞëePã']J —)*{]Onü¬!´¢jáH‡ÑËÉmŸÜAkJZû”¾‚³k_¡Ëøä¾2®Gú _Ïçö•gÌ ¾B—óQ_ùÓÏíU[§Ú’Àbâ ¶í½ÞX`¼ÇêíKûÎé‡OƒÖØà;wÈ_§¹ý4û¯=ϳ5†UǾÿÌyŽ_{žÍVE'J–Wù™óœzž¿l¶B±gU ¦¬ÀþÂòzüÏ8ÿüá_üñ÷úÏßÿå/ùýþÓŸÿãÏ¿jNýò˲Fej'9Fýî1¢õ%£©'ðVÉ÷³.ºr†'Û·5dŒ‡fÀhQV¤¡tÇR £îu»\C]¶Åê 6TSY8/»µuÎ3åTž(Þ»µ}–Oi¨gP½AíÑF4T¨ lÞ6X;G’kƒ,!Ð86©±üŸ2ÈåAÐog}6Ir€0.±_ƒš)`v…1à~{âV¯¤ê}êõ¼Šÿì„P"tû¯Y®¤Ùi–6£ˆ[íØ“üZ ì<Îuµu¬¤°@Aí¿ç<òL" ”¿µßð]·†ý÷ À«-M¸Ü°Å¤åuÀGæþ»MKð0 è™6[$Møìu†¨±µcÈ€`Qo}"•ÖZR&@¥c÷î¾Í>ô Þ[HîìÅ'}’ÒíN]$i™$ðËÛ/h¯ž¶=‰PVd?£PEÃÖr0NäÁ-Ä¥ »Ç1‘>n}3:ÝÙÓn>êŽ\ŧk¼Ò†=lØöëÜf¹ÉÖ·´' kڵƒöÝ ÔA%¨trˆ¤lûÏfm 0A‡’ë<Ï —PjP@T,¹¶Ø$kØ Ô‡ñä„b¾ìŸÎC”ÍܰEÄÅ5®ûpyNƒÖ? œYú2_mHiî†ÚöѺŒ´!Å7Ðv0ìb7?å•v—±/bKßr•ƒ‘Ô1>‚-·»¬]b”¹@²…ÿª-/×Fð«ëß¡¹³`Äóq6#²…ÄͦÜí|0nàïÚ¢ÎI½b{µ\òèɸ7ªŽ ªµmiïÏå8»+híCIÛ«Ïm´r͹iµ QHísÈN•6IÑçvûŠç’õÌ´úì&ÅÖÓ¹]i«Snó,[ÚÍÄ4u®šf'öÂbXFŸCYí9 ž£BIte ýºÞ÷„9u ËÒ­NüR ˸Úk]Ûb ËûÙÃ|¤ëçð&ŒŽÜaþ]<‰ü„c ʨ¸m?¸ÍÑåD!Á”a‘jÿ¹_ÞØÒ¦|gëåuNTýŽAΫû‹=׊ Ÿ5(ˆ¥#ÛâĞΔ!º=B oÝaí°Á®¡Æës\­i ÛtvضyÜ1¶Éì¯ÄSsoá¬gÚ Ã“±§ëÚ£Óæ¶ØìÂ#„>bú@œ6Ï­ý,¼4ëo¾'gclÚ²ByÌã þ‰mŽ5(Û°oïî0ŸØÍXc2Tž6¹mºèÄŽEŒÉ(H³ívýð Ó˜lWl1[™¶ˆ0>>*Íçg-¤XÛ@(v¢âÀ³acAQÚêAw愃c‹uî‡q~T$”ÌjÇ‚WD%tŸ‚b’us‚äk9ɯ|Ø+o+-ˆBp(+°uwœÞ˜Bð¡8Éq Åà6™öŠ/d^Åà7æ)[LÅ)ÇàblÐØ§â”cpIâ\¶g9·x TÔ5ŽÁOèç‹ç–r~¢i ãÁÔc¼æŠŠBpû ­›») GàØ‘Ü°‚yÄ/¿~5GàXÀ¤ºæ·Š 4äClÝU<? 
ô… GàØÅîðÿŸ1RŽÀ1ºÛðZ§SGàXj¦ªfÈ~LKM Á!?²áý|‡àŠ”q޲¨»Es@çJ ê½£DàrCuϸuΞJóp>¾³æÉO€cy–¥€cæ¿ìșĸ-ÏJïÅ‹op‘Š‹ÌdpÀíNPìd›™âoŽ8üFuT 6fib޾±ì±AÓ÷è9ú†g”¾™.á÷Ìk댾VÈá7ª!ß=|8ü>¿D:~Ô«?ßg¿o¦‹R‚¯ð»ã¡Íפѷ'ÇÜFEßçÝà=Ó>Œ¾¡rðê}·º* ¿¢o¤š¸Ðö}çÕ—Dß¼LzEßöÅ[w›áGßóZ}ç%ˆDßȧÚk›ãü+ú曢odµP~IômQóÖÃØçÓ„ã﫨BðŠ¿9Цøêë'û þ}=B-ñ7Ë;’9þÖ%Åߨà‰ÞãS^1½‡ßvïðr¹=¦ð[B½~ãÖ·Qš¼~ËZ‘ÂoÄÂ6ÆA£ýAø\®c÷2ÆcÅŸòµ$þ†¢³„ü÷Û·uQ%þ†ÂôŽª3Ë99þãì¡™•ø[– ßPçÎ<Çß0Ñâ¶_¨Ï\•ão,m,Hv§)Ž¿…qüŽ`ݵ×¢oùÔ9ú>!B¥}#wv‹º_¯èÛ.+r54ü†4Ì–DíÃð›lãLö´f½³O*Pÿ ïû´·ýÂí˳ñ1@/á˜~-`ô”Âpëàö*›¯ï9 ?^‘ Ãÿ•ãöwÆa8²¸Ð§§œ•Ãp4¬÷×zø‘9 ‡Òòš©ƒC½yÃÄ#—ƒõæµVaƒC2\2“Î1ø(K|—È ¥|(m¸‹Ð5Ç௵FŽÁ!6êÖ™žNAøÈ›=ÛHÞáh´.m«¼9ø1Òv·Ói/á’ÉA8)™çQ~#›íŠjÇ„C¿‘}ÂAø‰éè¨s}ë18¤ðÖOk)Äàb™C18ò4,ºŠmŠÁí\LÊ}Ôw Þ±a¹ª#r Ž|A[MÇ㦹*÷¾ö(o'¬@;¦ üDÜ熃ð*åõ¢(…5Çà¶€± š„ÛŠUŽƒSn÷¼ß­Å"•‚p»Ø;Õùä :’SË9o#©dmÚPޤ:l\OÑ?‡á'š÷âpd¦P‚ãðCÂOað6²‹.wƒä8¼!6îkç*Çá…‚A^竦8ü€oÊrð˜q8¾¡íö|{Ñy×(_GqxjXÙ‡ãsŸÅAßqøJ_ölgr+ÇáÐïÚ@ïæh‡W¤€îÅÝ8ǵ޽pÇáPM?‰9ÄábC›Ê#&Îq¸³"Óª:"ÏqøN¿¬v8?àUµÌ,Ðv`OQxÅZs_ûdýÁ,ö º;äg”Ü”sÏöÌÎ…[*e¹C‹å@´ …ŒÏ¼#Ìz” )²W`¤Gm³î5SœX‚dZô^HBéF¬F‘ŒDFáùàËQ?’£X—ÙïecÎ(¦•¨É$¼aÉ~F0“p|¦Øg¬‘ðÈÍB҉숄ÈfÜÏ+âåŒÂ-ÈØáÍæç$Tœª_2 — œÃðŠ/l‹ý4ÃaŒq­4jÃ1°m+)—Âpì—+½XX8Ò [˜™{^ñ?ûŠ)WE ±pdkÞÁHDr¼¯ØŒâp»’£ì›ÛÖqnýoVYûHŽra'öls>9JògæÂpñÏߣã#ŽôÙ+ Z9 ßaðQ×V£ptë=êtr|¶õðèngÁ¦_ÿ@Œ²CyÖÝOPø7Ùû˜¬GP¸…#í aáÖ®#U†cJ>kèªX‹Â¬Gh8 G®+Q¦á¬)1 4tGdJ+ ·Q'ò`EŒÂ¤tFáûp¤²¡ã~Gá/âi8âZ˜;ÌçBQ8°uV_DáÊ—™†cF°wáÑ ÓpØAõm÷DÃa§½¼¢„†Ãå~¹7 GqG‚°ÐpL©=¾:¡áßÀD*ÂÂaô@ÏÌÂqW‹· ßaôßö¸ä?³÷i'&$>>{¤drnÓ½`‡à`¨p©˜é“B‰-2gUGà8co‘)É©%ÊS^`ûº‡-š€pH+˽,“„c[ö.îgù"áYkÆ$Üþ{³1vNqBÂ19®ÝU!áÈÏÞºËC_$Ïá7 D€»‡ü9ü>¬W™kÀeåÃ8œ%mÉä&߀ï°ÙQ/é-ŠŠŒ)ßaèSÃËŸ#ðÃÛðÆû@Žq¤¡ÞÖŒ)Ç,kÑyñ«¥\¶•9¹Gà[G¹œ"Sn‚]7¢àÜúoI~8—oŒ"pú49þ¶ïý$nýÄßököBv7°æø»@5Û×òæà¡È„‹ãoL#6ªº­<ÇßPèÚ׃,‡ÔB‹ÛÏbt‹|Š¿‡EÌb€ÛÝ÷ûZL¿Pû£+ìYoè´°c98¾cwÛd‘ƒóð+rpÔŠ¢P¢çRÔà;äÇbÒ¤—ëa5¸¼MVƒÃ¯h¯ûGrLyE@øç/¬æ—Ÿ¼jÀ78Ï××å|ƒ^-î(¸…Uç*·6p8 %#OŽÀѸ§utŽÀ­ /ÌeÛo° C$7ÃOŠÀ·ñÑÖsòxŽÀ± ¶­¡•#ð >kõ<[ÿ@²!"²çéAEà°^Jö ?F¼%öé)‡…ïˆÎè”"pä^•„–9¯{ˆr(ǧ•Jƒq Ž]Dd½œ~΃o04Yª…oç0Qô[¢p$*­Re…Ã&ßâ°»} GÁ0hK·:õz‡Û‚ê’½ ß°C»Êtqn/÷J®¤‡oxZˆ5낼±vÄŽ¡Çáv J>m+QŽyٴㆇË0/q¸}EÉ“—ãpù9ÇÍÛÐa âá0•NNæˆãîSÕ#ÄíÈ–lÎ9·.Ù“¾˜âp½ŠÃµ/S Žy ·ÝûˆoðTZÕB8GF^]áâ¶.δs 
n·¹Ý;Ë84´«Þ)âö:«E©®±ã@W›Šmq(®·B¡8\áæ¨ñÅ7È]a%}¿a8"”ËŽ¶ŠoàǨ¦2û^þîXå‚>É ]r„4çóÆ_9X}J‚bޱlF% ¯Ã4DÓÌÃ;¸DˆB9Ó9×¶+ÅãØ«>ÚRør<Þ ¨Þ‚JQ<~TtÕÐÅR8¾cX6玤Ž=7'éŽÛ Fé Çáº6<+?JÏ´ÙÜ;ò-KµÁç„Fá¸Â[ Çñ´ìû¹g†‡ãÐÔ^{ø?s8î–Gù”ž Wë+ŠìIzæ†:Hýð,KNÏ„Ïy±¾}Žsòäœcñ¸,78‡îÛç×;;S$hcÚ]ÎñSˆ„ãŒ%Gq+LA%ß¡÷:]„'@\ž(‡ã7TcGäƒpœ%ÀŽ3 ‘pÜÖF»ÍßS] á8G8¯x|³ÿ s&âðmÝ£¾)Çãp´ì£¾mÿ0‡ÔÁs·$×F È¡êrt­yKW9 GPн× Q@Ž l¿~# 5íÓQ@.¡®ä ÞÖ“Ïëƒxœ§hŽÇá ^›[©J<Tº’É%—ð—ãñŽDã;àÈÙÖ8½|ŽcÎ"ͯhÜ>ÂOæác˜MŠÆe²äh¼a<êKr4Ž éfÏEãz$Eãè'¥‡®˜£ñŽ2Żޚ¢qîTC hk™È^¤h\žã6Á^çå*KÆ¥ÇI0AòJ¡yãUJ0¾ï6Ô]%g iõX\V"ÿ/oï²³I’$Wîë)þ%¹è¿_¶Èˆf€©AÔ†ì"@f£Qè÷MÌÝTåèY•É ¢‰ŠHÿüfnnvLT„cñ:ð÷±¸ŒéM[‚±¸F“–žVÆâý³içX¼´œ:¿,¾¶ŒÅ娜Ye,.™ÌÈ»,#qÙa÷l­oÔfªÅÅn‡—¡?Çáåú9Ço–AøiáÄe®ìë¤*e.'ä h.ƒð6Œ¸2о Âåœñ`Ÿƒp‹$û„¯„ø1÷7ƒcp¹ß.ýSÿ1/ £ŒÁËôæÜëçâ÷8?Š+µËoÃhÑí£ûîéO{¿m}þô‡»>~ëDþ`G¢qu?ÛÿчTNÚþ^hüeý®‡½ôë=ñçzüC…öãÆ_~ä¼·ýyòã/bͲ|4a?ä<'ðþùÇÿ¹íï ¼ùÖü ¯Ä6ÏþIú™v*Ë—º:1',}x<ýNWÆ6½n-‰nNYºýÚ#}˜2þŠ#uy~›Œ_mÚÿëtü0ûÇì+³ƒþƒ›Ìè”d_iW¹ô¶9 ¿ü°OƒÆèÇ{WÏÁ6ý˜Ï”æð¯]N9ßôcÎE+KmâÕép=ßöCå”_ÒBú±ÑBx?²…ôW-çòC[H?¶žÌ·ZȯøÎ="³ ³×S{*;:ÛÔNgûÐæ±fj3±6Ø^ç6ÿ?jóÈ㈚j²(îýÿ¤Ëc9Œ)ë¡ú‡ö·_ëòøý§ùKаMñþüÏvÔÒ¢œþßr~ý“ÓÎÆÿI7»úmßS…*‚¯·œùó’ÛlýŸÞêÿýŸþŸÿ÷ÿûÿ?ÎGRØë=Ÿp¤Æ5õ>$c‘pû<¤Ýý8lñ ü·y<(äA©ÝÚìôÖêÅWŸËj¡}é¾­üøìm‚zw3Ä¥ÏѤQlû­í =Š"ÒGÚ$ú§EÎÜóÒz«U …(__Ú?m³l¹w}²ñ:D÷nRµA»oíy "’toÏnòri»‰t’§ÌãgárÙΟ¶¤~‰a¶Ô‡»2ûuW€ýa¿í,úR‹‚\¾D‡ï7¦çݦõTYH| +_{Tá-ûÔ—a¦õ8¾öýAÅÕË>÷Pˆöd¶¾ß¹··lŸý–¾B3µÛû%æ~-íÆì1/ûÚs_ïãVÏÙÚl÷MÇ´¿Ûbñf×?Œ ‹ÖÔß ¡èT6ŸŸ/»ÊX&EÜ·wL÷ïnoÃã!²ìG|UždÛo4=_˜»hai|XÛS¾Ïùl¯îònºÕXÚ{ôµ÷bëvËEÕˆZ*jùÒZË´ÌÓþ¬q.ûq¯úÍC‹®ÏÒùrHœßw®í mÊ1:”°ÚŸžn„æIÊ®¼öý+è?ßíUêmöc—%£^µps´Fy¯ÏC=Ö¾°Ôz¦íÙo‰œêöiÊêi³}Û}FÃTõk»jµY­?m‹âë:ÅÓ&ż^ÇÞž©Tpé{ºG®FéªÏnOùnSûþ¶Y½)ÛÉqåBÕ®œÔ­¯{>§©"Ô7vi_O¼öl¯ëÝv·v":Þ³½!S·v×^Ÿ6» dªäãyªJ–lîmvUymÛö¼æÊœ¹ÛñZÃê·e ½À¢hœç%Ÿû©˜ÒsQÑH®êKv[_—y7]Ï’b;“î’: ™Örî™íªe@y}žÏçò ¾MVë­÷t^oÝ•ò/Ÿ&«4¡gæ9Ëë'u»O“íŽ@{< ¥ ªSéMv•AËÜ^’ëùÉ;ר´_{Àc¹c¹¦Öáìo“UµÊIicf¯ú–%»¶K«ºo/«#»²¤Ÿ“WºûàÀº!ǵî2BéÛºKÑÛbue]œ¹=ûiIüíeû=Ž‚£EÉ·çö6ØÍë–ëèëyOƒ]å´_Ó³f§mæÚÏdï@º7 ë”Ðl4ØÙ]QÛ”5£\Û©+Íy~]ë–«µÃal¥ý®#;6Å4«Øái°"Îóˆ©Ô6­> VçÙ>!’õömýS÷6XeòœQ´¯8‰}¾ß«¤X;Ï{UÁìh± 0;†çJû|庢ÎS—>^Ö[«Õ£—í5¢Q@ÜzÑ\rÔ~Ó­d®ãÙï!þo“]DíG!ÕrŸ#¿M¶NKÜ£—]¥ÓÛçWªmZ§|›¬~÷Ô.³?ÚûÎðÖòhÛ•µCŒ^V¿)ûõ‡ 
¯ÓÜ£[û“{ܳ‹DÆ£—Uáoû̽oÎùG/»ª?ˆxGë÷èeÛ)¶OàÕúÒ­oÛ~ÚïÑË®=üq¼9cßw4ÚmAl_à™F/«Ü3•`^õ¥³ç÷åÖ æúN:aÝèe—žâ<Œ`újÓàöÚ6kAöéOº”o‰^VŸíùûIn¸F/«nuÝ»ÇË>"òÑËJ¹pÆ7PÛ´òú4Úr.Rqn£—m¿Ù†½ûyÏoªjt³å<5"Ý£›Õ#n¨÷‰KݸG?«ì<aÝÞçˆ~V‹ø£¾¶gœ£›m‡k›–wÌ×¥çÛÍnJ»ŠÞ¹W£›•t=ÇuZˆWPëÛfµ˜©*¾ØÆb’¶Ü>^z=¼F›mwZÎ[Ïóå—Òb´1Áû!ëµ¹Sô² jm}>Ž]Ö=E/‹w¿WÚÎ{¶ØYOWªú·sŒeûí×ÊÚó¢öºõw,ÛoÕþŒCú¦£/6?-V.uí“tÌOKI܈hÝ9ÉúꟵ펱¬<á¶h̪Œ¡¬ B½Û…9[t²ZäU+éŸé^ü¸G'Ë»¯_‰‘¬®`z¸ÞmwŒdÙÓtþct²º)™Ú öÏ=Û«œ{¡wŒd{ä>†ÒÍ1”•5N{°ï3X÷\ ×nûg¤Ÿ1’m‡SHB\^ûf‘¬„Lé5¡ªò-F²+¼UW¿ÄHVϧdßJqµFë#ŽšÎ>åz¶]1’-í¡ ¹Žeÿ8 •FÄH¶œ½”X1’=•”š7kÛúrüÓb{,ó}Ž×fÓRrô±2}Þ¢3iécŒdë᎞Å}l2ŽÁ±¬Ab ïumü³>­y“.dÏÁÜú„wt¬mWŽd'µ¯áò(¯#G²*$)ˆjM¶ɶOO¾ø=ɺØI†o[Ø—. ˆ&{_×5Þñ]BÈNgv‡2ßÌ‘l/ð±Ý&G²÷(Ú}Í¡,¾ý²Ó™s(»ixŠýŽ®2xÇ*LÜÛ·ày »jMÇPv×n£fQ…ã{eù±Þï¿úö±ú$^#¢On¦se·ÕM°¶wÛCÙUÛ®»ŸÈ»@£K^5cÞîãò½]l÷ Ž/ŽÖÓr ‹áŽÂÜçÈ.)|z¶Ý9•ŒuPÃn„YÉAÎèEÛìjÏl»U/ËØ5²Ý`g\•l—bÛ&Z»öünаÕ.¿k¿:>´Çö¬]Ôwޱ¶’0ŽĶʦSz^S%oæ öR¾ñ:†B=!rô¯ê–v‹§Ä®ýõ]½¢‡s «ùíLÅCæV9G [z,wt¯ÒÉc ÞcÂc »É=+ü{ÖwŒae¬²wó©óÝvŶµÛ#|äº/VŽaeØÝFóÏuß]Úñ¶T Y×1AW¸ú’#X¾4mb5åVÂã-¾GmÛÕ«ÄŠÙY_Ь,-¯<Ԛɪ:y壾s-Ä-1‚Ý4¨XæÑÄ”#XyÁFñ’¶]1‚U|»#oGߦVgŒ`Õ{Ÿùa¿Î”—èÊÚÐv·ø’Ziô®2j¸²+¼îTž‘衽ÓWm»cû¤ÈÆWøÖPjô®­1ßí\dhÛ·Í=Rõ彨+Þá6µZc«À•ëÓ5ɘçÁn»UÛjÓ½'ðfro¨*Ýg{·£Ç¾å 4z×Cu9eQ¦U `eJ|äWà>»æm±í1Lá8ªm·`5k}›Y9‚•ü­]lï3Ú¼jñ«6¯ãùp‘÷YñìsŒ_Õ ?J¤¾VÑúr«ÍúCÎ âU„ìñè¥>Ö8;m7™ˆW«9é¾BÄ{t‘øhC@¼Â£×ˆWŸÏá•Äk{¢údÙ ¯TcÊažž-ÆwµeR…Ù ¡œïvD}Üc/£»Ç“»4Ư »RÓMk#§»Ó-w’9§»’ïÍG¨¹@w%ß›ÒÓ tW:B³Þ=” Íé®”‰©‚ÜÕ/¶+L›pWJÈ9d€»úÅÅH™ÃÝ.¯ÌI(àî# ”›Î‹2 îJ³ÙÆ0rNzPæwu{âwµŒâØÚáî!;íÇBäîvé¨cƒ»:ì|íj=¢=Ž;ŽflWGÛÒTlWg©‹í~\´±Ý®~mg2X«³ÝýŒíö«k-è% `»z8S{oÿ¶Û¯/ËŒÁvu¼öc:Üíë4Kdÿ€»ýÚÛÙ½³Ð]]ߦªàgh º«k8²³ÝÕ~ 8zý/œîêpm§ýýƒîê4ÛOÇRŒÓ]Ý–9ª˜wû´nh4g‡»º+²Ðî•ÎÉÙfIà®ö8ÚàÞ¶Ü=º£W"f‡»Ú¯Mç¯Æwû½ŠÃÝÞ–máÍán_‚ìRëw7ƒ»ÚÏ×nîêhm`sÞÜáîÇ~wû~OÄ8À®Îãºc °Ûïä3€]õ 1„)`÷x¬Ý¢E:Øíº½Åa ì–;é\·¿pÄ®«ÝÎãŒGçºõ–×­G3¬ÛÖš8Ǻu7£ºº—mµuÅt¡ºÚmà%©nyAAuõÆœ[¬ €êj¿s‹õ^P]í·­±Jª«Û¯<ù·Ù9ÕÕ®«*&¾ Ó-íLW½Á}Êlîe³Ætµ_,X¦û±Ÿ1ÝÞ£¥§:˜né}Àtû Ëo>˜nß/×5Átµ_ÂÆhÙ™nß6ëN<¤È™nßöÊ?ÛŒéêxKøùé~Θné”é–o˜®šCëï÷[¦Ûl¦÷nÓ­wÓ™®ö;rÜòê‚u~í$6w¦ÛÛMdœéj¿=‡j`ºõÕp¦Û¿í¡½kRÎtë“q¦Û?D™ ¦û±Ÿ1Ý~š­ŸÛ˜n}juëmt¨[>¥€º:M½|ã7êöÛlc¨Û‡&íÃ$Õ îÇ~uû~rI0Ø n½>§º½·9Y9Õ-2§ºÚÔfù@œêÖwÕ©n 
é2è¬QÝò•Õíoü—ÞË{©®v¹æ@4uëE;Ô=DY3Å P÷è™8yÑu˨ءnÿRÉç¥uûzžÛ•m¸5¨Û¿bKâ ‡ºýê¶-à¬C]]^ûK J‡º}¿sJ²iP·ßäsõD§ºüþêöË‹y¹#ݺ“Ýú@è–'ç@·²{\dz¶ç@·~‡èö[9ÍcØn½èöKÛ–@çtû[l¨ÂnŸqåz4€n}«Þºÿ+ä@ºúzß ~~/Ì‘nŸ"W=}—#]Nª@tŸj¼\2¤[÷2¢ÛÛø¼ =ˆ®F³­T9Óíw— ïúuuáë•”Õ¡®ÞÅö ñ8ÕÕ´/DâR£º:Þ­@Ö÷¥rª«ã)0`¬‘:ÕÕoZ-°®®¯ ebxåX·LÌu5ןl±Ç±®ÎŤÀºiV4ª«½ž¢¿çcìTW=ÔìðÙ¸nŸÓçÇѱ®.`Ké°®*%e¹2šÊ‹uõl”¢1¥c]ýÜ™á&ÀºÚoUÒü»ìXW÷jŸcH¬[ï±a]]´iì€uu¸ÖÎbeÒ±®öÛR…¬«mí£6¤ŽÀºI­C]Ç2cÝ~.¶üâXWÅ¥[kxof'°nLÀº:ÞžÊ7`]¨6V»l[p]ïz D¿ ×ðºÇì#×=¶>ÛŽEçºj ³Ñuçº:žÉ.ëê4Ûã‰E 纺¼³ÍNFædW?Ùó3q¡®¾ ,v™—?Ÿ]m»Û¨üw…¾–52dàÇ$è|`Þѳ~PÞ­u©$åÝ|P(oÿTŽefP^±·ü’ò«™ æ½D™QójÙ79Aï,³Û¥ôîÝæ!a´£Þˆ+ê=Þbý¯Ô+Wž;%Pï6õP»÷@ÔÛë­ÇBH!½=],î&Iïí1Õõ¶ïÍ”tƒ¬WÇÓü®’õ*ú'²d?X¯:É!ië];É NÖ«%^—¦:ë•K­Îÿù 'ëĄ̊°ÞYsšg¶^²1^¹#¦&• —òrÞY³ÊoªxËp—¤·ûÚ.©ÔuÒ+÷]SÎ’ôNž:RHï¤aì»2Ϋ:ô6oM œW¡åÛ;&å56™:)¯|‰ŽÐgò <˜”wî*Tˆ¤¼ZÍO¡h¥¼NcM ´·¼þ ½ú0º×i¯^ÿ-#¤½e¤Ú«¸Ðö–û Ú[¾žŽ{Ëíî{Â=ºà^|•@{Ëp´·¼VƒöÎU®N{9Ì&í-£5ÐÞòÞ€ööTäe¤öê;¶!²íÕ½Ê; Ø[n#`¯žÚ’õ>€½¥/ì-­°WŒ¬&ìÕÝÜWsÔËùH/wrΫîdÉ"3p^½VUÎË©9oy8à¼z¨s›8çå–œ—Ã^`^=€$ÇÅ>7™˜WÝ×õd‹5ûÏHÂÛ>mþ÷|°>¼ZŸÕcùž¹/½T× dÅ«(ˆWÀ=Û-¯ÆŒÁ¢x…S§OÀ«å’¬ç àժ΢ ^-—¤Ÿ€WË,sHx¹CÀ«ŠØ%ôEËËk –÷c›ÞEq0c&R/¯¡Šyåv;ÊÃIxçtÌ,„·GóCQþAx[ûã Þ²/bRIx‹BxÞžÏh>Nx{bbê:Áxµ­u'ñÖC9äÕ)þrÈ[® ·žŠC^-äU¡‘"8ŸY3!¯jÆ*òj:fo$0¯¶-¹òÌ[&@¯˜yôÖßtЫm»É€ô®ý#|„邃^oÿÐ[ÓAo½½åè­»9è­—î¤WÆó9Ë'é-û½¤WGj—ùmHzÕk´ó„IzÕ~‰ô–Ò[ƒ“^mz½ç¿*é­?é¤W§r†’¤·t ½õxNzÕµÑèòAzK Ò[ŽÒ[Ü€@zU…•ø‹¤W†æ>ÒK IoéLAzë~ŽzËÚ–£ÞzéŽzÕûµáé;”!êÕ¶#¤qD½ìňzùˆõªÑNQ¬@ÔË–BÖ«Òê5J#Èzùzõ•‚³^mJë²Þú“Îzëi:ëeÏ@Ö[笷lëåW—¬—#²^öd½j(™JÖ«Ænõ»`½u›³^Ž*{¥ñitÀ"À^ýfû‚à´WC´\E!íUxÌÞÀz}`GÒË÷˜¤·\ÚCzË}é-÷¬·<°Þú›ÎzË=tÖ[ÏÏY/ß{²Þú“Îz…TÛ¬"Ü&œõÍXo¹`½u›ÓÞr  ½ÚvF¹ioýM§½ú‡÷Z#ÐÞÒ´@{ëo:í­Ûœö‰«ÓÞ¢dí­?é´·ªfö–ÛÚ[ß ½J8*uÚË/nö¶Öf0‘Â^*º@{uëÜåÁh¯>€›i~öÖmN{ë6§½*‰ncžÑ‚ö®½ï53Ð^m3J Ú«ãÙâh¯ÊÕmy¼W¿Ù憣¿ï­Ûœ÷ê7¬÷ªJü—x/ä½åž÷ê7÷,(vÞ«SIyo½ç½z ò|Ëcð¥„hßµ§ „«ˆo9 ßrC@|u†ŽgøÊ_ÌÖøÖ sâ«WÄÜ@|u ÷ÞU ¾=f*É-ˆ¯‚£ ¸SßÛzyI ¾úM§¬N|‹ÐÄW&oWd“ø–ëñ-âLê{U–È‘ú^…ü¥îÄW^¶Äâ+EäþUXoÝÒ^uïIëÀzËÉSÛËGmïáï©íõÁ•½ÊÒIÀ aoëϯ,Â^䊰—*U{U·™:n {[Ûq² a︷”ôJÝ}E¹%½lÁ”ôªx.=yª¤w¶ÅQJzÙò«¤w³5 JzÍcå´Jzçë)Jz/EÚ}Óª¡Ô·Io{]×T¡SÒ+[•ô=ƒ¤÷R9ý&ôÊò-¤XEÒ 5ó‡¤wq#‡"é5³†.ñl}ÀÛÜþèð"ä¢Ê¯RÞív§"R^½Ý¹¨DÊ[Іòj¹&_ÒâÈ+„%;Å‘W¹i%GÞ[ÌÇ ØyÕë¬{èAéÈ+À} 
ªì~¼BAÇ‘$×ýx‘æÀW¯SÒ÷êÕÆcXF?Þ¶íJ+9úñ^šÄ2 y¯Õ+ÐhÈ{­îCCÞ«+aStk†¼×á! yEÁÖ”‹Â÷ì02/Ï yEÁZË—\7ämÇÓ·(,^C^åÚå—†¼g—3̃ÛÁW0.ßò þår yų†¼rFû%C^5¡ Ð÷<}›†¼õÜ÷”³k¾0äí_Ý|³`È«[–šòêòZWñMCÞ³Ëy—pÖuCÞóÄë GÞv;MÓGG^í—«²´äÕ!?v´ä=oèIaÉ{ö®#|_Ý’÷<\~DKÞóðÕoZò*äe›Ruì–¼úvo10¦%¯.Ý@âkÉ{^pY†%¯F#)†#ﵘ,~¼WwžN;÷ãmçÚ¾ˆË7üx¯Í+éÇ{í0oyáÇ{éµÜF•/ýxuZf…?Þr.ðãÕý±ºøñªó2§ Þžþx¤¯ÞWž¶8ðãÕyî|À«MîH;ÞºÍíx/yíQ;ÞK˹Wx½¸¯b0lɰã½v¯Ä¤oNšÑ…ÛñöÇ•šØñ^œäàmØñ^šyä`v¼r¹7[+Øñ¶+“0uLaÇ««6CWØñ*à³½†AcÍŽ·þ¤Ûñ¶m»™`ÂŽW 1Ûíxy—iÇ«»lUë°ãmwëÜrÈ ;ÞºÍíx•|jṴã½zÉî–nÇ«§c °ãÕñîT°ÀŽ÷ÚQ" ;Þk§y®Ûñ^'*páÇÛ^ÞÓ¼"ðªÕ´±t¸;àmÛÒºˆv¼å ÁŽ·\9ìx5?:BœÛÍx5‚w¦ÞÞ¶¦i,+ÒŒ÷êïW:çºïu¸+Íx¯^cVªnÆ[ž ÌxµÍlOaÆ[OÓÍxu7ܨ×ÝxµßžóJØñ¶ŽsqË]·ãUÀ­QyØñÞ*ò„7ÞÒDàÆ[Þb¸ñjâºæ¬n¼W/ Î7Þr[`ÇËñ/íx9n†¯>CÓPÁŒ·ÌÍxK?3Þëúéutøóêhæ4<¼x5c7nxñj›ƒî*ÔÞ^ÐÝIrã!h…¯Î}Maº;ñÞ‹àщW¯¼9ã‰÷žP 'Þ{뙸cÊ '^]œÕ—É÷ÖP>­±áÄ{kMnøÃˆWï¾9(È·m;ÌPF¼í ¶6Ûsñ¶m^­#ÞvWÚW!~Ò}xÛW€Ö}xÛ'Y§6Ãîëm÷·}xËÑ܆÷V4ìžâf·áU(¶™!Æ÷s[ÚðêÂ3%añêº fÈ÷Þ°\#^¤¦0n¤o¹Å0âådN¼·Ö ²jN¼÷â¢:ñv´Ïßòêh—¹.ëj¸a.Nu6}Iu/Þ{q•½xÛ¸m1ßYxñ*C<ëðèÅ{«sJ¥%ÌxÏ4ã@ÕÍxï?c°3^ýænnnÆ{ßDшWcRSƒºo9/‘¸#Þûp*D®;©™…°®Âѧ 8ĺÉfr`ÝéF^À®ÂÍ×óÓ®AiÍi¶Gª«xäìCuÀnöGÐPWGK6BÝé„>Юb“Íç›ÞI±4£S¿{˜Ù)Á®²–màT廿Ú®Û³ÔÍË^…”¤f„^ÍäÖXµ×¥³!¹îr`„@ o{×l•¾Jx­æ•ÞÖs´‹}.%¼·J•c=ŠÞ@>SІJã¢áíìfô2†×¤ÎßGv§Qƒ$–oÒ‡Œwñzµ"ã]ì…(*ÞöJº ®'®©ÃOÙGâš5zêxu×ðW/‰k‚PY„ËÄ5%bå„|·uäû’2KÞö%:ÂÆª^쮈AbâZûÜÜY_ÍĵÖ´ ¼²)6u3ï&Vº¦V‘k³[)•È5˜¥“ð¶8±òÎȵɰ7×Ô6ÂòŸ|W+o¦KßÕÚ ËÁw¤å²Û¸¶ð€ïªÃpê|·u4Ç:ßÕ åzÝIwï•FFwo!Í;j§Aw9""ÝÕ—Î#ãÖ.«Ü(ik‹uà%lm£õÂÖZ—id»L9*ak pÛ-[`»òü23b¦­µVbË-`»Ìó)qkž(C´«šÙÂÝYrÖ<¶©Ä¬-JÏ:›³Æ(;`Ýv°-½Ì³¶ˆÿDÌš¤#&"Ö•ƒX2rÝE¬0Â>JÌZ»Íîðë\Wîb_èÊ.8MÚtå§fkºL="Ð]ôE ýJÉW¥]#é@W{†Ÿ"_é1ÌWc@óÕ&Œ™¯&ŒÙÞõwPÆ|5a¿œÝ!_Mä$ç?º%³‰ùj ;ÌI(€.Zq‰WÓ$$5ŒWÃ&Ï]zðj¸\0^mú)G2éjKŽHˆsKBÓÕTÜ–šàÜrW€sËåç–(Æ« ؤ¡¢Ç«•ðÄ«é‹Ò^©Ø­Ä«y«ñj׋í%^McÒmùè W«’U?ûäékºäßÜó—ÃL›bÐîu¾æóó0÷8Ìò;Ó&ôšà·›ÝvûåÃl¿÷0­h1Xòõïç¨ÇùÇPÙéz*Õ®}:×.J8­“ÖcèúþÛ_ÿûÏ?ÿåßþå/ûÛßþòó¿þËõ?ÿõw}:¿~Uåá&݃+SÚHç©¢©Ý»``¾½›téÊq¸~n“îÁK§$^Û~Zéá"7J´é Â/-t‚ÛmïUƒßÔloûíÒaÖ ››j)ñ‡/emíj§@üGçGcZÛ®@ü’VXM鱘4E™%¹Š²µA{JSÚIoéUßþ»ò¨†Ú<ɧõÉþ„“¦”yi[JSºö%ä³Ûq˜4¥km‚Zo‡|±†Ú6GMí´\šrx Ò& DHS6‡+Ú”â1Ú3êi6I ñ‹é§1ä& 
„‹Sî#$›$‰øe[½¿L¾Mç_mŠÄy“ÏÕ´)ÏÜJÌìë!Û¹™6E™Ü·ÈæÓÀÎÝ´)ëé¨f;S§¬W÷Šgq¦8…¤[›RœÒÚÚ)Ý{U’„_ 7óÒ7ÕŠ%á º]“«SÚDè 3ßMU_ ù%š ƒm3yŠ""‚œ¶èò”S #[e»V—§œí}‰2œíÚ\ž²›ý&DBþÖ‚ÎýØ®ÃÕ)‡›Ak[ªS¸¶Iñ ˜“"1ÿ¦ÛrÇåÝ&O‘‚îºÛM‚yÇüšË—N ä”§€°o÷lò”U¡ßî…òóÖSçökç)®›„P®PYÚËùv¤RV'æ_;}Ë×¶6Ë2…Ê&”½žäN‰ù±ÞÓzbS¨`mo“ô8Y¿N¨m—³þ)­Ú™DE®Zg¶=\¤¢fx¿ö¿®RqË>-©Ø-Ö¶©è±ÉêyUw©!õ÷Î1.a—"PÿÑWÚ·áݶ›H«Äíûd¹½­m˜ðh9´Í4*X ݧÓ4*\çÞ%ÖÝl5¥V»¤ÒôKUÖÇ)Tá‚»¶¥Peí¶±ò¤¤ UÚÛ>ã;·ë3¤_Ûö{Ø)ïóB“8·!¤ Uð"j› U ‰ÙåžÜú’óšÖGà’T»”¿­•´3û*ê¹]2ˆÁ£ËºÿÞûwõ÷#åÈ2¨]:ˆª@‚ m)T¡ n—J•ȉߗم*|ŠÕeÚžKhc—ÑåíÊn­ä\ççpç5{—RÔäõ<¸c ß„]¡—OæåZ…1»B°R©"z$Œì “„R%e]d¬¿4Éy\©²¦¾»ß PZÛïÛæ,_Å×K×ìX¤ï‡h_ž6ê훌Â÷vÕº”¥}†?NE Ñ1R÷Û^³w¬Ô;´PªPPóÂA{–ÐöÞ1•*(ö¥+U¦#\žÇö¶X©ˆ²˜´kZ3é-¶´ ÙŒ´¾äi±ÑõуV„ž+%>ë³í6¥Šö[ÂÓ¦‹bÛžÓžë)å¤TEÍ+ëÊúèh³.Ö”1?fÛ‡ÌëQ+ßÙdýíÙú‚øÛdõì—u P4&|Só^aÞFJZYN©ŠôyiÌ¡afJUÔÖ¦°Ä‘::¥*ºgk˜©k[JU$ØÚb-ißT¸&‹7Y"îûõ$íã¡9J ú‚mëTz‹]º(}ŒŒ»¾»õ)O‹-Ls Æ–'+AD cuås÷héµ+U(âÕÀ”*§g{´–)UX¶¨¥ïÖƒ¼m–E†ÚÖfo›…H­ÝÏŸÞ1çüÑ4UÙ­—µ+MRª¢ç½DԦƼ&U¢±«öc [º(Õ@Ä@VíyŠœ•·óÎ6ëï¢%¯ÑÍRKÝZ¼zÈl³Þž%’Œn¶þæîröQ’FLÖÍz?$i„묽+•4"G²üJ1nšFHÄDZmvËUÚö —+ÈÓf5ËÚ9-ß·÷%Ûìeå\\¯¢¤Ì˜Éj›éU0Dy@ê›éUÔ··áÕxõÏÝõ*«dˆ¹ëëcï#œ:úŠù‘}왵¾Ý²ä},5õÝÙã}¬Æ4ª.#“¬@$<`’ˆwŸÅviK~ý®Ù+í¹·”0É5㘶øxH²ˆit±œ©Ê`™G+K«)›–á—ÑÅÊYl»ò7wS¬”E9Žåmi“«ÍµÖó~JO'ãØÝs—z%õ6ºØmwkm ßgçžÐûÛǪsKÛIU·k}Ûë&‚’cAÉ+|$»¶/ä›™#±@jV¤{"_©×¡¨ÅJgªŽŽ‘r §„©YQ~oFKôêÍÈn½ø~LÞU-:Å@öðL8mºs {ªÔm”%«€óŒ¬fWIÓ$uHÑ æÙª¶LÍJ»W­ën޽F+ƱmØatÚ–š)ò2=·5³ö×ÑÃî·£ªìr,¯\‹5²nï«¶u1š¬¿Â)UÛR³"Ó·t ìåó1=íS}ZŠVäÂ4ÞAm»L´¢ÔÝ´ÂPÁæÙU®HQŽ má¤Ð‡9gÞ—YãÆ·‹Ýfà”àd,˲ÈòYz¥5P­ÐÖ—¶î 3¦ßWŽ8ÒDþAba½²}•èˉmV˜“j²ÞCAÌ!##ë‰Ýcâ Ö»£Ö[ŠÁzK® X¯ÜðlPÖ«Ì“ùm_…õj›¹9€õªÈòHi$X¯0wL% ]=NDE/QÖ+OÂö I%Xo ë-©í`½%À¬We»Vž Ö«JÑk 'g½‡†0Y<Ö{,›'ƒõêpæÖ{ôEœʃõ*‘y~(ýËyU‹,÷¦¤¿ÍMFy{tí¥[ ¼J*6u)(¯Ž¿§¢Ó1oI™æ=:ÈÀ1¯~²ÊƘ·GÑå瘷¤±óö°ª´>æíɵ)Èæí©GçšXyõÀh¤áóöD¤# xÁyû~)bç-kà¼ê!ϬéçíÕÛŽ£/F›¨Ø9¯.}Ê RpÞâÎÛo§½ŒÎyK†8ïÑK|†“ÅP©©Ð=]iyKô(8ïÇ~;£Íáœ÷è\.9»q^Õy›!8or³ŽË9oOO<‚„‚ó–ÈapÞ’ÖΫß4® Ð[’Òzµm9sré ·ç·g¬!@¯:‡ödFm@oµÒ÷¤·?Ó9ª±@zK\#Ho<Ì•/Þ’W稷¤C‚õöŸÌ‚°Þ’¿ Ö[õÀzK<Þ`½%¬·>g½ýo9½³Þhꬷ„íõöF{F>Xooëá†Ô[¢*zK—ÔÛ(i”ÖÛïJ¥ƒõ–x^°^ýæ™®K`½=uŽXo¶Y³ÞÞ]æêXo7´È:°ÞrËõê'Lrêí—оJo¯ÔÛÍ2¦(ÐsÔ[›³£Þz–Žz·Ò«³´"µWŠËž¤·»dŽH¯†ì™ÍЫÕz‰¡ÅwÐ[‚Œz»`!]½ý,s) 
·ÄBô%;@om¯zK3@o½<½õÝqÐÛCÀÓƒ ·_CÖôÖwÀAïÇyžÝîgôöV’ €Þ’4Ò[ €œôj7«魗¾ŽNzû¥g¶Ò ½=»:-|AzË(¤WƒËÖ÷&uÒÛ·­vIz?®ÌHoÿÀ_áJ ÒÛ;¨,¢éín~iæäÑš)ž±<ê”·D1ƒò–ÎÉ!oí{òÖStÈÛ³äÏc¸áò~œæÎÄh'UyáÄÛ{ì³Í»ßå G¼õ«ïˆ·ß”6þ 4lŒ·ß””99ã­ÍÄo§ O=ÝQñ֖쌷¾8o¥AéDòö–5ò €xûé Q@Þ~~Yé·öyëótÈ[ûP‡¼ým5¦@¼õf8ãí£â+WNœñöÔçÔ(ñö–•À`¼:K«ÜãõÊù.“kwû|k:ãàx·.ï–àmàÝÞø[£+­Žwë'Ýñn¿É%îÖîtW“ó%EQ »eF º«ý¶¬lt·ÎHïö0â”éïÖY‡ãÝÎÚgxŒÄ ïj|º,G,Ê9Þ­CeÇ»W}{T4šøn¹ßÆ ð®vkƒ¦Š€wûÍJ}àÝ>of总}Öt,ϯØ-©æ»:ôΠè;­6vÅÚç;÷;ÝeNNf÷ù»Úv$ÆØÕÃióÝÑ|vûUO±ìã`·ï–`WŸØs‹N†êu©ìJäjæÜ]¯Qt:}oVôoó™û ¼çðpú ¼ú$t!áݼ”“„÷Xý9‚ð} „wõâƒð^9N/„÷ðUÜÂ{§›x!¼ò61Ù*¯D6¡&+„Wà;>N…ð¶wd6j »Èz7˜ /R;Ix塲©ôuÂ[ð$ /†‰$¼ò^‰@ ÞR4 À;Ë«"¼|xÕO§ë¯:Öö† Öoï9Rù9(/B‰y9“(œWÖ*1‚$çU–Ë(伌'çWîóªÏh#®·¤œWÓ ŽÈy[ÏìR_p^yFݱnOÎ;]¾$K9¯p£µ/Èye{• ŽrÞ»;À„pœWžtÛ¤žWUÙΕ]Ï+_.sÔ„žW~Y­­½Åãä¼¢‡™cC=¯|Àr\ =¯Ê3Ü‹z^Yw]!ž¤ž÷îÁöcF=ïÝ£4ÓCÏ[NåÕóÊ!xÏŽz^Õ©g2½ï¤;½w-3q zÛñ,¥ ‚Þ»›?ƒ‡ W…ýgŒW(èU;IHA¯.áH)@oûËic zïÕóQ©è僣¢·{®I0ï$‰ãÍ®ëyuu{¤QÏ{wÔa1BA¯–éüBAïÝ3‡õ½Œ"  W¶íë:T6®è…?½íL6›Ä@Ñ«DŠ3¥PôÊ;"=¾¨è½{FûèCÑÛ›a\PÑ«—!ÅÏôêýrõ­ zeßžÆh%.襟'½—¥qcô^Ý5Ò½òÁ7…-½šc„k'õ¼W§¥¡ Wñ¦Pƒ —ÎèôÊ##­)èeª ½2$_ÎQ‘CAoŸÒÄ·Ÿ‚^yj€ WájW {)襅 ½òo4…-½rÉ\Qz{‡›Sz/-1F½²Ù » W¦°½å>BЫà4/¤ ·Ý5½k(èUØÀuG€ z¼DA¯š‰©Þ è-·‚Þn‘yÄ‚ ½šggQ½÷ã·So±i¡–WisJ-¯^*ÓhBË[^8hy•§azJhy¯n‰kÐòªŸLSCjy5§˜÷’&'K^5oçš=Ô¼Ì줚Wý| ë!çeËÜDÈyõ¾¥Ðr^ F3›™rÞö“6Ѧœ—©”ó2©ƒzÞ2…žWCÀ,ú€œWn zÈy™áF9oyló2éŒrÞòA…œ—‰S”óêckEEó^J_âÍ‚œ÷îi÷)v9¯>JÛªW“ój8ÔzŠÑò çÕo´ËCÈyõÓ‘r^ù¡Ù:èóÖC¹žW]ÛnŒØõ¼eø=/C” ç-ÃzèyËë =où½í>¬).¢ W“̴ߣ ·nsA¯Ãze>½ìö(èm¿¹$Ü£ W﹋’]ÐÛö›r•š‚^µ¶Ã8³ zÙSQÐË\-z/=“˜ãRÐ+r­ô²ÑRÐËé}ô²%|/÷‚ûÎíßÿè+[SáI¯*ô•¯l{C¡è»i4”DÐWµ¹É"œùjTß:ÿÁÈÀ|û$bKÄìÌwé1qCüBæ+ëöŒ*#óíŽÍ‘ØEæ«ì !óÕñ’ùjŽ”±\D¾ËåIlD¾JHqv¾ª"ÌÙM¾êCìL|/Ÿïà{åËÿ|o*SÒÛjW¢)JzeÑ8'2…¤W ŸQ\$½—wðEÒ{ëœCu=ˆïÚㆅeöb„ÄWÛÌì‚ÊÞÛ;ÿ¢ì½}Ì â+‘óaba({ogeo;•9\€I|Ëi‚øª1ìWHéA|ËoBÙ«Z¨ t ²Wo–‘i({u ·QV'¾Šhø¡ìÕñòÃGe¯ŠŒŒ*BÙ«±yŠì%_ùƒgÍ•½ëä>({ËnPö–Ý ì]û7#Ö ìUqÛ¨G£®W±$îíð{×È— bö®Œú/ {…G¦©F]¯žZÎT¨ë-OÔu½:÷³p]oÝÍu½ºSù•¤®W4s‘ˆ{µŸñ èzUÝfc4èzõÔÒg‚º^ž'oýM×õj[¯Üy†æÐõ.·ïPעº^m³‰&t½êeŸ”ôg›ëzu.mÎ;Ƶ®ëUI¹3S×õ–Ó„®—ýu½|©ë]z’d3]¯^áeú¶®—½,u½ú¹kQ×[Ót½ì1Š®÷‡®WÅùYu 
]o½0×õª¬Õ„½ê…ÌÂÞÒ[BØ[ĽØ’Ľå7!î­û¹ºWŠŒ¦º·ôÀP÷–êÞòAÝ[ž8Ô½u›ë{åí”ÁˆÔ÷ª5äz#ä½õ']Þ[ÞI—÷–§yoi{÷–ÃAÞ«»bKa¯¼·4YÈ{KŸ oyù!ð-/ª |9|§ÀÃcê{ë¹»¾—ñ¦Tø–«ƒÂW‡sóWø®Z«Ëz (|Ë]ÂW¾O‰|¨ð%¡Êwí‹y±(™oCæ«k°…>È|Õb3¶†2_}íÎ%Þ ó-cYÈ|Õ‰¥6Àe¾Úk6¬Ë|Ë^®òíã‡,܃ʷ\ÀPù²¸™*ßusùU¾_­ßôpÐOú ¾‹|×®%8¿åáÐw³²)ˆ|)ì£ÈWöraï@¯|+îcz‰E¾²|É"Š|õ•8‚ÐQä+'“¬B¡Ì·îçR_]^&œRê+Ç• 7§Ô—ˆR_i(M»ÔW$1åø”úÖkp©¯îKêf(õ•ÕLÌú¡ôÕ€òHñ0”¾Š9j­ã›J_%ñ¥ÀŸJßµ+žBäÜopûÎgÅØ÷Y8̦ôý-޽¢ª×;¯ûÀ½ä—À½œÁ÷–Bn罓ôKíè/í¡coûý­çþ<˜޽›kRaØ+’júõêû•júõž+|!à×{èD5ýz•4äðtëmiãÕáB·^‘®Nc¿ íÕ0ÈäqŬWÖbK³Þ}õ>˜f½û & ³Þ]³³”X¬÷rÝÍzÛ|5ïcšõžóD³^åp‡+½z·®ÏEHiÖ»y™Z1ëÝ<ljf½Òz›~f½J¿² f½šÌ¤fn½´w¡[ï¦QvTASÞ« t3[¯<\–¨Q¢[ï¦Ä_“þº[ïÖ3¤Öq ÷jž³šýƒË{7婇•å½ \¿-ïUû²¯å½­+ˆ\nŠ{u3ïtU€¸WÞOíq„i„›õjZ•óŠ{7-&Ä ĽJŽßÓoÁµ½}^ªj{÷žu÷Ú^å™§³?µ½åž¼Ú^u¨95£¶wïh'Þ`h{uÓ'‹ÚÞ}ö%fj{•@g (®í•¹Îœ+ ÐöjÛšk+Ðö¶Û¿§C.µ½}VŽ/4ëÝOÏL§¶Ws?óå‚¶w?ý“Gm¯¦½‡™1¸[ï¥0K3NpuoQŸAÝ[ÏÓÕ½íC3»I…«{÷ ².¨{w­¥ï#Ô½êï­îÎÕ½Ê<"Æœò^}ÏÐÉ@Ý»_.覺÷P_Ð>¹o5'ԽʩÈȵP÷îêöÓ¾ê^}^o³vyïÑS¦ãfAÞ{ì^Cyï¡N5¥<÷ò®YRìò^}úrQÞ{ô73h ä½åþCß»Ë¹Ò t]ß«º†/ø‡¾WÛ¶wSß«‡ú*0¡í•¥ûžJh{u+÷œBÛ«±Äœ:h{5(8MƒìÚ^mÛÌØµ½í7]moÝæÚÞ>#Ž•Dh{/E›‚榶÷¸=‚œÚ^]Þ•¤Ú^ÅšCà£íÕ³¶âoh{e6¶™ó¯k{?·¥¶÷P=VZè¹¶WÍî~ƒq©ì=dÂ^q©-§öêY¯±äGa¯¶M õ!ìÕo)†ºW7qÍÅ-¨{Oy#uu¯hPNµ!îmÌö‡JBÜ+´fî÷êÕh¿>© îUÓÊàŠ{M7¦Mq¯ª÷t‘„¸W(Oám÷ª2QŠÂ¡e5q¯¼¥cAmo±†¶·Ý³{£¶W‰¯W}†¶· M)Cmoq•§Q¯øYšШ—rM×ö²ÀJ-:´½z3d›ÚÞã‚—5´½åÊ í•wŽ©íÕd£+„¸®í=n·œ'Ľí\ÖsŽ.âÞC%x‘ÑAqïÑ=ÏØâ^]»ù¡CÜ«k_#2•âÞvœV¤÷*e×¾ª÷ V®iN q¯>¹œîÚÞCô#k_ íÕ¬}‹‡´½z iáGmŽa·7¤½úB·YôxÝ í½dcŸÒ^%ÿd‡¶W—åšeÓö²s>bmï9y}'µ½º¥îþëÚÞ£hÃq ÚÞsöbLj{…R­uAÛ«¤à¼WÔöjôŸøÚÞvûÜDÚÞsu/;j{•)œ•²ÔöêÖŠ\¾ ÚÞSþx•‹¶×jÔö*1ÚLØ í=»%W¬ÁCÛ{ÊŸàõÐöž'm \Û«IòœOÚÞ«'…=„K{ÛÑ®#¯ ¤½Ð¾i/‹Ú(íDnöªf!û.ö^Öe¥"…½TySØë䊺Þë64Rd½—Á)ëíg|AW‘õ z˜MB‘õZ-o‘õžš¼Œq/e½*£KÍd‘õ®äÍõž’Ú@EÆ«ÉRkuááëŒw’»º™Q@ÖÛ}ú’ñª.æ5\‘õ¢¬WóÒjò¶mmÐ6ìÄŠ¬WƒŽè¥Š¬uÅ]Ö«év,b|ãýnêòÞ1ÚG6›œØc˜ù²ÙÚpgÎì"d³ÁÀªÞËg0$½÷ Äl6Ô‰Ö»»sTa½š…¥†òW³ÞvöÙ¶!›M<,<ïJ6[/f͆¸Wy»n¯à¸·qÛÕ]ßÐöj¸m¾°LfÓ:Ë<-J2›>{¨+{å Ó‡’ÌÖÚâv|SÚ«ÃÍ{¬Ô—d¶Ó?Q¿*™MäÇ<öÊ£Ùdd:áC½šœçÓ&ì•ú"ªÊK4ÛsTF³1þ®F³¹£ÙJæ¢Ù÷q+°w†Í1£Ù¤2œÇüŸ°W†øîõ‹l¶Ó PK6Ûa~m¤½²ìßS_Íh¶CC§´€ðh6áŸø<Ü+³FöÀ½3îÀ½ÛÙëKß•Ð^Yy¯¡(´·½7íquiï¦Ä ÓÌ:ímûm«©~önªøŸÒSØi¯æî§é‹ön"—3Ø[V'{Û/ºÉ4`ïÖO+&®€½<ÂÞMNŒ)ØìÕÓ^RÞ 
Ø«Þd±$8D³uœ¶`ŒfkÇ6€X¦÷xˆf»ÜÑlª`ÉáX/óÈzõ8ÏuÖ+¯ ùØ‚IëÝäv”v¤`½‚$6Õì¥Ï?a¯Ú‘á"À^]‚MWìeEØ«m{ Ä{uúF´{ùrön½ §OÀ^}â,û°·žŠÃÞMƒÞ4ìùšLië°—Á„½j³GN]{µŸasÀ^­Òµ®|J_}Uløn}ÂAqÎ{ÛáÚ>ÞÛê2e|x¯ƒ3dç½zzÆÁ{å}Õ>ÔCôÞ«…eÍ;ïÝ»=DÐð^Ž'È{µ-—ÆÉ{ÙŸ¾¼·ýG³_#ïÕ¶Ëmy÷–w ¸WÀrMYǽínlkj{|u4³ ñ-wÄW\ÕV @|õDϬñm§Ùæíf= ÄWóúmËÞþó\3'¾Z×7ˆä»í6𕯚ÉC|•:r¦¥€¯^Õ´ã#ðÕ<–„½\'ì­Ûö¶mmh= €½º–#õy ½»ÚtæûörÍŸ´·¼Šƒön6À¯´Wm$/…öÊO-Hô”b±þu ǨSÍaÙÑwäéTà^æ\^Å¥mi?§ýR~w¶¯BÚω,oñµ:MÊéùòª·SùféÔ,7Õ€×íU–3f´Ø5Å-Útè×K3O¨¶ŽÔìçÊåµafÚÏM]»3ô©ì³óÙÅä>w¥ýµ½Ú–ös¥-,§'ŽðNv[ÒѽÊß$ŠNI µ£ÜÈî~ø­©gõ) D:5Ë»~Òµ)íçtuí7_,|®‹‰Tt–0P¿µ„ëí)åC(SÔïGúâ)áC†Œ°!Jü!#Z Ž!í¹]i9WRC™¢3Ï"öÖÝzÈ«Î.€ü…~µmKeJ_ÓŠVûšçÜóñµg9qkݶ™ç\ݶ›ç\i#ûnžsÌkMÊ´)ÖVµJ™ƒV|ãÏý2¿9µ”Tœª€¾¬W½3ÓQÛÒo®½2V{*Ô™~såxÇl~s“”y!Æ;¥€ˆAk»wZŸ{9hœZAÎ1k/¥U7Ú–vsSÿ^Eÿ¢ŠÀåæ¶^,§Šår̪,ÎìTUæÎÉSÎ(NËeŽ4œ“µí²1뚪ó¸ÍnŽcÖö,ÍnNCë9òƒZ_nvsÓm.•vs}ÝóZFßß¶¥ÝœÆYœÐ’ÙÍM Ž¥Ç³M¦Òn®¯‰†CO»Ùf7Wži›L¥ÝœF^sÔKi[Ú͵q”ZžÒ@ä˜còS+­9fÕø9ê¬Îó6»¹ISÚìž$‚Èq«rô¤VÛÒoNóƒ=r¡O‰ ÂoŽQ‡íã’~sBf˜Ùy­æ77©æ {·k3¿9–úœ½*Æ®mÔs‡®e¬8µ‡c৪>rìÊ6ÛæR(­qm~ä‘Ò½ 7 ו~sí'm™ýT¹“J[ø©¸ ”–Â/šzO‘h@ÑÛ©·8UŒ–‰Ò>Åצ´›“t8ª†Î{5·¹I’…’Adž´D49"‘ yÒ‰DÎ#×^V¬dž)}·ÆüÊ©´ÍB¥‘¨~ê;”¡ÒR™†»Å©/b†Jk2û—Æm*-ñz”]ZÏPéÖß,aö¯m*ÝÇ¿ÃÔäêýñH•–ú9òÑ.2UzÖho)¶ÛÈTi+bi“O•ö zmêvs]9½Ð¥>7“'/p¹¦Óìæ”$|ò¸ô¥qòÚ…øŠæêø&¯^‹¢Mi7·HJUW—˜JØÍ-‹«N/éÒ8y‘¤ù|u —ôiœÌ«{¢ÙË’|?˜¶¥ßœrd³¤þRÈw†Jo.Ô½›±ÒsªiÛa~sm›É«¯ùd´ôv-ù›'£¥MžsÍ—GKï.¥¹æÛ£¥/̸„îÂo®4Ëe6‰J;À–ã/m³héˆÍ¾±Ò2”Ëf§•±ÒíÕÌc®f(¶"»ìKäÑ]“Íò¶¯‹Èy/³©¾¯q$‹üƒúÃu·,|ûX7”a^$™êjpdéÞu¥ÿÊa©n/ŒMª«‚C«Õ_NÇRÝ6<µjiRÝ]®e¡Þ$ÕÕ0:ˆuwé c‚G¬»]€ënº 0Ä&Ö•W[ƹëJ'›¶Xĺ҂gÖ±®ä¼9$ÖUíUöĺm¿9#âˆuEÞÖ9ÖÕ´/xˆuÀÅĺÛìXwòM]ÜÀºëís,bÝ6žLö¬«2Ü9ëªò Ó°uU¹xÅûJ¬K+JP]MÕ[÷0Ú¨îÚÜ® ›NuåE™#RÝU–.QTCªKSJP]­|´±å1ü-AuWÕÌÞu™nñîÓEB‘3>?Dºëêµ@ºÅQšX÷¡/£¼žX—^Á»Åh—ŽåD»ìsv{ÂÎú¢Û±"i6?ÔhËí®p¤º Š!ÕU.Ez>’ê2†Á©n»NO¨#Õ¥3©®®©Ý‡wtªÛ·Í9@Õ]ä?™³P]Frëê<·íë*mhò ¬[’¢ˆuùé$Öe‚¹nÛf¹qàºiXí7¹îbÕÊuu}i᮫mí{uçºÚ6Ûäß¹î¢bƒèØÀuËH„\wé®+¶9×m‡3ûÐàº*fKrÝe÷ª%r]³eˆ.¸®NqOK&pÝE%סñ'×]Ô9E§®«mmÚt„=WDõÐÛnÌ×rE0|ØÕPÏʤv륃ì.²$]bfæd·?sMäêtwÙÜ" xWÇkS1ÙâÕy®N™ñê¾d¬2¯¢Ó“YG¼õñöÇ—åFd¼å¾8ãÕc0âáŒWÝ KÁxµßdÄïN¥ýæ¨ U·šºd] o½ÿNyµß¼Ä””÷ã$Œòê Û"C”·ŸJvym"À[&W¼šÐÜéŽFÀ«'–îo¼tú 
àåD€Wû%ßÕi¦Ä|·Ì»ÈxUSÛÚÜ@9`¼š'Oâ-ψ×î$ðn9Eà]NµwËÞõC9ÛÕ>íû<>ß`»m›TÛe4Ù®šÔƒ-²] ßíÒllwÙ¼(l·lÛ]67&Û-/ ØnyaÀvË[ ¶«ã'J¶[(Ø®î²3hg»jW†$ÁvE¢F}Ù®¾Ü©2!ÛÕ—»MsÇ¢2Ø®:ëk‹Ñ$Ø®>bkø?‘íª‡Lã:²Ýòyv¶[^)°] ZŽw@O²»H²–ë¿ »z@Y¯?È._O€]!Ž”ì–~`WcÔ ¹Øeú&Á®„]G®kì.šØOu²ËÌ’]ŠÅˆvqAvKóÙeü&ÉnY¡Ù]§ŸÞÕ1@]†åêjê™%‡„ºH?%ÓåP˜L×NItÛ ér4;3?ÄòAtÕ#¤²‰Dwé>kCœB¢«É:^]2Ï{$ˆXÜ‘î*:UDºš‘žQ¬N¤««NS@0ݵ½µ@¦‹("] ÓôŠHWÿ5>(Dº:ËôØ!Ò]­qV¤+`pE÷I¤«ÓLE¤«ýö'é*&dŽE$ºí'[K_]ΩItÍsØ-»™ ²·¡ÅóQ!Ðe¦.³¤tËÝt¨[n& ®FàY†M¨»^¨\ n»{i‹õ@ÝÛ•Yß›ýÛÜå~ݵÙÓݵ0{Ò] á3jœtwu{#Ò]mËï8éî±ú©ÐÝÕý.?è®Ùº»Ê‘it…î ©E¯Zèîæë·…î¶n{^ÏW«Pè®J 3y†tw÷òeÒÝ6«3™I¡»]Ü“twrË÷Bw5´M¦ºËX$Ð]~lHwÙWî ÇNgÄ îŠŠ¤›OÐ]N‰Iwù n©ó=핽 »ZÝ1{G§»Æ“î.ÝgöEz¤»Lr%Ý5›øÍîÜK½_ÓTÂ]õz)8#Üå’pWŸ¬ó$»³¬"òÙïrU˜x·õܓًR²{·÷,íEÁwçͽàÈwgy^¯!ßíD.“”íj$gÔà»"•Ù˃ïRÊ ¼;ËÉ)éð®DÎ6‘â¥â}0ÞYCʔɀñ]•»—ú«dÚÎxe –öÀ`¼úög@ ¯iŽJŠrW.„‰pÀx'™¡üã•Zf¤“ñÊ3tÀ¿A¹+Óäz@¼“Æó¦)ýWE¼~¬)8âePU‘î?ý‚r·{³áÕí³)GUîºz„—z~^U8´„r÷r“‹¢Ü•‡yøp|(w¡‹…rWÍ>â—R¹»û „·k"ïx½‹rwõÊÞ>€'K*wõ Œ3á•XÕÜjIxYÄ@Â+õ}ZÝé.Åi”î–ØR”»—4®£~€·J§x¥7L?WÞ~¸)œ¼x‹<€W³¯ü8 ¯FÙËÀ+k›` ¯Úzøú|hx] ïì±'Îw妱™¸‹ÞîIˆ‚^8t /u﯈WR½DDñ¶/ý}ŒÅÍO¯ µÁwu‹¯Xw¾+/ **ÞS¾ÐÓ+`úPñz ä­êX¨xg^äÕ½js‚aˆÈÛMræDýy«bÛ!¯nç=…|”·;voÃᩈxá’]„¼”Ë;äí@K®ª8å-ï£CÞ¾i7),t¼“ì©ãî WF¾èk ·ß°5båzk£tØÛ%îihØÛ¯ÎÔÆëU3i/ïš[c½IQx“õJMœFM`½½YÚ*¸±^DkÁCÛÖ[<æÁzõHç5Ì·ÀzK>X¯N¥·å·å9ë-Þí`½ºÉíXo –“Þ_4Ò[Û¹“Þ¾-u ½U¨ï¤WgiÓÞ~–©éÕ~A% éí}CGP/–5Ò+³¦¡pÊ«}ÖÔ(9å­¶SÞ~i){ç­·Ä9oí_¿’R¯Fïì‘^½ÅÞ ·7­T:è­5½úè§Y+@o7§JàÐ[ûe½µãrÐÛKÅÒÚ¨·$"õöý®ð¨&ê-U @½,.ïíma‹ˆðÞ^é±DÜxo?—Éв_õ”Y½ à«ïŽ¥•ùöv¾èòíߤö-jIC¾:ÍöÆ 䫟lÍuˆ€|»§[¬ø–!â ¾e âË>À·D†øöŽ9ÓøöÒk¾ÝM/ÓÛA|ËÄ·¿Ë:æ, ¾ýýN:ßþmI­7‰¯žSÚß’øªé]vžN|õ ]lÄ·?Ó+ÒJ|Ë<È—¥D¾ª)l3þ÷ãä«ûiÉé`¾ýK¼…®Ì·t÷d¾ºö´¿væÛÇö)óÕ¶;‹ÛÈ|»×zäì‘ùª["Œ½yÜ=+rÄ|/ó‚ùÎíßÿà«Îò­êýðhÁ|æføÊ<Œ|×Û‡´¾k½¹Âö¡˜4÷ömàKùׇIƒE“¨ˆ|)N 𥠞ÀWì9]qiÐTâ<Ã. 
RÅÀWu[øµø¶—uËåßù0|ò^»4i|$/¼wî±]©£5Þ+þ¤¯ÆûhÒ@þÞ[ü)hÒ@Š0ir9?•¼“,Γ~Ó ¡½±[¬Aƒ†SAéÂà §g‘ƒ†öpm]°—µHÅ §Iƒ†þI ¯4ǃ†6üúúÀ¼¨+Î ¨+Î €!Å™¡gÈEc$æ'K†žë7Z!ð®hhÊÇŠ+C”GNÎÝuº«ËÊä>3l¢chÌо3iN¼;+‚(úìbÌ 9Zf_¼[ª iÌ€\ð®&Õ9g¡1k?ïªÐq¸û[Ux¥¨”p÷°„ÀÂvû Æ·teÐ’sXW¼Ôïêü¯¤L„»›G[¸Ûî¥É+«-Ñp¶ múSÂ]™·§‚t·G &׆/é.Úr¡»í~¶)ùP}ÐÝL­'Ýå*ô»z«–_¢»}Ñ1¦¤»ÛOOÞÑ‹\“îž&~¥/ƒ)sÚC€în©úAw_k’ÝÖª3Þ»x2Èç>)É.¯Šd—¥¾$»§§dR»;õ4ëÐÅC»[î]Ø€ˆvÛ žîËÅ™áðˆì‚vóvëúÛM¨ [¤bÌÀBe¨vËY@µ;Z¨®†uf£àÎ \j·T´Cµ[–` Ú-oÔ£Ú-KRPí£¨vu#y²@]öá°f(wÃE»¥#„j—G#ÓU[9ƒC¸«m¦x§3ƒd9WÔ[ЙA)×CïRŒt¥IThÌÀ7‘Æ ëOV´ Ýn©s'ÒU%Ø/èvËŒÞ åf›7“HW ;uù´fÑj<3l®g(Æ üJÑ™ÅñtfÐÜ7íáÌÀ2|:3ðcBõnw‰þ…êÝ˩ԻÕg¶r:3¨î,€:0J /C̾L×{ùPÕBÆ‹/ƒˆg–—Зát«üâËÀuPú2ðÙT_†vƒòàËÀW€¾ íGÖ5±èË€Qð‡/Õ‘/Å—áö”JjwÛÓÛ—ˆ;«¾ ¾J_Ì‘Š/ƒg?Ñ—¯ñðe`Q}æ ¦|·}Úêz—€ Þ-¢Ø2`âU ®:Û3…Pïjô{Øtõn9K¨wõ›V[õ®BÖ¦ˆz¦~W kÚ“@¿ËÙ\º·ˆk_83ˆœÎ£h¶83èíL:3´M†çg™cFÎmqfXtǺ~qfhƒ"W™çYœ¼Ò¾Ç¯iƒ3àL>ŒÚ•…¸‘Æ ª 6e0Œ”æœrÈx5]Øb|JïÜÙ̈O§Œ·M vìñjê’+Kñ f‰+e¼´Ž€Œw/‹âÊxËOBÆ+ ÔU”ñÎýU À ¨+0´ÇœƒPwVVRLu¶¨§ŒWIeBqf(ûÁ™ÁA™î|{`i1f˜<ó¯3€‰cD¯G¦+C‡4Æ‚ŽW»¥ð:^åfYy±šVÜa´J ]ŠÏêÛcQ¿Ï›a6ïoòÜÕ ôvc(D·}?³¡Èw•@Õ"…è^ú*ŽÂBtùd@tËyìj „Ý^Ñïö®0 &¨ß]’<}èwÉávçN¬Òú]Ù EþmÑï*H0]‘©ßÅ~$¼ÒÇf XÑï '¤” ú]a™È,Þ²okõïV¼øn‡€WN0YúF/¾¼$¼¯²¸Žoš4p•çCÀkÃ2ÞY]RÎyÙ³ï¦÷o[ïJŸœËêMRÈ[.·hžyi…ULÚü|Ù¾mÒ µ9LiyëµCÂËÇÈ[öäÕèëÎo@ÞºŸA^ ¾2T‘Wƒ¶L–'äÕ6Smò΂d9ûWOg1Xë·4L@^=¹,Œ!äeµ@אַ:;zµÍýfô– Ð[î@¯$ú¦Pè-  ·Ü-€Þ¶MÃ3bHЫã-©ˆè-  WÇË(=Â޹‚÷–¸—~vĽºö¬y"îÕ¹˜Î& 7Ï)’u“†Ù“‰{KkîUcw·]ǽõ4;î­ÿÑq¯žMªˆ{9!ïmûÙH¼—«ðä½¥P¼·«qC»Ói}ã†M“Ât÷…qCùÍáÅëE¶ÅŠ·ýei|±âÝe(ZVZñÂu^¼‡fÐCa]¼x!!,^¼ ‹®«xñ*øÜ$¾ðâzK ezñžr¹FI/ÞËCâ‹/LFŠ/ùН\Tß”ò^Y ÞÑçß•¦’µú6´‡5 >|¶9üÁ{WIüƒßÚDз¡½ÿgxŒÒ·gI߆E±ÕÉ‚áÛ  ô°-¾ ía%ex|`pñ}À÷wú6,2Yxâ,>°ïé#µ&ˆx%¿%öõl*b_U§Ä(§`ßC#Ü0­2ÞùŠ:wP_MÂ3þ¥PߢªõÕïOi¿ê»%¸¨Ô—º9R_ùÙ§s©ï¤å(,YkÐ(•¬5$𔬵E¥¢aÄZ©¯Sεfeî%j­è]µv¥ƒàGÔÚeZjF­én®OGñ´Ö:îG^¸tûZC6˜/ ~}К×ÃԷל±Ò\‚Ö Rt¼õe¤O¾íaßQÿ‘³Ö~b …‹šWëÞz¹o¨yK>›A_fí€ù–!0ßR æ[JKÌZ·Lf]`¾½4"íÛÁ|{!x¶¦Í£óÕO.súÐ:ó-ÊL[cM›#ß~–6dÏ´5+ùòý8?C¾5¦Ç‘o-ÐBÚ >ùÖ|"G¾õ~0mír[j ßZïæÈ·ÆaÑ´AêšTÂ8òýÆ6‹[câMHökMªåóÖvwßòíÕbgD]ùörE›æ3oM.MI’˜·†z7G¾¥d¬¸6œnü䫟œÒ~È·—ÎÛ*À‹|{¯“`¸6h8œ–Ž|{=nŠ–¼µ­ÒÂq8‘o÷ÓÀ¼¸6Hn~·È[k—²½_’¶&Ùœà8ðÕ6s/ž š"¦ ,6¦iÎikš´ä,Œ– r¿Ê5 F®µ1AûïCËÃØ5ù̦ÐÞq¯ö3'¹bÙ 
)Z¾´lG[¤”ˆnÁN5Ž{k98<4n𢛢gƒÈDJ£èÙÐ:‘ú϶°×³AQOs˜‘Ó³añ‘|ñlXdÌ>àƒ×VÉÓ²añpßbÙ0ë¶}Ó“·ß©9–à5M"Š‡Ž ÂiŠAÇ©{‡È›~ í²·L'¥_»†öŒLÍF»Ôž¿”÷‚ôêpk/Ï{¥Ëðkh/fûv·ƒ~ rÔÏÔM6´ßl]ÂXÁsÒÛ›ON´~½aƒœÏøêаAÝpVŒÐ°a‘›@¸ŒÒ°aªC;øÒ±AOqó’{[_hE„½¨X¤cÃj6ŠÅ°A.‘f^ Þ< ÊÍ¢aCwmŒòx6hðÆ€† «Ž÷Moï.R)ñaÙÐ>jáÝCËUöø´Ó²AýQ:ѲAH!ˆ‹eƒžbZ.Ó²aG2-VG´l(O¶ j Åc1là8ˆ† B†)c§a*øzµmŽ5â×pÈ·ç¹*üzòÍðk@é9ýÄúöTïA“ý›¡k6 ŰÁ¦;iØÐF=kÈ‹aCÙVZ+š¢^b :6G½Ì .Ž ˆ[-Ž ·¢‹Ôé•Òæî$½t‹+† mL·„¼6`îT f%?%1ä¼".¹LBÒ‹¬íâÖ Oæ/ôN†õ-@¯n^ü½³o’^Æ­’ôÒÝ­¸5´;d‘yNzÛOÞkÊÛAz' öa¡WÜPsUÜZwš.ìô²òàûÜþÚ£×®îc»µÿÒÞÝþÇÿõüqŸúRÃêúúçï!ÊíñÞ_P/¿  þæ©|ÅþmÇÔåýì×üóÖbì{¥ãÏkpÿøãï×8zÿÓ=¶hØ8úûçzü~¿Ÿ'þþYä~ô…?äÞ¿Gן~ì±ûý~þüù[Ç~ñ¯çÕn~—ÓyzJrÎÄS%v¹Ð­]{ÚÍ_RAd¦‡žÖ˜ ·-Y¥³?íÅ}-¦˜g-äu­&dÝ6÷︮ͨÃ6Ù;]»G$wä˜ _»UƬ¼ %…+=¤÷^] 1¤MÇOWf#\]Y<Ì b61p¤„ ßÚ1ä«^RB„î¥dß³ 7–z øgh#_/±ÉŒUCˆì¥Ïê¼ÒýHÛ,HOÙËYª+È›jI-Úgs÷É¥(Ò/­O¿)‘aÁtI‘åÉ(LÒ6Ë®ÓÂO裮·B¢¨%ÀÀ·DR¨×(8ð-DW]?µ'x Ùd(å”OñÙò>…çÄ=­^ .{»€4÷´yŠóÂÅñ–WR4µ‡³¿±_·\r²Ä`÷Œtm é¯. ¥ ·¼>¬öº]ÛXʺUš2oTÀßS©#jýÀxoYµ¥°ÿ’b{¤~ÝúÊ¥ÁÊ´-5j(0¸eêjTßóê¥;Ù¸%/KE= üï®âùÇsh›U£æô–t=kôzìÐôÊ^dqÎÝ+Óƒ¹IP‘·ž®¢—\ÇÒ¶céä{/“×Ãi©>RÚoi´S{·ª?ïý­VŸÚñ¥]d¬‰ß1gI²Qîu¯ “ˆU§[¹?Y‰&Þ?Ùá-±o†ÌòÓ•·³ön·¦œQ„ºÐ[BÙÐç·ž2m¥ïnA:¥³Çšk¡÷‚šŒvû2Büngª¤…›3¡q¼žæ0÷ºx•'ÜB´Í ¶n·b½•Uæ©%.N•r4ÄwíëdNp¼·ËÚ`­wo·»'¡,_ÛlÙ&„w4™ ”g¹Ï®Ó’/yáÞûâöh®s+?-‹Ü-T›l±†E÷¾¹SÑ·ÜçÞw7`½ÓÏÖW–%³ŒªmŸ2kù4*»´ÍÌ;˶Ӭõ4œˆò¿û¸Ìí QCíí Up^l?E?^¿º¶Íü2a­wŸ³ÚñÊÏŦZ3X÷1"n×âËC0ͼÏÍ­paùÙn•»U¶ájæÇi›É•óJòH:!›]vÛxÈ÷¾}‰¦ç2¥˜yŤ}£ q×6ª0¢í¹ÍOÔGkVóxw;G¶PÛvÛ ¹só¤0ôv{Žýz5–jÓ7Ú0wþé1_nÿ]Ašm§é~$3ו÷[©ÓÙ ó$ÛܪµØ[µ_¥n¨m„-çªç=1ûÆG©6”ù´»sU2ȶQé¶­?–7¯¤u7Ñä°BÍPÛ¨Z[ÎAÏÕo÷>Õ”+Å{ßW¶8RGþAµb¡À[Vé~,8Jâ^¤ÀÇŠcPà®f0æé¸ ž­b› XÝ@ðÑòÁ5‚ “â“Hì¡*•k[vm$Á‡øUÈ“A‚Û”¢î—`$X›Žy~û’à¶m‰N– X‹ýnÛÚc"E‚ཻ|ï*<‚`ýf¼JäÀ:Ká–ÌÜ„pàÀmÛ’ÕyäÀõTœï*y 38°þkkíïY„Œ x¿¢k[ª ‚µÞ‘%^ÁûIŽê ¸m»¢Ø¸m:2”¸m[²¸mËÑ1°vk•1GVYA&@× 0\OÒA°æçm"=“Á:É%‘@ðneï’GiA0k£I‚÷ëŽI‚Ë•ƒk›nàýpCZ’à¶mÉ@3’`1Œ|I°þcfÛïí—3?(x?<Æ›(XÛRÀK,è’F-DÁm?33! 
.ç Ü«V¸ï2”‰2S¢`mË24¢à}wW2¢`Ý—ö¸Æ$ÚQðÞ—¾‰‚wEƒ'ÿ nÛZ7;zf¢àrÇ€‚w±’ïÝPj(º‰‚÷Ý×.@‚w!ˆ×Ÿxﮕ«œ({H€÷ͽ©ˆ€ëe9Ö¶°X‡OÉ8 p¹ÀqÖ@¿ûæ^D¿åŠ€~÷Ý-‘É~;iŒ…V²_/] u—Ú˜{pag¿m“¹þ‘ýêpi¨Eö»÷u«±T ö»oî3ö«£¥Ù¯v»BÇLö»onûEö[šØoÝæìWW–`„¿{÷4LæêôWûÙ* èo¹tðßr§Á˵ƒÿî›»Ëuþ«¾'ïÿ•a‰4`ÝÆ¬3"Öa2–˜ºà}ÅJ$𾹩 𾹉 pûÍóJ \®¸ž§àÒ„œ—;ì¸îå¸ÜMà²pÝÏ ð¾º· p¹Ó ÀÚ–¶’$Àº›éíG\ç¸<à]&¶Q¹L¼¯n9pý5'À{Ÿ²$÷v\îpÝæ¸xßÜL“˜q’$ÀåªA€ë~N€ë¹8Vmo‘ï}}ŒçA€ë&#Àº›éK\®¸œ °¶M‘yE\ެýÚs Ô€€÷®ˆ àÒ†€€K{®çéX×>%÷Ö¶Ô"½xïèÊ‹˜M¶ `¾ó¬ŸM¦‚€µ1]¥ ÞW÷Ô-˜—^ðÇFGÀ|¸ë:Óë¡ `> "à}uË#"àý×۬組ý^AÀ{:æ“.X'›îú…kÏ´~( øã„œ÷GÈm ÞW÷¯. ¸>jPà]¶S¯oD0`¾j…×€Àü9 `mº#\² àÚ€ëÕ׿\_ àzG€?Žé˜]_AÀµq8þøUcÀ:%# Þb¬;µÅ…×»¼÷Äܸ €à#:ÖÆ´R) Xi8R@ðÇž‚?6:ÖÏNÆú‚k£\?šÍ÷†Pÿ6»‚„5{GŠHXÝB h?ð•ƒ$|eÆmAÂ*tAÂe°>Ÿá¶C$ÌA9‘0ï:‘ðÞ%oÃôHxWJ¬y kw$¤u$¼é…V…HX(ín½Ã|2áMñòÉvÁ„·ÃsÔÈ„ë6gÂRZ/á¼D&¼ž±F(¼iíxK¬êPX4k ~…µÍ…·ÕSh …·ÙVÞfÔ o‹L$V Àf–€ÂbY¿M(,ŸŸ D"Ö~YžN(L9©°üˆîHg#VÔ`ø§ ·M‡³k§Â«¬ˆ”T¸ub³‹y ËQ)Ó¥@…W¹ñ†í)©°:7#¸ Âí'7£3 ÂJ›7*¬+HRafºN“ ¸ez©°Ì¤ D€ —»*,ð–&c¤Â¼ ßBáUþcÉN(¼JÔ6dr@Âëá¾cDÂ2Ȳypyh@ÂëáöDÂëî¦DÂíûÙºú±¦L$¼vó±˜q k[† ëxÉâ@„W9QÆ€D¸mÛó³"¬¯ü‘¥ «¬´’l€¯ÝÖk NI„u†@„ånfòra¹›™¼DX¤7G­DÂÚ–ClBa:Ù Ó×hX†»&ß^»ÃR(~€†×ÅMƒ^×ôÅ"Öq2"ˆhX™ÖiĺqW¬•kPgòyb[öŽ©ˆi2L@¬‡‘F\Äm¿6£~_GðaâdøùpÛkÍhEòáÞdâ³>¼.îC>¼ˆUä·|x•áᜀî×ö—ʇÛ~ׯYÕC4¬]L74¬~§ ë6¦AѰ,MS4¬›œéTDÚ5´I eGúɑzÙɰnàþÄ$Ãú¥v/‡*d¸ÜxágjË" ÃnËM.\ѹpÛ6O[*| Ëÿ¼õ–ƒ& w‡Ç¨A#Ö¶mS raVw‘ «ß›Ïäׯ…Õ“N¹¨ì\¸Ïsá\¸õÍ­ËN \XGÛÂr—\xÑhe Q'¸°îs:º ëÊÛ «Ã+URaUÃM9²V&â4ŠªÀ„aM$Œ¦ ,+ü-W3_ Lwyá¥?…¨V>Iû€Žf ,Wï;׌„5§O»la}jÛ 8¾?Â:—sŠï€°ðCzï‹>œ9<¦û8°\þçð£%Ö6«

õ‚‚å:<ÇAAÁòµÌj‰‚‚õì“Æì­©€à¶ß|f6JÁõI³)üqLÁrèÌj˜‚—5à ë¶]í=ò[kiÅ!P°ì¡³è À`ñ]—§ë9ÞwJ>ƒ—%³€?`påYucÁºåü: ÖûšƒuÌÇ|-ZfqÍdÁ(Ã& V#OÜC¬¤ž,¡(,XOjs¤ë,X“ÎŒ'(0X•ßSÌ ^Ô#˜\4XwÀÅÝ Á ì)4X?E$×[ÌÄäƒyf›ž0øãݵ°ý{að0xnÿþ7à6²ÞF˜Á‡EÄêYÅ"bñbd’`M’ÎÐ>’oò’}‡dÅ#BüÆXoñˆØ–¾.ZÖÚP<"68SÒ$¢[Žñ9ðÖÓš&·ÇeÏ^ðP8°¢DbÖW8p»AWX6“¡ÌdËäÀ“Éå‰I ‰ãóT×:nÍÕ2*ˆWÙÍÆ Ž˜<‘¸ý¦¥?“h LZH ¼žž”A ¼öø_ Ø#ÖôèÎöïnsKÜMéS0I,_ŒÕÜÖÌ÷Ô–;’îèúA'¤ ŒŸt ,jiŒx™—ñ9û À—:l ðZ °Ÿ%!p¹€À¶…·îF½Ò×ný¹kr€wå²DÑ:ðîf¼É€g“V l¡…óîW l&Ã…ëÅË|¥Àæø](p÷«þà¿»¼‡Sl þ[˜rá¿Ní €•K’ÓÀ2ŽYð/ï;ð¯2”–@…Ä¿lÄ¿»™FúÛnâdÊYÐ_þ$é/o>é/^ Âßž¬Š>ÂßÝCV üÝ<…¤À_¾ó„¿\)ðWY{Mü«9jɈ[/èZà_®ÿ¶v`Gâ_>âßÝÖ þ-Çþåƒ#ÞÔK…ÚŠ˜MX©€æ‹ï¸?àÌ̧¬³Ìò€•ù˜S}PàUFóYCN ¬ÙgVÿƒ×ý@ùô@Û6³j/xK[ú ̧G Ìwdþ¹Yk Ì7X©0+˜xÝÜž øs[Ž]Ëy×ý—µ+€àzž‚Ëc^{HÀ˜×S1\–$‚µžg†JÁkñI‘¬ƒàr–Á¥1ënšÀ$¸¬è—§\®0XÇ3 *hpé6Àƒëo><¸Üð`­~† Š8¸|€ƒiLG\F¨ÀÁ¥_®¿é8xÝ<ÈŠ8¸tzÀÁå ®çâ8¸| —{ì8¸4KààòHƒK ¬ýÌÆ 8˜¹£ÄÁelƒÍ%…Hx•Ì 4Å7øF|°B:Cð@ß`åц©bñ ><Û³ø‡$¼{jñ Æ»0á6ä>c²0aÌÙ Æx©0a´Ó„Ñ&Œw»0a|‡‚ /ž;W˜pm»Ã@LXÐá:ãì…å8r„tƒTXù‹Ñš Vð侤³g*¬ï5È.L˜b82áY‚·•ˆ„å¿´$fž”˜eG§ßOúÇ0Hxò(K"á"ú߼KËfIr­9¯_ñ ¥AåÙ÷ ‡3è4ˆ†nèœ Q©R ÊBúÿ´-_w[툪ÈHåw„ "Âs¿ûæÛÝüñeËÈ„c>H <™°$Tæ¯&,©Y|A}%&¼Èßä)“dÂZ  y¨0äB`ÂjÙs‹L¸¨«^L˜ú" aý÷æ¥ $Ì+p \Ô;Â\m£ô6°ŽÛR \®X¸çXXÔVãd70v,,=Í9ŒÒ‰…¥”:rKXXaH:x ÇŸ×5]< K¢–;ú’ôœ'VDýshj Ëî|Jóa)bÄ¿N†[”•þ¬ úÕõè¦ADúÎÌØ Öù¶Quh˜Â¢á¢x®×ùBÃõÎ ëêçôÜ.g–Ê+ξ`ÖÕÇЇI áè4w†DõÍѰ禥ÐpQ± —' 4\Û Kèµç&£anÉ"hƒšÛ°£aõ=z ›€"Ú2ë‡hXŸË:  0¿H áÒ¿(æW4\ž4ÂìÏ@Ãå».zA áÒËöwÏd&¢ázËPË»hÔÚ$f¥ƒ¢ŽK?Ó‡aÞ5Â¥ a5ÔS˜H0_5”ŽR#ÌW0™Ð·­# FRº¢V ;DE#,Bô‘á]ov8ÎP#¬Ç;C Ë'gÔz-*aÔ€.*a¨+)Þ•›8¼º©ÖörnŽR)ÇÍ#DJaˆ‹R¸\<”Âl£R…¯‹R¸¥p9JaÞYU ã8(…!ã+Jáò›P Çq{:ØP)\îÁ•Â¥‰JáuMË$*…!©,Já]£ýØR¥R¸Hw¡†p¶(…£ÍL­)Þ¥Š[s” —ã .ª_ˆ…©§ZøÐà76û©+Í à·Z˜SÕÂ2»†¥,ŪXEcªXx×ÌÙW¢U, z W2ÔÂñ¤ÓÊü©žÓý½ª…«@jáxÓP&Tµð.½ú:€*ÔŸraÜäÂåùP.\„ÆU.ÌFÈ…«œ¶†y$õ±†clíË´§`ØzÜS0lî£U0\žõq†)O†`xóDó*.:k †ËÏVÁðžÖîU0\\çRö Ápyp ož‡_ÃñTst¡`xóLü*–‰Úع(†y'®.=€ŠaqŽøªbXÊ se¡b¸t*†ËõP1\ÞIU ‹ó­'*†ëA2\/’ᢛ§d¸þ¥´\tµÜIx”–‹@j¤ ƒ/“û!?JË­i›NB¬ã"8Dº”–sWZ0bñÀ#ë†}ki¹Ku,;7"Ž<Òúœ¥å¸~²´Ü C[––ÛabÉÒr,ÊXZNùt¹Ždi¹“¦¼¨-'oTò¢¶\|¶¹€fi99¤fÑ––SŽ×K 
IJr;<1YVn‡û軬œÿ\L°®Ü¿WÖ•SBCŠ‹XWnƒw"ëÊæU˪rqk®¢XU–õ¥ªœìͶáIªr|kU¹˜êÆU«Ê½,÷§•°BYÛ³(Uåâ'0@U¹øFmƒ¤V•;,˽V•ójtij&ªaå^ªÊi}}Çùô ¯âÅç–q)šW«Êá8T•{”ŽËºÞŸb‹ª¿¡7 Vý:˃¯eåæ‘2Í¢rW<«óé*<)&Ï ”“)ÏÈO)åTzsã+ÊéCÉÜrV”»õΆ‘%åâ¸{˜ò°¢\\þ•BA†Ûx²fMç‹ï3Žø©Õ”›ã|qÇñOîµ?þéõÇèÈí„ñ¼þøï¿ûã÷ðfÕìú€yùŠùËó]ûžT7ø³ßöo}ÒYÞrý^û_ÖìŸpç4Îßþø©g_ô-ŒwýþËçÞ{èïÿþ‹æÏ¶|Îèç×?÷쯇Þ;þë/_º‚×oþôW®$m1ˆèl+kbŠ˜ †±˜tQÿðãÛ¦æ÷süÿþñãÏÿüw1JÿýÇ¿|üøO¿ûÇ¿õLk„ïË)|\á7ŸiûŽ35Ÿçs»b0Ÿ¿ýLG=Ó§Œ•9Dÿææ=*êXéwzOnWÿYÓƒ€_÷²}\…µ}Òdu¨®÷kÕZ¯ÆÛ>éj„àeõ¾|ér¼ñs»§l×Fpõ)=¥=…Wñ©=¥}Ÿì)¸šÏí)íjJOáå|©§üíqú¸T âŠsG Óª#|ÆZJ—û¦cŠ}Ò¿x6àY&i ¦;ݱxœåîgY~ÝiÖ³)ãYÇQ_?Ëö+Ïï_ \AÃõ¯œæ¨§ù§8Eêbëc]w½–c‰uàãVô?í ÿúÓ¿ýüóþëÏøË_þò‡Ÿ?ÿçl¿j*ýø¦ôG a±–š¦ýj5£—æýMöèU9Ó£Qâ8r=ß×#½3ïÅõ-R¹šTð^¥ðR¿_kÁÖhåçøÙQà:EJF%¡IËÀyðRLärþp£øo4jS¢¯þ£ñHçäh<Ýos¢Ø﹯þÕèÈ龬NŠ6—Ǩ2 ò¶ô:œã÷Ô¹{¢°,¿Ï¡#œU|=-ò$6ì­ñÞG¢ðùÙµ–£qñŒHí8 R« ²ôDáMQCœ6O›çDÂß-÷^Ýà(¼Í‚Wï&Eš}FkŒ²ç «w kÁh<=-‰ŒÑ(wÆ‘'¬~8|bgˆ·U~ÚöGOˆõè]™†á­Ñ2#ù{m«¼÷dzGÛâÂ7êŽ6iöŽ\Ô²ó¼¹òÚ­ñNQ#M`gé&\Zî~¾³4)j„ð'OŠËMŠ/!ÆsŠ¥I¥­Ñå妿–÷éËã=E¿éLz^&IÑSÔ—:˜Ò,}QŠg½|]KK%í¢F:ÓÎqc¦1‡L©5^nƒjµì£qs•y|Iýß•m=ôŒÚY)ÔZÖ¸ æ–¢hèg厤žqÂ(¨FØÞ(šTç} ,­ÜÎa•ðf6EŠaaŒÆ³|ô1œ•ñšQ7Ô?­ñƒq8œM[ãB=Œ•˜ŽFNë}xncü0‘˜•Tš©àæstSùáŒÁ8.ÈŠ‘·FSÅh)6ѨLÞ‡ç¶Õâ£ñt]Ìîeuç¶Ôƒ±ÄF×È-ŒÆÛ”1Ð%Å›žL?ÁÖø0¢íƒ±TAiÕ³‹c´åÙÅê>ñ,{ž[ÁØñº¶ÕÅ1»ºPŽ‹’`Ž¡xÖGœ–»j4yŒªLÒ0Ѩ¡Þ%úºFѨh<\ ³¡ ß“Þ:Æb!Ô5-›£ÿ¹DFûcÿ«5Þc,ž7¯÷K$³[êÁbXc±$nf’2Ë.aŒÅÚ%ñÁ8‚¥y ÆÚ|É5CkÌM]‰ùN̪ß:Fã¹Õ:nÖs«=zò†Òt³Ê Ñx–êcTljÆÃ6æVö2g‡&ù=Yqž EûIÑ̲Ù·_¶© Í©¼ÆßïÜTÐ$³O9ÇSn*H %»Þ÷6Æ|̶«0kL[yo<ª1w„Ô³@J4.¶«ð8rµ]IÕ,ìŠGa» sû™®ä‰ÆÝv(rk¹­ I8Ý£çã°]…ÇÅž¾­‘X4^¾æ„vNæîÜVÀˆKvnÃè<þÍâc*ø´ÕãRšø<â8‹¥œ;,Š;W‹çÜ,>ž%fÖ­Ñä4wÜû(I»ÅǺZuÔ>Ÿ‡ j¸‹q‚ÅǺÚyH`£ñrIMôÍÓæˆhÌø˜ª¼h¼MTÓŠþåÈqM¦ª¹õaïc>»f‹¥ó²½³ùZ,>ž[Œ16Õ˜ñ±¶£b$“ݵº²FúüQwNB‹)‹ÆÝÅ5ÜX›¯ÃâcyÙLx.¯¬®5f€,©ñ>¬¤£ñr ¶äâµY|¬Ýô´Hˆ(ajò,Â(Œÿ<[€\vzÕ˜rÌ{šaGãb²t¼×–ßx¬Ø6—0Ad4m Kå ï1[Ý»ël$]½3½w u­™õ‡+m.)åz1ªh<-@–°{µá¾\k#kž‘­àÊä²}¬ÆÛ¤ŒG&«G <™Þ&®m¸–GÓlñ1µKѸ¸âÆ…ŽÑ¶Zx¬+Í,wÕ¤uÉ ôo­1Ãã&_¹ìѸStsgJk4#—§³H\岯) ¢ñ²Y]ÕÖÞj¼\Øè½`Ñöe9&³Q á½3V¾¨EÓÁ“ã_Ë/1êÍ.oÜû¿f|Lme4®.º‡lSY»«m(¨Yšlß4 «­ßÔµÍa¹™‹¢èÛ4 <çåjªŠÅüc.]Ik–1K€:¼c¥MØÆåj˸¸^ª)¶á´¨ªEÂq¤å˜.Z%žÖyçLyÆÝ"ãÇÅî”Û¼ÿõ°˜š¯h<]h£Ì…\hÇf!qƒ™ã¶(YÈ…6&yk‰N¬F»Ð¯(ÖhKteyq‹Jê¥Ð¢¶h\-&– 
Ǫÿ¨‘B›®Fãf1qÑK-2 w¡£é#£ñp¡ å2‹,ò]Enbëh¼(´¹³FXk4¡M| ר›· m¨üYd_fð–IW³èçbðm ØË¶¸ÒÆ•ž­Í„6e4ÜVÚœ^]87Ú”¡rÛ]h£€-×aËv¸Ð&¾…»gW“élêq§ël$è˲ܱ²°ˆ¸è´¸ £3š-:µEÄõ#ß_kÞ%ÿ¿/ç²Whù©–G+e÷=®>w?77,(8úؼÔWÁÑ*­íù¼ÀÑÇÊšTÀÑG«ä“Þ­ÀÑÇÂòYÀÑÑx¬¹Œ'Ž>žŽv&ÿ§Š£_ºû±Ž>fÖãqmû:’+ ŽÖ"²ŸþOGúË<¾âèC9ûˆ ˆ£Õ¸ÙZ8úÐLI¥€£••0t¥‚£w-Œ»€G«ñ>ÎŽ€£•qØ"8:Ï,ZSp´Vç“-rÞH¥çt*­sn¶6•n¼ÐÖN¥w¥Ñå² Tº>Pi]kVÓ)Tº>`é½yó=àÒºóàÒÚ¶E] –Ö¯:z–ÖYȽ`ieÀeŽoÁÒ»Fö+A°´ŽÜmƒ XZGŽ‚7ƒJ—[•Þ÷L .L:š,ä)LºçDZ‘ùÚ…H·´À ÍI¤õ£Y©iý—Yx é]¹Ï¹ø$‘–iq–“)Dz߸Ó"ý8Òˆt=Љô®ÄÛ;‘®7"½ë«Îe"‰t{X™ C"½+§5MaH¤äDºˆH¤udV)HZGúþ9´n%é ’VêhÖñ(HZ·’nÄ/$­Ò@£ é½í: Ë"é}õœù‚¤ud&â$­#ÓM¹ i h|7 HzWt>2DÒ¬«XôãHGÒ{Ë´ÉMY é}åt4V¥Hz×`˜ki é}õTþ‚¤[jpÚâIëH+A$­#}ï Hzo™Õ¹$½·--ƒÙޤë#’Ö‘[_eH?Žs ­ÁÍæðèÇ£s ­ç\žv ]ï@:ŽÁN'€tíÒõ HëÞGá™B¤w… û•$Òºs®'‘ÞE¿²l+‰ôãH'Ò{Ö ­;‘®ë@Z§´­PçÑåqƒGë°øÄÖ3O›‹¤Uè4¢Üé}œóh÷VEë˜}”–%Š.O›(úq Ž¢yë$Ñûâf…Dë@ÓPD+õß6VI¢yó/-Ÿmá’C×ë‡~é8ZGšô‰8Z¡Ç™zRéz Ò»ØÀ2²ØH¥uA¦9 •Ö‘þ¼A¥k•ÞÛMwïìB¥Õx'"bé½m Ÿ_ÄÒãKï*"*˜š5䉩ÞÒk‘ :®uZ³ÆIµ.TvJ-¸ïo£ BêMpÕÄŸ€Ô›V«qh‡Ô4-:îoó@j¹•ùN u4î§©ô©uA®T¤Žy÷ö @jMW›¡±¤´í?Bêí€:…Zh6ˆRËÍŒ*©ÕiöQÜ»@juÿô /:Þ¯{£R³ÖWÔºÚÍh õ¶C°AH]sC©·Ñùø R«mËIZ7²ºÙ!5‹H½)Ï@z“S†¾„Ô›|­ŒµRëb]Õ-7H©õ‰L–qJ­ @5!úWJ­Ðë22J]ªt’RëiÅÔ2FWPêU!óP}tL½‰ƒ˜œZ…Ô9eàTÍêôUGãr]¦dvT-£Ê+%€DÕºÚ;%8DÕ¬yVPµVRÎø€ªY™¹ êxw&»T³¼{Õ5  Z¾P­,½z ¨.5h ª›ñg¦k€SëÞ|ÇœZ ÁiM‚ N­u¹ÉÉ©U¢Ð6©WÅR«H´ç«8¥Ö Û (õÚ ð5&zPj½ÕË@©Õ¡|›”Z­ç‚R—¬¤Ôz®¹=F­g³¥ÖŒZ©u±¤ó'(µæèÝÆjPjåÚB”˜ºÔ¯%¦®‰oL­\DƒRË×Öð¤Ô, _(5ë J-·s£4ÄÔõ©K UbêR¤ƒ˜Z!Ók¢z_cjZ¬LÍ ‘…SG|vy6ƒƒjÙS9ârP­2—N¤ªK-]‚êE3ä<Þ1QµºNI}”'®ÖÛtÀ^­ÏðØyõÒ|t킜W7߯­{v`­Ž>§Î…ÀZvËé°\€µžlZ`­Æmø§wd­ß³L"kÖÐ,Èš¥ ²^Dl†{BAÖ*^i¹DÖòŒŽ9b¨3¬— ú:"k »W¦˜YëjÏÕ»#kõåø {¾‘µêUî™ Hd­[±ü ë8ð¶< k–l-ȺÙÉÆyY×®d­ku­;µ>JGn@Ö Q§7!‘5kÚd]Òš‰¬µP3Ã;"ëRìˆÈšU/ ³®Oèͬ㿙–Œ„ɬQˬ kVÅ+Èšåí ²Öë°œ%"ëæ˜‘5Ë<d]ŠDY«j„'AY—êIDÖûtd]_¤#kõå}2ÅðŒ26ñ4V§ÒVÆ&NCŒdÍ2Y$Öª¨åØš±Ö§´]y“ Ö¬§Vˆ5‹dbÍJ˜…X«´ äÖN¬YÅ´ë2É’X³tÈ‹X‹šä²ý{‰õ4ˆõÿý/ÁÕ.™}Ú|Èî*6Üô/6 rc‰«·–ü—ΩÀÕñ¬WëEÄÕª€cóe±ù@ œjó!q+Um>ú,Ul>VcÃÆ.$kÕå#zà‚I«pùx6f0¾ÉãÚp Õ[3½M­põ6™ÉQ¡Õµ ´:Æê¦-×›œ‡¶¾x|zHøj¢w !=>ÊJ½›|Ę?]¹¾¬V)†Í’XiòQÖÄ€Õk+‘3bqÀêJ?«c¤¾\à X]¡`5×Ë@ÕÂz,Uknpí/Pu#~¥Tݪ/$ÈsTmz4cÄQµÚ&£õ@ÕÅWëZgËš°®ÀZxà5~ °~4X+árÊOÀZW;ïI¬+ư®T‰ÀºéÀšµ°:°Ö!®Ò°Ö![FCÖõJ¬ë 
X×®`­#£wa$€õ£Ñµn%ëœ`­$ÖÓc‘`]»kÖxnÖšmÀZ_–IùWë0WÕƒW×·^]©xu}™àÕõ}WëZ/Ëe¯V#ŒIœWëe^–! ^­okKW«¼)ïÁ«õ±¯£(wåÕñ³»mͽyµ~p·Lnòê2z€W×/Àºž ÀZžTK†éX× °.wH`]/Àº0KëzµÖ…-Xןu`}¨Ûe~€u³ ¬Ÿ4Û€µJqÛŽ&µJ¨›aAÖüÙ¬y±Xã…X‡1ëz$€uéÌÖ*÷’y¡X·e­±ÙéùÈÚï½"kL;DÖâl–Bd]¾ÕЬy‘`ÖåöȬ+`wf]Ÿ7™u½Z@ëÒuH­Uâ8;­ µ. j­g”j¯ÔºôªJ­±_Aj]^©uy²¤ÖõgwÚáñj[?Á­UÂlßÈ­£ñȤê­Å-ňܺ4vn]nŸàZűGµ£ ®KPGp]W‚ky$É(àZeL‚Ap­‚–%p­#³páÖúïÜò%·J ±µ*–Ú=°õ3Xf±uL–EV°õ®áe¤µ[ó µ.m€Ö¸Å¬9t’Y—‚̺ŒdÖe°+ÌúÙxùˆìŸ™uÙðìÌZN¯¿ƒVóµ“V—gX]_`ué…Vs[’°ºlåV×gX]_auyI€ÕŸuX]Æ0ÀjµÅü5¶+«9ˆV—EauÞ«k‡"¬æ`LX­`éw„Õ‚¬–"HX­ÍǬvU`µ~vÉ¢ô„ÕêæQKXý8§Ãj5n­Cµ¶7«®¬º¾)ÀjýÞb ÛYu½°êúØÀªΪË×T]ÛœTëGNÂèÉGbß`'©Æ.19uý"S—g혺ž˜º¾]`êú£+cc6‰©ËÜGL­S.›S×÷L­®sõâT„ÔgãºÎR€ÔµºÌ6û\ƒ“ïsý˜MUýË|¨§´Š}j9;[‘%j¦‚>µgêèKÑ ú üÔdÐÔ+BPSKZõúCÈCÜZõʪ`P£Wñ¡Ž®>FeêƒNj$Ôµ„zƒR!Ôµ„µè+¡æ]D¶\õ Bêª ¦¢ZÑ ¼Q ïÇü; Õ‹~ÕÔßoAµ²»Ì.“‚j¥Ëd.MTÇ'³§þ ªcÞä¥TTi#ÕñIœs.˨¨.[ TT7XcžÚPT«Vñ–VTT!&Õ1)ÆÅŽà˜ºlD@P-\g¶0T—RP]´¤Ô·äÆ©µ¢¬Z—Ü{,²j9*šd†”Z–æÔ J-(9å~g¡ÔŠ'LâOJ­’)ì(”Z’;³ÛI©Uâ-õ"…R1æ›R—ÍR꘦SöR?-/'#¥®G‚R—‡JJ-ÓéL“PêhtoRê"Ä%¥–w¼™‘’R )™)µì=Í”Z6Ã`jE›ö S«à’IóÄÔeŒ˜º\1uÑS«Nó×(uÑø’RGs‹Rê"«$¥ŽF·"¥•6KsRêzä›R—®ªêcË-çB©UX|2ÃjPê¶¼¥L ¥VM3Nª²j*¹A©µÎË2Y5A©ËÎ))õ¥ Ù‘“DH]¶O ©ãÀÛoŠªÚ{uüƲé]DÕñã©ñ$£.ßu8ƒQó‰¨ã@w•"¢–’7Cɇ¦š D]¢n%AÏyQ×;¢. 
Q—AD­2™ç—Õe+šxZŸVJé ž®GO˦7µIO—½­*©V—ªiàézNàézµÀÓT¡N×Ëq8]Ú§åP›1Ùt¹ ²é2¼‘MÇU»¥2ÙtÙø#›.Ïœlš{ŸDÓåF@¦ëA¦ñPÉ¥‹þ\º¼©Î¥ËÃ&—.j ri ƒ\нPÃÏŒ\º5§$–`šûâL×_™®GMŸšý×4”›>µ=‘êU²i>‚¦ëÏžTqøF}aÓõ!\Tq°ñ¦Šç|³éú¯3UÞs ¡.'#¢.•ˆºÜ u=r§Šã0«Â‚¨©9)ˆºþìIö*i‡¸Ú›*WŽQ—ë¢.?JD]Ê8\ãQ5•Q—G@DM!KAÔÔ]D]>."êú³e¼•›2Þ 5·÷£.×IF]^25õ/Rן¤®—²QÆá  ©ËDJ]Ï L]ž*1õ³ñ«y‡TS]EPMýIu¹V’êòT—çSQ5ºQumª®Týl¼!¯ÃmV׫½(ãp5SÕåHÂêÒÓ÷¹öÇï£Õ¿ÖDæ-_+’»«S-f9 fMKÕ¬[ÑÔ˱ôäþOæTQJ'ªPÙ<öC ²¦§Ä£t¢ûCd-ÓÔ¾9ñÖnXþ €5é µ†ªuìç³nbY‰ƒW×€‹u‹R‚u9Дº‰ES[ë&BHSë&înßW 'N^ËŠ¢ê9‡¤?U`?!~ÆÜU/1ÙËvQuŒÈ±0 ‚Àúf©"ë[§6á4€µ2ŸÏt8%°¾YäˆÀºÔµë;®ÎdãÖL(ÀZ‹£î© X­õ¦-«ÏVig,«UyÀËC:­VÄ ogÕú}Ó½’UŸ,}C­„MkKV}°HYu+uðEõãœ`Õj UIŠÈÁ¢B´ÑvijȪ·¥ ŠÈòÝD›¡æL EqŠÈΪ@Õ$îaìÍ =îvd7Wݲ‹ŠˆÖì™W @vÔ@) Ìpy€Ð®ÄIu÷1§ @v ¡H´­W:ãÃd‹^m¾À4)?Jb¹C-º˜ÙØÒdÓœ“E iÒVC)£Ëõz$ @T˜ÃŒ§€ªKîP1‘m°ùêUW;—nR~¨ZV˜¦€¬*®cð ±Ø±’ÅD‰ãæo T]·eªµmo+½CUk¢Áb|È@Õz­V|„¨ºì†“U?ŽtV­ÆQE¤ºlõ:©ÖÃ±ï  ºÚÜTˬéºÌÄAu½N€êêR]wžAªëf7Hõ&×îUJN]} À©Ýu ŒZ?x™áµ's±˜^gÙ·Ìù¢Þ´·ÊÎÀÔÕ1 ˜úq¤cj5Ús¥Ö>½•E!¥®”ZGJ™ÝÇLPj]ìi…ZA©?ë”úñ³Î©«3’sêz sjù4ÝV¨ ºš8T¡ñ†Óa•SªuälæxÕŸuPýhtR­%ð‘ΗDÕ :©åà]s@˜XÝ–ÝV÷ ««`u½NÀju/ X] ·««F°ZÝc½¬¾¡ÃêÇ9¬~4:­~ܧÓêÇ9V«Ñ ‘V?îó†=žZ]ÎHZ]”0¤ÕZÈ/nð±Ò$ïx÷ÞWãF“¼Éª ‘V{0ÒêhŒ¯.å« ÕºÚËŒQA«?ë´ºÅKæ} Z]´B¤Õº žGA´­Ö¿Z%'Òj]f,åFÑ2ÐêÇï9­. 
"Òêz Õ&éSíjpÐêÇÕ:­ÖÝŸw–Ü­~\­ÓêG£ÓêÇ9Vëj·˜VßÚ Ðê"”­ÖqwJ I«u`ôœ¾Ò'­®Ï´úñ³N«õ³±F ÕE,FZ]û2hu±ô#­Ô´Š^¤ÕŸuZýxN«£ñeiÊrŠõ‰ƒV×­®7Z­›¸—TƃV×;­–kßu\YˆÑiõãœN«u‡â¡£î¡Ój5iéJZý…F‹‘ãj­üiu}ªN«‹b´ºÞ%hõãGV?~ÕiõãH§Õp=%«~ü¨³êâ£HV­Æ˜•zjPõã@GÕÅh•¨ZG®ægT]ŸÝ>×nü}æ?µ’Šñ9É^ºYNoÇÚþø§×cúÕõ¼þøï¿ûã÷@ñx¶÷ÜË×Ü_¼˜ï"ñ¿ð¤ºÁŸý¶ó“*tÚûÍŽ¿­ì~Â5DÈž—ðþ˧^A¬ÓÛ°ü~åýoŸû^ÏþÝúßTƒ>æ¨y¿¾»`é/~ý^ùÜ+x?ûþ¼ÿö¥kxýêOåZ®íUÛn×ìô¡Ù*"…Íç˜ã²þáÇ·õÐïçøÿ-.ýãÇŸ?þùïbäþûùøñŸ~÷?~ë™Î¶;"–þÍgÚ¾ãL·*ym1³Ÿëüíg:ê™>eìÌ!û7ï6ïQúsÇN¿ÓÕâ¥O›.Žy›¦«°¶Oš¼Že$?Ô«ñ¶OºšX Nûr¨zÙór¼ñ“»çìÕ€?evÓÙÙSxŸÚSÚ÷Éž‚«ùÜžÒ®¦ô^ΗzÊ7ŒÓ¼ŠÇUSOÜÓrÞ«¼RŸÃôô±¿Fé_>à,“vˆ¦{㟟g¹ûY–_wšõ”ùâÏ:ŽúúY¶_y–xÿZ[hwvý+§9êiþÆ)þ|ýÇþª©ôãÛ’HcÓÖD—ÕX9îÇ@_Š_®SDóÞÙ±nŽþÛÑèJrî®RqôÌǺÞS#¬Ü´cm*޽ë?hD¼BÅQ,jW©8^¸qiŽ—“íb®Òr Þ¶ˆ¢ÚIËÑœæ8‡É²Wi9.˜e_šûç«´ñ`^ËþyA­®uöÝZ]À–IYë¬ÝÚí½KßÌhTI8§›äë¬"Ãû›ðèõÙfИ]$ÙòËÖY;3û‹¶ì÷ )x|ë¹3³ßÒ´ÕxÇóúØ#PÜ­Îé*EG»ôTKîV®’tDz €wÛ/Ùöë2'Û¥ ÛÖQ€$ú誥¸_×ç–Â6ýû`®ñ ",Ï òusÝÞ3óµ£PÔºlÉò÷KZš,¸.ª§²µÞ/S¡ð †«¬L;ŒÜ¯I5_ä"¯¤íÕ›Õ8[þÔºNÙÏUÐWY^Gçi½y?Oö,yIwT·ŸG«ó»Lï)Óôx"­7ïÚ©‹1¡§µªq0àýÜPts]UâiõæýTáÑ䪫|È;ÄRãfæò«*Q¼²ÿtNµ9}A×Uõ:mSf{­2ñ|1'ߨ¨qlŠî‡´ðé¼Ê#žgëÍ­Ñ*­Òœuv¨Æ5}ŽW¥OtÉÙ~ÈÒ?ë+¯J|CéýØQEEmWt Wg>6ì=¯2ÝFp·|¤™õÚ°øöîÌÚß6ÌÛ×àñÏÛÖ­U>xweµù ´™lq?f$ ®RïYž{¹Jªóf˜Ñ+‘x¸J§Ô+)ïr¼õ¡Nò§NÇÕèBo5^×{`V£ç»¬›é’ö][ ÇÑëaÅã”4õÝ•ÕhÎ71Ô禞g«ÊQί @uåý‚¥ÐÚŠ ¾f5ºc™æãøBß]ù˜+ŸgRÕ}—T_^U¥zyÌz´^µdU)óŽëÛÏf‚ÚâËï=yBág›AjôÂNë®MÆ>.k‚6©óºÛ&£nÓ¥”1_h|uei?Ì&ëzL3ªÆ±i×N¹¥ð~½"v»û˜,9Úiù%µá1Ÿ£ BDÛ1õ19qüõ2®ͯ¼5Þ=XÖϺãú±HW¯<<Ò&D"p½ÎÜæQc¼¶Ž.)ìú˜¼ w.SØ©q6AÜzÝŠªÞ¹mútùf\nMïRKš¸gní¡²Ž›÷u_ν¤äAg¼Íf\w•÷&GL‘N #©¦Qãjº†ˆ4™¾{²¦EÛA¶ÛN:r6]CÌì1zOÞõf»BKmcGSž&¦‰x;>‰>"Khƒõ¶­òv=¶ÿ<¥÷ˆ¬­=ÛyŒÁ*5»B‚±{ü°M±J‘²„†‚öJCÜ£ÆÉ* n±¾“^íùïkªÆö&Íl‚Xâ«´NïÊ‚Sõ7›«Ï##b둲žéÐØm±R›öúÍè5­°Ó«1µ'KkR4-чäy5Rº÷YòΚájZ£Ú[7Ñþ'ëmØ®õ6Ï)bSãf…ßblRA–Þ•UŽ6·Ñ·yÍÍÞÚ;Ôx8yS•ÛŒ°ãREÐM¹+mÕfí]Y¢õe¬ú6­†dåy£éÉu`Œ›ýÖcI7 Yv:™õ­¶.ÀÒOf±A‚Láz7ŽÉhÍÊ3›¤#FÖ…ZòÖŠñŒñ¸þ¬ÉEÛmX¨Æ{ÄÈz,Yg ~>åzž“IsâYüpŒY2³ÒØZÁ1 kYvùꦙ"KanÐÛjŠ05ÎæY¨Æk„ȲÓ1ÔMz"KˆaVˆ›hψ‘UÁL·æ¸ÜGd%£[Àº5½G‘åíѬ=úC0½G;§Ènºµ#ëNÌ}w“ÜcÄÈÒ ˜QÁÖDcDŽ¡Ä’ã·¦"#ò¤NÝWj»Z?gÊš[Õ§ìÈÇ‘ÐöúPzGV©ùtéÙZºÃc¼7»¥­ÕuÝF_ÞÍøFmCzÒ:YÔl-3g ÉqCèuzµIì1ûl’ÑMbÙ†dŒ×{Œ ¹Ì×j¼’7³ÎÚOŽ ¹N 
àî#H~4®©òmS«q¯MXs³!y7iЇF?{˜èX۾爓µvnµªßãµjÂdœÌ+>îÖÇä9)̦unÆÉÊ{©ñq²f4SÏn{¦OµPÑìå6­93NÖ'‘‹pîD(GȨGtâ挓•Ë— 2m_ãIÝêû2I{Yž¿•@J^­d€õ½Z|nÆ_,„¼Z@ÅR.À«Ie¯aB^í9«ª¼:Sª’WkUm!yõ1¡|yµVýf¾F^­qÛ2 ¼z¿0!’Wk‘nˆ¸ZÔÁæâê–‚’ vÄÕJ͵ZÄÕ»ÊIÍCJ\­UºYWïZ”¾þÙ@µ6%.­®–ó ªÕ«NCÜ ª•$|;ÙqPýhtP­ÆÝòNªK=ûΩ•u¿ž<Ý9µ·3ë|ƒS· Q[†´àÔjœf[™§–9群úœúKƒS«qº3¢§VŽô=§šœZñ ²ã˜Zm“U¦nÓôl`ê–#ÃùxŽ©[ެx¦VãÓzŸ™€©ÕãZ.‰S+ÛõŒÉg`JÇÔjŒ°*‰‘cj5îVq˜º&ÑS·ÔÛ#À©=/×µþýð.gÔ5‡Œºå³YŒº¦ÉvÅ^I¤n{VǤ®ù¨€ÔjŒ…sò0ƒÔ-³vOso@êšÏ HÝRV#*Khnº¥¬ZQ@@êšÏ HÝROmHÝcUß‘ 3êš“ FýØ$uF]ó#Á¨[’èb£œ3j½d³Ô¢n½Ãé3ê–Dí;kΨ[︲ü uëWsz|€QW£gÔ7팺&XƒQ·Æ)=ÞÁ¨[ÏÛr™Ñµþ}Ї7v¢HÜ™î HýèX©[_^lŠ3HÝ2¬#߇çäÄ÷úWƒÓö¯—§w1‡TúÑ?K×g`éÇÏ:–nß„CKÇÒ­÷:{t0ýè N¦k3ÈtËEÖ`Ó7ÍL·ßEÆ^«“éš‹ 2­$NMÑcpw2ÝãÑ'›72]€L·¶Ëâ'Ó5çdºæÿ‚L·œÛÅö.^Âå’ø 2Ý}ßò‚5ĪW5@@¦[ά~¬oö™.yÁÓ5û`ºæ¤L· @CvÓíH߇w0ýht4]ަk^%ÐtM¯š®Y‡@Ó5hº¦NM·t‰ÝÄ!ަk.ØtM¡›®ye`Ó5SËÙtMñ›®iS`Ó5Ÿlº¦Œt6]s{À¦Ÿ›nöÚk–ã›®y&Φ™þ6Ýúùµ¿J°é֯洎›nGÚÂlúù³·çÁÒêlºuô%iÀ¦k›.yç@ÓÏ_54ýlÜ<ÁIÔ@Ó-GvÎ S ég£¡éöù˜fpº¦ãN×ÏÇátKÙœöQ9 pºæsN×Ñü©éV{@Å·ûn˜“éÖèÞÉt;甜dº½’˜‡{ïw0Ý>\ÇKWëpéš« .]3‹KׂàÒÏéu-Ò¸˜š|Z2 )ß ù´ìïæüzɧK òéâSG>]—üÔŬŒ€ºT±' Ö&©\XºÖ€ZáÔb,€úÑx|½º 15»')u10¥.³0)µ0à~ŸKRêâ×MJ] u¦.µÄÔ¥”.1õ£q+FÞfò L­J·+»Rz]dbêR˜ºTý"¦ŽÆ·AÐ[k}³–ˆàÔ:òH·6rjä)Í<É©[Ðq¤Hœúq¤sêUQ{—ÑRËgïN¥ )õÚlsQ J½Jh½”Z×æÛ  ÔºœÕtÏ Ô2˜˜­¿:©.fž$Õòë\\í¤Z÷—?–ª Õ«<òŒ¾Iõ*·eT› ©Žcöx¶ãîAªeºì¦mvR½¶A:¡““j=ð5Õ,$Õk³ØÉ juߨ©®/¤ºX™’T¯5¥—Iµ=vÛåªVX~ªV÷0]¢j™F^rU¯ò’ÛRlT­.yç«^•—•«ÈªçtV½Ê'²–©ö>R#H¬ÅD#&M¶kh$Ö‹–x™WFb]ŸˆõãgXë,&5ub]ŸˆµÆ•-«‘X—¡•ÄZyÊRBX+ËÞ²*I¬ë±Ö£™³”3‰µ–q–Ob]Ÿ)ˆµNi»ò$ÖÑ+‹3õÖN¬ëK±ÖÛ5È«uœ¹³“W×Ç^­FSŸW7û‚Õ´óêxG¬ëÇ˯®Ï¼ZGšñ9yu}êàÕ‹´w™¶M^­sšÞ¼Z?k¬…¼ºŽLΫkW¯Ös¿3Ù“¼úq¤CëúÆ:´Öºgåqbë:`[—€ŒØZc’Õ£¸®:Àµº§UE ¸Ö)M›Ep]»Àu½}€k=ƒÕtó×uظ®;Àµ.M¾Ep­ÏÒ$/×õ³¸® Àµ~ÖTa$×µ÷]?nÅÑõãj]+Ø“8]×~t­_µl@¢ëúÆ€®õ™¦®±ký[Jé¾—]Oƒ]Ïñßÿp½kØ{{yÌZµ'«¡ˆYY•F ¬ƒQŒ@ôZ®%Àu±Ï'¸®BÖ§p×E±Ap­ˆæH„HpÍ}³®[m˜¬\kqu¤ àºl\—}0‚kqÔÙIºƒëR€™àºY&¸.µ’I®K1\ëR,¸k–‚}k¯\È5 X&¹fàB®YÕ¸k–I.äš±+¹Æs#¹~bm#×·ªìÎB®¹¹Hr­½¼Ù8ȵ¶B×p:ɵ64o£'TXsß­(¬‹CÖ4×- kÚH…5æ‹ÂšþþEaMsû¢°¦8¥(¬i7_Ö‹jq¥áTÖTö•õ"ñŽc¨¬‹ø*kÁQÛë*k'õªa¬­)±f"±.‚AH¬)j*ë" ¤ÄšE?ŠÄºþ,$Öõg!±fY´"±^Õ7Æ÷J‰õªZ19°PbMV‘X³ÎH‘Xõ&%ÖT nM5@ÑXËzd¤r‰5ví‹Âš‚–¢°.:ÖTü…uù”©°¦¨§(¬¹»9Öõ˜â!ŽQ…k*÷ŠÀšÒ 
¬ËlO…5%VEhMÉcÑ[ÓÓ¼è­YL¡è­©ù*zkjÓŠÞº\-õÖed¥Þº\õÖõÈ­”aÀAoMÉ[Ñ[SÒTôÖõg¡·¦¯è­YÅ‚zëÒF½5‹À½u$CoMéc×[W™2ôÖõ÷ŠÞJdê­YuçÛõÖU§]ô֘Ȩ·†0ϯª·6½èCn“ŠØºÕÂɽÍ*¶†4¼Š­y$ÄÖeH®bkØoQl]fŠ­ëÏBl]/è`ù/ ±u=bëò¾ ¶æ¯±u•ClÍG[ÄÖ|@)¶®¿¸²|bë¬}U„Ö¬´E¡5µö©5ô•”Zׯ"µ†8í!µ.:l”¢z¹H­=”{H­} xH­ýK~h­½ÂÏCkÍŸý%Zk<¡ªµæ9‹Öšúî“5x+ÐZ3„¦Öºœ’ZërJj­Kg¥ÖÚGSZû?Bg2gEfÍà ²fE6Ь‹Êž"kŽ·Ed͹¡ˆ¬ëÏ–¢q>…‘u9’"ëò¨)².D‘uýYˆ¬ëÏBdÍ®ˆ¬±jyh¬}Ü,krŸ¢±®§„Æ«„"±¶ òê2ŽQ^]ò7(¯®GBcͤ±ÔX׃ ²fØPT֜ċʺ<謹^#®~Ü!tÖe–¢ÎZu0s»›¸ºÔÅ,:k¦¾W?Žt\] uWûª®SPuMS#5ÖåéPd]ÞVUYãs¤ÊºL TY—Ï*k&±•u9%UÖ%ƒ…*kæ+Ôï³þ˜McýËܪ¹î¼¶_Z†~Gw{*ˆºdT:¢–lö:¿ «ŽñìŽÙa@'àiáú)k£O/ígÒØ˜>Õ²zúš®ZÌÔé#ðô¬"±–£NŸêxßñÝ,Ó[ØEŸêCf‚0ðôÔ,R<­K·Ý®âS}0Ÿ>Õ͆Ö|:àS=éYæ³¥OõLäIŸê uç‹OµT•éCŸêbEMŸê¾žÕ¦ú8Ò]­ØT‹Eí1›êÝSâiS}»‡&Õ¢ï.œ†IµÌ7s•êæÙø5ê[‚\­Ò£ú¢I&<ªe¨í²zxTkÑëëfxTWÓlxTÇ‘î÷GêK‘`<¨¾|„GõµÒŒÕZÕ;—ƒGµŒºWSµÑ£Ze"Ó‚:ý©ã'Ý‹þÔ8ʽ©Ï¶Ø3Å´{S«¬ímš'˜SGcô´ôYpsêó‚[Í©[±\³W…9µ ô.YUœæÔ¢:F¿Ì©Ï]ÔÂtÏîN}ÊK#çiºS‹!yÎ Ü©ÏèÐSÆÜp§®îäp§>£_ºß3Ü©uÑ-R‚;õ)Äb 2p§ÖÅÞ–‰wêSÆ –Vwêú àN·‚Ô¸S?ÝZm5K¸S‹ÝMÆàN­êÎ[¯ÑZ?{»‡‰ÛS«F³Ï,°§>µ‡{§|öÔñUÁn öÔ¢~nïäöÔêÌ_µ§ŽÆõ4ÓØS‹úë‚;õ)Lk7Ý:>Èé4ã¸S×oöÔõk…=µF‡ˆHÇðèöÔqó1«¦­/ì©Õ™g÷q{jñÐ5õ5´§>%®0;ØS«0,±ÝŸZGz& ü©ë×êÚ%áO·2ÅkKñ²;T×^‹jiÅåiQ­f‡êÚwàP]GI8TŸ' (éP}ŠE™uª?kÕq`Œ½¹í‡êø‹›pÒ¡Zçw‡{8TkÔŽ¥ùé^Õ§˜t©®o6ÕŠ=®+Ý`S}Þð÷†Kµ&бÇÛ€Kuý~àR­1d¶­ ¸TëÈÃm>Ü¥úTñ¶_mnR­/d2;˜TŸšQ̇&Õ š¶ÙànR}J4<&ÕúÕL^§GµÚ¶ô×p‡êú¹Â¡ú”ÃT™aQ­°ï6C XTsB§Cuíp¨nÁä4V˜p¨>eØ‘BE:T뱘›'ªãÈÃû ªO)V_õ^îPîJ‡j]•"¡Cõ©Nyet¨ÖmžnØáÕÉLBK‡êSÞðYWˆÕ§Š=™ ªçt‡êÇ}ºCµzùbfûp¨®¡ªuNKt¤Cõ£Ñªõh-±ÕuäCµîÄrHéP­/ˬd›Cõ5¹×ú÷aê_i¢âL]-úTT«dCî\­UA&îPQ])ˆ¬‹?K+®„£,­Øˆg.ÔjiEäþ²´âªš{Y±TV¼ÏœhIeÅ VVÜÐUÄúL¸J^­áê‚‘WÇjÔ2 ¯>(Uw^­Ò‰E»œZ¤ÛK†‘W߬ü^ÝtÏiYH^“íeƲÖ j]E¤0w`­õ…/~¬õ‹†Yk‘åfÍ@Ö·\Ò¢ŠÌZ “)+m‘YWò f½ÓòÌúQ9Ìz¡ëzeÖ^ 0ë•6 ³nÌYu ”ªÌÚ‹”ºŠbé-Ld]ð#°µ–A¶ŸAj½Ë_<="H®}%W¸õªXÈ>¸õ¤hcq6Ü:bo¯6Æ¢Š-XTñþºau Íz¼cká–9‘-±µœ^ã±w¢Â¢Šg1FQE!0óˆ¶ö ŒÐúPfCæ§ZÂXVëÐú™2 h}ì4ž´>´8µ­ @ëc§|ÐZOÀò; ­ 9,©¸cßžÐúبÞgIÅÎa¥¤¢–WùŠYRqƒô¨”T\©?³Ö¯Z+2kUÈô,gÖ*áH ÌúP¢†eãƒY×Gh-jì9Z‹C»¿ õ¡ZÈnäáÐZU)Ö"GQÅ‹–O,ª¨_c,ª`…óXS±T‡dUE†åXVQT³™| ¬b©É²ŠBöåZ«Ö…›ú°¬bÜÉe•n­Õ³Ï)wKYVQþ¹ìfYÅÞE€Öqàf€ÌZÏgòªƒÎ¬õ‰N‹ÑîÆ¬ã_7vÁ¬•yø~À¯FgÖ»”y3Zë©]¦Îuj­Af5÷e`ëÇέ÷…! 
Àµ†'Û~·nK:Kp·Öõ\níÜZ+E/†n§„‹¸µÆow\·–¿Ÿ{Ù€[ïª\c±'¸u­ n­$^cÜZËeÏ·Ö‘.R·Þ'ºà°®â ×pkUHÝ­X¸u9%¹µæ°ÛTW”ÜhÞFÕ4VWd]rkõego¬®xÓÎܺ~"àÖ UóµeyEYHŒX©®³e–êŠ1£\ÛÏÈ­Õ[³(±µ§aBPkU!q,¨u­„ l]+}[×ZuÀÖª c%lˆ­µ Z¢µVõ"«¶Cl-oY3š*¥˜Ý–ÒŠeüèàz_!JmÅZ®µã›>/c̨­X^#j+ŠÃÚ&%k+ƸÚÖ k+Æ)Œá•ÚŠr“Z Í£¶¢D%¶ÂÚŠ1èÁ¾µOx?j+¢ž+k+ª#mƼQ[1ÆÒ!»F]ŘÜ<—ueicv&(«Xd–UôÓ¡¤"«]—’Šåt¨¨øpýxNf!EþV­£¸Ä•ÄŒûTR¹Žb¼±Ô2}ŸéÇO­–¢r|?Ž£‘æC©G{Húc¬õGý¯?þûïþø=<<^îýÕöò5Õö/æ» ü/<©nðg¿íÏ8éûN_\³?|ÂÙ´ó¿ÿò©WÐ_ïxÓŸ{ÿùÊÛ‡Æ@:ï×÷~O¿üé÷ó¿þò¹WÐûyïò_:ûë÷~ú+Wƒ²1—I|hÈ”t;âöåÐýÃok¡ßÏ¿ßãš?~üùãŸÿnúûùøñŸ~÷?~ãitaW3b ù牳ôÅuýòSíÇÇ\sÅz~û©¶zªO8>g´*_ëÿ!r÷rŸ5DDTôCW{<®ÂÚ>iÀß}¯ÉëÕxÛ']rƒöXÄ,_ºoüäîiÉçô”vvö^ŧö”ö}²§àj>·§¼fö^ΗzÊ7L=›öycµk€5ÆéCÒ«{–zŽÓÓÇþSO9ͤ*“Ók…XŒy@?ÕJ²¤¥–5ÀÞÚ’–F™á–6Ø]üìzfFì+!µweí°ÊånµJå˜\²¹#­ûäÖ’ §>xF'°ýƵ¥'ÚÏž¶§¸¶œµÁॸÌ}CMæaS»m ®M0öä%ä´Ý?Í™‚/hnð•ñLî幇'/W˲jÓcL–ì‘“DóÒθ¶#>D+­lNŽÉ;òi[Mª1&K÷TóViŒÉª6fjY>œ¯^·MY9EuÇrçKΩs·™P½²ÜÚnξ> £ã¼ê©æˆìy²/Ó1"Çgd¾­ÒÝ‘õÍYÒE«p?Fäx,ž ÕdŹ]$Éyî «._îÅsð´Õ¼|H>¬t™‚ªÜÙѬd©™Ç¾Ùæz•ä±ýƒ• '‡ä˜Ì×4<¤]É}©$Ó!XÚêÜkY勱æ"^?[O^—¬³|‹í˜”i ÕYÌ!yGæÚ!\ÉFeTw‡ooh¶Ët±CÇ’´Äpl;ª›g‚CÚÿ>$¯’z¸üRG°üݶ^“T|Š~Ÿ¾›‰ÿ-å{ Á%7{kÂk.@|ev"Cpéě˂ïò—‚m#lôfDàúFµÛÖÍ—ëÛŽÇ1¶GØý#“0k42×ã¸ÆþÉ6ϫѫg2—¼'7ñß7ttŒÀcn·D À¯¶œÁðˆRæ3 ©0(å¼ÓT€¸£VC‚xɘf®üœÍV(À‹ì—ø±Á= ø©‚ÈWF‹À¥Î»zA~«^«y˜0üŽ:¾ð±‡‰ð[¢Ñ#Ë#0ü–ÐÇ*&2þÞovGÄßZÖm¶”Dü-åªdü-T<¿÷í!úÞnX¼0úÖÿ•rG†ßEÅð»l 3üV…Ø%ç]†ßš)M…Ìð{“®cëÞÂŒ¾µ­nF·Œ¾µ¿]#ç«Dß·D CÄèûñ³}ësˆÖ=}×…$¢ïM–3Y¡ì}û$è[QÅ–%_}—P¨Gß—R²?Öè;ÖžC’Åèû„¡D‰¿%bN‰X‰¿µ¥Ÿ–팿ýì)Ø+xÓ¨tÇ€_îüPâïÝ7Ìkü]Â(Æß«g <Ãï)ÞÃô¾„ߺK[ô>Ãï#EW%üV¥±” —ð;:§hx„ßî€PÂï3(ÃoÙ'æ2«Dß7òPJô]ãv¾£1ÀHœzDß«UOEô­zf‚R¢ï¹½=úÖŒ|¦y"£oÅ´{ÚU2þ–š33l€Kd–L%yôZ†¶à Rɼ|â À…| Àë:“øPÉ|›‘lTð Y:%—Þ$ ´K¾"KŸx±€_pUa.!y}=p7zxàx À‰Z€ëÙšaM À‰÷J~ÀYƒxL·£,ó#üÞ-øjá7—_ŸTþˆG˜ñ²ÙÍöŠÇ73•d,®Á)bq%¾¢Òý‹—ÄãJw:†s8cñxù—- Š *Z1ßnÔf(^:CqÍJÎЊ7µ£[ Å•d–€±x<«˜ÌP\ ËN„âL‰e$®ûœûFñ’Ç@\xÎeßø1£úñ¸¯¾@¼¤×1/‰¢ ÄUçö#ñ­Iú Y{ ®©g2BŒ@\oØŒ‚ˆï*ܶXã]r#l÷ž¸2Ö†ÕÃpÐáêQK¾%Äà’±oéÅʼd«1w8ù1¸Ôí0ײéHëÆàEWϼäÕ0×¥®e ^¿ Äàù›);bpåQ˜Ý£pÉ;⌯ü ÁM@ðu5K.Zó^rÕ¼%…ôþSÄ(òϪ¤ ÀçûS Àç…SðhDX‚|ž›ïÕ «ÀcÒp¿{àÓ샵(²Ö¶;µ(÷Ša‰$üÚ1Z ì(¼øƒ…˘yÎÊñ……ÏEã,¼¸j…[µÂÂð$ §óãïCjó(YXxD™Û5’Q _QÝ ,<¾Ô^¡ppƒ„Ïð*$cVá×”õÛ ü²åtáM6•p¡· ’¾*DÑ y'S( 
¼ùŒä2†Þ1./fÆÇл9eXü\Høºf×BÂ϶Î~'tþ~r]A¸—-.$\ÅæsC¹ð eÊ{o²mÏ2s…„3©±p}­)ª($œù‡…„Ë«%Ý*$Üså¨C‰ƪÑ.Mâµt¡wð­:Š{&µ2øV 9«ÒJF<™1U% v I«f¤pÀÓ*E‰/|Þ…„—Ít¢ðS1EÎÌ…»nAá»é˜+ /$”(|ÇB± ð"bü°³¢p9¢ð‚I‰ÂË.<â瀞¡¥ʺU……—Ít²ðz$XøûÌ ² —à&1Kaá\G ç®Uaá¥{x®¼á#±M£¨öÖ–¯Š,¼d²ðøTï¬%YÄ(2;Î)¹ˆQ˜ð?X¸PîÀÀdáe? ŠQ°‡@.Û«÷ùÈÁWظ¾Àâ³pðˆ¦ì±ƒñu(+J° Þü-\k¢ôâ(¼6ƒËb&—Ò¤à·ôÜÙáHÁøQ’‚«kÒ™ÂÁÝåÙ^\.ÃŒì7¯TÂï%S­¿~{mà‹,Y\| 5¸ ÷¡É¢ü:9mS ´Õ…¡\}ÂÊy58ÝeŠ\ Ü—·È3Wt²¤EPQƒËŠÇ„§”ƒ« Ç>úa‘ƒËJoÝûʼ(ýŠ|VD¶mÓûñ!/3P‘ƒ+VÈ)šøÚ jS’<º-´ËˆÀóÍ5²ÈÁgòzÀËf5£ðu…ýêƒI3åàÉ!äàõ÷Š<]ýŠ\î[ мYÏ&ßG^¶b„ëg}-I5ø Ïߢ§¼°ÈÁ7m9ø&ß´ l)¯GB¾Á¥½ÈÁåàiûR”ƒ—tÊÁ#¾uÁãÛ!+W!”‚Ï?Xe½¢_a"Í<:êf®]%¸0õx[ˆÀk'€…CzQ‚KÉ6?”àʱuµ(²ó8L&îxyšˆÀËK*¸œëÓ§DàìŒ%×ÒÜ@&#ðæ÷<æAàìÅ%לަ%W­6ãfŒÀÅRrÅÃ<^ñõrÐÝŸ¸6c‰ÒGÎó3×Ïš»Ö3_³.e‰ÀUTƘ,"p…ÆG.¨ëH™W À5,)1f×¥!1×ù¾Ê1MgñwŒºcÇfÄßEŸPµ(X& þ–ãaº÷2þÖ­ïYõ¥Äßü¤ËÖÈ,CÇÏÎiVTÂï ùk%ü–áä6d®%ü.!ÂïE½(}·~/͈tîxÜpíž› p½Ã-¹P À›pôkB”nè%_à†Î|Ñê5kí”\涉K^ôaˆÀë'Y•à]•|Æö)CpÎHœ±û{@^†ãé'¹¡?ñˆz‚<’2'_R .;´´j-Qø†ªž%'3F+LV¢ðB\™“Éb% ×…›ZQøÞœ0G+Q8³fJN·¬…—˜Q¸9 ±…k{íh# ¿³˜ê—¢ðíʸDá\è–(œ %)³æ]" ?ÅžRÞ̤ÌL3 /zYDáÚü>Lj‚(üBý­…ŸReÿ`^"IDáÄg% /ËÆá¤C%ßP°¦Äá„'%/ê^Æá­ö¡atÄỬΦ/ëÂK‚Ð#-ÊÆá5gÓãpÅ!»íz1ט¹€%ßQÐ…q¸&"sïÄâÈa`,.)JÌT½KÖ`Q3ƒñ6Ä|E^f8㺠Ëòs=/ó&ƒñ˜Ã®)‹¿½ƒq™/îw7›d,.S}óÑG,®hÂ,‹/úe"WÖŒùV3×­ßçWbq™Aš>±¸ü'ç4jd0®ºñ_ ­‚ñhŒ~– eãª`âA㋜é²JY Æ£3즦e0~aߎÁ¸JœŽçŒG d™ô Æå³ir<ã¥Ï•`\¥Õ³"V Æd¦¥/£qý—†Â¿ :˜xÑøR$Žëâî##uÆã®WU¿£qÝÂû-!on¦i€@\ÊGós/¸JW¤jqxYI0 _‹La¸í—3•æ‡/M£—=qx܇ò!ûG¾l°I/q¸ªeZ"+ãð¸ô=kT—H|…|‰Ä›ßí`qŒÄu+ž’‹H\Ö´&æe$®ç²TOFâ37Fâÿf•x$®®h.–%_ÁV‰ÇÚ\QH\õL"6„Gâ‹ËGA$§\\Xs¾÷!þÃÇø÷«ñlÑ•®‹èOg{j}ýé77{üÒ…üÆvDýî~¶;þ­OÙê´¼otüeúD³C•ñéÐÿü©ç?¦úô¿|æ¼ûëÍ¿œ¹Èü¤wðº€þçO=ÿ뱿/ ÿå Wð ‰—äU1-Æ¥4Av¬¬ãG–ûüï6c\f-Xc¼Œ(jÿö3=¼¿áL­vDŒ±W Èß~¦ƒgúÿ{þºˆÙñÇ?þ’;Ñ?Z²u,>Öï}yhÆD?ÿ:óÏÿëãøŸŽÿùO2žÌá:çˆß¸×öyá3‡k»Ë¥y¢Æjeþ¼Ùé¤>‚¼kúœ™ò¼ÍE×âMŸ4k_Í&PŒº^‹5}j—´)ìSúG;7ú¯á3ûGû Ñ?p-ŸÚ?^ƒƒ÷^Ë£´Z‰Š×·ÔEh§ë^Ïf!´qÿ}Á8ñûí&ËiZ• •žÉ«Ýd9Ït¾ªÆDôßë7Éól§´¹§Ûæo÷›üÚ¬ú{• Ñÿ-çÇïíœv5þŸ4Ç}áèX›5n±ªüUt‘ãz\J¬hÿžâÿ¯ÿýÿ?ÿïÿ÷¸ï<ã ´ ½éX¦XnÇ­Ž#ú²=N[Ý0ÿëå†y|’¦‹µò}/ñD.}¿×r5éO‡Tô,S×–mýaSm›5ÞÐ¥TÂX¿·6—æñòºùˆ8S¹–=§i…t\&_Õuë)ajº7ækmÚ 9—íµ¾o1›KËèÜ`[$Ìz£ÝhSÁ¦‘ ²lWÖž½&IÙçó„•Úã¸uŠatRÙ®±¦6mœß{ôàx>^hÑ”[| ç}ÿðÒ 
.-5.‰ëŒCTÂlì–,ÍUÜ&ú¤~nJœ·è:.ºŠ~ÎÒ9Õ66“NU¸Z‡sQ­³ÒÑeˆˆ¾¨‚šèŠö§Ï{u¿ŠEeð”[°Æ:oUéYº…Ò²+ä=Á*cÈC–V¬Náñ©+™ÜŸqQébí\Å×ÖÚöxÒoýÓ²ßYjö¼—-ö®ê%çù%Õó<ž‰¾Z-×n¹|õ]ýë½Ü¯¾ª&3‡[´aØ+gé<úïÞL|Q’¹:H¼Ñh›ÝÄkQ]ÈmõUµ™Sá¢ê—ÛÝúêy‰Ä ð¥&Õ—ÕÑnÙœMâiæÎ˜.Å”ÃË¡r-/v¿)Uñ(aw•›fjó>~Üò¥{ÝQüžÛÀªíŽN"Nm»«z¢ÓµÊ²¯¾Ùv •KôV‡U[n«.ñ±¯wœîžæ×­§þw9×4²j?9Ì8¥÷RYj2YÚ®×öæÑ~Ò²½—sÏš²z ×1 ^—óh;w¯îªßLßË唄xuWå:_ƒØ-§îïw­wpµŠ²£¿ZŽÓ¢\ –ÛqYo¹¢Û«¯´{ÏiïÐéBôÉaM¹(3{ßzoÝõDmc‡Pm–ݼ\k+$ûî­‹ir—X¯nçÔ;뤯nŒž—6¥_këã³êëdGÛU|÷ÕÅ9Ô6ŠÈ>~ñ”AữªLrzÄš9Kȶ›Ks¥u³÷ק㦬—µèiGyõÕ¸sÏP›¶)ßãªr³G¥ån³[\g«#µÄ¯ÄÈñê«Ñ¹³¾¹ÚÖ|ìí'§!ÚXâe½ÍöĆ/¦šîu ­³·¡3w=wp´º±ï¡uöíÒåÖ&qZg×ýÇ7©¥­ro=ÆÇmÚ*}÷UŽä÷5cÕ3gOU‰{E:¯¾ººÞ:Í­bl›Rd¯rôó9†×Õ÷´Õ6êŶn¾¬JUª×¶ë»Ã⽩Pý~áuuñêÔëA¾zìÖÊß»ªD¯Z±£ËÚ.¹ ѽZµÅ$ØÓÐÔvÍ}x­mWnã¶[ÏZ°*Q?Ìv£¸@Uèãƒé]öt3¡W™Øw—mj€×¿jÛ÷5°ê×Ö¡ÁSeúxï‘õÞ<¦Raú÷T4?Þ™‚Ï^ôªÍÚÓÜ7çT{~ÚûÐz·mÅ®7PÛ( [æL•¥×>òˆÌýUUé•ÿê®Êêí[,ª:¿œ}d•næ!•ªÎ«*ì»·*õeèÕ66ŸÕ¶íï\V•£?î1®î®3ycS…7ij«jóë4†ÖÃMÇTl~ìX«Í\´Õ¦r°ïžÊ³D·Œ±5î>w•T†~_úØzx¾˜ ÍkŸûÝSÛÞ]ßVVùQ V§‹nÓ¥üj»·1¸j²?{L¢ô£¬¢¾ˆ,ºÚNè_CE멚䆀M5æ×} ®ªD=6Ub~îA«P3YZ›aôÖû«¦æaF£êóZð¾û+_ƒtbç]ýJð‘që©aªKU]~ö¸ÕÌ;Ô6öá[Û4Dº*<fÜ]÷z MÍŽ­¬ª÷7ä"ª9¿eÐzê!wó[••_2h•Î1F;¥˜eЪT¤á¢©¶k„­R9 ÿ4՛׆sܸT©ù-ÃV“ÊQ•”_ãÁƃÊ!|Ûš õÖ¶ XsPR ¾¶jÒ³¢‹*6¼ÃV5Y¹BU›ß2lUrüÂTO~ɰ5>Ö¬³®ròS[u{ó[gñnšwfб*Í#lUÞè¿ï4˜µYB¼ÇUݺY=­­hîž½u¿†¢X¥%¦¶êZ"V_•Š1÷¸µÅû0ÔS‘ù³Ç­íQß×]5æG×v¾\BªŒüÚ×úåÄZjTxm×rŒTzµÝ=pÕùŽy8d«Àüù\Õ4C§òò{\Õfµ6UA~í«N·æ¶¸ ÈE„nݪBªíîkíeï­¹I‘ÌÈ£Pe•½¯ºéX«w)´jÇ1…³VJW¥ãç¼¶—š^Zj»{ôÚ~3­´TUþ|G¯íó9†ý†ŠÊï=|m7–;úª)?Šº¶¾ž2MUŸ{øª¶·5ëün%]u)w CUPþìáë5K˜ð&"Ñv* Þ{ŸÝÜ Jåä·yŒ°üîb=µdøÊk9¸øõðB¹j»züÚ¸Áž³¾ªAöøU¿y¦fM¥ä·ÀªíNm‘ŠÅ™ˆ°G¬jó¸³Õr}÷ÙÃòÛÕt½ã×K!ؾvÛ‘?züzÍS^²Úî&-ywÙ˜çî¡®QøQÇUmæ£2ñS_5øMû0 PÛu¾±zÄ*ôøU7“L)*!?j¸êçŒ;ip¹mœ}'—·Ë¿$k㬠v/µ]wgõAO݉EåãÏ¿ªmɵYˆM½Ï–㤕ãìî>¼ª?d/º½-Æš[]Í5àÝg…²¦!àQéøsãìé¾U9~_Ç8«âCô¯Úðë:ÆY••»ìQ¥áçmŒ³r(²áµ9xŒq–÷woYºõjżQѽ½G±úîŽt]TYøQ¼UmVá^UáçÅ^RÜYd¬pýÜG§½2mAõâÏÆê'פ*¿_cœUmè1mqó…½LáçÈêC0'“¶3«üðùkÜe¾Ùc`WÖ^ù±É©è{â Øu \ÁîyvòéL·¡œ¹/¬Èt5 f! 
2]9|ÇŒôžÈÉt…c"Ó=NOd$Ó•(zhòˆtå_[_¸;Ò½49Ÿ#‹€\w¿ÜÅŒ\W)·ÑÞ±¸î% »u/`ÝKŽYH X÷’EQеu/ÅriŬ«ß”Sú— .cQö^Mƒë^ÍÜÛ³qÝK’þL4×È·rê »—l„SñÙÉ®.Ãj·í^²>Æ`´{)y '3 ]· _!Ý«ùqŒøÒÉîÕÆd÷j6ÀWZ#»WóÅh¡¨îÕ#Føª«cF50ÝvÔ¼_N9ÓÕQæ ¦«{¾ׯt¯fµ<’8u¯–]0\.uÛq·êê0-öû.C]f‹m@Ý«IÚÞdÔ‘î%±núÛ;ÒÕAK\Réê0«g¤{µ2'G—·éê8Ã#@º:ÎV]¶§,ÓDSÀºíLd*jX÷jYÛ=~Ͱ®Ž›Sf ®{µ¤ÝAלë¶îs#€]æû!vu:û:Îܘvu>KBؽ”86垟ƒÝöNs] °Û޳! `·eµ¬c(ÙmY¹9y8ÙUG1O:ÝÒ™ìê0ýKu²{5ï¯@v¯V˜fä$;Ù½šeöHIÙÕéâ“ëÙ »:ÎÌÖAvuº)¦¸¾Tr²«ã̪¿“Ý«U*ÊXÇÉnù@v¯f{0v¤@v[ë1Àuu”ã+ãºë3®{5G€NŒœê^Í},Üê¶±ÐhŸCÝÖ• §:Ôm'[»©)˜®k@÷½Žs¦[Ÿ½“]—•v˰۾ï¾I¬ÛŽ2DèX·uÿ4sÖ-# °®>·=~¥¯â ë¶ñÚh¥sÝ«%›Ž„dpÝ6ôæÎ0¸nûºÓÖ dW“iz1¾5ɺ¼}$!ì¶×™ñÀ®Ž3ó €ÝÖçæáì–ÙÆÁ®Ngõ vu:«ü°[:«sÝv•Y\·¾PçºítqƒÁ×m—™9ˆäºå©8×ÕqÎ÷ë^J´I?zpÝ2î‚ìê|´ƒì^-Ót•Ùmç³'»í|•ëÖçï\·}Oçð«×Õq¾å\·gS’+çºí|ßß„ ƒ]=˜Ô²ì¶/&Í v[êÿ0õ#Ø]7¯{D°«YAŸÙ+(ØÕ 5‡{€Ý6Öeñ €]½³³$ØÕ»ÍŠ„»µë–)X—Á±îÖ°n_¿ëŽN [€nsDÛº+ n™ñtÕ ö¡$Ðe8B [ïÚ€®£Ì²Þûº›bYÖ÷Ët×@úH ž»Þî–HžÛŠ¥áÎs›óXWÎ箇{çj*IŸ/â\ݲíÌç–¹Äq®ÞZ&çêtY““8W#Vý»ΕÑ]&YçÆW)òÝ÷ü€s[aBœ«Œû}È0ˆsK\œÛ ™æ–0p®–K¶œÛŠfæ~p.#Sâܵ%²&—uœ«JIë°4&ÎÕJ1>Š9ÏÕ `ûˆà¹ñXöW%Y®<#Ó Œ,W%Z·!4"Ëe¬>X®¹×:ËÕº>ëü‘åÊ.sõÉr˃å*yÓvVœåê°s¤Ï“årqO–«ÒYŽ‚,W¶¶O–+¤‘nd¹zú¶÷–+Šw¤œ,·Q¼)™¬³\‘œ¬E–» „uåQnü伌p$W“VÖ™ ÉÕa¶A’ÛìUsšÉÕqgjœä6KÚQ|Š$WšÒ´<"ÉÕONCìH’+ñÔÔ‘ä FÆë|ï34’«¹1ÍÃ>§ZÏéFHñ¥4 5YùF]UoŒ¯åí"C¢ß°UêÖU]cxþëêÓOoá‚uõ—Tåëj„ìRëJ›ž%͈uÏÉu=äºÑfŸäºÄÁÀºÇâKĺBŒ¹ùD¬«ŠIi­C®«p!= ×m]¶G+亻L»’¹ƒëJQÓH´T¹î¥Ø*Å\人®¬ÖF®«ëʪ•亻‚öaëB®«àw+½äºÅ€ëÆ›šMC®»«_Þó{Ð%×U羇_K»šþì ìªCå¾Án«Å0Ê‚°{ýð"£OÉîÕj=ßÝU„pWaý2b'â]›Üeïn^K¯àÝßñîí2âÝM+•ë«xwËÂ0…ïj1?Œ1ÈwÛÓ Ãˆw˦ïî…o xåW<ü xA Èw·­íÑö/‹|wqaá»§U8îxW3^%ÞÕ*‘‚ww/PðîÕ·ÆHvŃ얯©’Ýy¸é?À®Åžì^.á!Øm¦ç褀]à°v±z,`÷t U»ZçdRÁ.À°kñl»2 ÙÕ´? 
E²[4Èn¡¾NvµÂ榲{Å ê“dW1Î(KQÈ.ÁÉ®êvë$»žK·û)dw¥ˆd÷ð2°…ì2$'ÛÕîä2D`»¤´í‚Òî2š'ݽ\OG¸}f…‡ Ü媕tw‡xp÷ôJÖî6«¤ñL*Ýð¦Ë· Ý=ÔՆЋ|W%ÀG’Kị[Þ¾K|D¾‹$òÝÓkÙ¾»¸'Ûƒïn×ܧéÂw‰–ÀwËBø xå)7&¢xñµÀ{x%‡xµ{þð’Æ:ß%‡ ß= 5%ßÅnôðZ%‡x †àݽ†A¼‡›ÞÀ»z¡†xw“ xŒE€WñÍÔW ðâi’ïnPÉ“ï"Â(|÷€ê´R^ÇÞ…òú& )//…”W•@‡k¡¼¸»y˯òb0!ã1îå<Ô9®1^ì•>ï/ÿíÅNÆ»Sy ÆÛCŒ‚w·6,xwuCÁ‚wÉöxÁ%+ßu…­ãÝÒʼnw9=ò‹ò U 7¸yù°y9ùò™òrwE !ïþÃ=Ä>Æë’wqCËy¹Ö!¯òGŽ!4'åEW%äå®H…¼¯ y¹« È ‰J¼|Š„¼è@d¼›3!ã¥0¡0^îñê­eºka¼‡{˜ÆËó‘ñÎŽ ã]¼âLa¼ÑÈx9(ñ¶l»ïÉxe6:œ¹Éx¹½ ÆKõAá¼-Sj¨•Éy¹»O΋Umr^,› çG±LyÉyÑQˆy9Ñó–x˜yÁ¼åJób'ŒW%ѯ‘4ÈÛìHSì È[6OyËÞ oÙ•äÕ7~eN oÙóåmõ|†¿%1on€yËÓæåŠ‹˜·l/;æ-0oÙYæí›‡Å'‚1^ÞØËq¥¥‰÷iè A'†#W”ïÆH¨Hu`Z81,fê^ŒâzMÝæt·Õ,Nغ«ªm zÂ]ñ¼}8·î’™î¸ÿá®±æ-GÜ•Áå1u¶O¸«¶øZ‡á‚Ó]•óÍ ¹¤»íÚ¶ˆv5ÖÍ_¦»qœU&Ý•Ùt:µÕ®VZfPT»GV jû Eµ+zöªvùЍÚÕjåL .ènÄÞsÊCݽTSv°Ò]­Ý ›Bµ{Éyãø2Ý•èœØtWßüØç­tw·ÄrÊv ùuº«;Ë¢!…¥º•t·Áˆ‘"NºK¹5è®®óN º[ntWçs_Ð]öÐ]]Ë’[  »Û•&°º[Ú€w´Òïx·žÏð®[F¢`Á»|œä»å8绥ӂï–[耷ÀË3ð–¾À[ú3(o=ŸS^>GPÞV¾sÆÀ¼å3æÕ@”Ū æeæU[î#æ-Çó–˜·<2`^ýæ=6ã‰yëqŽyŸm‰yËsæ­çsÌ[Þƒc^ ˜ëȪ$歗☷žÎ1/?sb^-§‡UÇ ¼ì ¤¼<)omsÊ˯ƒ”·¶åm³óžraǼõÆóòÝóòó!ç­Ç9ç-ççå˜HÐ[ @/?W‚^~½Àz9æôjŽ9RÐË9 WÕ†ßÄÇ!o½5‡¼üÄ yuª,EÈ[Ž£ˆŸÿòrê'ä­8äe HÈ[Ó!oýM£¼å¾@yëaNyò–î Ê[º(oé> ¼¥›ƒòÖãœòÖ6§¼åå-_8(oévÀ¼Ï¶Ä¼¥ïó–g昷昷>Ǽå'yËíó–ÇÒ9où€ÁyËлµkÙýŠà·±}NÐ+þô è­§sÐ[^hoé  ½œ¬H{ãR/«´·¼àÞò$Á{õ›Û¦÷ÚØÖ[®¬·Ì;`½åyõ–× Ö[æ°^µ×?;ë­‡9ëÝn¯-OØ»){6E€½Œ< {kÛ ö¶òÛPöîbÆQöj¨7ÐÞòA{wéaGî#h/⤽êLY´W?in À½e. ¤‹{â^o²\Jzù€{¹Ø&îUL¦ºÄ½ZܧÙq/UEĽ:. 
¿÷B÷êþ,7¸—`¸WŸ\ºèIï­þœJgǽz,iMFÜ+¨’LŒ¸W·žÖ‰÷ªÆ™%n÷îÍ)blº÷Ê÷¼;E’öªå«ìÕX•ÖŽ€½jJW5Â^žËQﮂ/CõHØm§åÚö¶ñ+°WµVl‡°Wm™XÖ+ص¥_ X/¥d½:.V7ÝëÕqf‚ÖÛÚ`’õîÂ7fõÆqwšì‘õÆ>ÛΪ³Þ]5y>*å-Ï”W/K•‘õê"â ßkb²^—¾jõªŒ[æÝýÖ%Ç è=S ó½§¼‡¬ w×pœYëtf8*tÝ-—â®»qœJcuSºîòqÂuW—¹¤ ÀÛwWu\r× Æ»Š³äMÛ]µ¤emwu[iíEßÝÚf¾»ºãL0¡ñî%aÅ%ÐxWןÅi¼[:ŒwKG€ñî%…_feÀxW×’™ðÝŸ¼Mˆìx÷ÑfxWOEª–TÝx÷Ú¬Š}wã1X¡iï^ʨz%ïÆÝíišGãÝKuñ†]Œwã¬Bw/fc¾»ê7™1EßÝh»3#€¾»Ñ=ãq¥W÷ݽäê“nSðÝÕÅ›/|wãçÚZíîÃwW_‚%¦Áw÷j%è†Â}weþ´ÁußÝS5‘Ó¯¾»ºÌ˜óy}ˆxÛq1£Œ[pßÝ8ßœó:}wµÖ¸ IºïnüæéÒY÷Ý-¿ ÷ݶ~Iä÷ÝS¥\Sù÷ÝS{ªcûšî»§jB®ÃOî»õ:Ý}WìÖÛtßUêÕ” qwß-oö»Zf­oršï–þó] ï”NXïê"vñÍzWcšåáÁz·t,Xï–ˆõ®ŽËôZïêãˆ.Ù%Zn½{Mn¾EëÝÚæÖ»šAÌsֻʇ3ÓNXï– ëÝÒé`½«jÂkâ9ÛU[ÄOÁzW³ [öºõ®œØºwQt‡Î»:Ùüe°«ç«±X†ó®zñ0Ò¢ñ®3eæ»Kâ‚߃+Íwõ(Ó7Šæ»m‚Oâó]Îb4ßUée·Ë}q]ýóëÂþ@ྫñ?½îÀuK<®« QèmÈcë–j Îuφ;®á“ ÷Ý2ÐÃ}WsÊ• sçºíi™ã6ÜwGܦþu÷Ý2 8×UÛ”é2ອ£g6¸níèÎvÕf)Ž`»õ;Ûm¿#òõ”ñ¶K±ÜG»íû¿’Ž:Úm}ÖÜ íª§ÇLõ65r°ÛZRëê\·žË¹n»µôc×U/%È×m£±¥x;×Õwª¨Àuë]9×­Oʹn{ÀY0Ú¹n믖ï\·xÎuÛ”5ŒÁuÕ·bòi ÎuKqpÝæú¿æ—ï\·MTf}ì\WŸãe§Îuu-›Ùh8×mÅX¦îFåT·­§3 ³QÝÇ/ÕmË­3~œê–Ò/ ºu4ª‹:`º¥¬ ˜nÂéÖÞçL—eD:Ò-«A ]­ns)w¤ÛÌñ-!À¡®~3δÁ5ªÛ–ƒæemT·Õ¼¸÷/ºîê'­¼=¸n)®®Ûœå] l\WÇŪuxü8×mµ9Lêí\·0€ÝVKÉ6{ì¶2&Iw°ÛœÖmFu°Û°”%Œ9Ø5‰cÆçï`·Ùç[,°+çàcNXl`·ý€ë®ŒæëK®»­`†kẫž>¶(‹ëî*Zp gÝ꺛{ ïÒSûûðî¯ój8Û$ýbzºj“–=aðÔU‹”ù¬«¶*ˆÚ:ÖUc66ëª]Þ5J]µ6(ŠM€¼ZDL©¡äUÎf–•€¼±ªÚÌYwº¡&äw í-(o¼Ê)-$Iy…YGõ¼ª¡påㄞ·@e—óF¿´ôXÊyÕ/í±ò^nŽF9/K”âj—g>–âj—”G±*@^–ÙrÞ‚Ä!çÕËÉdÊyc©|w+HŠyç NóÆQçCƫպUP‚Œw–áýÙe¼³‚?«æ2ÞI®½)zâÕSŠ`zØ»Œ7G, GßãÕ.@<õ®Û†ŒWcœe›ƒñr§ŒW»gJ>ÁxY†Œ—C?¯îÏ\G ãÕ j›8`¼,¯°•’€ñÞý)\Æ;)EnÌÀ”ñêÖÍù2^Õ+H?¤.ãäa’.®â¥Á>U¼*Ejž6PñÆw'Ê‹p¯ŽKk"¨xYv€*^]{Æ|Tñ–šPñŠå™ø×E¼õt.âU™‰eÊSÄ;iµŸNÇñj5GVW“XŽ,¯‘ÄœƒD¼“²‘ÒÈ"ÞITbÔq„†·æ^õ ë+V *Ô°mYCÍ4¼“ØÏ¾ú¡†wÚ3N*˜·”™ †wjnë#N††W/ÖÖŠ]Ä+(ºdF0D¼“ne1>D¼Ó‘aeÁ¼í®ÏLƒˆwRÅã5K¯™ˆW¥<,à…ˆ7.s›³fD¼¥(D¼S3‹Ÿ¿(â-¥$ âÕ_fõ.âխoœÏ,t(âÕý¥E¼“¢ûÌo‡ˆ—}"ÞRº"^=—õìÿžÞ8×b>¼Œ> ânj‚]Ä[sï¤H+=ø âÕ¥díJŠxKÕ——ˆW~|Lƒ’ºˆwjiDÃø"ÞöâÇš^UÙË„·4™‚Wçš:’~WgŠå^—A¿;5™ä@_Ðï–7 ý.ÏæêÝò.¡Þ-ÕeX\íü¡ 
!o%1Ô»:ÛØ²¦xwj¹Gï=LjwË«„v·thw§&“ìÌØ•»e¨€rWßË1„_WîF·?·ÌS¹;5ybú»rw’€2Á6”»xÐí–:6]·;OžîFÝîtCxÝn))Ýî]Ô?üø¶„ùýÿ¿Å…üøóÇ?ÿ]„±ÿñ/?þÓïþñÇo=S„M1¸.qÃ1ã|ó™¶ï8SD}ÚáX4ß~ó‰žèüïùc–Æûþøñ¿èÏì÷]¾×SWÛ½/ó*}'~þuæ?ü¯ë~üÿó¼×O­sŠø;mŸ>s´¶»ÜþÞÞe‡’äÈ®÷WäP®Rñ~@ÐD€4иf‚ÀIwSYDƒàÿãÚösÂl/;YdU˜@£™•žqâåán¾|Û¶ú:~Øät­Ãõ}„Åý¬éÇL”&¹k×B5Þ™´…r^ÑR¿kú¡]r±hîG<‡qnô^ÃìãƒDÿÀµüÐþñ¼ðZ>úǨ7Ñ_;P/÷Ø/­'c >UáûÚD5×Ï búò¥ó4ÔN£õÜ-'ÐãøÆäýõªí±]ô>“˜§Öqcñ1=fQñqi¤¾Ïé?i›÷»„OVÆè¯µï·K‰¡>³Ë¸zbíRb¤¢¼õ)1RQ~hS(SÇ÷e²|H«¬à=¢ŠÊ‡Ô&[Y¸ï‹ŠûŽúAçøM+wÓ¨rÜñ·îµ ëø®×{{ WöE…¿†îx} ÙÇ{·d_vË„<4 ¤H7f&Ë„Ôû,=À¾œ– yH"“©j«LHuòRdGS¿•ÆÆù¬rá¾Üƒš’íë(<ãË:’4nëã«Þc|È16öqmYU×èz—¸w—UuæCêAn©‰|ÓÉö¡IŒ‘ùw‰1r1~^Lmñ%=U*Yòc_Ç÷×3ÜU¬ã±Úœ¶Õ»ŠçhKêÕY1¸îëmùº…;ë読ò!õºïåIɉ۷|ÈcÈ8žü¥]|7\MW·«ë¸juVíìÖFï>*BÞÕY9Ý5ÔvÇPòê¬ñ1ÚÖø®²_¯D‰µÏ¦»Ê7ž6¾úǸ–Ùî};-%RGiüÔV)‘í= wµíé²ì_Ãú-UPÛºŽXXMReÄx2z¬î|KEñþ2ñùv•(#cØc¨žt$µUJä14\ïÈdeÕȈlï-VT•Éè"f3WÌÄñU2|%UÌL¯.«¶«ê_¨-Ì_M\´Ç’êm~¾š]ô^]ÖJùÅÓ³ŒH4sFÓ»xp†°ÇíjµUF¤~3&–w¶ä~Ì–)½u¹4í‡Ò³s|Uþwæ†ï‡þiޝí|*éœãëå":µ]wޱ·)¼v‰22„mC¶:i†°:]é±ö×+®ˆ Ò'In×V;»¡šÚîùduë%ÚÛ5É>uÈtܹeJÛ®Ðt}² Sw}Ÿë3ȶÓ)ðÎ6Nçx"mµUR¤5{fÌ?;0-ö87KŠäÒ#>{KŠT7æœ÷¯)a+GY‰Sú¯¶Þ]–ßÔ©8%GY¬ºöXYíW޲ìê‚{Â.¾I¿+2CX-<_†ˆú{Kˆ\½Ì®<ù2ô¤.äjnÕ]ãœÏ¢q—$czFØ8—YŸí#Í9GØÅ½÷k¢xwW®£ÕV)‘ç쮇ûuXJä!“Èí.ì×i)‘ç䂦ýº,%rô¦Ì¢ˆXü«DbïþÃRUS[ŠÁ´4Œù³T‹Á=Ó{ˆU¿Û²~cÌ#ñ$ž!V·w¥ Ç~/–©oåÌR†û½ZRd‹ïõ•Ùºÿ½YV¤®pJ#œýÞ-+2Ú–*ĶǺjɶ_¢Íßcì©ìœLVWÓ]1,–j»Dö'QFư$‡Dåé!ì‘~Ç4[Vd·Vé:µUVä×<ãL,¿žÂ*>¾ˆ×ä~L*XúŒ°J#©ò燲2‚•ĪÌÚe9d{ÊU(cÇõþç¥ð”5ãØ[£öZwwzN$jŽ¡ŽÍ!6¾—h{¯géç2ŽÕKr¤Ô’Ë,µÄhû¬?‰,Ÿ(ö¯?/rV&Q°ñÕëˆFۮª<€gÙ£_if†ÍIe<½ö'Ð!¿/#ò©kò!Éve òäò~ì$J`}>Ó?á.mëwõÌÎ47ÜÕdÃì{H!ÜÕlÖ“D ¸«àÙHà®æíŠRwã? 
;åU›åÖƒò^C–jØ(¯Ì—*Ê+ãÂm+å”Wù¡öŽAyUœ¥f@ÞaSZŸ ¯|iK ÆÛêׂñо“qÆ«ªë¹ìá—¿Ê„—•èxGâ#ËÿðŽ*­¹õöðÝVñ|·UXßm%ÁÀwGÊJïßç çœï¶]à»cŸâL¤êxw»]S ¼;Ê2•ƒ)ðî¨W)†À»£„Ý£žÜÕ/ZõuÀÝVEt·•UÝm¥Ð@wYÇpw<¯õÉ*ÛW9Õ`âl·ÕHÛm5¯:ÛÕ.ò\h°ÝVá lwd\—¨lw”ü+±/Øn«@ûäX'lW7=ÏŒ ´;*'?w´;côÖ)펟| 'ì¶:»­ Àn1vG·u)fj`·u€ÝÑÉtuØýøÍÅ+V·6C»ª¯yÎD9ÛeËïÌEÛmçÀvGa²šøÁv[mE°Ýñ<#žN¨}yÅjôg»­º4Øîh‹ xp™³ÝVt·ÕdÝ5*ïÜW|è®~n­-ÐÝV‰tw¼ñÒú‚î¶êlNw[7ÐÝ'btw”U¬DtÐÝñD¦ô`ÝŸpiªAw5dœ•ºÛ æîŽÛÛæí[tW¿y•Štw|ålœðŽû+·m0Þ˜Jã*ÐæŒWÇÍe< Æ;F‡ÚÿãÕožåÆ«{0³0ÞQ rÍTNg¼mÌãŸr«ãU[ÄïïÍ '¼ºÈ¥RIÞ`õãÇÞéõ ¼ºcs%áÄË1{þÒoÛFxÛˆ«뛪š ï8]%€ðªmYRÂ;~³rä@xû‹qÂÛJ.‚ð¶0„·Õ@áç36á„·wJ'¼ãÃ*·yÞß4ÂÛ;¥Þуj«„w¼ö2áὫ’rñŽo'Ã@ÞQ·ŒñAx[¤ÂÛ?¹7âmÌxûƒâž±••/ãJG¼ý3pÄÛB ÞVWˆW¿) ¨gúpÄ«ã¦9­þ€x[Ùd Þñ›Æàñ¶jã@¼ýí8âýF["ÞV Œ·ÎxG6¤éŒwLåÄÆ;jÑ–i-ïxf]ñŽç·óD7ÆxÇ«­\10ÞÑÕ×ôûãíÓo ïL‡Uo8u™`¼­È1o÷œñöOǯ‹èu}»ñŽê¼UAŒ÷ã¦o/_­ÕOÑ;g¼ýœóŽh¯À8ï(¾\˜çí}Ý9oóŽ8ëQ ¼=ÊrÊ;FÒ9è@y[•mPÞVe”wÜyá;PÞ±4.©(oï–Fyû‡ì”· NyÇÄ´'³å!÷t> á勨˜oóT›W¯Ö@Pò§¼­à=(o«¸ì”W‹þmª Æ)/ÊPƒñöå‰3ÞŸ¹ Æ«ÇÁÎó<ÀxÛE‚ñjîy­NFÄ ÈÛ¢qP^£ÕCçAyµüºËF”Wm{¹Ž‚òŽ}§¯ (ïµÍAyÕá@yõ<çòÈå÷~¥²œW×¹•¶sÞÑT¥6ÀyE·¢{?+Ϧdç+rÎ;ˆYÙžM¼<Ê”ý{‹Xÿ6ç»à{>¦ºÀw3ðŒ*c#_¹Î9±øÊÕ䤾ÚiÙS¾Cà{Jf{·ç:øÇXóD½@¿óå5ˆ~i–Cô«7º¤ú“èwV4ž›xD¿óíäD¿îDIð«›èï|‚ß8ÛRf‡¿óí.ö¿ôd ø'W,ü “ÚR àwVáùñÃ!ø…µ¹¯Ì4Þ g¾óR ‚ùâYøNô‡ ¾ò¨¨:‰$¾ô½Hâ;ë7ìÄW6¬ƒøÒd†ÄW–¶buâ;ËÉ à¼êv& çäÚ]•s^½:‘ôÒ-Œ —žM½ú¾«¬H‚^½í¸”gÇ  wž¼25A/M¬zõ<ö3É·ƒ^-¼-Ï WÓ_¥œôªÃš  ˜w:ºÊ×¼ïvˆÝyÕÏc }Ð0¯&þ3mˆˆyÛÌÛ†r`Þ1ÏÊ„˜W8bÉÅ1¯ƒ%ój6’Jþ½‘ ÌK'1bÞÖÑy§ b~ǼíåóÒ0Ÿ˜·}sÀ¼úü×”Ã=˜—F½Ä¼ô—%æ´ôÍ„^bÞE8æ¥'1ï¼ÕÃé˜W?iX˜Wb÷%‹#ó*’·MI`^úf󶀘w’ܵ6üyipOÌ«ï`yÖLä¼ê •ôKΫ±9V¦Hœ+(ï420òÞòÊ&ºªvòÂdŒWô¬Ja€ñΓ§©’ñêkœ™ñ*6®Õ5¯‚c—ì¾/­Éx^Tf%/žït"Ç ï†©4$áÕ,\ù©d¼mÄã¥Ý-¯Ú4¾>2^$³)kgºßY‰i2^y5Fp’ÊZg¼óêòf2^¹>Ûò$¼ñ‹ÑCž¬ ^­Ájš„WU×òjÑ“äj^Íó÷;75ïîÑg“óòR€vÛ¥#ÚÕùjø#ÚUñ Z&íê7ïåQ-í¶ç¶»"“ínc9{¤¼ØÙ®üµË­lW@-ÏÀv[Øn{,`»z,µP$ÛUݤJg'Ûm¯ïÍvÕg‹R‘ín+½p·½ÀÝþ›w[Gq¸ÛöƒwÛ§¸ÛÒᮈ-mwyá.ÏG¼»]n#N¼Ë['ÞÝ4¿æñ®&˜D »ÛŽx tWmuîn²kwÛawu˜{NàmÀ»íÈâÕOžWª«€xÕÑk“ˆw—]âí툗ó&/‡!"^ @¼¸-5ˆ·ŸÎ¯zÞ/ ^ýd "âmWÄ»¼ñê{4ƒpÞ~œsÞ϶â¼ýZœór°$çÕµXz98/Ç ‚^]‹)ôê8[‘ôr´$èÝNð½JE2»€^}ǵX!èå3Ao{ /ÒÛî ¤·=+Þm”¹I•Ho¿@#½ýœôö6'½Œ”Hz9õ¶W ÔÛÆX Þöz9õõr.&ëí¿é¬·Í`½íq‚õö6g½Ô"õncè|€®Ã^Ý‚%*ö¶ŸtØËåa¯~ËJㄽœY {†$ìÕ °Ý À^õË* 
CØÛ/Ãaïg[ÂÞö@{Û\ ØÛÞ7`o2€{ÛCîÝF9©DgÀ½­/÷¶O¸·õYàÞvïÀ½Û‰,àÞÖ€{·óúwÜËà’¸—q.qo»NǽŒ;‰{ÛO÷r)BÜÛ.ó{5åÎ%âîí¿æ¸·_¡ãÞí¤G„áÞþ“Ž{ ˆ{Us§vÍ@{‘KØÛf+À^E–Uœ‡°w»°%Ø«ã Qö¶KqÖ«Ÿ´,À^=s(ìÕ€Xu„½$,Ľº”§˜i¯Ò‚Íê ´Wy¹Òú¸w»½6 poûIà^uóãîÕC±ÍIàÞöîÜ«KU1†¸w»Ì°wÓ†l&¤÷¶%½76ü€{7­*׸WªêS÷òtĽºÌɤ«Ž{±Ú!íÝÆþ¤Éöî£îÓ{\&ìUSeöî“»¡÷î“›4÷òA÷î*˜Y(Ľº–¬]NÚ»*ZŹön·«1H{uº+õ\ ½*]\Ir¤½$𤽻ì)3o˜´wŸ½~Ê ½ªtÁû»þ>߆Ùô¼¿É–wd‹¿ÌÔ½ T„½Û愽«fË\ƒö6`ï$‰;èä\”ý WµÆÏrè• ír§M ÍyÏ1Ò%¹„9¯LXâ|±~aY7ç¥Íyez{TR>ÌycË@§¡^µ­[JhΫk™Ëøæ¼2}©2n4ç=ÆYÚ&Ü—þw0ç¥ Íyã^]Ÿ sÞCiÜ™ÈEsÞCË”9ìaÎÛ¼BsÞcöŠÆ4çÕ«zŠ®7Ø;L«B,ÍyuU»”æ¼Ç˜œŸš§4çÕ|_ƪ0ç'ëÒœ÷\¼6Íyå²i»4ç•§–9;¸9o{Z4çݽè1ÍyÝ7ðéΫǹeÑuºóÂvŽæ¼4n£9ﱸ§½yF3݇޼RÖï×7Qo7Ù…7/L'›7¯î4…vôæ=†l¬¼àÍÛœvÝ›÷Û©¥¼}{ójÇ,kš7oÞÉ«WÒ›WîԦЛ÷8 6oÞÓ+ÿvo^Ûÿ¦7o³…†7/íd›7ïåÛžôæÕG[Î!ôæm†tðæm6œtç½Q%(/+9“ò6qœyõ±›"μ\Ìμ*ÏnyypæÕjÅ!Ü™Wæâeg^º¦Ó™We +¡3¯¬ ,UÞ¼Z‚àÍ«Ai)<ñxóž§‹¶éÍ{û³†‚5o?“[óF›IíaÍ«QlÍЌּ*ݾ› ƒ[óêtw­ìaÍ«zg±Xój¶ÝŠ¦ÃšW«S³€5ï)`áoXóžZYX†5¯ÖÈ& ‡5¯úКI´çU}ÈJ3£A¯"”Ùà±ôªæäTÊ>ôžÊ42±ôêynEX`ÐÛÏç½ZëT’ zU Ýn½ê{{Éj`ÐÛz zÏÓJe¾ýyå»ÿ8Ñ¥WhÎ{IÞRž0çÕÜEjÜœ÷”3ƒ'šóª§ÈÀœ÷Ò°X æ¼®2Æ9¯ÎWé4狘j5 sÞK!X­VaΫ¥¼=˜ój)_uƒiÎ{©Bna ˜ó^‹û ÑœW<å*±8ÌyãŸÊ'€9¯Rû¶²ê€9¯žçV›0ç=•ÓT‚Y˜óêm…eÜWNugN€tçÕ£vÓwçÕfú6¸ó*áq)ƒÇžWùŒ“©€Ýž÷ÙS)F£¤wõüÉ&é]„n®oP^ýäU™U´çÕK­ô<Úó^¬kŠÉaÏ«ª¬kmfžW/ÎÒ‘aÏ«Ž¹”ü ö¼—æÔr6‚=otÚ݉¬Ûó¶7{Þvï°çx»kl†?¯Vˆ‡ùO¸?ï%ÂYNÈðçíǹ?o„w÷Vâ\øóFx'­Ú3ÁßW!´ ~ÝŸ÷š»^]Ê\6ðç½ôJ Uøó걘~úíÏ«9eåØóÆ»™,\ƒ=ï­TºRxž7‚g3KqwÞK¢,–àÎOÃÒléÎ+*w¦Šî¼Ñ›¯ÍŸ!î²_åK[©£uÊ;EKÎGpè½Æx•ûzpè½G$\nºîÐ{«U–Y§Cï=Ãé½ätèUQ¡ZxÑ¡w,Jz‡^Ú¤Ò¡WoÏl¢àÐ{Ë`.æ—÷¸ç wRAß'' þ¼ZýÌ[Îðç½7ø\ÁŸ÷V¾Eí<˜w ‚üò©êeæ üyoM÷å O]ï\pæÏº^Û^ ¬wªúVóJ¼~‹†ªWÉqÙ>T½¶ÁKÌ;ª:^œœù:ļJp*Ù9¯ÚÎyµ¼©áCÖkÑׇä¼J!ÞŒ+;ç]f¸CÕ;<ÜÓª^h«Éy•Ó{æR›ªÞÛ7HÈyçÓ×ä¼Lé%ç;ˆ‘÷Ñ ΫÞñ¾YÒ÷qÞßçÛ EÌó >hïõux|˨WÿÐ\QY…móE9h¯œãÙ=€ŒUØD¯ŽåV ¾*l_;­ [ »à:ñåˆDâ«Ê=Ϧyo ”1×i^à¼WÉÞzcö;Ëo¥¡^=«ré]½h|#½ Ò ½Â·%jj¤w$B=ñ[#½AÓ¤WÁ@ZÙôÆã†•­ƒÞýv§’zg zõêÍ”6A¯ÌÙrs• w×\³¤g+@oô–ܵè w¿|ªk 7¦›]š¬Æ#ÜR¨ÞÖ½ñîÓûºƒÞ})‡Bb^- Êž˜7®dMþBÊ»/Eµ>(¯¼UJ~Ê»_%èï”7^Í`Êo Ê;ÉrhÍJqNy÷«.¹cÞ]_Tf~6Ì;Áò˜W†Ù»©™ óªÙ& 0oë”ļ*#1Eü«”¹ADÌ+£ê,n“˜7Ý=³>‰yU©Ò|NX‚mu¢†yÇÊéÙz%æÝGÔ“9ŽyÙì$h%Øø%óÆONç·½íˆyuëW%+óîZ™åæÕ§^~iļšÝö\ó Û%ÿÌ4IiUØVE©Šc¶¸0[{² ^j«Â&•Â$ 
Þ]aDæµ"lít^„M¶­WBsa[¾¾^Öòè}­Ûìöä­ Û¬³H*ʰi©UX–a›ÜW«•aS]Ù‘`½Ú„»JœÖ«y/•ʰíúƒôjwî˜-±[Lã{Ö½šæ¢U¬Á6Ã5Ø´Ä*zÄlØkj5Ø·¨o5Ø7ãú¨Á6Çø°c”a«Ëôþc!¶Evœ™¬ÌBlâœfBŒBl«Æ £ÛŠœvb‹ƒgß‹œ·±jç¼­æ ±Éì¨úm(Ć]ãVˆ £âSˆM¤²hYˆÝ•…ØŸ•×# ±µ ôBlÂËÆš…Ø"ä( ë°)¬;¾éÜðQ¿ uØFjzš¯õ²8i«Ã–b,Á¦‘¶À(¯pe"ƒòªž¦Óa”`»(ÉuÊÛK·9åÕ¾£é¾X‚ 5²yu×µ¤f6Öäc¶Žªø-¯JÂ[a¶{ˆ"“Õ:ãUS™ñžt«ãm…–ÀxO…YŒW4#V’iÏéŒWm>nµl¦êh%ØXp Œ·U1ãey^2^ÖN&ãÕf¥9w‚ñª—åƒÊl¨œÄ«•†™Yñª>S<£,¥æˆ·}@¼ƒEÖÄ«ŽrTøÄk¥¼X~å¿X~…€wUÙóʲ×À»ê“–Š¼Ë§ºKy@«¾v*W5˧ú !ïJ?bŒ x·Õ¸dõµÃ³bÉwµr©ƒÕ×8~²úÚVæ’|7‚¡½\¹ÁwU¦óÌâè¬À¦ÝÍ£r¡PMe©œ¢Û…ð]iÁ,YŠØÚùàÙ@· ðÝVÓ|WïÄÐ)+°­®*ü¨Àf¨°U`–io=e+À&nXC^€­6e¶èÍseðjOÅÞ: °iR¨\°§[\ÒíUϼÛ=9±"àm{•,ÀFé€`k[‡(À¦åè¹<6Z¬¿v ²U±:¯¿v 2dÙtÖ_Óe•y)ë¯i÷&Ë!5À«ñ<×F¬¿¦UlMûͶa†fظ)LÂ;t)fc6íýü’’—f;͸F),ÀvµFuâ=<²7L¾âeƒí•7ãÁ„œw>¯uËã˜:¼Üº×² ü>ã†?Ž2l‡Î>0í]øcŒþt¾”ÖúœÆŸþß¿üû÷€d¯/Ð /¿ þÖ…|»þm§ÔÝýlwüO>å)sžçFó?¦ê ÿü+P¤ý\ÀûÏ?ôü1yÏüÐ+x=ö×›Ïÿ8k⽃÷¼ÿüCÏÿzìï xþãWðúÍ?þ+ÙŽU{‹¥N„DbyÉÎC×ô?~z;Äü—9þo‹ëþòÓÏ_þÏŠ å?ù¿_~úßÿò?úµ'„i›ÏX<-¿þLÛo?“‚ÀˆÛ"øTpñ«ÏtðLÿõÍ_ä" ‰Ÿþý·Üáss¼Ñ]5#|‹Ù*VÏý¼±Ýãç_gþÓ—ÿþeûo_~Žÿù oöŒÖ5Eü“;í3-üÀÑÚïòpïù49ªBï'ý¬éÇL”&€l×Bmä™´%©òúµXÓí’›Es?â9Œs£ð~dÿ$ú®å‡ö×ààýƒ×òÑ?Æ@½ÉfõWÔË"YqÌÜÚ¥ÚOÙé]1zß±ø˜ ¦/ïQú·Ïx<Íz IMw|cºŸÓ,¿÷<“P]üç´®÷ß9Ïö;ϳi—ùî ó7[žçèçùçØwÁ…õŠeò—X¹êÕÌÇ"ìØÎ¡ÿgø×?þÛÏ?ÿáoûÃ_ÿú×?üüù?m¿kýò«ò#É2ªÒÉTÛ}¤?GI§G4x¯¥‡ˆˆw-ú¥Êx,D´9S$ðP®QZˆh7±Ôœj»ÒBdWEJéÌìk|;™~t¼x%·[bõ!d]":n®¶Õ½ðß(RÛU"‹ïª’¼TaÀC(>ÓwOíúeÁ*í¦•ñ1j|gÉ*Ù§,û¡$€* ˆ‚¢Ç+o¢JVmKª\Á´Y÷¯¥Ü<ÎÛìðXÖñø©º€,7tÙ.㨥óU AmwÖD¹àCª ®^Šæ:£*lÑq“½RgT¥”|<®ý]éõaõ×Vé•åŽ!âÞ*81>q\*.ýrÛDm·\Ϭ÷sܓ׷F!»ãž½¾õÇ>Üè^¼¼õì*×xž^Þõ*Õv§ƒÈ¢d”Ì‹:Ú¬X%Tà÷°^~z,ʽiØZÓ@d¹¿J[õÞŽ<¤Î¨²€¨{HžQeg}.Oº©Ú¬ÐIôQKíE[uYŸ’gTY@+†zJœ‘ö!, zNC[÷ê°Ñ´—oš®_( xÊ :íCð,Ïi3'<•â,³ò¤ô¿=Ì O»Ó0oÌØé¢Ë«=ŸSâÎô‰¶µ'Ïa³ý`H”qð)qgUä#”S{z‡° ¦ÚÊeÏXZ—žºAm|žªž‘ºr–Þ9å››ºr–:%ÌH÷åv}ø9Jº<l{kfTE@Tâ›å0+¼~ÈaVx¬E+xSVxíIê?e¹~²¶ZÏ1 䯼Nõá”–ë7rûüƒc–’’H27™Ï18îÕgm˜95s¥´\d}czµJRsmƒŸ£ðö]]vÒwõŠ†Î˜CË /~r~\fÔR>xzÙ§&…Ñ_TA@TOû÷ÒÖ§F gª=×Ëlðô}T•—Suw^mËHm(mí7·Mfƒ×¾ÿmöÒÖ(xznKÙàé°ÚÑ>nå®CÿÉÕlðÔàÛÑöÜ6/n­§Ô ¾I1'ásø1nä £¶Ólð0ø*Rœm€Ž¬ÆuJ˜ñD°­¯J—±Ø[óÐ>™žï–â»s„³{uÕ»JÍ¨Í [_cÛí=“jqRxmxÝW/l=)Pxrìâã1 <ÎÜRÜ–žÚÊ”Gme§óÍóõ 
¯ûá…­96í§YàµCrÜŒ_[Ùo³Àcùó!DÌVÍ•uœ‡®6ØvÇlx,>.ýYzàõÃVóÀkC¨Ä³ ¯[iHÔVxú¯,ã§ÿWekTôНlŸ;ÝOÒ¶L߯å§ìÈ,I®¶ªlÍoô¸Ío[Ì dÈ€À.©ÓVzyàµðDÅ&_¯2#n Á²cÆÂÊ*[·¶Í+[k%Ÿ‚"ɬ²µö­S¹+ÕKyàµéã<¼²µÃ(íÁ—^ë ’hL6Äâ‘Ý^ØšoV èŠ`eì’É#ãÖ#X+|?”‹ ±G)|´¹_…­£ß—KÒ`+l-£gŸ_êV×e•Õ†ºÖg¹V<[aFK¡ E«k½ŒÄ“·žtd”x]kwy]ëè±ûö8Aª­êZû@zݬjmËmÀ[U둯÷û”JˆªÖVáü•˜ù ²­ŸßK«jSËóïÕ«Z fOf±2ø¬ªu qwÏ\+yFUµ^T¹úñ:¯j}Ĥô<0É3*†•8wÊØþ>½®õêéƒJkd]ëZ~Kcgu­oßþ—ÀêZG*°2R«®u|œUgQmV×÷>ŠLe]kÔN~ÕÃIJ ¡j–Ói”‡Q«]=¼P¼®µÕqVÛS×Wxx]ë¡]~¦«W1“¤›Þê3ÍIç`u­å‘òБùݺÂ5ëÛŒ"(ÊĔeÅ(W•­7õ¶ç˵弲õ]9s£^U¶n¿¹zeë]¹#DfIôÊÖ–ºò*”ƒììYekiŠß†X]k#å Z]ker¥¤kü,¼¡CìáUײšvfUZx–Àm;Š'3÷;>TÓάR/¾W¯“ËSk¾/9ò©vòr"á•ðøõ±Ÿ(ÁTšÓðŠ–—¯ =*ƒ˜ˆW:·’÷ñž£·e ^M÷õˆxÏ1ºyœ#^¹ÄÍYo‹ˆWšáyOÎ ÄK€ Â+íµ%¿á=”S“>'$¼Î%Œ'á•ì¿jš‘ð*-©1Hx•¶§Î[pÄ«œ±ò!â USˆwè£3âU†š%ñ*â¯OˆWRg®I‰xµ2¹—:ÝñjqQÎMD¼š×¨HÄ»ÒÀéƒ Ä»ºMÅj ñöÃñîJÉ`‰ˆw_½ø¯¤÷US•ˆWmd‘ñÊ[3£R"Þ]ZÓ‚Û@¼t¥%ãÝgÏ_!ãÝ” Wd¼ÛíurÉx›«(¯Y…¥d¼4—&ãÕoZ ÆËR^ÙàV!WP^ZŠ“òÊY×Þ ¼" Uª’˜wSàUüîy·ÝK¸ó²0A¯ŠÕ`CÐ˲+½Ûê–o½2Í. ?‚ÞmI|Ø5½|úÔô*ÂÉõ%½š Z÷ÎÚí4Ý.4½|*ÔôÊ{°$CÀ½³òɲ(#q/¬›ªWý®@U½›§M‘öÎZÖÖ’´WOz1 .T½£NAv.ÐÞxÒѳRv긗ÞÛĽq)Ñ“š÷Ê qÎÑ佺ÌÍÔÀƒ÷jͲÀïÕÚ£Ò¥È{ãñ˃MÒ;u}³‘^¥YÖ¾0Iï,\”»•D½óå•Kš¦—}¨wº­H%Io|ðÓ•…IzµN;kíÒÛºPï¼yzYï¬=Ëã,;éÕHú.ìñ¥“^ —¥V!é•Û{ ÏöH¯î`I*’^º‰“ôjè>26$éä6T°“^M>&Ïé‹åÌ›#é[·b@$½óê&‘Izg¶JýIï|x=&’Þ6‚ôÊú´PHoÜ™-9Iz[éÕK-çh’ÞéÂ:H¯LîÏ‚ ½ñDbbË 3ÞöÂAziDÖ;9F&ë€õêþJgGÖ«ñgŸ3Rë:©üDÒ^…"P‚öj.;gÒ^uÌ2 'íNHÐÞv>§½óWƒ‘Åy ½míÕ·UU±ÚÛÆPÐ^ mUŠ´WŸÎõ`½Ã ÷r±o²ÞÞ䬷 '`½²"¶0¬·õ.°Þy8æ6XoܸYÞ‘õΛH#ë0¬$!€½íñö΂¨i.FØ;²ÜLìmC"`¯ÚÒºŽ¬—wÅŸŸu(Yo|äñ¥Y¯bü5‘;X¯î fÙ÷“õÊ„§t@d½ój…öˆz9c%êÕdUE‰zçáÍúìõΣÈ᳇EÔ;gígmÔ;lrÒÜ‘¨wºœVõòã&êÕo¾÷.¾tÔˉ¨—]¨WñΙÂ(¢ÞyÔi|Òщz3”±7Q/'¢^~D½ü̉zçQsî Hz‡©øülí“ôêË*÷~’Þv:^–.éÕc¨¥;Iï|:ò"éeLDÒ;_n‚8Hoœf.ù½¤wJÒ;Ç¿ÿ ˜÷TÙƒ÷÷üa|øÉViÞp Ñ1ï~º½%1ï®iaÞó'óRÆFÌ«Ø}O*ÓÜV7Jmî Ð÷4÷ìÇ7÷É2«À:Ýv«ÑÌ…Ûé@ó†a…ñûͼA[FíͼAÁj ¬iÞ€<r^æq6÷äB‘ô*'¶vó@z™ðKÒ» ÖÔzƒô*·µx!@o|Ç”ð"A¯HÛú Uï¢å^fxò.Šù ÷ò.£üÌ™Ø ¯® Mé›oÃ*sœÇˆWfòå3MÆ+Æc¤Àxù_gj2Þeu;S Þ~lh¿x Cˆxµ|/½_óm·i ®xiÔ3ú!âÕ«íìæÛ°x1;BÞ1‚fv!/¯³97h¥U787¨VÉ«Tß—y—±Sólð}@ÞËä­´nÐüP »Çº¡ Ö ›F£$ 4oÐ%-¥:†yCûM7o@Y(BÞö y5öfé÷fÞ°¨Ÿ$w¡yƒpŽ…Í¼!ž¿©3hÞ0ÌÉ3j%åå=ò΢"%ƒò¶$nR^­2jmJÊ‹Rc óе?ú@Þ¸yÙy/£Ø ﬗþlEѺÁ‡›æÜ0 
A&žéŒ×J½4Æ;òBžt™¢¼#•$ļ„–ļʫ¬å81ïìvÒļ»—ªlÎ *ø™I˜×{ 1¯Šß” ˜wlqfê$9ïâÅN›wâe{ª÷ÈyrçŨ×Üd÷^œ¨sÞ­JÜ4Ð;Jæ>ÚŒzµ²*í¯ZÚìÔK“ÖAïm"J‚^Ÿ"éÞ0\B“{н!¾ž­JéÞ =Óªš{âõ@Rú·{CüåTiÀ༭Wҽ݄°W^·§výV¯ KÚ«7Sû 4pà0CØ»xEÜfà ?íÊI¦ÐÞ6lÓÀ¯†ÊWÎ0¼8ðöàßÀ·Fÿ†v)ðo˜½ä,q¯f‰¹(ýf/¼ÜüôáV*3ýTüªx©Û7Ì.™hö üiß rÆO¢ÍT¹»çëÕèmòCš7°'€õî;à$X¯êM©ÁëÕܾņyêΛb`°^M$–™Bó•ÄÈœz7ðî€zÛàÔ}2ŽO ^õÉÆž§Ô«ó™m½„@2ç¤WÑЖIŸÍ»AáAé<éÝcö\Ф·E£ ½qœe}’ôrQ@ÒË'í Wï®Ü z5kßÏ:”œ·…à¼m—ðÍyGÔp>æÿ伋XŽaYç¼MÒ«ED%³‚ôê—Ìâ'éÕÀµ'Á#é]¤yª$ôªd˜%nôÆé®+! Aos£èmF7½-¨èUçÚ®œ„z—Q2%ê½RX@¯Ö,¥À%èmo¤W>b[V¨$é•ÜDIz婾N%ÏuÒm{¥‚ôÒ{‰¤··9éîõ*²¿sã²¼f¯¼Ý¼  !êå"‚¨W—XuŽéÝ ¨Òð6¼TV÷žSI ï ZKI ï¸5ï†U¾s*~áÝ0kŒ(d ï†QNþÙšwƒÂ¡Œb›w­èÞ º“©ïhî Î…zeF2çú¨—. 4oˆXkÀ·MÍhB÷šƒÐ½-@½Ûìe(›{ê±âYÀ7÷:oнáÐí)oôòoˆ‰~ϯæûüfõþ“^ä}*f~ƒóZÇnœwsñ9¯ìÏÒé«qÞÝ7ç…Z½qÞË Ì5Î{{-²Æy‘‰CΫt´*Ú8ïæ…Éy›“ Wåisçðô6˜k ß{½šArÞû½»»w›Þr…"è]†EÅSð´ÙôòZza(×@/ãza6×Hï¡I"‘z’^w7l°–p ö^ÐEöêÉ-ù°{?œxö 6Ü{ÂùÀ½°l¸WßYm÷ÆpÓWzã‚÷b¦ü°é5‹ÌÆ{Ûµ€÷Âä¬ñ^sŸüpé¶ÔÅ5Ú ÖF{¹@ÚKë7Ò^€’ö ³ÙôºC`·é½À‚õ¶Ã è¼x za" 9/ÍÙ(ç…‘s“ó.ú´3÷ rÞI.©¢£œwö"f$½ŠTbú\:õ¼°³ërÞ9ÈH9¯FŽBG½|Àà¼Âr1Dd¼|éœ÷U÷™!›š÷ôzÒͤW‘ïõm“Þ~>缺̲òir^šŸôÒ2›¤·9×ôÒ†›rÞ1ã– ØQ¯ÞB%eõêt¦öêUHX¹j‰zõ¾×²êÕ© ˜ƒô6=Þæ:뤷)ÂAzõÌ…=@zõnâ“xT3 ½úíáóà¼ílà¼zTgVN&çe’9oosÎÛ>pÞö=Ò¦·µ9ç]U1¶²’Áy¹õKÎÛRÀyÛÇ ÎÛz,mz±è•ÁÙrGuЫÛÛkà— ‚Þx'qÅ_èmöŒ´éå0 ÐÛOä —Uz5úšT“F½pû'éU?\ÍÇÀI/Kô¶¯¤W×rÕ~ ­zy ½Zœ­µ±Ò+ÃÁòÌÆ·Ho?<°ˆ$êm.’@½«rÈž=ìæÔË1¤·u/^}º)%¤ƒj͖ޖâÐ+_G“ïô*ïé,,Ô«bêg¦ôé…°ƒ¨WÃÇ]®@4p Ã â¸)=éÓ‹â!D½cD*ñ!}z]¶BÒÛf[ ^=È©¼ hÓ»ée—½/lzw¥_={FtéUè_äŒ.½ítNzµ‚¯|ÎæÒ‹EQ¯†54;ê¥!g³éÕÉ{¦WBèÚˆ¦Mï©"7°hÓQÇRîê°é=ÄQÎÇÓ¹[8¡h>½ìôé½¾Îoü¶è=qV³èÕ¼T.h´èe¡E¯h¦*À¡W³YZïÓ»AÞ™CÞ¼hÆC‹Þ[aâS\¦y7èG®Â Í»a^²ÎʧwƒeIÑ»A¤­òßhÞ@ƒ š7hίwš7À&‰Þ ´¡wC|³&Œ wC3S€wC¬:Ëȱy7h|–!-¼T…Û¬vÝ»6Í»AÿQ¬Þ ¢¹Û×¼`VÞ ›—…oÞ '’N‘òFÛY{M¤¼:®äM ¼ò{‚³ï†Ù3Hye!l`§¼ÛáEž›wá%JZLлá-á£!å¥Z²y7À…”WQ|tIkòn‡ëè›wCó`€wC *cHy#jŠÞýthb^U}"û熽LÞ yO•#8!¯®¤À!¯Ö‚…ñ6{0ÞfBãU9M'¨Áxõ=ÅäùÞÿ>ÆûûŒThýy|Þ Ø²+z­ ë±Ér?|ôÊ©3'Fzá¥ÓH/üIzÊÍ5¤··9ée‰’^Ú¢“ô.šÓ%›¤—•>ë±Í™ÌûQmyþ–•Ø®´eáÕ*!ç’&åõôìÀk»—J^« DÀ«¢ûùšàm’\^Mk> ÞxsR§¼PàðÆhp¯U „w–åláJ^%GäûÆ< ¼³ì‡í¦ðR¶O«әé4oüfLäó\ð*o¯ŒÒ xãt»{(8àG¯¸ÍÀ·/Ó„x•ÿ_w–„sÀÛÏç€W”¬–öD¼zdKÑ~ ^]KÑñ*O¥ÖCD¼:_yKñjιÍËÖ¯r$MãâŒWi*úwè 
Æ«éè—\ôÖ¯R^òê‘ݵQÿ†¼ºÀ‘õ0oKÔæÕ„YÅb‰y[^˜cÞÞä˜W *ËVÕÊó¶t9`ÞvÓà¼êCµ»JΫDF+d׆]™²YÙœWù–†Ày[*8ï<<`ž}sr^~‘ä¼JþºJq Ϋ‹vú휗Ù6ä¼ÌQ#çßÜOóÆuÎ;Dîýƒó¶DAÚ6hµ\œWùÑs‰ÊÀyÕ“Ê#y8o¿zç¼ *÷rÎèU0z”æ w^^IeôÆO®µ±ßœb¡`ì WÉ~VT w^Ü ¨7ÚÌÁ‡¨—i}ͺA€Í侎zu{fÔÛiÝ0»mQ¯ÖåBDÔ;Ofõ*Ûq5¼&ß0oÞ “Ç›D½:®¬ÑˆzÛu6ï†ÕvçèÝ0é‘•M.¼&”›wƒÖ£UëíÝ?f¦&@½Ó°ŸËWôn {Pï<ÁÞQï$@yê¶Zôê0+BÒ;Ýð†ï“ ¥ ½“ŠŠWÝA:8ŒHøÙMèö<èí—‡ˆ Î4"èÕq–QЫ¶{Êá  ··9éÉí:ŸRmyã¯×Ê™"äÕ­YiJ`Þ6 Á½!~ãÌê0ļíió¶¾Ì+³+ë—î í*àÞ@»b^zpóÎn–G÷2toX¼RCsoàGO÷9¨T];º7´ãàÞ ï¡jÒ½aSôý•î 1…zí4½q-—UH¤{ƒ¾±Ú³¤{ƒ Tf{ôjÐØ—±ôö6½ºŒÓ½¡Y-8èÕýY):º7ppƒ{ÃäÅ4zÕiÍ†î ŠHÊ¢ ·({C¼r2lî œáèÞÀ•þ íêÝ¿añ‚%Í¿!†5Û}ï,;Òïˆö üêhß0kÂy¶èÞÀxî Œ†èÞÀ+yC; æ ÒÃ[­6˜7À¡™7XT§¤yÃ(D“ãÝvM‡WVsÚÛ,`ÞÐ| é…l¿›7Ä|{¦ .ÌF™©Ò¼A”%×àÞ`5qܽÁ–”ͽa¤FdÝ1º7ܤŸÑ¤¹7|X4”{ÀW‹;쥭Zso¸å5” bº7`åÛܘÿO÷†q¼Ü°bkî 1«–is7o(nѼàÝÒ¼ø‹°n€— I/3*Hze+W…rÈz™UDÖ»H£hî¾Îz­ws–ëe¢a¯Î½Èì]´Ê0w_‡½ž&:P¯r²·ôaü>ç†?Ž’l×p²•Ã/yüñϯ?ž¯Ï*Ú_úÿòï߃”ãåÞ_ ^~A5üÍKù.ŒýÛΩÛûÙïùŸ|Î[äç}§ÏŸ§ oÿùç߯çìãO?ôÜ1Ï使ÿüCÏ?ž÷ë?>k=ÿCžýûìãO?ôÜãy¿Ïþþó7ÎÿúÅ?þëØ_yR"‘_v­û·1fž‡.èüôv…ù/süßýå§Ÿ¿üŸÿAÉþò¿üô¿ÿåþô+ÏsL~oû¹Fô|ýú3mßq¦UŠèPc©qþú3<Óý_ó—1ãß_~ú÷ßt‡±®‰Æ}ìv­±$Œþ8¯,·ãç_gþù¿}ùË—ÿþåúÿ¾ü)þgãýþ€º¦…r§Í©àGŽÐ~Ÿ—Ѿ5#¹Rº_DÔ?bvÔrõ’µkñ¦4S_Yͤ_‹5ýàNyšúOâuvt‘v?²¼¾Jt^Íí%ï1»I»š~2FíX;NׯµcžÚ:âϧf@mÄ]²ÏŽøÿc¶˜¾¼‡ìß<)µÓ¬Â³ 6ŽoLJ÷sšå÷ž'ÊRkÊøþ;çÙ~çy6aéXQç4ã±åyŽ~žpŽCêÈë<´ÎÝ´ ÛcËoçÐÿŒ3üëÿíçŸÿð·¿ýá¯ýë~þ¿\ú}óé—_•#©RÓµŸ-^x½1gñÏÝÝ$²)ú+æùø#š×Ébè §º)Ved’ƒ|&^³m$Ë]oÞ±J h¶«sœnst]«×cõÜþø ƒR‚?ßÜuí&9&÷çP[íÞ³¯\’hxè•Xø §LÎEöhd… Õ¿^ž„§‹e”OwyU[m˜ª&޽)4rCÖYw^d‹ÑÁ÷ùíÜqIžQ%yvÀŸ{5Iľ¯ôÞl›rßÀ“nß;Py ÊiºîÝ€+K_ñU”âIÕ Æ^’gìãÚÍø’0£ê±^’„UZE3yç’0£$“(ÖuK˜Q…QBåžf¯Ð‹"#ñï½xæåü\mVÏn矙À÷´ºNql=JÑ{Ú¼,‹haªêïi÷¸‡$IÏS¾cè·š•ñÈçÔn¨­JÉ]¾ésË58Å(ovËj´ê¢fö=¡&-ïNÛœU'RzîóÉ$ºE«Q¾mzjfΊ°ÝÚK¯b}²{LØzk´4˜(}sl«ÏX®÷Ü ¦]wzͪ­ÄjÚ¹zJ@©í`¡çx7àãMP¸Õu‹#V=ÄUmOï¿EPKçŠÒ€j³²>¨|Ë2¿„ͳNö7÷Ñ.e'sËa©Jj·;“iîaá“ýuýz•gŒÚ¬z:ôÞ÷²yQc+…•m_²iÜ•+È] ~ký›•ecÜ-“ [SÖzD¡f5Ya+Vy/(î‰ÚŒ·rKK² +Ô{äÎeWEšÊ­,õ«XåýÒXZeÉR«ßJRªŒŒ»fZµm.–F¥×{d×dŽÔ޹áv¯‡çn×y°n®%ÜJ‚Ø»DÿVC¥×Id‘Ù¼÷z{-&¼4ýv®(Ы6+S‹£·^p®’†¤ÄãÖjtU:YJXnÉë½Z躾ӑﭕ¡žËŠìÞvÛvÖÝì)Q›‰þ‘ïpo‡;ßJ™ò+ßò–=3=ây͹ܹ·ËROíSfw• ºRzÚ/ÂS%~#úà3¶ª¨b¥pñõÄ„‡ü`K±¹cµ´=e¬OšPüsÏ£¹ˆ<>Yj3…û¤Ý·çî"°Š^ 
sçõÞwÏÉe?ß_ÆéÊ‘;"ÛÙT•°)³2!^E sç%þÊ«gÅ“ZRÓï·£ŠKwü+âQ{v/ï˜-tU±®})•ïci~U)Gm–è~éÂ?±[ñhežjXŸCíû@JßÙ±{²¿ãp‹»þãÕfùåðý¸Ó²>eN;–+•……û¾ÝGšÙ¬ò?dqk{™Z½UB÷©4²GäqËmå´¾Z RXàJ¡{Ü“Ï\zœ›6ÄLQèx4º¶ñ:Ú,»Qâë ðÎÃB׸–½v ã§\¬hÑù(×â½|GÙS—n}oµYJ!?úkrO'>‘kv”3b,×=Îý ÿwÇŠÉÒ†yg×êy|¼³ FJ’K¤]Uü˜…®Ê´, s¼z7@n`\ çêFÍíZ5UŸæuUäÊÌÙ˜mZ?ëy÷äÙø‡’0+;ÖL– ËÓÝ‹E®L‚U[Å®ô<Š9ËbWºDÝ÷æ9ðí:wÏMUÂÅ™ÑÓ}XðÊ’v÷à S¹KùË‹•]À^꽦lÏ%í_¢óx&èYߺvf§É½¸ðºGc…¯¬±¨Rî,Ç=~ÏøÆW«§cÆ9Ÿõn4mî%e–B·§ÞVVn´ínëà~\&<Ó:¢°Ô2FÛi1ìÇ…Z掫º8Ñv[F¼ÙÑfv û×ʘ‹iaò,g•åLÌhœ-”¥ÛZ4.LØY+›Aa¦»¦´kácÐϹy8ÛÞ£D³u`KÏÕ ç-ŸëH °‘Xᅥ¨ªŸ2+¾ý¸O%iÄ‘ÛüºÏèu¡Q4²¶¦:²Ê1XJ¼ c!«,Žöëõ`-'4ZŒ«,¸éÉfŽFѶ8rº_ïxŸ2-Mëcragò%§)™ò¸Ök©oymùâs¥,Æ—nn>û𻇺HÎÆCj¹/—÷ƒþ;ê›!°¡ˆÆˆ—â Ås™?ûòúúµ©Ê6}_åSå¤O_s•þØzDÝkB`¢žÓ‹†øš|¡I mÑžyŠø”ù%žÖfγê#Vœµ¤šXx{ÎTAàóò ð9<R[ç8žÜ~$L>O7H ޶µ†=àSÔ¬ªü€K¼•])ðˆWÏ'ë‡ Xââ-I)ðov¬+)«2àóðȃ ø<¼®/ð©o¯ê Ÿ£bnŠÏÞ X1z¯dÀ§~º”z`Àçî_9°63j$æqdÀçæŸ8p´]‡qWgÀ:® S‘Ÿ›»§kc¥†12`Wƒð¹¹ð¹Y\ÌÜh"`-°Î´; Ž6¥‰€ÛÍÇ‚Î& B`Ý@ÙÆ÷6‡ÀRæÇ÷øÒ€Àʆ® î¤À§ì]SäN | g %Ràh‹{ü‘ßø\݆ø\ÜM•ø\½®5)ð9¼ƒŠÊ:>/‚D ¬ ,{RàSîA[²'P`]K™Ì’ëþÞ¯ñK§ÀíÀOâô¾ Ös)¡>@°šJa |_«·ñ 9p{Ð ÁÊí¨Í-’`þ$@ð©ÒÃY ø\܉‰ Xi&åûDmë”þæÁ§´þY3” øœÝW˜ xP”Ì %î÷ðÁ2ï¸ÿ[GÀ:âLO}2`Ýoåëž*é“ ¸='0àsv÷2`÷ìp’ël[Ö¯!†zK¹¿tÜž.p´WZñƒËáj¢ƒŸÃNðIž >g·!#Ž6í€<Ì ¸=0àh»+– X×2¥p»s0`=±Ê+&Ö­W!°~óÈí0`5­µÃÜô~}^3àö>Á€Û‹n§noXoæL?%R`%•O)ð9»¿")ð9:,Ràö„A?Û,zer Ü›ŒŸÃ¦/180EäÀºóJ7&nw¬ß,oL‚àsvGD‚`w§sApœü¬tj‚à~>Áí-ŸÃú3w§‚ûù^ ¸_¡ƒ`ÂìµÜî ¸=I€àøMsk%Ö•1#Að\ÕÞ.@ð9†ÇL ¸=-€`]K™D3œÔÜ·'é$¸_Š‘` ŠÊ®’$¸=iàhÛcIò„ Á)‘ë7Ëq“$X¿Y6—$ÁѶVJIp{{ Áí©€÷ó9 î÷î$¸· ÜoÚI0V#Á|",ÝÜo$˜ï®‘às: Ö‘¥j,˜¥±às: ŽF³‰m4˜¨áàsøf?†_ÄÁÑ6ez%i°.µ  îÀÁýj€ƒûVcUÑh8XO îO8ø£Ñqp°ÀÁ½€ëÁ–]EãÁš…ªJòà;tÜûxðq»#sãÁýlàÁzÉå>Üx°~öNDãÁ½G‚÷g¬ ªlùƃ9à4üÑè<øã>÷þá@øœÜ§»áþ„{÷î— üñ³„?Žt Üû+€°ŽLµiãÁýËþx΃Û@Üûìú1Ÿ|o•ìßf«×¸ðT*œ.³V•ïûàÂæ-úÁ…¯9ý¬?¸pk«øúÎ%+v6ÜÛ@‡W7Ðmt4¤Ña–F‡±‚'F¸E:Œtø.~×ãáH:Œø“tøPÁëÔ—'J>G›zé“, :mZ= ÛàÃÇîMȇ-ÓÂ|øÐFIú˜‘ˈdÝòÞÁ‡ÕK´=|øX½Æù𡺤[úZ‚"NåÒ >|…r>f¯/J>¬Åcõ=òá8ß#®À:X«“ Ë:fÏn@0|hPJ¸.Mñ“ï‘…X˜¢t`á±”Nï"paeÊ6pá8lßÒˆ\Xn9G]$¸ð®˜h¬nÉ…÷óëkχH¸ Hxqw3¥±#a™ý˜j HX˜¢ 䈄ûqŽ„eT†D» £çÞÎ ÇÅg–!"Ö)¸Þ7ç#n— ¼Ëš#7R„wy+ææ °Ž3€ð.R˜B-ð`]Š©PÀƒõ“åéMOß\Ƀ•›P^‡àÁJu¨Msðà}sË á]} 4Àƒw¹õ¥|<¸·9nW 
¼[@ܰÚÊȃ@x—Õ]m݋ư–O–ínï*ž]À@¸·½€°Ìº>püídÏ@8~gª]ÂʱªÔa©j`GÛâˆÖ𾸰R^$; - "¼«xq©ðA„•‰bÌDxÖ|ÅoïZŠå¦8ˆðv»“‰ð>ü­ž “D˜îÏ$Â÷-ýßI„õ+\'ÞòH¨"¬6#^ Âò¶6’"¼OÃïèU«Š@x>€9³ë"\HѳaÙMOi™?ˆð¦i»4µ Â:ÍZ¢~á]BÐ{‘+ ¨<ÕH„·ÛÀH„YŸD¸·9Þ‡½a& €ë|åEØtÁíZœ»³8yp{b΃·Ë½·Àƒ£É<áȃÛ/‚+ÃéÊZUäÁÊ+ªØ’<˜uɃu¾XQ<ßx0LΉƒõ“eØtÁbb%Ñî—é8X¯`ÎÄÁýÖ_8x“5–©šo¦´y¢p°Ž«Š³ÄÁfÓ÷:–qäžÆÀÄÁ:.—'\VŠÙq|+î¨ø8X£ß]^§ÁÛ€9%ý=˜Ñv[Šh°N·^E´+ŬJ‚Ë{ýLã`ÒàMUSîù€orDN ipË Ötq›ÞÖipËM nw¬Û[L§ë4XÇí‚o{ygumk­°HƒuYQû ƒ•xþ‚,سø fúbCÁ¬îÓP°.0‚ü÷°ÝPð&ã7E«tE\×[ÝßP°2o“!ëgw"; f‘†‚u+û/ƒY\•$øãF ë¸Ãtª@ÁŠfb„{´ ++q7"Pðh.#¯Ž‚ãÈmMs놂7IuRÕÑPp) ¿÷hÐP°zé™kƒ†‚?®ÖQð&IN*B Vã‘F~ 3ƒ1Q°’Gïtñk(˜Y– 3í´¡`~O +ñôZ–o£àM:š ™ Žáö÷Ü9P°üŠÏ4m(X{Òª†‚·ñ1Í‘«ñz„1$Á›ÄBã"A0ká4Ü/ ˜Åwá~¹‚û‹fQã‚õ³W’k ˜¥{ fri#Áê­wÀ7Ì<׆‚õd÷”½P0»Í÷¢à)Qðÿþ7p`Yß Â“ÒIp`øZ¹spàcóŠäÀ²m°JK´‰˜åTõìˆ6›Á£\•5›™ç>K³‰Ð*0uäÀdPäÀ2€^3R§O„†®’æÂ'$éÃ'⪔®æqÕôá1Ü®ó‰Ñ'âtGLEÄ RâÖfqxáf¡’ÇS§YE€`5«‘¸Ì©hVsÍ*"Fu)\ý¶ŠeUÂÞ7/ÜЬ"âý^œ˰‹ÀÀâB™ A ¼3Ü7ÐÞGG{öˆ‚! &³™q ¬ãL‰ÇM¦þ$ ^¼üa°j”å`ð†ˆh•ì–˜,ƒXD?Çø†Gÿ»L:l,xÕ M²àÅmƒ ÖÌ` [°àØM‰IÇ™–,¸ÝX0ºêÂùàɂ啜P£Á‹"è\‡’k$KÇ5,ÒS;Ò`R;Ò`¾Ò`¾â`•S)0hðê•B žÝBºÑ`Â>Ð`>~Ðà&£ÁæŠÓ`°,ô3bi0X–áéÝ`0n›,xñ‚,ó"É‚É?É‚Ûq`Áñ­Ä*åYR’?ñ„õdÁ°Û1ìÿ×aöáa3žû^$Âìûù:6ŸñF„g¯‚òA„ý®;ÞLG$LK$ŒB‰ ãö wZÜp|©iÔ6»£†„y:"a˜5} ab o&ù#æ"nç–•MÉÉ„eSR[2a~#ûûŸ»fLØ¿{áÙ‹ï·‡"Ü^ˆpÛˆV·3E8‰ð3Ë‚·W ÜYðìŽHƒÛ+qÜ6œ÷çá4xÔ‡)]'hpûìAƒõ¬,k4¸Ýh°Ú\Uì4¸}4ÀÁº½õxj멘n8¸í÷ët¬~bÚÛ7nÑ p°ž¤éKƒÛ Üf)ààÞæ8¸õdààvgÀÁíI·<¸ #àÁí)ƒ÷k1¬Ž¹”8Ãyð®b¥òV¿458xpë àÁí‚·]ð`Ï4áàÁ:Îŷ΃ۓÖý•¢ˆ<¸½ð`õ²X=±-xp¿÷„÷É«†ó•6$üÆÛ‡X{” óæðÖð>Êa•HXÌ…v@œTæmH˜gcÂ?kL8.ö0©¡#a>ô†„ûãîW$¬Îæ‚S áŸu$̭߆„õ`!:v$üqNGÂW{0® ‚>Ø¢w ÷nðFÂ×ÙŠ3ßÐÛ:æØÜp?pï:@Âý5 ÷«¦fCÂõþ8ò`Ës:V…èØ˜0儽 P˜c\ƒÂýù 3òhP˜e¸æ‚©AaM7UͱAáþd…9Q5(ÌåCƒÂxþ¸ pýûì"fÓÿ&Ûàáòúò>ˆ02ö–çB9¸’§¿mAqÌì *I„5á¬ÕæDX kYRQìDXbË)"ëW¤N",ˆYe…†ö­áx£æÖ"¬ ÐÌ™'Vø”›˜o^½µaU„-­+‰°öRL9 "Œ¹!aôª†„›7ïÊØÚꆄñ•6$<ÕXØ‘°ôRUOŽHx ¹3ïá…„¥ã©R˜Í=X>!é»H$¬%ùTÌ¿Þ=Xþ‰åýG÷àËËÓ}¸ï¢×7œ#$LÙÞîÍ;X q+éðF¥Åð4ß¾iŒÃ`|z}JZ*vœ¹u°–j´¤š´¦Â„ÖÁÚSHŸ®ëà2ÖÁr¢ºvƒÆZ”€ÞÁñ“æ|ѽƒã[L/8z7‡ã:‹õÊ­fÜŽƒy°Ü›ÊÓím E@3¦4̓µ!w~*„)²hÆÁ›J >I3–óPy}Ð8x+ÔÓ™0U Í8X[RéfÔŒƒ¡ÚhÖÁ£Wç† ­ƒ7u´”ÔÁ;xS…úÇÆŠÞÁíÎ/J+Žˆ¨R}7e…G÷`h'š{°öžÒ‘¾¹·ãàŒíoraw¥õUsޱv*·paÊ8š{ðZ•å:Öoîam³oæ¢qQNá&Òt†$ 
Ùó7i¼ª€pªiLcpÚ¯zÊ9dÓ>Ø• Í?¸æX˜ ƒæ ɱ0¥ÀÂÜÕ§R¸ý¢caÝ@üñq•–MöVôXxÓfRmB ·KæVÓYûS´¦c7¨ð6*päÆ'¨p¿L§Âí SYòR ³ƒƒ ë4[9­€ 7¯t*…Ýóœ:ažå¿A0må©^äÿÌÜ€Â<DÂ2Ÿ1” ‘ð¢¯-çt€ámq;·&Žã®ÚMLXÂ\1\&Ü®2aþ"e¼JÊ„y•” kr*Y>eÂ*8žƒUÂqºµÐ Óz:áöÄ Fyò¦æ•P'Çm¹Bo:a^%uÂí8è„[t³ç¼6ðìy´M'Ì·Gp;:a>ηP¸ý%„¸é.n 8J…q]*ŒëïRaÕèRá9FVgÑ £ÌE— ÷«…TxvÌ.ÆêZáE!ÅTFÁ®"ܾ †9³v­0úo× ·G@­0*‹t­°°€±zj… ]Q+¬–v­p{ÕÔ ÷Ÿ¥VøË§DuºD¸ßÞõİ\õ.n?H‰p»1J„çèy„ŨŠ!c‚$Þ”Çuζ·üqA÷Ÿ…DØ xt…0f‰®n½ a Pš@‘hS#,è áö=Q!¬ ÌI1Âí…P!Ü áY¥.jÛƒ áöX©î…p? á~µM!l…]ºB¸u‚õî_ö÷Áàßg¡ÿˆ‡8vÖ>0LM[%¹æ˜$¬bîY½ª!a¥ gbNCÂ1:•´ÿC$ìõΉ„/åßPfé4"a/ÅF CaƒcW@x¬éª<€ð¦éx„—ÂB‡^'Ï00Á°Öõg ! „±4$nMŽƒ£ãMgîËs”n8Xr©Ì'¯Z¿eàƒƒUÃ`ÙVS[EŽ fÍÀÁ´ '¦Ýi°ÜÒ-É·ûÖµìeû¬ß¼ÊÔÏ4$æÀݨ°Ç° /JL6Ô &Ìø½Aa…–U¼¼AáEô+…8 sÅF,¿²–ìŠX8Ž»×3©o!a-Øæ½Ð=°~Ñ-N€„µ†*1ZCÂZ­–aiCÂZ΋AVG‹,­L!°O«Ó$g|ùE ë_Ç/ a­»KŽ×°~¶yïrrÃ<ú}ŸiÄGI¹9^Å—kpÝyÙçñÇ?¿þx¾¯&þÁëÿï_þý{à³ —~yù!ò·/滈÷o<©nðg¿íöIçø:öç^Ÿÿ˜ªOü€+8§<ÿøã={„SuÿÏüØûýýâŸÿ8kyøcÞÀsþñÇzö×C:þû?¾q¯ßüãß¹•ù‹`äŠéíÚ¾Dì£Óˆ·ÎCõ?~z[Ôü—9þo‹ ÿòÓÏ_þÏŠÁú?ù¿_~úßÿò?úµgZ#‚_Î#¦Ò¸Â_}¦í;αyÄÒ[Lú1ÿê3<Óý_ó—u4üôï¿åOîû—]1WÛ½/ç=ôÎæýuæ¿ü·/?ùï_ŽÿïËŸâ6Þï±k¢ø§wÝ÷ÜðCGl¿ÓX+š]þš¤4íïàôã*¬íM™*ÛúŽhúÕxÛšÀ¯tîû¸kû±sø½>QÞyãìì'¼ŠÚOÆ×É~‚«ù±ýä5V Ÿðj>úÉ¿E¯_;~¯Ì­8oL{,²µ=qʦèsÚ˜¾¼Çîß<;ñ,±”‹óoLN÷s–åwžf5Ŷ3S÷ß9ÍöûN˲¡ ?ÎiþÆ3ËÓý4ÿàƒ‡^óv­_bŪ×2‹°Y;…þgœà_ÿøo?ÿü‡¿ýíýë_ÿðóñ—ÿøÓï›T¿üª\L f_/¡§k”*g{áËÿÔ^§ñ[J§8p=ße|/ó±¼/„¨Ä3c¼¥¯ßÀú*#èÙ³Ñhê›èiÑÕÒõ޾Ƌz‰hTé¤1÷î&}§ï™Gã(%ðÇØƒÌbô÷iÛ‡6˜î§ÌÅh¼ç‡G¨$Èd…Øï«62´gãÒÎû® )…L¡¿'W­§¢‘¶*¡9VÞ´ê›âN½ñÞ3g9b·{I«×yZhÖwÇBþ1•¼vÆŸœåãë4¥„'7OÎôj Ѷ}u‚ùåÉw_©7ˆÆÃ³3¥ )—\5F@û¤,Cõ§{ŸhK?½Š£qØm?)ËÛ×Ë*ÜÇ£}LûÖ9ó´æøû¯Óñôd&´ŽFøŸ˜[L4ªŠçÓ•i»m‹; ,ÊØNKÍyV¶ËÓ•[ºûC9g½C‹J HÉ¥Ô&ɵ½«Þá?¯R\ÞÚ xrë¢qsÝ{Üd<©GŒ=KíPŠKOt¶Ã…ïWœ"w%¢ñTÉ­Gpy)Õµ>Êh4é»P[úuFã+—’6Õ“íÞϵ…0¯ÂÂOO–w)v¢qvõû)wóܶPヲ,ÓK³ ²Íq<cxÈA™ŽöѸ¹úFóѨš÷©–\î~£ÑÔï1$ÅWüÎ7‹ÿ(÷ÿ‘»÷{´.~?â®9¶ƒ²Ìí÷µ¾¬õ6ñ;üÑ㣛Lü. 
ö¢£íÊAY—ºÏ郒4ÙéÌ“–+à‘ç»+à¥ÃMmf4®€ßôP§œ÷Ó%ðí¡î§Kà‘Ê—Kài?ï72<-5æ€ÉDð[ü»å)åm³‹à‘î;a“½®Yì=ÁÓ`}Vµ’Û˰ÉDðš°×š±ŽÝEðÑ öt¯&‚¹ØT.‚oOååëËñÑ®ïìºh¼\¿)NË n>nÁkŵß9¸ˆN÷G:*ëfGnïIEÀkDFÊv4.&÷;8WÏúlwpn&7[z5^žXÏsî&·:óyxò'­ðçò+Lîïø¼<ý³õŽó²8¹Ùïϱ®¨Ðvà5™Zêó5[œ¼ îc ªÂ)®WW¼FÆ£±âäW†Ï|­Lþ´,–¸X‹Ž[ÆÉ|íLÿ´Œc+3q:kʽN&€Z2ðh¬ð˜Ù¡Ñx1t*}v4Þ·œ›èç¯$Pº¿Ï±^«ð˜y‹£ÑÒ@Ûl‹¹ 5»Äüžaã½¶DÐ’GFãfá1Ó±£q÷TP¤5ÆÛÓ‘,{1– ê<l·¢ó}y:(SêâUxxŒÄáÑx[F’å´Fæó‰÷ƼЈ{Ók™ÙI–X±-žx稜X±Mž‘ä)Rj´™éœq ³Â{«Nw_jÑøÛÙdæ).óÂôPK 3µn‰ÕÒC=3® 3ýq‰ÒCç’ËFãá23@ÕˆôPÏ,\b5g2’]¢ñbz¨'.ÒëÔpÜ®v™,BÕws¹,³EÈÌ©\†4Ìú2¦‡ªôÐ#ˆˆ¯Ì#d- ³l|4nLõ¬ÜEú˜ûÏî${Ê~´#·—©4Ï"ÂG •ýl]™{3MÔÓòÔh1r{²ëô¤‰ZîQüõì1²&¼2)\”ºRãò(jü”}^ÖÕCäöd¢Y¢ž ¸(¨†eùFf`¹()É“Dy­“D—Gãé!rûÕ‹I¢øÆ×Ë#dQßÌV^”Uc2T›¤Guãû¬ÊßËëʳã{ÔùkH–B°P‡->n¯PqÉZ.!QßfìEfO5$ˈ0%ÖÑxx|üÙhñ±”8YüPª"U@åÌEÙ2°Iöãþ€î–$ê“– ^Öñ³û܇›ïK}*ÈüƒÜІ¨¯¹¼Q>÷Fg7†hŒZ9Ÿ»“æ…ÅÄ‘µF­ÕlÁ¨/IÏÍŒ:_Ͻ¢>G¢á•Œ€¨•Øj/šŒ:%MÉ)µÔÝ¥T'£V*m<»,¨åŒºµ‘QKH^Â߯¨Õ¨ŠRïY‹ŒZɼ³±;0êSóÛ’µ±È¨%lŸÎ±`Ôj4É)!µ¶*LåJH±ʺQ„Ô§J„£>¥ã+‡]2j­ªöL•JF½_iŠ×u4.U…¹1êh\'£Z`Ôç>¶Æž¹—Œú”?œC0j- #’QË‹³J?7Fí¥›þܵîƒQŸZþj£>…æœ|ɨc•{ωĜP+“â6:å„úTÞTZ|6B­…sÕn„ZÛX¶* >µŽ.-* õ)üQ I¨õ«“jÏùs'ÔúÙ*IÝõS| .¹ð¢>•#ïëxnÅõ©´][ØQ ;l·^ˆ:9ª RCÔÑxVe¢†¨OåÚ3#¢ÖÏVퟆ¨•d3Ž{:@Ô?ëˆZ\eI»®†¨OÅ¢Æ{€¨µW¹Uµ5"ê rF}Fàpó£>•Þ©ïdÔ§òÉJ F­ã*Õ»1ê8P>ÑO\CF­ÆJÍnŒZ©MU¹§Aj]ÏjúùÙ×· õ¹x¢xƒÔ§TÿÆg©u9¾åH­#ã-€Ô§ÄìY<Œ:Ú´‚z>æ7¢ÖU©©!ê3úæQÂy"j]Èé:œÖ1Uá¦Áév €ÓÊ=«â# NŸÊž„øDÓ:®ª>44ýqަ£qß •MŸcqWhZGN62:šî×ãhúÔèº>‹š7]MëŒU½¡i²j"54ÝïhúãHGÓÒ"T©˜†¦ûëš>•¥]K¢i=3ë&š>µ814­ ªzK/4­ª¸ECÓ: ª×44}ŠÖŠhZwPÅ/š–Çìñ‰¦õ³U¢¡é#MëÈ-5šÖ}™‹9Ñ´Ž,s‰†¦?îÓÑôÇÏšŽ¶Ã|ù¦Çc.¢i–®lhº? 
iÒlçɨuΪ¯Ó õÇÅ:¤>GŠv‚ BêsÜV®ê ©u¤9èRŸJZ¬µ!õÇ9RëÈ*L’ºÿ u4.K-Y ©iöÓ u©Ï!IL‹&Bêh\­F!up€Ôjtõ õÇí;¤>†DÅ«€ÔzOîÖŸ;¤ŽÆi®¥9 µ´: €ÔqÆxxR+—ÚĄԺÖõž}ZBêö©u­ÛúŽå‰¨Û ¢Vé×J‘#¡îm¨{›óiµÅ€ûDääÓ‡|eJ¦B>{L¯yùM§û ÓýT ÓÑ ¹¦ þht:­dø2´otºÝ:à´<¬žN8} ûå„Ó:rɤá§?nÓáôq»ñMƒÓzz¥$›îgt6­ÃÎJ€$›þ8ÐÙ´nrÊŒðƦû¥‚M÷Ku4ýq £éþÐÁ¦5ãß’[­ï‹u6ýq'ΦÛMƒ›þ¸ gÓýœo6­cLjF6JÌÍ]/°é~6Àiyo¹ýN8ýqN‡Ó€Æç‰I§?nÂéôÇ‘N§{ožþ¸ÇÓ½ó€OCë`êþ®P—Û5@ýq jiz;êÞë¨ûc Ö ÛJXJ@Ýß&uìÔѨ¤­'Ö% þ8Òõq¹­VÔý±ïsÿµï­sþÛ\ ?Iuº0’ê [¶TOt¦í¤6£T÷FjX¤‚T4:ª¾&·n¨ú£¨úpß¼Žª{££j9ÒK]£j‰9?ü†ª7sÀë¤ú£Í"qm<𼄠ZvZYZ ƒjyÉ›ü† zÛ¸JÙ Pg5ž¨žÝ ¯ƒjá`}T“:TK5?×&ÚªõMe6@PÝÆ ‚êKTk<•”œZc×\ÎäÔmz'§Ö ¼ï)ù¦>†9\-™©e<´–£1µ,BÏuÊõ 0µ|‰®© !0õðOÊ"$ÕG ¦–vN—º»pœúЇ PÁ©E-f‡Øª£Ñìø¨>äšPŸAõ!X4o…›T«Û°5P-ÆâÂO€j=‚ÛÒªå!eŽ ÕqN3’k úP—/aã‹S óm» œš5çîœúŽŠ…ZÁ©uÎ{5òëœZgßkp§Ö¿—¢uàÔê¬ñ‰çòœº}XÀÔ-k†˜ú˜ÝT«aj1­Év±Sê cj™}™v”ZÝÑÜ&H©ãÀ÷öØûGR4:¥Ö¸l,¥þ8§sjQ½Ãv¹À©õ}¼âÄw£sêGàœú˜Ýí®qjf‘Sò¯tq÷ Ts5ÚTÚq±Y Z–m.®ÀÓ”ÛÀÕ*œçÊ?ðê8§ù"6^= «MÉ ÖºC—ü‚Xë‚Jö^-k=sÀ!¯–Ùk—Á«„«èyõ1–t^}Èìq© ðêøÑÕ5¸àÕ:ãyÖ&8xµNé‚qðêcvûÁÆ«uAKmpW¢Yæáê>\W+‹+f¥ÒC;®>d}k$¸Z;9Ëv\-oÂÕv„«{êpµ¾׋W"%¥Ð­>äLgZRÐêc¸›¥PŽ´ú˜Ý²Ñêc†7iuœÓ3‚«å'ÓN)°VËéñ®Š®„Õë°úÆ›!8¬V—Ž_,Á°Áêcr?¹«û X­‡^¥‡¬Ö)o‹«U<óØçüt«ù,UábÂêö`ÁªU"3FºÜ@«þ¸gÕ»ì"k™JV­4Ub²êIN»¼YµÌ¢j}<[Áq¢ê˜-úU·ÔY¢j½’¥V V³ŸVË«Vê‰>;­ÞO8ÄWïJËdDâêý†Kqµìj×Ê7$®Þ/ø¾uüívg1„¬wñ&ƒQÖ¬XÛ€µîÏdWÖzÿqý®¼Zßñf ÀzW¹¼ ;¬w=Â=íÖz2sÅÖêWåÚ€uܦ"ïìÖzSgZ…“X÷GàÄ:Îh²XÇC!$ÖJÈ>+ã’Äš…‚±Þo÷ŠmÈz—Ñuå1Y+F[³(FCֻʡ^¹Jd­ÇnVšDÖ»fÔ­¾X ë–YïÃP´²ÞȺß>µ¢Ðm+Å´k¥œÇ#L¤b­04žQAi'ÖzK¥±’X+žÞÓÕ£kåÀ›†ŽÄZwr¿«õ6^Ýàj½ SôWë>¦JÇ%®–C€ùkW³~ quûÀÕýë®n³9qµF¹òCn¸:FÝtYÄÕúxÎÝÄÖŽ«û{®ÖÕn•èL\o+¾‘¸WëVL¼I\ÝpµV1f1:pµN5e9ãïÅÕSâê9þýoaÕ§Øâù1}:PÙÖœ?¸ÍLV-l2›ý-XµðW¬µµMsþSvå7çÒ¦æü1LÐË5–ÎXN5çˆQ½t?Ä‘2÷¤z—Oë¹”ÈÛHu Ѫµ`-¯îOß²iø~´ñ7ø~œŠ´~AS­ÉëÌêß0þXÍL€ÆŒAšñ'ÓãKšñ‡ŠJf›ò6þPíï»4þh6#4þP¬æJdìþÎ_0þØ£[›¦˜Æª4P)ÿôýˆ#œJá#}?ö¯®ª«–ë‰ñ-º~¨úƒ™ Ðõ£7:ªVÁ„=ëz7×}c¦†ë‡B`8;«î? 
V-›–ÃòÁªuÊÅ2ÅÁªuN¨ŸU뜋1w°êþÀªö@7í¬úãHgÕÁYu¼Ëè¬s~=`ÕzÑ€XÝOXÝ*`µœvL‹VÝÛUë:̧‰¨Z]õ0 êkuT­{?Ì-¨Z×ãÖ.`Õýó«Ö9÷ã-¶!©îߣ“j]+õT·‡Rýqœ“ê~¡ ÕýþAªû»©æÄßHõÇ‘Nªõp&ó¶©îo ¤šœúãGT4¾@uÿ*ªû7 P­\ͺÃ9õg›àí»[0uÅÀÔý ¦æC#¦n—ꔺæŒúã¹£ÖÇ/c†‡59£îuÃÁ¨ûh Fýq¤3j#ßq£þøY‡Ôßh¼}(fãÁðÛ?U0êÞæˆºï@ÔýŒBÔŸg‘qÿ5 ê¶‚&£þøYgÔz žeF½•Ù„8£Ö|!Cx0j5*\š HÝfb@êŸuHýqŸ©?Rº—•äZm³y¬R÷k¤Ö³s!¤n—HÝ; õÇ‘©?Ž¥n##)µ ‚XŽ)uotJ­FOº"¥Qfù¨<”ºCJ½iÖ¬=sPê~û¤Ôí‘“R+0³½/RjÔ-m˜úë͵]OLUC²cjM¶ýL- è9UÄÔ¤ÄÔ*¡svLÍk%¦nßVÃÔ|x SkM\g S72 L­FË8ûÀÔ¼`j­ä¹†©ûÏ‚Só4NÝÁ©Þysj¾©Æ©Û•S³4NÝ§ŽŽm°£ê¸Ì¹ì’>@5n šf• T«JhY†5Pݼ!óH€êv$@u; š“HÕB¨îÕýrª{cÕüY€ê~µƒdŸ¨æÞ@u x1CæAuë®Õ­= ºõ:‚êUQ†ùe€T·Ï€¤šH#Õ­K’TÇMÜ%gh¤º $ÕýVÀªÛxFXÝ> Âê~ΛѲÇa„Õ;¬n#(au «ÛÃ#¬nL‡Õ>«5Xݶ «û9«Ûó!¬îGV·Ù‰°ºu/Âêöí¯¿›Jšû}æ³i«›Eu¬d÷] ÈŸœzõ×S3·³qê[¹t†pÁ©)ïkœZ|èÌì߯©µº¬Í®Æ© 76N­B­õ…5PMåÓ‡E5*qÁ¢zTéªjk°¨n%ZTo(ÒETkS+åÚPu\ëêVz@Õ]iT­%oÕ=l¨Zò³ôi¨ZçîZÉUsc«¡êQq®bj¢j*ª>QŽ.Qõ)öý¤ËTKˆ½Tš=A5÷µ¨nç"¨n„Ÿ ú”›¸Ù‘€T‹UWn¤º TÝÉ9XõñU¯"C|²êf'NV­s¥a«ÒÍkÛYuÛ «n[dÕ{ŒâUp­±j•‡3A&Yõ®X¢|vɪ%«–_pÕŽûdÕ…|°êŽ£''$Ø!«¶b‰þ`Õæ?¬WÒYõò‹ }OXݤH¤ÕfV7¼Òi5–Õ¤Õ ‘æ’•’Vcrë´Z/ÎdîVó>Á«UÄÚMGnÄâ@/àÕâBfsB`ÝØ5Ÿ¬Ûê™Àº `Ýn„ÀºbkîU¢ë†¬c ƒ—ÉÅX+v"ë~Ÿ÷‹{ˆÞõª1¨”jDÖ,Y+07A.¡u4®ÛõmfÝ ™µªyÎå6OfÍз1kÆéZ«Ôg|ïØºß Àuotp­[N`k•vu¯i`ëEsÉœgÆá(È@j½(x-!0©59¡5u: Z˪öN‰FƒÖíZÁ¬û圌”æAjÝ`©õ¢eÓã3˜u\„ÛØYã%XKQ©aÖý7¬ÛX«ZrùÜ5`ݰn×^ݼš´º7«–³+ÔU·ª `ÕZ›[4Pu “%~U·UËÑzLw¯6jýŠûAƒTÇk›]¥ R§t‹’êEŸD)îIªã.絪ITã©’S«ø°e0S·}8õ¢G¸ý‚šºÝ95.„”Z‹ºªQÜ(µ†j÷É¥î—JÝ7)µ’ÌÕ›”º=RêöŠI©5®kÆ>¤Ôý™:¥f)5¯µQjvF©Õ¶1/hPêYsYéïH©ñé4HÍO§Aê9>³‰%¤Æ){¨©ùÔ¤–3«ûaRÇ%hZ [³ý~Cjåh”SƒÔí©ÛeRÇ«¹ÝÐZ¹f³É°©{# µ< Q·—AD݈ºu"ê~—@Ôíœ@Ô­WQ·Ë!¢–‹¶[HQÏšpJJDÝ^5¿È†¨[/ ¢n‡ˆº D÷e~^ QÇ}ÝWêõ¢Ž[1?¯†¨ÛCxu{ªDÔ퉨c½™¿6u{‘DÔræ6ïz"êÞDݨû­QÏ ,Ò± !êÖ{ˆ¨®4BÝ^êöð@¨Y¬êY3{9“P·ÑŠ„ºuWj0å‚Öu{<$Ôm !¡^tµ•ŽABÍY´jÎÔœ' F õâÓË×óûm©§ëÇ©îòN;û$Ô»,„Í*£ÕPœLõÑj(ÒH±ÕPììF¨=¼jExs.å>uDÃG5‚P+ä¼íœ ÔZ[ÞVš±jÏ#&¡ž”|džØ7’§cù†9uË«jBêó+$ÄÐQ7nÙuÔ.Óo:ê&£ŽºñGÂéU½œ§›Ìêÿ/îkz-DZ#÷õ+ÞÒ¦Ó’H‘Þ°Þ P;èÝe +BÿÌ ^‰'"ô²:3 ý ØèÊdêê‹"ƒÁ8JNèSrÚx %§mr“Óˆ Eõü[˜™Ïè œI§ž-qã/NO£2ÿ`i1ýØ?‚£é-¦¨sÉoÇ\?t3Å\?à"MêLuý@g’Y\?ÌgÃ]?ú2¥}âùá'dÓP;,jӞĆªé‡Õ­«é‡&“šéG4 ÃTÓ 
<}š~ì¹Yi¦Vò®¦1És‚–š~ø‘fú±oÄ„¨é‡ùrý(pHþL]?ÀŸ±1³¸~P©d죮(|!æM]?À–‡²º~Ô®“_ˆ¸~h…й~Ä×RÔ™ëf¶ŒR×óCP×\ùd„Ôóï†=?ÌA¬j_Ý>bxÛˆ~Q·{*êöa{Œêö#e¢ÔíC+ÏÕìÃÌ ÜìãäÌ-5ûð#Åí`˜¬Io·˜(‹ nÞ(nÑȦõêö OŽÆêöa—¢nPÉÕËÜ>¢‘òÌíÃLÔíÖ)ix3PN‘ù}øÏŠßÈdrõ¿åÓI!=ê÷g¤Øóû€;Ùïªß‡V‹™ß‡™L¨ßÇ dÒŸIý>.R‘~Ä9òæUóX^Ú÷èÕð#.ˆ²ƒÌðÃïS ?´þþeøa[ójøánbøaAØêøá?ëŽr¤8~øEŠã‡9e¨ã‡ý)96Šéy8~èõ˜ãG¥$sü0&¨ý”æø±q¥¡:~ÀCø¶Þ5¿½E¡§í03ûˆï-«aÜìƒÛU·L– 7ýøQæ¦G27íZe§ÍšCØi»ÿ‹œv/!§ßi«élBOãÍÓ~†;}œø£ì4>_Š3v:–Ÿå¶$6nZ›˜™öG¦ŽÔ´ÔRVÚ¯’YiEÌJ[›[|ð 2V¡áÇtÇ27jí1‹c™.FJû)…•>áÿ’ÖÂÊJÛ¯ )í¿*¤4œ³ÕsZ>bžR#¥ýgÅÚÁEJÛ-¸ÃÅ×'í 'mW©œ´–A?>âu“±´pÒþ³ÂJ‘᮲Òþ³æð¡?+¬´7 +mAXió*1‡¹Ve¥¡1$Çne¥ÕÉÈXi»Vwøˆ• ¢…•6Ke¥ÝDXioVÚŸ°ÒRòn¤´¹î() wí5åØ7)­j>#¥ý!¥í””¶'®žÔ6Z))­ŸZRûbIÕS›´€qÒ6<¨%µy)'mÃ’Ò:`«%µ]¬XRç `„´ÇjG­“ŠÙQ3,0:Ú^£ÒÑjtft´½F¥£ÕÌèhëÄJG›ã’òÑæ¤|´}rûêàìûÜ=~‰‰ñ)Á¿ýó[û?oŽÿ9ô~?dωãïÞy¯¹âcGp¾ÓBíÃ&­£,3–çqÔöASh?çÆ¨_ ·}Ô„p÷Ú6x\ µ}pç\‰ºúg1ήýD¯âCûÉø:µŸÈÕ|l?yÒOôjýdŒßÈ×=¾zü¯ŒõA+˜ ±ÿ~ N"–Dicy»ÆîoŸä,±6C~ œ×Þ™œÎû,Ûï<͂ļZÁ$œ¿qšúûNS;¶˜z,¬–õg6OÓü4¿} ð…{¬¿z¬¤ßb‹÷²¶ EvüÏ8Ãýüߟ?ÿô׿þô믿þô¹ýùøÓ_~׬úöuÕª1š ¼5=\û+öö1æ[~oYPbµgˆC+Û¬C*ËNÛ²¨Q !¹½}WBmò¢· ÉN®ðdñWTC‚ý–L.м˜Ï—Jm¡ìÃ%Éäþba½íÓùmÇ^/Ö;®rP $™ÜE…çHJ „$¥^<xÚ.+oÇ»[h›©¬Ø.®+PÎO½eŽ“k-Cl0K àß±_lSA^Q ¢ÊÚˆÀ/5ú _-6„ö‹ú)#«{VåÆK¥ ¡²IÐÏx|ƒ‡)£Üº¸%É$TÊ"É;R’è?¯‰b^ëm^Z¶•8ºX( ˜wvƒ Aûø{¶äÄß'û ~¤;eû[¯¾¼íbÉY¶JÛ d4Y˜]ð£ïŒ¾¼ /ÏY‚_â,¹;´gö3Ø_}yÛ´£oÌîl«l[– ìN½ú2„*¤*ÛI¬á6´M³´ÄaîW_Þ ùÉê74&½¨ŸéaQ ¶í÷«3û­Ä‹ÍMŽh|Ý}ô“Ët¹Œämÿ=,7˶M¶ÃKWÜ~õâm•ýG4æì¶ÙV¢W·ã_½x[„O‹õ˜#ŒY?À ÇÀrï?Ç,“Äø:l¨g­Úâ{¹ºñzˆr© m·D#ôƳ„£óÕA÷“ùMˬ]\†K…Vò‘±°d:s78¹88:i G÷ˆ\Å[êžTj \øXjË<ècþp3(&¢è×*Â=4Ç="¯î³ä<:4íü¬U*Æây¢ôêÅh$¯ˆãi;[3äÂQv$ÕÝC26u(©¹=½©å*Ú®÷Œ½"’¹—Uë&‚ì‚¢×í’ñøÈЯ yå=$c¦¢Û2L´ï!yÝÅ´ªÀ–ynI­»˜Ü”!½‡d¼*d-;ïs®»XÃP¾û5$c2¡BÆò¢ØïÎ\XŽ\ ­h÷ŒS_Ã1 ç©™Û^ i¹†i*¡Jã=ŒµH) ãÙß}y{\qî­£¦t–ó(/ÏkDÆ4KÉÊ…õœh\)<¹´˜å–{DŽÙ»SVqsÆÜ4Z‡Ì} 8ʈ»GäGãIºh¬<Ï0å{D^!öÍýˆ2rÌï9°F§àºÒ¡ÌºGdì”Rž)s·.àÍÙ3$ ½»/¼‰y«†Ñ&0)).Þb J÷À+Åì•ÞHXçŒ;‹•Ñx´{`~ül'¡JYHÆ]úÉæêË v3×éóßCnóDÛNqiåÀYæÀŒâBQ#–rÌ+ÀêÜ’ŠÑýS`±»7GcË} ‹¬Ž¿Ý–Ü‹ ¨cÒ—Ç>jŽ kñ)’çDÊÑÈÃò±§b&š%Ä–£eô ãr¦*6Útòwq@àwËh¼‡Š«1·1ñŠ÷ 3)G<þ ”±uk°9òž m’ãe0d;²M¤ d¼¥ø'f"R^ø‡wB¸:‡eîÜx.' 
yðPÒk9!乇åXŒ•Ü5¶ž;m‚E[¡p¶øP]y÷dLééÆÜ\Ųá*µ,'6íïQyµyF4¤MûhÜH“öÃP•pÆxž{Æ)TLmãyRh%‰’ñn)E3.Ÿd Þ+0¯O”Œw»fhWÀ[i0ŽÞ±ÞÂÀº"LæŒñ4H8ŒÆ]=Ž<Š¡1zâŒr‰ÇŸ>¼ªÔ Pó©L”ŒÉ>Æ Ú÷_Åêg¢äxç–9H÷¶^Tæ²U,A&H¶7µ±zЧ#·+w#¿Ó8%` Fˆi“ƒ\ÖZ¢m£ ÄŠ…è„È~ ä©s(†Ü-]‹ëÆòÔwωi’Ëñõ’Ƨ¤*ø JÞý7¹®X¿O„¼`‰ºÌI3>Ì@Z5;r šSÀ=ˆ4iŸ…&mÅ J’ò©PL„ Ї µ*T'!c¬8gN…èdd´ÍìÅ:$'s ö²ä”ÎQ!9¹áq´Å<;K£*¶Þ&†ªH‰Xe’€$ò‰[EæùÕ­WAr2ñ1NÙÓ•¨Br2ññ2 ÉÔ£THN&>F#•÷THN&>Æõ´t_ªœL|ìç܈æx ¤’~ÀuT¢ÎyÇ70—Õá/˜îsñYQ¹–øÕWiXa¸4ñ1¸JJ'EcŠrü>áò5ñq4îk®Øj,çRì…û$Ž©ŽÒ¾š]yÏœå wµ„ÇHgL×w4ž Aê¥_|Ý©XË{¶l'<öÞ%û4¸â‚*r'øêv¯ðž|5‘Ù¿8_ñ+©Àb¾úÀ 8Í𔯎ÆJÆ;ÆWb¥ð ã«cd¥ã«Wpµ)ÃUÂ#R!Ú™ kð -}•°›yfÙ³Ö°ƒ\ŽT¯ a úðÈÑT kç…°Æ2›BX»^]ëXõ´Ç¢Œõ 2ùM)kg䄲OÑ* ¹‰²6»8¥¬]Êf[ I•³¶%ð$­ÕðÇHë,´7ÂZd”°Æò&^Ô›3am† JXÛ‚A k¬âvR§ amÁBÊXÛ4¬Œµe¬mfTÆÓÂB:ua¬mÎPÆÚ—2Ö:ˆcI, ÷\â c £³õÞ0W¾dà%ìQ®Úfiåª-èY¹jœ-ÞÎÔ|1WmyäÊU[2rÕH#jij©\5rŒ`×}÷Õ‹«¶`$åª-‹I¹jd˜—Tv(WmAÞÂU[»rÕ«­Lµ%Õ*Smy0ÊT#ÚFtÚÌT[.2Õ´¢LµY³*Síç¦Ú*•©¶<eª-1G™jó W¦Ñ8¼¹(LµY8+Smá ÊT[ˆ‹0Õ¨[hãA˜jKmP¦Ú¯G˜jIA™<µ×1 OmvôÊS?®„éjËv¾Úï]øj”Nñ©ðÕæá­„µÙt+a'#zr&¬÷1Ææ¾›Öf ®„µÙ‘+a øö%Â÷y¤…™ÖF&¬½¸Lk|iåK„õ6³BXõIÎ…°ÞGGš $%¬Gam}£¹^ëwÉŽ .2 f„°ÞUÀ_Œµÿ­0Ö1>iîcŒõ‚ÞIì¸0ÖqU=-(…²®pÿI:J)k4òþ»RÖ»a”µ?¡¬ãNë e=Æè5KË„²®CĦ˜X(ëEçBYÇ‘R ”u=Yªc”5ü\·t+RÊG²](ëzš›)ë …'sËLY?®–)ë ­*í2eý¸Z¢¬íé)eëÙ÷9oeÿ²¤#•RÖv›“²®c„JÒV(k”uïNYëg®œujÜÔZ1gmÏMYk0%!¶òÖö(qýøY&®×ÃÄ5+sÓL\û9…¸Æ[&‘¿×ýêèï‹«ý‚„¸FãÊ”7×ð5¾–‡¯F&®ãH®®PâOˆV¨J\?ŽdâÚ{‰׃ŒÊ!%®õsW⺞R\£Ä5n“@J\ûÅ^ĵ¿+%®Q”N‰J\Ç‘\!¤Äµ÷I&®ë!E)J\?.“‰k€àN¤”P×F¦®Ï©k\,Iœ”º©X™fêÚ¯V¸k¿ á®Ñ¸Ò>‚p׸Z*ÛRîÚŸ×~ŸB^?î“Éktž=7 •¼ÆÕF¼Á‹’×#‰½®‡T'){•dgÊ^ûm {ýøÙ‹½®(èsy£ì5Þ$Åc+}íC„Ð×Ãq6/…¾^¤¢Iéë:H´¹ºQú¯MšWÿúZ½”½ö)MØëzH…²×þ…½öž.ìµMëÊ^ãœí6 {íïJØkïÂ^×CjД½†‘ý±ÍTe¯ýëöºŽš¯Yÿ­ì5n¥Ðv³×x'\‡$ìµ?a¯ý¹ { hs& 6ØkíŽßË^/“½^ãß uÝ!Ü»4YêÚµ¤j¢I$j¢1;Ñ uí*µ1Y‹ÚèV¯Ù`åŸC‚R×N¾ uí<¢P×IôL”º¶ ¥®‚êÚ¶´”ºŽF$ØÌûêÚ‚~”ºvVL¨k4î¤|êåb3BÔµE)u fÅèB]?™ºv~瑩•ïâ¿÷%7oêzW¡®íU(u=LÎ{â7êZ·º\¿eýàƒºæàu-öN]W*7pæ:à™ԭ1×Ê? 
s}€R™U€Æ\«N˜kû&•¹6–r×x®,7PþÚdMÊ_›ÎJùk ´8þÚ´fÊ_«ÒÆøk“°)­ÒŸ©µ6­ j­cxB*Ô͇ªÖÚEk­qeÊ_³êÉ”Öþ›]=»E‡¨üµþèñe#xc¯Õ]ÚtÖ´¥k{jÊ\«´1×v» °üR‰kQ­o}`7è ““$,Z1Úø0¦õ› SÚÚöµ”¶";(¡­–@ÓhkU m x–xÚhk?ò&®U?eĵ]‹×&±TâÚ–‰k•q}@µ’Õ-J\û‘B\›ˆR‰k6B\à (Ÿ×Vn¥(qm[›J\Ù¥Æ^‰k?Rˆk8•¸VצAUâÚä´J\3¥tµQâÚ…¸Vû}%®MM«Äµ«†—/çs­úÅÔZ³¥ûCk­²hÓZ¡ÉMk­W¢bkö‚ˆ­UÁmbkŽ_xˆ­õH[‹ö›‰k‹RâÚFHá­Û0ájs¤Þš‡rá¬1”WÚ+8-ÍGÆyá¬]ž/œ5&@‚#ª²¶ BUÖ2‘©ÆÚéi¹œ¢¡k>˜ÆÚ4ÁJX› yÕl!^óÞtµE2(]ýhdºÚÞ¼ÒÕö´]b-Z`Y³³ãSf½µ,­w5C8ÓY›8ÙuÖ<ØÝ–” Þ@ö’òi&¨AãR¼­ÔÛ"C“Ônõ+õzpÁŠðÓnæ+üôU_0Õm£"x¡§aäÊÞÚ7=½•ä A L¹fî¯2Ô°›Üç$¡ µ{"2C GÚ}V†Ž’íõ³wc2Ôn^( µÙ *C s´4¯üt4uñÁ`~Úl• ŽOwg}¼Ôc1CÏUjswR‚z§î(5~ö …¨Ô ”d,õ—4€BP£‘ò‹• ^?L„Ô ÆÒc1Am™&¨ã)Æmk&õ2ðÍÌ@dÀÊý+ÀêÍ•%‰BP÷³«=Ôý œbBP£qá/"¨ñ«æÝ»\)j7¸¾#°Æqs&¨Ñ¸ó–SÔ£1>×tÌ<õ¸Vv½bžz¼}¦Í(O=.–˜§FãÎJXæ©Ç­°¢yj4öPgžzÜçIÆ'ÌS Úȇyê>ˆBÖmO=γRÊÄS£1^W–à0OŸíG¯Ó%™yêñ³l†Î<5ŽŒŸÁ÷þp¬~´1O=NI¦¦ÂSwÍÖåÌS£1m™(¯P¿øÛ­IË<5w­3OÝÏUü˘¦Æì{F,õ¸Æ˜:¦í³ÔÏFb©qÂÊ.ØDR÷ÁiÊ$5¨3KFñ[c–:¾Íh¤©Œiêq=¢½&šú½ÆIU?‰ªÆ9O¤ U=®–•âLU£1À×dœ‰©?J²[aªÇ]®l3B\õ8W}«k˜«îãaцqÕã@Ò­ WÆÊÒ?æªÇ}ˆÚ™Èj4n¤ç¾ÙjüâÆâ,f«G#Ëû˜­oC”ÎÄV¯†J˜­öN'lµ¿Fa«½[ÆËiÒ“­ÆÏ6Ï2[¿ÓŸ¹êÑôªm;/3UÆ£‘“SÕçÊTõ8’qLU?^SÕãH–Ä1UÆx;YCÎT5Oó´žT5¹‚W¨ê~RûËT5\Óñ›‰êÑÔ¨h‰êG/g¢Gv^ §¸*P¢zÜ»]QÝGÈùO0Q#cm>ÕãNr¼`¢zHfŒBTã”ì·(Dõóg‰¨Æ‘1h¤åÕ#™¨o±owþˆðÔãá‘e¿ðÔnÎ<5KZV‡JVþBQÓ²Ø`Š;;ò3Em=Š êÇ›b‚z\ÎI~ÄO?> "¨ÇçÖÉb‰ j6˜ž¯i'¦§ýb.vúy ÑÓhŒïe–ô ==Ž\Øx„èégcÔãW9À ê@hŸâM#õh¦©Ç@æüÌR?>cb©q#Ì# K·¾ÎD Ï<ºŒ…¥~¼“V<•çûXêßéOçÛmçÉU/X(ìS-¬\5Ž$¹¨È©Í8WÄE"4WôìE‹WŒ•šW‹W7`WŒU¹vH¼bÀ1ö¼ùúxÅõ §Öë¡2}!¬×ÓdÈLX¯§Šô„°^‘ás&;$„õzªÄQkÈ­ôýaí¹tBX¯»:Bao"^Wê¿™°FÏ&¦«‘JHŠ­%bÑÒ÷$bÑO(‹] 4bÑRä4bÑRòîˆE{j±ÝŸsm5bÑÏ&‹ç;»LsÄ"ÆOâv4b1¾0¦F4bqU»X&#]X\µ.]#±$~K#­…׈EËòÒˆE»OXÄ0sKµ%^q‘ÊP‹W´ø4W\¤hÖâ-uY5‚ŽØß^dÕ¾Q&²jäRML-¢jä1à,ªKpœÉý‰¨ÚÂÊDSm‘LSS½Œj7 PLM5"©vQ5ÕUê´Ï)šj ³M5øŠŒ‰DSn„·USíY}¢©Ô •ƒjª£q'Ô¬šj £‹Û{ƒ5Õ¤’j ÞRI5’Ž8@ÃtÒÚJ$Õ¥’jäKEg÷"©FÔ ›…i¸"–íTá$’êâr˜×lEÄn)ÙŠ0¨ZÓ+C²cŽf§tQT[J’*ª‘0Ô &‰¢Zwƒ_zê8ÕÁQ¢§~œŠõÔqdC'¹÷NEOm¹R¢§F:ÐN %¢§F# 34Z±"ý&?FVÔ,#ÕSƒ˜d8«ÑŠðß"'%VŒ{ælQT£ß°ÿ¾(ªÄzYË(ªj4bšƒœ¨ªÌÈkVyi´b<÷•æHÍVŒahëùhÊVŒÙ4²ánDOš­Ã=ÍVô«ålÅŠçLhÉVŒþHlźnsŽ´lŸ“>ãh2ZQ_¤E+껲hÅŽ)`I±2+«ñ³£évà&e5ú+%t¨²Ús°4a MÚIYÂb¼GZIZÂ"B˜3eÁÁ²íDãJÄ¢7JÈbÁ¨³¾oGú«WKYÄ 
‘ÈÌRãg)¨ÃRµ¿ZÊbÁ§7 -e¸---e«×Ì?·”ŘßY¨)‹pÝÜ)ö‘Sã>Ïä24eÑN©)‹XT¤3–¥,nx@I>Ý)‹ ý2áÁb­ hÎb'SËY„…[ »4gÑ/S‚7Éó² Åø:ãAÌî¡I‹@ѤþդŘû)àÁ’W&,hÑ~UƒíW5hÑ~UríñhÎâ‚?•œš³ˆVÈjÎ"6í2CÊr1I“óˆä,ªÎÈ’Ôžj–´ˆí7ÏJÒ¢}<š´¸"b-kZ4j1Yæ­Q‹ÈâVÑõ+jQç‹Z´¡N£80,^vÚ’´hŸª$-ª˜Ì’íkÒbÁôšä»-põ}i5FÊÿ± Åø¦Ïtܳ Å"æÂ´hïXƒ­whÐ"†'²žÑ E$,X– E°YÐâ „8Eò𳧤ŠGËYŒéË´¸þ¦É’-z#-byL›Å´úŒv™5hqÅלõ´hOö Z$ ïóùy„-F‹«{Yy4|!¯‡‰ÿì/ªÿàõŸÿóÿ‡!w}¾‰Œ{û’ŒûÝ‹ù.ZþOŠüÌ·ý'½îôõŸKö‡8;pü<ÿõ‡½‚ûõÎ7ý±÷Ÿ¯üõŸ=7]>êéßç¿þð¡Wp÷óÙåß9ûë÷~þ«ˆ1.Û€oñP÷ {“ÞpAÿòãå4ô‡õí{\óÛŸßþã–|ûÏ·ÿý‡ýñ+Oƒ*6Cf±Ûß8Oœå>Q\×·Ÿj_ ½Æ"®ô¯?UÕSýÓ¿­oØŠÑùÇ?~Ó=Æ’(ÐòÛþªê1cÅjÎOsúóÛ?¿ý9þÿOz¯6r}ÌpiÃÅÿ½s@ÃGMK£çþ?®‚Ú>hÄDòŇúÕpÛGßàn_bŸÇÕPÛwÎB¸îCžÅ8»ö½Ší'ãëÔ~"Wó±ýä5VH?Ñ«yô“1t×O[,¿rè>ë± ƒXœ”˜2@8µÇR´<§ŒåmÿÎYÐN3He”(´öÎÌtÞ§Ù~ïyDcWd)•ó7ÎSçyj‡ÓýØûZßyló<ÍÏó· 2ÎXÆ Ž…^MÌí ŽìøŸq†ÿúù¿?þé¯ýé×_ýéó_þü—?ýåwͬo_WÀjÌ’Õ`clë$í9¥@sÜ@êì’¦ŠdG äÜz؆^qn ìÛF5ì[I™kúv¨5RΫiòµŽÏ)Õ2½wIÀØ!™²t„§MÛ—–$®:5/y»ÄïìÐJLuùØT¼7öRR¸Ò!`¢@¢½Ô¨tÄÎ/I7íPúL%82‡z 0âKIoÐeBRüwJI§ÅG’i{t‚)G‚¤¸Èã½n©éðß+ËÍaD·JÍGoEèØ½¶”ut$ë’Õ^{*7: ÝsãrÇ–ÿÔH#ìÒå šà–A#›jà÷}K.u‰£n2w Y>b¡‰3Ú‡-ä=c“:)šw{k’Û¨xœê‚}?©>ÜIÚ¤íH%šÂb¼{Ú\ÝÇ9ní0~eK}à>r©ïnC?§0ìØÓž:`ìÑfÔôÞ:UÃÁ ŒŠþ¤SÁh|†÷ÎAÌTÓÊCÛûJukxT”p²÷B¥iH\ÃsßIß© /мéöި̬ ®i:åìÄŸ¸ ¾¦Œ`ÇŽî”Ãâg3ŒjÇží¼â,Ç,3‹Ñ‹J¾`rAV‘ûQ©ª ÚˆxJ³‡ · ©XÊ ¢Ø‡Þtöcˆ%rS{?N*¿òG{®Ya…wræ†÷ŽŠ«9éãùQw­À‘ÄEµ»;6,'žDŠMÉ=•{’w½f.аˆ·K%M"¤lœ6o–-!ü‚Äþ »³2 Ƭ=x ôÇRË¥¾ŠQuhYª­iCÅ0;ò *x2ü 9ï£þò’¬7TÏÁüòöú’_•ú 7¦æ–Kæ[Ç«ÈTWs8¶G× ñšÃ1šèÚzPñ øÜTú·m¡ò¸úQQpÌ÷TAÕ%÷üP ¢•HÕbžËRøÈRíjƒžqÇxY$Ò°ë4Gãª+I6–r8^¥À*Æû¬JƉBŽ¢§PéÉPGeQc|£ºÙ@„b‚ 4Ötæj¥gHTûÜÈkÏÎñØFª\VzàJzn6l¶ä€l§Ä¦Ê¡'&QWÃÆÉ¡Ó"ÄV;Õ^ì£jsªŒìö爌䪓iHƒ™#2(û4~hûFu謤³mÌjÔ“;9ú7XÔ“W™µ 7{rÁ~Ðä÷Ûð±½{rŸàÔx7L}s@† Æ7 ]9"~£ŒŽ3FV@YG•­íTH™_t¹Ù±Z£Z|”)”hí jìÜl¹Ü€à怌wBÅ©­¯)êß™nº C#•¼UÎÔç&Ö÷Ióo‡ó¿¥È7ð½b‡ìõ-;øFˆ&i†|ë¦fX±7„e¤ UìX£‘a½`oä ööè5ÁÞÇØôœÚÞ¸ó)Ð{é D ÷€Ó¤%àú>¶ºð É04%3ø†xóÞYðݳR²3+øŽ¹xí©Äð=6ìÈ›IÁ÷YÃvFßæF èuC‚°}Žd@ŸÂo`* ¿¤ TøÝÑòm^èš;2ŒUôÝ1 eíŽÀï!IDÒñ õ~w$’åתø»ï"Vü˜TÒõþÞ$ìKñ÷gèb€õiê퀷D§6Ú8ò²NËø!6 ÀcP83EиÎéÎ0Úñ÷ÂëlÇßý¼aÁïÓý¬»xÂoò6øX•—à ¿ÇÇ3ýF ~C95о1ÆïÏå› oäÖNëWßÈ>³Ðãß¼$gŒ'ø>RE¤àÛNeØ{‹Îq‹ˆØ{;ó=öîRãóÀÞ õ¾ |“œü ¾É‰ä¿¹|Oáwðv>Ñü–©Uá7ðìË À7!7 
€[wU>.}Æo>ø¾L¨øÀßr±À‘DlÀuŽPŽR¼,…SŽI©Óz ¿¾ òúûÞœ¡o¬} ñ©{ ñUpŠÄUy ÅEäq ÅÙ:òÅ!RY2çV¡x a¹Š·Cl4‹þï4{  Ù3jF±xkâ± XqŠh@ÁâøÐˆí(ØI%z Å1„ØJZž 8V %o+¯Cy:™âX«ÄReÎÅQÃ⤠Š£Œ“]„Š£æ´—ÌÛ(ŽÑ•’LŠ£Ê7¦à÷™ð!àoÉL\X|¨ò(M°ø1ô ´Œ#,‹Ë-™[)XÜTŽÅGùïX0ExôÅ ÇZ þ‡wÍŒBq+ëW(;†“ ÅG)Åx ‹C2šQ(Þ§™v¦ ÅÍ„@¡8Lî(a\ øe’±bñciÅâ°Lؘµf4Ô¿5w®ïDŠ)R4Þ!9OFGÑxüK6BU4~âêb>¯1DàøYuyÀp¼oRª©x¼£€&çê û…7#ÁãØb!ÚIyG Ó1Ë(Ç)ãnöYà8LNB~Ç£srQœâñŽ:­,4R<޵3%”(E|ÝÊ ¢R<Ž»ÝÅϊƱø§‘§¢q˜=ÐâGÑ8æ%ê6ŠÆñ sŸPÁxß$‹UÁx|*;Ñ Æ1O’Í´‚ñ/ÊxP0Ž'E•ŠÅm›M±x‡ÁIº(‰n:‘,cql£¶ÜfS,¤Cža7Çó¦oã­ÈT¯dø)Eµ Æ£SÅÝLt¡`|”V¦Ï©qÙ—U,Ž=äX8Ü bq¿Áâ½dqÌ‹Ãó;:μXÁâH]¤)Y±8öYKn¾)ï«dYö *X<Þc'§Åâ}‘ QÅâÑ•ÙÀ°8–Èif¡XSA>Åâ@bå•?Ë`÷ÙÓCÚØpÔ §‹‚q0D”:"`¼U)p30ŽQ2·°o˜®×ª|Dè§"qôêÛø!H®¼6MUŒ‚r&‚Âa&q¤Ñ‘¢ðm˜¼Í¬&EáÛ!Ì¢pÔ‚m9Å( w¨((|=%–PQøÑÜoU ¨¦“ñ–ñáZí«|)ŠyEŒbŽÀ*Fí1±ƇÇ]néÕ¯b”½0Kq’ɇ›-“ñáE¬òáñ2ÖèpÄ#d&±ÒáN• > Q.¸0ÙðU¢jŒ ÇöÃ>Í• ‰üò ×ñÞØp˜ä®°²áàÑŒ _ÅÑâÁ†ŸŒ>• ‡¡DzðÞQ z\îyF†CC_‡ ðaÿ ¤‘áä}>0Τ:Œ ߥ®ßØð"eQƆWý>CÿDÁtƇ›ŽI8t#×µø6fÒ¨ð˜Sc0™"W¢Âê·s‹rá1 R+)®”æM†7ñûVðml¯’áH`8f8¼±ápÒÏì\ßñÀ¸dô¡D‰Ád’ƆWqe6:<þP²²Üèð"æf:œý¾~ïòu25ʱ¦jÎèp;§ÒáU|«j”sYŸÜ=¯2” ß>1¨bÛNW2|Sc#ÃÎS2|bÃÈð*Žà2¼P"»’á]Ìä ‡­FÎõ“ 7öYÙðUb• _Å—ÙØð"f߯†Ã·#M €/âXkl¸ñËʆc¤Jì`l863aÛØðCÜ) ߆pBeFà #ðG£ÉQ¤ï<ØðkTôÉ—Eߘ¶ÉHÑwY’{ðоë)t’¢oÈ_È]èðŠo™ä/F…Ë.ƒ o¬1úÆÅF§œ‚¾ë)¦®/* ¥ß{å÷0£ôžþzc« ½0~Ç'Ã…Õ ¿WÄڵ陣ð{é²1-8¶½H•,Zp`ªÇ*,§ Õ‚c²'¨Rð]\ˆL cfˈƒ›fYÄà ÈGÂÄàñ³K Ì£`œÒílcËh‰çzY›|x¿Oó'ÅßÐÊÅ¡øƒA!VQðwÅ‚©M3શä+€ãWÉY8Àd¤n^‡_ô4±SG»XaÍe#pø"ñ•"ð:Fâ¹W(:ÓGx4F' % #pØ-á<Ž~œw"¼îâ:©o†ü¨‚Õœx‚Ç­d„ª¼"–fJ^ãelgÊœÇÅÆ_ärA8Þ <¿ÊMV3¯ƒ%± #ð ±ì’;S‚Àá1Ù\Åßu ` o„£‘Ø!áøzJ^„×"‘´Âã+ãŒJáñ«Ûšhâáµpd®bðºqÔ®@ð k‹4ÉPïž…a Á‡I ÁqäÚ¡<Ö€)Çèãêý–‚Ãþ„–_ Áá¡I)æ¦HY9Ç^8ž6ÅU)÷ÛŽF‡¯›Ä´*w"¼®B2(‡gc‚œ18žA'ˆ*¼ÂY•V7‚Á1žáÜ÷)Ʀ{ ,ƒ£‘|`ƒW]¦u¼bpÔ…“´ìÆàÃn‡ÄC‚Áa^Æàñ{p'¼9Åàñ‡ºfI˜bp@ÛÔúÁñ¡R”ªbpül|ÿsOF0ø£‘1¸?SÅàÖwƒÇÔÞh6W åÌPÅàñ´T…Ý+FÁà6è(‡{m! 
Ç; ª…á¸Ú>^eMû ŠÂv46¾Iö©‚pÜd‡µ|{„û+×JጂÁ Àû–K8ÅàØD$ÌÕ‹ã;¹× dÕnHüLk±wª2™’xTe{¬48ÖÀéf4xtDQª0AL|ŽÕdbU•"EÅáÒÄHO©5™¶?-@¼/büª@< Û,+oÃ+7éˆÃ” #«©µ*s´GU&»Ïï¨ËI\ø"m*ùµ*s‘”CÅá(ÚѸTeâ¹§©•ebèhïáÆ/YYæˆE˺ ˆ[yq(j™Ñf nü‰q#¬ˆŸ±S+·e†àp˜…mé‘§8|_$ÓÇpø)9†ÃO-cPêuÉÅŸâð5üÇ»L8VeŠZÃá("McFÃá°—M=²áðSEŠÃ«dàǧ´Ó‘†Ãc9OBAâèæYR @<]Íp¯_Ȟр¸­ªˆ÷Ü+T>€U.5/¾é¢ð"ÁêÃv w¤-05w™Ãø€áì †˜[N†XQø´D¢qCá1FïëÂ`ÚP8–Ú÷·o0^Yî¯0hiIãkƒáуf‚plweZ¢p  ÂãñTšâTáž©ÓÝ@ø‚sÌšá kƒCWGr‚Á£‘S˃ƒSØ3ÂÀ1¸ÂlÆàÁ. Žc¶i¥°šXÁc²Ë”à~ýÀ+xnªU¾I^½p|9d‚ùàÒk€¯Ä{/0nLMpÿ:”·RIp!žÓìÁxL¡‘ýÀ; ;ï\ÿ¡D8R*©ŒC‰p(`³PI!øŸ3/Cˆ7‘ ap[m)‡;G–¾}ºpöS†Rñ}f pë%W7¦nþÿ~øÿçLæ> endstream endobj 3 0 obj 238799 endobj 4 0 obj [2 0 R] endobj 5 0 obj << /Resources 6 0 R /Type /Page /MediaBox [0 0 1469 828] /CropBox [0 0 1469 828] /BleedBox [0 0 1469 828] /TrimBox [0 0 1469 828] /Parent 7 0 R /Contents 4 0 R >> endobj 8 0 obj << /Type /Font /Subtype /Type1 /BaseFont /Helvetica /Encoding /WinAnsiEncoding >> endobj 7 0 obj << /Type /Pages /Count 1 /Kids [5 0 R ] >> endobj 9 0 obj << /Type /Catalog /Pages 7 0 R /Lang (x-unknown) >> endobj 6 0 obj << /Font << /F1 8 0 R >> /ProcSet [/PDF /ImageB /ImageC /Text] >> endobj xref 0 10 0000000000 65535 f 0000000015 00000 n 0000000145 00000 n 0000239018 00000 n 0000239040 00000 n 0000239063 00000 n 0000239493 00000 n 0000239362 00000 n 0000239257 00000 n 0000239420 00000 n trailer << /Root 9 0 R /Info 1 0 R /ID [<8F02FA314207DF7239B245A4CD6BC31C> <8F02FA314207DF7239B245A4CD6BC31C>] /Size 10 >> startxref 239574 %%EOF blis-0.6.1/docs/graphs/sup/dgemm_rrr_kbl_nt1.png000066400000000000000000006407631360743507500216220ustar00rootroot00000000000000‰PNG  IHDRâÜJ&¡ &iCCPiccH‰••gP“YÇïó<é…@B‡PC‘*%€”Z(Ò«¨@èPElˆ¸+Šˆ4EE\•"kE ‹‚tƒ,ʺqQAYpß÷?¼ÿ™{ÏoþsæÞsÏùp ˆƒeÁË{bRºÀÛÉŽÌß(ŒŸ–ÂñôtßÕ»­Ä{ºßÏù®‘iü常¼rù)‚t ìeÖÌJOYá£ËLÿÂgWX°\à2ßXáèyìKο,ú’ãëÍ]~ )úÿ†ÿsïŠT8‚ôبÈl¦OrTzV˜ ’™¶Ò —Ëô$GÅ&D~Sðÿ•ü¥Gf§¯DnrÊ&AltL:ó5204_gñÆëK!FÿÏgE_½äzØs û¾zá•tî@úÑWOm¹¯”|:îð3™ÿz¨• €è@(U  t0–À8à|AØø $ȹ`(E`8ª@-hM œà<¸®ƒÛà.L‚—@Þ‚°¢A2¤é@F²† 7È ‚B¡h( Ê€r¡PT UAuPô tºÝ„¡‡Ð84ý }„˜ÓaXÖ‡Ù0v…}áõp4œ 
çÀùð^¸®‡OÂðø6< á—ð"Â@”]„p$‰BÈV¤)Gê‘V¤éCî!Bdù€Â h(&Je‰rFù¡ø¨TÔVT1ª uÕêEÝC£D¨Ïh2Z­ƒ¶@óÐèhtº]ŽnD·£¯¡‡Ñ“èw †aaÌ0Θ Lf3¦sÓ†¹ŒÄL`æ°X¬ Vk…õÀ†aÓ±ØJìIì%ìvûGÄ)áŒpޏ`\.WŽkÆ]Ä á¦p xq¼:ÞïÀo—àðÝø;øIüA‚À"X| q„„ B+áaŒð†H$ª͉^ÄXâvbññqœøD%i“¸¤Ri/é8é2é!é ™LÖ Û’ƒÉéä½ä&òUòSò{1š˜žO,Bl›XµX‡ØØ+ ž¢NáP6Pr(å”3”;”Yq¼¸†8WJ)Hq¤"¥öHµJ IÍKËIÛJGJJ·IK”aÊ8ÈÄËì—é”y"‹’Õ–õ’Í’="{MvVŽ.g)Ç—+”;-÷H–×–÷–ß,L¾_~NAQÁI!E¡RáªÂ¬"CÑV1N±Lñ¢âŒMÉZ)V©Lé’Ò ¦$“ÃL`V0{™"eyegå å:åå–ŠŸJžJ›ÊU‚*[5JµLµGU¤¦¤æ®–«Ö¢öH¯ÎVQ?¤Þ§>¯ÁÒÐØ­Ñ©1Í’fñX9¬Ö˜&YÓF3U³^ó¾F‹­¯uXë®6¬m¢£]­}GÖ1Õ‰Õ9¬3¸ ½Ê|UÒªúU£º$]Žn¦n‹î¸CÏM/O¯Sš~°þ~ý>ýÏ&   ©†.†y†Ý†iñªî¯&¯v\½mu×êׯ:Æ‘ÆGŒ˜ÐLÜMv›ô˜|253˜¶šÎ˜©™…šÕ˜²élOv1û†9ÚÜÎ|›ùyó¦é§-þ²ÔµŒ·l¶œ^ÃZ¹¦aÍ„•ŠU˜U•Кij}ÔZh£lfSoóÌVÕ6¶ÑvУʼnãœä¼²3°صÛÍs-¸[¸—í{'ûBûªƒŸC•ÃSGÇhÇG‘“‰Óf§ËÎhgWçýΣ<Ÿ×Ĺ˜¹lqéu%¹ú¸V¹>sÓv¸u»Ãî.îÜÇÖª¯MZÛé—ˆÿ2Â6¢,b&Ò*²4r*Ê*ª4j:Ú*ú@ôLŒMLyÌl,7¶*öuœs\mÜ|¼Güñø¥„€„¶D\bhâ¹$jR|Ro²brvò`ŠNJAŠ0Õ"õ`ªHà*hLƒÒÖ§u¥Ó—?Åþ ÍŒ]ã™Ö™Õ™ï³ü³ÎdKd'e÷oÒÞ´gÓTŽcÎO›Q›ù›{r•swäŽoál©Û m ßÚ³Mu[þ¶ÉíNÛOì ìˆßñ[žA^iÞÛ;»óò·çOìrÚÕR V (Ým¹»öÔ±? ìY½§rÏçˆÂ[EEåE‹Åüâ[?þXñãÒÞ¨½%¦%Göaö%íÙo³ÿD©DiNéÄ÷e̲²·7¼Yn\^{ˆp(ã°Â­¢«R­r_åbULÕpµ]u[|ÍžšùÇ‡ŽØi­U¨-ªýx4öèƒ:§ºŽzúòc˜c™Çž7ø7ôýÄþ©©Q¶±¨ñÓñ¤ãÂÞ'z›Ìšššå›KZà–Œ–™“!'ïþlÿsW«nk]£­è8•qêÅ/¡¿Œœv=Ýs†}¦õ¬úÙšvZ{aÔ±©CÔÓ)ì ê<çr®§Û²»ýW½_ŸW>_}AòBÉEÂÅü‹K—r.Í]N¹<{%úÊDÏÆžÇW¯Þïõê¸æzíÆuÇëWû8}—nXÝ8Óâæ¹[ì[·Mowô›ô·ÿfò[û€é@dz;]wÍïv®¼8d3tåžý½ë÷y÷o¯ñy02*|ñ`úaÂÃ×2-<Þ>†+|"þ¤ü©üÓúßµ~oš /ŒÛ÷?óyöx‚?ñò´?'󟓟—O)M5MMŸŸqœ¹ûb݋ɗ)/f þ”ø³æ•櫳ÙþÕ/ M¾¼^ú»øÌ›ãoßöÌyÎ=}—øna¾ð½ÌûØú>|œZÈZÄ.V|ÒúÔýÙõóØRâÒÒ?B,¾“sMT cHRMz&€„ú€èu0ê`:˜pœºQ<bKGDÿÿÿ ½§“ oFFs¸NBIE] pHYsNNÆÊ/¥tIMEã2£ˆø­ vpAg7O£¨u€IDATxÚìyœeÿß¹ÈE†ÔÈ1„"nRc"gD»É r˜¥‡SˆŠÝ¨xu+²žh7º ·KVƒ²»Ò½Fa•.Eî$NrM¦H CB€)2!!Ìï§Ÿªêžž3ÓÓs<ï×k’îꪧŽþÖÓÏó©ï1ª½½½…B¡P( …B¡P( …BQVFWú …B¡P( …B¡P(Š‘À âÂáp¥aDaš&‰DÓ4÷©Û¶I$•>Aƒ²ãÁëº$‰¶¯ì¸eÇCžØ°´ÿ‘‚²ãÁ˾ôÅ#ÉŽ• ,96Iv Ê–šÞÚr©>Y;¢ìxè1XìxP q–eUúF ‰D˲…B^GÝW\×ŶíJŸÒ AÙñা¾Ã0 l_Ùq!ÊŽ‡=±áD"A*•ªô¡ÊŽ/ûÒ$;V6

Š®ë„B!ÇᨣŽêôø]×eÓ¦M¸®K*•BÓ4ïûp‡Ç{ŒË.» ªó®]»:Ýf81í¸+[*vÜ™Mvuü–eñØc‘Éd8ꨣ¼v/»ì²’vüÆo°sçÎaoÃòº—²ã®l”÷¤­ÎÎ :ï#»;·RÛuÖËk˜Íf±m›^xA4èWúËŽØ¢»Ïºko_íx_û⮎¿·}ñH²ã¾Ž)äµlvܶWÊno¿ývÆ7 cãSN9¥ËsëíØx¤Ø±¼öj|<4æyõɧœrʈžãÉë®ÆÇƒ\!?ëÍøÖŽGœëº„Ãa,ËÂq >‡Ãضã8Ô××{J~CC¦izÛ]g<õ¾¡¡ÁkömÏ´§ëu…mÛÄb1¼/.‘Htº¼'m…ÃaÇéö}wmõäÜ,ËÂ0 ÇñTýx<Þåñ˶å÷$÷SŒišÄb14Mëñ6C™¡jÇÝ}×CÁŽ;³¯®Ž_×uïx@Ü º®w8iÇ6lö6 ]Ûqg6,¯‡²ãÎÛêîܺê#÷µ?öÅò;N$¤ÓéJ›[ÙèO; cŠî>뮽}µã}í‹»:þÞöÅ#ÅŽû:¦×}0Úq_l8Ø^)»]½zõ€»;·ÞŽå÷<œíXž£y^Oúä‘6Ç5>.no0+ºÛNRQ;n$$“ÉöH$â½O§ÓíòðÒét{(ò>‹Çãí¡P¨=“É´†á-onnn×4­½½½½=—Ëy¯å6ñxÜ{/Ûîéz]‘ËåÚöÖÖVï}(êtyOÚjnnîÑûîÚêɹÅãñvÃ0Úu]oD"횦µ§Óé._¶-?knn.¸¦¡P¨=N·†Q°}gÛ †ªw÷];î̾º;þx<Þ´횦c±noïÜŽ;³áööveÇÝГsë®_ímÜY_ÜÞÞîýFõäºUúÓŽØ¢»Ïºko_íx_ûâ7}±d¸Ûq_ÆííƒÛŽûbÃÁöJÙí@Ž»;·ÞŽÛÛ‡¿··«ñqq{ƒ}ž'·+î“Gò¯½]‹ÛìãŠÞŽÚŽÇ–Oâë–e‰D¼÷¡PÈ{í8NKc(ò”RMÓ ”J×u½×]¹óééz] …Jº„v¶¼»ã >uèî}œ›ëº477x t&“éòø Ãð>ÓuÃ0¼'-òûÑu½`ûζ ~ßC™¡lÇ]}×CÅŽKÙדO>Ùéñ[–E6›¥¹¹]×1M“††Òv<ÜmX^—RvÜ™ Ëk¥ìxßÏ­«~µ·ýñ“O>YÒ†S©†a +›-EÛñ`St÷YWÇÔv¼/}qWvÜ—¾x$Øq_Æòz f;î‹ Ëc*e·òZ ÄØ8vy½;5>.>žÁ>Ïë¬ON&“#vŽ'¯‹ûÇ3˜Ç]m×Ùø¸«ý”ÃŽMhjo¾ü ñÊXwù—Ëå*}*ý~-º{¿¯èºÞáÆ ^ãžRü½´¶¶t™x¶/ûÌ(;îüZ”ÛŽKÑ}ÉTiÿÑh´ ygOìx¸Ù0ôü»)>weÇýO_íKnWʆ-Ë"•J1jÔ(FÀ¨Q£†]ÂaeÇ_‹ÁfÇ}é‹G‚÷uL#ÏŽ‡êØx$Ø1¨ñqW×b0Îóºê“GêÔ¸¢«k1ØÆÝm7ìxÐq†aÍf½÷ÁÒʺ® È‚ê§T(¥‹Å*}*C™´SZg¹YбmÛÛF¾–j±¼ùÒé4©TÊ‹ïj›á€²ãÊRʾ,XÐéú2@pûR{iÇ›6mö6 Ûqg6 ÊŽû‹¾ÚWg¶_ª/Îår´··{íííÊŽQvÜ_ôÖŽ{Û;÷56 v j|\IúbË]õÉ#uŽj\QIb|¼/ûé ƒ&45’Íf©¯¯GÓ´‚˜Táƒe”Ï kkk=×ôd2YéSrèºN$¡¾¾¾W×QÓ4Âá°×Y—ÚÆ0 ¢Ñ(‰D‚h4Ú£m†2ÊŽ+Koí+cY–÷}uví¥ßvÛmÃÞ†¡s;î̆AÙqÑWûên»`_œÉd*}š‚²ãÊQî¾x¤Øq_Æ ì¸?¨±ñH°cPããJÒ[îIŸ<Òæx Æ•d ÆÇmÇ£Úåã—ABð)ˆÄuÝ‚Š2¦WV’je0¦WÑ{ÇñbÜ»»ŽÒ•>“ÉxñÕÝ=]éË6CeÇϾØWO¯ýH²ay¾àÛqw6,—);î}µ¯‘f—½EÙñÀ2}ñH£/c ¹L]Ï}£Üc㑆WŽÞزD;?_PãŠb ÆÇmǃÆ#NRÊõO–ú•Ælšf:Ù e»Âqœêbz«”öW{ý}\}16MÓzí®Ù—m†ÊŽ+gÇ}±¯Þ^û‘`ÃÐÑŽ»³aZv”=ÊŽÊöû޲塲}eÃåd ÆÇmÇý*Ä­ZµŠ––jjjXºt©·¼­­U«V±mÛ6.\È¢E‹ääо ìX1(eÇʆC Õ+†ÊŽÃ5®P ”+ c¾ûÝï~·?Z±bMMM„B!n¿ývZZZ<#¾æšk8þøãùÎw¾ÃìÙ³‡uܸbè¢ìX1èÌŽ• +† ª/V ”+† j\¡(;V &ú%G\KK ¹\Ž{î¹€Ù³g“ËåhjjbãÆÜzë­€¯8wå®yÝu×qðÁWúÚðâ‹/ò¡}¨¢Ç°mÛ6¶mÛÆ¬Y³*z¯¿þ:S¦LaÊ”)=Ž_|‘+V”¥íþ¶ãåË—+ûÉ3˜ì§Òß À–-[øÁ~P–¶;³ã¾Ø°mÛüïÿþ¯²Ÿ<ƒÅ~ÃqlÛ¶šš.¿üò~o»¿ûb9Ø®´ý ;VÇÑñ8Î?ÿü~Ïß£ÆÆê8ú8z|Ü;5>2XúÁÁbÇCe|üÇ?þ‘U«VqÌ1ÇTú’ ‹ý 
6oÞÌĉùö·¿Ýíºý"Ä­Y³†ºº:ZZZظq#³gÏfÙ²eŸI-ZÄõ×_ße{¯¾ú*Ÿüä'+x «W¯fáÂ…=†uëÖ±nݺŠÇ]wÝEMM óçϯèq¬^½ºlm÷··¶¶Vü{SöSÈ`¸§ïǾtfÇ+W®ìµ ·µµTüš)û|DZnÝ:ZZZÊÒv÷Å2¥Òö3XìXGÇã}]¢ÆÆê8ú8ÊEŽ+@ƒ –~p°ØñP¿ú꫃Ž ƒÅ~¹\Ž^x¡Gëö›G\KK W^y%uuu¬]»–åË—³téR¶mÛÆìÙ³½ukjjºm¯µµ•Ûo¿åË—ÜÍ?ÿó?WÝ;­©©)P’ëêê<7ÿºº:ššš¼Ïššš¨ªªªôuR(: ìX1èÌŽ• +† ª/V ”+† j\¡(;V 6úEˆ[´h7nôkÖ¬ñž2Hã–ŸY–ÕmO…¢(;V :³ceÊ¡‚ê‹ÃeÇŠá‚W(†ÊŽƒ~ M­©©!s饗2{öl6nÜÈ¿øEï³Ë/¿œK/½”E‹aYwÜqG¥Ï[¡è€²cÅp 3;V6¬*¨¾X1Pv¬.¨q…b8 ìX1Øè·q2íÆ©««+pé\¶láp˜7²|ùråî©´(;V :³ceÊ¡‚ê‹ÃeÇŠá‚W(†ÊŽƒ‰~â ëÄ—ƒ%)¦BÑÊŽÃÎlUÙ°b¨ úbÅp@Ù±b¸ ÆŠá€²cÅ`¡_rÄ) …B¡P( …B¡P(Š®QBœB1B±mÇq*} …B¡P( …B¡Pô¶Ý»õ‰Â÷–Õùº®»ïǧ„8…bˆbšfÁûba-“H$H¥RXùžÄqLÓĶmøõ¯ï%‘H°xñ•>’Ȱ?:;…B¡P( …B¡P m\ 3Ͷ!‰qõõþü2 ¤RpG~ýXLü%`µù}:Žh7‘óÔ{lñyš]§(!.@*å¿v]Èf+}D žˆ&ihhðD¶l6‹mÛÄb1b±ápØäl;ÎþçV­Ú@,ã£]Í‚çsõÕGpÆfïÞÏ‘LžÏÊ•ÛyõÕm=GÇ…ç ç÷‰,Xð÷ÜÓˆU s …B¡P( …B1øÉf µ•ljk}¡¬¾Š|L<\„Ðeêð…üüÑÊ iµµðÕLŽÂ‰`b©¤ð÷Še‰|»ÊÂiøzVì/æfÑîM0Ûë:' —ºð¥¬8¶¯'„X÷°£ñÒ£k2b…8×-TH-K(›Ò%ѶýÏS©B‘N¡HÃüùóyüñc…\®¿~šæH4ðþû/ö»¸%ï#ñZÜ4¶m3kÖNÄrÓ„Oú>øàNþøÇ÷Y¾üaN:é9þð‡sø·›ÎñǯåÀ/äÍ7¯Ç0 >ûÙ-½ Õ½ªP( …B¡P(ýƒãÀŒZX–×H¤w›|HøÑK±¼¤Ý0mxÊ…à%bF~; œFø‡FáË)¸È†W¸87iÂmS~] ©¼cÆ'¢BLk2 ™ïëB K"„±,€'§á F‡üVˆ D¹LFh?ç¸ð·„øÌ¾ªÁ9øvÕ! 
äD“躸¡h# ¬?{"¾µG×ql¥¿È&›õCÛÒi_”K&…*:ÓW×…kb<.¶O$Ä¥ë}Û¿ãˆ/YÓ:~fÛ¾ŠcÐ4?l/®ôÕS $±X Û¶©©¹Ç™e%p]×Ü®¹âñ£9çд(sæ¼´qØaÞyçIn»-J6ûðç·˜óÎ[ìµË lmm@×5ž|òIB¡}:Ö{îyŽ)§M•+\¸Ô†Ûn[ϱÇîÏÚµÿú¯âÇ?6ùáÿ›]»¾ÍÚµÇsíµ³gÏoÈåfóøã_£¶Vç‹_üßúÖ´ü1é<ú¨Žx"¼ýî¼óNÀJ‹ûQ èɤ¸‡b1áNÜÐb]Çéû}«P( …B¡P(#‘lVh™ Ø:”ƒß6À®<`Á’œ¯>Þn1`‚m:œd3ðq„HG¬—Íÿe'.Ä1'?¿K&Éo,æR‡©¯úÌmqá¥F~ûàTOC´•^ÑaF.Ò…Êï³ÐY4h"Ü4•ßµA~ƒèºÐ•¤àqmãéyóztMGœר(¾¸hT\´XLmÙ¬ør5M¼N§Åç†!þÀ÷ “ëcYâ3ÃíƒïM§ëbY,&ÞÛ¶0žPÈo_&ý“¢!!õ÷'½–ä6Šá‹eY466²råF~úÓy8N3‡²š'žbY:-ì-•‚PH§¡A§±&ON1y²°åH$B(êv_sæÌáá‡ÿÈ7¾QZˆK$D£Q4MÃ4M6}4¿œy2{ÿ>–ñŸzŸw®>˜†iW½ÊŽ;¸û¿÷°ûéiüéO Ù±ãFÞ{/I6ë²u럹æš÷¸÷Þï’ÍnãüW4]ÇjBðê…ÓÈß"$ÝGP­<òQÞzë̘q-ŸûÜéØöR®¹f7¿ÿý6>ó™'~”H$ÂÂ…ò…/|€aLõò=Ú¶¸×åÓ ÃOod‡®D:…B¡P( …B1Ô‘QTý ¤"0Þ…ÏXðl2:¬JÿšðÛì¯ a+¥ - X:Øqñ^ÉÿI¢àÍý‚3Ö CÑrs:Ó,ÔMJMåâˆ|r»mŸ¿‡ðvK~æ3hÿöoRRÃéÙ…11Žº Óœ@$"æä‡5Žá¤sŸƒÓOïvó%Ä9Žï& ÑKNÂkk…â Šr’p^jM&Å—žÍ íÊer’/9éÕ¢M×û”â€Ì‘e¾0çºþvšæ{Â%¾H¨¾444FÙ»w ¦©“Lêüå/;yì1xíµðÌ3Âve’I)ÞÊ`íÚ½,\8MÓÐJ¹_°#Nœ™|È[æ8'ÅRì÷pÄãh›7Õ_ý*/ìc´¸qÆŽzƒ1Ÿú?8p4ot cªcüeñÖWNdÅ/Fó½e09 §-jëëYø¹ ÑÞ‚§žƒ¦Mäî‡ÿÆë÷ÿÖôé^¥™PÃO†YìùÂX¶nw\S¦GkëSŒ{¿úÕÉdª8÷Ü/ÞÂóÏ?ÊsÏ]Še]“OÌå´Ó~@*u î-Ë‚\?wàˆ(<øàb±946ú÷j8ìß …B¡P( …B1PÄb1Ò=˜ô»@²µ•puu‡½È§îÎ'AÌ‘žŽÂN ÞŠÁQÈ%Á‰Šyã¯Þf†×±xÚ¢SG2#½Ñ¿â¬'ûo¯§‘çY]-&zº.&u–… 6ëpë y!Û†‡'‰ôrïx"§¿…[]‹a­Çuãœ|ò®71¢rÄI±,(Œ†xTS¡tèh.ç{§…B¾8‹ùn¶-ÚL&Eº.BädØ«ðijÉf„BbÙÙgïÌïÓ&ÛÊViWòØÓéîÅ)ä)†&¶mÓÐÐÀQGýA¸ã—]6Ñ‹]FýPL]÷;ÃöÔÖÖÖ£' 2}Z Ø…“NzŽÃ^~™Ã>ø€óÞ{­GßÌÌ#þÊÒë¯gí×nåÑÛogWëaÌÍÂGoœÎ® ±îÏgP¿{7õ‰QqÄ»L¾z#gð?: ƒ«ë,ýãyñßÿI¿ù Yj5 §þ•WÈ":òFÄ““8¢Ã–^È¿7ŽuÇãó '¼ÍêÕŸeùòÎ8ãØ¦Éòå·óƒûÍå8õÔËùýïÏÛoÿæá‡ÿHpҥϱʀ[2í<±î n×aÕM4ž¼“›_ø2ŸþôÎY¬¾Ô¡V( …B¡P(Š.èj®nÛ6f`Y´Àq\×ÅATèLÚ°{yÄ«þiõŽƒÛØèúûgªñ=Ï$²XåZÀ•)H˜Ð|‚ûÓðTRx›…sµ®]V-\ˆ=juãÇc#:ñ§—,áÆº:rÁM§°S7òëø 5A„Ú¦Óqâñg5ƒ¬¦ñÜòåÄòçÊÀCGÌúN:±ßpXôoyç78ùd>8–o…à'øñúRëNåã„èÑuÑO}rˆÑh_WÀ†iÓ˜‘_/uuLq]î<á=š^z‰+‡ø"Oô`¶ýn+{ÇŽeýw/ãÈ”hwïÞFjkÛH&ªoú$[þe:M/¾I*µ‚Í›khiYŽãäË]Ç{tè …B¡P( …Báñ.¾SC©Õ2¿›ã8؆!ænÿò/üá ¹H×YŠð Ë^wétš„apßMüïÙ³xþ‘çøÒM㩺h"·¤àC¸è¢UìyëGœ qg6ÉÁ9x"Ïýá1~þ¡ÆF3Äœ±Ýqع3Ë‘G\.£ë:™L¦û“L¥Ä “èËʆišlxðA®_»¶´T*%<šLÓß„‚iÛBK¥ÄkJ(«q½™Š“¾%ž÷”™Í 
~òòÇ?Îz]'$cue¢qéA'=aðhºKÕ#Î#®¯Ø¶Muu5–e‘ÈKØííÂhLÓdË–pÖYã¹å–óhllD×ujkkÑ4l6Ë“OÞÀ¹ç®#žŸÁ‡B!\×%“ÉÇÙ¼ù>n¾ùrr¹›7×píµpÇqH$Üzëõ,Y²ææf®ºjŸþôYlß~7{÷®%ECC†aÉdH&#Lœ¸‰Ï|æ+Ô××{‚^ð\lÛÆ²¬·×D"A"‘ ›Í(ñòµü<•Jy®±’T*E¶”b­è1¦i'¹çžåìÙs^—â©ä@t˜‰üÿmóæ±÷øãq‚—ô«Çæ’†„þ«º?ÁoLF×#.\­ÎɇpËýv'FÅãù>³ÄgFþ¯§Èѱü9€p½^vôÑ iŒ?Q)GŠŒã^{ Í08|ôhrùk2cÇÚgmbçÌ™ì¸ûaÈ{¶Ö$¥÷'&MjâK;w²ùø½|äÛ¯¡»%ßÇ[Œ?c+7ü|/áp¡·´iŠ~]ÞN¶ÝÓ³S( …B¡P(#…ÿ¼ë]F»Çsæ’Þq=ôZ^Wr…Bbξu+Ùl–[×®ås?úiÄ<Äqt]§ººžk®©áçŸÏõ_¯âŸøcǎ囿žÀ‚œ‡¿þu™L†G} "ð-NÐàg?û’7××s·l6K<îòöÛw¾F æÿò³Õýh¡7𠔉ñóžiA/¹L:M$›eûu×aI½Az¶Y–X?ÞfAµ2÷<ОÝo?_ä …üÅ<‰Rn‡¶ ©kO?X,æ…ïÄR)Þ{ã ^‘a®¢ò!»—-£åž{È:–eá¸.ä5¿Y;Ÿ2i9ÿøÇ?zdÃÒ#Î4;¯xñÒK/ÐuÝ tMÓp'*6ÖuD"뺘¦‰®ë„B!4M#‘HÐÖ&|5W®œÏ}÷MF×/DzjÐ4Í[OVšL$â477{ÇT_C¡¹@ìëâÅÿÇý×E\x¡®ëX–Å•W.äÊ+Åç×]7ŸoœÍi§E0M“¹s_"›]B&“Ä4 Òi¨®~™?ÿù¾ýí¸'æ%ó*J*•B×u²Ù¬'ÆãqlÛÆu],Ëò„B¹¬¹¹™T*E(ÂqR©™LƫșJ¥ˆF£žx$ S¾—¢bO*xŽD„àÙ̹ç®cÆŒÉ,X0ØÖa=_ÈÊ"DµD§Ù€È!°éºë¸müxV¥©¥tL¿ôh %ÅëŠ ºñdçÉ3{UU&@ˆÒ]Wä : <êl„@‡¦yׄüù…€š#ŽðòÏÉÏ6<ø {ZZ`ñb ˼…UÞ5©:þ%®;ûl¬ûïx›-ÃÌóÏbúÎ?ÞÂ4«™ô›ßàn>ƒ§ì·ù¸y0‡Û°|ùsÜt“Ëîݧzâd"Q:^¡P( …B¡PŒ\Þ™±ƒ~¹¿Hi…ˆ~ŒFaÉ’­|ã¯ðÒ[o1ãÆÙ|Í58À7Î?Ÿx<Ž®ëžhÖÐãÇ/`ãÆK8è _ðøãÇ ipáúõ¼Wÿ W}ñ,`ÆŒÛÑõo±iÓc$“B¤’s~9ß—ž]Ùl–L&C8Ÿ?‘H …ˆF£¾ødš´67“5M>¹s'k¯¸‚…¿ûøÌqx÷ÿý?~˜Í’ xÏ=ô«_qÓsÏqû’%œÓÔÄ‹‡Êwﺋ-[¶Ý¥¡Ã"&sL¡×Iü1®\¹’/Üp›6mÇ]2kÛ¶mæä´h4ÊSÈ ¡ÎÓO{çã8þóŸÅuˆÇyÿßþ n»M‘ßúi6>û,;³Yžzê)¾ô¥/qõÕW{ÚŠã8Äb1B¡‰D‚–––ÙÁ°âB¡Ò®žW]ušf“L&=1Ì4MÏóK~Q†!D°T>ƒ{.—Ã0 /›0L!*mØ0ÍKŽ/…§xÀ](¥ÛkšFKË=lÝz †a‰D ]&óë46Šã±,‹Häpþýߥ¾>ì%œŸ1£…õëÿº"q‡Ã$“ILÓÄu]R©ñxœd2I}}½'@F"F…¦iX–E&“ñ¶ —ÑhÓ4ó^\¢cŸH$¼c6 ƒD"áUïTB\iäÓ…íÛçsþù¥ räkƒàà'í´¢–,Xc/Ÿ®®î°}ˆBa®Y¡×¶EØi:-~’ô?½âd‚Ð$Â+ÎØ ƒ`=¡PþÚ3fͯ#…¶ÿøGÚöîEŽÝæŽ/ÎXøá4._Î+o¼LÝð.ÿ¸‰ªc«;û\N–eqî5×ÀìÙ4— UÏ(™LòÄO0oÞ<²Ùl>G~Ö»N¡PˆGy„%K–J¥X²d .Äu]æÏŸÏ7ÞÈäÉ“ill$Çyì±ÇˆF£œò¥{ïeÿ]»¸ê¸ãøûßÿÎ_þò.»ì2®¼òJ,Ë"›ÍR__O:Æq~ô£qå•WFY³fÍÈâdõHY(!›ÍbG5‘xÛû"Aˆp–eyF-%Û¶Ñ4x<Žaž€$;Œ [æÄ‰»<MÓzœ£Í0 ZZÎbÆŒ–.;')ÜÉ(ú=™Œ/àÌ™3ÛÞ‘Ó nº£ŽºÇ9ª@ü³,Ë»Á‚b™TÉ¥qËe‘HÄ)]×% ‘Íf½|tÒƒPÓ´‚ÒëPzÖ©ÜuâúÝqÇ}@Ë*íI%·8³MÇä‚Qú`ÌœYr?ÝÀηaU•DñóÜ5Òyµ#žB”4Û[÷ßϘ%K0€Üïïm£«>šÝ55ljk¦½õc·m£eÒ$^² 9†H$ '²à¼ýyòÐ# F²û¥90z/ßùùר¾à\{í4ô|Ú•KN¡P( …B1\¹è¢§3f‘¼þúN6l¸–øë×ÿ‚{ïm ¥åaÀåÙg7ÒÖ6+®x‚7Þø/,ø87ß|9‰„ñúi3yjû~Ìxn ¯í7…¶'È 
ÇǶ¾È-ÿq8×^»…݇Öò ~tÑE¬ÿÑoyçŸ`Û:k×Þ À[o½D4j{^YÙl¦`?uêTþöÒK¸À®]»¼y¿t42^š¦yÚ†tHÂq8ùÈ#ùò÷¾ÇÆ»ïâ[>—9®‹©ëümî\?óLâçœÃe po(ÄAä9ä$“I¾ð…/ …ˆÇã°`‡ÿøÇüþ?ÿ“ù—\ÂI‹³yóf@8.¥R)/}–ŒÚK&“^:-©ß|æ3ŸaãÆÜ}÷ÝÔ××£ë:ñxœÇœD"A:fóæÍ|ík_óôˆ SÑ—¾ô%Ï1éè£&•Jñè£òï|ZÇO~ò9ä²Ù,{öìá /ô>³,‹SO=•P(Äå—_Λo¾ÙÁqª' ;!nÔ(†,Ó¢ÕÖÖz_΄ ._øÂ„B!ÏsK¢®ëD"ïK—î—Ò“«ñ%„±íö~?‡7Þ8cŽÙŒëñ6ùpgO|˜2eŠ·ü˜cN ­mÏ??x<îݬÒÓ¯˜ÆÆFïü¥˜Œ£–qéÁkbš&étº@¸“ᵺ®cš¦çÚr8cš&?¾› /lãk_«ËòŸe"’¬6*¯¶ƒ™¤_æðFÃßzòì,„¸nÒºÒ@,B«ª*é÷ú‘G‚ãð×}˜°s'û¿ôۯ؋aˆ®á¿&Oàƒ¹s™õêkìþð.j¯žÎÚ+ª¨]¶Œõã›ÙåLcÜ»IÜ4Þ+¨£P( …B¡P UGDÅãB|»þú½X–ÅO,e÷îwøË_þ—]»ãä“'°{÷c46Nä‰'çÁÇ0mÚnLÓ`âÄï1zô‡™>ý2,ë9\×/R9‚¦dú¬ŸËgÓ㮓žcü__`Þoá½'þÂó'/aüÊ•|ô÷pq.÷®[Çì·ÿ™³¿LëþâtÏ©§N ˆ9俦&NÈëÒ'(ÚIG¤H$â·sá5×°+™¤æñÇaóf°,fî܉«iDs9œDÂ;†/ýìgœßÒÂæÍ›I¥Rž·Ø¯ýkž{î94×…ÃGÓ4vß|33ÿéŸ:\MÓ¨®®Æu] ÃðrÒ™¦Éi§F2™dëÖ­|âŸðŽ{ÆŒD"σï¸ãŽãšk®¡¥¥…x<îé8A='©'ÃIeZ1Úˆišär9/×~p{™®ì²Ë.ã`åÊ•ÞgûÛߨ±cGìaX q– ¶qà ÿASÓ—pÝ^QÛ¶ijšÄG?z¢' Y–E<'‰r¼n,Ò0^xaãÆ½Û¯çQ]ý$ï¿ÿ0ºþÝoc^>A®¸b./¼0QäùŠÃ/~q¶ ï¿/Ĺ\ÎWm¥1ulÓðþ7M³àÉõb!Nz¿†ažû¨â‚U`JQÜæH@xeÚ´·W¡ëB|3gáÝåàW•‚œL¨©(MqH­Ú‹£F UU?úO'“d€ÇþúW~mÛ6÷4üŸÎôüúVu5»jbçïNæŽ;î8v½ÿ>¦¶ÓvÂËTU=ËŸþd°aÃ<¢Q%È) …B¡P(®ë’H$øÂnàí·è ˆI'Œü`2÷Þ;çž»‹GYBCÃ'yë­ëùÔ§eüøñsÌ…XÖ4fÌ8‹‰'bHL'‡1c6bš1N?}?l{10™wßý!ÿôOŸeòälÙòCžyæÇ•¾eÇq„ð8î‚­|óÃæWmm\󵯱ªªŠ¿,YB5™;—Ì5sxjáBj/‚_çkD"`šc¼ï­»ÐÞõÀŒ––‚y}*•â'ßú–·ž—{.‘às?ÿ9³¯¹†¦šꮾZÌ?óEZ=Ÿú”ÆšËå8餓½Ûë#õdQ>ºP(D:öΧX'ñ¼ß²pBäС¡¡L&ÃÚµk9ꨣzdæjªm /¸çžûÍÍ9ZZÆ{‡ÌëöÇ?~”1cz_\$! 
L=%‰ðöÛ_ç¾ûîcæÌýz.o¿ý &NÜÔ+A*ž€Á÷Ó§OÇuÅë‰7áº0eÊ‹Ûd4íR| Æ‹Kä±=é"‘étºÀ›Pn/Å>)ÔÉïÆ¶í‚Š&µµµýz-‡º®sØaSq]ÑGY)‰ðä²Bœ¼ú¹üò$RPt¤³{=ܺv-¤RžçÝÉG‰ëº¼8eJÁºÁb¨;}€—]ÑÈÛsçòÖœ9ŒÿÅz:iÅ*ƒ¡©Ç_q…W®U×uþ±z5d³ü~ìX&~ï{üøƒ0M£åè:d³LýøÇ™>}:wÞy'¦i²sçNvïÞíå—Üxã^Þ9O´2Œ%ÑÞ¸q#àWY®Ãm·­÷’‰* …B¡P(ýmS0+þ,•s¬ú§UÄb1Âá0µµµs±\.G$¢sÒIÏ % DÛ¶Y¼x1íí­üèGsy÷Ýðå/OBÓ4¾õ­i"‘Œ"‰Dü"p’\.—÷šŸ‡B!æÍû)»w?N2™dÚ´%•¾”eÃ.rÄÿ÷dჿý €ßM›ÆïÆŒñæ|Q|Ç‹PH|ÅYœdd_wóe <«,Š˜«ÿöŠ+„@÷tÓu¯_ϺcŽ¡ùüó¡½ïþú×âØóA1 &ŸvëׯçÍ7ß$‰0iÒ$"‘ñx¼À©çÊ+¯,Ð`q"=È£ÖÖÖÆ£>ŠëºÔÖÖú9ëz,@éa!ªöH$¹³ÏájNár4¼I¢Üߨmc9àízÔö° M¡_k ‡mlF^ã8^‚D"A}½†a9Sª«ûÂâÅÿÇ„ ;—Åýv.2v{_0Mqó66†Äµ¶.àwºß¾µuá°¸®†7žÈÌ™ïÄ£K¡MÆq#ÃSÁ÷••^ÁWó‚ÁðTÇq0M³ƒËèpÁqžþ&²Y8> ËC‹* Ï3\Hwê⟇‰'’ÅÏÉ'çyÊAზ±cÇ2æ¸÷i~¢…;cs—¹ï¾ÿæâ‹ý¢( …B¡P( EWÈYA¡#Ÿ7ŸT ^yår¹¯€_Ãp¹âŠŸcÛ«xá…{¹çžª|;:¦ 55÷1iÒåØ6hZ”Ÿü.»l'mm×â86´ÑÔ4‰gŸ=ûìNK)q%_ÞzëOÌ™£×ÓÒrÓ§73±€ç³ð@K ­z Ú]t%/iZék)u‚žæYß¼y33‹ ù]0z4ÿ5y2—æ$ÿ9f nßN<ñæâÒ›-—Ë ÏMãÌü\½»ÐØž¤ú*ÅM7ÝDmm-Guš¦yùæJ^Ô,%«vÐ%:óˆ ç·JB „L±¹jù6¢}$(ç¾ë`v¿¶»Gç:l<⤋­4ˆ>¸Ó‹í•!‘Á/¥/•-Š™1£…ÖÖF¦Níÿ“ûæ©'Ä·LÆ÷uÝ/½TEkëÏE9‘b]à¡‡Ç /Ì"Ÿ;Žhgÿý÷/¸áròž¥nDMÓ ÖñÍ/â +§Ú¶ã8†á¹²BG»á†ëºLÚJ:  QU…›V†R?hxBœDÆç½.fþôéÔÏë—L§öò&.¸`=Ç¿—O~òö}Ø …B¡P(ÃÛ¶½ê‘©TŠY³&`š&õõõy'¨®ó8À“O~Çx<Í—¿ü=Âá0‰D‚D"A8œå‚ $™läÓŸž‰m‡‡Åè5›× ‘HÔç_‹ùã¦MyöÙù„Ãa¾ûÝ{9ä¯xžnпÕMƒa…UU/±uëúJ_þ²`—кâ4.8ð6æµöý¡¼¦i˜¦Ù½–søþûóûW_eÞ¼yþrÇËâÙùóÅü&?/Ÿ9s&7nô„_™Oß‹æÑuдg™¾¤÷ê Ó¦MãñÇ'—Ëáº.!B/žå:Ù6Q´Ì¦ce@™7*Z´žì²þâ1mcØrÈ–Ò°âêꞥ¶V÷Ôaa ¯\­$—ûF¿îwýúÃin>Œýö;³ßÏi_ ;-te7î¶mɤ_ùFÓ¼ðpMÓØµëoûxÜw &bì.bg¿º*óD£QOe—H¯9Çqؽ»g óPáÏ~Åóº2P+¥~Ð DßìãµüòI›7{B\ð{;÷˜c¼×ï¾û.W\ñ2K–<ÈÞ½UL[²dD…^+ …B¡P(ºÇ¶mjkk©¯¯ç‘GÆóÓŸ®¢©i»vMäk_û#¡PˆT*ÅoûѨ˜Ë…Bðüó÷2~üÝüë¿.âW¿º˜d2I*•dóæÞ~ûc„Ã1,KˆvÑ(ärBSÑuxä‘ñ\}õt"Þ(t–$·Ýö;Î<ó:Î;¯ÿ¢¾Š Î%>úq8àåJ eÁ`‰<ðÀ78ýôª>·%#κŒì³mØ´‰immŒ;V„¢Êåá0¤ÓD"LÓô"Ô®¸â ¾õ­o á+_MTÚd1²f¹xúé§©©©AÓ4Z[[ýÊ…˜<û8Øtç:›z5æ×µŠÖ•hC^n¹Ü@ˆs‘üq䙼nr…¸ašºe˦O˜eËâ„B!"‘áp¸ƒWY©p²}aÆy¼ðÂvæÍ;´_ÛM§Óý’'.­äÉ'«Ñ›¦ät½cÞDQXÁF×ýÝ“zæ84,„¸mÛ>Äž=SˆF›V¡ÿa:&›ׯû=à€صë,8˜Ûoíö‡gY…ÂÙœ9/sxãÙØ¶ø<ö½á§0—\[›paml[2±Î-·œl÷Ö“î±ûíw –%Ä=Ù~WѵÅ7®LtQ–˃¯‡“çº.Í,[6—¿¡¼á#:âaICþ})?Õ¾€Z,¤¶®;†ÿøùÅ/Öc»kj¸ýöÇ8∳+}j 
…B¡P(ŠB¦'Éf³D£Qt]çŠ+öò‰ON4â8…Db¹œ˜KÉhF=BŠJS§¶b},m9ˆq€Y6|ðÁßÐuH$²Ï¹àEÃyc1M¨­bøÿMÆeä™Ì/FI$r{¾k§wÅ”.–Ð¥ÖÕéB =â :&\—ÍEíɯ!žoÇÅŸÈ9¶ÃíÒσžšX¿ma[OsÈ{Ä=õÔzª««™:µÐØ}tb[ì9&Làä“?ÍÐ (4…„ÀfÛx9Ü"#7î]@£´p¯¨,Añ^ qÁ~Ý c¡žâáŒüm°&Näµ-[ØrÈ|V˜0åò)´ö}…B¡P( E…‘a¦©TÊ‹öq‡d2‰eYD£QïsM‹81D"~‘¼PHx¥57ûóªàüJ×õ!Šñ¸ÐdúÃÿDæ‘+'Ñh´À»KÎ?‡6pÀÿ¼Imí¨þm8‘_t*%þO$„ZxãHíJÌ;#$ Ï.ãñxŸ¼óâñx«¶\ˆîôGé±f­kӱ؂›_'†?I³“j£ÄºÅDK¼6ñ'åÙüvÉÛËÐ1.¬?üðaåÙ©P( …B1’q]—D"A}}=ñx¼@„“U.]Wó’I¹sO`ÆŒñD"bž%Ÿÿš¦/¾}˜ ôeEùÐ-[ö'®Ýç¶ Î+·Òý±H Éf³¬{ï= c!Èw`Ò)Ã:CC(‡ÅÞo2)EGM†˜Zæ 2)¬p*‰ä×-þ:ŠÅ;Q Ú(%K4Ó+†´÷ »¨®NpØaæRY¹\íÑèàíìB¡ÂÐÑ v°sçLtÝÖ¤;4ˆûYnóä“cô ;l*¡PÈ¼ƒõš &\×eÏž¯ð…Daˆ¹bp¥{!®ø;Ô££°7*ÄÖ¦¸pßå—«\‡ …B¡P(à ÇqHtáYàº.©TŠêêjDªÆÆÆ‚ˆ Ó„êjñ:ö=ß ¾ùÍ'1 1K&ý¹Y:]þ¼lŠÁÎÿ3fÌÓåq( N˲Èf³ñ®õ-.Ù/^ˆ6¥Eªl‰eé.Ú(8püü>Aäá‡íoM¹<Ø^Fm;zªø¥l)j£xY”Òâ_)±°t+ĵµõ<áÜ@óÒK{ùЇõ.Y`?2ØÝ~‹±o|ãÚÚæyy¢ÑÒ•M]7Âĉsìe³…aurÛT*¸mÿžC°hÃpã7f³+ÞyaÅàAlènIñàç,ŽÈãøæGàÝqãØ¶hQ¥OI¡P( …BÑ áp˜l6ë…ô1M“úúz\×¥±±Ñó‚Ó:x% O7™?;÷£†dTR1#Ù¡a0ë}Áj5˜6mUßqÝÂ5×õóAu‚a$“I&NœÈË/¿\ž“ë,ij”vmä—Ûê) Y•Bœ_Ôlgä×·)ñ¤—›‰?a““7›Ž!£J'h/öÚÓóëö³¾!ñ„¸5kÖÇijj ©©‰ÓO?ÓO?sÎ9Ç[>˜hk{³ÏÞÔãõû;\µø¾ì\wݳLŸþ°÷>èdݺÙtж’çDæ0H$üÏú;_ž1LÇaÉ…«xDëXØE14 öݲ’jñH~¶¸'nÙ …B¡P(ŠǶm‰µµµ^±¸‚t2ùÏMÓ¤±±‘d2‰®ëÞN–…—KÛu…‡›ë -M•LŽlÑ­ÃMˆ³ƒ·?Ëé§ïí{#©Taî§ÚÚ’n×u±m›X,†¦ièºÎرc©©©Ù÷)¥ûIÛ êv‰õ¥GšF¡xf!&ÁÁíeX©hÃ-j/‚ð|+žS…ð«—JL 0Hâtî¥WLš²yÍŒhii᪫®böìÙÞ—Ç …BÜsÏ=,]º´Ç^gmmm¬X±¢Ã²•+W²bÅ Ö¬YÓ/nš&?ûÙwY¸pZ·éo‘ȶEG;TR—Íœ¹³ÇëÉPÖ –…—ó@¾—ÞpòÚÊûz­åÓ§J%ð,·;ŽÃÎO~’^Ö›Q b:KóË…€º;غuë€S±—£/V(ÊÉ@)Šr¢ìX1\Îã ˲hhhÀ0 Od3 ˲0M“††ïó\.W0G‘Ž ¢$¤Â+NÑ‘t¾(;v5™æ};7é9S_/ ªµâñ •JaÛ6‘H„XL¸“7ŽSJD¹õšRÎLù"t"Y!„•ââ ZBˆ ŠdÁ÷2ßš¬ˆ\'KljW”Ž‚™ôiãh€U«V±lÙ2–/_NUU---´´´°|ùrjjjX¶l@¼âV¬XÁÊ•+ –% ZZZ¨««#‘Hìs¸a6›eÕªi@ŽM›zn\É~öF‘9‡_ü.ùH÷壣QñC †éŠÜqB¼ïËW­ë:nǸö’rÛñúõ‡óû籠¸F8gÒ¤Û_±÷w_¬P”›S(åFÙ±b¸0Ç®ëR__O*•"“Éäs¼i8ç ]×I&“D"‘ŽÁ©`È©œÕ×þ”F•f /ÂŽe„å3·}¸C޶!•ÝhÔÏ3Šó&“I¢Ñ(¡P¨<¢f©(@ƒB/·(ƒ,¸LS(Æ l/×/åÈ# 0ßtÌEgÐQœ+µl1„Àvùå—{ s¹uuuTUUyËfϞݭ»¨eYÖijjbãÆÜzë­€PœW­ZµO MÓdãÆ«3gv¾žë&»ìo›Ôuán<Ü8眣9眣;,/Îc ‹;ÈD£Ûö½âL³÷ƒ}ê¬ú‘°ãM›6±ká¸ÁÜ7(ÊH]]]Ù÷QlÇå苊r2Pc …¢œ(;V †ã¸Â¶mˆÇãDÞ–%æ0¹œF:ÆqIJâ¹mC,b¾“ÍŠõ25$uÃè8/UT†°ch@èRÿ1áQtýýlÀös@éº_Ñ#@._…Ò¶m\×-8^iËý6Ï´E ‚ºGQ!&*§ðAÑN,®ëþO 
ŠÈÿƒûó‚ò@’žW:à±½a4@MMM·[SS‹Š’‰oܸ±Ë†ÚÚÚ¸ù曹öÚk –¯Y³¦`Ò¹hÑ¢~qù|çw¸âŠ—;×Jy¿™&#š}U÷3™ŽËB¡Â™„4™ûâØæT ß@ر뺼°g5cÆ øù)p@YÛ/eÇåꋊr0Ðc …¢(;V †ã¸ÂqÈd2"ø?ŽSøWŒm‹¿àgÒŸ ›õçH™ŒáeÇ."•Y8üðÛzïd"UßN¶³,Ëóˆ+{™•?™Rö+½Ó¤'›ÔZ‚ÕM E·b9"”ÿ‹•hß@(šÅ§(“qqFƒðÎXµj–eyAUuÕªUlÛ¶­ƒ8$‘Hpíµ×xÑlÛ¶Ù³g{ï{’0pË–-¬X±‚–––’Ÿ[ñ8›7oæì³'vÚ©{iŸ8SÑ¿X–(Ç âšKÓéÊѲº/žá8Ž×qµ´´p×]w•=‰gÚñk¯½Ö!'Œ8w‹)3.ejYÏD1iiiÁ¶mÞyç²î§”÷ņßzë-lÛ.iÇŠ‘Ëš5k¸ë®»xýõ×˶þì‹_ýuîºë.V­Ú‡ªeŠaÉŠ+°m›·Þz«,í—cl<”„EùiiiaÅŠ¼öÚkeÝO+ óññ@’J¥hhh N— ã“ÅGx¼s¼™faŽìHÄŸcF"~´T2©Ä·ž"Ç;vì(ë~ú{|ÜÙ¸ÂÓ\øÛßÞîýJ%¸B¡ñxœp8L(ê›÷^‚ÒᦥÐ),ŽÜ.‰ï­&…¸hÑ2;ÐŽ|­µ!E½‚Íÿ?DBÈV­ZÕ«ññX€¥K—ÒÒÒâdX¶l™§ _z饴´´txšdåÊ•ÔÕÕu)Ôõ†É“'³páB¦L™RzMÝäœs:Ïv)C"U<¾@Ó´²)æA¯8Y¦;î¼hƒôLì‰Ã›Ì0eÊæÏŸÏƒ>X¶kÔßv\UUÅÂ… ;,·m›±~ŒO Ï}E?1eÊfŽÅK‡R¶}ô§Oš4‰™3g–´cÅÈeöìÙÌŸ?¿Ó‡eûJ÷ÅS¦L¡¦¦f@BÂC‹… òúë¯3© ¹;Ë56NŠ)S¦°páBV¯^]¶} Ôøx H$ض]PpÁ4ýÒÚZ1”NŽ#þw]áD,Pçº#†¤®7„"t{„CaäaqÊ}AŽ+z衲9ÆÇ+ìüµ^‘}¨ÎaÛÝæÔÒ4x<^à¸RŠN[±éèif"¾ÜRyõÝüŸž_G6¬­S¼S¼¤è2·›X&)•îKVB"ÔÕÕ±mÛ¶GËË—/ǶmlÛfùòå—ß}÷Ý]*­k×®eåÊ•†á=U0 Ãsõ,{-~2XÌäÉ“Y´hQçëF· [)â çÿ¯(å®D#›w]ÿ)R6ÛñÇIzÂE"½û>ªªª˜?>ãÇ/Û9ô·pÀ%;{Çqh­ªâCe;Å`¥ªªŠY³f±»?J‰wBgvðY8ÿëñžºFŽ+ÆW¶s-Çø¸«q…¼ðÂ.ª«ŸìùA:Žp·ìFçb²ëºd»ù‹SºN‚G±<à rÁ‘ÅJ}&‘^oÁöð½ßä~d•UÂJ¨]ÑÚóËWiêêêz5>ÛÝ =é(erC‰a^IÝ––šššhkk£ªªªCØko¹ë¡‡`ñbƬþ¯.×+å WÊ;k$1•H#ñ#•L–.Ô`YB€ëÎÎqœ’9íÞ|óͲû@Ùñ–={øàµq«¼5G$!à2¶ß™·´´Édú­/V(ÊÅ@Ž)Šr¡ìX1\ã Û¶I¥RضM2¯¢9ŽˆàÑu1?imâ[:ݱe¦Þ)³C¿ =Øœü_¡¸ùÿ¥"½ÛÀ/2éàçÎ'¿,†©!òßk1.…ÐT»ãR%ìø©§¶rÌ1½ðbÖ4¡ w³)&vçhÓåwRJ‰âW1•M+Ê©y)I¡TÊ©RÂôŠ+.À0ñ„¸`ߺº:®½öÚ~yz\SSÃå—_Î¥—^Ê¢E‹°,‹;Ïí½˜W'¸8¸Óõéï@EèâѨã‚ùO£Q!ÄuW8Ãu]R©”.]iúÓŽ_œ2…è¯G|ߣ`ú»/V(*²cÅp@Ù±b80TìXVFM§Óhšæ‰2Žôp“â[q^·TJqѨé*PKNœBX‘…Rh³žO:"¯}07¾‘¢£óS?:PnS¼N±Ã_¤Äç)„·\Ž}ŸÛ”Óá¢3ÊaÇÚðó0|°§šOìEM%x¡CÈ©ã8˜¦‰®ëŠ‹ô ñÅ5-“ÅJi|Ò£­ØX‚†\×¥£Ûda¨Füdx,ˆ‰«V­âÚk¯¥¦¦ÆåR©TŸµ‹\Ï–-[F8fãÆ,_¾¼Gn˱y÷n~ýÜÌ.ד.Å*IæÀ"¿úDÂO ö²swÞ‰étšX,æ¹þ”­\v<þ¬³Fz¿£@‚vÜŸ}±B1P”sL¡P ÊŽÃ…¡4®p]׫Œ*¸DBÌELS„–F£]Gêºp"ëôÆNFëIÑLÎj¤CP ¡KDÚ„ø“†ØÖ ´+gEr[-¿,Ø>…ܺ›÷u^¢!Ò‰…šŽxß[z]Yt)·+O=u{Á\¶7˜¦éyo‚È  …°,‹l6K$éS»~ƒt4 _l“$ð+›ÊÏ’=h „q;ßH÷I5BÜÚµk …B,]ºù.½ôÒ~ÝQMMM¿äµøŸ_dÂO²ÿïöïr=Ôn’IF<åÎŒ~ æP=áºã"‘¦i–ýx{KØqË·ÿ‰‰Fu@ŠŠÐ_}±BQI”+†ÊŽÃÁlDZXŒh4Z0Ÿ‘étÏDµx¼wy­e„ž ý„Â<÷22ÏAˆd‰ü{éÝ&‘„fQ,¤Y.è­É/ 
`R$BÇ*'>zþ¸{3‹h!®˜þ´ãqGîDwþ€ë¾Œat#Kš¦ð ŠÇ <ˆ’E“h)Ä麎išû.Ä%éøeñc%Å^n J–FG—Iz±lâk*¿ƒ9¡ñ˜iÓõô˜>m;H¢‡5º^øTI&< ‡Åû`>Éî¾Á&Âõ¶mÓ>nŸV"œB¡P( …BQLÓÄuÝ‚47©”ð‚³íž§1’yã …^`A²ø^pD¸f!Reº‡4çÿ’åÉüzI|á®Tî5)zQby%ÑóÇCè,² Co8è ƒ*|ýƒƒ{”J&!¬/´&Çq¼üî±XÌ{­ëz‘®O”šf]8¥øœÁ7.-¿]5+}„:i³¢h‡+£÷½‰Ã¶mvΜÉô>Š#½XÃ@†¢Êâ ²¿pñyOÆãña'ƹ®Ëþîªôa( …B¡P(Ã’D"išdòeM ¨­ÿƒÈ ×BD…Übù÷Ò{M LFþó„†a!t‡(_¤Ø&eA=°<è› CUéò‘Túðû…]ßÍîÝ—“Íf»ŸËÚ¶È —Ëy‹,Ë"{B^$éâÚ]£¡è½T•]Ä—L*BMíÌÈThi¿ákhjjbÅŠ¿_¾|yEÖu]Þ¯®ïóö‰D+ʈÞ‰ÂaMs¶-:¹EyóÐC+} …B¡P(ÃŽl6‹ã8är9o.áºÂ ±±sg€…¢áÌ¢P, #„¦$BdŠå? ç·ÙWÏ´`‘Ê¡Nq}d1ÎâÚÕ)‡¾Ï9{îѺžÏcž!e¡Pˆæææ‚÷ˆÑó‹Y,ÚI!Nª¾) ÃMƒ"[ ®ê ÂS±ÏŒÝÔÔDSS“÷Á¢E‹ Þ6MœÈ»«bÖ}ÛÞ0ü¤•²ÚŒ,½,cðý }w!¶Éà·n*52*Ý6®?Ñ¿ÿ~Zé#QT õI¡P( …¢<ضÝÁƒ(á¥ÌÍ–Eh 2À§X jü¼ojÙ‘ §#ÄÎj„~¤!æâÃEt”¸À~Ÿz“o´ÐÚÚÚõÊ=ˆ‘N$¥CQm:¯nÄ¡PH?Ž„WKǼm ü䃊²0`éÒ¥^¡†Á̃ۧ3ù mLÕ(¨ÓS"I¡Ä˸u ¿t³‰ê@Ë…ëú¹ár9áý‰øUˆzREu¸ð× mìyuZ¥CQA”§P( …BÑ¿¸®K*•"›ÍrÁßÄqÄ<£”Ö!=ß$&BÐ)VÚÃMHêOdÁy=e}€úüÿ.ÃÏCÎöß²¥gé•,«dÉÞD"A$ñþ/I° oW½ß‚ÛêEï‹E=¿Šˆ¢,Œ ¾ikkcÕªU455±mÛ6êêꨫ«+íY}c2ûi »â5©ÌÖ…}fñ݇¥»±¬F#sFb0ˆ'RR뾡iÂ1(´éº®êºâo˜¥‚ë”·žŸÉq3†TjF…B¡P( …bP“Ífq]—\.‡iˆ¨M+Ì_í „ ¹(Ka8èðKŒSYŠ]"øÎWõtE‡¯Åã•OŒí|ˆÙIåB]×Ñu\W9µŠÅ´Îpò&¾Ñ˰@iìR J>rÝÞVÜPôOhjjâÜsÏeÕªUÌž=›ºº:ÚÚÚ¸þúë¹êª«hkk«ô±ÒÖÞαsßæ+Ý q-ëBt3ÂÚʰ_u×@¼6)¬Tc ¶Iàç5LÑ»|ˆ ŸxÜÏ'õ\]÷CS-ËOœ:ؾÿþÌûжJ†B¡P( …B1lÈf³Äãqt]'ò¸Û2b^g!Â$úƒŒ®Ê! C*žBn¼¿m,êj¾'«jšø+"¢i–eyÕR pŠþï ©t/´G$¥„¸ž¶¯è3£AxÂ]uÕU\~ùåÜsÏ=,_¾œåË—síµ×r÷Ýw{‚Ü``êÔ©]>©È"l& üÍ6%½ßöà‡¢ÊœpQ Ÿ€HaYÚ¬_!GÑw‚‰Q]W<¡2Má)gš¢?ª­ÿwjvì¨ô!(*ÌÙ7ß\éCP( …B¡¤R)4MCχù…BbNað'Ãw2ó»Lþ/ÄðóÆJDñ ^ô¤ªêPÀFðûï¿§ó•l»Ëp0;JfY–W9µÖgöð Bt-ªÉC)Öü,Tx` °jÕ*êêêX¶lY‡ªªªH¥RX–EKKKE¶uÛA]~.ŸrXÀÓ ¨¶ÅëÂVw":Û4¾7f±s–ß^&ÞÌâWz1e¬‡ªwœUa•Kî¾8\q?dY……‚ìÞ½»¢ç°¯ìøûXfÍšXéÃP( …B¡P(†<–e‘Ífùæ7Am>ÏPʆµ˜º˜Ë¥Â[4ÿ¿ A hˆˆ4áøÒ6m˜äÑî.’ÐqJzÂÈu˜Í'WO&“¥sÍic.ž/[tî=$Ãþ £à&Õèb‘Cz/)ÊÂha©uuu®TSSâE‹Ø¸qcEvÒ3ï2sæÎ’ŸÅòÿÇöW§C[>ÝùÃ@Ø ôŽ3ñÝ—müâ! 
ˆCÑs¤¸&ÝÅAˆròµÌUÙ•÷æ›oVú4ö‰½ûíÇ¥—Öí{C …B¡P( ÅÆq‰§žúÎ?ÿ«…o…áSˆBxS¢Ûà&ƒ˜Ã!n÷öý:/Öà8% 4˜¦‰i𥫤‘yµJ%Í/Óä:Q|·ÃR‰ù‚9ã‚Ë”ËhÙèqÖøÁ#n÷{ïñ¡uL|h"l,ð¼4€£pGDÜØÅaÑ]ôˆ‹à‡K‡ð½ã¤7舢ì7"‘¢D©NÇ×®+^ÛvçBÜPgÔÍM•>„Ê3ßm‚ÒO‹öåæ•ñê²­RçSê‡ÑàsW( …B¡ƤR)"‘--u8\’W]8€•†˜Ë©È:Å@â»ß©FëÄã Ç)–jYº®豈C§; ’¢ã\CV"‘ùßä¨X`‹£n–f4À¢E‹hjê\ hjjêÖk®Ü¸À~!—%Kæuø,…b¢cÂMa“U=Øo‡~.t~R Q(8Ûø"sƒ/νҕo £°txqß pÑNÜ`gÏž]ÑóØWøàƒJBepð,JU?ÙWqLªãÅû’m»ˆÒLò})e>xÓÊ¶Âø?h²Í ›­IÇü ¢³øÜE¸ífÅçç­>¯,—Y¡P( …b$àºn>™}Û†‰I¸?pކ°qþüJÆ>ãUS^,ý¡¬R؉Gœ,Ðè®’¡¬,YŠbq.Šïé&óm)£Aˆ4² C±çÛš5k¸þúëY¶lUU=‘²ÊÃ6mb¿‰/t°[“Ò‰6³ÙÂjœ°¤‡r¿¬rc¾mŠ Ðœ_G67 ŽBØ¿…°óZ:ê 1:Þ;.s_tªÎ0AQÎuÅ{]ï´_¸®ËŽ5•>Œ}G O¾q=ÜÀzÕù× |A*¸íK‰cÝÝv‰íÍüþlü’ÝRU—7¦çÌ@;ò˜„š.S tÕ¶¢í‚B¬B$í[n[‹¯ÞÛðVÕ[åýŽ …B¡P(†1·ß~óæý„Û-8? 'ÇEÔtè.²O1øNÚ©qÜ×J‹ ã,áq …0 ƒP(Ô}hªœ{Oëstôˆ Î¥"(Ï·AÄXð 2\ýõœ~úé,Z´€7ÒÒÒ²eËX¾|yEôá‡ffÛ<\—£“yÛŠ)Ö›Z¡ce†>¢ý/K_;š8bþÅŸûËy¹tÜ‘!¯a„·_?GyïH$²ïôɤ¨–Ì  _5Íνâ†*¿xåƬûP¥cßž]|aMVâ±÷Xq…9™xQ6߆^“±Š@ûâiÍ/#n ©À¾äMfçÛ³óí[ùcMæÛ”í§ð«Ùø¥»åMœÈ·ŒWoÈ/“ç©ö+׳ð’Èxy¹Nþ|Ýð(sq¥¿M…B¡P(Š!É­·®á¥—n…¼ò#ðC—Ý“&UúúÛîÄ“ —+éu"×7ŒZ°œ#Ë<Á&uÄ|F Á¹”D Õƒ/áZMM ·Þz+MMM^uÍP(Ä¢E‹¨©©¼϶mÛØr{œ/ÞaÒ•>¸<:Â[„Î õ€¾#M NË»tšI#î'émç ´‰hþÏ l[ŠRyƒD"¯¬÷`@ mÙ¬ðˆ“^rºîWXnèŸ}XPéÃè²Î{ð7Áüů’"…8éq&E8™¤QƇëø"™T¡|[¡ü>#ø*¶t•á¬EB<„8&ÝPeé%yÜR  –É›ÔÂæ‚bY¿žz_”‹ãÿ¸ÿ#¼¥2øtà …B¡P(û†mÛŒû!.ˆÃË¡îçHŠÁÁð(Ö°Ö†Ião,ýa'óp˲z.Â)Ôøò†(öÁ‘9«•Wq:T>¨««+™ Î0ŒÎÕÝàêj9kšæ×>MPÚ®3Âa!DÂÆü1JÍÄ|ü¹¿ÌG'w²øÎ7¹ü²0¾þ óÖ¤sR4𙼥–‘Ëd0ñÅËÁ¤šHÑ-—ÿ[–ðf :¿¬VÖ%‚üeýzª™UéÃ(T…¥è•¡se7N¡‹'øoÒCMzÌI±*‡0h)Ê…òëÊÒÄN~;y£HUZþ˜Hã–Jv‘ó-Šß²ù¿bwl¹¿`,¹$(¾Yø9‚Ç,‘ŸiEmKðLª6…B¡P(вàº.±XŒº~Ϧm§ÚhÀÙ7ß ·ÞZéCÙ'w¡u„^mÓ« PÚYÄœ©x’_ì±$§ÇUS+ÍÖä”ó¶~ª%‹žw¼]…S:Ò€¯[ÄðE7 _ÂøÂ¢Ñd\+þ<_¶'5 )È¥÷eþ}Xœ¯Q:Õ#4~ڮƲ •ò_ËpbË*,&#+©yä‘8ÒþáyÍà#k¦Tö ‚_¶ô^“¯]„!JÃMÒ±£–^d.~h¦‹0Ü`Å™›­Ø[,HkþóL~[ùZnÍ¿—±ßòx¤žü¤x–£tém¹ngÄóÛvåµ&EvF_ÀS( …B¡P”…Å‹ïçÒåËypÏ!|5üR .¦PSs_×w×u{¼¾Ø’__ôYws– Ó¢¢ !îýmcÿ²_qÒÅOùÔt}à¼á@ØwÐ[¯8UwH½#H&ß®¬@Ô;ä{7¿žÔ3’øúˆÔ\â'"Y6…¸‡ct,Ynâq?\Þ0Dî¸HDxÁש 3f¿ñÎÆýXÿÄäÛa˜ÒEò¯e®6ðs¦É¢én) $”_WŠxÒ³M¦´d(jo ªÒ:…J¡°’Šá‚HϹ¦»=…B¡P( Å>qÓOòì³—pCÍå,Ôá!5üR \`œ'nêøa'“Y˲pöÅcHΓ‚âA¿p]¸ÒWEÑc÷½‰aÝfQÝ>Îö— 
†„œ=®tºwº‘ȼñ=9¿bo¶ÎŠ=S[¸Oãø…ñÃbKU¡-Á¼pA¢Q¨¯÷+©jÚÀ{4–ƒöìaîܹý×`¿(€Dæj“b”¼¾õø.’ÁJ;Áü²ô¯l/›o_æW“$űîâÂÛËp#Ýì·X¸S(úÙÉ4ÎS@è‹Ågá°xx`"ÿeü# ­¦ëeÙéçÛ×4QMZîǽ ÜsIJ¾¤Ùgdá5ÛQ( E¹aìAl Ãëåg*û€ Œ±]Þl-‘<ƒÆÆ‹£ÝU'”éw‚‘EÁI}py°ž,ä0EIF´´´°fÍš.ÿ*Íû»'ñõˆ? ènN ‰IK6+¼«²Y‘¬¿é®@BÁ1õÓ:Áu‹÷ßÙ½&¯U±/Ã^u„vãà{¸Zˆ{„F#«úó2&…êëÅ{×ý•\gÕ˜è3»×.)<îÅ7IÇ/YÆ/‹"È/W‘ /mÀWde¥PYÝTVU(F–å÷Mö=¢?J$„ÐÖÐàÿž˜×@ý?‰>KÓ„Wo*%Ö¨¿¯“®¯/ñPÁ‚ðâwª¡Alçºâ}}=¸Oó?âu}½Øo}½Œ@÷r/#:P†ßN…B¡P(zÃwwî¤mí^¢Q¿à›B1Øxõ¶Ñì·ß™… M³Ã“Ô‡£Ê|\AdÈ®èMè ¢"ŒXµj+W®¬ô±ôŠÆ¬c¾W‚iÒï"ˆ¼*iãGŠÑž% F€ˆ8”a¯ÒÛÕÀÏÇ¿¯çmÛâ{I& —e³ÐÜìO6‹×ŠX/Uõ¯ 쿃†àþ—b›Ì§Fþ½Ìïf㇧Æ_n¡ÆŸª(#€lVˆhÒ£-ƒÐ\°…È÷…Ðþž$?Éqxb$">sÑFT–¼Fl›ÍŠÿƒ÷l²êcby2)<$“ù~ÏãtH~Î÷…„‹A&FwU¦cˆû»³‹Î’þJdrS…B¡P(*€Üútß0 méП(†7sæ¼,o\W .‹¼áêëëinn&“ë*V?—wp,³…)14 R; gÆ,_¾œåË—WúXºäýÉ…¹µz2'OK¢QáEÐïÞHe$Xu¸+Ò=X'Hw×-0gÄEÜã er¾'ïkédÕ\׿*8Æ¢ƒÌå*ŽÕ߬½ïBœ‰_uTz¹É»Íˆ/CæÐèhH ø^q2Çš¬nª!Ü#%Ãáš+ylÛuR_/–‡Bbœ$sVB>¯h8‚N‡âVEµ¦ù·®ãUVƒ¦Ìo¶A,O=£ãñà–~úßÜŒçñZÿ!ˆ__´Iw¡ý1 «c䯃šø( …¢¤[[ÙýÛ?ñÙ¥—)o8Å ¥¤îešbpà9ŽC:-fñÉîTe¿Bcgâ@g‰èejÅ dÈkµî½>o+½´!òD_ÞÄ=âºK™Õ[¤6#=àÒïà ¾g¬Ô€dJ±þFþÀJïÃÿ÷¶ Ì`côþ{ö½‘amžqÁÜmbrmç_'ð“&ñ #htì¤Q“nÅ$‘ðû *C> ±|Ô(¿ Lc£èc,Ë÷F+ÀÆÚ C4] ‹žHÕ-Œ¸ÇÜÀºÁøþü±™yAÝþ;hï‹öãqÐþ†ðJ-Þ©s ÷Ÿ¼NœŸ WM¥òûº R[ÁÑÀ¹§è¸d›Ý%%"¿ …B¡~d[ÿœrTéßi…bàÓwí*\hY]ÆRÝy›H/˜à°xަý/‰¢*¤bÆ^¸E‹UúxJb{ßÚ»OmCÉ…¹\¿/ÝE/IŒüº2Iê42çcšÂôbÑ]þ¼RýN{ ñj.ççJrÒuøá ªLùx3kL÷’ Ëò¹Ò3NVA•ëÈŽ×ÅWuS%ÚŒ£r¿)†4Žã‡ljšè+"!ÀÑÆIÐÚ*¼â4Mx’ò<ÜŠ‘ÅJ²`…,ªŸ¬fAvAaG™ÂZa q) ÉN3 ÎÛ]QâIÐeG™À¯^,«Ë*;ÝåAèÓž)8Ó}g4 Æx°ª!{¸¿ä;`î†ÈS E)<‡Îh@yÈ* …¢"$€±×ÜÇGq¥E¡è–¦5ïsÜ-þÇé 뺎ëºÔ×דÉdлR—e4TpÒ]<&3ð#œ‚M©ùÝ f4ÀÚµkY»v­·pÅŠ\uÕU•>6 8`æë•>Œ£Î!ö=_w#~UV9Ï”H(X£ëÐôžx(Ú¶ø“Å7Þ|óÐ2\òóð³Ïæ…¸}Dº"‚˜Ü=Ú‚ùÞÀÿ‚dißÎ&ÒŶBQFR©T§Ÿu®äÀo(x/ûˆlÞöÇ¿††wÐ4—tÚæÄ½ÛùÌI[!þ'â‘B6›Å4MLÓ$•J‘H$Xázn¿évÞ½ë]¶Ÿµ3ï6·}åvÖ/\OëÔV~øÙòðʇ±4‹ÖH+OÏ{Ó6Å}–=ÿØÃ;7¼ƒmج»eíí Ã[Ç¿'Ïzþ¶“a:þ J _2A§,˜R\&›À{B8Oç M<š Æ<ˆŸñ›!)sÅý b‹¶S@ø>HýsþÚ§üö¼þ¤DßQýçjNºç¤Ê‘B¡P(F60}ãF.üÄCd³…ÞŠÁÈö·ª8ùä#Å›"œXì ëz÷SÁ/¨´ýâ"Z2‰{w©HƒŠ±•>€ž`óK·VúPJ[9Êñ;Ó“+àG1¦K,—ÛËÏdtS14ñÓ¹ùϤcGoe1™¿i(óÜP½å}o(‹ßÉÊbyÁÁ¿ÈÕøªi0ï[Oªœ(}@†Dvu¯Ú¶M"‘ pA«¯¯'—k$BÐoü€_ýê:d¨¨Í/~QË„ ˜<ù Z[?ÏâÅopÉ%w³ß~—ðÍo¾Ã 7¬çùçOäoû›77‘Í+sé7Ól½c/æ” 
hšF*•âðõ‡3÷¹˜†‰ã8œ<ód>Wõ9æþß\xùîž}7ï>þ.¿xã\ºãRß²˜û6ÝÇßöÿ7yº¥sï'ïåK/~‰×.à–7náéiO³è¹Eüuê_ùþ/¿Ï××|ÓµÓ¹îÙë0M“ص1ÚÛÛEz=Ø›!kB2‹Ç‚c1ù£¼·MĽo"îcÙóDêy "¿ðgóë8à¼#D9-ß?D“`ÖCuZCàn÷qÏ­ ôû }R~{)ŒjˆÐÛŒ8†Éë&óò!/WÚì …B1Œ±M=ÆŠh”Tjè§©Q olàˆŸE“Þ&2ÇR©TŠH$Ò3!ĸ0˜Ë7þÝ!>_i !nËž=T½ôR¥£$ý›£MR®( ž8>½XWzÂåó…“Do‘À²`Äd6ÿ¹Ôƒº ‹w]áå’Ïe‰®Ã¬Yë€3ÊtuʇûÜx¶þo5ô¥&Š…¸h2߈ ŸÅâ´À:Ðyâu^¦(Ù,œxâvb±¯x hAˆoº®‹Åд8çškšH§3ÜpDZí õõ0mZï¿ÿ>Ï>û/À™´¶Fxé¥ÑÐp“&}™ f²cÇxæÍû“'_@"‘àØcÿ_¼ý5.»ìlÚ§¾Ì”)c9üðј¦I.—C×uÆ,ÃG_ù(g8g°xõb2™ VƒÅ\q°,‹¤›W–Ì[Â]ÿv Î_À×'|÷7.ÆåÓ£ÓYÂq¿™pIòV†Wrñsî3ç`š&óOœÏ]Sïâ;ç}‡cËm oÃq ÃÀ4M²Ù,{jîç¡ÃšøYòA–ßy>›íÍX–å ”%Ü5$81=¢‹û?ŽŸÈ7ÉF ŒýBÌÉ`ÖŠð\‰ëæEÓL~,ç‚ûÈN}«¸‰ß€q#Dtp“úÄï͆íÇoçå)JˆSôÇñ–凓ƒÈq(súÊ"sò3ykضï9éÃàëž{žãÞ{'°fÍû,]:‡SN™XéË¡P(:ÁVìÞͶL&üºëŠJá Å`&TwÞ@.*ù„:8NîÅ÷Ìû­R !nÛ¶m¼úôm•>Œ’t—­/$©lµa)º÷DT—ºŽ‹˜æòÿËâ-Nà9øÂœÔºê‡B!1ø–½pÕUë*xeúÎ~U{Ñæ¼Õ·K%— ¨L BLÈ%JpS 0–‘È8Žïï8 „Ã1î¿;wÈñǯeÅŠ ‰DÞãÆ—3uj;§žz oñÿ~ÿ,{öB("ûO#çÀ“/>)î•jà—P›ª…„œ¡dÈ¿ŸB0':‡ ·Nà„>…W/dõ«EE« ;qþ)d.—÷_þ-Uùꊗ®DHl,#ò—¿´ðþÄ÷™0a§]sŽã iš÷—J¥h,*sOˆB"›àøµÇ£çtßÍY£ài©}#°"¿]ô—@ßXØœ®õ¿hw9æ9`ÿ"Kò‚Èß"úâÖ5­°ÅEz¥SLX–x¸¦i¢q(T˜Ÿ×¶ýâK²˜H2)D|Mƒ§ŸncæÌ $û±nÝ{z蛼óÎdžyæ]žxâ];¬_þò ÆŒicïÞµ¤R?%‰pß}çÐÒr;†áx•èt]§¡¡Û¶Ù»w »wï&‘ø3ßûÞ—¨©i¢¦¦‰;ïü—]v.MM•¾š …À¶m/q{ð5ˆßS×uÑuMÓ°,Ë o®ÔÇÿõ¯Œ}8Ó ßó^¡¬ìqáÑG_'ïlj˜ø¿?⩘\[T ñ'œð6ñxœ;î8ޝ}íMZ[÷ç‰'"ÔׇùÎw¾C(ÔÈâÅëhkû ¹Üñd³ðÜs-ÜvÛáüZžzj!PG8Æ4Ÿ ¦¦¦Ò—_1L±mÛû“YäkÇq¼?×u±mÛ¶½ß>iûX–åý>är9,˲,t]÷~Íš5«Ò§Û¯dCÛû>w+ãÆýžùó‡~šÅðçù,¼ÿüÉþÃàNhhh ÓS÷Î`J"ðCÎT5Ô!ÏX€šššššh <œ={vÁûJ2÷嗹ᆧ€Ã+}(C–|ª ² ‹úõ´bÑOÇ÷”“¢œl¶ºö*,Îq¹zõyñú×wî¤ÏC$—B!@^Ìb"ÞS…U¡(©”¨r|É%[hiÙˆ0Í_ýê&MšÄܹsùÝïÆ¡ëKqÝÓqóî4¹\Û¶Éf³Bp3 ÞX¼˜3? 
¡3iŒ%fö¶ÍDÓ$™ÉøqíóŽùñy¬›©1îˆ#˜ÿØcÐ<ž±à$M¨I:2á þü€o„`R Ôaj^IŒD„ÒPñóm€C¾'_‡Ã"VÆ0|ÕÂuÅgñ¸Ø.æüÆFŒü¤¬ªêOÀi€˜dùír¹8÷ß?Ïnßî]GMÓ0 ÃË{×ÝO #Á÷Åa|ºÞÃ\;Ñ'bâ;6 áí´zu¥­NÑA“-&èE#ÃAMS|¿ îçlVx>ÊÚ*¶ ŸúÔ˜W(#Ókvsê´!õNà‰hGzœ ºJ¯7™œ]1¤ °téR–.]Zécé”;wö¯ëu0œg„`ýߟ8ˆ¾ 8æ/ÖŽ¤—\>ú 3ÿ^o,ô¨+¦¹¨ÐhUUÃ;+Ì ¿;ýV®èÃÆYü§¢RF>ŸÐ1O€B1€Èp3]‡ãŽ[ÃêÕb’ðýïßGK‹I&G×§y ¿”Jôú •¢ªªÊ÷•ŠR&#\mâq!È™¦¯,ݧ3Úú_øüç©];Š×¾ýmôÛ-‡xDÈO:° ¾c !neÎ2ᎌP$¤¢¨ý8$B•q µµâu&#fÅ.B2þN*!†Á(MCO$˜vå•ÔÔÜœÆÌ7ÞÀHý»Ÿ4+›ÓäLÇáÌæfH$8íÜs™Ýul\¾œ_ÞwŸ'¢‡-u…®wœÀôt ˜Hä=ã"~[‘Üu×ÁŒ@ÅìN!¦ãºâ)ª}ýë{9õÔÍ|ï{5ž÷c8 GÝÄúõWò•¯|†L¦•×_ÿ4sç~ž… ¤¡áO„BØ6œpÂ$O±cG+V\Ã~ûÝÅk¯=ÁGX¸®(²òÖ[¿dÚ´¹\vÙ\ž>Žayä,n¼ñ}8à¼úê_q]W„ýª«¿ÌæÍ~tà 7Ïþ°‚óŸ3çOxàD"âÜC!øÕ¯yî¹Õ|úÓþ$háÂyìÚ5‹P訂í'OžÍ¦MG)!n„aÛvA:‰eºÀÆ‚ÂZ]×½¿H$B<÷.É>:›Íz)¤7\góÏÛáI- å¼2©TÊð‚ÇðÀTú²ö&bÞðÄWž`¿ý.QyáC]ÿ_} D~Ô庡޸w×TT°¡ÓqkÖ¬é°lÑ¢E9Èíÿ·?öÔÃèᜣ{diÏ}tqÖZÈPÑóé!Î@èDÅË~P”ý† é•….ìüú­t-/þoúÞYaÞmÇÌÓ÷âø§/%h‡ Ã=ÊPÃqüðqÓ4inÎ._ÿúKL˜|'Ì=–ó¾úUÆ'âÕ"!ÀišPÒi_jhÇab.Çü ÇW=Ð)tézG÷KVÛÀ&˜×6ãÜã`”/ÄE'“Åï„þ#Óè¼™ïtšò˃Ua$‰„Ÿô*H.ç/ ®oÛby±ûY2é }U™ m Ñknš>]lï8â:È$[º.Ä>]çÄ|ÌŸö_ÿũӦݱƒ¦yó:NM“׎8‚ìÙÓ»^¾cð¿¦hTü]vÙxöìé‡JЊ^ÑÐà{3¦Ópî¹»9ùäüå/£Y¶ì~.¼ðB~üã—9ôЯòÄ'‹Må½÷ÞàÐC?‹ëÞÁ³ÏNå¸ã4®¹æûTWgÙ¾}3ùË¿bš&{÷êüéOâé§— ÝÆ‡?ÁqšX±ââñƒ8üð‰Çãž}vØüñ÷“N§1M“H$B(t ?øÁ»\tÑ™€/¼OŸþ7ª«Ãçòá ª«wxË Ã`ÇŽ=úëΞ} Ј¦ŽbÚÛP[[ øº÷œ9sغuqÁz†cÆ,aóæ–‚åŸüäó¬Zu--ã+ýÕ*ú éyxâY6›-ðZ qº®“ËåˆÅbžè¾çq<÷D6)ÈɰÐμZŠ’÷É]c‚ÛêºNk ÊN("winã"ùÐ>ýлTW¯sS _€{Z˜¹sgÇ<Áõ˲zî'Ç»’@nbÅÐÆâÖ¬YÃõ×_ϵ×^Ë¢E‹¸êª«X´hkÖ¬¡ªªŠP(Ô¥×ÖÖÆí·ßˆP× ‡][[«V­bÛ¶m,\¸°W‚ž ¼ß4 íÌ~<ë~RÏ‚i…†åŠ^,ŽŒ”—6˜+N¾wÛè}©:¿,Kå½lËeÇ3gîìÛA/Ê>œ¬^Ù]XÛ@#Ÿ<'"·ôþÉf³û^ihЙï« ãGaZd³Y,Ëá‚ ~Áï8I¹)d¬/põé§“ž7Ïß(•³âd²0ÙXg¼K…O¶†üIØcpèÖ÷y~ò‘õx¾7Š1:&¯5 _"»g©A“® ÷¢â§@ °§óRކá)k×ÞÊ7¿ùÇ·¦0›}*%<ïǯTS[+—L†¥ \µp!UÇî8Äb1Ò²íD‚N?»7nϘJˆqápØKdßS: a=ï¼-¬^½¥Çíô–röÅC©Ñž}öc\vÙÉèºË-·¼Ç¶mÿÉ/ùiÆŽ}• äÈ#OåƒÆó™ÏÌ : Çq¸í¶™ärS¹à‚¹óÎÏó׿¶0~üÝì¿ÿ!èúÑ$“I\×¥¡!ŤI‡n®ë²|¹4aß®æÌ©â¡‡j‰F£^Þ+MÓøàƒQ|üã…÷óŒ3صëcËN?ýx Æ9çöéçŸßμyuËÂáj~ó›û(%Ìšu6çŸ?¹`Ù…NãÕWÏïp fÌ(lwêÔV¶mû"55¯•å{Sv\>äØðÆÒS¾Ò4ÍË9*Å5™kM tíííÝî·8oé@ÓØØØcoèþd Æ ~ÞOzî9f̘Áºu¾ç½B±¯”ÓŽ éš£Ùvþ?1¹›u;‹‹nbœ˪{aØ0 ¥¥…D"A8¦®Î¤Üzë­<øàƒÔÕÕ,/¦­­K/½€ºº:,Ë"ˆ‹N$´´´PWWG"‘ÀêEÙàùìóÃ_ 
ÆI*ú…R݉t:”ýE_G’õdq‡8þWRìq;P_S9íx×ĉû~€Ås/Iå“ýÈc7MÀKÜëÃÉ?–¼Åû ®'Ÿ:Çb1lÛöržÈíS©¶m“J¥¼§Ørà›N§I$^®¬R¸®ëµee•—O¡:l'‹Èã‘× ðë²ýRmËõì g²ïÊŽ÷ņKað‰O¼†eYD">û­Û¸ûÙK˜¨éÌ«ÛAäâ‹1.$¥i4ÈA‡a@*…sñÅ]—M–htìòD½§ÕØ3fÿ±ÿßà: jL© R½¢¥·ZO‰F;Ÿ-„BÞù3ñ1ößS§¶vÜ^îWþèÉø|ÃàÝqãx©ª ]׉Åbd³YÚ.¼PxšÆ®^ ¥¥E„ñ†Ã"™[ÀÖ-ËÊ'¨7{õ=Çb«ÕÕÔìæCÊ#Ä•³/ 44ø¹Ù;ìN>ùÉ›øüç_æ›ß¼ï~÷2æÍ»Š7Þø ÿú¯eÉ’ \|ñÁ˜f5étš)SÖ0eÊ›¤Ói/Lî’K¶³wïKœuV]ÐpôÑsðÁ¾%úZ;Ž,˜èkšÆ¤IßçØc÷/8ÎcÝÂÔ©¢½àÃ0:±Ø;œqÆúçšNw¼_¾ÿýÓøô§O.Xv sY¹ò_:¬ûÐCÓ9ï¼âãÚŸeËfwXwÍšªúôI'-çè£/Ë÷8Òí¸?†ƒÊ1išd³YêëëI$ÞüÅ4Í‚j†aÐØØèy®år)^H€IDAT9Òé4ÉdÒ ÿ”ßGoBý+MgÇÙÖÖV¶}ä¸ÂÉÿ5ÝwK—‘}Ÿ*”ߎ`ú/3㬳º\OzØ–$A¡ ø©ˆ$Åù C–±«V­" •¬ŠZUUE2™äÜsÏ% QUUÕa¦¦&ªªª¼í.\Èé§Ÿî}¶qãFn½õVÀWœ{ó4i—ÛOBlD¨Rázåäÿ71›Š>§£[°êkñ·¬Ö ¶a R9…_Mc~ÝD`»r{˕ӎÇîÙÓù‡¶í{ÒÄbâ}&ã<ù Ë_„x÷ûñ£’L&1M]×Éd2ضM2ï‡=¥¾¾žÆÆîoâ!ÑuD"áå-)ÔF"‘‚$ĉD ©ëtÂÑÙµÇãžÐ&²d2Imm­WMÌu]âñ8¦iüȦÓiï‡Wæ‰F£¸®K,óŠÈ|/Rl …BÞd@>u— ™Aˆx™LfPxvfÇýÑwÄâ‡?ŠA4Ÿ+í·ìfÛ¥éO~Œ†ûî#ƒ=—÷³~ùåD5ÚCñjtãd‚zjÓ±3‘%™óe½ÿ*{çì-¬(ܹÖx—™Þ{úÔÕz2°râ9üüÎ[3¦ºp®Ât›Ï>}Ú4B¡w?ìë_'sÁ‡?Ì´“OfÇÉ's÷Æœ›ÉW¶†/ÄVnW__ß㈼nÚ+ýr_)÷˜¢’ÈþŶí‚cN$|¯#܉iㄘ2e6kÖÅAýÖÖFO\ûÙÏ~ÍO~²š©SJÄÃÒÏ}n-°Xàµ}Ê)G±wïÞâV[Û<¦M›Z°,˜@>ÈI'=ÇÂ……“‹sΩaófñãäSê·dÙ²sK^Þ$¬Þ—œˆqÎ95lذ èÿ0ëálÇåFVŽ–^–ÒÓ ð~³e.6™? 
Äoq±$“I¯út)äóÁðûÝW Ã`÷îÝek Ç1Ä0÷;7Í`ùòÓ”'œ¢ß(·»Àï½×my_ T²Ï †Aé p°˜˜bH3„a{¼]2«ªª¨««£©©©¤«æìÙ³¹öÚk½÷Û¶mó^¯Y³¦ íE‹qýõ×÷ê 'öÖ›(E¡+V1&bæ£wF Fln®Î(UóB£´#Jñ:²‰þ—žrR'Má;¾”›rÚñû›w3åè)âëúU!_’0!Bàd®(]1»¬LÀ¯m¸5 s Qý1•ž2²øí×ÖÖ’L&½pOém …ˆÅbÞ$)—Ëy9×N?^C2™1´˜ÖÖÆ™gžÉÌ“OF·,œPˆ†††“,9i‹D"Þ¤àSùÔGÓ´‚å}}ú,^±h4ê=O§ÓÔÖÖÒÜÜì €Rˆ4 ƒD"A,#“ÉdH¥RÞçò¸‹örâ(Áà©\϶m6˜÷àËå…)Š–“Îì¸?úâ ‰TWWã8{9S×q»ÖÖ†aü„æ|ßAtµõ€[W‡^WGq»ˆ‡¡`9vY˜ ;™`8ª$ß'·ÌœÁçnýœxÓC­[4re‰ÞøØDÂlìÕ6m󿉾3/ËÉe*•ÂF™wÚiÄ¿ö5b±§Žë²ýÌ39í‰'D¨k¾ëòñãyç•W˜ÚØ(ú¢.T‹HDt5é‘Pî1E%BB8öúæææft]çË_Þ…iÚLœøMM—1~üÉD"lÛæW¿Šò±Ía÷îEd2~õ¢ï~÷K|ï{sç> >4.Ó´jD&VÀcÝT´Ìè D¿YÜw†áõ—ŠŽ G;‰¦izêR©”þ,û=‰|Ø')5®èn2mFÉ" C‰r‹ˆ5®D\—ÓVñôÓ—ïs[ …¤Üvüض­Ó'—®ë’J¥¼yLɾ)˜TÄ ÙE oÔ±S¦LéðT…%]¹<×ÔÔx§ZZZˆÇã,[¶ F>{öì‚u»cûöíÞMáTU±ÛéåŒEG­”Âì fˆÅ%>-Ä 1?iç—u5æ”Õùݦà“E‚ÈK›È&‡EðÁÐøÎ;LߺµlÇÜßv¼uëVÖ¬Yã‰×K–äóde³~ŸxÜÏ)%Cà‚å®Ã:ì1àó:ü0áWblnÆÊ{T8ŽÃ!_ý*=ó ]v™çõF=//Icc£Øw>O—žOUüè·¿¥ 1á …B¼ûÌ3ò©OѲc;««Ù~ã4毇IJDn°t<ùR¯Œ_ Y¾„à8~^6ëWœqŒò³â$ø¦‰U_ÏÔX #/jjŽãy:ɄʆaÐ\\z7hsI¤+‹éÔ}œÎ$†á‰„rR‹Å¨©©aÚ´ilß¾½W¶Ù:³ã¾ØðŽ;xýõ× ìØ¿^Ð>k'N¾2\ Ñ­¦ªª ¼ÃälDÜß²†B°~±ëÀ= 2O‰Ïâ;A—ƒ(~Þ4#:ˆ~‚É$̹t)s†È£ó«¶­âäc¥¾»$"Eèº^0aÔux<îy††òžÒþ‰š¦yžž’ºñãyíW¿bêý÷ ©Taÿãí§ð}KK ëÖ­+ú“þî‹·mÛÆºu뼇ŠI"‘ S]]M$!™LrÇ“ظq Ù¬Ã~°Ž#Žxˆo|c—]v>µµ-$“Ñ‚‡óæ9ì·ßÕÚž>ýn®¼rEÁ²ÎD‡`ÂwÉ~ô±’øRbF©e>ÑB¬Y³†×_;vì{cE”kl<{öì!WåUz‘[–Õ!Ï«ÌÏ&½ëÇ!—ËyùdµÑRôW8©ôTŠ´µµÑÔÔÄûï¿_¶}ôç¸:Žƒ8ˆÈƒ³ÎªCéü#‡––6nÜ8äÆÇAg¦m88µÓíºÍ1|-ß[(G !BSSS¯ÆÇcAÄI¯Zµªdhªl´”×\™üpÕªU\{íµûä–¼}ûvÖ®]ËìÙ³qªªØ¯úààÞ5RüÛtÁ’®ã{ÏéO¹dþ}ßUÃÉ/sòïå©YV`•yèzó[^|,‘^n?DéÊ«6„¸´và5£G3~ûv&–1ô¯·µµ±víZÆ.ZÄäk_E×§‰GˆNÁ¥'0á{ý´æO>/耳Ù,M#‹‘L&Åòâ‹9cÆ B> &ˆãÖur‘ˆØWPL’^wà‡Åf³œs÷Ý<¾y3Ÿ`-^ ­­ÔˆrLCC‡Dô!Ç¡~Ô(‘Jlè·+Ë+Ê®+\©4ÍÏoÛ~%ÍàõÖ4±.ˆvd’ÿ|ÜÑÔêjq<é´ø, (èºÎO/ºˆH*%ÎOng¥½òž:&Ã’ÛÏAŠ‡Ý 'ÁÊk»víâ§?ý)[¶”/ɽ´½b;^±bE¯ÛÙ±c›6mbíڵ̮ ︞Ñ7ÜÀ¸^ ¾~=ÆÜ¹]¶'ï{ùœ ô/9 Ü3 Ö >¯ÿh¯Cîþ|?á î Ë,öóChľgÏê¦?Ë¡½Ü®»0>9)Õ4l6ë…@Ê<òó &pøm·Á—¾ThË–%ñ‰pÿý~>ÊtÚ$)üž?þñ©¬^}q‡ýlÞÜû{:ÈPú“µkײiÓ¦²qPž±1ô\ð¨‰DÂËÁ&Ó;Èü¬ŽãxŸóÇ‹6åŠR „Ø+ E E¶mÛÆÚµkyûí·ËºŸþWȶJ+@üÌ[–ņ · ÄåS 6nÜÈÚµkË*ÄAÿƒúÈè;غ穈dÎë. 
z¢È÷Š!ƒâzÊX€¥K—zy“¾øÅ/üÀ[–Åõ×_ϲeËJ懓\uÕUÔÕÕq÷Ýw¬'E¾àvÕÀÁ\ yÑ¿"Tª^Pü»-½äœÀgR|“¿½)„àa"±`y`© éøn¢òs) Å^q|o» ‰ü:N~½4¾x'÷[2aR`{#ßþ(„P#c8e²%y^ÝË·Â~²ù×Íg¥)cEKÅ¥ö†ˆ–²®†ì9ä9¤ÿó¸éO;>äCX¾|9PõÒK°`ï–Ë•Î'U|MBt°™×-“ÉxÅ ¢Rô²,_ìróÉ‹â'ó öÑù¿’„B~ÈlÑò©º?þqG/8YR3ˆ¦•ÎÏ%½ƒíG£b¹¬ ‰î#Ç¥i%E˜[׬×ZngÛâ}6+D;Möm¾~ÅüeÚ4ÿXƒ^x®Û14±¡A\WÛ.%õõâxGl‰ø×:/Š`\|ñŬ\¹’rRÊŽûbÃÓ¦MÃ0ŒilDÔôñÓ¦qÖƒ2ãwH^z©_X  âgïÆ@š·1}–Arð °Côlpu0¤DºÝÝ dôèѼùîÁlݯœº®`…B„Ãar¹©TÊˇ(…¸—æÌañêÕEiyoš¦×5M¬S$èËÉ“ÊAöųfÍЪ”O?½ýèxn¿ýn¼1éyÛ<þøn¯ûÿÏÿÃ+¯Œâþûl[JlŪ½gŠþeùòå¬X±‚iÓ¦•¥ýrަiF Ä6˲.FŽ÷ÿ÷Ïõ,XÐ};ŠáâE‹X´hMMMeÝOeÕU˜2å}ÆÖWwºì/»$Ja±U%u±téRjjjz<> "Ü­·ÞJ"‘àœsΡ®®Žªª*ïfXºti—?þ«V­¢ªªª îZ"s˵µµQUUå%>ï)ëׯïÛ•ÈâÇ?J5'D¡x$«ÈÊ{à‡¥æ•÷¤"E4™q\æ2 å×Oà & ø!ªf~ÒÓMÆo…òŸEñ“0ÊåÒ-UzHqL~.oÐz =èRùóˆâ«éAmD§¬B(Å4î%ÏÝÉŸ«,~̺åõ0òÛÇñU|)VJWZ+¿†õüûp~û4…9 f;ØÚáÚ ÉVH”ùpYíø3ç¡&™ì<‘§Ô“‚ù²<ç5Û«òÕÐÐ@&“é–!Û … óÐí+šVºÂe°ºc1½ÔvÖF<^(\«x@Ãìò^‚ÁÌóº®ó­Ÿü¤ÛĪ46Bmmáñض/Þ†h/‘%–Ç™6B‘ëß|³_BÏèÌŽ÷Õ†ƒ¸À;:4×Ö²æì³™ùýï÷H„q«{Ã_L >ø0A:Yã/wCø¼ü&ÿ&Ž!ÝO°•]Œ|æ‘ÅO;G`wY ŸÉ”r˜.cÆŒá©g>¡µ¯p\?´Wü”U†dëºî}ÏÁ"+³N9…‡6l`qgž&Ѩ¸ïäý㺢Œ¼w >õ©²]ŸröÅåÀqŽ>z .äˆ#®Æ²,?üÏìÜ™!O*ñºK/]Å×¾6‡3θ¨GíwÕÍ*/CÍŽ{Šöe•pYÀH ÎZþ!W6›%{žq²È‚ ™O§Óƒ&´¹T‘‡¡ÆAT¶¶b\âw}ëúõ´¶~CõyŠ~§œvìÇÖÊÜã×KK¯#º"BLJ̃£›T”±ò…ãZZZX³f ---\~ùåž(×r›â1Û¶©©©áòË/çÒK/eÑ¢EX–ÅwÜÑã|í½Éì|æâ¯–^m ˆÄDÒ›-èX'g_RˆË &ƒrFL•#ïYiUïÎGˆv6¾—Y†( ŠIO23°)RÉIhðµÜ—žoK:æ˜ø"˜¼YÝüþÓùs”Ç"EH™g©*·mÆÎZóíKM^›0~¾&)àù}9ùs´óí»ó’Û¹ùcN¾ƒæü¹ÖŽ'Æ×!u8“ÁˆƒõÏ`\ü ÚÊôÔÊkÇß Îëb6ï¦ìi-ŽÁj€'ÆuI B㦿ÏC× Å¹‰wv\Å!·ÒCO"áƒ%#áMgY¼:~Ù.YWv¼/6ä%&ðµ÷ÝÇÔ .èè ~ÅSIÿAB aûQˆ$òMÀIŒƒ{7؃}šx!o•…ÏYêóÿËŸùPQvËþ3 ¹M©tÝò…¬‘ɺìò‚?à?Ñ—]r„ÒyvßÜ1x¥O׿;‚¹»¢Ñ¨ò%mb¿ùóù¿Å‹YÜU#Á>F†š'“Âë6cüÂ…e9v(o_ÜŸÜrË|ùË9äó?þÞ~ûtý$r¹¦9×K‡‹®§8ò~ñâòzx+*ËP±ãÞ s¹¹®ëyµ9ŽƒëºD"ïoÔ¨Q¤ÓiÏû#èm–ëiÕ꤫ªªŠW€ø]tç,]z<†Q¾ßÅȤÜv¼k×®.?ﶸ‘ƒ?X•:†bxÓ>@lܸ±}õêÕí[·nívÝ+¯¼Ò{=oU[ûüùÏô~‡ííí¹ööv½½½½9°<ÝÞ޼ϵ··Gï“EëKâùåÍùõÛÛÛ[ŸgŠÞË}¥óÛØoºÄºÉüòö@Û­ùårÿòóæ¢íÓùöÀºÑü¹ë6zrͺ"×ɲæ.¶o|ÑNÚ•ÛGý6BùÓɽÞÞÙœ?­ß¶·ŸtÏÃ=<™òÐ;Ž···•N··C{{:ÝõFò{ÖÚ={‰D"í¹œ¸ø¡P¨½¹¹¹]Qa¢ÑööL¦÷Ûåríí¹\A7ÐôƆW¯^Ý~óÍ7wX¾8ÙÞ^uÎcíqçÔ[BíÂÎsí~¿o÷û ¹,Ø¿” 1°Z<ÐDñçí¢‹–ÄÛý.»¹]t‘Á÷ÁÃlnÏ÷EÃÓÛýŸ¹LË·aä?ÓòÿëùeF~¿Éöööã'=Ö>küÆö;¾x{{c 
­¾’ËåÚãñx·ëȾ#—˵íɤÿãÐØØØÞØXxñx¼Ý0Œööx¼ýÍë¯oÏH›ÏåÚs¹’¶1PôÆŽo¾ùæöÕ«W÷Û¾“Éöößüæ…öiÓÖ·/YòTûi§mnÏd2ßAssßn ÅÀÓßöÑú:6hÛ£Ñh;ЉDÚãñx{&“iÏår~¿ 7„Œ¿¹¹yXŒ«†Ê¸¢³c•¿…jœ;²*v\<>δ··ÏºøÕö+®ømÉõ›››Û[[[»nTj™v1mmï8°U z:›;•bì¾ y=%X©¤7L:î}>²øÿ€cz¾‘ C ?® Å¡ÙB}®F„“væh±3%>/¥ZG‹>×J,—÷k”XܱkE4¿Ì ¬+=ød˜Ì?וßW~¨›e¥¶—^‡]E­Éó D>J/•ÐÌ@tìðÐ]åMFÛ}µã£Ÿ~Z¸Et—ÀDx%fáOü€3í3½ÂÙlvP>I‘¤Ó}ÛNºÄrQ 4}µá s/ßLû{&ò/¥sö„â|ˆ²oŠâ÷•Ý8+Ê=ËÂÖz'ŸÕu¼Ö(ì~µ¢íeW*EÞÁ}IçÞâç!xØ©@ûá|{›jkÙùìt®ÿæLÊŸrþO”ØOè‰7GÐó3 ǽp2Ã00MÓóp¼$ëâ;‰òï7ßÌóO?-<[B!Þ_³¦‡GWúÃŽ{‹ëº8ŽÃí·Ïà•W`Ô¨zþøÇ“ó‘ꑯNªè •°ãÞà8ÙlÖ+ö"«wÆ9TÂlA-éúÃŽ-àÈ;1ŸŸÉˆ¨Z§tì‹Û€æº7mRÉÏMÓ,È‹Y… O ›T ;Lˆë+;¦aêÔÖÞmßIQ*?ƒ ¢!fNåN„œ¤¼1Þ2V¢S˜ä?‚I"ÃÅ‚§ðàÀyçUúðzÅS÷¾ÎØ'ëá³=(-/Ƕi8Ó=Ã0¼ü**lB1Xؼy33gÎì{N$“»Iâ½oNE0Ýg!CM%‡’”~Î<ù çð•-´´oz—ÛgÌüŒ üÔy2¤µ»lˆ=™ …ˆÅbd³YB¡—`Ý4M"‘ˆ—‡2•Ja».÷>ú¨&?RÉf³ÄbY&Nü.ç·…%KÞzœQ¡2˜¦YPx! uV¥Pì6`<ösæ|®§Åé¥èA!¼dÎðâ!„L2t4õŠóÙÃo㤚ÙÀÙ>ëqÿY<ÕëØX1t]éèŽ7X¿þðÞm$Uä4mnbÏG÷ÐÞØ@CCCÇõ5Dn3ƒòM4tJ qr #‘Io‹I$D†"™èQ¾ï1Ò?².Æp`ëöýø`ÇŒ{¹® º˜\ó õDŠáÃêŸÏañ±ÇöleÙU™×ÒE­·Æ.:÷²vNõEÁv­cDz; ®KϼF gÛXøyæL„n)Sj«Û÷ õoés …pÇq°mÛó“UV#‘†axY5MC×ul™øl„ñ/ÿò,_ÿúñ,\ø;.»ì(þç®gÙ²s+}X E¿S]]í qÑh”d2Iº¯^àŠÞãÐ÷Ž}cN6K81n\÷+Zˆ…_ÄRz+÷Sw+3ô™h\’J¥ úaé'—% ,ËòúÙX,æ%0—ŸC‚¯-ËÂq¶ý¹öÿ³÷îa’”åÝÿg9-ì²µ  ³r¨eT¥Æ]5bªåM4à‹vk”5ÑîH²FóæMw4Š1Át›ƒIÖ·[àÏ8e6€Q£]Ƴ°›)4¢*SrØáìÖ²ë ,óûã黪º§{¦çØ=3÷çºæš™ê:Ãr¹Wl^mT*>õW'ó§:Æ…nàmoÛØë&­Ì=ØÍ½¨[‘Øò³ uŽi)•J  …QE{÷î¥\.OJ¥,<µÆO'|QÅcE³xû¾Ï>pþêâºëؤü;»nü–‚~%ŒM b.¤ ¬¡Yƒ¤ê”D‰•Û˜EkФL$™ÛøLÒ USí 0©„å3U”c{$Å¥#®ˆ–9Þ7Ÿ÷Í^3sæ?¿ùž¶uëÔÓ÷ýö4­ßÿb„u(}M߇¦>õðlÙräÌ+Êh­!ÂEQDDÄ•{%¾ïóèCò§¥?¥X,ÆbUµZ¥R©`Û6¥R‰z½çб«Uñû¡+K2nª`lÐ&é%÷E#œIÇGjy¬x _1­ sí–ßí’î¦s™ÙvµF`Vç'y6Òv?ÒæxË”¸eâe<ë‚S¦|&ãÆ&L_2Ns(‡v½«Š¾âà©§žâøãŸène!~Þx—e³Y“dö³ù('[! 
Clj½ÏóbA«X,Æb—l###±ˆ•o¾Ò‰móù|| ™d¥Iß i¡n&D0“6ˆ(©r¹O¾d2†!•J%¾éýHân™<d2FGG›–)Ýsø‘GpNüoàM×Ù·owüñl|ýF¬P8¥©:Ä'ºñv1ƒÌ ff7þ®¦>ŸçàKú,Ùe.—Ãu],ËjÊuÄľïÇš¼Ô>4›Í6yÈ Üw_Ü—Êç@“ð–Þ>ŸÏÇ/;€8Ïš<,ËâöŸÝC´öT^½õü¦~>ßRÌEÂÁ¼0ägÿýßœsà ”n»z½Îwÿšœä…?ûï9ùdòÀ'o»6mâÏ,+0"’¬ñ‹¦ÆgåryJÈûL!ðrM_óš×ðâ¿x~_^ŸóÚ×>ŽïGsÌ•|øÃ½M’S2ÜËñ0ó¢IJñ"Kß^^j{Ù—ˆVvê™+¦Å3¹ÛÓ*5±’98èŒÐ<ŸH‹ti:¥'êD:zJæ™­ÇÁÐiY^¡¹аãúëaË–EøÖú‰˜Ëi^ Lƒä è4D÷0†Þ)Œ|˜Ú‡„VIÔãÁÔ>²¡&®^F³²]ƒ÷ýæû¸z™*q>pðÖ[9ñı"S‘ŠçW‘Dh“NSRIÇ-!§uÌ÷Ü*ú¶&¤mÊ ½m¦scŒ²ð¤sþÊ= ¤vï #žvè>`ª×α'Û%ÔÚ~TV4}-ÄE?Ýǹo8·Ë•žxâ ²Ù,AÄ!5ÃÃñ&ƒ Û¶Û6d›tx x7È„N&f2áJ r²n™,vC;¡n6´ž—„⊸ã8N|­ÂšœG:\KÚ“nÿðð0õz=À)¹ÿÔS9òâ‹§]gt|oÜ£jWu¬ô-7~év¿ê>¿ÿ¼¶Û,Ì`¬€<ÊÌæ”¸P(4å+.yã(ý²ôaÙl¶©ÿ·,+Þ^D8h~’0¥×éú”-«i»ôß®ërö_þ%ßù¿ÿ—7u™AúØ´@ÿ¢3Î Þòÿï}¯™×]pµƒùð#pߺuñÜ­†+< ›ÏÇŽãL™OÎTøÁu]öîÝËæÍ›Wl_%BgŸñÜçžI¥2Ë‚Q)Z[ì–ÏD ‚dþ¾„J˶IØtºH¼¤’ùOÚ l”îæB³±øÖ[¸Óh “EY>óI’4Já-ªâQ;BGÄ«¯éBÅ>œ ’Ú$Š¢¦ðÓüL•ÝW¢âδN§çW»” !Í)"ÌK©,ÆÖQM¼åJ$á{!æÆ×Õ/¨®xQ¯/ÜÜ €“ÿó?yüñÿ³<½áÄfämˆ¼áHW{’’êé~(íþ›ö4óYÞ!ŠòFgt™ŸÇ,9åÉÛ.¯T*íÇ’yŒÍäi Ðz}ÊRÓ×BÀ“ß}§ìÃØ·Çøé-?åšÜ5±Ø³¨VB2iêf@"b_zÝvÛ•J¥¦7Ž3Ñ:Ùœi‘0“ÉÄá­3] kM{‚ˆw‰xìI­G®‰H81Š84¸©ó 5pC»hÇ!ÍŠÒÜ6>Éž¯ŸÂQ[»L/*¹áò$êÀHw›¶"á¢é~¦P(ÄÞº­ž¼i7¹§úåÞz΃f°vï·¾Å3æ f‰×v¹\ækï}/k€¨RÁ/¹äõV…!¸.Žçá”ËœèdÇúõL¬]KŽf9i…x ÎD©Tâ–[néõ¥\p¢.¹d?ÿù§9餇ãô3!!›â‰&ÞZâ½&‚§ÌÑd~&ópH„³vQ§O#wº -s’Éª×ø©“ô¸º¹$ê±ôâATkìOò’ˆŽU ñÂÁCtÙ{ì±Çøaî‡Üý½»¹àypÊÛš]C#Ô¯R1£Æn<&Òa Ý°ÐB–ä¼›wG;Ñ0íM(ç5‡Û§spŠ—›xÁ-—ûç2Ï㼿üKøçžÓöiq½¢¸gŸMtÊ)&%Cã9ù>/âùÇÇ·žzŠ¡("l\+ÉÛ,‚œ„–5¥ÐwqÞ<€¿øÅLNNöúR.(a¯{ÝaöîÝË£ÞÂ[Þ2Öv=Ñ ™Ë|KæÊé2‹$·µt6IªÄÖ\i=£“'(‹­îk"rIþ"ñT±#K"Œ¥=cÓ¹‰ $±¥bÒ¢×.gQÄ•Pö/ýLú7*ðÅa_²nÚ´‘këúâõü"¿Øë«¿$H¡2˲ºz)»ì"D€Fç´“]qI[‘\^rsWÿ‹-JR|¹œ"º”Hªìˆ¶GÚ*LMàî¤vèTi¤ w]qWo®ù<‘Sÿ",nXjkâË™^ ¤«¼ËºR$AÄWÙäy“]ÖWQeÕðN[³¦ígÓνå™ZfnÇ”eM_ q°ùÌQ ñ–Ç÷¡TË2#fË2Â\²öó£|ôø-¼ñÌ·³ñ‚fâyP«m›¹dÛô¤-Ì:a8}–PÙ¶1!‰=Â0É™ä8´¾Ò™)ܧ•…|vóõi'hŽŽŽÆaòù<Žã088{®Ôð¥nyäÄŸñ¬ Çwü<d 0B¦æ†Sú™»<Û|‚ç=ïÝo4Ëw ’@ ãHNKȤ󯥙틆^²~ýzN>òHžñš×,Èþ‚–ü’ˆ ½slÌT@náå/xŽã!)ê.Ö'á}ÕÔ>Wz_þ¡íãŽ;>ÍÕWŸÊ9ç€mMY'$q°ï60cèé„´vO^yÊ÷D€KOü!QÇI’HgI’ʉGšxšùMÂå|ÑLr_ɽ?Sa»xÙ™†?­±§é>ÆniG:ay«°×ðÚ8´ãÐR^ùžQ*•âe};Ö¨`¾ÛNÏ I\?]þµéðS¿Ó.¨µÔqExK'ÕR눸kan|©Zi‘ä£e{òăªywVñÓ¤Ïw…é§>°nlŒW¼âA^ö²E<$¿„D0‚šx¯…$âYºâ‹T„‘Üéõ 
¹oéù¥WÜ?pß”eé‰MHsº¯Ó`²UG_ q=ö'ÿâ.àR³ \6¯KRbÀòÖö>ø­Ë±"+yg³F¨ #àÉ$- Ré,ºY¤=ºr9³M™Ï¤Òi6‹•Ï›ýd2æx`þO iµšå3Ç¡©6·ë¶÷ô“cú‘ƒm›m[ÛœÁ|Þ¸6"T¶+Ú°´VÖÇ÷ý¸zkú3 gm %[‰À¡'ãüy穨¨(ýΩo~Œ½Ñpœ·N¿b: õt«µ„›¦E·tH}Zx“¢Ë™µk×òÂÇY¨œ™^°¤_ÙÀ Ãм ©Vc‡‹€¤¨™Tb€Jã%KßNÞˆ}û>ÕW˶m—LùL¢)%R²¯-0(’±D#\™¯[ð–. aO6uRüŽ/pàòþÓ‡ç9ðDÔx™™OÆW0T€ÐJ^búÿ-ÙiŠB!CÙv2¶³¼UH¯T’qWzŒ#/R³Ùd,U©4¯'m Cówë8ÃóÌu CN|Þózý- a’Ífû{Ì5Sn,W§»§óx·_ÉYZ# aMBœ¥º‡äh7V£…ô±ÄŸœZ']5d!ºÐ6Y€s¿þuî¸ã \sÍîÔ¡ù…ADò] ’|ǤÖsM4ÜË{}e”åÄÓíˆã>¼¡i¹Œs§Œ[Óù•UK_ qßx䎺sóBËJ„(Û†¼ûß²±Å”²\â5'ƒ>×5ƒ·n=âȯX4GÑÔm=Ïo¼1òùfa.—köškçef=hŠðØN<ôýdðEf@ê8æoÙGµÚT€a±(•Œ/w¹\6ÞaWMWƒái¥{XDÀº;þ¼Í‡7²Žç¨ûº²,¸ï´'ÙvƉ3{÷z3ïKú£t®"˲ÚVò\iBõ¦ÁAXÀüj³Me ‚šmÛ|ð‹_äå/9¬[çÿð22žGeãFÎ~éK)Ø6k'&xÁÀ±ŸúŠ! ág?û4_ܜ̧Bâè’%‰˜œB¥Òìu_«™ñ†ˆCéu¾oÖOEÒßa.×ì¹ïyP¯7¿°ûÏgscË‚†p“‡íp8ÔØßEø@†0ÐPª$ÞnadZ_oÜÀÛR®dQÔìåfïÌšc{ži›Œ‘Düj¥ñb0Ë@2j'òŠ˜&ã0Ù§lé~A®³Œ}dÿÓ‰Çñä…Ÿø÷üʯ,šmõ’(ЍT*d³Yn¾ù~ã7¾Jß»fêî§Hš+7¶zOÚ-?RXAj¤ bDnÄc*Kâý6]^Äv¹Àl`îõ^VŸ§^¯¯xA~ }p¾R¸çž‡=6fòN7<”ŠÅ"õz=.Æç^r 6|îs¼ú ©œuGœ}6¯¸ãŽ^ŸÆ‚ðïÜÁ¯þêÑ ˜ðIq&óïààAª_ý*î«_ݼ¡xa¥=»ÂÏtßoÜ,+yYf¾còÂ.Ö¡Ø•<ÏlS(Àè¨ù»|"€Ç}ø °É2^k¿…³l¸³Ÿð6¼0ÿÙIä°Û^¸jÅåú³M1›õgë%›Ú÷Ã7ÞÈÚ‰‰Ùm¿L¸á†¯S©øüÃ?üˆ—¼ähUÇ*‡“}øaF6m2cåO~2y•Í%# “ðÆt: Û6/À\"¾ìÃëSÏLË2ÏFË2¡œùr³G´\ëÀ#N#VÑ ’è}†[9©!¡zé‰f,$¨j0_yãÙ2ÐE¿¶Ìøƒ?¸‰k®YŒ²~ýAêõõqÖ’ž!a›Hç$L)ˆ-`ž-Z(Ãz)Ü!—Û“ˆ3iÚå‹i³Ž¼ˆ–‹'·¢kµfoÌÖ~>ŸŸú"&—KòX‡a"ðËOë‹þ HÒݤ8{ÏžÅüæùŠkAÀ‹^tb÷FÀç8>‚ómm³£•U?HY&ÀÁÏ?Σ?›š\^zNa¦¾PYôµwØø>Wþó?óè;ÞÁ£>J>—ömòù<úÎwšAÉø8…B×uñ}YOÚ¸sÛå;^¾ƒW¿ïÕ«ÏHYžDƒ¿|ßì¶‘Jo' ·m;®zºj‘t}BºÐEk°Mj¬ØxŽÖj5~ñ‘ݽnøñÈ#ßફŠT,Ëh^×]‡ó$ÏÊ0Lž‹éPõz’š"M6ŸE×é÷]–=5Äm`ÄŸD­`&™Eš_8®â[H™žJ¥Âg>ó¢è«|â9ÿüõÀ4Cá´¸”FRŸÈ8E^ÎJÈð´;ml/ëû>|Ü‚b¶¹ÿ‹"s_<Á«€!à[ ð‡EòÍÖë7eA%„-ÀáFYÌŒÛ,Ž×1÷Y¥bî×R#„:é!i`Òç"ù¥Ó…Øä¤®SS±aÇ!xá ‰¾÷=ì?ÿsì_ùpüM›°}”ðéOŸzJ%“ªà·~ çÎ;±þøãë=ûÙqNw9–{çD/9ÁsŸ‹ó£Á '®YýË/þÕ޾ýgœxâ?vXÁÌšÌ,Á1!ü p pMJü”蟅cˆCûûDîI#´Œ”¹ñ’§}• ¦ q篽~)¢ô}-Äí9ê(önÜH;GÏ0 ‰ˆ°ó¶™8¼h†ÉÀy¡ÉçûjB5-¥ÒÔ‡Ç|:˜ô~'ÉC²\æ8à8ˆ¿ß xÇ•W£B¥BµXÏ#Û¸¾…B'Ÿ|²×WqVøÀÍÿqüuË ]6´âr_)+—à'?áØ‹&áÀ +¦&K¡R+Õ(—Ëq8¥A}yÆÁHðåÖwâ}øáS'¬[‡wÿýŒžr Öç>gž[ƒƒÉ3^~§½SdyH’\ÞrKUÆILÅÄ\c¹Ä»64ê4Ïäå‘ê’]anºÜTŠÒ Šà½ïý ÞÿþÓ¹è"pœõñg¾ö½ßÂþ¥g$Þ[’SO œ¥‘ˆ ‘N p`Öo'^µŠ–EôêWÃÑûÿýø- 
'€Àkô‡Ô¶^€{é¥Xg…÷¹ûàù‘½æQ¼ãÎÄùï#ú7bv^_Æ™˜ zâ 8âX8‚SO…ý`mh\Å0F:¸b¿XŒ‹aJX¤µgÎþ'mÇE2,Ç1ýô¡CDG}ôÑD§=¹¥¶„,·ÇìsË·¾5®ªl§¶‘§ |{.œ{.öe—Q8öØ8}]Ú!J£té¥qwá?ûÙñ:7ß}7ïéµñÍ;>{7¦òÃù~"ÌŠ@šž¿Õ0/FZ=EÐõýöó½RÉ|&",4¿A6M­–üN¯×zŸHÑ>Ù_kžÌ €V¡´V3â°m'Þ–éüáQ4uœ ùG[‘û­]Qšô9É5J§šn\&yÇÓN­×V¼>¥(PšÖ¢:B&“ôr- 'ïßÏrääÓä´ÔýA\ GQ:Ñ×BܺíùëãþŽ)ïI2ð¹ÏqãÄm{/)ËÉ›i6yìz€„¬=ûw~‡3Þö6Óùçóì۷מvýïÿÎÕW_ÝëfΊûnzœ ûöÑšÚüÞ¹—M÷nÂrU˜P–wßü#žüá3±ÿ¨CŸ'3I€Œm\×+¡*ËÛ¶ñ}ŸÝ»ws 'ôº9ó"~ïmOñêwOxãÔ¿ð¬ú'óáÈH2q›N4-‘Ì}ÌäÐmü/s’*F¨ó0“È"‰Xæ~i}§%áÝ‚¬kÓç%[•^¯xş󢽉w¿ûÍFþ›nÄ}Î]Øÿüõ$waÚÛ­Uìµ,°ŠÆ^­D_öIŠþYéÿq°êõXx’Ç‚ „‡À>¬ã!ºœ½À™¿„»j§!ûêS á÷Cñ1>ý›0dŽ^Ô_( Ä)ââÛbC"¤Ù˜Û­šjC»§—·y3á[ßJž$o:7ÖΜ:DnõyèŽ=6NQt!ÝÆÇ\ñÍoÂoüÆ|Z°äÀ·îáôÓÏL ÌÁÔüÕ`úPñ"j×%Ï”Ó[Ò ¤I‹Zí)¡«Õé=ìgªÐ6­‘DécJµèvç(b_ús×M¼7ÓóR5[Ûê8‰wwz-"zºêt6›´³µMiaO #Êñ£ˆ}_þ2{¾ÿ}^`ÛDQD™<¬”ËÉ #ž§g?ñÄì®cŸpÏ· Ñáš NA½á”}-ÄñÔSœwÊ­ÀÅS>Û~êv¶Ÿº½ù•‘2=}î"yâþ¨Tâcù—ø• —–Ël¾ÚXgm¢~âÞ Ä:ùNàš–÷­ãÔ³AYVÜòÓMÜÿ£gu¥®`fbqnE±¸®¬<Ø¿Lß\ /)…œþüG Žý»¿3â›m'Ü&uiÁ9‹ØBELÚì%_[Ds’ðÙjFôS=[é@üËüOú÷x×%·™¼†é¨Ë‚Ë/7ÊN«CÜ AC±òGu$„¯é»lŒs§`r+¸Ó´ÞÁÕÀ `= Î@c%Éý&bµW¨5~&ÉíÖXcËÛŸïhûÅMC«r›e­´“WfûÔZ¨Ñµwå•-ëÌ_Ù϶?Üd¼¤:‰S0µ-íÆ"3y‘| KéÕ4ݘI¼°[ˆ¢ˆý]Äé­BÜti}ÚS>ß\‘ÚqâqœeY¦°^$…õ!àR¹\,6y‚ZQñ‚FK¥###à8Iî´–óùæW°¼ädÓ}íóçóÙNò¶/l"õÂZYÝôµwäSOñÈæ©ùµžxâ önÞË){NÑ<Ä+ Û¶itêþð{Ýœá¸ãŽ›²lãë7r®}n¯›¦(]³ûà)°f_çÄ… …ïûd³Y̓¸Âp]—]»v±{÷òÍwÜz ÿ5¶c/Îo†Ö—U^^•¿G0ƒénÔóœD’xÒ)J¶þÕ~îü›ßåþ_¿—ÿxÇ|¿ŸñÁs9z$nÙ½ÀFp>iþ¶û€ÌÄQVîÔGÀï‚ûMÑm”æpmH\Ú ²Åt‘Çô­:Ešt]‰”yâì#õYR<±ƒ‘,«Õš£Kþ"½]¹œè-’žOœ‰$ºwÊå’w™Ls´c©?üáy‹{ñ8¼w’ÍßýJ{N réÍ|OŠëôú:¶mR1Õj5Â0¤\.ÇÑQáŸ~:ù.ò‹GQDÓ*•Jo–[¾ïÇÇÃ$÷Hº@<Î Ã0þÛMµGæw‚0ð< (—Ë”J%:ÔëË=k¢;îàħž"~cÑøÎôE´2}+Ä…ÀäáÃ;ò»Gòá—|8¾ù••‡„´-wŽxâ }ô”å^䑵5o€²|¸?²xòÈÏ`²b·',‡ìøÀž|ðIvX;–u‘eeóû¿4øÓ™½"Œ˜à`&ƒ#3ï_Q–’0„ð“ëyÝ;¿ÊÆCb#Õç1Q}ðîöáýåR¤Ø_ÀØø Q,×Xa„$þ´Bsá‘4"¾Ùõ¬Æo‰ µ˜ê~–ÁN -­é¦$JNÒT¥ ‡ÊT “i^_HG$JñcÑV2™Ä¨%ªŽl6I÷ÕZ¯BêD‘iGZ©ã>¾ e³Ù䜤Λ8 U*f{‰IòþK$¢ï'ïZµ¡bþ÷ÿ^\[ ž÷è£Üòĸ}Fû$ªr8]¤²(ŠÈår±XAê˜o<¯Ò¢˜¼(•JñüØó<œ†W[E‰hçûñ+Š¢8„2Ÿz:ŽCEMsíô¼,½nº]³Í‰æ8N,X9ŽÃ 7ÜÐë¯bnÙ! 
×¾-aûSVý-Ä}÷HÎÞñ+S?³Bá”eÁ6žÉúãoZv÷×îæñO>®ï”eCÜùðIœóx³-Ç €óÛ÷}N:ûÕê§ô/᷾ſUÏåòu×Íœç5$ñrèmJ)ô /Ù••‰ þþâ#Ø´íq&ÿðbàâ¸kÎè_¦ÈäÍ4G8»R,$ÄØ¹Íã˜j¾â‘4ÒØak¹JcYØ8P‚oƒó>ó±‚ßððŠ*FL’òù—ËÅu¼€¤ŽDºPi:•m'^jB7ŽK­ëÈÿ¾?5%$ÞnB;!½¬¥ ª¹¾mö›^&5!Ò¤ëL·ŸôúÏ{Þ7aõÀ¯àôÓ:q}ŠŸ­æB­VÃuÝØKLÛ¶›¼ÊÒ™eY±Í¢—`ÛvÛÜ»é¼dQCùMooÛvÓºÓ|z©œ!ÒÙl–¯|å+KrÜ…d_qÄÑÇ4-k{]åå‚‹¦ÖR€>âq^¸¡iY†ü×ÿÏNgDT”>åñŸ=Ås޼™9…aÈã~œß|éoöºiŠÒ5°~ÏA~õòæfV¬öƒEEé7jŽÃ±7üœ¿:ùï`:ÏdQ2B`/½÷Ê„\Š"D×}ïç|èq>zúm”8‰#‘Lå0/Kîm³ Ftk­ Þjiá7 ÁùÁ‹ ð€Í|Æ„ðÍ4â”ÓÃL{rÉÜ^¼ÇÀˆi£- ßfr¦n'x͇€±ìˆ€uëÆø“<œ3u© ²Ä„aH­Vkò*›I”Š¢Ïóâ1PœÿŒæ¤ý¥R)ó„é²¹’çtL¶4ŒíÝÈúS<úÅS¸f¥?èk!ÿ8%þ¿µsT”~çè×>À…‡ÿ¿á_7pä­GÂõ½n™¢tOì{|ýT7ûFÒv7÷þä^¶L¶ª(ý‚÷è£lµ ð_£óÀå0Æg³çEÄauepWf$-‹Éÿ8Š—¼°N6›EœÛÆi£û`ÿ3¸ïÇxºIÑ)öØ—’œen£C&TŒ×V%ÿ °¿ÖP´€ÓÀyh„QŽ¿)9¤HT”VBààÏ~65‡¬ÄRϦdì<ñ}ß÷)—ËSRãtJ±Q*•°,+÷L{¢¥½ÙÒ¡š®ëÆ…”•Û6ñäCÅÿ‡aH¡Ph² ¹`“¢ÐçBÜ‘YŒÿ·,‹í/Ûž$~U”e@Z¼8ù+'·/»®(}L¬=ãöŽo…Ÿuü³8ù]'÷º™Š2#!@òο|{çÉG’Óª_¶hÔ·ÒÀN?|˜ÿð^Šï3†1íÐ8v£¨Æm΃ZÑx®Ï€p0ñ6“î¾ZMB@‹ÅÎÅ„5dZ™ ácñô÷ƒóýѼ‚‹0Ï+•Jäóù)a¡éý¶O›7%ò®,Ž:|˜Nx2þ?Œ´T޲üx,‚MǵÉ}§ÿß§ã·ÅŠÒï|rç‘l|ô›Í ’¤ôA¯[8 ýÜ6eÉù\÷ä¦é«†˜(’Ød€ýAówþ*ãñV,jž4ei¹éða.Ýú×Í KÌ»*u†är¹øá€ íX SYµlüæ={lRpD¼-c¤¨™¢´Ð·BÜÿ~Ç=–,¨À%Ÿ¼D¢Ê²âœ7~—³Îj”´`èeC½n’¢ÌÿÛ‡xè¶NY>öí1Î=÷ܶ•»¥ß¿õ-¬ßzìkîiþ †y[]§¿Ó^¨g’Ò ö·ËÏë ÆÆÆ¦,Œ‡Ñ ñD0jª,/Ò“û Xæ¢8íœT%öÆy¶éû>¥’)ÍÛZM´›QE™+ýåI¬[÷òx™ÚœÒ-}+ÄmþÅyå¥_L¸pìkŽ5îÊjßÊ2á ’9 ÐüpʲdíC?á—ÖNLY¾iß&Ž~Éѽnž¢t…챜ûùÏó¦7½&Ya„Š~ Emežž"Ê Áó¨\vë×|”¯ýÃ׫a^^—1¶£ù‰º"-pA@Eñß¾ïFøñëµñÿxáöºIŠ2+œvOHê¡Ô-u‡S–'wnÞÜ4Ñxø;›Æd¯[¦(ݱÿG?bß7å9/>©ùƒqVDÞÙ´‡N†M‚TZdH W¥R©­çMM“ÝÁÁÁXtÈd2ñq*•J¼oß÷›¶IçhJ‹ria¤Uˆ‹¢(žL§ÅR˲âINEMŸ¥s>åóùXÀH q"`Õj5ÞG¹\æ´ÓNëõ×7+>7y&_¿bÿÔÜp>F€ó0¢m‘¾ó‚K‹g•J¥I<“ÏjµZlWé¿[=ÊÒûJ ci›p]7¶‰b±‹Z®ë2::Ú´½Ðj+b{Ùl6~Z–ï·µÊfúøiHÅ¢„#O8'Åe—5Òµä¦_?ÝO´ö ŠÒKÂÉI=öXýÔ2î£BœÒ‘¾­šúØÄZœ”4oË_m·÷ºUŠÒ=äøƒüh^»S”žñ௟ –T„Z³o QU ZY>üø¨£xòËkxã‡Ï2 ÂÆÏ'¨¬5‡L†ø¾Oî3™L,d2êõ:Q1<<Ìøø8`„´l6OLÓ~ÙëDÕuÝx™ˆÐ\Aжí¦ÏäxÐ,R¤½{\×m#Òë¥÷Ýzœôÿéc¶Šm‚eYM”ۡ€Ûeå˲8餓ºZ·ˆî¸ƒ‡ÿûDö|ÿ+¿ÑâÊiÓ³ôžçÅ‚”ïûDQD6›Å³z½N†xž×ô݈øÚ*¨ ù|>¾gÇi²õ´½¤m"m/Ýäj²m[s:-1Os OÜÑð ™v|EQlCÐlŠÒk¾}ËÆ>r>¼Óüß6g²v/Jú×#îçOçì³O‰;à•ðÆZY}q„¹Å 
…ßßþý^7GQfÍú‡b2jžØnøà†®'ºŠÒ|÷ˆ“xâç?絯½À,1oª‰tž¨´ç™nÐìÖ*˜¥=qdjYV“(V.—cÑ!-„¥=t,Ëš’;i&/õÜé_üÛoçØ¯àõ¯ß8õÃEð~‹Çà¿ãÏr¹\[O% ”eb¿¶m7…S¶V³ì$†©H¶òO<ƒcNi¤ê ‹Ç¤eYM¾¢ôûx‚ ›öæùžöLŽN.+è[!nÝžƒ\|ñ@†°\r¸(Jƒ»ÆÏäèu&yn>Ÿç¿ ×MR”Ys÷É'sÄcIëZ­Æmëoëu³eVüàΓ9å™?LØÌipEQS’vÉ3Í!œéeÙñÐÏ~ÆÆãÀÁ1y[e™ñØGrÜq‰gÛ6§|â”^7KQfÅCųŸñh² ך³LðcÿÁcØúœ@³—:`l[ëô)Ó°d9âöïßÏÎ;9pà[¶laëÖ­Ó®¿Žˆ0¼—0 qƒÆhYûd¥‡ÌÖ†Ù¿Ÿ âÙ<}à>`3Ð}î%EYLfkÇ?ÿ棼æÝÇ& D¼H RíÌq*•Jì¥ÖZù0›JQæÃlíøžï=?¹ä§Í m`$ùWÄb×u ÃB¡‡öuʨ˜Ê|˜­¿à‡øÀ^hþ ˆ+Bçóùf!CQ–YÏ󀃷­ç9¿ô´¦bC1)ÛV”v,™G\©Tbbb‚¡¡!J¥RSµ£v¬9t_â6¡†¬ôœÙÚðæÍ·ñ–·ü×u§ú…ÒÌÖŽïž8•uë™LƼ¹Ö7|J™­»n…ßüÍ—& j@`Bò$ /í T,5)¸²èÌÖŽ}yÅâÔÁ„B¿;UŒM qвPÌÖŽ7lxÑ~o¾ùf>ý¥O¨§ô”ÙÚ1À#ÇÇßx¹\njXjuÀP¦eI<âÆÆÆØ³gW_}5(ÎÓyVüÝßg>€ =)Ö066ÆÐÐÐÒ8Åþýû9pà=mÇÄÄ'œp6lèi;zÅ\løiO{Z2‘ º9Ê£öÓL?ÜÓ½>ÿÙÚñ1ßù/þ×ëÏÅq“h»Æ’¿QûéÏvôòügkÇ¿û»¿ ¥R‰mŸßÆÙîÙKþÔ/v¬íèæbÇ7Ýôð, U©ÔwÙ·iœiÖIW]¬v÷Cÿ£íèæbÇiÎY/yÃK–¼Ý:>nFíxvvüÈ#pàÀ&w=ˆe=kjAŸU倱Úí'ØF7,‰GÜ®]»š¾œ­[·²k×®i·‰|ŸCÛÁ0Æ{à5¿cÇŽ¥?h cccìܹ³×Í`çÎŒõº=c.6|èÐ!vþ¯Ãä è «ý4Ó÷t/™‹¿áUsÙ7N… ØÛ¼Yb[VûéÏvôйØñ#û{½0e¯ÌÙ{Î^òAr¿Ø±¶£?˜‹Ÿô—'ñèyB²¹,¯|ý+Á‡sùÜ%ËåÖ/ý¶£?˜‹~è0×d`ýýë{"Xèø¸µãÙÙñ]wÝÅücþîïþƒ‰'ÌKêF·ÆÌûVQ݆Õn?iÄ6ºaI<â8ÀæÍ›ãÿgzûðƒü€ó¿{>{ÑÇøÁE?€ýÀKÑÒfn»í6®¸¢NñðóÿþžwÒwß}77ß|3'žxbOŽðàAî¾ûn<Ø“ãÏÖ† äð]‡)ÿr™».º þjéÛ­öÓL¯ïi±ãÇ÷äø³µãÝ»w³wd/Ÿ}ò³Œ¿¾Q-ï,àãŸ%Bí§¿ÚñÀðÀpÊ)§°}ûö%?þlíøë_ÿ:ýÑGÙ¸q#/Øšª^½Ä—°_ìXÛ‘pÛm·ñX£*t7ù€’¹Œßtû›8þŽçÌ3Ï4}±°„}r¯ûmG3Ëq|üàdòÁIÞwáûx謇z2ÇÓñq3½¶ãx€½{÷.›ññm·ÝÆøCÎûó¸ë¸»øÁÓ~Àm›oã¶-·%+õ`Þ×+zm?ýÂÝwßÍÞ½{9öØc»ZÉŠ5̆¯}ík½n‚¢Ì›¯|÷+½n‚¢Ì‹íÛ·ÃÒë,в Hî,EYÎèØXY)ìünï=Ñe>üÅ_üÅ”e¯âU½n–²ÌX’ÐÔ¡¡¡¦7ccc=…W”Ù 6¬¬ÔŽ••€Ú±²P;VVjÇÊJ@íXéK*Äíß¿ß÷»Nâ©(ý€Ú°²P;VVjÇÊJ@íXY ¨++µc¥ùþ÷¿ÿý‹}Q”¯ºê*Â0äsŸûøÀTiV– jÃÊJ@íXY ¨++µce% v¬¬ÔŽ•^°frrrr©611Áž={RÃV–%jÃÊJ@íXY ¨++µce% v¬¬ÔŽ•¥dI…8EQEQEQEQEY­,IŽ8EQEQEQEQEYí,Iޏ~cÿþýÔj5vïÞÍÄÄCCCñòO}êS|ãß`óæÍMÛtúl!Ú²uë֮޵íØ¹s'¾ï7]‹é޵X×BéžN6,Ÿõ³/VÔŽ—s±ãÅüÞúÕŽ{qO+Ý£v<í—jÇSÑþxù¡ã㩨/«íÚô“fÒ/,¤v³ê<âöïßÏe—]˜ )¾ïS,(•J±‘•J%|ß·›î³ù°cÇ®¹æš¦eKÙŽ;v°k×.¶lÙÂÎ;Ù±cÇŒÇZ¬k¡tÇt6 ýoNjѵãåÇ\íx1¿·~µã^ÜÓJw¨·?¾öÇË µãöÇ×þxy¡ããöÇW;^8VÓµé7ͤ_XHíæ¨^ŸÌR366Ɔ ؾ};[¶lá /dllŒ={öpõÕWÆøvî܉ëºÓ~6|ßË$§Û·T혘˜ ^¯sã7F¡­×ëÓ¶c```Q®…Ò=lX>ëg;^ 
ûQ;^žÌÅŽó{ëW;îÅ=­tÚq3Ú/OÔŽ›Ñþxy¢ããfÔŽ–ÕvmúI3éZ»Yuq›7oæ=ïyOüÿصkW“ûòÖ­[Ùµk׌ŸÍ•ýû÷ó÷ÿ÷MmYêvÈþ&&&âýlÛ¶mÚc-ƵPfG'†þ·ãÅhƒÚñòd.v¼Xß[?ÛñRßÓÊìP;nFûãå‰Úq3Ú/Ot|ÜŒÚñ²ڮM¿h&ýÂbh7«Î#n``€À¼)(‹lÛ¶4ÅìÊ:À´ŸÍ•R©Ä{Þóž)¥‘—²LLLðö·¿¡¡!vïÞÍöíÛ¹ôÒK;k1®…2;:Ù0,­ýÀìíx1Ú v¼<™‹/Ö÷ÖÏv¼Ô÷´2;ÔŽ›Ñþxy¢vÜŒöÇË7£v¼°¬¶kÓ/šI¿°ÚͪâÀ(š×^{-;wîä=ïy®ë6å0Yl®¹æ†††š’üõòZˆËòØØW\q—^zi¯›¥Ì@;^jÔŽ•ù¢v<õz¨/?ÔŽ§^µãå‡ÚñÔë¡v¼üP;žz=ÔŽ•¹Òkͤ_X¬{zÕ…¦\qÅìß¿Ÿn¸!î ‡††‹×‘¸è™>› »wïæšk®ÁqÇÀqœØ}q©Ú100ФÒ ÅqÏŽµÐmPæF;†þ·ã۵ãåËlíx1¾·~·ã¥¼§•¹¡vœ ýñòEí8Aûã勎ÔŽ–Õxmz­™ô ‹¥Ý¬:¸;w²aÆ)ñ½r¡öïßφ ð}ŠÁµûl.HÂ>Áq‚ ŒëçRµcëÖ­\sÍ5ñþvíÚ»Lv:ÖB·A™=lúߎÃ~ÔŽ—'s±ãÅøÞúÝŽ—òžVfÚq3Ú/OÔŽ›Ñþxy¢ããfÔŽ–ÕvmúA3éK»YuBœ$¬5S‚€Ë/¿œË.»Œ­[·âû>×]w`Þ(túl¡™îX ÝŽ2™ —]v›7ofÏž=¼ãï˜öXKy-”öLgÃKi?Ó±”ö£v¼<™‹/õ÷ÖvÜ/÷´Òµã©ÇÒþxù¡v<õXÚ/?t|<õXjÇ Çj»6ý®™ô ó¹ŸÖLNNNöúú‰‰‰ öìÙÃÐÐÐWÊé>[Îí˜Ë±–òZ(³§ßíx1Ú v¼òè—ï­ì¸_îieö¨k¼è—ï­ìXûãåK¿|wjÇ˽63_‡Õxær=TˆSEQEQEQEQ”%`UkPEQEQEQEQ”¥F…8EQEQEQEQEYTˆSEQEQEQEQ”%@…8EQEQEQEQEYTˆSEQEQEQEQ”%@…8EQEQEQEQEYTˆSEQEQEQEQ”%@…8EQEQEQEQEYTˆSEQEQEQEQ”%@…8EQEQEQEQEYTˆSEQEQEQEQ”%@…8EQEQEQEQEYTˆSEQEQEQEQ”%@…8EQEQEQEQEYTˆSEQEQEQEQ”%@…8EQEQEQEQEYTˆSEQEQEQEQ”%`Yq™L¦×MXÑÔj5J¥µZm^û ‚€R©ÔëÓé[ÔŽû‹(Š(•JSl_í¸3jÃˇnìXîÕ†ÚqÿÑ®?VžµãÅe)ÇÆ«ÙŽAmy±™­-ëøxö¨ /úÉŽ—…çû~¯›0oÖ¬YÓë&´¥T*áû>®ëÆõ\‰¢ˆ z}J}‹Úq1<< €ã8M¶¯vܵáåC7v\*•¨T*½nê’£vÜ´ëÕ†§GíxñXê±ñj¶cP[^LæbË:>ž=jÃˇ~²ã£zÝ€vDQ„çyd³Ùi? 
ÃÇqð<0 Éf³Ø¶¯†!–eáyŽãàºî”u»]oº6‡aˆmÛÔj5lÛŽÛ._¶t„3íò,|ßϯÓÿù|~ÆýLw.aâyãã㌌ŒÄ×¶ÓùDQDE€yÃ’ÍfãëßJX–…eY]o³’èdÇÓÙ0ÌÝŽ-Ëš— §³œìx:›ìt>¾ïcYår˲:‚W³ϵ/æÜÇη/–¶µûÞ—Ú†[÷5;žîžœK,×Îó¼xÛ•ÎrS¤µÒí¸SÜzn«Ù†å;Xnv¼ÇsËgÝôǫݎåZ­öññ7ÞÈË_þòEíçbË:>î÷÷¸b¹è}çE™Lß÷ Ã\.×ôy&“!Â0dxx8Vés¹µZ-Þ>ÚËåâ·«¹\.ÞG±+i·ëu" …¹\Ž(ЍT*qÛÒÆÝÍ>2™ aÎøÿtûéæ\|ßÇqÂ0ŒÛV,§=Ù·|/rœVjµ…B˲ºÞf%1w²a¹6sµãùÚ°g¹ÙñtöÕé|d€’>§v¯ÕlÇsí‹åÚ´ÚñRõŲNý, Ïæ|:Ù×L÷älûcùnK¥ÕjµGÖµt,×1…ìc5Øq7ýñj¶a9ßåhÇËqL1—±qzÿÓõǫݎåœu|œáæ›o^ôþx.¶¬ãã™Ññqÿ+–^1Ùg”ËåÉl6ÿ_­V'¥™ÕjuÒuÝø³b±8éºîäÈÈȤã8ñòñññI˲&''''ëõzü·lS,ãÿeßݮ׉z½> LîÝ»7þ?ÝÖn.µìc||¼«ÿ§ÛO7çR,'Ç™´m{2›ÍNZ–5Y­V§=Ù·,oº†®ëNV«ÕIÇqš¶í´ÍJ¥“w²áÉÉÉyÛñ|mX޳Üìx&›ìt>Åbq˜&-ËjjŸÚñÜúâÉÉÎv¼T}±ì£Ó÷¾”6<›óéd_ÓË\úãÉÉɸ/ïöz,g–ë˜Bö±ìXö×Ú« ',W;^ŽcйŒÓûogûjÇ :>&îß»?ž«-ëøxzt|ÜÿãŠå¢Wô]hª¸5 i÷ÈV×N×ucÕÔ²¬&Õ2íæÝ­[á|Ý]×ßrÍÇqšÞ<Ìôÿ|Ï%Š¢ØeYÔgq#ít>ŽãÄËmÛÆqœXA—ïöí¦m;m3ûër¦“w²a¹vóµã…p¡]ŽvîÿqE§múM¯è»ÐÔÙFº¶m×uãŸz½ÞëSYóŸéÿù`Ûö”g.y*Z¿‡½{÷L›xv¥çÃèö{j½jÇ ÃLö%Q±ÿ|>ß”¸Síxî}1¬ ;îµ ÃÜìk¦þØ÷}*• kÖ¬‰ó®Y³fE$n‡Ž)úߎ§ëÕ† jÇËol Éw¡vœ ãc«íß³¹6Ý2[ÖññÌèø¸ÿÇ3mÓ/vÜwBœã8q‚C ©l²mÛMAÚãEÔJQF …B¯O¥ïq]—0 c#ë” • âmäoQŠåæ«V«T*•8>|ºmV"츓 ƒÚñ|˜­}I.€ôöíG«ÙŽçÒƒÚñ|hg_çž{™®?®×ëLNNÆ?“““jÇè˜b¡˜­O׫ ÔŽ—޹ޡ³í«'èøx阋-ëøxft|¼ô,Åøx¦mƒ¾ MÍçóxžÇððp\Á"ýYMe•Ø cw_©öÒ/ˆ+j?µKª‹ ÏêºY–E&“‰;ëvÛ8ŽC>Ÿ§T*‘Ïç»Úf%ÑÉŽ;Ù0¨Ï·]³±¯b±ˆïûñ÷ÓéZ¯f;žK_ ýoÇýjÃÒ¶ÙÚ×lûã‘‘‘^Ÿæ’¢cŠÞ´m¡ûãÕlàv¼”Ìul,ç3í¯v;/%s±eÏŒŽ{Ó¶Å÷ÂŽ×LÊk˜>#ýFO2µ‚Ä÷Jµ!Q.Óñ½ýB†„aØ—o¤mÝ\7q¥‰c«gz»2—mV ­v<“ Ë2µãî™}ÍæZ¯V;žK_,ËúÑŽûцAûÖÅFÇKÃRõÇ«µã¥o[·×MûãÙ¡ãã¥oÛl®›ŽgFÇÇKÃR{aÇ}ç'´3)õ+Æ\«Õš”Ê…H`8a6¹Ÿ¶2j*_ätepg£ºÎ§-íÚ6[C³,kÖ7ê\¶Yî´žïL6 ‹kÇóµ›…´ã…´a˜›}ÍåZ¯6;žK_ ýkÇÒß-”ýõƒ¯6›œ +qL±Òìx±¯÷J`%Ú1¬œ±1hÜ-:>^¼¶´kÛlmYÇÇ3£ããÅkK;–j|¼”vÜ·q‚ ŽËOeéÃß÷ãŠ;‹µÍJFmxaY*ûR;nFíxaѾµ7¨/,j“½Aí¸÷¨íϵãå‰Ú~3jÇ ËR{aÇ *Äíܹ“‰‰ ¸ôÒKãåû÷ïgçÎ8p€-[¶°uëÖ%;AE™-jÇÊJ « +Ë í‹••€Ú±²RÐq…²P;Vú…#ßÿþ÷¿!v´cÇÆÆÆp]—k¯½–‰‰‰Øˆßõ®wð¾+¯¼’Í›7¯ŠØqeù¡v¬¬:Ù±Ú°²\оXY ¨++W(+µc¥ŸXqÔëun¼ñF6oÞL½^`llŒ={öpõÕW‰â<›æ{ßû^žþô§÷úÚðãÿ˜ç<ç9=mÃ8pàÏ|æ3{ÚŽ{N8N8¡§íøñÌŽ;eß mÇÛ·oWûiÐOöÓëïààÏþìÏeßìx.6ŸùÌgÔ~ô‹ýôC;8ÀÀÀ—_~ù‚ï{¡ûbl÷Ú~úÅŽµSÛñú׿~ÁóöèØXÛ±ÔíXêññ\ìt|œ¦_úÁ~±ãå2>þÏÿüOvîÜÉ9çœÓëKÖô‹ýô÷ÝwÇwò'2㺠"ÄíÚµ‹¡¡!&&&سg›7ofÛ¶mMŸ [·n媫®švwÝu¯yÍkzx 
7ß|3[¶léin»í6n»í¶ž·ãúë¯g``€³Ï>»§í¸ùæ›mß mÇ{÷îíù÷¦öÓL?ÜÓ@ü°_ :Ùñ5×\3kÞ¿?@ϯ™ÚOÿµã¶ÛncbbbQö½Ð}±„ ôÚ~úÅŽµSÛ!}ÝB¢ccmÇR·c±XÈqèø8M¿ôƒýbÇËe||×]wõ…÷ ýb?ý@½^ç‡?üaWë.˜GÜÄÄoûÛb÷îÝlß¾K/½”°yóæxÝ÷·wï^®½öZ¶oßÞtc,5/yÉKz#¾aÃN8ᄞ·cbb‚¡¡¡ž}cccìØ±ƒ{ï½wQÏq!íøÞ{ïåÚk¯]Ô‡ÊL¨ý4Óë{ZìxïÞ½‹vŒNv<~衇øþ÷¿ÏÃ?ÜS;Vûé¯vìܹ“/}éKlÚ´iQö¿}ñ~ðöïßÏöíÛ{vÝúÅŽµ W\qwß}7çž{ßbŒ]×mÊ5·ÔôºÿÑv4ÓËññ\ìt|œ¦úAè½ïܹß÷—ÍøøÐ¡CìÝ»7ÞÇj§×öÓ/ìØ±ƒï|ç;qÄ]­¿ BÏ qõãŠ+®˜ó@áYÏzVO;g¡n¬~蜞úä:\}õÕ\qÅ‹zœ…´ã³Ï>»çv¬öÓL¯ïéåfǧv¯zÕ«z~ÝÔ~ú«—^z)ìÞ½{ÑŽ±}ñsžóœ¾H¾Ü/v¬íH¸úê«Ù±c§vÚ¢ì_ÇÆÚŽÅf¹+@ÇÇiú¡„ÞÛñ¥—^Ê¥—^ºlìX¼¿z}Ýú½†íÛ·³eË–®ÇÇÝÉu3000Ф$ ÅnþCCCŒÅŸ±aÆ^_'E™‚Ú±²èdÇjÃÊrAûbe% v¬¬t\¡¬ÔŽ•~cA„¸­[·²gÏžx€±k׮حSŒ[>ó}Æ$žŠÒ ÔŽ••@';VV– Ú++µce¥ ã e% v¬ô š:00@&“á²Ë.cóæÍìÙ³‡w¼ãñg—_~9—]v[·nÅ÷}®»îº^Ÿ·¢LAíXY t²cµae¹ }±²P;VV :®PVjÇJ¿±`9â$íž={jréܶm™L†={ö°}ûvu÷Túµce%ÐÉŽÕ†•å‚öÅÊJ@íXY)è¸BY ¨+ýÄ‚ q`”æN•F¦ûLQú µce%ÐÉVÕ†•å‚öÅÊJ@í‚  T*5-s‡r¹LxžG¹\ž²]&“¡^¯ày•J… °m›b±H>Ÿïõ©­*t\¡¬ÔŽ•~aA…8EQEQEQ!Š"€XT ÃL&C6›%Š"‚ h»ïûñú…BÑÑQlÛ&Š"†‡‡qÇqz}zŠ¢(Š2k¤Xƒ¢(Š¢(Š¢(ÊLض j݆a¼ €eYT«Õ¶Û—J%2™ ¹\.ø‚  V«5­#Ë*• ¹\ŽL&ƒçy½¾4Š¢(ʆsÛ.üÆü4ö'¹î;zÄ)Š¢(Š¢(вh„a8%<ÕuÝØëm:¤‚áàà Ùl×uÛV5¬T*„aH½^§R©P«ÕbÁ.LÍš‚ Àu]Â0¤V«Q¯×±,‹\.‡eYZ1QQe™EP($¢Ùè(X–ù,:ùPÙ®8 ï àY\P†p(€µ”Ø^‚ŸG;Gà–N¬]Ë#k×vÕVâEQEQe…P«ÕzîÝU.—g íF„FGGã|r¥R)ÜÒǰ,‹0 ñ<|>%³¯iÈçóñ>òù<¾ï«§(вÈxž¿ÊeÛ†Fö‚ñ?ðàÖœ…o˜åÁ xcFF T‚JîþÕ» O·!Œà¹6sòð›YÓžj¶…pDž°  l¨Á·|x|œ,\’5bÞ÷ø•üQ>5ÇÏwœy&¯øÑº:âEQEQe…Ïçû®mÛM<Ï£V«uÕNì\×E³R©{¼¥ÏÛ¶íX¬Ëf³m‹@t¢áNQE™ž(2"›mo´Lþ² §;@‘  `ƒ !0>ž„zFÜÁí&Y —3šïƒã@Í‚¿É‚3 ÿ«[}³/ˇ=yÈ6+Å¢Ønuá?Êà7†ð{˜(Âiü«7õÆ£â¤>\5Ûyœ ì(C$\µ àÀPÞ繿ɻÿï¶ÛØýÀ]]'ͧ(Š¢(Š¢(Ê’!ÕO»!Š"*•Ê”å­Â™¬S­Vm» 0%LUð}_Å8EQ”ð<#ŽyžÎr9#¼Ôj0< ÷à󷓇·:pIgg D>¼ÅLcŸò8øF[3ð7%³^­–„–†¡ñžûv`D5 ømžçC¨g᪃ƒƒ±xEQÓ:ŽãËåÈçóA@±Xˆ½äÀˆpépÖ(ŠÈd2qXël<èEQVQ¿^õ‘ñ Cxs†#VåBäo±[Bxù(|ß2yÔ2!ŒÚPÇZ•<ä##Àm³à6Œ·Y³ý»l¸ijØ9 Â÷FM;òy°óN¼àºàBò6 bæ“VcßÅ–óÉçÍv–•kFŒëlÖ„¶ÆÇÁÌu[Å68ÁïU`-Àÿîþºª§(«”0 ±,«ë·¿éê15Œàb:.h„ç³ÓYLíøÒÈ›ƒ“S„W­šÏJ˜y]ºHi6õ·m›öW*I޹lã|Ë}޶œ¯ã˜Ÿ4.°·e½Ñ– ‹-Ç–øÛ7Z°áS.ð`÷ß¡ qвŠ7¿`„¸b±Å"¦ò1W€éüʘί†µDÜ*b:M0\ú´ÓØO Ó©Šë°xºYmÒ#eLÇ&âœ×øñëWi/ÆÉ:YŒ˜¦³-5¶«“tÄ"òɃ@Þ¾Tç6~t®(Š¢(ý‡mÛ3†³vªz:]5Ônö«(ŠÒ+$ú(™KÕHÄ53rI„'qnhö «Ì{¤7 
CBú€ÿéÀ]8‚+CxÇÙP.&2¿IµCŽÝŠÌ[ñý$Õì={Ÿö4êk×6 ‹2?µç25<Œ‡$!¬E`¸±lß5•J\E C£ØU*ÏSN?'­B\­Í¹(Š¢(в2é·‚Š¢(í(aæcÌ|G¼¼Ä)b/f>”ÃÌwFë‹õ—\è˜mÅéâÃ!ÜÒØÁ3«° xÀ†Ep(‚?-›yR›fÁ­:ëÜh¶Ý<‡ùü瓪•ŠùÝ -ä‰SH+ùíA¾:sÛ¤€Á ßÿ>¼àW¬TŒ¨eÛP©˜— R Ìïáa#~ºÒ/J<ÏlëL3{Ìçñ̲Ì<ô«_ŹûnóY˜VQÃд¡^oˆxžI —Í6 sa˜¸Ú Qd¶³móS*a‡!Ø6{ù—y`÷[­šª(Ëé÷8ÙlRºX­òº/~‘ (•J¼êUkxõ«¯alì###ËeÆÇÇÙ|Ýu8ù<æ GøÙÆüôˆ#xêÄ9tß}]½uh¥“`çhY.âÚLˆçš„¦zAN¼ìÒ¹üÔ¾o`¢Ös‰wœ|¥Ž¤~§ÿ®¤ö)ÿÓ²­¢(Š¢(Š¢(Êb‘ιmaæ]2÷’”?B–fG82jÖ´ÀÌ+_|8ÿV4û"3Ï|ð‚ÔNƒ “Ɇa<׌¢ˆááa¾ñ·Ëq÷Þ˶óÏoªF àyµZ͈jRf5“1fllŒ(23+ß÷™xßûŒH62’x¤©Õ’¿3ÓØ42Anæ±€âÑG'å\[ö5IW­&Ÿ·^,Ûž*Ä‹`Û|ðƒäïxäóÜ÷ðgögìÙ³§«ïV=⥠CÓïd³IHjµj~ïÞó×ÿ×z3GžqÏöÃ`†c^ù"ö~æ\žûÜûFÌÆ[ßKí[âóm~ãŸà²¡!¬SO?ŸI,;©´Ÿâ~NÉvÞ®õ!1Ýz‚Câ: Dl¨™¡‘ÛìawGΠÁEÙä!%oÒbš‚ûÏ8»e wõQO9EQEQEQ‰4jýß'©äÙ Qd„¶04óÇbÑhSQd4§(2Î^­ÚR$sÃ}ûöQ(±,+.j“Ë刢×uÙ´iÕj•l>„Å"žçÁ°²éyßÿþ÷ÉÿéŸÆ!œ?Ù½›_ÿøÇÝ»—?~ík¹ÿW•7¿ùÍüÕ_ý‡bhhˆ[_ö2îÛ¶Í4H<à²Y³_ñBæPQ!Ÿ7‚_>Ï£ï?C×\ÃØãsœxĹ.¿ó;¿C¡PàÄOĶmÁ C>üêW³ãsŸ›²[Ïó¨T*Œ¦BÑÞùÎwò¥/}‰û￟onØÀºûîã™<ÂM7ÝÄóž÷¼®¾+õˆS”>¥TJÄyë,Û„Zî*Â×^pˆ£?{,ãy¢lsÆÛ~™¿ñÖpjŠëº|ô£åYÏzVWß»zÄ)JŸr£_Nú¡Frh„JZÀ[(àÔ£Žâ×Ü$ZX6ë åJ¸W+–eÄ_ýÕ0år9²ÆÇÇc¡éºë~‘Ë.»¨¹ `‘u]s°B§Xd÷?ÿfb­lÛæÝwÞÉox"_øØÇ¸þŸÿ™>ûYÞí8¼ç=ïáæ›oæôÓOç-oy ïxÇ;8ûì³yË[ÞBµZåSŸúTÓ¹]xá…ø¾eYxžÇÈȈù á÷±g?›×9^­çùŒ¢ˆR©ÄîÝ»¹ó»ßå/FGYsÖYdm›¯d³Œ>ûÙÜò…/P}ík±,‹óÎ;ü㌓Ëåp]—çžz*?+—y÷Ãóµ¯}7¼á üÓ?ý_øÂ¨V«A€çyDQD>Ÿgbb‚bÑx Z ÷Â|>Ï®]»˜˜˜èê{_õBÜðð0ÕjUäJ_ñŒÙà»I"Ìówã±uÌ­·ò“/=Ÿÿ÷SãØ*!”’°e¬Cxþ0ÔªF «×˜&/”¥‘¼•RCôŽ;&ù?ÿç"~ý×o%>ÂÄÄÏþqäóþÙ…o5¶‘ndþņ/Z©*¦5s¬JÅ<$òyø§º› 6`Y'òŠW<ÁÏ~4Ž“¼Åoºz=y«ól8Þ1^wõ:¸Ž9Ù|ÞüÔjfÝ'Ôg[#]ÀÍm¼= à¡<œøyØìCP€žtãlu 2χ§ÛÆkî8î j¸ukŽgEQE™=¶mÇ‚Z7„aofÌR­VÛn/y‰,Ë¢X,Æãû8_f²”Íf ‚€ âÜGŽãP.—) äóùxÛB¡@±XÔ⊢, fÞäb¼â IåÓÚë™S's$0ó'ŸL&ÃÈÈÙlð}‹_ÿõ;yøá‡)—Ëq?EQ,&Ù¶M[gc ’ ¹[êu‚†x÷“×½ŽS^þrλôRÎÞüþ÷S,q]—+¯¼’‘‘.¾øâxw¿ñ¿ÁïüÎï´½Ïþóù·û7î¼óNöïß{íÝpà œsÎ9ìÝ»—§ãضëºAÀ[ßúV.ºè"Þýîwã8çŸ>§ÅF?ÿå/™(ЍÕjd³Y²Ù,¿ôK¿„mÛMᦹo›øÃ<ðÀ|á _ Ü˜äJñÃl6K¥R!Š¢yëG«Vˆ“7paR©Tµó°†«(Kϧ¸Ä6pL¡‚Íoy Ù¿ù*–Å»>ÿy>¼};ÿpí÷€õ@RLGn‡P¨Ï3)îЛE¤04y>ïð[¿õç|÷»ß¥^?“W¾Òò,‚ Ì5×ÜÅm·}„LJ—¿üžw•Ê>îÃ8úœgòŠWÜÂW¿úÓ+d×÷îæŒ6mÙÂÛÞö|èCÆÇÍ1¿ó‡ùó?ÿî¼s'Qñ™ÏÜ«_½÷¿ÿ<\7OØð¤“(É_à8ðU 
bÃ}ŠæHnÍMvóC+›¯Ï1ç»/„gÛ°sØ\°»á¿Ûªp³÷&ÿÞFî.ÀÝ6d«ð­"àÃ'#Ø$âc©å½EQEIÃpJªëº±×Ût¸ ׌ÁÁA²Ù,®ëÆËÒø¾O¥R¡^¯¹\Žñññx¹Œõ3™ ŽãÄáG###‹E†‡‡Éf³Ø¶çyq´@*Â)вh¤óÀIuThžÏd2âÙfÄ·ôIÈfM_›Ë(—ËÔjµ8¿Û%—|ޝ}m7VÉf¿ ÐäͰɚ$ÿ†‡qÝÍ ,¡T↗¾”õŽÃKþýßyÀ¶ñû?ýôÓ9ûŸà…}‹EžûÜçbÛ6=öØ”ó½æšk:^‹óÎ;÷¾÷½<ù䓬]»ÏóÃoûÛqhh©T¢^¯óo|#ôD0xßûÞ˜gG©TŠ_̤=«Ûõéi=(þn9ò2™ ¶m®ëR­Î&s_{VeޏR©D¡P “Éàºî”ÜXâz&†¹Û7vвDÀí6üMÞx´Õ€Ã‡sÃ5×P«ÕØðÊWòáíÛ©×ëÎ_ß´mºK±m#¾ –e<ÉÒäó&ä¾P°Ø¼ù¾øÅòÞ÷ƒãÔ‰¢lÛæ8Ÿ‘‘‘x Z¯×‰¢ˆG}”ãnÙÇèhòVºT*±víZ6mÚÄøø8Ÿù̇8ãŒsÉdÉårüò/ŸÉþý_att”l6˧?ý<ó™?ÄóV§|oíèF„FGGãIR©TbÓ¦MmRjxq¨ëºñú¾ïÇ›mÛd³ÙxœoÛ6ŽãÄÞ Q‘Ífã¶yžG>¯¯ÛEYX)Û¦‰³hŸK{d$)´à8ÄýnEyT‰\.G&“‰sºI±XäúëÏ&—»…7¼áUF|/70A€sÄw(×MèO¥R‰ûÉ 0_r Žãðç7ߌçy”Ëe\×åìë¯oªþE\pÁœ®Ë™gžÉÉ'ŸÌË^ö²ø¥‹xEËÿçŸ>CCC\qżå-oáž{îiÚG>Ÿ'ŸÏÇBc6›—ÍÖ‹­ÒHØ>::J½^gdddÁž «Ò#.-®Ù¶Ç ËÃXÞ~•J%<Ï‹¿À4âÆ©( É\Yƒ7W“NøìƒyÚ?ýg5Žû£¨ë‰éD+0/8¤k¹| çžû¿ÿûÇÍ"^˲(—ËFÌúkpŽ>%ä†aˆû’—`$rß8ŽC.—£X,ÆËŠÅ"ƒƒƒäóyÊå2•J…J¥Ò8¯ccP«å›:;ËšZåGð}óSnSÖôíˆï=ã)ë1~uÛýü=g32 ¯+À±QÄGC{7òÓ/€}2Å,TÊÜpÀGQS ö‚,^ò*vøp™ÕðæC=äEQ”Þ‘~n:Žçfƒf¯ÖÉÄ|×›.g›mÛMëJ¨h7Å\×÷/`Ú+! æ±yº-­c¦é^²Ëºaây^Sè’¢(Ê|ñ1©†<Ì|©iȲL”˜>Q^–ˆ‡Vk8>˜þÙ²¬xYÜ_JÎ Ñ7²Y“ûgà‡D¿öf¬BË¿ým>ýÖ·Bcþ†aÓKŒb±ØE«ç†eYœuÖY>|ß÷§­á¨­c;¤CYÇÇDZ,+~a(Çži%y€d‚«(вP|̇->¼0gÙ+£‚Ñ̆‡MN8ABð¥¿ë”NkJ¿ï›F_{úÓ¹ ß::нÃ&÷ƒ+©;%N-—ùýFž¿I޵ÅäCúgœqAÄsÁ4i ¦T*‘Íf;¾Êçó&ªB2ÁޝÓ_ü4LÜpúqÐHoDJ\Ûzžõ#­šS*•bWF1Ü´k¢< !É'^pâî†!¾ï†aì™V¡5w„²ÛT‹Åßê¾_WEQ”•Žïû¬Y³&þ_BGƒ hûYÚëNBE›ªäÕ[òmäóy2™ …B!öÞh·\ÆøÓ…Æf³Y§CQe¾ÜÂY|¥ïnéb<ÏÌg²Y3· øµ_{Œ\îÍØ¶ësª]*!β8©‘G-Æq°N& l„uF g¥n<Ï’½èE&‡©_{þ¯O‡Öw"ÓŠp1>Íy„„ 0J³K¢ˆké)jÐøñh{j#ämúò&N|âÄ®ÎsÅ q™L† ˆ¢¨)GD OÍçóM¹ ä^.—ÄqlÛ&ŸÏÇ•3d{â”ùòì”ÎV(¦t|‹é<ÄË4›ÍÎÕ; ¾¿¤C-—Ëd³Yr¹\ü÷ôÛ§BFÃz½Îàà`,â‰×ÝC™ /yÑ_óì;žÍ/dáøã²x^–»'MbgÛ²°Ý3†2€IDAT\ ðªUø£?âsŸûß9é$p_Â×3ÃßxÏüö}<ñØ)ÜÿšcÙéÃ!|&Ø*Æ)Š¢(ŠëºLNNÎú³ôòjµ¿ O‡Z¥±,‹ÑÑQ|ßoZG–KN9YÞZô!-ºÙ¶Ý±]Š¢(ó!Êç0‚ ¹àBHþn5—JçÅ‚S§þo …‚ÉAEÉ$Ñ\­ÆÀË_§ªÑÐ(rçy±£Q/sdJŽ·_zô—Œ@–ž@œñ.®E„ÉýÝJ»²´YLÜpzÊ-‚›O"ĉ0Ø¢Q>ðÚøÉO~Â…\8c³V´–e±wï^:æy¨V«S&ùi#¯V«Fn$q…ö e®üQ ®m˜ tˆýä7–eÅXŸîrt¢ÕsÎqFGGcQ}º·2ahBme .«´Pn™jœxщD‘ïl,+K½~&‡Oü„aÈèè(QaÅÑQ6ûØÇxâ#áˆC‡xîÙ²ñÍc|óÕ/`Íó…×ËxžUƒOåàØ:¸Í¢(Š¢( B·/¾;yÐÏÚ{DQe HæIÙ¬ñ~sœD7“iNš4nPŠ£º& 
k]áMôÉY,Aœ‚KªD?þøM|âsÓMɱzš?>>nD¯\Ë6Fkõ”+aÄ´Öî>¤¹2†ŸÚmÖMï7‹à†SËÓ_dêïC‡84q¨«s[±US£(ŠCR!©tÑɘf<¤\zz½b±H>ŸŸRuuº6uC­Vcxx¸×—PYB¾Áq¶¹ÏK¥™LfÙ£»–|r:Þ ) &or ¹wÓ8ŽÃ©§ž:eY­¶‰-ßýnì%+~˲xãþýüŸW¼‚CõÈ# œ³ž+kÜpöAŽØ¸\ØgÁƒ§^Ë&¾ÛR>µÖø© (Š¢(Š¢(Êj#înøñ¸®ÑËäïB!Y/Ÿ‡ju’~tÓì#¢<ϨzŽ“dÀ¤=" Ã8_¾89üÍß¼”Ïþz}‰š°,ˈfМˆÛÂx©µæC’Ò4"µÌͰËÒëg ÎE”^Ïj,—uÓm)$íxúõOïúÜV¬W©Tš<ŠÒ“ê…Db—Ót*ß>888m> ©æ*erÁÜ0éÿÛgq ceyò×¼¼¡»I…®^ºÏ‡)^º B7Þr{I˜,4WËÊå2ï{ß©´^bÇ¡c?ñ© xÿïÿ>‡×Nðhcµ“N:‰ ý5lœœdÃáÃ{ì±Üá¡T]À¼ ñÑbŠ¢(Š¢(Š²Úˆ0ºÏÏó™r„¡ÑËšSRF¼ò•Á›ÞôèÌ^i­NAžÇ¿>lÜëR"ž84HÈiES…zí×#º¥3¿å·´Y1µPƒx.Í^Y’0TAd–|jÝ´G\3ÑÃäˆë–)Ä jµÅbqÑ)=i—^Y€“k°6µ,›5…æÒ„aÈðð0gµŸ+¯|Gû‰÷AA#o}dÛì?ùä)›T«UjµZì$a©‹N…©ÅB¦ f`¼ZÉ3ÕûÍÇx©µîW¼A–e­ùÜœÆ~mL5Ôôºr,™¸É%²1B,—©ºÛXæÃÞ_ÞÛõeY‘9â¤ÒäR žçÅ‚™T„$W¸}Ê:â±#U FFFb T*ÅáuÙl–R©D±X4Iäm;¾‰Êårœ7«P(ĉhç{ý¤B¬ã8}[ `¥ðÞœ‚[¤)Y¦2{.¹äqr¹Ü¼¼ ¥Zr'^¸{7'Ÿq;¾ ßF“ßïÅeÓ?ß±w/Øðä}ëøPÇüç Çÿ.ÌÁþ:K¯^*Š¢(Š¢(ŠÒ3BLÚšß/ÂRE»i¶Qè8"Sé¡Z%ïñ<ó32##MÅ=¥èè ¹\޽{÷v̧¹àˆh–n”T"mm‚ÅÔj¤•éòu[uDíÒÎ2÷¯»eÿí|7Z«¦ å6û•ãúðÀÿ~vwwYVœ'%}{áM†!Q‘Ïç)—˸®WnŠ¢×u ‚ ®âZ*•°, ×u§$¨·m›R©D½^=¤ÄCN>o-ìóÀ.›¾ùCçžË >Ê“¯ø›?Ÿ‡®:›îãI6²Û‡ UˆSEQV™LfÊ2©Á²MÇ¡(ŠÒ oñÁÍÃk\xt†ISÔß7œi®k*;¤ JÊ\=›ÍR,ãH!×uÙ»·{¯­£UpË2µ’iÐa]¯±nZñ,ýèH mijõ¤èÃ(Í¡¥`rÃIÕ|KÛZçn‰˜>Ÿa8Êí^^[qB\&“YR. 
!ãÍ4::Oâ[Ë¢ƒÉ)'Þ6â7>>>e¿ù|¾iPbÛv\ÑD.í!—Ë188H½^Ÿu^¼B¡@±XŒÅ  b9Û¶©T*¸®‹eYäóùXa—ó¡NrtµV¸T¯©„@Ö2E‚ ˜"Êö+6S½—ß÷;¾Áá{QÏ»Qµm„Fq ~1ßüÌ8ñöÛyôÊcyÚW†xÈŸÝþ3^û®ux¥cøü¶û9¯×MQEQz…ïûLNNNY./uEY¹HÚ0sÁ%óÄêàç@Æ2ÎlÓuyé¹vçFt“y~6k„¸–y·ïûÓêó"ÄQÝtßí½ÎZŰNe¤ri|¡0Â\DâY'ùá$g›\$ UÚ^Ò9ùìÔ¶éc´3Õ ±ôºbk'ÖÒ-+Jˆ«Õjd³Ù%}«–6lñÆ› Û¶q]7ö’ëfÇqâs:)ñžó}ŸB¡WCé–0 ›öŸÏçcqÓ÷}FFFšögÛv\¬Âó<ªÕ*¹\®m 0BiOTø>Å÷á+!T³¦HÇrò†ë…×-/¶Ð+û—~ÙuáWlCçž{.OÜr Oœ|2÷åÍEúÅ{îåÄçŒBõ ÜZÞË{ö,ñ•SEQ”þ# ÃØ#Nr*ËsV&éÜËù|žl6G˜ÈöéÊŠ¢ôAkä0³,‹0 WÝ\0ŽñE¦8\Ûõ‚€J¥ÒÖI'Æ÷Mjº¿kTGõ¢§¡GÈO©TZœ¾±†Ÿºñr0¢V–D “*v’“ ’‰eZD“°Öôqóß"Ä•0^nio¸ˆÄ“.- IåÓÖï  ·,·0á°í¼ùÚ¥Ö+ÃÁ].4uÿþýlذ¡»½õÏózöžÎK§•‘‘‘ø `·¢ëº …®…Çq°,‹Z­6+«VQ0›ÍR¯×;ž›„¯‹Åx½|>{x¥Û,á«»Úß‚^çÃYޱŸb±¸¬òñY´¹ÑK‚ Xôk(áä‚ëš$«4:yxì_þ…ãÞô&Î}Æ3¸Éãï?‹nŽñN…ãz}¥EQ”KHç7evãG¼,’‰‡ßa›ô:’ÀÚmù¿‡)ž’úDª}aâû>µZz½Nd2\×S¦Ôå3™ ¶mEææÈÈÅb‘áááÅóüP”UŽÌ[å~”Hr”Ëe<Ï‹#ÅDP’”IE&‘V«É+.NÇèeíN; ÃX¬¬×ëÓÏ+•öÕæFFˆjµø:‹°hN³‘\âb±(&^légUHRUD4hžpÖhÎg§ÖKŸj‘ä”^ž% Ommc–©á¦íž¥­ùîæH\5u×®]‹EÆÆÆã /ä /äâ‹/Ž—÷+rÓ÷â†!µZ­ë0PÇqp]wVmµ,‹jµÚÕ6âX,ñ<ááᮎÑ)¬oºŽ2ŸÏ7•BÓË`(—ËÅ¡¸ò»T*Źˆxƒƒƒ Rhb¹qFr.MærAŠÍô 2À_êkEæÅTš—}ÿûdn¹…<ð\ ~ôÅ øŸÿá„ãœü»?gbâÔ^_.EQe¥"ÕèÚýȤB&E­UèÚý¤×‘ПÖÿ[fY&Ü÷ýømzŒ\«ÕbA. C\×mwÊØIÄ9EY DQÔ7ó¢R©Äðð0¥R). ˜Ë娴iÃÃÃq.ôJ¥B©TŠÃ*E —ûÙ¶m²Ùl¼~šZ­Ö7ç»ÀcaûÔ(ŠÈår‹EFGG§Îõ+S˜A(—ÛWx€¸•>r6N? F¥Í²2Fk•Z…8!KçCešŸIxå¥÷e“TU Z–Ûößšb_<þI3> `bb‚+®¸‚mÛ¶100`ΧáÝ´mÛ6êõ:Åb‘o¼qÆîß¿Ÿk¯½–íÛ·7-Û¹s'`Ë–-lݺuÁODr’õ‚|>?%¤s1èvÿé®^¯S(º ›õ}Ö7« ~Òñþ<22WÍçóM¡®ë288H­¡ÜGQ¢ê±½d©ìø?,sÏ–À“k¥Ð.䌘9×"%ó¡Ó¸áŒÛo_¬dî=ŽgÞt'ýþï³É^σž¶$mkµã¥è‹e!éÕ˜BQ’%·c—™'6S=ºñpÈÏðÿ4´‹ZI?ÓÓc¿ôKµ(Šð}?^¾šÆC×®åä“w.I;Zíx±ûbEYhz1¦P”…Fíxf,ËŠ¯AÚȶm,Ë¢\.k¸>`5+:7çCE 6…iû¾§ó©T*±×Y>ŸŸr=eÎ+Î už"äø¾çyñO.—‹9™Ã¥½VeîkYãããSÒ!‰\qà(•Jñß###K:÷[J;–zéîË÷ý؃pÚ´:â9X­Nyó_©TbM‡ æóù¥yY!BZ«‡´äf‹O#ˆË©P &7›Ün²¬ØX? 
!}œVjL×lÚçƒkÍ×#Ž#°]~ùåñÂz½ÎÐÐPSn¸Í›7³ÿþiwæûþ”uÆÆÆØ³gW_}5(Î i Ré³_&áýF±XŒcò§»î2Ø™-Óm#oM$Héd¹ °¤ìUο¥²ãxpj¤•dgC'!n.Þœ …eµOºšæ/¾x þû®?[Ïãk?I\«/E_¬( I¯Æв¨'¬Y³¦éÿÖÜ™L&Žâ±e>Ÿgxx8‚  \.¯(Ág¹°šÆR8$ Ãéç7ÜhRŒ¯V«5EK¹®χD`“$úRŒDæiRÔOŠ‘¤©T*q:¤ùæY«T*ñ9JtÒèèh,ˆe³Ygå¤ÑÍ<ÒqªÕêš0 9묳îKìÀRÛq»‚¡AP­V›òíµÅóÌ}Û†zâêEQSxo'oâc ÐêœbIJ´èfUá­ÝÅðiÖê}4{eçÁÆþZ/`»DåYš…>ÙG§¶ôG 066»a¦ÿöÌPåoÿþýüýßÿ=×]w]ÓÃq×®] Åÿoݺ•«®ºjAO" Ã%­”:Ò…?dî.Å>\šs .$Ý„¥ÎõƵ,kÚm]×exx8~£ÒJ»7 ò°Zj–ÒŽ?Q‚Òˆyx¯„AËR ÕƒÅ6Š¢X¸íEXªiÓT!®“€lÛ6Øp×¥‹Ú¦vv¼}±¢,½S(ÊB¡vœ0]ˆä€! Ã8:BÆFiq ®\žŠ°âbÊâ°šÆ68]FÏó¨T*‹Å8厌ÅnÓ÷±XŒ=Ûdþ%Å Ò6mYV\AÒ ù¾Ïää$¾ï“Ëåí:"Krš‡a·µZ­¶ º!gK§KYÖ ;€óBÈ-Í÷}êõúÌúEB›k%viÛ6ƒƒƒ‹ò=M¡U@“Í´sfúó(µŽ,—PÔVÙ “"EZM£ÝeËÒ>7i™þ«.Øà(€¡¡!®¹æ6oÞ ã¸îºëâ•$fzºxéR©Ä{Þóž)V8ïˆsÐMÇ<ÀŽ;¸ôÒKg\9yy˜ü…"-!ÔVêó&g˜TäíD:çn:L:ÀØa™©6žÎãÍ ÷wÏ|þó 16œ. 5“D$UZ;!E#Ú k®ë6 ¶dY©TjZbb‚;wr÷Ýw/â7´°v|÷Ýw³cÇŽ¦œ0iÖÚà:ËyŒŽŽÎ¸?…8ùk¥R!›ÍÆž%y-6làÉÇã”S.êqÚÙñ\lø¡‡"‚iíXY}ìÚµ‹ë¯¿~Q±}ñ=÷ÜÃõ×_ÏÄÄ—^º¸"¸²¼Ø±cA,Úp1ÆÆ+9W.—Ãuݶ^îËe¬¿Ôôr|<;†™ÇǽDª÷f³YÊå2…B¡­ç™äá’"õz=.F¦“Ý–Ë娉¡“3ƒmÛqèf¦‘¬_B]×óoKŽ5ñmE÷—J¥8RIª ÷‹#À®]»¸ûî»9ꨣõ8 =>Þ¹sçŒã °jI}…nr’Ë™ :¼Ø—¹·ç-Ñ2KRåTH{-¿%[«À愊¶¾; HrÄ UÚ‡¡¶£]ÞSXÒÔ;wòßÿýß]¯À¥—^ÊÄÄD|soÛ¶-V†/»ì2&&&xÏ{ÞÓq'×\s CCC 60X¿~=[¶lá„N˜qÝÅ@Í– I]Êòåâ–Z¤Ù&=LÝ‘Æò&L:j¬ï2Õî#Œø\n,/§Ö±Ç+TŽÏC<°I“kì+ ÜxÆüôþˆÏ¦–E sRÔDDÂsotƒmÛŒŽŽ288Ø1_냪ÕKî„N`Ë–-Ü|óÍ‹öý-´oذ-[¶´ý, á j<Ü5¬º{¤bš Ž–‹ˆé8ëÖ­ãÄá5œ|çþùï° iÇëÖ­ãÏxFG;VV'›7oæì³ÏfbbbQö¿Ð}ñ 'œÀÀÀ@ÓoEزe ÷ÜsëÖ­[ð}/ÖØ8=a\Iض{Ä}#ô;+m|ÜK$,:2Ÿ‹GZ†qø©ã8s*2×Íú¶mÇ9×$L5=Q-›ÍR©Tâ<‹[(°,+vÄ(‹äóyr¹\,4ö ›7ofÆ >|xÑŽ±ãã™ÆòÃuìßj5ŠíóÞ´î¿á´èóÈ"í«¡Ò8Añ *a6å$\TŠ%´zÐ¥é'û\ qàÀ®ÇDZì¼}ûö¶o&¶oß>%_\+»wïf×®]MIÇáꫯfhhˆ;“ÄäcccÓî Ì`£Û›Ä÷ý¾¨6)E9j4çÌ`ì®Hbi²AKnÇrjF¨¯4c‡acô¾ÒÝy#æ‰çgë1E`³€Q`°xÝí·sÉ9çP"¥Ççæ‘ˆŠÍž¥¶ërßË_>íõ‘ÎlßhJµ×‡zˆÿ÷çÄO\¬¯pÁíøÄOìhÇaO9Ì;¿Ãj¥Z­ÆÂîr1-Ëbݺu<ÿÃk9õŠGí8ìø­o}+·Þzk¼¬^·nÏ|æ3W¬÷…278ûì³gÌ;Wº/>á„8ûì³UˆS¦°uëVvïÞ½(B\/ÇÆËñRºgÆ lݺµ'ã㹌+`úñq/¢aN q–\lRåˆóÆInµ¥‡¶‹>’t-Ry4}ïxžç}«×ëMm,‹}wŸ ðÊW¾’þð‡‹vŒÅÏ4®ðSˆl“Sf˜ûU*àû&'\ÛMt©TÂuÝ…SÛy¥Iˆiزž žD².$!¥rÒ§#‚ÝrœNÓáeä =44Äþýû»ÏèÿÙMG)É QèÁ¸L±ÿ~6lذàÂÃR…¦¦í¥F’ÓM<Ä|Œ¨%ö'Ý„y¶ giZ=&ålНµrãx’±]ñvû¬4Ž™nå+9îÚk ID8'u¼ïÛî¿÷œsÇ! 
ËoÏM$‰+¶ï¯]Ë&Œ¨Xc~µ<$g$U‚.ºè¢ø6Û³”v| pý#$/GZËé«~xÞy‹vŒNv<11ÁÈÈÈ¢õÅŠ²PôrL¡( …Ú±²RXÉã ‰®ˆ¢hJÅO0s’R© né(Œ^‹Y ›Édâ¶Ëw Þsí¾ÕæÝ ;€{J˜I5IEèv¶˜|p]xÃ-ˆ—ajõPÉ{Õz"t6Ihœœ_LZhH缂DœèV,èÇÍçùcÇŽ±Ú~Ùe—166¶ àòË/ç²Ë.㪫®bçÎlÛ¶mAö½P"œTÙO°AŒG™ätË4Ö©äd«clLÖ[’üoB•¹‡&g1"W¾±ÏbãØí<ëÚm[nlS>sÄÜñ¡ñÝýû§„[Üwþç’\$â¡ì+-ÚÉ\‹ôý%|²ñ¾ƒîXéRؾï388'],Œ™Xh;ÞëÂï¿÷`|žÊì™k•ß^!Âá°á¡‡–üø‹Ù+ÊR¡v¬¬V£K¨Ü¦M›X³f ¹\®m±®ù ¹´Ò?¥R)þLþnG†d2™8ŸVÏófÍš5ñ˜TYÞv†!¥R)ö‚‹¢¨mú×u©×ëŒ÷¬(Øt‹Å¸izžµœr¨÷šÅ²cÑ´‹o¸ ô•Ë02Š8äóùiû±YÓÚK~ª4í*”¶[&ËZ—gIÂWW9GI¸sçNÞóž÷000ÀŽ;¸æškâJ-³¥õÁºmÛ62™ {öìaûöí]¹-Ï„çyxž7£¸`<´\Œ=IÂGÇID¶<æf)ˆºÆnÊ[©‘äUsËÛå!\HDÞÛø-¹Úf¢5$Õ†÷íã‰ÿç‚ š¼êîûö·~æ3ã;-¼É¶"z1÷]D’÷.ÃN Pˆ¸4þ®'wðfëÔ!åóy*•JœÌqœ%âÓŽ`Ãg?»ìÞ*óÇ6ßvÛ’/mÇ‹Ñ+Êb³c EYlV»Ka®½{͈Ö÷}2™ õz}Á ”ðBפÊútŸ„JUv! C …B\2Š"†‡‡g,T¶’Y ã yÁ_­Vã\Öï¹×pÊ›ª*ãrzYÝ ÛŽK˜ù¯¤D“j¼S*]ŒAr¾U0B‰Lðeë“T¤¤2e»Üo:ýBÜîÝ»q]7®ú±aÃ.»ì²=ÐÀÀ@×Õtº!<Ï›ñí„O"…$ùÍäÆpé\t õñš'±EhŸm±)Ó׎·íÝË·ví‚ .hªÖúÂÆ÷/´«,´Þ7"Êɾ$ÜRd%¾ó¡ñ“{ïÕ½'¡›’ÿàöÛo_’ë܉…²ãÑìÿÊ×¹úêßîéù(«…î‹¥¨++ÕbÇ’¯*N%ùä…«eQ‘Ïçc1Ìó¼ØM–AOLÅó§]¨–äÍn¦6Öëu†‡‡ãüÄä*NGkT«Õ®ö¹ZXnv\*•ð<ñññxÙJUEx‹¢¨éÜ”îXH;–ÚvÃ.Š"2™Lûü|•ŠIÞ¢iäS^qQ5å[Ïç©hHç$ó‰·’›úÌÇxì"6d§©+8G\Zù] ¥Zj»·éæ>ÊÆÆ›Èjj?óLH‰V²D"žK‹'Ž?žýgE©áº­žaé D®ëƵ^ q Åã>f7Žsõüw¦(Š¢(ŠÒ§t “Ëf³q¢üZ­§pÈårX–ççÏL& k•J…‘‘ŠÅ"ÃÃñp'!‡i\×Å÷ýiÛ'ÇËf³xžçï•ñ©ˆr]W£–)RéÔ÷ý¾ 3]æí%Õc|ðÁ^7aÁ°0éÞl›¸rm[- MθtŸ™îsÄ9eÞÈd]ªBB’ü=lYÏmüˆãhºƒœì(SC]Ó2…‹5ô+R±¦•ˆ$÷ ˆµuŒWÇØŒxoU»:ÒÊ$}ï„a8§A…„ìÊ},¹åä~³1߈žIèj»¢,iÒa–eá8¶msèС^_ºy/øËýœþýéâ®(Š¢(Ê2Æ÷ÍÏtäóS+óÍ”{ÈuÍOšZÍxst³ÿ¤ª£Y=O<óù|,œ¥ wIª×u±m»)dpº—Êþ ×BÒŸø¾eYMBÀèèh‰“Î+¶¼¨V QÅžpÙlV¿»>Ķm¾ô¥/õº ŽeÍ ž¥ÃRAWHõ<|>ßä•» ¹’[*cJ&÷3ÑN6è$º­àb ó%âÆÆÆØ±cGÓ‡­ÿoß¾½×íhrOaB¥ÀÎó¦,R<î·4‡†çIBEˆ«‘x²¶Û*©Ï¤è…ˆp£££DQÔ³b I<ã÷ôm® EQEQ–1¶=U0k¥]þ¨™¶i7nqœöËSû·m»m~6ß÷Û "éÜVQÅBšeY3¾8¶m»)LUB[§ åªÕjq~-0ó ™k¤Aik©T¢V«u®|¨ôòÝŽŽŽjî´>e%Í‹¤· CÓEz^§q™ks-$d¿5pADdi xÉú’ÐB‹f¡LU˦Nü;¡LGŽ=66ÖT)uëÖ­ V9u¡iWVXªš¦«ð¦Øbw»^ñ´…á\³Î%Hñ’¤½âÒëRY53™x™„‹lÚÅz¥<8?^ƒÃ´¢8Š¢(Š¢ô 
¶=£7Z[ærÙŤPBPÓ^oâ¡”Ïçã\o‚xˆç‡ks©(©l:!…ßÒãÍB¡{ÅI8cë¼c¥ŒIWR¡·^¯ëØ[Y¤°c¶!JT*³â,ËZØŸ'ŸÏS*•âdæ–e†!år˲¦P(ÄÛ”ËåiCM}ßgÍš5ñÿ®ë222Bm?“¼pi\×¥T*Q,Éf³ø¾Ïàà`Üö(Š–}.®Õ„»*Â)KÉŒR}©d„¸âTõBæç’_s^¶b¼•ºÑõ¼ÆzÙi¶qIò)ó¢)GÜþýûÙ¹s'ccc8p€¡¡!†††ú.)i†MvWfÚÍ…£ÝÛ»tܹÐI‹©cîUñV¬´ÙfrãF ©¾Ñœ72Íàà`®ÞÂqxð'»qœ?ìuSEQEQÇq+£:ŽÓ4.•Dæ­y‹%?›ìCÖM¯“ÇNNN¶=¾ëº?k%›Í6‰sÕj5W•”)Jÿ#ân6›Õ0beI¹/€yPð Zm‚{·ÑY$œçyq%é9c3}¥Ç fÒí„JÅŨñ“&I¯Ì›¦qW\q'œp™F¨àþýû¹êª«Ø¹s'år¹©²j?QBE¸¹P*•毲w ³$œ ÷·xÀ‰—ö˜kå%/yIÏ®×B±¾QÆZÃEQEYML'buòZêáK=ª–R|cACü¥ Šàx \&ð§ÍšŸ6ˆÍ.Xu_›f/—Ö¬T>S‹-dËÛMW÷.Ée\ñFp»âŠ+¸üòËÙ¶m[Ó Û·oçŠ+®àª«®Š]ö’Vc;ÒGãì‚€ –ü Qã!ça„·tÆ:›¤ŠòJåúކ…©v£(Š¢(вÌQ¡DYH$¯_†My •þçÁìu„»"xFg+•ÂYGJHê‚à“TO”f¤›#Ýo:Q»¬ãaÂÛ”Eá€;w2444E„ذa•Jß÷™˜˜èu{§„¥¶æìß7×[—ù¾©"¼RhAbÎStÛ½HEÔ©Þqi< ×Ó+µ8ü8€gØ+«2¢(Š¢(Ê\Qo3e!) ø¾OESòÿ)ýËJLpfÃoh.¢Ú\ ÓtDµ§õt¸ªTGõiö†qYÙÞ1}À`ÂR‡††:®400ÀÖ­[Ù³gO¯Û'Ü<Œçd·ÔjFlËå R193³¬R1ŸoÚdò'.QdŽcÚoJ§ CÓž\.i_«H¸PHž ,÷ßL]‹Ûa›$45¢¹Òjˆùnç_ϵ¿¸8tÄÃ:à\* ½n€¢(Š¢(вˆW¯×.¬OYVZÊÑÞÚ†¦AÛm<ÏÃó¼…-“ÇäðJO=}š'ÙÌÄ»užÅTHU…#º]±ŸªUŠˆ!ĴŦƒÀˆZ­Hq߇½{M‘Ï3Uƒ£ªUÑQ³^­f~ÄC. Íòé<æÄ«.ÅëyP(˜ŸRÉ|>Š¢(вB‚`J¨•ã8”Ëe‚ Àó<Ê®"®äóù¦0CÏó¨T*A€mÛ‹Åx~ EçÒ”ËeÇiûY6›%ŸÏE¥R)®T˜Íf)‹+*dn%#"œF(½f_ã÷”°Ô\ÎL²ÛäŬÕjضeYÔjµ™sg®Z A‹˜–î²jÿe‚m3µ’j–ö‚›‹Î[‘£ÀxIíܹ“«®ºŠíÛ·7UGݵk;vì`Û¶m}S5Õ¶íxÎn¹BâÑ&ŽFíú`ñ8K¯ã8Í"œ02’ˆe¹œY'Ÿ7ÛÔjF#Z‰'û¯ÕŒ0&‚Öøx³p%¢e™u[/m*P'Bœˆp­ÎTÅbRxÅ÷ÛÞÛMø¾ßä®íûF̳msÞÅ¢ù[¼âr9³\(’ói­§#Ý´:Y$¹!‹À ‰ƒÓJp0¯ûðÀG¿Œûƒwöº)‹O'ÏÑØ…•DÐ HJf×H"Œ¸ÕšPÐI#h¬_Ĉa£ÏüÔzac¿åÆ>]ŒWœOòð±0n¶åÔ6Qê§Œ1FYo„D¨Ë’¨×Ø÷HjßÒΚ9ö]O»«×ߎ¢(Š¢, ’ŸX­Â0$“ÉÍf‰¢(N›ÒŽZ­FExž q"ÎŽŽš9A1<<Œã88ŽƒïûLNN¶ÝßtŸe2\×eïÞ½ñº™L†z½®b\ŸS©T°m{ÁÒî(Ê|pòàXP«ÍQP¶m&êmìTœdÄ3®+"š½ØÄÑ Ý]Ibö ñw™ö öÇQ–˜£ )ÈpÕUWqá…²uëVöìÙÃÄÄÛ¶mcûöí½n+`¾ÐìhE&œ´Zízºôâ'^jò ÎçÍßµš¹²Y#ƹn²®¬'´ÞoòÙL9DEðk'ÀµîK<Åk®ùü­)n²¾o¼Þ‚ Y˜l O1ж¯ÀLÆ\û04×£\6žeMõ¤‹…D’þ! 
(¹äÒù$GI´‘ pVw_sßrÏ<öy½nÆÂ •Mûª)Iü¸¼¡É“f"~ɾÄ(Ê$y äÁ‘¶ãRã3I"êaD¯|㳉hæ’{ƒ«bT^‰vËdßÙF›Š$Þrã$±Ôõ`7Ö©5–ýåSÇ/7>/6ö™ƒ³;»×ßž¢(Š¢ô)ÎÐMyÉ—4<4ñðZmd³‰&yóZ‘Båò´ýBŒˆk­×3Ý_H­ã$߉œ(—Í>¤jlÀ™}Ý]‚‡møºî4/ VBƇgmÙÏyç=³×Í莴¸–FD(y»I.´,F “ñpZxO¸"F@Ë‘äQË“ä\ÑLr®•H¼ÙŠMr³M—DT–‹ gÑ,ŽµÛ®Ó} Ë]¡ÑbjÉf›Ä[.íúi¥~Wá®k44UQEYä]W/)3³Ã…?CU0ÏóâPS˲b!`tt4Î/W*•ÃpQBH%’DéO$—–æ…ëɾ˜„a•Õšâh¥ã÷{ºaì½ ˜ Òá—ËåÙ÷3íj’ˆ§ÛtoÝÓS•£º]Ñqœis8,3í/’¼píhgçaȪ öšÉ¬V3×Jò׋¦c éít3áûÍbˆ‚tȵšñŒó¿m¬÷ð±ls:眳=g/ß>X·ëÉ^7czÒÒ¡£‚t~ãs'µ¾x¿¥Å9AŠ3ˆç™Eû71£mþ±-"‰IŸ¦ÉÒ@,æ§èZÌ?‡‚ í|hž;QEQ”öäiŸ¨—ضÝöéyÞŒ ÉkµŽãPiT Ã0þãQ'Â[©T¢V«Å3ËÓT-k÷™mÛmç9¾ïk~¸>E¼áÆ»™È(T*f~\*™ôCÓ9a”JfÝô|:-® Aä=O;ÀHñÂ0lvÚ(•’ÿ§K»´ÜyqÖiêkfP?gåÑ)s*hž×Hœ:!©s”žsD¯0Äm<ÖÓ kÅâTï­ÕØOW«¦Ó“J²•JR]u6…$Ôt&$‡œÐMŸòlî á,†‡˜—FªÖ:,o!àþ`}¯›Ðž õ[^£KÒ>H:ÉÝVÁx¬)~ Þ_Ò¹§)¨Ð­–Ï$_[ö.׊¢(Š¢,$Ê¥žçaÛ6õz=þ‘ðÔ(Šbq.Í|Beßi1Nª¨Î*\LY2¦Ê´ÔjfŽ&ÑPB% =Íz…‚qèùŸñó{Íš¤h©”ü ɳZmŽ’’y¢ÌG{ìã³hl|ÕX–ÕTé¹òRbV˜ùP¡ey»h¡Ök\Déºöˆë,ËÂFóÓ‹½Š!ã®Sxi7ˆˆ×¡ÐKÌLc• HÄ9ÙÍ9„ü×WÍC@ÞØÈú¹œÙ¦> ²Œ…¸xê©§ú³¢S™.g®ZhüvSëˆ`—îÐ%gZÈ԰͹"á®ÒÚ‡—OQEQ”Îø¾Ïš5kâÿ]×edd„ Ú~&yáÒ¸®K©Tb||ß÷ŒÅ¼(Šâª¬3‘>˜j…år™z½N.—‹÷)ùTˆë?¢("Š¢®„Ž^Q(tŽÜš‰Vï3Ï›š§»ÓºA`Žíºf^–Ë%Eö,+™cy^âœ!Û‹I±> a4Û‹fß®kDµ‘ó·eMMeYS‹þ¥S¥IU©”8ôñW9k¢[áEÓ¼h— årÇqfÿÁ"™o¥‘àthjk·˜G 5ô ËJˆ ‚€³ßû^l©r8K—ÚR©s>¹Õ€o˜‹T©˜Ns>1þ¶m:ý¶ýR ‚lÒ6ù®Òy,àõå2\}u¯/åœuÇìu3ÒUO-’ÊŸaãïæ>“üléï>÷’oYNc[¿e›¹"…@E8¥gø¾Xú¾LJuhÉ{ ¦¯’®ï›k1%T{žù|:OäZ­ )Š¢Ì×u™œœœõg­d³ÙXx©V«q˜ªeYMá£ÓíoºÏÇa||œ ˆ¢hndeIð}Á_lû¾ùé4WŒ¢$ϵˆTâ!&¹°ÓÏü(J§Ùš‘ºK¾“B|ù|"¾…¡ñZ“ñ˜ßé²ù|ûœáÙ¬9—lÖì«U “õÓc×M<èÒû›MŠ>9—ôöù¼9ϯ|eA¿ÒžÿýgëyÑ+S [ÔÒt…æ9å7L”k‡80tZO…¸¾à(€‰‰ öìÙÓë¶ÌHEܶ~=a*ÁÌ¢št’¿¾RÝ_»e¦Ê¨ÓaÛÝ…¦N‡ã$“Ùxq ˜zCS,áOú,y°Ôjðàƒ§õú2Ι}!°Vƒòaø­fý|>iÃR$\’²Ö:U¥°m{QÆTš®¿©ÕjT*•8à|‰"3ï³ôx PHòdË 60ŸÕjͲDº®€yv‹È%cƒbKˆ`ú™žÏ'ÿKDR6kÚ“Ï›e`¼ÉFF’¼oíœNDäjçq–^·ÛË)×d®´›¿Ëyüã ò•ö”8êÑG¡ØóÌ–VGSÌ”+³k$\µN³ÃR£‚«Ò°sçN®¹æš^·eF¢("60³'ŸG‘ySvµ]­Ìç¥^>Ÿ¹ŸJà–›‹Úx"fßgÖÉç“\›2¸CÈ¿Üÿ†h$I ^vù‘Îu“OhÒ“yf±h¶µíä™.¹ÖÄ»MB=!ù’±†Œ ÙsLŽQ.'Þxâq'/ Û±aŸíò°+ EpÄáÃÉ‚6n–•J…J¥B±Xœ[UfÉ—”¨–‚xo’ß[…¸¾ä(€íÛ·³}ûö^·eFlÛæG!<·KAixØLfŠÅ©îÂÊì°mØ»wþûi(ÊŸVJ$µíf÷k¡X„|¤×Wbîü0‚}÷Þ»¸I‡›z$UB¥Ú¨K’سŒq]®`:ê*I"‰§“oe"E^D“Áµ„oˆhEI¼ã$Õ¸å…@^ªw¢Æq3ŸÁÆ>j©ekíÉ5þR×´ã÷ÓØ&h3ÇÊáçæÉ%ÒB©T" 
C¼8ɱI"ŸZ_¨Šã…>û–#zÝ€n ‚€û7oæCõ%JN­, ó©Z»Ü‹müô¦Ç6©°<šE9 dI:ä,æ‰è5–ÚI+KŽ$N#yYZ—µC ‹ç³„’tÓÇ̪ 0÷NŸDœ’~IòpȽ$÷c såÌÿÖ§'À:l)˜BãsŸæ‘¨äqL‡Š7p}°’WÚACŒh¹—ó/€úIP?ÊYØëB¾1h+ü_¨¼Ÿ©oQ[ÑYÊ ÷r&?òÿtÏwÏK<_e›Z-Y–Ë5{¤´ŠrRá0 Mؘmr+iÖ×çA@&“1Ñ-)|ß'“ÉAP*5OÛK¥µZ­ígÊê£P(0ÒEܦçÁ™Ø´ÉÜç"By‘(‡û$uÊ*$¹îKHþ.ˆZí(ÁÍ&ÃÁDôÚÔø¿B"òI›DPËa.i_‰äñ.ÂWó¸m¤±Ž´5%q,K2œ¨cD7y¼;eÙÆOú³áÆïLÓxã³°±¬ L6–6Ö‘l7^ãG®qº{¯¹*É”eÿÉ'/¨Íô‚û-8ñÄŸ™Zª¥ Rh¦\.Ï¿2³ä+†f!.KbˆJ_rT¯Ð-Qñø9çh ›UŒëž=ß^Õë¦Ì‰ûÿô8¬ϳ³M{°„˜7$yZŠ—[©±¬JRxA¼nZ…lͨ,2‘#ÄI²ñq#¢e2æm°2Èå’ÉÕjsìô¸Eò­]¦.˜%ÌU^KË«ÛôÃH„íI,†l'#Õæ>lŒvÃC¸\‰ä¦ÄŒ”HBÈíT²©}‹˜îƒûBØô¨ï'J}–'‹~¬—A~xUàb¨}ÑìN@»r®Eš »48ñ¿Oäß~ôzYe‰hwÿIòðr¹1!nØ‘m'¶&¹š$¯"$Uþ¢()žR©$ÉÏÁä.“—³R8 ar2ê ³ï´ÇÊÈHª Š²èDQDxž×VèyAÄÉʃ”"[(¢|>ïûMŸ)«Ïóp§«Š¶>ð¤Ö$D9xQ';ðÆ68ðD.ä¦XÅ<¢%ìRÕ2Ä–GºM’_B8呦¶ñ)vy˜ÄÁ^<ïœÆgâ=VlÙ§¼3œîÝ¡8Ô ¦Öo¤±ßQšßåu“}ÆIkkª\›æ÷sÓ}##$!®Ftmðiž‚ØÀ½+@ˆr€³¾¼»ãÀtÞURÓc2qal'’dÑŠ÷}̲â~üÁÿŸ½ós£®óÿ³ÐRJé¶³ThÙu–«¢¥³´ ¢Ñÿ•TÎã›Hµ ÷=INÎß‚‰§‡Ò.9OAñÔŒTà€S3Š€r´ì ²…²Ãvm e§ÝýÁ|ÿøä33É&»Ùíî&»ûy>ûØ$óë“™÷|òù¼æý£ィ<5SÓ„µk'nޏCŽÛµÿO=Ü*ïÃ#é#'ìò^1JË«=Q÷“bœbk™°XV Wv—é¤È–HoÕr¡†˜èËÜ+#BŽ„AŒdå¨6N0È g5Nˆdòséuj†^ËÑy12O Î^ ¦<žú“#vº*‹¯HQ]†$ ?@ÿ3h³Kç8B VŒ÷29pnbDúíb_q 3‰gÀúä¯ãg¹Òo }ŸÒȹe} OÍzj ­EÑlÈB#Ò£Tz™¥B#²²z8\Ó‚¤ä0ðþÌåĽ,ç$é´ø,‘B¬ … ¡ºì?¤P—Ë•W*”H!ß0‚>£ÒÛ6ÝÿjðŠú1 c€˜&ïW"E¸ìH2Ï+&%¶mšζa³ŸIÁò,Ü-=C‹ƒWRÊ,KüîEÐãåÃß<åÏÒä°Z¢\ò'^Z³Ì£J©$_ÚÆCˆf‚¡9ÇÉóD³´_­ôºPz]G݇Ai{$FÅöòµôî /Ó€1.i7.ô÷÷sŒütœªù±†²ç!‘ª°$ìî&âš_ˆëîîfÍš5þëÍ›7ûï%¬¬úÀã£-™0‘´ E0÷ÿ=ñô~ºëÈÇsr§áÌ«2Ì-ìg*ÿ›¢áH/•Ê’÷²bY­ðS)¶É*§ƒ‘Ï×ÈAÜÕ¶÷<°51úî#ˆ‘Â×›mpÌ@l3€¤_Žn´áÔÒŽeŒ…Þä}'ÃZO ¢Ӏ…5¾ˆÚOøQ±éËâ+•Ûè}'èß+}öÄë*_]VmÓõMH¾‰;!y ¸6k!× é3Á1Àý*D?%¾ë‘?:û‹Ãͤ£˜ˆÈûÓ0„@æº'[4ÜÓé´ð^K$‚œIR«&xÙ¶ø«¼·ãñÀû ÄÿT*ü$²J`µÈŸ\N[Τ0W9²,8í´¹>ÅSÓ4±,‹h4ê{ÇUŠs‰D˲èÙ¯§*ŠÉ†mÛ5…Ù+rðG ÈC«X`”ú©uȇ‘HÍhA_³¨¦[ÈÐÍÁ †ìæ4Dgåá÷7âKJ:Í.]Ëó`39õ!x៦ñÆJÑ[étUc–'].ëWÎíjÍõT(aSs@[[ ¸îîn-Zä¿Þ(ú–-ãÃß8@å‡SLHàà¿üeÿ=â¤Ïº|Œî`]!N&y¨¤Ù¡“’DBL¸«E”ȪeƒbÛÕºTJÄ¢Ù¶xmYè®-’„'vpeÄ_‡ÖV¸ÙDéõZôüMJ]&ð ú;áÓ1h+©{Y %'ʲþ*·f`±Üg†#DºT+ä"¢]i„–JÁ;ÛÁûЧÁ}à ß7ÍÀÇê2ÄT>âNSž¿NýÙ’¨!«%”?¶/W¿Ñ›¡Ïƒä/ÀzX ÆRüÁŸu8¤¶À_?úWv¶s,ÌE1Ž„“™‡I¥Äíáyâ¶ÉdÄz]]Ae?Ó,â4M¼ç“T&%—Ç–^±al{`Ÿ`ÕÛéyE¾\N?<^Ôu±nåÒ0à<–]»&T€ÈärBp¨ö'Ï—ãˆ÷áÔkµ¶ 
¯“J•çä“ï+ÿª]/)À˜ˆF+TX™C.û^q EeHsð9Ýßµài8^ƒ§ÒP¨2þ•Œ*ý²uZìc¡È¼n…,µ# Ø| nÚ~sÈý¼ûþûśƖN²âj¹Š«¡ºÌ Çt€•+W²råÊF·eP6¶´(QW1ayȆ-ß?ã¶ŽýÛ‘ú/ë‹Ë ¬Y‚l²*¹º¢ mÇk~Â'™ð-•³p© IU@NÎdâ7é“ͳv˳êT*Pr9±Žø¸.,°à½¼!êðë”p™ù×NX‡xÁýõ§\–„}JÉíÜm %âŒ܇x-Gí²•*—¤Ñ®övÐuœ/ßûÜaÝ:p Téªæy¥G÷© î.ŒÚwœ3‡p,H±Þ é–4Ð/êÖ’“ 4+3»û|nJ"ÍCVv]!’¥ÓõåG”y-Kü×õ@˜‘"›¼oãqaÖñ¸Ç2™Ò}L0¡­$:ZI5q­Ú~r¹ŸiZð=ÃT•å„»òxÉùPmÛ`õêGÇî¢5€x¼ú5c»—jÝM%•×`8óIž*sÆU†¥†á{=uvvúÞsŠ©ã8„8ø/ð.~Y§B6˜§h,ð³F7b?qN¹‡·õŽâƒJYµ#Œ¬8¢˜ÐÔ|¸nݺŸ­X±¢a ý—Óxm¤þÀ EƒÙìÙ»wÿw” Èþ*Ùá<J€S4þ¤:“ Ä0)–Éø_峺HD¨ é´ØVƱÙvPÁAV0•bTؽÆq Û†C²ðHV$P‘…^tÁ(À?¤…Xõi‚‚¤á«Àí.ôÙ€Y>³]íK ÉäÀX;9»ÇÁ²H]9cÅVŒig'J¶Ù0Ä9‘.D–¸ɤ[RUÁ#ðZÞ‰aä¡ #È­%«N ðü󯎥Ù(JÈbYÈÀ²‚ªÁ2ZX«–:w.'´°øÚÓ#n/ËÛÈÛ+ò¶…oE)Æ…Ñõ¡¥²ÀB¥&ó±…÷©ëâóJ8ÓøY8?\˜jâšø* küávuw;Ì™sàø^Ø)L4%‘HTõp G d³Y"‘ÈþåQRLx<Ïöí2"™2ÕMÂ/ #ØD麊±Ä>dY¼2ˆƒ“mÛlzHÂEú` 0Ë%R”BSAoçž{®/À­ZµŠ›nº‰U«V‘J¥”O\×å¥WŽâPoÿ÷¥P4‚]À¼;F¶±KPd!\ÂH†þLâŽ×ó<<Ï#—Ëáy±Xl¿÷§]dΨjHg7sév!,ɇRèéªN¡ ”‚žžòQ²tÝ1M±\.“jˆ{C RÄ’¹Þ¢QÐ °@ïeñøÇ|P]‚Áœì›À¹¼b•ÇeAu±Í4ƒ˜½ÁÐ4èï/ž¨ ¦d’ÎfŹïK–Ÿ Ç ÊÏÛÛÅ_­ Q¥ Ãõ_¯–È¡Ø?R©—OF]ËnOH§Å}%/Ó Ì%‘4]™ƒQ f2 –*Q‰¦Qø¿yìÖºIE£Õãéä²l6p’žq Ú&³äK—&I,VµÄìpBsdÕZé¥4^"ÜXŽ)I¦”ç%•‚¾¾@(“Õƒ++Ê*¢P~M3(v ÍN9EÚd•a˦P(Ô¾~Ò&¤°^)¦ÙöÀÏj™k­°ÔýulÚߟ¨FtדՎ‡‹iš¾g›a‹EY![éý–L&I– /¼bü¯qE%¶mû…;rˆŸòSr°Æ©/”Z1q0–íÛÇôcmÇ>ø:§ú¦yuÕå#ò†“«ZTÏÿ­˜°°víZLÓdõêÕ´´´”­ÐÒÒB:fÍš5ô—žâWÒßßÏE]@GG¶mû?œ &Á½½½tttŒØ»n÷ÞCT¡Ř2–vì¬{•ƒ¦ízEË3¡ÖÖÀ=", @P—¼ÎÎ8—ËaÛ¶ßæh4Šišär9b±Øˆ HX–…ëºd2\×%‰`Û¶ÿ‚ÄËžçÑÞÞN.—#•J¡iš/´¥R)\×%™LljÇãD£Q^íÉ‘¬²‹ÅücÈÐò{¦R)2¥Y¯Ìòù<º®û?€®ëâ8º®£iš½¥8)!®îö®“¯åùh$ƒÙñhôÅå×AˆeÞ2¹¬]K~ÖÅD­ö1Ç”oTJêT—J­DTeë”þ<`kéšÄ Š@3-ìMªQ;3³¼í¥’1šêSÉ­É\Ö‡>¯oà²ZHeEž—ðk*GØ¥Êuƒœ|2‘XµÒ•C u>Ù¼ý­7Sã1¦h²z°Ê2™ÀÛ-—JV•ëHíµ½}àþ§zh¥`%£™«åb³¬òÏd†Ñ¾Î£!ÄM4&«+¦ã9®.Òàº`;p±ÿ–V"Üdå”[o³}‡÷ýþ`þ¸ý§5—›¦9ò¼—)‚\Æ©‘íBÑ\LQ-µÒã-¬·´´ÐÑÑAwwwU…¸»»›––V¯^ ÀòåË9묳üe›7oæúë¯Åy89\×eús§3}¾“˜±´ãW^}•w3ÈS™ [zô„½²\཰҄ßÇácv PÈÙ’ãðp?KÏ: ˲0 ƒH$BOO/:IáÌ0 4M#›Íâ8š¦BW*%fgñ8;/¸€_xÁªÇ!£—f~®ëúß_×uâñ8™L†B¡€çy¤R)?鲦iþÍJòù<‰DÂõ:D¹ßl6‹¦i¾pF‰F£†QvmÂ?zÒ[/‰”yÙìr¹œïÕgš&étÚoS"‘ ««kÀuÏd2D£Qt]÷Ï•ôºK$~{ǃZv<}q%étà•p ùŸðÈ3ï%sí×±Ïþ)D>Yÿ è:ЉГ5F!õ§ D#pTÅàÆ í\ŠÙ ¢‰@öc³Ðö̲B[ÝÈ\{žx Êû:Ÿ×ÊVßdŒõ˜¢„ÓBà¹.~Ð×'žÁHq[ 
g²€B%Õ&¢Õ²›f©®H¹ØjÍÔ¤wøLF;VLMÆs\Ʋ,_(‰Å‚ÊÍMŒ hrÆÅŽÛ§¡Ï¬>q]·lîT7)Ä6â4¨ïi¶¢Ù™0gΜ ¤1JjyÃ,Z´ˆ«®ºÊ¿k×.ÿõºuëÊD¾+VpõÕW3îß»—úU"]ÅØ2Övì#E·BA_ÒãFfE¯œñXÀÆ$Üœƒç:yeú[™qÏ=œ÷–·pÛ#”âc¼áå—yú®»üðÎB¡@$ñÃ>òñ8ßùýï¹û›ßdåúõ`š¥œS^2I.—cÏž=œü•¯0ûÇ?æ7Ü@‹®ó‰ïŸ×/F×ur¹¹í6Nܶd)×—$I ðE¬šßßqÊDl6[^bSÆÃÉ ášV>CL¥ÀqÐJ³U]ÓDª¼\# \O­¥ÓéªÞ€•Þwaa°«««ªÈv1—ç@~Çñ_ó¤X …¨ÖÖ`b¬ëµg[`FáqÞnqöΟòÛl–kMÏóXé¥ÿ÷´ÍŸOñ]ïbqk«˜xÇãe¹W°,>ã8gŒ c3 ¿(‚qÚiÜë8œšH°{Þ<–ÿŸÿ¿þ5$“†!Bû,¿þõ¯éîîðpF†µ àÜò4ðï¬ü8!y8ß^t"Y‚ˆP)º%Ñ Rˆ3]1¾È Ì]zßÇ#Bí¤´òòåO…KR¥Aðô° ÑîFóvîNkxˆU´,¿)Ý©Tpÿ ¢þ¬]»–[n¹…ÖÖÖ197£Ý?ñÄ<ðÀœrÊ)5Ç9£I&#N¥<­UÒôÕ̯6¢›¢~V­ZŦM›X°`Á¨ç¶«±±iše¹æS›FŽGbÇP>>®…mÛ˜¦IXž¢ðoé[1Ö®]‹mÛn| øãŠ?zðÚôé5óG˨™aª‘9ÿÌTQ…5kÖðÀÔ?>.‹Å;w?ò‘¯¼òÊâæÍ›‹a …Bñï|gñºë®+ÆÎ;‹×]w]ñï|g±P(øŸ_wÝu¶]¶lÙ ûºì²ËÊÞ/ýå/‹—nÜX7úŠÅbÏЫ¥Óéb1_ߺÅb±XìªñZÒS,öôôûúúŠÅb±˜Íf‹]]]Åžžžb6››uu»ºÄÆ…BÁ_§P(“Éd±«««F‹}}}Åx<^ìéé)&“Iÿu·µ=Åb1zÝWey´ôyzˆce‹Å¢VZ§Ú9ì í+D¥mŒ6ceÇ­ž\Ó,{zŠÅ¾>ñ(’â|}êo>Uì ­/¯y:.F£Ñb±X,~î ‹…tºö¾ …b1Ÿ¯ÿ„äób_µˆÇkï/]]bz¾k%]Un”¾¾êŸWbÅb4*֕礲}º^,&“bBAl&›ßQÓÄŸ/õˆ}g³Å?ž}¶X'ÿCv#×Ý|ÛmÅÇ~ô£â×^[ìëëkˆĆxàš}4Z,®Œ‹æÖ­Åe­NñoÖÜYœyô+Åžb±¨ÞÒ=EqËS,Íb±X¨qLÙÄKÿ‹‹=T¬”,­PíTša¾Ô€&ÆìØ,l¨PØÿ„x\ؾi–®ëâ¯dÛƒ1˜mŒ£Ù_wÝuÅx`ÌÚZ‰ìâGÒå)ÆŸ±´±+a&ʸ¢Þ¶&“Éb>Ÿ/êÅ!Ž“ˆ‰bÇÕÆ@×w‹ "¡ù^]õÌ]†¢«(ƽÉýÝ‘b¬Îøx:ˆp×_=©TŠsÏ=—ŽŽZZZèîî`åÊ•C>E^µjÜvÛme¤·DÆg‡;vp΂ã¦fn¿a;=r-?iÁq?÷Ô‘×ÉÁß>€H$"BÐrWŽ80 èA(Õ¡øªOí 8¯ÈÜwÏ¥ëë]uèQ,üØB~zõO¹ðs²þ†õ,¹p ~è@îŸy?3WÏäô;OgöŽÙl>h3{܃ëºôÞÞËoîú ç_y>'v23^œÁkë_C[®qúÞÓ10üP8™ ìsÏ|Ž+.¹‚Ïìù ‹?¸X¸¤XàØzZ÷çk®$0·Jÿ ‚ØtáÖb"\Z2ˆð¯R²i?:•]¬ÒvzéÉØöxé¬ô™H$EéRéçq<\qÇÊŽ_KYtâ¥ïᮃ…ÅBß]‡5w®ñÉ £áœmWÿä'ÁöÕö=ܼC%,÷Y8ÕHc ªmW-ëx5Òé`Ýjë'“⯽]¸wy^yì˜5…d¢Ø±B1ã1®È¼¿gîs˜ìÅd…B2–v<߀ÅWý ã¹ê#\™gzØÈy8ˆ¬cLüXO)Æõöö²nÝ:z{{¹øâ‹}Qn0Ö®]KKKKYܵDyèïï§¥¥Åw5[ùN8ùP8t”¾u 6³‘»¼r`d ½ïQ.Þz1¤aûúí<þøãì³öáº.žçaÛ6§þôTþrø_øã-ä{ïýÇÄ36)e³=Müe“YýxK_[Êš‹×? 
ÎOÖý„™ fræ™grÂm'pÛc·qú§ch{2ÆEó.¢çÝ=|ä;’§ùiéÓ Wλ’M›6±õ+[9äþC8óê3á?ËJmˆY0!q­ôËu|í¶¯1{ßlž>öiþ=õ邏i|zzöÿÚúø¡,]º”ÖßÀy7ŸÇÌOϤ­­ ;jc:âzõßÝÏ_Ïû+'ÄN€ìgäåL „9YåSÆœÉX´öÒ6Aöv¯ôÖ{´Òyì)mçáǶÍ?uþ(Á@ÆÒŽ÷=:“ß·OˆpáB ƒa‰ïÝWèãwþŽ×¼×üĵÃ*{=U©÷ú¤ÓâºTKd/cʤ >ïñøÐ£ÃH¤ÂœÙ³ÇìkײãÑè‹Ãx8½½DŸ}–—o?•K£39sWùªVÛŠ=•}Ž/Þ›§ƒ³DtÚoÀº XÚÍ -çdÐ\<è†Y -º› JUêý2EðÜ@ t&A}Y`µ1rû x™Ò¾d˜-”eu©]"&×°ÜìáÊ«áÏ$áÁ¢ç ÁZ†xq’ž±SŒ©”âäíÝðkªh*&Š+C1^ã ‰eYœðùÏsÛk-l—?´ Å~2ÖvìGlÞŒvÄU—H„sX˜AåIœ4 HºÖÖÖ6ìÜR¼«Œ‰v‡¶¶6.¾øb.ºè"V¬XmÛÜ|óÍÃÚÿ¾Ggrî~8„KÛ¶Íøo]ðVô·è˜†É–ǶðÇþÈ–Û·ð×è_yÓö7Ñÿé~®Ê]Åuo¹ŽŽ‹:øÆ|ƒ¶‹Û8ñ²¹°ÿBX*ö÷®õï"ïåùOí?Ùð¿øÏ?ý'·|ãî½ñ^®úÓUÌœ9ÍÔHGÓàµ7^4¬Ž~âhZS­Ä8,Òu¢¾˜G!npíh‡§{ŸfƦ̾p6'Ú'3=™Œ)WzDÌ ³0ÛšMï}½,>i1éOƒmhƒ˜º §:¼·ó½´~£•Wg¾ I°s6fÒIJ,VÞµ’G¾÷'p7Ýt_~qÐ.yüJqN–E1k—ÚWé®BèsY&¼ŸÒ¾^¾úå‘ÂŒ•»€öÙxûG? Ÿüd}I€Jn;/þ"Û¶mãµ¥¯¡iZYOÅ(1”×_½Âi5*½‰t3ÿ÷ùÙªUcöu³ãýí‹Ã˜&xmmÄÛÚpŒtìí…þˆ>£”0.k"ú°’mô.à à!±\~C©ÝQZÕ¬q˜vÄ3étÛ…¸?¥þ'|å–(í×Br6☙Ò_Oèuñ,¦“ÀëÒçZ¨ òA¦ë‚ìúdμqcE£õf(UR&/¾±SŒÇùߤþnYåiùÒiñ™ÒSмv¬P —ñWH,Ëâàûîã?‚h“æyUL<ÆÚŽ7XðÇÍÂøÉ(Žâä|Y19¯xÙÍ›7xàâÎ;‡\·2>ü »¶í×± …BñÆko,Þò¹[Šñx¼hšfñ‰–'Šétº¸§mOñ_Oý×¢“qŠO]ðT±P(ÿ|ÄŸ‹ÅžbÑs=ƒ,Š\D2[æ&2ŠÅ§zªX4Š"/Q¶XÜ{ÞÞâ\ðb=£ôW mßú¬«(r©+ö/'ó¤ÉÏ¢¥Ï)¹Úäº}E?—X‘âÀL}ÅâæÛJùÿòÅ¢“qD.º|q`œ¹QúŽ%Êò̋ž¾¾b2™,öôôMÓ, …²Üe#&]¬ž;/D£s¤ŒÄŽ Åbñôÿ¸¸ùœsê?P¡X¼õ„[‹===Å®ø(äP4´ãáØp­<==A.·ýÎr¦‡vÎífƒ¼–zQôS2œìŽÒâu¡(º¬d±¾öt‡ìjüÃJM©$[e]­ôyWQtÃ2+a¾´Ÿl±ÔýCq™)ÖÑKmσ¼xzé/ZlÒébäeëqC1;­`ù¼H¯—L)(ŸñÎ!fÆÆ E˜‰2®¨§­†aõb±8·gð4ÅŠÉÇD±ãjc Å«ú‹'¿íkU×ÇãeyBk"µIXPL†#n<W*.‡±8|DÛZ–…®ë˜o1yòÿ>ÉY…¦i¼ü¦—9«å,föÎäCæ‡X°lôÁb}1œä`ž1O¸@.PÅ.Þ[¥œj}ê .M\*Ö‡¶“qM:åla—©xËüi2„“Šm²°éøMêŠfkbýpiãð~%´Û毓OåI’ëW:Uñœ ‡BJϬH$B<Çq<Ï#•J‰¼yˆ²ãº®ã8Éd>åÀõ|¤vÜ×"ÚÎ=wèeL›o{ÛÛ„=6&E“b’²?}±äïRðØ;_eéªéûŸCMö_á°TÓ™Cô¯×2— ¯0Ä6&Aj=©dê}V)½äª=Œ¬ ͺlÝ&òÎ…BÓ5ϸn-êÍ1æy‰D‚B•ìÓÑhÔÞä{×uÉåre%š‰ÙRˆžã8hš†VšÁhSd&³ýÁÅð•EC¯˜Ÿ½þ3.8ìv·‹T*¥òÁ)šŽKÓð1w:£1b"ú×¢oL ú³B…Ê!úFÙKAN&g+Q«øÂþ ‡ö[ïú’Z‚  üçŒO²áɹDöôº¿$"ÔUÖ¾‘$b¥ÓÓI)Þ(~çHåiŠbYAYg§Vý…B¡Pì·÷öòQŽGEä)&îœ9,[¸°ê²ºÓ IÇð{Ťå€F7`(®ùåóØ}Ȱ¶Éd2ÄKI\âñ8ù|ž§û4­‘V!ü8ˆY’˜éØälGNe¡ð䯚‡’â ˜†g…áåû‰aôôôˆD’}czúÑ4­ª'Ïí€Ó ë¤Ói¿š§a¾XgÛ6–eaÛ6X–E*•Âó<Ç!‘HŒí—iëOžœ¡Wváýÿý~ú }´v¶ÖïM¨PŒžzìÉŽ¼p“XÈlÒ3Wºz…EyÛÈ0Òc¸²]Œ¬°²,ÈS 
Ùm‡=Ú<Ïóô€x¸J¥Ä×±í²¾,—Ë•-÷¡rm*&¯^ù:˜õæŸ;Žãó†¤rŒ!õ Ť¤é=â=xwÝë¶··ÓÊž,«ž¬\º2p'(UäƒÒk9ù“¥ïdåS9Ë4Bëˉ` {”5;š¦aš¦?!Íf³¸®+Â…MÇ3rÃ0ˆÇãxž7a¾[½üyÃ>æ¶lŽt=ÏóÐИ}ÉlnÝp+§¾åTÚ4²¢h.¼a#_=Öù³…=ƒÃE_$iu)Æ_¥®÷ɹmÛe»©TŠd2‰çy˜¦Igg'ù|]×Ñ<ì)*«›¥Ï,ÊïEYXV*W+–Uˆ¥³kx˜®R¬#ºÑuTà m+‡s²B²j‹ zˆ*Ôòö·³üÕW}YŠ)çyloiá¨ßÀþ£Ñ­Q(êÇv?:“wDެº¼n¸Ê'Ü5>+&M/ÄmÙzðzÝëËIQgg§(œ"¨Àé!ÞW†‹Ö3!ì¡©ð¿ßAÓ4ß»+< 6 ×uqÓ4‰ÅbÄãqr¹ù|Þí&4Û¶1÷å¡«½Æb1î较GÓòã#nµBQáÕWá‚ F¾1#Ï2ðI_e\ç$‹èâ#‘ù|Þô¥Ç­®ë¾è溮2ŸÏçË<ܲÙ,¹\Î䇚¦iX–…iš~¸¸ü/+pÉ÷Ò X rr¹¦id³Y{è!–ý´…·l>‚­ào'PÈ|›r;3T†S×uálíº iX¹ñhS×ùÞîÝlyøañ] —° 7Ñ~;…çA,ˆp$,œAà /-ù_ Kâö’Uxevy+¡õõÐ{)Š99-ç~ …c˜ÊªÅ²½2Õc,´ß4AÀA8œÜíO-È}§aØ"m®l¯|ŽÆ£<ƒˆ<6¡í5‚ ÉðùM› F8‘B¡?nøÕ¯˜þÃ÷YÌ^¡hFf½ôRÕÏ‘¸ÒçrÅ)¸IKÓ q|òuŽzcpÔëJo‚·ßôvº./M42ñ< òÉQkxÂì&m"äW5“ÉøËóyQBvZ2GZ&“YGÖ è:›fo¯ëm½jkÍ09…¢°¯{Œíç¿mÙ¡#ßIhöHï4)× ‰„ïa+ûŽt:M2™¬*n…û'Ã0üu*Ãïe®Ëðº#áÝË–ñ«_döÚµD¯¼rÀòjy!S©¦iú&r¹ÑhÔÏãyÚûßÏ Ÿù ‹/¦÷Ûß&ùåD¢C1Ž»èÑGyËwøi÷µ±¬± C•‘ÙáÚ$Òc,\œ#ì&½Ð¤÷Y¸iᢠÒ,_ZWz CÇêk…שÿ×3?®Ü¦PjKøîÑÙ¦rû|é;xÛU››Èb&CÍ[ò¡õWBœBÑüzë^xlé»Ý…bøìÜÞRÕyDŽS‡Ä!x'xš¦˜´4½pbÛÐ9âR©™L†t:Mgkg0Ò5(¯>’EÄ&T¢’!6 ~Fz³Ha.N£i«V­jts‡Í_~3W:£išþÃÃ9Æ<¦ÑMV(ªã8x»¦Óræ¡Ã¯Ú)+NK)ŠpsÉ2¨2ÀHaM×u\×%“H$0MÓÓd˜»Ü¦Ò·0Ý?ÌÂaLþãñ¸_|ʽñdØëˆŸ»®Ë/Gæ>ù$',\ȉ³gãy$©ÖVn{é%þaölŒÐ%i޳Óx ‚Ïïß~¤š¬E. 
ÁxÛCÜ ¥¢è~±v=´^ØKl(䯩Î@Óz®ñhÛA¸(üH‘ž} …bòÒµ{!¸ìÀF7£y‘Oe\„£IeÎr9—õ?$ƒ9˜Ø}Ž JÌ"ÈÕ+Ý–å~¥+öhÿHÈï3!6Pœ6­ª—Ëå<È­yÂ.ç< L14M/Ľôø¡vaÿ ëH¯)ß‹À&ˆKÈ"nnéñfרI„ŸLUª}¨d"ç;䣻é›ãÚ?ÛÕQ‡ãtîäTaÖá:MR`“bš¼‚.ÕÇÙ•W¸²~ɤ%œp®–ëžLŠX­(ÖPû†êIï Eùûá‡áãoãæ#Ý’!È æ–^‡û»N‚¢‚ÒMÚ -Ï <`âÇ'^ÚV&ìÌä'ù” !È/!å·è<åN/2'€l·^:N8¯¯Cð„JŠ…ùÐ÷‰ÏßÂgþA›«~^—SèÇ^!iz!îµØ.8qvðëeÌ4 ’I1Ñ’Ÿþ#þ\N|~·÷¹"ÎÄŒO§zì‰ ¬%þxžØVÓÄ1G[$ —0²½¥Ü@Ž+'[Cá8åí®u¼&ömî¹çžF7cؼzìû?j.D"$¥O ðhÅÔÃ]x¯]„u ½bЕ&ȶ.yU½ád6À÷rS-Ì=N:ˆF£<üðÃ,f¿["a™ôr]MÓ|/ºÊ¢Ÿ»ðB>~Aˆ´¦aŒñe2û‰sFGÇÞpƒyÂI­È¦\p‹‡^'F ¶æÿÕ­yBÂCéi!comÊUGy²RˆÉ tÿ‹”NR‚ ^7M0LäÍ‘"¾ŒEÍ"ÂE *݇+Rh>Yõ!ŠŠÑV(š„Ëï›ÉñsžƒK&™'û)‰ì›ä­ºÒýX8¹L±d!ú0Ñõ úÉ"á¥ÎÀÁ™|pAÅ2)ÈÉÄŸá§ðzC?¡‹äB}¤Ô&)ðè×Ã^z.åI?'A1¤çg̨ú°´n&Å @1š^ˆ;Èô8iñ2ñÆu!b”¦‰$.¹q×ÅÖuLY-UÞäŸr¡]ƒw»°PÂ\4tˆ™Œxü­iͦ‰cTæéÉdÄ:bRüÒõ£wߢiÁºr²èº¢-•Çhm(’ÉïeÛB€¬†m ïŠÊý¥R¢ ž'–išÈ@mA{#qL×…x¼ÜK#“Ÿ†h¯ñÂßeœ0M“SN9e\9ôW–¥« ,ˆ²$“â‡H1yyY‡ãÌa42Ü@ª:.b0&'¿Ñ•®³Ù¬_PÀó¼ÉQ°¥‚7}æ3üxÑ¢a qõÇÕu½,o^X°tMÓðl›ºànZ°€¯psÐAþØ>–JQHO'ž'~ûi‹!æ:ƒÏOîÃíyâ/lcáÏyüáP:O@ Ê¥Óâs‰a”Ó2t]üÙ¶åäëd²|¶‹•‹ñx°­$—ß#ìÑ2Lo¼™3gïû7Þ×_gÚÛ:k¯`#„‰©æ~¢˜p¢Ã©Ã ȉ±Uú“-!žZäóyß?=I YGÉ¥—^:®Ç¬!+Y{žÇâ¹s¹xï^~ðÊ+äçÏgÑwÒzÎ9´}ùË€ÐOï¼óNÞ~Î9“öyA­Ùrþ¤3p4bäï©aˆßÔðo²|à'2ùÀʶ9Nð».ŵhT¼¶¬àwX"î^ôàd¶{ÐíÂ)qøs:È?ó`§%Úù°û<¸Ð„ó;A7 -÷¤p3°Cƒ<øX©í»·Úð² ZÒ¡¹ðCLù°Ð+=ô³30Û.C¹®8/rÜeÁÃUÇåmÃ&•çM>¸=§´¾|8:Iû–ÁCÎýqØP(ÆŠ×?õ‰­ht3†GWM m ¼½¤¥eq†NÜþ©Ö˜˜^»2!êÂfü—Ÿ›¦YÝKÎAxö<¹³˜˜×[1bšZˆs]xýƃ ;5HÓH§ÓX–…¡bð(]b Téì 1¨¬³¢Q†U+[ÞPÕÂC‡C=£!)’tä„ b Z™“-Ç©&òÉóçN ‘a¤€)˱X l꺥·¡[Và(Ûi•þñ¸ø«v¬ Ƽ“wræ©Õ+Lær9Ž¿çxÎüꙪ<µ¢éù¹ †“5]'(# 5dá®»îâ÷r¹¦i’ËåH§Ó®ôˆi2?9HÔ€¥ÏÒºÎLÀš9“ÞwO¼ím¬Z±‚ùˆùÄfÏÙÁšÏ ~åOºCâF¦¼ñ!Gü¦ Áx$— J­ÊV>@«ÎLSüVVŠB–<ä {ÈW®öl—µ'øÝ”¿ß2<Ô+­û66èâÞ”y>YúÒЩ‹ßæÊ¡Q¸œlY8Ó0+[èzp^*Ç_ÕòƆ…;é ×ÕU{ VMh3M1¾˜À9gGŠëŠK7Á‡SŠIÊ=›61wùóÀanÊ@,Ä€ì¦t‚ðù4¥¤¬ˆ¾4j ådhüPL†‡ò£Q¡g‚±Û…YYÿÒ›pêý)B4µ°àœ^°n­*ɰšt:-:Ê å! •ÈÐ̉„ô@­ÑSµn½"¤¼ƒµE×ÅyN§ÅD$—¯å“m)À¹®˜ÄTËsgb²"ÃpÓ ÈöÏÍçПªº,rÈýCWV(Žçñ§¯¿Ž~Áaõ‹Æ2!z°Dî²X,†išœªêjÙ¯œŠQ'—Ëùׯ¾vÊ)üÓÎ|nþ|@ŒÏ[²„Õ÷ßß覎Ò÷ žØ/¾HbæL´3Ð_z köl!µ·‹ß3éi•Ë¿mñ¸øm”öù| üH #ø”¢R¥×ˆmó¶£i% CLúL,SŒ‰d¿BIô'…zyHLµŸ×ZáNÃyh9š„ÂŽ”)è '5á\.Èr¢P4ü¦•cfþ5ˆ€já~Q-eĶ,ÁÆ0ò9Ä`]öÄÆ(ê`sÎã°'ø<•JÇ«§XñSeä7UL)šZˆ{f~? 
ÎØ³NÒÇ!•Jù¡5~ø“CíÜMæ…PuTmZÂmŸûÁ¾“\&'(Ž©ˆD8zÞ¼F«asø[ÑOh{RHÖz5õ­h~‡¯¿9Ãy–!«c%Ár-þpÝ8æ˜cH$8ŽqxžWµC¸ ªÄó<Ç Üy¡ú\.G4õ+¬‚(æ +/Û¶®ë¸2?”½ïßó˦£fÓQGðö?ü_¼ÿý,ùú×9ôÐC¹÷Þ{9àþ‡SöícúôòiÙ_{{yéå—Ù{øá|ÿôÓ™ñç?sÊžÃißÖÎÿÎüïÚµ‹éóæñÄa‡±ð©§êû’M†xßšÎçS/”/ð¼àÁ}e?ê8¢/–9¸e)û^éq\JEIšà„,Ô"£¨bà——ËdõPZjxWK©3uoE‰=K_ãômÝ>çì-CŽ£¥ÿ²h‡bJÑÔBܽ=Å[~Ó_ý»Ët]'—OÈdÒß)rÒSÍûÎ0xÇÿw£[6lžýì1è½øÜu]<Ï#N\¹&+šÛfÑW:¸¬mà"Çq°, UO÷ýßñÝÿù.—o½œ'z‚ÙÎl¾ûÝïòÌ3ÏpòÉ'ûEä=ÉdüI²ü“ËÃb—$•J• c𦕉v‘HÀä‰ ˆd¹ßL&<Ô!›‰º®ûíÑuÝŸØËö‡ LDˢѨ/.¦r9r†á¯FËŽ+ÛcY¶mWÍñ6VTææ Ë…²]×\s ûö탳ηv%šn6‹õñŸ=[ˆp¥Dÿ…c-s`×Ñ$&*Çí2ø`Šo|#o|ï{ùò°xñbfïšMïk½tžÃ8Çàé§ŸæäWg3ãœsxàõ×ù¯5Úç~€?oÛÆÓëÖñÆ“N⑟ÿ¼Ñ&7"ñàµþ~>ù‘¿H¯bÛ¸ÕX†pñ ‹bá<’÷»%ïàÐ6kmÐ]¸È„»€.=¨êî!~èÂ-Ò@¼ÓË ,fS™J£4î¨*ØÕ*Ð%2uôà­TÎe¡ÁJdªŸðxÉuË÷]‰ÌùÞF¦rœÀ:ü ²Z1C™š¡Úu’}@8 Q¸}2wgé;],Ög8MÆÂ·låà‡_ }5×ÿ½«ŠK0¸ŠˆÃŠ)ES q÷_{}µz’{×u˽(äS ѱN`G2E Òiþ«¿Ÿw4ºåʊmÛ8Ž#&¼™ìS¡hOPž¿Å¶mr¹\ÙäÝ4M~{öo9é£'ñ“ŸüGs˜þìt>üá׌íííôôô”yÅÞWR3l`?‘"€ã8Äb1òù<–eñíGåCgœA&“!“ËåH¥R˜¦é #íííþ2)ˆèºN$ñ…»x<®ûÛ$ë(êSK䬗]»vñÚk¯ú9ol»Tgèø{°:;IÿÃ?]³FL4\7˜ Œ™LÆ· é âzüãÆäèçæñ£ç„GN wf/×q-÷⦅kÞu Ï´?C_ªO\3³d×)M×°ÿ¯äúÓ4âñ8–e¡i¶mû6bÛ6š¦F}Q9,ŠË6IïÓl6[óÞpÛ¶Éçó¾ÝJ¬^ªy¾Ö¢Zñ–jâàpY³fͨ^ãf •óßJÝØØ|Þ;o„ÃÿLô‡òÌ-aDÓÀ4±wî„eËࡇ°ZñÆíÛ™óê«,8ì0 ÓÄ9öX¬#ŽÀ›5 hݾ}ûö1ïé§éß¾W{ŒW_}•Ù6ðúI'1ëê«y­¥…¿õ-–®_ÀŽÖVŽØ¼™½'ŸÌÜÞ^æê:óŸy†Ù³g£_z)-72ýþûYðë_³ùmocéæÍÌ{ùež>ø`?óL¼£ŽB_¸×¶ùêÌÉ3grêæÍœ¸hmwÞɧMci4Êß~ç;¼éÇ?拳f¡·¶V?+Vy¨>ºú: €ä‡E±0oxƒøòÇÁÛs—sBÐ+¥\Á²:Œ |x;°x1tâWT´Žã<Я=´sátxè»g¶¼áó_·½Dë1Á›n4:tÁ;(v¤À›Cœ×N‚¼m—”D)«taöz)Ϭ".e2b›žž¡=E #(ÅœÏ J+û°°§ŸDF UóÈ–í ÷“®TŽ–BxØ‹0œƒTΛ.Å5ù¾–Km)¯÷ÆnàðSNaîâÅüáŽ;ØñÐCœSÊC‰Åüâ\±XŒ5Û¶qÄK/¥a€e±ìë_þyl¸ïþ#5”Gܤ©…¸­sæpLëCÀ‰eŸÛ¶M*•‰½Û ÊB'®7ìC)c‚wÏ=Ì8ò­À¬²Ï˪èŒà·[¡w‡-§ÍW¶m“ÉdˆF£$“I"ìyz»ÿº›ÿÏ‹ÇlX´aLE8œ¤R`Jp«ð´°÷lŸaD"Ú>õ)¢ŽC2™$™Lú¢œl§L¿/%Á—Þ>ÙlÃ0H¥RX–…ëºd2òù¼ï54”ç8‘HÄ?öHøÀ>ÀúÒdy¢âyðÙhoûá,fnÙBö'?Áxùå`…ÊÊß¶÷ü‚#RÄ’–ëº,8Ã0ÊŽYé™Zû# U¶[!°mðí%ûå­ ˜Ùw˜ç Èó)Sc¹ˆù`1”ž¶cÝ{÷Òuúé¾x1;Z[Ù3kó~˜|€¹×\ësæÐ¾cxî94ÓdÇ™gòÒ#À»ßí;uF£Q!¦YJh.CÛÏ<³¼ý€þìÄò9K—¯—, öC©ÑtTFÊ1—‹j®,-¸òJº:;Y6c'®XArˬrO•JœÒI’9剳 ¶“#m„×EP9ð@ô^À<é$žv¿éMþ¹-„„Ê|>¶[z8/E`šÜz÷ݼgìÏú¨Óóíè_ˆš!©å4¤hn!îÕ;YpàΟ۶-D8éÓnü°)5YÑD8sæpÐÞ½ÕæðŸn*Íέ»Oc~d Â%¥pT)ähš?†ç¿ñæõôÐ7opoص‹½½¼ð³ŸðöŽNíí 
B‡5 SÓ0‰ªíH$,Û¶ ˲( ÃK÷+s¦Tki9: Vz6c™LF„ÿ˨³ô礽‘ïM‚ÜŒò¿ÌÉ(+i¶S©#k¥¶IÁ.YÚ&zDÌe“té…—FxzÉçJò{ðëÛ~Í;&^ÌÏ?rW͹MLÃíËÂå­Äù–]e=»შF sbB¹øýO$D£ÑšytÃÒ2Ì?•JùûŠD"¤Óiÿa_8 ”¼Ë `À8E2ä}ošPòÔ–ß#—Ëqà6úTgýz^Ùõ¦òÏJc¼A£ÔüoÊÓÔBܾƒbv)¹i?<ÁCüPå(OT¬P4  JøV,#ïä•Ý*& wN_Á±¥AC­ –mÛ\ù¯ÄnµY|ÒbøêáHä x8T)œÉA§Ói4Mós‚™¦é t€Ÿ+“É`Y–ÙÕÕåçÒ‹Åbd³Ù¹ó&;7ühïo€IDATxïýì#üdÖiå^6X–E,óPMÓü‰IÙ„'‚˜ J‡…saž>O¼î,-× ’ƒ˜t+£@âí o}ÓΣWRx³wíbíÃEvuìbÏwpÔsÏqörÙòå˜e…iŒ«¯Q_F}oÝa{,JAª–—Â/T“jÑä¶Ø§ëׂ® pîãï¢Äí8\bà\\Æí@ œC€+À˜ ^¬Oƒ;ô¿×ÍïÀ쳫”ò*Ñ6¸Qá¡·Áëë›àÜš8+ÙØq0KiÍ2± ’гÀ8¼¯€¶Ò¥ÔdáÚº÷2j¶4ž¼òìtæÿö+Ã+"+™f‡¬²ÌW)…±ðk¿ï–eaY–ï/½¯¡¶ø•L&ý{6,ÖÉ”2%¼/+ø…_‡SŒÕÞʶd³YV­ZÕ «2r¼éÓá…×ËÆ¾¸_ U!UA“ qÏ?ûNü›ÍeŸ…U} ÄÓ ÁS&…¢‰Øp춤(Ïpï{¿Lù®bð|_‘þ@±ÚËó<¾ú¯ò—_ý…Ó9]õÉ5ÞSC…þ…Ñ4­l`¬i]]]þ ;¼,N“ËåüÁ}>Ÿ÷Ã\+Ôííí~Á ™˜¿§§'w)]gYÙrR„f287=JÿÞëøÇןN+[,óÊücš¦ÑÓÓSý»K7¹(œ“+Ü¿W«\ªPŒÞ%Wàüõ*ò;˽ԤƒØí/¿Ì¶ 8ð =̾å¾Üò8Ñ’¸f¿'ÜŽd‘›jË}8Ïb–/DyAj+s/˜9pLÈ]ÆÙ‚}d^ïHðÚAŸÌ€èÉ¿]8Â嶃ù9Hž ö!+‚óE£ÜE`<\ŠTÌ‚ƒŒVz¯—rÎ{ Ÿ-7Ã)³Ìš/Ón9IÐ<ñ:2ˆ‰m¢—•çñ×uñyø}8ÊPæý—¯ÑƒÜøñxAÿoÿö1·³ÑÆú£èúÛ†·‘…°£gc³!‚I¡,,ŒÉßW)¾É°þð=Sé ¯I­FxûZ¢Ú¤ø o6zˆƒßpÐâ4äõR©4¹·ïÀY|L¹7‘L\–´wè¾I¡hÛgïã°%›uå³B1Qø±›Hý¿TÕ¤éØ5£bP<³Ñ­mnd8ãþNzkm_9€—ŸUÒUªú–Édü@P‰/ŠL§Kÿ#þbŠ)I"AçÏ¿Bò_çB늚ç1{Û6pvë[üìß@»ÈÄé€xüÌý:dM"àõÀ9ç|sÏ -«6ë€û0> ,'×…D"TŒ1”O^j¦Y*&f‚‘£ÌO90ƒùSDHè#¡ŠB(ÏBT‡¨Ì' ˜×Š?+Þ|ˆ_]þUŒBùôBÒ ¨dLÃ^"‹Ë†ß•ò¬2e©Æ€uÂT¦/‹Fá®›ë:FüaËÎÞÔ)Ä9Ç‹(£êMìy–eùcóT*å{—ÉBKµ{´…Ã9åCI-aL‰dŸ-ÏÏaÏp‹WM0/NÅØÐÔBÜÞ·Ï,Ëêb~§•"Èã P4!¯uNcÉÇ•/ù •í*&¯´¶Ò·umMqbí‡Ö²òꕪüz il0F)Q²¬Ú*sÊ% ?FN,"‘¦iòÝï~wBOÖ­\Ê—ß'^K@ù¯Ò3aĬ<œ‹HÆþ)ãŒÝžÀüȘq‘©å+{÷b|ñ‹ìþë_yÇ‘GŠõ‡ò׎þÃ@„Y:Žø/‘…Óip–@êoÁüÛ‹e (¿ðp4 Ü \$ÞKB×K'=À­TD‹ '›¦MI Ëâ:âÞ{k¨Áòž ‹4á×UºÝèp<·'n—×”¬ùf?G¼eA}+»‘P£pd¡£l6ëW— –¿Uæ2“sѰw[84´²˜bòrgÇ9òÆ'QIÙó)óóœ4Ï\\¡”]ÇÏà€ßþ£F8åW” 2ÈÜŠšTõ(l ¦iú“ Ã0è+õY•yeÕÖH$²eË&¬—ÎMfšJ¥üï[3'–EPVRNäPcECHd—qaRxÁ¹ÀQgŸMòŸÿyØb¿s;Øy°·•„1„§½P¸0.„4× <¸Œ³ z@®ë%/7yÚÎ"(v JR¥÷Fé/Š»ã¥/CTÁ,yËåùu÷lsu­ŠÁð<6ýª…+>òRíu\ÄC™®À~ á¼k2ç§$ü»ŽZ1 £L|“9B«m7Uò«*Êye^‘Cp¥þgƒF(g E‰¦âvïÞ]öÞÏW“A<áR“=E“óÇË÷Ðþ–cý÷®ë¢»º˜Ô©üYŠ ‚,Ï‚Ýn×L>;ÿsóÝLÅ~PM¬&´†A2™dÉ’%ôöö6ºÙÃÇq¸Æ1øm?/ž¦ieaCUÑ‚ÔN‡¡Òd(@¡1.\»–_]s Ùl¶¦'ŽçA,&¶±íàOÓ€^0ÚÚ‚B ±X,x<1IShÛºuàÀ·ÓåžSïQ ¦Éd²¬_›0¸.·_{ý9‘?˲,Òéôàá#R(0bA*¤DÑ4Üî 
nþÃó«k®¡P( : ;ìèºðd+JçBúG`hˆþÜ"ÈÇU@x´Ùˆ1¸G† Gà¹/­#s´µ„˜Ê‰§^ÚŸŽ(bR@Œé㨉éT¶ٓ$/ZaKIoÉ:‘ù>AäG ‹h#ñZ›4…Šc‹3f”½·m»,ÔÙ'…°iåL¤šXˆ{ö‡¿çèÞöß—©Ëª/TLæ¦vpÆ[Ÿóßwe»Ä`4²cÅ„áNîøÐžß{ã½Ì\0““/89ð„P(š×å®sÎáâ´ð†“Þe3!>¤Jï ê³íz×S(FƒH„L¶ `Ø6Û/½tðÐjðá¤S³â4aﲸ†ðî”Ý}4ô™ôNK!1p‹p][ªƒX,æçO&“~Ø© UŒ5ÿûÕÙ¢/¬……ð,Ñ/Ê>Q1åiZ!N›×Å1ÿ·Ç¯ëºâT"pÅâµda)ÄÚ:¢Â—Œ-÷ßOËžÇqÿå`f/šÍ!—¢ró(šÛ¦ë¬cðî¼×u«çpI$„§ô¿Þ'×j|¢4ïíhåÚ¯=“J -Âð0êÝAi2O‘œzAõÒÿ,åžHÒ£-Yú³Jÿ{Bë„ÃW5&…©ëº¾Øåyž/ø€ð~ñOq.çõ,Ëò—Ù¶íGõxžçç¥H$þ6±XÌ_/—Ë‘H$$_D"¿=©Tªº÷M“óÃgW0{cËÀ9D¿[Çë𹓅v@‰oŠqĶyå€8çœÿ#Ó4Ë=(-ÄÜ/ŠèS”Ñ´BÜ#ºÎ±›7ûïý-™(Y¡˜Ÿ÷Ã(ãñ¸Ÿ“Õ0Œ²ü¬aáu°œ€ÍÌ3ûñ¶™UòŒÆ©iSžç•õÑhÔ?¯*tTÑ(Žû»ÇY¸P8]„Ó|¤·±BQAÓ qYÞS­þû¥-KÅÓ:ùÄN¡hrŽ~þyöÜ1Ë€òñ¤O¡˜`<¨ëœ4w®ÿ¾ò‰¾BÑìÌ}ðArs/e×-,«ˆ.‡©¨Ð 9`X¨ôÌ‘‚Bxb`Y–/N8ŽS&N´··—‰UaA"¼!\×¥µµµê13™LY{ä~ËòýV´-ìEàWg`âëJCnÇý¢#•Û„+§ÓiÑh”C9¤1o¤X—nz›>õ)òùüÐ"L!¾… !Èʦ2ttœq]×·‰°xæ8ŽokžçÑÙÙéoSövÃ×½Ò°áó¶§p޽t:íÛ`¸²´®ëeÅm*«mJ; ‹Dáÿ•Éþ Ør\G?ÿ<;··ðѳBq²[ÂÃ"¿*š h4îöíìÛ·Ïï7Ç衪4bE šVˆë)¾‘K–ûï/ì¿P(ÊQT%Å„à½{9èˆPYvY%G¡˜`<‘ÓYr`yUTÓ4……+&¯>û,î³o`«“(ÄIO¡"#_ âŦ2¬M¾®ôô¨ôà’ˉD™(öÒ oN.«3 ÃÏdF™ˆÐÓÓS&V… )B„·Ñu¾¾ R\Ø3'N—GžóJÏžt:]&¾……‹°7Q-!N×õª"F½• Ãà°ÃáEo®Ë…úOvßµydm.Fàá)ÃIåi’¹ÜF‰J›?¤ Ûqgg§/ΆC8Ã×ÝOCƒ¸†áB*•bª\/ú×:l+Õ¯jD£Ñ)'„5šCöî…^®‰KÍÕžçÑÞ.lišF2™T×KÑ4üù¹Wxúïöm2™L–÷56JˆSÔ¤i…¸—6̦sî åƒKÕï*&/,Xà¿N¥R<|àÃBHV(&G=þ8Ï?}ïÿiþgZNô•§˜8üå°Ã(®{•Õ«ßLâbEFHÕ0ÊÃ,Ã!“aѶí²·ðSôJÑA YÙl¶ª(V)Z„Å…°—NXôïÛæ—»V0mý5ÕsÊ .Љ°q!` ö\³,Ë[–UfÓáΰM…í¸««ËÆÂ^‰º®×ôSö99ùñïaºæ}±Æ€<³òƒ¦iôôô ï Å8ñÛçfNû ü¡—‰áW(ªÐ´BÜžƒæýïžWê¨Ó¨øjÅ„¢ûõ“8ä%áFYzÖRå=¤˜ÝÀ› “ÉÐ{_¯*½®˜Pܾ÷-8ÿ™ro8ƒañ<¯,ï”eYþÀ»µµÕ-Â9¡Â!“áKÓ4Ë„´°wšBQ ÷™gØ÷›i\xáü^A.¢ØÏ>Dèi\&“ñm7‰øÂšmÛe6-½£Ñh™MK/ǰ}+µèÞ·ˆ%ñ6Ze|<«Á*¦mšÁ‚—_Ê=}Šz˜ÞèÔâ•GBÓf•…'(‰'ÑØwÜ. 
ÍUs”Gœb‚ñˆ¦sȼ>4íH@LƉ¢<”Š?ì=Y3þM *›U†çyž‡ëº†áç!ÒuÝÏG%ņ°³ ‡iNÄäéŠægÕ3gS|ßøâ?]¾@`( ¾½mÛ¾ÇYgg'ù|Þ÷’”v](; ‹jj,®-f;ï=f;Љ !C†WõøT(šŒ-;Z9|ÙSÕ¦%PqŠšŒ›×ßßÏÚµkÙµkË—/gÅŠƒ®ð’~\W ŠM݆\¨ïX ÅX0\žîº¼±øFR©ÿ|Ê?s¨s¨â g¸vÜóT+‡,~Bœi(·NEã®ϼg§”Œbâe¹[lÛÆq à —Ë¡iñx¼LŒŸ)£Ápíø¾Næ}GÿM«X/J™—² •Û¥¸fÛ¶_ «+(“*=ÝŠ‘0\;noŠK.9S¼ q _Eã®<¿ó0.>âåê ¥m+5·ÐÔT*Eoo/e¥ãkqà¶m8Ž#Ü’mTXª¢á ׆›ù(+Ž¿MÓ8ôñC•‘¢)®¯?ô­zàþ„Q…O¡h õã¹ûîⳟ=)ø 8"$Oæº ‡Û%“I%¸)ÆœáÚñ«·ìä›_òÈY\yåùâé^ •NÑpFbÇ}÷sλw¢ëg‰P %*+ÊHìø“Ÿ<ÜM¥R|àpŠyŠ R4Œ‘Øqæê_¡ëø«MÇ䥗^âý/ñxÓ4몫PŒ#±ã0º¦sìûŽmô×PLqFjÇÓÖo÷ód–‰È6AÅj…¢ãâ·nÝ:::‚¼,+V¬`ݺuƒnsØÎB¼hGq<âV­Z5þ­`ݺu¬Y³¦ÑÍ`Íš5C^³ÉÌHlàÁ¯?(ª-Ò+û)§îéF2;¾ì”{HmXкð†ç9ž²ŸælG£‰¿õ·ÒgôA¾øû/rʦSÆ}\Ñ,v¬ÚьĎ/Ùp¯¼óˆÀ çœÀ²ä2HÁ‰ç8nœÍÒÿ¨v4#±ãþþ~r‘D ec ‡žyhCÚ­ÆÇÊŽ‡gÇ6lÀq~µÙñÔ4[ºEgéÏbJ…¥Nuû #m£ÆÅ#n×®],Z´ÈßÖÖ6èúùË_8§ë~ðÖðàÙB?Ѐë»aƆÖÎ;éïï§»»»¡íØ´i<ðsçÎmÈñ_zé%6mÚÄK¥*¤ãÍpmàùuÏ3oÓ<®;ó:6œ½¾9þíVöSN£ïiiǯ½öZCŽ?\;^¿~=Oßò4·î¾•mçmãå™/Ãà?KㄲŸæjÇsÏ=ÇsÏ=ÇGÁêÕ«ÇýøÃµã{ï½—ï?ø}^x0'-)…§¾¸j|ÛÝ,v¬Ú°aÃöìÙPW> Ñd$c㋞¸ˆ×–¾ÆßøFѬ`\ûäF÷?ªåLÄñqÿÃý¶õ0Òg¥yvɳãÞƒWÒh;~î¹çèëë›0ãã 6ðØcqÁ-ðì!ϲ¾m=ϾáY6¼mƒ+CCæ}¢ÑöÓ,lÚ´‰¾¾>>øàºÖoʪ©÷Üsÿúïø»F7G¡·?z;gsv£›¢PŒˆÕ«WÃøë, ŨbYV£› Pì7á±±B1‘‘ãc…b¢òõ¯½ìý[yk£›¤˜€ŒKhjGGGÙ‡îînZZZýÝŠºQ6¬˜ (;VL”+&ÊŽ“eÇŠÉ€²cE#W!®¿¿T"YÅDBÙ°b2 ìX1Pv¬˜ (;VL”+&ÊŽàÀ/}éK_ëƒHEùꫯÆu]î¼óN¾ò•¯(¥Y1aP6¬˜ (;VL”+&ÊŽ“eÇŠÉ€²cE#˜V,‹ãu°ÞÞ^6oÞLGG‡2lÅ„DÙ°b2 ìX1Pv¬˜ (;VL”+&ÊŽãɸ q …B¡P( …B¡P( ÅTe\rÄ) …B¡P( …B¡P(SqÉ×lô÷÷“ËåX¿~=½½½tttøŸÿä'?á¾ûî`Ñ¢EeÛÔZ6mY±bE]Ç‹v¬]»Û¶ËÎÅ`Ç«s¡¨ŸZ6,—5³U”O”W?¾ê'ÊŽ«_õÇ 5>®~|eÇ£ÇT:7ͦ™4 £©ÝLoô—oº»»iiiaõêÕ,_¾œ³Î:‹îîn6oÞÌõ×_ã[»v-¦iºl°mÛ/“nßxµ£··—B¡Àí·ß…¶P( ÚŽ¶¶¶19Šú©eÃrY3ÛñXزã‰ÉHìx,¯[³Úq#îiEý(;.GõÇeÇå¨þxb¢ÆÇå(;]¦Ú¹i&ͤYmífÊyÄ-Z´ˆ«®ºÊ¿k×.Ö­[Wæ¾¼bÅ Ö­[7䲑ÒßßÏu×]WÖ–ñn‡Ü_oo¯¿ŸK.¹dÐcŹP Z6 ÍoÇcÑeÇ“‘ØñX]·f¶ãñ¾§ÃCÙq9ª?ž˜(;.GõÇ5>.GÙñè2ÕÎM³h&ÍÂXh7SÎ#®­­¶¶6@<)H&“\rÉ%ìÚµ«,fW® ºl¤¤R)®ºêª¥‘dz½½½ôöörÙe—ÑÑÑÁúõëY½z5+W®¬y¬±8ŠáQˆa|í†oÇcÑeÇ“‘ØñX]·f¶ãñ¾§ÃCÙq9ª?ž˜(;.GõÇ5>.GÙñè2ÕÎM³h&ÍÂXh7SNˆ¡hÞtÓM¬]»–«®º Ó4Ër˜Œ57Þx#eIþy.¤Ërww7«V­båÊ•n–bªÙðx£ìX±¿(;x>”O<”<ÊŽ'ÊŽžeÇeÇχ²cÅHi´fÒ,ŒÕ==åBSV­ZE?·Ýv›ßAwttÐÝÝí¯#㢇Z6Ö¯_Ï7Þˆa†€a¾ûâxµ£­­­L¥íèèðãžkk´Û ÕlšßŽÇÂ~”O\†kÇcqݚݎÇóžVŒ 
eǪ?ž¸(;PýñÄE”.SñÜ4Z3iÆJ»™rqk×®¥¥¥e@|¯ïŸÛZí÷<Ïóñ„%úç¿Ú9Ñ4 MÓêÞf2QËŽ³a¹kš¶_6 åvlưm¸²­ãaǃÙd-;¶mMÓH§ÓhšVs<•íx¤}10â>vûbØ;-®Ü×Hìx°ß’‘ôÇòÜY–åo;Ù™ˆc ˜:v\«?6MsÀù˜ª6,¯ÁD³ã©26–Ëê駺Ës5ÕÇÇ·ß~;§Ÿ~ú˜öÇ#±e5>®5>nîqÅDÑ+šÎ#Îó<"‘¶mãº.±X¬ly$Áq\×¥³³ÓWéc±¹\Îß>Ú‹Åü§«±XÌ߇ã8¾+i½ëÕ"lØò}"‘ ‹áy™Lfȧr›H$‚ëºC¾l?õ|—ðM(ÛL&m¿Ü·¼.ò8•är9‰š¦Õ½Ídb0;®eÃò܌Ԏ÷׆åq@ØÆHlXîc<íx0ûªõä%ü}«ýxMe;i_,ÏM¥W_,÷#·ãѲáá|ŸZö5XûGÒËk›J¥Èf³ ²®ñc¢Ž)ä>`òÛq=ýñT¶aù}'¢O•±qxÿƒõÇSÝŽåwVãã<ðÀ˜÷Ç#±e5>5>nþqÅ„Ñ+ŠMF:.F£Qÿ}6›-Êff³Ù¢išþ²d2Y4M³˜Ï狆aøŸ÷ôô5M+‹Åb¡Pð_Ëm’ɤÿ^î»Þõ#¼N¡P(ž¾>ÿ}¸íÕÛôôôÔõ~°ýÔó]’ÉdÑ0Œ¢®ëÅh4ZÔ4­˜Ífm¿Ü·ü¼§§§ìš¦YÌf³EÃ0ʶ­µÍd¥–ײáb±¸ßv<6^o$6Þn¼ìx(›¬õ’Éd(EMÓÊÚ§ìxd}q±Xێdz/®Ü_£úâá|ŸZö5XûGÒ‹E¿/Îùœ¨Lä1Eåþ&«ËýUöÇʆ&²O…±qxÿÕl_Ùq€ã÷ocÝÔ–ÕøxpÔø¸ùÇE¯hºÐTéÖ( ‡&Tºvš¦é«¦š¦•©–a7ïzÝ GÛýÐ4Mÿ©W½†Qöäa¨÷ûû]<Ïó]–¥ú,ÝHkµß0 ÿs]×1 £ìi§ã8èº^¶m­m*CO& µì¸– Ës·¿vÜ 6,Û1žv<˜MVû¶mûîúÒ¥9‹ÑÕÕ(;–çh¸}±n„7Gœ¤šqÈR¿Ò˜s¹\™R9Ú +q]·Ìý´’t:=è«gûÑlK½ŒÄÐ4Mö <’m&:•ßw(†±µãzmx<ìx4mFf_#9×SÍŽGÒƒ²ãáîG¢úÖ±a")¦Šõùž Ld;ÞŸíG³-õ2ÒI˜êëC÷¯-Ãa$¶¬ÆÇC£ÆÇûß–á0^ããñ´ã¦õˆ«…ã8~\¶tñTŒ®ëbÛ¶_qg¬¶™Ì(]ÆË¾”—£ìxtQ}kcPv<º(›l ÊŽ²ýýGÙñÄDÙ~9ÊŽG—ñ7ÂŽGUˆ[»v-½½½´µµ±råJÿóþþ~Ö®]Ë®]»X¾|9+V¬·/¨P eÇŠÉ@5;V6¬˜H¨¾X1Pv¬˜,¨q…b2 ìXÑ,ø¥/}éK£±£5kÖÐÝÝišÜtÓMôööúF|ÅW°téR¾øÅ/²hÑ¢);®˜x(;VLjÙ±²aÅDAõÅŠÉ€²cÅdA+“eÇŠfbTrÄõööR(¸ýöÛX´h…B€îîn6oÞÌõ×_Šó`nšŸÿüç9üðÃ}nxâ‰'8î¸ãÚ†]»v±k×.Ž<òȆ¶ã¯ý+sæÌaΜ9 mÇO<Áš5kÆdߣmÇ«W¯VöS¢™ì§Ñ×à¹çžã«_ýê˜ì»–ĆÇáç?ÿ¹²ŸÍb?ÍÐŽ]»vÑÖÖÆÅ_<êûí¾X¶m?ÍbǪÛqþùçzÞ56VíïvŒ÷øx$v j|¦YúÁf±ã‰2>þå/ÉÚµk9ñÄ}Êš‚f±Ÿf`ëÖ­Ìš5‹þçrÝQâÖ­[GGG½½½lÞ¼™E‹qÉ%—”-“¬X±‚«¯¾zÐý=ûì³¼ÿýïoà)<ðÀ,_¾¼¡mذa6lhx;n½õVÚÚÚ8á„ÚŽx`Ìö=ÚvÜ×××ðë¦ì§œf¸§ÿÇ~,¨eÇ7Þxã°m¸¿¿ áçLÙOóµcÆ ôööŽÉ¾G»/–!(¶Ÿf±cÕŽí}Ýh¢ÆÆªãÝŽ±b4Ç ÆÇaš¥l;ž(ããgŸ}¶)ì¸Yhûi …=öX]뎚G\oo/—]v¬_¿žÕ«W³råJvíÚÅ¢E‹üuÛÚÚ†Ü___7Ýt«W¯.»1Æ›~ðƒ _´h'œpBÃÛ!ÛRÏõ º»»Y³f [¶l³cŒ¶oÙ²…›nºiLT†BÙO9¾§¥÷õõÙ1jÙñHlxûöí<úè£ìܹ³¡v Ê~š©k×®å׿þ5­­­c²ÿ±è‹ÿò—¿ÐßßÏêÕ«vÞ 9ìXµ#`ÕªUlÚ´‰eË–ú¾ÇjllšfY®¹ñ¦ÑýjG9ÄŽA«µ¥Ñýq£íxíڵض=aÆÇpýýýþ>¦:¶ŸfaÍš5üùÏfæÌ™u­?*BÏ éêÙÝÝͪU«Fé:Б´µµ5¼s~suttpýõ׳jÕª1=ÎhÚñ 'œÐp;VöSN£ïé‰fÇG}4ïyÏ{>ÈPöÓ\íX¹r%mmm¬_¿~ÌŽ1š}ñqÇ×É—}|ÕŽ\ýõ¬Y³†£>zLö¯ÆÆªcÍDW€‡i†~oÇ+W®dåʕƎO<ñDvïÞÝðñq³ÐhûiV¯^ÍòåËë0mkk+S’;::|7ÿŽŽº»»ýeÝÝÝ´´´4ú<)Pv¬˜ 
Ô²ceʉ‚ê‹“eÇŠÉ‚W(&ÊŽÍÆ¨q+V¬`óæÍþcݺuþSiÜr™mÛC&ñT(²cÅd –+VLT_¬˜ (;VLÔ¸B1Pv¬h6F%4µ­­H$ÂE]Ä¢E‹Ø¼y3Ÿþô§ýe_|1]t+V¬À¶mn¾ùæFo…bÊŽ“Zv¬lX1QP}±b2 ìX1YPã Åd@Ù±¢Ùµq2íæÍ›éèè(sé¼ä’KˆD"lÞ¼™Õ«W+wOEÓ¢ìX1¨eÇʆÕ+&ÊŽ“5®PL”+š‰Qâ`ðÄ—Í’S¡ eÇŠÉ@-[U6¬˜(¨¾X1Pv¬˜,¨q…b2 ìXÑ,ŒJŽ8…B¡P( …B¡P( …B18JˆS( …B¡P( …B¡PL(\Wü&ž¶=¶íVBœB¡P( …B¡P( …bB‹‰?I$‰Dõu=oèý¹.tv-Äí¯ø§„8…B¡P( …B¡P( EÓã8Bp‹DÀ4Å_.'–%“Í ¡L o¶-Ä5Ëï;;Å>$©”Ø—ç‰uâqH§ƒåa¡Ïu¡½=8ž$—ƒÞÞ™u‡Q-Ö P( …B¡P( …B¡P …ã€aïS)!ˆ¥Ó iB K§…˜&?w]!¸áÂÈ÷¹èºÕt]ˆsò8Ù¬×zzÄût:é, òùò¶hš8¾iŠ}uu‰Ï üR)H$ærä‘õ}oå§P( …B¡P( …B¡Ž30äÓq„0bY{»xË aM†u&â}&l›L ±«³SìC×…€–LÛE£'\-LS´Ã¶ƒ}H Cì#• ¼Ý¢Ñ`¹®ûˆÇÅú¶xÑI‚ÐØd|pnÝçM q£@=±Æ …B¡P( …B¡P(‰L¦<”S’Ë *ìM–Ë áʲ„p–H1 ÄéÉ&=ÜzzÊõMëäóbr}<ÕêÁ4Å1Â"_˜d2ðš“XV¹¸gšâ»˜¦XV-÷\W—hW2 G¹§îö)!®‚TJ\°á$ßËdÄEô<¡èVÆ ‡±íÁÿ9Nu#¯Ä’Ö^w´Ë†(£Œã8d2"‘HÙç^©v]§ŽAÙºB¡P( …B¡PŒ–U.Xe2îÑÕm©T d%“«M kޙɈí4M„†zžõäôÎ0„H>n˜:$4Mxºy@¦Ê²lÞ‡â¯Ç„D²|=HéÁwª†ô¶ûä'7Õ}N•B*œ‰D êVÊ¤àæºâä·¶ ’Êp5áæ›·rÓM÷¡i}¾¡yžÇÂ…½¸.üèG{øÑ„’êyŽã`YŽ™L†X,Æ´iÓˆÅbxž‡çy¸®K*•"‰J¥èììÄqlÛÆq_ôd2‰„1#Âób±Žã`”9èºN6›õgÛ6†aà8º®û"_2™¤½½Ó4ýï“J¥ð<®®®ªß=‹aš¦—x …B¡P( …b?±m»î9¸• ±(Œ/°2ÜòÆœ[ʃöMÞ›…Ÿ'à)ø÷´¬þýõ×ùðÙ›(ñ·w"V-âq!–9¥õ4„ ¶Ï€ó²bY0G‡®¼xä ,…XϤ›… ¤KË*¿»U:/QೆØ._:~gébZx{“@´3BûË—Že––E€¾ÐváõëeJ qáðP)¬E£Aeް.ÑÙ)D»®._lsÿý§“N¿´BÐÐu‹L&‡çűm›W^9„å˯ãÚkàˆ#n#Ÿ_D"Ñišd³ib±>ø$K–œÂO~b¡ëzÉ‹m?û™Ý{ÛÛîåôÓOàw¿;’?þñü«XÖAÄbN›¾¸ÑÕÕ…®ë¸®K{{;'Ÿü¶o_ÌÏþ Î<ó(?Ðó<’É$Éd’D"A&“ჼ†ùóŸ!›ÍbÛ6¹\ŽxP( …B¡P(ÃÆq¾ô¥Öâdeºô>ƒ‹1NÎB¤à¤!„©ðÞ2@xÖU)]uÕ3\}õ18Žp&:?ß肇mpMXšIñ…TŒ/D—ñä¬-,ÿèBŽøÑW¹6çïKm”3ªBh¿RF‘™º¢a+Zj¯^jGTƒ”_(}Ï8#õжçKÇì,-Ë–öÓÉ@1Ð -‡@x£´ŸX©Í•çH’.}·téûž‡‘Ë¡]qöAa—>wJÇNíÝ »v‘tÎØ¸±n;˜rBœñ§ëâ½ë–'þ‹ÇE̳t£,ĺ×^û³f]Ioïa|ãÒi—\Nç†6ÒߟÃ0 ß#MÓ\n¸á$.Ô8ñÄ3øÓŸÞËSO}SO=e"„¼³ÙV\÷œqÆ6Òé4ŽãÐÙùqr¹?£ë•*„˜$ æÍ;ËÒKÕE„Î|ÿým$“ðÊ+GUùþŽï%½ÚÛ!ù†Ög†ïéäy7ß|'¿ýíyßûŽçþçqN>ù=œ}ö|LÓT‚D‰Åbäóù× —Ë•‰^ÒóM†'§Óiß‹LÚ«®ë¤R)_„ l£Çqð@ôÊ×™1S×õa{^ Ñ:[æ±ç8NÝûÑ4t:M&“ñ…e€ÎÎN_˜“Þ¤¦iútás_•a²JV( …B¡P(#aýú×xè¡LÍå9.9‰‡–¥Cëd „;9CJ Ä(­´]ª´ü/¾È™¯¼‚ãh|ç;ᬳ6’N›ä€ö’Frt¾pÄK|;eãY‡¿ÿv~ó¥7sÚ–o°¾ðCÏKâòËqKǶKû—â”å2¥÷9„Ø/ý—ï)­ãO³,å!¤ni_rÿaAQŠfa1ÐAˆgd2b>šK¥ã'J¯Ëf•¥$u¦®“þÖ·Èz(^?ñ/|fÏ&ú…/`~:ÎÛßN\Ó°k÷n ï{ÚWÐyúél˜?Ÿ7Þ]v0儸ÎN!ªÉy´aN†ÏÉ øC%°¬yD£²ê‡Å¶m 
LÓàç?Oó®wý”\îa¢Ñ4[¶ÜÁÖ­=eÇ çÇÒuÛ¶‰DnäóŸ¡pµ¼€æÍëã?ÿó@Î:K+µÑ ™ŒrÉ%³á¹—JÁÞ½wðì³märA(­atÏ„×_¥f!ÃÃÄãpë­+xå•ÿç·ëž{þ™LæÖ®]˦MÇsÈ!%—‹2þyé%›\NµÈÈA±ßär92™ ®ëðŠ“Rx {§IAKºB§Ói?¯ ô:«Úºººð<ϯF*@4ü9ÜÓ4ý°Øáw ½Çýe2¢¨¥ð&=ûÂÅ$ä²Ê0[…B¡P( …B¡¨‡Í›7³{÷\7(Lž®H±Jz‡Iϸ—oº‰ÿúð‡1gÌ ‡Ÿ¤Ä”AˆaB˜;ôïÄúÝïpKï?ÿÈ#\øÐC\|ÿýÌ\ú÷Ì~ÏoùÞÓ'ø"ÞÏ“ð®N˜•„—­ûŽ®û#žN¦YLf6K,ãâË/'JW-†ð4³J¯)µ?Zúr6§ˆprž§™¢à\ÊŲÊðOÈìÞõúëDgÏÆ*—\NœÜHDœÜxÜ€â¥?Çq‚* ¥ª›kçÏç]Ó¦a~üã˜ù<|ö³"tÒ4‰©W^A{üqâšF;}äôO~Î;.à3{ö0ýðÃ벃I'Ä9N^Z™úÊó„È•L¶–}.ÉË|Vñ¸Ëš5_FÓÆj,Ë"™Œ£ë:†apá…9r9\Þô¦ÚQ)XHáC†zF[ÛWXºôH #0;†'F:H&E\w,&Å®®òã†íËq_h„ ªI<ßüæIlßîò™Ï¼ƒ÷¿ÿMtt¼Ìyç=ÎÆóæÏߨJ_ŸË¯ÝÇI'­ä _X­·ö ¨’H$Ðu½L¤Å$4MÃ¶íº½¨Õ±,‹O<ƒÍ›ÿ•ÛoïEÓDÞ´T*…mÛ¾÷W.—óÃ+eÎ6™#PŠQÉdÒ÷”«f›Ò{2NHLs]—lv¤Y Ɔ°¸§ëºo—Ò#ζm4MóCm¥Ø)ÃVå}$m\žãðù ‡ö* …B¡P( À sŸ„3ĶOÁ¶ËsÓ[Àé{÷bvnWQ„Øtà ë¹bÕ%œcšÄÚÚ|.“šÓ÷ºà³¼àÁ1ì`Ó=÷p/æ}©z<Ηþíßxö½ïåò›oBž¦iâº.‰D‚h¡€¶iÚÞ½tÐAœsýõpíµPºŸÜ´‰õÏ=W—Lºª©Bøtj”²Ø±ã=èz¹W¬°¨išŸ4>—ËÑÛûQ^~ùQr¹ŽãN§ýI¼È‰å¡i0kÖ–ºÚVišuíO×ûÊfËCk%²°çAoï9xžGOÏK¢¸i CØ×»ßÝôðÕ¯žˆëjœ{n==&­­ÌŸß_ªŒ’夓“ËÁsÏ…ãØ6~>-Y9³²Z¥T»3™ŒïM$=›Z[[Q Äó`îÜõeŸ-ZÔŒ”<á„ð¾÷O¡°ŒX,Fgg'ÿñ­äó]Äãq4M#™L–¼)Å™ÛÌó<ßó±^qMîsøßõ0Îp¾º±$ÜfyLiÓ2ží¤M‡+º‚K‡ªÚªP( …B¡P(¦;Z[áŽÂ¶Å>üÜÞŽ~â Ç$K) 5«V-ç⋯cÑìç?“¹îãq¸&‡>ðÚW¶²wןhI‘ͱaÛ¶3xç…2í‹Ÿæ€³Ïæ—K—rÌ5×`X™HÏóH§a‹›2¶Œü³m›ÖÖÖ²9Yú“)¯ÂÜzë­|úÓŸà¬_ýŠ_¾öÏ/]ʶ»îbï%—”­ýÃÈnÞLîµ×0ÿßÿ¢FhhÛ6ŸüÝïØòæ7cÜr ]ïzQ`ÉÚµè¥ãæJbÇûï»_¿þ:¿úàItvbYßøÆ7X¾|9W^y%F*Åü%KH¹.tÝÝÝXÉ$‰L†ˆã‰DˆD"<óÌ3Ø¿w/ž®sØÙgsÌ šÆ~ô#Î<óÌaÙÁ¤âäµvÝ CDZÑu¯,¼Mz·d³YºººÈçótuu• QÅÑ󼪞- þ• Gµ†’ˉ?‰¦á·oÛ¶3xñÅ<ýôþM[(^ƒ—^ú$ÐÊÏ~Vô=9u]gÖ¬#ùÖ·^,‰~qr9qäóó8ì°[°,üð>˲ˆF£~•VѦœ )=‹,Ë"‰Édð<¯,—¼&•ŸM%R)øò—÷Ðß¿œ»ï~˜ ᤓÖÓÛ;×…¾¾e¸.|üãsذaÑh”K.ù }}Wâºb{‰Tù%Ñht@Þ5ÉÓO;ªßCÚÀhy…DI›Ã•YAØc,ó=åägÒÓ³V±%Ä) …B¡P(Š0¿üïÓà6~ÿûÝ¥ù´Xfýk×2sæEìùÑpøÂ^båÊk¸è¢æõõ!\Yœzê#¸n‚ùóûyüÿ<ÎÎÛOã•Wþ†aÐïzX)17ìèø6×™ðË–ìÒEæœ7 ƒH$Bgg§ U Ã0ÊrjÇãq2™Œ¯¯Ü}÷ݤãqfööúó¥o¼‘Ë.»Œ[o½•+.¹„¿9è ¶Ýy'k9„Í?ø7nÞLÿ‡>D,&‚[gîs¬>ýtâßü¦ýÌçË„¸ý×å©ÖV~ø×¿òÆeËøÝô阖ÅO/½Ôh’…)·mÛÆ¶+®`é¯~E¾”šè–[naÖ¬Y|⟠£ilüÇĶm …¶m³víZÖ¯_O6›å“Ÿü$Ùl–'žx‚d2É·¿ým,ËâÂùóÙxÌ1´··óûßÿžçŸž|°n;˜tB\<>Ð;,•Já8wÞ¹—H$á炳,‹X,†iš¾Çœh'“I?”4™LVÍé6kÖLsl#™³j8”<'ˈFÅç‡>ƒ'ž˜CKËF2¡¼‡õ)<œtÒ\'È¡7sæÎ=÷-ÄãRœûK&¡­m ·ÝöHéØA1î¾ ÃÀ0 
âñ8¹\Îääw #½æ¦*™ |ç;²Ùy¼öÚF6n<†ƒþo yäó8œ{n]]:étšÍ›ÿÃk&µtLéõYÉ–-³¸çž÷• ¹û‹ì¤ÇC@Í6W–·,«,4ð;ød2I<Çu]¿À ´épñ“©,2+ …B¡P(&òau&“¡½½X,V6öUÔÏ«‡Êœû¹8-æJ½|Êa˜žýË_X±â¼ð“ärÐÙù\xát]ç®»fûó<Ótùßÿýžçñ«_-fÉ’åþ\Å0 >ùÉ?cš"zî±Çîð+–Êy2™¤««‹t:M2™$ŸÏ*ÂId4Üý÷ßaœ}öÙ¬ZµŠD"á;ß¼òñsÔÇ?Ž—J‘úÆ78ñ«_åŽ;îàûßÿ>7üô§¬ýÜçXzÖYlX´Ïó¸à¡‡Èöõùú@ÊóøÊŽ,ïïÇ3MR–E¤¤ÝxžÇ¦M›øÒ—¾„ã8vØaܲs§¯hJ‡‰ .¸€¯}íkÄãqº·nåÞü½$´sÌ1|ë[ß"™Lr{o/±XÌwVÑucŽ9† /¼]×9ôÐC}AÓ4™1cº®³çÍoæ=ÿñäóy²Ù,_þò—Y¿~ýçO2©„8Ï"Ñ#ÜËÓO?mÛd2ßóêE×M¢Ñ(±XŒX,F4­jp•ÕLkWKÁh¸žDÑh  U2o^Çû½Ò¾Ë—K£ʼüÒiaÓ®[.î†HäÌ17øçH† Ê¤úR¨¹¶¤h!C#¥Ø>‡‘HdJ‹paONðøóŸoåŒ36pë­GðÖ·>ŽçyÌžýãPqÃß.› ’Iq͆ÃÂ…»¹à‚ï1š ˜HâÛ`í—O}äý/?—ö+?ÓuD"Q¶y?É~ðû#…B¡P( …¢™¨5sJáyÒ¹%Çý"qŠá±/ o>àÌ8éu¶˜°Þ‚+èK‰óýùϟʆ Û°mغõf?\ôù禳S8E"²Ù,ù|žd2ɽ÷vø‚š¦izè+˜¦˜·W 5 Ïÿ‡ G•Hç„h4ÊË/¿Œ®ë¬Z±‚O<€K/½”¯þÝßÑÕÕÅÇ>϶ùå×¾†‘Íb¦irùå—ûs«d2I.—ÃŒF™ûÁF¹ûî»yä ƒx×»ÞÅ¥7Ü@gg§ßÆgžy†T*Å‚ |ŒÇãì;ï<žœ;ÇÈ›;w.úÓŸ| BæP?á„Ê" Î8㌲\þ2MÙ)§œâº”çFFþ¹®ËkÇCËYgùÛ™¦95=âd8êÖ­køÕ¯~Êï¿›\.W*²Ä¶möí»‹H¤ÝO¸Îù¶?ŒvBöáV†.¦IUÁEQ/“ý±ë–‡½J¡§¥e£Ÿô¡Bz¼éºNOOº®ûa«²à…x?¶¦±ùÞ{Ù|íµ´]r ·œ}6Ùl–d2É©÷ÞË#'œÀµ÷Þ‹“Lráç>W&0¤Óé23)vÅãq ÃàüóÏà–[n¡P(ø^{ñxœuëÖá8Žÿ½óù<ñxÓ4¹ì駉”´éø#Ö*N>ùdÿ}$áë_ÿú',]×yüñDZm{ÀüÍ0ŒTásü /ÔuŽ'gÛÂ즛>Íúõ7ÐÛÛ‹eY¾ò °cÇÏ9餹èºî»`îo"ù±ƲÒâ=÷ü¢¦Wí¸ñ¸XWV¡Õu!øH{ìí=‡îîCذá²%¥[î+,Ð…‘•JdiÜR¸“bœ,þøUm';wÞÙÍ‚ xâ‰æÍ{7gžùc_øyë[åÅg°téYþúÒ R s¥‚3AžAN™e‚ÛOÌáOZtÖª]eÏÿÃû•¿³ž¼+ÆZÈ* UCwu]§«« …Zô²ðqÃ0Êr#†ìd¥bùQ‰q …B¡P(Šý!•JŠØeÛ¶/„ȱ®YÊ£•H$ÈåreÞBa CäSïééÁónX¼˜«úû¹bÃX¾œ[£‰|ß?ßÒ3.|¾dØä`×d4s /_~9_ܽ³t¼¿Ý³‡¥Ï>‹÷ŽwðÙ¥KËlI²ò꫇u¬°f–eù6+S…5…ZŽTr›óÎ;C9¤æñäýþžµ¾?@>‡v G¼—ó¹jíˆEbløß u}ïI!Ä OØvÆW.;;;‰Fã¾áZ–ÅñÇÿÇaÔò`¥çÚhïûøãÿÌ’%s˜5«X¸_ûÒu!Ì{ìyøáozÓîí¯vcŠíWTy#ÈÄ‹ñxÜOÒ‡ÝüùóGûT7¶ çœsš¦óÅ/¾µt¢ Ù5ýú`ö_e_¤•JIÒE”»–Nµº.öišâZe2â}¡‰„µÂ‚Û–-[€CI§ËïÏâ›ôŒÔuüʹr½t¿À‡Ô˜z¨•Å‹ÅkéQ™Ë‰u ñÞuq®–·´Ô^#±m[Û¹crþ++¥J†rã/WTÕ4 Ïó|–ýišSþi B¡P( …bpdè[&“ñ7Ó4}¡«««Ë牄_ìNzSÉarì)çf2ÜP†Êuª‘Ífýy™¤ÖÃqù ß4£tv‚aDI¥R¼ðÂg9ì°Áu]¾ÿý_H|œ®®.ßÉâÑGmŒB¶¨¦Å¾ò×ÝœrÊ&2±h‡¦iô.XÀ›{Œ»¿ùM.Ëç±m»"o{ºªØ#ó²ÅH"÷¤cÁû¿ó8öXÎۺΠ¢±N¼â 6ýë¿ò»÷¾}ëÖ19_R#ÇZùú+Ñu™3gÒÑÑ1*mI$$Ú,s—D ކ).+²óÕuí{RqÑ($àúʱ,Âöˆ›;wî¨[zÀŒ6£½Ï zY¶¬R(úˆHt=pæÌy‚¹säŠ+Þ´kÒ+QÓ4ß½SVVÊrÇE£Q,Ëš´BÜwìaß¾EìÝ{?—]&’þ»Wå  'N{ˆ¹W.bã²e„^= WZ¯ Q 
§P^gò:%“â>Éå„ÖHwíÚUâ–ø?‚Òk-Â^eã8ÐÕUYuWü—6bÛObÛÐÚúá~3 òØišS)±¿HD´[zë†hC4 }}âõ'>ñElÛ¦·÷œÒqÄŸã‚ ç‰6×› a(±­ZIÙùVïdHv8\ ÜYëºÎæÍ›GÑz …B¡P(“™š'‹Q(Èår¾è–Éd|+ŒÌŸ•Ëåü±©ëºþ\8<–•œ}«’YZ(sSW›OÚ¶¿ bLn&wÜñ>ö±Óø§º€¯}M*‘ˆÅK/ÁηpÄaÍš6~ûÛ§iÞTrÂà¿{äN?¥c_káÄŽ—0ŒãiûÙÏxõ¿þ çÑGýë>ßµÂë K•sè¡Ò;…óe2:[[9`çNfΜÉÒíÛƒ(Mã‘ÏžÌW¾2f^ya¸T*EjGŠyçÏã:·¯K„´rŸÙlVL¶çá qš¦Q(Ä…5þ³7Ì®û{N !î®»þŽY³®a÷î@À²¬<•Y¯½v̨;œ´p´‹Q£Åž=³Ø²e°{Äû¨öôcÖ¬-¼á ß⢋n#•^¸¡DªýRÀwÒ%µ««kÀ™ÉD>_Ä4Þò–3|áMÞÓÒÛíôÇþÀ-ÉO0íNDô á G³ô™¤ò‰•¦ o´JÓzå•Wxæ™%eBµ4éÊ>,“)M®Å–-³Ø±£•þþ%AûLaá¾\Št®+Ú+ÛœN‹cF¯.…O|B¼Þ¶í \?ìÕ0DÛ¢Q1Èå‚ãH¾Z¿²£¯Õa÷Gf¨½x<Îm·Ý6¬}* …B¡P(&/N¨Ú£œ_Æb1’ɤŸn©Ö˜TæÞ®gÌ*„°ù|¾n/£ÎNñ_>¤Ïd‚é2G´Ë{^ððþ/9ͦ‰Ç…H·cGŠ›nz‰#œE$Ò‰eÙœwÞß±víW¸ôÒF_…±ÁfoÚÄ}בƒÅú8ì_þ€s¢Qb±˜ïõ8¦iÖUxÁ4Ͳ¹H¥¨'t]畯}\)5T>Ÿwõjj¹)&“ÉQKóUÊœxóÜyÃÚ~@Û„×JåíúÂÏ@ˆs¾°¥÷qÄÄÛELÔbˆpÕTiy–\µ„»“w×ÕÎI!Ä=ñÄÛØ½ûȲDëÕ…‚áU ©—ÑâÒ#Q³†`ëÖ6î¼³––çFu¿‡>ƒ{î±ÈåD';’¦Kñ­ù|¾,çÖd¤µ-zÓôð’¸à'îÔ¹à,Àííå÷ÜCæ{ßøàlDŸî"íÒßÞÜnQæ¦i⾨ì›D>·],_~`ÙºÕ®e.W[Ð ³qc úÓ ç=þgº>ðØ Ú(½Øb1ñ#¾s¹À3n×®ã€8âˆ{Ñõe h˜ËÅ+ÂiØ\Wì_\·zŠá&¼­ö”JæÂ¨üñ+ÿ¾¢,·B¡P( …bj"½Þd^m×u}CFœ”_Šcr -ç²Ú©œ{ÉϬއ?·· 1N.7Í`¼-?Ëfޱóù à@.7½{çùÅßž{î~ýëÛ˜3gu£/ɘá¯ú8ïߎâ&¼ÅÏÅíúiµ¤çãPÔ#ÖÕBV´F£Ìüô§ùÝôé<·cïÛ´ \—¨œäY–PY›€x<.³ý‘[¤‡‹G¹T¬²n®´Nx¾k—ö‘.½–“oy¯d¢\^xW}SaTMM¥`ëÖ·åU¥íÔcØ“™ƒžp­­Oê~eü¶L¾_™þªÞtX†aT ež› µÍ@[Û^Þüæ‰RȈ{8)ÂCyq âþwâ¼è;~êeaººþ@îÞ½#ŽØÌÂ…C{K&“õåX\¸p7sç¾Àk¯µÔuäà Z› # Ì™óçž+r„¿‹lȪ±Ò\Òé@€s]1p] +EºÝ»–öW/_Ù§Èþ§ÒN'«Ý* …B¡P(êÃq,Ëòs§R)߃-NS(ˆÇã~8heQƒ02ZDìWx¬…‘!¤¶-ÆÆ2?³ç‰÷š–öE¶övñ™,Ð&‘©_@Œ£‹År‘NˆzÁØÛ¶ŽáeÔJxxmšåion¸áƒcê]Õ üe÷nfîmãV .s)Ã0ü¿B©Êèhñ³ŸýŒ={öøïï¹çßaà±;î`ÖþÀGy„yDLãñÀñ¥§§þ?£M­(Zé©^¯ÞúwÂÃ¥ZpWå}eìÒñ£”{ÀÈI»Ž˜ˆ[°é“›êþª^ˆ³,˜?í†T?ÝãO”Iö‚½\zé“{ì÷F}ß"'_PQ‚ˆÑ,t:%º› ×…Gý4ë×ÿ­Ô+KoØJÑ^Tç@öR¬÷BS£ÀŒx¹-óÈU³ÖW_à '<Â’%ýC¶·Þˆé%KúYºt=--Ûë>µ<æ’Éà‡}Ù2±ÂÖ­mU× "‘`@¡ëb™ëŠõõ©{«œ cмÓûÃÂ… Ë„¸ã?ÞϡڛÞDk&ù<ó~ö³r—ÅñÂ¥ºè«±n”Á©*ûȕ֯ų̈XW†«f«|¶væ™ÿ‡ò@øÑágúôú»˜°ç[˜jŸÝsÏû|&Tz•šXX$ñ¹|ªgÛ"×áhQ9‹b. 
…B¡P(Šæ'•JFýB^–eašf]ùÙr¹à¡²ô>ëêã\ËŸår"­‹œû†sD‡½Ï$òw2)ÆÆÑh¹§›ÜNæil«ë¢pƒçíÿ[×õRñ¸É…ôuuñÿxö€ec‘Ž „>±eË;ì0@8 ¼éàƒýÊx®¦¡g™Z™C­rr*=I$Õ§¤Z•Ïe¡§bÒó$ìÜa!r=¥r¼Y¥ÿI “@è“á§a*Û˜}&ûŸ{býÅA'¬'Ým·oÿK–h&»ÕœPFÛÖ¥04Q¢sÏ}GQÜòpÎA¸ŽB8YçH‘nÛ“ÏóøÃ¶ð±}†7}ñ‹LpO&íVÚ›,èP¨X7\ðAzÌ¥}OOh½{îy­­sê ;­—{îùmmwÖ½¾ OŒ‡Ê ²j¤r iå"žãy)sç¾Àœ9¯ŽÚ÷3M“cýÞ°„¸±Æ0ÄGvÒ_Þ¯¼r(Bà º®“ÉÔ›¬@¡P( …B1™°,‹D"Agg'–eÇä?sÝcÍpÕÑXL„—f2Çšôl“ᥠÞ÷õ L:¨„‘a©ûC29~©Âvî<Œ}ûfÏÁÆ‘ß=û,³73®‘Ÿº®óÖ3ÏäËÝÝLÛ±ƒUÇÇ /½¥„£â‰W™³m0ÒuîCæeŠPZ-O…¶•Ÿ…¢Ë!Èë§<»¼&ÉR;,‚0U»´Ž|&[Úå½aÀ«Ã˜O)Äõ÷7ªQ|÷»¯ÐÖvHÝB\$2úm˜hÞ.ÇóÓ1ÝXp k”õmÛÕ7÷îÝ;¦í/D1Ý»ÿ›åËçûk#!É@o]égx‡×‰—–»Às³ÞÁÒ¥ëëÊ7/þ--GuŸË–%yñÅ#ÚV&©•îøš&;ºë×_Ï®]ÇùOe>¹áˆr²/˜Hâ¼B¡P( …bÿÉår¤R)<Ï£««‹l6®›RårBdó¼@l‹DÄ粸AW—øŸHÔwlÓ¬‰´¿Ñ‡ã½ø¶·­cÞ¼žýßQ“ñÜ¡‡òÙm!îË—_Î=·ÞŠeYìùÌgøM{;óL“¿_¿ž W\1º“¡•ÔòIHUyïU¬/Å1Y°’pž6é‘fSžW.CÕ*öË ûÓBû ‹×ƒùWȉö~à qëÖ­#™LÒÝ-|麻»9묳8묳8÷ÜsýÏ›‰_œÁ;Þ1«nwßÑF×õ†{¸˜¦É‚½ìÚuÜ‹ò|2ß—tjN˜j­œ{š¦ñüóÏ÷©„çÑ×'ª"íO®Ny…e‰ìËâˆþÈ?vésYlfúU»}:†ÅOÌÑvɤؘ¦ÚdRY]‡Ý»¦¿ Ѩø,• \ú‡c»J„S( …B¡˜$êPÂlÛ&•JaYÙl–|>m ‘-ÿ;;ƒô(étP¡T¦Déé B; 1F†•Ö¢Z.¸‰Æ¼y}̘ñb£›1êl›=›åË—c}ñ/áÌçžósØÿhÖ,¾óÑòãóΣÉ’Ñ=˜ÃÀðRZ)|E(/ˆ ·—…‚—œ¼FCë…·•!^rò­±)'¸”‹€.B¸«œ|‡“­Ë÷QÄ„¹Rš°©^q˜ÐÛÛ˪U«X´hmm¢¡,›|ûí·³råÊºË ÷÷÷³fÍšŸÝxã¬Y³†uëÖí«;v°t鯘7¯¯îmêíÌêE×õ >Ùß?Ê7a‰x©s¡„û¹n ÄFº·ñô>k;v‡N¸Õ·Ÿ±øfª®„¥ÊÐv˜Þñ2ßþþ‡Xµ|¹ßoV> hž|òR>x÷~íGV’ÂÛ+¯<Ϋ¯Îñ½8e@4*ÂYëõŒ«‘ç°ÒŽÇ¢/V(Æ’ñS(c‰²cÅda*Œ+ÇÁq<Ï#—Ë•žd2b±©TŠX,毓ÏçÅÃu7ˆJ&ƒâ‚ÒÄøRÓ‚*¥ xúécyùåãr¬ñ²c8ø§ÛÑ´1|Pï8Âs “ñ ÊÐÓéÓ§óŽ¿ý[zìçym¯ñy8×N£º7šI¹@'_g *¦)sŠ:H¤7IØy2YZ7\E5WÚ¶ÒsMN‚+'ßQ†§ECÛ„1©žãn˜°víZ.¹äV¯^MKK ½½½ôöö²zõjÚÚÚ¸ä’KêòŠ[³f 7ÞxcÙg©TŠÞÞ^:::H¥R~åÇ‘‹ÅøÕ¯¶óðÃßäüó—íÿY˜"Ìû¸¢Œ…â2 |12‡æXÑß¿„9s^å˜cFòÚ^ú!‘š®L‚;kÖ,@ü–ɲî‘Hç°ÞÔo†a”yÅ5Zˆ«´ãÑî‹бf<Æ ÅX£ìX1Y˜Ìã ×uioo'“Éø"yÇ!‰ø¹ßLÓ$“Ïçùÿì½}œ\e}÷ÿ& ,9%¸áé, «€ÈYE+bgñ.(ØÔmI{Û™šÔÞ·Ìx§­ÖŠÎØÞ­%mtF¬ Øzïà*XluŽú é@6{’å1î ›„'÷÷Ç5×9gfgvgvgvfv¿ï×k_;çáš3ßsÍu}®ïC6›Åq Þ«ùP<JpK&ƒœmétã‹v:O>y6O?½tNÎ5Wvì{¿½¸!)“ŽíºJ$ÎdØuÄŒoÜÉ$¹\ÎÏYÝÕÕÕ˜h—ÉyÒ´wG¬l»ò꨺ªIé$Q7Ë(îS)ß\ %xÅËö+¿žúØá¼mÕsبœ·®RNÅjÇh€êbPÛµ×^´­P ··—®®.ÿµU«VM›/ζíIÛ ³cÇ6oÞ (ÅypppVždjeÂAYDe9²Rç¦Ë:/TN?ýû>ìmÚ9ô5¶í 'W*U|Û¶+ÚHOOO}šsaÇ¿þõ.妜L’§¶óz<-*W€Ö›!È÷ßœû 
®µû©ÖÏ‘G>À…ºÀÚº÷¨üzW×a&&FˆD‚d¹¶­PžW»GgØsX—©oåvÜŒ¾XšÉ\)¡™ˆ ó…ù:®p‡X,æ'±Õ²žžLÓĶmß;Nó"˜æäª£¹œ? •†‰ê` ]íT¨Ì’%Kxùåcš~ž¹´c8úé¥ ÿÞ]×¥¯¯ïþÓ?ñf×eùÀ|éK¾ýB©ˆmå]ùO£«‡BéÓr¾G"¢VyXg%ºy” dá¤çá×lÔä4^vŒðdÕ*+ÇäÐR¨=¿[„æ„®Qôˆëîî.ñvfÍš5%îØ1uÅÍññqn¼ñF6nÜXòú–-[èííõŸ¯Y³fÖ.ŸCCï¡»ûN è*NT2øz+wÎ7æBpåI¤óoͦÒN¹‚ŸL&yýë_ßÔöÏ…{žÇc=ÆÊ•+}ϳZDõz…8ê^rn‚~'~Onˆ  ƒ]|ÖRu~NÝjï_îïtþæ‘Gž™ñ¾Õ~9æž|òÇ~Ž ÛV.k)Û®1 ÃOme!—JvÜŒ¾XšÅ\Ž)¡Yˆ ó…ù:®Ðžo …±b``€d2I&“Áq^ûÚ·JEÈç•èj¡¶§G=×ãÆ\­a$B +W®ä¥—zg )˜k;v€Å/½Ôðʳ†aP(xó¾}ÜóÖ·NzOÏEÂ)sfå§C?KNTü¯ó¨é¬C?u4‘α¦CS=J+–BàR~‹ ¨C=I-ß¶|3ôÞL‰Îrÿ)XÐÛÛËàà ¶mûaxppÝ»wOç¤R)6nÜXâE°{÷nV­Zå?×9è¦bûöí¬_¿¾j(ì®]=¸®ê «Mv¥¬ŒÑäÔ:_4ªÄ8ÇiŒ:<<ÌÆùõ¯ÝÔö7ÒŽ·mÛÆúõë'½î8çœó‡l+>ÏR»ÐÇkþ Lî7Æt¿b|ü7Ðû…’ít{"¡ÿº¿ÓE’Åלâ13”ö£áœ‚ðÖXñ±ÍÌr]nÙò6îºë÷êÜkjŽ9æ â{¾éª© úÓ ¼:§öþ¥\Œfýúõlß¾½¡í/§’ÏĆŸzê)~ðƒT´caá288È7ÞÈ£>Ú´s4²/~ôÑG¹ñÆ'åè„õë×óƒü€§žzª)ÇoÆØxpp°µMh+ô¸bÛ¶m³?Ø4j\ÕÇÇs‰çy¤R)\×ehh¨¢HaY‘H„}ìç$“i®»î“D£jÁV CÍ}âqµØkYµ§3¾“ýû÷7õ<O7®xr|œ}cg6üs†¡l6çæ½{«n7¥S@?µçJ£&–šðäMç;òOJàÁ¡'áÚ+®|:›er¸hµb åáªzÛJNŒsœ´iÓ¦ºÆÇ‹Ö®]Ëèè¨VµnÝ:_¾úê«´šææ›o¦··wJ¡®N=õTß=´2Ï<ó5,«ú>ŸWÉÖ[è”Ò–Ì…—ŽÖ_ÊÃQ§>—ì°ÜÛÛËæÍ››úÃÝh;>÷Üs+Ú±mÛLœp-?ŒÀÿ¥¾ót³b«»ï¤RÈrølºn”]|]{ÿºU¦õ×›0M,ËbA-Öi”07:‡:~9¯|åQÍùøÝwòÜs¿RªÞÔ g˜zBU¡Tüî4;>í´Ó¸ì²ËذaCÓÚ+tk×®¥»»›­[·6åøî‹Ï9çV¯^ݰã ó‡Í›7³iÓ&N;í´†{îÇÆÂB¤ÓÆP}|@àa¡‰†¶ñ/.Jˆ+0ÊBZ%æqeáÅÓmP‹j\¾ªaY–’5::Êðð0ãããtuuUMÂ_+Žã°dÉ?²ù·]Š$Ãl ®«Vƒôc­AD£õ}'…B¡öÄ\Ù±ëºt\Ñ&BñâÅÓv@P”Á£ò‚C9Ë–-ÈDˆR[Ž.,¡ïh‹ "µ®ØšCõûû¯[ÊÊ-£d¯¿Þ_hñR8”¦ ¨U»ˆ}ûN‚œp#¡²Øú«ÖÞpíœo²šŽŽ200а¾XšÅ\Ž)¡Yˆ ó…ù2®Èçó¸®K¡P¨(^xžïpä÷²ÙÊâZ¸0ÐN9eÇ¿X×”ãÏ¥;ŽCÆóxÝè3\{ý«ór9¶>GøÃ\xá…u_Å<ñåyÚ@M¬Ê'kºC¸ê©K©gF´ì?!PT8^?jb·Àõ.ãÛÛÛËÆKΔîîn®½öZ®¾újÖ¬YƒmÛÜzë­³:æ±ÇËñÇ?9å6ÚƒEBSç–p¿âê¥!¥–H£íø€ ïnõ‡*rÂgðµ¯}‡§Ù.BžZ ÿúíocPÙ›¹íÍ\¾èa_ËÏýÌ[W±ý¶£ØuÛ òñ h²¸þMIÿP¿):}A¾øZ¹'6ÀÁƒË€Ê6ëyJ|‹Å” Ü‰ýJ3úbA˜kÄŽ…ù€Ø±0è;v]Û¶Éd2UE8P¹ÝG¥%©ç‘¦)žoæ”SöqôÑwÏùy›aÇ?üá¸ßZMäÂsvô”Ù7²¯\—ÕEL×u1kâ\×-ÉÛí£=ÖÂv×4á÷µ“‚®D « UP£+và\ªÑ,U-dpp7újqØm³^Ê”¯[·Ž/~ñ‹D"n¿ýöš“yVã˜=Š /œ:öÖ¶'‡IÌ~ó‰DJŸx<(àPž3®Ýi–†ÁcmTLÄöRÛÄJôªõ¸•òk–^`©F%¢=;Ü̓¾\%¶eQ}¼á⨰V}«Û(/j],Bÿ %ª¼:@Oϯ9ê¨`Ū|àå8*wœa¨~e¶Àç’°7º/„¹ Ùc A˜ 
ÄŽ…ùB'+\×¥¿¿ß/ÊàyfIÁ­XL…¦Rj¬W(s™N\tídöì9б±åsv¾fÚñ9kvÃÓKÉåÜ’ùE=dÂU?††HY®çá8‰Dm®FQ´«èàR@M”Â?Mjâ6Ýœ=ÂdϹÅ$á!’T®ÊgQ{hêNWJÕÛ†·±P_r¹§›öŽ(GæÿUY4ûCÌ?þñ8»v%xf†!Ž }¢\1>¼‰è •„*žçqÖ{ßÀŠ6üÁo†Ãb¥< åèb ÓºúvqhÕq¬X3}‰pí‘P;ôoLºxÌðó7[$R9ŒZØúûƒ‚ÚsNWÖšôùÛ¹ªƒ ‚ Â<Äó<<Ï#›ÍŸ«Tÿ­¯O=7 •ºh¾§/J¡™ÂÃ[—Òñ·Cmãñþâ_Oñ/Q¢(õU者ð㡇‡‡ý¿5kÖ”<nu[9å”}œyæØŒ÷{qº9¤­('žË)O¡rïÙNÈ£ÕL»£—k[݈9 'HƒÚtÉÝßìbïÎÓ›Ò®tZ-ð FàéÉ$´„r„=çÊ«gM›—AAAh(¹\ŽH$R"`är¥ã4ËR!ªÏ˜J ã¡+^kõªÐ¾J̲ <Òú >‹þä&J€Ci/ú±QÜ¿@ÞE‹d•æÇ.7[²x\ýg…Ε§òx^/ºëmµ&jxÌE»nn]ޏf00“ühžÇþýûyëUWÍZ„³,‹l6‹•·J'@:oñ@Å÷ ‚ÄÚ”í;Ï=HçŠÅk×®õ 5´3{öÅÒ‹_ª)[ü#p”ˆ¼Q`c¾WA Õ‘µI¾üyI.§VŸ‰ _œã¨Ç ÕîÇãã12Áëç¹7œ7mFDòQkvsò›¨i[‡R8íýfL±=«‡úµŠ),åå94¤ìÚ-æ™ ”˜%ÿAA„æ‘Ëå°m›B¡€çô÷«qÚÀ@à ½ÄòqõX/ko2PóCíý5“$#:5K-ÃýLñ\ÚƒM \:·q˜TqÛ8A(©…ÒOt>¶‰â¶S¥ëŠ„ÎEè’À³Í(n—,>O2õ¼YG«6Å+ æ ú¸o\ÿmàº\ñöⱟôâöÖêÜ1•ÛæÞeËXæºÅÐԊŪMFƒ u¡M;hTOÂ_’®¼WÉÚ(×y'S’hi||œÁÁA†‡‡Ù½{7½½½ôöö¶Ä›ªwݵ‡ç¶­š>Ö øL$èÈt§ðÏyx_\Ùme—Úæ\TçUÉv>1—ߥ.ô’Ë)×o×Ub\y¾¸Nª89[FÆÆèêÞÝê¦4£ì£8nÏöî9ª¦mu.Ñðb”z/ÜV=È ç•õûÓï+yí…«çrAÉ{M:ö“ ‚ ‚ Í!—Ë‘Ëå( †A&£¼Þ tŽ‘@“©°bÅçCc=½˜k æŽº8ØT$PsJ½èöö*ŸæèiFÙyó¹ôu.ãjL!“–§öqw¥ö@i‘K“ Ð‚C áTÃByÛÕŠ.k« °ãÜsë8Bû²wd‚ÓNœÁ\/…h”KŠâúûû«WHÕÞFÓCÐjmX<³©\€!Bந§/’îºiøÅ†‡‡¹êª«dÕªUôöö2>>Î 7ÜÀúõëou[yrl cÅ `j›p¥.<æ©Î,в­Å¨=[ü!p;Ö"œM¼r¾Ù]2™œS!.쎫]Wåiç´íÀ£h¾óäÙg³ôÈ#[ÝŒ9£áBÜ9»Ù{ï©ìyjúb µ¶!üû^Ô A&Ê¿þ‡ž_ÆÞíÓ{Å•/þh¦L˜eïë°öòâA…B .çóÁãLFª3 ‚ ‚ Ì%¹\Žh4Ša%s ‡ Z?j>W)N¡R„ŸÖ-´÷\x|ØÕzæ‹”FUéügéâ{Q”ÇX%®YL-ª%‹Ú©¤UBU¤…çîT{líi˜*„‹õôôTáêÁ!ÿIUÙ&ìºF»b MaÀàà ½½½¬[·nÒ]]]d2lÛftt´ÕíeÅyû+ºÆz¨ŽÐAÙÌ&œoGè.ÛG‹¼Ú•6ÜÁêUÕÉj‘x¾yɵÓ,õˆsœÒç󙱓NâÕg½ØêfL¢ÚQÓ¾Ò+Fé­áêÅrª q•Ä}½jš%pÑ· ‘€ìiÁÙuÕ_.'‚œ ‚ B³Éår%Õ%óy5{S*ðDÓ!ÚËm&„…5%Þ ÆŠz¾iM³/ѵè4Z¬ke’¨¹-æOÕÔÅ/½TûÆ™ ôõ•ˆq###S(¤¨ mœù)¶)PÙkn>x#µ)‹@…¥öööVݨ»»›5kÖ°cÇŽ–6vïQG±nl²az(7ß$Ën:7YAg[W¥Žu¸´Nˆ™Cuè:·œWÇ1*áœY…bb-î»®òš[(ùá^þ¼îgµgFûW[ÕÓ¿gá÷¿=á|“a,K…`G£Ê†õoèB µAAh•BRoÍC< Ï[A8g­!›S.V0@iƒzÇ»ÚC® ÌMhh5ÂiÚ‰¼ëÆ[øIÇÄÄDíÛ¶j“H$pk™@è*#õÎ5t¼³;ÂXU¶—T×McQ­¶Cޏ#䢋.šôzÕAèÐÒ*'V¯Ëo$½LT€Ôö›`rÎ(¡>G‰tÑhô~¾sô‰/rÍ5§´º%„+(5’!ßw¿¦Üpú¬ŽQ«—œQ幃ÊgYJ| l‹Ìár¶­ ! 
‚ ‚ !“Éø!©ëta,®*¢¢j~Ø(©Úܲ‘稄„…v6ðÛýûkÛØ+ºB'ÙlÏóÈÕVe²'…ÙJÎz¬âëÇBKY°f͆‡‡«n4<<<­×Üœðš§' 6vñ¯<T*5yâ­±—ÔI)uå=Iw‹ç± J_ëíõkB€NIªþ%•  ýºþ>gr>¹ù†ìûñ“~Ÿ»hô ÞEcõÔcª:Aw/•~£tx€åÀ Ù³a(ûÖvöò\(¡×‚ ‚ sëº%!©·Ùp› oNNÎù+sÉÊ÷Þ_Û†¶My29Ã00§ 3¨6j39|'N©×…NŒ/´œE‘HÄ/ÈPîù¶eËn¸áÖ­[GWWWËê8Ëßþ[Êsj®‡ªx FgY-DºÜŽAå¦Ó¹äêÁ-î“§TØ›/èï*W"…ãD:/\>±X°OX<µíùÞçÇÞÙÝ9¢L;Ä_ךgÍaÖ.ªºL=÷¹1\ƒ°wvußê’÷À¤¾)_ÌÃà8,(ñUA¡Ùär9’É$†aà§ ¨hŒ*_„¹ÀŽ6køÛ¶š$'±X ×u1M“Èt•t5Êr½N åÛ†+¸ÌM¬±0-‹ (È0::Ê¥—^ÊúõëY¿~=W^y%ëׯgÍš5lذ¡¥ |òIŽÚZŸ)Ï뤩Td¤DÛuùœ bõÓ¡mL‚2Ó)T¾º~Ô¨«¸†Iß· ò&¦ŠÛ–çPÔûºt®P§½kÓiõX‡ôiqB{ÅÅ㥠îs9%fÌ—¾­;w²øä½í†®H’cêŠ 35Ê| ï‡ÛâœÝ»!—ã”}û&ï7‹DÿnY€õrHˆ ÚN–¾/óýQ>¯ò­Úv`Ë©”ZìêVA¡I$8ŽC¼èeñþ|Ôšyj"Ah°ð¤©7ò<5iÐyš€d29½'\%ÂN+IÔd%<×*wjˆÐœ*}BÝø9⺻»Ù¼y3·Þz+½½½~Õ;å"ÀÈØû7ž„çáaT7-§’ØQ« “¥6¡¸RŽÃJ¨%¶rÔ±‹mÕÚ…Q|=z-M©–A‰zº(JªøZ¦x}ß¹Å红£žï·‹p§½Ú,Kõ9†QúýhQ.›-Pu5çù"`<¸go¹äW4¢uÓpP†4G Œ1l„)*\ŠÀmLW9‰Qù@°V¥&¹\/;x ÌðëZ¯d':f\·W«ÝeÇÍiuÃÝŒƒñ8‡(ñ 4)½¿LÔï™ x?­à Yé4µ—0AA¦Ä¶m …_wá‹68yxe»L|„ÍË/¿Ìž-ÓT5 5ñ-üI±UÏQ ¥sšpBûð¶a²Lï!Ì ‹Ë_Ð"\9–eá´X9éu‡0ŒÀæbÔçYÙl$Šš˜(·QÞuZwÐB\–à3XÅçTiÕNI:ì¶Ÿ $N .JS(íä_ÌSù¾++KPÒ-[‹‚Pš¯ùtÎ,Û¾oYA˜ªëâ›i*Qc¾ˆp»víby%O®3é¶W%°)õï×yò.¢m¹à¤E¯° ™ÍðŽ)n'p5ØÚFmp#[ç@4OÁ¯ñÁ´™­CÇ7‹'‚ ÓòŸzŠ3O; xU>iÂàZ¼g_ ö ß¹œ²_×…Â?ýà|žzÅS³ý&…¹Bç¨FÙ=•ɨü\®OÈ©îÉZÂŧ눧› ÈFAèîÚ¾ox·÷öbeÀ¶àJ®ŽÖV”OšÍá]G°ôáÚ#  è/,µœÁ\EëyÁdDÆxmKÇq/¿´„·…ª¶L³}$RêgÛÐßOÃif:‚é¼ÎtnªZt*}{k‡$-¸Å ô’tñ=5hŸG Ä9›@O Õ¬U+ÓpZÓá©ÑhðÜ4•p‘Lªí£óÄ}v÷Ð='4ûÕJø ‹c•D…—›v“ ÿ&Ø(µV»MjÏ8ŠÛ™?Zé-÷®‹ ÂDÍP;’Åíʽé4•n‚ÒÆ _V«Â6a/LÈõÔ×ã´2žÁð’_륲}Ï HøXÜå¿%Ê”|ˆ}îå5~‰B[²µTJ l±XññzÈý½òòÅ‚°d]˜”ô÷WöîMd w,xÅJ÷Ž+V„¶ ¦<¯Š‡ðt¹>tõ j„qÕÞ—œ ‚Ðbþäùç9eï^ò¹ :=óAh5/ýb1¿øÅšê8NÉJmn&ùkÂõ &â6:÷¶Ð–,eË–-SþµšCÏ/Ã|í1uíã8Á¤¥Y^UÊ£¦Ñ)¯jA‡–Ös~í䤽íÒ”j fh;³l?ýš‰rÐH„ÞÓµ7¡çšðd6¼–N+ÑM?õãª=‰æË ×áñÎw.mÎÁ+Ùw¸ o8ù`xŸpÅí-¦¯·±zB<‹¡•%ïke×Fuúáüna!Ì ´ÔðJ¼šJhÕmª%.VS³+ÙTèÚ”ì–}–Hþ†Ž-ÝÎÑÇŒÅX^3¤»¦ŠÂ¤ùv¸¢cÖ@$ºúmOØO€÷Ê`ÌdjÀ<¶¸P„Ü=Jt³,ÕŸ ©?«X;2¬^7‚GÆ„€IDAT %ÊÙÅ{%÷…âmø;Ð÷.%àéü˜ºúnêŸ!Ñ´+•R‚Ÿå<º=•ÐV~ï—#"› ‚ÐæxÀŸìåöÜ GÍ )‘*´Û™B¯°mH$x饗È•doºL:z(Œöš5—J¦Ó„ç/B[²`pp›o¾¹Õm™’CÇË›ë˜;Žò@R6ŸË5§ra¥è·Ùö\›-ÄÕó±ÂQ•´ˆð1ßËBéÉâ6aGŸpÿ _O_ë§TèÓ^ŠÚËM‹mú»ñ¼ ¨ƒžd$xשXtbc˜B}Q•“sjOmTºú‡7Yü¯½ËÂ9© 
ÂBY¥<¢Ú¨,J«h·ÊjÔr?ëcëD1\œ¡:/\µmʽõteÕB…¬åç(}+üß}8âÛàû/Â[Oë¹î-$±,xä‘>»ÐR)ÕÿäóJôŠÇÕkŽ£ŠWiϵlVõE±¿‚¡·€óKõ<™DÙKô`\ CEÁ5Y)‡ÖO€›ŒÃÅUüdº û-Õÿ%=0Šž¢Ù,¤Öîc½:T4ªúÂpêL’o…Äiþ¿A‘›’ß;æ] íÕZ&çWA„©°ìÛÇÑ÷ô ½uú¼@˜Ÿœðüó,Y²¤ò›‰ 0 äR),Ë"™LN}@‡ /¶¦šQžH* ·5‹6lØÀ† ZÝ–)9tèP]Ž1–ccê±a(‚F q͘kõ´£6Ñ®•æVZ?©ôzX¼A9J•‡¦:ÅÇéÐã°æQïºAUP?¬ù|à]*Õ¶ƒ°ÖNeñð~,ëèÆÐ#¨2¯ò¾.å«¿ýj÷‚©un¡ ²Þý¦#CPRXÇE×r/G§9f¤ø_Ç_ë" ³hfæèbq“SáÇ€ô?‚}¹âÎ(^ùR|¤ñ<åI6K-%hÅãª?1Mõ{ ½×@ýFè}²Y0#V’6p7$/>†ÔŠÂÕÐAŽD·Gf!ôÜ2`žù߀u ޳X¨Ä(v¾‰/BúÅü„ïk%p’zϬó”à§3Ì“À¼ Ÿ‡T17#å9ubÏ”ð¬û½šRíGg¥÷¶ ‚ Ì!ŸÚ·Ãÿc#oyý_áˆ·ÐÆzj1+W®œü†ëªA¨ea…BÏóp]sª¸j=¡.'<§ÎÿÊçG>wžï,Ú&ütJ.›ñ®z’Õ ¸f ïµZ…ÇZ…ƒzîE^§)¯PkTe ï£#uèzœ©Ó•çò t€ ÿC§mÐiÖ.ôáœy”w–Ür¨Éõ q@{»éd€1Weý—§±nšFÅlk‘@_ÄBŽ%øò•IŠ/Õ ­ë? ½Ñ£ÇÁÎw«v'ß +"°ÒR"I>û÷Û ‹#8Å\|á”ýýAž6÷ŽÉy(uJÞ×µyy÷3A1 È‚y$˜thÚuR‹¹Ú5ÇßOS±XÅ<ØÆQ`ÞJ ó+Zïãã`þoH_J°šSçÚPü,©âö‡ÁØÖ;ŠžuIõ{g‰uú2¾'¬ýwª puÕbý§ó Bu~º‚‚ ‚Ð$~¹w/o9é}D£jÀÚ)s:aaᇶÅ)§T(Öà8“&“ïc=·ÔúH¢ø\G?æ pÖŠi–æ’ó<%ÔE£-Ä9À+ÿêîÙ(,Hé2·úµ úix¤åŠïkÑ.œ 0:V»¢‹(T«v: ” ¾¡·j¹eûhç"ÿ’§Îq¼cYøiÑûó­رçÕ-º˜í‡Î‘¡Ç!©”ºïõýz&£žêSÿä‹õù¼òòw]%º%“ðWu;øÀËd³.ɤ‹¹©Tìw]›O]÷2ÏÝó=Çaçgw²ïSû|!ì–ënážoÜ£6.ÞG[»¶B^ºã%¼~Û°ïûxùµ/ãFÜ@ÔÕ«’Ú(\T§X Õ6ÿ8îE`ë’Õ :Þ,ij9ß’Áë¾È§;æö ®Aø÷.Nb/ü{g[°Ý€nö¹Êž·Æa_¨ôBÆu]b±©TàýšÍfÉçáÇ?Ä4'¸òÊ—9ÿüýÜ}÷^|p;ó7pþùûÙ¸q¾¾]\wÉw‰Çþó?ðñǼæ5_£¿¿Ÿþþ~¶nÝÊãú¸J†ë@"‘à;îâ…^ ‹áf\þæëþ×îãåc^fõøjlÏ&gåÈÆ•J–J¥Xÿ•õôØ=$Ç’¶AÆËpÓ¢›Èä2$Ê]ì™<ÃÇÞ„OëÕ ]èDß“ûÀíŸ"Ü^? 
1§®0l îß,[²ßW îbNãÝו (ü’ŸRs™Åü¤"( ޹Ÿ‡Ú WSô9µg^±úø ÿ}K^ª’EAÄg·nåôŸž‚eIq¡ý9|ÂoxÃΚüFHˆ‹ÇãX–ÅÈÈÈôLS[Jž ¯Uò¤Ú†Žân»ï>ºþ¿Ý­nFEš¥ 5#T­…%â(½¦Âr;­éó&¨Ý™)†‘ç3Ÿù/¾øÅFxÕ?C¢/Á‘G>ÉÅïÀ²,ÒÉ4ý’N§9ýðéŒ ŒÑ××Ç}—ÝG$áÁ¸öÙkàôÓO‡(|ôåò‘}am×Zî}þ^܈Ë}±ûxüL%â 100À§‡?M.›ãæ7³á—øƒoþý‰~Þò©·ðÉŸ|’/ô~Ïßöyåí§$"ø¢µ[…ŽVõÒ+»á×tX¶ÇÂ9à"¨û#‰òjÓn)&…›ZWBá‘àü~ŽÎ— ±„@ì7 DûÛ3pÊ×Naé¾&U„)Ð9ujÁ¶m<Ï#“Q+8Nqu/ŸÏcÛ6±XŒ—^z©ÕI„ è`ïÜ|.îêaºœö‚ж.½´Â‚»aøž&¦ibÔ£*;”Îý¦ó:‚Å­n@-Ü÷≼xëéð­nÉdšzÝL¡ÖÄzD¸éô1}ÎZEËp¾Êùâzþ›½GsøþèÞ9”àN¼§¯QœÚµðuí$OÃ6á¦ÃDÙqÊ„ÑHZðäk~ üQ«›ÖPúûÕç4ŒÒ¤³Žãðww"ÿïÿý†ã߇?ü¾ð…]8N޽{ßÄ1Ç|‰‰!¢lðéOŒt:M&“!™Œ_ƒi~Phǹ˲0M“3~s|œ/<4Ê=÷\B6%no„([únaÃŽ \´ÿ"²é,»ŽØ¥*äºì€d2‰÷èëëã®ÿî[uXʤxøé‡ýŠVVh%S‡œwåy®,õôô`Y dpã.q/NOO###†AúËÀ©*L¶bo[­£Öö¯ÃWõgÔÞvúPá¥tõãår*äW/Èf³à\Ä õ^ Ñn° È9à]äÖ_=΋'¾Ø2{:×u1 ߌèçù|Ïóü~IJ,\×õE4Çqp]—B¡0åD¦¯¯Ã0°m˲È“Hš¦‰ëº~bÛ6o{ÛÛZ}9A(Ãþýçû9ö'/ñ_®œõñ¡Ùü‡+ªåUËårµ…¤B° ªs ó_ˆfÓ¦Mþã;vøÏ5­ª¬úìØ=ç oiÝ•ª‚ÃdÇ…Ù¢sé7£­Ö1jIT@õùé7‰p•NÃW¾Ò„ 3}ÂoyÛ_üøàì”G]ÐÔ§–NExR/4}oX–ê'Ükþ¬.¬_?NWW‚BA SwÜñÑèŽ;Îà«_ÝÁßþíÇ“L&‹bÖù8ÎOI$$“I2™ ¸®ë»í¯]I·ÙíŸÇ4M‰„/†™95Y¿ä’›xÝë¶`šåš ¸þK׫N(d`ùØre÷ÅÊ2Z á8ã8~Y€H$B"Ÿ`Ī!t D4%bš&±XŒL&ƒçyäóy5ðŠÀw¾sëÖ]ÏŸýÙeضM4%™LÒÓÓC¡P˜º‚Vø­J‹‘*Û–oQb\øTÖÀ/æ4LØ0t3dzŠï;j¿§¯y¶",@ªM ÇÁ²,2™ ‘HÄ·áD"A$!ŸÏcš¦/€% ÃÀ4MLÓôóE†eYäóy ÃÀq¢Ñ¨ï §H$°,kÒ¶…B\.‡eYd³Y¿]^Ñ­¾§§‡l6K4Ŷm_ࡽp€ó^ø/ÞýOiusaZ~œñ8úá3áºTç€ çRTÿ Åb€îîn†‡‡ößXµjUÉóVòäëñ§w´º“¨'ïÚLŽÝHÂEÝÎZ†°Aj¯þâózûËê\!î±®.Þ06Â#ºjhXÄ™IiÏj´s¡†FGj´6bÛðÝï^ÇæÍ­nacw¾s7ÏO2™ô]󇆆0MS k!UȲ,Õaè[Çó° ®zë[yÓŽ˜¹ ¥9xëA~uï¯øÊ/¾Â}ï¹OmÛZz°mXgÀ+(nb8ð–x³*fVÇ´ sÕn¹©”R  µB`šÊ}¯<¾4#­W=m›B:Í=ßøË–-ãÝ™ ñxœþ~8ãŒnN9å“ärL<'“Éø^ù|žH$â{à•“J¥‚vÍ‚H¤z¾ÓT³Ø$LSõÁœ«S˜-X ÿÌår$“Iÿuæ™H$ð<ÏóH&“¾×Y?Ñh”ï'Ÿýì÷yÕ«>ÂsÏ)ûÖ"œ¿\×%N—x´E£ÑÐùÕm-K>íº*¼}dÄãê«ÿš{îYÅþç¿°fÍÿ “ù3"‘==J5Ö ú˜ú\Z„Óüæ7¿iõ¥0®x‘Û¶M¤†Êr®ëúžÀÔ 7ŒlðA>ö®Ë[ÝA¨‰ÝÏä5OŸ×¾fo8ÂPõd¾ÖÉ·Ðö,X»v-k×®mu[¦dõI'µº “è$ç–f:¯Žcšêâäó¼©X•áox?¹æàëìÞ½˜åË—sà€A<'íy<ñgƾ‡æÿÜt™L†l6‹iš8ŽãÚlÛ&“É4DˆƒRo¸jïçrêãêS:lØp.×\#.qHxâï8ù|×u±m›B¡€ã8lÚt'££?á›ßü>¿øÅ —]6ÈCrèÐë¸ú껹çžåä“&SevÇÇÏæ¤“^dëÖüñ¼øâ"¶m»œt:I4šô=/- 2™ˆ/çr궉F•¥R*üÙuÕ­¤mO¿fY*ºÇ0 n½uýý`šïçË_†B¡§&#|O[–%9℺ß?ÕÐ^–®ë–„Yë܆®ëúïéÿÚcS/ÂTêãõ¹S©”¿e™L†7½éM­¾4 
ǹñe¸\¼Bg°äcÏq‘wÊ´Û…¾ê"EIÎa¡³©š#nË–-“^[³fMKùì'WrßëÆçÝv®iFXy=Ÿ‹&ô8C}žqù<üâïnÚõi&/½pˆ—_~U};ePKWEÕÄÏÏEÐyE¥{ä]ïºè|—8ÏSå`,¡dùo|ãî¼s˜‡Ê044 6òyÞ18ÈÙ##‹‰k-K©y‘ˆR{r€UtÇ:èÂ×]¸&$(‹Å = öMïu¹ìi•¯ì¼gσÏ/$`O òð’¿S¼a,ÀvaCñV±ÒôhT½¾b ©ó¤Ó¥ÕðdlòÅPÇÑtÑ(òž÷ðå]»(xð¾÷Áé§?Æ=÷Ã^ ƒ3þû¿!“áó;wò?ÆÇùÜõ×sÖÞ@>Ÿg×'>ÁÛ>óÆwì(žÎ«/Ño¦Óót•ºðv–]]‡çÀ„zÐ^lÚ6<Ï#‰Éd°,Ë·—üÇœþùx^Ž—^ºšsÎYÇ¡CÇsæ™ÇpÕUÌ’%qä‘÷óÍu×=M>?Æ7¿¹Š¾¾wrùåGcÛðÕ¯þŽ£lò”—ÚýW©ùÇbÑhÃPÛi±­PP‚›m+OZÇQ·’iªÛÎóÁMw ýýþmäßvZ ‹ÅÔ¶‘H×UÇ}šŠFÜCÂüÃqß6´7§Î_˜Ëå( %÷T.—óÅ5Çqü܃€/j1.‰ËåH§Ó~˜¶çy~.D-ŒkÏçT*å·Kïç8¡|S®ëòÈ#´ú²5œ_Œ¾ÄâÅë[Ý A¨¸ê¸ãJ_ìëScI½M1UIÍB\Œ "ªÎž­mW¡½ñ…¸-[¶pà 7°qãFÖ¬YÃúõëY³f [¶l¡««‹H$Ò2!nÿ3Çð§ÿ|f«¯UÇÓ ñ¼žüx:T/N/._G»"ض­3W®÷={,+WΠѬƒÜ’Ê%¨ ç"+"mN3ò2¶Ž£&Ô×_?FOÏxþù Xºômd2ø§In¾ù¯±_óUUÖ¶ý8GÓ0J%€š}k¡MÛ½i‚‡oyJˆÓBÙÐP1N²¸müî³#<ÖõzÎŽþ(Îüß? :žòEœ¨£^Ô³úòɸVž C‰kåjU¡B¹*­" ¥]($}ѯÍc1ú]—IEË#¥(Äãñ›ß0rÓMÄžx‚üOð‡»wsôÇ?Îy‡ñ­“NÂqœš¼¦cºq a(O¤òÁnxŒ­â×r2™Œ/†ÁϾŸ½{/gÅŠÏpÇ£¼ç=ÿÀßýÝYœ{î/yà5¬^}?+Wî¦P"•R‹\¦©„1¸Ý¿”c§I2 Éd`ÓሚXL‰iå·3P"”W; ßRa3íRòܲ*Ûj4üA ë;NPm]´6ðsêÇÉdÒ¯üJºz;-hG"lÛö‹~D"úûûKR)är9¢Ñ(‘HÄÏšJ¥ŠB´âb±ñxœd2I6[y­î7Õ®¾¾>ßKN uCCCôõõ•zjyá…Z}‰ÊþåÅñqœ±1ò‡!òÿ€ñÖ·b=¨hk£¯ø <(Ñ+~'Z°$UöŸ@Ý:w\¶øÜ3ýä(Oï{œ³ Cm—Eu0•:¾ð¬~ª#$MÏê§"™,uש‚iš¬_?ÎòåË9sûöàHÄ÷¾; ð>ÿyÌL#—ã«ÿð¬¼ùf>üêWðá»ïf×-· FÑc"c¼åU¯âͯyMmî@5L*FGÛÎÍStº’h,#‰Çq‡k®åСG9ýôï³jÕùìÙó—8p7§öÇ|ìc«Ø¿ÿ­|üã]ìÛ·‚þçcøÔ§ ™\뎔„Õ¥Ó•³¦û¾#‘ ân³™ÊœËÅ»r¡Ð²T[‹‘′k}Ìíáû°ÁˆÏ-:Ôð‹xèçÚkMÑUuµWŠö@‹Çã~‘©ÎÓÓÓãïF'M¬+=¯ÕF €º:p˜± ù†#‘?øÁšzmçr\á™ã„ÿG:ý®¦~.aaÑL;v]8tÔQ¥/Ö2~¬=':žEƒƒƒD"6lØ@WWWÉ]]]¤Ói6mÚÄøøxŃŒsõÕWÐÛÛ‹mÛ$CËŸ©TŠÑÑQz{{I¥R¾»v­Þqtc?µ‹R|f‰è!õ'È7©T’Î]Af%<–,ižG\3íxé¾}õ qVñÂD Š3èEÐÉBóÉ2÷qSÙñlûârî¾û—<üð?1<üs"‘ÙBšmÇÏ1®%yûí人Hvw“|ଋ/&öÖ·ÒO /÷©ÿ[,uQPƒ\‡ýŰT3t1ûQ÷ƒ®cÀ G<Î9‹‰laö«Ú3®ÖAT JU$áW¿ZÆâ½{é-W1BûëÓx<Îï._ÎQo|#Ùl–l6‹›Lò÷<@OO [*&ÐrR)–~êSÚÐ ´7W4{LÑÎär9î¸c”K.ù[R©o}ëÇxÝë~ŸûîËðÉO^i`Û6ŸûÜr.½ô†††xÿû¿Ê\›ÞôWüü竈Fáê«{ùÙϺyúéc(qª’¸‰Ì¿Ü•šlVÝÆ¶̇"‘ ðH&§žzjSνíx.Èår¤R)‰™L†X,F"‘ ¿¿Ÿ¾¾>zzzü÷òù<étš‰‰ FFF( LLLøÇÒ᤺`Èt9à Ã`llŒB¡@¡P¨É»EWÒ®•t:]âUÚJær\‘G-Ðýøo¸ê-?“ðq¡a4ÛŽ]ŽêÞ_úb!N÷]I1}!†(2œ',žäñV‚»ººèííexx¸¢B<<9\ˆýJ[õÅ &‹‘ÍféëëchhÈŸd&6½½£|úÓ)<Ïâƒ<‡;–²gÏg9ñÄE\=¼úÕ{¸à‚cýÉüºuê˜Ñ¨ç'¥çÒßY£ã; 
ÓœÆq`×®XSÎ=Ÿí¸™8Žƒëº~.5Ó4Kò§é{FW™ŽÇã~©®ôöTÓÞ¤•Â9ÇÆÆX±b…úÙNÔ+@íß¿¿®íëa®Æ ~Ò àÅ_,&{Ÿ$ÂG³íx°è˜—Ô ¶­þB¡è©Tjꊩ&¥á4•ô ù™˜7,8þøã'½¡QSÍ`ÕªUlܸѾ{÷nÿñ–-[JD¾5kÖpà 7ÔÜ@8úØýÀñ5ïS`ö—™á1t¸S;ÐáɧÂs:õ=ë~GÿwÂXA­è¿ðÂOw7¥MͲc8tèPmHx»¹Å?›ŽàtÕ¯J+¾zÒ‹ÅðÃGjøÖR¥¬Ö6N5¸mÔyZI5;žm_\ŽëÂÑG…ëºtô£ä) ëGÅßÿÛ¿A>?ÉlÓ¨î7CHlÿ,ô_éûÀy\u±‰ßøˆ|zÞ #?³õ¦CÐ)h¯Qàü;ïœ[µh†X–ű‡du÷œwä³³:@¢¨¾¸¦Ée/¾È}+VðÝ\.èåóJ5àgŸù ¿üå/¹äàÁºÂ%æz~ÚÌ1Å\£û”D"á‡Êé¾2“ÉËåxõ«¿Í¯~±Øs `š¦iáy°i“òÚRbÒ²âßdLS}¥¶=½Ûiª°ÖsÏ®høñç“Ï]•×0ŒAL{¢é\h‰D¿?ô½£ßÓùÓt!…Çñ+ôï{¹wZ6›òw¿Ý¸™`YÖ”ó´Ù2Wã P?éYÛæÙ7? \ÝÔë&,,šmÇ÷ޱdl œ~ÊòÜ ÓzÆ–œ=:z¾/LÍbPƒƒ¾B\ÎððpE¯9Mww7ÝÝ*‘æèè(Éd’uÅ¥ÚÝ»w³jÕª’m§cûöí¬_¿^µ§·—E‡T-Gà†¡•Ÿj¿Í:™Y¸2¥]e;£ø7Ûĉ©âþfñ\åƒé<êf4>&'¨Óªy-7¬V»Zäñnb„@Õ ×úRÅ׆‡‡Ù´i¿þõ¶¦µ©Ñv¼mÛ6Ö¯_ÏÚÍ›9î‚_½ò¼é¡“åi·c½4X½ª¢;ôL&C2™$‘HÇýÁ¥®ÌÓhI'.ÖùW€ÂyX?1±ôŠ·þŸN§ý}t‚c½BmÛvÉ`[£«…WÃËуùL&ãç[Ñûh4J"‘(É¿ÒßßÏHY¸ ã8“BŠfжãfæ$ªfÇ3±á§žzŠüà OZœQ"A7'¼þõì}ÿûýz"Éßû=¥T±·$êþ·@Ùz¦²ž†hx—2ÿ>À¾"!u- „;ýUGÀ9©x¬r×õ¶ß}‡»êðaæz|¶Óp)Ã4M¿*æmò'|ó›ßäþb ®QD -‹¿J¥àÌ3Y{ÆœeÛÊ“®¯O©<ž¸VMÃàà ßúÖ·X±bES®Q£ûâG}”{ï½—7¾ñUÇ9ÍÀu]zzzxÓ›þÇĶ3~"öd2ÉÕWßΙg>ζm08ø—^z}ÉþÚTju^¬µR¨°~ýzÆÇǹóÎ;ž£­YcãH$R’k®q‡\.ç{¶¹®ëWÒv‡þþþ’>Qÿö†áçiÓÕõka!/ü¸ZôZ<Ë´H؉èqÅÞ½{›vŽFŽ+ —+ 8²m®¾ºýÖ„Æ188ˆmÛ7>üqÅãÛ÷°déRÈ}!(ó]Æ´ÂŒ` ,t›6mâÞ{ï­y|¼`íÚµþDóºë®+1@Û¶¹á†X·nݤüqatòÃÁÁA6nÜ8+·äSO=ÕïœG“7þÀ{gwe ‘¬€š¥%P3µrñÌEÝñâvºX¥¤Ä Ô¬1ŽÂÂ1–å÷Ÿ:¾N4ÞFç­‹;‚šÙÚÅ6ëpÅAÕ”lñxá|Iº-Õbȵw vKÑUê¡|V~] ˜P5†ÝŸˆS2§öÿk!Nk‹ïìí%ºy3gþïÿ]gC룑v|î¹ç²yóflàØ•£œyæ[¦ßI'ÉÓ×vŠ XyÛtõ/-zéUd]±«âé¦ñ«„ÑÞoù|ÞOTìº.©TÊ4ëêcz°¬CM"‘ˆß~×uñ<ÏÅtŽ=p×Èô€>›ÍúÇП+“É`š&ñxœT*…a¸®K2™$ŸÏOʳ’H$0MSåÛ*®ÔÇãqFFFJ®›þ_)¤e&ôöö²yófÖ¯_ßãU£’oÚ´©îãœvÚi\vÙe“Ä raõ‡ïægþOþôž{Hÿä'Ê\kP "9‚@fˆ¢nþéb×4郊@Üx—@ô®¢Çl<Ø5‹ºeúŠÿM‚BÃáôsxæyýŽnŽèŒÐ¶y‚îW§jÌôOá[U|Õž¾ÕÖf¾õàqÌÉ¿äŒY~×ú^‰F£°|9'¾á `ÛxžG*•RýAqà§èÕ™ c##Ê…*U"œe©ÜhT½ž­îþ½víZº»»ÙÚIJ©ì‹Ï9眦'Ã{ÓöôÀþýWñž÷\Äõ׉›nz/ïz×1Ü|ó\7ðZ[µ*¢Óûq饳5ë)Â&(6oÞLoo/«W¯nÊñ›56nWÂ÷®üiš&CCC~åÑD"a8ŽS26Ðã†Vµ{Êp±6F+n»í¶¦ž§Qã ÆÇåèáïí·¿‰[o]U÷q…ÎeíÚµ¬]»¶£ÇÇ<~¯XáÂ+ŸjrŒð(®jL–E“î6lØÀêÕ«k/•nóæÍ¤R)®¼òJz{{éêêbxxP7Çt«Èëׯ§··—Ûo¿½D°ÓÞvŸ]+?ÝWg‚ûjä(5d-\3¯Jð2Q³ºêè+nï¢f[:dPW-Ñ3/«x (-êÌÜt˜¡‘ÕI“ Åým‚r¬j¨oJ=ÈÎP*Æéóê m¬øy¼b;tÛ4Ú›N»@jeÞ/\Á3UŸÇu]þçÿò‘OñòË«øÒ—Ô6º‹H&ƒ¨íD» 
mL§Ùq­¤R)?Ì4—Ë122B*•òSE†á§Ê§ÏåùV(ÚNd©'gm»òþ÷¿¿iÇž‹q¨ß½W¾üïz×Ï€ËçèÊ …fÛñƒGÅ[ÇÆª¾?mŠí8G-JëŸ^Íæ‹Ë_èîî®;÷„ïÊÄÇ¡»»›k¯½–«¯¾š5kÖ`Û6·ÞzkÍÇ>0~$Kv¿¢¾O¥ÃEÃá›c«„Ú>\$ì eD,-¢é™¾QFP³¤§]‚ÀNO@µˆGÙ9õ½oTÍ(Û&L¹Zî;´_ÁL¯lrë_‡‹Y¡}u»)¶Ý ]/ý\çÇs‹Ç‰—íCñZ”…Ÿù3[³xÚ£À¸Œÿ Îß'CÄãX0óçpðÜç3볇i–;ÀÊwþøÝé7žBÇÐk€¿²œJ¥üJba¯­t:=I, ÿ¨„?£.  E¹l6ë{Ðèc„s¿éí§£<ÿZ#ÐùãʽË™ªÕ„Äðö–e‰DJBl´' ~7óžž’ɤƪ'!­`*;žM_æ.¼p'OþýßÓõ‰O(afºILØ“Øa²j¦M¥M³Ò7¯×EjY8œÞzK[Ë1µWÞüH…ã}÷†µô=ú(—ß{/_¼òJ=(Âä´Ÿ:;vDΟ—õ´Mêû#“ÉP(ˆÅb•ÃÕÂr1ÿãÎÇYmÛä\— ¿ÿ}Vy$Ø6g/_Î/úû븒µÓÌ1E£ÐbýWìeÅŠ•˜fÑÑÝþ}_ƒi.Ÿ´_8 sD'Øq=èÔaï1Ã0X±bÉd’d2é'"ÏçóUÝÚQðjeXl'0ã P¿y'}ï¬]{a«?²0i¶?ûOK9óÄ‹*¾7UA»Ièù¿^Ý '<æsÄŽ;&î½÷Þ‰_|qÚm?ô¡ùßúì³ïýò—ë;Ydbb"Y|lÿÇ'&&&&&¬âûšÂÄÄD´ø8911Añ5ý\';1116111211a5RG›êÝ¾Ñ …>K›22¡.ñÄ„úŠôןP—b¢Ô6ZÁLì8911qÑõ×ÏøœcccCCCCCCñx|ÆÇJ&'ÁÐÐЬ1W L ´´ š¡¡!¿###ccc###þ5«ºo+í¸¾÷Þ{'n¼ñÆI¯''&&N¹æš‰H$2í1Jv ÷§É Õ—6è«Ô}F}Ö<5Ù‰ÒŸ“FÑûÑG&Ž?éʼn÷?ôÐÄD±ÍÕ~Ì õ³5QlGdBõ‰ÿk´ NLLL …‰h4ê¿>444aÆÄÀÀÀÄÄÄDÍ÷P4ø?'4±ÿª«& ØÈZÖÄDÑÆÝTjâ{üÇ ¾2µSßxã÷Þ{oÃÎýÕ¯~g¢+WÞ<ÑÝýÌD:]˜¸êªý—_¾§e×C˜9¶z˜éØx®Ñ}J$™Èf³Ùlv"Nd³Ù‰t:ݲv5‚±±±)¯;…NWTkëØ„ú½[¾üK-ûBëé;®4>^|ɉüýߊÛ×ÔWê±±&^ük¥~ ÔMµ¹S%Ï^Ê«p¥’zØsÔQœñøãõŸP{n…½»´·WØ@{ÍéÐÓ8¥Yº5ÚQÀ rцéhu˜°§[›R.ö‡CÂÚer&vü²/=[ã>:,š Y&“Á0 ß .;Eõé¨äj­=ëÊÃ5«1›ÐÒÙ¢½þUŸ/9o'¡Câ¡öp³våß{ïWL½Q?jFB‡€„¿±XlVm©$´éÜ(ŸÆó<ÿ^i5•IÇqüë™L&‰D"þ=˜N§éééi›öφ‡½+WÖ¶q87¢ŽÖÕy2uˆ{(¯ŒÚtÍ›FO7Ÿÿĉmzœ=>>í¶:%¨nÎ^,¾ç¢.q ¥mÚú¹iV½:Ïa<÷óE& ‰„ÿÔ××ç‡YfGFüpõ…ÌÆOrñÅ{¹ÿþùÑNâ›ß<Ã5FK#{…6%ÜQÍù¼ŸŸ×êVµ z\™ÉdˆÅbôõõaÛ6–eULy!4gy*:¿¶DÅ´ûsÿ͹çîmuS¡nlàhw–£Ð4¥cb“ʉ‰…yCÛ q/<¿ èsYHg´.A¸m×mì+V_Õ↞x8ŽÃÝwò/—ü 'C"‘ð'&•D,=Ñv]wÚ u,ó·Ëd28ŽC__Ÿ_QJO† â ÃÀ0 òù¼_q*™Lú¢mÛ¸®K¡PÀ0 úúúüóèĸ‹üÊq]ן\UûÌ­ Ö¼LÀè±ÇòŠãŸz#=£100€UÌÓÌÚ®a;lw<Ïkë¶êDøú>† Ï0M“‘‘‘±¼SY|Þ~.ß»W•œ.¬áèüÿ tl†`o¡r±Uû ÷ŸäÑèß½­¦ëÀ6Ž<î]t¶m—T;jÅK¨‹9h'Ãê²÷‘Ѳ,†††ˆD"äóyúûû1M“h4Šã8d2¢Ñ(ñxÜ/ø ½ìÚùþkžç‘Ëåøç>@&sO&“áÿü~´3úÊvBW6÷!à<0îÏóT ¯B~Y Þ×Áü[0ßZÜ÷gàà¹à¼ "ÝÀFpÞ ¦ Æ}`ÿ;˜ï÷;`ýŒ¯€ý:µ¿q'ç€û>`%°œ`-o%»à'…=üÃV_¥Ö¢+ÿBP¬¨•i(Úš ww%tÕž™Ÿã(¡¸áðµþ†¶8Õƒ{wæCZÝêæÂŒxú{çCrò<Í4ÍÚÆMáT.‚ í…8€SNÙW×ö©T ˲¸{ûÝœõ†³8à 'ÖÃÀÀÉdÓ4I¥R%eÕuþ-&9Zx‹F£¾œ˜„ÃøÇÁ²¬²°GŒ®¶Xkµ”©*M–çÆªT}ª|ÿh4ê—›F£%a&| ôg*Ϧ…L-ÚMÉ{¥SüÍöÜw§-y©ú6%É­´HÜ 
¡¢Bu¢Ü|Â.Û²…£¶o‡é„a5ÂÖB\å¢wk`6]q4—Ëù•C!èËôãT*å‡z†A"‘ð5<Ïó'¤~®ûïh4ê÷ãú¹ö"Jrêï_Û€Sv]î{K–°âóŸÇøÇôC4ºOÕž”¹\ÎïŸu5Ôpe^ýÙWíßO×Îd‹9NÂó³>”wþ7¾ýþ÷c£tÿ2>ÜfPHÝÅãqzzzŒWœëº8ŽÃÇ?~)?ú‘ÕÒj:bIû ºCJ­ ¸Ï†¾Ð1´cšÓÖéwË£½‡À; ¼®ÀöòÅÿÞA`Œap‹•ГT¶' ¹ßëàBpÇ!ýy°?X¬¼ øØW¯ãIp– ,÷t0/çAà˜}EáoCifH.µy9,ýåáÕ¯nÅ×ÚrÇ!‹ùÞ®étºD˜*`3µ—gê*‡úÆ _b eƒ®*®·ÕÇé!(—-î“(>îðÉ»×N:éI@BÅ[Š.Ó¥ž™6ê‹Ò«ÚsS<¶uyVôŽV}¿&Ç–ù2ñj¦­…88¼]^Y©TŠ wl ;­&"z²T(J<¦ZéÓ^kaaªÚ ÄuÝï…jÛi³Z˜­Z¥ýË?ït^Eú}|>•J144äOTÓé´?YÓœïaØ€UÔS{æwà쳋¹ \W‰a[(þèiÀó¼–z# £RJ¥&y¼v ® ÿøÁÃüḲ¡Ø@ÝÌ:4,]|M»®Í « P ®g4õ½„A‰ÙÚ-—Ëaš&Éd’T*E6›%—Ëù 1þ=gFIÿ¥ÏöB®Ôw–ÏSÝ»žçÑ÷šïò“Mâ ûŠ)?k,óCHA-éÏZ)_Û{&&xxÿþŠÇŠ<ðoÚ±ƒdñë° ÂV=”(bR:?ŒF£%‹ñxœL&ßüÉŸÔÿåuž§þ>üáíÜ}÷÷X¾üX,ëŠÙ¸Ò¹Ò=éß½\è=='Òãs]@DkØQ”ƒN¸°Hx¥Ñž‘z^oQªŠçÑß¹¶]»J {Zäs€èVð¾ñëÀü-1™Å¢àÝ™gaà°z7M-^Ñ¥ø‚paEQo0 ÷¤áLTèµy.pnèÜQü+§¸®xÀ*BI¥^ëÙ_lÊwÛîèÄZ|ƒÙ?;¥BL³®˜S —Ê7¡&C`£9”‘ëÎ8ŽºYã¨û*O¨ÝC‰qZ÷PŽr(ºw´úâÍœ pÌí;8ë, Kºÿ ÿ èþêçPví¡ìE/êˆP6ç†öÕûé3tlŠÇ‰T’Ò¶›#°eŠÛèD¶…â±SÅí´¨Wü ç>îì;‹cž{ªŒû'ÍÝâõÓùE ãu¡~ÚZˆ8´í8LszËu]žùÙ3˜¦Iww÷$£¯Ç»¨VÁ,“QIjñkï&=­Ó4I§Óäóyb±“:í%§C{ÁT‹‰Æ‹†ÁÙ+ÎVOr9Èç•—N«xœ£M81È¿¤CR…ùI<ç¯ÿú¯[ÝŒñ3žûñ‘¬:¹Ù[§ßA»ÅÄQˆ 2¢—ëº%©Òé´ïèÿZxƒÀ[8܇…½—k9çlÛî8WÝw‹2™Š¶0zQHOŽ …‚ÿ8Nû˹\Žt:͵‹qB•‰tÈmÝJ<™TÛÇã~Ȫ›çQãîbzU²®O<'‘Hðï|‡W½êU ¹.íÆ'>±‹lv9+W>Áw¾óþY}ÿZXÓsw-~iÇœ[òÅדãr=G×^ná0OMù¯nXÈ3]pÍ £>nø\ú¸º-ŒƒÕU<@ ¿Bˆ‘‡H8 8ømèDQe0.´ Œ¢²PÂép7PÉg8~ú,¿Àönµ5©TŠ\.WµjrÇ¢]7gú‘œiöÕ^ÞyJo¼pˆ¨î@+µMwZ@ÓD ÕájN[ß,+nV­ªCP½'oyä-s}µ‚vª:éå¯óÞ^ÖêæÌí.\ 6ÔONÚWwîºßÕ«$zÅ$zÞ² -|é•xñ¸-ze…â{áŽ?z¿|Œ–fêq[R/.e«zÁÕ!èû˧ÊÉâ±ó+.ì¼ygÓ¿¦fqôÑGWŒ©ÚÇjR‹q∼àh{!nñ¿N™v»mßÛÆë6¿ŽSþãË!â5$¦ .t:wÑL&ZÔ¬f—Éd|/ž¥Cƒu¥þþþ)Ãm«1_††'.{žã?¤ž„…ÊLFuØù¼µà –eA*¥Ü"õßqÔ¶º3DÔ_£p]uN€õ\ ¦ ¶­ko¾V‡­xžº&‘ˆj[;´©FLÓ¤«««Õ͘î¾}\tÙ6N>e?ÚôgÕ躮ï!¦´wœû¬Ö×´“°íº.´mÝt÷e2\TÇ}\>˜Ó!¸¾çßëÏ8ÿžç‹ÅÈf³%ùõâIÄ4ýÅhÒTê9e?“ véþþ_þå_楗JÁ¿ÿû!–/7¹òÊ~"‘úòáio5-hj] üë© ¼Ðó©[H-½\\Oè\”wNŠÑŒºÕž‘â˨‰S 5ÁJÌ’3ntz"§©¶Ù*AL¼ ª¢óVjáy%šԻÌΦ»É¢¡mtÒM;ôZRå=z/\$zÍdêòÜÚsɤr¡†øéúŸòGüQ³®lÓÐÝ•óØ÷y÷»ohus¦o,T¶‘je+‰²C½Â¥½Ã’Åý”æËÕâmŒ Ì8B9'ø‘H¡ìË Ô“ªRþѱª¼þ!š :êA? 
wZ4¦Ø·¼&ììL!îYŽš˜¨ø^Å1¨IðÛ+>Öޮ‚¡­…8Xú»àÓn{ù¹—Ã0Œÿá8î îœx uŠA’û™qS}Îp.&}l‡N wá‰ñB̶ô~ÃÙ£ÅÐTí Æ×p±CÉÚI§•fÛ+ÏuA¬|ðì8‹©ã븪t:xœL–®Ò¸.$°¦E>Ý6Ó„hTO‡ÓêJ޶­°òÏÑÓœÃqT»õ~Ѩz^¾½iª÷-«´§nù>¹œ:VOڧ蕊eMâr95«ö<õžç©óékRê8ø©ÃþÂ×9•š|ÿqÇKüúÈqFW®¬m½Ò^f¡óræóy¿8M<'™L–äNJ’ß6«ªp£I§ÓŒÞqëÎ?Ÿ“/š]µÆpè领aP(üâ/:=CØs¹|üÖX´CT2TxÄFyx¯[·Ž·¿ýí­¾œ å‹_|Œ[n9–“Nz+##Ó+Æ:$‚ù¿Öº´ç ®q¥^£!=‰Ê´·…ž¨xú艷  ˜È›fé<—SÛ™f© a@R }Â4Õ9A áO{6jûNèuÎ1ü»¦ 7­t¬pg£C§ô¤0œ§o¡°oß¾êhñGÞŽ«¥¯’qùu¯$6…ßQ÷†amúZçó¥¢–i‚žã(û-¦ªyÝUKԞͪ÷ôy´HVÍr&yÒòù@œœ*¡-GFj»Ïl[ “l_.6Zôõ©ïE‹‡ú³ê{^‹xZÈë/½ZYö¶Cx_ü –õ®©7ÔŒ:Ñ!îºèìµðˬD'yw¼â©§& ê BW|Íçó~ÿ­…´pa¡éDÒ¨¹æïíÙÃÝþ0WìÝË=LJônØÀÏþs6lØÐêKÙ0¾öµ§yöÙÏLV.²Îß.`à æXá ¶ G‹jåÕûPƒw}¿Éþ¸Ié„K¯Ä—ǹ†+=tÎíP@ õ;áÏkn½uÛ¾•;îÈ—„¸w:do* qªß.AÕ7³ö@Ò÷Sž@@I1ù¾ÐC§l_½v'{”ÚÌþ¾jÿõ¦ºp€mßÝÇ9se“z;ÝéÂE7•q²¨>F¿¯û`=,ÑÞÇZh+_íÒçÑe^qØôè9í¿€R!Nk'͵=EÄ„G[ q»\ØýäÙ¥/¦R˜‹eñþË/'óe‡äÃOq”^’ÖD£ê¯?T \ a:VOñõ•…´pn§JBX¤™ÎÓiâ§žôcÞöêC?¸öú5P‚™Î¯¦C;ËÉT¦Ñ¾þîÂëY‚°ËJkÊ(]-tG.Ÿàüó›ôzÕù]¥¾IXp´µ÷£_ìæà“gÞ'PÕ &9]…É"‘©=‹*¡·/Ÿ¬ho¢LFMܧìUϦB{Õê5T.â…szåóP¼ü÷tûÂd¹' ´—U^>ºÀƒúØ•«ÔF"‘APç7 Ou%ÖNö sc÷ì ®]˜#àð…‡YvÓ²ŽþŒsJ¥ÐÞFî¦ð #ÈKFßå¿ã”>Ïç!‘àÜ·tfRåÿÞvrüiÓÛë49Gtˆºçy~¨ÔÀÀ€>?ß„·IXÖdjÕ †QÆkÛ¶Ÿ3Êq2™ ÙlÛ¶ÉF£0:ÊGó•¿Ç~u迺ä’V_Ɇ ut×uI&“þBµÎíâ­on¥Å…»,=ÁÓUãÔ¤N'lÖû銊¹âcΤ±ÜbåF,<ŒPë§4·¨Ž$ОÐfÙ¶ù¼ê¿ô½§ußÞ^Gd206Vš’`RøD„Ãáÿå AzÁµ\xÓÏJÏcÛjaàh Žs•x¢÷uÕÎòs '}à‰'Øvç³¶£vÃó<¾ùÍÇX¿~5½½?á_þeM«›4=:œ³šX¥+@–£Åh't-¨é £ZÌÖ"Š6;-¾TZd ¾õùgÂüYk Gøçw^ã¨mD{'BпêðdfªC-/7]< "² e<÷Ó¥ìÚµ¢öæaf¡~ÚZˆ;úôÃ\rÒúŠ"–ã8x17é·âS¯î6cETfm»ñÇwœú¼”Ê {?„W÷µçœeM4G"“…¸T*1õ€^Þtºô<™L îéœ_®[*œ¸.QíÕWTÛÅJ¡ù|žH$âçˆÒÅ:Ý[îuøzé ü÷~ä^¾›ÿnÕP;¡ ‰Ç«‹Ñåý@ùó|’I.úÚ×Zý)f„÷w'rÉßÔPhÂF…å”åÂщÂMÓôŶ<ÏÃ4ÍÎ —š m"6jo8-®š¦I6›-ënìîö·ÏårD"R/¼ÀÖ‡‚K/mõG˜5Ÿüä(>ø –eùÎgz>¦SþL9ÿö<èKÀÿÃ%V(Ýðÿ®Þ¢QµnO¥{^/ÒéüžÓyÂE"ÐvP9_v):+ðµM›X=;jKR©·ÝÖÃùç?ÆW¿z.–u!ýý“³¦Ì)•Êþ–c3õ çUØ>Fà™4BVªC¸õM “CL“Ó´g®†gzÌ ¥÷u8çnù¶úÞíàÅÜŸºðì-£Dîÿè줓šj1Ö¤4A§þ¾ã¢›.”!È(¢©0ž»ûèŠB\¿ŸDÊik!nÉ¿å÷þgÕƒa¬:noþÚ›Õ@vªÁ—ÎÝÖŒ"ÍH^OX]=d2jp]Ë„7ì%¤Ãgõ½æÂxž:¾HT,t®Pž½7íØÁEçÇŠb"aÛ¶KÂUûûûI&“Û‰p •¯wÞl½™7óæV7Q¨‡j“ÇZ(ÞOÿöä“t¢OÜѬmCÝ5„g;ŽƒmÛ~¨ä‚Ý*Ñfâ»î_§óv4‹)6nÜÈUWµarí0>>È]w}kD ^åÎgG3› 
D0½@”HÀ7òp¤צàlàõixo¤èM“ÉÁ¢(tð½¼5n$È!H*áÍqàââom4xÇCð»9QƒÒdYµMt¾ÛjÇ©´}<®~óõÂ^¸ªwù¶åã®é~˧[ИŠ4A’ôÌ<ŽmÛŒEøêW/Ʋ”˜®õÛš {RÎVìq]x› ›M¸¦|q²xž‡QÞla‡ág øߎ•|mà6W尹ˆˆ7:1 Uy ÒAòü(ðCùÉ"–e)Q9›-]¼X±"°Y× Þ× Óñx©çó7*ie”÷ÿ}}êèc‡½Yõõ _ÿbjž°þ=ýô쾟ñ¸G<Ûo8])G‡OE8œ‚|‚0C^õêí|ä¾{–¼>00Py\ÕCå°faAÑÖBÜSO/åd׃*{D™ž —¸•O…þÁ\èLB[ò}* šz`áy¥!.a´g£ŽŽ+N0\×UáT¹ Ç!‰ð¯ù—¸™ ;ß5M‚ø6Ä\ ×—…Åàà`u\’aÁ⺼æFaí»ì\»“¿Ïü=étÚw”0l&{7t:}À5×\Ãøøx«›Ó¶ÞÊ;îÿ/ŸzªŠ^Ò¿az"¬'¼+Va•á‚7¯M‡âððÝ„át|«®þìÿ†V` àRK_àÒ÷ÂY½i5šäÚ íý´€QÃw’LÆY½úl.½4ðhõµYÛ<%uq'-Þ–Û˜.fT>¬*¬Ã‘õ±µà•ÏÃÁü³ ×”E¹èÐè%¨¼‡·ÿíÀñ–Rj,«X@ u å].ö›ž€ÝgÁÛ^„{~çN<³ŽÜ…ºpBžR!£\\„9ä¸7îá”îñŸ»Åh¯ª9•;s(4˜¶âvß´ˆží®8ðt‡øŸ>ÿÓ,I/™Þ «U€\hÌÅ ^¯.NE•r¢¹Ó&>tì±àºœêºœjðä“­¸b³Âv,‡ÈE%¯ïùÎnþòÍdŽËTÍã$íÄwþûqŽxv?æ»^[yÄ:”&ïȉ™7Éçóbçaæ7à¹çžËÖ­[[ÝŒYsÉGö±}íÿæÙ _¤ÒùÏÜêV Íà_¿ýíàG4”¯ã5U„ÙvåÀÁƒœwü£À¥oX°ì’e,3–µº‰‚P…G^Ų£ŸÎ¯¼K2Q…úúú©µò³ Ì!9à+Ïdõª­ü,‘V›žÀE£“½ºµ"‘BM›VfÃÛ¯y‰ç·Œsçºu¬ùÒ—”€P–'Ð]ÒËÓ[Á¸4貊ÍKÁ8½XHÒ÷Hˆ\¨œ>#GC…ŽVbu˜J]±"¥e!ò8AÅÊUü¨*žZߦåhM^s„5úòí´NãºÁ:²®ƒ’ËM.ìyAJeÛVÛD"ê5Ç j¦iF]€8™TÛèlgŸÝê«T?÷zÀK£JÈ WX.¯Öì¢lÆ!È­©CõÃ9ýl[ýõõ©ý“ ¨¼Ή­+Lg³¥bœõÖÅí*åTlpT–.~¥#‰R©Ñh´î|ÚžçẮ¿hê8ŽN™H$üjö©Tª¢¸ÔßßïólÛÆ4M Ã()´ç8Žÿ¼PLQ亮ÿz&“Áó¼Î.\7~dɸÁqœ©¿ ÑrÚXˆó€Eœ±frãvœñÕã¢&ÏcæÓJɾ%K&½–rRDŒHõÜ‚Ðf¼|ôÑìÜõ ð{•7ÐBEîüù\a\!"œÐ¶d€%ß{Ž[ÎÿZi!¢Jž¹âª Ÿ ´!?yð(>÷ûò¦#ކÛn+ÿlF]øA~úE]ù&”­g‹/ödºÆTs|]fX‹q „¸*—W.´m÷‘ÖBtêF=t ë6:t8ýÿuµX-\%“E'«âs}­ëyj-¢é4vñx ²iµôÿrñN[×aˆDÔã°ƒ•.F–dY¥uÂ瀛g[ÍÂq`ñÁ{±¬„ú"l[å’ö«Æ74 \§šè4DÕL&0¤ð…ž®‘ÑèÔBšàôc]zªß•TªtÃÀõ<ÌH ƒ¿¹ñF®¹î:LÓ¤§§G§ðN¹æ~gÿ~ÿ¼éHB†ÎËX íþùÏyÅ+^ÁIãã ïßÏS»wóŽ³ÏÆH&Éd2~ûчòÕÞO¼ø"+¿þuÈçUTxñØ=== aC¡|…a!Í }®J›a¼ò«_Ç ¼V3NêÐܳ†áqFèëM‘Íf+&“ ¡HÛ q°èÀ––Å[YÃ3`-¢( mÍoŽ>šçN;­ôÅüùïý9Ë.o8¡s]É‹®¾ ¹hŽ|.iš\Áµ\æˆìßÏOÉY‹~ S Æ©âd¼!´%)àö?ÛÅgËοùXªb5ó(ÓÍn1áa·¬dH%rx¡ÿÊæ{P¡×Fñ=í>Ö$bŤ â9ób›>™Î’b= [é¶b™ŒÒb´#S*¥¶Ñb–®æ§i4';é×u}Ï›¬Ÿhg+v€Êy¥<Ít-i§Ë·©Tœ¸üåsöò¢ÃSÕIëD^ýª—YúÚŸ‰É_ êe¢Å¿F…œk5WWª…@}Õj­&— *fOE…tlÛÆ0 ¬xœûn»ûî»;·ne§ëbE¼wõ÷û"Ž¿¨‹ñÉ}ûXü•¯À×¾ÆáB„Ŧý¹Ùl˲øüÈk9†+®¸‚ŧŸÎÁeËàï¤×qè µ]çð5MóÚkýjÝg¼ýíAq"}3®XÁHH,5´°ÙßOñ ¥×:UûJ ´ù<—\pA©Q;NG qyphÑ¢’× eŸ·(RXð´­°û±.ÎþЇJ^sÇWî¡ÝyqÇѼ²,GÜøÆqN}áÔV7Mêcb‚·¼åUÕß7UEMÃ0ÄÓShk>óÌËì8ÿYþüȯ”†5UBç“Asç°rÄy·Þ{£Ÿá½Igéï“!p^‹£ÌWkU60i9ÅF 
.¥!×JL«”¼,¥ $¸—€1 î"ð–ñwùJ¥˜I(m"WŽ9‰âs6Þaét0oO&K…ªZ¦rýÃ4ë/ä[K}1¡ñØ€±ôi¬iƳNÖ&l`UÂVKÂ3ãqb¶=#GèL&C<Ç0 úúú|qL‡bš\týõèRn'ÿ4U:h¡À1Óœw(t\ÿ¥/ùÏžNÑÕè³ý[YI”4Må–©ðàb…åTºQ#¶­_?ƒ«ÝZîýÞs¼òÈ'Ó¦ÞPWaé`ñ\h,‹fˆæ±ìwKW©u<ù€'"œÐ,z×3üë?ßùÙl[µ­ÕÍ„º°CO,ªœï¢Æ<Îã?ŽišD£ÑyZ.Ì/àБGòÖ‰4WoýƒÊåfT µFtåÎÁdA¥-q-âð×aåY?âëÿë÷ñP"ÜJC‹„/‡ ü¿âcíé œSáÅB•:j8¸'Š/@ß-ù{Hû pÎ#Tü2 æÚÑ(Œ)§˜BAÍÙ³ÙÊ©´„… ¸›UWèн9,\×% µCqÇÉ ‚®†ÞÏqúµWXCCCþçÔ9׿•nfíÒ9?oˆfÿÒ}þsÏóJì Pß_ü/EÚÚ#îˆËï†Ày€UVµºY‚Paï “~|'ÝtÒ,Ž&­añkŸ™œë 8òÉ#¹ÿó÷sgµº™‚0%y`ÙO¿Ë«¯?“3^ÿúêÁ‚tæ è;s?øí ¼ø£½\õ±€rªê¼iw‚û”è–C]¯,`;WZˆ``ÀxSDÔaÆ^®püh¢ÃC¡.àå λpÿä7¨Ðè& “è»®K"‘ P(`†_„À(ËifY¶mãyÑh”\.‡çy$“É’¢–e•„#&Q(Bè~kÀqç[òZ´’ x× e´­çË—//yÍ4MÌsMØÑêÖ Bm¼˜7xlù˜ï~xçaŽ´Žlu³¡.àä÷ýËú`éÅø§ãÖG´¡ñ#‚ÐþÍ…£þz/›ï¨b¯*žOÚœ°Ú±9rUŠhTEŠL9õÏ€õYH®@¹ÊÅ>HÝ ùD1çÚò Êg¸€@'ç!Úˆ÷Ïô^ý7Á‹yTÎÂ'òù¼Ÿ2#ŸÏûBœiš¾pfÆ”žü¦iúEÂUC ØŸÞmB]¼¼úH^ûÄýÀ[€ v‘BD8¡"mšºå{Ø÷'—¼v„Nà¥ç—±{w w?ùä“8Ž3‹# ÂÜã{ï¾»â{w>{§ôÍBGàËþõ)ÆF_*õîôP1J+; G„3Ùùoößz'étºre>PÞE1ЉãŒuà ©bP† }³ªWR((Á-/â¡)<÷«m.}-…º‡g)Äe2òyåÚl†D£ÑŠ<§Ã4MÜ„ª<½o+GGýç¶mûáË@àe/e´­GÜÄ“pîþ»²ÏýÅs°Ir(t ¯>ç¬Ø­ž¸`ZæÂJ$-Ì ýÞ~ûèŸLz}×®]œóºs8É’pk¡ý±]—5'ã]r/Jy+’A‰7 FÄ™·,|~Îà ¿þ.þëžCÜ|ó4Ú (ŠJ×ä8ó>m“Ðælyáx.=3T’ …àf8ÇËårD"_4Ó™šÍö/¬dÅ«bþsÏó|J@ÆBUÚÖ#ne÷³¼éÊŸ/äáý—¿†hlAh"'ž¾“SO]¦žè\,‚Ða<¸å*Õ5Ûùýì=w¯g:çÀÎxöç\~ù›‚=TÒú8 Ê›Jèlr°øÀ6>ýéó«oäŒ;†ðdž¡ FŠÇÛì G88Žã?·mÛ÷ÈÊårärÊÕ¶¼(@?žçáºnI¢ÿþþ~ߣ&•Jù^ç™LÆßß¶múúúüãÞÿý­¾uqBÆãW«® ^Ъë@çkƒÒðR]Á]æ‚ãŸÚÍ9çìöŸG£ÑÀƒR÷ÂP¶âî?õT<è?Ïx™Òì‚Ð<ñŸ§úq_ŸLò„ޤû÷¶sÙÛþ«ôÅ<œµè,.¼ôÂV7Oj¢ð“%üb`¬4‰rjqOÆ5S-½B¹(¡'Ⱥâ=(OðvZ¬%8hñ!ìQ`ÛvU!#¼mÛ¾`BÊ«†…T*Õq†ûùÏc_v'=÷Ç%ùªÈÿlTHjªø§Y`ãL&ãÛF.—óí.ŸÏûß.—óí)‘HÉdü}c±˜¿½~ìº.+V¬ðÏ>nØæÃ¡Â‘HÄê\eðß4Mü×ÃáÆÉdÒß?™Lú¡•‘H„¡¡!ÿ¸«VuN1»ýÇËÞácùý¼6x±ÆÚÕú©Ø.´ï‰'èùÈ#\pÁ©€ê#J¼á<\ß+ÔNÛ qÏ~y¯ÚóêV7CfÌ §ÆÓÿ~šÿ|(2Ôê& ŒؽdIÉÄÂó<¶~v+ Ìâ ‚0‡,eǽË8ðêW—NÖ Ô<lwÊE--6„‚|>ï‹ ŽãH!º===þãD"áOx3™LÉ>ú<å^<©TÊ?OØ;(,Ĺ®[’SRW#„R/–L&S2áÖýO¹ž|›¦éoî¯Ê«¦ÓiÿÉd’ÓN ~§;O¯ìeågžŸœ³Ê.þ%P¡¨: ßu]_0ÓÉô¡TXuÇ̪ëº%vö$ {H…“í‡í$‰øÂV6›õ+]&“I_‹F£þcÓ4óÏ—Ífýï!ûveš¦/ø‡ó‹•Ûoøq¸_ ·ÓÐÛ¼â¯hõWY3㧜§ûâE‰`< a5‹ø&´glŒc¶o÷û”\.WšÎF„8¡*m+ÄíÙv»W.÷I{žŒ”…à rÜ¢ƒ¤FƒÐ¡Œ|¹g’7þwã’ïPè~±?ûÿ¿Å|ðƒ¯W/x¨>yŽÈåaZð<¯DH ±XÌ/•JùÞaù|Þ'Âb(ñ@OòÃBœeY%AØ;pddÄ’ɤ/PX–å 
᪃…BÁŸ‡E‰H$â{nY–åKŸ§R8YX,‹D"þãr!ò¬ªB\‰çc ÃàÄOœ[#˜ žÇÏþ|õ®’ë(/¸BñoŽ+ÿ†m/ìÕX†©ßëëëó÷Ñö\Ža%a‡aÒvgšf‰Ý ßÂ"M؆tÅL½¿0÷¼r×.–žºþÚƒ³ åbëÀÀ€|oBÛðÌ£‡xè3—øÏÓéôäE1W¡ m+Ľ8±œßýÝ  ø#- B‡±äÀŽ\¦Â«3™ Ã×·ºI‚P7'üæ7þá% 3aJd¡£xôÈ#ùíþý¼ûÝųCC*™…=ºÂ”‡ÅiA",Ä…½Óʺðds``À•Òé´/H„+š¦YR 0|¿Æãñᡚw™ÐþxwßÍ¢-‡xÇ;Žœ,Fhe“†™uþ2˜Ú+MÛqøq¸Z%”Š®CCC¾†Ã0ù•ÊEc_æîÞs™8òHõÄ¢bn8í Y.¶ B;ñ'±lÕsǤY0E„„úi[!nÙø8–e±Ö%tã^i˜ÁÊ+V¶ºI‚P7¿>ñD<èO r¹Ïx®ÕÍ„ºØúèit-ûUð‚ÁŒV©Ës”Ù¶í ===sDÅãñÁ¬!M­„JÜþÜÆ²„üõ¥o$˜•‡rXPŽÅb%^—Ú3ìeXî•ÝÌfƒxØp;Ó4Å®^r\^9QL×R¥/Îçó•… Ah#vn?ŠWsÚ‡SBB-´­·ç±.@ýpOr¿„`tÛN>þQ"fѨÐyì~z)KŽÞë?7 ƒ½_Ý;‹# ÂÜóÔC+9ïŒß/XT ß ‡Š†óVé\j¦Yê Cñ…†h4*00à hñx܃†19äJfÀÏñº×½ žè܆”_ {P B»2úÂñ¬êÛ(›-ñ2w¨+ÿ¡°ðXýé·«'6þØ8œóQæšzí`×Î.ÞØwrå7m$µ–0%sæ—J¥¥···&×Í£Ÿz*¨ºå IÁ…–S¯ wÜsœ~úc,_¾\lXh êµã/ϲC‡‚ ‰y ÓêO!,têïÿ™ÿù?/ °Kó·%“ÉO p¨¨ 4ƒzíøðwñ‰ ½ßÓžD¶m—„“†«|¦Ói:„†S¯wuuùÃ<ðwß}7PZYæšzí`ÏÁe¼ç=ˉÅb¥ÕRAyݡ0sâ7<<ÌŽ;ؼy3(ÎS­*îsCD£Eë!žDBK™‰ ŸsÎb6oþ z"®ÉB0;Þý£'x÷»—aç¨Ü?9¤”ÐRfbÇÿú¯îOü2™ WþüJz#½â $´Œ™Øqò²ŸãuR: Ñ”…i˜¸-[¶ÐÛ¬à­Y³†-[¶L¹ÏÉGŸÌ¾Oíƒ>T)Ö¯_?÷'-cË–-lÚ´©ÕÍ`Ó¦MÓ~g󙙨ðþýûùæg¿©D¸-éÅ~Ji‡{º•ÌÄŽ¯½v˜¿XvFÆÀL™Ê#nŽ…8±ŸölG«˜‰üσ<»îYè‡ë¾{½õι—r»Ø±´£=˜‰ÿõÊ+Ø¿n?ôÃÊkV²<º2°öÒµsæÁÙ.ý´£=˜éøø¦÷Ý 8é“Xvɲ–´[ÆÇbÇõÙñ¶mÛp‡›¿ð]<ÏÃtM-’z€~TDßa¡ÛOmµ0'q»wïfÕªUþóîîî)·øá‡ùÃýC~töØòº-ììÚ -ø~·mÛÖrÃzñÅgxx¸¥íؾ};÷Þ{/'œpBKοwï^¶oßÎÞ½­I_¯ |øpKÎ_¯oݺ/çñà‹òíßý6;ß´“ƒ;apîÚ-öÓ^íxþùçyþùç9ùä“Ù°aÜŸ¿^;¾ûî»ùÖ–oqÁ² 8묳Øvö6¸Ø8·ín;–vlÛ¶—_~ ¦|@d&cãuß^ǧ^@×é]t€ŸžþSvvíä©W<ŶwlS}…9Ÿ÷µŠVÛO»°}ûvÆÆÆXº´6/ß9+ÖPwÝu—ÿø ®husaFüÇ#ÿÀÅ\ÌñG­nŽ Ô͆ `îuAh(:_– t2á±± t2w>¨ŠN’HVè$Ć…ù€Ø±0;æbÇÂ|@ìX˜ˆ ­àÈO}êSŸjöI´¢|à 7àº.wÞy'Ÿþô§Ei:±aa> v,ÌÄŽ…ù€Ø±0;æbÇB+8bbbbb®N6::ÊŽ;èííÃ:±aa> v,ÌÄŽ…ù€Ø±0;æbÇÂ\2§Bœ ‚ ‚ ‚ ‚ ,Tæ$Gœ ‚ ‚ ‚ ‚ ,tæ$G\»1>>N.—cëÖ­ŒŽŽÒÛÛë¿þïÿþïüä'?`ÕªU%ûT{¯mY³fMMçjF;±m»äZLu®f] ¡vªÙ°~¯í¸Ym;î.E츱,´kÓNšI»ÐhífÁyÄ­ZµŠ7úÏwïÞ À–-[Jܗ׬YÖ-[¦}o¦Œsã7–´e®Û¡7::êgݺuSž«×B¨j6 íoÇÍhƒØqg2;nÖ÷ÖÎv<×÷´PbÇ¥HÜ™ˆ—"ýqg"ããRÄŽËB»6í¢™´ ÍÐnœG\ww7ÝÝÝ€Z)H&“¬[·ŽÝ»w—Äìêm€)ß›)©TŠ7N*<—íett”}èCôöö²uëV6lØÀÚµk«ž«×B¨j6 sk?P¿7£ bÇÉLì¸Yß[;Ûñ\ßÓB}ˆ—"ýqg"v\ŠôljŒK;n, íÚ´‹fÒ.4C»YpB(Eó–[napp7‰DJr˜4››o¾™ÞÞÞ’$­¼Úeyxx˜õë׳víÚV7K˜†J6<׈ ³Eìxòõ;î<ÄŽ'_±ãÎCìxòõ;î<ÄŽ'_±ca¦´Z3išuO/¸ÐT€õë×3>>Îí·ßîwн½½ 
ûÛè¸èéÞ› [·nåæ›oƲ,,ËÀ²,ß}q®ÚÑÝÝ]¢ÒöööúqÏÕÎÕè63£’ CûÛq3ìGì¸s©×Ž›ñ½µ»Ïå=-Ì ±ãé;±ãé;ˆ7–…xmZ­™´ ÍÒnœGÜàà ]]]“â{õ…§«« Û¶'\¥÷f‚Nا±, Çqåú9WíX³f 7ß|³¼-[¶ø.“ÕÎÕè6õS͆¡ýí¸ö#vÜ™ÌÄŽ›ñ½µ»Ïå=-ÔØq)Òw&bÇ¥HÜ™Èø¸±ãƲЮM;h&íB³´›'Äé„•ZÍÔ8ŽÃµ×^ËÕW_Íš5k°m›[o½P+ ÕÞk4S«Ñíèî¿Ÿ«¯¾šU«V±cÇ®»îº)Ï5—×B¨ÌT6<—ö3si?bÇÉLìx®¿·v°ãv¹§…ʈO>—ôLJØñäsIÜyÈøxò¹ÄŽÇB»6í®™´ ³¹ŸŽ˜˜˜˜hõh'FGGÙ±c½½½“\)§z¯“Û1“sÍåµê§Ýí¸m;ž´Ë÷ÖvÜ.÷´P?bÇÒÏÚå{k;–þ¸si—ïNì¸s‘k3ýuXˆ×h&×C„8AAAAA˜d±AAAAA˜kDˆAAAA„9@„8AAAAA˜DˆAAAA„9@„8AAAAA˜DˆAAAA„9@„8AAAAA˜DˆAAAA„9@„8AAAAA˜DˆAAAA„9@„8AAAAA˜DˆAAAA„9@„8AAAAA˜DˆAAAA„9@„8AAAAA˜DˆAAAA„9@„8AAAAA˜DˆAAAA„9 c„¸þþþV7aÞ‘ËåH¥Rär¹YÇqR©T«?NG vÜz<Ï#•J•ؾØpíˆ ·/µØ±¶ÿ…ŽØqë©ÔƒØq=ˆ7ž¹‹+ÄŽÏLìXÆÇ³Cì¸}iW;î!ζíV7¡&Ž8∊ÛT*…mÛD"¿³ž)žçá8N«?RG vÜzúúú°,Ë·}±áÚé†ùmÇ•¨ÅŽS©™L¦ÕMm9bÇåv;Ÿì¸R_ bÇõ vÜXæzl,v¬è;†ÎWÌÔŽe|<;ÄŽÛ—vµãÅ­n@5<Ï#ŸÏF«¾îº.–eùïçóy\×%bš¦¿ëº†A>ŸÇ²,"‘ȤmkÝ®ú ¶mÃ0üÇ‘HdÊÏ©ÏiÛ¶ÿ™ª=Çã5kªö»®K>Ÿgdd€òù¼¿¿išär9LÓô¯½çyxž¨U–h4ZrÝ˯ƒa†Qó>ó•FÙñ\Ù0tŽW³É©ìX¦t: €ad2™IŸMl8 š WzOìxòqfbÃác̦?Öv¬¯•îç"3µãvSD"‘Iϧú¬ínÇÕúâj×AìXÑécãZíx>ÅŽe\ÑL[Öר܎ÃûËø¸1ˆ·×¸B¿WÏø¸ÕvÜ–qžçÑßßmÛ¸®K,óßëïïÇq\×¥¯¯¯DåÅbär9ÿ°[m,ó•ýX,æÇqß•´Öíª6ìðãéöI$ô÷÷ãºî´Ï§;V-í·m˲p]×o_2™ôÏ‹Åðó•FÚñ\Ù°>´¿W³¯©ìX¶ÃŸ«üKl8`*±ã©Ž3·e¦ýqØŽõw™J¥Èf³-³§V1S;n§1E¥çSí×îv\K_ bÇaæÃظÒójûÌ×±±þ.ÅŽe\Ñ [®fÇá¶Èøxöˆ·ß¸bºý4meÇmH:žˆF£þól6;Ld³Ù‰H$⿞L&ýç–eùL†11111Q(üÇz¿d2é?×—¡Öí¦"¼M-Û … `bdd¤¦çÓ«–ö'“É Ë²&LÓœˆF£†aLd³Yÿ\cccþñôõÕÇÖ”\·H$2‘Íf',Ë*Ù¿Ú> FÚñ\ÚpùvíjÇÕìk*;ÖÇ&€ Ã0&FFFĆ«P͆õc±ãêÇ™ ‡ÏUO\ÍŽ'&&&"‘ÈD¡P¨ëúÍfbÇí8¦¨uŸN±ãJ}qx±ãRæËظ–}æóØxbBìXÆͳåjv>—ŒgØq{Ž+ê·ÚŽÛ24U»8j´«d¹[gØÅ]»†ṴËw­n†­p«µ,«dÕaºçh¿çy¾Û²V ˆD"þŠ]¥cë÷LÓIJ¬’NÇq0M³dÿjûLåþ:_h´·³ ëóεW²¯ûªÛ¶í»ìk—æX,F:®@5±ãFµª~µÞþø¾ûî«hÇ™LÆwÿ_ˆÌÄŽ;yL¡ÏÛÎv\­/;®€ŒçÇØXìXÆͶåJv¬Ce|ÜÄŽÛo\1Õ~ÕÆÇSg.ì¸-CS«]ØrÊs+˜¦I$ñÿ …B«?ÊŒ>ïtÏg‹iš“nœ™ä©ïcš&cccS&ž]Hù0ÄŽ›kÇ•˜Î¾ô§¶ÿx<îÿ8Š O¦žïLì¸1ÌÔ¾ô~•ìØ¶m2™ Gq„Ÿ”÷ˆ#Žè¨Ä³a¦vÜ©6\é3·›OÕƒØq%dL1?ÆÆbÇ2®h¦-ÏÔŽe|\bÇí7®˜n¿v´ã¶â,Ëò~> 
Ó4Kjá­H$â«—Z%M$­þ(mI$Áu]ßЪåf)ÇqýX«ÅúæËf³d2?>|ª}æ;bÇͧ’}]tÑEU·×ùÂûkÛžL5±ãF1Sûªfû•ì¸P(011áÿLLLˆSݎņë£^;žª/ÖïƒØqS4—¹‹˸¢™ÌÔŽe|\bÇÍg.ÆÇ³9O#hK!.ãy}}}%Iÿâñ8¦iÒ××ç—XÖhƒîéé¡¿¿ßw§kÂî¦å®§í‚®.¢¯o"‘¨éZ†A¿ÿWi˲ˆÇã%×`º}æ+bÇsÓÎzìK'¬ÕßI¥ë+6P͆õ{bÇiãLìkºýÊíx!3;nG®ô¼]hF_ bÇaæË˜¢Òóv`.ÇÆ W4—™Ú±ŒëCìxnÚ8ããVÚñz9¦ ¯LCP®V£c}ÃU‡´’Ž÷K\×Åu]EB?nGtûj¹VÚ•~``À¯žn…e&ûÌGÄŽ›Ãlì«Öë+6\(Í!v<{fj_b—3c&vÜN6\éy;0}±ÐécŠJÏÛ‰f…Œ+æ¦õ^'ׇØqs˜«ñq«í¸-‹5hÊ B—ùÕ†œËå*ªœÍÂuÝ×ÓrÒétɨײ_£Î_316Ã0ê¾Qg²Ï|b>Ø10åjI=¶×h;ž‰}Õ{}ņ'v±ãÆÙñLík¡Ûe½ÌÄŽ›¹V;ÖÌ7;nUÒéN¦ÝÆÐZ;îÔ±ñBGÆ3;­ÌTPñq}ˆÏìüµ2WããVÙq[{ÄUÂq?&[»w ÍÇu]lÛö«î4kŸ…‚Øñì™ ûž±ãÙ1Sû»l,bdzCì±õˆ ·±ýÆ#¶ÜˆíOØñ옫ñq«í¸¡BÜàà £££tww³víZÿõññqÙ½{7«W¯fÍš5-ù°‚P bÇÂ| ’‹ „ôÅÂ|@ìX˜/ȸB˜ˆ 푟úÔ§>ÕˆmÚ´‰ááa"‘·Ür £££¾ìcà /䓟ü$«V­ZqäBû#v,̪ٱذÐ)H_,ÌÄŽ…ù‚Œ+„ù€Ø±ÐN4$GÜèè(…B;U«VQ(fÇŽlÞ¼ç©\4ÿê¯þŠW¾ò•­¾6<ú補sÎ9-mÃîݻٽ{7¯zÕ«ZÚŽ§Ÿ~šã?žã?¾¥íxôÑGÙ´iSSŽÝh;Þ°aƒØO‘v²ŸV'Ï?ÿ<û·Û”cW³ã™Ø°ã8ÜvÛmb?EÚÅ~Ú¡»w僚»›k¯½¶áÇnt_¬Û­¶Ÿv±ciÇäv¼ç=ïixÎK;æºs=>ž‰ƒŒÃ´K?Ø.vÜ)ããÿú¯ÿbppóÏ?¿Õ—¬-hûiž}öYŽ9æþò/ÿrÚm"ÄmÙ²…ÞÞ^FGGÙ±c«V­bݺu%ïiÖ¬Yà 7Ü0åñžzê)ÞùÎw¶ð*î½÷^V¯^ÝÒ6lÛ¶mÛ¶µ¼ßùÎwèîîæÜsÏmi;î½÷Þ¦»Ñv<66ÖòïMì§”v¸§ÿǾT³ã›o¾¹nhù5ûi¿vlÛ¶ÑÑѦ»Ñ}±Aiµý´‹K;&·C÷uDÆÆÒŽ¹nG³hä¸d|¦]úÁv±ãN?õÔSmaÇíB»ØO;P(xä‘GjÚ¶aq£££|èC¢··—­[·²aÃÖ®]ËîÝ»Yµj•¿mww÷´Çã–[naÆ %7Æ\óÆ7¾±å1â]]]üñ-oÇèè(½½½-û>†‡‡Ù´iÏ<óLS?c#íø™gžá–[niêÊtˆý”Òê{ZÛñØØXÓÎQÍŽgbÃ;wî䡇âÅ_l©‹ý´W;ùÁ~ÀŠ+šrüfôÅ?ü0ãããlذ¡e×­]ìXÚ°~ýz¶oßÎE]Ô”Ï׌±q$)É57×´ºÿ‘v”ÒÊññLìd|¦úAh½bÛvÇŒ÷ïßÏØØ˜Œ…N«í§]Ø´i÷ß?‹-ªiû†q <'´«çðð0ëׯŸñ@áÔSOmiç¬i‡«:g ¥ƒ>}6oÞÌúõë›zžFÚñ¹çžÛr;û)¥Õ÷t§Ùñi§Æe—]Öòë&öÓ^íX»v-ÝÝÝlݺµiçhd_|Î9ç´Eòåv±ciGÀæÍ›Ù´i§vZSŽ/cciG³é´qÈø8L;ôƒÐz;^»v-k×®í;ÖÞ_­¾ní‚\ņ X½zuÍããÚäºièîî.Q’{{{}7ÿÞÞ^†‡‡ý÷†‡‡éêêjõu„Iˆ ójv,6,t Ò ó±ca¾ ã a> v,´ âÖ¬YÃŽ;üÆ–-[|·NmÜú=Û¶§Mâ)­@ìX˜T³c±a¡S¾X˜ˆ óWó±c¡ÝhHhjww7ýýý\}õÕ¬ZµŠ;vpÝu×ùï]{íµ\}õÕ¬Y³Û¶¹õÖ[[ý¹abÇÂ| š‹ ‚ôÅÂ|@ìX˜/ȸB˜ˆ íFÃrÄé´;vì ···Ä¥sݺuô÷÷³cÇ6lØ îžBÛ"v,̪ٱذÐ)H_,ÌÄŽ…ù‚Œ+„ù€Ø±ÐN4Lˆ¥4W«42Õ{‚ÐNˆ ój¶*6,t Ò ó±ca¾ ã a> v,´ É'‚ ‚ ‚ ‚ ÂÔˆ'‚ ‚ ‚ ‚ ,8 U|ìIÞgƒ]|^NpœÙS„8AAAA¡£pÝRQ¬¿ %¢…qP¯}ǶO}@.ôþ]ØÔ/{êyHð×6|*}}pA^½Ÿ/¾·{ñb^Z²¤¦v‹'‚ ‚ ‚ ‚ ̶ ±˜ú_+9”øÕƒȾ ü,Ë+1íŠøª ŸñÔ6nñ/|<×fÔöðo.<È$à 6üØ„oà;Q°ò0„ä^ѧÎÿ°`À `äa Øø—SOå³Ï®ésˆ'‚ ‚ ‚ ‚ 
4%Tý³«žç³3plüs, 2™`û|þ x¡},§Ž‘Ï+O6%œ §;0nÂÆü›ýÀÏ X…ßËÁ³+`£«Ä»$0nË8¼Ë…½ýJt]pm¸ÇQ‚Ú•¦:·mƒc×]8`¨ãþ|>mÀ” õž1a$ ËÀlˆ›ðïyX‡¨©¼çÀ2•x6˜‚B0ÔßXñünzz ›…Œ  RPïÿMþ)CCp¦¡„Àxܤú\šHD €Ù,¸¦®(æ\K$”èu~¾ì(Í, ü?IÁOÒêù§ÈçÀµ  èNÀ31ø¯%Œ]½ñR²r~ç,xÀ‚ܼ‡Å!‚ãlÔþýýËUâÂÄ›¢êsh #Ñr9õß4+ïªÂ  ÚQí”I {ÿþš¿âAAAA:Jñ?JPë'M¥Ô_,¯µà51åi–7À‚{ p~ÞÔ‡ ˆÆáMøY:Ò¾Ÿëãðú¢€µ. _RE®ËÂp‡/´Zøïø™¡<Ó®2ॸò¾‹ <ðìâãHD‰‡µÛ²Tj%¢Qå Wh´9ß“„¦ ‚ ŒÙù×€IDAT‚ ‚ ‚ tZÊ¡<Å<”§šò~3P‚™ƒò‹ävëNMÂþ \˜„‡ãpEÆ]È™p·©ö/ ¨ã%P"^ôÿgïÝãä¨ê¼ÿwB$!!5"InÖpÉ( PcâaÑjX]aYk–U\±[£QP×î•Ýõ²?×îçY‰F»eWX¼v­TÜÕ)/ °§€d@˜â’ ×0&„ôïSߪꞞûLæ’ó~½z¦».§ª«O:çs¾”@æûJs]èí­?/å¾™5ìç9 L”åž}‡îh™U‰¾·•UVm“a _Öp"ÞDÐqF£Ñh4F£Ñh43 ±fó¢Ï.Êê¬%LUQB[%´uG/ å2šö3¢Ï]üÈ€áØÈíó‡yøž f§ÚN°^wTPnª• ‹ÍÏ7ËİI¬îÒo*á(á,;ÑyÞ°«üã'Û Êo7ÔÿÆr …Ä¿w”h!N£Ñh4F£Ñh4f D·,Jd÷ÒnTL2I*obIV(€(QKô!ßW±Î*µƒ×$߀ïm–6nËçU¦Ò‰¸l–J%Î?ÿü¦ëÊѹM9™ŒÑÂP]°aîüóÏgÇ;ß©²T€ºxêBz?ožZoƒºЮ©F£Ñh4F£Ñh43ˆN”XæD¯H¢™çg‘èf*]¨RQ ‚@Y¯äbišP.3)äóyî¿ÿ~~øÃÆËÂ0$,Ëâ·¿ý-¿ýío Ãc²|L!Q›e^*ž‡mÛ<{×],ÌHDëÅ+hIíS©T¸|×.XÚÙá9v6˼R‰.Ëbà׿æçÕ*NZ‘ܼyÔ§ª-â4šYÆ­^5F£Ñh4F3  ”E[Ú:Ìbø¬£¹œ+f³*†[©4zÎÉÔÃîºë.zzzâÏ7ÜpozÓ›ø³?û3n¿ývžíë#kÛ”J%/^Ìé§ŸNÜpà £*Û¿ü wxààÊÒ­P¨¿¦aÈëÖqü·¾…išl¸þz: ƒúúú8é;ßáÄsÎᤓNÂu]ÂBöövþõØc¹êª«( „¦ÉÃ;v°gÏ.ÛÛèó›±, ß÷ikk£­­_þò—£¾FZˆÓhféÔËF£Ñh4F£™P\žÄm´‘J¥¹'¤iª—a¨Ì¢¶ÝÜ8l*¸üòËÉEÕ›o¾™Å‹†!¼ãïàüóÏ笳ÎâCúŸìïç²ýˆ°TbãÆ´¶¶r 'ð˳ÎâéÛoW bK T*¼xÊ) üú×êÚD±Úþõ_Y¼{7强B¾ïóÎãç÷/½DØÝ­ÜL]×uéèèà¼Í›yí3Ïpïw¿Ë5×\C6›Å©ÕÈ/_ΙgžÉ7ÞÈ»Þõ.*• ß=è v_q»ví"*• |âŸà‹_ü"Ën½•—N9€ÎÎNªÕ*]]]|ñ‹_dëÖ­£º^ZˆÓhf!A \ÒGˆ1©Ñh4F£Ñh4š„‰Dx(÷ÓF¡P R©ðØ¥—ò¿ÇÇmmm<þïpî¹çÒÙÙÉÙkÖpÓ«^Åu×]ÇÏ.º ¾}úé|÷k_ãïxo{÷»ùÍqÇñ؆ d³Y,ËbÑa‡±äðùð 1 ƒ|>OWWÿûðÆA?Ï?ÿ<üàY²dI¼ßùÇK¦RÁq.½ôR|ßÇ4MN:é$úûûGuý´§Ñ̤­ó<Õï­Ù F£Ñh4F£ÑLÏó¨T*M×¹ ¶‚“Øn–¥Œ1 C%KèíU‰ÂpìSC £¥­­ Ó4q]Ïóغu+ÿýßÿÍyçG¹\Æq à\.“ÉdÔN†Á<×å›óç³ß§>¥¾H±HI’!¥?;üp¥ Ú6n6Ë­Ë–q 'àyÙl–¿þõü÷«_Í‘ÿ8gÍŸÏöíÛéííåÂk®¡õ ¹ãŽ;¸îÎ;yϪU´÷õñ£/~‘L&Ã¥—^Ê~ÿ{ýøüòË1M“ý÷ßß÷±m{Ðw´,‹ ‚Û¶ëÜfßt½½½ V¯^M„aH±X䤓NÕ5ÔBœF3K(•”gÛ*Kã$3#F£Ñh4F£™ùÖàHo!Ê"Îô•†Ä7 %Â)ñ, X$ò,K%b¨VG>¦ˆf¹\.~?Ô¶i±P¶mkkà‹_üb,^½þ®»ð}˲øà?ÈùçŸã8|yýz6žwïÿûU!޶ÍÏ>Ë!Ÿú•Jß÷©¦NþÌ3Ïäê 4MÂ0¤P(ðío›k®¹†îînlÛæ _ø÷Þ{/k>øA^»p!¿ùÍoâý-Ëâü#Ùl–ç; £«‹Ð4±m›W¼âúÚ×òò—¿*•XhŽ0 ñ}Ÿ3Ï<³>©DôÞqlÛ¦X,Ð××7ê: 
…8f†"“a¨·l¶>€m«Ï¥’j¨ÛÚ”og§Nè Ñh4F£Ñh43+| !ƒB(¯ñTü·o~óÿâºj|¹\×uã÷a³ qCàº.¥R‰\. lþ0_ùÊWêÏùÊ+9í´Ób1êãÿ8•J'X|饬¾óÎxÝ÷¾÷=õÞuùàš5CfEµm³‰‹—eYœxâ‰d2Þ÷¾÷áy^]ÆÕC=”-[¶¨«Õº²išA€ëºØ¶‹}BµZåóÏãÇÉñ†Â4Í¦çØŒJ¥Âµ×^;êßD qÍ Å÷ÿÿBA ïC l®«L“ …$Hg³m‚ ù_(ŒÍ¢.´À§Ñh4F£Ñh4ãå¡cÓD†U.*.\œÂÉ'ßJ™ù—¨T*‘ÍfñùI>µß~¼â¯`õêÕÊBÏ4yöË_æ´E‹qËuÕK\¹†À4MJ¥Ò AÐq6nÜH¹\æüóÏŵR©„alÛ¶-Ç,kPºW˲”Hè8‹ÅÁn§Ù¬8{# m–e -Ô5Œ‹]×åCúШëâ4šH.§¬w;;U;!þÿQ{¨6ÇóT[â8IÜ€|*°€´ƒ ö¯TÔ>…‚ZÞ¬]iç¤ ‚d™Ñh4F£Ñh4ÍèøÓÒ¥,ûþ÷ùñ³ÏÊ5 T;00ðkL–.}Û&«ŠÅ"Aàyù|~€%Ör öI»]A€išäóy à­­[o½µnÿn¸!.×0 8ë5¯áÛ?NW±Hëç>ǹO<ÁÇŸ|’¯ýÿ–eá›&7Ÿp§=÷\Ðܲ’˜JÃX› B˜mÛ¼ýíoÇ4M ÃÀ¶m,Ë¢ež°,‹Ó^uš2#¢<Û¶K<ŸúmMS „(tp'Y |„SçrþÙçÝÈEeGtww©,GÝÑh4S„ï«WÂ6ìn°-ð*ð¸¡âV¦'ÎÎB`€e¨6£»[ t…Bb!I\¹0Të[ZgÑ %²ÉÛV›變 b†j_q™f¢C£Ñh4F£Ñh4»V­ÂúÎ=xo ¹dñb%Äùšj|†adýv9wß½ˆGù#]]]€²s]—b±H.•¡¡R©P(Èf³±°$âP÷¿»»›\.Wç2zÍ5×ð™Ï|†íÛ·sõÕWs饗’ÍfÙXvÔQÉ@6Ëü|ž—JpÊ)ض[årä‘´žy&ÑAÔÿQŠpC¹®Šõ_¹aÐjš&«ŸZ 4/3ŸÏ×»íe8hÀ©Ý§Â'SËr€¨oÊšÎ2@•ÊV¢mljâ4šBzâ ZU÷|T€·Øª]øVƒåì‹YeÎìE¯®¨1C%Â9ŽÞ::Tfi纺”€ÖÒ’ˆwo®«Kµµ•Š þY.+«8õ`Pe¦aÛÉk¨‰Éð3D;«Ñh4F£Ñh4s–R©„•Ï3ÿª;Ùö³yüÓKñ.tt&^M’À Ré ”¥•ˆU¶m†aì®Y(ð<Ó4éêê¢P(Ä¢Ûp1äòù|,Þ}÷»ß¥V«ñ“ÿøìØA¾\&/'óùÏ+a-JFÄïmÛ&“É( µÞö¶d›†xC‘f;Çq”¨Fô•A¿M0 £^Ü ©Јöušìl¡×ÅtѶ•†rd\^@¥ºã8W»¦j43„JE‰Yÿ“U÷³ˆìð_QÜ·O–à_*Ðí#ñÂè%V·ŒÚNÛVz£õ›X±uu)ë7‰G×Û«Ž#ûˆè&n¬b­‰¨æºJó<åRÛÒ¢„¿º‰O-Ó1æ4F£Ñh4;F¡Pàƒ>È¡÷>Áòåý,رCÝ\5î*•$ÔP€a8ŽC¹\®•lÛ®sÑô<îînªÕ*–eÅI Ò®«"Ôù¾ÇY³,‹_ýêWüðÊ+¹í5¯aËþÀÉ_ÿ:'\t×_tQrÒÃÄO3M“r¹Œëº¼òmo«·¸ÈfG-ÆH•É" €Å8D7âQïž*Â^Ù"J`K—!r>‰ÛªO½ðÖx~£@ qÍ "wõ,ì4” âøÑËV™°Ÿ¥Ú’¤ òQ툇Ú÷&S5èAdM;”û¨J…=xy6[¿Oµª¶µ,%¨2~ý믦»[ tžýè£twà '<Æ 7<[ùe³‰È—Îð:RÊè©$‰H£Ñh4F£Ñh&XºgË6ofË 7ðòžÅ¬X±‚#ß·™.àæ*8NˆmØ¹óºØ¢­i¢Åb1vYLÓ¤R© J0°{÷î81çyAÀ}÷ÝÇ>ùIPÉÃPYi4mÛu{SBˆঠP‚Y£æ5ÙVÒiÑ.å~Çy“ÁµE]ì·X|«8Y&e>z-Äi43€ ˆD1T;‘¾ÿm‹ÜÃ-uïgP÷¼ˆqpÚsÏññ={ Ú?GÒA›(û¾ïûƒL–Ã0Äu]:::­«T*´µý_ŠEµ]©Tâæ›7ËuR.óÏ^ÍO~òQ*•×]w·Ýöž'I%< ……B×-qé¥?#›íçMoú~Ý1\×_—^Ú‹~#Q*Õ[à¥IÇ1õ£Oj¡¹èÖè!kGËÒBœX©5îßìØ9”ë™…²tk8)»@}œ8+µÎ>§Ï)JpÌíǰeÍ–Q}U-Äi4ÓD‰0ŸÍ&¼‰jJ$B{–DÜ÷ýóøÍoðßüfÊ–Eq°ÎÇ/¹òy*™ —üÏÿpKGoúú×™ÿ÷Ïwnº‰ko¸Aù‰¶µñé•+YöÀÜsÏËxðÁ™7/ÀqÂ0Œ3ÕôööªcGé¬ÇI‚xFË»ººâà˜ŸýìXVŸüäB::NDz¬Ø¼Ú0ŒXûÂvñàƒ*xi__;íí_æŠ+NÀ¶m*• 
¾ï“ËUb3jÏóèêê"—ËÅ‚a¥R¡Tr)—_ëvÄALÃ0¤\îÂq”çºð¾÷݇mË!‡ÜA¹¼™ûîû+öÛ`¥ôõ]ÍW\(Kº—½ìižzê÷Úˆk4F£Ñh4à÷^{-ö%—D™Fáý.eXàºn<Ö2‡‰É6,Ë¢Z­âû>÷Þ{¯2xøa=ú(T*°jË{zàë_ß{@’¤‡PbÖ¨ÿ‰kizl&—¤e¥Bª,q[Í2tÓô1:PB]z[)«-3¢e•T¹‚cãy›êûì>b7[Ž×BœF3c‘¸niJÔ[Ìö6¬ï*—¹-—ce_÷G"XØÕ…˜‘Ki†8ŽÃÎ;ùY¹Ì·Êevë[à8´|ó›ÜyòÉpÐA¬ìëãË7ÞÈ9­­‹ã(Ì4¡«Ë§P(DçV¥\ËòÉårtuåéëû2øÀ{8ðÀ]€%† |êÒÙÙÉ~û•¹ï¾+W~‰M›îàÔSOÅ0Z¸æšG¹þúÝ\|ñÑlÚ4À­·~›·¿½5~`îÞý;~ûÛOÓÝüJâ¾;¥q4F£Ñh4ͬäþ—^âø… ñý$æ¶ *•Êã©ñR©T0 ƒ×½îu¸®KË/ɾ÷=¨VYdÛ,W¬½‰G½×ÌJÎOm›âÊMö“ÿ‰`f¥ÖURûI<9H,Ú­ædp.çX$‰ —¾T6I¼¹FÛŒnx`ópëè.‰Ž§ÑL"ÜIÕÞ¤ïó\.W·Í7ÿþïñ…[¶Ä³%•J%ŠÝÖ§­®V«l¸ôRòaÈö`éæÍ,?õT¾wæ™`Y,^¼˜ã·láæÖV–½ø" ÚŸeÕª]üêŽç8]]]äóùøxù1Uûú×_dþüãXÍøÂ–ó±D>ŸÇ0BN>ùE2•h"ŸOD¸ €JÅŠ…¯eËî£T×µèì¬àyðç¾€?þñÓäóE²Ù2¶msã[¹ì²utttàyŽãP((—Ëœ~ú9œ}öK—þ‰R ¾óÇùÍovÐßßÏ…ž@.—‹âáï«,Dñù—J%J¥æirÄšP£Ñh4F£Ñì{„ÀÒ'ŸÄ²,2£Zt7ß÷ãñÉdbY–e±k×.Ž=öXþ뤓”7”Äê™ê˜nHæÁF*$1×äb5jŠ V$ï2©õfCù‰+YˆrCšÇ“Ør"®$â]³alŽ 3¢7000ñ£h4š:Òy<^0“û>‚ØMèL&C†œþüóìzäÇÁ²,\×… itA™5Kl€o¼âøÛ·«tצÉO<ÁV­ÂÎÞo?8oo}k+Wö {ξïcÛvl 7¦iò«_½&>¯¡0 8唃1Œ"\p'ÿïÿB¹¬\u C‰q岊ëæûÄAG,ØAw·z=òÈ+ƒ/}iG‰vyº»kuÔ6,Ë¢R©D‚ŸçÙ˜&|à/㡇¢¿¿ŸåË»yüñÿá#¹œ¾¾>þéŸÖsè¡ÿÎ?ÿó=ÜyçNz{{c—ØR©„çy±%b#BªF£Ñh4F£ÙwðýzˆÃ?0T¡rdØ`YÖ„cÂ¥ ‚ß÷± ³TâÅ_ä÷¿ÿ½?MÐåuBìÊ’d$ìh»´œG"z¥¿B¶a?åZ*"› WÝU­hY¡á|̆ã‚úñ:ŸÆ•%µ‘Ø5uóæÍlÚ´‰‹.ºˆöövzzzX·n´¶¶R*•hooŸø5MhoÛ°Û®âijP(D‘jU^×]r ×]w?¦i"— »\×ä¶Ûþ/Åbß÷ ‚˲èíí¥R³ð3xôÑ>Î8ã;¬\ÙÇ»Þõ:^xáÕ´´¬Àó±bÅóÏYgý‘‡Zaض[ÂY–…ã8T*lj-ƒ ‚Aß³Y¦ZF£Ñh4F3÷ðgþð>ø­8N’$®\V!{&Kˆó<›~úSî¸ã>½k'¿ï}œyKMYVÓÁÖbÍ\N›}Å%º å!+Öi¯M(D奅=5p•@ëiÄ]•†íÓïk¢¹åQl;æôõõ±nÝ:?üpZ[[å‚fÛ6?þñ9ï¼óFí’600À† -»êª«Ø°a›7ožøYk4SÌT×c‘b¾îv1O‹8ÀSÒSŸÚß϶ŸÿPb—išq&ѱ`µè¸/ô,æÊ+Á[üv:Pí’G’¥àžO²¬ õÂÑGßϲe÷ê|G ’#£ú9æÊx‰iÂÜö¼$÷SO}Ó4q‡r9iE³YeaðªW-båÊ>{¬•“N:‚£>šÓN[„ë‚mÜpÃ3Üxã¬]»V];Ë¢T*‘Ïçc+ÄR©D&“ØH§EKß÷ãD•JÏóxòÉ'Çô{—Æz¬ÛbÍlC÷)4s]5sݯÐÌöf=€7ÞȻ߽ŠjUydØ&ž:ã¡Ñ@¶m>ý»ßñ£-[xùÃC6K>ŸŸt·×˜ÊZ¬‘õ®_{M˜iä«©2åk•¢í-êE/%Ž5–c¡Ä¹´õZÇçžetmʲnòŒë˜Ä–pëׯgÙ²eôõõÑ××Çúõëimm墋. 
§§gÄ7lØÀUW]U·¬P(Ð××G{{{< Õhf2S]}àB¾œKÚP³é†U¬§òù|Ë_,Z®Va2ŒàÄ{Š0¤?Ÿ‡BAY5twÓúÓŸÆÛe³YºººÆy”aq­q™ÄZK/s£åñà„Lj[N %”9¨h£k©¸§6.kk8.Ôlj)ôè]J`»ð ã…]]]´··³lÙ²xÙá‡>b¼8ÏómÓÓÓÃÖ­[Ù¸Q¹§ °iÓ¦8{¢F3ÓØ[õøAÖ8ª­êZZZp×uÉf³†A¥¢ZÇqâXp陓ɸjÇäY•~H~ðƒÇ²ß~Ä×?}?»®‹eY˜¦Y'¤Š[k[[G}ôD¶i¬Çº-ÖÌ6tŸB3ÐõX3WÐý Í\`oÖã0*Ï4M2˜L=¬Z­¦aðû_ü‚W¿úÕ¬qqo$c(¢.—$éK’(A–‰èeQŸ µ;õÞŽÊJ»ªÊå/d*%UV#Y'„pPÔi 7óZ[[ë¬Ýzzzb7,aëÖ­Ã400ÀW\Áe—]V·|óæÍu±åÖ®]«M—53–½QÅîq~c'îïaÆ1ÄÄ5ܲ,ŠÅb,¾ME|±.º‡ƒ^¾ƒµ ÔeƒJÀÚßü‡À0£eÒ®J L?ún?õT¶·´ðû§žŠ-”EàóG{bC]?üô!×›æÈB\&Ϩë¯Wîø|?´‡®.Hy™R«©ÿÙ¬²¢ ‚¡Ëõ<Ïó°m{^6›Å4M,Ëâðßø7 Íê±n‹5³ ݧÐÌt=ÖÌt¿B3ØÛõØޏ÷^,Ëvü0‚ P†ÁÙ·ßžˆp{ 5ˆM첨xjLŸa(/Ù,õ.bñ—dèXnÍtR«Éç*ÂA$ĵ··³iÓ¦x)IaÓ¦MìØ±c8—¦P(pÙe—ÕYÑìØ±£nÐ)1è†cË–-¬[·nT®°³t}*‘Åoc°…åhËsQ.Ú[ƒŽ¥ù/RGóºO´MçH…N"’<äž{î™ÒãLf=¾çž{X·nÝ åbuû˜}–ÿE`·T‰ gFKêT˜ü·´´ð›/½%þœ%i'ó¨vîµ7ÜžG…DP«¢,ù$)M/jrC"±õìÞͼ\.N€ã¢&;:¢ý;¨Ï<Ýì:5ÊŽ·ÝV²êcÄM”U«vqè¡_¢¥å—±xÖl"É÷¡RI2©,®Éz±Š. yOO/½ôÏ?ÿü¤3šÕãñÔá‡~˜_üâMë±fßeÓ¦M\qÅüéOš²cLf[ü§?ý‰+®¸bPŒ.fݺuüâ¿àᇞ’ò§¢o¼iÓ¦é½hšÅtöÇSaèþ±fßeÓ¦M¬[·Ž-[¶Léq&»gN¸à‚ ذaàټ4W]uíííà ucáˆ#Ž`ãÆs.Kk'JxW€ªƒªŽH&Ý´{s£«³ÔÛJ¼Ë¡„‘|´ú-MöBá.m/ÂH[TN%UQùmQ¹9—C†9ÎdÑÞÞÎÆY½zõ”c²ëñêÕ«c3ç4!ª½Yeª¶Ê÷}:;•¬iš&ÝÝÊVײ¬º¬§j6%di5QžÝÐÊã=ÉC'O"Ä9Ñg˲ PˆÛÚ,C»™À‰Ï?Ï3Ï<ÃòåËëÞ¸¨ºž¡Þâ˜hY.úœ!©ËÒ^—€/Æ0 ÿý§’#¹‡†{IÌ»ºs4”ˆfÛpÜq X¶ì>Z[¯Ç4M #® Å`YJŒó<%̵´$Û4û}Òâi{{;·ÝvÛ¤ÿ†i&³y䑜uÖYMë±fßå¼óÎã#ùÇwÜ””?ÙmñqÇÇG>òÖ¯_¿7/“f°qãFÎ:ë,Ž<òÈI/{ªúÆçwÞÞ¾LšÌ\êkö]Î;ï<6nÜÈG1eǘŠþñHý Xvß}“×ïÏd —ã/7o¦mûvú×C™±X‰hÐøå“-ˆ–è3úÉé6$›¹–¦ÝQÇË^´ˆ[¿~ý˜úÇóÓ;ú¾ïûu•jýúõ\wÝuÃúIßzë­\uÕUqAPw1õlt{mœœ+„Ô ¥è%â•$ô°QbD9úŸGÕqc.¤öéL}îŒÖ[QYÝÑþÝ$‰B¼ès>ZçqžbÙÔI’¤ýá¸- H\ ëÑrT¾•'Âvgjaª…ºÉ`oÕch á¡{îÁBYÃyžgA)ñÂx³ëL9¦MR‡cåÊ• {l¼L²GÛ@?ªn•I,/ÓõÒBÕE9žm³«µ•ÿø.¶qn\']k»fVšR?!yæ¤/ñ;ß¹°nû¡žg–•¸¥º®ã‚ ùJˆküý¦RtkÆPõxÁ‚ûL[¬™Ýè>…f. 
ë±f® ûš¹ÀÞ®ÇÔØæîŸþ´.ä͸‰»wïfÙ‹/²ð7¿á¨Ïžr¹<ñ²GBDf¤OȀߦ¹–ŽiTlRN¾ÉöMŒ#æ FÚ`4ªq㬆eY±‹]__=== °lÙ²An¯ÓEÚEt,g“ŽÕXW:RÛˆ´â¬˜ mI$ÖGEQ¯€º‘û£ÿÎ0ûK]MË>J KŸo[ôߊŽ#õÛN-—2qĈ^Nj{¹'ExsHb…U¢ï.‹“:'±\Iì­zµž]úߘ¬䎺·Yðî]pÕè¶«¶Ñœ¥ˆpéºÔ¸Ÿ•)BœÔ¹²mï•ûïÏ=÷<ϼž±ÏVõ*="ryêë£XB—¢eIÀT/ w˲ûT&Ö®.eåÖß_ìbQ‰t…‚ŠCç8‰kªqžçQl0§ÛÛBÜPõ¸¯¯jµ:ãÚb¦‘ÙØ§ÐhÑõX3WÐý Í\`oÖã5æèþüØóGŒ]=**~ÛÞÎõ+Wbš&“xU éa‹¸‚¦µ=ο m²Ü$âü†mœè_@„†F†Êž:ˆ…¸tßööv.»ì²Iq mmmå /ä‚ .`íÚµxžÇ·¿ýíiýÒ%aH,j ’z'¢R€ªCY”H Ö_Ùh‰)(bb-6T=‚‘E?¹ÄzÎJ•5š®±®v“c…Ô9J–ËÑ„þ·H7å~”ýÓ1æÒtiWðÎèØiQ/ËÈÖU3ɬÇ&JÈ¡çRò\B% 1 ƒ|>?¢hÓÌâj¢<ó¦£YséW·¸­Ü3#a?XºHêoZÄêºTœ,"ÍKÀ£.b¿ùó1Iܲ $¢w%6ÛÑñ‚èg´Lêlêþp£Ï8#âl»yŒ8Ï‹~7’Œª¾¯²´‚²ôý‰\˜:fb[¬ÑŒ]5s]5s]5s©¨Ç2ö|Ÿ³™ }úúÝ»yáÍo¦xÑE ã‰,߀Ç`!À¦>v›| ø^ÖhùÓ˜!ì;½<‹„YÌ` Y*[ȦM›¸ì²ËhmmE¹ñú7F/ºè"2™ [·neýúõ{ÕlYb¡‰à_Þ¹“ÿzòIN9úèØÂ ×Ð$`½ì/–;åÔ¶ª‰+]™ÉwCÎ2|ü«‘Ë<%N¤-ÑòŒNˆk,¯ñú©uƒã6ʽ)b&$"¦d9ž‰Le=¾æš›¯R!‚8£æpBœaA0éVs Ÿy†ð¹»9eùÃn'Ç…°lß¾=ñ}“z×åáêÃÂ;xôÑE,8xA|¯‰å©XZŠUh%U¾¸‡$B¢Ä•«)¿w¥ÒüØ–¥¬åÒ?‘e%ñ䮼ò¯BÝ,Ú¸¨N"ézâñÇÂöíÛY±páˆÛ‰7šÒ÷kv©÷éÉ¥Æ}Å¥Z\ÁÅúRîUH„c èÞ¾¶èú»C44 bÓò¶6âìG®«ÜTï»oðC{&qìí¶X£™ t=ÖÌt=ÖÌt=ÖL”™ ÉLf=–Þ¿ïûlßþWŒÕ™©çÛßfÕ{Þo~³r½±íx¢ßu]‚ ˜œñ`£¥ž¸­¥…´û[%~4þ`ƒÅ+ZÞÌx¯{R.ó¬'vMM+¿³=[©dQ”ì¹YÔ@ÛªUÌŸ?ŸÜµ×Ö }ßÇ0ŒqÅr’ò'ÅO{Š+¡ê8ö‘­Y;’q"º‰4”“Ž—¶XÝW"Jˆ%ïSK_B\WÉB£¹&[„uï÷`e7Þ8góÒ)E‡a´/‹×÷qôo€»TBh.(§ÃÚNܧ¥¾<÷ÜsøÊWŽÊí¶PH9ÓT.«¾úÓÒºßÑ¶íºŒ©F£Ñh4Í\$m|Q"ékK,h1Ü~¹¼oK}>v Ç›ÉÜýÜsóôÓl C.™;F1³4Tb¡ømÏc,xáPu^©2¯ Xq¦I‹ÁòàeÇC­Æ^©ux^"Äåóê}g'<óÌBjµ7é$3Ï"N£Ñh4F3÷‘~°‘ú,IôDË“$ö“m¥l6ìïEûÉY47*W&¹’8Í^jÄ-‡¤?îxRÍ{ì1ªÿôOœöÜs<óÌÈÞAƒð}Vÿã?‚e±8 ÉŸpB¼Ê0Œ±%»]š"‰«P:K^:8<$ð!±|kæ.Ø(Î {!©ël%âzzzذaCÝÊÆÏëׯŸîó–ýÏÿP=õT޹ðB¼}ˆ¢mã?½æÚO=•Ý]D6ò'ËçóA€išX–…ëºX–…ã8 *ΓqiÕÙ÷}‚ Ã)±NšJÆku6’ÕŸÏÐ *ÒVti7V£Év›Ï$ ¤?yøŽ~÷“?ÖeP›n<ðÀ·«wÈÙgºÎur»Ý›á™m/rèë^/oï37.Ä0”ÿ°kÕ*^¾`AìÒ>A \QÕ5‰Î¿ r98í´Kð<•]5ŸWmÇLY5F£Ñh4³1< Q}Y°ò ÛtŒ½'§m­Dk‘¾t6µ½Ÿ*£+µ,2Ñò2c1$ßCÆ î÷ŸéìØ±÷Úk©ÕÆê‹¿þ5?wŒ0Ķí‰%$ccz /?n£è&?bzn6¬—Á}ãà_qo›D€ò‰îéé¡§§'^±víÚºÏ3ø·Cáà³Ïæ†nà-¯}-yÛÆô} †AxÑEuBBÚÚ-›ÍÒÒÒ‚ã8ñ«££×uq]7âLÓ$ C\×Å4Í:!N\ÐJ¥ŽãÌ9kº¡°Q÷o³{ObÒɺ¡šôÃ$Hí7ñ€»¿þ"ÿ{ÓsضeY3¢®œ|òÉ#n#&Ñ£a¬ãÑ>^æ?¾„ýOØÍbëÙQ—Ý(¬Iü¸2ªîf_¢ÄÈ;€-À¿ £±÷÷C3‹pÀU«’€¬W^yŒ:þ8Ýz5F£Ñh4sÿ¸$qÎ¥ ÆEN´^ ”¾"}î‰hJ˜“FÚh"Ýç¶HE³uvjÝx{¹Í 
¨ž<òÈ©¾Ô{…G-â­o}+÷oÙ2ö="ñÍó<<Ï›xrÉ®(?–˜&©#ÕÌr®Bâ¾$1ßTÓâÛ„XpÞyçʼnf+%àà÷¿Ÿÿ‡ rè¡„aH¡PÀ÷}òù<žç ë&V,c÷TÃ0°m›B¡@6›3UA@µªTíÎÎN<Ï‹·K¹r¹L.—Ã4M Èñƒ²Ž‘L´b}7Ûiæ(H¦Y±VŠW2¸Ñq=½n¬ [f á­/rÄO`ʽrº…¸ÖÖV:?|RËœ*°Ö<‹w-„óFµ½Ëàg„Ì6Ö­£Ž:Š>g•ÀgP1èÚPCéïÔ81U((wTÛN¬äî»o£ ‘§Ñh4F£™ÅˆwOãèNÂjA2ΑmÅAâš$ƒé˜ø¢£ô69¦„çÊôo½†OLl¬5™SÍ&ðèâÅ“Xâôñä /pJ_Wî¦R±õ–}âØ–…mÛT*“à´›v/…¤ÂA}ü(—IJU e™ аá&L]Œ¸6mÚDOO;vì ½½ööö8KÇLåÏ7mâÅÃç-ÇmÛX–E&“! Cz#WT˲† lØ(†Ø¶ëºCºŸæóyr¹ÝÝݸ®Kwwwl6šÏçéìì$ŒÌI%»‰˜–Šxç8¾ïcš&¥R‰|>_g}7[èf]ÚåP<"ÐIÜHfE$ö$m@9µÍ\ ¿ï¾øÿ½Pñ §›E‹1²cêÌ`y?ÿÔä`kä ¥Ã%i´í[¸p!Ë£¬±ëmèŒ0Òñ‘ "ILÒˆï+N<=Ž=vK—4Ý—L£Ñh4F3Bêõ‰³&^A’hÒ ‰—&/±B/!3õ*Sï $úÈhGÞi¯£ÙÒÇ$víÚÅ+W­Â÷UH›1‰oÙH»˜ƒf¦éØQV2br ƒ•e‡ÉU`÷aêbÄ­[·Ž¥K—’Éd%Ì}þóŸgÓ¦M‹ÅºÌª3‰_½æ5¼óK_Š+ªaƒ*îX:Ž3ìöéìˆbý&d³Ùºc‹Ûjgg'ÝÝݘ¦‰išT* …B§®££ƒ0 qgLn€ÓÍX2yȈ0ÐO"‰[ª<ô$°þì·TßiÉ!±Ð:—㇥- '‹'~²Š÷ÄËþìÉQï3Ü–̉'žH¿loÀŠ– :Xx6½ovgš¦ÚÏ÷1néÒxæ™-Àñ“|4F£Ñh4{ƒJt“pZb¹æX¯I,må¹W%Ñ1šÅÆN“6LÒÌÔx|ûŠ_gg'åryb:’­±™Û©¬÷,Ɖ¥Mã>ÍskÆÅP‚Ûºuë¸ð ¹è¢‹ê6X¿~=ëÖ­ãóŸÿ|ìV9“øêãsèþÀU_þrÝr±„›#YŠà7Úrúûûãe–eQ©Tâì¬Ùl–R©D6›¥££ƒ ¨T*T*ªÕ*…B!ÿ<Ï#ŸÏ׉}aAlù'ÏL /÷zúsú} %zäI¹Rôy.q÷†pÀîÝñç™bù8 I°Ñ,;îDyîÑE`Á’ãvŒ¸íhëLÚ"Û¾O„aYŸž’NÁ0”çº`Y*QÃüÇ"}4dá·LÁÕÕh4F£ÑLê³{V©ï/¦'›u ͯµŠ}—ÇZ[áÁÇ&Ä Å‚“˜p]]£Mg7 $è $Á’Ä ƒa®ÆŠšÌØ´iíííƒD8€eË–Q*•ð<¾¾¾é>ßAl|áN{tz Y ÈŴ±"n©Åb1Ôòù<†¡ø …ت/—ËQ,c˺îîn*•J¬–‡aH&“¡³³“ÎÎN:::Èd2ø¾O[[Û ØxaŽJ@œ lÔì‘Ðh~-q3Cžš|ñañþ;Õµ°í‰ßœ$¦"ex:üÀd!.©GÝwߨŽ?Òs£H½µ¶Ó •ÐX?A=ËÒNÅÅ¢â‚@½·mxôÑE€Écµ2C’ãj4F£ÑhRH’ƒ*$N‰jfGú“io>f(Z~¾uëÖ0*¦R ZZÀu''\#Tor¦?âþÓHuch¦„ ÜRÛÛۇܨµµ•µkײuëVZ[[§ûœc+¯ßu>ý47ýÕ_Õ­/•Ô ¸Y.„BAe< C5`®TÔ{±h‹¶dQÅ™d]mfugY¥R‰®®.,˪SÅÅÅVâÐÙ¶»³ŠÅœmÛ”J¥Øíµ1Ö]©T" Ãáþ*.ªñw§>)Ô["ÍvÀ¼_boœîS‰‘ë?Ùv“½L¥ÝӃǚœv¨Y‡Ê0” &“DR%$B@ó° âž*–qÂÊ•}„á1Sp%4F£Ñh4ã¡-úoxßdÑ1è5gד/P{f!…‚ò’߇®.°,‚B Èår“g 'æ›Ã¹ ‰Ýx®CYÉi&…£Ýp```ºÏ5Fü¥?úÇ?’;é¤(£©²a±X$‚ÉuI¹!]ñ‘[ã:IÉ«™æ¬]»–žžž!7êééÑjnoâû>AððÚµüS{;•Šüú¾²hKg/LcY‰ ™ã(á­Z…þ~µ<T9¹œôF"çm21MsT7 ad³Ùaoš&¶m“Édb×VßÄ Öu]:;;ãëšËå(”j&“”ÙÓ÷ýxýd“%i/BÔý/±f;sÙåvoqh{tÁ1k&bÀf]ú9¤þKßJ| %âÙ¶jož{N‰p…‚zi4F£Ñhö..ÊË®3zï¡ÆÒ¿Ó"ÜÈxžò:KG<KFÐ\Ny`J¸P¨÷ ùC v/^<Ý_sâ× XðÜsØvsï¼:R"œŒÓK¥þTÅ´WÙAÖ¨ltÕ,s#Hû e>(WFÉÚhù¶yóf>ÿùÏsÑE͘¬©aòõ_dÕÿ¨Ì‰KJPËç“ðP!ÐL3àÄ‚Ô¾Ž£„<ß…)‰…ÚTX“Y£’ÐU†Ö‘\LËår]Œ¹ 
p'NøP*•‚×uÉd2ƒÄ9ß÷©T*±˜—禢±htU•8^³]ï*@ÿ©Ó}*³–•}}`ñÇ;éeË$ÐP #ÚF,âÒnª>ª-éŽLèl;Ž·J¹œL tv¢Ñh4F£ÙK”P¦YTØ”**îÛlôÚ˜(¾¯ŒMZZ’þ®x‹É{ÏSýÕL&y ±^D*­¥:: ­M½¼af´=O•ÝÝŒ±m;éû>üWv?:7¼H>ìnìÑø8û~ì‚'ñáÊåò¨u€AH¦¹4b`6¬kfýÖˆ‰öÕžB@’áóŸÿW@Yc†ÁM‹ñ†ßýŽÂoߊi2Èô³™W((±m¨º-–r®›Ä‰Ë8Û&¾¡Ä­Õq,lÛŽ]SK¥Ñ x“‰ÄŒ ÇqÈd2qܸä;+ë;Ó4éèèˆ3¸J™Åb‘¶¶6ÇÁ0 2™ ½½½ …8!D†xžG”Ëe<Ïò¬ gkm–Qu6c'¥Ûn›îS™ÙH<‚¥^6‡.|æŽÛÑ5ÕGMq—ûb˜¬?IòIºÐH£U\³lªÍÚÓL:c™9Ôh4F£ÑŒ$ÙVÕO“¬§³™\®Þ€d¨aV*áKB5…¡ê‡ j\kšêU©$aJ$ëì$¶äj’ #çZ–Œ…O´RIí# òù$\‹„€JÛ®¤ÇÖ–ÿ±Ùo°sËÂ8ÔÕ°¤biU«Õ‘  $Àu|ŒÔ{‹ÁÖpši#Ž×ÚÚÊÆééé‰-§lÛfíÚµ3"Aƒ†!–eñ«#¤õ±VÜ뛋_ÍŒÔFcéfšª!rÝÄüV0×ì^V.wj¹4@ã±§ì\×ONT÷ôr'eK[­VqÏó0 Ó4)‹±†!•J˲Èd2ñ~§—Gi2“oµwÉfA³ |-]\’Œ>TIÔ®+ûúX³n½ç=ƒËÎ0ÕEU *I¼ƒÔö’ ÔýÛØ~Hfo%Ë3®Bâ:PŸ}V8×MþMÔ5F£Ñh4㦀ê™$ f«¼YŸÐu µrY jž§³l6±:ëèP–穲ÒN¡ôc …DðÊfo4¶­Æ¾2-é1µe%çîCW«bÜ¢f¢}?¿µ×,!Âÿ>ÿdF¶Š+!ŠÙ>)×E †o˜Ñ6“‚N3~%khooo ⬩óWžç‘Íf¹õ‰'xï{Ï峟…Úœb´ÛÊ,‚ï«{Äu·ÖŽW®PHL|M3iZZ’ÆQfÜ”Kø^Õl³káÄûKbU†!¥R Çq0M“\.GœDBÈårø¾O>Ÿ³¶öööÆ®¯Ùl×u±,«ÞbO~·½{©¦ ¹V3 qÿRdjRÌÇ|êM}Ô,N ÕcJ§ÍMqà®]¼üG;8ñ¡A;„ô´’L…èËI0™µ›|iW½ŒbTßR‘{Ó›†Ô'¢VÑ¿*ç$Û™f7Î÷ÇÖNi4F£ÑhF$‡”.ŸÁÌár¹$ü‘ˆg†¡„šÎNµ®³3I&ãPÏSÛ…¡Z'cGËRãÐrYí j|*Vj2&MÛD I)½N r<ÏÃ4M,ËV$ï(1Т„€‰†„4ƒ Ó4ÉG'†!®ëò›ÿýö;aæÿŒh9ì¹Q%}Ä0‚€B¡0ö 9T\·Fê³z ëd¬¥™vF5u¦ ¢Í¡®É7¢Ù‚fLF²‘lV5hÒöHL¹înõßqÔÿÆÙ…j5±„‘FQá"Îuv&qé@m+f¾ãn4B ôe_ÃP¯fn²ù)ð›5 #vEI¬æÄ¥U~7Û¶c!JâΉK«ã8är9ŠÅ"Ùè‰ÒhY·~îslœôo1õˆXsÝuï"›]2ݧSǤȂ…¨ Æ:ë’˜Ž‰gd®pQÓ•>êaâ¡b^&&Ô©4¥Ç?s<}oícE¸ZH*Jõp‘cHyuF꜑ãt ‚ˆDÂ\£RÆ7oQËÌ—ÀkIâÞ‡Òú9Cú¿‹2~hV¼ö’˜,Ø´iW]uÕtŸË¨Æâèœÿ³Ùd°#î ã©çÓA³I è9^LS=L$~ ×DÒJ7;-Ð7fUvµý+^ñÎIáè£ï޼"ÊÚá™yóèíퟵ¡O}!Š'ý×¢ÛŒeÀúõëY¿~ýtŸËˆ(ÿôH`af>Ýæe2jp2CÛ½‚mÀ³Ùe$dûï|çŒxHÊ‘¢ï·rÅêø/ŸWƒÜtbÕLF ŠÅ]Ö4ó,^¼×ý¹\ˆe-Ç1C¾óÞ÷N÷¥œÛ·oŸÞJ’„m(!NîI^`¢z;b5–®+Nªl7ÚßB {3È µq»­ „µ‘,ÓßÃ\@~¾úþY‚s¡s ô¾¾ö¢Ã…Ð:Ê$,šñÓ,솸xª6)±D”“d72Ó-Ë„üIÄ?´Ì„7ZµÉºÆ †ìñàü.y&ÅûeÀ+BþD°ûpè8jÙ(QÐͪ¼ÌõêÐÅÅêl[µ—’‘Û| ì­`_’ ‚ ülHbÖÅZ5¤¹(×ô¢¢­á4F3#IB£Ž§KåûõÖéi‹vÃPÏNÓ„ë®»Ž{îùŸ!…+‰«Aœ”Î4MLÓ¤»{x½j4»g˜[\E%îöHtwwãûþ^q#*^{íµpÖYÓ}ã&–¾ðC¹}ªUlTŒ<Éë8ctªO¤ôOËH¿Oú¤3u<¶³`óæÍ¬]»vºÏgXTö–( áÛŠU–Ú/qÃMÒÍØHDzK“Ï×Ǩ“Á­´5KI2½ŠÅµÈfùío¡¥eÝÝËéïW³8gõéþºã"òàðç)cª¸–¥eDTWï6TãÅd‹×‡ ¼Ñx/Y¨}:qÃ$7üÒ™°}1úŽRL)ú¾ãhâ]L”oD ŠgDIX? 
ÇËúÜ|3›6Ý3¹fG:Ïž§ÿjU ýù¼zU*I\ËînµM£•tº4\ÖI Ï÷0ªçæ­)ƒPã0`eÁ*B‰söjUŽýQð Àƒl‘XØvVƒõh4fF !AÆÚÅ”±ac¨‰|¾¾ày…BÃ0êª2‚ØcÇ4MlÛÆ¶mòùü˜0s”™ñì1ºCÍfn.Ç °¹Ùʆ™bÉB;®1M:ÁÄñ ¨O²ç n–tØ!ÍŒbÀ­·Þ $B܆ èééaãÆÓ}~ƒp]hE#⎸cú¾jpµ·÷tÜFÊ’$ý|(#·ª+!ÛNÒ{Á'éêRe”Jð_ÿµzº¿Ö¸ð3Šàû?.Þ;M»†vFïÓÙ+¨x‰eŒX³¥Ü.‹%iz ?!',Ë¢V«MBA(sît?hœ~ë.‘,ý2Ƈ㋪MzòÈ#'ÿâìÃd2‰Ë}>¯Ú‹r9éXçóõíϘ¬£}’¹¶Ü’!—Ðüµaãвe-* Mv÷w`ïØ`]åíÀ}Ô¥«žƒªHQ’¸¿&IDòÑy„PÍFç ÍÞO¡ð•(±Î'\*?€ì|Èz‘—ÎN,¸¨]9u|É$¬ûùF£™JÑk41á$LN5¸®ä|ß'“ÉÔ%7(—Ëq<µJ¥‚aø¾ïûäóùØúM£Iã=?Ù,¼R2.•J˲p]7ŽŸ>¡ƒJÿ2]%³@ M“if³&eµ¢ù<œ0 1Í0Ô¶"â4Kd ™zFºæbÞh‘+®ÄòûIY¯|åï¦û+›I»úC¼O/³Hb¹Éo!÷4Ö%+9Ù¦›úŒ¦ãékÌôÁzÊ’‰"S"JÜ^ XhÁLø¦ 7{þtû‰çÁ¼yÍ3\K|ÛLfpb„®®$K©t°Û”A™FG‹ì¢îIN"³‘å5µ<¿%ú\FY™IçH’tBæÍÀ±(sÉhZß8¬3IîÏt_I—Ëd¦Ók8Ÿ@gp ”þò‡C1² ¬DY­»þ+ ôȼ åz 7)_bAšI¹dH*¶F£Ñh4{ ‰€ÒÅ躥…‚ê7F}$™”«VJ¥T«Uz{{éíí­Kt`ÛvœÅ´X,’ÏçÉf³Z„Ó4%V>q/gõôà•©ì"äŽÛ‚±ÂàþZ£gê‹jk¸ˬâ$åòxÛ=ÓœYYS÷F²@,“L‚ Ê.8ËùþÇžg×®U“W 4ÀÊ:§‘.WT¨RûQ}ɪ£™TÄÈH±­À£&ìïÀ‚ÿšýéÙǃçA[”j½PP®ë’ॳSYÈŠ+)(Kæ–µÞó«þÎNÕ¹–8“2ÙMÚù‰Ð<ê%¹§ªÑç êªÄ樈cÒÊRï f‘"ÆUEѲ£inqš|#Ÿ$ŽA°í›!ÉdÜF’ÉøHuÍŒÕ>¥R’hÂ8òå$.žï«k_ø´ý?ŽTß!p x”ÄWŽiÂ_;‚ž=`/ÔF£Ñh’îêHÒEªçŸi&‰è$d[6 oyË3 2™LÏm(P˲¨V«äóyLÓ$«]«4Ãà«Ý‚ã4IEŸÊ¼(É9Æ]Ÿ5Ú$‰ýÛˆ®®3šY#Ä<ôÐÙñ@B³orË-çN÷)Œ›;oÞÍa‡=7ù‹uŠÜ…Ô: È.ý ¯Éò™o½6 ivIÿx, ËßøØtŸÞ´ 3Ó•ŠáÅjM&JÄV2”†ê<›¦ZW­ªõýýÊ NâJ‚*ÓÊ2t´B\gô_’yȽ•^UTg'Gìbß[2ë(ⶤ•«6q‡¶€]Àpš|Ú UÜÅ-”¥ªˆ€b%'>Ðfj}̪Ýíï«uV‚–•›ò›Tn„ü¹`¯Î?—ƒÊõQÙâuê^ö«—±{ñîI¯#šÙÃPý1„ ¾¥R’Y=“Q⼈ï•Jý{É,B¼Ätòý$”F£™ût’äŠB! 
y“ŽQ-¨Àø>þñWÇJ³ÙìÄãk4)Vöõ5_‘Š‹R‘ÝDH±HaýLœU,7===lذ!~¿uëÖø³0Ý™UŸ~úe±…”6Þ7yík¯fg6·¬ïcÑ“Xs}”5Œˆ# D2SR$œCT£H2s2ÁК¡‘KlwXðnÞþ«{7N÷éí5dàmÛQFÐHàJ CÍ&Çb-nîƒ ÅÎ$š‹êéI\?]µK$ Lš‰é{®™Å[ p*ðÇQœ[zÔ!ß%=¸HʱRÿ­úëÛìú¤çtw´4 +†êõäï€[Gqþš9ƒ nÅŠ5 •(®2+aÎóÔÇI^id±x/—ëïót¢‘lVÏ÷“ã5†»°,U¦ï1ö£F£™5„¨GíH‚³ÙzK8Á÷}Â0$—Ëá8ÝÝÝZ|ÓL pÿNaûöŽa=ñÄÝyÒðPýWýœU,hmm¥§§‡žžžxÅá‡^÷yºñ}Ÿã_ŒeÏ”„sÒh¦”Öögé¿s‚…H€ŒÈ= ÕêçI¬yª$q« ¹aÜDÚe{JD“±ñRôp|C·¿lºOo¯"ÙK…IMêUr!Žc¨A¥¢FÿqpIÔ,aš ‘ÅZd&îœ'‡ðµŠÊÈ7’,Â’=&›MÜDç°Õ‡l“ùy¹·‚ Z—¦Â§þº§"˜:-Œ¤…‹‘~“Že…(bÌk_{Çf–†J¬’AZݧ·×hÈÖ>˜tjeH1ƒI¬ 5ÚîîVïóyðMø— ¼,T#þ Pbœø²õöÂC‘$Ú X.ô˜p}®páeøœ£~°£|ø„ G»pM3Š ùÈ…¡êå[”"Óž.¨ó5M¥2mmI<9çÔ÷—Ðø¤U’) 9n,ˆx"z¢|¥WqÙe:FÜtѨñJv@¹× …ä6J~;qã.—ÕöAä†,ôÙlb¥ÖÑ‘ˆcãÞ$Sqã6Ý݉›jD·Î8Eøf X×U*Éùh4š¹AÕÕNc¿üòg)B®¸âúº˜[žç‘Ëåê’/h4SMGÇbÛ¹‰$ø¨ äÔäê ¤YÂD3«X0ÔŠÍ›7Z¶víÚi;Ñÿx%¯}û´_£™pÏ/åòMp -‘j%6OZ׳†x¯™6šýÚonسo$kð<¥_u5óqôýD,³ßW#ÇI”}wv&Ÿ …D°ó€<ÆÈfUå²*ÿè ˜6älµíþÀá.äÁA½ðu>‡º—|x· ]a$âv‡›d5ÄÌOÔ‹ÎÎúïç8ÉH+ž=­˜f;f¥ ÎIRÀJTéô„IèÄ’i¬HŒôoX,Â#<Á½÷1áóÒŒQå7¬TWÍŽµ\¬Ä¤º¤];;“˜Š’U’ŒòC‰Wé8ŒÍH[ 4CªsÚ‚m*!N\c»»'^¦F£™^<Ôœòpñæo¿ýi …gøó?ÿ‚` Üþr¹aÒÕÕ¥3œjö ðü㳺±¾µµ z˜f2ºšvއ(ØEÝ Cû >T‰fV'kؼy3oûÛcnݺu\}õÕ¬[·ŽB¡€7Íq{{_ÁâÅ­Ó{µ4šq²+€Û¶M|FN„8-¶if …çwíí‰ÕÙÆ P(Ô-oÆÀÀW_}5 bÎ¥]]Ø´i;vì`Íš5㲬;è #¦ÒkG£¦®ï `ÅŠ­ã?1ÞTÃk¡rÎb™âò›×ãÉh‹GCD^¥VPoÉø›6aÝzk"l5bÛ‰IeÁ·}¸ÀJ¶5ŒÁ&vÍî 5à˜"œB*ó€ eòŽêÔÈ`]\ZÍ{J¥z—Ó¡4¤Òéoâõ•›^…ýúõØö©Éò0ln'ç—Ë)•¡TR>£"NA’’6í¿W*%)g'€>µÕ«w20pÿ„ÊŽ©îSÌ$ùÅpcEIpÐh6(¹F×ÐZ쾘È@tìj5ÑÇÅ]7 §ÔßaÊÏGÅ~k<™•È¡‘,°½ùð‘Ú`+­ˆX–²hM‡~(a±Ó¤pÿÅØïXŽùŽS“åâ/(â][[rNðK¬ÅÌIBä|_‰u ”ßW*$î¯cDbˆíM£‚ÛO’A¸­M}¥©º®úé::’Ÿ’ð£¹þéqšÄÐ’kÞÒ¢®¯ëª÷r{Ls¹kö-öf¿BÈ1X„uïþà/ñ—ù]8à¾õ­Õñºb±H>Ÿ×"œ¦)S]}àñÛæqâ‰Ï' ]wçû>•fç¡È£ 0ÄW'!ÓÌJôôô ²xK+ÁË–-£½½žžž¦ qOOË–-cýúõ¬Y³†3Ï<3^·uëV6nÜ$ŠóX²ÖA@KËÑÀÔZ‘höm¦²ß[«ñòñž˜¤Ý,Dÿ-T£\A[ÅÍB¦ZЪOF[܈è<é$¥¾ÿåÁ®‹Và›_#hi¡D’ôé` ô­o‘GUéßöõqIk+”¥}mçyËb¿=, À <,Ë"—ËñÍ¿ÿ&}00pìËüep¬ÿærë‹'sâïNdCçŽzê(ÎyÕ9H’¢Ô <¬~ Ïõ0 #~U*BÓĈ\=3ZA@††Añu«T*†3«3àXŠ#]öju°?ép·$fÚô§ZUªC[[’Ó0’˜|ém‡@²gîÍDISݧ˜$Ž›ü¤¢¡Z–zêçëíUœ@Fbô¨;r›¤]yù 6l˜šcÏÅz¬Ù7Ù›ý H´†ÆR*øîw÷ð YN>9ÏêÕKêžMcykö=¦º‡À®ÿz‚ös_¯:ÃbA Ǜ˴Ôq$ÌŠKs¥Z3«X°téÒA+¤2 CYÃ~øá\vÙeñç;vÄï7oÞ\'ò­]»–Ïþóc:ÉÛnÛNKKËt_+Íg*ëñA×ïæÐCŽÿäŠ(«8åb7‹eAàyÞ 
JÏSб¦Bnk» X=árš1T=žŒ¶8M$á%á§ç)a… §¿kuÜO( ú ó¶ogû~@%—£X½s'ïýö·ùv>ϲmÛÈràˆ={xâ»ßåçííö‰Wqüe_媟ÿS\WîXwÛ·o玳ïà†{nàü5çóÞߟˮã_É¡>”{çÝKÿiýõÔQ\}õÕüò‘_ây¦iFVa"ÀA€išX–Eø¾a¸®²×³m› ‚ nÙÞ4Mü(>ïûq}·mÛ¶ñuèÁ·ùÃnàˆ#ÂuݺŸëº˜¦‰iš£XXVsˆ´K¯¸ª ™ŒZ>Bdû½bgªû{×M·fÈO&M¤ÜKårríu˜£‰3Ô5üìgßÃõ×ß3éÇ›kõX³ï²·úB‰ÁYRK%õ:ûìÏð¾÷½ Ûžš>”fî²7êñžC°–ݧD¸Æ\÷7ÇD5Ì’„SÑazç @™gnÚ´)Vˆéééij5'´¶¶ÒÚª)ôõõ‘Ïç¹è¢‹UÉ?üðºmGâ‰'ž`Æ œwÞy´¶¶òûß?Åk^3Äý4s—¾¾>6mÚÄ–-[¦ì“]·lÙ† ”Ë÷{8å¹qÆSª fCÊ$ÙR‡±º/•JX–5å3ë":X–[ ™¦I¥R©ÝLÓ$\×Åó”’eYø¾išƒ„¸B¡@>Ÿ' ÃA3F"pŒ‡0 )•J‹E|ßÏao¦´—z|È!7c UÇS‡·mÛ†ïûq=N#Võ¦ ÿégCïwgÙ»_ä¹ÏF8ýñÇ)½þõtwu‘Ëå°m›í…½™ ®ë*Q¬T"†|ã¿àÆOšcþsöÛK–,áŠ7ŸÅ~û-á/ÿ-áÊó£-[ø¿--|ëïþ¥ËóG.¡íÃïâîï|‡…Þ§¹úWrÏ%‹ùøÇ^M¶X$¸^ó—ðÒ!§`V«þøã ì·grH¬eKg?¾ÿýïóøé§óÿýíßrBk+7÷ôÐ÷ãSŽ”J¥B.—£V«Q(0 ƒr¹Œëºø¾mÛø¾OµZ¥P(P©T(‹ñ=†!•J…Ò·ñ©O=Èõ×_ÎñÇo‰ï0 "Ó'¹?J¥¾ïã8Ùl6¾/Dôdª>ÇUWõqé¥ßç{ßÓ¢õ\cóæÍÜzë­<ñÄSvŒ©èoÚ´)îW„ÀK;W`ÿú25“ÖäÁ?.·i香 Ÿ½D3ílÚ´‰ßÿþ÷£Þ~ÀyçûIä#©«€žçñùÏž‹.ºhPü¸4üpÓ¦M\vÙe–,Yš5kbK½¥KÿÄ'?¹|º¯íؘ’ Ša6(:;;©FîF…B¢S+ØYlPâÏx•Äõ½±Á¤I:$ežþJ²>¯Þ/[°Œ5kÖðøÏŸü‹šb2ëñ²e꜖ž±‡å›Ç9}D×ÇbT™R-h*•J\oŠÅ"¥R Û¶Ç-@yž‡ëºäóùX@‘­»»;DD¨V«±Pà8NlY$ï3™ Õj•ŽŽz{{±mÃ0(•J8Žƒëº±%R¡P ¿¿ß÷cñO–W«Õ8cV>ŸÇu]²Ù,…B!þ¾"¾‰µSZ@k©0 Éd2ô¦"£‹ÅÒD­÷–.]Êš5kèîžÚãÍêñ†qø_-^¼˜U«VÅõ8ï+-§\†Û=Õi¶3¿äÕwõq£ùw7ÞHär¹øw²m›ÎÎNLÓÄó<®¸ð .w/ç?ËåX„•:œ}Œc×b*ÂÇKàžëa 8M¸~òéOóðÃßã‰å[¹éo.æá—GMÍéÒ¯Á®[±‚Ø„ê0Y¨ðrj{óüó9bçN.]²D}ÉövzÛÛãïz×»X³f  ,ñ€ÀtªÑ9›¶MÀ`×Vi³Çáú„!~Å+(ú×ÒÝÝ=è~”ú-u[D@ꤾJë¹Q×QÓT?ždjµí$º½øC¡~øá¬^½š¾¾¾I­·i&³-^ºt)­­­#&š¢gV*ê²J¢´¡rv ÇxöÑŒ5kÖðÈ#×bšgMIùSÑ7N5éWÜrË-SzœÉêW@}ÿ¸¤›À €õëŸåæ›7èøosi×f[ÿ8ݯ€=Û–%á?H{VŒ‰twRb·èðD3öövvìØ1êþñP âÆ) ¼ýío§½½eË–ÑÓÓ(¡n¨Y aݺu´··sÝu×Õ vbm'ˆöp,Y²¤n¶oáÂgöḭ̂˜}Fý%ôÛ¡ ñ@>‚XüD'ñtNÚ*Æ%6ÅÁÿâÏ-æ¶ŽÛ¸qëuƒ8×uã©ôrÇèx~ó›ßpñ›.&ŸÏS*•XyÀJ Ãà¥7¿Ä‡]Á%w_‹!TP7pd±å†-ÜòÄ-8¶£žx!j4j£‚@f£sÓe/Þúl¡Lf-TA´OÚrK"®¨Q«¸V†Ñ>…èœò$þïvT†¯¶[.em×ZÚnn›ÒŸ~2ëñÁÌÚµkñgÿwÏ=·hl'Ó]7‡:‘2—ËQ.—ëbSIÊårqJìR©D†q ÒJ¥[Ù˜¦I¡Pˆ;4b±cFlu&õN„*±t—;ÙêE‡|>‹Åb1¶Ü‘‡OZD6M“r¹Œa±ð%e7 ®ëÆËDHËf³†/O‹â.˜.Gî#=,Ë¢P(Ä¢¡œgwww¼¿Xüyž7áø#Ë–-cíÚµqƦ©¢Y=O^¼x1‡vØÖ¢möáÁXÆ3<öû_àûo"“É`šf,r–Ëå:·Oùí˜ëkêùwÀ¢xˆ¦¼ðÀy‚[Àþx÷ƒu,8Gy¨áë-Üs¡Â'Úª™ê±á“Ì3Iæ,€%K(¡š®I“d_]²„òÉ'Sжï"éÉí*ñs’ø6fù¸8´ðê/P.—›Šâò¼úkÛ6¥R‰Z­ 
ȆaÍfñ}ŸRuÞ¶mÚÚÚâû@®ÿ»Ė³meÂ%æX’¡!—äSÙÚÚÊêÕ«‡ Y1Q&³-^ºt)«W¯ž!N4ÌÎÎär JÇÔá¾f>k×®åÖ[oeñâ©I 3•}c’~ÅÁ<¥Ç™¬~$ýãfx Ž §rƒïûu“¤š¹ƒX«Í¶þqº_ÑßßÏÙvøVÓ}dR̆éÍ ÔX\[ÌÏHÚÛÛuÿx¼1®¯¯Í›7Ó××Ç…^‹rñiÓ&–-[Vçw>¡žžX¶lÙ¸\Ën¾ù„½šµí¹Ÿ>Ç݇ÝM—ß[ý„aÈ®¯ìâ§ü”w®àâ§.æ˜ê1|ùõ_æÒo_ªn¤,ÔS$€mß߯£Ï=ÊwžÿÙJ–ß^ð[z÷pþ¯Ïçg÷3l˦§µ‡ÇZã=Õ÷pæÑgrúkO§çôª§VùÀ1`õ×VSð ´loá“§~Rà»0ÿ€ù´^ÚJko+«ï\Í3ßz†®:€ù•ù\²û T!€_°ìÖetúÊzî°Kcå +¡¶üf }Ÿîãµï|-Ü´û&®¯&,…¼ç°÷ðÒ'_¢õÂÖÄ«õeÔ)¢-“Èì‰I­m#œLjuEËLê…½nTcãç+Ϻ’BÓTˆgªêq<ž¯ÁëÇxByT\¸Ê–…[X½s5E§ÈÓóŸ†w nÀHôzcðFèJ°$¿„;¯º“ƒ¯8˜åÎòDÔÊ.Üó³{XóÁ5\ñØP€ý®Ü7fÞˆgzœ|áÉÅDp]egšÊJCŒâë´å}¸ƒÜÌ ‚XœOÇ•m%~§Ä@”'²mzDÊK‡ðG±T*ÅVÑ›1¨E&˜¤œ§Ÿ~šë¯¿~Ò¯ÅL®ÇÍXØ[ý ‰X#xs̃qìUf"Lu=ï:€;îX3æýF¤ñÆq¶föSÛKlݺµvË-·Ôž~úé·ýà?X÷ùÄ?W÷¹«V«åÇpìr¹\+‹µ|>_Ëf³µ®ùaí©SŸª•ËåZ¯Õ[ËÛùÚ ÿxCíŃ_¬e³ÙÚŸüßµšQ«uuuÕ8æÚõ®¯ÕºjµgW=[»û’»ÕÁ»TÙ½ÅÞÚ +n¨u»j?<û‡µgßðl­fÔj×~ëÚÚ÷>ÿ½Ús­Ï© ûkµšU«ÕÌZ²6úÜU«ÕìZ­æÔjµrRv­\«Õ¨Õ¶ß¶]}¶kµZ5U†Ð-+«ïZë¯Õj½ÑºÞÔv²]«ýaõjÕjµVë¯Õº»»kß>çÛj?)ËŽö-'»îôÏ%Çë®ÕÇ©uww×lÛ®ÕjµšeYµîîîZooú )†X<ëÆÞf<õ¸«V«½ö7¿©uuu¸OLw-¾öß¿øûµ®®®Z±X¬uuu©ßx‘߬¿¿R˲Œ“r¹ÿ|~,-ÀøéïïW÷Étww׺ººj¶mÇßW¶—Ïýýýƒ®ítÖã±Ôá[n¹¥vÅW4¹.µZ>¯¾¿óïÔrë××z{{kÝÝÝñkTHÛW­Õµ«1VjY6úŸ¯ÕµK‚4[“IW­V#:½ñ¯©[¹²½_°æéÔj­aX³jµšÑx}£eN-i¶ËÑyÇx–eÕ²ÙlÓuår¹fF\W{{{ãû+ŸÏ×ÞÞÚZ{lÆZ jÔLÓ¬}oÙ²ÚŽýhȺ±·K=¾âŠ+j·ÜrË„ÙÝ]«YV­fšµÚ$7o3¹§ÓT«ÕZ>Ÿ¯Y–?û¥Þ†Q3M³–ÍfkÝÝÝ5Ã0j–eÕ Ãˆ_¦iÖLÓ¬ñg fYVÍ4ÍšmÛqý3M³æ8Nü²,«æ8NͶíšã8µ|>_ëêêŠÏ3ŸÏ×lÛ®‹ÅøyY«©6¸\.׺»»kýýýqßE>OVýék4ifK¿b¸sujÉ33^æ8cë?kf5³¥7ëù½'kÞ~]Óí»»»kŽãŒ|7AW´L3kKÿxÁD…¼Ñ’ÎT2VV¬¸qBÇ·™!=éã'±pËBŽ?ôxî{ñ>^·âuüèáqú1§SΗ•›äCÊJæÈ'äè ,:mí}íÊt!J2`fMÌŠ©L¢Ä´Á9£,¼ÄuÜ'Éz)~SITR UnÚì" Çpð)'ñØÄµ3½dÔô ¨O²ÞN¬HºàÄðDN4N"ËŸkS³yb-Ïôps*6×?ÞðÉñ,âÙ)Û¶cK ÆxvâöêÜ/˜Õîã©ÇpÀ³ÏÂÂ…£ß)² ‚€>ýC²dëb¯M&b=à8Τþ6#¹œŽ•´5ÐTº¥‘6£Yl8I<`šf’X•J%ºººbëq[•8ÓÅDÚb! GF†\ÛÚÊ1aÈ7Æû{‹¦½—lYT[$ÅÊÿ! 
3M&‚ÐFyÈ×þ²ØPVl#tÜp ö½ðºµk㚂ğ3QVrb1g‘xí]ž‘¬õ†s•–z-÷ªišu÷Y¸qÅ ŽÚ¸‘%?ÿ97Û6+Ö¯çò+¯ä¶–^õªWMò=“QÇ‚ïCG‡ —7Óc…W*•º8ži×dÏóâ6H2H7³®T* …x¹ÔtìQß÷iii‰-…¥î¤“ðH,C9‰ï)áÒñ"ëéX­°‡²”–XŠÂL²¬ÙÛõX£™ &«ËP”«¿a„qfª™p=>v€Ï¾ô ”›[=–eî™&¡ZÒ3nβׄ¸ñây?~ú¸÷—˜X \tlÛæÙ#žå¥Ý/ñª»_Å‚µ X¾|9çÏI"sôƒ‰ ¯!I`¡F>¨›B’tGË$óg-*#›*¯€Y¥ïAlÊû†±’çy uþ錙e mõ*Ž6…JêXžçÕÅÖJ›¿Z–wœ‡Bâc5º Š+I&“‰ã:¤¨âf¸/pÏÇO‚+zFÞ°‚RòpÓs7±+ØÄ­f²;$3iP2âº7;eC=8‹Å"•J…ÎÎNúûûm/ƒ[J|½éŒ7y×Dý/•|öXooÇET$ c“´¯’,FÁË5KŽr2™¬*í2kÔúyýi?N%ËàÀÔéãfIúeê"î«‘ÊpQs†sån(±m[M¨¯û?ÿ‡Q>åâ‹y|ófžzê©É¼Ô3ËÇQ¯½Mcœ³¡¶õ<ÏårÑ9[uBœ´Ÿç²³³×uãd"¸ÉvòÜñNÜœ%Xz†Ø¶=l›Æ¶ºÑuz_éh4šáI‹pîÝïþ 'ÉÒhö*Vò1Kr›Q(øèŒ¨û3^ˆ{ôÑEìØñ7cÚG Ïóèîî&“Épø’ÃÙ¸m#wgï†KÀ2,ej }D–Ry$ƒÄ I¬3Ù'O’ù³˜z/È`ÔÈ©qýH íALò3Iö7‹meư1¯:;;Éf³MÅY&Ù'ÅrÈ÷}<Ï‹c»@"^4ËŸ „ÀóÏ??º£[Þ²…ÏüÇgê-YQ5Ó‹£i.Mã´««+¶®ûõ¯=ݧ?ï­fª ¯ÜÅŠ–|ÃÈŽÕ.‡jG’l \Ía\ŠšÃ¤7C|ÿd†Þó¼XÀ >$ž ÀËNÙÁCóVñ·Q"$“Á²ŸôÅ’yabF¬[¶‘XÑ5ž‰$n–eÑÙÙ‰aª=~ç3Î=—W­âÖ[oÝ W|ú©TT½ß[sòL%‚U*•XP•¬ÃÝÝÝAO¤IÌ3 ŽX©TâÄ<®ë’Ïçã:æ8¥R)Ž9*–¼3ýœ‘øƒ•J¥Î9kT£ÑL‰{(Iâä? ħ­íÃ0d÷îÝÓ}ú"œ¨T‚·¾µk®¹"ɦ®ÑÌp–^û ÷/8ŽfQâ:;;GgøÐÊü%Ê´Nþ4§™BÜÊ•}À1£Ú^Ü&ÏóT‡Ò:Á £ŽwPâ›<äÊŠM‹i·Q¥­ØFꋎ3QJv/øÀLä\y8g¼Å%¥Y¾³³“|>?§ÌÐïða^d90"‘…åAáA”Ë庎—ΕԙÀHÖDXáJ]—쬣Mk=ñ}øº þ—kœõÒK¼r×®±ÿ&6ªÍ5RŸE|ÛË2bZZ“u•J%Å%˜|Zˆ“ Çqhkk‹ÝÈ%#o6›ƒÆA@Ç¡‡òÃï\Ä“‹~ííA@.—£»»›B¡[%¥-š,ˉÎ%‚ø|ª†g³¯^i‘+@Ö0Æ=±jFlý‹yû ðâºÊ%u²á+ÆA²S†‹ŸR¤‘IÆ0 ikk« K¦ëjµ?kÒ“hÍ,ÐÇšÁm¸ö. 
^–ÏéÞÁHã É×’Öä!qJH“nB$A{ºK–O½©Ÿc­ã1ð_ÿzÖ¼ð˜GÍÜDî±$M'ÿ÷ìr¹O–ˆ -Ö¥²¿„j©T*ñ3@åˆoB³ i)WÄoÏóâþPGGa²jÕªé¾tB²¥†¡js-ëÒ8á—F3è»å \~jS!nL“VéG­vMÃÌx!®ÃKX–Å«}7l¹Û¶9ê¾£’A^õ¾ •Š.áTEOk%ÕÔrm"Ú”ñf³è¥R)…¦ŽŽºººfýCø…^zé÷À©ÃnW*•øÄ¶O°.·nPÜ1Íì$=–޳ïû<ùä“Ó}j$äô_Ü€{öÙüUKËØw7IFÑÒÁÃÂI6m+•Ju÷RZéèèˆ53*• žçÑÛÛ‹g‚„: Þ´²ˆ‘mV:–¦ðñuÿÊ®—æÈsÏgIeÖ¿ÑZ¬|ß§š Pæº.žçÅu,\°s'[®½–|×»(‘ôåä‘—Ëåê„Ñ0¨ýE®ì!”%\>“1ÿ!«ÄNKùmÅjRb­‰\Z<“å®ëR*•p˲¸oÙ2>¸f çðƒ¼xòÉ„¨ÛË'eôoYu"W%p¥õ}’>¿üêéîQ]ü&aMnãdβQ„ƒæ»ß°<@9Ã&‰“è¡ $ˆ„Ï•ãy@f šo} ¼¥àì§>=`|¸ ¸tMB­ÑÌ4ÄC„mHââJìÂ0 )—˱KwÚ•[ܶÃ0Äqœxr-—ËÅÖj–eÅ“3Ò¿u]—jµÿ÷íÄ“5rßËDNz}.—#›ÍÆíx>Ÿ' C.¹ä’é¾ÜBÚ§RÞñŽ~~ùËÙSZ³oáÏ=ÐBÇ鵦ëG%ÄÉCXºh!ɃN3'™ñBÜÏ~ö<ðlݲáfU-Ëâ ¿<ãeö§låËs~´RÚs©Ðe·Òtáé„ š)¥³³“b±wP€ØÅF´é@Ó³‘GåË—¸iš¼ðä ÚÝgŽbÛvÜ©¼ãŽ;¦ûtÆïÃKwý–—¿ú^:÷\FÝM®ÄºÌˆmc¬êù¯‡aH&“‰E­¶¶6²Ùll‰XвLÈþÍ\^Òâi£`5”‹LZ,uà·ÞÈ—nÿïfŽp¯KÒFlj…qY¿~ɼ+𠺊ÅXL‘f¹L.ú)òc¿ôû A â9Lä±S*•b÷Qqù”½ˆr ÄZÿ¶¯7žpB,t‰•c`Y*Œ¨ãðĹçâ-\HÕUùW€“OŽ=Y¤F–H„1Ÿ$ ©‰škL÷§ÒVdu×#ú¿·»DÀ3`ßBâÅØ!X.ØÿÆ3ÑÆq_Î.@°‚Åê³s8X룋á¢|¸ï~8`T—òÂIÚ"n&Q(°,+¶6kœ IiËEÐMâŠ&ñ }ß§¿¿?Ñ*• •J%vw‡jµJ.—£X,²"mßLö“ÿiAI¬ä›=+êâ53xb&аÌ0 ?üðéþÙÆMÚ-µR5k¾¸W<‚4šÉ"CÌ7¿bÐ: M2bÿPÌ»…ƒ„4sŠ/ÄíÞ½›Lfɨ¶- †ÁO\ÀÙ¯=[-lœ‚-¤Þ7»oÍ”ÒÌ_^:KÒÁk•Ù*Ä-¼s'x`²ÀuUOÃq” ¾a@dÁuP´š˜_ú¾Š®ÑL3áÛ©Y0ïàƒåS1q‘¤5é ©c@h2 q Ô ¥Z­Æ bºººê™bezäM7qО=[¶Œ8°É]b¥³Y]Ö ”»T>O!ú þãÑGùþ¿ü • ( ye´(—¦TR®¨#cˆ¦BÈg‰í„!«Þö6.¼äZ::Øÿu¯ \«Ñ²};O|0•ùócq Ã`CT~:<ŒdÝ•—9–,ÜçMÓüPC2%œG¸F¥Êã¯l1Ïs;‰ËFõõHÌñÚR_γ·ÉyçI„“cݼáfÖ4u&ÒLضm:::(—ËñgéëU*•ØåR¬ÏdqÅ–‰‡´Õ˜h D2ù,¢·ˆÜBZ3 ƒ|>'#I‡‘(O¦/zfJˆ™‚ÜêJ„àÖ[¿Ï÷¾×;ݧ¥ÑŒ‰Ï-j:{7êÌ¿ÖoEêu ÍœcÆ qtЈÛ …8nN«×šø/4úHŒÔóÔn¨Óޏ oi¤ì­3™ã¦íà åÞU(ÄÁÏ{/fÇÅóЧ>…c9j$èyJ¬s]%Ä9ŽÚ¾™˜P*©Q£e©cø¾ú†êÕ¬Ó7’¸W*©}A™ˆ¤ËÈf—ê%åJÙ¾¯–Ûvý>rž ÖEb –5²Jã¾ÍhHGS:}>õÛg™÷ø<î¯QËòy%"ù>‹Jœò¼D kì0šf²N:©…B"•Ëõ¢T¥¢¶7 µˆ|éÎO>¯¶1ÍúQi¥Ðè¦çyj+ï Ñ4Žk'9¿0LÎ’´œi“”LF-³,u=::¿1ÙF®ï'eÉ5”kÒxÝ*U®ã¨m|?ù²Ùús‚äzÎRë̉ây`u¿<ù(Ö3Š:ªýñ-‹PI|ã.¥X³ 2P+£0 ãNMz@6S¬ÝFòeË8÷ ƒ&¥5°¤3ïe³Yþêç?gñŸý—úÓ±ë–iš¨G¥ƒúyJ(ͤ}v¨ s7jƒÄ…)IŸçytvvæóÙ,‹/æ™Ý»yñâ‹ÙjšäZZ8í/þ‚#úû9c±UZÕYÛšT¨¿¿%ü\¿N’t¾²¾@iLãíÛ¨µ”SË›U@YÖŸ².k†&M7= ".‘2nF]<6Ã0â¶z4.à l´“‰”;ÙŸfôHoÀßýî'ZÕÌ:|`Á²mM † ©R‡ËÐÏ8ÍœdÆ q†árî¹"Xøª—ü•¯`¤\ýþuÝ:^<ê¨äAŸEÝÿ 
Ï÷úªS(ƒèjßø`/†:5·¬ú¬qÒs,+°çóƒZž—XþH™‚iå<“IÊM™àÇ7z£e’ˆÍ±!Aì6IDATX™9ŽúîC3Œl"°¸ýöÛ§ûÆ„ìØ±cØ8ð ÏÀxOCNÇ©¯kÍêëPÛ‚ª‡Í0PõÆq ¥Eý¯T0M3qc¨sh\·^(kd8׿a$%²\N‰€Åâàã w}†¼ø‘´ÐÙ©î#¹w›}±L‹qÒ4ëÀÏAwbÓ„—>2´?͙˖¼ƒøÀEMHÒ¹§J¡««+Žé&1$Ųm® ”úûûYrÎ9°u디ߘ]øÝoxOÍŸ–xJÙl‹$¨¿E”µõU€¯íÙÃŽ… )•J\}ÁìnmUÆJ f|÷aÌȼÇPÍõe=Ä¿uû­YÂoä¥K1.»Œïm܈ûéOFÖ‡ÅOŽÊVtf#]êãêBd0Êò]§$¦›înêC}L–·ÁHÍÀÜh&ö:"àKÛ!žaR*•bË5Û¶)—Ëq"›R©'4èîîŽcûJB’™N:~«fzðRöÛoSœôB3 Fð(„X0› ËÄ$Q,ŽåI¦ž"Iu;µÜ'1|I#ÌET¿Ï§ÞCM:åè}Yí‘¶gÏ^¶¼ xUÝrß÷)•JMC1 ¢‚¶€Ûǘñ=éc“béòƒÀÃÃþûƒëré¶mrë­‰€%ÊÓœîÂlxCQ5®› ¾›¹­9ŽžÒÖ9"‚5Û>-4ˆ%ìÛÑ¡„ô~²½¸ñ¥þÙl"z "VT*ÊQD€´˜×HºLù®†¡Ä†|>ÒVK¾Ÿ¼Òû»nbé$B¨eÕgÎs]%ˆ[¤iª×P¦¸r-Çñ mkkó>ÓI<ÝiðçGÿ©éú ØríúŽéãÅ7NîÁGrË4 ¨ÕFWÖx˜H|(Á¥Xœ\KêèHfãi¡ÓóA®ÑíWÄÏ P÷µšœš "’ ÞÑ/±üŽ^8õÔQmOÕÁJ›X¥.*•Jð6ªŽË,d»ë²è=ïÙkÇ;é ƒxmês:§´Âqí CLàˆê‡ÿú‘G¸üå/ÇÏçÙ2bÌpo`ý4_ËÉd¨&«€êVü: Y~æ™ÌÛ¾jµŠ¹x1æÆ˜QæÂûIJͱ’±AÜA=’øk"š—I¬ÛzI¬Þd 53BmlæööåàÖÚ³mÔø¾ïûqòÉ *qÖ Ãˆ]FÓõ[Ä’fñÔ3þj4C š+ÏÓüþ¬²n3é¬Í>Kv‡TÙh;‰­[ ¾ÜÔþbDØý—ùÍB´]@’^ZLó¨vž¨$±Ñ1dâ’p"rnò<È’Äý”e"ÂAb “þ¾éðb%=‹­™wìØÁ 'Ü \R·Ü²¬¡û²’º<Í èBhö3^ˆ»ýö¿NÞi+•Ûnƒ®.n}ßû8öüó#i ¨é ² ‘Ätl«4CÍ–5Zü4–ß(6Ô1i´Ä‘NRÞhϧñ;ärÉ1‚ y_©  åû8Žú 2™Áîza¨Ê¬VUùbaW©D~lѹtv&ÖF"؉)¸ Pe† ›\«e£±Â™a,|áÞçë:ê(޾ðèé>ÍÙÁpõ{o1Üñ%¨:ÜÙ9wD8 ë®g9äÁÛ9ë'ÿ1¼E£ Ö³¨P2JqG»££ƒ®®®8® òæz|Ó4aÑ¢½w<šO4‹5âÑ/½Äÿýßq.¾8ŽÅé8UàrÔQqH¯U»vá_s ÝÙ,_}àjwÝgž9Ý—sˆa{£_~²k+vîdÉßýË®»Ó4é‚Aeˆ›Þ´!Öõ¡¡¬ÿ˨ûîÇl áͼ2„ÐR¡›BX@WÔ6U¢í¿êÁWCXä£V1õ½ ” j’AÍ2a'q} ßOúâ Òïj  á Òíg:ög£G@¥WE“”¡U?±iÛÍ-¼'?÷!ÄjÍó¼ø¾·,‹ÎÎÎxà¨-ÅöITcGFyJâ Èså—M©ÜþìpÓj.É%â”`Y”`f‘ümÑ{qù·Qît’H+™@ñÖwS/hIŒÞ¡žtWNB;¥É6”eñy¸ i4Ù· R;û"з{1¯iéýb˜vEÕÉ"÷9f¼÷Úå½Pټ̀­!ìgA¾+ÉH|Ÿ7¸pµWŸÀ¢8›Ñ÷ÉGç°ØjÀ‘uoGG²mºO#“®[/¨‰µãdš„0Íäø²}³ðò½ÅÂ8½\öov½çgáu‘u>Ÿ|g¿‰©EK XçnÙÂ#kö¬©â‚Z*•ð}Ó4éïïÇuÝ8óèLCÒÚ†ˆUiQ*bŸ0õãökvǦkŠ•úì£cÆ)}¬4vj9Ní¤ö1Så‡ 6&•m£%#uŒ‰fbOyä^úE&¹®žçÎáW,Ì$Ü(¬‚ƪ$?¼ˆPPŸ)º1©D‡‘Ä4à !­!Þ7[6K.élg~Wÿýßµƒ– ÇܦZ$¿¹Üìz,¸Ï1£…¸0 ùÐ’ûÿ6]/7Û¶UÃ'O¾¦Í&â&6”Ð×(ØÉC8ŸOÜfSÁøÏše1â^xáö ‘ù×u]œÐÑ1mæédô÷sÛpîtŸã88l×yhçVlûÏG·ƒ¸ÀeCuHÒ±€›î|û rÝ‹0ÈÒ³1¶SàÀ!ŸÇuÝ9ÃGò¾€S½tÿý¼aãFŒ%K\—£¶mãî¿üKð®›f¹\ÖA„³j5™$ΫºàIB˜|>¶Òñ<=/)K¬Ç!±¨Èï+ÂÙ‘uÛ­¼Å€S~qo" ŸJ Ü"ÄyÞ` üfV®•JýJ_4Ç}v걊ìòý庆lVYüÐ\V³~4‰xí† ÌU. 
“Ÿ0“Q—aÛ¶¯óë_ÿ+¶m×e)Mg]ž Òa¦,Ôã@ò¦¥×‰V‘ötK{¿©ý¤Üôö!¯#µ¿O½â‘ÌÕÇ¡°n»páB²G ¯sóÁᡇðŸy†ò]w%÷«ÜO’ð Õ†äÙÃ,Œ¶-§Î ߇Ë}X¬N8ܵ‹Ò9çÐ}÷ݘ+VDm‚Ú90Á{è!̧žÂ_±‚ÞÑ„‹˜€WPšùü4ÇñÌ'QJ ’ض÷RÔØ65Tbß ãõ°Õ1Âf'½ýüÅÀ¯3ê;Ž3rW›iÑtŸcF q¾ï«ï3$¥R)‰Ma£Ãº› ˆ%\:Æl™)kà®ï½š”Ýó<<Ïà f~0cÍä²mºX¼pÑóüÝÿ1ò¬u…¤Caª¶Z¬«¦Ý…o& nú³ÏófEöÑb˜ªìÖÎzùåÜîyüObÛ6ÕlVM IŒRÉêœÍ&ƒp™¨*ê¤rY ؇‹‹šŽ©*>g6Iœ¶“™<à¤:õöü¡t/F+Œ´ÝLÑEå™a§5]¸®ª’†çŸÿßýî-yä‹ô§=ĪÑ0’Œ%)‚ŸýŒÂŠdwîÄ;òH‚E‹ð8cùrŒ‚ƒœ?¿Î2,Ì={pvîÄ?à‚ùó±·mà X´óÐC1/Æ ùÉÂ;ïÄØ³»¿¿¹Ëp”) Ò$œ‰XY6kS£hu­UGv:´|wq©~}Þ®„8Ô5éìÇQµºýÎwo÷áê†äo"ZßÀQ&ÆûŠéûKܱ·Yp¼º(P|Ç;ÔúX¸Wç`V!ûóŸC`{oص þøÇi¨e#ÀŶ§Ð’b¸´ßé E"¾ÉÄGªÝ•¸›‰{þg§/?÷ Þyï=ñç Ãpè‰ qKuÙmW3g™ÑBÜ×>Ž#ý(—7é¼^yå•^.~÷9f}æ É`¡¡CøÝ3Îà´é>·1àÛ:„Ö·×ÏTú¾ÏM?½‰r±œøKh43ß§·­xá…‘·MÕé+¯¼ç_F1#¸/Q©Ô»ÛÍ2Êå2›7ožîÓ˜02Ž?µ óLeìðìk^C¼|ÁVìÜÉ–}Î8º»Ë·t,ÙFKñÆÏ†˜å¤‹ ñ³óˆü³HD7“d’Q[MžFË”}ÎNX¼ø1®¾z%^¸Ïû-a²ö¨æâ•;”Ëq$@¹Ù,þñÇãüá¸]$ÕÓÂL†ìÿHçk^Cö¿ ÿðÃXwÝ…¿ß~GùÉOÖÇ8+wà  tÉ%äûú’lÛ`üó?'ËÒ‘ñFf¼mV>—ËI[Úgp(ŠÅ¡'vTŒÅ·§–YV}Vy±\jàœL .èj¾>C½»¢š*Vsøþ0çŸö—5²êxE¸çì+gUÿÔå^îÁqkض­IAÅ5Nüˆ8 Iìêf–» Æ…¯ ’LÏò;TI|˜³@§ÛÝk†ÀZÂþSOåèhY†#»ùïãsÒš.ÄýöÖe¼kÙ‹M×­ZµŠâÅ'† ¨Æ´Œ643†çBXºcÇ å–ea¬dð¥ÑÌp‚înú¿òn¸bG³ÄwÜqKXª-à‘X‚šiåo+pOüWÔè½áFJÿõ_°{7ÅÝ»yü_ÿ•g?œ#Òa†Jæ42l܉›“¤ H,+«…ž%׌‰yxÍ55N=5Ï]wý’|>çyO¯äÑ“æ”IxðÁ¸$^v™3ΠˆêB—Iyâ-\ˆuòÉJ>çœø8Ã&ÈMO¢ù¡b#¦Mø‰ì/kL Ò˜ñ ‘†¹©äžîø’]XLù¬è"†¨þœøÄúÔg0v!NGí ¿<‰/«o75É:)ß¹$Þª ´DåFÁùO»{¶Épê«.õa¿ýJ ×Uílgg"ÈJœiAÄÑ0TIcÄšÐqà4| .7T%Mü€….As¨ŒFš¦”JIÒ­J¥'ÛJ[¹®‹ïû‹Å8)ŒÄ Íd2”Ëåº Ú`ONܳîX¶e“8æ#fŽžÝ_W3IÌh!îÉåËéxÕsƒ–ßrË-üŸU«Ô‡ÉÌ”•(X=ŽÃŠñ†BpQƒ» ª/"€’„Ré,xÔ'™ÒL ‡}~€sÈõ—òŽûïçm<£óæa•ËX–Eà8|£Or4+•ç%J›=8-¦‰.a5ü3“i -«C‰&a¨ÛW³ åq-ÂL‰Dh“Éþ|´¬5ö(Eç#"Ä“~]†D¤…SL Óç/ÂdehLà’µ³¢å]ÑçTöŠ_œò V³zò®õ^Àžña׋?Qî‰%ec‚˜fˆçLÈ—Á«À\8ÒSY²€4[4šJ-Öw’8F’Î5ºí{žzɳCÜ%™Í"ÐÇõDYëAõßLÓ$›Íây¥R‰®®.‚  P(Ä žÂ”¸eYq/-¦9އ°0 £N”êêRÖ¢•JÃ0pgÖ qók5Îúع£ß!DO´if¶°5Êä–æè£VVF2#%>üÍ dÉâÇ-ëèè «« C«ÆšY·¿Œ•†mg†Ý®}e;ä _̳ÐXHëÛ[§ûÔ5š:BàÕúÑ;ßÂέ[9iÉÊåòøbߥ]KÅB¦Ñ¸'JXG—&IÓ„}ù‘ø÷}^üõÕ܆¼öG?â´sÏ¥“¤»üQî àMvŸÁ ïŠ-BÖ×%n÷Sïþí¡½Vô’ðmYë5 ‚;À< uŸ¦Eõ¼Wñ÷^fäkåÁ÷À.«ýýµ>v4Ñ꿼À¬(#-ó·,QÇïÜJA”;ÅLçUÉ«ãEÔ8'Êü*Ñù‹N•s¡ÿ€gùÈtÿ~c$ñ!äß°?ÅÓ,—Uø†ÑX{(aR²{³õ"kº=Èd’x€Çêæ4âºj;y64ËÀ J„3ŒÄ$5 6 ÕÜ(Þ•JìÚµ‹§|•+WðåoäÒÏ} 
ƒ\dmæ8N"’Y÷Ýz+7|éK\üW–‹p¾ïãyù|>±B‹¾Ãm·ÝÆ©§žJ??þ8ííí`¬¸óN›oŽ1#Ši{¶eA¡€ \±x±²>Ìç)¦ÒX` ÃDWJÇx4 ËR·túúg?ô+î¾[/Zv÷ÖÁ1µg:ðÂÎýë–uvv’Ï燶ŠÓÞ{fºçÃsÿ爦«Ž=öXxõ`• ›cLÊ¥Ñì 6xà e¿üæ/9Ø8xºOM£5÷î9‚%-¿DMó7'.<áBè‰:g:qŽfRZ7¾€iïàÙ?Ë{O?|4Øm$.õôÔ#®ƒû• …OÀõ/¼ Þ¸™¯»ŽðqTµ“¹jÏ„Çf²á‰ à…IâaIòkÛ`;l·¤ÖÛ¶Ú¦ò)`%ø¿cÿ ¸œ£!ûSð?»Õ>Õ"P¶/¡Ä¶~0—€ÿ OCo7`E è²PBÜ!X Æ×!Pe•JJL³£g÷g΋ ]ÊàW |ø/‹´™ÇÀzW½æ#^—ò9^¯ŽœÒ¨ÌïÝË"›…¿þ볦û×37pÈ’ÇyÝëìú¸}#‰pžº±û®X½ Gd…‹c’€G¬ÕŠÅúÐDÔ÷ëc%¬Œ`½çyVb•(]Ö0èïëã®;XyÁ†üÍcÅٹ˒”øÓ±–űõWIR»\"«³.¸ >¦išj›l‰Tݽð}NL‰ciQ­î÷CVFÛÇ׬««ñ &YÁåúIæo:‹Å:!î¨÷½/‰ñm³m– q{žž_·¬:’«½â4Ì`!Nú oxÿà ÷›·nå‚sÎI‚ψi<¦9KÒ1†ˆ²»Ü˜if ÷.§%¸yØmn¾èf~¼ìÇœòÔ)œÑÂ]£™)„ÀCßÜÎç¯Á=pžçÜiN“‰ É£ÚðþÑ滋bšÁŸë”J|kÏ/yÝiŸaÑ\O—.Gc¨­½A&Â’÷m`eœßWzˆm+íÅBƒÙv"rÉy… ½Ìÿ\0 `/ª ‚ïË"£º + ½y”h¹†öŠ1Ô ¾ÆQÀbk¼F¡ÿš|©þóh’7ê:#ycÂà\–gu%°qòÀ)ä¬×ýÓÈ™Ø1IÔå±Ög‚ÊCÏú¾a˜]]T*²£ 'A`Û6AÐÙÙ[«yž‡išõ–g@kôVÊ„P31²™&×.ºÑNˆ>›¦9r’¬¡DEÃhžÐd8wá(¬Ã˜hŒñlûõ¯ÇVÆLàÁiY²8tôûèñŸ†Y Ä‘1Ä'ýœ£¢`±ÃdÕhfYpÈ«7¨‡»ëºX¾¥aͬbÿçŸç­o]Ùt]†<ô7qÁ]$™5šÊïÈ^ô<ÆW6ttÐ?ÚØ:õÁâg²eѾJ³ µsÿúëùôg~Gø‘g¹öÚoÑ†Ò’Š Ö(F“3a$Dà â€AqßÄߣ-8™ä᪜šÊŽZ?Ç<OüÙ²,,ÃR1LtpnÍ,Áæ±gÈYë0 yùÃ/W–A.ºMÖÌh5ᣟYļyóâ,n#â£Úí,“6ÔLû`ˆ’÷üù:z_¿“ýþq)TÕʾ3”6dµ26ˆWžeAÑïû˜/yN’ÌÒ¶Q-kAö~UŸÈH¢„ÅÑçN0ª`‰›¡$HD8IˆÐ}1Iñ*¤†"‹¹A¤·ÑƒßƒüéoŸP}â±0!9 C<ÏÃqœø½ôkÒ¢Yú¹ ÙAs¹\,ŠÅœaub˘-û4s‚ÿÙ¹?KÚV·lDkD†™.Ä5 ©€ _‰èÙhÍ çÙŸ¿ÈÊÚë—±K£™¡,G–õš¦ëLÓ„ŸF´¥§fãK\ðð겺5% øn¡]P53ŽððÆ'yÓkîå«;#N¸9K^,Ö@Y­…!„[ÁyŠ]`ÚP>ÌO°+Ð{XTøPˆÜ‚m(Ñ-¤ÞrTNN2GH’It"“ë2ÁžN´`¢ûMsŒð™gxúÇpÜÅê’w“Ô1’võSÁ÷LÓÞb©ô³bÔ8š}‚ů~Ìé?N— [Gĺ^O ìóÌX!î§ÑÌYج’Šw”®™á<ó³çÙ¾'‹Ê{N{ÿö𿱰¼pºOM£5>°ç‰'ÏðæBî;ï>Ö|pÍtŸ¦F3"ë¾s-ÿÙ†w¨↵`H qÍ £Üóüó´üê|¾ó›ß`0rU5 e¥æûÊ2. ÁYVŒo‚ù ”5Z7˜¡D †/VjC!bY%zù(ñZöK l¤Ê*GËÒ–ki#)md47¹í6Ö|h;ö›üÀ!IÆÚqˆn¤2;ŽCqh£‰X¬ik7ÍPôp@ÝgÏó°,k° ²$ÚW“ i1âEL üðAZz#8(“Éè,#šYEøŽå¼õ­‰EÜWø* ÿA‹pšÙÅí<óS-ô¦GÙ¹s'«¯Z­\†4šL°e / ìÇÊ®®‹ 4q‡³Qƒ@F@3Ãßû^þÉ…‡Î¾¿yËkGN*¢˜n–å,Tó2€š½$‚™‹ÈĨÃ@ù»–PâœX¼ù¨Ñr+ÚÇB¹“‚زQÍ”B-tï›üéOì8ìÑæípÕè…ØB¡Çg Ã0.מHFlf<÷±çÙ½ûñçt¬@@µ•Ô¤„êWäÇv ÍÜdÆ q‡úNþ³Íu˺ººÔCùtŸF3:Ž;òO¬Zµ+þ¼ì¼eÚuO3ëè¿ÿ~^~à®AËïø·;XpÜ–mÖõZ3ó©¼ø"¹wòÔS? 
Z­6˜-–JਠD³ªž‹w‰¸‹I¸Æû ObÉFå8Ñ>.ÊŠT>Ëvû˜!QÚýÑó¼ØK‚þË{Ïóâí\×·«T*ñ:ß÷ãåžçÅË=Ï£eÂ윳__·?ÁýG^Q ™!™uúºÙ¶·éÙlvt 4šI`ÑsϱjÕªøs6›ÅJg¤-¡ÚÆ.tßBSÇŒâ>ôPöo›.•’œà~ÛtŸF3:öt×âÆ8“ɨªîhf»_ó4¿öÚAËßòÚ·°ò‚•c/P£™~׳›•÷4(P/Rˆ«œfRH‹ i$k¡-|ßE‡ âíÂ0ŒÝÎàÿoïþcÛ¸;€åÄNªXô({U2žÃ¶±/鯘‚ˆ¢]·lÔ\ÌÈÒ “1tì …Ô`Ö?VˆZ  Büo0ÖÐV.4¤°®Îà˜.5V¯Í¨šE±, cÇôZñl7±£ˆL”¸ž£ýq¾ÓÅ»{G~?Aóç=Ý}ïñÝ»wïÔN í3ÉdRŸ,Ëz§G6›E"‘Ð?clOÎÎÎbmm ^’é%ÈCCø¿ÿü<~úÓcÍ0 u®­Y¨mÚÝIµ¬7ÓŸçÇFg\êåŒá»ït3æ°V.d‰DÂÔA¦ejvvVÏT6›Åàà é3ÆehŸYXXп»<ûƲ„B!½Ý¨ß¤N[ýw;—Œï±:×™(^Z“°ÓÐy¡+¿LÙ |ÿ7®7cG‘SäwÞÁþäÇ8p@Dvee¥²^iãÝ~©³Ûwé_àÿùMý±ê€þÖ¾“Èi—ž{To|÷ßåå{äI¥-[€={LÏÍÎÎrªòŒ.\Àÿþx'¾ñ›•“(ËØ¸“µGO–/…1¬–¼1ΗdüŒ,ËzB>Ÿ7uV ëï;|ø°þ݉DB•#Ë2&&&LŸÑ–gggõÏTëˆ3~¦Z™Ë?c<à‰F£z«qôQ(ªyišßïÇ{ï½/™Ý±x«úDµdïþï‡Úa6ƒö¶i¿Éf³ú6®Õ »²²bvøðaý}ÉdRÏXy'¬ñ3ÆN\c‡›1‡ÆË&ë{||\ïü ‡Ã¦;n...êÿŽÇãz‡Ùøø¸^÷„B!ýnœ033£gר¡d¼¡L45u¾GÜxí½·nA¹ö þÎå'µ+ù´Ë˜k0îÿÆõF䊋ñÁêª^W$“IS@ͶÛd?aoÖ°ö«{ðöÞ‡¡úÍ`fcÎ"¸§¯÷l¿ à~ÀC¯=´qÙ‘‡\þÇâ#Ÿý¼þX?0Öæ"Ü¥;wÐóß[ñÖ[§Ÿ6¿ÅÆ|V6[YY©Úybì4Ò:Ï´ðááaýàÞøïD"P(„ññqȲ ¿ß¯Þ8?Í‚~011'NÀï÷C–e„B!„B!D£Qý=Ú÷hŒ §Nm\+f…cü|ùgŒŸÆke¯ö}Æçß[>úÇØ™aì˜0v¼ùý~ÓkÆÏŒŽŽâúõëvnòöÊç1EÏß¾†ï}¯Î]ùú¡Žì C=\€zâÄ›œ³¬è×;Ï´l âĉ‡ÃH$z†òù¼¾hÅápØ”k¿ßoÊ1—å™0n{c.OœØXƼ—çÚø]Ô{Þ|+7|øÄg~O}bêˆä*õðÊÊ †‡‡qþüùŠíN䶬χŸÿó 0©>®Ègl'SMÂŽˆ»ù«]ø¸¤^ò´°°Pћ̎eÝÛ¶¡·çmêÓ¥;—6æ]!òˆ7o¢gé}ô~ý¹BÜgã‚<ãROVÿç>:d¸”z¶R6^f™L&õ‘<Æa‰DByV>¢ÍØ‘V«ó`ffFï°×;ŸŒ£mü~¿é3‹‹‹z§ÃøøxÕ‰Òý~?'7TöÜ9ôþàúú2Õ'¹_šé0Ôy‰ï>7ƒ–;ጣ“ɤi™6òÃ8:2ŸÏ›.Ô:Õʳxþüy½skffFÿ{Œw3ŽF£5G‘µs$çsÖÚ}÷aË®"8 >‘GEûØØéj¬‰Dòú½;¯m¯ýmÞM¢*„íˆÛzû6Ò²Úe§`:èãñ‰îÖûBO:Ïa8ÆÇŸþ8ç Ϲ²s'Þ}m»~À$Ë2n=‹£“ÉSr¥ßEïÖåÊQ-û›Œ—Fæóy}n(mÔ†¦|î"­c!ë 333zÇ‚ñ=åF¼ «»-ìÞ åô6|ë[Wƒ u¸8ÔKPßBSíã\f²,ëÄÙlÖtÉ1Óe“ZfÂÑhTÏoùeÁÌ/@aÜÞ~{ã‰0*æ…›˜˜0uƉèú• |l€ùä„®ÖÝ¢‰ pGÜo—·â/îC–ÁéµÈ[^½Òžê<‡£áÑ[±yÈÖ[·°ã#×ôÇù|ÊO6,ÈSr¯ï…ÏwÉñx\ït°4—Q?ʆ°eÛOð¥/ýù…$ÔKûF¡Þ@¡Æ‰ãAâÄÄ„éæÕæÔ ‡Ãu/û$jE¡° ÷|òõAzûØxC ãè]"Q½z=€Þ{KÔú±b¤r²…/¥®áØqÅbét¥R CCCˆD"õ?°ý6ü+ïáµíÛ‘,m…üÿ˜zWk6gÉ –3¬(õ¼ÙÙY<ù¡'1ðêG‘ë¬æøåå}¸÷Æ]+&º'rÕ¿óãU^¯ûžÙÙYd³Yœ8q‰D~¿ñx£££zGZùœS¿ßÏÎ6²ÌjŽó¹¢CK•/h7)>‰‰ ýÒ¾ÙÙY½cØØY\>‘VsüÀà[S›ÄÚD¢µçÐ$r‚åã<¿]Ý‚?=¸ @•‘›y¨£ˆâ ¿†º”c#â‰E$IÕ‡n–yàÆ%u"ál ù~µcœf‹Üa5ÃÛ¶½‹;n"ŸÏcàê@“K!²—Õÿzm7ܲŠ|>¯Ž ’Á3|ä:«9¾sg ±˜dø æÑnãããú¨6ãe£<0$»XÍ1þ¥„o~såóQ 
÷¶“ÙlV¿;h(2ÔçüU#l›1ÇÖr|áÂd³YüÍ—ˆ••„ýaµ}œ€:_çGä-µst{~Œ´l4Ñq¥R Á`Pê¾ÿòåËxðЃ8ÿ‰ó(|¶€å¯-óN”ÔlyyÙõJúêÕ«¸víšëåÈf³¸zõ*–––6ÿe-(•J¸xñ"®\¹âÚò­d.f.âÙï<‹ì²xõ¯_ÅêÏVŸ9[næÇÌí}ZËñµk×6ÿe-.ßJŽ_ýulýá;(¼û þí©³ìh¹EÉ1Ëa.Ãõë×ÑßßßÔ|@íÔJÛxúÔ4¶j;}ôQ ¬`¿ÜõKGëd·ë–ÃÌ‹íãᅧĕ'qöÐY\Š_­3·/7ÛÇfnçXÛ^i+Š‚Ë—/ãè…ßÇÜÁòÀ2Þxè ¼ñÐxósobõÏV3®ü)®p;?¢¸xñbå%Êu8v³+FGGñ‹/ÿ{öìÁ@ïàÎüZ»wïÆž={\]kkkX[[î]»\-G(Boo/z{{][@=ö˜«ëÁŠƒy/}ø%ìß¿û°Ï•20?fnïÓZŽoß¾íêzhÖàà nþýM¼»ã]<´ÿ!ýy§ëdæG¬rܸq?ü°ë¢###øõà¯á÷û1°g#»Ýšc–cƒÏçÃêê*]]ÍÅÖ/oÅ®]»*~Ó̲ÛõËaæÅö±4&á忌ýû÷ãÓø´+e`ûØÌí߸q7nÜðLûø‹_ü"À÷Œ—÷\ÖŸï¿û_·q;?¢Ø½{7yäìܹ³©÷;Ò'IÒé´þ8—ËÁçóÕ|ÿW¾ò'ŠÕÓgGI\V3 _ÿú×Ý.6•éö}ÚjŽ÷íÛ‡}ûÜéD‘(ù¥n±šc­ÁLTÎÍ}‰mc–£°}ܘck9>pà8àv±…ÑíùÑX]ŽÌ'Ir¹ŠÅ"@–冓‰„¦NÀS'`Ž©0ÇÔ ˜cêÌ1¹ážçž{î9»¢õ(OOO#ŸÏãÌ™3xþùçž1!3L€9¦NÀS'`Ž©0ÇÔ ˜crCÏúúúºS S…B’$1ØäIÌ0uæ˜:sL€9¦NÀS'`ŽÉIŽvÄu+Gæˆ#""""""""êvŽÌ'šb±ˆÙÙY,--AQH’¤?ÿ /àܹs€`0húL­×ÚQã]6œ.G:†,˦uQoYv­ j^­ k¯‰œc»ÊÀ{O+9¶s»‰šc7öijs\‰õ±÷0Ç•X{ÛÇ•˜ãöé¶u#RŸ‰(ÚÙwÓu#âŠÅ"ÆÆÆ¨wH‘eñxH$ô% Ȳ¬®Þk›‘J¥077gzÎÉr¤R)d2 !N#•J5\–]낚S/Àø9¶£ ̱÷´šc;·›¨9vcŸ¦æ0ÇÕ—ÏúØ[˜ãêËg}ì-lW_>sÜ>Ý´nDë3E;ûnîuûqZ.—ƒÏçC, á‰'ž@.—C¡PÀñãǨáK§ÓˆF£u_Û Y–õÛ$ËçT9EÁââ"^|ñEjíââbÝr[Ö5¯V†µ×Dαùa޽©•Û¹ÝDͱû4596c}ìM̱ëcobûØŒ9n¯n[7"õ™ˆ¢Ý}7]7". bjjJ\*•™LÆ4|9‰ “É4|­UÅbÇŽ3•Åérhß§(Šþ=G­»,;ÖYS+Àø9¶£ ̱7µ’c»¶›È9vzŸ&k˜c3ÖÇÞÄ›±>ö&¶Í˜ãöê¶u#JŸ‰(ìè»éºq@@€z¦ ãèÑ£(•J¦kvµ÷¨ûZ«‰¦¦¦*nìd9E¢(xúé§!I–––‹Å022RsYv¬ ²¦V†góXϱe`޽©•ÛµÝDαÓû4YÛ±>ö&æØŒõ±7±}lÆ·W·­QúLDaGßM×uÄjæÉ“'‘N§155…h4jšÃÄnsss$É4ÉŸ›ëB²œËå099‰‘‘·‹E T˰ӘcÚ,æ¸r}0ÇÞÃW®æØ{˜ãÊõÁ{s\¹>˜cj•Û}&¢°kŸîºKS`rrÅb§OŸÖ+hI’Ëåô÷h×E7z­KKK˜››C8F8„Ãa}ø¢Så¦^ZI’ôëžk-«Ýe ÖTË0 ~ŽíÈsì]VslÇv=ÇNîÓÔæxëcïbŽ7°>ö.¶70ÇíÕëÆí>QØÕwÓu#âÒé4|>_Åõ½ÚŠ*‹ðù|e¹"pÕ^k…6aŸ&#›ÍP‡~:UŽH$‚¹¹9ýû2™Œ>d²Ö²Ú]²®V†ñslG~˜coj%Çvl7Ñsìä>MÖ1Çf¬½‰96c}ìMl›1ÇíÕmëF„>QØÕwÓuqÚ„•Zo¦&›ÍâÈ‘#C$,˘ŸŸ žQ¨õZ»Õ[V»Ë0<<Œ±±1ƒA <óÌ3u—å亠êêeØÉüÔãd~˜coj%ÇNo7r,Ê>MÕ1Ç•Ëb}ì=Ìqå²X{ÛÇ•ËbŽÛ§ÛÖè}&¢ØÌþÔ³¾¾¾îö EQP( IRÅPÊz¯y¹­,ËÉuAÖ‰žc;ÊÀwQ¶›9eŸ&ë˜cÖÇ@”í&BŽY{—(ÛŽ9ö.®›Æë¡×Q+ëƒqDDDDDDDDDèÊ›59qDDDDDDDDD`G‘ØGDDDDDDDDävÄ9€qRsssH§Ó(‹®–#N»½:È#2™ R©dYv»(&Ìqçpj[ʲŒL&ãöŸKd;«õ6÷AM+mf'rÌ ‹Á+m@QŽýŒDm×SwòʾlvÄ9D–eŒ¡T*A–eLNNºV! 
V¾Ô”ééiLOOæææ066æv‘tÌqçpb[*Š‚ééi,--¹ýçÙª•z›û ‰¤Õ6³Ý9f†Åá…6 HÇ~‘ÛõÔ¼°/Û…q9vìfff‹ÅpüøqèÚБ7hg(¾ýío#‹a~~Åb‘g‚É“âñ¸ÛE ²Èõ6÷Aj–¨mff˜¬-Ç"ÿ>u£{Ý.€Û&''!Iæææ±X ¥RI<55…‘‘‘M-#“É T*!‰èÏÍÏÏëÿN§ÓP±X¬jù¢Ñ(R©ŠÅ""‘ˆ^™·K*•Òÿvò&;r¢µëëë3=_*•4έÝûU9æ¸s´{[¦R)D"är9Óó"d؉:žÚÇJ.ÜømoTo7Ë©}ÜQ/n׋ÚÌV´3Ç̰ØÊ·µ—s,úï‰Ç‰L;Õ^í¦ã¹®—ÉdÐ×ׇl6‹d2‰T*¥?ŽÅbz6CQH’„T*…ÉÉILNNšÎˆšåËår8{ö,Ξ=‹B¡ÐÖ3ÓÓÓÈd28räˆý+›lcGN$IÂÈÈ&''‘J¥Ç 144 qníÞ¯Œ˜ãÎÑîm™Éd°¸¸XõûDȰÝu<µ—•\¸ñÛÞ¨Þn†“û ¹£^þÜ®µ™›ÕÎ3Ãb«¶­½œc‘HLNdÚ‰öj·Ïuýˆ88zô( šK’TõZþz“ Vë½U™L@GŽÑç˜èëëC$1-©W>ŸÏ‡`0Xõ=V˨a—e§OŸ†Ïçsv¥SÛ5ÊI+6Π•Ÿ1k6·ÍîW­–‘9îÍlK+)‹˜žžF2™¬ú}¢d¸™:žÄÑl.Üúmj×Û4Ú­–§Ñ>Hî©•?·ëÅFmæf´3Ç̰Øjmëvç¸ÝÇ~ˆøû@b³;ÓÆï´#—Ýx<ÇŽ¸ôõõY>{055¥?V²,7U×:£³™2e2}ˆª,Ëm¿Lœ×('V3’N§‘Éd0??¯Wˆñx'Ož´m¸0sܽšÝ–V2’J¥ôH&“A±XD±XD.—ƒ$I¶ü­þ>P÷±ã·}3õv3û Õò¸±Rsœ¬w¬æf3mæv瘗“mÀN8ös£]Oⲫ½ÚÊ÷vëñ;â044Tu覢(®•I’$LMM!‰`zzÑh´kzŸ©9Š¢  šr …š…9îvlKŸÏEQpòäIj¦‹Å"|> ¨#m¦Þæ>H"Øl›¹Ý9f†Å%rPÄc?/´ë©;‰¼/Û‰q-(•J5o]^í,‡$IPE?{V,±¸¸¨÷öj?ê›ùA·Z&-ÜÑhét©TÊtÖ†:O+¹M§ÓzƒS;¬ ynGn7[Fæ¸s4»-­d¤ü ¯6™­ö¼¦ÎåÆo{£z;—ËÁçóU=«ÝÌ>hµ<öAÛõb£63àlŽ™aqÕÛÖíÎq»ýêeØÎ2Õû} îbW{µ•ïíÖã9vĵ Xúöù|ˆÅbC$A¡P€$IúuÖét¹\nSw±Z&£©©)Œ!ò@±ƒYÍH4E.—ÃSO=I’P( MwÜln7[F#æ¸sÔÛ–›ÉH9Ñ2LÅßöFõ¶v™]£ï¬µ2ßÏíz±Q›`Ž©Rù¶nwŽÛ}ì×l†ÛY¦F¿Ô]ìª7û½Ýt<׳¾¾¾îv!º…¢(( U‡³§R)6 HHÌ-u2f˜ìäV¾jÕÛÅb‘óQC"Ô‹õÚÌ15Cä»™ázû9‡#âTëVÚÚ]uˆDÄÜR§b†ÉNnæ«V½N§9úê¥^¬•a€9¦ÆDϱ›®·o‘s8"ŽˆˆˆˆˆˆˆˆÈ[Ü.Q7øÿžµut<±%tEXtdate:create2019-08-28T17:00:50-05:003‡õg%tEXtdate:modify2019-08-28T17:00:50-05:00BÚMÛ-tEXticc:copyrightCopyright Artifex Software 2011ºÅ´1tEXticc:descriptionArtifex Software sRGB ICC Profile †2tEXticc:manufacturerArtifex Software sRGB ICC Profile\~=Ÿ+tEXticc:modelArtifex Software sRGB ICC Profile1(‚¡!tEXtpdf:HiResBoundingBox1469x828+0+0‹QÊtEXtpdf:VersionPDF-1.4 G:xIEND®B`‚blis-0.6.1/examples/000077500000000000000000000000001360743507500142775ustar00rootroot00000000000000blis-0.6.1/examples/oapi/000077500000000000000000000000001360743507500152275ustar00rootroot00000000000000blis-0.6.1/examples/oapi/00obj_basic.c000066400000000000000000000220161360743507500174470ustar00rootroot00000000000000/* BLIS An object-based 
framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { obj_t a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11; obj_t v1, v2; num_t dt; dim_t m, n; inc_t rs, cs; // // This file demonstrates the basics of creating objects in BLIS, // inspecting their basic properties, and printing matrix objects. // // // Example 1: Create an object containing a 4x3 matrix of double- // precision real elements stored in column-major order. 
// // The matrix dimensions are m = 4 and n = 3. We choose to use column // storage (often called column-major storage) and thus we specify // that the row stride ("rs" for short) argument is 1 and the column // stride ("cs" for short) argument is equal to m = 4. In column // storage, cs is known as the leading dimension. dt = BLIS_DOUBLE; m = 4; n = 3; rs = 1; cs = 4; bli_obj_create( dt, m, n, rs, cs, &a1 ); // If cs is greater than m, then extra rows (in this case, two) will // be allocated beyond the lower edge of the matrix. Sometimes this // is desireable for alignment purposes. dt = BLIS_DOUBLE; m = 4; n = 3; rs = 1; cs = 6; bli_obj_create( dt, m, n, rs, cs, &a2 ); // // Example 2: Create an object containing a 4x3 matrix of double- // precision real elements stored in row-major order. // // Here, we choose to use row storage (often called row-major storage) // and thus we specify that the cs is 1 and rs is equal to n = 3. In // row storage, the leading dimension corresponds to rs. dt = BLIS_DOUBLE; m = 4; n = 3; rs = 3; cs = 1; bli_obj_create( dt, m, n, rs, cs, &a3 ); // As with the second example, we can cause extra columns (in this // case, five) to be allocated beyond the right edge of the matrix. dt = BLIS_DOUBLE; m = 4; n = 3; rs = 8; cs = 1; bli_obj_create( dt, m, n, rs, cs, &a4 ); // // Example 3: Create objects using other floating-point datatypes. // // Examples of using the other floating-point datatypes. m = 4; n = 3; rs = 1; cs = 4; bli_obj_create( BLIS_FLOAT, m, n, rs, cs, &a5 ); bli_obj_create( BLIS_SCOMPLEX, m, n, rs, cs, &a6 ); bli_obj_create( BLIS_DCOMPLEX, m, n, rs, cs, &a7 ); // // Example 4: Create objects using default (column) storage so that // we avoid having to specify rs and cs manually. // // Specifying the row and column strides as zero, as is done here, is // a shorthand request for the default storage scheme, which is // currently (and always has been) column storage. 
When requesting the // default storage scheme with rs = cs = 0, BLIS may insert additional // padding for alignment purposes. So, the 3x8 matrix object created // below may end up having a row stride that is greater than 3. When // in doubt, query the value! bli_obj_create( BLIS_FLOAT, 3, 5, 0, 0, &a8 ); // // Example 5: Inspect object fields after creation to expose // possible alignment/padding. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // Let's inspect the amount of padding inserted for alignment. Note // the difference between the m dimension and the column stride. printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a8 ) ) ); printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a8 ) ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a8 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a8 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a8 ) ); printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a8 ) ); // // Example 6: Inspect object fields after creation of other floating- // point datatypes. 
// printf( "\n#\n# -- Example 6 --\n#\n\n" ); bli_obj_create( BLIS_DOUBLE, 3, 5, 0, 0, &a9 ); bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10); bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a9 ) ) ); printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a9 ) ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a9 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a9 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a9 ) ); printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a9 ) ); printf( "\n" ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a10 ) ) ); printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a10 ) ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a10 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a10 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a10 ) ); printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a10 ) ); printf( "\n" ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a11 ) ) ); printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a11 ) ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a11 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a11 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a11 ) ); printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a11 ) ); // // Example 7: Initialize an object's elements to random values and then // print the matrix. // printf( "\n#\n# -- Example 7 --\n#\n\n" ); // We can set matrices to random values. The default behavior of // bli_randm() is to use random values on the internval [-1,1]. bli_randm( &a9 ); // And we can also print the matrices associated with matrix objects. // Notice that the third argument is a printf()-style format specifier. 
// Any valid printf() format specifier can be passed in here, but you // still need to make sure that the specifier makes sense for the data // being printed. For example, you shouldn't use "%d" when printing // elements of type 'float'. bli_printm( "matrix 'a9' contents:", &a9, "%4.1f", "" ); // // Example 8: Randomize and then print from an object containing a complex // matrix. // printf( "\n#\n# -- Example 8 --\n#\n\n" ); // When printing complex matrices, the same format specifier gets used // for both the real and imaginary parts. bli_randm( &a11 ); bli_printm( "matrix 'a11' contents (complex):", &a11, "%4.1f", "" ); // // Example 9: Create, randomize, and print vector objects. // printf( "\n#\n# -- Example 9 --\n#\n\n" ); // Now let's create two vector objects--a row vector and a column vector. // (A vector object is like a matrix object, except that it has at least // one unit dimension (equal to one). bli_obj_create( BLIS_DOUBLE, 4, 1, 0, 0, &v1 ); bli_obj_create( BLIS_DOUBLE, 1, 6, 0, 0, &v2 ); // If we know the object is a vector, we can use bli_randv(), though // bli_randm() would work just as well, since any vector is also a matrix. bli_randv( &v1 ); bli_randv( &v2 ); // We can print vectors, too. bli_printm( "vector 'v1' contents:", &v1, "%5.1f", "" ); bli_printm( "vector 'v2' contents:", &v2, "%5.1f", "" ); // Free all of the objects we created. bli_obj_free( &a1 ); bli_obj_free( &a2 ); bli_obj_free( &a3 ); bli_obj_free( &a4 ); bli_obj_free( &a5 ); bli_obj_free( &a6 ); bli_obj_free( &a7 ); bli_obj_free( &a8 ); bli_obj_free( &a9 ); bli_obj_free( &a10 ); bli_obj_free( &a11 ); bli_obj_free( &v1 ); bli_obj_free( &v2 ); return 0; } blis-0.6.1/examples/oapi/01obj_attach.c000066400000000000000000000133401360743507500176330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "blis.h" void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); int main( int argc, char** argv ) { obj_t a1, a2; num_t dt; dim_t m, n; inc_t rs, cs; // // This file demonstrates interfacing external or existing buffers // with BLIS objects. // // // Example 1: Create a bufferless object and then attach an external // buffer to it, specifying column storage. 
// printf( "\n#\n# -- Example 1 --\n#\n\n" ); // We'll use these parameters for the following examples. dt = BLIS_DOUBLE; m = 4; n = 5; rs = 1; cs = m; // First we allocate and initialize a matrix by columns. double* p1 = malloc( m * n * sizeof( double ) ); init_dmatrix_by_cols( m, n, p1, rs, cs ); // bli_obj_create() automatically allocates an array large enough to hold // of the elements. We can also create a "bufferless" object and then // "attach" our own buffer to that object. This is useful when interfacing // BLIS objects to an existing application that produces its own matrix // arrays/buffers. bli_obj_create_without_buffer( dt, m, n, &a1 ); // Note that the fourth argument of bli_obj_attach_buffer() is the so-called // "imaginary stride". First of all, this stride only has meaning in the // complex domain. Secondly, it is a somewhat experimental property of the // obj_t, and one that is not fully recognized/utilized throughout BLIS. // Thus, the safe thing to do is to always pass in a 0, which is a request // for the default (which is actually 1). Please don't use any other value // unless you really know what you are doing. bli_obj_attach_buffer( p1, rs, cs, 0, &a1 ); // Now let's print the matrix so we can see how the element values were // assigned. bli_printm( "matrix 'a1', initialized by columns:", &a1, "%5.1f", "" ); // // Example 2: Create a bufferless object and then attach an external // buffer to it, specifying row storage. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Now let's allocate another buffer, but this time we'll initialize it by // rows instead of by columns. We'll use the same values for m, n, rs, cs. double* p2 = malloc( m * n * sizeof( double ) ); init_dmatrix_by_rows( m, n, p2, rs, cs ); // Create a new bufferless object and attach the new buffer. 
This time, // instead of calling bli_obj_create_without_buffer() followed by // bli_obj_attach_buffer(), we call bli_obj_create_with_attached_buffer(), // which is just a convenience wrapper around the former two functions. // (Note that the wrapper function omits the imaginary stride argument.) #if 1 bli_obj_create_with_attached_buffer( dt, m, n, p2, rs, cs, &a2 ); #else bli_obj_create_without_buffer( dt, m, n, &a2 ); bli_obj_attach_buffer( p2, rs, cs, 0, &a2 ); #endif // Print the matrix so we can compare it to the first matrix output. bli_printm( "matrix 'a2', initialized by rows:", &a2, "%5.1f", "" ); // Please note that after creating an object via either of: // - bli_obj_create_without_buffer(), or // - bli_obj_create_with_attached_buffer() // we do NOT free it! That's because these functions merely initialize the // object and do not actually allocate any memory. // Free the memory arrays we allocated. free( p1 ); free( p2 ); return 0; } // ----------------------------------------------------------------------------- void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) { dim_t i, j; double alpha = 0.0; // Step through a matrix by rows, assigning each element a unique // value, starting at 0. for ( i = 0; i < m; ++i ) { for ( j = 0; j < n; ++j ) { double* a_ij = a + i*rs + j*cs; *a_ij = alpha; alpha += 1.0; } } } void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) { dim_t i, j; double alpha = 0.0; // Step through a matrix by columns, assigning each element a unique // value, starting at 0. for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { double* a_ij = a + i*rs + j*cs; *a_ij = alpha; alpha += 1.0; } } } blis-0.6.1/examples/oapi/02obj_ij.c000066400000000000000000000170541360743507500170000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "blis.h" void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); void init_dobj_by_cols( obj_t* a ); void init_zobj_by_cols( obj_t* a ); int main( int argc, char** argv ) { obj_t a1, a2, a3; num_t dt; dim_t m, n; inc_t rs, cs; dim_t i, j; // // This file demonstrates accessing and updating individual matrix elements // through the BLIS object API. 
// // // Example 1: Create an object and then individually access/view some of // its elements. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // We'll use these parameters for the following examples. dt = BLIS_DOUBLE; m = 4; n = 5; rs = 1; cs = m; // Create a object with known elements using the same approach as the // previous example file. double* p1 = malloc( m * n * sizeof( double ) ); init_dmatrix_by_cols( m, n, p1, rs, cs ); bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a1 ); bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); // Regardless of how we create our object--whether via bli_obj_create() or // via attaching an existing buffer to a bufferless object--we can access // individual elements by specifying their offsets. The output value is // broken up by real and imaginary component. (When accessing real matrices, // the imaginary component will always be zero.) i = 1; j = 3; double alpha_r, alpha_i; bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); // Here, we print out the element "returned" by bli_getijm(). printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); // Let's query a few more elements. i = 0; j = 2; bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); i = 3; j = 4; bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); printf( "\n" ); // // Example 2: Modify individual elements of an existing matrix. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Now let's change a few elements. Even if we set the imaginary // argument to a non-zero value, argument is ignored since we're // modifying a real matrix. If a1 were a complex object, those // values would be stored verbatim into the appropriate matrix // elements (see example for a3 below). 
alpha_r = -3.0; alpha_i = 0.0; i = 1; j = 3; bli_setijm( alpha_r, alpha_i, i, j, &a1 ); alpha_r = -9.0; alpha_i = -1.0; i = 0; j = 2; bli_setijm( alpha_r, alpha_i, i, j, &a1 ); alpha_r = -7.0; alpha_i = 2.0; i = 3; j = 4; bli_setijm( alpha_r, alpha_i, i, j, &a1 ); // Print the matrix again so we can see the update elements. bli_printm( "matrix 'a1' (modified state)", &a1, "%5.1f", "" ); // Next, let's create a regular object (with a buffer) and then // initialize its elements using bli_setijm(). bli_obj_create( dt, m, n, rs, cs, &a2 ); // See definition of init_dobj_by_cols() below. init_dobj_by_cols( &a2 ); // Because we initialized a2 in the same manner as a1 (by columns), // it should contain the same initial state as a1. bli_printm( "matrix 'a2'", &a2, "%5.1f", "" ); // // Example 3: Modify individual elements of an existing complex matrix. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Create and initialize a complex object. dt = BLIS_DCOMPLEX; bli_obj_create( dt, m, n, rs, cs, &a3 ); // Initialize the matrix elements. (See definition of init_dobj_by_cols() // below). init_zobj_by_cols( &a3 ); // Print the complex matrix. bli_printm( "matrix 'a3' (initial state)", &a3, "%5.1f", "" ); i = 3; j = 0; bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); alpha_r *= -1.0; alpha_i *= -1.0; bli_setijm( alpha_r, alpha_i, i, j, &a3 ); i = 3; j = 4; bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); alpha_r *= -1.0; alpha_i *= -1.0; bli_setijm( alpha_r, alpha_i, i, j, &a3 ); i = 0; j = 4; bli_getijm( i, j, &a3, &alpha_r, &alpha_i ); alpha_r *= -1.0; alpha_i *= -1.0; bli_setijm( alpha_r, alpha_i, i, j, &a3 ); // Print the matrix again so we can see the update elements. bli_printm( "matrix 'a3' (modified state)", &a3, "%5.1f", "" ); // Free the memory arrays we allocated. free( p1 ); // Free the objects we created. 
bli_obj_free( &a2 ); bli_obj_free( &a3 ); return 0; } // ----------------------------------------------------------------------------- void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) { dim_t i, j; double alpha = 0.0; // Step through a matrix by rows, assigning each element a unique // value, starting at 0. for ( i = 0; i < m; ++i ) { for ( j = 0; j < n; ++j ) { double* a_ij = a + i*rs + j*cs; *a_ij = alpha; alpha += 1.0; } } } void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) { dim_t i, j; double alpha = 0.0; // Step through a matrix by columns, assigning each element a unique // value, starting at 0. for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { double* a_ij = a + i*rs + j*cs; *a_ij = alpha; alpha += 1.0; } } } void init_dobj_by_cols( obj_t* a ) { dim_t m = bli_obj_length( a ); dim_t n = bli_obj_width( a ); dim_t i, j; double alpha = 0.0; // Step through a matrix by columns, assigning each element a unique // value, starting at 0. for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { bli_setijm( alpha, 0.0, i, j, a ); alpha += 1.0; } } } void init_zobj_by_cols( obj_t* a ) { dim_t m = bli_obj_length( a ); dim_t n = bli_obj_width( a ); dim_t i, j; double alpha = 0.0; // Step through a matrix by columns, assigning each real and imaginary // element a unique value, starting at 0. for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { bli_setijm( alpha, alpha + 1.0, i, j, a ); alpha += 2.0; } } } blis-0.6.1/examples/oapi/03obj_view.c000066400000000000000000000170711360743507500173500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "blis.h" void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); void init_dobj_by_cols( obj_t* a ); void init_zobj_by_cols( obj_t* a ); int main( int argc, char** argv ) { obj_t a1, a2; obj_t v1, v2, v3, v4, v5; num_t dt; dim_t m, n; inc_t rs, cs; dim_t i, j; dim_t mv, nv; // // This file demonstrates creating and submatrix views into existing matrices. 
// // // Example 1: Create an object and then create a submatrix view. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // We'll use these parameters for the following examples. dt = BLIS_DOUBLE; m = 6; n = 7; rs = 1; cs = m; // Create an object a1 using bli_obj_create(). bli_obj_create( dt, m, n, rs, cs, &a1 ); // Initialize a1 to contain known values. init_dobj_by_cols( &a1 ); bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); // Acquire a 4x3 submatrix view into a1 at (i,j) offsets (1,2). i = 1; j = 2; mv = 4; nv = 3; bli_acquire_mpart( i, j, mv, nv, &a1, &v1 ); bli_printm( "4x3 submatrix 'v1' at offsets (1,2)", &v1, "%5.1f", "" ); // NOTE: Submatrix views should never be passed to bli_obj_free(). It // will not cause an immediate error, but it is bad practice. Instead, // you should only release the objects that were created directy via // bli_obj_create(). In the above example, that means only object a1 // would be passed to bli_obj_free(). // // Example 2: Modify the contents of a submatrix view. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Modify the first three elements of the first column. bli_setijm( -3.0, 0.0, 0, 0, &v1 ); bli_setijm( -4.0, 0.0, 1, 0, &v1 ); bli_setijm( -5.0, 0.0, 2, 0, &v1 ); // Modify the first three elements of the second column. bli_setijm( -6.0, 0.0, 0, 1, &v1 ); bli_setijm( -7.0, 0.0, 1, 1, &v1 ); bli_setijm( -8.0, 0.0, 2, 1, &v1 ); // Print the matrix again so we can see the update elements. bli_printm( "submatrix view 'v1' (modified state)", &v1, "%5.1f", "" ); bli_printm( "matrix 'a1' (indirectly modified due to changes to 'v1')", &a1, "%5.1f", "" ); // // Example 3: Create a submatrix view that is "too big". // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // bli_acquire_mpart() will safely truncate your requested submatrix // view dimensions (or even the offsets) if they extend beyond the // bounds of the parent object. 
bli_printm( "matrix 'a1' (current state)", &a1, "%5.1f", "" ); // Acquire a 4x3 submatrix view into a1 at offsets (4,2). Notice how // the requested view contains four rows, but the view is created with // only two rows because the starting m offset of 4 leaves only two rows // left in the parent matrix. bli_acquire_mpart( 4, 2, 4, 3, &a1, &v2 ); bli_printm( "4x3 submatrix 'v2' at offsets (4,2) -- two rows truncated for safety", &v2, "%5.1f", "" ); // // Example 4: Create a bufferless object, attach an external buffer, and // then create a submatrix view. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create a object with known elements using the same approach as the // previous example file. double* p1 = malloc( m * n * sizeof( double ) ); init_dmatrix_by_cols( m, n, p1, rs, cs ); bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a2 ); bli_printm( "matrix 'a2' (initial state)", &a2, "%5.1f", "" ); // Acquire a 3x4 submatrix view at offset (2,3). bli_acquire_mpart( 2, 3, 3, 4, &a2, &v3 ); bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); // // Example 5: Use a submatrix view to set a region of a larger matrix to // zero. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); bli_setm( &BLIS_ZERO, &v3 ); bli_printm( "3x4 submatrix view 'v3' (zeroed out)", &v3, "%5.1f", "" ); bli_printm( "matrix 'a2' (modified state)", &a2, "%5.1f", "" ); // // Example 6: Obtain a submatrix view into a submatrix view. // printf( "\n#\n# -- Example 6 --\n#\n\n" ); bli_acquire_mpart( 1, 1, 5, 6, &a2, &v4 ); bli_printm( "5x6 submatrix view 'v4' at offsets (1,1) of 'a2'", &v4, "%5.1f", "" ); bli_acquire_mpart( 1, 0, 4, 5, &v4, &v5 ); bli_printm( "4x5 submatrix view 'v5' at offsets (1,0) of 'v4'", &v5, "%5.1f", "" ); // Free the memory arrays we allocated. free( p1 ); // Free the objects we created. 
bli_obj_free( &a1 ); return 0; } // ----------------------------------------------------------------------------- void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) { dim_t i, j; double alpha = 0.0; // Step through a matrix by rows, assigning each element a unique // value, starting at 0. for ( i = 0; i < m; ++i ) { for ( j = 0; j < n; ++j ) { double* a_ij = a + i*rs + j*cs; *a_ij = alpha; alpha += 1.0; } } } void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) { dim_t i, j; double alpha = 0.0; // Step through a matrix by columns, assigning each element a unique // value, starting at 0. for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { double* a_ij = a + i*rs + j*cs; *a_ij = alpha; alpha += 1.0; } } } void init_dobj_by_cols( obj_t* a ) { dim_t m = bli_obj_length( a ); dim_t n = bli_obj_width( a ); dim_t i, j; double alpha = 0.0; // Step through a matrix by columns, assigning each element a unique // value, starting at 0. for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { bli_setijm( alpha, 0.0, i, j, a ); alpha += 1.0; } } } void init_zobj_by_cols( obj_t* a ) { dim_t m = bli_obj_length( a ); dim_t n = bli_obj_width( a ); dim_t i, j; double alpha = 0.0; // Step through a matrix by columns, assigning each real and imaginary // element a unique value, starting at 0. for ( j = 0; j < n; ++j ) { for ( i = 0; i < m; ++i ) { bli_setijm( alpha, alpha + 1.0, i, j, a ); alpha += 2.0; } } } blis-0.6.1/examples/oapi/04level0.c000066400000000000000000000143071360743507500167330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { obj_t alpha, beta, gamma, kappa, zeta; num_t dt; double gamma_d; // // This file demonstrates working with scalar objects. // // // Example 1: Create a scalar (1x1) object. // dt = BLIS_DOUBLE; // The easiest way to create a scalar object is with the following // convenience function. bli_obj_create_1x1( dt, &alpha ); // We could, of course, create an object using our more general-purpose // function, using m = n = 1. bli_obj_create( dt, 1, 1, 0, 0, &beta ); // We can even attach an external scalar. This function, unlike // bli_obj_create_1x1() and bli_obj_create(), does not result in any // memory allocation. bli_obj_create_1x1_with_attached_buffer( dt, &gamma_d, &gamma ); // There is one more way to create an object. 
Like the previous method, // it also avoids memory allocation by referencing a special "internal" // scalar that is invisibly part of every object. bli_obj_scalar_init_detached( dt, &kappa ); // Digression: In the most common cases, there is no need to create scalar // objects to begin with. That's because BLIS comes with three ready-to-use // globally-scoped scalar objects: // // obj_t BLIS_MINUS_ONE; // obj_t BLIS_ZERO; // obj_t BLIS ONE; // // Each of these special objects is provided by blis.h. They can be used // wherever a scalar object is expected as an input operand regardless of // the datatype of your other operands. Note that you should never try to // modify these global scalar objects directly, nor should you ever try to // perform an operation *on* the objects (that is, you should never try to // update their values, though you can always perform operations *with* // them--that's the whole point!). // // Example 2: Set the value of an existing scalar object. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Once you've created an object, you can set its value via setsc. As with // setijm, setsc takes a real and imaginary value, but you can ignore the // imaginary argument if your object is real. And even if you pass in a // non-zero value, it is ignored for real objects. bli_setsc( -4.0, 0.0, &alpha ); bli_setsc( 3.0, 1.0, &beta ); bli_setsc( 0.5, 0.0, &kappa ); bli_setsc( 10.0, 0.0, &gamma ); // BLIS does not have a special print function for scalars, but since a // 1x1 is also a vector and a matrix, we can use printv or printm. bli_printm( "alpha:", &alpha, "%4.1f", "" ); bli_printm( "beta:", &beta, "%4.1f", "" ); bli_printm( "kappa:", &kappa, "%4.1f", "" ); bli_printm( "gamma:", &gamma, "%4.1f", "" ); // // Example 3: Create and set the value of a complex scalar object. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Create one more scalar, this time a complex scalar, to show how it // can be used. 
bli_obj_create_1x1( BLIS_DCOMPLEX, &zeta ); bli_setsc( 3.3, -4.4, &zeta ); bli_printm( "zeta (complex):", &zeta, "%4.1f", "" ); // // Example 4: Copy scalar objects. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // We can copy scalars amongst one another, and we can use the global // scalar constants for input operands. bli_copysc( &beta, &gamma ); bli_printm( "gamma (overwritten with beta):", &gamma, "%4.1f", "" ); bli_copysc( &BLIS_ONE, &gamma ); bli_printm( "gamma (overwritten with BLIS_ONE):", &gamma, "%4.1f", "" ); // // Example 5: Perform other operations on scalar objects. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // BLIS defines a range of basic floating-point operations on scalars. bli_addsc( &beta, &gamma ); bli_printm( "gamma := gamma + beta", &gamma, "%4.1f", "" ); bli_subsc( &alpha, &gamma ); bli_printm( "gamma := gamma - alpha", &gamma, "%4.1f", "" ); bli_divsc( &kappa, &gamma ); bli_printm( "gamma := gamma / kappa", &gamma, "%4.1f", "" ); bli_sqrtsc( &gamma, &gamma ); bli_printm( "gamma := sqrt( gamma )", &gamma, "%4.1f", "" ); bli_normfsc( &alpha, &alpha ); bli_printm( "alpha := normf( alpha ) # normf() = abs() in real domain.", &alpha, "%4.1f", "" ); // Note that normfsc() allows complex input objects, but requires that the // output operand (the second operand) be a real object. bli_normfsc( &zeta, &alpha ); bli_printm( "alpha := normf( zeta ) # normf() = complex modulus in complex domain.", &alpha, "%4.1f", "" ); bli_invertsc( &gamma ); bli_printm( "gamma := 1.0 / gamma", &gamma, "%4.2f", "" ); // Only free the objects that resulted in actual allocation. bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &zeta ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/oapi/05level1v.c000066400000000000000000000130501360743507500171150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { obj_t alpha, beta, gamma; obj_t x, y, z, w, a; num_t dt; dim_t m, n; inc_t rs, cs; // // This file demonstrates working with vector objects and the level-1v // operations. // // // Example 1: Create vector objects and then broadcast (copy) scalar // values to all elements. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a few vectors to work with. We make them all of the same length // so that we can perform operations between them. 
// NOTE: We've chosen to use row vectors here (1x4) instead of column // vectors (4x1) to allow for easier reading of standard output (less // scrolling). dt = BLIS_DOUBLE; m = 1; n = 4; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &x ); bli_obj_create( dt, m, n, rs, cs, &y ); bli_obj_create( dt, m, n, rs, cs, &z ); bli_obj_create( dt, m, n, rs, cs, &w ); bli_obj_create( dt, m, n, rs, cs, &a ); // Let's also create and initialize some scalar objects. bli_obj_create_1x1( dt, &alpha ); bli_obj_create_1x1( dt, &beta ); bli_obj_create_1x1( dt, &gamma ); bli_setsc( 2.0, 0.0, &alpha ); bli_setsc( 0.2, 0.0, &beta ); bli_setsc( 3.0, 0.0, &gamma ); bli_printm( "alpha:", &alpha, "%4.1f", "" ); bli_printm( "beta:", &beta, "%4.1f", "" ); bli_printm( "gamma:", &gamma, "%4.1f", "" ); // Vectors can set by "broadcasting" a constant to every element. bli_setv( &BLIS_ONE, &x ); bli_setv( &alpha, &y ); bli_setv( &BLIS_ZERO, &z ); // Note that we can use printv or printm to print vectors since vectors // are also matrices. We choose to use printm because it honors the // orientation of the vector (row or column) when printing, whereas // printv always prints vectors as column vectors regardless of their // they are 1 x n or n x 1. bli_printm( "x := 1.0", &x, "%4.1f", "" ); bli_printm( "y := alpha", &y, "%4.1f", "" ); bli_printm( "z := 0.0", &z, "%4.1f", "" ); // // Example 2: Randomize a vector object. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Set a vector to random values. bli_randv( &w ); bli_printm( "w := randv()", &w, "%4.1f", "" ); // // Example 3: Perform various element-wise operations on vector objects. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Copy a vector. bli_copyv( &w, &a ); bli_printm( "a := w", &a, "%4.1f", "" ); // Add and subtract vectors. bli_addv( &y, &a ); bli_printm( "a := a + y", &a, "%4.1f", "" ); bli_subv( &w, &a ); bli_printm( "a := a - w", &a, "%4.1f", "" ); // Scale a vector (destructive). 
bli_scalv( &beta, &a ); bli_printm( "a := beta * a", &a, "%4.1f", "" ); // Scale a vector (non-destructive). bli_scal2v( &gamma, &a, &z ); bli_printm( "z := gamma * a", &z, "%4.1f", "" ); // Scale and accumulate between vectors. bli_axpyv( &alpha, &w, &x ); bli_printm( "x := x + alpha * w", &x, "%4.1f", "" ); bli_xpbyv( &w, &BLIS_MINUS_ONE, &x ); bli_printm( "x := -1.0 * x + w", &x, "%4.1f", "" ); // Invert a vector element-wise. bli_invertv( &y ); bli_printm( "y := 1 / y", &y, "%4.1f", "" ); // Swap two vectors. bli_swapv( &x, &y ); bli_printm( "x (after swapping with y)", &x, "%4.1f", "" ); bli_printm( "y (after swapping with x)", &y, "%4.1f", "" ); // // Example 4: Perform contraction-like operations on vector objects. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Perform a dot product. bli_dotv( &a, &z, &gamma ); bli_printm( "gamma := a * z (dot product)", &gamma, "%5.2f", "" ); // Perform an extended dot product. bli_dotxv( &alpha, &a, &z, &BLIS_ONE, &gamma ); bli_printm( "gamma := 1.0 * gamma + alpha * a * z (accumulate scaled dot product)", &gamma, "%5.2f", "" ); // Free the objects. bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &gamma ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &z ); bli_obj_free( &w ); bli_obj_free( &a ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/oapi/06level1m.c000066400000000000000000000156151360743507500171160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { obj_t alpha, beta, gamma; obj_t a, b, c, d, e, f, g, h; num_t dt; dim_t m, n; inc_t rs, cs; // // This file demonstrates working with matrix objects and the level-1m // operations. // // // Example 1: Create matrix objects and then broadcast (copy) scalar // values to all elements. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a few matrices to work with. We make them all of the same // dimensions so that we can perform operations between them. dt = BLIS_DOUBLE; m = 2; n = 3; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &a ); bli_obj_create( dt, m, n, rs, cs, &b ); bli_obj_create( dt, m, n, rs, cs, &c ); bli_obj_create( dt, m, n, rs, cs, &d ); bli_obj_create( dt, m, n, rs, cs, &e ); // Let's also create and initialize some scalar objects. 
bli_obj_create_1x1( dt, &alpha ); bli_obj_create_1x1( dt, &beta ); bli_obj_create_1x1( dt, &gamma ); bli_setsc( 2.0, 0.0, &alpha ); bli_setsc( 0.2, 0.0, &beta ); bli_setsc( 3.0, 0.0, &gamma ); bli_printm( "alpha:", &alpha, "%4.1f", "" ); bli_printm( "beta:", &beta, "%4.1f", "" ); bli_printm( "gamma:", &gamma, "%4.1f", "" ); // Matrices, like vectors, can set by "broadcasting" a constant to every // element. bli_setm( &BLIS_ONE, &a ); bli_setm( &alpha, &b ); bli_setm( &BLIS_ZERO, &c ); bli_printm( "a := 1.0", &a, "%4.1f", "" ); bli_printm( "b := alpha", &b, "%4.1f", "" ); bli_printm( "c := 0.0", &c, "%4.1f", "" ); // // Example 2: Randomize a matrix object. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Set a matrix to random values. bli_randm( &e ); bli_printm( "e (randomized):", &e, "%4.1f", "" ); // // Example 3: Perform element-wise operations on matrices. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Copy a matrix. bli_copym( &e, &d ); bli_printm( "d := e", &d, "%4.1f", "" ); // Add and subtract vectors. bli_addm( &a, &d ); bli_printm( "d := d + a", &d, "%4.1f", "" ); bli_subm( &a, &e ); bli_printm( "e := e - a", &e, "%4.1f", "" ); // Scale a matrix (destructive). bli_scalm( &alpha, &e ); bli_printm( "e := alpha * e", &e, "%4.1f", "" ); // Scale a matrix (non-destructive). bli_scal2m( &beta, &e, &c ); bli_printm( "c := beta * e", &c, "%4.1f", "" ); // Scale and accumulate between matrices. bli_axpym( &alpha, &a, &c ); bli_printm( "c := c + alpha * a", &c, "%4.1f", "" ); // // Example 4: Copy and transpose a matrix. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create an n-by-m matrix into which we can copy-transpose an m-by-n // matrix. bli_obj_create( dt, n, m, rs, cs, &f ); // Initialize all of 'f' to -1.0 to simulate junk values. 
bli_setm( &BLIS_MINUS_ONE, &f ); bli_printm( "e:", &e, "%4.1f", "" ); bli_printm( "f (initial value):", &f, "%4.1f", "" ); // Since we are going to copy 'e' to 'f', we need to indicate a transpose // on 'e', the input operand. Transposition can be indicated by setting a // bit in the object. Since it always starts out as "no transpose", we can // simply toggle the bit. bli_obj_toggle_trans( &e ); // Another way to mark and object for transposition is to set it directly. //bli_obj_set_onlytrans( BLIS_TRANSPOSE, &e ); // A third way is to "apply" a transposition. This is equivalent to toggling // the transposition when the value being applied is BLIS_TRANSPOSE. If // the value applied is BLIS_NO_TRANSPOSE, the transposition bit in the // targeted object is unaffected. (Applying transposes is more useful in // practice when the 'trans' argument is a variable and not a constant // literal.) //bli_obj_apply_trans( BLIS_TRANSPOSE, &e ); //bli_obj_apply_trans( BLIS_NO_TRANSPOSE, &e ); //bli_obj_apply_trans( trans, &e ); // Copy 'e' to 'f', transposing 'e' in the process. Notice that we haven't // modified any properties of 'd'. It's the source operand that matters // when marking an operand for transposition, not the destination. bli_copym( &e, &f ); bli_printm( "f (copied value):", &f, "%4.1f", "" ); // // Example 5: Copy and Hermitian-transpose a matrix. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // Create an n-by-m complex matrix into which we can Hermitian-transpose // (or, conjugate-transpose) another complex (m-by-n) matrix. dt = BLIS_DCOMPLEX; bli_obj_create( dt, m, n, rs, cs, &g ); bli_obj_create( dt, n, m, rs, cs, &h ); // Randomize 'g', the input operand. bli_randm( &g ); // Initialize all of 'h' to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &h ); bli_printm( "g:", &g, "%4.1f", "" ); bli_printm( "h (initial value):", &h, "%4.1f", "" ); // Set both the transpose and conjugation bits. 
bli_obj_toggle_trans( &g ); bli_obj_toggle_conj( &g ); // Copy 'g' to 'h', conjugating and transposing 'g' in the process. // Once again, notice that it's the source operand that we've marked for // conjugation. bli_copym( &g, &h ); bli_printm( "h (copied value):", &h, "%4.1f", "" ); // Free the objects. bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &gamma ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &d ); bli_obj_free( &e ); bli_obj_free( &f ); bli_obj_free( &g ); bli_obj_free( &h ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/oapi/07level1m_diag.c000066400000000000000000000260361360743507500201020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { num_t dt; dim_t m, n; inc_t rs, cs; // // This file demonstrates level-1m operations on structured matrices. // // // Example 1: Initialize the upper triangle of a matrix to random values. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); obj_t a; // Create a matrix to work with. dt = BLIS_DOUBLE; m = 5; n = 5; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &a ); // First, we mark the matrix structure as triangular. bli_obj_set_struc( BLIS_TRIANGULAR, &a ); // Next, we specify whether the lower part or the upper part is to be // recognized as the "stored" region (which we call the uplo field). The // strictly opposite part (in this case, the strictly lower region) will // be *assumed* to be zero during computation. However, when printed out, // the strictly lower part may contain junk values. bli_obj_set_uplo( BLIS_UPPER, &a ); // Now set the upper triangle to random values. bli_randm( &a ); bli_printm( "a: randomize upper part (lower part may contain garbage)", &a, "%4.1f", "" ); // // Example 2: Initialize the upper triangle of a matrix to random values // but also explicitly set the strictly lower triangle to zero. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); obj_t b, bl; // Create a matrix to work with. dt = BLIS_DOUBLE; m = 5; n = 5; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &b ); // Set structure and uplo. 
bli_obj_set_struc( BLIS_TRIANGULAR, &b ); bli_obj_set_uplo( BLIS_UPPER, &b ); // Create an alias, 'bl', of the original object 'b'. Both objects will // refer to the same underlying matrix elements, but now we will have two // different "views" into the matrix. Aliases are simply "shallow copies" // of the objects, meaning no additional memory allocation takes place. // Therefore it is up to the API user (you) to make sure that you only // free the original object (or exactly one of the aliases). bli_obj_alias_to( &b, &bl ); // Digression: Each object contains a diagonal offset (even vectors), // even if it is never needed. The diagonal offset for a newly-created // object (ie: objects created via bli_obj_create*()) defaults to 0, // meaning it intersects element (0,0), but it can be changed. When the // diagonal offset delta is positive, the diagonal intersects element // (0,delta). When the diagonal offset is negative, the diagonal // intersects element (-delta,0). In other words, think of element (0,0) // as the origin of a coordinate plane, with the diagonal being the // x-axis value. // Set the diagonal offset of 'bl' to -1. bli_obj_set_diag_offset( -1, &bl ); // Set the uplo field of 'bl' to "lower". bli_obj_set_uplo( BLIS_LOWER, &bl ); // Set the upper triangle of 'b' to random values. bli_randm( &b ); // Set the strictly lower triangle of 'b' to zero (by setting the lower // triangle of 'bl' to zero). bli_setm( &BLIS_ZERO, &bl ); bli_printm( "b: randomize upper part; set strictly lower part to 0.0", &b, "%4.1f", "" ); // You may not see the effect of setting the strictly lower part to zero, // since those values may already be zero (instead of random junk). So // let's set it to something you'll notice, like -1.0. bli_setm( &BLIS_MINUS_ONE, &bl ); bli_printm( "b: randomize upper part; set strictly lower part to -1.0", &b, "%4.1f", "" ); // // Example 3: Copy the lower triangle of an existing object to a newly // created (but otherwise uninitialized) object. 
// printf( "\n#\n# -- Example 3 --\n#\n\n" ); obj_t c; // Create a matrix to work with. dt = BLIS_DOUBLE; m = 5; n = 5; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &c ); // Reset the diagonal offset of 'bl' to 0. bli_obj_set_diag_offset( 0, &bl ); // Copy the lower triangle of matrix 'b' from Example 2 to object 'c'. // This should give us -1.0 in the strictly lower part and some non-zero // random values along the diagonal. Note that since 'c' is starting out // uninitialized, the strictly upper part could contain junk. bli_copym( &bl, &c ); bli_printm( "c: copy lower part of b (upper part may contain garbage)", &c, "%4.1f", "" ); // Notice that the structure and uplo properties of 'c' were set to their // default values, BLIS_GENERAL and BLIS_DENSE, respectively. Thus, it is // the structure and uplo of the *source* operand that controls what gets // copied, regardless of the structure/uplo of the destination. To // demonstrate this further, let's see what happens when we copy 'bl' // (which is lower triangular) to 'a' (which is upper triangular). bli_copym( &bl, &a ); // The result is that the lower part (diagonal and strictly lower part) is // copied into 'a', but the elements in the strictly upper part of 'a' are // unaffected. Note, however, that 'a' is still marked as upper triangular // and so in future computations where 'a' is an input operand, the -1.0 // values that were copied from 'bl' into the lower triangle will be // ignored. Generally speaking, level-1m operations on triangular matrices // ignore the "unstored" regions of input operands because they are assumed // to be zero). bli_printm( "a: copy lower triangular bl to upper triangular a", &a, "%4.1f", "" ); // // Example 4: Copy the lower triangle of an existing object into the // upper triangle of an existing object. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); obj_t d; // Create a matrix to work with. 
dt = BLIS_DOUBLE; m = 5; n = 5; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &d ); // Let's start by setting entire destination matrix to zero. bli_setm( &BLIS_ZERO, &d ); bli_printm( "d: initial value (all zeros)", &d, "%4.1f", "" ); // Recall that 'bl' is marked as lower triangular with a diagonal offset // of 0. Also recall that 'bl' is an alias of 'b', which is now fully // initialized. But let's change a few values manually so we can later // see the full effect of the transposition. bli_setijm( 2.0, 0.0, 2, 0, &bl ); bli_setijm( 3.0, 0.0, 3, 0, &bl ); bli_setijm( 4.0, 0.0, 4, 0, &bl ); bli_setijm( 3.1, 0.0, 3, 1, &bl ); bli_setijm( 3.2, 0.0, 3, 2, &bl ); bli_printm( "bl: lower triangular bl is aliased to b", &bl, "%4.1f", "" ); // We want to pluck out the lower triangle and transpose it into the upper // triangle of 'd'. bli_obj_toggle_trans( &bl ); // Now we copy the transpose of the lower part of 'bl' into the upper // part of 'd'. (Again, notice that we haven't modified any properties of // 'd'. It's the source operand that matters, not the destination!) bli_copym( &bl, &d ); bli_printm( "d: transpose of lower triangular of bl copied to d", &d, "%4.1f", "" ); // // Example 5: Create a rectangular matrix (m > n) with a lower trapezoid // containing random values, then set the strictly upper // triangle to zeros. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); obj_t e, el; // Create a matrix to work with. dt = BLIS_DOUBLE; m = 6; n = 4; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &e ); // Initialize the entire matrix to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &e ); bli_printm( "e: initial value (all -1.0)", &e, "%4.1f", "" ); // Create an alias to work with. bli_obj_alias_to( &e, &el ); // Set structure and uplo of 'el'. bli_obj_set_struc( BLIS_TRIANGULAR, &el ); bli_obj_set_uplo( BLIS_LOWER, &el ); // Digression: Notice that "triangular" structure does not require that // the matrix be square. 
Rather, it simply means that either the part above // or below the diagonal will be assumed to be zero. // Randomize the lower trapezoid. bli_randm( &el ); bli_printm( "e: after lower trapezoid randomized", &e, "%4.1f", "" ); // Move the diagonal offset of 'el' to 1 and flip the uplo field to // "upper". bli_obj_set_diag_offset( 1, &el ); bli_obj_set_uplo( BLIS_UPPER, &el ); // Set the upper triangle to zero. bli_setm( &BLIS_ZERO, &el ); bli_printm( "e: after upper triangle set to zero", &e, "%4.1f", "" ); // // Example 6: Create an upper Hessenberg matrix of random values and then // set the "unstored" values to zero. // printf( "\n#\n# -- Example 6 --\n#\n\n" ); obj_t h, hl; // Create a matrix to work with. dt = BLIS_DOUBLE; m = 5; n = 5; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &h ); // Initialize the entire matrix to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &h ); bli_printm( "h: initial value (all -1.0)", &h, "%4.1f", "" ); // Set the diagonal offset of 'h' to -1. bli_obj_set_diag_offset( -1, &h ); // Set the structure and uplo of 'h'. bli_obj_set_struc( BLIS_TRIANGULAR, &h ); bli_obj_set_uplo( BLIS_UPPER, &h ); // Randomize the elements on and above the first subdiagonal. bli_randm( &h ); bli_printm( "h: after randomizing above first subdiagonal", &h, "%4.1f", "" ); // Create an alias to work with. bli_obj_alias_to( &h, &hl ); // Flip the uplo of 'hl' and move the diagonal down by one. bli_obj_set_uplo( BLIS_LOWER, &hl ); bli_obj_set_diag_offset( -2, &hl ); // Set the region strictly below the first subdiagonal (on or below // the second subdiagonal) to zero. bli_setm( &BLIS_ZERO, &hl ); bli_printm( "h: after setting elements below first subdiagonal to zero", &h, "%4.1f", "" ); // Free the objects. 
bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &d ); bli_obj_free( &e ); bli_obj_free( &h ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/oapi/08level2.c000066400000000000000000000226141360743507500167410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" int main( int argc, char** argv ) { num_t dt; dim_t m, n; inc_t rs, cs; obj_t a, x, y, b; obj_t* alpha; obj_t* beta; // // This file demonstrates level-2 operations. // // // Example 1: Perform a general rank-1 update (ger) operation. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 4; n = 5; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &a ); bli_obj_create( dt, m, 1, rs, cs, &x ); bli_obj_create( dt, 1, n, rs, cs, &y ); // Set alpha. alpha = &BLIS_ONE; // Initialize vectors 'x' and 'y'. bli_randv( &x ); bli_setv( &BLIS_MINUS_ONE, &y ); // Initialize 'a' to 1.0. bli_setm( &BLIS_ONE, &a ); bli_printm( "x: set to random values", &x, "%4.1f", "" ); bli_printm( "y: set to -1.0", &y, "%4.1f", "" ); bli_printm( "a: initial value", &a, "%4.1f", "" ); // a := a + alpha * x * y, where 'a' is general. bli_ger( alpha, &x, &y, &a ); bli_printm( "a: after ger", &a, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); // // Example 2: Perform a general matrix-vector multiply (gemv) operation. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 4; n = 5; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &a ); bli_obj_create( dt, 1, n, rs, cs, &x ); bli_obj_create( dt, 1, m, rs, cs, &y ); // Notice that we created vectors 'x' and 'y' as row vectors, even though // we often think of them as column vectors so that the overall problem // dimensions remain conformal. Note that this flexibility only comes // from the fact that the operation requires those operands to be vectors. // If we were instead looking at an operation where the operands were of // general shape (such as with the gemm operation), then typically the // dimensions matter, and column vectors would not be interchangeable with // row vectors and vice versa. // Set the scalars to use. 
alpha = &BLIS_ONE; beta = &BLIS_ONE; // Initialize vectors 'x' and 'y'. bli_setv( &BLIS_ONE, &x ); bli_setv( &BLIS_ZERO, &y ); // Randomize 'a'. bli_randm( &a ); bli_printm( "a: randomized", &a, "%4.1f", "" ); bli_printm( "x: set to 1.0", &x, "%4.1f", "" ); bli_printm( "y: initial value", &y, "%4.1f", "" ); // y := beta * y + alpha * a * x, where 'a' is general. bli_gemv( alpha, &a, &x, beta, &y ); bli_printm( "y: after gemv", &y, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); // // Example 3: Perform a symmetric rank-1 update (syr) operation. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 5; rs = 0; cs = 0; bli_obj_create( dt, m, m, rs, cs, &a ); bli_obj_create( dt, 1, m, rs, cs, &x ); // Set alpha. alpha = &BLIS_ONE; // Initialize vector 'x'. bli_randv( &x ); // Zero out all of matrix 'a'. This is optional, but will avoid possibly // displaying junk values in the unstored triangle. bli_setm( &BLIS_ZERO, &a ); // Mark matrix 'a' as symmetric and stored in the lower triangle, and // then randomize that lower triangle. bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); bli_printm( "x: set to random values", &x, "%4.1f", "" ); bli_printm( "a: initial value (zeros in upper triangle)", &a, "%4.1f", "" ); // a := a + alpha * x * x^T, where 'a' is symmetric and lower-stored. bli_syr( alpha, &x, &a ); bli_printm( "a: after syr", &a, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &x ); // // Example 4: Perform a symmetric matrix-vector multiply (symv) operation. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 5; rs = 0; cs = 0; bli_obj_create( dt, m, m, rs, cs, &a ); bli_obj_create( dt, 1, m, rs, cs, &x ); bli_obj_create( dt, 1, m, rs, cs, &y ); // Set the scalars to use. 
alpha = &BLIS_ONE; beta = &BLIS_ONE; // Initialize vectors 'x' and 'y'. bli_setv( &BLIS_ONE, &x ); bli_setv( &BLIS_ZERO, &y ); // Zero out all of matrix 'a'. This is optional, but will avoid possibly // displaying junk values in the unstored triangle. bli_setm( &BLIS_ZERO, &a ); // Mark matrix 'a' as symmetric and stored in the upper triangle, and // then randomize that upper triangle. bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( BLIS_UPPER, &a ); bli_randm( &a ); bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" ); bli_printm( "x: set to 1.0", &x, "%4.1f", "" ); bli_printm( "y: initial value", &y, "%4.1f", "" ); // y := beta * y + alpha * a * x, where 'a' is symmetric and upper-stored. bli_symv( alpha, &a, &x, beta, &y ); bli_printm( "y: after symv", &y, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); // // Example 5: Perform a triangular matrix-vector multiply (trmv) operation. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 5; rs = 0; cs = 0; bli_obj_create( dt, m, m, rs, cs, &a ); bli_obj_create( dt, 1, m, rs, cs, &x ); // Set the scalars to use. alpha = &BLIS_ONE; // Initialize vector 'x'. bli_setv( &BLIS_ONE, &x ); // Zero out all of matrix 'a'. This is optional, but will avoid possibly // displaying junk values in the unstored triangle. bli_setm( &BLIS_ZERO, &a ); // Mark matrix 'a' as triangular and stored in the lower triangle, and // then randomize that lower triangle. bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); bli_printm( "x: initial value", &x, "%4.1f", "" ); // x := alpha * a * x, where 'a' is triangular and lower-stored. bli_trmv( alpha, &a, &x ); bli_printm( "x: after trmv", &x, "%4.1f", "" ); // Free the objects. 
bli_obj_free( &a ); bli_obj_free( &x ); // // Example 6: Perform a triangular solve (trsv) operation. // printf( "\n#\n# -- Example 6 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 5; rs = 0; cs = 0; bli_obj_create( dt, m, m, rs, cs, &a ); bli_obj_create( dt, 1, m, rs, cs, &b ); bli_obj_create( dt, 1, m, rs, cs, &y ); // Set the scalars to use. alpha = &BLIS_ONE; // Initialize vector 'x'. bli_setv( &BLIS_ONE, &b ); // Zero out all of matrix 'a'. This is optional, but will avoid possibly // displaying junk values in the unstored triangle. bli_setm( &BLIS_ZERO, &a ); // Mark matrix 'a' as triangular and stored in the lower triangle, and // then randomize that lower triangle. bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); // Load the diagonal. By setting the diagonal to something of greater // absolute value than the off-diagonal elements, we increase the odds // that the matrix is not singular (singular matrices have no inverse). bli_shiftd( &BLIS_TWO, &a ); bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); bli_printm( "b: initial value", &b, "%4.1f", "" ); // solve a * x = alpha * b, where 'a' is triangular and lower-stored, and // overwrite b with the solution vector x. bli_trsv( alpha, &a, &b ); bli_printm( "b: after trsv", &b, "%4.1f", "" ); // We can confirm the solution by comparing the product of a and x to the // original value of b. bli_copyv( &b, &y ); bli_trmv( alpha, &a, &y ); bli_printm( "y: should equal initial value of b", &y, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/oapi/09level3.c000066400000000000000000000224051360743507500167410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { num_t dt; dim_t m, n, k; inc_t rs, cs; side_t side; obj_t a, b, c; obj_t* alpha; obj_t* beta; // // This file demonstrates level-3 operations. // // // Example 1: Perform a general matrix-matrix multiply (gemm) operation. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create some matrix operands to work with. 
dt = BLIS_DOUBLE; m = 4; n = 5; k = 3; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &c ); bli_obj_create( dt, m, k, rs, cs, &a ); bli_obj_create( dt, k, n, rs, cs, &b ); // Set the scalars to use. alpha = &BLIS_ONE; beta = &BLIS_ONE; // Initialize the matrix operands. bli_randm( &a ); bli_setm( &BLIS_ONE, &b ); bli_setm( &BLIS_ZERO, &c ); bli_printm( "a: randomized", &a, "%4.1f", "" ); bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); bli_printm( "c: initial value", &c, "%4.1f", "" ); // c := beta * c + alpha * a * b, where 'a', 'b', and 'c' are general. bli_gemm( alpha, &a, &b, beta, &c ); bli_printm( "c: after gemm", &c, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); // // Example 1b: Perform a general matrix-matrix multiply (gemm) operation // with the left input operand (matrix A) transposed. // printf( "\n#\n# -- Example 1b --\n#\n\n" ); // Create some matrix operands to work with. dt = BLIS_DOUBLE; m = 4; n = 5; k = 3; rs = 0; cs = 0; bli_obj_create( dt, m, n, rs, cs, &c ); bli_obj_create( dt, k, m, rs, cs, &a ); bli_obj_create( dt, k, n, rs, cs, &b ); // Set the scalars to use. alpha = &BLIS_ONE; beta = &BLIS_ONE; // Initialize the matrix operands. bli_randm( &a ); bli_setm( &BLIS_ONE, &b ); bli_setm( &BLIS_ZERO, &c ); // Set the transpose bit in 'a'. bli_obj_toggle_trans( &a ); bli_printm( "a: randomized", &a, "%4.1f", "" ); bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); bli_printm( "c: initial value", &c, "%4.1f", "" ); // c := beta * c + alpha * a^T * b, where 'a', 'b', and 'c' are general. bli_gemm( alpha, &a, &b, beta, &c ); bli_printm( "c: after gemm", &c, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); // // Example 2: Perform a symmetric rank-k update (syrk) operation. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Create some matrix and vector operands to work with. 
dt = BLIS_DOUBLE; m = 5; k = 3; rs = 0; cs = 0; bli_obj_create( dt, m, m, rs, cs, &c ); bli_obj_create( dt, m, k, rs, cs, &a ); // Set alpha. alpha = &BLIS_ONE; // Initialize matrix operands. bli_setm( &BLIS_ZERO, &c ); bli_randm( &a ); // Mark matrix 'c' as symmetric and stored in the lower triangle, and // then randomize that lower triangle. bli_obj_set_struc( BLIS_SYMMETRIC, &c ); bli_obj_set_uplo( BLIS_LOWER, &c ); bli_randm( &c ); bli_printm( "a: set to random values", &a, "%4.1f", "" ); bli_printm( "c: initial value (zeros in upper triangle)", &c, "%4.1f", "" ); // c := c + alpha * a * a^T, where 'c' is symmetric and lower-stored. bli_syrk( alpha, &a, beta, &c ); bli_printm( "c: after syrk", &c, "%4.1f", "" ); // Free the objects. bli_obj_free( &c ); bli_obj_free( &a ); // // Example 3: Perform a symmetric matrix-matrix multiply (symm) operation. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 5; n = 6; rs = 0; cs = 0; bli_obj_create( dt, m, m, rs, cs, &a ); bli_obj_create( dt, m, n, rs, cs, &b ); bli_obj_create( dt, m, n, rs, cs, &c ); // Set the scalars to use. alpha = &BLIS_ONE; beta = &BLIS_ONE; // Set the side operand. side = BLIS_LEFT; // Initialize matrices 'b' and 'c'. bli_setm( &BLIS_ONE, &b ); bli_setm( &BLIS_ZERO, &c ); // Zero out all of matrix 'a'. This is optional, but will avoid possibly // displaying junk values in the unstored triangle. bli_setm( &BLIS_ZERO, &a ); // Mark matrix 'a' as symmetric and stored in the upper triangle, and // then randomize that upper triangle. bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( BLIS_UPPER, &a ); bli_randm( &a ); bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" ); bli_printm( "b: set to 1.0", &b, "%4.1f", "" ); bli_printm( "c: initial value", &c, "%4.1f", "" ); // c := beta * c + alpha * a * b, where 'a' is symmetric and upper-stored. 
// Note that the first 'side' operand indicates the side from which matrix // 'a' is multiplied into 'b'. bli_symm( side, alpha, &a, &b, beta, &c ); bli_printm( "c: after symm", &c, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); // // Example 4: Perform a triangular matrix-matrix multiply (trmm) operation. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 5; n = 4; rs = 0; cs = 0; bli_obj_create( dt, m, m, rs, cs, &a ); bli_obj_create( dt, m, n, rs, cs, &b ); // Set the scalars to use. alpha = &BLIS_ONE; // Set the side operand. side = BLIS_LEFT; // Initialize matrix 'b'. bli_setm( &BLIS_ONE, &b ); // Zero out all of matrix 'a'. This is optional, but will avoid possibly // displaying junk values in the unstored triangle. bli_setm( &BLIS_ZERO, &a ); // Mark matrix 'a' as triangular and stored in the lower triangle, and // then randomize that lower triangle. bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); bli_printm( "b: initial value", &b, "%4.1f", "" ); // b := alpha * a * b, where 'a' is triangular and lower-stored. bli_trmm( side, alpha, &a, &b ); bli_printm( "x: after trmm", &b, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); // // Example 5: Perform a triangular solve with multiple right-hand sides // (trsm) operation. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // Create some matrix and vector operands to work with. dt = BLIS_DOUBLE; m = 5; n = 4; rs = 0; cs = 0; bli_obj_create( dt, m, m, rs, cs, &a ); bli_obj_create( dt, m, n, rs, cs, &b ); bli_obj_create( dt, m, n, rs, cs, &c ); // Set the scalars to use. alpha = &BLIS_ONE; // Set the side operand. side = BLIS_LEFT; // Initialize matrix 'b'. bli_setm( &BLIS_ONE, &b ); // Zero out all of matrix 'a'. 
This is optional, but will avoid possibly // displaying junk values in the unstored triangle. bli_setm( &BLIS_ZERO, &a ); // Mark matrix 'a' as triangular and stored in the lower triangle, and // then randomize that lower triangle. bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); // Load the diagonal. By setting the diagonal to something of greater // absolute value than the off-diagonal elements, we increase the odds // that the matrix is not singular (singular matrices have no inverse). bli_shiftd( &BLIS_TWO, &a ); bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); bli_printm( "b: initial value", &b, "%4.1f", "" ); // solve a * x = alpha * b, where 'a' is triangular and lower-stored, and // overwrite b with the solution matrix x. bli_trsm( side, alpha, &a, &b ); bli_printm( "b: after trsm", &b, "%4.1f", "" ); // We can confirm the solution by comparing the product of a and x to the // original value of b. bli_copym( &b, &c ); bli_trmm( side, alpha, &a, &c ); bli_printm( "c: should equal initial value of b", &c, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/oapi/10util.c000066400000000000000000000210471360743507500165150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { obj_t norm1, normi, normf; obj_t x, y, a, b, c, d, e, f, g; num_t dt; dim_t m, n; inc_t rs, cs; // // This file demonstrates working with vector and matrix objects in the // context of various utility operations. // // // Example 1: Compute various vector norms. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a few matrices to work with. m = 1; n = 5; rs = 0; cs = 0; bli_obj_create( BLIS_DOUBLE, m, n, rs, cs, &x ); bli_obj_create( BLIS_DCOMPLEX, m, n, rs, cs, &y ); // Let's also create some scalar objects to hold the norms. Note that when // computing the norm alpha of a vector 'x', the datatype of alpha must be // equal to the real projection of the datatype of 'x'. 
dt = BLIS_DOUBLE; bli_obj_create_1x1( dt, &norm1 ); bli_obj_create_1x1( dt, &normi ); bli_obj_create_1x1( dt, &normf ); // Initialize the vectors to random values. bli_randv( &x ); bli_randv( &y ); bli_printm( "x:", &x, "%4.1f", "" ); // Compute the one, infinity, and frobenius norms of 'x'. bli_norm1v( &x, &norm1 ); bli_normiv( &x, &normi ); bli_normfv( &x, &normf ); bli_printm( "x: 1-norm:", &norm1, "%4.1f", "" ); bli_printm( "x: infinity norm:", &normi, "%4.1f", "" ); bli_printm( "x: frobenius norm:", &normf, "%4.1f", "" ); bli_printm( "y:", &y, "%4.1f", "" ); // Compute the one, infinity, and frobenius norms of 'y'. Note that we // can reuse the same scalars from before for computing norms of // dcomplex matrices, since the real projection of dcomplex is double. bli_norm1v( &y, &norm1 ); bli_normiv( &y, &normi ); bli_normfv( &y, &normf ); bli_printm( "y: 1-norm:", &norm1, "%4.1f", "" ); bli_printm( "y: infinity norm:", &normi, "%4.1f", "" ); bli_printm( "y: frobenius norm:", &normf, "%4.1f", "" ); // // Example 2: Compute various matrix norms. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Create a few matrices to work with. m = 5; n = 6; rs = 0; cs = 0; bli_obj_create( BLIS_DOUBLE, m, n, rs, cs, &a ); bli_obj_create( BLIS_DCOMPLEX, m, n, rs, cs, &b ); // Initialize the matrices to random values. bli_randm( &a ); bli_randm( &b ); bli_printm( "a:", &a, "%4.1f", "" ); // Compute the one-norm of 'a'. bli_norm1m( &a, &norm1 ); bli_normim( &a, &normi ); bli_normfm( &a, &normf ); bli_printm( "a: 1-norm:", &norm1, "%4.1f", "" ); bli_printm( "a: infinity norm:", &normi, "%4.1f", "" ); bli_printm( "a: frobenius norm:", &normf, "%4.1f", "" ); bli_printm( "b:", &b, "%4.1f", "" ); // Compute the one-norm of 'b'. 
bli_norm1m( &b, &norm1 ); bli_normim( &b, &normi ); bli_normfm( &b, &normf ); bli_printm( "b: 1-norm:", &norm1, "%4.1f", "" ); bli_printm( "b: infinity norm:", &normi, "%4.1f", "" ); bli_printm( "b: frobenius norm:", &normf, "%4.1f", "" ); // // Example 3: Make a real matrix explicitly symmetric (or Hermitian). // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Create a few matrices to work with. m = 4; n = 4; rs = 0; cs = 0; bli_obj_create( BLIS_DOUBLE, m, n, rs, cs, &c ); bli_obj_create( BLIS_DOUBLE, m, n, rs, cs, &d ); // Initialize all of 'c' to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &c ); // Set the structure and uplo of 'c'. bli_obj_set_struc( BLIS_SYMMETRIC, &c ); bli_obj_set_uplo( BLIS_LOWER, &c ); // Randomize the lower triangle of 'c'. bli_randm( &c ); bli_printm( "c (initial state):", &c, "%4.1f", "" ); // mksymm on a real matrix transposes the stored triangle into the // unstored triangle, making the matrix densely symmetric. bli_mksymm( &c ); bli_printm( "c (after mksymm on lower triangle):", &c, "%4.1f", "" ); // Digression: Most people think only of complex matrices as being able // to be complex. However, in BLIS, we define Hermitian operations on // real matrices, too--they are simply equivalent to the corresponding // symmetric operation. For example, when we make a real matrix explicitly // Hermitian, the result is indistinguishable from making it symmetric. // Initialize all of 'd' to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &d ); bli_obj_set_struc( BLIS_HERMITIAN, &d ); bli_obj_set_uplo( BLIS_LOWER, &d ); // Randomize the lower triangle of 'd'. bli_randm( &d ); bli_printm( "d (initial state):", &d, "%4.1f", "" ); // mkherm on a real matrix behaves the same as mksymm, as there are no // imaginary elements to conjugate. bli_mkherm( &d ); bli_printm( "d (after mkherm on lower triangle):", &d, "%4.1f", "" ); // // Example 4: Make a complex matrix explicitly symmetric or Hermitian. 
// printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create a few matrices to work with. m = 4; n = 4; rs = 0; cs = 0; bli_obj_create( BLIS_DCOMPLEX, m, n, rs, cs, &e ); bli_obj_create( BLIS_DCOMPLEX, m, n, rs, cs, &f ); // Initialize all of 'e' to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &e ); // Set the structure and uplo of 'e'. bli_obj_set_struc( BLIS_SYMMETRIC, &e ); bli_obj_set_uplo( BLIS_UPPER, &e ); // Randomize the upper triangle of 'e'. bli_randm( &e ); bli_printm( "e (initial state):", &e, "%4.1f", "" ); // mksymm on a complex matrix transposes the stored triangle into the // unstored triangle. bli_mksymm( &e ); bli_printm( "e (after mksymm):", &e, "%4.1f", "" ); // Initialize all of 'f' to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &f ); // Set the structure and uplo of 'f'. bli_obj_set_struc( BLIS_HERMITIAN, &f ); bli_obj_set_uplo( BLIS_UPPER, &f ); // Randomize the upper triangle of 'f'. bli_randm( &f ); bli_printm( "f (initial state):", &f, "%4.1f", "" ); // mkherm on a complex matrix transposes and conjugates the stored // triangle into the unstored triangle. bli_mkherm( &f ); bli_printm( "f (after mkherm):", &f, "%4.1f", "" ); // // Example 5: Make a real matrix explicitly triangular. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // Create a few matrices to work with. m = 5; n = 5; rs = 0; cs = 0; bli_obj_create( BLIS_DOUBLE, m, n, rs, cs, &g ); // Initialize all of 'g' to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &g ); // Set the structure and uplo of 'g'. bli_obj_set_struc( BLIS_TRIANGULAR, &g ); bli_obj_set_uplo( BLIS_LOWER, &g ); // Randomize the lower triangle of 'g'. bli_randm( &g ); bli_printm( "g (initial state):", &g, "%4.1f", "" ); // mktrim does not explicitly copy any data, since presumably the stored // triangle already contains the data of interest. However, mktrim does // explicitly writes zeros to the unstored region. 
bli_mktrim( &g ); bli_printm( "g (after mktrim):", &g, "%4.1f", "" ); // Free the objects. bli_obj_free( &norm1 ); bli_obj_free( &normi ); bli_obj_free( &normf ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &d ); bli_obj_free( &e ); bli_obj_free( &f ); bli_obj_free( &g ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/oapi/11gemm_md.c000066400000000000000000000210271360743507500171440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { num_t dt_r, dt_c; num_t dt_s, dt_d; num_t dt_a, dt_b; dim_t m, n, k; inc_t rs, cs; obj_t a, b, c; obj_t* alpha; obj_t* beta; // // This file demonstrates mixing datatypes in gemm. // // NOTE: Please make sure that mixed datatype support is enabled in BLIS // before proceeding to build and run the example binaries. If you're not // sure whether mixed datatype support is enabled in BLIS, please refer // to './configure --help' for the relevant options. // // // Example 1: Perform a general matrix-matrix multiply (gemm) operation // with operands of different domains (but identical precisions). // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create some matrix operands to work with. dt_r = BLIS_DOUBLE; dt_c = BLIS_DCOMPLEX; m = 4; n = 5; k = 1; rs = 0; cs = 0; bli_obj_create( dt_c, m, n, rs, cs, &c ); bli_obj_create( dt_r, m, k, rs, cs, &a ); bli_obj_create( dt_c, k, n, rs, cs, &b ); // Set the scalars to use. alpha = &BLIS_ONE; beta = &BLIS_ONE; // Initialize the matrix operands. bli_randm( &a ); bli_randm( &b ); bli_setm( &BLIS_ZERO, &c ); bli_printm( "a (double real): randomized", &a, "%4.1f", "" ); bli_printm( "b (double complex): randomized", &b, "%4.1f", "" ); bli_printm( "c (double complex): initial value", &c, "%4.1f", "" ); // c := beta * c + alpha * a * b, where 'a' is real, and 'b' and 'c' are // complex. 
bli_gemm( alpha, &a, &b, beta, &c ); bli_printm( "c (double complex): after gemm", &c, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); // // Example 2: Perform a general matrix-matrix multiply (gemm) operation // with operands of different precisions (but identical domains). // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Create some matrix operands to work with. dt_s = BLIS_FLOAT; dt_d = BLIS_DOUBLE; m = 4; n = 5; k = 1; rs = 0; cs = 0; bli_obj_create( dt_d, m, n, rs, cs, &c ); bli_obj_create( dt_s, m, k, rs, cs, &a ); bli_obj_create( dt_s, k, n, rs, cs, &b ); // Notice that we've chosen C to be double-precision real and A and B to be // single-precision real. // Since we are mixing precisions, we will also need to specify the // so-called "computation precision." That is, we need to signal to // bli_gemm() whether we want the A*B product to be computed in single // precision or double precision (prior to the result being accumulated // back to C). To specify the computation precision, we need to set the // corresponding bit in the C object. Here, we specify double-precision // computation. // NOTE: If you do not explicitly specify the computation precision, it // will default to the storage precision of the C object. bli_obj_set_comp_prec( BLIS_DOUBLE_PREC, &c ); // Initialize the matrix operands. bli_randm( &a ); bli_randm( &b ); bli_setm( &BLIS_ZERO, &c ); bli_printm( "a (single real): randomized", &a, "%4.1f", "" ); bli_printm( "b (single real): randomized", &b, "%4.1f", "" ); bli_printm( "c (double real): initial value", &c, "%4.1f", "" ); // c := beta * c + alpha * a * b, where 'a' and 'b' are single-precision // real, 'c' is double-precision real, and the matrix product is performed // in double-precision arithmetic. bli_gemm( alpha, &a, &b, beta, &c ); bli_printm( "c (double real): after gemm (exec prec = double precision)", &c, "%4.1f", "" ); // Free the objects. 
bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); // // Example 3: Perform a general matrix-matrix multiply (gemm) operation // with operands of different domains AND precisions. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Create some matrix operands to work with. dt_a = BLIS_FLOAT; dt_b = BLIS_DCOMPLEX; dt_c = BLIS_SCOMPLEX; m = 4; n = 5; k = 1; rs = 0; cs = 0; bli_obj_create( dt_c, m, n, rs, cs, &c ); bli_obj_create( dt_a, m, k, rs, cs, &a ); bli_obj_create( dt_b, k, n, rs, cs, &b ); // Notice that we've chosen C to be single-precision complex, and A to be // single-precision real, and B to be double-precision complex. // Set the computation precision to single precision this time. bli_obj_set_comp_prec( BLIS_SINGLE_PREC, &c ); // Initialize the matrix operands. bli_randm( &a ); bli_randm( &b ); bli_setm( &BLIS_ZERO, &c ); bli_printm( "a (single real): randomized", &a, "%4.1f", "" ); bli_printm( "b (double complex): randomized", &b, "%4.1f", "" ); bli_printm( "c (single complex): initial value", &c, "%4.1f", "" ); // c := beta * c + alpha * a * b, where 'a' is single-precision real, 'b' // is double-precision complex, 'c' is single-precision complex, and the // matrix product is performed in single-precision arithmetic. bli_gemm( alpha, &a, &b, beta, &c ); bli_printm( "c (single complex): after gemm (exec prec = single precision)", &c, "%4.1f", "" ); // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); // // Example 4: Project objects between the real and complex domains. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create some matrix operands to work with. dt_r = BLIS_DOUBLE; dt_c = BLIS_DCOMPLEX; m = 4; n = 5; rs = 0; cs = 0; bli_obj_create( dt_r, m, n, rs, cs, &a ); bli_obj_create( dt_c, m, n, rs, cs, &b ); // Initialize a real matrix A. bli_randm( &a ); bli_printm( "a (double real): randomized", &a, "%4.1f", "" ); // Project real matrix A to the complex domain (in B). 
bli_projm( &a, &b ); bli_printm( "b (double complex): projected from 'a'", &b, "%4.1f", "" ); // Notice how the imaginary components in B are zero since any real // matrix implicitly has imaginary values that are equal to zero. // Now let's project in the other direction. // Initialize the complex matrix B. bli_randm( &b ); bli_printm( "b (double complex): randomized", &b, "%4.1f", "" ); // Project complex matrix B to the real domain (in A). bli_projm( &b, &a ); bli_printm( "a (double real): projected from 'b'", &a, "%4.1f", "" ); // Notice how the imaginary components are lost in the projection from // the complex domain to the real domain. // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); // // Example 5: Typecast objects between the single and double precisions. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // Create some matrix operands to work with. dt_s = BLIS_FLOAT; dt_d = BLIS_DOUBLE; m = 4; n = 3; rs = 0; cs = 0; bli_obj_create( dt_d, m, n, rs, cs, &a ); bli_obj_create( dt_s, m, n, rs, cs, &b ); // Initialize a double-precision real matrix A. bli_randm( &a ); bli_printm( "a (double real): randomized", &a, "%23.16e", "" ); // Typecast A to single precision. bli_castm( &a, &b ); bli_printm( "b (single real): typecast from 'a'", &b, "%23.16e", "" ); // Notice how the values in B are only accurate to the 6th or 7th decimal // place relative to the true values in A. // Free the objects. bli_obj_free( &a ); bli_obj_free( &b ); return 0; } blis-0.6.1/examples/oapi/Makefile000066400000000000000000000122271360743507500166730ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
#
#  Copyright (C) 2014, The University of Texas at Austin
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are
#  met:
#   - Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   - Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   - Neither the name(s) of the copyright holder(s) nor the names of its
#     contributors may be used to endorse or promote products derived
#     from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

#
# Makefile
#
# Field G. Van Zee
#
# Makefile for BLIS object API example code.
#
# Builds one executable (.x) per example source file listed in TEST_BINS,
# linking each against the BLIS library. Set BLIS_INSTALL_PATH to build
# against an installed copy of BLIS; otherwise the build tree two directories
# up ("../..") is used.
#

#
# --- Makefile PHONY target definitions ----------------------------------------
#

# NOTE(review): 'run' is declared phony, but its rule is commented out near
# the bottom of this file; the check-env* targets are not declared phony.
.PHONY: all bin clean run


#
# --- Determine makefile fragment location -------------------------------------
#

# Comments:
# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given.
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in
#   the second case because CONFIG_NAME is not yet set.
ifneq ($(strip $(BLIS_INSTALL_PATH)),)
LIB_PATH := $(BLIS_INSTALL_PATH)/lib
INC_PATH := $(BLIS_INSTALL_PATH)/include/blis
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
else
DIST_PATH := ../..
LIB_PATH = ../../lib/$(CONFIG_NAME)
INC_PATH = ../../include/$(CONFIG_NAME)
SHARE_PATH := ../..
endif

# Disabled per-path overrides; kept for reference.
#ifneq ($(strip $(BLIS_LIB_PATH)),)
#LIB_PATH := $(BLIS_LIB_PATH)
#endif
#
#ifneq ($(strip $(BLIS_INC_PATH)),)
#INC_PATH := $(BLIS_INC_PATH)
#endif
#
#ifneq ($(strip $(BLIS_SHARE_PATH)),)
#SHARE_PATH := $(BLIS_SHARE_PATH)
#endif


#
# --- Include common makefile definitions --------------------------------------
#

# Include the common makefile fragment. The leading '-' makes this a
# non-fatal include: make does not stop if the file is missing.
# NOTE(review): CONFIG_NAME, get-user-cflags-for, LIBBLIS_LINK, LINKER,
# LDFLAGS, RM_F, ENABLE_VERBOSE, CONFIG_MK_PRESENT and MAKE_DEFS_MK_PRESENT
# are not defined in this file; presumably they come from common.mk --
# confirm.
-include $(SHARE_PATH)/common.mk


#
# --- General build definitions ------------------------------------------------
#

TEST_SRC_PATH := .
TEST_OBJ_PATH := .

# Gather all local object files.
TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \
                               $(TEST_OBJ_PATH)/%.o, \
                               $(wildcard $(TEST_SRC_PATH)/*.c)))

# Override the value of CINCFLAGS so that the value of CFLAGS returned by
# get-user-cflags-for() is not cluttered up with include paths needed only
# while building BLIS.
CINCFLAGS := -I$(INC_PATH)

# Use the "framework" CFLAGS for the configuration family.
CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME))

# Add local header paths to CFLAGS
CFLAGS += -I$(TEST_SRC_PATH)

# Locate the libblis library to which we will link.
#LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L)

# Binary executable name.
TEST_BINS := 00obj_basic.x \
             01obj_attach.x \
             02obj_ij.x \
             03obj_view.x \
             04level0.x \
             05level1v.x \
             06level1m.x \
             07level1m_diag.x \
             08level2.x \
             09level3.x \
             10util.x \
             11gemm_md.x


#
# --- Targets/rules ------------------------------------------------------------
#

# --- Primary targets ---

all: bin

bin: $(TEST_BINS)


# --- Environment check rules ---

# NOTE(review): nothing in this file depends on check-env, and
# check-env-fragments has no rule here -- confirm whether these checks are
# meant to be wired into 'all'.
check-env: check-env-make-defs check-env-fragments check-env-config-mk

check-env-config-mk:
ifeq ($(CONFIG_MK_PRESENT),no)
	$(error Cannot proceed: config.mk not detected! Run configure first)
endif

check-env-make-defs: check-env-fragments
ifeq ($(MAKE_DEFS_MK_PRESENT),no)
	$(error Cannot proceed: make_defs.mk not detected! Invalid configuration)
endif


# -- Object file rules --

# Each object also depends on the BLIS library, so examples are recompiled
# whenever the library is rebuilt.
$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
	$(CC) $(CFLAGS) -c $< -o $@
else
	@echo "Compiling $@"
	@$(CC) $(CFLAGS) -c $< -o $@
endif


# -- Executable file rules --

%.x: %.o $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
	$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
else
	@echo "Linking $@ against '$(LIBBLIS_LINK) $(LDFLAGS)'"
	@$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
endif

# -- Test run rules --

#run: $(TEST_BIN)
#	./$(TEST_BIN)

# -- Clean rules --

# The leading '-' tells make to ignore a non-zero exit status from the
# removal command.
clean:
	- $(RM_F) $(TEST_OBJS) $(TEST_BINS)

blis-0.6.1/examples/oapi/README000066400000000000000000000031131360743507500161050ustar00rootroot00000000000000
BLIS object API examples
------------------------

This directory contains several files, each containing various pieces of
example code that demonstrate core functionality of the object API in BLIS.
These example files should be thought of collectively like a tutorial, and
therefore it is recommended to start from the beginning (the file that
starts in '00').

You can build all of the examples by simply running 'make' from this
directory. (You can also run 'make clean'.) The makefile assumes that
you've already configured and built (but not necessarily installed) BLIS
two directories up, in "../..".
If you have already installed BLIS to some permanent directory, you may refer to that installation by setting the environment variable BLIS_INSTALL_PATH prior to running make: export BLIS_INSTALL_PATH=/usr/local; make or by setting the same variable as part of the make command: make BLIS_INSTALL_PATH=/usr/local Once the executable files have been built, we recommend reading the code in one terminal window alongside the executable output in another. This will help you see the effects of each section of code. This tutorial is not exhaustive or complete; several object API functions were omitted (mostly for brevity's sake) and thus more examples could be written. If you've found object functionality in BLIS and are unsure how to use it, or if you are unsure of what additional functionality is present in BLIS, please feel free to join and then start a discussion on the blis-devel mailing list [1]. Thanks for your interest in BLIS! [1] https://groups.google.com/d/forum/blis-devel blis-0.6.1/examples/tapi/000077500000000000000000000000001360743507500152345ustar00rootroot00000000000000blis-0.6.1/examples/tapi/00level1v.c000066400000000000000000000134761360743507500171310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { double* x; double* y; double* z; double* w; double* a; double alpha, beta, gamma; dim_t m, n; inc_t rs, cs; // Initialize some basic constants. double zero = 0.0; double one = 1.0; double minus_one = -1.0; // // This file demonstrates working with vectors and the level-1v // operations. // // // Example 1: Create vectors and then broadcast (copy) scalar // values to all elements. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a few vectors to work with. We make them all of the same length // so that we can perform operations between them. // NOTE: We've chosen to use row vectors here (1x4) instead of column // vectors (4x1) to allow for easier reading of standard output (less // scrolling). m = 1; n = 4; rs = n; cs = 1; x = malloc( m * n * sizeof( double ) ); y = malloc( m * n * sizeof( double ) ); z = malloc( m * n * sizeof( double ) ); w = malloc( m * n * sizeof( double ) ); a = malloc( m * n * sizeof( double ) ); // Let's initialize some scalars. 
alpha = 2.0; beta = 0.2; gamma = 3.0; printf( "alpha:\n%4.1f\n\n", alpha ); printf( "beta:\n%4.1f\n\n", beta ); printf( "gamma:\n%4.1f\n\n", gamma ); printf( "\n" ); bli_dsetv( BLIS_NO_CONJUGATE, n, &one, x, 1 ); bli_dsetv( BLIS_NO_CONJUGATE, n, &alpha, y, 1 ); bli_dsetv( BLIS_NO_CONJUGATE, n, &zero, z, 1 ); // Note that we can use printv or printm to print vectors since vectors // are also matrices. We choose to use printm because it honors the // orientation of the vector (row or column) when printing, whereas // printv always prints vectors as column vectors regardless of their // they are 1 x n or n x 1. bli_dprintm( "x := 1.0", m, n, x, rs, cs, "%4.1f", "" ); bli_dprintm( "y := alpha", m, n, y, rs, cs, "%4.1f", "" ); bli_dprintm( "z := 0.0", m, n, z, rs, cs, "%4.1f", "" ); // // Example 2: Randomize a vector. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Set a vector to random values. bli_drandv( n, w, 1 ); bli_dprintm( "x := randv()", m, n, w, rs, cs, "%4.1f", "" ); // // Example 3: Perform various element-wise operations on vectors. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Copy a vector. bli_dcopyv( BLIS_NO_CONJUGATE, n, w, 1, a, 1 ); bli_dprintm( "a := w", m, n, a, rs, cs, "%4.1f", "" ); // Add and subtract vectors. bli_daddv( BLIS_NO_CONJUGATE, n, y, 1, a, 1 ); bli_dprintm( "a := a + y", m, n, a, rs, cs, "%4.1f", "" ); bli_dsubv( BLIS_NO_CONJUGATE, n, w, 1, a, 1 ); bli_dprintm( "a := a + w", m, n, a, rs, cs, "%4.1f", "" ); // Scale a vector (destructive). bli_dscalv( BLIS_NO_CONJUGATE, n, &beta, a, 1 ); bli_dprintm( "a := beta * a", m, n, a, rs, cs, "%4.1f", "" ); // Scale a vector (non-destructive). bli_dscal2v( BLIS_NO_CONJUGATE, n, &gamma, a, 1, z, 1 ); bli_dprintm( "z := gamma * a", m, n, z, rs, cs, "%4.1f", "" ); // Scale and accumulate between vectors. 
bli_daxpyv( BLIS_NO_CONJUGATE, n, &alpha, w, 1, x, 1 ); bli_dprintm( "x := x + alpha * w", m, n, x, rs, cs, "%4.1f", "" ); bli_dxpbyv( BLIS_NO_CONJUGATE, n, w, 1, &minus_one, x, 1 ); bli_dprintm( "x := -1.0 * x + w", m, n, x, rs, cs, "%4.1f", "" ); // Invert a vector element-wise. bli_dinvertv( n, y, 1 ); bli_dprintm( "y := 1 / y", m, n, y, rs, cs, "%4.1f", "" ); // Swap two vectors. bli_dswapv( n, x, 1, y, 1 ); bli_dprintm( "x (after swapping with y)", m, n, x, rs, cs, "%4.1f", "" ); bli_dprintm( "y (after swapping with x)", m, n, y, rs, cs, "%4.1f", "" ); // // Example 4: Perform contraction-like operations on vectors. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Perform a dot product. bli_ddotv( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, n, a, 1, z, 1, &gamma ); printf( "gamma := a * z (dot product):\n%5.2f\n\n", gamma ); // Perform an extended dot product. bli_ddotxv( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, n, &alpha, a, 1, z, 1, &one, &gamma ); printf( "gamma := 1.0 * gamma + alpha * a * z (accumulate scaled dot product):\n%5.2f\n\n", gamma ); // Free the memory obtained via malloc(). free( x ); free( y ); free( z ); free( w ); free( z ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/tapi/01level1m.c000066400000000000000000000156131360743507500171140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { double* a; double* b; double* c; double* d; double* e; double* f; dcomplex* g; dcomplex* h; double alpha, beta, gamma; dim_t m, n; inc_t rs, cs; // Initialize some basic constants. double zero = 0.0; double one = 1.0; double minus_one = -1.0; dcomplex minus_one_z = {-1.0, 0.0}; // // This file demonstrates working with matrices and the level-1m // operations. // // // Example 1: Create matrices and then broadcast (copy) scalar // values to all elements. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a few matrices to work with. We make them all of the same // dimensions so that we can perform operations between them. m = 2; n = 3; rs = 1; cs = m; a = malloc( m * n * sizeof( double ) ); b = malloc( m * n * sizeof( double ) ); c = malloc( m * n * sizeof( double ) ); d = malloc( m * n * sizeof( double ) ); e = malloc( m * n * sizeof( double ) ); // Let's initialize some scalars. 
alpha = 2.0; beta = 0.2; gamma = 3.0; printf( "alpha:\n%4.1f\n\n", alpha ); printf( "beta:\n%4.1f\n\n", beta ); printf( "gamma:\n%4.1f\n\n", gamma ); printf( "\n" ); // Matrices, like vectors, can set by "broadcasting" a constant to every // element. Note that the second argument (0) is the diagonal offset. // The diagonal offset is only used when the uplo value is something other // than BLIS_DENSE (e.g. BLIS_LOWER or BLIS_UPPER). bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, &one, a, rs, cs ); bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, &alpha, b, rs, cs ); bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, &zero, c, rs, cs ); bli_dprintm( "a := 1.0", m, n, a, rs, cs, "%4.1f", "" ); bli_dprintm( "b := alpha", m, n, b, rs, cs, "%4.1f", "" ); bli_dprintm( "c := 0.0", m, n, c, rs, cs, "%4.1f", "" ); // // Example 2: Randomize a matrix object. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); bli_drandm( 0, BLIS_DENSE, m, n, e, rs, cs ); bli_dprintm( "e (randomized):", m, n, e, rs, cs, "%4.1f", "" ); // // Example 3: Perform element-wise operations on matrices. // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Copy a matrix. bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, m, n, e, rs, cs, d, rs, cs ); bli_dprintm( "d := e", m, n, d, rs, cs, "%4.1f", "" ); // Add and subtract vectors. bli_daddm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, m, n, a, rs, cs, d, rs, cs ); bli_dprintm( "d := d + a", m, n, d, rs, cs, "%4.1f", "" ); bli_dsubm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, m, n, a, rs, cs, e, rs, cs ); bli_dprintm( "e := e - a", m, n, e, rs, cs, "%4.1f", "" ); // Scale a matrix (destructive). bli_dscalm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, &alpha, e, rs, cs ); bli_dprintm( "e := alpha * e", m, n, e, rs, cs, "%4.1f", "" ); // Scale a matrix (non-destructive). 
bli_dscal2m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, m, n, &beta, e, rs, cs, c, rs, cs ); bli_dprintm( "c := beta * e", m, n, c, rs, cs, "%4.1f", "" ); // Scale and accumulate between matrices. bli_daxpym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE, m, n, &alpha, a, rs, cs, c, rs, cs ); bli_dprintm( "c := alpha * a", m, n, c, rs, cs, "%4.1f", "" ); // // Example 4: Copy and transpose a matrix. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create an n-by-m matrix into which we can copy-transpose an m-by-n // matrix. f = malloc( n * m * sizeof( double ) ); dim_t rsf = 1, csf = n; // Initialize all of 'f' to -1.0 to simulate junk values. bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, n, m, &minus_one, f, rsf, csf ); bli_dprintm( "e:", m, n, e, rs, cs, "%4.1f", "" ); bli_dprintm( "f (initial value):", n, m, f, rsf, csf, "%4.1f", "" ); // Copy 'e' to 'f', transposing 'e' in the process. Notice that we haven't // modified any properties of 'd'. It's the source operand that matters // when marking an operand for transposition, not the destination. bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_TRANSPOSE, n, m, e, rs, cs, f, rsf, csf ); bli_dprintm( "f (copied value):", n, m, f, rsf, csf, "%4.1f", "" ); // // Example 5: Copy and Hermitian-transpose a matrix. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); g = malloc( m * n * sizeof(dcomplex) ); h = malloc( n * m * sizeof(dcomplex) ); bli_zrandm( 0, BLIS_DENSE, m, n, g, rs, cs ); bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, n, m, &minus_one_z, h, rsf, csf ); bli_zprintm( "g:", m, n, g, rs, cs, "%4.1f", "" ); bli_zprintm( "h (initial value):", n, m, h, rsf, csf, "%4.1f", "" ); bli_zcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_CONJ_TRANSPOSE, n, m, g, rs, cs, h, rsf, csf ); bli_zprintm( "h (copied value):", n, m, h, rsf, csf, "%4.1f", "" ); // Free the memory obtained via malloc(). 
free( a ); free( b ); free( c ); free( d ); free( e ); free( f ); free( g ); free( h ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/tapi/02level1m_diag.c000066400000000000000000000173311360743507500201000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" int main( int argc, char** argv ) { double* a; double* b; double* c; double* d; double* e; double* h; dim_t m, n; inc_t rs, cs; // Initialize some basic constants. double zero = 0.0; double minus_one = -1.0; // // This file demonstrates level-1m operations on structured matrices. // // // Example 1: Initialize the upper triangle of a matrix to random values. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a matrix to work with. m = 5; n = 5; rs = 1; cs = m; a = malloc( m * n * sizeof( double ) ); // Set the upper triangle to random values. bli_drandm( 0, BLIS_UPPER, m, n, a, rs, cs ); bli_dprintm( "a: randomize upper part (lower part may contain garbage)", m, n, a, rs, cs, "%4.1f", "" ); // // Example 2: Initialize the upper triangle of a matrix to random values // but also explicitly set the strictly lower triangle to zero. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Create a matrix to work with. m = 5; n = 5; rs = 1; cs = m; b = malloc( m * n * sizeof( double ) ); // Set the upper triangle to random values. bli_drandm( 0, BLIS_UPPER, m, n, b, rs, cs ); // Set the strictly lower triangle of 'b' to zero (by setting the lower // triangle of 'bl' to zero). bli_dsetm( BLIS_NO_CONJUGATE, -1, BLIS_NONUNIT_DIAG, BLIS_LOWER, m, n, &zero, b, rs, cs ); bli_dprintm( "b: randomize upper part; set strictly lower part to 0.0)", m, n, b, rs, cs, "%4.1f", "" ); // You may not see the effect of setting the strictly lower part to zero, // since those values may already be zero (instead of random junk). So // let's set it to something you'll notice, like -1.0. bli_dsetm( BLIS_NO_CONJUGATE, -1, BLIS_NONUNIT_DIAG, BLIS_LOWER, m, n, &minus_one, b, rs, cs ); bli_dprintm( "b: randomize upper part; set strictly lower part to -1.0)", m, n, b, rs, cs, "%4.1f", "" ); // // Example 3: Copy the lower triangle of an existing matrix to a newly // created (but otherwise uninitialized) matrix. 
// printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Create a matrix to work with. m = 5; n = 5; rs = 1; cs = m; c = malloc( m * n * sizeof( double ) ); bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_NO_TRANSPOSE, m, n, b, rs, cs, c, rs, cs ); bli_dprintm( "c: copy lower part of b (upper part may contain garbage)", m, n, c, rs, cs, "%4.1f", "" ); bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_NO_TRANSPOSE, m, n, b, rs, cs, a, rs, cs ); bli_dprintm( "a: copy lower triangle of b to upper triangular a", m, n, a, rs, cs, "%4.1f", "" ); // // Example 4: Copy the lower triangle of an existing object into the // upper triangle of an existing object. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create a matrix to work with. m = 5; n = 5; rs = 1; cs = m; d = malloc( m * n * sizeof( double ) ); // Let's start by setting entire destination matrix to zero. bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, &zero, d, rs, cs ); bli_dprintm( "d: initial value (all zeros)", m, n, d, rs, cs, "%4.1f", "" ); // Let's change a few values of b manually so we can later see the full // effect of the transposition. bli_dsetijm( 2.0, 0.0, 2, 0, b, rs, cs ); bli_dsetijm( 3.0, 0.0, 3, 0, b, rs, cs ); bli_dsetijm( 4.0, 0.0, 4, 0, b, rs, cs ); bli_dsetijm( 3.1, 0.0, 2, 1, b, rs, cs ); bli_dsetijm( 3.2, 0.0, 3, 2, b, rs, cs ); bli_dprintm( "b:", m, n, b, rs, cs, "%4.1f", "" ); bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_LOWER, BLIS_TRANSPOSE, m, n, b, rs, cs, d, rs, cs ); bli_dprintm( "d: transpose of lower triangle of b copied to d", m, n, d, rs, cs, "%4.1f", "" ); // // Example 5: Create a rectangular matrix (m > n) with a lower trapezoid // containing random values, then set the strictly upper // triangle to zeros. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // Create a matrix to work with. m = 6; n = 4; rs = 1; cs = m; e = malloc( m * n * sizeof( double ) ); // Initialize the entire matrix to -1.0 to simulate junk values. 
bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, &minus_one, e, rs, cs ); bli_dprintm( "e: initial value (all -1.0)", m, n, e, rs, cs, "%4.1f", "" ); // Randomize the lower trapezoid. bli_drandm( 0, BLIS_LOWER, m, n, e, rs, cs ); bli_dprintm( "e: after lower trapezoid randomized", m, n, e, rs, cs, "%4.1f", "" ); // Set the upper triangle to zero. bli_dsetm( BLIS_NO_CONJUGATE, 1, BLIS_NONUNIT_DIAG, BLIS_UPPER, m, n, &zero, e, rs, cs ); bli_dprintm( "e: after upper triangle set to zero", m, n, e, rs, cs, "%4.1f", "" ); // // Example 6: Create an upper Hessenberg matrix of random values and then // set the "unstored" values to zero. // printf( "\n#\n# -- Example 6 --\n#\n\n" ); // Create a matrix to work with. m = 5; n = 5; rs = 1; cs = m; h = malloc( m * n * sizeof( double ) ); // Initialize the entire matrix to -1.0 to simulate junk values. bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, &minus_one, h, rs, cs ); bli_dprintm( "h: initial value (all -1.0)", m, n, h, rs, cs, "%4.1f", "" ); // Randomize the elements on and above the first subdiagonal. bli_drandm( -1, BLIS_UPPER, m, n, h, rs, cs ); bli_dprintm( "h: after randomizing above first subdiagonal", m, n, h, rs, cs, "%4.1f", "" ); // Set the region strictly below the first subdiagonal (on or below // the second subdiagonal) to zero. bli_dsetm( BLIS_NO_CONJUGATE, -2, BLIS_NONUNIT_DIAG, BLIS_LOWER, m, n, &zero, h, rs, cs ); bli_dprintm( "h: after setting elements below first subdiagonal to zero", m, n, h, rs, cs, "%4.1f", "" ); // Free the memory obtained via malloc(). free( a ); free( b ); free( c ); free( d ); free( e ); free( h ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/tapi/03level2.c000066400000000000000000000230641360743507500167410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include <stdio.h>
#include "blis.h"

//
// main(): tutorial driver for the typed-API level-2 examples (ger, gemv,
// syr, symv, trmv, trsv). Each example allocates its own column-stored
// double operands, prints them before and after the operation, and frees
// them again before the next example starts. Returns 0. The command-line
// arguments are not used.
//
int main( int argc, char** argv )
{
	double* a;
	double* x;
	double* y;
	double* b;
	double alpha, beta;
	dim_t m, n;
	inc_t rs, cs;

	// Initialize some basic constants.
	double zero = 0.0;
	double one = 1.0;
	double two = 2.0;
	double minus_one = -1.0;

	//
	// This file demonstrates level-2 operations.
	//

	//
	// Example 1: Perform a general rank-1 update (ger) operation.
	//

	printf( "\n#\n# -- Example 1 --\n#\n\n" );

	// Create some matrix and vector operands to work with.
	m = 4; n = 5; rs = 1; cs = m;
	a = malloc( m * n * sizeof( double ) );
	x = malloc( m * 1 * sizeof( double ) );
	y = malloc( 1 * n * sizeof( double ) );

	// Let's initialize some scalars.
	alpha = 1.0;

	// Initialize vectors 'x' and 'y'.
	bli_drandv( m, x, 1 );
	bli_dsetv( BLIS_NO_CONJUGATE, n, &minus_one, y, 1 );

	// Initialize 'a' to 1.0.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, n, &one, a, rs, cs );

	bli_dprintm( "x: set to random values", m, 1, x, 1, m, "%4.1f", "" );
	bli_dprintm( "y: set to -1.0", 1, n, y, n, 1, "%4.1f", "" );
	bli_dprintm( "a: initial value", m, n, a, rs, cs, "%4.1f", "" );

	// a := a + alpha * x * y, where 'a' is general.
	bli_dger( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE,
	          m, n, &alpha, x, 1, y, 1, a, rs, cs );

	bli_dprintm( "a: after ger", m, n, a, rs, cs, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( x );
	free( y );

	//
	// Example 2: Perform a general matrix-vector multiply (gemv) operation.
	//

	printf( "\n#\n# -- Example 2 --\n#\n\n" );

	// Create some matrix and vector operands to work with.
	m = 4; n = 5; rs = 1; cs = m;
	a = malloc( m * n * sizeof( double ) );
	x = malloc( 1 * n * sizeof( double ) );
	y = malloc( 1 * m * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;
	beta = 1.0;

	// Initialize vectors 'x' and 'y'.
	bli_dsetv( BLIS_NO_CONJUGATE, n, &one, x, 1 );
	bli_dsetv( BLIS_NO_CONJUGATE, m, &zero, y, 1 );

	// Randomize 'a'.
	bli_drandm( 0, BLIS_DENSE, m, n, a, rs, cs );

	bli_dprintm( "a: randomized", m, n, a, rs, cs, "%4.1f", "" );
	bli_dprintm( "x: set to 1.0", 1, n, x, n, 1, "%4.1f", "" );
	bli_dprintm( "y: initial value", 1, m, y, m, 1, "%4.1f", "" );

	// y := beta * y + alpha * a * x, where 'a' is general.
	bli_dgemv( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE,
	           m, n, &alpha, a, rs, cs, x, 1, &beta, y, 1 );

	bli_dprintm( "y: after gemv", 1, m, y, m, 1, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( x );
	free( y );

	//
	// Example 3: Perform a symmetric rank-1 update (syr) operation.
	//

	printf( "\n#\n# -- Example 3 --\n#\n\n" );

	// Create some matrix and vector operands to work with. The column
	// stride follows 'm' rather than repeating the literal dimension.
	m = 5; rs = 1; cs = m;
	a = malloc( m * m * sizeof( double ) );
	x = malloc( 1 * m * sizeof( double ) );

	// Set alpha.
	alpha = 1.0;

	// Initialize vector 'x'.
	bli_drandv( m, x, 1 );

	// Zero out all of matrix 'a'. This is optional, but will avoid possibly
	// displaying junk values in the unstored triangle.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, m, &zero, a, rs, cs );

	// Randomize the lower triangle of 'a'.
	bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs );

	bli_dprintm( "x: set to random values", 1, m, x, m, 1, "%4.1f", "" );
	bli_dprintm( "a: initial value (zeros in upper triangle)", m, m, a, 1, m, "%4.1f", "" );

	// a := a + alpha * x * x^T, where 'a' is symmetric and lower-stored.
	bli_dsyr( BLIS_LOWER, BLIS_NO_CONJUGATE, m, &alpha, x, 1, a, rs, cs );

	bli_dprintm( "a: after syr", m, m, a, 1, m, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( x );

	//
	// Example 4: Perform a symmetric matrix-vector multiply (symv) operation.
	//

	printf( "\n#\n# -- Example 4 --\n#\n\n" );

	// Create some matrix and vector operands to work with.
	m = 5; rs = 1; cs = m;
	a = malloc( m * m * sizeof( double ) );
	x = malloc( 1 * m * sizeof( double ) );
	y = malloc( 1 * m * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;
	beta = 1.0;

	// Initialize vectors 'x' and 'y'.
	bli_dsetv( BLIS_NO_CONJUGATE, m, &one, x, 1 );
	bli_dsetv( BLIS_NO_CONJUGATE, m, &zero, y, 1 );

	// Zero out all of matrix 'a'. This is optional, but will avoid possibly
	// displaying junk values in the unstored triangle.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, m, &zero, a, rs, cs );

	// Randomize 'a'.
	bli_drandm( 0, BLIS_UPPER, m, m, a, rs, cs );

	bli_dprintm( "a: randomized (zeros in lower triangle)", m, m, a, rs, cs, "%4.1f", "" );
	bli_dprintm( "x: set to 1.0", 1, m, x, m, 1, "%4.1f", "" );
	bli_dprintm( "y: initial value", 1, m, y, m, 1, "%4.1f", "" );

	// y := beta * y + alpha * a * x, where 'a' is symmetric and upper-stored.
	// NOTE(review): the second and third arguments are conjugation flags
	// (for 'a' and 'x') per the BLIS typed API, so BLIS_NO_CONJUGATE is
	// used for both -- confirm against the installed blis.h.
	bli_dsymv( BLIS_UPPER, BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE,
	           m, &alpha, a, rs, cs, x, 1, &beta, y, 1 );

	bli_dprintm( "y: after symv", 1, m, y, m, 1, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( x );
	free( y );

	//
	// Example 5: Perform a triangular matrix-vector multiply (trmv) operation.
	//

	printf( "\n#\n# -- Example 5 --\n#\n\n" );

	// Create some matrix and vector operands to work with.
	m = 5; rs = 1; cs = m;
	a = malloc( m * m * sizeof( double ) );
	x = malloc( 1 * m * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;

	// Initialize vector 'x'.
	bli_dsetv( BLIS_NO_CONJUGATE, m, &one, x, 1 );

	// Zero out all of matrix 'a'. This is optional, but will avoid possibly
	// displaying junk values in the unstored triangle.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, m, &zero, a, rs, cs );

	// Randomize 'a'.
	bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs );

	bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rs, cs, "%4.1f", "" );
	bli_dprintm( "x: initial value", 1, m, x, m, 1, "%4.1f", "" );

	// x := alpha * a * x, where 'a' is triangular and lower-stored.
	bli_dtrmv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG,
	           m, &alpha, a, rs, cs, x, 1 );

	bli_dprintm( "x: after trmv", 1, m, x, m, 1, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( x );

	//
	// Example 6: Perform a triangular solve (trsv) operation.
	//

	printf( "\n#\n# -- Example 6 --\n#\n\n" );

	// Create some matrix and vector operands to work with.
	m = 5; rs = 1; cs = m;
	a = malloc( m * m * sizeof( double ) );
	b = malloc( 1 * m * sizeof( double ) );
	y = malloc( 1 * m * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;

	// Initialize vector 'b' (the right-hand side).
	bli_dsetv( BLIS_NO_CONJUGATE, m, &one, b, 1 );

	// Zero out all of matrix 'a'. This is optional, but will avoid possibly
	// displaying junk values in the unstored triangle.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, m, &zero, a, rs, cs );

	// Randomize 'a'.
	bli_drandm( 0, BLIS_LOWER, m, m, a, rs, cs );

	// Load the diagonal. By setting the diagonal to something of greater
	// absolute value than the off-diagonal elements, we increase the odds
	// that the matrix is not singular (singular matrices have no inverse).
	bli_dshiftd( 0, m, m, &two, a, rs, cs );

	bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rs, cs, "%4.1f", "" );
	bli_dprintm( "b: initial value", 1, m, b, m, 1, "%4.1f", "" );

	// Solve a * x = alpha * b, where 'a' is triangular and lower-stored.
	// The solution overwrites 'b'. The vector operand must be 'b' here:
	// 'x' was freed at the end of Example 5 and is no longer valid.
	bli_dtrsv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG,
	           m, &alpha, a, rs, cs, b, 1 );

	bli_dprintm( "b: after trsv", 1, m, b, m, 1, "%4.1f", "" );

	// We can confirm the solution by comparing the product of a and x to the
	// original value of b.
	bli_dcopyv( BLIS_NO_CONJUGATE, m, b, 1, y, 1 );
	bli_dtrmv( BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG,
	           m, &alpha, a, rs, cs, y, 1 );

	bli_dprintm( "y: should equal initial value of b", 1, m, y, m, 1, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( b );
	free( y );

	return 0;
}

blis-0.6.1/examples/tapi/04level3.c000066400000000000000000000255101360743507500167410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include <stdio.h>
#include "blis.h"

//
// Tutorial driver for the level-3 operations of the BLIS typed API.
//
// Each example below allocates its operands with malloc(), initializes and
// prints them, performs one level-3 operation, prints the result, and frees
// the operands again. Every matrix uses a unit row stride and a column
// stride equal to its number of rows. The command-line arguments are not
// used. Always returns 0.
//
int main( int argc, char** argv )
{
	dim_t m, n, k;
	inc_t rsa, csa;
	inc_t rsb, csb;
	inc_t rsc, csc;

	double* a;
	double* b;
	double* c;
	double alpha, beta;

	// Initialize some basic constants.
	double zero = 0.0;
	double one  = 1.0;
	double two  = 2.0;

	//
	// This file demonstrates level-3 operations.
	//

	//
	// Example 1: Perform a general matrix-matrix multiply (gemm) operation.
	//

	printf( "\n#\n#  -- Example 1 --\n#\n\n" );

	// Create some matrix operands to work with.
	m = 4; n = 5; k = 3;
	rsc = 1; csc = m;
	rsa = 1; csa = m;
	rsb = 1; csb = k;
	c = malloc( m * n * sizeof( double ) );
	a = malloc( m * k * sizeof( double ) );
	b = malloc( k * n * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;
	beta  = 1.0;

	// Initialize the matrix operands.
	bli_drandm( 0, BLIS_DENSE, m, k, a, rsa, csa );
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           k, n, &one, b, rsb, csb );
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, n, &zero, c, rsc, csc );

	bli_dprintm( "a: randomized", m, k, a, rsa, csa, "%4.1f", "" );
	bli_dprintm( "b: set to 1.0", k, n, b, rsb, csb, "%4.1f", "" );
	bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" );

	// c := beta * c + alpha * a * b, where 'a', 'b', and 'c' are general.
	bli_dgemm( BLIS_NO_TRANSPOSE, BLIS_NO_TRANSPOSE,
	           m, n, k, &alpha, a, rsa, csa, b, rsb, csb,
	                     &beta, c, rsc, csc );

	bli_dprintm( "c: after gemm", m, n, c, rsc, csc, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( b );
	free( c );

	//
	// Example 1b: Perform a general matrix-matrix multiply (gemm) operation
	//             with the left input operand (matrix A) transposed.
	//

	printf( "\n#\n#  -- Example 1b --\n#\n\n" );

	// Create some matrix operands to work with. Note that 'a' is stored as
	// a k x m matrix here, since it is transposed by the operation.
	m = 4; n = 5; k = 3;
	rsc = 1; csc = m;
	rsa = 1; csa = k;
	rsb = 1; csb = k;
	c = malloc( m * n * sizeof( double ) );
	a = malloc( k * m * sizeof( double ) );
	b = malloc( k * n * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;
	beta  = 1.0;

	// Initialize the matrix operands.
	bli_drandm( 0, BLIS_DENSE, k, m, a, rsa, csa );
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           k, n, &one, b, rsb, csb );
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, n, &zero, c, rsc, csc );

	bli_dprintm( "a: randomized", k, m, a, rsa, csa, "%4.1f", "" );
	bli_dprintm( "b: set to 1.0", k, n, b, rsb, csb, "%4.1f", "" );
	bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" );

	// c := beta * c + alpha * a^T * b, where 'a', 'b', and 'c' are general.
	bli_dgemm( BLIS_TRANSPOSE, BLIS_NO_TRANSPOSE,
	           m, n, k, &alpha, a, rsa, csa, b, rsb, csb,
	                     &beta, c, rsc, csc );

	bli_dprintm( "c: after gemm", m, n, c, rsc, csc, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( b );
	free( c );

	//
	// Example 2: Perform a symmetric rank-k update (syrk) operation.
	//

	printf( "\n#\n#  -- Example 2 --\n#\n\n" );

	// Create some matrix operands to work with.
	m = 5; k = 3;
	rsc = 1; csc = m;
	rsa = 1; csa = m;
	c = malloc( m * m * sizeof( double ) );
	a = malloc( m * k * sizeof( double ) );

	// Set the scalars to use. 'beta' is set explicitly so that this example
	// does not depend on the value left behind by an earlier example.
	alpha = 1.0;
	beta  = 1.0;

	// Initialize the matrix operands.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, m, &zero, c, rsc, csc );
	bli_drandm( 0, BLIS_DENSE, m, k, a, rsa, csa );

	// Randomize the lower triangle of 'c'. 'c' is m x m, so both dimensions
	// are 'm' ('n' is not part of this example).
	bli_drandm( 0, BLIS_LOWER, m, m, c, rsc, csc );

	bli_dprintm( "a: set to random values", m, k, a, rsa, csa, "%4.1f", "" );
	bli_dprintm( "c: initial value (zeros in upper triangle)", m, m, c, rsc, csc, "%4.1f", "" );

	// c := beta * c + alpha * a * a^T, where 'c' is symmetric and
	// lower-stored.
	bli_dsyrk( BLIS_LOWER, BLIS_NO_TRANSPOSE,
	           m, k, &alpha, a, rsa, csa,
	                  &beta, c, rsc, csc );

	bli_dprintm( "c: after syrk", m, m, c, rsc, csc, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( c );

	//
	// Example 3: Perform a symmetric matrix-matrix multiply (symm) operation.
	//

	printf( "\n#\n#  -- Example 3 --\n#\n\n" );

	// Create some matrix operands to work with.
	m = 5; n = 6;
	rsc = 1; csc = m;
	rsa = 1; csa = m;
	rsb = 1; csb = m;
	c = malloc( m * n * sizeof( double ) );
	a = malloc( m * m * sizeof( double ) );
	b = malloc( m * n * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;
	beta  = 1.0;

	// Initialize matrices 'b' and 'c'.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, n, &one, b, rsb, csb );
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, n, &zero, c, rsc, csc );

	// Zero out all of matrix 'a'. This is optional, but will avoid possibly
	// displaying junk values in the unstored triangle.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, m, &zero, a, rsa, csa );

	// Randomize the upper triangle of 'a'.
	bli_drandm( 0, BLIS_UPPER, m, m, a, rsa, csa );

	bli_dprintm( "a: randomized (zeros in lower triangle)", m, m, a, rsa, csa, "%4.1f", "" );
	bli_dprintm( "b: set to 1.0", m, n, b, rsb, csb, "%4.1f", "" );
	bli_dprintm( "c: initial value", m, n, c, rsc, csc, "%4.1f", "" );

	// c := beta * c + alpha * a * b, where 'a' is symmetric and upper-stored.
	bli_dsymm( BLIS_LEFT, BLIS_UPPER, BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE,
	           m, n, &alpha, a, rsa, csa, b, rsb, csb,
	                  &beta, c, rsc, csc );

	bli_dprintm( "c: after symm", m, n, c, rsc, csc, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( b );
	free( c );

	//
	// Example 4: Perform a triangular matrix-matrix multiply (trmm) operation.
	//

	printf( "\n#\n#  -- Example 4 --\n#\n\n" );

	// Create some matrix operands to work with.
	m = 5; n = 4;
	rsa = 1; csa = m;
	rsb = 1; csb = m;
	a = malloc( m * m * sizeof( double ) );
	b = malloc( m * n * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;

	// Initialize matrix 'b'.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, n, &one, b, rsb, csb );

	// Zero out all of matrix 'a'. This is optional, but will avoid possibly
	// displaying junk values in the unstored triangle.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, m, &zero, a, rsa, csa );

	// Randomize the lower triangle of 'a'.
	bli_drandm( 0, BLIS_LOWER, m, m, a, rsa, csa );

	bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rsa, csa, "%4.1f", "" );
	bli_dprintm( "b: initial value", m, n, b, rsb, csb, "%4.1f", "" );

	// b := alpha * a * b, where 'a' is triangular and lower-stored.
	// NOTE(review): the flags are passed as (side, uplo, trans, diag), the
	// same trans-before-diag order used by the level-2 trmv/trsv examples --
	// confirm against the typed API reference.
	bli_dtrmm( BLIS_LEFT, BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG,
	           m, n, &alpha, a, rsa, csa, b, rsb, csb );

	bli_dprintm( "b: after trmm", m, n, b, rsb, csb, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( b );

	//
	// Example 5: Perform a triangular solve with multiple right-hand sides
	//            (trsm) operation.
	//

	printf( "\n#\n#  -- Example 5 --\n#\n\n" );

	// Create some matrix operands to work with.
	m = 5; n = 4;
	rsa = 1; csa = m;
	rsb = 1; csb = m;
	rsc = 1; csc = m;
	a = malloc( m * m * sizeof( double ) );
	b = malloc( m * n * sizeof( double ) );
	c = malloc( m * n * sizeof( double ) );

	// Set the scalars to use.
	alpha = 1.0;

	// Initialize matrix 'b'.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, n, &one, b, rsb, csb );

	// Zero out all of matrix 'a'. This is optional, but will avoid possibly
	// displaying junk values in the unstored triangle.
	bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE,
	           m, m, &zero, a, rsa, csa );

	// Randomize the lower triangle of 'a'.
	bli_drandm( 0, BLIS_LOWER, m, m, a, rsa, csa );

	// Load the diagonal. By setting the diagonal to something of greater
	// absolute value than the off-diagonal elements, we increase the odds
	// that the matrix is not singular (singular matrices have no inverse).
	bli_dshiftd( 0, m, m, &two, a, rsa, csa );

	bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rsa, csa, "%4.1f", "" );
	bli_dprintm( "b: initial value", m, n, b, rsb, csb, "%4.1f", "" );

	// solve a * x = alpha * b, where 'a' is triangular and lower-stored, and
	// overwrite b with the solution matrix x.
	bli_dtrsm( BLIS_LEFT, BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG,
	           m, n, &alpha, a, rsa, csa, b, rsb, csb );

	bli_dprintm( "b: after trsm", m, n, b, rsb, csb, "%4.1f", "" );

	// We can confirm the solution by comparing the product of a and x to the
	// original value of b.
	bli_dcopym( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, BLIS_NO_TRANSPOSE,
	            m, n, b, rsb, csb, c, rsc, csc );
	bli_dtrmm( BLIS_LEFT, BLIS_LOWER, BLIS_NO_TRANSPOSE, BLIS_NONUNIT_DIAG,
	           m, n, &alpha, a, rsa, csa, c, rsc, csc );

	bli_dprintm( "c: should equal initial value of b", m, n, c, rsc, csc, "%4.1f", "" );

	// Free the memory obtained via malloc().
	free( a );
	free( b );
	free( c );

	return 0;
}

// -----------------------------------------------------------------------------

blis-0.6.1/examples/tapi/05util.c000066400000000000000000000224101360743507500165210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" int main( int argc, char** argv ) { double* x; dcomplex* y; double* a; dcomplex* b; double* c; double* d; dcomplex* e; dcomplex* f; double* g; double norm1, normi, normf; dim_t m, n; inc_t rs, cs; // Initialize some basic constants. double minus_one = -1.0; dcomplex minus_one_z = { -1.0, 0.0 }; // // This file demonstrates working with vector and matrices in the // context of various utility operations. // // // Example 1: Compute various vector norms. // printf( "\n#\n# -- Example 1 --\n#\n\n" ); // Create a few matrices to work with. m = 1; n = 5; rs = 5; cs = 1; x = malloc( m * n * sizeof( double ) ); y = malloc( m * n * sizeof( dcomplex ) ); // Initialize the vectors to random values. bli_drandv( n, x, 1 ); bli_zrandv( n, y, 1 ); bli_dprintm( "x", m, n, x, rs, cs, "%4.1f", "" ); // Compute the one, infinity, and frobenius norms of 'x'. Note that when // computing the norm alpha of a vector 'x', the datatype of alpha must be // equal to the real projection of the datatype of 'x'. 
bli_dnorm1v( n, x, 1, &norm1 ); bli_dnormiv( n, x, 1, &normi ); bli_dnormfv( n, x, 1, &normf ); bli_dprintm( "x: 1-norm:", 1, 1, &norm1, rs, cs, "%4.1f", "" ); bli_dprintm( "x: infinity norm:", 1, 1, &normi, rs, cs, "%4.1f", "" ); bli_dprintm( "x: frobenius norm:", 1, 1, &normf, rs, cs, "%4.1f", "" ); bli_zprintm( "y", m, n, y, rs, cs, "%4.1f", "" ); // Compute the one, infinity, and frobenius norms of 'y'. Note that we // can reuse the same scalars from before for computing norms of // dcomplex matrices, since the real projection of dcomplex is double. bli_znorm1v( n, y, 1, &norm1 ); bli_znormiv( n, y, 1, &normi ); bli_znormfv( n, y, 1, &normf ); bli_dprintm( "y: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); bli_dprintm( "y: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); bli_dprintm( "y: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); // // Example 2: Compute various matrix norms. // printf( "\n#\n# -- Example 2 --\n#\n\n" ); // Create a few matrices to work with. m = 5; n = 6; rs = 1; cs = m; a = malloc( m * n * sizeof( double ) ); b = malloc( m * n * sizeof( dcomplex ) ); // Initialize the matrices to random values. bli_drandm( 0, BLIS_DENSE, m, n, a, rs, cs ); bli_zrandm( 0, BLIS_DENSE, m, n, b, rs, cs ); bli_dprintm( "a:", m, n, a, rs, cs, "%4.1f", "" ); // Compute the one-norm of 'a'. bli_dnorm1m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, a, rs, cs, &norm1 ); bli_dnormim( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, a, rs, cs, &normi ); bli_dnormfm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, a, rs, cs, &normf ); bli_dprintm( "a: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); bli_dprintm( "a: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); bli_dprintm( "a: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); bli_zprintm( "b:", m, n, b, rs, cs, "%4.1f", "" ); // Compute the one-norm of 'b'. 
bli_znorm1m( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, b, rs, cs, &norm1 ); bli_znormim( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, b, rs, cs, &normi ); bli_znormfm( 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, n, b, rs, cs, &normf ); bli_dprintm( "a: 1-norm:", 1, 1, &norm1, 1, 1, "%4.1f", "" ); bli_dprintm( "a: infinity norm:", 1, 1, &normi, 1, 1, "%4.1f", "" ); bli_dprintm( "a: frobenius norm:", 1, 1, &normf, 1, 1, "%4.1f", "" ); // // Example 3: Make a real matrix explicitly symmetric (or Hermitian). // printf( "\n#\n# -- Example 3 --\n#\n\n" ); // Create a few matrices to work with. m = 4; n = 4; rs = 1; cs = m; c = malloc( m * m * sizeof( double ) ); d = malloc( m * m * sizeof( double ) ); // Initialize all of 'c' to -1.0 to simulate junk values. bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, m, &minus_one, c, rs, cs ); // Randomize the lower triangle of 'c'. bli_drandm( 0, BLIS_LOWER, m, m, c, rs, cs ); bli_dprintm( "c (initial state):", m, m, c, rs, cs, "%4.1f", "" ); // mksymm on a real matrix transposes the stored triangle into the // unstored triangle, making the matrix densely symmetric. bli_dmksymm( BLIS_LOWER, m, c, rs, cs ); bli_dprintm( "c (after mksymm on lower triangle):", m, m, c, rs, cs, "%4.1f", "" ); // Digression: Most people think only of complex matrices as being able // to be complex. However, in BLIS, we define Hermitian operations on // real matrices, too--they are simply equivalent to the corresponding // symmetric operation. For example, when we make a real matrix explicitly // Hermitian, the result is indistinguishable from making it symmetric. // Initialize all of 'd' to -1.0 to simulate junk values. bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, m, &minus_one, d, rs, cs ); // Randomize the lower triangle of 'd'. 
bli_drandm( 0, BLIS_LOWER, m, m, d, rs, cs ); bli_dprintm( "d (initial state):", m, m, d, rs, cs, "%4.1f", "" ); // mkherm on a real matrix behaves the same as mksymm, as there are no // imaginary elements to conjugate. bli_dmkherm( BLIS_LOWER, m, d, rs, cs ); bli_dprintm( "c (after mkherm on lower triangle):", m, m, d, rs, cs, "%4.1f", "" ); // // Example 4: Make a complex matrix explicitly symmetric or Hermitian. // printf( "\n#\n# -- Example 4 --\n#\n\n" ); // Create a few matrices to work with. m = 4; n = 4; rs = 1; cs = m; e = malloc( m * m * sizeof( dcomplex ) ); f = malloc( m * m * sizeof( dcomplex ) ); // Initialize all of 'e' to -1.0 to simulate junk values. bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, m, &minus_one_z, e, rs, cs ); // Randomize the upper triangle of 'e'. bli_zrandm( 0, BLIS_UPPER, m, m, e, rs, cs ); bli_zprintm( "e (initial state):", m, m, e, rs, cs, "%4.1f", "" ); // mksymm on a complex matrix transposes the stored triangle into the // unstored triangle. bli_zmksymm( BLIS_UPPER, m, e, rs, cs ); bli_zprintm( "e (after mksymm on lower triangle):", m, m, e, rs, cs, "%4.1f", "" ); // Initialize all of 'f' to -1.0 to simulate junk values. bli_zsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, m, &minus_one_z, f, rs, cs ); // Randomize the upper triangle of 'd'. bli_zrandm( 0, BLIS_UPPER, m, m, f, rs, cs ); bli_zprintm( "f (initial state):", m, m, f, rs, cs, "%4.1f", "" ); // mkherm on a real matrix behaves the same as mksymm, as there are no // imaginary elements to conjugate. bli_zmkherm( BLIS_UPPER, m, f, rs, cs ); bli_zprintm( "f (after mkherm on lower triangle):", m, m, f, rs, cs, "%4.1f", "" ); // // Example 5: Make a real matrix explicitly triangular. // printf( "\n#\n# -- Example 5 --\n#\n\n" ); // Create a few matrices to work with. m = 5; n = 5; rs = 1; cs = m; g = malloc( m * m * sizeof( double ) ); // Initialize all of 'g' to -1.0 to simulate junk values. 
bli_dsetm( BLIS_NO_CONJUGATE, 0, BLIS_NONUNIT_DIAG, BLIS_DENSE, m, m, &minus_one, g, rs, cs ); // Randomize the lower triangle of 'g'. bli_drandm( 0, BLIS_LOWER, m, m, g, rs, cs ); bli_dprintm( "g (initial state):", m, m, g, rs, cs, "%4.1f", "" ); // mktrim does not explicitly copy any data, since presumably the stored // triangle already contains the data of interest. However, mktrim does // explicitly writes zeros to the unstored region. bli_dmktrim( BLIS_LOWER, m, g, rs, cs ); bli_dprintm( "g (after mktrim):", m, m, g, rs, cs, "%4.1f", "" ); // Free the memory obtained via malloc(). free( x ); free( y ); free( a ); free( b ); free( c ); free( d ); free( e ); free( f ); free( g ); return 0; } // ----------------------------------------------------------------------------- blis-0.6.1/examples/tapi/Makefile000066400000000000000000000113041360743507500166730ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

#
# Makefile
#
# Field G. Van Zee
#
# Makefile for BLIS typed API example code.
#

#
# --- Makefile PHONY target definitions ----------------------------------------
#

# None of these targets names a file that the rules create, so all of them
# are declared phony (including the environment-check targets defined below).
.PHONY: all bin clean run \
        check-env check-env-config-mk check-env-make-defs



#
# --- Determine makefile fragment location -------------------------------------
#

# Comments:
# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given.
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in
#   the second case because CONFIG_NAME is not yet set.
ifneq ($(strip $(BLIS_INSTALL_PATH)),)
LIB_PATH   := $(BLIS_INSTALL_PATH)/lib
INC_PATH   := $(BLIS_INSTALL_PATH)/include/blis
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
else
DIST_PATH  := ../..
LIB_PATH    = ../../lib/$(CONFIG_NAME)
INC_PATH    = ../../include/$(CONFIG_NAME)
SHARE_PATH := ../..
endif



#
# --- Include common makefile definitions --------------------------------------
#

# Include the common makefile fragment.
-include $(SHARE_PATH)/common.mk



#
# --- General build definitions ------------------------------------------------
#

TEST_SRC_PATH  := .
TEST_OBJ_PATH  := .

# Gather all local object files.
TEST_OBJS      := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \
                                    $(TEST_OBJ_PATH)/%.o, \
                                    $(wildcard $(TEST_SRC_PATH)/*.c)))

# Override the value of CINCFLAGS so that the value of CFLAGS returned by
# get-user-cflags-for() is not cluttered up with include paths needed only
# while building BLIS.
CINCFLAGS      := -I$(INC_PATH)

# Use the "framework" CFLAGS for the configuration family.
CFLAGS         := $(call get-user-cflags-for,$(CONFIG_NAME))

# Add local header paths to CFLAGS
CFLAGS         += -I$(TEST_SRC_PATH)

# Locate the libblis library to which we will link.
# (Left disabled here, so LIBBLIS_LINK must come from the included fragment.)
#LIBBLIS_LINK   := $(LIB_PATH)/$(LIBBLIS_L)

# Binary executable names, one per example source file.
TEST_BINS      := 00level1v.x \
                  01level1m.x \
                  02level1m_diag.x \
                  03level2.x \
                  04level3.x \
                  05util.x



#
# --- Targets/rules ------------------------------------------------------------
#

# --- Primary targets ---

all: bin

bin: $(TEST_BINS)


# --- Environment check rules ---

check-env: check-env-make-defs check-env-fragments check-env-config-mk

check-env-config-mk:
ifeq ($(CONFIG_MK_PRESENT),no)
	$(error Cannot proceed: config.mk not detected! Run configure first)
endif

check-env-make-defs: check-env-fragments
ifeq ($(MAKE_DEFS_MK_PRESENT),no)
	$(error Cannot proceed: make_defs.mk not detected! Invalid configuration)
endif


# --Object file rules --

$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
	$(CC) $(CFLAGS) -c $< -o $@
else
	@echo "Compiling $@"
	@$(CC) $(CFLAGS) -c $< -o $@
endif


# -- Executable file rules --

%.x: %.o $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
	$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
else
	@echo "Linking $@ against '$(LIBBLIS_LINK) $(LDFLAGS)'"
	@$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
endif

# -- Test run rules --

#run: $(TEST_BIN)
#	./$(TEST_BIN)

# -- Clean rules --

clean:
	- $(RM_F) $(TEST_OBJS) $(TEST_BINS)

blis-0.6.1/examples/tapi/README000066400000000000000000000031031360743507500161110ustar00rootroot00000000000000 BLIS typed API examples ----------------------- This directory contains several files, each containing various pieces of example code that demonstrate core functionality of the typed API in BLIS. These example files should be thought of collectively like a tutorial, and therefore it is recommended to start from the beginning (the file that starts in '00').
You can build all of the examples by simply running 'make' from this directory. (You can also run 'make clean'.) The makefile assumes that you've already configured and built (but not necessarily installed) BLIS two directories up, in "../..". If you have already installed BLIS to some permanent directory, you may refer to that installation by setting the environment variable BLIS_INSTALL_PATH prior to running make: export BLIS_INSTALL_PATH=/usr/local; make or by setting the same variable as part of the make command: make BLIS_INSTALL_PATH=/usr/local Once the executable files have been built, we recommend reading the code in one terminal window alongside the executable output in another. This will help you see the effects of each section of code. This tutorial is not exhaustive or complete; many typed API functions were omitted (mostly for brevity's sake) and thus more examples could be written. If you've found typed functionality in BLIS and are unsure how to use it, or if you are unsure of what additional functionality is present in BLIS, please feel free to join and then start a discussion on the blis-devel mailing list [1]. Thanks for your interest in BLIS! [1] https://groups.google.com/d/forum/blis-devel blis-0.6.1/frame/000077500000000000000000000000001360743507500135535ustar00rootroot00000000000000blis-0.6.1/frame/0/000077500000000000000000000000001360743507500137125ustar00rootroot00000000000000blis-0.6.1/frame/0/bli_l0.h000066400000000000000000000035411360743507500152270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_l0_check.h" #include "bli_l0_oapi.h" #include "bli_l0_tapi.h" #include "bli_l0_ft.h" // Generate function pointer arrays for tapi functions. #include "bli_l0_fpa.h" // copysc #include "bli_copysc.h" blis-0.6.1/frame/0/bli_l0_check.c000066400000000000000000000174251360743507500163650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define object-based check functions. // #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* chi, \ obj_t* psi \ ) \ { \ bli_l0_xxsc_check( chi, psi ); \ } GENFRONT( addsc ) GENFRONT( copysc ) GENFRONT( divsc ) GENFRONT( mulsc ) GENFRONT( sqrtsc ) GENFRONT( subsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* chi \ ) \ { \ bli_l0_xsc_check( chi ); \ } GENFRONT( invertsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* chi, \ obj_t* norm \ ) \ { \ bli_l0_xx2sc_check( chi, norm ); \ } GENFRONT( absqsc ) GENFRONT( normfsc ) void bli_getsc_check ( obj_t* chi, double* zeta_r, double* zeta_i ) { err_t e_val; // Check object datatypes. 
//e_val = bli_check_noninteger_object( chi ); //bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); bli_check_error_code( e_val ); } void bli_setsc_check ( double zeta_r, double zeta_i, obj_t* chi ) { err_t e_val; // Check object datatypes. //e_val = bli_check_floating_object( chi ); //bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); bli_check_error_code( e_val ); } void bli_unzipsc_check ( obj_t* chi, obj_t* zeta_r, obj_t* zeta_i ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_real_object( zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_real_object( zeta_i ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( zeta_i ); bli_check_error_code( e_val ); e_val = bli_check_object_real_proj_of( chi, zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_object_real_proj_of( chi, zeta_i ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( zeta_i ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( zeta_i ); bli_check_error_code( e_val ); } void bli_zipsc_check ( obj_t* zeta_r, obj_t* zeta_i, obj_t* chi ) { err_t e_val; // Check object datatypes. 
e_val = bli_check_real_object( zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_real_object( zeta_i ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_object_real_proj_of( chi, zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_object_real_proj_of( chi, zeta_i ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( zeta_i ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( chi ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( zeta_r ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( zeta_i ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( chi ); bli_check_error_code( e_val ); } // ----------------------------------------------------------------------------- void bli_l0_xsc_check ( obj_t* chi ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( chi ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); bli_check_error_code( e_val ); } void bli_l0_xxsc_check ( obj_t* chi, obj_t* psi ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( psi ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( psi ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_scalar_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( psi ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( psi ); bli_check_error_code( e_val ); } void bli_l0_xx2sc_check ( obj_t* chi, obj_t* absq ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( absq ); bli_check_error_code( e_val ); e_val = bli_check_real_object( absq ); bli_check_error_code( e_val ); e_val = bli_check_object_real_proj_of( chi, absq ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( chi ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( absq ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( chi ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( absq ); bli_check_error_code( e_val ); } blis-0.6.1/frame/0/bli_l0_check.h000066400000000000000000000064361360743507500163720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based check functions. // #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* chi, \ obj_t* psi \ ); GENTPROT( addsc ) GENTPROT( copysc ) GENTPROT( divsc ) GENTPROT( mulsc ) GENTPROT( sqrtsc ) GENTPROT( subsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* chi \ ); GENTPROT( invertsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* chi, \ obj_t* absq \ ); GENTPROT( absqsc ) GENTPROT( normfsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* chi, \ double* zeta_r, \ double* zeta_i \ ); GENTPROT( getsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ double zeta_r, \ double zeta_i, \ obj_t* chi \ ); GENTPROT( setsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* chi, \ obj_t* zeta_r, \ obj_t* zeta_i \ ); GENTPROT( unzipsc ) #undef GENTPROT #define GENTPROT( opname ) \ \ 
void PASTEMAC(opname,_check) \ ( \ obj_t* zeta_r, \ obj_t* zeta_i, \ obj_t* chi \ ); GENTPROT( zipsc ) // ----------------------------------------------------------------------------- void bli_l0_xsc_check ( obj_t* chi ); void bli_l0_xxsc_check ( obj_t* chi, obj_t* psi ); void bli_l0_xx2sc_check ( obj_t* chi, obj_t* norm ); blis-0.6.1/frame/0/bli_l0_fpa.c000066400000000000000000000045061360743507500160520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define function pointer query interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ GENARRAY_FPA( PASTECH(opname,_vft), opname ); \ \ PASTECH(opname,_vft) PASTEMAC(opname,_qfp)( num_t dt ) \ { \ return PASTECH(opname,_fpa)[ dt ]; \ } GENFRONT( absqsc ) GENFRONT( normfsc ) GENFRONT( addsc ) GENFRONT( divsc ) GENFRONT( mulsc ) GENFRONT( subsc ) GENFRONT( invertsc ) GENFRONT( sqrtsc ) GENFRONT( unzipsc ) GENFRONT( zipsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ GENARRAY_FPA_I( PASTECH(opname,_vft), opname ); \ \ PASTECH(opname,_vft) PASTEMAC(opname,_qfp)( num_t dt ) \ { \ return PASTECH(opname,_fpa)[ dt ]; \ } GENFRONT( getsc ) GENFRONT( setsc ) blis-0.6.1/frame/0/bli_l0_fpa.h000066400000000000000000000037771360743507500160700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype function pointer query interface. // #undef GENPROT #define GENPROT( opname ) \ \ PASTECH(opname,_vft) \ PASTEMAC(opname,_qfp)( num_t dt ); GENPROT( absqsc ) GENPROT( normfsc ) GENPROT( addsc ) GENPROT( divsc ) GENPROT( mulsc ) GENPROT( subsc ) GENPROT( invertsc ) GENPROT( sqrtsc ) GENPROT( unzipsc ) GENPROT( zipsc ) GENPROT( getsc ) GENPROT( setsc ) blis-0.6.1/frame/0/bli_l0_ft.h000066400000000000000000000100311360743507500157100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // // -- Level-0 function types --------------------------------------------------- // // addsc, divsc, subsc #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ ctype* chi, \ ctype* psi \ ); INSERT_GENTDEF( addsc ) INSERT_GENTDEF( divsc ) INSERT_GENTDEF( subsc ) // invertsc #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ ctype* chi \ ); INSERT_GENTDEF( invertsc ) // mulsc #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjchi, \ ctype* chi, \ ctype* psi \ ); INSERT_GENTDEF( mulsc ) // absqsc #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ ctype* chi, \ ctype_r* absq \ ); INSERT_GENTDEFR( absqsc ) // normfsc #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ ctype* chi, \ ctype_r* norm \ ); INSERT_GENTDEFR( normfsc ) // sqrtsc #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ ctype* chi, \ ctype* psi \ ); INSERT_GENTDEF( sqrtsc ) // getsc #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ ctype* chi, \ double* zeta_r, \ double* zeta_i \ ); INSERT_GENTDEF( getsc ) // setsc #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ double zeta_r, \ double zeta_i, \ ctype* chi \ ); INSERT_GENTDEF( setsc ) // unzipsc #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ ctype* chi, \ ctype_r* zeta_r, \ ctype_r* zeta_i \ ); INSERT_GENTDEFR( unzipsc ) // zipsc #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void 
(*PASTECH2(ch,opname,tsuf)) \ ( \ ctype_r* zeta_r, \ ctype_r* zeta_i, \ ctype* chi \ ); INSERT_GENTDEFR( zipsc ) blis-0.6.1/frame/0/bli_l0_oapi.c000066400000000000000000000206211360743507500162300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define object-based interfaces. 
// #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* absq \ ) \ { \ bli_init_once(); \ \ num_t dt_chi; \ num_t dt_absq_c = bli_obj_dt_proj_to_complex( absq ); \ \ void* buf_chi; \ void* buf_absq = bli_obj_buffer_at_off( absq ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi, absq ); \ \ /* If chi is a scalar constant, use dt_absq_c to extract the address of the corresponding constant value; otherwise, use the datatype encoded within the chi object and extract the buffer at the chi offset. */ \ bli_obj_scalar_set_dt_buffer( chi, dt_absq_c, &dt_chi, &buf_chi ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ f \ ( \ buf_chi, \ buf_absq \ ); \ } GENFRONT( absqsc ) GENFRONT( normfsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* psi \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( psi ); \ \ conj_t conjchi = bli_obj_conj_status( chi ); \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi, psi ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ f \ ( \ conjchi, \ buf_chi, \ buf_psi \ ); \ } GENFRONT( addsc ) GENFRONT( divsc ) GENFRONT( mulsc ) GENFRONT( subsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* chi \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( chi ); \ \ conj_t conjchi = bli_obj_conj_status( chi ); \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ f \ ( \ conjchi, \ buf_chi \ ); \ } GENFRONT( invertsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* psi \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( psi ); \ \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi, psi ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt ); \ \ f \ ( \ buf_chi, \ buf_psi \ ); \ } GENFRONT( sqrtsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* chi, \ double* zeta_r, \ double* zeta_i \ ) \ { \ bli_init_once(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ num_t dt_def = BLIS_DCOMPLEX; \ num_t dt_use; \ \ /* If chi is a constant object, default to using the dcomplex value to maximize precision, and since we don't know if the caller needs just the real or the real and imaginary parts. 
*/ \ void* buf_chi = bli_obj_buffer_for_1x1( dt_def, chi ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ \ /* The _check() routine prevents integer types, so we know that chi is either a constant or an actual floating-point type. */ \ if ( bli_is_constant( dt_chi ) ) dt_use = dt_def; \ else dt_use = dt_chi; \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_use ); \ \ f \ ( \ buf_chi, \ zeta_r, \ zeta_i \ ); \ } GENFRONT( getsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ double zeta_r, \ double zeta_i, \ obj_t* chi \ ) \ { \ bli_init_once(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ \ void* buf_chi = bli_obj_buffer_at_off( chi ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ f \ ( \ zeta_r, \ zeta_i, \ buf_chi \ ); \ } GENFRONT( setsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* zeta_r, \ obj_t* zeta_i \ ) \ { \ bli_init_once(); \ \ num_t dt_chi; \ num_t dt_zeta_c = bli_obj_dt_proj_to_complex( zeta_r ); \ \ void* buf_chi; \ \ void* buf_zeta_r = bli_obj_buffer_at_off( zeta_r ); \ void* buf_zeta_i = bli_obj_buffer_at_off( zeta_i ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ \ /* If chi is a scalar constant, use dt_zeta_c to extract the address of the corresponding constant value; otherwise, use the datatype encoded within the chi object and extract the buffer at the chi offset. 
*/ \ bli_obj_scalar_set_dt_buffer( chi, dt_zeta_c, &dt_chi, &buf_chi ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ f \ ( \ buf_chi, \ buf_zeta_r, \ buf_zeta_i \ ); \ } GENFRONT( unzipsc ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* zeta_r, \ obj_t* zeta_i, \ obj_t* chi \ ) \ { \ bli_init_once(); \ \ num_t dt_chi = bli_obj_dt( chi ); \ \ void* buf_zeta_r = bli_obj_buffer_for_1x1( dt_chi, zeta_r ); \ void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, zeta_i ); \ \ void* buf_chi = bli_obj_buffer_at_off( chi ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(opname,_vft) f = PASTEMAC(opname,_qfp)( dt_chi ); \ \ f \ ( \ buf_zeta_i, \ buf_zeta_r, \ buf_chi \ ); \ } GENFRONT( zipsc ) blis-0.6.1/frame/0/bli_l0_oapi.h000066400000000000000000000060241360743507500162360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* absq \ ); GENPROT( absqsc ) GENPROT( normfsc ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* psi \ ); GENPROT( addsc ) GENPROT( divsc ) GENPROT( mulsc ) GENPROT( sqrtsc ) GENPROT( subsc ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* chi \ ); GENPROT( invertsc ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* chi, \ double* zeta_r, \ double* zeta_i \ ); GENPROT( getsc ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ double zeta_r, \ double zeta_i, \ obj_t* chi \ ); GENPROT( setsc ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* zeta_r, \ obj_t* zeta_i \ ); GENPROT( unzipsc ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* zeta_r, \ obj_t* zeta_i, \ obj_t* chi \ ); GENPROT( zipsc ) 
blis-0.6.1/frame/0/bli_l0_tapi.c000066400000000000000000000134071360743507500162410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-like interfaces with typed operands. 
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi, \ ctype* psi \ ) \ { \ bli_init_once(); \ \ ctype chi_conj; \ \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj, *psi ); \ } INSERT_GENTFUNC_BASIC( addsc, adds ) INSERT_GENTFUNC_BASIC( divsc, invscals ) INSERT_GENTFUNC_BASIC( subsc, subs ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi \ ) \ { \ bli_init_once(); \ \ ctype chi_conj; \ \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj ); \ PASTEMAC(ch,copys)( chi_conj, *chi ); \ } INSERT_GENTFUNC_BASIC( invertsc, inverts ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi, \ ctype* psi \ ) \ { \ bli_init_once(); \ \ if ( PASTEMAC(ch,eq0)( *chi ) ) \ { \ /* Overwrite potential Infs and NaNs. 
*/ \ PASTEMAC(ch,set0s)( *psi ); \ } \ else \ { \ ctype chi_conj; \ \ PASTEMAC(ch,copycjs)( conjchi, *chi, chi_conj ); \ PASTEMAC(ch,kername)( chi_conj, *psi ); \ } \ } INSERT_GENTFUNC_BASIC( mulsc, scals ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ ctype_r* absq \ ) \ { \ bli_init_once(); \ \ ctype_r chi_r; \ ctype_r chi_i; \ ctype_r absq_i; \ \ ( void )absq_i; \ \ PASTEMAC2(ch,chr,gets)( *chi, chi_r, chi_i ); \ \ /* absq = chi_r * chi_r + chi_i * chi_i; \ absq_r = 0.0; (thrown away) */ \ PASTEMAC(ch,absq2ris)( chi_r, chi_i, *absq, absq_i ); \ \ ( void )chi_i; \ } INSERT_GENTFUNCR_BASIC0( absqsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ ctype_r* norm \ ) \ { \ bli_init_once(); \ \ /* norm = sqrt( chi_r * chi_r + chi_i * chi_i ); */ \ PASTEMAC2(ch,chr,abval2s)( *chi, *norm ); \ } INSERT_GENTFUNCR_BASIC0( normfsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ ctype* psi \ ) \ { \ bli_init_once(); \ \ /* NOTE: sqrtsc/sqrt2s differs from normfsc/abval2s in the complex domain. 
*/ \ PASTEMAC(ch,sqrt2s)( *chi, *psi ); \ } INSERT_GENTFUNC_BASIC0( sqrtsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ double* zeta_r, \ double* zeta_i \ ) \ { \ bli_init_once(); \ \ PASTEMAC2(ch,d,gets)( *chi, *zeta_r, *zeta_i ); \ } INSERT_GENTFUNC_BASIC0( getsc ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ double zeta_r, \ double zeta_i, \ ctype* chi \ ) \ { \ bli_init_once(); \ \ PASTEMAC2(d,ch,sets)( zeta_r, zeta_i, *chi ); \ } INSERT_GENTFUNC_BASIC0( setsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ ctype_r* zeta_r, \ ctype_r* zeta_i \ ) \ { \ bli_init_once(); \ \ PASTEMAC2(ch,chr,gets)( *chi, *zeta_r, *zeta_i ); \ } INSERT_GENTFUNCR_BASIC0( unzipsc ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype_r* zeta_r, \ ctype_r* zeta_i, \ ctype* chi \ ) \ { \ bli_init_once(); \ \ PASTEMAC2(chr,ch,sets)( *zeta_r, *zeta_i, *chi ); \ } INSERT_GENTFUNCR_BASIC0( zipsc ) // ----------------------------------------------------------------------------- void bli_igetsc ( dim_t* chi, double* zeta_r, double* zeta_i ) { bli_init_once(); PASTEMAC2(i,d,gets)( *chi, *zeta_r, *zeta_i ); } void bli_isetsc ( double zeta_r, double zeta_i, dim_t* chi ) { bli_init_once(); PASTEMAC2(d,i,sets)( zeta_r, zeta_i, *chi ); } blis-0.6.1/frame/0/bli_l0_tapi.h000066400000000000000000000076111360743507500162460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces with typed operands. 
// #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi, \ ctype* psi \ ); INSERT_GENTPROT_BASIC0( addsc ) INSERT_GENTPROT_BASIC0( divsc ) INSERT_GENTPROT_BASIC0( mulsc ) INSERT_GENTPROT_BASIC0( subsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ conj_t conjchi, \ ctype* chi \ ); INSERT_GENTPROT_BASIC0( invertsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ ctype_r* absq \ ); INSERT_GENTPROTR_BASIC0( absqsc ) INSERT_GENTPROTR_BASIC0( normfsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ ctype* psi \ ); INSERT_GENTPROT_BASIC0( sqrtsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ double* zeta_r, \ double* zeta_i \ ); INSERT_GENTPROT_BASIC0( getsc ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ double zeta_r, \ double zeta_i, \ ctype* chi \ ); INSERT_GENTPROT_BASIC0( setsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ ctype* chi, \ ctype_r* zeta_r, \ ctype_r* zeta_i \ ); INSERT_GENTPROTR_BASIC0( unzipsc ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ ctype_r* zeta_r, \ ctype_r* zeta_i, \ ctype* chi \ ); INSERT_GENTPROTR_BASIC0( zipsc ) // ----------------------------------------------------------------------------- BLIS_EXPORT_BLIS void bli_igetsc ( dim_t* chi, double* zeta_r, double* zeta_i ); BLIS_EXPORT_BLIS void bli_isetsc ( double zeta_r, double zeta_i, dim_t* chi ); 
blis-0.6.1/frame/0/copysc/000077500000000000000000000000001360743507500152125ustar00rootroot00000000000000blis-0.6.1/frame/0/copysc/bli_copysc.c000066400000000000000000000072601360743507500175110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // NOTE: This is one of the few functions in BLIS that is defined // with heterogeneous type support. 
This is done so that we have // an operation that can be used to typecast (copy-cast) a scalar // of one datatype to a scalar of another datatype. typedef void (*FUNCPTR_T) ( conj_t conjchi, void* chi, void* psi ); static FUNCPTR_T GENARRAY2_ALL(ftypes,copysc); // // Define object-based interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* psi \ ) \ { \ bli_init_once(); \ \ conj_t conjchi = bli_obj_conj_status( chi ); \ \ num_t dt_psi = bli_obj_dt( psi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \ \ num_t dt_chi; \ void* buf_chi; \ \ FUNCPTR_T f; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( chi, psi ); \ \ /* If chi is a scalar constant, use dt_psi to extract the address of the corresponding constant value; otherwise, use the datatype encoded within the chi object and extract the buffer at the chi offset. */ \ bli_obj_scalar_set_dt_buffer( chi, dt_psi, &dt_chi, &buf_chi ); \ \ /* Index into the type combination array to extract the correct function pointer. */ \ f = ftypes[dt_chi][dt_psi]; \ \ /* Invoke the void pointer-based function. */ \ f( \ conjchi, \ buf_chi, \ buf_psi \ ); \ } GENFRONT( copysc ) // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNC2 #define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname) \ ( \ conj_t conjchi, \ void* chi, \ void* psi \ ) \ { \ bli_init_once(); \ \ ctype_x* chi_cast = chi; \ ctype_y* psi_cast = psi; \ \ if ( bli_is_conj( conjchi ) ) \ { \ PASTEMAC2(chx,chy,copyjs)( *chi_cast, *psi_cast ); \ } \ else \ { \ PASTEMAC2(chx,chy,copys)( *chi_cast, *psi_cast ); \ } \ } INSERT_GENTFUNC2_BASIC0( copysc ) INSERT_GENTFUNC2_MIX_D0( copysc ) INSERT_GENTFUNC2_MIX_P0( copysc ) blis-0.6.1/frame/0/copysc/bli_copysc.h000066400000000000000000000043541360743507500175170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* chi, \ obj_t* psi \ ); GENFRONT( copysc ) // // Prototype BLAS-like interfaces with heterogeneous-typed operands. 
// #undef GENTPROT2 #define GENTPROT2( ctype_x, ctype_y, chx, chy, varname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(chx,chy,varname) \ ( \ conj_t conjchi, \ void* chi, \ void* psi \ ); INSERT_GENTPROT2_BASIC0( copysc ) INSERT_GENTPROT2_MIX_D0( copysc ) INSERT_GENTPROT2_MIX_P0( copysc ) blis-0.6.1/frame/1/000077500000000000000000000000001360743507500137135ustar00rootroot00000000000000blis-0.6.1/frame/1/bli_l1v.h000066400000000000000000000046741360743507500154270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_l1v_check.h" // Define kernel function types. //#include "bli_l1v_ft_ex.h" #include "bli_l1v_ft_ker.h" // Prototype object APIs (expert and non-expert). #include "bli_oapi_ex.h" #include "bli_l1v_oapi.h" #include "bli_oapi_ba.h" #include "bli_l1v_oapi.h" // Prototype typed APIs (expert and non-expert). #include "bli_tapi_ex.h" #include "bli_l1v_tapi.h" #include "bli_l1v_ft.h" #include "bli_tapi_ba.h" #include "bli_l1v_tapi.h" #include "bli_l1v_ft.h" // Generate function pointer arrays for tapi functions (expert only). #include "bli_l1v_fpa.h" // Pack-related // NOTE: packv and unpackv are temporarily disabled. //#include "bli_packv.h" //#include "bli_unpackv.h" // Other // NOTE: scalv control tree code is temporarily disabled. //#include "bli_scalv_cntl.h" //#include "bli_scalv_int.h" blis-0.6.1/frame/1/bli_l1v_check.c000066400000000000000000000267021360743507500165530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define object-based check functions. 
// #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* y \ ) \ { \ bli_l1v_xy_check( x, y ); \ } GENFRONT( addv ) GENFRONT( copyv ) GENFRONT( subv ) GENFRONT( swapv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* index \ ) \ { \ bli_l1v_xi_check( x, index ); \ } GENFRONT( amaxv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ) \ { \ bli_l1v_axby_check( alpha, x, beta, y ); \ } GENFRONT( axpbyv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ ) \ { \ bli_l1v_axy_check( alpha, x, y ); \ } GENFRONT( axpyv ) GENFRONT( scal2v ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* y, \ obj_t* rho \ ) \ { \ bli_l1v_dot_check( &BLIS_ONE, x, y, &BLIS_ONE, rho ); \ } GENFRONT( dotv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* beta, \ obj_t* rho \ ) \ { \ bli_l1v_dot_check( alpha, x, y, beta, rho ); \ } GENFRONT( dotxv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x \ ) \ { \ bli_l1v_x_check( x ); \ } GENFRONT( invertv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x \ ) \ { \ bli_l1v_ax_check( alpha, x ); \ } GENFRONT( scalv ) GENFRONT( setv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ) \ { \ bli_l1v_xby_check( x, beta, y ); \ } GENFRONT( xpbyv ) // ----------------------------------------------------------------------------- void bli_l1v_xy_check ( obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. 
e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_l1v_axy_check ( obj_t* alpha, obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_l1v_xby_check ( obj_t* x, obj_t* beta, obj_t* y ) { err_t e_val; // Check object datatypes. 
e_val = bli_check_noninteger_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( beta ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_l1v_axby_check ( obj_t* alpha, obj_t* x, obj_t* beta, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( beta ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_l1v_dot_check ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* beta, obj_t* rho ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( rho ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( rho ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( rho ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( beta ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( rho ); bli_check_error_code( e_val ); } void bli_l1v_x_check ( obj_t* x ) { err_t e_val; // Check object datatypes. 
e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); } void bli_l1v_ax_check ( obj_t* alpha, obj_t* x ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); } void bli_l1v_xi_check ( obj_t* x, obj_t* index ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_integer_object( index ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( index ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( index ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( index ); bli_check_error_code( e_val ); } blis-0.6.1/frame/1/bli_l1v_check.h000066400000000000000000000101671360743507500165560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based check functions. 
// #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* y \ ); GENTPROT( addv ) GENTPROT( copyv ) GENTPROT( subv ) GENTPROT( swapv ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* index \ ); GENTPROT( amaxv ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ); GENTPROT( axpbyv ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ ); GENTPROT( axpyv ) GENTPROT( scal2v ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* y, \ obj_t* rho \ ); GENTPROT( dotv ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* beta, \ obj_t* rho \ ); GENTPROT( dotxv ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x \ ); GENTPROT( invertv ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x \ ); GENTPROT( scalv ) GENTPROT( setv ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ); GENTPROT( xpbyv ) // ----------------------------------------------------------------------------- void bli_l1v_xy_check ( obj_t* x, obj_t* y ); void bli_l1v_axy_check ( obj_t* alpha, obj_t* x, obj_t* y ); void bli_l1v_xby_check ( obj_t* x, obj_t* beta, obj_t* y ); void bli_l1v_axby_check ( obj_t* alpha, obj_t* x, obj_t* beta, obj_t* y ); void bli_l1v_dot_check ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* beta, obj_t* rho ); void bli_l1v_x_check ( obj_t* x ); void bli_l1v_ax_check ( obj_t* alpha, obj_t* x ); void bli_l1v_xi_check ( obj_t* x, obj_t* index ); 
blis-0.6.1/frame/1/bli_l1v_fpa.c000066400000000000000000000044121360743507500162360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define function pointer query interfaces. 
// #undef GENFRONT #define GENFRONT( opname ) \ \ GENARRAY_FPA( PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft), \ PASTECH(opname,BLIS_TAPI_EX_SUF) ); \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ) \ { \ return PASTECH2(opname,BLIS_TAPI_EX_SUF,_fpa)[ dt ]; \ } GENFRONT( addv ) GENFRONT( copyv ) GENFRONT( subv ) GENFRONT( amaxv ) GENFRONT( axpbyv ) GENFRONT( axpyv ) GENFRONT( scal2v ) GENFRONT( dotv ) GENFRONT( dotxv ) GENFRONT( invertv ) GENFRONT( scalv ) GENFRONT( setv ) GENFRONT( swapv ) GENFRONT( xpbyv ) blis-0.6.1/frame/1/bli_l1v_fpa.h000066400000000000000000000040731360743507500162460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
   IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

//
// Prototype function pointer query interface.
//

// Declares, per operation, the query function named via
// PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp). It takes a num_t datatype and
// returns a function pointer of type <opname><suf>_vft.
#undef GENPROT
#define GENPROT( opname ) \
\
PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt );

GENPROT( addv )
GENPROT( copyv )
GENPROT( subv )
GENPROT( amaxv )
GENPROT( axpbyv )
GENPROT( axpyv )
GENPROT( scal2v )
GENPROT( dotv )
GENPROT( dotxv )
GENPROT( invertv )
GENPROT( scalv )
GENPROT( setv )
GENPROT( swapv )
GENPROT( xpbyv )

blis-0.6.1/frame/1/bli_l1v_ft.h000066400000000000000000000116641360743507500161150ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // -- Level-1v function types -------------------------------------------------- // // addv, copyv, subv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( addv ) INSERT_GENTDEF( copyv ) INSERT_GENTDEF( subv ) // amaxv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ dim_t* index \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( amaxv ) // axpbyv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( axpbyv ) // axpyv, scal2v #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( axpyv ) INSERT_GENTDEF( scal2v ) // dotv #undef GENTDEF #define GENTDEF( 
ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* rho \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( dotv ) // dotxv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* beta, \ ctype* rho \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( dotxv ) // invertv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ dim_t n, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( invertv ) // scalv, setv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjalpha, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( scalv ) INSERT_GENTDEF( setv ) // swapv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( swapv ) // xpbyv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( xpbyv ) blis-0.6.1/frame/1/bli_l1v_ft_ker.h000066400000000000000000000125271360743507500167550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_L1V_FT_KER_H #define BLIS_L1V_FT_KER_H // // -- Level-1v kernel function types ------------------------------------------- // // addv, copyv, subv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( addv ) INSERT_GENTDEF( copyv ) INSERT_GENTDEF( subv ) // amaxv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ dim_t* restrict index, \ cntx_t* cntx \ ); INSERT_GENTDEF( amaxv ) // axpbyv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( axpbyv ) // axpyv, scal2v #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( axpyv ) INSERT_GENTDEF( scal2v ) // dotv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict rho, \ cntx_t* cntx \ ); INSERT_GENTDEF( dotv ) // dotxv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict beta, \ ctype* restrict rho, \ cntx_t* cntx \ ); INSERT_GENTDEF( dotxv ) // invertv #undef GENTDEF #define 
GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); INSERT_GENTDEF( invertv ) // scalv, setv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjalpha, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); INSERT_GENTDEF( scalv ) INSERT_GENTDEF( setv ) // swapv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( swapv ) // xpbyv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( xpbyv ) #endif blis-0.6.1/frame/1/bli_l1v_ker.h000066400000000000000000000063141360743507500162610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-1v kernels. // // Note: Instead of defining function prototype macro templates and then // instantiating those macros to define the individual function prototypes, // we simply alias the official operations' prototypes as defined in // bli_l1v_ker_prot.h. 
// Each pair below points GENTPROT at one operation's *_KER_PROT prototype
// template and then instantiates it through INSERT_GENTPROT_BASIC0 with that
// operation's kernel-name macro. The #undef before each #define is required
// because GENTPROT is redefined for every operation.
// NOTE(review): INSERT_GENTPROT_BASIC0 presumably expands GENTPROT once per
// supported datatype; confirm against its definition.

#undef GENTPROT
#define GENTPROT ADDV_KER_PROT

INSERT_GENTPROT_BASIC0( addv_ker_name )


#undef GENTPROT
#define GENTPROT AMAXV_KER_PROT

INSERT_GENTPROT_BASIC0( amaxv_ker_name )


#undef GENTPROT
#define GENTPROT AXPBYV_KER_PROT

INSERT_GENTPROT_BASIC0( axpbyv_ker_name )


#undef GENTPROT
#define GENTPROT AXPYV_KER_PROT

INSERT_GENTPROT_BASIC0( axpyv_ker_name )


#undef GENTPROT
#define GENTPROT COPYV_KER_PROT

INSERT_GENTPROT_BASIC0( copyv_ker_name )


#undef GENTPROT
#define GENTPROT DOTV_KER_PROT

INSERT_GENTPROT_BASIC0( dotv_ker_name )


#undef GENTPROT
#define GENTPROT DOTXV_KER_PROT

INSERT_GENTPROT_BASIC0( dotxv_ker_name )


#undef GENTPROT
#define GENTPROT INVERTV_KER_PROT

INSERT_GENTPROT_BASIC0( invertv_ker_name )


#undef GENTPROT
#define GENTPROT SCALV_KER_PROT

INSERT_GENTPROT_BASIC0( scalv_ker_name )


#undef GENTPROT
#define GENTPROT SCAL2V_KER_PROT

INSERT_GENTPROT_BASIC0( scal2v_ker_name )


#undef GENTPROT
#define GENTPROT SETV_KER_PROT

INSERT_GENTPROT_BASIC0( setv_ker_name )


#undef GENTPROT
#define GENTPROT SUBV_KER_PROT

INSERT_GENTPROT_BASIC0( subv_ker_name )


#undef GENTPROT
#define GENTPROT SWAPV_KER_PROT

INSERT_GENTPROT_BASIC0( swapv_ker_name )


#undef GENTPROT
#define GENTPROT XPBYV_KER_PROT

INSERT_GENTPROT_BASIC0( xpbyv_ker_name )

blis-0.6.1/frame/1/bli_l1v_ker_prot.h000066400000000000000000000131731360743507500173260ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-1v kernels. 
// #define ADDV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); #define AMAXV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ dim_t* restrict index, \ cntx_t* restrict cntx \ ); \ #define AXPBYV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); \ #define AXPYV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); \ #define COPYV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); #define DOTV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict rho, \ cntx_t* restrict cntx \ ); \ #define DOTXV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict beta, \ ctype* restrict rho, \ cntx_t* restrict cntx \ ); \ #define INVERTV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ cntx_t* restrict cntx \ ); \ #define SCALV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjalpha, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ cntx_t* restrict cntx \ ); \ #define SCAL2V_KER_PROT( ctype, ch, opname ) \ \ void 
PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); \ #define SETV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjalpha, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ cntx_t* restrict cntx \ ); \ #define SUBV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); #define SWAPV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); \ #define XPBYV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); \ blis-0.6.1/frame/1/bli_l1v_oapi.c000066400000000000000000000340111360743507500164160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Guard the function definitions so that they are only compiled when
// #included from files that define the object API macros.
#ifdef BLIS_ENABLE_OAPI

//
// Define object-based interfaces.
//

// Every GENFRONT template below follows the same sequence:
//   1. bli_init_once(), then BLIS_OAPI_EX_DECLS;
//   2. read the datatype dt from x and unpack dimension, buffer, increment
//      (and conjugation status where the operation takes one) from the
//      vector objects;
//   3. call the operation's _check function if
//      bli_error_checking_is_enabled();
//   4. for scalar operands, make a local copy-cast to dt with
//      bli_obj_scalar_init_detached_copy_of() and take its 1x1 buffer;
//   5. look up the void*-style function pointer for dt via the _qfp query
//      function and call it, passing cntx and rntm last.
// NOTE(review): cntx and rntm are not declared inside these templates; they
// presumably come from BLIS_OAPI_EX_PARAMS or BLIS_OAPI_EX_DECLS. Confirm
// against the header that defines those macros.

// addv, copyv, subv: operands (x, y); conjugation status is taken from x.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* x, \
       obj_t* y \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	conj_t conjx = bli_obj_conj_status( x ); \
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
	void* buf_y = bli_obj_buffer_at_off( y ); \
	inc_t inc_y = bli_obj_vector_inc( y ); \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( x, y ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  conjx, \
	  n, \
	  buf_x, inc_x, \
	  buf_y, inc_y, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( addv )
GENFRONT( copyv )
GENFRONT( subv )


// amaxv: operands (x, index); the buffer of the index object is passed to
// the typed function as the output location.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* x, \
       obj_t* index \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t incx = bli_obj_vector_inc( x ); \
\
	void* buf_index = bli_obj_buffer_at_off( index ); \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( x, index ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  n, \
	  buf_x, incx, \
	  buf_index, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( amaxv )


// axpbyv: operands (alpha, x, beta, y); both scalars are copy-cast to the
// datatype of x before the call.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* alpha, \
       obj_t* x, \
       obj_t* beta, \
       obj_t* y \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	conj_t conjx = bli_obj_conj_status( x ); \
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
	void* buf_y = bli_obj_buffer_at_off( y ); \
	inc_t inc_y = bli_obj_vector_inc( y ); \
\
	void* buf_alpha; \
	void* buf_beta; \
\
	obj_t alpha_local; \
	obj_t beta_local; \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( alpha, x, beta, y ); \
\
	/* Create local copy-casts of scalars (and apply internal conjugation
	   as needed). */ \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      alpha, &alpha_local ); \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      beta, &beta_local ); \
	buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
	buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  conjx, \
	  n, \
	  buf_alpha, \
	  buf_x, inc_x, \
	  buf_beta, \
	  buf_y, inc_y, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( axpbyv )


// axpyv, scal2v: operands (alpha, x, y); alpha is copy-cast to the datatype
// of x before the call.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* alpha, \
       obj_t* x, \
       obj_t* y \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	conj_t conjx = bli_obj_conj_status( x ); \
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
	void* buf_y = bli_obj_buffer_at_off( y ); \
	inc_t inc_y = bli_obj_vector_inc( y ); \
\
	void* buf_alpha; \
\
	obj_t alpha_local; \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( alpha, x, y ); \
\
	/* Create local copy-casts of scalars (and apply internal conjugation
	   as needed). */ \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      alpha, &alpha_local ); \
	buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  conjx, \
	  n, \
	  buf_alpha, \
	  buf_x, inc_x, \
	  buf_y, inc_y, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( axpyv )
GENFRONT( scal2v )


// dotv: operands (x, y, rho); conjugation status is taken from both x and
// y, and the buffer of rho is passed as the result location.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* x, \
       obj_t* y, \
       obj_t* rho \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	conj_t conjx = bli_obj_conj_status( x ); \
	conj_t conjy = bli_obj_conj_status( y ); \
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
	void* buf_y = bli_obj_buffer_at_off( y ); \
	inc_t inc_y = bli_obj_vector_inc( y ); \
	void* buf_rho = bli_obj_buffer_at_off( rho ); \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( x, y, rho ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  conjx, \
	  conjy, \
	  n, \
	  buf_x, inc_x, \
	  buf_y, inc_y, \
	  buf_rho, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( dotv )


// dotxv: operands (alpha, x, y, beta, rho); both scalars are copy-cast to
// the datatype of x before the call.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* alpha, \
       obj_t* x, \
       obj_t* y, \
       obj_t* beta, \
       obj_t* rho \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	conj_t conjx = bli_obj_conj_status( x ); \
	conj_t conjy = bli_obj_conj_status( y ); \
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
	void* buf_y = bli_obj_buffer_at_off( y ); \
	inc_t inc_y = bli_obj_vector_inc( y ); \
	void* buf_rho = bli_obj_buffer_at_off( rho ); \
\
	void* buf_alpha; \
	void* buf_beta; \
\
	obj_t alpha_local; \
	obj_t beta_local; \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( alpha, x, y, beta, rho ); \
\
	/* Create local copy-casts of scalars (and apply internal conjugation
	   as needed). */ \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      alpha, &alpha_local ); \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      beta, &beta_local ); \
	buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
	buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  conjx, \
	  conjy, \
	  n, \
	  buf_alpha, \
	  buf_x, inc_x, \
	  buf_y, inc_y, \
	  buf_beta, \
	  buf_rho, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( dotxv )


// invertv: single operand x.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* x \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( x ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  n, \
	  buf_x, inc_x, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( invertv )


// scalv, setv: operands (alpha, x). The typed function is always called
// with BLIS_NO_CONJUGATE for alpha; the conjugation status of alpha is not
// read here (the line that would read it is commented out).
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* alpha, \
       obj_t* x \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
\
	void* buf_alpha; \
\
	obj_t alpha_local; \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( alpha, x ); \
\
	/* Create local copy-casts of scalars (and apply internal conjugation
	   as needed). */ \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      alpha, &alpha_local ); \
	buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \
	  n, \
	  buf_alpha, \
	  buf_x, inc_x, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( scalv )
GENFRONT( setv )


// swapv: operands (x, y); no conjugation argument is passed.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* x, \
       obj_t* y \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
	void* buf_y = bli_obj_buffer_at_off( y ); \
	inc_t inc_y = bli_obj_vector_inc( y ); \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( x, y ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  n, \
	  buf_x, inc_x, \
	  buf_y, inc_y, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( swapv )


// xpbyv: operands (x, beta, y); beta is copy-cast to the datatype of x
// before the call.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
     ( \
       obj_t* x, \
       obj_t* beta, \
       obj_t* y \
       BLIS_OAPI_EX_PARAMS \
     ) \
{ \
	bli_init_once(); \
\
	BLIS_OAPI_EX_DECLS \
\
	num_t dt = bli_obj_dt( x ); \
\
	conj_t conjx = bli_obj_conj_status( x ); \
	dim_t n = bli_obj_vector_dim( x ); \
	void* buf_x = bli_obj_buffer_at_off( x ); \
	inc_t inc_x = bli_obj_vector_inc( x ); \
	void* buf_y = bli_obj_buffer_at_off( y ); \
	inc_t inc_y = bli_obj_vector_inc( y ); \
\
	void* buf_beta; \
\
	obj_t beta_local; \
\
	if ( bli_error_checking_is_enabled() ) \
		PASTEMAC(opname,_check)( x, beta, y ); \
\
	/* Create local copy-casts of scalars (and apply internal conjugation
	   as needed). */ \
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
	                                      beta, &beta_local ); \
	buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
	/* Query a type-specific function pointer, except one that uses
	   void* for function arguments instead of typed pointers. */ \
	PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
	PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
	f \
	( \
	  conjx, \
	  n, \
	  buf_x, inc_x, \
	  buf_beta, \
	  buf_y, inc_y, \
	  cntx, \
	  rntm \
	); \
}

GENFRONT( xpbyv )


#endif

blis-0.6.1/frame/1/bli_l1v_oapi.h000066400000000000000000000076521360743507500164360ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. 
// #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( addv ) GENTPROT( copyv ) GENTPROT( subv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* index \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( amaxv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( axpbyv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( axpyv ) GENTPROT( scal2v ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* y, \ obj_t* rho \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( dotv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* beta, \ obj_t* rho \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( dotxv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( invertv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( scalv ) GENTPROT( setv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( swapv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( xpbyv ) blis-0.6.1/frame/1/bli_l1v_oapi_ba.c000066400000000000000000000036701360743507500170670ustar00rootroot00000000000000/* BLIS An object-based framework for 
developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_oapi_ba.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. 
#include "bli_l1v_oapi.c" blis-0.6.1/frame/1/bli_l1v_oapi_ex.c000066400000000000000000000036661360743507500171260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_oapi_ex.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. 
#include "bli_l1v_oapi.c" blis-0.6.1/frame/1/bli_l1v_tapi.c000066400000000000000000000214661360743507500164350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the typed API macros. #ifdef BLIS_ENABLE_TAPI // // Define BLAS-like interfaces with typed operands. 
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjx, \ n, \ x, incx, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( addv, BLIS_ADDV_KER ) INSERT_GENTFUNC_BASIC( copyv, BLIS_COPYV_KER ) INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ dim_t* index \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ n, \ x, incx, \ index, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( amaxv, BLIS_AMAXV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjx, \ n, \ alpha, \ x, incx, \ beta, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) \ cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjx, \ n, \ alpha, \ x, incx, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( axpyv, BLIS_AXPYV_KER ) INSERT_GENTFUNC_BASIC( scal2v, BLIS_SCAL2V_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* rho \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjx, \ conjy, \ n, \ x, incx, \ y, incy, \ rho, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( dotv, BLIS_DOTV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* beta, \ ctype* rho \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjx, \ conjy, \ n, \ alpha, \ x, incx, \ y, incy, \ beta, \ rho, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( dotxv, BLIS_DOTXV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ n, \ x, incx, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( invertv, BLIS_INVERTV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjalpha, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjalpha, \ n, \ alpha, \ x, incx, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( scalv, BLIS_SCALV_KER ) INSERT_GENTFUNC_BASIC( setv, BLIS_SETV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ n, \ x, incx, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( swapv, BLIS_SWAPV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjx, \ n, \ x, incx, \ beta, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER ) #endif blis-0.6.1/frame/1/bli_l1v_tapi.h000066400000000000000000000116261360743507500164370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces with typed operands. // #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( addv ) INSERT_GENTPROT_BASIC0( copyv ) INSERT_GENTPROT_BASIC0( subv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ dim_t* index \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( amaxv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( axpbyv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( axpyv ) INSERT_GENTPROT_BASIC0( scal2v ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS 
void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* rho \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( dotv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* beta, \ ctype* rho \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( dotxv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( invertv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjalpha, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( scalv ) INSERT_GENTPROT_BASIC0( setv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( swapv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ dim_t n, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); \ INSERT_GENTPROT_BASIC0( xpbyv ) blis-0.6.1/frame/1/bli_l1v_tapi_ba.c000066400000000000000000000036661360743507500171010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_tapi_ba.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_l1v_tapi.c" blis-0.6.1/frame/1/bli_l1v_tapi_ex.c000066400000000000000000000036641360743507500171310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_tapi_ex.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. 
#include "bli_l1v_tapi.c" blis-0.6.1/frame/1/other/000077500000000000000000000000001360743507500150345ustar00rootroot00000000000000blis-0.6.1/frame/1/other/packv/000077500000000000000000000000001360743507500161405ustar00rootroot00000000000000blis-0.6.1/frame/1/other/packv/bli_packv.c000066400000000000000000000032341360743507500202400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" blis-0.6.1/frame/1/other/packv/bli_packv.h000066400000000000000000000034331360743507500202460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "bli_packv_cntl.h" #include "bli_packv_check.h" #include "bli_packv_init.h" #include "bli_packv_int.h" #include "bli_packv_unb_var1.h" blis-0.6.1/frame/1/other/packv/bli_packv_check.c000066400000000000000000000037711360743507500214030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_packv_check ( obj_t* c, obj_t* p, cntx_t* cntx ) { err_t e_val; // Check object datatypes. 
e_val = bli_check_floating_object( c ); bli_check_error_code( e_val ); // Check object dimensions. // We don't check for conformal dimensions between c and p because // p has not yet been initialized. } blis-0.6.1/frame/1/other/packv/bli_packv_check.h000066400000000000000000000033451360743507500214050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_packv_check ( obj_t* c, obj_t* p, cntx_t* cntx ); blis-0.6.1/frame/1/other/packv/bli_packv_cntl.c000066400000000000000000000050561360743507500212640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" cntl_t* bli_packv_cntl_obj_create ( void_fp var_func, void_fp packv_var_func, bszid_t bmid, pack_t pack_schema, cntl_t* sub_node ) { cntl_t* cntl; packv_params_t* params; // Allocate a packv_params_t struct. 
params = bli_malloc_intl( sizeof( packv_params_t ) ); // Initialize the packv_params_t struct. params->size = sizeof( packv_params_t ); params->packv_var_func = packv_var_func; params->bmid = bmid; params->pack_schema = pack_schema; // It's important that we set the bszid field to BLIS_NO_PART to indicate // that no blocksize partitioning is performed. bli_cntl_free() will rely // on this information to know how to step through the thrinfo_t tree in // sync with the cntl_t tree. cntl = bli_cntl_create_node ( BLIS_NO_PART, var_func, params, sub_node ); return cntl; } blis-0.6.1/frame/1/other/packv/bli_packv_cntl.h000066400000000000000000000045641360743507500212740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ struct packv_params_s { uint64_t size packv_var_oft* var_func; bszid_t bmid; pack_t pack_schema; }; typedef struct packv_params_s packv_params_t; #define bli_cntl_packv_params_var_func( cntl ) \ \ ( (packv_params_t*)( cntl->params )->var_func ) #define bli_cntl_packv_params_bmid( cntl ) \ \ ( (packv_params_t*)( cntl->params )->bmid_m ) #define bli_cntl_packv_params_pack_schema( cntl ) \ \ ( (packv_params_t*)( cntl->params )->pack_schema ) // ----------------------------------------------------------------------------- cntl_t* bli_packv_cntl_obj_create ( void_fp var_func, void_fp packv_var_func, bszid_t bmid, pack_t pack_schema, cntl_t* sub_node ); blis-0.6.1/frame/1/other/packv/bli_packv_init.c000066400000000000000000000147231360743507500212700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_packv_init ( obj_t* a, obj_t* p, cntx_t* cntx, packv_t* cntl ) { // The purpose of packm_init() is to initialize an object P so that // a source object A can be packed into P via one of the packv // implementations. This initialization includes acquiring a suitable // block of memory from the memory allocator, if such a block of memory // has not already been allocated previously. pack_t pack_schema; bszid_t bmult_id; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_packv_check( a, p, cntx ); // First check if we are to skip this operation because the control tree // is NULL, and if so, simply alias the object to its packed counterpart. if ( bli_cntl_is_noop( cntl ) ) { bli_obj_alias_to( a, p ); return; } // At this point, we can be assured that cntl is not NULL. Let us now // check to see if the object has already been packed to the desired // schema (as encoded in the control tree). If so, we can alias and // return, as above. 
// Note that in most cases, bli_obj_pack_schema() will return // BLIS_NOT_PACKED and thus packing will be called for (but in some // cases packing has already taken place). Also, not all combinations // of current pack status and desired pack schema are valid. if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) ) { bli_obj_alias_to( a, p ); return; } // Now, if we are not skipping the pack operation, then the only question // left is whether we are to typecast vector a before packing. if ( bli_obj_dt( a ) != bli_obj_target_dt( a ) ) bli_abort(); // Extract various fields from the control tree and pass them in // explicitly into _init_pack(). This allows external code generators // the option of bypassing usage of control trees altogether. pack_schema = cntl_pack_schema( cntl ); bmult_id = cntl_bmid( cntl ); // Initialize object p for the final packed vector. bli_packv_init_pack ( pack_schema, bmult_id, &a, p, cntx ); // Now p is ready to be packed. } siz_t bli_packv_init_pack ( pack_t schema, bszid_t bmult_id, obj_t* a, obj_t* p, cntx_t* cntx ) { num_t dt = bli_obj_dt( a ); dim_t dim_a = bli_obj_vector_dim( a ); dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx ); membrk_t* membrk = bli_cntx_membrk( cntx ); #if 0 mem_t* mem_p; #endif dim_t m_p_pad; siz_t size_p; inc_t rs_p, cs_p; void* buf; // We begin by copying the basic fields of c. bli_obj_alias_to( a, p ); // Update the dimensions. bli_obj_set_dims( dim_a, 1, p ); // Reset the view offsets to (0,0). bli_obj_set_offs( 0, 0, p ); // Set the pack schema in the p object to the value in the control tree // node. bli_obj_set_pack_schema( schema, p ); // Compute the dimensions padded by the dimension multiples. m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( p ), bmult ); // Compute the size of the packed buffer. size_p = m_p_pad * 1 * bli_obj_elem_size( p ); #if 0 // Extract the address of the mem_t object within p that will track // properties of the packed buffer. 
mem_p = bli_obj_pack_mem( *p ); if ( bli_mem_is_unalloc( mem_p ) ) { // If the mem_t object of p has not yet been allocated, then acquire // a memory block suitable for a vector. bli_membrk_acquire_v( membrk, size_p, mem_p ); } else { // If the mem_t object has already been allocated, then release and // re-acquire the memory so there is sufficient space. if ( bli_mem_size( mem_p ) < size_p ) { bli_membrk_release( mem_p ); bli_membrk_acquire_v( membrk, size_p, mem_p ); } } // Grab the buffer address from the mem_t object and copy it to the // main object buffer field. (Sometimes this buffer address will be // copied when the value is already up-to-date, because it persists // in the main object buffer field across loop iterations.) buf = bli_mem_buffer( mem_p ); bli_obj_set_buffer( buf, p ); #endif // Save the padded (packed) dimensions into the packed object. bli_obj_set_padded_dims( m_p_pad, 1, p ); // Set the row and column strides of p based on the pack schema. if ( schema == BLIS_PACKED_VECTOR ) { // Set the strides to reflect a column-stored vector. Note that the // column stride may never be used, and is only useful to determine // how much space beyond the vector would need to be zero-padded, if // zero-padding was needed. rs_p = 1; cs_p = bli_obj_padded_length( p ); bli_obj_set_strides( rs_p, cs_p, p ); } return size_p; } #if 0 void bli_packv_release ( obj_t* p, packv_t* cntl ) { if ( !bli_cntl_is_noop( cntl ) ) bli_obj_release_pack( p ); } #endif blis-0.6.1/frame/1/other/packv/bli_packv_init.h000066400000000000000000000036321360743507500212720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_packv_init ( obj_t* a, obj_t* p, cntx_t* cntx, packv_t* cntl ); siz_t bli_packv_init_pack ( pack_t pack_schema, bszid_t bmult_id, obj_t* a, obj_t* p, cntx_t* cntx ); blis-0.6.1/frame/1/other/packv/bli_packv_int.c000066400000000000000000000104341360743507500211120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T packv_fp typedef void (*FUNCPTR_T)( obj_t* a, obj_t* p, cntx_t* cntx, packv_t* cntl ); static FUNCPTR_T vars[1][3] = { // unblocked optimized unblocked blocked { bli_packv_unb_var1, NULL, NULL } }; void bli_packv_int ( obj_t* a, obj_t* p, cntx_t* cntx, cntl_t* cntl ) { #if 0 varnum_t n; impl_t i; #endif packv_var_oft f; // !!! // DEFINE packv_var_oft type. // !!! // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_packv_check( a, p, cntx ); // Sanity check; A should never have a zero dimension. If we must support // it, then we should fold it into the next alias-and-early-exit block. //if ( bli_obj_has_zero_dim( a ) ) bli_abort(); // First check if we are to skip this operation because the control tree // is NULL. We return without taking any action because a was already // aliased to p in packv_init(). if ( bli_cntl_is_noop( cntl ) ) { return; } // Let us now check to see if the object has already been packed. First // we check if it has been packed to an unspecified (row or column) // format, in which case we can return, since by now aliasing has already // taken place in packv_init(). // NOTE: The reason we don't need to even look at the control tree in // this case is as follows: an object's pack status is only set to // BLIS_PACKED_UNSPEC for situations when the actual format used is // not important, as long as its packed into contiguous rows or // contiguous columns. A good example of this is packing for matrix // operands in the level-2 operations. if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC ) { return; } // At this point, we can be assured that cntl is not NULL. Now we check // if the object has already been packed to the desired schema (as en- // coded in the control tree). If so, we can return, as above. // NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED // and thus packing will be called for (but in some cases packing has // already taken place, or does not need to take place, and so that will // be indicated by the pack status). Also, not all combinations of // current pack status and desired pack schema are valid. if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) ) { return; } // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. 
f = vars[n][i]; // Invoke the variant. f( a, p, cntx, cntl ); } blis-0.6.1/frame/1/other/packv/bli_packv_int.h000066400000000000000000000033751360743507500211250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_packv_int ( obj_t* c, obj_t* p, cntx_t* cntx, packv_t* cntl ); blis-0.6.1/frame/1/other/packv/bli_packv_unb_var1.c000066400000000000000000000061171360743507500220400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T packv_fp typedef void (*FUNCPTR_T)( dim_t m, void* c, inc_t incc, void* p, inc_t incp, cntx_t* cntx ); static FUNCPTR_T GENARRAY(ftypes,packv_unb_var1); void bli_packv_unb_var1( obj_t* c, obj_t* p, cntx_t* cntx, packv_t* cntl ) { num_t dt_cp = bli_obj_dt( c ); dim_t dim_p = bli_obj_vector_dim( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t incc = bli_obj_vector_inc( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t incp = bli_obj_vector_inc( p ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; // Invoke the function. f ( dim_p, buf_c, incc, buf_p, incp, cntx ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t m, \ void* c, inc_t incc, \ void* p, inc_t incp, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ copyv_p \ ( \ BLIS_NO_CONJUGATE, \ m, \ c, incc, \ p, incp, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC0( packv_unb_var1 ) blis-0.6.1/frame/1/other/packv/bli_packv_unb_var1.h000066400000000000000000000040441360743507500220420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_packv_unb_var1( obj_t* c, obj_t* p, cntx_t* cntx, packv_t* cntl ); #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t m, \ void* c, inc_t incc, \ void* p, inc_t incp, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( packv_unb_var1 ) blis-0.6.1/frame/1/other/scalv/000077500000000000000000000000001360743507500161445ustar00rootroot00000000000000blis-0.6.1/frame/1/other/scalv/bli_scalv_cntl.c000066400000000000000000000045651360743507500213000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" scalv_t* scalv_cntl = NULL; void bli_scalv_cntl_init() { scalv_cntl = bli_scalv_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1 ); } void bli_scalv_cntl_finalize() { bli_cntl_free_node( scalv_cntl ); } scalv_t* bli_scalv_cntl_obj_create( impl_t impl_type, varnum_t var_num ) { scalv_t* cntl; cntl = ( scalv_t* ) bli_malloc_intl( sizeof(scalv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; return cntl; } void bli_scalv_cntl_obj_init( scalv_t* cntl, impl_t impl_type, varnum_t var_num ) { cntl->impl_type = impl_type; cntl->var_num = var_num; } blis-0.6.1/frame/1/other/scalv/bli_scalv_cntl.h000066400000000000000000000042131360743507500212730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ struct scalv_s { impl_t impl_type; varnum_t var_num; }; typedef struct scalv_s scalv_t; #define bli_cntl_sub_scalv( cntl ) cntl->sub_scalv void bli_scalv_cntl_init( void ); void bli_scalv_cntl_finalize( void ); scalv_t* bli_scalv_cntl_obj_create( impl_t impl_type, varnum_t var_num ); void bli_scalv_cntl_obj_init( scalv_t* cntl, impl_t impl_type, varnum_t var_num ); blis-0.6.1/frame/1/other/scalv/bli_scalv_int.c000066400000000000000000000053741360743507500211310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" typedef void (*FUNCPTR_T)( obj_t* alpha, obj_t* x, cntx_t* cntx ); static FUNCPTR_T vars[1][3] = { // unblocked optimized unblocked blocked { bli_scalv_ex, bli_scalv_ex, NULL } }; void bli_scalv_int( obj_t* alpha, obj_t* x, cntx_t* cntx, scalv_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; // Return early if one of the matrix operands has a zero dimension. if ( bli_obj_has_zero_dim( x ) ) return; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_scalv_check( alpha, x ); // First check if we are to skip this operation. if ( bli_cntl_is_noop( cntl ) ) return; // Return early if the alpha scalar equals one. if ( bli_obj_equals( alpha, &BLIS_ONE ) ) return; // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( alpha, x, cntx ); } blis-0.6.1/frame/1/other/scalv/bli_scalv_int.h000066400000000000000000000034261360743507500211320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_scalv_int( obj_t* alpha, obj_t* x, cntx_t* cntx, scalv_t* cntl ); blis-0.6.1/frame/1/other/unpackv/000077500000000000000000000000001360743507500165035ustar00rootroot00000000000000blis-0.6.1/frame/1/other/unpackv/bli_unpackv.c000066400000000000000000000032341360743507500211460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" blis-0.6.1/frame/1/other/unpackv/bli_unpackv.h000066400000000000000000000034101360743507500211470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_unpackv_cntl.h" #include "bli_unpackv_check.h" #include "bli_unpackv_int.h" #include "bli_unpackv_unb_var1.h" blis-0.6.1/frame/1/other/unpackv/bli_unpackv_check.c000066400000000000000000000042271360743507500223060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_unpackv_check ( obj_t* p, obj_t* a, cntx_t* cntx ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( p ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_equal_vector_lengths( p, a ); bli_check_error_code( e_val ); // Check pack status. 
e_val = bli_check_packv_schema_on_unpack( p ); bli_check_error_code( e_val ); } blis-0.6.1/frame/1/other/unpackv/bli_unpackv_check.h000066400000000000000000000033471360743507500223150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_unpackv_check ( obj_t* p, obj_t* a, cntx_t* cntx ); blis-0.6.1/frame/1/other/unpackv/bli_unpackv_cntl.c000066400000000000000000000046321360743507500221710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" unpackv_t* unpackv_cntl = NULL; void bli_unpackv_cntl_init() { unpackv_cntl = bli_unpackv_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1 ); } void bli_unpackv_cntl_finalize() { bli_cntl_free_node( unpackv_cntl ); } unpackv_t* bli_unpackv_cntl_obj_create( impl_t impl_type, varnum_t var_num ) { unpackv_t* cntl; cntl = ( unpackv_t* ) bli_malloc_intl( sizeof(unpackv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; return cntl; } void bli_unpackv_cntl_obj_init( unpackv_t* cntl, impl_t impl_type, varnum_t var_num ) { cntl->impl_type = impl_type; cntl->var_num = var_num; } blis-0.6.1/frame/1/other/unpackv/bli_unpackv_cntl.h000066400000000000000000000046371360743507500222030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ struct unpackv_s { impl_t impl_type; varnum_t var_num; }; typedef struct unpackv_s unpackv_t; #define bli_cntl_sub_unpackv( cntl ) cntl->sub_unpackv #define bli_cntl_sub_unpackv_x( cntl ) cntl->sub_unpackv_x #define bli_cntl_sub_unpackv_x1( cntl ) cntl->sub_unpackv_x1 #define bli_cntl_sub_unpackv_y( cntl ) cntl->sub_unpackv_y #define bli_cntl_sub_unpackv_y1( cntl ) cntl->sub_unpackv_y1 void bli_unpackv_cntl_init( void ); void bli_unpackv_cntl_finalize( void ); unpackv_t* bli_unpackv_cntl_obj_create( impl_t impl_type, varnum_t var_num ); void bli_unpackv_cntl_obj_init( unpackv_t* cntl, impl_t impl_type, varnum_t var_num ); blis-0.6.1/frame/1/other/unpackv/bli_unpackv_int.c000066400000000000000000000174301360743507500220230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T unpackv_fp typedef void (*FUNCPTR_T)( obj_t* p, obj_t* a, cntx_t* cntx, unpackv_t* cntl ); static FUNCPTR_T vars[1][3] = { // unblocked optimized unblocked blocked { bli_unpackv_unb_var1, NULL, NULL } }; void bli_unpackv_int( obj_t* p, obj_t* a, cntx_t* cntx, unpackv_t* cntl ) { // The unpackv operation consists of an optional casting post-process. // (This post-process is analogous to the cast pre-process in packv.) // Here are the following possible ways unpackv can execute: // 1. unpack and cast: Unpack to a temporary vector c and then cast // c to a. // 2. unpack only: Unpack directly to vector a since typecasting is // not needed. // 3. cast only: Not yet supported / not used. // 4. no-op: The control tree directs us to skip the unpack operation // entirely. No action is taken. obj_t c; varnum_t n; impl_t i; FUNCPTR_T f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_unpackv_check( p, a, cntx ); // Sanity check; A should never have a zero dimension. 
If we must support // it, then we should fold it into the next alias-and-early-exit block. if ( bli_obj_has_zero_dim( a ) ) bli_abort(); // First check if we are to skip this operation because the control tree // is NULL, and if so, simply return. if ( bli_cntl_is_noop( cntl ) ) { return; } // If p was aliased to a during the pack stage (because it was already // in an acceptable packed/contiguous format), then no unpack is actually // necessary, so we return. if ( bli_obj_is_alias_of( p, a ) ) { return; } // Now, if we are not skipping the unpack operation, then the only // question left is whether we are to typecast vector a after unpacking. if ( bli_obj_dt( p ) != bli_obj_dt( a ) ) bli_abort(); /* if ( bli_obj_dt( p ) != bli_obj_dt( a ) ) { // Initialize an object c for the intermediate typecast vector. bli_unpackv_init_cast( p, a, &c ); } else */ { // If no cast is needed, then aliasing object c to the original // vector serves as a minor optimization. This causes the unpackv // implementation to unpack directly into vector a. bli_obj_alias_to( a, &c ); } // Now we are ready to proceed with the unpacking. // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( p, &c, cntx, cntl ); // Now, if necessary, we cast the contents of c to vector a. If casting // was not necessary, then we are done because the call to the unpackv // implementation would have unpacked directly to vector a. /* if ( bli_obj_dt( p ) != bli_obj_dt( a ) ) { // Copy/typecast vector c to vector a. // NOTE: Here, we use copynzv instead of copym because, in the cases // where we are unpacking/typecasting a real vector c to a complex // vector a, we want to touch only the real components of a, rather // than also set the imaginary components to zero. 
This comes about // because of the fact that, if we are unpacking real-to-complex, // then it is because all of the computation occurred in the real // domain, and so we would want to leave whatever imaginary values // there are in vector a untouched. Notice that for unpackings that // entail complex-to-complex data movements, the copynzv operation // behaves exactly as copym, so no use cases are lost (at least none // that I can think of). bli_copynzv( &c, a ); // NOTE: The above code/comment is outdated. What should happen is // as follows: // - If dt(a) is complex and dt(p) is real, then create an alias of // a and then tweak it so that it looks like a real domain object. // This will involve: // - projecting the datatype to real domain // - scaling both the row and column strides by 2 // ALL OF THIS should be done in the front-end, NOT here, as // unpackv() won't even be needed in that case. } */ } /* void bli_unpackv_init_cast( obj_t* p, obj_t* a, obj_t* c ) { // The idea here is that we want to create an object c that is identical // to object a, except that: // (1) the storage datatype of c is equal to the target datatype of a, // with the element size of c adjusted accordingly, // (2) object c is marked as being stored in a standard, contiguous // format (ie: a column vector), // (3) the view offset of c is reset to (0,0), and // (4) object c's main buffer is set to a new memory region acquired // from the memory manager, or extracted from p if a mem entry is // already available. (After acquring a mem entry from the memory // manager, it is cached within p for quick access later on.) num_t dt_targ_a = bli_obj_target_dt( a ); dim_t dim_a = bli_obj_vector_dim( a ); siz_t elem_size_c = bli_dt_size( dt_targ_a ); // We begin by copying the basic fields of a. bli_obj_alias_to( a, c ); // Update datatype and element size fields. bli_obj_set_dt( dt_targ_a, c ); bli_obj_set_elem_size( elem_size_c, c ); // Update the strides and dimensions. 
We set the increments to reflect a // column-stored vector. Note that the column stride is set to dim(a), // though it should never be used because there is no second column to // index into (and therefore it also does not need to be aligned). bli_obj_set_dims( dim_a, 1, c ); bli_obj_set_strides( 1, dim_a, c ); // Reset the view offsets to (0,0). bli_obj_set_offs( 0, 0, c ); // Check the mem_t entry of p associated with the cast buffer. If it is // NULL, then acquire memory sufficient to hold the object data and cache // it to p. (Otherwise, if it is non-NULL, then memory has already been // acquired from the memory manager and cached.) We then set the main // buffer of c to the cached address of the cast memory. bli_obj_set_buffer_with_cached_cast_mem( *p, *c ); } */ blis-0.6.1/frame/1/other/unpackv/bli_unpackv_int.h000066400000000000000000000036371360743507500220340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_unpackv_int( obj_t* p, obj_t* a, cntx_t* cntx, unpackv_t* cntl ); /* void bli_unpackv_init_cast( obj_t* p, obj_t* a, obj_t* c ); */ blis-0.6.1/frame/1/other/unpackv/bli_unpackv_unb_var1.c000066400000000000000000000061451360743507500227470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T unpackv_fp typedef void (*FUNCPTR_T)( dim_t m, void* p, inc_t incp, void* c, inc_t incc, cntx_t* cntx ); static FUNCPTR_T GENARRAY(ftypes,unpackv_unb_var1); void bli_unpackv_unb_var1( obj_t* p, obj_t* c, cntx_t* cntx, unpackv_t* cntl ) { num_t dt_pc = bli_obj_dt( p ); dim_t dim_c = bli_obj_vector_dim( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t incp = bli_obj_vector_inc( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t incc = bli_obj_vector_inc( c ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_pc]; // Invoke the function. f ( dim_c, buf_p, incp, buf_c, incc, cntx ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t m, \ void* p, inc_t incp, \ void* c, inc_t incc, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ copyv_p \ ( \ BLIS_NO_CONJUGATE, \ m, \ p, incp, \ c, incc, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC0( unpackv_unb_var1 ) blis-0.6.1/frame/1/other/unpackv/bli_unpackv_unb_var1.h000066400000000000000000000040661360743507500227540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_unpackv_unb_var1( obj_t* p, obj_t* c, cntx_t* cntx, unpackv_t* cntl ); #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t m, \ void* p, inc_t incp, \ void* c, inc_t incc, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( unpackv_unb_var1 ) blis-0.6.1/frame/1d/000077500000000000000000000000001360743507500140575ustar00rootroot00000000000000blis-0.6.1/frame/1d/bli_l1d.h000066400000000000000000000041501360743507500155360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_l1d_check.h" // Prototype object APIs (expert and non-expert). #include "bli_oapi_ex.h" #include "bli_l1d_oapi.h" #include "bli_oapi_ba.h" #include "bli_l1d_oapi.h" // Prototype typed APIs (expert and non-expert). #include "bli_tapi_ex.h" #include "bli_l1d_tapi.h" #include "bli_l1d_ft.h" #include "bli_tapi_ba.h" #include "bli_l1d_tapi.h" #include "bli_l1d_ft.h" // Generate function pointer arrays for tapi functions (expert only). #include "bli_l1d_fpa.h" blis-0.6.1/frame/1d/bli_l1d_check.c000066400000000000000000000134251360743507500166730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define object-based check functions. // #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* y \ ) \ { \ bli_l1d_xy_check( x, y ); \ } GENFRONT( addd ) GENFRONT( copyd ) GENFRONT( subd ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ ) \ { \ bli_l1d_axy_check( alpha, x, y ); \ } GENFRONT( axpyd ) GENFRONT( scal2d ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x \ ) \ { \ bli_l1d_x_check( x ); \ } GENFRONT( invertd ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x \ ) \ { \ bli_l1d_ax_check( alpha, x ); \ } GENFRONT( scald ) GENFRONT( setd ) GENFRONT( setid ) GENFRONT( shiftd ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ) \ { \ bli_l1d_axy_check( beta, x, y ); \ } GENFRONT( xpbyd ) // ----------------------------------------------------------------------------- void bli_l1d_xy_check ( obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. 
e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( y ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_l1d_axy_check ( obj_t* alpha, obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( y ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_l1d_x_check ( obj_t* x ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); } void bli_l1d_ax_check ( obj_t* alpha, obj_t* x ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); } blis-0.6.1/frame/1d/bli_l1d_check.h000066400000000000000000000057261360743507500167050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based check functions. // #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* y \ ); GENTPROT( addd ) GENTPROT( copyd ) GENTPROT( subd ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ ); GENTPROT( axpyd ) GENTPROT( scal2d ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x \ ); GENTPROT( invertd ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x \ ); GENTPROT( scald ) GENTPROT( setd ) GENTPROT( setid ) GENTPROT( shiftd ) #undef GENTPROT #define GENTPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ); GENTPROT( xpbyd ) // ----------------------------------------------------------------------------- void bli_l1d_xy_check ( obj_t* x, obj_t* y ); void bli_l1d_axy_check ( obj_t* alpha, obj_t* x, obj_t* y ); void bli_l1d_x_check ( obj_t* x ); void bli_l1d_ax_check ( obj_t* alpha, obj_t* x ); blis-0.6.1/frame/1d/bli_l1d_fpa.c000066400000000000000000000043241360743507500163620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define function pointer query interfaces. 
// /* GENFRONT( opname ) first invokes GENARRAY_FPA with the operation's _vft type name and its typed-API base name, then defines the _qfp query function, which returns element [ dt ] of the operation's _fpa array. NOTE(review): GENARRAY_FPA is presumably what defines that _fpa array; its definition is not visible here -- confirm. */ #undef GENFRONT #define GENFRONT( opname ) \ \ GENARRAY_FPA( PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft), \ PASTECH(opname,BLIS_TAPI_EX_SUF) ); \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ) \ { \ return PASTECH2(opname,BLIS_TAPI_EX_SUF,_fpa)[ dt ]; /* entry selected by datatype; dt is not range-checked here */ \ } GENFRONT( addd ) GENFRONT( copyd ) GENFRONT( subd ) GENFRONT( axpyd ) GENFRONT( scal2d ) GENFRONT( invertd ) GENFRONT( scald ) GENFRONT( setd ) GENFRONT( setid ) GENFRONT( shiftd ) GENFRONT( xpbyd ) blis-0.6.1/frame/1d/bli_l1d_fpa.h000066400000000000000000000040111360743507500163600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype function pointer query interface. GENPROT( opname ) declares one query function per operation: it takes a datatype (num_t dt) and its return type is the operation's _vft type, both names being built from opname and BLIS_TAPI_EX_SUF. // #undef GENPROT #define GENPROT( opname ) \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ); GENPROT( addd ) GENPROT( copyd ) GENPROT( subd ) GENPROT( axpyd ) GENPROT( scal2d ) GENPROT( invertd ) GENPROT( scald ) GENPROT( setd ) GENPROT( setid ) GENPROT( shiftd ) GENPROT( xpbyd ) blis-0.6.1/frame/1d/bli_l1d_ft.h000066400000000000000000000106051360743507500162310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // -- Level-1d function types -------------------------------------------------- // /* Each template below declares a pointer-to-function typedef returning void, whose name is built by PASTECH3(ch,opname,EX_SUF,tsuf). BLIS_TAPI_EX_PARAMS follows the last listed parameter with no separating comma; its expansion is defined elsewhere. The parameter lists mirror the typed-API definitions of the same operations. */ // addd, copyd, subd (operands: x, y) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( addd ) INSERT_GENTDEF( copyd ) INSERT_GENTDEF( subd ) // axpyd, scal2d (operands: alpha, x, y) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( axpyd ) INSERT_GENTDEF( scal2d ) // invertd (operand: x) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( invertd ) // scald, setd (operands: conjalpha, alpha, x) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjalpha, \
doff_t diagoffx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( scald ) INSERT_GENTDEF( setd ) // setid (operands: alpha, x; alpha is typed ctype_r rather than ctype, hence the GENTDEFR template) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ dim_t m, \ dim_t n, \ ctype_r* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEFR( setid ) // shiftd (operands: alpha, x; no conjalpha parameter) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( shiftd ) // xpbyd (operands: x, beta, y) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* beta, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( xpbyd ) blis-0.6.1/frame/1d/bli_l1d_oapi.c000066400000000000000000000263431360743507500165510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the object API macros. #ifdef BLIS_ENABLE_OAPI // // Define object-based interfaces. Each GENFRONT template below reads the datatype from x, extracts the diagonal offset, dimensions, buffers and strides from its obj_t operands, calls the operation's _check function when bli_error_checking_is_enabled() returns true, and then calls the function pointer returned by the _qfp query for that datatype. // /* Operands x, y: addd, copyd, subd. */ #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \ dim_t m = bli_obj_length( y ); \ dim_t n = bli_obj_width( y ); /* m and n are taken from y, not x */ \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t rs_y = bli_obj_row_stride( y ); \ inc_t cs_y = bli_obj_col_stride( y ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x, y ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers.
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ diagx, \ transx, \ m, \ n, \ buf_x, rs_x, cs_x, \ buf_y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } GENFRONT( addd ) GENFRONT( copyd ) GENFRONT( subd ) /* Operands alpha, x, y: axpyd, scal2d. Same unpacking as the x, y template, plus a local copy of alpha created for x's datatype (dt) with BLIS_NO_CONJUGATE; the buffer of that local copy is what gets passed on. */ #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \ dim_t m = bli_obj_length( y ); \ dim_t n = bli_obj_width( y ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t rs_y = bli_obj_row_stride( y ); \ inc_t cs_y = bli_obj_col_stride( y ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x, y ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers.
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ diagx, \ transx, \ m, \ n, \ buf_alpha, \ buf_x, rs_x, cs_x, \ buf_y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } GENFRONT( axpyd ) GENFRONT( scal2d ) /* Operand x only: invertd. Dimensions come from x itself; no diag or trans arguments are passed on. */ #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ m, \ n, \ buf_x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } GENFRONT( invertd ) /* Operands alpha, x: scald, setd. BLIS_NO_CONJUGATE is always passed as the leading (conjalpha) argument of f; alpha's own conjugation status is not read (that line is commented out below). */ #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ /* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed).
*/ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. */ \ diagoffx, \ m, \ n, \ buf_alpha, \ buf_x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } GENFRONT( scald ) GENFRONT( setd ) /* Operands alpha, x: setid. Unlike the other scalar-taking templates, no local copy of alpha is made: its buffer is obtained directly with bli_obj_buffer_for_1x1( dt, alpha ), where dt is x's datatype. NOTE(review): the typed setid interface takes alpha as ctype_r*; confirm that callers supply an alpha whose buffer is valid when read that way. */ #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers.
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ m, \ n, \ buf_alpha, \ buf_x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } GENFRONT( setid ) /* Operands alpha, x: shiftd. Same unpacking and alpha copy as scald/setd, but f is called without a conjugation argument. */ #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers.
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ m, \ n, \ buf_alpha, \ buf_x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } GENFRONT( shiftd ) /* Operands x, beta, y: xpbyd. Same unpacking as the x, y template; beta is copied locally for x's datatype and its buffer is passed between the x and y arguments. */ #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \ dim_t m = bli_obj_length( y ); \ dim_t n = bli_obj_width( y ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t rs_y = bli_obj_row_stride( y ); \ inc_t cs_y = bli_obj_col_stride( y ); \ \ void* buf_beta; \ \ obj_t beta_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x, beta, y ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ beta, &beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ diagx, \ transx, \ m, \ n, \ buf_x, rs_x, cs_x, \ buf_beta, \ buf_y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } GENFRONT( xpbyd ) #endif blis-0.6.1/frame/1d/bli_l1d_oapi.h000066400000000000000000000055021360743507500165500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. 
// /* Each GENTPROT template below declares, with BLIS_EXPORT_BLIS, the object-based function named by PASTEMAC(opname,EX_SUF) for one operand signature. BLIS_OAPI_EX_PARAMS follows the last listed parameter with no separating comma; its expansion is defined elsewhere. */ /* Signature: x, y. */ #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( addd ) GENTPROT( copyd ) GENTPROT( subd ) /* Signature: alpha, x, y. */ #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( axpyd ) GENTPROT( scal2d ) /* Signature: x. */ #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( invertd ) /* Signature: alpha, x. */ #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( scald ) GENTPROT( setd ) GENTPROT( setid ) GENTPROT( shiftd ) /* Signature: x, beta, y. */ #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( xpbyd ) blis-0.6.1/frame/1d/bli_l1d_oapi_ba.c000066400000000000000000000036701360743507500172110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_oapi_ba.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l1d_oapi.c" blis-0.6.1/frame/1d/bli_l1d_oapi_ex.c000066400000000000000000000036661360743507500172500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_oapi_ex.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l1d_oapi.c" blis-0.6.1/frame/1d/bli_l1d_tapi.c000066400000000000000000000321051360743507500165470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the typed API macros. #ifdef BLIS_ENABLE_TAPI // // Define BLAS-like interfaces with typed operands. Each template below returns early for zero dimensions or a diagonal offset outside the matrix, computes the offset, element count and increment of the selected diagonal, and then calls the level-1v kernel (kername/kerid) queried from the context on that diagonal. A NULL cntx is replaced by the result of bli_gks_query_cntx(). // /* Operands x, y: addd, copyd, subd. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x1; \ ctype* y1; \ conj_t conjx; \ dim_t n_elem; \ dim_t offx, offy; \ inc_t incx, incy; \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ if ( bli_is_outside_diag( diagoffx, transx, m, n ) ) return; \ \ /* Determine the distance to the diagonals, the number of diagonal elements, and the diagonal increments.
*/ \ bli_set_dims_incs_2d \ ( \ diagoffx, transx, \ m, n, rs_x, cs_x, rs_y, cs_y, \ &offx, &offy, &n_elem, &incx, &incy \ ); \ \ conjx = bli_extract_conj( transx ); \ \ if ( bli_is_nonunit_diag( diagx ) ) \ { \ x1 = x + offx; \ y1 = y + offy; \ } \ else /* if ( bli_is_unit_diag( diagx ) ) */ \ { \ /* Simulate a unit diagonal for x with a zero increment over a unit scalar. */ \ x1 = PASTEMAC(ch,1); \ incx = 0; \ y1 = y + offy; \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ /* Invoke the kernel with the appropriate parameters. */ \ f( \ conjx, \ n_elem, \ x1, incx, \ y1, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC2( addd, addv, BLIS_ADDV_KER ) INSERT_GENTFUNC_BASIC2( copyd, copyv, BLIS_COPYV_KER ) INSERT_GENTFUNC_BASIC2( subd, subv, BLIS_SUBV_KER ) /* Operands alpha, x, y: axpyd, scal2d. Same setup as the x, y template; alpha is forwarded to the kernel unchanged. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x1; \ ctype* y1; \ conj_t conjx; \ dim_t n_elem; \ dim_t offx, offy; \ inc_t incx, incy; \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ if ( bli_is_outside_diag( diagoffx, transx, m, n ) ) return; \ \ /* Determine the distance to the diagonals, the number of diagonal elements, and the diagonal increments.
*/ \ bli_set_dims_incs_2d \ ( \ diagoffx, transx, \ m, n, rs_x, cs_x, rs_y, cs_y, \ &offx, &offy, &n_elem, &incx, &incy \ ); \ \ conjx = bli_extract_conj( transx ); \ \ if ( bli_is_nonunit_diag( diagx ) ) \ { \ x1 = x + offx; \ y1 = y + offy; \ } \ else /* if ( bli_is_unit_diag( diagx ) ) */ \ { \ /* Simulate a unit diagonal for x with a zero increment over a unit scalar. */ \ x1 = PASTEMAC(ch,1); \ incx = 0; \ y1 = y + offy; \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ /* Invoke the kernel with the appropriate parameters. */ \ f( \ conjx, \ n_elem, \ alpha, \ x1, incx, \ y1, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC2( axpyd, axpyv, BLIS_AXPYV_KER ) INSERT_GENTFUNC_BASIC2( scal2d, scal2v, BLIS_SCAL2V_KER ) /* Operand x only: invertd. There is no trans parameter, so BLIS_NO_TRANSPOSE is used for the outside-diagonal test and the one-matrix helper bli_set_dims_incs_1d computes the diagonal. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x1; \ dim_t n_elem; \ dim_t offx; \ inc_t incx; \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ if ( bli_is_outside_diag( diagoffx, BLIS_NO_TRANSPOSE, m, n ) ) return; \ \ /* Determine the distance to the diagonals, the number of diagonal elements, and the diagonal increments. */ \ bli_set_dims_incs_1d \ ( \ diagoffx, \ m, n, rs_x, cs_x, \ &offx, &n_elem, &incx \ ); \ \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ /* Invoke the kernel with the appropriate parameters.
*/ \ f( \ n_elem, \ x1, incx, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC2( invertd, invertv, BLIS_INVERTV_KER ) /* Operands conjalpha, alpha, x: scald, setd. conjalpha and alpha are forwarded to the kernel unchanged. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjalpha, \ doff_t diagoffx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x1; \ dim_t n_elem; \ dim_t offx; \ inc_t incx; \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ if ( bli_is_outside_diag( diagoffx, BLIS_NO_TRANSPOSE, m, n ) ) return; \ \ /* Determine the distance to the diagonals, the number of diagonal elements, and the diagonal increments. */ \ bli_set_dims_incs_1d \ ( \ diagoffx, \ m, n, rs_x, cs_x, \ &offx, &n_elem, &incx \ ); \ \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ /* Invoke the kernel with the appropriate parameters. */ \ f( \ conjalpha, \ n_elem, \ alpha, \ x1, incx, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC2( scald, scalv, BLIS_SCALV_KER ) INSERT_GENTFUNC_BASIC2( setd, setv, BLIS_SETV_KER ) /* Operands alpha (ctype_r), x: setid. The kernel is queried for the real datatype dt_r and is applied to the imaginary components of the diagonal of x; for real dt the function returns immediately. */ #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname, kername, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ dim_t m, \ dim_t n, \ ctype_r* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ const num_t dt_r = PASTEMAC(chr,type); \ \ ctype_r* x1; \ dim_t n_elem; \ dim_t offx; \ inc_t incx; \ \ /* If the datatype is real, the entire operation is a no-op.
*/ \ if ( bli_is_real( dt ) ) return; \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ if ( bli_is_outside_diag( diagoffx, BLIS_NO_TRANSPOSE, m, n ) ) return; \ \ /* Determine the distance to the diagonals, the number of diagonal elements, and the diagonal increments. */ \ bli_set_dims_incs_1d \ ( \ diagoffx, \ m, n, rs_x, cs_x, \ &offx, &n_elem, &incx \ ); \ \ /* Alternate implementation. (Substitute for remainder of function). */ \ /* for ( i = 0; i < n_elem; ++i ) \ { \ ctype* chi11 = x1 + (i )*incx; \ \ PASTEMAC(ch,setis)( *alpha, *chi11 ); \ } */ \ \ /* Acquire the address of the imaginary component of the first element, and scale the increment for use in the real domain. Note that the indexing into the imaginary field only needs to work for complex datatypes since we return early for real domain types. */ \ x1 = ( ctype_r* )( x + offx ) + 1; \ incx = 2*incx; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(chr,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt_r, kerid, cntx ); \ \ /* Invoke the kernel with the appropriate parameters. */ \ f( \ BLIS_NO_CONJUGATE, \ n_elem, \ alpha, \ x1, incx, \ cntx \ ); \ } INSERT_GENTFUNCR_BASIC2( setid, setv, BLIS_SETV_KER ) /* Operands alpha, x: shiftd. Instantiated with the addv kernel (see the INSERT_GENTFUNC_BASIC2 line after this template). */ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x1; \ dim_t n_elem; \ dim_t offx; \ inc_t incx; \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ if ( bli_is_outside_diag( diagoffx, BLIS_NO_TRANSPOSE, m, n ) ) return; \ \ /* Determine the distance to the diagonals, the number of diagonal elements, and the diagonal increments.
*/ \ bli_set_dims_incs_1d \ ( \ diagoffx, \ m, n, rs_x, cs_x, \ &offx, &n_elem, &incx \ ); \ \ x1 = x + offx; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Query the context for the operation's kernel address. */ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ /* Invoke the kernel with the appropriate parameters. */ \ f( \ BLIS_NO_CONJUGATE, \ n_elem, \ alpha, 0, /* alpha fills the kernel's first vector operand with increment 0, so the same scalar is used for every diagonal element */ \ x1, incx, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC2( shiftd, addv, BLIS_ADDV_KER ) /* Operands x, beta, y: xpbyd. Same setup as the x, y template; beta is forwarded to the kernel between the x and y arguments. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kername, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* beta, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x1; \ ctype* y1; \ conj_t conjx; \ dim_t n_elem; \ dim_t offx, offy; \ inc_t incx, incy; \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ if ( bli_is_outside_diag( diagoffx, transx, m, n ) ) return; \ \ /* Determine the distance to the diagonals, the number of diagonal elements, and the diagonal increments. */ \ bli_set_dims_incs_2d \ ( \ diagoffx, transx, \ m, n, rs_x, cs_x, rs_y, cs_y, \ &offx, &offy, &n_elem, &incx, &incy \ ); \ \ conjx = bli_extract_conj( transx ); \ \ if ( bli_is_nonunit_diag( diagx ) ) \ { \ x1 = x + offx; \ y1 = y + offy; \ } \ else /* if ( bli_is_unit_diag( diagx ) ) */ \ { \ /* Simulate a unit diagonal for x with a zero increment over a unit scalar. */ \ x1 = PASTEMAC(ch,1); \ incx = 0; \ y1 = y + offy; \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Query the context for the operation's kernel address.
*/ \ PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \ \ /* Invoke the kernel with the appropriate parameters. */ \ f( \ conjx, \ n_elem, \ x1, incx, \ beta, \ y1, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC2( xpbyd, xpbyv, BLIS_XPBYV_KER ) #endif blis-0.6.1/frame/1d/bli_l1d_tapi.h000066400000000000000000000105301360743507500165520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


//
// Prototype BLAS-like interfaces with typed operands.
//
// Each GENTPROT/GENTPROTR definition below is a prototype template; the
// INSERT_* macro that follows it instantiates the template for the listed
// operation. BLIS_TAPI_EX_PARAMS and EX_SUF are defined by whichever of
// bli_tapi_ex.h / bli_tapi_ba.h was included before this header.
//

// addd, copyd, subd: two matrix operands (x and y), no scalar.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t diagoffx, \
       diag_t diagx, \
       trans_t transx, \
       dim_t m, \
       dim_t n, \
       ctype* x, inc_t rs_x, inc_t cs_x, \
       ctype* y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     );

INSERT_GENTPROT_BASIC0( addd )
INSERT_GENTPROT_BASIC0( copyd )
INSERT_GENTPROT_BASIC0( subd )


// axpyd, scal2d: scalar alpha plus two matrix operands (x and y).
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t diagoffx, \
       diag_t diagx, \
       trans_t transx, \
       dim_t m, \
       dim_t n, \
       ctype* alpha, \
       ctype* x, inc_t rs_x, inc_t cs_x, \
       ctype* y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     );

INSERT_GENTPROT_BASIC0( axpyd )
INSERT_GENTPROT_BASIC0( scal2d )


// invertd: a single matrix operand x, no scalar.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t diagoffx, \
       dim_t m, \
       dim_t n, \
       ctype* x, inc_t rs_x, inc_t cs_x \
       BLIS_TAPI_EX_PARAMS \
     );

INSERT_GENTPROT_BASIC0( invertd )


// scald, setd: scalar alpha with its own conjugation flag, one matrix operand.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       conj_t conjalpha, \
       doff_t diagoffx, \
       dim_t m, \
       dim_t n, \
       ctype* alpha, \
       ctype* x, inc_t rs_x, inc_t cs_x \
       BLIS_TAPI_EX_PARAMS \
     );

INSERT_GENTPROT_BASIC0( scald )
INSERT_GENTPROT_BASIC0( setd )


// setid: alpha is of the real projection type (ctype_r) while x is ctype.
#undef GENTPROTR
#define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t diagoffx, \
       dim_t m, \
       dim_t n, \
       ctype_r* alpha, \
       ctype* x, inc_t rs_x, inc_t cs_x \
       BLIS_TAPI_EX_PARAMS \
     );

INSERT_GENTPROTR_BASIC0( setid )


// shiftd: scalar alpha (no conjugation flag) and one matrix operand.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t diagoffx, \
       dim_t m, \
       dim_t n, \
       ctype* alpha, \
       ctype* x, inc_t rs_x, inc_t cs_x \
       BLIS_TAPI_EX_PARAMS \
     );

INSERT_GENTPROT_BASIC0( shiftd )


// xpbyd: matrix x, scalar beta, matrix y.
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       doff_t diagoffx, \
       diag_t diagx, \
       trans_t transx, \
       dim_t m, \
       dim_t n, \
       ctype* x, inc_t rs_x, inc_t cs_x, \
       ctype* beta, \
       ctype* y, inc_t rs_y, inc_t cs_y \
       BLIS_TAPI_EX_PARAMS \
     );

INSERT_GENTPROT_BASIC0( xpbyd )

blis-0.6.1/frame/1d/bli_l1d_tapi_ba.c000066400000000000000000000036661360743507500172230ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_tapi_ba.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_l1d_tapi.c" blis-0.6.1/frame/1d/bli_l1d_tapi_ex.c000066400000000000000000000036641360743507500172530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_tapi_ex.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_l1d_tapi.c" blis-0.6.1/frame/1f/000077500000000000000000000000001360743507500140615ustar00rootroot00000000000000blis-0.6.1/frame/1f/bli_l1f.h000066400000000000000000000042461360743507500155500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_l1f_check.h" // Define kernel function types. #include "bli_l1f_ft_ker.h" // Prototype object APIs (expert and non-expert). #include "bli_oapi_ex.h" #include "bli_l1f_oapi.h" #include "bli_oapi_ba.h" #include "bli_l1f_oapi.h" // Prototype typed APIs (expert and non-expert). #include "bli_tapi_ex.h" #include "bli_l1f_tapi.h" #include "bli_l1f_ft.h" #include "bli_tapi_ba.h" #include "bli_l1f_tapi.h" #include "bli_l1f_ft.h" // Generate function pointer arrays for tapi functions (expert only). #include "bli_l1f_fpa.h" blis-0.6.1/frame/1f/bli_l1f_check.c000066400000000000000000000303741360743507500167010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define object-based check functions. // void bli_axpy2v_check ( obj_t* alphax, obj_t* alphay, obj_t* x, obj_t* y, obj_t* z ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alphax ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( alphay ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( z ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( x, z ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_scalar_object( alphax ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( alphay ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( z ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, z ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alphax ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( alphay ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( z ); bli_check_error_code( e_val ); } void bli_axpyf_check ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( a, x ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( a, y ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( a ) ); bli_check_error_code( e_val ); e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( a ) ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_dotaxpyv_check ( obj_t* alpha, obj_t* xt, obj_t* x, obj_t* y, obj_t* rho, obj_t* z ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( xt ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( rho ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( rho ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( z ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, xt ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( x, z ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( xt ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( rho ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( z ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, xt ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, z ); bli_check_error_code( e_val ); // Check object aliases. e_val = bli_check_object_alias_of( xt, x ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( xt ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( rho ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( z ); bli_check_error_code( e_val ); } void bli_dotxaxpyf_check ( obj_t* alpha, obj_t* at, obj_t* a, obj_t* w, obj_t* x, obj_t* beta, obj_t* y, obj_t* z ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( at ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( w ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( z ); bli_check_error_code( e_val ); // Check for consistent datatypes. 
e_val = bli_check_consistent_object_datatypes( a, at ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( a, w ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( a, x ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( a, y ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( a, z ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( at ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( w ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( z ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( w, z ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( at, a ); bli_check_error_code( e_val ); e_val = bli_check_object_length_equals( at, bli_obj_vector_dim( w ) ); bli_check_error_code( e_val ); e_val = bli_check_object_width_equals( at, bli_obj_vector_dim( y ) ); bli_check_error_code( e_val ); e_val = bli_check_object_length_equals( a, bli_obj_vector_dim( z ) ); bli_check_error_code( e_val ); e_val = bli_check_object_width_equals( a, bli_obj_vector_dim( x ) ); bli_check_error_code( e_val ); // Check object aliases. e_val = bli_check_object_alias_of( at, a ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( at ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( w ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( beta ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( z ); bli_check_error_code( e_val ); } void bli_dotxf_check ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( a, x ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_datatypes( a, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( beta ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( a ) ); bli_check_error_code( e_val ); e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( a ) ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( beta ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } blis-0.6.1/frame/1f/bli_l1f_check.h000066400000000000000000000055331360743507500167050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
   IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/


//
// Prototype object-based check functions.
//
// Each GENTPROT below is redefined for one parameter list and then invoked
// once, declaring the check function named by PASTEMAC(opname,_check).
// NOTE(review): PASTEMAC is defined elsewhere; the resulting names are
// expected to match the bli_<opname>_check definitions in bli_l1f_check.c.
//

// axpy2v: two scalars (alphax, alphay) and three vectors (x, y, z).
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       obj_t* alphax, \
       obj_t* alphay, \
       obj_t* x, \
       obj_t* y, \
       obj_t* z \
     );

GENTPROT( axpy2v )


// axpyf: scalar alpha, matrix a, vectors x and y.
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       obj_t* alpha, \
       obj_t* a, \
       obj_t* x, \
       obj_t* y \
     );

GENTPROT( axpyf )


// dotaxpyv: scalar alpha, vectors xt, x, y, scalar rho, vector z.
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       obj_t* alpha, \
       obj_t* xt, \
       obj_t* x, \
       obj_t* y, \
       obj_t* rho, \
       obj_t* z \
     );

GENTPROT( dotaxpyv )


// dotxaxpyf: scalar alpha, matrices at and a, vectors w and x, scalar beta,
// vectors y and z.
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       obj_t* alpha, \
       obj_t* at, \
       obj_t* a, \
       obj_t* w, \
       obj_t* x, \
       obj_t* beta, \
       obj_t* y, \
       obj_t* z \
     );

GENTPROT( dotxaxpyf )


// dotxf: scalar alpha, matrix a, vector x, scalar beta, vector y.
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       obj_t* alpha, \
       obj_t* a, \
       obj_t* x, \
       obj_t* beta, \
       obj_t* y \
     );

GENTPROT( dotxf )

blis-0.6.1/frame/1f/bli_l1f_fpa.c000066400000000000000000000041571360743507500163720ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

//
// Define function pointer query interfaces.
//

// For each operation, GENFRONT emits two things:
//   1. a GENARRAY_FPA(...) invocation, given the operation's _vft type name
//      and the operation name with BLIS_TAPI_EX_SUF appended (GENARRAY_FPA is
//      defined elsewhere; presumably it defines the <opname><suf>_fpa array
//      that the query function reads -- confirm);
//   2. a query function, named via PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp),
//      that takes a datatype dt and returns the _fpa array entry at index dt.
//      dt is used directly as the array index; no range check is performed
//      here.
#undef GENFRONT
#define GENFRONT( opname ) \
\
GENARRAY_FPA( PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft), \
              PASTECH(opname,BLIS_TAPI_EX_SUF) ); \
\
PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ) \
{ \
	return PASTECH2(opname,BLIS_TAPI_EX_SUF,_fpa)[ dt ]; \
}

GENFRONT( axpy2v )
GENFRONT( axpyf )
GENFRONT( dotaxpyv )
GENFRONT( dotxaxpyf )
GENFRONT( dotxf )

blis-0.6.1/frame/1f/bli_l1f_fpa.h000066400000000000000000000036521360743507500163760ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

//
// Prototype function pointer query interface.
//

// Declares, for the given operation, a query function named via
// PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp) that takes a datatype dt and
// returns a value of the operation's _vft type. The return type and function
// name are spelled with the same macro arguments as the definitions in
// bli_l1f_fpa.c.
#undef GENPROT
#define GENPROT( opname ) \
\
PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt );

GENPROT( axpy2v )
GENPROT( axpyf )
GENPROT( dotaxpyv )
GENPROT( dotxaxpyf )
GENPROT( dotxf )

blis-0.6.1/frame/1f/bli_l1f_ft.h000066400000000000000000000076771360743507500162540ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // // -- Level-1f function types -------------------------------------------------- // // axpy2v #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* alpha1, \ ctype* alpha2, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( axpy2v ) // axpyf #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conja, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( axpyf ) // dotaxpyv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjxt, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* rho, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( dotaxpyv ) // dotxf #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjat, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( dotxf ) // dotxaxpyf #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjat, \ conj_t conja, \ conj_t conjw, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* w, inc_t incw, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( dotxaxpyf ) blis-0.6.1/frame/1f/bli_l1f_ft_ker.h000066400000000000000000000105601360743507500170760ustar00rootroot00000000000000/* BLIS An object-based 
framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_L1F_FT_KER_H #define BLIS_L1F_FT_KER_H // // -- Level-1f kernel function types ------------------------------------------- // // axpy2v #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict alpha1, \ ctype* restrict alpha2, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); INSERT_GENTDEF( axpy2v ) // axpyf #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conja, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( axpyf ) // dotaxpyv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjxt, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict rho, \ ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); INSERT_GENTDEF( dotaxpyv ) // dotxf #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjat, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( dotxf ) // dotxaxpyf #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjat, \ conj_t conja, \ conj_t conjw, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict w, inc_t incw, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* 
restrict y, inc_t incy, \ ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); INSERT_GENTDEF( dotxaxpyf ) #endif blis-0.6.1/frame/1f/bli_l1f_ker.h000066400000000000000000000046271360743507500164140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-1f kernels. 
// // Note: Instead of defining function prototype macro templates and then // instantiating those macros to define the individual function prototypes, // we simply alias the official operations' prototypes as defined in // bli_l1f_ker_prot.h. #undef GENTPROT #define GENTPROT AXPY2V_KER_PROT INSERT_GENTPROT_BASIC0( axpy2v_ker_name ) #undef GENTPROT #define GENTPROT AXPYF_KER_PROT INSERT_GENTPROT_BASIC0( axpyf_ker_name ) #undef GENTPROT #define GENTPROT DOTAXPYV_KER_PROT INSERT_GENTPROT_BASIC0( dotaxpyv_ker_name ) #undef GENTPROT #define GENTPROT DOTXAXPYF_KER_PROT INSERT_GENTPROT_BASIC0( dotxaxpyf_ker_name ) #undef GENTPROT #define GENTPROT DOTXF_KER_PROT INSERT_GENTPROT_BASIC0( dotxf_ker_name ) blis-0.6.1/frame/1f/bli_l1f_ker_prot.h000066400000000000000000000077411360743507500174600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-1f kernels. // #define AXPY2V_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict alphax, \ ctype* restrict alphay, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict z, inc_t incz, \ cntx_t* restrict cntx \ ); #define AXPYF_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conja, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); #define DOTAXPYV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjxt, \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict rho, \ ctype* restrict z, inc_t incz, \ cntx_t* restrict cntx \ ); #define DOTXAXPYF_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjat, \ conj_t conja, \ conj_t conjw, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict w, inc_t incw, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ ctype* restrict z, inc_t incz, \ cntx_t* restrict cntx \ ); #define DOTXF_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t 
conjat, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ); blis-0.6.1/frame/1f/bli_l1f_oapi.c000066400000000000000000000275511360743507500165570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // Guard the function definitions so that they are only compiled when // #included from files that define the object API macros. #ifdef BLIS_ENABLE_OAPI // // Define object-based interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alphax, \ obj_t* alphay, \ obj_t* x, \ obj_t* y, \ obj_t* z \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ conj_t conjx = bli_obj_conj_status( x ); \ conj_t conjy = bli_obj_conj_status( y ); \ dim_t n = bli_obj_vector_dim( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \ void* buf_z = bli_obj_buffer_at_off( z ); \ inc_t inc_z = bli_obj_vector_inc( z ); \ \ void* buf_alphax; \ void* buf_alphay; \ \ obj_t alphax_local; \ obj_t alphay_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alphax, alphay, x, y, z ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alphax, &alphax_local ); \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alphay, &alphay_local ); \ buf_alphax = bli_obj_buffer_for_1x1( dt, &alphax_local ); \ buf_alphay = bli_obj_buffer_for_1x1( dt, &alphay_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ conjx, \ conjy, \ n, \ buf_alphax, \ buf_alphay, \ buf_x, inc_x, \ buf_y, inc_y, \ buf_z, inc_z, \ cntx, \ rntm \ ); \ } GENFRONT( axpy2v ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ conj_t conja = bli_obj_conj_status( a ); \ conj_t conjx = bli_obj_conj_status( x ); \ dim_t m = bli_obj_vector_dim( y ); \ dim_t b_n = bli_obj_vector_dim( x ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, a, x, y ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Support cases where matrix A requires a transposition. */ \ if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ conja, \ conjx, \ m, \ b_n, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_x, inc_x, \ buf_y, inc_y, \ cntx, \ rntm \ ); \ } GENFRONT( axpyf ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* xt, \ obj_t* x, \ obj_t* y, \ obj_t* rho, \ obj_t* z \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ conj_t conjxt = bli_obj_conj_status( xt ); \ conj_t conjx = bli_obj_conj_status( x ); \ conj_t conjy = bli_obj_conj_status( y ); \ dim_t n = bli_obj_vector_dim( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \ void* buf_z = bli_obj_buffer_at_off( z ); \ inc_t inc_z = bli_obj_vector_inc( z ); \ void* buf_rho = bli_obj_buffer_at_off( rho ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, xt, x, y, rho, z ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ conjxt, \ conjx, \ conjy, \ n, \ buf_alpha, \ buf_x, inc_x, \ buf_y, inc_y, \ buf_rho, \ buf_z, inc_z, \ cntx, \ rntm \ ); \ } GENFRONT( dotaxpyv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* at, \ obj_t* a, \ obj_t* w, \ obj_t* x, \ obj_t* beta, \ obj_t* y, \ obj_t* z \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ conj_t conjat = bli_obj_conj_status( at ); \ conj_t conja = bli_obj_conj_status( a ); \ conj_t conjw = bli_obj_conj_status( w ); \ conj_t conjx = bli_obj_conj_status( x ); \ dim_t m = bli_obj_vector_dim( z ); \ dim_t b_n = bli_obj_vector_dim( y ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ void* buf_w = bli_obj_buffer_at_off( w ); \ inc_t inc_w = bli_obj_vector_inc( w ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \ void* buf_z = bli_obj_buffer_at_off( z ); \ inc_t inc_z = bli_obj_vector_inc( z ); \ \ void* buf_alpha; \ void* buf_beta; \ \ obj_t alpha_local; \ obj_t beta_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, at, a, w, x, beta, y, z ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ beta, &beta_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \ \ /* Support cases where matrix A requires a transposition. 
*/ \ if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ conjat, \ conja, \ conjw, \ conjx, \ m, \ b_n, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_w, inc_w, \ buf_x, inc_x, \ buf_beta, \ buf_y, inc_y, \ buf_z, inc_z, \ cntx, \ rntm \ ); \ } GENFRONT( dotxaxpyf ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ conj_t conjat = bli_obj_conj_status( a ); \ conj_t conjx = bli_obj_conj_status( x ); \ dim_t m = bli_obj_vector_dim( x ); \ dim_t b_n = bli_obj_vector_dim( y ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \ \ void* buf_alpha; \ void* buf_beta; \ \ obj_t alpha_local; \ obj_t beta_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, a, x, beta, y ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ beta, &beta_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \ \ /* Support cases where matrix A requires a transposition. 
*/ \ if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ conjat, \ conjx, \ m, \ b_n, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_x, inc_x, \ buf_beta, \ buf_y, inc_y, \ cntx, \ rntm \ ); \ } GENFRONT( dotxf ) #endif blis-0.6.1/frame/1f/bli_l1f_oapi.h000066400000000000000000000061061360743507500165550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alphax, \ obj_t* alphay, \ obj_t* x, \ obj_t* y, \ obj_t* z \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( axpy2v ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( axpyf ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* xt, \ obj_t* x, \ obj_t* y, \ obj_t* rho, \ obj_t* z \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( dotaxpyv ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* at, \ obj_t* a, \ obj_t* w, \ obj_t* x, \ obj_t* beta, \ obj_t* y, \ obj_t* z \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( dotxaxpyf ) #undef GENTPROT #define GENTPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENTPROT( dotxf ) blis-0.6.1/frame/1f/bli_l1f_oapi_ba.c000066400000000000000000000036701360743507500172150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_oapi_ba.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l1f_oapi.c" blis-0.6.1/frame/1f/bli_l1f_oapi_ex.c000066400000000000000000000036661360743507500172540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_oapi_ex.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l1f_oapi.c" blis-0.6.1/frame/1f/bli_l1f_tapi.c000066400000000000000000000143621360743507500165600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the typed API macros. #ifdef BLIS_ENABLE_TAPI // // Define BLAS-like interfaces with typed operands. 
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* alphax, \ ctype* alphay, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1f_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjx, \ conjy, \ n, \ alphax, \ alphay, \ x, incx, \ y, incy, \ z, incz, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( axpy2v, BLIS_AXPY2V_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conja, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1f_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conja, \ conjx, \ m, \ b_n, \ alpha, \ a, inca, lda, \ x, incx, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( axpyf, BLIS_AXPYF_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjxt, \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* rho, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1f_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjxt, \ conjx, \ conjy, \ n, \ alpha, \ x, incx, \ y, incy, \ rho, \ z, incz, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( dotaxpyv, BLIS_DOTAXPYV_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjat, \ conj_t conja, \ conj_t conjw, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* w, inc_t incw, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1f_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjat, \ conja, \ conjw, \ conjx, \ m, \ b_n, \ alpha, \ a, inca, lda, \ w, incw, \ x, incx, \ beta, \ y, incy, \ z, incz, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( dotxaxpyf, BLIS_DOTXAXPYF_KER ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjat, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ PASTECH2(ch,opname,_ker_ft) f = bli_cntx_get_l1f_ker_dt( dt, kerid, cntx ); \ \ f \ ( \ conjat, \ conjx, \ m, \ b_n, \ alpha, \ a, inca, lda, \ x, incx, \ beta, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC( dotxf, BLIS_DOTXF_KER ) #endif blis-0.6.1/frame/1f/bli_l1f_tapi.h000066400000000000000000000076141360743507500165670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // // Prototype BLAS-like interfaces with typed operands. // #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* alphax, \ ctype* alphay, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( axpy2v ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conja, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( axpyf ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjxt, \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* rho, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( dotaxpyv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjat, \ conj_t conja, \ conj_t conjw, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* w, inc_t incw, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ ctype* z, inc_t incz \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( dotxaxpyf ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjat, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* alpha, \ ctype* a, inc_t inca, inc_t lda, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( dotxf ) blis-0.6.1/frame/1f/bli_l1f_tapi_ba.c000066400000000000000000000036661360743507500172270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance 
BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_tapi_ba.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. 
#include "bli_l1f_tapi.c" blis-0.6.1/frame/1f/bli_l1f_tapi_ex.c000066400000000000000000000036641360743507500172570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_tapi_ex.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. 
#include "bli_l1f_tapi.c" blis-0.6.1/frame/1m/000077500000000000000000000000001360743507500140705ustar00rootroot00000000000000blis-0.6.1/frame/1m/bli_l1m.h000066400000000000000000000045711360743507500155670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_l1m_check.h" // Define kernel function types. #include "bli_l1m_ft_ker.h" // Define object function types for variants. 
#include "bli_l1m_oft_var.h" // Prototype object APIs (expert and non-expert). #include "bli_oapi_ex.h" #include "bli_l1m_oapi.h" #include "bli_oapi_ba.h" #include "bli_l1m_oapi.h" // Prototype typed APIs (expert and non-expert). #include "bli_tapi_ex.h" #include "bli_l1m_tapi.h" #include "bli_l1m_ft.h" #include "bli_tapi_ba.h" #include "bli_l1m_tapi.h" #include "bli_l1m_ft.h" // Generate function pointer arrays for tapi functions (expert only). #include "bli_l1m_fpa.h" // Prototype level-1m implementations. #include "bli_l1m_unb_var1.h" // Pack-related #include "bli_packm.h" #include "bli_unpackm.h" blis-0.6.1/frame/1m/bli_l1m_check.c000066400000000000000000000124431360743507500167140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define object-based check functions. // #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* y \ ) \ { \ bli_l1m_xy_check( x, y ); \ } GENFRONT( addm ) GENFRONT( copym ) GENFRONT( subm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ ) \ { \ bli_l1m_axy_check( alpha, x, y ); \ } GENFRONT( axpym ) GENFRONT( scal2m ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x \ ) \ { \ bli_l1m_ax_check( alpha, x ); \ } GENFRONT( scalm ) GENFRONT( setm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ) \ { \ bli_l1m_axy_check( beta, x, y ); \ } GENFRONT( xpbym ) // ----------------------------------------------------------------------------- void bli_l1m_xy_check ( obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( y ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_l1m_axy_check ( obj_t* alpha, obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check for consistent datatypes. e_val = bli_check_consistent_object_datatypes( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( y ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } void bli_l1m_ax_check ( obj_t* alpha, obj_t* x ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_scalar_object( alpha ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); // Check object properties. //e_val = bli_check_nonunit_diag( x ); //bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( alpha ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); } blis-0.6.1/frame/1m/bli_l1m_check.h000066400000000000000000000053401360743507500167170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based check functions. 
// #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* y \ ); GENPROT( addm ) GENPROT( copym ) GENPROT( subm ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ ); GENPROT( axpym ) GENPROT( scal2m ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x \ ); GENPROT( scalm ) GENPROT( setm ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ); GENPROT( xpbym ) // ----------------------------------------------------------------------------- void bli_l1m_xy_check ( obj_t* x, obj_t* y ); void bli_l1m_axy_check ( obj_t* alpha, obj_t* x, obj_t* y ); void bli_l1m_ax_check ( obj_t* alpha, obj_t* x ); blis-0.6.1/frame/1m/bli_l1m_fpa.c000066400000000000000000000051251360743507500164040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define function pointer query interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ GENARRAY_FPA( PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft), \ PASTECH(opname,BLIS_TAPI_EX_SUF) ); \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ) \ { \ return PASTECH2(opname,BLIS_TAPI_EX_SUF,_fpa)[ dt ]; \ } GENFRONT( addm ) GENFRONT( copym ) GENFRONT( subm ) GENFRONT( axpym ) GENFRONT( scal2m ) GENFRONT( scalm ) GENFRONT( setm ) GENFRONT( xpbym ) // // Define function pointer query interfaces for two-datatype operations. // #undef GENFRONT #define GENFRONT( opname ) \ \ GENARRAY_FPA2( PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft), \ PASTECH(opname,BLIS_TAPI_EX_SUF) ); \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp2)( num_t dtx, num_t dty ) \ { \ return PASTECH2(opname,BLIS_TAPI_EX_SUF,_fpa2)[ dtx ][ dty ]; \ } GENFRONT( xpbym_md ) blis-0.6.1/frame/1m/bli_l1m_fpa.h000066400000000000000000000042021360743507500164040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype function pointer query interface. // #undef GENPROT #define GENPROT( opname ) \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ); GENPROT( addm ) GENPROT( copym ) GENPROT( subm ) GENPROT( axpym ) GENPROT( scal2m ) GENPROT( scalm ) GENPROT( setm ) GENPROT( xpbym ) #undef GENPROT #define GENPROT( opname ) \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp2)( num_t dtx, num_t dty ); GENPROT( xpbym_md ) blis-0.6.1/frame/1m/bli_l1m_ft.h000066400000000000000000000105351360743507500162550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // // -- Level-1v function types -------------------------------------------------- // // addm, subm #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( addm ) INSERT_GENTDEF( subm ) // copym #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( copym ) // axpym #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( axpym ) // scal2m #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( scal2m ) // scalm, setm #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjalpha, \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( scalm ) INSERT_GENTDEF( setm ) // xpbym #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, 
\ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* beta, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( xpbym ) INSERT_GENTDEF( xpbym_md ) blis-0.6.1/frame/1m/bli_l1m_ft_ker.h000066400000000000000000000120371360743507500171150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_L1M_FT_KER_H #define BLIS_L1M_FT_KER_H // // -- Level-1m kernel function types ------------------------------------------- // // packm // NOTE: This is the function type for the structure-aware "kernel". #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ); INSERT_GENTDEF( packm ) // NOTE: the following macros generate packm kernel function type definitions // that are "ctyped" and void-typed, for each of the floating-point datatypes. // However, we will only make use of the void-typed definitions because the // functions such as bli_?packm_cxk() (currently) use arrays of function // pointers to store and access the function pointers for various unrolling // (register blocksize) values, and therefore they must all be of the same // type (hence the use of void* for kappa, a, and p). 
// packm_ker #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ ctype* restrict kappa, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ); INSERT_GENTDEF( packm_cxk ) // unpackm_ker #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conjp, \ dim_t n, \ ctype* restrict kappa, \ ctype* restrict p, inc_t ldp, \ ctype* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ); INSERT_GENTDEF( unpackm_cxk ) // packm_3mis_ker // packm_4mi_ker #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ ctype* restrict kappa, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ); INSERT_GENTDEF( packm_cxk_3mis ) INSERT_GENTDEF( packm_cxk_4mi ) // packm_rih_ker // packm_1er_ker #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ ctype* restrict kappa, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ); INSERT_GENTDEF( packm_cxk_rih ) INSERT_GENTDEF( packm_cxk_1er ) #endif blis-0.6.1/frame/1m/bli_l1m_ker.h000066400000000000000000000114131360743507500164210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-1m kernels. // // Note: Instead of defining function prototype macro templates and then // instantiating those macros to define the individual function prototypes, // we simply alias the official operations' prototypes as defined in // bli_l1m_ker_prot.h. 
// native packm kernels #undef GENTPROT #define GENTPROT PACKM_KER_PROT INSERT_GENTPROT_BASIC0( packm_2xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_3xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_4xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_6xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_8xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_10xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_12xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_14xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_16xk_ker_name ) INSERT_GENTPROT_BASIC0( packm_24xk_ker_name ) // native unpackm kernels #undef GENTPROT #define GENTPROT UNPACKM_KER_PROT INSERT_GENTPROT_BASIC0( unpackm_2xk_ker_name ) INSERT_GENTPROT_BASIC0( unpackm_4xk_ker_name ) INSERT_GENTPROT_BASIC0( unpackm_6xk_ker_name ) INSERT_GENTPROT_BASIC0( unpackm_8xk_ker_name ) INSERT_GENTPROT_BASIC0( unpackm_10xk_ker_name ) INSERT_GENTPROT_BASIC0( unpackm_12xk_ker_name ) INSERT_GENTPROT_BASIC0( unpackm_14xk_ker_name ) INSERT_GENTPROT_BASIC0( unpackm_16xk_ker_name ) // 3mis packm kernels #undef GENTPROT #define GENTPROT PACKM_3MIS_KER_PROT INSERT_GENTPROT_BASIC0( packm_2xk_3mis_ker_name ) INSERT_GENTPROT_BASIC0( packm_4xk_3mis_ker_name ) INSERT_GENTPROT_BASIC0( packm_6xk_3mis_ker_name ) INSERT_GENTPROT_BASIC0( packm_8xk_3mis_ker_name ) INSERT_GENTPROT_BASIC0( packm_10xk_3mis_ker_name ) INSERT_GENTPROT_BASIC0( packm_12xk_3mis_ker_name ) INSERT_GENTPROT_BASIC0( packm_14xk_3mis_ker_name ) INSERT_GENTPROT_BASIC0( packm_16xk_3mis_ker_name ) // 4mi packm kernels #undef GENTPROT #define GENTPROT PACKM_4MI_KER_PROT INSERT_GENTPROT_BASIC0( packm_2xk_4mi_ker_name ) INSERT_GENTPROT_BASIC0( packm_4xk_4mi_ker_name ) INSERT_GENTPROT_BASIC0( packm_6xk_4mi_ker_name ) INSERT_GENTPROT_BASIC0( packm_8xk_4mi_ker_name ) INSERT_GENTPROT_BASIC0( packm_10xk_4mi_ker_name ) INSERT_GENTPROT_BASIC0( packm_12xk_4mi_ker_name ) INSERT_GENTPROT_BASIC0( packm_14xk_4mi_ker_name ) INSERT_GENTPROT_BASIC0( packm_16xk_4mi_ker_name ) // rih packm kernels #undef GENTPROT #define GENTPROT 
PACKM_RIH_KER_PROT INSERT_GENTPROT_BASIC0( packm_2xk_rih_ker_name ) INSERT_GENTPROT_BASIC0( packm_4xk_rih_ker_name ) INSERT_GENTPROT_BASIC0( packm_6xk_rih_ker_name ) INSERT_GENTPROT_BASIC0( packm_8xk_rih_ker_name ) INSERT_GENTPROT_BASIC0( packm_10xk_rih_ker_name ) INSERT_GENTPROT_BASIC0( packm_12xk_rih_ker_name ) INSERT_GENTPROT_BASIC0( packm_14xk_rih_ker_name ) INSERT_GENTPROT_BASIC0( packm_16xk_rih_ker_name ) // 1e/1r packm kernels #undef GENTPROT #define GENTPROT PACKM_1ER_KER_PROT INSERT_GENTPROT_BASIC0( packm_2xk_1er_ker_name ) INSERT_GENTPROT_BASIC0( packm_4xk_1er_ker_name ) INSERT_GENTPROT_BASIC0( packm_6xk_1er_ker_name ) INSERT_GENTPROT_BASIC0( packm_8xk_1er_ker_name ) INSERT_GENTPROT_BASIC0( packm_10xk_1er_ker_name ) INSERT_GENTPROT_BASIC0( packm_12xk_1er_ker_name ) INSERT_GENTPROT_BASIC0( packm_14xk_1er_ker_name ) INSERT_GENTPROT_BASIC0( packm_16xk_1er_ker_name ) blis-0.6.1/frame/1m/bli_l1m_ker_prot.h000066400000000000000000000102731360743507500174700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-1m kernels. // // native packm kernels #define PACKM_KER_PROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ); // native unpackm kernels #define UNPACKM_KER_PROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ dim_t n, \ void* restrict kappa, \ void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ); // 3mis packm kernels #define PACKM_3MIS_KER_PROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ); // 4mi packm kernels #define PACKM_4MI_KER_PROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ); // rih packm 
kernels #define PACKM_RIH_KER_PROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ); // 1e/1r packm kernels #define PACKM_1ER_KER_PROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ); blis-0.6.1/frame/1m/bli_l1m_oapi.c000066400000000000000000000276121360743507500165730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the object API macros. #ifdef BLIS_ENABLE_OAPI // // Define object-based interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ uplo_t uplox = bli_obj_uplo( x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \ dim_t m = bli_obj_length( y ); \ dim_t n = bli_obj_width( y ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t rs_y = bli_obj_row_stride( y ); \ inc_t cs_y = bli_obj_col_stride( y ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x, y ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ buf_x, rs_x, cs_x, \ buf_y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } GENFRONT( addm ) GENFRONT( copym ) GENFRONT( subm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ uplo_t uplox = bli_obj_uplo( x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \ dim_t m = bli_obj_length( y ); \ dim_t n = bli_obj_width( y ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t rs_y = bli_obj_row_stride( y ); \ inc_t cs_y = bli_obj_col_stride( y ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x, y ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ buf_alpha, \ buf_x, rs_x, cs_x, \ buf_y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } GENFRONT( axpym ) GENFRONT( scal2m ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ /* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ uplo_t uplox = bli_obj_uplo( x ); \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ obj_t x_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x ); \ \ /* Alias x to x_local so we can apply alpha if it is non-unit. */ \ bli_obj_alias_to( x, &x_local ); \ \ /* If alpha is non-unit, apply it to the scalar attached to x. */ \ if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) \ { \ /* Create a local copy-cast of alpha (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ \ bli_obj_scalar_apply_scalar( &alpha_local, &x_local ); \ } \ \ /* Grab the address of the internal scalar buffer for the scalar attached to x. */ \ buf_alpha = bli_obj_internal_scalar_buffer( &x_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. 
*/ \ diagoffx, \ diagx, \ uplox, \ m, \ n, \ buf_alpha, \ buf_x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } GENFRONT( scalm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ /* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ uplo_t uplox = bli_obj_uplo( x ); \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ BLIS_NO_CONJUGATE, /* internal conjugation applied during copy-cast. 
*/ \ diagoffx, \ diagx, \ uplox, \ m, \ n, \ buf_alpha, \ buf_x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } GENFRONT( setm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ if ( bli_obj_dt( x ) != bli_obj_dt( y ) ) \ return bli_xpbym_md( x, beta, y ); \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ uplo_t uplox = bli_obj_uplo( x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \ dim_t m = bli_obj_length( y ); \ dim_t n = bli_obj_width( y ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t rs_y = bli_obj_row_stride( y ); \ inc_t cs_y = bli_obj_col_stride( y ); \ \ void* buf_beta; \ \ obj_t beta_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x, beta, y ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ beta, &beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ buf_x, rs_x, cs_x, \ buf_beta, \ buf_y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } GENFRONT( xpbym ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dtx = bli_obj_dt( x ); \ num_t dty = bli_obj_dt( y ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ uplo_t uplox = bli_obj_uplo( x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \ dim_t m = bli_obj_length( y ); \ dim_t n = bli_obj_width( y ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t rs_y = bli_obj_row_stride( y ); \ inc_t cs_y = bli_obj_col_stride( y ); \ \ void* buf_beta; \ \ obj_t beta_local; \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dty, BLIS_NO_CONJUGATE, \ beta, &beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dty, &beta_local ); \ \ /* Query a (multi) type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp2)( dtx, dty ); \ \ f \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ buf_x, rs_x, cs_x, \ buf_beta, \ buf_y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } GENFRONT( xpbym_md ) #endif blis-0.6.1/frame/1m/bli_l1m_oapi.h000066400000000000000000000051431360743507500165730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. 
// #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( addm ) GENPROT( copym ) GENPROT( subm ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( axpym ) GENPROT( scal2m ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( scalm ) GENPROT( setm ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( xpbym ) GENPROT( xpbym_md ) blis-0.6.1/frame/1m/bli_l1m_oapi_ba.c000066400000000000000000000036701360743507500172330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_oapi_ba.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l1m_oapi.c" blis-0.6.1/frame/1m/bli_l1m_oapi_ex.c000066400000000000000000000036661360743507500172720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_oapi_ex.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l1m_oapi.c" blis-0.6.1/frame/1m/bli_l1m_oft_var.h000066400000000000000000000042761360743507500173110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_L1M_OFT_VAR_H #define BLIS_L1M_OFT_VAR_H // // -- Level-3 variant function types ------------------------------------------- // #undef GENTDEF #define GENTDEF( opname ) \ \ typedef void (*PASTECH(opname,_var_oft)) \ ( \ obj_t* a, \ obj_t* p, \ cntx_t* cntx, \ cntl_t* cntl, \ thrinfo_t* thread \ ); GENTDEF( packm ) #undef GENTDEF #define GENTDEF( opname ) \ \ typedef void (*PASTECH(opname,_var_oft)) \ ( \ obj_t* p, \ obj_t* a, \ cntx_t* cntx, \ cntl_t* cntl, \ thrinfo_t* thread \ ); GENTDEF( unpackm ) #endif blis-0.6.1/frame/1m/bli_l1m_tapi.c000066400000000000000000000271371360743507500166020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the typed API macros. #ifdef BLIS_ENABLE_TAPI // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, auxker ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ x, rs_x, cs_x, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ \ /* When the diagonal of an upper- or lower-stored matrix is unit, we handle it with a separate post-processing step. 
*/ \ if ( bli_is_upper_or_lower( uplox ) && \ bli_is_unit_diag( diagx ) ) \ { \ PASTEMAC2(ch,auxker,BLIS_TAPI_EX_SUF) \ ( \ diagoffx, \ diagx, \ transx, \ m, \ n, \ x, rs_x, cs_x, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } \ } INSERT_GENTFUNC_BASIC( addm, addd ) INSERT_GENTFUNC_BASIC( subm, subd ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ x, rs_x, cs_x, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ \ /* When the diagonal of an upper- or lower-stored matrix is unit, we handle it with a separate post-processing step. */ \ if ( bli_is_upper_or_lower( uplox ) && \ bli_is_unit_diag( diagx ) ) \ { \ doff_t diagoffy = diagoffx; \ ctype* one = PASTEMAC(ch,1); \ \ if ( bli_does_trans( transx ) ) \ bli_negate_diag_offset( &diagoffy ); \ \ PASTEMAC2(ch,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffy, \ m, \ n, \ one, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } \ } INSERT_GENTFUNC_BASIC0( copym ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* If alpha is zero, then the entire operation is a no-op. 
*/ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ alpha, \ x, rs_x, cs_x, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ \ /* When the diagonal of an upper- or lower-stored matrix is unit, we handle it with a separate post-processing step. */ \ if ( bli_is_upper_or_lower( uplox ) && \ bli_is_unit_diag( diagx ) ) \ { \ PASTEMAC2(ch,axpyd,BLIS_TAPI_EX_SUF) \ ( \ diagoffx, \ diagx, \ transx, \ m, \ n, \ alpha, \ x, rs_x, cs_x, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } \ } INSERT_GENTFUNC_BASIC0( axpym ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If alpha is zero, then we set the output matrix to zero. This seemingly minor optimization is important because it will clear any NaNs and Infs in x that would otherwise propogate. */ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ \ PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffx, \ diagx, \ uplox, \ m, \ n, \ alpha, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ return; \ } \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. 
*/ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ alpha, \ x, rs_x, cs_x, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ \ /* When the diagonal of an upper- or lower-stored matrix is unit, we handle it with a separate post-processing step. */ \ if ( bli_is_upper_or_lower( uplox ) && \ bli_is_unit_diag( diagx ) ) \ { \ doff_t diagoffy = diagoffx; \ \ if ( bli_does_trans( transx ) ) \ bli_negate_diag_offset( &diagoffy ); \ \ PASTEMAC2(ch,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffy, \ m, \ n, \ alpha, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } \ } INSERT_GENTFUNC_BASIC0( scal2m ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjalpha, \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ conjalpha, \ diagoffx, \ diagx, \ uplox, \ m, \ n, \ alpha, \ x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( scalm ) INSERT_GENTFUNC_BASIC0( setm ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* beta, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If beta is zero, then the operation reduces to copym. 
*/ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ PASTEMAC2(ch,copym,_unb_var1) \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ x, rs_x, cs_x, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ \ return; \ } \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ x, rs_x, cs_x, \ beta, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ \ /* When the diagonal of an upper- or lower-stored matrix is unit, we handle it with a separate post-processing step. */ \ if ( bli_is_upper_or_lower( uplox ) && \ bli_is_unit_diag( diagx ) ) \ { \ PASTEMAC2(ch,xpbyd,BLIS_TAPI_EX_SUF) \ ( \ diagoffx, \ diagx, \ transx, \ m, \ n, \ x, rs_x, cs_x, \ beta, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } \ } INSERT_GENTFUNC_BASIC0( xpbym ) #undef GENTFUNC2 #define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname ) \ \ void PASTEMAC3(chx,chy,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype_x* x, inc_t rs_x, inc_t cs_x, \ ctype_y* beta, \ ctype_y* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If beta is zero, then the operation reduces to copym. */ \ if ( PASTEMAC(chy,eq0)( *beta ) ) \ { \ PASTEMAC2(chx,chy,castm) \ ( \ transx, \ m, \ n, \ x, rs_x, cs_x, \ y, rs_y, cs_y \ ); \ \ return; \ } \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. 
*/ \ PASTEMAC3(chx,chy,opname,_unb_var1) \ ( \ diagoffx, \ diagx, \ uplox, \ transx, \ m, \ n, \ x, rs_x, cs_x, \ beta, \ y, rs_y, cs_y, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC2_BASIC0( xpbym_md ) INSERT_GENTFUNC2_MIXDP0( xpbym_md ) #endif blis-0.6.1/frame/1m/bli_l1m_tapi.h000066400000000000000000000077741360743507500166140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces with typed operands. 
// #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( addm ) INSERT_GENTPROT_BASIC0( copym ) INSERT_GENTPROT_BASIC0( subm ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( axpym ) INSERT_GENTPROT_BASIC0( scal2m ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjalpha, \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( scalm ) INSERT_GENTPROT_BASIC0( setm ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* beta, \ ctype* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( xpbym ) #undef GENTPROT2 #define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC3(chx,chy,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype_x* x, inc_t rs_x, inc_t cs_x, \ ctype_y* beta, \ ctype_y* y, inc_t rs_y, inc_t cs_y \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT2_BASIC0( xpbym_md ) INSERT_GENTPROT2_MIXDP0( xpbym_md ) 
blis-0.6.1/frame/1m/bli_l1m_tapi_ba.c000066400000000000000000000036661360743507500172450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_tapi_ba.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. 
#include "bli_l1m_tapi.c" blis-0.6.1/frame/1m/bli_l1m_tapi_ex.c000066400000000000000000000036641360743507500172750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_tapi_ex.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. 
#include "bli_l1m_tapi.c" blis-0.6.1/frame/1m/bli_l1m_unb_var1.c000066400000000000000000000353611360743507500173600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-like interfaces with typed operands. 
//

// Helper template for the two-matrix operations without a scalar. Each
// generated function looks up a level-1v kernel (kerid) in the context and
// calls it once per iteration of the outer loop, passing a pointer into x and
// y and an element count. Instantiated below for addm, copym, and subm.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kername, kerid ) \
\
void PASTEMAC(ch,opname) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  y, inc_t rs_y, inc_t cs_y, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype*   x1; \
	ctype*   y1; \
	uplo_t   uplox_eff; \
	conj_t   conjx; \
	dim_t    n_iter; \
	dim_t    n_elem, n_elem_max; \
	inc_t    ldx, incx; \
	inc_t    ldy, incy; \
	dim_t    j, i; \
	dim_t    ij0, n_shift; \
\
	/* Set various loop parameters. */ \
	bli_set_dims_incs_uplo_2m \
	( \
	  diagoffx, diagx, transx, \
	  uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
	  &uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
	  &ij0, &n_shift \
	); \
\
	/* Nothing to do when the effective storage is "zeros". */ \
	if ( bli_is_zeros( uplox_eff ) ) return; \
\
	/* Extract the conjugation component from the transx parameter. */ \
	conjx = bli_extract_conj( transx ); \
\
	/* Query the kernel needed for this operation. */ \
	PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \
\
	/* Handle dense and upper/lower storage cases separately. */ \
	if ( bli_is_dense( uplox_eff ) ) \
	{ \
		/* Dense: every iteration processes n_elem_max elements. */ \
		for ( j = 0; j < n_iter; ++j ) \
		{ \
			n_elem = n_elem_max; \
\
			x1     = x + (j  )*ldx + (0  )*incx; \
			y1     = y + (j  )*ldy + (0  )*incy; \
\
			/* Invoke the kernel with the appropriate parameters. */ \
			f( \
			   conjx, \
			   n_elem, \
			   x1, incx, \
			   y1, incy, \
			   cntx  \
			 ); \
		} \
	} \
	else \
	{ \
		if ( bli_is_upper( uplox_eff ) ) \
		{ \
			/* Upper: iteration j starts ij0+j steps along the ld
			   dimension and processes n_shift + j + 1 elements,
			   capped at n_elem_max. */ \
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
				x1     = x + (ij0+j  )*ldx + (0  )*incx; \
				y1     = y + (ij0+j  )*ldy + (0  )*incy; \
\
				/* Invoke the kernel with the appropriate parameters. */ \
				f( \
				   conjx, \
				   n_elem, \
				   x1, incx, \
				   y1, incy, \
				   cntx  \
				 ); \
			} \
		} \
		else if ( bli_is_lower( uplox_eff ) ) \
		{ \
			/* Lower: iteration j skips the first i elements (the
			   start pointer advances by ij0+i along the inc
			   dimension and the length shrinks by i), where i is
			   j - n_shift clamped below at zero. */ \
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				i      = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
				n_elem = n_elem_max - i; \
\
				x1     = x + (j  )*ldx + (ij0+i  )*incx; \
				y1     = y + (j  )*ldy + (ij0+i  )*incy; \
\
				/* Invoke the kernel with the appropriate parameters. */ \
				f( \
				   conjx, \
				   n_elem, \
				   x1, incx, \
				   y1, incy, \
				   cntx  \
				 ); \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC2( addm_unb_var1,  addv,  BLIS_ADDV_KER )
INSERT_GENTFUNC_BASIC2( copym_unb_var1, copyv, BLIS_COPYV_KER )
INSERT_GENTFUNC_BASIC2( subm_unb_var1,  subv,  BLIS_SUBV_KER )


// Helper template for the two-matrix operations that also take a scalar
// alpha, which is forwarded unchanged to the level-1v kernel on every call.
// The loop structure matches the scalar-free template above. Instantiated
// below for axpym and scal2m.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kername, kerid ) \
\
void PASTEMAC(ch,opname) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  y, inc_t rs_y, inc_t cs_y, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype*   x1; \
	ctype*   y1; \
	uplo_t   uplox_eff; \
	conj_t   conjx; \
	dim_t    n_iter; \
	dim_t    n_elem, n_elem_max; \
	inc_t    ldx, incx; \
	inc_t    ldy, incy; \
	dim_t    j, i; \
	dim_t    ij0, n_shift; \
\
	/* Set various loop parameters. */ \
	bli_set_dims_incs_uplo_2m \
	( \
	  diagoffx, diagx, transx, \
	  uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
	  &uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
	  &ij0, &n_shift \
	); \
\
	/* Nothing to do when the effective storage is "zeros". */ \
	if ( bli_is_zeros( uplox_eff ) ) return; \
\
	/* Extract the conjugation component from the transx parameter. */ \
	conjx = bli_extract_conj( transx ); \
\
	/* Query the kernel needed for this operation. */ \
	PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \
\
	/* Handle dense and upper/lower storage cases separately. */ \
	if ( bli_is_dense( uplox_eff ) ) \
	{ \
		/* Dense: every iteration processes n_elem_max elements. */ \
		for ( j = 0; j < n_iter; ++j ) \
		{ \
			n_elem = n_elem_max; \
\
			x1     = x + (j  )*ldx + (0  )*incx; \
			y1     = y + (j  )*ldy + (0  )*incy; \
\
			/* Invoke the kernel with the appropriate parameters. */ \
			f( \
			   conjx, \
			   n_elem, \
			   alpha, \
			   x1, incx, \
			   y1, incy, \
			   cntx  \
			 ); \
		} \
	} \
	else \
	{ \
		if ( bli_is_upper( uplox_eff ) ) \
		{ \
			/* Upper: iteration j starts ij0+j steps along the ld
			   dimension and processes n_shift + j + 1 elements,
			   capped at n_elem_max. */ \
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
				x1     = x + (ij0+j  )*ldx + (0  )*incx; \
				y1     = y + (ij0+j  )*ldy + (0  )*incy; \
\
				/* Invoke the kernel with the appropriate parameters. */ \
				f( \
				   conjx, \
				   n_elem, \
				   alpha, \
				   x1, incx, \
				   y1, incy, \
				   cntx  \
				 ); \
			} \
		} \
		else if ( bli_is_lower( uplox_eff ) ) \
		{ \
			/* Lower: iteration j skips the first i elements (the
			   start pointer advances by ij0+i along the inc
			   dimension and the length shrinks by i), where i is
			   j - n_shift clamped below at zero. */ \
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				i      = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
				n_elem = n_elem_max - i; \
\
				x1     = x + (j  )*ldx + (ij0+i  )*incx; \
				y1     = y + (j  )*ldy + (ij0+i  )*incy; \
\
				/* Invoke the kernel with the appropriate parameters. */ \
				f( \
				   conjx, \
				   n_elem, \
				   alpha, \
				   x1, incx, \
				   y1, incy, \
				   cntx  \
				 ); \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC2( axpym_unb_var1,  axpyv,  BLIS_AXPYV_KER )
INSERT_GENTFUNC_BASIC2( scal2m_unb_var1, scal2v, BLIS_SCAL2V_KER )


// Helper template for the single-matrix operations: a scalar alpha (with
// conjalpha) is applied to x via a level-1v kernel, one call per iteration of
// the outer loop. Loop parameters come from the one-matrix setup routine
// bli_set_dims_incs_uplo_1m. Instantiated below for scalm and setm.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kername, kerid ) \
\
void PASTEMAC(ch,opname) \
     ( \
       conj_t  conjalpha, \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype*   x1; \
	uplo_t   uplox_eff; \
	dim_t    n_iter; \
	dim_t    n_elem, n_elem_max; \
	inc_t    ldx, incx; \
	dim_t    j, i; \
	dim_t    ij0, n_shift; \
\
	/* Set various loop parameters. */ \
	bli_set_dims_incs_uplo_1m \
	( \
	  diagoffx, diagx, \
	  uplox, m, n, rs_x, cs_x, \
	  &uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, \
	  &ij0, &n_shift \
	); \
\
	/* Nothing to do when the effective storage is "zeros". */ \
	if ( bli_is_zeros( uplox_eff ) ) return; \
\
	/* Query the kernel needed for this operation. */ \
	PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \
\
	/* Handle dense and upper/lower storage cases separately. */ \
	if ( bli_is_dense( uplox_eff ) ) \
	{ \
		/* Dense: every iteration processes n_elem_max elements. */ \
		for ( j = 0; j < n_iter; ++j ) \
		{ \
			n_elem = n_elem_max; \
\
			x1     = x + (j  )*ldx + (0  )*incx; \
\
			/* Invoke the kernel with the appropriate parameters. */ \
			f( \
			   conjalpha, \
			   n_elem, \
			   alpha, \
			   x1, incx, \
			   cntx  \
			 ); \
		} \
	} \
	else \
	{ \
		if ( bli_is_upper( uplox_eff ) ) \
		{ \
			/* Upper: iteration j starts ij0+j steps along the ld
			   dimension and processes n_shift + j + 1 elements,
			   capped at n_elem_max. */ \
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
				x1     = x + (ij0+j  )*ldx + (0  )*incx; \
\
				/* Invoke the kernel with the appropriate parameters. */ \
				f( \
				   conjalpha, \
				   n_elem, \
				   alpha, \
				   x1, incx, \
				   cntx  \
				 ); \
			} \
		} \
		else if ( bli_is_lower( uplox_eff ) ) \
		{ \
			/* Lower: iteration j skips the first i elements (the
			   start pointer advances by ij0+i along the inc
			   dimension and the length shrinks by i), where i is
			   j - n_shift clamped below at zero. */ \
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				i      = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
				n_elem = n_elem_max - i; \
\
				x1     = x + (j  )*ldx + (ij0+i  )*incx; \
\
				/* Invoke the kernel with the appropriate parameters. */ \
				f( \
				   conjalpha, \
				   n_elem, \
				   alpha, \
				   x1, incx, \
				   cntx  \
				 ); \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC2( scalm_unb_var1, scalv, BLIS_SCALV_KER )
INSERT_GENTFUNC_BASIC2( setm_unb_var1,  setv,  BLIS_SETV_KER )


// Helper template for xpbym: the scalar beta is forwarded to the level-1v
// kernel between the x and y operands. The loop structure matches the other
// two-matrix templates in this file.
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kername, kerid ) \
\
void PASTEMAC(ch,opname) \
     ( \
       doff_t  diagoffx, \
       diag_t  diagx, \
       uplo_t  uplox, \
       trans_t transx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  x, inc_t rs_x, inc_t cs_x, \
       ctype*  beta, \
       ctype*  y, inc_t rs_y, inc_t cs_y, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype*   x1; \
	ctype*   y1; \
	uplo_t   uplox_eff; \
	conj_t   conjx; \
	dim_t    n_iter; \
	dim_t    n_elem, n_elem_max; \
	inc_t    ldx, incx; \
	inc_t    ldy, incy; \
	dim_t    j, i; \
	dim_t    ij0, n_shift; \
\
	/* Set various loop parameters. */ \
	bli_set_dims_incs_uplo_2m \
	( \
	  diagoffx, diagx, transx, \
	  uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
	  &uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
	  &ij0, &n_shift \
	); \
\
	/* Nothing to do when the effective storage is "zeros". */ \
	if ( bli_is_zeros( uplox_eff ) ) return; \
\
	/* Extract the conjugation component from the transx parameter. */ \
	conjx = bli_extract_conj( transx ); \
\
	/* Query the kernel needed for this operation. */ \
	PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \
\
	/* Handle dense and upper/lower storage cases separately. */ \
	if ( bli_is_dense( uplox_eff ) ) \
	{ \
		/* Dense: every iteration processes n_elem_max elements. */ \
		for ( j = 0; j < n_iter; ++j ) \
		{ \
			n_elem = n_elem_max; \
\
			x1     = x + (j  )*ldx + (0  )*incx; \
			y1     = y + (j  )*ldy + (0  )*incy; \
\
			/* Invoke the kernel with the appropriate parameters. */ \
			f( \
			   conjx, \
			   n_elem, \
			   x1, incx, \
			   beta, \
			   y1, incy, \
			   cntx  \
			 ); \
		} \
	} \
	else \
	{ \
		if ( bli_is_upper( uplox_eff ) ) \
		{ \
			/* Upper: iteration j starts ij0+j steps along the ld
			   dimension and processes n_shift + j + 1 elements,
			   capped at n_elem_max. */ \
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
				x1     = x + (ij0+j  )*ldx + (0  )*incx; \
				y1     = y + (ij0+j  )*ldy + (0  )*incy; \
\
				/* Invoke the kernel with the appropriate parameters. */ \
				f( \
				   conjx, \
				   n_elem, \
				   x1, incx, \
				   beta, \
				   y1, incy, \
				   cntx  \
				 ); \
			} \
		} \
		else if ( bli_is_lower( uplox_eff ) ) \
		{ \
			/* Lower: iteration j skips the first i elements (the
			   start pointer advances by ij0+i along the inc
			   dimension and the length shrinks by i), where i is
			   j - n_shift clamped below at zero. */ \
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				i      = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
				n_elem = n_elem_max - i; \
\
				x1     = x + (j  )*ldx + (ij0+i  )*incx; \
				y1     = y + (j  )*ldy + (ij0+i  )*incy; \
\
				/* Invoke the kernel with the appropriate parameters. */ \
				f( \
				   conjx, \
				   n_elem, \
				   x1, incx, \
				   beta, \
				   y1, incy, \
				   cntx  \
				 ); \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC2( xpbym_unb_var1, xpbyv, BLIS_XPBYV_KER )


// Helper template for the mixed-datatype xpbym_md: x has type ctype_x while
// beta and y have type ctype_y. Unlike the templates above, this one does not
// query a kernel from the context; it applies the scalar-level adds/xpbys
// macros element by element in explicit loops.
#undef GENTFUNC2
#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname ) \
\
void PASTEMAC2(chx,chy,opname) \
     ( \
       doff_t   diagoffx, \
       diag_t   diagx, \
       uplo_t   uplox, \
       trans_t  transx, \
       dim_t    m, \
       dim_t    n, \
       ctype_x* x, inc_t rs_x, inc_t cs_x, \
       ctype_y* beta, \
       ctype_y* y, inc_t rs_y, inc_t cs_y, \
       cntx_t*  cntx, \
       rntm_t*  rntm  \
     ) \
{ \
	ctype_x* restrict x1; \
	ctype_y* restrict y1; \
	uplo_t            uplox_eff; \
	dim_t             n_iter; \
	dim_t             n_elem, n_elem_max; \
	inc_t             ldx, incx; \
	inc_t             ldy, incy; \
	dim_t             j, i; \
	dim_t             ij0, n_shift; \
\
	/* Set various loop parameters. */ \
	bli_set_dims_incs_uplo_2m \
	( \
	  diagoffx, diagx, transx, \
	  uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
	  &uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
	  &ij0, &n_shift \
	); \
\
	/* Extract the conjugation component from the transx parameter. */ \
	/*conjx = bli_extract_conj( transx );*/ \
\
	/* NOTE(review): uplox_eff, ij0, and n_shift are computed above but
	   never read below; every branch iterates n_iter times over
	   n_elem_max elements, i.e. the operands are traversed as dense
	   regardless of uplox, and conjugation of x is not applied (the
	   extraction above is commented out). Confirm this is intended. */ \
	/* Branch on whether beta is one (plain accumulation via adds) and,
	   within each case, on whether both operands have unit inc stride
	   (indexed access) or not (pointer stepping). */ \
	if ( PASTEMAC(chy,eq1)( *beta ) ) \
	{ \
		if ( incx == 1 && incy == 1 ) \
		{ \
			n_elem = n_elem_max; \
\
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				x1 = x + (j  )*ldx + (0  )*incx; \
				y1 = y + (j  )*ldy + (0  )*incy; \
\
				ctype_x* restrict chi1 = x1; \
				ctype_y* restrict psi1 = y1; \
\
				for ( i = 0; i < n_elem; ++i ) \
				{ \
					PASTEMAC2(chx,chy,adds)( chi1[i], psi1[i] ); \
				} \
			} \
		} \
		else \
		{ \
			n_elem = n_elem_max; \
\
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				x1 = x + (j  )*ldx + (0  )*incx; \
				y1 = y + (j  )*ldy + (0  )*incy; \
\
				ctype_x* restrict chi1 = x1; \
				ctype_y* restrict psi1 = y1; \
\
				for ( i = 0; i < n_elem; ++i ) \
				{ \
					PASTEMAC2(chx,chy,adds)( *chi1, *psi1 ); \
\
					chi1 += incx; \
					psi1 += incy; \
				} \
			} \
		} \
	} \
	else /* ( !PASTEMAC(chy,eq1)( *beta ) ) */ \
	{ \
		if ( incx == 1 && incy == 1 ) \
		{ \
			n_elem = n_elem_max; \
\
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				x1 = x + (j  )*ldx + (0  )*incx; \
				y1 = y + (j  )*ldy + (0  )*incy; \
\
				ctype_x* restrict chi1 = x1; \
				ctype_y* restrict psi1 = y1; \
\
				for ( i = 0; i < n_elem; ++i ) \
				{ \
					PASTEMAC3(chx,chy,chy,xpbys)( chi1[i], *beta, psi1[i] ); \
				} \
			} \
		} \
		else \
		{ \
			n_elem = n_elem_max; \
\
			for ( j = 0; j < n_iter; ++j ) \
			{ \
				x1 = x + (j  )*ldx + (0  )*incx; \
				y1 = y + (j  )*ldy + (0  )*incy; \
\
				ctype_x* restrict chi1 = x1; \
				ctype_y* restrict psi1 = y1; \
\
				for ( i = 0; i < n_elem; ++i ) \
				{ \
					PASTEMAC3(chx,chy,chy,xpbys)( *chi1, *beta, *psi1 ); \
\
					chi1 += incx; \
					psi1 += incy; \
				} \
			} \
		} \
	} \
}

INSERT_GENTFUNC2_BASIC0( xpbym_md_unb_var1 )
INSERT_GENTFUNC2_MIXDP0( xpbym_md_unb_var1 )
blis-0.6.1/frame/1m/bli_l1m_unb_var1.h000066400000000000000000000100101360743507500173450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces with typed operands. 
// #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,_unb_var1) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( addm ) INSERT_GENTPROT_BASIC0( copym ) INSERT_GENTPROT_BASIC0( subm ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,_unb_var1) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* y, inc_t rs_y, inc_t cs_y, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( axpym ) INSERT_GENTPROT_BASIC0( scal2m ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,_unb_var1) \ ( \ conj_t conjalpha, \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t rs_x, inc_t cs_x, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( scalm ) INSERT_GENTPROT_BASIC0( setm ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,_unb_var1) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype* beta, \ ctype* y, inc_t rs_y, inc_t cs_y, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( xpbym ) #undef GENTPROT2 #define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \ \ void PASTEMAC3(chx,chy,opname,_unb_var1) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ trans_t transx, \ dim_t m, \ dim_t n, \ ctype_x* x, inc_t rs_x, inc_t cs_x, \ ctype_y* beta, \ ctype_y* y, inc_t rs_y, inc_t cs_y, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT2_BASIC0( xpbym_md ) INSERT_GENTPROT2_MIXDP0( xpbym_md ) 
blis-0.6.1/frame/1m/other/000077500000000000000000000000001360743507500152115ustar00rootroot00000000000000blis-0.6.1/frame/1m/other/bli_scalm.h000066400000000000000000000032461360743507500173140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "bli_scalm_cntl.h" blis-0.6.1/frame/1m/other/bli_scalm_cntl.c000066400000000000000000000042051360743507500203230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" cntl_t* bli_scalm_cntl_create_node ( void_fp var_func, cntl_t* sub_node ) { cntl_t* cntl; // It's important that we set the bszid field to BLIS_NO_PART to indicate // that no blocksize partitioning is performed. 
bli_cntl_free() will rely // on this information to know how to step through the thrinfo_t tree in // sync with the cntl_t tree. cntl = bli_cntl_create_node ( BLIS_NOID, BLIS_NO_PART, var_func, NULL, sub_node ); return cntl; } blis-0.6.1/frame/1m/other/bli_scalm_cntl.h000066400000000000000000000033551360743507500203350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

// Prototype for the scalm control-tree node constructor.
//
//   var_func  variant function pointer (void_fp).
//   sub_node  pointer to a cntl_t passed as the sub-node.
//
// Returns a cntl_t pointer.
cntl_t* bli_scalm_cntl_create_node
     (
       void_fp var_func,
       cntl_t* sub_node
     );
blis-0.6.1/frame/1m/other/bli_scalm_int.c000066400000000000000000000064341360743507500201630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T scalm_fp typedef void (*FUNCPTR_T)( obj_t* alpha, obj_t* x, cntx_t* cntx ); static FUNCPTR_T vars[1][3] = { // unblocked optimized unblocked blocked { bli_scalm_ex, bli_scalm_ex, NULL } }; void bli_scalm_int( obj_t* alpha, obj_t* x, cntx_t* cntx, scalm_t* cntl ) { //obj_t x_local; varnum_t n; impl_t i; FUNCPTR_T f; // Return early if one of the matrix operands has a zero dimension. if ( bli_obj_has_zero_dim( x ) ) return; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_scalm_check( alpha, x ); // First check if we are to skip this operation. if ( bli_cntl_is_noop( cntl ) ) return; // Return early if both alpha and the scalar attached to x are unit. if ( bli_obj_equals( alpha, &BLIS_ONE ) && bli_obj_scalar_equals( x, &BLIS_ONE ) ) return; // // This code has been disabled since we've now added the alpha // parameter back to the object interface to the underlying // scalm variant. // // Alias x to x_local so we can apply alpha if it is non-unit. //bli_obj_alias_to( *x, x_local ); // If alpha is non-unit, apply it to the scalar attached to x. //if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) //{ // bli_obj_scalar_apply_scalar( alpha, &x_local ); //} // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( alpha, x, cntx ); } blis-0.6.1/frame/1m/other/bli_scalm_int.h000066400000000000000000000034261360743507500201660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

// Prototype for the control-tree-driven scalm entry point.
//
//   alpha  scalar object.
//   x      matrix object.
//   cntx   context pointer.
//   cntl   scalm control node.
void bli_scalm_int( obj_t* alpha,
                    obj_t* x,
                    cntx_t* cntx,
                    scalm_t* cntl );
blis-0.6.1/frame/1m/packm/000077500000000000000000000000001360743507500151635ustar00rootroot00000000000000blis-0.6.1/frame/1m/packm/bli_packm.h000066400000000000000000000044261360743507500172630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_packm_cntl.h" #include "bli_packm_check.h" #include "bli_packm_init.h" #include "bli_packm_int.h" #include "bli_packm_part.h" #include "bli_packm_var.h" #include "bli_packm_struc_cxk.h" #include "bli_packm_struc_cxk_4mi.h" #include "bli_packm_struc_cxk_3mis.h" #include "bli_packm_struc_cxk_rih.h" #include "bli_packm_struc_cxk_1er.h" #include "bli_packm_cxk.h" #include "bli_packm_cxk_4mi.h" #include "bli_packm_cxk_3mis.h" #include "bli_packm_cxk_rih.h" #include "bli_packm_cxk_1er.h" // Mixed datatype support. #ifdef BLIS_ENABLE_GEMM_MD #include "bli_packm_md.h" #endif blis-0.6.1/frame/1m/packm/bli_packm_blk_var1.c000066400000000000000000000653571360743507500210510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T packm_fp typedef void (*FUNCPTR_T) ( struc_t strucc, doff_t diagoffc, diag_t diagc, uplo_t uploc, trans_t transc, pack_t schema, bool_t invdiag, bool_t revifup, bool_t reviflo, dim_t m, dim_t n, dim_t m_max, dim_t n_max, void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p, inc_t is_p, dim_t pd_p, inc_t ps_p, void_fp packm_ker, cntx_t* cntx, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,packm_blk_var1); static func_t packm_struc_cxk_kers[BLIS_NUM_PACK_SCHEMA_TYPES] = { /* float (0) scomplex (1) double (2) dcomplex (3) */ // 0000 row/col panels { { bli_spackm_struc_cxk, bli_cpackm_struc_cxk, bli_dpackm_struc_cxk, bli_zpackm_struc_cxk, } }, // 0001 row/col panels: 4m interleaved { { NULL, bli_cpackm_struc_cxk_4mi, NULL, bli_zpackm_struc_cxk_4mi, } }, // 0010 row/col panels: 3m interleaved { { NULL, bli_cpackm_struc_cxk_3mis, NULL, bli_zpackm_struc_cxk_3mis, } }, // 0011 row/col panels: 4m separated (NOT IMPLEMENTED) { { NULL, NULL, NULL, NULL, } }, // 0100 row/col panels: 3m separated { { NULL, bli_cpackm_struc_cxk_3mis, NULL, bli_zpackm_struc_cxk_3mis, } }, // 0101 row/col panels: real only { { NULL, bli_cpackm_struc_cxk_rih, NULL, bli_zpackm_struc_cxk_rih, } }, // 0110 row/col panels: imaginary only { { NULL, bli_cpackm_struc_cxk_rih, NULL, bli_zpackm_struc_cxk_rih, } }, // 0111 row/col panels: real+imaginary only { { NULL, bli_cpackm_struc_cxk_rih, NULL, bli_zpackm_struc_cxk_rih, } }, // 1000 row/col panels: 1m-expanded (1e) { { NULL, bli_cpackm_struc_cxk_1er, NULL, bli_zpackm_struc_cxk_1er, } }, // 1001 row/col panels: 1m-reordered (1r) { { NULL, bli_cpackm_struc_cxk_1er, NULL, bli_zpackm_struc_cxk_1er, } }, }; void bli_packm_blk_var1 ( obj_t* c, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* t ) { #ifdef BLIS_ENABLE_GEMM_MD // Call a different packm implementation when the storage and target // datatypes differ. 
if ( bli_obj_dt( c ) != bli_obj_target_dt( c ) ) { bli_packm_blk_var1_md( c, p, cntx, cntl, t ); return; } #endif num_t dt_p = bli_obj_dt( p ); struc_t strucc = bli_obj_struc( c ); doff_t diagoffc = bli_obj_diag_offset( c ); diag_t diagc = bli_obj_diag( c ); uplo_t uploc = bli_obj_uplo( c ); trans_t transc = bli_obj_conjtrans_status( c ); pack_t schema = bli_obj_pack_schema( p ); bool_t invdiag = bli_obj_has_inverted_diag( p ); bool_t revifup = bli_obj_is_pack_rev_if_upper( p ); bool_t reviflo = bli_obj_is_pack_rev_if_lower( p ); dim_t m_p = bli_obj_length( p ); dim_t n_p = bli_obj_width( p ); dim_t m_max_p = bli_obj_padded_length( p ); dim_t n_max_p = bli_obj_padded_width( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t rs_p = bli_obj_row_stride( p ); inc_t cs_p = bli_obj_col_stride( p ); inc_t is_p = bli_obj_imag_stride( p ); dim_t pd_p = bli_obj_panel_dim( p ); inc_t ps_p = bli_obj_panel_stride( p ); obj_t kappa; void* buf_kappa; func_t* packm_kers; void_fp packm_ker; FUNCPTR_T f; // Treatment of kappa (ie: packing during scaling) depends on // whether we are executing an induced method. if ( bli_is_nat_packed( schema ) ) { // This branch is for native execution, where we assume that // the micro-kernel will always apply the alpha scalar of the // higher-level operation. Thus, we use BLIS_ONE for kappa so // that the underlying packm implementation does not perform // any scaling during packing. buf_kappa = bli_obj_buffer_for_const( dt_p, &BLIS_ONE ); } else // if ( bli_is_ind_packed( schema ) ) { obj_t* kappa_p; // The value for kappa we use will depend on whether the scalar // attached to A has a nonzero imaginary component. If it does, // then we will apply the scalar during packing to facilitate // implementing induced complex domain algorithms in terms of // real domain micro-kernels. 
(In the aforementioned situation, // applying a real scalar is easy, but applying a complex one is // harder, so we avoid the need altogether with the code below.) if ( bli_obj_scalar_has_nonzero_imag( p ) ) { //printf( "applying non-zero imag kappa\n" ); // Detach the scalar. bli_obj_scalar_detach( p, &kappa ); // Reset the attached scalar (to 1.0). bli_obj_scalar_reset( p ); kappa_p = κ } else { // If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE; } // Acquire the buffer to the kappa chosen above. buf_kappa = bli_obj_buffer_for_1x1( dt_p, kappa_p ); } #if 0 if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4mi_kers; else if ( bli_is_3mi_packed( schema ) || bli_is_3ms_packed( schema ) ) packm_kers = packm_struc_cxk_3mis_kers; else if ( bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers; else packm_kers = packm_struc_cxk_kers; #else // The original idea here was to read the packm_ukr from the context // if it is non-NULL. The problem is, it requires that we be able to // assume that the packm_ukr field is initialized to NULL, which it // currently is not. //func_t* cntx_packm_kers = bli_cntx_get_packm_ukr( cntx ); //if ( bli_func_is_null_dt( dt_c, cntx_packm_kers ) ) { // If the packm structure-aware kernel func_t in the context is // NULL (which is the default value after the context is created), // we use the default lookup table to determine the right func_t // for the current schema. const dim_t i = bli_pack_schema_index( schema ); packm_kers = &packm_struc_cxk_kers[ i ]; } #if 0 else // cntx's packm func_t overrides { // If the packm structure-aware kernel func_t in the context is // non-NULL (ie: assumed to be valid), we use that instead. 
//packm_kers = bli_cntx_packm_ukrs( cntx ); packm_kers = cntx_packm_kers; } #endif #endif // Query the datatype-specific function pointer from the func_t object. packm_ker = bli_func_get_dt( dt_p, packm_kers ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_p]; // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, schema, invdiag, revifup, reviflo, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, is_p, pd_p, ps_p, packm_ker, cntx, t ); } #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ trans_t transc, \ pack_t schema, \ bool_t invdiag, \ bool_t revifup, \ bool_t reviflo, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ dim_t pd_p, inc_t ps_p, \ void_fp packm_ker, \ cntx_t* cntx, \ thrinfo_t* thread \ ) \ { \ PASTECH2(ch,opname,_ker_ft) packm_ker_cast = packm_ker; \ \ ctype* restrict kappa_cast = kappa; \ ctype* restrict c_cast = c; \ ctype* restrict p_cast = p; \ ctype* restrict c_begin; \ ctype* restrict p_begin; \ \ dim_t iter_dim; \ dim_t n_iter; \ dim_t it, ic, ip; \ dim_t ic0, ip0; \ doff_t ic_inc, ip_inc; \ doff_t diagoffc_i; \ doff_t diagoffc_inc; \ dim_t panel_len_full; \ dim_t panel_len_i; \ dim_t panel_len_max; \ dim_t panel_len_max_i; \ dim_t panel_dim_i; \ dim_t panel_dim_max; \ dim_t panel_off_i; \ inc_t vs_c; \ inc_t ldc; \ inc_t ldp, p_inc; \ dim_t* m_panel_full; \ dim_t* n_panel_full; \ dim_t* m_panel_use; \ dim_t* n_panel_use; \ dim_t* m_panel_max; \ dim_t* n_panel_max; \ conj_t conjc; \ bool_t row_stored; \ bool_t col_stored; \ inc_t is_p_use; \ dim_t ss_num; \ dim_t ss_den; \ \ ctype* restrict c_use; \ ctype* restrict p_use; \ doff_t diagoffp_i; \ \ \ /* If C is zeros and part of a triangular matrix, then we don't need 
to pack it. */ \ if ( bli_is_zeros( uploc ) && \ bli_is_triangular( strucc ) ) return; \ \ /* Extract the conjugation bit from the transposition argument. */ \ conjc = bli_extract_conj( transc ); \ \ /* If c needs a transposition, induce it so that we can more simply express the remaining parameters and code. */ \ if ( bli_does_trans( transc ) ) \ { \ bli_swap_incs( &rs_c, &cs_c ); \ bli_negate_diag_offset( &diagoffc ); \ bli_toggle_uplo( &uploc ); \ bli_toggle_trans( &transc ); \ } \ \ /* Create flags to incidate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel. Hence the mismatch in "row" and "column" semantics. */ \ row_stored = bli_is_col_packed( schema ); \ col_stored = bli_is_row_packed( schema ); \ \ /* If the row storage flag indicates row storage, then we are packing to column panels; otherwise, if the strides indicate column storage, we are packing to row panels. */ \ if ( row_stored ) \ { \ /* Prepare to pack to row-stored column panels. */ \ iter_dim = n; \ panel_len_full = m; \ panel_len_max = m_max; \ panel_dim_max = pd_p; \ ldc = rs_c; \ vs_c = cs_c; \ diagoffc_inc = -( doff_t )panel_dim_max; \ ldp = rs_p; \ m_panel_full = &m; \ n_panel_full = &panel_dim_i; \ m_panel_use = &panel_len_i; \ n_panel_use = &panel_dim_i; \ m_panel_max = &panel_len_max_i; \ n_panel_max = &panel_dim_max; \ } \ else /* if ( col_stored ) */ \ { \ /* Prepare to pack to column-stored row panels. */ \ iter_dim = m; \ panel_len_full = n; \ panel_len_max = n_max; \ panel_dim_max = pd_p; \ ldc = cs_c; \ vs_c = rs_c; \ diagoffc_inc = ( doff_t )panel_dim_max; \ ldp = cs_p; \ m_panel_full = &panel_dim_i; \ n_panel_full = &n; \ m_panel_use = &panel_dim_i; \ n_panel_use = &panel_len_i; \ m_panel_max = &panel_dim_max; \ n_panel_max = &panel_len_max_i; \ } \ \ /* Compute the storage stride scaling. Usually this is just 1. 
However, in the case of interleaved 3m, we need to scale by 3/2, and in the cases of real-only, imag-only, or summed-only, we need to scale by 1/2. In both cases, we are compensating for the fact that pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema ) ) { ss_num = 3; ss_den = 2; } \ else if ( bli_is_3ms_packed( schema ) ) { ss_num = 1; ss_den = 2; } \ else if ( bli_is_rih_packed( schema ) ) { ss_num = 1; ss_den = 2; } \ else { ss_num = 1; ss_den = 1; } \ \ /* Compute the total number of iterations we'll need. */ \ n_iter = iter_dim / panel_dim_max + ( iter_dim % panel_dim_max ? 1 : 0 ); \ \ /* Set the initial values and increments for indices related to C and P based on whether reverse iteration was requested. */ \ if ( ( revifup && bli_is_upper( uploc ) && bli_is_triangular( strucc ) ) || \ ( reviflo && bli_is_lower( uploc ) && bli_is_triangular( strucc ) ) ) \ { \ ic0 = (n_iter - 1) * panel_dim_max; \ ic_inc = -panel_dim_max; \ ip0 = n_iter - 1; \ ip_inc = -1; \ } \ else \ { \ ic0 = 0; \ ic_inc = panel_dim_max; \ ip0 = 0; \ ip_inc = 1; \ } \ \ p_begin = p_cast; \ \ /* Query the number of threads and thread ids from the current thread's packm thrinfo_t node. */ \ const dim_t nt = bli_thread_n_way( thread ); \ const dim_t tid = bli_thread_work_id( thread ); \ \ dim_t it_start, it_end, it_inc; \ \ /* Determine the thread range and increment using the current thread's packm thrinfo_t node. NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. */ \ bli_thread_range_jrir( thread, n_iter, 1, FALSE, &it_start, &it_end, &it_inc ); \ \ /* Iterate over every logical micropanel in the source matrix. 
*/ \ for ( ic = ic0, ip = ip0, it = 0; it < n_iter; \ ic += ic_inc, ip += ip_inc, it += 1 ) \ { \ panel_dim_i = bli_min( panel_dim_max, iter_dim - ic ); \ \ diagoffc_i = diagoffc + (ip )*diagoffc_inc; \ c_begin = c_cast + (ic )*vs_c; \ \ if ( bli_is_triangular( strucc ) && \ bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel_full, *n_panel_full ) ) \ { \ /* This case executes if the panel belongs to a triangular matrix AND is completely unstored (ie: zero). If the panel is unstored, we do nothing. (Notice that we don't even increment p_begin.) */ \ \ continue; \ } \ else if ( bli_is_triangular( strucc ) && \ bli_intersects_diag_n( diagoffc_i, *m_panel_full, *n_panel_full ) ) \ { \ /* This case executes if the panel belongs to a triangular matrix AND is diagonal-intersecting. Notice that we cannot bury the following conditional logic into packm_struc_cxk() because we need to know the value of panel_len_max_i so we can properly increment p_inc. */ \ \ /* Sanity check. Diagonals should not intersect the short end of a micro-panel. If they do, then somehow the constraints on cache blocksizes being a whole multiple of the register blocksizes was somehow violated. 
*/ \ if ( ( col_stored && diagoffc_i < 0 ) || \ ( row_stored && diagoffc_i > 0 ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ if ( ( row_stored && bli_is_upper( uploc ) ) || \ ( col_stored && bli_is_lower( uploc ) ) ) \ { \ panel_off_i = 0; \ panel_len_i = bli_abs( diagoffc_i ) + panel_dim_i; \ panel_len_max_i = bli_min( bli_abs( diagoffc_i ) + panel_dim_max, \ panel_len_max ); \ diagoffp_i = diagoffc_i; \ } \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ ( col_stored && bli_is_upper( uploc ) ) ) */ \ { \ panel_off_i = bli_abs( diagoffc_i ); \ panel_len_i = panel_len_full - panel_off_i; \ panel_len_max_i = panel_len_max - panel_off_i; \ diagoffp_i = 0; \ } \ \ c_use = c_begin + (panel_off_i )*ldc; \ p_use = p_begin; \ \ /* We need to re-compute the imaginary stride as a function of panel_len_max_i since triangular packed matrices have panels of varying lengths. NOTE: This imaginary stride value is only referenced by the packm kernels for induced methods. */ \ is_p_use = ldp * panel_len_max_i; \ \ /* We nudge the imaginary stride up by one if it is odd. */ \ is_p_use += ( bli_is_odd( is_p_use ) ? 1 : 0 ); \ \ /* NOTE: We MUST use round-robin partitioning when packing micropanels of a triangular matrix. Hermitian/symmetric and general packing may use slab or round-robin, depending on which was selected at configure-time. */ \ if ( bli_packm_my_iter_rr( it, it_start, it_end, tid, nt ) ) \ { \ packm_ker_cast( strucc, \ diagoffp_i, \ diagc, \ uploc, \ conjc, \ schema, \ invdiag, \ *m_panel_use, \ *n_panel_use, \ *m_panel_max, \ *n_panel_max, \ kappa_cast, \ c_use, rs_c, cs_c, \ p_use, rs_p, cs_p, \ is_p_use, \ cntx ); \ } \ \ /* NOTE: This value is usually LESS than ps_p because triangular matrices usually have several micro-panels that are shorter than a "full" micro-panel. 
*/ \ p_inc = ( is_p_use * ss_num ) / ss_den; \ } \ else if ( bli_is_herm_or_symm( strucc ) ) \ { \ /* This case executes if the panel belongs to a Hermitian or symmetric matrix, which includes stored, unstored, and diagonal-intersecting panels. */ \ \ c_use = c_begin; \ p_use = p_begin; \ \ panel_len_i = panel_len_full; \ panel_len_max_i = panel_len_max; \ \ is_p_use = is_p; \ \ /* The definition of bli_packm_my_iter() will depend on whether slab or round-robin partitioning was requested at configure-time. */ \ if ( bli_packm_my_iter( it, it_start, it_end, tid, nt ) ) \ { \ packm_ker_cast( strucc, \ diagoffc_i, \ diagc, \ uploc, \ conjc, \ schema, \ invdiag, \ *m_panel_use, \ *n_panel_use, \ *m_panel_max, \ *n_panel_max, \ kappa_cast, \ c_use, rs_c, cs_c, \ p_use, rs_p, cs_p, \ is_p_use, \ cntx ); \ } \ \ p_inc = ps_p; \ } \ else \ { \ /* This case executes if the panel is general, or, if the panel is part of a triangular matrix and is neither unstored (ie: zero) nor diagonal-intersecting. */ \ \ c_use = c_begin; \ p_use = p_begin; \ \ panel_len_i = panel_len_full; \ panel_len_max_i = panel_len_max; \ \ is_p_use = is_p; \ \ /* The definition of bli_packm_my_iter() will depend on whether slab or round-robin partitioning was requested at configure-time. */ \ if ( bli_packm_my_iter( it, it_start, it_end, tid, nt ) ) \ { \ packm_ker_cast( BLIS_GENERAL, \ 0, \ diagc, \ BLIS_DENSE, \ conjc, \ schema, \ invdiag, \ *m_panel_use, \ *n_panel_use, \ *m_panel_max, \ *n_panel_max, \ kappa_cast, \ c_use, rs_c, cs_c, \ p_use, rs_p, cs_p, \ is_p_use, \ cntx ); \ } \ \ /* NOTE: This value is equivalent to ps_p. 
*/ \ p_inc = ps_p; \ } \ \ p_begin += p_inc; \ \ } \ } INSERT_GENTFUNCR_BASIC( packm, packm_blk_var1 ) /* if ( row_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_var2: b", m, n, \ c_cast, rs_c, cs_c, "%4.1f", "" ); \ if ( col_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_var2: a", m, n, \ c_cast, rs_c, cs_c, "%4.1f", "" ); \ */ /* if ( row_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b packed", *m_panel_max, *n_panel_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ else \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a packed", *m_panel_max, *n_panel_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ */ \ \ /* if ( col_stored ) { \ if ( bli_thread_work_id( thread ) == 0 ) \ { \ printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ fflush( stdout ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \ ( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ fflush( stdout ); \ } \ bli_thread_obarrier( thread ); \ if ( bli_thread_work_id( thread ) == 1 ) \ { \ printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ fflush( stdout ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \ ( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ fflush( stdout ); \ } \ bli_thread_obarrier( thread ); \ } \ else { \ if ( bli_thread_work_id( thread ) == 0 ) \ { \ printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ fflush( stdout ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \ ( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, 
*n_panel_max, \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ fflush( stdout ); \ } \ bli_thread_obarrier( thread ); \ if ( bli_thread_work_id( thread ) == 1 ) \ { \ printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ fflush( stdout ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \ ( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ fflush( stdout ); \ } \ bli_thread_obarrier( thread ); \ } \ */ /* if ( bli_is_4mi_packed( schema ) ) { \ printf( "packm_var2: is_p_use = %lu\n", is_p_use ); \ if ( col_stored ) { \ if ( 0 ) \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: a_r", *m_panel_use, *n_panel_use, \ ( ctype_r* )c_use, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: ap_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: ap_i", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use + is_p_use, rs_p, cs_p, "%4.1f", "" ); \ } \ if ( row_stored ) { \ if ( 0 ) \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: b_r", *m_panel_use, *n_panel_use, \ ( ctype_r* )c_use, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_i", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use + is_p_use, rs_p, cs_p, "%4.1f", "" ); \ } \ } \ */ /* PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_rpi", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ */ /* if ( row_stored ) { \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: b_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )c_use, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: b_i", *m_panel_max, *n_panel_max, \ (( ctype_r* )c_use)+rs_c, 2*rs_c, 
2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ inc_t is_b = rs_p * *m_panel_max; \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_i", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use + is_b, rs_p, cs_p, "%4.1f", "" ); \ } \ */ /* if ( col_stored ) { \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: a_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )c_use, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: a_i", *m_panel_max, *n_panel_max, \ (( ctype_r* )c_use)+rs_c, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: ap_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: ap_i", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use + p_inc, rs_p, cs_p, "%4.1f", "" ); \ } \ */ blis-0.6.1/frame/1m/packm/bli_packm_blk_var1_md.c000066400000000000000000000246471360743507500215260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_GEMM_MD #define FUNCPTR_T packm_fp typedef void (*FUNCPTR_T)( trans_t transc, pack_t schema, dim_t m, dim_t n, dim_t m_max, dim_t n_max, void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p, inc_t is_p, dim_t pd_p, inc_t ps_p, cntx_t* cntx, thrinfo_t* thread ); static FUNCPTR_T GENARRAY2_ALL(ftypes,packm_blk_var1_md); void bli_packm_blk_var1_md ( obj_t* c, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* t ) { num_t dt_c = bli_obj_dt( c ); num_t dt_p = bli_obj_dt( p ); trans_t transc = bli_obj_conjtrans_status( c ); pack_t schema = bli_obj_pack_schema( p ); dim_t m_p = bli_obj_length( p ); dim_t n_p = bli_obj_width( p ); dim_t m_max_p = bli_obj_padded_length( p ); dim_t n_max_p = bli_obj_padded_width( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t rs_p = bli_obj_row_stride( p ); inc_t cs_p = bli_obj_col_stride( p ); inc_t is_p = bli_obj_imag_stride( p ); dim_t pd_p = bli_obj_panel_dim( p ); inc_t ps_p = bli_obj_panel_stride( p ); obj_t kappa; void* buf_kappa; FUNCPTR_T f; // Treatment of kappa (ie: packing during scaling) depends on // 
whether we are executing an induced method. if ( bli_is_nat_packed( schema ) ) { // This branch is for native execution, where we assume that // the micro-kernel will always apply the alpha scalar of the // higher-level operation. Thus, we use BLIS_ONE for kappa so // that the underlying packm implementation does not perform // any scaling during packing. buf_kappa = bli_obj_buffer_for_const( dt_p, &BLIS_ONE ); } else // if ( bli_is_ind_packed( schema ) ) { obj_t* kappa_p; // The value for kappa we use will depend on whether the scalar // attached to A has a nonzero imaginary component. If it does, // then we will apply the scalar during packing to facilitate // implementing induced complex domain algorithms in terms of // real domain micro-kernels. (In the aforementioned situation, // applying a real scalar is easy, but applying a complex one is // harder, so we avoid the need altogether with the code below.) if ( bli_obj_scalar_has_nonzero_imag( p ) ) { // Detach the scalar. bli_obj_scalar_detach( p, &kappa ); // Reset the attached scalar (to 1.0). bli_obj_scalar_reset( p ); kappa_p = κ } else { // If the internal scalar of A has only a real component, then // we will apply it later (in the micro-kernel), and so we will // use BLIS_ONE to indicate no scaling during packing. kappa_p = &BLIS_ONE; } // Acquire the buffer to the kappa chosen above. buf_kappa = bli_obj_buffer_for_1x1( dt_p, kappa_p ); } // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_c][dt_p]; // Invoke the function. 
f( transc, schema, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, is_p, pd_p, ps_p, cntx, t ); } #undef GENTFUNC2 #define GENTFUNC2( ctype_c, ctype_p, chc, chp, varname ) \ \ void PASTEMAC2(chc,chp,varname) \ ( \ trans_t transc, \ pack_t schema, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ dim_t pd_p, inc_t ps_p, \ cntx_t* cntx, \ thrinfo_t* thread \ ) \ { \ ctype_p* restrict kappa_cast = kappa; \ ctype_c* restrict c_cast = c; \ ctype_p* restrict p_cast = p; \ ctype_c* restrict c_begin; \ ctype_p* restrict p_begin; \ \ dim_t iter_dim; \ dim_t n_iter; \ dim_t it, ic, ip; \ doff_t ic_inc, ip_inc; \ dim_t panel_len_full; \ dim_t panel_len_i; \ dim_t panel_len_max; \ dim_t panel_len_max_i; \ dim_t panel_dim_i; \ dim_t panel_dim_max; \ inc_t vs_c; \ inc_t p_inc; \ dim_t* m_panel_use; \ dim_t* n_panel_use; \ dim_t* m_panel_max; \ dim_t* n_panel_max; \ conj_t conjc; \ bool_t row_stored; \ bool_t col_stored; \ \ ctype_c* restrict c_use; \ ctype_p* restrict p_use; \ \ \ /* Extract the conjugation bit from the transposition argument. */ \ conjc = bli_extract_conj( transc ); \ \ /* If c needs a transposition, induce it so that we can more simply express the remaining parameters and code. */ \ if ( bli_does_trans( transc ) ) \ { \ bli_swap_incs( &rs_c, &cs_c ); \ bli_toggle_trans( &transc ); \ } \ \ /* Create flags to incidate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel. Hence the mismatch in "row" and "column" semantics. */ \ row_stored = bli_is_col_packed( schema ); \ col_stored = bli_is_row_packed( schema ); \ \ ( void )col_stored; \ \ /* If the row storage flag indicates row storage, then we are packing to column panels; otherwise, if the strides indicate column storage, we are packing to row panels. 
*/ \ if ( row_stored ) \ { \ /* Prepare to pack to row-stored column panels. */ \ iter_dim = n; \ panel_len_full = m; \ panel_len_max = m_max; \ panel_dim_max = pd_p; \ vs_c = cs_c; \ m_panel_use = &panel_len_i; \ n_panel_use = &panel_dim_i; \ m_panel_max = &panel_len_max_i; \ n_panel_max = &panel_dim_max; \ } \ else /* if ( col_stored ) */ \ { \ /* Prepare to pack to column-stored row panels. */ \ iter_dim = m; \ panel_len_full = n; \ panel_len_max = n_max; \ panel_dim_max = pd_p; \ vs_c = rs_c; \ m_panel_use = &panel_dim_i; \ n_panel_use = &panel_len_i; \ m_panel_max = &panel_dim_max; \ n_panel_max = &panel_len_max_i; \ } \ \ /* Compute the total number of iterations we'll need. */ \ n_iter = iter_dim / panel_dim_max + ( iter_dim % panel_dim_max ? 1 : 0 ); \ \ { \ ic_inc = panel_dim_max; \ ip_inc = 1; \ } \ \ p_begin = p_cast; \ \ /* Query the number of threads and thread ids from the current thread's packm thrinfo_t node. */ \ const dim_t nt = bli_thread_n_way( thread ); \ const dim_t tid = bli_thread_work_id( thread ); \ \ /* Suppress unused variable warnings when slab partitioning is enabled, since the slab-based definition of bli_packm_my_iter() does not actually use tid or nt. */ \ ( void )nt; ( void )tid; \ \ dim_t it_start, it_end, it_inc; \ \ /* Determine the thread range and increment using the current thread's packm thrinfo_t node. NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. 
*/ \ bli_thread_range_jrir( thread, n_iter, 1, FALSE, &it_start, &it_end, &it_inc ); \ \ for ( ic = 0, ip = 0, it = 0; it < n_iter; \ ic += ic_inc, ip += ip_inc, it += 1 ) \ { \ panel_dim_i = bli_min( panel_dim_max, iter_dim - ic ); \ \ c_begin = c_cast + (ic )*vs_c; \ \ { \ c_use = c_begin; \ p_use = p_begin; \ \ panel_len_i = panel_len_full; \ panel_len_max_i = panel_len_max; \ \ if ( bli_packm_my_iter( it, it_start, it_end, tid, nt ) ) \ { \ PASTEMAC2(chc,chp,packm_struc_cxk_md) \ ( \ conjc, \ schema, \ *m_panel_use, \ *n_panel_use, \ *m_panel_max, \ *n_panel_max, \ kappa_cast, \ c_use, rs_c, cs_c, \ p_use, rs_p, cs_p, \ is_p, \ cntx \ ); \ } \ \ p_inc = ps_p; \ } \ \ /* if ( row_stored ) \ PASTEMAC(chp,fprintm)( stdout, "packm_blk_var1_md: b packed", *m_panel_max, *n_panel_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ else \ PASTEMAC(chp,fprintm)( stdout, "packm_blk_var1_md: a packed", *m_panel_max, *n_panel_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ */ \ \ p_begin += p_inc; \ \ } \ } INSERT_GENTFUNC2_BASIC0( packm_blk_var1_md ) INSERT_GENTFUNC2_MIXDP0( packm_blk_var1_md ) #endif blis-0.6.1/frame/1m/packm/bli_packm_blk_var1_md.h000066400000000000000000000045621360743507500215250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_packm_blk_var1_md ( obj_t* c, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* t ); #undef GENTPROT2 #define GENTPROT2( ctype_c, ctype_p, chc, chp, varname ) \ \ void PASTEMAC2(chc,chp,varname) \ ( \ trans_t transc, \ pack_t schema, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ dim_t pd_p, inc_t ps_p, \ cntx_t* cntx, \ thrinfo_t* thread \ ); INSERT_GENTPROT2_BASIC0( packm_blk_var1_md ) INSERT_GENTPROT2_MIXDP0( packm_blk_var1_md ) blis-0.6.1/frame/1m/packm/bli_packm_check.c000066400000000000000000000054021360743507500204060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

//
// Validates the operands of packm initialization. Only the datatype of the
// source object a is checked; p and cntx are accepted but not inspected.
//
void bli_packm_init_check
     (
       obj_t*  a,
       obj_t*  p,
       cntx_t* cntx
     )
{
	// The source object must hold a floating-point datatype.
	const err_t a_dt_status = bli_check_floating_object( a );
	bli_check_error_code( a_dt_status );

	// The control tree pointer is deliberately not validated here: a NULL
	// control tree is interpreted (in bli_packm_int()) as a request to skip
	// the operation, so it cannot be treated as an error. The check that
	// would otherwise go here is:
	//   e_val = bli_check_valid_cntl( ( void* )cntl );
	//   bli_check_error_code( e_val );
}

//
// Validates the operands of the internal packm back-end: both objects must
// hold floating-point datatypes and their dimensions must be conformal.
// cntx is accepted but not inspected.
//
void bli_packm_int_check
     (
       obj_t*  a,
       obj_t*  p,
       cntx_t* cntx
     )
{
	// Both the source and the packed object must be floating-point.
	const err_t a_dt_status = bli_check_floating_object( a );
	bli_check_error_code( a_dt_status );

	const err_t p_dt_status = bli_check_floating_object( p );
	bli_check_error_code( p_dt_status );

	// The two objects must have conformal dimensions.
	const err_t dims_status = bli_check_conformal_dims( a, p );
	bli_check_error_code( dims_status );

	// As in bli_packm_init_check(), the control tree pointer is not
	// validated because a NULL value (in bli_packm_int()) is a request to
	// skip the operation. The check that would otherwise go here is:
	//   e_val = bli_check_valid_cntl( ( void* )cntl );
	//   bli_check_error_code( e_val );
}
blis-0.6.1/frame/1m/packm/bli_packm_check.h000066400000000000000000000035141360743507500204150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_packm_init_check ( obj_t* a, obj_t* p, cntx_t* cntx ); void bli_packm_int_check ( obj_t* a, obj_t* p, cntx_t* cntx ); blis-0.6.1/frame/1m/packm/bli_packm_cntl.c000066400000000000000000000063221360743507500202730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" cntl_t* bli_packm_cntl_create_node ( rntm_t* rntm, void_fp var_func, void_fp packm_var_func, bszid_t bmid_m, bszid_t bmid_n, bool_t does_invert_diag, bool_t rev_iter_if_upper, bool_t rev_iter_if_lower, pack_t pack_schema, packbuf_t pack_buf_type, cntl_t* sub_node ) { cntl_t* cntl; packm_params_t* params; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_packm_cntl_create_node(): " ); #endif // Allocate a packm_params_t struct. params = bli_sba_acquire( rntm, sizeof( packm_params_t ) ); // Initialize the packm_params_t struct. params->size = sizeof( packm_params_t ); params->var_func = packm_var_func; params->bmid_m = bmid_m; params->bmid_n = bmid_n; params->does_invert_diag = does_invert_diag; params->rev_iter_if_upper = rev_iter_if_upper; params->rev_iter_if_lower = rev_iter_if_lower; params->pack_schema = pack_schema; params->pack_buf_type = pack_buf_type; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_packm_cntl_create_node(): " ); #endif // It's important that we set the bszid field to BLIS_NO_PART to indicate // that no blocksize partitioning is performed. bli_cntl_free() will rely // on this information to know how to step through the thrinfo_t tree in // sync with the cntl_t tree. 
cntl = bli_cntl_create_node ( rntm, BLIS_NOID, BLIS_NO_PART, var_func, params, sub_node ); return cntl; } blis-0.6.1/frame/1m/packm/bli_packm_cntl.h000066400000000000000000000073141360743507500203020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ struct packm_params_s { uint64_t size; // size field must be present and come first. 
packm_var_oft var_func; bszid_t bmid_m; bszid_t bmid_n; bool_t does_invert_diag; bool_t rev_iter_if_upper; bool_t rev_iter_if_lower; pack_t pack_schema; packbuf_t pack_buf_type; }; typedef struct packm_params_s packm_params_t; static packm_var_oft bli_cntl_packm_params_var_func( cntl_t* cntl ) { packm_params_t* ppp = ( packm_params_t* )cntl->params; return ppp->var_func; } static bszid_t bli_cntl_packm_params_bmid_m( cntl_t* cntl ) { packm_params_t* ppp = ( packm_params_t* )cntl->params; return ppp->bmid_m; } static bszid_t bli_cntl_packm_params_bmid_n( cntl_t* cntl ) { packm_params_t* ppp = ( packm_params_t* )cntl->params; return ppp->bmid_n; } static bool_t bli_cntl_packm_params_does_invert_diag( cntl_t* cntl ) { packm_params_t* ppp = ( packm_params_t* )cntl->params; return ppp->does_invert_diag; } static bool_t bli_cntl_packm_params_rev_iter_if_upper( cntl_t* cntl ) { packm_params_t* ppp = ( packm_params_t* )cntl->params; return ppp->rev_iter_if_upper; } static bool_t bli_cntl_packm_params_rev_iter_if_lower( cntl_t* cntl ) { packm_params_t* ppp = ( packm_params_t* )cntl->params; return ppp->rev_iter_if_lower; } static pack_t bli_cntl_packm_params_pack_schema( cntl_t* cntl ) { packm_params_t* ppp = ( packm_params_t* )cntl->params; return ppp->pack_schema; } static packbuf_t bli_cntl_packm_params_pack_buf_type( cntl_t* cntl ) { packm_params_t* ppp = ( packm_params_t* )cntl->params; return ppp->pack_buf_type; } // ----------------------------------------------------------------------------- cntl_t* bli_packm_cntl_create_node ( rntm_t* rntm, void_fp var_func, void_fp packm_var_func, bszid_t bmid_m, bszid_t bmid_n, bool_t does_invert_diag, bool_t rev_iter_if_upper, bool_t rev_iter_if_lower, pack_t pack_schema, packbuf_t pack_buf_type, cntl_t* sub_node ); blis-0.6.1/frame/1m/packm/bli_packm_cxk.c000066400000000000000000000154311360743507500201210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

//
// Typed template for packing one micro-panel. If the context supplies a
// packm kernel for the given panel_dim_max, that kernel does all of the
// work; otherwise the panel is copied/scaled with scal2m and the unused
// edge rows and columns are zeroed explicitly.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       conj_t  conja, \
       pack_t  schema, \
       dim_t   panel_dim, \
       dim_t   panel_dim_max, \
       dim_t   panel_len, \
       dim_t   panel_len_max, \
       ctype*  kappa, \
       ctype*  a, inc_t inca, inc_t lda, \
       ctype*  p,             inc_t ldp, \
       cntx_t* cntx  \
     ) \
{ \
	/* The kernel is looked up by panel_dim_max rather than panel_dim, so
	   the same kernel is used for full panels and for edge cases. */ \
	num_t   dt     = PASTEMAC(ch,type); \
	l1mkr_t ker_id = panel_dim_max; \
\
	/* Ask the context for the packm kernel matching this kernel id. The
	   query yields NULL when the id is invalid. */ \
	PASTECH2(ch,opname,_ker_ft) packm_ker = bli_cntx_get_packm_ker_dt( dt, ker_id, cntx ); \
\
	/* Preferred path: a kernel exists for this micro-panel dimension. */ \
	if ( packm_ker != NULL ) \
	{ \
		/* Under normal circumstances, the packm kernel will copy over a
		   panel_dim x panel_len submatrix of A into P. However, the
		   kernel now handles zero-filling at edge cases, which typically
		   consist of the outer (panel_dim_max - panel_dim) rows or
		   columns of the micropanel. (Note that these rows/columns
		   correspond to values beyond the edge of matrix A.) The kernel
		   intrinsically knows its own panel_dim_max, since that
		   corresponds to the packm kernel's leading dimension. However,
		   we *do* need to pass in panel_len_max because the bottom-right
		   edge case of trsm_lu will need all elements above the extended
		   diagonal and beyond (to the right of) the bottom-right element
		   to be initialized to zero so the trsm portion of the
		   computational kernel will operate with zeros for those
		   iterations.

		   For example, if trsm_lu is executed on an 10x10 triangular
		   matrix, and the gemmtrsm kernel uses MR = 6, the computation
		   will begin with the edge case, which is the bottom-right 4x4
		   upper triangular matrix. Code in bli_packm_tri_cxk() will
		   extend the diagonal as identity into the remaining portion of
		   the micropanel. But before that happens, the packm kernel must
		   have set the 0's added in step (3) below.

		     packm kernel     packm kernel     packm kernel     packm_tri_cxk
		     step 1:          step 2:          step 3:          step 4:

		     x x x x . .      x x x x . .      x x x x 0 0      x x x x 0 0
		     ? x x x . .      ? x x x . .      ? x x x 0 0      ? x x x 0 0
		     ? ? x x . .  ->  ? ? x x . .  ->  ? ? x x 0 0  ->  ? ? x x 0 0
		     ? ? ? x . .      ? ? ? x . .      ? ? ? x 0 0      ? ? ? x 0 0
		     . . . . . .      0 0 0 0 0 0      0 0 0 0 0 0      0 0 0 0 1 0
		     . . . . . .      0 0 0 0 0 0      0 0 0 0 0 0      0 0 0 0 0 1

		     x  Copied from A; valid element.
		     ?  Copied from A, but value is unknown and unused.
		     .  Uninitialized.
		     0  Initialized to zero.
		     1  Initialized to one.

		   NOTE: In step 5 (not shown), bli_packm_tri_cxk() sets the ?'s
		   to zero. This is not needed to support trsm, but rather to
		   support trmm. (Both use the same packing format and code.)

		   In this case, panel_dim will be 4 because four rows of data are
		   copied from A, panel_len will be 4 because those four rows span
		   four columns of A, and panel_len_max will be 6 because there are
		   a total of 6 columns that can be written to in the packed
		   micropanel, 2 of which lie beyond the values copied from A. */ \
		packm_ker \
		( \
		  conja, \
		  schema, \
		  panel_dim, \
		  panel_len, \
		  panel_len_max, \
		  kappa, \
		  a, inca, lda, \
		  p,       ldp, \
		  cntx  \
		); \
\
		return; \
	} \
\
	/* Fallback path: no kernel available. Treat the micro-panel as
	   panel_dim x panel_len and column-stored (unit row stride), and
	   copy/scale it with scal2m. */ \
	PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \
	( \
	  0, \
	  BLIS_NONUNIT_DIAG, \
	  BLIS_DENSE, \
	  ( trans_t )conja, \
	  panel_dim, \
	  panel_len, \
	  kappa, \
	  a, inca, lda, \
	  p, 1,    ldp, \
	  cntx, \
	  /* A NULL rntm_t* is safe here as long as scal2m_ex() does not
	     use it. */ \
	  NULL  \
	); \
\
	/* Zero the rows at and beyond index panel_dim, across the full
	   panel_len_max columns, when the panel is short in its panel
	   dimension. */ \
	if ( panel_dim < panel_dim_max ) \
	{ \
		ctype* restrict unused_rows = p + panel_dim; \
\
		PASTEMAC(ch,set0s_mxn) \
		( \
		  panel_dim_max - panel_dim, \
		  panel_len_max, \
		  unused_rows, 1, ldp  \
		); \
	} \
\
	/* Zero the columns at and beyond index panel_len, across all
	   panel_dim_max rows, when the panel is short in its length. */ \
	if ( panel_len < panel_len_max ) \
	{ \
		ctype* restrict unused_cols = p + panel_len * ldp; \
\
		PASTEMAC(ch,set0s_mxn) \
		( \
		  panel_dim_max, \
		  panel_len_max - panel_len, \
		  unused_cols, 1, ldp  \
		); \
	} \
}

INSERT_GENTFUNC_BASIC0( packm_cxk )
blis-0.6.1/frame/1m/packm/bli_packm_cxk.h000066400000000000000000000041071360743507500201240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
   ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.

*/

/*
   Prototype template for the packm_cxk front-end. INSERT_GENTPROT_BASIC0
   is defined elsewhere; presumably it expands GENTPROT once per datatype
   (TODO confirm against the macro's definition).

   Parameters, as used by the packm_cxk definition:
     conja          conjugation to apply to A while packing.
     schema         pack schema; forwarded to the packm kernel.
     panel_dim      number of rows of the micro-panel copied from A.
     panel_dim_max  used as the kernel id when querying the context, and
                    as the row bound when zero-filling the micro-panel.
     panel_len      number of columns of the micro-panel copied from A.
     panel_len_max  column bound when zero-filling the micro-panel.
     kappa          scalar forwarded to the kernel (or to scal2m).
     a, inca, lda   source matrix and its two strides.
     p, ldp         destination micro-panel (unit row stride) and its
                    leading dimension.
     cntx           context queried for the packm kernel.
*/
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       conj_t  conja, \
       pack_t  schema, \
       dim_t   panel_dim, \
       dim_t   panel_dim_max, \
       dim_t   panel_len, \
       dim_t   panel_len_max, \
       ctype*  kappa, \
       ctype*  a, inc_t inca, inc_t lda, \
       ctype*  p,             inc_t ldp, \
       cntx_t* cntx  \
     );

INSERT_GENTPROT_BASIC0( packm_cxk )
blis-0.6.1/frame/1m/packm/bli_packm_cxk_1er.c000066400000000000000000000103241360743507500206640ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conja, \ pack_t schema, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* kappa, \ ctype* a, inc_t inca, inc_t lda, \ ctype* p, inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Note that we use panel_dim_max, not panel_dim, to query the packm kernel function pointer. This means that we always use the same kernel, even for edge cases. */ \ num_t dt = PASTEMAC(ch,type); \ l1mkr_t ker_id = panel_dim_max; \ \ PASTECH2(ch,opname,_ker_ft) f; \ \ /* Query the context for the packm kernel corresponding to the current panel dimension, or kernel id. If the id is invalid, the function will return NULL. */ \ f = bli_cntx_get_packm_ker_dt( dt, ker_id, cntx ); \ \ /* If there exists a kernel implementation for the micro-panel dimension provided, we invoke the implementation. Otherwise, we use scal2m. */ \ if ( f != NULL ) \ { \ f \ ( \ conja, \ schema, \ panel_dim, \ panel_len, \ panel_len_max, \ kappa, \ a, inca, lda, \ p, ldp, \ cntx \ ); \ } \ else \ { \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). 
*/ \ \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ panel_dim, \ panel_len, \ kappa, \ a, inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. */ \ if ( panel_dim < panel_dim_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = panel_dim; \ const dim_t offn = 0; \ const dim_t m_edge = panel_dim_max - panel_dim; \ const dim_t n_edge = panel_len_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. */ \ if ( panel_len < panel_len_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - panel_len; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC0( packm_cxk_1er ) blis-0.6.1/frame/1m/packm/bli_packm_cxk_1er.h000066400000000000000000000041371360743507500206760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

/*
   Prototype template for the packm_cxk_1er front-end.
   INSERT_GENTPROTCO_BASIC0 is defined elsewhere; presumably it expands
   GENTPROTCO for the complex datatypes only (TODO confirm).

   Parameters, as used by the packm_cxk_1er definition:
     conja          conjugation to apply to A while packing.
     schema         pack schema; forwarded to the kernel and to the
                    scal21ms_mxn/set1ms_mxn fallback.
     panel_dim      number of rows of the micro-panel copied from A.
     panel_dim_max  used as the kernel id when querying the context, and
                    as the row bound when zero-filling the micro-panel.
     panel_len      number of columns of the micro-panel copied from A.
     panel_len_max  column bound when zero-filling the micro-panel.
     kappa          scalar forwarded to the kernel (or to scal21ms_mxn).
     a, inca, lda   source matrix and its two strides.
     p, ldp         destination micro-panel (unit row stride) and its
                    leading dimension.
     cntx           context queried for the packm kernel.
*/
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       conj_t  conja, \
       pack_t  schema, \
       dim_t   panel_dim, \
       dim_t   panel_dim_max, \
       dim_t   panel_len, \
       dim_t   panel_len_max, \
       ctype*  kappa, \
       ctype*  a, inc_t inca, inc_t lda, \
       ctype*  p,             inc_t ldp, \
       cntx_t* cntx  \
     );

INSERT_GENTPROTCO_BASIC0( packm_cxk_1er )
blis-0.6.1/frame/1m/packm/bli_packm_cxk_3mis.c000066400000000000000000000131761360743507500210600ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conja, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* kappa, \ ctype* a, inc_t inca, inc_t lda, \ ctype* p, inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Note that we use panel_dim_max, not panel_dim, to query the packm kernel function pointer. This means that we always use the same kernel, even for edge cases. */ \ num_t dt = PASTEMAC(ch,type); \ l1mkr_t ker_id = panel_dim_max; \ \ PASTECH2(ch,opname,_ker_ft) f; \ \ /* Query the context for the packm kernel corresponding to the current panel dimension, or kernel id. If the id is invalid, the function will return NULL. */ \ f = bli_cntx_get_packm_ker_dt( dt, ker_id, cntx ); \ \ /* If there exists a kernel implementation for the micro-panel dimension provided, we invoke the implementation. Otherwise, we use scal2m. 
*/ \ if ( f != NULL ) \ { \ f \ ( \ conja, \ panel_dim, \ panel_len, \ panel_len_max, \ kappa, \ a, inca, lda, \ p, is_p, ldp, \ cntx \ ); \ } \ else \ { \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). */ \ \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ panel_dim, \ panel_len, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. */ \ if ( panel_dim < panel_dim_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = panel_dim; \ const dim_t m_edge = panel_dim_max - i; \ const dim_t n_edge = panel_len_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. 
*/ \ if ( panel_len < panel_len_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC0( packm_cxk_3mis ) blis-0.6.1/frame/1m/packm/bli_packm_cxk_3mis.h000066400000000000000000000041071360743507500210570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

/*
   Prototype template for the packm_cxk_3mis front-end.
   INSERT_GENTPROTCO_BASIC0 is defined elsewhere; presumably it expands
   GENTPROTCO for the complex datatypes only (TODO confirm).

   Parameters, as used by the packm_cxk_3mis definition:
     conja          conjugation to apply to A while packing.
     panel_dim      number of rows of the micro-panel copied from A.
     panel_dim_max  used as the kernel id when querying the context, and
                    as the row bound when zero-filling the micro-panel.
     panel_len      number of columns of the micro-panel copied from A.
     panel_len_max  column bound when zero-filling the micro-panel.
     kappa          scalar forwarded to the kernel (or to scal2ri3s_mxn).
     a, inca, lda   source matrix and its two strides.
     p, is_p, ldp   destination micro-panel, the offset (in units of
                    ctype_r) between its three sub-panels, and its
                    leading dimension.
     cntx           context queried for the packm kernel.
*/
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       conj_t  conja, \
       dim_t   panel_dim, \
       dim_t   panel_dim_max, \
       dim_t   panel_len, \
       dim_t   panel_len_max, \
       ctype*  kappa, \
       ctype*  a, inc_t inca, inc_t lda, \
       ctype*  p, inc_t is_p, inc_t ldp, \
       cntx_t* cntx  \
     );

INSERT_GENTPROTCO_BASIC0( packm_cxk_3mis )
blis-0.6.1/frame/1m/packm/bli_packm_cxk_4mi.c000066400000000000000000000105661360743507500206760ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conja, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* kappa, \ ctype* a, inc_t inca, inc_t lda, \ ctype* p, inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Note that we use panel_dim_max, not panel_dim, to query the packm kernel function pointer. This means that we always use the same kernel, even for edge cases. */ \ num_t dt = PASTEMAC(ch,type); \ l1mkr_t ker_id = panel_dim_max; \ \ PASTECH2(ch,opname,_ker_ft) f; \ \ /* Query the context for the packm kernel corresponding to the current panel dimension, or kernel id. If the id is invalid, the function will return NULL. */ \ f = bli_cntx_get_packm_ker_dt( dt, ker_id, cntx ); \ \ /* If there exists a kernel implementation for the micro-panel dimension provided, we invoke the implementation. Otherwise, we use scal2m. 
*/ \ if ( f != NULL ) \ { \ f \ ( \ conja, \ panel_dim, \ panel_len, \ panel_len_max, \ kappa, \ a, inca, lda, \ p, is_p, ldp, \ cntx \ ); \ } \ else \ { \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). */ \ \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ panel_dim, \ panel_len, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. */ \ if ( panel_dim != panel_dim_max ) \ { \ const dim_t i = panel_dim; \ const dim_t m_edge = panel_dim_max - i; \ const dim_t n_edge = panel_len_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. */ \ if ( panel_len != panel_len_max ) \ { \ const dim_t j = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC0( packm_cxk_4mi ) blis-0.6.1/frame/1m/packm/bli_packm_cxk_4mi.h000066400000000000000000000041061360743507500206740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

/*
   Prototype template for the packm_cxk_4mi front-end.
   INSERT_GENTPROTCO_BASIC0 is defined elsewhere; presumably it expands
   GENTPROTCO for the complex datatypes only (TODO confirm).

   Parameters, as used by the packm_cxk_4mi definition:
     conja          conjugation to apply to A while packing.
     panel_dim      number of rows of the micro-panel copied from A.
     panel_dim_max  used as the kernel id when querying the context, and
                    as the row bound when zero-filling the micro-panel.
     panel_len      number of columns of the micro-panel copied from A.
     panel_len_max  column bound when zero-filling the micro-panel.
     kappa          scalar forwarded to the kernel (or to scal2ris_mxn).
     a, inca, lda   source matrix and its two strides.
     p, is_p, ldp   destination micro-panel, the offset (in units of
                    ctype_r) between its two sub-panels, and its leading
                    dimension.
     cntx           context queried for the packm kernel.
*/
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       conj_t  conja, \
       dim_t   panel_dim, \
       dim_t   panel_dim_max, \
       dim_t   panel_len, \
       dim_t   panel_len_max, \
       ctype*  kappa, \
       ctype*  a, inc_t inca, inc_t lda, \
       ctype*  p, inc_t is_p, inc_t ldp, \
       cntx_t* cntx  \
     );

INSERT_GENTPROTCO_BASIC0( packm_cxk_4mi )
blis-0.6.1/frame/1m/packm/bli_packm_cxk_rih.c000066400000000000000000000107051360743507500207620ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conja, \ pack_t schema, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* kappa, \ ctype* a, inc_t inca, inc_t lda, \ ctype* p, inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Note that we use panel_dim_max, not panel_dim, to query the packm kernel function pointer. This means that we always use the same kernel, even for edge cases. 
*/ \ num_t dt = PASTEMAC(ch,type); \ l1mkr_t ker_id = panel_dim_max; \ \ PASTECH2(ch,opname,_ker_ft) f; \ \ /* Query the context for the packm kernel corresponding to the current panel dimension, or kernel id. If the id is invalid, the function will return NULL. */ \ f = bli_cntx_get_packm_ker_dt( dt, ker_id, cntx ); \ \ /* If there exists a kernel implementation for the micro-panel dimension provided, we invoke the implementation. Otherwise, we use scal2m. */ \ if ( 0 && f != NULL ) \ { \ f \ ( \ conja, \ schema, \ panel_dim, \ panel_len, \ panel_len_max, \ kappa, \ a, inca, lda, \ p, ldp, \ cntx \ ); \ } \ else \ { \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). */ \ \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ panel_dim, \ panel_len, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. */ \ if ( panel_dim != panel_dim_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = panel_dim; \ const dim_t m_edge = panel_dim_max - i; \ const dim_t n_edge = panel_len_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. 
*/ \ if ( panel_len != panel_len_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC0( packm_cxk_rih ) blis-0.6.1/frame/1m/packm/bli_packm_cxk_rih.h000066400000000000000000000041371360743507500207710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
   ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   POSSIBILITY OF SUCH DAMAGE.

*/

/*
   Prototype template for the packm_cxk_rih front-end.
   INSERT_GENTPROTCO_BASIC0 is defined elsewhere; presumably it expands
   GENTPROTCO for the complex datatypes only (TODO confirm).

   Parameters, as used by the packm_cxk_rih definition:
     conja          conjugation to apply to A while packing.
     schema         pack schema; forwarded to the kernel and to the
                    scal2rihs_mxn fallback.
     panel_dim      number of rows of the micro-panel copied from A.
     panel_dim_max  used as the kernel id when querying the context, and
                    as the row bound when zero-filling the micro-panel.
     panel_len      number of columns of the micro-panel copied from A.
     panel_len_max  column bound when zero-filling the micro-panel.
     kappa          scalar forwarded to the kernel (or to scal2rihs_mxn).
     a, inca, lda   source matrix and its two strides.
     p, ldp         destination micro-panel (unit row stride) and its
                    leading dimension.
     cntx           context queried for the packm kernel.
*/
#undef GENTPROTCO
#define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       conj_t  conja, \
       pack_t  schema, \
       dim_t   panel_dim, \
       dim_t   panel_dim_max, \
       dim_t   panel_len, \
       dim_t   panel_len_max, \
       ctype*  kappa, \
       ctype*  a, inc_t inca, inc_t lda, \
       ctype*  p,             inc_t ldp, \
       cntx_t* cntx  \
     );

INSERT_GENTPROTCO_BASIC0( packm_cxk_rih )
blis-0.6.1/frame/1m/packm/bli_packm_init.c000066400000000000000000000523311360743507500202770ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2016, Hewlett Packard Enterprise Development LP

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" siz_t bli_packm_init ( obj_t* a, obj_t* p, cntx_t* cntx, cntl_t* cntl ) { bli_init_once(); // The purpose of packm_init() is to initialize an object P so that // a source object A can be packed into P via one of the packm // implementations. This initialization precedes the acquisition of a // suitable block of memory from the memory allocator (if such a block // of memory has not already been allocated previously). bszid_t bmult_id_m; bszid_t bmult_id_n; bool_t does_invert_diag; bool_t rev_iter_if_upper; bool_t rev_iter_if_lower; pack_t schema; //packbuf_t pack_buf_type; siz_t size_needed; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_packm_init_check( a, p, cntx ); // Extract various fields from the control tree. 
bmult_id_m = bli_cntl_packm_params_bmid_m( cntl ); bmult_id_n = bli_cntl_packm_params_bmid_n( cntl ); does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl ); rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl ); rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl ); schema = bli_cntl_packm_params_pack_schema( cntl ); //pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl ); #if 0 // Let us now check to see if the object has already been packed. First // we check if it has been packed to an unspecified (row or column) // format, in which case we can alias the object and return. // NOTE: The reason we don't need to even look at the control tree in // this case is as follows: an object's pack status is only set to // BLIS_PACKED_UNSPEC for situations when the actual format used is // not important, as long as its packed into contiguous rows or // contiguous columns. A good example of this is packing for matrix // operands in the level-2 operations. if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC ) { bli_obj_alias_to( a, p ); return 0; } // Now we check if the object has already been packed to the desired // schema (as encoded in the control tree). If so, we can alias and // return 0. // NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED // and thus packing will be called for (but in some cases packing has // already taken place, or does not need to take place, and so that will // be indicated by the pack status). Also, not all combinations of // current pack status and desired pack schema are valid. if ( bli_obj_pack_schema( a ) == pack_schema ) { bli_obj_alias_to( a, p ); return 0; } #endif // If the object is marked as being filled with zeros, then we can skip // the packm operation entirely and alias. 
if ( bli_obj_is_zeros( a ) ) { bli_obj_alias_to( a, p ); return 0; } #if 0 pack_t schema; if ( bli_cntx_method( cntx ) != BLIS_NAT ) { // We now ignore the pack_schema field in the control tree and // extract the schema from the context, depending on whether we are // preparing to pack a block of A or panel of B. For A and B, we must // obtain the schema from the context since the induced methods reuse // the same control trees used by native execution, and those induced // methods specify the schema used by the current execution phase // within the context (whereas the control tree does not change). if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK ) { schema = bli_cntx_schema_a_block( cntx ); } else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL ) { schema = bli_cntx_schema_b_panel( cntx ); } else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) { schema = bli_cntl_packm_params_pack_schema( cntl ); } } else // ( bli_cntx_method( cntx ) == BLIS_NAT ) { // For native execution, we obtain the schema from the control tree // node. (Notice that it doesn't matter if the pack_buf_type is for // A or B.) schema = bli_cntl_packm_params_pack_schema( cntl ); } // This is no longer needed now that we branch between native and // non-native cases above. #if 0 if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) { // If we get a request to pack C for some reason, it is likely // not part of an induced method, and so it would be safe (and // necessary) to read the pack schema from the control tree. schema = bli_cntl_packm_params_pack_schema( cntl ); } #endif #endif // Prepare a few other variables based on properties of the control // tree. 
invdiag_t invert_diag; packord_t pack_ord_if_up; packord_t pack_ord_if_lo; if ( does_invert_diag ) invert_diag = BLIS_INVERT_DIAG; else invert_diag = BLIS_NO_INVERT_DIAG; if ( rev_iter_if_upper ) pack_ord_if_up = BLIS_PACK_REV_IF_UPPER; else pack_ord_if_up = BLIS_PACK_FWD_IF_UPPER; if ( rev_iter_if_lower ) pack_ord_if_lo = BLIS_PACK_REV_IF_LOWER; else pack_ord_if_lo = BLIS_PACK_FWD_IF_LOWER; // Initialize object p for the final packed matrix. size_needed = bli_packm_init_pack ( invert_diag, schema, pack_ord_if_up, pack_ord_if_lo, bmult_id_m, bmult_id_n, a, p, cntx ); // Return the size needed for memory allocation of the packed buffer. return size_needed; } siz_t bli_packm_init_pack ( invdiag_t invert_diag, pack_t schema, packord_t pack_ord_if_up, packord_t pack_ord_if_lo, bszid_t bmult_id_m, bszid_t bmult_id_n, obj_t* a, obj_t* p, cntx_t* cntx ) { bli_init_once(); num_t dt_tar = bli_obj_target_dt( a ); num_t dt_scalar = bli_obj_scalar_dt( a ); trans_t transa = bli_obj_onlytrans_status( a ); dim_t m_a = bli_obj_length( a ); dim_t n_a = bli_obj_width( a ); dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt_tar, bmult_id_m, cntx ); dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt_tar, bmult_id_m, cntx ); dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt_tar, bmult_id_n, cntx ); dim_t bmult_n_pack = bli_cntx_get_blksz_max_dt( dt_tar, bmult_id_n, cntx ); dim_t m_p, n_p; dim_t m_p_pad, n_p_pad; siz_t size_p; siz_t elem_size_p; inc_t rs_p, cs_p; inc_t is_p; // We begin by copying the fields of A. bli_obj_alias_to( a, p ); // Typecast the internal scalar value to the target datatype. // Note that if the typecasting is needed, this must happen BEFORE we // change the datatype of P to reflect the target_dt. if ( dt_scalar != dt_tar ) { bli_obj_scalar_cast_to( dt_tar, p ); } // Update the storage datatype of P to be the target datatype of A. bli_obj_set_dt( dt_tar, p ); // Update the dimension fields to explicitly reflect a transposition, // if needed. 
// Then, clear the conjugation and transposition fields from the object // since matrix packing in BLIS is deemed to take care of all conjugation // and transposition necessary. // Then, we adjust the properties of P when A needs a transposition. // We negate the diagonal offset, and if A is upper- or lower-stored, // we either toggle the uplo of P. // Finally, if we mark P as dense since we assume that all matrices, // regardless of structure, will be densified. bli_obj_set_dims_with_trans( transa, m_a, n_a, p ); bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, p ); if ( bli_does_trans( transa ) ) { bli_obj_negate_diag_offset( p ); if ( bli_obj_is_upper_or_lower( a ) ) bli_obj_toggle_uplo( p ); } // If we are packing micropanels, mark P as dense. Otherwise, we are // probably being called in the context of a level-2 operation, in // which case we do not want to overwrite the uplo field of P (inherited // from A) with BLIS_DENSE because that information may be needed by // the level-2 operation's unblocked variant to decide whether to // execute a "lower" or "upper" branch of code. if ( bli_is_panel_packed( schema ) ) { bli_obj_set_uplo( BLIS_DENSE, p ); } // Reset the view offsets to (0,0). bli_obj_set_offs( 0, 0, p ); // Set the invert diagonal field. bli_obj_set_invert_diag( invert_diag, p ); // Set the pack status of P to the pack schema prescribed in the control // tree node. bli_obj_set_pack_schema( schema, p ); // Set the packing order bits. bli_obj_set_pack_order_if_upper( pack_ord_if_up, p ); bli_obj_set_pack_order_if_lower( pack_ord_if_lo, p ); // Compute the dimensions padded by the dimension multiples. These // dimensions will be the dimensions of the packed matrices, including // zero-padding, and will be used by the macro- and micro-kernels. // We compute them by starting with the effective dimensions of A (now // in P) and aligning them to the dimension multiples (typically equal // to register blocksizes). 
This does waste a little bit of space for // level-2 operations, but that's okay with us. m_p = bli_obj_length( p ); n_p = bli_obj_width( p ); m_p_pad = bli_align_dim_to_mult( m_p, bmult_m_def ); n_p_pad = bli_align_dim_to_mult( n_p, bmult_n_def ); // Save the padded dimensions into the packed object. It is important // to save these dimensions since they represent the actual dimensions // of the zero-padded matrix. bli_obj_set_padded_dims( m_p_pad, n_p_pad, p ); // Now we prepare to compute strides, align them, and compute the // total number of bytes needed for the packed buffer. The caller // will then use that value to acquire an appropriate block of memory // from the memory allocator. // Extract the element size for the packed object. elem_size_p = bli_obj_elem_size( p ); // Set the row and column strides of p based on the pack schema. if ( bli_is_row_packed( schema ) && !bli_is_panel_packed( schema ) ) { // For regular row storage, the padded width of our matrix // should be used for the row stride, with the column stride set // to one. By using the WIDTH of the mem_t region, we allow for // zero-padding (if necessary/desired) along the right edge of // the matrix. rs_p = n_p_pad; cs_p = 1; // Align the leading dimension according to the heap stride // alignment size so that the second, third, etc rows begin at // aligned addresses. rs_p = bli_align_dim_to_size( rs_p, elem_size_p, BLIS_HEAP_STRIDE_ALIGN_SIZE ); // Store the strides in P. bli_obj_set_strides( rs_p, cs_p, p ); // Compute the size of the packed buffer. size_p = m_p_pad * rs_p * elem_size_p; } else if ( bli_is_col_packed( schema ) && !bli_is_panel_packed( schema ) ) { // For regular column storage, the padded length of our matrix // should be used for the column stride, with the row stride set // to one. By using the LENGTH of the mem_t region, we allow for // zero-padding (if necessary/desired) along the bottom edge of // the matrix. 
cs_p = m_p_pad; rs_p = 1; // Align the leading dimension according to the heap stride // alignment size so that the second, third, etc columns begin at // aligned addresses. cs_p = bli_align_dim_to_size( cs_p, elem_size_p, BLIS_HEAP_STRIDE_ALIGN_SIZE ); // Store the strides in P. bli_obj_set_strides( rs_p, cs_p, p ); // Compute the size of the packed buffer. size_p = cs_p * n_p_pad * elem_size_p; } else if ( bli_is_row_packed( schema ) && bli_is_panel_packed( schema ) ) { dim_t m_panel; dim_t ps_p, ps_p_orig; // The panel dimension (for each datatype) should be equal to the // default (logical) blocksize multiple in the m dimension. m_panel = bmult_m_def; // The "column stride" of a row-micropanel packed object is interpreted // as the column stride WITHIN a micropanel. Thus, this is equal to the // packing (storage) blocksize multiple, which may be equal to the // default (logical) blocksize multiple). cs_p = bmult_m_pack; // The "row stride" of a row-micropanel packed object is interpreted // as the row stride WITHIN a micropanel. Thus, it is unit. rs_p = 1; // The "panel stride" of a micropanel packed object is interpreted as // the distance between the (0,0) element of panel k and the (0,0) // element of panel k+1. We use the padded width computed above to // allow for zero-padding (if necessary/desired) along the far end // of each micropanel (ie: the right edge of the matrix). Zero-padding // can also occur along the long edge of the last micropanel if the m // dimension of the matrix is not a whole multiple of MR. ps_p = cs_p * n_p_pad; // As a general rule, we don't want micropanel strides to be odd. This // is primarily motivated by our desire to support interleaved 3m // micropanels, in which case we have to scale the panel stride // by 3/2. That division by 2 means the numerator (prior to being // scaled by 3) must be even. if ( bli_is_odd( ps_p ) ) ps_p += 1; // Preserve this early panel stride value for use later, if needed. 
ps_p_orig = ps_p; // Here, we adjust the panel stride, if necessary. Remember: ps_p is // always interpreted as being in units of the datatype of the object // which is not necessarily how the micropanels will be stored. For // interleaved 3m, we will increase ps_p by 50%, and for ro/io/rpi, // we halve ps_p. Why? Because the macro-kernel indexes in units of // the complex datatype. So these changes "trick" it into indexing // the correct amount. if ( bli_is_3mi_packed( schema ) ) { ps_p = ( ps_p * 3 ) / 2; } else if ( bli_is_3ms_packed( schema ) || bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ) { // The division by 2 below assumes that ps_p is an even number. // However, it is possible that, at this point, ps_p is an odd. // If it is indeed odd, we nudge it higher. if ( bli_is_odd( ps_p ) ) ps_p += 1; // Despite the fact that the packed micropanels will contain // real elements, the panel stride that we store in the obj_t // (which is passed into the macro-kernel) needs to be in units // of complex elements, since the macro-kernel will index through // micropanels via complex pointer arithmetic for trmm/trsm. // Since the indexing "increment" will be twice as large as each // actual stored element, we divide the panel_stride by 2. ps_p = ps_p / 2; } // Set the imaginary stride (in units of fundamental elements) for // 3m and 4m (separated or interleaved). We use ps_p_orig since // that variable tracks the number of real part elements contained // within each micropanel of the source matrix. Therefore, this // is the number of real elements that must be traversed before // reaching the imaginary part (3mi/4mi) of the packed micropanel, // or the real part of the next micropanel (3ms). 
if ( bli_is_3mi_packed( schema ) ) is_p = ps_p_orig; else if ( bli_is_4mi_packed( schema ) ) is_p = ps_p_orig; else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( m_p_pad / m_panel ); else is_p = 1; // Store the strides and panel dimension in P. bli_obj_set_strides( rs_p, cs_p, p ); bli_obj_set_imag_stride( is_p, p ); bli_obj_set_panel_dim( m_panel, p ); bli_obj_set_panel_stride( ps_p, p ); bli_obj_set_panel_length( m_panel, p ); bli_obj_set_panel_width( n_p, p ); // Compute the size of the packed buffer. size_p = ps_p * ( m_p_pad / m_panel ) * elem_size_p; } else if ( bli_is_col_packed( schema ) && bli_is_panel_packed( schema ) ) { dim_t n_panel; dim_t ps_p, ps_p_orig; // The panel dimension (for each datatype) should be equal to the // default (logical) blocksize multiple in the n dimension. n_panel = bmult_n_def; // The "row stride" of a column-micropanel packed object is interpreted // as the row stride WITHIN a micropanel. Thus, this is equal to the // packing (storage) blocksize multiple (which may be equal to the // default (logical) blocksize multiple. rs_p = bmult_n_pack; // The "column stride" of a column-micropanel packed object is // interpreted as the column stride WITHIN a micropanel. Thus, it is // unit. cs_p = 1; // The "panel stride" of a micropanel packed object is interpreted as // the distance between the (0,0) element of panel k and the (0,0) // element of panel k+1. We use the padded length computed above to // allow for zero-padding (if necessary/desired) along the far end // of each micropanel (ie: the bottom edge of the matrix). Zero-padding // can also occur along the long edge of the last micropanel if the n // dimension of the matrix is not a whole multiple of NR. ps_p = m_p_pad * rs_p; // As a general rule, we don't want micropanel strides to be odd. This // is primarily motivated by our desire to support interleaved 3m // micropanels, in which case we have to scale the panel stride // by 3/2. 
That division by 2 means the numerator (prior to being // scaled by 3) must be even. if ( bli_is_odd( ps_p ) ) ps_p += 1; // Preserve this early panel stride value for use later, if needed. ps_p_orig = ps_p; // Here, we adjust the panel stride, if necessary. Remember: ps_p is // always interpreted as being in units of the datatype of the object // which is not necessarily how the micropanels will be stored. For // interleaved 3m, we will increase ps_p by 50%, and for ro/io/rpi, // we halve ps_p. Why? Because the macro-kernel indexes in units of // the complex datatype. So these changes "trick" it into indexing // the correct amount. if ( bli_is_3mi_packed( schema ) ) { ps_p = ( ps_p * 3 ) / 2; } else if ( bli_is_3ms_packed( schema ) || bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ) { // The division by 2 below assumes that ps_p is an even number. // However, it is possible that, at this point, ps_p is an odd. // If it is indeed odd, we nudge it higher. if ( bli_is_odd( ps_p ) ) ps_p += 1; // Despite the fact that the packed micropanels will contain // real elements, the panel stride that we store in the obj_t // (which is passed into the macro-kernel) needs to be in units // of complex elements, since the macro-kernel will index through // micropanels via complex pointer arithmetic for trmm/trsm. // Since the indexing "increment" will be twice as large as each // actual stored element, we divide the panel_stride by 2. ps_p = ps_p / 2; } // Set the imaginary stride (in units of fundamental elements) for // 3m and 4m (separated or interleaved). We use ps_p_orig since // that variable tracks the number of real part elements contained // within each micropanel of the source matrix. Therefore, this // is the number of real elements that must be traversed before // reaching the imaginary part (3mi/4mi) of the packed micropanel, // or the real part of the next micropanel (3ms). 
if ( bli_is_3mi_packed( schema ) ) is_p = ps_p_orig; else if ( bli_is_4mi_packed( schema ) ) is_p = ps_p_orig; else if ( bli_is_3ms_packed( schema ) ) is_p = ps_p_orig * ( n_p_pad / n_panel ); else is_p = 1; // Store the strides and panel dimension in P. bli_obj_set_strides( rs_p, cs_p, p ); bli_obj_set_imag_stride( is_p, p ); bli_obj_set_panel_dim( n_panel, p ); bli_obj_set_panel_stride( ps_p, p ); bli_obj_set_panel_length( m_p, p ); bli_obj_set_panel_width( n_panel, p ); // Compute the size of the packed buffer. size_p = ps_p * ( n_p_pad / n_panel ) * elem_size_p; } else { // NOTE: When implementing block storage, we only need to implement // the following two cases: // - row-stored blocks in row-major order // - column-stored blocks in column-major order // The other two combinations coincide with that of packed row-panel // and packed column- panel storage. size_p = 0; } return size_p; } blis-0.6.1/frame/1m/packm/bli_packm_init.h000066400000000000000000000040471360743507500203050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* bli_packm_init(): prepares p as the packed counterpart of a, using the packm parameters held in the control tree node cntl, and returns the size needed for memory allocation of the packed buffer. It returns 0 (after aliasing p to a) when a is marked as being filled with zeros. */ siz_t bli_packm_init ( obj_t* a, obj_t* p, cntx_t* cntx, cntl_t* cntl ); /* bli_packm_init_pack(): the exported routine that bli_packm_init() calls with explicit packing parameters. It sets the fields of p (datatype, dimensions, padded dimensions, pack schema, packing order, strides) from a and the blocksizes queried from cntx via bmult_id_m and bmult_id_n, and returns the packed buffer size in bytes (0 if the schema is neither row- nor column-packed). */ BLIS_EXPORT_BLIS siz_t bli_packm_init_pack ( invdiag_t invert_diag, pack_t schema, packord_t pack_ord_if_up, packord_t pack_ord_if_lo, bszid_t bmult_id_m, bszid_t bmult_id_n, obj_t* a, obj_t* p, cntx_t* cntx ); blis-0.6.1/frame/1m/packm/bli_packm_int.c000066400000000000000000000074501360743507500201300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_packm_int ( obj_t* a, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ) { bli_init_once(); packm_var_oft f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_packm_int_check( a, p, cntx ); // Sanity check; A should never have a zero dimension. If we must support // it, then we should fold it into the next alias-and-early-exit block. //if ( bli_obj_has_zero_dim( a ) ) bli_abort(); // Let us now check to see if the object has already been packed. First // we check if it has been packed to an unspecified (row or column) // format, in which case we can return, since by now aliasing has already // taken place in packm_init(). // NOTE: The reason we don't need to even look at the control tree in // this case is as follows: an object's pack status is only set to // BLIS_PACKED_UNSPEC for situations when the actual format used is // not important, as long as its packed into contiguous rows or // contiguous columns. A good example of this is packing for matrix // operands in the level-2 operations. if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC ) { return; } // At this point, we can be assured that cntl is not NULL. 
Now we check // if the object has already been packed to the desired schema (as en- // coded in the control tree). If so, we can return, as above. // NOTE: In most cases, an object's pack status will be BLIS_NOT_PACKED // and thus packing will be called for (but in some cases packing has // already taken place, or does not need to take place, and so that will // be indicated by the pack status). Also, not all combinations of // current pack status and desired pack schema are valid. if ( bli_obj_pack_schema( a ) == bli_cntl_packm_params_pack_schema( cntl ) ) { return; } // If the object is marked as being filled with zeros, then we can skip // the packm operation entirely. if ( bli_obj_is_zeros( a ) ) { return; } // Extract the function pointer from the current control tree node. f = bli_cntl_packm_params_var_func( cntl ); // Invoke the variant with kappa_use. f ( a, p, cntx, cntl, thread ); } blis-0.6.1/frame/1m/packm/bli_packm_int.h000066400000000000000000000034221360743507500201300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* bli_packm_int(): internal packm entry point. It returns without packing when a is already packed to an unspecified format, is already packed to the schema stored in cntl, or is marked as all zeros; otherwise it invokes the packm variant function stored in cntl with these same five arguments. */ void bli_packm_int ( obj_t* a, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ); blis-0.6.1/frame/1m/packm/bli_packm_md.h000066400000000000000000000033211360743507500177340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_packm_blk_var1_md.h" #include "bli_packm_struc_cxk_md.h" blis-0.6.1/frame/1m/packm/bli_packm_part.c000066400000000000000000000220411360743507500202750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // -- Matrix partitioning ------------------------------------------------------ void bli_packm_acquire_mpart_t2b( subpart_t requested_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { dim_t m, n; // For now, we only support acquiring the middle subpartition. if ( requested_part != BLIS_SUBPART1 ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } // Partitioning top-to-bottom through packed column panels (which are // row-stored) is not yet supported. if ( bli_obj_is_col_packed( obj ) ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } // Query the dimensions of the parent object. m = bli_obj_length( obj ); n = bli_obj_width( obj ); // Foolproofing: do not let b exceed what's left of the m dimension at // row offset i. if ( b > m - i ) b = m - i; // Begin by copying the info, elem size, buffer, row stride, and column // stride fields of the parent object. Note that this omits copying view // information because the new partition will have its own dimensions // and offsets. bli_obj_init_subpart_from( obj, sub_obj ); // Modify offsets and dimensions of requested partition. 
bli_obj_set_dims( b, n, sub_obj ); // Tweak the padded length of the subpartition to trick the underlying // implementation into only zero-padding for the narrow submatrix of // interest. Usually, the value we want is b (for non-edge cases), but // at the edges, we want the remainder of the mem_t region in the m // dimension. Edge cases are defined as occurring when i + b is exactly // equal to the inherited sub-object's length (which happens since the // determine_blocksize function would have returned a smaller value of // b for the edge iteration). In these cases, we arrive at the new // packed length by simply subtracting off i. { dim_t m_pack_max = bli_obj_padded_length( sub_obj ); dim_t m_pack_cur; if ( i + b == m ) m_pack_cur = m_pack_max - i; else m_pack_cur = b; bli_obj_set_padded_length( m_pack_cur, sub_obj ); } // Translate the desired offsets to a panel offset and adjust the // buffer pointer of the subpartition object. { char* buf_p = bli_obj_buffer( sub_obj ); siz_t elem_size = bli_obj_elem_size( sub_obj ); dim_t off_to_panel = bli_packm_offset_to_panel_for( i, sub_obj ); buf_p = buf_p + elem_size * off_to_panel; bli_obj_set_buffer( buf_p, sub_obj ); } } void bli_packm_acquire_mpart_l2r( subpart_t requested_part, dim_t j, dim_t b, obj_t* obj, obj_t* sub_obj ) { dim_t m, n; // Check parameters. //if ( bli_error_checking_is_enabled() ) // bli_packm_acquire_mpart_l2r_check( requested_part, j, b, obj, sub_obj ); // For now, we only support acquiring the middle subpartition. if ( requested_part != BLIS_SUBPART1 ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } // Partitioning left-to-right through packed row panels (which are // column-stored) is not yet supported. if ( bli_obj_is_row_packed( obj ) ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } // Query the dimensions of the parent object. m = bli_obj_length( obj ); n = bli_obj_width( obj ); // Foolproofing: do not let b exceed what's left of the n dimension at // column offset j. 
if ( b > n - j ) b = n - j; // Begin by copying the info, elem size, buffer, row stride, and column // stride fields of the parent object. Note that this omits copying view // information because the new partition will have its own dimensions // and offsets. bli_obj_init_subpart_from( obj, sub_obj ); // Modify offsets and dimensions of requested partition. bli_obj_set_dims( m, b, sub_obj ); // Tweak the padded width of the subpartition to trick the underlying // implementation into only zero-padding for the narrow submatrix of // interest. Usually, the value we want is b (for non-edge cases), but // at the edges, we want the remainder of the mem_t region in the n // dimension. Edge cases are defined as occurring when j + b is exactly // equal to the inherited sub-object's width (which happens since the // determine_blocksize function would have returned a smaller value of // b for the edge iteration). In these cases, we arrive at the new // packed width by simply subtracting off j. { dim_t n_pack_max = bli_obj_padded_width( sub_obj ); dim_t n_pack_cur; if ( j + b == n ) n_pack_cur = n_pack_max - j; else n_pack_cur = b; bli_obj_set_padded_width( n_pack_cur, sub_obj ); } // Translate the desired offsets to a panel offset and adjust the // buffer pointer of the subpartition object. { char* buf_p = bli_obj_buffer( sub_obj ); siz_t elem_size = bli_obj_elem_size( sub_obj ); dim_t off_to_panel = bli_packm_offset_to_panel_for( j, sub_obj ); buf_p = buf_p + elem_size * off_to_panel; bli_obj_set_buffer( buf_p, sub_obj ); } } void bli_packm_acquire_mpart_tl2br( subpart_t requested_part, dim_t ij, dim_t b, obj_t* obj, obj_t* sub_obj ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } dim_t bli_packm_offset_to_panel_for( dim_t offmn, obj_t* p ) { dim_t panel_off; if ( bli_obj_pack_schema( p ) == BLIS_PACKED_ROWS ) { // For the "packed rows" schema, a single row is effectively one // row panel, and so we use the row offset as the panel offset. 
// Then we multiply this offset by the effective panel stride // (ie: the row stride) to arrive at the desired offset. panel_off = offmn * bli_obj_row_stride( p ); } else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_COLUMNS ) { // For the "packed columns" schema, a single column is effectively one // column panel, and so we use the column offset as the panel offset. // Then we multiply this offset by the effective panel stride // (ie: the column stride) to arrive at the desired offset. panel_off = offmn * bli_obj_col_stride( p ); } else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_ROW_PANELS ) { // For the "packed row panels" schema, the column stride is equal to // the panel dimension (length). So we can divide it into offmn // (interpreted as a row offset) to arrive at a panel offset. Then // we multiply this offset by the panel stride to arrive at the total // offset to the panel (in units of elements). panel_off = offmn / bli_obj_col_stride( p ); panel_off = panel_off * bli_obj_panel_stride( p ); // Sanity check. if ( offmn % bli_obj_col_stride( p ) > 0 ) bli_abort(); } else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_COL_PANELS ) { // For the "packed column panels" schema, the row stride is equal to // the panel dimension (width). So we can divide it into offmn // (interpreted as a column offset) to arrive at a panel offset. Then // we multiply this offset by the panel stride to arrive at the total // offset to the panel (in units of elements). panel_off = offmn / bli_obj_row_stride( p ); panel_off = panel_off * bli_obj_panel_stride( p ); // Sanity check. if ( offmn % bli_obj_row_stride( p ) > 0 ) bli_abort(); } else { panel_off = 0; bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } return panel_off; } blis-0.6.1/frame/1m/packm/bli_packm_part.h000066400000000000000000000050561360743507500203110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // -- Matrix partitioning ------------------------------------------------------ void bli_packm_acquire_mpart_t2b( subpart_t requested_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ); void bli_packm_acquire_mpart_l2r( subpart_t requested_part, dim_t j, dim_t b, obj_t* obj, obj_t* sub_obj ); void bli_packm_acquire_mpart_tl2br( subpart_t requested_part, dim_t ij, dim_t b, obj_t* obj, obj_t* sub_obj ); dim_t bli_packm_offset_to_panel_for( dim_t offmn, obj_t* p ); blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk.c000066400000000000000000000424541360743507500213460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ) \ { \ dim_t panel_dim; \ dim_t panel_dim_max; \ dim_t panel_len; \ dim_t panel_len_max; \ inc_t incc, ldc; \ inc_t ldp; \ \ \ /* Determine the dimensions and relative strides of the micro-panel based on its pack schema. */ \ if ( bli_is_col_packed( schema ) ) \ { \ /* Prepare to pack to row-stored column panel. */ \ panel_dim = n_panel; \ panel_dim_max = n_panel_max; \ panel_len = m_panel; \ panel_len_max = m_panel_max; \ incc = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( bli_is_row_packed( schema ) ) */ \ { \ /* Prepare to pack to column-stored row panel. */ \ panel_dim = m_panel; \ panel_dim_max = m_panel_max; \ panel_len = n_panel; \ panel_len_max = n_panel_max; \ incc = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ \ /* Handle micro-panel packing based on the structure of the matrix being packed. */ \ if ( bli_is_general( strucc ) ) \ { \ /* For micro-panels of general matrices, we can call the pack kernel front-end directly. 
*/ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ } \ else if ( bli_is_herm_or_symm( strucc ) ) \ { \ /* Call a helper function for micro-panels of Hermitian/symmetric matrices. */ \ PASTEMAC(ch,packm_herm_cxk) \ ( \ strucc, \ diagoffc, \ uploc, \ conjc, \ schema, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ ldp, \ cntx \ ); \ } \ else /* ( bli_is_triangular( strucc ) ) */ \ { \ /* Call a helper function for micro-panels of triangular matrices. */ \ PASTEMAC(ch,packm_tri_cxk) \ ( \ strucc, \ diagoffc, \ diagc, \ uploc, \ conjc, \ schema, \ invdiag, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ ldp, \ cntx \ ); \ } \ \ \ /* If m_panel < m_panel_max, or n_panel < n_panel_max, we would normally fill the edge region (the bottom m_panel_max - m_panel rows or right- side n_panel_max - n_panel columns) of the micropanel with zeros. However, this responsibility has been moved to the packm microkernel. This change allows experts to use custom kernels that pack to custom packing formats when the problem size is not a nice multiple of the register blocksize. 
*/ \ \ /* if ( m_panel != m_panel_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ dim_t i = m_panel; \ dim_t m_edge = m_panel_max - i; \ dim_t n_edge = n_panel_max; \ ctype* p_edge = p + (i )*rs_p; \ \ PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero, \ p_edge, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ \ if ( n_panel != n_panel_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ dim_t j = n_panel; \ dim_t m_edge = m_panel_max; \ dim_t n_edge = n_panel_max - j; \ ctype* p_edge = p + (j )*cs_p; \ \ PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero, \ p_edge, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ */ \ \ \ if ( bli_is_triangular( strucc ) ) \ { \ /* If this panel is an edge case in both panel dimension and length, then it must be a bottom-right corner case. Set the part of the diagonal that extends into the zero-padded region to identity. NOTE: This is actually only necessary when packing for trsm, as it helps prevent NaNs and Infs from creeping into the computation. However, we set the region to identity for trmm as well. Those 1.0's end up getting multiplied by the 0.0's in the zero-padded region of the other matrix, so there is no harm in this.
*/ \ if ( m_panel != m_panel_max && \ n_panel != n_panel_max ) \ { \ ctype* restrict one = PASTEMAC(ch,1); \ dim_t i = m_panel; \ dim_t j = n_panel; \ dim_t m_br = m_panel_max - i; \ dim_t n_br = n_panel_max - j; \ ctype* p_br = p + (i )*rs_p + (j )*cs_p; \ \ PASTEMAC2(ch,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ m_br, \ n_br, \ one, \ p_br, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ } \ \ \ /* if ( bli_is_col_packed( schema ) ) \ PASTEMAC(ch,fprintm)( stdout, "packm_struc_cxk: bp copied", m_panel_max, n_panel_max, \ p, rs_p, cs_p, "%4.1f", "" ); \ else if ( bli_is_row_packed( schema ) ) \ PASTEMAC(ch,fprintm)( stdout, "packm_struc_cxk: ap copied", m_panel_max, n_panel_max, \ p, rs_p, cs_p, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC( packm_struc_cxk, packm_cxk ) /* packm_herm_cxk: pack one micro-panel of a Hermitian or symmetric matrix. A panel that does not intersect the diagonal is packed with a single kernel call, first redirecting c to where the data is actually stored (and toggling conjc for Hermitian) when the panel lies in the unstored region. A panel that intersects the diagonal is packed in three pieces: p10 and p12 via the kernel, and the diagonal block p11 via copym followed by scalm with kappa. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ) \ { \ doff_t diagoffc_abs; \ dim_t i, j; \ bool_t row_stored; \ bool_t col_stored; \ \ \ /* Create flags to indicate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel. Hence the mismatch in "row" and "column" semantics. */ \ row_stored = bli_is_col_packed( schema ); \ col_stored = bli_is_row_packed( schema ); \ \ /* Handle the case where the micro-panel does NOT intersect the diagonal separately from the case where it does intersect.
*/ \ if ( !bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) \ { \ /* If the current panel is unstored, we need to make a few adjustments so we refer to the data where it is actually stored, also taking conjugation into account. (Note this implicitly assumes we are operating on a dense panel within a larger symmetric or Hermitian matrix, since a general matrix would not contain any unstored region.) */ \ if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \ { \ c = c + diagoffc * ( doff_t )cs_c + \ -diagoffc * ( doff_t )rs_c; \ bli_swap_incs( &incc, &ldc ); \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc ); \ } \ \ /* Pack the full panel. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ } \ else /* if ( bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) */ \ { \ ctype* restrict c10; \ ctype* restrict p10; \ dim_t p10_dim, p10_len; \ inc_t incc10, ldc10; \ doff_t diagoffc10; \ conj_t conjc10; \ \ ctype* restrict c12; \ ctype* restrict p12; \ dim_t p12_dim, p12_len; \ inc_t incc12, ldc12; \ doff_t diagoffc12; \ conj_t conjc12; \ \ /* Sanity check. Diagonals should not intersect the short end of a micro-panel. If they do, then somehow the constraints on cache blocksizes being a whole multiple of the register blocksizes was somehow violated. 
*/ \ if ( ( col_stored && diagoffc < 0 ) || \ ( row_stored && diagoffc > 0 ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ diagoffc_abs = bli_abs( diagoffc ); \ \ if ( ( row_stored && bli_is_upper( uploc ) ) || \ ( col_stored && bli_is_lower( uploc ) ) ) \ { \ p10_dim = panel_dim; \ p10_len = diagoffc_abs; \ p10 = p; \ c10 = c; \ incc10 = incc; \ ldc10 = ldc; \ conjc10 = conjc; \ \ p12_dim = panel_dim; \ p12_len = panel_len - p10_len; \ j = p10_len; \ diagoffc12 = diagoffc_abs - j; \ p12 = p + (j )*ldp; \ c12 = c + (j )*ldc; \ c12 = c12 + diagoffc12 * ( doff_t )cs_c + \ -diagoffc12 * ( doff_t )rs_c; \ incc12 = ldc; \ ldc12 = incc; \ conjc12 = conjc; \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc12 ); \ } \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ ( col_stored && bli_is_upper( uploc ) ) ) */ \ { \ p10_dim = panel_dim; \ p10_len = diagoffc_abs + panel_dim; \ diagoffc10 = diagoffc; \ p10 = p; \ c10 = c; \ c10 = c10 + diagoffc10 * ( doff_t )cs_c + \ -diagoffc10 * ( doff_t )rs_c; \ incc10 = ldc; \ ldc10 = incc; \ conjc10 = conjc; \ \ p12_dim = panel_dim; \ p12_len = panel_len - p10_len; \ j = p10_len; \ p12 = p + (j )*ldp; \ c12 = c + (j )*ldc; \ incc12 = incc; \ ldc12 = ldc; \ conjc12 = conjc; \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc10 ); \ } \ \ /* Pack to p10. For upper storage, this includes the unstored triangle of c11. */ \ /* NOTE: Since we're only packing partial panels here, we pass in p1x_len as panel_len_max; otherwise, the packm kernel will zero- fill the columns up to panel_len_max, which is not what we need or want to happen. */ \ PASTEMAC(ch,kername) \ ( \ conjc10, \ schema, \ p10_dim, \ panel_dim_max, \ p10_len, \ p10_len, \ kappa, \ c10, incc10, ldc10, \ p10, ldp, \ cntx \ ); \ \ /* Pack to p12. For lower storage, this includes the unstored triangle of c11. 
*/ \ /* NOTE: Since we're only packing partial panels here, we pass in p1x_len as panel_len_max; otherwise, the packm kernel will zero- fill the columns up to panel_len_max, which is not what we need or want to happen. */ \ PASTEMAC(ch,kername) \ ( \ conjc12, \ schema, \ p12_dim, \ panel_dim_max, \ p12_len, \ p12_len, \ kappa, \ c12, incc12, ldc12, \ p12, ldp, \ cntx \ ); \ \ /* Pack the stored triangle of c11 to p11. */ \ { \ dim_t p11_m = panel_dim; \ dim_t p11_n = panel_dim; \ dim_t j2 = diagoffc_abs; \ ctype* restrict c11 = c + (j2 )*ldc; \ ctype* restrict p11 = p + (j2 )*ldp; \ trans_t transc = ( trans_t )conjc; \ \ PASTEMAC2(ch,copym,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ uploc, \ transc, \ p11_m, \ p11_n, \ c11, rs_c, cs_c, \ p11, rs_p, cs_p, \ cntx, \ NULL \ ); \ \ /* If source matrix c is Hermitian, we have to zero out the imaginary components of the diagonal of p11 in case the corresponding elements in c11 were not already zero. */ \ if ( bli_is_hermitian( strucc ) ) \ { \ ctype* restrict pi11 = p11; \ \ for ( i = 0; i < p11_m; ++i ) \ { \ PASTEMAC(ch,seti0s)( *pi11 ); \ \ pi11 += rs_p + cs_p; \ } \ } \ \ /* Now that the diagonal has been made explicitly Hermitian (if applicable), we can now safely scale the stored triangle specified by uploc. 
*/ \ PASTEMAC2(ch,scalm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ uploc, \ p11_m, \ p11_n, \ kappa, \ p11, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ } \ } INSERT_GENTFUNC_BASIC( packm_herm_cxk, packm_cxk ) /* packm_tri_cxk: pack one micro-panel of a triangular matrix. The whole panel is first packed with the kernel; then the diagonal is set to kappa when diagc is unit, the diagonal is inverted when invdiag is TRUE, and the region on the opposite side of the diagonal from uploc is set to zero. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Pack the panel. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ \ \ /* If the diagonal of c is implicitly unit, explicitly set the diagonal of the packed panel to kappa. */ \ if ( bli_is_unit_diag( diagc ) ) \ { \ PASTEMAC2(ch,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ m_panel, \ n_panel, \ kappa, \ p, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ \ /* If requested, invert the diagonal of the packed panel. */ \ if ( invdiag == TRUE ) \ { \ PASTEMAC2(ch,invertd,BLIS_TAPI_EX_SUF) \ ( \ diagoffp, \ m_panel, \ n_panel, \ p, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ \ /* Set the region opposite the diagonal of p to zero. To do this, we need to reference the "unstored" region on the other side of the diagonal. This amounts to toggling uploc and then shifting the diagonal offset to shrink the newly referenced region (by one diagonal).
Note that this zero-filling is not needed for trsm, since the unstored region is not referenced by the trsm micro-kernel; however, zero-filling is needed for trmm, which uses the gemm micro-kernel.*/ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ uplo_t uplop = uploc; \ \ bli_toggle_uplo( &uplop ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \ \ PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ BLIS_NONUNIT_DIAG, \ uplop, \ m_panel, \ n_panel, \ zero, \ p, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ \ } INSERT_GENTFUNC_BASIC( packm_tri_cxk, packm_cxk ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk.h000066400000000000000000000102051360743507500213400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( packm_struc_cxk ) #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( packm_herm_cxk ) #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, 
inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( packm_tri_cxk ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_1er.c000066400000000000000000000440341360743507500221110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ) \ { \ dim_t panel_dim; \ dim_t panel_dim_max; \ dim_t panel_len; \ dim_t panel_len_max; \ inc_t incc, ldc; \ inc_t ldp; \ \ \ /* Determine the dimensions and relative strides of the micro-panel based on its pack schema. */ \ if ( bli_is_col_packed( schema ) ) \ { \ /* Prepare to pack to row-stored column panel. */ \ panel_dim = n_panel; \ panel_dim_max = n_panel_max; \ panel_len = m_panel; \ panel_len_max = m_panel_max; \ incc = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( bli_is_row_packed( schema ) ) */ \ { \ /* Prepare to pack to column-stored row panel. */ \ panel_dim = m_panel; \ panel_dim_max = m_panel_max; \ panel_len = n_panel; \ panel_len_max = n_panel_max; \ incc = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ \ /* Handle micro-panel packing based on the structure of the matrix being packed. */ \ if ( bli_is_general( strucc ) ) \ { \ /* For micro-panels of general matrices, we can call the pack kernel front-end directly. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ } \ else if ( bli_is_herm_or_symm( strucc ) ) \ { \ /* Call a helper function for micro-panels of Hermitian/symmetric matrices. 
*/ \ PASTEMAC(ch,packm_herm_cxk_1er) \ ( \ strucc, \ diagoffc, \ uploc, \ conjc, \ schema, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ ldp, \ cntx \ ); \ } \ else /* ( bli_is_triangular( strucc ) ) */ \ { \ /* Call a helper function for micro-panels of triangular matrices. */ \ PASTEMAC(ch,packm_tri_cxk_1er) \ ( \ strucc, \ diagoffc, \ diagc, \ uploc, \ conjc, \ schema, \ invdiag, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ ldp, \ cntx \ ); \ } \ \ \ /* If m_panel < m_panel_max, or n_panel < n_panel_max, we would normally fill the edge region (the bottom m_panel_max - m_panel rows or right- side n_panel_max - n_panel columns) of the micropanel with zeros. However, this responsibility has been moved to the packm microkernel. This change allows experts to use custom kernels that pack to custom packing formats when the problem size is not a nice multiple of the register blocksize. */ \ /* if ( m_panel != m_panel_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ dim_t offm = m_panel; \ dim_t offn = 0; \ dim_t m_edge = m_panel_max - m_panel; \ dim_t n_edge = n_panel_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, rs_p, cs_p, ldp \ ); \ } \ \ if ( n_panel != n_panel_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ dim_t offm = 0; \ dim_t offn = n_panel; \ dim_t m_edge = m_panel_max; \ dim_t n_edge = n_panel_max - n_panel; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, rs_p, cs_p, ldp \ ); \ } \ */ \ \ if ( bli_is_triangular( strucc ) ) \ { \ /* If this micro-panel is an edge case in both panel dimension and length, then it must be a bottom-right corner case, which typically only happens for micro-panels being packed for trsm.
(It also happens for trmm if kr > 1.) Here, we set the part of the diagonal that extends into the zero-padded region to identity. This prevents NaNs and Infs from creeping into the computation. If this code does execute for trmm, it is okay, because those 1.0's that extend into the bottom-right region end up getting multiplied by the 0.0's in the zero-padded region of the other matrix. */ \ if ( m_panel != m_panel_max && \ n_panel != n_panel_max ) \ { \ ctype* restrict one = PASTEMAC(ch,1); \ dim_t offm = m_panel; \ dim_t offn = n_panel; \ dim_t m_edge = m_panel_max - m_panel; \ dim_t n_edge = n_panel_max - n_panel; \ \ PASTEMAC(ch,set1ms_mxn_diag) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ one, \ p, rs_p, cs_p, ldp \ ); \ } \ } \ \ \ /* if ( bli_is_1r_packed( schema ) ) { \ PASTEMAC(chr,fprintm)( stdout, "packm_struc_cxk_1er (1r): bp", m_panel_max, 2*n_panel_max, \ ( ctype_r* )p, rs_p, cs_p, "%4.1f", "" ); \ } \ \ if ( bli_is_1e_packed( schema ) ) { \ PASTEMAC(chr,fprintm)( stdout, "packm_struc_cxk_1er (1e): ap", 2*m_panel_max, 2*n_panel_max, \ ( ctype_r* )p, rs_p, cs_p, "%4.1f", "" ); \ } \ */ \ } INSERT_GENTFUNCCO_BASIC( packm_struc_cxk_1er, packm_cxk_1er ) /* packm_herm_cxk_1er: Hermitian/symmetric micro-panel packing built on the packm_cxk_1er kernel. A panel that does not intersect the diagonal is packed with one kernel call (reading from where the data is actually stored, with conjc toggled for Hermitian, when the panel is unstored). A panel that intersects the diagonal is packed as p10 and p12 via the kernel plus the diagonal block p11 via scal21ms_mxn_uplo; for Hermitian matrices the diagonal of p11 is then rewritten from the real parts of the diagonal of c scaled by kappa. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ) \ { \ doff_t diagoffc_abs; \ dim_t j; \ bool_t row_stored; \ bool_t col_stored; \ \ \ /* Create flags to indicate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel.
Hence the mismatch in "row" and "column" semantics. */ \ row_stored = bli_is_col_packed( schema ); \ col_stored = bli_is_row_packed( schema ); \ \ /* Handle the case where the micro-panel does NOT intersect the diagonal separately from the case where it does intersect. */ \ if ( !bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) \ { \ /* If the current panel is unstored, we need to make a few adjustments so we refer to the data where it is actually stored, also taking conjugation into account. (Note this implicitly assumes we are operating on a dense panel within a larger symmetric or Hermitian matrix, since a general matrix would not contain any unstored region.) */ \ if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \ { \ c = c + diagoffc * ( doff_t )cs_c + \ -diagoffc * ( doff_t )rs_c; \ bli_swap_incs( &incc, &ldc ); \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc ); \ } \ \ /* Pack the full panel. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ } \ else /* if ( bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) */ \ { \ ctype* restrict c10; \ ctype* restrict p10; \ dim_t p10_dim, p10_len; \ inc_t incc10, ldc10; \ doff_t diagoffc10; \ conj_t conjc10; \ \ ctype* restrict c12; \ ctype* restrict p12; \ dim_t p12_dim, p12_len; \ inc_t incc12, ldc12; \ doff_t diagoffc12; \ conj_t conjc12; \ \ \ /* Sanity check. Diagonals should not intersect the short end of a micro-panel. If they do, then somehow the constraints on cache blocksizes being a whole multiple of the register blocksizes was somehow violated.
*/ \ if ( ( col_stored && diagoffc < 0 ) || \ ( row_stored && diagoffc > 0 ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ diagoffc_abs = bli_abs( diagoffc ); \ \ if ( ( row_stored && bli_is_upper( uploc ) ) || \ ( col_stored && bli_is_lower( uploc ) ) ) \ { \ p10_dim = panel_dim; \ p10_len = diagoffc_abs; \ p10 = p; \ c10 = c; \ incc10 = incc; \ ldc10 = ldc; \ conjc10 = conjc; \ \ p12_dim = panel_dim; \ p12_len = panel_len - p10_len; \ j = p10_len; \ diagoffc12 = diagoffc_abs - j; \ p12 = p + (j )*ldp; \ c12 = c + (j )*ldc; \ c12 = c12 + diagoffc12 * ( doff_t )cs_c + \ -diagoffc12 * ( doff_t )rs_c; \ incc12 = ldc; \ ldc12 = incc; \ conjc12 = conjc; \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc12 ); \ } \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ ( col_stored && bli_is_upper( uploc ) ) ) */ \ { \ p10_dim = panel_dim; \ p10_len = diagoffc_abs + panel_dim; \ diagoffc10 = diagoffc; \ p10 = p; \ c10 = c; \ c10 = c10 + diagoffc10 * ( doff_t )cs_c + \ -diagoffc10 * ( doff_t )rs_c; \ incc10 = ldc; \ ldc10 = incc; \ conjc10 = conjc; \ \ p12_dim = panel_dim; \ p12_len = panel_len - p10_len; \ j = p10_len; \ p12 = p + (j )*ldp; \ c12 = c + (j )*ldc; \ incc12 = incc; \ ldc12 = ldc; \ conjc12 = conjc; \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc10 ); \ } \ \ /* Pack to p10. For upper storage, this includes the unstored triangle of c11. */ \ /* NOTE: Since we're only packing partial panels here, we pass in p1x_len as panel_len_max; otherwise, the packm kernel will zero- fill the columns up to panel_len_max, which is not what we need or want to happen. */ \ PASTEMAC(ch,kername) \ ( \ conjc10, \ schema, \ p10_dim, \ panel_dim_max, \ p10_len, \ p10_len, \ kappa, \ c10, incc10, ldc10, \ p10, ldp, \ cntx \ ); \ \ /* Pack to p12. For lower storage, this includes the unstored triangle of c11. 
*/ \ /* NOTE: Since we're only packing partial panels here, we pass in p1x_len as panel_len_max; otherwise, the packm kernel will zero- fill the columns up to panel_len_max, which is not what we need or want to happen. */ \ PASTEMAC(ch,kername) \ ( \ conjc12, \ schema, \ p12_dim, \ panel_dim_max, \ p12_len, \ p12_len, \ kappa, \ c12, incc12, ldc12, \ p12, ldp, \ cntx \ ); \ \ /* Pack the stored triangle of c11 to p11. */ \ { \ dim_t j = diagoffc_abs; \ ctype* restrict c11 = c + (j )*ldc; \ ctype* restrict p11 = p + (j )*ldp; \ \ PASTEMAC(ch,scal21ms_mxn_uplo) \ ( \ schema, \ uploc, \ conjc, \ panel_dim, \ kappa, \ c11, rs_c, cs_c, \ p11, rs_p, cs_p, ldp \ ); \ \ /* If we are packing a micro-panel with Hermitian structure, we must take special care of the diagonal. Now, if kappa were guaranteed to be unit, all we would need to do is explicitly zero out the imaginary part of the diagonal of p11, in case the diagonal of the source matrix contained garbage (non-zero) imaginary values. HOWEVER, since kappa can be non-unit, things become a little more complicated. In general, we must re-apply the kappa scalar to ONLY the real part of the diagonal of the source matrix and save the result to the diagonal of p11. 
*/ \ if ( bli_is_hermitian( strucc ) ) \ { \ ctype_r* restrict c11_r = ( ctype_r* )c11; \ const dim_t rs_c2 = 2*rs_c; \ const dim_t cs_c2 = 2*cs_c; \ \ PASTEMAC3(ch,chr,ch,scal21ms_mxn_diag) \ ( \ schema, \ panel_dim, \ panel_dim, \ kappa, \ c11_r, rs_c2, cs_c2, \ p11, rs_p, cs_p, ldp \ ); \ } \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_1er, packm_cxk_1er ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ) \ { \ doff_t diagoffp_abs = bli_abs( diagoffp ); \ ctype* p11 = p + (diagoffp_abs )*ldp; \ \ \ /* Pack the panel. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ \ \ /* Tweak the panel according to its triangular structure */ \ { \ /* If the diagonal of c is implicitly unit, explicitly set the the diagonal of the packed panel to kappa. */ \ if ( bli_is_unit_diag( diagc ) ) \ { \ PASTEMAC(ch,set1ms_mxn_diag) \ ( \ schema, \ 0, \ 0, \ panel_dim, \ panel_dim, \ kappa, \ p11, rs_p, cs_p, ldp \ ); \ } \ \ \ /* If requested, invert the diagonal of the packed panel. */ \ if ( invdiag == TRUE ) \ { \ PASTEMAC(ch,invert1ms_mxn_diag) \ ( \ schema, \ 0, \ 0, \ panel_dim, \ panel_dim, \ p11, rs_p, cs_p, ldp \ ); \ } \ \ \ /* Set the region opposite the diagonal of p to zero. To do this, we need to reference the "unstored" region on the other side of the diagonal. 
This amounts to toggling uploc and then shifting the diagonal offset to shrink the newly referenced region (by one diagonal). Note that this zero-filling is not needed for trsm, since the unstored region is not referenced by the trsm micro-kernel; however, zero-filling is needed for trmm, which uses the gemm micro-kernel.*/ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ uplo_t uplop = uploc; \ doff_t diagoffp11_0 = 0; \ dim_t p11_0_dim = panel_dim - 1; \ \ bli_toggle_uplo( &uplop ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp11_0 ); \ \ /* Note that this macro works a little differently than the setm operation. Here, we pass in the dimensions of only p11, rather than the whole micro-panel, and furthermore we pass in the "shrunken" dimensions of p11, corresponding to the toggling and shrinking of the diagonal above. The macro will do the right thing, incrementing the pointer to p11 by the appropriate leading dimension (cs_p or rs_p), and setting only the lower or upper triangle to zero. */ \ PASTEMAC(ch,set1ms_mxn_uplo) \ ( \ schema, \ diagoffp11_0, \ uplop, \ p11_0_dim, \ p11_0_dim, \ zero, \ p11, rs_p, cs_p, ldp \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_1er, packm_cxk_1er ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_1er.h000066400000000000000000000103151360743507500221110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_struc_cxk_1er ) #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ); 
INSERT_GENTPROTCO_BASIC0( packm_herm_cxk_1er ) #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_tri_cxk_1er ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_3mis.c000066400000000000000000000547461360743507500223100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ) \ { \ dim_t panel_dim; \ dim_t panel_dim_max; \ dim_t panel_len; \ dim_t panel_len_max; \ inc_t incc, ldc; \ inc_t ldp; \ \ \ /* Determine the dimensions and relative strides of the micro-panel based on its pack schema. */ \ if ( bli_is_col_packed( schema ) ) \ { \ /* Prepare to pack to row-stored column panel. */ \ panel_dim = n_panel; \ panel_dim_max = n_panel_max; \ panel_len = m_panel; \ panel_len_max = m_panel_max; \ incc = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( bli_is_row_packed( schema ) ) */ \ { \ /* Prepare to pack to column-stored row panel. */ \ panel_dim = m_panel; \ panel_dim_max = m_panel_max; \ panel_len = n_panel; \ panel_len_max = n_panel_max; \ incc = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ \ /* Handle micro-panel packing based on the structure of the matrix being packed. */ \ if ( bli_is_general( strucc ) ) \ { \ /* For micro-panels of general matrices, we can call the pack kernel front-end directly. 
*/ \ PASTEMAC(ch,kername) \ ( \ conjc, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, is_p, ldp, \ cntx \ ); \ } \ else if ( bli_is_herm_or_symm( strucc ) ) \ { \ /* Call a helper function for micro-panels of Hermitian/symmetric matrices. */ \ PASTEMAC(ch,packm_herm_cxk_3mis) \ ( \ strucc, \ diagoffc, \ uploc, \ conjc, \ schema, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ is_p, ldp, \ cntx \ ); \ } \ else /* ( bli_is_triangular( strucc ) ) */ \ { \ /* Call a helper function for micro-panels of triangular matrices. */ \ PASTEMAC(ch,packm_tri_cxk_3mis) \ ( \ strucc, \ diagoffc, \ diagc, \ uploc, \ conjc, \ schema, \ invdiag, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ is_p, ldp, \ cntx \ ); \ } \ \ \ /* If m_panel < m_panel_max, or n_panel < n_panel_max, we would normally fill the edge region (the bottom m_panel_max - m_panel rows or right- side n_panel_max - n_panel columns) of the micropanel with zeros. However, this responsibility has been moved to the packm microkernel. This change allows experts to use custom kernels that pack to custom packing formats when the problem size is not a nice multiple of the register blocksize. 
*/ \ /* if ( m_panel != m_panel_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ dim_t i = m_panel; \ dim_t m_edge = m_panel_max - i; \ dim_t n_edge = n_panel_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*rs_p; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*rs_p; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ */ \ \ /* if ( n_panel != n_panel_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ dim_t j = n_panel; \ dim_t m_edge = m_panel_max; \ dim_t n_edge = n_panel_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*cs_p; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*cs_p; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ */ \ \ \ if ( bli_is_triangular( strucc ) ) \ { \ /* If this panel is an edge case in both panel dimension and length, then it must be a bottom-right corner case. 
#undef  GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \
\
/* Pack one micro-panel of a Hermitian or symmetric matrix in the 3mis
   layout (three real sub-panels separated by is_p: real, imaginary, and
   their sum).
   - If the panel does not intersect the diagonal it is packed in one
     kernel call, reading from the reflected (stored) location when the
     panel lies in the unstored triangle.
   - If it does intersect the diagonal it is packed in three pieces: the
     part before the diagonal block (p10), the part from the diagonal
     onward (p12), and finally the stored triangle of the diagonal block
     (p11), which overwrites what the first two calls wrote there.
   c, incc, ldc and conjc are modified locally in the non-intersecting
   case. */ \
void PASTEMAC(ch,varname) \
     ( \
       struc_t         strucc, \
       doff_t          diagoffc, \
       uplo_t          uploc, \
       conj_t          conjc, \
       pack_t          schema, \
       dim_t           m_panel, \
       dim_t           n_panel, \
       dim_t           m_panel_max, \
       dim_t           n_panel_max, \
       dim_t           panel_dim, \
       dim_t           panel_dim_max, \
       dim_t           panel_len, \
       dim_t           panel_len_max, \
       ctype* restrict kappa, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
                          inc_t incc, inc_t ldc, \
       ctype* restrict p, inc_t rs_p, inc_t cs_p, \
                          inc_t is_p, inc_t ldp, \
       cntx_t*         cntx  \
     ) \
{ \
	doff_t diagoffc_abs; \
	dim_t  i, j; \
	bool_t row_stored; \
	bool_t col_stored; \
\
\
	/* Create flags to indicate row or column storage. Note that the
	   schema bit that encodes row or column is describing the form of
	   the micro-panel, not the storage in the micro-panel. Hence the
	   mismatch in "row" and "column" semantics. */ \
	row_stored = bli_is_col_packed( schema ); \
	col_stored = bli_is_row_packed( schema ); \
\
\
	/* Handle the case where the micro-panel does NOT intersect the
	   diagonal separately from the case where it does intersect. */ \
	if ( !bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) \
	{ \
		/* If the current panel is unstored, we need to make a few
		   adjustments so we refer to the data where it is actually
		   stored, also taking conjugation into account: shift c across
		   the diagonal, swap the two source increments (i.e. read the
		   transpose), and toggle conjugation for Hermitian matrices.
		   (Note this implicitly assumes we are operating on a dense panel
		   within a larger symmetric or Hermitian matrix, since a general
		   matrix would not contain any unstored region.) */ \
		if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \
		{ \
			c = c + diagoffc * ( doff_t )cs_c + \
			       -diagoffc * ( doff_t )rs_c;  \
			bli_swap_incs( &incc, &ldc ); \
\
			if ( bli_is_hermitian( strucc ) ) \
				bli_toggle_conj( &conjc ); \
		} \
\
		/* Pack the full panel. */ \
		PASTEMAC(ch,kername) \
		( \
		  conjc, \
		  panel_dim, \
		  panel_dim_max, \
		  panel_len, \
		  panel_len_max, \
		  kappa, \
		  c, incc, ldc, \
		  p, is_p, ldp, \
		  cntx  \
		); \
	} \
	else /* if ( bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) */ \
	{ \
		/* Real-typed view of the packed panel; all offsets into p below
		   are taken in units of ctype_r. */ \
		ctype_r* restrict p_r         = ( ctype_r* )p; \
\
		ctype_r* restrict one_r       = PASTEMAC(chr,1); \
		ctype_r* restrict minus_one_r = PASTEMAC(chr,m1); \
\
		/* Source/destination descriptors for the sub-panel preceding the
		   diagonal block (10) ... */ \
		ctype*   restrict c10; \
		ctype_r* restrict p10; \
		dim_t             p10_dim, p10_len; \
		inc_t             incc10, ldc10; \
		doff_t            diagoffc10; \
		conj_t            conjc10; \
\
		/* ... and for the sub-panel from the diagonal block onward (12). */ \
		ctype*   restrict c12; \
		ctype_r* restrict p12; \
		dim_t             p12_dim, p12_len; \
		inc_t             incc12, ldc12; \
		doff_t            diagoffc12; \
		conj_t            conjc12; \
\
		/* Sanity check. Diagonals should not intersect the short end of
		   a micro-panel. If they do, then the constraint that cache
		   blocksizes be a whole multiple of the register blocksizes was
		   somehow violated. */ \
		if ( ( col_stored && diagoffc < 0 ) || \
		     ( row_stored && diagoffc > 0 ) ) \
			bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
\
		diagoffc_abs = bli_abs( diagoffc ); \
\
		if ( ( row_stored && bli_is_upper( uploc ) ) || \
		     ( col_stored && bli_is_lower( uploc ) ) ) \
		{ \
			/* p10 is read directly from c; p12 is read from the
			   reflected location with swapped increments (and toggled
			   conjugation if Hermitian). */ \
			p10_dim    = panel_dim; \
			p10_len    = diagoffc_abs; \
			p10        = p_r; \
			c10        = c; \
			incc10     = incc; \
			ldc10      = ldc; \
			conjc10    = conjc; \
\
			p12_dim    = panel_dim; \
			p12_len    = panel_len - p10_len; \
			j          = p10_len; \
			/* NOTE: j equals diagoffc_abs here, so diagoffc12 is always
			   zero and the shift applied to c12 below is a no-op. */ \
			diagoffc12 = diagoffc_abs - j; \
			p12        = p_r + (j  )*ldp; \
			c12        = c   + (j  )*ldc; \
			c12        = c12 + diagoffc12 * ( doff_t )cs_c + \
			                  -diagoffc12 * ( doff_t )rs_c;  \
			incc12     = ldc; \
			ldc12      = incc; \
			conjc12    = conjc; \
\
			if ( bli_is_hermitian( strucc ) ) \
				bli_toggle_conj( &conjc12 ); \
		} \
		else /* ( row_stored && lower ) || ( col_stored && upper ) */ \
		{ \
			/* Mirror image of the branch above: p10 (which here also
			   spans the diagonal block) is read from the reflected
			   location with swapped increments, while p12 is read
			   directly from c. */ \
			p10_dim    = panel_dim; \
			p10_len    = diagoffc_abs + panel_dim; \
			diagoffc10 = diagoffc; \
			p10        = p_r; \
			c10        = c; \
			c10        = c10 + diagoffc10 * ( doff_t )cs_c + \
			                  -diagoffc10 * ( doff_t )rs_c;  \
			incc10     = ldc; \
			ldc10      = incc; \
			conjc10    = conjc; \
\
			p12_dim    = panel_dim; \
			p12_len    = panel_len - p10_len; \
			j          = p10_len; \
			p12        = p_r + (j  )*ldp; \
			c12        = c   + (j  )*ldc; \
			incc12     = incc; \
			ldc12      = ldc; \
			conjc12    = conjc; \
\
			if ( bli_is_hermitian( strucc ) ) \
				bli_toggle_conj( &conjc10 ); \
		} \
\
		/* Pack to p10. For upper storage, this includes the unstored
		   triangle of c11. */ \
		/* NOTE: Since we're only packing partial panels here, we pass in
		   p1x_len as panel_len_max; otherwise, the packm kernel will zero-
		   fill the columns up to panel_len_max, which is not what we need
		   or want to happen. */ \
		PASTEMAC(ch,kername) \
		( \
		  conjc10, \
		  p10_dim, \
		  panel_dim_max, \
		  p10_len, \
		  p10_len, \
		  kappa, \
		  c10, incc10, ldc10, \
		  ( ctype* )p10, is_p, ldp, \
		  cntx  \
		); \
\
		/* Pack to p12. For lower storage, this includes the unstored
		   triangle of c11. */ \
		/* NOTE: As above, p12_len is passed as panel_len_max so that the
		   packm kernel does not zero-fill columns up to the real
		   panel_len_max. */ \
		PASTEMAC(ch,kername) \
		( \
		  conjc12, \
		  p12_dim, \
		  panel_dim_max, \
		  p12_len, \
		  p12_len, \
		  kappa, \
		  c12, incc12, ldc12, \
		  ( ctype* )p12, is_p, ldp, \
		  cntx  \
		); \
\
		/* Pack the stored triangle of c11 to p11. */ \
		{ \
			dim_t    p11_m   = panel_dim; \
			dim_t    p11_n   = panel_dim; \
			/* c11 is accessed through real-typed pointers, so its strides
			   are doubled to step over whole complex elements. */ \
			inc_t    rs_c11  = 2*rs_c; \
			inc_t    cs_c11  = 2*cs_c; \
			dim_t    j2      = diagoffc_abs; \
			ctype*   c11     = ( ctype*   )c   + (j2 )*ldc; \
			ctype_r* p11     = ( ctype_r* )p_r + (j2 )*ldp; \
			/* Real and imaginary parts of c11 are interleaved (offset 1);
			   those of p11 live in separate sub-panels (offset is_p). */ \
			ctype_r* c11_r   = ( ctype_r* )c11; \
			ctype_r* c11_i   = ( ctype_r* )c11 + 1; \
			ctype_r* p11_r   = ( ctype_r* )p11; \
			ctype_r* p11_i   = ( ctype_r* )p11 + is_p; \
			ctype_r* alpha_r = one_r; \
			ctype_r* alpha_i = ( bli_is_conj( conjc ) ? minus_one_r : one_r ); \
			ctype_r  kappa_r = PASTEMAC(ch,real)( *kappa ); \
			ctype_r  kappa_i = PASTEMAC(ch,imag)( *kappa ); \
\
			/* Copy the real part of the stored triangle of c11 to p11_r. */ \
			PASTEMAC2(chr,scal2m,BLIS_TAPI_EX_SUF) \
			( \
			  0, \
			  BLIS_NONUNIT_DIAG, \
			  uploc, \
			  BLIS_NO_TRANSPOSE, \
			  p11_m, \
			  p11_n, \
			  alpha_r, \
			  c11_r, rs_c11, cs_c11, \
			  p11_r, rs_p,   cs_p, \
			  cntx, \
			  NULL  \
			); \
\
			/* Copy the imaginary part of the stored triangle of c11 to
			   p11_i, scaling by -1 if conjugation on c was requested. */ \
			PASTEMAC2(chr,scal2m,BLIS_TAPI_EX_SUF) \
			( \
			  0, \
			  BLIS_NONUNIT_DIAG, \
			  uploc, \
			  BLIS_NO_TRANSPOSE, \
			  p11_m, \
			  p11_n, \
			  alpha_i, \
			  c11_i, rs_c11, cs_c11, \
			  p11_i, rs_p,   cs_p, \
			  cntx, \
			  NULL  \
			); \
\
			/* If source matrix c is Hermitian, we have to zero out the
			   imaginary components of the diagonal of p11 in case the
			   corresponding elements in c11 were not already zero. */ \
			if ( bli_is_hermitian( strucc ) ) \
			{ \
				for ( i = 0; i < p11_m; ++i ) \
				{ \
					ctype_r* pi11_i = p11_i + (i  )*rs_p + (i  )*cs_p; \
\
					PASTEMAC(chr,set0s)( *pi11_i ); \
				} \
			} \
\
			/* Apply kappa to the part of p11 that corresponds to the
			   stored part of c11 that was copied above (the two scal2m
			   calls used +/-1, not kappa). */ \
			if ( bli_is_upper( uploc ) ) \
			{ \
				PASTEMAC(ch,scalris_mxn_u) \
				( \
				  0, \
				  p11_m, \
				  p11_n, \
				  &kappa_r, \
				  &kappa_i, \
				  p11_r, \
				  p11_i, rs_p, cs_p  \
				); \
			} \
			else \
			{ \
				PASTEMAC(ch,scalris_mxn_l) \
				( \
				  0, \
				  p11_m, \
				  p11_n, \
				  &kappa_r, \
				  &kappa_i, \
				  p11_r, \
				  p11_i, rs_p, cs_p  \
				); \
			} \
\
			/* Update the p11 section of the rpi panel (located is_p past
			   p11_i). It simply needs to contain the sum of p11_r + p11_i.
			   The whole p11_m x p11_n block is refreshed, not only the
			   stored triangle. */ \
			{ \
				ctype_r* p11_rpi = p11_i + is_p; \
\
				for ( j = 0; j < p11_n; ++j ) \
				for ( i = 0; i < p11_m; ++i ) \
				{ \
					ctype_r* pi11_r   = p11_r   + (i  )*rs_p + (j  )*cs_p; \
					ctype_r* pi11_i   = p11_i   + (i  )*rs_p + (j  )*cs_p; \
					ctype_r* pi11_rpi = p11_rpi + (i  )*rs_p + (j  )*cs_p; \
\
					PASTEMAC(chr,add3s) \
					( \
					  *pi11_r, \
					  *pi11_i, \
					  *pi11_rpi  \
					); \
				} \
			} \
		} \
	} \
}

INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_3mis, packm_cxk_3mis )
*/ \ if ( bli_is_hermitian( strucc ) ) \ { \ for ( i = 0; i < p11_m; ++i ) \ { \ ctype_r* pi11_i = p11_i + (i )*rs_p + (i )*cs_p; \ \ PASTEMAC(chr,set0s)( *pi11_i ); \ } \ } \ \ /* Apply kappa to the part of p11 that corresponds to the stored part of c11 that was copied above. */ \ if ( bli_is_upper( uploc ) ) \ { \ PASTEMAC(ch,scalris_mxn_u) \ ( \ 0, \ p11_m, \ p11_n, \ &kappa_r, \ &kappa_i, \ p11_r, \ p11_i, rs_p, cs_p \ ); \ } \ else \ { \ PASTEMAC(ch,scalris_mxn_l) \ ( \ 0, \ p11_m, \ p11_n, \ &kappa_r, \ &kappa_i, \ p11_r, \ p11_i, rs_p, cs_p \ ); \ } \ \ /* Update the p11 section of the ri panel. It simply needs to contain the sum of p11_r + p11_i. */ \ { \ ctype_r* p11_rpi = p11_i + is_p; \ \ for ( j = 0; j < p11_n; ++j ) \ for ( i = 0; i < p11_m; ++i ) \ { \ ctype_r* pi11_r = p11_r + (i )*rs_p + (j )*cs_p; \ ctype_r* pi11_i = p11_i + (i )*rs_p + (j )*cs_p; \ ctype_r* pi11_rpi = p11_rpi + (i )*rs_p + (j )*cs_p; \ \ PASTEMAC(chr,add3s) \ ( \ *pi11_r, \ *pi11_i, \ *pi11_rpi \ ); \ } \ } \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_3mis, packm_cxk_3mis ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Pack the panel. 
*/ \ PASTEMAC(ch,kername) \ ( \ conjc, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, is_p, ldp, \ cntx \ ); \ \ \ /* Tweak the panel according to its triangular structure */ \ { \ ctype_r* p_r = ( ctype_r* )p + 0; \ ctype_r* p_i = ( ctype_r* )p + is_p; \ ctype_r* p_rpi = ( ctype_r* )p + 2*is_p; \ \ dim_t j = bli_abs( diagoffp ); \ ctype_r* p11_r = p_r + (j )*ldp; \ ctype_r* p11_i = p_i + (j )*ldp; \ ctype_r* p11_rpi = p_rpi + (j )*ldp; \ \ dim_t p11_m = m_panel; \ dim_t p11_n = n_panel; \ \ dim_t min_p11_m_n; \ \ if ( diagoffp < 0 ) p11_m -= j; \ else if ( diagoffp > 0 ) p11_n -= j; \ \ min_p11_m_n = bli_min( p11_m, p11_n ); \ \ \ /* If the diagonal of c is implicitly unit, explicitly set the the diagonal of the packed panel to kappa. */ \ if ( bli_is_unit_diag( diagc ) ) \ { \ ctype_r kappa_r = PASTEMAC(ch,real)( *kappa ); \ ctype_r kappa_i = PASTEMAC(ch,imag)( *kappa ); \ dim_t i; \ \ PASTEMAC2(chr,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ m_panel, \ n_panel, \ &kappa_r, \ p_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ m_panel, \ n_panel, \ &kappa_i, \ p_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ \ /* Update the diagonal of the p11 section of the rpi panel. It simply needs to contain the sum of diagonals of p11_r and p11_i. */ \ for ( i = 0; i < min_p11_m_n; ++i ) \ { \ ctype_r* pi11_r = p11_r + (i )*rs_p + (i )*cs_p; \ ctype_r* pi11_i = p11_i + (i )*rs_p + (i )*cs_p; \ ctype_r* pi11_rpi = p11_rpi + (i )*rs_p + (i )*cs_p; \ \ PASTEMAC(chr,add3s)( *pi11_r, *pi11_i, *pi11_rpi ); \ } \ } \ \ /* If requested, invert the diagonal of the packed panel. Note that we do not need to update the ri panel since inverted diagonals are only needed by trsm, which does not use the p11 section of the ri panel. 
*/ \ if ( invdiag == TRUE ) \ { \ dim_t i; \ \ for ( i = 0; i < min_p11_m_n; ++i ) \ { \ ctype_r* pi11_r = p11_r + (i )*rs_p + (i )*cs_p; \ ctype_r* pi11_i = p11_i + (i )*rs_p + (i )*cs_p; \ \ PASTEMAC(ch,invertris)( *pi11_r, *pi11_i ); \ } \ } \ \ /* Set the region opposite the diagonal of p to zero. To do this, we need to reference the "unstored" region on the other side of the diagonal. This amounts to toggling uploc and then shifting the diagonal offset to shrink the newly referenced region (by one diagonal). Note that this zero-filling is not needed for trsm, since the unstored region is not referenced by the trsm micro-kernel; however, zero-filling is needed for trmm, which uses the gemm micro-kernel.*/ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ uplo_t uplop = uploc; \ \ bli_toggle_uplo( &uplop ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ BLIS_NONUNIT_DIAG, \ uplop, \ m_panel, \ n_panel, \ zero_r, \ p_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ BLIS_NONUNIT_DIAG, \ uplop, \ m_panel, \ n_panel, \ zero_r, \ p_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ BLIS_NONUNIT_DIAG, \ uplop, \ m_panel, \ n_panel, \ zero_r, \ p_rpi, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_3mis, packm_cxk_3mis ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_3mis.h000066400000000000000000000103201360743507500222710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_struc_cxk_3mis ) #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_herm_cxk_3mis ) #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_tri_cxk_3mis ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_4mi.c000066400000000000000000000507201360743507500221120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ) \ { \ dim_t panel_dim; \ dim_t panel_dim_max; \ dim_t panel_len; \ dim_t panel_len_max; \ inc_t incc, ldc; \ inc_t ldp; \ \ \ /* Determine the dimensions and relative strides of the micro-panel based on its pack schema. */ \ if ( bli_is_col_packed( schema ) ) \ { \ /* Prepare to pack to row-stored column panel. */ \ panel_dim = n_panel; \ panel_dim_max = n_panel_max; \ panel_len = m_panel; \ panel_len_max = m_panel_max; \ incc = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( bli_is_row_packed( schema ) ) */ \ { \ /* Prepare to pack to column-stored row panel. */ \ panel_dim = m_panel; \ panel_dim_max = m_panel_max; \ panel_len = n_panel; \ panel_len_max = n_panel_max; \ incc = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ \ /* Handle micro-panel packing based on the structure of the matrix being packed. */ \ if ( bli_is_general( strucc ) ) \ { \ /* For micro-panels of general matrices, we can call the pack kernel front-end directly. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, is_p, ldp, \ cntx \ ); \ } \ else if ( bli_is_herm_or_symm( strucc ) ) \ { \ /* Call a helper function for micro-panels of Hermitian/symmetric matrices. 
*/ \ PASTEMAC(ch,packm_herm_cxk_4mi) \ ( \ strucc, \ diagoffc, \ uploc, \ conjc, \ schema, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ is_p, ldp, \ cntx \ ); \ } \ else /* ( bli_is_triangular( strucc ) ) */ \ { \ /* Call a helper function for micro-panels of triangular matrices. */ \ PASTEMAC(ch,packm_tri_cxk_4mi) \ ( \ strucc, \ diagoffc, \ diagc, \ uploc, \ conjc, \ schema, \ invdiag, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ is_p, ldp, \ cntx \ ); \ } \ \ \ /* If m_panel < m_panel_max, or n_panel < n_panel_max, we would normally fill the edge region (the bottom m_panel_max - m_panel rows or right- side n_panel_max - n_panel columns) of the micropanel with zeros. However, this responsibility has been moved to the packm microkernel. This change allows experts to use custom kernels that pack to custom packing formats when the problem size is not a nice multiple of the register blocksize. 
*/ \ /* if ( m_panel != m_panel_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ dim_t i = m_panel; \ dim_t m_edge = m_panel_max - i; \ dim_t n_edge = n_panel_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*rs_p; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ \ if ( n_panel != n_panel_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ dim_t j = n_panel; \ dim_t m_edge = m_panel_max; \ dim_t n_edge = n_panel_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*cs_p; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ */ \ \ \ if ( bli_is_triangular( strucc ) ) \ { \ /* If this panel is an edge case in both panel dimension and length, then it must be a bottom-right corner case. Set the part of the diagonal that extends into the zero-padded region to identity. NOTE: This is actually only necessary when packing for trsm, as it helps prevent NaNs and Infs from creeping into the computation. However, we set the region to identity for trmm as well. Those 1.0's end up getting muliplied by the 0.0's in the zero-padded region of the other matrix, so there is no harm in this. 
*/ \ if ( m_panel != m_panel_max && \ n_panel != n_panel_max ) \ { \ ctype_r* restrict one_r = PASTEMAC(chr,1); \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ dim_t i = m_panel; \ dim_t j = n_panel; \ dim_t m_br = m_panel_max - i; \ dim_t n_br = n_panel_max - j; \ ctype_r* p_br_r = ( ctype_r* )p + (i )*rs_p + (j )*cs_p; \ ctype_r* p_br_i = ( ctype_r* )p + is_p + (i )*rs_p + (j )*cs_p; \ \ PASTEMAC2(chr,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ m_br, \ n_br, \ one_r, \ p_br_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ m_br, \ n_br, \ zero_r, \ p_br_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_struc_cxk_4mi, packm_cxk_4mi ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ) \ { \ doff_t diagoffc_abs; \ dim_t i, j; \ bool_t row_stored; \ bool_t col_stored; \ \ \ /* Create flags to incidate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel. Hence the mismatch in "row" and "column" semantics. */ \ row_stored = bli_is_col_packed( schema ); \ col_stored = bli_is_row_packed( schema ); \ \ \ /* Handle the case where the micro-panel does NOT intersect the diagonal separately from the case where it does intersect. 
*/ \ if ( !bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) \ { \ /* If the current panel is unstored, we need to make a few adjustments so we refer to the data where it is actually stored, also taking conjugation into account. (Note this implicitly assumes we are operating on a dense panel within a larger symmetric or Hermitian matrix, since a general matrix would not contain any unstored region.) */ \ if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \ { \ c = c + diagoffc * ( doff_t )cs_c + \ -diagoffc * ( doff_t )rs_c; \ bli_swap_incs( &incc, &ldc ); \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc ); \ } \ \ /* Pack the full panel. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, is_p, ldp, \ cntx \ ); \ } \ else /* if ( bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) */ \ { \ ctype_r* restrict p_r = ( ctype_r* )p; \ \ ctype_r* restrict one_r = PASTEMAC(chr,1); \ ctype_r* restrict minus_one_r = PASTEMAC(chr,m1); \ \ ctype* restrict c10; \ ctype_r* restrict p10; \ dim_t p10_dim, p10_len; \ inc_t incc10, ldc10; \ doff_t diagoffc10; \ conj_t conjc10; \ \ ctype* restrict c12; \ ctype_r* restrict p12; \ dim_t p12_dim, p12_len; \ inc_t incc12, ldc12; \ doff_t diagoffc12; \ conj_t conjc12; \ \ /* Sanity check. Diagonals should not intersect the short end of a micro-panel. If they do, then somehow the constraints on cache blocksizes being a whole multiple of the register blocksizes was somehow violated. 
*/ \ if ( ( col_stored && diagoffc < 0 ) || \ ( row_stored && diagoffc > 0 ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ diagoffc_abs = bli_abs( diagoffc ); \ \ if ( ( row_stored && bli_is_upper( uploc ) ) || \ ( col_stored && bli_is_lower( uploc ) ) ) \ { \ p10_dim = panel_dim; \ p10_len = diagoffc_abs; \ p10 = p_r; \ c10 = c; \ incc10 = incc; \ ldc10 = ldc; \ conjc10 = conjc; \ \ p12_dim = panel_dim; \ p12_len = panel_len - p10_len; \ j = p10_len; \ diagoffc12 = diagoffc_abs - j; \ p12 = p_r + (j )*ldp; \ c12 = c + (j )*ldc; \ c12 = c12 + diagoffc12 * ( doff_t )cs_c + \ -diagoffc12 * ( doff_t )rs_c; \ incc12 = ldc; \ ldc12 = incc; \ conjc12 = conjc; \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc12 ); \ } \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ ( col_stored && bli_is_upper( uploc ) ) ) */ \ { \ p10_dim = panel_dim; \ p10_len = diagoffc_abs + panel_dim; \ diagoffc10 = diagoffc; \ p10 = p_r; \ c10 = c; \ c10 = c10 + diagoffc10 * ( doff_t )cs_c + \ -diagoffc10 * ( doff_t )rs_c; \ incc10 = ldc; \ ldc10 = incc; \ conjc10 = conjc; \ \ p12_dim = panel_dim; \ p12_len = panel_len - p10_len; \ j = p10_len; \ p12 = p_r + (j )*ldp; \ c12 = c + (j )*ldc; \ incc12 = incc; \ ldc12 = ldc; \ conjc12 = conjc; \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc10 ); \ } \ \ /* Pack to p10. For upper storage, this includes the unstored triangle of c11. */ \ /* NOTE: Since we're only packing partial panels here, we pass in p1x_len as panel_len_max; otherwise, the packm kernel will zero- fill the columns up to panel_len_max, which is not what we need or want to happen. */ \ PASTEMAC(ch,kername) \ ( \ conjc10, \ p10_dim, \ panel_dim_max, \ p10_len, \ p10_len, \ kappa, \ c10, incc10, ldc10, \ ( ctype* )p10, is_p, ldp, \ cntx \ ); \ \ /* Pack to p12. For lower storage, this includes the unstored triangle of c11. 
*/ \ /* NOTE: Since we're only packing partial panels here, we pass in p1x_len as panel_len_max; otherwise, the packm kernel will zero- fill the columns up to panel_len_max, which is not what we need or want to happen. */ \ PASTEMAC(ch,kername) \ ( \ conjc12, \ p12_dim, \ panel_dim_max, \ p12_len, \ p12_len, \ kappa, \ c12, incc12, ldc12, \ ( ctype* )p12, is_p, ldp, \ cntx \ ); \ \ /* Pack the stored triangle of c11 to p11. */ \ { \ dim_t p11_m = panel_dim; \ dim_t p11_n = panel_dim; \ inc_t rs_c11 = 2*rs_c; \ inc_t cs_c11 = 2*cs_c; \ dim_t j2 = diagoffc_abs; \ ctype* c11 = ( ctype* )c + (j2 )*ldc; \ ctype_r* p11 = ( ctype_r* )p_r + (j2 )*ldp; \ ctype_r* c11_r = ( ctype_r* )c11; \ ctype_r* c11_i = ( ctype_r* )c11 + 1; \ ctype_r* p11_r = ( ctype_r* )p11; \ ctype_r* p11_i = ( ctype_r* )p11 + is_p; \ ctype_r* alpha_r = one_r; \ ctype_r* alpha_i = ( bli_is_conj( conjc ) ? minus_one_r : one_r ); \ ctype_r kappa_r = PASTEMAC(ch,real)( *kappa ); \ ctype_r kappa_i = PASTEMAC(ch,imag)( *kappa ); \ \ /* Copy the real part of the stored triangle of c11 to p11_r. */ \ PASTEMAC2(chr,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ uploc, \ BLIS_NO_TRANSPOSE, \ p11_m, \ p11_n, \ alpha_r, \ c11_r, rs_c11, cs_c11, \ p11_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ \ /* Copy the imaginary part of the stored triangle of c11 to p11_i, scaling by -1 if conjugation on c was requested. */ \ PASTEMAC2(chr,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ uploc, \ BLIS_NO_TRANSPOSE, \ p11_m, \ p11_n, \ alpha_i, \ c11_i, rs_c11, cs_c11, \ p11_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ \ /* If source matrix c is Hermitian, we have to zero out the imaginary components of the diagonal of p11 in case the corresponding elements in c11 were not already zero. 
*/ \ if ( bli_is_hermitian( strucc ) ) \ { \ for ( i = 0; i < p11_m; ++i ) \ { \ ctype_r* pi11_i = p11_i + (i )*rs_p + (i )*cs_p; \ \ PASTEMAC(chr,set0s)( *pi11_i ); \ } \ } \ \ /* Apply kappa to the part of p11 that corresponds to the stored part of c11 that was copied above. */ \ if ( bli_is_upper( uploc ) ) \ { \ PASTEMAC(ch,scalris_mxn_u) \ ( \ 0, \ p11_m, \ p11_n, \ &kappa_r, \ &kappa_i, \ p11_r, \ p11_i, rs_p, cs_p \ ); \ } \ else \ { \ PASTEMAC(ch,scalris_mxn_l) \ ( \ 0, \ p11_m, \ p11_n, \ &kappa_r, \ &kappa_i, \ p11_r, \ p11_i, rs_p, cs_p \ ); \ } \ /* PASTEMAC(chr,fprintm)( stdout, "packm_herm_cxk: ap_r copied", m_panel_max, n_panel_max, \ p_r + 0*is_p, rs_p, cs_p, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_herm_cxk: ap_i copied", m_panel_max, n_panel_max, \ p_r + 1*is_p, rs_p, cs_p, "%4.1f", "" ); \ */ \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_4mi, packm_cxk_4mi ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Pack the panel. 
*/ \ PASTEMAC(ch,kername) \ ( \ conjc, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, is_p, ldp, \ cntx \ ); \ \ \ /* Tweak the panel according to its triangular structure */ \ { \ ctype_r* p_r = ( ctype_r* )p; \ ctype_r* p_i = ( ctype_r* )p + is_p; \ \ dim_t j = bli_abs( diagoffp ); \ ctype_r* p11_r = p_r + (j )*ldp; \ ctype_r* p11_i = p_i + (j )*ldp; \ \ /* If the diagonal of c is implicitly unit, explicitly set the the diagonal of the packed panel to kappa. */ \ if ( bli_is_unit_diag( diagc ) ) \ { \ ctype_r kappa_r = PASTEMAC(ch,real)( *kappa ); \ ctype_r kappa_i = PASTEMAC(ch,imag)( *kappa ); \ \ PASTEMAC2(chr,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ m_panel, \ n_panel, \ &kappa_r, \ p_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setd,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ m_panel, \ n_panel, \ &kappa_i, \ p_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ \ \ /* If requested, invert the diagonal of the packed panel. */ \ if ( invdiag == TRUE ) \ { \ dim_t i; \ \ for ( i = 0; i < panel_dim; ++i ) \ { \ ctype_r* pi11_r = p11_r + (i )*rs_p + (i )*cs_p; \ ctype_r* pi11_i = p11_i + (i )*rs_p + (i )*cs_p; \ \ PASTEMAC(ch,invertris)( *pi11_r, *pi11_i ); \ } \ } \ \ \ /* Set the region opposite the diagonal of p to zero. To do this, we need to reference the "unstored" region on the other side of the diagonal. This amounts to toggling uploc and then shifting the diagonal offset to shrink the newly referenced region (by one diagonal). 
Note that this zero-filling is not needed for trsm, since the unstored region is not referenced by the trsm micro-kernel; however, zero-filling is needed for trmm, which uses the gemm micro-kernel.*/ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ uplo_t uplop = uploc; \ \ bli_toggle_uplo( &uplop ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ BLIS_NONUNIT_DIAG, \ uplop, \ m_panel, \ n_panel, \ zero_r, \ p_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ BLIS_NONUNIT_DIAG, \ uplop, \ m_panel, \ n_panel, \ zero_r, \ p_i, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_4mi, packm_cxk_4mi ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_4mi.h000066400000000000000000000103151360743507500221130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_struc_cxk_4mi ) #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_herm_cxk_4mi ) #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* 
restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_tri_cxk_4mi ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_md.c000066400000000000000000000333671360743507500220310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_GEMM_MD #undef GENTFUNC2 #define GENTFUNC2( ctype_c, ctype_p, chc, chp, varname ) \ \ void PASTEMAC2(chc,chp,varname) \ ( \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype_p* restrict kappa, \ ctype_c* restrict c, inc_t rs_c, inc_t cs_c, \ ctype_p* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ) \ { \ dim_t panel_dim; \ dim_t panel_dim_max; \ dim_t panel_len; \ dim_t panel_len_max; \ inc_t incc, ldc; \ inc_t ldp; \ \ \ /* Determine the dimensions and relative strides of the micro-panel based on its pack schema. */ \ if ( bli_is_col_packed( schema ) ) \ { \ /* Prepare to pack to row-stored column panel. */ \ panel_dim = n_panel; \ panel_dim_max = n_panel_max; \ panel_len = m_panel; \ panel_len_max = m_panel_max; \ incc = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( bli_is_row_packed( schema ) ) */ \ { \ /* Prepare to pack to column-stored row panel. */ \ panel_dim = m_panel; \ panel_dim_max = m_panel_max; \ panel_len = n_panel; \ panel_len_max = n_panel_max; \ incc = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ \ if ( bli_is_nat_packed( schema ) ) \ { \ /* Sanity check: Make sure that kappa is 1.0. Mixed-datatype alpha values are never handled when packing for native execution; instead, they are passed along to the micro-kernel. */ \ if ( !PASTEMAC(chp,eq1)( *kappa ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). */ \ \ /* NOTE: We ignore kappa for now, since it should be 1.0. */ \ PASTEMAC2(chc,chp,castm) \ ( \ ( trans_t )conjc, \ panel_dim, \ panel_len, \ c, incc, ldc, \ p, 1, ldp \ ); \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. 
*/ \ if ( panel_dim < panel_dim_max ) \ { \ ctype_p* restrict zero = PASTEMAC(chp,0); \ const dim_t i = panel_dim; \ const dim_t m_edge = panel_dim_max - i; \ const dim_t n_edge = panel_len_max; \ ctype_p* p_edge = p + (i )*1; \ \ PASTEMAC2(chp,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero, \ p_edge, 1, ldp, \ cntx, \ NULL \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. */ \ if ( panel_len < panel_len_max ) \ { \ ctype_p* restrict zero = PASTEMAC(chp,0); \ const dim_t j = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - j; \ ctype_p* p_edge = p + (j )*ldp; \ \ PASTEMAC2(chp,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero, \ p_edge, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ else if ( bli_is_1r_packed( schema ) ) \ { \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). */ \ \ PASTEMAC2(chc,chp,packm_cxk_1r_md) \ ( \ conjc, \ panel_dim, \ panel_len, \ kappa, \ c, incc, ldc, \ p, ldp \ ); \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. */ \ if ( panel_dim < panel_dim_max ) \ { \ ctype_p* restrict zero = PASTEMAC(chp,0); \ const dim_t offm = panel_dim; \ const dim_t offn = 0; \ const dim_t m_edge = panel_dim_max - panel_dim; \ const dim_t n_edge = panel_len_max; \ \ ( void ) zero; \ ( void ) m_edge; ( void )offm; \ ( void ) n_edge; ( void )offn; \ \ PASTEMAC(chp,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. 
*/ \ if ( panel_len < panel_len_max ) \ { \ ctype_p* restrict zero = PASTEMAC(chp,0); \ const dim_t offm = 0; \ const dim_t offn = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - panel_len; \ \ ( void ) zero; \ ( void ) m_edge; ( void )offm; \ ( void ) n_edge; ( void )offn; \ \ PASTEMAC(chp,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ else if ( bli_is_1e_packed( schema ) ) \ { \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). */ \ \ PASTEMAC2(chc,chp,packm_cxk_1e_md) \ ( \ conjc, \ panel_dim, \ panel_len, \ kappa, \ c, incc, ldc, \ p, ldp \ ); \ \ /* If panel_dim < panel_dim_max, then we zero those unused rows. */ \ if ( panel_dim < panel_dim_max ) \ { \ ctype_p* restrict zero = PASTEMAC(chp,0); \ const dim_t offm = panel_dim; \ const dim_t offn = 0; \ const dim_t m_edge = panel_dim_max - panel_dim; \ const dim_t n_edge = panel_len_max; \ \ ( void ) zero; \ ( void ) m_edge; ( void )offm; \ ( void ) n_edge; ( void )offn; \ \ PASTEMAC(chp,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ \ /* If panel_len < panel_len_max, then we zero those unused columns. */ \ if ( panel_len < panel_len_max ) \ { \ ctype_p* restrict zero = PASTEMAC(chp,0); \ const dim_t offm = 0; \ const dim_t offn = panel_len; \ const dim_t m_edge = panel_dim_max; \ const dim_t n_edge = panel_len_max - panel_len; \ \ ( void ) zero; \ ( void ) m_edge; ( void )offm; \ ( void ) n_edge; ( void )offn; \ \ PASTEMAC(chp,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ else \ { \ /* Mixed-datatype packing should not occur for any other schemas. 
*/ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } \ \ \ /* if ( bli_is_col_packed( schema ) ) \ PASTEMAC(ch,fprintm)( stdout, "packm_struc_cxk: bp copied", m_panel_max, n_panel_max, \ p, rs_p, cs_p, "%4.1f", "" ); \ else if ( bli_is_row_packed( schema ) ) \ PASTEMAC(ch,fprintm)( stdout, "packm_struc_cxk: ap copied", m_panel_max, n_panel_max, \ p, rs_p, cs_p, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC2_BASIC0( packm_struc_cxk_md ) INSERT_GENTFUNC2_MIXDP0( packm_struc_cxk_md ) // ----------------------------------------------------------------------------- #undef GENTFUNC2 #define GENTFUNC2( ctype_a, ctype_p, cha, chp, opname ) \ \ void PASTEMAC2(cha,chp,opname) \ ( \ conj_t conja, \ dim_t m, \ dim_t n, \ ctype_p* restrict kappa, \ ctype_a* restrict a, inc_t inca, inc_t lda, \ ctype_p* restrict p, inc_t ldp \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * ldp; \ \ PASTEMAC(chp,ctyper)* restrict kappa_r = ( PASTEMAC(chp,ctyper)* )kappa; \ PASTEMAC(chp,ctyper)* restrict kappa_i = ( PASTEMAC(chp,ctyper)* )kappa + 1; \ PASTEMAC(cha,ctyper)* restrict alpha1_r = ( PASTEMAC(cha,ctyper)* )a; \ PASTEMAC(cha,ctyper)* restrict alpha1_i = ( PASTEMAC(cha,ctyper)* )a + 1; \ PASTEMAC(chp,ctyper)* restrict pi1_r = ( PASTEMAC(chp,ctyper)* )p; \ PASTEMAC(chp,ctyper)* restrict pi1_i = ( PASTEMAC(chp,ctyper)* )p + ldp; \ \ ( void )kappa_i; \ \ if ( PASTEMAC(chp,eq1)( *kappa ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC2(cha,chp,copyjris) \ ( \ *(alpha1_r + i*inca2), \ *(alpha1_i + i*inca2), \ *(pi1_r + i* 1), \ *(pi1_i + i* 1) \ ); \ } \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC2(cha,chp,copyris) \ ( \ *(alpha1_r + i*inca2), \ *(alpha1_i + i*inca2), \ *(pi1_r + i* 1), \ *(pi1_i + i* 1) \ ); \ } \ \ alpha1_r += lda2; 
\ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC3(chp,cha,chp,scal2jris) \ ( \ *kappa_r, \ *kappa_i, \ *(alpha1_r + i*inca2), \ *(alpha1_i + i*inca2), \ *(pi1_r + i* 1), \ *(pi1_i + i* 1) \ ); \ } \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC3(chp,cha,chp,scal2ris) \ ( \ *kappa_r, \ *kappa_i, \ *(alpha1_r + i*inca2), \ *(alpha1_i + i*inca2), \ *(pi1_r + i* 1), \ *(pi1_i + i* 1) \ ); \ } \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } INSERT_GENTFUNC2_BASIC0( packm_cxk_1r_md ) INSERT_GENTFUNC2_MIXDP0( packm_cxk_1r_md ) // ----------------------------------------------------------------------------- #undef GENTFUNC2 #define GENTFUNC2( ctype_a, ctype_p, cha, chp, opname ) \ \ void PASTEMAC2(cha,chp,opname) \ ( \ conj_t conja, \ dim_t m, \ dim_t n, \ ctype_p* restrict kappa, \ ctype_a* restrict a, inc_t inca, inc_t lda, \ ctype_p* restrict p, inc_t ldp \ ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype_a* restrict alpha1_ri = ( ctype_a* )a; \ ctype_p* restrict pi1_ri = ( ctype_p* )p; \ ctype_p* restrict pi1_ir = ( ctype_p* )p + ldp1/2; \ \ ( void )inca1; \ \ if ( PASTEMAC(chp,eq1)( *kappa ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC2(cha,chp,copyj1es) \ ( \ *(alpha1_ri + i*inca1), \ *(pi1_ri + i* 1), \ *(pi1_ir + i* 1) \ ); \ } \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC2(cha,chp,copy1es) \ ( \ *(alpha1_ri + i*inca1), \ *(pi1_ri + i* 1), \ *(pi1_ir + i* 1) \ ); \ } \ \ alpha1_ri += lda1; \ 
pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC3(chp,cha,chp,scal2j1es) \ ( \ *kappa, \ *(alpha1_ri + i*inca1), \ *(pi1_ri + i* 1), \ *(pi1_ir + i* 1) \ ); \ } \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC3(chp,cha,chp,scal21es) \ ( \ *kappa, \ *(alpha1_ri + i*inca1), \ *(pi1_ri + i* 1), \ *(pi1_ir + i* 1) \ ); \ } \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } INSERT_GENTFUNC2_BASIC0( packm_cxk_1e_md ) INSERT_GENTFUNC2_MIXDP0( packm_cxk_1e_md ) #endif blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_md.h000066400000000000000000000054711360743507500220310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROT2 #define GENTPROT2( ctype_c, ctype_p, chc, chp, varname ) \ \ void PASTEMAC2(chc,chp,varname) \ ( \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype_p* restrict kappa, \ ctype_c* restrict c, inc_t rs_c, inc_t cs_c, \ ctype_p* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ); INSERT_GENTPROT2_BASIC0( packm_struc_cxk_md ) INSERT_GENTPROT2_MIXDP0( packm_struc_cxk_md ) #undef GENTPROT2 #define GENTPROT2( ctype_a, ctype_p, cha, chp, opname ) \ \ void PASTEMAC2(cha,chp,opname) \ ( \ conj_t conja, \ dim_t m, \ dim_t n, \ ctype_p* restrict kappa, \ ctype_a* restrict a, inc_t inca, inc_t lda, \ ctype_p* restrict p, inc_t ldp \ ); INSERT_GENTPROT2_BASIC0( packm_cxk_1e_md ) INSERT_GENTPROT2_MIXDP0( packm_cxk_1e_md ) INSERT_GENTPROT2_BASIC0( packm_cxk_1r_md ) INSERT_GENTPROT2_MIXDP0( packm_cxk_1r_md ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_rih.c000066400000000000000000000431561360743507500222100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ) \ { \ dim_t panel_dim; \ dim_t panel_dim_max; \ dim_t panel_len; \ dim_t panel_len_max; \ inc_t incc, ldc; \ inc_t ldp; \ \ \ /* Determine the dimensions and relative strides of the micro-panel based on its pack schema. */ \ if ( bli_is_col_packed( schema ) ) \ { \ /* Prepare to pack to row-stored column panel. 
*/ \ panel_dim = n_panel; \ panel_dim_max = n_panel_max; \ panel_len = m_panel; \ panel_len_max = m_panel_max; \ incc = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( bli_is_row_packed( schema ) ) */ \ { \ /* Prepare to pack to column-stored row panel. */ \ panel_dim = m_panel; \ panel_dim_max = m_panel_max; \ panel_len = n_panel; \ panel_len_max = n_panel_max; \ incc = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ \ /* Handle micro-panel packing based on the structure of the matrix being packed. */ \ if ( bli_is_general( strucc ) ) \ { \ /* For micro-panels of general matrices, we can call the pack kernel front-end directly. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ } \ else if ( bli_is_herm_or_symm( strucc ) ) \ { \ /* Call a helper function for micro-panels of Hermitian/symmetric matrices. */ \ PASTEMAC(ch,packm_herm_cxk_rih) \ ( \ strucc, \ diagoffc, \ uploc, \ conjc, \ schema, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ ldp, \ cntx \ ); \ } \ else /* ( bli_is_triangular( strucc ) ) */ \ { \ /* Call a helper function for micro-panels of triangular matrices. */ \ PASTEMAC(ch,packm_tri_cxk_rih) \ ( \ strucc, \ diagoffc, \ diagc, \ uploc, \ conjc, \ schema, \ invdiag, \ m_panel, \ n_panel, \ m_panel_max, \ n_panel_max, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, rs_c, cs_c, \ incc, ldc, \ p, rs_p, cs_p, \ ldp, \ cntx \ ); \ } \ \ \ /* If m_panel < m_panel_max, or n_panel < n_panel_max, we would normally fill the edge region (the bottom m_panel_max - m_panel rows or right- side n_panel_max - n_panel columns) of the micropanel with zeros. However, this responsibility has been moved to the packm microkernel. 
This change allows experts to use custom kernels that pack to custom packing formats when the problem size is not a nice multiple of the register blocksize. */ \ /* if ( m_panel != m_panel_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ dim_t i = m_panel; \ dim_t m_edge = m_panel_max - i; \ dim_t n_edge = n_panel_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*rs_p; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ \ if ( n_panel != n_panel_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ dim_t j = n_panel; \ dim_t m_edge = m_panel_max; \ dim_t n_edge = n_panel_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*cs_p; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ */ \ \ \ if ( bli_is_triangular( strucc ) ) \ { \ /* If this panel is an edge case in both panel dimension and length, then it must be a bottom-right corner case. Set the part of the diagonal that extends into the zero-padded region to identity. NOTE: This is actually only necessary when packing for trsm, as it helps prevent NaNs and Infs from creeping into the computation. However, we set the region to identity for trmm as well. Those 1.0's end up getting muliplied by the 0.0's in the zero-padded region of the other matrix, so there is no harm in this. */ \ if ( m_panel != m_panel_max && \ n_panel != n_panel_max ) \ { \ /* We don't need this case if we aren't supporting trsm. Why? Because trmm's packm control tree node should be using k dimension multiples of 1 (kr == 1), which means there will never be zero padding at the far end of a micro-panel. 
*/ \ } \ } \ \ \ /* { \ if ( bli_is_col_packed( schema ) ) \ PASTEMAC(chr,fprintm)( stdout, "packm_struc_cxk_rih: bp copied", m_panel_max, n_panel_max, \ ( ctype_r* )p, rs_p, cs_p, "%4.1f", "" ); \ else if ( bli_is_row_packed( schema ) ) \ PASTEMAC(chr,fprintm)( stdout, "packm_struc_cxk_rih: ap copied", m_panel_max, n_panel_max, \ ( ctype_r* )p, rs_p, cs_p, "%4.1f", "" ); \ } \ */ \ \ \ } INSERT_GENTFUNCCO_BASIC( packm_struc_cxk_rih, packm_cxk_rih ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ) \ { \ bool_t row_stored; \ bool_t col_stored; \ doff_t diagoffc_abs; \ dim_t j; \ \ \ /* Create flags to incidate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel. Hence the mismatch in "row" and "column" semantics. */ \ row_stored = bli_is_col_packed( schema ); \ col_stored = bli_is_row_packed( schema ); \ \ \ /* Handle the case where the micro-panel does NOT intersect the diagonal separately from the case where it does intersect. */ \ if ( !bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) \ { \ /* If the current panel is unstored, we need to make a few adjustments so we refer to the data where it is actually stored, also taking conjugation into account. (Note this implicitly assumes we are operating on a dense panel within a larger symmetric or Hermitian matrix, since a general matrix would not contain any unstored region.) 
*/ \ if ( bli_is_unstored_subpart_n( diagoffc, uploc, m_panel, n_panel ) ) \ { \ c = c + diagoffc * ( doff_t )cs_c + \ -diagoffc * ( doff_t )rs_c; \ bli_swap_incs( &incc, &ldc ); \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc ); \ } \ \ /* Pack the full panel. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ } \ else /* if ( bli_intersects_diag_n( diagoffc, m_panel, n_panel ) ) */ \ { \ ctype_r* restrict p_r = ( ctype_r* )p; \ \ ctype* restrict c10; \ ctype_r* restrict p10; \ dim_t p10_dim, p10_len; \ inc_t incc10, ldc10; \ doff_t diagoffc10; \ conj_t conjc10; \ \ ctype* restrict c12; \ ctype_r* restrict p12; \ dim_t p12_dim, p12_len; \ inc_t incc12, ldc12; \ doff_t diagoffc12; \ conj_t conjc12; \ \ /* Sanity check. Diagonals should not intersect the short end of a micro-panel. If they do, then somehow the constraints on cache blocksizes being a whole multiple of the register blocksizes was somehow violated. 
*/ \ if ( ( col_stored && diagoffc < 0 ) || \ ( row_stored && diagoffc > 0 ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ diagoffc_abs = bli_abs( diagoffc ); \ \ if ( ( row_stored && bli_is_upper( uploc ) ) || \ ( col_stored && bli_is_lower( uploc ) ) ) \ { \ p10_dim = panel_dim; \ p10_len = diagoffc_abs; \ p10 = p_r; \ c10 = c; \ incc10 = incc; \ ldc10 = ldc; \ conjc10 = conjc; \ \ p12_dim = panel_dim; \ p12_len = panel_len - p10_len; \ j = p10_len; \ diagoffc12 = diagoffc_abs - j; \ p12 = p_r + (j )*ldp; \ c12 = c + (j )*ldc; \ c12 = c12 + diagoffc12 * ( doff_t )cs_c + \ -diagoffc12 * ( doff_t )rs_c; \ incc12 = ldc; \ ldc12 = incc; \ conjc12 = conjc; \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc12 ); \ } \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ ( col_stored && bli_is_upper( uploc ) ) ) */ \ { \ p10_dim = panel_dim; \ p10_len = diagoffc_abs + panel_dim; \ diagoffc10 = diagoffc; \ p10 = p_r; \ c10 = c; \ c10 = c10 + diagoffc10 * ( doff_t )cs_c + \ -diagoffc10 * ( doff_t )rs_c; \ incc10 = ldc; \ ldc10 = incc; \ conjc10 = conjc; \ \ p12_dim = panel_dim; \ p12_len = panel_len - p10_len; \ j = p10_len; \ p12 = p_r + (j )*ldp; \ c12 = c + (j )*ldc; \ incc12 = incc; \ ldc12 = ldc; \ conjc12 = conjc; \ \ if ( bli_is_hermitian( strucc ) ) \ bli_toggle_conj( &conjc10 ); \ } \ \ /* Pack to p10. For upper storage, this includes the unstored triangle of c11. */ \ /* NOTE: Since we're only packing partial panels here, we pass in p1x_len as panel_len_max; otherwise, the packm kernel will zero- fill the columns up to panel_len_max, which is not what we need or want to happen. */ \ PASTEMAC(ch,kername) \ ( \ conjc10, \ schema, \ p10_dim, \ panel_dim_max, \ p10_len, \ p10_len, \ kappa, \ c10, incc10, ldc10, \ ( ctype* )p10, ldp, \ cntx \ ); \ \ /* Pack to p12. For lower storage, this includes the unstored triangle of c11. 
*/ \ /* NOTE: Since we're only packing partial panels here, we pass in p1x_len as panel_len_max; otherwise, the packm kernel will zero- fill the columns up to panel_len_max, which is not what we need or want to happen. */ \ PASTEMAC(ch,kername) \ ( \ conjc12, \ schema, \ p12_dim, \ panel_dim_max, \ p12_len, \ p12_len, \ kappa, \ c12, incc12, ldc12, \ ( ctype* )p12, ldp, \ cntx \ ); \ \ /* Pack the stored triangle of c11 to p11. */ \ { \ dim_t j2 = diagoffc_abs; \ /*ctype_r* restrict p_r = ( ctype_r* )p;*/ \ ctype* restrict c11 = c + (j2 )*ldc; \ ctype_r* restrict p11_r = p_r + (j2 )*ldp; \ \ PASTEMAC(ch,scal2rihs_mxn_uplo) \ ( \ schema, \ uploc, \ conjc, \ panel_dim, \ kappa, \ c11, rs_c, cs_c, \ p11_r, rs_p, cs_p \ ); \ \ /* If we are packing a micro-panel with Hermitian structure, we must take special care of the diagonal. Now, if kappa were guaranteed to be unit, all we would need to do is explicitly zero out the imaginary part of the diagonal of p11, in case the diagonal of the source matrix contained garbage (non-zero) imaginary values. HOWEVER, since kappa can be non-unit, things become a little more complicated. In general, we must re-apply the kappa scalar to ONLY the real part of the diagonal of the source matrix and save the result to the diagonal of p11. 
*/ \ if ( bli_is_hermitian( strucc ) ) \ { \ PASTEMAC3(ch,chr,ch,scal2rihs_mxn_diag) \ ( \ schema, \ panel_dim, \ panel_dim, \ kappa, \ c11, rs_c, cs_c, \ p11_r, rs_p, cs_p \ ); \ } \ \ /* PASTEMAC(chr,fprintm)( stdout, "packm_herm_cxk: ap_r copied", m_panel_max, n_panel_max, \ p_r + 0*is_p, rs_p, cs_p, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_herm_cxk: ap_i copied", m_panel_max, n_panel_max, \ p_r + 1*is_p, rs_p, cs_p, "%4.1f", "" ); \ */ \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_herm_cxk_rih, packm_cxk_rih ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ) \ { \ /* Pack the panel. */ \ PASTEMAC(ch,kername) \ ( \ conjc, \ schema, \ panel_dim, \ panel_dim_max, \ panel_len, \ panel_len_max, \ kappa, \ c, incc, ldc, \ p, ldp, \ cntx \ ); \ \ \ /* Tweak the panel according to its triangular structure */ \ { \ ctype_r* p_r = ( ctype_r* )p; \ \ dim_t j = bli_abs( diagoffp ); \ ctype_r* p11_r = p_r + (j )*ldp; \ \ /* If the diagonal of c is implicitly unit, explicitly set the the diagonal of the packed panel to kappa. */ \ if ( bli_is_unit_diag( diagc ) ) \ { \ PASTEMAC(ch,setrihs_mxn_diag) \ ( \ schema, \ panel_dim, \ panel_dim, \ kappa, \ p11_r, rs_p, cs_p \ ); \ } \ \ \ /* If requested, invert the diagonal of the packed panel. */ \ if ( invdiag == TRUE ) \ { \ /* We don't need this case if we aren't supporting trsm. */ \ } \ \ \ /* Set the region opposite the diagonal of p to zero. 
To do this, we need to reference the "unstored" region on the other side of the diagonal. This amounts to toggling uploc and then shifting the diagonal offset to shrink the newly referenced region (by one diagonal). */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ uplo_t uplop = uploc; \ \ bli_toggle_uplo( &uplop ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ BLIS_NONUNIT_DIAG, \ uplop, \ m_panel, \ n_panel, \ zero_r, \ p_r, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC( packm_tri_cxk_rih, packm_cxk_rih ) blis-0.6.1/frame/1m/packm/bli_packm_struc_cxk_rih.h000066400000000000000000000103151360743507500222040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffp, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_struc_cxk_rih ) #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_herm_cxk_rih ) #undef GENTPROTCO #define GENTPROTCO( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ conj_t conjc, \ pack_t schema, \ bool_t invdiag, \ dim_t m_panel, \ dim_t n_panel, \ dim_t m_panel_max, \ dim_t n_panel_max, \ dim_t panel_dim, \ dim_t panel_dim_max, \ dim_t panel_len, \ dim_t panel_len_max, \ ctype* restrict 
kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ inc_t incc, inc_t ldc, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ inc_t ldp, \ cntx_t* cntx \ ); INSERT_GENTPROTCO_BASIC0( packm_tri_cxk_rih ) blis-0.6.1/frame/1m/packm/bli_packm_thrinfo.c000066400000000000000000000043461360743507500210100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_packm_thrinfo_init ( thrinfo_t* thread, thrcomm_t* ocomm, dim_t ocomm_id, dim_t n_way, dim_t work_id, bszid_t bszid, thrinfo_t* sub_node ) { bli_thrinfo_init ( thread, ocomm, ocomm_id, n_way, work_id, FALSE, BLIS_NO_PART, sub_node ); } void bli_packm_thrinfo_init_single ( thrinfo_t* thread ) { bli_packm_thrinfo_init ( thread, &BLIS_SINGLE_COMM, 0, 1, 0, BLIS_NO_PART, NULL ); } blis-0.6.1/frame/1m/packm/bli_packm_thrinfo.h000066400000000000000000000057621360743507500210200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // thrinfo_t macros specific to packm. // /* #define bli_packm_thread_my_iter( index, thread ) \ \ ( index % thread->n_way == thread->work_id % thread->n_way ) */ #define bli_packm_my_iter_rr( i, start, end, work_id, n_way ) \ \ ( i % n_way == work_id % n_way ) #define bli_packm_my_iter_sl( i, start, end, work_id, n_way ) \ \ ( start <= i && i < end ) // Define a general-purpose version of bli_packm_my_iter() whose definition // depends on whether slab or round-robin partitioning was requested at // configure-time. #ifdef BLIS_ENABLE_JRIR_SLAB #define bli_packm_my_iter bli_packm_my_iter_sl #else // BLIS_ENABLE_JRIR_RR #define bli_packm_my_iter bli_packm_my_iter_rr #endif // // thrinfo_t APIs specific to packm. // #if 0 thrinfo_t* bli_packm_thrinfo_create ( thrcomm_t* ocomm, dim_t ocomm_id, dim_t n_way, dim_t work_id, thrinfo_t* sub_node ); #endif void bli_packm_thrinfo_init ( thrinfo_t* thread, thrcomm_t* ocomm, dim_t ocomm_id, dim_t n_way, dim_t work_id, bszid_t bszid, thrinfo_t* sub_node ); void bli_packm_thrinfo_init_single ( thrinfo_t* thread ); #if 0 void bli_packm_thrinfo_free ( thrinfo_t* thread ); #endif blis-0.6.1/frame/1m/packm/bli_packm_unb_var1.c000066400000000000000000000214331360743507500210500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T packm_fp typedef void (*FUNCPTR_T)( struc_t strucc, doff_t diagoffc, diag_t diagc, uplo_t uploc, trans_t transc, dim_t m, dim_t n, dim_t m_max, dim_t n_max, void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p, cntx_t* cntx ); static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1); void bli_packm_unb_var1 ( obj_t* c, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_cp = bli_obj_dt( c ); struc_t strucc = bli_obj_struc( c ); doff_t diagoffc = bli_obj_diag_offset( c ); diag_t diagc = bli_obj_diag( c ); uplo_t uploc = bli_obj_uplo( c ); trans_t transc = bli_obj_conjtrans_status( c ); dim_t m_p = bli_obj_length( p ); dim_t n_p = bli_obj_width( p ); dim_t m_max_p = bli_obj_padded_length( p ); dim_t n_max_p = bli_obj_padded_width( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t rs_p = bli_obj_row_stride( p ); inc_t cs_p = bli_obj_col_stride( p ); void* buf_kappa; FUNCPTR_T f; // This variant assumes that the computational kernel will always apply // the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE // for kappa so that the underlying packm implementation does not scale // during packing. buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; if( bli_thread_am_ochief( thread ) ) { // Invoke the function. 
f ( strucc, diagoffc, diagc, uploc, transc, m_p, n_p, m_max_p, n_max_p, buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, cntx ); } } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ trans_t transc, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ cntx_t* cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict c_cast = c; \ ctype* restrict p_cast = p; \ ctype* restrict zero = PASTEMAC(ch,0); \ \ /* We begin by packing the region indicated by the parameters. If matrix c is dense (either because the structure is general or because the structure has already been "densified"), this ends up being the only action we take. Note that if kappa is unit, the data is simply copied (rather than scaled by one). */ \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ diagoffc, \ diagc, \ uploc, \ transc, \ m, \ n, \ kappa_cast, \ c_cast, rs_c, cs_c, \ p_cast, rs_p, cs_p, \ cntx, \ NULL \ ); \ \ /* If uploc is upper or lower, then the structure of c is necessarily non-dense (ie: Hermitian, symmetric, or triangular, where part of the matrix is unstored). In these cases, we want to fill in the unstored part of the matrix. How this is done depends on the structure of c. */ \ if ( bli_is_upper_or_lower( uploc ) ) \ { \ /* The Hermitian and symmetric cases are almost identical, so we handle them in one conditional block. */ \ if ( bli_is_hermitian( strucc ) || bli_is_symmetric( strucc ) ) \ { \ /* First we must reflect the region referenced to the opposite side of the diagonal. 
*/ \ c_cast = c_cast + diagoffc * ( doff_t )cs_c + \ -diagoffc * ( doff_t )rs_c; \ bli_negate_diag_offset( &diagoffc ); \ bli_toggle_trans( &transc ); \ if ( bli_is_upper( uploc ) ) diagoffc += 1; \ else if ( bli_is_lower( uploc ) ) diagoffc -= 1; \ \ /* If c is Hermitian, we need to apply a conjugation when copying the region opposite the diagonal. */ \ if ( bli_is_hermitian( strucc ) ) \ transc = bli_trans_toggled_conj( transc ); \ \ /* Copy the data from the region opposite the diagonal of c (as specified by the original value of diagoffc). Notice that we use a diag parameter of non-unit since we can assume nothing about the neighboring off-diagonal. */ \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ diagoffc, \ BLIS_NONUNIT_DIAG, \ uploc, \ transc, \ m, \ n, \ kappa_cast, \ c_cast, rs_c, cs_c, \ p_cast, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ else /* if ( bli_is_triangular( strucc ) ) */ \ { \ doff_t diagoffp = diagoffc; \ uplo_t uplop = uploc; \ \ /* For this step we need the uplo and diagonal offset of p, which we can derive from the parameters given. */ \ if ( bli_does_trans( transc ) ) \ { \ bli_negate_diag_offset( &diagoffp ); \ bli_toggle_uplo( &uplop ); \ } \ \ /* For triangular matrices, we wish to reference the region strictly opposite the diagonal of C. This amounts to toggling uploc and then shifting the diagonal offset to shrink the stored region (by one diagonal). */ \ bli_toggle_uplo( &uplop ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \ \ /* Set the region opposite the diagonal of p to zero. */ \ PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffp, \ BLIS_NONUNIT_DIAG, \ uplop, \ m, \ n, \ zero, \ p_cast, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ } \ \ /* The packed memory region was acquired/allocated with "aligned" dimensions (ie: dimensions that were possibly inflated up to a multiple). When these dimension are inflated, it creates empty regions along the bottom and/or right edges of the matrix. 
If eithe region exists, we set them to zero. This simplifies the register level micro kernel in that it does not need to support different register blockings for the edge cases. */ \ if ( m != m_max ) \ { \ ctype* p_edge = p_cast + (m )*rs_p; \ \ PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_max - m, \ n_max, \ zero, \ p_edge, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ \ if ( n != n_max ) \ { \ ctype* p_edge = p_cast + (n )*cs_p; \ \ PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_max, \ n_max - n, \ zero, \ p_edge, rs_p, cs_p, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNC_BASIC0( packm_unb_var1 ) blis-0.6.1/frame/1m/packm/bli_packm_unb_var1.h000066400000000000000000000044111360743507500210520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_packm_unb_var1 ( obj_t* c, obj_t* p, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ); #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ trans_t transc, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( packm_unb_var1 ) blis-0.6.1/frame/1m/packm/bli_packm_var.h000066400000000000000000000064211360743507500201300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* c, \ obj_t* p, \ cntx_t* cntx, \ cntl_t* cntl, \ thrinfo_t* t \ ); GENPROT( packm_unb_var1 ) GENPROT( packm_blk_var1 ) // // Prototype BLAS-like interfaces with void pointer operands. 
// #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ trans_t transc, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( packm_unb_var1 ) #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ trans_t transc, \ pack_t schema, \ bool_t invdiag, \ bool_t revifup, \ bool_t reviflo, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ inc_t is_p, \ dim_t pd_p, inc_t ps_p, \ void_fp packm_ker, \ cntx_t* cntx, \ thrinfo_t* thread \ ); INSERT_GENTPROT_BASIC0( packm_blk_var1 ) blis-0.6.1/frame/1m/unpackm/000077500000000000000000000000001360743507500155265ustar00rootroot00000000000000blis-0.6.1/frame/1m/unpackm/bli_unpackm.h000066400000000000000000000035071360743507500201700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_unpackm_cntl.h" #include "bli_unpackm_check.h" #include "bli_unpackm_int.h" #include "bli_unpackm_unb_var1.h" #include "bli_unpackm_blk_var1.h" #include "bli_unpackm_cxk.h" blis-0.6.1/frame/1m/unpackm/bli_unpackm_blk_var1.c000066400000000000000000000206031360743507500217400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T unpackm_fp typedef void (*FUNCPTR_T)( struc_t strucc, doff_t diagoffc, diag_t diagc, uplo_t uploc, trans_t transc, dim_t m, dim_t n, dim_t m_panel, dim_t n_panel, void* p, inc_t rs_p, inc_t cs_p, dim_t pd_p, inc_t ps_p, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx ); static FUNCPTR_T GENARRAY(ftypes,unpackm_blk_var1); void bli_unpackm_blk_var1 ( obj_t* p, obj_t* c, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_cp = bli_obj_dt( c ); // Normally we take the parameters from the source argument. But here, // the packm/unpackm framework is not yet solidified enough for us to // assume that at this point struc(P) == struc(C), (ie: since // densification may have marked P's structure as dense when the root // is upper or lower). So, we take the struc field from C, not P. struc_t strucc = bli_obj_struc( c ); doff_t diagoffc = bli_obj_diag_offset( c ); diag_t diagc = bli_obj_diag( c ); uplo_t uploc = bli_obj_uplo( c ); // Again, normally the trans argument is on the source matrix. But we // know that the packed matrix is not transposed. If there is to be a // transposition, it is because C was originally transposed when packed. // Thus, we query C for the trans status, not P. 
Also, we only query // the trans status (not the conjugation status), since we probably // don't want to un-conjugate if the original matrix was conjugated // when packed. trans_t transc = bli_obj_onlytrans_status( c ); dim_t m_c = bli_obj_length( c ); dim_t n_c = bli_obj_width( c ); dim_t m_panel = bli_obj_panel_length( c ); dim_t n_panel = bli_obj_panel_width( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t rs_p = bli_obj_row_stride( p ); inc_t cs_p = bli_obj_col_stride( p ); dim_t pd_p = bli_obj_panel_dim( p ); inc_t ps_p = bli_obj_panel_stride( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_cp]; // Invoke the function. f( strucc, diagoffc, diagc, uploc, transc, m_c, n_c, m_panel, n_panel, buf_p, rs_p, cs_p, pd_p, ps_p, buf_c, rs_c, cs_c, cntx ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ trans_t transc, \ dim_t m, \ dim_t n, \ dim_t m_panel, \ dim_t n_panel, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict c_cast = c; \ ctype* restrict p_cast = p; \ ctype* restrict c_begin; \ ctype* restrict p_begin; \ \ dim_t iter_dim; \ dim_t num_iter; \ dim_t it, ic, ip; \ dim_t ic0, ip0; \ doff_t ic_inc, ip_inc; \ doff_t diagoffc_i; \ doff_t diagoffc_inc; \ dim_t panel_len; \ dim_t panel_dim_i; \ dim_t panel_dim_max; \ inc_t vs_c; \ inc_t incc, ldc; \ inc_t ldp; \ dim_t* m_panel_full; \ dim_t* n_panel_full; \ \ \ /* If c needs a transposition, induce it so that we can more simply express the remaining parameters and code. 
*/ \ if ( bli_does_trans( transc ) ) \ { \ bli_swap_incs( &rs_c, &cs_c ); \ bli_negate_diag_offset( &diagoffc ); \ bli_toggle_uplo( &uploc ); \ bli_toggle_trans( &transc ); \ } \ \ /* If the strides of p indicate row storage, then we are packing to column panels; otherwise, if the strides indicate column storage, we are packing to row panels. */ \ if ( bli_is_row_stored_f( m_panel, n_panel, rs_p, cs_p ) ) \ { \ /* Prepare to unpack from column panels. */ \ iter_dim = n; \ panel_len = m; \ panel_dim_max = pd_p; \ incc = cs_c; \ ldc = rs_c; \ vs_c = cs_c; \ diagoffc_inc = -( doff_t)panel_dim_max; \ ldp = rs_p; \ m_panel_full = &m; \ n_panel_full = &panel_dim_i; \ } \ else /* if ( bli_is_col_stored_f( m_panel, n_panel, rs_p, cs_p ) ) */ \ { \ /* Prepare to unpack from row panels. */ \ iter_dim = m; \ panel_len = n; \ panel_dim_max = pd_p; \ incc = rs_c; \ ldc = cs_c; \ vs_c = rs_c; \ diagoffc_inc = ( doff_t )panel_dim_max; \ ldp = cs_p; \ m_panel_full = &panel_dim_i; \ n_panel_full = &n; \ } \ \ /* Compute the total number of iterations we'll need. */ \ num_iter = iter_dim / panel_dim_max + ( iter_dim % panel_dim_max ? 1 : 0 ); \ \ { \ ic0 = 0; \ ic_inc = panel_dim_max; \ ip0 = 0; \ ip_inc = 1; \ } \ \ for ( ic = ic0, ip = ip0, it = 0; it < num_iter; \ ic += ic_inc, ip += ip_inc, it += 1 ) \ { \ panel_dim_i = bli_min( panel_dim_max, iter_dim - ic ); \ \ diagoffc_i = diagoffc + (ip )*diagoffc_inc; \ \ p_begin = p_cast + ip * ps_p; \ c_begin = c_cast + ic * vs_c; \ \ /* If the current panel of C intersects the diagonal AND is upper or lower stored, then we must call scal2m. Otherwise, we can use a variant that is oblivious to structure and storage (and thus tends to be faster). 
*/ \ if ( bli_intersects_diag_n( diagoffc_i, *m_panel_full, *n_panel_full ) && \ bli_is_upper_or_lower( uploc ) ) \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ diagoffc_i, \ diagc, \ uploc, \ transc, \ *m_panel_full, \ *n_panel_full, \ one, \ p_begin, rs_p, cs_p, \ c_begin, rs_c, cs_c, \ cntx, \ NULL \ ); \ } \ else \ { \ /* Pack the current panel. */ \ PASTEMAC(ch,unpackm_cxk) \ ( \ BLIS_NO_CONJUGATE, \ panel_dim_i, \ panel_len, \ one, \ p_begin, ldp, \ c_begin, incc, ldc, \ cntx \ ); \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "p copied", *m_panel_full, *n_panel_full, \ p_begin, rs_p, cs_p, "%4.1f", "" );*/ \ } \ \ } INSERT_GENTFUNC_BASIC0( unpackm_blk_var1 ) blis-0.6.1/frame/1m/unpackm/bli_unpackm_blk_var1.h000066400000000000000000000044451360743507500217530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_unpackm_blk_var1 ( obj_t* p, obj_t* c, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ); #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ struc_t strucc, \ doff_t diagoffc, \ diag_t diagc, \ uplo_t uploc, \ trans_t transc, \ dim_t m, \ dim_t n, \ dim_t m_panel, \ dim_t n_panel, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( unpackm_blk_var1 ) blis-0.6.1/frame/1m/unpackm/bli_unpackm_check.c000066400000000000000000000046321360743507500213200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_unpackm_int_check ( obj_t* p, obj_t* a, cntx_t* cntx ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( p ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_conformal_dims( p, a ); bli_check_error_code( e_val ); // Check pack status. e_val = bli_check_packm_schema_on_unpack( p ); bli_check_error_code( e_val ); // Check control tree pointer // NOTE: We can't check the control tree until we stop interpreting a // NULL value (in bli_unpackm_int()) as a request to skip the operation. //e_val = bli_check_valid_cntl( ( void* )cntl ); //bli_check_error_code( e_val ); } blis-0.6.1/frame/1m/unpackm/bli_unpackm_check.h000066400000000000000000000033541360743507500213250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_unpackm_int_check ( obj_t* p, obj_t* a, cntx_t* cntx ); blis-0.6.1/frame/1m/unpackm/bli_unpackm_cntl.c000066400000000000000000000052601360743507500212010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" cntl_t* bli_unpackm_cntl_create_node ( rntm_t* rntm, void_fp var_func, void_fp unpackm_var_func, cntl_t* sub_node ) { cntl_t* cntl; unpackm_params_t* params; // NOTE: If this function is ever called, figure out whether the // bli_malloc_intl() below needs to be changed to bli_sba_acquire(). bli_abort(); // Allocate an unpackm_params_t struct. params = bli_malloc_intl( sizeof( unpackm_params_t ) ); // Initialize the unpackm_params_t struct. 
params->size = sizeof( unpackm_params_t ); params->var_func = unpackm_var_func; // It's important that we set the bszid field to BLIS_NO_PART to indicate // that no blocksize partitioning is performed. bli_cntl_free() will rely // on this information to know how to step through the thrinfo_t tree in // sync with the cntl_t tree. cntl = bli_cntl_create_node ( rntm, BLIS_NOID, BLIS_NO_PART, var_func, params, sub_node ); return cntl; } blis-0.6.1/frame/1m/unpackm/bli_unpackm_cntl.h000066400000000000000000000043251360743507500212070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ struct unpackm_params_s { uint64_t size; // size field must be present and come first. unpackm_var_oft var_func; }; typedef struct unpackm_params_s unpackm_params_t; #define bli_cntl_unpackm_params_var_func( cntl ) \ \ ( ( (unpackm_params_t*)(cntl)->params )->var_func ) // ----------------------------------------------------------------------------- cntl_t* bli_unpackm_cntl_create_node ( rntm_t* rntm, void_fp var_func, void_fp unpackm_var_func, cntl_t* sub_node ); blis-0.6.1/frame/1m/unpackm/bli_unpackm_cxk.c000066400000000000000000000060071360743507500210260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjp, \ dim_t panel_dim, \ dim_t panel_len, \ ctype* kappa, \ ctype* p, inc_t ldp, \ ctype* a, inc_t inca, inc_t lda, \ cntx_t* cntx \ ) \ { \ num_t dt = PASTEMAC(ch,type); \ l1mkr_t ker_id = panel_dim; \ \ PASTECH2(ch,opname,_ker_ft) f; \ \ /* Query the context for the unpackm kernel corresponding to the current panel dimension, or kernel id. If the id is invalid, the function will return NULL. */ \ f = bli_cntx_get_unpackm_ker_dt( dt, ker_id, cntx ); \ \ /* If there exists a kernel implementation for the micro-panel dimension provided, we invoke the implementation. Otherwise, we use scal2m. */ \ if ( f != NULL ) \ { \ f \ ( \ conjp, \ panel_len, \ kappa, \ p, ldp, \ a, inca, lda, \ cntx \ ); \ } \ else \ { \ trans_t transp = ( trans_t )conjp; \ \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). 
*/ \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ transp, \ panel_dim, \ panel_len, \ kappa, \ p, 1, ldp, \ a, inca, lda, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNC_BASIC0( unpackm_cxk ) blis-0.6.1/frame/1m/unpackm/bli_unpackm_cxk.h000066400000000000000000000037601360743507500210360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conjp, \ dim_t panel_dim, \ dim_t panel_len, \ ctype* kappa, \ ctype* p, inc_t ldp, \ ctype* a, inc_t inca, inc_t lda, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( unpackm_cxk ) blis-0.6.1/frame/1m/unpackm/bli_unpackm_int.c000066400000000000000000000050001360743507500210230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_unpackm_int ( obj_t* p, obj_t* a, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ) { bli_init_once(); unpackm_var_oft f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_unpackm_int_check( p, a, cntx ); // If p was aliased to a during the pack stage (because it was already // in an acceptable packed/contiguous format), then no unpack is actually // necessary, so we return. if ( bli_obj_is_alias_of( p, a ) ) return; // Extract the function pointer from the current control tree node. f = bli_cntl_unpackm_params_var_func( cntl ); // Invoke the variant. if ( bli_thread_am_ochief( thread ) ) { f ( p, a, cntx, cntl, thread ); } // Barrier so that unpacking is done before computation. bli_thread_obarrier( thread ); } blis-0.6.1/frame/1m/unpackm/bli_unpackm_int.h000066400000000000000000000034251360743507500210410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_unpackm_int ( obj_t* p, obj_t* a, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ); blis-0.6.1/frame/1m/unpackm/bli_unpackm_unb_var1.c000066400000000000000000000076441360743507500217660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T unpackm_fp typedef void (*FUNCPTR_T)( doff_t diagoffp, uplo_t uplop, trans_t transp, dim_t m, dim_t n, void* p, inc_t rs_p, inc_t cs_p, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx ); static FUNCPTR_T GENARRAY(ftypes,unpackm_unb_var1); void bli_unpackm_unb_var1 ( obj_t* p, obj_t* c, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_pc = bli_obj_dt( p ); doff_t diagoffp = bli_obj_diag_offset( p ); uplo_t uplop = bli_obj_uplo( p ); trans_t transc = bli_obj_onlytrans_status( c ); dim_t m_c = bli_obj_length( c ); dim_t n_c = bli_obj_width( c ); void* buf_p = bli_obj_buffer_at_off( p ); inc_t rs_p = bli_obj_row_stride( p ); inc_t cs_p = bli_obj_col_stride( p ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_pc]; // Invoke the function. 
f( diagoffp, uplop, transc, m_c, n_c, buf_p, rs_p, cs_p, buf_c, rs_c, cs_c, cntx ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, varname ) \ \ void PASTEMAC(ch,varname)( \ doff_t diagoffp, \ uplo_t uplop, \ trans_t transp, \ dim_t m, \ dim_t n, \ void* p, inc_t rs_p, inc_t cs_p, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ ctype* p_cast = p; \ ctype* c_cast = c; \ \ PASTEMAC2(ch,copym,BLIS_TAPI_EX_SUF) \ ( \ diagoffp,\ BLIS_NONUNIT_DIAG, \ uplop, \ transp, \ m, \ n, \ p_cast, rs_p, cs_p, \ c_cast, rs_c, cs_c, \ cntx, \ NULL \ ); \ } INSERT_GENTFUNC_BASIC( unpackm, unpackm_unb_var1 ) blis-0.6.1/frame/1m/unpackm/bli_unpackm_unb_var1.h000066400000000000000000000042231360743507500217610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_unpackm_unb_var1 ( obj_t* p, obj_t* c, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ); #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffp, \ uplo_t uplop, \ trans_t transp, \ dim_t m, \ dim_t n, \ void* p, inc_t rs_p, inc_t cs_p, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( unpackm_unb_var1 ) blis-0.6.1/frame/2/000077500000000000000000000000001360743507500137145ustar00rootroot00000000000000blis-0.6.1/frame/2/bli_l2.h000066400000000000000000000046161360743507500152370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_l2_check.h" // Define function types. #include "bli_l2_ft_unb.h" // Prototype object APIs (expert and non-expert). #include "bli_oapi_ex.h" #include "bli_l2_oapi.h" #include "bli_oapi_ba.h" #include "bli_l2_oapi.h" // Prototype typed APIs (expert and non-expert). #include "bli_tapi_ex.h" #include "bli_l2_tapi.h" #include "bli_l2_ft.h" #include "bli_tapi_ba.h" #include "bli_l2_tapi.h" #include "bli_l2_ft.h" // Generate function pointer arrays for tapi functions (expert only). #include "bli_l2_fpa.h" // Operation-specific headers #include "bli_gemv.h" #include "bli_ger.h" #include "bli_hemv.h" #include "bli_her.h" #include "bli_her2.h" #include "bli_symv.h" #include "bli_syr.h" #include "bli_syr2.h" #include "bli_trmv.h" #include "bli_trsv.h" blis-0.6.1/frame/2/bli_l2_check.c000066400000000000000000000244151360743507500163660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

/*
   Every check in this file follows the same pattern: a bli_check_*()
   helper produces an err_t, which is then handed to
   bli_check_error_code(). The checks run in the order written.
*/

/*
   gemv: shared matrix-vector checks, then a must pass
   bli_check_general_object() and have a datatype consistent with x and y.
*/
void bli_gemv_check
     (
       obj_t* alpha,
       obj_t* a,
       obj_t* x,
       obj_t* beta,
       obj_t* y
     )
{
	err_t status;

	/* Checks shared by gemv/hemv/symv/trmv/trsv. */
	bli_xxmv_check( alpha, a, x, beta, y );

	/* Structure of a. */
	status = bli_check_general_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and each vector. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );

	status = bli_check_consistent_object_datatypes( a, y );
	bli_check_error_code( status );
}

/*
   hemv: shared matrix-vector checks, then a must be square, pass
   bli_check_hermitian_object() and agree in datatype with x and y.
*/
void bli_hemv_check
     (
       obj_t* alpha,
       obj_t* a,
       obj_t* x,
       obj_t* beta,
       obj_t* y
     )
{
	err_t status;

	/* Checks shared by gemv/hemv/symv/trmv/trsv. */
	bli_xxmv_check( alpha, a, x, beta, y );

	/* Squareness of a. */
	status = bli_check_square_object( a );
	bli_check_error_code( status );

	/* Structure of a. */
	status = bli_check_hermitian_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and each vector. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );

	status = bli_check_consistent_object_datatypes( a, y );
	bli_check_error_code( status );
}

/*
   symv: shared matrix-vector checks, then a must be square, pass
   bli_check_symmetric_object() and agree in datatype with x and y.
*/
void bli_symv_check
     (
       obj_t* alpha,
       obj_t* a,
       obj_t* x,
       obj_t* beta,
       obj_t* y
     )
{
	err_t status;

	/* Checks shared by gemv/hemv/symv/trmv/trsv. */
	bli_xxmv_check( alpha, a, x, beta, y );

	/* Squareness of a. */
	status = bli_check_square_object( a );
	bli_check_error_code( status );

	/* Structure of a. */
	status = bli_check_symmetric_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and each vector. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );

	status = bli_check_consistent_object_datatypes( a, y );
	bli_check_error_code( status );
}

/*
   trmv: there is no separate beta or y here, so alpha and x are passed
   to the shared checks in those positions as well. a must then be
   square, pass bli_check_triangular_object() and agree in datatype
   with x.
*/
void bli_trmv_check
     (
       obj_t* alpha,
       obj_t* a,
       obj_t* x
     )
{
	err_t status;

	/* Checks shared by gemv/hemv/symv/trmv/trsv. */
	bli_xxmv_check( alpha, a, x, alpha, x );

	/* Squareness of a. */
	status = bli_check_square_object( a );
	bli_check_error_code( status );

	/* Structure of a. */
	status = bli_check_triangular_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and x. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );
}

/*
   trsv: performs the same sequence of checks as bli_trmv_check().
*/
void bli_trsv_check
     (
       obj_t* alpha,
       obj_t* a,
       obj_t* x
     )
{
	err_t status;

	/* Checks shared by gemv/hemv/symv/trmv/trsv. */
	bli_xxmv_check( alpha, a, x, alpha, x );

	/* Squareness of a. */
	status = bli_check_square_object( a );
	bli_check_error_code( status );

	/* Structure of a. */
	status = bli_check_triangular_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and x. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );
}

/*
   ger: shared rank-update checks, then a must pass
   bli_check_general_object() and agree in datatype with x and y.
*/
void bli_ger_check
     (
       obj_t* alpha,
       obj_t* x,
       obj_t* y,
       obj_t* a
     )
{
	err_t status;

	/* Checks shared by ger/her/her2/syr/syr2. */
	bli_xxr_check( alpha, x, y, a );

	/* Structure of a. */
	status = bli_check_general_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and each vector. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );

	status = bli_check_consistent_object_datatypes( a, y );
	bli_check_error_code( status );
}

/*
   her: there is only one vector, so x is passed to the shared checks in
   both vector positions. a must then be square, pass
   bli_check_hermitian_object() and agree in datatype with x.
*/
void bli_her_check
     (
       obj_t* alpha,
       obj_t* x,
       obj_t* a
     )
{
	err_t status;

	/* Checks shared by ger/her/her2/syr/syr2. */
	bli_xxr_check( alpha, x, x, a );

	/* Squareness of a. */
	status = bli_check_square_object( a );
	bli_check_error_code( status );

	/* Structure of a. */
	status = bli_check_hermitian_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and x. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );
}

/*
   her2: shared rank-update checks, then a must be square, pass
   bli_check_hermitian_object() and agree in datatype with x and y.
*/
void bli_her2_check
     (
       obj_t* alpha,
       obj_t* x,
       obj_t* y,
       obj_t* a
     )
{
	err_t status;

	/* Checks shared by ger/her/her2/syr/syr2. */
	bli_xxr_check( alpha, x, y, a );

	/* Squareness of a. */
	status = bli_check_square_object( a );
	bli_check_error_code( status );

	/* Structure of a. */
	status = bli_check_hermitian_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and each vector. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );

	status = bli_check_consistent_object_datatypes( a, y );
	bli_check_error_code( status );
}

/*
   syr: x is passed to the shared checks in both vector positions. a must
   then be square, pass bli_check_symmetric_object() and agree in
   datatype with x.
*/
void bli_syr_check
     (
       obj_t* alpha,
       obj_t* x,
       obj_t* a
     )
{
	err_t status;

	/* Checks shared by ger/her/her2/syr/syr2. */
	bli_xxr_check( alpha, x, x, a );

	/* Squareness of a. */
	status = bli_check_square_object( a );
	bli_check_error_code( status );

	/* Structure of a. */
	status = bli_check_symmetric_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and x. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );
}

/*
   syr2: shared rank-update checks, then a must be square, pass
   bli_check_symmetric_object() and agree in datatype with x and y.
*/
void bli_syr2_check
     (
       obj_t* alpha,
       obj_t* x,
       obj_t* y,
       obj_t* a
     )
{
	err_t status;

	/* Checks shared by ger/her/her2/syr/syr2. */
	bli_xxr_check( alpha, x, y, a );

	/* Squareness of a. */
	status = bli_check_square_object( a );
	bli_check_error_code( status );

	/* Structure of a. */
	status = bli_check_symmetric_object( a );
	bli_check_error_code( status );

	/* Datatype agreement between a and each vector. */
	status = bli_check_consistent_object_datatypes( a, x );
	bli_check_error_code( status );

	status = bli_check_consistent_object_datatypes( a, y );
	bli_check_error_code( status );
}

// -----------------------------------------------------------------------------

/*
   Checks shared by the matrix-vector operations. In order: datatype
   class of each operand, dimensionality of each operand, conformity of
   x with the post-transposition width of a and of y with its
   post-transposition length, and finally the buffer of every operand.
*/
void bli_xxmv_check
     (
       obj_t* alpha,
       obj_t* a,
       obj_t* x,
       obj_t* beta,
       obj_t* y
     )
{
	err_t status;

	/* Datatype class: scalars via the noninteger check, the rest via the
	   floating check. */
	status = bli_check_noninteger_object( alpha );
	bli_check_error_code( status );

	status = bli_check_noninteger_object( beta );
	bli_check_error_code( status );

	status = bli_check_floating_object( a );
	bli_check_error_code( status );

	status = bli_check_floating_object( x );
	bli_check_error_code( status );

	status = bli_check_floating_object( y );
	bli_check_error_code( status );

	/* Dimensionality: scalar, matrix or vector as appropriate. */
	status = bli_check_scalar_object( alpha );
	bli_check_error_code( status );

	status = bli_check_scalar_object( beta );
	bli_check_error_code( status );

	status = bli_check_matrix_object( a );
	bli_check_error_code( status );

	status = bli_check_vector_object( x );
	bli_check_error_code( status );

	status = bli_check_vector_object( y );
	bli_check_error_code( status );

	/* Conformity: x against the width of a, y against its length, both
	   taken after transposition. */
	status = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( a ) );
	bli_check_error_code( status );

	status = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( a ) );
	bli_check_error_code( status );

	/* Buffers (checked for non-NULLness). */
	status = bli_check_object_buffer( alpha );
	bli_check_error_code( status );

	status = bli_check_object_buffer( a );
	bli_check_error_code( status );

	status = bli_check_object_buffer( x );
	bli_check_error_code( status );

	status = bli_check_object_buffer( beta );
	bli_check_error_code( status );

	status = bli_check_object_buffer( y );
	bli_check_error_code( status );
}

/*
   Checks shared by the rank-update operations. In order: datatype class
   of each operand, dimensionality of each operand, conformity of x with
   the post-transposition length of a and of y with its
   post-transposition width, and finally the buffer of every operand.
*/
void bli_xxr_check
     (
       obj_t* alpha,
       obj_t* x,
       obj_t* y,
       obj_t* a
     )
{
	err_t status;

	/* Datatype class. */
	status = bli_check_noninteger_object( alpha );
	bli_check_error_code( status );

	status = bli_check_floating_object( x );
	bli_check_error_code( status );

	status = bli_check_floating_object( y );
	bli_check_error_code( status );

	status = bli_check_floating_object( a );
	bli_check_error_code( status );

	/* Dimensionality. */
	status = bli_check_scalar_object( alpha );
	bli_check_error_code( status );

	status = bli_check_vector_object( x );
	bli_check_error_code( status );

	status = bli_check_vector_object( y );
	bli_check_error_code( status );

	status = bli_check_matrix_object( a );
	bli_check_error_code( status );

	/* Conformity: x against the length of a, y against its width, both
	   taken after transposition. */
	status = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( a ) );
	bli_check_error_code( status );

	status = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( a ) );
	bli_check_error_code( status );

	/* Buffers (checked for non-NULLness). */
	status = bli_check_object_buffer( alpha );
	bli_check_error_code( status );

	status = bli_check_object_buffer( x );
	bli_check_error_code( status );

	status = bli_check_object_buffer( y );
	bli_check_error_code( status );

	status = bli_check_object_buffer( a );
	bli_check_error_code( status );
}
blis-0.6.1/frame/2/bli_l2_check.h000066400000000000000000000055351360743507500163750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ // // Prototype object-based check functions. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ); GENPROT( gemv ) GENPROT( hemv ) GENPROT( symv ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* a \ ); GENPROT( ger ) GENPROT( her2 ) GENPROT( syr2 ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* a \ ); GENPROT( her ) GENPROT( syr ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x \ ); GENPROT( trmv ) GENPROT( trsv ) // ----------------------------------------------------------------------------- void bli_xxmv_check ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ); void bli_xxr_check ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ); blis-0.6.1/frame/2/bli_l2_fpa.c000066400000000000000000000066671360743507500160700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define function pointer query interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ GENARRAY_FPA( PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft), \ PASTECH(opname,BLIS_TAPI_EX_SUF) ); \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ) \ { \ return PASTECH2(opname,BLIS_TAPI_EX_SUF,_fpa)[ dt ]; \ } GENFRONT( gemv ) GENFRONT( ger ) GENFRONT( hemv ) GENFRONT( symv ) GENFRONT( her ) GENFRONT( syr ) GENFRONT( her2 ) GENFRONT( syr2 ) GENFRONT( trmv ) GENFRONT( trsv ) // // Define function pointer query interfaces for level-2 implementations. 
// #undef GENFRONT #define GENFRONT( opname, varname ) \ \ GENARRAY_FPA( PASTECH2(opname,_unb,_vft), \ varname ); \ \ PASTECH2(opname,_unb,_vft) \ PASTEMAC(varname,_qfp)( num_t dt ) \ { \ return PASTECH(varname,_fpa)[ dt ]; \ } GENFRONT( gemv, gemv_unb_var1 ) GENFRONT( gemv, gemv_unb_var2 ) GENFRONT( gemv, gemv_unf_var1 ) GENFRONT( gemv, gemv_unf_var2 ) GENFRONT( ger, ger_unb_var1 ) GENFRONT( ger, ger_unb_var2 ) GENFRONT( hemv, hemv_unb_var1 ) GENFRONT( hemv, hemv_unb_var2 ) GENFRONT( hemv, hemv_unb_var3 ) GENFRONT( hemv, hemv_unb_var4 ) GENFRONT( hemv, hemv_unf_var1 ) GENFRONT( hemv, hemv_unf_var3 ) GENFRONT( hemv, hemv_unf_var1a ) GENFRONT( hemv, hemv_unf_var3a ) GENFRONT( her, her_unb_var1 ) GENFRONT( her, her_unb_var2 ) GENFRONT( her2, her2_unb_var1 ) GENFRONT( her2, her2_unb_var2 ) GENFRONT( her2, her2_unb_var3 ) GENFRONT( her2, her2_unb_var4 ) GENFRONT( her2, her2_unf_var1 ) GENFRONT( her2, her2_unf_var4 ) GENFRONT( trmv, trmv_unb_var1 ) GENFRONT( trmv, trmv_unb_var2 ) GENFRONT( trmv, trmv_unf_var1 ) GENFRONT( trmv, trmv_unf_var2 ) GENFRONT( trsv, trsv_unb_var1 ) GENFRONT( trsv, trsv_unb_var2 ) GENFRONT( trsv, trsv_unf_var1 ) GENFRONT( trsv, trsv_unf_var2 ) blis-0.6.1/frame/2/bli_l2_fpa.h000066400000000000000000000061311360743507500160570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype function pointer query interface. // #undef GENPROT #define GENPROT( opname ) \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ); GENPROT( gemv ) GENPROT( ger ) GENPROT( hemv ) GENPROT( symv ) GENPROT( her ) GENPROT( syr ) GENPROT( her2 ) GENPROT( syr2 ) GENPROT( trmv ) GENPROT( trsv ) // // Prototype function pointer query interfaces for level-2 implementations. 
// #undef GENPROT #define GENPROT( opname, varname ) \ \ PASTECH2(opname,_unb,_vft) \ PASTEMAC(varname,_qfp)( num_t dt ); GENPROT( gemv, gemv_unb_var1 ) GENPROT( gemv, gemv_unb_var2 ) GENPROT( gemv, gemv_unf_var1 ) GENPROT( gemv, gemv_unf_var2 ) GENPROT( ger, ger_unb_var1 ) GENPROT( ger, ger_unb_var2 ) GENPROT( hemv, hemv_unb_var1 ) GENPROT( hemv, hemv_unb_var2 ) GENPROT( hemv, hemv_unb_var3 ) GENPROT( hemv, hemv_unb_var4 ) GENPROT( hemv, hemv_unf_var1 ) GENPROT( hemv, hemv_unf_var3 ) GENPROT( hemv, hemv_unf_var1a ) GENPROT( hemv, hemv_unf_var3a ) GENPROT( her, her_unb_var1 ) GENPROT( her, her_unb_var2 ) GENPROT( her2, her2_unb_var1 ) GENPROT( her2, her2_unb_var2 ) GENPROT( her2, her2_unb_var3 ) GENPROT( her2, her2_unb_var4 ) GENPROT( her2, her2_unf_var1 ) GENPROT( her2, her2_unf_var4 ) GENPROT( trmv, trmv_unb_var1 ) GENPROT( trmv, trmv_unb_var2 ) GENPROT( trmv, trmv_unf_var1 ) GENPROT( trmv, trmv_unf_var2 ) GENPROT( trsv, trsv_unb_var1 ) GENPROT( trsv, trsv_unb_var2 ) GENPROT( trsv, trsv_unf_var1 ) GENPROT( trsv, trsv_unf_var2 ) blis-0.6.1/frame/2/bli_l2_ft.h000066400000000000000000000111261360743507500157220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // -- Level-2 function types --------------------------------------------------- // // gemv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( gemv ) // ger #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( ger ) // hemv, symv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ uplo_t uploa, \ conj_t conja, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( hemv ) INSERT_GENTDEF( symv ) // her #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void 
(*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ uplo_t uploa, \ conj_t conjx, \ dim_t m, \ ctype_r* alpha, \ ctype* x, inc_t incx, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEFR( her ) // syr #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ uplo_t uploa, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( syr ) // her2, syr2 #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ uplo_t uploa, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( her2 ) INSERT_GENTDEF( syr2 ) // trmv, trsv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( trmv ) INSERT_GENTDEF( trsv ) blis-0.6.1/frame/2/bli_l2_ft_unb.h000066400000000000000000000105371360743507500165730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_L2_FT_UNB_H #define BLIS_L2_FT_UNB_H // // -- Level-2 function types --------------------------------------------------- // // gemv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_unb,tsuf)) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( gemv ) // ger #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_unb,tsuf)) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx \ ); INSERT_GENTDEF( ger ) // hemv (and symv) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_unb,tsuf)) \ ( \ uplo_t uploa, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, 
\ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( hemv ) // her (and syr) #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_unb,tsuf)) \ ( \ uplo_t uploa, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, /* complex alpha allows her variants to also perform syr. */ \ ctype* x, inc_t incx, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx \ ); INSERT_GENTDEFR( her ) // her2 (and syr2) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_unb,tsuf)) \ ( \ uplo_t uploa, \ conj_t conjx, \ conj_t conjy, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx \ ); INSERT_GENTDEF( her2 ) // trmv (and trsv) #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_unb,tsuf)) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ); INSERT_GENTDEF( trmv ) INSERT_GENTDEF( trsv ) #endif blis-0.6.1/frame/2/bli_l2_oapi.c000066400000000000000000000301501360743507500162320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the object API macros. #ifdef BLIS_ENABLE_OAPI // // Define object-based interfaces. 
// #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( a ); \ \ trans_t transa = bli_obj_conjtrans_status( a ); \ conj_t conjx = bli_obj_conj_status( x ); \ dim_t m = bli_obj_length( a ); \ dim_t n = bli_obj_width( a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t incy = bli_obj_vector_inc( y ); \ \ void* buf_alpha; \ void* buf_beta; \ \ obj_t alpha_local; \ obj_t beta_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, a, x, beta, y ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ beta, &beta_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ transa, \ conjx, \ m, \ n, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_x, incx, \ buf_beta, \ buf_y, incy, \ cntx, \ rntm \ ); \ } GENFRONT( gemv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* a \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( a ); \ \ conj_t conjx = bli_obj_conj_status( x ); \ conj_t conjy = bli_obj_conj_status( y ); \ dim_t m = bli_obj_length( a ); \ dim_t n = bli_obj_width( a ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t incy = bli_obj_vector_inc( y ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x, y, a ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ conjx, \ conjy, \ m, \ n, \ buf_alpha, \ buf_x, incx, \ buf_y, incy, \ buf_a, rs_a, cs_a, \ cntx, \ rntm \ ); \ } GENFRONT( ger ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( a ); \ \ uplo_t uploa = bli_obj_uplo( a ); \ conj_t conja = bli_obj_conj_status( a ); \ conj_t conjx = bli_obj_conj_status( x ); \ dim_t m = bli_obj_length( a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t incy = bli_obj_vector_inc( y ); \ \ void* buf_alpha; \ void* buf_beta; \ \ obj_t alpha_local; \ obj_t beta_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, a, x, beta, y ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ beta, &beta_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ uploa, \ conja, \ conjx, \ m, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_x, incx, \ buf_beta, \ buf_y, incy, \ cntx, \ rntm \ ); \ } GENFRONT( hemv ) GENFRONT( symv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* a \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( a ); \ \ uplo_t uploa = bli_obj_uplo( a ); \ conj_t conjx = bli_obj_conj_status( x ); \ dim_t m = bli_obj_length( a ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x, a ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ uploa, \ conjx, \ m, \ buf_alpha, \ buf_x, incx, \ buf_a, rs_a, cs_a, \ cntx, \ rntm \ ); \ } GENFRONT( her ) GENFRONT( syr ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* a \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( a ); \ \ uplo_t uploa = bli_obj_uplo( a ); \ conj_t conjx = bli_obj_conj_status( x ); \ conj_t conjy = bli_obj_conj_status( y ); \ dim_t m = bli_obj_length( a ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t incy = bli_obj_vector_inc( y ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, x, y, a ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ uploa, \ conjx, \ conjy, \ m, \ buf_alpha, \ buf_x, incx, \ buf_y, incy, \ buf_a, rs_a, cs_a, \ cntx, \ rntm \ ); \ } GENFRONT( her2 ) GENFRONT( syr2 ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( a ); \ \ uplo_t uploa = bli_obj_uplo( a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \ diag_t diaga = bli_obj_diag( a ); \ dim_t m = bli_obj_length( a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_alpha; \ \ obj_t alpha_local; \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( alpha, a, x ); \ \ /* Create local copy-casts of scalars (and apply internal conjugation as needed). */ \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ alpha, &alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ uploa, \ transa, \ diaga, \ m, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_x, incx, \ cntx, \ rntm \ ); \ } GENFRONT( trmv ) GENFRONT( trsv ) #endif blis-0.6.1/frame/2/bli_l2_oapi.h000066400000000000000000000053221360743507500162420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. 
// #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( gemv ) GENPROT( hemv ) GENPROT( symv ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* a \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( ger ) GENPROT( her2 ) GENPROT( syr2 ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* a \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( her ) GENPROT( syr ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( trmv ) GENPROT( trsv ) blis-0.6.1/frame/2/bli_l2_oapi_ba.c000066400000000000000000000036671360743507500167110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_oapi_ba.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l2_oapi.c" blis-0.6.1/frame/2/bli_l2_oapi_ex.c000066400000000000000000000036651360743507500167410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_oapi_ex.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l2_oapi.c" blis-0.6.1/frame/2/bli_l2_tapi.c000066400000000000000000000332531360743507500162460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the typed API macros. #ifdef BLIS_ENABLE_TAPI // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, ftname, rvarname, cvarname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ dim_t m_y, n_x; \ \ /* Determine the dimensions of y and x. */ \ bli_set_dims_with_trans( transa, m, n, &m_y, &n_x ); \ \ /* If y has zero elements, return early. */ \ if ( bli_zero_dim1( m_y ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If x has zero elements, or if alpha is zero, scale y by beta and return early. */ \ if ( bli_zero_dim1( n_x ) || PASTEMAC(ch,eq0)( *alpha ) ) \ { \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m_y, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ return; \ } \ \ /* Declare a void function pointer for the current operation. 
*/ \ PASTECH2(ch,ftname,_unb_ft) f; \ \ /* Choose the underlying implementation. */ \ if ( bli_does_notrans( transa ) ) \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,rvarname); \ else /* column or general stored */ f = PASTEMAC(ch,cvarname); \ } \ else /* if ( bli_does_trans( transa ) ) */ \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,cvarname); \ else /* column or general stored */ f = PASTEMAC(ch,rvarname); \ } \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation. */ \ f \ ( \ transa, \ conjx, \ m, \ n, \ alpha, \ a, rs_a, cs_a, \ x, incx, \ beta, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC3( gemv, gemv, gemv_unf_var1, gemv_unf_var2 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, ftname, rvarname, cvarname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If x or y has zero elements, or if alpha is zero, return early. */ \ if ( bli_zero_dim2( m, n ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ \ /* Choose the underlying implementation. */ \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,rvarname); \ else /* column or general stored */ f = PASTEMAC(ch,cvarname); \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation. 
*/ \ f \ ( \ conjx, \ conjy, \ m, \ n, \ alpha, \ x, incx, \ y, incy, \ a, rs_a, cs_a, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC3( ger, ger, ger_unb_var1, ger_unb_var2 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, ftname, conjh, rvarname, cvarname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploa, \ conj_t conja, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If x has zero elements, or if alpha is zero, scale y by beta and return early. */ \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) \ { \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ return; \ } \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ \ /* Choose the underlying implementation. */ \ if ( bli_is_lower( uploa ) ) \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,rvarname); \ else /* column or general stored */ f = PASTEMAC(ch,cvarname); \ } \ else /* if ( bli_is_upper( uploa ) ) */ \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,cvarname); \ else /* column or general stored */ f = PASTEMAC(ch,rvarname); \ } \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation. 
*/ \ f \ ( \ uploa, \ conja, \ conjx, \ conjh, /* used by variants to distinguish hemv from symv */ \ m, \ alpha, \ a, rs_a, cs_a, \ x, incx, \ beta, \ y, incy, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC4( hemv, hemv, BLIS_CONJUGATE, hemv_unf_var1, hemv_unf_var3 ) INSERT_GENTFUNC_BASIC4( symv, hemv, BLIS_NO_CONJUGATE, hemv_unf_var1, hemv_unf_var3 ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname, ftname, conjh, rvarname, cvarname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploa, \ conj_t conjx, \ dim_t m, \ ctype_r* alpha, \ ctype* x, inc_t incx, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ ctype alpha_local; \ \ /* If x has zero elements, or if alpha is zero, return early. */ \ if ( bli_zero_dim1( m ) || PASTEMAC(chr,eq0)( *alpha ) ) return; \ \ /* Make a local copy of alpha, cast into the complex domain. This allows us to use the same underlying her variants to implement both her and syr operations. */ \ PASTEMAC2(chr,ch,copys)( *alpha, alpha_local ); \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ \ /* Choose the underlying implementation. */ \ if ( bli_is_lower( uploa ) ) \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,rvarname); \ else /* column or general stored */ f = PASTEMAC(ch,cvarname); \ } \ else /* if ( bli_is_upper( uploa ) ) */ \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,cvarname); \ else /* column or general stored */ f = PASTEMAC(ch,rvarname); \ } \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation. 
*/ \ f \ ( \ uploa, \ conjx, \ conjh, /* used by variants to distinguish her from syr */ \ m, \ &alpha_local, \ x, incx, \ a, rs_a, cs_a, \ cntx \ ); \ } INSERT_GENTFUNCR_BASIC4( her, her, BLIS_CONJUGATE, her_unb_var1, her_unb_var2 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, ftname, conjh, rvarname, cvarname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploa, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If x has zero elements, or if alpha is zero, return early. */ \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ \ /* Choose the underlying implementation. */ \ if ( bli_is_lower( uploa ) ) \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,rvarname); \ else /* column or general stored */ f = PASTEMAC(ch,cvarname); \ } \ else /* if ( bli_is_upper( uploa ) ) */ \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,cvarname); \ else /* column or general stored */ f = PASTEMAC(ch,rvarname); \ } \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation. 
*/ \ f \ ( \ uploa, \ conjx, \ conjh, /* used by variants to distinguish her from syr */ \ m, \ alpha, \ x, incx, \ a, rs_a, cs_a, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC4( syr, her, BLIS_NO_CONJUGATE, her_unb_var1, her_unb_var2 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, ftname, conjh, rvarname, cvarname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploa, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If x has zero elements, or if alpha is zero, return early. */ \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ \ /* Choose the underlying implementation. */ \ if ( bli_is_lower( uploa ) ) \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,rvarname); \ else /* column or general stored */ f = PASTEMAC(ch,cvarname); \ } \ else /* if ( bli_is_upper( uploa ) ) */ \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,cvarname); \ else /* column or general stored */ f = PASTEMAC(ch,rvarname); \ } \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation.
*/ \ f \ ( \ uploa, \ conjx, \ conjy, \ conjh, \ m, \ alpha, \ x, incx, \ y, incy, \ a, rs_a, cs_a, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC4( her2, her2, BLIS_CONJUGATE, her2_unf_var1, her2_unf_var4 ) INSERT_GENTFUNC_BASIC4( syr2, her2, BLIS_NO_CONJUGATE, her2_unf_var1, her2_unf_var4 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, ftname, rvarname, cvarname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If x has zero elements, return early. */ \ if ( bli_zero_dim1( m ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* If alpha is zero, set x to zero and return early. */ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ alpha, \ x, incx, \ cntx, \ NULL \ ); \ return; \ } \ \ /* Declare a void function pointer for the current operation. */ \ PASTECH2(ch,ftname,_unb_ft) f; \ \ /* Choose the underlying implementation. */ \ if ( bli_does_notrans( transa ) ) \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,rvarname); \ else /* column or general stored */ f = PASTEMAC(ch,cvarname); \ } \ else /* if ( bli_does_trans( transa ) ) */ \ { \ if ( bli_is_row_stored( rs_a, cs_a ) ) f = PASTEMAC(ch,cvarname); \ else /* column or general stored */ f = PASTEMAC(ch,rvarname); \ } \ \ /* Invoke the variant chosen above, which loops over a level-1v or level-1f kernel to implement the current operation. 
*/ \ f \ ( \ uploa, \ transa, \ diaga, \ m, \ alpha, \ a, rs_a, cs_a, \ x, incx, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC3( trmv, trmv, trmv_unf_var1, trmv_unf_var2 ) INSERT_GENTFUNC_BASIC3( trsv, trmv, trsv_unf_var1, trsv_unf_var2 ) #endif blis-0.6.1/frame/2/bli_l2_tapi.h000066400000000000000000000110651360743507500162500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces with typed operands. 
//

// Each GENTPROT/GENTPROTR definition below is a prototype template: the
// INSERT_GENTPROT*_BASIC0 macro that follows it (defined elsewhere)
// instantiates the template for the named operation. BLIS_TAPI_EX_PARAMS
// and EX_SUF are supplied by whichever header configured the typed API
// before this file was included.

// gemv: general matrix A (m x n, strides rs_a/cs_a, transposition transa),
// input vector x (conjx), scalars alpha and beta, and vector y.
#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       trans_t transa, \
       conj_t  conjx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  a, inc_t rs_a, inc_t cs_a, \
       ctype*  x, inc_t incx, \
       ctype*  beta, \
       ctype*  y, inc_t incy  \
       BLIS_TAPI_EX_PARAMS  \
     );

INSERT_GENTPROT_BASIC0( gemv )


// ger: vectors x (conjx) and y (conjy), scalar alpha, and the m x n
// matrix A given last.
#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       conj_t  conjx, \
       conj_t  conjy, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  x, inc_t incx, \
       ctype*  y, inc_t incy, \
       ctype*  a, inc_t rs_a, inc_t cs_a  \
       BLIS_TAPI_EX_PARAMS  \
     );

INSERT_GENTPROT_BASIC0( ger )


// hemv, symv: matrix A of dimension m with a uplo selector (uploa) and
// conjugation flag (conja), vector x (conjx), scalars alpha and beta,
// and vector y.
#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       uplo_t  uploa, \
       conj_t  conja, \
       conj_t  conjx, \
       dim_t   m, \
       ctype*  alpha, \
       ctype*  a, inc_t rs_a, inc_t cs_a, \
       ctype*  x, inc_t incx, \
       ctype*  beta, \
       ctype*  y, inc_t incy  \
       BLIS_TAPI_EX_PARAMS  \
     );

INSERT_GENTPROT_BASIC0( hemv )
INSERT_GENTPROT_BASIC0( symv )


// her: like syr below, except that alpha has the separate type ctype_r
// (NOTE(review): presumably the real projection of ctype -- confirm
// against INSERT_GENTPROTR_BASIC0).
#undef  GENTPROTR
#define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       uplo_t   uploa, \
       conj_t   conjx, \
       dim_t    m, \
       ctype_r* alpha, \
       ctype*   x, inc_t incx, \
       ctype*   a, inc_t rs_a, inc_t cs_a  \
       BLIS_TAPI_EX_PARAMS  \
     );

INSERT_GENTPROTR_BASIC0( her )


// syr: uplo selector, vector x (conjx), scalar alpha of type ctype, and
// matrix A of dimension m.
#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       uplo_t  uploa, \
       conj_t  conjx, \
       dim_t   m, \
       ctype*  alpha, \
       ctype*  x, inc_t incx, \
       ctype*  a, inc_t rs_a, inc_t cs_a  \
       BLIS_TAPI_EX_PARAMS  \
     );

INSERT_GENTPROT_BASIC0( syr )


// her2, syr2: uplo selector, vectors x (conjx) and y (conjy), scalar
// alpha, and matrix A of dimension m.
#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       uplo_t  uploa, \
       conj_t  conjx, \
       conj_t  conjy, \
       dim_t   m, \
       ctype*  alpha, \
       ctype*  x, inc_t incx, \
       ctype*  y, inc_t incy, \
       ctype*  a, inc_t rs_a, inc_t cs_a  \
       BLIS_TAPI_EX_PARAMS  \
     );

INSERT_GENTPROT_BASIC0( her2 )
INSERT_GENTPROT_BASIC0( syr2 )


// trmv, trsv: matrix A of dimension m with uplo (uploa), transposition
// (transa) and diagonal (diaga) selectors, scalar alpha, and vector x.
#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \
     ( \
       uplo_t  uploa, \
       trans_t transa, \
       diag_t  diaga, \
       dim_t   m, \
       ctype*  alpha, \
       ctype*  a, inc_t rs_a, inc_t cs_a, \
       ctype*  x, inc_t incx  \
       BLIS_TAPI_EX_PARAMS  \
     );

INSERT_GENTPROT_BASIC0( trmv )
INSERT_GENTPROT_BASIC0( trsv )

blis-0.6.1/frame/2/bli_l2_tapi_ba.c000066400000000000000000000036651360743507500167140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_tapi_ba.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_l2_tapi.c" blis-0.6.1/frame/2/bli_l2_tapi_ex.c000066400000000000000000000036631360743507500167440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_tapi_ex.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_l2_tapi.c" blis-0.6.1/frame/2/gemv/000077500000000000000000000000001360743507500146525ustar00rootroot00000000000000blis-0.6.1/frame/2/gemv/bli_gemv.h000066400000000000000000000034701360743507500166130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. //#include "bli_gemv_cntl.h" //#include "bli_gemv_front.h" //#include "bli_gemv_int.h" #include "bli_gemv_var.h" blis-0.6.1/frame/2/gemv/bli_gemv_unb_var1.c000066400000000000000000000056641360743507500204120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* a1t; \ ctype* x1; \ ctype* psi1; \ dim_t i; \ dim_t n_elem, n_iter; \ inc_t rs_at, cs_at; \ conj_t conja; \ \ bli_set_dims_incs_with_trans( transa, \ m, n, rs_a, cs_a, \ &n_iter, &n_elem, &rs_at, &cs_at ); \ \ conja = bli_extract_conj( transa ); \ \ PASTECH(ch,dotxv_ker_ft) kfp_dv; \ \ /* Query the context for the kernel function pointer. 
*/ \ kfp_dv = bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTXV_KER, cntx ); \ \ for ( i = 0; i < n_iter; ++i ) \ { \ a1t = a + (i )*rs_at + (0 )*cs_at; \ x1 = x + (0 )*incy; \ psi1 = y + (i )*incy; \ \ /* psi1 = beta * psi1 + alpha * a1t * x1; */ \ kfp_dv \ ( \ conja, \ conjx, \ n_elem, \ alpha, \ a1t, cs_at, \ x1, incx, \ beta, \ psi1, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( gemv_unb_var1 ) blis-0.6.1/frame/2/gemv/bli_gemv_unb_var2.c000066400000000000000000000067631360743507500204140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* zero = PASTEMAC(ch,0); \ ctype* a1; \ ctype* chi1; \ ctype* y1; \ ctype alpha_chi1; \ dim_t i; \ dim_t n_elem, n_iter; \ inc_t rs_at, cs_at; \ conj_t conja; \ \ bli_set_dims_incs_with_trans( transa, \ m, n, rs_a, cs_a, \ &n_elem, &n_iter, &rs_at, &cs_at ); \ \ conja = bli_extract_conj( transa ); \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ n_elem, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ n_elem, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointer. 
*/ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( i = 0; i < n_iter; ++i ) \ { \ a1 = a + (0 )*rs_at + (i )*cs_at; \ chi1 = x + (i )*incx; \ y1 = y + (0 )*incy; \ \ /* y = y + alpha * chi1 * a1; */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, alpha_chi1 ); \ PASTEMAC(ch,scals)( *alpha, alpha_chi1 ); \ \ kfp_av \ ( \ conja, \ n_elem, \ &alpha_chi1, \ a1, rs_at, \ y1, incy, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( gemv_unb_var2 ) blis-0.6.1/frame/2/gemv/bli_gemv_unf_var1.c000066400000000000000000000061451360743507500204110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* A1; \ ctype* x1; \ ctype* y1; \ dim_t i; \ dim_t b_fuse, f; \ dim_t n_elem, n_iter; \ inc_t rs_at, cs_at; \ conj_t conja; \ \ bli_set_dims_incs_with_trans( transa, \ m, n, rs_a, cs_a, \ &n_iter, &n_elem, &rs_at, &cs_at ); \ \ conja = bli_extract_conj( transa ); \ \ PASTECH(ch,dotxf_ker_ft) kfp_df; \ \ /* Query the context for the kernel function pointer and fusing factor. */ \ kfp_df = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTXF_KER, cntx ); \ b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_DF, cntx ); \ \ for ( i = 0; i < n_iter; i += f ) \ { \ f = bli_determine_blocksize_dim_f( i, n_iter, b_fuse ); \ \ A1 = a + (i )*rs_at + (0 )*cs_at; \ x1 = x + (0 )*incy; \ y1 = y + (i )*incy; \ \ /* y1 = beta * y1 + alpha * A1 * x; */ \ kfp_df \ ( \ conja, \ conjx, \ n_elem, \ f, \ alpha, \ A1, cs_at, rs_at, \ x1, incx, \ beta, \ y1, incy, \ cntx \ ); \ \ } \ } INSERT_GENTFUNC_BASIC0( gemv_unf_var1 ) blis-0.6.1/frame/2/gemv/bli_gemv_unf_var2.c000066400000000000000000000070711360743507500204110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* zero = PASTEMAC(ch,0); \ ctype* A1; \ ctype* x1; \ ctype* y1; \ dim_t i; \ dim_t b_fuse, f; \ dim_t n_elem, n_iter; \ inc_t rs_at, cs_at; \ conj_t conja; \ \ bli_set_dims_incs_with_trans( transa, \ m, n, rs_a, cs_a, \ &n_elem, &n_iter, &rs_at, &cs_at ); \ \ conja = bli_extract_conj( transa ); \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ n_elem, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ n_elem, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,axpyf_ker_ft) kfp_af; \ \ /* Query the context for the kernel function pointer and fusing factor. */ \ kfp_af = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPYF_KER, cntx ); \ b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_AF, cntx ); \ \ for ( i = 0; i < n_iter; i += f ) \ { \ f = bli_determine_blocksize_dim_f( i, n_iter, b_fuse ); \ \ A1 = a + (0 )*rs_at + (i )*cs_at; \ x1 = x + (i )*incx; \ y1 = y + (0 )*incy; \ \ /* y = y + alpha * A1 * x1; */ \ kfp_af \ ( \ conja, \ conjx, \ n_elem, \ f, \ alpha, \ A1, rs_at, cs_at, \ x1, incx, \ y1, incy, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( gemv_unf_var2 ) blis-0.6.1/frame/2/gemv/bli_gemv_var.h000066400000000000000000000052301360743507500174570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */


//
// Prototype object-based interfaces.
//

// One prototype per gemv variant. Every variant takes the same operand
// objects (alpha, a, x, beta, y) followed by a context and a control-tree
// node. Both blocked (blk) and unblocked/fused (unb/unf) names are
// declared here.
#undef  GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC0(opname) \
     ( \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  x, \
       obj_t*  beta, \
       obj_t*  y, \
       cntx_t* cntx, \
       cntl_t* cntl  \
     );

GENPROT( gemv_blk_var1 )
GENPROT( gemv_blk_var2 )

GENPROT( gemv_unb_var1 )
GENPROT( gemv_unb_var2 )

GENPROT( gemv_unf_var1 )
GENPROT( gemv_unf_var2 )


//
// Prototype BLAS-like interfaces with typed operands.
//

// Typed counterpart of the variants above: raw buffers with explicit
// strides instead of objects, and a context but no control-tree argument.
// INSERT_GENTPROT_BASIC0 (defined elsewhere) instantiates the template
// for each named variant.
#undef  GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       trans_t transa, \
       conj_t  conjx, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  a, inc_t rs_a, inc_t cs_a, \
       ctype*  x, inc_t incx, \
       ctype*  beta, \
       ctype*  y, inc_t incy, \
       cntx_t* cntx  \
     );

INSERT_GENTPROT_BASIC0( gemv_unb_var1 )
INSERT_GENTPROT_BASIC0( gemv_unb_var2 )

INSERT_GENTPROT_BASIC0( gemv_unf_var1 )
INSERT_GENTPROT_BASIC0( gemv_unf_var2 )

blis-0.6.1/frame/2/gemv/bli_gemv_var_oapi.c000066400000000000000000000062001360743507500204600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENFRONT #define GENFRONT( opname, varname ) \ \ void PASTEMAC0(varname) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* beta, \ obj_t* y, \ cntx_t* cntx, \ cntl_t* cntl \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( a ); \ \ trans_t transa = bli_obj_conjtrans_status( a ); \ conj_t conjx = bli_obj_conj_status( x ); \ \ dim_t m = bli_obj_length( a ); \ dim_t n = bli_obj_width( a ); \ \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t incy = bli_obj_vector_inc( y ); \ \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,_unb,_vft) f = \ PASTEMAC(varname,_qfp)( dt ); \ \ f \ ( \ transa, \ conjx, \ m, \ n, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_x, incx, \ buf_beta, \ buf_y, incy, \ cntx \ ); \ } \ GENFRONT( gemv, gemv_unb_var1 ) GENFRONT( gemv, gemv_unb_var2 ) GENFRONT( gemv, gemv_unf_var1 ) GENFRONT( gemv, gemv_unf_var2 ) blis-0.6.1/frame/2/gemv/bli_gemv_var_oapi.c.prev000066400000000000000000000062061360743507500214410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

/*
   Object-based front-end for one gemv variant (file carries a ".prev"
   suffix; NOTE(review): presumably a retained earlier revision that is
   not part of the build -- confirm before relying on it).

   Differences visible against a query-function based front-end: the
   implementation is selected by indexing a static per-variant array
   declared through GENARRAY_VFP with the datatype of A, the control-tree
   parameter is typed gemv_t*, and bli_init_once() is not called.
*/
#undef  GENFRONT
#define GENFRONT( ftname, opname ) \
\
/*static gemv_vft GENARRAY(ftypes,gemv_unb_var1);*/ \
static GENARRAY_VFP(ftname,opname); \
\
void PASTEMAC0(opname) \
     ( \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  x, \
       obj_t*  beta, \
       obj_t*  y, \
       cntx_t* cntx, \
       gemv_t* cntl  \
     ) \
{ \
	/* Datatype of A; used for the scalar buffers and the table index. */ \
	num_t     dt        = bli_obj_dt( a ); \
\
	trans_t   transa    = bli_obj_conjtrans_status( a ); \
	conj_t    conjx     = bli_obj_conj_status( x ); \
\
	dim_t     m         = bli_obj_length( a ); \
	dim_t     n         = bli_obj_width( a ); \
\
	void*     buf_a     = bli_obj_buffer_at_off( a ); \
	inc_t     rs_a      = bli_obj_row_stride( a ); \
	inc_t     cs_a      = bli_obj_col_stride( a ); \
\
	void*     buf_x     = bli_obj_buffer_at_off( x ); \
	inc_t     incx      = bli_obj_vector_inc( x ); \
\
	void*     buf_y     = bli_obj_buffer_at_off( y ); \
	inc_t     incy      = bli_obj_vector_inc( y ); \
\
	void*     buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
	void*     buf_beta  = bli_obj_buffer_for_1x1( dt, beta ); \
\
	/* Select the entry of the static function-pointer array that
	   matches the datatype. */ \
	PASTECH(ftname,_vft) f = PASTECH(opname,_vfp)[dt]; \
\
	/* Invoke the void pointer-based function for the given datatype. */ \
	f( \
	   transa, \
	   conjx, \
	   m, \
	   n, \
	   buf_alpha, \
	   buf_a, rs_a, cs_a, \
	   buf_x, incx, \
	   buf_beta, \
	   buf_y, incy, \
	   cntx  \
	 ); \
} \

GENFRONT( gemv, gemv_unb_var1 )
GENFRONT( gemv, gemv_unb_var2 )

GENFRONT( gemv, gemv_unf_var1 )
GENFRONT( gemv, gemv_unf_var2 )

blis-0.6.1/frame/2/gemv/other/000077500000000000000000000000001360743507500157735ustar00rootroot00000000000000blis-0.6.1/frame/2/gemv/other/bli_gemv_blk_var1.c000066400000000000000000000074061360743507500215130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_gemv_blk_var1( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, gemv_t* cntl ) { obj_t a1, a1_pack; obj_t y1, y1_pack; dim_t m_trans; dim_t i; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension in partitioning direction. m_trans = bli_obj_length_after_trans( a ); // Partition along the m dimension. for ( i = 0; i < m_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( i, m_trans, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and y1. bli_acquire_mpart_t2b( BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, b_alg, y, &y1 ); // Initialize objects for packing A1 and y1 (if needed). bli_packm_init( &a1, &a1_pack, cntx, bli_cntl_sub_packm_a( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y( cntl ) ); // Copy/pack A1, y1 (if needed). bli_packm_int( &a1, &a1_pack, cntx, bli_cntl_sub_packm_a( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y( cntl ) ); // y1 = beta * y1 + alpha * A1 * x; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a1_pack, x, beta, &y1_pack, cntx, bli_cntl_sub_gemv( cntl ) ); // Copy/unpack y1 (if y1 was packed). 
bli_unpackv_int( &y1_pack, &y1, cntx, bli_cntl_sub_unpackv_y( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &a1_pack, bli_cntl_sub_packm_a( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y( cntl ) ); } blis-0.6.1/frame/2/gemv/other/bli_gemv_blk_var2.c000066400000000000000000000074011360743507500215070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_gemv_blk_var2( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, gemv_t* cntl ) { obj_t a1, a1_pack; obj_t x1, x1_pack; dim_t n_trans; dim_t i; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a1_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( a ); // y = beta * y; bli_scalv_int( beta, y, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition along the "k" dimension (n dimension of A). for ( i = 0; i < n_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( i, n_trans, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and x1. bli_acquire_mpart_l2r( BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, b_alg, x, &x1 ); // Initialize objects for packing A1 and x1 (if needed). bli_packm_init( &a1, &a1_pack, cntx, bli_cntl_sub_packm_a( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x( cntl ) ); // Copy/pack A1, x1 (if needed). bli_packm_int( &a1, &a1_pack, cntx, bli_cntl_sub_packm_a( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x( cntl ) ); // y = y + alpha * A1 * x1; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a1_pack, &x1_pack, &BLIS_ONE, y, cntx, bli_cntl_sub_gemv( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &a1_pack, bli_cntl_sub_packm_a( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x( cntl ) ); } blis-0.6.1/frame/2/gemv/other/bli_gemv_cntl.c000066400000000000000000000205241360743507500207460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern scalv_t* scalv_cntl; extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; gemv_t* gemv_cntl_bs_ke_dot = NULL; gemv_t* gemv_cntl_bs_ke_axpy = NULL; gemv_t* gemv_cntl_rp_bs_dot = NULL; gemv_t* gemv_cntl_rp_bs_axpy = NULL; gemv_t* gemv_cntl_cp_bs_dot = NULL; gemv_t* gemv_cntl_cp_bs_axpy = NULL; gemv_t* gemv_cntl_ge_dot = NULL; gemv_t* gemv_cntl_ge_axpy = NULL; void bli_gemv_cntl_init() { // Create control trees for the lowest-level kernels. 
These trees induce // operations on (persumably) relatively small block-subvector problems. gemv_cntl_bs_ke_dot = bli_gemv_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT1, 0, NULL, NULL, NULL, NULL, NULL, NULL ); gemv_cntl_bs_ke_axpy = bli_gemv_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT2, 0, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control trees for problems with relatively small m dimension // (ie: where trans(A) is a row panel problem). gemv_cntl_rp_bs_dot = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_N2, scalv_cntl, // scale y up-front packm_cntl, // pack A1 (if needed) packv_cntl, // pack x1 (if needed) NULL, // y is not partitioned in var2 gemv_cntl_bs_ke_dot, NULL ); // y is not partitioned in var2 gemv_cntl_rp_bs_axpy = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_N2, scalv_cntl, // scale y up-front packm_cntl, // pack A1 (if needed) packv_cntl, // pack x1 (if needed) NULL, // y is not partitioned in var2 gemv_cntl_bs_ke_axpy, NULL ); // y is not partitioned in var2 // Create control trees for problems with relatively small n dimension // (ie: where trans(A) is a column panel problem). gemv_cntl_cp_bs_dot = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, BLIS_M2, NULL, // no scaling in blk_var1 packm_cntl, // pack A1 (if needed) NULL, // x is not partitioned in var1 packv_cntl, // pack y1 (if needed) gemv_cntl_bs_ke_dot, unpackv_cntl ); // unpack y1 (if packed) gemv_cntl_cp_bs_axpy = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, BLIS_M2, NULL, // no scaling in blk_var1 packm_cntl, // pack A1 (if needed) NULL, // x is not partitioned in var1 packv_cntl, // pack y1 (if needed) gemv_cntl_bs_ke_axpy, unpackv_cntl ); // unpack y1 (if packed) // Create control trees for generally large problems. Here, we choose a // variant that partitions subproblems into row panels. 
gemv_cntl_ge_dot = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, BLIS_M2, NULL, // no scaling in blk_var1 NULL, // do not pack A1 NULL, // x is not partitioned in var1 packv_cntl, // pack y1 (if needed) gemv_cntl_rp_bs_dot, unpackv_cntl ); // unpack y1 (if packed) gemv_cntl_ge_axpy = bli_gemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, BLIS_M2, NULL, // no scaling in blk_var1 NULL, // do not pack A1 NULL, // x is not partitioned in var1 packv_cntl, // pack y1 (if needed) gemv_cntl_rp_bs_axpy, unpackv_cntl ); // unpack y1 (if packed) } void bli_gemv_cntl_finalize() { bli_cntl_free_node( gemv_cntl_bs_ke_dot ); bli_cntl_free_node( gemv_cntl_bs_ke_axpy ); bli_cntl_free_node( gemv_cntl_rp_bs_dot ); bli_cntl_free_node( gemv_cntl_rp_bs_axpy ); bli_cntl_free_node( gemv_cntl_cp_bs_dot ); bli_cntl_free_node( gemv_cntl_cp_bs_axpy ); bli_cntl_free_node( gemv_cntl_ge_dot ); bli_cntl_free_node( gemv_cntl_ge_axpy ); } gemv_t* bli_gemv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a, packv_t* sub_packv_x, packv_t* sub_packv_y, gemv_t* sub_gemv, unpackv_t* sub_unpackv_y ) { gemv_t* cntl; cntl = ( gemv_t* ) bli_malloc_intl( sizeof(gemv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_scalv = sub_scalv; cntl->sub_packm_a = sub_packm_a; cntl->sub_packv_x = sub_packv_x; cntl->sub_packv_y = sub_packv_y; cntl->sub_gemv = sub_gemv; cntl->sub_unpackv_y = sub_unpackv_y; return cntl; } void bli_gemv_cntl_obj_init( gemv_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a, packv_t* sub_packv_x, packv_t* sub_packv_y, gemv_t* sub_gemv, unpackv_t* sub_unpackv_y ) { cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_scalv = sub_scalv; cntl->sub_packm_a = sub_packm_a; cntl->sub_packv_x = sub_packv_x; cntl->sub_packv_y = sub_packv_y; cntl->sub_gemv = sub_gemv; cntl->sub_unpackv_y = sub_unpackv_y; } 
blis-0.6.1/frame/2/gemv/other/bli_gemv_cntl.h000066400000000000000000000066721360743507500207630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ struct gemv_s { impl_t impl_type; varnum_t var_num; bszid_t bszid; struct scalv_s* sub_scalv; struct packm_s* sub_packm_a; struct packv_s* sub_packv_x; struct packv_s* sub_packv_y; struct gemv_s* sub_gemv; struct unpackv_s* sub_unpackv_y; }; typedef struct gemv_s gemv_t; #define bli_cntl_sub_gemv( cntl ) cntl->sub_gemv #define bli_cntl_sub_gemv_rp( cntl ) cntl->sub_gemv_rp #define bli_cntl_sub_gemv_cp( cntl ) cntl->sub_gemv_cp #define bli_cntl_sub_gemv_n_rp( cntl ) cntl->sub_gemv_n_rp #define bli_cntl_sub_gemv_n_cp( cntl ) cntl->sub_gemv_n_cp #define bli_cntl_sub_gemv_t_rp( cntl ) cntl->sub_gemv_t_rp #define bli_cntl_sub_gemv_t_cp( cntl ) cntl->sub_gemv_t_cp void bli_gemv_cntl_init( void ); void bli_gemv_cntl_finalize( void ); gemv_t* bli_gemv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a, packv_t* sub_packv_x, packv_t* sub_packv_y, gemv_t* sub_gemv, unpackv_t* sub_unpackv_y ); void bli_gemv_cntl_obj_init( gemv_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a, packv_t* sub_packv_x, packv_t* sub_packv_y, gemv_t* sub_gemv, unpackv_t* sub_unpackv_y ); blis-0.6.1/frame/2/gemv/other/bli_gemv_front.c000066400000000000000000000166721360743507500211470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern gemv_t* gemv_cntl_bs_ke_axpy; extern gemv_t* gemv_cntl_bs_ke_dot; extern gemv_t* gemv_cntl_ge_axpy; extern gemv_t* gemv_cntl_ge_dot; void bli_gemv_front ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx ) { gemv_t* gemv_cntl; num_t dt_targ_a; num_t dt_targ_x; num_t dt_targ_y; bool_t a_has_unit_inc; bool_t x_has_unit_inc; bool_t y_has_unit_inc; obj_t alpha_local; obj_t beta_local; num_t dt_alpha; num_t dt_beta; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemv_check( alpha, a, x, beta, y ); // Query the target datatypes of each object. dt_targ_a = bli_obj_target_dt( a ); dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); // Determine whether each operand is stored with unit stride. a_has_unit_inc = ( bli_obj_is_row_stored( a ) || bli_obj_is_col_stored( a ) ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); // Create an object to hold a copy-cast of alpha. 
Notice that we use // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // Create an object to hold a copy-cast of beta. Notice that we use // the datatype of y. Here's why: If y is real and beta is complex, // there is no reason to keep beta_local in the complex domain since // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. dt_beta = dt_targ_y; bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( a_has_unit_inc && x_has_unit_inc && y_has_unit_inc ) { // A row-major layout with no transpose is typically best served by // a dot-based implementation (and the same goes for a column-major // layout with a transposition) because it engenders unit stride // within matrix A. Similarly, an axpy-based code is better for // row-major cases with a transpose and column-major without a // transpose. For the general stride case, we mimic that of column- // major storage since that is the format into which we copy/pack. if ( bli_obj_has_notrans( a ) ) { if ( bli_obj_is_row_stored( a ) ) gemv_cntl = gemv_cntl_bs_ke_dot; else gemv_cntl = gemv_cntl_bs_ke_axpy; } else // if ( bli_obj_has_trans( a ) ) { if ( bli_obj_is_row_stored( a ) ) gemv_cntl = gemv_cntl_bs_ke_axpy; else gemv_cntl = gemv_cntl_bs_ke_dot; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. 
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_has_notrans( a ) ) { if ( bli_obj_is_row_tilted( a ) ) gemv_cntl = gemv_cntl_ge_dot; else gemv_cntl = gemv_cntl_ge_axpy; } else // if ( bli_obj_has_trans( a ) ) { if ( bli_obj_is_row_tilted( a ) ) gemv_cntl = gemv_cntl_ge_axpy; else gemv_cntl = gemv_cntl_ge_dot; } } // Invoke the internal back-end with the copy-casts of scalars and the // chosen control tree. bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_TRANSPOSE, &alpha_local, a, x, &beta_local, y, cntx, gemv_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, xo, betao, yo; \ \ dim_t m_a, n_a; \ dim_t m_x; \ dim_t m_y; \ inc_t rs_x, cs_x; \ inc_t rs_y, cs_y; \ \ bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, &m_a, &n_a ); \ bli_set_dims_with_trans( transa, m, n, &m_y, &m_x ); \ \ rs_x = incx; cs_x = m_x * incx; \ rs_y = incy; cs_y = m_y * incy; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \ \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conj( conjx, &xo ); \ \ PASTEMAC0(opname)( 
&alphao, \ &ao, \ &xo, \ &betao, \ &yo, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( gemv_front ) blis-0.6.1/frame/2/gemv/other/bli_gemv_front.h000066400000000000000000000042721360743507500211450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_gemv_front ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ trans_t transa, \ conj_t conjx, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( gemv_front ) blis-0.6.1/frame/2/gemv/other/bli_gemv_int.c000066400000000000000000000070331360743507500206000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemv_fp typedef void (*FUNCPTR_T)( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, gemv_t* cntl ); static FUNCPTR_T vars[3][3] = { // unblocked unblocked with fusing blocked { bli_gemv_unb_var1, bli_gemv_unf_var1, bli_gemv_blk_var1 }, { bli_gemv_unb_var2, bli_gemv_unf_var2, bli_gemv_blk_var2 }, { NULL, NULL, NULL }, }; void bli_gemv_int( trans_t transa, conj_t conjx, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, gemv_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; obj_t a_local; obj_t x_local; // Apply the trans and/or conj parameters to aliases of the objects. bli_obj_alias_with_trans( transa, a, &a_local ); bli_obj_alias_with_conj( conjx, x, &x_local ); // Check parameters. We use the aliased copy of A so the transa parameter // is taken into account for dimension checking. if ( bli_error_checking_is_enabled() ) bli_gemv_check( alpha, &a_local, &x_local, beta, y ); // If y has a zero dimension, return early. if ( bli_obj_has_zero_dim( y ) ) return; // If x has a zero dimension, scale y by beta and return early. if ( bli_obj_has_zero_dim( x ) ) { bli_scalm( beta, y ); return; } // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. 
f( alpha, &a_local, &x_local, beta, y, cntx, cntl ); } blis-0.6.1/frame/2/gemv/other/bli_gemv_int.h000066400000000000000000000035411360743507500206050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_gemv_int ( trans_t transa, conj_t conjx, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, gemv_t* cntl ); blis-0.6.1/frame/2/ger/000077500000000000000000000000001360743507500144715ustar00rootroot00000000000000blis-0.6.1/frame/2/ger/bli_ger.h000066400000000000000000000034631360743507500162530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. 
//#include "bli_ger_cntl.h" //#include "bli_ger_front.h" //#include "bli_ger_int.h" #include "bli_ger_var.h" blis-0.6.1/frame/2/ger/bli_ger_unb_var1.c000066400000000000000000000053211360743507500200360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* a1t; \ ctype* chi1; \ ctype* y1; \ ctype alpha_chi1; \ dim_t i; \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointer. */ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ a1t = a + (i )*rs_a + (0 )*cs_a; \ chi1 = x + (i )*incx; \ y1 = y + (0 )*incy; \ \ /* a1t = a1t + alpha * chi1 * y; */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, alpha_chi1 ); \ PASTEMAC(ch,scals)( *alpha, alpha_chi1 ); \ \ kfp_av \ ( \ conjy, \ n, \ &alpha_chi1, \ y1, incy, \ a1t, cs_a, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( ger_unb_var1 ) blis-0.6.1/frame/2/ger/bli_ger_unb_var2.c000066400000000000000000000053141360743507500200410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* a1; \ ctype* x1; \ ctype* psi1; \ ctype alpha_psi1; \ dim_t j; \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointer. */ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( j = 0; j < n; ++j ) \ { \ a1 = a + (0 )*rs_a + (j )*cs_a; \ x1 = x + (0 )*incx; \ psi1 = y + (j )*incy; \ \ /* a1 = a1 + alpha * psi1 * x; */ \ PASTEMAC(ch,copycjs)( conjy, *psi1, alpha_psi1 ); \ PASTEMAC(ch,scals)( *alpha, alpha_psi1 ); \ \ kfp_av \ ( \ conjx, \ m, \ &alpha_psi1, \ x1, incx, \ a1, rs_a, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( ger_unb_var2 ) blis-0.6.1/frame/2/ger/bli_ger_var.h000066400000000000000000000047371360743507500171300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* a, \ cntx_t* cntx, \ cntl_t* cntl \ ); GENPROT( ger_blk_var1 ) GENPROT( ger_blk_var2 ) GENPROT( ger_unb_var1 ) GENPROT( ger_unb_var2 ) // // Prototype BLAS-like interfaces with typed operands. 
// #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( ger_unb_var1 ) INSERT_GENTPROT_BASIC0( ger_unb_var2 ) blis-0.6.1/frame/2/ger/bli_ger_var_oapi.c000066400000000000000000000057221360743507500201260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENFRONT #define GENFRONT( opname, varname ) \ \ void PASTEMAC0(varname) \ ( \ obj_t* alpha, \ obj_t* x, \ obj_t* y, \ obj_t* a, \ cntx_t* cntx, \ cntl_t* cntl \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( a ); \ \ conj_t conjx = bli_obj_conj_status( x ); \ conj_t conjy = bli_obj_conj_status( y ); \ \ dim_t m = bli_obj_length( a ); \ dim_t n = bli_obj_width( a ); \ \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t incy = bli_obj_vector_inc( y ); \ \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,_unb,_vft) f = \ PASTEMAC(varname,_qfp)( dt ); \ \ f \ ( \ conjx, \ conjy, \ m, \ n, \ buf_alpha, \ buf_x, incx, \ buf_y, incy, \ buf_a, rs_a, cs_a, \ cntx \ ); \ } \ GENFRONT( ger, ger_unb_var1 ) GENFRONT( ger, ger_unb_var2 ) blis-0.6.1/frame/2/ger/other/000077500000000000000000000000001360743507500156125ustar00rootroot00000000000000blis-0.6.1/frame/2/ger/other/bli_ger_blk_var1.c000066400000000000000000000073451360743507500211530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

// Blocked ger, variant 1: partitions A top-to-bottom and x front-to-back
// along the m dimension, optionally packs each A1/x1 pair as directed by
// the control tree, and recurses through bli_ger_int() on every partition
// with the unpartitioned y.
void bli_ger_blk_var1( obj_t*  alpha,
                       obj_t*  x,
                       obj_t*  y,
                       obj_t*  a,
                       cntx_t* cntx,
                       ger_t*  cntl )
{
	obj_t a_part, a_part_packed;
	obj_t x_part, x_part_packed;

	dim_t offset = 0;
	dim_t blksz;

	// Prepare the pack objects before first use.
	bli_obj_init_pack( &a_part_packed );
	bli_obj_init_pack( &x_part_packed );

	// Extent of the dimension being partitioned.
	const dim_t m_total = bli_obj_length_after_trans( a );

	// Walk the m dimension one algorithmic block at a time.
	while ( offset < m_total )
	{
		// Blocksize for the current iteration.
		blksz = bli_determine_blocksize_f( offset, m_total, a,
		                                   bli_cntl_bszid( cntl ), cntx );

		// Carve out A1 and x1.
		bli_acquire_mpart_t2b( BLIS_SUBPART1,
		                       offset, blksz, a, &a_part );
		bli_acquire_vpart_f2b( BLIS_SUBPART1,
		                       offset, blksz, x, &x_part );

		// Set up the pack objects for A1 and x1 (if needed).
		bli_packm_init( &a_part, &a_part_packed,
		                cntx, bli_cntl_sub_packm_a( cntl ) );
		bli_packv_init( &x_part, &x_part_packed,
		                cntx, bli_cntl_sub_packv_x( cntl ) );

		// Copy/pack A1, x1 (if needed).
		bli_packm_int( &a_part, &a_part_packed,
		               cntx, bli_cntl_sub_packm_a( cntl ),
		               &BLIS_PACKM_SINGLE_THREADED );
		bli_packv_int( &x_part, &x_part_packed,
		               cntx, bli_cntl_sub_packv_x( cntl ) );

		// A1 = A1 + alpha * x1 * y;
		bli_ger_int( BLIS_NO_CONJUGATE,
		             BLIS_NO_CONJUGATE,
		             alpha,
		             &x_part_packed,
		             y,
		             &a_part_packed,
		             cntx,
		             bli_cntl_sub_ger( cntl ) );

		// Copy/unpack A1 (if A1 was packed).
		bli_unpackm_int( &a_part_packed, &a_part,
		                 cntx, bli_cntl_sub_unpackm_a( cntl ),
		                 &BLIS_PACKM_SINGLE_THREADED );

		offset += blksz;
	}

	// Hand any packing buffers acquired within packm back to the memory
	// manager.
	bli_packm_release( &a_part_packed, bli_cntl_sub_packm_a( cntl ) );
	bli_packv_release( &x_part_packed, bli_cntl_sub_packv_x( cntl ) );
}

blis-0.6.1/frame/2/ger/other/bli_ger_blk_var2.c000066400000000000000000000073441360743507500211530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_ger_blk_var2( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, cntx_t* cntx, ger_t* cntl ) { obj_t a1, a1_pack; obj_t y1, y1_pack; dim_t i; dim_t b_alg; dim_t n_trans; // Initialize objects for packing. bli_obj_init_pack( &a1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( a ); // Partition along the n dimension. for ( i = 0; i < n_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( i, n_trans, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and y1. bli_acquire_mpart_l2r( BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, b_alg, y, &y1 ); // Initialize objects for packing A1 and y1 (if needed). bli_packm_init( &a1, &a1_pack, cntx, bli_cntl_sub_packm_a( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y( cntl ) ); // Copy/pack A1, y1 (if needed). 
bli_packm_int( &a1, &a1_pack, cntx, bli_cntl_sub_packm_a( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y( cntl ) ); // A1 = A1 + alpha * x * y1; bli_ger_int( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, alpha, x, &y1_pack, &a1_pack, cntx, bli_cntl_sub_ger( cntl ) ); // Copy/unpack A1 (if A1 was packed). bli_unpackm_int( &a1_pack, &a1, cntx, bli_cntl_sub_unpackm_a( cntl ), &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &a1_pack, bli_cntl_sub_packm_a( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y( cntl ) ); } blis-0.6.1/frame/2/ger/other/bli_ger_cntl.c000066400000000000000000000171041360743507500204040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackm_t* unpackm_cntl; ger_t* ger_cntl_bs_ke_row = NULL; ger_t* ger_cntl_bs_ke_col = NULL; ger_t* ger_cntl_rp_bs_row = NULL; ger_t* ger_cntl_rp_bs_col = NULL; ger_t* ger_cntl_cp_bs_row = NULL; ger_t* ger_cntl_cp_bs_col = NULL; ger_t* ger_cntl_ge_row = NULL; ger_t* ger_cntl_ge_col = NULL; void bli_ger_cntl_init() { // Create control trees for the lowest-level kernels. These trees induce // operations on (persumably) relatively small block-subvector problems. ger_cntl_bs_ke_row = bli_ger_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, 0, NULL, NULL, NULL, NULL, NULL ); ger_cntl_bs_ke_col = bli_ger_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT2, 0, NULL, NULL, NULL, NULL, NULL ); // Create control trees for problems with relatively small m dimension // (ie: where A is a row panel problem). 
ger_cntl_rp_bs_row = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_N2, NULL, // x is not partitioned in var2 packv_cntl, // pack y1 (if needed) packm_cntl, // pack A1 (if needed) ger_cntl_bs_ke_row, unpackm_cntl ); // unpack A1 (if packed) ger_cntl_rp_bs_col = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_N2, NULL, // x is not partitioned in var2 packv_cntl, // pack y1 (if needed) packm_cntl, // pack A1 (if needed) ger_cntl_bs_ke_col, unpackm_cntl ); // unpack A1 (if packed) // Create control trees for problems with relatively small n dimension // (ie: where A is a column panel problem). ger_cntl_cp_bs_row = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, BLIS_M2, packv_cntl, // pack x1 (if needed) NULL, // y is not partitioned in var1 packm_cntl, // pack A1 (if needed) ger_cntl_bs_ke_row, unpackm_cntl ); // unpack A1 (if packed) ger_cntl_cp_bs_col = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, BLIS_M2, packv_cntl, // pack x1 (if needed) NULL, // y is not partitioned in var1 packm_cntl, // pack A1 (if needed) ger_cntl_bs_ke_col, unpackm_cntl ); // unpack A1 (if packed) // Create control trees for generally large problems. Here, we choose a // variant that partitions subproblems into column panels. 
ger_cntl_ge_row = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_N2, NULL, // x is not partitioned in var2 packv_cntl, // pack y1 (if needed) NULL, // do not pack A1 ger_cntl_cp_bs_row, NULL ); // do not unpack A1 ger_cntl_ge_col = bli_ger_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_N2, NULL, // x is not partitioned in var2 packv_cntl, // pack y1 (if needed) NULL, // do not pack A1 ger_cntl_cp_bs_col, NULL ); // do not unpack A1 } void bli_ger_cntl_finalize() { bli_cntl_free_node( ger_cntl_bs_ke_row ); bli_cntl_free_node( ger_cntl_bs_ke_col ); bli_cntl_free_node( ger_cntl_rp_bs_row ); bli_cntl_free_node( ger_cntl_rp_bs_col ); bli_cntl_free_node( ger_cntl_cp_bs_row ); bli_cntl_free_node( ger_cntl_cp_bs_col ); bli_cntl_free_node( ger_cntl_ge_row ); bli_cntl_free_node( ger_cntl_ge_col ); } ger_t* bli_ger_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x, packv_t* sub_packv_y, packm_t* sub_packm_a, ger_t* sub_ger, unpackm_t* sub_unpackm_a ) { ger_t* cntl; cntl = ( ger_t* ) bli_malloc_intl( sizeof(ger_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packv_x = sub_packv_x; cntl->sub_packv_y = sub_packv_y; cntl->sub_packm_a = sub_packm_a; cntl->sub_ger = sub_ger; cntl->sub_unpackm_a = sub_unpackm_a; return cntl; } void bli_ger_cntl_obj_init( ger_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x, packv_t* sub_packv_y, packm_t* sub_packm_a, ger_t* sub_ger, unpackm_t* sub_unpackm_a ) { cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packv_x = sub_packv_x; cntl->sub_packv_y = sub_packv_y; cntl->sub_packm_a = sub_packm_a; cntl->sub_ger = sub_ger; cntl->sub_unpackm_a = sub_unpackm_a; } blis-0.6.1/frame/2/ger/other/bli_ger_cntl.h000066400000000000000000000060251360743507500204110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ struct ger_s { impl_t impl_type; varnum_t var_num; bszid_t bszid; struct packv_s* sub_packv_x; struct packv_s* sub_packv_y; struct packm_s* sub_packm_a; struct ger_s* sub_ger; struct unpackm_s* sub_unpackm_a; }; typedef struct ger_s ger_t; #define bli_cntl_sub_ger( cntl ) cntl->sub_ger #define bli_cntl_sub_ger_rp( cntl ) cntl->sub_ger_rp #define bli_cntl_sub_ger_cp( cntl ) cntl->sub_ger_cp void bli_ger_cntl_init( void ); void bli_ger_cntl_finalize( void ); ger_t* bli_ger_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x, packv_t* sub_packv_y, packm_t* sub_packm_a, ger_t* sub_ger, unpackm_t* sub_unpackm_a ); void bli_ger_cntl_obj_init( ger_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x, packv_t* sub_packv_y, packm_t* sub_packm_a, ger_t* sub_ger, unpackm_t* sub_unpackm_a ); blis-0.6.1/frame/2/ger/other/bli_ger_front.c000066400000000000000000000132261360743507500205750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern ger_t* ger_cntl_bs_ke_row; extern ger_t* ger_cntl_bs_ke_col; extern ger_t* ger_cntl_ge_row; extern ger_t* ger_cntl_ge_col; void bli_ger_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, cntx_t* cntx ) { ger_t* ger_cntl; num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_a; bool_t x_has_unit_inc; bool_t y_has_unit_inc; bool_t a_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_ger_check( alpha, x, y, a ); // Query the target datatypes of each object. dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); //dt_targ_a = bli_obj_target_dt( a ); // Determine whether each operand with unit stride. x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); a_has_unit_inc = ( bli_obj_is_row_stored( a ) || bli_obj_is_col_stored( a ) ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the target datatypes of x and y to prevent any // unnecessary loss of information during the computation. 
dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( x_has_unit_inc && y_has_unit_inc && a_has_unit_inc ) { // Use different control trees depending on storage of the matrix // operand. if ( bli_obj_is_row_stored( a ) ) ger_cntl = ger_cntl_bs_ke_row; else ger_cntl = ger_cntl_bs_ke_col; } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_row_tilted( a ) ) ger_cntl = ger_cntl_ge_row; else ger_cntl = ger_cntl_ge_col; } // Invoke the internal back-end with the copy-cast scalar and the // chosen control tree. bli_ger_int( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, &alpha_local, x, y, a, cntx, ger_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. 
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, xo, yo, ao; \ \ dim_t m_x; \ dim_t m_y; \ inc_t rs_x, cs_x; \ inc_t rs_y, cs_y; \ \ bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, &m_x, &m_y ); \ \ rs_x = incx; cs_x = m_x * incx; \ rs_y = incy; cs_y = m_y * incy; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m, n, a, rs_a, cs_a, &ao ); \ \ bli_obj_set_conj( conjx, &xo ); \ bli_obj_set_conj( conjy, &yo ); \ \ PASTEMAC0(opname)( &alphao, \ &xo, \ &yo, \ &ao, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( ger_front ) blis-0.6.1/frame/2/ger/other/bli_ger_front.h000066400000000000000000000042131360743507500205760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_ger_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, cntx_t* cntx ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( ger_front ) blis-0.6.1/frame/2/ger/other/bli_ger_int.c000066400000000000000000000105161360743507500202360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T ger_fp typedef void (*FUNCPTR_T)( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, cntx_t* cntx, ger_t* cntl ); static FUNCPTR_T vars[4][3] = { // unblocked unblocked with fusing blocked { bli_ger_unb_var1, NULL, bli_ger_blk_var1, }, { bli_ger_unb_var2, NULL, bli_ger_blk_var2, }, { NULL, NULL, NULL, }, { NULL, NULL, NULL, }, }; void bli_ger_int( conj_t conjx, conj_t conjy, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, cntx_t* cntx, ger_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; obj_t alpha_local; obj_t x_local; obj_t y_local; obj_t a_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_ger_check( alpha, x, y, a ); // If A has a zero dimension, return early. if ( bli_obj_has_zero_dim( a ) ) return; // If x or y has a zero dimension, return early. if ( bli_obj_has_zero_dim( x ) || bli_obj_has_zero_dim( y ) ) return; // Alias the objects, applying conjx and conjy to x and y, respectively. 
bli_obj_alias_with_conj( conjx, x, &x_local ); bli_obj_alias_with_conj( conjy, y, &y_local ); bli_obj_alias_to( a, &a_local ); // If matrix A is marked for conjugation, we interpret this as a request // to apply a conjugation to the other operands. if ( bli_obj_has_conj( &a_local ) ) { bli_obj_toggle_conj( &a_local ); bli_obj_toggle_conj( &x_local ); bli_obj_toggle_conj( &y_local ); bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ), BLIS_CONJUGATE, alpha, &alpha_local ); } else { bli_obj_alias_to( *alpha, alpha_local ); } // If we are about the call a leaf-level implementation, and matrix A // still needs a transposition, then we must induce one by swapping the // strides and dimensions. if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( &a_local ) ) { bli_obj_induce_trans( &a_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local ); } // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( &alpha_local, &x_local, &y_local, &a_local, cntx, cntl ); } blis-0.6.1/frame/2/ger/other/bli_ger_int.h000066400000000000000000000036061360743507500202450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_ger_int( conj_t conjx, conj_t conjy, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, cntx_t* cntx, ger_t* cntl ); blis-0.6.1/frame/2/hemv/000077500000000000000000000000001360743507500146535ustar00rootroot00000000000000blis-0.6.1/frame/2/hemv/bli_hemv.h000066400000000000000000000034701360743507500166150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. //#include "bli_hemv_cntl.h" //#include "bli_hemv_front.h" //#include "bli_hemv_int.h" #include "bli_hemv_var.h" blis-0.6.1/frame/2/hemv/bli_hemv_unb_var1.c000066400000000000000000000114111360743507500203770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* one = PASTEMAC(ch,1); \ ctype* zero = PASTEMAC(ch,0); \ ctype* a10t; \ ctype* alpha11; \ ctype* x0; \ ctype* chi1; \ ctype* y0; \ ctype* psi1; \ ctype conjx_chi1; \ ctype alpha_chi1; \ ctype alpha11_temp; \ dim_t i; \ dim_t n_behind; \ inc_t rs_at, cs_at; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. 
*/ \ if ( bli_is_lower( uplo ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ \ conj0 = bli_apply_conj( conjh, conja ); \ conj1 = conja; \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ \ conj0 = conja; \ conj1 = bli_apply_conj( conjh, conja ); \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ PASTECH(ch,dotxv_ker_ft) kfp_dv; \ \ /* Query the context for the kernel function pointers. */ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ kfp_dv = bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTXV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_behind = i; \ a10t = a + (i )*rs_at + (0 )*cs_at; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ x0 = x + (0 )*incx; \ chi1 = x + (i )*incx; \ y0 = y + (0 )*incy; \ psi1 = y + (i )*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ \ /* y0 = y0 + alpha * a10t' * chi1; */ \ kfp_av \ ( \ conj0, \ n_behind, \ &alpha_chi1, \ a10t, cs_at, \ y0, incy, \ cntx \ ); \ \ /* psi1 = psi1 + alpha * a10t * x0; */ \ kfp_dv \ ( \ conj1, \ conjx, \ n_behind, \ alpha, \ a10t, cs_at, \ x0, incx, \ one, \ psi1, \ cntx \ ); \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. 
*/ \ PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ \ } \ } INSERT_GENTFUNC_BASIC0( hemv_unb_var1 ) blis-0.6.1/frame/2/hemv/bli_hemv_unb_var2.c000066400000000000000000000114731360743507500204100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* one = PASTEMAC(ch,1); \ ctype* zero = PASTEMAC(ch,0); \ ctype* a10t; \ ctype* alpha11; \ ctype* a21; \ ctype* x0; \ ctype* chi1; \ ctype* x2; \ ctype* psi1; \ ctype conjx_chi1; \ ctype alpha_chi1; \ ctype alpha11_temp; \ dim_t i; \ dim_t n_behind; \ dim_t n_ahead; \ inc_t rs_at, cs_at; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ \ conj0 = conja; \ conj1 = bli_apply_conj( conjh, conja ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ \ conj0 = bli_apply_conj( conjh, conja ); \ conj1 = conja; \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,dotxv_ker_ft) kfp_dv; \ \ /* Query the context for the kernel function pointer. 
*/ \ kfp_dv = bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTXV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_behind = i; \ n_ahead = m - i - 1; \ a10t = a + (i )*rs_at + (0 )*cs_at; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ a21 = a + (i+1)*rs_at + (i )*cs_at; \ x0 = x + (0 )*incx; \ chi1 = x + (i )*incx; \ x2 = x + (i+1)*incx; \ psi1 = y + (i )*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ \ /* psi1 = psi1 + alpha * a10t * x0; */ \ kfp_dv \ ( \ conj0, \ conjx, \ n_behind, \ alpha, \ a10t, cs_at, \ x0, incx, \ one, \ psi1, \ cntx \ ); \ \ /* psi1 = psi1 + alpha * a21' * x2; */ \ kfp_dv \ ( \ conj1, \ conjx, \ n_ahead, \ alpha, \ a21, rs_at, \ x2, incx, \ one, \ psi1, \ cntx \ ); \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. */ \ PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ } \ } INSERT_GENTFUNC_BASIC0( hemv_unb_var2 ) blis-0.6.1/frame/2/hemv/bli_hemv_unb_var3.c000066400000000000000000000114051360743507500204040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* one = PASTEMAC(ch,1); \ ctype* zero = PASTEMAC(ch,0); \ ctype* alpha11; \ ctype* a21; \ ctype* chi1; \ ctype* x2; \ ctype* psi1; \ ctype* y2; \ ctype conjx_chi1; \ ctype alpha_chi1; \ ctype alpha11_temp; \ dim_t i; \ dim_t n_ahead; \ inc_t rs_at, cs_at; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. 
*/ \ if ( bli_is_lower( uplo ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ \ conj0 = bli_apply_conj( conjh, conja ); \ conj1 = conja; \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ \ conj0 = conja; \ conj1 = bli_apply_conj( conjh, conja ); \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ PASTECH(ch,dotxv_ker_ft) kfp_dv; \ \ /* Query the context for the kernel function pointers. */ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ kfp_dv = bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTXV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_ahead = m - i - 1; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ a21 = a + (i+1)*rs_at + (i )*cs_at; \ chi1 = x + (i )*incx; \ x2 = x + (i+1)*incx; \ psi1 = y + (i )*incy; \ y2 = y + (i+1)*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. 
*/ \ PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ \ /* psi1 = psi1 + alpha * a21' * x2; */ \ kfp_dv \ ( \ conj0, \ conjx, \ n_ahead, \ alpha, \ a21, rs_at, \ x2, incx, \ one, \ psi1, \ cntx \ ); \ \ /* y2 = y2 + alpha * a21 * chi1; */ \ kfp_av \ ( \ conj1, \ n_ahead, \ &alpha_chi1, \ a21, rs_at, \ y2, incy, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( hemv_unb_var3 ) blis-0.6.1/frame/2/hemv/bli_hemv_unb_var4.c000066400000000000000000000113021360743507500204010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* zero = PASTEMAC(ch,0); \ ctype* a10t; \ ctype* alpha11; \ ctype* a21; \ ctype* chi1; \ ctype* y0; \ ctype* psi1; \ ctype* y2; \ ctype conjx_chi1; \ ctype alpha_chi1; \ ctype alpha11_temp; \ dim_t i; \ dim_t n_behind; \ dim_t n_ahead; \ inc_t rs_at, cs_at; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ \ conj0 = bli_apply_conj( conjh, conja ); \ conj1 = conja; \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ \ conj0 = conja; \ conj1 = bli_apply_conj( conjh, conja ); \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. 
*/ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointers. */ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_behind = i; \ n_ahead = m - i - 1; \ a10t = a + (i )*rs_at + (0 )*cs_at; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ a21 = a + (i+1)*rs_at + (i )*cs_at; \ chi1 = x + (i )*incx; \ y0 = y + (0 )*incy; \ psi1 = y + (i )*incy; \ y2 = y + (i+1)*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ \ /* y0 = y0 + alpha * a10t' * chi1; */ \ kfp_av \ ( \ conj0, \ n_behind, \ &alpha_chi1, \ a10t, cs_at, \ y0, incy, \ cntx \ ); \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. */ \ PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ \ /* y2 = y2 + alpha * a21 * chi1; */ \ kfp_av \ ( \ conj1, \ n_ahead, \ &alpha_chi1, \ a21, rs_at, \ y2, incy, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( hemv_unb_var4 ) blis-0.6.1/frame/2/hemv/bli_hemv_unf_var1.c000066400000000000000000000140031360743507500204030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* one = PASTEMAC(ch,1); \ ctype* zero = PASTEMAC(ch,0); \ ctype* A10; \ ctype* A11; \ ctype* a10t; \ ctype* alpha11; \ ctype* a21; \ ctype* x0; \ ctype* x1; \ ctype* chi11; \ ctype* y0; \ ctype* y1; \ ctype* y01; \ ctype* psi11; \ ctype* y21; \ ctype conjx_chi11; \ ctype alpha_chi11; \ ctype alpha11_temp; \ dim_t i, k, j; \ dim_t b_fuse, f; \ dim_t n_behind; \ dim_t f_ahead, f_behind; \ inc_t rs_at, cs_at; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ \ conj0 = conja; \ conj1 = bli_apply_conj( conjh, conja ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ \ conj0 = bli_apply_conj( conjh, conja ); \ conj1 = conja; \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,dotxaxpyf_ker_ft) kfp_xf; \ \ /* Query the context for the kernel function pointer and fusing factor. 
*/ \ kfp_xf = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTXAXPYF_KER, cntx ); \ b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_XF, cntx ); \ \ for ( i = 0; i < m; i += f ) \ { \ f = bli_determine_blocksize_dim_f( i, m, b_fuse ); \ n_behind = i; \ A10 = a + (i )*rs_at + (0 )*cs_at; \ A11 = a + (i )*rs_at + (i )*cs_at; \ x0 = x + (0 )*incx; \ x1 = x + (i )*incx; \ y0 = y + (0 )*incy; \ y1 = y + (i )*incy; \ \ /* y1 = y1 + alpha * A10 * x0; (dotxf) */ \ /* y0 = y0 + alpha * A10' * x1; (axpyf) */ \ kfp_xf \ ( \ conj0, \ conj1, \ conjx, \ conjx, \ n_behind, \ f, \ alpha, \ A10, cs_at, rs_at, \ x0, incx, \ x1, incx, \ one, \ y1, incy, \ y0, incy, \ cntx \ ); \ \ /* y1 = y1 + alpha * A11 * x1; (variant 4) */ \ for ( k = 0; k < f; ++k ) \ { \ f_behind = k; \ f_ahead = f - k - 1; \ a10t = A11 + (k )*rs_at + (0 )*cs_at; \ alpha11 = A11 + (k )*rs_at + (k )*cs_at; \ a21 = A11 + (k+1)*rs_at + (k )*cs_at; \ chi11 = x1 + (k )*incx; \ y01 = y1 + (0 )*incy; \ psi11 = y1 + (k )*incy; \ y21 = y1 + (k+1)*incy; \ \ /* y01 = y01 + alpha * a10t' * chi11; */ \ PASTEMAC(ch,copycjs)( conjx, *chi11, conjx_chi11 ); \ PASTEMAC(ch,scal2s)( *alpha, conjx_chi11, alpha_chi11 ); \ if ( bli_is_conj( conj1 ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ PASTEMAC(ch,axpyjs)( alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ PASTEMAC(ch,axpys)( alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ } \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. 
*/ \ PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( alpha11_temp ); \ \ /* psi11 = psi11 + alpha * alpha11 * chi11; */ \ PASTEMAC(ch,axpys)( alpha_chi11, alpha11_temp, *psi11 ); \ \ /* y21 = y21 + alpha * a21 * chi11; */ \ if ( bli_is_conj( conj0 ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ PASTEMAC(ch,axpyjs)( alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ PASTEMAC(ch,axpys)( alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( hemv_unf_var1 ) blis-0.6.1/frame/2/hemv/bli_hemv_unf_var1a.c000066400000000000000000000111671360743507500205540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* zero = PASTEMAC(ch,0); \ ctype* a10t; \ ctype* alpha11; \ ctype* x0; \ ctype* chi1; \ ctype* y0; \ ctype* psi1; \ ctype rho; \ ctype conjx_chi1; \ ctype alpha_chi1; \ ctype alpha11_temp; \ dim_t i; \ dim_t n_behind; \ inc_t rs_at, cs_at; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ \ conj0 = conja; \ conj1 = bli_apply_conj( conjh, conja ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ \ conj0 = bli_apply_conj( conjh, conja ); \ conj1 = conja; \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. 
*/ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,dotaxpyv_ker_ft) kfp_vf; \ \ /* Query the context for the kernel function pointer. */ \ kfp_vf = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTAXPYV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_behind = i; \ a10t = a + (i )*rs_at + (0 )*cs_at; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ x0 = x + (0 )*incx; \ chi1 = x + (i )*incx; \ y0 = y + (0 )*incy; \ psi1 = y + (i )*incy; \ \ /* Apply conjx to chi1 and and scale by alpha. */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ \ /* psi1 = psi1 + alpha * a10t * x0; (dotv) */ \ /* y0 = y0 + alpha * a10t' * chi1; (axpyv) */ \ kfp_vf \ ( \ conj0, \ conj1, \ conjx, \ n_behind, \ &alpha_chi1, \ a10t, cs_at, \ x0, incx, \ &rho, \ y0, incy, \ cntx \ ); \ PASTEMAC(ch,axpys)( *alpha, rho, *psi1 ); \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. */ \ PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( alpha11_temp ); \ \ /* psi1 = psi1 + alpha * alpha11 * chi1; */ \ PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \ \ } \ } INSERT_GENTFUNC_BASIC0( hemv_unf_var1a ) blis-0.6.1/frame/2/hemv/bli_hemv_unf_var3.c000066400000000000000000000140111360743507500204040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* one = PASTEMAC(ch,1); \ ctype* zero = PASTEMAC(ch,0); \ ctype* A11; \ ctype* A21; \ ctype* a10t; \ ctype* alpha11; \ ctype* a21; \ ctype* x1; \ ctype* x2; \ ctype* chi11; \ ctype* y1; \ ctype* y2; \ ctype* y01; \ ctype* psi11; \ ctype* y21; \ ctype conjx_chi11; \ ctype alpha_chi11; \ ctype alpha11_temp; \ dim_t i, k, j; \ dim_t b_fuse, f; \ dim_t n_ahead; \ dim_t f_ahead, f_behind; \ inc_t rs_at, cs_at; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ \ conj0 = bli_apply_conj( conjh, conja ); \ conj1 = conja; \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ \ conj0 = conja; \ conj1 = bli_apply_conj( conjh, conja ); \ } \ \ /* If beta is zero, use setv. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* y = 0; */ \ PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ zero, \ y, incy, \ cntx, \ NULL \ ); \ } \ else \ { \ /* y = beta * y; */ \ PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ m, \ beta, \ y, incy, \ cntx, \ NULL \ ); \ } \ \ PASTECH(ch,dotxaxpyf_ker_ft) kfp_xf; \ \ /* Query the context for the kernel function pointer and fusing factor. 
*/ \ kfp_xf = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTXAXPYF_KER, cntx ); \ b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_XF, cntx ); \ \ for ( i = 0; i < m; i += f ) \ { \ f = bli_determine_blocksize_dim_f( i, m, b_fuse ); \ n_ahead = m - i - f; \ A11 = a + (i )*rs_at + (i )*cs_at; \ A21 = a + (i+f)*rs_at + (i )*cs_at; \ x1 = x + (i )*incx; \ x2 = x + (i+f)*incx; \ y1 = y + (i )*incy; \ y2 = y + (i+f)*incy; \ \ /* y1 = y1 + alpha * A11 * x1; (variant 4) */ \ for ( k = 0; k < f; ++k ) \ { \ f_behind = k; \ f_ahead = f - k - 1; \ a10t = A11 + (k )*rs_at + (0 )*cs_at; \ alpha11 = A11 + (k )*rs_at + (k )*cs_at; \ a21 = A11 + (k+1)*rs_at + (k )*cs_at; \ chi11 = x1 + (k )*incx; \ y01 = y1 + (0 )*incy; \ psi11 = y1 + (k )*incy; \ y21 = y1 + (k+1)*incy; \ \ /* y01 = y01 + alpha * a10t' * chi11; */ \ PASTEMAC(ch,copycjs)( conjx, *chi11, conjx_chi11 ); \ PASTEMAC(ch,scal2s)( *alpha, conjx_chi11, alpha_chi11 ); \ if ( bli_is_conj( conj0 ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ PASTEMAC(ch,axpyjs)( alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ PASTEMAC(ch,axpys)( alpha_chi11, *(a10t + j*cs_at), *(y01 + j*incy) ); \ } \ \ /* For hemv, explicitly set the imaginary component of alpha11 to zero. 
*/ \ PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( alpha11_temp ); \ \ /* psi11 = psi11 + alpha * alpha11 * chi11; */ \ PASTEMAC(ch,axpys)( alpha_chi11, alpha11_temp, *psi11 ); \ \ /* y21 = y21 + alpha * a21 * chi11; */ \ if ( bli_is_conj( conj1 ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ PASTEMAC(ch,axpyjs)( alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ PASTEMAC(ch,axpys)( alpha_chi11, *(a21 + j*rs_at), *(y21 + j*incy) ); \ } \ } \ \ /* y1 = y1 + alpha * A21' * x2; (dotxf) */ \ /* y2 = y2 + alpha * A21 * x1; (axpyf) */ \ kfp_xf \ ( \ conj0, \ conj1, \ conjx, \ conjx, \ n_ahead, \ f, \ alpha, \ A21, rs_at, cs_at, \ x2, incx, \ x1, incx, \ one, \ y1, incy, \ y2, incy, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( hemv_unf_var3 ) blis-0.6.1/frame/2/hemv/bli_hemv_unf_var3a.c000066400000000000000000000111631360743507500205520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

/*
   Template for the typed hemv "unf_var3a" implementation.

   For each datatype character ch, GENTFUNC expands to a function that
   first overwrites y with beta * y (or sets y to zero via setv when beta
   equals zero) and then accumulates the matrix-vector product into y one
   diagonal element at a time:

     - psi1 receives alpha * alpha11 * chi1 (the diagonal contribution);
     - the dotaxpyv kernel queried from cntx is invoked on a21, x2 and y2
       and hands back the scalar rho;
     - psi1 then receives alpha * rho.

   Parameters, as used by the body:
     uplo          selects whether (rs_a, cs_a) are used as given (lower)
                   or swapped (upper).
     conja, conjx  conjugation applied to the entries of A and x.
     conjh         when it requests conjugation, the imaginary part of each
                   diagonal element is zeroed before use; it is also
                   combined with conja for one of the two conjugation
                   arguments handed to the kernel (which one depends on
                   uplo).
     m             number of diagonal elements visited.
     alpha, beta   pointers to the scalars.
     a, x, y       buffers with their strides/increments.
     cntx          context used for the kernel query and passed through to
                   setv/scalv and the kernel.
*/
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       uplo_t  uplo, \
       conj_t  conja, \
       conj_t  conjx, \
       conj_t  conjh, \
       dim_t   m, \
       ctype*  alpha, \
       ctype*  a, inc_t rs_a, inc_t cs_a, \
       ctype*  x, inc_t incx, \
       ctype*  beta, \
       ctype*  y, inc_t incy, \
       cntx_t* cntx \
     ) \
{ \
    const num_t dt = PASTEMAC(ch,type); \
\
    ctype*  zero = PASTEMAC(ch,0); \
    ctype*  alpha11; \
    ctype*  a21; \
    ctype*  chi1; \
    ctype*  x2; \
    ctype*  psi1; \
    ctype*  y2; \
    ctype   rho; \
    ctype   conjx_chi1; \
    ctype   alpha_chi1; \
    ctype   alpha11_temp; \
    dim_t   i; \
    dim_t   n_ahead; \
    inc_t   rs_at, cs_at; \
    conj_t  conj0, conj1; \
\
    /* The algorithm will be expressed in terms of the lower triangular case; \
       the upper triangular case is supported by swapping the row and column \
       strides of A and toggling some conj parameters. */ \
    if ( bli_is_lower( uplo ) ) \
    { \
        rs_at = rs_a; \
        cs_at = cs_a; \
\
        conj0 = bli_apply_conj( conjh, conja ); \
        conj1 = conja; \
    } \
    else /* if ( bli_is_upper( uplo ) ) */ \
    { \
        rs_at = cs_a; \
        cs_at = rs_a; \
\
        conj0 = conja; \
        conj1 = bli_apply_conj( conjh, conja ); \
    } \
\
    /* If beta is zero, use setv. Otherwise, scale by beta. */ \
    if ( PASTEMAC(ch,eq0)( *beta ) ) \
    { \
        /* y = 0; */ \
        PASTEMAC2(ch,setv,BLIS_TAPI_EX_SUF) \
        ( \
          BLIS_NO_CONJUGATE, \
          m, \
          zero, \
          y, incy, \
          cntx, \
          NULL \
        ); \
    } \
    else \
    { \
        /* y = beta * y; */ \
        PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \
        ( \
          BLIS_NO_CONJUGATE, \
          m, \
          beta, \
          y, incy, \
          cntx, \
          NULL \
        ); \
    } \
\
    PASTECH(ch,dotaxpyv_ker_ft) kfp_vf; \
\
    /* Query the context for the kernel function pointer. */ \
    kfp_vf = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTAXPYV_KER, cntx ); \
\
    for ( i = 0; i < m; ++i ) \
    { \
        /* n_ahead counts the elements that follow index i; alpha11 is the \
           i-th diagonal element, a21 starts one rs_at step past it, and \
           chi1/x2 and psi1/y2 are the matching positions in x and y. */ \
        n_ahead  = m - i - 1; \
        alpha11  = a + (i  )*rs_at + (i  )*cs_at; \
        a21      = a + (i+1)*rs_at + (i  )*cs_at; \
        chi1     = x + (i  )*incx; \
        x2       = x + (i+1)*incx; \
        psi1     = y + (i  )*incy; \
        y2       = y + (i+1)*incy; \
\
        /* For hemv, explicitly set the imaginary component of alpha11 to
           zero. */ \
        PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_temp ); \
        if ( bli_is_conj( conjh ) ) \
            PASTEMAC(ch,seti0s)( alpha11_temp ); \
\
        /* Apply conjx to chi1 and scale by alpha. */ \
        PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \
        PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \
\
        /* psi1 = psi1 + alpha * alpha11 * chi1; */ \
        PASTEMAC(ch,axpys)( alpha_chi1, alpha11_temp, *psi1 ); \
\
        /* psi1 = psi1 + alpha * a21' * x2;   (dotv) */ \
        /* y2   = y2   + alpha * a21  * chi1; (axpyv) */ \
        kfp_vf \
        ( \
          conj0, \
          conj1, \
          conjx, \
          n_ahead, \
          &alpha_chi1, \
          a21, rs_at, \
          x2,  incx, \
          &rho, \
          y2,  incy, \
          cntx \
        ); \
        /* The kernel returns the dot product in rho without alpha applied; \
           fold it into psi1 here. */ \
        PASTEMAC(ch,axpys)( *alpha, rho, *psi1 ); \
    } \
}

INSERT_GENTFUNC_BASIC0( hemv_unf_var3a )

blis-0.6.1/frame/2/hemv/bli_hemv_var.h000066400000000000000000000060041360743507500174610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. 
// #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ conj_t conjh, \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ obj_t* beta, \ obj_t* y, \ cntx_t* cntx, \ cntl_t* cntl \ ); GENPROT( hemv_blk_var1 ) GENPROT( hemv_blk_var2 ) GENPROT( hemv_blk_var3 ) GENPROT( hemv_blk_var4 ) GENPROT( hemv_unb_var1 ) GENPROT( hemv_unb_var2 ) GENPROT( hemv_unb_var3 ) GENPROT( hemv_unb_var4 ) GENPROT( hemv_unf_var1 ) GENPROT( hemv_unf_var3 ) GENPROT( hemv_unf_var1a ) GENPROT( hemv_unf_var3a ) // // Prototype BLAS-like interfaces with typed operands. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conja, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( hemv_unb_var1 ) INSERT_GENTPROT_BASIC0( hemv_unb_var2 ) INSERT_GENTPROT_BASIC0( hemv_unb_var3 ) INSERT_GENTPROT_BASIC0( hemv_unb_var4 ) INSERT_GENTPROT_BASIC0( hemv_unf_var1 ) INSERT_GENTPROT_BASIC0( hemv_unf_var3 ) INSERT_GENTPROT_BASIC0( hemv_unf_var1a ) INSERT_GENTPROT_BASIC0( hemv_unf_var3a ) blis-0.6.1/frame/2/hemv/bli_hemv_var_oapi.c000066400000000000000000000064421360743507500204720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

/*
   GENFRONT( opname, varname ) defines the object-based front end for one
   typed hemv variant. The generated function:

     1. calls bli_init_once();
     2. reads the datatype, uplo and conjugation status from a, the
        conjugation status from x, and m from the length of a;
     3. extracts the buffers and strides/increments of a, x and y;
     4. obtains buffers for alpha and beta via bli_obj_buffer_for_1x1()
        using a's datatype;
     5. looks up the typed implementation for that datatype through
        PASTEMAC(varname,_qfp)( dt ) and calls it.

   The cntl parameter is accepted but not referenced in the body.
*/
#undef  GENFRONT
#define GENFRONT( opname, varname ) \
\
void PASTEMAC0(varname) \
     ( \
       conj_t  conjh, \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  x, \
       obj_t*  beta, \
       obj_t*  y, \
       cntx_t* cntx, \
       cntl_t* cntl  \
     ) \
{ \
    bli_init_once(); \
\
    num_t     dt        = bli_obj_dt( a ); \
\
    uplo_t    uplo      = bli_obj_uplo( a ); \
    conj_t    conja     = bli_obj_conj_status( a ); \
    conj_t    conjx     = bli_obj_conj_status( x ); \
\
    dim_t     m         = bli_obj_length( a ); \
\
    void*     buf_a     = bli_obj_buffer_at_off( a ); \
    inc_t     rs_a      = bli_obj_row_stride( a ); \
    inc_t     cs_a      = bli_obj_col_stride( a ); \
\
    void*     buf_x     = bli_obj_buffer_at_off( x ); \
    inc_t     incx      = bli_obj_vector_inc( x ); \
\
    void*     buf_y     = bli_obj_buffer_at_off( y ); \
    inc_t     incy      = bli_obj_vector_inc( y ); \
\
    void*     buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
    void*     buf_beta  = bli_obj_buffer_for_1x1( dt, beta ); \
\
    /* Query a type-specific function pointer, except one that uses
       void* for function arguments instead of typed pointers. */ \
    PASTECH2(opname,_unb,_vft) f = \
    PASTEMAC(varname,_qfp)( dt ); \
\
    f \
    ( \
      uplo, \
      conja, \
      conjx, \
      conjh, \
      m, \
      buf_alpha, \
      buf_a, rs_a, cs_a, \
      buf_x, incx, \
      buf_beta, \
      buf_y, incy, \
      cntx \
    ); \
} \

GENFRONT( hemv, hemv_unb_var1 )
GENFRONT( hemv, hemv_unb_var2 )
GENFRONT( hemv, hemv_unb_var3 )
GENFRONT( hemv, hemv_unb_var4 )

GENFRONT( hemv, hemv_unf_var1 )
GENFRONT( hemv, hemv_unf_var3 )
GENFRONT( hemv, hemv_unf_var1a )
GENFRONT( hemv, hemv_unf_var3a )

blis-0.6.1/frame/2/hemv/other/000077500000000000000000000000001360743507500157745ustar00rootroot00000000000000blis-0.6.1/frame/2/hemv/other/bli_hemv_blk_var1.c000066400000000000000000000133101360743507500215040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_hemv_blk_var1( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ) { obj_t a11, a11_pack; obj_t a10; obj_t x1, x1_pack; obj_t x0; obj_t y1, y1_pack; obj_t y0; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension. mn = bli_obj_length( a ); // y = beta * y; bli_scalv_int( beta, y, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A10, x1, x0, y1, and y0. 
bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART10, ij, b_alg, a, &a10 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, x, &x0 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, y, &y1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, y, &y0 ); // Initialize objects for packing A11, x1, and y1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // y0 = y0 + alpha * A10' * x1; bli_gemv_int( bli_apply_conj( conjh, BLIS_TRANSPOSE ), BLIS_NO_CONJUGATE, alpha, &a10, &x1_pack, &BLIS_ONE, &y0, cntx, bli_cntl_sub_gemv_t_rp( cntl ) ); // y1 = y1 + alpha * A11 * x1; bli_hemv_int( conjh, alpha, &a11_pack, &x1_pack, &BLIS_ONE, &y1_pack, cntx, bli_cntl_sub_hemv( cntl ) ); // y1 = y1 + alpha * A10 * x0; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a10, &x0, &BLIS_ONE, &y1_pack, cntx, bli_cntl_sub_gemv_n_rp( cntl ) ); // Copy/unpack y1 (if y1 was packed). bli_unpackv_int( &y1_pack, &y1, cntx, bli_cntl_sub_unpackv_y1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y1( cntl ) ); } blis-0.6.1/frame/2/hemv/other/bli_hemv_blk_var2.c000066400000000000000000000134731360743507500215170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_hemv_blk_var2( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ) { obj_t a11, a11_pack; obj_t a10; obj_t a21; obj_t x1, x1_pack; obj_t x0; obj_t x2; obj_t y1, y1_pack; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension. mn = bli_obj_length( a ); // y = beta * y; bli_scalv_int( beta, y, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A10, A21, x1, x0, x2, y1, and y0. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART10, ij, b_alg, a, &a10 ); bli_acquire_mpart_tl2br( BLIS_SUBPART21, ij, b_alg, a, &a21 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, x, &x0 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, x, &x2 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, y, &y1 ); // Initialize objects for packing A11, x1, and y1 (if needed). 
bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // y1 = y1 + alpha * A10 * x0; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a10, &x0, &BLIS_ONE, &y1_pack, cntx, bli_cntl_sub_gemv_n_rp( cntl ) ); // y1 = y1 + alpha * A11 * x1; bli_hemv_int( conjh, alpha, &a11_pack, &x1_pack, &BLIS_ONE, &y1_pack, cntx, bli_cntl_sub_hemv( cntl ) ); // y1 = y1 + alpha * A21' * x2; bli_gemv_int( bli_apply_conj( conjh, BLIS_TRANSPOSE ), BLIS_NO_CONJUGATE, alpha, &a21, &x2, &BLIS_ONE, &y1_pack, cntx, bli_cntl_sub_gemv_t_cp( cntl ) ); // Copy/unpack y1 (if y1 was packed). bli_unpackv_int( &y1_pack, &y1, cntx, bli_cntl_sub_unpackv_y1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y1( cntl ) ); } blis-0.6.1/frame/2/hemv/other/bli_hemv_blk_var3.c000066400000000000000000000133101360743507500215060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_hemv_blk_var3( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ) { obj_t a11, a11_pack; obj_t a21; obj_t x1, x1_pack; obj_t x2; obj_t y1, y1_pack; obj_t y2; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). 
// Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension. mn = bli_obj_length( a ); // y = beta * y; bli_scalv_int( beta, y, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A10, x1, x0, y1, and y0. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART21, ij, b_alg, a, &a21 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, x, &x2 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, y, &y1 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, y, &y2 ); // Initialize objects for packing A11, x1, and y1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // y1 = y1 + alpha * A21' * x2; bli_gemv_int( bli_apply_conj( conjh, BLIS_TRANSPOSE ), BLIS_NO_CONJUGATE, alpha, &a21, &x2, &BLIS_ONE, &y1_pack, cntx, bli_cntl_sub_gemv_t_cp( cntl ) ); // y1 = y1 + alpha * A11 * x1; bli_hemv_int( conjh, alpha, &a11_pack, &x1_pack, &BLIS_ONE, &y1_pack, cntx, bli_cntl_sub_hemv( cntl ) ); // y2 = y2 + alpha * A21 * x1; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a21, &x1_pack, &BLIS_ONE, &y2, cntx, bli_cntl_sub_gemv_n_cp( cntl ) ); // Copy/unpack y1 (if y1 was packed). 
bli_unpackv_int( &y1_pack, &y1, cntx, bli_cntl_sub_unpackv_y1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y1( cntl ) ); } blis-0.6.1/frame/2/hemv/other/bli_hemv_blk_var4.c000066400000000000000000000134671360743507500215240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_hemv_blk_var4( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ) { obj_t a11, a11_pack; obj_t a10; obj_t a21; obj_t x1, x1_pack; obj_t y1, y1_pack; obj_t y0; obj_t y2; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension. mn = bli_obj_length( a ); // y = beta * y; bli_scalv_int( beta, y, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A10, A21, x1, y1, y0, and y2. 
bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART10, ij, b_alg, a, &a10 ); bli_acquire_mpart_tl2br( BLIS_SUBPART21, ij, b_alg, a, &a21 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, y, &y1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, y, &y0 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, y, &y2 ); // Initialize objects for packing A11, x1, and y1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // y0 = y0 + alpha * A10' * x1; bli_gemv_int( bli_apply_conj( conjh, BLIS_TRANSPOSE ), BLIS_NO_CONJUGATE, alpha, &a10, &x1_pack, &BLIS_ONE, &y0, cntx, bli_cntl_sub_gemv_t_rp( cntl ) ); // y1 = y1 + alpha * A11 * x1; bli_hemv_int( conjh, alpha, &a11_pack, &x1_pack, &BLIS_ONE, &y1_pack, cntx, bli_cntl_sub_hemv( cntl ) ); // y2 = y2 + alpha * A21 * x1; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a21, &x1_pack, &BLIS_ONE, &y2, cntx, bli_cntl_sub_gemv_n_cp( cntl ) ); // Copy/unpack y1 (if y1 was packed). bli_unpackv_int( &y1_pack, &y1, cntx, bli_cntl_sub_unpackv_y1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y1( cntl ) ); } blis-0.6.1/frame/2/hemv/other/bli_hemv_cntl.c000066400000000000000000000166441360743507500207600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" extern scalv_t* scalv_cntl; extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; extern gemv_t* gemv_cntl_rp_bs_dot; extern gemv_t* gemv_cntl_rp_bs_axpy; extern gemv_t* gemv_cntl_cp_bs_dot; extern gemv_t* gemv_cntl_cp_bs_axpy; hemv_t* hemv_cntl_bs_ke_lrow_ucol = NULL; hemv_t* hemv_cntl_bs_ke_lcol_urow = NULL; hemv_t* hemv_cntl_ge_lrow_ucol = NULL; hemv_t* hemv_cntl_ge_lcol_urow = NULL; void bli_hemv_cntl_init() { // Create control trees for the lowest-level kernels. These trees induce // operations on (presumably) relatively small block-subvector problems. hemv_cntl_bs_ke_lrow_ucol = bli_hemv_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT1, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); hemv_cntl_bs_ke_lcol_urow = bli_hemv_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT3, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control trees for generally large problems. Here, we choose a // variant that prioritizes keeping a subvector of y in cache. 
hemv_cntl_ge_lrow_ucol = bli_hemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_M2, scalv_cntl, // scale y up-front packm_cntl, // pack A11 (if needed) packv_cntl, // pack x1 (if needed) packv_cntl, // pack y1 (if needed) gemv_cntl_rp_bs_dot, // gemv_n_rp needed by var2 NULL, // gemv_n_cp not used by var2 NULL, // gemv_t_rp not used by var2 gemv_cntl_rp_bs_axpy, // gemv_t_cp needed by var2 hemv_cntl_bs_ke_lrow_ucol, unpackv_cntl ); // unpack y1 (if packed) hemv_cntl_ge_lcol_urow = bli_hemv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_M2, scalv_cntl, // scale y up-front packm_cntl, // pack A11 (if needed) packv_cntl, // pack x1 (if needed) packv_cntl, // pack y1 (if needed) gemv_cntl_rp_bs_axpy, // gemv_n_rp needed by var2 NULL, // gemv_n_cp not used by var2 NULL, // gemv_t_rp not used by var2 gemv_cntl_rp_bs_dot, // gemv_t_cp needed by var2 hemv_cntl_bs_ke_lcol_urow, unpackv_cntl ); // unpack y1 (if packed) } void bli_hemv_cntl_finalize() { bli_cntl_free_node( hemv_cntl_bs_ke_lrow_ucol ); bli_cntl_free_node( hemv_cntl_bs_ke_lcol_urow ); bli_cntl_free_node( hemv_cntl_ge_lrow_ucol ); bli_cntl_free_node( hemv_cntl_ge_lcol_urow ); } hemv_t* bli_hemv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, packv_t* sub_packv_y1, gemv_t* sub_gemv_n_rp, gemv_t* sub_gemv_n_cp, gemv_t* sub_gemv_t_rp, gemv_t* sub_gemv_t_cp, hemv_t* sub_hemv, unpackv_t* sub_unpackv_y1 ) { hemv_t* cntl; cntl = ( hemv_t* ) bli_malloc_intl( sizeof(hemv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_scalv = sub_scalv; cntl->sub_packm_a11 = sub_packm_a11; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_packv_y1 = sub_packv_y1; cntl->sub_gemv_n_rp = sub_gemv_n_rp; cntl->sub_gemv_n_cp = sub_gemv_n_cp; cntl->sub_gemv_t_rp = sub_gemv_t_rp; cntl->sub_gemv_t_cp = sub_gemv_t_cp; cntl->sub_hemv = sub_hemv; cntl->sub_unpackv_y1 = sub_unpackv_y1; return cntl; } void 
bli_hemv_cntl_obj_init( hemv_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, packv_t* sub_packv_y1, gemv_t* sub_gemv_n_rp, gemv_t* sub_gemv_n_cp, gemv_t* sub_gemv_t_rp, gemv_t* sub_gemv_t_cp, hemv_t* sub_hemv, unpackv_t* sub_unpackv_y1 ) { cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_scalv = sub_scalv; cntl->sub_packm_a11 = sub_packm_a11; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_packv_y1 = sub_packv_y1; cntl->sub_gemv_n_rp = sub_gemv_n_rp; cntl->sub_gemv_n_cp = sub_gemv_n_cp; cntl->sub_gemv_t_rp = sub_gemv_t_rp; cntl->sub_gemv_t_cp = sub_gemv_t_cp; cntl->sub_hemv = sub_hemv; cntl->sub_unpackv_y1 = sub_unpackv_y1; } blis-0.6.1/frame/2/hemv/other/bli_hemv_cntl.h000066400000000000000000000072741360743507500207640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ struct hemv_s { impl_t impl_type; varnum_t var_num; bszid_t bszid; struct scalv_s* sub_scalv; struct packm_s* sub_packm_a11; struct packv_s* sub_packv_x1; struct packv_s* sub_packv_y1; struct gemv_s* sub_gemv_n_rp; struct gemv_s* sub_gemv_n_cp; struct gemv_s* sub_gemv_t_rp; struct gemv_s* sub_gemv_t_cp; struct hemv_s* sub_hemv; struct unpackv_s* sub_unpackv_y1; }; typedef struct hemv_s hemv_t; #define bli_cntl_sub_hemv( cntl ) cntl->sub_hemv void bli_hemv_cntl_init( void ); void bli_hemv_cntl_finalize( void ); hemv_t* bli_hemv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, packv_t* sub_packv_y1, gemv_t* sub_gemv_n_rp, gemv_t* sub_gemv_n_cp, gemv_t* sub_gemv_t_rp, gemv_t* sub_gemv_t_cp, hemv_t* sub_hemv, unpackv_t* sub_unpackv_y1 ); void bli_hemv_cntl_obj_init( hemv_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, packv_t* sub_packv_y1, gemv_t* sub_gemv_n_rp, gemv_t* sub_gemv_n_cp, gemv_t* sub_gemv_t_rp, gemv_t* sub_gemv_t_cp, hemv_t* sub_hemv, unpackv_t* sub_unpackv_y1 ); blis-0.6.1/frame/2/hemv/other/bli_hemv_front.c000066400000000000000000000162551360743507500211460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

extern hemv_t* hemv_cntl_bs_ke_lrow_ucol;
extern hemv_t* hemv_cntl_bs_ke_lcol_urow;
extern hemv_t* hemv_cntl_ge_lrow_ucol;
extern hemv_t* hemv_cntl_ge_lcol_urow;

//
// Object-based front-end for hemv. Casts local copies of alpha and beta,
// picks one of the four global hemv control trees from the strides, the
// stored triangle and the storage of a, and hands everything to
// bli_hemv_int() with conjh = BLIS_CONJUGATE.
//
void bli_hemv_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  x,
       obj_t*  beta,
       obj_t*  y,
       cntx_t* cntx
     )
{
	hemv_t* tree;
	obj_t   alpha_cast;
	obj_t   beta_cast;

	// Validate the operands when error checking is switched on.
	if ( bli_error_checking_is_enabled() )
		bli_hemv_check( alpha, a, x, beta, y );

	// Target datatypes of the three operands.
	const num_t a_dt = bli_obj_target_dt( a );
	const num_t x_dt = bli_obj_target_dt( x );
	const num_t y_dt = bli_obj_target_dt( y );

	// Does each operand have unit stride? For the matrix this means plain
	// row or column storage; for the vectors, an increment of one.
	const bool_t a_unit = ( bli_obj_is_row_stored( a ) ||
	                        bli_obj_is_col_stored( a ) );
	const bool_t x_unit = ( bli_obj_vector_inc( x ) == 1 );
	const bool_t y_unit = ( bli_obj_vector_inc( y ) == 1 );

	// alpha is copy-cast to the type union of the target datatypes of a and
	// x, so that no information is lost unnecessarily during computation.
	bli_obj_scalar_init_detached_copy_of( bli_dt_union( a_dt, x_dt ),
	                                      BLIS_NO_CONJUGATE,
	                                      alpha,
	                                      &alpha_cast );

	// beta is copy-cast to the target datatype of y. With a real y, the
	// complex part of beta*y would not be stored anyway; with a complex y,
	// a real beta is harmlessly promoted.
	bli_obj_scalar_init_detached_copy_of( y_dt,
	                                      BLIS_NO_CONJUGATE,
	                                      beta,
	                                      &beta_cast );

	if ( a_unit && x_unit && y_unit )
	{
		// Everything has unit stride: go straight to an unblocked tree.
		// Two trees cover the four uplo/storage combinations, because
		// lower/row-stored pairs with upper/column-stored and
		// lower/column-stored pairs with upper/row-stored.
		const bool_t lower = bli_obj_is_lower( a );
		const bool_t rows  = bli_obj_is_row_stored( a );

		tree = ( lower
		         ? ( rows ? hemv_cntl_bs_ke_lrow_ucol
		                  : hemv_cntl_bs_ke_lcol_urow )
		         : ( rows ? hemv_cntl_bs_ke_lcol_urow
		                  : hemv_cntl_bs_ke_lrow_ucol ) );
	}
	else
	{
		// Flag the unit-stride operands as packed already, so the blocked
		// algorithm does not pack them needlessly.
		if ( a_unit ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
		if ( x_unit ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
		if ( y_unit ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );

		// Same pairing as above, but decided by storage tilt and choosing
		// among the trees that perform blocking.
		const bool_t lower = bli_obj_is_lower( a );
		const bool_t rows  = bli_obj_is_row_tilted( a );

		tree = ( lower
		         ? ( rows ? hemv_cntl_ge_lrow_ucol
		                  : hemv_cntl_ge_lcol_urow )
		         : ( rows ? hemv_cntl_ge_lcol_urow
		                  : hemv_cntl_ge_lrow_ucol ) );
	}

	// Run the internal back-end on the scalar copies with the chosen tree.
	// conjh = BLIS_CONJUGATE requests the Hermitian (rather than the
	// symmetric) algorithms.
	bli_hemv_int( BLIS_CONJUGATE,
	              &alpha_cast,
	              a,
	              x,
	              &beta_cast,
	              y,
	              cntx,
	              tree );
}

//
// Define BLAS-like interfaces with homogeneous-typed operands.
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploa, \ conj_t conja, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, xo, betao, yo; \ \ inc_t rs_x, cs_x; \ inc_t rs_y, cs_y; \ \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_conj( conja, &ao ); \ bli_obj_set_conj( conjx, &xo ); \ \ bli_obj_set_struc( BLIS_HERMITIAN, &ao ); \ \ PASTEMAC0(opname)( &alphao, \ &ao, \ &xo, \ &betao, \ &yo, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( hemv_front ) blis-0.6.1/frame/2/hemv/other/bli_hemv_front.h000066400000000000000000000044101360743507500211410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_hemv_front ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx ); // // Prototype BLAS-like interfaces with homogeneous-typed operands. // #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploa, \ conj_t conja, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( hemv_front ) blis-0.6.1/frame/2/hemv/other/bli_hemv_int.c000066400000000000000000000102441360743507500206000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T hemv_fp typedef void (*FUNCPTR_T)( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ); static FUNCPTR_T vars[4][3] = { // unblocked unblocked with fusing blocked { bli_hemv_unb_var1, bli_hemv_unf_var1, bli_hemv_blk_var1, }, { bli_hemv_unb_var2, NULL, bli_hemv_blk_var2, }, { bli_hemv_unb_var3, bli_hemv_unf_var3, bli_hemv_blk_var3, }, { bli_hemv_unb_var4, NULL, bli_hemv_blk_var4, }, }; void bli_hemv_int( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; obj_t a_local; // Check parameters. 
if ( bli_error_checking_is_enabled() ) { if ( bli_is_conj( conjh ) ) bli_hemv_check( alpha, a, x, beta, y ); else bli_symv_check( alpha, a, x, beta, y ); } // If y has a zero dimension, return early. if ( bli_obj_has_zero_dim( y ) ) return; // If x has a zero dimension, scale y by beta and return early. if ( bli_obj_has_zero_dim( x ) ) { bli_scalm( beta, y ); return; } // Alias A in case we need to induce the upper triangular case. bli_obj_alias_to( a, &a_local ); /* // Our blocked algorithms only [explicitly] implement the lower triangular // case, so if matrix A is stored as upper triangular, we must toggle the // transposition (and conjugation) bits so that the diagonal partitioning // routines grab the correct partitions corresponding to the upper // triangular case. But we only need to do this for blocked algorithms, // since unblocked algorithms are responsible for handling the upper case // explicitly (and they should not be inspecting the transposition bit anyway). if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( a ) ) { bli_obj_toggle_conj( &a_local ); bli_obj_toggle_trans( &a_local ); } */ // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( conjh, alpha, &a_local, x, beta, y, cntx, cntl ); } blis-0.6.1/frame/2/hemv/other/bli_hemv_int.h000066400000000000000000000036141360743507500206100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_hemv_int( conj_t conjh, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx, hemv_t* cntl ); blis-0.6.1/frame/2/her/000077500000000000000000000000001360743507500144725ustar00rootroot00000000000000blis-0.6.1/frame/2/her/bli_her.h000066400000000000000000000034631360743507500162550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. //#include "bli_her_cntl.h" //#include "bli_her_front.h" //#include "bli_her_int.h" #include "bli_her_var.h" blis-0.6.1/frame/2/her/bli_her_unb_var1.c000066400000000000000000000113531360743507500200420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

//
// Unblocked variant 1 of the typed her/syr rank-1 update of c.
//
// Iteration i of the loop updates row i of the effective lower triangle:
// the i elements to the left of the diagonal via the axpyv kernel, then the
// diagonal element itself. For an upper-stored c the row and column strides
// are swapped so that the same loop applies.
//
//   uplo          which triangle of c is stored
//   conjx         conjugation to apply to x
//   conjh         conjugation of the (Hermitian) transpose; the code treats
//                 a conjugating conjh as "invoked as her" and anything else
//                 as syr
//   m             dimension of c and length of x
//   alpha         scalar; its imaginary part is ignored when invoked as her
//   x, incx       vector and its increment
//   c, rs_c, cs_c matrix and its row/column strides
//   cntx          context queried for the axpyv kernel
//
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       uplo_t  uplo, \
       conj_t  conjx, \
       conj_t  conjh, \
       dim_t   m, \
       ctype*  alpha, /* complex alpha allows her variants to also perform syr. */ \
       ctype*  x, inc_t incx, \
       ctype*  c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx  \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype*  x0; \
	ctype*  chi1; \
	ctype*  c10t; \
	ctype*  gamma11; \
	ctype   alpha_local; \
	ctype   alpha_chi1; \
	ctype   alpha_chi1_chi1; \
	ctype   conjx0_chi1; \
	ctype   conjx1_chi1; \
	dim_t   i; \
	dim_t   n_behind; \
	inc_t   rs_ct, cs_ct; \
	conj_t  conj0, conj1; \
\
	/* Guard against unused-variable warnings for conj0. (conj0 is in fact
	   read in the loop below, so this cast is merely defensive.) */ \
	( void )conj0; \
\
	/* Make a local copy of alpha and zero out the imaginary component if
	   we are being invoked as her, since her requires alpha to be real. */ \
	PASTEMAC(ch,copys)( *alpha, alpha_local ); \
	if ( bli_is_conj( conjh ) ) \
	{ \
		PASTEMAC(ch,seti0s)( alpha_local ); \
	} \
\
	/* The algorithm will be expressed in terms of the lower triangular case;
	   the upper triangular case is supported by swapping the row and column
	   strides of C (rs_c and cs_c) and toggling some conj parameters. */ \
	if      ( bli_is_lower( uplo ) ) \
	{ \
		rs_ct = rs_c; \
		cs_ct = cs_c; \
	} \
	else /* if ( bli_is_upper( uplo ) ) */ \
	{ \
		rs_ct = cs_c; \
		cs_ct = rs_c; \
\
		/* Toggle conjugation of conjx, but only if we are being invoked
		   as her; for syr, conjx is unchanged. */ \
		conjx = bli_apply_conj( conjh, conjx ); \
	} \
\
	/* Apply conjh (which carries the conjugation component of the Hermitian
	   transpose, if applicable) to conjx as needed to arrive at the effective
	   conjugation for the scalar and vector subproblems. In this variant
	   conj0 is used for the scalar chi1 and conj1 for the vector x0. */ \
	conj0 = conjx; \
	conj1 = bli_apply_conj( conjh, conjx ); \
\
	PASTECH(ch,axpyv_ker_ft) kfp_av; \
\
	/* Query the context for the kernel function pointer. */ \
	kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \
\
	for ( i = 0; i < m; ++i ) \
	{ \
		n_behind = i; \
		x0       = x + (0  )*incx; \
		chi1     = x + (i  )*incx; \
		c10t     = c + (i  )*rs_ct + (0  )*cs_ct; \
		gamma11  = c + (i  )*rs_ct + (i  )*cs_ct; \
\
		/* Apply conjx to chi1: one copy under conj0, one under conj1. */ \
		PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \
		PASTEMAC(ch,copycjs)( conj1, *chi1, conjx1_chi1 ); \
\
		/* Compute scalar for vector subproblem. */ \
		PASTEMAC(ch,scal2s)( alpha_local, conjx0_chi1, alpha_chi1 ); \
\
		/* Compute alpha * chi1 * conj(chi1) after chi1 has already been
		   conjugated, if needed, by conjx. */ \
		PASTEMAC(ch,scal2s)( alpha_chi1, conjx1_chi1, alpha_chi1_chi1 ); \
\
		/* c10t = c10t + alpha * chi1 * x0'; */ \
		kfp_av \
		( \
		  conj1, \
		  n_behind, \
		  &alpha_chi1, \
		  x0,   incx, \
		  c10t, cs_ct, \
		  cntx  \
		); \
\
		/* gamma11 = gamma11 + alpha * chi1 * conj(chi1); */ \
		PASTEMAC(ch,adds)( alpha_chi1_chi1, *gamma11 ); \
\
		/* For her, explicitly set the imaginary component of gamma11 to
		   zero. */ \
		if ( bli_is_conj( conjh ) ) \
			PASTEMAC(ch,seti0s)( *gamma11 ); \
	} \
}

INSERT_GENTFUNC_BASIC0( her_unb_var1 )
blis-0.6.1/frame/2/her/bli_her_unb_var2.c000066400000000000000000000113551360743507500200450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "blis.h"

//
// Unblocked variant 2 of the typed her/syr rank-1 update of c.
//
// Iteration i of the loop updates column i of the effective lower triangle:
// the m-i-1 elements below the diagonal via the axpyv kernel, then the
// diagonal element itself. For an upper-stored c the row and column strides
// are swapped so that the same loop applies.
//
//   uplo          which triangle of c is stored
//   conjx         conjugation to apply to x
//   conjh         conjugation of the (Hermitian) transpose; the code treats
//                 a conjugating conjh as "invoked as her" and anything else
//                 as syr
//   m             dimension of c and length of x
//   alpha         scalar; its imaginary part is ignored when invoked as her
//   x, incx       vector and its increment
//   c, rs_c, cs_c matrix and its row/column strides
//   cntx          context queried for the axpyv kernel
//
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       uplo_t  uplo, \
       conj_t  conjx, \
       conj_t  conjh, \
       dim_t   m, \
       ctype*  alpha, /* complex alpha allows her variants to also perform syr. */ \
       ctype*  x, inc_t incx, \
       ctype*  c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx  \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype*  chi1; \
	ctype*  x2; \
	ctype*  gamma11; \
	ctype*  c21; \
	ctype   alpha_local; \
	ctype   alpha_chi1; \
	ctype   alpha_chi1_chi1; \
	ctype   conjx0_chi1; \
	ctype   conjx1_chi1; \
	dim_t   i; \
	dim_t   n_ahead; \
	inc_t   rs_ct, cs_ct; \
	conj_t  conj0, conj1; \
\
	/* Guard against unused-variable warnings for conj0. (conj0 is in fact
	   read in the loop below, so this cast is merely defensive.) */ \
	( void )conj0; \
\
	/* Make a local copy of alpha and zero out the imaginary component if
	   we are being invoked as her, since her requires alpha to be real. */ \
	PASTEMAC(ch,copys)( *alpha, alpha_local ); \
	if ( bli_is_conj( conjh ) ) \
	{ \
		PASTEMAC(ch,seti0s)( alpha_local ); \
	} \
\
	/* The algorithm will be expressed in terms of the lower triangular case;
	   the upper triangular case is supported by swapping the row and column
	   strides of C (rs_c and cs_c) and toggling some conj parameters. */ \
	if      ( bli_is_lower( uplo ) ) \
	{ \
		rs_ct = rs_c; \
		cs_ct = cs_c; \
	} \
	else /* if ( bli_is_upper( uplo ) ) */ \
	{ \
		rs_ct = cs_c; \
		cs_ct = rs_c; \
\
		/* Toggle conjugation of conjx, but only if we are being invoked
		   as her; for syr, conjx is unchanged. */ \
		conjx = bli_apply_conj( conjh, conjx ); \
	} \
\
	/* Apply conjh (which carries the conjugation component of the Hermitian
	   transpose, if applicable) to conjx as needed to arrive at the effective
	   conjugation for the scalar and vector subproblems. In this variant
	   conj0 is used for the scalar chi1 and conj1 for the vector x2. */ \
	conj0 = bli_apply_conj( conjh, conjx ); \
	conj1 = conjx; \
\
	PASTECH(ch,axpyv_ker_ft) kfp_av; \
\
	/* Query the context for the kernel function pointer. */ \
	kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \
\
	for ( i = 0; i < m; ++i ) \
	{ \
		n_ahead  = m - i - 1; \
		chi1     = x + (i  )*incx; \
		x2       = x + (i+1)*incx; \
		gamma11  = c + (i  )*rs_ct + (i  )*cs_ct; \
		c21      = c + (i+1)*rs_ct + (i  )*cs_ct; \
\
		/* Apply conjx to chi1: one copy under conj0, one under conj1. */ \
		PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \
		PASTEMAC(ch,copycjs)( conj1, *chi1, conjx1_chi1 ); \
\
		/* Compute scalar for vector subproblem. */ \
		PASTEMAC(ch,scal2s)( alpha_local, conjx0_chi1, alpha_chi1 ); \
\
		/* Compute alpha * chi1 * conj(chi1) after chi1 has already been
		   conjugated, if needed, by conjx. */ \
		PASTEMAC(ch,scal2s)( alpha_chi1, conjx1_chi1, alpha_chi1_chi1 ); \
\
		/* c21 = c21 + alpha * x2 * conj(chi1); */ \
		kfp_av \
		( \
		  conj1, \
		  n_ahead, \
		  &alpha_chi1, \
		  x2,  incx, \
		  c21, rs_ct, \
		  cntx  \
		); \
\
		/* gamma11 = gamma11 + alpha * chi1 * conj(chi1); */ \
		PASTEMAC(ch,adds)( alpha_chi1_chi1, *gamma11 ); \
\
		/* For her, explicitly set the imaginary component of gamma11 to
		   zero. */ \
		if ( bli_is_conj( conjh ) ) \
			PASTEMAC(ch,seti0s)( *gamma11 ); \
	} \
}

INSERT_GENTFUNC_BASIC0( her_unb_var2 )
blis-0.6.1/frame/2/her/bli_her_var.h000066400000000000000000000050251360743507500171210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ conj_t conjh, \ obj_t* alpha, \ obj_t* x, \ obj_t* c, \ cntx_t* cntx, \ cntl_t* cntl \ ); GENPROT( her_blk_var1 ) GENPROT( her_blk_var2 ) GENPROT( her_unb_var1 ) GENPROT( her_unb_var2 ) // // Prototype BLAS-like interfaces with typed operands. // #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conjx, \ conj_t conjh, \ dim_t m, \ ctype* alpha, /* complex alpha allows her variants to also perform syr. */ \ ctype* x, inc_t incx, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ); INSERT_GENTPROTR_BASIC0( her_unb_var1 ) INSERT_GENTPROTR_BASIC0( her_unb_var2 ) blis-0.6.1/frame/2/her/bli_her_var_oapi.c000066400000000000000000000054521360743507500201300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENFRONT #define GENFRONT( opname, varname ) \ \ void PASTEMAC0(varname) \ ( \ conj_t conjh, \ obj_t* alpha, \ obj_t* x, \ obj_t* c, \ cntx_t* cntx, \ cntl_t* cntl \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( c ); \ \ uplo_t uplo = bli_obj_uplo( c ); \ conj_t conjx = bli_obj_conj_status( x ); \ \ dim_t m = bli_obj_length( c ); \ \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_c = bli_obj_buffer_at_off( c ); \ inc_t rs_c = bli_obj_row_stride( c ); \ inc_t cs_c = bli_obj_col_stride( c ); \ \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,_unb,_vft) f = \ PASTEMAC(varname,_qfp)( dt ); \ \ f \ ( \ uplo, \ conjx, \ conjh, \ m, \ buf_alpha, \ buf_x, incx, \ buf_c, rs_c, cs_c, \ cntx \ ); \ } \ GENFRONT( her, her_unb_var1 ) GENFRONT( her, her_unb_var2 ) blis-0.6.1/frame/2/her/other/000077500000000000000000000000001360743507500156135ustar00rootroot00000000000000blis-0.6.1/frame/2/her/other/bli_her_blk_var1.c000066400000000000000000000112451360743507500211470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_her_blk_var1( conj_t conjh, obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx, her_t* cntl ) { obj_t c11, c11_pack; obj_t c10; obj_t x1, x1_pack; obj_t x0; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). // Initialize objects for packing. bli_obj_init_pack( &c11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( c ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. 
b_alg = bli_determine_blocksize_f( ij, mn, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for C11, C10, x1, and x0. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, c, &c11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART10, ij, b_alg, c, &c10 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, x, &x0 ); // Initialize objects for packing C11 and x1 (if needed). bli_packm_init( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack C11, x1 (if needed). bli_packm_int( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // C10 = C10 + alpha * x1 * x0'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha, &x1_pack, &x0, &c10, cntx, bli_cntl_sub_ger( cntl ) ); // C11 = C11 + alpha * x1 * x1'; bli_her_int( conjh, alpha, &x1_pack, &c11_pack, cntx, bli_cntl_sub_her( cntl ) ); // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, cntx, bli_cntl_sub_unpackm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &c11_pack, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/her/other/bli_her_blk_var2.c000066400000000000000000000112451360743507500211500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_her_blk_var2( conj_t conjh, obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx, her_t* cntl ) { obj_t c11, c11_pack; obj_t c21; obj_t x1, x1_pack; obj_t x2; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). // Initialize objects for packing. 
bli_obj_init_pack( &c11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( c ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for C11, C21, x1, and x2. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, c, &c11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART21, ij, b_alg, c, &c21 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, x, &x2 ); // Initialize objects for packing C11 and x1 (if needed). bli_packm_init( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack C11, x1 (if needed). bli_packm_int( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // C21 = C21 + alpha * x2 * x1'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha, &x2, &x1_pack, &c21, cntx, bli_cntl_sub_ger( cntl ) ); // C11 = C11 + alpha * x1 * x1'; bli_her_int( conjh, alpha, &x1_pack, &c11_pack, cntx, bli_cntl_sub_her( cntl ) ); // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, cntx, bli_cntl_sub_unpackm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &c11_pack, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/her/other/bli_her_cntl.c000066400000000000000000000127371360743507500204150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackm_t* unpackm_cntl; extern ger_t* ger_cntl_rp_bs_row; extern ger_t* ger_cntl_cp_bs_col; extern ger_t* ger_cntl_bs_ke_row; extern ger_t* ger_cntl_bs_ke_col; her_t* her_cntl_bs_ke_lrow_ucol = NULL; her_t* her_cntl_bs_ke_lcol_urow = NULL; her_t* her_cntl_ge_lrow_ucol = NULL; her_t* her_cntl_ge_lcol_urow = NULL; void bli_her_cntl_init() { // Create control trees for the lowest-level kernels. 
These trees induce // operations on (persumably) relatively small block-subvector problems. her_cntl_bs_ke_lrow_ucol = bli_her_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT1, 0, NULL, NULL, NULL, NULL, NULL ); her_cntl_bs_ke_lcol_urow = bli_her_cntl_obj_create( BLIS_UNBLOCKED, BLIS_VARIANT2, 0, NULL, NULL, NULL, NULL, NULL ); // Create control trees for generally large problems. Here, we choose // variants that partition for ger subproblems in the same direction // as the assumed storage. her_cntl_ge_lrow_ucol = bli_her_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, BLIS_M2, packv_cntl, // pack x1 (if needed) NULL, // do NOT pack C11 ger_cntl_rp_bs_row, her_cntl_bs_ke_lrow_ucol, NULL ); // no unpacking needed her_cntl_ge_lcol_urow = bli_her_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, BLIS_M2, packv_cntl, // pack x1 (if needed) NULL, // do NOT pack C11 ger_cntl_cp_bs_col, her_cntl_bs_ke_lcol_urow, NULL ); // no unpacking needed } void bli_her_cntl_finalize() { bli_cntl_free_node( her_cntl_bs_ke_lrow_ucol ); bli_cntl_free_node( her_cntl_bs_ke_lcol_urow ); bli_cntl_free_node( her_cntl_ge_lrow_ucol ); bli_cntl_free_node( her_cntl_ge_lcol_urow ); } her_t* bli_her_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packm_t* sub_packm_c11, ger_t* sub_ger, her_t* sub_her, unpackm_t* sub_unpackm_c11 ) { her_t* cntl; cntl = ( her_t* ) bli_malloc_intl( sizeof(her_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_packm_c11 = sub_packm_c11; cntl->sub_ger = sub_ger; cntl->sub_her = sub_her; cntl->sub_unpackm_c11 = sub_unpackm_c11; return cntl; } void bli_her_cntl_obj_init( her_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packm_t* sub_packm_c11, ger_t* sub_ger, her_t* sub_her, unpackm_t* sub_unpackm_c11 ) { cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packv_x1 = sub_packv_x1; 
cntl->sub_packm_c11 = sub_packm_c11; cntl->sub_ger = sub_ger; cntl->sub_her = sub_her; cntl->sub_unpackm_c11 = sub_unpackm_c11; } blis-0.6.1/frame/2/her/other/bli_her_cntl.h000066400000000000000000000056521360743507500204200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ struct her_s { impl_t impl_type; varnum_t var_num; bszid_t bszid; struct packv_s* sub_packv_x1; struct packm_s* sub_packm_c11; struct ger_s* sub_ger; struct her_s* sub_her; struct unpackm_s* sub_unpackm_c11; }; typedef struct her_s her_t; #define bli_cntl_sub_her( cntl ) cntl->sub_her void bli_her_cntl_init( void ); void bli_her_cntl_finalize( void ); her_t* bli_her_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packm_t* sub_packm_c11, ger_t* sub_ger, her_t* sub_her, unpackm_t* sub_unpackm_c11 ); void bli_her_cntl_obj_init( her_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packm_t* sub_packm_c11, ger_t* sub_ger, her_t* sub_her, unpackm_t* sub_unpackm_c11 ); blis-0.6.1/frame/2/her/other/bli_her_front.c000066400000000000000000000133731360743507500206020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern her_t* her_cntl_bs_ke_lrow_ucol; extern her_t* her_cntl_bs_ke_lcol_urow; extern her_t* her_cntl_ge_lrow_ucol; extern her_t* her_cntl_ge_lcol_urow; void bli_her_front ( obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx ) { her_t* her_cntl; num_t dt_targ_x; //num_t dt_targ_c; bool_t x_has_unit_inc; bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_her_check( alpha, x, c ); // Query the target datatypes of each object. dt_targ_x = bli_obj_target_dt( x ); //dt_targ_c = bli_obj_target_dt( c ); // Determine whether each operand with unit stride. x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); c_has_unit_inc = ( bli_obj_is_row_stored( c ) || bli_obj_is_col_stored( c ) ); // Create object to hold a copy-cast of alpha. dt_alpha = dt_targ_x; bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( x_has_unit_inc && c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. // The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. 
if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol; else her_cntl = her_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lcol_urow; else her_cntl = her_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lrow_ucol; else her_cntl = her_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lcol_urow; else her_cntl = her_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-cast scalar and the // chosen control tree. Set conjh to BLIS_CONJUGATE to invoke the // Hermitian (and not symmetric) algorithms. bli_her_int( BLIS_CONJUGATE, &alpha_local, x, c, cntx, her_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. 
// #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ conj_t conjx, \ dim_t m, \ ctype_r* alpha, \ ctype* x, inc_t incx, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, xo, co; \ \ inc_t rs_x, cs_x; \ \ rs_x = incx; cs_x = m * incx; \ \ bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_conj( conjx, &xo ); \ bli_obj_set_uplo( uploc, &co ); \ \ bli_obj_set_struc( BLIS_HERMITIAN, &co ); \ \ PASTEMAC0(opname)( &alphao, \ &xo, \ &co, \ cntx ); \ } INSERT_GENTFUNCR_BASIC0( her_front ) blis-0.6.1/frame/2/her/other/bli_her_front.h000066400000000000000000000041351360743507500206030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_her_front ( obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx ); #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ conj_t conjx, \ dim_t m, \ ctype_r* alpha, \ ctype* x, inc_t incx, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ); INSERT_GENTPROTR_BASIC( her_front ) blis-0.6.1/frame/2/her/other/bli_her_int.c000066400000000000000000000071601360743507500202410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T her_fp typedef void (*FUNCPTR_T)( conj_t conjh, obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx, her_t* cntl ); static FUNCPTR_T vars[4][3] = { // unblocked unblocked with fusing blocked { bli_her_unb_var1, NULL, bli_her_blk_var1, }, { bli_her_unb_var2, NULL, bli_her_blk_var2, }, { NULL, NULL, NULL, }, { NULL, NULL, NULL, }, }; void bli_her_int( conj_t conjh, obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx, her_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; obj_t x_local; obj_t c_local; // Check parameters. if ( bli_error_checking_is_enabled() ) { if ( bli_is_conj( conjh ) ) bli_her_check( alpha, x, c ); else bli_syr_check( alpha, x, c ); } // If C or x has a zero dimension, return early. if ( bli_obj_has_zero_dim( c ) ) return; if ( bli_obj_has_zero_dim( x ) ) return; // Alias the operands in case we need to apply conjugations. 
bli_obj_alias_to( x, &x_local ); bli_obj_alias_to( c, &c_local ); // If matrix C is marked for conjugation, we interpret this as a request // to apply a conjugation to the other operands. if ( bli_obj_has_conj( &c_local ) ) { bli_obj_toggle_conj( &c_local ); // Notice that we don't need to conjugate alpha since it is guaranteed // to be real. bli_obj_toggle_conj( &x_local ); } // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( conjh, alpha, &x_local, &c_local, cntx, cntl ); } blis-0.6.1/frame/2/her/other/bli_her_int.h000066400000000000000000000035101360743507500202410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_her_int( conj_t conjh, obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx, her_t* cntl ); blis-0.6.1/frame/2/her2/000077500000000000000000000000001360743507500145545ustar00rootroot00000000000000blis-0.6.1/frame/2/her2/bli_her2.h000066400000000000000000000034671360743507500164250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. //#include "bli_her2_cntl.h" //#include "bli_her2_front.h" //#include "bli_her2_int.h" #include "bli_her2_var.h" blis-0.6.1/frame/2/her2/bli_her2_unb_var1.c000066400000000000000000000124511360743507500202060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conjx, \ conj_t conjy, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x0; \ ctype* chi1; \ ctype* y0; \ ctype* psi1; \ ctype* c10t; \ ctype* gamma11; \ ctype alpha0; \ ctype alpha1; \ ctype alpha0_chi1; \ ctype alpha1_psi1; \ ctype alpha0_chi1_psi1; \ ctype conjx0_chi1; \ ctype conjy1_psi1; \ ctype conjy0_psi1; \ dim_t i; \ dim_t n_behind; \ inc_t rs_ct, cs_ct; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ PASTEMAC(ch,copys)( *alpha, alpha0 ); \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_ct = cs_c; \ cs_ct = rs_c; \ \ /* Toggle conjugation of conjx/conjy, but only if we are being invoked as her2; for syr2, conjx/conjy are unchanged. 
*/ \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ PASTEMAC(ch,copys)( *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian transpose, if applicable) to conjx and/or conjy as needed to arrive at the effective conjugation for the vector subproblems. */ \ conj0 = bli_apply_conj( conjh, conjy ); \ conj1 = bli_apply_conj( conjh, conjx ); \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointer. */ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_behind = i; \ x0 = x + (0 )*incx; \ chi1 = x + (i )*incx; \ y0 = y + (0 )*incy; \ psi1 = y + (i )*incy; \ c10t = c + (i )*rs_ct + (0 )*cs_ct; \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx0_chi1 ); \ PASTEMAC(ch,copycjs)( conjy, *psi1, conjy1_psi1 ); \ PASTEMAC(ch,copycjs)( conj0, *psi1, conjy0_psi1 ); \ \ /* Compute scalars for vector subproblems. */ \ PASTEMAC(ch,scal2s)( alpha0, conjx0_chi1, alpha0_chi1 ); \ PASTEMAC(ch,scal2s)( alpha1, conjy1_psi1, alpha1_psi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ PASTEMAC(ch,scal2s)( alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ \ /* c10t = c10t + alpha * chi1 * y0'; */ \ kfp_av \ ( \ conj0, \ n_behind, \ &alpha0_chi1, \ y0, incy, \ c10t, cs_ct, \ cntx \ ); \ \ /* c10t = c10t + conj(alpha) * psi1 * x0'; */ \ kfp_av \ ( \ conj1, \ n_behind, \ &alpha1_psi1, \ x0, incx, \ c10t, cs_ct, \ cntx \ ); \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. 
*/ \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( *gamma11 ); \ } \ } INSERT_GENTFUNC_BASIC0( her2_unb_var1 ) blis-0.6.1/frame/2/her2/bli_her2_unb_var2.c000066400000000000000000000130271360743507500202070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conjx, \ conj_t conjy, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x0; \ ctype* chi1; \ ctype* x2; \ ctype* psi1; \ ctype* c10t; \ ctype* gamma11; \ ctype* c21; \ ctype alpha0; \ ctype alpha1; \ ctype alpha0_psi1; \ ctype alpha1_psi1; \ ctype alpha0_chi1_psi1; \ ctype conjy0_psi1; \ ctype conjy1_psi1; \ ctype conjx0_chi1; \ dim_t i; \ dim_t n_behind; \ dim_t n_ahead; \ inc_t rs_ct, cs_ct; \ conj_t conj0, conj1; \ conj_t conjh_conjy; \ \ /* Eliminate unused variable warnings. */ \ ( void )conjh_conjy; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ PASTEMAC(ch,copys)( *alpha, alpha0 ); \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_ct = cs_c; \ cs_ct = rs_c; \ \ /* Toggle conjugation of conjx/conjy, but only if we are being invoked as her2; for syr2, conjx/conjy are unchanged. */ \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ PASTEMAC(ch,copys)( *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian transpose, if applicable) to conjx and/or conjy as needed to arrive at the effective conjugation for the vector subproblems. */ \ conj0 = conjx; \ conj1 = bli_apply_conj( conjh, conjx ); \ conjh_conjy = bli_apply_conj( conjh, conjy ); \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointer. 
*/ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_behind = i; \ n_ahead = m - i - 1; \ x0 = x + (0 )*incx; \ chi1 = x + (i )*incx; \ x2 = x + (i+1)*incx; \ psi1 = y + (i )*incy; \ c10t = c + (i )*rs_ct + (0 )*cs_ct; \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ PASTEMAC(ch,copycjs)( conjh_conjy, *psi1, conjy0_psi1 ); \ PASTEMAC(ch,copycjs)( conjy, *psi1, conjy1_psi1 ); \ PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \ \ /* Compute scalars for vector subproblems. */ \ PASTEMAC(ch,scal2s)( alpha0, conjy0_psi1, alpha0_psi1 ); \ PASTEMAC(ch,scal2s)( alpha1, conjy1_psi1, alpha1_psi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ PASTEMAC(ch,scal2s)( alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ \ /* c21 = c21 + alpha * x2 * conj(psi1); */ \ kfp_av \ ( \ conj0, \ n_ahead, \ &alpha0_psi1, \ x2, incx, \ c21, rs_ct, \ cntx \ ); \ \ /* c10t = c10t + conj(alpha) * psi1 * x0'; */ \ kfp_av \ ( \ conj1, \ n_behind, \ &alpha1_psi1, \ x0, incx, \ c10t, cs_ct, \ cntx \ ); \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( *gamma11 ); \ } \ } INSERT_GENTFUNC_BASIC0( her2_unb_var2 ) blis-0.6.1/frame/2/her2/bli_her2_unb_var3.c000066400000000000000000000130251360743507500202060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conjx, \ conj_t conjy, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* chi1; \ ctype* y0; \ ctype* psi1; \ ctype* y2; \ ctype* c10t; \ ctype* gamma11; \ ctype* c21; \ ctype alpha0; \ ctype alpha1; \ ctype alpha0_chi1; \ ctype alpha1_chi1; \ ctype alpha0_chi1_psi1; \ ctype conjx0_chi1; \ ctype conjx1_chi1; \ ctype conjy0_psi1; \ dim_t i; \ dim_t n_behind; \ dim_t n_ahead; \ inc_t rs_ct, cs_ct; \ conj_t conj0, conj1; \ conj_t conjh_conjx; \ \ /* Eliminate unused variable warnings. */ \ ( void )conjh_conjx; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ PASTEMAC(ch,copys)( *alpha, alpha0 ); \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_ct = cs_c; \ cs_ct = rs_c; \ \ /* Toggle conjugation of conjx/conjy, but only if we are being invoked as her2; for syr2, conjx/conjy are unchanged. */ \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ PASTEMAC(ch,copys)( *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian transpose, if applicable) to conjx and/or conjy as needed to arrive at the effective conjugation for the vector subproblems. */ \ conj0 = bli_apply_conj( conjh, conjy ); \ conj1 = conjy; \ conjh_conjx = bli_apply_conj( conjh, conjx ); \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointer. 
*/ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_behind = i; \ n_ahead = m - i - 1; \ chi1 = x + (i )*incx; \ y0 = y + (0 )*incy; \ psi1 = y + (i )*incy; \ y2 = y + (i+1)*incy; \ c10t = c + (i )*rs_ct + (0 )*cs_ct; \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx0_chi1 ); \ PASTEMAC(ch,copycjs)( conjh_conjx, *chi1, conjx1_chi1 ); \ PASTEMAC(ch,copycjs)( conj0, *psi1, conjy0_psi1 ); \ \ /* Compute scalars for vector subproblems. */ \ PASTEMAC(ch,scal2s)( alpha0, conjx0_chi1, alpha0_chi1 ); \ PASTEMAC(ch,scal2s)( alpha1, conjx1_chi1, alpha1_chi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ PASTEMAC(ch,scal2s)( alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ \ /* c10t = c10t + alpha * chi1 * y0'; */ \ kfp_av \ ( \ conj0, \ n_behind, \ &alpha0_chi1, \ y0, incy, \ c10t, cs_ct, \ cntx \ ); \ \ /* c21 = c21 + conj(alpha) * y2 * conj(chi1); */ \ kfp_av \ ( \ conj1, \ n_ahead, \ &alpha1_chi1, \ y2, incy, \ c21, rs_ct, \ cntx \ ); \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( *gamma11 ); \ } \ } INSERT_GENTFUNC_BASIC0( her2_unb_var3 ) blis-0.6.1/frame/2/her2/bli_her2_unb_var4.c000066400000000000000000000127671360743507500202230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conjx, \ conj_t conjy, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* chi1; \ ctype* x2; \ ctype* psi1; \ ctype* y2; \ ctype* gamma11; \ ctype* c21; \ ctype alpha0; \ ctype alpha1; \ ctype alpha0_psi1; \ ctype alpha1_chi1; \ ctype alpha0_chi1_psi1; \ ctype conjy0_psi1; \ ctype conjx1_chi1; \ ctype conjx0_chi1; \ dim_t i; \ dim_t n_ahead; \ inc_t rs_ct, cs_ct; \ conj_t conj0, conj1; \ conj_t conjh_conjx; \ conj_t conjh_conjy; \ \ /* Eliminate unused variable warnings. */ \ ( void )conjh_conjx; \ ( void )conjh_conjy; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ PASTEMAC(ch,copys)( *alpha, alpha0 ); \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_ct = cs_c; \ cs_ct = rs_c; \ \ /* Toggle conjugation of conjx/conjy, but only if we are being invoked as her2; for syr2, conjx/conjy are unchanged. */ \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ PASTEMAC(ch,copys)( *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian transpose, if applicable) to conjx and/or conjy as needed to arrive at the effective conjugation for the vector subproblems. */ \ conj0 = conjx; \ conj1 = conjy; \ conjh_conjx = bli_apply_conj( conjh, conjx ); \ conjh_conjy = bli_apply_conj( conjh, conjy ); \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointer. 
*/ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_ahead = m - i - 1; \ chi1 = x + (i )*incx; \ x2 = x + (i+1)*incx; \ psi1 = y + (i )*incy; \ y2 = y + (i+1)*incy; \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ PASTEMAC(ch,copycjs)( conjh_conjy, *psi1, conjy0_psi1 ); \ PASTEMAC(ch,copycjs)( conjh_conjx, *chi1, conjx1_chi1 ); \ PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \ \ /* Compute scalars for vector subproblems. */ \ PASTEMAC(ch,scal2s)( alpha0, conjy0_psi1, alpha0_psi1 ); \ PASTEMAC(ch,scal2s)( alpha1, conjx1_chi1, alpha1_chi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ PASTEMAC(ch,scal2s)( alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ \ /* c21 = c21 + alpha * x2 * conj(psi1); */ \ kfp_av \ ( \ conj0, \ n_ahead, \ &alpha0_psi1, \ x2, incx, \ c21, rs_ct, \ cntx \ ); \ \ /* c21 = c21 + conj(alpha) * y2 * conj(chi1); */ \ kfp_av \ ( \ conj1, \ n_ahead, \ &alpha1_chi1, \ y2, incy, \ c21, rs_ct, \ cntx \ ); \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( *gamma11 ); \ } \ } INSERT_GENTFUNC_BASIC0( her2_unb_var4 ) blis-0.6.1/frame/2/her2/bli_her2_unf_var1.c000066400000000000000000000123501360743507500202100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conjx, \ conj_t conjy, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* x0; \ ctype* chi1; \ ctype* y0; \ ctype* psi1; \ ctype* c10t; \ ctype* gamma11; \ ctype alpha0; \ ctype alpha1; \ ctype alpha0_chi1; \ ctype alpha1_psi1; \ ctype alpha0_chi1_psi1; \ ctype conjx0_chi1; \ ctype conjy1_psi1; \ ctype conjy0_psi1; \ dim_t i; \ dim_t n_behind; \ inc_t rs_ct, cs_ct; \ conj_t conj0, conj1; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ PASTEMAC(ch,copys)( *alpha, alpha0 ); \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_ct = cs_c; \ cs_ct = rs_c; \ \ /* Toggle conjugation of conjx/conjy, but only if we are being invoked as her2; for syr2, conjx/conjy are unchanged. */ \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ PASTEMAC(ch,copys)( *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian transpose, if applicable) to conjx and/or conjy as needed to arrive at the effective conjugation for the vector subproblems. */ \ conj0 = bli_apply_conj( conjh, conjy ); \ conj1 = bli_apply_conj( conjh, conjx ); \ \ PASTECH(ch,axpy2v_ker_ft) kfp_2v; \ \ /* Query the context for the kernel function pointer. 
*/ \ kfp_2v = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPY2V_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_behind = i; \ x0 = x + (0 )*incx; \ chi1 = x + (i )*incx; \ y0 = y + (0 )*incy; \ psi1 = y + (i )*incy; \ c10t = c + (i )*rs_ct + (0 )*cs_ct; \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx0_chi1 ); \ PASTEMAC(ch,copycjs)( conjy, *psi1, conjy1_psi1 ); \ PASTEMAC(ch,copycjs)( conj0, *psi1, conjy0_psi1 ); \ \ /* Compute scalars for vector subproblems. */ \ PASTEMAC(ch,scal2s)( alpha0, conjx0_chi1, alpha0_chi1 ); \ PASTEMAC(ch,scal2s)( alpha1, conjy1_psi1, alpha1_psi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ PASTEMAC(ch,scal2s)( alpha0_chi1, conjy0_psi1, alpha0_chi1_psi1 ); \ \ /* c10t = c10t + alpha * chi1 * y0'; */ \ /* c10t = c10t + conj(alpha) * psi1 * x0'; */ \ kfp_2v \ ( \ conj0, \ conj1, \ n_behind, \ &alpha0_chi1, \ &alpha1_psi1, \ y0, incy, \ x0, incx, \ c10t, cs_ct, \ cntx \ ); \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( *gamma11 ); \ } \ } INSERT_GENTFUNC_BASIC0( her2_unf_var1 ) blis-0.6.1/frame/2/her2/bli_her2_unf_var4.c000066400000000000000000000126701360743507500202200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conjx, \ conj_t conjy, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* chi1; \ ctype* x2; \ ctype* psi1; \ ctype* y2; \ ctype* gamma11; \ ctype* c21; \ ctype alpha0; \ ctype alpha1; \ ctype alpha0_psi1; \ ctype alpha1_chi1; \ ctype alpha0_chi1_psi1; \ ctype conjy0_psi1; \ ctype conjx1_chi1; \ ctype conjx0_chi1; \ dim_t i; \ dim_t n_ahead; \ inc_t rs_ct, cs_ct; \ conj_t conj0, conj1; \ conj_t conjh_conjx; \ conj_t conjh_conjy; \ \ /* Eliminate unused variable warnings. */ \ ( void )conjh_conjx; \ ( void )conjh_conjy; \ \ /* The algorithm will be expressed in terms of the lower triangular case; the upper triangular case is supported by swapping the row and column strides of A and toggling some conj parameters. */ \ if ( bli_is_lower( uplo ) ) \ { \ rs_ct = rs_c; \ cs_ct = cs_c; \ \ PASTEMAC(ch,copys)( *alpha, alpha0 ); \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha1 ); \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ rs_ct = cs_c; \ cs_ct = rs_c; \ \ /* Toggle conjugation of conjx/conjy, but only if we are being invoked as her2; for syr2, conjx/conjy are unchanged. */ \ conjx = bli_apply_conj( conjh, conjx ); \ conjy = bli_apply_conj( conjh, conjy ); \ \ PASTEMAC(ch,copycjs)( conjh, *alpha, alpha0 ); \ PASTEMAC(ch,copys)( *alpha, alpha1 ); \ } \ \ /* Apply conjh (which carries the conjugation component of the Hermitian transpose, if applicable) to conjx and/or conjy as needed to arrive at the effective conjugation for the vector subproblems. */ \ conj0 = conjx; \ conj1 = conjy; \ conjh_conjx = bli_apply_conj( conjh, conjx ); \ conjh_conjy = bli_apply_conj( conjh, conjy ); \ \ PASTECH(ch,axpy2v_ker_ft) kfp_2v; \ \ /* Query the context for the kernel function pointer. 
*/ \ kfp_2v = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPY2V_KER, cntx ); \ \ for ( i = 0; i < m; ++i ) \ { \ n_ahead = m - i - 1; \ chi1 = x + (i )*incx; \ x2 = x + (i+1)*incx; \ psi1 = y + (i )*incy; \ y2 = y + (i+1)*incy; \ gamma11 = c + (i )*rs_ct + (i )*cs_ct; \ c21 = c + (i+1)*rs_ct + (i )*cs_ct; \ \ /* Apply conjx and/or conjy to chi1 and/or psi1. */ \ PASTEMAC(ch,copycjs)( conjh_conjy, *psi1, conjy0_psi1 ); \ PASTEMAC(ch,copycjs)( conjh_conjx, *chi1, conjx1_chi1 ); \ PASTEMAC(ch,copycjs)( conj0, *chi1, conjx0_chi1 ); \ \ /* Compute scalars for vector subproblems. */ \ PASTEMAC(ch,scal2s)( alpha0, conjy0_psi1, alpha0_psi1 ); \ PASTEMAC(ch,scal2s)( alpha1, conjx1_chi1, alpha1_chi1 ); \ \ /* Compute alpha * chi1 * conj(psi1) after both chi1 and psi1 have already been conjugated, if needed, by conjx and conjy. */ \ PASTEMAC(ch,scal2s)( alpha0_psi1, conjx0_chi1, alpha0_chi1_psi1 ); \ \ /* c21 = c21 + alpha * x2 * conj(psi1); */ \ /* c21 = c21 + conj(alpha) * y2 * conj(chi1); */ \ kfp_2v \ ( \ conj0, \ conj1, \ n_ahead, \ &alpha0_psi1, \ &alpha1_chi1, \ x2, incx, \ y2, incy, \ c21, rs_ct, \ cntx \ ); \ \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ + conj(alpha) * psi1 * conj(chi1); */ \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \ \ /* For her2, explicitly set the imaginary component of gamma11 to zero. */ \ if ( bli_is_conj( conjh ) ) \ PASTEMAC(ch,seti0s)( *gamma11 ); \ } \ } INSERT_GENTFUNC_BASIC0( her2_unf_var4 ) blis-0.6.1/frame/2/her2/bli_her2_var.h000066400000000000000000000055551360743507500172750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. 
// #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ conj_t conjh, \ obj_t* alpha, \ obj_t* alpha_conj, \ obj_t* x, \ obj_t* y, \ obj_t* c, \ cntx_t* cntx, \ cntl_t* cntl \ ); GENPROT( her2_blk_var1 ) GENPROT( her2_blk_var2 ) GENPROT( her2_blk_var3 ) GENPROT( her2_blk_var4 ) GENPROT( her2_unb_var1 ) GENPROT( her2_unb_var2 ) GENPROT( her2_unb_var3 ) GENPROT( her2_unb_var4 ) GENPROT( her2_unf_var1 ) GENPROT( her2_unf_var4 ) // // Prototype BLAS-like interfaces with typed operands. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uplo, \ conj_t conjx, \ conj_t conjy, \ conj_t conjh, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( her2_unb_var1 ) INSERT_GENTPROT_BASIC0( her2_unb_var2 ) INSERT_GENTPROT_BASIC0( her2_unb_var3 ) INSERT_GENTPROT_BASIC0( her2_unb_var4 ) INSERT_GENTPROT_BASIC0( her2_unf_var1 ) INSERT_GENTPROT_BASIC0( her2_unf_var4 ) blis-0.6.1/frame/2/her2/bli_her2_var_oapi.c000066400000000000000000000062321360743507500202710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENFRONT #define GENFRONT( opname, varname ) \ \ void PASTEMAC0(varname) \ ( \ conj_t conjh, \ obj_t* alpha, \ obj_t* alpha_conj, \ obj_t* x, \ obj_t* y, \ obj_t* c, \ cntx_t* cntx, \ cntl_t* cntl \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( c ); \ \ uplo_t uplo = bli_obj_uplo( c ); \ conj_t conjx = bli_obj_conj_status( x ); \ conj_t conjy = bli_obj_conj_status( y ); \ \ dim_t m = bli_obj_length( c ); \ \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_y = bli_obj_buffer_at_off( y ); \ inc_t incy = bli_obj_vector_inc( y ); \ \ void* buf_c = bli_obj_buffer_at_off( c ); \ inc_t rs_c = bli_obj_row_stride( c ); \ inc_t cs_c = bli_obj_col_stride( c ); \ \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,_unb,_vft) f = \ PASTEMAC(varname,_qfp)( dt ); \ \ f \ ( \ uplo, \ conjx, \ conjy, \ conjh, \ m, \ buf_alpha, \ buf_x, incx, \ buf_y, incy, \ buf_c, rs_c, cs_c, \ cntx \ ); \ } \ GENFRONT( her2, her2_unb_var1 ) GENFRONT( her2, her2_unb_var2 ) GENFRONT( her2, her2_unb_var3 ) GENFRONT( her2, her2_unb_var4 ) GENFRONT( her2, her2_unf_var1 ) GENFRONT( her2, her2_unf_var4 ) blis-0.6.1/frame/2/her2/other/000077500000000000000000000000001360743507500156755ustar00rootroot00000000000000blis-0.6.1/frame/2/her2/other/bli_her2_blk_var1.c000066400000000000000000000131031360743507500213060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_her2_blk_var1( conj_t conjh, obj_t* alpha, obj_t* alpha_conj, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx, her2_t* cntl ) { obj_t c11, c11_pack; obj_t c10; obj_t x1, x1_pack; obj_t x0; obj_t y1, y1_pack; obj_t y0; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). // Initialize objects for packing. bli_obj_init_pack( &c11_pack ); bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension. mn = bli_obj_length( c ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for C11, C10, x1, x0, y1, and y0. 
bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, c, &c11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART10, ij, b_alg, c, &c10 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, x, &x0 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, y, &y1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, y, &y0 ); // Initialize objects for packing C11, x1, and y1 (if needed). bli_packm_init( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). bli_packm_int( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // C10 = C10 + alpha * x1 * y0'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha, &x1_pack, &y0, &c10, cntx, bli_cntl_sub_ger_rp( cntl ) ); // C10 = C10 + conj(alpha) * y1 * x0'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha_conj, &y1_pack, &x0, &c10, cntx, bli_cntl_sub_ger_rp( cntl ) ); // C11 = C11 + alpha * x1 * y1' + conj(alpha) * y1 * x1'; bli_her2_int( conjh, alpha, alpha_conj, &x1_pack, &y1_pack, &c11_pack, cntx, bli_cntl_sub_her2( cntl ) ); // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, cntx, bli_cntl_sub_unpackm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &c11_pack, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y1( cntl ) ); } blis-0.6.1/frame/2/her2/other/bli_her2_blk_var2.c000066400000000000000000000132621360743507500213150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_her2_blk_var2( conj_t conjh, obj_t* alpha, obj_t* alpha_conj, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx, her2_t* cntl ) { obj_t c11, c11_pack; obj_t c10; obj_t c21; obj_t x1, x1_pack; obj_t x0; obj_t x2; obj_t y1, y1_pack; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). // Initialize objects for packing. bli_obj_init_pack( &c11_pack ); bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension. mn = bli_obj_length( c ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for C11, C10, C21, x1, x0, x2, and y1. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, c, &c11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART10, ij, b_alg, c, &c10 ); bli_acquire_mpart_tl2br( BLIS_SUBPART21, ij, b_alg, c, &c21 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, x, &x0 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, x, &x2 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, y, &y1 ); // Initialize objects for packing C11, x1, and y1 (if needed). 
bli_packm_init( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). bli_packm_int( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // C10 = C10 + conj(alpha) * y1 * x0'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha_conj, &y1_pack, &x0, &c10, cntx, bli_cntl_sub_ger_rp( cntl ) ); // C21 = C21 + alpha * x2 * y1'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha, &x2, &y1_pack, &c21, cntx, bli_cntl_sub_ger_cp( cntl ) ); // C11 = C11 + alpha * x1 * y1' + conj(alpha) * y1 * x1'; bli_her2_int( conjh, alpha, alpha_conj, &x1_pack, &y1_pack, &c11_pack, cntx, bli_cntl_sub_her2( cntl ) ); // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, cntx, bli_cntl_sub_unpackm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &c11_pack, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y1( cntl ) ); } blis-0.6.1/frame/2/her2/other/bli_her2_blk_var3.c000066400000000000000000000132621360743507500213160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_her2_blk_var3( conj_t conjh, obj_t* alpha, obj_t* alpha_conj, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx, her2_t* cntl ) { obj_t c11, c11_pack; obj_t c10; obj_t c21; obj_t x1, x1_pack; obj_t y1, y1_pack; obj_t y0; obj_t y2; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). // Initialize objects for packing. bli_obj_init_pack( &c11_pack ); bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension. mn = bli_obj_length( c ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for C11, C10, C21, x1, y1, y0, and y2. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, c, &c11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART10, ij, b_alg, c, &c10 ); bli_acquire_mpart_tl2br( BLIS_SUBPART21, ij, b_alg, c, &c21 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, y, &y1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, y, &y0 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, y, &y2 ); // Initialize objects for packing C11, x1, and y1 (if needed). 
bli_packm_init( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). bli_packm_int( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // C10 = C10 + alpha * x1 * y0'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha, &x1_pack, &y0, &c10, cntx, bli_cntl_sub_ger_rp( cntl ) ); // C21 = C21 + conj(alpha) * y2 * x1'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha_conj, &y2, &x1_pack, &c21, cntx, bli_cntl_sub_ger_cp( cntl ) ); // C11 = C11 + alpha * x1 * y1' + conj(alpha) * y1 * x1'; bli_her2_int( conjh, alpha, alpha_conj, &x1_pack, &y1_pack, &c11_pack, cntx, bli_cntl_sub_her2( cntl ) ); // Copy/unpack C11 (if C11 was packed). bli_unpackm_int( &c11_pack, &c11, cntx, bli_cntl_sub_unpackm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &c11_pack, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y1( cntl ) ); } blis-0.6.1/frame/2/her2/other/bli_her2_blk_var4.c000066400000000000000000000131031360743507500213110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_her2_blk_var4( conj_t conjh, obj_t* alpha, obj_t* alpha_conj, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx, her2_t* cntl ) { obj_t c11, c11_pack; obj_t c21; obj_t x1, x1_pack; obj_t x2; obj_t y1, y1_pack; obj_t y2; dim_t mn; dim_t ij; dim_t b_alg; // Even though this blocked algorithm is expressed only in terms of the // lower triangular case, the upper triangular case is still supported: // when bli_acquire_mpart_tl2br() is passed a matrix that is stored in // in the upper triangle, and the requested subpartition resides in the // lower triangle (as is the case for this algorithm), the routine fills // the request as if the caller had actually requested the corresponding // "mirror" subpartition in the upper triangle, except that it marks the // subpartition for transposition (and conjugation). 
// Initialize objects for packing. bli_obj_init_pack( &c11_pack ); bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &y1_pack ); // Query dimension. mn = bli_obj_length( c ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for C11, C21, x1, x2, y1, and y2. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, c, &c11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART21, ij, b_alg, c, &c21 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, x, &x2 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, y, &y1 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, y, &y2 ); // Initialize objects for packing C11, x1, and y1 (if needed). bli_packm_init( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_init( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). bli_packm_int( &c11, &c11_pack, cntx, bli_cntl_sub_packm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_int( &y1, &y1_pack, cntx, bli_cntl_sub_packv_y1( cntl ) ); // C21 = C21 + alpha * x2 * y1'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha, &x2, &y1_pack, &c21, cntx, bli_cntl_sub_ger_cp( cntl ) ); // C21 = C21 + conj(alpha) * y2 * x1'; bli_ger_int( BLIS_NO_CONJUGATE, conjh, alpha_conj, &y2, &x1_pack, &c21, cntx, bli_cntl_sub_ger_cp( cntl ) ); // C11 = C11 + alpha * x1 * y1' + conj(alpha) * y1 * x1'; bli_her2_int( conjh, alpha, alpha_conj, &x1_pack, &y1_pack, &c11_pack, cntx, bli_cntl_sub_her2( cntl ) ); // Copy/unpack C11 (if C11 was packed). 
bli_unpackm_int( &c11_pack, &c11, cntx, bli_cntl_sub_unpackm_c11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &c11_pack, bli_cntl_sub_packm_c11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); bli_packv_release( &y1_pack, bli_cntl_sub_packv_y1( cntl ) ); } blis-0.6.1/frame/2/her2/other/bli_her2_cntl.c000066400000000000000000000142261360743507500205540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackm_t* unpackm_cntl; extern ger_t* ger_cntl_rp_bs_row; extern ger_t* ger_cntl_cp_bs_col; her2_t* her2_cntl_bs_ke_lrow_ucol = NULL; her2_t* her2_cntl_bs_ke_lcol_urow = NULL; her2_t* her2_cntl_ge_lrow_ucol = NULL; her2_t* her2_cntl_ge_lcol_urow = NULL; void bli_her2_cntl_init() { // Create control trees for the lowest-level kernels. These trees induce // operations on (persumably) relatively small block-subvector problems. her2_cntl_bs_ke_lrow_ucol = bli_her2_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT1, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); her2_cntl_bs_ke_lcol_urow = bli_her2_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT4, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control trees for generally large problems. Here, we choose // variants that partition for ger subproblems in the same direction // as the assumed storage. 
her2_cntl_ge_lrow_ucol = bli_her2_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, BLIS_M2, packv_cntl, // pack x1 (if needed) packv_cntl, // pack y1 (if needed) packm_cntl, // pack C11 (if needed) ger_cntl_rp_bs_row, ger_cntl_rp_bs_row, her2_cntl_bs_ke_lrow_ucol, unpackm_cntl ); // unpack C11 (if packed) her2_cntl_ge_lcol_urow = bli_her2_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT4, BLIS_M2, packv_cntl, // pack x1 (if needed) packv_cntl, // pack y1 (if needed) packm_cntl, // pack C11 (if needed) ger_cntl_cp_bs_col, ger_cntl_cp_bs_col, her2_cntl_bs_ke_lcol_urow, unpackm_cntl ); // unpack C11 (if packed) } void bli_her2_cntl_finalize() { bli_cntl_free_node( her2_cntl_bs_ke_lrow_ucol ); bli_cntl_free_node( her2_cntl_bs_ke_lcol_urow ); bli_cntl_free_node( her2_cntl_ge_lrow_ucol ); bli_cntl_free_node( her2_cntl_ge_lcol_urow ); } her2_t* bli_her2_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packv_t* sub_packv_y1, packm_t* sub_packm_c11, ger_t* sub_ger_rp, ger_t* sub_ger_cp, her2_t* sub_her2, unpackm_t* sub_unpackm_c11 ) { her2_t* cntl; cntl = ( her2_t* ) bli_malloc_intl( sizeof(her2_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_packv_y1 = sub_packv_y1; cntl->sub_packm_c11 = sub_packm_c11; cntl->sub_ger_rp = sub_ger_rp; cntl->sub_ger_cp = sub_ger_cp; cntl->sub_her2 = sub_her2; cntl->sub_unpackm_c11 = sub_unpackm_c11; return cntl; } void bli_her2_cntl_obj_init( her2_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packv_t* sub_packv_y1, packm_t* sub_packm_c11, ger_t* sub_ger_rp, ger_t* sub_ger_cp, her2_t* sub_her2, unpackm_t* sub_unpackm_c11 ) { cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_packv_y1 = sub_packv_y1; cntl->sub_packm_c11 = sub_packm_c11; cntl->sub_ger_rp = sub_ger_rp; cntl->sub_ger_cp = sub_ger_cp; cntl->sub_her2 = sub_her2; 
cntl->sub_unpackm_c11 = sub_unpackm_c11; } blis-0.6.1/frame/2/her2/other/bli_her2_cntl.h000066400000000000000000000063701360743507500205620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ struct her2_s { impl_t impl_type; varnum_t var_num; bszid_t bszid; struct packv_s* sub_packv_x1; struct packv_s* sub_packv_y1; struct packm_s* sub_packm_c11; struct ger_s* sub_ger_rp; struct ger_s* sub_ger_cp; struct her2_s* sub_her2; struct unpackm_s* sub_unpackm_c11; }; typedef struct her2_s her2_t; #define bli_cntl_sub_her2( cntl ) cntl->sub_her2 void bli_her2_cntl_init( void ); void bli_her2_cntl_finalize( void ); her2_t* bli_her2_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packv_t* sub_packv_y1, packm_t* sub_packm_c11, ger_t* sub_ger_rp, ger_t* sub_ger_cp, her2_t* sub_her2, unpackm_t* sub_unpackm_c11 ); void bli_her2_cntl_obj_init( her2_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, packv_t* sub_packv_x1, packv_t* sub_packv_y1, packm_t* sub_packm_c11, ger_t* sub_ger_rp, ger_t* sub_ger_cp, her2_t* sub_her2, unpackm_t* sub_unpackm_c11 ); blis-0.6.1/frame/2/her2/other/bli_her2_front.c000066400000000000000000000152131360743507500207410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern her2_t* her2_cntl_bs_ke_lrow_ucol; extern her2_t* her2_cntl_bs_ke_lcol_urow; extern her2_t* her2_cntl_ge_lrow_ucol; extern her2_t* her2_cntl_ge_lcol_urow; void bli_her2_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx ) { her2_t* her2_cntl; num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_c; bool_t x_has_unit_inc; bool_t y_has_unit_inc; bool_t c_has_unit_inc; obj_t alpha_local; obj_t alpha_conj_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_her2_check( alpha, x, y, c ); // Query the target datatypes of each object. dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); //dt_targ_c = bli_obj_target_dt( c ); // Determine whether each operand with unit stride. x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); c_has_unit_inc = ( bli_obj_is_row_stored( c ) || bli_obj_is_col_stored( c ) ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. 
dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // Also create a conjugated copy of alpha. bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_CONJUGATE, alpha, &alpha_conj_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( x_has_unit_inc && y_has_unit_inc && c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. // The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol; else her2_cntl = her2_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow; else her2_cntl = her2_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol; else her2_cntl = her2_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow; else her2_cntl = her2_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-cast scalar and the // chosen control tree. 
Set conjh to BLIS_CONJUGATE to invoke the // Hermitian (and not symmetric) algorithms. bli_her2_int( BLIS_CONJUGATE, &alpha_local, &alpha_conj_local, x, y, c, cntx, her2_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, xo, yo, co; \ \ inc_t rs_x, cs_x; \ inc_t rs_y, cs_y; \ \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_conj( conjx, &xo ); \ bli_obj_set_conj( conjy, &yo ); \ bli_obj_set_uplo( uploc, &co ); \ \ bli_obj_set_struc( BLIS_HERMITIAN, &co ); \ \ PASTEMAC0(opname)( &alphao, \ &xo, \ &yo, \ &co, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( her2_front ) blis-0.6.1/frame/2/her2/other/bli_her2_front.h000066400000000000000000000042321360743507500207450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_her2_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( her2_front ) blis-0.6.1/frame/2/her2/other/bli_her2_int.c000066400000000000000000000107341360743507500204060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T her2_fp typedef void (*FUNCPTR_T)( conj_t conjh, obj_t* alpha, obj_t* alpha_conj, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx, her2_t* cntl ); static FUNCPTR_T vars[4][3] = { // unblocked unblocked with fusing blocked { bli_her2_unb_var1, bli_her2_unf_var1, bli_her2_blk_var1 }, { bli_her2_unb_var2, NULL, bli_her2_blk_var2 }, { bli_her2_unb_var3, NULL, bli_her2_blk_var3 }, { bli_her2_unb_var4, bli_her2_unf_var4, bli_her2_blk_var4 }, }; void bli_her2_int( conj_t conjh, obj_t* alpha, obj_t* alpha_conj, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx, her2_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; obj_t alpha_local; obj_t alpha_conj_local; obj_t x_local; obj_t y_local; obj_t c_local; // Check parameters. 
if ( bli_error_checking_is_enabled() ) { if ( bli_is_conj( conjh ) ) bli_her2_check( alpha, x, y, c ); else bli_syr2_check( alpha, x, y, c ); } // If C, x, or y has a zero dimension, return early. if ( bli_obj_has_zero_dim( c ) ) return; if ( bli_obj_has_zero_dim( x ) ) return; if ( bli_obj_has_zero_dim( y ) ) return; // Alias the operands in case we need to apply conjugations. bli_obj_alias_to( x, &x_local ); bli_obj_alias_to( y, &y_local ); bli_obj_alias_to( c, &c_local ); // If matrix C is marked for conjugation, we interpret this as a request // to apply a conjugation to the other operands. if ( bli_obj_has_conj( &c_local ) ) { bli_obj_toggle_conj( &c_local ); bli_obj_toggle_conj( &x_local ); bli_obj_toggle_conj( &y_local ); bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ), BLIS_CONJUGATE, alpha, &alpha_local ); bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha_conj ), BLIS_CONJUGATE, alpha_conj, &alpha_conj_local ); } else { bli_obj_alias_to( *alpha, alpha_local ); bli_obj_alias_to( *alpha_conj, alpha_conj_local ); } // Extract the variant number and implementation type. n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( conjh, &alpha_local, &alpha_conj_local, &x_local, &y_local, &c_local, cntx, cntl ); } blis-0.6.1/frame/2/her2/other/bli_her2_int.h000066400000000000000000000036231360743507500204120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_her2_int( conj_t conjh, obj_t* alpha, obj_t* alpha_conj, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx, her2_t* cntl ); blis-0.6.1/frame/2/symv/000077500000000000000000000000001360743507500147125ustar00rootroot00000000000000blis-0.6.1/frame/2/symv/bli_symv.h000066400000000000000000000033441360743507500167130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. //#include "bli_symv_front.h" blis-0.6.1/frame/2/symv/other/000077500000000000000000000000001360743507500160335ustar00rootroot00000000000000blis-0.6.1/frame/2/symv/other/bli_symv_front.c000066400000000000000000000162641360743507500212440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern hemv_t* hemv_cntl_bs_ke_lrow_ucol; extern hemv_t* hemv_cntl_bs_ke_lcol_urow; extern hemv_t* hemv_cntl_ge_lrow_ucol; extern hemv_t* hemv_cntl_ge_lcol_urow; void bli_symv_front ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx ) { hemv_t* hemv_cntl; num_t dt_targ_a; num_t dt_targ_x; num_t dt_targ_y; bool_t a_has_unit_inc; bool_t x_has_unit_inc; bool_t y_has_unit_inc; obj_t alpha_local; obj_t beta_local; num_t dt_alpha; num_t dt_beta; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_symv_check( alpha, a, x, beta, y ); // Query the target datatypes of each object. dt_targ_a = bli_obj_target_dt( a ); dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); // Determine whether each operand with unit stride. 
a_has_unit_inc = ( bli_obj_is_row_stored( a ) || bli_obj_is_col_stored( a ) ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // Create an object to hold a copy-cast of beta. Notice that we use // the datatype of y. Here's why: If y is real and beta is complex, // there is no reason to keep beta_local in the complex domain since // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. dt_beta = dt_targ_y; bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( a_has_unit_inc && x_has_unit_inc && y_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. // The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. if ( bli_obj_is_lower( a ) ) { if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol; else hemv_cntl = hemv_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( a ) ) { if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow; else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. 
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( a ) ) { if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol; else hemv_cntl = hemv_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( a ) ) { if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow; else hemv_cntl = hemv_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-casts of scalars and the // chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the // symmetric (and not Hermitian) algorithms. bli_hemv_int( BLIS_NO_CONJUGATE, &alpha_local, a, x, &beta_local, y, cntx, hemv_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploa, \ conj_t conja, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, xo, betao, yo; \ \ inc_t rs_x, cs_x; \ inc_t rs_y, cs_y; \ \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_conj( conja, &ao ); \ bli_obj_set_conj( conjx, &xo ); \ \ bli_obj_set_struc( BLIS_SYMMETRIC, &ao ); \ \ PASTEMAC0(opname)( &alphao, \ &ao, \ 
&xo, \ &betao, \ &yo, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( symv_front ) blis-0.6.1/frame/2/symv/other/bli_symv_front.h000066400000000000000000000042761360743507500212510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_symv_front ( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploa, \ conj_t conja, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ ctype* beta, \ ctype* y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( symv_front ) blis-0.6.1/frame/2/syr/000077500000000000000000000000001360743507500145315ustar00rootroot00000000000000blis-0.6.1/frame/2/syr/bli_syr.h000066400000000000000000000033431360743507500163500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. //#include "bli_syr_front.h" blis-0.6.1/frame/2/syr/other/000077500000000000000000000000001360743507500156525ustar00rootroot00000000000000blis-0.6.1/frame/2/syr/other/bli_syr_front.c000066400000000000000000000135601360743507500206760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern her_t* her_cntl_bs_ke_lrow_ucol; extern her_t* her_cntl_bs_ke_lcol_urow; extern her_t* her_cntl_ge_lrow_ucol; extern her_t* her_cntl_ge_lcol_urow; void bli_syr_front ( obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx ) { her_t* her_cntl; num_t dt_targ_x; num_t dt_targ_c; bool_t x_has_unit_inc; bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syr_check( alpha, x, c ); // Query the target datatypes of each object. dt_targ_x = bli_obj_target_dt( x ); dt_targ_c = bli_obj_target_dt( c ); // Determine whether each operand with unit stride. x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); c_has_unit_inc = ( bli_obj_is_row_stored( c ) || bli_obj_is_col_stored( c ) ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the target datatypes of x and c to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_dt_union( dt_targ_x, dt_targ_c ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( x_has_unit_inc && c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. 
// The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol; else her_cntl = her_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lcol_urow; else her_cntl = her_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lrow_ucol; else her_cntl = her_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lcol_urow; else her_cntl = her_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-cast scalar and the // chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the // symmetric (and not Hermitian) algorithms. bli_her_int( BLIS_NO_CONJUGATE, &alpha_local, x, c, cntx, her_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. 
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, xo, co; \ \ inc_t rs_x, cs_x; \ \ rs_x = incx; cs_x = m * incx; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_conj( conjx, &xo ); \ bli_obj_set_uplo( uploc, &co ); \ \ bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \ \ PASTEMAC0(opname)( &alphao, \ &xo, \ &co, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( syr_front ) blis-0.6.1/frame/2/syr/other/bli_syr_front.h000066400000000000000000000041141360743507500206760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_syr_front ( obj_t* alpha, obj_t* x, obj_t* c, cntx_t* cntx ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ conj_t conjx, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( syr_front ) blis-0.6.1/frame/2/syr2/000077500000000000000000000000001360743507500146135ustar00rootroot00000000000000blis-0.6.1/frame/2/syr2/bli_syr2.h000066400000000000000000000033441360743507500165150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. //#include "bli_syr2_front.h" blis-0.6.1/frame/2/syr2/other/000077500000000000000000000000001360743507500157345ustar00rootroot00000000000000blis-0.6.1/frame/2/syr2/other/bli_syr2_front.c000066400000000000000000000145621360743507500210450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern her2_t* her2_cntl_bs_ke_lrow_ucol; extern her2_t* her2_cntl_bs_ke_lcol_urow; extern her2_t* her2_cntl_ge_lrow_ucol; extern her2_t* her2_cntl_ge_lcol_urow; void bli_syr2_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx ) { her2_t* her2_cntl; num_t dt_targ_x; num_t dt_targ_y; //num_t dt_targ_c; bool_t x_has_unit_inc; bool_t y_has_unit_inc; bool_t c_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syr2_check( alpha, x, y, c ); // Query the target datatypes of each object. dt_targ_x = bli_obj_target_dt( x ); dt_targ_y = bli_obj_target_dt( y ); //dt_targ_c = bli_obj_target_dt( c ); // Determine whether each operand with unit stride. x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 ); c_has_unit_inc = ( bli_obj_is_row_stored( c ) || bli_obj_is_col_stored( c ) ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. 
dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( x_has_unit_inc && y_has_unit_inc && c_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of upper/lower triangular storage and row/column-storage. // The row-stored lower triangular and column-stored upper triangular // trees are identical. Same for the remaining two trees. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol; else her2_cntl = her2_cntl_bs_ke_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow; else her2_cntl = her2_cntl_bs_ke_lrow_ucol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_is_lower( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol; else her2_cntl = her2_cntl_ge_lcol_urow; } else // if ( bli_obj_is_upper( c ) ) { if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow; else her2_cntl = her2_cntl_ge_lrow_ucol; } } // Invoke the internal back-end with the copy-cast scalar and the // chosen control tree. Set conjh to BLIS_NO_CONJUGATE to invoke the // symmetric (and not Hermitian) algorithms. 
bli_her2_int( BLIS_NO_CONJUGATE, &alpha_local, &alpha_local, x, y, c, cntx, her2_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, xo, yo, co; \ \ inc_t rs_x, cs_x; \ inc_t rs_y, cs_y; \ \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_conj( conjx, &xo ); \ bli_obj_set_conj( conjy, &yo ); \ bli_obj_set_uplo( uploc, &co ); \ \ bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \ \ PASTEMAC0(opname)( &alphao, \ &xo, \ &yo, \ &co, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( syr2_front ) blis-0.6.1/frame/2/syr2/other/bli_syr2_front.h000066400000000000000000000042321360743507500210430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_syr2_front ( obj_t* alpha, obj_t* x, obj_t* y, obj_t* c, cntx_t* cntx ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* alpha, \ ctype* x, inc_t incx, \ ctype* y, inc_t incy, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( syr2_front ) blis-0.6.1/frame/2/trmv/000077500000000000000000000000001360743507500147045ustar00rootroot00000000000000blis-0.6.1/frame/2/trmv/bli_trmv.h000066400000000000000000000034701360743507500166770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. //#include "bli_trmv_cntl.h" //#include "bli_trmv_front.h" //#include "bli_trmv_int.h" #include "bli_trmv_var.h" blis-0.6.1/frame/2/trmv/bli_trmv_unb_var1.c000066400000000000000000000106521360743507500204670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* a10t; \ ctype* alpha11; \ ctype* a12t; \ ctype* x0; \ ctype* chi1; \ ctype* x2; \ ctype alpha_alpha11_conj; \ ctype rho; \ dim_t iter, i; \ dim_t n_ahead; \ inc_t rs_at, cs_at; \ uplo_t uploa_trans; \ conj_t conja; \ \ if ( bli_does_notrans( transa ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ uploa_trans = uploa; \ } \ else /* if ( bli_does_trans( transa ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ uploa_trans = bli_uplo_toggled( uploa ); \ } \ \ conja = bli_extract_conj( transa ); \ \ PASTECH(ch,dotv_ker_ft) kfp_dv; \ \ /* Query the context for the kernel function pointer. */ \ kfp_dv = bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTV_KER, cntx ); \ \ /* We reduce all of the possible cases down to just lower/upper. 
*/ \ if ( bli_is_upper( uploa_trans ) ) \ { \ for ( iter = 0; iter < m; ++iter ) \ { \ i = iter; \ n_ahead = m - iter - 1; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ a12t = a + (i )*rs_at + (i+1)*cs_at; \ chi1 = x + (i )*incx; \ x2 = x + (i+1)*incx; \ \ /* chi1 = alpha * alpha11 * chi1; */ \ PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi1 ); \ \ /* chi1 = chi1 + alpha * a12t * x2; */ \ kfp_dv \ ( \ conja, \ BLIS_NO_CONJUGATE, \ n_ahead, \ a12t, cs_at, \ x2, incx, \ &rho, \ cntx \ ); \ PASTEMAC(ch,axpys)( *alpha, rho, *chi1 ); \ } \ } \ else /* if ( bli_is_lower( uploa_trans ) ) */ \ { \ for ( iter = 0; iter < m; ++iter ) \ { \ i = m - iter - 1; \ n_ahead = i; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ a10t = a + (i )*rs_at + (0 )*cs_at; \ chi1 = x + (i )*incx; \ x0 = x + (0 )*incx; \ \ /* chi1 = alpha * alpha11 * chi1; */ \ PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi1 ); \ \ /* chi1 = chi1 + alpha * a10t * x0; */ \ kfp_dv \ ( \ conja, \ BLIS_NO_CONJUGATE, \ n_ahead, \ a10t, cs_at, \ x0, incx, \ &rho, \ cntx \ ); \ PASTEMAC(ch,axpys)( *alpha, rho, *chi1 ); \ } \ } \ } INSERT_GENTFUNC_BASIC0( trmv_unb_var1 ) blis-0.6.1/frame/2/trmv/bli_trmv_unb_var2.c000066400000000000000000000106141360743507500204660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* a01; \ ctype* alpha11; \ ctype* a21; \ ctype* x0; \ ctype* chi1; \ ctype* x2; \ ctype alpha_alpha11_conj; \ ctype alpha_chi1; \ dim_t iter, i; \ dim_t n_behind; \ inc_t rs_at, cs_at; \ uplo_t uploa_trans; \ conj_t conja; \ \ if ( bli_does_notrans( transa ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ uploa_trans = uploa; \ } \ else /* if ( bli_does_trans( transa ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ uploa_trans = bli_uplo_toggled( uploa ); \ } \ \ conja = bli_extract_conj( transa ); \ \ PASTECH(ch,axpyv_ker_ft) kfp_av; \ \ /* Query the context for the kernel function pointer. */ \ kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ /* We reduce all of the possible cases down to just lower/upper. 
*/ \ if ( bli_is_upper( uploa_trans ) ) \ { \ for ( iter = 0; iter < m; ++iter ) \ { \ i = iter; \ n_behind = i; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ a01 = a + (0 )*rs_at + (i )*cs_at; \ chi1 = x + (i )*incx; \ x0 = x + (0 )*incx; \ \ /* x0 = x0 + alpha * chi1 * a01; */ \ PASTEMAC(ch,scal2s)( *alpha, *chi1, alpha_chi1 ); \ kfp_av \ ( \ conja, \ n_behind, \ &alpha_chi1, \ a01, rs_at, \ x0, incx, \ cntx \ ); \ \ /* chi1 = alpha * alpha11 * chi1; */ \ PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi1 ); \ } \ } \ else /* if ( bli_is_lower( uploa_trans ) ) */ \ { \ for ( iter = 0; iter < m; ++iter ) \ { \ i = m - iter - 1; \ n_behind = iter; \ alpha11 = a + (i )*rs_at + (i )*cs_at; \ a21 = a + (i+1)*rs_at + (i )*cs_at; \ chi1 = x + (i )*incx; \ x2 = x + (i+1)*incx; \ \ /* x2 = x2 + alpha * chi1 * a21; */ \ PASTEMAC(ch,scal2s)( *alpha, *chi1, alpha_chi1 ); \ kfp_av \ ( \ conja, \ n_behind, \ &alpha_chi1, \ a21, rs_at, \ x2, incx, \ cntx \ ); \ \ /* chi1 = alpha * alpha11 * chi1; */ \ PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi1 ); \ } \ } \ } INSERT_GENTFUNC_BASIC0( trmv_unb_var2 ) blis-0.6.1/frame/2/trmv/bli_trmv_unf_var1.c000066400000000000000000000143571360743507500205010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* one = PASTEMAC(ch,1); \ ctype* A10; \ ctype* A11; \ ctype* A12; \ ctype* a10t; \ ctype* alpha11; \ ctype* a12t; \ ctype* x0; \ ctype* x1; \ ctype* x2; \ ctype* x01; \ ctype* chi11; \ ctype* x21; \ ctype alpha_alpha11_conj; \ ctype rho1; \ dim_t iter, i, k, j, l; \ dim_t b_fuse, f; \ dim_t n_ahead, f_ahead; \ inc_t rs_at, cs_at; \ uplo_t uploa_trans; \ conj_t conja; \ \ if ( bli_does_notrans( transa ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ uploa_trans = uploa; \ } \ else /* if ( bli_does_trans( transa ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ uploa_trans = bli_uplo_toggled( uploa ); \ } \ \ conja = bli_extract_conj( transa ); \ \ PASTECH(ch,dotxf_ker_ft) kfp_df; \ \ /* Query the context for the kernel function pointer and fusing factor. */ \ kfp_df = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTXF_KER, cntx ); \ b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_DF, cntx ); \ \ /* We reduce all of the possible cases down to just lower/upper. 
*/ \ if ( bli_is_upper( uploa_trans ) ) \ { \ for ( iter = 0; iter < m; iter += f ) \ { \ f = bli_determine_blocksize_dim_f( iter, m, b_fuse ); \ i = iter; \ n_ahead = m - iter - f; \ A11 = a + (i )*rs_at + (i )*cs_at; \ A12 = a + (i )*rs_at + (i+f)*cs_at; \ x1 = x + (i )*incx; \ x2 = x + (i+f)*incx; \ \ /* x1 = alpha * A11 * x1; */ \ for ( k = 0; k < f; ++k ) \ { \ l = k; \ f_ahead = f - l - 1; \ alpha11 = A11 + (l )*rs_at + (l )*cs_at; \ a12t = A11 + (l )*rs_at + (l+1)*cs_at; \ chi11 = x1 + (l )*incx; \ x21 = x1 + (l+1)*incx; \ \ /* chi11 = alpha * alpha11 * chi11; */ \ PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi11 ); \ \ /* chi11 = chi11 + alpha * a12t * x21; */ \ PASTEMAC(ch,set0s)( rho1 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ PASTEMAC(ch,dotjs)( *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ PASTEMAC(ch,dots)( *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \ } \ PASTEMAC(ch,axpys)( *alpha, rho1, *chi11 ); \ } \ \ /* x1 = x1 + alpha * A12 * x2; */ \ kfp_df \ ( \ conja, \ BLIS_NO_CONJUGATE, \ n_ahead, \ f, \ alpha, \ A12, cs_at, rs_at, \ x2, incx, \ one, \ x1, incx, \ cntx \ ); \ } \ } \ else /* if ( bli_is_lower( uploa_trans ) ) */ \ { \ for ( iter = 0; iter < m; iter += f ) \ { \ f = bli_determine_blocksize_dim_b( iter, m, b_fuse ); \ i = m - iter - f; \ n_ahead = i; \ A11 = a + (i )*rs_at + (i )*cs_at; \ A10 = a + (i )*rs_at + (0 )*cs_at; \ x1 = x + (i )*incx; \ x0 = x + (0 )*incx; \ \ /* x1 = alpha * A11 * x1; */ \ for ( k = 0; k < f; ++k ) \ { \ l = f - k - 1; \ f_ahead = l; \ alpha11 = A11 + (l )*rs_at + (l )*cs_at; \ a10t = A11 + (l )*rs_at + (0 )*cs_at; \ chi11 = x1 + (l )*incx; \ x01 = x1 + (0 )*incx; \ \ /* chi11 = alpha * alpha11 * chi11; */ \ PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( 
diaga ) ) \ PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi11 ); \ \ /* chi11 = chi11 + alpha * a10t * x01; */ \ PASTEMAC(ch,set0s)( rho1 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_ahead; ++j ) \ PASTEMAC(ch,dotjs)( *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ } \ else \ { \ for ( j = 0; j < f_ahead; ++j ) \ PASTEMAC(ch,dots)( *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \ } \ PASTEMAC(ch,axpys)( *alpha, rho1, *chi11 ); \ } \ \ /* x1 = x1 + alpha * A10 * x0; */ \ kfp_df \ ( \ conja, \ BLIS_NO_CONJUGATE, \ n_ahead, \ f, \ alpha, \ A10, cs_at, rs_at, \ x0, incx, \ one, \ x1, incx, \ cntx \ ); \ } \ } \ } INSERT_GENTFUNC_BASIC0( trmv_unf_var1 ) blis-0.6.1/frame/2/trmv/bli_trmv_unf_var2.c000066400000000000000000000142161360743507500204740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ ctype* A01; \ ctype* A11; \ ctype* A21; \ ctype* a01; \ ctype* alpha11; \ ctype* a21; \ ctype* x0; \ ctype* x1; \ ctype* x2; \ ctype* x01; \ ctype* chi11; \ ctype* x21; \ ctype alpha_alpha11_conj; \ ctype alpha_chi11; \ dim_t iter, i, k, j, l; \ dim_t b_fuse, f; \ dim_t n_behind, f_behind; \ inc_t rs_at, cs_at; \ uplo_t uploa_trans; \ conj_t conja; \ \ if ( bli_does_notrans( transa ) ) \ { \ rs_at = rs_a; \ cs_at = cs_a; \ uploa_trans = uploa; \ } \ else /* if ( bli_does_trans( transa ) ) */ \ { \ rs_at = cs_a; \ cs_at = rs_a; \ uploa_trans = bli_uplo_toggled( uploa ); \ } \ \ conja = bli_extract_conj( transa ); \ \ PASTECH(ch,axpyf_ker_ft) kfp_af; \ \ /* Query the context for the kernel function pointer and fusing factor. */ \ kfp_af = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPYF_KER, cntx ); \ b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_AF, cntx ); \ \ /* We reduce all of the possible cases down to just lower/upper. 
*/ \ if ( bli_is_upper( uploa_trans ) ) \ { \ for ( iter = 0; iter < m; iter += f ) \ { \ f = bli_determine_blocksize_dim_f( iter, m, b_fuse ); \ i = iter; \ n_behind = i; \ A11 = a + (i )*rs_at + (i )*cs_at; \ A01 = a + (0 )*rs_at + (i )*cs_at; \ x1 = x + (i )*incx; \ x0 = x + (0 )*incx; \ \ /* x0 = x0 + alpha * A01 * x1; */ \ kfp_af \ ( \ conja, \ BLIS_NO_CONJUGATE, \ n_behind, \ f, \ alpha, \ A01, rs_at, cs_at, \ x1, incx, \ x0, incx, \ cntx \ ); \ \ /* x1 = alpha * A11 * x1; */ \ for ( k = 0; k < f; ++k ) \ { \ l = k; \ f_behind = l; \ alpha11 = A11 + (l )*rs_at + (l )*cs_at; \ a01 = A11 + (0 )*rs_at + (l )*cs_at; \ chi11 = x1 + (l )*incx; \ x01 = x1 + (0 )*incx; \ \ /* x01 = x01 + alpha * chi11 * a01; */ \ PASTEMAC(ch,scal2s)( *alpha, *chi11, alpha_chi11 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ PASTEMAC(ch,axpyjs)( alpha_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ PASTEMAC(ch,axpys)( alpha_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \ } \ \ /* chi11 = alpha * alpha11 * chi11; */ \ PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi11 ); \ } \ } \ } \ else /* if ( bli_is_lower( uploa_trans ) ) */ \ { \ for ( iter = 0; iter < m; iter += f ) \ { \ f = bli_determine_blocksize_dim_b( iter, m, b_fuse ); \ i = m - iter - f; \ n_behind = iter; \ A11 = a + (i )*rs_at + (i )*cs_at; \ A21 = a + (i+f)*rs_at + (i )*cs_at; \ x1 = x + (i )*incx; \ x2 = x + (i+f)*incx; \ \ /* x2 = x2 + alpha * A21 * x1; */ \ kfp_af \ ( \ conja, \ BLIS_NO_CONJUGATE, \ n_behind, \ f, \ alpha, \ A21, rs_at, cs_at, \ x1, incx, \ x2, incx, \ cntx \ ); \ \ /* x1 = alpha * A11 * x1; */ \ for ( k = 0; k < f; ++k ) \ { \ l = f - k - 1; \ f_behind = k; \ alpha11 = A11 + (l )*rs_at + (l )*cs_at; \ a21 = A11 + (l+1)*rs_at + (l )*cs_at; \ chi11 = x1 + (l )*incx; \ x21 = x1 + 
(l+1)*incx; \ \ /* x21 = x21 + alpha * chi11 * a21; */ \ PASTEMAC(ch,scal2s)( *alpha, *chi11, alpha_chi11 ); \ if ( bli_is_conj( conja ) ) \ { \ for ( j = 0; j < f_behind; ++j ) \ PASTEMAC(ch,axpyjs)( alpha_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ } \ else \ { \ for ( j = 0; j < f_behind; ++j ) \ PASTEMAC(ch,axpys)( alpha_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \ } \ \ /* chi11 = alpha * alpha11 * chi11; */ \ PASTEMAC(ch,copys)( *alpha, alpha_alpha11_conj ); \ if ( bli_is_nonunit_diag( diaga ) ) \ PASTEMAC(ch,scalcjs)( conja, *alpha11, alpha_alpha11_conj ); \ PASTEMAC(ch,scals)( alpha_alpha11_conj, *chi11 ); \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( trmv_unf_var2 ) blis-0.6.1/frame/2/trmv/bli_trmv_var.h000066400000000000000000000051641360743507500175510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ cntx_t* cntx, \ cntl_t* cntl \ ); GENPROT( trmv_l_blk_var1 ) GENPROT( trmv_l_blk_var2 ) GENPROT( trmv_u_blk_var1 ) GENPROT( trmv_u_blk_var2 ) GENPROT( trmv_unb_var1 ) GENPROT( trmv_unb_var2 ) GENPROT( trmv_unf_var1 ) GENPROT( trmv_unf_var2 ) // // Prototype BLAS-like interfaces with typed operands. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC0( trmv_unb_var1 ) INSERT_GENTPROT_BASIC0( trmv_unb_var2 ) INSERT_GENTPROT_BASIC0( trmv_unf_var1 ) INSERT_GENTPROT_BASIC0( trmv_unf_var2 ) blis-0.6.1/frame/2/trmv/bli_trmv_var_oapi.c000066400000000000000000000056121360743507500205520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENFRONT #define GENFRONT( opname, varname ) \ \ void PASTEMAC0(varname) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ cntx_t* cntx, \ cntl_t* cntl \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( a ); \ \ uplo_t uploa = bli_obj_uplo( a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \ diag_t diaga = bli_obj_diag( a ); \ \ dim_t m = bli_obj_length( a ); \ \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,_unb,_vft) f = \ PASTEMAC(varname,_qfp)( dt ); \ \ f \ ( \ uploa, \ transa, \ diaga, \ m, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_x, incx, \ cntx \ ); \ } \ GENFRONT( trmv, trmv_unb_var1 ) GENFRONT( trmv, trmv_unb_var2 ) GENFRONT( trmv, trmv_unf_var1 ) GENFRONT( trmv, trmv_unf_var2 ) blis-0.6.1/frame/2/trmv/other/000077500000000000000000000000001360743507500160255ustar00rootroot00000000000000blis-0.6.1/frame/2/trmv/other/bli_trmv_cntl.c000066400000000000000000000140401360743507500210260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; extern gemv_t* gemv_cntl_rp_bs_dot; extern gemv_t* gemv_cntl_rp_bs_axpy; extern gemv_t* gemv_cntl_cp_bs_dot; extern gemv_t* gemv_cntl_cp_bs_axpy; trmv_t* trmv_cntl_bs_ke_nrow_tcol = NULL; trmv_t* trmv_cntl_bs_ke_ncol_trow = NULL; trmv_t* trmv_cntl_ge_nrow_tcol = NULL; trmv_t* trmv_cntl_ge_ncol_trow = NULL; void bli_trmv_cntl_init() { // Create control trees for the lowest-level kernels. These trees induce // operations on (presumably) relatively small block-subvector problems. trmv_cntl_bs_ke_nrow_tcol = bli_trmv_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT1, 0, NULL, NULL, NULL, NULL, NULL, NULL ); trmv_cntl_bs_ke_ncol_trow = bli_trmv_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT2, 0, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control trees for generally large problems. Here we choose a // variant that prioritizes keeping a subvector of x in cache. 
trmv_cntl_ge_nrow_tcol = bli_trmv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, // use var1 to maximize x1 usage BLIS_M2, packm_cntl, // pack A11 (if needed) packv_cntl, // pack x1 (if needed) gemv_cntl_rp_bs_dot, // gemv_rp needed by var1 NULL, // gemv_cp not needed by var1 trmv_cntl_bs_ke_nrow_tcol, unpackv_cntl ); // unpack x1 (if packed) trmv_cntl_ge_ncol_trow = bli_trmv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, // use var1 to maximize x1 usage BLIS_M2, packm_cntl, // pack A11 (if needed) packv_cntl, // pack x1 (if needed) gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1 NULL, // gemv_cp not needed by var1 trmv_cntl_bs_ke_ncol_trow, unpackv_cntl ); // unpack x1 (if packed) } void bli_trmv_cntl_finalize() { bli_cntl_free_node( trmv_cntl_bs_ke_nrow_tcol ); bli_cntl_free_node( trmv_cntl_bs_ke_ncol_trow ); bli_cntl_free_node( trmv_cntl_ge_nrow_tcol ); bli_cntl_free_node( trmv_cntl_ge_ncol_trow ); } trmv_t* bli_trmv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trmv_t* sub_trmv, unpackv_t* sub_unpackv_x1 ) { trmv_t* cntl; cntl = ( trmv_t* ) bli_malloc_intl( sizeof(trmv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packm_a11 = sub_packm_a11; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_gemv_rp = sub_gemv_rp; cntl->sub_gemv_cp = sub_gemv_cp; cntl->sub_trmv = sub_trmv; cntl->sub_unpackv_x1 = sub_unpackv_x1; return cntl; } void bli_trmv_cntl_obj_init( trmv_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trmv_t* sub_trmv, unpackv_t* sub_unpackv_x1 ) { cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_packm_a11 = sub_packm_a11; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_gemv_rp = sub_gemv_rp; cntl->sub_gemv_cp = sub_gemv_cp; cntl->sub_trmv = sub_trmv; cntl->sub_unpackv_x1 = 
sub_unpackv_x1; } blis-0.6.1/frame/2/trmv/other/bli_trmv_cntl.h000066400000000000000000000061501360743507500210360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ struct trmv_s { impl_t impl_type; varnum_t var_num; bszid_t bszid; struct packm_s* sub_packm_a11; struct packv_s* sub_packv_x1; struct gemv_s* sub_gemv_rp; struct gemv_s* sub_gemv_cp; struct trmv_s* sub_trmv; struct unpackv_s* sub_unpackv_x1; }; typedef struct trmv_s trmv_t; #define bli_cntl_sub_trmv( cntl ) cntl->sub_trmv void bli_trmv_cntl_init( void ); void bli_trmv_cntl_finalize( void ); trmv_t* bli_trmv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trmv_t* sub_trmv, unpackv_t* sub_unpackv_x1 ); void bli_trmv_cntl_obj_init( trmv_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trmv_t* sub_trmv, unpackv_t* sub_unpackv_x1 ); blis-0.6.1/frame/2/trmv/other/bli_trmv_front.c000066400000000000000000000135221360743507500212220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern trmv_t* trmv_cntl_bs_ke_nrow_tcol; extern trmv_t* trmv_cntl_bs_ke_ncol_trow; extern trmv_t* trmv_cntl_ge_nrow_tcol; extern trmv_t* trmv_cntl_ge_ncol_trow; void bli_trmv_front ( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx ) { trmv_t* trmv_cntl; num_t dt_targ_a; num_t dt_targ_x; bool_t a_has_unit_inc; bool_t x_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trmv_check( alpha, a, x ); // Query the target datatypes of each object. dt_targ_a = bli_obj_target_dt( a ); dt_targ_x = bli_obj_target_dt( x ); // Determine whether each operand with unit stride. a_has_unit_inc = ( bli_obj_is_row_stored( a ) || bli_obj_is_col_stored( a ) ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. 
dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. if ( a_has_unit_inc && x_has_unit_inc ) { // We use two control trees to handle the four cases corresponding to // combinations of transposition and row/column-storage. // The row-stored without transpose and column-stored with transpose // trees are identical. Same for the remaining two trees. if ( bli_obj_has_notrans( a ) ) { if ( bli_obj_is_row_stored( a ) ) trmv_cntl = trmv_cntl_bs_ke_nrow_tcol; else trmv_cntl = trmv_cntl_bs_ke_ncol_trow; } else // if ( bli_obj_has_trans( a ) ) { if ( bli_obj_is_row_stored( a ) ) trmv_cntl = trmv_cntl_bs_ke_ncol_trow; else trmv_cntl = trmv_cntl_bs_ke_nrow_tcol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_has_notrans( a ) ) { if ( bli_obj_is_row_tilted( a ) ) trmv_cntl = trmv_cntl_ge_nrow_tcol; else trmv_cntl = trmv_cntl_ge_ncol_trow; } else // if ( bli_obj_has_trans( a ) ) { if ( bli_obj_is_row_tilted( a ) ) trmv_cntl = trmv_cntl_ge_ncol_trow; else trmv_cntl = trmv_cntl_ge_nrow_tcol; } } // Invoke the internal back-end with the copy-cast of alpha and the // chosen control tree. bli_trmv_int( &alpha_local, a, x, cntx, trmv_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. 
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, xo; \ \ inc_t rs_x, cs_x; \ \ rs_x = incx; cs_x = m * incx; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \ \ PASTEMAC0(opname)( &alphao, \ &ao, \ &xo, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( trmv_front ) blis-0.6.1/frame/2/trmv/other/bli_trmv_front.h000066400000000000000000000041501360743507500212240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_trmv_front ( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( trmv_front ) blis-0.6.1/frame/2/trmv/other/bli_trmv_int.c000066400000000000000000000120171360743507500206620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T trmv_fp typedef void (*FUNCPTR_T)( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ); static FUNCPTR_T vars[2][3][3] = { // lower triangular { // unblocked unblocked with fusing blocked { bli_trmv_unb_var1, bli_trmv_unf_var1, bli_trmv_l_blk_var1 }, { bli_trmv_unb_var2, bli_trmv_unf_var2, bli_trmv_l_blk_var2 }, { NULL, NULL, NULL }, }, // upper triangular { // unblocked unblocked with fusing blocked { bli_trmv_unb_var1, bli_trmv_unf_var1, bli_trmv_u_blk_var1 }, { bli_trmv_unb_var2, bli_trmv_unf_var2, bli_trmv_u_blk_var2 }, { NULL, NULL, NULL }, } }; void bli_trmv_int( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ) { varnum_t n; impl_t i; bool_t uplo; FUNCPTR_T f; obj_t a_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trmv_check( alpha, a, x ); // If A or x has a zero dimension, return early. 
if ( bli_obj_has_zero_dim( a ) ) return; if ( bli_obj_has_zero_dim( x ) ) return; // Alias A in case we need to induce a transformation (ie: transposition). bli_obj_alias_to( a, &a_local ); // NOTE: to support cases where B is complex and A is real, we will // need to have the default side case be BLIS_RIGHT and then express // the left case in terms of it, rather than the other way around. // Determine uplo (for indexing to the correct function pointer). if ( bli_obj_is_lower( &a_local ) ) uplo = 0; else uplo = 1; // We do not explicitly implement the cases where A is transposed. // However, we can still handle them. Specifically, if A is marked as // needing a transposition, we simply toggle the uplo value to cause the // correct algorithm to be induced. When that algorithm partitions into // A, it will grab the correct subpartitions, which will inherit A's // transposition bit and thus downstream subproblems will do the right // thing. Alternatively, we could accomplish the same end goal by // inducing a transposition, via bli_obj_induce_trans(), in the code // block below. That macro function swaps dimensions, strides, and // offsets. As an example, given a lower triangular, column-major matrix // that needs a transpose, we would induce that transposition by recasting // the object as an upper triangular, row-major matrix (with no transpose // needed). Note that how we choose to handle transposition here does NOT // affect the optimal choice of kernel (ie: a column-major column panel // matrix with transpose times a vector would use the same kernel as a // row-major row panel matrix with no transpose times a vector). if ( bli_obj_has_trans( &a_local ) ) { //bli_obj_induce_trans( &a_local ); //bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local ); if ( uplo == 1 ) uplo = 0; else uplo = 1; } // Extract the variant number and implementation type. 
n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[uplo][n][i]; // Invoke the variant. f( alpha, &a_local, x, cntx, cntl ); } blis-0.6.1/frame/2/trmv/other/bli_trmv_int.h000066400000000000000000000034541360743507500206740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_trmv_int( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ); blis-0.6.1/frame/2/trmv/other/bli_trmv_l_blk_var1.c000066400000000000000000000100751360743507500221060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_trmv_l_blk_var1( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ) { obj_t a11, a11_pack; obj_t a10; obj_t x1, x1_pack; obj_t x0; dim_t mn; dim_t ij; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( a ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_b( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A10, x1, and x0. bli_acquire_mpart_br2tl( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_br2tl( BLIS_SUBPART10, ij, b_alg, a, &a10 ); bli_acquire_vpart_b2f( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_b2f( BLIS_SUBPART0, ij, b_alg, x, &x0 ); // Initialize objects for packing A11 and x1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // x1 = alpha * tril( A11 ) * x1; bli_trmv_int( alpha, &a11_pack, &x1_pack, cntx, bli_cntl_sub_trmv( cntl ) ); // x1 = x1 + alpha * A10 * x0; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a10, &x0, &BLIS_ONE, &x1_pack, cntx, bli_cntl_sub_gemv_rp( cntl ) ); // Copy/unpack x1 (if x1 was packed). bli_unpackv_int( &x1_pack, &x1, cntx, bli_cntl_sub_unpackv_x1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/trmv/other/bli_trmv_l_blk_var2.c000066400000000000000000000100751360743507500221070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_trmv_l_blk_var2( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ) { obj_t a11, a11_pack; obj_t a21; obj_t x1, x1_pack; obj_t x2; dim_t mn; dim_t ij; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( a ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_b( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A21, x1, and x2. bli_acquire_mpart_br2tl( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_br2tl( BLIS_SUBPART21, ij, b_alg, a, &a21 ); bli_acquire_vpart_b2f( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_b2f( BLIS_SUBPART2, ij, b_alg, x, &x2 ); // Initialize objects for packing A11 and x1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // x2 = x2 + alpha * A21 * x1; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a21, &x1_pack, &BLIS_ONE, &x2, cntx, bli_cntl_sub_gemv_cp( cntl ) ); // x1 = alpha * tril( A11 ) * x1; bli_trmv_int( alpha, &a11_pack, &x1_pack, cntx, bli_cntl_sub_trmv( cntl ) ); // Copy/unpack x1 (if x1 was packed). bli_unpackv_int( &x1_pack, &x1, cntx, bli_cntl_sub_unpackv_x1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/trmv/other/bli_trmv_u_blk_var1.c000066400000000000000000000100751360743507500221170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_trmv_u_blk_var1( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ) { obj_t a11, a11_pack; obj_t a12; obj_t x1, x1_pack; obj_t x2; dim_t mn; dim_t ij; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( a ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A12, x1, and x2. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART12, ij, b_alg, a, &a12 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, x, &x2 ); // Initialize objects for packing A11 and x1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // x1 = alpha * triu( A11 ) * x1; bli_trmv_int( alpha, &a11_pack, &x1_pack, cntx, bli_cntl_sub_trmv( cntl ) ); // x1 = x1 + alpha * A12 * x2; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a12, &x2, &BLIS_ONE, &x1_pack, cntx, bli_cntl_sub_gemv_rp( cntl ) ); // Copy/unpack x1 (if x1 was packed). bli_unpackv_int( &x1_pack, &x1, cntx, bli_cntl_sub_unpackv_x1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/trmv/other/bli_trmv_u_blk_var2.c000066400000000000000000000100751360743507500221200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_trmv_u_blk_var2( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trmv_t* cntl ) { obj_t a11, a11_pack; obj_t a01; obj_t x1, x1_pack; obj_t x0; dim_t mn; dim_t ij; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( a ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_b( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A21, x1, and x2. bli_acquire_mpart_br2tl( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_br2tl( BLIS_SUBPART01, ij, b_alg, a, &a01 ); bli_acquire_vpart_b2f( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_b2f( BLIS_SUBPART0, ij, b_alg, x, &x0 ); // Initialize objects for packing A11 and x1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // x0 = x0 + alpha * A01 * x1; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a01, &x1_pack, &BLIS_ONE, &x0, cntx, bli_cntl_sub_gemv_cp( cntl ) ); // x1 = alpha * triu( A11 ) * x1; bli_trmv_int( alpha, &a11_pack, &x1_pack, cntx, bli_cntl_sub_trmv( cntl ) ); // Copy/unpack x1 (if x1 was packed). bli_unpackv_int( &x1_pack, &x1, cntx, bli_cntl_sub_unpackv_x1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/trsv/000077500000000000000000000000001360743507500147125ustar00rootroot00000000000000blis-0.6.1/frame/2/trsv/bli_trsv.h000066400000000000000000000034701360743507500167130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // NOTE: level-2 control tree code is temporarily disabled. 
//#include "bli_trsv_cntl.h" //#include "bli_trsv_front.h" //#include "bli_trsv_int.h" #include "bli_trsv_var.h" blis-0.6.1/frame/2/trsv/bli_trsv_unb_var1.c000066400000000000000000000106501360743507500205010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

/*
   Template for the typed, unblocked trsv variant 1. It is expanded by the
   INSERT_GENTFUNC_BASIC0 invocation below (presumably once per supported
   datatype -- confirm against that macro's definition).

   The generated function overwrites x in place. x is first scaled by alpha.
   Then, one element chi1 at a time, the dot product of the relevant part of
   row i of A with the part of x that was already updated is subtracted from
   chi1, and chi1 is divided by the diagonal element alpha11. The division
   is skipped when diaga is not non-unit. The dot product is computed by the
   dotv kernel queried from the context.

   A transposition request is folded into the strides and the uplo value up
   front, so only an "upper" and a "lower" code path remain.
*/
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       uplo_t uploa, \
       trans_t transa, \
       diag_t diaga, \
       dim_t m, \
       ctype* alpha, \
       ctype* a, inc_t rs_a, inc_t cs_a, \
       ctype* x, inc_t incx, \
       cntx_t* cntx \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype* a10t; \
	ctype* alpha11; \
	ctype* a12t; \
	ctype* x0; \
	ctype* chi1; \
	ctype* x2; \
	ctype alpha11_conj; \
	ctype rho; \
	dim_t iter, i; \
	dim_t n_behind; \
	inc_t rs_at, cs_at; \
	uplo_t uploa_trans; \
	conj_t conja; \
	/* Fold a transposition into the strides and the uplo value. */ \
	if ( bli_does_notrans( transa ) ) \
	{ \
		rs_at = rs_a; \
		cs_at = cs_a; \
		uploa_trans = uploa; \
	} \
	else /* if ( bli_does_trans( transa ) ) */ \
	{ \
		rs_at = cs_a; \
		cs_at = rs_a; \
		uploa_trans = bli_uplo_toggled( uploa ); \
	} \
	/* The conjugation part of transa is applied explicitly below. */ \
	conja = bli_extract_conj( transa ); \
\
	/* x = alpha * x; */ \
	PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \
	( \
	  BLIS_NO_CONJUGATE, \
	  m, \
	  alpha, \
	  x, incx, \
	  cntx, \
	  NULL \
	); \
\
	PASTECH(ch,dotv_ker_ft) kfp_tv; \
\
	/* Query the context for the kernel function pointer. */ \
	kfp_tv = bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTV_KER, cntx ); \
\
	/* We reduce all of the possible cases down to just lower/upper. */ \
	if ( bli_is_upper( uploa_trans ) ) \
	{ \
		for ( iter = 0; iter < m; ++iter ) \
		{ \
			i = m - iter - 1; /* rows are visited from the last to the first */ \
			n_behind = iter; /* number of elements of x already finished */ \
			alpha11 = a + (i  )*rs_at + (i  )*cs_at; \
			a12t = a + (i  )*rs_at + (i+1)*cs_at; \
			chi1 = x + (i  )*incx; \
			x2 = x + (i+1)*incx; \
\
			/* chi1 = chi1 - a12t * x2; */ \
			kfp_tv \
			( \
			  conja, \
			  BLIS_NO_CONJUGATE, \
			  n_behind, \
			  a12t, cs_at, \
			  x2, incx, \
			  &rho, \
			  cntx \
			); \
			PASTEMAC(ch,subs)( rho, *chi1 ); \
\
			/* chi1 = chi1 / alpha11; */ \
			if ( bli_is_nonunit_diag( diaga ) ) \
			{ \
				PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \
				PASTEMAC(ch,invscals)( alpha11_conj, *chi1 ); \
			} \
		} \
	} \
	else /* if ( bli_is_lower( uploa_trans ) ) */ \
	{ \
		for ( iter = 0; iter < m; ++iter ) \
		{ \
			i = iter; /* rows are visited from the first to the last */ \
			n_behind = i; \
			alpha11 = a + (i  )*rs_at + (i  )*cs_at; \
			a10t = a + (i  )*rs_at + (0  )*cs_at; \
			chi1 = x + (i  )*incx; \
			x0 = x + (0  )*incx; \
\
			/* chi1 = chi1 - a10t * x0; */ \
			kfp_tv \
			( \
			  conja, \
			  BLIS_NO_CONJUGATE, \
			  n_behind, \
			  a10t, cs_at, \
			  x0, incx, \
			  &rho, \
			  cntx \
			); \
			PASTEMAC(ch,subs)( rho, *chi1 ); \
\
			/* chi1 = chi1 / alpha11; */ \
			if ( bli_is_nonunit_diag( diaga ) ) \
			{ \
				PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \
				PASTEMAC(ch,invscals)( alpha11_conj, *chi1 ); \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC0( trsv_unb_var1 )
blis-0.6.1/frame/2/trsv/bli_trsv_unb_var2.c000066400000000000000000000106241360743507500205030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

/*
   Template for the typed, unblocked trsv variant 2. It is expanded by the
   INSERT_GENTFUNC_BASIC0 invocation below (presumably once per supported
   datatype -- confirm against that macro's definition).

   The generated function overwrites x in place. x is first scaled by alpha.
   Then, one element chi1 at a time, chi1 is divided by the diagonal element
   alpha11 (skipped when diaga is not non-unit), and -chi1 times the
   relevant part of column i of A is added to the part of x that is still to
   be processed. That update is performed by the axpyv kernel queried from
   the context.

   A transposition request is folded into the strides and the uplo value up
   front, so only an "upper" and a "lower" code path remain.
*/
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       uplo_t uploa, \
       trans_t transa, \
       diag_t diaga, \
       dim_t m, \
       ctype* alpha, \
       ctype* a, inc_t rs_a, inc_t cs_a, \
       ctype* x, inc_t incx, \
       cntx_t* cntx \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype* a01; \
	ctype* alpha11; \
	ctype* a21; \
	ctype* x0; \
	ctype* chi1; \
	ctype* x2; \
	ctype alpha11_conj; \
	ctype minus_chi1; \
	dim_t iter, i; \
	dim_t n_ahead; \
	inc_t rs_at, cs_at; \
	uplo_t uploa_trans; \
	conj_t conja; \
	/* Fold a transposition into the strides and the uplo value. */ \
	if ( bli_does_notrans( transa ) ) \
	{ \
		rs_at = rs_a; \
		cs_at = cs_a; \
		uploa_trans = uploa; \
	} \
	else /* if ( bli_does_trans( transa ) ) */ \
	{ \
		rs_at = cs_a; \
		cs_at = rs_a; \
		uploa_trans = bli_uplo_toggled( uploa ); \
	} \
	/* The conjugation part of transa is applied explicitly below. */ \
	conja = bli_extract_conj( transa ); \
\
	/* x = alpha * x; */ \
	PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \
	( \
	  BLIS_NO_CONJUGATE, \
	  m, \
	  alpha, \
	  x, incx, \
	  cntx, \
	  NULL \
	); \
\
	PASTECH(ch,axpyv_ker_ft) kfp_av; \
\
	/* Query the context for the kernel function pointer. */ \
	kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \
\
	/* We reduce all of the possible cases down to just lower/upper. */ \
	if ( bli_is_upper( uploa_trans ) ) \
	{ \
		for ( iter = 0; iter < m; ++iter ) \
		{ \
			i = m - iter - 1; /* columns are visited from the last to the first */ \
			n_ahead = i; /* number of elements of x still to be processed */ \
			alpha11 = a + (i  )*rs_at + (i  )*cs_at; \
			a01 = a + (0  )*rs_at + (i  )*cs_at; \
			chi1 = x + (i  )*incx; \
			x0 = x + (0  )*incx; \
\
			/* chi1 = chi1 / alpha11; */ \
			if ( bli_is_nonunit_diag( diaga ) ) \
			{ \
				PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \
				PASTEMAC(ch,invscals)( alpha11_conj, *chi1 ); \
			} \
\
			/* x0 = x0 - chi1 * a01; */ \
			PASTEMAC(ch,neg2s)( *chi1, minus_chi1 ); \
			kfp_av \
			( \
			  conja, \
			  n_ahead, \
			  &minus_chi1, \
			  a01, rs_at, \
			  x0, incx, \
			  cntx \
			); \
		} \
	} \
	else /* if ( bli_is_lower( uploa_trans ) ) */ \
	{ \
		for ( iter = 0; iter < m; ++iter ) \
		{ \
			i = iter; /* columns are visited from the first to the last */ \
			n_ahead = m - iter - 1; \
			alpha11 = a + (i  )*rs_at + (i  )*cs_at; \
			a21 = a + (i+1)*rs_at + (i  )*cs_at; \
			chi1 = x + (i  )*incx; \
			x2 = x + (i+1)*incx; \
\
			/* chi1 = chi1 / alpha11; */ \
			if ( bli_is_nonunit_diag( diaga ) ) \
			{ \
				PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \
				PASTEMAC(ch,invscals)( alpha11_conj, *chi1 ); \
			} \
\
			/* x2 = x2 - chi1 * a21; */ \
			PASTEMAC(ch,neg2s)( *chi1, minus_chi1 ); \
			kfp_av \
			( \
			  conja, \
			  n_ahead, \
			  &minus_chi1, \
			  a21, rs_at, \
			  x2, incx, \
			  cntx \
			); \
		} \
	} \
}

INSERT_GENTFUNC_BASIC0( trsv_unb_var2 )
blis-0.6.1/frame/2/trsv/bli_trsv_unf_var1.c000066400000000000000000000144151360743507500205100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

/*
   Template for the typed trsv variant "unf_var1". It is expanded by the
   INSERT_GENTFUNC_BASIC0 invocation below (presumably once per supported
   datatype -- confirm against that macro's definition).

   The generated function overwrites x in place. x is first scaled by alpha.
   The diagonal of A is then processed f elements at a time, where f is
   derived from the blocksize b_fuse queried from the context under BLIS_DF.
   For each group:
     1. the dotxf kernel queried from the context subtracts the product of
        the off-diagonal panel (A12 or A10) with the already-finished part
        of x from x1;
     2. the f-by-f diagonal block A11 is handled by scalar loops that, for
        each element, accumulate a dot product, subtract it, and divide by
        the diagonal element (the division is skipped when diaga is not
        non-unit).

   A transposition request is folded into the strides and the uplo value up
   front, so only an "upper" and a "lower" code path remain.
*/
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       uplo_t uploa, \
       trans_t transa, \
       diag_t diaga, \
       dim_t m, \
       ctype* alpha, \
       ctype* a, inc_t rs_a, inc_t cs_a, \
       ctype* x, inc_t incx, \
       cntx_t* cntx \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype* one = PASTEMAC(ch,1); \
	ctype* minus_one = PASTEMAC(ch,m1); \
	ctype* A10; \
	ctype* A11; \
	ctype* A12; \
	ctype* a10t; \
	ctype* alpha11; \
	ctype* a12t; \
	ctype* x0; \
	ctype* x1; \
	ctype* x2; \
	ctype* x01; \
	ctype* chi11; \
	ctype* x21; \
	ctype alpha11_conj; \
	ctype rho1; \
	dim_t iter, i, k, j, l; \
	dim_t b_fuse, f; \
	dim_t n_behind, f_behind; \
	inc_t rs_at, cs_at; \
	uplo_t uploa_trans; \
	conj_t conja; \
\
	/* x = alpha * x; */ \
	PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \
	( \
	  BLIS_NO_CONJUGATE, \
	  m, \
	  alpha, \
	  x, incx, \
	  cntx, \
	  NULL \
	); \
	/* Fold a transposition into the strides and the uplo value. */ \
	if ( bli_does_notrans( transa ) ) \
	{ \
		rs_at = rs_a; \
		cs_at = cs_a; \
		uploa_trans = uploa; \
	} \
	else /* if ( bli_does_trans( transa ) ) */ \
	{ \
		rs_at = cs_a; \
		cs_at = rs_a; \
		uploa_trans = bli_uplo_toggled( uploa ); \
	} \
	/* The conjugation part of transa is applied explicitly below. */ \
	conja = bli_extract_conj( transa ); \
\
	PASTECH(ch,dotxf_ker_ft) kfp_df; \
\
	/* Query the context for the kernel function pointer and fusing factor. */ \
	kfp_df = bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTXF_KER, cntx ); \
	b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_DF, cntx ); \
\
	/* We reduce all of the possible cases down to just lower/upper. */ \
	if ( bli_is_upper( uploa_trans ) ) \
	{ \
		for ( iter = 0; iter < m; iter += f ) \
		{ \
			f = bli_determine_blocksize_dim_b( iter, m, b_fuse ); \
			i = m - iter - f; /* groups are visited from the end of the diagonal */ \
			n_behind = iter; /* number of elements of x already finished */ \
			A11 = a + (i  )*rs_at + (i  )*cs_at; \
			A12 = a + (i  )*rs_at + (i+f)*cs_at; \
			x1 = x + (i  )*incx; \
			x2 = x + (i+f)*incx; \
\
			/* x1 = x1 - A12 * x2; */ \
			kfp_df \
			( \
			  conja, \
			  BLIS_NO_CONJUGATE, \
			  n_behind, \
			  f, \
			  minus_one, \
			  A12, cs_at, rs_at, \
			  x2, incx, \
			  one, \
			  x1, incx, \
			  cntx \
			); \
\
			/* x1 = x1 / triu( A11 ); */ \
			for ( k = 0; k < f; ++k ) \
			{ \
				l = f - k - 1; \
				f_behind = k; \
				alpha11 = A11 + (l  )*rs_at + (l  )*cs_at; \
				a12t = A11 + (l  )*rs_at + (l+1)*cs_at; \
				chi11 = x1 + (l  )*incx; \
				x21 = x1 + (l+1)*incx; \
\
				/* chi11 = chi11 - a12t * x21; */ \
				PASTEMAC(ch,set0s)( rho1 ); \
				if ( bli_is_conj( conja ) ) \
				{ \
					for ( j = 0; j < f_behind; ++j ) \
						PASTEMAC(ch,dotjs)( *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \
				} \
				else \
				{ \
					for ( j = 0; j < f_behind; ++j ) \
						PASTEMAC(ch,dots)( *(a12t + j*cs_at), *(x21 + j*incx), rho1 ); \
				} \
				PASTEMAC(ch,subs)( rho1, *chi11 ); \
\
				/* chi11 = chi11 / alpha11; */ \
				if ( bli_is_nonunit_diag( diaga ) ) \
				{ \
					PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \
					PASTEMAC(ch,invscals)( alpha11_conj, *chi11 ); \
				} \
			} \
		} \
	} \
	else /* if ( bli_is_lower( uploa_trans ) ) */ \
	{ \
		for ( iter = 0; iter < m; iter += f ) \
		{ \
			f = bli_determine_blocksize_dim_f( iter, m, b_fuse ); \
			i = iter; /* groups are visited from the start of the diagonal */ \
			n_behind = i; \
			A11 = a + (i  )*rs_at + (i  )*cs_at; \
			A10 = a + (i  )*rs_at + (0  )*cs_at; \
			x1 = x + (i  )*incx; \
			x0 = x + (0  )*incx; \
\
			/* x1 = x1 - A10 * x0; */ \
			kfp_df \
			( \
			  conja, \
			  BLIS_NO_CONJUGATE, \
			  n_behind, \
			  f, \
			  minus_one, \
			  A10, cs_at, rs_at, \
			  x0, incx, \
			  one, \
			  x1, incx, \
			  cntx \
			); \
\
			/* x1 = x1 / tril( A11 ); */ \
			for ( k = 0; k < f; ++k ) \
			{ \
				l = k; \
				f_behind = l; \
				alpha11 = A11 + (l  )*rs_at + (l  )*cs_at; \
				a10t = A11 + (l  )*rs_at + (0  )*cs_at; \
				chi11 = x1 + (l  )*incx; \
				x01 = x1 + (0  )*incx; \
\
				/* chi11 = chi11 - a10t * x01; */ \
				PASTEMAC(ch,set0s)( rho1 ); \
				if ( bli_is_conj( conja ) ) \
				{ \
					for ( j = 0; j < f_behind; ++j ) \
						PASTEMAC(ch,dotjs)( *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \
				} \
				else \
				{ \
					for ( j = 0; j < f_behind; ++j ) \
						PASTEMAC(ch,dots)( *(a10t + j*cs_at), *(x01 + j*incx), rho1 ); \
				} \
				PASTEMAC(ch,subs)( rho1, *chi11 ); \
\
				/* chi11 = chi11 / alpha11; */ \
				if ( bli_is_nonunit_diag( diaga ) ) \
				{ \
					PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \
					PASTEMAC(ch,invscals)( alpha11_conj, *chi11 ); \
				} \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC0( trsv_unf_var1 )
blis-0.6.1/frame/2/trsv/bli_trsv_unf_var2.c000066400000000000000000000142741360743507500205140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

/*
   Template for the typed trsv variant "unf_var2". It is expanded by the
   INSERT_GENTFUNC_BASIC0 invocation below (presumably once per supported
   datatype -- confirm against that macro's definition).

   The generated function overwrites x in place. x is first scaled by alpha.
   The diagonal of A is then processed f elements at a time, where f is
   derived from the blocksize b_fuse queried from the context under BLIS_AF.
   For each group:
     1. the f-by-f diagonal block A11 is handled by scalar loops that, for
        each element, divide by the diagonal element (skipped when diaga is
        not non-unit) and then subtract that element times the matching
        column of A11 from the rest of x1;
     2. the axpyf kernel queried from the context subtracts the product of
        the off-diagonal panel (A01 or A21) with the finished x1 from the
        part of x that is still to be processed.

   A transposition request is folded into the strides and the uplo value up
   front, so only an "upper" and a "lower" code path remain.
*/
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       uplo_t uploa, \
       trans_t transa, \
       diag_t diaga, \
       dim_t m, \
       ctype* alpha, \
       ctype* a, inc_t rs_a, inc_t cs_a, \
       ctype* x, inc_t incx, \
       cntx_t* cntx \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	ctype* minus_one = PASTEMAC(ch,m1); \
	ctype* A01; \
	ctype* A11; \
	ctype* A21; \
	ctype* a01; \
	ctype* alpha11; \
	ctype* a21; \
	ctype* x0; \
	ctype* x1; \
	ctype* x2; \
	ctype* x01; \
	ctype* chi11; \
	ctype* x21; \
	ctype alpha11_conj; \
	ctype minus_chi11; \
	dim_t iter, i, k, j, l; \
	dim_t b_fuse, f; \
	dim_t n_ahead, f_ahead; \
	inc_t rs_at, cs_at; \
	uplo_t uploa_trans; \
	conj_t conja; \
\
	/* x = alpha * x; */ \
	PASTEMAC2(ch,scalv,BLIS_TAPI_EX_SUF) \
	( \
	  BLIS_NO_CONJUGATE, \
	  m, \
	  alpha, \
	  x, incx, \
	  cntx, \
	  NULL \
	); \
	/* Fold a transposition into the strides and the uplo value. */ \
	if ( bli_does_notrans( transa ) ) \
	{ \
		rs_at = rs_a; \
		cs_at = cs_a; \
		uploa_trans = uploa; \
	} \
	else /* if ( bli_does_trans( transa ) ) */ \
	{ \
		rs_at = cs_a; \
		cs_at = rs_a; \
		uploa_trans = bli_uplo_toggled( uploa ); \
	} \
	/* The conjugation part of transa is applied explicitly below. */ \
	conja = bli_extract_conj( transa ); \
\
	PASTECH(ch,axpyf_ker_ft) kfp_af; \
\
	/* Query the context for the kernel function pointer and fusing factor. */ \
	kfp_af = bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPYF_KER, cntx ); \
	b_fuse = bli_cntx_get_blksz_def_dt( dt, BLIS_AF, cntx ); \
\
	/* We reduce all of the possible cases down to just lower/upper. */ \
	if ( bli_is_upper( uploa_trans ) ) \
	{ \
		for ( iter = 0; iter < m; iter += f ) \
		{ \
			f = bli_determine_blocksize_dim_b( iter, m, b_fuse ); \
			i = m - iter - f; /* groups are visited from the end of the diagonal */ \
			n_ahead = i; /* number of elements of x still to be processed */ \
			A11 = a + (i  )*rs_at + (i  )*cs_at; \
			A01 = a + (0  )*rs_at + (i  )*cs_at; \
			x1 = x + (i  )*incx; \
			x0 = x + (0  )*incx; \
\
			/* x1 = x1 / triu( A11 ); */ \
			for ( k = 0; k < f; ++k ) \
			{ \
				l = f - k - 1; \
				f_ahead = l; \
				alpha11 = A11 + (l  )*rs_at + (l  )*cs_at; \
				a01 = A11 + (0  )*rs_at + (l  )*cs_at; \
				chi11 = x1 + (l  )*incx; \
				x01 = x1 + (0  )*incx; \
\
				/* chi11 = chi11 / alpha11; */ \
				if ( bli_is_nonunit_diag( diaga ) ) \
				{ \
					PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \
					PASTEMAC(ch,invscals)( alpha11_conj, *chi11 ); \
				} \
\
				/* x01 = x01 - chi11 * a01; */ \
				PASTEMAC(ch,neg2s)( *chi11, minus_chi11 ); \
				if ( bli_is_conj( conja ) ) \
				{ \
					for ( j = 0; j < f_ahead; ++j ) \
						PASTEMAC(ch,axpyjs)( minus_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \
				} \
				else \
				{ \
					for ( j = 0; j < f_ahead; ++j ) \
						PASTEMAC(ch,axpys)( minus_chi11, *(a01 + j*rs_at), *(x01 + j*incx) ); \
				} \
			} \
\
			/* x0 = x0 - A01 * x1; */ \
			kfp_af \
			( \
			  conja, \
			  BLIS_NO_CONJUGATE, \
			  n_ahead, \
			  f, \
			  minus_one, \
			  A01, rs_at, cs_at, \
			  x1, incx, \
			  x0, incx, \
			  cntx \
			); \
		} \
	} \
	else /* if ( bli_is_lower( uploa_trans ) ) */ \
	{ \
		for ( iter = 0; iter < m; iter += f ) \
		{ \
			f = bli_determine_blocksize_dim_f( iter, m, b_fuse ); \
			i = iter; /* groups are visited from the start of the diagonal */ \
			n_ahead = m - iter - f; \
			A11 = a + (i  )*rs_at + (i  )*cs_at; \
			A21 = a + (i+f)*rs_at + (i  )*cs_at; \
			x1 = x + (i  )*incx; \
			x2 = x + (i+f)*incx; \
\
			/* x1 = x1 / tril( A11 ); */ \
			for ( k = 0; k < f; ++k ) \
			{ \
				l = k; \
				f_ahead = f - k - 1; \
				alpha11 = A11 + (l  )*rs_at + (l  )*cs_at; \
				a21 = A11 + (l+1)*rs_at + (l  )*cs_at; \
				chi11 = x1 + (l  )*incx; \
				x21 = x1 + (l+1)*incx; \
\
				/* chi11 = chi11 / alpha11; */ \
				if ( bli_is_nonunit_diag( diaga ) ) \
				{ \
					PASTEMAC(ch,copycjs)( conja, *alpha11, alpha11_conj ); \
					PASTEMAC(ch,invscals)( alpha11_conj, *chi11 ); \
				} \
\
				/* x21 = x21 - chi11 * a21; */ \
				PASTEMAC(ch,neg2s)( *chi11, minus_chi11 ); \
				if ( bli_is_conj( conja ) ) \
				{ \
					for ( j = 0; j < f_ahead; ++j ) \
						PASTEMAC(ch,axpyjs)( minus_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \
				} \
				else \
				{ \
					for ( j = 0; j < f_ahead; ++j ) \
						PASTEMAC(ch,axpys)( minus_chi11, *(a21 + j*rs_at), *(x21 + j*incx) ); \
				} \
			} \
\
			/* x2 = x2 - A21 * x1; */ \
			kfp_af \
			( \
			  conja, \
			  BLIS_NO_CONJUGATE, \
			  n_ahead, \
			  f, \
			  minus_one, \
			  A21, rs_at, cs_at, \
			  x1, incx, \
			  x2, incx, \
			  cntx \
			); \
		} \
	} \
}

INSERT_GENTFUNC_BASIC0( trsv_unf_var2 )
blis-0.6.1/frame/2/trsv/bli_trsv_var.h000066400000000000000000000051641360743507500175650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */


//
// Prototype object-based interfaces.
//
// GENPROT( opname ) declares one object-based variant. Every variant shares
// the same signature: the scalar alpha, the matrix a, the vector x, a
// context, and a control tree node.
//

#undef GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC0(opname) \
     ( \
       obj_t* alpha, \
       obj_t* a, \
       obj_t* x, \
       cntx_t* cntx, \
       cntl_t* cntl \
     );

// Blocked variants.
GENPROT( trsv_l_blk_var1 )
GENPROT( trsv_l_blk_var2 )
GENPROT( trsv_u_blk_var1 )
GENPROT( trsv_u_blk_var2 )

// Unblocked and unblocked-fused variants.
GENPROT( trsv_unb_var1 )
GENPROT( trsv_unb_var2 )

GENPROT( trsv_unf_var1 )
GENPROT( trsv_unf_var2 )


//
// Prototype BLAS-like interfaces with typed operands.
//
// GENTPROT( ctype, ch, varname ) declares one typed variant; ctype is the
// element type used for alpha, a, and x.
//

#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       uplo_t uploa, \
       trans_t transa, \
       diag_t diaga, \
       dim_t m, \
       ctype* alpha, \
       ctype* a, inc_t rs_a, inc_t cs_a, \
       ctype* x, inc_t incx, \
       cntx_t* cntx \
     );

INSERT_GENTPROT_BASIC0( trsv_unb_var1 )
INSERT_GENTPROT_BASIC0( trsv_unb_var2 )

INSERT_GENTPROT_BASIC0( trsv_unf_var1 )
INSERT_GENTPROT_BASIC0( trsv_unf_var2 )
blis-0.6.1/frame/2/trsv/bli_trsv_var_oapi.c000066400000000000000000000056121360743507500205660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENFRONT #define GENFRONT( opname, varname ) \ \ void PASTEMAC0(varname) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* x, \ cntx_t* cntx, \ cntl_t* cntl \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( a ); \ \ uplo_t uploa = bli_obj_uplo( a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \ diag_t diaga = bli_obj_diag( a ); \ \ dim_t m = bli_obj_length( a ); \ \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,_unb,_vft) f = \ PASTEMAC(varname,_qfp)( dt ); \ \ f \ ( \ uploa, \ transa, \ diaga, \ m, \ buf_alpha, \ buf_a, rs_a, cs_a, \ buf_x, incx, \ cntx \ ); \ } \ GENFRONT( trsv, trsv_unb_var1 ) GENFRONT( trsv, trsv_unb_var2 ) GENFRONT( trsv, trsv_unf_var1 ) GENFRONT( trsv, trsv_unf_var2 ) blis-0.6.1/frame/2/trsv/other/000077500000000000000000000000001360743507500160335ustar00rootroot00000000000000blis-0.6.1/frame/2/trsv/other/bli_trsv_cntl.c000066400000000000000000000147571360743507500210610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern scalv_t* scalv_cntl; extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; extern gemv_t* gemv_cntl_rp_bs_dot; extern gemv_t* gemv_cntl_rp_bs_axpy; extern gemv_t* gemv_cntl_cp_bs_dot; extern gemv_t* gemv_cntl_cp_bs_axpy; trsv_t* trsv_cntl_bs_ke_nrow_tcol = NULL; trsv_t* trsv_cntl_bs_ke_ncol_trow = NULL; trsv_t* trsv_cntl_ge_nrow_tcol = NULL; trsv_t* trsv_cntl_ge_ncol_trow = NULL; void bli_trsv_cntl_init() { // Create control trees for the lowest-level kernels. These trees induce // operations on (presumably) relatively small block-subvector problems. trsv_cntl_bs_ke_nrow_tcol = bli_trsv_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT1, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); trsv_cntl_bs_ke_ncol_trow = bli_trsv_cntl_obj_create( BLIS_UNB_FUSED, BLIS_VARIANT2, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL ); // Create control trees for generally large problems. Here we choose a // variant that prioritizes keeping a subvector of x in cache. 
trsv_cntl_ge_nrow_tcol = bli_trsv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, // use var1 to maximize x1 usage BLIS_M2, scalv_cntl, // scale x up-front packm_cntl, // pack A11 (if needed) packv_cntl, // pack x1 (if needed) gemv_cntl_rp_bs_dot, // gemv_rp needed by var1 NULL, // gemv_cp not needed by var1 trsv_cntl_bs_ke_nrow_tcol, unpackv_cntl ); // unpack x1 (if needed) trsv_cntl_ge_ncol_trow = bli_trsv_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, // use var1 to maximize x1 usage BLIS_M2, scalv_cntl, // scale x up-front packm_cntl, // pack A11 (if needed) packv_cntl, // pack x1 (if needed) gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1 NULL, // gemv_cp not needed by var1 trsv_cntl_bs_ke_ncol_trow, unpackv_cntl ); // unpack x1 (if needed) } void bli_trsv_cntl_finalize() { bli_cntl_free_node( trsv_cntl_bs_ke_nrow_tcol ); bli_cntl_free_node( trsv_cntl_bs_ke_ncol_trow ); bli_cntl_free_node( trsv_cntl_ge_nrow_tcol ); bli_cntl_free_node( trsv_cntl_ge_ncol_trow ); } trsv_t* bli_trsv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trsv_t* sub_trsv, unpackv_t* sub_unpackv_x1 ) { trsv_t* cntl; cntl = ( trsv_t* ) bli_malloc_intl( sizeof(trsv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_scalv = sub_scalv; cntl->sub_packm_a11 = sub_packm_a11; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_gemv_rp = sub_gemv_rp; cntl->sub_gemv_cp = sub_gemv_cp; cntl->sub_trsv = sub_trsv; cntl->sub_unpackv_x1 = sub_unpackv_x1; return cntl; } void bli_trsv_cntl_obj_init( trsv_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trsv_t* sub_trsv, unpackv_t* sub_unpackv_x1 ) { cntl->impl_type = impl_type; cntl->var_num = var_num; cntl->bszid = bszid; cntl->sub_scalv = sub_scalv; cntl->sub_packm_a11 = 
sub_packm_a11; cntl->sub_packv_x1 = sub_packv_x1; cntl->sub_gemv_rp = sub_gemv_rp; cntl->sub_gemv_cp = sub_gemv_cp; cntl->sub_trsv = sub_trsv; cntl->sub_unpackv_x1 = sub_unpackv_x1; } blis-0.6.1/frame/2/trsv/other/bli_trsv_cntl.h000066400000000000000000000063621360743507500210570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ struct trsv_s { impl_t impl_type; varnum_t var_num; bszid_t bszid; struct scalv_s* sub_scalv; struct packm_s* sub_packm_a11; struct packv_s* sub_packv_x1; struct gemv_s* sub_gemv_rp; struct gemv_s* sub_gemv_cp; struct trsv_s* sub_trsv; struct unpackv_s* sub_unpackv_x1; }; typedef struct trsv_s trsv_t; #define bli_cntl_sub_trsv( cntl ) cntl->sub_trsv void bli_trsv_cntl_init( void ); void bli_trsv_cntl_finalize( void ); trsv_t* bli_trsv_cntl_obj_create( impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trsv_t* sub_trsv, unpackv_t* sub_unpackv_x1 ); void bli_trsv_cntl_obj_init( trsv_t* cntl, impl_t impl_type, varnum_t var_num, bszid_t bszid, scalv_t* sub_scalv, packm_t* sub_packm_a11, packv_t* sub_packv_x1, gemv_t* sub_gemv_rp, gemv_t* sub_gemv_cp, trsv_t* sub_trsv, unpackv_t* sub_unpackv_x1 ); blis-0.6.1/frame/2/trsv/other/bli_trsv_front.c000066400000000000000000000130761360743507500212420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" extern trsv_t* trsv_cntl_bs_ke_nrow_tcol; extern trsv_t* trsv_cntl_bs_ke_ncol_trow; extern trsv_t* trsv_cntl_ge_nrow_tcol; extern trsv_t* trsv_cntl_ge_ncol_trow; void bli_trsv_front ( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx ) { trsv_t* trsv_cntl; num_t dt_targ_a; num_t dt_targ_x; bool_t a_has_unit_inc; bool_t x_has_unit_inc; obj_t alpha_local; num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trsv_check( alpha, a, x ); // Query the target datatypes of each object. dt_targ_a = bli_obj_dt( a ); dt_targ_x = bli_obj_dt( x ); // Determine whether each operand with unit stride. a_has_unit_inc = ( bli_obj_is_row_stored( a ) || bli_obj_is_col_stored( a ) ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 ); // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x ); bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // If all operands have unit stride, we choose a control tree for calling // the unblocked implementation directly without any blocking. 
if ( a_has_unit_inc && x_has_unit_inc ) { if ( bli_obj_has_notrans( a ) ) { if ( bli_obj_is_row_stored( a ) ) trsv_cntl = trsv_cntl_bs_ke_nrow_tcol; else trsv_cntl = trsv_cntl_bs_ke_ncol_trow; } else // if ( bli_obj_has_trans( a ) ) { if ( bli_obj_is_row_stored( a ) ) trsv_cntl = trsv_cntl_bs_ke_ncol_trow; else trsv_cntl = trsv_cntl_bs_ke_nrow_tcol; } } else { // Mark objects with unit stride as already being packed. This prevents // unnecessary packing from happening within the blocked algorithm. if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x ); // Here, we make a similar choice as above, except that (1) we look // at storage tilt, and (2) we choose a tree that performs blocking. if ( bli_obj_has_notrans( a ) ) { if ( bli_obj_is_row_tilted( a ) ) trsv_cntl = trsv_cntl_ge_nrow_tcol; else trsv_cntl = trsv_cntl_ge_ncol_trow; } else // if ( bli_obj_has_trans( a ) ) { if ( bli_obj_is_row_tilted( a ) ) trsv_cntl = trsv_cntl_ge_ncol_trow; else trsv_cntl = trsv_cntl_ge_nrow_tcol; } } // Invoke the internal back-end with the copy-cast of alpha and the // chosen control tree. bli_trsv_int( &alpha_local, a, x, cntx, trsv_cntl ); } // // Define BLAS-like interfaces with homogeneous-typed operands. 
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, xo; \ \ inc_t rs_x, cs_x; \ \ rs_x = incx; cs_x = m * incx; \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \ \ PASTEMAC0(opname)( &alphao, \ &ao, \ &xo, \ cntx ); \ } INSERT_GENTFUNC_BASIC0( trsv_front ) blis-0.6.1/frame/2/trsv/other/bli_trsv_front.h000066400000000000000000000041471360743507500212460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_trsv_front ( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* x, inc_t incx, \ cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( trsv_front ) blis-0.6.1/frame/2/trsv/other/bli_trsv_int.c000066400000000000000000000120201360743507500206700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T trsv_fp typedef void (*FUNCPTR_T)( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trsv_t* cntl ); static FUNCPTR_T vars[2][3][3] = { // lower triangular { // unblocked unblocked with fusing blocked { bli_trsv_unb_var1, bli_trsv_unf_var1, bli_trsv_l_blk_var1 }, { bli_trsv_unb_var2, bli_trsv_unf_var2, bli_trsv_l_blk_var2 }, { NULL, NULL, NULL }, }, // upper triangular { // unblocked unblocked with fusing blocked { bli_trsv_unb_var1, bli_trsv_unf_var1, bli_trsv_u_blk_var1 }, { bli_trsv_unb_var2, bli_trsv_unf_var2, bli_trsv_u_blk_var2 }, { NULL, NULL, NULL }, } }; void bli_trsv_int( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trsv_t* cntl ) { varnum_t n; impl_t i; bool_t uplo; FUNCPTR_T f; obj_t a_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trsv_check( alpha, a, x ); // If A or x has a zero dimension, return early. 
if ( bli_obj_has_zero_dim( a ) ) return; if ( bli_obj_has_zero_dim( x ) ) return; // Alias A in case we need to induce a transformation (ie: transposition). bli_obj_alias_to( a, &a_local ); // NOTE: to support cases where B is complex and A is real, we will // need to have the default side case be BLIS_RIGHT and then express // the left case in terms of it, rather than the other way around. // Determine uplo (for indexing to the correct function pointer). if ( bli_obj_is_lower( &a_local ) ) uplo = 0; else uplo = 1; // We do not explicitly implement the cases where A is transposed. // However, we can still handle them. Specifically, if A is marked as // needing a transposition, we simply toggle the uplo value to cause the // correct algorithm to be induced. When that algorithm partitions into // A, it will grab the correct subpartitions, which will inherit A's // transposition bit and thus downstream subproblems will do the right // thing. Alternatively, we could accomplish the same end goal by // inducing a transposition, via bli_obj_induce_trans(), in the code // block below. That macro function swaps dimensions, strides, and // offsets. As an example, given a lower triangular, column-major matrix // that needs a transpose, we would induce that transposition by recasting // the object as an upper triangular, row-major matrix (with no transpose // needed). Note that how we choose to handle transposition here does NOT // affect the optimal choice of kernel (ie: a column-major column panel // matrix with transpose times a vector would use the same kernel as a // row-major row panel matrix with no transpose times a vector). if ( bli_obj_has_trans( &a_local ) ) { //bli_obj_induce_trans( &a_local ); //bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local ); if ( uplo == 1 ) uplo = 0; else uplo = 1; } // Extract the variant number and implementation type. 
n = bli_cntl_var_num( cntl ); i = bli_cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[uplo][n][i]; // Invoke the variant. f( alpha, &a_local, x, cntx, cntl ); } blis-0.6.1/frame/2/trsv/other/bli_trsv_int.h000066400000000000000000000034541360743507500207100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_trsv_int( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trsv_t* cntl ); blis-0.6.1/frame/2/trsv/other/bli_trsv_l_blk_var1.c000066400000000000000000000102551360743507500221220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_trsv_l_blk_var1( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trsv_t* cntl ) { obj_t a11, a11_pack; obj_t a10; obj_t x1, x1_pack; obj_t x0; dim_t mn; dim_t ij; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( a ); // x = alpha * x; bli_scalv_int( alpha, x, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A10, x1, and x0. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART10, ij, b_alg, a, &a10 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART0, ij, b_alg, x, &x0 ); // Initialize objects for packing A11 and x1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // x1 = x1 - A10 * x0; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, &BLIS_MINUS_ONE, &a10, &x0, &BLIS_ONE, &x1_pack, cntx, bli_cntl_sub_gemv_rp( cntl ) ); // x1 = x1 / tril( A11 ); bli_trsv_int( &BLIS_ONE, &a11_pack, &x1_pack, cntx, bli_cntl_sub_trsv( cntl ) ); // Copy/unpack x1 (if x1 was packed). bli_unpackv_int( &x1_pack, &x1, cntx, bli_cntl_sub_unpackv_x1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/trsv/other/bli_trsv_l_blk_var2.c000066400000000000000000000102551360743507500221230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_trsv_l_blk_var2( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trsv_t* cntl ) { obj_t a11, a11_pack; obj_t a21; obj_t x1, x1_pack; obj_t x2; dim_t mn; dim_t ij; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( a ); // x = alpha * x; bli_scalv_int( alpha, x, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A21, x1, and x2. bli_acquire_mpart_tl2br( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_tl2br( BLIS_SUBPART21, ij, b_alg, a, &a21 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_f2b( BLIS_SUBPART2, ij, b_alg, x, &x2 ); // Initialize objects for packing A11 and x1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // x1 = x1 / tril( A11 ); bli_trsv_int( &BLIS_ONE, &a11_pack, &x1_pack, cntx, bli_cntl_sub_trsv( cntl ) ); // x2 = x2 - A21 * x1; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, &BLIS_MINUS_ONE, &a21, &x1_pack, &BLIS_ONE, &x2, cntx, bli_cntl_sub_gemv_cp( cntl ) ); // Copy/unpack x1 (if x1 was packed). bli_unpackv_int( &x1_pack, &x1, cntx, bli_cntl_sub_unpackv_x1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/trsv/other/bli_trsv_u_blk_var1.c000066400000000000000000000102551360743507500221330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_trsv_u_blk_var1( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trsv_t* cntl ) { obj_t a11, a11_pack; obj_t a12; obj_t x1, x1_pack; obj_t x2; dim_t mn; dim_t ij; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( a ); // x = alpha * x; bli_scalv_int( alpha, x, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_b( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A12, x1, and x2. bli_acquire_mpart_br2tl( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_br2tl( BLIS_SUBPART12, ij, b_alg, a, &a12 ); bli_acquire_vpart_b2f( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_b2f( BLIS_SUBPART2, ij, b_alg, x, &x2 ); // Initialize objects for packing A11 and x1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // x1 = x1 - A12 * x2; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, &BLIS_MINUS_ONE, &a12, &x2, &BLIS_ONE, &x1_pack, cntx, bli_cntl_sub_gemv_rp( cntl ) ); // x1 = x1 / tril( A11 ); bli_trsv_int( &BLIS_ONE, &a11_pack, &x1_pack, cntx, bli_cntl_sub_trsv( cntl ) ); // Copy/unpack x1 (if x1 was packed). bli_unpackv_int( &x1_pack, &x1, cntx, bli_cntl_sub_unpackv_x1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/2/trsv/other/bli_trsv_u_blk_var2.c000066400000000000000000000102551360743507500221340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_trsv_u_blk_var2( obj_t* alpha, obj_t* a, obj_t* x, cntx_t* cntx, trsv_t* cntl ) { obj_t a11, a11_pack; obj_t a01; obj_t x1, x1_pack; obj_t x0; dim_t mn; dim_t ij; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a11_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension. mn = bli_obj_length( a ); // x = alpha * x; bli_scalv_int( alpha, x, cntx, bli_cntl_sub_scalv( cntl ) ); // Partition diagonally. for ( ij = 0; ij < mn; ij += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_b( ij, mn, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A11, A01, x1, and x0. bli_acquire_mpart_br2tl( BLIS_SUBPART11, ij, b_alg, a, &a11 ); bli_acquire_mpart_br2tl( BLIS_SUBPART01, ij, b_alg, a, &a01 ); bli_acquire_vpart_b2f( BLIS_SUBPART1, ij, b_alg, x, &x1 ); bli_acquire_vpart_b2f( BLIS_SUBPART0, ij, b_alg, x, &x0 ); // Initialize objects for packing A11 and x1 (if needed). bli_packm_init( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_init( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). bli_packm_int( &a11, &a11_pack, cntx, bli_cntl_sub_packm_a11( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntx, bli_cntl_sub_packv_x1( cntl ) ); // x1 = x1 / tril( A11 ); bli_trsv_int( &BLIS_ONE, &a11_pack, &x1_pack, cntx, bli_cntl_sub_trsv( cntl ) ); // x0 = x0 - A01 * x1; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, &BLIS_MINUS_ONE, &a01, &x1_pack, &BLIS_ONE, &x0, cntx, bli_cntl_sub_gemv_cp( cntl ) ); // Copy/unpack x1 (if x1 was packed). bli_unpackv_int( &x1_pack, &x1, cntx, bli_cntl_sub_unpackv_x1( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. 
bli_packm_release( &a11_pack, bli_cntl_sub_packm_a11( cntl ) ); bli_packv_release( &x1_pack, bli_cntl_sub_packv_x1( cntl ) ); } blis-0.6.1/frame/3/000077500000000000000000000000001360743507500137155ustar00rootroot00000000000000blis-0.6.1/frame/3/bli_l3.h000066400000000000000000000064101360743507500152330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "bli_l3_cntl.h" #include "bli_l3_check.h" // Define function types. #include "bli_l3_ft_ex.h" #include "bli_l3_ft_ukr.h" #include "bli_l3_oft.h" #include "bli_l3_oft_var.h" #include "bli_l3_blocksize.h" #include "bli_l3_direct.h" #include "bli_l3_prune.h" #include "bli_l3_packm.h" // Prototype object APIs (expert and non-expert). #include "bli_oapi_ex.h" #include "bli_l3_oapi.h" #include "bli_oapi_ba.h" #include "bli_l3_oapi.h" // Prototype typed APIs (expert and non-expert). #include "bli_tapi_ex.h" #include "bli_l3_tapi.h" #include "bli_tapi_ba.h" #include "bli_l3_tapi.h" // Define function types for small/unpacked handlers/kernels. #include "bli_l3_sup_oft.h" #include "bli_l3_sup_ft_ker.h" // Define static edge case logic for use in small/unpacked kernels. //#include "bli_l3_sup_edge.h" // Prototype object API to small/unpacked matrix dispatcher. #include "bli_l3_sup.h" // Prototype reference implementation of small/unpacked matrix handler. #include "bli_l3_sup_ref.h" #include "bli_l3_sup_int.h" #include "bli_l3_sup_vars.h" #include "bli_l3_sup_packm_a.h" #include "bli_l3_sup_packm_b.h" #include "bli_l3_sup_packm_var.h" // Prototype microkernel wrapper APIs. #include "bli_l3_ukr_oapi.h" #include "bli_l3_ukr_tapi.h" // Generate function pointer arrays for tapi microkernel functions. #include "bli_l3_ukr_fpa.h" // Operation-specific headers. #include "bli_gemm.h" #include "bli_hemm.h" #include "bli_herk.h" #include "bli_her2k.h" #include "bli_symm.h" #include "bli_syrk.h" #include "bli_syr2k.h" #include "bli_trmm.h" #include "bli_trmm3.h" #include "bli_trsm.h" blis-0.6.1/frame/3/bli_l3_blocksize.c000066400000000000000000000246061360743507500173020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" dim_t bli_l3_determine_kc ( dir_t direct, dim_t i, dim_t dim, obj_t* a, obj_t* b, bszid_t bszid, cntx_t* cntx, cntl_t* cntl ) { opid_t family = bli_cntl_family( cntl ); if ( family == BLIS_GEMM ) return bli_gemm_determine_kc( direct, i, dim, a, b, bszid, cntx ); else if ( family == BLIS_HERK ) return bli_herk_determine_kc( direct, i, dim, a, b, bszid, cntx ); else if ( family == BLIS_TRMM ) return bli_trmm_determine_kc( direct, i, dim, a, b, bszid, cntx ); else if ( family == BLIS_TRSM ) return bli_trsm_determine_kc( direct, i, dim, a, b, bszid, cntx ); // This should never execute. return bli_gemm_determine_kc( direct, i, dim, a, b, bszid, cntx ); } // ----------------------------------------------------------------------------- // // NOTE: We call a gemm/hemm/symm, trmm, or trsm-specific blocksize // function to determine the kc blocksize so that we can implement the // "nudging" of kc to be a multiple of mr or nr, as needed. // #undef GENFRONT #define GENFRONT( opname, l3op ) \ \ dim_t PASTEMAC0(opname) \ ( \ dir_t direct, \ dim_t i, \ dim_t dim, \ obj_t* a, \ obj_t* b, \ bszid_t bszid, \ cntx_t* cntx \ ) \ { \ if ( direct == BLIS_FWD ) \ return PASTEMAC(l3op,_determine_kc_f)( i, dim, a, b, bszid, cntx ); \ else \ return PASTEMAC(l3op,_determine_kc_b)( i, dim, a, b, bszid, cntx ); \ } GENFRONT( gemm_determine_kc, gemm ) GENFRONT( herk_determine_kc, trmm ) GENFRONT( trmm_determine_kc, trmm ) GENFRONT( trsm_determine_kc, trsm ) // ----------------------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, chdir ) \ \ dim_t PASTEMAC0(opname) \ ( \ dim_t i, \ dim_t dim, \ obj_t* a, \ obj_t* b, \ bszid_t bszid, \ cntx_t* cntx \ ) \ { \ num_t dt; \ blksz_t* bsize; \ dim_t mnr; \ dim_t b_alg, b_max; \ dim_t b_use; \ \ /* bli_*_determine_kc_f(): We assume that this function is being called from an algorithm that is moving "forward" (ie: top to bottom, left to right, top-left to bottom-right). 
*/ \ \ /* bli_*_determine_kc_b(): We assume that this function is being called from an algorithm that is moving "backward" (ie: bottom to top, right to left, bottom-right to top-left). */ \ \ /* Extract the execution datatype and use it to query the corresponding blocksize and blocksize maximum values from the blksz_t object. */ \ dt = bli_obj_exec_dt( a ); \ bsize = bli_cntx_get_blksz( bszid, cntx ); \ b_alg = bli_blksz_get_def( dt, bsize ); \ b_max = bli_blksz_get_max( dt, bsize ); \ \ /* Nudge the default and maximum kc blocksizes up to the nearest multiple of MR if A is Hermitian or symmetric, or NR if B is Hermitian or symmetric. If neither case applies, then we leave the blocksizes unchanged. */ \ if ( bli_obj_root_is_herm_or_symm( a ) ) \ { \ mnr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ b_alg = bli_align_dim_to_mult( b_alg, mnr ); \ b_max = bli_align_dim_to_mult( b_max, mnr ); \ } \ else if ( bli_obj_root_is_herm_or_symm( b ) ) \ { \ mnr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ b_alg = bli_align_dim_to_mult( b_alg, mnr ); \ b_max = bli_align_dim_to_mult( b_max, mnr ); \ } \ \ /* Call the bli_determine_blocksize_[fb]_sub() helper routine defined in bli_blksz.c */ \ b_use = PASTEMAC2(determine_blocksize_,chdir,_sub)( i, dim, b_alg, b_max ); \ \ return b_use; \ } GENFRONT( gemm_determine_kc_f, f ) GENFRONT( gemm_determine_kc_b, b ) // ----------------------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, chdir ) \ \ dim_t PASTEMAC0(opname) \ ( \ dim_t i, \ dim_t dim, \ obj_t* a, \ obj_t* b, \ bszid_t bszid, \ cntx_t* cntx \ ) \ { \ num_t dt; \ blksz_t* bsize; \ dim_t b_alg, b_max; \ dim_t b_use; \ \ /* bli_*_determine_kc_f(): We assume that this function is being called from an algorithm that is moving "forward" (ie: top to bottom, left to right, top-left to bottom-right). 
*/ \ \ /* bli_*_determine_kc_b(): We assume that this function is being called from an algorithm that is moving "backward" (ie: bottom to top, right to left, bottom-right to top-left). */ \ \ /* Extract the execution datatype and use it to query the corresponding blocksize and blocksize maximum values from the blksz_t object. */ \ dt = bli_obj_exec_dt( a ); \ bsize = bli_cntx_get_blksz( bszid, cntx ); \ b_alg = bli_blksz_get_def( dt, bsize ); \ b_max = bli_blksz_get_max( dt, bsize ); \ \ /* Notice that for herk, we do not need to perform any special handling for the default and maximum kc blocksizes vis-a-vis MR or NR. */ \ \ /* Call the bli_determine_blocksize_[fb]_sub() helper routine defined in bli_blksz.c */ \ b_use = PASTEMAC2(determine_blocksize_,chdir,_sub)( i, dim, b_alg, b_max ); \ \ return b_use; \ } GENFRONT( herk_determine_kc_f, f ) GENFRONT( herk_determine_kc_b, b ) // ----------------------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, chdir ) \ \ dim_t PASTEMAC0(opname) \ ( \ dim_t i, \ dim_t dim, \ obj_t* a, \ obj_t* b, \ bszid_t bszid, \ cntx_t* cntx \ ) \ { \ num_t dt; \ blksz_t* bsize; \ dim_t mnr; \ dim_t b_alg, b_max; \ dim_t b_use; \ \ /* bli_*_determine_kc_f(): We assume that this function is being called from an algorithm that is moving "forward" (ie: top to bottom, left to right, top-left to bottom-right). */ \ \ /* bli_*_determine_kc_b(): We assume that this function is being called from an algorithm that is moving "backward" (ie: bottom to top, right to left, bottom-right to top-left). */ \ \ /* Extract the execution datatype and use it to query the corresponding blocksize and blocksize maximum values from the blksz_t object. 
*/ \ dt = bli_obj_exec_dt( a ); \ bsize = bli_cntx_get_blksz( bszid, cntx ); \ b_alg = bli_blksz_get_def( dt, bsize ); \ b_max = bli_blksz_get_max( dt, bsize ); \ \ /* Nudge the default and maximum kc blocksizes up to the nearest multiple of MR if the triangular matrix is on the left, or NR if the triangular matrix is one the right. */ \ if ( bli_obj_root_is_triangular( a ) ) \ mnr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ else \ mnr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ \ b_alg = bli_align_dim_to_mult( b_alg, mnr ); \ b_max = bli_align_dim_to_mult( b_max, mnr ); \ \ /* Call the bli_determine_blocksize_[fb]_sub() helper routine defined in bli_blksz.c */ \ b_use = PASTEMAC2(determine_blocksize_,chdir,_sub)( i, dim, b_alg, b_max ); \ \ return b_use; \ } GENFRONT( trmm_determine_kc_f, f ) GENFRONT( trmm_determine_kc_b, b ) // ----------------------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, chdir ) \ \ dim_t PASTEMAC0(opname) \ ( \ dim_t i, \ dim_t dim, \ obj_t* a, \ obj_t* b, \ bszid_t bszid, \ cntx_t* cntx \ ) \ { \ num_t dt; \ blksz_t* bsize; \ dim_t mnr; \ dim_t b_alg, b_max; \ dim_t b_use; \ \ /* bli_*_determine_kc_f(): We assume that this function is being called from an algorithm that is moving "forward" (ie: top to bottom, left to right, top-left to bottom-right). */ \ \ /* bli_*_determine_kc_b(): We assume that this function is being called from an algorithm that is moving "backward" (ie: bottom to top, right to left, bottom-right to top-left). */ \ \ /* Extract the execution datatype and use it to query the corresponding blocksize and blocksize maximum values from the blksz_t object. */ \ dt = bli_obj_exec_dt( a ); \ bsize = bli_cntx_get_blksz( bszid, cntx ); \ b_alg = bli_blksz_get_def( dt, bsize ); \ b_max = bli_blksz_get_max( dt, bsize ); \ \ /* Nudge the default and maximum kc blocksizes up to the nearest multiple of MR. 
We always use MR (rather than sometimes using NR) because even when the triangle is on the right, packing of that matrix uses MR, since only left-side trsm micro-kernels are supported. */ \ mnr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ b_alg = bli_align_dim_to_mult( b_alg, mnr ); \ b_max = bli_align_dim_to_mult( b_max, mnr ); \ \ /* Call the bli_determine_blocksize_[fb]_sub() helper routine defined in bli_blksz.c */ \ b_use = PASTEMAC2(determine_blocksize_,chdir,_sub)( i, dim, b_alg, b_max ); \ \ return b_use; \ } GENFRONT( trsm_determine_kc_f, f ) GENFRONT( trsm_determine_kc_b, b ) blis-0.6.1/frame/3/bli_l3_blocksize.h000066400000000000000000000052751360743507500173100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
   IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Family-dispatching kc query: takes the control tree in addition to the
// arguments of the family-specific routines prototyped below.
dim_t bli_l3_determine_kc
     (
       dir_t   direct,
       dim_t   i,
       dim_t   dim,
       obj_t*  a,
       obj_t*  b,
       bszid_t bszid,
       cntx_t* cntx,
       cntl_t* cntl
     );


// Prototypes for the per-family routines that take a direction argument.
#undef  GENPROT
#define GENPROT( opname ) \
\
dim_t PASTEMAC0(opname) \
     ( \
       dir_t   direct, \
       dim_t   i, \
       dim_t   dim, \
       obj_t*  a, \
       obj_t*  b, \
       bszid_t bszid, \
       cntx_t* cntx  \
     );

GENPROT( gemm_determine_kc )
GENPROT( herk_determine_kc )
GENPROT( trmm_determine_kc )
GENPROT( trsm_determine_kc )


// Prototypes for the direction-specific routines (_f and _b suffixes), which
// take no direction argument.
#undef  GENPROT
#define GENPROT( opname ) \
\
dim_t PASTEMAC0(opname) \
     ( \
       dim_t   i, \
       dim_t   dim, \
       obj_t*  a, \
       obj_t*  b, \
       bszid_t bszid, \
       cntx_t* cntx  \
     );

GENPROT( gemm_determine_kc_f )
GENPROT( gemm_determine_kc_b )

GENPROT( herk_determine_kc_f )
GENPROT( herk_determine_kc_b )

GENPROT( trmm_determine_kc_f )
GENPROT( trmm_determine_kc_b )

GENPROT( trsm_determine_kc_f )
GENPROT( trsm_determine_kc_b )

blis-0.6.1/frame/3/bli_l3_check.c000066400000000000000000000301501360743507500163610ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// Parameter check for gemm. Only the basic checks are run; the structure
// checks on a and b are deliberately disabled (see the NOTE below).
void bli_gemm_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	//err_t e_val;

	// Check basic properties of the operation.

	bli_gemm_basic_check( alpha, a, b, beta, c, cntx );

	// Check object structure.

	// NOTE: Can't perform these checks as long as bli_gemm_check() is called
	// from bli_gemm_int(), which is in the execution path for structured
	// level-3 operations such as hemm.

	//e_val = bli_check_general_object( a );
	//bli_check_error_code( e_val );

	//e_val = bli_check_general_object( b );
	//bli_check_error_code( e_val );
}

// Parameter check for hemm: the shared hemm/symm checks, then a check that
// a is Hermitian.
void bli_hemm_check
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Perform checks common to hemm/symm.

	bli_hemm_basic_check( side, alpha, a, b, beta, c, cntx );

	// Check object structure.

	e_val = bli_check_hermitian_object( a );
	bli_check_error_code( e_val );
}

// Parameter check for herk: the herk/syrk basic checks against a and its
// conjugate-transposed alias, real-valued alpha and beta, and Hermitian c.
void bli_herk_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;
	obj_t ah;

	// Alias A to A^H so we can perform dimension checks.
	bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, a, &ah );

	// Check basic properties of the operation.

	bli_herk_basic_check( alpha, a, &ah, beta, c, cntx );

	// Check for real-valued alpha and beta.

	e_val = bli_check_real_valued_object( alpha );
	bli_check_error_code( e_val );

	e_val = bli_check_real_valued_object( beta );
	bli_check_error_code( e_val );

	// Check matrix structure.

	e_val = bli_check_hermitian_object( c );
	bli_check_error_code( e_val );
}

// Parameter check for her2k: the her2k/syr2k basic checks against a, b and
// their conjugate-transposed aliases, real-valued beta, and Hermitian c.
void bli_her2k_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;
	obj_t ah, bh;

	// Alias A and B to A^H and B^H so we can perform dimension checks.
	bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, a, &ah );
	bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, b, &bh );

	// Check basic properties of the operation.

	bli_her2k_basic_check( alpha, a, &bh, b, &ah, beta, c, cntx );

	// Check for real-valued beta.

	e_val = bli_check_real_valued_object( beta );
	bli_check_error_code( e_val );

	// Check matrix structure.

	e_val = bli_check_hermitian_object( c );
	bli_check_error_code( e_val );
}

// Parameter check for symm: the shared hemm/symm checks, then a check that
// a is symmetric.
void bli_symm_check
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Check basic properties of the operation.

	bli_hemm_basic_check( side, alpha, a, b, beta, c, cntx );

	// Check object structure.

	e_val = bli_check_symmetric_object( a );
	bli_check_error_code( e_val );
}

// Parameter check for syrk: the herk/syrk basic checks against a and its
// transposed alias, then a check that c is symmetric.
void bli_syrk_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;
	obj_t at;

	// Alias A to A^T so we can perform dimension checks.
	bli_obj_alias_with_trans( BLIS_TRANSPOSE, a, &at );

	// Check basic properties of the operation.

	bli_herk_basic_check( alpha, a, &at, beta, c, cntx );

	// Check matrix structure.

	e_val = bli_check_symmetric_object( c );
	bli_check_error_code( e_val );
}

// Parameter check for syr2k: the her2k/syr2k basic checks against a, b and
// their transposed aliases, then a check that c is symmetric.
void bli_syr2k_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;
	obj_t at, bt;

	// Alias A and B to A^T and B^T so we can perform dimension checks.
	bli_obj_alias_with_trans( BLIS_TRANSPOSE, a, &at );
	bli_obj_alias_with_trans( BLIS_TRANSPOSE, b, &bt );

	// Check basic properties of the operation.

	bli_her2k_basic_check( alpha, a, &bt, b, &at, beta, c, cntx );

	// Check matrix structure.

	e_val = bli_check_symmetric_object( c );
	bli_check_error_code( e_val );
}

// Parameter check for trmm: reuses the hemm/symm basic checks, then checks
// that a is triangular.
void bli_trmm_check
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Perform checks common to hemm/symm.

	bli_hemm_basic_check( side, alpha, a, b, beta, c, cntx );

	// Check object structure.

	e_val = bli_check_triangular_object( a );
	bli_check_error_code( e_val );
}

// Parameter check for trsm: reuses the hemm/symm basic checks, then checks
// that a is triangular.
void bli_trsm_check
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Perform checks common to hemm/symm.

	bli_hemm_basic_check( side, alpha, a, b, beta, c, cntx );

	// Check object structure.

	e_val = bli_check_triangular_object( a );
	bli_check_error_code( e_val );
}

// -----------------------------------------------------------------------------

// Basic gemm checks: the standard level-3 checks, the a*b -> c dimension
// check, and a datatype check whose form depends on BLIS_ENABLE_GEMM_MD.
void bli_gemm_basic_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Perform standard checks.

	bli_l3_basic_check( alpha, a, b, beta, c, cntx );

	// Check object dimensions.

	e_val = bli_check_level3_dims( a, b, c );
	bli_check_error_code( e_val );

#ifdef BLIS_ENABLE_GEMM_MD
	// Skip checking for consistent datatypes between A, B, and C since
	// that is totally valid for mixed-datatype gemm.

	// When mixing datatypes, make sure that alpha does not have a non-zero
	// imaginary component.
	if ( bli_obj_dt( c ) != bli_obj_dt( a ) ||
	     bli_obj_dt( c ) != bli_obj_dt( b ) ||
	     bli_obj_comp_prec( c ) != bli_obj_prec( c ) )
	if ( !bli_obj_imag_is_zero( alpha ) )
	{
		bli_print_msg( "Mixed-datatype gemm does not yet support alpha with a non-zero imaginary component. Please contact BLIS developers for further support.", __FILE__, __LINE__ );
		bli_abort();
	}

#else // BLIS_DISABLE_GEMM_MD

	// Check for consistent datatypes.
	// NOTE: We only perform these tests when mixed datatype support is
	// disabled.

	e_val = bli_check_consistent_object_datatypes( c, a );
	bli_check_error_code( e_val );

	e_val = bli_check_consistent_object_datatypes( c, b );
	bli_check_error_code( e_val );
#endif
}

// Basic checks shared by hemm/symm (and reused by trmm/trsm above): the
// standard level-3 checks, a side-dependent dimension check, squareness of
// a, and datatype consistency of a and b with c.
void bli_hemm_basic_check
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Perform standard checks.

	bli_l3_basic_check( alpha, a, b, beta, c, cntx );

	// Check object dimensions.

	if ( bli_is_left( side ) )
	{
		e_val = bli_check_level3_dims( a, b, c );
		bli_check_error_code( e_val );
	}
	else // if ( bli_is_right( side ) )
	{
		// With a on the right, the product being checked is b * a.
		e_val = bli_check_level3_dims( b, a, c );
		bli_check_error_code( e_val );
	}

	// Check matrix squareness.

	e_val = bli_check_square_object( a );
	bli_check_error_code( e_val );

	// Check for consistent datatypes.

	e_val = bli_check_consistent_object_datatypes( c, a );
	bli_check_error_code( e_val );

	e_val = bli_check_consistent_object_datatypes( c, b );
	bli_check_error_code( e_val );
}

// Basic checks shared by herk/syrk. ah is the caller-supplied transposed
// (or conjugate-transposed) alias of a, used as the second operand.
void bli_herk_basic_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  ah,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Perform standard checks.

	bli_l3_basic_check( alpha, a, ah, beta, c, cntx );

	// Check object dimensions.

	e_val = bli_check_level3_dims( a, ah, c );
	bli_check_error_code( e_val );

	// Check matrix squareness.

	e_val = bli_check_square_object( c );
	bli_check_error_code( e_val );

	// Check matrix structure.

	e_val = bli_check_general_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_general_object( ah );
	bli_check_error_code( e_val );

	// Check for consistent datatypes.

	e_val = bli_check_consistent_object_datatypes( c, a );
	bli_check_error_code( e_val );

	e_val = bli_check_consistent_object_datatypes( c, ah );
	bli_check_error_code( e_val );
}

// Basic checks shared by her2k/syr2k. The two products checked are a * bh
// and b * ah, where ah and bh are the caller-supplied aliases of a and b.
void bli_her2k_basic_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  bh,
       obj_t*  b,
       obj_t*  ah,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Perform standard checks.

	bli_l3_basic_check( alpha, a, bh, beta, c, cntx );
	bli_l3_basic_check( alpha, b, ah, beta, c, cntx );

	// Check object dimensions.

	e_val = bli_check_level3_dims( a, bh, c );
	bli_check_error_code( e_val );

	e_val = bli_check_level3_dims( b, ah, c );
	bli_check_error_code( e_val );

	// Check matrix squareness.

	e_val = bli_check_square_object( c );
	bli_check_error_code( e_val );

	// Check matrix structure.

	e_val = bli_check_general_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_general_object( bh );
	bli_check_error_code( e_val );

	e_val = bli_check_general_object( b );
	bli_check_error_code( e_val );

	e_val = bli_check_general_object( ah );
	bli_check_error_code( e_val );

	// Check for consistent datatypes.

	e_val = bli_check_consistent_object_datatypes( c, a );
	bli_check_error_code( e_val );

	e_val = bli_check_consistent_object_datatypes( c, ah );
	bli_check_error_code( e_val );

	e_val = bli_check_consistent_object_datatypes( c, b );
	bli_check_error_code( e_val );

	e_val = bli_check_consistent_object_datatypes( c, bh );
	bli_check_error_code( e_val );
}

// Standard checks applied by every *_basic_check routine above: object
// datatypes, scalar/matrix dimensionality, non-NULL buffers, and stack
// buffer size for the datatype of a.
void bli_l3_basic_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     )
{
	err_t e_val;

	// Check object datatypes.

	e_val = bli_check_noninteger_object( alpha );
	bli_check_error_code( e_val );

	e_val = bli_check_noninteger_object( beta );
	bli_check_error_code( e_val );

	e_val = bli_check_floating_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_floating_object( b );
	bli_check_error_code( e_val );

	e_val = bli_check_floating_object( c );
	bli_check_error_code( e_val );

	// Check object dimensions.

	e_val = bli_check_scalar_object( alpha );
	bli_check_error_code( e_val );

	e_val = bli_check_scalar_object( beta );
	bli_check_error_code( e_val );

	e_val = bli_check_matrix_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_matrix_object( b );
	bli_check_error_code( e_val );

	e_val = bli_check_matrix_object( c );
	bli_check_error_code( e_val );

	// Check object buffers (for non-NULLness).

	e_val = bli_check_object_buffer( alpha );
	bli_check_error_code( e_val );

	e_val = bli_check_object_buffer( a );
	bli_check_error_code( e_val );

	e_val = bli_check_object_buffer( b );
	bli_check_error_code( e_val );

	e_val = bli_check_object_buffer( beta );
	bli_check_error_code( e_val );

	e_val = bli_check_object_buffer( c );
	bli_check_error_code( e_val );

	// Check for sufficiently sized stack buffers

	e_val = bli_check_sufficient_stack_buf_size( bli_obj_dt( a ), cntx );
	bli_check_error_code( e_val );
}

blis-0.6.1/frame/3/bli_l3_check.h000066400000000000000000000066361360743507500164020ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/


//
// Prototype object-based check functions.
//

// Operations taking two input matrices (a, b) and no side argument.
#undef  GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  b, \
       obj_t*  beta, \
       obj_t*  c, \
       cntx_t* cntx  \
    );

GENPROT( gemm )
GENPROT( her2k )
GENPROT( syr2k )


// Operations that additionally take a side argument.
#undef  GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       side_t  side, \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  b, \
       obj_t*  beta, \
       obj_t*  c, \
       cntx_t* cntx  \
    );

GENPROT( hemm )
GENPROT( symm )
GENPROT( trmm )
GENPROT( trsm )


// Operations taking a single input matrix a.
#undef  GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC(opname,_check) \
     ( \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  beta, \
       obj_t*  c, \
       cntx_t* cntx  \
    );

GENPROT( herk )
GENPROT( syrk )


// -----------------------------------------------------------------------------

// Prototypes for the shared "basic check" helpers that the per-operation
// check functions call.

void bli_gemm_basic_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     );

void bli_hemm_basic_check
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     );

void bli_herk_basic_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  ah,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     );

void bli_her2k_basic_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  bh,
       obj_t*  b,
       obj_t*  ah,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     );

void bli_l3_basic_check
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx
     );

blis-0.6.1/frame/3/bli_l3_cntl.c000066400000000000000000000067551360743507500162620ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_l3_cntl_create_if ( opid_t family, pack_t schema_a, pack_t schema_b, obj_t* a, obj_t* b, obj_t* c, rntm_t* rntm, cntl_t* cntl_orig, cntl_t** cntl_use ) { // If the control tree pointer is NULL, we construct a default // tree as a function of the operation family. 
if ( cntl_orig == NULL ) { if ( family == BLIS_GEMM || family == BLIS_HERK || family == BLIS_TRMM ) { *cntl_use = bli_gemm_cntl_create( rntm, family, schema_a, schema_b ); } else // if ( family == BLIS_TRSM ) { side_t side; if ( bli_obj_is_triangular( a ) ) side = BLIS_LEFT; else side = BLIS_RIGHT; *cntl_use = bli_trsm_cntl_create( rntm, side, schema_a, schema_b ); } } else { // If the user provided a control tree, create a copy and use it // instead (so that threads can use its local tree as a place to // cache things like pack mem_t entries). *cntl_use = bli_cntl_copy( rntm, cntl_orig ); // Recursively set the family fields of the newly copied control tree // nodes. bli_cntl_mark_family( family, *cntl_use ); } } void bli_l3_cntl_free ( rntm_t* rntm, cntl_t* cntl_use, thrinfo_t* thread ) { // NOTE: We don't actually need to call separate _cntl_free() functions // for gemm and trsm; it is merely an unnecessary mirroring of behavior // from the _create() side (which must call different functions based // on the family). opid_t family = bli_cntl_family( cntl_use ); if ( family == BLIS_GEMM || family == BLIS_HERK || family == BLIS_TRMM ) { bli_gemm_cntl_free( rntm, cntl_use, thread ); } else // if ( family == BLIS_TRSM ) { bli_trsm_cntl_free( rntm, cntl_use, thread ); } } blis-0.6.1/frame/3/bli_l3_cntl.h000066400000000000000000000041631360743507500162560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype conditional control tree creation functions. // void bli_l3_cntl_create_if ( opid_t family, pack_t schema_a, pack_t schema_b, obj_t* a, obj_t* b, obj_t* c, rntm_t* rntm, cntl_t* cntl_orig, cntl_t** cntl_use ); void bli_l3_cntl_free ( rntm_t* rntm, cntl_t* cntl_use, thrinfo_t* thread ); blis-0.6.1/frame/3/bli_l3_direct.c000066400000000000000000000074201360743507500165620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" dir_t bli_l3_direct ( obj_t* a, obj_t* b, obj_t* c, cntl_t* cntl ) { // Query the operation family. opid_t family = bli_cntl_family( cntl ); if ( family == BLIS_GEMM ) return bli_gemm_direct( a, b, c ); else if ( family == BLIS_HERK ) return bli_herk_direct( a, b, c ); else if ( family == BLIS_TRMM ) return bli_trmm_direct( a, b, c ); else if ( family == BLIS_TRSM ) return bli_trsm_direct( a, b, c ); // This should never execute. return BLIS_FWD; } // ----------------------------------------------------------------------------- dir_t bli_gemm_direct ( obj_t* a, obj_t* b, obj_t* c ) { // For gemm, movement may be forwards (or backwards). return BLIS_FWD; } dir_t bli_herk_direct ( obj_t* a, obj_t* b, obj_t* c ) { // For herk, movement may be forwards (or backwards). 
return BLIS_FWD; } dir_t bli_trmm_direct ( obj_t* a, obj_t* b, obj_t* c ) { dir_t direct; // For trmm, movement for the parameter cases is as follows: // - left,lower: backwards // - left,upper: forwards // - right,lower: forwards // - right,upper: backwards if ( bli_obj_root_is_triangular( a ) ) { if ( bli_obj_root_is_lower( a ) ) direct = BLIS_BWD; else direct = BLIS_FWD; } else // if ( bli_obj_root_is_triangular( b ) ) { if ( bli_obj_root_is_lower( b ) ) direct = BLIS_FWD; else direct = BLIS_BWD; } return direct; } dir_t bli_trsm_direct ( obj_t* a, obj_t* b, obj_t* c ) { dir_t direct; // For trsm, movement for the parameter cases is as follows: // - left,lower: forwards // - left,upper: backwards // - right,lower: backwards // - right,upper: forwards if ( bli_obj_root_is_triangular( a ) ) { if ( bli_obj_root_is_lower( a ) ) direct = BLIS_FWD; else direct = BLIS_BWD; } else // if ( bli_obj_root_is_triangular( b ) ) { if ( bli_obj_root_is_lower( b ) ) direct = BLIS_BWD; else direct = BLIS_FWD; } return direct; } blis-0.6.1/frame/3/bli_l3_direct.h000066400000000000000000000041041360743507500165630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ dir_t bli_l3_direct ( obj_t* a, obj_t* b, obj_t* c, cntl_t* cntl ); // ----------------------------------------------------------------------------- #undef GENPROT #define GENPROT( opname ) \ \ dir_t PASTEMAC0(opname) \ ( \ obj_t* a, \ obj_t* b, \ obj_t* c \ ); GENPROT( gemm_direct ) GENPROT( herk_direct ) GENPROT( trmm_direct ) GENPROT( trsm_direct ) blis-0.6.1/frame/3/bli_l3_ft_ex.h000066400000000000000000000134701360743507500164240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_L3_FT_EX_H #define BLIS_L3_FT_EX_H // // -- Level-3 expert function types -------------------------------------------- // // gemm #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,BLIS_TAPI_EX_SUF,tsuf)) \ ( \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTDEF( gemm ) // hemm, symm #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,BLIS_TAPI_EX_SUF,tsuf)) \ ( \ side_t side, \ uplo_t uploa, \ conj_t conja, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTDEF( hemm ) INSERT_GENTDEF( symm ) // herk #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, 
chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,BLIS_TAPI_EX_SUF,tsuf)) \ ( \ uplo_t uploc, \ trans_t transa, \ dim_t m, \ dim_t k, \ ctype_r* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTDEFR( herk ) // her2k #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,BLIS_TAPI_EX_SUF,tsuf)) \ ( \ uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTDEFR( her2k ) // syrk #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,BLIS_TAPI_EX_SUF,tsuf)) \ ( \ uplo_t uploc, \ trans_t transa, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTDEFR( syrk ) // syr2k #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,BLIS_TAPI_EX_SUF,tsuf)) \ ( \ uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTDEF( syr2k ) // trmm3 #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,BLIS_TAPI_EX_SUF,tsuf)) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTDEF( trmm3 ) // trmm #undef GENTDEF #define GENTDEF( ctype, ch, 
opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,BLIS_TAPI_EX_SUF,tsuf)) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTDEF( trmm ) INSERT_GENTDEF( trsm ) #endif blis-0.6.1/frame/3/bli_l3_ft_ukr.h000066400000000000000000000061071360743507500166100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_L3_FT_UKR_H #define BLIS_L3_FT_UKR_H // // -- Level-3 micro-kernel function types -------------------------------------- // // gemm #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ukr,tsuf)) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); INSERT_GENTDEF( gemm ) // gemmtrsm_[lu] #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ukr,tsuf)) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); INSERT_GENTDEF( gemmtrsm ) // trsm_[lu] #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,_ukr,tsuf)) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); INSERT_GENTDEF( trsm ) #endif blis-0.6.1/frame/3/bli_l3_oapi.c000066400000000000000000000213621360743507500162410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the object API macros. #ifdef BLIS_ENABLE_OAPI // // Define object-based interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ /* If the rntm is non-NULL, it may indicate that we should forgo sup handling altogether. 
*/ \ bool_t enable_sup = TRUE; \ if ( rntm != NULL ) enable_sup = bli_rntm_l3_sup( rntm ); \ \ if ( enable_sup ) \ { \ /* Execute the small/unpacked oapi handler. If it finds that the problem does not fall within the thresholds that define "small", or for some other reason decides not to use the small/unpacked implementation, the function returns with BLIS_FAILURE, which causes execution to proceed towards the conventional implementation. */ \ err_t result = PASTEMAC(opname,sup)( alpha, a, b, beta, c, cntx, rntm ); \ if ( result == BLIS_SUCCESS ) return; \ } \ \ /* Only proceed with an induced method if each of the operands have a complex storage datatype. NOTE: Allowing precisions to vary while using 1m, which is what we do here, is unique to gemm; other level-3 operations use 1m only if all storage datatypes are equal (and they ignore the computation precision). If any operands are real, skip the induced method chooser function and proceed directly with native execution. */ \ if ( bli_obj_is_complex( c ) && \ bli_obj_is_complex( a ) && \ bli_obj_is_complex( b ) ) \ { \ /* Invoke the operation's "ind" function--its induced method front-end. For complex problems, it calls the highest priority induced method that is available (ie: implemented and enabled), and if none are enabled, it calls native execution. (For real problems, it calls the operation's native execution interface.) */ \ PASTEMAC(opname,ind)( alpha, a, b, beta, c, cntx, rntm ); \ } \ else \ { \ PASTEMAC(opname,nat)( alpha, a, b, beta, c, cntx, rntm ); \ } \ } GENFRONT( gemm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ /* Only proceed with an induced method if each of the operands have a complex storage datatype. 
NOTE: Allowing precisions to vary while using 1m, which is what we do here, is unique to gemm; other level-3 operations use 1m only if all storage datatypes are equal (and they ignore the computation precision). If any operands are real, skip the induced method chooser function and proceed directly with native execution. */ \ if ( bli_obj_is_complex( c ) && \ bli_obj_is_complex( a ) && \ bli_obj_is_complex( b ) ) \ { \ /* Invoke the operation's "ind" function--its induced method front-end. For complex problems, it calls the highest priority induced method that is available (ie: implemented and enabled), and if none are enabled, it calls native execution. (For real problems, it calls the operation's native execution interface.) */ \ PASTEMAC(opname,ind)( alpha, a, b, beta, c, cntx, rntm ); \ } \ else \ { \ PASTEMAC(opname,nat)( alpha, a, b, beta, c, cntx, rntm ); \ } \ } GENFRONT( her2k ) GENFRONT( syr2k ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ /* Only proceed with an induced method if all operands have the same (complex) datatype. If any datatypes differ, skip the induced method chooser function and proceed directly with native execution, which is where mixed datatype support will be implemented (if at all). */ \ if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \ bli_obj_dt( b ) == bli_obj_dt( c ) && \ bli_obj_is_complex( c ) ) \ { \ /* Invoke the operation's "ind" function--its induced method front-end. For complex problems, it calls the highest priority induced method that is available (ie: implemented and enabled), and if none are enabled, it calls native execution. (For real problems, it calls the operation's native execution interface.) 
*/ \ PASTEMAC(opname,ind)( side, alpha, a, b, beta, c, cntx, rntm ); \ } \ else \ { \ PASTEMAC(opname,nat)( side, alpha, a, b, beta, c, cntx, rntm ); \ } \ } GENFRONT( hemm ) GENFRONT( symm ) GENFRONT( trmm3 ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* beta, \ obj_t* c \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ /* Only proceed with an induced method if all operands have the same (complex) datatype. If any datatypes differ, skip the induced method chooser function and proceed directly with native execution, which is where mixed datatype support will be implemented (if at all). */ \ if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \ bli_obj_is_complex( c ) ) \ { \ /* Invoke the operation's "ind" function--its induced method front-end. For complex problems, it calls the highest priority induced method that is available (ie: implemented and enabled), and if none are enabled, it calls native execution. (For real problems, it calls the operation's native execution interface.) */ \ PASTEMAC(opname,ind)( alpha, a, beta, c, cntx, rntm ); \ } \ else \ { \ PASTEMAC(opname,nat)( alpha, a, beta, c, cntx, rntm ); \ } \ } GENFRONT( herk ) GENFRONT( syrk ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ /* Only proceed with an induced method if all operands have the same (complex) datatype. If any datatypes differ, skip the induced method chooser function and proceed directly with native execution, which is where mixed datatype support will be implemented (if at all). */ \ if ( bli_obj_dt( a ) == bli_obj_dt( b ) && \ bli_obj_is_complex( b ) ) \ { \ /* Invoke the operation's "ind" function--its induced method front-end. 
For complex problems, it calls the highest priority induced method that is available (ie: implemented and enabled), and if none are enabled, it calls native execution. (For real problems, it calls the operation's native execution interface.) */ \ PASTEMAC(opname,ind)( side, alpha, a, b, cntx, rntm ); \ } \ else \ { \ PASTEMAC(opname,nat)( side, alpha, a, b, cntx, rntm ); \ } \ } GENFRONT( trmm ) GENFRONT( trsm ) #endif blis-0.6.1/frame/3/bli_l3_oapi.h000066400000000000000000000054641360743507500162530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( gemm ) GENPROT( her2k ) GENPROT( syr2k ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( hemm ) GENPROT( symm ) GENPROT( trmm3 ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* beta, \ obj_t* c \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( herk ) GENPROT( syrk ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( trmm ) GENPROT( trsm ) blis-0.6.1/frame/3/bli_l3_oapi_ba.c000066400000000000000000000036671360743507500167130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_oapi_ba.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_l3_oapi.c" blis-0.6.1/frame/3/bli_l3_oapi_ex.c000066400000000000000000000036651360743507500167430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

// This translation unit contains no definitions of its own: it includes the
// shared template bli_l3_oapi.c after including bli_oapi_ex.h and defining
// BLIS_ENABLE_OAPI. The order of the three steps below matters, since the
// template is included last and sees everything defined before it.

#include "blis.h"

// Include cpp macros that instantiate the API definition templates as
// having expert parameters.
#include "bli_oapi_ex.h"

// Define the macro protecting the object API definitions.
#define BLIS_ENABLE_OAPI

// Include the object API definitions here.
#include "bli_l3_oapi.c"

blis-0.6.1/frame/3/bli_l3_oft.h000066400000000000000000000055001360743507500161020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_L3_OFT_H
#define BLIS_L3_OFT_H

//
// -- Level-3 object function types --------------------------------------------
//
// Each GENTDEF( opname ) instantiation defines a function-pointer type whose
// name is formed by PASTECH(opname,_oft). Every type returns void and ends
// with a cntx_t* and an rntm_t* parameter.
//

// gemm, her2k, syr2k

#undef GENTDEF
#define GENTDEF( opname ) \
\
typedef void (*PASTECH(opname,_oft)) \
( \
  obj_t* alpha, \
  obj_t* a, \
  obj_t* b, \
  obj_t* beta, \
  obj_t* c, \
  cntx_t* cntx, \
  rntm_t* rntm \
);

GENTDEF( gemm )
GENTDEF( her2k )
GENTDEF( syr2k )

// hemm, symm, trmm3

#undef GENTDEF
#define GENTDEF( opname ) \
\
typedef void (*PASTECH(opname,_oft)) \
( \
  side_t side, \
  obj_t* alpha, \
  obj_t* a, \
  obj_t* b, \
  obj_t* beta, \
  obj_t* c, \
  cntx_t* cntx, \
  rntm_t* rntm \
);

GENTDEF( hemm )
GENTDEF( symm )
GENTDEF( trmm3 )

// herk, syrk

#undef GENTDEF
#define GENTDEF( opname ) \
\
typedef void (*PASTECH(opname,_oft)) \
( \
  obj_t* alpha, \
  obj_t* a, \
  obj_t* beta, \
  obj_t* c, \
  cntx_t* cntx, \
  rntm_t* rntm \
);

GENTDEF( herk )
GENTDEF( syrk )

// trmm, trsm

#undef GENTDEF
#define GENTDEF( opname ) \
\
typedef void (*PASTECH(opname,_oft)) \
( \
  side_t side, \
  obj_t* alpha, \
  obj_t* a, \
  obj_t* b, \
  cntx_t* cntx, \
  rntm_t* rntm \
);

GENTDEF( trmm )
GENTDEF( trsm )

#endif

blis-0.6.1/frame/3/bli_l3_oft_var.h000066400000000000000000000043721360743507500167600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_L3_OFT_VAR_H
#define BLIS_L3_OFT_VAR_H

//
// -- Level-3 variant function types -------------------------------------------
//
// Each GENTDEF( opname ) instantiation defines a function-pointer type whose
// name is formed by PASTECH(opname,_var_oft). Unlike the types in
// bli_l3_oft.h, these take no alpha/beta operands and additionally receive a
// cntl_t* and a thrinfo_t*.
//

// gemm

#undef GENTDEF
#define GENTDEF( opname ) \
\
typedef void (*PASTECH(opname,_var_oft)) \
( \
  obj_t* a, \
  obj_t* b, \
  obj_t* c, \
  cntx_t* cntx, \
  rntm_t* rntm, \
  cntl_t* cntl, \
  thrinfo_t* thread \
);

GENTDEF( gemm )

// trsm
// NOTE: This definition is currently identical to the one used for gemm
// above; it is simply instantiated separately.

#undef GENTDEF
#define GENTDEF( opname ) \
\
typedef void (*PASTECH(opname,_var_oft)) \
( \
  obj_t* a, \
  obj_t* b, \
  obj_t* c, \
  cntx_t* cntx, \
  rntm_t* rntm, \
  cntl_t* cntl, \
  thrinfo_t* thread \
);

GENTDEF( trsm )

#endif

blis-0.6.1/frame/3/bli_l3_packm.c000066400000000000000000000132311360743507500164000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

//
// bli_l3_packm()
//
// Packs object x into x_pack. The pack buffer is tracked by the mem_t entry
// of the control tree node cntl: it is acquired from the memory broker when
// that entry is unallocated, replaced when the cached block is smaller than
// what bli_packm_init() reports as needed, and reused as-is otherwise.
// Only the chief thread talks to the memory broker; the resulting mem_t is
// broadcast to the other threads, each of which stores a copy in the mem_t
// entry returned by bli_cntl_pack_mem( cntl ).
//
// Returns early, without packing and without the trailing barrier, when
// bli_packm_init() returns zero.
//
void bli_l3_packm
     (
       obj_t*  x,
       obj_t*  x_pack,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	// FGVZ: Not sure why we need this barrier, but we do.
	bli_thread_obarrier( thread );

	// All threads initialize x_pack and learn the size of the memory block
	// that the packed matrix requires.
	const siz_t size_needed = bli_packm_init( x, x_pack, cntx, cntl );

	// A size of zero means nothing needs to be allocated; we are done.
	if ( size_needed == 0 ) return;

	// Look up the pack buffer type and the address of the mem_t entry, both
	// of which live in the control tree node.
	const packbuf_t pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
	mem_t* const    cntl_mem_p    = bli_cntl_pack_mem( cntl );

	// Storage for the chief thread's freshly acquired mem_t, and the address
	// of that storage as seen by every thread after the broadcast.
	mem_t  chief_mem_s;
	mem_t* chief_mem_p;

	if ( bli_mem_is_unalloc( cntl_mem_p ) )
	{
		// Case 1: nothing is cached yet. The chief acquires a block from the
		// memory broker.
		if ( bli_thread_am_ochief( thread ) )
		{
			#ifdef BLIS_ENABLE_MEM_TRACING
			printf( "bli_l3_packm(): acquiring mem pool block\n" );
			#endif

			bli_membrk_acquire_m
			(
			  rntm,
			  size_needed,
			  pack_buf_type,
			  &chief_mem_s
			);
		}

		// Share the address of the chief's mem_t with all threads, then copy
		// its contents into this thread's control tree node.
		chief_mem_p = bli_thread_obroadcast( thread, &chief_mem_s );

		*cntl_mem_p = *chief_mem_p;
	}
	else if ( bli_mem_size( cntl_mem_p ) < size_needed )
	{
		// Case 2: a block is cached, but it is too small for the packed
		// matrix according to the return value of packm_init(). The chief
		// releases the cached block and acquires a new one.
		if ( bli_thread_am_ochief( thread ) )
		{
			bli_membrk_release
			(
			  rntm,
			  cntl_mem_p
			);

			bli_membrk_acquire_m
			(
			  rntm,
			  size_needed,
			  pack_buf_type,
			  &chief_mem_s
			);
		}

		// Share the address of the chief's mem_t with all threads, then copy
		// its contents into this thread's control tree node.
		chief_mem_p = bli_thread_obroadcast( thread, &chief_mem_s );

		*cntl_mem_p = *chief_mem_p;
	}
	else
	{
		// Case 3: the cached block is large enough, so it is used as-is.
		// Every thread already holds the cached values in its own control
		// tree's mem_t entry; only a barrier is performed.
		bli_thread_obarrier( thread );
	}

	// Point x_pack at the buffer of the mem_t entry now cached in the
	// control tree node.
	bli_obj_set_buffer( bli_mem_buffer( cntl_mem_p ), x_pack );

	// Pack the contents of object x to object x_pack.
	bli_packm_int
	(
	  x,
	  x_pack,
	  cntx,
	  cntl,
	  thread
	);

	// Barrier so that packing is done before computation.
	bli_thread_obarrier( thread );
}

blis-0.6.1/frame/3/bli_l3_packm.h000066400000000000000000000035471360743507500164160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

// Prototype for the level-3 packing helper defined in bli_l3_packm.c, which
// packs object x into x_pack using a pack buffer cached in the control tree
// node cntl.
void bli_l3_packm
     (
       obj_t* x,
       obj_t* x_pack,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     );

blis-0.6.1/frame/3/bli_l3_prune.c000066400000000000000000000140121360743507500164340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

/*
// Disabled, hand-written form of the m-dimension dispatcher. The macro
// template that follows generates the same dispatch for m, n, and k.
void bli_l3_prune_unref_mparts_m
     (
       obj_t* a,
       obj_t* b,
       obj_t* c,
       cntl_t* cntl
     )
{
	// Query the operation family.
	opid_t family = bli_cntl_family( cntl );

	if ( family == BLIS_GEMM ) return; // No pruning is necessary for gemm.
	else if ( family == BLIS_HERK ) bli_herk_prune_unref_mparts_m( a, b, c );
	else if ( family == BLIS_TRMM ) bli_trmm_prune_unref_mparts_m( a, b, c );
	else if ( family == BLIS_TRSM ) bli_trsm_prune_unref_mparts_m( a, b, c );
}
*/

// Dispatchers: one function per partitioning dimension (m, n, k). Each one
// queries the operation family from the control tree and forwards to the
// family-specific pruning function for that dimension. A family other than
// gemm, herk, trmm, or trsm matches no branch, so the function returns
// without pruning anything.

#undef GENFRONT
#define GENFRONT( dim ) \
\
void PASTEMAC(l3_prune_unref_mparts_,dim) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c, \
       cntl_t* cntl \
     ) \
{ \
	/* Query the operation family. */ \
	opid_t family = bli_cntl_family( cntl ); \
\
	if ( family == BLIS_GEMM ) return; /* No pruning is necessary for gemm. */ \
	else if ( family == BLIS_HERK ) PASTEMAC(herk_prune_unref_mparts_,dim)( a, b, c ); \
	else if ( family == BLIS_TRMM ) PASTEMAC(trmm_prune_unref_mparts_,dim)( a, b, c ); \
	else if ( family == BLIS_TRSM ) PASTEMAC(trsm_prune_unref_mparts_,dim)( a, b, c ); \
}

GENFRONT( m )
GENFRONT( n )
GENFRONT( k )

// -----------------------------------------------------------------------------

// gemm: all three pruning functions are intentionally empty. (The dispatchers
// above return before calling them.)

#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_prune_unref_mparts_m) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c \
     ) \
{ \
	/* No pruning is necessary for gemm. */ \
} \
void PASTEMAC(opname,_prune_unref_mparts_n) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c \
     ) \
{ \
	/* No pruning is necessary for gemm. */ \
} \
void PASTEMAC(opname,_prune_unref_mparts_k) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c \
     ) \
{ \
	/* No pruning is necessary for gemm. */ \
}

GENFRONT( gemm )

// -----------------------------------------------------------------------------

// herk: pruning is driven by C in the m and n dimensions; the k-dimension
// function is empty. Note that the second operand is named ah here.

#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_prune_unref_mparts_m) \
     ( \
       obj_t* a, \
       obj_t* ah, \
       obj_t* c \
     ) \
{ \
	/* Prune any unreferenced part from the subpartition of C (that would
	   be encountered from partitioning in the m dimension) and adjust the
	   subpartition of A accordingly. */ \
	bli_prune_unref_mparts( c, BLIS_M, a, BLIS_M ); \
} \
void PASTEMAC(opname,_prune_unref_mparts_n) \
     ( \
       obj_t* a, \
       obj_t* ah, \
       obj_t* c \
     ) \
{ \
	/* Prune any unreferenced part from the subpartition of C (that would
	   be encountered from partitioning in the n dimension) and adjust the
	   subpartition of Ah accordingly. */ \
	bli_prune_unref_mparts( c, BLIS_N, ah, BLIS_N ); \
} \
void PASTEMAC(opname,_prune_unref_mparts_k) \
     ( \
       obj_t* a, \
       obj_t* ah, \
       obj_t* c \
     ) \
{ \
	/* As long as A and Ah are general in structure, no pruning should be
	   needed for the k dimension. */ \
}

GENFRONT( herk )

// -----------------------------------------------------------------------------

// trmm, trsm: pruning is driven by A in the m dimension and by B in the n
// dimension; in the k dimension A and B are pruned against each other, in
// both directions.

#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_prune_unref_mparts_m) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c \
     ) \
{ \
	/* Prune any unreferenced part from the subpartition of A (that would
	   be encountered from partitioning in the m dimension) and adjust the
	   subpartition of C accordingly. */ \
	bli_prune_unref_mparts( a, BLIS_M, c, BLIS_M ); \
} \
void PASTEMAC(opname,_prune_unref_mparts_n) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c \
     ) \
{ \
	/* Prune any unreferenced part from the subpartition of B (that would
	   be encountered from partitioning in the n dimension) and adjust the
	   subpartition of C accordingly. */ \
	bli_prune_unref_mparts( b, BLIS_N, c, BLIS_N ); \
} \
void PASTEMAC(opname,_prune_unref_mparts_k) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c \
     ) \
{ \
	/* Prune any unreferenced part from the subpartition of A (that would
	   be encountered from partitioning in the k dimension) and adjust the
	   subpartition of B accordingly. */ \
	bli_prune_unref_mparts( a, BLIS_N, b, BLIS_M ); \
\
	/* Prune any unreferenced part from the subpartition of B (that would
	   be encountered from partitioning in the k dimension) and adjust the
	   subpartition of A accordingly. */ \
	bli_prune_unref_mparts( b, BLIS_M, a, BLIS_N ); \
}

GENFRONT( trmm )
GENFRONT( trsm )

blis-0.6.1/frame/3/bli_l3_prune.h000066400000000000000000000045331360743507500164480ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Prototypes for the per-dimension dispatchers. Each instantiation declares
// a function named by PASTEMAC(l3_prune_unref_mparts_,dim) that additionally
// takes the control tree node.

#undef GENPROT
#define GENPROT( dim ) \
\
void PASTEMAC(l3_prune_unref_mparts_,dim) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c, \
       cntl_t* cntl \
     );

GENPROT( m )
GENPROT( n )
GENPROT( k )

// -----------------------------------------------------------------------------

// Prototypes for the family-specific pruning functions, one per
// (operation, dimension) pair, named by
// PASTEMAC2(opname,_prune_unref_mparts_,dim).

#undef GENPROT
#define GENPROT( opname, dim ) \
\
void PASTEMAC2(opname,_prune_unref_mparts_,dim) \
     ( \
       obj_t* a, \
       obj_t* b, \
       obj_t* c \
     );

GENPROT( gemm, m )
GENPROT( gemm, n )
GENPROT( gemm, k )

GENPROT( herk, m )
GENPROT( herk, n )
GENPROT( herk, k )

GENPROT( trmm, m )
GENPROT( trmm, n )
GENPROT( trmm, k )

GENPROT( trsm, m )
GENPROT( trsm, n )
GENPROT( trsm, k )

blis-0.6.1/frame/3/bli_l3_sup.c000066400000000000000000000116131360743507500161160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" err_t bli_gemmsup ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { // Return early if small matrix handling is disabled at configure-time. #ifdef BLIS_DISABLE_SUP_HANDLING return BLIS_FAILURE; #endif // Return early if this is a mixed-datatype computation. if ( bli_obj_dt( c ) != bli_obj_dt( a ) || bli_obj_dt( c ) != bli_obj_dt( b ) || bli_obj_comp_prec( c ) != bli_obj_prec( c ) ) return BLIS_FAILURE; // Obtain a valid (native) context from the gks if necessary. // NOTE: This must be done before calling the _check() function, since // that function assumes the context pointer is valid. if ( cntx == NULL ) cntx = bli_gks_query_cntx(); // Return early if a microkernel preference-induced transposition would // have been performed and shifted the dimensions outside of the space // of sup-handled problems. if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( c, BLIS_GEMM_UKR, cntx ) ) { const num_t dt = bli_obj_dt( c ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width_after_trans( a ); // Pass in m and n reversed, which simulates a transposition of the // entire operation pursuant to the microkernel storage preference. if ( !bli_cntx_l3_sup_thresh_is_met( dt, n, m, k, cntx ) ) return BLIS_FAILURE; } else // ukr_prefers_storage_of( c, ... 
) { const num_t dt = bli_obj_dt( c ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width_after_trans( a ); if ( !bli_cntx_l3_sup_thresh_is_met( dt, m, n, k, cntx ) ) return BLIS_FAILURE; } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. rntm_t rntm_l; if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } else { rntm_l = *rntm; rntm = &rntm_l; } #if 0 const num_t dt = bli_obj_dt( c ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width_after_trans( a ); const dim_t tm = bli_cntx_get_l3_sup_thresh_dt( dt, BLIS_MT, cntx ); const dim_t tn = bli_cntx_get_l3_sup_thresh_dt( dt, BLIS_NT, cntx ); const dim_t tk = bli_cntx_get_l3_sup_thresh_dt( dt, BLIS_KT, cntx ); printf( "dims: %d %d %d (threshs: %d %d %d)\n", (int)m, (int)n, (int)k, (int)tm, (int)tn, (int)tk ); #endif // We've now ruled out the following two possibilities: // - the ukernel prefers the operation as-is, and the sup thresholds are // unsatisfied. // - the ukernel prefers a transposed operation, and the sup thresholds are // unsatisfied after taking into account the transposition. // This implies that the sup thresholds (at least one of them) are met. // and the small/unpacked handler should be called. // NOTE: The sup handler is free to enforce a stricter threshold regime // if it so chooses, in which case it can/should return BLIS_FAILURE. // Query the small/unpacked handler from the context and invoke it. gemmsup_oft gemmsup_fp = bli_cntx_get_l3_sup_handler( BLIS_GEMM, cntx ); return gemmsup_fp ( alpha, a, b, beta, c, cntx, rntm ); } blis-0.6.1/frame/3/bli_l3_sup.h000066400000000000000000000034601360743507500161240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

// Prototype for the small/unpacked gemm entry point defined in bli_l3_sup.c.
// The definition returns BLIS_FAILURE when it declines to handle the problem
// and otherwise returns the value of the sup handler it invokes.
err_t bli_gemmsup
     (
       obj_t* alpha,
       obj_t* a,
       obj_t* b,
       obj_t* beta,
       obj_t* c,
       cntx_t* cntx,
       rntm_t* rntm
     );

blis-0.6.1/frame/3/bli_l3_sup_ft_ker.h000066400000000000000000000046461360743507500174650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_L3_SUP_FT_KER_H
#define BLIS_L3_SUP_FT_KER_H

//
// -- Level-3 small/unpacked kernel function types -----------------------------
//

// gemmsup
//
// GENTDEF defines a function-pointer type named by
// PASTECH3(ch,opname,_ker,tsuf) whose operands use element type ctype. The
// type is instantiated through INSERT_GENTDEF, which is defined elsewhere.
// NOTE(review): the rs_*/cs_* parameters are presumably the row and column
// strides of the preceding matrix operand -- confirm.

#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \
     ( \
       conj_t conja, \
       conj_t conjb, \
       dim_t m, \
       dim_t n, \
       dim_t k, \
       ctype* restrict alpha, \
       ctype* restrict a, inc_t rs_a, inc_t cs_a, \
       ctype* restrict b, inc_t rs_b, inc_t cs_b, \
       ctype* restrict beta, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t* restrict cntx \
     );

INSERT_GENTDEF( gemmsup )

#endif

blis-0.6.1/frame/3/bli_l3_sup_int.c000066400000000000000000000132171360743507500167720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" err_t bli_gemmsup_int ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { #if 0 //bli_gemmsup_ref_var2 //bli_gemmsup_ref_var1 #if 0 bli_gemmsup_ref_var1n #else #endif const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b ); const bool_t is_rrr_rrc_rcr_crr = ( stor_id == BLIS_RRR || stor_id == BLIS_RRC || stor_id == BLIS_RCR || stor_id == BLIS_CRR ); if ( is_rrr_rrc_rcr_crr ) { bli_gemmsup_ref_var2m ( BLIS_NO_TRANSPOSE, alpha, a, b, beta, c, stor_id, cntx, rntm ); } else { bli_gemmsup_ref_var2m ( BLIS_TRANSPOSE, alpha, a, b, beta, c, stor_id, cntx, rntm ); } return BLIS_SUCCESS; #endif const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b ); // Don't use the small/unpacked implementation if one of the matrices // uses general stride. if ( stor_id == BLIS_XXX ) return BLIS_FAILURE; const bool_t is_rrr_rrc_rcr_crr = ( stor_id == BLIS_RRR || stor_id == BLIS_RRC || stor_id == BLIS_RCR || stor_id == BLIS_CRR ); const bool_t is_rcc_crc_ccr_ccc = !is_rrr_rrc_rcr_crr; const num_t dt = bli_obj_dt( c ); const bool_t row_pref = bli_cntx_l3_sup_ker_prefers_rows_dt( dt, stor_id, cntx ); const bool_t is_primary = ( row_pref ? 
is_rrr_rrc_rcr_crr : is_rcc_crc_ccr_ccc ); if ( is_primary ) { // This branch handles: // - rrr rrc rcr crr for row-preferential kernels // - rcc crc ccr ccc for column-preferential kernels const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t NR = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t mu = m / MR; const dim_t nu = n / NR; if ( mu >= nu ) //if ( m % 2 == 1 && n % 2 == 1 ) { #ifdef TRACEVAR printf( "bli_l3_sup_int(): var2m primary\n" ); #endif // block-panel macrokernel; m -> mc, mr; n -> nc, nr: var2() bli_gemmsup_ref_var2m( BLIS_NO_TRANSPOSE, alpha, a, b, beta, c, stor_id, cntx, rntm, cntl, thread ); } else // if ( mu < nu ) { #ifdef TRACEVAR printf( "bli_l3_sup_int(): var1n primary\n" ); #endif // panel-block macrokernel; m -> nc*,mr; n -> mc*,nr: var1() bli_gemmsup_ref_var1n( BLIS_NO_TRANSPOSE, alpha, a, b, beta, c, stor_id, cntx, rntm, cntl, thread ); } } else { // This branch handles: // - rrr rrc rcr crr for column-preferential kernels // - rcc crc ccr ccc for row-preferential kernels const dim_t mt = bli_obj_width( c ); const dim_t nt = bli_obj_length( c ); const dim_t NR = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t mu = mt / MR; const dim_t nu = nt / NR; if ( mu >= nu ) //if ( mt % 2 == 1 && nt % 2 == 1 ) { #ifdef TRACEVAR printf( "bli_l3_sup_int(): var2m non-primary\n" ); #endif // panel-block macrokernel; m -> nc, nr; n -> mc, mr: var2() + trans bli_gemmsup_ref_var2m( BLIS_TRANSPOSE, alpha, a, b, beta, c, stor_id, cntx, rntm, cntl, thread ); } else // if ( mu < nu ) { #ifdef TRACEVAR printf( "bli_l3_sup_int(): var1n non-primary\n" ); #endif // block-panel macrokernel; m -> mc*,nr; n -> nc*,mr: var1() + trans bli_gemmsup_ref_var1n( BLIS_TRANSPOSE, alpha, a, b, beta, c, stor_id, cntx, rntm, cntl, thread ); } // *requires nudging of mc,nc up to be a 
multiple of nr,mr. } // Return success so that the caller knows that we computed the solution. return BLIS_SUCCESS; } blis-0.6.1/frame/3/bli_l3_sup_int.h000066400000000000000000000035421360743507500167770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

// Prototype for the sup gemm variant chooser defined in bli_l3_sup_int.c.
// Returns an err_t; the definition returns BLIS_FAILURE when the operands'
// storage combination is BLIS_XXX and BLIS_SUCCESS otherwise.
err_t bli_gemmsup_int
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     );
blis-0.6.1/frame/3/bli_l3_sup_ker.h000066400000000000000000000045031360743507500167640ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

//
// Define template prototypes for level-3 kernels on small/unpacked matrices.
// // Note: Instead of defining function prototype macro templates and then // instantiating those macros to define the individual function prototypes, // we simply alias the official operations' prototypes as defined in // bli_l3_ker_prot.h. #undef GENTPROT #define GENTPROT GEMMSUP_KER_PROT INSERT_GENTPROT_BASIC0( gemmsup_rv_ukr_name ) INSERT_GENTPROT_BASIC0( gemmsup_rg_ukr_name ) INSERT_GENTPROT_BASIC0( gemmsup_cv_ukr_name ) INSERT_GENTPROT_BASIC0( gemmsup_cg_ukr_name ) INSERT_GENTPROT_BASIC0( gemmsup_rd_ukr_name ) INSERT_GENTPROT_BASIC0( gemmsup_cd_ukr_name ) INSERT_GENTPROT_BASIC0( gemmsup_gx_ukr_name ) blis-0.6.1/frame/3/bli_l3_sup_ker_prot.h000066400000000000000000000044251360743507500200330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

//
// Define template prototypes for level-3 kernels on small/unpacked matrices.
//

// GEMMSUP_KER_PROT( ctype, ch, opname ) expands to the prototype of a void
// function named PASTEMAC(ch,opname). Its parameters are, in order: two
// conj_t values (conja, conjb); three dim_t dimensions (m, n, k); and
// restrict-qualified pointers to ctype for alpha, a, b, beta and c, where
// each of a, b and c is followed by its row stride (rs_*) and column stride
// (cs_*); then an auxinfo_t pointer and a cntx_t pointer.
#define GEMMSUP_KER_PROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       conj_t              conja, \
       conj_t              conjb, \
       dim_t               m, \
       dim_t               n, \
       dim_t               k, \
       ctype*     restrict alpha, \
       ctype*     restrict a, inc_t rs_a, inc_t cs_a, \
       ctype*     restrict b, inc_t rs_b, inc_t cs_b, \
       ctype*     restrict beta, \
       ctype*     restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t*    restrict cntx  \
     );

blis-0.6.1/frame/3/bli_l3_sup_oft.h000066400000000000000000000040131360743507500167670ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef BLIS_L3_SUP_OFT_H
#define BLIS_L3_SUP_OFT_H

//
// -- Level-3 small/unpacked object function types -----------------------------
//

// gemmsup
//
// GENTDEF( opname ) expands to a typedef, named PASTECH(opname,_oft), of a
// pointer to a function that returns err_t and takes the five operand
// objects (alpha, a, b, beta, c) followed by a cntx_t pointer and a rntm_t
// pointer. It is instantiated below for opname "gemmsup".
#undef  GENTDEF
#define GENTDEF( opname ) \
\
typedef err_t (*PASTECH(opname,_oft)) \
( \
  obj_t*  alpha, \
  obj_t*  a, \
  obj_t*  b, \
  obj_t*  beta, \
  obj_t*  c, \
  cntx_t* cntx, \
  rntm_t* rntm  \
);

GENTDEF( gemmsup )

#endif

blis-0.6.1/frame/3/bli_l3_sup_packm_a.c000066400000000000000000000254571360743507500176040ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

//
// packm_sup_init_mem_a
//
// When will_pack is TRUE, makes sure `mem` refers to a block large enough to
// hold a packed copy of A: sizeof( ctype ) * m_pack * k bytes, where m_pack
// is m rounded up to a multiple of mr. The block is requested from the
// memory broker via bli_membrk_acquire_m() using buffer type
// BLIS_BUFFER_FOR_GEN_USE when stor_id is BLIS_RRC or BLIS_CRC, and the
// caller's pack_buf_type otherwise. When will_pack is FALSE nothing is done.
// The cntx and thread parameters are not referenced by the body.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       bool_t           will_pack, \
       packbuf_t        pack_buf_type, \
       stor3_t          stor_id, \
       dim_t            m, \
       dim_t            k, \
       dim_t            mr, \
       cntx_t* restrict cntx, \
       rntm_t* restrict rntm, \
       mem_t*  restrict mem, \
       thrinfo_t* restrict thread  \
     ) \
{ \
	/* Inspect whether we are going to be packing matrix A. */ \
	if ( will_pack == FALSE ) \
	{ \
	} \
	else /* if ( will_pack == TRUE ) */ \
	{ \
		packbuf_t pack_buf_type_use; \
\
		/* NOTE: This "rounding up" of the last upanel is actually optional
		   for the rrc/crc cases, but absolutely necessary for the other cases
		   since we NEED that last micropanel to have the same ldim (cs_p) as
		   the other micropanels. Why? So that millikernels can use the same
		   upanel ldim for all iterations of the ir loop. */ \
		const dim_t m_pack = ( m / mr + ( m % mr ? 1 : 0 ) ) * mr; \
		const dim_t k_pack = k; \
\
		/* Determine the packing buffer type to use for the packed matrix A. */ \
		if ( stor_id == BLIS_RRC || \
		     stor_id == BLIS_CRC ) \
		{ \
			/* stor3_t id values _RRC and _CRC: pack A to plain row storage,
			   which can use the packing buffer type for general usage. */ \
			pack_buf_type_use = BLIS_BUFFER_FOR_GEN_USE; \
		} \
		else \
		{ \
			/* All other stor3_t ids: pack A to column-stored row-panels
			   using the packing buffer type as specified by the caller. */ \
			/*pack_buf_type_use = BLIS_BUFFER_FOR_A_BLOCK;*/ \
			pack_buf_type_use = pack_buf_type; \
		} \
\
		/* Compute the size of the memory block needed. */ \
		siz_t size_needed = sizeof( ctype ) * m_pack * k_pack; \
\
		/* Check the mem_t entry provided by the caller. If it is unallocated,
		   then we need to acquire a block from the memory broker. */ \
		if ( bli_mem_is_unalloc( mem ) ) \
		{ \
			bli_membrk_acquire_m \
			( \
			  rntm, \
			  size_needed, \
			  pack_buf_type_use, \
			  mem  \
			); \
		} \
		else \
		{ \
			/* NOTE: This shouldn't execute since the sup code path calls this
			   function only once, before *any* loops of the gemm algorithm
			   are encountered. */ \
			bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
\
			/* If the mem_t entry provided by the caller does NOT contain a NULL
			   buffer, then a block has already been acquired from the memory
			   broker and cached by the caller. */ \
\
			/* As a sanity check, we should make sure that the mem_t object isn't
			   associated with a block that is too small compared to the size of
			   the packed matrix buffer that is needed, according to the value
			   computed above. */ \
			siz_t mem_size = bli_mem_size( mem ); \
\
			if ( mem_size < size_needed ) \
			{ \
				bli_membrk_release \
				( \
				  rntm, \
				  mem \
				); \
				bli_membrk_acquire_m \
				( \
				  rntm, \
				  size_needed, \
				  pack_buf_type_use, \
				  mem \
				); \
			} \
			else \
			{ \
				/* If the mem_t entry is already allocated and sufficiently large,
				   then we use it as-is. No action is needed. */ \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC0( packm_sup_init_mem_a )


//
// packm_sup_finalize_mem_a
//
// Counterpart of packm_sup_init_mem_a: when did_pack is TRUE and `mem` is
// allocated, releases the block via bli_membrk_release(). Otherwise does
// nothing. The thread parameter is not referenced by the body.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       bool_t           did_pack, \
       rntm_t* restrict rntm, \
       mem_t*  restrict mem, \
       thrinfo_t* restrict thread  \
     ) \
{ \
	/* Inspect whether we previously packed matrix A. */ \
	if ( did_pack == FALSE ) \
	{ \
		/* If we didn't pack matrix A, there's nothing to be done. */ \
	} \
	else /* if ( did_pack == TRUE ) */ \
	{ \
		/* Check the mem_t entry provided by the caller. Only proceed if it
		   is allocated, which it should be. */ \
		if ( bli_mem_is_alloc( mem ) ) \
		{ \
			bli_membrk_release \
			( \
			  rntm, \
			  mem \
			); \
		} \
	} \
}

INSERT_GENTFUNC_BASIC0( packm_sup_finalize_mem_a )


//
// packm_sup_init_a
//
// Computes the buffer address, strides, panel dimension/stride, padded
// dimensions and pack schema that describe matrix A as it will be consumed.
// All results are written through the pointer parameters:
//   - will_pack == FALSE: *p = x, *rs_p/*cs_p = x's strides, *pd_p = mr,
//     *ps_p = mr * rs_x, *m_max = m, *k_max = k, *schema = BLIS_NOT_PACKED.
//   - will_pack == TRUE:  *p = bli_mem_buffer( mem ), *m_max = m rounded up
//     to a multiple of mr, *k_max = k, *pd_p = mr, *ps_p = mr * k, and
//       - stor_id RRC/CRC: *rs_p = k, *cs_p = 1,  *schema = BLIS_PACKED_ROWS;
//       - otherwise:       *rs_p = 1, *cs_p = mr, *schema = BLIS_PACKED_ROW_PANELS.
// No data is copied here. The cntx and thread parameters are not referenced
// by the body.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       bool_t           will_pack, \
       stor3_t          stor_id, \
       pack_t* restrict schema, \
       dim_t            m, \
       dim_t            k, \
       dim_t            mr, \
       dim_t*  restrict m_max, \
       dim_t*  restrict k_max, \
       ctype*           x, inc_t           rs_x, inc_t           cs_x, \
       ctype**          p, inc_t* restrict rs_p, inc_t* restrict cs_p, \
                           dim_t* restrict pd_p, inc_t* restrict ps_p, \
       cntx_t* restrict cntx, \
       mem_t*  restrict mem, \
       thrinfo_t* restrict thread  \
     ) \
{ \
	/* Inspect whether we are going to be packing matrix A. */ \
	if ( will_pack == FALSE ) \
	{ \
		*m_max = m; \
		*k_max = k; \
\
		/* Set the parameters for use with no packing of A (ie: using the
		   source matrix A directly). */ \
		{ \
			/* Use the strides of the source matrix as the final values. */ \
			*rs_p = rs_x; \
			*cs_p = cs_x; \
\
			*pd_p = mr; \
			*ps_p = mr * rs_x; \
\
			/* Set the schema to "not packed" to indicate that packing will be
			   skipped. */ \
			*schema = BLIS_NOT_PACKED; \
		} \
\
		/* Since we won't be packing, simply update the buffer address provided
		   by the caller to point to the source matrix. */ \
		*p = x; \
	} \
	else /* if ( will_pack == TRUE ) */ \
	{ \
		/* NOTE: This "rounding up" of the last upanel is actually optional
		   for the rrc/crc cases, but absolutely necessary for the other cases
		   since we NEED that last micropanel to have the same ldim (cs_p) as
		   the other micropanels. Why? So that millikernels can use the same
		   upanel ldim for all iterations of the ir loop. */ \
		*m_max = ( m / mr + ( m % mr ? 1 : 0 ) ) * mr; \
		*k_max = k; \
\
		/* Determine the dimensions and strides for the packed matrix A. */ \
		if ( stor_id == BLIS_RRC || \
		     stor_id == BLIS_CRC ) \
		{ \
			/* stor3_t id values _RRC and _CRC: pack A to plain row storage. */ \
			*rs_p = k; \
			*cs_p = 1; \
\
			*pd_p = mr; \
			*ps_p = mr * k; \
\
			/* Set the schema to "row packed" to indicate packing to plain
			   row storage. */ \
			*schema = BLIS_PACKED_ROWS; \
		} \
		else \
		{ \
			/* All other stor3_t ids: pack A to column-stored row-panels. */ \
			*rs_p = 1; \
			*cs_p = mr; \
\
			*pd_p = mr; \
			*ps_p = mr * k; \
\
			/* Set the schema to "packed row panels" to indicate packing to
			   conventional column-stored row panels. */ \
			*schema = BLIS_PACKED_ROW_PANELS; \
		} \
\
		/* Set the buffer address provided by the caller to point to the
		   memory associated with the mem_t entry acquired from the memory
		   broker. */ \
		*p = bli_mem_buffer( mem ); \
	} \
}

INSERT_GENTFUNC_BASIC0( packm_sup_init_a )


//
// Define BLAS-like interfaces to the variant chooser.
//

//
// packm_sup_a
//
// Calls packm_sup_init_a to fill in *p, *rs_p, *cs_p and *ps_p, and then,
// if will_pack is TRUE, copies A into the buffer at *p:
//   - schema BLIS_PACKED_ROWS: via the expert typed-API copym; kappa is not
//     applied on this path.
//   - otherwise: via packm_sup_var1, which receives kappa.
// When will_pack is FALSE no data is moved and *p simply refers to a.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       bool_t           will_pack, \
       stor3_t          stor_id, \
       trans_t          transc, \
       dim_t            m, \
       dim_t            k, \
       dim_t            mr, \
       ctype*  restrict kappa, \
       ctype*  restrict a, inc_t           rs_a, inc_t           cs_a, \
       ctype** restrict p, inc_t* restrict rs_p, inc_t* restrict cs_p, \
                                                 inc_t* restrict ps_p, \
       cntx_t* restrict cntx, \
       mem_t*  restrict mem, \
       thrinfo_t* restrict thread  \
     ) \
{ \
	pack_t schema; \
	dim_t  m_max; \
	dim_t  k_max; \
	dim_t  pd_p; \
\
	/* Determine the packing buffer and related parameters for matrix A. If A
	   will not be packed, then *p will be set to point to a and *rs_p, *cs_p
	   will be set to a's strides. */ \
	PASTEMAC(ch,packm_sup_init_a) \
	( \
	  will_pack, \
	  stor_id, \
	  &schema, \
	  m, k, mr, \
	  &m_max, &k_max, \
	  a,  rs_a,  cs_a, \
	  p,  rs_p,  cs_p, \
	      &pd_p, ps_p, \
	  cntx, \
	  mem, \
	  thread  \
	); \
\
	/* Inspect whether we are going to be packing matrix A. */ \
	if ( will_pack == FALSE ) \
	{ \
		/* If we aren't going to pack matrix A, then there's nothing to do. */ \
		/* printf( "blis_ packm_sup_a: not packing A.\n" ); \
		*/ \
	} \
	else /* if ( will_pack == TRUE ) */ \
	{ \
		if ( schema == BLIS_PACKED_ROWS ) \
		{ \
			/* For plain packing by rows, use copym.
			   NOTE: We assume kappa = 1; otherwise, we need scal2m. */ \
\
			/* NOTE: This call to copym must be replaced by a proper packm
			   variant, implemented as a loop over copym, once multithreading
			   support is added. */ \
\
			/* printf( "blis_ packm_sup_a: packing A to rows.\n" ); \
			*/ \
			PASTEMAC2(ch,copym,BLIS_TAPI_EX_SUF) \
			( \
			  0, \
			  BLIS_NONUNIT_DIAG, \
			  BLIS_DENSE, \
			  transc, \
			  m, \
			  k, \
			  a,  rs_a,  cs_a, \
			  *p, *rs_p, *cs_p, \
			  cntx, \
			  NULL  \
			); \
		} \
		else /* if ( schema == BLIS_PACKED_ROW_PANELS ) */ \
		{ \
			/* printf( "blis_ packm_sup_a: packing A to row panels.\n" ); \
			*/ \
			/* For packing to column-stored row panels, use var1. */ \
			PASTEMAC(ch,packm_sup_var1) \
			( \
			  transc, \
			  schema, \
			  m, \
			  k, \
			  m_max, \
			  k_max, \
			  kappa, \
			  a,  rs_a,  cs_a, \
			  *p, *rs_p, *cs_p, \
			      pd_p,  *ps_p, \
			  cntx, \
			  thread  \
			); \
		} \
	} \
}

INSERT_GENTFUNC_BASIC0( packm_sup_a )

blis-0.6.1/frame/3/bli_l3_sup_packm_a.h000066400000000000000000000075311360743507500176020ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ packbuf_t pack_buf_type, \ stor3_t stor_id, \ dim_t m, \ dim_t k, \ dim_t mr, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ INSERT_GENTPROT_BASIC0( packm_sup_init_mem_a ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t did_pack, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ INSERT_GENTPROT_BASIC0( packm_sup_finalize_mem_a ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ stor3_t stor_id, \ pack_t* restrict schema, \ dim_t m, \ dim_t k, \ dim_t mr, \ dim_t* restrict m_max, \ dim_t* restrict k_max, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype** p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ dim_t* restrict pd_p, inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ INSERT_GENTPROT_BASIC0( packm_sup_init_a ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ stor3_t stor_id, \ trans_t transc, \ dim_t m, \ dim_t k, \ dim_t mr, \ ctype* restrict kappa, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype** restrict p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ INSERT_GENTPROT_BASIC0( packm_sup_a ) blis-0.6.1/frame/3/bli_l3_sup_packm_b.c000066400000000000000000000255041360743507500175760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ packbuf_t pack_buf_type, \ stor3_t stor_id, \ dim_t k, \ dim_t n, \ dim_t nr, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ /* Inspect whether we are going to be packing matrix B. 
*/ \ if ( will_pack == FALSE ) \ { \ } \ else /* if ( will_pack == TRUE ) */ \ { \ packbuf_t pack_buf_type_use; \ \ /* NOTE: This is "rounding up" of the last upanel is actually optional for the rrc/crc cases, but absolutely necessary for the other cases since we NEED that last micropanel to have the same ldim (cs_p) as the other micropanels. Why? So that millikernels can use the same upanel ldim for all iterations of the ir loop. */ \ const dim_t k_pack = k; \ const dim_t n_pack = ( n / nr + ( n % nr ? 1 : 0 ) ) * nr; \ \ /* Determine the dimensions and strides for the packed matrix B. */ \ if ( stor_id == BLIS_RRC || \ stor_id == BLIS_CRC ) \ { \ /* stor3_t id values _RRC and _CRC: pack B to plain column storage, which can use packing buffer type for general usage. */ \ pack_buf_type_use = BLIS_BUFFER_FOR_GEN_USE; \ } \ else \ { \ /* All other stor3_t ids: pack A to row-stored column-panels using the packing buffer type as specified by the caller. */ \ /*pack_buf_type_use = BLIS_BUFFER_FOR_B_PANEL;*/ \ pack_buf_type_use = pack_buf_type; \ } \ \ /* Compute the size of the memory block eneded. */ \ siz_t size_needed = sizeof( ctype ) * k_pack * n_pack; \ \ /* Check the mem_t entry provided by the caller. If it is unallocated, then we need to acquire a block from the memory broker. */ \ if ( bli_mem_is_unalloc( mem ) ) \ { \ bli_membrk_acquire_m \ ( \ rntm, \ size_needed, \ pack_buf_type_use, \ mem \ ); \ } \ else \ { \ /* NOTE: This shouldn't execute since the sup code path calls this function only once, before *any* loops of the gemm algorithm are encountered. */ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ /* If the mem_t entry provided by the caller does NOT contain a NULL buffer, then a block has already been acquired from the memory broker and cached by the caller. 
*/ \ \ /* As a sanity check, we should make sure that the mem_t object isn't associated with a block that is too small compared to the size of the packed matrix buffer that is needed, according to the value computed above. */ \ siz_t mem_size = bli_mem_size( mem ); \ \ if ( mem_size < size_needed ) \ { \ bli_membrk_release \ ( \ rntm, \ mem \ ); \ bli_membrk_acquire_m \ ( \ rntm, \ size_needed, \ pack_buf_type_use, \ mem \ ); \ } \ else \ { \ /* If the mem_t entry is already allocated and sufficiently large, then we use it as-is. No action is needed. */ \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( packm_sup_init_mem_b ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t did_pack, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ /* Inspect whether we previously packed matrix A. */ \ if ( did_pack == FALSE ) \ { \ /* If we didn't pack matrix A, there's nothing to be done. */ \ } \ else /* if ( did_pack == TRUE ) */ \ { \ /* Check the mem_t entry provided by the caller. Only proceed if it is allocated, which it should be. */ \ if ( bli_mem_is_alloc( mem ) ) \ { \ bli_membrk_release \ ( \ rntm, \ mem \ ); \ } \ } \ } INSERT_GENTFUNC_BASIC0( packm_sup_finalize_mem_b ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ stor3_t stor_id, \ pack_t* restrict schema, \ dim_t k, \ dim_t n, \ dim_t nr, \ dim_t* restrict k_max, \ dim_t* restrict n_max, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype** p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ dim_t* restrict pd_p, inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ /* Inspect whether we are going to be packing matrix B. */ \ if ( will_pack == FALSE ) \ { \ *k_max = k; \ *n_max = n; \ \ /* Set the parameters for use with no packing of B (ie: using the source matrix B directly). 
*/ \ { \ /* Use the strides of the source matrix as the final values. */ \ *rs_p = rs_x; \ *cs_p = cs_x; \ \ *pd_p = nr; \ *ps_p = nr * cs_x; \ \ /* Set the schema to "not packed" to indicate that packing will be skipped. */ \ *schema = BLIS_NOT_PACKED; \ } \ \ /* Since we won't be packing, simply update the buffer address provided by the caller to point to source matrix. */ \ *p = x; \ } \ else /* if ( will_pack == TRUE ) */ \ { \ /* NOTE: This is "rounding up" of the last upanel is actually optional for the rrc/crc cases, but absolutely necessary for the other cases since we NEED that last micropanel to have the same ldim (cs_p) as the other micropanels. Why? So that millikernels can use the same upanel ldim for all iterations of the ir loop. */ \ *k_max = k; \ *n_max = ( n / nr + ( n % nr ? 1 : 0 ) ) * nr; \ \ /* Determine the dimensions and strides for the packed matrix B. */ \ if ( stor_id == BLIS_RRC || \ stor_id == BLIS_CRC ) \ { \ /* stor3_t id values _RRC and _CRC: pack B to plain row storage. */ \ *rs_p = 1; \ *cs_p = k; \ \ *pd_p = nr; \ *ps_p = k * nr; \ \ /* Set the schema to "column packed" to indicate packing to plain column storage. */ \ *schema = BLIS_PACKED_COLUMNS; \ } \ else \ { \ /* All other stor3_t ids: pack A to column-stored row-panels. */ \ *rs_p = nr; \ *cs_p = 1; \ \ *pd_p = nr; \ *ps_p = k * nr; \ \ /* Set the schema to "packed row panels" to indicate packing to conventional column-stored row panels. */ \ *schema = BLIS_PACKED_COL_PANELS; \ } \ \ /* Set the buffer address provided by the caller to point to the memory associated with the mem_t entry acquired from the memory broker. */ \ *p = bli_mem_buffer( mem ); \ } \ } INSERT_GENTFUNC_BASIC0( packm_sup_init_b ) // // Define BLAS-like interfaces to the variant chooser. 
// /* packm_sup_b: first calls packm_sup_init_b to set *p, *rs_p, *cs_p, *ps_p and to obtain the pack schema, k_max, n_max and pd_p. If will_pack is FALSE nothing further is done. Otherwise B is copied into *p: with copym when the schema is BLIS_PACKED_COLUMNS (kappa is not passed on that path), and with packm_sup_var1 for any other schema. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ stor3_t stor_id, \ trans_t transc, \ dim_t k, \ dim_t n, \ dim_t nr, \ ctype* restrict kappa, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype** restrict p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ) \ { \ pack_t schema; \ dim_t k_max; \ dim_t n_max; \ dim_t pd_p; \ \ /* Determine the packing buffer and related parameters for matrix B. If B will not be packed, then *p will be set to point to b, and *rs_p and *cs_p will be set to the strides of b. */ \ PASTEMAC(ch,packm_sup_init_b) \ ( \ will_pack, \ stor_id, \ &schema, \ k, n, nr, \ &k_max, &n_max, \ b, rs_b, cs_b, \ p, rs_p, cs_p, \ &pd_p, ps_p, \ cntx, \ mem, \ thread \ ); \ \ /* Inspect whether we are going to be packing matrix B. */ \ if ( will_pack == FALSE ) \ { \ /* If we aren't going to pack matrix B, then there's nothing to do. */ \ /* printf( "blis_ packm_sup_b: not packing B.\n" ); \ */ \ } \ else /* if ( will_pack == TRUE ) */ \ { \ if ( schema == BLIS_PACKED_COLUMNS ) \ { \ /* For plain packing by columns, use copym. NOTE: We assume kappa = 1; otherwise, we need scal2m. */ \ \ /* NOTE: This call to copym must be replaced by a proper packm variant, implemented as a loop over copym, once multithreading support is added. */ \ \ /* printf( "blis_ packm_sup_b: packing B to columns.\n" ); \ */ \ PASTEMAC2(ch,copym,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ transc, \ k, \ n, \ b, rs_b, cs_b, \ *p, *rs_p, *cs_p, \ cntx, \ NULL \ ); \ } \ else /* if ( schema == BLIS_PACKED_COL_PANELS ) */ \ { \ /* printf( "blis_ packm_sup_b: packing B to col panels.\n" ); \ */ \ /* For packing to row-stored column panels, use var1.
*/ \ PASTEMAC(ch,packm_sup_var1) \ ( \ transc, \ schema, \ k, \ n, \ k_max, \ n_max, \ kappa, \ b, rs_b, cs_b, \ *p, *rs_p, *cs_p, \ pd_p, *ps_p, \ cntx, \ thread \ ); \ } \ } \ } INSERT_GENTFUNC_BASIC0( packm_sup_b ) blis-0.6.1/frame/3/bli_l3_sup_packm_b.h000066400000000000000000000075311360743507500176030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ packbuf_t pack_buf_type, \ stor3_t stor_id, \ dim_t k, \ dim_t n, \ dim_t nr, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ INSERT_GENTPROT_BASIC0( packm_sup_init_mem_b ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t did_pack, \ rntm_t* restrict rntm, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ INSERT_GENTPROT_BASIC0( packm_sup_finalize_mem_b ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ stor3_t stor_id, \ pack_t* restrict schema, \ dim_t k, \ dim_t n, \ dim_t nr, \ dim_t* restrict k_max, \ dim_t* restrict n_max, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype** p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ dim_t* restrict pd_p, inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ INSERT_GENTPROT_BASIC0( packm_sup_init_b ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ bool_t will_pack, \ stor3_t stor_id, \ trans_t transc, \ dim_t k, \ dim_t n, \ dim_t nr, \ ctype* restrict kappa, \ ctype* restrict x, inc_t rs_x, inc_t cs_x, \ ctype** restrict p, inc_t* restrict rs_p, inc_t* restrict cs_p, \ inc_t* restrict ps_p, \ cntx_t* restrict cntx, \ mem_t* restrict mem, \ thrinfo_t* restrict thread \ ); \ INSERT_GENTPROT_BASIC0( packm_sup_b ) blis-0.6.1/frame/3/bli_l3_sup_packm_var.c000066400000000000000000000304351360743507500201440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-like interfaces to the variants. 
// /* packm_sup_var1: packs the source matrix c into p one micropanel at a time. If transc carries a transposition, the strides of c are swapped up front. The number of micropanels is n_iter = ceil( iter_dim / pd_p ); micropanel number it is written by packm_cxk at p + it * ps_p. Whether the calling thread packs a given micropanel is decided by bli_thread_range_jrir() and bli_packm_my_iter(); the destination pointer advances on every iteration regardless. */ #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ trans_t transc, \ pack_t schema, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p, \ cntx_t* restrict cntx, \ thrinfo_t* restrict thread \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict c_cast = c; \ ctype* restrict p_cast = p; \ \ dim_t iter_dim; \ dim_t n_iter; \ dim_t it, ic; \ dim_t ic0; \ doff_t ic_inc; \ dim_t panel_len_full; \ dim_t panel_len_i; \ dim_t panel_len_max; \ dim_t panel_len_max_i; \ dim_t panel_dim_i; \ dim_t panel_dim_max; \ inc_t vs_c; \ inc_t ldc; \ inc_t ldp, p_inc; \ conj_t conjc; \ \ \ /* Extract the conjugation bit from the transposition argument. */ \ conjc = bli_extract_conj( transc ); \ \ /* If c needs a transposition, induce it so that we can more simply express the remaining parameters and code. */ \ if ( bli_does_trans( transc ) ) \ { \ bli_swap_incs( &rs_c, &cs_c ); \ bli_toggle_trans( &transc ); \ } \ \ /* Create flags to indicate row or column storage. Note that the schema bit that encodes row or column is describing the form of micro-panel, not the storage in the micro-panel. Hence the mismatch in "row" and "column" semantics. */ \ bool_t row_stored = bli_is_col_packed( schema ); \ /*bool_t col_stored = bli_is_row_packed( schema );*/ \ \ /* If the row storage flag indicates row storage, then we are packing to column panels; otherwise, we are packing to row panels. */ \ if ( row_stored ) \ { \ /* Prepare to pack to row-stored column panels. */ \ iter_dim = n; \ panel_len_full = m; \ panel_len_max = m_max; \ panel_dim_max = pd_p; \ vs_c = cs_c; \ ldc = rs_c; \ ldp = rs_p; \ } \ else /* if ( col_stored ) */ \ { \ /* Prepare to pack to column-stored row panels.
*/ \ iter_dim = m; \ panel_len_full = n; \ panel_len_max = n_max; \ panel_dim_max = pd_p; \ vs_c = rs_c; \ ldc = cs_c; \ ldp = cs_p; \ } \ \ /* Compute the total number of iterations we'll need (iter_dim divided by panel_dim_max, rounded up). */ \ n_iter = iter_dim / panel_dim_max + ( iter_dim % panel_dim_max ? 1 : 0 ); \ \ /* Set the initial value and increment of the index into C. Iteration always runs forward here; there is no reverse-iteration case in this variant. */ \ { \ ic0 = 0; \ ic_inc = panel_dim_max; \ } \ \ ctype* restrict p_begin = p_cast; \ \ /* Query the number of threads and thread ids from the current thread's packm thrinfo_t node. */ \ const dim_t nt = bli_thread_n_way( thread ); \ const dim_t tid = bli_thread_work_id( thread ); \ \ /* Suppress warnings in case nt or tid isn't used (ie: as in slab partitioning). */ \ ( void )nt; \ ( void )tid; \ \ dim_t it_start, it_end, it_inc; \ \ /* Determine the thread range and increment using the current thread's packm thrinfo_t node. NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. */ \ bli_thread_range_jrir( thread, n_iter, 1, FALSE, &it_start, &it_end, &it_inc ); \ \ /* Iterate over every logical micropanel in the source matrix. */ \ for ( ic = ic0, it = 0; it < n_iter; \ ic += ic_inc, it += 1 ) \ { \ panel_dim_i = bli_min( panel_dim_max, iter_dim - ic ); \ \ ctype* restrict c_begin = c_cast + (ic )*vs_c; \ \ ctype* restrict c_use = c_begin; \ ctype* restrict p_use = p_begin; \ \ { \ panel_len_i = panel_len_full; \ panel_len_max_i = panel_len_max; \ \ /* The definition of bli_packm_my_iter() will depend on whether slab or round-robin partitioning was requested at configure-time. */ \ if ( bli_packm_my_iter( it, it_start, it_end, tid, nt ) ) \ { \ PASTEMAC(ch,packm_cxk) \ ( \ conjc, \ schema, \ panel_dim_i, \ panel_dim_max, \ panel_len_i, \ panel_len_max_i, \ kappa_cast, \ c_use, vs_c, ldc, \ p_use, ldp, \ cntx \ ); \ } \ \ /* NOTE: This value is equivalent to ps_p.
*/ \ p_inc = ps_p; \ } \ \ p_begin += p_inc; \ \ /* if ( row_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_sup_var1: b packed", panel_len_max, panel_dim_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ if ( !row_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_sup_var1: a packed", panel_dim_max, panel_len_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ */ \ } \ \ } INSERT_GENTFUNCR_BASIC( packm, packm_sup_var1 ) /* if ( row_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_var2: b", m, n, \ c_cast, rs_c, cs_c, "%4.1f", "" ); \ if ( col_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_var2: a", m, n, \ c_cast, rs_c, cs_c, "%4.1f", "" ); \ */ /* if ( row_stored ) \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b packed", *m_panel_max, *n_panel_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ else \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a packed", *m_panel_max, *n_panel_max, \ p_use, rs_p, cs_p, "%5.2f", "" ); \ */ \ \ /* if ( col_stored ) { \ if ( bli_thread_work_id( thread ) == 0 ) \ { \ printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ fflush( stdout ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \ ( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ fflush( stdout ); \ } \ bli_thread_obarrier( thread ); \ if ( bli_thread_work_id( thread ) == 1 ) \ { \ printf( "packm_blk_var1: thread %lu (a = %p, ap = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ fflush( stdout ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: a", *m_panel_use, *n_panel_use, \ ( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: ap", *m_panel_max, *n_panel_max, \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ fflush( stdout ); \ } \ bli_thread_obarrier( thread ); \ } \ else { \ if ( bli_thread_work_id( thread ) == 0 ) \ { \ printf(
"packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ fflush( stdout ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \ ( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ fflush( stdout ); \ } \ bli_thread_obarrier( thread ); \ if ( bli_thread_work_id( thread ) == 1 ) \ { \ printf( "packm_blk_var1: thread %lu (b = %p, bp = %p)\n", bli_thread_work_id( thread ), c_use, p_use ); \ fflush( stdout ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: b", *m_panel_use, *n_panel_use, \ ( ctype* )c_use, rs_c, cs_c, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "packm_blk_var1: bp", *m_panel_max, *n_panel_max, \ ( ctype* )p_use, rs_p, cs_p, "%4.1f", "" ); \ fflush( stdout ); \ } \ bli_thread_obarrier( thread ); \ } \ */ /* if ( bli_is_4mi_packed( schema ) ) { \ printf( "packm_var2: is_p_use = %lu\n", is_p_use ); \ if ( col_stored ) { \ if ( 0 ) \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: a_r", *m_panel_use, *n_panel_use, \ ( ctype_r* )c_use, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: ap_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: ap_i", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use + is_p_use, rs_p, cs_p, "%4.1f", "" ); \ } \ if ( row_stored ) { \ if ( 0 ) \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: b_r", *m_panel_use, *n_panel_use, \ ( ctype_r* )c_use, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_i", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use + is_p_use, rs_p, cs_p, "%4.1f", "" ); \ } \ } \ */ /* PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_rpi", *m_panel_max, *n_panel_max, \ ( ctype_r* 
)p_use, rs_p, cs_p, "%4.1f", "" ); \ */ /* if ( row_stored ) { \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: b_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )c_use, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: b_i", *m_panel_max, *n_panel_max, \ (( ctype_r* )c_use)+rs_c, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ inc_t is_b = rs_p * *m_panel_max; \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: bp_i", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use + is_b, rs_p, cs_p, "%4.1f", "" ); \ } \ */ /* if ( col_stored ) { \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: a_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )c_use, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: a_i", *m_panel_max, *n_panel_max, \ (( ctype_r* )c_use)+rs_c, 2*rs_c, 2*cs_c, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: ap_r", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use, rs_p, cs_p, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "packm_var2: ap_i", *m_panel_max, *n_panel_max, \ ( ctype_r* )p_use + p_inc, rs_p, cs_p, "%4.1f", "" ); \ } \ */ blis-0.6.1/frame/3/bli_l3_sup_packm_var.h000066400000000000000000000045201360743507500201450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces to the variants. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ trans_t transc, \ pack_t schema, \ dim_t m, \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ ctype* restrict kappa, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ ctype* restrict p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p, \ cntx_t* restrict cntx, \ thrinfo_t* restrict thread \ ); INSERT_GENTPROT_BASIC0( packm_sup_var1 ) blis-0.6.1/frame/3/bli_l3_sup_ref.c000066400000000000000000000071621360743507500167560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" /* bli_gemmsup_ref: when error checking is enabled, validates the operands with bli_gemm_check(); it then returns the err_t produced by bli_l3_sup_thread_decorator(), to which it passes bli_gemmsup_int, BLIS_GEMM and the unchanged operands, cntx and rntm. The sections guarded by #if 0 are disabled and not compiled. */ err_t bli_gemmsup_ref ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { // This function implements the default gemmsup handler. If you are a // BLIS developer and wish to use a different gemmsup handler, please // register a different function pointer in the context in your // sub-configuration's bli_cntx_init_*() function. // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemm_check( alpha, a, b, beta, c, cntx ); #if 0 // FGVZ: Will this be needed for constructing thrinfo_t's (recall: the // sba needs to be attached to the rntm; see below)? Or will those nodes // just be created "locally," in an exposed manner?
// Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. bli_rntm_set_ways_for_op ( BLIS_GEMM, BLIS_LEFT, // ignored for gemm/hemm/symm bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // FGVZ: the sba needs to be attached to the rntm. But it needs // to be done in the thread region, since it needs a thread id. //bli_sba_rntm_set_pool( tid, array, rntm_p ); #endif #if 0 printf( "rntm.pack_a = %d\n", ( int )bli_rntm_pack_a( rntm ) ); printf( "rntm.pack_b = %d\n", ( int )bli_rntm_pack_b( rntm ) ); #endif //bli_rntm_set_pack_a( 0, rntm ); //bli_rntm_set_pack_b( 0, rntm ); // May not need these here since packm_sup infers the schemas based // on the stor3_t id. (This would also mean that they don't need to // be passed into the thread decorator below.) //pack_t schema_a = BLIS_PACKED_ROW_PANELS; //pack_t schema_b = BLIS_PACKED_COL_PANELS; return bli_l3_sup_thread_decorator ( bli_gemmsup_int, BLIS_GEMM, // operation family id //schema_a, //schema_b, alpha, a, b, beta, c, cntx, rntm ); } blis-0.6.1/frame/3/bli_l3_sup_ref.h000066400000000000000000000034641360743507500167640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ err_t bli_gemmsup_ref ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); blis-0.6.1/frame/3/bli_l3_sup_var12.c000066400000000000000000000536001360743507500171330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" /* FUNCPTR_T (an alias for gemmsup_fp) is the pointer type for the type-specific gemmsup variant functions: conjugation flags, the m/n/k dimensions, untyped alpha/a/b/beta/c buffers with their row and column strides, the effective stor3_t id, and the context and runtime objects. Tables of this type are declared with GENARRAY and indexed by datatype; the uses visible in this part of the file are inside an #if 0 region. */ #define FUNCPTR_T gemmsup_fp typedef void (*FUNCPTR_T) ( conj_t conja, conj_t conjb, dim_t m, dim_t n, dim_t k, void* restrict alpha, void* restrict a, inc_t rs_a, inc_t cs_a, void* restrict b, inc_t rs_b, inc_t cs_b, void* restrict beta, void* restrict c, inc_t rs_c, inc_t cs_c, stor3_t eff_id, cntx_t* restrict cntx, rntm_t* restrict rntm ); #if 0 // // -- var2 --------------------------------------------------------------------- // static FUNCPTR_T GENARRAY(ftypes_var2,gemmsup_ref_var2); void bli_gemmsup_ref_var2 ( trans_t trans, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, stor3_t eff_id, cntx_t* cntx, rntm_t* rntm ) { #if 0 obj_t at, bt; bli_obj_alias_to( a, &at ); bli_obj_alias_to( b, &bt ); // Induce transpositions on A and/or B if either object is marked for // transposition. We can induce "fast" transpositions since the objects // are guaranteed to not have structure or be packed.
if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); } if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); } const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( &at ); void* restrict buf_a = bli_obj_buffer_at_off( &at ); const inc_t rs_a = bli_obj_row_stride( &at ); const inc_t cs_a = bli_obj_col_stride( &at ); void* restrict buf_b = bli_obj_buffer_at_off( &bt ); const inc_t rs_b = bli_obj_row_stride( &bt ); const inc_t cs_b = bli_obj_col_stride( &bt ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #else const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); dim_t k; void* restrict buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a; inc_t cs_a; void* restrict buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b; inc_t cs_b; if ( bli_obj_has_notrans( a ) ) { k = bli_obj_width( a ); rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else // if ( bli_obj_has_trans( a ) ) { // Assign the variables with an implicit transposition. k = bli_obj_length( a ); rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else // if ( bli_obj_has_trans( b ) ) { // Assign the variables with an implicit transposition. 
rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #endif // Index into the type combination array to extract the correct // function pointer. FUNCPTR_T f = ftypes_var2[dt_exec]; // Invoke the function. f ( conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, eff_id, cntx, rntm ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ stor3_t eff_id, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm \ ) \ { \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* If alpha is zero, scale by beta and return. */ \ if ( PASTEMAC(ch,eq0)( *(( ctype* )alpha) ) ) \ { \ PASTEMAC(ch,scalm) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m, n, \ beta, \ c, rs_c, cs_c \ ); \ return; \ } \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for various blocksizes. */ \ const dim_t NC = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NC, cntx ); \ const dim_t KC = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_KC, cntx ); \ const dim_t MC = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MC, cntx ); \ const dim_t NR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MR, cntx ); \ \ /* Compute partitioning step values for each matrix of each loop. 
*/ \ const inc_t jcstep_c = cs_c * NC; \ const inc_t jcstep_b = cs_b * NC; \ \ const inc_t pcstep_a = cs_a * KC; \ const inc_t pcstep_b = rs_b * KC; \ \ const inc_t icstep_c = rs_c * MC; \ const inc_t icstep_a = rs_a * MC; \ \ const inc_t jrstep_c = cs_c * NR; \ const inc_t jrstep_b = cs_b * NR; \ \ const inc_t irstep_c = rs_c * MR; \ const inc_t irstep_a = rs_a * MR; \ \ /* Query a stor3_t enum value to characterize the problem. Examples: BLIS_RRR, BLIS_RRC, BLIS_RCR, BLIS_RCC, etc. NOTE: If any matrix is general-stored, we use the all-purpose sup microkernel corresponding to the stor3_t enum value BLIS_XXX. */ \ const stor3_t stor_id = bli_stor3_from_strides( rs_c, cs_c, \ rs_a, cs_a, rs_b, cs_b ); \ \ /* Query the context for the sup microkernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemmsup_ker_ft) \ gemmsup_ker = bli_cntx_get_l3_sup_ker_dt( dt, stor_id, cntx ); \ \ ctype* restrict a_00 = a; \ ctype* restrict b_00 = b; \ ctype* restrict c_00 = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ \ ctype* restrict one = PASTEMAC(ch,1); \ \ auxinfo_t aux; \ \ /* Compute number of primary and leftover components of the outer dimensions. NOTE: Functionally speaking, we compute jc_iter as: jc_iter = n / NC; if ( jc_left ) ++jc_iter; However, this is implemented as: jc_iter = ( n + NC - 1 ) / NC; This avoids a branch at the cost of two additional integer instructions. The pc_iter, ic_iter, jr_iter, and ir_iter variables are computed in a similar manner. */ \ const dim_t jc_iter = ( n + NC - 1 ) / NC; \ const dim_t jc_left = n % NC; \ \ const dim_t pc_iter = ( k + KC - 1 ) / KC; \ const dim_t pc_left = k % KC; \ \ const dim_t ic_iter = ( m + MC - 1 ) / MC; \ const dim_t ic_left = m % MC; \ \ const dim_t jc_inc = 1; \ const dim_t pc_inc = 1; \ const dim_t ic_inc = 1; \ const dim_t jr_inc = 1; \ const dim_t ir_inc = 1; \ \ /* Loop over the n dimension (NC columns at a time). 
*/ \ for ( dim_t jj = 0; jj < jc_iter; jj += jc_inc ) \ { \ const dim_t nc_cur = ( bli_is_not_edge_f( jj, jc_iter, jc_left ) ? NC : jc_left ); \ \ ctype* restrict b_jc = b_00 + jj * jcstep_b; \ ctype* restrict c_jc = c_00 + jj * jcstep_c; \ \ const dim_t jr_iter = ( nc_cur + NR - 1 ) / NR; \ const dim_t jr_left = nc_cur % NR; \ \ /* Loop over the k dimension (KC rows/columns at a time). */ \ for ( dim_t pp = 0; pp < pc_iter; pp += pc_inc ) \ { \ const dim_t kc_cur = ( bli_is_not_edge_f( pp, pc_iter, pc_left ) ? KC : pc_left ); \ \ ctype* restrict a_pc = a_00 + pp * pcstep_a; \ ctype* restrict b_pc = b_jc + pp * pcstep_b; \ \ /* Only apply beta to the first iteration of the pc loop. */ \ ctype* restrict beta_use = ( pp == 0 ? beta_cast : one ); \ \ /* Loop over the m dimension (MC rows at a time). */ \ for ( dim_t ii = 0; ii < ic_iter; ii += ic_inc ) \ { \ const dim_t mc_cur = ( bli_is_not_edge_f( ii, ic_iter, ic_left ) ? MC : ic_left ); \ \ ctype* restrict a_ic = a_pc + ii * icstep_a; \ ctype* restrict c_ic = c_jc + ii * icstep_c; \ \ const dim_t ir_iter = ( mc_cur + MR - 1 ) / MR; \ const dim_t ir_left = mc_cur % MR; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( dim_t j = 0; j < jr_iter; j += jr_inc ) \ { \ const dim_t nr_cur = ( bli_is_not_edge_f( j, jr_iter, jr_left ) ? NR : jr_left ); \ \ ctype* restrict b_jr = b_pc + j * jrstep_b; \ ctype* restrict c_jr = c_ic + j * jrstep_c; \ \ /* ctype* restrict b2 = b_jr; \ */ \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( dim_t i = 0; i < ir_iter; i += ir_inc ) \ { \ const dim_t mr_cur = ( bli_is_not_edge_f( i, ir_iter, ir_left ) ? MR : ir_left ); \ \ ctype* restrict a_ir = a_ic + i * irstep_a; \ ctype* restrict c_ir = c_jr + i * irstep_c; \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. 
*/ \ /* ctype* restrict a2 = bli_gemm_get_next_a_upanel( a_ir, irstep_a, ir_inc ); \ if ( bli_is_last_iter( i, ir_iter, 0, 1 ) ) \ { \ a2 = a_00; \ b2 = bli_gemm_get_next_b_upanel( b_jr, jrstep_b, jr_inc ); \ if ( bli_is_last_iter( j, jr_iter, 0, 1 ) ) \ b2 = b_00; \ } \ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ */ \ \ /* Invoke the gemmsup micro-kernel. NOTE(review): the calls that would populate aux are commented out above, so aux is passed as declared; confirm the kernels do not read it. */ \ gemmsup_ker \ ( \ conja, \ conjb, \ mr_cur, \ nr_cur, \ kc_cur, \ alpha_cast, \ a_ir, rs_a, cs_a, \ b_jr, rs_b, cs_b, \ beta_use, \ c_ir, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ } \ } \ } \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: b1", kc_cur, nr_cur, b_jr, rs_b, cs_b, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: a1", mr_cur, kc_cur, a_ir, rs_a, cs_a, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: c ", mr_cur, nr_cur, c_ir, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemmsup_ref_var2 ) // // -- var1 --------------------------------------------------------------------- // static FUNCPTR_T GENARRAY(ftypes_var1,gemmsup_ref_var1); /* Object-based front-end for variant 1: reads the execution datatype from c, the conjugation status of a and b, the dimensions, strides, and buffers of the operands (swapping the strides of a or b when that operand is marked for transposition), and then calls the datatype-specific function selected from ftypes_var1. NOTE: the trans argument is not referenced in this function. */ void bli_gemmsup_ref_var1 ( trans_t trans, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, stor3_t eff_id, cntx_t* cntx, rntm_t* rntm ) { #if 0 obj_t at, bt; bli_obj_alias_to( a, &at ); bli_obj_alias_to( b, &bt ); // Induce transpositions on A and/or B if either object is marked for // transposition. We can induce "fast" transpositions since the objects // are guaranteed to not have structure or be packed. 
if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); } if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); } const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( &at ); void* restrict buf_a = bli_obj_buffer_at_off( &at ); const inc_t rs_a = bli_obj_row_stride( &at ); const inc_t cs_a = bli_obj_col_stride( &at ); void* restrict buf_b = bli_obj_buffer_at_off( &bt ); const inc_t rs_b = bli_obj_row_stride( &bt ); const inc_t cs_b = bli_obj_col_stride( &bt ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #else const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); dim_t k; void* restrict buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a; inc_t cs_a; void* restrict buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b; inc_t cs_b; if ( bli_obj_has_notrans( a ) ) { k = bli_obj_width( a ); rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else // if ( bli_obj_has_trans( a ) ) { // Assign the variables with an implicit transposition. k = bli_obj_length( a ); rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else // if ( bli_obj_has_trans( b ) ) { // Assign the variables with an implicit transposition. 
rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #endif // Index into the type combination array to extract the correct // function pointer. FUNCPTR_T f = ftypes_var1[dt_exec]; // Invoke the function. f ( conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, eff_id, cntx, rntm ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ stor3_t eff_id, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm \ ) \ { \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* If alpha is zero, scale by beta and return. */ \ if ( PASTEMAC(ch,eq0)( *(( ctype* )alpha) ) ) \ { \ PASTEMAC(ch,scalm) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m, n, \ beta, \ c, rs_c, cs_c \ ); \ return; \ } \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for various blocksizes. */ \ const dim_t NC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NC, cntx ); \ const dim_t KC = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_KC, cntx ); \ const dim_t MC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MC, cntx ); \ const dim_t NR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MR, cntx ); \ \ /* Nudge NC up to a multiple of MR and MC up to a multiple of NR. 
*/ \ const dim_t NC = bli_align_dim_to_mult( NC0, MR ); \ const dim_t MC = bli_align_dim_to_mult( MC0, NR ); \ \ /* Compute partitioning step values for each matrix of each loop. */ \ const inc_t jcstep_c = rs_c * NC; \ const inc_t jcstep_a = rs_a * NC; \ \ const inc_t pcstep_a = cs_a * KC; \ const inc_t pcstep_b = rs_b * KC; \ \ const inc_t icstep_c = cs_c * MC; \ const inc_t icstep_b = cs_b * MC; \ \ const inc_t jrstep_c = rs_c * MR; \ const inc_t jrstep_a = rs_a * MR; \ \ const inc_t irstep_c = cs_c * NR; \ const inc_t irstep_b = cs_b * NR; \ \ /* Query a stor3_t enum value to characterize the problem. Examples: BLIS_RRR, BLIS_RRC, BLIS_RCR, BLIS_RCC, etc. NOTE: If any matrix is general-stored, we use the all-purpose sup microkernel corresponding to the stor3_t enum value BLIS_XXX. */ \ const stor3_t stor_id = bli_stor3_from_strides( rs_c, cs_c, \ rs_a, cs_a, rs_b, cs_b ); \ \ /* Query the context for the sup microkernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemmsup_ker_ft) \ gemmsup_ker = bli_cntx_get_l3_sup_ker_dt( dt, stor_id, cntx ); \ \ ctype* restrict a_00 = a; \ ctype* restrict b_00 = b; \ ctype* restrict c_00 = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ \ ctype* restrict one = PASTEMAC(ch,1); \ \ auxinfo_t aux; \ \ /* Compute number of primary and leftover components of the outer dimensions. NOTE: Functionally speaking, we compute jc_iter as: jc_iter = m / NC; if ( jc_left ) ++jc_iter; However, this is implemented as: jc_iter = ( m + NC - 1 ) / NC; This avoids a branch at the cost of two additional integer instructions. The pc_iter, ic_iter, jr_iter, and ir_iter variables are computed in a similar manner. 
*/ \ const dim_t jc_iter = ( m + NC - 1 ) / NC; \ const dim_t jc_left = m % NC; \ \ const dim_t pc_iter = ( k + KC - 1 ) / KC; \ const dim_t pc_left = k % KC; \ \ const dim_t ic_iter = ( n + MC - 1 ) / MC; \ const dim_t ic_left = n % MC; \ \ const dim_t jc_inc = 1; \ const dim_t pc_inc = 1; \ const dim_t ic_inc = 1; \ const dim_t jr_inc = 1; \ const dim_t ir_inc = 1; \ \ /* Loop over the m dimension (NC rows at a time). */ \ for ( dim_t jj = 0; jj < jc_iter; jj += jc_inc ) \ { \ const dim_t nc_cur = ( bli_is_not_edge_f( jj, jc_iter, jc_left ) ? NC : jc_left ); \ \ ctype* restrict a_jc = a_00 + jj * jcstep_a; \ ctype* restrict c_jc = c_00 + jj * jcstep_c; \ \ const dim_t jr_iter = ( nc_cur + MR - 1 ) / MR; \ const dim_t jr_left = nc_cur % MR; \ \ /* Loop over the k dimension (KC rows/columns at a time). */ \ for ( dim_t pp = 0; pp < pc_iter; pp += pc_inc ) \ { \ const dim_t kc_cur = ( bli_is_not_edge_f( pp, pc_iter, pc_left ) ? KC : pc_left ); \ \ ctype* restrict a_pc = a_jc + pp * pcstep_a; \ ctype* restrict b_pc = b_00 + pp * pcstep_b; \ \ /* Only apply beta to the first iteration of the pc loop. */ \ ctype* restrict beta_use = ( pp == 0 ? beta_cast : one ); \ \ /* Loop over the n dimension (MC columns at a time). */ \ for ( dim_t ii = 0; ii < ic_iter; ii += ic_inc ) \ { \ const dim_t mc_cur = ( bli_is_not_edge_f( ii, ic_iter, ic_left ) ? MC : ic_left ); \ \ ctype* restrict b_ic = b_pc + ii * icstep_b; \ ctype* restrict c_ic = c_jc + ii * icstep_c; \ \ const dim_t ir_iter = ( mc_cur + NR - 1 ) / NR; \ const dim_t ir_left = mc_cur % NR; \ \ /* Loop over the m dimension (MR rows at a time). NOTE(review): jr_iter and jr_left are computed from MR, but the non-edge value assigned to nr_cur below is NR; confirm this is intended. */ \ for ( dim_t j = 0; j < jr_iter; j += jr_inc ) \ { \ const dim_t nr_cur = ( bli_is_not_edge_f( j, jr_iter, jr_left ) ? NR : jr_left ); \ \ ctype* restrict a_jr = a_pc + j * jrstep_a; \ ctype* restrict c_jr = c_ic + j * jrstep_c; \ \ /* Loop over the n dimension (NR columns at a time). 
*/ \ for ( dim_t i = 0; i < ir_iter; i += ir_inc ) \ { \ const dim_t mr_cur = ( bli_is_not_edge_f( i, ir_iter, ir_left ) ? MR : ir_left ); \ \ ctype* restrict b_ir = b_ic + i * irstep_b; \ ctype* restrict c_ir = c_jr + i * irstep_c; \ \ /* Invoke the gemmsup micro-kernel. */ \ gemmsup_ker \ ( \ conja, \ conjb, \ mr_cur, \ nr_cur, \ kc_cur, \ alpha_cast, \ a_jr, rs_a, cs_a, \ b_ir, rs_b, cs_b, \ beta_use, \ c_ir, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ } \ } \ } \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: b1", kc_cur, nr_cur, b_jr, rs_b, cs_b, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: a1", mr_cur, kc_cur, a_ir, rs_a, cs_a, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: c ", mr_cur, nr_cur, c_ir, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemmsup_ref_var1 ) #endif blis-0.6.1/frame/3/bli_l3_sup_var1n2m.c000066400000000000000000001013621360743507500174650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemmsup_fp typedef void (*FUNCPTR_T) ( bool_t packa, bool_t packb, conj_t conja, conj_t conjb, dim_t m, dim_t n, dim_t k, void* restrict alpha, void* restrict a, inc_t rs_a, inc_t cs_a, void* restrict b, inc_t rs_b, inc_t cs_b, void* restrict beta, void* restrict c, inc_t rs_c, inc_t cs_c, stor3_t eff_id, cntx_t* restrict cntx, rntm_t* restrict rntm, cntl_t* restrict cntl, thrinfo_t* restrict thread ); // // -- var1n -------------------------------------------------------------------- // static FUNCPTR_T GENARRAY(ftypes_var1n,gemmsup_ref_var1n); /* Object-based front-end for variant 1n: reads the pack flags from rntm, the execution datatype from c, and the conjugation status, dimensions, strides, and buffers of the operands, then calls the datatype-specific function selected from ftypes_var1n. When trans is not BLIS_NO_TRANSPOSE, the operation is transposed by swapping the roles of A and B, the m and n dimensions, and all row/column strides. */ void bli_gemmsup_ref_var1n ( trans_t trans, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, stor3_t eff_id, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { #if 0 obj_t at, bt; bli_obj_alias_to( a, &at ); bli_obj_alias_to( b, &bt ); // Induce transpositions on A and/or B if either object is marked for // transposition. We can induce "fast" transpositions since the objects // are guaranteed to not have structure or be packed. 
if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); } if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); } const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( &at ); void* restrict buf_a = bli_obj_buffer_at_off( &at ); const inc_t rs_a = bli_obj_row_stride( &at ); const inc_t cs_a = bli_obj_col_stride( &at ); void* restrict buf_b = bli_obj_buffer_at_off( &bt ); const inc_t rs_b = bli_obj_row_stride( &bt ); const inc_t cs_b = bli_obj_col_stride( &bt ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #else const num_t dt_exec = bli_obj_dt( c ); const bool_t packa = bli_rntm_pack_a( rntm ); const bool_t packb = bli_rntm_pack_b( rntm ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); dim_t k; void* restrict buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a; inc_t cs_a; void* restrict buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b; inc_t cs_b; if ( bli_obj_has_notrans( a ) ) { k = bli_obj_width( a ); rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else // if ( bli_obj_has_trans( a ) ) { // Assign the variables with an implicit transposition. k = bli_obj_length( a ); rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else // if ( bli_obj_has_trans( b ) ) { // Assign the variables with an implicit transposition. 
rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #endif // Index into the type combination array to extract the correct // function pointer. FUNCPTR_T f = ftypes_var1n[dt_exec]; if ( bli_is_notrans( trans ) ) { // Invoke the function. f ( packa, packb, conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, eff_id, cntx, rntm, cntl, thread ); } else { // Invoke the function (transposing the operation). f ( packb, // swap the pack values. packa, conjb, // swap the conj values. conja, n, // swap the m and n dimensions. m, k, buf_alpha, buf_b, cs_b, rs_b, // swap the positions of A and B. buf_a, cs_a, rs_a, // swap the strides of A and B. buf_beta, buf_c, cs_c, rs_c, // swap the strides of C. bli_stor3_trans( eff_id ), // transpose the stor3_t id. cntx, rntm, cntl, thread ); } } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ bool_t packa, \ bool_t packb, \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ stor3_t stor_id, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ cntl_t* restrict cntl, \ thrinfo_t* restrict thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* If m or n is zero, return immediately. */ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* If k < 1 or alpha is zero, scale by beta and return. 
*/ \ if ( k < 1 || PASTEMAC(ch,eq0)( *(( ctype* )alpha) ) ) \ { \ PASTEMAC(ch,scalm) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m, n, \ beta, \ c, rs_c, cs_c \ ); \ return; \ } \ \ /* This transposition of the stor3_t id value is inherent to variant 1. The reason: we assume that variant 2 is the "main" variant. The consequence of this is that we assume that the millikernels that iterate over m are registered to the "primary" kernel group associated with the kernel IO preference; similarly, mkernels that iterate over n are assumed to be registered to the "non-primary" group associated with the ("non-primary") anti-preference. (Note that this pattern holds regardless of whether the mkernel set has a row or column preference.) See bli_l3_sup_int.c for a higher-level view of how this choice is made. */ \ stor_id = bli_stor3_trans( stor_id ); \ \ /* Query the context for various blocksizes. */ \ const dim_t NR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t NC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NC, cntx ); \ const dim_t MC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MC, cntx ); \ const dim_t KC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_KC, cntx ); \ \ dim_t KC; \ if ( FALSE ) KC = KC0; \ else if ( stor_id == BLIS_RRC || \ stor_id == BLIS_CRC ) KC = KC0; \ else if ( m <= MR && n <= NR ) KC = KC0; \ else if ( m <= 2*MR && n <= 2*NR ) KC = KC0 / 2; \ else if ( m <= 3*MR && n <= 3*NR ) KC = (( KC0 / 3 ) / 4 ) * 4; \ else if ( m <= 4*MR && n <= 4*NR ) KC = KC0 / 4; \ else KC = (( KC0 / 5 ) / 4 ) * 4; \ \ /* Nudge NC up to a multiple of MR and MC up to a multiple of NR. NOTE: This is unique to variant 1 (ie: not performed in variant 2) because MC % MR == 0 and NC % NR == 0 is already enforced at runtime. 
*/ \ const dim_t NC = bli_align_dim_to_mult( NC0, MR ); \ const dim_t MC = bli_align_dim_to_mult( MC0, NR ); \ \ /* Query the maximum blocksize for MR, which implies a maximum blocksize extension for the final iteration. */ \ const dim_t MRM = bli_cntx_get_l3_sup_blksz_max_dt( dt, BLIS_MR, cntx ); \ const dim_t MRE = MRM - MR; \ \ /* Compute partitioning step values for each matrix of each loop. */ \ const inc_t jcstep_c = rs_c * NC; \ const inc_t jcstep_a = rs_a * NC; \ \ const inc_t pcstep_a = cs_a * KC; \ const inc_t pcstep_b = rs_b * KC; \ \ const inc_t icstep_c = cs_c * MC; \ const inc_t icstep_b = cs_b * MC; \ \ const inc_t jrstep_c = rs_c * MR; \ \ /* const inc_t jrstep_a = rs_a * MR; \ ( void )jrstep_a; \ */ \ \ /* const inc_t irstep_c = cs_c * NR; \ const inc_t irstep_b = cs_b * NR; \ */ \ \ /* Query the context for the sup microkernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemmsup_ker_ft) \ gemmsup_ker = bli_cntx_get_l3_sup_ker_dt( dt, stor_id, cntx ); \ \ ctype* restrict a_00 = a; \ ctype* restrict b_00 = b; \ ctype* restrict c_00 = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ \ ctype* restrict one = PASTEMAC(ch,1); \ \ auxinfo_t aux; \ \ /* Compute number of primary and leftover components of the outer dimensions. NOTE: Functionally speaking, we compute jc_iter as: jc_iter = m / NC; if ( jc_left ) ++jc_iter; However, this is implemented as: jc_iter = ( m + NC - 1 ) / NC; This avoids a branch at the cost of two additional integer instructions. The pc_iter, ic_iter, and jr_iter variables are computed in a similar manner. 
*/ \ const dim_t jc_iter = ( m + NC - 1 ) / NC; \ const dim_t jc_left = m % NC; \ \ const dim_t pc_iter = ( k + KC - 1 ) / KC; \ const dim_t pc_left = k % KC; \ \ const dim_t ic_iter = ( n + MC - 1 ) / MC; \ const dim_t ic_left = n % MC; \ \ const dim_t jc_inc = 1; \ const dim_t pc_inc = 1; \ const dim_t ic_inc = 1; \ const dim_t jr_inc = 1; \ /* const dim_t ir_inc = 1; \ */ \ \ /* Initialize a mem_t entry for A and B. Strictly speaking, this is only needed for the matrix we will be packing (if any), but we do it unconditionally to be safe. An alternative way of initializing the mem_t entries is: bli_mem_clear( &mem_a ); \ bli_mem_clear( &mem_b ); \ */ \ mem_t mem_a = BLIS_MEM_INITIALIZER; \ mem_t mem_b = BLIS_MEM_INITIALIZER; \ \ /* Prepare the packing destination buffer. If packing is not requested for matrix A, this function will reduce to a no-op. */ \ PASTEMAC(ch,packm_sup_init_mem_a) \ ( \ packa, \ BLIS_BUFFER_FOR_B_PANEL, /* This algorithm packs matrix A to a "panel of B". */ \ stor_id, \ NC, KC, MR, /* Note this "panel of B" is NC x KC. */ \ cntx, \ rntm, \ &mem_a, \ thread \ ); \ \ /* Prepare the packing destination buffer. If packing is not requested for matrix B, this function will reduce to a no-op. */ \ PASTEMAC(ch,packm_sup_init_mem_b) \ ( \ packb, \ BLIS_BUFFER_FOR_A_BLOCK, /* This algorithm packs matrix B to a "block of A". */ \ stor_id, \ KC, MC, NR, /* Note this "block of A" is KC x MC. */ \ cntx, \ rntm, \ &mem_b, \ thread \ ); \ \ /* Loop over the m dimension (NC rows at a time). */ \ for ( dim_t jj = 0; jj < jc_iter; jj += jc_inc ) \ { \ const dim_t nc_cur = ( bli_is_not_edge_f( jj, jc_iter, jc_left ) ? NC : jc_left ); \ \ ctype* restrict a_jc = a_00 + jj * jcstep_a; \ ctype* restrict c_jc = c_00 + jj * jcstep_c; \ \ dim_t jr_iter = ( nc_cur + MR - 1 ) / MR; \ dim_t jr_left = nc_cur % MR; \ \ /* An optimization: allow the last jr iteration to contain up to MRE additional rows of C and A (ie: up to MR + MRE rows in total). (If MRE != 0, the mkernel has agreed to handle these cases.) 
Note that this prevents us from declaring jr_iter and jr_left as const. NOTE: We forgo this optimization when packing A since packing an extended edge case is not yet supported. */ \ if ( !packa ) \ if ( MRE != 0 && 1 < jr_iter && jr_left != 0 && jr_left <= MRE ) \ { \ jr_iter--; jr_left += MR; \ } \ \ /* Loop over the k dimension (KC rows/columns at a time). */ \ for ( dim_t pp = 0; pp < pc_iter; pp += pc_inc ) \ { \ const dim_t kc_cur = ( bli_is_not_edge_f( pp, pc_iter, pc_left ) ? KC : pc_left ); \ \ ctype* restrict a_pc = a_jc + pp * pcstep_a; \ ctype* restrict b_pc = b_00 + pp * pcstep_b; \ \ /* Only apply beta to the first iteration of the pc loop. */ \ ctype* restrict beta_use = ( pp == 0 ? beta_cast : one ); \ \ ctype* a_use; \ inc_t rs_a_use, cs_a_use, ps_a_use; \ \ /* Determine the packing buffer and related parameters for matrix A. (If A will not be packed, then a_use will be set to point to a and the _a_use strides will be set accordingly.) Then call the packm sup variant chooser, which will call the appropriate implementation based on the schema deduced from the stor_id. */ \ PASTEMAC(ch,packm_sup_a) \ ( \ packa, \ stor_id, \ BLIS_NO_TRANSPOSE, \ nc_cur, kc_cur, MR, \ one, \ a_pc, rs_a, cs_a, \ &a_use, &rs_a_use, &cs_a_use, \ &ps_a_use, \ cntx, \ &mem_a, \ thread \ ); \ \ /* Alias a_use so that it's clear this is our current block of matrix A. */ \ ctype* restrict a_pc_use = a_use; \ \ /* We don't need to embed the panel stride of A within the auxinfo_t object because this variant iterates through A in the jr loop, which occurs here, within the macrokernel, not within the millikernel. */ \ /*bli_auxinfo_set_ps_a( ps_a_use, &aux );*/ \ \ /* Loop over the n dimension (MC columns at a time). */ \ for ( dim_t ii = 0; ii < ic_iter; ii += ic_inc ) \ { \ const dim_t mc_cur = ( bli_is_not_edge_f( ii, ic_iter, ic_left ) ? 
MC : ic_left ); \ \ ctype* restrict b_ic = b_pc + ii * icstep_b; \ ctype* restrict c_ic = c_jc + ii * icstep_c; \ \ /* const dim_t ir_iter = ( mc_cur + NR - 1 ) / NR; \ const dim_t ir_left = mc_cur % NR; \ */ \ \ ctype* b_use; \ inc_t rs_b_use, cs_b_use, ps_b_use; \ \ /* Determine the packing buffer and related parameters for matrix B. (If B will not be packed, then b_use will be set to point to b and the _b_use strides will be set accordingly.) Then call the packm sup variant chooser, which will call the appropriate implementation based on the schema deduced from the stor_id. NOTE: packing matrix B in this panel-block algorithm corresponds to packing matrix A in the block-panel algorithm. */ \ PASTEMAC(ch,packm_sup_b) \ ( \ packb, \ stor_id, \ BLIS_NO_TRANSPOSE, \ kc_cur, mc_cur, NR, \ one, \ b_ic, rs_b, cs_b, \ &b_use, &rs_b_use, &cs_b_use, \ &ps_b_use, \ cntx, \ &mem_b, \ thread \ ); \ \ /* Alias b_use so that it's clear this is our current block of matrix B. */ \ ctype* restrict b_ic_use = b_use; \ \ /* Embed the panel stride of B within the auxinfo_t object. The millikernel will query and use this to iterate through micropanels of B. */ \ bli_auxinfo_set_ps_b( ps_b_use, &aux ); \ \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( dim_t j = 0; j < jr_iter; j += jr_inc ) \ { \ const dim_t nr_cur = ( bli_is_not_edge_f( j, jr_iter, jr_left ) ? MR : jr_left ); \ \ /* ctype* restrict a_jr = a_pc + j * jrstep_a; \ */ \ ctype* restrict a_jr = a_pc_use + j * ps_a_use; \ ctype* restrict c_jr = c_ic + j * jrstep_c; \ \ /* Loop over the n dimension (NR columns at a time). NOTE: There is no explicit loop here; the millikernel invoked below is given the full mc_cur extent of the n dimension. */ \ { \ /* Invoke the gemmsup millikernel. */ \ gemmsup_ker \ ( \ conja, \ conjb, \ nr_cur, /* Notice: nr_cur <= MR, or <= MR + MRE on an extended last iteration. */ \ mc_cur, /* Recall: mc_cur partitions the n dimension! 
*/ \ kc_cur, \ alpha_cast, \ a_jr, rs_a_use, cs_a_use, \ b_ic_use, rs_b_use, cs_b_use, \ beta_use, \ c_jr, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ } \ } \ } \ } \ \ /* Release any memory that was acquired for packing matrices A and B. */ \ PASTEMAC(ch,packm_sup_finalize_mem_a) \ ( \ packa, \ rntm, \ &mem_a, \ thread \ ); \ PASTEMAC(ch,packm_sup_finalize_mem_b) \ ( \ packb, \ rntm, \ &mem_b, \ thread \ ); \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: b1", kc_cur, nr_cur, b_jr, rs_b, cs_b, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: a1", mr_cur, kc_cur, a_ir, rs_a, cs_a, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: c ", mr_cur, nr_cur, c_ir, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemmsup_ref_var1n ) // // -- var2m -------------------------------------------------------------------- // static FUNCPTR_T GENARRAY(ftypes_var2m,gemmsup_ref_var2m); /* Object-based front-end for variant 2m: reads the pack flags from rntm, the execution datatype from c, and the conjugation status, dimensions, strides, and buffers of the operands, then calls the datatype-specific function selected from ftypes_var2m. When trans is not BLIS_NO_TRANSPOSE, the operation is transposed by swapping the roles of A and B, the m and n dimensions, and all row/column strides. */ void bli_gemmsup_ref_var2m ( trans_t trans, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, stor3_t eff_id, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { #if 0 obj_t at, bt; bli_obj_alias_to( a, &at ); bli_obj_alias_to( b, &bt ); // Induce transpositions on A and/or B if either object is marked for // transposition. We can induce "fast" transpositions since the objects // are guaranteed to not have structure or be packed. 
if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); } if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); } const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( &at ); void* restrict buf_a = bli_obj_buffer_at_off( &at ); const inc_t rs_a = bli_obj_row_stride( &at ); const inc_t cs_a = bli_obj_col_stride( &at ); void* restrict buf_b = bli_obj_buffer_at_off( &bt ); const inc_t rs_b = bli_obj_row_stride( &bt ); const inc_t cs_b = bli_obj_col_stride( &bt ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #else const num_t dt_exec = bli_obj_dt( c ); const bool_t packa = bli_rntm_pack_a( rntm ); const bool_t packb = bli_rntm_pack_b( rntm ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); dim_t k; void* restrict buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a; inc_t cs_a; void* restrict buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b; inc_t cs_b; if ( bli_obj_has_notrans( a ) ) { k = bli_obj_width( a ); rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else // if ( bli_obj_has_trans( a ) ) { // Assign the variables with an implicit transposition. k = bli_obj_length( a ); rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else // if ( bli_obj_has_trans( b ) ) { // Assign the variables with an implicit transposition. 
rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #endif // Index into the type combination array to extract the correct // function pointer. FUNCPTR_T f = ftypes_var2m[dt_exec]; if ( bli_is_notrans( trans ) ) { // Invoke the function. f ( packa, packb, conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, eff_id, cntx, rntm, cntl, thread ); } else { // Invoke the function (transposing the operation). f ( packb, // swap the pack values. packa, conjb, // swap the conj values. conja, n, // swap the m and n dimensions. m, k, buf_alpha, buf_b, cs_b, rs_b, // swap the positions of A and B. buf_a, cs_a, rs_a, // swap the strides of A and B. buf_beta, buf_c, cs_c, rs_c, // swap the strides of C. bli_stor3_trans( eff_id ), // transpose the stor3_t id. cntx, rntm, cntl, thread ); } } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ bool_t packa, \ bool_t packb, \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ stor3_t stor_id, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ cntl_t* restrict cntl, \ thrinfo_t* restrict thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* If m or n is zero, return immediately. */ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* If k < 1 or alpha is zero, scale by beta and return. 
*/ \ if ( k < 1 || PASTEMAC(ch,eq0)( *(( ctype* )alpha) ) ) \ { \ PASTEMAC(ch,scalm) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m, n, \ beta, \ c, rs_c, cs_c \ ); \ return; \ } \ \ /* Query the context for various blocksizes. */ \ const dim_t NR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t NC = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NC, cntx ); \ const dim_t MC = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MC, cntx ); \ const dim_t KC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_KC, cntx ); \ \ dim_t KC; \ if ( stor_id == BLIS_RRR || \ stor_id == BLIS_CCC ) KC = KC0; \ else if ( stor_id == BLIS_RRC || \ stor_id == BLIS_CRC ) KC = KC0; \ else if ( m <= MR && n <= NR ) KC = KC0; \ else if ( m <= 2*MR && n <= 2*NR ) KC = KC0 / 2; \ else if ( m <= 3*MR && n <= 3*NR ) KC = (( KC0 / 3 ) / 4 ) * 4; \ else if ( m <= 4*MR && n <= 4*NR ) KC = KC0 / 4; \ else KC = (( KC0 / 5 ) / 4 ) * 4; \ \ /* Query the maximum blocksize for NR, which implies a maximum blocksize extension for the final iteration. */ \ const dim_t NRM = bli_cntx_get_l3_sup_blksz_max_dt( dt, BLIS_NR, cntx ); \ const dim_t NRE = NRM - NR; \ \ /* Compute partitioning step values for each matrix of each loop. */ \ const inc_t jcstep_c = cs_c * NC; \ const inc_t jcstep_b = cs_b * NC; \ \ const inc_t pcstep_a = cs_a * KC; \ const inc_t pcstep_b = rs_b * KC; \ \ const inc_t icstep_c = rs_c * MC; \ const inc_t icstep_a = rs_a * MC; \ \ const inc_t jrstep_c = cs_c * NR; \ \ /* const inc_t jrstep_b = cs_b * NR; \ ( void )jrstep_b; \ */ \ \ /* const inc_t irstep_c = rs_c * MR; \ const inc_t irstep_a = rs_a * MR; \ */ \ \ /* Query the context for the sup microkernel address and cast it to its function pointer type. 
*/ \ PASTECH(ch,gemmsup_ker_ft) \ gemmsup_ker = bli_cntx_get_l3_sup_ker_dt( dt, stor_id, cntx ); \ \ ctype* restrict a_00 = a; \ ctype* restrict b_00 = b; \ ctype* restrict c_00 = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ \ ctype* restrict one = PASTEMAC(ch,1); \ \ auxinfo_t aux; \ \ /* Compute number of primary and leftover components of the outer dimensions. NOTE: Functionally speaking, we compute jc_iter as: jc_iter = n / NC; if ( jc_left ) ++jc_iter; However, this is implemented as: jc_iter = ( n + NC - 1 ) / NC; This avoids a branch at the cost of two additional integer instructions. The pc_iter, mc_iter, nr_iter, and mr_iter variables are computed in similar manner. */ \ const dim_t jc_iter = ( n + NC - 1 ) / NC; \ const dim_t jc_left = n % NC; \ \ const dim_t pc_iter = ( k + KC - 1 ) / KC; \ const dim_t pc_left = k % KC; \ \ const dim_t ic_iter = ( m + MC - 1 ) / MC; \ const dim_t ic_left = m % MC; \ \ const dim_t jc_inc = 1; \ const dim_t pc_inc = 1; \ const dim_t ic_inc = 1; \ const dim_t jr_inc = 1; \ /* const dim_t ir_inc = 1; \ */ \ \ /* Initialize a mem_t entry for A and B. Strictly speaking, this is only needed for the matrix we will be packing (if any), but we do it unconditionally to be safe. An alternative way of initializing the mem_t entries is: bli_mem_clear( &mem_a ); \ bli_mem_clear( &mem_b ); \ */ \ mem_t mem_a = BLIS_MEM_INITIALIZER; \ mem_t mem_b = BLIS_MEM_INITIALIZER; \ \ /* Prepare the packing destination buffer. If packing is not requested for matrix A, this function will reduce to a no-op. */ \ PASTEMAC(ch,packm_sup_init_mem_a) \ ( \ packa, \ BLIS_BUFFER_FOR_A_BLOCK, /* This algorithm packs matrix A to a "block of A". */ \ stor_id, \ MC, KC, MR, /* Note this "block of A" is MC x KC. */ \ cntx, \ rntm, \ &mem_a, \ thread \ ); \ \ /* Prepare the packing destination buffer. If packing is not requested for matrix B, this function will reduce to a no-op. 
*/ \ PASTEMAC(ch,packm_sup_init_mem_b) \ ( \ packb, \ BLIS_BUFFER_FOR_B_PANEL, /* This algorithm packs matrix B to a "panel of B". */ \ stor_id, \ KC, NC, NR, /* Note this "panel of B" is KC x NC. */ \ cntx, \ rntm, \ &mem_b, \ thread \ ); \ \ /* Loop over the n dimension (NC rows/columns at a time). */ \ for ( dim_t jj = 0; jj < jc_iter; jj += jc_inc ) \ { \ const dim_t nc_cur = ( bli_is_not_edge_f( jj, jc_iter, jc_left ) ? NC : jc_left ); \ \ ctype* restrict b_jc = b_00 + jj * jcstep_b; \ ctype* restrict c_jc = c_00 + jj * jcstep_c; \ \ dim_t jr_iter = ( nc_cur + NR - 1 ) / NR; \ dim_t jr_left = nc_cur % NR; \ \ /* An optimization: allow the last jr iteration to contain up to NRE columns of C and B. (If NRE > NR, the mkernel has agreed to handle these cases.) Note that this prevents us from declaring jr_iter and jr_left as const. NOTE: We forgo this optimization when packing B since packing an extended edge case is not yet supported. */ \ if ( !packb ) \ if ( NRE != 0 && 1 < jr_iter && jr_left != 0 && jr_left <= NRE ) \ { \ jr_iter--; jr_left += NR; \ } \ \ /* Loop over the k dimension (KC rows/columns at a time). */ \ for ( dim_t pp = 0; pp < pc_iter; pp += pc_inc ) \ { \ const dim_t kc_cur = ( bli_is_not_edge_f( pp, pc_iter, pc_left ) ? KC : pc_left ); \ \ ctype* restrict a_pc = a_00 + pp * pcstep_a; \ ctype* restrict b_pc = b_jc + pp * pcstep_b; \ \ /* Only apply beta to the first iteration of the pc loop. */ \ ctype* restrict beta_use = ( pp == 0 ? beta_cast : one ); \ \ ctype* b_use; \ inc_t rs_b_use, cs_b_use, ps_b_use; \ \ /* Determine the packing buffer and related parameters for matrix B. (If B will not be packed, then a_use will be set to point to b and the _b_use strides will be set accordingly.) Then call the packm sup variant chooser, which will call the appropriate implementation based on the schema deduced from the stor_id. 
*/ \ PASTEMAC(ch,packm_sup_b) \ ( \ packb, \ stor_id, \ BLIS_NO_TRANSPOSE, \ kc_cur, nc_cur, NR, \ one, \ b_pc, rs_b, cs_b, \ &b_use, &rs_b_use, &cs_b_use, \ &ps_b_use, \ cntx, \ &mem_b, \ thread \ ); \ \ /* Alias a_use so that it's clear this is our current block of matrix B. */ \ ctype* restrict b_pc_use = b_use; \ \ /* We don't need to embed the panel stride of B within the auxinfo_t object because this variant iterates through B in the jr loop, which occurs here, within the macrokernel, not within the millikernel. */ \ /*bli_auxinfo_set_ps_b( ps_b_use, &aux );*/ \ \ /* Loop over the m dimension (MC rows at a time). */ \ for ( dim_t ii = 0; ii < ic_iter; ii += ic_inc ) \ { \ const dim_t mc_cur = ( bli_is_not_edge_f( ii, ic_iter, ic_left ) ? MC : ic_left ); \ \ ctype* restrict a_ic = a_pc + ii * icstep_a; \ ctype* restrict c_ic = c_jc + ii * icstep_c; \ \ /* const dim_t ir_iter = ( mc_cur + MR - 1 ) / MR; \ const dim_t ir_left = mc_cur % MR; \ */ \ \ ctype* a_use; \ inc_t rs_a_use, cs_a_use, ps_a_use; \ \ /* Determine the packing buffer and related parameters for matrix A. (If A will not be packed, then a_use will be set to point to a and the _a_use strides will be set accordingly.) Then call the packm sup variant chooser, which will call the appropriate implementation based on the schema deduced from the stor_id. */ \ PASTEMAC(ch,packm_sup_a) \ ( \ packa, \ stor_id, \ BLIS_NO_TRANSPOSE, \ mc_cur, kc_cur, MR, \ one, \ a_ic, rs_a, cs_a, \ &a_use, &rs_a_use, &cs_a_use, \ &ps_a_use, \ cntx, \ &mem_a, \ thread \ ); \ \ /* Alias a_use so that it's clear this is our current block of matrix A. */ \ ctype* restrict a_ic_use = a_use; \ \ /* Embed the panel stride of A within the auxinfo_t object. The millikernel will query and use this to iterate through micropanels of A (if needed). */ \ bli_auxinfo_set_ps_a( ps_a_use, &aux ); \ \ /* Loop over the n dimension (NR columns at a time). 
*/ \ for ( dim_t j = 0; j < jr_iter; j += jr_inc ) \ { \ const dim_t nr_cur = ( bli_is_not_edge_f( j, jr_iter, jr_left ) ? NR : jr_left ); \ \ /* ctype* restrict b_jr = b_pc_use + j * jrstep_b; \ */ \ ctype* restrict b_jr = b_pc_use + j * ps_b_use; \ ctype* restrict c_jr = c_ic + j * jrstep_c; \ \ /* Loop over the m dimension (MR rows at a time). */ \ { \ /* Invoke the gemmsup millikernel. */ \ gemmsup_ker \ ( \ conja, \ conjb, \ mc_cur, \ nr_cur, \ kc_cur, \ alpha_cast, \ a_ic_use, rs_a_use, cs_a_use, \ b_jr, rs_b_use, cs_b_use, \ beta_use, \ c_jr, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ } \ } \ } \ } \ \ /* Release any memory that was acquired for packing matrices A and B. */ \ PASTEMAC(ch,packm_sup_finalize_mem_a) \ ( \ packa, \ rntm, \ &mem_a, \ thread \ ); \ PASTEMAC(ch,packm_sup_finalize_mem_b) \ ( \ packb, \ rntm, \ &mem_b, \ thread \ ); \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: b1", kc_cur, nr_cur, b_jr, rs_b, cs_b, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: a1", mr_cur, kc_cur, a_ir, rs_a, cs_a, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: c ", mr_cur, nr_cur, c_ir, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemmsup_ref_var2m ) blis-0.6.1/frame/3/bli_l3_sup_vars.h000066400000000000000000000073101360743507500171550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ trans_t trans, \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c, \ stor3_t eff_id, \ cntx_t* cntx, \ rntm_t* rntm, \ cntl_t* cntl, \ thrinfo_t* thread \ ); GENPROT( gemmsup_ref_var1 ) GENPROT( gemmsup_ref_var2 ) GENPROT( gemmsup_ref_var1n ) GENPROT( gemmsup_ref_var2m ) // // Prototype BLAS-like interfaces with void pointer operands. 
// #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ stor3_t eff_id, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ cntl_t* restrict cntl, \ thrinfo_t* restrict thread \ ); INSERT_GENTPROT_BASIC0( gemmsup_ref_var1 ) INSERT_GENTPROT_BASIC0( gemmsup_ref_var2 ) #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ bool_t packa, \ bool_t packb, \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ stor3_t eff_id, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ cntl_t* restrict cntl, \ thrinfo_t* restrict thread \ ); INSERT_GENTPROT_BASIC0( gemmsup_ref_var1n ) INSERT_GENTPROT_BASIC0( gemmsup_ref_var2m ) blis-0.6.1/frame/3/bli_l3_tapi.c000066400000000000000000000326211360743507500162460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the typed API macros. #ifdef BLIS_ENABLE_TAPI // // Define BLAS-like interfaces with typed operands. 
// #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, k, n, &m_b, &n_b ); \ \ bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ PASTEMAC(opname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( gemm ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, struca ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ side_t side, \ uplo_t uploa, \ conj_t conja, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn_a; \ dim_t m_b, n_b; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ 
bli_set_dims_with_trans( transb, m, n, &m_b, &n_b ); \ \ bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ bli_obj_init_finish( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_conj( conja, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( struca, &ao ); \ \ PASTEMAC(opname,BLIS_OAPI_EX_SUF) \ ( \ side, \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC( hemm, BLIS_HERMITIAN ) INSERT_GENTFUNC_BASIC( symm, BLIS_SYMMETRIC ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploc, \ trans_t transa, \ dim_t m, \ dim_t k, \ ctype_r* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ \ bli_obj_init_finish_1x1( dt_r, alpha, &alphao ); \ bli_obj_init_finish_1x1( dt_r, beta, &betao ); \ \ bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ \ bli_obj_set_struc( BLIS_HERMITIAN, &co ); \ \ PASTEMAC(opname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( herk ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t 
m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, m, k, &m_b, &n_b ); \ \ bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ bli_obj_init_finish_1x1( dt_r, beta, &betao ); \ \ bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( BLIS_HERMITIAN, &co ); \ \ PASTEMAC(opname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( her2k ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploc, \ trans_t transa, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ \ bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ bli_obj_init_finish( dt, m_a, n_a, a, rs_a, 
cs_a, &ao ); \ bli_obj_init_finish( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ \ bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \ \ PASTEMAC(opname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( syrk ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, m, k, &m_b, &n_b ); \ \ bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ bli_obj_init_finish( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \ \ PASTEMAC(opname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( syr2k ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, 
inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn_a; \ dim_t m_b, n_b; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ bli_set_dims_with_trans( transb, m, n, &m_b, &n_b ); \ \ bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, beta, &betao ); \ \ bli_obj_init_finish( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \ \ PASTEMAC(opname,BLIS_OAPI_EX_SUF) \ ( \ side, \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( trmm3 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn_a; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ \ bli_obj_init_finish_1x1( dt, alpha, &alphao ); \ \ bli_obj_init_finish( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m, n, b, rs_b, cs_b, &bo ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ bli_obj_set_conjtrans( transa, &ao ); \ \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \ \ 
PASTEMAC(opname,BLIS_OAPI_EX_SUF) \ ( \ side, \ &alphao, \ &ao, \ &bo, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( trmm ) INSERT_GENTFUNC_BASIC0( trsm ) #endif blis-0.6.1/frame/3/bli_l3_tapi.h000066400000000000000000000127561360743507500162620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces with typed operands. 
// #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( gemm ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ side_t side, \ uplo_t uploa, \ conj_t conja, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( hemm ) INSERT_GENTPROT_BASIC0( symm ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploc, \ trans_t transa, \ dim_t m, \ dim_t k, \ ctype_r* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROTR_BASIC0( herk ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROTR_BASIC0( her2k ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploc, \ trans_t transa, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( syrk ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ 
uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( syr2k ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( trmm3 ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( trmm ) INSERT_GENTPROT_BASIC0( trsm ) blis-0.6.1/frame/3/bli_l3_tapi_ba.c000066400000000000000000000036651360743507500167160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_tapi_ba.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_l3_tapi.c" blis-0.6.1/frame/3/bli_l3_tapi_ex.c000066400000000000000000000036631360743507500167460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_tapi_ex.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_l3_tapi.c" blis-0.6.1/frame/3/bli_l3_thrinfo.c000066400000000000000000000502671360743507500167700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "assert.h" void bli_l3_thrinfo_init_single ( thrinfo_t* thread ) { bli_thrinfo_init_single( thread ); } void bli_l3_thrinfo_free ( rntm_t* rntm, thrinfo_t* thread ) { bli_thrinfo_free( rntm, thread ); } // ----------------------------------------------------------------------------- void bli_l3_thrinfo_create_root ( dim_t id, thrcomm_t* gl_comm, rntm_t* rntm, cntl_t* cntl, thrinfo_t** thread ) { // Query the global communicator for the total number of threads to use. dim_t n_threads = bli_thrcomm_num_threads( gl_comm ); // Use the thread id passed in as the global communicator id. dim_t gl_comm_id = id; // Use the blocksize id of the current (root) control tree node to // query the top-most ways of parallelism to obtain. bszid_t bszid = bli_cntl_bszid( cntl ); dim_t xx_way = bli_rntm_ways_for( bszid, rntm ); // Determine the work id for this thrinfo_t node. dim_t work_id = gl_comm_id / ( n_threads / xx_way ); // Create the root thrinfo_t node. 
*thread = bli_thrinfo_create ( rntm, gl_comm, gl_comm_id, xx_way, work_id, TRUE, bszid, NULL ); } // ----------------------------------------------------------------------------- void bli_l3_thrinfo_print_gemm_paths ( thrinfo_t** threads ) { // In order to query the number of threads, we query the only thread we // know exists: thread 0. dim_t n_threads = bli_thread_num_threads( threads[0] ); // For the purposes of printing the "header" information that is common // to the various instances of a thrinfo_t (ie: across all threads), we // choose the last thread in case the problem is so small that there is // only an "edge" case, which will always be assigned to the last thread // (at least for higher levels of partitioning). thrinfo_t* jc_info = threads[n_threads-1]; thrinfo_t* pc_info = NULL; thrinfo_t* pb_info = NULL; thrinfo_t* ic_info = NULL; thrinfo_t* pa_info = NULL; thrinfo_t* jr_info = NULL; thrinfo_t* ir_info = NULL; // Initialize the n_ways and n_threads fields of each thrinfo_t "level" // to -1. More than likely, these will all be overwritten with meaningful // values, but in case some thrinfo_t trees are not fully built (see // next commnet), these will be the placeholder values. dim_t jc_way = -1, pc_way = -1, pb_way = -1, ic_way = -1, pa_way = -1, jr_way = -1, ir_way = -1; dim_t jc_nt = -1, pc_nt = -1, pb_nt = -1, ic_nt = -1, pa_nt = -1, jr_nt = -1, ir_nt = -1; // NOTE: We must check each thrinfo_t pointer for NULLness. Certain threads // may not fully build their thrinfo_t structures--specifically when the // dimension being parallelized is not large enough for each thread to have // even one unit of work (where as unit is usually a single micropanel's // width, MR or NR). 
if ( !jc_info ) goto print_header; jc_way = bli_thread_n_way( jc_info ); jc_nt = bli_thread_num_threads( jc_info ); pc_info = bli_thrinfo_sub_node( jc_info ); if ( !pc_info ) goto print_header; pc_way = bli_thread_n_way( pc_info ); pc_nt = bli_thread_num_threads( pc_info ); pb_info = bli_thrinfo_sub_node( pc_info ); if ( !pb_info ) goto print_header; pb_way = bli_thread_n_way( pb_info ); pb_nt = bli_thread_num_threads( pb_info ); ic_info = bli_thrinfo_sub_node( pb_info ); if ( !ic_info ) goto print_header; ic_way = bli_thread_n_way( ic_info ); ic_nt = bli_thread_num_threads( ic_info ); pa_info = bli_thrinfo_sub_node( ic_info ); if ( !pa_info ) goto print_header; pa_way = bli_thread_n_way( pa_info ); pa_nt = bli_thread_num_threads( pa_info ); jr_info = bli_thrinfo_sub_node( pa_info ); if ( !jr_info ) goto print_header; jr_way = bli_thread_n_way( jr_info ); jr_nt = bli_thread_num_threads( jr_info ); ir_info = bli_thrinfo_sub_node( jr_info ); if ( !ir_info ) goto print_header; ir_way = bli_thread_n_way( ir_info ); ir_nt = bli_thread_num_threads( ir_info ); print_header: printf( " jc kc pb ic pa jr ir\n" ); printf( "xx_nt: %4ld %4ld %4ld %4ld %4ld %4ld %4ld\n", ( unsigned long )jc_nt, ( unsigned long )pc_nt, ( unsigned long )pb_nt, ( unsigned long )ic_nt, ( unsigned long )pa_nt, ( unsigned long )jr_nt, ( unsigned long )ir_nt ); printf( "xx_way: %4ld %4ld %4ld %4ld %4ld %4ld %4ld\n", ( unsigned long )jc_way, ( unsigned long )pc_way, ( unsigned long )pb_way, ( unsigned long )ic_way, ( unsigned long )pa_way, ( unsigned long )jr_way, ( unsigned long )ir_way ); printf( "============================================\n" ); for ( dim_t gl_id = 0; gl_id < n_threads; ++gl_id ) { jc_info = threads[gl_id]; dim_t jc_comm_id = -1, pc_comm_id = -1, pb_comm_id = -1, ic_comm_id = -1, pa_comm_id = -1, jr_comm_id = -1, ir_comm_id = -1; dim_t jc_work_id = -1, pc_work_id = -1, pb_work_id = -1, ic_work_id = -1, pa_work_id = -1, jr_work_id = -1, ir_work_id = -1; if ( !jc_info ) goto 
print_thrinfo; jc_comm_id = bli_thread_ocomm_id( jc_info ); jc_work_id = bli_thread_work_id( jc_info ); pc_info = bli_thrinfo_sub_node( jc_info ); if ( !pc_info ) goto print_thrinfo; pc_comm_id = bli_thread_ocomm_id( pc_info ); pc_work_id = bli_thread_work_id( pc_info ); pb_info = bli_thrinfo_sub_node( pc_info ); if ( !pb_info ) goto print_thrinfo; pb_comm_id = bli_thread_ocomm_id( pb_info ); pb_work_id = bli_thread_work_id( pb_info ); ic_info = bli_thrinfo_sub_node( pb_info ); if ( !ic_info ) goto print_thrinfo; ic_comm_id = bli_thread_ocomm_id( ic_info ); ic_work_id = bli_thread_work_id( ic_info ); pa_info = bli_thrinfo_sub_node( ic_info ); if ( !pa_info ) goto print_thrinfo; pa_comm_id = bli_thread_ocomm_id( pa_info ); pa_work_id = bli_thread_work_id( pa_info ); jr_info = bli_thrinfo_sub_node( pa_info ); if ( !jr_info ) goto print_thrinfo; jr_comm_id = bli_thread_ocomm_id( jr_info ); jr_work_id = bli_thread_work_id( jr_info ); ir_info = bli_thrinfo_sub_node( jr_info ); if ( !ir_info ) goto print_thrinfo; ir_comm_id = bli_thread_ocomm_id( ir_info ); ir_work_id = bli_thread_work_id( ir_info ); print_thrinfo: printf( "comm ids: %4ld %4ld %4ld %4ld %4ld %4ld %4ld\n", ( long )jc_comm_id, ( long )pc_comm_id, ( long )pb_comm_id, ( long )ic_comm_id, ( long )pa_comm_id, ( long )jr_comm_id, ( long )ir_comm_id ); printf( "work ids: %4ld %4ld %4ld %4ld %4ld %4ld %4ld\n", ( long )jc_work_id, ( long )pc_work_id, ( long )pb_work_id, ( long )ic_work_id, ( long )pa_work_id, ( long )jr_work_id, ( long )ir_work_id ); printf( "--------------------------------------------\n" ); } } // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- // ----------------------------------------------------------------------------- void bli_l3_thrinfo_print_trsm_paths ( thrinfo_t** threads ) { // In order to query the number of threads, we query the only thread we // know exists: thread 0. 
dim_t n_threads = bli_thread_num_threads( threads[0] ); // For the purposes of printing the "header" information that is common // to the various instances of a thrinfo_t (ie: across all threads), we // choose the last thread in case the problem is so small that there is // only an "edge" case, which will always be assigned to the last thread // (at least for higher levels of partitioning). thrinfo_t* jc_info = threads[n_threads-1]; thrinfo_t* pc_info = NULL; thrinfo_t* pb_info = NULL; thrinfo_t* ic_info = NULL; thrinfo_t* pa_info = NULL; thrinfo_t* pa_info0 = NULL; thrinfo_t* jr_info = NULL; thrinfo_t* jr_info0 = NULL; thrinfo_t* ir_info = NULL; thrinfo_t* ir_info0 = NULL; // Initialize the n_ways and n_threads fields of each thrinfo_t "level" // to -1. More than likely, these will all be overwritten with meaningful // values, but in case some thrinfo_t trees are not fully built (see // next commnet), these will be the placeholder values. dim_t jc_way = -1, pc_way = -1, pb_way = -1, ic_way = -1, pa_way = -1, jr_way = -1, ir_way = -1, pa_way0 = -1, jr_way0 = -1, ir_way0 = -1; dim_t jc_nt = -1, pc_nt = -1, pb_nt = -1, ic_nt = -1, pa_nt = -1, jr_nt = -1, ir_nt = -1, pa_nt0 = -1, jr_nt0 = -1, ir_nt0 = -1; // NOTE: We must check each thrinfo_t pointer for NULLness. Certain threads // may not fully build their thrinfo_t structures--specifically when the // dimension being parallelized is not large enough for each thread to have // even one unit of work (where as unit is usually a single micropanel's // width, MR or NR). 
if ( !jc_info ) goto print_header; jc_way = bli_thread_n_way( jc_info ); jc_nt = bli_thread_num_threads( jc_info ); pc_info = bli_thrinfo_sub_node( jc_info ); if ( !pc_info ) goto print_header; pc_way = bli_thread_n_way( pc_info ); pc_nt = bli_thread_num_threads( pc_info ); pb_info = bli_thrinfo_sub_node( pc_info ); if ( !pb_info ) goto print_header; pb_way = bli_thread_n_way( pb_info ); pb_nt = bli_thread_num_threads( pb_info ); ic_info = bli_thrinfo_sub_node( pb_info ); if ( !ic_info ) goto print_header; ic_way = bli_thread_n_way( ic_info ); ic_nt = bli_thread_num_threads( ic_info ); pa_info = bli_thrinfo_sub_node( ic_info ); pa_info0 = bli_thrinfo_sub_prenode( ic_info ); // check_header_prenode: if ( !pa_info0 ) goto check_header_node; pa_way0 = bli_thread_n_way( pa_info0 ); pa_nt0 = bli_thread_num_threads( pa_info0 ); jr_info0 = bli_thrinfo_sub_node( pa_info0 ); if ( !jr_info0 ) goto check_header_node; jr_way0 = bli_thread_n_way( jr_info0 ); jr_nt0 = bli_thread_num_threads( jr_info0 ); ir_info0 = bli_thrinfo_sub_node( jr_info0 ); if ( !ir_info0 ) goto check_header_node; ir_way0 = bli_thread_n_way( ir_info0 ); ir_nt0 = bli_thread_num_threads( ir_info0 ); check_header_node: if ( !pa_info ) goto print_header; pa_way = bli_thread_n_way( pa_info ); pa_nt = bli_thread_num_threads( pa_info ); jr_info = bli_thrinfo_sub_node( pa_info ); if ( !jr_info ) goto print_header; jr_way = bli_thread_n_way( jr_info ); jr_nt = bli_thread_num_threads( jr_info ); ir_info = bli_thrinfo_sub_node( jr_info ); if ( !ir_info ) goto print_header; ir_way = bli_thread_n_way( ir_info ); ir_nt = bli_thread_num_threads( ir_info ); print_header: printf( " jc kc pb ic pa jr ir\n" ); printf( "xx_nt: %4ld %4ld %4ld %4ld %2ld|%2ld %2ld|%2ld %2ld|%2ld\n", ( long )jc_nt, ( long )pc_nt, ( long )pb_nt, ( long )ic_nt, ( long )pa_nt0, ( long )pa_nt, ( long )jr_nt0, ( long )jr_nt, ( long )ir_nt0, ( long )ir_nt ); printf( "xx_way: %4ld %4ld %4ld %4ld %2ld|%2ld %2ld|%2ld %2ld|%2ld\n", ( long )jc_way, ( long 
)pc_way, ( long )pb_way, ( long )ic_way, ( long )pa_way0, ( long )pa_way, ( long )jr_way0, ( long )jr_way, ( long )ir_way0, ( long )ir_way ); printf( "==================================================\n" ); for ( dim_t gl_id = 0; gl_id < n_threads; ++gl_id ) { jc_info = threads[gl_id]; #if 1 // NOTE: This cpp branch contains code that is safe to execute // for small problems that are parallelized enough that one or // more threads gets no work. dim_t jc_comm_id = -1, pc_comm_id = -1, pb_comm_id = -1, ic_comm_id = -1, pa_comm_id = -1, jr_comm_id = -1, ir_comm_id = -1, pa_comm_id0 = -1, jr_comm_id0 = -1, ir_comm_id0 = -1; dim_t jc_work_id = -1, pc_work_id = -1, pb_work_id = -1, ic_work_id = -1, pa_work_id = -1, jr_work_id = -1, ir_work_id = -1, pa_work_id0 = -1, jr_work_id0 = -1, ir_work_id0 = -1; if ( !jc_info ) goto print_thrinfo; jc_comm_id = bli_thread_ocomm_id( jc_info ); jc_work_id = bli_thread_work_id( jc_info ); pc_info = bli_thrinfo_sub_node( jc_info ); if ( !pc_info ) goto print_thrinfo; pc_comm_id = bli_thread_ocomm_id( pc_info ); pc_work_id = bli_thread_work_id( pc_info ); pb_info = bli_thrinfo_sub_node( pc_info ); if ( !pb_info ) goto print_thrinfo; pb_comm_id = bli_thread_ocomm_id( pb_info ); pb_work_id = bli_thread_work_id( pb_info ); ic_info = bli_thrinfo_sub_node( pb_info ); if ( !ic_info ) goto print_thrinfo; ic_comm_id = bli_thread_ocomm_id( ic_info ); ic_work_id = bli_thread_work_id( ic_info ); pa_info = bli_thrinfo_sub_node( ic_info ); pa_info0 = bli_thrinfo_sub_prenode( ic_info ); // check_thrinfo_prenode: if ( !pa_info0 ) goto check_thrinfo_node; pa_comm_id0 = bli_thread_ocomm_id( pa_info0 ); pa_work_id0 = bli_thread_work_id( pa_info0 ); jr_info0 = bli_thrinfo_sub_node( pa_info0 ); if ( !jr_info0 ) goto check_thrinfo_node; jr_comm_id0 = bli_thread_ocomm_id( jr_info0 ); jr_work_id0 = bli_thread_work_id( jr_info0 ); ir_info0 = bli_thrinfo_sub_node( jr_info0 ); if ( !ir_info0 ) goto check_thrinfo_node; ir_comm_id0 = bli_thread_ocomm_id( ir_info0 
); ir_work_id0 = bli_thread_work_id( ir_info0 ); check_thrinfo_node: if ( !pa_info ) goto print_thrinfo; pa_comm_id = bli_thread_ocomm_id( pa_info ); pa_work_id = bli_thread_work_id( pa_info ); jr_info = bli_thrinfo_sub_node( pa_info ); if ( !jr_info ) goto print_thrinfo; jr_comm_id = bli_thread_ocomm_id( jr_info ); jr_work_id = bli_thread_work_id( jr_info ); ir_info = bli_thrinfo_sub_node( jr_info ); if ( !ir_info ) goto print_thrinfo; ir_comm_id = bli_thread_ocomm_id( ir_info ); ir_work_id = bli_thread_work_id( ir_info ); print_thrinfo: #else dim_t jc_comm_id; dim_t pc_comm_id; dim_t pb_comm_id; dim_t ic_comm_id; dim_t pa_comm_id0, pa_comm_id; dim_t jr_comm_id0, jr_comm_id; dim_t ir_comm_id0, ir_comm_id; dim_t jc_work_id; dim_t pc_work_id; dim_t pb_work_id; dim_t ic_work_id; dim_t pa_work_id0, pa_work_id; dim_t jr_work_id0, jr_work_id; dim_t ir_work_id0, ir_work_id; // NOTE: We must check each thrinfo_t pointer for NULLness. Certain threads // may not fully build their thrinfo_t structures--specifically when the // dimension being parallelized is not large enough for each thread to have // even one unit of work (where as unit is usually a single micropanel's // width, MR or NR). 
if ( !jc_info ) { jc_comm_id = pc_comm_id = pb_comm_id = ic_comm_id = pa_comm_id = jr_comm_id = ir_comm_id = -1; jc_work_id = pc_work_id = pb_work_id = ic_work_id = pa_work_id = jr_work_id = ir_work_id = -1; } else { jc_comm_id = bli_thread_ocomm_id( jc_info ); jc_work_id = bli_thread_work_id( jc_info ); pc_info = bli_thrinfo_sub_node( jc_info ); if ( !pc_info ) { pc_comm_id = pb_comm_id = ic_comm_id = pa_comm_id = jr_comm_id = ir_comm_id = -1; pc_work_id = pb_work_id = ic_work_id = pa_work_id = jr_work_id = ir_work_id = -1; } else { pc_comm_id = bli_thread_ocomm_id( pc_info ); pc_work_id = bli_thread_work_id( pc_info ); pb_info = bli_thrinfo_sub_node( pc_info ); if ( !pb_info ) { pb_comm_id = ic_comm_id = pa_comm_id = jr_comm_id = ir_comm_id = -1; pb_work_id = ic_work_id = pa_work_id = jr_work_id = ir_work_id = -1; } else { pb_comm_id = bli_thread_ocomm_id( pb_info ); pb_work_id = bli_thread_work_id( pb_info ); ic_info = bli_thrinfo_sub_node( pb_info ); if ( !ic_info ) { ic_comm_id = pa_comm_id = jr_comm_id = ir_comm_id = -1; ic_work_id = pa_work_id = jr_work_id = ir_work_id = -1; } else { ic_comm_id = bli_thread_ocomm_id( ic_info ); ic_work_id = bli_thread_work_id( ic_info ); pa_info0 = bli_thrinfo_sub_prenode( ic_info ); pa_info = bli_thrinfo_sub_node( ic_info ); // Prenode if ( !pa_info0 ) { pa_comm_id0 = jr_comm_id0 = ir_comm_id0 = -1; pa_work_id0 = jr_work_id0 = ir_work_id0 = -1; } else { pa_comm_id0 = bli_thread_ocomm_id( pa_info0 ); pa_work_id0 = bli_thread_work_id( pa_info0 ); jr_info0 = bli_thrinfo_sub_node( pa_info0 ); if ( !jr_info0 ) { jr_comm_id0 = ir_comm_id0 = -1; jr_work_id0 = ir_work_id0 = -1; } else { jr_comm_id0 = bli_thread_ocomm_id( jr_info0 ); jr_work_id0 = bli_thread_work_id( jr_info0 ); ir_info0 = bli_thrinfo_sub_node( jr_info0 ); if ( !ir_info0 ) { ir_comm_id0 = -1; ir_work_id0 = -1; } else { ir_comm_id0 = bli_thread_ocomm_id( ir_info0 ); ir_work_id0 = bli_thread_work_id( ir_info0 ); } } } // Main node if ( !pa_info ) { pa_comm_id = 
jr_comm_id = ir_comm_id = -1; pa_work_id = jr_work_id = ir_work_id = -1; } else { pa_comm_id = bli_thread_ocomm_id( pa_info ); pa_work_id = bli_thread_work_id( pa_info ); jr_info = bli_thrinfo_sub_node( pa_info ); if ( !jr_info ) { jr_comm_id = ir_comm_id = -1; jr_work_id = ir_work_id = -1; } else { jr_comm_id = bli_thread_ocomm_id( jr_info ); jr_work_id = bli_thread_work_id( jr_info ); ir_info = bli_thrinfo_sub_node( jr_info ); if ( !ir_info ) { ir_comm_id = -1; ir_work_id = -1; } else { ir_comm_id = bli_thread_ocomm_id( ir_info ); ir_work_id = bli_thread_work_id( ir_info ); } } } } } } } #endif printf( "comm ids: %4ld %4ld %4ld %4ld %2ld|%2ld %2ld|%2ld %2ld|%2ld\n", ( long )jc_comm_id, ( long )pc_comm_id, ( long )pb_comm_id, ( long )ic_comm_id, ( long )pa_comm_id0, ( long )pa_comm_id, ( long )jr_comm_id0, ( long )jr_comm_id, ( long )ir_comm_id0, ( long )ir_comm_id ); printf( "work ids: %4ld %4ld %4ld %4ld %2ld|%2ld %2ld|%2ld %2ld|%2ld\n", ( long )jc_work_id, ( long )pc_work_id, ( long )pb_work_id, ( long )ic_work_id, ( long )pa_work_id0, ( long )pa_work_id, ( long )jr_work_id0, ( long )jr_work_id, ( long )ir_work_id0, ( long )ir_work_id ); printf( "--------------------------------------------------\n" ); } } // ----------------------------------------------------------------------------- void bli_l3_thrinfo_free_paths ( rntm_t* rntm, thrinfo_t** threads ) { dim_t n_threads = bli_thread_num_threads( threads[0] ); dim_t i; for ( i = 0; i < n_threads; ++i ) bli_l3_thrinfo_free( rntm, threads[i] ); bli_free_intl( threads ); } blis-0.6.1/frame/3/bli_l3_thrinfo.h000066400000000000000000000075361360743507500167760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

//
// thrinfo_t macros specific to various level-3 operations.
//
// Every macro argument is parenthesized in the expansion so that expression
// arguments (e.g. "i + 1" or "&info") are evaluated as a unit regardless of
// the precedence of the surrounding operators.
//

// gemm

// NOTE: The definition of bli_gemm_get_next_?_upanel() does not need to
// change depending on BLIS_ENABLE_JRIR_SLAB / BLIS_ENABLE_JRIR_RR.
#define bli_gemm_get_next_a_upanel( a1, step, inc ) ( (a1) + (step) * (inc) )
#define bli_gemm_get_next_b_upanel( b1, step, inc ) ( (b1) + (step) * (inc) )

// herk

// NOTE: The definition of bli_herk_get_next_?_upanel() does not need to
// change depending on BLIS_ENABLE_JRIR_SLAB / BLIS_ENABLE_JRIR_RR.
#define bli_herk_get_next_a_upanel( a1, step, inc ) ( (a1) + (step) * (inc) )
#define bli_herk_get_next_b_upanel( b1, step, inc ) ( (b1) + (step) * (inc) )

// trmm

// NOTE: The definition of bli_trmm_get_next_?_upanel() does not need to
// change depending on BLIS_ENABLE_JRIR_SLAB / BLIS_ENABLE_JRIR_RR.
#define bli_trmm_get_next_a_upanel( a1, step, inc ) ( (a1) + (step) * (inc) )
#define bli_trmm_get_next_b_upanel( b1, step, inc ) ( (b1) + (step) * (inc) )

// Evaluates to nonzero when iteration 'index' and the work id of 'thread' are
// congruent modulo thread->n_way.
#define bli_trmm_my_iter_rr( index, thread ) \
\
	( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )

// trsm

// Same test as bli_trmm_my_iter_rr(), provided under the trsm name.
#define bli_trsm_my_iter_rr( index, thread ) \
\
	( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )

//
// thrinfo_t APIs specific to level-3 operations.
//

// NOTE(review): only the prototype of bli_l3_thrinfo_init() is visible here;
// confirm that a matching definition still exists before relying on it.
void bli_l3_thrinfo_init
     (
       thrinfo_t* thread,
       thrcomm_t* ocomm,
       dim_t      ocomm_id,
       dim_t      n_way,
       dim_t      work_id,
       thrinfo_t* sub_node
     );

void bli_l3_thrinfo_init_single
     (
       thrinfo_t* thread
     );

void bli_l3_thrinfo_free
     (
       rntm_t*    rntm,
       thrinfo_t* thread
     );

// -----------------------------------------------------------------------------

void bli_l3_thrinfo_create_root
     (
       dim_t       id,
       thrcomm_t*  gl_comm,
       rntm_t*     rntm,
       cntl_t*     cntl,
       thrinfo_t** thread
     );

void bli_l3_thrinfo_print_gemm_paths
     (
       thrinfo_t** threads
     );

void bli_l3_thrinfo_print_trsm_paths
     (
       thrinfo_t** threads
     );

// -----------------------------------------------------------------------------

void bli_l3_thrinfo_free_paths
     (
       rntm_t*     rntm,
       thrinfo_t** threads
     );
blis-0.6.1/frame/3/bli_l3_ukr.h000066400000000000000000000044561360743507500161240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-3 micro-kernels. // // Note: Instead of defining function prototype macro templates and then // instantiating those macros to define the individual function prototypes, // we simply alias the official operations' prototypes as defined in // bli_l3_ukr_prot.h. 
// Each group below re-points GENTPROT at one of the *_UKR_PROT prototype
// macros and then instantiates it through INSERT_GENTPROT_BASIC0 for the
// given micro-kernel name macro.
// NOTE(review): INSERT_GENTPROT_BASIC0 presumably emits one prototype per
// basic datatype -- confirm against its definition.

// gemm micro-kernel prototypes.
#undef GENTPROT
#define GENTPROT GEMM_UKR_PROT

INSERT_GENTPROT_BASIC0( gemm_ukr_name )

// gemmtrsm micro-kernel prototypes (the "_l" and "_u" names).
#undef GENTPROT
#define GENTPROT GEMMTRSM_UKR_PROT

INSERT_GENTPROT_BASIC0( gemmtrsm_l_ukr_name )
INSERT_GENTPROT_BASIC0( gemmtrsm_u_ukr_name )

// trsm micro-kernel prototypes (the "_l" and "_u" names).
#undef GENTPROT
#define GENTPROT TRSM_UKR_PROT

INSERT_GENTPROT_BASIC0( trsm_l_ukr_name )
INSERT_GENTPROT_BASIC0( trsm_u_ukr_name )
blis-0.6.1/frame/3/bli_l3_ukr_fpa.c000066400000000000000000000041731360743507500167410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

//
// Define function pointer query interfaces.
//
// For each (tname, opname) pair, GENFRONT emits:
//   1. a GENARRAY_FPA() instantiation for the function-pointer type
//      <tname>_ukr_vft and the operation name opname
//      (NOTE(review): presumably this defines the <opname>_fpa array that is
//      indexed below -- confirm against GENARRAY_FPA's definition);
//   2. a query function, named by PASTEMAC(opname,_qfp), that takes a num_t
//      datatype and returns the entry of <opname>_fpa at index dt.
// No bounds check is performed on dt before indexing.
//

#undef GENFRONT
#define GENFRONT( tname, opname ) \
\
GENARRAY_FPA( PASTECH2(tname,_ukr,_vft), \
              opname ); \
\
PASTECH2(tname,_ukr,_vft) \
PASTEMAC(opname,_qfp)( num_t dt ) \
{ \
	return PASTECH(opname,_fpa)[ dt ]; \
}

// One query function per micro-kernel operation; the lower and upper
// gemmtrsm/trsm variants share a function-pointer type.
GENFRONT( gemm, gemm_ukernel )
GENFRONT( gemmtrsm, gemmtrsm_l_ukernel )
GENFRONT( gemmtrsm, gemmtrsm_u_ukernel )
GENFRONT( trsm, trsm_l_ukernel )
GENFRONT( trsm, trsm_u_ukernel )
blis-0.6.1/frame/3/bli_l3_ukr_fpa.h000066400000000000000000000037571360743507500167550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

//
// Prototype function pointer query interface.
//
// For each (tname, opname) pair, GENPROT declares a function, named by
// PASTEMAC(opname,_qfp), that takes a num_t datatype and returns a value of
// the function-pointer type <tname>_ukr_vft.
//

#undef GENPROT
#define GENPROT( tname, opname ) \
\
PASTECH2(tname,_ukr,_vft) \
PASTEMAC(opname,_qfp)( num_t dt );

// One prototype per micro-kernel operation.
GENPROT( gemm, gemm_ukernel )
GENPROT( gemmtrsm, gemmtrsm_l_ukernel )
GENPROT( gemmtrsm, gemmtrsm_u_ukernel )
GENPROT( trsm, trsm_l_ukernel )
GENPROT( trsm, trsm_u_ukernel )
blis-0.6.1/frame/3/bli_l3_ukr_oapi.c000066400000000000000000000151601360743507500171210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENFRONT #define GENFRONT( tname, opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( c ); \ \ dim_t k = bli_obj_width( a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ void* buf_b = bli_obj_buffer_at_off( b ); \ void* buf_c = bli_obj_buffer_at_off( c ); \ inc_t rs_c = bli_obj_row_stride( c ); \ inc_t cs_c = bli_obj_col_stride( c ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \ \ auxinfo_t data; \ \ /* Fill the auxinfo_t struct in case the micro-kernel uses it. */ \ bli_auxinfo_set_next_a( buf_a, &data ); \ bli_auxinfo_set_next_b( buf_b, &data ); \ bli_auxinfo_set_is_a( 1, &data ); \ bli_auxinfo_set_is_b( 1, &data ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(tname,_ukr,_vft) f = \ PASTEMAC(opname,_qfp)( dt ); \ \ f \ ( \ k, \ buf_alpha, \ buf_a, \ buf_b, \ buf_beta, \ buf_c, rs_c, cs_c, \ &data, \ cntx \ ); \ } \ GENFRONT( gemm, gemm_ukernel ) #undef GENFRONT #define GENFRONT( tname, opname, opnamel, opnameu ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* alpha, \ obj_t* a1x, \ obj_t* a11, \ obj_t* bx1, \ obj_t* b11, \ obj_t* c11, \ cntx_t* cntx \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( c11 ); \ \ dim_t k = bli_obj_width( a1x ); \ void* buf_a1x = bli_obj_buffer_at_off( a1x ); \ void* buf_a11 = bli_obj_buffer_at_off( a11 ); \ void* buf_bx1 = bli_obj_buffer_at_off( bx1 ); \ void* buf_b11 = bli_obj_buffer_at_off( b11 ); \ void* buf_c11 = bli_obj_buffer_at_off( c11 ); \ inc_t rs_c = bli_obj_row_stride( c11 ); \ inc_t cs_c = bli_obj_col_stride( c11 ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \ \ auxinfo_t data; \ \ /* Fill the auxinfo_t struct in case the micro-kernel uses it. */ \ if ( bli_obj_is_lower( a11 ) ) \ { bli_auxinfo_set_next_a( buf_a1x, &data ); } \ else /* if ( bli_obj_is_upper( a11 ) ) */ \ { bli_auxinfo_set_next_a( buf_a11, &data ); } \ bli_auxinfo_set_next_b( buf_bx1, &data ); \ \ /* Invoke the void pointer-based function for the given datatype. */ \ if ( bli_obj_is_lower( a11 ) ) \ { \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(tname,_ukr,_vft) f = \ PASTEMAC(opnamel,_qfp)( dt ); \ \ f \ ( \ k, \ buf_alpha, \ buf_a1x, \ buf_a11, \ buf_bx1, \ buf_b11, \ buf_c11, rs_c, cs_c, \ &data, \ cntx \ ); \ } \ else /* if ( bli_obj_is_upper( a11 ) ) */ \ { \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(tname,_ukr,_vft) f = \ PASTEMAC(opnameu,_qfp)( dt ); \ \ f \ ( \ k, \ buf_alpha, \ buf_a1x, \ buf_a11, \ buf_bx1, \ buf_b11, \ buf_c11, rs_c, cs_c, \ &data, \ cntx \ ); \ } \ } \ GENFRONT( gemmtrsm, gemmtrsm_ukernel, gemmtrsm_l_ukernel, gemmtrsm_u_ukernel ) #undef GENFRONT #define GENFRONT( tname, opname, opnamel, opnameu ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* a, \ obj_t* b, \ obj_t* c, \ cntx_t* cntx \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( c ); \ \ void* buf_a = bli_obj_buffer_at_off( a ); \ void* buf_b = bli_obj_buffer_at_off( b ); \ void* buf_c = bli_obj_buffer_at_off( c ); \ inc_t rs_c = bli_obj_row_stride( c ); \ inc_t cs_c = bli_obj_col_stride( c ); \ \ auxinfo_t data; \ \ /* Fill the auxinfo_t struct in case the micro-kernel uses it. */ \ bli_auxinfo_set_next_a( buf_a, &data ); \ bli_auxinfo_set_next_b( buf_b, &data ); \ bli_auxinfo_set_is_a( 1, &data ); \ bli_auxinfo_set_is_b( 1, &data ); \ \ /* Invoke the void pointer-based function for the given datatype. */ \ if ( bli_obj_is_lower( a ) ) \ { \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(tname,_ukr,_vft) f = \ PASTEMAC(opnamel,_qfp)( dt ); \ \ f \ ( \ buf_a, \ buf_b, \ buf_c, rs_c, cs_c, \ &data, \ cntx \ ); \ } \ else /* if ( bli_obj_is_upper( a ) ) */ \ { \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(tname,_ukr,_vft) f = \ PASTEMAC(opnameu,_qfp)( dt ); \ \ f \ ( \ buf_a, \ buf_b, \ buf_c, rs_c, cs_c, \ &data, \ cntx \ ); \ } \ } \ GENFRONT( trsm, trsm_ukernel, trsm_l_ukernel, trsm_u_ukernel ) blis-0.6.1/frame/3/bli_l3_ukr_oapi.h000066400000000000000000000046701360743507500171320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. 
// #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx \ ); GENPROT( gemm_ukernel ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* alpha, \ obj_t* a1x, \ obj_t* a11, \ obj_t* bx1, \ obj_t* b11, \ obj_t* c11, \ cntx_t* cntx \ ); GENPROT( gemmtrsm_ukernel ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC0(opname) \ ( \ obj_t* a, \ obj_t* b, \ obj_t* c, \ cntx_t* cntx \ ); GENPROT( trsm_ukernel ) blis-0.6.1/frame/3/bli_l3_ukr_prot.h000066400000000000000000000054221360743507500171620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-3 micro-kernels. // #define GEMM_UKR_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); #define GEMMTRSM_UKR_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); #define TRSM_UKR_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); blis-0.6.1/frame/3/bli_l3_ukr_tapi.c000066400000000000000000000105401360743507500171230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, tname, kerid ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for the function address of the current datatype's micro-kernel. */ \ PASTECH2(ch,tname,_ukr_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \ \ /* Invoke the typed function for the given datatype. 
*/ \ f( \ k, \ alpha, \ a, \ b, \ beta, \ c, rs_c, cs_c, \ data, \ cntx \ ); \ } \ INSERT_GENTFUNC_BASIC2( gemm_ukernel, gemm, BLIS_GEMM_UKR ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, tname, kerid ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for the function address of the current datatype's micro-kernel. */ \ PASTECH2(ch,tname,_ukr_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \ \ /* Invoke the typed function for the given datatype. */ \ f( \ k, \ alpha, \ a1x, \ a11, \ bx1, \ b11, \ c11, rs_c, cs_c, \ data, \ cntx \ ); \ } \ INSERT_GENTFUNC_BASIC2( gemmtrsm_l_ukernel, gemmtrsm, BLIS_GEMMTRSM_L_UKR ) INSERT_GENTFUNC_BASIC2( gemmtrsm_u_ukernel, gemmtrsm, BLIS_GEMMTRSM_U_UKR ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, tname, kerid ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for the function address of the current datatype's micro-kernel. */ \ PASTECH2(ch,tname,_ukr_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \ \ /* Invoke the typed function for the given datatype. */ \ f( \ a, \ b, \ c, rs_c, cs_c, \ data, \ cntx \ ); \ } \ INSERT_GENTFUNC_BASIC2( trsm_l_ukernel, trsm, BLIS_TRSM_L_UKR ) INSERT_GENTFUNC_BASIC2( trsm_u_ukernel, trsm, BLIS_TRSM_U_UKR ) blis-0.6.1/frame/3/bli_l3_ukr_tapi.h000066400000000000000000000041651360743507500171360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Generate prototypes for level-3 micro-kernel wrappers. // #undef gemm_ukr_name #define gemm_ukr_name gemm_ukernel #undef gemmtrsm_l_ukr_name #define gemmtrsm_l_ukr_name gemmtrsm_l_ukernel #undef gemmtrsm_u_ukr_name #define gemmtrsm_u_ukr_name gemmtrsm_u_ukernel #undef trsm_l_ukr_name #define trsm_l_ukr_name trsm_l_ukernel #undef trsm_u_ukr_name #define trsm_u_ukr_name trsm_u_ukernel // Include the level-3 micro-kernel API template. 
#include "bli_l3_ukr.h" blis-0.6.1/frame/3/gemm/000077500000000000000000000000001360743507500146425ustar00rootroot00000000000000blis-0.6.1/frame/3/gemm/bli_gemm.h000066400000000000000000000035531360743507500165740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_gemm_cntl.h" #include "bli_gemm_front.h" #include "bli_gemm_int.h" #include "bli_gemm_var.h" #include "bli_gemm_ind_opt.h" // Mixed datatype support. 
#ifdef BLIS_ENABLE_GEMM_MD #include "bli_gemm_md.h" #endif blis-0.6.1/frame/3/gemm/bli_gemm_blk_var1.c000066400000000000000000000060051360743507500203430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_gemm_blk_var1 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a1, c1; dim_t my_start, my_end; dim_t b_alg; // Determine the direction in which to partition (forwards or backwards). dir_t direct = bli_l3_direct( a, b, c, cntl ); // Prune any zero region that exists along the partitioning dimension. bli_l3_prune_unref_mparts_m( a, b, c, cntl ); // Determine the current thread's subpartition range. bli_thread_range_mdim ( direct, thread, a, b, c, cntl, cntx, &my_start, &my_end ); // Partition along the m dimension. for ( dim_t i = my_start; i < my_end; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize( direct, i, my_end, a, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and C1. bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, i, b_alg, c, &c1 ); // Perform gemm subproblem. bli_gemm_int ( &BLIS_ONE, &a1, b, &BLIS_ONE, &c1, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } } blis-0.6.1/frame/3/gemm/bli_gemm_blk_var2.c000066400000000000000000000060051360743507500203440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_gemm_blk_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t b1, c1; dim_t my_start, my_end; dim_t b_alg; // Determine the direction in which to partition (forwards or backwards). dir_t direct = bli_l3_direct( a, b, c, cntl ); // Prune any zero region that exists along the partitioning dimension. bli_l3_prune_unref_mparts_n( a, b, c, cntl ); // Determine the current thread's subpartition range. bli_thread_range_ndim ( direct, thread, a, b, c, cntl, cntx, &my_start, &my_end ); // Partition along the n dimension. for ( dim_t i = my_start; i < my_end; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize( direct, i, my_end, b, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for B1 and C1. bli_acquire_mpart_ndim( direct, BLIS_SUBPART1, i, b_alg, b, &b1 ); bli_acquire_mpart_ndim( direct, BLIS_SUBPART1, i, b_alg, c, &c1 ); // Perform gemm subproblem. 
bli_gemm_int ( &BLIS_ONE, a, &b1, &BLIS_ONE, &c1, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } } blis-0.6.1/frame/3/gemm/bli_gemm_blk_var3.c000066400000000000000000000104441360743507500203470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_gemm_blk_var3 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a1, b1; dim_t b_alg; // Determine the direction in which to partition (forwards or backwards). dir_t direct = bli_l3_direct( a, b, c, cntl ); // Prune any zero region that exists along the partitioning dimension. bli_l3_prune_unref_mparts_k( a, b, c, cntl ); // Query dimension in partitioning direction. dim_t k_trans = bli_obj_width_after_trans( a ); // Partition along the k dimension. for ( dim_t i = 0; i < k_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_l3_determine_kc( direct, i, k_trans, a, b, bli_cntl_bszid( cntl ), cntx, cntl ); // Acquire partitions for A1 and B1. bli_acquire_mpart_ndim( direct, BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, i, b_alg, b, &b1 ); // Perform gemm subproblem. bli_gemm_int ( &BLIS_ONE, &a1, &b1, &BLIS_ONE, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); bli_thread_obarrier( bli_thrinfo_sub_node( thread ) ); // This variant executes multiple rank-k updates. Therefore, if the // internal beta scalar on matrix C is non-zero, we must use it // only for the first iteration (and then BLIS_ONE for all others). // And since c is a locally aliased obj_t (see _int() function), we // can simply overwrite the internal beta scalar with BLIS_ONE once // it has been used in the first iteration. However... // Unlike variant 3 of gemm and herk, which reset the internal scalar // on C at the end of the first iteration so that subsequent iterations // do not erroneously apply beta more than once, it is important that // this behavior not be applied to trmm. 
That is because the order of // computation is always such that the beta that is passed into the // macro-kernel must be zero, since the macro-kernel only applies that // beta to (and thus overwrites) the row-panel of C that corresponds to // the current block intersecting the diagonal. It turns out that this // same pattern holds for trmm3 as well--except there, the beta scalar // is potentially non-zero, but is still applied only to the current // row-panel of C, and thus beta is applied to all of C exactly once. // Thus, for neither trmm nor trmm3 should we reset the scalar on C // after the first iteration. if ( bli_cntl_family( cntl ) != BLIS_TRMM ) if ( i == 0 ) bli_obj_scalar_reset( c ); } } blis-0.6.1/frame/3/gemm/bli_gemm_cntl.c000066400000000000000000000172761360743507500176160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" cntl_t* bli_gemm_cntl_create ( rntm_t* rntm, opid_t family, pack_t schema_a, pack_t schema_b ) { return bli_gemmbp_cntl_create( rntm, family, schema_a, schema_b ); } // ----------------------------------------------------------------------------- cntl_t* bli_gemmbp_cntl_create ( rntm_t* rntm, opid_t family, pack_t schema_a, pack_t schema_b ) { void_fp macro_kernel_fp; void_fp packa_fp; void_fp packb_fp; // Use the function pointers to the macrokernels that use slab // assignment of micropanels to threads in the jr and ir loops. if ( family == BLIS_GEMM ) macro_kernel_fp = bli_gemm_ker_var2; else if ( family == BLIS_HERK ) macro_kernel_fp = bli_herk_x_ker_var2; else if ( family == BLIS_TRMM ) macro_kernel_fp = bli_trmm_xx_ker_var2; else /* should never execute */ macro_kernel_fp = NULL; packa_fp = bli_packm_blk_var1; packb_fp = bli_packm_blk_var1; // Create two nodes for the macro-kernel. cntl_t* gemm_cntl_bu_ke = bli_gemm_cntl_create_node ( rntm, // the thread's runtime structure family, // the operation family BLIS_MR, // needed for bli_thrinfo_rgrow() NULL, // variant function pointer not used NULL // no sub-node; this is the leaf of the tree. ); cntl_t* gemm_cntl_bp_bu = bli_gemm_cntl_create_node ( rntm, // the thread's runtime structure family, BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow() macro_kernel_fp, gemm_cntl_bu_ke ); // Create a node for packing matrix A. 
cntl_t* gemm_cntl_packa = bli_packm_cntl_create_node ( rntm, bli_gemm_packa, // pack the left-hand operand packa_fp, BLIS_MR, BLIS_KR, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? schema_a, // normally BLIS_PACKED_ROW_PANELS BLIS_BUFFER_FOR_A_BLOCK, gemm_cntl_bp_bu ); // Create a node for partitioning the m dimension by MC. cntl_t* gemm_cntl_op_bp = bli_gemm_cntl_create_node ( rntm, family, BLIS_MC, bli_gemm_blk_var1, gemm_cntl_packa ); // Create a node for packing matrix B. cntl_t* gemm_cntl_packb = bli_packm_cntl_create_node ( rntm, bli_gemm_packb, // pack the right-hand operand packb_fp, BLIS_KR, BLIS_NR, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? schema_b, // normally BLIS_PACKED_COL_PANELS BLIS_BUFFER_FOR_B_PANEL, gemm_cntl_op_bp ); // Create a node for partitioning the k dimension by KC. cntl_t* gemm_cntl_mm_op = bli_gemm_cntl_create_node ( rntm, family, BLIS_KC, bli_gemm_blk_var3, gemm_cntl_packb ); // Create a node for partitioning the n dimension by NC. cntl_t* gemm_cntl_vl_mm = bli_gemm_cntl_create_node ( rntm, family, BLIS_NC, bli_gemm_blk_var2, gemm_cntl_mm_op ); return gemm_cntl_vl_mm; } // ----------------------------------------------------------------------------- // This control tree creation function is disabled because it is no longer used. // (It was originally created in the run up to publishing the 1m journal article, // but was disabled to reduce complexity.) #if 0 cntl_t* bli_gemmpb_cntl_create ( opid_t family ) { void_fp macro_kernel_p = bli_gemm_ker_var1; // Change the macro-kernel if the operation family is herk or trmm. //if ( family == BLIS_HERK ) macro_kernel_p = bli_herk_x_ker_var2; //else if ( family == BLIS_TRMM ) macro_kernel_p = bli_trmm_xx_ker_var2; // Create two nodes for the macro-kernel. 
cntl_t* gemm_cntl_ub_ke = bli_gemm_cntl_create_node ( family, // the operation family BLIS_MR, // needed for bli_thrinfo_rgrow() NULL, // variant function pointer not used NULL // no sub-node; this is the leaf of the tree. ); cntl_t* gemm_cntl_pb_ub = bli_gemm_cntl_create_node ( family, BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow() macro_kernel_p, gemm_cntl_ub_ke ); // Create a node for packing matrix A (which is really the right-hand // operand "B"). cntl_t* gemm_cntl_packb = bli_packm_cntl_create_node ( bli_gemm_packb, // pack the right-hand operand bli_packm_blk_var1, BLIS_KR, BLIS_MR, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_A_BLOCK, gemm_cntl_pb_ub ); // Create a node for partitioning the n dimension by MC. cntl_t* gemm_cntl_op_pb = bli_gemm_cntl_create_node ( family, BLIS_MC, bli_gemm_blk_var2, gemm_cntl_packb ); // Create a node for packing matrix B (which is really the left-hand // operand "A"). cntl_t* gemm_cntl_packa = bli_packm_cntl_create_node ( bli_gemm_packa, // pack the left-hand operand bli_packm_blk_var1, BLIS_NR, BLIS_KR, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_B_PANEL, gemm_cntl_op_pb ); // Create a node for partitioning the k dimension by KC. cntl_t* gemm_cntl_mm_op = bli_gemm_cntl_create_node ( family, BLIS_KC, bli_gemm_blk_var3, gemm_cntl_packa ); // Create a node for partitioning the m dimension by NC. 
cntl_t* gemm_cntl_vl_mm = bli_gemm_cntl_create_node ( family, BLIS_NC, bli_gemm_blk_var1, gemm_cntl_mm_op ); return gemm_cntl_vl_mm; } #endif // ----------------------------------------------------------------------------- void bli_gemm_cntl_free ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { bli_cntl_free( rntm, cntl, thread ); } // ----------------------------------------------------------------------------- cntl_t* bli_gemm_cntl_create_node ( rntm_t* rntm, opid_t family, bszid_t bszid, void_fp var_func, cntl_t* sub_node ) { return bli_cntl_create_node( rntm, family, bszid, var_func, NULL, sub_node ); } blis-0.6.1/frame/3/gemm/bli_gemm_cntl.h000066400000000000000000000050701360743507500176100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ cntl_t* bli_gemm_cntl_create ( rntm_t* rntm, opid_t family, pack_t schema_a, pack_t schema_b ); // ----------------------------------------------------------------------------- cntl_t* bli_gemmbp_cntl_create ( rntm_t* rntm, opid_t family, pack_t schema_a, pack_t schema_b ); #if 0 cntl_t* bli_gemmpb_cntl_create ( opid_t family, ); #endif // ----------------------------------------------------------------------------- void bli_gemm_cntl_free ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); // ----------------------------------------------------------------------------- cntl_t* bli_gemm_cntl_create_node ( rntm_t* rntm, opid_t family, bszid_t bszid, void_fp var_func, cntl_t* sub_node ); blis-0.6.1/frame/3/gemm/bli_gemm_front.c000066400000000000000000000335231360743507500177770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// bli_gemm_front(): object-level front-end for gemm.
//
// In order, this function:
//   1. optionally validates the operands (bli_gemm_check);
//   2. short-circuits alpha == 0 by scaling C by beta and returning;
//   3. aliases A, B and C into local objects so the caller's objects are not
//      the ones being modified by the steps below;
//   4. (BLIS_ENABLE_GEMM_MD only) routes mixed-datatype problems through
//      bli_gemm_md(), which may redirect cntx to the local context copy;
//   5. embeds the context's pack schemas into the A and B aliases;
//   6. attaches alpha to B and beta to C, then repoints alpha/beta at
//      BLIS_ONE;
//   7. transposes the whole operation when the micro-kernel dislikes the
//      storage of C;
//   8. sets the ways of parallelism in rntm for this problem shape;
//   9. (BLIS_ENABLE_GEMM_MD_EXTRA_MEM only) may substitute a temporary
//      matrix ct for C, computing with beta == 0 and folding the result back
//      into C with bli_xpbym() afterwards;
//  10. invokes bli_gemm_int through bli_l3_thread_decorator().
//
// Parameters:
//   alpha, beta - scalar objects.
//   a, b, c     - matrix operand objects; C is the output.
//   cntx        - context; queried for pack schemas and micro-kernel
//                 storage preference.
//   rntm        - runtime object; updated by bli_rntm_set_ways_for_op().
//   cntl        - control tree, forwarded to the thread decorator.
void bli_gemm_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	bli_init_once();

	obj_t   a_local;
	obj_t   b_local;
	obj_t   c_local;

#if 0
#ifdef BLIS_ENABLE_SMALL_MATRIX
	// Only handle small problems separately for homogeneous datatypes.
	if ( bli_obj_dt( a ) == bli_obj_dt( b ) &&
	     bli_obj_dt( a ) == bli_obj_dt( c ) &&
	     bli_obj_comp_prec( c ) == bli_obj_prec( c ) )
	{
		gint_t status = bli_gemm_small( alpha, a, b, beta, c, cntx, cntl );
		if ( status == BLIS_SUCCESS ) return;
	}
#endif
#endif

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_gemm_check( alpha, a, b, beta, c, cntx );

	// If alpha is zero, scale by beta and return.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		return;
	}

	// Alias A, B, and C in case we need to apply transformations.
	bli_obj_alias_to( a, &a_local );
	bli_obj_alias_to( b, &b_local );
	bli_obj_alias_to( c, &c_local );

#ifdef BLIS_ENABLE_GEMM_MD
	cntx_t cntx_local;

	// If any of the storage datatypes differ, or if the computation precision
	// differs from the storage precision of C, utilize the mixed datatype
	// code path.
	// NOTE: If we ever want to support the caller setting the computation
	// domain explicitly, we will need to check the computation dt against the
	// storage dt of C (instead of the computation precision against the
	// storage precision of C).
	if ( bli_obj_dt( &c_local ) != bli_obj_dt( &a_local ) ||
	     bli_obj_dt( &c_local ) != bli_obj_dt( &b_local ) ||
	     bli_obj_comp_prec( &c_local ) != bli_obj_prec( &c_local ) )
	{
		// Handle mixed datatype cases in bli_gemm_md(), which may modify
		// the objects or the context. (If the context is modified, cntx
		// is adjusted to point to cntx_local.)
		bli_gemm_md( &a_local, &b_local, beta, &c_local, &cntx_local, &cntx );
	}
	//else // homogeneous datatypes
#endif

	// Load the pack schemas from the context and embed them into the objects
	// for A and B. (Native contexts are initialized with the correct pack
	// schemas, as are contexts for 1m, and if necessary bli_gemm_md() would
	// have made a copy and modified the schemas, so reading them from the
	// context should be a safe bet at this point.) This is a sort of hack for
	// communicating the desired pack schemas to bli_gemm_cntl_create() (via
	// bli_l3_thread_decorator() and bli_l3_cntl_create_if()). This allows us
	// to subsequently access the schemas from the control tree, which
	// hopefully reduces some confusion, particularly in bli_packm_init().
	const pack_t schema_a = bli_cntx_schema_a_block( cntx );
	const pack_t schema_b = bli_cntx_schema_b_panel( cntx );

	bli_obj_set_pack_schema( schema_a, &a_local );
	bli_obj_set_pack_schema( schema_b, &b_local );

	// Next, we handle the possibility of needing to typecast alpha to the
	// computation datatype and/or beta to the storage datatype of C.

	// Attach alpha to B, and in the process typecast alpha to the target
	// datatype of the matrix (which in this case is equal to the computation
	// datatype).
	bli_obj_scalar_attach( BLIS_NO_CONJUGATE, alpha, &b_local );

	// Attach beta to C, and in the process typecast beta to the target
	// datatype of the matrix (which in this case is equal to the storage
	// datatype of C).
	bli_obj_scalar_attach( BLIS_NO_CONJUGATE, beta,  &c_local );

	// Change the alpha and beta pointers to BLIS_ONE since the values have
	// now been typecast and attached to the matrices above.
	alpha = &BLIS_ONE;
	beta  = &BLIS_ONE;

#ifdef BLIS_ENABLE_GEMM_MD
	// Don't perform the following optimization for ccr or crc cases, as
	// those cases are sensitive to the ukernel storage preference (ie:
	// transposing the operation would break them).
	// NOTE: this 'if' intentionally has no body of its own. When
	// BLIS_ENABLE_GEMM_MD is defined it guards the 'if' statement that
	// follows the #endif; when it is not defined, that statement runs
	// unguarded.
	if ( !bli_gemm_md_is_ccr( &a_local, &b_local, &c_local ) &&
	     !bli_gemm_md_is_crc( &a_local, &b_local, &c_local ) )
#endif
	// An optimization: If C is stored by rows and the micro-kernel prefers
	// contiguous columns, or if C is stored by columns and the micro-kernel
	// prefers contiguous rows, transpose the entire operation to allow the
	// micro-kernel to access elements of C in its preferred manner.
	if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
	{
		bli_obj_swap( &a_local, &b_local );

		bli_obj_induce_trans( &a_local );
		bli_obj_induce_trans( &b_local );
		bli_obj_induce_trans( &c_local );

		// We must also swap the pack schemas, which were set by bli_gemm_md()
		// or the inlined code above.
		bli_obj_swap_pack_schemas( &a_local, &b_local );
	}

	// Parse and interpret the contents of the rntm_t object to properly
	// set the ways of parallelism for each loop, and then make any
	// additional modifications necessary for the current operation.
	bli_rntm_set_ways_for_op
	(
	  BLIS_GEMM,
	  BLIS_LEFT, // ignored for gemm/hemm/symm
	  bli_obj_length( &c_local ),
	  bli_obj_width( &c_local ),
	  bli_obj_width( &a_local ),
	  rntm
	);

	// cp/betap are what actually get passed to the back-end. By default they
	// refer to the C alias and to beta (already BLIS_ONE at this point); the
	// extra-memory path below may redirect them to ct and BLIS_ZERO.
	obj_t* cp    = &c_local;
	obj_t* betap = beta;

#ifdef BLIS_ENABLE_GEMM_MD
#ifdef BLIS_ENABLE_GEMM_MD_EXTRA_MEM
	// If any of the following conditions are met, create a temporary matrix
	// conformal to C into which we will accumulate the matrix product:
	// - the storage precision of C differs from the computation precision;
	// - the domains are mixed as crr;
	// - the storage format of C does not match the preferred orientation
	//   of the ccr or crc cases.
	// Then, after the computation is complete, this matrix will be copied
	// or accumulated back to C.
	const bool_t is_ccr_mismatch =
	             ( bli_gemm_md_is_ccr( &a_local, &b_local, &c_local ) &&
	               !bli_obj_is_col_stored( &c_local ) );
	const bool_t is_crc_mismatch =
	             ( bli_gemm_md_is_crc( &a_local, &b_local, &c_local ) &&
	               !bli_obj_is_row_stored( &c_local ) );

	obj_t  ct;
	bool_t use_ct = FALSE;

	// FGVZ: Consider adding another guard here that only creates and uses a
	// temporary matrix for accumulation if k < c * kc, where c is some small
	// constant like 2. And don't forget to use the same conditional for the
	// castm() and free() at the end.
	if (
	     bli_obj_prec( &c_local ) != bli_obj_comp_prec( &c_local ) ||
	     bli_gemm_md_is_crr( &a_local, &b_local, &c_local ) ||
	     is_ccr_mismatch ||
	     is_crc_mismatch
	   )
	{
		use_ct = TRUE;
	}

	// If we need a temporary matrix conformal to C for whatever reason,
	// we create it and prepare to use it now.
	if ( use_ct )
	{
		const dim_t m     = bli_obj_length( &c_local );
		const dim_t n     = bli_obj_width( &c_local );
		      inc_t rs    = bli_obj_row_stride( &c_local );
		      inc_t cs    = bli_obj_col_stride( &c_local );

		      num_t dt_ct = bli_obj_domain( &c_local ) |
		                    bli_obj_comp_prec( &c_local );

		// When performing the crr case, accumulate to a contiguously-stored
		// real matrix so we do not have to repeatedly update C with general
		// stride.
		if ( bli_gemm_md_is_crr( &a_local, &b_local, &c_local ) )
			dt_ct = BLIS_REAL | bli_obj_comp_prec( &c_local );

		// When performing the mismatched ccr or crc cases, now is the time
		// to specify the appropriate storage so the gemm_md_c2r_ref() virtual
		// microkernel can output directly to C (instead of using a temporary
		// microtile).
		if      ( is_ccr_mismatch ) { rs = 1; cs = m; }
		else if ( is_crc_mismatch ) { rs = n; cs = 1; }

		bli_obj_create( dt_ct, m, n, rs, cs, &ct );

		const num_t dt_exec = bli_obj_exec_dt( &c_local );
		const num_t dt_comp = bli_obj_comp_dt( &c_local );

		bli_obj_set_target_dt( dt_ct, &ct );
		bli_obj_set_exec_dt( dt_exec, &ct );
		bli_obj_set_comp_dt( dt_comp, &ct );

		// A naive approach would cast C to the computation datatype,
		// compute with beta, and then cast the result back to the
		// user-provided output matrix. However, we employ a different
		// approach that halves the number of memops on C (or its
		// typecast temporary) by writing the A*B product directly to
		// temporary storage, and then using xpbym to scale the
		// output matrix by beta and accumulate/cast the A*B product.
		//bli_castm( &c_local, &ct );
		betap = &BLIS_ZERO;

		cp = &ct;
	}
#endif
#endif

	// Invoke the internal back-end via the thread handler.
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_GEMM, // operation family id
	  alpha,
	  &a_local,
	  &b_local,
	  betap,
	  cp,
	  cntx,
	  rntm,
	  cntl
	);

#ifdef BLIS_ENABLE_GEMM_MD
#ifdef BLIS_ENABLE_GEMM_MD_EXTRA_MEM
	// If we created a temporary matrix conformal to C for whatever reason,
	// we copy/accumulate the result back to C and then release the object.
	if ( use_ct )
	{
		obj_t beta_local;

		// Recover the beta that was attached to the C alias earlier so it
		// can be applied while accumulating ct into C.
		bli_obj_scalar_detach( &c_local, &beta_local );

		//bli_castnzm( &ct, &c_local );
		bli_xpbym( &ct, &beta_local, &c_local );

		bli_obj_free( &ct );
	}
#endif
#endif
}

// -----------------------------------------------------------------------------

// Everything from here to the end of this file is compiled out: the outer
// '#if 0' disables the first branch, and the '#else' branch contains only a
// nested '#if 0'. The code refers to a, b, c, alpha, beta, cntx and cntl as
// if it were located inside a function body.
#if 0
	if ( bli_obj_dt( a ) != bli_obj_dt( b ) ||
	     bli_obj_dt( a ) != bli_obj_dt( c ) ||
	     bli_obj_comp_prec( c ) != bli_obj_prec( c ) )
	{
		const bool_t a_is_real = bli_obj_is_real( a );
		const bool_t a_is_comp = bli_obj_is_complex( a );
		const bool_t b_is_real = bli_obj_is_real( b );
		const bool_t b_is_comp = bli_obj_is_complex( b );
		const bool_t c_is_real = bli_obj_is_real( c );
		const bool_t c_is_comp = bli_obj_is_complex( c );

		const bool_t a_is_single = bli_obj_is_single_prec( a );
		const bool_t a_is_double = bli_obj_is_double_prec( a );
		const bool_t b_is_single = bli_obj_is_single_prec( b );
		const bool_t b_is_double = bli_obj_is_double_prec( b );
		const bool_t c_is_single = bli_obj_is_single_prec( c );
		const bool_t c_is_double = bli_obj_is_double_prec( c );

		const bool_t comp_single = bli_obj_comp_prec( c ) == BLIS_SINGLE_PREC;
		const bool_t comp_double = bli_obj_comp_prec( c ) == BLIS_DOUBLE_PREC;

		const bool_t mixeddomain = bli_obj_domain( c ) != bli_obj_domain( a ) ||
		                           bli_obj_domain( c ) != bli_obj_domain( b );

		( void )a_is_real; ( void )a_is_comp;
		( void )b_is_real; ( void )b_is_comp;
		( void )c_is_real; ( void )c_is_comp;
		( void )a_is_single; ( void )a_is_double;
		( void )b_is_single; ( void )b_is_double;
		( void )c_is_single; ( void )c_is_double;
		( void )comp_single; ( void )comp_double;

		if (
		     //( c_is_comp && a_is_comp && b_is_real ) ||
		     //( c_is_comp && a_is_real && b_is_comp ) ||
		     //( c_is_real && a_is_comp && b_is_comp ) ||
		     //( c_is_comp && a_is_real && b_is_real ) ||
		     //( c_is_real && a_is_comp && b_is_real ) ||
		     //( c_is_real && a_is_real && b_is_comp ) ||
		     //FALSE
		     TRUE
		   )
		{
			if (
			     ( c_is_single && a_is_single && b_is_single && mixeddomain ) ||
			     ( c_is_single && a_is_single && b_is_single && comp_single ) ||
			     ( c_is_single && a_is_single && b_is_single && comp_double ) ||
			     ( c_is_single && a_is_single && b_is_double                ) ||
			     ( c_is_single && a_is_double && b_is_single                ) ||
			     ( c_is_double && a_is_single && b_is_single                ) ||
			     ( c_is_single && a_is_double && b_is_double                ) ||
			     ( c_is_double && a_is_single && b_is_double                ) ||
			     ( c_is_double && a_is_double && b_is_single                ) ||
			     ( c_is_double && a_is_double && b_is_double && comp_single ) ||
			     ( c_is_double && a_is_double && b_is_double && comp_double ) ||
			     ( c_is_double && a_is_double && b_is_double && mixeddomain ) ||
			     FALSE
			   )
				bli_gemm_md_front( alpha, a, b, beta, c, cntx, cntl );
			else
				bli_gemm_md_zgemm( alpha, a, b, beta, c, cntx, cntl );
		}
		else
			bli_gemm_md_zgemm( alpha, a, b, beta, c, cntx, cntl );
		return;
	}
#else
#if 0
	// If any of the storage datatypes differ, or if the execution precision
	// differs from the storage precision of C, utilize the mixed datatype
	// code path.
	// NOTE: We could check the exec dt against the storage dt of C, but for
	// now we don't support the caller setting the execution domain
	// explicitly.
	if ( bli_obj_dt( a ) != bli_obj_dt( b ) ||
	     bli_obj_dt( a ) != bli_obj_dt( c ) ||
	     bli_obj_comp_prec( c ) != bli_obj_prec( c ) )
	{
		bli_gemm_md_front( alpha, a, b, beta, c, cntx, cntl );
		return;
	}
#endif
#endif

blis-0.6.1/frame/3/gemm/bli_gemm_front.h000066400000000000000000000040421360743507500177760ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_gemm_front ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ); #ifdef BLIS_ENABLE_SMALL_MATRIX err_t bli_gemm_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); #endif blis-0.6.1/frame/3/gemm/bli_gemm_int.c000066400000000000000000000075431360743507500174440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_gemm_int ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a_local; obj_t b_local; obj_t c_local; gemm_var_oft f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemm_basic_check( alpha, a, b, beta, c, cntx ); // If C has a zero dimension, return early. if ( bli_obj_has_zero_dim( c ) ) return; // If A or B has a zero dimension, scale C by beta and return early. 
if ( bli_obj_has_zero_dim( a ) || bli_obj_has_zero_dim( b ) ) { if ( bli_thread_am_ochief( thread ) ) bli_scalm( beta, c ); bli_thread_obarrier( thread ); return; } // If A or B is marked as being filled with zeros, scale C by beta and // return early. if ( bli_obj_is_zeros( a ) || bli_obj_is_zeros( b ) ) { // This should never execute. bli_abort(); if ( bli_thread_am_ochief( thread ) ) bli_scalm( beta, c ); bli_thread_obarrier( thread ); return; } // Alias A, B, and C in case we need to update attached scalars. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); // If alpha is non-unit, typecast and apply it to the scalar attached // to B. if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( alpha, &b_local ); } // If beta is non-unit, typecast and apply it to the scalar attached // to C. if ( !bli_obj_equals( beta, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( beta, &c_local ); } // Create the next node in the thrinfo_t structure. bli_thrinfo_grow( rntm, cntl, thread ); // Extract the function pointer from the current control tree node. f = bli_cntl_var_func( cntl ); // Somewhat hackish support for 4m1b method implementation. { ind_t im = bli_cntx_method( cntx ); if ( im != BLIS_NAT ) { if ( im == BLIS_4M1B ) if ( f == bli_gemm_ker_var2 ) f = bli_gemm4mb_ker_var2; } } // Invoke the variant. f ( &a_local, &b_local, &c_local, cntx, rntm, cntl, thread ); } blis-0.6.1/frame/3/gemm/bli_gemm_int.h000066400000000000000000000035441360743507500174460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_gemm_int ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); blis-0.6.1/frame/3/gemm/bli_gemm_ker_var1.c000066400000000000000000000042621360743507500203570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// NOTE: The entire contents of this file (including the #include) are
// compiled out by the '#if 0' below.
#if 0

#include "blis.h"

// bli_gemm_ker_var1(): macro-kernel variant 1, expressed via variant 2.
//
// Induces a transposition on each of a, b and c in place (the objects passed
// by the caller are the ones modified) and then calls bli_gemm_ker_var2()
// with the a and b arguments exchanged. All remaining arguments are
// forwarded unchanged.
void bli_gemm_ker_var1
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	// Implement _ker_var1() in terms of _ker_var2() by transposing the
	// entire suboperation (which also requires swapping A and B).
	bli_obj_induce_trans( a );
	bli_obj_induce_trans( b );
	bli_obj_induce_trans( c );

	// Note the argument order: b is passed first, then a.
	bli_gemm_ker_var2( b, a, c, cntx, rntm, cntl, thread );
}

#endif
blis-0.6.1/frame/3/gemm/bli_gemm_ker_var2.c000066400000000000000000000302251360743507500203560ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

// Signature shared by the type-specific macro-kernels generated at the bottom
// of this file by GENTFUNC / INSERT_GENTFUNC_BASIC0. All buffers are passed
// as void* and cast to the concrete element type inside each instance.
#define FUNCPTR_T gemm_fp

typedef void (*FUNCPTR_T)
     (
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, inc_t is_a,
                  dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, inc_t is_b,
                  dim_t pd_b, inc_t ps_b,
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of type-specific macro-kernels; bli_gemm_ker_var2() indexes it with
// the execution datatype.
static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var2);


// bli_gemm_ker_var2(): object-level entry point of the gemm macro-kernel.
//
// Extracts dimensions, strides, panel dimensions/strides, pack schemas and
// buffer addresses from the a, b and c objects, multiplies the scalars
// attached to A and B into a single alpha, takes beta from the scalar
// attached to C, and then calls the type-specific function selected by
// ftypes[dt_exec].
//
// When BLIS_ENABLE_GEMM_MD is defined and the storage datatype of C differs
// from its execution datatype, the call is handed to bli_gemm_ker_var2_md()
// instead and this function returns immediately.
//
// The cntl argument is only forwarded to bli_gemm_ker_var2_md(); it is not
// passed to the type-specific function.
void bli_gemm_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
#ifdef BLIS_ENABLE_GEMM_MD
	// By now, A and B have been packed and cast to the execution precision.
	// In most cases, such as when storage precision of C differs from the
	// execution precision, we utilize the mixed datatype code path. However,
	// a few cases still fall within this kernel, such as mixed domain with
	// equal precision (ccr, crc, rcc), hence those expressions being disabled
	// in the conditional below.
	if ( //( bli_obj_domain( c ) != bli_obj_domain( a ) ) ||
	     //( bli_obj_domain( c ) != bli_obj_domain( b ) ) ||
	     ( bli_obj_dt( c ) != bli_obj_exec_dt( c ) )
	   )
	{
		bli_gemm_ker_var2_md( a, b, c, cntx, rntm, cntl, thread );
		return;
	}
#endif

	num_t     dt_exec   = bli_obj_exec_dt( c );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	inc_t     is_a      = bli_obj_imag_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	inc_t     is_b      = bli_obj_imag_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// If 1m is being employed on a column- or row-stored matrix with a
	// real-valued beta, we can use the real domain macro-kernel, which
	// eliminates a little overhead associated with the 1m virtual
	// micro-kernel.
	// NOTE: the recast helper receives pointers to dt_exec, the dimensions,
	// the panel parameters and the strides of C, so any of them may be
	// rewritten before the dispatch below.
#if 1
	if ( bli_cntx_method( cntx ) == BLIS_1M )
	{
		bli_gemm_ind_recast_1m_params
		(
		  &dt_exec,
		  schema_a,
		  c,
		  &m, &n, &k,
		  &pd_a, &ps_a,
		  &pd_b, &ps_b,
		  &rs_c, &cs_c
		);
	}
#endif

#ifdef BLIS_ENABLE_GEMM_MD
	// Tweak parameters in select mixed domain cases (rcc, crc, ccr).
	bli_gemm_md_ker_var2_recast
	(
	  &dt_exec,
	  bli_obj_dt( a ),
	  bli_obj_dt( b ),
	  bli_obj_dt( c ),
	  &m, &n, &k,
	  &pd_a, &ps_a,
	  &pd_b, &ps_b,
	  c,
	  &rs_c, &cs_c
	);
#endif

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, is_a,
	          pd_a, ps_a,
	   buf_b, rs_b, is_b,
	          pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


// Template for the type-specific macro-kernel; INSERT_GENTFUNC_BASIC0 below
// instantiates it with concrete (ctype, ch) pairs.
//
// Each instance walks the n dimension in steps of NR (= pd_b) and, inside
// that, the m dimension in steps of MR (= pd_a), restricted to the index
// ranges that bli_thread_range_jrir() assigns to this thread for the two
// loops. For every tile it records the addresses of the next micro-panels of
// A and B in an auxinfo_t and then calls the gemm micro-kernel obtained from
// the context:
//   - full MR x NR tiles are computed directly into C using beta;
//   - partial (edge) tiles are computed into the stack buffer ct with a zero
//     beta and then merged into C with xpbys_mxn, which applies beta to C.
//
// The function returns without doing anything if any of m, n, k is zero.
//
// The macro body is backslash-continued; multi-line block comments inside it
// carry the continuation only on their final line.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	/*const dim_t     PACKMR     = cs_a;*/ \
	/*const dim_t     PACKNR     = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           i, j; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( is_a, &aux ); \
	bli_auxinfo_set_is_b( is_b, &aux ); \
\
	/* The 'thread' argument points to the thrinfo_t node for the 2nd (jr)
	   loop around the microkernel. Here we query the thrinfo_t node for the
	   1st (ir) loop around the microkernel. */ \
	thrinfo_t* caucus    = bli_thrinfo_sub_node( thread ); \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
	dim_t ir_nt  = bli_thread_n_way( caucus ); \
	dim_t ir_tid = bli_thread_work_id( caucus ); \
\
	dim_t jr_start, jr_end; \
	dim_t ir_start, ir_end; \
	dim_t jr_inc,   ir_inc; \
\
	/* Determine the thread range and increment for the 2nd and 1st loops.
	   NOTE: The definition of bli_thread_range_jrir() will depend on whether
	   slab or round-robin partitioning was requested at configure-time. */ \
	bli_thread_range_jrir( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
	bli_thread_range_jrir( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = ir_start; i < ir_end; i += ir_inc ) \
		{ \
			ctype* restrict a2; \
\
			a1  = a_cast + i * rstep_a; \
			c11 = c1     + i * rstep_c; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* Compute the addresses of the next panels of A and B. */ \
			a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); \
			if ( bli_is_last_iter( i, ir_end, ir_tid, ir_nt ) ) \
			{ \
				a2 = a_cast; \
				b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); \
				if ( bli_is_last_iter( j, jr_end, jr_tid, jr_nt ) ) \
					b2 = b_cast; \
			} \
\
			/* Save addresses of next panels of A and B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_next_a( a2, &aux ); \
			bli_auxinfo_set_next_b( b2, &aux ); \
\
			/* Handle interior and edge cases separately. */ \
			if ( m_cur == MR && n_cur == NR ) \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  beta_cast, \
				  c11, rs_c, cs_c, \
				  &aux, \
				  cntx  \
				); \
			} \
			else \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  zero, \
				  ct, rs_ct, cs_ct, \
				  &aux, \
				  cntx  \
				); \
\
				/* Scale the bottom edge of C and add the result from above. */ \
				PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
				                        ct,  rs_ct, cs_ct, \
				                        beta_cast, \
				                        c11, rs_c,  cs_c ); \
			} \
		} \
	} \
\
/*
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \
*/ \
}

INSERT_GENTFUNC_BASIC0( gemm_ker_var2 )

blis-0.6.1/frame/3/gemm/bli_gemm_ker_var2_md.c000066400000000000000000000320701360743507500210360ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

#ifdef BLIS_ENABLE_GEMM_MD

#define FUNCPTR_T gemm_fp

// Function pointer type shared by every typed macro-kernel instantiated from
// the GENTFUNC2 template defined later in this file. The parameter list
// matches that template's parameter list one-for-one.
typedef void (*FUNCPTR_T)
     (
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, inc_t is_a,
                  dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, inc_t is_b,
                  dim_t pd_b, inc_t ps_b,
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Two-dimensional table of typed macro-kernels. bli_gemm_ker_var2_md() indexes
// it as ftypes[dt_c][dt_exec], i.e. (storage datatype of C, execution
// datatype).
static FUNCPTR_T GENARRAY2_ALL(ftypes,gemm_ker_var2_md);


//
// Object-based entry point of the mixed-datatype gemm macro-kernel.
//
// Queries the dimensions, strides, panel parameters, pack schemas and buffer
// addresses from the (packed) objects a and b and from c, merges the scalars
// attached to a and b into a single alpha, lets
// bli_gemm_md_ker_var2_recast() adjust the parameters for certain mixed-domain
// cases, and finally calls the typed macro-kernel selected by the storage
// datatype of c and the execution datatype.
//
// NOTE: The cntl argument is not referenced by this function.
//
void bli_gemm_ker_var2_md
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	// dt_exec selects the second index of ftypes; dt_c selects the first.
	num_t     dt_exec   = bli_obj_exec_dt( c );
	num_t     dt_c      = bli_obj_dt( c );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	inc_t     is_a      = bli_obj_imag_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	inc_t     is_b      = bli_obj_imag_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B.
	// NOTE: We know that the internal scalars of A and B are already of the
	// target datatypes because the necessary typecasting would have already
	// taken place during bli_packm_init().
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	// NOTE: We know that scalar_b is of type dt_exec due to the above code
	// that casts the scalars of A and B to dt_exec via scalar_a and scalar_b,
	// and we know that the internal scalar in C is already of the type dt_c
	// due to the casting in the implementation of bli_obj_scalar_attach().
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

#if 0
	// NOTE: Turns out that this optimization will never be employed since
	// currently bli_gemm_ker_var2_md() is only called when the storage
	// datatype of C differs from the execution/computation datatype, and
	// this optimization would only make sense if they are equal.

	// If 1m is being employed on a column- or row-stored matrix with a
	// real-valued beta, we can use the real domain macro-kernel, which
	// eliminates a little overhead associated with the 1m virtual
	// micro-kernel.
	if ( bli_cntx_method( cntx ) == BLIS_1M )
	{
		// Only employ this optimization if the storage datatype of C is
		// equal to the execution/computation datatype.
		if ( dt_c == dt_exec )
		{
			bli_gemm_ind_recast_1m_params
			(
			  &dt_exec,
			  schema_a,
			  c,
			  &m, &n, &k,
			  &pd_a, &ps_a,
			  &pd_b, &ps_b,
			  &rs_c, &cs_c
			);
		}
	}
#endif

	// Tweak parameters in select mixed domain cases (rcc, crc, ccr).
	// NOTE: dt_exec, the dimensions, the panel parameters and the strides
	// of C are passed by address, so any of them may be changed here.
	bli_gemm_md_ker_var2_recast
	(
	  &dt_exec,
	  bli_obj_dt( a ),
	  bli_obj_dt( b ),
	  bli_obj_dt( c ),
	  &m, &n, &k,
	  &pd_a, &ps_a,
	  &pd_b, &ps_b,
	  c,
	  &rs_c, &cs_c
	);

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_c][dt_exec];

	// Invoke the function.
	f( schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, is_a,
	          pd_a, ps_a,
	   buf_b, rs_b, is_b,
	          pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// Template for the typed mixed-datatype macro-kernels.
//
//   ctype_c / chc: C type and type character used for the matrix C and for
//                  beta (c_cast and beta_cast are declared as ctype_c*).
//   ctype_e / che: C type and type character used for A, B, alpha and the
//                  temporary microtile ct, and for querying the gemm
//                  micro-kernel from the context.
//
// Every micro-kernel call writes alpha * a1 * b1 (with beta = zero) into the
// temporary microtile ct; the result is then accumulated into C by the
// (che,chc,chc) instance of xpbys_mxn. Unlike a macro-kernel that calls the
// micro-kernel directly on C for interior microtiles, this template has no
// such fast path (the alternative is retained below, commented out).
//
#undef  GENTFUNC2
#define GENTFUNC2( ctype_c, ctype_e, chc, che, varname ) \
\
void PASTEMAC2(chc,che,varname) \
     ( \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t dte = PASTEMAC(che,type); \
	/*const num_t dtc = PASTEMAC(chc,type);*/ \
\
	/* Alias some constants to simpler names. */ \
	const dim_t MR = pd_a; \
	const dim_t NR = pd_b; \
	/*const dim_t PACKMR = cs_a;*/ \
	/*const dim_t PACKNR = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(che,gemm_ukr_ft) \
	gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dte, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	/* NOTE: In this template ct is used for every microtile, not only for
	   edge cases, and its strides are chosen from the micro-kernel's
	   column/row preference (col_pref) below. */ \
	ctype_e ct[ BLIS_STACK_BUF_MAX_SIZE \
	            / sizeof( ctype_e ) ] \
	        __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dte, BLIS_GEMM_UKR, cntx ); \
	const inc_t rs_ct = ( col_pref ? 1 : NR ); \
	const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
	ctype_e* restrict zero = PASTEMAC(che,0); \
	ctype_e* restrict a_cast = a; \
	ctype_e* restrict b_cast = b; \
	ctype_c* restrict c_cast = c; \
	ctype_e* restrict alpha_cast = alpha; \
	ctype_c* restrict beta_cast = beta; \
	ctype_e* restrict b1; \
	ctype_c* restrict c1; \
\
	dim_t m_iter, m_left; \
	dim_t n_iter, n_left; \
	dim_t i, j; \
	dim_t m_cur; \
	dim_t n_cur; \
	inc_t rstep_a; \
	inc_t cstep_b; \
	inc_t rstep_c, cstep_c; \
	auxinfo_t aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(che,set0s_mxn)( MR, NR, \
	                         ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	/* A partial (edge) micro-panel counts as one additional iteration. */ \
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( is_a, &aux ); \
	bli_auxinfo_set_is_b( is_b, &aux ); \
\
	/* The 'thread' argument points to the thrinfo_t node for the 2nd (jr)
	   loop around the microkernel. Here we query the thrinfo_t node for the
	   1st (ir) loop around the microkernel. */ \
	thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
	dim_t ir_nt  = bli_thread_n_way( caucus ); \
	dim_t ir_tid = bli_thread_work_id( caucus ); \
\
	dim_t jr_start, jr_end; \
	dim_t ir_start, ir_end; \
	dim_t jr_inc,   ir_inc; \
\
	/* Determine the thread range and increment for the 2nd and 1st loops.
	   NOTE: The definition of bli_thread_range_jrir() will depend on whether
	   slab or round-robin partitioning was requested at configure-time. */ \
	bli_thread_range_jrir( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
	bli_thread_range_jrir( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype_e* restrict a1; \
		ctype_c* restrict c11; \
		ctype_e* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = ir_start; i < ir_end; i += ir_inc ) \
		{ \
			ctype_e* restrict a2; \
\
			a1  = a_cast + i * rstep_a; \
			c11 = c1     + i * rstep_c; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* Compute the addresses of the next panels of A and B. */ \
			/* On the last ir iteration, the next panel of A wraps around to
			   the first panel and the next panel of B advances; on the last
			   jr iteration it wraps around to the first panel of B too. */ \
			a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); \
			if ( bli_is_last_iter( i, ir_end, ir_tid, ir_nt ) ) \
			{ \
				a2 = a_cast; \
				b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); \
				if ( bli_is_last_iter( j, jr_end, jr_tid, jr_nt ) ) \
					b2 = b_cast; \
			} \
\
			/* Save addresses of next panels of A and B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_next_a( a2, &aux ); \
			bli_auxinfo_set_next_b( b2, &aux ); \
\
			/* Always save the micropanel product to the local microtile and
			   then accumulate it into C via the xpbys_mxn macro. */ \
			/*if ( 1 )*/ \
			{ \
				/*bli_auxinfo_set_dt_on_output( dte, &aux );*/ \
\
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  zero, \
				  ct, rs_ct, cs_ct, \
				  &aux, \
				  cntx  \
				); \
\
				/* Scale the microtile of C and add the result from above. */ \
				PASTEMAC3(che,chc,chc,xpbys_mxn) \
				( \
				  m_cur, n_cur, \
				  ct, rs_ct, cs_ct, \
				  beta_cast, \
				  c11, rs_c, cs_c \
				); \
			} \
			/*
			else if ( m_cur == MR && n_cur == NR ) \
			{ \
				bli_auxinfo_set_dt_on_output( dtc, &aux ); \
\
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  ( ctype_e* )beta_cast, \
				  ( ctype_e* )c11, rs_c, cs_c, \
				  &aux, \
				  cntx  \
				); \
			} \
			else \
			{ \
				bli_auxinfo_set_dt_on_output( dte, &aux ); \
\
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  zero, \
				  ct, rs_ct, cs_ct, \
				  &aux, \
				  cntx  \
				); \
\
				PASTEMAC3(che,chc,chc,xpbys_mxn) \
				( \
				  m_cur, n_cur, \
				  ct, rs_ct, cs_ct, \
				  beta_cast, \
				  c11, rs_c, cs_c \
				); \
			} \
			*/ \
		} \
	} \
\
/*
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \
*/ \
}

INSERT_GENTFUNC2_BASIC0( gemm_ker_var2_md )
INSERT_GENTFUNC2_MIXDP0( gemm_ker_var2_md )

#endif
blis-0.6.1/frame/3/gemm/bli_gemm_md.c000066400000000000000000000647541360743507500172610ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_GEMM_MD void bli_gemm_md ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; const bool_t a_is_real = bli_obj_is_real( a ); const bool_t a_is_comp = bli_obj_is_complex( a ); const bool_t b_is_real = bli_obj_is_real( b ); const bool_t b_is_comp = bli_obj_is_complex( b ); const bool_t c_is_real = bli_obj_is_real( c ); const bool_t c_is_comp = bli_obj_is_complex( c ); if ( c_is_real && a_is_real && b_is_real ) { // C_real += A_real * B_real doms = bli_gemm_md_rrr( a, b, beta, c, cntx_local, cntx ); } else if ( c_is_comp && a_is_comp && b_is_comp ) { // C_complex += A_complex * B_complex doms = bli_gemm_md_ccc( a, b, beta, c, cntx_local, cntx ); } else if ( c_is_comp && a_is_comp && b_is_real ) { // C_complex += A_complex * B_real doms = bli_gemm_md_ccr( a, b, beta, c, cntx_local, cntx ); } else if ( c_is_comp && a_is_real && b_is_comp ) { // C_complex += A_real * B_complex doms = bli_gemm_md_crc( a, b, beta, c, cntx_local, cntx ); } else if ( c_is_real && a_is_comp && b_is_comp ) { // C_real += A_complex * B_complex doms = bli_gemm_md_rcc( a, b, beta, c, cntx_local, cntx ); } else if ( c_is_comp && a_is_real && b_is_real ) { // C_complex += A_real * B_real doms = bli_gemm_md_crr( a, b, beta, c, cntx_local, cntx ); } else if ( c_is_real && a_is_comp && b_is_real ) { // C_real += A_complex * B_real doms = bli_gemm_md_rcr( a, b, beta, c, cntx_local, cntx ); } else if ( c_is_real && a_is_real && b_is_comp ) { // C_real += A_real * B_complex doms = bli_gemm_md_rrc( a, b, beta, c, cntx_local, cntx ); } else { doms.comp = BLIS_REAL; doms.exec = BLIS_REAL; // This should never execute. bli_abort(); } // Extract the computation and execution domains from the struct // returned above. dom_t dom_comp = doms.comp; dom_t dom_exec = doms.exec; // Inspect the computation precision of C. (The user may have set // this explicitly to request the precision in which the computation // should take place.) 
prec_t prec_comp = bli_obj_comp_prec( c ); // The computation precision tells us the target precision of A and B. // NOTE: We don't set the target domain here. The target domain would // either be unchanged, or would have been changed in one of the eight // domain cases above. bli_obj_set_target_prec( prec_comp, a ); bli_obj_set_target_prec( prec_comp, b ); // Combine the execution domain with the computation precision to form // the execution datatype. (The computation precision and execution // precision are always equal.) num_t dt_exec = dom_exec | prec_comp; // Set the execution datatypes of A, B, and C. bli_obj_set_exec_dt( dt_exec, a ); bli_obj_set_exec_dt( dt_exec, b ); bli_obj_set_exec_dt( dt_exec, c ); // Combine the computation precision and computation domain to form the // computation datatype. num_t dt_comp = dom_comp | prec_comp; // Set the computation datatypes of A, B, and C. bli_obj_set_comp_dt( dt_comp, a ); bli_obj_set_comp_dt( dt_comp, b ); bli_obj_set_comp_dt( dt_comp, c ); } // ----------------------------------------------------------------------------- // cab mddm_t bli_gemm_md_ccr ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; // We assume that the requested computation domain is complex. //dom_t dom_comp_in = bli_obj_comp_domain( c ); //dom_t dom_comp_in = BLIS_COMPLEX; // For ccr, the computation (ukernel) will be real, but the execution // will appear complex to other parts of the implementation. doms.comp = BLIS_REAL; doms.exec = BLIS_COMPLEX; // Here we construct the computation datatype, which for the ccr case // is equal to the real projection of the execution datatype, and use // that computation datatype to query the corresponding ukernel output // preference. 
const num_t dt = BLIS_REAL | bli_obj_comp_prec( c ); const bool_t row_pref = bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, BLIS_GEMM_UKR, *cntx ); // We can only perform this case of mixed-domain gemm, C += A*B where // B is real, if the microkernel prefers column output. If it prefers // row output, we must induce a transposition and perform C += A*B // where A (formerly B) is real. if ( row_pref ) { bli_obj_swap( a, b ); bli_obj_induce_trans( a ); bli_obj_induce_trans( b ); bli_obj_induce_trans( c ); return bli_gemm_md_crc( a, b, beta, c, cntx_local, cntx ); } // Create a local copy of the context and then prepare to use this // context instead of the one passed in. *cntx_local = **cntx; *cntx = cntx_local; // Copy the real domain blocksizes into the slots of their complex // counterparts. blksz_t* blksz_mr = bli_cntx_get_blksz( BLIS_MR, *cntx ); blksz_t* blksz_nr = bli_cntx_get_blksz( BLIS_NR, *cntx ); blksz_t* blksz_mc = bli_cntx_get_blksz( BLIS_MC, *cntx ); blksz_t* blksz_nc = bli_cntx_get_blksz( BLIS_NC, *cntx ); blksz_t* blksz_kc = bli_cntx_get_blksz( BLIS_KC, *cntx ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_mr, BLIS_SCOMPLEX, blksz_mr ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_mr, BLIS_DCOMPLEX, blksz_mr ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_nr, BLIS_SCOMPLEX, blksz_nr ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_nr, BLIS_DCOMPLEX, blksz_nr ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_mc, BLIS_SCOMPLEX, blksz_mc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_mc, BLIS_DCOMPLEX, blksz_mc ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_nc, BLIS_SCOMPLEX, blksz_nc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_nc, BLIS_DCOMPLEX, blksz_nc ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_kc, BLIS_SCOMPLEX, blksz_kc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_kc, BLIS_DCOMPLEX, blksz_kc ); // Halve both the real and complex MR's (which are both real MR's). 
bli_blksz_scale_def_max( 1, 2, BLIS_FLOAT, blksz_mr ); bli_blksz_scale_def_max( 1, 2, BLIS_DOUBLE, blksz_mr ); bli_blksz_scale_def_max( 1, 2, BLIS_SCOMPLEX, blksz_mr ); bli_blksz_scale_def_max( 1, 2, BLIS_DCOMPLEX, blksz_mr ); // Halve both the real and complex MC's (which are both real MC's). bli_blksz_scale_def_max( 1, 2, BLIS_FLOAT, blksz_mc ); bli_blksz_scale_def_max( 1, 2, BLIS_DOUBLE, blksz_mc ); bli_blksz_scale_def_max( 1, 2, BLIS_SCOMPLEX, blksz_mc ); bli_blksz_scale_def_max( 1, 2, BLIS_DCOMPLEX, blksz_mc ); // Use the default pack schemas in the context. // static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx ) func_t* l3_vir_ukrs = bli_cntx_get_l3_vir_ukrs( BLIS_GEMM_UKR, *cntx ); // Rather than check which complex datatype dt_comp refers to, we set // the mixed-domain virtual microkernel for both types. bli_func_set_dt( bli_cgemm_md_c2r_ref, BLIS_SCOMPLEX, l3_vir_ukrs ); bli_func_set_dt( bli_zgemm_md_c2r_ref, BLIS_DCOMPLEX, l3_vir_ukrs ); // Return the computation and execution domains. return doms; } // ----------------------------------------------------------------------------- // cab mddm_t bli_gemm_md_crc ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; // We assume that the requested computation domain is complex. //dom_t dom_comp_in = bli_obj_comp_domain( c ); //dom_t dom_comp_in = BLIS_COMPLEX; // For crc, the computation (ukernel) will be real, but the execution // will appear complex to other parts of the implementation. doms.comp = BLIS_REAL; doms.exec = BLIS_COMPLEX; // Here we construct the computation datatype, which for the crc case // is equal to the real projection of the execution datatype, and use // that computation datatype to query the corresponding ukernel output // preference. 
const num_t dt = BLIS_REAL | bli_obj_comp_prec( c ); const bool_t col_pref = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, *cntx ); // We can only perform this case of mixed-domain gemm, C += A*B where // A is real, if the microkernel prefers row output. If it prefers // column output, we must induce a transposition and perform C += A*B // where B (formerly A) is real. if ( col_pref ) { bli_obj_swap( a, b ); bli_obj_induce_trans( a ); bli_obj_induce_trans( b ); bli_obj_induce_trans( c ); return bli_gemm_md_ccr( a, b, beta, c, cntx_local, cntx ); } // Create a local copy of the context and then prepare to use this // context instead of the one passed in. *cntx_local = **cntx; *cntx = cntx_local; // Copy the real domain blocksizes into the slots of their complex // counterparts. blksz_t* blksz_mr = bli_cntx_get_blksz( BLIS_MR, *cntx ); blksz_t* blksz_nr = bli_cntx_get_blksz( BLIS_NR, *cntx ); blksz_t* blksz_mc = bli_cntx_get_blksz( BLIS_MC, *cntx ); blksz_t* blksz_nc = bli_cntx_get_blksz( BLIS_NC, *cntx ); blksz_t* blksz_kc = bli_cntx_get_blksz( BLIS_KC, *cntx ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_mr, BLIS_SCOMPLEX, blksz_mr ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_mr, BLIS_DCOMPLEX, blksz_mr ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_nr, BLIS_SCOMPLEX, blksz_nr ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_nr, BLIS_DCOMPLEX, blksz_nr ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_mc, BLIS_SCOMPLEX, blksz_mc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_mc, BLIS_DCOMPLEX, blksz_mc ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_nc, BLIS_SCOMPLEX, blksz_nc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_nc, BLIS_DCOMPLEX, blksz_nc ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_kc, BLIS_SCOMPLEX, blksz_kc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_kc, BLIS_DCOMPLEX, blksz_kc ); // Halve both the real and complex NR's (which are both real NR's). 
bli_blksz_scale_def_max( 1, 2, BLIS_FLOAT, blksz_nr ); bli_blksz_scale_def_max( 1, 2, BLIS_DOUBLE, blksz_nr ); bli_blksz_scale_def_max( 1, 2, BLIS_SCOMPLEX, blksz_nr ); bli_blksz_scale_def_max( 1, 2, BLIS_DCOMPLEX, blksz_nr ); // Halve both the real and complex NC's (which are both real NC's). bli_blksz_scale_def_max( 1, 2, BLIS_FLOAT, blksz_nc ); bli_blksz_scale_def_max( 1, 2, BLIS_DOUBLE, blksz_nc ); bli_blksz_scale_def_max( 1, 2, BLIS_SCOMPLEX, blksz_nc ); bli_blksz_scale_def_max( 1, 2, BLIS_DCOMPLEX, blksz_nc ); // Use the default pack schemas in the context. // static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx ) func_t* l3_vir_ukrs = bli_cntx_get_l3_vir_ukrs( BLIS_GEMM_UKR, *cntx ); // Rather than check which complex datatype dt_comp refers to, we set // the mixed-domain virtual microkernel for both types. bli_func_set_dt( bli_cgemm_md_c2r_ref, BLIS_SCOMPLEX, l3_vir_ukrs ); bli_func_set_dt( bli_zgemm_md_c2r_ref, BLIS_DCOMPLEX, l3_vir_ukrs ); // Return the computation and execution domains. return doms; } // ----------------------------------------------------------------------------- // cab mddm_t bli_gemm_md_rcc ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; // We assume that the requested computation domain is complex. //dom_t dom_comp_in = bli_obj_comp_domain( c ); //dom_t dom_comp_in = BLIS_COMPLEX; // For rcc, the computation (ukernel) will be real, and since the output // matrix C is also real, so must be the execution domain. doms.comp = BLIS_REAL; doms.exec = BLIS_REAL; // Create a local copy of the context and then prepare to use this // context instead of the one passed in. *cntx_local = **cntx; *cntx = cntx_local; // Copy the real domain blocksizes into the slots of their complex // counterparts. 
blksz_t* blksz_mr = bli_cntx_get_blksz( BLIS_MR, *cntx ); blksz_t* blksz_nr = bli_cntx_get_blksz( BLIS_NR, *cntx ); blksz_t* blksz_mc = bli_cntx_get_blksz( BLIS_MC, *cntx ); blksz_t* blksz_nc = bli_cntx_get_blksz( BLIS_NC, *cntx ); blksz_t* blksz_kc = bli_cntx_get_blksz( BLIS_KC, *cntx ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_mr, BLIS_SCOMPLEX, blksz_mr ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_mr, BLIS_DCOMPLEX, blksz_mr ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_nr, BLIS_SCOMPLEX, blksz_nr ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_nr, BLIS_DCOMPLEX, blksz_nr ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_mc, BLIS_SCOMPLEX, blksz_mc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_mc, BLIS_DCOMPLEX, blksz_mc ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_nc, BLIS_SCOMPLEX, blksz_nc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_nc, BLIS_DCOMPLEX, blksz_nc ); bli_blksz_copy_dt( BLIS_FLOAT, blksz_kc, BLIS_SCOMPLEX, blksz_kc ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz_kc, BLIS_DCOMPLEX, blksz_kc ); // Halve both the real and complex KC's (which are both real KC's). bli_blksz_scale_def_max( 1, 2, BLIS_FLOAT, blksz_kc ); bli_blksz_scale_def_max( 1, 2, BLIS_DOUBLE, blksz_kc ); bli_blksz_scale_def_max( 1, 2, BLIS_SCOMPLEX, blksz_kc ); bli_blksz_scale_def_max( 1, 2, BLIS_DCOMPLEX, blksz_kc ); // Use the 1r pack schema for both A and B with the conjugation // of A or B toggled (to produce ar * br - ai * bi). bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_1R, *cntx ); bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_1R, *cntx ); bli_obj_toggle_conj( b ); // We also need to copy over the packm kernels from the 1m // context. We query the address of that context here. // NOTE: This is needed for situations where the rcc case does not // involve any casting to different precisions, since currently // bli_packm_blk_var1() is coded to hand off control to // bli_packm_blk_var1_md() only when the storage datatype differs from // the target datatype. 
(The packm_blk_var1_md() function has "built-in" // support for packing to 1r (and 1e) schemas, whereas the // packm_blk_var1() function relies on packm kernels for packing to 1r. const num_t dt_complex = bli_obj_dt( a ); cntx_t* cntx_1m = bli_gks_query_ind_cntx( BLIS_1M, dt_complex ); func_t* cntx_funcs = bli_cntx_packm_kers_buf( *cntx ); func_t* cntx_1m_funcs = bli_cntx_packm_kers_buf( cntx_1m ); for ( dim_t i = 0; i <= BLIS_PACKM_31XK_KER; ++i ) { cntx_funcs[ i ] = cntx_1m_funcs[ i ]; } // Return the computation and execution domains. return doms; } // ----------------------------------------------------------------------------- // cab mddm_t bli_gemm_md_crr ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; #ifndef BLIS_ENABLE_GEMM_MD_EXTRA_MEM obj_t c_real; #endif // We assume that the requested computation domain is real. //dom_t dom_comp_in = bli_obj_comp_domain( c ); //dom_t dom_comp_in = BLIS_REAL; // For crr, the computation (ukernel) will be real, and since we will // be updating only the real part of the output matrix C, the exectuion // domain is also real. doms.comp = BLIS_REAL; doms.exec = BLIS_REAL; // Since the A*B product is real, we can update only the real part of // C. Thus, we convert the obj_t for the complex matrix to one that // represents only the real part. HOWEVER, there are two situations in // which we forgo this trick: // - If extra memory optimizations are enabled, we should leave C alone // since we'll be computing A*B to a temporary matrix and accumulating // that result back to C, and in order for that to work, we need to // allow that code to continue accessing C as a complex matrix. // - Even if extra memory optimizations are diabled, logically projecting // C as a real matrix can still cause problems if beta is non-unit. In // that situation, the implementation won't get a chance to scale the // imaginary components of C by beta, and thus it would compute the // wrong answer. 
Thus, if beta is non-unit, we must leave C alone. #ifndef BLIS_ENABLE_GEMM_MD_EXTRA_MEM if ( bli_obj_equals( beta, &BLIS_ONE ) ) { bli_obj_real_part( c, &c_real ); // Overwrite the complex obj_t with its real-only alias. *c = c_real; } #endif // Use the default pack schemas in the context. // Return the computation and execution domains. return doms; } // ----------------------------------------------------------------------------- // cab mddm_t bli_gemm_md_rcr ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; obj_t a_real; // We assume that the requested computation domain is real. //dom_t dom_comp_in = bli_obj_comp_domain( c ); //dom_t dom_comp_in = BLIS_REAL; // For rcr, the computation (ukernel) will be real, and since the output // matrix C is also real, so must be the execution domain. doms.comp = BLIS_REAL; doms.exec = BLIS_REAL; // Convert the obj_t for the complex matrix to one that represents only // the real part. bli_obj_real_part( a, &a_real ); // Overwrite the complex obj_t with its real-only alias. *a = a_real; // Use the default pack schemas in the context. // Return the computation and execution domains. return doms; } // ----------------------------------------------------------------------------- // cab mddm_t bli_gemm_md_rrc ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; obj_t b_real; // We assume that the requested computation domain is real. //dom_t dom_comp_in = bli_obj_comp_domain( c ); //dom_t dom_comp_in = BLIS_REAL; // For rcr, the computation (ukernel) will be real, and since the output // matrix C is also real, so must be the execution domain. doms.comp = BLIS_REAL; doms.exec = BLIS_REAL; // Convert the obj_t for the complex matrix to one that represents only // the real part. bli_obj_real_part( b, &b_real ); // Overwrite the complex obj_t with its real-only alias. *b = b_real; // Use the default pack schemas in the context. 
// Return the computation and execution domains. return doms; } // ----------------------------------------------------------------------------- // cab mddm_t bli_gemm_md_rrr ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; // We assume that the requested computation domain is real. //dom_t dom_comp_in = bli_obj_comp_domain( c ); //dom_t dom_comp_in = BLIS_REAL; // For rrr, the computation (ukernel) and execution domains are both // real. doms.comp = BLIS_REAL; doms.exec = BLIS_REAL; // Use the default pack schemas in the context. // Return the computation and execution domains. return doms; } // ----------------------------------------------------------------------------- // cab mddm_t bli_gemm_md_ccc ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ) { mddm_t doms; // We assume that the requested computation domain is complex. //dom_t dom_comp_in = bli_obj_comp_domain( c ); //dom_t dom_comp_in = BLIS_COMPLEX; // For ccc, the computation (ukernel) and execution domains are both // complex. doms.comp = BLIS_COMPLEX; doms.exec = BLIS_COMPLEX; // Use the default pack schemas in the context. // Return the computation and execution domains. return doms; } // ----------------------------------------------------------------------------- #if 0 void bli_gemm_md_front ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { bli_init_once(); obj_t a_local; obj_t b_local; obj_t c_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemm_check( alpha, a, b, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Alias A, B, and C in case we need to apply transformations. 
bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_swap( &a_local, &b_local ); bli_obj_induce_trans( &a_local ); bli_obj_induce_trans( &b_local ); bli_obj_induce_trans( &c_local ); } cntx_t cntx_local; // Handle mixed domain cases in bli_gemm_md(), which may modify // the objects or the context. (If the context is modified, cntx // is adjusted to point to cntx_local.) bli_gemm_md( &a_local, &b_local, beta, &c_local, &cntx_local, &cntx ); // Record the threading for each level within the context. bli_rntm_set_ways_for_op ( BLIS_GEMM, BLIS_LEFT, // ignored for gemm/hemm/symm bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // Invoke the internal back-end via the thread handler. 
bli_l3_thread_decorator ( bli_gemm_int, BLIS_GEMM, // operation family id alpha, &a_local, &b_local, beta, &c_local, cntx, rntm, cntl ); } // ----------------------------------------------------------------------------- void bli_gemm_md_zgemm ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { bli_init_once(); obj_t a_local; obj_t b_local; obj_t c_local; #if 1 obj_t am, bm, cm; obj_t* c_orig; //if ( is_md == TRUE ) { //num_t dt_c2 = bli_obj_dt( c ); //num_t dt_c1 = bli_dt_proj_to_complex( dt_c2 ); //num_t dt_c = bli_dt_proj_to_double_prec( dt_c1 ); //num_t dt_c = bli_obj_dt_proj_to_complex( c ); num_t dt_c = BLIS_DCOMPLEX; if ( bli_obj_is_single_prec( c ) ) dt_c = BLIS_SCOMPLEX; else dt_c = BLIS_DCOMPLEX; if ( bli_obj_is_real( a ) && bli_obj_is_real( b ) && bli_obj_is_real( c ) ) dt_c = bli_dt_proj_to_real( dt_c ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width_after_trans( a ); bli_obj_create( dt_c, m, k, 0, 0, &am ); bli_obj_create( dt_c, k, n, 0, 0, &bm ); bli_obj_create( dt_c, m, n, 0, 0, &cm ); //bli_projm( a, &am ); //bli_projm( b, &bm ); //bli_projm( c, &cm ); bli_castm( a, &am ); bli_castm( b, &bm ); bli_castm( c, &cm ); c_orig = c; a = &am; b = &bm; c = &cm; } #endif // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemm_check( alpha, a, b, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. 
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_swap( &a_local, &b_local ); bli_obj_induce_trans( &a_local ); bli_obj_induce_trans( &b_local ); bli_obj_induce_trans( &c_local ); } { // A sort of hack for communicating the desired pach schemas for A and B // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and // bli_l3_cntl_create_if()). This allows us to access the schemas from // the control tree, which hopefully reduces some confusion, particularly // in bli_packm_init(). if ( bli_cntx_method( cntx ) == BLIS_NAT ) { bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); } else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) { pack_t schema_a = bli_cntx_schema_a_block( cntx ); pack_t schema_b = bli_cntx_schema_b_panel( cntx ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &b_local ); } } // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. bli_rntm_set_ways_for_op ( BLIS_GEMM, BLIS_LEFT, // ignored for gemm/hemm/symm bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // Invoke the internal back-end via the thread handler. bli_l3_thread_decorator ( bli_gemm_int, BLIS_GEMM, // operation family id alpha, &a_local, &b_local, beta, &c_local, cntx, rntm, cntl ); #if 1 //if ( is_md == TRUE ) { //bli_projm( &cm, c_orig ); bli_castm( &cm, c_orig ); bli_obj_free( &am ); bli_obj_free( &bm ); bli_obj_free( &cm ); } #endif } #endif #endif blis-0.6.1/frame/3/gemm/bli_gemm_md.h000066400000000000000000000241651360743507500172560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_gemm_md_c2r_ref.h" // Define a local struct type that makes returning two values easier. 
typedef struct mddm_s { dom_t comp; dom_t exec; } mddm_t; void bli_gemm_md ( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_local, cntx_t** cntx ); mddm_t bli_gemm_md_ccc( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_l, cntx_t** cntx ); mddm_t bli_gemm_md_ccr( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_l, cntx_t** cntx ); mddm_t bli_gemm_md_crc( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_l, cntx_t** cntx ); mddm_t bli_gemm_md_rcc( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_l, cntx_t** cntx ); mddm_t bli_gemm_md_rrc( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_l, cntx_t** cntx ); mddm_t bli_gemm_md_rcr( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_l, cntx_t** cntx ); mddm_t bli_gemm_md_crr( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_l, cntx_t** cntx ); mddm_t bli_gemm_md_rrr( obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx_l, cntx_t** cntx ); // ----------------------------------------------------------------------------- void bli_gemm_md_front ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ); void bli_gemm_md_zgemm ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ); // ----------------------------------------------------------------------------- static bool_t bli_gemm_md_is_crr( obj_t* a, obj_t* b, obj_t* c ) { bool_t r_val = FALSE; // NOTE: The last conditional subexpression is necessary if/when we // allow the user to specify the computation domain. (The computation // domain is currently ignored, but once it is honored as a user- // settable value, it will affect the execution domain, which is what // is checked below. Until then, the last expression is not actually // necessary since crr is already unconditionally associated with an // execution domain of BLIS_REAL.) 
if ( bli_obj_is_complex( c ) && bli_obj_is_real( a ) && bli_obj_is_real( b ) && bli_obj_exec_domain( c ) == BLIS_REAL ) r_val = TRUE; return r_val; } static bool_t bli_gemm_md_is_ccr( obj_t* a, obj_t* b, obj_t* c ) { bool_t r_val = FALSE; // NOTE: The last conditional subexpression is necessary if/when we // allow the user to specify the computation domain. (The computation // domain is currently ignored, but once it is honored as a user- // settable value, it will affect the execution domain, which is what // is checked below. Until then, the last expression is not actually // necessary since ccr is already unconditionally associated with an // execution domain of BLIS_COMPLEX.) if ( bli_obj_is_complex( c ) && bli_obj_is_complex( a ) && bli_obj_is_real( b ) && bli_obj_exec_domain( c ) == BLIS_COMPLEX ) r_val = TRUE; return r_val; } static bool_t bli_gemm_md_is_crc( obj_t* a, obj_t* b, obj_t* c ) { bool_t r_val = FALSE; // NOTE: The last conditional subexpression is necessary if/when we // allow the user to specify the computation domain. (The computation // domain is currently ignored, but once it is honored as a user- // settable value, it will affect the execution domain, which is what // is checked below. Until then, the last expression is not actually // necessary since crc is already unconditionally associated with an // execution domain of BLIS_COMPLEX.) 
if ( bli_obj_is_complex( c ) && bli_obj_is_real( a ) && bli_obj_is_complex( b ) && bli_obj_exec_domain( c ) == BLIS_COMPLEX ) r_val = TRUE; return r_val; } // ----------------------------------------------------------------------------- static void bli_gemm_md_ker_var2_recast ( num_t* dt_comp, num_t dt_a, num_t dt_b, num_t dt_c, dim_t* m, dim_t* n, dim_t* k, inc_t* pd_a, inc_t* ps_a, inc_t* pd_b, inc_t* ps_b, obj_t* c, inc_t* rs_c, inc_t* cs_c ) { if ( bli_is_real( dt_c ) && bli_is_complex( dt_a ) && bli_is_complex( dt_b ) ) { // The rcc case is executed with a real macrokernel, so we need to // double the k dimension (because both A and B are packed to the 1r // schema), and also the panel strides of A and B since they were // packed as complex matrices and we now need to convert them to // units of real elements. *k *= 2; *ps_a *= 2; *ps_b *= 2; } else if ( bli_is_complex( dt_c ) && bli_is_real( dt_a ) && bli_is_complex( dt_b ) ) { #if 1 obj_t beta; bli_obj_scalar_detach( c, &beta ); if ( //bli_obj_imag_equals( &beta, &BLIS_ZERO ) && bli_obj_imag_is_zero( &beta ) && bli_is_row_stored( *rs_c, *cs_c ) && bli_obj_prec( c ) == bli_obj_comp_prec( c ) ) { // If beta is real, and C is not general-stored, and the computation // precision is equal to the storage precision of C, we can use the // real macrokernel (and real microkernel, which is already stored // to the real virtual microkernel slots of the context) instead of // the complex macrokernel and c2r virtual microkernel. *dt_comp = bli_dt_proj_to_real( *dt_comp ); *n *= 2; *pd_b *= 2; *ps_b *= 2; *rs_c *= 2; } else #endif { // Generally speaking, the crc case is executed with a complex // macrokernel, so we need to halve the panel stride of A (which // is real) since the macrokernel will perform the pointer // arithmetic in units of complex elements. 
*ps_a /= 2; } } else if ( bli_is_complex( dt_c ) && bli_is_complex( dt_a ) && bli_is_real( dt_b ) ) { #if 1 obj_t beta; bli_obj_scalar_detach( c, &beta ); if ( //bli_obj_imag_equals( &beta, &BLIS_ZERO ) && bli_obj_imag_is_zero( &beta ) && bli_is_col_stored( *rs_c, *cs_c ) && bli_obj_prec( c ) == bli_obj_comp_prec( c ) ) { // If beta is real, and C is not general-stored, and the computation // precision is equal to the storage precision of C, we can use the // real macrokernel (and real microkernel, which is already stored // to the real virtual microkernel slots of the context) instead of // the complex macrokernel and c2r virtual microkernel. *dt_comp = bli_dt_proj_to_real( *dt_comp ); *m *= 2; *pd_a *= 2; *ps_a *= 2; *cs_c *= 2; } else #endif { // Generally speaking, the ccr case is executed with a complex // macrokernel, so we need to halve the panel stride of B (which // is real) since the macrokernel will perform the pointer // arithmetic in units of complex elements. *ps_b /= 2; } } #if 0 else if ( bli_is_real( dt_c ) && bli_is_real( dt_a ) && bli_is_real( dt_b ) ) { // No action needed. //printf( "gemm_md.h: rrr: m n k are now %d %d %d\n", (int)*m, (int)*n, (int)*k ); } else if ( bli_is_complex( dt_c ) && bli_is_real( dt_a ) && bli_is_real( dt_b ) ) { // No action needed. } else if ( bli_is_real( dt_c ) && bli_is_complex( dt_a ) && bli_is_real( dt_b ) ) { // No action needed. } else if ( bli_is_real( dt_c ) && bli_is_real( dt_a ) && bli_is_complex( dt_b ) ) { // No action needed. } #endif } // ----------------------------------------------------------------------------- // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* a, \ obj_t* b, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm, \ cntl_t* cntl, \ thrinfo_t* thread \ ); GENPROT( gemm_ker_var2_md ) // // Prototype BLAS-like interfaces with void pointer operands. 
// #undef GENTPROT2 #define GENTPROT2( ctype_c, ctype_e, chc, che, varname ) \ \ void PASTEMAC2(chc,che,varname) \ ( \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ); INSERT_GENTPROT2_BASIC0( gemm_ker_var2_md ) INSERT_GENTPROT2_MIXDP0( gemm_ker_var2_md ) blis-0.6.1/frame/3/gemm/bli_gemm_md_c2r_ref.c000066400000000000000000000204051360743507500206440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

#ifdef BLIS_ENABLE_GEMM_MD

// GENTFUNCCO generates, for one complex type (ctype, with real counterpart
// ctype_r), the reference "c2r" virtual gemm microkernel. The generated
// function:
//  - queries the native real-domain gemm microkernel (and its row/column
//    output preference) from the context;
//  - reinterprets a and b as arrays of ctype_r and uses only the real part
//    of alpha;
//  - either lets the real microkernel update c directly using the real part
//    of beta, or, when using_ct is set, has it write alpha_r*a*b into the
//    local buffer ct and then accumulates ct into c using the full beta.
#undef  GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, suf ) \
\
void PASTEMAC2(ch,opname,suf) \
     ( \
       dim_t               k, \
       ctype*     restrict alpha, \
       ctype*     restrict a, \
       ctype*     restrict b, \
       ctype*     restrict beta, \
       ctype*     restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t*    restrict cntx  \
     ) \
{ \
	const num_t       dt        = PASTEMAC(ch,type); \
	const num_t       dt_r      = PASTEMAC(chr,type); \
\
	PASTECH(chr,gemm_ukr_ft) \
	                  rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \
	const bool_t      col_pref  = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt_r, BLIS_GEMM_UKR, cntx ); \
	const bool_t      row_pref  = !col_pref; \
\
	const dim_t       mr        = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \
	const dim_t       nr        = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \
\
	/* NOTE(review): ct has element type ctype but its element count is
	   computed with sizeof( ctype_r ). Presumably sizeof( ctype ) is twice
	   sizeof( ctype_r ), which would make the buffer occupy twice
	   BLIS_STACK_BUF_MAX_SIZE bytes -- confirm whether that is intended. */ \
	ctype             ct[ BLIS_STACK_BUF_MAX_SIZE \
	                      / sizeof( ctype_r ) ] \
	                      __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	inc_t             rs_ct; \
	inc_t             cs_ct; \
\
	ctype_r* restrict a_r       = ( ctype_r* )a; \
\
	ctype_r* restrict b_r       = ( ctype_r* )b; \
\
	ctype_r* restrict zero_r    = PASTEMAC(chr,0); \
\
	ctype_r* restrict alpha_r   = &PASTEMAC(ch,real)( *alpha ); \
	/*
	ctype_r* restrict alpha_i   = &PASTEMAC(ch,imag)( *alpha ); \
	*/ \
\
	ctype_r* restrict beta_r    = &PASTEMAC(ch,real)( *beta ); \
	ctype_r* restrict beta_i    = &PASTEMAC(ch,imag)( *beta ); \
\
	ctype_r*          c_use; \
	inc_t             rs_c_use; \
	inc_t             cs_c_use; \
\
	bool_t            using_ct; \
\
	/* This virtual microkernel is used by ccr and crc mixed-domain cases
	   when any of the following conditions are met:
	   - beta is complex (ie: has a non-zero imaginary component)
	   - C is general-stored
	   - the computation precision differs from the storage of C
	   If, however, none of the above conditions are met, then the real
	   domain macrokernel can be (and will be) called instead of calling
	   the complex macrokernel (and this virtual microkernel). */ \
\
	/*
	PASTEMAC(chr,fprintm)( stdout, "gemm_ukr: a", mr, k, \
	                       a_r, 1, mr, "%5.2f", "" ); \
	PASTEMAC(chr,fprintm)( stdout, "gemm_ukr: b", k, nr, \
	                       b_r, nr, 1, "%5.2f", "" ); \
	PASTEMAC(chr,fprintm)( stdout, "gemm_ukr: c before", mr, nr, \
	                       c_use, rs_c_use, cs_c_use, "%5.2f", "" ); \
	*/ \
\
	/* SAFETY CHECK: The higher level implementation should never
	   allow an alpha with non-zero imaginary component to be passed
	   in, because it can't be applied properly using the 1m method.
	   If alpha is not real, then something is very wrong. */ \
	/*
	if ( !PASTEMAC(chr,eq0)( *alpha_i ) ) \
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
	*/ \
\
	/* If beta has a non-zero imaginary component OR if c is stored with
	   general stride, then we compute the alpha*a*b product into temporary
	   storage and then accumulate that result into c afterwards. Note that
	   the other two cases concerning disagreement between the storage of C
	   and the output preference of the micro-kernel, should ONLY occur in
	   the context of trsm, whereby this virtual micro-kernel is called
	   directly from the trsm macro-kernel to update the micro-tile b11
	   that exists within the packed row-panel of B. Indeed that is the
	   reason those cases MUST be explicitly handled. */ \
	if      ( !PASTEMAC(chr,eq0)( *beta_i ) )                 using_ct = TRUE; \
	else if ( bli_is_col_stored( rs_c, cs_c ) && row_pref ) using_ct = TRUE; \
	else if ( bli_is_row_stored( rs_c, cs_c ) && col_pref ) using_ct = TRUE; \
	else if ( bli_is_gen_stored( rs_c, cs_c ) )             using_ct = TRUE; \
	else                                                    using_ct = FALSE; \
\
\
	if ( using_ct ) \
	{ \
		/* In the atypical cases, we compute the result into temporary
		   workspace ct and then accumulate it back to c at the end. */ \
\
		/* Set the strides of ct based on the preference of the underlying
		   native real domain gemm micro-kernel. Note that we set the ct
		   strides in units of complex elements. */ \
		if ( col_pref ) { rs_ct = 1;  cs_ct = mr; } \
		else            { rs_ct = nr; cs_ct = 1; } \
\
		c_use    = ( ctype_r* )ct; \
		rs_c_use = rs_ct; \
		cs_c_use = cs_ct; \
\
		/* Convert the strides of ct from units of complex elements to
		   units of real elements by doubling the non-unit stride. ct is
		   always given row or column strides above, so general storage
		   does not need to be considered here. */ \
		if ( bli_is_col_stored( rs_c_use, cs_c_use ) ) cs_c_use *= 2; \
		else                                           rs_c_use *= 2; \
\
\
		/* ct = alpha_r * a * b; zero_r is passed in the beta position, so
		   the real micro-kernel overwrites ct. The caller's beta is applied
		   in the accumulation step below. */ \
		rgemm_ukr \
		( \
		  k, \
		  alpha_r, \
		  a_r, \
		  b_r, \
		  zero_r, \
		  c_use, rs_c_use, cs_c_use, \
		  data, \
		  cntx  \
		); \
\
		dim_t i, j; \
\
		/* Accumulate the final result in ct back to c. The three branches
		   handle beta == 1 (adds), beta == 0 (copys), and any other beta
		   (xpbys), indexing both ct and c as complex elements. */ \
		if ( PASTEMAC(ch,eq1)( *beta ) ) \
		{ \
			for ( j = 0; j < nr; ++j ) \
			for ( i = 0; i < mr; ++i ) \
			{ \
				PASTEMAC(ch,adds)( *(ct + i*rs_ct + j*cs_ct), \
				                   *(c  + i*rs_c  + j*cs_c ) ); \
			} \
		} \
		else if ( PASTEMAC(ch,eq0)( *beta ) ) \
		{ \
			for ( j = 0; j < nr; ++j ) \
			for ( i = 0; i < mr; ++i ) \
			{ \
				PASTEMAC(ch,copys)( *(ct + i*rs_ct + j*cs_ct), \
				                    *(c  + i*rs_c  + j*cs_c ) ); \
			} \
		} \
		else \
		{ \
			for ( j = 0; j < nr; ++j ) \
			for ( i = 0; i < mr; ++i ) \
			{ \
				PASTEMAC(ch,xpbys)( *(ct + i*rs_ct + j*cs_ct), \
				                    *beta, \
				                    *(c  + i*rs_c  + j*cs_c ) ); \
			} \
		} \
	} \
	else \
	{ \
		/* In the typical cases, we use the real part of beta and
		   accumulate directly into the output matrix c. */ \
\
		c_use    = ( ctype_r* )c; \
		rs_c_use = rs_c; \
		cs_c_use = cs_c; \
\
		/* Convert the strides from being in units of complex elements to
		   be in units of real elements. Note that we don't need to check for
		   general storage here because that case corresponds to the scenario
		   where we are using the ct buffer and its rs_ct/cs_ct strides. */ \
		if ( bli_is_col_stored( rs_c_use, cs_c_use ) ) cs_c_use *= 2; \
		else                                           rs_c_use *= 2; \
\
		/* c = beta_r * c + alpha_r * a * b; */ \
		rgemm_ukr \
		( \
		  k, \
		  alpha_r, \
		  a_r, \
		  b_r, \
		  beta_r, \
		  c_use, rs_c_use, cs_c_use, \
		  data, \
		  cntx  \
		); \
	} \
}

INSERT_GENTFUNCCO_BASIC( gemm_md_c2r, BLIS_REF_SUFFIX )

#endif
blis-0.6.1/frame/3/gemm/bli_gemm_md_c2r_ref.h000066400000000000000000000035431360743507500206550ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // -- Level-3 native micro-kernel prototype redefinitions ---------------------- #undef gemm_ukr_name #define gemm_ukr_name gemm_md_c2r_ref // Include the native micro-kernel API template. #include "bli_l3_ukr.h" blis-0.6.1/frame/3/gemm/bli_gemm_packab.c000066400000000000000000000054301360743507500200640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_gemm_packa ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a_pack; // Pack matrix A according to the control tree node. bli_l3_packm ( a, &a_pack, cntx, rntm, cntl, thread ); // Proceed with execution using packed matrix A. bli_gemm_int ( &BLIS_ONE, &a_pack, b, &BLIS_ONE, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } // ----------------------------------------------------------------------------- void bli_gemm_packb ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t b_pack; // Pack matrix B according to the control tree node. bli_l3_packm ( b, &b_pack, cntx, rntm, cntl, thread ); // Proceed with execution using packed matrix B. 
bli_gemm_int ( &BLIS_ONE, a, &b_pack, &BLIS_ONE, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } blis-0.6.1/frame/3/gemm/bli_gemm_var.h000066400000000000000000000057231360743507500174450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. 
// #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* a, \ obj_t* b, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm, \ cntl_t* cntl, \ thrinfo_t* thread \ ); GENPROT( gemm_blk_var1 ) GENPROT( gemm_blk_var2 ) GENPROT( gemm_blk_var3 ) GENPROT( gemm_packa ) GENPROT( gemm_packb ) GENPROT( gemm_ker_var1 ) GENPROT( gemm_ker_var2 ) // Headers for induced algorithms: GENPROT( gemm4mb_ker_var2 ) // 4m1b // // Prototype BLAS-like interfaces with void pointer operands. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ); INSERT_GENTPROT_BASIC0( gemm_ker_var2 ) // Headers for induced algorithms: INSERT_GENTPROT_BASIC0( gemm4mb_ker_var2 ) // 4m1b blis-0.6.1/frame/3/gemm/ind/000077500000000000000000000000001360743507500154145ustar00rootroot00000000000000blis-0.6.1/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c000066400000000000000000000257571360743507500215510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

#define FUNCPTR_T gemm_fp

// Signature of the typed macro-kernels generated by GENTFUNC below; the
// object-based wrapper calls one of them through a pointer of this type.
typedef void (*FUNCPTR_T)(
                           pack_t  schema_a,
                           pack_t  schema_b,
                           dim_t   m,
                           dim_t   n,
                           dim_t   k,
                           void*   alpha,
                           void*   a, inc_t cs_a, inc_t is_a,
                                      dim_t pd_a, inc_t ps_a,
                           void*   b, inc_t rs_b, inc_t is_b,
                                      dim_t pd_b, inc_t ps_b,
                           void*   beta,
                           void*   c, inc_t rs_c, inc_t cs_c,
                           cntx_t* cntx,
                           rntm_t* rntm,
                           thrinfo_t* thread
                         );

// Table of typed kernels, indexed below by the execution datatype of C.
static FUNCPTR_T GENARRAY(ftypes,gemm4mb_ker_var2);


// Object-based front end of the 4mb macro-kernel. It unpacks dimensions,
// strides, panel parameters and buffers from a, b and c, merges the scalars
// attached to A and B into a single alpha, takes beta from C, and dispatches
// to the typed kernel selected by C's execution datatype.
// The cntl argument is not referenced in this function.
void bli_gemm4mb_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	// m and n come from C; k is taken from the width of A.
	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	inc_t     is_a      = bli_obj_imag_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	inc_t     is_b      = bli_obj_imag_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B. The merged
	// value is read back from scalar_b below.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, is_a,
	          pd_a, ps_a,
	   buf_b, rs_b, is_b,
	          pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


// Typed 4mb macro-kernel generator.
//
// The generated function tiles C into MR x NR blocks (MR = pd_a, NR = pd_b).
// The outer (jr) loop walks NR-wide column panels; each thread starts at its
// work id from `thread` and advances by that node's n_way. For every column
// panel the inner (ir) loop over MR-tall row panels is executed twice: first
// with the B schema in auxinfo set to BLIS_PACKED_COL_PANELS_RO and the
// caller's beta, then with BLIS_PACKED_COL_PANELS_IO and beta replaced by one
// so the second pass accumulates into C. The ir loop is distributed the same
// way using the sub-node of `thread`.
//
// Full tiles are written straight to C by the micro-kernel. Edge tiles are
// computed into the stack buffer ct with a zero beta and then merged into C
// via xpbys_mxn using the pass's beta.
//
// The rntm argument is not referenced in the generated body.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	/*const dim_t     PACKMR     = cs_a;*/ \
	/*const dim_t     PACKNR     = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Its strides follow the storage
	   preference reported for the micro-kernel (col_pref), not C's own
	   strides. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref   = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct      = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct      = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict one        = PASTEMAC(ch,1); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           i, j; \
	dim_t           ii; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	/* A partial trailing panel counts as one extra iteration. */ \
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. The B
	   schema is overwritten at the top of each ii pass below. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( is_a, &aux ); \
	bli_auxinfo_set_is_b( is_b, &aux ); \
\
	/* jr-loop partitioning comes from `thread`; ir-loop partitioning
	   comes from its sub-node. */ \
	thrinfo_t* caucus         = bli_thrinfo_sub_node( thread ); \
	dim_t      jr_num_threads = bli_thread_n_way( thread ); \
	dim_t      jr_thread_id   = bli_thread_work_id( thread ); \
	dim_t      ir_num_threads = bli_thread_n_way( caucus ); \
	dim_t      ir_thread_id   = bli_thread_work_id( caucus ); \
\
	dim_t      jr_inc         = jr_num_threads; \
	dim_t      ir_inc         = ir_num_threads; \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_thread_id; j < n_iter; j += jr_num_threads ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* In the 4mb method, we execute the ir loop twice: once for b_r
		   and once for b_i. */ \
		for ( ii = 0; ii < 2; ++ii ) \
		{ \
			ctype* restrict beta_use; \
\
			if ( ii == 0 ) \
			{ \
				/* First pass: apply the caller's beta to C. */ \
				bli_auxinfo_set_schema_b( BLIS_PACKED_COL_PANELS_RO, &aux ); \
				beta_use = beta_cast; \
			} \
			else \
			{ \
				/* Second pass: accumulate onto the first pass's result. */ \
				bli_auxinfo_set_schema_b( BLIS_PACKED_COL_PANELS_IO, &aux ); \
				beta_use = one; \
			} \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \
			{ \
				ctype* restrict a2; \
\
				a1  = a_cast + i * rstep_a; \
				c11 = c1     + i * rstep_c; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); \
				if ( bli_is_last_iter_rr( i, m_iter, ir_thread_id, ir_num_threads ) ) \
				{ \
					a2 = a_cast; \
					b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); \
					if ( bli_is_last_iter_rr( j, n_iter, jr_thread_id, jr_num_threads ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var3 (4m1b): c before", 8, 6, c11, rs_c, cs_c, "%4.1f", "" );*/ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  beta_use, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var3 (4m1b): c after", 8, 6, c11, rs_c, cs_c, "%4.1f", "" );*/ \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel, writing a full
					   MR x NR result into ct. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Scale the bottom edge of C and add the result from above. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        beta_use, \
					                        c11, rs_c,  cs_c ); \
				} \
			} \
		} \
	} \
/*printf( "gemm_ker_var3 (4m1b): returning\n" );*/ \
\
/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var3: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var3: a1", MR, k, a1, 1, MR, "%4.1f", "" );*/ \
}

INSERT_GENTFUNC_BASIC0( gemm4mb_ker_var2 )

blis-0.6.1/frame/3/gemm/ind/bli_gemm_ind_opt.h000066400000000000000000000052031360743507500210540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static void bli_gemm_ind_recast_1m_params ( num_t* dt_exec, pack_t schema_a, obj_t* c, dim_t* m, dim_t* n, dim_t* k, inc_t* pd_a, inc_t* ps_a, inc_t* pd_b, inc_t* ps_b, inc_t* rs_c, inc_t* cs_c ) { obj_t beta; /* Detach the beta scalar from c so that we can test its imaginary component. */ bli_obj_scalar_detach( c, &beta ); /* If beta is in the real domain, and c is row- or column-stored, then we may proceed with the optimization. 
*/ if ( bli_obj_imag_is_zero( &beta ) && !bli_is_gen_stored( *rs_c, *cs_c ) ) { *dt_exec = bli_dt_proj_to_real( *dt_exec ); if ( bli_is_1e_packed( schema_a ) ) { *m *= 2; *n *= 1; *k *= 2; *pd_a *= 2; *ps_a *= 2; *pd_b *= 1; *ps_b *= 2; *rs_c *= 1; *cs_c *= 2; } else /* if ( bli_is_1r_packed( schema_a ) ) */ { *m *= 1; *n *= 2; *k *= 2; *pd_a *= 1; *ps_a *= 2; *pd_b *= 2; *ps_b *= 2; *rs_c *= 2; *cs_c *= 1; } } } blis-0.6.1/frame/3/gemm/ind/old/000077500000000000000000000000001360743507500161725ustar00rootroot00000000000000blis-0.6.1/frame/3/gemm/ind/old/bli_gemm3m2_ker_var2.c000066400000000000000000000256001360743507500222310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

// NOTE(review): this file sits under ind/old/ and its signatures take no
// rntm_t* argument, unlike the gemm variants prototyped in bli_gemm_var.h in
// this same archive -- presumably it is retained for reference and not
// built; confirm before relying on it.

#include "blis.h"

#define FUNCPTR_T gemm_fp

// Signature of the typed macro-kernels generated by GENTFUNC below; the
// object-based wrapper calls one of them through a pointer of this type.
typedef void (*FUNCPTR_T)(
                           pack_t  schema_a,
                           pack_t  schema_b,
                           dim_t   m,
                           dim_t   n,
                           dim_t   k,
                           void*   alpha,
                           void*   a, inc_t cs_a, inc_t is_a,
                                      dim_t pd_a, inc_t ps_a,
                           void*   b, inc_t rs_b, inc_t is_b,
                                      dim_t pd_b, inc_t ps_b,
                           void*   beta,
                           void*   c, inc_t rs_c, inc_t cs_c,
                           cntx_t* cntx,
                           thrinfo_t* thread
                         );

// Table of typed kernels, indexed below by the execution datatype of C.
static FUNCPTR_T GENARRAY(ftypes,gemm3m2_ker_var2);


// Object-based front end of the 3m2 macro-kernel. It unpacks dimensions,
// strides, panel parameters and buffers from a, b and c, merges the scalars
// attached to A and B into a single alpha, takes beta from C, and dispatches
// to the typed kernel selected by C's execution datatype.
// The cntl argument is not referenced in this function.
void bli_gemm3m2_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	// m and n come from C; k is taken from the width of A.
	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	inc_t     is_a      = bli_obj_imag_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	inc_t     is_b      = bli_obj_imag_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B. The merged
	// value is read back from scalar_b below.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, is_a,
	          pd_a, ps_a,
	   buf_b, rs_b, is_b,
	          pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   thread );
}


// Typed 3m2 macro-kernel generator.
//
// The generated function tiles C into MR x NR blocks (MR = pd_a, NR = pd_b).
// The outer (jr) loop walks NR-wide column panels; each thread starts at its
// work id from `thread` and advances by that node's n_way. For every column
// panel the inner (ir) loop over MR-tall row panels is executed three times,
// with the A/B schemas in auxinfo set to the RO, IO and RPI panel schemas in
// turn. The caller's beta is used on the first pass only; the remaining two
// passes use one so that they accumulate into C. The ir loop is distributed
// the same way using the sub-node of `thread`.
//
// Full tiles are written straight to C by the micro-kernel. Edge tiles are
// computed into the stack buffer ct with a zero beta and then merged into C
// via xpbys_mxn using the pass's beta.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       thrinfo_t* thread \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	/*const dim_t     PACKMR     = cs_a;*/ \
	/*const dim_t     PACKNR     = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Its strides follow the storage
	   preference reported for the micro-kernel (col_pref), not C's own
	   strides. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref   = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct      = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct      = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict one        = PASTEMAC(ch,1); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           i, j; \
	dim_t           ii; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	/* A partial trailing panel counts as one extra iteration. */ \
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. Both are
	   overwritten at the top of each ii pass below. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( is_a, &aux ); \
	bli_auxinfo_set_is_b( is_b, &aux ); \
\
	/* jr-loop partitioning comes from `thread`; ir-loop partitioning
	   comes from its sub-node. */ \
	thrinfo_t* caucus         = bli_thrinfo_sub_node( thread ); \
	dim_t      jr_num_threads = bli_thread_n_way( thread ); \
	dim_t      jr_thread_id   = bli_thread_work_id( thread ); \
	dim_t      ir_num_threads = bli_thread_n_way( caucus ); \
	dim_t      ir_thread_id   = bli_thread_work_id( caucus ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_thread_id; j < n_iter; j += jr_num_threads ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* In the 3m2 method, we execute the ir loop thrice: once for
		   a_r[ir] * b_r, once for a_i[ir] * b_i, and once for
		   a_{r+i}[ir] * b_{r+i}. */ \
		for ( ii = 0; ii < 3; ++ii ) \
		{ \
			ctype* restrict beta_use; \
\
			if ( ii == 0 ) \
			{ \
				/* First pass: apply the caller's beta to C. */ \
				bli_auxinfo_set_schema_a( BLIS_PACKED_ROW_PANELS_RO, &aux ); \
				bli_auxinfo_set_schema_b( BLIS_PACKED_COL_PANELS_RO, &aux ); \
				beta_use = beta_cast; \
			} \
			else if ( ii == 1 ) \
			{ \
				/* Second pass: accumulate onto the result so far. */ \
				bli_auxinfo_set_schema_a( BLIS_PACKED_ROW_PANELS_IO, &aux ); \
				bli_auxinfo_set_schema_b( BLIS_PACKED_COL_PANELS_IO, &aux ); \
				beta_use = one; \
			} \
			else \
			{ \
				/* Third pass: accumulate onto the result so far. */ \
				bli_auxinfo_set_schema_a( BLIS_PACKED_ROW_PANELS_RPI, &aux ); \
				bli_auxinfo_set_schema_b( BLIS_PACKED_COL_PANELS_RPI, &aux ); \
				beta_use = one; \
			} \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \
			{ \
				ctype* restrict a2; \
\
				a1  = a_cast + i * rstep_a; \
				c11 = c1     + i * rstep_c; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
				if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
				{ \
					a2 = a_cast; \
					b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
					if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  beta_use, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel, writing a full
					   MR x NR result into ct. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Scale the bottom edge of C and add the result from above. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        beta_use, \
					                        c11, rs_c,  cs_c ); \
				} \
			} \
		} \
	} \
\
/*PASTEMAC(ch,fprintm)( stdout, "gemm3m2_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm3m2_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" );*/ \
}

INSERT_GENTFUNC_BASIC0( gemm3m2_ker_var2 )

blis-0.6.1/frame/3/gemm/ind/old/bli_gemm3m3_packa.c000066400000000000000000000066211360743507500216000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_gemm3m3_packa ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, cntl_t* cntl, thrinfo_t* thread ) { obj_t a_pack; // Make a copy of the context for each stage. cntx_t cntx_ro = *cntx; cntx_t cntx_io = *cntx; cntx_t cntx_rpi = *cntx; // ----------------------------------------------------- // Initialize the context for the real-only stage. bli_gemm3m3_cntx_stage( 0, &cntx_ro ); // Pack matrix the real-only part of A. bli_l3_packm ( a, &a_pack, &cntx_ro, cntl, thread ); // Proceed with execution using packed matrix A. bli_gemm_int ( &BLIS_ONE, &a_pack, b, &BLIS_ONE, c, cntx, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); // Only apply beta within the first of three subproblems. bli_obj_scalar_reset( c ); // ----------------------------------------------------- // Initialize the context for the imag-only stage. bli_gemm3m3_cntx_stage( 1, &cntx_io ); // Pack matrix the imag-only part of A. bli_l3_packm ( a, &a_pack, &cntx_io, cntl, thread ); // Proceed with execution using packed matrix A. bli_gemm_int ( &BLIS_ONE, &a_pack, b, &BLIS_ONE, c, cntx, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); // ----------------------------------------------------- // Initialize the context for the real+imag stage. bli_gemm3m3_cntx_stage( 2, &cntx_rpi ); // Pack matrix the real+imag part of A. bli_l3_packm ( a, &a_pack, &cntx_rpi, cntl, thread ); // Proceed with execution using packed matrix A. 
bli_gemm_int ( &BLIS_ONE, &a_pack, b, &BLIS_ONE, c, cntx, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } blis-0.6.1/frame/3/gemm/other/000077500000000000000000000000001360743507500157635ustar00rootroot00000000000000blis-0.6.1/frame/3/gemm/other/bli_gemm_ker_var2.c000066400000000000000000000251011360743507500214740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var2); void bli_gemm_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // If 1m is being employed on a column- or row-stored matrix with a // real-valued beta, we can use the real domain macro-kernel, which // eliminates a little overhead associated with the 1m virtual // micro-kernel. 
#if 1 if ( bli_is_1m_packed( schema_a ) ) { bli_l3_ind_recast_1m_params ( dt_exec, schema_a, c, m, n, k, pd_a, ps_a, pd_b, ps_b, rs_c, cs_c ); } #endif // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t i, j; \ dim_t m_cur; \ dim_t n_cur; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ dim_t jr_num_threads = bli_thread_n_way( thread ); \ dim_t jr_thread_id = bli_thread_work_id( thread ); \ dim_t ir_num_threads = bli_thread_n_way( caucus ); \ dim_t ir_thread_id = bli_thread_work_id( caucus ); \ \ /* Loop over the n dimension (NR columns at a time). 
*/ \ for ( j = jr_thread_id; j < n_iter; j += jr_num_threads ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the bottom edge of C and add the result from above. 
*/ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemm_ker_var2 ) blis-0.6.1/frame/3/gemm/other/bli_gemm_ker_var2rr.c000066400000000000000000000262261360743507500220510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // void bli_gemm_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. 
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // If 1m is being employed on a column- or row-stored matrix with a // real-valued beta, we can use the real domain macro-kernel, which // eliminates a little overhead associated with the 1m virtual // micro-kernel. if ( bli_is_1m_packed( schema_a ) ) { bli_l3_ind_recast_1m_params ( dt_exec, schema_a, c, m, n, k, pd_a, ps_a, pd_b, ps_b, rs_c, cs_c ); } // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. 
For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t i, j; \ dim_t m_cur; \ dim_t n_cur; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Determine the thread range and increment for each thrinfo_t node. */ \ bli_thread_range_jrir_rr( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_rr( i, ir_end, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, jr_end, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the bottom edge of C and add the result from above. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2rr: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2rr: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2rr: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemm_ker_var2rr ) blis-0.6.1/frame/3/gemm/other/bli_gemm_ker_var2sl.c000066400000000000000000000262201360743507500220360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var2sl); // // -- Macrokernel functions for slab partitioning ------------------------------ // void bli_gemm_ker_var2sl ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = 
bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // If 1m is being employed on a column- or row-stored matrix with a // real-valued beta, we can use the real domain macro-kernel, which // eliminates a little overhead associated with the 1m virtual // micro-kernel. if ( bli_is_1m_packed( schema_a ) ) { bli_l3_ind_recast_1m_params ( dt_exec, schema_a, c, m, n, k, pd_a, ps_a, pd_b, ps_b, rs_c, cs_c ); } // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. 
*/ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t i, j; \ dim_t m_cur; \ dim_t n_cur; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Determine the thread range and increment for each thrinfo_t node. */ \ bli_thread_range_jrir_sl( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_sl( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. 
*/ \ a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_sl( i, ir_end, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_sl( j, jr_end, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the bottom edge of C and add the result from above. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2sl: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2sl: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2sl: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemm_ker_var2sl ) blis-0.6.1/frame/3/gemm/other/bli_gemm_ker_var5.c000066400000000000000000000237361360743507500215130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T)( dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, void_fp gemm_ukr ); static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var5); void bli_gemm_ker_var5( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, gemm_t* cntl, gemm_thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; func_t* gemm_ukrs; void_fp gemm_ukr; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Extract from the context the func_t object containing // the gemm micro-kernel function addresses, and then query the // function address corresponding to the current datatype. gemm_ukrs = bli_cntx_get_l3_ukr( BLIS_GEMM_UKR, cntx ); gemm_ukr = bli_func_get_dt( dt_exec, gemm_ukrs ); // Invoke the function. 
f( m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, gemm_ukr ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname, ukrtype ) \ \ void PASTEMAC(ch,varname)( \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ void_fp gemm_ukr \ ) \ { \ /* Cast the micro-kernel address to its function pointer type. */ \ PASTECH(ch,ukrtype) gemm_ukr_cast = gemm_ukr; \ \ /* Temporary buffer for incremental packing of B. */ \ ctype bp[ PASTEMAC(ch,maxkc) * \ /* !!!! NOTE: This packnr actually needs to be something like maxpacknr if it is to be guaranteed to work in all situations !!!! The right place to define maxpackmr/nr would be in bli_kernel_post_macro_defs.h */ \ PASTEMAC(ch,packnr) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ \ /* Temporary C buffer for edge cases. */ \ ctype ct[ PASTEMAC(ch,maxmr) * \ PASTEMAC(ch,maxnr) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const inc_t rs_ct = 1; \ const inc_t cs_ct = PASTEMAC(ch,maxmr); \ \ /* Alias some constants to simpler names. 
*/ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKNR = rs_b; \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ ctype* restrict b2; \ \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t i, j; \ dim_t m_cur; \ dim_t n_cur; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the panel strides of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_ps_a( ps_a, &aux ); \ bli_auxinfo_set_ps_b( ps_b, &aux ); \ \ b1 = b_cast; \ c1 = c_cast; \ \ /* Since we pack micro-panels of B incrementaly, one at a time, the address of the next micro-panel of B remains constant. */ \ b2 = bp; \ \ /* Save address of next panel of B to the auxinfo_t object. */ \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Loop over the n dimension (NR columns at a time). 
*/ \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Incrementally pack a single micro-panel of B. */ \ PASTEMAC(ch,packm_cxk)( BLIS_NO_CONJUGATE, \ n_cur, \ k, \ one, \ b1, 1, rs_b, \ bp, PACKNR ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + rstep_a; \ if ( bli_is_last_iter( i, m_iter ) ) \ { \ a2 = a_cast; \ } \ \ /* Save address of next panel of A to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr_cast( k, \ alpha_cast, \ a1, \ bp, \ beta_cast, \ c11, rs_c, cs_c, \ &aux ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr_cast( k, \ alpha_cast, \ a1, \ bp, \ zero, \ ct, rs_ct, cs_ct, \ &aux ); \ \ /* Scale the bottom edge of C and add the result from above. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ \ b1 += cstep_b; \ c1 += cstep_c; \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var5: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var5: a1", MR, k, a1, 1, MR, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC( gemm_ker_var5, gemm_ukr_t ) blis-0.6.1/frame/3/gemm/other/bli_gemm_ker_var5.h000066400000000000000000000050731360743507500215120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interface. // void bli_gemm_ker_var5( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, gemm_t* cntl, gemm_thrinfo_t* thread ); // // Prototype BLAS-like interfaces. 
// #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ void_fp gemm_ukr \ ); INSERT_GENTPROT_BASIC( gemm_ker_var5 ) blis-0.6.1/frame/3/hemm/000077500000000000000000000000001360743507500146435ustar00rootroot00000000000000blis-0.6.1/frame/3/hemm/bli_hemm.h000066400000000000000000000032461360743507500165750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_hemm_front.h" blis-0.6.1/frame/3/hemm/bli_hemm_front.c000066400000000000000000000151751360743507500200040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_hemm_front ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { bli_init_once(); obj_t a_local; obj_t b_local; obj_t c_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_hemm_check( side, alpha, a, b, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); #ifdef BLIS_DISABLE_HEMM_RIGHT // NOTE: This case casts right-side hemm in terms of left side. This is // necessary when the current subconfiguration uses a gemm microkernel // that assumes that the packing kernel will have already duplicated // (broadcast) element of B in the packed copy of B. Supporting // duplication within the logic that packs micropanels from Hermitian/ // matrices would be ugly, and so we simply don't support it. As a // consequence, those subconfigurations need a way to force the Hermitian // matrix to be on the left (and thus the general matrix to the on the // right). So our solution is that in those cases, the subconfigurations // simply #define BLIS_DISABLE_HEMM_RIGHT. // NOTE: This case casts right-side hemm in terms of left side. 
This can // lead to the microkernel being executed on an output matrix with the // microkernel's general stride IO case (unless the microkernel supports // both both row and column IO cases as well). // If A is being multiplied from the right, transpose all operands // so that we can perform the computation as if A were being multiplied // from the left. if ( bli_is_right( side ) ) { bli_toggle_side( &side ); bli_obj_induce_trans( &a_local ); bli_obj_induce_trans( &b_local ); bli_obj_induce_trans( &c_local ); } #else // NOTE: This case computes right-side hemm/symm natively by packing // elements of the Hermitian/symmetric matrix A to micropanels of the // right-hand packed matrix operand "B", and elements of the general // matrix B to micropanels of the left-hand packed matrix operand "A". // This code path always gives us the opportunity to transpose the // entire operation so that the effective storage format of the output // matrix matches the microkernel's output preference. Thus, from a // performance perspective, this case is preferred. // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. //if ( !bli_obj_is_1x1( &c_local ) ) // NOTE: This conditional should NOT // be enabled. See issue #342 comments. if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side ); bli_obj_toggle_conj( &a_local ); bli_obj_induce_trans( &b_local ); bli_obj_induce_trans( &c_local ); } // If the Hermitian/symmetric matrix A is being multiplied from the right, // swap A and B so that the Hermitian/symmetric matrix will actually be on // the right. if ( bli_is_right( side ) ) { bli_obj_swap( &a_local, &b_local ); } #endif // Set each alias as the root object. 
// NOTE: We MUST wait until we are done potentially swapping the objects // before setting the root fields! bli_obj_set_as_root( &a_local ); bli_obj_set_as_root( &b_local ); bli_obj_set_as_root( &c_local ); // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. bli_rntm_set_ways_for_op ( BLIS_HEMM, BLIS_LEFT, // ignored for gemm/hemm/symm bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // A sort of hack for communicating the desired pach schemas for A and B // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and // bli_l3_cntl_create_if()). This allows us to access the schemas from // the control tree, which hopefully reduces some confusion, particularly // in bli_packm_init(). if ( bli_cntx_method( cntx ) == BLIS_NAT ) { bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); } else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) { pack_t schema_a = bli_cntx_schema_a_block( cntx ); pack_t schema_b = bli_cntx_schema_b_panel( cntx ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &b_local ); } // Invoke the internal back-end. bli_l3_thread_decorator ( bli_gemm_int, BLIS_GEMM, // operation family id alpha, &a_local, &b_local, beta, &c_local, cntx, rntm, cntl ); } blis-0.6.1/frame/3/hemm/bli_hemm_front.h000066400000000000000000000035401360743507500200020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

//
// Prototype object-based front-end for hemm.
//
// The definition in bli_hemm_front.c checks the operands when error
// checking is enabled, scales c by beta and returns when alpha equals
// BLIS_ZERO, and otherwise passes local aliases of a, b, and c to
// bli_gemm_int() via bli_l3_thread_decorator(). The side argument says
// whether a is multiplied from the left or from the right.
//
void bli_hemm_front
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     );

blis-0.6.1/frame/3/her2k/000077500000000000000000000000001360743507500147305ustar00rootroot00000000000000blis-0.6.1/frame/3/her2k/bli_her2k.h000066400000000000000000000032471360743507500167500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_her2k_front.h" blis-0.6.1/frame/3/her2k/bli_her2k_front.c000066400000000000000000000135471360743507500201570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

//
// Object-based front-end for her2k.
//
// Optionally validates the operands, handles the alpha == 0 shortcut, and
// otherwise computes the rank-2k update as two back-to-back herk-family
// invocations of bli_gemm_int(): the first uses alpha and beta, the second
// uses the conjugate of alpha and BLIS_ONE. The imaginary parts of the
// diagonal of C are zeroed before returning.
//
void bli_her2k_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	bli_init_once();

	obj_t  conj_alpha;
	obj_t  a_op;
	obj_t  bh_op;
	obj_t  b_op;
	obj_t  ah_op;
	obj_t  c_out;
	pack_t schema_a;
	pack_t schema_b;

	// Validate the operands, but only when error checking is switched on.
	if ( bli_error_checking_is_enabled() )
	{
		bli_her2k_check( alpha, a, b, beta, c, cntx );
	}

	// Early exit: when alpha is zero, scale C by beta, zero the imaginary
	// components of its diagonal elements, and stop.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		bli_setid( &BLIS_ZERO, c );
		return;
	}

	// Operate on aliases of A, B, and C so that any transformations applied
	// below are confined to the local copies.
	bli_obj_alias_to( a, &a_op );
	bli_obj_alias_to( b, &b_op );
	bli_obj_alias_to( c, &c_out );
	bli_obj_set_as_root( &c_out );

	// For her2k, the right-hand "B" operand of the first product is B' ...
	bli_obj_alias_to( b, &bh_op );
	bli_obj_induce_trans( &bh_op );
	bli_obj_toggle_conj( &bh_op );

	// ... and the right-hand "B" operand of the second product is A'.
	bli_obj_alias_to( a, &ah_op );
	bli_obj_induce_trans( &ah_op );
	bli_obj_toggle_conj( &ah_op );

	// Build a detached, conjugated copy of alpha for the second product.
	bli_obj_scalar_init_detached_copy_of
	(
	  bli_obj_dt( a ),
	  BLIS_CONJUGATE,
	  alpha,
	  &conj_alpha
	);

	// An optimization: when the storage of C (rows vs. columns) is the one
	// the micro-kernel dislikes, transpose the entire operation so that the
	// micro-kernel can access C in its preferred manner.
	if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_out, BLIS_GEMM_UKR, cntx ) )
	{
		bli_obj_swap( &a_op, &bh_op );
		bli_obj_swap( &b_op, &ah_op );

		bli_obj_induce_trans( &a_op );
		bli_obj_induce_trans( &bh_op );
		bli_obj_induce_trans( &b_op );
		bli_obj_induce_trans( &ah_op );

		bli_obj_induce_trans( &c_out );
	}

	// Interpret the rntm_t object to set the ways of parallelism for each
	// loop, including any adjustments needed for the current operation.
	bli_rntm_set_ways_for_op
	(
	  BLIS_HER2K,
	  BLIS_LEFT, // ignored for her[2]k/syr[2]k
	  bli_obj_length( &c_out ),
	  bli_obj_width( &c_out ),
	  bli_obj_width( &a_op ),
	  rntm
	);

	// A sort of hack for communicating the desired pack schemas for A and B
	// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
	// bli_l3_cntl_create_if()). Storing them on the objects lets the control
	// tree access the schemas, which hopefully reduces some confusion,
	// particularly in bli_packm_init().
	if ( bli_cntx_method( cntx ) == BLIS_NAT )
	{
		schema_a = BLIS_PACKED_ROW_PANELS;
		schema_b = BLIS_PACKED_COL_PANELS;
	}
	else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
	{
		schema_a = bli_cntx_schema_a_block( cntx );
		schema_b = bli_cntx_schema_b_panel( cntx );
	}

	bli_obj_set_pack_schema( schema_a, &a_op );
	bli_obj_set_pack_schema( schema_b, &bh_op );
	bli_obj_set_pack_schema( schema_a, &b_op );
	bli_obj_set_pack_schema( schema_b, &ah_op );

	// Run the internal back-end twice as a herk-family operation. Only the
	// first invocation applies beta.
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_HERK, // operation family id
	  alpha,
	  &a_op,
	  &bh_op,
	  beta,
	  &c_out,
	  cntx,
	  rntm,
	  cntl
	);

	// The second invocation accumulates into C (beta is BLIS_ONE).
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_HERK, // operation family id
	  &conj_alpha,
	  &b_op,
	  &ah_op,
	  &BLIS_ONE,
	  &c_out,
	  cntx,
	  rntm,
	  cntl
	);

	// The Hermitian rank-2k product was formed as A*B'+B*A', diagonal
	// elements included. Mathematically the imaginary components of the
	// diagonal of such a product are always zero, but in practice they
	// sometimes accumulate meaningless non-zero values. Explicitly zero
	// them before returning.
	bli_setid( &BLIS_ZERO, &c_out );
}

blis-0.6.1/frame/3/her2k/bli_her2k_front.h000066400000000000000000000035141360743507500201550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

//
// Prototype object-based front-end for her2k.
//
// The definition in bli_her2k_front.c checks the operands when error
// checking is enabled. When alpha equals BLIS_ZERO it scales c by beta,
// zeroes the imaginary parts of the diagonal of c, and returns. Otherwise
// it invokes bli_gemm_int() twice via bli_l3_thread_decorator() (once with
// alpha and beta, once with the conjugate of alpha and BLIS_ONE) and then
// zeroes the imaginary parts of the diagonal of the result.
//
void bli_her2k_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     );

blis-0.6.1/frame/3/herk/000077500000000000000000000000001360743507500146465ustar00rootroot00000000000000blis-0.6.1/frame/3/herk/bli_herk.h000066400000000000000000000033011360743507500165730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_herk_front.h" #include "bli_herk_var.h" blis-0.6.1/frame/3/herk/bli_herk_front.c000066400000000000000000000113571360743507500200100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

//
// Object-based front-end for herk.
//
// Optionally validates the operands, handles the alpha == 0 shortcut, and
// otherwise forms the product of A with A' through a herk-family invocation
// of bli_gemm_int(). The imaginary parts of the diagonal of C are zeroed
// before returning.
//
void bli_herk_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	bli_init_once();

	obj_t  a_op;
	obj_t  ah_op;
	obj_t  c_out;
	pack_t schema_a;
	pack_t schema_b;

	// Validate the operands, but only when error checking is switched on.
	if ( bli_error_checking_is_enabled() )
	{
		bli_herk_check( alpha, a, beta, c, cntx );
	}

	// Early exit: when alpha is zero, scale C by beta, zero the imaginary
	// components of its diagonal elements, and stop.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		bli_setid( &BLIS_ZERO, c );
		return;
	}

	// Operate on aliases of A and C so that any transformations applied
	// below are confined to the local copies.
	bli_obj_alias_to( a, &a_op );
	bli_obj_alias_to( c, &c_out );
	bli_obj_set_as_root( &c_out );

	// For herk, the right-hand "B" operand is simply A'.
	bli_obj_alias_to( a, &ah_op );
	bli_obj_induce_trans( &ah_op );
	bli_obj_toggle_conj( &ah_op );

	// An optimization: when the storage of C (rows vs. columns) is the one
	// the micro-kernel dislikes, transpose the entire operation so that the
	// micro-kernel can access C in its preferred manner.
	if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_out, BLIS_GEMM_UKR, cntx ) )
	{
		bli_obj_toggle_conj( &a_op );
		bli_obj_toggle_conj( &ah_op );

		bli_obj_induce_trans( &c_out );
	}

	// Interpret the rntm_t object to set the ways of parallelism for each
	// loop, including any adjustments needed for the current operation.
	bli_rntm_set_ways_for_op
	(
	  BLIS_HERK,
	  BLIS_LEFT, // ignored for her[2]k/syr[2]k
	  bli_obj_length( &c_out ),
	  bli_obj_width( &c_out ),
	  bli_obj_width( &a_op ),
	  rntm
	);

	// A sort of hack for communicating the desired pack schemas for A and B
	// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
	// bli_l3_cntl_create_if()). Storing them on the objects lets the control
	// tree access the schemas, which hopefully reduces some confusion,
	// particularly in bli_packm_init().
	if ( bli_cntx_method( cntx ) == BLIS_NAT )
	{
		schema_a = BLIS_PACKED_ROW_PANELS;
		schema_b = BLIS_PACKED_COL_PANELS;
	}
	else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
	{
		schema_a = bli_cntx_schema_a_block( cntx );
		schema_b = bli_cntx_schema_b_panel( cntx );
	}

	bli_obj_set_pack_schema( schema_a, &a_op );
	bli_obj_set_pack_schema( schema_b, &ah_op );

	// Hand the problem to the internal back-end.
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_HERK, // operation family id
	  alpha,
	  &a_op,
	  &ah_op,
	  beta,
	  &c_out,
	  cntx,
	  rntm,
	  cntl
	);

	// The Hermitian rank-k product was formed as A*A', diagonal elements
	// included. Mathematically the imaginary components of the diagonal of
	// such a product are always zero, but in practice they sometimes
	// accumulate meaningless non-zero values. Explicitly zero them before
	// returning.
	bli_setid( &BLIS_ZERO, &c_out );
}

blis-0.6.1/frame/3/herk/bli_herk_front.h000066400000000000000000000034711360743507500200130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

//
// Prototype object-based front-end for herk.
//
// The definition in bli_herk_front.c checks the operands when error
// checking is enabled. When alpha equals BLIS_ZERO it scales c by beta,
// zeroes the imaginary parts of the diagonal of c, and returns. Otherwise
// it invokes bli_gemm_int() via bli_l3_thread_decorator() with a and an
// alias of a that is transposed and conjugated, and then zeroes the
// imaginary parts of the diagonal of the result.
//
void bli_herk_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     );

blis-0.6.1/frame/3/herk/bli_herk_l_ker_var2.c000066400000000000000000000417401360743507500207050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_l_ker_var2); void bli_herk_l_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); 
inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. 
*/ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, ip; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely above the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region above where the diagonal of C intersects the left edge of the panel, adjust the pointer to C and A and treat this case as if the diagonal offset were zero. */ \ if ( diagoffc < 0 ) \ { \ ip = -diagoffc / MR; \ i = ip * MR; \ m = m - i; \ diagoffc = -diagoffc % MR; \ c_cast = c_cast + (i )*rs_c; \ a_cast = a_cast + (ip )*ps_a; \ } \ \ /* If there is a zero region to the right of where the diagonal of C intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffc + m < n ) \ { \ n = diagoffc + m; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. 
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* Save the desired output datatype (indicating no typecasting). */ \ /*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the rectangular part of C, and the triangular portion. */ \ dim_t n_iter_rct; \ dim_t n_iter_tri; \ \ if ( bli_is_strictly_below_diag_n( diagoffc, m, n ) ) \ { \ /* If the entire panel of C does not intersect the diagonal, there is no triangular region, and therefore we can skip the second set of loops. 
*/ \ n_iter_rct = n_iter; \ n_iter_tri = 0; \ } \ else \ { \ /* If the panel of C does intersect the diagonal, compute the number of iterations in the rectangular region by dividing NR into the diagonal offset. Any remainder from this integer division is discarded, which is what we want. That is, we want the rectangular region to contain as many columns of whole microtiles as possible without including any microtiles that intersect the diagonal. The number of iterations in the triangular (or trapezoidal) region is computed as the remaining number of iterations in the n dimension. */ \ n_iter_rct = diagoffc / NR; \ n_iter_tri = n_iter - n_iter_rct; \ } \ \ /* Determine the thread range and increment for the 2nd and 1st loops for the initial rectangular region of C (if it exists). NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. */ \ bli_thread_range_jrir( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* No need to compute the diagonal offset for the rectangular region. */ \ /*diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR;*/ \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. 
*/ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly below the diagonal, we compute and store as we normally would. And if we're strictly above the diagonal, we do nothing and continue. */ \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ /* If there is no triangular region, then we're done. */ \ if ( n_iter_tri == 0 ) return; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop and the default (slab or rr) partitioning in the 1st loop for the remaining triangular region of C. */ \ bli_thread_range_jrir_rr( thread, n_iter_tri, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ \ /* Advance the start and end iteration offsets for the triangular region by the number of iterations used for the rectangular region. */ \ jr_start += n_iter_rct; \ jr_end += n_iter_rct; \ \ /* Loop over the n dimension (NR columns at a time). 
*/ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). */ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly below the diagonal, we compute and store as we normally would. And if we're strictly above the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. 
*/ \ PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_l_ker_var2 ) blis-0.6.1/frame/3/herk/bli_herk_u_ker_var2.c000066400000000000000000000423141360743507500207140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

#define FUNCPTR_T herk_fp

// Signature shared by the datatype-specific macrokernels generated below.
typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffc,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, inc_t is_a,
                  dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, inc_t is_b,
                  dim_t pd_b, inc_t ps_b,
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of typed macrokernels, indexed by execution datatype.
static FUNCPTR_T GENARRAY(ftypes,herk_u_ker_var2);


//
// Object-based front end for the upper-stored herk macrokernel.
//
// Unpacks dimensions, strides, packing metadata and scalars from the
// objects a, b and c, then dispatches to the typed macrokernel selected
// by the execution datatype of c. The scalars attached to a and b are
// multiplied together to form alpha; the scalar attached to c is beta.
// The cntl argument is not referenced in this function.
//
void bli_herk_u_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffc  = bli_obj_diag_offset( c );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	inc_t     is_a      = bli_obj_imag_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	inc_t     is_b      = bli_obj_imag_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffc,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, is_a,
	          pd_a, ps_a,
	   buf_b, rs_b, is_b,
	          pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// Typed macrokernel for an upper-stored C panel.
//
// The panel is split along the n dimension into a triangular/trapezoidal
// region (columns containing microtiles that intersect the diagonal)
// followed by a rectangular region (columns of microtiles lying entirely
// above the diagonal). Microtiles strictly below the diagonal are never
// written.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffc, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	/*const dim_t     PACKMR     = cs_a;*/ \
	/*const dim_t     PACKNR     = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffc_ij; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           i, j, jp; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If the current panel of C is entirely below the diagonal,
	   it is not stored. So we do nothing. */ \
	if ( bli_is_strictly_below_diag_n( diagoffc, m, n ) ) return; \
\
	/* If there is a zero region to the left of where the diagonal of C
	   intersects the top edge of the panel, adjust the pointer to C and B
	   and treat this case as if the diagonal offset were zero.
	   NOTE: It's possible that after this pruning that the diagonal offset
	   is still positive (though it is guaranteed to be less than NR). */ \
	if ( diagoffc > 0 ) \
	{ \
		jp       = diagoffc / NR; \
		j        = jp * NR; \
		n        = n - j; \
		diagoffc = diagoffc % NR; \
		c_cast   = c_cast + (j  )*cs_c; \
		b_cast   = b_cast + (jp )*ps_b; \
	} \
\
	/* If there is a zero region below where the diagonal of C intersects
	   the right edge of the panel, shrink it to prevent "no-op" iterations
	   from executing. */ \
	if ( -diagoffc + n < m ) \
	{ \
		m = -diagoffc + n; \
	} \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( is_a, &aux ); \
	bli_auxinfo_set_is_b( is_b, &aux ); \
\
	/* Save the desired output datatype (indicating no typecasting). */ \
	/*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \
\
	/* The 'thread' argument points to the thrinfo_t node for the 2nd (jr)
	   loop around the microkernel. Here we query the thrinfo_t node for the
	   1st (ir) loop around the microkernel. */ \
	thrinfo_t* caucus    = bli_thrinfo_sub_node( thread ); \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
	dim_t ir_nt  = bli_thread_n_way( caucus ); \
	dim_t ir_tid = bli_thread_work_id( caucus ); \
\
	dim_t jr_start, jr_end; \
	dim_t ir_start, ir_end; \
	dim_t jr_inc,   ir_inc; \
\
	/* Note that we partition the 2nd loop into two regions: the triangular
	   part of C, and the rectangular portion. */ \
	dim_t n_iter_tri; \
	dim_t n_iter_rct; \
\
	if ( bli_is_strictly_above_diag_n( diagoffc, m, n ) ) \
	{ \
		/* If the entire panel of C does not intersect the diagonal, there is
		   no triangular region, and therefore we can skip the first set of
		   loops. */ \
		n_iter_tri = 0; \
		n_iter_rct = n_iter; \
	} \
	else \
	{ \
		/* If the panel of C does intersect the diagonal, compute the number of
		   iterations in the triangular (or trapezoidal) region by dividing NR
		   into the number of rows in C. A non-zero remainder means we need to
		   add one additional iteration. That is, we want the triangular region
		   to contain as few columns of whole microtiles as possible while still
		   including all microtiles that intersect the diagonal. The number of
		   iterations in the rectangular region is computed as the remaining
		   number of iterations in the n dimension. */ \
		n_iter_tri = ( m + diagoffc ) / NR + ( ( m + diagoffc ) % NR ? 1 : 0 ); \
		n_iter_rct = n_iter - n_iter_tri; \
	} \
\
	/* Use round-robin assignment of micropanels to threads in the 2nd loop
	   and the default (slab or rr) partitioning in the 1st loop for the
	   initial triangular region of C (if it exists). */ \
	bli_thread_range_jrir_rr( thread, n_iter_tri, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
	bli_thread_range_jrir   ( caucus, m_iter,     1, FALSE, &ir_start, &ir_end, &ir_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Interior loop over the m dimension (MR rows at a time). */ \
		for ( i = ir_start; i < ir_end; i += ir_inc ) \
		{ \
			ctype* restrict a2; \
\
			a1  = a_cast + i * rstep_a; \
			c11 = c1     + i * rstep_c; \
\
			/* Compute the diagonal offset for the submatrix at (i,j). */ \
			diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* Compute the addresses of the next panels of A and B. */ \
			a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \
			if ( bli_is_last_iter( i, m_iter, ir_tid, ir_nt ) ) \
			{ \
				a2 = a_cast; \
				b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \
				if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \
					b2 = b_cast; \
			} \
\
			/* Save addresses of next panels of A and B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_next_a( a2, &aux ); \
			bli_auxinfo_set_next_b( b2, &aux ); \
\
			/* If the diagonal intersects the current MR x NR submatrix, we
			   compute it into the temporary buffer and then add in only the
			   stored part (via xpbys_mxn_u; presumably the elements on or
			   above the diagonal). Otherwise, if the submatrix is strictly
			   above the diagonal, we compute and store as we normally would.
			   And if we're strictly below the diagonal, we do nothing and
			   continue. */ \
			if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  zero, \
				  ct, rs_ct, cs_ct, \
				  &aux, \
				  cntx  \
				); \
\
				/* Scale C and add the result to only the stored part. */ \
				PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \
				                          m_cur, n_cur, \
				                          ct,  rs_ct, cs_ct, \
				                          beta_cast, \
				                          c11, rs_c,  cs_c ); \
			} \
			else if ( bli_is_strictly_above_diag_n( diagoffc_ij, m_cur, n_cur ) ) \
			{ \
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  beta_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Scale the edge of C and add the result. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        beta_cast, \
					                        c11, rs_c,  cs_c ); \
				} \
			} \
		} \
	} \
\
	/* If there is no rectangular region, then we're done. */ \
	if ( n_iter_rct == 0 ) return; \
\
	/* Determine the thread range and increment for the 2nd loop of the
	   remaining rectangular region of C (and also use default partitioning
	   for the 1st loop).
	   NOTE: The definition of bli_thread_range_jrir() will depend on whether
	   slab or round-robin partitioning was requested at configure-time. */ \
	bli_thread_range_jrir( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
\
	/* Advance the start and end iteration offsets for the rectangular region
	   by the number of iterations used for the triangular region. */ \
	jr_start += n_iter_tri; \
	jr_end   += n_iter_tri; \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Interior loop over the m dimension (MR rows at a time). */ \
		for ( i = ir_start; i < ir_end; i += ir_inc ) \
		{ \
			ctype* restrict a2; \
\
			a1  = a_cast + i * rstep_a; \
			c11 = c1     + i * rstep_c; \
\
			/* No need to compute the diagonal offset for the rectangular
			   region. */ \
			/*diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR;*/ \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* Compute the addresses of the next panels of A and B. */ \
			a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \
			if ( bli_is_last_iter( i, m_iter, ir_tid, ir_nt ) ) \
			{ \
				a2 = a_cast; \
				b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \
				if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \
					b2 = b_cast; \
			} \
\
			/* Save addresses of next panels of A and B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_next_a( a2, &aux ); \
			bli_auxinfo_set_next_b( b2, &aux ); \
\
			/* No diagonal test is performed in this loop: every microtile
			   is computed and stored in full, and only the interior/edge
			   distinction is made. */ \
			{ \
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  beta_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Scale the edge of C and add the result. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        beta_cast, \
					                        c11, rs_c,  cs_c ); \
				} \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC0( herk_u_ker_var2 )

blis-0.6.1/frame/3/herk/bli_herk_var.h000066400000000000000000000056451360743507500174600ustar00rootroot00000000000000/*

BLIS
An object-based framework for developing high-performance BLAS-like
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/


//
// Prototype object-based interfaces.
//
// GENPROT( opname ) declares one object-based herk variant. Each variant
// takes three operand objects (a, ah, c) followed by the context,
// runtime, control-tree and thread-info pointers.
//

#undef  GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC0(opname) \
     ( \
       obj_t*  a, \
       obj_t*  ah, \
       obj_t*  c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       cntl_t* cntl, \
       thrinfo_t* thread  \
     );

// Only the macrokernel variants are currently declared; the prototypes
// for the blocked and packing variants are commented out.
//GENPROT( herk_blk_var1 )
//GENPROT( herk_blk_var2 )
//GENPROT( herk_blk_var3 )

GENPROT( herk_x_ker_var2 )

GENPROT( herk_l_ker_var2 )
GENPROT( herk_u_ker_var2 )
//GENPROT( herk_packa )
//GENPROT( herk_packb )


//
// Prototype BLAS-like interfaces with void pointer operands.
//
// GENTPROT( ctype, ch, varname ) declares one datatype-specific
// macrokernel; INSERT_GENTPROT_BASIC0 instantiates it for the lower- and
// upper-stored variants below.
//

#undef  GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffc, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     );

INSERT_GENTPROT_BASIC0( herk_l_ker_var2 )
INSERT_GENTPROT_BASIC0( herk_u_ker_var2 )

blis-0.6.1/frame/3/herk/bli_herk_x_ker_var2.c000066400000000000000000000045251360743507500207210ustar00rootroot00000000000000/*

BLIS
An object-based framework for developing high-performance BLAS-like
libraries.

Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" static gemm_var_oft vars[2] = { bli_herk_l_ker_var2, bli_herk_u_ker_var2, }; void bli_herk_x_ker_var2 ( obj_t* a, obj_t* ah, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { bool_t uplo; gemm_var_oft f; // Set a bool based on the uplo field of C's root object. if ( bli_obj_root_is_lower( c ) ) uplo = 0; else uplo = 1; // Index into the variant array to extract the correct function pointer. f = vars[uplo]; // Call the macrokernel. 
f ( a, ah, c, cntx, rntm, cntl, thread ); } blis-0.6.1/frame/3/herk/other/000077500000000000000000000000001360743507500157675ustar00rootroot00000000000000blis-0.6.1/frame/3/herk/other/bli_herk_l_ker_var2.1looprr.c000066400000000000000000000310411360743507500234140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_l_ker_var2); void bli_herk_l_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. 
f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, ip; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely above the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region above where the diagonal of C intersects the left edge of the panel, adjust the pointer to C and A and treat this case as if the diagonal offset were zero. */ \ if ( diagoffc < 0 ) \ { \ ip = -diagoffc / MR; \ i = ip * MR; \ m = m - i; \ diagoffc = -diagoffc % MR; \ c_cast = c_cast + (i )*rs_c; \ a_cast = a_cast + (ip )*ps_a; \ } \ \ /* If there is a zero region to the right of where the diagonal of C intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffc + m < n ) \ { \ n = diagoffc + m; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. 
*/ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Use interleaved (round robin) assignment of micropanels to threads in the 2nd and 1st loops. */ \ bli_thread_range_jrir_rr( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). 
*/ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly below the diagonal, we compute and store as we normally would. And if we're strictly above the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. */ \ PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. 
*/ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_l_ker_var2 ) blis-0.6.1/frame/3/herk/other/bli_herk_l_ker_var2.c000066400000000000000000000300531360743507500220210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_l_ker_var2); void bli_herk_l_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. 
f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, ip; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely above the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region above where the diagonal of C intersects the left edge of the panel, adjust the pointer to C and A and treat this case as if the diagonal offset were zero. */ \ if ( diagoffc < 0 ) \ { \ ip = -diagoffc / MR; \ i = ip * MR; \ m = m - i; \ diagoffc = -diagoffc % MR; \ c_cast = c_cast + (i )*rs_c; \ a_cast = a_cast + (ip )*ps_a; \ } \ \ /* If there is a zero region to the right of where the diagonal of C intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffc + m < n ) \ { \ n = diagoffc + m; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. 
*/ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ b1 = b_cast; \ c1 = c_cast; \ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ dim_t jr_num_threads = bli_thread_n_way( thread ); \ dim_t jr_thread_id = bli_thread_work_id( thread ); \ dim_t ir_num_threads = bli_thread_n_way( caucus ); \ dim_t ir_thread_id = bli_thread_work_id( caucus ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_thread_id; j < n_iter; j += jr_num_threads ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). */ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly below the diagonal, we compute and store as we normally would. And if we're strictly above the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. */ \ PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_l_ker_var2 ) blis-0.6.1/frame/3/herk/other/bli_herk_l_ker_var2rr.c000066400000000000000000000414271360743507500223740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_l_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // void bli_herk_l_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_exec]; // Invoke the function. f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, ip; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely above the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region above where the diagonal of C intersects the left edge of the panel, adjust the pointer to C and A and treat this case as if the diagonal offset were zero. */ \ if ( diagoffc < 0 ) \ { \ ip = -diagoffc / MR; \ i = ip * MR; \ m = m - i; \ diagoffc = -diagoffc % MR; \ c_cast = c_cast + (i )*rs_c; \ a_cast = a_cast + (ip )*ps_a; \ } \ \ /* If there is a zero region to the right of where the diagonal of C intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffc + m < n ) \ { \ n = diagoffc + m; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. 
*/ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the rectangular part of C, and the triangular portion. */ \ dim_t n_iter_rct; \ dim_t n_iter_tri; \ \ if ( bli_is_strictly_below_diag_n( diagoffc, m, n ) ) \ { \ /* If the entire panel of C does not intersect the diagonal, there is no triangular region, and therefore we can skip the second set of loops. */ \ n_iter_rct = n_iter; \ n_iter_tri = 0; \ } \ else \ { \ /* If the panel of C does intersect the diagonal, compute the number of iterations in the rectangular region by dividing NR into the diagonal offset. Any remainder from this integer division is discarded, which is what we want. That is, we want the rectangular region to contain as many columns of whole microtiles as possible without including any microtiles that intersect the diagonal. The number of iterations in the triangular (or trapezoidal) region is computed as the remaining number of iterations in the n dimension. 
*/ \ n_iter_rct = diagoffc / NR; \ n_iter_tri = n_iter - n_iter_rct; \ } \ \ /* Use round-robin assignment of micropanels to threads in the 2nd and 1st loops for the initial rectangular region of C (if it exists). */ \ bli_thread_range_jrir_rr( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* No need to compute the diagonal offset for the rectangular region. */ \ /*diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR;*/ \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_rr( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly below the diagonal, we compute and store as we normally would. And if we're strictly above the diagonal, we do nothing and continue. 
*/ \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ /* If there is no triangular region, then we're done. */ \ if ( n_iter_tri == 0 ) return; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd and 1st loops for the remaining triangular region of C. */ \ bli_thread_range_jrir_rr( thread, n_iter_tri, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ \ /* Advance the start and end iteration offsets for the triangular region by the number of iterations used for the rectangular region. */ \ jr_start += n_iter_rct; \ jr_end += n_iter_rct; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). */ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. 
*/ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_rr( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly below the diagonal, we compute and store as we normally would. And if we're strictly above the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. */ \ PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. 
*/ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_l_ker_var2rr ) blis-0.6.1/frame/3/herk/other/bli_herk_l_ker_var2sl.c000066400000000000000000000414611360743507500223650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_l_ker_var2sl); // // -- Macrokernel functions for slab partitioning ------------------------------ // void bli_herk_l_ker_var2sl ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_exec]; // Invoke the function. f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, ip; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely above the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region above where the diagonal of C intersects the left edge of the panel, adjust the pointer to C and A and treat this case as if the diagonal offset were zero. */ \ if ( diagoffc < 0 ) \ { \ ip = -diagoffc / MR; \ i = ip * MR; \ m = m - i; \ diagoffc = -diagoffc % MR; \ c_cast = c_cast + (i )*rs_c; \ a_cast = a_cast + (ip )*ps_a; \ } \ \ /* If there is a zero region to the right of where the diagonal of C intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffc + m < n ) \ { \ n = diagoffc + m; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. 
*/ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the rectangular part of C, and the triangular portion. */ \ dim_t n_iter_rct; \ dim_t n_iter_tri; \ \ if ( bli_is_strictly_below_diag_n( diagoffc, m, n ) ) \ { \ /* If the entire panel of C does not intersect the diagonal, there is no triangular region, and therefore we can skip the second set of loops. */ \ n_iter_rct = n_iter; \ n_iter_tri = 0; \ } \ else \ { \ /* If the panel of C does intersect the diagonal, compute the number of iterations in the rectangular region by dividing NR into the diagonal offset. Any remainder from this integer division is discarded, which is what we want. That is, we want the rectangular region to contain as many columns of whole microtiles as possible without including any microtiles that intersect the diagonal. The number of iterations in the triangular (or trapezoidal) region is computed as the remaining number of iterations in the n dimension. 
*/ \ n_iter_rct = diagoffc / NR; \ n_iter_tri = n_iter - n_iter_rct; \ } \ \ /* Use slab assignment of micropanels to threads in the 2nd and 1st loops for the initial rectangular region of C (if it exists). */ \ bli_thread_range_jrir_sl( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_sl( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* No need to compute the diagonal offset for the rectangular region. */ \ /*diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR;*/ \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_sl( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly below the diagonal, we compute and store as we normally would. And if we're strictly above the diagonal, we do nothing and continue. 
*/ \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ /* If there is no triangular region, then we're done. */ \ if ( n_iter_tri == 0 ) return; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop and slab partitioning in the 1st loop for the remaining triangular region of C. */ \ bli_thread_range_jrir_rr( thread, n_iter_tri, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ \ /* Advance the start and end iteration offsets for the triangular region by the number of iterations used for the rectangular region. */ \ jr_start += n_iter_rct; \ jr_end += n_iter_rct; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). */ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. 
*/ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_rr( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly below the diagonal, we compute and store as we normally would. And if we're strictly above the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. */ \ PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. 
*/ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_l_ker_var2sl ) blis-0.6.1/frame/3/herk/other/bli_herk_u_ker_var2.1looprr.c000066400000000000000000000310431360743507500234270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_u_ker_var2); void bli_herk_u_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. 
f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, jp; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely below the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region to the left of where the diagonal of C intersects the top edge of the panel, adjust the pointer to C and B and treat this case as if the diagonal offset were zero. */ \ if ( diagoffc > 0 ) \ { \ jp = diagoffc / NR; \ j = jp * NR; \ n = n - j; \ diagoffc = diagoffc % NR; \ c_cast = c_cast + (j )*cs_c; \ b_cast = b_cast + (jp )*ps_b; \ } \ \ /* If there is a zero region below where the diagonal of C intersects the right edge of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffc + n < m ) \ { \ m = -diagoffc + n; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. 
*/ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Use interleaved (round robin) assignment of micropanels to threads in the 2nd and 1st loops. */ \ bli_thread_range_jrir_rr( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). 
*/ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly above the diagonal, we compute and store as we normally would. And if we're strictly below the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. */ \ PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_above_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. 
*/ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_u_ker_var2 ) blis-0.6.1/frame/3/herk/other/bli_herk_u_ker_var2.c000066400000000000000000000300551360743507500220340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_u_ker_var2); void bli_herk_u_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. 
f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, jp; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely below the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region to the left of where the diagonal of C intersects the top edge of the panel, adjust the pointer to C and B and treat this case as if the diagonal offset were zero. */ \ if ( diagoffc > 0 ) \ { \ jp = diagoffc / NR; \ j = jp * NR; \ n = n - j; \ diagoffc = diagoffc % NR; \ c_cast = c_cast + (j )*cs_c; \ b_cast = b_cast + (jp )*ps_b; \ } \ \ /* If there is a zero region below where the diagonal of C intersects the right edge of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffc + n < m ) \ { \ m = -diagoffc + n; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. 
*/ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ b1 = b_cast; \ c1 = c_cast; \ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ dim_t jr_num_threads = bli_thread_n_way( thread ); \ dim_t jr_thread_id = bli_thread_work_id( thread ); \ dim_t ir_num_threads = bli_thread_n_way( caucus ); \ dim_t ir_thread_id = bli_thread_work_id( caucus ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_thread_id; j < n_iter; j += jr_num_threads ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). */ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly above the diagonal, we compute and store as we normally would. And if we're strictly below the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. */ \ PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_above_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_u_ker_var2 ) blis-0.6.1/frame/3/herk/other/bli_herk_u_ker_var2rr.c000066400000000000000000000417201360743507500224010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_u_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // void bli_herk_u_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_exec]; // Invoke the function. f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, jp; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely below the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region to the left of where the diagonal of C intersects the top edge of the panel, adjust the pointer to C and B and treat this case as if the diagonal offset were zero. NOTE: It's possible that after this pruning that the diagonal offset is still positive (though it is guaranteed to be less than NR). */ \ if ( diagoffc > 0 ) \ { \ jp = diagoffc / NR; \ j = jp * NR; \ n = n - j; \ diagoffc = diagoffc % NR; \ c_cast = c_cast + (j )*cs_c; \ b_cast = b_cast + (jp )*ps_b; \ } \ \ /* If there is a zero region below where the diagonal of C intersects the right edge of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffc + n < m ) \ { \ m = -diagoffc + n; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. 
*/ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the triangular part of C, and the rectangular portion. */ \ dim_t n_iter_tri; \ dim_t n_iter_rct; \ \ if ( bli_is_strictly_above_diag_n( diagoffc, m, n ) ) \ { \ /* If the entire panel of C does not intersect the diagonal, there is no triangular region, and therefore we can skip the first set of loops. */ \ n_iter_tri = 0; \ n_iter_rct = n_iter; \ } \ else \ { \ /* If the panel of C does intersect the diagonal, compute the number of iterations in the triangular (or trapezoidal) region by dividing NR into the number of rows in C. A non-zero remainder means we need to add one additional iteration. That is, we want the triangular region to contain as few columns of whole microtiles as possible while still including all microtiles that intersect the diagonal. 
The number of iterations in the rectangular region is computed as the remaining number of iterations in the n dimension. */ \ n_iter_tri = ( m + diagoffc ) / NR + ( ( m + diagoffc ) % NR ? 1 : 0 ); \ n_iter_rct = n_iter - n_iter_tri; \ } \ \ /* Use round-robin assignment of micropanels to threads in the 2nd and 1st loops for the initial triangular region of C (if it exists). */ \ bli_thread_range_jrir_rr( thread, n_iter_tri, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). */ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_rr( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. 
Otherwise, if the submatrix is strictly above the diagonal, we compute and store as we normally would. And if we're strictly below the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. */ \ PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_above_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ /* If there is no rectangular region, then we're done. */ \ if ( n_iter_rct == 0 ) return; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd and 1st loops for the remaining triangular region of C. */ \ bli_thread_range_jrir_rr( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ \ /* Advance the start and end iteration offsets for the rectangular region by the number of iterations used for the triangular region. */ \ jr_start += n_iter_tri; \ jr_end += n_iter_tri; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? 
NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* No need to compute the diagonal offset for the rectangular region. */ \ /*diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR;*/ \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_rr( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly above the diagonal, we compute and store as we normally would. And if we're strictly below the diagonal, we do nothing and continue. */ \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. 
*/ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_u_ker_var2rr ) blis-0.6.1/frame/3/herk/other/bli_herk_u_ker_var2sl.c000066400000000000000000000417571360743507500224060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T herk_fp typedef void (*FUNCPTR_T) ( doff_t diagoffc, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, inc_t is_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, inc_t is_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,herk_u_ker_var2sl); // // -- Macrokernel functions for slab partitioning ------------------------------ // void bli_herk_u_ker_var2sl ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffc = bli_obj_diag_offset( c ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); inc_t is_a = bli_obj_imag_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t is_b = bli_obj_imag_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_exec]; // Invoke the function. f( diagoffc, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, is_a, pd_a, ps_a, buf_b, rs_b, is_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffc, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, inc_t is_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, inc_t is_b, \ dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ /*const dim_t PACKMR = cs_a;*/ \ /*const dim_t PACKNR = rs_b;*/ \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffc_ij; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t i, j, jp; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of C is entirely below the diagonal, it is not stored. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffc, m, n ) ) return; \ \ /* If there is a zero region to the left of where the diagonal of C intersects the top edge of the panel, adjust the pointer to C and B and treat this case as if the diagonal offset were zero. NOTE: It's possible that after this pruning that the diagonal offset is still positive (though it is guaranteed to be less than NR). */ \ if ( diagoffc > 0 ) \ { \ jp = diagoffc / NR; \ j = jp * NR; \ n = n - j; \ diagoffc = diagoffc % NR; \ c_cast = c_cast + (j )*cs_c; \ b_cast = b_cast + (jp )*ps_b; \ } \ \ /* If there is a zero region below where the diagonal of C intersects the right edge of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffc + n < m ) \ { \ m = -diagoffc + n; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. 
*/ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a, &aux ); \ bli_auxinfo_set_is_b( is_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the triangular part of C, and the rectangular portion. */ \ dim_t n_iter_tri; \ dim_t n_iter_rct; \ \ if ( bli_is_strictly_above_diag_n( diagoffc, m, n ) ) \ { \ /* If the entire panel of C does not intersect the diagonal, there is no triangular region, and therefore we can skip the first set of loops. */ \ n_iter_tri = 0; \ n_iter_rct = n_iter; \ } \ else \ { \ /* If the panel of C does intersect the diagonal, compute the number of iterations in the triangular (or trapezoidal) region by dividing NR into the number of rows in C. A non-zero remainder means we need to add one additional iteration. That is, we want the triangular region to contain as few columns of whole microtiles as possible while still including all microtiles that intersect the diagonal. 
The number of iterations in the rectangular region is computed as the remaining number of iterations in the n dimension. */ \ n_iter_tri = ( m + diagoffc ) / NR + ( ( m + diagoffc ) % NR ? 1 : 0 ); \ n_iter_rct = n_iter - n_iter_tri; \ } \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop and slab partitioning in the 1st loop for the initial triangular region of C (if it exists). */ \ bli_thread_range_jrir_rr( thread, n_iter_tri, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_sl( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* Compute the diagonal offset for the submatrix at (i,j). */ \ diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_sl( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly above the diagonal, we compute and store as we normally would. And if we're strictly below the diagonal, we do nothing and continue. */ \ if ( bli_intersects_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale C and add the result to only the stored part. */ \ PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \ m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ else if ( bli_is_strictly_above_diag_n( diagoffc_ij, m_cur, n_cur ) ) \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ /* If there is no rectangular region, then we're done. */ \ if ( n_iter_rct == 0 ) return; \ \ /* Use slab assignment of micropanels to threads in the 2nd and 1st loops loop for the remaining triangular region of C. */ \ bli_thread_range_jrir_sl( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ \ /* Advance the start and end iteration offsets for the rectangular region by the number of iterations used for the triangular region. */ \ jr_start += n_iter_tri; \ jr_end += n_iter_tri; \ \ /* Loop over the n dimension (NR columns at a time). 
*/ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Interior loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ /* No need to compute the diagonal offset for the rectangular region. */ \ /*diagoffc_ij = diagoffc - (doff_t)j*NR + (doff_t)i*MR;*/ \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_herk_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_sl( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_herk_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* If the diagonal intersects the current MR x NR submatrix, we compute it the temporary buffer and then add in the elements on or below the diagonal. Otherwise, if the submatrix is strictly above the diagonal, we compute and store as we normally would. And if we're strictly below the diagonal, we do nothing and continue. */ \ { \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Scale the edge of C and add the result. 
*/ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ beta_cast, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( herk_u_ker_var2sl ) blis-0.6.1/frame/3/old/000077500000000000000000000000001360743507500144735ustar00rootroot00000000000000blis-0.6.1/frame/3/old/bli_l3_sup_edge.h000066400000000000000000000076521360743507500176750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ static void bli_dgemmsup_ker_edge_dispatcher ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx, const dim_t num_mr, const dim_t num_nr, dim_t* restrict mrs, dim_t* restrict nrs, dgemmsup_ker_ft* kmap ) { #if 1 // outer loop = mr; inner loop = nr dim_t n_left = n0; double* restrict cj = c; double* restrict bj = b; for ( dim_t j = 0; n_left != 0; ++j ) { const dim_t nr_cur = nrs[ j ]; if ( nr_cur <= n_left ) { dim_t m_left = m0; double* restrict cij = cj; double* restrict ai = a; for ( dim_t i = 0; m_left != 0; ++i ) { const dim_t mr_cur = mrs[ i ]; if ( mr_cur <= m_left ) { dgemmsup_ker_ft ker_fp = kmap[ i*num_nr + j*1 ]; ker_fp ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } } cj += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } } #else // outer loop = nr; inner loop = mr dim_t m_left = m0; double* restrict ci = c; double* restrict ai = a; for ( dim_t i = 0; m_left != 0; ++i ) { const dim_t mr_cur = mrs[ i ]; if ( mr_cur <= m_left ) { dim_t n_left = n0; double* restrict cij = ci; double* restrict bj = b; for ( dim_t j = 0; n_left != 0; ++j ) { const dim_t nr_cur = nrs[ j ]; if ( nr_cur <= n_left ) { dgemmsup_ker_ft ker_fp = kmap[ i*num_nr + j*1 ]; ker_fp ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } } ci += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } } #endif } blis-0.6.1/frame/3/old/bli_l3_sup_var1n2m.c000066400000000000000000000602601360743507500202440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance 
BLAS-like libraries. Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* FUNCPTR_T is the signature shared by the type-specific variant implementations defined below; the ftypes_var1n and ftypes_var2m arrays hold such pointers and are indexed by the execution datatype. */ #define FUNCPTR_T gemmsup_fp typedef void (*FUNCPTR_T) ( conj_t conja, conj_t conjb, dim_t m, dim_t n, dim_t k, void* restrict alpha, void* restrict a, inc_t rs_a, inc_t cs_a, void* restrict b, inc_t rs_b, inc_t cs_b, void* restrict beta, void* restrict c, inc_t rs_c, inc_t cs_c, stor3_t eff_id, cntx_t* restrict cntx, rntm_t* restrict rntm, cntl_t* restrict cntl, thrinfo_t* restrict thread ); // // -- var1n -------------------------------------------------------------------- // static FUNCPTR_T GENARRAY(ftypes_var1n,gemmsup_ref_var1n); /* Object-based front end for variant 1n. It reads the dimensions, strides and buffers out of the obj_t operands (a transposed A or B is handled by swapping its row and column strides), picks the implementation from ftypes_var1n using the datatype of C, and calls it. If trans is not "notrans" the whole operation is transposed instead: A and B, m and n, the conj values and every row/column stride pair are swapped, and eff_id is passed through bli_stor3_trans(). */ void bli_gemmsup_ref_var1n ( trans_t trans, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, stor3_t eff_id, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { #if 0 obj_t at, bt; bli_obj_alias_to( a, &at ); bli_obj_alias_to( b, &bt ); // Induce transpositions on A and/or B if either object is marked for // transposition. We can induce "fast" transpositions since the objects // are guaranteed to not have structure or be packed.
if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); } if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); } const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( &at ); void* restrict buf_a = bli_obj_buffer_at_off( &at ); const inc_t rs_a = bli_obj_row_stride( &at ); const inc_t cs_a = bli_obj_col_stride( &at ); void* restrict buf_b = bli_obj_buffer_at_off( &bt ); const inc_t rs_b = bli_obj_row_stride( &bt ); const inc_t cs_b = bli_obj_col_stride( &bt ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #else const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); dim_t k; void* restrict buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a; inc_t cs_a; void* restrict buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b; inc_t cs_b; if ( bli_obj_has_notrans( a ) ) { k = bli_obj_width( a ); rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else // if ( bli_obj_has_trans( a ) ) { // Assign the variables with an implicit transposition. k = bli_obj_length( a ); rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else // if ( bli_obj_has_trans( b ) ) { // Assign the variables with an implicit transposition.
rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #endif // Index into the type combination array to extract the correct // function pointer. FUNCPTR_T f = ftypes_var1n[dt_exec]; if ( bli_is_notrans( trans ) ) { // Invoke the function. f ( conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, eff_id, cntx, rntm, cntl, thread ); } else { // Invoke the function (transposing the operation). f ( conjb, // swap the conj values. conja, n, // swap the m and n dimensions. m, k, buf_alpha, buf_b, cs_b, rs_b, // swap the positions of A and B. buf_a, cs_a, rs_a, // swap the strides of A and B. buf_beta, buf_c, cs_c, rs_c, // swap the strides of C. bli_stor3_trans( eff_id ), // transpose the stor3_t id. cntx, rntm, cntl, thread ); } } /* Type-generic body of gemmsup variant 1n, expanded through INSERT_GENTFUNC_BASIC0 below. The loop nest is: jc over m (NC at a time), pc over k (KC at a time), ic over n (MC at a time), jr over m (MR at a time); each jr iteration makes one call to the sup millikernel. The rntm, cntl and thread arguments are not referenced in this body. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ stor3_t stor_id, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ cntl_t* restrict cntl, \ thrinfo_t* restrict thread \ ) \ { \ /* If m or n is zero, return immediately. */ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* If k < 1 or alpha is zero, scale by beta and return. */ \ if ( k < 1 || PASTEMAC(ch,eq0)( *(( ctype* )alpha) ) ) \ { \ PASTEMAC(ch,scalm) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m, n, \ beta, \ c, rs_c, cs_c \ ); \ return; \ } \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* This transposition of the stor3_t id value is inherent to variant 1.
The reason: we assume that variant 2 is the "main" variant. The consequence of this is that we assume that the millikernels that iterate over m are registered to the kernel group associated with the kernel preference. So, regardless of whether the mkernels are row- or column-preferential, millikernels that iterate over n are always placed in the slots for the opposite kernel group. */ \ stor_id = bli_stor3_trans( stor_id ); \ \ /* Query the context for various blocksizes. */ \ const dim_t NR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t NC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NC, cntx ); \ const dim_t MC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MC, cntx ); \ const dim_t KC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_KC, cntx ); \ \ /* Select KC: the full KC0 is used for the RRC and CRC storage cases and whenever m <= MR and n <= NR; otherwise KC0 is divided by 2, 3, 4 or 5 as m and n grow, with the thirds and fifths rounded down to a multiple of 4. The leading "if ( FALSE )" branch is never taken. */ \ dim_t KC; \ if ( FALSE ) KC = KC0; \ else if ( stor_id == BLIS_RRC || \ stor_id == BLIS_CRC ) KC = KC0; \ else if ( m <= MR && n <= NR ) KC = KC0; \ else if ( m <= 2*MR && n <= 2*NR ) KC = KC0 / 2; \ else if ( m <= 3*MR && n <= 3*NR ) KC = (( KC0 / 3 ) / 4 ) * 4; \ else if ( m <= 4*MR && n <= 4*NR ) KC = KC0 / 4; \ else KC = (( KC0 / 5 ) / 4 ) * 4; \ \ /* Nudge NC up to a multiple of MR and MC up to a multiple of NR. */ \ const dim_t NC = bli_align_dim_to_mult( NC0, MR ); \ const dim_t MC = bli_align_dim_to_mult( MC0, NR ); \ \ /* Query the maximum blocksize for MR, which implies a maximum blocksize extension for the final iteration. */ \ const dim_t MRM = bli_cntx_get_l3_sup_blksz_max_dt( dt, BLIS_MR, cntx ); \ const dim_t MRE = MRM - MR; \ \ /* Compute partitioning step values for each matrix of each loop.
*/ \ const inc_t jcstep_c = rs_c * NC; \ const inc_t jcstep_a = rs_a * NC; \ \ const inc_t pcstep_a = cs_a * KC; \ const inc_t pcstep_b = rs_b * KC; \ \ const inc_t icstep_c = cs_c * MC; \ const inc_t icstep_b = cs_b * MC; \ \ const inc_t jrstep_c = rs_c * MR; \ const inc_t jrstep_a = rs_a * MR; \ \ /* const inc_t irstep_c = cs_c * NR; \ const inc_t irstep_b = cs_b * NR; \ */ \ \ /* Query the context for the sup microkernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemmsup_ker_ft) \ gemmsup_ker = bli_cntx_get_l3_sup_ker_dt( dt, stor_id, cntx ); \ \ ctype* restrict a_00 = a; \ ctype* restrict b_00 = b; \ ctype* restrict c_00 = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ \ ctype* restrict one = PASTEMAC(ch,1); \ \ auxinfo_t aux; \ /* NOTE(review): aux is never initialized in this function before its address is handed to the millikernel below. */ \ \ /* Compute number of primary and leftover components of the outer dimensions. NOTE: Functionally speaking, we compute jc_iter as: jc_iter = m / NC; if ( jc_left ) ++jc_iter; However, this is implemented as: jc_iter = ( m + NC - 1 ) / NC; This avoids a branch at the cost of two additional integer instructions. The pc_iter, ic_iter, and jr_iter values are computed in a similar manner. */ \ const dim_t jc_iter = ( m + NC - 1 ) / NC; \ const dim_t jc_left = m % NC; \ \ const dim_t pc_iter = ( k + KC - 1 ) / KC; \ const dim_t pc_left = k % KC; \ \ const dim_t ic_iter = ( n + MC - 1 ) / MC; \ const dim_t ic_left = n % MC; \ \ const dim_t jc_inc = 1; \ const dim_t pc_inc = 1; \ const dim_t ic_inc = 1; \ const dim_t jr_inc = 1; \ /* const dim_t ir_inc = 1; \ */ \ \ /* Loop over the m dimension (NC rows/columns at a time). */ \ for ( dim_t jj = 0; jj < jc_iter; jj += jc_inc ) \ { \ const dim_t nc_cur = ( bli_is_not_edge_f( jj, jc_iter, jc_left ) ? 
NC : jc_left ); \ \ ctype* restrict a_jc = a_00 + jj * jcstep_a; \ ctype* restrict c_jc = c_00 + jj * jcstep_c; \ \ dim_t jr_iter = ( nc_cur + MR - 1 ) / MR; \ dim_t jr_left = nc_cur % MR; \ \ /* An optimization: allow the last jr iteration to absorb up to MRE extra rows of C and A. (If MRE != 0, i.e. MRM != MR, the mkernel has agreed to handle these cases.) Note that this prevents us from declaring jr_iter and jr_left as const. */ \ if ( 1 ) \ if ( MRE != 0 && 1 < jr_iter && jr_left != 0 && jr_left <= MRE ) \ { \ jr_iter--; jr_left += MR; \ } \ \ /* Loop over the k dimension (KC rows/columns at a time). */ \ for ( dim_t pp = 0; pp < pc_iter; pp += pc_inc ) \ { \ const dim_t kc_cur = ( bli_is_not_edge_f( pp, pc_iter, pc_left ) ? KC : pc_left ); \ \ ctype* restrict a_pc = a_jc + pp * pcstep_a; \ ctype* restrict b_pc = b_00 + pp * pcstep_b; \ \ /* Only apply beta to the first iteration of the pc loop. */ \ ctype* restrict beta_use = ( pp == 0 ? beta_cast : one ); \ \ /* Loop over the n dimension (MC columns at a time). */ \ for ( dim_t ii = 0; ii < ic_iter; ii += ic_inc ) \ { \ const dim_t mc_cur = ( bli_is_not_edge_f( ii, ic_iter, ic_left ) ? MC : ic_left ); \ \ ctype* restrict b_ic = b_pc + ii * icstep_b; \ ctype* restrict c_ic = c_jc + ii * icstep_c; \ \ /* const dim_t ir_iter = ( mc_cur + NR - 1 ) / NR; \ const dim_t ir_left = mc_cur % NR; \ */ \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( dim_t j = 0; j < jr_iter; j += jr_inc ) \ { \ const dim_t nr_cur = ( bli_is_not_edge_f( j, jr_iter, jr_left ) ? MR : jr_left ); \ \ ctype* restrict a_jr = a_pc + j * jrstep_a; \ ctype* restrict c_jr = c_ic + j * jrstep_c; \ \ /* No explicit loop over the n dimension here: the millikernel is handed all mc_cur columns at once. */ \ { \ /* Invoke the gemmsup millikernel. */ \ gemmsup_ker \ ( \ conja, \ conjb, \ nr_cur, /* Notice: nr_cur <= MR + MRE. */ \ mc_cur, /* Recall: mc_cur partitions the n dimension!
*/ \ kc_cur, \ alpha_cast, \ a_jr, rs_a, cs_a, \ b_ic, rs_b, cs_b, \ beta_use, \ c_jr, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ } \ } \ } \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: b1", kc_cur, nr_cur, b_jr, rs_b, cs_b, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: a1", mr_cur, kc_cur, a_ir, rs_a, cs_a, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: c ", mr_cur, nr_cur, c_ir, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemmsup_ref_var1n ) // // -- var2m -------------------------------------------------------------------- // static FUNCPTR_T GENARRAY(ftypes_var2m,gemmsup_ref_var2m); /* Object-based front end for variant 2m. It reads the dimensions, strides and buffers out of the obj_t operands (a transposed A or B is handled by swapping its row and column strides), picks the implementation from ftypes_var2m using the datatype of C, and calls it. If trans is not "notrans" the whole operation is transposed instead: A and B, m and n, the conj values and every row/column stride pair are swapped, and eff_id is passed through bli_stor3_trans(). */ void bli_gemmsup_ref_var2m ( trans_t trans, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, stor3_t eff_id, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { #if 0 obj_t at, bt; bli_obj_alias_to( a, &at ); bli_obj_alias_to( b, &bt ); // Induce transpositions on A and/or B if either object is marked for // transposition. We can induce "fast" transpositions since the objects // are guaranteed to not have structure or be packed.
if ( bli_obj_has_trans( &at ) ) { bli_obj_induce_fast_trans( &at ); } if ( bli_obj_has_trans( &bt ) ) { bli_obj_induce_fast_trans( &bt ); } const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); const dim_t k = bli_obj_width( &at ); void* restrict buf_a = bli_obj_buffer_at_off( &at ); const inc_t rs_a = bli_obj_row_stride( &at ); const inc_t cs_a = bli_obj_col_stride( &at ); void* restrict buf_b = bli_obj_buffer_at_off( &bt ); const inc_t rs_b = bli_obj_row_stride( &bt ); const inc_t cs_b = bli_obj_col_stride( &bt ); void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #else const num_t dt_exec = bli_obj_dt( c ); const conj_t conja = bli_obj_conj_status( a ); const conj_t conjb = bli_obj_conj_status( b ); const dim_t m = bli_obj_length( c ); const dim_t n = bli_obj_width( c ); dim_t k; void* restrict buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a; inc_t cs_a; void* restrict buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b; inc_t cs_b; if ( bli_obj_has_notrans( a ) ) { k = bli_obj_width( a ); rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else // if ( bli_obj_has_trans( a ) ) { // Assign the variables with an implicit transposition. k = bli_obj_length( a ); rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else // if ( bli_obj_has_trans( b ) ) { // Assign the variables with an implicit transposition.
rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } void* restrict buf_c = bli_obj_buffer_at_off( c ); const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); void* restrict buf_alpha = bli_obj_buffer_for_1x1( dt_exec, alpha ); void* restrict buf_beta = bli_obj_buffer_for_1x1( dt_exec, beta ); #endif // Index into the type combination array to extract the correct // function pointer. FUNCPTR_T f = ftypes_var2m[dt_exec]; if ( bli_is_notrans( trans ) ) { // Invoke the function. f ( conja, conjb, m, n, k, buf_alpha, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b, buf_beta, buf_c, rs_c, cs_c, eff_id, cntx, rntm, cntl, thread ); } else { // Invoke the function (transposing the operation). f ( conjb, // swap the conj values. conja, n, // swap the m and n dimensions. m, k, buf_alpha, buf_b, cs_b, rs_b, // swap the positions of A and B. buf_a, cs_a, rs_a, // swap the strides of A and B. buf_beta, buf_c, cs_c, rs_c, // swap the strides of C. bli_stor3_trans( eff_id ), // transpose the stor3_t id. cntx, rntm, cntl, thread ); } } /* Type-generic body of gemmsup variant 2m, expanded through INSERT_GENTFUNC_BASIC0 below. The loop nest is: jc over n (NC at a time), pc over k (KC at a time), ic over m (MC at a time), jr over n (NR at a time); each jr iteration makes one call to the sup millikernel. The rntm, cntl and thread arguments are not referenced in this body. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ void* restrict alpha, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b, \ void* restrict beta, \ void* restrict c, inc_t rs_c, inc_t cs_c, \ stor3_t stor_id, \ cntx_t* restrict cntx, \ rntm_t* restrict rntm, \ cntl_t* restrict cntl, \ thrinfo_t* restrict thread \ ) \ { \ /* If m or n is zero, return immediately. */ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* If k < 1 or alpha is zero, scale by beta and return. */ \ if ( k < 1 || PASTEMAC(ch,eq0)( *(( ctype* )alpha) ) ) \ { \ PASTEMAC(ch,scalm) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m, n, \ beta, \ c, rs_c, cs_c \ ); \ return; \ } \ \ const num_t dt = PASTEMAC(ch,type); \ \ /* Query the context for various blocksizes.
*/ \ const dim_t NR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NR, cntx ); \ const dim_t MR = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t NC = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_NC, cntx ); \ const dim_t MC = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_MC, cntx ); \ const dim_t KC0 = bli_cntx_get_l3_sup_blksz_def_dt( dt, BLIS_KC, cntx ); \ \ /* Select KC: the full KC0 is used for the RRR, CCC, RRC and CRC storage cases and whenever m <= MR and n <= NR; otherwise KC0 is divided by 2, 3, 4 or 5 as m and n grow, with the thirds and fifths rounded down to a multiple of 4. */ \ dim_t KC; \ if ( stor_id == BLIS_RRR || \ stor_id == BLIS_CCC ) KC = KC0; \ else if ( stor_id == BLIS_RRC || \ stor_id == BLIS_CRC ) KC = KC0; \ else if ( m <= MR && n <= NR ) KC = KC0; \ else if ( m <= 2*MR && n <= 2*NR ) KC = KC0 / 2; \ else if ( m <= 3*MR && n <= 3*NR ) KC = (( KC0 / 3 ) / 4 ) * 4; \ else if ( m <= 4*MR && n <= 4*NR ) KC = KC0 / 4; \ else KC = (( KC0 / 5 ) / 4 ) * 4; \ \ /* Query the maximum blocksize for NR, which implies a maximum blocksize extension for the final iteration. */ \ const dim_t NRM = bli_cntx_get_l3_sup_blksz_max_dt( dt, BLIS_NR, cntx ); \ const dim_t NRE = NRM - NR; \ \ /* Compute partitioning step values for each matrix of each loop. */ \ const inc_t jcstep_c = cs_c * NC; \ const inc_t jcstep_b = cs_b * NC; \ \ const inc_t pcstep_a = cs_a * KC; \ const inc_t pcstep_b = rs_b * KC; \ \ const inc_t icstep_c = rs_c * MC; \ const inc_t icstep_a = rs_a * MC; \ \ const inc_t jrstep_c = cs_c * NR; \ const inc_t jrstep_b = cs_b * NR; \ \ /* const inc_t irstep_c = rs_c * MR; \ const inc_t irstep_a = rs_a * MR; \ */ \ \ /* Query the context for the sup microkernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemmsup_ker_ft) \ gemmsup_ker = bli_cntx_get_l3_sup_ker_dt( dt, stor_id, cntx ); \ \ ctype* restrict a_00 = a; \ ctype* restrict b_00 = b; \ ctype* restrict c_00 = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ \ ctype* restrict one = PASTEMAC(ch,1); \ \ auxinfo_t aux; \ /* NOTE(review): aux is never initialized in this function before its address is handed to the millikernel below. */ \ \ /* Compute number of primary and leftover components of the outer dimensions.
NOTE: Functionally speaking, we compute jc_iter as: jc_iter = n / NC; if ( jc_left ) ++jc_iter; However, this is implemented as: jc_iter = ( n + NC - 1 ) / NC; This avoids a branch at the cost of two additional integer instructions. The pc_iter, ic_iter, and jr_iter values are computed in a similar manner. */ \ const dim_t jc_iter = ( n + NC - 1 ) / NC; \ const dim_t jc_left = n % NC; \ \ const dim_t pc_iter = ( k + KC - 1 ) / KC; \ const dim_t pc_left = k % KC; \ \ const dim_t ic_iter = ( m + MC - 1 ) / MC; \ const dim_t ic_left = m % MC; \ \ const dim_t jc_inc = 1; \ const dim_t pc_inc = 1; \ const dim_t ic_inc = 1; \ const dim_t jr_inc = 1; \ /* const dim_t ir_inc = 1; \ */ \ \ /* Loop over the n dimension (NC rows/columns at a time). */ \ for ( dim_t jj = 0; jj < jc_iter; jj += jc_inc ) \ { \ const dim_t nc_cur = ( bli_is_not_edge_f( jj, jc_iter, jc_left ) ? NC : jc_left ); \ \ ctype* restrict b_jc = b_00 + jj * jcstep_b; \ ctype* restrict c_jc = c_00 + jj * jcstep_c; \ \ dim_t jr_iter = ( nc_cur + NR - 1 ) / NR; \ dim_t jr_left = nc_cur % NR; \ \ /* An optimization: allow the last jr iteration to absorb up to NRE extra columns of C and B. (If NRE != 0, i.e. NRM != NR, the mkernel has agreed to handle these cases.) Note that this prevents us from declaring jr_iter and jr_left as const. */ \ if ( 1 ) \ if ( NRE != 0 && 1 < jr_iter && jr_left != 0 && jr_left <= NRE ) \ { \ jr_iter--; jr_left += NR; \ } \ \ /* Loop over the k dimension (KC rows/columns at a time). */ \ for ( dim_t pp = 0; pp < pc_iter; pp += pc_inc ) \ { \ const dim_t kc_cur = ( bli_is_not_edge_f( pp, pc_iter, pc_left ) ? KC : pc_left ); \ \ ctype* restrict a_pc = a_00 + pp * pcstep_a; \ ctype* restrict b_pc = b_jc + pp * pcstep_b; \ \ /* Only apply beta to the first iteration of the pc loop. */ \ ctype* restrict beta_use = ( pp == 0 ? beta_cast : one ); \ \ /* Loop over the m dimension (MC rows at a time).
*/ \ for ( dim_t ii = 0; ii < ic_iter; ii += ic_inc ) \ { \ const dim_t mc_cur = ( bli_is_not_edge_f( ii, ic_iter, ic_left ) ? MC : ic_left ); \ \ ctype* restrict a_ic = a_pc + ii * icstep_a; \ ctype* restrict c_ic = c_jc + ii * icstep_c; \ \ /* const dim_t ir_iter = ( mc_cur + MR - 1 ) / MR; \ const dim_t ir_left = mc_cur % MR; \ */ \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( dim_t j = 0; j < jr_iter; j += jr_inc ) \ { \ const dim_t nr_cur = ( bli_is_not_edge_f( j, jr_iter, jr_left ) ? NR : jr_left ); \ \ ctype* restrict b_jr = b_pc + j * jrstep_b; \ ctype* restrict c_jr = c_ic + j * jrstep_c; \ \ /* No explicit loop over the m dimension here: the millikernel is handed all mc_cur rows at once. */ \ { \ /* Invoke the gemmsup millikernel. */ \ gemmsup_ker \ ( \ conja, \ conjb, \ mc_cur, \ nr_cur, \ kc_cur, \ alpha_cast, \ a_ic, rs_a, cs_a, \ b_jr, rs_b, cs_b, \ beta_use, \ c_jr, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ } \ } \ } \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: b1", kc_cur, nr_cur, b_jr, rs_b, cs_b, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: a1", mr_cur, kc_cur, a_ir, rs_a, cs_a, "%4.1f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "gemmsup_ref_var2: c ", mr_cur, nr_cur, c_ir, rs_c, cs_c, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( gemmsup_ref_var2m ) blis-0.6.1/frame/3/symm/000077500000000000000000000000001360743507500147025ustar00rootroot00000000000000blis-0.6.1/frame/3/symm/bli_symm.h000066400000000000000000000032461360743507500166730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_symm_front.h" blis-0.6.1/frame/3/symm/bli_symm_front.c000066400000000000000000000151311360743507500200720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

//
// bli_symm_front()
//
// Front-end for the object-based symm operation. It validates the operands
// (when error checking is enabled), short-circuits the alpha == 0 case by
// scaling C by beta, and otherwise prepares local aliases of A, B, and C
// (transposing and/or swapping them as needed) before handing the problem
// to bli_gemm_int() via bli_l3_thread_decorator() under the BLIS_GEMM
// operation family.
//
// Parameters:
//   side   - whether the symmetric matrix A is multiplied from the left or
//            from the right.
//   alpha  - scalar; if it equals BLIS_ZERO only the scaling of C by beta
//            is performed.
//   a      - the symmetric matrix operand.
//   b      - the general matrix operand.
//   beta   - scalar applied to C.
//   c      - the output matrix.
//   cntx   - context; queried for the gemm microkernel's storage
//            preference, the execution method, and the pack schemas.
//   rntm   - runtime object whose ways of parallelism are set here via
//            bli_rntm_set_ways_for_op().
//   cntl   - control tree, passed through to bli_l3_thread_decorator().
//
// All transpositions and swaps are applied to the local aliases, never to
// the obj_t structures passed in by the caller.
// NOTE(review): presumably this computes the standard symm update
// (C := beta*C + alpha*A*B or C := beta*C + alpha*B*A); the formula is not
// spelled out in this file -- confirm against the API documentation.
//
void bli_symm_front
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	bli_init_once();

	obj_t   a_local;
	obj_t   b_local;
	obj_t   c_local;

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
		bli_symm_check( side, alpha, a, b, beta, c, cntx );

	// If alpha is zero, scale by beta and return.
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( beta, c );
		return;
	}

	// Alias A, B, and C in case we need to apply transformations.
	bli_obj_alias_to( a, &a_local );
	bli_obj_alias_to( b, &b_local );
	bli_obj_alias_to( c, &c_local );

#ifdef BLIS_DISABLE_SYMM_RIGHT
	// NOTE: This case casts right-side symm in terms of left side. This is
	// necessary when the current subconfiguration uses a gemm microkernel
	// that assumes that the packing kernel will have already duplicated
	// (broadcast) elements of B in the packed copy of B. Supporting
	// duplication within the logic that packs micropanels from symmetric
	// matrices would be ugly, and so we simply don't support it. As a
	// consequence, those subconfigurations need a way to force the symmetric
	// matrix to be on the left (and thus the general matrix to be on the
	// right). So our solution is that in those cases, the subconfigurations
	// simply #define BLIS_DISABLE_SYMM_RIGHT.

	// NOTE: This case casts right-side symm in terms of left side. This can
	// lead to the microkernel being executed on an output matrix with the
	// microkernel's general stride IO case (unless the microkernel supports
	// both row and column IO cases as well).

	// If A is being multiplied from the right, transpose all operands
	// so that we can perform the computation as if A were being multiplied
	// from the left.
	if ( bli_is_right( side ) )
	{
		bli_toggle_side( &side );
		bli_obj_induce_trans( &a_local );
		bli_obj_induce_trans( &b_local );
		bli_obj_induce_trans( &c_local );
	}

#else
	// NOTE: This case computes right-side hemm/symm natively by packing
	// elements of the Hermitian/symmetric matrix A to micropanels of the
	// right-hand packed matrix operand "B", and elements of the general
	// matrix B to micropanels of the left-hand packed matrix operand "A".
	// This code path always gives us the opportunity to transpose the
	// entire operation so that the effective storage format of the output
	// matrix matches the microkernel's output preference. Thus, from a
	// performance perspective, this case is preferred.

	// An optimization: If C is stored by rows and the micro-kernel prefers
	// contiguous columns, or if C is stored by columns and the micro-kernel
	// prefers contiguous rows, transpose the entire operation to allow the
	// micro-kernel to access elements of C in its preferred manner.
	//if ( !bli_obj_is_1x1( &c_local ) ) // NOTE: This conditional should NOT
	                                     // be enabled. See issue #342 comments.
	if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
	{
		// Only B and C are transposed here; a_local is left untouched.
		// NOTE(review): presumably because A is symmetric, so that
		// transposing it would change nothing -- confirm.
		bli_toggle_side( &side );
		bli_obj_induce_trans( &b_local );
		bli_obj_induce_trans( &c_local );
	}

	// If the Hermitian/symmetric matrix A is being multiplied from the right,
	// swap A and B so that the Hermitian/symmetric matrix will actually be on
	// the right.
	if ( bli_is_right( side ) )
	{
		bli_obj_swap( &a_local, &b_local );
	}

#endif

	// Set each alias as the root object.
	// NOTE: We MUST wait until we are done potentially swapping the objects
	// before setting the root fields!
	bli_obj_set_as_root( &a_local );
	bli_obj_set_as_root( &b_local );
	bli_obj_set_as_root( &c_local );

	// Parse and interpret the contents of the rntm_t object to properly
	// set the ways of parallelism for each loop, and then make any
	// additional modifications necessary for the current operation.
	// The three dimensions passed are the length and width of C and the
	// width of the (possibly swapped) left-hand operand.
	bli_rntm_set_ways_for_op
	(
	  BLIS_SYMM,
	  BLIS_LEFT, // ignored for gemm/hemm/symm
	  bli_obj_length( &c_local ),
	  bli_obj_width( &c_local ),
	  bli_obj_width( &a_local ),
	  rntm
	);

	// A sort of hack for communicating the desired pack schemas for A and B
	// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
	// bli_l3_cntl_create_if()). This allows us to access the schemas from
	// the control tree, which hopefully reduces some confusion, particularly
	// in bli_packm_init().
	if ( bli_cntx_method( cntx ) == BLIS_NAT )
	{
		bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
		bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
	}
	else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
	{
		// For non-native methods the schemas are taken from the context.
		pack_t schema_a = bli_cntx_schema_a_block( cntx );
		pack_t schema_b = bli_cntx_schema_b_panel( cntx );

		bli_obj_set_pack_schema( schema_a, &a_local );
		bli_obj_set_pack_schema( schema_b, &b_local );
	}

	// Invoke the internal back-end.
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_GEMM, // operation family id
	  alpha,
	  &a_local,
	  &b_local,
	  beta,
	  &c_local,
	  cntx,
	  rntm,
	  cntl
	);
}

blis-0.6.1/frame/3/symm/bli_symm_front.h000066400000000000000000000035401360743507500201000ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Front-end for the object-based symm operation.
//   side        - side from which the symmetric matrix a is multiplied.
//   alpha, beta - scalars applied to the product and to c, respectively.
//   a           - symmetric matrix operand.
//   b           - general matrix operand.
//   c           - output matrix.
//   cntx, rntm, cntl - context, runtime, and control-tree objects that are
//                 forwarded to the internal back-end.
void bli_symm_front
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     );

blis-0.6.1/frame/3/syr2k/000077500000000000000000000000001360743507500147675ustar00rootroot00000000000000blis-0.6.1/frame/3/syr2k/bli_syr2k.h000066400000000000000000000032471360743507500170460ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ #include "bli_syr2k_front.h" blis-0.6.1/frame/3/syr2k/bli_syr2k_front.c000066400000000000000000000114501360743507500202440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_syr2k_front ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { bli_init_once(); obj_t c_local; obj_t a_local; obj_t bt_local; obj_t b_local; obj_t at_local; // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_syr2k_check( alpha, a, b, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); bli_obj_set_as_root( &c_local ); // For syr2k, the first and second right-hand "B" operands are simply B' // and A'. bli_obj_alias_to( b, &bt_local ); bli_obj_induce_trans( &bt_local ); bli_obj_alias_to( a, &at_local ); bli_obj_induce_trans( &at_local ); // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_induce_trans( &c_local ); } // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. bli_rntm_set_ways_for_op ( BLIS_SYR2K, BLIS_LEFT, // ignored for her[2]k/syr[2]k bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // A sort of hack for communicating the desired pach schemas for A and B // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and // bli_l3_cntl_create_if()). This allows us to access the schemas from // the control tree, which hopefully reduces some confusion, particularly // in bli_packm_init(). 
if ( bli_cntx_method( cntx ) == BLIS_NAT ) { bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bt_local ); bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local ); bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local ); } else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) { pack_t schema_a = bli_cntx_schema_a_block( cntx ); pack_t schema_b = bli_cntx_schema_b_panel( cntx ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &bt_local ); bli_obj_set_pack_schema( schema_a, &b_local ); bli_obj_set_pack_schema( schema_b, &at_local ); } // Invoke herk twice, using beta only the first time. // Invoke the internal back-end. bli_l3_thread_decorator ( bli_gemm_int, BLIS_HERK, // operation family id alpha, &a_local, &bt_local, beta, &c_local, cntx, rntm, cntl ); bli_l3_thread_decorator ( bli_gemm_int, BLIS_HERK, // operation family id alpha, &b_local, &at_local, &BLIS_ONE, &c_local, cntx, rntm, cntl ); } blis-0.6.1/frame/3/syr2k/bli_syr2k_front.h000066400000000000000000000035141360743507500202530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Front-end for the object-based syr2k operation.
//   alpha, beta - scalars applied to the products and to c, respectively.
//   a, b        - input matrix operands.
//   c           - output matrix.
//   cntx, rntm, cntl - context, runtime, and control-tree objects that are
//                 forwarded to the internal back-end.
void bli_syr2k_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     );

blis-0.6.1/frame/3/syrk/000077500000000000000000000000001360743507500147055ustar00rootroot00000000000000blis-0.6.1/frame/3/syrk/bli_syrk.h000066400000000000000000000032461360743507500167010ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_syrk_front.h" blis-0.6.1/frame/3/syrk/bli_syrk_front.c000066400000000000000000000106261360743507500201040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_syrk_front ( obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { bli_init_once(); obj_t a_local; obj_t at_local; obj_t c_local; // Alias A and C in case we need to apply transformations. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( c, &c_local ); bli_obj_set_as_root( &c_local ); // For syrk, the right-hand "B" operand is simply A^T. bli_obj_alias_to( a, &at_local ); bli_obj_induce_trans( &at_local ); #ifdef BLIS_ENABLE_SMALL_MATRIX gint_t status = bli_syrk_small( alpha, &a_local, &at_local, beta, &c_local, cntx, cntl ); if ( status == BLIS_SUCCESS ) return; #endif // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syrk_check( alpha, a, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. 
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_induce_trans( &c_local ); } // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. bli_rntm_set_ways_for_op ( BLIS_SYRK, BLIS_LEFT, // ignored for her[2]k/syr[2]k bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // A sort of hack for communicating the desired pach schemas for A and B // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and // bli_l3_cntl_create_if()). This allows us to access the schemas from // the control tree, which hopefully reduces some confusion, particularly // in bli_packm_init(). if ( bli_cntx_method( cntx ) == BLIS_NAT ) { bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local ); } else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) { pack_t schema_a = bli_cntx_schema_a_block( cntx ); pack_t schema_b = bli_cntx_schema_b_panel( cntx ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &at_local ); } // Invoke the internal back-end. bli_l3_thread_decorator ( bli_gemm_int, BLIS_HERK, // operation family id alpha, &a_local, &at_local, beta, &c_local, cntx, rntm, cntl ); } blis-0.6.1/frame/3/syrk/bli_syrk_front.h000066400000000000000000000040201360743507500201000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Front-end for the object-based syrk operation.
//   alpha, beta - scalars applied to the product and to c, respectively.
//   a           - input matrix operand.
//   c           - output matrix.
//   cntx, rntm, cntl - context, runtime, and control-tree objects that are
//                 forwarded to the internal back-end.
void bli_syrk_front
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     );

#ifdef BLIS_ENABLE_SMALL_MATRIX
// Small-matrix implementation of syrk; only declared when
// BLIS_ENABLE_SMALL_MATRIX is defined. bli_syrk_front() calls it with its
// alias of A as a and a transposed alias of A as b, and treats a return
// value of BLIS_SUCCESS as "operation completed"; any other value causes
// bli_syrk_front() to continue with its conventional code path.
err_t bli_syrk_small
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       cntl_t* cntl
     );
#endif

blis-0.6.1/frame/3/trmm/000077500000000000000000000000001360743507500146745ustar00rootroot00000000000000blis-0.6.1/frame/3/trmm/bli_trmm.h000066400000000000000000000033011360743507500166470ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_trmm_front.h" #include "bli_trmm_var.h" blis-0.6.1/frame/3/trmm/bli_trmm_front.c000066400000000000000000000167701360743507500200700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

//
// bli_trmm_front()
//
// Front-end for the object-based trmm operation. B serves as both an input
// and the output: it is aliased twice below, once as the input operand
// (b_local) and once as the output operand (c_local), and the back-end is
// invoked with a beta of BLIS_ZERO. The function validates the operands
// (when error checking is enabled), short-circuits the alpha == 0 case,
// normalizes away any transposition requested on A, and then hands the
// problem to bli_gemm_int() via bli_l3_thread_decorator() under the
// BLIS_TRMM operation family.
//
// Parameters:
//   side   - whether the triangular matrix A is multiplied from the left
//            or from the right.
//   alpha  - scalar; if it equals BLIS_ZERO, B is simply scaled by alpha.
//   a      - the triangular matrix operand.
//   b      - the general matrix operand, overwritten with the result.
//   cntx   - context; queried for the gemm microkernel's storage
//            preference, the execution method, and the pack schemas.
//   rntm   - runtime object whose ways of parallelism are set here via
//            bli_rntm_set_ways_for_op().
//   cntl   - control tree, passed through to bli_l3_thread_decorator().
//
void bli_trmm_front
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	bli_init_once();

	obj_t   a_local;
	obj_t   b_local;
	obj_t   c_local;

	// Check parameters. B is passed as both the "B" and "C" operands, with
	// BLIS_ZERO standing in for beta.
	if ( bli_error_checking_is_enabled() )
		bli_trmm_check( side, alpha, a, b, &BLIS_ZERO, b, cntx );

	// If alpha is zero, scale B by alpha and return. (trmm has no beta
	// operand; the output B is scaled by alpha itself.)
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( alpha, b );
		return;
	}

	// Alias A and B so we can tweak the objects if necessary. B is aliased
	// twice: b_local is the input operand and c_local the output operand.
	bli_obj_alias_to( a, &a_local );
	bli_obj_alias_to( b, &b_local );
	bli_obj_alias_to( b, &c_local );

	// We do not explicitly implement the cases where A is transposed.
	// However, we can still handle them. Specifically, if A is marked as
	// needing a transposition, we simply induce a transposition. This
	// allows us to only explicitly implement the no-transpose cases. Once
	// the transposition is induced, the correct algorithm will be called,
	// since, for example, an algorithm over a transposed lower triangular
	// matrix A moves in the same direction (forwards) as a non-transposed
	// upper triangular matrix. And with the transposition induced, the
	// matrix now appears to be upper triangular, so the upper triangular
	// algorithm will grab the correct partitions, as if it were upper
	// triangular (with no transpose) all along.
	if ( bli_obj_has_trans( &a_local ) )
	{
		bli_obj_induce_trans( &a_local );
		bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
	}

#ifdef BLIS_DISABLE_TRMM_RIGHT
	// NOTE: This case casts right-side trmm in terms of left side. This is
	// necessary when the current subconfiguration uses a gemm microkernel
	// that assumes that the packing kernel will have already duplicated
	// (broadcast) elements of B in the packed copy of B. Supporting
	// duplication within the logic that packs micropanels from triangular
	// matrices would be ugly, and so we simply don't support it. As a
	// consequence, those subconfigurations need a way to force the triangular
	// matrix to be on the left (and thus the general matrix to be on the
	// right). So our solution is that in those cases, the subconfigurations
	// simply #define BLIS_DISABLE_TRMM_RIGHT.

	// NOTE: This case casts right-side trmm in terms of left side. This can
	// lead to the microkernel being executed on an output matrix with the
	// microkernel's general stride IO case (unless the microkernel supports
	// both row and column IO cases as well).

	// NOTE: Casting right-side trmm in terms of left side reduces the number
	// of macrokernels exercised to two (trmm_ll and trmm_lu).

	// If A is being multiplied from the right, transpose all operands
	// so that we can perform the computation as if A were being multiplied
	// from the left.
	if ( bli_is_right( side ) )
	{
		bli_toggle_side( &side );
		bli_obj_induce_trans( &a_local );
		bli_obj_induce_trans( &b_local );
		bli_obj_induce_trans( &c_local );
	}

#else
	// NOTE: This case computes right-side trmm natively with trmm_rl and
	// trmm_ru macrokernels. This code path always gives us the opportunity
	// to transpose the entire operation so that the effective storage format
	// of the output matrix matches the microkernel's output preference.
	// Thus, from a performance perspective, this case is preferred.

	// An optimization: If C is stored by rows and the micro-kernel prefers
	// contiguous columns, or if C is stored by columns and the micro-kernel
	// prefers contiguous rows, transpose the entire operation to allow the
	// micro-kernel to access elements of C in its preferred manner.
	// NOTE: The guard that would disable this optimization for 1x1 matrices
	// (where the concept of row- vs. column storage breaks down) is
	// deliberately left commented out below, so the optimization currently
	// applies to 1x1 matrices as well.
	//if ( !bli_obj_is_1x1( &c_local ) ) // NOTE: This conditional should NOT
	                                     // be enabled. See issue #342 comments.
	if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
	{
		bli_toggle_side( &side );
		bli_obj_induce_trans( &a_local );
		bli_obj_induce_trans( &b_local );
		bli_obj_induce_trans( &c_local );
	}

	// If A is being multiplied from the right, swap A and B so that
	// the matrix will actually be on the right.
	if ( bli_is_right( side ) )
	{
		bli_obj_swap( &a_local, &b_local );
	}

#endif

	// Set each alias as the root object.
	// NOTE: We MUST wait until we are done potentially swapping the objects
	// before setting the root fields!
	bli_obj_set_as_root( &a_local );
	bli_obj_set_as_root( &b_local );
	bli_obj_set_as_root( &c_local );

	// Parse and interpret the contents of the rntm_t object to properly
	// set the ways of parallelism for each loop, and then make any
	// additional modifications necessary for the current operation.
	// Unlike the symm/syrk/syr2k front-ends, the (possibly toggled) side
	// is passed through here rather than a fixed BLIS_LEFT.
	bli_rntm_set_ways_for_op
	(
	  BLIS_TRMM,
	  side,
	  bli_obj_length( &c_local ),
	  bli_obj_width( &c_local ),
	  bli_obj_width( &a_local ),
	  rntm
	);

	// A sort of hack for communicating the desired pack schemas for A and B
	// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
	// bli_l3_cntl_create_if()). This allows us to access the schemas from
	// the control tree, which hopefully reduces some confusion, particularly
	// in bli_packm_init().
	if ( bli_cntx_method( cntx ) == BLIS_NAT )
	{
		bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
		bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
	}
	else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
	{
		// For non-native methods the schemas are taken from the context.
		pack_t schema_a = bli_cntx_schema_a_block( cntx );
		pack_t schema_b = bli_cntx_schema_b_panel( cntx );

		bli_obj_set_pack_schema( schema_a, &a_local );
		bli_obj_set_pack_schema( schema_b, &b_local );
	}

	// Invoke the internal back-end. BLIS_ZERO is passed as beta since the
	// output operand is an alias of B.
	bli_l3_thread_decorator
	(
	  bli_gemm_int,
	  BLIS_TRMM, // operation family id
	  alpha,
	  &a_local,
	  &b_local,
	  &BLIS_ZERO,
	  &c_local,
	  cntx,
	  rntm,
	  cntl
	);
}

blis-0.6.1/frame/3/trmm/bli_trmm_front.h000066400000000000000000000034711360743507500200670ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Prototype for the trmm front-end. Its definition checks the parameters (when error checking is enabled), handles a zero alpha by scaling b and returning, aliases and (where needed) transposes or swaps the operands, sets their pack schemas, and invokes bli_gemm_int via bli_l3_thread_decorator() with BLIS_TRMM as the operation family. b is used as both the input matrix and the output matrix. */ void bli_trmm_front ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ); blis-0.6.1/frame/3/trmm/bli_trmm_ll_ker_var2.c000066400000000000000000000412171360743507500211340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* FUNCPTR_T is the signature shared by the datatype-specific macro-kernels that the GENTFUNC template in this file produces; ftypes is the table of those functions, indexed by datatype in bli_trmm_ll_ker_var2(). */ #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_ll_ker_var2); /* Object-based entry point for this trmm macro-kernel. Extracts the diagonal offset of A, the pack schemas of A and B, the dimensions (m and n from C, k from the width of A), and the buffers, strides, panel dimensions and panel strides of the operands; merges the scalars attached to A and B into alpha and uses the scalar attached to C as beta; then calls the typed function selected by the execution datatype of C. The cntl argument is not referenced here. NOTE(review): "ll" presumably denotes left-side, lower-triangular A -- confirm against the callers. */ void bli_trmm_ll_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach the scalars attached to A and B and multiply them together; the merged scalar is read from scalar_b below. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. 
f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } /* Template for the datatype-specific macro-kernel. It loops over the n dimension NR columns at a time and over the m dimension MR rows at a time, calling the gemm micro-kernel on each micro-panel of A that intersects or lies strictly below the diagonal. It is instantiated by INSERT_GENTFUNC_BASIC0( trmm_ll_ker_var2 ) at the end of this file. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are chosen from the micro-kernel's storage preference (col_pref below): if the micro-kernel prefers columns, ct is column-stored (rs_ct = 1, cs_ct = MR); otherwise ct is row-stored (rs_ct = NR, cs_ct = 1). */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1011; \ dim_t off_a1011; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if PACKMR and NR are both odd, or if PACKNR and MR are both odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current block of A is entirely above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffa < 0 ) \ { \ i = -diagoffa; \ m = m - i; \ diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. 
*/ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Save the desired output datatype (indicating no typecasting). */ \ /*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ /*thrinfo_t* ir_thread = bli_thrinfo_sub_node( thread );*/ \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ /*dim_t ir_nt = bli_thread_n_way( ir_thread ); \ dim_t ir_tid = bli_thread_work_id( ir_thread );*/ \ \ dim_t jr_start, jr_end; \ /*dim_t ir_start, ir_end;*/ \ dim_t jr_inc; \ \ /* Determine the thread range and increment for the 2nd loop. NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. \ NOTE: Parallelism in the 1st loop is disabled for now. */ \ bli_thread_range_jrir( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ /*bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc );*/ \ \ /* Loop over the n dimension (NR columns at a time). 
*/ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ a1 = a_cast; \ c11 = c1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict b1_i; \ ctype* restrict a2; \ \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in b1. */ \ off_a1011 = 0; \ k_a1011 = bli_min( diagoffa_i + MR, k ); \ \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_a_cur = k_a1011 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ b1_i = b1 + ( off_a1011 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately. 
*/ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_a1011, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_a1011, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \ { \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. 
*/ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += rstep_a; \ } \ \ c11 += rstep_c; \ } \ } \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ll_ker_var2: a1", MR, k_a1011, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ll_ker_var2: b1", k_a1011, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_ll_ker_var2 ) blis-0.6.1/frame/3/trmm/bli_trmm_lu_ker_var2.c000066400000000000000000000414651360743507500211520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" /* FUNCPTR_T is the signature shared by the datatype-specific macro-kernels that the GENTFUNC template in this file produces; ftypes is the table of those functions, indexed by datatype in bli_trmm_lu_ker_var2(). */ #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_lu_ker_var2); /* Object-based entry point for this trmm macro-kernel. Extracts the diagonal offset of A, the pack schemas of A and B, the dimensions (m and n from C, k from the width of A), and the buffers, strides, panel dimensions and panel strides of the operands; merges the scalars attached to A and B into alpha and uses the scalar attached to C as beta; then calls the typed function selected by the execution datatype of C. The cntl argument is not referenced here. NOTE(review): "lu" presumably denotes left-side, upper-triangular A -- confirm against the callers. */ void bli_trmm_lu_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach the scalars attached to A and B and multiply them together; the merged scalar is read from scalar_b below. 
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } /* Template for the datatype-specific macro-kernel. It loops over the n dimension NR columns at a time and over the m dimension MR rows at a time, calling the gemm micro-kernel on each micro-panel of A that intersects or lies strictly above the diagonal. It is instantiated by INSERT_GENTFUNC_BASIC0( trmm_lu_ker_var2 ) at the end of this file. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. 
Note that the strides of this temporary buffer are chosen from the micro-kernel's storage preference (col_pref below): if the micro-kernel prefers columns, ct is column-stored (rs_ct = 1, cs_ct = MR); otherwise ct is row-stored (rs_ct = NR, cs_ct = 1). */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1112; \ dim_t off_a1112; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if PACKMR and NR are both odd, or if PACKNR and MR are both odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current block of A is entirely below the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of A intersects the top edge of the block, adjust the pointer to B and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to A since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffa > 0 ) \ { \ i = diagoffa; \ k = k - i; \ diagoffa = 0; \ b_cast = b_cast + ( i * PACKNR ) / off_scl; \ } \ \ /* If there is a zero region below where the diagonal of A intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffa + k < m ) \ { \ m = -diagoffa + k; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. 
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Save the desired output datatype (indicating no typecasting). */ \ /*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ /*thrinfo_t* ir_thread = bli_thrinfo_sub_node( thread );*/ \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ /*dim_t ir_nt = bli_thread_n_way( ir_thread ); \ dim_t ir_tid = bli_thread_work_id( ir_thread );*/ \ \ dim_t jr_start, jr_end; \ /*dim_t ir_start, ir_end;*/ \ dim_t jr_inc; \ \ /* Determine the thread range and increment for the 2nd loop. NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. \ NOTE: Parallelism in the 1st loop is disabled for now. 
*/ \ bli_thread_range_jrir( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ /*bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc );*/ \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ a1 = a_cast; \ c11 = c1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, scale C by beta. If it is strictly above the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict b1_i; \ ctype* restrict a2; \ \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in b1. */ \ off_a1112 = diagoffa_i; \ k_a1112 = k - off_a1112; \ \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_a_cur = k_a1112 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ b1_i = b1 + ( off_a1112 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_a1112, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_a1112, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \ { \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. 
*/ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += rstep_a; \ } \ \ c11 += rstep_c; \ } \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_lu_ker_var2: a1", MR, k_a1112, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_lu_ker_var2: b1", k_a1112, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_lu_ker_var2 ) blis-0.6.1/frame/3/trmm/bli_trmm_rl_ker_var2.c000066400000000000000000000453241360743507500211450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffb, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_rl_ker_var2); void bli_trmm_rl_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffb = bli_obj_diag_offset( b ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. 
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffb, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffb, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffb_j; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_b1121; \ dim_t off_b1121; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_b_num; \ inc_t ss_b_den; \ inc_t ps_b_cur; \ inc_t is_b_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of B is entirely above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_b ) || \ bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region above where the diagonal of B intersects the left edge of the panel, adjust the pointer to A and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to B since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffb < 0 ) \ { \ j = -diagoffb; \ k = k - j; \ diagoffb = 0; \ a_cast = a_cast + ( j * PACKMR ) / off_scl; \ } \ \ /* If there is a zero region to the right of where the diagonal of B intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffb + k < n ) \ { \ n = diagoffb + k; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. 
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k_full; \ istep_b = PACKNR * k; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Save the desired output datatype (indicating no typecasting). */ \ /*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the rectangular part of B, and the triangular portion. */ \ dim_t n_iter_rct; \ dim_t n_iter_tri; \ \ if ( bli_is_strictly_below_diag_n( diagoffb, m, n ) ) \ { \ /* If the entire panel of B does not intersect the diagonal, there is no triangular region, and therefore we can skip the second set of loops. */ \ n_iter_rct = n_iter; \ n_iter_tri = 0; \ } \ else \ { \ /* If the panel of B does intersect the diagonal, compute the number of iterations in the rectangular region by dividing NR into the diagonal offset. (There should never be any remainder in this division.) 
The number of iterations in the triangular (or trapezoidal) region is computed as the remaining number of iterations in the n dimension. */ \ n_iter_rct = diagoffb / NR; \ n_iter_tri = n_iter - n_iter_rct; \ } \ \ /* Determine the thread range and increment for the 2nd and 1st loops for the initial rectangular region of B (if it exists). NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. \ NOTE: Parallelism in the 1st loop is disabled for now. */ \ bli_thread_range_jrir( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ { \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_trmm_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_trmm_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. 
*/ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ /* If there is no triangular region, then we're done. */ \ if ( n_iter_tri == 0 ) return; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd and 1st loops for the remaining triangular region of B (if it exists). NOTE: We don't need to call bli_thread_range_jrir_rr() here since we employ a hack that calls for each thread to execute every iteration of the jr and ir loops but skip all but the pointer increment for iterations that are not assigned to it. */ \ \ /* Advance the starting b1 and c1 pointers to the positions corresponding to the start of the triangular region of B. */ \ jr_start = n_iter_rct; \ b1 = b_cast + jr_start * cstep_b; \ c1 = c_cast + jr_start * cstep_c; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < n_iter; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ diagoffb_j = diagoffb - ( doff_t )j*NR; \ \ /* Determine the offset to the beginning of the panel that was packed so we can index into the corresponding location in A. Then compute the length of that panel. */ \ off_b1121 = bli_max( -diagoffb_j, 0 ); \ k_b1121 = k - off_b1121; \ \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. 
*/ \ { \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_b_cur = k_b1121 * PACKNR; \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ if ( bli_trmm_my_iter_rr( j, thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( is_b_cur, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter_rr( i, caucus ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ a1_i = a1 + ( off_b1121 * PACKMR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b1121, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b1121, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. 
*/ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += ps_b_cur; \ } \ \ c1 += cstep_c; \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_rl_ker_var2: a1", MR, k_b1121, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_rl_ker_var2: b1", k_b1121, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_rl_ker_var2 ) blis-0.6.1/frame/3/trmm/bli_trmm_ru_ker_var2.c000066400000000000000000000471521360743507500211570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffb, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_ru_ker_var2); void bli_trmm_ru_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffb = bli_obj_diag_offset( b ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. 
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffb, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffb, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffb_j; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_b0111; \ dim_t off_b0111; \ dim_t i, j, jb0; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_b_num; \ inc_t ss_b_den; \ inc_t ps_b_cur; \ inc_t is_b_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of B is entirely below its diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffb, k, n ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_b ) || \ bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffb > 0 ) \ { \ j = diagoffb; \ n = n - j; \ diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \ /* If there is a zero region below where the diagonal of B intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffb + n < k ) \ { \ k = -diagoffb + n; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. 
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k_full; \ istep_b = PACKNR * k; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Save the desired output datatype (indicating no typecasting). */ \ /*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the triangular part of C, and the rectangular portion. */ \ dim_t n_iter_tri; \ dim_t n_iter_rct; \ \ if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) \ { \ /* If the entire panel of B does not intersect the diagonal, there is no triangular region, and therefore we can skip the first set of loops. 
*/ \ n_iter_tri = 0; \ n_iter_rct = n_iter; \ } \ else \ { \ /* If the panel of B does intersect the diagonal, compute the number of iterations in the triangular (or trapezoidal) region by dividing NR into the number of rows in B. (There should never be any remainder in this division.) The number of iterations in the rectangular region is computed as the remaining number of iterations in the n dimension. */ \ n_iter_tri = ( k + diagoffb ) / NR + ( ( k + diagoffb ) % NR ? 1 : 0 ); \ n_iter_rct = n_iter - n_iter_tri; \ } \ \ /* Use round-robin assignment of micropanels to threads in the 2nd and 1st loops for the initial triangular region of B (if it exists). NOTE: We don't need to call bli_thread_range_jrir_rr() here since we employ a hack that calls for each thread to execute every iteration of the jr and ir loops but skip all but the pointer increment for iterations that are not assigned to it. */ \ \ b1 = b_cast; \ c1 = c_cast; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter_tri; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ diagoffb_j = diagoffb - ( doff_t )j*NR; \ \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in A. */ \ off_b0111 = 0; \ k_b0111 = bli_min( k, -diagoffb_j + NR ); \ \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ { \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_b_cur = k_b0111 * PACKNR; \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 
1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ if ( bli_trmm_my_iter_rr( j, thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( is_b_cur, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter_rr( i, caucus ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ a1_i = a1 + ( off_b0111 * PACKMR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b0111, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b0111, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += ps_b_cur; \ } \ \ c1 += cstep_c; \ } \ \ /* If there is no rectangular region, then we're done. */ \ if ( n_iter_rct == 0 ) return; \ \ /* Determine the thread range and increment for the 2nd and 1st loops for the remaining rectangular region of B. 
NOTE: The definition of bli_thread_range_jrir() will depend on whether slab or round-robin partitioning was requested at configure-time. \ NOTE: Parallelism in the 1st loop is disabled for now. */ \ bli_thread_range_jrir( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Advance the start and end iteration offsets for the rectangular region by the number of iterations used for the triangular region. */ \ jr_start += n_iter_tri; \ jr_end += n_iter_tri; \ jb0 = n_iter_tri; \ \ /* Save the resulting value of b1 from the previous loop since it represents the starting point for the rectangular region. */ \ b_cast = b1; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ /* NOTE: We must index through b_cast differently since it contains the starting address of the rectangular region (which is already n_iter_tri logical iterations through B). */ \ b1 = b_cast + (j-jb0) * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ { \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. 
*/ \ a2 = bli_trmm_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_trmm_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ \ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2: a1", MR, k_b0111, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2: b1", k_b0111, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_ru_ker_var2 ) blis-0.6.1/frame/3/trmm/bli_trmm_var.h000066400000000000000000000057561360743507500175370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/


//
// Prototype object-based interfaces.
//
// GENPROT( opname ) expands to the declaration of one object-based trmm
// macro-kernel entry point. Every such entry point takes the three operand
// objects (a, b, c), a context, a runtime object, a control-tree node and a
// thread-info node, and returns nothing.
// NOTE(review): PASTEMAC0 presumably pastes the "bli_" prefix onto opname
// (the definitions in the .c files are named bli_trmm_*_ker_var2) -- confirm
// against the macro's definition.
//

#undef  GENPROT
#define GENPROT( opname ) \
\
void PASTEMAC0(opname) \
     ( \
       obj_t*  a, \
       obj_t*  b, \
       obj_t*  c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       cntl_t* cntl, \
       thrinfo_t* thread  \
     );

// The blocked variants are currently not prototyped here.
//GENPROT( trmm_blk_var1 )
//GENPROT( trmm_blk_var2 )
//GENPROT( trmm_blk_var3 )

// Dispatcher that selects one of the four side/uplo-specific macro-kernels.
GENPROT( trmm_xx_ker_var2 )

// Side/uplo-specific macro-kernels (ll, lu, rl, ru).
GENPROT( trmm_ll_ker_var2 )
GENPROT( trmm_lu_ker_var2 )
GENPROT( trmm_rl_ker_var2 )
GENPROT( trmm_ru_ker_var2 )


//
// Prototype BLAS-like interfaces with void pointer operands.
//
// GENTPROT( ctype, ch, varname ) expands to the declaration of one
// datatype-specific macro-kernel. Operands are passed as untyped buffers
// together with their strides; a and b additionally carry a panel dimension
// (pd_*) and a panel stride (ps_*).
// NOTE(review): INSERT_GENTPROT_BASIC0 presumably instantiates GENTPROT once
// per supported datatype character -- confirm against its definition.
//

#undef  GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoff, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     );

INSERT_GENTPROT_BASIC0( trmm_ll_ker_var2 )
INSERT_GENTPROT_BASIC0( trmm_lu_ker_var2 )
INSERT_GENTPROT_BASIC0( trmm_rl_ker_var2 )
INSERT_GENTPROT_BASIC0( trmm_ru_ker_var2 )

blis-0.6.1/frame/3/trmm/bli_trmm_xx_ker_var2.c000066400000000000000000000054331360743507500211640ustar00rootroot00000000000000/* BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" static gemm_var_oft vars[2][2] = { { bli_trmm_ll_ker_var2, bli_trmm_lu_ker_var2 }, { bli_trmm_rl_ker_var2, bli_trmm_ru_ker_var2 } }; void bli_trmm_xx_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { bool_t side; bool_t uplo; gemm_var_oft f; // Set two bools: one based on the implied side parameter (the structure // of the root object) and one based on the uplo field of the triangular // matrix's root object (whether that is matrix A or matrix B). if ( bli_obj_root_is_triangular( a ) ) { side = 0; if ( bli_obj_root_is_lower( a ) ) uplo = 0; else uplo = 1; } else // if ( bli_obj_root_is_triangular( b ) ) { side = 1; if ( bli_obj_root_is_lower( b ) ) uplo = 0; else uplo = 1; } // Index into the variant array to extract the correct function pointer. f = vars[side][uplo]; // Call the macrokernel. f ( a, b, c, cntx, rntm, cntl, thread ); } blis-0.6.1/frame/3/trmm/other/000077500000000000000000000000001360743507500160155ustar00rootroot00000000000000blis-0.6.1/frame/3/trmm/other/bli_trmm_ll_ker_var2.c000066400000000000000000000372031360743507500222550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

#define FUNCPTR_T gemm_fp

// Signature shared by every datatype-specific instance of the macro-kernel
// defined further below; the object-based wrapper calls through a pointer of
// this type.
typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffa,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of datatype-specific macro-kernels, indexed by execution datatype.
static FUNCPTR_T GENARRAY(ftypes,trmm_ll_ker_var2);


//
// Object-based front end of the trmm "ll" macro-kernel.
//
// Unpacks dimensions, strides, panel information and buffers from the
// objects a, b and c, merges the scalars attached to a and b into a single
// alpha, takes beta from the scalar attached to c, and then calls the
// macro-kernel instance that matches the execution datatype of c.
//
//   a, b      operands; their pack schema, panel dimension and panel stride
//             are read here, so they are expected to be packed already.
//   c         output matrix; also provides m, n and the execution datatype.
//   cntx      context, forwarded to the typed macro-kernel.
//   rntm      runtime object, forwarded to the typed macro-kernel.
//   cntl      control-tree node; not referenced in this function.
//   thread    thread-info node, forwarded to the typed macro-kernel.
//
void bli_trmm_ll_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffa  = bli_obj_diag_offset( a );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	// m and n come from C; k is the width of A.
	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B.
	// (The product is left in scalar_b, which is what alpha points at below.)
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffa,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// Datatype-templated body of the trmm "ll" macro-kernel.
// (bli_trmm_xx_ker_var2() selects the "ll" variant when the root of A is
// triangular and lower.)
//
// Structure of the generated function:
//  - MR/NR are taken from the panel dimensions of A/B, PACKMR/PACKNR from
//    cs_a/rs_b.
//  - The outer (jr) loop walks the n dimension NR columns at a time; the
//    inner (ir) loop walks the m dimension MR rows at a time. Iterations are
//    filtered per thread with bli_trmm_my_iter().
//  - A micro-panel of A that intersects the diagonal is multiplied using the
//    shortened inner dimension k_a1011, and C is scaled by beta.
//  - A micro-panel of A strictly below the diagonal is multiplied using the
//    full k, and the product is accumulated into C (beta of one).
//  - A micro-panel that is neither (no branch matches) contributes nothing.
//  - Edge tiles (m_cur < MR or n_cur < NR) are computed via the temporary
//    tile ct and then copied/added into C.
//
// Parameters mirror the FUNCPTR_T signature above. jr_thread is the
// thread-info node used for the jr loop; the node for the ir loop is
// obtained from it with bli_thrinfo_sub_node(). rntm is not referenced in
// the body.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffa, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* jr_thread \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	const dim_t     PACKMR     = cs_a; \
	const dim_t     PACKNR     = rs_b; \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict one        = PASTEMAC(ch,1); \
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffa_i; \
	dim_t           k_full; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           k_a1011; \
	dim_t           off_a1011; \
	dim_t           i, j; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	inc_t           istep_a; \
	inc_t           istep_b; \
	inc_t           off_scl; \
	inc_t           ss_a_num; \
	inc_t           ss_a_den; \
	inc_t           ps_a_cur; \
	inc_t           is_a_cur; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if both MR and NR are odd. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If the current block of A is entirely above the diagonal,
	   it is implicitly zero. So we do nothing. */ \
	if ( bli_is_strictly_above_diag_n( diagoffa, m, k ) ) return; \
\
	/* Compute k_full. For all trmm, k_full is simply k. This is
	   needed because some parameter combinations of trmm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of B (the non-triangular
	   matrix), which is used by 4m1/3m1 implementations, we need
	   this unreduced value of k. */ \
	k_full = k; \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_a ) || \
	     bli_is_3mi_packed( schema_a ) || \
	     bli_is_rih_packed( schema_a ) ) off_scl = 2; \
	else                                 off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. And if we are packing real-only, imag-only, or
	   summed-only, we need to scale the computed panel sizes by 1/2
	   to compensate for the fact that the pointer arithmetic occurs
	   in terms of complex elements rather than real elements. */ \
	if      ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
	else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \
	else                                      { ss_a_num = 1; ss_a_den = 1; } \
\
	/* If there is a zero region above where the diagonal of A intersects the
	   left edge of the block, adjust the pointer to C and treat this case as
	   if the diagonal offset were zero. This skips over the region that was
	   not packed. (Note we assume the diagonal offset is a multiple of MR;
	   this assumption will hold as long as the cache blocksizes are each a
	   multiple of MR and NR.) */ \
	if ( diagoffa < 0 ) \
	{ \
		i        = -diagoffa; \
		m        = m - i; \
		diagoffa = 0; \
		c_cast   = c_cast + (i  )*rs_c; \
	} \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. A non-zero remainder adds one (partial) iteration. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C.
	   The imaginary strides istep_a/istep_b are rounded up to an even
	   value below. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k; \
	istep_b = PACKNR * k_full; \
\
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_b( istep_b, &aux ); \
\
	b1 = b_cast; \
	c1 = c_cast; \
	/* Thread info: ir_thread is the sub-node of jr_thread; the jr thread
	   count and work id are used for last-iteration detection below. */ \
	thrinfo_t* ir_thread = bli_thrinfo_sub_node( jr_thread ); \
	dim_t jr_num_threads = bli_thread_n_way( jr_thread ); \
	dim_t jr_thread_id   = bli_thread_work_id( jr_thread ); \
\
	/* Loop over the n dimension (NR columns at a time). Every thread runs
	   every iteration of this loop, but only executes the body for the
	   iterations accepted by bli_trmm_my_iter(); the b1/c1 increments at
	   the bottom of the loop are applied unconditionally. */ \
	for ( j = 0; j < n_iter; ++j ) \
	{ \
		if ( bli_trmm_my_iter( j, jr_thread ) ) { \
\
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		a1  = a_cast; \
		c11 = c1; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = 0; i < m_iter; ++i ) \
		{ \
			diagoffa_i = diagoffa + ( doff_t )i*MR; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* If the current panel of A intersects the diagonal, scale C
			   by beta. If it is strictly below the diagonal, scale by one.
			   This allows the current macro-kernel to work for both trmm
			   and trmm3. */ \
			if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict b1_i; \
				ctype* restrict a2; \
\
				/* Determine the offset to and length of the panel that was
				   packed so we can index into the corresponding location in
				   b1. */ \
				off_a1011 = 0; \
				k_a1011   = bli_min( diagoffa_i + MR, k ); \
\
				/* Compute the panel stride for the current diagonal-
				   intersecting micro-panel. This is done before the thread
				   check because a1 is advanced by ps_a_cur on every
				   iteration, whether or not this thread computes it. */ \
				is_a_cur  = k_a1011 * PACKMR; \
				is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
				ps_a_cur  = ( is_a_cur * ss_a_num ) / ss_a_den; \
\
				if ( bli_trmm_my_iter( i, ir_thread ) ) { \
\
				b1_i = b1 + ( off_a1011 * PACKNR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B.
				   NOTE(review): the last two arguments of the i-loop call
				   are hard-coded to 0 and 1 rather than taken from
				   ir_thread, unlike the j-loop call which uses the jr
				   thread id/count -- presumably this only affects the
				   next-panel addresses stored in aux; confirm. */ \
				a2 = a1; \
				if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( is_a_cur, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k_a1011, \
					  alpha_cast, \
					  a1, \
					  b1_i, \
					  beta_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Copy edge elements of C to the temporary buffer. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        c11, rs_c,  cs_c, \
					                        ct,  rs_ct, cs_ct ); \
\
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k_a1011, \
					  alpha_cast, \
					  a1, \
					  b1_i, \
					  beta_cast, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Copy the result to the edge of C. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        c11, rs_c,  cs_c ); \
				} \
				} \
				/* Advance past this (shortened) micro-panel of A. */ \
				a1 += ps_a_cur; \
			} \
			else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				if ( bli_trmm_my_iter( i, ir_thread ) ) { \
\
				ctype* restrict a2; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( istep_a, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. The product is
					   accumulated into C (beta of one). */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  one, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel. The product overwrites
					   the temporary tile (beta of zero). */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Add the result to the edge of C. */ \
					PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
					                       ct,  rs_ct, cs_ct, \
					                       c11, rs_c,  cs_c ); \
				} \
				} \
				/* Advance past this full-length micro-panel of A. */ \
				a1 += rstep_a; \
			} \
\
			c11 += rstep_c; \
		} \
		} \
\
		b1 += cstep_b; \
		c1 += cstep_c; \
	} \
/*PASTEMAC(ch,fprintm)( stdout, "trmm_ll_ker_var2: a1", MR, k_a1011, a1, 1, MR, "%4.1f", "" );*/ \
/*PASTEMAC(ch,fprintm)( stdout, "trmm_ll_ker_var2: b1", k_a1011, NR, b1_i, NR, 1, "%4.1f", "" );*/ \
}

INSERT_GENTFUNC_BASIC0( trmm_ll_ker_var2 )

blis-0.6.1/frame/3/trmm/other/bli_trmm_ll_ker_var2rr.c000066400000000000000000000410551360743507500226210ustar00rootroot00000000000000/* BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_ll_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // void bli_trmm_ll_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. 
f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1011; \ dim_t off_a1011; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current block of A is entirely above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffa < 0 ) \ { \ i = -diagoffa; \ m = m - i; \ diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. 
*/ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ /*thrinfo_t* ir_thread = bli_thrinfo_sub_node( thread );*/ \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ /*dim_t ir_nt = bli_thread_n_way( ir_thread ); \ dim_t ir_tid = bli_thread_work_id( ir_thread );*/ \ \ dim_t jr_start, jr_end; \ /*dim_t ir_start, ir_end;*/ \ dim_t jr_inc; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop for the initial rectangular region of C (if it exists). NOTE: Parallelism in the 1st loop is disabled for now. */ \ bli_thread_range_jrir_rr( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ /*bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc );*/ \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? 
NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ a1 = a_cast; \ c11 = c1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict b1_i; \ ctype* restrict a2; \ \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in b1. */ \ off_a1011 = 0; \ k_a1011 = bli_min( diagoffa_i + MR, k ); \ \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_a_cur = k_a1011 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ b1_i = b1 + ( off_a1011 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_a1011, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. 
*/ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_a1011, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \ { \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. 
*/ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += rstep_a; \ } \ \ c11 += rstep_c; \ } \ } \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ll_ker_var2rr: a1", MR, k_a1011, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ll_ker_var2rr: b1", k_a1011, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_ll_ker_var2rr ) blis-0.6.1/frame/3/trmm/other/bli_trmm_ll_ker_var2sl.c000066400000000000000000000410451360743507500226130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

/* Signature shared by every datatype-specific instance of the macro-kernel
   template defined further down in this file. */
#define FUNCPTR_T gemm_fp

typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffa,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

/* Table of typed implementations; it is indexed with a num_t in the wrapper
   below (f = ftypes[dt_exec]).
   NOTE(review): GENARRAY is defined elsewhere; presumably it expands to the
   array declarator plus an initializer naming the instances generated by
   INSERT_GENTFUNC_BASIC0 at the bottom of this file -- confirm. */
static FUNCPTR_T GENARRAY(ftypes,trmm_ll_ker_var2sl);

//
// -- Macrokernel functions for slab partitioning ------------------------------
//

/*
   Object-based entry point for this macro-kernel.

   Reads the diagonal offset, pack schemas, dimensions, buffers and strides
   out of the obj_t operands, merges the scalars attached to a and b into a
   single alpha, takes beta from the scalar attached to c, and then calls
   the typed function selected from ftypes[] by the execution datatype of c.

   m and n are taken from c, while k is taken from the width of a.

   The cntl argument is not referenced in this function, and rntm is only
   forwarded to the typed function.
*/
void bli_trmm_ll_ker_var2sl
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffa  = bli_obj_diag_offset( a );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	// For A, the column stride and panel dimension are passed on; the typed
	// function aliases them as PACKMR and MR, respectively.
	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	// For B, the row stride and panel dimension are passed on; the typed
	// function aliases them as PACKNR and NR, respectively.
	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C. (The merged scalar is read
	// back out of scalar_b, the second operand of bli_mulsc() above.)
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffa,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


/*
   Template for the typed macro-kernel; it is instantiated by the
   INSERT_GENTFUNC_BASIC0() invocation at the bottom of this file.
   NOTE(review): presumably one instance per datatype -- confirm against
   the definition of INSERT_GENTFUNC_BASIC0.

   The body walks C in tiles of NR columns (outer j loop) by MR rows (inner
   i loop). For each tile, the micro-panel of A is classified by its
   diagonal offset:
     - intersects the diagonal: gemm_ukr is called with the shortened inner
       dimension k_a1011 and with beta;
     - strictly below the diagonal: gemm_ukr is called with the full k and
       the result is accumulated into C;
     - otherwise: no micro-kernel call is made and only c11 is advanced.

   The rntm argument is accepted but not referenced in the body.
*/
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffa, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t MR = pd_a; \
	const dim_t NR = pd_b; \
	const dim_t PACKMR = cs_a; \
	const dim_t PACKNR = rs_b; \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. The strides of this buffer are
	   chosen from the value returned by
	   bli_cntx_l3_vir_ukr_prefers_cols_dt() for the gemm micro-kernel: if
	   it is true, ct is column-stored (rs_ct = 1, cs_ct = MR); otherwise ct
	   is row-stored (rs_ct = NR, cs_ct = 1). */ \
	ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
	          / sizeof( ctype ) ] \
	          __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t rs_ct = ( col_pref ? 1 : NR ); \
	const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
	ctype* restrict one = PASTEMAC(ch,1); \
	ctype* restrict zero = PASTEMAC(ch,0); \
	ctype* restrict a_cast = a; \
	ctype* restrict b_cast = b; \
	ctype* restrict c_cast = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t diagoffa_i; \
	dim_t k_full; \
	dim_t m_iter, m_left; \
	dim_t n_iter, n_left; \
	dim_t m_cur; \
	dim_t n_cur; \
	dim_t k_a1011; \
	dim_t off_a1011; \
	dim_t i, j; \
	inc_t rstep_a; \
	inc_t cstep_b; \
	inc_t rstep_c, cstep_c; \
	inc_t istep_a; \
	inc_t istep_b; \
	inc_t off_scl; \
	inc_t ss_a_num; \
	inc_t ss_a_den; \
	inc_t ps_a_cur; \
	inc_t is_a_cur; \
	auxinfo_t aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if PACKMR and NR are both odd, or if PACKNR and MR
	   are both odd. Abort in either case. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If the current block of A is entirely above the diagonal,
	   it is implicitly zero. So we do nothing. */ \
	if ( bli_is_strictly_above_diag_n( diagoffa, m, k ) ) return; \
\
	/* Compute k_full. For all trmm, k_full is simply k. This is
	   needed because some parameter combinations of trmm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of B (the non-triangular
	   matrix), which is used by 4m1/3m1 implementations, we need
	   this unreduced value of k. */ \
	k_full = k; \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_a ) || \
	     bli_is_3mi_packed( schema_a ) || \
	     bli_is_rih_packed( schema_a ) ) off_scl = 2; \
	else off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. And if we are packing real-only, imag-only, or
	   summed-only, we need to scale the computed panel sizes by 1/2
	   to compensate for the fact that the pointer arithmetic occurs
	   in terms of complex elements rather than real elements. */ \
	if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
	else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \
	else { ss_a_num = 1; ss_a_den = 1; } \
\
	/* If there is a zero region above where the diagonal of A intersects the
	   left edge of the block, adjust the pointer to C and treat this case as
	   if the diagonal offset were zero. This skips over the region that was
	   not packed. (Note we assume the diagonal offset is a multiple of MR;
	   this assumption will hold as long as the cache blocksizes are each a
	   multiple of MR and NR.) */ \
	if ( diagoffa < 0 ) \
	{ \
		i = -diagoffa; \
		m = m - i; \
		diagoffa = 0; \
		c_cast = c_cast + (i )*rs_c; \
	} \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	/* A partial (edge) tile counts as one additional iteration. */ \
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k; \
	istep_b = PACKNR * k_full; \
\
	/* Round the imaginary strides up to the next even value. */ \
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_b( istep_b, &aux ); \
\
	/* The 'thread' argument points to the thrinfo_t node for the 2nd (jr)
	   loop around the microkernel. Here we query the thrinfo_t node for the
	   1st (ir) loop around the microkernel. */ \
	/*thrinfo_t* ir_thread = bli_thrinfo_sub_node( thread );*/ \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
	/*dim_t ir_nt = bli_thread_n_way( ir_thread ); \
	dim_t ir_tid = bli_thread_work_id( ir_thread );*/ \
\
	dim_t jr_start, jr_end; \
	/*dim_t ir_start, ir_end;*/ \
	dim_t jr_inc; \
\
	/* Use slab assignment of micropanels to threads in the 2nd loop for
	   the initial rectangular region of C (if it exists).
	   NOTE: Parallelism in the 1st loop is disabled for now. */ \
	bli_thread_range_jrir_sl( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
	/*bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc );*/ \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Every column of tiles restarts at the first micro-panel of A. */ \
		a1 = a_cast; \
		c11 = c1; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = 0; i < m_iter; ++i ) \
		{ \
			diagoffa_i = diagoffa + ( doff_t )i*MR; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* If the current panel of A intersects the diagonal, scale C
			   by beta. If it is strictly below the diagonal, scale by one.
			   This allows the current macro-kernel to work for both trmm
			   and trmm3. */ \
			if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict b1_i; \
				ctype* restrict a2; \
\
				/* Determine the offset to and length of the panel that was
				   packed so we can index into the corresponding location in
				   b1. (The offset is always zero here, so b1_i below equals
				   b1.) */ \
				off_a1011 = 0; \
				k_a1011 = bli_min( diagoffa_i + MR, k ); \
\
				/* Compute the panel stride for the current diagonal-
				   intersecting micro-panel. */ \
				is_a_cur = k_a1011 * PACKMR; \
				is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
				ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \
\
				/* NOTE: ir loop parallelism disabled for now. */ \
				/*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \
\
				b1_i = b1 + ( off_a1011 * PACKNR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. The
				   ir-loop query uses thread id 0 of 1 thread because that
				   loop is not parallelized. */ \
				a2 = a1; \
				if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( is_a_cur, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k_a1011, \
					  alpha_cast, \
					  a1, \
					  b1_i, \
					  beta_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Copy edge elements of C to the temporary buffer. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        c11, rs_c, cs_c, \
					                        ct, rs_ct, cs_ct ); \
\
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k_a1011, \
					  alpha_cast, \
					  a1, \
					  b1_i, \
					  beta_cast, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Copy the result to the edge of C. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct, rs_ct, cs_ct, \
					                        c11, rs_c, cs_c ); \
				} \
				/*}*/ \
\
				/* Advance by the (shorter) stride of this diagonal-
				   intersecting micro-panel. */ \
				a1 += ps_a_cur; \
			} \
			else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				/* NOTE: ir loop parallelism disabled for now. */ \
				/*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \
\
				ctype* restrict a2; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( istep_a, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  one, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel, writing to the temporary
					   buffer with zero passed in the beta position. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Add the result to the edge of C. */ \
					PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
					                       ct, rs_ct, cs_ct, \
					                       c11, rs_c, cs_c ); \
				} \
				/*}*/ \
\
				a1 += rstep_a; \
			} \
\
			c11 += rstep_c; \
		} \
	} \
/*PASTEMAC(ch,fprintm)( stdout, "trmm_ll_ker_var2sl: a1", MR, k_a1011, a1, 1, MR, "%4.1f", "" );*/ \
/*PASTEMAC(ch,fprintm)( stdout, "trmm_ll_ker_var2sl: b1", k_a1011, NR, b1_i, NR, 1, "%4.1f", "" );*/ \
}

INSERT_GENTFUNC_BASIC0( trmm_ll_ker_var2sl )
blis-0.6.1/frame/3/trmm/other/bli_trmm_lu_ker_var2.c000066400000000000000000000374571360743507500223010ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

/* Signature shared by every datatype-specific instance of the macro-kernel
   template defined further down in this file. */
#define FUNCPTR_T gemm_fp

typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffa,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

/* Table of typed implementations; it is indexed with a num_t in the wrapper
   below (f = ftypes[dt_exec]).
   NOTE(review): GENARRAY is defined elsewhere; presumably it expands to the
   array declarator plus an initializer naming the instances generated by
   INSERT_GENTFUNC_BASIC0 at the bottom of this file -- confirm. */
static FUNCPTR_T GENARRAY(ftypes,trmm_lu_ker_var2);


/*
   Object-based entry point for this macro-kernel.

   Reads the diagonal offset, pack schemas, dimensions, buffers and strides
   out of the obj_t operands, merges the scalars attached to a and b into a
   single alpha, takes beta from the scalar attached to c, and then calls
   the typed function selected from ftypes[] by the execution datatype of c.

   m and n are taken from c, while k is taken from the width of a.

   The cntl argument is not referenced in this function, and rntm is only
   forwarded to the typed function.
*/
void bli_trmm_lu_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffa  = bli_obj_diag_offset( a );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	// For A, the column stride and panel dimension are passed on; the typed
	// function aliases them as PACKMR and MR, respectively.
	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	// For B, the row stride and panel dimension are passed on; the typed
	// function aliases them as PACKNR and NR, respectively.
	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C. (The merged scalar is read
	// back out of scalar_b, the second operand of bli_mulsc() above.)
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffa,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


/*
   Template for the typed macro-kernel; it is instantiated by the
   INSERT_GENTFUNC_BASIC0() invocation at the bottom of this file.
   NOTE(review): presumably one instance per datatype -- confirm against
   the definition of INSERT_GENTFUNC_BASIC0.

   The body walks C in tiles of NR columns (outer j loop) by MR rows (inner
   i loop). Both loops visit every index and use bli_trmm_my_iter() to
   decide whether the calling thread executes that iteration. For each
   tile, the micro-panel of A is classified by its diagonal offset:
     - intersects the diagonal: gemm_ukr is called with the shortened inner
       dimension k_a1112, with B offset by off_a1112 rows, and with beta;
     - strictly above the diagonal: gemm_ukr is called with the full k and
       the result is accumulated into C;
     - otherwise: no micro-kernel call is made and only c11 is advanced.

   The rntm argument is accepted but not referenced in the body.
*/
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffa, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* jr_thread \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t MR = pd_a; \
	const dim_t NR = pd_b; \
	const dim_t PACKMR = cs_a; \
	const dim_t PACKNR = rs_b; \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. The strides of this buffer are
	   chosen from the value returned by
	   bli_cntx_l3_vir_ukr_prefers_cols_dt() for the gemm micro-kernel: if
	   it is true, ct is column-stored (rs_ct = 1, cs_ct = MR); otherwise ct
	   is row-stored (rs_ct = NR, cs_ct = 1). */ \
	ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
	          / sizeof( ctype ) ] \
	          __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t rs_ct = ( col_pref ? 1 : NR ); \
	const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
	ctype* restrict one = PASTEMAC(ch,1); \
	ctype* restrict zero = PASTEMAC(ch,0); \
	ctype* restrict a_cast = a; \
	ctype* restrict b_cast = b; \
	ctype* restrict c_cast = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t diagoffa_i; \
	dim_t k_full; \
	dim_t m_iter, m_left; \
	dim_t n_iter, n_left; \
	dim_t m_cur; \
	dim_t n_cur; \
	dim_t k_a1112; \
	dim_t off_a1112; \
	dim_t i, j; \
	inc_t rstep_a; \
	inc_t cstep_b; \
	inc_t rstep_c, cstep_c; \
	inc_t istep_a; \
	inc_t istep_b; \
	inc_t off_scl; \
	inc_t ss_a_num; \
	inc_t ss_a_den; \
	inc_t ps_a_cur; \
	inc_t is_a_cur; \
	auxinfo_t aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if PACKMR and NR are both odd, or if PACKNR and MR
	   are both odd. Abort in either case. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If the current block of A is entirely below the diagonal,
	   it is implicitly zero. So we do nothing. */ \
	if ( bli_is_strictly_below_diag_n( diagoffa, m, k ) ) return; \
\
	/* Compute k_full. For all trmm, k_full is simply k. This is
	   needed because some parameter combinations of trmm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of B (the non-triangular
	   matrix), which is used by 4m1/3m1 implementations, we need
	   this unreduced value of k. */ \
	k_full = k; \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_a ) || \
	     bli_is_3mi_packed( schema_a ) || \
	     bli_is_rih_packed( schema_a ) ) off_scl = 2; \
	else off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. And if we are packing real-only, imag-only, or
	   summed-only, we need to scale the computed panel sizes by 1/2
	   to compensate for the fact that the pointer arithmetic occurs
	   in terms of complex elements rather than real elements. */ \
	if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
	else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \
	else { ss_a_num = 1; ss_a_den = 1; } \
\
	/* If there is a zero region to the left of where the diagonal of A
	   intersects the top edge of the block, adjust the pointer to B and
	   treat this case as if the diagonal offset were zero. Note that we
	   don't need to adjust the pointer to A since packm would have simply
	   skipped over the region that was not stored. (k is reduced here,
	   after k_full was captured above.) */ \
	if ( diagoffa > 0 ) \
	{ \
		i = diagoffa; \
		k = k - i; \
		diagoffa = 0; \
		b_cast = b_cast + ( i * PACKNR ) / off_scl; \
	} \
\
	/* If there is a zero region below where the diagonal of A intersects the
	   right side of the block, shrink it to prevent "no-op" iterations from
	   executing. */ \
	if ( -diagoffa + k < m ) \
	{ \
		m = -diagoffa + k; \
	} \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	/* A partial (edge) tile counts as one additional iteration. */ \
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k; \
	istep_b = PACKNR * k_full; \
\
	/* Round the imaginary strides up to the next even value. */ \
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_b( istep_b, &aux ); \
\
	b1 = b_cast; \
	c1 = c_cast; \
\
	/* Query the thrinfo_t sub-node used for the inner (ir) loop, and the
	   thread count and work id of the outer (jr) loop. */ \
	thrinfo_t* ir_thread = bli_thrinfo_sub_node( jr_thread ); \
	dim_t jr_num_threads = bli_thread_n_way( jr_thread ); \
	dim_t jr_thread_id = bli_thread_work_id( jr_thread ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = 0; j < n_iter; ++j ) \
	{ \
		if ( bli_trmm_my_iter( j, jr_thread ) ) { \
\
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		/* Every column of tiles restarts at the first micro-panel of A. */ \
		a1 = a_cast; \
		c11 = c1; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = 0; i < m_iter; ++i ) \
		{ \
			diagoffa_i = diagoffa + ( doff_t )i*MR; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* If the current panel of A intersects the diagonal, scale C
			   by beta. If it is strictly above the diagonal, scale by one.
			   This allows the current macro-kernel to work for both trmm
			   and trmm3. */ \
			if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict b1_i; \
				ctype* restrict a2; \
\
				/* Determine the offset to and length of the panel that was
				   packed so we can index into the corresponding location in
				   b1. */ \
				off_a1112 = diagoffa_i; \
				k_a1112 = k - off_a1112; \
\
				/* Compute the panel stride for the current diagonal-
				   intersecting micro-panel. This is done outside of the
				   my_iter test below because a1 is advanced by ps_a_cur on
				   every iteration, whether or not this thread executes
				   it. */ \
				is_a_cur = k_a1112 * PACKMR; \
				is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
				ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \
\
				if ( bli_trmm_my_iter( i, ir_thread ) ) { \
\
				b1_i = b1 + ( off_a1112 * PACKNR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( is_a_cur, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k_a1112, \
					  alpha_cast, \
					  a1, \
					  b1_i, \
					  beta_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Copy edge elements of C to the temporary buffer. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        c11, rs_c, cs_c, \
					                        ct, rs_ct, cs_ct ); \
\
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k_a1112, \
					  alpha_cast, \
					  a1, \
					  b1_i, \
					  beta_cast, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Copy the result to the edge of C. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct, rs_ct, cs_ct, \
					                        c11, rs_c, cs_c ); \
				} \
				} \
\
				/* Advance by the (shorter) stride of this diagonal-
				   intersecting micro-panel. */ \
				a1 += ps_a_cur; \
			} \
			else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				if ( bli_trmm_my_iter( i, ir_thread ) ) { \
\
				ctype* restrict a2; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( istep_a, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  one, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel, writing to the temporary
					   buffer with zero passed in the beta position. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Add the result to the edge of C. */ \
					PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
					                       ct, rs_ct, cs_ct, \
					                       c11, rs_c, cs_c ); \
				} \
				} \
\
				a1 += rstep_a; \
			} \
\
			c11 += rstep_c; \
		} \
		} \
\
		/* b1 and c1 advance on every j iteration, including those that
		   this thread skipped. */ \
		b1 += cstep_b; \
		c1 += cstep_c; \
	} \
\
/*PASTEMAC(ch,fprintm)( stdout, "trmm_lu_ker_var2: a1", MR, k_a1112, a1, 1, MR, "%4.1f", "" );*/ \
/*PASTEMAC(ch,fprintm)( stdout, "trmm_lu_ker_var2: b1", k_a1112, NR, b1_i, NR, 1, "%4.1f", "" );*/ \
}

INSERT_GENTFUNC_BASIC0( trmm_lu_ker_var2 )
blis-0.6.1/frame/3/trmm/other/bli_trmm_lu_ker_var2rr.c000066400000000000000000000412351360743507500226320ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_lu_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // /* bli_trmm_lu_ker_var2rr: object-based wrapper. It reads the diagonal offset of a, the pack schemas of a and b, the dimensions (m and n from c, k from the width of a), and the buffers, strides, panel dimensions and panel strides of the operands, merges the scalars attached to a and b into alpha, takes beta from c, and calls the function selected from ftypes by the execution datatype of c. The cntl argument is not referenced. */ void bli_trmm_lu_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the function pointer array with the execution datatype of // C (dt_exec) to extract the correct function pointer. f = ftypes[dt_exec]; // Invoke the function.
f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are not taken from the strides of C; they are chosen from col_pref, the value returned by bli_cntx_l3_vir_ukr_prefers_cols_dt() for the gemm micro-kernel: rs_ct = 1 and cs_ct = MR if col_pref is true, and rs_ct = NR and cs_ct = 1 otherwise. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ?
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1112; \ dim_t off_a1112; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if PACKMR and NR are both odd, or if PACKNR and MR are both odd, so abort in either case. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current block of A is entirely below the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for 4m or 3m.
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non-triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of A intersects the top edge of the block, adjust the pointer to B and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to A since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffa > 0 ) \ { \ i = diagoffa; \ k = k - i; \ diagoffa = 0; \ b_cast = b_cast + ( i * PACKNR ) / off_scl; \ } \ \ /* If there is a zero region below where the diagonal of A intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffa + k < m ) \ { \ m = -diagoffa + k; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs.
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. The query for the thrinfo_t node of the 1st (ir) loop is commented out below because ir loop parallelism is disabled in this macro-kernel. */ \ /*thrinfo_t* ir_thread = bli_thrinfo_sub_node( thread );*/ \ \ /* Query the number of threads and the thread id for the jr loop. (The corresponding ir loop queries are commented out.) */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ /*dim_t ir_nt = bli_thread_n_way( ir_thread ); \ dim_t ir_tid = bli_thread_work_id( ir_thread );*/ \ \ dim_t jr_start, jr_end; \ /*dim_t ir_start, ir_end;*/ \ dim_t jr_inc; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop. The range passed in is all n_iter iterations of the jr loop. */ \ bli_thread_range_jrir_rr( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ /*bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc );*/ \ \ /* Loop over the n dimension (NR columns at a time).
*/ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ a1 = a_cast; \ c11 = c1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, scale C by beta. If it is strictly above the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict b1_i; \ ctype* restrict a2; \ \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in b1. */ \ off_a1112 = diagoffa_i; \ k_a1112 = k - off_a1112; \ \ /* Compute the panel stride for the current diagonal-intersecting micro-panel. */ \ is_a_cur = k_a1112 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ b1_i = b1 + ( off_a1112 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately.
*/ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel with the packed length k_a1112, scaling C by beta. */ \ gemm_ukr \ ( \ k_a1112, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel on the temporary buffer, which now holds the copied edge of C, with beta. */ \ gemm_ukr \ ( \ k_a1112, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \ { \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel over the full k, scaling C by one. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel with zero passed as beta and the temporary buffer as the output. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C.
*/ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += rstep_a; \ } \ \ c11 += rstep_c; \ } \ } \ \ /* Disabled debugging output. NOTE: a1 and b1_i are declared inside the loops above and are out of scope here, so these calls would not compile as written if re-enabled. */ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_lu_ker_var2rr: a1", MR, k_a1112, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_lu_ker_var2rr: b1", k_a1112, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_lu_ker_var2rr ) blis-0.6.1/frame/3/trmm/other/bli_trmm_lu_ker_var2sl.c000066400000000000000000000412261360743507500226250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_lu_ker_var2sl); // // -- Macrokernel functions for slab partitioning ------------------------------ // /* bli_trmm_lu_ker_var2sl: object-based wrapper. It reads the diagonal offset of a, the pack schemas of a and b, the dimensions (m and n from c, k from the width of a), and the buffers, strides, panel dimensions and panel strides of the operands, merges the scalars attached to a and b into alpha, takes beta from c, and calls the function selected from ftypes by the execution datatype of c. The cntl argument is not referenced. */ void bli_trmm_lu_ker_var2sl ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B.
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the function pointer array with the execution datatype of // C (dt_exec) to extract the correct function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are not taken from the strides of C; they are chosen from col_pref, the value returned by bli_cntx_l3_vir_ukr_prefers_cols_dt() for the gemm micro-kernel: rs_ct = 1 and cs_ct = MR if col_pref is true, and rs_ct = NR and cs_ct = 1 otherwise. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ?
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1112; \ dim_t off_a1112; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if PACKMR and NR are both odd, or if PACKNR and MR are both odd, so abort in either case. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current block of A is entirely below the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for 4m or 3m.
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non-triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of A intersects the top edge of the block, adjust the pointer to B and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to A since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffa > 0 ) \ { \ i = diagoffa; \ k = k - i; \ diagoffa = 0; \ b_cast = b_cast + ( i * PACKNR ) / off_scl; \ } \ \ /* If there is a zero region below where the diagonal of A intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffa + k < m ) \ { \ m = -diagoffa + k; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs.
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. The query for the thrinfo_t node of the 1st (ir) loop is commented out below because ir loop parallelism is disabled in this macro-kernel. */ \ /*thrinfo_t* ir_thread = bli_thrinfo_sub_node( thread );*/ \ \ /* Query the number of threads and the thread id for the jr loop. (The corresponding ir loop queries are commented out.) */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ /*dim_t ir_nt = bli_thread_n_way( ir_thread ); \ dim_t ir_tid = bli_thread_work_id( ir_thread );*/ \ \ dim_t jr_start, jr_end; \ /*dim_t ir_start, ir_end;*/ \ dim_t jr_inc; \ \ /* Use slab assignment of micropanels to threads in the 2nd loop. The range passed in is all n_iter iterations of the jr loop. */ \ bli_thread_range_jrir_sl( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ /*bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc );*/ \ \ /* Loop over the n dimension (NR columns at a time).
*/ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ a1 = a_cast; \ c11 = c1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, scale C by beta. If it is strictly above the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict b1_i; \ ctype* restrict a2; \ \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in b1. */ \ off_a1112 = diagoffa_i; \ k_a1112 = k - off_a1112; \ \ /* Compute the panel stride for the current diagonal-intersecting micro-panel. */ \ is_a_cur = k_a1112 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ b1_i = b1 + ( off_a1112 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately.
*/ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel with the packed length k_a1112, scaling C by beta. */ \ gemm_ukr \ ( \ k_a1112, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel on the temporary buffer, which now holds the copied edge of C, with beta. */ \ gemm_ukr \ ( \ k_a1112, \ alpha_cast, \ a1, \ b1_i, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \ { \ /* NOTE: ir loop parallelism disabled for now. */ \ /*if ( bli_trmm_my_iter( i, ir_thread ) ) {*/ \ \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel over the full k, scaling C by one. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel with zero passed as beta and the temporary buffer as the output. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C.
*/ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ /*}*/ \ \ a1 += rstep_a; \ } \ \ c11 += rstep_c; \ } \ } \ \ /* Disabled debugging output. NOTE: a1 and b1_i are declared inside the loops above and are out of scope here, so these calls would not compile as written if re-enabled. */ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_lu_ker_var2sl: a1", MR, k_a1112, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_lu_ker_var2sl: b1", k_a1112, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_lu_ker_var2sl ) blis-0.6.1/frame/3/trmm/other/bli_trmm_rl_ker_var2.c000066400000000000000000000401511360743507500222570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffb, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_rl_ker_var2); /* bli_trmm_rl_ker_var2: object-based wrapper. It reads the diagonal offset of b, the pack schemas of a and b, the dimensions (m and n from c, k from the width of a), and the buffers, strides, panel dimensions and panel strides of the operands, merges the scalars attached to a and b into alpha, takes beta from c, and calls the function selected from ftypes by the execution datatype of c. The cntl argument is not referenced. */ void bli_trmm_rl_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffb = bli_obj_diag_offset( b ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B.
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the function pointer array with the execution datatype of // C (dt_exec) to extract the correct function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffb, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffb, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* jr_thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are not taken from the strides of C; they are chosen from col_pref, the value returned by bli_cntx_l3_vir_ukr_prefers_cols_dt() for the gemm micro-kernel: rs_ct = 1 and cs_ct = MR if col_pref is true, and rs_ct = NR and cs_ct = 1 otherwise. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ?
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffb_j; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_b1121; \ dim_t off_b1121; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_b_num; \ inc_t ss_b_den; \ inc_t ps_b_cur; \ inc_t is_b_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of B is entirely above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_b ) || \ bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region above where the diagonal of B intersects the left edge of the panel, adjust the pointer to A and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to B since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffb < 0 ) \ { \ j = -diagoffb; \ k = k - j; \ diagoffb = 0; \ a_cast = a_cast + ( j * PACKMR ) / off_scl; \ } \ \ /* If there is a zero region to the right of where the diagonal of B intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffb + k < n ) \ { \ n = diagoffb + k; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. 
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k_full; \ istep_b = PACKNR * k; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ b1 = b_cast; \ c1 = c_cast; \ \ thrinfo_t* ir_thread = bli_thrinfo_sub_node( jr_thread ); \ dim_t jr_num_threads = bli_thread_n_way( jr_thread ); \ dim_t jr_thread_id = bli_thread_work_id( jr_thread ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ diagoffb_j = diagoffb - ( doff_t )j*NR; \ \ /* Determine the offset to the beginning of the panel that was packed so we can index into the corresponding location in A. Then compute the length of that panel. */ \ off_b1121 = bli_max( -diagoffb_j, 0 ); \ k_b1121 = k - off_b1121; \ \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. 
*/ \ if ( bli_intersects_diag_n( diagoffb_j, k, NR ) ) \ { \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_b_cur = k_b1121 * PACKNR; \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ if ( bli_trmm_my_iter( j, jr_thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( is_b_cur, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ a1_i = a1 + ( off_b1121 * PACKMR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b1121, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b1121, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. 
*/ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += ps_b_cur; \ } \ else if ( bli_is_strictly_below_diag_n( diagoffb_j, k, NR ) ) \ { \ if ( bli_trmm_my_iter( j, jr_thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. 
*/ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += cstep_b; \ } \ \ c1 += cstep_c; \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_rl_ker_var2: a1", MR, k_b1121, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_rl_ker_var2: b1", k_b1121, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_rl_ker_var2 ) blis-0.6.1/frame/3/trmm/other/bli_trmm_rl_ker_var2rr.c000066400000000000000000000447521360743507500226360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffb, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_rl_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // void bli_trmm_rl_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffb = bli_obj_diag_offset( b ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. 
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffb, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffb, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffb_j; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_b1121; \ dim_t off_b1121; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_b_num; \ inc_t ss_b_den; \ inc_t ps_b_cur; \ inc_t is_b_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of B is entirely above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_b ) || \ bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region above where the diagonal of B intersects the left edge of the panel, adjust the pointer to A and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to B since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffb < 0 ) \ { \ j = -diagoffb; \ k = k - j; \ diagoffb = 0; \ a_cast = a_cast + ( j * PACKMR ) / off_scl; \ } \ \ /* If there is a zero region to the right of where the diagonal of B intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffb + k < n ) \ { \ n = diagoffb + k; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. 
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k_full; \ istep_b = PACKNR * k; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the rectangular part of B, and the triangular portion. */ \ dim_t n_iter_rct; \ dim_t n_iter_tri; \ \ if ( bli_is_strictly_below_diag_n( diagoffb, m, n ) ) \ { \ /* If the entire panel of B does not intersect the diagonal, there is no triangular region, and therefore we can skip the second set of loops. */ \ n_iter_rct = n_iter; \ n_iter_tri = 0; \ } \ else \ { \ /* If the panel of B does intersect the diagonal, compute the number of iterations in the rectangular region by dividing NR into the diagonal offset. (There should never be any remainder in this division.) The number of iterations in the triangular (or trapezoidal) region is computed as the remaining number of iterations in the n dimension. 
*/ \ n_iter_rct = diagoffb / NR; \ n_iter_tri = n_iter - n_iter_rct; \ } \ \ /* Use round-robin assignment of micropanels to threads in the 2nd and 1st loops for the initial rectangular region of B (if it exists). */ \ bli_thread_range_jrir_rr( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ { \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_trmm_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_rr( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_trmm_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. 
*/ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ /* If there is no triangular region, then we're done. */ \ if ( n_iter_tri == 0 ) return; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop for the remaining triangular region of B (if it exists). NOTE: We don't need to call bli_thread_range_jrir*() here since we employ a hack that calls for each thread to execute every iteration of the jr and ir loops but skip all but the pointer increment for iterations that are not assigned to it. */ \ \ /* Advance the starting b1 and c1 pointers to the positions corresponding to the start of the triangular region of B. */ \ jr_start = n_iter_rct; \ b1 = b_cast + jr_start * cstep_b; \ c1 = c_cast + jr_start * cstep_c; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < n_iter; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ diagoffb_j = diagoffb - ( doff_t )j*NR; \ \ /* Determine the offset to the beginning of the panel that was packed so we can index into the corresponding location in A. Then compute the length of that panel. */ \ off_b1121 = bli_max( -diagoffb_j, 0 ); \ k_b1121 = k - off_b1121; \ \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ { \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_b_cur = k_b1121 * PACKNR; \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 
1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ if ( bli_trmm_my_iter( j, thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( is_b_cur, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter( i, caucus ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ a1_i = a1 + ( off_b1121 * PACKMR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b1121, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b1121, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. 
*/ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += ps_b_cur; \ } \ \ c1 += cstep_c; \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_rl_ker_var2rr: a1", MR, k_b1121, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_rl_ker_var2rr: b1", k_b1121, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_rl_ker_var2rr ) blis-0.6.1/frame/3/trmm/other/bli_trmm_rl_ker_var2sl.c000066400000000000000000000447431360743507500226310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffb, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_rl_ker_var2sl); // // -- Macrokernel functions for slab partitioning ------------------------------ // /* Object-based entry point: queries the execution datatype of c, the diagonal offset and pack schema of b, the pack schema of a, the dimensions (m and n from c, k from the width of a) and the buffers, strides, panel dimensions and panel strides of a, b and c; merges the scalars attached to a and b into a single alpha (beta is the scalar attached to c); then calls the datatype-specific function selected from ftypes. The cntl argument is not referenced here. */ void bli_trmm_rl_ker_var2sl ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffb = bli_obj_diag_offset( b ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B.
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffb, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } /* GENTFUNC template for the type-specific trmm_rl_ker_var2sl macro-kernels (one instance per ctype/ch pair via INSERT_GENTFUNC_BASIC0 below). The 2nd (jr) loop is split in two: a rectangular region of B whose iteration ranges come from bli_thread_range_jrir_sl() and whose micro-kernel calls accumulate into C, followed by a triangular region whose iterations are filtered with bli_trmm_my_iter() and whose micro-kernel calls use the caller's beta. Edge tiles (m_cur != MR or n_cur != NR) are computed via the temporary buffer ct. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffb, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set according to the storage preference of the gemm micro-kernel as queried from the context (col_pref below), not from the strides of the original C matrix: if the micro-kernel prefers columns, ct is column-stored (rs_ct = 1, cs_ct = MR); otherwise ct is row-stored (rs_ct = NR, cs_ct = 1). */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ?
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffb_j; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_b1121; \ dim_t off_b1121; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_b_num; \ inc_t ss_b_den; \ inc_t ps_b_cur; \ inc_t is_b_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. The test below is expressed in terms of the packed dimensions: it aborts if PACKMR and NR are both odd, or if PACKNR and MR are both odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of B is entirely above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for 4m or 3m.
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_b ) || \ bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region above where the diagonal of B intersects the left edge of the panel, adjust the pointer to A and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to B since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffb < 0 ) \ { \ j = -diagoffb; \ k = k - j; \ diagoffb = 0; \ a_cast = a_cast + ( j * PACKMR ) / off_scl; \ } \ \ /* If there is a zero region to the right of where the diagonal of B intersects the bottom of the panel, shrink it to prevent "no-op" iterations from executing. */ \ if ( diagoffb + k < n ) \ { \ n = diagoffb + k; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs.
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k_full; \ istep_b = PACKNR * k; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the rectangular part of B, and the triangular portion. */ \ dim_t n_iter_rct; \ dim_t n_iter_tri; \ \ if ( bli_is_strictly_below_diag_n( diagoffb, m, n ) ) \ { \ /* If the entire panel of B does not intersect the diagonal, there is no triangular region, and therefore we can skip the second set of loops. NOTE(review): the test above passes m as the row dimension, whereas the earlier safeguard on the same panel of B passes k; confirm which is intended. */ \ n_iter_rct = n_iter; \ n_iter_tri = 0; \ } \ else \ { \ /* If the panel of B does intersect the diagonal, compute the number of iterations in the rectangular region by dividing NR into the diagonal offset. (There should never be any remainder in this division.) The number of iterations in the triangular (or trapezoidal) region is computed as the remaining number of iterations in the n dimension.
*/ \ n_iter_rct = diagoffb / NR; \ n_iter_tri = n_iter - n_iter_rct; \ } \ \ /* Use slab assignment of micropanels to threads in the 2nd and 1st loops for the initial rectangular region of B (if it exists). */ \ bli_thread_range_jrir_sl( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_sl( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ { \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = bli_trmm_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_sl( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_trmm_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel.
*/ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ /* If there is no triangular region, then we're done. */ \ if ( n_iter_tri == 0 ) return; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop for the remaining triangular region of B (if it exists). NOTE: We don't need to call bli_thread_range_jrir*() here since we employ a hack that calls for each thread to execute every iteration of the jr and ir loops but skip all but the pointer increment for iterations that are not assigned to it. */ \ \ /* Advance the starting b1 and c1 pointers to the positions corresponding to the start of the triangular region of B. */ \ jr_start = n_iter_rct; \ b1 = b_cast + jr_start * cstep_b; \ c1 = c_cast + jr_start * cstep_c; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < n_iter; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ diagoffb_j = diagoffb - ( doff_t )j*NR; \ \ /* Determine the offset to the beginning of the panel that was packed so we can index into the corresponding location in A. Then compute the length of that panel. */ \ off_b1121 = bli_max( -diagoffb_j, 0 ); \ k_b1121 = k - off_b1121; \ \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. In this slab variant the scale-by-one case is handled by the rectangular-region loop above; every micro-kernel call in this loop passes beta. */ \ { \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_b_cur = k_b1121 * PACKNR; \ is_b_cur += ( bli_is_odd( is_b_cur ) ?
1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ if ( bli_trmm_my_iter( j, thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( is_b_cur, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter( i, caucus ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ a1_i = a1 + ( off_b1121 * PACKMR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b1121, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b1121, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C.
*/ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += ps_b_cur; \ } \ \ c1 += cstep_c; \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_rl_ker_var2sl: a1", MR, k_b1121, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_rl_ker_var2sl: b1", k_b1121, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_rl_ker_var2sl ) blis-0.6.1/frame/3/trmm/other/bli_trmm_ru_ker_var2.c000066400000000000000000000401641360743507500222740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffb, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_ru_ker_var2); /* Object-based entry point: queries the execution datatype of c, the diagonal offset and pack schema of b, the pack schema of a, the dimensions (m and n from c, k from the width of a) and the buffers, strides, panel dimensions and panel strides of a, b and c; merges the scalars attached to a and b into a single alpha (beta is the scalar attached to c); then calls the datatype-specific function selected from ftypes. The cntl argument is not referenced here. */ void bli_trmm_ru_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffb = bli_obj_diag_offset( b ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B.
bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffb, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } /* GENTFUNC template for the type-specific trmm_ru_ker_var2 macro-kernels (one instance per ctype/ch pair via INSERT_GENTFUNC_BASIC0 below). A single jr loop steps through the n dimension NR columns at a time. A micro-panel of B that intersects the diagonal is multiplied using the reduced length k_b0111 and the caller's beta, after which b1 advances by ps_b_cur; a micro-panel strictly above the diagonal is multiplied using k and accumulated into C, after which b1 advances by cstep_b. Iterations of both loops are filtered per thread with bli_trmm_my_iter(). Edge tiles (m_cur != MR or n_cur != NR) go through the temporary buffer ct. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffb, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* jr_thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set according to the storage preference of the gemm micro-kernel as queried from the context (col_pref below), not from the strides of the original C matrix: if the micro-kernel prefers columns, ct is column-stored (rs_ct = 1, cs_ct = MR); otherwise ct is row-stored (rs_ct = NR, cs_ct = 1). */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ?
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffb_j; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_b0111; \ dim_t off_b0111; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_b_num; \ inc_t ss_b_den; \ inc_t ps_b_cur; \ inc_t is_b_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. The test below is expressed in terms of the packed dimensions: it aborts if PACKMR and NR are both odd, or if PACKNR and MR are both odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of B is entirely below its diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffb, k, n ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for 4m or 3m.
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_b ) || \ bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.) NOTE(review): the offset is applied to columns of C, i.e. along the n dimension that the loop below steps through NR at a time; confirm whether NR is the blocksize intended in the note above. */ \ if ( diagoffb > 0 ) \ { \ j = diagoffb; \ n = n - j; \ diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \ /* If there is a zero region below where the diagonal of B intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffb + n < k ) \ { \ k = -diagoffb + n; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs.
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k_full; \ istep_b = PACKNR * k; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ b1 = b_cast; \ c1 = c_cast; \ \ thrinfo_t* ir_thread = bli_thrinfo_sub_node( jr_thread ); \ dim_t jr_num_threads = bli_thread_n_way( jr_thread ); \ dim_t jr_thread_id = bli_thread_work_id( jr_thread ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ diagoffb_j = diagoffb - ( doff_t )j*NR; \ \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in A. */ \ off_b0111 = 0; \ k_b0111 = bli_min( k, -diagoffb_j + NR ); \ \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly above the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ if ( bli_intersects_diag_n( diagoffb_j, k, NR ) ) \ { \ /* Compute the panel stride for the current diagonal- intersecting micro-panel.
*/ \ is_b_cur = k_b0111 * PACKNR; \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ if ( bli_trmm_my_iter( j, jr_thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( is_b_cur, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ a1_i = a1 + ( off_b0111 * PACKMR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b0111, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b0111, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += ps_b_cur; \ } \ else if ( bli_is_strictly_above_diag_n( diagoffb_j, k, NR ) ) \ { \ if ( bli_trmm_my_iter( j, jr_thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object.
*/ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += cstep_b; \ } \ \ c1 += cstep_c; \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2: a1", MR, k_b0111, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2: b1", k_b0111, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_ru_ker_var2 ) blis-0.6.1/frame/3/trmm/other/bli_trmm_ru_ker_var2rr.c000066400000000000000000000466041360743507500226450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffb, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* beta, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trmm_ru_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // /* Object-based entry point: queries the execution datatype of c, the diagonal offset and pack schema of b, the pack schema of a, the dimensions (m and n from c, k from the width of a) and the buffers, strides, panel dimensions and panel strides of a, b and c; merges the scalars attached to a and b into a single alpha (beta is the scalar attached to c); then calls the datatype-specific function selected from ftypes. The cntl argument is not referenced here. */ void bli_trmm_ru_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffb = bli_obj_diag_offset( b ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); obj_t scalar_a; obj_t scalar_b; void* buf_alpha; void* buf_beta; FUNCPTR_T f; // Detach and multiply the scalars attached to A and B. bli_obj_scalar_detach( a, &scalar_a ); bli_obj_scalar_detach( b, &scalar_b ); bli_mulsc( &scalar_a, &scalar_b ); // Grab the addresses of the internal scalar buffers for the scalar // merged above and the scalar attached to C. buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b ); buf_beta = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function.
f( diagoffb, schema_a, schema_b, m, n, k, buf_alpha, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_beta, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffb, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* beta, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ?
MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha_cast = alpha; \ ctype* restrict beta_cast = beta; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffb_j; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_b0111; \ dim_t off_b0111; \ dim_t i, j, jb0; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_b_num; \ inc_t ss_b_den; \ inc_t ps_b_cur; \ inc_t is_b_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of B is entirely below its diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffb, k, n ) ) return; \ \ /* Compute k_full. For all trmm, k_full is simply k. This is needed because some parameter combinations of trmm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of A (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = k; \ \ /* Compute indexing scaling factor for for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_b ) || \ bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. And if we are packing real-only, imag-only, or summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffb > 0 ) \ { \ j = diagoffb; \ n = n - j; \ diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \ /* If there is a zero region below where the diagonal of B intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffb + n < k ) \ { \ k = -diagoffb + n; \ } \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. 
*/ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k_full; \ istep_b = PACKNR * k; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* The 'thread' argument points to the thrinfo_t node for the 2nd (jr) loop around the microkernel. Here we query the thrinfo_t node for the 1st (ir) loop around the microkernel. */ \ thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ dim_t ir_nt = bli_thread_n_way( caucus ); \ dim_t ir_tid = bli_thread_work_id( caucus ); \ \ dim_t jr_start, jr_end; \ dim_t ir_start, ir_end; \ dim_t jr_inc, ir_inc; \ \ /* Note that we partition the 2nd loop into two regions: the triangular part of C, and the rectangular portion. */ \ dim_t n_iter_tri; \ dim_t n_iter_rct; \ \ if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) \ { \ /* If the entire panel of B does not intersect the diagonal, there is no triangular region, and therefore we can skip the first set of loops. 
*/ \ n_iter_tri = 0; \ n_iter_rct = n_iter; \ } \ else \ { \ /* If the panel of B does intersect the diagonal, compute the number of iterations in the triangular (or trapezoidal) region by dividing NR into the number of rows in B. (There should never be any remainder in this division.) The number of iterations in the rectangular region is computed as the remaining number of iterations in the n dimension. */ \ n_iter_tri = ( k + diagoffb ) / NR + ( ( k + diagoffb ) % NR ? 1 : 0 ); \ n_iter_rct = n_iter - n_iter_tri; \ } \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop for the initial triangular region of B (if it exists). NOTE: We don't need to call bli_thread_range_jrir*() here since we employ a hack that calls for each thread to execute every iteration of the jr and ir loops but skip all but the pointer increment for iterations that are not assigned to it. */ \ \ b1 = b_cast; \ c1 = c_cast; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter_tri; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ diagoffb_j = diagoffb - ( doff_t )j*NR; \ \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in A. */ \ off_b0111 = 0; \ k_b0111 = bli_min( k, -diagoffb_j + NR ); \ \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ { \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_b_cur = k_b0111 * PACKNR; \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 
1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ if ( bli_trmm_my_iter( j, thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( is_b_cur, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trmm_my_iter( i, caucus ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ a1_i = a1 + ( off_b0111 * PACKMR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b0111, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Copy edge elements of C to the temporary buffer. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ c11, rs_c, cs_c, \ ct, rs_ct, cs_ct ); \ \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k_b0111, \ alpha_cast, \ a1_i, \ b1, \ beta_cast, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ } \ \ b1 += ps_b_cur; \ } \ \ c1 += cstep_c; \ } \ \ /* If there is no rectangular region, then we're done. */ \ if ( n_iter_rct == 0 ) return; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd and 1st loops the remaining triangular region of B. 
*/ \ bli_thread_range_jrir_rr( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \ \ /* Advance the start and end iteration offsets for the rectangular region by the number of iterations used for the triangular region. */ \ jr_start += n_iter_tri; \ jr_end += n_iter_tri; \ jb0 = n_iter_tri; \ \ /* Save the resulting value of b1 from the previous loop since it represents the starting point for the rectangular region. */ \ b_cast = b1; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ /* NOTE: We must index through b_cast differently since it contains the starting address of the rectangular region (which is already n_iter_tri logical iterations through B). */ \ b1 = b_cast + (j-jb0) * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, scale C by beta. If it is strictly below the diagonal, scale by one. This allows the current macro-kernel to work for both trmm and trmm3. */ \ { \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = ir_start; i < ir_end; i += ir_inc ) \ { \ ctype* restrict a2; \ \ a1 = a_cast + i * rstep_a; \ c11 = c1 + i * rstep_c; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. 
*/ \ a2 = bli_trmm_get_next_a_upanel( a1, rstep_a, ir_inc ); \ if ( bli_is_last_iter_rr( i, m_iter, ir_tid, ir_nt ) ) \ { \ a2 = a_cast; \ b2 = bli_trmm_get_next_b_upanel( b1, cstep_b, jr_inc ); \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ one, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ alpha_cast, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ } \ } \ \ \ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2rr: a1", MR, k_b0111, a1, 1, MR, "%4.1f", "" );*/ \ /*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2rr: b1", k_b0111, NR, b1_i, NR, 1, "%4.1f", "" );*/ \ } INSERT_GENTFUNC_BASIC0( trmm_ru_ker_var2rr ) blis-0.6.1/frame/3/trmm/other/bli_trmm_ru_ker_var2sl.c000066400000000000000000000465751360743507500226470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

#define FUNCPTR_T gemm_fp

/* Signature shared by the type-specific macrokernels generated from the
   GENTFUNC template further below; the object-based wrapper dispatches
   through a table of these pointers. */
typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffb,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

/* Table of type-specific kernels, indexed by datatype (see ftypes[dt_exec]
   in the wrapper below). */
static FUNCPTR_T GENARRAY(ftypes,trmm_ru_ker_var2sl);

//
// -- Macrokernel functions for slab partitioning ------------------------------
//

/*
   Object-based entry point for the trmm_ru_ker_var2sl macrokernel.

   Extracts the diagonal offset, pack schemas, dimensions, buffers and
   strides from the a, b and c objects, merges the scalars attached to A
   and B into a single alpha, takes beta from the scalar attached to C, and
   calls the type-specific kernel selected by the execution datatype of C.

   a, b, c   packed operands and the output matrix object.
   cntx      context, forwarded to the typed kernel.
   rntm      runtime object, forwarded to the typed kernel.
   cntl      control tree node; not referenced by this function.
   thread    thrinfo_t node, forwarded to the typed kernel.

   NOTE(review): the "ru" in the name presumably denotes a right-side,
   upper-triangular B -- confirm against the callers.
*/
void bli_trmm_ru_ker_var2sl
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffb  = bli_obj_diag_offset( b );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	FUNCPTR_T f;

	// Detach and multiply the scalars attached to A and B.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffb,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}

/*
   GENTFUNC template for the type-specific trmm_ru_ker_var2sl macrokernels
   (INSERT_GENTFUNC_BASIC0 below instantiates it).

   The generated function walks the packed micro-panels of A (the
   non-triangular operand) and B (the triangular operand, with diagonal
   offset diagoffb) and invokes the gemm microkernel queried from cntx on
   each pair, updating C in tiles of at most MR x NR elements.

   The n dimension is processed in two phases:
     1. Triangular region (the first n_iter_tri micro-panels of B): each
        panel uses its own length k_b0111 and its own panel stride
        ps_b_cur, and the microkernel is passed beta. Every thread visits
        every iteration and uses bli_trmm_my_iter() to decide whether to
        compute or only advance its pointers.
     2. Rectangular region (the remaining n_iter_rct micro-panels): every
        panel uses the (possibly reduced) k and the fixed stride ps_b, and
        the product is accumulated into C. Iteration ranges are obtained
        from bli_thread_range_jrir_sl().

   Partial (edge) tiles are computed in the stack buffer ct and then
   copied or added into C.

   Parameters:
     diagoffb             diagonal offset of B.
     schema_a, schema_b   pack schemas of A and B (saved to the auxinfo_t).
     m, n, k              problem dimensions; C is traversed as m x n.
     alpha, beta          pointers to scalars of type ctype.
     a, cs_a, pd_a, ps_a  packed A; cs_a is used as PACKMR, pd_a as MR and
                          ps_a as the stride between micro-panels.
     b, rs_b, pd_b, ps_b  packed B; rs_b is used as PACKNR, pd_b as NR and
                          ps_b as the stride between micro-panels.
     c, rs_c, cs_c        output matrix C and its row/column strides.
     cntx                 context queried for the gemm microkernel.
     rntm                 not referenced by this function.
     thread               thrinfo_t node for the 2nd (jr) loop; its sub-node
                          is used for the 1st (ir) loop.
*/
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffb, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	const dim_t     PACKMR     = cs_a; \
	const dim_t     PACKNR     = rs_b; \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict one        = PASTEMAC(ch,1); \
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffb_j; \
	dim_t           k_full; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           k_b0111; \
	dim_t           off_b0111; \
	dim_t           i, j, jb0; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	inc_t           istep_a; \
	inc_t           istep_b; \
	inc_t           off_scl; \
	inc_t           ss_b_num; \
	inc_t           ss_b_den; \
	inc_t           ps_b_cur; \
	inc_t           is_b_cur; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if both MR and NR are odd. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If the current panel of B is entirely below its diagonal,
	   it is implicitly zero. So we do nothing. */ \
	if ( bli_is_strictly_below_diag_n( diagoffb, k, n ) ) return; \
\
	/* Compute k_full. For all trmm, k_full is simply k. This is
	   needed because some parameter combinations of trmm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of A (the non-triangular
	   matrix), which is used by 4m1/3m1 implementations, we need
	   this unreduced value of k. */ \
	k_full = k; \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_b ) || \
	     bli_is_3mi_packed( schema_b ) || \
	     bli_is_rih_packed( schema_b ) ) off_scl = 2; \
	else                                 off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. And if we are packing real-only, imag-only, or
	   summed-only, we need to scale the computed panel sizes by 1/2
	   to compensate for the fact that the pointer arithmetic occurs
	   in terms of complex elements rather than real elements. */ \
	if      ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
	else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \
	else                                      { ss_b_num = 1; ss_b_den = 1; } \
\
	/* If there is a zero region to the left of where the diagonal of B
	   intersects the top edge of the panel, adjust the pointer to C and
	   treat this case as if the diagonal offset were zero. This skips over
	   the region that was not packed. (Note we assume the diagonal offset
	   is a multiple of MR; this assumption will hold as long as the cache
	   blocksizes are each a multiple of MR and NR.) */ \
	if ( diagoffb > 0 ) \
	{ \
		j        = diagoffb; \
		n        = n - j; \
		diagoffb = 0; \
		c_cast   = c_cast + (j  )*cs_c; \
	} \
\
	/* If there is a zero region below where the diagonal of B intersects the
	   right side of the block, shrink it to prevent "no-op" iterations from
	   executing. */ \
	if ( -diagoffb + n < k ) \
	{ \
		k = -diagoffb + n; \
	} \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k_full; \
	istep_b = PACKNR * k; \
\
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( istep_a, &aux ); \
\
	/* The 'thread' argument points to the thrinfo_t node for the 2nd (jr)
	   loop around the microkernel. Here we query the thrinfo_t node for the
	   1st (ir) loop around the microkernel. */ \
	thrinfo_t* caucus = bli_thrinfo_sub_node( thread ); \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
	dim_t ir_nt  = bli_thread_n_way( caucus ); \
	dim_t ir_tid = bli_thread_work_id( caucus ); \
\
	dim_t jr_start, jr_end; \
	dim_t ir_start, ir_end; \
	dim_t jr_inc,   ir_inc; \
\
	/* Note that we partition the 2nd loop into two regions: the triangular
	   part of C, and the rectangular portion. */ \
	dim_t n_iter_tri; \
	dim_t n_iter_rct; \
\
	if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) \
	{ \
		/* If the entire panel of B does not intersect the diagonal, there is
		   no triangular region, and therefore we can skip the first set of
		   loops. */ \
		n_iter_tri = 0; \
		n_iter_rct = n_iter; \
	} \
	else \
	{ \
		/* If the panel of B does intersect the diagonal, compute the number of
		   iterations in the triangular (or trapezoidal) region by dividing NR
		   into the number of rows in B. (There should never be any remainder
		   in this division.) The number of iterations in the rectangular region
		   is computed as the remaining number of iterations in the n dimension. */ \
		n_iter_tri = ( k + diagoffb ) / NR + ( ( k + diagoffb ) % NR ? 1 : 0 ); \
		n_iter_rct = n_iter - n_iter_tri; \
	} \
\
	/* Use round-robin assignment of micropanels to threads in the 2nd loop for
	   the initial triangular region of B (if it exists).
	   NOTE: We don't need to call bli_thread_range_jrir*() here since we
	   employ a hack that calls for each thread to execute every iteration
	   of the jr and ir loops but skip all but the pointer increment for
	   iterations that are not assigned to it. */ \
\
	b1 = b_cast; \
	c1 = c_cast; \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = 0; j < n_iter_tri; ++j ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		diagoffb_j = diagoffb - ( doff_t )j*NR; \
\
		/* Determine the offset to and length of the panel that was packed
		   so we can index into the corresponding location in A. */ \
		off_b0111 = 0; \
		k_b0111   = bli_min( k, -diagoffb_j + NR ); \
\
		a1  = a_cast; \
		c11 = c1; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* If the current panel of B intersects the diagonal, scale C
		   by beta. If it is strictly below the diagonal, scale by one.
		   This allows the current macro-kernel to work for both trmm
		   and trmm3. */ \
		{ \
			/* Compute the panel stride for the current diagonal-
			   intersecting micro-panel. */ \
			is_b_cur  = k_b0111 * PACKNR; \
			is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
			ps_b_cur  = ( is_b_cur * ss_b_num ) / ss_b_den; \
\
			/* Only compute if this jr iteration belongs to this thread; the
			   pointer increments after this block are executed regardless. */ \
			if ( bli_trmm_my_iter( j, thread ) ) { \
\
			/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_is_b( is_b_cur, &aux ); \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = 0; i < m_iter; ++i ) \
			{ \
				if ( bli_trmm_my_iter( i, caucus ) ) { \
\
				ctype* restrict a1_i; \
				ctype* restrict a2; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				a1_i = a1 + ( off_b0111 * PACKMR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k_b0111, \
					  alpha_cast, \
					  a1_i, \
					  b1, \
					  beta_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Copy edge elements of C to the temporary buffer. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        c11, rs_c,  cs_c, \
					                        ct,  rs_ct, cs_ct ); \
\
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k_b0111, \
					  alpha_cast, \
					  a1_i, \
					  b1, \
					  beta_cast, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Copy the result to the edge of C. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        c11, rs_c,  cs_c ); \
				} \
				} \
\
				a1  += rstep_a; \
				c11 += rstep_c; \
			} \
			} \
\
			b1 += ps_b_cur; \
		} \
\
		c1 += cstep_c; \
	} \
\
	/* If there is no rectangular region, then we're done. */ \
	if ( n_iter_rct == 0 ) return; \
\
	/* Use slab assignment of micropanels to threads in the 2nd and
	   1st loops for the remaining rectangular region of B. */ \
	bli_thread_range_jrir_sl( thread, n_iter_rct, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
	bli_thread_range_jrir_sl( caucus, m_iter,     1, FALSE, &ir_start, &ir_end, &ir_inc ); \
\
	/* Advance the start and end iteration offsets for the rectangular region
	   by the number of iterations used for the triangular region. */ \
	jr_start += n_iter_tri; \
	jr_end   += n_iter_tri; \
	jb0       = n_iter_tri; \
\
	/* Save the resulting value of b1 from the previous loop since it represents
	   the starting point for the rectangular region. */ \
	b_cast = b1; \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		/* NOTE: We must index through b_cast differently since it contains
		   the starting address of the rectangular region (which is already
		   n_iter_tri logical iterations through B). */ \
		b1 = b_cast + (j-jb0) * cstep_b; \
		c1 = c_cast + j       * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* If the current panel of B intersects the diagonal, scale C
		   by beta. If it is strictly below the diagonal, scale by one.
		   This allows the current macro-kernel to work for both trmm
		   and trmm3. */ \
		{ \
			/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_is_b( istep_b, &aux ); \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = ir_start; i < ir_end; i += ir_inc ) \
			{ \
				ctype* restrict a2; \
\
				a1  = a_cast + i * rstep_a; \
				c11 = c1     + i * rstep_c; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = bli_trmm_get_next_a_upanel( a1, rstep_a, ir_inc ); \
				if ( bli_is_last_iter_sl( i, m_iter, ir_tid, ir_nt ) ) \
				{ \
					a2 = a_cast; \
					b2 = bli_trmm_get_next_b_upanel( b1, cstep_b, jr_inc ); \
					if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  one, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  alpha_cast, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Add the result to the edge of C. */ \
					PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
					                       ct,  rs_ct, cs_ct, \
					                       c11, rs_c,  cs_c ); \
				} \
			} \
		} \
	} \
\
\
\
/*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2sl: a1", MR, k_b0111, a1, 1, MR, "%4.1f", "" );*/ \
/*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2sl: b1", k_b0111, NR, b1_i, NR, 1, "%4.1f", "" );*/ \
}

INSERT_GENTFUNC_BASIC0( trmm_ru_ker_var2sl )
blis-0.6.1/frame/3/trmm3/000077500000000000000000000000001360743507500147575ustar00rootroot00000000000000blis-0.6.1/frame/3/trmm3/bli_trmm3.h000066400000000000000000000032471360743507500170260ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_trmm3_front.h" blis-0.6.1/frame/3/trmm3/bli_trmm3_front.c000066400000000000000000000155611360743507500202330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_trmm3_front ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { bli_init_once(); obj_t a_local; obj_t b_local; obj_t c_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trmm_check( side, alpha, a, b, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Alias A, B, and C so we can tweak the objects if necessary. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); // We do not explicitly implement the cases where A is transposed. // However, we can still handle them. Specifically, if A is marked as // needing a transposition, we simply induce a transposition. This // allows us to only explicitly implement the no-transpose cases. 
Once // the transposition is induced, the correct algorithm will be called, // since, for example, an algorithm over a transposed lower triangular // matrix A moves in the same direction (forwards) as a non-transposed // upper triangular matrix. And with the transposition induced, the // matrix now appears to be upper triangular, so the upper triangular // algorithm will grab the correct partitions, as if it were upper // triangular (with no transpose) all along. if ( bli_obj_has_trans( &a_local ) ) { bli_obj_induce_trans( &a_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local ); } #ifdef BLIS_DISABLE_TRMM3_RIGHT // NOTE: This case casts right-side trmm3 in terms of left side. This is // necessary when the current subconfiguration uses a gemm microkernel // that assumes that the packing kernel will have already duplicated // (broadcast) element of B in the packed copy of B. Supporting // duplication within the logic that packs micropanels from triangular // matrices would be ugly, and so we simply don't support it. As a // consequence, those subconfigurations need a way to force the triangular // matrix to be on the left (and thus the general matrix to the on the // right). So our solution is that in those cases, the subconfigurations // simply #define BLIS_DISABLE_TRMM3_RIGHT. // NOTE: This case casts right-side trmm3 in terms of left side. This can // lead to the microkernel being executed on an output matrix with the // microkernel's general stride IO case (unless the microkernel supports // both both row and column IO cases as well). // NOTE: Casting right-side trmm3 in terms of left side reduces the number // of macrokernels exercised to two (trmm_ll and trmm_lu). // If A is being multiplied from the right, transpose all operands // so that we can perform the computation as if A were being multiplied // from the left. 
if ( bli_is_right( side ) ) { bli_toggle_side( &side ); bli_obj_induce_trans( &a_local ); bli_obj_induce_trans( &b_local ); bli_obj_induce_trans( &c_local ); } #else // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side ); bli_obj_induce_trans( &a_local ); bli_obj_induce_trans( &b_local ); bli_obj_induce_trans( &c_local ); } // If A is being multiplied from the right, swap A and B so that // the matrix will actually be on the right. if ( bli_is_right( side ) ) { bli_obj_swap( &a_local, &b_local ); } #endif // Set each alias as the root object. // NOTE: We MUST wait until we are done potentially swapping the objects // before setting the root fields! bli_obj_set_as_root( &a_local ); bli_obj_set_as_root( &b_local ); bli_obj_set_as_root( &c_local ); // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. bli_rntm_set_ways_for_op ( BLIS_TRMM3, side, bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // A sort of hack for communicating the desired pach schemas for A and B // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and // bli_l3_cntl_create_if()). This allows us to access the schemas from // the control tree, which hopefully reduces some confusion, particularly // in bli_packm_init(). 
if ( bli_cntx_method( cntx ) == BLIS_NAT ) { bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); } else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) { pack_t schema_a = bli_cntx_schema_a_block( cntx ); pack_t schema_b = bli_cntx_schema_b_panel( cntx ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &b_local ); } // Invoke the internal back-end. bli_l3_thread_decorator ( bli_gemm_int, BLIS_TRMM, // operation family id alpha, &a_local, &b_local, beta, &c_local, cntx, rntm, cntl ); } blis-0.6.1/frame/3/trmm3/bli_trmm3_front.h000066400000000000000000000035411360743507500202330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_trmm3_front ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ); blis-0.6.1/frame/3/trsm/000077500000000000000000000000001360743507500147025ustar00rootroot00000000000000blis-0.6.1/frame/3/trsm/bli_trsm.h000066400000000000000000000033661360743507500166760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_trsm_cntl.h" #include "bli_trsm_front.h" #include "bli_trsm_int.h" #include "bli_trsm_var.h" blis-0.6.1/frame/3/trsm/bli_trsm_blk_var1.c000066400000000000000000000142471360743507500204520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" //#define PRINT void bli_trsm_blk_var1 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { dim_t my_start, my_end; dim_t b_alg; // Determine the direction in which to partition (forwards or backwards). dir_t direct = bli_l3_direct( a, b, c, cntl ); // Prune any zero region that exists along the partitioning dimension. bli_l3_prune_unref_mparts_m( a, b, c, cntl ); // Isolate the diagonal block A11 and its corresponding row panel C1. const dim_t kc = bli_obj_width( a ); obj_t a11, c1; bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, 0, kc, a, &a11 ); bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, 0, kc, c, &c1 ); // All threads iterate over the entire diagonal block A11. my_start = 0; my_end = kc; #ifdef PRINT printf( "bli_trsm_blk_var1(): a11 is %d x %d at offsets (%3d, %3d)\n", (int)bli_obj_length( &a11 ), (int)bli_obj_width( &a11 ), (int)bli_obj_row_off( &a11 ), (int)bli_obj_col_off( &a11 ) ); printf( "bli_trsm_blk_var1(): entering trsm subproblem loop.\n" ); #endif // Partition along the m dimension for the trsm subproblem. for ( dim_t i = my_start; i < my_end; i += b_alg ) { obj_t a11_1, c1_1; b_alg = bli_determine_blocksize( direct, i, my_end, &a11, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and C1. 
bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, i, b_alg, &a11, &a11_1 ); bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, i, b_alg, &c1, &c1_1 ); #ifdef PRINT printf( "bli_trsm_blk_var1(): a11_1 is %d x %d at offsets (%3d, %3d)\n", (int)bli_obj_length( &a11_1 ), (int)bli_obj_width( &a11_1 ), (int)bli_obj_row_off( &a11_1 ), (int)bli_obj_col_off( &a11_1 ) ); #endif // Perform trsm subproblem. bli_trsm_int ( &BLIS_ONE, &a11_1, b, &BLIS_ONE, &c1_1, cntx, rntm, bli_cntl_sub_prenode( cntl ), bli_thrinfo_sub_prenode( thread ) ); } #ifdef PRINT printf( "bli_trsm_blk_var1(): finishing trsm subproblem loop.\n" ); #endif // We must execute a barrier here because the upcoming rank-k update // requires the packed matrix B to be fully updated by the trsm // subproblem. bli_thread_obarrier( thread ); // Isolate the remaining part of the column panel matrix A, which we do by // acquiring the subpartition ahead of A11 (that is, A21 or A01, depending // on whether we are moving forwards or backwards, respectively). obj_t ax1, cx1; bli_acquire_mpart_mdim( direct, BLIS_SUBPART1A, 0, kc, a, &ax1 ); bli_acquire_mpart_mdim( direct, BLIS_SUBPART1A, 0, kc, c, &cx1 ); #ifdef PRINT printf( "bli_trsm_blk_var1(): ax1 is %d x %d at offsets (%3d, %3d)\n", (int)bli_obj_length( &ax1 ), (int)bli_obj_width( &ax1 ), (int)bli_obj_row_off( &ax1 ), (int)bli_obj_col_off( &ax1 ) ); #endif // Determine the current thread's subpartition range for the gemm // subproblem over Ax1. bli_thread_range_mdim ( direct, thread, &ax1, b, &cx1, cntl, cntx, &my_start, &my_end ); #ifdef PRINT printf( "bli_trsm_blk_var1(): entering gemm subproblem loop (%d->%d).\n", (int)my_start, (int)my_end ); #endif // Partition along the m dimension for the gemm subproblem. for ( dim_t i = my_start; i < my_end; i += b_alg ) { obj_t a11, c1; // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize( direct, i, my_end, &ax1, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and C1. 
bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, i, b_alg, &ax1, &a11 ); bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, i, b_alg, &cx1, &c1 ); #ifdef PRINT printf( "bli_trsm_blk_var1(): a11 is %d x %d at offsets (%3d, %3d)\n", (int)bli_obj_length( &a11 ), (int)bli_obj_width( &a11 ), (int)bli_obj_row_off( &a11 ), (int)bli_obj_col_off( &a11 ) ); #endif // Perform gemm subproblem. (Note that we use the same backend // function as before, since we're calling the same macrokernel.) bli_trsm_int ( &BLIS_ONE, &a11, b, &BLIS_ONE, &c1, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } #ifdef PRINT printf( "bli_trsm_blk_var1(): finishing gemm subproblem loop.\n" ); #endif } blis-0.6.1/frame/3/trsm/bli_trsm_blk_var2.c000066400000000000000000000060101360743507500204400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_trsm_blk_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t b1, c1; dim_t my_start, my_end; dim_t b_alg; // Determine the direction in which to partition (forwards or backwards). dir_t direct = bli_l3_direct( a, b, c, cntl ); // Prune any zero region that exists along the partitioning dimension. bli_l3_prune_unref_mparts_n( a, b, c, cntl ); // Determine the current thread's subpartition range. bli_thread_range_ndim ( direct, thread, a, b, c, cntl, cntx, &my_start, &my_end ); // Partition along the n dimension. for ( dim_t i = my_start; i < my_end; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize( direct, i, my_end, b, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for B1 and C1. bli_acquire_mpart_ndim( direct, BLIS_SUBPART1, i, b_alg, b, &b1 ); bli_acquire_mpart_ndim( direct, BLIS_SUBPART1, i, b_alg, c, &c1 ); // Perform trsm subproblem. bli_trsm_int ( &BLIS_ONE, a, &b1, &BLIS_ONE, &c1, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } } blis-0.6.1/frame/3/trsm/bli_trsm_blk_var3.c000066400000000000000000000064061360743507500204520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_trsm_blk_var3 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a1, b1; dim_t b_alg; // Determine the direction in which to partition (forwards or backwards). dir_t direct = bli_l3_direct( a, b, c, cntl ); // Prune any zero region that exists along the partitioning dimension. bli_l3_prune_unref_mparts_k( a, b, c, cntl ); // Query dimension in partitioning direction. 
dim_t k_trans = bli_obj_width_after_trans( a ); // Partition along the k dimension. for ( dim_t i = 0; i < k_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_trsm_determine_kc( direct, i, k_trans, a, b, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and B1. bli_acquire_mpart_ndim( direct, BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_mpart_mdim( direct, BLIS_SUBPART1, i, b_alg, b, &b1 ); // Perform trsm subproblem. bli_trsm_int ( &BLIS_ONE, &a1, &b1, &BLIS_ONE, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); //bli_thread_ibarrier( thread ); bli_thread_obarrier( bli_thrinfo_sub_node( thread ) ); // This variant executes multiple rank-k updates. Therefore, if the // internal alpha scalars on A/B and C are non-zero, we must ensure // that they are only used in the first iteration. if ( i == 0 ) { bli_obj_scalar_reset( a ); bli_obj_scalar_reset( b ); bli_obj_scalar_reset( c ); } } } blis-0.6.1/frame/3/trsm/bli_trsm_cntl.c000066400000000000000000000204731360743507500177070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// Creates the trsm control tree for the given side by dispatching to the
// left-side or right-side constructor. The pack schemas for A and B are
// forwarded unchanged. Returns the root node of the new tree.
cntl_t* bli_trsm_cntl_create
     (
       rntm_t* rntm,
       side_t  side,
       pack_t  schema_a,
       pack_t  schema_b
     )
{
	if ( bli_is_left( side ) )
		return bli_trsm_l_cntl_create( rntm, schema_a, schema_b );
	else
		return bli_trsm_r_cntl_create( rntm, schema_a, schema_b );
}

// Builds the left-side trsm control tree and returns its root. Nodes are
// created leaf-first, each one receiving the previously created node as
// its sub-node. From the root downwards the chain is:
//
//   blk_var2 (NC) -> blk_var3 (KC) -> packm B -> blk_var1 (MC)
//
// The blk_var1 node has two branches below it, each consisting of
// packm A -> macro-kernel -> leaf: the "gemm" branch is attached as its
// regular sub-node and the "trsm" branch as its prenode.
cntl_t* bli_trsm_l_cntl_create
     (
       rntm_t* rntm,
       pack_t  schema_a,
       pack_t  schema_b
     )
{
	void_fp macro_kernel_p;
	void_fp packa_fp;
	void_fp packb_fp;

	// Use the function pointer to the macrokernels that use slab
	// assignment of micropanels to threads in the jr and ir loops.
	macro_kernel_p = bli_trsm_xx_ker_var2;
	packa_fp       = bli_packm_blk_var1;
	packb_fp       = bli_packm_blk_var1;

	// Every node of this tree is tagged with the trsm operation family.
	const opid_t family = BLIS_TRSM;

	//
	// Create nodes for packing A and the macro-kernel (gemm branch).
	//

	cntl_t* gemm_cntl_bu_ke = bli_trsm_cntl_create_node
	(
	  rntm,    // the thread's runtime structure
	  family,  // the operation family
	  BLIS_MR, // needed for bli_thrinfo_rgrow()
	  NULL,    // variant function pointer not used
	  NULL     // no sub-node; this is the leaf of the tree.
	);

	cntl_t* gemm_cntl_bp_bu = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow()
	  macro_kernel_p,
	  gemm_cntl_bu_ke
	);

	// Create a node for packing matrix A.
	cntl_t* gemm_cntl_packa = bli_packm_cntl_create_node
	(
	  rntm,
	  bli_trsm_packa,  // trsm operation's packm function for A.
	  packa_fp,
	  BLIS_MR,
	  BLIS_MR,
	  TRUE,    // NOTE(review): diagonal-inversion flag; TRUE is passed even
	           // though this comment used to read "do NOT invert diagonal".
	           // Confirm against bli_packm_cntl_create_node().
	  TRUE,    // reverse iteration if upper?
	  FALSE,   // reverse iteration if lower?
	  schema_a, // normally BLIS_PACKED_ROW_PANELS
	  BLIS_BUFFER_FOR_A_BLOCK,
	  gemm_cntl_bp_bu
	);

	//
	// Create nodes for packing A and the macro-kernel (trsm branch).
	//

	cntl_t* trsm_cntl_bu_ke = bli_trsm_cntl_create_node
	(
	  rntm,    // the thread's runtime structure
	  family,  // the operation family
	  BLIS_MR, // needed for bli_thrinfo_rgrow()
	  NULL,    // variant function pointer not used
	  NULL     // no sub-node; this is the leaf of the tree.
	);

	cntl_t* trsm_cntl_bp_bu = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow()
	  macro_kernel_p,
	  trsm_cntl_bu_ke
	);

	// Create a node for packing matrix A.
	cntl_t* trsm_cntl_packa = bli_packm_cntl_create_node
	(
	  rntm,
	  bli_trsm_packa,  // trsm operation's packm function for A.
	  packa_fp,
	  BLIS_MR,
	  BLIS_MR,
	  TRUE,    // NOTE(review): diagonal-inversion flag; TRUE is passed even
	           // though this comment used to read "do NOT invert diagonal".
	           // Confirm against bli_packm_cntl_create_node().
	  TRUE,    // reverse iteration if upper?
	  FALSE,   // reverse iteration if lower?
	  schema_a, // normally BLIS_PACKED_ROW_PANELS
	  BLIS_BUFFER_FOR_A_BLOCK,
	  trsm_cntl_bp_bu
	);

	// -------------------------------------------------------------------------

	// Create a node for partitioning the m dimension by MC.
	// NOTE: We attach the gemm sub-tree as the main branch.
	cntl_t* trsm_cntl_op_bp = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_MC,
	  bli_trsm_blk_var1,
	  gemm_cntl_packa
	);

	// Attach the trsm sub-tree as the auxiliary "prenode" branch.
	bli_cntl_set_sub_prenode( trsm_cntl_packa, trsm_cntl_op_bp );

	// -------------------------------------------------------------------------

	// Create a node for packing matrix B.
	cntl_t* trsm_cntl_packb = bli_packm_cntl_create_node
	(
	  rntm,
	  bli_trsm_packb,
	  packb_fp,
	  BLIS_MR,
	  BLIS_NR,
	  FALSE,   // do NOT invert diagonal
	  FALSE,   // reverse iteration if upper?
	  FALSE,   // reverse iteration if lower?
	  schema_b, // normally BLIS_PACKED_COL_PANELS
	  BLIS_BUFFER_FOR_B_PANEL,
	  trsm_cntl_op_bp
	);

	// Create a node for partitioning the k dimension by KC.
	cntl_t* trsm_cntl_mm_op = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_KC,
	  bli_trsm_blk_var3,
	  trsm_cntl_packb
	);

	// Create a node for partitioning the n dimension by NC.
	cntl_t* trsm_cntl_vl_mm = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_NC,
	  bli_trsm_blk_var2,
	  trsm_cntl_mm_op
	);

	// The NC node is the root of the tree.
	return trsm_cntl_vl_mm;
}

// Builds the right-side trsm control tree and returns its root. The
// chain has the same shape as the left-side tree (blk_var2 -> blk_var3
// -> packm B -> blk_var1 -> packm A -> macro-kernel -> leaf), but only a
// single branch is created below the blk_var1 node.
// NOTE(review): no prenode is attached in this function, whereas the
// left-side constructor calls bli_cntl_set_sub_prenode(); verify that
// this is acceptable before putting this tree to use.
cntl_t* bli_trsm_r_cntl_create
     (
       rntm_t* rntm,
       pack_t  schema_a,
       pack_t  schema_b
     )
{
	// NOTE: trsm macrokernels are presently disabled for right-side execution.
	void_fp macro_kernel_p = bli_trsm_xx_ker_var2;

	void_fp packa_fp = bli_packm_blk_var1;
	void_fp packb_fp = bli_packm_blk_var1;

	// Every node of this tree is tagged with the trsm operation family.
	const opid_t family = BLIS_TRSM;

	// Create two nodes for the macro-kernel.
	cntl_t* trsm_cntl_bu_ke = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_MR, // needed for bli_thrinfo_rgrow()
	  NULL,    // variant function pointer not used
	  NULL     // no sub-node; this is the leaf of the tree.
	);

	cntl_t* trsm_cntl_bp_bu = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow()
	  macro_kernel_p,
	  trsm_cntl_bu_ke
	);

	// Create a node for packing matrix A.
	cntl_t* trsm_cntl_packa = bli_packm_cntl_create_node
	(
	  rntm,
	  bli_trsm_packa,
	  packa_fp,
	  BLIS_NR,
	  BLIS_MR,
	  FALSE,   // do NOT invert diagonal
	  FALSE,   // reverse iteration if upper?
	  FALSE,   // reverse iteration if lower?
	  schema_a, // normally BLIS_PACKED_ROW_PANELS
	  BLIS_BUFFER_FOR_A_BLOCK,
	  trsm_cntl_bp_bu
	);

	// Create a node for partitioning the m dimension by MC.
	cntl_t* trsm_cntl_op_bp = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_MC,
	  bli_trsm_blk_var1,
	  trsm_cntl_packa
	);

	// Create a node for packing matrix B.
	cntl_t* trsm_cntl_packb = bli_packm_cntl_create_node
	(
	  rntm,
	  bli_trsm_packb,
	  packb_fp,
	  BLIS_MR,
	  BLIS_MR,
	  TRUE,    // NOTE(review): diagonal-inversion flag; TRUE is passed even
	           // though this comment used to read "do NOT invert diagonal".
	           // Confirm against bli_packm_cntl_create_node().
	  FALSE,   // reverse iteration if upper?
	  TRUE,    // reverse iteration if lower?
	  schema_b, // normally BLIS_PACKED_COL_PANELS
	  BLIS_BUFFER_FOR_B_PANEL,
	  trsm_cntl_op_bp
	);

	// Create a node for partitioning the k dimension by KC.
	cntl_t* trsm_cntl_mm_op = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_KC,
	  bli_trsm_blk_var3,
	  trsm_cntl_packb
	);

	// Create a node for partitioning the n dimension by NC.
	cntl_t* trsm_cntl_vl_mm = bli_trsm_cntl_create_node
	(
	  rntm,
	  family,
	  BLIS_NC,
	  bli_trsm_blk_var2,
	  trsm_cntl_mm_op
	);

	// The NC node is the root of the tree.
	return trsm_cntl_vl_mm;
}

// Frees a trsm control tree; a thin wrapper that forwards all of its
// arguments to bli_cntl_free().
void bli_trsm_cntl_free
     (
       rntm_t*    rntm,
       cntl_t*    cntl,
       thrinfo_t* thread
     )
{
	bli_cntl_free( rntm, cntl, thread );
}

// -----------------------------------------------------------------------------

// Creates a single trsm control tree node by calling
// bli_cntl_create_node() with the given family, blocksize id, variant
// function pointer and sub-node. NULL is always passed for the one
// remaining argument of bli_cntl_create_node() (the one between var_func
// and sub_node).
cntl_t* bli_trsm_cntl_create_node
     (
       rntm_t* rntm,
       opid_t  family,
       bszid_t bszid,
       void_fp var_func,
       cntl_t* sub_node
     )
{
	return bli_cntl_create_node( rntm, family, bszid, var_func, NULL, sub_node );
}

blis-0.6.1/frame/3/trsm/bli_trsm_cntl.h000066400000000000000000000046361360743507500177170ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ cntl_t* bli_trsm_cntl_create ( rntm_t* rntm, side_t side, pack_t schema_a, pack_t schema_b ); cntl_t* bli_trsm_l_cntl_create ( rntm_t* rntm, pack_t schema_a, pack_t schema_b ); cntl_t* bli_trsm_r_cntl_create ( rntm_t* rntm, pack_t schema_a, pack_t schema_b ); void bli_trsm_cntl_free ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); // ----------------------------------------------------------------------------- cntl_t* bli_trsm_cntl_create_node ( rntm_t* rntm, opid_t family, bszid_t bszid, void_fp var_func, cntl_t* sub_node ); blis-0.6.1/frame/3/trsm/bli_trsm_front.c000066400000000000000000000133241360743507500200740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// Front-end for trsm. Validates the operands (when error checking is
// enabled), handles the alpha == 0 shortcut, normalizes local aliases of
// the operands, and hands the problem to bli_l3_thread_decorator() with
// bli_trsm_int() as the back-end.
//
//   side             - side from which A is solved against.
//   alpha            - scalar object; it is passed to the back-end in both
//                      the alpha and the beta position.
//   a                - the triangular matrix object.
//   b                - the second operand; it is aliased twice, once as
//                      the "B" operand and once as the "C" operand.
//   cntx, rntm, cntl - passed through to the helpers and the back-end.
void bli_trsm_front
     (
       side_t  side,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	bli_init_once();

	obj_t a_local;
	obj_t b_local;
	obj_t c_local;

#ifdef BLIS_ENABLE_SMALL_MATRIX_TRSM
	// Optional fast path: try the small-matrix implementation first and
	// return right away if it reports success.
	// NOTE(review): this call is made before the parameter check and the
	// alpha == 0 handling below; confirm that bli_trsm_small() does not
	// depend on either of them.
	gint_t status = bli_trsm_small( side, alpha, a, b, cntx, cntl );
	if ( status == BLIS_SUCCESS ) return;
#endif

	// Check parameters. (b is passed in the "C" position as well, with
	// BLIS_ZERO as the beta argument.)
	if ( bli_error_checking_is_enabled() )
		bli_trsm_check( side, alpha, a, b, &BLIS_ZERO, b, cntx );

	// If alpha is zero, scale b by alpha and return. (trsm has no separate
	// beta argument.)
	if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
	{
		bli_scalm( alpha, b );
		return;
	}

	// Alias A and B so we can tweak the objects if necessary. Note that
	// c_local is a second alias of b.
	bli_obj_alias_to( a, &a_local );
	bli_obj_alias_to( b, &b_local );
	bli_obj_alias_to( b, &c_local );

	// We do not explicitly implement the cases where A is transposed.
	// However, we can still handle them. Specifically, if A is marked as
	// needing a transposition, we simply induce a transposition. This
	// allows us to only explicitly implement the no-transpose cases. Once
	// the transposition is induced, the correct algorithm will be called,
	// since, for example, an algorithm over a transposed lower triangular
	// matrix A moves in the same direction (forwards) as a non-transposed
	// upper triangular matrix. And with the transposition induced, the
	// matrix now appears to be upper triangular, so the upper triangular
	// algorithm will grab the correct partitions, as if it were upper
	// triangular (with no transpose) all along.
	if ( bli_obj_has_trans( &a_local ) )
	{
		bli_obj_induce_trans( &a_local );
		bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
	}

#if 1

	// If A is being solved against from the right, transpose all operands
	// so that we can perform the computation as if A were being solved
	// from the left.
	if ( bli_is_right( side ) )
	{
		bli_toggle_side( &side );
		bli_obj_induce_trans( &a_local );
		bli_obj_induce_trans( &b_local );
		bli_obj_induce_trans( &c_local );
	}

#else

	// This branch is compiled out by the "#if 1" above.
	// NOTE: Enabling this code requires that BLIS NOT be configured with
	// BLIS_RELAX_MCNR_NCMR_CONSTRAINTS defined.
	#ifdef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS
	#error "BLIS_RELAX_MCNR_NCMR_CONSTRAINTS must not be defined for current trsm_r implementation."
	#endif

	// If A is being solved against from the right, swap A and B so that
	// the triangular matrix will actually be on the right.
	if ( bli_is_right( side ) )
	{
		bli_obj_swap( &a_local, &b_local );
	}

#endif

	// Set each alias as the root object.
	// NOTE: We MUST wait until we are done potentially swapping the objects
	// before setting the root fields!
	bli_obj_set_as_root( &a_local );
	bli_obj_set_as_root( &b_local );
	bli_obj_set_as_root( &c_local );

	// Parse and interpret the contents of the rntm_t object to properly
	// set the ways of parallelism for each loop, and then make any
	// additional modifications necessary for the current operation.
	// The three dimensions passed are the length and width of c_local and
	// the width of a_local.
	bli_rntm_set_ways_for_op
	(
	  BLIS_TRSM,
	  side,
	  bli_obj_length( &c_local ),
	  bli_obj_width( &c_local ),
	  bli_obj_width( &a_local ),
	  rntm
	);

	// A sort of hack for communicating the desired pack schemas for A and B
	// to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and
	// bli_l3_cntl_create_if()). This allows us to access the schemas from
	// the control tree, which hopefully reduces some confusion, particularly
	// in bli_packm_init().
	if ( bli_cntx_method( cntx ) == BLIS_NAT )
	{
		// Native execution: row panels for A, column panels for B.
		bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
		bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
	}
	else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
	{
		// Non-native method: the schemas are taken from the context.
		pack_t schema_a = bli_cntx_schema_a_block( cntx );
		pack_t schema_b = bli_cntx_schema_b_panel( cntx );

		bli_obj_set_pack_schema( schema_a, &a_local );
		bli_obj_set_pack_schema( schema_b, &b_local );
	}

	// Invoke the internal back-end. alpha is deliberately supplied twice:
	// once in the alpha position and once in the beta position.
	bli_l3_thread_decorator
	(
	  bli_trsm_int,
	  BLIS_TRSM, // operation family id
	  alpha,
	  &a_local,
	  &b_local,
	  alpha,
	  &c_local,
	  cntx,
	  rntm,
	  cntl
	);
}

blis-0.6.1/frame/3/trsm/bli_trsm_front.h000066400000000000000000000040711360743507500201000ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_trsm_front ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ); #ifdef BLIS_ENABLE_SMALL_MATRIX err_t bli_trsm_small ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ); #endif blis-0.6.1/frame/3/trsm/bli_trsm_int.c000066400000000000000000000107601360743507500175370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_trsm_int ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a_local; obj_t b_local; obj_t c_local; trsm_var_oft f; // Return early if the current control tree node is NULL. if ( bli_cntl_is_null( cntl ) ) return; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemm_basic_check( alpha, a, b, beta, c, cntx ); // If C has a zero dimension, return early. if ( bli_obj_has_zero_dim( c ) ) return; // If A or B has a zero dimension, scale C by beta and return early. if ( bli_obj_has_zero_dim( a ) || bli_obj_has_zero_dim( b ) ) { if ( bli_thread_am_ochief( thread ) ) bli_scalm( beta, c ); bli_thread_obarrier( thread ); return; } // Alias A and B in case we need to update attached scalars. 
bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); // Alias C in case we need to induce a transposition. bli_obj_alias_to( c, &c_local ); // If we are about to call a leaf-level implementation, and matrix C // still needs a transposition, then we must induce one by swapping the // strides and dimensions. Note that this transposition would normally // be handled explicitly in the packing of C, but if C is not being // packed, this is our last chance to handle the transposition. if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( c ) ) { bli_obj_induce_trans( &c_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &c_local ); } // If beta is non-unit, apply it to the scalar attached to C. if ( !bli_obj_equals( beta, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( beta, &c_local ); } // Set two bools: one based on the implied side parameter (the structure // of the root object) and one based on the uplo field of the triangular // matrix's root object (whether that is matrix A or matrix B). if ( bli_obj_root_is_triangular( a ) ) { // If alpha is non-unit, typecast and apply it to the scalar // attached to B (the non-triangular matrix). if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( alpha, &b_local ); } } else // if ( bli_obj_root_is_triangular( b ) ) { // If alpha is non-unit, typecast and apply it to the scalar // attached to A (the non-triangular matrix). if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( alpha, &a_local ); } } // FGVZ->TMS: Is this barrier still needed? bli_thread_obarrier( thread ); // Create the next node in the thrinfo_t structure. bli_thrinfo_grow( rntm, cntl, thread ); // Extract the function pointer from the current control tree node. f = bli_cntl_var_func( cntl ); // Invoke the variant. 
f ( &a_local, &b_local, &c_local, cntx, rntm, cntl, thread ); } blis-0.6.1/frame/3/trsm/bli_trsm_int.h000066400000000000000000000035441360743507500175460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

// Internal trsm back-end, executed for a control tree node.
//   alpha, beta  scalars for the operation.
//   a, b, c      operand objects (c is the output).
//   cntx, rntm   context and runtime objects.
//   cntl         current control tree node.
//   thread       thread info for the calling thread.
// NOTE(review): bli_trsm_front() passes this function to
// bli_l3_thread_decorator() rather than calling it directly.
void bli_trsm_int
     (
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     );
blis-0.6.1/frame/3/trsm/bli_trsm_ll_ker_var2.c000066400000000000000000000501451360743507500211500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "blis.h"

// Function pointer type of the datatype-specific macro-kernels generated
// by the GENTFUNC macro below (one instance per datatype).
#define FUNCPTR_T gemm_fp

typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffa,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha1,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   alpha2,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of macro-kernels, indexed by datatype.
static FUNCPTR_T GENARRAY(ftypes,trsm_ll_ker_var2);


//
// bli_trsm_ll_ker_var2(): object-based wrapper around the typed
// macro-kernel. It extracts dimensions, buffers, strides, pack schemas
// and attached scalars from a, b and c, selects the typed function via
// the execution datatype of c, and calls it.
//
// The cntl argument is part of the variant interface but is not
// referenced in this function.
//
void bli_trsm_ll_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffa  = bli_obj_diag_offset( a );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	void*     buf_alpha1;
	void*     buf_alpha2;

	FUNCPTR_T f;

	// Grab the address of the internal scalar buffer for the scalar
	// attached to B (the non-triangular matrix). This will be the alpha
	// scalar used in the gemmtrsm subproblems (ie: the scalar that would
	// be applied to the packed copy of B prior to it being updated by
	// the trsm subproblem). This scalar may be unit, if for example it
	// was applied during packing.
	buf_alpha1 = bli_obj_internal_scalar_buffer( b );

	// Grab the address of the internal scalar buffer for the scalar
	// attached to C. This will be the "beta" scalar used in the gemm-only
	// subproblems that correspond to micro-panels that do not intersect
	// the diagonal. We need this separate scalar because it's possible
	// that the alpha attached to B was reset, if it was applied during
	// packing.
	buf_alpha2 = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffa,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha1,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_alpha2,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// Typed macro-kernel template. For every NR-wide column panel of B/C
// assigned to the calling thread it walks the MR-tall micro-panels of A
// from top to bottom. Micro-panels that intersect the diagonal are
// handled by the fused gemmtrsm micro-kernel, micro-panels strictly
// below the diagonal by the plain gemm micro-kernel, and micro-panels
// above the diagonal are skipped.
//
// Edge cases (m_cur < MR or n_cur < NR) are computed into the temporary
// buffer ct and then copied/accumulated into C.
//
// The rntm argument is not referenced in the body of this template.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffa, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha1, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   alpha2, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t     dt          = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR          = pd_a; \
	const dim_t     NR          = pd_b; \
	const dim_t     PACKMR      = cs_a; \
	const dim_t     PACKNR      = rs_b; \
\
	/* Cast the micro-kernel address to its function pointer type. */ \
	PASTECH(ch,gemmtrsm_ukr_ft) \
	                gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
	PASTECH(ch,gemm_ukr_ft) \
	                    gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. The strides of this buffer are
	   chosen from the storage preference of the gemm micro-kernel
	   (col_pref): ct is column-stored when the micro-kernel prefers
	   columns and row-stored otherwise. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero        = PASTEMAC(ch,0); \
	ctype* restrict minus_one   = PASTEMAC(ch,m1); \
	ctype* restrict a_cast      = a; \
	ctype* restrict b_cast      = b; \
	ctype* restrict c_cast      = c; \
	ctype* restrict alpha1_cast = alpha1; \
	ctype* restrict alpha2_cast = alpha2; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffa_i; \
	dim_t           k_full; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           k_a1011; \
	dim_t           k_a10; \
	dim_t           off_a10; \
	dim_t           off_a11; \
	dim_t           i, j; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	inc_t           istep_a; \
	inc_t           istep_b; \
	inc_t           off_scl; \
	inc_t           ss_a_num; \
	inc_t           ss_a_den; \
	inc_t           ps_a_cur; \
	inc_t           is_a_cur; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if both MR and NR are odd. (The test below is
	   expressed with the packed dimensions: PACKMR together with NR,
	   and PACKNR together with MR.) */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If matrix A is above the diagonal, it is implicitly zero.
	   So we do nothing. */ \
	if ( bli_is_strictly_above_diag_n( diagoffa, m, k ) ) return; \
\
	/* Compute k_full as k inflated up to a multiple of MR. This is
	   needed because some parameter combinations of trsm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of B (the non-triangular
	   matrix), which is used by 4m1/3m1 implementations, we need
	   this unreduced value of k. */ \
	k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \
\
	/* Compute the indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_a ) || \
	     bli_is_3mi_packed( schema_a ) || \
	     bli_is_rih_packed( schema_a ) ) off_scl = 2; \
	else                                 off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. Note that real-only, imag-only, and summed-only
	   packing formats are not applicable here since trsm is a two-
	   operand operation only (unlike trmm, which is capable of three-
	   operand). */ \
	if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
	else                                 { ss_a_num = 1; ss_a_den = 1; } \
\
	/* If there is a zero region above where the diagonal of A intersects its
	   left edge, adjust the pointer to C and treat this case as if the diagonal
	   offset were zero. This skips over the region that was not packed. (Note
	   we assume the diagonal offset is a multiple of MR; this assumption will
	   hold as long as the cache blocksizes are each a multiple of MR and NR.) */ \
	if ( diagoffa < 0 ) \
	{ \
		i        = -diagoffa; \
		m        = m - i; \
		diagoffa = 0; \
		c_cast   = c_cast + (i  )*rs_c; \
	} \
\
	/* Check the k dimension, which needs to be a multiple of MR. If k
	   isn't a multiple of MR, we adjust it higher to satisfy the micro-
	   kernel, which is expecting to perform an MR x MR triangular solve.
	   This adjustment of k is consistent with what happened when A was
	   packed: all of its bottom/right edges were zero-padded, and
	   furthermore, the panel that stores the bottom-right corner of the
	   matrix has its diagonal extended into the zero-padded region (as
	   identity). This allows the trsm of that bottom-right panel to
	   proceed without producing any infs or NaNs that would infect the
	   "good" values of the corresponding block of B. */ \
	if ( k % MR != 0 ) k += MR - ( k % MR ); \
\
	/* NOTE: We don't need to check that m is a multiple of PACKMR since we
	   know that the underlying buffer was already allocated to have an m
	   dimension that is a multiple of PACKMR, with the region between the
	   last row and the next multiple of MR zero-padded accordingly. */ \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k; \
	istep_b = PACKNR * k_full; \
\
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_b( istep_b, &aux ); \
\
	/* Save the desired output datatype (indicating no typecasting). */ \
	/*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \
\
	/* We don't bother querying the thrinfo_t node for the 1st loop because
	   we can't parallelize that loop in trsm due to the inter-iteration
	   dependencies that exist. */ \
	/*thrinfo_t* caucus    = bli_thrinfo_sub_node( thread );*/ \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
\
	dim_t jr_start, jr_end; \
	dim_t jr_inc; \
\
	/* Determine the thread range and increment for the 2nd loop.
	   NOTE: The definition of bli_thread_range_jrir() will depend on whether
	   slab or round-robin partitioning was requested at configure-time.
	   NOTE: Parallelism in the 1st loop is unattainable due to the
	   inter-iteration dependencies present in trsm. */ \
	bli_thread_range_jrir( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		a1  = a_cast; \
		c11 = c1 + (0  )*rstep_c; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = 0; i < m_iter; ++i ) \
		{ \
			diagoffa_i = diagoffa + ( doff_t )i*MR; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* If the current panel of A intersects the diagonal, use a
			   special micro-kernel that performs a fused gemm and trsm.
			   If the current panel of A resides below the diagonal, use
			   a regular gemm micro-kernel. Otherwise, if it is above the
			   diagonal, it was not packed (because it is implicitly zero)
			   and so we do nothing. */ \
			if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict a10; \
				ctype* restrict a11; \
				ctype* restrict b01; \
				ctype* restrict b11; \
				ctype* restrict a2; \
\
				/* Compute various offsets into and lengths of parts of A. */ \
				off_a10 = 0; \
				k_a1011 = diagoffa_i + MR; \
				k_a10   = k_a1011 - MR; \
				off_a11 = k_a10; \
\
				/* Compute the panel stride for the current diagonal-
				   intersecting micro-panel. */ \
				is_a_cur  = k_a1011 * PACKMR; \
				is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
				ps_a_cur  = ( is_a_cur * ss_a_num ) / ss_a_den; \
\
				/* Compute the addresses of the panel A10 and the triangular
				   block A11. */ \
				a10 = a1; \
				/* a11 = a1 + ( k_a10 * PACKMR ) / off_scl; */ \
				a11 = bli_ptr_inc_by_frac( a1, sizeof( ctype ), k_a10 * PACKMR, off_scl ); \
\
				/* Compute the addresses of the panel B01 and the block
				   B11. */ \
				b01 = b1 + ( off_a10 * PACKNR ) / off_scl; \
				b11 = b1 + ( off_a11 * PACKNR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1 + ps_a_cur; \
				if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( is_a_cur, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel directly
					   on C. */ \
					gemmtrsm_ukr \
					( \
					  k_a10, \
					  alpha1_cast, \
					  a10, \
					  a11, \
					  b01, \
					  b11, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel with the
					   temporary buffer ct as its output. */ \
					gemmtrsm_ukr \
					( \
					  k_a10, \
					  alpha1_cast, \
					  a10, \
					  a11, \
					  b01, \
					  b11, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Copy the result to the bottom edge of C. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        c11, rs_c,  cs_c ); \
				} \
\
				a1 += ps_a_cur; \
			} \
			else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict a2; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1 + rstep_a; \
				if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( istep_a, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel directly on C:
					   c11 := alpha2 * c11 - a1 * b1. */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  a1, \
					  b1, \
					  alpha2_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel with the temporary
					   buffer ct as its output (beta == zero). */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Add the result to the edge of C. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        alpha2_cast, \
					                        c11, rs_c,  cs_c ); \
				} \
\
				a1 += rstep_a; \
			} \
\
			c11 += rstep_c; \
		} \
	} \
\
/*
if ( bli_is_4mi_packed( schema_a ) ){ \
PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_r before", k, n, \
                     ( double* )b,    rs_b, 1, "%4.1f", "" ); \
PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_i before", k, n, \
                     ( double* )b+72, rs_b, 1, "%4.1f", "" ); \
}else{ \
PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_r before", k, n, \
                     ( double* )b,   2*rs_b, 2, "%4.1f", "" ); \
PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_i before", k, n, \
                     ( double* )b+1, 2*rs_b, 2, "%4.1f", "" ); \
} \
*/ \
\
/*
PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: a11p_r computed", MR, MR, \
                     ( double* )a11, 1, PACKMR, "%4.1f", "" ); \
*/ \
\
/*
if ( bli_is_4mi_packed( schema_a ) ){ \
PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_r after", k, n, \
                     ( double* )b,    rs_b, 1, "%4.1f", "" ); \
PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_i after", k, n, \
                     ( double* )b+72, rs_b, 1, "%4.1f", "" ); \
}else{ \
PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_r after", k, n, \
                     ( double* )b,   2*rs_b, 2, "%4.1f", "" ); \
PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_i after", k, n, \
                     ( double* )b+1, 2*rs_b, 2, "%4.1f", "" ); \
} \
PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: b_r", m, n, \
                     ( double* )c, 1, cs_c, "%4.1f", "" ); \
PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: b_i", m, n, \
                     ( double* )c + 8*9, 1, cs_c, "%4.1f", "" ); \
*/ \
\
/*
PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a1 (diag)", MR, k_a1011, a1, 1, MR, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a11 (diag)", MR, MR, a11, 1, MR, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: b1 (diag)", k_a1011, NR, bp_i, NR, 1, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: bp11 (diag)", MR, NR, bp11, NR, 1, "%5.2f", "" ); \
*/ \
\
/*
PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a1 (ndiag)", MR, k, a1, 1, MR, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: b1 (ndiag)", k, NR, bp, NR, 1, "%5.2f", "" ); \
*/ \
}

INSERT_GENTFUNC_BASIC0( trsm_ll_ker_var2 )
blis-0.6.1/frame/3/trsm/bli_trsm_lu_ker_var2.c000066400000000000000000000462341360743507500211650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

#define FUNCPTR_T gemm_fp

// Signature of the datatype-specific macro-kernel functions defined by the
// GENTFUNC macro further below. The object-based wrapper calls through a
// pointer of this type after unpacking its obj_t arguments.
typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffa,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha1,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   alpha2,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of macro-kernel function pointers; it is indexed by the execution
// datatype in the wrapper below (ftypes[dt_exec]).
static FUNCPTR_T GENARRAY(ftypes,trsm_lu_ker_var2);


// bli_trsm_lu_ker_var2()
//
// Object-based front end of the trsm_lu macro-kernel. It queries the
// execution datatype of c, the diagonal offset of a, the pack schemas of a
// and b, the dimensions (m and n from c, k from the width of a) and the
// buffers, strides, panel dimensions and panel strides of a, b and c, and
// then forwards all of them to the datatype-specific function selected
// from ftypes[]. The cntl argument is not referenced in this function.
void bli_trsm_lu_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffa  = bli_obj_diag_offset( a );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	void*     buf_alpha1;
	void*     buf_alpha2;

	FUNCPTR_T f;

	// Grab the address of the internal scalar buffer for the scalar
	// attached to B (the non-triangular matrix). This will be the alpha
	// scalar used in the gemmtrsm subproblems (ie: the scalar that would
	// be applied to the packed copy of B prior to it being updated by
	// the trsm subproblem). This scalar may be unit, if for example it
	// was applied during packing.
	buf_alpha1 = bli_obj_internal_scalar_buffer( b );

	// Grab the address of the internal scalar buffer for the scalar
	// attached to C. This will be the "beta" scalar used in the gemm-only
	// subproblems that correspond to micro-panels that do not intersect
	// the diagonal. We need this separate scalar because it's possible
	// that the alpha attached to B was reset, if it was applied during
	// packing.
	buf_alpha2 = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffa,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha1,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_alpha2,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


// Datatype-generic body of the trsm_lu macro-kernel. The function is
// instantiated by the INSERT_GENTFUNC_BASIC0() invocation that follows the
// macro (presumably once per supported datatype; that macro's definition is
// not visible in this file).
//
// Outline of the body, as implemented below:
//  - abort if the packing/register blocksizes hit the odd/odd indexing trap;
//  - return early if any dimension is zero or if A lies strictly below the
//    diagonal;
//  - skip the zero region left of the diagonal (advancing b_cast), shrink m
//    past the zero region below it, and round k up to a multiple of MR;
//  - loop over the column micro-panels j in the range returned by
//    bli_thread_range_jrir(), and for each walk the row micro-panels of A
//    from the last index to the first (i = m_iter - 1 - ib), calling the
//    fused gemmtrsm micro-kernel for diagonal-intersecting micro-panels and
//    the gemm micro-kernel for micro-panels strictly above the diagonal.
// Edge cases (m_cur != MR or n_cur != NR) are computed into the temporary
// buffer ct and then copied/accumulated into C.
// The rntm parameter is not referenced in the body.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffa, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha1, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   alpha2, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread \
     ) \
{ \
	const num_t     dt          = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR          = pd_a; \
	const dim_t     NR          = pd_b; \
	const dim_t     PACKMR      = cs_a; \
	const dim_t     PACKNR      = rs_b; \
\
	/* Cast the micro-kernel address to its function pointer type. */ \
	PASTECH(ch,gemmtrsm_ukr_ft) \
	               gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
	PASTECH(ch,gemm_ukr_ft) \
	                   gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are chosen from the storage preference reported
	   for the gemm micro-kernel (col_pref below): if it prefers columns,
	   ct is column-stored (rs_ct = 1, cs_ct = MR); otherwise ct is
	   row-stored (rs_ct = NR, cs_ct = 1). */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero        = PASTEMAC(ch,0); \
	ctype* restrict minus_one   = PASTEMAC(ch,m1); \
	ctype* restrict a_cast      = a; \
	ctype* restrict b_cast      = b; \
	ctype* restrict c_cast      = c; \
	ctype* restrict alpha1_cast = alpha1; \
	ctype* restrict alpha2_cast = alpha2; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffa_i; \
	dim_t           k_full; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           k_a1112; \
	dim_t           k_a11; \
	dim_t           k_a12; \
	dim_t           off_a11; \
	dim_t           off_a12; \
	dim_t           i, j, ib; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	inc_t           istep_a; \
	inc_t           istep_b; \
	inc_t           off_scl; \
	inc_t           ss_a_num; \
	inc_t           ss_a_den; \
	inc_t           ps_a_cur; \
	inc_t           is_a_cur; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if PACKMR and NR are both odd, or if PACKNR and
	   MR are both odd. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If matrix A is below the diagonal, it is implicitly zero.
	   So we do nothing. */ \
	if ( bli_is_strictly_below_diag_n( diagoffa, m, k ) ) return; \
\
	/* Compute k_full as k inflated up to a multiple of MR. This is
	   needed because some parameter combinations of trsm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of B (the non-triangular
	   matrix), which is used by 4m1/3m1 implementations, we need
	   this unreduced value of k. */ \
	k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_a ) || \
	     bli_is_3mi_packed( schema_a ) || \
	     bli_is_rih_packed( schema_a ) ) off_scl = 2; \
	else                                 off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. Note that real-only, imag-only, and summed-only
	   packing formats are not applicable here since trsm is a two-
	   operand operation only (unlike trmm, which is capable of three-
	   operand). */ \
	if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
	else                                 { ss_a_num = 1; ss_a_den = 1; } \
\
	/* If there is a zero region to the left of where the diagonal of A
	   intersects the top edge of the block, adjust the pointer to B and
	   treat this case as if the diagonal offset were zero. Note that we
	   don't need to adjust the pointer to A since packm would have simply
	   skipped over the region that was not stored. */ \
	if ( diagoffa > 0 ) \
	{ \
		i        = diagoffa; \
		k        = k - i; \
		diagoffa = 0; \
		b_cast   = b_cast + ( i * PACKNR ) / off_scl; \
	} \
\
	/* If there is a zero region below where the diagonal of A intersects the
	   right side of the block, shrink it to prevent "no-op" iterations from
	   executing. */ \
	if ( -diagoffa + k < m ) \
	{ \
		m = -diagoffa + k; \
	} \
\
	/* Check the k dimension, which needs to be a multiple of MR. If k
	   isn't a multiple of MR, we adjust it higher to satisfy the micro-
	   kernel, which is expecting to perform an MR x MR triangular solve.
	   This adjustment of k is consistent with what happened when A was
	   packed: all of its bottom/right edges were zero-padded, and
	   furthermore, the panel that stores the bottom-right corner of the
	   matrix has its diagonal extended into the zero-padded region (as
	   identity). This allows the trsm of that bottom-right panel to
	   proceed without producing any infs or NaNs that would infect the
	   "good" values of the corresponding block of B. */ \
	if ( k % MR != 0 ) k += MR - ( k % MR ); \
\
	/* NOTE: We don't need to check that m is a multiple of PACKMR since we
	   know that the underlying buffer was already allocated to have an m
	   dimension that is a multiple of PACKMR, with the region between the
	   last row and the next multiple of MR zero-padded accordingly. */ \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. A partial (edge) micro-panel counts as one additional
	   iteration. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k; \
	istep_b = PACKNR * k_full; \
\
	/* Round odd imaginary strides up to the next even value. */ \
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_b( istep_b, &aux ); \
\
	/* Save the desired output datatype (indicating no typecasting). */ \
	/*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \
\
	/* We don't bother querying the thrinfo_t node for the 1st loop because
	   we can't parallelize that loop in trsm due to the inter-iteration
	   dependencies that exist. */ \
	/*thrinfo_t* caucus = bli_thrinfo_sub_node( thread );*/ \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
\
	dim_t jr_start, jr_end; \
	dim_t jr_inc; \
\
	/* Determine the thread range and increment for the 2nd loop.
	   NOTE: The definition of bli_thread_range_jrir() will depend on whether
	   slab or round-robin partitioning was requested at configure-time.
	   NOTE: Parallelism in the 1st loop is unattainable due to the
	   inter-iteration dependencies present in trsm. */ \
	bli_thread_range_jrir( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Start at the first packed micro-panel of A but at the last
		   row block of C; c11 is stepped backwards (by rstep_c) at
		   the end of every iteration of the loop below. */ \
		a1  = a_cast; \
		c11 = c1 + (m_iter-1)*rstep_c; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( ib = 0; ib < m_iter; ++ib ) \
		{ \
			i          = m_iter - 1 - ib; \
			diagoffa_i = diagoffa + ( doff_t )i*MR; \
\
			m_cur = ( bli_is_not_edge_b( ib, m_iter, m_left ) ? MR : m_left ); \
\
			/* If the current panel of A intersects the diagonal, use a
			   special micro-kernel that performs a fused gemm and trsm.
			   If the current panel of A resides above the diagonal, use
			   a regular gemm micro-kernel. Otherwise, if it is below the
			   diagonal, it was not packed (because it is implicitly zero)
			   and so we do nothing. */ \
			if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict a11; \
				ctype* restrict a12; \
				ctype* restrict b11; \
				ctype* restrict b21; \
				ctype* restrict a2; \
\
				/* Compute various offsets into and lengths of parts of A. */ \
				off_a11 = diagoffa_i; \
				k_a1112 = k - off_a11;; \
				k_a11   = MR; \
				k_a12   = k_a1112 - MR; \
				off_a12 = off_a11 + k_a11; \
\
				/* Compute the panel stride for the current diagonal-
				   intersecting micro-panel. */ \
				is_a_cur  = k_a1112 * PACKMR; \
				is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
				ps_a_cur  = ( is_a_cur * ss_a_num ) / ss_a_den; \
\
				/* Compute the addresses of the triangular block A11 and the
				   panel A12. */ \
				a11 = a1; \
				/* a12 = a1 + ( k_a11 * PACKMR ) / off_scl; */ \
				a12 = bli_ptr_inc_by_frac( a1, sizeof( ctype ), k_a11 * PACKMR, off_scl ); \
\
				/* Compute the addresses of the block B11 and the panel
				   B21. */ \
				b11 = b1 + ( off_a11 * PACKNR ) / off_scl; \
				b21 = b1 + ( off_a12 * PACKNR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1 + ps_a_cur; \
				if ( bli_is_last_iter_rr( ib, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( is_a_cur, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel. */ \
					gemmtrsm_ukr \
					( \
					  k_a12, \
					  alpha1_cast, \
					  a12, \
					  a11, \
					  b21, \
					  b11, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel, writing
					   to the temporary buffer ct instead of C. */ \
					gemmtrsm_ukr \
					( \
					  k_a12, \
					  alpha1_cast, \
					  a12, \
					  a11, \
					  b21, \
					  b11, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Copy the result to the bottom edge of C. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        c11, rs_c,  cs_c ); \
				} \
\
				a1 += ps_a_cur; \
			} \
			else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict a2; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1 + rstep_a; \
				if ( bli_is_last_iter_rr( ib, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( istep_a, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  a1, \
					  b1, \
					  alpha2_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel, writing to the
					   temporary buffer ct (with a zero beta). */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Add the result to the edge of C. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        alpha2_cast, \
					                        c11, rs_c,  cs_c ); \
				} \
\
				a1 += rstep_a; \
			} \
\
			c11 -= rstep_c; \
		} \
	} \
\
/*
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: a1 (diag)", MR, k_a1112, a1, 1, MR, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 (diag)", MR, NR, b11, NR, 1, "%6.3f", "" ); \
printf( "m_iter = %lu\n", m_iter ); \
printf( "m_cur = %lu\n", m_cur ); \
printf( "k = %lu\n", k ); \
printf( "diagoffa_i = %lu\n", diagoffa_i ); \
printf( "off_a1112 = %lu\n", off_a1112 ); \
printf( "k_a1112 = %lu\n", k_a1112 ); \
printf( "k_a12 = %lu\n", k_a12 ); \
printf( "k_a11 = %lu\n", k_a11 ); \
printf( "rs_c,cs_c = %lu %lu\n", rs_c, cs_c ); \
printf( "rs_ct,cs_ct= %lu %lu\n", rs_ct, cs_ct ); \
*/ \
\
/*
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 after (diag)", MR, NR, b11, NR, 1, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 after (diag)", MR, NR, b11, NR, 1, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: ct after (diag)", m_cur, n_cur, ct, rs_ct, cs_ct, "%5.2f", "" ); \
*/ \
}

INSERT_GENTFUNC_BASIC0( trsm_lu_ker_var2 )

blis-0.6.1/frame/3/trsm/bli_trsm_packab.c000066400000000000000000000054301360743507500201640ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_trsm_packa ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a_pack; // Pack matrix A according to the control tree node. bli_l3_packm ( a, &a_pack, cntx, rntm, cntl, thread ); // Proceed with execution using packed matrix A. bli_trsm_int ( &BLIS_ONE, &a_pack, b, &BLIS_ONE, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } // ----------------------------------------------------------------------------- void bli_trsm_packb ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t b_pack; // Pack matrix B according to the control tree node. bli_l3_packm ( b, &b_pack, cntx, rntm, cntl, thread ); // Proceed with execution using packed matrix B. 
bli_trsm_int ( &BLIS_ONE, a, &b_pack, &BLIS_ONE, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } blis-0.6.1/frame/3/trsm/bli_trsm_rl_ker_var2.c000066400000000000000000000471371360743507500211650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

#define FUNCPTR_T gemm_fp

// Signature of the datatype-specific macro-kernel functions defined by the
// GENTFUNC macro further below. The object-based wrapper calls through a
// pointer of this type after unpacking its obj_t arguments.
typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffb,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha1,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   alpha2,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of macro-kernel function pointers; it is indexed by the execution
// datatype in the wrapper below (ftypes[dt_exec]).
static FUNCPTR_T GENARRAY(ftypes,trsm_rl_ker_var2);


// bli_trsm_rl_ker_var2()
//
// Object-based front end of the trsm_rl macro-kernel. It queries the
// execution datatype of c, the diagonal offset of b, the pack schemas of a
// and b, the dimensions (m and n from c, k from the width of a) and the
// buffers, strides, panel dimensions and panel strides of a, b and c, and
// then forwards all of them to the datatype-specific function selected
// from ftypes[]. The cntl argument is not referenced in this function.
void bli_trsm_rl_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffb  = bli_obj_diag_offset( b );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	void*     buf_alpha1;
	void*     buf_alpha2;

	FUNCPTR_T f;

	// Grab the address of the internal scalar buffer for the scalar
	// attached to A (the non-triangular matrix). This will be the alpha
	// scalar used in the gemmtrsm subproblems (ie: the scalar that would
	// be applied to the packed copy of A prior to it being updated by
	// the trsm subproblem). This scalar may be unit, if for example it
	// was applied during packing.
	buf_alpha1 = bli_obj_internal_scalar_buffer( a );

	// Grab the address of the internal scalar buffer for the scalar
	// attached to C. This will be the "beta" scalar used in the gemm-only
	// subproblems that correspond to micro-panels that do not intersect
	// the diagonal. We need this separate scalar because it's possible
	// that the alpha attached to A was reset, if it was applied during
	// packing.
	buf_alpha2 = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffb,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha1,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_alpha2,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


// Datatype-generic body of the trsm_rl macro-kernel. The function is
// instantiated by the INSERT_GENTFUNC_BASIC0() invocation that follows the
// macro (presumably once per supported datatype; that macro's definition is
// not visible in this file).
//
// Outline of the body, as implemented below:
//  - abort if the packing/register blocksizes hit the odd/odd indexing trap;
//  - return early if any dimension is zero or if B lies strictly above the
//    diagonal;
//  - skip the zero region above the diagonal (advancing a_cast), shrink n
//    past the zero region to the right of it, and round k up to a multiple
//    of NR;
//  - walk the column micro-panels of B from the last index to the first
//    (j = n_iter - 1 - jb); for each, loop over all m_iter row micro-panels
//    of A, executing only the iterations for which bli_trsm_my_iter_rr()
//    holds for the calling thread. Diagonal-intersecting panels of B use
//    the fused gemmtrsm micro-kernel; panels strictly below the diagonal
//    use the gemm micro-kernel.
// The micro-kernels are called with the A/B operands and the C strides
// swapped (see the notes in the body). Edge cases (m_cur != MR or
// n_cur != NR) are computed into the temporary buffer ct and then
// copied/accumulated into C.
// The rntm parameter is not referenced in the body.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffb, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha1, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   alpha2, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread \
     ) \
{ \
	const num_t     dt          = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR          = pd_a; \
	const dim_t     NR          = pd_b; \
	const dim_t     PACKMR      = cs_a; \
	const dim_t     PACKNR      = rs_b; \
\
	/* Cast the micro-kernel address to its function pointer type. */ \
	/* NOTE: We use the upper-triangular gemmtrsm ukernel because, while
	   the current macro-kernel targets the "rl" case (right-side/lower-
	   triangular), it becomes upper-triangular after the kernel operation
	   is transposed so that all kernel instances are of the "left"
	   variety (since those are the only trsm ukernels that exist). */ \
	PASTECH(ch,gemmtrsm_ukr_ft) \
	               gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
	PASTECH(ch,gemm_ukr_ft) \
	                   gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are chosen from the storage preference reported
	   for the gemm micro-kernel (col_pref below): rs_ct = 1, cs_ct = MR
	   if it prefers columns, and rs_ct = NR, cs_ct = 1 otherwise. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero        = PASTEMAC(ch,0); \
	ctype* restrict minus_one   = PASTEMAC(ch,m1); \
	ctype* restrict a_cast      = a; \
	ctype* restrict b_cast      = b; \
	ctype* restrict c_cast      = c; \
	ctype* restrict alpha1_cast = alpha1; \
	ctype* restrict alpha2_cast = alpha2; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffb_j; \
	dim_t           k_full; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           k_b1121; \
	dim_t           k_b11; \
	dim_t           k_b21; \
	dim_t           off_b11; \
	dim_t           off_b21; \
	dim_t           i, j, jb; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	inc_t           istep_a; \
	inc_t           istep_b; \
	inc_t           off_scl; \
	inc_t           ss_b_num; \
	inc_t           ss_b_den; \
	inc_t           ps_b_cur; \
	inc_t           is_b_cur; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKNR
	     pd_a == NR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKMR
	     cs_b == 1
	     pd_b == MR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)

	  Note that MR/NR and PACKMR/PACKNR have been swapped to reflect the
	  swapping of values in the control tree (ie: those values used when
	  packing). This swapping is needed since we cast right-hand trsm in
	  terms of transposed left-hand trsm. So, if we're going to be
	  transposing the operation, then A needs to be packed with NR and B
	  needs to be packed with MR (remember: B is the triangular matrix in
	  the right-hand side parameter case).
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if PACKMR and NR are both odd, or if PACKNR and
	   MR are both odd. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If the current panel of B is entirely above its diagonal,
	   it is implicitly zero. So we do nothing. */ \
	if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) return; \
\
	/* Compute k_full as k inflated up to a multiple of NR. This is
	   needed because some parameter combinations of trsm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of A (the non-triangular
	   matrix in this kernel; see istep_a below), which is used by
	   4m1/3m1 implementations, we need this unreduced value of k. */ \
	k_full = ( k % NR != 0 ? k + NR - ( k % NR ) : k ); \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_b ) || \
	     bli_is_3mi_packed( schema_b ) || \
	     bli_is_rih_packed( schema_b ) ) off_scl = 2; \
	else                                 off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. Note that real-only, imag-only, and summed-only
	   packing formats are not applicable here since trsm is a two-
	   operand operation only (unlike trmm, which is capable of three-
	   operand). */ \
	if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
	else                                 { ss_b_num = 1; ss_b_den = 1; } \
\
	/* If there is a zero region above where the diagonal of B intersects
	   the left edge of the panel, adjust the pointer to A and treat this
	   case as if the diagonal offset were zero. Note that we don't need to
	   adjust the pointer to B since packm would have simply skipped over
	   the region that was not stored. */ \
	if ( diagoffb < 0 ) \
	{ \
		j        = -diagoffb; \
		k        = k - j; \
		diagoffb = 0; \
		a_cast   = a_cast + ( j * PACKMR ) / off_scl; \
	} \
\
	/* If there is a zero region to the right of where the diagonal
	   of B intersects the bottom of the panel, shrink it so that
	   we can index to the correct place in C (corresponding to the
	   part of the panel of B that was packed).
	   NOTE: This is NOT being done to skip over "no-op" iterations,
	   as with the trsm_lu macro-kernel. This MUST be done for correct
	   execution because we use n (via n_iter) to compute diagonal and
	   index offsets for backwards movement through B. */ \
	if ( diagoffb + k < n ) \
	{ \
		n = diagoffb + k; \
	} \
\
	/* Check the k dimension, which needs to be a multiple of NR. If k
	   isn't a multiple of NR, we adjust it higher to satisfy the micro-
	   kernel, which is expecting to perform an NR x NR triangular solve.
	   This adjustment of k is consistent with what happened when B was
	   packed: all of its bottom/right edges were zero-padded, and
	   furthermore, the panel that stores the bottom-right corner of the
	   matrix has its diagonal extended into the zero-padded region (as
	   identity). This allows the trsm of that bottom-right panel to
	   proceed without producing any infs or NaNs that would infect the
	   "good" values of the corresponding block of A. */ \
	if ( k % NR != 0 ) k += NR - ( k % NR ); \
\
	/* NOTE: We don't need to check that n is a multiple of PACKNR since we
	   know that the underlying buffer was already allocated to have an n
	   dimension that is a multiple of PACKNR, with the region between the
	   last column and the next multiple of NR zero-padded accordingly. */ \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. A partial (edge) micro-panel counts as one additional
	   iteration. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k_full; \
	istep_b = PACKNR * k; \
\
	/* Round odd imaginary strides up to the next even value. */ \
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object.
	   NOTE: We swap the values for A and B since the triangular
	   "A" matrix is actually contained within B. */ \
	bli_auxinfo_set_schema_a( schema_b, &aux ); \
	bli_auxinfo_set_schema_b( schema_a, &aux ); \
\
	/* Save the imaginary stride of A to the auxinfo_t object.
	   NOTE: We swap the values for A and B since the triangular
	   "A" matrix is actually contained within B. */ \
	bli_auxinfo_set_is_b( istep_a, &aux ); \
\
	/* Save the desired output datatype (indicating no typecasting). */ \
	/*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \
\
	b1 = b_cast; \
	c1 = c_cast; \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( jb = 0; jb < n_iter; ++jb ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b11; \
		ctype* restrict b21; \
		ctype* restrict b2; \
\
		/* j is the column micro-panel index, counted down from the
		   last panel. Since c1 is decremented by cstep_c at the end
		   of each iteration, c1 + (n_iter-1)*cstep_c addresses the
		   j-th column panel of C. */ \
		j          = n_iter - 1 - jb; \
		diagoffb_j = diagoffb - ( doff_t )j*NR; \
		a1         = a_cast; \
		c11        = c1 + (n_iter-1)*cstep_c; \
\
		n_cur = ( bli_is_not_edge_b( jb, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* If the current panel of B intersects the diagonal, use a
		   special micro-kernel that performs a fused gemm and trsm.
		   If the current panel of B resides below the diagonal, use
		   a regular gemm micro-kernel. Otherwise, if it is above the
		   diagonal, it was not packed (because it is implicitly zero)
		   and so we do nothing. */ \
		if ( bli_intersects_diag_n( diagoffb_j, k, NR ) ) \
		{ \
			/* Determine the offset to and length of the panel that was packed
			   so we can index into the corresponding location in A. */ \
			off_b11   = bli_max( -diagoffb_j, 0 ); \
			k_b1121   = k - off_b11; \
			k_b11     = NR; \
			k_b21     = k_b1121 - NR; \
			off_b21   = off_b11 + k_b11; \
\
			/* Compute the addresses of the triangular block B11 and the
			   panel B21. */ \
			b11 = b1; \
			/* b21 = b1 + ( k_b11 * PACKNR ) / off_scl; */ \
			b21 = bli_ptr_inc_by_frac( b1, sizeof( ctype ), k_b11 * PACKNR, off_scl ); \
\
			/* Compute the panel stride for the current micro-panel. */ \
			is_b_cur  = k_b1121 * PACKNR; \
			is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
			ps_b_cur  = ( is_b_cur * ss_b_num ) / ss_b_den; \
\
			/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
			   object.
			   NOTE: We swap the values for A and B since the triangular
			   "A" matrix is actually contained within B. */ \
			bli_auxinfo_set_is_a( is_b_cur, &aux ); \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = 0; i < m_iter; ++i ) \
			{ \
				/* Only the iterations selected by bli_trsm_my_iter_rr()
				   for this thread do any work; a1 and c11 are advanced
				   on every iteration regardless. */ \
				if ( bli_trsm_my_iter_rr( i, thread ) ){ \
\
				ctype* restrict a11; \
				ctype* restrict a12; \
				ctype* restrict a2; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the A11 block and A12 panel. */ \
				a11 = a1 + ( off_b11 * PACKMR ) / off_scl; \
				a12 = a1 + ( off_b21 * PACKMR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				/*if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) */\
				if ( i + bli_thread_num_threads(thread) >= m_iter ) \
				{ \
					a2 = a_cast; \
					b2 = b1 + ps_b_cur; \
					if ( bli_is_last_iter_rr( jb, n_iter, 0, 1 ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. NOTE: We swap the values for A and B since the
				   triangular "A" matrix is actually contained within B. */ \
				bli_auxinfo_set_next_a( b2, &aux ); \
				bli_auxinfo_set_next_b( a2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel. Note
					   that the B operands come first and the strides
					   of C are passed in swapped order. */ \
					gemmtrsm_ukr \
					( \
					  k_b21, \
					  alpha1_cast, \
					  b21, \
					  b11, \
					  a12, \
					  a11, \
					  c11, cs_c, rs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel, writing
					   to the temporary buffer ct (with its strides
					   passed in swapped order). */ \
					gemmtrsm_ukr \
					( \
					  k_b21, \
					  alpha1_cast, \
					  b21, \
					  b11, \
					  a12, \
					  a11, \
					  ct, cs_ct, rs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Copy the result to the bottom edge of C. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        c11, rs_c,  cs_c ); \
				} \
				} \
\
				a1  += rstep_a; \
				c11 += rstep_c; \
			} \
\
			b1 += ps_b_cur; \
		} \
		else if ( bli_is_strictly_below_diag_n( diagoffb_j, k, NR ) ) \
		{ \
			/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
			   object.
			   NOTE: We swap the values for A and B since the triangular
			   "A" matrix is actually contained within B. */ \
			bli_auxinfo_set_is_a( istep_b, &aux ); \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = 0; i < m_iter; ++i ) \
			{ \
				/* Only the iterations selected by bli_trsm_my_iter_rr()
				   for this thread do any work; a1 and c11 are advanced
				   on every iteration regardless. */ \
				if ( bli_trsm_my_iter_rr( i, thread ) ){ \
\
				ctype* restrict a2; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				/*if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) */\
				if ( i + bli_thread_num_threads(thread) >= m_iter ) \
				{ \
					a2 = a_cast; \
					b2 = b1 + cstep_b; \
					if ( bli_is_last_iter_rr( jb, n_iter, 0, 1 ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. NOTE: We swap the values for A and B since the
				   triangular "A" matrix is actually contained within B. */ \
				bli_auxinfo_set_next_a( b2, &aux ); \
				bli_auxinfo_set_next_b( a2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. Note that b1 is
					   passed before a1 and the strides of C are passed
					   in swapped order. */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  b1, \
					  a1, \
					  alpha2_cast, \
					  c11, cs_c, rs_c, \
					  &aux, \
					  cntx \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel, writing to the
					   temporary buffer ct (with a zero beta and its
					   strides passed in swapped order). */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  b1, \
					  a1, \
					  zero, \
					  ct, cs_ct, rs_ct, \
					  &aux, \
					  cntx \
					); \
\
					/* Add the result to the edge of C. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        alpha2_cast, \
					                        c11, rs_c,  cs_c ); \
				} \
				} \
\
				a1  += rstep_a; \
				c11 += rstep_c; \
			} \
\
			b1 += cstep_b; \
		} \
\
		c1 -= cstep_c; \
	} \
}

INSERT_GENTFUNC_BASIC0( trsm_rl_ker_var2 )

blis-0.6.1/frame/3/trsm/bli_trsm_ru_ker_var2.c000066400000000000000000000463371360743507500211770ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffb, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha1, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* alpha2, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trsm_ru_ker_var2); void bli_trsm_ru_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffb = bli_obj_diag_offset( b ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_alpha1; void* buf_alpha2; FUNCPTR_T f; // Grab the address of the internal scalar buffer for the scalar // attached to A (the non-triangular matrix). 
This will be the alpha // scalar used in the gemmtrsm subproblems (ie: the scalar that would // be applied to the packed copy of A prior to it being updated by // the trsm subproblem). This scalar may be unit, if for example it // was applied during packing. buf_alpha1 = bli_obj_internal_scalar_buffer( a ); // Grab the address of the internal scalar buffer for the scalar // attached to C. This will be the "beta" scalar used in the gemm-only // subproblems that correspond to micro-panels that do not intersect // the diagonal. We need this separate scalar because it's possible // that the alpha attached to B was reset, if it was applied during // packing. buf_alpha2 = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffb, schema_a, schema_b, m, n, k, buf_alpha1, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_alpha2, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffb, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha1, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* alpha2, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Cast the micro-kernel address to its function pointer type. */ \ /* NOTE: We use the lower-triangular gemmtrsm ukernel because, while the current macro-kernel targets the "ru" case (right-side/upper- triangular), it becomes lower-triangular after the kernel operation is transposed so that all kernel instances are of the "left" variety (since those are the only trsm ukernels that exist). 
*/ \ PASTECH(ch,gemmtrsm_ukr_ft) \ gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha1_cast = alpha1; \ ctype* restrict alpha2_cast = alpha2; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffb_j; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_b0111; \ dim_t k_b01; \ dim_t off_b01; \ dim_t off_b11; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_b_num; \ inc_t ss_b_den; \ inc_t ps_b_cur; \ inc_t is_b_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKNR pd_a == NR ps_a == stride to next micro-panel of A rs_b == PACKMR cs_b == 1 pd_b == MR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) Note that MR/NR and PACKMR/PACKNR have been swapped to reflect the swapping of values in the control tree (ie: those values used when packing). This swapping is needed since we cast right-hand trsm in terms of transposed left-hand trsm. 
So, if we're going to be transposing the operation, then A needs to be packed with NR and B needs to be packed with MR (remember: B is the triangular matrix in the right-hand side parameter case). */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If the current panel of B is entirely below its diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffb, k, n ) ) return; \ \ /* Compute k_full as k inflated up to a multiple of NR. This is needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % NR != 0 ? k + NR - ( k % NR ) : k ); \ \ /* Compute indexing scaling factor for for 4m or 3m. This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_b ) || \ bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. 
Note that real-only, imag-only, and summed-only packing formats are not applicable here since trsm is a two- operand operation only (unlike trmm, which is capable of three- operand). */ \ if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffb > 0 ) \ { \ j = diagoffb; \ n = n - j; \ diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \ /* If there is a zero region below where the diagonal of B intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffb + n < k ) \ { \ k = -diagoffb + n; \ } \ \ /* Check the k dimension, which needs to be a multiple of NR. If k isn't a multiple of NR, we adjust it higher to satisfy the micro- kernel, which is expecting to perform an NR x NR triangular solve. This adjustment of k is consistent with what happened when B was packed: all of its bottom/right edges were zero-padded, and furthermore, the panel that stores the bottom-right corner of the matrix has its diagonal extended into the zero-padded region (as identity). This allows the trsm of that bottom-right panel to proceed without producing any infs or NaNs that would infect the "good" values of the corresponding block of A. */ \ if ( k % NR != 0 ) k += NR - ( k % NR ); \ \ /* NOTE: We don't need to check that n is a multiple of PACKNR since we know that the underlying buffer was already allocated to have an n dimension that is a multiple of PACKNR, with the region between the last column and the next multiple of NR zero-padded accordingly. 
*/ \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k_full; \ istep_b = PACKNR * k; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. */ \ bli_auxinfo_set_schema_a( schema_b, &aux ); \ bli_auxinfo_set_schema_b( schema_a, &aux ); \ \ /* Save the imaginary stride of A to the auxinfo_t object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. */ \ bli_auxinfo_set_is_b( istep_a, &aux ); \ \ /* Save the desired output datatype (indicating no typecasting). */ \ /*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \ \ b1 = b_cast; \ c1 = c_cast; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b01; \ ctype* restrict b11; \ ctype* restrict b2; \ \ diagoffb_j = diagoffb - ( doff_t )j*NR; \ a1 = a_cast; \ c11 = c1; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* If the current panel of B intersects the diagonal, use a special micro-kernel that performs a fused gemm and trsm. If the current panel of B resides above the diagonal, use a a regular gemm micro-kernel. 
Otherwise, if it is below the diagonal, it was not packed (because it is implicitly zero) and so we do nothing. */ \ if ( bli_intersects_diag_n( diagoffb_j, k, NR ) ) \ { \ /* Determine the offset to and length of the panel that was packed so we can index into the corresponding location in A. */ \ off_b01 = 0; \ k_b0111 = bli_min( k, -diagoffb_j + NR ); \ k_b01 = k_b0111 - NR; \ off_b11 = k_b01; \ \ /* Compute the addresses of the panel B10 and the triangular block B11. */ \ b01 = b1; \ /* b11 = b1 + ( k_b01 * PACKNR ) / off_scl; */ \ b11 = bli_ptr_inc_by_frac( b1, sizeof( ctype ), k_b01 * PACKNR, off_scl ); \ \ /* Compute the panel stride for the current micro-panel. */ \ is_b_cur = k_b0111 * PACKNR; \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. */ \ bli_auxinfo_set_is_a( is_b_cur, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trsm_my_iter_rr( i, thread ) ){ \ \ ctype* restrict a10; \ ctype* restrict a11; \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the A10 panel and A11 block. */ \ a10 = a1 + ( off_b01 * PACKMR ) / off_scl; \ a11 = a1 + ( off_b11 * PACKMR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ /*if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) */\ if ( i + bli_thread_num_threads(thread) >= m_iter ) \ { \ a2 = a_cast; \ b2 = b1 + ps_b_cur; \ if ( bli_is_last_iter_rr( j, n_iter, 0, 1 ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. 
*/ \ bli_auxinfo_set_next_a( b2, &aux ); \ bli_auxinfo_set_next_b( a2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the fused gemm/trsm micro-kernel. */ \ gemmtrsm_ukr \ ( \ k_b01, \ alpha1_cast, \ b01, \ b11, \ a10, \ a11, \ c11, cs_c, rs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the fused gemm/trsm micro-kernel. */ \ gemmtrsm_ukr \ ( \ k_b01, \ alpha1_cast, \ b01, \ b11, \ a10, \ a11, \ ct, cs_ct, rs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the bottom edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ \ b1 += ps_b_cur; \ } \ else if ( bli_is_strictly_above_diag_n( diagoffb_j, k, NR ) ) \ { \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. */ \ bli_auxinfo_set_is_a( istep_b, &aux ); \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ if ( bli_trsm_my_iter_rr( i, thread ) ){ \ \ ctype* restrict a2; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1; \ /*if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) */\ if ( i + bli_thread_num_threads(thread) >= m_iter ) \ { \ a2 = a_cast; \ b2 = b1 + cstep_b; \ if ( bli_is_last_iter_rr( j, n_iter, 0, 1 ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. NOTE: We swap the values for A and B since the triangular "A" matrix is actually contained within B. */ \ bli_auxinfo_set_next_a( b2, &aux ); \ bli_auxinfo_set_next_b( a2, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. 
*/ \ gemm_ukr \ ( \ k, \ minus_one, \ b1, \ a1, \ alpha2_cast, \ c11, cs_c, rs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ minus_one, \ b1, \ a1, \ zero, \ ct, cs_ct, rs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ alpha2_cast, \ c11, rs_c, cs_c ); \ } \ } \ \ a1 += rstep_a; \ c11 += rstep_c; \ } \ \ b1 += cstep_b; \ } \ \ c1 += cstep_c; \ } \ } INSERT_GENTFUNC_BASIC0( trsm_ru_ker_var2 ) blis-0.6.1/frame/3/trsm/bli_trsm_var.h000066400000000000000000000060271360743507500175430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* a, \ obj_t* b, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm, \ cntl_t* cntl, \ thrinfo_t* thread \ ); GENPROT( trsm_blk_var1 ) GENPROT( trsm_blk_var2 ) GENPROT( trsm_blk_var3 ) GENPROT( trsm_packa ) GENPROT( trsm_packb ) GENPROT( trsm_xx_ker_var2 ) GENPROT( trsm_ll_ker_var2 ) GENPROT( trsm_lu_ker_var2 ) GENPROT( trsm_rl_ker_var2 ) GENPROT( trsm_ru_ker_var2 ) // // Prototype BLAS-like interfaces with void pointer operands. // #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoff, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha1, \ void* a, inc_t cs_a, \ dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, \ dim_t pd_b, inc_t ps_b, \ void* alpha2, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ); INSERT_GENTPROT_BASIC0( trsm_ll_ker_var2 ) INSERT_GENTPROT_BASIC0( trsm_lu_ker_var2 ) INSERT_GENTPROT_BASIC0( trsm_rl_ker_var2 ) INSERT_GENTPROT_BASIC0( trsm_ru_ker_var2 ) blis-0.6.1/frame/3/trsm/bli_trsm_xx_ker_var2.c000066400000000000000000000054331360743507500212000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" static trsm_var_oft vars[2][2] = { { bli_trsm_ll_ker_var2, bli_trsm_lu_ker_var2 }, { bli_trsm_rl_ker_var2, bli_trsm_ru_ker_var2 } }; void bli_trsm_xx_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { bool_t side; bool_t uplo; trsm_var_oft f; // Set two bools: one based on the implied side parameter (the structure // of the root object) and one based on the uplo field of the triangular // matrix's root object (whether that is matrix A or matrix B). 
if ( bli_obj_root_is_triangular( a ) ) { side = 0; if ( bli_obj_root_is_lower( a ) ) uplo = 0; else uplo = 1; } else // if ( bli_obj_root_is_triangular( b ) ) { side = 1; if ( bli_obj_root_is_lower( b ) ) uplo = 0; else uplo = 1; } // Index into the variant array to extract the correct function pointer. f = vars[side][uplo]; // Call the macrokernel. f ( a, b, c, cntx, rntm, cntl, thread ); } blis-0.6.1/frame/3/trsm/other/000077500000000000000000000000001360743507500160235ustar00rootroot00000000000000blis-0.6.1/frame/3/trsm/other/bli_trsm_ll_ker_var2.c000066400000000000000000000464421360743507500222760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha1, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* alpha2, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trsm_ll_ker_var2); void bli_trsm_ll_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_alpha1; void* buf_alpha2; FUNCPTR_T f; // Grab the address of the internal scalar buffer for the scalar // attached to B (the non-triangular matrix). 
This will be the alpha // scalar used in the gemmtrsm subproblems (ie: the scalar that would // be applied to the packed copy of B prior to it being updated by // the trsm subproblem). This scalar may be unit, if for example it // was applied during packing. buf_alpha1 = bli_obj_internal_scalar_buffer( b ); // Grab the address of the internal scalar buffer for the scalar // attached to C. This will be the "beta" scalar used in the gemm-only // subproblems that correspond to micro-panels that do not intersect // the diagonal. We need this separate scalar because it's possible // that the alpha attached to B was reset, if it was applied during // packing. buf_alpha2 = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha1, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_alpha2, buf_c, rs_c, cs_c, cntx, rntm, thread ); } /* Type-generic trsm macrokernel. The outer loop walks the n dimension NR columns at a time (iterations are filtered per thread with bli_trsm_my_iter()), and the inner loop walks the m dimension MR rows at a time. Micro-panels of A that intersect the diagonal are handled by the fused gemmtrsm micro-kernel, micro-panels strictly below the diagonal by the gemm micro-kernel, and micro-panels above the diagonal are skipped. The rntm argument is not referenced in the function body. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha1, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* alpha2, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the gemmtrsm and gemm micro-kernel function pointers for datatype dt. */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases.
Note that the strides of this temporary buffer follow the storage preference reported for the gemm micro-kernel (col_pref), not the strides of C: ct is column-stored (rs_ct = 1, cs_ct = MR) if columns are preferred and row-stored (rs_ct = NR, cs_ct = 1) otherwise. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha1_cast = alpha1; \ ctype* restrict alpha2_cast = alpha2; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1011; \ dim_t k_a10; \ dim_t off_a10; \ dim_t off_a11; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1; cs_a == PACKMR; pd_a == MR; ps_a == stride to next micro-panel of A; rs_b == PACKNR; cs_b == 1; pd_b == NR; ps_b == stride to next micro-panel of B; rs_c == (no assumptions); cs_c == (no assumptions). */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if PACKMR and NR are both odd, or if PACKNR and MR are both odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If matrix A is above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full as k inflated up to a multiple of MR.
This is needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \ \ /* Compute the indexing scaling factor for 4m or 3m. This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non-triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. Note that real-only, imag-only, and summed-only packing formats are not applicable here since trsm is a two-operand operation only (unlike trmm, which is capable of three-operand). */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.)
*/ \ if ( diagoffa < 0 ) \ { \ i = -diagoffa; \ m = m - i; \ diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ /* Check the k dimension, which needs to be a multiple of MR. If k isn't a multiple of MR, we adjust it higher to satisfy the micro- kernel, which is expecting to perform an MR x MR triangular solve. This adjustment of k is consistent with what happened when A was packed: all of its bottom/right edges were zero-padded, and furthermore, the panel that stores the bottom-right corner of the matrix has its diagonal extended into the zero-padded region (as identity). This allows the trsm of that bottom-right panel to proceed without producing any infs or NaNs that would infect the "good" values of the corresponding block of B. */ \ if ( k % MR != 0 ) k += MR - ( k % MR ); \ \ /* NOTE: We don't need to check that m is a multiple of PACKMR since we know that the underlying buffer was already allocated to have an m dimension that is a multiple of PACKMR, with the region between the last row and the next multiple of MR zero-padded accordingly. */ \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object.
*/ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ b1 = b_cast; \ c1 = c_cast; \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter; ++j ) \ { \ if( bli_trsm_my_iter( j, thread ) ) { \ \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ a1 = a_cast; \ c11 = c1 + (0 )*rstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, use a special micro-kernel that performs a fused gemm and trsm. If the current panel of A resides below the diagonal, use a regular gemm micro-kernel. Otherwise, if it is above the diagonal, it was not packed (because it is implicitly zero) and so we do nothing. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a10; \ ctype* restrict a11; \ ctype* restrict b01; \ ctype* restrict b11; \ ctype* restrict a2; \ \ /* Compute various offsets into and lengths of parts of A. */ \ off_a10 = 0; \ k_a1011 = diagoffa_i + MR; \ k_a10 = k_a1011 - MR; \ off_a11 = k_a10; \ \ /* Compute the panel stride for the current diagonal-intersecting micro-panel. */ \ is_a_cur = k_a1011 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* Compute the addresses of the panel A10 and the triangular block A11. */ \ a10 = a1; \ /* a11 = a1 + ( k_a10 * PACKMR ) / off_scl; */ \ a11 = bli_ptr_inc_by_frac( a1, sizeof( ctype ), k_a10 * PACKMR, off_scl ); \ \ /* Compute the addresses of the panel B01 and the block B11. */ \ b01 = b1 + ( off_a10 * PACKNR ) / off_scl; \ b11 = b1 + ( off_a11 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B.
*/ \ a2 = a1 + ps_a_cur; \ if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ /*if ( bli_is_last_iter( j, n_iter, 0, 1 ) ) */\ if ( j + bli_thread_num_threads(thread) >= n_iter ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the fused gemm/trsm micro-kernel, writing directly to C. */ \ gemmtrsm_ukr \ ( \ k_a10, \ alpha1_cast, \ a10, \ a11, \ b01, \ b11, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the fused gemm/trsm micro-kernel, writing to the temporary buffer ct. */ \ gemmtrsm_ukr \ ( \ k_a10, \ alpha1_cast, \ a10, \ a11, \ b01, \ b11, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the m_cur x n_cur result from ct to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + rstep_a; \ if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ /*if ( bli_is_last_iter( j, n_iter, 0, 1 ) ) */\ if ( j + bli_thread_num_threads(thread) >= n_iter ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel with alpha = minus_one and beta = alpha2, updating C directly. */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ alpha2_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel with beta = zero, writing to the temporary buffer ct.
*/ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ alpha2_cast, \ c11, rs_c, cs_c ); \ } \ \ a1 += rstep_a; \ } \ \ c11 += rstep_c; \ } \ } \ \ b1 += cstep_b; \ c1 += cstep_c; \ } \ \ /* if ( bli_is_4mi_packed( schema_a ) ){ \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_r before", k, n, \ ( double* )b, rs_b, 1, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_i before", k, n, \ ( double* )b+72, rs_b, 1, "%4.1f", "" ); \ }else{ \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_r before", k, n, \ ( double* )b, 2*rs_b, 2, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_i before", k, n, \ ( double* )b+1, 2*rs_b, 2, "%4.1f", "" ); \ } \ */ \ \ /* PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: a11p_r computed", MR, MR, \ ( double* )a11, 1, PACKMR, "%4.1f", "" ); \ */ \ \ /* if ( bli_is_4mi_packed( schema_a ) ){ \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_r after", k, n, \ ( double* )b, rs_b, 1, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_i after", k, n, \ ( double* )b+72, rs_b, 1, "%4.1f", "" ); \ }else{ \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_r after", k, n, \ ( double* )b, 2*rs_b, 2, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_i after", k, n, \ ( double* )b+1, 2*rs_b, 2, "%4.1f", "" ); \ } \ PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: b_r", m, n, \ ( double* )c, 1, cs_c, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: b_i", m, n, \ ( double* )c + 8*9, 1, cs_c, "%4.1f", "" ); \ */ \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a1 (diag)", MR, k_a1011, a1, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a11 (diag)", MR, MR, a11, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: b1 (diag)", k_a1011, NR, bp_i, NR,
1, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: bp11 (diag)", MR, NR, bp11, NR, 1, "%5.2f", "" ); \ */ \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a1 (ndiag)", MR, k, a1, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: b1 (ndiag)", k, NR, bp, NR, 1, "%5.2f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( trsm_ll_ker_var2 ) blis-0.6.1/frame/3/trsm/other/bli_trsm_ll_ker_var2rr.c000066400000000000000000000477131360743507500226440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha1, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* alpha2, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trsm_ll_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // /* Object-based entry point: reads the execution datatype of c, the diagonal offset and pack schema of a, the pack schema of b, m and n from the dimensions of c, k from the width of a, and the buffers, strides, panel dimensions and panel strides of the operands; fetches the internal scalar buffers of b (alpha1) and c (alpha2); then calls the datatype-specific function stored at ftypes[dt_exec]. The cntl argument is not referenced in this function. */ void bli_trsm_ll_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_alpha1; void* buf_alpha2; FUNCPTR_T f; // Grab the address of the internal scalar buffer for the scalar // attached to B (the non-triangular matrix).
This will be the alpha // scalar used in the gemmtrsm subproblems (ie: the scalar that would // be applied to the packed copy of B prior to it being updated by // the trsm subproblem). This scalar may be unit, if for example it // was applied during packing. buf_alpha1 = bli_obj_internal_scalar_buffer( b ); // Grab the address of the internal scalar buffer for the scalar // attached to C. This will be the "beta" scalar used in the gemm-only // subproblems that correspond to micro-panels that do not intersect // the diagonal. We need this separate scalar because it's possible // that the alpha attached to B was reset, if it was applied during // packing. buf_alpha2 = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha1, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_alpha2, buf_c, rs_c, cs_c, cntx, rntm, thread ); } /* Type-generic trsm macrokernel using round-robin assignment of the outer (n dimension) loop iterations to threads, with the range obtained from bli_thread_range_jrir_rr(). The inner loop walks the m dimension MR rows at a time. Micro-panels of A that intersect the diagonal are handled by the fused gemmtrsm micro-kernel, micro-panels strictly below the diagonal by the gemm micro-kernel, and micro-panels above the diagonal are skipped. The rntm argument is not referenced in the function body. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha1, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* alpha2, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Query the context for the gemmtrsm and gemm micro-kernel function pointers for datatype dt. */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases.
Note that the strides of this temporary buffer follow the storage preference reported for the gemm micro-kernel (col_pref), not the strides of C: ct is column-stored (rs_ct = 1, cs_ct = MR) if columns are preferred and row-stored (rs_ct = NR, cs_ct = 1) otherwise. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha1_cast = alpha1; \ ctype* restrict alpha2_cast = alpha2; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1011; \ dim_t k_a10; \ dim_t off_a10; \ dim_t off_a11; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1; cs_a == PACKMR; pd_a == MR; ps_a == stride to next micro-panel of A; rs_b == PACKNR; cs_b == 1; pd_b == NR; ps_b == stride to next micro-panel of B; rs_c == (no assumptions); cs_c == (no assumptions). */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if PACKMR and NR are both odd, or if PACKNR and MR are both odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If matrix A is above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full as k inflated up to a multiple of MR.
This is needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \ \ /* Compute the indexing scaling factor for 4m or 3m. This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non-triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. Note that real-only, imag-only, and summed-only packing formats are not applicable here since trsm is a two-operand operation only (unlike trmm, which is capable of three-operand). */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.)
*/ \ if ( diagoffa < 0 ) \ { \ i = -diagoffa; \ m = m - i; \ diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ /* Check the k dimension, which needs to be a multiple of MR. If k isn't a multiple of MR, we adjust it higher to satisfy the micro- kernel, which is expecting to perform an MR x MR triangular solve. This adjustment of k is consistent with what happened when A was packed: all of its bottom/right edges were zero-padded, and furthermore, the panel that stores the bottom-right corner of the matrix has its diagonal extended into the zero-padded region (as identity). This allows the trsm of that bottom-right panel to proceed without producing any infs or NaNs that would infect the "good" values of the corresponding block of B. */ \ if ( k % MR != 0 ) k += MR - ( k % MR ); \ \ /* NOTE: We don't need to check that m is a multiple of PACKMR since we know that the underlying buffer was already allocated to have an m dimension that is a multiple of PACKMR, with the region between the last row and the next multiple of MR zero-padded accordingly. */ \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object.
*/ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* We don't bother querying the thrinfo_t node for the 1st loop because we can't parallelize that loop in trsm due to the inter-iteration dependencies that exist. */ \ /*thrinfo_t* caucus = bli_thrinfo_sub_node( thread );*/ \ \ /* Query the number of ways of parallelism (jr_nt) and this thread's work id (jr_tid) for the 2nd loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ \ dim_t jr_start, jr_end; \ dim_t jr_inc; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop. NOTE: Parallelism in the 1st loop is unattainable due to the inter-iteration dependencies present in trsm. */ \ bli_thread_range_jrir_rr( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ a1 = a_cast; \ c11 = c1 + (0 )*rstep_c; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, use a special micro-kernel that performs a fused gemm and trsm. If the current panel of A resides below the diagonal, use a regular gemm micro-kernel. Otherwise, if it is above the diagonal, it was not packed (because it is implicitly zero) and so we do nothing. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a10; \ ctype* restrict a11; \ ctype* restrict b01; \ ctype* restrict b11; \ ctype* restrict a2; \ \ /* Compute various offsets into and lengths of parts of A.
*/ \ off_a10 = 0; \ k_a1011 = diagoffa_i + MR; \ k_a10 = k_a1011 - MR; \ off_a11 = k_a10; \ \ /* Compute the panel stride for the current diagonal-intersecting micro-panel. */ \ is_a_cur = k_a1011 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* Compute the addresses of the panel A10 and the triangular block A11. */ \ a10 = a1; \ /* a11 = a1 + ( k_a10 * PACKMR ) / off_scl; */ \ a11 = bli_ptr_inc_by_frac( a1, sizeof( ctype ), k_a10 * PACKMR, off_scl ); \ \ /* Compute the addresses of the panel B01 and the block B11. */ \ b01 = b1 + ( off_a10 * PACKNR ) / off_scl; \ b11 = b1 + ( off_a11 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + ps_a_cur; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the fused gemm/trsm micro-kernel, writing directly to C. */ \ gemmtrsm_ukr \ ( \ k_a10, \ alpha1_cast, \ a10, \ a11, \ b01, \ b11, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the fused gemm/trsm micro-kernel, writing to the temporary buffer ct. */ \ gemmtrsm_ukr \ ( \ k_a10, \ alpha1_cast, \ a10, \ a11, \ b01, \ b11, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the m_cur x n_cur result from ct to the edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B.
*/ \ a2 = a1 + rstep_a; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel with alpha = minus_one and beta = alpha2, updating C directly. */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ alpha2_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel with beta = zero, writing to the temporary buffer ct. */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ alpha2_cast, \ c11, rs_c, cs_c ); \ } \ \ a1 += rstep_a; \ } \ \ c11 += rstep_c; \ } \ } \ \ /* if ( bli_is_4mi_packed( schema_a ) ){ \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_r before", k, n, \ ( double* )b, rs_b, 1, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_i before", k, n, \ ( double* )b+72, rs_b, 1, "%4.1f", "" ); \ }else{ \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_r before", k, n, \ ( double* )b, 2*rs_b, 2, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_i before", k, n, \ ( double* )b+1, 2*rs_b, 2, "%4.1f", "" ); \ } \ */ \ \ /* PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: a11p_r computed", MR, MR, \ ( double* )a11, 1, PACKMR, "%4.1f", "" ); \ */ \ \ /* if ( bli_is_4mi_packed( schema_a ) ){ \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_r after", k, n, \ ( double* )b, rs_b, 1, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_i after", k, n, \ ( double* )b+72, rs_b, 1, "%4.1f", "" ); \ }else{ \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2:
b_r after", k, n, \ ( double* )b, 2*rs_b, 2, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_i after", k, n, \ ( double* )b+1, 2*rs_b, 2, "%4.1f", "" ); \ } \ PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: b_r", m, n, \ ( double* )c, 1, cs_c, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: b_i", m, n, \ ( double* )c + 8*9, 1, cs_c, "%4.1f", "" ); \ */ \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a1 (diag)", MR, k_a1011, a1, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a11 (diag)", MR, MR, a11, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: b1 (diag)", k_a1011, NR, bp_i, NR, 1, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: bp11 (diag)", MR, NR, bp11, NR, 1, "%5.2f", "" ); \ */ \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a1 (ndiag)", MR, k, a1, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: b1 (ndiag)", k, NR, bp, NR, 1, "%5.2f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( trsm_ll_ker_var2rr ) blis-0.6.1/frame/3/trsm/other/bli_trsm_ll_ker_var2sl.c000066400000000000000000000477041360743507500226370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha1, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* alpha2, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trsm_ll_ker_var2sl); // // -- Macrokernel functions for slab partitioning ------------------------------ // /* Object-based entry point: reads the execution datatype of c, the diagonal offset and pack schema of a, the pack schema of b, m and n from the dimensions of c, k from the width of a, and the buffers, strides, panel dimensions and panel strides of the operands; fetches the internal scalar buffers of b (alpha1) and c (alpha2); then calls the datatype-specific function stored at ftypes[dt_exec]. The cntl argument is not referenced in this function. */ void bli_trsm_ll_ker_var2sl ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b
= bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_alpha1; void* buf_alpha2; FUNCPTR_T f; // Grab the address of the internal scalar buffer for the scalar // attached to B (the non-triangular matrix). This will be the alpha // scalar used in the gemmtrsm subproblems (ie: the scalar that would // be applied to the packed copy of B prior to it being updated by // the trsm subproblem). This scalar may be unit, if for example it // was applied during packing. buf_alpha1 = bli_obj_internal_scalar_buffer( b ); // Grab the address of the internal scalar buffer for the scalar // attached to C. This will be the "beta" scalar used in the gemm-only // subproblems that correspond to micro-panels that do not intersect // the diagonal. We need this separate scalar because it's possible // that the alpha attached to B was reset, if it was applied during // packing. buf_alpha2 = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha1, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_alpha2, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha1, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* alpha2, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names.
*/ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Cast the micro-kernel address to its function pointer type. */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha1_cast = alpha1; \ ctype* restrict alpha2_cast = alpha2; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1011; \ dim_t k_a10; \ dim_t off_a10; \ dim_t off_a11; \ dim_t i, j; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. 
*/ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If matrix A is above the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_above_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full as k inflated up to a multiple of MR. This is needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \ \ /* Compute indexing scaling factor for for 4m or 3m. This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. Note that real-only, imag-only, and summed-only packing formats are not applicable here since trsm is a two- operand operation only (unlike trmm, which is capable of three- operand). 
*/ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over the region that was not packed. (Note we assume the diagonal offset is a multiple of MR; this assumption will hold as long as the cache blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffa < 0 ) \ { \ i = -diagoffa; \ m = m - i; \ diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ /* Check the k dimension, which needs to be a multiple of MR. If k isn't a multiple of MR, we adjust it higher to satisfy the micro- kernel, which is expecting to perform an MR x MR triangular solve. This adjustment of k is consistent with what happened when A was packed: all of its bottom/right edges were zero-padded, and furthermore, the panel that stores the bottom-right corner of the matrix has its diagonal extended into the zero-padded region (as identity). This allows the trsm of that bottom-right panel to proceed without producing any infs or NaNs that would infect the "good" values of the corresponding block of B. */ \ if ( k % MR != 0 ) k += MR - ( k % MR ); \ \ /* NOTE: We don't need to check that m is a multiple of PACKMR since we know that the underlying buffer was already allocated to have an m dimension that is a multiple of PACKMR, with the region between the last row and the next multiple of MR zero-padded accordingly. */ \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. 
*/ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* We don't bother querying the thrinfo_t node for the 1st loop because we can't parallelize that loop in trsm due to the inter-iteration dependencies that exist. */ \ /*thrinfo_t* caucus = bli_thrinfo_sub_node( thread );*/ \ \ /* Query the number of threads and thread ids for each loop. */ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ \ dim_t jr_start, jr_end; \ dim_t jr_inc; \ \ /* Use slab assignment of micropanels to threads in the 2nd loop. NOTE: Parallelism in the 1st loop is unattainable due to the inter-iteration dependencies present in trsm. */ \ bli_thread_range_jrir_sl( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ a1 = a_cast; \ c11 = c1 + (0 )*rstep_c; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, use a special micro-kernel that performs a fused gemm and trsm. 
If the current panel of A resides below the diagonal, use a a regular gemm micro-kernel. Otherwise, if it is above the diagonal, it was not packed (because it is implicitly zero) and so we do nothing. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a10; \ ctype* restrict a11; \ ctype* restrict b01; \ ctype* restrict b11; \ ctype* restrict a2; \ \ /* Compute various offsets into and lengths of parts of A. */ \ off_a10 = 0; \ k_a1011 = diagoffa_i + MR; \ k_a10 = k_a1011 - MR; \ off_a11 = k_a10; \ \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_a_cur = k_a1011 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* Compute the addresses of the panel A10 and the triangular block A11. */ \ a10 = a1; \ /* a11 = a1 + ( k_a10 * PACKMR ) / off_scl; */ \ a11 = bli_ptr_inc_by_frac( a1, sizeof( ctype ), k_a10 * PACKMR, off_scl ); \ \ /* Compute the addresses of the panel B01 and the block B11. */ \ b01 = b1 + ( off_a10 * PACKNR ) / off_scl; \ b11 = b1 + ( off_a11 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + ps_a_cur; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the fused gemm/trsm micro-kernel. */ \ gemmtrsm_ukr \ ( \ k_a10, \ alpha1_cast, \ a10, \ a11, \ b01, \ b11, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the fused gemm/trsm micro-kernel. 
*/ \ gemmtrsm_ukr \ ( \ k_a10, \ alpha1_cast, \ a10, \ a11, \ b01, \ b11, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the bottom edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + rstep_a; \ if ( bli_is_last_iter_rr( i, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ alpha2_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. 
*/ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ alpha2_cast, \ c11, rs_c, cs_c ); \ } \ \ a1 += rstep_a; \ } \ \ c11 += rstep_c; \ } \ } \ \ /* if ( bli_is_4mi_packed( schema_a ) ){ \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_r before", k, n, \ ( double* )b, rs_b, 1, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_i before", k, n, \ ( double* )b+72, rs_b, 1, "%4.1f", "" ); \ }else{ \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_r before", k, n, \ ( double* )b, 2*rs_b, 2, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_i before", k, n, \ ( double* )b+1, 2*rs_b, 2, "%4.1f", "" ); \ } \ */ \ \ /* PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: a11p_r computed", MR, MR, \ ( double* )a11, 1, PACKMR, "%4.1f", "" ); \ */ \ \ /* if ( bli_is_4mi_packed( schema_a ) ){ \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_r after", k, n, \ ( double* )b, rs_b, 1, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm4m1_ll_ker_var2: b_i after", k, n, \ ( double* )b+72, rs_b, 1, "%4.1f", "" ); \ }else{ \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_r after", k, n, \ ( double* )b, 2*rs_b, 2, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsmnat_ll_ker_var2: b_i after", k, n, \ ( double* )b+1, 2*rs_b, 2, "%4.1f", "" ); \ } \ PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: b_r", m, n, \ ( double* )c, 1, cs_c, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "trsm_ll_ker_var2: b_i", m, n, \ ( double* )c + 8*9, 1, cs_c, "%4.1f", "" ); \ */ \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a1 (diag)", MR, k_a1011, a1, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: a11 (diag)", MR, MR, a11, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: b1 (diag)", k_a1011, NR, bp_i, NR, 1, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: bp11 (diag)", MR, NR, bp11, NR, 1, "%5.2f", "" ); \ */ \ \ /* PASTEMAC(ch,fprintm)( stdout, 
"trsm_ll_ker_var2: a1 (ndiag)", MR, k, a1, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_ll_ker_var2: b1 (ndiag)", k, NR, bp, NR, 1, "%5.2f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( trsm_ll_ker_var2sl ) blis-0.6.1/frame/3/trsm/other/bli_trsm_lu_ker_var2.c000066400000000000000000000445311360743507500223040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha1, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* alpha2, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trsm_lu_ker_var2); /* bli_trsm_lu_ker_var2(): object-level front end for the macrokernel generated below. It reads the diagonal offset and pack schema of a, the pack schema of b, m and n from c, k from the width of a, and the buffers, strides and panel parameters of a, b and c; takes the internal scalars attached to b and c as alpha1 and alpha2; then calls the function selected from ftypes[] by the execution datatype of c. The cntl argument is not referenced in this function. */ void bli_trsm_lu_ker_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_alpha1; void* buf_alpha2; FUNCPTR_T f; // Grab the address of the internal scalar buffer for the scalar // attached to B (the non-triangular matrix). This will be the alpha // scalar used in the gemmtrsm subproblems (ie: the scalar that would // be applied to the packed copy of B prior to it being updated by // the trsm subproblem). This scalar may be unit, if for example it // was applied during packing. buf_alpha1 = bli_obj_internal_scalar_buffer( b ); // Grab the address of the internal scalar buffer for the scalar // attached to C. This will be the "beta" scalar used in the gemm-only // subproblems that correspond to micro-panels that do not intersect // the diagonal. 
We need this separate scalar because it's possible // that the alpha attached to B was reset, if it was applied during // packing. buf_alpha2 = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha1, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_alpha2, buf_c, rs_c, cs_c, cntx, rntm, thread ); } /* GENTFUNC body: the type-generic macrokernel (presumably instantiated per datatype by the INSERT_GENTFUNC_BASIC0 line at the end of the file). Every thread loops over all NR-wide column panels j but only processes those for which bli_trsm_my_iter( j, thread ) is true. Within a panel it walks the MR-tall micro-panels of A from i = m_iter-1 down to 0: a micro-panel that intersects the diagonal goes to the fused gemmtrsm micro-kernel, one strictly above the diagonal goes to the gemm micro-kernel with minus_one as alpha, and any other micro-panel is skipped. Tiles with m_cur != MR or n_cur != NR are computed into the stack buffer ct and then copied or accumulated into C. */ #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha1, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* alpha2, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Cast the micro-kernel address to its function pointer type. */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer follow the storage preference reported for the gemm micro-kernel (col_pref), not the strides of C: if columns are preferred, ct is column-stored (rs_ct = 1, cs_ct = MR); otherwise it is row-stored (rs_ct = NR, cs_ct = 1). */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha1_cast = alpha1; \ ctype* restrict alpha2_cast = alpha2; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1112; \ dim_t k_a11; \ dim_t k_a12; \ dim_t off_a11; \ dim_t off_a12; \ dim_t i, j, ib; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. Concretely, abort if PACKMR and NR are both odd, or if PACKNR and MR are both odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If matrix A is below the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full as k inflated up to a multiple of MR. This is needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \ \ /* Compute indexing scaling factor for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. Note that real-only, imag-only, and summed-only packing formats are not applicable here since trsm is a two- operand operation only (unlike trmm, which is capable of three- operand). */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of A intersects the top edge of the block, adjust the pointer to B and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to A since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffa > 0 ) \ { \ i = diagoffa; \ k = k - i; \ diagoffa = 0; \ b_cast = b_cast + ( i * PACKNR ) / off_scl; \ } \ \ /* If there is a zero region below where the diagonal of A intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffa + k < m ) \ { \ m = -diagoffa + k; \ } \ \ /* Check the k dimension, which needs to be a multiple of MR. If k isn't a multiple of MR, we adjust it higher to satisfy the micro- kernel, which is expecting to perform an MR x MR triangular solve. 
This adjustment of k is consistent with what happened when A was packed: all of its bottom/right edges were zero-padded, and furthermore, the panel that stores the bottom-right corner of the matrix has its diagonal extended into the zero-padded region (as identity). This allows the trsm of that bottom-right panel to proceed without producing any infs or NaNs that would infect the "good" values of the corresponding block of B. */ \ if ( k % MR != 0 ) k += MR - ( k % MR ); \ \ /* NOTE: We don't need to check that m is a multiple of PACKMR since we know that the underlying buffer was already allocated to have an m dimension that is a multiple of PACKMR, with the region between the last row and the next multiple of MR zero-padded accordingly. */ \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; /* Odd strides are bumped up to the next even value. */ \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ b1 = b_cast; \ c1 = c_cast; \ \ /* Loop over the n dimension (NR columns at a time). Every thread walks all j; the body runs only when bli_trsm_my_iter( j, thread ) is true, while b1 and c1 advance on every iteration. 
*/ \ for ( j = 0; j < n_iter; ++j ) \ { \ if( bli_trsm_my_iter( j, thread ) ) { \ \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ a1 = a_cast; \ c11 = c1 + (m_iter-1)*rstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ /* Loop over the m dimension (MR rows at a time). Micro-panels are visited from i = m_iter - 1 down to 0, and c11 is decremented by rstep_c after each one. */ \ for ( ib = 0; ib < m_iter; ++ib ) \ { \ i = m_iter - 1 - ib; \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_b( ib, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, use a special micro-kernel that performs a fused gemm and trsm. If the current panel of A resides above the diagonal, use a regular gemm micro-kernel. Otherwise, if it is below the diagonal, it was not packed (because it is implicitly zero) and so we do nothing. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a11; \ ctype* restrict a12; \ ctype* restrict b11; \ ctype* restrict b21; \ ctype* restrict a2; \ \ /* Compute various offsets into and lengths of parts of A. */ \ off_a11 = diagoffa_i; \ k_a1112 = k - off_a11;; \ k_a11 = MR; \ k_a12 = k_a1112 - MR; \ off_a12 = off_a11 + k_a11; \ \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_a_cur = k_a1112 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* Compute the addresses of the triangular block A11 and the panel A12. */ \ a11 = a1; \ /* a12 = a1 + ( k_a11 * PACKMR ) / off_scl; */ \ a12 = bli_ptr_inc_by_frac( a1, sizeof( ctype ), k_a11 * PACKMR, off_scl ); \ \ /* Compute the addresses of the block B11 and the panel B21. */ \ b11 = b1 + ( off_a11 * PACKNR ) / off_scl; \ b21 = b1 + ( off_a12 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. 
*/ \ a2 = a1 + ps_a_cur; \ if ( bli_is_last_iter( ib, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ /*if ( bli_is_last_iter( j, n_iter, 0, 1 ) ) */\ if ( j + bli_thread_num_threads(thread) >= n_iter ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the fused gemm/trsm micro-kernel. */ \ gemmtrsm_ukr \ ( \ k_a12, \ alpha1_cast, \ a12, \ a11, \ b21, \ b11, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the fused gemm/trsm micro-kernel, writing to the temporary buffer ct instead of C. */ \ gemmtrsm_ukr \ ( \ k_a12, \ alpha1_cast, \ a12, \ a11, \ b21, \ b11, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the bottom edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + rstep_a; \ if ( bli_is_last_iter( ib, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ /*if ( bli_is_last_iter( j, n_iter, 0, 1 ) ) */\ if ( j + bli_thread_num_threads(thread) >= n_iter ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ alpha2_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel with zero as beta, writing to the temporary buffer ct. 
*/ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ alpha2_cast, \ c11, rs_c, cs_c ); \ } \ \ a1 += rstep_a; \ } \ \ c11 -= rstep_c; \ } \ } \ \ b1 += cstep_b; \ c1 += cstep_c; \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: a1 (diag)", MR, k_a1112, a1, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 (diag)", MR, NR, b11, NR, 1, "%6.3f", "" ); \ printf( "m_iter = %lu\n", m_iter ); \ printf( "m_cur = %lu\n", m_cur ); \ printf( "k = %lu\n", k ); \ printf( "diagoffa_i = %lu\n", diagoffa_i ); \ printf( "off_a1112 = %lu\n", off_a1112 ); \ printf( "k_a1112 = %lu\n", k_a1112 ); \ printf( "k_a12 = %lu\n", k_a12 ); \ printf( "k_a11 = %lu\n", k_a11 ); \ printf( "rs_c,cs_c = %lu %lu\n", rs_c, cs_c ); \ printf( "rs_ct,cs_ct= %lu %lu\n", rs_ct, cs_ct ); \ */ \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 after (diag)", MR, NR, b11, NR, 1, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 after (diag)", MR, NR, b11, NR, 1, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: ct after (diag)", m_cur, n_cur, ct, rs_ct, cs_ct, "%5.2f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( trsm_lu_ker_var2 ) blis-0.6.1/frame/3/trsm/other/bli_trsm_lu_ker_var2rr.c000066400000000000000000000460021360743507500226430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T) ( doff_t diagoffa, pack_t schema_a, pack_t schema_b, dim_t m, dim_t n, dim_t k, void* alpha1, void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, void* alpha2, void* c, inc_t rs_c, inc_t cs_c, cntx_t* cntx, rntm_t* rntm, thrinfo_t* thread ); static FUNCPTR_T GENARRAY(ftypes,trsm_lu_ker_var2rr); // // -- Macrokernel functions for round-robin partitioning ----------------------- // /* bli_trsm_lu_ker_var2rr(): object-level front end for the macrokernel generated below. It reads the diagonal offset and pack schema of a, the pack schema of b, m and n from c, k from the width of a, and the buffers, strides and panel parameters of a, b and c; takes the internal scalars attached to b and c as alpha1 and alpha2; then calls the function selected from ftypes[] by the execution datatype of c. The cntl argument is not referenced in this function. */ void bli_trsm_lu_ker_var2rr ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { num_t dt_exec = bli_obj_exec_dt( c ); doff_t diagoffa = bli_obj_diag_offset( a ); pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t cs_a = bli_obj_col_stride( a ); dim_t pd_a = bli_obj_panel_dim( a ); inc_t ps_a = bli_obj_panel_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); dim_t pd_b = bli_obj_panel_dim( b ); inc_t ps_b = bli_obj_panel_stride( b ); void* buf_c = bli_obj_buffer_at_off( c ); inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); void* buf_alpha1; void* buf_alpha2; FUNCPTR_T f; // Grab the address of the internal scalar buffer for the scalar // attached to B (the non-triangular matrix). This will be the alpha // scalar used in the gemmtrsm subproblems (ie: the scalar that would // be applied to the packed copy of B prior to it being updated by // the trsm subproblem). This scalar may be unit, if for example it // was applied during packing. buf_alpha1 = bli_obj_internal_scalar_buffer( b ); // Grab the address of the internal scalar buffer for the scalar // attached to C. 
This will be the "beta" scalar used in the gemm-only // subproblems that correspond to micro-panels that do not intersect // the diagonal. We need this separate scalar because it's possible // that the alpha attached to B was reset, if it was applied during // packing. buf_alpha2 = bli_obj_internal_scalar_buffer( c ); // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_exec]; // Invoke the function. f( diagoffa, schema_a, schema_b, m, n, k, buf_alpha1, buf_a, cs_a, pd_a, ps_a, buf_b, rs_b, pd_b, ps_b, buf_alpha2, buf_c, rs_c, cs_c, cntx, rntm, thread ); } #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffa, \ pack_t schema_a, \ pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ void* alpha1, \ void* a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \ void* b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \ void* alpha2, \ void* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm, \ thrinfo_t* thread \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ /* Alias some constants to simpler names. */ \ const dim_t MR = pd_a; \ const dim_t NR = pd_b; \ const dim_t PACKMR = cs_a; \ const dim_t PACKNR = rs_b; \ \ /* Cast the micro-kernel address to its function pointer type. */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the original C matrix. For example, if C is column-stored, ct will be column-stored as well. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ ctype* restrict a_cast = a; \ ctype* restrict b_cast = b; \ ctype* restrict c_cast = c; \ ctype* restrict alpha1_cast = alpha1; \ ctype* restrict alpha2_cast = alpha2; \ ctype* restrict b1; \ ctype* restrict c1; \ \ doff_t diagoffa_i; \ dim_t k_full; \ dim_t m_iter, m_left; \ dim_t n_iter, n_left; \ dim_t m_cur; \ dim_t n_cur; \ dim_t k_a1112; \ dim_t k_a11; \ dim_t k_a12; \ dim_t off_a11; \ dim_t off_a12; \ dim_t i, j, ib; \ inc_t rstep_a; \ inc_t cstep_b; \ inc_t rstep_c, cstep_c; \ inc_t istep_a; \ inc_t istep_b; \ inc_t off_scl; \ inc_t ss_a_num; \ inc_t ss_a_den; \ inc_t ps_a_cur; \ inc_t is_a_cur; \ auxinfo_t aux; \ \ /* Assumptions/assertions: rs_a == 1 cs_a == PACKMR pd_a == MR ps_a == stride to next micro-panel of A rs_b == PACKNR cs_b == 1 pd_b == NR ps_b == stride to next micro-panel of B rs_c == (no assumptions) cs_c == (no assumptions) */ \ \ /* Safety trap: Certain indexing within this macro-kernel does not work as intended if both MR and NR are odd. */ \ if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \ ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \ \ /* If any dimension is zero, return immediately. */ \ if ( bli_zero_dim3( m, n, k ) ) return; \ \ /* Safeguard: If matrix A is below the diagonal, it is implicitly zero. So we do nothing. */ \ if ( bli_is_strictly_below_diag_n( diagoffa, m, k ) ) return; \ \ /* Compute k_full as k inflated up to a multiple of MR. This is needed because some parameter combinations of trsm reduce k to advance past zero regions in the triangular matrix, and when computing the imaginary stride of B (the non-triangular matrix), which is used by 4m1/3m1 implementations, we need this unreduced value of k. */ \ k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \ \ /* Compute indexing scaling factor for for 4m or 3m. 
This is needed because one of the packing register blocksizes (PACKMR or PACKNR) is used to index into the micro-panels of the non- triangular matrix when computing with a diagonal-intersecting micro-panel of the triangular matrix. In the case of 4m or 3m, real values are stored in both sub-panels, and so the indexing needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ if ( bli_is_4mi_packed( schema_a ) || \ bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ /* Compute the storage stride scaling. Usually this is just 1. However, in the case of interleaved 3m, we need to scale the offset by 3/2. Note that real-only, imag-only, and summed-only packing formats are not applicable here since trsm is a two- operand operation only (unlike trmm, which is capable of three- operand). */ \ if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of A intersects the top edge of the block, adjust the pointer to B and treat this case as if the diagonal offset were zero. Note that we don't need to adjust the pointer to A since packm would have simply skipped over the region that was not stored. */ \ if ( diagoffa > 0 ) \ { \ i = diagoffa; \ k = k - i; \ diagoffa = 0; \ b_cast = b_cast + ( i * PACKNR ) / off_scl; \ } \ \ /* If there is a zero region below where the diagonal of A intersects the right side of the block, shrink it to prevent "no-op" iterations from executing. */ \ if ( -diagoffa + k < m ) \ { \ m = -diagoffa + k; \ } \ \ /* Check the k dimension, which needs to be a multiple of MR. If k isn't a multiple of MR, we adjust it higher to satisfy the micro- kernel, which is expecting to perform an MR x MR triangular solve. 
This adjustment of k is consistent with what happened when A was packed: all of its bottom/right edges were zero-padded, and furthermore, the panel that stores the bottom-right corner of the matrix has its diagonal extended into the zero-padded region (as identity). This allows the trsm of that bottom-right panel to proceed without producing any infs or NaNs that would infect the "good" values of the corresponding block of B. */ \ if ( k % MR != 0 ) k += MR - ( k % MR ); \ \ /* NOTE: We don't need to check that m is a multiple of PACKMR since we know that the underlying buffer was already allocated to have an m dimension that is a multiple of PACKMR, with the region between the last row and the next multiple of MR zero-padded accordingly. */ \ \ /* Clear the temporary C buffer in case it has any infs or NaNs. */ \ PASTEMAC(ch,set0s_mxn)( MR, NR, \ ct, rs_ct, cs_ct ); \ \ /* Compute number of primary and leftover components of the m and n dimensions. */ \ n_iter = n / NR; \ n_left = n % NR; \ \ m_iter = m / MR; \ m_left = m % MR; \ \ if ( n_left ) ++n_iter; \ if ( m_left ) ++m_iter; \ \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ cstep_b = ps_b; \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ \ istep_a = PACKMR * k; \ istep_b = PACKNR * k_full; \ \ if ( bli_is_odd( istep_a ) ) istep_a += 1; \ if ( bli_is_odd( istep_b ) ) istep_b += 1; \ \ /* Save the pack schemas of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_schema_a( schema_a, &aux ); \ bli_auxinfo_set_schema_b( schema_b, &aux ); \ \ /* Save the imaginary stride of B to the auxinfo_t object. */ \ bli_auxinfo_set_is_b( istep_b, &aux ); \ \ /* We don't bother querying the thrinfo_t node for the 1st loop because we can't parallelize that loop in trsm due to the inter-iteration dependencies that exist. */ \ /*thrinfo_t* caucus = bli_thrinfo_sub_node( thread );*/ \ \ /* Query the number of threads and thread ids for each loop. 
*/ \ dim_t jr_nt = bli_thread_n_way( thread ); \ dim_t jr_tid = bli_thread_work_id( thread ); \ \ dim_t jr_start, jr_end; \ dim_t jr_inc; \ \ /* Use round-robin assignment of micropanels to threads in the 2nd loop. NOTE: Parallelism in the 1st loop is unattainable due to the inter-iteration dependencies present in trsm. */ \ bli_thread_range_jrir_rr( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \ \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = jr_start; j < jr_end; j += jr_inc ) \ { \ ctype* restrict a1; \ ctype* restrict c11; \ ctype* restrict b2; \ \ b1 = b_cast + j * cstep_b; \ c1 = c_cast + j * cstep_c; \ \ n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \ \ /* Initialize our next panel of B to be the current panel of B. */ \ b2 = b1; \ \ a1 = a_cast; \ c11 = c1 + (m_iter-1)*rstep_c; \ \ /* Loop over the m dimension (MR rows at a time). */ \ for ( ib = 0; ib < m_iter; ++ib ) \ { \ i = m_iter - 1 - ib; \ diagoffa_i = diagoffa + ( doff_t )i*MR; \ \ m_cur = ( bli_is_not_edge_b( ib, m_iter, m_left ) ? MR : m_left ); \ \ /* If the current panel of A intersects the diagonal, use a special micro-kernel that performs a fused gemm and trsm. If the current panel of A resides above the diagonal, use a a regular gemm micro-kernel. Otherwise, if it is below the diagonal, it was not packed (because it is implicitly zero) and so we do nothing. */ \ if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a11; \ ctype* restrict a12; \ ctype* restrict b11; \ ctype* restrict b21; \ ctype* restrict a2; \ \ /* Compute various offsets into and lengths of parts of A. */ \ off_a11 = diagoffa_i; \ k_a1112 = k - off_a11;; \ k_a11 = MR; \ k_a12 = k_a1112 - MR; \ off_a12 = off_a11 + k_a11; \ \ /* Compute the panel stride for the current diagonal- intersecting micro-panel. */ \ is_a_cur = k_a1112 * PACKMR; \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 
1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ /* Compute the addresses of the triangular block A11 and the panel A12. */ \ a11 = a1; \ /* a12 = a1 + ( k_a11 * PACKMR ) / off_scl; */ \ a12 = bli_ptr_inc_by_frac( a1, sizeof( ctype ), k_a11 * PACKMR, off_scl ); \ \ /* Compute the addresses of the panel B01 and the block B11. */ \ b11 = b1 + ( off_a11 * PACKNR ) / off_scl; \ b21 = b1 + ( off_a12 * PACKNR ) / off_scl; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + ps_a_cur; \ if ( bli_is_last_iter_rr( ib, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. */ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( is_a_cur, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the fused gemm/trsm micro-kernel. */ \ gemmtrsm_ukr \ ( \ k_a12, \ alpha1_cast, \ a12, \ a11, \ b21, \ b11, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the fused gemm/trsm micro-kernel. */ \ gemmtrsm_ukr \ ( \ k_a12, \ alpha1_cast, \ a12, \ a11, \ b21, \ b11, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Copy the result to the bottom edge of C. */ \ PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ c11, rs_c, cs_c ); \ } \ \ a1 += ps_a_cur; \ } \ else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \ { \ ctype* restrict a2; \ \ /* Compute the addresses of the next panels of A and B. */ \ a2 = a1 + rstep_a; \ if ( bli_is_last_iter_rr( ib, m_iter, 0, 1 ) ) \ { \ a2 = a_cast; \ b2 = b1; \ if ( bli_is_last_iter_rr( j, n_iter, jr_tid, jr_nt ) ) \ b2 = b_cast; \ } \ \ /* Save addresses of next panels of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_next_a( a2, &aux ); \ bli_auxinfo_set_next_b( b2, &aux ); \ \ /* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t object. */ \ bli_auxinfo_set_is_a( istep_a, &aux ); \ \ /* Handle interior and edge cases separately. */ \ if ( m_cur == MR && n_cur == NR ) \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ alpha2_cast, \ c11, rs_c, cs_c, \ &aux, \ cntx \ ); \ } \ else \ { \ /* Invoke the gemm micro-kernel. */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1, \ b1, \ zero, \ ct, rs_ct, cs_ct, \ &aux, \ cntx \ ); \ \ /* Add the result to the edge of C. */ \ PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \ ct, rs_ct, cs_ct, \ alpha2_cast, \ c11, rs_c, cs_c ); \ } \ \ a1 += rstep_a; \ } \ \ c11 -= rstep_c; \ } \ } \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: a1 (diag)", MR, k_a1112, a1, 1, MR, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 (diag)", MR, NR, b11, NR, 1, "%6.3f", "" ); \ printf( "m_iter = %lu\n", m_iter ); \ printf( "m_cur = %lu\n", m_cur ); \ printf( "k = %lu\n", k ); \ printf( "diagoffa_i = %lu\n", diagoffa_i ); \ printf( "off_a1112 = %lu\n", off_a1112 ); \ printf( "k_a1112 = %lu\n", k_a1112 ); \ printf( "k_a12 = %lu\n", k_a12 ); \ printf( "k_a11 = %lu\n", k_a11 ); \ printf( "rs_c,cs_c = %lu %lu\n", rs_c, cs_c ); \ printf( "rs_ct,cs_ct= %lu %lu\n", rs_ct, cs_ct ); \ */ \ \ /* PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 after (diag)", MR, NR, b11, NR, 1, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 after (diag)", MR, NR, b11, NR, 1, "%5.2f", "" ); \ PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: ct after (diag)", m_cur, n_cur, ct, rs_ct, cs_ct, "%5.2f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC0( trsm_lu_ker_var2rr ) blis-0.6.1/frame/3/trsm/other/bli_trsm_lu_ker_var2sl.c000066400000000000000000000457731360743507500226540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

// Name given to the function pointer type used to dispatch to the
// datatype-specific macro-kernels generated at the bottom of this file.
#define FUNCPTR_T gemm_fp

// Signature shared by every datatype-specific instance of the macro-kernel.
typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffa,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha1,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   alpha2,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of datatype-specific macro-kernels. It is indexed by the execution
// datatype of C in bli_trsm_lu_ker_var2sl() below.
// NOTE(review): GENARRAY is defined elsewhere; presumably it declares and
// initializes ftypes[] with one entry per datatype -- confirm.
static FUNCPTR_T GENARRAY(ftypes,trsm_lu_ker_var2sl);

//
// -- Macrokernel functions for slab partitioning ------------------------------
//

// Object-based entry point. Reads the execution datatype of C, the diagonal
// offset of A, the pack schemas of A and B, the problem dimensions, and the
// buffers, strides, panel dimensions and panel strides of the operands, then
// calls the datatype-specific macro-kernel selected from ftypes[].
//
//   a       triangular operand (its diagonal offset is queried).
//   b       non-triangular operand; its internal scalar becomes alpha1.
//   c       output operand; its internal scalar becomes alpha2.
//   cntx,
//   rntm,
//   thread  forwarded unchanged to the datatype-specific function.
//   cntl    not referenced by this function.
void bli_trsm_lu_ker_var2sl
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffa  = bli_obj_diag_offset( a );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	// m and n are taken from C; k is taken from the width of A.
	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	void*     buf_alpha1;
	void*     buf_alpha2;

	FUNCPTR_T f;

	// Grab the address of the internal scalar buffer for the scalar
	// attached to B (the non-triangular matrix). This will be the alpha
	// scalar used in the gemmtrsm subproblems (ie: the scalar that would
	// be applied to the packed copy of B prior to it being updated by
	// the trsm subproblem). This scalar may be unit, if for example it
	// was applied during packing.
	buf_alpha1 = bli_obj_internal_scalar_buffer( b );

	// Grab the address of the internal scalar buffer for the scalar
	// attached to C. This will be the "beta" scalar used in the gemm-only
	// subproblems that correspond to micro-panels that do not intersect
	// the diagonal. We need this separate scalar because it's possible
	// that the alpha attached to B was reset, if it was applied during
	// packing.
	buf_alpha2 = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffa,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha1,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_alpha2,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


// Datatype-generic body of the macro-kernel. GENTFUNC is expanded by the
// INSERT_GENTFUNC_BASIC0() invocation that follows it, with "ctype" as the
// element type and "ch" as the type prefix (NOTE(review): presumably once
// per supported datatype -- confirm against the definition of that macro).
//
// For each NR-wide micro-panel of B/C assigned to the calling thread by
// bli_thread_range_jrir_sl(), the MR-tall micro-panels of A are visited from
// the last one to the first (i = m_iter-1 down to 0):
//   - a micro-panel that intersects the diagonal is handled by the fused
//     gemmtrsm micro-kernel (BLIS_GEMMTRSM_U_UKR);
//   - a micro-panel strictly above the diagonal is handled by the gemm
//     micro-kernel, with minus_one as alpha and alpha2 as beta;
//   - for any other micro-panel only the pointer into C is moved.
// Edge cases (m_cur != MR or n_cur != NR) are computed into the local buffer
// ct and then copied/accumulated into C.
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffa, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha1, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   alpha2, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t     dt          = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR          = pd_a; \
	const dim_t     NR          = pd_b; \
	const dim_t     PACKMR      = cs_a; \
	const dim_t     PACKNR      = rs_b; \
\
	/* Cast the micro-kernel address to its function pointer type. */ \
	PASTECH(ch,gemmtrsm_ukr_ft) \
	               gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
	PASTECH(ch,gemm_ukr_ft) \
	                   gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are chosen from the storage preference reported for
	   the gemm micro-kernel (col_pref), not from the strides of C: if the
	   micro-kernel prefers columns, ct is column-stored (rs_ct = 1,
	   cs_ct = MR); otherwise it is row-stored (rs_ct = NR, cs_ct = 1). */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero        = PASTEMAC(ch,0); \
	ctype* restrict minus_one   = PASTEMAC(ch,m1); \
	ctype* restrict a_cast      = a; \
	ctype* restrict b_cast      = b; \
	ctype* restrict c_cast      = c; \
	ctype* restrict alpha1_cast = alpha1; \
	ctype* restrict alpha2_cast = alpha2; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffa_i; \
	dim_t           k_full; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           k_a1112; \
	dim_t           k_a11; \
	dim_t           k_a12; \
	dim_t           off_a11; \
	dim_t           off_a12; \
	dim_t           i, j, ib; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	inc_t           istep_a; \
	inc_t           istep_b; \
	inc_t           off_scl; \
	inc_t           ss_a_num; \
	inc_t           ss_a_den; \
	inc_t           ps_a_cur; \
	inc_t           is_a_cur; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended for some odd register blocksizes. Abort if PACKMR
	   and NR are both odd, or if PACKNR and MR are both odd. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If matrix A is below the diagonal, it is implicitly zero.
	   So we do nothing. */ \
	if ( bli_is_strictly_below_diag_n( diagoffa, m, k ) ) return; \
\
	/* Compute k_full as k inflated up to a multiple of MR. This is
	   needed because some parameter combinations of trsm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of B (the non-triangular
	   matrix), which is used by 4m1/3m1 implementations, we need
	   this unreduced value of k. */ \
	k_full = ( k % MR != 0 ? k + MR - ( k % MR ) : k ); \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_a ) || \
	     bli_is_3mi_packed( schema_a ) || \
	     bli_is_rih_packed( schema_a ) ) off_scl = 2; \
	else                                 off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. Note that real-only, imag-only, and summed-only
	   packing formats are not applicable here since trsm is a two-
	   operand operation only (unlike trmm, which is capable of three-
	   operand). */ \
	if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \
	else                                 { ss_a_num = 1; ss_a_den = 1; } \
\
	/* If there is a zero region to the left of where the diagonal of A
	   intersects the top edge of the block, adjust the pointer to B and
	   treat this case as if the diagonal offset were zero. Note that we
	   don't need to adjust the pointer to A since packm would have simply
	   skipped over the region that was not stored. */ \
	if ( diagoffa > 0 ) \
	{ \
		i        = diagoffa; \
		k        = k - i; \
		diagoffa = 0; \
		b_cast   = b_cast + ( i * PACKNR ) / off_scl; \
	} \
\
	/* If there is a zero region below where the diagonal of A intersects the
	   right side of the block, shrink it to prevent "no-op" iterations from
	   executing. */ \
	if ( -diagoffa + k < m ) \
	{ \
		m = -diagoffa + k; \
	} \
\
	/* Check the k dimension, which needs to be a multiple of MR. If k
	   isn't a multiple of MR, we adjust it higher to satisfy the micro-
	   kernel, which is expecting to perform an MR x MR triangular solve.
	   This adjustment of k is consistent with what happened when A was
	   packed: all of its bottom/right edges were zero-padded, and
	   furthermore, the panel that stores the bottom-right corner of the
	   matrix has its diagonal extended into the zero-padded region (as
	   identity). This allows the trsm of that bottom-right panel to
	   proceed without producing any infs or NaNs that would infect the
	   "good" values of the corresponding block of B. */ \
	if ( k % MR != 0 ) k += MR - ( k % MR ); \
\
	/* NOTE: We don't need to check that m is a multiple of PACKMR since we
	   know that the underlying buffer was already allocated to have an m
	   dimension that is a multiple of PACKMR, with the region between the
	   last row and the next multiple of MR zero-padded accordingly. */ \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	/* A partial (edge) micro-panel counts as one more iteration. */ \
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k; \
	istep_b = PACKNR * k_full; \
\
	/* Round the imaginary strides up to an even value. */ \
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_b( istep_b, &aux ); \
\
	/* We don't bother querying the thrinfo_t node for the 1st loop because
	   we can't parallelize that loop in trsm due to the inter-iteration
	   dependencies that exist. */ \
	/*thrinfo_t* caucus = bli_thrinfo_sub_node( thread );*/ \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
\
	dim_t jr_start, jr_end; \
	dim_t jr_inc; \
\
	/* Use slab assignment of micropanels to threads in the 2nd loop.
	   NOTE: Parallelism in the 1st loop is unattainable due to the
	   inter-iteration dependencies present in trsm. */ \
	bli_thread_range_jrir_sl( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* a1 starts at the beginning of the A buffer and is advanced after
		   each micro-panel that is processed; c11 starts at the last row
		   micro-panel of C and is moved back by rstep_c every iteration. */ \
		a1  = a_cast; \
		c11 = c1 + (m_iter-1)*rstep_c; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( ib = 0; ib < m_iter; ++ib ) \
		{ \
			/* ib counts iterations; i is the index of the row micro-panel,
			   running from m_iter-1 down to 0. */ \
			i          = m_iter - 1 - ib; \
			diagoffa_i = diagoffa + ( doff_t )i*MR; \
\
			m_cur = ( bli_is_not_edge_b( ib, m_iter, m_left ) ? MR : m_left ); \
\
			/* If the current panel of A intersects the diagonal, use a
			   special micro-kernel that performs a fused gemm and trsm.
			   If the current panel of A resides above the diagonal, use a
			   regular gemm micro-kernel. Otherwise, if it is below the
			   diagonal, it was not packed (because it is implicitly zero)
			   and so we do nothing. */ \
			if ( bli_intersects_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict a11; \
				ctype* restrict a12; \
				ctype* restrict b11; \
				ctype* restrict b21; \
				ctype* restrict a2; \
\
				/* Compute various offsets into and lengths of parts of A. */ \
				off_a11 = diagoffa_i; \
				k_a1112 = k - off_a11;; \
				k_a11   = MR; \
				k_a12   = k_a1112 - MR; \
				off_a12 = off_a11 + k_a11; \
\
				/* Compute the panel stride for the current diagonal-
				   intersecting micro-panel. */ \
				is_a_cur  = k_a1112 * PACKMR; \
				is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
				ps_a_cur  = ( is_a_cur * ss_a_num ) / ss_a_den; \
\
				/* Compute the addresses of the triangular block A11 and the
				   panel A12. */ \
				a11 = a1; \
				/* a12 = a1 + ( k_a11 * PACKMR ) / off_scl; */ \
				a12 = bli_ptr_inc_by_frac( a1, sizeof( ctype ), k_a11 * PACKMR, off_scl ); \
\
				/* Compute the addresses of the block B11 and the panel
				   B21. */ \
				b11 = b1 + ( off_a11 * PACKNR ) / off_scl; \
				b21 = b1 + ( off_a12 * PACKNR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. On
				   the last m iteration the next panel of A wraps around to
				   the start of A, and the next panel of B wraps around to
				   the start of B if this is also the last n iteration for
				   this thread. */ \
				a2 = a1 + ps_a_cur; \
				if ( bli_is_last_iter_rr( ib, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( is_a_cur, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel, writing
					   directly to C. */ \
					gemmtrsm_ukr \
					( \
					  k_a12, \
					  alpha1_cast, \
					  a12, \
					  a11, \
					  b21, \
					  b11, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel, writing to
					   the temporary buffer ct. */ \
					gemmtrsm_ukr \
					( \
					  k_a12, \
					  alpha1_cast, \
					  a12, \
					  a11, \
					  b21, \
					  b11, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Copy the m_cur x n_cur result from ct to the edge of
					   C. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        c11, rs_c,  cs_c ); \
				} \
\
				a1 += ps_a_cur; \
			} \
			else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \
			{ \
				ctype* restrict a2; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1 + rstep_a; \
				if ( bli_is_last_iter_rr( ib, m_iter, 0, 1 ) ) \
				{ \
					a2 = a_cast; \
					b2 = b1; \
					if ( bli_is_last_iter_sl( j, n_iter, jr_tid, jr_nt ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_next_a( a2, &aux ); \
				bli_auxinfo_set_next_b( b2, &aux ); \
\
				/* Save the 4m1/3m1 imaginary stride of A to the auxinfo_t
				   object. */ \
				bli_auxinfo_set_is_a( istep_a, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel. */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  a1, \
					  b1, \
					  alpha2_cast, \
					  c11, rs_c, cs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel, overwriting ct (beta is
					   zero). */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  a1, \
					  b1, \
					  zero, \
					  ct, rs_ct, cs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Add the result to the edge of C. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        alpha2_cast, \
					                        c11, rs_c,  cs_c ); \
				} \
\
				a1 += rstep_a; \
			} \
\
			c11 -= rstep_c; \
		} \
	} \
\
	/* Disabled debugging output, kept for reference. Note that it refers
	   to off_a1112, which is not declared in this function. */ \
/*
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: a1 (diag)", MR, k_a1112, a1, 1, MR, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 (diag)", MR, NR, b11, NR, 1, "%6.3f", "" ); \
printf( "m_iter = %lu\n", m_iter ); \
printf( "m_cur = %lu\n", m_cur ); \
printf( "k = %lu\n", k ); \
printf( "diagoffa_i = %lu\n", diagoffa_i ); \
printf( "off_a1112 = %lu\n", off_a1112 ); \
printf( "k_a1112 = %lu\n", k_a1112 ); \
printf( "k_a12 = %lu\n", k_a12 ); \
printf( "k_a11 = %lu\n", k_a11 ); \
printf( "rs_c,cs_c = %lu %lu\n", rs_c, cs_c ); \
printf( "rs_ct,cs_ct= %lu %lu\n", rs_ct, cs_ct ); \
*/ \
\
/*
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 after (diag)", MR, NR, b11, NR, 1, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: b11 after (diag)", MR, NR, b11, NR, 1, "%5.2f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "trsm_lu_ker_var2: ct after (diag)", m_cur, n_cur, ct, rs_ct, cs_ct, "%5.2f", "" ); \
*/ \
}

INSERT_GENTFUNC_BASIC0( trsm_lu_ker_var2sl )

blis-0.6.1/frame/3/trsm/other/bli_trsm_rl_ker_var2.c000066400000000000000000000467211360743507500223040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

#define FUNCPTR_T gemm_fp

// Signature of the datatype-specific macro-kernel instances generated from
// the GENTFUNC template further below. The dispatcher bli_trsm_rl_ker_var2()
// calls the typed implementation through a pointer of this type.
typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffb,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha1,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   alpha2,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of typed macro-kernel function pointers. It is indexed with the
// execution datatype of C in bli_trsm_rl_ker_var2() below.
// NOTE(review): presumably GENARRAY expands to an initializer with one entry
// per datatype; its definition is not visible here -- confirm.
static FUNCPTR_T GENARRAY(ftypes,trsm_rl_ker_var2);


//
// Object-based front end of the right-side/lower-triangular trsm
// macro-kernel.
//
// Queries the diagonal offset of b, the pack schemas of a and b, the
// dimensions (m and n from c, k from the width of a), the buffers, strides,
// panel dimensions and panel strides of the operands, and the internal
// scalars of a and c. It then forwards everything to the typed
// implementation selected by the execution datatype of c.
//
// The cntl argument is not referenced in this function.
//
void bli_trsm_rl_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffb  = bli_obj_diag_offset( b );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	void*     buf_alpha1;
	void*     buf_alpha2;

	FUNCPTR_T f;

	// Grab the address of the internal scalar buffer for the scalar
	// attached to A (the non-triangular matrix). This will be the alpha
	// scalar used in the gemmtrsm subproblems (ie: the scalar that would
	// be applied to the packed copy of A prior to it being updated by
	// the trsm subproblem). This scalar may be unit, if for example it
	// was applied during packing.
	buf_alpha1 = bli_obj_internal_scalar_buffer( a );

	// Grab the address of the internal scalar buffer for the scalar
	// attached to C. This will be the "beta" scalar used in the gemm-only
	// subproblems that correspond to micro-panels that do not intersect
	// the diagonal. We need this separate scalar because it's possible
	// that the alpha attached to B was reset, if it was applied during
	// packing.
	// NOTE(review): alpha1 above is read from A, not B; the mention of "B"
	// here looks like a carry-over from the left-side variants -- confirm.
	buf_alpha2 = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffb,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha1,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_alpha2,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// Typed implementation of the right-side/lower-triangular trsm macro-kernel.
// GENTFUNC is redefined here as the function template.
// NOTE(review): INSERT_GENTFUNC_BASIC0() after the template presumably
// instantiates it once per datatype; that macro is defined elsewhere.
//
// Parameters, as they are used in the body:
//   diagoffb            diagonal offset of the packed triangular matrix B.
//   schema_a, schema_b  pack schemas of A and B; stored swapped in the
//                       auxinfo_t. schema_b also selects off_scl and the
//                       3/2 panel-stride scaling.
//   m, n, k             problem dimensions; k and n may be adjusted locally.
//   alpha1              scalar handed to the fused gemmtrsm micro-kernel.
//   a, cs_a, pd_a, ps_a packed A; cs_a is aliased to PACKMR, pd_a to MR,
//                       and ps_a is the step between micro-panels of A.
//   b, rs_b, pd_b, ps_b packed B; rs_b is aliased to PACKNR, pd_b to NR,
//                       and ps_b is the step between gemm-only micro-panels
//                       of B.
//   alpha2              scalar applied to C in the gemm-only updates.
//   c, rs_c, cs_c       matrix that is updated, with its strides.
//   cntx                context queried for the micro-kernels and for the
//                       gemm micro-kernel's storage preference.
//   rntm                not referenced in the body.
//   thread              passed to bli_trsm_my_iter() and
//                       bli_thread_num_threads() to partition the m loop.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffb, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha1, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   alpha2, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t     dt          = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR          = pd_a; \
	const dim_t     NR          = pd_b; \
	const dim_t     PACKMR      = cs_a; \
	const dim_t     PACKNR      = rs_b; \
\
	/* Cast the micro-kernel address to its function pointer type. */ \
	/* NOTE: We use the upper-triangular gemmtrsm ukernel because, while
	   the current macro-kernel targets the "rl" case (right-side/lower-
	   triangular), it becomes upper-triangular after the kernel operation
	   is transposed so that all kernel instances are of the "left"
	   variety (since those are the only trsm ukernels that exist). */ \
	PASTECH(ch,gemmtrsm_ukr_ft) \
	               gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
	PASTECH(ch,gemm_ukr_ft) \
	                   gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	/* NOTE(review): the strides below are actually chosen from the gemm
	   micro-kernel's column/row preference (col_pref), not from rs_c and
	   cs_c -- confirm that the sentence above is still accurate. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero        = PASTEMAC(ch,0); \
	ctype* restrict minus_one   = PASTEMAC(ch,m1); \
	ctype* restrict a_cast      = a; \
	ctype* restrict b_cast      = b; \
	ctype* restrict c_cast      = c; \
	ctype* restrict alpha1_cast = alpha1; \
	ctype* restrict alpha2_cast = alpha2; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffb_j; \
	dim_t           k_full; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           k_b1121; \
	dim_t           k_b11; \
	dim_t           k_b21; \
	dim_t           off_b11; \
	dim_t           off_b21; \
	dim_t           i, j, jb; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	inc_t           istep_a; \
	inc_t           istep_b; \
	inc_t           off_scl; \
	inc_t           ss_b_num; \
	inc_t           ss_b_den; \
	inc_t           ps_b_cur; \
	inc_t           is_b_cur; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKNR
	     pd_a == NR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKMR
	     cs_b == 1
	     pd_b == MR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)

	  Note that MR/NR and PACKMR/PACKNR have been swapped to reflect the
	  swapping of values in the control tree (ie: those values used when
	  packing). This swapping is needed since we cast right-hand trsm in
	  terms of transposed left-hand trsm. So, if we're going to be
	  transposing the operation, then A needs to be packed with NR and B
	  needs to be packed with MR (remember: B is the triangular matrix in
	  the right-hand side parameter case).
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if both MR and NR are odd. The test below aborts
	   when PACKMR and NR are both odd, or PACKNR and MR are both odd. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If the current panel of B is entirely above its diagonal,
	   it is implicitly zero. So we do nothing. */ \
	if ( bli_is_strictly_above_diag_n( diagoffb, k, n ) ) return; \
\
	/* Compute k_full as k inflated up to a multiple of NR. This is
	   needed because some parameter combinations of trsm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of A (the non-triangular
	   matrix; see istep_a below), which is used by 4m1/3m1
	   implementations, we need this unreduced value of k. */ \
	k_full = ( k % NR != 0 ? k + NR - ( k % NR ) : k ); \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_b ) || \
	     bli_is_3mi_packed( schema_b ) || \
	     bli_is_rih_packed( schema_b ) ) off_scl = 2; \
	else                                 off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. Note that real-only, imag-only, and summed-only
	   packing formats are not applicable here since trsm is a two-
	   operand operation only (unlike trmm, which is capable of three-
	   operand). */ \
	if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
	else                                 { ss_b_num = 1; ss_b_den = 1; } \
\
	/* If there is a zero region above where the diagonal of B intersects
	   the left edge of the panel, adjust the pointer to A and treat this
	   case as if the diagonal offset were zero. Note that we don't need to
	   adjust the pointer to B since packm would have simply skipped over
	   the region that was not stored. */ \
	if ( diagoffb < 0 ) \
	{ \
		j        = -diagoffb; \
		k        = k - j; \
		diagoffb = 0; \
		a_cast   = a_cast + ( j * PACKMR ) / off_scl; \
	} \
\
	/* If there is a zero region to the right of where the diagonal
	   of B intersects the bottom of the panel, shrink it so that
	   we can index to the correct place in C (corresponding to the
	   part of the panel of B that was packed).
	   NOTE: This is NOT being done to skip over "no-op" iterations,
	   as with the trsm_lu macro-kernel. This MUST be done for correct
	   execution because we use n (via n_iter) to compute diagonal and
	   index offsets for backwards movement through B. */ \
	if ( diagoffb + k < n ) \
	{ \
		n = diagoffb + k; \
	} \
\
	/* Check the k dimension, which needs to be a multiple of NR. If k
	   isn't a multiple of NR, we adjust it higher to satisfy the micro-
	   kernel, which is expecting to perform an NR x NR triangular solve.
	   This adjustment of k is consistent with what happened when B was
	   packed: all of its bottom/right edges were zero-padded, and
	   furthermore, the panel that stores the bottom-right corner of the
	   matrix has its diagonal extended into the zero-padded region (as
	   identity). This allows the trsm of that bottom-right panel to
	   proceed without producing any infs or NaNs that would infect the
	   "good" values of the corresponding block of A. */ \
	if ( k % NR != 0 ) k += NR - ( k % NR ); \
\
	/* NOTE: We don't need to check that n is a multiple of PACKNR since we
	   know that the underlying buffer was already allocated to have an n
	   dimension that is a multiple of PACKNR, with the region between the
	   last column and the next multiple of NR zero-padded accordingly. */ \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k_full; \
	istep_b = PACKNR * k; \
\
	/* Round odd imaginary strides up to the next even value. */ \
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object.
	   NOTE: We swap the values for A and B since the triangular
	   "A" matrix is actually contained within B. */ \
	bli_auxinfo_set_schema_a( schema_b, &aux ); \
	bli_auxinfo_set_schema_b( schema_a, &aux ); \
\
	/* Save the imaginary stride of A to the auxinfo_t object.
	   NOTE: We swap the values for A and B since the triangular
	   "A" matrix is actually contained within B. */ \
	bli_auxinfo_set_is_b( istep_a, &aux ); \
\
	b1 = b_cast; \
	c1 = c_cast; \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( jb = 0; jb < n_iter; ++jb ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b11; \
		ctype* restrict b21; \
		ctype* restrict b2; \
\
		/* jb counts iterations; j is the column-panel index being
		   processed, which runs from n_iter-1 down to 0. Since c1 is
		   decremented by cstep_c at the end of every iteration, c11
		   below addresses column panel j of C. */ \
		j          = n_iter - 1 - jb; \
		diagoffb_j = diagoffb - ( doff_t )j*NR; \
		a1         = a_cast; \
		c11        = c1 + (n_iter-1)*cstep_c; \
\
		n_cur = ( bli_is_not_edge_b( jb, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* If the current panel of B intersects the diagonal, use a
		   special micro-kernel that performs a fused gemm and trsm.
		   If the current panel of B resides below the diagonal, use a
		   regular gemm micro-kernel. Otherwise, if it is above the
		   diagonal, it was not packed (because it is implicitly zero)
		   and so we do nothing. */ \
		if ( bli_intersects_diag_n( diagoffb_j, k, NR ) ) \
		{ \
			/* Determine the offset to and length of the panel that was
			   packed so we can index into the corresponding location in A. */ \
			off_b11   = bli_max( -diagoffb_j, 0 ); \
			k_b1121   = k - off_b11; \
			k_b11     = NR; \
			k_b21     = k_b1121 - NR; \
			off_b21   = off_b11 + k_b11; \
\
			/* Compute the addresses of the triangular block B11 and the
			   panel B21. */ \
			b11       = b1; \
			/* b21 = b1 + ( k_b11 * PACKNR ) / off_scl; */ \
			b21 = bli_ptr_inc_by_frac( b1, sizeof( ctype ), k_b11 * PACKNR, off_scl ); \
\
			/* Compute the panel stride for the current micro-panel. */ \
			is_b_cur  = k_b1121 * PACKNR; \
			is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
			ps_b_cur  = ( is_b_cur * ss_b_num ) / ss_b_den; \
\
			/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
			   object.
			   NOTE: We swap the values for A and B since the triangular
			   "A" matrix is actually contained within B. */ \
			bli_auxinfo_set_is_a( is_b_cur, &aux ); \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = 0; i < m_iter; ++i ) \
			{ \
				/* Only iterations selected by bli_trsm_my_iter() are
				   computed; a1 and c11 are advanced for every i. */ \
				if( bli_trsm_my_iter( i, thread ) ){ \
\
				ctype* restrict a11; \
				ctype* restrict a12; \
				ctype* restrict a2; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the A11 block and A12 panel. */ \
				a11 = a1 + ( off_b11 * PACKMR ) / off_scl; \
				a12 = a1 + ( off_b21 * PACKMR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				/* NOTE(review): the test on i + number-of-threads replaces
				   the commented-out last-iteration test; presumably it
				   detects this thread's final i iteration -- confirm
				   against bli_trsm_my_iter(). */ \
				a2 = a1; \
				/*if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) */\
				if ( i + bli_thread_num_threads(thread) >= m_iter ) \
				{ \
					a2 = a_cast; \
					b2 = b1 + ps_b_cur; \
					if ( bli_is_last_iter( jb, n_iter, 0, 1 ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. NOTE: We swap the values for A and B since the
				   triangular "A" matrix is actually contained within B. */ \
				bli_auxinfo_set_next_a( b2, &aux ); \
				bli_auxinfo_set_next_b( a2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel. The
					   B-derived operands are passed first and C's strides
					   are passed as (cs_c, rs_c), matching the transposed
					   formulation noted at the top of this function. */ \
					gemmtrsm_ukr \
					( \
					  k_b21, \
					  alpha1_cast, \
					  b21, \
					  b11, \
					  a12, \
					  a11, \
					  c11, cs_c, rs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel, writing to
					   the temporary buffer ct (strides also swapped). */ \
					gemmtrsm_ukr \
					( \
					  k_b21, \
					  alpha1_cast, \
					  b21, \
					  b11, \
					  a12, \
					  a11, \
					  ct, cs_ct, rs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Copy the result to the bottom edge of C. Only the
					   m_cur x n_cur part of ct is copied. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        c11, rs_c,  cs_c ); \
				} \
				} \
\
				a1  += rstep_a; \
				c11 += rstep_c; \
			} \
\
			/* Diagonal-intersecting panels advance by their own (shorter)
			   panel stride rather than by cstep_b. */ \
			b1 += ps_b_cur; \
		} \
		else if ( bli_is_strictly_below_diag_n( diagoffb_j, k, NR ) ) \
		{ \
			/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
			   object.
			   NOTE: We swap the values for A and B since the triangular
			   "A" matrix is actually contained within B. */ \
			bli_auxinfo_set_is_a( istep_b, &aux ); \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = 0; i < m_iter; ++i ) \
			{ \
				/* Only iterations selected by bli_trsm_my_iter() are
				   computed; a1 and c11 are advanced for every i. */ \
				if( bli_trsm_my_iter( i, thread ) ){ \
\
				ctype* restrict a2; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				/*if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) */\
				if ( i + bli_thread_num_threads(thread) >= m_iter ) \
				{ \
					a2 = a_cast; \
					b2 = b1 + cstep_b; \
					if ( bli_is_last_iter( jb, n_iter, 0, 1 ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. NOTE: We swap the values for A and B since the
				   triangular "A" matrix is actually contained within B. */ \
				bli_auxinfo_set_next_a( b2, &aux ); \
				bli_auxinfo_set_next_b( a2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel with alpha = -1 and
					   beta = alpha2, updating C in place. */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  b1, \
					  a1, \
					  alpha2_cast, \
					  c11, cs_c, rs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel with beta = zero so
					   that the product is written to ct. */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  b1, \
					  a1, \
					  zero, \
					  ct, cs_ct, rs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Add the result to the edge of C. */ \
					/* NOTE(review): presumably c11 := ct + alpha2 * c11
					   over the m_cur x n_cur region -- confirm against
					   the xpbys_mxn definition. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        alpha2_cast, \
					                        c11, rs_c,  cs_c ); \
				} \
				} \
\
				a1  += rstep_a; \
				c11 += rstep_c; \
			} \
\
			b1 += cstep_b; \
		} \
\
		/* Step the base pointer of C back by one column panel; see the
		   computation of c11 at the top of the loop. */ \
		c1 -= cstep_c; \
	} \
}

INSERT_GENTFUNC_BASIC0( trsm_rl_ker_var2 )

blis-0.6.1/frame/3/trsm/other/bli_trsm_ru_ker_var2.c000066400000000000000000000461211360743507500223070ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

#define FUNCPTR_T gemm_fp

// Signature of the datatype-specific macro-kernel instances generated from
// the GENTFUNC template further below. The dispatcher bli_trsm_ru_ker_var2()
// calls the typed implementation through a pointer of this type.
typedef void (*FUNCPTR_T)
     (
       doff_t  diagoffb,
       pack_t  schema_a,
       pack_t  schema_b,
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha1,
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a,
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b,
       void*   alpha2,
       void*   c, inc_t rs_c, inc_t cs_c,
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Table of typed macro-kernel function pointers. It is indexed with the
// execution datatype of C in bli_trsm_ru_ker_var2() below.
// NOTE(review): presumably GENARRAY expands to an initializer with one entry
// per datatype; its definition is not visible here -- confirm.
static FUNCPTR_T GENARRAY(ftypes,trsm_ru_ker_var2);


//
// Object-based front end of the right-side/upper-triangular trsm
// macro-kernel.
//
// Queries the diagonal offset of b, the pack schemas of a and b, the
// dimensions (m and n from c, k from the width of a), the buffers, strides,
// panel dimensions and panel strides of the operands, and the internal
// scalars of a and c. It then forwards everything to the typed
// implementation selected by the execution datatype of c.
//
// The cntl argument is not referenced in this function.
//
void bli_trsm_ru_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	doff_t    diagoffb  = bli_obj_diag_offset( b );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	void*     buf_alpha1;
	void*     buf_alpha2;

	FUNCPTR_T f;

	// Grab the address of the internal scalar buffer for the scalar
	// attached to A (the non-triangular matrix). This will be the alpha
	// scalar used in the gemmtrsm subproblems (ie: the scalar that would
	// be applied to the packed copy of A prior to it being updated by
	// the trsm subproblem). This scalar may be unit, if for example it
	// was applied during packing.
	buf_alpha1 = bli_obj_internal_scalar_buffer( a );

	// Grab the address of the internal scalar buffer for the scalar
	// attached to C. This will be the "beta" scalar used in the gemm-only
	// subproblems that correspond to micro-panels that do not intersect
	// the diagonal. We need this separate scalar because it's possible
	// that the alpha attached to B was reset, if it was applied during
	// packing.
	// NOTE(review): alpha1 above is read from A, not B; the mention of "B"
	// here looks like a carry-over from the left-side variants -- confirm.
	buf_alpha2 = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( diagoffb,
	   schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha1,
	   buf_a, cs_a, pd_a, ps_a,
	   buf_b, rs_b, pd_b, ps_b,
	   buf_alpha2,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// Typed implementation of the right-side/upper-triangular trsm macro-kernel.
// GENTFUNC is redefined here as the function template.
// NOTE(review): INSERT_GENTFUNC_BASIC0() after the template presumably
// instantiates it once per datatype; that macro is defined elsewhere.
//
// Parameters, as they are used in the body:
//   diagoffb            diagonal offset of the packed triangular matrix B.
//   schema_a, schema_b  pack schemas of A and B; stored swapped in the
//                       auxinfo_t. schema_b also selects off_scl and the
//                       3/2 panel-stride scaling.
//   m, n, k             problem dimensions; n and k may be adjusted locally.
//   alpha1              scalar handed to the fused gemmtrsm micro-kernel.
//   a, cs_a, pd_a, ps_a packed A; cs_a is aliased to PACKMR, pd_a to MR,
//                       and ps_a is the step between micro-panels of A.
//   b, rs_b, pd_b, ps_b packed B; rs_b is aliased to PACKNR, pd_b to NR,
//                       and ps_b is the step between gemm-only micro-panels
//                       of B.
//   alpha2              scalar applied to C in the gemm-only updates.
//   c, rs_c, cs_c       matrix that is updated, with its strides.
//   cntx                context queried for the micro-kernels and for the
//                       gemm micro-kernel's storage preference.
//   rntm                not referenced in the body.
//   thread              passed to bli_trsm_my_iter() and
//                       bli_thread_num_threads() to partition the m loop.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
     ( \
       doff_t  diagoffb, \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha1, \
       void*   a, inc_t cs_a, dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, dim_t pd_b, inc_t ps_b, \
       void*   alpha2, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t     dt          = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR          = pd_a; \
	const dim_t     NR          = pd_b; \
	const dim_t     PACKMR      = cs_a; \
	const dim_t     PACKNR      = rs_b; \
\
	/* Cast the micro-kernel address to its function pointer type. */ \
	/* NOTE: We use the lower-triangular gemmtrsm ukernel because, while
	   the current macro-kernel targets the "ru" case (right-side/upper-
	   triangular), it becomes lower-triangular after the kernel operation
	   is transposed so that all kernel instances are of the "left"
	   variety (since those are the only trsm ukernels that exist). */ \
	PASTECH(ch,gemmtrsm_ukr_ft) \
	               gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
	PASTECH(ch,gemm_ukr_ft) \
	                   gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	/* NOTE(review): the strides below are actually chosen from the gemm
	   micro-kernel's column/row preference (col_pref), not from rs_c and
	   cs_c -- confirm that the sentence above is still accurate. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero        = PASTEMAC(ch,0); \
	ctype* restrict minus_one   = PASTEMAC(ch,m1); \
	ctype* restrict a_cast      = a; \
	ctype* restrict b_cast      = b; \
	ctype* restrict c_cast      = c; \
	ctype* restrict alpha1_cast = alpha1; \
	ctype* restrict alpha2_cast = alpha2; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	doff_t          diagoffb_j; \
	dim_t           k_full; \
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	dim_t           k_b0111; \
	dim_t           k_b01; \
	dim_t           off_b01; \
	dim_t           off_b11; \
	dim_t           i, j; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	inc_t           istep_a; \
	inc_t           istep_b; \
	inc_t           off_scl; \
	inc_t           ss_b_num; \
	inc_t           ss_b_den; \
	inc_t           ps_b_cur; \
	inc_t           is_b_cur; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKNR
	     pd_a == NR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKMR
	     cs_b == 1
	     pd_b == MR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)

	  Note that MR/NR and PACKMR/PACKNR have been swapped to reflect the
	  swapping of values in the control tree (ie: those values used when
	  packing). This swapping is needed since we cast right-hand trsm in
	  terms of transposed left-hand trsm. So, if we're going to be
	  transposing the operation, then A needs to be packed with NR and B
	  needs to be packed with MR (remember: B is the triangular matrix in
	  the right-hand side parameter case).
	*/ \
\
	/* Safety trap: Certain indexing within this macro-kernel does not
	   work as intended if both MR and NR are odd. The test below aborts
	   when PACKMR and NR are both odd, or PACKNR and MR are both odd. */ \
	if ( ( bli_is_odd( PACKMR ) && bli_is_odd( NR ) ) || \
	     ( bli_is_odd( PACKNR ) && bli_is_odd( MR ) ) ) bli_abort(); \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Safeguard: If the current panel of B is entirely below its diagonal,
	   it is implicitly zero. So we do nothing. */ \
	if ( bli_is_strictly_below_diag_n( diagoffb, k, n ) ) return; \
\
	/* Compute k_full as k inflated up to a multiple of NR. This is
	   needed because some parameter combinations of trsm reduce k
	   to advance past zero regions in the triangular matrix, and
	   when computing the imaginary stride of A (the non-triangular
	   matrix; see istep_a below), which is used by 4m1/3m1
	   implementations, we need this unreduced value of k. */ \
	k_full = ( k % NR != 0 ? k + NR - ( k % NR ) : k ); \
\
	/* Compute indexing scaling factor for 4m or 3m. This is
	   needed because one of the packing register blocksizes (PACKMR
	   or PACKNR) is used to index into the micro-panels of the non-
	   triangular matrix when computing with a diagonal-intersecting
	   micro-panel of the triangular matrix. In the case of 4m or 3m,
	   real values are stored in both sub-panels, and so the indexing
	   needs to occur in units of real values. The value computed
	   here is divided into the complex pointer offset to cause the
	   pointer to be advanced by the correct value. */ \
	if ( bli_is_4mi_packed( schema_b ) || \
	     bli_is_3mi_packed( schema_b ) || \
	     bli_is_rih_packed( schema_b ) ) off_scl = 2; \
	else                                 off_scl = 1; \
\
	/* Compute the storage stride scaling. Usually this is just 1.
	   However, in the case of interleaved 3m, we need to scale the
	   offset by 3/2. Note that real-only, imag-only, and summed-only
	   packing formats are not applicable here since trsm is a two-
	   operand operation only (unlike trmm, which is capable of three-
	   operand). */ \
	if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \
	else                                 { ss_b_num = 1; ss_b_den = 1; } \
\
	/* If there is a zero region to the left of where the diagonal of B
	   intersects the top edge of the panel, adjust the pointer to C and
	   treat this case as if the diagonal offset were zero. This skips over
	   the region that was not packed. (Note we assume the diagonal offset
	   is a multiple of MR; this assumption will hold as long as the cache
	   blocksizes are each a multiple of MR and NR.) */ \
	if ( diagoffb > 0 ) \
	{ \
		j        = diagoffb; \
		n        = n - j; \
		diagoffb = 0; \
		c_cast   = c_cast + (j  )*cs_c; \
	} \
\
	/* If there is a zero region below where the diagonal of B intersects the
	   right side of the block, shrink it to prevent "no-op" iterations from
	   executing. */ \
	if ( -diagoffb + n < k ) \
	{ \
		k = -diagoffb + n; \
	} \
\
	/* Check the k dimension, which needs to be a multiple of NR. If k
	   isn't a multiple of NR, we adjust it higher to satisfy the micro-
	   kernel, which is expecting to perform an NR x NR triangular solve.
	   This adjustment of k is consistent with what happened when B was
	   packed: all of its bottom/right edges were zero-padded, and
	   furthermore, the panel that stores the bottom-right corner of the
	   matrix has its diagonal extended into the zero-padded region (as
	   identity). This allows the trsm of that bottom-right panel to
	   proceed without producing any infs or NaNs that would infect the
	   "good" values of the corresponding block of A. */ \
	if ( k % NR != 0 ) k += NR - ( k % NR ); \
\
	/* NOTE: We don't need to check that n is a multiple of PACKNR since we
	   know that the underlying buffer was already allocated to have an n
	   dimension that is a multiple of PACKNR, with the region between the
	   last column and the next multiple of NR zero-padded accordingly. */ \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	istep_a = PACKMR * k_full; \
	istep_b = PACKNR * k; \
\
	/* Round odd imaginary strides up to the next even value. */ \
	if ( bli_is_odd( istep_a ) ) istep_a += 1; \
	if ( bli_is_odd( istep_b ) ) istep_b += 1; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object.
	   NOTE: We swap the values for A and B since the triangular
	   "A" matrix is actually contained within B. */ \
	bli_auxinfo_set_schema_a( schema_b, &aux ); \
	bli_auxinfo_set_schema_b( schema_a, &aux ); \
\
	/* Save the imaginary stride of A to the auxinfo_t object.
	   NOTE: We swap the values for A and B since the triangular
	   "A" matrix is actually contained within B. */ \
	bli_auxinfo_set_is_b( istep_a, &aux ); \
\
	b1 = b_cast; \
	c1 = c_cast; \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = 0; j < n_iter; ++j ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b01; \
		ctype* restrict b11; \
		ctype* restrict b2; \
\
		diagoffb_j = diagoffb - ( doff_t )j*NR; \
		a1         = a_cast; \
		c11        = c1; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* If the current panel of B intersects the diagonal, use a
		   special micro-kernel that performs a fused gemm and trsm.
		   If the current panel of B resides above the diagonal, use a
		   regular gemm micro-kernel. Otherwise, if it is below the
		   diagonal, it was not packed (because it is implicitly zero)
		   and so we do nothing. */ \
		if ( bli_intersects_diag_n( diagoffb_j, k, NR ) ) \
		{ \
			/* Determine the offset to and length of the panel that was
			   packed so we can index into the corresponding location in A. */ \
			off_b01   = 0; \
			k_b0111   = bli_min( k, -diagoffb_j + NR ); \
			k_b01     = k_b0111 - NR; \
			off_b11   = k_b01; \
\
			/* Compute the addresses of the panel B01 and the triangular
			   block B11. */ \
			b01       = b1; \
			/* b11 = b1 + ( k_b01 * PACKNR ) / off_scl; */ \
			b11 = bli_ptr_inc_by_frac( b1, sizeof( ctype ), k_b01 * PACKNR, off_scl ); \
\
			/* Compute the panel stride for the current micro-panel. */ \
			is_b_cur  = k_b0111 * PACKNR; \
			is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
			ps_b_cur  = ( is_b_cur * ss_b_num ) / ss_b_den; \
\
			/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
			   object.
			   NOTE: We swap the values for A and B since the triangular
			   "A" matrix is actually contained within B. */ \
			bli_auxinfo_set_is_a( is_b_cur, &aux ); \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = 0; i < m_iter; ++i ) \
			{ \
				/* Only iterations selected by bli_trsm_my_iter() are
				   computed; a1 and c11 are advanced for every i. */ \
				if( bli_trsm_my_iter( i, thread ) ){ \
\
				ctype* restrict a10; \
				ctype* restrict a11; \
				ctype* restrict a2; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the A10 panel and A11 block. */ \
				a10 = a1 + ( off_b01 * PACKMR ) / off_scl; \
				a11 = a1 + ( off_b11 * PACKMR ) / off_scl; \
\
				/* Compute the addresses of the next panels of A and B. */ \
				/* NOTE(review): the test on i + number-of-threads replaces
				   the commented-out last-iteration test; presumably it
				   detects this thread's final i iteration -- confirm
				   against bli_trsm_my_iter(). */ \
				a2 = a1; \
				/*if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) */\
				if ( i + bli_thread_num_threads(thread) >= m_iter ) \
				{ \
					a2 = a_cast; \
					b2 = b1 + ps_b_cur; \
					if ( bli_is_last_iter( j, n_iter, 0, 1 ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. NOTE: We swap the values for A and B since the
				   triangular "A" matrix is actually contained within B. */ \
				bli_auxinfo_set_next_a( b2, &aux ); \
				bli_auxinfo_set_next_b( a2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel. The
					   B-derived operands are passed first and C's strides
					   are passed as (cs_c, rs_c), matching the transposed
					   formulation noted at the top of this function. */ \
					gemmtrsm_ukr \
					( \
					  k_b01, \
					  alpha1_cast, \
					  b01, \
					  b11, \
					  a10, \
					  a11, \
					  c11, cs_c, rs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the fused gemm/trsm micro-kernel, writing to
					   the temporary buffer ct (strides also swapped). */ \
					gemmtrsm_ukr \
					( \
					  k_b01, \
					  alpha1_cast, \
					  b01, \
					  b11, \
					  a10, \
					  a11, \
					  ct, cs_ct, rs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Copy the result to the bottom edge of C. Only the
					   m_cur x n_cur part of ct is copied. */ \
					PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        c11, rs_c,  cs_c ); \
				} \
				} \
\
				a1  += rstep_a; \
				c11 += rstep_c; \
			} \
\
			/* Diagonal-intersecting panels advance by their own (shorter)
			   panel stride rather than by cstep_b. */ \
			b1 += ps_b_cur; \
		} \
		else if ( bli_is_strictly_above_diag_n( diagoffb_j, k, NR ) ) \
		{ \
			/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
			   object.
			   NOTE: We swap the values for A and B since the triangular
			   "A" matrix is actually contained within B. */ \
			bli_auxinfo_set_is_a( istep_b, &aux ); \
\
			/* Loop over the m dimension (MR rows at a time). */ \
			for ( i = 0; i < m_iter; ++i ) \
			{ \
				/* Only iterations selected by bli_trsm_my_iter() are
				   computed; a1 and c11 are advanced for every i. */ \
				if( bli_trsm_my_iter( i, thread ) ){ \
\
				ctype* restrict a2; \
\
				m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
				/* Compute the addresses of the next panels of A and B. */ \
				a2 = a1; \
				/*if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) */\
				if ( i + bli_thread_num_threads(thread) >= m_iter ) \
				{ \
					a2 = a_cast; \
					b2 = b1 + cstep_b; \
					if ( bli_is_last_iter( j, n_iter, 0, 1 ) ) \
						b2 = b_cast; \
				} \
\
				/* Save addresses of next panels of A and B to the auxinfo_t
				   object. NOTE: We swap the values for A and B since the
				   triangular "A" matrix is actually contained within B. */ \
				bli_auxinfo_set_next_a( b2, &aux ); \
				bli_auxinfo_set_next_b( a2, &aux ); \
\
				/* Handle interior and edge cases separately. */ \
				if ( m_cur == MR && n_cur == NR ) \
				{ \
					/* Invoke the gemm micro-kernel with alpha = -1 and
					   beta = alpha2, updating C in place. */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  b1, \
					  a1, \
					  alpha2_cast, \
					  c11, cs_c, rs_c, \
					  &aux, \
					  cntx  \
					); \
				} \
				else \
				{ \
					/* Invoke the gemm micro-kernel with beta = zero so
					   that the product is written to ct. */ \
					gemm_ukr \
					( \
					  k, \
					  minus_one, \
					  b1, \
					  a1, \
					  zero, \
					  ct, cs_ct, rs_ct, \
					  &aux, \
					  cntx  \
					); \
\
					/* Add the result to the edge of C. */ \
					/* NOTE(review): presumably c11 := ct + alpha2 * c11
					   over the m_cur x n_cur region -- confirm against
					   the xpbys_mxn definition. */ \
					PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
					                        ct,  rs_ct, cs_ct, \
					                        alpha2_cast, \
					                        c11, rs_c,  cs_c ); \
				} \
				} \
\
				a1  += rstep_a; \
				c11 += rstep_c; \
			} \
\
			b1 += cstep_b; \
		} \
\
		/* Advance the base pointer of C to the next column panel. */ \
		c1 += cstep_c; \
	} \
}

INSERT_GENTFUNC_BASIC0( trsm_ru_ker_var2 )

blis-0.6.1/frame/base/000077500000000000000000000000001360743507500144655ustar00rootroot00000000000000blis-0.6.1/frame/base/bli_apool.c000066400000000000000000000427251360743507500166030ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_apool_init ( apool_t* restrict apool ) { // Query the mutex from the apool_t. bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool ); // Initialize the mutex. //*mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; bli_pthread_mutex_init( mutex, NULL ); // We choose to start with: // - an empty pool // - an initial block_ptrs_len of 8 // - a single element in each initial array_t (though this is moot with // num_blocks = 0). const siz_t num_blocks = 0; siz_t block_ptrs_len = 8; const siz_t num_elem = 1; // NOTE: Unlike in the bli_pool API, apool_t allocates block_ptrs as an // array of array_t* instead of an array of pblk_t. Why? We don't need to // track the size of each block, thus we don't need the block_size field // of pblk_t. That leaves only the void* field, and since we know apool_t // will always contain "blocks" that are really array_t structs, we can // make block_ptrs an array of array_t*. // We formally set the block_size and align_size fields of the underlying // pool, even though they won't be queried. (They are used from hard-coded // values in bli_apool_alloc_block().) const siz_t block_size = sizeof( array_t ); const siz_t align_size = 64; // Query the underlying pool_t from the apool_t. pool_t* restrict pool = bli_apool_pool( apool ); // Set the default array_t length of the apool_t. 
bli_apool_set_def_array_len( num_elem, apool ); // ------------------------------------------------------------------------- // Make sure that block_ptrs_len is at least num_blocks. block_ptrs_len = bli_max( block_ptrs_len, num_blocks ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_init(): allocating block_ptrs (length %d): ", ( int )block_ptrs_len ); #endif // Allocate the block_ptrs array. array_t** restrict block_ptrs = bli_malloc_intl( block_ptrs_len * sizeof( array_t* ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_init(): allocating %d array_t.\n", ( int )num_blocks ); fflush( stdout ); #endif // Allocate and initialize each entry in the block_ptrs array. for ( dim_t i = 0; i < num_blocks; ++i ) { // Pass in num_elem so the function knows how many elements to // initially have in each array_t. bli_apool_alloc_block ( num_elem, &(block_ptrs[i]) ); } // NOTE: The semantics of top_index approximate a stack, where a "full" // stack (no blocks checked out) is one where top_index == 0 and an empty // stack (all blocks checked out) one where top_index == num_blocks. // (Here, num_blocks tracks the number of blocks currently allocated as // part of the pool.) This "orientation" of the stack was chosen // intentionally, in contrast to one where top_index == -1 means the // stack is empty and top_index = num_blocks - 1 means the stack is // full. The chosen scheme allows one to conceptualize the stack as a // number line in which blocks are checked out from lowest to highest, // and additional blocks are added at the higher end. // Initialize the pool_t structure. // NOTE: We don't use the malloc_fp and free_fp fields at the apool_t // level. Nevertheless, we set them to NULL. 
bli_pool_set_block_ptrs( block_ptrs, pool ); bli_pool_set_block_ptrs_len( block_ptrs_len, pool ); bli_pool_set_top_index( 0, pool ); bli_pool_set_num_blocks( num_blocks, pool ); bli_pool_set_block_size( block_size, pool ); bli_pool_set_align_size( align_size, pool ); bli_pool_set_malloc_fp( NULL, pool ); bli_pool_set_free_fp( NULL, pool ); } void bli_apool_alloc_block ( siz_t num_elem, array_t** restrict array_p ) { // Since the apool_t is defined as a pool of array_t, we can hard-code // the block_size parameter. const siz_t block_size = sizeof( array_t ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_alloc_block(): allocating array_t: " ); #endif // Allocate the array_t via the bli_fmalloc_align() wrapper, which performs // alignment logic and opaquely saves the original pointer so that it can // be recovered when it's time to free the block. array_t* restrict array = bli_malloc_intl( block_size ); // Initialize an array_t struct within the newly allocated memory region. bli_array_init( num_elem, sizeof( pool_t* ), array ); // Save the pointer in the caller's array_t*. *array_p = array; } void bli_apool_free_block ( array_t* restrict array ) { const siz_t num_elem = bli_array_num_elem( array ); pool_t** restrict buf = bli_array_buf( array ); // Step through the array and finalize each pool_t. for ( dim_t i = 0; i < num_elem; ++i ) { pool_t* restrict pool = buf[ i ]; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_free_block(): freeing pool_t %d within array_t.\n", ( int )i ); fflush( stdout ); #endif // Finalize and free the current pool_t, if it was created/allocated. if ( pool != NULL ) { // Finalize the pool. bli_pool_finalize( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_free_block(): pool_t %d: ", ( int )i ); #endif // Free the pool_t struct. bli_free_intl( pool ); } } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_free_block(): " ); #endif // Free the array buffer. 
bli_array_finalize( array ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_free_block(): freeing array_t: " ); #endif // Free the array. bli_free_intl( array ); } void bli_apool_finalize ( apool_t* restrict apool ) { // Query the mutex from the apool_t. bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool ); // Destroy the mutex. bli_pthread_mutex_destroy( mutex ); // Query the underlying pool_t and mutex from the apool_t. pool_t* restrict pool = bli_apool_pool( apool ); // ---------------------------------------------------------------- // Query the block_ptrs array. array_t** restrict block_ptrs = bli_pool_block_ptrs( pool ); // Query the total number of blocks currently allocated. siz_t num_blocks = bli_pool_num_blocks( pool ); // Query the top_index of the pool. siz_t top_index = bli_pool_top_index( pool ); // Sanity check: The top_index should be zero. if ( top_index != 0 ) bli_abort(); // Free the individual blocks (each an array_t) currently in the pool. for ( dim_t i = 0; i < num_blocks; ++i ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_finalize(): freeing array_t %d within apool_t.\n", ( int )i ); fflush( stdout ); #endif bli_apool_free_block( block_ptrs[i] ); } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_finalize(): freeing block_ptrs (length %d): ", ( int )( bli_pool_block_ptrs_len( pool ) ) ); #endif // Free the block_ptrs array. bli_free_intl( block_ptrs ); } array_t* bli_apool_checkout_array ( siz_t n_threads, apool_t* restrict apool ) { // Acquire the apool_t's mutex. bli_apool_lock( apool ); // ---------------------------------------------------------------------------- // NOTE: Unlike with the bli_pool API, we do not need to handle potential // reinitialization since the apool_t's block_size (corresponding to the // size of an array_t struct) will never grow. // If the apool_t is exhausted, add a block (e.g. an array_t). 
if ( bli_apool_is_exhausted( apool ) ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_checkout_block(): apool_t is exhausted; " "growing by 1 array_t.\n" ); fflush( stdout ); #endif bli_apool_grow( 1, apool ); } // At this point, at least one array_t is guaranteed to be available. // Query the underlying pool_t from the apool_t. pool_t* restrict pool = bli_apool_pool( apool ); // Query the block_ptrs array. array_t** restrict block_ptrs = bli_pool_block_ptrs( pool ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_checkout_array(): checking out array_t %d.\n", ( int )top_index ); fflush( stdout ); #endif // Select the array_t* at top_index to return to the caller. array_t* restrict array = block_ptrs[ top_index ]; // Increment the pool's top_index. bli_pool_set_top_index( top_index + 1, pool ); // ---------------------------------------------------------------------------- // Release the apool_t's mutex. bli_apool_unlock( apool ); // Resize the array_t according to the number of threads specified by the // caller. (We need one element in the array_t per thread.) bli_array_resize( n_threads, array ); // Return the selected array_t*. return array; } void bli_apool_checkin_array ( array_t* restrict array, apool_t* restrict apool ) { // Acquire the apool_t's mutex. bli_apool_lock( apool ); // Query the underlying pool_t from the apool_t. pool_t* restrict pool = bli_apool_pool( apool ); // ---------------------------------------------------------------------------- // NOTE: Unlike with the bli_pool API, we do not need to handle potential // freeing of the blocks upon checkin due to the block_size having since // changed due to reinitialization since the apool's block_size will never // change. // Query the block_ptrs array. array_t** restrict block_ptrs = bli_pool_block_ptrs( pool ); // Query the top_index of the pool. 
const siz_t top_index = bli_pool_top_index( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_checkin_block(): checking in array_t %d.\n", ( int )top_index - 1 ); fflush( stdout ); #endif // Copy the caller's array_t address to the element at top_index - 1. block_ptrs[ top_index - 1 ] = array; // Decrement the pool's top_index. bli_pool_set_top_index( top_index - 1, pool ); // ---------------------------------------------------------------------------- // Release the apool_t's mutex. bli_apool_unlock( apool ); } pool_t* bli_apool_array_elem ( siz_t index, array_t* restrict array ) { // Query the array element corresponding to index. // NOTE: If we knew that the array_t contained elements of size // sizeof( void* ) or sizeof( whatever ), we could return the *value* // stored in the array. But since array_t is general-purpose, it can't // return the element itself. So instead, bli_array_elem() returns the // address of the element in the array. Since the elements that apool_t // stores in the array_t are pool_t*, that means that the function is // actually returning the address of a pool_t*, or pool_t**, hence the // dereferencing below. pool_t** restrict pool_p = bli_array_elem( index, array ); pool_t* pool = *pool_p; // If the element is NULL, then it means a pool_t has not yet been created // and allocated for the given index (thread id). if ( pool == NULL ) { // Settle on the parameters to use when initializing the pool_t for // the current index within the array_t. const siz_t num_blocks = 1; const siz_t block_ptrs_len = 25; const siz_t align_size = 16; const siz_t offset_size = 0; malloc_ft malloc_fp = BLIS_MALLOC_POOL; free_ft free_fp = BLIS_FREE_POOL; // Each small block pool should contain blocks large enough to // accommodate any of the data structures for which they will be // used. 
const siz_t n_sizes = 4; siz_t sizes[4] = { sizeof( cntl_t ), sizeof( packm_params_t ), sizeof( thrcomm_t ), sizeof( thrinfo_t ) }; siz_t block_size = 0; // Find the largest of the sizes above and use that as the block_size // for the pool. for ( dim_t i = 0; i < n_sizes; ++i ) { if ( block_size < sizes[i] ) block_size = sizes[i]; } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_array_elem(): pool_t for tid %d is NULL; allocating pool_t.\n", ( int )index ); printf( "bli_apool_array_elem(): allocating pool_t: " ); #endif // Allocate the pool_t. pool = bli_malloc_intl( sizeof( pool_t ) ); // Initialize the pool_t. bli_pool_init ( num_blocks, block_ptrs_len, block_size, align_size, offset_size, malloc_fp, free_fp, pool ); // Update the array element with the address to the new pool_t. // NOTE: We pass in the address of the pool_t* since the bli_array // API is generalized for arbitrarily-sized elements, and therefore // it must always take the address of the data, rather than the // value (which it can only do if the elem size were fixed). bli_array_set_elem( &pool, index, array ); } // The array element is now guaranteed to refer to an allocated and // initialized pool_t. // Return the array element. return pool; } void bli_apool_grow ( siz_t num_blocks_add, apool_t* restrict apool ) { // If the requested increase is zero, return early. if ( num_blocks_add == 0 ) return; // Query the underlying pool_t from the apool_t. pool_t* restrict pool = bli_apool_pool( apool ); // Query the default initial array length from the apool_t. const siz_t num_elem = bli_apool_def_array_len( apool ); // ---------------------------------------------------------------------------- // Query the allocated length of the block_ptrs array and also the // total number of blocks currently allocated. 
const siz_t block_ptrs_len_cur = bli_pool_block_ptrs_len( pool ); const siz_t num_blocks_cur = bli_pool_num_blocks( pool ); // Compute the total number of allocated blocks that will exist // after we grow the pool. const siz_t num_blocks_new = num_blocks_cur + num_blocks_add; // If adding num_blocks_add new blocks will exceed the current capacity // of the block_ptrs array, we need to first put in place a new (larger) // array. if ( block_ptrs_len_cur < num_blocks_new ) { // To prevent this from happening often, we double the current // length of the block_ptrs array. const siz_t block_ptrs_len_new = 2 * block_ptrs_len_cur; // Query the current block_ptrs array. array_t** restrict block_ptrs_cur = bli_pool_block_ptrs( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_grow(): growing block_ptrs_len (%d -> %d): ", ( int )block_ptrs_len_cur, ( int )block_ptrs_len_new ); #endif // Allocate a new block_ptrs array. array_t** restrict block_ptrs_new = bli_malloc_intl( block_ptrs_len_new * sizeof( array_t* ) ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); // Copy the contents of the old block_ptrs array to the new/resized // array. Notice that we can begin with top_index since all entries // from 0 to top_index-1 have been (and are currently) checked out // to threads. for ( dim_t i = top_index; i < num_blocks_cur; ++i ) { block_ptrs_new[i] = block_ptrs_cur[i]; } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_grow(): freeing prev block_ptrs: " ); #endif // Free the old block_ptrs array. bli_free_intl( block_ptrs_cur ); // Update the pool_t struct with the new block_ptrs array and // record its allocated length. bli_pool_set_block_ptrs( block_ptrs_new, pool ); bli_pool_set_block_ptrs_len( block_ptrs_len_new, pool ); } // At this point, we are guaranteed to have enough unused elements // in the block_ptrs array to accommodate an additional num_blocks_add // blocks. 
// Query the current block_ptrs array (which was maybe just resized). array_t** restrict block_ptrs = bli_pool_block_ptrs( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_apool_grow(): growing apool_t (%d -> %d).\n", ( int )num_blocks_cur, ( int )num_blocks_new ); fflush( stdout ); #endif // Allocate the requested additional blocks in the resized array. for ( dim_t i = num_blocks_cur; i < num_blocks_new; ++i ) { bli_apool_alloc_block ( num_elem, &(block_ptrs[i]) ); } // Update the pool_t struct with the new number of allocated blocks. // Notice that top_index remains unchanged, as do the block_size and // align_size fields. bli_pool_set_num_blocks( num_blocks_new, pool ); } blis-0.6.1/frame/base/bli_apool.h000066400000000000000000000067321360743507500166060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_APOOL_H #define BLIS_APOOL_H // -- Locked pool-of-arrays type -- /* typedef struct { bli_pthread_mutex_t mutex; pool_t pool; siz_t def_array_len; } apool_t; */ // apool entry query static pool_t* bli_apool_pool( apool_t* apool ) { return &(apool->pool); } static bli_pthread_mutex_t* bli_apool_mutex( apool_t* apool ) { return &(apool->mutex); } static siz_t bli_apool_def_array_len( apool_t* pool ) { return pool->def_array_len; } static bool_t bli_apool_is_exhausted( apool_t* apool ) { pool_t* restrict pool = bli_apool_pool( apool ); return bli_pool_is_exhausted( pool ); } // apool action static void bli_apool_lock( apool_t* apool ) { bli_pthread_mutex_lock( bli_apool_mutex( apool ) ); } static void bli_apool_unlock( apool_t* apool ) { bli_pthread_mutex_unlock( bli_apool_mutex( apool ) ); } // apool entry modification static void bli_apool_set_def_array_len( siz_t def_array_len, apool_t* pool ) \ { pool->def_array_len = def_array_len; } // ----------------------------------------------------------------------------- void bli_apool_init ( apool_t* restrict apool ); void bli_apool_finalize ( apool_t* restrict apool ); array_t* bli_apool_checkout_array ( siz_t n_threads, apool_t* restrict apool ); void bli_apool_checkin_array ( array_t* restrict array, apool_t* restrict apool ); pool_t* bli_apool_array_elem ( siz_t index, array_t* restrict array ); void bli_apool_grow ( siz_t num_blocks_add, apool_t* restrict apool ); void bli_apool_alloc_block ( 
siz_t num_elem, array_t** restrict array_p ); void bli_apool_free_block ( array_t* restrict array ); #endif blis-0.6.1/frame/base/bli_arch.c000066400000000000000000000142451360743507500164020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018-2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

// When BLIS_CONFIGURETIME_CPUID is defined, only the handful of headers
// listed in the #else branch are included instead of the monolithic blis.h,
// and BLIS_EXPORT_BLIS is defined as empty.
#ifndef BLIS_CONFIGURETIME_CPUID
  #include "blis.h"
#else
  #define BLIS_EXPORT_BLIS
  #include "bli_system.h"
  #include "bli_type_defs.h"
  #include "bli_arch.h"
  #include "bli_cpuid.h"
#endif

// -----------------------------------------------------------------------------

// The arch_t id for the currently running hardware. We initialize to -1,
// which will be overwritten upon calling bli_arch_set_id().
static arch_t id = -1;

// Return the cached arch_t id, first making sure (via bli_arch_set_id_once())
// that it has been set.
arch_t bli_arch_query_id( void )
{
	bli_arch_set_id_once();

	// Simply return the id that was previously cached.
	return id;
}

// -----------------------------------------------------------------------------

// A pthread structure used in pthread_once(). pthread_once() is guaranteed to
// execute exactly once among all threads that pass in this control object.
static bli_pthread_once_t once_id = BLIS_PTHREAD_ONCE_INIT;

// Invoke bli_arch_set_id() through bli_pthread_once() using once_id.
// NOTE: When BLIS_CONFIGURETIME_CPUID is defined, this function has an empty
// body, so bli_arch_set_id() is not called from here in that build mode.
void bli_arch_set_id_once( void )
{
#ifndef BLIS_CONFIGURETIME_CPUID
	bli_pthread_once( &once_id, bli_arch_set_id );
#endif
}

// -----------------------------------------------------------------------------

// Set the file-scope id according to the BLIS_FAMILY_* macro(s) defined at
// compile time. Logging is enabled iff the BLIS_ARCH_DEBUG environment
// variable is present (its value is not inspected). For the "family" macros
// the id is obtained at runtime from bli_cpuid_query_id(); for each
// microarchitecture macro the id is the corresponding BLIS_ARCH_* constant.
// The assignments are independent #if/#ifdef blocks executed in source order,
// so if more than one macro is defined, the last matching assignment wins.
void bli_arch_set_id( void )
{
	// NOTE: Change this usage of getenv() to bli_env_get_var() after
	// merging #351.
	//bool_t do_logging = bli_env_get_var( "BLIS_ARCH_DEBUG", 0 );
	bool_t do_logging = getenv( "BLIS_ARCH_DEBUG" ) != NULL;
	bli_arch_set_logging( do_logging );

	// Architecture families.
#if defined BLIS_FAMILY_INTEL64 || \
    defined BLIS_FAMILY_AMD64   || \
    defined BLIS_FAMILY_X86_64  || \
    defined BLIS_FAMILY_ARM64   || \
    defined BLIS_FAMILY_ARM32
	id = bli_cpuid_query_id();
#endif

	// Intel microarchitectures.
#ifdef BLIS_FAMILY_SKX
	id = BLIS_ARCH_SKX;
#endif
#ifdef BLIS_FAMILY_KNL
	id = BLIS_ARCH_KNL;
#endif
#ifdef BLIS_FAMILY_KNC
	id = BLIS_ARCH_KNC;
#endif
#ifdef BLIS_FAMILY_HASWELL
	id = BLIS_ARCH_HASWELL;
#endif
#ifdef BLIS_FAMILY_SANDYBRIDGE
	id = BLIS_ARCH_SANDYBRIDGE;
#endif
#ifdef BLIS_FAMILY_PENRYN
	id = BLIS_ARCH_PENRYN;
#endif

	// AMD microarchitectures.
#ifdef BLIS_FAMILY_ZEN2
	id = BLIS_ARCH_ZEN2;
#endif
#ifdef BLIS_FAMILY_ZEN
	id = BLIS_ARCH_ZEN;
#endif
#ifdef BLIS_FAMILY_EXCAVATOR
	id = BLIS_ARCH_EXCAVATOR;
#endif
#ifdef BLIS_FAMILY_STEAMROLLER
	id = BLIS_ARCH_STEAMROLLER;
#endif
#ifdef BLIS_FAMILY_PILEDRIVER
	id = BLIS_ARCH_PILEDRIVER;
#endif
#ifdef BLIS_FAMILY_BULLDOZER
	id = BLIS_ARCH_BULLDOZER;
#endif

	// ARM microarchitectures.
#ifdef BLIS_FAMILY_THUNDERX2
	id = BLIS_ARCH_THUNDERX2;
#endif
#ifdef BLIS_FAMILY_CORTEXA57
	id = BLIS_ARCH_CORTEXA57;
#endif
#ifdef BLIS_FAMILY_CORTEXA53
	id = BLIS_ARCH_CORTEXA53;
#endif
#ifdef BLIS_FAMILY_CORTEXA15
	id = BLIS_ARCH_CORTEXA15;
#endif
#ifdef BLIS_FAMILY_CORTEXA9
	id = BLIS_ARCH_CORTEXA9;
#endif

	// IBM microarchitectures.
#ifdef BLIS_FAMILY_POWER9
	id = BLIS_ARCH_POWER9;
#endif
#ifdef BLIS_FAMILY_POWER7
	id = BLIS_ARCH_POWER7;
#endif
#ifdef BLIS_FAMILY_BGQ
	id = BLIS_ARCH_BGQ;
#endif

	// Generic microarchitecture.
#ifdef BLIS_FAMILY_GENERIC
	id = BLIS_ARCH_GENERIC;
#endif

	// Report the selected sub-configuration on stderr when logging is on.
	// NOTE(review): if none of the macros above is defined, id is still -1
	// here and is passed to bli_arch_string() as-is -- confirm that the
	// build system always defines at least one BLIS_FAMILY_* macro.
	if ( bli_arch_get_logging() )
		fprintf( stderr, "libblis: selecting sub-configuration '%s'.\n",
				 bli_arch_string( id ) );

	//printf( "blis_arch_query_id(): id = %u\n", id );
	//exit(1);
}

// -----------------------------------------------------------------------------

// NOTE: This string array must be kept up-to-date with the arch_t
// enumeration that is typedef'ed in bli_type_defs.h. That is, the
// index order of each string should correspond to the implied/assigned
// enum value given to the corresponding BLIS_ARCH_ value.
static char* config_name[ BLIS_NUM_ARCHS ] = { "skx", "knl", "knc", "haswell", "sandybridge", "penryn", "zen2", "zen", "excavator", "steamroller", "piledriver", "bulldozer", "thunderx2", "cortexa57", "cortexa53", "cortexa15", "cortexa9", "power9", "power7", "bgq", "generic" }; char* bli_arch_string( arch_t id ) { return config_name[ id ]; } // ----------------------------------------------------------------------------- static bool_t arch_dolog = 0; void bli_arch_set_logging( bool_t dolog ) { arch_dolog = dolog; } bool_t bli_arch_get_logging( void ) { return arch_dolog; } void bli_arch_log( char* fmt, ... ) { char prefix[] = "libblis: "; int n_chars = strlen( prefix ) + strlen( fmt ) + 1; if ( bli_arch_get_logging() && fmt ) { char* prefix_fmt = malloc( n_chars ); snprintf( prefix_fmt, n_chars, "%s%s", prefix, fmt ); va_list ap; va_start( ap, fmt ); vfprintf( stderr, prefix_fmt, ap ); va_end( ap ); free( prefix_fmt ); } } blis-0.6.1/frame/base/bli_arch.h000066400000000000000000000037411360743507500164060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_ARCH_H
#define BLIS_ARCH_H

// Return the arch_t id cached by bli_arch_set_id(), setting it first (once)
// if needed.
BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void );

// Run bli_arch_set_id() at most once across threads (a no-op when
// BLIS_CONFIGURETIME_CPUID is defined).
void   bli_arch_set_id_once( void );

// Determine and cache the arch_t id from the BLIS_FAMILY_* macros, and
// enable logging if the BLIS_ARCH_DEBUG environment variable is set.
void   bli_arch_set_id( void );

// Return the sub-configuration name string for the given arch_t id.
BLIS_EXPORT_BLIS char*  bli_arch_string( arch_t id );

// Set / query whether architecture logging is enabled.
void   bli_arch_set_logging( bool_t dolog );
bool_t bli_arch_get_logging( void );

// printf-style logging to stderr with a "libblis: " prefix; emits output
// only while logging is enabled.
void   bli_arch_log( char*, ... );

#endif

blis-0.6.1/frame/base/bli_array.c000066400000000000000000000150361360743507500166020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

//#define BLIS_ENABLE_MEM_TRACING

// Initialize an array_t: allocate a buffer of num_elem * elem_size bytes,
// zero it, and record the buffer, element count, and element size in the
// array_t.
// NOTE(review): the byte count is computed without an overflow check, and
// the result of bli_malloc_intl() is used without a NULL check --
// presumably bli_malloc_intl() does not return on failure; confirm.
void bli_array_init
     (
       const siz_t       num_elem,
       const siz_t       elem_size,
       array_t* restrict array
     )
{
	#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_array_init(): allocating array [%d * %d]: ",
	        ( int )num_elem, ( int )elem_size );
	#endif

	// Compute the total size (in bytes) of the array.
	const size_t array_size = num_elem * elem_size;

	// Allocate the array buffer.
	void* restrict buf = bli_malloc_intl( array_size );

	// Initialize the array elements to zero. THIS IS IMPORTANT because
	// consumer threads will use the NULL-ness of the array elements to
	// determine if the corresponding block (data structure) needs to be
	// created/allocated and initialized.
	memset( buf, 0, array_size );

	// Initialize the array_t structure.
	bli_array_set_buf( buf, array );
	bli_array_set_num_elem( num_elem, array );
	bli_array_set_elem_size( elem_size, array );
}

// Grow an array_t so that it holds num_elem_new elements. The array is never
// shrunk: if num_elem_new does not exceed the current element count, nothing
// happens. Otherwise a new buffer is allocated, the existing elements are
// copied into it, the old buffer is freed, and the newly added elements are
// zeroed.
void bli_array_resize
     (
       const siz_t       num_elem_new,
       array_t* restrict array
     )
{
	// Query the number of elements in the array.
	const siz_t num_elem_prev = bli_array_num_elem( array );

	// If the new requested size (number of elements) is less than or equal to
	// the current size, no action is needed; return early.
	if ( num_elem_new <= num_elem_prev ) return;

	// At this point, we know that num_elem_prev < num_elem_new, which means
	// we need to proceed with the resizing.

	// Query the size of each element in the array.
	const siz_t elem_size = bli_array_elem_size( array );

	// Compute the total size (in bytes) of the array before and after resizing.
	const size_t array_size_prev = num_elem_prev * elem_size;
	const size_t array_size_new  = num_elem_new  * elem_size;

	// Query the previous array buffer.
	void* restrict buf_prev = bli_array_buf( array );

	#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_array_resize(): allocating array [%d * %d]: ",
	        ( int )num_elem_new, ( int )elem_size );
	#endif

	// Allocate a new array buffer.
	// (Declared as char* so that byte offsets can be indexed directly below.)
	char* restrict buf_new = bli_malloc_intl( array_size_new );

	// Copy the previous array contents to the new array.
	memcpy( buf_new, buf_prev, array_size_prev );

	#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_array_resize(): freeing array [%d * %d]: ",
	        ( int )num_elem_prev, ( int )elem_size );
	#endif

	// Now that the elements have been copied over to the new buffer, we can
	// free the previous array buffer.
	bli_free_intl( buf_prev );

	// Initialize the new elements' contents to zero. (Note that we advance
	// the new buffer address by the size of the previous array so that we
	// arrive at the first byte of the new segment.)
	memset( &buf_new[ array_size_prev ], 0, array_size_new - array_size_prev );

	// Update the array_t structure.
	// NOTE: The array elem_size field does not need updating.
	bli_array_set_buf( buf_new, array );
	bli_array_set_num_elem( num_elem_new, array );
}

void bli_array_finalize
     (
       array_t* restrict array
     )
{
	#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_array_finalize(): freeing buf (length %d): ",
	        ( int )bli_array_num_elem( array ) );
	#endif

	// Query the buffer from the array.
void* restrict buf = bli_array_buf( array ); // Free the buffer. bli_free_intl( buf ); } void* bli_array_elem ( const siz_t index, array_t* restrict array ) { // Query the number of elements in the array. const siz_t num_elem = bli_array_num_elem( array ); // Sanity check: disallow access beyond the bounds of the array. if ( num_elem <= index ) bli_abort(); // Query the size of each element in the array. const siz_t elem_size = bli_array_elem_size( array ); // Query the buffer from the array, but store it as a char* so we can use // it to easily perform byte pointer arithmetic. char* restrict buf = bli_array_buf( array ); // Advance the pointer by (index * elem_size) bytes. buf += index * elem_size; // Return the address of the element computed above. return ( void* )buf; } void bli_array_set_elem ( void* restrict elem, const siz_t index, array_t* restrict array ) { // Query the size of each element in the array. const siz_t elem_size = bli_array_elem_size( array ); // Query the buffer from the array as a char*. char* restrict buf = bli_array_buf( array ); if ( elem_size == sizeof( void* ) ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_array_set_elem(): elem_size is %d; setting index %d.\n", ( int )elem_size, ( int )index ); fflush( stdout ); #endif // Special case: Handle elem_size = sizeof( void* ) without calling // memcpy(). void** restrict buf_vvp = ( void** )buf; void** restrict elem_vvp = ( void** )elem; buf_vvp[ index ] = *elem_vvp; } else { // General case: Copy the elem_size bytes from elem to buf at the // element index specified by index. memcpy( &buf[ index * elem_size ], elem, ( size_t )elem_size ); } } blis-0.6.1/frame/base/bli_array.h000066400000000000000000000057601360743507500166120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_ARRAY_H #define BLIS_ARRAY_H // -- Array type -- /* typedef struct { void* buf; siz_t num_elem; siz_t elem_size; } array_t; */ // Array entry query static void* bli_array_buf( array_t* array ) { return array->buf; } static siz_t bli_array_num_elem( array_t* array ) { return array->num_elem; } static siz_t bli_array_elem_size( array_t* array ) { return array->elem_size; } // Array entry modification static void bli_array_set_buf( void* buf, array_t* array ) \ { array->buf = buf; } static void bli_array_set_num_elem( siz_t num_elem, array_t* array ) \ { array->num_elem = num_elem; } static void bli_array_set_elem_size( siz_t elem_size, array_t* array ) \ { array->elem_size = elem_size; } // ----------------------------------------------------------------------------- void bli_array_init ( const siz_t num_elem, const siz_t elem_size, array_t* restrict array ); void bli_array_resize ( const siz_t num_elem_new, array_t* restrict array ); void bli_array_finalize ( array_t* restrict array ); void* bli_array_elem ( const siz_t index, array_t* restrict array ); void bli_array_set_elem ( void* restrict elem, const siz_t index, array_t* restrict array ); #endif blis-0.6.1/frame/base/bli_auxinfo.h000066400000000000000000000065321360743507500171430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AUXINFO_MACRO_DEFS_H #define BLIS_AUXINFO_MACRO_DEFS_H // auxinfo_t field query static pack_t bli_auxinfo_schema_a( auxinfo_t* ai ) { return ai->schema_a; } static pack_t bli_auxinfo_schema_b( auxinfo_t* ai ) { return ai->schema_b; } static void* bli_auxinfo_next_a( auxinfo_t* ai ) { return ai->a_next; } static void* bli_auxinfo_next_b( auxinfo_t* ai ) { return ai->b_next; } static inc_t bli_auxinfo_is_a( auxinfo_t* ai ) { return ai->is_a; } static inc_t bli_auxinfo_is_b( auxinfo_t* ai ) { return ai->is_b; } static inc_t bli_auxinfo_ps_a( auxinfo_t* ai ) { return ai->ps_a; } static inc_t bli_auxinfo_ps_b( auxinfo_t* ai ) { return ai->ps_b; } #if 0 static inc_t bli_auxinfo_dt_on_output( auxinfo_t* ai ) { return ai->dt_on_output; } #endif // auxinfo_t field modification static void bli_auxinfo_set_schema_a( pack_t schema, auxinfo_t* ai ) { ai->schema_a = schema; } static void bli_auxinfo_set_schema_b( pack_t schema, auxinfo_t* ai ) { ai->schema_b = schema; } static void bli_auxinfo_set_next_a( void* p, auxinfo_t* ai ) 
{ ai->a_next = p; } static void bli_auxinfo_set_next_b( void* p, auxinfo_t* ai ) { ai->b_next = p; } static void bli_auxinfo_set_next_ab( void* ap, void* bp, auxinfo_t* ai ) { ai->a_next = ap; ai->b_next = bp; } static void bli_auxinfo_set_is_a( inc_t is, auxinfo_t* ai ) { ai->is_a = is; } static void bli_auxinfo_set_is_b( inc_t is, auxinfo_t* ai ) { ai->is_b = is; } static void bli_auxinfo_set_ps_a( inc_t ps, auxinfo_t* ai ) { ai->ps_a = ps; } static void bli_auxinfo_set_ps_b( inc_t ps, auxinfo_t* ai ) { ai->ps_b = ps; } #if 0 static void bli_auxinfo_set_dt_on_output( num_t dt_on_output, auxinfo_t* ai ) { ai->dt_on_output = dt_on_output; } #endif #endif blis-0.6.1/frame/base/bli_blksz.c000066400000000000000000000244351360743507500166140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" blksz_t* bli_blksz_create_ed ( dim_t b_s, dim_t be_s, dim_t b_d, dim_t be_d, dim_t b_c, dim_t be_c, dim_t b_z, dim_t be_z ) { blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) ); bli_blksz_init_ed ( b, b_s, be_s, b_d, be_d, b_c, be_c, b_z, be_z ); return b; } blksz_t* bli_blksz_create ( dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z, dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z ) { blksz_t* b = bli_malloc_intl( sizeof( blksz_t ) ); bli_blksz_init ( b, b_s, b_d, b_c, b_z, be_s, be_d, be_c, be_z ); return b; } void bli_blksz_init_ed ( blksz_t* b, dim_t b_s, dim_t be_s, dim_t b_d, dim_t be_d, dim_t b_c, dim_t be_c, dim_t b_z, dim_t be_z ) { b->v[BLIS_FLOAT] = b_s; b->v[BLIS_DOUBLE] = b_d; b->v[BLIS_SCOMPLEX] = b_c; b->v[BLIS_DCOMPLEX] = b_z; b->e[BLIS_FLOAT] = be_s; b->e[BLIS_DOUBLE] = be_d; b->e[BLIS_SCOMPLEX] = be_c; b->e[BLIS_DCOMPLEX] = be_z; } void bli_blksz_init ( blksz_t* b, dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z, dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z ) { b->v[BLIS_FLOAT] = b_s; b->v[BLIS_DOUBLE] = b_d; b->v[BLIS_SCOMPLEX] = b_c; b->v[BLIS_DCOMPLEX] = b_z; b->e[BLIS_FLOAT] = be_s; b->e[BLIS_DOUBLE] = be_d; b->e[BLIS_SCOMPLEX] = be_c; b->e[BLIS_DCOMPLEX] = be_z; } void bli_blksz_init_easy ( blksz_t* b, dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z ) { b->v[BLIS_FLOAT] = b->e[BLIS_FLOAT] = b_s; b->v[BLIS_DOUBLE] = b->e[BLIS_DOUBLE] = b_d; b->v[BLIS_SCOMPLEX] = b->e[BLIS_SCOMPLEX] = b_c; b->v[BLIS_DCOMPLEX] = b->e[BLIS_DCOMPLEX] = 
b_z; } void bli_blksz_free ( blksz_t* b ) { bli_free_intl( b ); } // ----------------------------------------------------------------------------- #if 0 void bli_blksz_reduce_dt_to ( num_t dt_bm, blksz_t* bmult, num_t dt_bs, blksz_t* blksz ) { dim_t blksz_def = bli_blksz_get_def( dt_bs, blksz ); dim_t blksz_max = bli_blksz_get_max( dt_bs, blksz ); dim_t bmult_val = bli_blksz_get_def( dt_bm, bmult ); // If the blocksize multiple is zero, we do nothing. if ( bmult_val == 0 ) return; // Round the default and maximum blocksize values down to their // respective nearest multiples of bmult_val. (Notice that we // ignore the "max" entry in the bmult object since that would // correspond to the packing dimension, which plays no role // as a blocksize multiple.) blksz_def = ( blksz_def / bmult_val ) * bmult_val; blksz_max = ( blksz_max / bmult_val ) * bmult_val; // Make sure the new blocksize values are at least the blocksize // multiple. if ( blksz_def == 0 ) blksz_def = bmult_val; if ( blksz_max == 0 ) blksz_max = bmult_val; // Store the new blocksizes back to the object. bli_blksz_set_def( blksz_def, dt_bs, blksz ); bli_blksz_set_max( blksz_max, dt_bs, blksz ); } #endif // ----------------------------------------------------------------------------- void bli_blksz_reduce_def_to ( num_t dt_bm, blksz_t* bmult, num_t dt_bs, blksz_t* blksz ) { dim_t blksz_def = bli_blksz_get_def( dt_bs, blksz ); dim_t bmult_val = bli_blksz_get_def( dt_bm, bmult ); // If the blocksize multiple is zero, we do nothing. if ( bmult_val == 0 ) return; // Round the default and maximum blocksize values down to their // respective nearest multiples of bmult_val. (Notice that we // ignore the "max" entry in the bmult object since that would // correspond to the packing dimension, which plays no role // as a blocksize multiple.) blksz_def = ( blksz_def / bmult_val ) * bmult_val; // Make sure the new blocksize values are at least the blocksize // multiple. 
if ( blksz_def == 0 ) blksz_def = bmult_val; // Store the new blocksizes back to the object. bli_blksz_set_def( blksz_def, dt_bs, blksz ); } // ----------------------------------------------------------------------------- void bli_blksz_reduce_max_to ( num_t dt_bm, blksz_t* bmult, num_t dt_bs, blksz_t* blksz ) { dim_t blksz_max = bli_blksz_get_max( dt_bs, blksz ); dim_t bmult_val = bli_blksz_get_def( dt_bm, bmult ); // If the blocksize multiple is zero, we do nothing. if ( bmult_val == 0 ) return; // Round the blocksize values down to its nearest multiple of // of bmult_val. (Notice that we ignore the "max" entry in the // bmult object since that would correspond to the packing // dimension, which plays no role as a blocksize multiple.) blksz_max = ( blksz_max / bmult_val ) * bmult_val; // Make sure the new blocksize value is at least the blocksize // multiple. if ( blksz_max == 0 ) blksz_max = bmult_val; // Store the new blocksize back to the object. bli_blksz_set_max( blksz_max, dt_bs, blksz ); } // ----------------------------------------------------------------------------- dim_t bli_determine_blocksize ( dir_t direct, dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ) { if ( direct == BLIS_FWD ) return bli_determine_blocksize_f( i, dim, obj, bszid, cntx ); else return bli_determine_blocksize_b( i, dim, obj, bszid, cntx ); } dim_t bli_determine_blocksize_f ( dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ) { num_t dt; blksz_t* bsize; dim_t b_alg, b_max; dim_t b_use; // Extract the execution datatype and use it to query the corresponding // blocksize and blocksize maximum values from the blksz_t object. 
dt = bli_obj_exec_dt( obj ); bsize = bli_cntx_get_blksz( bszid, cntx ); b_alg = bli_blksz_get_def( dt, bsize ); b_max = bli_blksz_get_max( dt, bsize ); b_use = bli_determine_blocksize_f_sub( i, dim, b_alg, b_max ); return b_use; } dim_t bli_determine_blocksize_b ( dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ) { num_t dt; blksz_t* bsize; dim_t b_alg, b_max; dim_t b_use; // Extract the execution datatype and use it to query the corresponding // blocksize and blocksize maximum values from the blksz_t object. dt = bli_obj_exec_dt( obj ); bsize = bli_cntx_get_blksz( bszid, cntx ); b_alg = bli_blksz_get_def( dt, bsize ); b_max = bli_blksz_get_max( dt, bsize ); b_use = bli_determine_blocksize_b_sub( i, dim, b_alg, b_max ); return b_use; } dim_t bli_determine_blocksize_f_sub ( dim_t i, dim_t dim, dim_t b_alg, dim_t b_max ) { dim_t b_now; dim_t dim_left_now; // We assume that this function is being called from an algorithm that // is moving "forward" (ie: top to bottom, left to right, top-left // to bottom-right). // Compute how much of the matrix dimension is left, including the // chunk that will correspond to the blocksize we are computing now. dim_left_now = dim - i; // If the dimension currently remaining is less than the maximum // blocksize, use it instead of the default blocksize b_alg. // Otherwise, use b_alg. if ( dim_left_now <= b_max ) { b_now = dim_left_now; } else { b_now = b_alg; } return b_now; } dim_t bli_determine_blocksize_b_sub ( dim_t i, dim_t dim, dim_t b_alg, dim_t b_max ) { dim_t b_now; dim_t dim_left_now; dim_t dim_at_edge; // We assume that this function is being called from an algorithm that // is moving "backward" (ie: bottom to top, right to left, bottom-right // to top-left). // Compute how much of the matrix dimension is left, including the // chunk that will correspond to the blocksize we are computing now. dim_left_now = dim - i; // Sanity check: if dim_left_now is zero, then we can return zero // without going any further. 
if ( dim_left_now == 0 ) return 0; dim_at_edge = dim_left_now % b_alg; // If dim_left_now is a multiple of b_alg, we can safely return b_alg // without going any further. if ( dim_at_edge == 0 ) return b_alg; // If the dimension currently remaining is less than the maximum // blocksize, use it as the chosen blocksize. If this is not the case, // then we know dim_left_now is greater than the maximum blocksize. // To determine how much of it we should use for the current blocksize, // we inspect dim_at_edge; if it is smaller than (or equal to) b_max - // b_alg, then we use b_alg + dim_at_edge. Otherwise, dim_at_edge is // greater than b_max - b_alg, in which case we use dim_at_edge. if ( dim_left_now <= b_max ) { b_now = dim_left_now; } else // if ( dim_left_now > b_max ) { if ( dim_at_edge <= b_max - b_alg ) { b_now = b_alg + dim_at_edge; } else // if ( dim_at_edge > b_max - b_alg ) { b_now = dim_at_edge; } } return b_now; } blis-0.6.1/frame/base/bli_blksz.h000066400000000000000000000157621360743507500166240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // blksz_t query static dim_t bli_blksz_get_def ( num_t dt, blksz_t* b ) { return b->v[ dt ]; } static dim_t bli_blksz_get_max ( num_t dt, blksz_t* b ) { return b->e[ dt ]; } // blksz_t modification static void bli_blksz_set_def ( dim_t val, num_t dt, blksz_t* b ) { b->v[ dt ] = val; } static void bli_blksz_set_max ( dim_t val, num_t dt, blksz_t* b ) { b->e[ dt ] = val; } static void bli_blksz_copy ( blksz_t* b_src, blksz_t* b_dst ) { *b_dst = *b_src; } static void bli_blksz_copy_if_pos ( blksz_t* b_src, blksz_t* b_dst ) { // Copy the blocksize values over to b_dst one-by-one so that // we can skip the ones that are non-positive. 
const dim_t v_s = bli_blksz_get_def( BLIS_FLOAT, b_src ); const dim_t v_d = bli_blksz_get_def( BLIS_DOUBLE, b_src ); const dim_t v_c = bli_blksz_get_def( BLIS_SCOMPLEX, b_src ); const dim_t v_z = bli_blksz_get_def( BLIS_DCOMPLEX, b_src ); const dim_t e_s = bli_blksz_get_max( BLIS_FLOAT, b_src ); const dim_t e_d = bli_blksz_get_max( BLIS_DOUBLE, b_src ); const dim_t e_c = bli_blksz_get_max( BLIS_SCOMPLEX, b_src ); const dim_t e_z = bli_blksz_get_max( BLIS_DCOMPLEX, b_src ); if ( v_s > 0 ) bli_blksz_set_def( v_s, BLIS_FLOAT, b_dst ); if ( v_d > 0 ) bli_blksz_set_def( v_d, BLIS_DOUBLE, b_dst ); if ( v_c > 0 ) bli_blksz_set_def( v_c, BLIS_SCOMPLEX, b_dst ); if ( v_z > 0 ) bli_blksz_set_def( v_z, BLIS_DCOMPLEX, b_dst ); if ( e_s > 0 ) bli_blksz_set_max( e_s, BLIS_FLOAT, b_dst ); if ( e_d > 0 ) bli_blksz_set_max( e_d, BLIS_DOUBLE, b_dst ); if ( e_c > 0 ) bli_blksz_set_max( e_c, BLIS_SCOMPLEX, b_dst ); if ( e_z > 0 ) bli_blksz_set_max( e_z, BLIS_DCOMPLEX, b_dst ); } static void bli_blksz_copy_def_dt ( num_t dt_src, blksz_t* b_src, num_t dt_dst, blksz_t* b_dst ) { const dim_t val = bli_blksz_get_def( dt_src, b_src ); bli_blksz_set_def( val, dt_dst, b_dst ); } static void bli_blksz_copy_max_dt ( num_t dt_src, blksz_t* b_src, num_t dt_dst, blksz_t* b_dst ) { const dim_t val = bli_blksz_get_max( dt_src, b_src ); bli_blksz_set_max( val, dt_dst, b_dst ); } static void bli_blksz_copy_dt ( num_t dt_src, blksz_t* b_src, num_t dt_dst, blksz_t* b_dst ) { bli_blksz_copy_def_dt( dt_src, b_src, dt_dst, b_dst ); bli_blksz_copy_max_dt( dt_src, b_src, dt_dst, b_dst ); } static void bli_blksz_scale_def ( dim_t num, dim_t den, num_t dt, blksz_t* b ) { const dim_t val = bli_blksz_get_def( dt, b ); bli_blksz_set_def( ( val * num ) / den, dt, b ); } static void bli_blksz_scale_max ( dim_t num, dim_t den, num_t dt, blksz_t* b ) { const dim_t val = bli_blksz_get_max( dt, b ); bli_blksz_set_max( ( val * num ) / den, dt, b ); } static void bli_blksz_scale_def_max ( dim_t num, dim_t den, num_t dt, 
blksz_t* b ) { bli_blksz_scale_def( num, den, dt, b ); bli_blksz_scale_max( num, den, dt, b ); } // ----------------------------------------------------------------------------- BLIS_EXPORT_BLIS blksz_t* bli_blksz_create_ed ( dim_t b_s, dim_t be_s, dim_t b_d, dim_t be_d, dim_t b_c, dim_t be_c, dim_t b_z, dim_t be_z ); BLIS_EXPORT_BLIS blksz_t* bli_blksz_create ( dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z, dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z ); BLIS_EXPORT_BLIS void bli_blksz_init_ed ( blksz_t* b, dim_t b_s, dim_t be_s, dim_t b_d, dim_t be_d, dim_t b_c, dim_t be_c, dim_t b_z, dim_t be_z ); BLIS_EXPORT_BLIS void bli_blksz_init ( blksz_t* b, dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z, dim_t be_s, dim_t be_d, dim_t be_c, dim_t be_z ); BLIS_EXPORT_BLIS void bli_blksz_init_easy ( blksz_t* b, dim_t b_s, dim_t b_d, dim_t b_c, dim_t b_z ); BLIS_EXPORT_BLIS void bli_blksz_free ( blksz_t* b ); // ----------------------------------------------------------------------------- #if 0 BLIS_EXPORT_BLIS void bli_blksz_reduce_dt_to ( num_t dt_bm, blksz_t* bmult, num_t dt_bs, blksz_t* blksz ); #endif void bli_blksz_reduce_def_to ( num_t dt_bm, blksz_t* bmult, num_t dt_bs, blksz_t* blksz ); void bli_blksz_reduce_max_to ( num_t dt_bm, blksz_t* bmult, num_t dt_bs, blksz_t* blksz ); // ----------------------------------------------------------------------------- dim_t bli_determine_blocksize ( dir_t direct, dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ); dim_t bli_determine_blocksize_f ( dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ); dim_t bli_determine_blocksize_b ( dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ); dim_t bli_determine_blocksize_f_sub ( dim_t i, dim_t dim, dim_t b_alg, dim_t b_max ); dim_t bli_determine_blocksize_b_sub ( dim_t i, dim_t dim, dim_t b_alg, dim_t b_max ); blis-0.6.1/frame/base/bli_check.c000066400000000000000000000526431360743507500165460ustar00rootroot00000000000000/* BLIS An object-based framework for 
developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // -- General stuff ------------------------------------------------------------ err_t bli_check_error_code_helper( gint_t code, char* file, guint_t line ) { if ( code == BLIS_SUCCESS ) return code; if ( BLIS_ERROR_CODE_MAX < code && code < BLIS_ERROR_CODE_MIN ) { bli_print_msg( bli_error_string_for_code( code ), file, line ); bli_abort(); } else { bli_print_msg( bli_error_string_for_code( BLIS_UNDEFINED_ERROR_CODE ), file, line ); bli_abort(); } return code; } err_t bli_check_valid_error_level( errlev_t level ) { err_t e_val = BLIS_SUCCESS; if ( level != BLIS_NO_ERROR_CHECKING && level != BLIS_FULL_ERROR_CHECKING ) e_val = BLIS_INVALID_ERROR_CHECKING_LEVEL; return e_val; } err_t bli_check_null_pointer( void* ptr ) { err_t e_val = BLIS_SUCCESS; if ( ptr == NULL ) e_val = BLIS_NULL_POINTER; return e_val; } // -- Parameter-related checks ------------------------------------------------- err_t bli_check_valid_side( side_t side ) { err_t e_val = BLIS_SUCCESS; if ( side != BLIS_LEFT && side != BLIS_RIGHT /*&& side != BLIS_TOP && side != BLIS_BOTTOM*/ ) e_val = BLIS_INVALID_SIDE; return e_val; } err_t bli_check_valid_uplo( uplo_t uplo ) { err_t e_val = BLIS_SUCCESS; if ( !bli_is_lower( uplo ) && !bli_is_upper( uplo ) ) e_val = BLIS_INVALID_UPLO; return e_val; } err_t bli_check_valid_trans( trans_t trans ) { err_t e_val = BLIS_SUCCESS; if ( trans != BLIS_NO_TRANSPOSE && trans != BLIS_TRANSPOSE && trans != BLIS_CONJ_NO_TRANSPOSE && trans != BLIS_CONJ_TRANSPOSE ) e_val = BLIS_INVALID_TRANS; return e_val; } err_t bli_check_valid_diag( diag_t diag ) { err_t e_val = BLIS_SUCCESS; if ( diag != BLIS_NONUNIT_DIAG && diag != BLIS_UNIT_DIAG ) e_val = BLIS_INVALID_DIAG; return e_val; } err_t bli_check_nonunit_diag( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( !bli_obj_has_nonunit_diag( a ) ) e_val = BLIS_EXPECTED_NONUNIT_DIAG; return e_val; } // -- Datatype-related checks -------------------------------------------------- err_t bli_check_valid_datatype( num_t 
dt ) { err_t e_val = BLIS_SUCCESS; if ( dt != BLIS_FLOAT && dt != BLIS_DOUBLE && dt != BLIS_SCOMPLEX && dt != BLIS_DCOMPLEX && dt != BLIS_INT && dt != BLIS_CONSTANT ) e_val = BLIS_INVALID_DATATYPE; return e_val; } err_t bli_check_object_valid_datatype( obj_t* a ) { err_t e_val; num_t dt; dt = bli_obj_dt( a ); e_val = bli_check_valid_datatype( dt ); return e_val; } err_t bli_check_noninteger_datatype( num_t dt ) { err_t e_val = BLIS_SUCCESS; if ( dt == BLIS_INT ) e_val = BLIS_EXPECTED_NONINTEGER_DATATYPE; return e_val; } err_t bli_check_noninteger_object( obj_t* a ) { err_t e_val; num_t dt; dt = bli_obj_dt( a ); e_val = bli_check_noninteger_datatype( dt ); return e_val; } err_t bli_check_nonconstant_datatype( num_t dt ) { err_t e_val = BLIS_SUCCESS; if ( dt == BLIS_CONSTANT ) e_val = BLIS_EXPECTED_NONCONSTANT_DATATYPE; return e_val; } err_t bli_check_nonconstant_object( obj_t* a ) { err_t e_val; num_t dt; dt = bli_obj_dt( a ); e_val = bli_check_nonconstant_datatype( dt ); return e_val; } err_t bli_check_floating_datatype( num_t dt ) { err_t e_val = BLIS_SUCCESS; if ( dt != BLIS_FLOAT && dt != BLIS_DOUBLE && dt != BLIS_SCOMPLEX && dt != BLIS_DCOMPLEX ) e_val = BLIS_EXPECTED_FLOATING_POINT_DATATYPE; return e_val; } err_t bli_check_floating_object( obj_t* a ) { err_t e_val; num_t dt; dt = bli_obj_dt( a ); e_val = bli_check_floating_datatype( dt ); return e_val; } err_t bli_check_real_datatype( num_t dt ) { err_t e_val = BLIS_SUCCESS; if ( dt != BLIS_FLOAT && dt != BLIS_DOUBLE ) e_val = BLIS_EXPECTED_REAL_DATATYPE; return e_val; } err_t bli_check_real_object( obj_t* a ) { err_t e_val; num_t dt; dt = bli_obj_dt( a ); e_val = bli_check_real_datatype( dt ); return e_val; } err_t bli_check_integer_datatype( num_t dt ) { err_t e_val = BLIS_SUCCESS; if ( dt != BLIS_INT ) e_val = BLIS_EXPECTED_INTEGER_DATATYPE; return e_val; } err_t bli_check_integer_object( obj_t* a ) { err_t e_val; num_t dt; dt = bli_obj_dt( a ); e_val = bli_check_integer_datatype( dt ); return e_val; } 
err_t bli_check_consistent_datatypes( num_t dt_a, num_t dt_b ) { err_t e_val = BLIS_SUCCESS; if ( dt_a != BLIS_CONSTANT && dt_b != BLIS_CONSTANT ) if ( dt_a != dt_b ) e_val = BLIS_INCONSISTENT_DATATYPES; return e_val; } err_t bli_check_consistent_object_datatypes( obj_t* a, obj_t* b ) { err_t e_val; num_t dt_a; num_t dt_b; dt_a = bli_obj_dt( a ); dt_b = bli_obj_dt( b ); e_val = bli_check_consistent_datatypes( dt_a, dt_b ); return e_val; } err_t bli_check_datatype_real_proj_of( num_t dt_c, num_t dt_r ) { err_t e_val = BLIS_SUCCESS; if ( ( dt_c == BLIS_CONSTANT && bli_is_complex( dt_r ) ) || ( dt_c == BLIS_FLOAT && dt_r != BLIS_FLOAT ) || ( dt_c == BLIS_DOUBLE && dt_r != BLIS_DOUBLE ) || ( dt_c == BLIS_SCOMPLEX && dt_r != BLIS_FLOAT ) || ( dt_c == BLIS_DCOMPLEX && dt_r != BLIS_DOUBLE ) ) e_val = BLIS_EXPECTED_REAL_PROJ_OF; return e_val; } err_t bli_check_object_real_proj_of( obj_t* c, obj_t* r ) { err_t e_val; num_t dt_c; num_t dt_r; dt_c = bli_obj_dt( c ); dt_r = bli_obj_dt( r ); e_val = bli_check_datatype_real_proj_of( dt_c, dt_r ); return e_val; } err_t bli_check_real_valued_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; double a_real; double a_imag; bli_getsc( a, &a_real, &a_imag ); if ( a_imag != 0.0 ) e_val = BLIS_EXPECTED_REAL_VALUED_OBJECT; return e_val; } err_t bli_check_consistent_precisions( num_t dt_a, num_t dt_b ) { err_t e_val = BLIS_SUCCESS; if ( dt_a == BLIS_FLOAT ) { if ( dt_b != BLIS_FLOAT && dt_b != BLIS_SCOMPLEX ) e_val = BLIS_INCONSISTENT_PRECISIONS; } else if ( dt_a == BLIS_DOUBLE ) { if ( dt_b != BLIS_DOUBLE && dt_b != BLIS_DCOMPLEX ) e_val = BLIS_INCONSISTENT_PRECISIONS; } return e_val; } err_t bli_check_consistent_object_precisions( obj_t* a, obj_t* b ) { err_t e_val; num_t dt_a; num_t dt_b; dt_a = bli_obj_dt( a ); dt_b = bli_obj_dt( b ); e_val = bli_check_consistent_precisions( dt_a, dt_b ); return e_val; } // -- Dimension-related checks ------------------------------------------------- err_t bli_check_conformal_dims( obj_t* a, obj_t* b ) 
{ err_t e_val = BLIS_SUCCESS; dim_t m_a, n_a; dim_t m_b, n_b; m_a = bli_obj_length_after_trans( a ); n_a = bli_obj_width_after_trans( a ); m_b = bli_obj_length_after_trans( b ); n_b = bli_obj_width_after_trans( b ); if ( m_a != m_b || n_a != n_b ) e_val = BLIS_NONCONFORMAL_DIMENSIONS; return e_val; } err_t bli_check_level3_dims( obj_t* a, obj_t* b, obj_t* c ) { err_t e_val = BLIS_SUCCESS; dim_t m_c, n_c; dim_t m_a, k_a; dim_t k_b, n_b; m_c = bli_obj_length_after_trans( c ); n_c = bli_obj_width_after_trans( c ); m_a = bli_obj_length_after_trans( a ); k_a = bli_obj_width_after_trans( a ); k_b = bli_obj_length_after_trans( b ); n_b = bli_obj_width_after_trans( b ); if ( m_c != m_a || n_c != n_b || k_a != k_b ) e_val = BLIS_NONCONFORMAL_DIMENSIONS; return e_val; } err_t bli_check_scalar_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( bli_obj_length( a ) < 0 || bli_obj_width( a ) < 0 ) return BLIS_NEGATIVE_DIMENSION; if ( bli_obj_length( a ) != 1 || bli_obj_width( a ) != 1 ) return BLIS_EXPECTED_SCALAR_OBJECT; return e_val; } err_t bli_check_vector_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( bli_obj_length( a ) < 0 || bli_obj_width( a ) < 0 ) return BLIS_NEGATIVE_DIMENSION; if ( !bli_obj_is_vector( a ) ) return BLIS_EXPECTED_VECTOR_OBJECT; return e_val; } err_t bli_check_matrix_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( bli_obj_length( a ) < 0 || bli_obj_width( a ) < 0 ) e_val = BLIS_NEGATIVE_DIMENSION; return e_val; } err_t bli_check_equal_vector_lengths( obj_t* x, obj_t* y ) { err_t e_val = BLIS_SUCCESS; dim_t dim_x; dim_t dim_y; dim_x = bli_obj_vector_dim( x ); dim_y = bli_obj_vector_dim( y ); if ( dim_x != dim_y ) e_val = BLIS_UNEQUAL_VECTOR_LENGTHS; return e_val; } err_t bli_check_square_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( bli_obj_length( a ) != bli_obj_width( a ) ) e_val = BLIS_EXPECTED_SQUARE_OBJECT; return e_val; } err_t bli_check_object_length_equals( obj_t* a, dim_t m ) { err_t e_val = BLIS_SUCCESS; if ( 
bli_obj_length( a ) != m ) e_val = BLIS_UNEXPECTED_OBJECT_LENGTH; return e_val; } err_t bli_check_object_width_equals( obj_t* a, dim_t n ) { err_t e_val = BLIS_SUCCESS; if ( bli_obj_width( a ) != n ) e_val = BLIS_UNEXPECTED_OBJECT_WIDTH; return e_val; } err_t bli_check_vector_dim_equals( obj_t* a, dim_t n ) { err_t e_val = BLIS_SUCCESS; if ( bli_obj_vector_dim( a ) != n ) e_val = BLIS_UNEXPECTED_VECTOR_DIM; return e_val; } err_t bli_check_object_diag_offset_equals( obj_t* a, doff_t offset ) { err_t e_val = BLIS_SUCCESS; if ( offset != bli_obj_diag_offset( a ) ) e_val = BLIS_UNEXPECTED_DIAG_OFFSET; return e_val; } // -- Stride-related checks ---------------------------------------------------- err_t bli_check_matrix_strides( dim_t m, dim_t n, inc_t rs, inc_t cs, inc_t is ) { err_t e_val = BLIS_SUCCESS; // Note: A lot of thought went into designing these checks. Do NOT change // them unless you absolutely know what you are doing! Particularly, do // not try to merge the general and row-/column-major sections. It might // be possible, but it would be a lot less readable. // Prohibit negative dimensions. if ( m < 0 || n < 0 ) return BLIS_NEGATIVE_DIMENSION; // Overwrite rs and cs with the absolute value of each. We can do this // since the checks below are not dependent on the sign of the strides. rs = bli_abs( rs ); cs = bli_abs( cs ); is = bli_abs( is ); // The default case (whereby we interpret rs == cs == 0 as a request for // column-major order) is handled prior to calling this function, so the // only time we should see zero strides here is if the matrix is empty. if ( m == 0 || n == 0 ) return e_val; // Disallow row, column, or imaginary strides of zero. if ( ( rs == 0 || cs == 0 || is == 0 ) ) return BLIS_INVALID_DIM_STRIDE_COMBINATION; // Check stride consistency in cases of general stride. if ( rs != 1 && cs != 1 ) { // We apply different tests depending on which way the strides // "tilt". 
if ( rs == cs ) { // If rs == cs, then we must be dealing with an m-by-1 or a // 1-by-n matrix and thus at least one of the dimensions, m // or n, must be unit (even if the other is zero). if ( m != 1 && n != 1 ) return BLIS_INVALID_DIM_STRIDE_COMBINATION; } else if ( rs < cs ) { // For column-major tilt, cs must be equal or larger than m * rs. if ( m * rs > cs ) return BLIS_INVALID_DIM_STRIDE_COMBINATION; } else if ( cs < rs ) { // For row-major tilt, rs must be equal or larger than n * cs. if ( n * cs > rs ) return BLIS_INVALID_DIM_STRIDE_COMBINATION; } } else // check stride consistency of row-/column-storage cases. { if ( rs == 1 && cs == 1 ) { // If rs == cs == 1, then we must be dealing with an m-by-1, a // 1-by-n, or a 1-by-1 matrix and thus at least one of the // dimensions, m or n, must be unit (even if the other is zero). if ( m != 1 && n != 1 ) return BLIS_INVALID_DIM_STRIDE_COMBINATION; } else if ( rs == 1 ) { // For column-major storage, don't allow the column stride to be // less than the m dimension. if ( cs < m ) return BLIS_INVALID_COL_STRIDE; } else if ( cs == 1 ) { // For row-major storage, don't allow the row stride to be less // than the n dimension. 
if ( rs < n ) return BLIS_INVALID_ROW_STRIDE; } } return e_val; } // -- Structure-related checks ------------------------------------------------- err_t bli_check_general_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( !bli_obj_is_general( a ) ) e_val = BLIS_EXPECTED_GENERAL_OBJECT; return e_val; } err_t bli_check_hermitian_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( !bli_obj_is_hermitian( a ) ) e_val = BLIS_EXPECTED_HERMITIAN_OBJECT; return e_val; } err_t bli_check_symmetric_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( !bli_obj_is_symmetric( a ) ) e_val = BLIS_EXPECTED_SYMMETRIC_OBJECT; return e_val; } err_t bli_check_triangular_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( !bli_obj_is_triangular( a ) ) e_val = BLIS_EXPECTED_TRIANGULAR_OBJECT; return e_val; } err_t bli_check_object_struc( obj_t* a, struc_t struc ) { err_t e_val = BLIS_SUCCESS; if ( bli_is_general( struc ) ) e_val = bli_check_general_object( a ); else if ( bli_is_hermitian( struc ) ) e_val = bli_check_hermitian_object( a ); else if ( bli_is_symmetric( struc ) ) e_val = bli_check_symmetric_object( a ); else if ( bli_is_triangular( struc ) ) e_val = bli_check_triangular_object( a ); return e_val; } // -- Storage-related checks --------------------------------------------------- err_t bli_check_upper_or_lower_object( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( !bli_obj_is_lower( a ) && !bli_obj_is_upper( a ) ) e_val = BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT; return e_val; } // -- Partitioning-related checks ---------------------------------------------- err_t bli_check_valid_3x1_subpart( subpart_t part ) { err_t e_val = BLIS_SUCCESS; if ( part != BLIS_SUBPART0 && part != BLIS_SUBPART1AND0 && part != BLIS_SUBPART1 && part != BLIS_SUBPART1AND2 && part != BLIS_SUBPART2 && part != BLIS_SUBPART1A && part != BLIS_SUBPART1B ) e_val = BLIS_INVALID_3x1_SUBPART; return e_val; } err_t bli_check_valid_1x3_subpart( subpart_t part ) { err_t e_val = BLIS_SUCCESS; if ( part != 
BLIS_SUBPART0 && part != BLIS_SUBPART1AND0 && part != BLIS_SUBPART1 && part != BLIS_SUBPART1AND2 && part != BLIS_SUBPART2 && part != BLIS_SUBPART1A && part != BLIS_SUBPART1B ) e_val = BLIS_INVALID_1x3_SUBPART; return e_val; } err_t bli_check_valid_3x3_subpart( subpart_t part ) { err_t e_val = BLIS_SUCCESS; if ( part != BLIS_SUBPART00 && part != BLIS_SUBPART10 && part != BLIS_SUBPART20 && part != BLIS_SUBPART01 && part != BLIS_SUBPART11 && part != BLIS_SUBPART21 && part != BLIS_SUBPART02 && part != BLIS_SUBPART12 && part != BLIS_SUBPART22 ) e_val = BLIS_INVALID_3x3_SUBPART; return e_val; } // -- Control tree-related checks ---------------------------------------------- err_t bli_check_valid_cntl( void* cntl ) { err_t e_val = BLIS_SUCCESS; if ( cntl == NULL ) e_val = BLIS_UNEXPECTED_NULL_CONTROL_TREE; return e_val; } // -- Packing-related checks --------------------------------------------------- err_t bli_check_packm_schema_on_unpack( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( bli_obj_pack_schema( a ) != BLIS_PACKED_ROWS && bli_obj_pack_schema( a ) != BLIS_PACKED_COLUMNS && bli_obj_pack_schema( a ) != BLIS_PACKED_ROW_PANELS && bli_obj_pack_schema( a ) != BLIS_PACKED_COL_PANELS ) e_val = BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK; return e_val; } err_t bli_check_packv_schema_on_unpack( obj_t* a ) { err_t e_val = BLIS_SUCCESS; if ( bli_obj_pack_schema( a ) != BLIS_PACKED_VECTOR ) e_val = BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK; return e_val; } // -- Buffer-related checks ---------------------------------------------------- err_t bli_check_object_buffer( obj_t* a ) { err_t e_val = BLIS_SUCCESS; // We are only concerned with NULL buffers in objects where BOTH // dimensions are non-zero. 
if ( bli_obj_buffer( a ) == NULL ) if ( bli_obj_length( a ) > 0 && bli_obj_width( a ) > 0 ) e_val = BLIS_EXPECTED_NONNULL_OBJECT_BUFFER; return e_val; } // -- Memory checks ------------------------------------------------------------ err_t bli_check_valid_malloc_buf( void* ptr ) { err_t e_val = BLIS_SUCCESS; if ( ptr == NULL ) e_val = BLIS_MALLOC_RETURNED_NULL; return e_val; } // -- Internal memory pool checks ---------------------------------------------- err_t bli_check_valid_packbuf( packbuf_t buf_type ) { err_t e_val = BLIS_SUCCESS; if ( buf_type != BLIS_BUFFER_FOR_A_BLOCK && buf_type != BLIS_BUFFER_FOR_B_PANEL && buf_type != BLIS_BUFFER_FOR_C_PANEL && buf_type != BLIS_BUFFER_FOR_GEN_USE ) e_val = BLIS_INVALID_PACKBUF; return e_val; } err_t bli_check_if_exhausted_pool( pool_t* pool ) { err_t e_val = BLIS_SUCCESS; if ( bli_pool_is_exhausted( pool ) ) e_val = BLIS_EXHAUSTED_CONTIG_MEMORY_POOL; return e_val; } err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx ) { err_t e_val = BLIS_SUCCESS; dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); siz_t dt_size = bli_dt_size( dt ); // NOTE: For induced methods, we use the size of the complex datatypes // (rather than the size of the native micro-kernels' datatype) because // the macro-kernel needs this larger micro-tile footprint, even if the // virtual micro-kernel implementation will only ever be writing to half // of it (real or imaginary part) at a time. if ( mr * nr * dt_size > BLIS_STACK_BUF_MAX_SIZE ) e_val = BLIS_INSUFFICIENT_STACK_BUF_SIZE; return e_val; } err_t bli_check_alignment_is_power_of_two( size_t align_size ) { err_t e_val = BLIS_SUCCESS; // This function returns an error code if align_size is zero or not // a power of two. 
if ( align_size == 0 ) e_val = BLIS_ALIGNMENT_NOT_POWER_OF_TWO; else if ( ( align_size & ( align_size - 1 ) ) ) e_val = BLIS_ALIGNMENT_NOT_POWER_OF_TWO; return e_val; } err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ) { err_t e_val = BLIS_SUCCESS; // This function returns an error code if align_size is not a whole // multiple of the size of a pointer. if ( align_size % sizeof( void* ) != 0 ) e_val = BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE; return e_val; } // -- Object-related errors ---------------------------------------------------- err_t bli_check_object_alias_of( obj_t* a, obj_t* b ) { err_t e_val = BLIS_SUCCESS; if ( !bli_obj_is_alias_of( a, b ) ) e_val = BLIS_EXPECTED_OBJECT_ALIAS; return e_val; } // -- Architecture-related errors ---------------------------------------------- err_t bli_check_valid_arch_id( arch_t id ) { err_t e_val = BLIS_SUCCESS; if ( ( gint_t )id < 0 || BLIS_NUM_ARCHS <= ( gint_t )id ) e_val = BLIS_INVALID_ARCH_ID; return e_val; } // -- Architecture-related errors ---------------------------------------------- err_t bli_check_valid_mc_mod_mult( blksz_t* mc, blksz_t* mr ) { num_t dt; for ( dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) { dim_t mc_def_dt = bli_blksz_get_def( dt, mc ); dim_t mc_max_dt = bli_blksz_get_max( dt, mc ); dim_t mr_dt = bli_blksz_get_def( dt, mr ); if ( mc_def_dt % mr_dt != 0 ) return BLIS_MC_DEF_NONMULTIPLE_OF_MR; else if ( mc_max_dt % mr_dt != 0 ) return BLIS_MC_MAX_NONMULTIPLE_OF_MR; } return BLIS_SUCCESS; } err_t bli_check_valid_nc_mod_mult( blksz_t* nc, blksz_t* nr ) { num_t dt; for ( dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) { dim_t nc_def_dt = bli_blksz_get_def( dt, nc ); dim_t nc_max_dt = bli_blksz_get_max( dt, nc ); dim_t nr_dt = bli_blksz_get_def( dt, nr ); if ( nc_def_dt % nr_dt != 0 ) return BLIS_NC_DEF_NONMULTIPLE_OF_NR; else if ( nc_max_dt % nr_dt != 0 ) return BLIS_NC_MAX_NONMULTIPLE_OF_NR; } return BLIS_SUCCESS; } err_t bli_check_valid_kc_mod_mult( blksz_t* kc, blksz_t* kr ) { num_t dt; for ( 
dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) { dim_t kc_def_dt = bli_blksz_get_def( dt, kc ); dim_t kc_max_dt = bli_blksz_get_max( dt, kc ); dim_t kr_dt = bli_blksz_get_def( dt, kr ); if ( kc_def_dt % kr_dt != 0 ) return BLIS_KC_DEF_NONMULTIPLE_OF_KR; else if ( kc_max_dt % kr_dt != 0 ) return BLIS_KC_MAX_NONMULTIPLE_OF_KR; } return BLIS_SUCCESS; } blis-0.6.1/frame/base/bli_check.h000066400000000000000000000117531360743507500165500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ BLIS_EXPORT_BLIS err_t bli_check_error_code_helper( gint_t code, char* file, guint_t line ); err_t bli_check_valid_error_level( errlev_t level ); err_t bli_check_null_pointer( void* ptr ); err_t bli_check_valid_side( side_t side ); err_t bli_check_valid_uplo( uplo_t uplo ); err_t bli_check_valid_trans( trans_t trans ); err_t bli_check_valid_diag( diag_t diag ); err_t bli_check_nonunit_diag( obj_t* a ); err_t bli_check_valid_datatype( num_t dt ); err_t bli_check_object_valid_datatype( obj_t* a ); err_t bli_check_noninteger_datatype( num_t dt ); err_t bli_check_noninteger_object( obj_t* a ); err_t bli_check_nonconstant_datatype( num_t dt ); err_t bli_check_nonconstant_object( obj_t* a ); err_t bli_check_floating_datatype( num_t dt ); err_t bli_check_floating_object( obj_t* a ); err_t bli_check_real_datatype( num_t dt ); err_t bli_check_real_object( obj_t* a ); err_t bli_check_integer_datatype( num_t dt ); err_t bli_check_integer_object( obj_t* a ); err_t bli_check_consistent_datatypes( num_t dt_a, num_t dt_b ); err_t bli_check_consistent_object_datatypes( obj_t* a, obj_t* b ); err_t bli_check_datatype_real_proj_of( num_t dt_c, num_t dt_r ); err_t bli_check_object_real_proj_of( obj_t* c, obj_t* r ); err_t bli_check_real_valued_object( obj_t* a ); err_t bli_check_consistent_precisions( num_t dt_a, num_t dt_b ); err_t bli_check_consistent_object_precisions( obj_t* a, obj_t* b ); err_t bli_check_conformal_dims( obj_t* a, obj_t* b ); err_t bli_check_level3_dims( 
obj_t* a, obj_t* b, obj_t* c ); err_t bli_check_scalar_object( obj_t* a ); err_t bli_check_vector_object( obj_t* a ); err_t bli_check_matrix_object( obj_t* a ); err_t bli_check_equal_vector_lengths( obj_t* x, obj_t* y ); err_t bli_check_square_object( obj_t* a ); err_t bli_check_object_length_equals( obj_t* a, dim_t m ); err_t bli_check_object_width_equals( obj_t* a, dim_t n ); err_t bli_check_vector_dim_equals( obj_t* a, dim_t n ); err_t bli_check_object_diag_offset_equals( obj_t* a, doff_t offset ); err_t bli_check_matrix_strides( dim_t m, dim_t n, inc_t rs, inc_t cs, inc_t is ); err_t bli_check_general_object( obj_t* a ); err_t bli_check_hermitian_object( obj_t* a ); err_t bli_check_symmetric_object( obj_t* a ); err_t bli_check_triangular_object( obj_t* a ); err_t bli_check_object_struc( obj_t* a, struc_t struc ); err_t bli_check_upper_or_lower_object( obj_t* a ); err_t bli_check_valid_3x1_subpart( subpart_t part ); err_t bli_check_valid_1x3_subpart( subpart_t part ); err_t bli_check_valid_3x3_subpart( subpart_t part ); err_t bli_check_valid_cntl( void* cntl ); err_t bli_check_packm_schema_on_unpack( obj_t* a ); err_t bli_check_packv_schema_on_unpack( obj_t* a ); err_t bli_check_object_buffer( obj_t* a ); err_t bli_check_valid_malloc_buf( void* ptr ); err_t bli_check_valid_packbuf( packbuf_t buf_type ); err_t bli_check_if_exhausted_pool( pool_t* pool ); err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx ); err_t bli_check_alignment_is_power_of_two( size_t align_size ); err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ); err_t bli_check_object_alias_of( obj_t* a, obj_t* b ); err_t bli_check_valid_arch_id( arch_t id ); err_t bli_check_valid_mc_mod_mult( blksz_t* mc, blksz_t* mr ); err_t bli_check_valid_nc_mod_mult( blksz_t* nc, blksz_t* nr ); err_t bli_check_valid_kc_mod_mult( blksz_t* kc, blksz_t* kr ); blis-0.6.1/frame/base/bli_clock.c000066400000000000000000000101541360743507500165530ustar00rootroot00000000000000/* BLIS An 
object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" static double gtod_ref_time_sec = 0.0; double bli_clock( void ) { return bli_clock_helper(); } double bli_clock_min_diff( double time_min, double time_start ) { double time_min_prev; double time_diff; // Save the old value. 
time_min_prev = time_min; time_diff = bli_clock() - time_start; time_min = bli_fmin( time_min, time_diff ); // Assume that anything: // - under or equal to zero, // - under a nanosecond // is actually garbled due to the clocks being taken too closely together. if ( time_min <= 0.0 ) time_min = time_min_prev; else if ( time_min < 1.0e-9 ) time_min = time_min_prev; return time_min; } #if BLIS_OS_WINDOWS // --- Begin Windows build definitions ----------------------------------------- double bli_clock_helper() { LARGE_INTEGER clock_freq = {0}; LARGE_INTEGER clock_val; BOOL r_val; r_val = QueryPerformanceFrequency( &clock_freq ); if ( r_val == 0 ) { bli_print_msg( "QueryPerformanceFrequency() failed", __FILE__, __LINE__ ); bli_abort(); } r_val = QueryPerformanceCounter( &clock_val ); if ( r_val == 0 ) { bli_print_msg( "QueryPerformanceCounter() failed", __FILE__, __LINE__ ); bli_abort(); } return ( ( double) clock_val.QuadPart / ( double) clock_freq.QuadPart ); } // --- End Windows build definitions ------------------------------------------- #elif BLIS_OS_OSX // --- Begin OSX build definitions ------------------------------------------- double bli_clock_helper() { mach_timebase_info_data_t timebase; mach_timebase_info( &timebase ); uint64_t nsec = mach_absolute_time(); double the_time = (double) nsec * 1.0e-9 * timebase.numer / timebase.denom; if ( gtod_ref_time_sec == 0.0 ) gtod_ref_time_sec = the_time; return the_time - gtod_ref_time_sec; } // --- End OSX build definitions --------------------------------------------- #else // --- Begin Linux build definitions ------------------------------------------- double bli_clock_helper() { double the_time, norm_sec; struct timespec ts; clock_gettime( CLOCK_MONOTONIC, &ts ); if ( gtod_ref_time_sec == 0.0 ) gtod_ref_time_sec = ( double ) ts.tv_sec; norm_sec = ( double ) ts.tv_sec - gtod_ref_time_sec; the_time = norm_sec + ts.tv_nsec * 1.0e-9; return the_time; } // --- End Linux build definitions 
--------------------------------------------- #endif blis-0.6.1/frame/base/bli_clock.h000066400000000000000000000034511360743507500165620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ BLIS_EXPORT_BLIS double bli_clock( void ); BLIS_EXPORT_BLIS double bli_clock_min_diff( double time_min, double time_start ); double bli_clock_helper( void ); blis-0.6.1/frame/base/bli_cntl.c000066400000000000000000000256061360743507500164300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" cntl_t* bli_cntl_create_node ( rntm_t* rntm, opid_t family, bszid_t bszid, void_fp var_func, void* params, cntl_t* sub_node ) { cntl_t* cntl; mem_t* pack_mem; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntl_create_node(): " ); #endif // Allocate the cntl_t struct. cntl = bli_sba_acquire( rntm, sizeof( cntl_t ) ); bli_cntl_set_family( family, cntl ); bli_cntl_set_bszid( bszid, cntl ); bli_cntl_set_var_func( var_func, cntl ); bli_cntl_set_params( params, cntl ); bli_cntl_set_sub_prenode( NULL, cntl ); bli_cntl_set_sub_node( sub_node, cntl ); // Query the address of the node's packed mem_t entry so we can initialize // key fields (to NULL or 0). // NOTE: This initialization is important, since it allows threads to // discern whether blocks have been acquired from the memory allocator. pack_mem = bli_cntl_pack_mem( cntl ); bli_mem_clear( pack_mem ); return cntl; } void bli_cntl_free_node ( rntm_t* rntm, cntl_t* cntl ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntl_free_node(): " ); #endif bli_sba_release( rntm, cntl ); } void bli_cntl_clear_node ( cntl_t* cntl ) { mem_t* pack_mem; // Clear various fields in the control tree. Clearing these fields // actually is not needed, but we do it for debugging/completeness. bli_cntl_set_var_func( NULL, cntl ); bli_cntl_set_params( NULL, cntl ); bli_cntl_set_sub_prenode( NULL, cntl ); bli_cntl_set_sub_node( NULL, cntl ); // Clearing these fields is potentially more important if the control // tree is cached somewhere and reused. pack_mem = bli_cntl_pack_mem( cntl ); bli_mem_clear( pack_mem ); } // ----------------------------------------------------------------------------- void bli_cntl_free ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { if ( thread != NULL ) bli_cntl_free_w_thrinfo( rntm, cntl, thread ); else bli_cntl_free_wo_thrinfo( rntm, cntl ); } void bli_cntl_free_w_thrinfo ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { // Base case: simply return when asked to free NULL nodes. 
if ( cntl == NULL ) return; cntl_t* cntl_sub_prenode = bli_cntl_sub_prenode( cntl ); cntl_t* cntl_sub_node = bli_cntl_sub_node( cntl ); void* cntl_params = bli_cntl_params( cntl ); mem_t* cntl_pack_mem = bli_cntl_pack_mem( cntl ); // Don't immediately dereference the prenode and subnode of the thrinfo_t // node. In some cases, the thrinfo_t tree is not built out all the way, // perhaps because there are more ways of parallelization than micropanels // of data in this dimension, or because the problem is small enough that // there is no gemm subproblem in bli_trsm_blk_var1(). Thus, we start with // NULL values for these variables and only dereference the fields of the // thrinfo_t struct if the thrinfo_t exists (ie: is non-NULL). We will also // have to check the thrinfo_t pointer for NULLness before using it below, // when checking if we need to free the pack_mem field of the cntl_t node // (see below). thrinfo_t* thread_sub_prenode = NULL; thrinfo_t* thread_sub_node = NULL; if ( thread != NULL ) { thread_sub_prenode = bli_thrinfo_sub_prenode( thread ); thread_sub_node = bli_thrinfo_sub_node( thread ); } // Only recurse into prenode branch if it exists. if ( cntl_sub_prenode != NULL ) { // Recursively free all memory associated with the sub-prenode and its // children. bli_cntl_free_w_thrinfo( rntm, cntl_sub_prenode, thread_sub_prenode ); } // Only recurse into the child node if it exists. if ( cntl_sub_node != NULL ) { // Recursively free all memory associated with the sub-node and its // children. bli_cntl_free_w_thrinfo( rntm, cntl_sub_node, thread_sub_node ); } // Free the current node's params field, if it is non-NULL. 
if ( cntl_params != NULL ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntl_free_w_thrinfo(): " ); #endif bli_sba_release( rntm, cntl_params ); } // Release the current node's pack mem_t entry back to the memory // broker from which it originated, but only if the mem_t entry is // allocated, and only if the current thread is chief for its group. // Also note that we don't proceed with either of the above tests if // the thrinfo_t pointer is NULL. (See above for background on when // this can happen.) if ( thread != NULL ) if ( bli_thread_am_ochief( thread ) ) if ( bli_mem_is_alloc( cntl_pack_mem ) ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntl_free_w_thrinfo(): releasing mem pool block.\n" ); #endif bli_membrk_release( rntm, cntl_pack_mem ); } // Free the current node. bli_cntl_free_node( rntm, cntl ); } void bli_cntl_free_wo_thrinfo ( rntm_t* rntm, cntl_t* cntl ) { // Base case: simply return when asked to free NULL nodes. if ( cntl == NULL ) return; cntl_t* cntl_sub_prenode = bli_cntl_sub_prenode( cntl ); cntl_t* cntl_sub_node = bli_cntl_sub_node( cntl ); void* cntl_params = bli_cntl_params( cntl ); mem_t* cntl_pack_mem = bli_cntl_pack_mem( cntl ); { // Recursively free all memory associated with the sub-prenode and its // children. bli_cntl_free_wo_thrinfo( rntm, cntl_sub_prenode ); } { // Recursively free all memory associated with the sub-node and its // children. bli_cntl_free_wo_thrinfo( rntm, cntl_sub_node ); } // Free the current node's params field, if it is non-NULL. if ( cntl_params != NULL ) { bli_sba_release( rntm, cntl_params ); } // Release the current node's pack mem_t entry back to the memory // broker from which it originated, but only if the mem_t entry is // allocated. if ( bli_mem_is_alloc( cntl_pack_mem ) ) { bli_membrk_release( rntm, cntl_pack_mem ); } // Free the current node. 
bli_cntl_free_node( rntm, cntl ); } // ----------------------------------------------------------------------------- cntl_t* bli_cntl_copy ( rntm_t* rntm, cntl_t* cntl ) { // Make a copy of the current node. Notice that the source node // should NOT have any allocated/cached mem_t entries, and that // bli_cntl_create_node() creates a node with a cleared mem_t // field. cntl_t* cntl_copy = bli_cntl_create_node ( rntm, bli_cntl_family( cntl ), bli_cntl_bszid( cntl ), bli_cntl_var_func( cntl ), NULL, NULL ); // Check the params field of the existing control tree; if it's non-NULL, // copy it. if ( bli_cntl_params( cntl ) != NULL ) { // Detect the size of the params struct by reading the first field // as a uint64_t, and then allocate this many bytes for a new params // struct. uint64_t params_size = bli_cntl_params_size( cntl ); void* params_orig = bli_cntl_params( cntl ); void* params_copy = bli_sba_acquire( rntm, ( size_t )params_size ); // Copy the original params struct to the new memory region. memcpy( params_copy, params_orig, params_size ); // Save the address of the new params struct into the new control // tree node. bli_cntl_set_params( params_copy, cntl_copy ); } // If the sub-prenode exists, copy it recursively. if ( bli_cntl_sub_prenode( cntl ) != NULL ) { cntl_t* sub_prenode_copy = bli_cntl_copy ( rntm, bli_cntl_sub_prenode( cntl ) ); // Save the address of the new sub-node (sub-tree) to the existing // node. bli_cntl_set_sub_prenode( sub_prenode_copy, cntl_copy ); } // If the sub-node exists, copy it recursively. if ( bli_cntl_sub_node( cntl ) != NULL ) { cntl_t* sub_node_copy = bli_cntl_copy ( rntm, bli_cntl_sub_node( cntl ) ); // Save the address of the new sub-node (sub-tree) to the existing // node. bli_cntl_set_sub_node( sub_node_copy, cntl_copy ); } // Return the address of the newly created node. 
return cntl_copy; } void bli_cntl_mark_family ( opid_t family, cntl_t* cntl ) { // This function sets the family field of all cntl tree nodes that are // children of cntl. It's used by bli_l3_cntl_create_if() after making // a copy of a user-given cntl tree, if the user provided one, to mark // the operation family, which is used to determine appropriate behavior // by various functions when executing the blocked variants. // Set the family of the root node. bli_cntl_set_family( family, cntl ); // Recursively set the family field of the sub-tree rooted at the sub-node, // if it exists. if ( bli_cntl_sub_prenode( cntl ) != NULL ) { bli_cntl_mark_family( family, bli_cntl_sub_prenode( cntl ) ); } // Recursively set the family field of the sub-tree rooted at the prenode, // if it exists. if ( bli_cntl_sub_node( cntl ) != NULL ) { bli_cntl_mark_family( family, bli_cntl_sub_node( cntl ) ); } } // ----------------------------------------------------------------------------- dim_t bli_cntl_calc_num_threads_in ( rntm_t* rntm, cntl_t* cntl ) { dim_t n_threads_in = 1; for ( ; cntl != NULL; cntl = bli_cntl_sub_node( cntl ) ) { bszid_t bszid = bli_cntl_bszid( cntl ); dim_t cur_way; // We assume bszid is in {KR,MR,NR,MC,KC,NR} if it is not // BLIS_NO_PART. if ( bszid != BLIS_NO_PART ) cur_way = bli_rntm_ways_for( bszid, rntm ); else cur_way = 1; n_threads_in *= cur_way; } return n_threads_in; } blis-0.6.1/frame/base/bli_cntl.h000066400000000000000000000124041360743507500164250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* // -- Control tree node definition -- struct cntl_s { // Basic fields (usually required). opid_t family; bszid_t bszid; void_fp var_func; struct cntl_s* sub_prenode; struct cntl_s* sub_node; // Optional fields (needed only by some operations such as packm). // NOTE: first field of params must be a uint64_t containing the size // of the struct. void* params; // Internal fields that track "cached" data. 
mem_t pack_mem; }; typedef struct cntl_s cntl_t; */ // -- Control tree prototypes -- BLIS_EXPORT_BLIS cntl_t* bli_cntl_create_node ( rntm_t* rntm, opid_t family, bszid_t bszid, void_fp var_func, void* params, cntl_t* sub_node ); BLIS_EXPORT_BLIS void bli_cntl_free_node ( rntm_t* rntm, cntl_t* cntl ); BLIS_EXPORT_BLIS void bli_cntl_clear_node ( cntl_t* cntl ); // ----------------------------------------------------------------------------- BLIS_EXPORT_BLIS void bli_cntl_free ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); BLIS_EXPORT_BLIS void bli_cntl_free_w_thrinfo ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); BLIS_EXPORT_BLIS void bli_cntl_free_wo_thrinfo ( rntm_t* rntm, cntl_t* cntl ); BLIS_EXPORT_BLIS cntl_t* bli_cntl_copy ( rntm_t* rntm, cntl_t* cntl ); BLIS_EXPORT_BLIS void bli_cntl_mark_family ( opid_t family, cntl_t* cntl ); // ----------------------------------------------------------------------------- dim_t bli_cntl_calc_num_threads_in ( rntm_t* rntm, cntl_t* cntl ); // ----------------------------------------------------------------------------- // cntl_t query (fields only) static opid_t bli_cntl_family( cntl_t* cntl ) { return cntl->family; } static bszid_t bli_cntl_bszid( cntl_t* cntl ) { return cntl->bszid; } static void_fp bli_cntl_var_func( cntl_t* cntl ) { return cntl->var_func; } static cntl_t* bli_cntl_sub_prenode( cntl_t* cntl ) { return cntl->sub_prenode; } static cntl_t* bli_cntl_sub_node( cntl_t* cntl ) { return cntl->sub_node; } static void* bli_cntl_params( cntl_t* cntl ) { return cntl->params; } static uint64_t bli_cntl_params_size( cntl_t* cntl ) { // The first 64 bytes is always the size of the params structure. 
return *( ( uint64_t* )(cntl->params) ); } static mem_t* bli_cntl_pack_mem( cntl_t* cntl ) { return &(cntl->pack_mem); } // cntl_t query (complex) static bool_t bli_cntl_is_null( cntl_t* cntl ) { return ( bool_t ) ( cntl == NULL ); } static bool_t bli_cntl_is_leaf( cntl_t* cntl ) { return ( bool_t ) ( bli_cntl_sub_node( cntl ) == NULL ); } static bool_t bli_cntl_does_part( cntl_t* cntl ) { return ( bool_t ) ( bli_cntl_bszid( cntl ) != BLIS_NO_PART ); } // cntl_t modification static void bli_cntl_set_family( opid_t family, cntl_t* cntl ) { cntl->family = family; } static void bli_cntl_set_bszid( bszid_t bszid, cntl_t* cntl ) { cntl->bszid = bszid; } static void bli_cntl_set_var_func( void_fp var_func, cntl_t* cntl ) { cntl->var_func = var_func; } static void bli_cntl_set_sub_prenode( cntl_t* sub_prenode, cntl_t* cntl ) { cntl->sub_prenode = sub_prenode; } static void bli_cntl_set_sub_node( cntl_t* sub_node, cntl_t* cntl ) { cntl->sub_node = sub_node; } static void bli_cntl_set_params( void* params, cntl_t* cntl ) { cntl->params = params; } static void bli_cntl_set_pack_mem( mem_t* pack_mem, cntl_t* cntl ) { cntl->pack_mem = *pack_mem; } blis-0.6.1/frame/base/bli_cntx.c000066400000000000000000001524221360743507500164410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_cntx_clear( cntx_t* cntx ) { // Fill the entire cntx_t structure with zeros. memset( ( void* )cntx, 0, sizeof( cntx_t ) ); } // ----------------------------------------------------------------------------- void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) { // This function can be called from the bli_cntx_init_*() function for // a particular architecture if the kernel developer wishes to use // non-default blocksizes. It should be called after // bli_cntx_init_defaults() so that the context begins with default // blocksizes across all datatypes. /* Example prototypes: void bli_cntx_set_blkszs ( ind_t method = BLIS_NAT, dim_t n_bs, bszid_t bs0_id, blksz_t* blksz0, bszid_t bm0_id, bszid_t bs1_id, blksz_t* blksz1, bszid_t bm1_id, bszid_t bs2_id, blksz_t* blksz2, bszid_t bm2_id, ... 
cntx_t* cntx ); void bli_cntx_set_blkszs ( ind_t method != BLIS_NAT, dim_t n_bs, bszid_t bs0_id, blksz_t* blksz0, bszid_t bm0_id, dim_t def_scalr0, dim_t max_scalr0, bszid_t bs1_id, blksz_t* blksz1, bszid_t bm1_id, dim_t def_scalr1, dim_t max_scalr1, bszid_t bs2_id, blksz_t* blksz2, bszid_t bm2_id, dim_t def_scalr2, dim_t max_scalr2, ... cntx_t* cntx ); */ va_list args; dim_t i; // Allocate some temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bszid_t* bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif double* msclrs = bli_malloc_intl( n_bs * sizeof( double ) ); // -- Begin variable argument section -- // Initialize variable argument environment. va_start( args, n_bs ); // Handle native and induced method cases separately. if ( method == BLIS_NAT ) { // Process n_bs tuples. for ( i = 0; i < n_bs; ++i ) { // Here, we query the variable argument list for: // - the bszid_t of the blocksize we're about to process, // - the address of the blksz_t object, // - the bszid_t of the multiple we need to associate with // the blksz_t object. bszid_t bs_id = ( bszid_t )va_arg( args, bszid_t ); blksz_t* blksz = ( blksz_t* )va_arg( args, blksz_t* ); bszid_t bm_id = ( bszid_t )va_arg( args, bszid_t ); // Store the values in our temporary arrays. bszids[ i ] = bs_id; blkszs[ i ] = blksz; bmults[ i ] = bm_id; } } else // if induced method execution was indicated { // Process n_bs tuples. 
for ( i = 0; i < n_bs; ++i ) { // Here, we query the variable argument list for: // - the bszid_t of the blocksize we're about to process, // - the address of the blksz_t object, // - the bszid_t of the multiple we need to associate with // the blksz_t object, // - the scalars we wish to apply to the real blocksizes to // come up with the induced complex blocksizes (for default // and maximum blocksizes). bszid_t bs_id = ( bszid_t )va_arg( args, bszid_t ); blksz_t* blksz = ( blksz_t* )va_arg( args, blksz_t* ); bszid_t bm_id = ( bszid_t )va_arg( args, bszid_t ); double dsclr = ( double )va_arg( args, double ); double msclr = ( double )va_arg( args, double ); // Store the values in our temporary arrays. bszids[ i ] = bs_id; blkszs[ i ] = blksz; bmults[ i ] = bm_id; dsclrs[ i ] = dsclr; msclrs[ i ] = msclr; } } // The last argument should be the context pointer. cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* ); // Shutdown variable argument environment and clean up stack. va_end( args ); // -- End variable argument section -- // Save the execution type into the context. bli_cntx_set_method( method, cntx ); // Query the context for the addresses of: // - the blocksize object array // - the blocksize multiple array blksz_t* cntx_blkszs = bli_cntx_blkszs_buf( cntx ); bszid_t* cntx_bmults = bli_cntx_bmults_buf( cntx ); // Now that we have the context address, we want to copy the values // from the temporary buffers into the corresponding buffers in the // context. Notice that the blksz_t* pointers were saved, rather than // the objects themselves, but we copy the contents of the objects // when copying into the context. // Handle native and induced method cases separately. if ( method == BLIS_NAT ) { // Process each blocksize id tuple provided. for ( i = 0; i < n_bs; ++i ) { // Read the current blocksize id, blksz_t* pointer, blocksize // multiple id, and blocksize scalar. 
bszid_t bs_id = bszids[ i ]; bszid_t bm_id = bmults[ i ]; blksz_t* blksz = blkszs[ i ]; blksz_t* cntx_blksz = &cntx_blkszs[ bs_id ]; // Copy the blksz_t object contents into the appropriate // location within the context's blksz_t array. Do the same // for the blocksize multiple id. //cntx_blkszs[ bs_id ] = *blksz; //bli_blksz_copy( blksz, cntx_blksz ); bli_blksz_copy_if_pos( blksz, cntx_blksz ); // Copy the blocksize multiple id into the context. cntx_bmults[ bs_id ] = bm_id; } } else { // Process each blocksize id tuple provided. for ( i = 0; i < n_bs; ++i ) { // Read the current blocksize id, blksz_t pointer, blocksize // multiple id, and blocksize scalar. bszid_t bs_id = bszids[ i ]; bszid_t bm_id = bmults[ i ]; double dsclr = dsclrs[ i ]; double msclr = msclrs[ i ]; blksz_t* blksz = blkszs[ i ]; // NOTE: This is a bug! We need to grab the actual blocksize // multiple, which is not at blkszs[i], but rather somewhere else // in the array. In order to fix this, you probably need to store // the contents of blkszs (and all the other arrays) by bs_id // rather than i in the first loop. blksz_t* bmult = blkszs[ i ]; blksz_t* cntx_blksz = &cntx_blkszs[ bs_id ]; // Copy the real domain values of the source blksz_t object into // the context, duplicating into the complex domain fields. bli_blksz_copy_dt( BLIS_FLOAT, blksz, BLIS_FLOAT, cntx_blksz ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz, BLIS_DOUBLE, cntx_blksz ); bli_blksz_copy_dt( BLIS_FLOAT, blksz, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_copy_dt( BLIS_DOUBLE, blksz, BLIS_DCOMPLEX, cntx_blksz ); // If the default blocksize scalar is non-unit, we need to scale // the complex domain default blocksizes. if ( dsclr != 1.0 ) { // Scale the complex domain default blocksize values in the // blocksize object. 
bli_blksz_scale_def( 1, ( dim_t )dsclr, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_scale_def( 1, ( dim_t )dsclr, BLIS_DCOMPLEX, cntx_blksz ); // Perform rounding to ensure the newly scaled values are still // multiples of their register blocksize multiples. But only // perform this rounding when the blocksize id is not equal to // the blocksize multiple id (ie: we don't round down scaled // register blocksizes since they are their own multiples). // Also, we skip the rounding for 1m since it should never need // such rounding. if ( bs_id != bm_id && method != BLIS_1M ) { // Round the newly-scaled blocksizes down to their multiple. bli_blksz_reduce_def_to( BLIS_FLOAT, bmult, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_reduce_def_to( BLIS_DOUBLE, bmult, BLIS_DCOMPLEX, cntx_blksz ); } } // Similarly, if the maximum blocksize scalar is non-unit, we need // to scale the complex domain maximum blocksizes. if ( msclr != 1.0 ) { // Scale the complex domain maximum blocksize values in the // blocksize object. bli_blksz_scale_max( 1, ( dim_t )msclr, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_scale_max( 1, ( dim_t )msclr, BLIS_DCOMPLEX, cntx_blksz ); // Perform rounding to ensure the newly scaled values are still // multiples of their register blocksize multiples. But only // perform this rounding when the blocksize id is not equal to // the blocksize multiple id (ie: we don't round down scaled // register blocksizes since they are their own multiples). // Also, we skip the rounding for 1m since it should never need // such rounding. if ( bs_id != bm_id && method != BLIS_1M ) { // Round the newly-scaled blocksizes down to their multiple. bli_blksz_reduce_max_to( BLIS_FLOAT, bmult, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_reduce_max_to( BLIS_DOUBLE, bmult, BLIS_DCOMPLEX, cntx_blksz ); } } // Copy the blocksize multiple id into the context. cntx_bmults[ bs_id ] = bm_id; } } // Free the temporary local arrays. 
#ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bli_free_intl( blkszs ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bli_free_intl( bszids ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bli_free_intl( bmults ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bli_free_intl( dsclrs ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bli_free_intl( msclrs ); } // ----------------------------------------------------------------------------- void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ) { /* Example prototypes: void bli_gks_cntx_set_ind_blkszs ( ind_t method != BLIS_NAT, dim_t n_bs, bszid_t bs0_id, dim_t def_scalr0, dim_t max_scalr0, bszid_t bs1_id, dim_t def_scalr1, dim_t max_scalr1, bszid_t bs2_id, dim_t def_scalr2, dim_t max_scalr2, ... cntx_t* cntx ); NOTE: This function modifies an existing context that is presumed to have been initialized for native execution. */ va_list args; dim_t i; // Return early if called with BLIS_NAT. if ( method == BLIS_NAT ) return; // Allocate some temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_ind_blkszs(): " ); #endif bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_ind_blkszs(): " ); #endif double* dsclrs = bli_malloc_intl( n_bs * sizeof( double ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_ind_blkszs(): " ); #endif double* msclrs = bli_malloc_intl( n_bs * sizeof( double ) ); // -- Begin variable argument section -- // Initialize variable argument environment. va_start( args, n_bs ); { // Process n_bs tuples. 
for ( i = 0; i < n_bs; ++i ) { // Here, we query the variable argument list for: // - the bszid_t of the blocksize we're about to process, // - the scalars we wish to apply to the real blocksizes to // come up with the induced complex blocksizes (for default // and maximum blocksizes). bszid_t bs_id = ( bszid_t )va_arg( args, bszid_t ); double dsclr = ( double )va_arg( args, double ); double msclr = ( double )va_arg( args, double ); // Store the values in our temporary arrays. bszids[ i ] = bs_id; dsclrs[ i ] = dsclr; msclrs[ i ] = msclr; } } // The last argument should be the context pointer. cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* ); // Shutdown variable argument environment and clean up stack. va_end( args ); // -- End variable argument section -- // Save the execution type into the context. bli_cntx_set_method( method, cntx ); // Now that we have the context address, we want to copy the values // from the temporary buffers into the corresponding buffers in the // context. { // Process each blocksize id tuple provided. for ( i = 0; i < n_bs; ++i ) { // Read the current blocksize id, blocksize multiple id, // and blocksize scalar. bszid_t bs_id = bszids[ i ]; double dsclr = dsclrs[ i ]; double msclr = msclrs[ i ]; //blksz_t* cntx_blksz = &cntx_blkszs[ bs_id ]; // Query the blocksize multiple's blocksize id. bszid_t bm_id = bli_cntx_get_bmult_id( bs_id, cntx ); // Query the context for the blksz_t object assoicated with the // current blocksize id, and also query the object corresponding // to the blocksize multiple. blksz_t* cntx_blksz = bli_cntx_get_blksz( bs_id, cntx ); blksz_t* cntx_bmult = bli_cntx_get_bmult( bs_id, cntx ); // Copy the real domain values of the blksz_t object into the // the complex domain slots of the same object. 
bli_blksz_copy_dt( BLIS_FLOAT, cntx_blksz, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_copy_dt( BLIS_DOUBLE, cntx_blksz, BLIS_DCOMPLEX, cntx_blksz ); // If the default blocksize scalar is non-unit, we need to scale // the complex domain default blocksizes. if ( dsclr != 1.0 ) { // Scale the complex domain default blocksize values in the // blocksize object. bli_blksz_scale_def( 1, ( dim_t )dsclr, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_scale_def( 1, ( dim_t )dsclr, BLIS_DCOMPLEX, cntx_blksz ); // Perform rounding to ensure the newly scaled values are still // multiples of their register blocksize multiples. But only // perform this rounding when the blocksize id is not equal to // the blocksize multiple id (ie: we don't round down scaled // register blocksizes since they are their own multiples). // Also, we skip the rounding for 1m since it should never need // such rounding. if ( bs_id != bm_id && method != BLIS_1M ) { // Round the newly-scaled blocksizes down to their multiple. bli_blksz_reduce_def_to( BLIS_FLOAT, cntx_bmult, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_reduce_def_to( BLIS_DOUBLE, cntx_bmult, BLIS_DCOMPLEX, cntx_blksz ); } } // Similarly, if the maximum blocksize scalar is non-unit, we need // to scale the complex domain maximum blocksizes. if ( msclr != 1.0 ) { // Scale the complex domain maximum blocksize values in the // blocksize object. bli_blksz_scale_max( 1, ( dim_t )msclr, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_scale_max( 1, ( dim_t )msclr, BLIS_DCOMPLEX, cntx_blksz ); // Perform rounding to ensure the newly scaled values are still // multiples of their register blocksize multiples. But only // perform this rounding when the blocksize id is not equal to // the blocksize multiple id (ie: we don't round down scaled // register blocksizes since they are their own multiples). // Also, we skip the rounding for 1m since it should never need // such rounding. 
if ( bs_id != bm_id && method != BLIS_1M ) { // Round the newly-scaled blocksizes down to their multiple. bli_blksz_reduce_max_to( BLIS_FLOAT, cntx_bmult, BLIS_SCOMPLEX, cntx_blksz ); bli_blksz_reduce_max_to( BLIS_DOUBLE, cntx_bmult, BLIS_DCOMPLEX, cntx_blksz ); } } } } // Free the temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_ind_blkszs(): " ); #endif bli_free_intl( bszids ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_ind_blkszs(): " ); #endif bli_free_intl( dsclrs ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_ind_blkszs(): " ); #endif bli_free_intl( msclrs ); } // ----------------------------------------------------------------------------- void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... ) { // This function can be called from the bli_cntx_init_*() function for // a particular architecture if the kernel developer wishes to use // non-default level-3 microkernels. It should be called after // bli_cntx_init_defaults() so that the context begins with default // microkernels across all datatypes. /* Example prototypes: void bli_cntx_set_l3_nat_ukrs ( dim_t n_ukrs, l3ukr_t ukr0_id, num_t dt0, void_fp ukr0_fp, bool_t pref0, l3ukr_t ukr1_id, num_t dt1, void_fp ukr1_fp, bool_t pref1, l3ukr_t ukr2_id, num_t dt2, void_fp ukr2_fp, bool_t pref2, ... cntx_t* cntx ); */ va_list args; dim_t i; // Allocate some temporary local arrays. 
#ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_nat_ukrs(): " ); #endif l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_nat_ukrs(): " ); #endif num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_nat_ukrs(): " ); #endif void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_nat_ukrs(): " ); #endif bool_t* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool_t ) ); // -- Begin variable argument section -- // Initialize variable argument environment. va_start( args, n_ukrs ); // Process n_ukrs tuples. for ( i = 0; i < n_ukrs; ++i ) { // Here, we query the variable argument list for: // - the l3ukr_t of the kernel we're about to process, // - the datatype of the kernel, // - the kernel function pointer, and // - the kernel function storage preference // that we need to store to the context. // NOTE: The type that we pass into the va_arg() macro for the ukr // preference matters. Using 'bool_t' may cause breakage on 64-bit // systems that define int as 32 bits and long int and pointers as // 64 bits. The problem is that TRUE or FALSE are defined as 1 and // 0, respectively, and when "passed" into the variadic function // they come with no contextual typecast. Thus, default rules of // argument promotion kick in to treat these integer literals as // being of type int. Thus, we need to let va_arg() treat the TRUE // or FALSE value as an int, even if we cast it to and store it // within a bool_t afterwards. const l3ukr_t ukr_id = ( l3ukr_t )va_arg( args, l3ukr_t ); const num_t ukr_dt = ( num_t )va_arg( args, num_t ); void_fp ukr_fp = ( void_fp )va_arg( args, void_fp ); const bool_t ukr_pref = ( bool_t )va_arg( args, int ); // Store the values in our temporary arrays. 
ukr_ids[ i ] = ukr_id; ukr_dts[ i ] = ukr_dt; ukr_fps[ i ] = ukr_fp; ukr_prefs[ i ] = ukr_pref; } // The last argument should be the context pointer. cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* ); // Shutdown variable argument environment and clean up stack. va_end( args ); // -- End variable argument section -- // Query the context for the addresses of: // - the l3 virtual ukernel func_t array // - the l3 native ukernel func_t array // - the l3 native ukernel preferences array func_t* cntx_l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx ); func_t* cntx_l3_nat_ukrs = bli_cntx_l3_nat_ukrs_buf( cntx ); mbool_t* cntx_l3_nat_ukrs_prefs = bli_cntx_l3_nat_ukrs_prefs_buf( cntx ); // Now that we have the context address, we want to copy the values // from the temporary buffers into the corresponding buffers in the // context. // Process each blocksize id tuple provided. for ( i = 0; i < n_ukrs; ++i ) { // Read the current ukernel id, ukernel datatype, ukernel function // pointer, and ukernel preference. const l3ukr_t ukr_id = ukr_ids[ i ]; const num_t ukr_dt = ukr_dts[ i ]; void_fp ukr_fp = ukr_fps[ i ]; const bool_t ukr_pref = ukr_prefs[ i ]; // Index into the func_t and mbool_t for the current kernel id // being processed. func_t* vukrs = &cntx_l3_vir_ukrs[ ukr_id ]; func_t* ukrs = &cntx_l3_nat_ukrs[ ukr_id ]; mbool_t* prefs = &cntx_l3_nat_ukrs_prefs[ ukr_id ]; // Store the ukernel function pointer and preference values into // the context. Notice that we redundantly store the native // ukernel address in both the native and virtual ukernel slots // in the context. This is standard practice when creating a // native context. (Induced method contexts will overwrite the // virtual function pointer with the address of the appropriate // virtual ukernel.) bli_func_set_dt( ukr_fp, ukr_dt, vukrs ); bli_func_set_dt( ukr_fp, ukr_dt, ukrs ); bli_mbool_set_dt( ukr_pref, ukr_dt, prefs ); } // Free the temporary local arrays. 
#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_nat_ukrs(): " );
#endif
	bli_free_intl( ukr_ids );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_nat_ukrs(): " );
#endif
	bli_free_intl( ukr_dts );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_nat_ukrs(): " );
#endif
	bli_free_intl( ukr_fps );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_nat_ukrs(): " );
#endif
	bli_free_intl( ukr_prefs );
}

// -----------------------------------------------------------------------------

//
// bli_cntx_set_l3_vir_ukrs()
//
// Reads n_ukrs ( l3ukr_t id, num_t datatype, void_fp function pointer )
// triples from the variable argument list, followed by a trailing cntx_t*,
// and passes each function pointer and datatype to bli_func_set_dt() for
// the func_t at index id within the context's level-3 virtual ukernel
// array. Only the virtual ukernel array is touched by this function.
//
void bli_cntx_set_l3_vir_ukrs( dim_t n_ukrs, ... )
{
	// This function can be called from the bli_cntx_init_*() function for
	// a particular architecture if the kernel developer wishes to use
	// non-default level-3 virtual microkernels. It should be called after
	// bli_cntx_init_defaults() so that the context begins with default
	// microkernels across all datatypes.

	/* Example prototypes:

	   void bli_cntx_set_l3_vir_ukrs
	   (
	     dim_t n_ukrs,
	     l3ukr_t ukr0_id, num_t dt0, void_fp ukr0_fp,
	     l3ukr_t ukr1_id, num_t dt1, void_fp ukr1_fp,
	     l3ukr_t ukr2_id, num_t dt2, void_fp ukr2_fp,
	     ...
	     cntx_t* cntx
	   );
	*/

	va_list args;
	dim_t i;

	// Allocate some temporary local arrays. The tuples must be buffered
	// because the context pointer is the LAST variadic argument, and so it
	// is not available until every tuple has been consumed.

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
	l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ) );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
	num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
	void_fp* ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void_fp ) );

	// -- Begin variable argument section --

	// Initialize variable argument environment.
	va_start( args, n_ukrs );

	// Process n_ukrs tuples.
	for ( i = 0; i < n_ukrs; ++i )
	{
		// Here, we query the variable argument list for:
		// - the l3ukr_t of the kernel we're about to process,
		// - the datatype of the kernel, and
		// - the kernel function pointer
		// that we need to store to the context. The order of the va_arg()
		// calls must match the order in which the caller passes each tuple.
		const l3ukr_t ukr_id = ( l3ukr_t )va_arg( args, l3ukr_t );
		const num_t ukr_dt = ( num_t )va_arg( args, num_t );
		void_fp ukr_fp = ( void_fp )va_arg( args, void_fp );

		// Store the values in our temporary arrays.
		ukr_ids[ i ] = ukr_id;
		ukr_dts[ i ] = ukr_dt;
		ukr_fps[ i ] = ukr_fp;
	}

	// The last argument should be the context pointer.
	cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* );

	// Shutdown variable argument environment and clean up stack.
	va_end( args );

	// -- End variable argument section --

	// Query the context for the address of:
	// - the l3 virtual ukernel func_t array
	func_t* cntx_l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx );

	// Now that we have the context address, we want to copy the values
	// from the temporary buffers into the corresponding buffers in the
	// context.

	// Process each ukernel tuple provided.
	for ( i = 0; i < n_ukrs; ++i )
	{
		// Read the current ukernel id, ukernel datatype, and ukernel
		// function pointer.
		const l3ukr_t ukr_id = ukr_ids[ i ];
		const num_t ukr_dt = ukr_dts[ i ];
		void_fp ukr_fp = ukr_fps[ i ];

		// Index into the virtual ukernel func_t array for the current
		// kernel id being processed.
		func_t* vukrs = &cntx_l3_vir_ukrs[ ukr_id ];

		// Store the ukernel function pointer for the given datatype into
		// the virtual ukernel func_t. Unlike the native ukernel setter,
		// this function does not write to the native ukernel array or to
		// any preference array.
		bli_func_set_dt( ukr_fp, ukr_dt, vukrs );
	}

	// Free the temporary local arrays.

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
	bli_free_intl( ukr_ids );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
	bli_free_intl( ukr_dts );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_vir_ukrs(): " );
#endif
	bli_free_intl( ukr_fps );
}

// -----------------------------------------------------------------------------

void bli_cntx_set_l3_sup_thresh( dim_t n_thresh, ... )
{
	// This function can be called from the bli_cntx_init_*() function for
	// a particular architecture if the kernel developer wishes to use
	// non-default thresholds for small/unpacked matrix handling. It should
	// be called after bli_cntx_init_defaults() so that the context begins
	// with default thresholds.

	/* Example prototypes:

	   void bli_cntx_set_l3_sup_thresh
	   (
	     dim_t n_thresh,
	     threshid_t th0_id, blksz_t* blksz0,
	     threshid_t th1_id, blksz_t* blksz1,
	     ...
	     cntx_t* cntx
	   );
	*/

	va_list args;
	dim_t i;

	// Allocate some temporary local arrays.

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_sup_thresh(): " );
#endif
	threshid_t* threshids = bli_malloc_intl( n_thresh * sizeof( threshid_t ) );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_sup_thresh(): " );
#endif
	blksz_t** threshs = bli_malloc_intl( n_thresh * sizeof( blksz_t* ) );

	// -- Begin variable argument section --

	// Initialize variable argument environment.
	va_start( args, n_thresh );

	// Process n_thresh tuples.
	for ( i = 0; i < n_thresh; ++i )
	{
		// Here, we query the variable argument list for:
		// - the threshid_t of the threshold we're about to process,
		// - the address of the blksz_t object,
		threshid_t th_id = ( threshid_t )va_arg( args, threshid_t );
		blksz_t* thresh = ( blksz_t* )va_arg( args, blksz_t* );

		// Store the values in our temporary arrays.
		threshids[ i ] = th_id;
		threshs[ i ] = thresh;
	}

	// The last argument should be the context pointer.
cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* );

	// Shutdown variable argument environment and clean up stack.
	va_end( args );

	// -- End variable argument section --

	// Query the context for the address of:
	// - the threshold array
	blksz_t* cntx_threshs = bli_cntx_l3_sup_thresh_buf( cntx );

	// Now that we have the context address, we want to copy the values
	// from the temporary buffers into the corresponding buffers in the
	// context. Notice that the blksz_t* pointers were saved, rather than
	// the objects themselves, but we copy the contents of the objects
	// when copying into the context.

	// Process each threshold id tuple provided.
	for ( i = 0; i < n_thresh; ++i )
	{
		// Read the current threshold id and blksz_t* pointer.
		threshid_t th_id = threshids[ i ];
		blksz_t* thresh = threshs[ i ];

		blksz_t* cntx_thresh = &cntx_threshs[ th_id ];

		// Copy the blksz_t object contents into the appropriate
		// location within the context's threshold array.
		// NOTE(review): bli_blksz_copy_if_pos() presumably copies only
		// the positive entries, leaving the existing (default) values in
		// place otherwise -- confirm against its definition.
		//cntx_threshs[ th_id ] = *thresh;
		//bli_blksz_copy( thresh, cntx_thresh );
		bli_blksz_copy_if_pos( thresh, cntx_thresh );
	}

	// Free the temporary local arrays.

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_sup_thresh(): " );
#endif
	bli_free_intl( threshs );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_sup_thresh(): " );
#endif
	bli_free_intl( threshids );
}

// -----------------------------------------------------------------------------

//
// bli_cntx_set_l3_sup_handlers()
//
// Reads n_ops ( opid_t id, void* handler ) pairs from the variable argument
// list, followed by a trailing cntx_t*, and stores each handler pointer in
// the context's l3 sup handler array at the index given by its operation id.
//
void bli_cntx_set_l3_sup_handlers( dim_t n_ops, ... )
{
	// This function can be called from the bli_cntx_init_*() function for
	// a particular architecture if the kernel developer wishes to use
	// non-default level-3 operation handler for small/unpacked matrices. It
	// should be called after bli_cntx_init_defaults() so that the context
	// begins with default sup handlers across all datatypes.

	/* Example prototypes:

	   void bli_cntx_set_l3_sup_handlers
	   (
	     dim_t n_ops,
	     opid_t op0_id, void* handler0_fp,
	     opid_t op1_id, void* handler1_fp,
	     opid_t op2_id, void* handler2_fp,
	     ...
	     cntx_t* cntx
	   );
	*/

	va_list args;
	dim_t i;

	// Allocate some temporary local arrays. The pairs must be buffered
	// because the context pointer is the LAST variadic argument, and so it
	// is not available until every pair has been consumed.

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_sup_handlers(): " );
#endif
	opid_t* op_ids = bli_malloc_intl( n_ops * sizeof( opid_t ) );

#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_cntx_set_l3_sup_handlers(): " );
#endif
	void** op_fps = bli_malloc_intl( n_ops * sizeof( void* ) );

	// -- Begin variable argument section --

	// Initialize variable argument environment.
	va_start( args, n_ops );

	// Process n_ops tuples.
	for ( i = 0; i < n_ops; ++i )
	{
		// Here, we query the variable argument list for:
		// - the opid_t of the operation we're about to process,
		// - the sup handler function pointer
		// that we need to store to the context.
		const opid_t op_id = ( opid_t )va_arg( args, opid_t );
		void* op_fp = ( void* )va_arg( args, void* );

		// Store the values in our temporary arrays.
		op_ids[ i ] = op_id;
		op_fps[ i ] = op_fp;
	}

	// The last argument should be the context pointer.
	cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* );

	// Shutdown variable argument environment and clean up stack.
	va_end( args );

	// -- End variable argument section --

	// Query the context for the address of:
	// - the l3 small/unpacked handlers array
	void** cntx_l3_sup_handlers = bli_cntx_l3_sup_handlers_buf( cntx );

	// Now that we have the context address, we want to copy the values
	// from the temporary buffers into the corresponding buffers in the
	// context.

	// Process each operation id tuple provided.
	for ( i = 0; i < n_ops; ++i )
	{
		// Read the current operation id and handler function pointer.
		const opid_t op_id = op_ids[ i ];
		void* op_fp = op_fps[ i ];

		// Store the sup handler function pointer into the slot for the
		// specified operation id.
		cntx_l3_sup_handlers[ op_id ] = op_fp;
	}

	// Free the temporary local arrays.
#ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_handlers(): " ); #endif bli_free_intl( op_ids ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_handlers(): " ); #endif bli_free_intl( op_fps ); } // ----------------------------------------------------------------------------- void bli_cntx_set_l3_sup_blkszs( dim_t n_bs, ... ) { // This function can be called from the bli_cntx_init_*() function for // a particular architecture if the kernel developer wishes to use // non-default l3 sup blocksizes. It should be called after // bli_cntx_init_defaults() so that the context begins with default // blocksizes across all datatypes. /* Example prototypes: void bli_cntx_set_blkszs ( dim_t n_bs, bszid_t bs0_id, blksz_t* blksz0, bszid_t bs1_id, blksz_t* blksz1, bszid_t bs2_id, blksz_t* blksz2, ... cntx_t* cntx ); */ va_list args; dim_t i; // Allocate some temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bszid_t* bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif blksz_t** blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) ); // -- Begin variable argument section -- // Initialize variable argument environment. va_start( args, n_bs ); // Process n_bs tuples. for ( i = 0; i < n_bs; ++i ) { // Here, we query the variable argument list for: // - the bszid_t of the blocksize we're about to process, // - the address of the blksz_t object. bszid_t bs_id = ( bszid_t )va_arg( args, bszid_t ); blksz_t* blksz = ( blksz_t* )va_arg( args, blksz_t* ); // Store the values in our temporary arrays. bszids[ i ] = bs_id; blkszs[ i ] = blksz; } // The last argument should be the context pointer. cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* ); // Shutdown variable argument environment and clean up stack. 
va_end( args ); // -- End variable argument section -- // Query the context for the addresses of: // - the blocksize object array blksz_t* cntx_l3_sup_blkszs = bli_cntx_l3_sup_blkszs_buf( cntx ); // Now that we have the context address, we want to copy the values // from the temporary buffers into the corresponding buffers in the // context. Notice that the blksz_t* pointers were saved, rather than // the objects themselves, but we copy the contents of the objects // when copying into the context. // Process each blocksize id tuple provided. for ( i = 0; i < n_bs; ++i ) { // Read the current blocksize id, blksz_t* pointer, blocksize // multiple id, and blocksize scalar. bszid_t bs_id = bszids[ i ]; blksz_t* blksz = blkszs[ i ]; blksz_t* cntx_l3_sup_blksz = &cntx_l3_sup_blkszs[ bs_id ]; // Copy the blksz_t object contents into the appropriate // location within the context's blksz_t array. //cntx_l3_sup_blkszs[ bs_id ] = *blksz; //bli_blksz_copy( blksz, cntx_l3_sup_blksz ); bli_blksz_copy_if_pos( blksz, cntx_l3_sup_blksz ); } // Free the temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bli_free_intl( blkszs ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_blkszs(): " ); #endif bli_free_intl( bszids ); } // ----------------------------------------------------------------------------- void bli_cntx_set_l3_sup_kers( dim_t n_ukrs, ... ) { // This function can be called from the bli_cntx_init_*() function for // a particular architecture if the kernel developer wishes to use // non-default level-3 microkernels for small/unpacked matrices. It // should be called after bli_cntx_init_defaults() so that the context // begins with default sup micro/millikernels across all datatypes. 
/* Example prototypes: void bli_cntx_set_l3_sup_kers ( dim_t n_ukrs, stor3_t stor_id0, num_t dt0, void* ukr0_fp, bool_t pref0, stor3_t stor_id1, num_t dt1, void* ukr1_fp, bool_t pref1, stor3_t stor_id2, num_t dt2, void* ukr2_fp, bool_t pref2, ... cntx_t* cntx ); */ va_list args; dim_t i; // Allocate some temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_kers(): " ); #endif stor3_t* st3_ids = bli_malloc_intl( n_ukrs * sizeof( stor3_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_kers(): " ); #endif num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_kers(): " ); #endif void** ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void* ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_kers(): " ); #endif bool_t* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool_t ) ); // -- Begin variable argument section -- // Initialize variable argument environment. va_start( args, n_ukrs ); // Process n_ukrs tuples. for ( i = 0; i < n_ukrs; ++i ) { // Here, we query the variable argument list for: // - the stor3_t storage case being assigned to the kernel we're // about to process, // - the datatype of the kernel, // - the kernel function pointer, and // - the kernel function storage preference // that we need to store to the context. const stor3_t st3_id = ( stor3_t )va_arg( args, stor3_t ); const num_t ukr_dt = ( num_t )va_arg( args, num_t ); void* ukr_fp = ( void* )va_arg( args, void* ); const bool_t ukr_pref = ( bool_t )va_arg( args, int ); // Store the values in our temporary arrays. st3_ids[ i ] = st3_id; ukr_dts[ i ] = ukr_dt; ukr_fps[ i ] = ukr_fp; ukr_prefs[ i ] = ukr_pref; } // The last argument should be the context pointer. cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* ); // Shutdown variable argument environment and clean up stack. 
va_end( args ); // -- End variable argument section -- // Query the context for the addresses of: // - the l3 small/unpacked ukernel func_t array // - the l3 small/unpacked ukernel preferences array func_t* cntx_l3_sup_kers = bli_cntx_l3_sup_kers_buf( cntx ); mbool_t* cntx_l3_sup_kers_prefs = bli_cntx_l3_sup_kers_prefs_buf( cntx ); // Now that we have the context address, we want to copy the values // from the temporary buffers into the corresponding buffers in the // context. #if 0 dim_t sup_map[ BLIS_NUM_LEVEL3_SUP_UKRS ][2]; // Create the small/unpacked ukernel mappings: // - rv -> rrr 0, rcr 2 // - rg -> rrc 1, rcc 3 // - cv -> ccr 6, ccc 7 // - cg -> crr 4, crc 5 // - rd -> rrc 1 // - cd -> crc 5 // - rc -> rcc 3 // - cr -> crr 4 // - gx -> xxx 8 // NOTE: We only need to set one slot in the context l3_sup_kers array // for the general-stride/generic ukernel type, but since the loop below // needs to be set up to set two slots to accommodate the RV, RG, CV, and // CG, ukernel types, we will just be okay with the GX ukernel being set // redundantly. (The RD, CD, CR, and RC ukernel types are set redundantly // for the same reason.) 
sup_map[ BLIS_GEMMSUP_RV_UKR ][0] = BLIS_RRR; sup_map[ BLIS_GEMMSUP_RV_UKR ][1] = BLIS_RCR; sup_map[ BLIS_GEMMSUP_RG_UKR ][0] = BLIS_RRC; sup_map[ BLIS_GEMMSUP_RG_UKR ][1] = BLIS_RCC; sup_map[ BLIS_GEMMSUP_CV_UKR ][0] = BLIS_CCR; sup_map[ BLIS_GEMMSUP_CV_UKR ][1] = BLIS_CCC; sup_map[ BLIS_GEMMSUP_CG_UKR ][0] = BLIS_CRR; sup_map[ BLIS_GEMMSUP_CG_UKR ][1] = BLIS_CRC; sup_map[ BLIS_GEMMSUP_RD_UKR ][0] = BLIS_RRC; sup_map[ BLIS_GEMMSUP_RD_UKR ][1] = BLIS_RRC; sup_map[ BLIS_GEMMSUP_CD_UKR ][0] = BLIS_CRC; sup_map[ BLIS_GEMMSUP_CD_UKR ][1] = BLIS_CRC; sup_map[ BLIS_GEMMSUP_RC_UKR ][0] = BLIS_RCC; sup_map[ BLIS_GEMMSUP_RC_UKR ][1] = BLIS_RCC; sup_map[ BLIS_GEMMSUP_CR_UKR ][0] = BLIS_CRR; sup_map[ BLIS_GEMMSUP_CR_UKR ][1] = BLIS_CRR; sup_map[ BLIS_GEMMSUP_GX_UKR ][0] = BLIS_XXX; sup_map[ BLIS_GEMMSUP_GX_UKR ][1] = BLIS_XXX; #endif // Process each blocksize id tuple provided. for ( i = 0; i < n_ukrs; ++i ) { // Read the current stor3_t id, ukernel datatype, ukernel function // pointer, and ukernel preference. const stor3_t st3_id = st3_ids[ i ]; const num_t ukr_dt = ukr_dts[ i ]; void* ukr_fp = ukr_fps[ i ]; const bool_t ukr_pref = ukr_prefs[ i ]; // Index to the func_t and mbool_t for the current stor3_t id // being processed. func_t* ukrs = &cntx_l3_sup_kers[ st3_id ]; mbool_t* prefs = &cntx_l3_sup_kers_prefs[ st3_id ]; // Store the ukernel function pointer and preference values into // the stor3_t location in the context. bli_func_set_dt( ukr_fp, ukr_dt, ukrs ); bli_mbool_set_dt( ukr_pref, ukr_dt, prefs ); } // Free the temporary local arrays. 
#ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_kers(): " ); #endif bli_free_intl( st3_ids ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_kers(): " ); #endif bli_free_intl( ukr_dts ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_kers(): " ); #endif bli_free_intl( ukr_fps ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l3_sup_kers(): " ); #endif bli_free_intl( ukr_prefs ); } // ----------------------------------------------------------------------------- void bli_cntx_set_l1f_kers( dim_t n_kers, ... ) { // This function can be called from the bli_cntx_init_*() function for // a particular architecture if the kernel developer wishes to use // non-default level-1f kernels. It should be called after // bli_cntx_init_defaults() so that the context begins with default l1f // kernels across all datatypes. /* Example prototypes: void bli_cntx_set_l1f_kers ( dim_t n_ukrs, l1fkr_t ker0_id, num_t ker0_dt, void_fp ker0_fp, l1fkr_t ker1_id, num_t ker1_dt, void_fp ker1_fp, l1fkr_t ker2_id, num_t ker2_dt, void_fp ker2_fp, ... cntx_t* cntx ); */ va_list args; dim_t i; // Allocate some temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1f_kers(): " ); #endif l1fkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1fkr_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1f_kers(): " ); #endif num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1f_kers(): " ); #endif void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) ); // -- Begin variable argument section -- // Initialize variable argument environment. va_start( args, n_kers ); // Process n_kers tuples. for ( i = 0; i < n_kers; ++i ) { // Here, we query the variable argument list for: // - the l1fkr_t of the kernel we're about to process, // - the datatype of the kernel, and // - the kernel function pointer // that we need to store to the context. 
const l1fkr_t ker_id = ( l1fkr_t )va_arg( args, l1fkr_t ); const num_t ker_dt = ( num_t )va_arg( args, num_t ); void_fp ker_fp = ( void_fp )va_arg( args, void_fp ); // Store the values in our temporary arrays. ker_ids[ i ] = ker_id; ker_dts[ i ] = ker_dt; ker_fps[ i ] = ker_fp; } // The last argument should be the context pointer. cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* ); // Shutdown variable argument environment and clean up stack. va_end( args ); // -- End variable argument section -- // Query the context for the address of: // - the level-1f kernels func_t array func_t* cntx_l1f_kers = bli_cntx_l1f_kers_buf( cntx ); // Now that we have the context address, we want to copy the values // from the temporary buffers into the corresponding buffers in the // context. // Process each blocksize id tuple provided. for ( i = 0; i < n_kers; ++i ) { // Read the current kernel id, kernel datatype, and kernel function // pointer. const l1fkr_t ker_id = ker_ids[ i ]; const num_t ker_dt = ker_dts[ i ]; void_fp ker_fp = ker_fps[ i ]; // Index into the func_t and mbool_t for the current kernel id // being processed. func_t* kers = &cntx_l1f_kers[ ker_id ]; // Store the ukernel function pointer and preference values into // the context. bli_func_set_dt( ker_fp, ker_dt, kers ); } // Free the temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1f_kers(): " ); #endif bli_free_intl( ker_ids ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1f_kers(): " ); #endif bli_free_intl( ker_dts ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1f_kers(): " ); #endif bli_free_intl( ker_fps ); } // ----------------------------------------------------------------------------- void bli_cntx_set_l1v_kers( dim_t n_kers, ... ) { // This function can be called from the bli_cntx_init_*() function for // a particular architecture if the kernel developer wishes to use // non-default level-1v kernels. 
It should be called after // bli_cntx_init_defaults() so that the context begins with default l1v // kernels across all datatypes. /* Example prototypes: void bli_cntx_set_l1v_kers ( dim_t n_ukrs, l1vkr_t ker0_id, num_t ker0_dt, void_fp ker0_fp, l1vkr_t ker1_id, num_t ker1_dt, void_fp ker1_fp, l1vkr_t ker2_id, num_t ker2_dt, void_fp ker2_fp, ... cntx_t* cntx ); */ va_list args; dim_t i; // Allocate some temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1v_kers(): " ); #endif l1vkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1vkr_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1v_kers(): " ); #endif num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1v_kers(): " ); #endif void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) ); // -- Begin variable argument section -- // Initialize variable argument environment. va_start( args, n_kers ); // Process n_kers tuples. for ( i = 0; i < n_kers; ++i ) { // Here, we query the variable argument list for: // - the l1vkr_t of the kernel we're about to process, // - the datatype of the kernel, and // - the kernel function pointer // that we need to store to the context. const l1vkr_t ker_id = ( l1vkr_t )va_arg( args, l1vkr_t ); const num_t ker_dt = ( num_t )va_arg( args, num_t ); void_fp ker_fp = ( void_fp )va_arg( args, void_fp ); // Store the values in our temporary arrays. ker_ids[ i ] = ker_id; ker_dts[ i ] = ker_dt; ker_fps[ i ] = ker_fp; } // The last argument should be the context pointer. cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* ); // Shutdown variable argument environment and clean up stack. 
va_end( args ); // -- End variable argument section -- // Query the context for the address of: // - the level-1v kernels func_t array func_t* cntx_l1v_kers = bli_cntx_l1v_kers_buf( cntx ); // Now that we have the context address, we want to copy the values // from the temporary buffers into the corresponding buffers in the // context. // Process each blocksize id tuple provided. for ( i = 0; i < n_kers; ++i ) { // Read the current kernel id, kernel datatype, and kernel function // pointer. const l1vkr_t ker_id = ker_ids[ i ]; const num_t ker_dt = ker_dts[ i ]; void_fp ker_fp = ker_fps[ i ]; // Index into the func_t and mbool_t for the current kernel id // being processed. func_t* kers = &cntx_l1v_kers[ ker_id ]; // Store the ukernel function pointer and preference values into // the context. bli_func_set_dt( ker_fp, ker_dt, kers ); } // Free the temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1v_kers(): " ); #endif bli_free_intl( ker_ids ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1v_kers(): " ); #endif bli_free_intl( ker_dts ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_l1v_kers(): " ); #endif bli_free_intl( ker_fps ); } // ----------------------------------------------------------------------------- void bli_cntx_set_packm_kers( dim_t n_kers, ... ) { // This function can be called from the bli_cntx_init_*() function for // a particular architecture if the kernel developer wishes to use // non-default packing kernels. It should be called after // bli_cntx_init_defaults() so that the context begins with default packm // kernels across all datatypes. /* Example prototypes: void bli_cntx_set_packm_kers ( dim_t n_ukrs, l1mkr_t ker0_id, num_t ker0_dt, void_fp ker0_fp, l1mkr_t ker1_id, num_t ker1_dt, void_fp ker1_fp, l1mkr_t ker2_id, num_t ker2_dt, void_fp ker2_fp, ... cntx_t* cntx ); */ va_list args; dim_t i; // Allocate some temporary local arrays. 
#ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_packm_kers(): " ); #endif l1mkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1mkr_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_packm_kers(): " ); #endif num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_packm_kers(): " ); #endif void_fp* ker_fps = bli_malloc_intl( n_kers * sizeof( void_fp ) ); // -- Begin variable argument section -- // Initialize variable argument environment. va_start( args, n_kers ); // Process n_kers tuples. for ( i = 0; i < n_kers; ++i ) { // Here, we query the variable argument list for: // - the l1mkr_t of the kernel we're about to process, // - the datatype of the kernel, and // - the kernel function pointer // that we need to store to the context. const l1mkr_t ker_id = ( l1mkr_t )va_arg( args, l1mkr_t ); const num_t ker_dt = ( num_t )va_arg( args, num_t ); void_fp ker_fp = ( void_fp )va_arg( args, void_fp ); // Store the values in our temporary arrays. ker_ids[ i ] = ker_id; ker_dts[ i ] = ker_dt; ker_fps[ i ] = ker_fp; } // The last argument should be the context pointer. cntx_t* cntx = ( cntx_t* )va_arg( args, cntx_t* ); // Shutdown variable argument environment and clean up stack. va_end( args ); // -- End variable argument section -- // Query the context for the address of: // - the packm kernels func_t array func_t* cntx_packm_kers = bli_cntx_packm_kers_buf( cntx ); // Now that we have the context address, we want to copy the values // from the temporary buffers into the corresponding buffers in the // context. // Process each blocksize id tuple provided. for ( i = 0; i < n_kers; ++i ) { // Read the current kernel id, kernel datatype, and kernel function // pointer. const l1mkr_t ker_id = ker_ids[ i ]; const num_t ker_dt = ker_dts[ i ]; void_fp ker_fp = ker_fps[ i ]; // Index into the func_t and mbool_t for the current kernel id // being processed. 
func_t* kers = &cntx_packm_kers[ ker_id ]; // Store the ukernel function pointer and preference values into // the context. bli_func_set_dt( ker_fp, ker_dt, kers ); } // Free the temporary local arrays. #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_packm_kers(): " ); #endif bli_free_intl( ker_ids ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_packm_kers(): " ); #endif bli_free_intl( ker_dts ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_cntx_set_packm_kers(): " ); #endif bli_free_intl( ker_fps ); } // ----------------------------------------------------------------------------- void bli_cntx_print( cntx_t* cntx ) { dim_t i; // Print the values stored in the blksz_t objects. printf( " s d c z\n" ); for ( i = 0; i < BLIS_NUM_BLKSZS; ++i ) { printf( "blksz/mult %2lu: %13lu/%2lu %13lu/%2lu %13lu/%2lu %13lu/%2lu\n", ( unsigned long )i, ( unsigned long )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, i, cntx ), ( unsigned long )bli_cntx_get_bmult_dt ( BLIS_FLOAT, i, cntx ), ( unsigned long )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, i, cntx ), ( unsigned long )bli_cntx_get_bmult_dt ( BLIS_DOUBLE, i, cntx ), ( unsigned long )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, i, cntx ), ( unsigned long )bli_cntx_get_bmult_dt ( BLIS_SCOMPLEX, i, cntx ), ( unsigned long )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, i, cntx ), ( unsigned long )bli_cntx_get_bmult_dt ( BLIS_DCOMPLEX, i, cntx ) ); } for ( i = 0; i < BLIS_NUM_LEVEL3_UKRS; ++i ) { func_t* ukr = bli_cntx_get_l3_vir_ukrs( i, cntx ); printf( "l3 vir ukr %2lu: %16p %16p %16p %16p\n", ( unsigned long )i, bli_func_get_dt( BLIS_FLOAT, ukr ), bli_func_get_dt( BLIS_DOUBLE, ukr ), bli_func_get_dt( BLIS_SCOMPLEX, ukr ), bli_func_get_dt( BLIS_DCOMPLEX, ukr ) ); } for ( i = 0; i < BLIS_NUM_3OP_RC_COMBOS; ++i ) { func_t* ukr = bli_cntx_get_l3_sup_kers( i, cntx ); printf( "l3 sup ukr %2lu: %16p %16p %16p %16p\n", ( unsigned long )i, bli_func_get_dt( BLIS_FLOAT, ukr ), bli_func_get_dt( BLIS_DOUBLE, ukr ), bli_func_get_dt( BLIS_SCOMPLEX, 
ukr ), bli_func_get_dt( BLIS_DCOMPLEX, ukr ) ); } for ( i = 0; i < BLIS_NUM_LEVEL1F_KERS; ++i ) { func_t* ker = bli_cntx_get_l1f_kers( i, cntx ); printf( "l1f ker %2lu: %16p %16p %16p %16p\n", ( unsigned long )i, bli_func_get_dt( BLIS_FLOAT, ker ), bli_func_get_dt( BLIS_DOUBLE, ker ), bli_func_get_dt( BLIS_SCOMPLEX, ker ), bli_func_get_dt( BLIS_DCOMPLEX, ker ) ); } for ( i = 0; i < BLIS_NUM_LEVEL1V_KERS; ++i ) { func_t* ker = bli_cntx_get_l1v_kers( i, cntx ); printf( "l1v ker %2lu: %16p %16p %16p %16p\n", ( unsigned long )i, bli_func_get_dt( BLIS_FLOAT, ker ), bli_func_get_dt( BLIS_DOUBLE, ker ), bli_func_get_dt( BLIS_SCOMPLEX, ker ), bli_func_get_dt( BLIS_DCOMPLEX, ker ) ); } { ind_t method = bli_cntx_method( cntx ); printf( "ind method : %lu\n", ( unsigned long )method ); } } blis-0.6.1/frame/base/bli_cntx.h000066400000000000000000000532721360743507500164510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_CNTX_H #define BLIS_CNTX_H // Context object type (defined in bli_type_defs.h) /* typedef struct cntx_s { blksz_t* blkszs; bszid_t* bmults; func_t* l3_vir_ukrs; func_t* l3_nat_ukrs; mbool_t* l3_nat_ukrs_prefs; blksz_t* l3_sup_thresh; void** l3_sup_handlers; blksz_t* l3_sup_blkszs; func_t* l3_sup_kers; mbool_t* l3_sup_kers_prefs; func_t* l1f_kers; func_t* l1v_kers; func_t* packm_kers; func_t* unpackm_kers; ind_t method; pack_t schema_a; pack_t schema_b; pack_t schema_c; } cntx_t; */ // ----------------------------------------------------------------------------- // // -- cntx_t query (fields only) ----------------------------------------------- // static blksz_t* bli_cntx_blkszs_buf( cntx_t* cntx ) { return cntx->blkszs; } static bszid_t* bli_cntx_bmults_buf( cntx_t* cntx ) { return cntx->bmults; } static func_t* bli_cntx_l3_vir_ukrs_buf( cntx_t* cntx ) { return cntx->l3_vir_ukrs; } static func_t* bli_cntx_l3_nat_ukrs_buf( cntx_t* cntx ) { return cntx->l3_nat_ukrs; } static mbool_t* bli_cntx_l3_nat_ukrs_prefs_buf( cntx_t* cntx ) { return cntx->l3_nat_ukrs_prefs; } static blksz_t* bli_cntx_l3_sup_thresh_buf( cntx_t* cntx ) { return cntx->l3_sup_thresh; } static void** bli_cntx_l3_sup_handlers_buf( cntx_t* cntx 
) { return cntx->l3_sup_handlers; } static blksz_t* bli_cntx_l3_sup_blkszs_buf( cntx_t* cntx ) { return cntx->l3_sup_blkszs; } static func_t* bli_cntx_l3_sup_kers_buf( cntx_t* cntx ) { return cntx->l3_sup_kers; } static mbool_t* bli_cntx_l3_sup_kers_prefs_buf( cntx_t* cntx ) { return cntx->l3_sup_kers_prefs; } static func_t* bli_cntx_l1f_kers_buf( cntx_t* cntx ) { return cntx->l1f_kers; } static func_t* bli_cntx_l1v_kers_buf( cntx_t* cntx ) { return cntx->l1v_kers; } static func_t* bli_cntx_packm_kers_buf( cntx_t* cntx ) { return cntx->packm_kers; } static func_t* bli_cntx_unpackm_kers_buf( cntx_t* cntx ) { return cntx->unpackm_kers; } static ind_t bli_cntx_method( cntx_t* cntx ) { return cntx->method; } static pack_t bli_cntx_schema_a_block( cntx_t* cntx ) { return cntx->schema_a_block; } static pack_t bli_cntx_schema_b_panel( cntx_t* cntx ) { return cntx->schema_b_panel; } static pack_t bli_cntx_schema_c_panel( cntx_t* cntx ) { return cntx->schema_c_panel; } // ----------------------------------------------------------------------------- // // -- cntx_t modification (fields only) ---------------------------------------- // static void bli_cntx_set_method( ind_t method, cntx_t* cntx ) { cntx->method = method; } static void bli_cntx_set_schema_a_block( pack_t schema, cntx_t* cntx ) { cntx->schema_a_block = schema; } static void bli_cntx_set_schema_b_panel( pack_t schema, cntx_t* cntx ) { cntx->schema_b_panel = schema; } static void bli_cntx_set_schema_c_panel( pack_t schema, cntx_t* cntx ) { cntx->schema_c_panel = schema; } static void bli_cntx_set_schema_ab_blockpanel( pack_t sa, pack_t sb, cntx_t* cntx ) { bli_cntx_set_schema_a_block( sa, cntx ); bli_cntx_set_schema_b_panel( sb, cntx ); } // ----------------------------------------------------------------------------- // // -- cntx_t query (complex) --------------------------------------------------- // static blksz_t* bli_cntx_get_blksz( bszid_t bs_id, cntx_t* cntx ) { blksz_t* blkszs = bli_cntx_blkszs_buf( cntx 
); blksz_t* blksz = &blkszs[ bs_id ]; // Return the address of the blksz_t identified by bs_id. return blksz; } static dim_t bli_cntx_get_blksz_def_dt( num_t dt, bszid_t bs_id, cntx_t* cntx ) { blksz_t* blksz = bli_cntx_get_blksz( bs_id, cntx ); dim_t bs_dt = bli_blksz_get_def( dt, blksz ); // Return the main (default) blocksize value for the datatype given. return bs_dt; } static dim_t bli_cntx_get_blksz_max_dt( num_t dt, bszid_t bs_id, cntx_t* cntx ) { blksz_t* blksz = bli_cntx_get_blksz( bs_id, cntx ); dim_t bs_dt = bli_blksz_get_max( dt, blksz ); // Return the auxiliary (maximum) blocksize value for the datatype given. return bs_dt; } static bszid_t bli_cntx_get_bmult_id( bszid_t bs_id, cntx_t* cntx ) { bszid_t* restrict bmults = bli_cntx_bmults_buf( cntx ); bszid_t bm_id = bmults[ bs_id ]; return bm_id; } static blksz_t* bli_cntx_get_bmult( bszid_t bs_id, cntx_t* cntx ) { bszid_t bm_id = bli_cntx_get_bmult_id( bs_id, cntx ); blksz_t* restrict bmult = bli_cntx_get_blksz( bm_id, cntx ); return bmult; } static dim_t bli_cntx_get_bmult_dt( num_t dt, bszid_t bs_id, cntx_t* cntx ) { blksz_t* bmult = bli_cntx_get_bmult( bs_id, cntx ); dim_t bm_dt = bli_blksz_get_def( dt, bmult ); return bm_dt; } // ----------------------------------------------------------------------------- static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx ) { func_t* funcs = bli_cntx_l3_vir_ukrs_buf( cntx ); func_t* func = &funcs[ ukr_id ]; return func; } static void_fp bli_cntx_get_l3_vir_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { func_t* func = bli_cntx_get_l3_vir_ukrs( ukr_id, cntx ); return bli_func_get_dt( dt, func ); } static func_t* bli_cntx_get_l3_nat_ukrs( l3ukr_t ukr_id, cntx_t* cntx ) { func_t* funcs = bli_cntx_l3_nat_ukrs_buf( cntx ); func_t* func = &funcs[ ukr_id ]; return func; } static void_fp bli_cntx_get_l3_nat_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { func_t* func = bli_cntx_get_l3_nat_ukrs( ukr_id, cntx ); return bli_func_get_dt( dt, func ); } 
// ----------------------------------------------------------------------------- static mbool_t* bli_cntx_get_l3_nat_ukr_prefs( l3ukr_t ukr_id, cntx_t* cntx ) { mbool_t* mbools = bli_cntx_l3_nat_ukrs_prefs_buf( cntx ); mbool_t* mbool = &mbools[ ukr_id ]; return mbool; } static bool_t bli_cntx_get_l3_nat_ukr_prefs_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { mbool_t* mbool = bli_cntx_get_l3_nat_ukr_prefs( ukr_id, cntx ); return bli_mbool_get_dt( dt, mbool ); } // ----------------------------------------------------------------------------- static blksz_t* bli_cntx_get_l3_sup_thresh( threshid_t thresh_id, cntx_t* cntx ) { blksz_t* threshs = bli_cntx_l3_sup_thresh_buf( cntx ); blksz_t* thresh = &threshs[ thresh_id ]; // Return the address of the blksz_t identified by thresh_id. return thresh; } static dim_t bli_cntx_get_l3_sup_thresh_dt( num_t dt, threshid_t thresh_id, cntx_t* cntx ) { blksz_t* threshs = bli_cntx_get_l3_sup_thresh( thresh_id, cntx ); dim_t thresh_dt = bli_blksz_get_def( dt, threshs ); // Return the main (default) threshold value for the datatype given. return thresh_dt; } static bool_t bli_cntx_l3_sup_thresh_is_met( num_t dt, dim_t m, dim_t n, dim_t k, cntx_t* cntx ) { if ( m < bli_cntx_get_l3_sup_thresh_dt( dt, BLIS_MT, cntx ) ) return TRUE; if ( n < bli_cntx_get_l3_sup_thresh_dt( dt, BLIS_NT, cntx ) ) return TRUE; if ( k < bli_cntx_get_l3_sup_thresh_dt( dt, BLIS_KT, cntx ) ) return TRUE; return FALSE; } // ----------------------------------------------------------------------------- static void* bli_cntx_get_l3_sup_handler( opid_t op, cntx_t* cntx ) { void** funcs = bli_cntx_l3_sup_handlers_buf( cntx ); void* func = funcs[ op ]; return func; } // ----------------------------------------------------------------------------- static blksz_t* bli_cntx_get_l3_sup_blksz( bszid_t bs_id, cntx_t* cntx ) { blksz_t* blkszs = bli_cntx_l3_sup_blkszs_buf( cntx ); blksz_t* blksz = &blkszs[ bs_id ]; // Return the address of the blksz_t identified by bs_id. 
return blksz; } static dim_t bli_cntx_get_l3_sup_blksz_def_dt( num_t dt, bszid_t bs_id, cntx_t* cntx ) { blksz_t* blksz = bli_cntx_get_l3_sup_blksz( bs_id, cntx ); dim_t bs_dt = bli_blksz_get_def( dt, blksz ); // Return the main (default) blocksize value for the datatype given. return bs_dt; } static dim_t bli_cntx_get_l3_sup_blksz_max_dt( num_t dt, bszid_t bs_id, cntx_t* cntx ) { blksz_t* blksz = bli_cntx_get_l3_sup_blksz( bs_id, cntx ); dim_t bs_dt = bli_blksz_get_max( dt, blksz ); // Return the auxiliary (maximum) blocksize value for the datatype given. return bs_dt; } // ----------------------------------------------------------------------------- static func_t* bli_cntx_get_l3_sup_kers( stor3_t stor_id, cntx_t* cntx ) { func_t* funcs = bli_cntx_l3_sup_kers_buf( cntx ); func_t* func = &funcs[ stor_id ]; return func; } static void* bli_cntx_get_l3_sup_ker_dt( num_t dt, stor3_t stor_id, cntx_t* cntx ) { func_t* func = bli_cntx_get_l3_sup_kers( stor_id, cntx ); return bli_func_get_dt( dt, func ); } // ----------------------------------------------------------------------------- static mbool_t* bli_cntx_get_l3_sup_ker_prefs( stor3_t stor_id, cntx_t* cntx ) { mbool_t* mbools = bli_cntx_l3_sup_kers_prefs_buf( cntx ); mbool_t* mbool = &mbools[ stor_id ]; return mbool; } static bool_t bli_cntx_get_l3_sup_ker_prefs_dt( num_t dt, stor3_t stor_id, cntx_t* cntx ) { mbool_t* mbool = bli_cntx_get_l3_sup_ker_prefs( stor_id, cntx ); return bli_mbool_get_dt( dt, mbool ); } // ----------------------------------------------------------------------------- static func_t* bli_cntx_get_l1f_kers( l1fkr_t ker_id, cntx_t* cntx ) { func_t* funcs = bli_cntx_l1f_kers_buf( cntx ); func_t* func = &funcs[ ker_id ]; return func; } static void_fp bli_cntx_get_l1f_ker_dt( num_t dt, l1fkr_t ker_id, cntx_t* cntx ) { func_t* func = bli_cntx_get_l1f_kers( ker_id, cntx ); return bli_func_get_dt( dt, func ); } // ----------------------------------------------------------------------------- static 
func_t* bli_cntx_get_l1v_kers( l1vkr_t ker_id, cntx_t* cntx ) { func_t* funcs = bli_cntx_l1v_kers_buf( cntx ); func_t* func = &funcs[ ker_id ]; return func; } static void_fp bli_cntx_get_l1v_ker_dt( num_t dt, l1vkr_t ker_id, cntx_t* cntx ) { func_t* func = bli_cntx_get_l1v_kers( ker_id, cntx ); return bli_func_get_dt( dt, func ); } // ----------------------------------------------------------------------------- static func_t* bli_cntx_get_packm_kers( l1mkr_t ker_id, cntx_t* cntx ) { func_t* func = NULL; // Only index to the requested packm func_t if the packm kernel being // requested is one that is explicitly supported. if ( 0 <= ( gint_t )ker_id && ( gint_t )ker_id < BLIS_NUM_PACKM_KERS ) { func_t* funcs = bli_cntx_packm_kers_buf( cntx ); func = &funcs[ ker_id ]; } return func; } static void_fp bli_cntx_get_packm_ker_dt( num_t dt, l1mkr_t ker_id, cntx_t* cntx ) { void_fp fp = NULL; // Only query the context for the packm func_t (and then extract the // datatype-specific function pointer) if the packm kernel being // requested is one that is explicitly supported. if ( 0 <= ( gint_t )ker_id && ( gint_t )ker_id < BLIS_NUM_PACKM_KERS ) { func_t* func = bli_cntx_get_packm_kers( ker_id, cntx ); fp = bli_func_get_dt( dt, func ); } return fp; } static func_t* bli_cntx_get_unpackm_kers( l1mkr_t ker_id, cntx_t* cntx ) { func_t* func = NULL; // Only index to the requested unpackm func_t if the unpackm kernel being // requested is one that is explicitly supported. if ( 0 <= ( gint_t )ker_id && ( gint_t )ker_id < BLIS_NUM_UNPACKM_KERS ) { func_t* funcs = bli_cntx_unpackm_kers_buf( cntx ); func = &funcs[ ker_id ]; } return func; } static void_fp bli_cntx_get_unpackm_ker_dt( num_t dt, l1mkr_t ker_id, cntx_t* cntx ) { void_fp fp = NULL; // Only query the context for the unpackm func_t (and then extract the // datatype-specific function pointer) if the unpackm kernel being // requested is one that is explicitly supported. 
if ( 0 <= ( gint_t )ker_id && ( gint_t )ker_id < BLIS_NUM_UNPACKM_KERS ) { func_t* func = bli_cntx_get_unpackm_kers( ker_id, cntx ); fp = bli_func_get_dt( dt, func ); } return fp; } // ----------------------------------------------------------------------------- static bool_t bli_cntx_l3_nat_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { const bool_t prefs = bli_cntx_get_l3_nat_ukr_prefs_dt( dt, ukr_id, cntx ); // A ukernel preference of TRUE means the ukernel prefers row storage. return ( bool_t ) ( prefs == TRUE ); } static bool_t bli_cntx_l3_nat_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { const bool_t prefs = bli_cntx_get_l3_nat_ukr_prefs_dt( dt, ukr_id, cntx ); // A ukernel preference of FALSE means the ukernel prefers column storage. return ( bool_t ) ( prefs == FALSE ); } static bool_t bli_cntx_l3_nat_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) { // Note that we use the computation datatype, which may differ from the // storage datatype of C (when performing a mixed datatype operation). const num_t dt = bli_obj_comp_dt( obj ); const bool_t ukr_prefers_rows = bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx ); const bool_t ukr_prefers_cols = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx ); bool_t r_val = FALSE; if ( bli_obj_is_row_stored( obj ) && ukr_prefers_rows ) r_val = TRUE; else if ( bli_obj_is_col_stored( obj ) && ukr_prefers_cols ) r_val = TRUE; return r_val; } static bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) { return ( bool_t ) !bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx ); } // ----------------------------------------------------------------------------- static bool_t bli_cntx_l3_vir_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { // For induced methods, return the ukernel storage preferences of the // corresponding real micro-kernel. 
// NOTE: This projection to real domain becomes unnecessary if you // set the exec_dt for 1m to the real projection of the storage // datatype. if ( bli_cntx_method( cntx ) != BLIS_NAT ) dt = bli_dt_proj_to_real( dt ); return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx ); } static bool_t bli_cntx_l3_vir_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { // For induced methods, return the ukernel storage preferences of the // corresponding real micro-kernel. // NOTE: This projection to real domain becomes unnecessary if you // set the exec_dt for 1m to the real projection of the storage // datatype. if ( bli_cntx_method( cntx ) != BLIS_NAT ) dt = bli_dt_proj_to_real( dt ); return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx ); } static bool_t bli_cntx_l3_vir_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) { // Note that we use the computation datatype, which may differ from the // storage datatype of C (when performing a mixed datatype operation). const num_t dt = bli_obj_comp_dt( obj ); const bool_t ukr_prefers_rows = bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, ukr_id, cntx ); const bool_t ukr_prefers_cols = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, ukr_id, cntx ); bool_t r_val = FALSE; if ( bli_obj_is_row_stored( obj ) && ukr_prefers_rows ) r_val = TRUE; else if ( bli_obj_is_col_stored( obj ) && ukr_prefers_cols ) r_val = TRUE; return r_val; } static bool_t bli_cntx_l3_vir_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) { return ( bool_t ) !bli_cntx_l3_vir_ukr_prefers_storage_of( obj, ukr_id, cntx ); } // ----------------------------------------------------------------------------- static bool_t bli_cntx_l3_sup_ker_prefers_rows_dt( num_t dt, stor3_t stor_id, cntx_t* cntx ) { const bool_t prefs = bli_cntx_get_l3_sup_ker_prefs_dt( dt, stor_id, cntx ); // A ukernel preference of TRUE means the ukernel prefers row storage. 
return ( bool_t ) ( prefs == TRUE ); } static bool_t bli_cntx_l3_sup_ker_prefers_cols_dt( num_t dt, stor3_t stor_id, cntx_t* cntx ) { const bool_t prefs = bli_cntx_get_l3_sup_ker_prefs_dt( dt, stor_id, cntx ); // A ukernel preference of FALSE means the ukernel prefers column storage. return ( bool_t ) ( prefs == FALSE ); } #if 0 // NOTE: These static functions aren't needed yet. static bool_t bli_cntx_l3_sup_ker_prefers_storage_of( obj_t* obj, stor3_t stor_id, cntx_t* cntx ) { const num_t dt = bli_obj_dt( obj ); const bool_t ukr_prefers_rows = bli_cntx_l3_sup_ker_prefers_rows_dt( dt, stor_id, cntx ); const bool_t ukr_prefers_cols = bli_cntx_l3_sup_ker_prefers_cols_dt( dt, stor_id, cntx ); bool_t r_val = FALSE; if ( bli_obj_is_row_stored( obj ) && ukr_prefers_rows ) r_val = TRUE; else if ( bli_obj_is_col_stored( obj ) && ukr_prefers_cols ) r_val = TRUE; return r_val; } static bool_t bli_cntx_l3_sup_ker_dislikes_storage_of( obj_t* obj, stor3_t stor_id, cntx_t* cntx ) { return ( bool_t ) !bli_cntx_l3_sup_ker_prefers_storage_of( obj, stor_id, cntx ); } #endif // ----------------------------------------------------------------------------- // // -- cntx_t modification (complex) -------------------------------------------- // // NOTE: The framework does not use any of the following functions. We provide // them in order to facilitate creating/modifying custom contexts. 
static void bli_cntx_set_blksz( bszid_t bs_id, blksz_t* blksz, bszid_t mult_id, cntx_t* cntx ) { blksz_t* blkszs = bli_cntx_blkszs_buf( cntx ); bszid_t* bmults = bli_cntx_bmults_buf( cntx ); blkszs[ bs_id ] = *blksz; bmults[ bs_id ] = mult_id; } static void bli_cntx_set_blksz_def_dt( num_t dt, bszid_t bs_id, dim_t bs, cntx_t* cntx ) { blksz_t* blkszs = bli_cntx_blkszs_buf( cntx ); blksz_t* blksz = &blkszs[ bs_id ]; bli_blksz_set_def( bs, dt, blksz ); } static void bli_cntx_set_blksz_max_dt( num_t dt, bszid_t bs_id, dim_t bs, cntx_t* cntx ) { blksz_t* blkszs = bli_cntx_blkszs_buf( cntx ); blksz_t* blksz = &blkszs[ bs_id ]; bli_blksz_set_max( bs, dt, blksz ); } static void bli_cntx_set_l3_vir_ukr( l3ukr_t ukr_id, func_t* func, cntx_t* cntx ) { func_t* funcs = bli_cntx_l3_vir_ukrs_buf( cntx ); funcs[ ukr_id ] = *func; } static void bli_cntx_set_l3_nat_ukr( l3ukr_t ukr_id, func_t* func, cntx_t* cntx ) { func_t* funcs = bli_cntx_l3_nat_ukrs_buf( cntx ); funcs[ ukr_id ] = *func; } static void bli_cntx_set_l3_nat_ukr_prefs( l3ukr_t ukr_id, mbool_t* prefs, cntx_t* cntx ) { mbool_t* mbools = bli_cntx_l3_nat_ukrs_prefs_buf( cntx ); mbools[ ukr_id ] = *prefs; } static void bli_cntx_set_l1f_ker( l1fkr_t ker_id, func_t* func, cntx_t* cntx ) { func_t* funcs = bli_cntx_l1f_kers_buf( cntx ); funcs[ ker_id ] = *func; } static void bli_cntx_set_l1v_ker( l1vkr_t ker_id, func_t* func, cntx_t* cntx ) { func_t* funcs = bli_cntx_l1v_kers_buf( cntx ); funcs[ ker_id ] = *func; } static void bli_cntx_set_packm_ker( l1mkr_t ker_id, func_t* func, cntx_t* cntx ) { func_t* funcs = bli_cntx_get_packm_kers( ker_id, cntx ); funcs[ ker_id ] = *func; } static void bli_cntx_set_packm_ker_dt( void_fp fp, num_t dt, l1mkr_t ker_id, cntx_t* cntx ) { func_t* func = ( func_t* )bli_cntx_get_packm_kers( ker_id, cntx ); bli_func_set_dt( fp, dt, func ); } static void bli_cntx_set_unpackm_ker( l1mkr_t ker_id, func_t* func, cntx_t* cntx ) { func_t* funcs = bli_cntx_get_unpackm_kers( ker_id, cntx ); funcs[ ker_id 
] = *func; } static void bli_cntx_set_unpackm_ker_dt( void_fp fp, num_t dt, l1mkr_t ker_id, cntx_t* cntx ) { func_t* func = ( func_t* )bli_cntx_get_unpackm_kers( ker_id, cntx ); bli_func_set_dt( fp, dt, func ); } // ----------------------------------------------------------------------------- // Function prototypes BLIS_EXPORT_BLIS void bli_cntx_clear( cntx_t* cntx ); BLIS_EXPORT_BLIS void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_vir_ukrs( dim_t n_ukrs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_sup_thresh( dim_t n_thresh, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_sup_handlers( dim_t n_ops, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_sup_blkszs( dim_t n_bs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l3_sup_kers( dim_t n_ukrs, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l1f_kers( dim_t n_kers, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_l1v_kers( dim_t n_kers, ... ); BLIS_EXPORT_BLIS void bli_cntx_set_packm_kers( dim_t n_kers, ... ); BLIS_EXPORT_BLIS void bli_cntx_print( cntx_t* cntx ); #endif blis-0.6.1/frame/base/bli_const.c000066400000000000000000000064101360743507500166060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// Statically initialize structs containing representations of various
// constants for each datatype supported in BLIS.
// These buffers have internal linkage; they are only reachable through the
// global obj_t constants defined below, which take their addresses.
static constdata_t bli_two_buffer = bli_obj_init_constdata( 2.0 );
static constdata_t bli_one_buffer = bli_obj_init_constdata( 1.0 );
static constdata_t bli_zero_buffer = bli_obj_init_constdata( 0.0 );
static constdata_t bli_mone_buffer = bli_obj_init_constdata( -1.0 );
static constdata_t bli_mtwo_buffer = bli_obj_init_constdata( -2.0 );

// Statically initialize global scalar constants, attaching the addresses
// of the corresponding structs above.
obj_t BLIS_TWO = bli_obj_init_const( &bli_two_buffer );
obj_t BLIS_ONE = bli_obj_init_const( &bli_one_buffer );
obj_t BLIS_ZERO = bli_obj_init_const( &bli_zero_buffer );
obj_t BLIS_MINUS_ONE = bli_obj_init_const( &bli_mone_buffer );
obj_t BLIS_MINUS_TWO = bli_obj_init_const( &bli_mtwo_buffer );

// The section below is compiled out. It creates and frees the constants at
// runtime instead of initializing them statically as above.
// NOTE(review): it references BLIS_ONE_HALF and BLIS_MINUS_ONE_HALF, which
// are not defined in this file; it would need updating before being
// re-enabled.
#if 0
obj_t BLIS_TWO = {};
obj_t BLIS_ONE = {};
obj_t BLIS_ZERO = {};
obj_t BLIS_MINUS_ONE = {};
obj_t BLIS_MINUS_TWO = {};

void bli_const_init( void )
{
	bli_obj_create_const( 2.0, &BLIS_TWO );
	bli_obj_create_const( 1.0, &BLIS_ONE );
	bli_obj_create_const( 0.5, &BLIS_ONE_HALF );
	bli_obj_create_const( 0.0, &BLIS_ZERO );
	bli_obj_create_const( -0.5, &BLIS_MINUS_ONE_HALF );
	bli_obj_create_const( -1.0, &BLIS_MINUS_ONE );
	bli_obj_create_const( -2.0, &BLIS_MINUS_TWO );
}

void bli_const_finalize( void )
{
	bli_obj_free( &BLIS_TWO );
	bli_obj_free( &BLIS_ONE );
	bli_obj_free( &BLIS_ONE_HALF );
	bli_obj_free( &BLIS_ZERO );
	bli_obj_free( &BLIS_MINUS_ONE_HALF );
	bli_obj_free( &BLIS_MINUS_ONE );
	bli_obj_free( &BLIS_MINUS_TWO );
}
#endif

blis-0.6.1/frame/base/bli_const.h000066400000000000000000000033101360743507500166070ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_const_init( void ); void bli_const_finalize( void ); blis-0.6.1/frame/base/bli_cpuid.c000066400000000000000000001022171360743507500165660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018-2019, Advanced Micro Devices, Inc. Copyright (C) 2019, Dave Love, University of Manchester Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #if 0 // Used only during standalone testing of ARM support. #include "bli_system.h" #include "bli_type_defs.h" #include "bli_cpuid.h" #undef __x86_64__ #undef _M_X64 #undef __i386 #undef _M_IX86 #define __arm__ #endif #ifndef BLIS_CONFIGURETIME_CPUID #include "blis.h" #else #define BLIS_EXPORT_BLIS #include "bli_system.h" #include "bli_type_defs.h" #include "bli_cpuid.h" #include "bli_arch.h" #endif // ----------------------------------------------------------------------------- #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) arch_t bli_cpuid_query_id( void ) { uint32_t vendor, family, model, features; // Call the CPUID instruction and parse its results into a family id, // model id, and a feature bit field. The return value encodes the // vendor. vendor = bli_cpuid_query( &family, &model, &features ); #if 0 printf( "vendor = %s\n", vendor==1 ? "AMD": "INTEL" ); printf("family = %x\n", family ); printf( "model = %x\n", model ); printf( "features = %x\n", features ); #endif if ( vendor == VENDOR_INTEL ) { // Check for each Intel configuration that is enabled, check for that // microarchitecture. We check from most recent to most dated. 
#ifdef BLIS_CONFIG_SKX if ( bli_cpuid_is_skx( family, model, features ) ) return BLIS_ARCH_SKX; #endif #ifdef BLIS_CONFIG_KNL if ( bli_cpuid_is_knl( family, model, features ) ) return BLIS_ARCH_KNL; #endif #ifdef BLIS_CONFIG_HASWELL if ( bli_cpuid_is_haswell( family, model, features ) ) return BLIS_ARCH_HASWELL; #endif #ifdef BLIS_CONFIG_SANDYBRIDGE if ( bli_cpuid_is_sandybridge( family, model, features ) ) return BLIS_ARCH_SANDYBRIDGE; #endif #ifdef BLIS_CONFIG_PENRYN if ( bli_cpuid_is_penryn( family, model, features ) ) return BLIS_ARCH_PENRYN; #endif // If none of the other sub-configurations were detected, return // the 'generic' arch_t id value. return BLIS_ARCH_GENERIC; } else if ( vendor == VENDOR_AMD ) { // Check for each AMD configuration that is enabled, check for that // microarchitecture. We check from most recent to most dated. #ifdef BLIS_CONFIG_ZEN2 if ( bli_cpuid_is_zen2( family, model, features ) ) return BLIS_ARCH_ZEN2; #endif #ifdef BLIS_CONFIG_ZEN if ( bli_cpuid_is_zen( family, model, features ) ) return BLIS_ARCH_ZEN; #endif #ifdef BLIS_CONFIG_EXCAVATOR if ( bli_cpuid_is_excavator( family, model, features ) ) return BLIS_ARCH_EXCAVATOR; #endif #ifdef BLIS_CONFIG_STEAMROLLER if ( bli_cpuid_is_steamroller( family, model, features ) ) return BLIS_ARCH_STEAMROLLER; #endif #ifdef BLIS_CONFIG_PILEDRIVER if ( bli_cpuid_is_piledriver( family, model, features ) ) return BLIS_ARCH_PILEDRIVER; #endif #ifdef BLIS_CONFIG_BULLDOZER if ( bli_cpuid_is_bulldozer( family, model, features ) ) return BLIS_ARCH_BULLDOZER; #endif // If none of the other sub-configurations were detected, return // the 'generic' arch_t id value. return BLIS_ARCH_GENERIC; } else if ( vendor == VENDOR_UNKNOWN ) { return BLIS_ARCH_GENERIC; } return BLIS_ARCH_GENERIC; } // ----------------------------------------------------------------------------- bool_t bli_cpuid_is_skx ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. 
const uint32_t expected = FEATURE_AVX | FEATURE_FMA3 | FEATURE_AVX2 | FEATURE_AVX512F | FEATURE_AVX512DQ | FEATURE_AVX512BW | FEATURE_AVX512VL ; int nvpu = vpu_count(); if ( bli_cpuid_has_features( features, expected ) ) { switch ( nvpu ) { case 1: bli_arch_log( "Hardware has 1 FMA unit; using 'haswell' (not 'skx') sub-config.\n" ); return FALSE; case 2: bli_arch_log( "Hardware has 2 FMA units; using 'skx' sub-config.\n" ); return TRUE; default: bli_arch_log( "Number of FMA units unknown; using 'haswell' (not 'skx') config.\n" ); return FALSE; } } else return FALSE; return TRUE; } bool_t bli_cpuid_is_knl ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_AVX | FEATURE_FMA3 | FEATURE_AVX2 | FEATURE_AVX512F | FEATURE_AVX512PF; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } bool_t bli_cpuid_is_haswell ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_AVX | FEATURE_FMA3 | FEATURE_AVX2; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } bool_t bli_cpuid_is_sandybridge ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_AVX; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } bool_t bli_cpuid_is_penryn ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_SSE3 | FEATURE_SSSE3; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } // ----------------------------------------------------------------------------- bool_t bli_cpuid_is_zen2 ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. 
const uint32_t expected = FEATURE_AVX | FEATURE_FMA3 | FEATURE_AVX2; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; // All Zen2 cores have a family of 0x17. if ( family != 0x17 ) return FALSE; // Finally, check for specific models: // - 0x30-0xff (THIS NEEDS UPDATING) const bool_t is_arch = ( 0x30 <= model && model <= 0xff ); if ( !is_arch ) return FALSE; return TRUE; } bool_t bli_cpuid_is_zen ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_AVX | FEATURE_FMA3 | FEATURE_AVX2; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; // All Zen cores have a family of 0x17. if ( family != 0x17 ) return FALSE; // Finally, check for specific models: // - 0x00-0xff (THIS NEEDS UPDATING) const bool_t is_arch = ( 0x00 <= model && model <= 0xff ); if ( !is_arch ) return FALSE; return TRUE; } bool_t bli_cpuid_is_excavator ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_AVX | FEATURE_FMA3 | FEATURE_AVX2; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; // All Excavator cores have a family of 0x15. if ( family != 0x15 ) return FALSE; // Finally, check for specific models: // - 0x60-0x7f const bool_t is_arch = ( 0x60 <= model && model <= 0x7f ); if ( !is_arch ) return FALSE; return TRUE; } bool_t bli_cpuid_is_steamroller ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_AVX | FEATURE_FMA3 | FEATURE_FMA4; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; // All Steamroller cores have a family of 0x15. 
if ( family != 0x15 ) return FALSE; // Finally, check for specific models: // - 0x30-0x3f const bool_t is_arch = ( 0x30 <= model && model <= 0x3f ); if ( !is_arch ) return FALSE; return TRUE; } bool_t bli_cpuid_is_piledriver ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_AVX | FEATURE_FMA3 | FEATURE_FMA4; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; // All Piledriver cores have a family of 0x15. if ( family != 0x15 ) return FALSE; // Finally, check for specific models: // - 0x02 // - 0x10-0x1f const bool_t is_arch = model == 0x02 || ( 0x10 <= model && model <= 0x1f ); if ( !is_arch ) return FALSE; return TRUE; } bool_t bli_cpuid_is_bulldozer ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_AVX | FEATURE_FMA4; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; // All Bulldozer cores have a family of 0x15. if ( family != 0x15 ) return FALSE; // Finally, check for specific models: // - 0x00 // - 0x01 const bool_t is_arch = ( model == 0x00 || model == 0x01 ); if ( !is_arch ) return FALSE; return TRUE; } #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) arch_t bli_cpuid_query_id( void ) { uint32_t vendor, model, part, features; // Call the CPUID instruction and parse its results into a model id, // part id, and a feature bit field. The return value encodes the // vendor. vendor = bli_cpuid_query( &model, &part, &features ); #if 0 printf( "vendor = %u\n", vendor ); printf( "model = %u\n", model ); printf( "part = 0x%x\n", part ); printf( "features = %u\n", features ); #endif if ( vendor == VENDOR_ARM ) { if ( model == MODEL_ARMV8 ) { // Check for each ARMv8 configuration that is enabled, check for that // microarchitecture. We check from most recent to most dated. 
#ifdef BLIS_CONFIG_THUNDERX2 if ( bli_cpuid_is_thunderx2( model, part, features ) ) return BLIS_ARCH_THUNDERX2; #endif #ifdef BLIS_CONFIG_CORTEXA57 if ( bli_cpuid_is_cortexa57( model, part, features ) ) return BLIS_ARCH_CORTEXA57; #endif // If none of the other sub-configurations were detected, return // the 'generic' arch_t id value. return BLIS_ARCH_GENERIC; } else if ( model == MODEL_ARMV7 ) { // Check for each ARMv7 configuration that is enabled, check for that // microarchitecture. We check from most recent to most dated. #ifdef BLIS_CONFIG_CORTEXA15 if ( bli_cpuid_is_cortexa15( model, part, features ) ) return BLIS_ARCH_CORTEXA15; #endif #ifdef BLIS_CONFIG_CORTEXA9 if ( bli_cpuid_is_cortexa9( model, part, features ) ) return BLIS_ARCH_CORTEXA9; #endif // If none of the other sub-configurations were detected, return // the 'generic' arch_t id value. return BLIS_ARCH_GENERIC; } } else if ( vendor == VENDOR_UNKNOWN ) { return BLIS_ARCH_GENERIC; } return BLIS_ARCH_GENERIC; } bool_t bli_cpuid_is_thunderx2 ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_NEON; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } bool_t bli_cpuid_is_cortexa57 ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_NEON; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } bool_t bli_cpuid_is_cortexa53 ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_NEON; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } bool_t bli_cpuid_is_cortexa15 ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. 
const uint32_t expected = FEATURE_NEON; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } bool_t bli_cpuid_is_cortexa9 ( uint32_t family, uint32_t model, uint32_t features ) { // Check for expected CPU features. const uint32_t expected = FEATURE_NEON; if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; return TRUE; } #endif // ----------------------------------------------------------------------------- // // This section of the file was based off of cpuid.cxx from TBLIS [1]. // // [1] https://github.com/devinamatthews/tblis // /* Copyright (C) 2017, The University of Texas at Austin Copyright (C) 2017, Devin Matthews Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) enum { // input register(s) output register FEATURE_MASK_SSE3 = (1u<< 0), // cpuid[eax=1] :ecx[0] FEATURE_MASK_SSSE3 = (1u<< 9), // cpuid[eax=1] :ecx[9] FEATURE_MASK_SSE41 = (1u<<19), // cpuid[eax=1] :ecx[19] FEATURE_MASK_SSE42 = (1u<<20), // cpuid[eax=1] :ecx[20] FEATURE_MASK_AVX = (1u<<28), // cpuid[eax=1] :ecx[28] FEATURE_MASK_AVX2 = (1u<< 5), // cpuid[eax=7,ecx=0] :ebx[5] FEATURE_MASK_FMA3 = (1u<<12), // cpuid[eax=1] :ecx[12] FEATURE_MASK_FMA4 = (1u<<16), // cpuid[eax=0x80000001]:ecx[16] FEATURE_MASK_AVX512F = (1u<<16), // cpuid[eax=7,ecx=0] :ebx[16] FEATURE_MASK_AVX512DQ = (1u<<17), // cpuid[eax=7,ecx=0] :ebx[17] FEATURE_MASK_AVX512PF = (1u<<26), // cpuid[eax=7,ecx=0] :ebx[26] FEATURE_MASK_AVX512ER = (1u<<27), // cpuid[eax=7,ecx=0] :ebx[27] FEATURE_MASK_AVX512CD = (1u<<28), // cpuid[eax=7,ecx=0] :ebx[28] FEATURE_MASK_AVX512BW = (1u<<30), // cpuid[eax=7,ecx=0] :ebx[30] FEATURE_MASK_AVX512VL = (1u<<31), // cpuid[eax=7,ecx=0] :ebx[31] FEATURE_MASK_XGETBV = (1u<<26)| (1u<<27), // cpuid[eax=1] :ecx[27:26] XGETBV_MASK_XMM = 0x02u, // xcr0[1] XGETBV_MASK_YMM = 0x04u, // xcr0[2] XGETBV_MASK_ZMM = 0xe0u // xcr0[7:5] }; uint32_t bli_cpuid_query ( uint32_t* family, uint32_t* model, uint32_t* features ) { uint32_t eax, ebx, ecx, edx; uint32_t old_model = 0; uint32_t old_family = 0; uint32_t ext_model = 0; uint32_t ext_family = 0; *family = 0; *model = 0; *features = 0; //fprintf( 
stderr, "checking cpuid\n" ); uint32_t cpuid_max = __get_cpuid_max( 0, 0 ); uint32_t cpuid_max_ext = __get_cpuid_max( 0x80000000u, 0 ); //fprintf( stderr, "max cpuid leaf: %d\n", cpuid_max ); //fprintf( stderr, "max extended cpuid leaf: %08x\n", cpuid_max_ext ); if ( cpuid_max < 1 ) return VENDOR_UNKNOWN; // The fourth '0' serves as the NULL-terminator for the vendor string. uint32_t vendor_string[4] = { 0, 0, 0, 0 }; // This is actually a macro that modifies the last four operands, // hence why they are not passed by address. __cpuid( 0, eax, vendor_string[0], vendor_string[2], vendor_string[1] ); // Check extended feature bits for post-AVX2 features. if ( cpuid_max >= 7 ) { // This is actually a macro that modifies the last four operands, // hence why they are not passed by address. __cpuid_count( 7, 0, eax, ebx, ecx, edx ); //fprintf( stderr, "cpuid leaf 7:\n" ); //print_binary( eax ); //print_binary( ebx ); //print_binary( ecx ); //print_binary( edx ); if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX2 ) ) *features |= FEATURE_AVX2; if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512F ) ) *features |= FEATURE_AVX512F; if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512DQ ) ) *features |= FEATURE_AVX512DQ; if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512PF ) ) *features |= FEATURE_AVX512PF; if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512ER ) ) *features |= FEATURE_AVX512ER; if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512CD ) ) *features |= FEATURE_AVX512CD; if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512BW ) ) *features |= FEATURE_AVX512BW; if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512VL ) ) *features |= FEATURE_AVX512VL; } // Check extended processor info / features bits for AMD-specific features. if ( cpuid_max_ext >= 0x80000001u ) { // This is actually a macro that modifies the last four operands, // hence why they are not passed by address. 
__cpuid( 0x80000001u, eax, ebx, ecx, edx ); //fprintf(stderr, "extended cpuid leaf 0x80000001:\n"); //print_binary(eax); //print_binary(ebx); //print_binary(ecx); //print_binary(edx); if ( bli_cpuid_has_features( ecx, FEATURE_MASK_FMA4 ) ) *features |= FEATURE_FMA4; } // Unconditionally check processor info / features bits. { // This is actually a macro that modifies the last four operands, // hence why they are not passed by address. __cpuid( 1, eax, ebx, ecx, edx ); //fprintf(stderr, "cpuid leaf 1:\n"); //print_binary(eax); //print_binary(ebx); //print_binary(ecx); //print_binary(edx); /* cpuid(eax=1): eax[27:0] 3: 0 - Stepping 7: 4 - Model 11: 8 - Family 13:12 - Processor Type 19:16 - Extended Model 27:20 - Extended Family Intel and AMD have suggested applications to display the family of a CPU as the sum of the "Family" and the "Extended Family" fields shown above, and the model as the sum of the "Model" and the 4-bit left-shifted "Extended Model" fields. If "Family" is different than 6 or 15, only the "Family" and "Model" fields should be used while the "Extended Family" and "Extended Model" bits are reserved. If "Family" is set to 15, then "Extended Family" and the 4-bit left-shifted "Extended Model" should be added to the respective base values, and if "Family" is set to 6, then only the 4-bit left-shifted "Extended Model" should be added to "Model". */ old_model = ( eax >> 4 ) & ( 0xF ); // bits 7:4 old_family = ( eax >> 8 ) & ( 0xF ); // bits 11:8 ext_model = ( eax >> 16 ) & ( 0xF ); // bits 19:16 ext_family = ( eax >> 20 ) & ( 0xFF ); // bits 27:20 // Set the display model and family values based on the original family // value. See explanation above. if ( old_family == 6 ) { *model = ( ext_model << 4 ) + old_model; *family = old_family; } else if ( old_family == 15 ) { *model = ( ext_model << 4 ) + old_model; *family = ( ext_family ) + old_family; } else { *model = old_model; *family = old_family; } // Check for SSE, AVX, and FMA3 features. 
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_SSE3 ) ) *features |= FEATURE_SSE3; if ( bli_cpuid_has_features( ecx, FEATURE_MASK_SSSE3 ) ) *features |= FEATURE_SSSE3; if ( bli_cpuid_has_features( ecx, FEATURE_MASK_SSE41 ) ) *features |= FEATURE_SSE41; if ( bli_cpuid_has_features( ecx, FEATURE_MASK_SSE42 ) ) *features |= FEATURE_SSE42; if ( bli_cpuid_has_features( ecx, FEATURE_MASK_AVX ) ) *features |= FEATURE_AVX; if ( bli_cpuid_has_features( ecx, FEATURE_MASK_FMA3 ) ) *features |= FEATURE_FMA3; // Check whether the hardware supports xsave/xrestor/xsetbv/xgetbv AND // support for these is enabled by the OS. If so, then we proceed with // checking that various register-state saving features are available. if ( bli_cpuid_has_features( ecx, FEATURE_MASK_XGETBV ) ) { uint32_t xcr = 0; // Call xgetbv to get xcr0 (the extended control register) copied // to [edx:eax]. This encodes whether software supports various // register state-saving features. __asm__ __volatile__ ( ".byte 0x0F, 0x01, 0xD0" : "=a" (eax), "=d" (edx) : "c" (xcr) : "cc" ); //fprintf(stderr, "xcr0:\n"); //print_binary(eax); //print_binary(edx); //fprintf(stderr, "xgetbv: xmm: %d\n", bli_cpuid_has_features(eax, XGETBV_MASK_XMM)); //fprintf(stderr, "xgetbv: ymm: %d\n", bli_cpuid_has_features(eax, XGETBV_MASK_XMM| // XGETBV_MASK_YMM)); //fprintf(stderr, "xgetbv: zmm: %d\n", bli_cpuid_has_features(eax, XGETBV_MASK_XMM| // XGETBV_MASK_YMM| // XGETBV_MASK_ZMM)); // The OS can manage the state of 512-bit zmm (AVX-512) registers // only if the xcr[7:5] bits are set. If they are not set, then // clear all feature bits related to AVX-512. if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM | XGETBV_MASK_YMM | XGETBV_MASK_ZMM ) ) { *features &= ~( FEATURE_AVX512F | FEATURE_AVX512DQ | FEATURE_AVX512PF | FEATURE_AVX512ER | FEATURE_AVX512CD | FEATURE_AVX512BW | FEATURE_AVX512VL ); } // The OS can manage the state of 256-bit ymm (AVX) registers // only if the xcr[2] bit is set. 
If it is not set, then // clear all feature bits related to AVX. if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM | XGETBV_MASK_YMM ) ) { *features &= ~( FEATURE_AVX | FEATURE_AVX2 | FEATURE_FMA3 | FEATURE_FMA4 ); } // The OS can manage the state of 128-bit xmm (SSE) registers // only if the xcr[1] bit is set. If it is not set, then // clear all feature bits related to SSE (which means the // entire bitfield is clear). if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM ) ) { *features = 0; } } else { // If the hardware does not support xsave/xrestor/xsetbv/xgetbv, // OR these features are not enabled by the OS, then we clear // the bitfield, because it means that not even xmm support is // present. //fprintf(stderr, "xgetbv: no\n"); features = 0; } } //fprintf(stderr, "vendor: %12s\n", vendor_string); //fprintf(stderr, "family: %d\n", family); //fprintf(stderr, "model: %d\n", model); //fprintf(stderr, "sse3: %d\n", bli_cpuid_has_features(features, FEATURE_SSE3)); //fprintf(stderr, "ssse3: %d\n", bli_cpuid_has_features(features, FEATURE_SSSE3)); //fprintf(stderr, "sse4.1: %d\n", bli_cpuid_has_features(features, FEATURE_SSE41)); //fprintf(stderr, "sse4.2: %d\n", bli_cpuid_has_features(features, FEATURE_SSE42)); //fprintf(stderr, "avx: %d\n", bli_cpuid_has_features(features, FEATURE_AVX)); //fprintf(stderr, "avx2: %d\n", bli_cpuid_has_features(features, FEATURE_AVX2)); //fprintf(stderr, "fma3: %d\n", bli_cpuid_has_features(features, FEATURE_FMA3)); //fprintf(stderr, "fma4: %d\n", bli_cpuid_has_features(features, FEATURE_FMA4)); //fprintf(stderr, "avx512f: %d\n", bli_cpuid_has_features(features, FEATURE_AVX512F)); //fprintf(stderr, "avx512pf: %d\n", bli_cpuid_has_features(features, FEATURE_AVX512PF)); //fprintf(stderr, "avx512dq: %d\n", bli_cpuid_has_features(features, FEATURE_AVX512DQ)); // Check the vendor string and return a value to indicate Intel or AMD. 
if ( strcmp( ( char* )vendor_string, "AuthenticAMD" ) == 0 ) return VENDOR_AMD; else if ( strcmp( ( char* )vendor_string, "GenuineIntel" ) == 0 ) return VENDOR_INTEL; else return VENDOR_UNKNOWN; } void get_cpu_name( char *cpu_name ) { uint32_t eax, ebx, ecx, edx; __cpuid( 0x80000002u, eax, ebx, ecx, edx ); //printf("%x %x %x %x\n", eax, ebx, ecx, edx); *( uint32_t* )&cpu_name[0 + 0] = eax; *( uint32_t* )&cpu_name[0 + 4] = ebx; *( uint32_t* )&cpu_name[0 + 8] = ecx; *( uint32_t* )&cpu_name[0 +12] = edx; __cpuid( 0x80000003u, eax, ebx, ecx, edx ); //printf("%x %x %x %x\n", eax, ebx, ecx, edx); *( uint32_t* )&cpu_name[16+ 0] = eax; *( uint32_t* )&cpu_name[16+ 4] = ebx; *( uint32_t* )&cpu_name[16+ 8] = ecx; *( uint32_t* )&cpu_name[16+12] = edx; __cpuid( 0x80000004u, eax, ebx, ecx, edx ); //printf("%x %x %x %x\n", eax, ebx, ecx, edx); *( uint32_t* )&cpu_name[32+ 0] = eax; *( uint32_t* )&cpu_name[32+ 4] = ebx; *( uint32_t* )&cpu_name[32+ 8] = ecx; *( uint32_t* )&cpu_name[32+12] = edx; } // Return the number of FMA units _assuming avx512 is supported_. // This needs updating for new processor types, sigh. 
// See https://ark.intel.com/content/www/us/en/ark.html#@Processors // and also https://github.com/jeffhammond/vpu-count int vpu_count( void ) { char cpu_name[48] = {}; char* loc; char model_num[5]; int sku; get_cpu_name( cpu_name ); if ( strstr( cpu_name, "Intel(R) Xeon(R)" ) != NULL ) { if (( loc = strstr( cpu_name, "Platinum" ) )) return 2; if ( loc == NULL ) loc = strstr( cpu_name, "Gold" ); // 1 or 2, tested below if ( loc == NULL ) if (( loc = strstr( cpu_name, "Silver" ) )) return 1; if ( loc == NULL ) if (( loc = strstr( cpu_name, "Bronze" ) )) return 1; if ( loc == NULL ) loc = strstr( cpu_name, "W" ); if ( loc == NULL ) if (( loc = strstr( cpu_name, "D" ) )) // Fixme: May be wrong // return 1; if ( loc == NULL ) return -1; // We may have W-nnnn rather than, say, Gold nnnn if ( 'W' == *loc && '-' == *(loc+1) ) loc++; else loc = strstr( loc+1, " " ); if ( loc == NULL ) return -1; strncpy( model_num, loc+1, 4 ); model_num[4] = '\0'; // Things like i9-10900X matched above sku = atoi( model_num ); // These were derived from ARK listings as of 2019-10-09, but // may not be complete, especially as the ARK Skylake listing // seems to be limited. if ( 8199 >= sku && sku >= 8100 ) return 2; else if ( 6199 >= sku && sku >= 6100 ) return 2; else if ( sku == 5122 ) return 2; else if ( 6299 >= sku && sku >= 6200 ) return 2; // Cascade Lake Gold else if ( 5299 >= sku && sku >= 5200 ) return 1; // Cascade Lake Gold else if ( 5199 >= sku && sku >= 5100 ) return 1; else if ( 4199 >= sku && sku >= 4100 ) return 1; else if ( 3199 >= sku && sku >= 3100 ) return 1; else if ( 3299 >= sku && sku >= 3200 ) return 2; // Cascade Lake W else if ( 2299 >= sku && sku >= 2200 ) return 2; // Cascade Lake W else if ( 2199 >= sku && sku >= 2120 ) return 2; else if ( 2102 == sku || sku == 2104 ) return 2; // Gold exceptions else if ( 2119 >= sku && sku >= 2100 ) return 1; else return -1; } else if ( strstr( cpu_name, "Intel(R) Core(TM)" ) != NULL ) return 2; // All i7/i9 with avx512? 
else { return -1; } } #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) #define TEMP_BUFFER_SIZE 200 uint32_t bli_cpuid_query ( uint32_t* model, uint32_t* part, uint32_t* features ) { *model = MODEL_UNKNOWN; *part = 0; *features = 0; char* pci_str = "/proc/cpuinfo"; char proc_str[ TEMP_BUFFER_SIZE ]; char ptno_str[ TEMP_BUFFER_SIZE ]; char feat_str[ TEMP_BUFFER_SIZE ]; char* r_val; //printf( "bli_cpuid_query(): beginning search\n" ); // Search /proc/cpuinfo for the 'Processor' entry. r_val = find_string_in( "Processor", proc_str, TEMP_BUFFER_SIZE, pci_str ); if ( r_val == NULL ) return VENDOR_ARM; // Search /proc/cpuinfo for the 'CPU part' entry. r_val = find_string_in( "CPU part", ptno_str, TEMP_BUFFER_SIZE, pci_str ); if ( r_val == NULL ) return VENDOR_ARM; // Search /proc/cpuinfo for the 'Features' entry. r_val = find_string_in( "Features", feat_str, TEMP_BUFFER_SIZE, pci_str ); if ( r_val == NULL ) return VENDOR_ARM; #if 0 printf( "bli_cpuid_query(): full processor string: %s\n", proc_str ); printf( "bli_cpuid_query(): full part num string: %s\n", ptno_str ); printf( "bli_cpuid_query(): full features string: %s\n", feat_str ); #endif // Parse the feature string to check for SIMD features. if ( strstr( feat_str, "neon" ) != NULL || strstr( feat_str, "asimd" ) != NULL ) *features |= FEATURE_NEON; //printf( "bli_cpuid_query(): features var: %u\n", *features ); // Parse the processor string to uncover the model. if ( strstr( proc_str, "ARMv7" ) != NULL ) *model = MODEL_ARMV7; else if ( strstr( proc_str, "AArch64" ) != NULL || strstr( proc_str, "ARMv8" ) ) *model = MODEL_ARMV8; //printf( "bli_cpuid_query(): model: %u\n", *model ); // Parse the part number string. 
r_val = strstr( ptno_str, "0x" ); if ( r_val != NULL) { *part = strtol( r_val, NULL, 16 ); } //printf( "bli_cpuid_query(): part#: %x\n", *part ); return VENDOR_ARM; } char* find_string_in( char* target, char* buffer, size_t buf_len, char* filepath ) { // This function searches for the first line of the file located at // 'filepath' that contains the string 'target' and then copies that // line (actually, the substring of the line starting with 'target') // to 'buffer', which is 'buf_len' bytes long. char* r_val = NULL; // Allocate a temporary local buffer equal to the size of buffer. char* buf_local = malloc( buf_len * sizeof( char ) ); // Open the file stream. FILE* stream = fopen( filepath, "r" ); // Repeatedly read in a line from the stream, storing the contents of // the stream into buf_local. while ( !feof( stream ) ) { // Read in the current line, up to buf_len-1 bytes. r_val = fgets( buf_local, buf_len-1, stream ); //printf( "read line: %s", buf_local ); // fgets() returns the pointer specified by the first argument (in // this case, buf_local) on success and NULL on error. if ( r_val == NULL ) break; // Since fgets() was successful, we can search for the target string // within the current line, as captured in buf_local. r_val = strstr( buf_local, target ); // If the target string was found in buf_local, we save it to buffer. if ( r_val != NULL ) { //printf( " found match to '%s'\n", target ); // Copy the string read by fgets() to the caller's buffer. strncpy( buffer, buf_local, buf_len ); // Make sure that we have a terminating null character by the // end of the buffer. if ( buf_len > 0 ) buffer[ buf_len - 1 ] = '\0'; // Leave the loop since we found the target string. break; } } // Close the file stream. fclose( stream ); // Free the temporary local buffer. free( buf_local ); // Return r_val so the caller knows if we failed. 
return r_val; } #endif blis-0.6.1/frame/base/bli_cpuid.h000066400000000000000000000146271360743507500166020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018-2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #if 0 // Used only during standalone testing of ARM support. 
#define FALSE 0
  #define TRUE  1
  // Minimal stand-ins for the BLIS types and ids this header refers to.
  typedef enum
  {
	BLIS_ARCH_CORTEXA57 = 10,
	BLIS_ARCH_CORTEXA15 = 11,
	BLIS_ARCH_CORTEXA9  = 12,
	BLIS_ARCH_GENERIC   = 13
  } arch_t;
  typedef uint64_t bool_t;
  #define bli_abort abort
#endif

#ifndef BLIS_CPUID_H
#define BLIS_CPUID_H

// Returns the arch_t id of the sub-configuration detected for the
// processor the library is running on.
arch_t   bli_cpuid_query_id( void );

// Per-microarchitecture predicates. Each takes the values produced by
// bli_cpuid_query() and returns a bool_t indicating a match.

// Intel
bool_t   bli_cpuid_is_skx( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_knl( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_haswell( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features );

// AMD
bool_t   bli_cpuid_is_zen2( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_zen( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_excavator( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_steamroller( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features );
bool_t   bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features );

// ARM
// (Note that the ARM predicates take a model, a part id, and a feature
// bit field, rather than a family and a model.)
bool_t   bli_cpuid_is_thunderx2( uint32_t model, uint32_t part, uint32_t features );
bool_t   bli_cpuid_is_cortexa57( uint32_t model, uint32_t part, uint32_t features );
bool_t   bli_cpuid_is_cortexa53( uint32_t model, uint32_t part, uint32_t features );
bool_t   bli_cpuid_is_cortexa15( uint32_t model, uint32_t part, uint32_t features );
bool_t   bli_cpuid_is_cortexa9( uint32_t model, uint32_t part, uint32_t features );

// Fills in the three output arguments and returns one of the VENDOR_*
// values defined below.
uint32_t bli_cpuid_query( uint32_t* family, uint32_t* model, uint32_t* features );

// -----------------------------------------------------------------------------

//
// This section of the file was based off of cpuid.hpp from TBLIS [1].
//
// [1] https://github.com/devinamatthews/tblis
//

/*

   Copyright (C) 2017, The University of Texas at Austin
   Copyright (C) 2017, Devin Matthews

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Returns non-zero only if every bit that is set in 'want' is also set in
// 'have' (that is, ALL of the requested features are present, not merely
// some of them).
static bool_t bli_cpuid_has_features( uint32_t have, uint32_t want )
{
	return ( have & want ) == want;
}

// -----------------------------------------------------------------------------

#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)

#include "cpuid.h"

void get_cpu_name( char *cpu_name );
int  vpu_count( void );

// Vendor ids returned by bli_cpuid_query() on x86.
enum
{
	VENDOR_INTEL = 0,
	VENDOR_AMD,
	VENDOR_UNKNOWN
};
// Bits of the 'features' bit field produced by bli_cpuid_query() on x86.
enum
{
	FEATURE_SSE3     = 0x0001,
	FEATURE_SSSE3    = 0x0002,
	FEATURE_SSE41    = 0x0004,
	FEATURE_SSE42    = 0x0008,
	FEATURE_AVX      = 0x0010,
	FEATURE_AVX2     = 0x0020,
	FEATURE_FMA3     = 0x0040,
	FEATURE_FMA4     = 0x0080,
	FEATURE_AVX512F  = 0x0100,
	FEATURE_AVX512DQ = 0x0200,
	FEATURE_AVX512PF = 0x0400,
	FEATURE_AVX512ER = 0x0800,
	FEATURE_AVX512CD = 0x1000,
	FEATURE_AVX512BW = 0x2000,
	FEATURE_AVX512VL = 0x4000
};

#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)

char* find_string_in( char* target, char* buffer, size_t buf_len, char* filepath );

// Vendor ids returned by bli_cpuid_query() on ARM.
enum
{
	VENDOR_ARM = 0,
	VENDOR_UNKNOWN
};
// Values stored to the 'model' output of bli_cpuid_query() on ARM.
enum
{
	MODEL_ARMV7 = 0,
	MODEL_ARMV8,
	MODEL_UNKNOWN
};
// Bits of the 'features' bit field produced by bli_cpuid_query() on ARM.
enum
{
	FEATURE_NEON = 0x1
};

#endif

#endif
blis-0.6.1/frame/base/bli_env.c000066400000000000000000000064101360743507500162500ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // ----------------------------------------------------------------------------- dim_t bli_env_get_var( const char* env, dim_t fallback ) { dim_t r_val; char* str; // Query the environment variable and store the result in str. str = getenv( env ); // Set the return value based on the string obtained from getenv(). if ( str != NULL ) { // If there was no error, convert the string to an integer and // prepare to return that integer. r_val = strtol( str, NULL, 10 ); } else { // If there was an error, use the "fallback" as the return value. r_val = fallback; } return r_val; } #if 0 void bli_env_set_var( const char* env, dim_t value ) { dim_t r_val; char value_str[32]; const char* fs_32 = "%u"; const char* fs_64 = "%lu"; // Convert the string to an integer, but vary the format specifier // depending on the integer type size. 
if ( bli_info_get_int_type_size() == 32 ) sprintf( value_str, fs_32, value ); else sprintf( value_str, fs_64, value ); // Set the environment variable using the string we just wrote to via // sprintf(). (The 'TRUE' argument means we want to overwrite the current // value if the environment variable already exists.) r_val = bli_setenv( env, value_str, TRUE ); // Check the return value in case something went horribly wrong. if ( r_val == -1 ) { char err_str[128]; // Query the human-readable error string corresponding to errno. strerror_r( errno, err_str, 128 ); // Print the error message. bli_print_msg( err_str, __FILE__, __LINE__ ); } } #endif blis-0.6.1/frame/base/bli_env.h000066400000000000000000000036411360743507500162600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ENV_H #define BLIS_ENV_H dim_t bli_env_get_var( const char* env, dim_t fallback ); //void bli_env_set_var( const char* env, dim_t value ); #endif blis-0.6.1/frame/base/bli_error.c000066400000000000000000000212521360743507500166120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Internal array to hold error strings. static char bli_error_string[BLIS_MAX_NUM_ERR_MSGS][BLIS_MAX_ERR_MSG_LENGTH] = { [-BLIS_INVALID_ERROR_CHECKING_LEVEL] = "Invalid error checking level.", [-BLIS_UNDEFINED_ERROR_CODE] = "Undefined error code.", [-BLIS_NULL_POINTER] = "Encountered unexpected null pointer.", [-BLIS_NOT_YET_IMPLEMENTED] = "Requested functionality not yet implemented.", [-BLIS_INVALID_SIDE] = "Invalid side parameter value.", [-BLIS_INVALID_UPLO] = "Invalid uplo_t parameter value.", [-BLIS_INVALID_TRANS] = "Invalid trans_t parameter value.", [-BLIS_INVALID_CONJ] = "Invalid conj_t parameter value.", [-BLIS_INVALID_DIAG] = "Invalid diag_t parameter value.", [-BLIS_EXPECTED_NONUNIT_DIAG] = "Expected object with non-unit diagonal.", [-BLIS_INVALID_DATATYPE] = "Invalid datatype value.", [-BLIS_EXPECTED_FLOATING_POINT_DATATYPE] = "Expected floating-point datatype value.", [-BLIS_EXPECTED_NONINTEGER_DATATYPE] = "Expected non-integer datatype value.", [-BLIS_EXPECTED_NONCONSTANT_DATATYPE] = "Expected non-constant datatype value.", [-BLIS_EXPECTED_REAL_DATATYPE] = "Expected real datatype value.", [-BLIS_EXPECTED_INTEGER_DATATYPE] = "Expected integer datatype value.", [-BLIS_INCONSISTENT_DATATYPES] = "Expected consistent datatypes (equal, or one being constant).", [-BLIS_EXPECTED_REAL_PROJ_OF] = "Expected second datatype to be real projection of first.", [-BLIS_EXPECTED_REAL_VALUED_OBJECT] = "Expected real-valued object (ie: if 
complex, imaginary component equals zero).", [-BLIS_INCONSISTENT_PRECISIONS] = "Expected consistent precisions (both single or both double).", [-BLIS_NONCONFORMAL_DIMENSIONS] = "Encountered non-conformal dimensions between objects.", [-BLIS_EXPECTED_SCALAR_OBJECT] = "Expected scalar object.", [-BLIS_EXPECTED_VECTOR_OBJECT] = "Expected vector object.", [-BLIS_UNEQUAL_VECTOR_LENGTHS] = "Encountered unequal vector lengths.", [-BLIS_EXPECTED_SQUARE_OBJECT] = "Expected square object.", [-BLIS_UNEXPECTED_OBJECT_LENGTH] = "Unexpected object length.", [-BLIS_UNEXPECTED_OBJECT_WIDTH] = "Unexpected object width.", [-BLIS_UNEXPECTED_VECTOR_DIM] = "Unexpected vector dimension.", [-BLIS_UNEXPECTED_DIAG_OFFSET] = "Unexpected object diagonal offset.", [-BLIS_NEGATIVE_DIMENSION] = "Encountered negative dimension.", [-BLIS_INVALID_ROW_STRIDE] = "Encountered invalid row stride relative to n dimension.", [-BLIS_INVALID_COL_STRIDE] = "Encountered invalid col stride relative to m dimension.", [-BLIS_INVALID_DIM_STRIDE_COMBINATION] = "Encountered invalid stride/dimension combination.", [-BLIS_EXPECTED_GENERAL_OBJECT] = "Expected general object.", [-BLIS_EXPECTED_HERMITIAN_OBJECT] = "Expected Hermitian object.", [-BLIS_EXPECTED_SYMMETRIC_OBJECT] = "Expected symmetric object.", [-BLIS_EXPECTED_TRIANGULAR_OBJECT] = "Expected triangular object.", [-BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT] = "Expected upper or lower triangular object.", [-BLIS_INVALID_3x1_SUBPART] = "Encountered invalid 3x1 (vertical) subpartition label.", [-BLIS_INVALID_1x3_SUBPART] = "Encountered invalid 1x3 (horizontal) subpartition label.", [-BLIS_INVALID_3x3_SUBPART] = "Encountered invalid 3x3 (diagonal) subpartition label.", [-BLIS_UNEXPECTED_NULL_CONTROL_TREE] = "Encountered unexpected null control tree node.", [-BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK] = "Pack schema not yet supported/implemented for use with unpacking.", [-BLIS_EXPECTED_NONNULL_OBJECT_BUFFER] = "Encountered object with non-zero dimensions containing 
null buffer.", [-BLIS_MALLOC_RETURNED_NULL] = "malloc() returned NULL; heap memory is likely exhausted.", [-BLIS_INVALID_PACKBUF] = "Invalid packbuf_t value.", [-BLIS_EXHAUSTED_CONTIG_MEMORY_POOL] = "Attempted to allocate more memory from contiguous pool than is available.", [-BLIS_INSUFFICIENT_STACK_BUF_SIZE] = "Configured maximum stack buffer size is insufficient for register blocksizes currently in use.", [-BLIS_ALIGNMENT_NOT_POWER_OF_TWO] = "Encountered memory alignment value that is either zero or not a power of two.", [-BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE] = "Encountered memory alignment value that is not a multiple of sizeof(void*).", [-BLIS_EXPECTED_OBJECT_ALIAS] = "Expected object to be alias.", [-BLIS_INVALID_ARCH_ID] = "Invalid architecture id value.", [-BLIS_MC_DEF_NONMULTIPLE_OF_MR] = "Default MC is non-multiple of MR for one or more datatypes.", [-BLIS_MC_MAX_NONMULTIPLE_OF_MR] = "Maximum MC is non-multiple of MR for one or more datatypes.", [-BLIS_NC_DEF_NONMULTIPLE_OF_NR] = "Default NC is non-multiple of NR for one or more datatypes.", [-BLIS_NC_MAX_NONMULTIPLE_OF_NR] = "Maximum NC is non-multiple of NR for one or more datatypes.", [-BLIS_KC_DEF_NONMULTIPLE_OF_KR] = "Default KC is non-multiple of KR for one or more datatypes.", [-BLIS_KC_MAX_NONMULTIPLE_OF_KR] = "Maximum KC is non-multiple of KR for one or more datatypes.", }; // ----------------------------------------------------------------------------- void bli_print_msg( char* str, char* file, guint_t line ) { fprintf( stderr, "\n" ); fprintf( stderr, "libblis: %s (line %lu):\n", file, ( long unsigned int )line ); fprintf( stderr, "libblis: %s\n", str ); fflush( stderr ); } void bli_abort( void ) { fprintf( stderr, "libblis: Aborting.\n" ); //raise( SIGABRT ); abort(); } // ----------------------------------------------------------------------------- // A mutex to allow synchronous access to bli_err_chk_level. 
static bli_pthread_mutex_t err_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; // Current error checking level. static errlev_t bli_err_chk_level = BLIS_FULL_ERROR_CHECKING; errlev_t bli_error_checking_level( void ) { return bli_err_chk_level; } void bli_error_checking_level_set( errlev_t new_level ) { err_t e_val; e_val = bli_check_valid_error_level( new_level ); bli_check_error_code( e_val ); // Acquire the mutex protecting bli_err_chk_level. bli_pthread_mutex_lock( &err_mutex ); // BEGIN CRITICAL SECTION { bli_err_chk_level = new_level; } // END CRITICAL SECTION // Release the mutex protecting bli_err_chk_level. bli_pthread_mutex_unlock( &err_mutex ); } bool_t bli_error_checking_is_enabled( void ) { return bli_error_checking_level() != BLIS_NO_ERROR_CHECKING; } char* bli_error_string_for_code( gint_t code ) { return bli_error_string[-code]; } blis-0.6.1/frame/base/bli_error.h000066400000000000000000000037531360743507500166250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ BLIS_EXPORT_BLIS errlev_t bli_error_checking_level( void ); BLIS_EXPORT_BLIS void bli_error_checking_level_set( errlev_t new_level ); BLIS_EXPORT_BLIS bool_t bli_error_checking_is_enabled( void ); void bli_print_msg( char* str, char* file, guint_t line ); void bli_abort( void ); char* bli_error_string_for_code( gint_t code ); blis-0.6.1/frame/base/bli_func.c000066400000000000000000000061231360743507500164140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" func_t* bli_func_create ( void_fp ptr_s, void_fp ptr_d, void_fp ptr_c, void_fp ptr_z ) { func_t* f; f = ( func_t* ) bli_malloc_intl( sizeof(func_t) ); bli_func_init ( f, ptr_s, ptr_d, ptr_c, ptr_z ); return f; } void bli_func_init ( func_t* f, void_fp ptr_s, void_fp ptr_d, void_fp ptr_c, void_fp ptr_z ) { bli_func_set_dt( ptr_s, BLIS_FLOAT, f ); bli_func_set_dt( ptr_d, BLIS_DOUBLE, f ); bli_func_set_dt( ptr_c, BLIS_SCOMPLEX, f ); bli_func_set_dt( ptr_z, BLIS_DCOMPLEX, f ); } void bli_func_init_null ( func_t* f ) { bli_func_set_dt( NULL, BLIS_FLOAT, f ); bli_func_set_dt( NULL, BLIS_DOUBLE, f ); bli_func_set_dt( NULL, BLIS_SCOMPLEX, f ); bli_func_set_dt( NULL, BLIS_DCOMPLEX, f ); } void bli_func_free( func_t* f ) { bli_free_intl( f ); } // ----------------------------------------------------------------------------- bool_t bli_func_is_null_dt( num_t dt, func_t* f ) { return ( bli_func_get_dt( dt, f ) == NULL ); } bool_t bli_func_is_null( func_t* f ) { bool_t r_val = TRUE; num_t dt; // Iterate over all 
floating-point datatypes. If any is non-null, // return FALSE. Otherwise, if they are all null, return TRUE. for ( dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) { if ( bli_func_get_dt( dt, f ) != NULL ) { r_val = FALSE; break; } } return r_val; } blis-0.6.1/frame/base/bli_func.h000066400000000000000000000055311360743507500164230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // ----------------------------------------------------------------------------- // func_t query static void_fp bli_func_get_dt ( num_t dt, func_t* func ) { return func->ptr[ dt ]; } // func_t modification static void bli_func_set_dt ( void_fp fp, num_t dt, func_t* func ) { func->ptr[ dt ] = fp; } static void bli_func_copy_dt ( num_t dt_src, func_t* func_src, num_t dt_dst, func_t* func_dst ) { void_fp fp = bli_func_get_dt( dt_src, func_src ); bli_func_set_dt( fp, dt_dst, func_dst ); } // ----------------------------------------------------------------------------- func_t* bli_func_create ( void_fp ptr_s, void_fp ptr_d, void_fp ptr_c, void_fp ptr_z ); void bli_func_init ( func_t* f, void_fp ptr_s, void_fp ptr_d, void_fp ptr_c, void_fp ptr_z ); void bli_func_init_null ( func_t* f ); void bli_func_free( func_t* f ); // ----------------------------------------------------------------------------- bool_t bli_func_is_null_dt( num_t dt, func_t* f ); bool_t bli_func_is_null( func_t* f ); blis-0.6.1/frame/base/bli_getopt.c000066400000000000000000000141141360743507500167620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" static const char OPT_MARKER = '-'; void bli_getopt_init_state( int opterr, getopt_t* state ) { state->optarg = NULL; state->optind = 1; state->opterr = opterr; state->optopt = 0; } int bli_getopt( int argc, char** const argv, const char* optstring, getopt_t* state ) { static char* nextchar = NULL; char* elem_str; char* optstr_char; // If argv contains no more arguments to process, return. if ( state->optind == argc ) return -1; // Get a pointer to the current argv element string to process. If // nextchar is non-NULL, then it means the previous call processed // an element of argv with more than one option character, in which // case we need to pick up where we left off (which is the address // contained in nextchar). if ( nextchar == NULL ) { elem_str = argv[ state->optind ]; // elem_str[0] should be an OPT_MARKER if it is an option. In the // event that it is not an option, argv should be permuted so that // the non-option argument moves back toward the end of the list. // This functionality is not supported/implemented here. Therefore, // we require all of the program's option arguments to precede all of // its non-option arguments. 
if ( elem_str[0] != OPT_MARKER ) { state->optarg = NULL; //state->optind += 1; return -1; } // Skip over the OPT_MARKER. elem_str++; } else { // Note we don't need to skip the OPT_MARKER here since we are // continuing processing of a string with more than one option // character. // Use the nextchar pointer as our element string. elem_str = nextchar; // Reset nextchar to NULL. nextchar = NULL; } // Find the first occurrence of elem_str[0] in optstring. optstr_char = strchr( optstring, elem_str[0] ); // If the option character in elem_str[0] is absent from the option // string, store it and return '?'. if ( optstr_char == NULL ) { if ( state->opterr == 1 ) fprintf( stderr, "bli_getopt(): **error**: option character '%c' missing from option string \"%s\"\n", elem_str[0], optstring ); // We can't dereference optstr_char since it is NULL, so we use // elem_str[0] instead. state->optopt = elem_str[0]; state->optind += 1; return '?'; } // We can now safely assume that an option characer was found in the // option string. Now we need to check if the option takes an argument. if ( optstr_char[1] == ':' ) { // If the current element string ends after the option character, // then the companion argument must be stored in the next element // of argv. Otherwise, the argument begins immediately after the // option character. if ( elem_str[1] == '\0' ) { // If there are no more elements in argv, the argument was // omitted. Store the corresponding option character and // return '?'. if ( state->optind + 1 >= argc ) { if ( state->opterr == 1 ) fprintf( stderr, "bli_getopt(): **error**: option character '%c' is missing an argument (end of argv)\n", elem_str[0] ); state->optopt = *optstr_char; state->optind += 1; return '?'; } // If there are still more elements in argv yet to process AND // the next one is an option, then the argument was omitted. 
else if ( argv[ state->optind + 1 ][0] == OPT_MARKER ) { if ( state->opterr == 1 ) fprintf( stderr, "bli_getopt(): **error**: option character '%c' is missing an argument (next element of argv is option '%c')\n", elem_str[0], argv[ state->optind + 1 ][1] ); state->optopt = *optstr_char; state->optind += 1; return '?'; } // If no error was deteced above, we can safely assign optarg // to be the next element in argv and increment optind by two. state->optarg = argv[ state->optind + 1 ]; state->optind += 2; } else { // We don't need to check for missing arguments since we know // that because the char after the option character is not NULL, // the character(s) after it must constitute the argument. state->optarg = &elem_str[1]; state->optind += 1; } return *optstr_char; } // The current option character does NOT take an argument. However, we // still need to check if the next char is an option argument (such as // occurs when the user runs "program -rv" instead of "program -r -v"). if ( elem_str[1] != '\0' ) { if ( strchr( optstring, elem_str[1] ) != NULL ) { nextchar = &elem_str[1]; return *optstr_char; } } state->optarg = NULL; state->optind += 1; return *optstr_char; } blis-0.6.1/frame/base/bli_getopt.h000066400000000000000000000036421360743507500167730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ typedef struct getopt_s { char* optarg; int optind; int opterr; int optopt; } getopt_t; BLIS_EXPORT_BLIS void bli_getopt_init_state( int opterr, getopt_t* state ); BLIS_EXPORT_BLIS int bli_getopt( int argc, char** const argv, const char* optstring, getopt_t* state ); blis-0.6.1/frame/base/bli_gks.c000066400000000000000000000626701360743507500162560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018-2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // The array of cntx_t* pointers to cache modified contexts used by // induced methods. static cntx_t** gks[ BLIS_NUM_ARCHS ]; // The array of function pointers holding the registered context initialization // functions for induced methods. static void_fp cntx_ind_init[ BLIS_NUM_ARCHS ]; // The array of function pointers holding the registered context initialization // functions for reference kernels. static void_fp cntx_ref_init[ BLIS_NUM_ARCHS ]; // Define a function pointer type for context initialization functions. 
typedef void (*nat_cntx_init_ft)( cntx_t* cntx ); typedef void (*ref_cntx_init_ft)( cntx_t* cntx ); typedef void (*ind_cntx_init_ft)( ind_t method, num_t dt, cntx_t* cntx ); // ----------------------------------------------------------------------------- void bli_gks_init( void ) { { // Initialize the internal data structure we use to track registered // contexts. bli_gks_init_index(); // Register a context for each architecture that was #define'd in // bli_config.h. // Intel architectures #ifdef BLIS_CONFIG_SKX bli_gks_register_cntx( BLIS_ARCH_SKX, bli_cntx_init_skx, bli_cntx_init_skx_ref, bli_cntx_init_skx_ind ); #endif #ifdef BLIS_CONFIG_KNL bli_gks_register_cntx( BLIS_ARCH_KNL, bli_cntx_init_knl, bli_cntx_init_knl_ref, bli_cntx_init_knl_ind ); #endif #ifdef BLIS_CONFIG_KNC bli_gks_register_cntx( BLIS_ARCH_KNC, bli_cntx_init_knc, bli_cntx_init_knc_ref, bli_cntx_init_knc_ind ); #endif #ifdef BLIS_CONFIG_HASWELL bli_gks_register_cntx( BLIS_ARCH_HASWELL, bli_cntx_init_haswell, bli_cntx_init_haswell_ref, bli_cntx_init_haswell_ind ); #endif #ifdef BLIS_CONFIG_SANDYBRIDGE bli_gks_register_cntx( BLIS_ARCH_SANDYBRIDGE, bli_cntx_init_sandybridge, bli_cntx_init_sandybridge_ref, bli_cntx_init_sandybridge_ind ); #endif #ifdef BLIS_CONFIG_PENRYN bli_gks_register_cntx( BLIS_ARCH_PENRYN, bli_cntx_init_penryn, bli_cntx_init_penryn_ref, bli_cntx_init_penryn_ind ); #endif // AMD architectures #ifdef BLIS_CONFIG_ZEN2 bli_gks_register_cntx( BLIS_ARCH_ZEN2, bli_cntx_init_zen2, bli_cntx_init_zen2_ref, bli_cntx_init_zen2_ind ); #endif #ifdef BLIS_CONFIG_ZEN bli_gks_register_cntx( BLIS_ARCH_ZEN, bli_cntx_init_zen, bli_cntx_init_zen_ref, bli_cntx_init_zen_ind ); #endif #ifdef BLIS_CONFIG_EXCAVATOR bli_gks_register_cntx( BLIS_ARCH_EXCAVATOR, bli_cntx_init_excavator, bli_cntx_init_excavator_ref, bli_cntx_init_excavator_ind ); #endif #ifdef BLIS_CONFIG_STEAMROLLER bli_gks_register_cntx( BLIS_ARCH_STEAMROLLER, bli_cntx_init_steamroller, bli_cntx_init_steamroller_ref, 
bli_cntx_init_steamroller_ind ); #endif #ifdef BLIS_CONFIG_PILEDRIVER bli_gks_register_cntx( BLIS_ARCH_PILEDRIVER, bli_cntx_init_piledriver, bli_cntx_init_piledriver_ref, bli_cntx_init_piledriver_ind ); #endif #ifdef BLIS_CONFIG_BULLDOZER bli_gks_register_cntx( BLIS_ARCH_BULLDOZER, bli_cntx_init_bulldozer, bli_cntx_init_bulldozer_ref, bli_cntx_init_bulldozer_ind ); #endif // ARM architectures #ifdef BLIS_CONFIG_THUNDERX2 bli_gks_register_cntx( BLIS_ARCH_THUNDERX2, bli_cntx_init_thunderx2, bli_cntx_init_thunderx2_ref, bli_cntx_init_thunderx2_ind ); #endif #ifdef BLIS_CONFIG_CORTEXA57 bli_gks_register_cntx( BLIS_ARCH_CORTEXA57, bli_cntx_init_cortexa57, bli_cntx_init_cortexa57_ref, bli_cntx_init_cortexa57_ind ); #endif #ifdef BLIS_CONFIG_CORTEXA53 bli_gks_register_cntx( BLIS_ARCH_CORTEXA53, bli_cntx_init_cortexa53, bli_cntx_init_cortexa53_ref, bli_cntx_init_cortexa53_ind ); #endif #ifdef BLIS_CONFIG_CORTEXA15 bli_gks_register_cntx( BLIS_ARCH_CORTEXA15, bli_cntx_init_cortexa15, bli_cntx_init_cortexa15_ref, bli_cntx_init_cortexa15_ind ); #endif #ifdef BLIS_CONFIG_CORTEXA9 bli_gks_register_cntx( BLIS_ARCH_CORTEXA9, bli_cntx_init_cortexa9, bli_cntx_init_cortexa9_ref, bli_cntx_init_cortexa9_ind ); #endif // IBM architectures #ifdef BLIS_CONFIG_POWER9 bli_gks_register_cntx( BLIS_ARCH_POWER9, bli_cntx_init_power9, bli_cntx_init_power9_ref, bli_cntx_init_power9_ind ); #endif #ifdef BLIS_CONFIG_POWER7 bli_gks_register_cntx( BLIS_ARCH_POWER7, bli_cntx_init_power7, bli_cntx_init_power7_ref, bli_cntx_init_power7_ind ); #endif #ifdef BLIS_CONFIG_BGQ bli_gks_register_cntx( BLIS_ARCH_BGQ, bli_cntx_init_bgq, bli_cntx_init_bgq_ref, bli_cntx_init_bgq_ind ); #endif // Generic architectures #ifdef BLIS_CONFIG_GENERIC bli_gks_register_cntx( BLIS_ARCH_GENERIC, bli_cntx_init_generic, bli_cntx_init_generic_ref, bli_cntx_init_generic_ind ); #endif } } // ----------------------------------------------------------------------------- void bli_gks_finalize( void ) { arch_t id; ind_t ind; // BEGIN 
CRITICAL SECTION // NOTE: This critical section is implicit. We assume this function is only // called from within the critical section within bli_finalize(). { // Iterate over the architectures in the gks array. for ( id = 0; id < BLIS_NUM_ARCHS; ++id ) { cntx_t** restrict gks_id = gks[ id ]; // Only consider context arrays for architectures that were allocated // in the first place. if ( gks_id != NULL ) { // Iterate over the induced methods in the current sub-array // referenced by cntx_pp. for ( ind = 0; ind < BLIS_NUM_IND_METHODS; ++ind ) { cntx_t* restrict gks_id_ind = gks_id[ ind ]; // If the current context was allocated, free it. if ( gks_id_ind != NULL ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_gks_finalize(): cntx for ind_t %d: ", ( int )ind ); #endif bli_free_intl( gks_id_ind ); } } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_gks_finalize(): gks for arch_t %d: ", ( int )id ); #endif // Free the array of BLIS_NUM_IND_METHODS cntx* elements. bli_free_intl( gks_id ); } } } // END CRITICAL SECTION } // ----------------------------------------------------------------------------- void bli_gks_init_index( void ) { // This function is called by bli_gks_init(). It simply initializes all // architecture id elements of the internal arrays to NULL. const size_t gks_size = sizeof( cntx_t* ) * BLIS_NUM_ARCHS; const size_t fpa_size = sizeof( void_fp ) * BLIS_NUM_ARCHS; // Set every entry in gks and context init function pointer arrays to // zero/NULL. This is done so that later on we know which ones were // allocated. memset( gks, 0, gks_size ); memset( cntx_ref_init, 0, fpa_size ); memset( cntx_ind_init, 0, fpa_size ); } // ----------------------------------------------------------------------------- cntx_t* bli_gks_lookup_nat_cntx ( arch_t id ) { // Return the address of the (native) context for a given architecture id. // This function assumes the architecture has already been registered. 
return bli_gks_lookup_ind_cntx( id, BLIS_NAT ); } // ----------------------------------------------------------------------------- cntx_t* bli_gks_lookup_ind_cntx ( arch_t id, ind_t ind ) { // Return the address of the context for a given architecture id and // induced method. This function assumes the architecture has already // been registered. Note that this function returns NULL if the induced // method hasn't yet been called (and thus its context pointer is still // NULL). // Sanity check: verify that the arch_t id is valid. if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); bli_check_error_code( e_val ); } // Index into the array of context pointers for the given architecture id, // and then index into the subarray for the given induced method. cntx_t** restrict gks_id = gks[ id ]; cntx_t* restrict gks_id_ind = gks_id[ ind ]; // Return the context pointer at gks_id_ind. return gks_id_ind; } // ----------------------------------------------------------------------------- void bli_gks_register_cntx ( arch_t id, void_fp nat_fp, void_fp ref_fp, void_fp ind_fp ) { // This function is called by bli_gks_init() for each architecture that // will be supported by BLIS. It takes an architecture id and three // function pointers, one to a function that initializes a native context // (supplied by the kernel developer), one to a function that initializes // a reference context (with function pointers specific to the architecture // associated with id), and one to a function that initializes a // context for use with induced methods (again, with function pointers // to the architecture). The latter two functions are automatically // generated by the framework. Unlike with native contexts, we don't // actually store the induced contexts until that induced method is // called, and we don't ever store reference contexts. 
For this reason, we // can get away with only storing the pointers to the initialization // functions for those latter two types of contexts, which we can then // call at a later time when those contexts are needed. // Sanity check: verify that the arch_t id is valid. if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); bli_check_error_code( e_val ); } nat_cntx_init_ft f = nat_fp; // First, store the function pointers to the context initialization // functions for reference kernels and induced method execution. The // former will be used whenever we need to obtain reference kernels and // latter will be used later on if the user calls a level-3 function // with induced execution enabled. cntx_ref_init[ id ] = ref_fp; cntx_ind_init[ id ] = ind_fp; // If the the context array pointer isn't NULL, then it means the given // architecture id has already registered (and the underlying memory // allocations and context initializations have already been performed). // This is really just a safety feature to prevent memory leaks; this // early return should never occur, because the caller should never try // to register with an architecture id that has already been registered. if ( gks[ id ] != NULL ) return; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_gks_register_cntx(): " ); #endif // At this point, we know the pointer to the array of cntx_t* is NULL and // needs to be allocated. Allocate the memory and initialize it to // zeros/NULL, storing the address of the alloacted memory at the element // for the current architecture id. gks[ id ] = bli_calloc_intl( sizeof( cntx_t* ) * BLIS_NUM_IND_METHODS ); // Alias the allocated array for readability. cntx_t** restrict gks_id = gks[ id ]; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_gks_register_cntx(): " ); #endif // Allocate memory for a single context and store the address at // the element in the gks[ id ] array that is reserved for native // execution. 
gks_id[ BLIS_NAT ] = bli_calloc_intl( sizeof( cntx_t ) ); // Alias the allocated context address for readability. cntx_t* restrict gks_id_nat = gks_id[ BLIS_NAT ]; // Call the context initialization function on the element of the newly // allocated array corresponding to native execution. f( gks_id_nat ); // Verify that cache blocksizes are whole multiples of register blocksizes. // Specifically, verify that: // - MC is a whole multiple of MR. // - NC is a whole multiple of NR. // - KC is a whole multiple of KR. // These constraints are enforced because it makes it easier to handle diagonals // in the macro-kernel implementations. Additionally, we optionally verify that: // - MC is a whole multiple of NR. // - NC is a whole multiple of MR. // These latter constraints, guarded by #ifndef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS // below, are only enforced when we wish to be able to handle the trsm right- // side case handling that swaps A and B, so that B is the triangular matrix, // with NR blocking used to pack A and MR blocking used to pack B, with the // arguments to the gemmtrsm microkernel swapped at the last minute, as the // kernel is called. 
err_t e_val; blksz_t* restrict mc = bli_cntx_get_blksz( BLIS_MC, gks_id_nat ); blksz_t* restrict nc = bli_cntx_get_blksz( BLIS_NC, gks_id_nat ); blksz_t* restrict kc = bli_cntx_get_blksz( BLIS_KC, gks_id_nat ); blksz_t* restrict mr = bli_cntx_get_blksz( BLIS_MR, gks_id_nat ); blksz_t* restrict nr = bli_cntx_get_blksz( BLIS_NR, gks_id_nat ); blksz_t* restrict kr = bli_cntx_get_blksz( BLIS_KR, gks_id_nat ); e_val = bli_check_valid_mc_mod_mult( mc, mr ); bli_check_error_code( e_val ); e_val = bli_check_valid_nc_mod_mult( nc, nr ); bli_check_error_code( e_val ); e_val = bli_check_valid_kc_mod_mult( kc, kr ); bli_check_error_code( e_val ); #ifndef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS e_val = bli_check_valid_mc_mod_mult( mc, nr ); bli_check_error_code( e_val ); e_val = bli_check_valid_nc_mod_mult( nc, mr ); bli_check_error_code( e_val ); #endif } // ----------------------------------------------------------------------------- cntx_t* bli_gks_query_cntx( void ) { return bli_gks_query_nat_cntx(); } cntx_t* bli_gks_query_nat_cntx( void ) { bli_init_once(); // Return the address of the native context for the architecture id // corresponding to the current hardware, as determined by // bli_arch_query_id(). // Query the architecture id. arch_t id = bli_arch_query_id(); // Use the architecture id to look up a pointer to its context. cntx_t* cntx = bli_gks_lookup_nat_cntx( id ); return cntx; } // ----------------------------------------------------------------------------- cntx_t* bli_gks_query_cntx_noinit( void ) { // This function is identical to bli_gks_query_cntx(), except that it // does not call bli_init_once(). // Query the architecture id. arch_t id = bli_arch_query_id(); // Use the architecture id to look up a pointer to its context. 
cntx_t* cntx = bli_gks_lookup_nat_cntx( id ); return cntx; } // ----------------------------------------------------------------------------- // A mutex to allow synchronous access to the gks when it needs to be updated // with a new entry corresponding to a context for an ind_t value. static bli_pthread_mutex_t gks_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; cntx_t* bli_gks_query_ind_cntx ( ind_t ind, num_t dt ) { bli_init_once(); cntx_t* gks_id_ind; // Return the address of a context that will be suited for executing a // level-3 operation via the requested induced method (and datatype) for // the architecture id corresponding to the current hardware, as // determined by bli_arch_query_id(). // This function is called when a level-3 operation via induced method is // called, e.g. bli_gemm1m(). If this is the first time that induced method // is being executed since bli_gks_init(), the necessary context structure // is allocated and initialized. If this is not the first time, then the // address of a previously-allocated and initialized (cached) context is // returned. Note that much of this must be done with mutual exclusion to // ensure thread safety and deterministic behavior. // Query the architecture id. arch_t id = bli_arch_query_id(); // Sanity check: verify that the arch_t id is valid. if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); bli_check_error_code( e_val ); } // NOTE: These initial statements can reside outside of the critical section // because gks[ id ] should have already been allocated, and the native // context in that array should have already been allocated/initialized. // Query the gks for the array of context pointers corresponding to the // given architecture id. cntx_t** restrict gks_id = gks[ id ]; cntx_t* restrict gks_id_nat = gks_id[ BLIS_NAT ]; // If for some reason the native context was requested, we can return // its address early. 
if ( ind == BLIS_NAT ) return gks_id_nat; // This function assumes that the architecture idenified by id has // already been registered with the gks (which guarantees that // gks[ id ] is non-NULL and gks[ id ][ BLIS_NAT ] is also non-NULL // and refers to a context initialized with valid data). // Acquire the mutex protecting the gks. bli_pthread_mutex_lock( &gks_mutex ); // BEGIN CRITICAL SECTION { // Alias for readability the element of gks_id associated with the // requested induced method. gks_id_ind = gks_id[ ind ]; // If the context pointer is NULL, then we know we must allocate and // then initialize the context before returning its address. if ( gks_id_ind == NULL ) { // If gks_id_ind is NULL, then we know we must allocate and then // initialize the context, storing its address back to // gks_id[ ind ]. gks_id_ind = bli_calloc_intl( sizeof( cntx_t ) ); gks_id[ ind ] = gks_id_ind; // Before we can call the induced method context initialization // function on the newly allocated structure, we must first copy // over the contents of the native context. *gks_id_ind = *gks_id_nat; // Use the architecture id to look up the function pointer to the // context initialization function for induced methods. ind_cntx_init_ft f = cntx_ind_init[ id ]; // Now we modify the context (so that it contains the proper values // for its induced method) by calling the context initialization // function for the current induced method. (That function assumes // that the context is pre- initialized with values for native // execution.) f( ind, dt, gks_id_ind ); } } // END CRITICAL SECTION // Release the mutex protecting the gks. bli_pthread_mutex_unlock( &gks_mutex ); // Return the address of the newly-allocated/initialized context. return gks_id_ind; } // ----------------------------------------------------------------------------- void bli_gks_init_ref_cntx ( cntx_t* cntx ) { // Query the architecture id. 
arch_t id = bli_arch_query_id(); // Sanity check: verify that the arch_t id is valid. if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); bli_check_error_code( e_val ); } // Obtain the function pointer to the context initialization function for // reference kernels. ref_cntx_init_ft f = cntx_ref_init[ id ]; // Initialize the caller's context with reference kernels and related values. f( cntx ); } // ----------------------------------------------------------------------------- bool_t bli_gks_cntx_l3_nat_ukr_is_ref ( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { cntx_t ref_cntx; // Initialize a context with reference kernels for the arch_t id queried // via bli_arch_query_id(). bli_gks_init_ref_cntx( &ref_cntx ); // Query each context for the micro-kernel function pointer for the // specified datatype. void_fp ref_fp = bli_cntx_get_l3_nat_ukr_dt( dt, ukr_id, &ref_cntx ); void_fp fp = bli_cntx_get_l3_nat_ukr_dt( dt, ukr_id, cntx ); // Return the result. return fp == ref_fp; } // // -- level-3 micro-kernel implementation strings ------------------------------ // static char* bli_gks_l3_ukr_impl_str[BLIS_NUM_UKR_IMPL_TYPES] = { "refrnce", "virtual", "optimzd", "notappl", }; // ----------------------------------------------------------------------------- char* bli_gks_l3_ukr_impl_string( l3ukr_t ukr, ind_t method, num_t dt ) { kimpl_t ki; // Query the context for the current induced method and datatype, and // then query the ukernel function pointer for the given datatype from // that context. cntx_t* cntx = bli_gks_query_ind_cntx( method, dt ); void_fp fp = bli_cntx_get_l3_vir_ukr_dt( dt, ukr, cntx ); // Check whether the ukernel function pointer is NULL for the given // datatype. If it is NULL, return the string for not applicable. // Otherwise, query the ukernel implementation type using the method // provided and return the associated string. 
if ( fp == NULL ) ki = BLIS_NOTAPPLIC_UKERNEL; else ki = bli_gks_l3_ukr_impl_type( ukr, method, dt ); return bli_gks_l3_ukr_impl_str[ ki ]; } #if 0 char* bli_gks_l3_ukr_avail_impl_string( l3ukr_t ukr, num_t dt ) { opid_t oper; ind_t method; kimpl_t ki; // We need to decide which operation we will use to query the // current available induced method. If the ukr type given is // BLIS_GEMM_UKR, we use gemm. Otherwise, we use trsm (since // the four other defined ukr types are trsm-related). if ( ukr == BLIS_GEMM_UKR ) oper = BLIS_GEMM; else oper = BLIS_TRSM; // Query the current available induced method using the // chosen operation id type. method = bli_l3_ind_oper_find_avail( oper, dt ); // Query the ukernel implementation type using the current // available method. ki = bli_gks_l3_ukr_impl_type( ukr, method, dt ); return bli_ukr_impl_str[ ki ]; } #endif kimpl_t bli_gks_l3_ukr_impl_type( l3ukr_t ukr, ind_t method, num_t dt ) { // If the current available induced method is not native, it // must be virtual. if ( method != BLIS_NAT ) return BLIS_VIRTUAL_UKERNEL; else { // If the current available induced method for the gemm // operation is native, then it might be reference or // optimized. To determine which, we compare the // datatype-specific function pointer within the ukrs // object corresponding to the current available induced // method to the typed function pointer within the known // reference ukrs object. cntx_t ref_cntx_l; // Query the architecture id. arch_t id = bli_arch_query_id(); // Sanity check: verify that the arch_t id is valid. if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( id ); bli_check_error_code( e_val ); } // Obtain the function pointer to the context initialization function // for reference kernels. ref_cntx_init_ft f = cntx_ref_init[ id ]; // Initialize a local context with reference kernels and related values. f( &ref_cntx_l ); // Query the native context from the gks. 
cntx_t* nat_cntx = bli_gks_lookup_nat_cntx( id ); // Query the native ukernel func_t from both the native and reference // contexts. void_fp nat_fp = bli_cntx_get_l3_nat_ukr_dt( dt, ukr, nat_cntx ); void_fp ref_fp = bli_cntx_get_l3_nat_ukr_dt( dt, ukr, &ref_cntx_l ); if ( nat_fp == ref_fp ) return BLIS_REFERENCE_UKERNEL; else return BLIS_OPTIMIZED_UKERNEL; } } blis-0.6.1/frame/base/bli_gks.h000066400000000000000000000051311360743507500162500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_GKS_H #define BLIS_GKS_H void bli_gks_init( void ); void bli_gks_finalize( void ); void bli_gks_init_index( void ); cntx_t* bli_gks_lookup_nat_cntx( arch_t id ); cntx_t* bli_gks_lookup_ind_cntx( arch_t id, ind_t ind ); void bli_gks_register_cntx( arch_t id, void_fp nat_fp, void_fp ref_fp, void_fp ind_fp ); BLIS_EXPORT_BLIS cntx_t* bli_gks_query_cntx( void ); BLIS_EXPORT_BLIS cntx_t* bli_gks_query_nat_cntx( void ); cntx_t* bli_gks_query_cntx_noinit( void ); BLIS_EXPORT_BLIS cntx_t* bli_gks_query_ind_cntx( ind_t ind, num_t dt ); BLIS_EXPORT_BLIS void bli_gks_init_ref_cntx( cntx_t* cntx ); bool_t bli_gks_cntx_l3_nat_ukr_is_ref( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ); BLIS_EXPORT_BLIS char* bli_gks_l3_ukr_impl_string( l3ukr_t ukr, ind_t method, num_t dt ); BLIS_EXPORT_BLIS kimpl_t bli_gks_l3_ukr_impl_type( l3ukr_t ukr, ind_t method, num_t dt ); //char* bli_gks_l3_ukr_avail_impl_string( l3ukr_t ukr, num_t dt ); #endif blis-0.6.1/frame/base/bli_info.c000066400000000000000000000170601360743507500164160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // -- General library information ---------------------------------------------- // This string gets defined via -D on the command line when BLIS is compiled. // This string is (or rather, should be) only used here. 
static char* bli_version_str = BLIS_VERSION_STRING; static char* bli_int_type_size_str = STRINGIFY_INT( BLIS_INT_TYPE_SIZE ); char* bli_info_get_version_str( void ) { return bli_version_str; } char* bli_info_get_int_type_size_str( void ) { return bli_int_type_size_str; } // -- General configuration-related -------------------------------------------- gint_t bli_info_get_int_type_size( void ) { return BLIS_INT_TYPE_SIZE; } gint_t bli_info_get_num_fp_types( void ) { return BLIS_NUM_FP_TYPES; } gint_t bli_info_get_max_type_size( void ) { return BLIS_MAX_TYPE_SIZE; } gint_t bli_info_get_page_size( void ) { return BLIS_PAGE_SIZE; } gint_t bli_info_get_simd_num_registers( void ) { return BLIS_SIMD_NUM_REGISTERS; } gint_t bli_info_get_simd_size( void ) { return BLIS_SIMD_SIZE; } gint_t bli_info_get_simd_align_size( void ) { return BLIS_SIMD_ALIGN_SIZE; } gint_t bli_info_get_stack_buf_max_size( void ) { return BLIS_STACK_BUF_MAX_SIZE; } gint_t bli_info_get_stack_buf_align_size( void ) { return BLIS_STACK_BUF_ALIGN_SIZE; } gint_t bli_info_get_heap_addr_align_size( void ) { return BLIS_HEAP_ADDR_ALIGN_SIZE; } gint_t bli_info_get_heap_stride_align_size( void ) { return BLIS_HEAP_STRIDE_ALIGN_SIZE; } gint_t bli_info_get_pool_addr_align_size_a( void ) { return BLIS_POOL_ADDR_ALIGN_SIZE_A; } gint_t bli_info_get_pool_addr_align_size_b( void ) { return BLIS_POOL_ADDR_ALIGN_SIZE_B; } gint_t bli_info_get_pool_addr_align_size_c( void ) { return BLIS_POOL_ADDR_ALIGN_SIZE_C; } gint_t bli_info_get_pool_addr_align_size_gen( void ) { return BLIS_POOL_ADDR_ALIGN_SIZE_GEN; } gint_t bli_info_get_pool_addr_offset_size_a( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_A; } gint_t bli_info_get_pool_addr_offset_size_b( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_B; } gint_t bli_info_get_pool_addr_offset_size_c( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_C; } gint_t bli_info_get_pool_addr_offset_size_gen( void ) { return BLIS_POOL_ADDR_OFFSET_SIZE_GEN; } gint_t bli_info_get_enable_stay_auto_init( void 
) { #ifdef BLIS_ENABLE_STAY_AUTO_INITIALIZED return 1; #else return 0; #endif } gint_t bli_info_get_enable_blas( void ) { #ifdef BLIS_ENABLE_BLAS return 1; #else return 0; #endif } gint_t bli_info_get_enable_cblas( void ) { #ifdef BLIS_ENABLE_CBLAS return 1; #else return 0; #endif } gint_t bli_info_get_blas_int_type_size( void ) { return BLIS_BLAS_INT_TYPE_SIZE; } gint_t bli_info_get_enable_pba_pools( void ) { #ifdef BLIS_ENABLE_PBA_POOLS return 1; #else return 0; #endif } gint_t bli_info_get_enable_sba_pools( void ) { #ifdef BLIS_ENABLE_SBA_POOLS return 1; #else return 0; #endif } gint_t bli_info_get_enable_threading( void ) { if ( bli_info_get_enable_openmp() || bli_info_get_enable_pthreads() ) return 1; else return 0; } gint_t bli_info_get_enable_openmp( void ) { #ifdef BLIS_ENABLE_OPENMP return 1; #else return 0; #endif } gint_t bli_info_get_enable_pthreads( void ) { #ifdef BLIS_ENABLE_PTHREADS return 1; #else return 0; #endif } gint_t bli_info_get_thread_part_jrir_slab( void ) { #ifdef BLIS_ENABLE_JRIR_SLAB return 1; #else return 0; #endif } gint_t bli_info_get_thread_part_jrir_rr( void ) { #ifdef BLIS_ENABLE_JRIR_RR return 1; #else return 0; #endif } gint_t bli_info_get_enable_memkind( void ) { #ifdef BLIS_ENABLE_MEMKIND return 1; #else return 0; #endif } gint_t bli_info_get_enable_sandbox( void ) { #ifdef BLIS_ENABLE_SANDBOX return 1; #else return 0; #endif } // -- Kernel implementation-related -------------------------------------------- // -- Level-3 kernel definitions -- char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ) { bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMM_UKR, method, dt ); } char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ) { bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_L_UKR, method, dt ); } char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ) { bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_GEMMTRSM_U_UKR, method, dt ); } char* 
bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ) { bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_TRSM_L_UKR, method, dt ); } char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ) { bli_init_once(); return bli_gks_l3_ukr_impl_string( BLIS_TRSM_U_UKR, method, dt ); } // -- BLIS implementation query (level-3) -------------------------------------- char* bli_info_get_gemm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_GEMM, dt ); } char* bli_info_get_hemm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_HEMM, dt ); } char* bli_info_get_herk_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_HERK, dt ); } char* bli_info_get_her2k_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_HER2K, dt ); } char* bli_info_get_symm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_SYMM, dt ); } char* bli_info_get_syrk_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_SYRK, dt ); } char* bli_info_get_syr2k_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_SYR2K, dt ); } char* bli_info_get_trmm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRMM, dt ); } char* bli_info_get_trmm3_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRMM3, dt ); } char* bli_info_get_trsm_impl_string( num_t dt ) { return bli_ind_oper_get_avail_impl_string( BLIS_TRSM, dt ); } blis-0.6.1/frame/base/bli_info.h000066400000000000000000000124141360743507500164210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // -- General library information ---------------------------------------------- BLIS_EXPORT_BLIS char* bli_info_get_version_str( void ); BLIS_EXPORT_BLIS char* bli_info_get_int_type_size_str( void ); // -- General configuration-related -------------------------------------------- BLIS_EXPORT_BLIS gint_t bli_info_get_int_type_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_num_fp_types( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_max_type_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_page_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_simd_num_registers( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_simd_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_simd_align_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_stack_buf_max_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_stack_buf_align_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_heap_addr_align_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_heap_stride_align_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_pool_addr_align_size_a( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_pool_addr_align_size_b( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_pool_addr_align_size_c( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_pool_addr_align_size_gen( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_pool_addr_offset_size_a( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_pool_addr_offset_size_b( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_pool_addr_offset_size_c( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_pool_addr_offset_size_gen( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_stay_auto_init( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_blas( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_cblas( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_blas_int_type_size( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_pba_pools( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_sba_pools( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_threading( void ); BLIS_EXPORT_BLIS gint_t 
bli_info_get_enable_openmp( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_pthreads( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_thread_part_jrir_slab( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_thread_part_jrir_rr( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_memkind( void ); BLIS_EXPORT_BLIS gint_t bli_info_get_enable_sandbox( void ); // -- Kernel implementation-related -------------------------------------------- // -- Level-3 kernel definitions -- BLIS_EXPORT_BLIS char* bli_info_get_gemm_ukr_impl_string( ind_t method, num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_gemmtrsm_l_ukr_impl_string( ind_t method, num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_gemmtrsm_u_ukr_impl_string( ind_t method, num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_trsm_l_ukr_impl_string( ind_t method, num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_trsm_u_ukr_impl_string( ind_t method, num_t dt ); // -- BLIS implementation query (level-3) -------------------------------------- BLIS_EXPORT_BLIS char* bli_info_get_gemm_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_hemm_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_herk_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_her2k_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_symm_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_syrk_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_syr2k_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_trmm_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_trmm3_impl_string( num_t dt ); BLIS_EXPORT_BLIS char* bli_info_get_trsm_impl_string( num_t dt ); blis-0.6.1/frame/base/bli_init.c000066400000000000000000000066351360743507500164340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

// -----------------------------------------------------------------------------

// Initialize the library. Forwards to bli_init_once(), so the sub-API
// initialization is routed through the once_init control object below.
void bli_init( void )
{
	bli_init_once();
}

// Finalize the library. Forwards to bli_finalize_once(), so the sub-API
// finalization is routed through the once_finalize control object below.
void bli_finalize( void )
{
	bli_finalize_once();
}

// -----------------------------------------------------------------------------

// Automatic-initialization hook. In this file it has the same effect as
// bli_init(): it forwards to bli_init_once().
void bli_init_auto( void )
{
	bli_init_once();
}

// Automatic-finalization hook. Whether it finalizes anything is decided at
// compile time by BLIS_ENABLE_STAY_AUTO_INITIALIZED.
void bli_finalize_auto( void )
{
#ifdef BLIS_ENABLE_STAY_AUTO_INITIALIZED

	// If BLIS was configured to stay initialized after being automatically
	// initialized, we honor the configuration request and do nothing.
	// BLIS will remain initialized unless and until the user explicitly
	// calls bli_finalize().

#else

	bli_finalize_once();

#endif
}

// -----------------------------------------------------------------------------

// Initialize each sub-API in a fixed order: gks, ind, thread, pack, memsys.
// This is the function handed to bli_pthread_once() by bli_init_once().
void bli_init_apis( void )
{
	// Initialize various sub-APIs.
	bli_gks_init();
	bli_ind_init();
	bli_thread_init();
	bli_pack_init();
	bli_memsys_init();
}

// Finalize each sub-API in the reverse of the order used by
// bli_init_apis(): memsys, pack, thread, ind, gks.
void bli_finalize_apis( void )
{
	// Finalize various sub-APIs.
	bli_memsys_finalize();
	bli_pack_finalize();
	bli_thread_finalize();
	bli_ind_finalize();
	bli_gks_finalize();
}

// -----------------------------------------------------------------------------

// A pthread_once_t variable is a pthread structure used in pthread_once().
// pthread_once() is guaranteed to execute exactly once among all threads that
// pass in this control object. Thus, we need one for initialization and a
// separate one for finalization.
// NOTE(review): nothing in this file resets either control object, so
// bli_init_apis() and bli_finalize_apis() each appear to run at most once per
// process -- confirm that re-initializing after finalization is not expected.
static bli_pthread_once_t once_init     = BLIS_PTHREAD_ONCE_INIT;
static bli_pthread_once_t once_finalize = BLIS_PTHREAD_ONCE_INIT;

// Run bli_init_apis() through the once_init control object.
void bli_init_once( void )
{
	bli_pthread_once( &once_init, bli_init_apis );
}

// Run bli_finalize_apis() through the once_finalize control object.
void bli_finalize_once( void )
{
	bli_pthread_once( &once_finalize, bli_finalize_apis );
}

blis-0.6.1/frame/base/bli_init.h000066400000000000000000000036251360743507500164350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ BLIS_EXPORT_BLIS void bli_init( void ); BLIS_EXPORT_BLIS void bli_finalize( void ); void bli_init_auto( void ); void bli_finalize_auto( void ); void bli_init_apis( void ); void bli_finalize_apis( void ); void bli_init_once( void ); void bli_finalize_once( void ); blis-0.6.1/frame/base/bli_machval.c000066400000000000000000000067351360743507500171050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define FUNCPTR_T machval_fp typedef void (*FUNCPTR_T)( machval_t mval, void* v ); static FUNCPTR_T GENARRAY(ftypes,machval); // // Define object-based interface. // void bli_machval( machval_t mval, obj_t* v ) { num_t dt_v = bli_obj_dt( v ); void* buf_v = bli_obj_buffer_at_off( v ); FUNCPTR_T f; // Index into the function pointer array. f = ftypes[dt_v]; // Invoke the function. f( mval, buf_v ); } // // Define BLAS-like interfaces. // #undef GENTFUNCR #define GENTFUNCR( ctype_v, ctype_vr, chv, chvr, opname, varname ) \ \ void PASTEMAC(chv,opname) \ ( \ machval_t mval, \ void* v \ ) \ { \ static ctype_vr pvals[ BLIS_NUM_MACH_PARAMS ]; \ \ static bool_t first_time = TRUE; \ \ dim_t val_i = mval - BLIS_MACH_PARAM_FIRST; \ ctype_v* v_cast = v; \ \ /* If this is the first time through, call the underlying code to discover each machine parameter. 
*/ \ if ( first_time ) \ { \ char lapack_mval; \ dim_t m, i; \ \ for( i = 0, m = BLIS_MACH_PARAM_FIRST; \ i < BLIS_NUM_MACH_PARAMS - 1; \ ++i, ++m ) \ { \ bli_param_map_blis_to_netlib_machval( m, &lapack_mval ); \ \ /*printf( "bli_machval: querying %u %c\n", m, lapack_mval );*/ \ \ pvals[i] = PASTEMAC(chvr,varname)( &lapack_mval, 1 ); \ \ /*printf( "bli_machval: got back %34.29e\n", pvals[i] ); */ \ } \ \ /* Store epsilon^2 in the last element. */ \ pvals[i] = pvals[0] * pvals[0]; \ \ first_time = FALSE; \ } \ \ /* Copy the requested parameter value to the output buffer, which may involve a demotion from the complex to real domain. */ \ PASTEMAC2(chvr,chv,copys)( pvals[ val_i ], *v_cast ); \ } INSERT_GENTFUNCR_BASIC( machval, lamch ) blis-0.6.1/frame/base/bli_machval.h000066400000000000000000000041131360743507500170760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_lsame.h" #include "bli_slamch.h" #include "bli_dlamch.h" // // Prototype object-based interface. // BLIS_EXPORT_BLIS void bli_machval( machval_t mval, obj_t* v ); // // Prototype BLAS-like interfaces. // #undef GENTPROTR #define GENTPROTR( ctype_v, ctype_vr, chv, chvr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(chv,opname) \ ( \ machval_t mval, \ void* v \ ); INSERT_GENTPROTR_BASIC0( machval ) blis-0.6.1/frame/base/bli_malloc.c000066400000000000000000000162701360743507500167340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" //#define BLIS_ENABLE_MEM_TRACING // ----------------------------------------------------------------------------- // NOTE: These functions are no longer used. Instead, the relevant sections // of code call bli_fmalloc_align() and pass in the desired malloc()-like // function, such as BLIS_MALLOC_POOL. 
#if 0 void* bli_malloc_pool( size_t size ) { const malloc_ft malloc_fp = BLIS_MALLOC_POOL; const size_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_malloc_pool(): size %ld, align size %ld\n", ( long )size, ( long )align_size ); fflush( stdout ); #endif return bli_fmalloc_align( malloc_fp, size, align_size ); } void bli_free_pool( void* p ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_free_pool(): freeing block\n" ); fflush( stdout ); #endif bli_ffree_align( BLIS_FREE_POOL, p ); } #endif // ----------------------------------------------------------------------------- void* bli_malloc_user( size_t size ) { const malloc_ft malloc_fp = BLIS_MALLOC_USER; const size_t align_size = BLIS_HEAP_ADDR_ALIGN_SIZE; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_malloc_user(): size %ld, align size %ld\n", ( long )size, ( long )align_size ); fflush( stdout ); #endif return bli_fmalloc_align( malloc_fp, size, align_size ); } void bli_free_user( void* p ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_free_user(): freeing block\n" ); fflush( stdout ); #endif bli_ffree_align( BLIS_FREE_USER, p ); } // ----------------------------------------------------------------------------- void* bli_malloc_intl( size_t size ) { const malloc_ft malloc_fp = BLIS_MALLOC_INTL; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_malloc_intl(): size %ld\n", ( long )size ); fflush( stdout ); #endif return bli_fmalloc_noalign( malloc_fp, size ); } void* bli_calloc_intl( size_t size ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_calloc_intl(): " ); #endif void* p = bli_malloc_intl( size ); memset( p, 0, size ); return p; } void bli_free_intl( void* p ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_free_intl(): freeing block\n" ); fflush( stdout ); #endif bli_ffree_noalign( BLIS_FREE_INTL, p ); } // ----------------------------------------------------------------------------- void* bli_fmalloc_align ( malloc_ft f, size_t size, size_t align_size ) { const size_t ptr_size = 
sizeof( void* ); size_t align_offset = 0; void* p_orig; int8_t* p_byte; void** p_addr; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_fmalloc_align_check( f, size, align_size ); // Return early if zero bytes were requested. if ( size == 0 ) return NULL; // Add the alignment size and the size of a pointer to the number // of bytes to allocate. size += align_size + ptr_size; // Call the allocation function. p_orig = f( size ); // Check the pointer returned by malloc(). if ( bli_error_checking_is_enabled() ) bli_fmalloc_post_check( p_orig ); // Advance the pointer by one pointer element. p_byte = p_orig; p_byte += ptr_size; // Compute the offset to the desired alignment. if ( bli_is_unaligned_to( ( siz_t )p_byte, ( siz_t )align_size ) ) { align_offset = align_size - bli_offset_past_alignment( ( siz_t )p_byte, ( siz_t )align_size ); } // Advance the pointer using the difference between the alignment // size and the alignment offset. p_byte += align_offset; // Compute the address of the pointer element just before the start // of the aligned address, and store the original address there. p_addr = ( void** )(p_byte - ptr_size); *p_addr = p_orig; // Return the aligned pointer. return p_byte; } void bli_ffree_align ( free_ft f, void* p ) { const size_t ptr_size = sizeof( void* ); void* p_orig; int8_t* p_byte; void** p_addr; // If the pointer to free is NULL, it was obviously not aligned and // does not need to be freed. if ( p == NULL ) return; // Since the bli_fmalloc_align() function returned the aligned pointer, // we have to first recover the original pointer before we can free the // memory. // Start by casting the pointer to a byte pointer. p_byte = p; // Compute the address of the pointer element just before the start // of the aligned address, and recover the original address. p_addr = ( void** )( p_byte - ptr_size ); p_orig = *p_addr; // Free the original pointer. 
f( p_orig ); } // ----------------------------------------------------------------------------- void* bli_fmalloc_noalign ( malloc_ft f, size_t size ) { void* p = f( size ); // Check the pointer returned by malloc(). if ( bli_error_checking_is_enabled() ) bli_fmalloc_post_check( p ); return p; } void bli_ffree_noalign ( free_ft f, void* p ) { f( p ); } // ----------------------------------------------------------------------------- void bli_fmalloc_align_check ( malloc_ft f, size_t size, size_t align_size ) { err_t e_val; // Check for valid alignment. e_val = bli_check_alignment_is_power_of_two( align_size ); bli_check_error_code( e_val ); e_val = bli_check_alignment_is_mult_of_ptr_size( align_size ); bli_check_error_code( e_val ); } void bli_fmalloc_post_check ( void* p ) { err_t e_val; // Check for valid values from malloc(). e_val = bli_check_valid_malloc_buf( p ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/bli_malloc.h000066400000000000000000000052521360743507500167370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Typedef function pointer types for malloc() and free() substitutes. typedef void* (*malloc_ft) ( size_t size ); typedef void (*free_ft) ( void* p ); // ----------------------------------------------------------------------------- #if 0 BLIS_EXPORT_BLIS void* bli_malloc_pool( size_t size ); BLIS_EXPORT_BLIS void bli_free_pool( void* p ); #endif void* bli_malloc_intl( size_t size ); void* bli_calloc_intl( size_t size ); void bli_free_intl( void* p ); BLIS_EXPORT_BLIS void* bli_malloc_user( size_t size ); BLIS_EXPORT_BLIS void bli_free_user( void* p ); // ----------------------------------------------------------------------------- void* bli_fmalloc_align( malloc_ft f, size_t size, size_t align_size ); void bli_ffree_align( free_ft f, void* p ); void* bli_fmalloc_noalign( malloc_ft f, size_t size ); void bli_ffree_noalign( free_ft f, void* p ); void bli_fmalloc_align_check( malloc_ft f, size_t size, size_t align_size ); void bli_fmalloc_post_check( void* p ); blis-0.6.1/frame/base/bli_mbool.c000066400000000000000000000044331360743507500165730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" mbool_t* bli_mbool_create ( bool_t b_s, bool_t b_d, bool_t b_c, bool_t b_z ) { mbool_t* b; b = ( mbool_t* ) bli_malloc_intl( sizeof(mbool_t) ); bli_mbool_init ( b, b_s, b_d, b_c, b_z ); return b; } void bli_mbool_init ( mbool_t* b, bool_t b_s, bool_t b_d, bool_t b_c, bool_t b_z ) { bli_mbool_set_dt( b_s, BLIS_FLOAT, b ); bli_mbool_set_dt( b_d, BLIS_DOUBLE, b ); bli_mbool_set_dt( b_c, BLIS_SCOMPLEX, b ); bli_mbool_set_dt( b_z, BLIS_DCOMPLEX, b ); } void bli_mbool_free( mbool_t* b ) { bli_free_intl( b ); } blis-0.6.1/frame/base/bli_mbool.h000066400000000000000000000044501360743507500165770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // ----------------------------------------------------------------------------- // mbool_t query static bool_t bli_mbool_get_dt( num_t dt, mbool_t* mb ) { return mb->v[ dt ]; } // mbool_t modification static void bli_mbool_set_dt( bool_t val, num_t dt, mbool_t* mb ) { mb->v[ dt ] = val; } // ----------------------------------------------------------------------------- mbool_t* bli_mbool_create ( bool_t b_s, bool_t b_d, bool_t b_c, bool_t b_z ); void bli_mbool_init ( mbool_t* b, bool_t b_s, bool_t b_d, bool_t b_c, bool_t b_z ); void bli_mbool_free( mbool_t* b ); blis-0.6.1/frame/base/bli_mem.h000066400000000000000000000075541360743507500162550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MEM_H #define BLIS_MEM_H // mem_t object type (defined in bli_type_defs.h) /* typedef struct mem_s { pblk_t pblk; packbuf_t buf_type; pool_t* pool; siz_t size; } mem_t; typedef struct { void* buf; siz_t block_size; } pblk_t; */ // // -- mem_t query -------------------------------------------------------------- // static pblk_t* bli_mem_pblk( mem_t* mem ) { return &(mem->pblk); } static void* bli_mem_buffer( mem_t* mem ) { return bli_pblk_buf( bli_mem_pblk( mem ) ); } static packbuf_t bli_mem_buf_type( mem_t* mem ) { return mem->buf_type; } static pool_t* bli_mem_pool( mem_t* mem ) { return mem->pool; } static siz_t bli_mem_size( mem_t* mem ) { return mem->size; } static bool_t bli_mem_is_alloc( mem_t* mem ) { return ( bool_t ) ( bli_mem_buffer( mem ) != NULL ); } static bool_t bli_mem_is_unalloc( mem_t* mem ) { return ( bool_t ) ( bli_mem_buffer( mem ) == NULL ); } // // -- mem_t modification ------------------------------------------------------- // static void bli_mem_set_pblk( pblk_t* pblk, mem_t* mem ) { 
mem->pblk = *pblk; } static void bli_mem_set_buffer( void* buf, mem_t* mem ) { bli_pblk_set_buf( buf, &(mem->pblk) ); } static void bli_mem_set_buf_type( packbuf_t buf_type, mem_t* mem ) { mem->buf_type = buf_type; } static void bli_mem_set_pool( pool_t* pool, mem_t* mem ) { mem->pool = pool; } static void bli_mem_set_size( siz_t size, mem_t* mem ) { mem->size = size; } // // -- mem_t initialization ----------------------------------------------------- // // NOTE: This initializer macro must be updated whenever fields are added or // removed from the mem_t type definition. An alternative to the initializer is // calling bli_mem_clear() at runtime. #define BLIS_MEM_INITIALIZER \ { \ .pblk = BLIS_PBLK_INITIALIZER, \ .buf_type = -1, \ .pool = NULL, \ .size = 0, \ } \ static void bli_mem_clear( mem_t* mem ) { bli_mem_set_buffer( NULL, mem ); bli_mem_set_buf_type( -1, mem ); bli_mem_set_pool( NULL, mem ); bli_mem_set_size( 0, mem ); } #endif blis-0.6.1/frame/base/bli_membrk.c000066400000000000000000000425171360743507500167450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" static membrk_t global_membrk; // ----------------------------------------------------------------------------- membrk_t* bli_membrk_query( void ) { return &global_membrk; } void bli_membrk_init ( cntx_t* restrict cntx ) { membrk_t* restrict membrk = bli_membrk_query(); const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE_GEN; malloc_ft malloc_fp = BLIS_MALLOC_POOL; free_ft free_fp = BLIS_FREE_POOL; // These fields are used for general-purpose allocation (ie: buf_type // equal to BLIS_BUFFER_FOR_GEN_USE) within bli_membrk_acquire_m(). 
bli_membrk_set_align_size( align_size, membrk ); bli_membrk_set_malloc_fp( malloc_fp, membrk ); bli_membrk_set_free_fp( free_fp, membrk ); bli_membrk_init_mutex( membrk ); #ifdef BLIS_ENABLE_PBA_POOLS bli_membrk_init_pools( cntx, membrk ); #endif } void bli_membrk_finalize ( void ) { membrk_t* restrict membrk = bli_membrk_query(); bli_membrk_set_malloc_fp( NULL, membrk ); bli_membrk_set_free_fp( NULL, membrk ); #ifdef BLIS_ENABLE_PBA_POOLS bli_membrk_finalize_pools( membrk ); #endif bli_membrk_finalize_mutex( membrk ); } void bli_membrk_acquire_m ( rntm_t* rntm, siz_t req_size, packbuf_t buf_type, mem_t* mem ) { pool_t* pool; pblk_t* pblk; dim_t pi; // If the internal memory pools for packing block allocator are disabled, // we spoof the buffer type as BLIS_BUFFER_FOR_GEN_USE to induce the // immediate usage of bli_membrk_malloc(). #ifndef BLIS_ENABLE_PBA_POOLS buf_type = BLIS_BUFFER_FOR_GEN_USE; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_membrk_acquire_m(): bli_fmalloc_align(): size %ld\n", ( long )req_size ); #endif #endif // Query the memory broker from the runtime. membrk_t* membrk = bli_rntm_membrk( rntm ); if ( buf_type == BLIS_BUFFER_FOR_GEN_USE ) { malloc_ft malloc_fp = bli_membrk_malloc_fp( membrk ); siz_t align_size = bli_membrk_align_size( membrk ); // For general-use buffer requests, dynamically allocating memory // is assumed to be sufficient. void* buf = bli_fmalloc_align( malloc_fp, req_size, align_size ); // Initialize the mem_t object with: // - the address of the memory block, // - the buffer type (a packbuf_t value), // - the size of the requested region, // - the membrk_t from which the mem_t entry was acquired. // NOTE: We initialize the pool field to NULL since this block did not // come from a memory pool. 
bli_mem_set_buffer( buf, mem ); bli_mem_set_buf_type( buf_type, mem ); bli_mem_set_pool( NULL, mem ); bli_mem_set_size( req_size, mem ); } else { // This branch handles cases where the memory block needs to come // from an internal memory pool, in which blocks are allocated once // and then recycled. // Map the requested packed buffer type to a zero-based index, which // we then use to select the corresponding memory pool. pi = bli_packbuf_index( buf_type ); pool = bli_membrk_pool( pi, membrk ); // Extract the address of the pblk_t struct within the mem_t. pblk = bli_mem_pblk( mem ); // Acquire the mutex associated with the membrk object. bli_membrk_lock( membrk ); // BEGIN CRITICAL SECTION { // Checkout a block from the pool. If the pool's blocks are too // small, it will be reinitialized with blocks large enough to // accommodate the requested block size. If the pool is exhausted, // either because it is still empty or because all blocks have // been checked out already, additional blocks will be allocated // automatically, as-needed. Note that the addresses are stored // directly into the mem_t struct since pblk is the address of // the struct's pblk_t field. bli_pool_checkout_block( req_size, pblk, pool ); } // END CRITICAL SECTION // Release the mutex associated with the membrk object. bli_membrk_unlock( membrk ); // Query the block_size from the pblk_t. This will be at least // req_size, perhaps larger. siz_t block_size = bli_pblk_block_size( pblk ); // Initialize the mem_t object with: // - the buffer type (a packbuf_t value), // - the address of the memory pool to which it belongs, // - the size of the contiguous memory block (NOT the size of the // requested region), // - the membrk_t from which the mem_t entry was acquired. // The actual (aligned) address is already stored in the mem_t // struct's pblk_t field. 
bli_mem_set_buf_type( buf_type, mem ); bli_mem_set_pool( pool, mem ); bli_mem_set_size( block_size, mem ); } } void bli_membrk_release ( rntm_t* rntm, mem_t* mem ) { packbuf_t buf_type; pool_t* pool; pblk_t* pblk; // Query the memory broker from the runtime. membrk_t* membrk = bli_rntm_membrk( rntm ); // Extract the buffer type so we know what kind of memory was allocated. buf_type = bli_mem_buf_type( mem ); #ifndef BLIS_ENABLE_PBA_POOLS #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_membrk_release(): bli_ffree_align(): size %ld\n", ( long )bli_mem_size( mem ) ); #endif #endif if ( buf_type == BLIS_BUFFER_FOR_GEN_USE ) { free_ft free_fp = bli_membrk_free_fp( membrk ); void* buf = bli_mem_buffer( mem ); // For general-use buffers, we dynamically allocate memory, and so // here we need to free it. bli_ffree_align( free_fp, buf ); } else { // Extract the address of the pool from which the memory was // allocated. pool = bli_mem_pool( mem ); // Extract the address of the pblk_t struct within the mem_t struct. pblk = bli_mem_pblk( mem ); // Acquire the mutex associated with the membrk object. bli_membrk_lock( membrk ); // BEGIN CRITICAL SECTION { // Check the block back into the pool. bli_pool_checkin_block( pblk, pool ); } // END CRITICAL SECTION // Release the mutex associated with the membrk object. bli_membrk_unlock( membrk ); } // Clear the mem_t object so that it appears unallocated. This clears: // - the pblk_t struct's fields (ie: the buffer addresses) // - the pool field // - the size field // - the membrk field // NOTE: We do not clear the buf_type field since there is no // "uninitialized" value for packbuf_t. 
bli_mem_clear( mem ); } #if 0 void bli_membrk_acquire_v ( membrk_t* membrk, siz_t req_size, mem_t* mem ) { bli_membrk_acquire_m( membrk, req_size, BLIS_BUFFER_FOR_GEN_USE, mem ); } #endif void bli_membrk_rntm_set_membrk ( rntm_t* rntm ) { membrk_t* membrk = bli_membrk_query(); bli_rntm_set_membrk( membrk, rntm ); } siz_t bli_membrk_pool_size ( membrk_t* membrk, packbuf_t buf_type ) { siz_t r_val; if ( buf_type == BLIS_BUFFER_FOR_GEN_USE ) { // We don't (yet) track the amount of general-purpose // memory that is currently allocated. r_val = 0; } else { dim_t pool_index; pool_t* pool; // Acquire the pointer to the pool corresponding to the buf_type // provided. pool_index = bli_packbuf_index( buf_type ); pool = bli_membrk_pool( pool_index, membrk ); // Compute the pool "size" as the product of the block size // and the number of blocks in the pool. r_val = bli_pool_block_size( pool ) * bli_pool_num_blocks( pool ); } return r_val; } // ----------------------------------------------------------------------------- void bli_membrk_init_pools ( cntx_t* cntx, membrk_t* membrk ) { // Map each of the packbuf_t values to an index starting at zero. const dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK ); const dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL ); const dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL ); // Alias the pool addresses to convenient identifiers. pool_t* pool_a = bli_membrk_pool( index_a, membrk ); pool_t* pool_b = bli_membrk_pool( index_b, membrk ); pool_t* pool_c = bli_membrk_pool( index_c, membrk ); // Start with empty pools. const dim_t num_blocks_a = 0; const dim_t num_blocks_b = 0; const dim_t num_blocks_c = 0; siz_t block_size_a = 0; siz_t block_size_b = 0; siz_t block_size_c = 0; // For blocks of A and panels of B, start off with block_ptrs arrays that // are of a decent length. For C, we can start off with an empty array. 
const dim_t block_ptrs_len_a = 80; const dim_t block_ptrs_len_b = 80; const dim_t block_ptrs_len_c = 0; // Use the address alignment sizes designated (at configure-time) for pools. const siz_t align_size_a = BLIS_POOL_ADDR_ALIGN_SIZE_A; const siz_t align_size_b = BLIS_POOL_ADDR_ALIGN_SIZE_B; const siz_t align_size_c = BLIS_POOL_ADDR_ALIGN_SIZE_C; // Use the offsets from the above alignments. const siz_t offset_size_a = BLIS_POOL_ADDR_OFFSET_SIZE_A; const siz_t offset_size_b = BLIS_POOL_ADDR_OFFSET_SIZE_B; const siz_t offset_size_c = BLIS_POOL_ADDR_OFFSET_SIZE_C; // Use the malloc() and free() designated (at configure-time) for pools. malloc_ft malloc_fp = BLIS_MALLOC_POOL; free_ft free_fp = BLIS_FREE_POOL; // Determine the block size for each memory pool. bli_membrk_compute_pool_block_sizes( &block_size_a, &block_size_b, &block_size_c, cntx ); // Initialize the memory pools for A, B, and C. bli_pool_init( num_blocks_a, block_ptrs_len_a, block_size_a, align_size_a, offset_size_a, malloc_fp, free_fp, pool_a ); bli_pool_init( num_blocks_b, block_ptrs_len_b, block_size_b, align_size_b, offset_size_b, malloc_fp, free_fp, pool_b ); bli_pool_init( num_blocks_c, block_ptrs_len_c, block_size_c, align_size_c, offset_size_c, malloc_fp, free_fp, pool_c ); } void bli_membrk_finalize_pools ( membrk_t* membrk ) { // Map each of the packbuf_t values to an index starting at zero. dim_t index_a = bli_packbuf_index( BLIS_BUFFER_FOR_A_BLOCK ); dim_t index_b = bli_packbuf_index( BLIS_BUFFER_FOR_B_PANEL ); dim_t index_c = bli_packbuf_index( BLIS_BUFFER_FOR_C_PANEL ); // Alias the pool addresses to convenient identifiers. pool_t* pool_a = bli_membrk_pool( index_a, membrk ); pool_t* pool_b = bli_membrk_pool( index_b, membrk ); pool_t* pool_c = bli_membrk_pool( index_c, membrk ); // Finalize the memory pools for A, B, and C. 
bli_pool_finalize( pool_a ); bli_pool_finalize( pool_b ); bli_pool_finalize( pool_c ); } // ----------------------------------------------------------------------------- void bli_membrk_compute_pool_block_sizes ( siz_t* bs_a, siz_t* bs_b, siz_t* bs_c, cntx_t* cntx ) { const ind_t im = bli_cntx_method( cntx ); siz_t bs_cand_a = 0; siz_t bs_cand_b = 0; siz_t bs_cand_c = 0; num_t dt; // Compute pool block sizes for each datatype and find the maximum // size for each pool. This is done so that new pools do not need // to be allocated if the user switches datatypes. for ( dt = BLIS_DT_LO; dt <= BLIS_DT_HI; ++dt ) { siz_t bs_dt_a; siz_t bs_dt_b; siz_t bs_dt_c; // Avoid considering induced methods for real datatypes. if ( bli_is_real( dt ) && im != BLIS_NAT ) continue; bli_membrk_compute_pool_block_sizes_dt( dt, &bs_dt_a, &bs_dt_b, &bs_dt_c, cntx ); bs_cand_a = bli_max( bs_dt_a, bs_cand_a ); bs_cand_b = bli_max( bs_dt_b, bs_cand_b ); bs_cand_c = bli_max( bs_dt_c, bs_cand_c ); } // Save the results. *bs_a = bs_cand_a; *bs_b = bs_cand_b; *bs_c = bs_cand_c; } // ----------------------------------------------------------------------------- void bli_membrk_compute_pool_block_sizes_dt ( num_t dt, siz_t* bs_a, siz_t* bs_b, siz_t* bs_c, cntx_t* cntx ) { siz_t size_dt = bli_dt_size( dt ); blksz_t* mr; blksz_t* nr; blksz_t* mc; blksz_t* kc; blksz_t* nc; dim_t mr_dt; dim_t nr_dt; dim_t max_mnr_dt; dim_t mc_max_dt; dim_t kc_max_dt; dim_t nc_max_dt; dim_t packmr_dt; dim_t packnr_dt; dim_t max_packmnr_dt; dim_t scale_num_dt; dim_t scale_den_dt; dim_t pool_mc_dt, left_mc_dt; dim_t pool_nc_dt, left_nc_dt; dim_t pool_kc_dt; // // Find the larger of the two register blocksizes. // // Query the mr and nr blksz_t objects for the given method of // execution. mr = bli_cntx_get_blksz( BLIS_MR, cntx ); nr = bli_cntx_get_blksz( BLIS_NR, cntx ); // Extract the mr and nr values specific to the current datatype. 
mr_dt = bli_blksz_get_def( dt, mr ); nr_dt = bli_blksz_get_def( dt, nr ); // Find the maximum of mr and nr. max_mnr_dt = bli_max( mr_dt, nr_dt ); // // Define local maximum cache blocksizes. // // Query the mc, kc, and nc blksz_t objects for native execution. mc = bli_cntx_get_blksz( BLIS_MC, cntx ); kc = bli_cntx_get_blksz( BLIS_KC, cntx ); nc = bli_cntx_get_blksz( BLIS_NC, cntx ); // Extract the maximum mc, kc, and nc values specific to the current // datatype. mc_max_dt = bli_blksz_get_max( dt, mc ); kc_max_dt = bli_blksz_get_max( dt, kc ); nc_max_dt = bli_blksz_get_max( dt, nc ); // Add max(mr,nr) to kc to make room for the nudging of kc at // runtime to be a multiple of mr or nr for triangular operations // trmm, trmm3, and trsm. kc_max_dt += max_mnr_dt; // // Compute scaling factors. // // Compute integer scaling factors (numerator and denominator) used // to account for situations when the packing register blocksizes are // larger than the regular register blocksizes. // In order to compute the scaling factors, we first have to determine // whether ( packmr / mr ) is greater than ( packnr / nr ). This is // needed ONLY because the amount of space allocated for a block of A // and a panel of B needs to be such that MR and NR can be swapped (ie: // A is packed with NR and B is packed with MR). This transformation is // needed for right-side trsm when inducing an algorithm that (a) has // favorable access patterns for column-stored C and (b) allows the // macro-kernel to reuse the existing left-side fused gemmtrsm micro- // kernels. We avoid integer division by cross-multiplying: // // ( packmr / mr ) >= ( packnr / nr ) // ( packmr / mr ) * nr >= packnr // packmr * nr >= packnr * mr // // So, if packmr * nr >= packnr * mr, then we will use packmr and mr as // our scaling factors. Otherwise, we'll use packnr and nr. 
packmr_dt = bli_blksz_get_max( dt, mr ); packnr_dt = bli_blksz_get_max( dt, nr ); if ( packmr_dt * nr_dt >= packnr_dt * mr_dt ) { scale_num_dt = packmr_dt; scale_den_dt = mr_dt; } else { scale_num_dt = packnr_dt; scale_den_dt = nr_dt; } // // Compute pool block dimensions. // pool_mc_dt = ( mc_max_dt * scale_num_dt ) / scale_den_dt; left_mc_dt = ( mc_max_dt * scale_num_dt ) % scale_den_dt; pool_nc_dt = ( nc_max_dt * scale_num_dt ) / scale_den_dt; left_nc_dt = ( nc_max_dt * scale_num_dt ) % scale_den_dt; pool_kc_dt = ( kc_max_dt ); if ( left_mc_dt > 0 ) pool_mc_dt += 1; if ( left_nc_dt > 0 ) pool_nc_dt += 1; // // Compute pool block sizes // // We add an extra micro-panel of space to the block sizes for A and B // just to be sure any pre-loading performed by the micro-kernel does // not cause a segmentation fault. max_packmnr_dt = bli_max( packmr_dt, packnr_dt ); *bs_a = ( pool_mc_dt + max_packmnr_dt ) * pool_kc_dt * size_dt; *bs_b = ( pool_nc_dt + max_packmnr_dt ) * pool_kc_dt * size_dt; *bs_c = ( pool_mc_dt ) * pool_nc_dt * size_dt; } blis-0.6.1/frame/base/bli_membrk.h000066400000000000000000000101631360743507500167420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MEMBRK_H #define BLIS_MEMBRK_H // membrk init static void bli_membrk_init_mutex( membrk_t* membrk ) { bli_pthread_mutex_init( &(membrk->mutex), NULL ); } static void bli_membrk_finalize_mutex( membrk_t* membrk ) { bli_pthread_mutex_destroy( &(membrk->mutex) ); } // membrk query static pool_t* bli_membrk_pool( dim_t pool_index, membrk_t* membrk ) { return &(membrk->pools[ pool_index ]); } static siz_t bli_membrk_align_size( membrk_t* membrk ) { return membrk->align_size; } static malloc_ft bli_membrk_malloc_fp( membrk_t* membrk ) { return membrk->malloc_fp; } static free_ft bli_membrk_free_fp( membrk_t* membrk ) { return membrk->free_fp; } // membrk modification static void bli_membrk_set_align_size( siz_t align_size, membrk_t* membrk ) { membrk->align_size = align_size; } static void bli_membrk_set_malloc_fp( malloc_ft malloc_fp, membrk_t* membrk ) { membrk->malloc_fp = malloc_fp; } static void bli_membrk_set_free_fp( free_ft free_fp, membrk_t* membrk ) { membrk->free_fp = free_fp; } // membrk action static void 
bli_membrk_lock( membrk_t* membrk ) { bli_pthread_mutex_lock( &(membrk->mutex) ); } static void bli_membrk_unlock( membrk_t* membrk ) { bli_pthread_mutex_unlock( &(membrk->mutex) ); } // ----------------------------------------------------------------------------- membrk_t* bli_membrk_query( void ); void bli_membrk_init ( cntx_t* cntx ); void bli_membrk_finalize ( void ); void bli_membrk_acquire_m ( rntm_t* rntm, siz_t req_size, packbuf_t buf_type, mem_t* mem ); void bli_membrk_release ( rntm_t* rntm, mem_t* mem ); void bli_membrk_rntm_set_membrk ( rntm_t* rntm ); siz_t bli_membrk_pool_size ( membrk_t* membrk, packbuf_t buf_type ); // ---------------------------------------------------------------------------- void bli_membrk_init_pools ( cntx_t* cntx, membrk_t* membrk ); void bli_membrk_finalize_pools ( membrk_t* membrk ); void bli_membrk_compute_pool_block_sizes ( siz_t* bs_a, siz_t* bs_b, siz_t* bs_c, cntx_t* cntx ); void bli_membrk_compute_pool_block_sizes_dt ( num_t dt, siz_t* bs_a, siz_t* bs_b, siz_t* bs_c, cntx_t* cntx ); #endif blis-0.6.1/frame/base/bli_memsys.c000066400000000000000000000051251360743507500167770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_memsys_init( void ) { // Query a native context so we have something to pass into // bli_membrk_init_pools(). We use BLIS_DOUBLE for the datatype, // but the dt argument is actually only used when initializing // contexts for induced methods. // NOTE: Instead of calling bli_gks_query_cntx(), we call // bli_gks_query_cntx_noinit() to avoid the call to bli_init_once(). cntx_t* cntx_p = bli_gks_query_cntx_noinit(); // Initialize the packing block allocator and its data structures. bli_membrk_init( cntx_p ); // Initialize the small block allocator and its data structures. bli_sba_init(); } void bli_memsys_finalize( void ) { // Finalize the small block allocator and its data structures. bli_sba_finalize(); // Finalize the global membrk_t object and its data structures. bli_membrk_finalize(); } blis-0.6.1/frame/base/bli_memsys.h000066400000000000000000000037161360743507500170100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MEMSYS_H #define BLIS_MEMSYS_H // ----------------------------------------------------------------------------- void bli_memsys_init( void ); void bli_memsys_finalize( void ); #endif blis-0.6.1/frame/base/bli_obj.c000066400000000000000000000466121360743507500162420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_obj_create ( num_t dt, dim_t m, dim_t n, inc_t rs, inc_t cs, obj_t* obj ) { bli_init_once(); bli_obj_create_without_buffer( dt, m, n, obj ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_obj_create(): " ); #endif bli_obj_alloc_buffer( rs, cs, 1, obj ); } void bli_obj_create_with_attached_buffer ( num_t dt, dim_t m, dim_t n, void* p, inc_t rs, inc_t cs, obj_t* obj ) { bli_init_once(); bli_obj_create_without_buffer( dt, m, n, obj ); bli_obj_attach_buffer( p, rs, cs, 1, obj ); } void bli_obj_create_without_buffer ( num_t dt, dim_t m, dim_t n, obj_t* obj ) { siz_t elem_size; void* s; bli_init_once(); if ( bli_error_checking_is_enabled() ) bli_obj_create_without_buffer_check( dt, m, n, obj ); // Query the size of one element of the object's pre-set datatype. elem_size = bli_dt_size( dt ); // Set any default properties that are appropriate. bli_obj_set_defaults( obj ); // Set the object root to itself, since obj is not presumed to be a view // into a larger matrix. This is typically the only time this field is // ever set; henceforth, subpartitions and aliases to this object will // get copies of this field, and thus always have access to its // "greatest-grand" parent (ie: the original parent, or "root", object). // However, there ARE a few places where it is convenient to reset the // root field explicitly via bli_obj_set_as_root(). (We do not list // those places here. Just grep for bli_obj_set_as_root within the // top-level 'frame' directory to see them. bli_obj_set_as_root( obj ); // Set individual fields. bli_obj_set_buffer( NULL, obj ); bli_obj_set_dt( dt, obj ); bli_obj_set_elem_size( elem_size, obj ); bli_obj_set_target_dt( dt, obj ); bli_obj_set_exec_dt( dt, obj ); bli_obj_set_comp_dt( dt, obj ); bli_obj_set_dims( m, n, obj ); bli_obj_set_offs( 0, 0, obj ); bli_obj_set_diag_offset( 0, obj ); // Set the internal scalar to 1.0. 
bli_obj_set_scalar_dt( dt, obj ); s = bli_obj_internal_scalar_buffer( obj ); // Always writing the imaginary component is needed in mixed-domain // scenarios. Failing to do this can lead to reading uninitialized // memory just before calling the macrokernel (as the internal scalars // for A and B are merged). //if ( bli_is_float( dt ) ) { bli_sset1s( *(( float* )s) ); } //else if ( bli_is_double( dt ) ) { bli_dset1s( *(( double* )s) ); } if ( bli_is_float( dt ) ) { bli_cset1s( *(( scomplex* )s) ); } else if ( bli_is_double( dt ) ) { bli_zset1s( *(( dcomplex* )s) ); } else if ( bli_is_scomplex( dt ) ) { bli_cset1s( *(( scomplex* )s) ); } else if ( bli_is_dcomplex( dt ) ) { bli_zset1s( *(( dcomplex* )s) ); } } void bli_obj_alloc_buffer ( inc_t rs, inc_t cs, inc_t is, obj_t* obj ) { dim_t n_elem = 0; dim_t m, n; siz_t elem_size; siz_t buffer_size; void* p; bli_init_once(); // Query the dimensions of the object we are allocating. m = bli_obj_length( obj ); n = bli_obj_width( obj ); // Query the size of one element. elem_size = bli_obj_elem_size( obj ); // Adjust the strides, if needed, before doing anything else // (particularly, before doing any error checking). bli_adjust_strides( m, n, elem_size, &rs, &cs, &is ); if ( bli_error_checking_is_enabled() ) bli_obj_alloc_buffer_check( rs, cs, is, obj ); // Determine how much object to allocate. if ( m == 0 || n == 0 ) { // For empty objects, set n_elem to zero. Row and column strides // should remain unchanged (because alignment is not needed). n_elem = 0; } else { // The number of elements to allocate is given by the distance from // the element with the lowest address (usually {0, 0}) to the element // with the highest address (usually {m-1, n-1}), plus one for the // highest element itself. n_elem = (m-1) * bli_abs( rs ) + (n-1) * bli_abs( cs ) + 1; } // Handle the special case where imaginary stride is larger than // normal. 
if ( bli_obj_is_complex( obj ) ) { // Notice that adding is/2 works regardless of whether the // imaginary stride is unit, something between unit and // 2*n_elem, or something bigger than 2*n_elem. n_elem = bli_abs( is ) / 2 + n_elem; } // Compute the size of the total buffer to be allocated, which includes // padding if the leading dimension was increased for alignment purposes. buffer_size = ( siz_t )n_elem * elem_size; // Allocate the buffer. p = bli_malloc_user( buffer_size ); // Set individual fields. bli_obj_set_buffer( p, obj ); bli_obj_set_strides( rs, cs, obj ); bli_obj_set_imag_stride( is, obj ); } void bli_obj_attach_buffer ( void* p, inc_t rs, inc_t cs, inc_t is, obj_t* obj ) { bli_init_once(); // Interpret is = 0 as a request for the default, which is is = 1; if ( is == 0 ) is = 1; // Check that the strides and lengths are compatible. Note that the // user *must* specify valid row and column strides when attaching an // external buffer. if ( bli_error_checking_is_enabled() ) bli_obj_attach_buffer_check( p, rs, cs, is, obj ); // Update the object. bli_obj_set_buffer( p, obj ); bli_obj_set_strides( rs, cs, obj ); bli_obj_set_imag_stride( is, obj ); } void bli_obj_create_1x1 ( num_t dt, obj_t* obj ) { bli_obj_create_without_buffer( dt, 1, 1, obj ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_obj_create_1x1(): " ); #endif bli_obj_alloc_buffer( 1, 1, 1, obj ); } void bli_obj_create_1x1_with_attached_buffer ( num_t dt, void* p, obj_t* obj ) { bli_obj_create_without_buffer( dt, 1, 1, obj ); bli_obj_attach_buffer( p, 1, 1, 1, obj ); } void bli_obj_create_conf_to ( obj_t* s, obj_t* d ) { const num_t dt = bli_obj_dt( s ); const dim_t m = bli_obj_length( s ); const dim_t n = bli_obj_width( s ); const inc_t rs = bli_obj_row_stride( s ); const inc_t cs = bli_obj_col_stride( s ); bli_obj_create( dt, m, n, rs, cs, d ); } void bli_obj_free ( obj_t* obj ) { if ( bli_error_checking_is_enabled() ) bli_obj_free_check( obj ); // Don't dereference obj if it is NULL. 
if ( obj != NULL ) { // Idiot safety: Don't try to free the buffer field if the object // is a detached scalar (ie: if the buffer pointer refers to the // address of the internal scalar buffer). if ( bli_obj_buffer( obj ) != bli_obj_internal_scalar_buffer( obj ) ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_obj_free(): " ); #endif bli_free_user( bli_obj_buffer( obj ) ); } } } #if 0 //void bli_obj_create_const ( double value, obj_t* obj ) { gint_t* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; if ( bli_error_checking_is_enabled() ) bli_obj_create_const_check( value, obj ); bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, obj ); //temp_s = bli_obj_buffer_for_const( BLIS_FLOAT, obj ); //temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE, obj ); //temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, obj ); //temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, obj ); //temp_i = bli_obj_buffer_for_const( BLIS_INT, obj ); bli_dssets( value, 0.0, *temp_s ); bli_ddsets( value, 0.0, *temp_d ); bli_dcsets( value, 0.0, *temp_c ); bli_dzsets( value, 0.0, *temp_z ); *temp_i = ( gint_t ) value; } //void bli_obj_create_const_copy_of ( obj_t* a, obj_t* b ) { gint_t* temp_i; float* temp_s; double* temp_d; scomplex* temp_c; dcomplex* temp_z; void* buf_a; dcomplex value; if ( bli_error_checking_is_enabled() ) bli_obj_create_const_copy_of_check( a, b ); bli_obj_create( BLIS_CONSTANT, 1, 1, 1, 1, b ); //temp_s = bli_obj_buffer_for_const( BLIS_FLOAT, b ); //temp_d = bli_obj_buffer_for_const( BLIS_DOUBLE, b ); //temp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, b ); //temp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, b ); //temp_i = bli_obj_buffer_for_const( BLIS_INT, b ); buf_a = bli_obj_buffer_at_off( a ); bli_zzsets( 0.0, 0.0, value ); if ( bli_obj_is_float( a ) ) { bli_szcopys( *(( float* )buf_a), value ); } else if ( bli_obj_is_double( a ) ) { bli_dzcopys( *(( double* )buf_a), value ); } else if ( bli_obj_is_scomplex( a ) ) { bli_czcopys( *(( scomplex* )buf_a), value 
); } else if ( bli_obj_is_dcomplex( a ) ) { bli_zzcopys( *(( dcomplex* )buf_a), value ); } else { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } bli_zscopys( value, *temp_s ); bli_zdcopys( value, *temp_d ); bli_zccopys( value, *temp_c ); bli_zzcopys( value, *temp_z ); *temp_i = ( gint_t ) bli_zreal( value ); } #endif void bli_adjust_strides ( dim_t m, dim_t n, siz_t elem_size, inc_t* rs, inc_t* cs, inc_t* is ) { // Here, we check the strides that were input from the user and modify // them if needed. // Handle the special "empty" case first. If either dimension is zero, // do nothing (this could represent a zero-length "slice" of another // matrix). if ( m == 0 || n == 0 ) return; // Interpret rs = cs = 0 as request for column storage and -1 as a request // for row storage. if ( *rs == 0 && *cs == 0 && ( *is == 0 || *is == 1 ) ) { // First we handle the 1x1 scalar case explicitly. if ( m == 1 && n == 1 ) { *rs = 1; *cs = 1; } // We use column-major storage, except when m == 1, in which case we // use what amounts to row-major storage because we don't want both // strides to be unit. else if ( m == 1 && n > 1 ) { *rs = n; *cs = 1; } else { *rs = 1; *cs = m; } // Use default complex storage. *is = 1; // Align the strides depending on the tilt of the matrix. Note that // scalars are neither row nor column tilted. Also note that alignment // is only done for rs = cs = 0, and any user-supplied row and column // strides are preserved. if ( bli_is_col_tilted( m, n, *rs, *cs ) ) { *cs = bli_align_dim_to_size( *cs, elem_size, BLIS_HEAP_STRIDE_ALIGN_SIZE ); } else if ( bli_is_row_tilted( m, n, *rs, *cs ) ) { *rs = bli_align_dim_to_size( *rs, elem_size, BLIS_HEAP_STRIDE_ALIGN_SIZE ); } } else if ( *rs == -1 && *cs == -1 && ( *is == 0 || *is == 1 ) ) { // First we handle the 1x1 scalar case explicitly. 
if ( m == 1 && n == 1 ) { *rs = 1; *cs = 1; } // We use row-major storage, except when n == 1, in which case we // use what amounts to column-major storage because we don't want both // strides to be unit. else if ( n == 1 && m > 1 ) { *rs = 1; *cs = m; } else { *rs = n; *cs = 1; } // Use default complex storage. *is = 1; // Align the strides depending on the tilt of the matrix. Note that // scalars are neither row nor column tilted. Also note that alignment // is only done for rs = cs = -1, and any user-supplied row and column // strides are preserved. if ( bli_is_col_tilted( m, n, *rs, *cs ) ) { *cs = bli_align_dim_to_size( *cs, elem_size, BLIS_HEAP_STRIDE_ALIGN_SIZE ); } else if ( bli_is_row_tilted( m, n, *rs, *cs ) ) { *rs = bli_align_dim_to_size( *rs, elem_size, BLIS_HEAP_STRIDE_ALIGN_SIZE ); } } else if ( *rs == 1 && *cs == 1 ) { // If both strides are unit, this is probably a "lazy" request for a // single vector (but could also be a request for a 1xn matrix in // column-major order or an mx1 matrix in row-major order). In BLIS, // we have decided to "reserve" the case where rs = cs = 1 for // 1x1 scalars only. if ( m > 1 && n == 1 ) { // Set the column stride to indicate that this is a column vector // stored in column-major order. This is done for legacy reasons, // because we at one time we had to satisify the error checking // in the underlying BLAS library, which expects the leading // dimension to be set to at least m, even if it will never be // used for indexing since it is a vector and thus only has one // column of data. *cs = m; } else if ( m == 1 && n > 1 ) { // Set the row stride to indicate that this is a row vector stored // in row-major order. *rs = n; } // Nothing needs to be done for the 1x1 scalar case where m == n == 1. 
} } static siz_t dt_sizes[6] = { sizeof( float ), sizeof( scomplex ), sizeof( double ), sizeof( dcomplex ), sizeof( gint_t ), sizeof( constdata_t ) }; siz_t bli_dt_size ( num_t dt ) { if ( bli_error_checking_is_enabled() ) bli_dt_size_check( dt ); return dt_sizes[dt]; } static char* dt_names[ BLIS_NUM_FP_TYPES+1 ] = { "float", "scomplex", "double", "dcomplex", "int" }; char* bli_dt_string ( num_t dt ) { if ( bli_error_checking_is_enabled() ) bli_dt_string_check( dt ); return dt_names[dt]; } dim_t bli_align_dim_to_mult ( dim_t dim, dim_t dim_mult ) { // We return the dimension unmodified if the multiple is zero // (to avoid division by zero). if ( dim_mult == 0 ) return dim; dim = ( ( dim + dim_mult - 1 ) / dim_mult ) * dim_mult; return dim; } dim_t bli_align_dim_to_size ( dim_t dim, siz_t elem_size, siz_t align_size ) { dim = ( ( dim * ( dim_t )elem_size + ( dim_t )align_size - 1 ) / ( dim_t )align_size ) * ( dim_t )align_size / ( dim_t )elem_size; return dim; } dim_t bli_align_ptr_to_size ( void* p, size_t align_size ) { dim_t dim; dim = ( ( ( uintptr_t )p + align_size - 1 ) / align_size ) * align_size; return dim; } #if 0 static num_t type_union[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = { // s c d z /* s */ { BLIS_FLOAT, BLIS_SCOMPLEX, BLIS_DOUBLE, BLIS_DCOMPLEX }, /* c */ { BLIS_SCOMPLEX, BLIS_SCOMPLEX, BLIS_DCOMPLEX, BLIS_DCOMPLEX }, /* d */ { BLIS_DOUBLE, BLIS_DCOMPLEX, BLIS_DOUBLE, BLIS_DCOMPLEX }, /* z */ { BLIS_DCOMPLEX, BLIS_DCOMPLEX, BLIS_DCOMPLEX, BLIS_DCOMPLEX } }; num_t bli_dt_union( num_t dt1, num_t dt2 ) { if ( bli_error_checking_is_enabled() ) bli_dt_union_check( dt1, dt2 ); return type_union[dt1][dt2]; } #endif void bli_obj_print ( char* label, obj_t* obj ) { bli_init_once(); FILE* file = stdout; if ( bli_error_checking_is_enabled() ) bli_obj_print_check( label, obj ); fprintf( file, "\n" ); fprintf( file, "%s\n", label ); fprintf( file, "\n" ); fprintf( file, " m x n %lu x %lu\n", ( unsigned long )bli_obj_length( obj ), ( unsigned long 
)bli_obj_width( obj ) ); fprintf( file, "\n" ); fprintf( file, " offm, offn %lu, %lu\n", ( unsigned long )bli_obj_row_off( obj ), ( unsigned long )bli_obj_col_off( obj ) ); fprintf( file, " diagoff %ld\n", ( signed long int )bli_obj_diag_offset( obj ) ); fprintf( file, "\n" ); fprintf( file, " buf %p\n", ( void* )bli_obj_buffer( obj ) ); fprintf( file, " elem size %lu\n", ( unsigned long )bli_obj_elem_size( obj ) ); fprintf( file, " rs, cs %ld, %ld\n", ( signed long int )bli_obj_row_stride( obj ), ( signed long int )bli_obj_col_stride( obj ) ); fprintf( file, " is %ld\n", ( signed long int )bli_obj_imag_stride( obj ) ); fprintf( file, " m_padded %lu\n", ( unsigned long )bli_obj_padded_length( obj ) ); fprintf( file, " n_padded %lu\n", ( unsigned long )bli_obj_padded_width( obj ) ); fprintf( file, " pd %lu\n", ( unsigned long )bli_obj_panel_dim( obj ) ); fprintf( file, " ps %lu\n", ( unsigned long )bli_obj_panel_stride( obj ) ); fprintf( file, "\n" ); fprintf( file, " info %lX\n", ( unsigned long )(*obj).info ); fprintf( file, " - is complex %lu\n", ( unsigned long )bli_obj_is_complex( obj ) ); fprintf( file, " - is d. prec %lu\n", ( unsigned long )bli_obj_is_double_prec( obj ) ); fprintf( file, " - datatype %lu\n", ( unsigned long )bli_obj_dt( obj ) ); fprintf( file, " - target dt %lu\n", ( unsigned long )bli_obj_target_dt( obj ) ); fprintf( file, " - exec dt %lu\n", ( unsigned long )bli_obj_exec_dt( obj ) ); fprintf( file, " - comp dt %lu\n", ( unsigned long )bli_obj_comp_dt( obj ) ); fprintf( file, " - scalar dt %lu\n", ( unsigned long )bli_obj_scalar_dt( obj ) ); fprintf( file, " - has trans %lu\n", ( unsigned long )bli_obj_has_trans( obj ) ); fprintf( file, " - has conj %lu\n", ( unsigned long )bli_obj_has_conj( obj ) ); fprintf( file, " - unit diag? 
%lu\n", ( unsigned long )bli_obj_has_unit_diag( obj ) ); fprintf( file, " - struc type %lu\n", ( unsigned long )bli_obj_struc( obj ) >> BLIS_STRUC_SHIFT ); fprintf( file, " - uplo type %lu\n", ( unsigned long )bli_obj_uplo( obj ) >> BLIS_UPLO_SHIFT ); fprintf( file, " - is upper %lu\n", ( unsigned long )bli_obj_is_upper( obj ) ); fprintf( file, " - is lower %lu\n", ( unsigned long )bli_obj_is_lower( obj ) ); fprintf( file, " - is dense %lu\n", ( unsigned long )bli_obj_is_dense( obj ) ); fprintf( file, " - pack schema %lu\n", ( unsigned long )bli_obj_pack_schema( obj ) >> BLIS_PACK_SCHEMA_SHIFT ); fprintf( file, " - packinv diag? %lu\n", ( unsigned long )bli_obj_has_inverted_diag( obj ) ); fprintf( file, " - pack ordifup %lu\n", ( unsigned long )bli_obj_is_pack_rev_if_upper( obj ) ); fprintf( file, " - pack ordiflo %lu\n", ( unsigned long )bli_obj_is_pack_rev_if_lower( obj ) ); fprintf( file, " - packbuf type %lu\n", ( unsigned long )bli_obj_pack_buffer_type( obj ) >> BLIS_PACK_BUFFER_SHIFT ); fprintf( file, "\n" ); } blis-0.6.1/frame/base/bli_obj.h000066400000000000000000000067641360743507500162530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "bli_obj_check.h"

// --- Object creation, buffer management, and release --------------------------

BLIS_EXPORT_BLIS void bli_obj_create ( num_t dt, dim_t m, dim_t n, inc_t rs, inc_t cs, obj_t* obj );

BLIS_EXPORT_BLIS void bli_obj_create_with_attached_buffer ( num_t dt, dim_t m, dim_t n, void* p, inc_t rs, inc_t cs, obj_t* obj );

BLIS_EXPORT_BLIS void bli_obj_create_without_buffer ( num_t dt, dim_t m, dim_t n, obj_t* obj );

BLIS_EXPORT_BLIS void bli_obj_alloc_buffer ( inc_t rs, inc_t cs, inc_t is, obj_t* obj );

BLIS_EXPORT_BLIS void bli_obj_attach_buffer ( void* p, inc_t rs, inc_t cs, inc_t is, obj_t* obj );

BLIS_EXPORT_BLIS void bli_obj_create_1x1 ( num_t dt, obj_t* obj );

BLIS_EXPORT_BLIS void bli_obj_create_1x1_with_attached_buffer ( num_t dt, void* p, obj_t* obj );

BLIS_EXPORT_BLIS void bli_obj_create_conf_to ( obj_t* s, obj_t* d );

BLIS_EXPORT_BLIS void bli_obj_free ( obj_t* obj );

// NOTE: Unlike its neighbors, this prototype carries no BLIS_EXPORT_BLIS
// annotation.
void bli_adjust_strides ( dim_t m, dim_t n, siz_t elem_size, inc_t* rs, inc_t* cs, inc_t* is );

// --- Datatype queries and alignment helpers -----------------------------------

BLIS_EXPORT_BLIS siz_t bli_dt_size ( num_t dt );

BLIS_EXPORT_BLIS char* bli_dt_string ( num_t dt );

BLIS_EXPORT_BLIS dim_t bli_align_dim_to_mult ( dim_t dim, dim_t dim_mult );

BLIS_EXPORT_BLIS dim_t bli_align_dim_to_size ( dim_t dim, siz_t elem_size, siz_t align_size );

BLIS_EXPORT_BLIS dim_t bli_align_ptr_to_size ( void* p, size_t align_size );

// --- Debugging output ---------------------------------------------------------

BLIS_EXPORT_BLIS void bli_obj_print ( char* label, obj_t* obj );
blis-0.6.1/frame/base/bli_obj_scalar.c000066400000000000000000000166351360743507500175710ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// Initialize beta as a 1x1 object of datatype dt whose buffer is the
// object's own internal scalar storage, with unit row, column, and
// imaginary strides.
void bli_obj_scalar_init_detached ( num_t dt, obj_t* beta )
{
	void* p;

	// Initialize beta without a buffer and then attach its internal buffer.
	// NOTE: This initializes both the storage datatype and scalar datatype
	// bitfields within beta to dt.
	bli_obj_create_without_buffer( dt, 1, 1, beta );

	// Query the address of the object's internal scalar buffer.
	p = bli_obj_internal_scalar_buffer( beta );

	// Update the object.
	bli_obj_set_buffer( p, beta );
	bli_obj_set_strides( 1, 1, beta );
	bli_obj_set_imag_stride( 1, beta );
}

// Initialize beta as a detached scalar of datatype dt and copy into it
// the value of alpha, with conj applied to a local alias of alpha first.
void bli_obj_scalar_init_detached_copy_of ( num_t dt, conj_t conj, obj_t* alpha, obj_t* beta )
{
	obj_t alpha_local;

	// Make a local copy of alpha so we can apply the conj parameter.
	bli_obj_alias_to( alpha, &alpha_local );
	bli_obj_apply_conj( conj, &alpha_local );

	// Initialize beta without a buffer and then attach its internal buffer.
	bli_obj_scalar_init_detached( dt, beta );

	// Copy the scalar value in alpha_local to object beta, conjugating
	// and/or typecasting if needed.
	bli_copysc( &alpha_local, beta );
}

// Copy the scalar attached to object a into a new detached scalar object
// alpha that has the same datatype as a's scalar.
void bli_obj_scalar_detach ( obj_t* a, obj_t* alpha )
{
	// Use the scalar datatype of A as the storage datatype of the detached
	// object alpha.
	num_t dt_a = bli_obj_scalar_dt( a );

	// Initialize alpha to be a bufferless internal scalar of the same
	// datatype as the scalar attached to A.
	bli_obj_scalar_init_detached( dt_a, alpha );

	// Copy the internal scalar in A to alpha.
	// NOTE: This is simply a field-to-field copy with no typecasting. But
	// that's okay since bli_obj_scalar_init_detached() initializes the
	// storage datatype of alpha to be the same as the datatype of the
	// scalar queried from bli_obj_scalar_dt() above.
	bli_obj_copy_internal_scalar( a, alpha );
}

// Replace the scalar attached to object a with alpha, after casting alpha
// to a's target datatype and applying conj. The scalar datatype of a is
// updated to that target datatype.
void bli_obj_scalar_attach ( conj_t conj, obj_t* alpha, obj_t* a )
{
	obj_t alpha_cast;

	// Use the target datatype of A as the datatype to which we cast
	// alpha locally.
	const num_t dt_targ = bli_obj_target_dt( a );

	// Make a copy-cast of alpha to the target datatype of A, queried
	// above. This step gives us the opportunity to conjugate and/or
	// typecast alpha.
	bli_obj_scalar_init_detached_copy_of( dt_targ, conj, alpha, &alpha_cast );

	// Copy the internal scalar in alpha_cast to A.
	bli_obj_copy_internal_scalar( &alpha_cast, a );

	// Update the scalar datatype of A.
	bli_obj_set_scalar_dt( dt_targ, a );
}

// Typecast the scalar attached to object a to datatype dt, in place, and
// update a's scalar datatype accordingly.
void bli_obj_scalar_cast_to ( num_t dt, obj_t* a )
{
	obj_t alpha;
	obj_t alpha_cast;

	// Initialize an object alpha to be a bufferless scalar whose
	// storage datatype is equal to the scalar datatype of A.
	bli_obj_scalar_init_detached( bli_obj_scalar_dt( a ), &alpha );

	// Copy the internal scalar in A to alpha.
	// NOTE: Since alpha was initialized with the scalar datatype of A,
	// a simple field-to-field copy is sufficient (no casting is needed
	// here).
	bli_obj_copy_internal_scalar( a, &alpha );

	// Make a copy-cast of alpha, alpha_cast, with the datatype given by
	// the caller. (This is where the typecasting happens.)
	bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, &alpha, &alpha_cast );

	// Copy the newly-typecasted value in alpha_cast back to A.
	bli_obj_copy_internal_scalar( &alpha_cast, a );

	// Update the scalar datatype of A to reflect the new datatype used
	// in the typecast.
	bli_obj_set_scalar_dt( dt, a );
}

// Multiply the scalar attached to object a by alpha (after casting alpha
// to a's scalar datatype) and store the product back in a.
void bli_obj_scalar_apply_scalar ( obj_t* alpha, obj_t* a )
{
	obj_t alpha_cast;
	obj_t scalar_a;

	// Make a copy of alpha, alpha_cast, with the same datatype as the
	// scalar datatype of A. (This is where the typecasting happens.)
	bli_obj_scalar_init_detached_copy_of( bli_obj_scalar_dt( a ), BLIS_NO_CONJUGATE, alpha, &alpha_cast );

	// Detach the scalar from A.
	bli_obj_scalar_detach( a, &scalar_a );

	// Scale the detached scalar by alpha.
	bli_mulsc( &alpha_cast, &scalar_a );

	// Copy the internal scalar in scalar_a to A.
	bli_obj_copy_internal_scalar( &scalar_a, a );
}

// Overwrite the scalar attached to object a with the value of BLIS_ONE
// for a's scalar datatype.
// NOTE: If the scalar datatype is none of float, double, scomplex, or
// dcomplex, the attached scalar is left untouched.
void bli_obj_scalar_reset ( obj_t* a )
{
	num_t dt = bli_obj_scalar_dt( a );
	void* scalar_a = bli_obj_internal_scalar_buffer( a );
	void* one = bli_obj_buffer_for_const( dt, &BLIS_ONE );

	if ( bli_is_float( dt ) ) *(( float* )scalar_a) = *(( float* )one);
	else if ( bli_is_double( dt ) ) *(( double* )scalar_a) = *(( double* )one);
	else if ( bli_is_scomplex( dt ) ) *(( scomplex* )scalar_a) = *(( scomplex* )one);
	else if ( bli_is_dcomplex( dt ) ) *(( dcomplex* )scalar_a) = *(( dcomplex* )one);

	// Alternate implementation:
	//bli_obj_scalar_attach( BLIS_NO_CONJUGATE, &BLIS_ONE, a );
}

// Return TRUE if the scalar attached to object a is complex and its
// imaginary component is not zero; return FALSE otherwise (including for
// real scalar datatypes).
bool_t bli_obj_scalar_has_nonzero_imag ( obj_t* a )
{
	bool_t r_val = FALSE;
	num_t dt = bli_obj_scalar_dt( a );
	void* scalar_a = bli_obj_internal_scalar_buffer( a );

	// FGVZ: Reimplement by using bli_obj_imag_part() and then
	// bli_obj_equals( &BLIS_ZERO, ... ).

	if ( bli_is_real( dt ) )
	{
		r_val = FALSE;
	}
	else if ( bli_is_scomplex( dt ) )
	{
		r_val = ( bli_cimag( *(( scomplex* )scalar_a) ) != 0.0F );
	}
	else if ( bli_is_dcomplex( dt ) )
	{
		r_val = ( bli_zimag( *(( dcomplex* )scalar_a) ) != 0.0 );
	}

	return r_val;
}

// Return the result of comparing the scalar attached to object a against
// the scalar object beta via bli_obj_equals().
bool_t bli_obj_scalar_equals ( obj_t* a, obj_t* beta )
{
	obj_t scalar_a;
	bool_t r_val;

	// Detach a copy of a's scalar so it can be compared as an object.
	bli_obj_scalar_detach( a, &scalar_a );

	r_val = bli_obj_equals( &scalar_a, beta );

	return r_val;
}
blis-0.6.1/frame/base/bli_obj_scalar.h000066400000000000000000000050531360743507500175660ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

// Initialize beta as a 1x1 scalar object of datatype dt that uses its own
// internal scalar storage as its buffer.
BLIS_EXPORT_BLIS void bli_obj_scalar_init_detached ( num_t dt, obj_t* beta );

// Initialize beta as a detached scalar of datatype dt holding a copy of
// alpha, with conj applied.
BLIS_EXPORT_BLIS void bli_obj_scalar_init_detached_copy_of ( num_t dt, conj_t conj, obj_t* alpha, obj_t* beta );

// Copy the scalar attached to a into the detached scalar object alpha.
BLIS_EXPORT_BLIS void bli_obj_scalar_detach ( obj_t* a, obj_t* alpha );

// Attach alpha (cast to a's target datatype, with conj applied) to a as
// its scalar.
BLIS_EXPORT_BLIS void bli_obj_scalar_attach ( conj_t conj, obj_t* alpha, obj_t* a );

// Typecast the scalar attached to a to datatype dt.
BLIS_EXPORT_BLIS void bli_obj_scalar_cast_to ( num_t dt, obj_t* a );

// Scale the scalar attached to a by alpha.
BLIS_EXPORT_BLIS void bli_obj_scalar_apply_scalar ( obj_t* alpha, obj_t* a );

// Reset the scalar attached to a to one.
BLIS_EXPORT_BLIS void bli_obj_scalar_reset ( obj_t* a );

// Query whether the scalar attached to a has a nonzero imaginary part.
BLIS_EXPORT_BLIS bool_t bli_obj_scalar_has_nonzero_imag ( obj_t* a );

// Query whether the scalar attached to a equals the scalar object beta.
BLIS_EXPORT_BLIS bool_t bli_obj_scalar_equals ( obj_t* a, obj_t* beta );
blis-0.6.1/frame/base/bli_opid.h000066400000000000000000000034051360743507500164210ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ static bool_t bli_opid_is_level3( opid_t opid ) { return ( bool_t ) ( BLIS_GEMM <= opid && opid <= BLIS_TRSM ); } blis-0.6.1/frame/base/bli_pack.c000066400000000000000000000115751360743507500164060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // The global rntm_t structure. (The definition resides in bli_rntm.c.) extern rntm_t global_rntm; // A mutex to allow synchronous access to global_rntm. (The definition // resides in bli_rntm.c.) extern bli_pthread_mutex_t global_rntm_mutex; // ----------------------------------------------------------------------------- void bli_pack_init( void ) { // Read the environment variables and use them to initialize the // global runtime object. bli_pack_init_rntm_from_env( &global_rntm ); } void bli_pack_finalize( void ) { } // ----------------------------------------------------------------------------- dim_t bli_pack_get_pack_a( void ) { // We must ensure that global_rntm has been initialized. bli_init_once(); return bli_rntm_pack_a( &global_rntm ); } // ----------------------------------------------------------------------------- dim_t bli_pack_get_pack_b( void ) { // We must ensure that global_rntm has been initialized. bli_init_once(); return bli_rntm_pack_b( &global_rntm ); } // ---------------------------------------------------------------------------- void bli_pack_set_pack_a( bool_t pack_a ) { // We must ensure that global_rntm has been initialized. bli_init_once(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); bli_rntm_set_pack_a( pack_a, &global_rntm ); // Release the mutex protecting global_rntm. 
bli_pthread_mutex_unlock( &global_rntm_mutex ); } // ---------------------------------------------------------------------------- void bli_pack_set_pack_b( bool_t pack_b ) { // We must ensure that global_rntm has been initialized. bli_init_once(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); bli_rntm_set_pack_a( pack_b, &global_rntm ); // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); } // ---------------------------------------------------------------------------- void bli_pack_init_rntm_from_env ( rntm_t* rntm ) { // NOTE: We don't need to acquire the global_rntm_mutex here because this // function is only called from bli_pack_init(), which is only called // by bli_init_once(). bool_t pack_a; bool_t pack_b; #if 1 //def BLIS_ENABLE_SELECTIVE_PACKING // Try to read BLIS_PACK_A and BLIS_PACK_B. For each variable, default to // -1 if it is unset. pack_a = bli_env_get_var( "BLIS_PACK_A", -1 ); pack_b = bli_env_get_var( "BLIS_PACK_B", -1 ); // Enforce the default behavior first, then check for affirmative FALSE, and // finally assume anything else is TRUE. if ( pack_a == -1 ) pack_a = FALSE; // default behavior else if ( pack_a == 0 ) pack_a = FALSE; // zero is FALSE else pack_a = TRUE; // anything else is TRUE if ( pack_b == -1 ) pack_b = FALSE; // default behavior else if ( pack_b == 0 ) pack_b = FALSE; // zero is FALSE else pack_b = TRUE; // anything else is TRUE #else pack_a = TRUE; pack_b = TRUE; #endif // Save the results back in the runtime object. bli_rntm_set_pack_a( pack_a, rntm ); bli_rntm_set_pack_b( pack_b, rntm ); #if 0 printf( "bli_pack_init_rntm_from_env()\n" ); bli_rntm_print( rntm ); #endif } blis-0.6.1/frame/base/bli_pack.h000066400000000000000000000040201360743507500163760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_PACK_H
#define BLIS_PACK_H

// Initialization and finalization of the packing-related runtime state.
// NOTE: These two prototypes carry no BLIS_EXPORT_BLIS annotation.
void bli_pack_init( void );
void bli_pack_finalize( void );

// Getters for the pack-A and pack-B settings.
// NOTE(review): these return dim_t whereas the setters below take bool_t;
// the mismatch is part of the existing interface.
BLIS_EXPORT_BLIS dim_t bli_pack_get_pack_a( void );
BLIS_EXPORT_BLIS dim_t bli_pack_get_pack_b( void );

// Setters for the pack-A and pack-B settings.
BLIS_EXPORT_BLIS void bli_pack_set_pack_a( bool_t pack_a );
BLIS_EXPORT_BLIS void bli_pack_set_pack_b( bool_t pack_b );

// Initialize the pack-A and pack-B settings of rntm.
// NOTE: This prototype carries no BLIS_EXPORT_BLIS annotation.
void bli_pack_init_rntm_from_env( rntm_t* rntm );

#endif
blis-0.6.1/frame/base/bli_param_map.c000066400000000000000000000170011360743507500174130ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // --- BLIS to BLAS/LAPACK mappings -------------------------------------------- void bli_param_map_blis_to_netlib_side( side_t side, char* blas_side ) { if ( side == BLIS_LEFT ) *blas_side = 'L'; else if ( side == BLIS_RIGHT ) *blas_side = 'R'; else { bli_check_error_code( BLIS_INVALID_SIDE ); } } void bli_param_map_blis_to_netlib_uplo( uplo_t uplo, char* blas_uplo ) { if ( uplo == BLIS_LOWER ) *blas_uplo = 'L'; else if ( uplo == BLIS_UPPER ) *blas_uplo = 'U'; else { bli_check_error_code( BLIS_INVALID_UPLO ); } } void bli_param_map_blis_to_netlib_trans( trans_t trans, char* blas_trans ) { if ( trans == BLIS_NO_TRANSPOSE ) *blas_trans = 'N'; else if ( trans == BLIS_TRANSPOSE ) *blas_trans = 'T'; else if ( trans == BLIS_CONJ_TRANSPOSE ) *blas_trans = 'C'; else { bli_check_error_code( BLIS_INVALID_TRANS ); } } void bli_param_map_blis_to_netlib_diag( diag_t diag, char* blas_diag ) { if ( diag == BLIS_NONUNIT_DIAG ) *blas_diag = 'N'; else if ( diag == BLIS_UNIT_DIAG ) *blas_diag = 'U'; else { bli_check_error_code( BLIS_INVALID_DIAG ); } } void bli_param_map_blis_to_netlib_machval( machval_t machval, char* blas_machval ) { if ( machval == BLIS_MACH_EPS ) *blas_machval = 'E'; else if ( machval == BLIS_MACH_SFMIN ) *blas_machval = 'S'; else if ( machval == BLIS_MACH_BASE ) *blas_machval = 'B'; else if ( machval == BLIS_MACH_PREC ) *blas_machval = 'P'; else if ( machval == BLIS_MACH_NDIGMANT ) *blas_machval = 'N'; else if ( machval == 
BLIS_MACH_RND ) *blas_machval = 'R'; else if ( machval == BLIS_MACH_EMIN ) *blas_machval = 'M'; else if ( machval == BLIS_MACH_RMIN ) *blas_machval = 'U'; else if ( machval == BLIS_MACH_EMAX ) *blas_machval = 'L'; else if ( machval == BLIS_MACH_RMAX ) *blas_machval = 'O'; else { bli_check_error_code( BLIS_INVALID_MACHVAL ); } } // --- BLAS/LAPACK to BLIS mappings -------------------------------------------- // NOTE: These functions were converted into static functions. Please see this // file's corresponding header for those definitions. // --- BLIS char to BLIS mappings ---------------------------------------------- void bli_param_map_char_to_blis_side( char side, side_t* blis_side ) { if ( side == 'l' || side == 'L' ) *blis_side = BLIS_LEFT; else if ( side == 'r' || side == 'R' ) *blis_side = BLIS_RIGHT; else { bli_check_error_code( BLIS_INVALID_SIDE ); } } void bli_param_map_char_to_blis_uplo( char uplo, uplo_t* blis_uplo ) { if ( uplo == 'l' || uplo == 'L' ) *blis_uplo = BLIS_LOWER; else if ( uplo == 'u' || uplo == 'U' ) *blis_uplo = BLIS_UPPER; else if ( uplo == 'e' || uplo == 'E' ) *blis_uplo = BLIS_DENSE; else { bli_check_error_code( BLIS_INVALID_UPLO ); } } void bli_param_map_char_to_blis_trans( char trans, trans_t* blis_trans ) { if ( trans == 'n' || trans == 'N' ) *blis_trans = BLIS_NO_TRANSPOSE; else if ( trans == 't' || trans == 'T' ) *blis_trans = BLIS_TRANSPOSE; else if ( trans == 'c' || trans == 'C' ) *blis_trans = BLIS_CONJ_NO_TRANSPOSE; else if ( trans == 'h' || trans == 'H' ) *blis_trans = BLIS_CONJ_TRANSPOSE; else { bli_check_error_code( BLIS_INVALID_TRANS ); } } void bli_param_map_char_to_blis_conj( char conj, conj_t* blis_conj ) { if ( conj == 'n' || conj == 'N' ) *blis_conj = BLIS_NO_CONJUGATE; else if ( conj == 'c' || conj == 'C' ) *blis_conj = BLIS_CONJUGATE; else { bli_check_error_code( BLIS_INVALID_CONJ ); } } void bli_param_map_char_to_blis_diag( char diag, diag_t* blis_diag ) { if ( diag == 'n' || diag == 'N' ) *blis_diag = 
BLIS_NONUNIT_DIAG; else if ( diag == 'u' || diag == 'U' ) *blis_diag = BLIS_UNIT_DIAG; else { bli_check_error_code( BLIS_INVALID_DIAG ); } } void bli_param_map_char_to_blis_dt( char dt, num_t* blis_dt ) { if ( dt == 's' ) *blis_dt = BLIS_FLOAT; else if ( dt == 'd' ) *blis_dt = BLIS_DOUBLE; else if ( dt == 'c' ) *blis_dt = BLIS_SCOMPLEX; else if ( dt == 'z' ) *blis_dt = BLIS_DCOMPLEX; else if ( dt == 'i' ) *blis_dt = BLIS_INT; else { bli_check_error_code( BLIS_INVALID_DATATYPE ); } } // --- BLIS to BLIS char mappings ---------------------------------------------- void bli_param_map_blis_to_char_side( side_t blis_side, char* side ) { if ( blis_side == BLIS_LEFT ) *side = 'l'; else if ( blis_side == BLIS_RIGHT ) *side = 'r'; else { bli_check_error_code( BLIS_INVALID_SIDE ); } } void bli_param_map_blis_to_char_uplo( uplo_t blis_uplo, char* uplo ) { if ( blis_uplo == BLIS_LOWER ) *uplo = 'l'; else if ( blis_uplo == BLIS_UPPER ) *uplo = 'u'; else { bli_check_error_code( BLIS_INVALID_UPLO ); } } void bli_param_map_blis_to_char_trans( trans_t blis_trans, char* trans ) { if ( blis_trans == BLIS_NO_TRANSPOSE ) *trans = 'n'; else if ( blis_trans == BLIS_TRANSPOSE ) *trans = 't'; else if ( blis_trans == BLIS_CONJ_NO_TRANSPOSE ) *trans = 'c'; else if ( blis_trans == BLIS_CONJ_TRANSPOSE ) *trans = 'h'; else { bli_check_error_code( BLIS_INVALID_TRANS ); } } void bli_param_map_blis_to_char_conj( conj_t blis_conj, char* conj ) { if ( blis_conj == BLIS_NO_CONJUGATE ) *conj = 'n'; else if ( blis_conj == BLIS_CONJUGATE ) *conj = 'c'; else { bli_check_error_code( BLIS_INVALID_CONJ ); } } void bli_param_map_blis_to_char_diag( diag_t blis_diag, char* diag ) { if ( blis_diag == BLIS_NONUNIT_DIAG ) *diag = 'n'; else if ( blis_diag == BLIS_UNIT_DIAG ) *diag = 'u'; else { bli_check_error_code( BLIS_INVALID_DIAG ); } } void bli_param_map_blis_to_char_dt( num_t blis_dt, char* dt ) { if ( blis_dt == BLIS_FLOAT ) *dt = 's'; else if ( blis_dt == BLIS_DOUBLE ) *dt = 'd'; else if ( blis_dt == 
BLIS_SCOMPLEX ) *dt = 'c'; else if ( blis_dt == BLIS_DCOMPLEX ) *dt = 'z'; else if ( blis_dt == BLIS_INT ) *dt = 'i'; else { bli_check_error_code( BLIS_INVALID_DATATYPE ); } } blis-0.6.1/frame/base/bli_param_map.h000066400000000000000000000130611360743507500174220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // --- BLIS to BLAS/LAPACK mappings -------------------------------------------- BLIS_EXPORT_BLIS void bli_param_map_blis_to_netlib_side( side_t side, char* blas_side ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_netlib_uplo( uplo_t uplo, char* blas_uplo ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_netlib_trans( trans_t trans, char* blas_trans ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_netlib_diag( diag_t diag, char* blas_diag ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_netlib_machval( machval_t machval, char* blas_machval ); // --- BLAS/LAPACK to BLIS mappings -------------------------------------------- // NOTE: These static functions were converted from regular functions in order // to reduce function call overhead within the BLAS compatibility layer. static void bli_param_map_netlib_to_blis_side( char side, side_t* blis_side ) { if ( side == 'l' || side == 'L' ) *blis_side = BLIS_LEFT; else if ( side == 'r' || side == 'R' ) *blis_side = BLIS_RIGHT; else { // Instead of reporting an error to the framework, default to // an arbitrary value. This is needed because this function is // called by the BLAS compatibility layer AFTER it has already // checked errors and called xerbla(). If the application wants // to override the BLAS compatibility layer's xerbla--which // responds to errors with abort()--we need to also NOT call // abort() here, since either way it has already been dealt // with. //bli_check_error_code( BLIS_INVALID_SIDE ); *blis_side = BLIS_LEFT; } } static void bli_param_map_netlib_to_blis_uplo( char uplo, uplo_t* blis_uplo ) { if ( uplo == 'l' || uplo == 'L' ) *blis_uplo = BLIS_LOWER; else if ( uplo == 'u' || uplo == 'U' ) *blis_uplo = BLIS_UPPER; else { // See comment for bli_param_map_netlib_to_blis_side() above. 
//bli_check_error_code( BLIS_INVALID_UPLO ); *blis_uplo = BLIS_LOWER; } } static void bli_param_map_netlib_to_blis_trans( char trans, trans_t* blis_trans ) { if ( trans == 'n' || trans == 'N' ) *blis_trans = BLIS_NO_TRANSPOSE; else if ( trans == 't' || trans == 'T' ) *blis_trans = BLIS_TRANSPOSE; else if ( trans == 'c' || trans == 'C' ) *blis_trans = BLIS_CONJ_TRANSPOSE; else { // See comment for bli_param_map_netlib_to_blis_side() above. //bli_check_error_code( BLIS_INVALID_TRANS ); *blis_trans = BLIS_NO_TRANSPOSE; } } static void bli_param_map_netlib_to_blis_diag( char diag, diag_t* blis_diag ) { if ( diag == 'n' || diag == 'N' ) *blis_diag = BLIS_NONUNIT_DIAG; else if ( diag == 'u' || diag == 'U' ) *blis_diag = BLIS_UNIT_DIAG; else { // See comment for bli_param_map_netlib_to_blis_side() above. //bli_check_error_code( BLIS_INVALID_DIAG ); *blis_diag = BLIS_NONUNIT_DIAG; } } // --- BLIS char to BLIS mappings ---------------------------------------------- BLIS_EXPORT_BLIS void bli_param_map_char_to_blis_side( char side, side_t* blis_side ); BLIS_EXPORT_BLIS void bli_param_map_char_to_blis_uplo( char uplo, uplo_t* blis_uplo ); BLIS_EXPORT_BLIS void bli_param_map_char_to_blis_trans( char trans, trans_t* blis_trans ); BLIS_EXPORT_BLIS void bli_param_map_char_to_blis_conj( char conj, conj_t* blis_conj ); BLIS_EXPORT_BLIS void bli_param_map_char_to_blis_diag( char diag, diag_t* blis_diag ); BLIS_EXPORT_BLIS void bli_param_map_char_to_blis_dt( char dt, num_t* blis_dt ); // --- BLIS to BLIS char mappings ---------------------------------------------- BLIS_EXPORT_BLIS void bli_param_map_blis_to_char_side( side_t blis_side, char* side ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_char_uplo( uplo_t blis_uplo, char* uplo ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_char_trans( trans_t blis_trans, char* trans ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_char_conj( conj_t blis_conj, char* conj ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_char_diag( diag_t blis_diag, 
char* diag ); BLIS_EXPORT_BLIS void bli_param_map_blis_to_char_dt( num_t blis_dt, char* dt ); blis-0.6.1/frame/base/bli_part.c000066400000000000000000000603071360743507500164330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // -- Matrix partitioning ------------------------------------------------------ void bli_acquire_mpart ( dim_t i, dim_t j, dim_t bm, dim_t bn, obj_t* parent, obj_t* child ) { // Query the dimensions of the parent object. const dim_t m_par = bli_obj_length( parent ); const dim_t n_par = bli_obj_width( parent ); // If either i or j is already beyond what exists of the parent matrix, // slide them back to the outer dimensions. (What will happen in this // scenario is that bm and bn and/or will be reduced to zero so that the // child matrix does not refer to anything beyond the bounds of the // parent. (Note: This is a safety measure and generally should never // be needed if the caller is passing in sane arguments.) if ( i > m_par ) i = m_par; if ( j > n_par ) j = n_par; // If either bm or bn spills out over the edge of the parent matrix, // reduce them so that the child matrix fits within the bounds of the // parent. (Note: This is a safety measure and generally should never // be needed if the caller is passing in sane arguments, though this // code is somewhat more likely to be needed than the code above.) if ( bm > m_par - i ) bm = m_par - i; if ( bn > n_par - j ) bn = n_par - j; // Alias the parent object's contents into the child object. bli_obj_alias_to( parent, child ); // Set the offsets and dimensions of the child object. Note that we // increment, rather than overwrite, the offsets of the child object // in case the parent object already had non-zero offsets (usually // because the parent was itself a child a larger grandparent object). 
bli_obj_inc_offs( i, j, child ); bli_obj_set_dims( bm, bn, child ); } void bli_acquire_mpart_t2b ( subpart_t req_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { bli_acquire_mpart_mdim( BLIS_FWD, req_part, i, b, obj, sub_obj ); } void bli_acquire_mpart_b2t ( subpart_t req_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { bli_acquire_mpart_mdim( BLIS_BWD, req_part, i, b, obj, sub_obj ); } void bli_acquire_mpart_mdim ( dir_t direct, subpart_t req_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { dim_t m; dim_t n; dim_t m_part = 0; dim_t n_part = 0; inc_t offm_inc = 0; inc_t offn_inc = 0; doff_t diag_off_inc; // Call a special function for partitioning packed objects. (By only // catching those objects packed to panels, we omit cases where the // object is packed to row or column storage, as such objects can be // partitioned through normally.) Note that the function called below // assumes forward partitioning. if ( bli_obj_is_panel_packed( obj ) ) { bli_packm_acquire_mpart_t2b( req_part, i, b, obj, sub_obj ); return; } // Check parameters. if ( bli_error_checking_is_enabled() ) bli_acquire_mpart_t2b_check( req_part, i, b, obj, sub_obj ); // Query the m and n dimensions of the object (accounting for // transposition, if indicated). if ( bli_obj_has_notrans( obj ) ) { m = bli_obj_length( obj ); n = bli_obj_width( obj ); } else // if ( bli_obj_has_trans( obj ) ) { m = bli_obj_width( obj ); n = bli_obj_length( obj ); } // Foolproofing: do not let b exceed what's left of the m dimension at // row offset i. if ( b > m - i ) b = m - i; // NOTE: Most of this function implicitly assumes moving forward. // When moving backward, we have to relocate i. if ( direct == BLIS_BWD ) { // Modify i to account for the fact that we are moving backwards. i = m - i - b; } // Support SUBPART1B (behind SUBPART1) and SUBPART1A (ahead of SUBPART1), // to refer to subpartitions 0 and 2 when moving forward, and 2 and 0 when // moving backward. 
subpart_t subpart0_alias; subpart_t subpart2_alias; if ( direct == BLIS_FWD ) { subpart0_alias = BLIS_SUBPART1B; subpart2_alias = BLIS_SUBPART1A; } else { subpart0_alias = BLIS_SUBPART1A; subpart2_alias = BLIS_SUBPART1B; } // Compute offset increments and dimensions based on which // subpartition is being requested, assuming no transposition. if ( req_part == BLIS_SUBPART0 || req_part == subpart0_alias ) { // A0 (offm,offn) unchanged. // A0 is i x n. offm_inc = 0; offn_inc = 0; m_part = i; n_part = n; } else if ( req_part == BLIS_SUBPART1AND0 ) { // A1+A0 (offm,offn) unchanged. // A1+A0 is (i+b) x n. offm_inc = 0; offn_inc = 0; m_part = i + b; n_part = n; } else if ( req_part == BLIS_SUBPART1 ) { // A1 (offm,offn) += (i,0). // A1 is b x n. offm_inc = i; offn_inc = 0; m_part = b; n_part = n; } else if ( req_part == BLIS_SUBPART1AND2 ) { // A1+A2 (offm,offn) += (i,0). // A1+A2 is (m-i) x n. offm_inc = i; offn_inc = 0; m_part = m - i; n_part = n; } else if ( req_part == BLIS_SUBPART2 || req_part == subpart2_alias ) { // A2 (offm,offn) += (i+b,0). // A2 is (m-i-b) x n. offm_inc = i + b; offn_inc = 0; m_part = m - i - b; n_part = n; } // Compute the diagonal offset based on the m and n offsets. diag_off_inc = ( doff_t )offm_inc - ( doff_t )offn_inc; // Begin by copying the info, elem size, buffer, row stride, and column // stride fields of the parent object. Note that this omits copying view // information because the new partition will have its own dimensions // and offsets. bli_obj_init_subpart_from( obj, sub_obj ); // Modify offsets and dimensions of requested partition based on // whether it needs to be transposed. 
if ( bli_obj_has_notrans( obj ) ) { bli_obj_set_dims( m_part, n_part, sub_obj ); bli_obj_inc_offs( offm_inc, offn_inc, sub_obj ); bli_obj_inc_diag_offset( diag_off_inc, sub_obj ); } else // if ( bli_obj_has_trans( obj ) ) { bli_obj_set_dims( n_part, m_part, sub_obj ); bli_obj_inc_offs( offn_inc, offm_inc, sub_obj ); bli_obj_inc_diag_offset( -diag_off_inc, sub_obj ); } // If the root matrix is not general (ie: has structure defined by the // diagonal), and the subpartition does not intersect the root matrix's // diagonal, then set the subpartition structure to "general"; otherwise // we let the subpartition inherit the storage structure of its immediate // parent. if ( !bli_obj_root_is_general( sub_obj ) && bli_obj_is_outside_diag( sub_obj ) ) { // NOTE: This comment may be out-of-date since we now distinguish // between uplo properties for the current and root objects... // Note that we cannot mark the subpartition object as general/dense // here since it makes sense to preserve the existing uplo information // a while longer so that the correct kernels are invoked. (Example: // incremental packing/computing in herk produces subpartitions that // appear general/dense, but their uplo fields are needed to be either // lower or upper, to determine which macro-kernel gets called in the // herk_int() back-end.) // If the subpartition lies entirely in an "unstored" triangle of the // root matrix, then we need to tweak the subpartition. If the root // matrix is Hermitian or symmetric, then we reflect the partition to // the other side of the diagonal, toggling the transposition bit (and // conjugation bit if the root matrix is Hermitian). Or, if the root // matrix is triangular, the subpartition should be marked as zero. 
if ( bli_obj_is_unstored_subpart( sub_obj ) ) { if ( bli_obj_root_is_hermitian( sub_obj ) ) { bli_obj_reflect_about_diag( sub_obj ); bli_obj_toggle_conj( sub_obj ); } else if ( bli_obj_root_is_symmetric( sub_obj ) ) { bli_obj_reflect_about_diag( sub_obj ); } else if ( bli_obj_root_is_triangular( sub_obj ) ) { bli_obj_set_uplo( BLIS_ZEROS, sub_obj ); } } } } void bli_acquire_mpart_l2r ( subpart_t req_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { bli_acquire_mpart_ndim( BLIS_FWD, req_part, i, b, obj, sub_obj ); } void bli_acquire_mpart_r2l ( subpart_t req_part, dim_t j, dim_t b, obj_t* obj, obj_t* sub_obj ) { bli_acquire_mpart_ndim( BLIS_BWD, req_part, j, b, obj, sub_obj ); } void bli_acquire_mpart_ndim ( dir_t direct, subpart_t req_part, dim_t j, dim_t b, obj_t* obj, obj_t* sub_obj ) { dim_t m; dim_t n; dim_t m_part = 0; dim_t n_part = 0; inc_t offm_inc = 0; inc_t offn_inc = 0; doff_t diag_off_inc; // Call a special function for partitioning packed objects. (By only // catching those objects packed to panels, we omit cases where the // object is packed to row or column storage, as such objects can be // partitioned through normally.) Note that the function called below // assumes forward partitioning. if ( bli_obj_is_panel_packed( obj ) ) { bli_packm_acquire_mpart_l2r( req_part, j, b, obj, sub_obj ); return; } // Check parameters. if ( bli_error_checking_is_enabled() ) bli_acquire_mpart_l2r_check( req_part, j, b, obj, sub_obj ); // Query the m and n dimensions of the object (accounting for // transposition, if indicated). if ( bli_obj_has_notrans( obj ) ) { m = bli_obj_length( obj ); n = bli_obj_width( obj ); } else // if ( bli_obj_has_trans( obj ) ) { m = bli_obj_width( obj ); n = bli_obj_length( obj ); } // Foolproofing: do not let b exceed what's left of the n dimension at // column offset j. if ( b > n - j ) b = n - j; // NOTE: Most of this function implicitly assumes moving forward. // When moving backward, we have to relocate j. 
if ( direct == BLIS_BWD ) { // Modify j to account for the fact that we are moving backwards. j = n - j - b; } // Support SUBPART1B (behind SUBPART1) and SUBPART1A (ahead of SUBPART1), // to refer to subpartitions 0 and 2 when moving forward, and 2 and 0 when // moving backward. subpart_t subpart0_alias; subpart_t subpart2_alias; if ( direct == BLIS_FWD ) { subpart0_alias = BLIS_SUBPART1B; subpart2_alias = BLIS_SUBPART1A; } else { subpart0_alias = BLIS_SUBPART1A; subpart2_alias = BLIS_SUBPART1B; } // Compute offset increments and dimensions based on which // subpartition is being requested, assuming no transposition. if ( req_part == BLIS_SUBPART0 || req_part == subpart0_alias ) { // A0 (offm,offn) unchanged. // A0 is m x j. offm_inc = 0; offn_inc = 0; m_part = m; n_part = j; } else if ( req_part == BLIS_SUBPART1AND0 ) { // A1+A0 (offm,offn) unchanged. // A1+A0 is m x (j+b). offm_inc = 0; offn_inc = 0; m_part = m; n_part = j + b; } else if ( req_part == BLIS_SUBPART1 ) { // A1 (offm,offn) += (0,j). // A1 is m x b. offm_inc = 0; offn_inc = j; m_part = m; n_part = b; } else if ( req_part == BLIS_SUBPART1AND2 ) { // A1+A2 (offm,offn) += (0,j). // A1+A2 is m x (n-j). offm_inc = 0; offn_inc = j; m_part = m; n_part = n - j; } else if ( req_part == BLIS_SUBPART2 || req_part == subpart2_alias ) { // A2 (offm,offn) += (0,j+b). // A2 is m x (n-j-b). offm_inc = 0; offn_inc = j + b; m_part = m; n_part = n - j - b; } // Compute the diagonal offset based on the m and n offsets. diag_off_inc = ( doff_t )offm_inc - ( doff_t )offn_inc; // Begin by copying the info, elem size, buffer, row stride, and column // stride fields of the parent object. Note that this omits copying view // information because the new partition will have its own dimensions // and offsets. bli_obj_init_subpart_from( obj, sub_obj ); // Modify offsets and dimensions of requested partition based on // whether it needs to be transposed. 
if ( bli_obj_has_notrans( obj ) ) { bli_obj_set_dims( m_part, n_part, sub_obj ); bli_obj_inc_offs( offm_inc, offn_inc, sub_obj ); bli_obj_inc_diag_offset( diag_off_inc, sub_obj ); } else // if ( bli_obj_has_trans( obj ) ) { bli_obj_set_dims( n_part, m_part, sub_obj ); bli_obj_inc_offs( offn_inc, offm_inc, sub_obj ); bli_obj_inc_diag_offset( -diag_off_inc, sub_obj ); } // If the root matrix is not general (ie: has structure defined by the // diagonal), and the subpartition does not intersect the root matrix's // diagonal, then we might need to modify some of the subpartition's // properties, depending on its structure type. if ( !bli_obj_root_is_general( sub_obj ) && bli_obj_is_outside_diag( sub_obj ) ) { // NOTE: This comment may be out-of-date since we now distinguish // between uplo properties for the current and root objects... // Note that we cannot mark the subpartition object as general/dense // here since it makes sense to preserve the existing uplo information // a while longer so that the correct kernels are invoked. (Example: // incremental packing/computing in herk produces subpartitions that // appear general/dense, but their uplo fields are needed to be either // lower or upper, to determine which macro-kernel gets called in the // herk_int() back-end.) // If the subpartition lies entirely in an "unstored" triangle of the // root matrix, then we need to tweak the subpartition. If the root // matrix is Hermitian or symmetric, then we reflect the partition to // the other side of the diagonal, toggling the transposition bit (and // conjugation bit if the root matrix is Hermitian). Or, if the root // matrix is triangular, the subpartition should be marked as zero. 
if ( bli_obj_is_unstored_subpart( sub_obj ) ) { if ( bli_obj_root_is_hermitian( sub_obj ) ) { bli_obj_reflect_about_diag( sub_obj ); bli_obj_toggle_conj( sub_obj ); } else if ( bli_obj_root_is_symmetric( sub_obj ) ) { bli_obj_reflect_about_diag( sub_obj ); } else if ( bli_obj_root_is_triangular( sub_obj ) ) { bli_obj_set_uplo( BLIS_ZEROS, sub_obj ); } } } } void bli_acquire_mpart_tl2br ( subpart_t req_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { bli_acquire_mpart_mndim( BLIS_FWD, req_part, i, b, obj, sub_obj ); } void bli_acquire_mpart_br2tl ( subpart_t req_part, dim_t j, dim_t b, obj_t* obj, obj_t* sub_obj ) { bli_acquire_mpart_mndim( BLIS_BWD, req_part, j, b, obj, sub_obj ); } void bli_acquire_mpart_mndim ( dir_t direct, subpart_t req_part, dim_t ij, dim_t b, obj_t* obj, obj_t* sub_obj ) { dim_t m; dim_t n; dim_t min_m_n; dim_t m_part = 0; dim_t n_part = 0; inc_t offm_inc = 0; inc_t offn_inc = 0; doff_t diag_off_inc; // Call a special function for partitioning packed objects. (By only // catching those objects packed to panels, we omit cases where the // object is packed to row or column storage, as such objects can be // partitioned through normally.) Note that the function called below // assumes forward partitioning. if ( bli_obj_is_panel_packed( obj ) ) { bli_packm_acquire_mpart_tl2br( req_part, ij, b, obj, sub_obj ); return; } // Check parameters. if ( bli_error_checking_is_enabled() ) bli_acquire_mpart_tl2br_check( req_part, ij, b, obj, sub_obj ); // Query the m and n dimensions of the object (accounting for // transposition, if indicated). if ( bli_obj_has_notrans( obj ) ) { m = bli_obj_length( obj ); n = bli_obj_width( obj ); } else // if ( bli_obj_has_trans( obj ) ) { m = bli_obj_width( obj ); n = bli_obj_length( obj ); } // Foolproofing: do not let b exceed what's left of min(m,n) at // row/column offset ij. min_m_n = bli_min( m, n ); if ( b > min_m_n - ij ) b = min_m_n - ij; // NOTE: Most of this function implicitly assumes moving forward. 
// When moving backward, we have to relocate ij. if ( direct == BLIS_BWD ) { // Modify ij to account for the fact that we are moving backwards. ij = min_m_n - ij - b; } // Compute offset increments and dimensions based on which // subpartition is being requested, assuming no transposition. // Left column of subpartitions if ( req_part == BLIS_SUBPART00 ) { // A00 (offm,offn) unchanged. // A00 is ij x ij. offm_inc = 0; offn_inc = 0; m_part = ij; n_part = ij; } else if ( req_part == BLIS_SUBPART10 ) { // A10 (offm,offn) += (ij,0). // A10 is b x ij. offm_inc = ij; offn_inc = 0; m_part = b; n_part = ij; } else if ( req_part == BLIS_SUBPART20 ) { // A20 (offm,offn) += (ij+b,0). // A20 is (m-ij-b) x ij. offm_inc = ij + b; offn_inc = 0; m_part = m - ij - b; n_part = ij; } // Middle column of subpartitions. else if ( req_part == BLIS_SUBPART01 ) { // A01 (offm,offn) += (0,ij). // A01 is ij x b. offm_inc = 0; offn_inc = ij; m_part = ij; n_part = b; } else if ( req_part == BLIS_SUBPART11 ) { // A11 (offm,offn) += (ij,ij). // A11 is b x b. offm_inc = ij; offn_inc = ij; m_part = b; n_part = b; } else if ( req_part == BLIS_SUBPART21 ) { // A21 (offm,offn) += (ij+b,ij). // A21 is (m-ij-b) x b. offm_inc = ij + b; offn_inc = ij; m_part = m - ij - b; n_part = b; } // Right column of subpartitions. else if ( req_part == BLIS_SUBPART02 ) { // A02 (offm,offn) += (0,ij+b). // A02 is ij x (n-ij-b). offm_inc = 0; offn_inc = ij + b; m_part = ij; n_part = n - ij - b; } else if ( req_part == BLIS_SUBPART12 ) { // A12 (offm,offn) += (ij,ij+b). // A12 is b x (n-ij-b). offm_inc = ij; offn_inc = ij + b; m_part = b; n_part = n - ij - b; } else // if ( req_part == BLIS_SUBPART22 ) { // A22 (offm,offn) += (ij+b,ij+b). // A22 is (m-ij-b) x (n-ij-b). offm_inc = ij + b; offn_inc = ij + b; m_part = m - ij - b; n_part = n - ij - b; } // Compute the diagonal offset based on the m and n offsets. 
diag_off_inc = ( doff_t )offm_inc - ( doff_t )offn_inc; // Begin by copying the info, elem size, buffer, row stride, and column // stride fields of the parent object. Note that this omits copying view // information because the new partition will have its own dimensions // and offsets. bli_obj_init_subpart_from( obj, sub_obj ); // Modify offsets and dimensions of requested partition based on // whether it needs to be transposed. if ( bli_obj_has_notrans( obj ) ) { bli_obj_set_dims( m_part, n_part, sub_obj ); bli_obj_inc_offs( offm_inc, offn_inc, sub_obj ); bli_obj_inc_diag_offset( diag_off_inc, sub_obj ); } else // if ( bli_obj_has_trans( obj ) ) { bli_obj_set_dims( n_part, m_part, sub_obj ); bli_obj_inc_offs( offn_inc, offm_inc, sub_obj ); bli_obj_inc_diag_offset( -diag_off_inc, sub_obj ); } // If the root matrix is not general (ie: has structure defined by the // diagonal), and the subpartition does not intersect the root matrix's // diagonal, then set the subpartition structure to "general"; otherwise // we let the subpartition inherit the storage structure of its immediate // parent. if ( !bli_obj_root_is_general( sub_obj ) && req_part != BLIS_SUBPART00 && req_part != BLIS_SUBPART11 && req_part != BLIS_SUBPART22 ) { // FGVZ: Fix me. This needs to be cleaned up. Either non-diagonal // intersecting subpartitions should inherit their root object's // uplo field, or it should not. Right now, they DO inherit the // uplo (because they are not set to BLIS_DENSE when the diagonal // does not intersect). But the whole point of being able to query // the root object's properties (e.g. uplo field) was so that we // COULD mark such subpartitions as dense, to make it easier for // certain subproblems on those subpartitions--subproblems that // are agnostic to where the subpartition came from. // NOTE: This comment may be out-of-date since we now distinguish // between uplo properties for the current and root objects... 
// Note that we cannot mark the subpartition object as general/dense // here since it makes sense to preserve the existing uplo information // a while longer so that the correct kernels are invoked. (Example: // incremental packing/computing in herk produces subpartitions that // appear general/dense, but their uplo fields are needed to be either // lower or upper, to determine which macro-kernel gets called in the // herk_int() back-end.) // If the subpartition lies entirely in an "unstored" triangle of the // root matrix, then we need to tweak the subpartition. If the root // matrix is Hermitian or symmetric, then we reflect the partition to // the other side of the diagonal, toggling the transposition bit (and // conjugation bit if the root matrix is Hermitian). Or, if the root // matrix is triangular, the subpartition should be marked as zero. if ( bli_obj_is_unstored_subpart( sub_obj ) ) { if ( bli_obj_root_is_hermitian( sub_obj ) ) { bli_obj_reflect_about_diag( sub_obj ); bli_obj_toggle_conj( sub_obj ); } else if ( bli_obj_root_is_symmetric( sub_obj ) ) { bli_obj_reflect_about_diag( sub_obj ); } else if ( bli_obj_root_is_triangular( sub_obj ) ) { bli_obj_set_uplo( BLIS_ZEROS, sub_obj ); } } } } // -- Vector partitioning ------------------------------------------------------ void bli_acquire_vpart_f2b ( subpart_t req_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { if ( bli_obj_is_col_vector( obj ) ) bli_acquire_mpart_mdim( BLIS_FWD, req_part, i, b, obj, sub_obj ); else // if ( bli_obj_is_row_vector( obj ) ) bli_acquire_mpart_ndim( BLIS_FWD, req_part, i, b, obj, sub_obj ); } void bli_acquire_vpart_b2f ( subpart_t req_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { if ( bli_obj_is_col_vector( obj ) ) bli_acquire_mpart_mdim( BLIS_BWD, req_part, i, b, obj, sub_obj ); else // if ( bli_obj_is_row_vector( obj ) ) bli_acquire_mpart_ndim( BLIS_BWD, req_part, i, b, obj, sub_obj ); } // -- Scalar acquisition 
------------------------------------------------------- void bli_acquire_mij ( dim_t i, dim_t j, obj_t* obj, obj_t* sub_obj ) { obj_t tmp_obj; bli_acquire_mpart_ndim( BLIS_FWD, BLIS_SUBPART1, j, 1, obj, &tmp_obj ); bli_acquire_mpart_mdim( BLIS_FWD, BLIS_SUBPART1, i, 1, &tmp_obj, sub_obj ); } void bli_acquire_vi ( dim_t i, obj_t* obj, obj_t* sub_obj ) { if ( bli_obj_is_col_vector( obj ) ) bli_acquire_mpart_mdim( BLIS_FWD, BLIS_SUBPART1, i, 1, obj, sub_obj ); else // if ( bli_obj_is_row_vector( obj ) ) bli_acquire_mpart_ndim( BLIS_FWD, BLIS_SUBPART1, i, 1, obj, sub_obj ); } blis-0.6.1/frame/base/bli_part.h000066400000000000000000000065671360743507500164500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "bli_part_check.h"

// -- Matrix partitioning ------------------------------------------------------

// Acquire the m x n submatrix of obj located at offset (i,j) as sub_obj.
BLIS_EXPORT_BLIS void bli_acquire_mpart
     (
       dim_t     i,
       dim_t     j,
       dim_t     m,
       dim_t     n,
       obj_t*    obj,
       obj_t*    sub_obj
     );

// Prototype generator for the direction-specific partitioning functions,
// each of which takes the requested subpartition, an offset i, a blocksize
// b, the object to partition, and the output object.
// NOTE(review): PASTEMAC0() is defined elsewhere; presumably it prepends the
// "bli_" prefix to opname -- confirm against its definition.
#undef  GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0( opname ) \
     ( \
       subpart_t req_part, \
       dim_t     i, \
       dim_t     b, \
       obj_t*    obj, \
       obj_t*    sub_obj \
     );

GENPROT( acquire_mpart_t2b )
GENPROT( acquire_mpart_b2t )
GENPROT( acquire_mpart_l2r )
GENPROT( acquire_mpart_r2l )
GENPROT( acquire_mpart_tl2br )
GENPROT( acquire_mpart_br2tl )

// Prototype generator for the dimension-specific partitioning functions,
// which take the same arguments as above preceded by a direction.
#undef  GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0( opname ) \
     ( \
       dir_t     direct, \
       subpart_t req_part, \
       dim_t     i, \
       dim_t     b, \
       obj_t*    obj, \
       obj_t*    sub_obj \
     );

GENPROT( acquire_mpart_mdim )
GENPROT( acquire_mpart_ndim )
GENPROT( acquire_mpart_mndim )

// -- Vector partitioning ------------------------------------------------------

// Prototype generator for the vector partitioning functions.
#undef  GENPROT
#define GENPROT( opname ) \
\
BLIS_EXPORT_BLIS void PASTEMAC0( opname ) \
     ( \
       subpart_t req_part, \
       dim_t     i, \
       dim_t     b, \
       obj_t*    obj, \
       obj_t*    sub_obj \
     );

GENPROT( acquire_vpart_f2b )
GENPROT( acquire_vpart_b2f )

// -- Scalar acquisition -------------------------------------------------------

// Acquire the element at (i,j) of the matrix obj as sub_obj.
BLIS_EXPORT_BLIS void bli_acquire_mij
     (
       dim_t     i,
       dim_t     j,
       obj_t*    obj,
       obj_t*    sub_obj
     );

// Acquire element i of the vector obj as sub_obj.
BLIS_EXPORT_BLIS void bli_acquire_vi
     (
       dim_t     i,
       obj_t*    obj,
       obj_t*    sub_obj
     );
blis-0.6.1/frame/base/bli_pool.c000066400000000000000000000505641360743507500164420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" //#define BLIS_ENABLE_MEM_TRACING void bli_pool_init ( siz_t num_blocks, siz_t block_ptrs_len, siz_t block_size, siz_t align_size, siz_t offset_size, malloc_ft malloc_fp, free_ft free_fp, pool_t* restrict pool ) { // Make sure that block_ptrs_len is at least num_blocks. block_ptrs_len = bli_max( block_ptrs_len, num_blocks ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_init(): allocating block_ptrs (length %d): ", ( int )block_ptrs_len ); #endif // Allocate the block_ptrs array. // FGVZ: Do we want to call malloc_fp() for internal data structures as // well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g. pblk_t* restrict block_ptrs = bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ) ); // Allocate and initialize each entry in the block_ptrs array. for ( dim_t i = 0; i < num_blocks; ++i ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_init(): allocating block %d of size %d (align %d, offset %d).\n", ( int )i, ( int )block_size, ( int )align_size, ( int )offset_size ); fflush( stdout ); #endif bli_pool_alloc_block ( block_size, align_size, offset_size, malloc_fp, &(block_ptrs[i]) ); } // NOTE: The semantics of top_index approximate a stack, where a "full" // stack (no blocks checked out) is one where top_index == 0 and an empty // stack (all blocks checked out) one where top_index == num_blocks. // (Here, num_blocks tracks the number of blocks currently allocated as // part of the pool.) This "orientation" of the stack was chosen // intentionally, in contrast to one where top_index == -1 means the // stack is empty and top_index = num_blocks - 1 means the stack is // full. The chosen scheme allows one to conceptualize the stack as a // number line in which blocks are checked out from lowest to highest, // and additional blocks are added at the higher end. // Initialize the pool_t structure. 
bli_pool_set_block_ptrs( block_ptrs, pool ); bli_pool_set_block_ptrs_len( block_ptrs_len, pool ); bli_pool_set_top_index( 0, pool ); bli_pool_set_num_blocks( num_blocks, pool ); bli_pool_set_block_size( block_size, pool ); bli_pool_set_align_size( align_size, pool ); bli_pool_set_offset_size( offset_size, pool ); bli_pool_set_malloc_fp( malloc_fp, pool ); bli_pool_set_free_fp( free_fp, pool ); } void bli_pool_finalize ( pool_t* restrict pool ) { // NOTE: This implementation assumes that either: // - all blocks have been checked in by all threads, or // - some subset of blocks have been checked in and the caller // is bli_pool_reinit(). // Query the block_ptrs array. pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool ); // Query the total number of blocks currently allocated. const siz_t num_blocks = bli_pool_num_blocks( pool ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); // Sanity check: The top_index should be zero. if ( top_index != 0 ) { printf( "bli_pool_finalize(): final top_index == %d (expected 0); block_size: %d.\n", ( int )top_index, ( int )bli_pool_block_size( pool ) ); printf( "bli_pool_finalize(): Implication: not all blocks were checked back in!\n" ); bli_abort(); } // Query the free() function pointer for the pool. free_ft free_fp = bli_pool_free_fp( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_finalize(): freeing %d blocks of size %d (align %d, offset %d).\n", ( int )num_blocks, ( int )bli_pool_block_size( pool ), ( int )bli_pool_align_size( pool ), ( int )bli_pool_offset_size( pool ) ); fflush( stdout ); #endif // Query the offset size of the pool. const siz_t offset_size = bli_pool_offset_size( pool ); // Free the individual blocks currently in the pool. 
for ( dim_t i = 0; i < num_blocks; ++i ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_finalize(): block %d: ", ( int )i ); #endif bli_pool_free_block( offset_size, free_fp, &(block_ptrs[i]) ); } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_finalize(): freeing block_ptrs (length %d): ", ( int )( bli_pool_block_ptrs_len( pool ) ) ); #endif // Free the block_ptrs array. bli_free_intl( block_ptrs ); // This explicit clearing of the pool_t struct is not strictly // necessary and so it has been commented out. #if 0 // Clear the contents of the pool_t struct. bli_pool_set_block_ptrs( NULL, pool ); bli_pool_set_block_ptrs_len( 0, pool ); bli_pool_set_num_blocks( 0, pool ); bli_pool_set_top_index( 0, pool ); bli_pool_set_block_size( 0, pool ); bli_pool_set_align_size( 0, pool ); bli_pool_set_offset_size( 0, pool ); #endif } void bli_pool_reinit ( siz_t num_blocks_new, siz_t block_ptrs_len_new, siz_t block_size_new, siz_t align_size_new, siz_t offset_size_new, pool_t* restrict pool ) { // Preserve the pointers to malloc() and free() provided when the pool // was first initialized. malloc_ft malloc_fp = bli_pool_malloc_fp( pool ); free_ft free_fp = bli_pool_free_fp( pool ); // Finalize the pool as it is currently configured. If some blocks // are still checked out to threads, those blocks are not freed // here, and instead will be freed when the threads attempt to check // those blocks back into the pool. (This condition can be detected // since the block size is encoded into each pblk, which is copied // upon checkout.) bli_pool_finalize( pool ); // Reinitialize the pool with the new parameters, in particular, // the new block size. 
bli_pool_init ( num_blocks_new, block_ptrs_len_new, block_size_new, align_size_new, offset_size_new, malloc_fp, free_fp, pool ); } void bli_pool_checkout_block ( siz_t req_size, pblk_t* restrict block, pool_t* restrict pool ) { // If the requested block size is smaller than what the pool was // initialized with, reinitialize the pool to contain blocks of the // requested size. if ( bli_pool_block_size( pool ) < req_size ) { const siz_t num_blocks_new = bli_pool_num_blocks( pool ); const siz_t block_ptrs_len_new = bli_pool_block_ptrs_len( pool ); const siz_t align_size_new = bli_pool_align_size( pool ); const siz_t offset_size_new = bli_pool_offset_size( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_checkout_block(): old block size %d < req size %d; " "reiniting.\n", ( int )bli_pool_block_size( pool ), ( int )req_size ); fflush( stdout ); #endif bli_pool_reinit ( num_blocks_new, block_ptrs_len_new, req_size, align_size_new, offset_size_new, pool ); } // If the pool is exhausted, add a block. if ( bli_pool_is_exhausted( pool ) ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_checkout_block(): pool is exhausted (block size %d); " "growing by 1.\n", ( int )bli_pool_block_size( pool ) ); fflush( stdout ); #endif bli_pool_grow( 1, pool ); } // At this point, at least one block is guaranteed to be available. // Query the block_ptrs array. pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_checkout_block(): checking out block %d of size %d " "(align %d).\n", ( int )top_index, ( int )bli_pool_block_size( pool ), ( int )bli_pool_align_size( pool ) ); fflush( stdout ); #endif // Copy the pblk_t at top_index to the caller's pblk_t struct. *block = block_ptrs[ top_index ]; // Notice that we don't actually need to clear the contents of // block_ptrs[top_index]. 
It will get overwritten eventually when // the block is checked back in. bli_pblk_clear( &block_ptrs[top_index] ); // Increment the pool's top_index. bli_pool_set_top_index( top_index + 1, pool ); } void bli_pool_checkin_block ( pblk_t* restrict block, pool_t* restrict pool ) { // If the pblk_t being checked in was allocated with a different block // size than is currently in use in the pool, we simply free it and // return. These "orphaned" blocks are no longer of use because the pool // has since been reinitialized to a different (larger) block size. if ( bli_pblk_block_size( block ) != bli_pool_block_size( pool ) ) { // Query the offset size of the pool. const siz_t offset_size = bli_pool_offset_size( pool ); // Query the free() function pointer for the pool. free_ft free_fp = bli_pool_free_fp( pool ); bli_pool_free_block( offset_size, free_fp, block ); return; } // Query the block_ptrs array. pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_checkin_block(): checking in block %d of size %d " "(align %d, offset %d).\n", ( int )top_index - 1, ( int )bli_pool_block_size( pool ), ( int )bli_pool_align_size( pool ), ( int )bli_pool_offset_size( pool ) ); fflush( stdout ); #endif // Copy the caller's pblk_t struct to the block at top_index - 1. block_ptrs[ top_index - 1 ] = *block; // Decrement the pool's top_index. bli_pool_set_top_index( top_index - 1, pool ); } void bli_pool_grow ( siz_t num_blocks_add, pool_t* restrict pool ) { // If the requested increase is zero, return early. if ( num_blocks_add == 0 ) return; // Query the allocated length of the block_ptrs array and also the // total number of blocks currently allocated. 
const siz_t block_ptrs_len_cur = bli_pool_block_ptrs_len( pool ); const siz_t num_blocks_cur = bli_pool_num_blocks( pool ); // Compute the total number of allocated blocks that will exist // after we grow the pool. const siz_t num_blocks_new = num_blocks_cur + num_blocks_add; // If adding num_blocks_add new blocks will exceed the current capacity // of the block_ptrs array, we need to first put in place a new (larger) // array. if ( block_ptrs_len_cur < num_blocks_new ) { // To prevent this from happening often, we double the current // length of the block_ptrs array. const siz_t block_ptrs_len_new = 2 * block_ptrs_len_cur; #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_grow(): growing block_ptrs_len (%d -> %d): ", ( int )block_ptrs_len_cur, ( int )block_ptrs_len_new ); #endif // Query the current block_ptrs array. pblk_t* restrict block_ptrs_cur = bli_pool_block_ptrs( pool ); // Allocate a new block_ptrs array. // FGVZ: Do we want to call malloc_fp() for internal data structures as // well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g. pblk_t* restrict block_ptrs_new = bli_malloc_intl( block_ptrs_len_new * sizeof( pblk_t ) ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); // Copy the contents of the old block_ptrs array to the new/resized // array. Notice that we can begin with top_index since all entries // from 0 to top_index-1 have been (and are currently) checked out // to threads. for ( dim_t i = top_index; i < num_blocks_cur; ++i ) { block_ptrs_new[i] = block_ptrs_cur[i]; } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_grow(): freeing prev block_ptrs: " ); #endif // Free the old block_ptrs array. bli_free_intl( block_ptrs_cur ); // Update the pool_t struct with the new block_ptrs array and // record its allocated length. 
bli_pool_set_block_ptrs( block_ptrs_new, pool ); bli_pool_set_block_ptrs_len( block_ptrs_len_new, pool ); } // At this point, we are guaranteed to have enough unused elements // in the block_ptrs array to accommodate an additional num_blocks_add // blocks. // Query the current block_ptrs array (which was mabye just resized). pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool ); // Query the block size and alignment size of the pool. const siz_t block_size = bli_pool_block_size( pool ); const siz_t align_size = bli_pool_align_size( pool ); const siz_t offset_size = bli_pool_offset_size( pool ); // Query the malloc() function pointer for the pool. malloc_ft malloc_fp = bli_pool_malloc_fp( pool ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_grow(): growing pool from (%d -> %d).\n", ( int )num_blocks_cur, ( int )num_blocks_new ); fflush( stdout ); #endif // Allocate the requested additional blocks in the resized array. for ( dim_t i = num_blocks_cur; i < num_blocks_new; ++i ) { bli_pool_alloc_block ( block_size, align_size, offset_size, malloc_fp, &(block_ptrs[i]) ); } // Update the pool_t struct with the new number of allocated blocks. // Notice that top_index remains unchanged, as do the block_size and // align_size fields. bli_pool_set_num_blocks( num_blocks_new, pool ); } void bli_pool_shrink ( siz_t num_blocks_sub, pool_t* restrict pool ) { // If the requested decrease is zero, return early. if ( num_blocks_sub == 0 ) return; // Query the total number of blocks currently allocated. const siz_t num_blocks = bli_pool_num_blocks( pool ); // Query the top_index of the pool. const siz_t top_index = bli_pool_top_index( pool ); // Compute the number of blocks available to be checked out // (and thus available for removal). const siz_t num_blocks_avail = num_blocks - top_index; // If the requested decrease is more than the number of available // blocks in the pool, only remove the number of blocks actually // available. 
num_blocks_sub = bli_min( num_blocks_sub, num_blocks_avail ); // Query the block_ptrs array. pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool ); // Compute the new total number of blocks. const siz_t num_blocks_new = num_blocks - num_blocks_sub; // Query the offset size of the pool. const siz_t offset_size = bli_pool_offset_size( pool ); // Query the free() function pointer for the pool. free_ft free_fp = bli_pool_free_fp( pool ); // Free the individual blocks. for ( dim_t i = num_blocks_new; i < num_blocks; ++i ) { bli_pool_free_block( offset_size, free_fp, &(block_ptrs[i]) ); } // Update the pool_t struct. bli_pool_set_num_blocks( num_blocks_new, pool ); // Note that after shrinking the pool, num_blocks < block_ptrs_len. // This means the pool can grow again by num_blocks_sub before // a re-allocation of block_ptrs is triggered. } void bli_pool_alloc_block ( siz_t block_size, siz_t align_size, siz_t offset_size, malloc_ft malloc_fp, pblk_t* restrict block ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_alloc_block(): calling fmalloc_align(): size %d (align %d, offset %d)\n", ( int )block_size, ( int )align_size, ( int )offset_size ); fflush( stdout ); #endif // Allocate the block via the bli_fmalloc_align() wrapper, which performs // alignment logic and opaquely saves the original pointer so that it can // be recovered when it's time to free the block. Note that we have to // add offset_size to the number of bytes requested since we will skip // that many bytes at the beginning of the allocated memory. void* restrict buf = bli_fmalloc_align( malloc_fp, block_size + offset_size, align_size ); #if 0 // NOTE: This code is disabled because it is not needed, since // bli_fmalloc_align() is guaranteed to return an aligned address. // Advance the pointer to achieve the necessary alignment, if it is not // already aligned. 
if ( bli_is_unaligned_to( ( siz_t )buf_sys, ( siz_t )align_size ) ) { // C99's stdint.h guarantees that a void* can be safely cast to a // uintptr_t and then back to a void*, hence the casting of buf_sys // and align_size to uintptr_t. buf_align is initially cast to char* // to allow pointer arithmetic in units of bytes, and then advanced // to the next nearest alignment boundary, and finally cast back to // void* before being stored. Notice that the arithmetic works even // if the alignment value is not a power of two. buf_align = ( void* )( ( char* )buf_align + ( ( uintptr_t )align_size - ( uintptr_t )buf_sys % ( uintptr_t )align_size ) ); } #endif // Advance the pointer by offset_size bytes. buf = ( void* )( ( char* )buf + offset_size ); // Save the results in the pblk_t structure. bli_pblk_set_buf( buf, block ); bli_pblk_set_block_size( block_size, block ); } void bli_pool_free_block ( siz_t offset_size, free_ft free_fp, pblk_t* restrict block ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_pool_free_block(): calling ffree_align(): size %d.\n", ( int )bli_pblk_block_size( block ) ); fflush( stdout ); #endif // Extract the pblk_t buffer, which is the aligned address returned from // bli_fmalloc_align() when the block was allocated. void* restrict buf = bli_pblk_buf( block ); // Undo the pointer advancement by offset_size bytes performed previously // by bli_pool_alloc_block(). buf = ( void* )( ( char* )buf - offset_size ); // Free the block via the bli_ffree_align() wrapper, which recovers the // original pointer that was returned by the pool's malloc() function when // the block was allocated. 
bli_ffree_align( free_fp, buf ); } void bli_pool_print ( pool_t* restrict pool ) { pblk_t* block_ptrs = bli_pool_block_ptrs( pool ); siz_t block_ptrs_len = bli_pool_block_ptrs_len( pool ); siz_t top_index = bli_pool_top_index( pool ); siz_t num_blocks = bli_pool_num_blocks( pool ); siz_t block_size = bli_pool_block_size( pool ); siz_t align_size = bli_pool_align_size( pool ); siz_t offset_size = bli_pool_offset_size( pool ); printf( "pool struct ---------------\n" ); printf( " block_ptrs: %p\n", block_ptrs ); printf( " block_ptrs_len: %d\n", ( int )block_ptrs_len ); printf( " top_index: %d\n", ( int )top_index ); printf( " num_blocks: %d\n", ( int )num_blocks ); printf( " block_size: %d\n", ( int )block_size ); printf( " align_size: %d\n", ( int )align_size ); printf( " offset_size: %d\n", ( int )offset_size ); printf( " pblks sys align\n" ); for ( dim_t i = 0; i < num_blocks; ++i ) { printf( " %d: %p\n", ( int )i, bli_pblk_buf( &block_ptrs[i] ) ); } } void bli_pblk_print ( pblk_t* restrict pblk ) { void* buf = bli_pblk_buf( pblk ); printf( "pblk struct ---------------\n" ); printf( " block address (aligned): %p\n", buf ); } blis-0.6.1/frame/base/bli_pool.h000066400000000000000000000147221360743507500164430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_POOL_H #define BLIS_POOL_H // -- Pool block type -- /* typedef struct { void* buf; siz_t block_size; } pblk_t; */ // -- Pool type -- /* typedef struct { void* block_ptrs; siz_t block_ptrs_len; siz_t top_index; siz_t num_blocks; siz_t block_size; siz_t align_size; malloc_ft malloc_fp; free_ft free_fp; } pool_t; */ // Pool block query static void* bli_pblk_buf( pblk_t* pblk ) { return pblk->buf; } static siz_t bli_pblk_block_size( pblk_t* pblk ) { return pblk->block_size; } // Pool block modification static void bli_pblk_set_buf( void* buf, pblk_t* pblk ) { pblk->buf = buf; } static void bli_pblk_set_block_size( siz_t block_size, pblk_t* pblk ) { pblk->block_size = block_size; } // // -- pool block initialization ------------------------------------------------ // // NOTE: This initializer macro must be updated whenever fields are added or // removed from the pblk_t type definition. An alternative to the initializer is // calling bli_pblk_clear() at runtime. 
#define BLIS_PBLK_INITIALIZER \ { \ .buf = NULL, \ .block_size = 0, \ } \ static void bli_pblk_clear( pblk_t* pblk ) { bli_pblk_set_buf( NULL, pblk ); bli_pblk_set_block_size( 0, pblk ); } // Pool entry query static void* bli_pool_block_ptrs( pool_t* pool ) { return pool->block_ptrs; } static siz_t bli_pool_block_ptrs_len( pool_t* pool ) { return pool->block_ptrs_len; } static siz_t bli_pool_num_blocks( pool_t* pool ) { return pool->num_blocks; } static siz_t bli_pool_block_size( pool_t* pool ) { return pool->block_size; } static siz_t bli_pool_align_size( pool_t* pool ) { return pool->align_size; } static siz_t bli_pool_offset_size( pool_t* pool ) { return pool->offset_size; } static malloc_ft bli_pool_malloc_fp( pool_t* pool ) { return pool->malloc_fp; } static free_ft bli_pool_free_fp( pool_t* pool ) { return pool->free_fp; } static siz_t bli_pool_top_index( pool_t* pool ) { return pool->top_index; } static bool_t bli_pool_is_exhausted( pool_t* pool ) { return ( bool_t ) ( bli_pool_top_index( pool ) == bli_pool_num_blocks( pool ) ); } // Pool entry modification static void bli_pool_set_block_ptrs( void* block_ptrs, pool_t* pool ) \ { pool->block_ptrs = block_ptrs; } static void bli_pool_set_block_ptrs_len( siz_t block_ptrs_len, pool_t* pool ) \ { pool->block_ptrs_len = block_ptrs_len; } static void bli_pool_set_num_blocks( siz_t num_blocks, pool_t* pool ) \ { pool->num_blocks = num_blocks; } static void bli_pool_set_block_size( siz_t block_size, pool_t* pool ) \ { pool->block_size = block_size; } static void bli_pool_set_align_size( siz_t align_size, pool_t* pool ) \ { pool->align_size = align_size; } static void bli_pool_set_offset_size( siz_t offset_size, pool_t* pool ) \ { pool->offset_size = offset_size; } static void bli_pool_set_malloc_fp( malloc_ft malloc_fp, pool_t* pool ) \ { pool->malloc_fp = malloc_fp; } static void bli_pool_set_free_fp( free_ft free_fp, pool_t* pool ) \ { pool->free_fp = free_fp; } static void bli_pool_set_top_index( siz_t top_index, 
pool_t* pool ) \ { pool->top_index = top_index; } // ----------------------------------------------------------------------------- void bli_pool_init ( siz_t num_blocks, siz_t block_ptrs_len, siz_t block_size, siz_t align_size, siz_t offset_size, malloc_ft malloc_fp, free_ft free_fp, pool_t* restrict pool ); void bli_pool_finalize ( pool_t* restrict pool ); void bli_pool_reinit ( siz_t num_blocks_new, siz_t block_ptrs_len_new, siz_t block_size_new, siz_t align_size_new, siz_t offset_size_new, pool_t* restrict pool ); void bli_pool_checkout_block ( siz_t req_size, pblk_t* restrict block, pool_t* restrict pool ); void bli_pool_checkin_block ( pblk_t* restrict block, pool_t* restrict pool ); void bli_pool_grow ( siz_t num_blocks_add, pool_t* restrict pool ); void bli_pool_shrink ( siz_t num_blocks_sub, pool_t* restrict pool ); void bli_pool_alloc_block ( siz_t block_size, siz_t align_size, siz_t offset_size, malloc_ft malloc_fp, pblk_t* restrict block ); void bli_pool_free_block ( siz_t offset_size, free_ft free_fp, pblk_t* restrict block ); void bli_pool_print ( pool_t* restrict pool ); void bli_pblk_print ( pblk_t* restrict pblk ); #endif blis-0.6.1/frame/base/bli_prune.c000066400000000000000000000131041360743507500166070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_prune_unref_mparts( obj_t* p, mdim_t mdim_p, obj_t* s, mdim_t mdim_s ) { // If the primary object is general, it has no structure, and // therefore, no unreferenced parts. if ( bli_obj_is_general( p ) ) return; // If the primary object is BLIS_ZEROS, set the dimensions so that the // matrix is empty. This is not strictly needed but rather a minor // optimization, as it would prevent threads that would otherwise get // subproblems on BLIS_ZEROS operands from calling the macro-kernel, // because bli_thread_range*() would return empty ranges, which would // cause the variant's for loop from executing any iterations. // NOTE: this should only ever execute if the primary object is // triangular because that is the only structure type with subpartitions // that can be marked as BLIS_ZEROS. 
if ( bli_obj_is_triangular( p ) && bli_obj_is_zeros( p ) ) { bli_obj_set_dim( mdim_p, 0, p ); bli_obj_set_dim( mdim_s, 0, s ); return; } // If the primary object is hermitian, symmetric, or triangular, we // assume that the unstored region will be unreferenced (otherwise, // the caller should not be invoking this function on that object). //if ( bli_obj_is_herm_or_symm( p ) || // bli_obj_is_triangular( p ) ) { doff_t diagoff_p = bli_obj_diag_offset( p ); dim_t m = bli_obj_length( p ); dim_t n = bli_obj_width( p ); uplo_t uplo = bli_obj_uplo( p ); dim_t off_inc = 0; dim_t q; // Support implicit transposition on p and s. if ( bli_obj_has_trans( p ) ) { bli_reflect_about_diag( &diagoff_p, &uplo, &m, &n ); bli_toggle_dim( &mdim_p ); } if ( bli_obj_has_trans( s ) ) { bli_toggle_dim( &mdim_s ); } // Prune away any zero region of the matrix depending on the // dimension of the primary object being partitioned and the // triangle in which it is stored. if ( bli_obj_is_lower( p ) ) { if ( bli_is_m_dim( mdim_p ) ) { bli_prune_unstored_region_top_l( &diagoff_p, &m, &n, &off_inc ); } else // if ( bli_is_n_dim( mdim_p ) ) { bli_prune_unstored_region_right_l( &diagoff_p, &m, &n, &off_inc ); } } else if ( bli_obj_is_upper( p ) ) { if ( bli_is_m_dim( mdim_p ) ) { bli_prune_unstored_region_bottom_u( &diagoff_p, &m, &n, &off_inc ); } else // if ( bli_is_n_dim( mdim_p ) ) { bli_prune_unstored_region_left_u( &diagoff_p, &m, &n, &off_inc ); } } else if ( bli_obj_is_dense( p ) ) { // Hermitian, symmetric, and triangular matrices are almost // never dense, but if one were found to be dense, it would // have no unreferenced regions to prune. return; } else // if ( bli_obj_is_zeros( p ) ) { // Sanity check. Hermitian/symmetric matrices should never have // zero subpartitions. bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } // Select the (potentially modified) dimension along which we are // partitioning. 
if ( bli_is_m_dim( mdim_p ) ) q = m; else /* if ( bli_is_n_dim( mdim_p ) ) */ q = n; // Update the affected objects in case anything changed. Notice that // it is okay to update the dimension and diagonal offset fields of // packed primary objects, as long as we do so in tandem with the // secondary object to maintain conformality. This just means that // the "ignore-able" zero region is skipped over here, rather than // within the macro-kernel. bli_obj_set_diag_offset( diagoff_p, p ); bli_obj_set_dim( mdim_p, q, p ); bli_obj_set_dim( mdim_s, q, s ); // Only update the affected offset fields if the object in question // is NOT a packed object. Otherwise, bli_obj_buffer_at_off() will // compute the wrong address within the macro-kernel object wrapper. if ( !bli_obj_is_packed( p ) ) { bli_obj_inc_off( mdim_p, off_inc, p ); } if ( !bli_obj_is_packed( s ) ) { bli_obj_inc_off( mdim_s, off_inc, s ); } } } blis-0.6.1/frame/base/bli_prune.h000066400000000000000000000033671360743507500166260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_prune_unref_mparts( obj_t* p, mdim_t mdim_p, obj_t* s, mdim_t mdim_s ); blis-0.6.1/frame/base/bli_query.c000066400000000000000000000131621360743507500166270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" bool_t bli_obj_equals( obj_t* a, obj_t* b ) { bool_t r_val = FALSE; num_t dt_a; num_t dt_b; num_t dt; // The function is not yet implemented for vectors and matrices. if ( !bli_obj_is_1x1( a ) || !bli_obj_is_1x1( b ) ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); dt_a = bli_obj_dt( a ); dt_b = bli_obj_dt( b ); // If B is BLIS_CONSTANT, then we need to test equality based on the // datatype of A--this works even if A is also BLIS_CONSTANT. If B // is a regular non-constant type, then we should use its datatype // to test equality. if ( dt_b == BLIS_CONSTANT ) dt = dt_a; else dt = dt_b; // Now test equality based on the chosen datatype. if ( dt == BLIS_CONSTANT ) { dcomplex* ap_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, a ); dcomplex* bp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, b ); // We only test equality for one datatype (double complex) since // we expect either all fields within the constant to be equal or // none to be equal. Therefore, we can just test one of them. 
r_val = bli_zeqa( ap_z, bp_z ); } else { void* buf_a = bli_obj_buffer_for_1x1( dt, a ); void* buf_b = bli_obj_buffer_for_1x1( dt, b ); if ( dt == BLIS_FLOAT ) r_val = bli_seqa( buf_a, buf_b ); else if ( dt == BLIS_DOUBLE ) r_val = bli_deqa( buf_a, buf_b ); else if ( dt == BLIS_SCOMPLEX ) r_val = bli_ceqa( buf_a, buf_b ); else if ( dt == BLIS_DCOMPLEX ) r_val = bli_zeqa( buf_a, buf_b ); else if ( dt == BLIS_INT ) r_val = bli_ieqa( buf_a, buf_b ); } return r_val; } bool_t bli_obj_imag_equals( obj_t* a, obj_t* b ) { #if 0 bool_t r_val = FALSE; num_t dt_a; num_t dt_b; dt_a = bli_obj_dt( a ); dt_b = bli_obj_dt( b ); // The function is not yet implemented for vectors and matrices. if ( !bli_obj_is_1x1( a ) || !bli_obj_is_1x1( b ) || bli_is_constant( dt_a ) || bli_is_complex( dt_b ) ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); // Handle the special (trivial) case where a is real, in which // case all we have to do is test whether b is zero. if ( bli_is_real( dt_a ) ) { r_val = bli_obj_equals( &BLIS_ZERO, b ); } else // if ( bli_is_complex( dt_a ) ) { num_t dt_a_real = bli_dt_proj_to_real( dt_a ); // Now we compare the imaginary part of a to b. Notice that since // we are using bli_obj_buffer_for_1x1() to acquire the buffer for // b, this works regardless of whether b is BLIS_CONSTANT. if ( dt_a == BLIS_SCOMPLEX ) { scomplex* ap_c = bli_obj_buffer_at_off( a ); float* bp_c = bli_obj_buffer_for_1x1( dt_a_real, b ); r_val = bli_seq( bli_cimag( *ap_c ), *bp_c ); } else if ( dt_a == BLIS_DCOMPLEX ) { dcomplex* ap_z = bli_obj_buffer_at_off( a ); double* bp_z = bli_obj_buffer_for_1x1( dt_a_real, b ); r_val = bli_deq( bli_zimag( *ap_z ), *bp_z ); } } #endif bool_t r_val = FALSE; // The function is not yet implemented for vectors and matrices. if ( !bli_obj_is_1x1( a ) || !bli_obj_is_1x1( b ) || bli_obj_is_complex( b ) ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); double a_r, a_i; double b_r, b_i; // Get the real and imaginary parts of a and cast them to local doubles. 
bli_getsc( a, &a_r, &a_i ); // Get the value of b and cast to a local double. (Note: the imaginary part // of b is ignored since we know b is real.) bli_getsc( b, &b_r, &b_i ); // Compare the imaginary part of a to the real part of b. if ( a_i == b_r ) r_val = TRUE; return r_val; } bool_t bli_obj_imag_is_zero( obj_t* a ) { bool_t r_val = TRUE; // The function is not yet implemented for vectors and matrices. if ( !bli_obj_is_1x1( a ) ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); if ( bli_obj_is_complex( a ) ) { double a_r, a_i; // Get the real and imaginary parts and cast them to local doubles. bli_getsc( a, &a_r, &a_i ); // Compare the imaginary part of a to double-precision zero. if ( !bli_deq0( a_i ) ) r_val = FALSE; } return r_val; } blis-0.6.1/frame/base/bli_query.h000066400000000000000000000035061360743507500166350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ BLIS_EXPORT_BLIS bool_t bli_obj_equals( obj_t* a, obj_t* b ); BLIS_EXPORT_BLIS bool_t bli_obj_imag_equals( obj_t* a, obj_t* b ); BLIS_EXPORT_BLIS bool_t bli_obj_imag_is_zero( obj_t* a ); blis-0.6.1/frame/base/bli_rntm.c000066400000000000000000000172471360743507500164520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // The global rntm_t structure, which holds the global thread settings // along with a few other key parameters. rntm_t global_rntm; // A mutex to allow synchronous access to global_rntm. bli_pthread_mutex_t global_rntm_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; // ---------------------------------------------------------------------------- void bli_rntm_init_from_global( rntm_t* rntm ) { // We must ensure that global_rntm has been initialized. bli_init_once(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); *rntm = global_rntm; // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); } // ----------------------------------------------------------------------------- void bli_rntm_set_ways_for_op ( opid_t l3_op, side_t side, dim_t m, dim_t n, dim_t k, rntm_t* rntm ) { // Set the number of ways for each loop, if needed, depending on what // kind of information is already stored in the rntm_t object. bli_rntm_set_ways_from_rntm( m, n, k, rntm ); #if 0 printf( "bli_rntm_set_ways_for_op()\n" ); bli_rntm_print( rntm ); #endif // Now modify the number of ways, if necessary, based on the operation. 
if ( l3_op == BLIS_TRMM || l3_op == BLIS_TRSM ) { dim_t jc = bli_rntm_jc_ways( rntm ); dim_t pc = bli_rntm_pc_ways( rntm ); dim_t ic = bli_rntm_ic_ways( rntm ); dim_t jr = bli_rntm_jr_ways( rntm ); dim_t ir = bli_rntm_ir_ways( rntm ); // Notice that, if we do need to update the ways, we don't need to // update the num_threads field since we only reshuffle where the // parallelism is extracted, not the total amount of parallelism. if ( l3_op == BLIS_TRMM ) { // We reconfigure the parallelism extracted from trmm_r due to a // dependency in the jc loop. (NOTE: This dependency does not exist // for trmm3.) if ( bli_is_left( side ) ) { bli_rntm_set_ways_only ( jc, pc, ic, jr, ir, rntm ); } else // if ( bli_is_right( side ) ) { bli_rntm_set_ways_only ( 1, pc, ic, jr * jc, ir, rntm ); } } else if ( l3_op == BLIS_TRSM ) { //printf( "bli_rntm_set_ways_for_op(): jc%d ic%d jr%d\n", (int)jc, (int)ic, (int)jr ); if ( bli_is_left( side ) ) { bli_rntm_set_ways_only ( jc, 1, ic * pc, jr * ir, 1, rntm ); } else // if ( bli_is_right( side ) ) { bli_rntm_set_ways_only ( 1, 1, ic * pc * jc * ir * jr, 1, 1, rntm ); } } } } void bli_rntm_set_ways_from_rntm ( dim_t m, dim_t n, dim_t k, rntm_t* rntm ) { dim_t nt = bli_rntm_num_threads( rntm ); dim_t jc = bli_rntm_jc_ways( rntm ); dim_t pc = bli_rntm_pc_ways( rntm ); dim_t ic = bli_rntm_ic_ways( rntm ); dim_t jr = bli_rntm_jr_ways( rntm ); dim_t ir = bli_rntm_ir_ways( rntm ); #ifdef BLIS_ENABLE_MULTITHREADING bool_t nt_set = FALSE; bool_t ways_set = FALSE; // If the rntm was fed in as a copy of the global runtime via // bli_rntm_init_from_global(), we know that either the num_threads // field will be set and all of the ways unset, or vice versa. // However, we can't be sure that a user-provided rntm_t isn't // initialized uncleanly. So here we have to enforce some rules // to get the rntm_t into a predictable state. // First, we establish whether or not the number of threads is set. 
if ( nt > 0 ) nt_set = TRUE; // Next, we establish whether or not any of the ways of parallelism // for each loop were set. If any of the ways are set (positive), we // then we assume the user wanted to use those positive values and // default the non-positive values to 1. if ( jc > 0 || pc > 0 || ic > 0 || jr > 0 || ir > 0 ) { ways_set = TRUE; if ( jc < 1 ) jc = 1; if ( pc < 1 ) pc = 1; if ( ic < 1 ) ic = 1; if ( jr < 1 ) jr = 1; if ( ir < 1 ) ir = 1; } // Now we use the values of nt_set and ways_set to determine how to // interpret the original values we found in the rntm_t object. if ( ways_set == TRUE ) { // If the ways were set, then we use the values that were given // and interpreted above (we set any non-positive value to 1). // The only thing left to do is calculate the correct number of // threads. nt = jc * pc * ic * jr * ir; } else if ( ways_set == FALSE && nt_set == TRUE ) { // If the ways were not set but the number of threas was set, then // we attempt to automatically generate a thread factorization that // will work given the problem size. Thus, here we only set the // ways and leave the number of threads unchanged. pc = 1; bli_partition_2x2( nt, m*BLIS_THREAD_RATIO_M, n*BLIS_THREAD_RATIO_N, &ic, &jc ); for ( ir = BLIS_THREAD_MAX_IR ; ir > 1 ; ir-- ) { if ( ic % ir == 0 ) { ic /= ir; break; } } for ( jr = BLIS_THREAD_MAX_JR ; jr > 1 ; jr-- ) { if ( jc % jr == 0 ) { jc /= jr; break; } } } else // if ( ways_set == FALSE && nt_set == FALSE ) { // If neither the ways nor the number of threads were set, then // the rntm was not meaningfully changed since initialization, // and thus we'll default to single-threaded execution. nt = 1; jc = pc = ic = jr = ir = 1; } #else // When multithreading is disabled, always set the rntm_t ways // values to 1. nt = 1; jc = pc = ic = jr = ir = 1; #endif // Save the results back in the runtime object. 
bli_rntm_set_num_threads_only( nt, rntm ); bli_rntm_set_ways_only( jc, pc, ic, jr, ir, rntm ); } void bli_rntm_print ( rntm_t* rntm ) { dim_t nt = bli_rntm_num_threads( rntm ); dim_t jc = bli_rntm_jc_ways( rntm ); dim_t pc = bli_rntm_pc_ways( rntm ); dim_t ic = bli_rntm_ic_ways( rntm ); dim_t jr = bli_rntm_jr_ways( rntm ); dim_t ir = bli_rntm_ir_ways( rntm ); printf( "rntm contents nt jc pc ic jr ir\n" ); printf( " %4d%4d%4d%4d%4d%4d\n", (int)nt, (int)jc, (int)pc, (int)ic, (int)jr, (int)ir ); } blis-0.6.1/frame/base/bli_rntm.h000066400000000000000000000222701360743507500164470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_RNTM_H #define BLIS_RNTM_H // Runtime object type (defined in bli_type_defs.h) /* typedef struct rntm_s { dim_t num_threads; dim_t* thrloop; dim_t pack_a; dim_t pack_b; bool_t l3_sup; pool_t* sba_pool; membrk_t* membrk; } rntm_t; */ // // -- rntm_t query (public API) ------------------------------------------------ // static dim_t bli_rntm_num_threads( rntm_t* rntm ) { return rntm->num_threads; } static dim_t bli_rntm_ways_for( bszid_t bszid, rntm_t* rntm ) { return rntm->thrloop[ bszid ]; } static dim_t bli_rntm_jc_ways( rntm_t* rntm ) { return bli_rntm_ways_for( BLIS_NC, rntm ); } static dim_t bli_rntm_pc_ways( rntm_t* rntm ) { return bli_rntm_ways_for( BLIS_KC, rntm ); } static dim_t bli_rntm_ic_ways( rntm_t* rntm ) { return bli_rntm_ways_for( BLIS_MC, rntm ); } static dim_t bli_rntm_jr_ways( rntm_t* rntm ) { return bli_rntm_ways_for( BLIS_NR, rntm ); } static dim_t bli_rntm_ir_ways( rntm_t* rntm ) { return bli_rntm_ways_for( BLIS_MR, rntm ); } static dim_t bli_rntm_pr_ways( rntm_t* rntm ) { return bli_rntm_ways_for( BLIS_KR, rntm ); } static bool_t bli_rntm_pack_a( rntm_t* rntm ) { return rntm->pack_a; } static bool_t bli_rntm_pack_b( rntm_t* rntm ) { return rntm->pack_b; } static bool_t bli_rntm_l3_sup( rntm_t* rntm ) { return rntm->l3_sup; } // // -- rntm_t query (internal use only) ----------------------------------------- // static pool_t* bli_rntm_sba_pool( rntm_t* rntm ) { return rntm->sba_pool; } static membrk_t* bli_rntm_membrk( 
rntm_t* rntm ) { return rntm->membrk; } static dim_t bli_rntm_equals( rntm_t* rntm1, rntm_t* rntm2 ) { const bool_t nt = bli_rntm_num_threads( rntm1 ) == bli_rntm_num_threads( rntm2 ); const bool_t jc = bli_rntm_jc_ways( rntm1 ) == bli_rntm_jc_ways( rntm2 ); const bool_t pc = bli_rntm_pc_ways( rntm1 ) == bli_rntm_pc_ways( rntm2 ); const bool_t ic = bli_rntm_ic_ways( rntm1 ) == bli_rntm_ic_ways( rntm2 ); const bool_t jr = bli_rntm_jr_ways( rntm1 ) == bli_rntm_jr_ways( rntm2 ); const bool_t ir = bli_rntm_ir_ways( rntm1 ) == bli_rntm_ir_ways( rntm2 ); const bool_t pr = bli_rntm_pr_ways( rntm1 ) == bli_rntm_pr_ways( rntm2 ); if ( nt && jc && pc && ic && jr && ir && pr ) return TRUE; else return FALSE; } // // -- rntm_t modification (internal use only) ---------------------------------- // static void bli_rntm_set_num_threads_only( dim_t nt, rntm_t* rntm ) { rntm->num_threads = nt; } static void bli_rntm_set_ways_for_only( bszid_t loop, dim_t n_ways, rntm_t* rntm ) { rntm->thrloop[ loop ] = n_ways; } static void bli_rntm_set_jc_ways_only( dim_t ways, rntm_t* rntm ) { bli_rntm_set_ways_for_only( BLIS_NC, ways, rntm ); } static void bli_rntm_set_pc_ways_only( dim_t ways, rntm_t* rntm ) { bli_rntm_set_ways_for_only( BLIS_KC, ways, rntm ); } static void bli_rntm_set_ic_ways_only( dim_t ways, rntm_t* rntm ) { bli_rntm_set_ways_for_only( BLIS_MC, ways, rntm ); } static void bli_rntm_set_jr_ways_only( dim_t ways, rntm_t* rntm ) { bli_rntm_set_ways_for_only( BLIS_NR, ways, rntm ); } static void bli_rntm_set_ir_ways_only( dim_t ways, rntm_t* rntm ) { bli_rntm_set_ways_for_only( BLIS_MR, ways, rntm ); } static void bli_rntm_set_pr_ways_only( dim_t ways, rntm_t* rntm ) { bli_rntm_set_ways_for_only( BLIS_KR, ways, rntm ); } static void bli_rntm_set_ways_only( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir, rntm_t* rntm ) { // Record the number of ways of parallelism per loop. 
bli_rntm_set_jc_ways_only( jc, rntm ); bli_rntm_set_pc_ways_only( pc, rntm ); bli_rntm_set_ic_ways_only( ic, rntm ); bli_rntm_set_jr_ways_only( jr, rntm ); bli_rntm_set_ir_ways_only( ir, rntm ); bli_rntm_set_pr_ways_only( 1, rntm ); } static void bli_rntm_set_sba_pool( pool_t* sba_pool, rntm_t* rntm ) { rntm->sba_pool = sba_pool; } static void bli_rntm_set_membrk( membrk_t* membrk, rntm_t* rntm ) { rntm->membrk = membrk; } static void bli_rntm_clear_num_threads_only( rntm_t* rntm ) { bli_rntm_set_num_threads_only( -1, rntm ); } static void bli_rntm_clear_ways_only( rntm_t* rntm ) { bli_rntm_set_ways_only( -1, -1, -1, -1, -1, rntm ); } static void bli_rntm_clear_sba_pool( rntm_t* rntm ) { bli_rntm_set_sba_pool( NULL, rntm ); } static void bli_rntm_clear_membrk( rntm_t* rntm ) { bli_rntm_set_membrk( NULL, rntm ); } // // -- rntm_t modification (public API) ----------------------------------------- // static void bli_rntm_set_num_threads( dim_t nt, rntm_t* rntm ) { // Record the total number of threads to use. bli_rntm_set_num_threads_only( nt, rntm ); // Set the individual ways of parallelism to default states. bli_rntm_clear_ways_only( rntm ); } static void bli_rntm_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir, rntm_t* rntm ) { // Record the number of ways of parallelism per loop. bli_rntm_set_jc_ways_only( jc, rntm ); bli_rntm_set_pc_ways_only( pc, rntm ); bli_rntm_set_ic_ways_only( ic, rntm ); bli_rntm_set_jr_ways_only( jr, rntm ); bli_rntm_set_ir_ways_only( ir, rntm ); bli_rntm_set_pr_ways_only( 1, rntm ); // Set the num_threads field to a default state. bli_rntm_clear_num_threads_only( rntm ); } static void bli_rntm_set_pack_a( bool_t pack_a, rntm_t* rntm ) { // Set the bool_t indicating whether matrix A should be packed. rntm->pack_a = pack_a; } static void bli_rntm_set_pack_b( bool_t pack_b, rntm_t* rntm ) { // Set the bool_t indicating whether matrix B should be packed. 
rntm->pack_b = pack_b; } static void bli_rntm_set_l3_sup( bool_t l3_sup, rntm_t* rntm ) { // Set the bool_t indicating whether level-3 sup handling is enabled. rntm->l3_sup = l3_sup; } static void bli_rntm_enable_l3_sup( rntm_t* rntm ) { bli_rntm_set_l3_sup( TRUE, rntm ); } static void bli_rntm_disable_l3_sup( rntm_t* rntm ) { bli_rntm_set_l3_sup( FALSE, rntm ); } // // -- rntm_t modification (internal use only) ---------------------------------- // static void bli_rntm_clear_pack_a( rntm_t* rntm ) { bli_rntm_set_pack_a( TRUE, rntm ); } static void bli_rntm_clear_pack_b( rntm_t* rntm ) { bli_rntm_set_pack_b( TRUE, rntm ); } static void bli_rntm_clear_l3_sup( rntm_t* rntm ) { bli_rntm_set_l3_sup( TRUE, rntm ); } // // -- rntm_t initialization ---------------------------------------------------- // // NOTE: Initialization is not necessary as long the user calls at least ONE // of the public "set" accessors, each of which guarantees that the rntm_t // will be in a good state upon return. #define BLIS_RNTM_INITIALIZER \ { \ .num_threads = -1, \ .thrloop = { -1, -1, -1, -1, -1, -1 }, \ .pack_a = TRUE, \ .pack_b = TRUE, \ .l3_sup = TRUE \ .sba_pool = NULL, \ .membrk = NULL, \ } \ static void bli_rntm_init( rntm_t* rntm ) { bli_rntm_clear_num_threads_only( rntm ); bli_rntm_clear_ways_only( rntm ); bli_rntm_clear_pack_a( rntm ); bli_rntm_clear_pack_b( rntm ); bli_rntm_clear_l3_sup( rntm ); bli_rntm_clear_sba_pool( rntm ); bli_rntm_clear_membrk( rntm ); } // ----------------------------------------------------------------------------- // Function prototypes BLIS_EXPORT_BLIS void bli_rntm_init_from_global( rntm_t* rntm ); BLIS_EXPORT_BLIS void bli_rntm_set_ways_for_op ( opid_t l3_op, side_t side, dim_t m, dim_t n, dim_t k, rntm_t* rntm ); void bli_rntm_set_ways_from_rntm ( dim_t m, dim_t n, dim_t k, rntm_t* rntm ); void bli_rntm_print ( rntm_t* rntm ); #endif blis-0.6.1/frame/base/bli_sba.c000066400000000000000000000116601360743507500162300ustar00rootroot00000000000000/* 
BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // The small block allocator: an apool_t of array_t of pool_t. 
static apool_t sba; apool_t* bli_sba_query( void ) { return &sba; } // ----------------------------------------------------------------------------- void bli_sba_init( void ) { bli_apool_init( &sba ); } void bli_sba_finalize( void ) { bli_apool_finalize( &sba ); } void* bli_sba_acquire ( rntm_t* restrict rntm, siz_t req_size ) { void* block; #ifdef BLIS_ENABLE_SBA_POOLS if ( rntm == NULL ) { block = bli_malloc_intl( req_size ); } else { pblk_t pblk; // Query the small block pool from the rntm. pool_t* restrict pool = bli_rntm_sba_pool( rntm ); // Query the block_size of the pool_t so that we can request the exact // size present. const siz_t block_size = bli_pool_block_size( pool ); // Sanity check: Make sure the requested size is no larger than the // block_size field of the pool. if ( block_size < req_size ) { printf( "bli_sba_acquire(): ** pool block_size is %d but req_size is %d.\n", ( int )block_size, ( int )req_size ); bli_abort(); } // Check out a block using the block_size queried above. bli_pool_checkout_block( block_size, &pblk, pool ); // The block address is stored within the pblk_t. block = bli_pblk_buf( &pblk ); } #else block = bli_malloc_intl( req_size ); #endif // Return the address obtained from the pblk_t. return block; } void bli_sba_release ( rntm_t* restrict rntm, void* restrict block ) { #ifdef BLIS_ENABLE_SBA_POOLS if ( rntm == NULL ) { bli_free_intl( block ); } else { pblk_t pblk; // Query the small block pool from the rntm. pool_t* restrict pool = bli_rntm_sba_pool( rntm ); // Query the block_size field from the pool. This is not super-important // for this particular application of the pool_t (that is, the "leaf" // component of the sba), but it seems like good housekeeping to maintain // the block_size field of the pblk_t in case its ever needed/read. const siz_t block_size = bli_pool_block_size( pool ); // Embed the block's memory address into a pblk_t, along with the // block_size queried from the pool. 
bli_pblk_set_buf( block, &pblk ); bli_pblk_set_block_size( block_size, &pblk ); // Check the pblk_t back into the pool_t. (It's okay that the pblk_t is // a local variable since its contents are copied into the pool's internal // data structure--an array of pblk_t.) bli_pool_checkin_block( &pblk, pool ); } #else bli_free_intl( block ); #endif } array_t* bli_sba_checkout_array ( const siz_t n_threads ) { #ifndef BLIS_ENABLE_SBA_POOLS return NULL; #endif return bli_apool_checkout_array( n_threads, &sba ); } void bli_sba_checkin_array ( array_t* restrict array ) { #ifndef BLIS_ENABLE_SBA_POOLS return; #endif bli_apool_checkin_array( array, &sba ); } void bli_sba_rntm_set_pool ( siz_t index, array_t* restrict array, rntm_t* restrict rntm ) { #ifndef BLIS_ENABLE_SBA_POOLS bli_rntm_set_sba_pool( NULL, rntm ); return; #endif // Query the pool_t* in the array_t corresponding to index. pool_t* restrict pool = bli_apool_array_elem( index, array ); // Embed the pool_t* into the rntm_t. bli_rntm_set_sba_pool( pool, rntm ); } blis-0.6.1/frame/base/bli_sba.h000066400000000000000000000045161360743507500162370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SBA_H #define BLIS_SBA_H apool_t* bli_sba_query( void ); // ----------------------------------------------------------------------------- void bli_sba_init( void ); void bli_sba_finalize( void ); array_t* bli_sba_checkout_array ( const siz_t n_threads ); void bli_sba_checkin_array ( array_t* restrict array ); void bli_sba_rntm_set_pool ( siz_t index, array_t* restrict array, rntm_t* restrict rntm ); void* bli_sba_acquire ( rntm_t* restrict rntm, siz_t req_size ); void bli_sba_release ( rntm_t* restrict rntm, void* restrict block ); #endif blis-0.6.1/frame/base/bli_setgetij.c000066400000000000000000000117761360743507500173110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" typedef void (*setijm_fp) ( double ar, double ai, dim_t i, dim_t j, void* restrict b, inc_t rs, inc_t cs ); static setijm_fp GENARRAY(ftypes_setijm,setijm); err_t bli_setijm ( double ar, double ai, dim_t i, dim_t j, obj_t* b ) { dim_t m = bli_obj_length( b ); dim_t n = bli_obj_width( b ); dim_t rs = bli_obj_row_stride( b ); dim_t cs = bli_obj_col_stride( b ); num_t dt = bli_obj_dt( b ); // Return error if i or j is beyond bounds of matrix/vector. if ( m <= i ) return BLIS_FAILURE; if ( n <= j ) return BLIS_FAILURE; // Don't modify scalar constants. if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE; // Query the pointer to the buffer at the adjusted offsets. void* b_p = bli_obj_buffer_at_off( b ); // Index into the function pointer array. setijm_fp f = ftypes_setijm[ dt ]; // Invoke the type-specific function. 
f ( ar, ai, i, j, b_p, rs, cs ); return BLIS_SUCCESS; } #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ double ar, \ double ai, \ dim_t i, \ dim_t j, \ void* restrict b, inc_t rs, inc_t cs \ ) \ { \ ctype* restrict b_cast = ( ctype* )b; \ \ ctype* restrict b_ij = b_cast + (i )*rs + (j )*cs; \ \ PASTEMAC2(z,ch,sets)( ar, ai, *b_ij ); \ } INSERT_GENTFUNC_BASIC0( setijm ) // ----------------------------------------------------------------------------- typedef void (*getijm_fp) ( dim_t i, dim_t j, void* restrict b, inc_t rs, inc_t cs, double* ar, double* ai ); static getijm_fp GENARRAY(ftypes_getijm,getijm); err_t bli_getijm ( dim_t i, dim_t j, obj_t* b, double* ar, double* ai ) { dim_t m = bli_obj_length( b ); dim_t n = bli_obj_width( b ); dim_t rs = bli_obj_row_stride( b ); dim_t cs = bli_obj_col_stride( b ); num_t dt = bli_obj_dt( b ); // Return error if i or j is beyond bounds of matrix/vector. if ( m <= i ) return BLIS_FAILURE; if ( n <= j ) return BLIS_FAILURE; void* b_p; #if 0 // Handle scalar constants separately. if ( dt == BLIS_CONSTANT ) { if ( i == 0 && j == 0 ) { dt = BLIS_DCOMPLEX; b_p = bli_obj_buffer_for_const( dt, b ) } else return BLIS_FAILURE; } else { // Query the pointer to the buffer at the adjusted offsets. b_p = bli_obj_buffer_at_off( b ); } #else // Disallow access into scalar constants. if ( dt == BLIS_CONSTANT ) return BLIS_FAILURE; // Query the pointer to the buffer at the adjusted offsets. b_p = bli_obj_buffer_at_off( b ); #endif // Index into the function pointer array. getijm_fp f = ftypes_getijm[ dt ]; // Invoke the type-specific function. 
f ( i, j, b_p, rs, cs, ar, ai ); return BLIS_SUCCESS; } #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t i, \ dim_t j, \ void* restrict b, inc_t rs, inc_t cs, \ double* ar, \ double* ai \ ) \ { \ ctype* restrict b_cast = ( ctype* )b; \ \ ctype* restrict b_ij = b_cast + (i )*rs + (j )*cs; \ \ PASTEMAC2(ch,z,gets)( *b_ij, *ar, *ai ); \ } INSERT_GENTFUNC_BASIC0( getijm ) blis-0.6.1/frame/base/bli_setgetij.h000066400000000000000000000051541360743507500173070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ BLIS_EXPORT_BLIS err_t bli_setijm ( double ar, double ai, dim_t i, dim_t j, obj_t* b ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ double ar, \ double ai, \ dim_t i, \ dim_t j, \ void* restrict b, inc_t rs, inc_t cs \ ); INSERT_GENTPROT_BASIC0( setijm ) // ----------------------------------------------------------------------------- BLIS_EXPORT_BLIS err_t bli_getijm ( dim_t i, dim_t j, obj_t* b, double* ar, double* ai ); #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ dim_t i, \ dim_t j, \ void* restrict b, inc_t rs, inc_t cs, \ double* ar, \ double* ai \ ); INSERT_GENTPROT_BASIC0( getijm ) blis-0.6.1/frame/base/bli_setri.c000066400000000000000000000113361360743507500166110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // -- setr --------------------------------------------------------------------- void bli_setrm ( obj_t* alpha, obj_t* b ) { obj_t alpha_real; obj_t br; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_setm_check( alpha, b ); // Initialize a local scalar, alpha_real, using the real projection // of the datatype of b. bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( b ), &alpha_real ); // Copy/typecast alpha to alpha_real. This discards the imaginary // part of alpha (if it is complex). bli_copysc( alpha, &alpha_real ); // Acquire an alias to the real part of b. bli_obj_real_part( b, &br ); // Use setm to set the real part of b to alpha_real. bli_setm( &alpha_real, &br ); } void bli_setrv ( obj_t* alpha, obj_t* x ) { obj_t alpha_real; obj_t xr; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_setv_check( alpha, x ); // Initialize a local scalar, alpha_real, using the real projection // of the datatype of x. 
bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( x ), &alpha_real ); // Copy/typecast alpha to alpha_real. This discards the imaginary // part of alpha (if it is complex). bli_copysc( alpha, &alpha_real ); // Acquire an alias to the real part of x. bli_obj_real_part( x, &xr ); // Use setv to set the real part of x to alpha_real. bli_setv( &alpha_real, &xr ); } // -- seti --------------------------------------------------------------------- void bli_setim ( obj_t* alpha, obj_t* b ) { obj_t alpha_real; obj_t bi; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_setm_check( alpha, b ); // If the object is real, return early. if ( bli_obj_is_real( b ) ) return; // Initialize a local scalar, alpha_real, using the real projection // of the datatype of b. bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( b ), &alpha_real ); // Copy/typecast alpha to alpha_real. This discards the imaginary // part of alpha (if it is complex). bli_copysc( alpha, &alpha_real ); // Acquire an alias to the imaginary part of b. bli_obj_imag_part( b, &bi ); // Use setm to set the imaginary part of b to alpha_real. bli_setm( &alpha_real, &bi ); } void bli_setiv ( obj_t* alpha, obj_t* x ) { obj_t alpha_real; obj_t xi; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_setv_check( alpha, x ); // If the object is real, return early. if ( bli_obj_is_real( x ) ) return; // Initialize a local scalar, alpha_real, using the real projection // of the datatype of x. bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( x ), &alpha_real ); // Copy/typecast alpha to alpha_real. This discards the imaginary // part of alpha (if it is complex). bli_copysc( alpha, &alpha_real ); // Acquire an alias to the imaginary part of x. bli_obj_imag_part( x, &xi ); // Use setm to set the imaginary part of x to alpha_real. 
bli_setm( &alpha_real, &xi ); } blis-0.6.1/frame/base/bli_setri.h000066400000000000000000000042011360743507500166070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // -- setr --------------------------------------------------------------------- BLIS_EXPORT_BLIS void bli_setrm ( obj_t* alpha, obj_t* b ); BLIS_EXPORT_BLIS void bli_setrv ( obj_t* alpha, obj_t* x ); // -- seti --------------------------------------------------------------------- BLIS_EXPORT_BLIS void bli_setim ( obj_t* alpha, obj_t* b ); BLIS_EXPORT_BLIS void bli_setiv ( obj_t* alpha, obj_t* x ); blis-0.6.1/frame/base/bli_string.c000066400000000000000000000035621360743507500167730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_string_mkupper( char* s ) { // Convert the string to uppercase. for ( ; *s != '\0'; s++ ) { // Convert to unsigned in case one of the chars is negative. *s = toupper( ( unsigned char ) *s ); } } blis-0.6.1/frame/base/bli_string.h000066400000000000000000000032551360743507500167770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void bli_string_mkupper( char* s ); blis-0.6.1/frame/base/bli_winsys.c000066400000000000000000000045111360743507500170140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef _MSC_VER #include #else #include #endif #if 0 // NOTE: This function is no longer needed by BLIS since BLIS no longer // makes any attempt to change environment variables; rather, it only // reads them. We can keep it here for some time before removing it, // though. int bli_setenv( const char *name, const char *value, int overwrite ) { #ifdef _MSC_VER // Windows. _putenv_s( name, value ); #else // Everything else: Linux, OS X, etc. setenv( name, value, overwrite ); #endif } #endif void bli_sleep( unsigned int secs ) { #ifdef _MSC_VER // Windows. Sleep( secs * 1000 ); #else // Everything else: Linux, OS X, etc. sleep( secs ); #endif } blis-0.6.1/frame/base/bli_winsys.h000066400000000000000000000034111360743507500170170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ //int bli_setenv( const char *name, const char *value, int overwrite ); BLIS_EXPORT_BLIS void bli_sleep( unsigned int secs ); blis-0.6.1/frame/base/cast/000077500000000000000000000000001360743507500154175ustar00rootroot00000000000000blis-0.6.1/frame/base/cast/bli_castm.c000066400000000000000000000153071360743507500175260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // NOTE: This is one of the few functions in BLIS that is defined // with heterogeneous type support. This is done so that we have // an operation that can be used to typecast (copy-cast) a matrix // of one datatype to a scalar of another datatype. typedef void (*FUNCPTR_T) ( trans_t transa, dim_t m, dim_t n, void* restrict a, inc_t rs_a, inc_t cs_a, void* restrict b, inc_t rs_b, inc_t cs_b ); static FUNCPTR_T GENARRAY2_ALL(ftypes,castm); // // Define object-based interface. // void bli_castm ( obj_t* a, obj_t* b ) { num_t dt_a = bli_obj_dt( a ); num_t dt_b = bli_obj_dt( b ); trans_t transa = bli_obj_conjtrans_status( a ); dim_t m = bli_obj_length( b ); dim_t n = bli_obj_width( b ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a = bli_obj_row_stride( a ); inc_t cs_a = bli_obj_col_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t cs_b = bli_obj_col_stride( b ); FUNCPTR_T f; // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_castm_check( a, b ); #if 0 if ( bli_obj_dt( a ) == bli_obj_dt( b ) ) { // If a and b share the same datatype, we can simply use copym. bli_copym( a, b ); return; } #endif // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_a][dt_b]; // Invoke the void pointer-based function. f ( transa, m, n, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b ); } // ----------------------------------------------------------------------------- // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNC2 #define GENTFUNC2( ctype_a, ctype_b, cha, chb, opname ) \ \ void PASTEMAC2(cha,chb,opname) \ ( \ trans_t transa, \ dim_t m, \ dim_t n, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b \ ) \ { \ ctype_a* restrict a_cast = a; \ ctype_b* restrict b_cast = b; \ conj_t conja; \ dim_t n_iter; \ dim_t n_elem; \ inc_t lda, inca; \ inc_t ldb, incb; \ dim_t j, i; \ \ /* Set various loop parameters. */ \ bli_set_dims_incs_2m \ ( \ transa, \ m, n, rs_a, cs_a, rs_b, cs_b, \ &n_elem, &n_iter, &inca, &lda, &incb, &ldb \ ); \ \ /* Extract the conjugation component from the transa parameter. 
*/ \ conja = bli_extract_conj( transa ); \ \ if ( bli_is_conj( conja ) ) \ { \ if ( inca == 1 && incb == 1 ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ \ for ( i = 0; i < n_elem; ++i ) \ { \ PASTEMAC2(cha,chb,copyjs)( a1[i], b1[i] ); \ } \ } \ } \ else \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ \ for ( i = 0; i < n_elem; ++i ) \ { \ PASTEMAC2(cha,chb,copyjs)( *a1, *b1 ); \ \ a1 += inca; \ b1 += incb; \ } \ } \ } \ } \ else \ { \ if ( inca == 1 && incb == 1 ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ \ for ( i = 0; i < n_elem; ++i ) \ { \ PASTEMAC2(cha,chb,copys)( a1[i], b1[i] ); \ } \ } \ } \ else \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ \ for ( i = 0; i < n_elem; ++i ) \ { \ PASTEMAC2(cha,chb,copys)( *a1, *b1 ); \ \ a1 += inca; \ b1 += incb; \ } \ } \ } \ } \ } INSERT_GENTFUNC2_BASIC0( castm ) INSERT_GENTFUNC2_MIXDP0( castm ) // ----------------------------------------------------------------------------- // // Define object-based _check() function. // void bli_castm_check ( obj_t* a, obj_t* b ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( b ); bli_check_error_code( e_val ); // Check structure. // NOTE: We enforce general structure for now in order to simplify the // implementation. bli_check_general_object( a ); bli_check_error_code( e_val ); bli_check_general_object( b ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( b ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( a, b ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( b ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/cast/bli_castm.h000066400000000000000000000044721360743507500175340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interface. // BLIS_EXPORT_BLIS void bli_castm ( obj_t* a, obj_t* b ); // // Prototype BLAS-like interfaces with heterogeneous-typed operands. // #undef GENTPROT2 #define GENTPROT2( ctype_a, ctype_b, cha, chb, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(cha,chb,opname) \ ( \ trans_t transa, \ dim_t m, \ dim_t n, \ void* a, inc_t rs_a, inc_t cs_a, \ void* b, inc_t rs_b, inc_t cs_b \ ); INSERT_GENTPROT2_BASIC0( castm ) INSERT_GENTPROT2_MIXDP0( castm ) // // Prototype object-based _check() function. // void bli_castm_check ( obj_t* a, obj_t* b ); blis-0.6.1/frame/base/cast/bli_castnzm.c000066400000000000000000000153331360743507500200750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // NOTE: This is one of the few functions in BLIS that is defined // with heterogeneous type support. This is done so that we have // an operation that can be used to typecast (copy-cast) a matrix // of one datatype to a scalar of another datatype. typedef void (*FUNCPTR_T) ( trans_t transa, dim_t m, dim_t n, void* restrict a, inc_t rs_a, inc_t cs_a, void* restrict b, inc_t rs_b, inc_t cs_b ); static FUNCPTR_T GENARRAY2_ALL(ftypes,castnzm); // // Define object-based interface. // void bli_castnzm ( obj_t* a, obj_t* b ) { num_t dt_a = bli_obj_dt( a ); num_t dt_b = bli_obj_dt( b ); trans_t transa = bli_obj_conjtrans_status( a ); dim_t m = bli_obj_length( b ); dim_t n = bli_obj_width( b ); void* buf_a = bli_obj_buffer_at_off( a ); inc_t rs_a = bli_obj_row_stride( a ); inc_t cs_a = bli_obj_col_stride( a ); void* buf_b = bli_obj_buffer_at_off( b ); inc_t rs_b = bli_obj_row_stride( b ); inc_t cs_b = bli_obj_col_stride( b ); FUNCPTR_T f; // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_castnzm_check( a, b ); #if 0 if ( bli_obj_dt( a ) == bli_obj_dt( b ) ) { // If a and b share the same datatype, we can simply use copym. bli_copym( a, b ); return; } #endif // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_a][dt_b]; // Invoke the void pointer-based function. f ( transa, m, n, buf_a, rs_a, cs_a, buf_b, rs_b, cs_b ); } // ----------------------------------------------------------------------------- // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNC2 #define GENTFUNC2( ctype_a, ctype_b, cha, chb, opname ) \ \ void PASTEMAC2(cha,chb,opname) \ ( \ trans_t transa, \ dim_t m, \ dim_t n, \ void* restrict a, inc_t rs_a, inc_t cs_a, \ void* restrict b, inc_t rs_b, inc_t cs_b \ ) \ { \ ctype_a* restrict a_cast = a; \ ctype_b* restrict b_cast = b; \ conj_t conja; \ dim_t n_iter; \ dim_t n_elem; \ inc_t lda, inca; \ inc_t ldb, incb; \ dim_t j, i; \ \ /* Set various loop parameters. */ \ bli_set_dims_incs_2m \ ( \ transa, \ m, n, rs_a, cs_a, rs_b, cs_b, \ &n_elem, &n_iter, &inca, &lda, &incb, &ldb \ ); \ \ /* Extract the conjugation component from the transa parameter. 
*/ \ conja = bli_extract_conj( transa ); \ \ if ( bli_is_conj( conja ) ) \ { \ if ( inca == 1 && incb == 1 ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ \ for ( i = 0; i < n_elem; ++i ) \ { \ PASTEMAC2(cha,chb,copyjnzs)( a1[i], b1[i] ); \ } \ } \ } \ else \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ \ for ( i = 0; i < n_elem; ++i ) \ { \ PASTEMAC2(cha,chb,copyjnzs)( *a1, *b1 ); \ \ a1 += inca; \ b1 += incb; \ } \ } \ } \ } \ else \ { \ if ( inca == 1 && incb == 1 ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ \ for ( i = 0; i < n_elem; ++i ) \ { \ PASTEMAC2(cha,chb,copynzs)( a1[i], b1[i] ); \ } \ } \ } \ else \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ \ for ( i = 0; i < n_elem; ++i ) \ { \ PASTEMAC2(cha,chb,copynzs)( *a1, *b1 ); \ \ a1 += inca; \ b1 += incb; \ } \ } \ } \ } \ } INSERT_GENTFUNC2_BASIC0( castnzm ) INSERT_GENTFUNC2_MIXDP0( castnzm ) // ----------------------------------------------------------------------------- // // Define object-based _check() function. // void bli_castnzm_check ( obj_t* a, obj_t* b ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( b ); bli_check_error_code( e_val ); // Check structure. // NOTE: We enforce general structure for now in order to simplify the // implementation. bli_check_general_object( a ); bli_check_error_code( e_val ); bli_check_general_object( b ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( b ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( a, b ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( b ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/cast/bli_castnzm.h000066400000000000000000000045021360743507500200760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interface. // BLIS_EXPORT_BLIS void bli_castnzm ( obj_t* a, obj_t* b ); // // Prototype BLAS-like interfaces with heterogeneous-typed operands. // #undef GENTPROT2 #define GENTPROT2( ctype_a, ctype_b, cha, chb, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(cha,chb,opname) \ ( \ trans_t transa, \ dim_t m, \ dim_t n, \ void* a, inc_t rs_a, inc_t cs_a, \ void* b, inc_t rs_b, inc_t cs_b \ ); INSERT_GENTPROT2_BASIC0( castnzm ) INSERT_GENTPROT2_MIXDP0( castnzm ) // // Prototype object-based _check() function. // void bli_castnzm_check ( obj_t* a, obj_t* b ); blis-0.6.1/frame/base/cast/bli_castv.c000066400000000000000000000120511360743507500175300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // NOTE: This is one of the few functions in BLIS that is defined // with heterogeneous type support. This is done so that we have // an operation that can be used to typecast (copy-cast) a matrix // of one datatype to a scalar of another datatype. typedef void (*FUNCPTR_T) ( conj_t conjx, dim_t n, void* restrict x, inc_t inc_x, void* restrict y, inc_t inc_y ); static FUNCPTR_T GENARRAY2_ALL(ftypes,castv); // // Define object-based interface. // void bli_castv ( obj_t* x, obj_t* y ) { num_t dt_x = bli_obj_dt( x ); num_t dt_y = bli_obj_dt( y ); conj_t conjx = bli_obj_conj_status( x ); dim_t n = bli_obj_vector_dim( x ); void* buf_x = bli_obj_buffer_at_off( x ); inc_t inc_x = bli_obj_vector_inc( x ); void* buf_y = bli_obj_buffer_at_off( y ); inc_t inc_y = bli_obj_vector_inc( y ); FUNCPTR_T f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_castv_check( x, y ); #if 0 if ( bli_obj_dt( x ) == bli_obj_dt( y ) ) { // If x and y share the same datatype, we can simply use copyv. 
bli_copyv( x, y ); return; } #endif // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt_x][dt_y]; // Invoke the void pointer-based function. f ( conjx, n, buf_x, inc_x, buf_y, inc_y ); } // ----------------------------------------------------------------------------- // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNC2 #define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname ) \ \ void PASTEMAC2(chx,chy,opname) \ ( \ conj_t conjx, \ dim_t n, \ void* restrict x, inc_t incx, \ void* restrict y, inc_t incy \ ) \ { \ ctype_x* restrict x1 = x; \ ctype_y* restrict y1 = y; \ dim_t i; \ \ if ( bli_is_conj( conjx ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ for ( i = 0; i < n; ++i ) \ { \ PASTEMAC2(chx,chy,copyjs)( x1[i], y1[i] ); \ } \ } \ else \ { \ for ( i = 0; i < n; ++i ) \ { \ PASTEMAC2(chx,chy,copyjs)( *x1, *y1 ); \ \ x1 += incx; \ y1 += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ for ( i = 0; i < n; ++i ) \ { \ PASTEMAC2(chx,chy,copys)( x1[i], y1[i] ); \ } \ } \ else \ { \ for ( i = 0; i < n; ++i ) \ { \ PASTEMAC2(chx,chy,copys)( *x1, *y1 ); \ \ x1 += incx; \ y1 += incy; \ } \ } \ } \ } INSERT_GENTFUNC2_BASIC0( castv ) INSERT_GENTFUNC2_MIXDP0( castv ) // ----------------------------------------------------------------------------- // // Define object-based _check() function. // void bli_castv_check ( obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/cast/bli_castv.h000066400000000000000000000044151360743507500175420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interface. // BLIS_EXPORT_BLIS void bli_castv ( obj_t* x, obj_t* y ); // // Prototype BLAS-like interfaces with heterogeneous-typed operands. 
// #undef GENTPROT2 #define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(chx,chy,opname) \ ( \ conj_t conjx, \ dim_t n, \ void* x, inc_t incx, \ void* y, inc_t incy \ ); INSERT_GENTPROT2_BASIC0( castv ) INSERT_GENTPROT2_MIXDP0( castv ) // // Prototype object-based _check() function. // void bli_castv_check ( obj_t* x, obj_t* y ); blis-0.6.1/frame/base/cast/old/000077500000000000000000000000001360743507500161755ustar00rootroot00000000000000blis-0.6.1/frame/base/cast/old/bli_cast_check.c000066400000000000000000000064261360743507500212660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_castm_check ( obj_t* a, obj_t* b ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( b ); bli_check_error_code( e_val ); // Check structure. // NOTE: We enforce general structure for now in order to simplify the // implementation. bli_check_general_object( a ); bli_check_error_code( e_val ); bli_check_general_object( b ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( b ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( a, b ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( b ); bli_check_error_code( e_val ); } void bli_castv_check ( obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/cast/old/bli_cast_check.h000066400000000000000000000034241360743507500212660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_castm_check ( obj_t* a, obj_t* b ); void bli_castv_check ( obj_t* x, obj_t* y ); blis-0.6.1/frame/base/check/000077500000000000000000000000001360743507500155425ustar00rootroot00000000000000blis-0.6.1/frame/base/check/bli_obj_check.c000066400000000000000000000131441360743507500204460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_obj_create_check( num_t dt, dim_t m, dim_t n, inc_t rs, inc_t cs, obj_t* obj ) { err_t e_val; e_val = bli_check_valid_datatype( dt ); bli_check_error_code( e_val ); e_val = bli_check_matrix_strides( m, n, rs, cs, 1 ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); } void bli_obj_create_without_buffer_check( num_t dt, dim_t m, dim_t n, obj_t* obj ) { err_t e_val; e_val = bli_check_valid_datatype( dt ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); } void bli_obj_alloc_buffer_check( inc_t rs, inc_t cs, inc_t is, obj_t* obj ) { err_t e_val; e_val = bli_check_matrix_strides( bli_obj_length( obj ), bli_obj_width( obj ), rs, cs, is ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); } void bli_obj_attach_buffer_check( void* p, inc_t rs, inc_t cs, inc_t is, obj_t* obj ) { err_t e_val; // NOTE: We allow the caller to attach NULL to an object because // the buffer contains NULL after _create_wihout_buffer() anyway. // Thus, we're not opening a window for undefined behavior because // that window is already open. Instead of checking for NULL here, // we check the object buffers for all objects in all of the // computational operations' _check()/_int_check() functions. 
//e_val = bli_check_null_pointer( p ); //bli_check_error_code( e_val ); e_val = bli_check_matrix_strides( bli_obj_length( obj ), bli_obj_width( obj ), rs, cs, is ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); } void bli_obj_create_scalar_check( num_t dt, obj_t* obj ) { err_t e_val; e_val = bli_check_valid_datatype( dt ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); } void bli_obj_free_check( obj_t* obj ) { //err_t e_val; // We don't bother checking for null-ness since bli_obj_free() // handles null pointers safely. //e_val = bli_check_null_pointer( obj ); //bli_check_error_code( e_val ); } void bli_obj_create_const_check( double value, obj_t* obj ) { err_t e_val; e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); } #if 0 void bli_obj_create_const_copy_of_check( obj_t* a, obj_t* b ) { err_t e_val; e_val = bli_check_null_pointer( a ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( b ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); } #endif void bli_dt_size_check( num_t dt ) { err_t e_val; e_val = bli_check_valid_datatype( dt ); bli_check_error_code( e_val ); } void bli_dt_string_check( num_t dt ) { err_t e_val; e_val = bli_check_nonconstant_datatype( dt ); bli_check_error_code( e_val ); } void bli_dt_union_check( num_t dt1, num_t dt2 ) { err_t e_val; e_val = bli_check_floating_datatype( dt1 ); bli_check_error_code( e_val ); e_val = bli_check_floating_datatype( dt2 ); bli_check_error_code( e_val ); } void bli_obj_print_check( char* label, obj_t* obj ) { err_t e_val; e_val = bli_check_null_pointer( label ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); } 
blis-0.6.1/frame/base/check/bli_obj_check.h000066400000000000000000000056251360743507500204600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_obj_create_check( num_t dt, dim_t m, dim_t n, inc_t rs, inc_t cs, obj_t* obj ); void bli_obj_create_without_buffer_check( num_t dt, dim_t m, dim_t n, obj_t* obj ); void bli_obj_alloc_buffer_check( inc_t rs, inc_t cs, inc_t is, obj_t* obj ); void bli_obj_attach_buffer_check( void* p, inc_t rs, inc_t cs, inc_t is, obj_t* obj ); void bli_obj_create_scalar_check( num_t dt, obj_t* obj ); void bli_obj_free_check( obj_t* obj ); void bli_obj_create_const_check( double value, obj_t* obj ); void bli_obj_create_const_copy_of_check( obj_t* a, obj_t* b ); void bli_dt_size_check( num_t dt ); void bli_dt_string_check( num_t dt ); void bli_dt_union_check( num_t dt1, num_t dt2 ); void bli_obj_print_check( char* label, obj_t* obj ); blis-0.6.1/frame/base/check/bli_part_check.c000066400000000000000000000062771360743507500206530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_acquire_mpart_t2b_check( subpart_t requested_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ) { err_t e_val; e_val = bli_check_valid_3x1_subpart( requested_part ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( sub_obj ); bli_check_error_code( e_val ); } void bli_acquire_mpart_l2r_check( subpart_t requested_part, dim_t j, dim_t b, obj_t* obj, obj_t* sub_obj ) { err_t e_val; e_val = bli_check_valid_1x3_subpart( requested_part ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( sub_obj ); bli_check_error_code( e_val ); } void bli_acquire_mpart_tl2br_check( subpart_t requested_part, dim_t ij, dim_t b, obj_t* obj, obj_t* sub_obj ) { err_t e_val; e_val = bli_check_valid_3x3_subpart( requested_part ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( obj ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( sub_obj ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/check/bli_part_check.h000066400000000000000000000046541360743507500206550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_acquire_mpart_t2b_check( subpart_t requested_part, dim_t i, dim_t b, obj_t* obj, obj_t* sub_obj ); void bli_acquire_mpart_l2r_check( subpart_t requested_part, dim_t j, dim_t b, obj_t* obj, obj_t* sub_obj ); void bli_acquire_mpart_tl2br_check( subpart_t requested_part, dim_t ij, dim_t b, obj_t* obj, obj_t* sub_obj ); blis-0.6.1/frame/base/noopt/000077500000000000000000000000001360743507500156245ustar00rootroot00000000000000blis-0.6.1/frame/base/noopt/bli_dlamch.c000066400000000000000000000660361360743507500200610ustar00rootroot00000000000000/* dlamch.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #ifdef __cplusplus extern "C" { #endif #include "blis.h" double bli_pow_di( bla_double* a, bla_integer* n ); /* Table of constant values */ //static bla_integer c__1 = 1; static bla_double c_b32 = 0.; double bli_pow_di(bla_double *ap, bla_integer *bp) { double pow, x; bla_integer n; unsigned long u; pow = 1; x = *ap; n = *bp; if( n != 0 ) { if( n < 0 ) { n = -n; x = 1/x; } for( u = n; ; ) { if( u & 01 ) pow *= x; if( u >>= 1 ) x *= x; else break; } } return pow; } bla_double bli_dlamch(bla_character *cmach, ftnlen cmach_len) { /* Initialized data */ static bla_logical first = TRUE_; /* System generated locals */ bla_integer i__1; bla_double ret_val; /* Builtin functions */ double bli_pow_di(bla_double *, bla_integer *); /* Local variables */ static bla_double base; static bla_integer beta; static bla_double emin, prec, emax; static bla_integer imin, imax; static bla_logical lrnd; static bla_double rmin, rmax, t, rmach; extern bla_logical bli_lsame(bla_character *, bla_character *, ftnlen, ftnlen); static bla_double smnum, sfmin; extern /* Subroutine */ int bli_dlamc2(bla_integer *, bla_integer *, bla_logical *, bla_double *, bla_integer *, bla_double *, bla_integer *, bla_double *); static bla_integer it; static bla_double rnd, eps; /* -- LAPACK auxiliary routine (version 3.2) 
-- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMCH determines double precision machine parameters. */ /* Arguments */ /* ========= */ /* CMACH (input) CHARACTER*1 */ /* Specifies the value to be returned by DLAMCH: */ /* = 'E' or 'e', DLAMCH := eps */ /* = 'S' or 's , DLAMCH := sfmin */ /* = 'B' or 'b', DLAMCH := base */ /* = 'P' or 'p', DLAMCH := eps*base */ /* = 'N' or 'n', DLAMCH := t */ /* = 'R' or 'r', DLAMCH := rnd */ /* = 'M' or 'm', DLAMCH := emin */ /* = 'U' or 'u', DLAMCH := rmin */ /* = 'L' or 'l', DLAMCH := emax */ /* = 'O' or 'o', DLAMCH := rmax */ /* where */ /* eps = relative machine precision */ /* sfmin = safe minimum, such that 1/sfmin does not overflow */ /* base = base of the machine */ /* prec = eps*base */ /* t = number of (base) digits in the mantissa */ /* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */ /* emin = minimum exponent before (gradual) underflow */ /* rmin = underflow threshold - base**(emin-1) */ /* emax = largest exponent before overflow */ /* rmax = overflow threshold - (base**emax)*(1-eps) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ if (first) { bli_dlamc2(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); base = (bla_double) beta; t = (bla_double) it; if (lrnd) { rnd = 1.; i__1 = 1 - it; eps = bli_pow_di(&base, &i__1) / 2; } else { rnd = 0.; i__1 = 1 - it; eps = bli_pow_di(&base, &i__1); } prec = eps * base; emin = (bla_double) imin; emax = (bla_double) imax; sfmin = rmin; smnum = 1. 
/ rmax; if (smnum >= sfmin) { /* Use SMALL plus a bit, to avoid the possibility of rounding */ /* causing overflow when computing 1/sfmin. */ sfmin = smnum * (eps + 1.); } } if (bli_lsame(cmach, "E", (ftnlen)1, (ftnlen)1)) { rmach = eps; } else if (bli_lsame(cmach, "S", (ftnlen)1, (ftnlen)1)) { rmach = sfmin; } else if (bli_lsame(cmach, "B", (ftnlen)1, (ftnlen)1)) { rmach = base; } else if (bli_lsame(cmach, "P", (ftnlen)1, (ftnlen)1)) { rmach = prec; } else if (bli_lsame(cmach, "N", (ftnlen)1, (ftnlen)1)) { rmach = t; } else if (bli_lsame(cmach, "R", (ftnlen)1, (ftnlen)1)) { rmach = rnd; } else if (bli_lsame(cmach, "M", (ftnlen)1, (ftnlen)1)) { rmach = emin; } else if (bli_lsame(cmach, "U", (ftnlen)1, (ftnlen)1)) { rmach = rmin; } else if (bli_lsame(cmach, "L", (ftnlen)1, (ftnlen)1)) { rmach = emax; } else if (bli_lsame(cmach, "O", (ftnlen)1, (ftnlen)1)) { rmach = rmax; } ret_val = rmach; first = FALSE_; return ret_val; /* End of DLAMCH */ } /* bli_dlamch_ */ /* *********************************************************************** */ /* Subroutine */ int bli_dlamc1(bla_integer *beta, bla_integer *t, bla_logical *rnd, bla_logical *ieee1) { /* Initialized data */ static bla_logical first = TRUE_; /* System generated locals */ bla_double d__1, d__2; /* Local variables */ static bla_logical lrnd; static bla_double a, b, c__, f; static bla_integer lbeta; static bla_double savec; extern bla_double bli_dlamc3(bla_double *, bla_double *); static bla_logical lieee1; static bla_double t1, t2; static bla_integer lt; static bla_double one, qtr; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC1 determines the machine parameters given by BETA, T, RND, and */ /* IEEE1. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. 
*/ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. ) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* IEEE1 (output) LOGICAL */ /* Specifies whether rounding appears to be done in the IEEE */ /* 'round to nearest' style. */ /* Further Details */ /* =============== */ /* The routine is based on the routine ENVRON by Malcolm and */ /* incorporates suggestions by Gentleman and Marovich. See */ /* Malcolm M. A. (1972) Algorithms to reveal properties of */ /* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ /* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ /* that reveal properties of floating point arithmetic units. */ /* Comms. of the ACM, 17, 276-277. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ if (first) { one = 1.; /* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ /* IEEE1, T and RND. */ /* Throughout this routine we use the function DLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* Compute a = 2.0**m with the smallest positive bla_integer m such */ /* that */ /* fl( a + 1.0 ) = a. */ a = 1.; c__ = 1.; /* + WHILE( C.EQ.ONE )LOOP */ L10: if (c__ == one) { a *= 2; c__ = bli_dlamc3(&a, &one); d__1 = -a; c__ = bli_dlamc3(&c__, &d__1); goto L10; } /* + END WHILE */ /* Now compute b = 2.0**m with the smallest positive bla_integer m */ /* such that */ /* fl( a + b ) .gt. a. 
*/ b = 1.; c__ = bli_dlamc3(&a, &b); /* + WHILE( C.EQ.A )LOOP */ L20: if (c__ == a) { b *= 2; c__ = bli_dlamc3(&a, &b); goto L20; } /* + END WHILE */ /* Now compute the base. a and c are neighbouring floating point */ /* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ /* their difference is beta. Adding 0.25 to c is to ensure that it */ /* is truncated to beta and not ( beta - 1 ). */ qtr = one / 4; savec = c__; d__1 = -a; c__ = bli_dlamc3(&c__, &d__1); lbeta = (bla_integer) (c__ + qtr); /* Now determine whether rounding or chopping occurs, by adding a */ /* bit less than beta/2 and a bit more than beta/2 to a. */ b = (bla_double) lbeta; d__1 = b / 2; d__2 = -b / 100; f = bli_dlamc3(&d__1, &d__2); c__ = bli_dlamc3(&f, &a); if (c__ == a) { lrnd = TRUE_; } else { lrnd = FALSE_; } d__1 = b / 2; d__2 = b / 100; f = bli_dlamc3(&d__1, &d__2); c__ = bli_dlamc3(&f, &a); if (lrnd && c__ == a) { lrnd = FALSE_; } /* Try and decide whether rounding is done in the IEEE 'round to */ /* nearest' style. B/2 is half a unit in the last place of the two */ /* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ /* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ /* A, but adding B/2 to SAVEC should change SAVEC. */ d__1 = b / 2; t1 = bli_dlamc3(&d__1, &a); d__1 = b / 2; t2 = bli_dlamc3(&d__1, &savec); lieee1 = t1 == a && t2 > savec && lrnd; /* Now find the mantissa, t. It should be the bla_integer part of */ /* log to the base beta of a, however it is safer to determine t */ /* by powering. So we find t as the smallest positive bla_integer for */ /* which */ /* fl( beta**t + 1.0 ) = 1.0. 
*/ lt = 0; a = 1.; c__ = 1.; /* + WHILE( C.EQ.ONE )LOOP */ L30: if (c__ == one) { ++lt; a *= lbeta; c__ = bli_dlamc3(&a, &one); d__1 = -a; c__ = bli_dlamc3(&c__, &d__1); goto L30; } /* + END WHILE */ } *beta = lbeta; *t = lt; *rnd = lrnd; *ieee1 = lieee1; first = FALSE_; return 0; /* End of DLAMC1 */ } /* bli_dlamc1_ */ /* *********************************************************************** */ /* Subroutine */ int bli_dlamc2(bla_integer *beta, bla_integer *t, bla_logical *rnd, bla_double *eps, bla_integer *emin, bla_double *rmin, bla_integer *emax, bla_double *rmax) { /* Initialized data */ static bla_logical first = TRUE_; static bla_logical iwarn = FALSE_; /* Format strings */ static bla_character fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre\ ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the value EMIN loo\ ks\002,\002 acceptable please comment out \002,/\002 the IF block as marked \ within the code of routine\002,\002 DLAMC2,\002,/\002 otherwise supply EMIN \ explicitly.\002,/)"; /* System generated locals */ bla_integer i__1; bla_double d__1, d__2, d__3, d__4, d__5; /* Builtin functions */ double bli_pow_di(bla_double *, bla_integer *); //bla_integer s_wsfe(cilist *), do_fio(bla_integer *, bla_character *, ftnlen), e_wsfe(); /* Local variables */ static bla_logical ieee; static bla_double half; static bla_logical lrnd; static bla_double leps, zero, a, b, c__; static bla_integer i__, lbeta; static bla_double rbase; static bla_integer lemin, lemax, gnmin; static bla_double smnum; static bla_integer gpmin; static bla_double third, lrmin, lrmax, sixth; extern /* Subroutine */ int bli_dlamc1(bla_integer *, bla_integer *, bla_logical *, bla_logical *); extern bla_double bli_dlamc3(bla_double *, bla_double *); static bla_logical lieee1; extern /* Subroutine */ int bli_dlamc4(bla_integer *, bla_double *, bla_integer *), bli_dlamc5(bla_integer *, bla_integer *, bla_integer *, bla_logical *, bla_integer *, bla_double *); static bla_integer 
lt, ngnmin, ngpmin; static bla_double one, two; /* Fortran I/O blocks */ //static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC2 determines the machine parameters specified in its argument */ /* list. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. */ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. ) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* EPS (output) DOUBLE PRECISION */ /* The smallest positive number such that */ /* fl( 1.0 - EPS ) .LT. 1.0, */ /* where fl denotes the computed value. */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow occurs. */ /* RMIN (output) DOUBLE PRECISION */ /* The smallest normalized number for the machine, given by */ /* BASE**( EMIN - 1 ), where BASE is the floating point value */ /* of BETA. */ /* EMAX (output) INTEGER */ /* The maximum exponent before overflow occurs. */ /* RMAX (output) DOUBLE PRECISION */ /* The largest positive number for the machine, given by */ /* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ /* value of BETA. */ /* Further Details */ /* =============== */ /* The computation of EPS is based on a routine PARANOIA by */ /* W. Kahan of the University of California at Berkeley. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. 
Executable Statements .. */ if (first) { zero = 0.; one = 1.; two = 2.; /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ /* BETA, T, RND, EPS, EMIN and RMIN. */ /* Throughout this routine we use the function DLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ bli_dlamc1(&lbeta, <, &lrnd, &lieee1); /* Start to find EPS. */ b = (bla_double) lbeta; i__1 = -lt; a = bli_pow_di(&b, &i__1); leps = a; /* Try some tricks to see whether or not this is the correct EPS. */ b = two / 3; half = one / 2; d__1 = -half; sixth = bli_dlamc3(&b, &d__1); third = bli_dlamc3(&sixth, &sixth); d__1 = -half; b = bli_dlamc3(&third, &d__1); b = bli_dlamc3(&b, &sixth); b = f2c_abs(b); if (b < leps) { b = leps; } leps = 1.; /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ L10: if (leps > b && b > zero) { leps = b; d__1 = half * leps; /* Computing 5th power */ d__3 = two, d__4 = d__3, d__3 *= d__3; /* Computing 2nd power */ d__5 = leps; d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5); c__ = bli_dlamc3(&d__1, &d__2); d__1 = -c__; c__ = bli_dlamc3(&half, &d__1); b = bli_dlamc3(&half, &c__); d__1 = -b; c__ = bli_dlamc3(&half, &d__1); b = bli_dlamc3(&half, &c__); goto L10; } /* + END WHILE */ if (a < leps) { leps = a; } /* Computation of EPS complete. */ /* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ /* Keep dividing A by BETA until (gradual) underflow occurs. This */ /* is detected when we cannot recover the previous A. 
*/ rbase = one / lbeta; smnum = one; for (i__ = 1; i__ <= 3; ++i__) { d__1 = smnum * rbase; smnum = bli_dlamc3(&d__1, &zero); /* L20: */ } a = bli_dlamc3(&one, &smnum); bli_dlamc4(&ngpmin, &one, &lbeta); d__1 = -one; bli_dlamc4(&ngnmin, &d__1, &lbeta); bli_dlamc4(&gpmin, &a, &lbeta); d__1 = -a; bli_dlamc4(&gnmin, &d__1, &lbeta); ieee = FALSE_; if (ngpmin == ngnmin && gpmin == gnmin) { if (ngpmin == gpmin) { lemin = ngpmin; /* ( Non twos-complement machines, no gradual underflow; */ /* e.g., VAX ) */ } else if (gpmin - ngpmin == 3) { lemin = ngpmin - 1 + lt; ieee = TRUE_; /* ( Non twos-complement machines, with gradual underflow; */ /* e.g., IEEE standard followers ) */ } else { lemin = f2c_min(ngpmin,gpmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if (ngpmin == gpmin && ngnmin == gnmin) { if ((i__1 = ngpmin - ngnmin, f2c_abs(i__1)) == 1) { lemin = f2c_max(ngpmin,ngnmin); /* ( Twos-complement machines, no gradual underflow; */ /* e.g., CYBER 205 ) */ } else { lemin = f2c_min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if ((i__1 = ngpmin - ngnmin, f2c_abs(i__1)) == 1 && gpmin == gnmin) { if (gpmin - f2c_min(ngpmin,ngnmin) == 3) { lemin = f2c_max(ngpmin,ngnmin) - 1 + lt; /* ( Twos-complement machines with gradual underflow; */ /* no known machine ) */ } else { lemin = f2c_min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else { /* Computing MIN */ i__1 = f2c_min(ngpmin,ngnmin), i__1 = f2c_min(i__1,gpmin); lemin = f2c_min(i__1,gnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } first = FALSE_; /* ** */ /* Comment out this if block if EMIN is ok */ if (iwarn) { first = TRUE_; /* s_wsfe(&io___58); do_fio(&c__1, (bla_character *)&lemin, (ftnlen)sizeof(bla_integer)); e_wsfe(); */ printf( "%s", fmt_9999 ); } /* ** */ /* Assume IEEE arithmetic if we found denormalised numbers above, */ /* or if arithmetic seems to round in the IEEE style, determined */ /* in routine DLAMC1. 
A true IEEE machine should have both things */ /* true; however, faulty machines may have one or the other. */ ieee = ieee || lieee1; /* Compute RMIN by successive division by BETA. We could compute */ /* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ /* this computation. */ lrmin = 1.; i__1 = 1 - lemin; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = lrmin * rbase; lrmin = bli_dlamc3(&d__1, &zero); /* L30: */ } /* Finally, call DLAMC5 to compute EMAX and RMAX. */ bli_dlamc5(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); } *beta = lbeta; *t = lt; *rnd = lrnd; *eps = leps; *emin = lemin; *rmin = lrmin; *emax = lemax; *rmax = lrmax; return 0; /* End of DLAMC2 */ } /* bli_dlamc2_ */ /* *********************************************************************** */ bla_double bli_dlamc3(bla_double *a, bla_double *b) { /* System generated locals */ bla_double ret_val; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC3 is intended to force A and B to be stored prior to doing */ /* the addition of A and B , for use in situations where optimizers */ /* might hold one of these in a register. */ /* Arguments */ /* ========= */ /* A (input) DOUBLE PRECISION */ /* B (input) DOUBLE PRECISION */ /* The values A and B. */ /* ===================================================================== */ /* .. Executable Statements .. 
*/ ret_val = *a + *b; return ret_val; /* End of DLAMC3 */ } /* bli_dlamc3_ */ /* *********************************************************************** */ /* Subroutine */ int bli_dlamc4(bla_integer *emin, bla_double *start, bla_integer *base) { /* System generated locals */ bla_integer i__1; bla_double d__1; /* Local variables */ static bla_double zero, a; static bla_integer i__; static bla_double rbase, b1, b2, c1, c2, d1, d2; extern bla_double bli_dlamc3(bla_double *, bla_double *); static bla_double one; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC4 is a service routine for DLAMC2. */ /* Arguments */ /* ========= */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow, computed by */ /* setting A = START and dividing by BASE until the previous A */ /* can not be recovered. */ /* START (input) DOUBLE PRECISION */ /* The starting point for determining EMIN. */ /* BASE (input) INTEGER */ /* The base of the machine. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ a = *start; one = 1.; rbase = one / *base; zero = 0.; *emin = 1; d__1 = a * rbase; b1 = bli_dlamc3(&d__1, &zero); c1 = a; c2 = a; d1 = a; d2 = a; /* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. 
*/ /* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ L10: if (c1 == a && c2 == a && d1 == a && d2 == a) { --(*emin); a = b1; d__1 = a / *base; b1 = bli_dlamc3(&d__1, &zero); d__1 = b1 * *base; c1 = bli_dlamc3(&d__1, &zero); d1 = zero; i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { d1 += b1; /* L20: */ } d__1 = a * rbase; b2 = bli_dlamc3(&d__1, &zero); d__1 = b2 / rbase; c2 = bli_dlamc3(&d__1, &zero); d2 = zero; i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { d2 += b2; /* L30: */ } goto L10; } /* + END WHILE */ return 0; /* End of DLAMC4 */ } /* bli_dlamc4_ */ /* *********************************************************************** */ /* Subroutine */ int bli_dlamc5(bla_integer *beta, bla_integer *p, bla_integer *emin, bla_logical *ieee, bla_integer *emax, bla_double *rmax) { /* System generated locals */ bla_integer i__1; bla_double d__1; /* Local variables */ static bla_integer lexp; static bla_double oldy; static bla_integer uexp, i__; static bla_double y, z__; static bla_integer nbits; extern bla_double bli_dlamc3(bla_double *, bla_double *); static bla_double recbas; static bla_integer exbits, expsum, try__; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC5 attempts to compute RMAX, the largest machine floating-point */ /* number, without overflow. It assumes that EMAX + f2c_abs(EMIN) sum */ /* approximately to a power of 2. It will fail on machines where this */ /* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ /* EMAX = 28718). It will also fail if the value supplied for EMIN is */ /* too large (i.e. too close to zero), probably with overflow. */ /* Arguments */ /* ========= */ /* BETA (input) INTEGER */ /* The base of floating-point arithmetic. */ /* P (input) INTEGER */ /* The number of base BETA digits in the mantissa of a */ /* floating-point value. 
*/ /* EMIN (input) INTEGER */ /* The minimum exponent before (gradual) underflow. */ /* IEEE (input) LOGICAL */ /* A bla_logical flag specifying whether or not the arithmetic */ /* system is thought to comply with the IEEE standard. */ /* EMAX (output) INTEGER */ /* The largest exponent before overflow */ /* RMAX (output) DOUBLE PRECISION */ /* The largest machine floating-point number. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* First compute LEXP and UEXP, two powers of 2 that bound */ /* f2c_abs(EMIN). We then assume that EMAX + f2c_abs(EMIN) will sum */ /* approximately to the bound that is closest to f2c_abs(EMIN). */ /* (EMAX is the exponent of the required number RMAX). */ lexp = 1; exbits = 1; L10: try__ = lexp << 1; if (try__ <= -(*emin)) { lexp = try__; ++exbits; goto L10; } if (lexp == -(*emin)) { uexp = lexp; } else { uexp = try__; ++exbits; } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ /* than or equal to EMIN. EXBITS is the number of bits needed to */ /* store the exponent. */ if (uexp + *emin > -lexp - *emin) { expsum = lexp << 1; } else { expsum = uexp << 1; } /* EXPSUM is the exponent range, approximately equal to */ /* EMAX - EMIN + 1 . */ *emax = expsum + *emin - 1; nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a */ /* floating-point number. */ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a */ /* floating-point number, which is unlikely, or some bits are */ /* not used in the representation of numbers, which is possible, */ /* (e.g. Cray machines) or the mantissa has an implicit bit, */ /* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ /* most likely. We have to assume the last alternative. 
*/ /* If this is true, then we need to reduce EMAX by one because */ /* there must be some way of representing zero in an implicit-bit */ /* system. On machines like Cray, we are reducing EMAX by one */ /* unnecessarily. */ --(*emax); } if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent */ /* for infinity and NaN. */ --(*emax); } /* Now create RMAX, the largest machine number, which should */ /* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ /* First compute 1.0 - BETA**(-P), being careful that the */ /* result is less than 1.0 . */ recbas = 1. / *beta; z__ = *beta - 1.; y = 0.; i__1 = *p; for (i__ = 1; i__ <= i__1; ++i__) { z__ *= recbas; if (y < 1.) { oldy = y; } y = bli_dlamc3(&y, &z__); /* L20: */ } if (y >= 1.) { y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ i__1 = *emax; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = y * *beta; y = bli_dlamc3(&d__1, &c_b32); /* L30: */ } *rmax = y; return 0; /* End of DLAMC5 */ } /* bli_dlamc5_ */ #ifdef __cplusplus } #endif blis-0.6.1/frame/base/noopt/bli_dlamch.h000066400000000000000000000033121360743507500200520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ bla_double bli_dlamch( bla_character* cmach, ftnlen cmach_len ); blis-0.6.1/frame/base/noopt/bli_lsame.c000066400000000000000000000053551360743507500177270ustar00rootroot00000000000000/* lsame.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #ifdef __cplusplus extern "C" { #endif #include "blis.h" bla_logical bli_lsame(bla_character *ca, bla_character *cb, ftnlen ca_len, ftnlen cb_len) { /* System generated locals */ bla_logical ret_val; /* Local variables */ static bla_integer inta, intb, zcode; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ /* case. */ /* Arguments */ /* ========= */ /* CA (input) CHARACTER*1 */ /* CB (input) CHARACTER*1 */ /* CA and CB specify the single bla_characters to be compared. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. 
*/ /* Test if the bla_characters are equal */ ret_val = *(unsigned char *)ca == *(unsigned char *)cb; if (ret_val) { return ret_val; } /* Now test for equivalence if both bla_characters are alphabetic. */ zcode = 'Z'; /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ /* machines, on which ICHAR returns a value with bit 8 set. */ /* ICHAR('A') on Prime machines returns 193 which is the same as */ /* ICHAR('A') on an EBCDIC machine. */ inta = *(unsigned char *)ca; intb = *(unsigned char *)cb; if (zcode == 90 || zcode == 122) { /* ASCII is assumed - ZCODE is the ASCII code of either lower or */ /* upper case 'Z'. */ if (inta >= 97 && inta <= 122) { inta += -32; } if (intb >= 97 && intb <= 122) { intb += -32; } } else if (zcode == 233 || zcode == 169) { /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ /* upper case 'Z'. */ if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta >= 162 && inta <= 169)) { inta += 64; } if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb >= 162 && intb <= 169)) { intb += 64; } } else if (zcode == 218 || zcode == 250) { /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ /* plus 128 of either lower or upper case 'Z'. */ if (inta >= 225 && inta <= 250) { inta += -32; } if (intb >= 225 && intb <= 250) { intb += -32; } } ret_val = inta == intb; /* RETURN */ /* End of LSAME */ return ret_val; } /* bli_lsame */ #ifdef __cplusplus } #endif blis-0.6.1/frame/base/noopt/bli_lsame.h000066400000000000000000000033461360743507500177320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ bla_logical bli_lsame( bla_character* ca, bla_character* cb, ftnlen ca_len, ftnlen cb_len ); blis-0.6.1/frame/base/noopt/bli_slamch.c000066400000000000000000000657271360743507500201060ustar00rootroot00000000000000/* slamch.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #ifdef __cplusplus extern "C" { #endif #include "blis.h" double bli_pow_ri( bla_real* a, bla_integer* n ); /* Table of constant values */ //static bla_integer c__1 = 1; static bla_real c_b32 = (float)0.; double bli_pow_ri(bla_real *ap, bla_integer *bp) { double pow, x; bla_integer n; unsigned long u; pow = 1; x = *ap; n = *bp; if( n != 0 ) { if( n < 0 ) { n = -n; x = 1/x; } for( u = n; ; ) { if( u & 01 ) pow *= x; if( u >>= 1 ) x *= x; else break; } } return pow; } bla_real bli_slamch(bla_character *cmach, ftnlen cmach_len) { /* Initialized data */ static bla_logical first = TRUE_; /* System generated locals */ bla_integer i__1; bla_real ret_val; /* Builtin functions */ double bli_pow_ri(bla_real *, bla_integer *); /* Local variables */ static bla_real base; static bla_integer beta; static bla_real emin, prec, emax; static bla_integer imin, imax; static bla_logical lrnd; static bla_real rmin, rmax, t, rmach; extern bla_logical bli_lsame(bla_character *, bla_character *, ftnlen, ftnlen); static bla_real smnum, sfmin; extern /* Subroutine */ int bli_slamc2(bla_integer *, bla_integer *, bla_logical *, bla_real *, bla_integer *, bla_real *, bla_integer *, bla_real *); static bla_integer it; static bla_real rnd, eps; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMCH determines single precision machine parameters. 
*/ /* Arguments */ /* ========= */ /* CMACH (input) CHARACTER*1 */ /* Specifies the value to be returned by SLAMCH: */ /* = 'E' or 'e', SLAMCH := eps */ /* = 'S' or 's , SLAMCH := sfmin */ /* = 'B' or 'b', SLAMCH := base */ /* = 'P' or 'p', SLAMCH := eps*base */ /* = 'N' or 'n', SLAMCH := t */ /* = 'R' or 'r', SLAMCH := rnd */ /* = 'M' or 'm', SLAMCH := emin */ /* = 'U' or 'u', SLAMCH := rmin */ /* = 'L' or 'l', SLAMCH := emax */ /* = 'O' or 'o', SLAMCH := rmax */ /* where */ /* eps = relative machine precision */ /* sfmin = safe minimum, such that 1/sfmin does not overflow */ /* base = base of the machine */ /* prec = eps*base */ /* t = number of (base) digits in the mantissa */ /* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */ /* emin = minimum exponent before (gradual) underflow */ /* rmin = underflow threshold - base**(emin-1) */ /* emax = largest exponent before overflow */ /* rmax = overflow threshold - (base**emax)*(1-eps) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ if (first) { bli_slamc2(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); base = (bla_real) beta; t = (bla_real) it; if (lrnd) { rnd = (float)1.; i__1 = 1 - it; eps = bli_pow_ri(&base, &i__1) / 2; } else { rnd = (float)0.; i__1 = 1 - it; eps = bli_pow_ri(&base, &i__1); } prec = eps * base; emin = (bla_real) imin; emax = (bla_real) imax; sfmin = rmin; smnum = (float)1. / rmax; if (smnum >= sfmin) { /* Use SMALL plus a bit, to avoid the possibility of rounding */ /* causing overflow when computing 1/sfmin. 
*/ sfmin = smnum * (eps + (float)1.); } } if (bli_lsame(cmach, "E", (ftnlen)1, (ftnlen)1)) { rmach = eps; } else if (bli_lsame(cmach, "S", (ftnlen)1, (ftnlen)1)) { rmach = sfmin; } else if (bli_lsame(cmach, "B", (ftnlen)1, (ftnlen)1)) { rmach = base; } else if (bli_lsame(cmach, "P", (ftnlen)1, (ftnlen)1)) { rmach = prec; } else if (bli_lsame(cmach, "N", (ftnlen)1, (ftnlen)1)) { rmach = t; } else if (bli_lsame(cmach, "R", (ftnlen)1, (ftnlen)1)) { rmach = rnd; } else if (bli_lsame(cmach, "M", (ftnlen)1, (ftnlen)1)) { rmach = emin; } else if (bli_lsame(cmach, "U", (ftnlen)1, (ftnlen)1)) { rmach = rmin; } else if (bli_lsame(cmach, "L", (ftnlen)1, (ftnlen)1)) { rmach = emax; } else if (bli_lsame(cmach, "O", (ftnlen)1, (ftnlen)1)) { rmach = rmax; } ret_val = rmach; first = FALSE_; return ret_val; /* End of SLAMCH */ } /* bli_slamch_ */ /* *********************************************************************** */ /* Subroutine */ int bli_slamc1(bla_integer *beta, bla_integer *t, bla_logical *rnd, bla_logical *ieee1) { /* Initialized data */ static bla_logical first = TRUE_; /* System generated locals */ bla_real r__1, r__2; /* Local variables */ static bla_logical lrnd; static bla_real a, b, c__, f; static bla_integer lbeta; static bla_real savec; static bla_logical lieee1; static bla_real t1, t2; extern bla_real bli_slamc3(bla_real *, bla_real *); static bla_integer lt; static bla_real one, qtr; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMC1 determines the machine parameters given by BETA, T, RND, and */ /* IEEE1. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. */ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. 
) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* IEEE1 (output) LOGICAL */ /* Specifies whether rounding appears to be done in the IEEE */ /* 'round to nearest' style. */ /* Further Details */ /* =============== */ /* The routine is based on the routine ENVRON by Malcolm and */ /* incorporates suggestions by Gentleman and Marovich. See */ /* Malcolm M. A. (1972) Algorithms to reveal properties of */ /* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ /* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ /* that reveal properties of floating point arithmetic units. */ /* Comms. of the ACM, 17, 276-277. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ if (first) { one = (float)1.; /* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ /* IEEE1, T and RND. */ /* Throughout this routine we use the function SLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* Compute a = 2.0**m with the smallest positive bla_integer m such */ /* that */ /* fl( a + 1.0 ) = a. */ a = (float)1.; c__ = (float)1.; /* + WHILE( C.EQ.ONE )LOOP */ L10: if (c__ == one) { a *= 2; c__ = bli_slamc3(&a, &one); r__1 = -a; c__ = bli_slamc3(&c__, &r__1); goto L10; } /* + END WHILE */ /* Now compute b = 2.0**m with the smallest positive bla_integer m */ /* such that */ /* fl( a + b ) .gt. a. */ b = (float)1.; c__ = bli_slamc3(&a, &b); /* + WHILE( C.EQ.A )LOOP */ L20: if (c__ == a) { b *= 2; c__ = bli_slamc3(&a, &b); goto L20; } /* + END WHILE */ /* Now compute the base. a and c are neighbouring floating point */ /* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ /* their difference is beta. 
Adding 0.25 to c is to ensure that it */ /* is truncated to beta and not ( beta - 1 ). */ qtr = one / 4; savec = c__; r__1 = -a; c__ = bli_slamc3(&c__, &r__1); lbeta = c__ + qtr; /* Now determine whether rounding or chopping occurs, by adding a */ /* bit less than beta/2 and a bit more than beta/2 to a. */ b = (bla_real) lbeta; r__1 = b / 2; r__2 = -b / 100; f = bli_slamc3(&r__1, &r__2); c__ = bli_slamc3(&f, &a); if (c__ == a) { lrnd = TRUE_; } else { lrnd = FALSE_; } r__1 = b / 2; r__2 = b / 100; f = bli_slamc3(&r__1, &r__2); c__ = bli_slamc3(&f, &a); if (lrnd && c__ == a) { lrnd = FALSE_; } /* Try and decide whether rounding is done in the IEEE 'round to */ /* nearest' style. B/2 is half a unit in the last place of the two */ /* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ /* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ /* A, but adding B/2 to SAVEC should change SAVEC. */ r__1 = b / 2; t1 = bli_slamc3(&r__1, &a); r__1 = b / 2; t2 = bli_slamc3(&r__1, &savec); lieee1 = t1 == a && t2 > savec && lrnd; /* Now find the mantissa, t. It should be the bla_integer part of */ /* log to the base beta of a, however it is safer to determine t */ /* by powering. So we find t as the smallest positive bla_integer for */ /* which */ /* fl( beta**t + 1.0 ) = 1.0. 
*/ lt = 0; a = (float)1.; c__ = (float)1.; /* + WHILE( C.EQ.ONE )LOOP */ L30: if (c__ == one) { ++lt; a *= lbeta; c__ = bli_slamc3(&a, &one); r__1 = -a; c__ = bli_slamc3(&c__, &r__1); goto L30; } /* + END WHILE */ } *beta = lbeta; *t = lt; *rnd = lrnd; *ieee1 = lieee1; first = FALSE_; return 0; /* End of SLAMC1 */ } /* bli_slamc1_ */ /* *********************************************************************** */ /* Subroutine */ int bli_slamc2(bla_integer *beta, bla_integer *t, bla_logical *rnd, bla_real * eps, bla_integer *emin, bla_real *rmin, bla_integer *emax, bla_real *rmax) { /* Initialized data */ static bla_logical first = TRUE_; static bla_logical iwarn = FALSE_; /* Format strings */ static bla_character fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre\ ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the value EMIN loo\ ks\002,\002 acceptable please comment out \002,/\002 the IF block as marked \ within the code of routine\002,\002 SLAMC2,\002,/\002 otherwise supply EMIN \ explicitly.\002,/)"; /* System generated locals */ bla_integer i__1; bla_real r__1, r__2, r__3, r__4, r__5; /* Builtin functions */ double bli_pow_ri(bla_real *, bla_integer *); //bla_integer s_wsfe(cilist *), do_fio(bla_integer *, bla_character *, ftnlen), e_wsfe(); /* Local variables */ static bla_logical ieee; static bla_real half; static bla_logical lrnd; static bla_real leps, zero, a, b, c__; static bla_integer i__, lbeta; static bla_real rbase; static bla_integer lemin, lemax, gnmin; static bla_real smnum; static bla_integer gpmin; static bla_real third, lrmin, lrmax, sixth; static bla_logical lieee1; extern /* Subroutine */ int bli_slamc1(bla_integer *, bla_integer *, bla_logical *, bla_logical *); extern bla_real bli_slamc3(bla_real *, bla_real *); extern /* Subroutine */ int bli_slamc4(bla_integer *, bla_real *, bla_integer *), bli_slamc5(bla_integer *, bla_integer *, bla_integer *, bla_logical *, bla_integer *, bla_real *); static bla_integer lt, ngnmin, 
ngpmin; static bla_real one, two; /* Fortran I/O blocks */ //static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMC2 determines the machine parameters specified in its argument */ /* list. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. */ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. ) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* EPS (output) REAL */ /* The smallest positive number such that */ /* fl( 1.0 - EPS ) .LT. 1.0, */ /* where fl denotes the computed value. */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow occurs. */ /* RMIN (output) REAL */ /* The smallest normalized number for the machine, given by */ /* BASE**( EMIN - 1 ), where BASE is the floating point value */ /* of BETA. */ /* EMAX (output) INTEGER */ /* The maximum exponent before overflow occurs. */ /* RMAX (output) REAL */ /* The largest positive number for the machine, given by */ /* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ /* value of BETA. */ /* Further Details */ /* =============== */ /* The computation of EPS is based on a routine PARANOIA by */ /* W. Kahan of the University of California at Berkeley. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. 
*/ if (first) { zero = (float)0.; one = (float)1.; two = (float)2.; /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ /* BETA, T, RND, EPS, EMIN and RMIN. */ /* Throughout this routine we use the function SLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ bli_slamc1(&lbeta, <, &lrnd, &lieee1); /* Start to find EPS. */ b = (bla_real) lbeta; i__1 = -lt; a = bli_pow_ri(&b, &i__1); leps = a; /* Try some tricks to see whether or not this is the correct EPS. */ b = two / 3; half = one / 2; r__1 = -half; sixth = bli_slamc3(&b, &r__1); third = bli_slamc3(&sixth, &sixth); r__1 = -half; b = bli_slamc3(&third, &r__1); b = bli_slamc3(&b, &sixth); b = f2c_abs(b); if (b < leps) { b = leps; } leps = (float)1.; /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ L10: if (leps > b && b > zero) { leps = b; r__1 = half * leps; /* Computing 5th power */ r__3 = two, r__4 = r__3, r__3 *= r__3; /* Computing 2nd power */ r__5 = leps; r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); c__ = bli_slamc3(&r__1, &r__2); r__1 = -c__; c__ = bli_slamc3(&half, &r__1); b = bli_slamc3(&half, &c__); r__1 = -b; c__ = bli_slamc3(&half, &r__1); b = bli_slamc3(&half, &c__); goto L10; } /* + END WHILE */ if (a < leps) { leps = a; } /* Computation of EPS complete. */ /* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ /* Keep dividing A by BETA until (gradual) underflow occurs. This */ /* is detected when we cannot recover the previous A. 
*/ rbase = one / lbeta; smnum = one; for (i__ = 1; i__ <= 3; ++i__) { r__1 = smnum * rbase; smnum = bli_slamc3(&r__1, &zero); /* L20: */ } a = bli_slamc3(&one, &smnum); bli_slamc4(&ngpmin, &one, &lbeta); r__1 = -one; bli_slamc4(&ngnmin, &r__1, &lbeta); bli_slamc4(&gpmin, &a, &lbeta); r__1 = -a; bli_slamc4(&gnmin, &r__1, &lbeta); ieee = FALSE_; if (ngpmin == ngnmin && gpmin == gnmin) { if (ngpmin == gpmin) { lemin = ngpmin; /* ( Non twos-complement machines, no gradual underflow; */ /* e.g., VAX ) */ } else if (gpmin - ngpmin == 3) { lemin = ngpmin - 1 + lt; ieee = TRUE_; /* ( Non twos-complement machines, with gradual underflow; */ /* e.g., IEEE standard followers ) */ } else { lemin = f2c_min(ngpmin,gpmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if (ngpmin == gpmin && ngnmin == gnmin) { if ((i__1 = ngpmin - ngnmin, f2c_abs(i__1)) == 1) { lemin = f2c_max(ngpmin,ngnmin); /* ( Twos-complement machines, no gradual underflow; */ /* e.g., CYBER 205 ) */ } else { lemin = f2c_min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if ((i__1 = ngpmin - ngnmin, f2c_abs(i__1)) == 1 && gpmin == gnmin) { if (gpmin - f2c_min(ngpmin,ngnmin) == 3) { lemin = f2c_max(ngpmin,ngnmin) - 1 + lt; /* ( Twos-complement machines with gradual underflow; */ /* no known machine ) */ } else { lemin = f2c_min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else { /* Computing MIN */ i__1 = f2c_min(ngpmin,ngnmin), i__1 = f2c_min(i__1,gpmin); lemin = f2c_min(i__1,gnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } first = FALSE_; /* ** */ /* Comment out this if block if EMIN is ok */ if (iwarn) { first = TRUE_; /* s_wsfe(&io___58); do_fio(&c__1, (bla_character *)&lemin, (ftnlen)sizeof(bla_integer)); e_wsfe(); */ printf( "%s", fmt_9999 ); } /* ** */ /* Assume IEEE arithmetic if we found denormalised numbers above, */ /* or if arithmetic seems to round in the IEEE style, determined */ /* in routine SLAMC1. 
A true IEEE machine should have both things */ /* true; however, faulty machines may have one or the other. */ ieee = ieee || lieee1; /* Compute RMIN by successive division by BETA. We could compute */ /* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ /* this computation. */ lrmin = (float)1.; i__1 = 1 - lemin; for (i__ = 1; i__ <= i__1; ++i__) { r__1 = lrmin * rbase; lrmin = bli_slamc3(&r__1, &zero); /* L30: */ } /* Finally, call SLAMC5 to compute EMAX and RMAX. */ bli_slamc5(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); } *beta = lbeta; *t = lt; *rnd = lrnd; *eps = leps; *emin = lemin; *rmin = lrmin; *emax = lemax; *rmax = lrmax; return 0; /* End of SLAMC2 */ } /* bli_slamc2_ */ /* *********************************************************************** */ bla_real bli_slamc3(bla_real *a, bla_real *b) { /* System generated locals */ bla_real ret_val; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMC3 is intended to force A and B to be stored prior to doing */ /* the addition of A and B , for use in situations where optimizers */ /* might hold one of these in a register. */ /* Arguments */ /* ========= */ /* A (input) REAL */ /* B (input) REAL */ /* The values A and B. */ /* ===================================================================== */ /* .. Executable Statements .. 
*/ ret_val = *a + *b; return ret_val; /* End of SLAMC3 */ } /* bli_slamc3_ */ /* *********************************************************************** */ /* Subroutine */ int bli_slamc4(bla_integer *emin, bla_real *start, bla_integer *base) { /* System generated locals */ bla_integer i__1; bla_real r__1; /* Local variables */ static bla_real zero, a; static bla_integer i__; static bla_real rbase, b1, b2, c1, c2, d1, d2; extern bla_real bli_slamc3(bla_real *, bla_real *); static bla_real one; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMC4 is a service routine for SLAMC2. */ /* Arguments */ /* ========= */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow, computed by */ /* setting A = START and dividing by BASE until the previous A */ /* can not be recovered. */ /* START (input) REAL */ /* The starting point for determining EMIN. */ /* BASE (input) INTEGER */ /* The base of the machine. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ a = *start; one = (float)1.; rbase = one / *base; zero = (float)0.; *emin = 1; r__1 = a * rbase; b1 = bli_slamc3(&r__1, &zero); c1 = a; c2 = a; d1 = a; d2 = a; /* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. 
*/ /* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ L10: if (c1 == a && c2 == a && d1 == a && d2 == a) { --(*emin); a = b1; r__1 = a / *base; b1 = bli_slamc3(&r__1, &zero); r__1 = b1 * *base; c1 = bli_slamc3(&r__1, &zero); d1 = zero; i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { d1 += b1; /* L20: */ } r__1 = a * rbase; b2 = bli_slamc3(&r__1, &zero); r__1 = b2 / rbase; c2 = bli_slamc3(&r__1, &zero); d2 = zero; i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { d2 += b2; /* L30: */ } goto L10; } /* + END WHILE */ return 0; /* End of SLAMC4 */ } /* bli_slamc4_ */ /* *********************************************************************** */ /* Subroutine */ int bli_slamc5(bla_integer *beta, bla_integer *p, bla_integer *emin, bla_logical *ieee, bla_integer *emax, bla_real *rmax) { /* System generated locals */ bla_integer i__1; bla_real r__1; /* Local variables */ static bla_integer lexp; static bla_real oldy; static bla_integer uexp, i__; static bla_real y, z__; static bla_integer nbits; extern bla_real bli_slamc3(bla_real *, bla_real *); static bla_real recbas; static bla_integer exbits, expsum, try__; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMC5 attempts to compute RMAX, the largest machine floating-point */ /* number, without overflow. It assumes that EMAX + f2c_abs(EMIN) sum */ /* approximately to a power of 2. It will fail on machines where this */ /* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ /* EMAX = 28718). It will also fail if the value supplied for EMIN is */ /* too large (i.e. too close to zero), probably with overflow. */ /* Arguments */ /* ========= */ /* BETA (input) INTEGER */ /* The base of floating-point arithmetic. */ /* P (input) INTEGER */ /* The number of base BETA digits in the mantissa of a */ /* floating-point value. 
*/ /* EMIN (input) INTEGER */ /* The minimum exponent before (gradual) underflow. */ /* IEEE (input) LOGICAL */ /* A bla_logical flag specifying whether or not the arithmetic */ /* system is thought to comply with the IEEE standard. */ /* EMAX (output) INTEGER */ /* The largest exponent before overflow */ /* RMAX (output) REAL */ /* The largest machine floating-point number. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* First compute LEXP and UEXP, two powers of 2 that bound */ /* f2c_abs(EMIN). We then assume that EMAX + f2c_abs(EMIN) will sum */ /* approximately to the bound that is closest to f2c_abs(EMIN). */ /* (EMAX is the exponent of the required number RMAX). */ lexp = 1; exbits = 1; L10: try__ = lexp << 1; if (try__ <= -(*emin)) { lexp = try__; ++exbits; goto L10; } if (lexp == -(*emin)) { uexp = lexp; } else { uexp = try__; ++exbits; } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ /* than or equal to EMIN. EXBITS is the number of bits needed to */ /* store the exponent. */ if (uexp + *emin > -lexp - *emin) { expsum = lexp << 1; } else { expsum = uexp << 1; } /* EXPSUM is the exponent range, approximately equal to */ /* EMAX - EMIN + 1 . */ *emax = expsum + *emin - 1; nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a */ /* floating-point number. */ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a */ /* floating-point number, which is unlikely, or some bits are */ /* not used in the representation of numbers, which is possible, */ /* (e.g. Cray machines) or the mantissa has an implicit bit, */ /* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ /* most likely. We have to assume the last alternative. 
*/ /* If this is true, then we need to reduce EMAX by one because */ /* there must be some way of representing zero in an implicit-bit */ /* system. On machines like Cray, we are reducing EMAX by one */ /* unnecessarily. */ --(*emax); } if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent */ /* for infinity and NaN. */ --(*emax); } /* Now create RMAX, the largest machine number, which should */ /* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ /* First compute 1.0 - BETA**(-P), being careful that the */ /* result is less than 1.0 . */ recbas = (float)1. / *beta; z__ = *beta - (float)1.; y = (float)0.; i__1 = *p; for (i__ = 1; i__ <= i__1; ++i__) { z__ *= recbas; if (y < (float)1.) { oldy = y; } y = bli_slamc3(&y, &z__); /* L20: */ } if (y >= (float)1.) { y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ i__1 = *emax; for (i__ = 1; i__ <= i__1; ++i__) { r__1 = y * *beta; y = bli_slamc3(&r__1, &c_b32); /* L30: */ } *rmax = y; return 0; /* End of SLAMC5 */ } /* bli_slamc5_ */ #ifdef __cplusplus } #endif blis-0.6.1/frame/base/noopt/bli_slamch.h000066400000000000000000000033101360743507500200670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ bla_real bli_slamch( bla_character* cmach, ftnlen cmach_len ); blis-0.6.1/frame/base/proj/000077500000000000000000000000001360743507500154375ustar00rootroot00000000000000blis-0.6.1/frame/base/proj/bli_projm.c000066400000000000000000000071161360743507500175650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_projm ( obj_t* a, obj_t* b ) { // Check parameters. if ( bli_error_checking_is_enabled() ) bli_projm_check( a, b ); if ( ( bli_obj_is_real( a ) && bli_obj_is_real( b ) ) || ( bli_obj_is_complex( a ) && bli_obj_is_complex( b ) ) ) { // If a and b are both real or both complex, we can simply use // copym. bli_copym( a, b ); } else { // This branch handles the case where one operand is real and // the other is complex. if ( bli_obj_is_real( a ) /* && bli_obj_is_complex( b ) */ ) { // If a is real and b is complex, we must obtain the real part // of b so that we can copy a into the real part (after // initializing all of b, including imaginary components, to // zero). obj_t br; bli_obj_real_part( b, &br ); bli_setm( &BLIS_ZERO, b ); bli_copym( a, &br ); } else // bli_obj_is_complex( a ) && bli_obj_is_real( b ) { // If a is complex and b is real, we can simply copy the // real part of a into b. obj_t ar; bli_obj_real_part( a, &ar ); bli_copym( &ar, b ); } } } // ----------------------------------------------------------------------------- void bli_projm_check ( obj_t* a, obj_t* b ) { err_t e_val; // Check object datatypes. 
e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( b ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_precisions( a, b ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( b ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( a, b ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( b ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/proj/bli_projm.h000066400000000000000000000034401360743507500175660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ BLIS_EXPORT_BLIS void bli_projm ( obj_t* a, obj_t* b ); void bli_projm_check ( obj_t* a, obj_t* b ); blis-0.6.1/frame/base/proj/bli_projv.c000066400000000000000000000071241360743507500175750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_projv ( obj_t* x, obj_t* y ) { // Check parameters. if ( bli_error_checking_is_enabled() ) bli_projv_check( x, y ); if ( ( bli_obj_is_real( x ) && bli_obj_is_real( y ) ) || ( bli_obj_is_complex( x ) && bli_obj_is_complex( y ) ) ) { // If x and y are both real or both complex, we can simply use // copyv. bli_copyv( x, y ); } else { // This branch handles the case where one operand is real and // the other is complex. if ( bli_obj_is_real( x ) /* && bli_obj_is_complex( y ) */ ) { // If x is real and y is complex, we must obtain the real part // of y so that we can copy x into the real part (after // initializing all of y, including imaginary components, to // zero). obj_t yr; bli_obj_real_part( y, &yr ); bli_setv( &BLIS_ZERO, y ); bli_copyv( x, &yr ); } else // bli_obj_is_complex( x ) && bli_obj_is_real( y ) { // If x is complex and y is real, we can simply copy the // real part of x into y. obj_t xr; bli_obj_real_part( x, &xr ); bli_copyv( &xr, y ); } } } // ----------------------------------------------------------------------------- void bli_projv_check ( obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_precisions( x, y ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/proj/bli_projv.h000066400000000000000000000034401360743507500175770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ BLIS_EXPORT_BLIS void bli_projv ( obj_t* x, obj_t* y ); void bli_projv_check ( obj_t* x, obj_t* y ); blis-0.6.1/frame/base/proj/old/000077500000000000000000000000001360743507500162155ustar00rootroot00000000000000blis-0.6.1/frame/base/proj/old/bli_proj_check.c000066400000000000000000000063261360743507500213250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_projm_check ( obj_t* a, obj_t* b ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( b ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_precisions( a, b ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_matrix_object( b ); bli_check_error_code( e_val ); e_val = bli_check_conformal_dims( a, b ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( b ); bli_check_error_code( e_val ); } void bli_projv_check ( obj_t* x, obj_t* y ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); e_val = bli_check_consistent_object_precisions( x, y ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_vector_object( y ); bli_check_error_code( e_val ); e_val = bli_check_equal_vector_lengths( x, y ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( y ); bli_check_error_code( e_val ); } blis-0.6.1/frame/base/proj/old/bli_proj_check.h000066400000000000000000000034241360743507500213260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void bli_projm_check ( obj_t* a, obj_t* b ); void bli_projv_check ( obj_t* x, obj_t* y ); blis-0.6.1/frame/compat/000077500000000000000000000000001360743507500150365ustar00rootroot00000000000000blis-0.6.1/frame/compat/attic/000077500000000000000000000000001360743507500161425ustar00rootroot00000000000000blis-0.6.1/frame/compat/attic/bla_gbmv.c000066400000000000000000000063111360743507500200600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* transa, \ f77_int* m, \ f77_int* n, \ f77_int* kl, \ f77_int* ku, \ ftype* alpha, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ) \ { \ trans_t blis_transa; \ dim_t m0, n0; \ dim_t m_y, n_x; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* Convert/typecast negative values of m and n to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Determine the dimensions of x and y so we can adjust the increments, if necessary.*/ \ bli_set_dims_with_trans( blis_transa, m0, n0, &m_y, &n_x ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n_x, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m_y, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( gbmv, gbmv ) #endif blis-0.6.1/frame/compat/attic/bla_gbmv.h000066400000000000000000000045451360743507500200740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* transa, \ f77_int* m, \ f77_int* n, \ f77_int* kl, \ f77_int* ku, \ ftype* alpha, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( gbmv ) #endif blis-0.6.1/frame/compat/attic/bla_hbmv.c000066400000000000000000000056331360743507500200670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ f77_int* k, \ ftype* alpha, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. 
*/ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( hbmv, hbmv ) #endif blis-0.6.1/frame/compat/attic/bla_hbmv.h000066400000000000000000000044401360743507500200670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ f77_int* k, \ ftype* alpha, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( hbmv ) #endif blis-0.6.1/frame/compat/attic/bla_hpmv.c000066400000000000000000000055421360743507500201040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* a, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( hpmv, hpmv ) #endif blis-0.6.1/frame/compat/attic/bla_hpmv.h000066400000000000000000000043471360743507500201130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* a, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( hpmv ) #endif blis-0.6.1/frame/compat/attic/bla_hpr.c000066400000000000000000000052421360743507500177200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype_r* alpha, \ ftype* x, f77_int* incx, \ ftype* a \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( hpr, hpr ) #endif blis-0.6.1/frame/compat/attic/bla_hpr.h000066400000000000000000000041761360743507500177320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype_r* alpha, \ ftype* x, f77_int* incx, \ ftype* a \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( hpr ) #endif blis-0.6.1/frame/compat/attic/bla_hpr2.c000066400000000000000000000054641360743507500200100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* x, f77_int* incx, \ ftype* y, f77_int* incy, \ ftype* a \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. 
*/ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( hpr2, hpr2 ) #endif blis-0.6.1/frame/compat/attic/bla_hpr2.h000066400000000000000000000042711360743507500200100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* x, f77_int* incx, \ ftype* y, f77_int* incy, \ ftype* a \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( hpr2 ) #endif blis-0.6.1/frame/compat/attic/bla_rot.c000066400000000000000000000052451360743507500177360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCR2 #define GENTFUNCR2( ftype_xy, ftype_r, chxy, chr, blasname, blisname ) \ \ void PASTEF772(chxy,chr,blasname)( \ f77_int* n, \ ftype_xy* x, f77_int* incx, \ ftype_xy* y, f77_int* incy, \ ftype_r* c, \ ftype_r* s \ ) \ { \ dim_t n0; \ ftype_xy* x0; \ ftype_xy* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCR2_BLAS( rot, ROT_KERNEL ) #endif blis-0.6.1/frame/compat/attic/bla_rot.h000066400000000000000000000042731360743507500177430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTR2 #define GENTPROTR2( ftype_xy, ftype_r, chxy, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF772(chxy,chr,blasname)( \ f77_int* n, \ ftype_xy* x, f77_int* incx, \ ftype_xy* y, f77_int* incy, \ ftype_r* c, \ ftype_r* s \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTR2_BLAS( rot ) #endif blis-0.6.1/frame/compat/attic/bla_rotg.c000066400000000000000000000042471360743507500201060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCR #define GENTFUNCR( ftype_xy, ftype_r, chxy, chr, blasname, blisname ) \ \ void PASTEF77(chxy,blasname)( \ ftype_xy* x, \ ftype_xy* y, \ ftype_r* c, \ ftype_r* s \ ) \ { \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCR_BLAS( rotg, rotg, ROTG_KERNEL ) #endif blis-0.6.1/frame/compat/attic/bla_rotg.h000066400000000000000000000041251360743507500201060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTR #define GENTPROTR( ftype_xy, ftype_r, chxy, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(chxy,blasname)( \ ftype_xy* x, \ ftype_xy* y, \ ftype_r* c, \ ftype_r* s \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTR_BLAS( rotg, rotg ) #endif blis-0.6.1/frame/compat/attic/bla_rotm.c000066400000000000000000000050521360743507500201070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_int* n, \ ftype* x, f77_int* incx, \ ftype* y, f77_int* incy, \ ftype* dparam \ ) \ { \ dim_t n0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( rotm, ROTM_KERNEL ) #endif blis-0.6.1/frame/compat/attic/bla_rotm.h000066400000000000000000000041161360743507500201140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_int* n, \ ftype* x, f77_int* incx, \ ftype* y, f77_int* incy, \ ftype* dparam \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( rotm ) #endif blis-0.6.1/frame/compat/attic/bla_rotmg.c000066400000000000000000000042531360743507500202600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ ftype* d1, \ ftype* d2, \ ftype* x, \ ftype* y, \ ftype* dparam \ ) \ { \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( rotmg, ROTMG_KERNEL ) #endif blis-0.6.1/frame/compat/attic/bla_rotmg.h000066400000000000000000000041301360743507500202570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ ftype* d1, \ ftype* d2, \ ftype* x, \ ftype* y, \ ftype* dparam \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( rotmg ) #endif blis-0.6.1/frame/compat/attic/bla_sbmv.c000066400000000000000000000056151360743507500201020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ f77_int* k, \ ftype* alpha, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. 
*/ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( sbmv, sbmv ) #endif blis-0.6.1/frame/compat/attic/bla_sbmv.h000066400000000000000000000044221360743507500201020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ f77_int* k, \ ftype* alpha, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( sbmv ) #endif blis-0.6.1/frame/compat/attic/bla_spmv.c000066400000000000000000000055241360743507500201170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* a, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( spmv, spmv ) #endif blis-0.6.1/frame/compat/attic/bla_spmv.h000066400000000000000000000043311360743507500201170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* a, \ ftype* x, f77_int* incx, \ ftype* beta, \ ftype* y, f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( spmv ) #endif blis-0.6.1/frame/compat/attic/bla_spr.c000066400000000000000000000052241360743507500177330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* x, f77_int* incx, \ ftype* a \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( spr, spr ) #endif blis-0.6.1/frame/compat/attic/bla_spr.h000066400000000000000000000041601360743507500177360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* x, f77_int* incx, \ ftype* a \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( spr ) #endif blis-0.6.1/frame/compat/attic/bla_spr2.c000066400000000000000000000054461360743507500200230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* x, f77_int* incx, \ ftype* y, f77_int* incy, \ ftype* a \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( spr2, spr2 ) #endif blis-0.6.1/frame/compat/attic/bla_spr2.h000066400000000000000000000042531360743507500200230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_int* m, \ ftype* alpha, \ ftype* x, f77_int* incx, \ ftype* y, f77_int* incy, \ ftype* a \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( spr2 ) #endif blis-0.6.1/frame/compat/attic/bla_tbmv.c000066400000000000000000000056451360743507500201060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_char* transa, \ f77_char* diaga, \ f77_int* m, \ f77_int* k, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx \ ) \ { \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( tbmv, tbmv ) #endif blis-0.6.1/frame/compat/attic/bla_tbmv.h000066400000000000000000000043241360743507500201040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_char* transa, \ f77_char* diaga, \ f77_int* m, \ f77_int* k, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( tbmv ) #endif blis-0.6.1/frame/compat/attic/bla_tbsv.c000066400000000000000000000056451360743507500201140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_char* transa, \ f77_char* diaga, \ f77_int* m, \ f77_int* k, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx \ ) \ { \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. 
*/ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( tbsv, tbsv ) #endif blis-0.6.1/frame/compat/attic/bla_tbsv.h000066400000000000000000000043241360743507500201120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_char* transa, \ f77_char* diaga, \ f77_int* m, \ f77_int* k, \ ftype* a, f77_int* lda, \ ftype* x, f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( tbsv ) #endif blis-0.6.1/frame/compat/attic/bla_tpmv.c000066400000000000000000000055541360743507500201230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_char* transa, \ f77_char* diaga, \ f77_int* m, \ ftype* a, \ ftype* x, f77_int* incx \ ) \ { \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( tpmv, tpmv ) #endif blis-0.6.1/frame/compat/attic/bla_tpmv.h000066400000000000000000000042331360743507500201210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_char* transa, \ f77_char* diaga, \ f77_int* m, \ ftype* a, \ ftype* x, f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( tpmv ) #endif blis-0.6.1/frame/compat/attic/bla_tpsv.c000066400000000000000000000055541360743507500201310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_char* transa, \ f77_char* diaga, \ f77_int* m, \ ftype* a, \ ftype* x, f77_int* incx \ ) \ { \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. 
*/ \ bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( tpsv, tpsv ) #endif blis-0.6.1/frame/compat/attic/bla_tpsv.h000066400000000000000000000042331360743507500201270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname)( \ f77_char* uploa, \ f77_char* transa, \ f77_char* diaga, \ f77_int* m, \ ftype* a, \ ftype* x, f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( tpsv ) #endif blis-0.6.1/frame/compat/bla_amax.c000066400000000000000000000061371360743507500167550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype_x, chx, blasname, blisname ) \ \ f77_int PASTEF772(i,chx,blasname) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx \ ) \ { \ dim_t n0; \ ftype_x* x0; \ inc_t incx0; \ gint_t bli_index; \ f77_int f77_index; \ \ /* If the vector is empty, return an index of zero. This early check is needed to emulate netlib BLAS. Without it, bli_?amaxv() will return 0, which ends up getting incremented to 1 (below) before being returned, which is not what we want. */ \ if ( *n < 1 || *incx <= 0 ) return 0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. */ \ PASTEMAC2(chx,blisname,BLIS_TAPI_EX_SUF) \ ( \ n0, \ x0, incx0, \ &bli_index, \ NULL, \ NULL \ ); \ \ /* Convert zero-based BLIS (C) index to one-based BLAS (Fortran) index. Also, if the BLAS integer size differs from the BLIS integer size, that typecast occurs here. */ \ f77_index = bli_index + 1; \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ \ return f77_index; \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( amax, amaxv ) #endif blis-0.6.1/frame/compat/bla_amax.h000066400000000000000000000037051360743507500167600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype_x, chx, blasname ) \ \ BLIS_EXPORT_BLAS f77_int PASTEF772(i,chx,blasname) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( amax ) #endif blis-0.6.1/frame/compat/bla_asum.c000066400000000000000000000051161360743507500167700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCR2 #define GENTFUNCR2( ftype_x, ftype_r, chx, chr, blasname, blisname ) \ \ ftype_r PASTEF772(chr,chx,blasname) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx \ ) \ { \ dim_t n0; \ ftype_x* x0; \ inc_t incx0; \ ftype_r asum; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. */ \ PASTEMAC2(chx,blisname,BLIS_TAPI_EX_SUF) \ ( \ n0, \ x0, incx0, \ &asum, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ \ return asum; \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCR2_BLAS( asum, asumv ) #endif blis-0.6.1/frame/compat/bla_asum.h000066400000000000000000000037331360743507500170000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTR2 #define GENTPROTR2( ftype_x, ftype_r, chx, chr, blasname ) \ \ BLIS_EXPORT_BLAS ftype_r PASTEF772(chr,chx,blasname) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTR2_BLAS( asum ) #endif blis-0.6.1/frame/compat/bla_axpy.c000066400000000000000000000053341360743507500170060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_int* n, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ ftype* y, const f77_int* incy \ ) \ { \ dim_t n0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ n0, \ (ftype*)alpha, \ x0, incx0, \ y0, incy0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( axpy, axpyv ) #endif blis-0.6.1/frame/compat/bla_axpy.h000066400000000000000000000040111360743507500170020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_int* n, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( axpy ) #endif blis-0.6.1/frame/compat/bla_copy.c000066400000000000000000000052511360743507500167750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_int* n, \ const ftype* x, const f77_int* incx, \ ftype* y, const f77_int* incy \ ) \ { \ dim_t n0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ n0, \ x0, incx0, \ y0, incy0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( copy, copyv ) #endif blis-0.6.1/frame/compat/bla_copy.h000066400000000000000000000037521360743507500170060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_int* n, \ const ftype* x, const f77_int* incx, \ ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( copy ) #endif blis-0.6.1/frame/compat/bla_dot.c000066400000000000000000000104561360743507500166140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCDOT #define GENTFUNCDOT( ftype, ch, chc, blis_conjx, blasname, blisname ) \ \ ftype PASTEF772(ch,blasname,chc) \ ( \ const f77_int* n, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy \ ) \ { \ dim_t n0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ ftype rho; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_conjx, \ BLIS_NO_CONJUGATE, \ n0, \ x0, incx0, \ y0, incy0, \ &rho, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ \ return rho; \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCDOT_BLAS( dot, dotv ) // -- "Black sheep" dot product function definitions -- // Input vectors stored in single precision, computed in double precision, // with result returned in single precision. float PASTEF77(sd,sdot) ( const f77_int* n, const float* sb, const float* x, const f77_int* incx, const float* y, const f77_int* incy ) { return ( float ) ( ( double )(*sb) + PASTEF77(d,sdot) ( n, x, incx, y, incy ) ); } // Input vectors stored in single precision, computed in double precision, // with result returned in double precision. double PASTEF77(d,sdot) ( const f77_int* n, const float* x, const f77_int* incx, const float* y, const f77_int* incy ) { dim_t n0; float* x0; float* y0; inc_t incx0; inc_t incy0; double rho; dim_t i; /* Initialization of BLIS is not required. */ /* Convert/typecast negative values of n to zero. 
*/ bli_convert_blas_dim1( *n, n0 ); /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ bli_convert_blas_incv( n0, (float*)x, *incx, x0, incx0 ); bli_convert_blas_incv( n0, (float*)y, *incy, y0, incy0 ); rho = 0.0; for ( i = 0; i < n0; i++ ) { float* chi1 = x0 + (i )*incx0; float* psi1 = y0 + (i )*incy0; bli_ddots( (( double )(*chi1)), (( double )(*psi1)), rho ); } /* Finalization of BLIS is not required, because initialization was not required. */ return rho; } #endif blis-0.6.1/frame/compat/bla_dot.h000066400000000000000000000046601360743507500166210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTDOT #define GENTPROTDOT( ftype, ch, chc, blasname ) \ \ BLIS_EXPORT_BLAS ftype PASTEF772(ch,blasname,chc) \ ( \ const f77_int* n, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTDOT_BLAS( dot ) // -- "Black sheep" dot product function prototypes -- BLIS_EXPORT_BLAS float PASTEF77(sd,sdot) ( const f77_int* n, const float* sb, const float* x, const f77_int* incx, const float* y, const f77_int* incy ); BLIS_EXPORT_BLAS double PASTEF77(d,sdot) ( const f77_int* n, const float* x, const f77_int* incx, const float* y, const f77_int* incy ); #endif blis-0.6.1/frame/compat/bla_gemm.c000066400000000000000000000137371360743507500167600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ const f77_int* m, \ const f77_int* n, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ trans_t blis_transa; \ trans_t blis_transb; \ dim_t m0, n0, k0; \ inc_t rs_a, cs_a; \ inc_t rs_b, cs_b; \ inc_t rs_c, cs_c; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ transa, \ transb, \ m, \ n, \ k, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_trans( *transb, &blis_transb ); \ \ /* Typecast BLAS integers to BLIS integers. 
*/ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* Set the row and column strides of the matrix operands. */ \ rs_a = 1; \ cs_a = *lda; \ rs_b = 1; \ cs_b = *ldb; \ rs_c = 1; \ cs_c = *ldc; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_transa, \ blis_transb, \ m0, \ n0, \ k0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype*)b, rs_b, cs_b, \ (ftype*)beta, \ (ftype*)c, rs_c, cs_c, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ const f77_int* m, \ const f77_int* n, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ trans_t blis_transa; \ trans_t blis_transb; \ dim_t m0, n0, k0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ transa, \ transb, \ m, \ n, \ k, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_trans( *transb, &blis_transb ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* Set the row and column strides of the matrix operands. 
*/ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_b = 1; \ const inc_t cs_b = *ldb; \ const inc_t rs_c = 1; \ const inc_t cs_c = *ldc; \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m0_a, n0_a; \ dim_t m0_b, n0_b; \ \ bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ bli_set_dims_with_trans( blis_transb, k0, n0, &m0_b, &n0_b ); \ \ bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ \ bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m0, n0, (ftype*)c, rs_c, cs_c, &co ); \ \ bli_obj_set_conjtrans( blis_transa, &ao ); \ bli_obj_set_conjtrans( blis_transb, &bo ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( gemm, gemm ) #endif blis-0.6.1/frame/compat/bla_gemm.h000066400000000000000000000043251360743507500167560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* transa, \ const f77_char* transb, \ const f77_int* m, \ const f77_int* n, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( gemm ) #endif blis-0.6.1/frame/compat/bla_gemv.c000066400000000000000000000106631360743507500167640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* transa, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* x, const f77_int* incx, \ const ftype* beta, \ ftype* y, const f77_int* incy \ ) \ { \ trans_t blis_transa; \ dim_t m0, n0; \ dim_t m_y, n_x; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ inc_t rs_a, cs_a; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ transa, \ m, \ n, \ lda, \ incx, \ incy \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. 
*/ \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* Convert/typecast negative values of m and n to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Determine the dimensions of x and y so we can adjust the increments, if necessary.*/ \ bli_set_dims_with_trans( blis_transa, m0, n0, &m_y, &n_x ); \ \ /* BLAS handles cases where trans(A) has no columns, and x has no elements, in a peculiar way. In these situations, BLAS returns without performing any action, even though most sane interpretations of gemv would have the the operation reduce to y := beta * y. Here, we catch those cases that BLAS would normally mishandle and emulate the BLAS exactly so as to provide "bug-for-bug" compatibility. Note that this extreme level of compatibility would not be as much of an issue if it weren't for the fact that some BLAS test suites actually test for these cases. Also, it should be emphasized that BLIS, if called natively, does NOT exhibit this quirky behavior; it will scale y by beta, as one would expect. */ \ if ( m_y > 0 && n_x == 0 ) \ { \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ \ return; \ } \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n_x, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m_y, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_transa, \ BLIS_NO_CONJUGATE, \ m0, \ n0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ x0, incx0, \ (ftype*)beta, \ y0, incy0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( gemv, gemv ) #endif blis-0.6.1/frame/compat/bla_gemv.h000066400000000000000000000042321360743507500167640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* transa, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* x, const f77_int* incx, \ const ftype* beta, \ ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( gemv ) #endif blis-0.6.1/frame/compat/bla_ger.c000066400000000000000000000063071360743507500166030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCDOT #define GENTFUNCDOT( ftype, ch, chc, blis_conjy, blasname, blisname ) \ \ void PASTEF772(ch,blasname,chc) \ ( \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy, \ ftype* a, const f77_int* lda \ ) \ { \ dim_t m0, n0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ inc_t rs_a, cs_a; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ MKSTR(chc), \ m, \ n, \ incx, \ incy, \ lda \ ); \ \ /* Convert/typecast negative values of m and n to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ blis_conjy, \ m0, \ n0, \ (ftype*)alpha, \ x0, incx0, \ y0, incy0, \ (ftype*)a, rs_a, cs_a, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCDOT_BLAS( ger, ger ) #endif blis-0.6.1/frame/compat/bla_ger.h000066400000000000000000000041521360743507500166040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTDOT #define GENTPROTDOT( ftype, chxy, chc, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF772(chxy,blasname,chc) \ ( \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy, \ ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTDOT_BLAS( ger ) #endif blis-0.6.1/frame/compat/bla_hemm.c000066400000000000000000000140731360743507500167530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ side_t blis_side; \ uplo_t blis_uploa; \ dim_t m0, n0; \ inc_t rs_a, cs_a; \ inc_t rs_b, cs_b; \ inc_t rs_c, cs_c; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ side, \ uploa, \ m, \ n, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Set the row and column strides of the matrix operands. */ \ rs_a = 1; \ cs_a = *lda; \ rs_b = 1; \ cs_b = *ldb; \ rs_c = 1; \ cs_c = *ldc; \ \ /* Call BLIS interface. 
*/ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_side, \ blis_uploa, \ BLIS_NO_CONJUGATE, \ BLIS_NO_TRANSPOSE, \ m0, \ n0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype*)b, rs_b, cs_b, \ (ftype*)beta, \ (ftype*)c, rs_c, cs_c, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ side_t blis_side; \ uplo_t blis_uploa; \ dim_t m0, n0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ side, \ uploa, \ m, \ n, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Set the row and column strides of the matrix operands. 
*/ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_b = 1; \ const inc_t cs_b = *ldb; \ const inc_t rs_c = 1; \ const inc_t cs_c = *ldc; \ \ const num_t dt = PASTEMAC(ch,type); \ \ const conj_t conja = BLIS_NO_CONJUGATE; \ const trans_t transb = BLIS_NO_TRANSPOSE; \ const struc_t struca = BLIS_HERMITIAN; \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn0_a; \ dim_t m0_b, n0_b; \ \ bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); \ bli_set_dims_with_trans( transb, m0, n0, &m0_b, &n0_b ); \ \ bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ \ bli_obj_init_finish( dt, mn0_a, mn0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m0, n0, (ftype*)c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( blis_uploa, &ao ); \ bli_obj_set_conj( conja, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( struca, &ao ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ blis_side, \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( hemm, hemm ) #endif blis-0.6.1/frame/compat/bla_hemm.h000066400000000000000000000043121360743507500167530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( hemm ) #endif blis-0.6.1/frame/compat/bla_hemv.c000066400000000000000000000065571360743507500167740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* x, const f77_int* incx, \ const ftype* beta, \ ftype* y, const f77_int* incy \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ inc_t rs_a, cs_a; \ \ /* Initialize BLIS. 
*/ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploa, \ m, \ lda, \ incx, \ incy \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploa, \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ m0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ x0, incx0, \ (ftype*)beta, \ y0, incy0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( hemv, hemv ) #endif blis-0.6.1/frame/compat/bla_hemv.h000066400000000000000000000042211360743507500167630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* x, const f77_int* incx, \ const ftype* beta, \ ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( hemv ) #endif blis-0.6.1/frame/compat/bla_her.c000066400000000000000000000061731360743507500166050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype_r* alpha, \ const ftype* x, const f77_int* incx, \ ftype* a, const f77_int* lda \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ inc_t rs_a, cs_a; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploa, \ m, \ incx, \ lda \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ \ /* Set the row and column strides of A. 
*/ \ rs_a = 1; \ cs_a = *lda; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploa, \ BLIS_NO_CONJUGATE, \ m0, \ (ftype_r*)alpha, \ x0, incx0, \ (ftype*)a, rs_a, cs_a, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( her, her ) #endif blis-0.6.1/frame/compat/bla_her.h000066400000000000000000000041001360743507500165760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype_r* alpha, \ const ftype* x, const f77_int* incx, \ ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( her ) #endif blis-0.6.1/frame/compat/bla_her2.c000066400000000000000000000064751360743507500166740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy, \ ftype* a, const f77_int* lda \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ inc_t rs_a, cs_a; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploa, \ m, \ incx, \ incy, \ lda \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Call BLIS interface. 
*/ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploa, \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ m0, \ (ftype*)alpha, \ x0, incx0, \ y0, incy0, \ (ftype*)a, rs_a, cs_a, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( her2, her2 ) #endif blis-0.6.1/frame/compat/bla_her2.h000066400000000000000000000041621360743507500166700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy, \ ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( her2 ) #endif blis-0.6.1/frame/compat/bla_her2k.c000066400000000000000000000160001360743507500170300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype_r* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ trans_t blis_transa; \ dim_t m0, k0; \ inc_t rs_a, cs_a; \ inc_t rs_b, cs_b; \ inc_t rs_c, cs_c; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploc, \ transa, \ m, \ k, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* Typecast BLAS integers to BLIS integers. 
*/ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* We emulate the BLAS early return behavior with the following conditional, which returns if one of the following is true: - matrix C is empty - the rank-2k product is empty (either because alpha is zero or k is zero) AND matrix C is not scaled. */ \ if ( m0 == 0 || \ ( ( PASTEMAC(ch,eq0)( *alpha ) || k0 == 0 ) \ && PASTEMAC(chr,eq1)( *beta ) \ ) \ ) \ { \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ \ return; \ } \ \ /* Set the row and column strides of the matrix operands. */ \ rs_a = 1; \ cs_a = *lda; \ rs_b = 1; \ cs_b = *ldb; \ rs_c = 1; \ cs_c = *ldc; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploc, \ blis_transa, \ blis_transa, \ m0, \ k0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype*)b, rs_b, cs_b, \ (ftype_r*)beta, \ (ftype*)c, rs_c, cs_c, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype_r* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ trans_t blis_transa; \ dim_t m0, k0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploc, \ transa, \ m, \ k, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* Typecast BLAS integers to BLIS integers. 
*/ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* We emulate the BLAS early return behavior with the following conditional, which returns if one of the following is true: - matrix C is empty - the rank-2k product is empty (either because alpha is zero or k is zero) AND matrix C is not scaled. */ \ if ( m0 == 0 || \ ( ( PASTEMAC(ch,eq0)( *alpha ) || k0 == 0 ) \ && PASTEMAC(chr,eq1)( *beta ) \ ) \ ) \ { \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ \ return; \ } \ \ /* Set the row and column strides of the matrix operands. */ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_b = 1; \ const inc_t cs_b = *ldb; \ const inc_t rs_c = 1; \ const inc_t cs_c = *ldc; \ \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ const trans_t transb = blis_transa; \ const struc_t strucc = BLIS_HERMITIAN; \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m0_a, n0_a; \ dim_t m0_b, n0_b; \ \ bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ bli_set_dims_with_trans( transb, m0, k0, &m0_b, &n0_b ); \ \ bli_obj_init_finish_1x1( dt, (ftype* )alpha, &alphao ); \ bli_obj_init_finish_1x1( dt_r, (ftype_r*)beta, &betao ); \ \ bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m0, m0, (ftype*)c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( blis_uploc, &co ); \ bli_obj_set_conjtrans( blis_transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( strucc, &co ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( her2k, her2k ) #endif blis-0.6.1/frame/compat/bla_her2k.h000066400000000000000000000043151360743507500170430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype_r* beta, \ ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( her2k ) #endif blis-0.6.1/frame/compat/bla_herk.c000066400000000000000000000147101360743507500167540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype_r* alpha, \ const ftype* a, const f77_int* lda, \ const ftype_r* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ trans_t blis_transa; \ dim_t m0, k0; \ inc_t rs_a, cs_a; \ inc_t rs_c, cs_c; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploc, \ transa, \ m, \ k, \ lda, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* We emulate the BLAS early return behavior with the following conditional, which returns if one of the following is true: - matrix C is empty - the rank-k product is empty (either because alpha is zero or k is zero) AND matrix C is not scaled. */ \ if ( m0 == 0 || \ ( ( PASTEMAC(chr,eq0)( *alpha ) || k0 == 0 ) \ && PASTEMAC(chr,eq1)( *beta ) \ ) \ ) \ { \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ \ return; \ } \ \ /* Set the row and column strides of the matrix operands. */ \ rs_a = 1; \ cs_a = *lda; \ rs_c = 1; \ cs_c = *ldc; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploc, \ blis_transa, \ m0, \ k0, \ (ftype_r*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype_r*)beta, \ (ftype*)c, rs_c, cs_c, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNCCO #define GENTFUNCCO( ftype, ftype_r, ch, chr, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype_r* alpha, \ const ftype* a, const f77_int* lda, \ const ftype_r* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ trans_t blis_transa; \ dim_t m0, k0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploc, \ transa, \ m, \ k, \ lda, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* We emulate the BLAS early return behavior with the following conditional, which returns if one of the following is true: - matrix C is empty - the rank-k product is empty (either because alpha is zero or k is zero) AND matrix C is not scaled. */ \ if ( m0 == 0 || \ ( ( PASTEMAC(chr,eq0)( *alpha ) || k0 == 0 ) \ && PASTEMAC(chr,eq1)( *beta ) \ ) \ ) \ { \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ \ return; \ } \ \ /* Set the row and column strides of the matrix operands. 
*/ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_c = 1; \ const inc_t cs_c = *ldc; \ \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ const struc_t strucc = BLIS_HERMITIAN; \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m0_a, n0_a; \ \ bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ \ bli_obj_init_finish_1x1( dt_r, (ftype_r*)alpha, &alphao ); \ bli_obj_init_finish_1x1( dt_r, (ftype_r*)beta, &betao ); \ \ bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0, m0, (ftype*)c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( blis_uploc, &co ); \ bli_obj_set_conjtrans( blis_transa, &ao ); \ \ bli_obj_set_struc( strucc, &co ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &betao, \ &co, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCCO_BLAS( herk, herk ) #endif blis-0.6.1/frame/compat/bla_herk.h000066400000000000000000000042341360743507500167610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTCO #define GENTPROTCO( ftype, ftype_r, ch, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype_r* alpha, \ const ftype* a, const f77_int* lda, \ const ftype_r* beta, \ ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTCO_BLAS( herk ) #endif blis-0.6.1/frame/compat/bla_nrm2.c000066400000000000000000000051171360743507500167020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCR2 #define GENTFUNCR2( ftype_x, ftype_r, chx, chr, blasname, blisname ) \ \ ftype_r PASTEF772(chr,chx,blasname) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx \ ) \ { \ dim_t n0; \ ftype_x* x0; \ inc_t incx0; \ ftype_r norm; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. */ \ PASTEMAC2(chx,blisname,BLIS_TAPI_EX_SUF) \ ( \ n0, \ x0, incx0, \ &norm, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ \ return norm; \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCR2_BLAS( nrm2, normfv ) #endif blis-0.6.1/frame/compat/bla_nrm2.h000066400000000000000000000037331360743507500167110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTR2 #define GENTPROTR2( ftype_x, ftype_r, chx, chr, blasname ) \ \ BLIS_EXPORT_BLAS ftype_r PASTEF772(chr,chx,blasname) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTR2_BLAS( nrm2 ) #endif blis-0.6.1/frame/compat/bla_scal.c000066400000000000000000000057041360743507500167500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCSCAL #define GENTFUNCSCAL( ftype_x, ftype_a, chx, cha, blasname, blisname ) \ \ void PASTEF772(chx,cha,blasname) \ ( \ const f77_int* n, \ const ftype_a* alpha, \ ftype_x* x, const f77_int* incx \ ) \ { \ dim_t n0; \ ftype_x* x0; \ inc_t incx0; \ ftype_x alpha_cast; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \ \ /* NOTE: We do not natively implement BLAS's csscal/zdscal in BLIS. that is, we just always sub-optimally implement those cases by casting alpha to ctype_x (potentially the complex domain) and using the homogeneous datatype instance according to that type. */ \ PASTEMAC2(cha,chx,copys)( *alpha, alpha_cast ); \ \ /* Call BLIS interface. */ \ PASTEMAC2(chx,blisname,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ n0, \ &alpha_cast, \ x0, incx0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCSCAL_BLAS( scal, scalv ) #endif blis-0.6.1/frame/compat/bla_scal.h000066400000000000000000000037671360743507500167640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROTSCAL #define GENTPROTSCAL( ftype_a, ftype_x, cha, chx, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF772(chx,cha,blasname) \ ( \ const f77_int* n, \ const ftype_a* alpha, \ ftype_x* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTSCAL_BLAS( scal ) #endif blis-0.6.1/frame/compat/bla_swap.c000066400000000000000000000052051360743507500167740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_int* n, \ ftype* x, const f77_int* incx, \ ftype* y, const f77_int* incy \ ) \ { \ dim_t n0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Convert/typecast negative values of n to zero. */ \ bli_convert_blas_dim1( *n, n0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Call BLIS interface. 
*/ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ n0, \ x0, incx0, \ y0, incy0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( swap, swapv ) #endif blis-0.6.1/frame/compat/bla_swap.h000066400000000000000000000037361360743507500170100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_int* n, \ ftype* x, const f77_int* incx, \ ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( swap ) #endif blis-0.6.1/frame/compat/bla_symm.c000066400000000000000000000140251360743507500170070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ side_t blis_side; \ uplo_t blis_uploa; \ dim_t m0, n0; \ inc_t rs_a, cs_a; \ inc_t rs_b, cs_b; \ inc_t rs_c, cs_c; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ side, \ uploa, \ m, \ n, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Set the row and column strides of the matrix operands. */ \ rs_a = 1; \ cs_a = *lda; \ rs_b = 1; \ cs_b = *ldb; \ rs_c = 1; \ cs_c = *ldc; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_side, \ blis_uploa, \ BLIS_NO_CONJUGATE, \ BLIS_NO_TRANSPOSE, \ m0, \ n0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype*)b, rs_b, cs_b, \ (ftype*)beta, \ (ftype*)c, rs_c, cs_c, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ side_t blis_side; \ uplo_t blis_uploa; \ dim_t m0, n0; \ \ /* Initialize BLIS. 
*/ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ side, \ uploa, \ m, \ n, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Set the row and column strides of the matrix operands. */ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_b = 1; \ const inc_t cs_b = *ldb; \ const inc_t rs_c = 1; \ const inc_t cs_c = *ldc; \ \ const num_t dt = PASTEMAC(ch,type); \ \ const conj_t conja = BLIS_NO_CONJUGATE; \ const trans_t transb = BLIS_NO_TRANSPOSE; \ const struc_t struca = BLIS_SYMMETRIC; \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn0_a; \ dim_t m0_b, n0_b; \ \ bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); \ bli_set_dims_with_trans( transb, m0, n0, &m0_b, &n0_b ); \ \ bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ \ bli_obj_init_finish( dt, mn0_a, mn0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m0, n0, (ftype*)c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( blis_uploa, &ao ); \ bli_obj_set_conj( conja, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( struca, &ao ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ blis_side, \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( symm, symm ) #endif blis-0.6.1/frame/compat/bla_symm.h000066400000000000000000000042661360743507500170220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( symm ) #endif blis-0.6.1/frame/compat/bla_symv.c000066400000000000000000000065411360743507500170240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* x, const f77_int* incx, \ const ftype* beta, \ ftype* y, const f77_int* incy \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ inc_t rs_a, cs_a; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploa, \ m, \ lda, \ incx, \ incy \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploa, \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ m0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ x0, incx0, \ (ftype*)beta, \ y0, incy0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( symv, symv ) #endif blis-0.6.1/frame/compat/bla_symv.h000066400000000000000000000042031360743507500170220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* x, const f77_int* incx, \ const ftype* beta, \ ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( symv ) #endif blis-0.6.1/frame/compat/bla_syr.c000066400000000000000000000061531360743507500166420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ ftype* a, const f77_int* lda \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ inc_t rs_a, cs_a; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploa, \ m, \ incx, \ lda \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploa, \ BLIS_NO_CONJUGATE, \ m0, \ (ftype*)alpha, \ x0, incx0, \ (ftype*)a, rs_a, cs_a, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. 
*/ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( syr, syr ) #endif blis-0.6.1/frame/compat/bla_syr.h000066400000000000000000000040621360743507500166440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( syr ) #endif blis-0.6.1/frame/compat/bla_syr2.c000066400000000000000000000064611360743507500167260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNCRO #define GENTFUNCRO( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy, \ ftype* a, const f77_int* lda \ ) \ { \ uplo_t blis_uploa; \ dim_t m0; \ ftype* x0; \ ftype* y0; \ inc_t incx0; \ inc_t incy0; \ inc_t rs_a, cs_a; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploa, \ m, \ incx, \ incy, \ lda \ ); \ \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ \ /* Convert/typecast negative values of m to zero. */ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ bli_convert_blas_incv( m0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploa, \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ m0, \ (ftype*)alpha, \ x0, incx0, \ y0, incy0, \ (ftype*)a, rs_a, cs_a, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNCRO_BLAS( syr2, syr2 ) #endif blis-0.6.1/frame/compat/bla_syr2.h000066400000000000000000000041441360743507500167270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROTRO #define GENTPROTRO( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_int* m, \ const ftype* alpha, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy, \ ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROTRO_BLAS( syr2 ) #endif blis-0.6.1/frame/compat/bla_syr2k.c000066400000000000000000000152141360743507500170750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ trans_t blis_transa; \ dim_t m0, k0; \ inc_t rs_a, cs_a; \ inc_t rs_b, cs_b; \ inc_t rs_c, cs_c; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploc, \ transa, \ m, \ k, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* The real domain ssyr2k and dsyr2k in netlib BLAS treat a trans value of 'C' (conjugate-transpose) as 'T' (transpose only). So, we have to go out of our way a little to support this behavior. */ \ if ( bli_is_real( PASTEMAC(ch,type) ) && \ bli_is_conjtrans( blis_transa ) ) \ { \ blis_transa = BLIS_TRANSPOSE; \ } \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* Set the row and column strides of the matrix operands. 
*/ \ rs_a = 1; \ cs_a = *lda; \ rs_b = 1; \ cs_b = *ldb; \ rs_c = 1; \ cs_c = *ldc; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploc, \ blis_transa, \ blis_transa, \ m0, \ k0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype*)b, rs_b, cs_b, \ (ftype*)beta, \ (ftype*)c, rs_c, cs_c, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ trans_t blis_transa; \ dim_t m0, k0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploc, \ transa, \ m, \ k, \ lda, \ ldb, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* The real domain ssyr2k and dsyr2k in netlib BLAS treat a trans value of 'C' (conjugate-transpose) as 'T' (transpose only). So, we have to go out of our way a little to support this behavior. */ \ if ( bli_is_real( PASTEMAC(ch,type) ) && \ bli_is_conjtrans( blis_transa ) ) \ { \ blis_transa = BLIS_TRANSPOSE; \ } \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* Set the row and column strides of the matrix operands. 
*/ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_b = 1; \ const inc_t cs_b = *ldb; \ const inc_t rs_c = 1; \ const inc_t cs_c = *ldc; \ \ const num_t dt = PASTEMAC(ch,type); \ \ const trans_t transb = blis_transa; \ const struc_t strucc = BLIS_SYMMETRIC; \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m0_a, n0_a; \ dim_t m0_b, n0_b; \ \ bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ bli_set_dims_with_trans( transb, m0, k0, &m0_b, &n0_b ); \ \ bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ \ bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0_b, n0_b, (ftype*)b, rs_b, cs_b, &bo ); \ bli_obj_init_finish( dt, m0, m0, (ftype*)c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( blis_uploc, &co ); \ bli_obj_set_conjtrans( blis_transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( strucc, &co ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( syr2k, syr2k ) #endif blis-0.6.1/frame/compat/bla_syr2k.h000066400000000000000000000042711360743507500171030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* b, const f77_int* ldb, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( syr2k ) #endif blis-0.6.1/frame/compat/bla_syrk.c000066400000000000000000000141161360743507500170130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ trans_t blis_transa; \ dim_t m0, k0; \ inc_t rs_a, cs_a; \ inc_t rs_c, cs_c; \ \ /* Initialize BLIS. 
*/ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploc, \ transa, \ m, \ k, \ lda, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* The real domain ssyrk and dsyrk in netlib BLAS treat a trans value of 'C' (conjugate-transpose) as 'T' (transpose only). So, we have to go out of our way a little to support this behavior. */ \ if ( bli_is_real( PASTEMAC(ch,type) ) && \ bli_is_conjtrans( blis_transa ) ) \ { \ blis_transa = BLIS_TRANSPOSE; \ } \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* Set the row and column strides of the matrix operands. */ \ rs_a = 1; \ cs_a = *lda; \ rs_c = 1; \ cs_c = *ldc; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploc, \ blis_transa, \ m0, \ k0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype*)beta, \ (ftype*)c, rs_c, cs_c, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ trans_t blis_transa; \ dim_t m0, k0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploc, \ transa, \ m, \ k, \ lda, \ ldc \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. 
*/ \ bli_param_map_netlib_to_blis_uplo( *uploc, &blis_uploc ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ \ /* The real domain ssyrk and dsyrk in netlib BLAS treat a trans value of 'C' (conjugate-transpose) as 'T' (transpose only). So, we have to go out of our way a little to support this behavior. */ \ if ( bli_is_real( PASTEMAC(ch,type) ) && \ bli_is_conjtrans( blis_transa ) ) \ { \ blis_transa = BLIS_TRANSPOSE; \ } \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *k, k0 ); \ \ /* Set the row and column strides of the matrix operands. */ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_c = 1; \ const inc_t cs_c = *ldc; \ \ const num_t dt = PASTEMAC(ch,type); \ \ const struc_t strucc = BLIS_SYMMETRIC; \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t betao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t co = BLIS_OBJECT_INITIALIZER; \ \ dim_t m0_a, n0_a; \ \ bli_set_dims_with_trans( blis_transa, m0, k0, &m0_a, &n0_a ); \ \ bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ bli_obj_init_finish_1x1( dt, (ftype*)beta, &betao ); \ \ bli_obj_init_finish( dt, m0_a, n0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0, m0, (ftype*)c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( blis_uploc, &co ); \ bli_obj_set_conjtrans( blis_transa, &ao ); \ \ bli_obj_set_struc( strucc, &co ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ &alphao, \ &ao, \ &betao, \ &co, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( syrk, syrk ) #endif blis-0.6.1/frame/compat/bla_syrk.h000066400000000000000000000042101360743507500170120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. 
// #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploc, \ const f77_char* transa, \ const f77_int* m, \ const f77_int* k, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ const ftype* beta, \ ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( syrk ) #endif blis-0.6.1/frame/compat/bla_trmm.c000066400000000000000000000134441360743507500170050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ ftype* b, const f77_int* ldb \ ) \ { \ side_t blis_side; \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0, n0; \ inc_t rs_a, cs_a; \ inc_t rs_b, cs_b; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ side, \ uploa, \ transa, \ diaga, \ m, \ n, \ lda, \ ldb \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Set the row and column strides of the matrix operands. */ \ rs_a = 1; \ cs_a = *lda; \ rs_b = 1; \ cs_b = *ldb; \ \ /* Call BLIS interface. 
*/ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_side, \ blis_uploa, \ blis_transa, \ blis_diaga, \ m0, \ n0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype*)b, rs_b, cs_b, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ ftype* b, const f77_int* ldb \ ) \ { \ side_t blis_side; \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0, n0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ side, \ uploa, \ transa, \ diaga, \ m, \ n, \ lda, \ ldb \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Set the row and column strides of the matrix operands. 
*/ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_b = 1; \ const inc_t cs_b = *ldb; \ \ const num_t dt = PASTEMAC(ch,type); \ \ const struc_t struca = BLIS_TRIANGULAR; \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn0_a; \ \ bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); \ \ bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ \ bli_obj_init_finish( dt, mn0_a, mn0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0, n0, (ftype*)b, rs_b, cs_b, &bo ); \ \ bli_obj_set_uplo( blis_uploa, &ao ); \ bli_obj_set_diag( blis_diaga, &ao ); \ bli_obj_set_conjtrans( blis_transa, &ao ); \ \ bli_obj_set_struc( struca, &ao ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ blis_side, \ &alphao, \ &ao, \ &bo, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( trmm, trmm ) #endif blis-0.6.1/frame/compat/bla_trmm.h000066400000000000000000000042501360743507500170050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ ftype* b, const f77_int* ldb \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( trmm ) #endif blis-0.6.1/frame/compat/bla_trmv.c000066400000000000000000000067021360743507500170150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const ftype* a, const f77_int* lda, \ ftype* x, const f77_int* incx \ ) \ { \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ inc_t rs_a, cs_a; \ ftype* one_p; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploa, \ transa, \ diaga, \ m, \ lda, \ incx \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Convert/typecast negative values of m to zero. 
*/ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Acquire a pointer to the global scalar constant BLIS_ONE. */ \ one_p = PASTEMAC(ch,1); \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploa, \ blis_transa, \ blis_diaga, \ m0, \ one_p, \ (ftype*)a, rs_a, cs_a, \ x0, incx0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( trmv, trmv ) #endif blis-0.6.1/frame/compat/bla_trmv.h000066400000000000000000000041161360743507500170170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const ftype* a, const f77_int* lda, \ ftype* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( trmv ) #endif blis-0.6.1/frame/compat/bla_trsm.c000066400000000000000000000134441360743507500170130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #ifdef BLIS_BLAS3_CALLS_TAPI #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ ftype* b, const f77_int* ldb \ ) \ { \ side_t blis_side; \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0, n0; \ inc_t rs_a, cs_a; \ inc_t rs_b, cs_b; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ side, \ uploa, \ transa, \ diaga, \ m, \ n, \ lda, \ ldb \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Typecast BLAS integers to BLIS integers. 
*/ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Set the row and column strides of the matrix operands. */ \ rs_a = 1; \ cs_a = *lda; \ rs_b = 1; \ cs_b = *ldb; \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_side, \ blis_uploa, \ blis_transa, \ blis_diaga, \ m0, \ n0, \ (ftype*)alpha, \ (ftype*)a, rs_a, cs_a, \ (ftype*)b, rs_b, cs_b, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #else #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ ftype* b, const f77_int* ldb \ ) \ { \ side_t blis_side; \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0, n0; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ side, \ uploa, \ transa, \ diaga, \ m, \ n, \ lda, \ ldb \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_side( *side, &blis_side ); \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Typecast BLAS integers to BLIS integers. */ \ bli_convert_blas_dim1( *m, m0 ); \ bli_convert_blas_dim1( *n, n0 ); \ \ /* Set the row and column strides of the matrix operands. 
*/ \ const inc_t rs_a = 1; \ const inc_t cs_a = *lda; \ const inc_t rs_b = 1; \ const inc_t cs_b = *ldb; \ \ const num_t dt = PASTEMAC(ch,type); \ \ const struc_t struca = BLIS_TRIANGULAR; \ \ obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1; \ obj_t ao = BLIS_OBJECT_INITIALIZER; \ obj_t bo = BLIS_OBJECT_INITIALIZER; \ \ dim_t mn0_a; \ \ bli_set_dim_with_side( blis_side, m0, n0, &mn0_a ); \ \ bli_obj_init_finish_1x1( dt, (ftype*)alpha, &alphao ); \ \ bli_obj_init_finish( dt, mn0_a, mn0_a, (ftype*)a, rs_a, cs_a, &ao ); \ bli_obj_init_finish( dt, m0, n0, (ftype*)b, rs_b, cs_b, &bo ); \ \ bli_obj_set_uplo( blis_uploa, &ao ); \ bli_obj_set_diag( blis_diaga, &ao ); \ bli_obj_set_conjtrans( blis_transa, &ao ); \ \ bli_obj_set_struc( struca, &ao ); \ \ PASTEMAC(blisname,BLIS_OAPI_EX_SUF) \ ( \ blis_side, \ &alphao, \ &ao, \ &bo, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #endif #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( trsm, trsm ) #endif blis-0.6.1/frame/compat/bla_trsm.h000066400000000000000000000042501360743507500170130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-to-BLIS interfaces. // #undef GENTPROT #define GENTPROT( ftype, ch, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \ ( \ const f77_char* side, \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const f77_int* n, \ const ftype* alpha, \ const ftype* a, const f77_int* lda, \ ftype* b, const f77_int* ldb \ ); #ifdef BLIS_ENABLE_BLAS INSERT_GENTPROT_BLAS( trsm ) #endif blis-0.6.1/frame/compat/bla_trsv.c000066400000000000000000000067021360743507500170230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-to-BLIS interfaces. // #undef GENTFUNC #define GENTFUNC( ftype, ch, blasname, blisname ) \ \ void PASTEF77(ch,blasname) \ ( \ const f77_char* uploa, \ const f77_char* transa, \ const f77_char* diaga, \ const f77_int* m, \ const ftype* a, const f77_int* lda, \ ftype* x, const f77_int* incx \ ) \ { \ uplo_t blis_uploa; \ trans_t blis_transa; \ diag_t blis_diaga; \ dim_t m0; \ ftype* x0; \ inc_t incx0; \ inc_t rs_a, cs_a; \ ftype* one_p; \ \ /* Initialize BLIS. */ \ bli_init_auto(); \ \ /* Perform BLAS parameter checking. */ \ PASTEBLACHK(blasname) \ ( \ MKSTR(ch), \ MKSTR(blasname), \ uploa, \ transa, \ diaga, \ m, \ lda, \ incx \ ); \ \ /* Map BLAS chars to their corresponding BLIS enumerated type value. */ \ bli_param_map_netlib_to_blis_uplo( *uploa, &blis_uploa ); \ bli_param_map_netlib_to_blis_trans( *transa, &blis_transa ); \ bli_param_map_netlib_to_blis_diag( *diaga, &blis_diaga ); \ \ /* Convert/typecast negative values of m to zero. 
*/ \ bli_convert_blas_dim1( *m, m0 ); \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ cs_a = *lda; \ \ /* Acquire a pointer to the global scalar constant BLIS_ONE. */ \ one_p = PASTEMAC(ch,1); \ \ /* Call BLIS interface. */ \ PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \ ( \ blis_uploa, \ blis_transa, \ blis_diaga, \ m0, \ one_p, \ (ftype*)a, rs_a, cs_a, \ x0, incx0, \ NULL, \ NULL \ ); \ \ /* Finalize BLIS. */ \ bli_finalize_auto(); \ } #ifdef BLIS_ENABLE_BLAS INSERT_GENTFUNC_BLAS( trsv, trsv ) #endif blis-0.6.1/frame/compat/bla_trsv.h000066400000000000000000000041161360743507500170250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

//
// Prototype BLAS-to-BLIS interfaces.
//
// GENTPROT is a prototype template. Each expansion declares one
// exported routine named by PASTEF77(ch,blasname):
//   ftype     element type of the matrix a and the vector x.
//   ch        type character pasted into the generated name.
//   blasname  BLAS operation name (here: trsv).
// All arguments are pointers; only x is non-const.
//
// NOTE(review): INSERT_GENTPROT_BLAS presumably expands this template
// once per supported datatype; its definition is not in this file.
//
#undef GENTPROT
#define GENTPROT( ftype, ch, blasname ) \
\
BLIS_EXPORT_BLAS void PASTEF77(ch,blasname) \
     ( \
       const f77_char* uploa, \
       const f77_char* transa, \
       const f77_char* diaga, \
       const f77_int* m, \
       const ftype* a, const f77_int* lda, \
       ftype* x, const f77_int* incx \
     );

// Only declare the trsv prototypes when the BLAS compatibility layer is
// enabled.
#ifdef BLIS_ENABLE_BLAS
INSERT_GENTPROT_BLAS( trsv )
#endif
blis-0.6.1/frame/compat/bli_blas.h000066400000000000000000000121321360743507500167550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // If the CBLAS compatibility layer was enabled while the BLAS layer // was not enabled, we must enable it here. #ifdef BLIS_ENABLE_CBLAS #ifndef BLIS_ENABLE_BLAS #define BLIS_ENABLE_BLAS #endif #endif // BLIS_ENABLE_CBLAS // By default, if the BLAS compatibility layer is enabled, we define // (include) all of the BLAS prototypes. However, if the user is // #including "blis.h" and also #including another header that also // declares the BLAS functions, then we provide an opportunity to // #undefine the BLIS_ENABLE_BLAS_DEFS macro (see below). #ifdef BLIS_ENABLE_BLAS #define BLIS_ENABLE_BLAS_DEFS #else #undef BLIS_ENABLE_BLAS_DEFS #endif // Skip prototyping all of the BLAS if the BLAS test drivers are being // compiled. #ifdef BLIS_VIA_BLASTEST #undef BLIS_ENABLE_BLAS_DEFS #endif // Skip prototyping all of the BLAS if the environment has defined the // macro BLIS_DISABLE_BLAS_DEFS. #ifdef BLIS_DISABLE_BLAS_DEFS #undef BLIS_ENABLE_BLAS_DEFS #endif // Begin including all BLAS prototypes. 
#ifdef BLIS_ENABLE_BLAS_DEFS // -- System headers needed by BLAS compatibility layer -- #include // for toupper(), used in xerbla() // -- Constants -- #define BLIS_MAX_BLAS_FUNC_STR_LENGTH (7+1) // -- Utility macros -- #include "bla_r_sign.h" #include "bla_d_sign.h" #include "bla_r_cnjg.h" #include "bla_d_cnjg.h" #include "bla_r_imag.h" #include "bla_d_imag.h" #include "bla_c_div.h" #include "bla_z_div.h" #include "bla_f__cabs.h" // needed by c_abs, z_abs #include "bla_r_abs.h" #include "bla_d_abs.h" #include "bla_c_abs.h" #include "bla_z_abs.h" #include "bla_lsame.h" #include "bla_xerbla.h" // -- Level-0 BLAS prototypes -- #include "bla_cabs1.h" // -- Level-1 BLAS prototypes -- #include "bla_amax.h" #include "bla_asum.h" #include "bla_axpy.h" #include "bla_copy.h" #include "bla_dot.h" #include "bla_nrm2.h" #include "bla_rot.h" #include "bla_rotg.h" #include "bla_rotm.h" #include "bla_rotmg.h" #include "bla_scal.h" #include "bla_swap.h" #include "f77_amax_sub.h" #include "f77_asum_sub.h" #include "f77_dot_sub.h" #include "f77_nrm2_sub.h" // -- Level-2 BLAS prototypes -- // dense #include "bla_gemv.h" #include "bla_ger.h" #include "bla_hemv.h" #include "bla_her.h" #include "bla_her2.h" #include "bla_symv.h" #include "bla_syr.h" #include "bla_syr2.h" #include "bla_trmv.h" #include "bla_trsv.h" #include "bla_gemv_check.h" #include "bla_ger_check.h" #include "bla_hemv_check.h" #include "bla_her_check.h" #include "bla_her2_check.h" #include "bla_symv_check.h" #include "bla_syr_check.h" #include "bla_syr2_check.h" #include "bla_trmv_check.h" #include "bla_trsv_check.h" // packed #include "bla_hpmv.h" #include "bla_hpr.h" #include "bla_hpr2.h" #include "bla_spmv.h" #include "bla_spr.h" #include "bla_spr2.h" #include "bla_tpmv.h" #include "bla_tpsv.h" // banded #include "bla_gbmv.h" #include "bla_hbmv.h" #include "bla_sbmv.h" #include "bla_tbmv.h" #include "bla_tbsv.h" // -- Level-3 BLAS prototypes -- #include "bla_gemm.h" #include "bla_hemm.h" #include "bla_herk.h" 
#include "bla_her2k.h" #include "bla_symm.h" #include "bla_syrk.h" #include "bla_syr2k.h" #include "bla_trmm.h" #include "bla_trsm.h" #include "bla_gemm_check.h" #include "bla_hemm_check.h" #include "bla_herk_check.h" #include "bla_her2k_check.h" #include "bla_symm_check.h" #include "bla_syrk_check.h" #include "bla_syr2k_check.h" #include "bla_trmm_check.h" #include "bla_trsm_check.h" // -- Fortran-compatible APIs to BLIS functions -- #include "b77_thread.h" #endif // BLIS_ENABLE_BLAS blis-0.6.1/frame/compat/blis/000077500000000000000000000000001360743507500157675ustar00rootroot00000000000000blis-0.6.1/frame/compat/blis/thread/000077500000000000000000000000001360743507500172365ustar00rootroot00000000000000blis-0.6.1/frame/compat/blis/thread/b77_thread.c000066400000000000000000000053021360743507500213300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define Fortran-compatible BLIS interfaces. // void PASTEF770(bli_thread_set_ways) ( const f77_int* jc, const f77_int* pc, const f77_int* ic, const f77_int* jr, const f77_int* ir ) { dim_t jc0 = *jc; dim_t pc0 = *pc; dim_t ic0 = *ic; dim_t jr0 = *jr; dim_t ir0 = *ir; // Initialize BLIS. bli_init_auto(); // Convert/typecast negative values to zero. //bli_convert_blas_dim1( *jc, jc0 ); //bli_convert_blas_dim1( *pc, pc0 ); //bli_convert_blas_dim1( *ic, ic0 ); //bli_convert_blas_dim1( *jr, jr0 ); //bli_convert_blas_dim1( *ir, ir0 ); // Call the BLIS function. bli_thread_set_ways( jc0, pc0, ic0, jr0, ir0 ); // Finalize BLIS. bli_finalize_auto(); } void PASTEF770(bli_thread_set_num_threads) ( const f77_int* nt ) { dim_t nt0 = *nt; // Initialize BLIS. bli_init_auto(); // Convert/typecast negative values to zero. //bli_convert_blas_dim1( *nt, nt0 ); // Call the BLIS function. bli_thread_set_num_threads( nt0 ); // Finalize BLIS. bli_finalize_auto(); } blis-0.6.1/frame/compat/blis/thread/b77_thread.h000066400000000000000000000037551360743507500213470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

//
// Prototype Fortran-compatible BLIS interfaces.
//

// Exported routine named by PASTEF770(bli_thread_set_ways). Takes five
// pointers to f77_int values: jc, pc, ic, jr, ir.
// NOTE(review): presumably these are the per-loop ways of parallelism
// handed to bli_thread_set_ways() -- confirm against the definition.
BLIS_EXPORT_BLAS void PASTEF770(bli_thread_set_ways)
     (
       const f77_int* jc,
       const f77_int* pc,
       const f77_int* ic,
       const f77_int* jr,
       const f77_int* ir
     );

// Exported routine named by PASTEF770(bli_thread_set_num_threads).
// Takes a pointer to a single f77_int value, nt.
// NOTE(review): presumably the requested total thread count -- confirm
// against the definition.
BLIS_EXPORT_BLAS void PASTEF770(bli_thread_set_num_threads)
     (
       const f77_int* nt
     );
blis-0.6.1/frame/compat/cblas/000077500000000000000000000000001360743507500161225ustar00rootroot00000000000000blis-0.6.1/frame/compat/cblas/bli_cblas.h000066400000000000000000000041551360743507500202120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_CBLAS_H #define BLIS_CBLAS_H #ifdef BLIS_ENABLE_CBLAS // Undefine these macros so that no internal conversion is done by CBLAS. // The function signatures have been modified to use the proper integer types // directly. #undef F77_INT #undef F77_CHAR // Include the main CBLAS header so that including this header file // (probably via blis.h) allows applications to access CBLAS // prototypes and definitions. #include "cblas.h" #endif // BLIS_ENABLE_CBLAS #endif blis-0.6.1/frame/compat/cblas/cblas.tgz000066400000000000000000006015341360743507500177450ustar00rootroot00000000000000‹ š8Mì½}â8²0ºÿN>…îìÙ É‚ y›Ù>÷BwÓMH ÛÉî9¿þâ$LȤ_æœùî·J’mÙ–eÙØºñô°õR*•ªJ¥ªrý´Uëîÿ%Ó«\.úyxH?Ëf•}ò‹ÆqhÕJå” Ó88ú 9È,v½Ìæý)€òûËhh)ÊA±û{Ås>çsM®:ëkÿéydͲ!„øó_9::ØÌ›ÿáx0z¹³2âñæÿçÿ°llæ?‹Íÿhx›¡ ˆ?ÿæ‘YÝÌ›ÿóþ'ë~8R ðqX­†Í¿Q)Uaþ+Õj¹ rŸòÿÃJù/¤œ 8ÞëŸÿ»ÑçyEJû¥-.ÈðæŽC¥áxk«?ýJàp ü˜ÏædkëÑ=ÿºõÓÿG¬Áã„üŒÈìej‘o“҇ϗÙpü@“éỔР¹ŸL±Ô”Ì¾ÍæÖSég·™ÚœÌ‡32²>[£"éÏf/OØ 6úØÿl‘»É—ñhrgÝ!$dl ¬Ù¬?ýFàšÁžf¤?¾#O}U  ‡PŸÉ=AÒ'ÓÉË|8†’DÞf‚ð‰O{–3,çÓ3ítwv¡#ºÈH¡N†ã¹5½ïh÷B3ø|‡Â:· »)ƒG‡M=ö†³ád<+‘à%4Ó¼Çò€wÀÞxBö `†¶ÆF? 
ˆÃõ‹d843˜Z}€0ûEhãËpþHÁ÷Ÿ(ü´Q?áoŠ<¡™§>t8¶J?“ÜMlŒݹOÈüÛ3L¢`4š|™yZû¬×kþŸâŒ|¡£¶Ÿàð'c„ñWq"a¸ÙÛÛ#´vJkÄ F­%ÎõN¥³‹«ÓVƒ\võf·yÑè4P¿8¿l5®ÅzĨ·kŠUÙ*…ʪªH[]…7âÌTCaJqYIÀ™ e‘õCP¦[OŠ2S…23Ê*j(*R”EVò“ñ#-²…¤éÖ“"­¢BZEi\°H °ù â‘uâ’!u‘3†JÛ ]äÚ5+ÝÃ6µ›‘ÍCÜÊÁɘÓ„ú>Qâ¢P`¡Ü™¢ÓŒH²þµkF£S«™0tÆ©,E§¦•h€$¼A»¦2ljµ†Í8•¥Ø¬ÄÅf%›\ÝS4X÷hwž–¦/c^»ñռ̭8•}` ëD0¸ÊÔ'ÀǦ¨Œú•¹Z«ÚÎ`dËÃvÓþÌr¿Ò„6c} û·#¦¹ÎÄVhMÎ:uCŠ[_­`qEÓ'ÖÁéXOàÉî ©66*nT$zø-ƒ‹þ0ƒU(¬‘ÙBg7š!@>*±(Ä®.l€®ü`Jöçdp;êÏ>Beø,ì zøe2ý$lMFßB„ ›^hìC:Æ öo¯Öêë—þxN~Á>'\Œq(²ŸÎÎ]vî¾…lS^æîÂ%H9´¦ÃùÜ“Ûoä½5œ÷Óþx8{ …*ûÕý“c¨¶µõWÒ“k ¥`£5÷ìeõÜM8÷èÎ Í^n÷ì}Á –(aÁV°@ŒìWÿ;8#ïþt6ŸÀbïžìѲ¹²¾Î7“7?¹ý]ÙÔç?eóàÀàç?fµb‘²Q­V6ç?y\…M€wö·þŠÿ¾«q,ôƒÇþøÁr`“#œç”h­­¿î¥yÝGk4J»Ù­îÛF«E^‘ýÛáxö˜Ü—ü\$uÐ/[µ@Îf*}¸[tc?ägìDpFRÅi Ä ä!Ê“R‹ 0<ˆ,Qg&fþ£€ÃÝÇé”ަŒ®^‡ [¯ñóþèh«uQ;ktàÇ^×w2ÌëQÿ™™²ÖëVíMF±7›ßd¯¹ÿ2›Ú®CdžÓ¢¢4|[wa7n æ3²wV;;û¸õÚ©|Oö^(Fø ÐQã”çéäaÚbÄ|Oq”:Vjú[VJ¿Ù£œnujmFÑSj [UeÉ'ÿß^^¦.þ£åùÀ/ÿ+Õ£üÏãJUþ§¾º6 €¿e[À…š—üO»²RòÿûÿÙIÿ_.ªd¯ÖÁÞ¾¨¿­µß4È/–+ßáñÒ¤{ú„)ÝñLeUe»Îå“ÿÃq}DÊÿÊ_þ›c#ÿó¸<ò¿Õl_]oäÿâd/ÿÙTå£XjûW3VûtõíŸM°…Ïgû§“—Ñd¾?ê?÷Ÿöî¬Ïüëþ|ú2þ´OµŠ‚ïPux°u‡  9l¿3b À=²ÖºBˆª0XqK€OþÓ5•vQò¿ 7}òÿã6ò?ûk#ÿ7òßm}cH¡éO·Ïâßцßwùä÷Mó°švÑûÿjàüßÜÄæryä?ýÍùÿÂd/þÙL­©øÿ¤ÿ÷kþwå>Ù¾½÷4|žÁÇÔ lÏ« È g’çߣò0XeíÁ/ÿ¯Ú©‹ÿèýÿQ@þW6þ¹\^ù³¿Ùþ/ÞAògj#þBßæÿ{‘ÿDP‚ûþ‡eúôm\ú¸$ò¿{Ñ2Sí#rÿ_ñûÿ@ÉüÏã Èœý°xùèt¶ÖTø¡N¾E@¢½þx6${ÏÖ]<ÈÞ‡>,Øpí`sD°:“ÿFí켑Uò¿jT|ò¿R®nÎÿs¹šín¯ÖöÚ¼hÓÀãË‘…ya,;3ΣÅña<ñýpŠÕ,c ÿ€÷ô<ÿO2“½™K€«Ä“Bʲ<}ô‘“£Å²©G{‘k™}»HE\ÑtE2™b+4Ñàä˘|7œŒIáËãpðHf“—ѹ¨§ÃÏÃþ5”» eˆ,• M­2·ó—©5Û)acnÒJ(<³0cŒãÿ âÓá#y†>ï¹5ú†)Ph÷žqþºÅqQ?=kvŽ ñ&äN¤ãFeêeŒnlÖK ÔŸÒ,/¼6‡X›vѲúƒo,K%O¸Cûé=Zcš$1d"h89¦æ3}ŽFäÙ$MÐIî¬yº¿ƒæY¼8fu¤9gv›ÒÖVãºQ¿ê5ÛoHïmƒôÝ^£Óe ÀÍKû<à|ÿëŒÆ”â­»à­AðÖÁ[¬-“üÃŽgߟ Çf°i±Ä¯Ä Pbà+ñG Ä¾ ŽŠŽJŽŠŽJŽŠŽJŽŠŽÊÖÇt/NA@cH‡ýÌZ:*ψ˜žf_ ?ºPÄ4GðøÓxò…<ÂÿðýÌ_E–¿sÛ¡ÂÒöÖVxb"R8Ø7Ëû'Ç;Ùtj1žöò|Ç’tQ¥€Ò2 `Œ·Öü‹e±äF8Þp 扊j¶ãI8U"µÁ`2½ÃƒùJq©óò,‹’5†SèëŽ"ƒªdv+>,Û]Òt¹ãÉœV¸³î·­»¡Ch£ÙÄk´ì”” äTœ4cl‘­Á'Êóxr+lÄú:´¸“˜ö òÌó`Òä®·ý;ã¹à–îàßòéÛoØÌ8îtÈNÁp¨m¨‚ $°þ™3tigÞ*!1D@»ƒþ|k± f ÄyŒ rÇ–]‚÷ lzòÄ2SÍh#â¦ô“îË:¿Ôž‹6ÁAÚ¡“2vÒCcl:l¤¸€PÜæ·¶®(¶ïbßY÷÷äíd:R¨ìû''P«Ðc§Dê# òá±?²¾‘‚¹oVöË•4¯‡_¹ôþ¿ý§þW6–ç¬ ¤þ<™Rº{œZÀæ­>ÐEÿaò+A´Þõ§w •s$¤­fí¼v 
5@‚(C,Né¿LþñŠ ióÿ í"¹}™CÁ{bˆ×/ýy‚µ+|<ú¶ÅZ@Dކ°æ_ýŠ Øõ_‘vi]´áïbúÿl:X±üÿÕÍû?r¹ØüÛG6}Ä›žÿóþ‡\.ïü÷_¾GC™¥AŠ}Džÿ2ûÿ¡Y=8ª0ÿ‡eccÿÏåÚßÝ"»t›(èè½y["g“"1Pñ89¦òýz§vC:°;Æm4ì v÷·þj§ÿøÇ öÜã‡Òãº÷~¦æÛÒãÏþ[‘ÞðþÖçÉðŽÀ~ó#zHÏž'3ë#*À…Á#lwñk‘jÓ,êÇ^§Öî^^tð +ìÿEnx_ è‹Í·'cOöC9>f°Sâ£Û<ƒ•‰ÅÂñú°=gú¬{5>¦Šæ;‘Íw†6ø#6–ͰS¾¼òðq`?Y©Šµü7@ÚøåÿAõÀÜÈÿ<.Q€ß 'ùP¦£á‚Ýš|*òo#k:µ¿Ç÷“ßh©Ñpü‰§eÕë\5~ÛbŒ…•CCÕo[ÐÏýuf…uxÊØþâÆû½7›¢gB‘ÐB»Ÿ±«ß¶þŠË9P‘õVc|7¼ç¢VT)àwÆ×ˆõunMÑŠ¨3RoyaÌâ£àÀ òORp[%¯^à_ jï‡öî ?ïâEš­VãM­EþYk]5ÈÅkrYëÔνF‡´¯ÎOáãow¤}Ñ#gp¯Þkœ‘Óò·¡µÿ Ĉ³ýÙùwgb™Ð›n9·|ž«P ¦7ᘦ­1è9êï¡J$aX´ÉûѤ?'¤öoó¿áÙÿ”Kå"üÿg‘8üškÕÃÕCzÒä/ ¥ \¿l=qYëvÖÜA82yv_äœcð`=}Ùl0Ùì…›RÇ+â)ê%I iÏ{—–,4Û°šgEV_$eúO˜X… ü3ŠäšþE¢(’›"®=§ Æv¤€˜¿yæî¼®µº x\OÑÂEâ@›)|•Äðy‘(B›¨Õ”@- ÐfêQJ šYâó$=|êYN¤‘|¥°ÊË2(õ–9Ua¤7à ·Ë.oék‚·+?¤KYøI!5s\÷IaÌuÙg d䪇Ú–†ÅåV_q¹ W\´ØÝm”:³ JÍm¤R³ ªÊp—)Àš N$Àå¼$ÈAz—s$‡)l`^®Ž¦‰a#KÜ1X—¬ÙŠ——ZR®&eü°êIÂÛH-rtI +ÃjÁ»DN›Þe1ÚDð.‘Ï.€ß¼Ùì æÍeó5UþQßù¨°AêȘǠe’{/_Œ0J.Um—'"/k†¡§IFA™Ý&ÿ8ø2ä 1VÇÂf¦v=Fï–©mÉ`[î Idž $x¹®ÌàKC`êÛ¾´}=m_îh—.3£m^Ë›a;Z?SX¶•+ Е9ÄÓÔX²yHÄ¥à¥cv²>ÚIJTq¶ZæÊNçªÒiÀ™çºNá²èR1 åY_Ey^PEyNº§ÏV9y^`CŸ­Z",þ^%[9¢þ­³r^Ðe'8ŸØ$g*2%€-‘ê@˜Ñe\ l}>Õfë´èlëËÙ:ýîØ«%£æh€ ÅŒ+ÙReñÄ0Ÿ”zì^¥îÂ[F½ hŒ‘ÈY\lõ<+†Ñ‹LâØàê‰Ð”Á5ª·aNP‘R•¬.œɺ”Õ® ä2{ó^ëY€˜ÝRÏÚtVzöpê,ô(uFß>_Ð>5¤Çv†Ê¢ž šJ®i=+05õš0cñº U›EÁ áwI Ö;ëOâò"LZïô?}ˆ°jZ8Ò6;µLuR°,–¥ ç’9V(—À°²€2S~•=À)³«\N‰[åkª¥þÈ|ÁyèH":}¥Ru’5ˆšêäG" ¨©HFwÓœ¾ ¹(€á[æôU±tau0š®^£:ÈÉÕhB¸´E“¾\×Lðe´d²5rÅDCÍô¡f CÍVÿj¶ÇPA(áÙC©aŒË1²9†ZÆp®‘Í1Tªàfy •* CÍRÖ 29† ¹ÌŞļ×z f·Ô³‡6Ãc¨TáLåJ_¹]P¹MOÉò*U&Ëc¨•š,¡RÔk²<†ÊFµÉò*m¥!ûc¨´!Îô*m`3<†JU-Ëð*EÍ,Ãc¨•³ ¡²ÑÏ2<†J[õÉü*m€³<†JÖTŽ¡ôUËçUËçT-eC¥m&Ëà*]YÇPéÈ28†ÊÌ:–Á1T§ô¡Ò67¥ •®­)ýc¨t MéCefeJÿ*ÓMráú`MñCe­äùPtáŠõ ~¥–w±È®GµI7­,ö&4†A7~ê¹ šYJå0x0PNлž¤Ð齫wÍm¸¬{3”¦¶;—´¦Mš dI€ú d@ ;O‰üô{7ãâ=š™ô™ù`Af>X3¬3À°f.ƒ!?fÙ{¶Ì<Ð}žÌ|°|f!f.!7fÙy¦Ì\Ö{ÚÌüÑššÚ)e¡èÌ뇦ŸÉƒ«S–ÌÕe0H÷ày°÷(`râó±ÀÈáGÁ{íE“©4y¦ ÅÒ8¿ „\ˆ4,ùÈ‚XPd'¢ÀH]:<ëK‡ç¥ÃsLér²ø¢!åTc È…t…B$! .]‰w¹¥›,>N7 Ø<8UHÚô°(‘ä­kZ7°äbúp(ÃëxÆ“…&¬âv‰{×ÖµX]b0´µßÓñvzo„º™"ȵ ‡K‘ ô:ÏšâC‘. Äï? 
D«rºlíy1¶ö¬ÍÖÒVábð´´•·$ -mµ-7Û õÿ'=ŠÇ%ÌÆ·Ö3¾±Ã‹“çép<¿/üLþ¶g˜3ûÝõ½· Òkt{]rñš4:‹Î^ãºÙëþ×øç¢°ÊY/È'üÍí²‹üml¶M^›ö´üçÖ_²¹ê§­ZwnÍæÃñÃþàã H³Jƒ4û(—ˇÕ*ÁÏ£ÃCúY6Ùoøb˜F™F¥zhVŽ*)Õã/¤œ&a×ËlÞŸ(¿¿Œ†–¢»¿W<§ƒ)çsM®¿ǃÑËEþ1›ß '¥ÇÿÜo?xî1AVzüÙë#ÒÞߢ'ö³ÈÈšN‹Âêÿ– ÇŸ> …Û¯˜<ö§d×%ýß¶ Ÿû;ëž¼>:úX‡§[Ÿ'Ã;ú‹U,Øè½ÙtܲЄÚýŒ]Áêù+]•þЬ¯°ã»áý«ÃÙ~çâÝú:·¦c¢7RoyaÌâ£àÀ=Ì [Ev Š£`xØ i¶Z7µŽ|Õ@¶rYëÔνF‡´¯ÎOáãow¤}Ñ#gp¯Þ&tzƒÉá7ÌA¶æ°c䚤LGÝrn3øùË‹2SMBîüÐiÃ%Æpì™4.æXq$9DwÈþ.ôG#2´@¾Ngs2ŸP2&»ûXÅ«ðÚ‹Ë” QñçŸr¢[àïn=.µ¨äîOO?ÃsÍW°óÒl¤~mÄðéXºàª8>·>ﯲýO$5ÙNô§ì/Í€^aÂŽÜJ¶ÂXSà,+¸teZŸêæö¼> ç°®# `/jo¨^YóXó•ê8Va|M _/Ø×ïKú^ôO|z/{°&ЯìëŒ÷¥a]ב5½ Xè× öuÆûÒ°®—£+>½› å’¹¸×j›ZP›Ë ‘PêÖƒ9Ñ>X7"$#_eÀÛ\|»`›YŠût±ÐYmBõØßD’ÍþsõÁNUD.윀֊ì@v]VÒ¼²ðkúc}…l+©Ðú²¶ø)Ðúz€¾ÆX_!»JZ´¾”í}:´¾ ¯1Ö—iSÉØão=“['R#󸯉¼€^X5OÇ¢²0ig¾›î”Ó"ê¼wøép@ ¨S5§ä¶—õ2 …í8ãÐu’2ÜË3]å¶—ˆë|0m_ø32*1Ä‹PQ >1Ž/á£Ìñ¬3|xœs<´&_x0ò]¸5\¸ÜЋe;CÉ¡e(µîç«åÒ ¬ˆÜ¶¡ C-Rð- ›¯»u€ZÂ.VÆy%eç°é\j e¯ØAÊ^¨å‚0k°uãÈcPv"½Kwo¿0¼ršÎn3‘&‚9Y¬‚]:Îp·¦›z+]^`K¼0ÀÎv‹–&Šõ‰xUPwÙ%‚8%M?'Ú/ ð"T¼ˆ âå£xá‘ê¹ûc¸QdE6€ñ€^•ý_bT/sû—ÕËÜý-JÕKÙ,LÕkõŠlý¦ê¥ìü¥êœ÷%iÐsžû¾E)ymл¤]_ L9ÃsÐEáMF¿KXÊŠWÞd¼ÀšI Ó¥àT·OùQðRNDÁ+‚ภ0Þˆ8Až}‹‚l×Ñ;AÆÒkr‚Ì@õïÊWõY­° Vé9Ø¥oÅcB½*„äÈ^µdMd¯Ú r,Ê^™CÍx”½`¯ˆaqÊ^™äX”½œ±izÙ'ȱ¨y}¼B'Èi“pæÇ›ñhxNÓ&âUAñ gCÅofDÅËx5O3¢âÌNx×çYôªìÿ£zÅN5Q½b'ȱ¨zUŽ5ãQõ@½"[¿…©zUNcQõò8cÓó’OcQòÚ wuNã2åep¦N¿Kx%OS§à¬OÓ¦à¬83¢à¥¼’'ÈQp'Èóiüd»ŽÞ 2–œ ×ýn“ËFÏ Œ¯ÆÃ¹‘ •R=Pfû7éÒå\¡×31È¡°/£GÖÀë&F/Û¹g ºÞqQ,Ð#Qï{§FRÐõrc§ ºõ’÷l¯Ì(Öj ‘¼ŽS6ŒµDPÛXÇ©ÅZA¢G­ãT„ #ÏA-]VˆÙ0Wmk5†ÅeÅJc­±°¬XåQ¬Õ—+=Œ<¡é´‘¥°0¥ÂN׆žÙÖüTD„¹L JA:,þ…òIhA™°lðS+³J‚Eà_Äšš 0Wcëþâ2@¾†—?‚u?u°Ä† Xæ HS/ùÀ¯õšúèsñ˜èÏÆz—Ê Öi«z¸Î(Öi +z´Ê Öi«z°Î(–{®³ŒÈÆj—«ŒX…!¬ê¡B¾2bư¢G ¹ÊˆUª(ä+#2:OÈYH,¸Ëþ5>«Ã„ŒÁ¹K…e‚ŸÑQÂ2¨?FªÄ“æAÂR¨?½smðƒ6°ü7鞃ä½7X&ôéóþ…ÀËåý9ƒŸÆf`E¨%Î0—Íû—Hýià_>Þˆ Kš%KšÅ Kš­mXÒl­Ã’dÐ Ø_í°¤àEä¯ZXR Ð#QŸsXRš {QŸ§×ÜQ¬ÕVõä0¥a¬Õ Vôì0Q¬ÕVõô0¥a¬bXRv²"O¯ÿ¬dÅJŒaUOs–+1ˆ=CÌWV¬ÄVõ1gY‘kXRvÂB~‘zTCVrbÙà¯MXR–Òa©ð¯KXRV2aÙà¯MXR–’ ]“þ²eÀÂ&ýåÊ€¼Á_›°¤ÌdÀ²á_—°¤¼dÀ2WÀJ‡%e&ƒ_ÓmÞ~Ã’âb†°ª‡ éŒbư¢G © b†°ª éŒbÃ’²“9úþg%#Va«z¨¯ŒX…1¬è‘B®2b†°ª ùʈ<Ã’² îã2‚M _—°¤,¥Â2Á_“°¤¬dÁ Qÿ*‡%e)_Ó³>Ó Bºç yï – ýº„%åÆûsMÂ’2ܬ õ¯rXRn¼_>ÞˆKz´¦Ÿâ†%ÙuôÂ’°tÁ•$c[,ðB2þއÖ:±ßh¥wâCÁ¤…† 
e©^¼‘Òfš¨zÑEz :xU¿Ew•@®OÆ¿¯Ð2¹F ç†g½³˜ô,jVdÏ« ´”ž×äÜð¬õ&]f¿’FÎÒ×8†Ášê[õ`ðkfÄ)„6Œ€W³!”›fSàÁqMŽÚ´¸o,JÈ ·R²]U܆sÜLpk0Û¬YšÀÆã æ2ÀM…n—ìrp›Ã]>´9H³…áMJºËÁn–º‚†kö-¾Ë®£gÇÂÒ Ù±¼&Ól¬XHÿüÄ´b-§ž K g„^”°z¦ =`5­+«ð€›Àzµ:箞i"&ýª «ð€›ÀZµ:ç®–}Â…v!+•ÒT-)zFhFKÅh,ëÔR1*¡Ô¼0šjºv“¸Ü5ÆÜç…ÓX[£åâTÎQ3À©žñ!&3ÍfI¥ ª6 æMN—jÞ8Í…¡.Ö¼§?–º¬f#û5¬LÖÔäf&3Ž»”Ï_Ê\ÌÐtÎËÔmÊ\Øâ”ÀÚÞS€êO†k{Êè@¬iYAȵ|VöV©„7Eâ+=þì¿·>O†wäõÑÑÇAÿëó·Â`2žÍ ð²ÛÎDÑ"»ýÑóc¿È\KY*jCO×v離ïÉÍÎ=ìåöŒýñ> "¯Säå)cœZó—éy™ö`Û†1j= i?Øz,èî&óÁbЩ ´Ã^¼Ðâ³—Ûˆ‹„VQÂý’!ÜØ/°_¢À~Q€=ôG Ö£b){Á¤ÝHVY(h÷£Iždh»1`ûÒ.ÍÙLs!ÑÞ ‹ë"kð¦(_DXÁúOý¯ŠY É;D5m·MþúÙÿþ/{‚?vx/…²Ø§­ÑñîDÜùÅ Ô›*j6OŸÌ$@ñÞXg¼Ogžú³—'͵(ïƒ6ècÙrñG¹¤úŸ™§þwX92Í€þwx´Ñÿò¸˜þ‡— Ä•.Kä¼?Yߊälø*݈4þïËðùÉ‚Å\ŸLŸ'Óþ|8—ìÚ—Së~øÕº#?×?þLæ‚tEp{7[ @ŽïÈüÑNÉ5õYå™­?âu>¹Þ¡è¿W"oKälR$ÕýòñþÉq‘tß4÷ëÚ éX3«?<–¼ºä?fó»Ñð¶ôøŸ2=2LåÄ̃õô¹@YÕdz‡ûôÁcJvç¸Q.2&öÄ?Ç­"¨‡ éúúˆ›áõ‹óËVã ñfFw}/[þZt˜å×Ð.n­¹#‰¿¹¾¡‹â–¼ÃJ,8,þ^lÑŸÖøåÉ.Ü©µ»—Ý¡£¥Þ‹Öü#ûdf}œ{¶ 6*þN¿PéDeÅh§‹Ïßu‡I莼”ýb»?Öü.ÈÀÜyêF“AÚ|ÚÙ…Š»³áÖä¾ ÔŽcýH _•#Ãì>Áß_~q ìéïøô÷ìŽá/>ýQM¨ýaÜþò;ùïÒÔê^õÿ ÀäììþþËßü-´Êð©ÿ¨‚7Ý*²o\Å ”F|æÎ93!‰!yqõ¨VÄŽŠäk‘ÅÖOØ#oìæ7SË*Ôè/ìÑqpõÍPý¢Eg( (×nÅ€’èT>0á#fN)ˆ®éÈÓéUû¬ñºÙnœ¥Ü¡G‹|¸µ¼éç§‘ýåVãOŠÍ Š\ëª6|k_Q’Tö$ ;N'_Š¿&£%1†O/¿¢1mîðJ¶Úw‰Í ÀI ¼‡=H¸ƒ”9|záÜÁ]ÿ8úW†½¡»b!¯ ð`G¼Ë™ >Œä'…ß÷°ÜÎ/ØE\Î⯬ÏcøÇÅJÇÁ9¿Ç€Þ_% æ?=3„ð|–(Añæj¸Gg+t®lºQÌXØ`ÝÉPÌXذ=•U3f³ÄÛhá@eJ1±õ0 & J‰ÛP)À¬£À Jˆ[©„H¡3¯t°¦/éà— ®¬:šŸGqkQÛ‹#;1Ô¼¤*Ú+ OÝuYðS"ö»‚º™_1*ð¯=Ÿæá*61ÙË/5…T6ÄWLü¦ªBÇÀD€õ,¦0:ÈrY²ø Õ±Ua½ÇkÑ¿¨k³¨éZ\¢&ú«š¬õž ha³´µ—ö “¥=Hki?Ê5/Ï#ØÌh¬ï-­E)oá«];‚hnÓ’m®.[ììÊðÛ1<ü?v’ó‹(3 ¨Ô‰ô€ñêñ /³x”h™K²Í<Êl3x$6 ‘–žýè·Äh4Òºw¡ÊÍ.¾…ÊŒ,|]-d` m!ZrÓ‘„7à_´!K–-XfTÌ*Ù®],‚ÈÿóJpè#ÿ;ñÜ¥hº³ççÈGí"›¦Û,)àK½Ù¤tî.eÛæã˜{ŸQï6pã®ËhDd¼‘á à÷Ú…¦Þ38ˆ¶vÙÖ¡à£0ñ7N´)›ÐÐØõçVØ4ÉxJ˱!åÝÁ&0'É+ê ̈ûÍG‰t„åõ qh™[Ï$†3Õðˆm?‹ d¾ £©9™M  [š-2"9^¸€¸]PÈó–~bú¶G<ßþ‰'úÞ,“ü!lÙ ©{I ªØ¥TX¸;¯jðœ†/}þ¦¹W3ÑÜaȵKQ}ÿ´Ÿ‡ð©Ø3ÿÐ0eÓ­VÆñ/Ûf¸† äv aÙÕ.£Ú#…¶ù‹AuŠ}sg×Eø3I¦c€ä(’O>ñ!cµ\ùóo¨ÁhÅÍÐó¿É§H¶*n…x…H&jƒÆÇ Û½y;Ʃ¥ p„ýÃ[!Êúá‚JþÇŸß…ñ¯éôÐa‘Ä<óã.⨉ –L>#"ë‰Þm_ÊûƒÈÄ+JÛ7.í;&Ú•?kíÊŸ5áVîΟ#wç±»ñÈâ¹r›îu•`¿î†ý‡1ïÀ·_̬æHh“8n³©þ ­Ãª¬Eù0„ aÏý³fí Á1Fø<.j 
ŽO±=ö”aïïø‘Ó&®ÞäócUHÐõ¸’·õ“MZq¶õ²]=Ùlë7ÛzIÅ·õ¾]½txZ£K:¸DcûÓ]ä~>ŒŠ¥»ù¸›ùÅ÷ò nåÿ”â& ;ȸ“R†&ìæ·ØÞÃÕˆ_SH¸iŸ‡nÚ•l–x¹l@ ˜K7æÚMúþlŸ®Ä÷ŠüÌOQæÏË|âÝܧ"ýã ÿ„îôÿ‘¥šFý\€U0é ›ûõ±å˘œ† Àå¿cCd*@lÓ€ Ó €DþÏÒ“ÿJã»¶üOj}Wìí}¶sðŸ62>\ÆË¬g*!/ZŠ"ŽéE+½RžGšè½zµn ³Îÿ”²‰`c•ßXå½Ó}K4LJâ9G«¼”¯„HÑ6Ÿ¹Ù}nv×àuA¡*7­G6哦‹ì¦7Âtu„éìû¦²ô§u¤KáØ)IÒ4öʈPu…X’J˜ÊjHÒYº’t–‚$¦q¢9:„ë*Nah¯bÄ…¦fœû)IH̰1o ¥FÍ•ësM#îÃí>z}jš¦ZGHró‘¸J:=Ji±µÍzåë5Äõµfâ>õ¢¸¼2Xwùþcáþä_²Úæ`ÿ’•Åj%9ýjë,Ì'›¶™`ÝJœ®§Ñ>מœ\%JSÕÖßœ.ê%ÇÑ)ˆfí?ÇØ¦à M<ÛClP™PGêýÓªùA¯ÙvD²MüŽí­k¶‡—íWcv¹Œ„`0°È“zJ¤LÏIq¨µ"EFOzúLE¤R­I©4Õ%™A¯ðôèq•%ÌNF¡SEP_¨!‰ ˜†Dê…´á×Í’h¤‘(GÕÔw¨CšªC†°”(ÜcÅIG­TÚmHl¥RÝœL¡\5}2Û#âŸü\„¤"r³5h€Îþˆ!£KÇ}þ§ 䫨F LšÃj)ú¡Þ [ D—KDé ?J®šj˜dfTë/O¥pÙ3’•:hFG’Jó£)4CS1Þœ/ÓZlƒabca²tˆ9gE‹u¦ïl‰dî]ÇNÎÈåƒy©ÉŠgP$MŒqØ?YbÌ\‰)¤JDÌ„¯\É6¬¨J‘H[ÓÝÐÃR#¦³«#élì~ÒJ}º²‹=Âx®{|¶~ëÝ _ð Ó¥~+?©Ðf+7”$ÜN“qßœE)Öÿþ§JžïªÂ|ÿçauóþ§<®•ÿ“qáûŸþzP-âYç?•}÷¹œøÉpï; ”¾gøÞ$õúª™¾÷çmØ›¥”‘ÕzÙP·B…ø­SêV•»8¼ÓÀ€¨ä7°¥Sø¿î“ã(%NñO]íˆÓç>·j›¾ý꙾ãF#-wk—»eêeM;öþºñ Åõ“£8ß¡à—£´ã~Yú)<`wˆ}þò»£Dü¾Ë¤æ/ðH]§W"Äa1¬ªÌÒOÑHòx;`Dv,}ŠÂÒÓò±ä#±[5‰zuS¸N£|»hˆý©{$1úB£ÂO)2NôÝrd܆¢O¨AÑç­—ÈN+1Mˆ!ªÇÇ'2™íMo)æ‡%{kS÷’O]ëmu]_^?Y¨Y—u:²ºƒ‹Ù@Ž ¡8E„·¸ôeîëæžäïVrMQx³? lÐòHN¹$ãC©H#©!&ˆˆW>\ý^œâƇ«b¼æLúëÔó«žls$`Ú÷ž(5¦ŒcATß¹¾Âp=(ríCº‹býû#·2èûOïK)dzßt>º—Ya\•Ïéqeï§äŠž²ÿ¬T½0¯ênó¬AfÃ;K#-.>Å¢ì)Eôßñw,õ+8’·eÝÏ%šOiQûhf©Û­€ÖB?pÅ=b¨v±ÏýhŠ”»Šœ5¹¢) µEè"„ÓŽBg[ì-L^„ÄÔB⨠ÒÖ;üã_ ÍãQ¦y ãqì{ò÷þÚú…L&mTŽ0»*‡ÅN¬„#ÜC‘¬Ö5ýºFêŠJÆìÛ¢JÆ‚jÆ"¥6ªFLûHšF¨8ôJBÁÿ¬zhlqeºŽH_@¢Ë´W6ÿ¶ŠF¹dõ Õß<œ–ò¾Õ”¢¡BQ‡¢Ë@ ØÉW²Ž³€^‡OºÁÇÒÌy¼•tß)Ç+…¿F"¼DÙÄ¥R˜<ÓYHe-Á—¬/ÀhtNBrbõ«t² DŒN˜›9?÷ s\5޹_\$~=Çý"ð8U»m»Üõ‰:ûäÖõyM.ÛÆ«³CL¼'´ÑŠïG£#ž‚xŒÚBOaúÉzñ½Ù·…Þ{¾DoJÛˆCÆÎ2‡Gn„áF®¬0¤¼OGúdáFJ‘! 
µx‚ö%ãôñ§Ï·}11H²>d÷‰Ê.ýÒ/O£däéw.'e®<)l%©i¸kîÂÆ Šb²|­ ¼—¹·EL6‰öô*Qg¯ÌL¿½dJ…õüNt\……|§ “S TšãÁ6Wï.jœ^!­%¥#^­1•–;ñ9ïêëboqU™Èž¼‡¹)è3 ç¦qš»ÑoVE¿IU½Ùh7ËÓnâ…>h¹¸gG±Ñké5z©°WJ«aÒjQ­f£Ô 3é(5zGõ)è4J•f>ÕõOÓL.DFÆWt<-…fÔ©¦¢ÔR|šˆàS–,|y:Åøìä¶lKþrÉ•¹Õ9IÛñn¿»¼=éÝ78<ùe°èoåÍ ïu¾B>îõ‰KÏq<8àW>œü^œâÀ‡}œä½O~‰‡$àÆ&—¶´Ÿ¿¸D}ø^•3KMÒ¦)k7Òv¥-‰· +øFànîw'pgK¸³œî,m»ìüLY_túòÍ’*ÓUìKÿëèÀ< æÿ:ªlòåq±Ù¹xÓ©“:’CÏØÚ¥w{@äy:y˜öŸ%a/b+(¬ÏÖˆL‡*ñJ§ý™ukn2¦Å'ÓáÃpܱB ÍÎ FæPy|Îåõ_—ßÔ^“Æ×þÓóÈ"—¼ûžõuÎK”Jä²÷¬¹5Á/ηší^ãM£ã²¼öÅU?»¬Áè=÷i¾:Üq›ìú£>´7“úäé Ào¹Y¯uE¸_¿¦oŠäüâ î´yÖÅ(Órk\Öº]·¯Ö:sztúé4Ä*pu_×êá0¸í5¾*ÉÝ—['åšÓlãÞš®¿mÔßEöiÉÛFí¬Ñq›ãã¿™þIhfþü¢-€·w`V÷9:Ú2Œx~ÖŸ÷ ¬¤¹…‰ä„¶Ïj½Z`èû'¥£ÃƒCó ±WÝGk ^æý[ Ž®¤©f¯Á&¸x‚—-VÏ.ˆY¼Nƒ7Ê®x¤€Ûͺ{¦¯å`†wŽEÇÃù°?‡Í;dÌr‡Ã¦‹¥OÆÖ2€åPr«÷`I|î^,‚Ñrn°XXõ/ÃшôŸŸ-Ø© é:r[¸³æýá–‰~~™Q ÷Çߊ´1ìnúsr7!ã ì¬ÆŸ'£ÏBЈîggtî¨qL€ˆR¯sÕ( á6Âì¹y¸Iá”|M Ç¥V£t°Czom÷©lFŽœyA7n´`jÜÞ4J‡ÊŒ@í3¨ïŽo˜ ì9½àáÀw´sLÛª©_´{ÍöUƒWêö..m”¼Èë‹Îy­G Û¸p€g}弎²O›u¬ÙËh>Û.î×;´â±PQãÚËÍýºmÆgCuuÚÈA÷ ,éï‚aÖ¦Óþ7‘!½(ëä®qÀ›)åÕbdÒÎþ6Û ÕÏ.zõíý𢦧蕪hÅ-Z»¾¼Q­ºEëê¢nÑî‡Ú¥ªè¡S´[owÎMUÙ#¡l­{u®*{,€$¡*z"* „a†i>¯]o/"RlŠn1¶d¯ÅN£wÕi8Ãþ6c“{2{¹µ5ªñËÓ­5Ý.6+Å“ëbÍ8P®hs‹·®• ;teÃr{xñ!" 
¡0%OHÀôÔ(ACåW½-¾"y3Üvо4"™Q°W¢…ãâAÑÜ)²Ÿ‡îϺ_¯è¹-ö¸Î?\Ö»°2‹ô[¯p°>ìäeéת[X2>,QÁAEòõËx@Sô*çž®eÜöCw”àwÕ|òŠÓ‹bÕ¤J/¡ÜM9k¶Î[ÇQ†ŸnÍñ…Ùp CŒ¯ÓlwAtæ©v_ôd*{€ºaµýr©Ò(I¡\ªÂç^¹tÔnZ«Pâlßbó•Qâ|  Z [0Je(fâ_ EåÏ|a˜Ø£ˆÀßú¸ÕЧ˜úgư Ø£ã`:`¿ öû€6sÕŒ§˜úgÒF4I@?tËxD[=Ž‚ÅSLýS»F,>È8é¨f¨*«e²Ÿ”‹•(H<Å”?5–±™tSj,ØÝª~æÛˆS¢+Eµâ)¦þ™q#~*§4Wµ\h‡œðŽÖT1À1q¦qâg“•“¥«šÌœ”+îä±¥x©èÉa@9Pœ™t åˆéÁ'ά:Sy̪jIÅA³M¦ðý馰vj†dEx4 %õzxG6Ê…f#A¦àQ7¢ÇoómE$”•,¬‹ø™‚¶:âá>…$+ñè$Jœ‹GW[áLáð;Õþéü™kª(°ED“«'ˆËk=ô„2ÓêE5áX¸KÕ(A¹ãpÆL¹@MyÍòabi¿€­Á]öߟ.\ök§ ©^_ðp‡%k¾eC8öTÔ¶HÈ™ÅÂz€oÙk«î ±Kx˜B Ë„wèI{v„´„"1Ù?ýcÕ³ rX¶}K°-§axT-ã±{r°%öÚ†›øhžû­>1wÏk×…6(®Ï§kÍÉgk0GçœàÙ¦Ó1: ±n[>/¸ê×…æ<¥¦U¢ˆàÛ^"¼ˆ×W„cÊñ`iüǃe×Û6‚èzù#ÜÉ…RÜ‚0Ôú5£ÈÏÿ.ÅÜ;Ç„´ñ¹Ë€Ë |¤Ø>±‹Ø.¸ 0Ëé| 0= ƒ•>Døjˆòrtx4 Åø.ØpÉAvK5:Q È>Ð •]†Ö=´ÃàÐÓšQ›{î66vv[p ŠÔÅO¸‘j‚@ø›N^Fwãíí9¹µÈ£5µ¨_õÃØöצaž.¸ÛÞ¬RÖ ò…CÁ¥Ìv¿³¡ýSIÏ”RÝŸµ»;ÒgÞµÔ÷ýÚ‰õ_ú#|ïÖt" ¯ŽÑ.f¦äÿ;vøŸ—í2ïÊýB*ÛYb{'Ã)t@ƒ0‘léÈ—ž@™ø˜‚Bÿv=ì@cݸ‹!!öºˆ=:rÖŒrFhÆUhsÖö|mGãN“±åGƒDƒ*CÆo?4ÌöÓpü2“ ~O2újôè÷”ÃwžA¦‘òøY9ΧØî¿æ<áßünhæÆ -™HÏÆùe8(Ôãì=|¼oÃÿÝæ¿T ½¦0øv~“Àíì¢W@e¯Žm…ªýÕ,14üQ)U…4¯÷Œò5”¯iuøu#ü:,ØßŽØ7E;Çn=h~ìó†àÏp4ÁTuépºØ7À|¡cÁ/š>m/1tÑåÞaè‚» ÏºæòÖÐõ–E×ZÁ-±ÇYí´ ôÐl¯šÛY½¶¯ëkFçrŸí÷ðCYö†–Å‚ô#¼,Òe-WéøW)’£ðJPÅÙÊVËµâøøùÀ±caŠ`ó«)´¤¬ÈPî¡`CpQp|.•‡Å…Ðó£­C§.³GDÙŽ<õœ±¸VX~Ü9nUr¬—Ç:ô‚†ÆcñpáˆY«;þA)(ŽVTÿ̼zªšDÕX`|µ¼°)æÒ(¸Ô³bõA§cê;>q¨¹Â©ò$5šÒ¡msCÛ™Ž͹Gâ\Gr]¶«ªae>Ž£cVŸq—9ÃY·GWUw]ˆÌ[ÕëGÖ¡Iú• ég:¢8¤o°CœçU‡Ÿôª˜æI„Ê%åìlhZ^œ½º!ïŒGÿúòÀ§Ûf1(m¥* wàåÁ¿mpˆMEïh_Æi8iŸD"íT=rU¯*ëÚŒt„HÚ-âá„O‡ËTÊ|WŽ ôX ßcvÜzàìm¦\‰æeÉ{=:ä³yàŒ¶Ra·ŽÍˆ > L0ëÒáÈ*˜O8­ñé…þÊ\‰»ž<}bKU¦ÝEN—Éjž„-MÍÑ Õ°.Mwïªéh=š2iG ¥„¯µäA³]ØŽY4b¬‡ìñ.VIò$5©ä€[=#(sTÖžìU&›peÛá á¦×È‘Ÿpõ,™ù éX Ð1,‘^ókÑk„} ÈÐ#kÙVwCÖ)nt=ÌlÅÈZ[y÷’u:c¬Ýsv€ze;Ù õf¥Mxç}-´ ‰\9²>oOO¸ùŽNŽ¼Ç«D¹ FsA®‡RhÑ¡›)Ï‹M'F«JÓ3ûØçåÉsîCñ'ÐRJØÐµ' èÐ#ìïãÌZPe'Ô¥Tv8@FÚ±Ñtn”ŽœàBj7éioT·I·K¤;ÏOƒe0x¤¿ ž‚«P3íæJ£:°5 <W¡fÜÐÉ÷<Ö ºåtÁƒˆ˜‹ô{1΀§ëf.Óž'çX£vÚ-øBÎoÜû7;bšpžù¾èžðèLè¡íq Þìðè¼Ù.˜EÿStr‡‡Ô‹û}»x~x|#>¾ Ävݼì}LžÎƒà‹×7ÞßS9ò<"BŒçµáÔÀ§7üéý4Fä§Æg;ž‡FñÙÜ·bý¦H¶iXARæõ° a¾Vi‹FËыРÄQä0OZñ–¦r\WQãºÊz\G ÇU —íÚ:.»€'žÔZDðåM‘ŽþF nâoü¢}5z;!z»@pVô@G]× |ET} ˆtåqᮼ@l 
¯Y´+Ú¶5€NaÌþöâDŸšiFŸ²òÑ_]'.¸‹ñE,r»Ø¥1Jb4ØnÔÅZR”s^·Ñ{ÛìRhDS­Óè’Z§S»éb¢h¸SØ!5'…¾_¼Fñ¦÷–&è]ðVº J`@S¤×蜓ÓöyÖ|ýºÑi´ë ¢¹jõš—­fã ŸãxŠÐWƒ7Ñn¼i5ß4O[’\½DZð¯ö¡{Ñ.’w—­"1NŽªä¬QÇw‹ìÆ ŽKòî­Ð8Õ;dZ !tˆF‘é5I‘•Aƒ¡`§ßWüµ:g¡$L…ÞEb²W? Œ=tŽãêWï³U7m¥i|²¼Ý3¸ÉHô¢=NÍØ;EB•IF1Í_è/ ÏÝîÙNÑóhAYàœ¶2ýæ6@"¾’Gz½…¥M¡³VÒ¾è‘z ßò mØ`z^ƒ’”+öBÖŸYŽèí&­G^ך-rÞèvko”3±·°²Ê~½Îëˆá~è;‹$ÏŽÓÿìÈy1…tΛEžMŠ x|f/i6Õ0I|ùÞ…8Úö¿g›Ð70ÁÞî_èV…¾I(Nqmû¶ZÛÄžmÕÅK lˆ¢…áAxgÏ‘²êE_ÝQià‡Ù¨–ŽáÃ0A튤˜@5˜5âÈQÞ€ZÐze(ü«µ)·ë@  îèz€ìP I!¾—×ï4þÏU³Ó8o}÷ÞÂÈ?€nFjäì궇µÎ›+úˆõ@¥s[áµkõÞpB§ØùH~ ý rÚp*‹Žû½Ñ¢}‰rV"f©˜=¥l¬hq†{.çÙ$„|©ìîøe?";’ÍŸ!‹XÖŒ2îúƒ”#x¹Ý·Í·íŒ#6$xŸá„ÝwÓø€vªý©5É^_µë½&ˆ-&ºµb÷4†‰•”:$ƒ`Ž¥°ÔQMì}¸ í«óÓF§[’hrL‘{Ý8%ÆARóÍv æùTÛºÃF÷ ÓLìa=%fÅä nî:K4WÛˆhëè¬Mî’Áâ\\_ ¤?¨ új]ÐWë}5D‡TiŠšKž­:³ì(Íì+…Á,'_þÝERÔšçµ7E lÛcHŠ4>ÄÝæe"Øj¡ÎEòޤ¢HK1Ìxã²[VLhLRŠaÜÛ˜­mJŠ ‰¥LIÚ&;š»ªý±MQ«ØIi…ÍÐVú|±¡÷Ö6ë°wبü³ÖiÖ`· šÞ§º,•ÞèµV³wSrXAªûáô’ÅÈ2¾ fÙˆV,iŒ¤§³\²{D¬yf¡´áƌѤö^ŒnÅ!ñM¥¬ÜdïÝzÉlû%Ý{…n¼Bw]Í3ú¢XÍ'!y¨½˜@„Eœe²»-å%ßmE@Ù^á»­¨í•Ù¬›†)n«–ýÞìïå’¾ÿÝÌñýï£zX>ܼÿ}I[QÎûßñõï&cèQog{¼INGýß´ã;®ízO€^<¿¸›?[ ¹¾‘>™=N¦sr‡é¤î‡#«D+ܧPÒ8"Sk0™ÞͰ¡É=í ‘þÔ‚Gý;ò2B%£ál¾w7„ÂsëD6}60üÍøªÒ&'Ó§þ„MÍ0A‡5Éì'Ƥ?OPøÝ‹¿~@èSP±! 
È ?ÆMnç}PÌïpHwÖÈš;MÓTÈàÔœÕrî§“'û%Þ÷“Ñhò Wª0 Píé‹ï·æKÝví’1÷6¨<¸WÅ;Ý· 4A®À^–¼n¶¨Ò²gø9íU»ÙãÛZOE¬r©G®º <"¥HX¶åxíL$WX^·pCÐC¹Ûi|hâùЧ¡ÚkTĵú[x^¿èœ•¢šÁÃ)ú £+P1i…ž¢Ý€6: èçºÙcL»BÙ)R¿h]·÷Îkï.: m:÷;웦sóô¢÷Û1K츾÷`y{Ñ:u·uÅì´v­×¤gG‡Yæ"—Véâ7º#-Cï&Lú9áE}ªZm½wÛ+ø èµEE;UÜA¼ÓC«@4{¦¤Ý@áŠVµÖåÛG'tÁÙ¡Ìü[¹tDÝÎvÝ9õôº9môÂ{1JÑìÅ®†"möqð`=}Fº£‹ PE錃êkQ#6»Êmì*ñ{yLÐËsì*óiü*ñ›'l°øUžcWy°¦ƒøU^b̔ĞýøU¬©¿Í*öA»eý*îÏ&ãt¿"ïJð¯HÎ^H}:ùÃþù¶ÿôÔŸŽP¢¶?¾ƒ[ãìo;X ä6’òÜß¼%3‹¾Hý5(SÅä´Æ®ˆRÜÿ¬[è²ë¼f}VaêYƒÇñ·ÞäÜzš@w}`:“Y‰T R˜ZŸ‡³!€PÙ¡E(ØÙ êÂp0s§¸Å&°;Zãc² Zmú0á^»V8è²Õ¿…ç“é·¢ÛÐÉh‹ÝÉËü‘Ôû³©žôbíê°5Ž&Ã9,W+'Eâž]LÅ_íÚqëÐ4ƒMXeÿøˆÝëTá+À×~y²¦´ m2Ο„ñ½™N^žA:ÏïJEÞlÝÆs÷÷Ã@F̃CÄÿøöeú öª“þ]‘\|åêN€ïÚ$G¸á½z_äh“õÌ;lŽØŸa”A½©³¹ÛPw>µ,Ðò`n‡s Ób‘ƾŒQłڟ"YÆÁžaVàîU·FqFõ¦=òºj"0ÏFy¯öòJêžqr|dÓ\gˆÚ›MEÒ°‡}ÏDÎì²ï¬©õôÍ&î"E•)†¨R|ãi7ñ­ó*(¯Pˆ¼"¬ |;$á9sI»{uÚU¶†Ш{ä4L$LþÕè\À$·ª¦°f2Ç4éøÂÑ2Ù¡uè=ùG\›hàø¤Ã:ê¼­µ^ÃGD‡Þ#íÖ¡?XM–¨\‰œó3Ïà;ÜU8‚çˆè»0¾oDÙp“5Ýnž±Ï÷§ì³ÖbŸ§¨.›¼Ó#»ø~b·Dï³êÿA‚ë†W¦]áw}Þ!™‘Æe·ˆZy‘kÎáƒnvº ÈÑlÖm:Ø×´ÿBó?Úu:ð½N 6gµw­ª°«èÕZP %S¯H÷,E* èñ.}íÔŠŽzÝîf¤ü´×À[˜•:Øu6\x<Þ >¶ë.nÝzçáùeÀÃ›Ô œN)AÀŠªñ;»üFøa 61`Å®·"ªÊ›@Tí\œÎ¯í6øBPV»ö—%7NKáÕn]›žN½›@½ˆÉGNáo춤 Ô[°×áåÎ…ŸïO öÂÜ 'Y–·Ÿ3ÜBêºc} ß4¯wš œºÍ[ƒ®¬n‘– v¯éŸ@êõ·ï"ý0ÙG…}TÙÇû8TÌi½nB XÔõóÖß&ΘÎS¦ã*òäMuø·^_ô˜Ø­‡ãõâ½bjCç©­öÉÙÇÎëû"E¡+IÚW}ßßeŒ|îœ÷·Å­ôv‘l‹ûämÅìm‹»ãíⶸó~>k6C÷²n½¹·™¹~33o3ÞŸÏ3½fè>Ñ­G÷€Â p§…›gZp[ܱ‰¸ÁŸ:Böa% ŒÅAñik ´ƒŽA¥šøËx8'ã—§[€-¼³Ü’}#°¦a_à ¯Pnò Ú2µÕ n>µ³SS_Ÿ£ò¹Kv˜` Ž?¦"ƒ/¥7’ý•&È)öl‡8Xº¸l´ \% vÈWLŽáPˆz¼»Ã¤ÖØQͲý9aÆâ™Z_† Íî“›—gãþóìq2ç U çñ‡&u#øÐ,ÕÚg% rtß³ùäù™v¶æoä¾?½L•2&¼i˦F¾ ;ú¸6¸¦¸†èÁ€ïðܳÚua!Àºd4ŽzÏ¡]í…תëÞ%Ñ®‹Q)‹O| ‚Wf± 00 Ypî²Uaï84ç©Jvš•:ÁBø"X_¯¬þ¥ý)ù pF,âŒÞÙM­Ñ°; çߎ©ûJY_ûæª%ØxR1‹^{)ýë;N4.Áàa¯áQvjúÚK:©øâv(µ?Á’lMbU"K®…Y½wî½û¢{‹½FÖW¡©ÈÀ¼q*¿Û-ƒ‚ìì½} òÜÛÅï{øžV°§ fvsCp+;£hf¸„ÝíËh^"cºEg¾a å7=VšWfSƒ¥É€Q*u6£em^FO"^Ø3Ú¤Nó`™{ÊN`èñ[‘ÔŠüáš¾æ±ÝÐï7@+odfªxîžýð#F[Œ¯:óOÝ^¡­¸@ka“í ØÅb¨‹A{¥vÃ^šØ1lkø˜ØÚ+œ*BwpÔ‹£=I{ß'–œˆ4±XÀI†öðùËtì‘I&.e<ß“ ‡(õÉ ÍÎiK;|h»J]2œ‘ñDnZRa¢Â…I×éFØQù“p0û8³+zªˆ01$¹6/Ôz)U*EÂc'?¾-»*‰Ïê)€È8õWðÈx:î¹{ñ^¢&~º*‹X'¶h*à›ª‹üÏ÷Ï!ÿ#Y/Ä}*þ9rþ‡U;Ƨ'ìlŸyÁñ×ß4Îÿ èu±ú›SúÓÄð7k¦3%ѯ:º“õö½˜*¾Ô™šn/sf›Ž8kfÇÓ6{`gáüDüý)žÚ§åø'´v”ÎÏÓñ ¦ N 
”Õðÿnx3Èãðÿ.eo”umGÊrJo!ú:ôIÑgD£Ïn*KÈ÷-#_<&}ËH·Ê)ùí%ýy@)ù ìÎD!;aþá£ãà<¨¨øÇ…;% ÷:”R1КÑð~eô{\ôÔ€Â]¼}B ãW£Ì ¾wI”àã|%çIs¦!|ÒB¦G‚û oæ§Ÿ~J‡ˆ¿|,F˜o #ðë|­Pâ:ŠC\Õ|é0‚…Ð)]å¡Í¸O‡î~T-F’oÑïÐpe<þb"þ8Qü˜N‡(T-L”&nÌ\ªÄŸG”,Oâåá€ëtÈò‡@•œ,ÑÄ„è`«À@§VÌ Êw¾@ò5eG妡yæàXñæªì$Á ÚŽå a T4 .ùX!˜BðˆsïhÀ׃ؾ‰Ñ ޵í}»[ì\ŽeW(ÐÌ nè,õ±pbf™û';UÃ|« óÕ8Û†&ٜۊ´&ùðV)‡uZ‹Ù. ØMRÓe9>nöÞºUÀ- _#ŒGÓd@Pœ!AÁN}ˆ‡ôÖkÃ,Uà.íËie›eü:‡™5qvE3¾®]µzt’pmÕ@4½…%Ô¬ãtÚ«f·SÚö@SŒ1{#q2ml±“›–Œk{8‘1pì®X艎çièÚ ±€ SØîEÁ(.\Ze¦`!:… }^Ö@Çø{ž2Ep&fìà‘úÓÛBÕ(MÃ!3†¶¢k8ä:^¸ýPj¬i2Ï›7“ÌŒɸ§1ÜÙºöòu8bÐ}äH]²ý¹g$ifV074^ö­ëx©0ËÃX ,U/bVÕ‰$K’å’'"éÚú©,<ªn¹†Ç}2Š ¥+ϺhŠ™á1 l¿Ãb1ø„„Õ%É"ª€éÄY¾VGc†…_öh`¥F„c0øRc^Ý ¾L/Æ2œfâ„U6ÏŨʺS©´,›2Ô©p^ºËðõ¿RQÛR9:qÀ×Ù7$̉ó“ùÁÿï¯à²d1ßòGšö/•ôLP +}ßÂÿá¶÷þª‹!Ð}VS´O± 4Ñ ƒðZ9çŽ-µÎü† ï ŸjÕ¡d醯4¡ häõU KÒ¿À p…3gš:œOGHä4G¿$‚ÚÎlT—GX;Ù‚šb¨<~Z{!…£¢J+•¥Á°0f[ÞÃ]ÅBÁŽôƒf™ÓBû2À2[»øW§ýÅã[¦âÄ­¡ìo·{u­ÀIV)Ûv*“_É óßܶl÷1¶¤en·y?gÖ=ê2èÃÇã+1“œ-e\wìëÏç-DW,zWÙع·sÁî;¬|ÅUB×ÿàóºàðC—«ßÛ‡ñ:ô?¥Z„àí…”Ͷ4J ïòp× ï#rÚû&ºJúÞGÝ@¹ã˜é1DÓ›sê"4óÛΡmÐæ‘‰ù½SÜÚfHmôXmD”2q´hBŽ^>*t‘ò„gù«œhAÃ[,hx§áwe”v0…¿(€à¢•iX| øa–9(°çúÔz.Ù ìÆáFhò‡—4‚É¡æ(¤Ð…Ï OôyìÉÓp<|zyâÁžÃ{ØLž/÷ÒÀ¶ý ¡úáþE–|¬ÞyÈ $p@Œ&5 e â_Jý>,|>³XS+º4ZãÉËÃcø°±»7Šîü^ä.5Çn’‹k—Z ÈvN¿ó8¿±ÆÖX$aÞñ i¾’ZÉ·në-×éÚsqoÛšãòX Ç”câž›ÿ9§šŠãz[Sé:Ç\OºrÔƒUºÒN`¡ÕÙ:«HˆÀ¶¼4Ñ#¸Yÿ ËVžàв”Ù··Ãˆ‘Ø* Zdà;†ð·'½)ìB¯n#B,‹ÛvO·mÞººí¨fhõÉø÷ðf¤ë0ÔA"äHQîïúvpFˆs¬Ñ]ÐpS*•?k·dË–„®s»9Y»9YW>I*^@WÇ@WײxQñr_ÉZÀÒ2‚«e¿„••ßÙm·Â«{–ågk0ŸLɵdY²ËYœ‚Âw‰KtûÁÚW&­e ¨,Aö%Œ¤ Öf‚Ç¿Nå¸Âýs 9ž¡¢ ÂRµ@ÇØ a;nA(i3qËëòð e  ”Ž€n¢h€ˆïß-`…ÀÕº( º'N VÍ -Jø~›§¡•5»ô5~ŸºQ”ªÒ„°£†W<óT°–†]2Z¾ ¥eçRJßAݸS¦!0‘”í^hîfˆÒù9£ó­DJ.ßEÕxøÃÔj5^»ýϱ>[Óo¾Ö­uZìóFìò¤ÎE+¢8a&?¸ˆ6Ã&[ýF9´¼ôMÒÁ Í>LË‹†XxgÏu4À×À××ÀæÂS‹ã‚Q@œÒébýÃ.OÚ†ÖMôØnœ±¡“­’ÆØnøØn"×gßRNÓÅXfaöUù€ïˆâ²SX–%!IÚÙá•N;unæãJ=3ÁR™E©!V !ëHÄ0iF&½Qò´öB6*ÕX˜cšà ü¢HÄñÅà÷®9ÚÆÇ57Çk…²Q1ñ1ŠãQØ•ñÄ%õØ»<Št)Ǧ´¢¢Ó0*ÂqÆ¢)"9ÅkDÄ’>=yãFC._>¼gÑ’{4ØùKFæýOÖ˜ ǃÉ“¢¾é±=2zñ^—x£¤(.'ä1 )½¸¦°Àø.=´auò…&C;÷à±?~ o8™ ïDaáMkb›y †ýü6ÁM8)Ðíî¦H™P?J6ÃìÎsC ±8—}lB¦³¯÷áDTe›´sZ/JïãU*´ ë*jºcKEÖE•©c-Ú‡ŽÖçV=`)(0‚Ýèú«Õtˆ‹7pÈí ;VßÜšGn×x´ˆ¤jSÔ?¦õ©F_å­‡¹uOhÝS†1 ÅŒðý|Œ¹q;3ÊîH©gÉ *w:# 
·EõÔA·ka‹k7g_t?ÉNP[ØUj5Ì.ç»ì¡öôߨÓ%“jâ"{ÏH&r£F|+ 6n´^ÔÎøV¢E}´è¢>N¼¨O\Ôœ‚“­jNy-k3·emþ8˺Yëi#Í{¦‡ æ&èl¶^R$Sš®.¢tú›ZÞD"a÷mÕÒ¾¼ ²ð<e¼}š^5"ŽÂ¦Ör³Q·RÌÍQ %Í(\]Üö ¦Ñ]•™kºš0 œ$Љ7qÍ¥Ù‘¦üñX'4w•Îådòì âï¾n—YO¡xí¨² ).Ççƒú°_´{_Î9ì÷äa‚é¬ú#`·4Óòì‹5µ9®ùèVl¦ « ¦¹Þ´I…J$˜µÏ“!r°g«ÏÞóHOð¿ çä¼4²Je2™F·Ò¦Eõ×™¡»Î¢K`C!LÆûBóCY)o‘£@ïóã²âáI9ä‰áDþyoû’¿Ñ{Þ×°³ñó¼©~ž€„ÃH½6˜ç²^Úמ< ='yY¯ š-Ö©æó!õHu>ÉOß!¦c.…Š×gû À! (èí`X!F5± @@áŽVvì6-@qòy| ̳£^êoACí7 6vÑé4ê=н1+bÄ¡3æÿÜÓO ÇV Æã í9½ê±È$ áæùÕ¹bÉ£(ýQJ{¤{Õ½€ýàŠá‹vóÜøbXú¢ÍÄ4òw † 1èuêh°T•®UA%«ÐïÔ»¨à£$Ä %ÿvq¸Ï×"¯ RÝžœ¢V 7NùR PFBmf u °+Q„ìÆGñˆX˜µÖZÍ36w»~²rÉÀ7…!QaSæ6ÅRÑ4`n°Mô”sm¢§‚䵉žÚDO­[ôÔâSðOŒ–òE)ÖŠ7Dê}ü¨ËZý=Þ"¡‚KÈnàê²< ÿª" ðyà±óêö$ê-€â"ç¾L7*Û¨• ‚ºjåÅʰU!-óœrœ”“C_3N*WÅa )ÜlÒÇ‹„UºaU‚uSy ŒO œ˜ø#“üÇ NL’4$)êð½‹äÏ}+9¹y/-’X´’EÁËC†ÁB2pYÙ¶ X—Ê¡ðè ¡A‘qAò  p:ç°Ñˆ!PvY¤{µ›H# ~<‘G>»½7LÇô÷‚1*Dch°å1W¡!4uÞ–>sõülMq/a±3Áú­ÉYý3¶¬Ãª˜†A:Û âªè?¨ªèèЗ`Ü‹ô(FÌKTÀKT´‹~°K ÔE7Ò…žŸGF¹àž$2Â%$¼E'ºEۤעԈhi+âit£YℲċc‰Ä’$‚E;|%šVùÛ[ôÏýq+¢ÕmD/fE;`eÁhmzºë˜„Ý•ðÈè“÷4¤DUB3œ$f,I¬@ç½’xñ#±‚Gt€Ô Ñ‹‰0+ZDs0šq"ñƒDbùÂÆòð—»÷Wâ¹÷×™j¢ã«ÇQ_ßKßÎÀë ôðƒ¡?ù.oˆ‡ëf£‰h‡ü$þ™)L|Ì  wâß/´tâ8•‡3¤ ŠÅ.½€‡ôJ>bÆýÃJñ„Ëôx‚„èL¢†·fZ7q£mBß5Q/–÷!·h˜J—(,Å ¯Qtå Š±M¢M_UËÔ n‰'ÍÅÈ™ðOýØîïÞˆmßwoLLÇwÃ~§w£¾¾Ã{\o÷ W Ow=7wA÷¸¡©Ä2ޝº–£z JAÏK=‘f#.‚÷,z$ýÇ_7 cÁ‚-–8,aüWÂu“0ò+ÖÂQÅ|Å_9Ù/˜q[‰”¾Éú0)AåCÐÇÙÓóñ: ‚“˜Ä¬¡ÖiÆ)EE%h)iG(ÅOJ›”00)YT’–+vÉÃÄ•>øu w A ÂÉ j’Ä.…·sÝêœÈ¿|‘E+|”Z”Qý ÿôðÏUX Ñ5*©á˜ ‹+Ê9tè¬Y¦‹»NúH–ˆ¯#¬¨ÈïëØP ¬cD÷ö*PF|SïDŽ0 ‰‹5*Éè'Ê·pâ©SAG vxE„›È%­È¥+ºD ±ÇßåDœÁƒv1MÓÕŒi:ÖÛˆB”ô"šŽ‚^ú8vD½‰1:/Ïäk:±= Q[£ Æ™^Ñϵí2m¸þe;XÚŠˆ×öãÖßÄLmb¦<ãÜÄL±˜©«È ©+Œšºú•–]›¸)ß}rTï79aðF½÷+-¿y™“§™È¸ŸúYAñR 82$Ÿ!’Ï~¥5B_éC X!Í8 Þ´‹èñÕx8—‚D‘ M›\M)™u#öإݽÇT};|/\¹´/'¾/V»xÐU䋬ˆV@»4ÃúØï…VDëV$Í@?v)Ãý´ZP¿üŠè²K+<M$Ȯȳt½P/8/ÍX­H.Í×Ñ 5”öÃ’ªCÂ"âÁ¼ñSê(/iˆ—4z*ÒC#¸Ë ¥tœ’†uI#¡"ót…ÓKÌ(\=œ›å_©×•:OŸ·£—Óz„NÙI뎀pNt5@Î,D-þ )T‹;²«-y?ÎwœÀ÷«2‡’è¦äsø>ù4®C¤Rp—>}’(¥Å– æ´©=?Y•1æÙ†1gΘ»ƼöŒ9Í9Ü0f9´2æ´—`:Œ9Ê­>•€ÒXѤ‰BIõÇ£v#q’Føh‚ØQOà¨î;Ø<=¶Žîu£ªTµTõåò¡rIâäb…é$¨ÓÐñ7.ÎÁcð ›`ÈYˆ ¸0Ɖ Œ-×½oõŠË †Ç%˜£¬ æ8K‚Q‡s­Áœ$!] 
bŽ‘`2Ä¡ëOæáLfœXúH!™F0^t$ž^^¼¼dxI¢ï„ÞEUãÝe¢8E¦·4£-¹ÙFéqýœ¿]´‹Eé1ïßÙN‡×q£ôpÍ祧i¬‰Äýäéùen‘þóótòuøDý¬Ñe2> ÑõŽòEÏ OÜÈ—oDa"¸Ù'^ø›žqíÆ@•6eW°/I¿"î‘JØ›ºˆöë5Iù_IIùzɤüº„EË‘²/a@ªN4ªN¼©~°©ÿ3Œ1ÕáÿŠS‘ÿobGUÕÖ.v4`P «ßHš^ì¨Ì&ëé>´ë…ãF£‡-v»‰ýQâFCb+'¼‘ìø^͆ѡþ^+ã>ãtâ {ÙDOÆÙTŽË«ê)ßpËŠF¸e5ËpKá…rš——·Å…¶”ù¦Ñ©³ÀKøvµ ¼Ü^:WÓûÎ9Oøå÷y™ôEsIÃ0ƒ¡—î+ä«¥ön¹ì#=}·\ª/”;{‘œð¾¸p ÅÞ…¾C_U|ÞÂ>Ãg.3u<(¨‰ïì8P1þÓ‡M@¥™èqbxOëpIeU‘}^¥úÆ7Äù> šíõŒ¡ŒuÄ1º!І!Øðñ‚ÝèÂTÞyfêÆï¹÷1ýÔ™_¹sߎ<—Å©àþ¯yŽã5‚¦•sÛÞƒÞû¸BË>3’[Û ©Ýl³`/º¾ÓôZD“Çß±X·ó@« ç3…²y¢ÖJÒyHü žT„žDEœMÎ#^¸¥ˆ/Q†•DD“œ Ñ$žH•m´Äs­wFá윫ãB€¤ÏÕá ò(óÈ(E<ÞIäK¢šÑoˆ’¾j7P,Æ‹–¢"°Ò{­’òmJ‹¿ìF'Æ5>q^oÒzرΫ¸dïá NÉ6ˆÎCçÎæ¹çx“I„÷rðu2Ÿ!@kDA¥uÞ È  E¨Rä[©´ßG¥ù&*õ‰UT€’î{§4ß8¥Æ”ŽÖ›£4ß Nä{¢âHé¸äj8T³%Ú‚çý-lß:»UVÕþzÞ¹:~¹Ü#w®G$Òy?K4« î+x—ê°#I¥·Pã»Êy|JŸ Å=‰õ݈CO¶BkhúÁpý·rªÆ€¦ßpøøEŸásê£Âüõß13á0Ï´«œÇOû,èŽ0ßïÁ•T^Ó?0èæ©á¨^æ¾|vc¼R*zEª 9 øÈÊ©7†?_”ßž†Ó^ ½îz±}õâ:êEÈô.z)ó%5ud0½<Éí7þ-*W>îyÊÑ€ϯ^ç\®ÛW¤U¤U”c/Ï–0ÜQ4YŽ ¬t“ÑPíØÄR¸¼Óp™ã3r£9#Ķ3£Z[€õ[Ã}Ö;9¡­F;4;:e uz§&á‚ý[Ù}àEçÔLÀu«Ù§eð®'Ü%¯€í¼Ã½«÷5¸ýbî‡ï¸û! ìX[·‘€;žž¿èšyÐU(“<Šï]­ô¯S8×Sç:2™FºÙEøØ‰lüXå@wæ@Ç=å· ‰ùý8ДÃܲBÁÞ±Õd.w:îmž-™«L;?Øx‘}Ï^d<+®Û µNƒÖÎU †…`È­«ó6ÅmEÓ=,ø.åû0;¾ëª%&õÓîËœÅrqáªj¸pü`.\oáõàz{ÙÙ8pm¸œkãÀ%ÎÆ+®¾[@9¼|vCá´¹ê!‰úßÁ?xþŽûi…#Rtàz'MدpΊHÒ‰<.µ"ÉÕ¯Ÿ?ðXLÁ¿q«£ÔC_0y麂殽aH¿ëé3­>˪¿xò|+ÿÄøNêz½L÷‡Þ½a*~leí<ôöý„aêj_°G0]/0µ ˜$Aº2£Cdjô€…PêK´_ÛIÏe®nÂsLLy©Þ}ѩεòœËì›Z Î%&L*Jˆ›  :ʧì''1ôçá'×Nè'§çX¥áU69i¶d…ŸÜQ¤'UÇö£BTp½©¤X»,aW­¢Û Y…€;&«‚Äf³·Ý@¥[qî¤ùÎ)yØéεœµO­vlO­(7­4|´"T‡úhu˜‡V'ì$^Ï?KÇ9KañVºeiùdé8d)ˆÌ­íø¤“¥,¹gTÈÛvC[éxl¾JǨ Hw î ʳ?ù`Û݈饾gЪ\€b$xJË -²³Çó¥Ï8v³Hˆ{‰ÂÁhAÿ*M窘ÙuÜT‰-ã], c¬ü‹šîT_ªw¦ åá±]©bùQé88ÅO°zž/Ír&•ñmÛÁIc9(™M˜€CÈ\H?M ‚ÓòŒŠr‹òø+|¢â:DÅó†R»BUô…»®Tj,DC´ãSr¯§8.O^··ÐbDêì” _œÒ?G?í–LÛëà$õÀŠJáÞóŽîŸäϘOV¤Ï“ßåç_Ô‹)Â[mÎ#½£Þ½ ]-,ñ.´D”ÃÔ;uu»Ç‘*¼#µUÀÇ mç‚“S!Ùa·?èùEÇË ­ö ÜœpóÎqjJ˜<,Ò£)FFc} œtß¡U."­†Óœ§-å<­w3(IÎíKA·}$pKÅ?LÓ9Ìãw¥H]&Y¨O˜Ü! 
ôs¥³]¡F±ü5• AÜ…æQ;–ºq¿±€ÇØ÷âfçQ3b»-/óš°O÷Û å9 Ù?ËöÉ›„fW´E\Ñìœav®0Áúœyhûœ]Ê|ÍRè@åÓ–oZ² Ÿ¶ÃϧÍtœÚÌWÛÆ«Í¹6^m’á|w^mEbªÈñûÉL&uhÓÍLf§&]ßdn˜¦Lq&òC¸¿¹ôô¸¿™ÜÿÍÌ(ÚÆçmµ}ÞNô¶}vñãà.Ž>^Äç­úû¼UWÁç ÷á?œÓM¸ñz[S¯7š¢q“î{MGý 䇓 x(:Š¡þ‰$ÛœrŽŠ³Ãb¶•¯bt#«“v.«‘l’Ïm’ÏÉÛÓ;ZÕ|mt:Θ$^¶:’A¦6ÜÛIܳOF§ÿ6Î…¦$®Ë&‘LI޳q™Íl¬kÖ<Õ›·ãe§:Ì2k^øÛ¶ã¾j{“N ›ÛÇa[å,Lìb‰ŽË0ã0”2=Þ[•àÕ˜)dÇÛJdÒÁ˜¾¦‘~È$Ò™èy¢F6B´}’‰NjG©g²É}‹ÿÅ|‹›ªx ÉeþÉšÍ0·…‡²68Úiõý”I\ò!1¼•‰™ø[Œ¢¢õêæXŽÒjJÿîÞ}@5“ü²\Få¶Ôòl&qR»É,}ãRZÊž˜ò˜‰ósõûq~>*‡%•Œr~>\¦ó³h òßP—ª7t˜ýI¤¾£KôÆ%zA—hozNqxü®*=ç"®Ò u¼|êÃ(j¦ ò×·{2íÚ™EˆkÚèÕp„¡R‰±®ã¡-MFÎÿiCK÷ä3aS>CÈñç`òôü2·Œ©üüÜùS]çHå"éDxN ~ʬ0îŒt}–ÃC‘fÂa³éÖ„R¯fç°‰‘§ëÌÌk…{)ŸÿÓ†/èsII~Açä]îQ¼Ëýwm_ö…Ý ýöÜAßë¶®@i§ÓT “­åC)~pGYÍ{üь봎Ü:¢Žùc,èSYkž×Þ“ ¢û?ž@w¶Ã¡¬m  mCU›Ü¡ŸáÐ×Ö*:³_ ÷Ë-U§NÜ!„ÌïiûF"¢ MJpÔw«g»{Õêö#~ØFçV̧§·¼®müm;zÕâsñ‘]Ô®êQ+gKbw{»:‡0[›ØÔž!í––7‚Å EÏ7òžÑ©éÜÛóªç›@Ï7’žmÖ\§¢Æ"Ö×gk0·îlûðp ‹™¼ÌpGàmÒ½èMÉWû¡ÿò`Q_à7Âf!yokÅžÃq<È3С+àmóƹßñ>x‡È}/nÙ#1\0GÛ€×ò<óvî|ý…šwŠhÝ÷Nrîæ‚öÆ­D×S*³;¼‰`#tðçFøØì:ÒÎÖEº"ˆí„s¤¶½Q€:ç!7Úž!û‡XI2™MjªK42V5ÑÈ*Š‘ùLIäÌ|,ÀZÄîßéÑšƒÊ-Æþ!`mzÞŠØ]¤ðçÆ(ðÌža/tÔN©—™¢RÈ´ÏÇá,`é@£—gA{_Ó$Ì m[ìÐa€{Ž/"þf謁©³4èO^ð†Ö-µ%OzÃÿøÿÙûöçDr$áýõú¯PÌÞ®±cЇ3Ûwccì<ݾû.&°]n3mƒðôôÞÍÿþ)SR•T%U©ŠâÕcb¦ÁUz¤R©T*J¥øÚ{°42`…Ò.ÏÊn«­Óg;°5ç@p£Ý¼§Ý€¤¨óg=Y‰Â°ØbâýãxüŒ2þ#ˆ§yÞQâ0˜¸d0#ôƒÇ{2¸½}ÏE±¥².‹A]Ó=åÒlrø4ìõ–H£†ÉŸªíTaréŸ5¼ÇljÍ‹9 œbf8‚ Dô–°:ÝÒ<(ªŠ>°AY*sô§[¦þhsiõH»Ùë‘þÊÐ?ÔÊœ­Rk4®ºµ~ÓS] .Dÿi~%6hûÒ|à„GÕ˜.„ö Ú€÷Nµ°Ïuâ7hœ4p8\ëàB¨>¹ê4@ļ>º”Pº˜úÒóuLWFsÁâÞQ sHÏ ›~"6Ÿ(5g—v7Ýq­Ûâ²î©ÝKwn²–öU¯Ü®Ê8ÁŸOu±û¹Â·oP2 ò1V¥ê _¾Á+œÔ(û(ðýØ[¯ ­Ò’¶”ŠÞDýë˦p"âj#ïÎüƒTúºL²¦ß _TLµûÏ ¡uBwTOðànAV°­‡ç­?Åcnußèn ½õ‚B)uµëWx‹¿R-ÊDëMzÆoQÒÊ{vprEÊÚ.6]Xïñ„Ñ(>¥TܰՓ‚æ¸+ø̯Z'gh¾æ‰Æ°žÑÉ>/_‘¦(HÙ:= [àÝ_­¥bð ‘<ÞP´C©†cΆ9Ñ9@À,…ÜtVdÞ@ÈdÌP;ÔØŽ/}llWÍ<¸Ž8€ù@Ž/d¶+:ªÚÙ–œÝ¶DÆyõüÿP¤j4Öñ·€Ãˆ-€Öí‚?x»@ƒNUŽGÅr^/‚‹¦\ªÿ‡ðRæ>%σá5ûÊIÇOÞ«—Ѳé§ÇoÐÂÝp:› o^`Ÿ¸qg_]Ê¢w‹…*ª[è÷÷Ëú5|q›–Õ6P§{NžŸJýCT“î7Ò«ýÜ ¼±‘¤z^¼tßšŸ³ ¤*æÌéêÚÁãð_prTü6˜ ¡/*œ~Îa!I ä¸b¥zà?"ÒŸ§êŸ-9è›Hqßþò!!õa‚z¨ôâŽn] qŒÕFjoÔr ‚ÌÍøet'ѵƒDMO‚–è±xÈÆ(jO,åËùýü<8Êã¹ùÙ Çwäëž—o\ÊG¢¨ÀCmÅR5ªbU[Çñ;kÀ ^¦tD³1¹™¸ƒ/äåY®sóL¿ ŸŸAwêððu#¼*JöIÄ}«!<žøÜÚ9æ ô Û9› ¾…ý¢X,îPn¼?¼S,ößžªoQÛÜýE5¸3I@Ü!¨Î='n¶ýñÄì|]…n 
7¼?ˆ¥–ŸI!†&u,Íÿí·NèèÙôZðßµ`‹)ï7‚qXó;x#ƒ†,—X2ÖŽÑ«òqæ¸F©s%½†”zá£xv¥K©¡lŽœ9ÀT FTÀ¤ë¤ãÅ⢃œŒè:º¡þ:¼›=àÖˆcàª:k#ë8»Â©éÍÆ—ÙÏy˼ú€Éú5ñ÷»þcòr;{™€±ýŸ/É{mÐFÑþÎÈ&ϼÔ^F_#åÄ6ug°Ø)-½ðÎô‡µÏ7üÈÿ²ßÏðïl‚ÿÞl‘‹.ýÞü£\¢ÛËÒÜ[q™NÍqñþ*Ú°Á]ÚbÓ¡²ïLo83· €pWÀ•Ò÷@¹ˆJ##Ñ5ŒkÕ;_³U'ñþ‡pÆ1vWÜ à2Ç)ÜaÞIà•$ŸÈ[''Ì€þvfÆÄû&EVûâ#Ä|ö®ÏÑÑf¯:­~>|v7_¥ói àN­¹Ù ÞÎ'…ªŽçp»˜šÂ0fã¡C[=Gº$E:S.•E‡îýƒxßGãKèýL¼ÊŒ´v¨°¡aaýNALá¶×2t3ha Wÿ6|ïnaèæˆ¿}š4&ŠÂSò2 Ü"ÕD®‡`nàÝ i|Ž#áÏßãsÍ‘Z«½8A™ Ãú jáŸgW¬qÝF 5Þ‹ °b6tkpù÷M ïÔ“/•UVã} ÷rC·ÃhÆÙ¥Ã©B(ä´à[¨ßIoU‡÷*hÙ6}3JüSÿ‘£SÈ#Uæ–ÁÀgˆ§yc™á¼ÛšŒoÞèŒù¿ßJ[8¯ŠËÊ\——/iLÉ(¶)Êp¾ÜzðÛ « º9ÂBÏiR*Ê·óàì°«BÞò X Q±ÏpÂȪØê×̪TsGžÀ@…R˜RqTƒS@tØ;ÊG øn7¬àD h¨÷ãPæœbÜŽr Û¿åz¿¥¨ÛØP2μ紼>ç4çh ‚ËTÉ<[åè‡/Í'¡+óMÓEÏ ¸›‰¿é©A3}N9c…··šM –kÄuúi³×*щžíéTë± qÝ».DÌ’A]oØwì•øx¹U´?~™ûaJ&Út„RÈ“\Óq-'ÂÇŒøWéÅ™,†áÙEb¶m1l^énãÏaßcçªmýDã¹øëðʼ ¬L-c…hŸ4+óì üÛ:bæT‹®IÀ¡PIÏm×ñ™­Äæ•×®2»•ŒCKÁrذK¶X[äÚöw'oÜk K¾’Å’×îìJ7Õù–¼Øϼµ_ír?›µ½ÿǯ}J7‹_ü–‹ÿ9°øŸC‹Tªêpi´6À2íÆ‘2~iøÒ¼ IÍAL´©ÑÇ;ÍaCß‘¬f 6>9éÛQc¢'‡ŽNüsô⟌wƒ=ôóªÿ/+û4êíZoTÓÃÑç½Û_no³rá>Ë>ŠÅâ~¥Bàû`¿‹%öw±X.–ÊT~)WöK•êAÙ!E§Z98ø )f „éó2 &”__‡nD9Zì> +8˜"ñ¾7äÃh𒮀níœ5ô˾§›b®‹JHÉÉleÙ†ÆcQì¬w“áo.ÞK4 ÓðkFÀýðÑ-`…ûá„–tÊdâÞŽ'wShˆ‡MB!t§›¸ƒ;»ò8œÎvï†æ‚7=¿Ì˜}÷¬äV5±Åñäi0£ɶKd»@j#2Æ $½#îïpøÀ»ÚRhḌ`<ã›Ù@˜¯îÜGwæ5ã(ÃU\“Á-ZRî'ã'x‡mŒÇ_¡p©D1r§?Âã-†øB¯S»džÀ•N.N <ý²÷á¢O.®ú—W}rÒj£Î}×!rSó-W„$ס¿® ½uB ‰Qß~âÍ£ðŠlƒÞ¿b~q¢ÚPí,ÍZã}߸èâšéõ/.!àb¯¯˜7¥‘ˆ ûÍœ«!d˜UðŒE¯‹ÁÞ=¯^tAÔÏ»ÅÃ’÷°~ÑÇt Î~ñrvKÃEû"‘¯×~Ð9”Üm7>r±J~¡CG‘ö^¢“^¥”Ç>e«¶Øý´xLfôÿm’s¼_ÅÂA~·X8ÚuáÕ³ëãWM½8…r~×)8á^D5Ø!¦¿Ü~vŸž`‘(1&¾ÃP]`·À±Ùa“Ç«<$¯2ý–¸Êl’¢Ê4ùX&_%qÚKéKâ^,« ±Ýuôä“ãñè3(mÉiþ—VcÿÏû{*Ö‘^ÍõOOƒ ^Ýææ&}Ç;±- À¢Õ¦Œ,ŸÝÚ|ïå†ï’ƒEß½}A9wŸÆàT÷òD:ãÂá!ÉuÝ߆Sˆru¨´v> ¼”²ïáíàP#™&¤w;D¯¥ã!«““äx4r9í`6ž|Ë“£ƒbQh°Ç/³ÒL§¤F7«׫JEe¸%{<œ’ýb¥|”§¬Tk°?Ü=¡#|L¾ÅÞó„:Ü~ñ° çDwD8-Ýì6)²'_ÝÇG£YÿåÉ ò(ŽÇž‡§)y?¿<“öìNÔêÁìO›EµhÏ=¶&H)]kï`"‘*+HíGåvêô®ê½ÈÖ Ä Ey,Äc!âf–Ù-Ù<§ftSn+0ãÆ·ðëQî­Ú¯F6Ò:IÖØw½×,Ö”‹Zàr(jÂo c~ÄÕ ‰áèŠìѽ íû/A\Ü”Dš¸è¤píÚµÍu màœ}‘Že¾êñw, £¹‹~¯ƒiðû¤»r:Óõ 5îgU—SrÐ…]7ÊÒûË^ÿ‰³Âùèê^‚¶Øün"y¨ÈRg®X¯º¥³˜´Y1˜¯Ȱg®Úh«†Ú²Éi÷;î(ÉéæÌ_'—Ó¥jo;†Bz~A¢üW4 £·a8^¦Dw ÖÅügšîí’ŸÁm?f>;c ÐÎÊì«Â¾ªí|ž4hÂm&蔕mþ³6²‹³ˆ 
£ã>—|ãçN’–ÇN#ªa½`ÇIRª1*ÛÛ’à)Kû[«gK–ñ¥Š(À+OíB™\ªˆ·í ‚‚6©â¼Ôj€xP¦âGpîȈŠM(/¿Œ†3‘Ô)ÓÑàyú0¦âòËŒŠ®\Í*Âg*¦¡fÄkž®Íã®X`ôÞ·ç…ÊëÞK Xú†w¸'‚§PhK©T5íÅe³ãår€cÿ;¶«xLGÄ øãDåÇã´ pa?ªfàþ€¯ÃÑœ¸2Ç5 |Øq½×Ai _èâÄr´Æö=™³þT;ßÈý`øø2‰ì”míæ¦a,·èôüõÁ¥¯&üÖÈÍÀB>!vÀEW_÷.ª'&$éi2þºû4ø•ö†ª³ÇÁ7J8Öýqy'ÐßìaâÒÙx¼óÃð1¨þð&ˆ¨0M!v ß[ïNògqf]ÄEûÒÑïg/º¤EÏRÖA¼PþU–födñ㻨ÍTNÅ{„Y¿4Y.i#$t°åDDL?Kü,‰òå ¾Ka "šKD}¼-Fä?ñrZ^Ð@ÏSe<š!‘€gZàJ Jä¤ÚiñµxúbRà€ž‹…¶s‚}n‰ÃÅ\ó²ŽÉÓ‚ù•CÃT/u ²îIÜ8Jú§?JøFÉEVÛQB—[óVºA¢Ä¬ ²îG1ï*?ŠË[™RêãU°‘ï+6Äo¼öÕk1);²ýOÛïa%¸ýY4f¼”E9âš“ŽGŒÙkèC²OÆíÁ[Ú8û²X4L/8p;2;ó3qúܧcݧ®i'ªéb°éDP#'pGA}HˆÃ…#%ÂÍ6lØ¢‡·Þ–Í+Š}›þóM#¨>肆%eñÎ{ÌYšåÀ­xF€/Pr=Ìæ°_ä·3÷…BÃï»bêmëX ð.N”¦ë=ª".Üê¶h^ LDð³ZT‡¬ë] b&o´/(¹0í¢ÏúÄ Wp»SŽ%‰¼}€ˆ¬g*$¢‚x[2Å_öÐÛ€ií±çˆ§ˆ º·LeÅ.Œ4…n¨’cÙý@Ó·ã_t%‡Ö‡/#buà_ì.±‡dŠxƒ—ÒÃ<7ÃÇáìsA ÇfnuÜ2s¬tœŽTåR^Õ^pÏHmÌÄ‘!)葼 ôyk‰„yÂפ֙‚×=å0¼å©8NÕ׎÷ú4øºáeØðùú@7Ü 3<&Pñ6wrè·„ ìœBRg:åÙŽç×Dß–ß0—Ì€XÕh¶Yt#C<¿a\ŒðÁ¬Ål~}VšWVÒü U£ùËÊsö ñ•kåGuåO¤qîÝ“ìiQ›ÊRܲŒu-ª94«¸‡Ä\ýÆtà ñgƒ]žÜhèaE —*raðìRx³m î„·|Ì>Eå2Úžtkœ…TvÛrLåëëRˆÏ­H`ñÝxEô"Í™ÃëUÇ,4¹gtLC.æ„o2:è£&YÔ5³ Í„xŸ.Rã/òÆë"]äߢ}€éŒ¸T\“¤nŒýWò¢ü\dæ@­ wÖóF*DOlI„NîHØõýàn€ÑX¯}‹B‰ÈÛÒóºñ+…\~™nž©^•*2L I¾R…¼Ú0ûð¼·åÆ/ßyOÂ@°x*I@1,(å>¦ ÕËš~.Î4g:"å`TμìÃäè¸àçÁ)^ü³ïýsÀÿѲcçÞR|Cã}óüœJ¬tëûùà •Cƒ‡SÃΜ\½|2=€X£LåmYf¶h±´™µ›Û¼Ñ .ÙÂëéåù% uÿšC ªÓ×p­½–Û°ëèy|©¨§ùà«ûŠ/†/'_üºž£¢†"?0Šóhï–‘8«Éˆ³ôç@vfÄù'ÁלÄÙï"E‚µ¾ßíÁÏ*ç~2â,/Ù•¦Vl0Ì…4ÑÆ¿ý[f¤÷]`cn®×=£Ô´\ a$#¬ÊŸcgFz|ÍOœ% ÉCFøûÉó0yV×Ý>nƒHü¸Ö÷½ $½ñB ñsÁ¢#LÏ–›±¿ÖëϪ¾W6oŽ›^JáKÜõ TM „Õkzk†ƒvu–î¹äÞ³Ñ7¾{¤x¾æžî”;o˜%ÓOú(GŠ`?”ˆù¿1;de„lÒsõ1dW`FK/I„¶$…PkýJ·ÆJ\ü¶Ç&Õ¦àlÉM<+E(±(f©à96ƒ™"h'Í6­üs“œ×ànÉËn³ÑêÁ}ær¤3[oÒœKº  ¥ Õ!a.IŠr%Y†÷(¾ï6k}Ì[ª‚Š%0% gúôb¸:j‹E¹ÅP UF†2ã MÕ(ÇæÏÍ6„³´k=ÈÖ±·…åN‚0^´Û[÷’ß<òÇebõ&Æày G+JÂQŒÕQ í먵Oy>½Ò,;TZæ Çòú¹>½ŸØòLÅ#'ðôZÆüB´ìH-S„Ð%…ô>…´›V§Açs¬vkפ×ú/ŠªVGÄ£º•ƲéªÕé"½è4ƒI`¥ÅÕ»ª‹èVŒ©Ä„¨¥<¥Ê‹>†0¾ïÐ^Õ|´ýàÄFvw(-K·H‡ÀLx» ßÚmÃÒ9¦ýQHޝœƒÔ›@)M PŠ\Èßò±{Ñyß¾.0ÐX{˜Ã—î‡ðòåú¦V(µï±¤¯ÆÐ26ŒL§­p-0}Û†ù× 4[<̱S*@êWQÿ]jçtÚ(YÓÕGÁI 3äÒáÃò$¬©Q&ÿ.ŠV¦^¬3J ÝBûlëÓÃzØ÷3ÛÀĶùâ>à)÷œ?çšâן†Ž¶aþ™¼j'à0Ð¥® y‚;ygƒyކO/O<ÎgxO¦ñ“j>‡Zï4×]Óçë ó×T‚ÁwJäËð™Eú@wà-áŽÆ/Ÿ4=‹^Þ›zKTÊñXß;ªÂ8¸tÁ-ØJsŽ¿d 
ÏaõÌ„cú9óÑ|¦Î×\¨Ö¨±6ÊÁ÷DòÕj{-ú#–×ÝN|ðÔé5žV݃î=hl…¡ ž:ŒV¸Õú?Ó¿ÃŽCã'Æ«Esº*¢9]W÷öad[KA¶ìuÁ¹Ø€úÚLÃr ðo’9„öô¬ök¤jö ¸ˆ+ Õ8H;š'Ô+uÅ÷̳¥\)hI„{©ajžÖ„ˆÎkl·¥†1Ë øá›=wz×õʯ„nÔÍ+„HNu|‘ÔÄ*ºEÂK×Å:©×I=j©Ô£– ýœCæ×a1™×Œh×PQ´kèÖ¸xˆX?õÔ뇬N—PÝŒ7Z"nyíÀ¿ú…DR¬% €˜åDÔx xuÞJu‹Šè×U]¿®HÌÒª3éYx~‚$fUÂd÷øÕE¼ÛÝk~𜑴˜¶ø¹«™Û$^Z÷ºZg,J?¨-~ÇCòê’KûÑ!·aD.ûD¢O+8eT°Å?Y¡™}PìpQ'jˆ˜—ÐýÍ|ƒÀºþnÜ{X“Tâ~ä7FU÷ý]cÐÂŽNžÌW´î5,rö9ï™wvöéô"™#ýœõ"Ùrì!1úáÅÚš|ºY–0Ý c×#—ðîF|´·>p˜f›p ”~å01PÖ=(Aá¡,ÍeAÙsQ.…:›Æâ'Ñ 2vÌXÞX@ÇÓ äÐH5–K#z‘5èbbwJØ.­ÕXß6ò›Ën£ã䘎Ô\¹ÎÔ…qœ&èûÎØÔY^è‘,ä+ܘª‰bà ùCÃ{2âFÆoõ”n‰@*rbÀú!Ù2\ä^âÁz«©lÛ`¸à¬;nŸaÑT¾@÷ø]Œ®û:˜’Ùà‹;"ÃÑíx…<~³  ¸8‹‘ÙÇ(SÏsm{½‡?üD\29ì¨Ð~z®K¾âµ) W¹¥Bãg¼Qw:¼“WÖ4I\­†ŒÀ?°² Ê¢bɯXë6Ë,ª´•"÷,^¾Â½°|äLòòU,†å#÷3^~Ÿm>m¬»»ñJ,ÜBqЋ­†[]4Å𪇬*Ý¥ Ã˜mŠ×9ò»«s§%ŠmºÃÔŠ¸Ñðþ"7Q‰‘E!%nÿÁ 6@…c³ØD%HhXlàÖ=¤Èsš¾õ.xŸI(—={Ö\?>‹‚N›åB]™OIƒOI̾ٺgkx¡ÄÅÚÏ“ ^Õ\}0qÕ@,퇻ij4õ*ÐÀÆL» ìꎿžÃõ·¦>å²ñ£Z6|Ý_ðžËmJ´l½h¨C[¥²m±ç¿±asð£ÂYNøW0VÄ)Ò6d–GòlG@nUÙú9žXáÆ¿ µBÌN QËu0þÏàx—- êïZxuAuAuA'¯.èüó]¸ ÷àŸ+ßý¼¦á^nƘ…ß¹äVnnGø›ÇøšÓóó)øŸÀú ú›Gº0ZÇ¿‡þKû¸ºl_°{¡^¶ÁVã¢äÃÕŸÁÕüCÈÕV lv¯.ç \Î{{[íîÖ£›½­«¶•û¹²s*®|Ë‘ÃÁVÇÒU»¾K£ç¡.2©Ïå¡^Lç¡~ôê tP?šÓ?=„¥4þœÜS„ó¦Ùg31–"<4õXòý1æ$+/ÌxÏ@ÙõRö¬4îBZW@áëßc$_z¨» ó`¨FïG,2ìÀžë›–ÆeÝÖ8*#¡œÉ¨÷Ö7˜³±p¨½/ñ\^øÌ¾²íÛøßG:ß'ó¶q»7ùÜîo…3û•èÄ FW@W?bq½£’BÞîäi8RdCj¨oOôÀDWö>ýʆ!„\€s¿YøÂçœeåÜoå{líxœÀë8Ëqbc{gcÃ5¼áO"Oc;7ãy|Œí<&QPæÌÊXhZ¾…b<Šc܉m<…“¸ ÛûǺoXx'q ¶÷ ¶€,Ö#ØÂ8‰/°½#°ð.À ým}{pÉèúí°ÊŽ™Œ3~ÒGÖ÷Í,$r4ñþ¾6ξО:b=ÄElŠOph°Vü³3p"ßœxWº8/?>*8ånÒ¨â\{3ñ~NäúœÊïÙÞ•{{8fŸ¡Xœdáë¬8:ã‰ù‘¢¶HÅÅ÷K¨¹a&rnNäÙ¬¸5[¹)+ŽÍ |”Ußf eÕ£ÙÞ;Yõj¶ðLV}™mÜ’í}’U—çÉq|*Ôp¾ÈÖ|&¨RdKÿãHÏÄxçc;ÏãdnÇé|ŽÓ8§ð6Žªú/„3ÇÂfë[œÐ±8V¥¢´-ù c~Xü‡-¯s?Š)‰Ï¿Îý8qÈ*€¦ñ?F3O‚6; ÇÇID¢Ì—mBŸ½¿‰`CQ¼‘Ò¥{^n gn{Oîd ?ìÃéÀmöÞ6¸nëü¶CNÛ¯ÚËwÐ.Ãþñ¬@Bí’Ù·1òÌ&ã€;#1y¿zf¿zfÿÉ=³ ‹¥8bË.Ù`w(ÉžØ*°‰¼²eWlµ£[6¼]‘kvÉÎ5; û2í¼údgåíÃ.aíŒÝË7®x o&9ÉLt ý0sGf.¿m÷~fr öË2Oâá燙Ñ‚}^…ú¼RÛ¹z~vîȲíñWW¾.ÐÒœD:@/ßûYa3ÈtU/f…¯xÌ9äû¬©;±ûsyIîÏâDÄ}›íÈ>Ï"÷’Þ¥Rt1—fÈÐõêÒœ½K3¦~åElýš±>s6Ó×ùÕË9¹—óŽÍkçAE.ÉÂ#¹qì¹$÷~ɧ’k²Sz¯äsí ×QŽQ®ÈI[5Êáß^^uH–ïÁ†7”Í@Å{.#ú<ÿå`IùíþŸÀ»9àÕÌ’S6pß{õn¶ônöš‘¨úürmü㘾èðßÜ:Í¥ÛŽxØ¥‚#ݰÿåNÆÂ× dœvo\è‘sÊ>Š 
Lq#R_ä¼”7¢­Kl¹Žªg$诣ª"y»•”7—çr´þ.]ˆáÎ`ɇXÁó´>÷<­;†›ÀAõ·ßÞ@‰WÏÞT òì Ò±píÅ›úÓùö†ËЏŠFŸÄ^|Ío‡GÖFÿG¬å! qc3˜¼ 0X»ÇÐîñX)æÊßCË›„‰Þ§wÇ\<ÁEåÞGqÃM¶„~ŒIP¾[³YDR?êÕæ!o܄Л¯ƒÖ@—`  |a3o,¶ò)&–nÅ$Ñ È(sÓ’ðU.‹ëí/7.Ï{¹ñ|WíZ^n\™÷rãy.È“\™p!0œpŠ?¢oîÜOO[v·OZ_(Ld;f9GR^*LTïbqTd,QÖÆÆ·#Ôµ6÷%ZÝ,L¬/&ž×-œÂr:#™-o$DƒÛ4ò‡3Ùx‡#íL_iGãi§—ídB:v”cq#óŸáÞêêZÞ[mçÎí×JâÑí×*{W]3—èxÉEqðF1êÅÉ1Ч·ýÙû /È>HsáõaRgr¿ê‘µ?9Iy—t1ÅõØöwI£ý·ë ï’NÂv, sBNôYÜ%m땘Ù]ÒÕ ¾KZ{×-Y§»¤-¹· œö‹3VT»‹š¤¸ŽZôlí;κÑ\b­ø'ZáÄ|uÒv¸E:puâV|B/Pc'ÃÏÃÑà1¾ ¦±³ZëìŠa²¢4~ «)`ÉÙ”ÄÔ$oIŽþ½ ¢èNâ¹$$^?á.±ºÐY% .Fu—°>ò&vY8dC‹ø$gÑÙóhðÙ‘XtŠHöaü¡¾nüá„JøëΣÙ42bÑ›<V<ÚªÐ÷™yÂ^‚=ŒÏƒFM…%2+k™‰fßõ± õèj4TæGSÔë\ô{‰üb$nXÇ ûòÞ“[&R HË–í¼*›™åÂXßä¢{&’\tÏ^#ÂÖ;É*<âÿX8“½¦½Ø”€°×´ZÃi/š}» 3J;¼v«wYý®Ô@\N /öìÌ‹=;¥ öôS?EÙëYŒ»zH Eó²b´iç5‹fn'QZŒP,oAÔ½ôÖš&;Ê\\ß}û<ý?SžŒî™.OFW ÓJIÖ Tÿ ¨H–’ôŸ)¬LÄ’5–’4C¨~çNš¡Ê2Ed¥Ì{ÁòW(ÊÙ¨Œ†t¶6щ.¸Ú³()Ïåáü g¦°µ3IgÆT }C r&]ÐŒ¦œˆ @rhh²à=ºý C » YÊE¯ô@jjÅh~<‡cè™æ…!„KoA;×­„š_X Øùæ‚ŦxÀa§ÝH¸`åKÓ3Gê ]I*pÇU®Ê8¿ƒ¶˜åEã]Ñì‚9Þ¸Ùæ)Jãþ™ó¥=ŽImhµy²P˜ Ú_²Þåm²Ñã‘_^\ Ÿ‹·Q£'±¦_%A‘Ř¢[´E¿ 7G£g^8.›- Á¨&ø‡ Y5ŽêáÑvaÙ?"²ª°Ë–íLú^®å± ˆ²Š¹cDZ­ša‘;ë6DöI°\X¤]7Úñ<Î%Å&\/fͬo’d¼±yz2y#ÑmÄ"³4WÚ‘´9S“vÄûöa¾ãÀ~êh0Éî•çY¬»²¿k“°BºvÐ8$«ø/Ëà//I÷ÌÏÕ‰S&ÄpM´uV'BæÌ=’fÊ+YN¹<ã¡›$ /`JAy’Å”†ò¯ˆ ý^ò©”2ΧRZ§|*‰’£”¼8ºžGW©œ$I ¸;ÃÂY+JD^—‡ÌÍ)º$Ïçb—Òc é^’â ¢‚ˆ›KJQÚMÎ#³$Ns´  1GJ€¡ö“Àµº“÷¡Ë"SLqs3Å”7/SLºô{‹Îc³çÄ–6S ¹?¾<ÈÍ7þË"ŒÞŽM~:†9Ðèš•aÆÇHøM…ÎJS±nü§ Gp¢J°^,Þ!L…hmsz×?û´¡ÜiL9»è’S›¦D—²KÇþ6D«€‰X[@àŠCêó8$° ;ID_öp*S4l§ ‚áÕ½ÚÉÒò‘åÐF.5’&õ”€·çÈÞCÒÅ:ÚQš>ˆ©ŸÁüòœôó§ÔþSΟírMÆÌ€™ž‚RÖÏš`„ÀŽWÉ-fņ¾Ó¸-f¼O}™Øk:ª×¨-¸ÔŽ p`Uvd’ÔKØôÀ}jÊeý¥•+UÍ®)Ú¬»õ¡Fí×$¤Ó“fJU)&èKÛU¸'­AÌâkÚkÚw6Gb-ôE1Ñk’Z·Épßì]µ)1œ J?l9–MZ..!FHQÁüZÒGÀ/ˆ­¼¤bD_WHm&œ7,xi©Ñw»è»Š9úÎ*îÎÂàcwÇZáÞÌód “a—±Ž¸Ãh;^ÄÛX_£Þ’D½­Oš ëðÔ¢Ð"hÚ2­,‡”Åç E™áO’±eÅç§#^®1}öÂPš‰ ì¯/Eb¼²W6ð}²eu%¢ìM«ëhZ—"MMa¦äcòHÓ’jZz5ÝÔ샯A¨Ù¡†bNë9RÚ±‹Ý”ãS5a¨uÛÐȨÓPp©¹`ÔiTpiD36;5Ó ÓÓÓZTÒC“ê§GÔÄŸŠTˆæFŒ1ª¯¡¨kŠZÒÇ¢–æF…I|ÿ“º„˜TÐ5WLª9Qà÷“Z¶ŠIÅ4©ƒRY†¸×¨TM+¯Q©I¢RgAa©±Sn·JÅ&tá©Ï,$#Ú%ôtc ‰}- FÑá0¸³Ûf÷ NCtƳ†c¾2šŽåLoÑ™'føbÖ ™ƒøÐ¯gk˜˜\nX"°  ^JÍeÇ@¥‹6G¾F/%º8Ipñkl±îó=ÆÛÄú®o0¯‹S›ˆ3md¬—‚ÓTd3ÃŒËs…§ 
¯Ø¼0ãƒEÅœ‚8'Ó­4.õ+Y¦†#S=i(™o¶E$֢ޫ+¤€U&/𨸲$€?k0sDZ×`f»`æÄáÄË‹¥µËÎÈêg¶4VH˜ÌƒZ³Læ®$4Y¤Í<6Ùo˜e?}ý‚ˆIæ±ÆÖŒ0» äÒæ%W6/(Y¾³ò d›½#¶× dÞT|Pòi­l¸lŸ¯ñ5p9ê“4pYɲl(äÏGOS‚I&A‹ì' ,%ÌzɵϠOÔÌ>‰V 5ïÿ`÷n¥L•¨“Ì7nþá÷ºuRFpâ| þg%ãôHÝ”F›}l’ió’1ú¥„!Þ,Oh=ÇXà¶ol‹p Ô~²=*²¬z¼Lã5óëe¯—i¬áe‰ÓAî§0]»±ô‹5Œpç €›¢Ûè—{»FÕîvjæaõzFž4¶¾þzÏÆk€ýثԽÌË6|Æä­EÁ |^e¬—æÊƒÍå ž€P}å^K¯ãOÈ1ÃA®‘þÎôèï__6G8nÕ pl( wÒ7*oØÚBœAS7ÅÛ/#rÎ9ŒÂ4Oh ð+à…‡#ükÀî!¨‰¿ïX ìäåvö2'„¾ '.ê§h«è—ÀÜÝòäëpö@^F_£™{GÜGè:ug­7~áñ;* d8±Cèá³5ý¶E(Ø[³ÉÖë5p͇¦ŸîÅû«È>°6¸K[l:´Qös}HìÅ!Êe!ævâî ¸l®Ñ ‚-PÓ®Íc"Ùâ—ôÒšwd¦}˜p."¹j®;€å6gÒû‘ñ~Çân ÃÅõ&…´Eù#»ªâÔ<æ÷MŠ– Ì´/>ÂWïúÄ ÚÊU§ÕwCxpDݾ UƒB"¸µæÂxë5=ßµê1×ÐQƒ/,"(oä/>`¢÷âÁ{AÑ"¿ ”¿ `É/fÞ^.bsÐ(ÀÑ&à *°Ð69ÚQLÐGÛ:m¯:Mè¬ÜÂ@¥~%Cˆ‡ e¬|òã,)ì¹¥„^©ñù ù¥xd~8XæÆ§Z|ŽâÏßãs™Dgƒà-´ ‚Ýœ’IŽß¿°MÞª ^§Uè`'Ìå?×>?¹þ¾I÷̹“q°-"iÑËþ)zuï•LV^‰…[ümd‰To¯a9’‹Ä9¤Oˆ½£ÚCгkÏ0\½OóÔ䍿%¯„‡9÷ N"<$–%)ö^lkÂc½Ñó¿•vr^—›¹./_ÒÅPôIÏ_†=ømA œá$æœ)æ ¨õÃT‚ìÎC«c&ßJ~bpCˆhÊ!ë`  î}Ææ¼.‚ÒÜ‘'7TtTuâ­Ä·‘mJìZù›ri ÎŽ 8‹1áÃn«óLõ©Å°Ž`ƒ«9` ´Nô–}QÅfYiZˆ‰EëÆj‰W±É€Ú·¦¨â-†]hdAEûq¤zÀzñE¦”kâ ®£CÞ vŽeÎh¡ 7jaÞÌÁæu0œê¸ªxL•ÉîÛ†Y<Ò®Ry’=)cÂB'wÕBÀ~)§{ðgâ:>& Ô…xæ+9¼°¾˜˜FîˆÄ‡<¿Kü[ö&âBÆv˜Êåüg¡'À‚©S0…Ÿ·ã§g*šò“/ÞgózÏþžðü{&Îã•™–'ã¼t¿fêsmØœdºwnUcj΃gAµ m~$¦„k>r·¸óŸÍÇbu)¦>ü"ˆxö­ç¨;üŽKy'ö~K6¨q™åNÌ•6§kݶ1Ýn+ò\~ê;>ë.i 3‚ï:C(Çk=ý)¹a™µÖ9è.‚7ÒÓþvûyŠ£²®mÍ€iÛNTmrçÞGÃ@CP+GÉöP¸ƒm0ødn|† ‹‡Ö{Ä™Ýv‚ö4¶¤ÂuQ¸,\W 74M«/M5üówY>•ývoAo˶„c(¥ö3Jà‹•Ö¯Hç¤=Ñd‡ š;ºua1ñ/ú«rÙ)k7ã—Ñ´€\=ôäæQë=²1ŠZÀ|Kùr~?Žò¨²x¦Ô6¾#_‡´1ʰ¢¨ÀCmÅR5ªbU[Çñ;kÀ ^¦tD³1¹™¸ƒ/„R¼Tçæ™~>?ƒ+ƒÃÇyñýö%ßfÀ}Ë»¦Šïý­sáZƒNŒ;çbW÷°1‹ÅÊö÷¼H.ì¿=UßbÈ@”JÕÐ5Dà·îÕ¹çÄÍöYî-É.«B7Іs1‡Kù™ä¤õ„¤Mêx=:áxŒ¾wÜ:¡Cø”'ׂѿòßôüWãᨵf[8”>ѹ¾Žœ^,©Îï_àÓ¨·k½=˜-:ª½Û_îJ·_ÜÂí_2üа_©ø>ØßÇïb‰ýÍ«ÄqÊ•ýå˜e‡je¿úRÌÓç…nL ʯ/C7¢-vñž †xßòùëptûørç’LgwÃqáá?ÞÈè–ûYyöÃ-D‘~>úhž¿Ò=ž=Éó_îd"~G÷㟰]I_~ùÝÐÇï@ˆýéÍí](;¬°“ŸÞÐ~îïÜ{rrpðKƒ¾}óÛxx‡±Š9ñŸM'£8´a¡ß «íŸÞüÕ}œº¡Š¬/SÑÝðþ «CW­’ƒßÛäa¹“HìFª–—Æ,¿ ö©{’ó[%ïÞaf‘ÿåkø™NÎì>÷S<·Úíæ{ÊM~®µ¯šäâ$|MÓßîHç¢OŽé3TׯÉߦLÑüÿF?È0‹ßÈöO¼;1PØ¥>üã÷˜ÁGÁûéÍo|L3nÂ1­1è9êïÑ%žÓaa“wãàuµÿ.ý}÷¿àØLÿÿ#/]g@>E½¼Žz‰´wè- ö1øœž0ä  –w/ôf“ÏÑó)&T"€m²·ƒ¹ô@€™Ò“žŠ0ªÊ=¨ò¿ úéúPú]äøÁ›,™Ìý÷ëmóô§gV!› e^ù5cþ (gÝ>=çÈô>Ï×ÿÝg÷é·ò޳ýî]q;7Îñ;¢U € 
RŸbÉ\«ƒ×?å•;:ò¤ˆÿç„Hôüþ ÿeQ×yXA^l]ok)ý¤Ì-}‚¢¸<åb z€Ð.¾rjøT$ÊÐ.ÔJF %hêAF –‰Ï£ìðid1Nú”Á*/ê ´[æ(ˆhš eW·ô-Á[ƒ•Ÿ Ò•,ü´––¸îÓ¸Ôe¿X cW=ýã³àrc/¸Ü˜+vw'άƒPs+Ô¬ƒh£ÒÌà °¥€ pqY;H5;€‹KÙHö3¸‚yµ2š%†EâÖIÀº,`]ìö’áRKËL»LV»ð&VŠ\YRäÚ°ÚTð®Ó¦…wUŒ6¼+ä³sàwÙlvP—Íe—j¢üô›µ(Eçå¡~@”Ç‹W/¿#dòûJÅvp2òÍ0ì$É8(wÈ?̾ò†«cn&vùàéÅ®UJ[:ØV»BR¹Ì’¼¥®…Á—ņi¯ûšÎ©ûš†u_þhW¾gÆë¼V»mšN´A¦°j-W kcijÔY±zÈÄ•ð2ƒqq{}¼Še¥Û½éµÊ•Îu1ÒYÀ¹ÌuÂUè21 åÙ^DyžSDyN{¦_¬pò<Ç~±b‰²äg•Åîó1 Zà/Õ:+. ºÅmœÏs’ºej[!Õ¥€pyD·à2`볉5[Ç¢s°u¨¯gëøÛÓWkFÍ Œ®FÃbÆßÙ2eñ¢‰Å/J;v¯ƒÒváÍ£Ý4ÆXdÊ,.1Œvžsè"Óƒ81¸v[hÆà–Rjw`ÎP™2ÝY}85;ëJV»%«\ì)@\öZ_ˆ‹[ꋇ6›•¾x8mzœ8c¯HŸÍ©HŸéɇ½@‰&J£¾@-…[Õú¢À´”kbÀLÄë(ÚÌ ¦ß¥ØÎÖŸ1ÄÅy˜´õ?{ˆ°Zj8²vqbY”¥`U,ËÎs¬P®€a-Ê…ò«Åœ1»Z Àq«¥Àš…hio™Íi™ ©$êì…Ê(KÈ¢A´'ç0‰Ì  ¥ `ÒCsö"会ÌÙ‹bÙÂêa4[¹&ʳüUc áÊM ø–ºfß‚–ÌâA]1±f¨©½j:§jºþf¨éF˜¡ÂPÎÃ-c†Š†1)ÇXŒjNÍ\c1f¨LÁ]¤*S@h†šf,,Ä r•‹=ˆË^ë‹qqK}ñÐ.Ð •)œ™˜¡ìÅ™›9Å™›ìÄ™Eš¡2•hi†ÊP¨Y¤*C¹f‘f¨Åˆ6‹4Ce-4,Þ •5Ä 5Ce ìÍP™Še 4Ce(™-Ð •¡p¶@3Ôb䳚¡²}n†ÊàE𡲆53”½hù<§hùœ©¦lf¨¬Õd 0Ce«#[€*[ÙÌP ÓŽ-À µSöf¨¬ÕMÙ›¡²Õ5eo†ÊVÑ”½jaZ¦ìÍP Pݤß\?»º·­îΟÌu-…((ѲÈ.ÖuíÉ5ÙÞ)Kë›6̹A°½ ?‚"u΂å¥Z”ñStn·GXtÎF^LÔ¹åù[Ó{i!Ô§½nzÇš5ñÙA°HÚ³†`¤—®ïŒ(ϺóRR¤Ç^<8)Y_ dõqp$^{ñdª½bM@±2®¯a)Dš–å쉠XÜžFæ»Ã³ýîð<çîðœpwÈø¦šä[CÆÒ̱/d»)Ä@bXpÙîI`ð—[¶—È$gÃÙÞ3ΔÇ’5=Ì B,9ÄKÄ– (9ŸŠÅâ~¥Bàû`¿‹%öw±èì‹å qœre¿T©”Rtª•jå/¤˜%¦ÏËt6˜PP~}yºåh±ûûˆ÷8˜"ñ¾7äó×áèöñåÎ%ÿ˜Îî†ãÂü‘Q*ÿ¬ï‡“éŒÌÆH$dgª¨²¦ ]J¿‹ü?x“%“¹_àï~½m1ƒþâÔÂ2dS¡L,ß.ýQè}žž~ ï-SéòÒú¼ÜN@P€Ò9_$ ¸g¨ÅÒÔ«4 œ!ëyÿ&ÛF%sràÖ ´5ÆZΗu¢z m@V5¹¯mÈKØÖCG°ŠÚP¦ÑµyÙ[¦ÆµÀqÆ7úÍ‚}“ñ¾2¬[&1OLïEeýfÁ¾Éx_Öm]LSл4€ ~³`ßd¼¯ ëvw­$§÷RÊ}©´¸7ê/YA]Z©ÛæTç`Û@tŒ|ŸìÒÚàÛ»´Èí>[l/ èEBíØßD²˜óçúƒé¹°—´U„¦²ï’²–ê•y€ß(Ð7ëk¤[É„ÖWuÄÏ€Ö7ô ÆúéU²¢õ•ﳡõ }ƒ±¾Jʉ=ùÑ3½v"32O®œXÐs‹æÙhTæ&í…Ÿ&Â'嬈zÙ'ül8 Ô™ªS–¶Wõ*… :q&¡ë4,$c¸W§ºZÚj\!®ix)˜ÄoP”¶ó„Ò!'³îðóÃŒ¹=þÊ# Ví®Å@E ŒîZ~\ĪŸôÐ2”º÷³õr_‰VFsC-Sð-!›¯»M€ZÃ.ÖÆQ%e/á€97ÔÊްÔ½ Pë7ÂEƒmÙ€²SÉX¶çø¹áÕÓôâY"˜“Åæ اãžÌl/ÂÊ–„ç8þÎ °††{ËÅöD¼.(NºìRAœ‘¤ŸŠŠSvçx*^ ÄsPñêQ<Ïæ‘©]‚7¨Y“`2 ×åü—Õ«<þ¥Gõ*OóRõJÎ#sSõ@½&G¿¹©z%'¿y©zÉç’,èy™ç¾y)ycл¢S_Ly6ÏyáMG¿+XËŠ×Þt<À– fKÁ™Ÿ–GÁ+8¯ ‚“Rp,Àð .)Ý$‰íX”¶³Céí8¼ð·0øëÆÍ ¨Éø‚ûQ¦¦dyðxŸPoÑÐ[¦®ÓB/a_ëÊ¢·Lk¼î̾hÐ-Þ%=õú$S‰A·Lžœ)è*ê5·q®Í(6j šÍx§Â4ŒDXÎØÄ©0Œb£Æ ‘ 6q*LÃXæ 
V¾WÈKë6ŠÃü{ÅZc£1÷^±Î£Ø¨1Ì¿W¬õ0–9KwEn%ífg«=_Ø6üL¶ˆÒ*)(ƒÝa¥ðϽ1¬š„æÜV ~&ÛÁÚ¬€”;Á<ðÏ£M]ÌPZl øóïú5¼úl ü™ï+\Yì«\Yn«Árà·ºÊC‚>Þ"žý‹ÑÞe2ˆMº²Å&aMM ™ b“†°®†…lF±Z»Â’÷ˆÅhí–ºG¬ÃÖÕ¨°Ü=bư¦&…¥îë0„u5(,wX=aɛĜç¸Á¿!Ð/ʘ0‡2xé»Â*Á_)aÔ¿|F¦Ä“¥!a%ÔŸÁü°lù„lí Ë>¬úìyÿÜJàÕòþ%ƒŸÅa`M¨-l˜«æý+¤þ,ð><ˆ Hš& Hš& Hšnl@Òt£’tÐKØ_àeä¯[@RÐcQ¿ä€¤,AWQ¿Lÿ¤£Ø¨1¬«Í0£alÔ ÖÔj˜Í(6j ëj7Ìhë´¸½b™þþ‹Ú+Öb ëj;\ò^±ƒXSëár÷еúÚ—¼W,5 iq›…Þ ‘y<âö‰Uƒ¿1I‹ÜV ÿ¦$-jOX5ø´È [eþª÷€¹•ù«Ý– þÆ$-lX5ü›´¬=`•+`­’¶$€ßN󥵇zàorDRÒAlÒÖÕºÍ(6i kj[Èd›4„uµ,d3Š5ŒHZܱD·ÿEíë0„uµ*,wX‡1¬©Ma©{Ä: a]- ËÝ#–‘´¸Mb΃܂àßè7%"i‘»Â*Á߈¤EíkDýë‘´È`ð-êz@ÈÖ²ì³Á*¡ß”ˆ¤¥ñþ%ƒ¿!I <¬ õ¯sDÒÒxøð &"iúmòÅ>"I”¶‹H‚Ò¡ˆ$[Ž¥‹ÃHšÀÊÎØÃÀD€ŒáG‹†Ô6±²R­Q!:YÀiUg,RÍzЕ‚¼Û²Å5yÛ…˜e@Çf]ÍJAÞ€méxMA^À–Yr=xƒÂH2 } Êèk«RjbÙXM@¸+ǪÅ.« â¹QÀ¦G뢸­‘–†W[r] ¼Zsعñê8Kb®Y,®Lµµ´ P³ ×•»t¼.‰Á®Ú¥Á’Xìj0» ™ÀNUâú¨’>ª”L!UJ¯‘2] ²HÅTi.ÍT†[+¨tËhŽÒPe¯µF"Þx$Gºì­ôÜ–M®9èKÜZc1?Gº­ôÜšÎ×ô%n«æ(Í{3ùV,P5csÄ:A,è87ÄI{]°lIÑ‹ÄòÂx¶è…*n2àÖ:ÿ‚•àÙšœ× Ïö:#<Û¹Ã-„9—Rã9[ Fäåá95{^%žSòçLñ¼4)¢—Ä¡WŠéÔ,z¥˜NÉ£çÀtX³(¿®èÄ´‘Jù:7ÈÊ«zUx^2^¨WEKæÑ+Âô\çBLÿÿ€%„3þBÞ±…G¶EOÏ“áhvŸûüm×)MÉe­×k“þ‡&é7{ý¹8!Ín÷¢»ÛüÔê÷þß臼d(a½€½%ØÜ|Èߦ'­vDƒd‡} a¥Ý?ÞüåõóÝõv­·7s§³áèóÞí/w@Ná6Ë>ŠÅâ~¥Bàû`¿‹%ö7|Ê•ƒ2qœre¿T©”Rtª•ýÊ_H1K LŸ—él0¡ üúò8t#ÊÑb÷÷ïÙXˆ÷½!Ÿ½7d‡øóNÿ‚ý—r’ñçÉà‰ §d@äëdû¹OÈpŠ™Sà¥?N†³™;"7ßÈ™;œ HߥÌt8}(óÜsœ=çèèÞ{ó×áèöñåÎ&^ ¾ÂÃÁçðènüróè’“ƒƒ_îÓ—§Üíx4ÊáÈN'OøëO”%ú/h3Ÿ¶ß QyâÎ^&#±` P–g¥€Ë½ùm<¼ã]üþü-ÐûKt4x|~~Òoˆ|h¯ƒÐ^sh˜ô-ºàæÉužWøÉ]` ·ãÐÌhÊ-è¶b_2B# ”§òn<‹D³Ðœj #¤*@ÇFx@Ž&O¥„PjûÄv¢¨n2ž}Îù$æÏ×ÿóÖÿ9Uçkò7ùÛü4‚,h±±' ¯o|{è "ã6øÁTá,|ʳ®ó¬Zq' îéíàÑfIÆ,FÝŠTñˆéV]p_Ï °j†ÌzÅaa4F®¶i8'ŽÁ–‚ÿe`œJ'Æå’¬+ÃjZÐÏðnð4ø=ݲ1|§CþA…õÿû?öþØæ äŠ2…ÄÎ;T zë|?â±Vþ+-Sþ«÷ËŰüç¼ÊËø0ù>’w\¸,óÁèÑý–'ÇÃÏT¤{$; ŸŸ\º¨ãÉóx2˜ Ç£‚¨}9q»w䇯/?Ù˜]8JG. 
Gwdöà©èèÞ>Xå©ás>¾Þi´ÿ~|(ã1=Wï•Ê{G‡yÒ{ßÚktkפëNÝÁäö¡ Ê’ÿ˜Îî‡7…‡ÿÐÉ‘&‘SâåŸÝ§ßrÈ2Æ“;P Ü> &dgZçzN~vg¿°ÁŒ§î/³oÏnNŒíïø¹òHD**š½þ/Ý‹¿œŸv=7MÚyGqðV¸^Öèÿïˆ`vÈöÓàñq|›£-=mïÐâ;Óá¿Üñ½'ãl{Êz–È‘á»âOdø'úïÛ·¾b„½ýÞþúýWy =ÿ7@³3|û+ùŸwƒÿ¦RtoïüúvHþ‡÷À÷œÝ ^‡ ˜§¨‡¹…yZËCÓyò{^LŸ§oy1%b ×ÍÕð/Ðöx¾¯46.ÚˆFX¾jŠ¥…ŠþÇ(*,_!¤ttÕ9nž´:Íã :ùC]<eíĬízà @GþyݲŠ_K¥ç‚öœã(Ú‚žÿx˜×I.@ÕÁéôgQLŸ n»esÌwòÌ+$4ˆà"°'Ô5Î&ZVþòü8þäë쯻áàóHP`˜jãx9›á´ÌYy{uÙ¾ iàùq«öž˜1Ì\ðrÂKAS¬þßáË -²× ‡/xýÚ%Ù FɈÐ~#@Ú¬@‰·é·€,¾ß½Oê†îClØãìûr›kOÔÞÁ¥3Mµt|ñ%ñâ!k¸z¢OôÚY·¥“¥…ä‘nõèä%f í~/É Û $Í·¨½ F>‘Äòd¼/Ä(õaÛJDí=ÇäwLt8…Z¢‹ØA¶µÙ 4èw¤ÁhQÙЫª6ü6IM’¿{$©!>‰×¾’Yr2 ‰Òá ÷ÅPFoY/É$h#½NâÉÕ‡ž®†2K‹!ÍØãÜ+Á¦ ØRŠ ÿÖ‰vKIˆ7t ©#néòðûË£øñ’kóBrAœ<j"(X)÷†JÊ¿æ½?f¨æK´&¾¼¼¥¸y[²XP0Éâøò‰ÍÓð;Þçîð'ï9`à]Ž¾Ø–Ÿrâ„—që)÷ë.Û~ =DhW¤…(zUð.î‹£šv8:ÓN2¤ w-F¤ˆy1£F§­ j„ÊæÆ¬OõT7°¤`9IZrº*æÒ¬š€jOÍ@ )Tœ3=OI¬Tb<&å°$Z–þ@¬~ÿª¦/ÒÁ‡bØôÐAo¾Ë­¤¨ 0ªÐ’ôW¥¼ %Fµxnê’r+UržõSeh˜e¬äöx²Š ¢Þ»ŽëÙ®coN8·Ô0Jä0>)?ÑòH‹LÈ$=ô¨Üy‡ÚXƒ…âѤ9¹±V/BG:•`€é¥S¾2½ï‘éi—–Ýe.–‘µat §[`çÄŠÛEíA‘ÛOÃKÊñL,ÏZSlÉò´" ßM6 o)åÙ—,>ævÑ#Ô#ì¨?".HQòÅJQ²‘rØ+cú¾ÓÔ(‹ &±(‹‚Q6 vœÒªð<¥ë9ľìì[F•ß©]Ê\êËÕ¸q¼ ¨(’/Õ.M\-—£ß:È×¶÷JÛsó6º$óäKĺ”9à¯|µÑæõÁUÇÏÿM¾h–­èM§œöaJà:»Äöu_=ƒJ4Ú_%>X„f¨c´b•?kckå¥^÷ÄòŒœEüui–ˆ$"-—Ï6¨‡ÁH“­êÙêìG¡“9 ú¾üIÎ}¯Œì•‘­Š‘ÍÌŒ,ìësãçcÁ·ff¾öúy:ü(l*Õ+›zeSkϦ^ù.÷¥ò©i6|jú¼(/°gv7±¦×³ØŸƒ7È‹åOÉ`…ÅÅ$—¾ËŠÉ¬¦PF!aXy—uò9JH0YÎavÃ'ÏøÑ&>c‡–"½-Ÿuܹž–¯›Âë¦ðº)ð…—dWÜf_÷‡×ýØí¥D„äÑÌ·ŠeÝÿ ½ÿ£¼Ìû?JåƒýJèþƒòëýËø¬ùý¥=çh÷üõŽB=r‰±ûoÅÀs.Åü›ã?÷Âmö]'p“È“1ú` þyŽF0'½¯I´“É÷ð&xé”ÞŽ{ëU¸ E#PÖT§ÿ7dYíâíã:üÓˆVp¸^ä&Zs9ñ /¾V[îF”»±:¡kÚÛ­ù µò~òl!ÜÛN "Áµ&DÝkÃ7›öBplÑ;< ¡£·¿‚¯Ã¯;ÌÙáí0z+æyR!hƒ—ŒýX¾Äåiž±xsu=Wu5\‹~ê¦!æ˜R=Ã!œ·ë8Ä: ñ†ñÆbºê!º3ކhÖc‰ÃL¤g7ºúÕ‰h˜gM#&4ȈáØõÔ@€ð-ø–¬ˆfÈœµ+£’ƒGBÞ@4!ª8+ä<µL1«a)cyçÒ¬¼®+5R_Lô¤­‰A“ð]Òàé&Ïv$¨Û<ßp~ÒÉÐ ‚àE é=p_€nóžÌïu:ß6ì³k¿]›,޽Öq“L‡wn¬ê‡½…¢ì-âêïðw¢-*xL¿íÞÏ4»³Ý¦¶€ z®M h;4”‘åPâƒC‰eøÆÈ#›¡„vÞˆm÷)fÛóò·˜j#¶&|íß8Þ™`QxªÃubÿÑ0½ïiK’ðãoIQøñÜh=ÖoÆPô^ĺ–ö¢t¼‡á‹­K (!Å>”vÏ‘·¾±ÈÛíNc cÞob.pJx4L~2\Þûp˜zï‰ ß%íÁÉvï±çè#Žžþ’[ŽNWq”gŠ)â x%,™ƒƒëÈ+9Oͳ ¾2ÆÐ‡0 âX4íÉt ^º^B7ãdÄ’³:X2ê Ÿ®ëxuý;ç×:-Ë|<ã% r9 ÏÜ™;™Ò¿8huúÍ÷Í®Ï:W}þî²FÇØìûosðòÝþ¶ßdïvð8 í G¤1~z¢à›[n5j½fž>o|¯óäüâ˜>éðí‹÷´LÛ¯¹Ýý¾ÚcÚ™×£×OÁ½“Zà 
ƒß^ówŠ @rïåÆs¼õšm~¢ïHÐ4>4gÅ<ûvøw‰—óäC³vÜìúÍs|ÜÐmè‹Ônãâüœéöè:Ë{=†TDfB××Ì÷bµ~È@ÅÞQá`¿º_ªïVöäÑ»·/³ì…=MS»­~“Mxþ>ÛºI©HñH·[Чèo'8ú¸ÕðŸÑél{˜á³þ[£ál8x¤»2›ç<áãæh<.¤¹_É-]$Qn–ËoƒÇ—”XÚÇ]H¬‘¯ÃÇGù¸© >dk,ÐÎ; é"$”ž_fyØÛ£oylºžZo‰SÉh éR?þj‡6M÷Åge©íø¡RúÝ«fA™>†!(¯Cq8¡’'$‡è/4ÿ³PÞ&ýÍŽÿVÌ£à…$4ÛtÖ”Há¢[ Þ߇¿ðoÖÂ[¹¯„SŒ„Á‰‡Á ôY ü]…a?„R<•ÈÊ¡:Ç´¾O »»” Óã—:K0ÿÛ¡ÕuˆíÐuÕ¸èô[«&¯Ôë_\RêÁHN.ºçµ>Ém‘®ëm ¸çvßu§/³éV~Ïù´µ¥ZŸ]ø0ÅŸ[b|tp¤«z—‚M¥«Çû.ö—Úd2ø&óëÊr‡J]¬™%ïõâó¤í½-~T?¾èoí™K–ü’µO—×$ªlÙ/Û½è¿,[QÊF­úE1 ìûe{k—‘eü²îy)²ì¡„†ÞÕydÙ#   ¨²”6DáÖqí¼öikžMWu›±%±»ÍþU·â {[ŒŒïÉôåFH¢£—§w²•o•óGŸò5§¹¨•M"ÙÒŽ\ÛÆÅMWÜç— e:&Ô¥b )øBJÁ“ôè½GOè½ý¿gè,Žç„[>®9”bó@pTp`?ëüQÝÔÀGš]‘·Ûƒ÷©%`‚Lùtxðå$ç‚ eƒ€¦½b¡|LÅôb¡_»üÏÝÀßÅB‘~1Å^¾Œ‹¦`·«öè4¦3‡uVŒé¬í³’‡¼yùOþ—“ÉÈzЙܬ©Ï,F†DMû«²Âò×®úm5,‹yã•ÕZ£Ñ`<Ÿg º\èƒæ'/üø2%_†·ô¤1‚ÓÅ =CL(/OÜ;^‡2»;÷vøD×+-¡ÞÉ‚3gŽe C}[†·»Æ×Õ`e¯q<ò±ß¡rÚé¹3Òyw†g¤áåð~x‹ ¸ì$ÅRþº¥Ÿmì,æüâõúò˜Ò¯|Vxß/n“÷¤A*E¥@4ÝÀAÏTms”ÈÀFÏQÈ÷ºËQ®N™:åéTr—cŒ qÞ|¶-ý œ‚•êyŽ{¨T·¬ÔÈŸ‡ì+¦x/lŠ÷tÅáÜ£T–…m²Ezã—Ç»ÑÖRåƒ;qqßE9aKE(Wü–Ùˆ„6•¢`¼‚9Äy•CâåMOT`lÒµsé¤5€ Ê òìgÙÿYÍæ«ùüù³øiä½ÈÂ()œ_Rο„ Òû”;Ü6ƒåJ´[¤rò2ºÅXøhi‰JôLNÁƒü4÷†RºTD#÷hÚËËSKT-\¤´Y8WH’UXÀjf“áh:¼Õ¡€ŽªÛêôZ ¯a Ùº g½šÍvåmSÚDËþWÄÆ[f_ûýÏ´g –XW_ÆŽÕ¼ã2ûK÷elH[' 1V¢›`¯#¿Š…RT ¾˜ƒýVY%Ý—±‰ªèÈQk!¯áãýè6ØëÀ¯zÈþR¾Œ é ‹/Ùûë(º!¥”þ‹·C/b*ç 96‡‚DÄ”U“@¤ZG"1e1s$Dð¯–ãA±æpñel™8Š…OðŽi¡jေ ÇFÎÇM Ö~F«¾¤"nŽõ\ýÒ×<«>†oP‚òHI`5õÂ÷è°(XWñˆ™`ÅHV®]ü>˜wÙÛ1uüº·d¢AŸVœRZBó–mq߂Р«CufŽLmFᣬòÖȾ?‰DÁRô׈y‰2YoÖ2 ÿ+Û+LéÉô°(ŒgЀú ¯ö‹ ³7Uå ÇOxµKå9•šé›Ò•žr¼³<ƒÂq÷7÷v&ÉðiÁë ¤¬ÛvÀ~D?TömÁI›ŠÐ­<"à ûûx‡¥¥²j¤Óœ’žˆ„ :çÂ8ôÏä| AÐK9?z­Q„ô>1`óØŒø7tü ›ÒÍÀ ÑÚØr<°¢µy€=2+¤t °Li @ÿµ\Øu¶wªor¤`ÝcPYD âÎN-Õ ñTŠ~á€Qƒªœ)8جø×7ža7Œÿ8dÂ;ìäü‚ò,ñCà) D°¹$: g+LW²š‚ÈšŠýb‡!£¬¥î¢ôª»HgC‰RkœæÉý:£'Ó³^뿚¸\>á¿ýu~Fåá?åò•|UNñZúë ‡¿Œ[ÝqÿÐ/ýÉÉD•½Æ÷¤À;¹ŠøYÊ9T¬ôúŸXþudC=^ J™F§«‡À”±_Sôö?,u,ÈñE_Ršh* ¤VŽ }š«]ÀNÌ‚8KUI­Þ£dÓꬫ¾$Bx‚ùÝc‚Ó.üïD–½Æ²P¿ÌeF (k¹‚?èe*õ™+Ñ*žW‰8“}’ÌXBsÀO…ܤt$±bŽ×ý\;{¾.â(|„ô5 ‘Š„*êˆy P…æÅáÇÛ’Å©NT‰™‹«Ê¡Ð{pƒ‘ƒŠ¦"ý.³Ãré z(‡¾ .|,µ<¤úFµÃ¹›PZ¢Ã;Úf@âÌDg9ðu—G¶ªË#UsY²ÖwMfÝ$^ßiT‡ê&T»Ï£õ,' I©„BRö‹a›Óeá .þZã:iÐ,HqnXç©ÈµÊ)aÚ0,P(ú­þ•7·¯´Öc­Ñí’p5m An„Ê!XxmØmõ*]¡¦’ÇŸ•4ÏÊšgÖƒú°ª)¸¯+x )x¨yv¤« 
S¡’Ž.z­áµf„×âõ¡fˆ×lˆ%õÙ¾¦Åð¯Ùƒ j†x͇èÆ £ã‡¸( Ã(B4¡Pxú;l eù ë´,µï[°ªJóœ{µ”‡×º‡ ËÍëž_žS 4”çÏwCÊìÅ>Ù WâïZôÝ~°^ÅÐQÅÔÑ~DGûè¼/B»ï4@ÐåbêèÈБãDô$^Š®œ}%j?\gø° 0|Xt)?Ä•V <¬K^‹ŽäÕ‚OKÕ‚Ùš> uu-ºÚUAuŠšÎðqI_šõW >ÖŒ>Õ NtèŸjF'º =Õ Oô&-ìRQË8¸ÈÈ9ªFdäŒH~lâлÊL g6¦³mVóSèR04<‰××Á×Ün$Þ÷QPÒñ›è_‹"Îah‹ç}*"ç”y”ÉFø®žç;ê6 4Ãäœ*¯ÍÀ_ 'Tþšô¾DùRx«·¿JñÇ ÌÆàc4ÊFÙ¸k)¢œwÛ^ÿC«‡ €%è²ÖmöH­Û­]÷ÀP@Ÿä¶IÊ[,Æz›\œ€Èþ¾ÿïè_ðVzMt. 2:é7»ç¤~;[''Ín³Óh‚9êªÝo]¶[ÍcxãÉÓ¾š¼‰Nó}»õ¾Uo7¥« ¤Mÿ«}ì]tòäô²MÉüè BŽ› ¸^}'¡:ÍýÊVê¨kõÛMɇæ\»&ù´²0ƒŠß8ð{Í3 !HyuYˆI¬ž Ý”½ôNf-¥âeÜYm£0ÜuÐ;¦IÐ|×»ò@bìô¸ŒHÈÁñžQ euoñ/Jž;½ãí¼òjÖ¨Fá͵†wQäNð¼ø| K¡€QVÒ¹è“F²[Ñ6˜Ê5ÿ))ÐrÂn|gý•Š1½]ÒIë““Z«MΛ½^í}9»i]:ü‹4'5z•žÓ6hÞJGóà»ïæi휷ò>u‘À}},–´ç§ß)·ÛÛ}G[ÁKî÷¶&¡ & $€(ÅE|¶J¯-"f;ê#®äv€pCˆ†é^þd‡n帛»ÜªÂW鸼_8¤_N©P‰¼©Û»”‚ê°mÔI²ò¢7Zu¥ÿÕ:Èíº´‡¦ ¥îZƒ‚rq\£”›$!¤ãõ»Íÿ¼ju›çMJßýtä?Ð͵FޝÎϯéùþ _±pwÎC+¼v­Ñ¿¢œÐ+v~Ew~Jû¤Þô*‹®ÿ»Ùƾä}V³Íâ6»/ﲉœ¶îù>Ϧ ÍFÈ—ÊÎvr×(¶ýë5yýrðrÑ·àÛB:Às†ö\,³ô-I†0prÕiô½½“ÝÖ›@†„J‘2$ƒœÂzE—–:ˆ‰ý¤su^ov{$ǹ“f8Õ´$¦~Ø óÖ÷c±Ñ½ƒw¡^$f¥Å,nêÁ)›yì1ÚŠá*a©\8f Ù˜Çϵn«FQAy<Ç]¶Â ¥Önõ¯ ‹‘¤³óöÔ¹lfÙˆÖÌëSwCîR<ìbÙ ³–PÄPǨÒZŽC1Ž‘àz r‹åb’{Y˜è¦•ÛŒB›Qbk¨ÌF³Ë‰H_#NŽ“È.4^ I-ò£—Ôâ@@Ƴ¤'š•Zåý|Ë)É"Ùª“¾~–þÑæÿ,-1ÿgÙ©:EMþσ×üŸËø°Åïåÿ„ôŸ%¶÷$ÉþY"õÇÁ”ŸMú®WÌÅw“áo.Ýf¿‘™>Œ'3rþç÷ÃG·€î‡ZÒ9 ÷v<¹›BCã{ì ‘ÁÄ¥¯wäeJ •<§³Ý»!-‘žô>ÐýnTd''­6JX»NpS¸ê´ú\zW*B ØBûäª×+<)´©0Àr¾ˆÚBº:i×ÞÓ“*ˆÝæÇ¨‘•†j' 56kô}ã¢{\ˆk|lè¾yEåa¬Ð¨€jðf·K¿ÍO­>«PŠ^‘ÆEûê¼³{^;½èRÑØ{Þ½ø(–¼‡õ‹þhÇÙ/0çþ ˇ‹ö1•ÍÛWìøƒµkôô%÷CÛ®\¬Òƒ_¨(ÒÞKtÒ«äˆ ¨Xµuæ·%W°k ¥»’xö“WyNÓK)y/–U„ÙÏu”O—ÇãÑgˆß'§ú_ž¿Ædü/ñç‡ÁÓÓ`ò;è„ ñ‡ÁhJÏÌ]( % ¦(Ý7gîè²O]ÌÛxB÷ú Ý2!#øð–´éŽ:…ÇÏî í²çeuœd˜úîíÃhÇyrî>iwÊ.ÆÓ©8$7qN!'Wy:tò´à€n±tWÞNý†à­HN6!½Û¡;ºu阇¬Z­6ù<賦ù¢]¶7´ÇÙxò-ï7tt@…ºÞøeö@ƒé”Ô¨8óâæEuzØ|¤8§d¿X)åé^«Œèb"ÿÕ©½—ÙuŸ)š¦ôXWÞ;<@ ûÝ ýIáë¼<¹,E‘6ž gOÒøÞOÆ/ÏtÝò¼Ù†;¢§˜»¸¿ÞRÈH©ºøݼL¾ÑÓïxp—'¿SèN‚ïS‰Àúê,ÏѦë™wØÝBŽS¤RHÙ™Îü†z³‰ëRaŒÎípæ‚÷\hìë$!Zû7€HBVÕ©î:¥ý2}zÕ«!ÎP¼Ù%i—”šg§¸[{ùLeÉ]çèð@Ð\wB– Ç<éQ°‡e"§¢ì©;qŸ¾ âÎ#ª¦¢ Éj¤Ó‚¬TQj5*cÒBàn ¢G9•tzWõ^dkP¬~3%ë5»tíÖÚ'tª;ͨ¡(sh¨‚a­Jx½H ÏkLYš¢`§ïUQœh"n±¦;­cö}Vgßµ6û®7ãºlñND+ô÷‘h Ÿ³êÿNÂÖ ¯Œ]Áo²jXDš“æe/Bmž žæA·(vzT4ÂÛÁ;8Øì¿ChÃW§cˆ¾ß­Ñ³MíÚ'ϰ• åýZ›ƒƒ¶"74á;ÚN£ÑQ¿×ÇqÁa I¿»ø-©~ºi¯|·Öé…_ 
¹´'Ÿ~è}Yz›åZ3µ§S$ˆmʺù“þÀI’=r,ùªèÀ9l4lhduóX&ܽ¥µ’7>œAî<úUb_eöUa_Uöµ1aã-AïñùÏ©o"àWÀjQî#HdСN.úlÛk˜ñzq1µCç’×ÚZCö óÆž B^êJSûjì»LpO§Ä½-ù\º•'[ò¡s+bö¶ä£æV~K>FJ>[6ƒC¿ÞLmffßÌTmFýóyj× º¤Q|Sÿ|žKÜÐóŽR¯D¬RÒ {ÒGÁ¾(D¾.èGTäCÑ÷e4œñ4ó¨ùœŽÏÓ‡ñL$fIZpüLåSTbJfþÚqŽ †'ç îím¶e™ßÃVÍ_ãf ÐÑ…÷M/^¥Ew önƒª<¼\\6;9¾Ù£‚îÛ! ‘,;z# õ”PXf„©Q§dâ~Ršô¸âÕ6¡~¼×Á¾ µÎqaŽï{:??c×#Ì­|?>¾L";e≹iË-%HDíÒWâN&t2Ý߇´CP ÏÆ ÉÛ‡{Õp’ô4Ý}üJ{C5÷ãà¥ëþ¸Ìèoö0qél<Þ±üÚb’ 92#OԘȩP;Ðïíƒ{û›y‘½[hü¡}épþ3{K;ïD4åâß uv Ýûë}¿ •¢Ñæ›Ãõ)šD3æöVgKñ~inÍ.Uj7ÖãÒI æ·‰Iç)zàT^ùÝÁh¼ú0¢"ŒÈ£ÒŒÉ8,zŠë(ãÑ I•#˦ä,jJÎêò„œÕýé ¿l.ÙNÆÙ–8EÍ5 *SqV—'ÂõÀÇÀkâ$Xã»J¶íq]ŠÀ5ªÌa ¡Ê<ïô½²: i!Ð?pèhÆŒzE]k¢3÷š ]]†<å¢ü&° xeæœL>ËÞS¶*Ä Ârž*d[Ñ'XÌV9b¶˜"b–èOž%ú§?KðÌ?ÚÎö¹%”sÍ@•Ù5Ívh˜h‰eÝ;Xã(éŸþ(á%?ÆÚŽºÜ hbÒ OÑÊ é¤·k–TìOt´Þž5õÅ/= e²ù¾d³-è«:qLL_­¿è´¢è!ªâaŽ}¢@O1&ñXÁ :ä‡ú-Q‹Æô8;ò™È&‘¡*äœÖÐ?/9&wÉwñzCócÎuÞ‹†énG@æ€Ó‹ q³«ß§cݧ®i'ªéb°éDPGOàŽŒúÐq ×éTÒÀŽ ’3ÈÊÃÛOvæ…Mÿù¦‘ѵA7²Êœxj#Øx”IøV‹a6D©µ’ÇQ¾ƒk¶™*RhGý¾÷M}Ãb•íB1…{ r²eÜñ™c8®W¿Žö<¼”¸bá ¨Y–uT-2Ì…¯F`Q?ÌFáO7˜Îšäàð4¸} ÓGžéi gRtz'Œ G¡%¨Ë¡5á-S„ÃOÐú !‡LÉNi¬ÿŸ½oïN#WöÝÿî| ­Ùwo?Bl~Î융c°àI¼Ï=k¶qÂÄÀ“dŸ;ßýªJR·Ô-µÔM7„^31Ðz”¤R©TúUi»æí¡Y¢cŒ¯vÙdYà_¬,R¤P¡I¼†{û¡qÿqл<¦ß€…ÑœÆDý¯½;†_’Œ „…T!¥¢00K›‰¢m[šƒðBÚf(zNy/Åñ„»›ÑEüœ¶‡¦dÓ²-MÌòû¿Ýˆß ·_iƒR5%™7^æóíM²É¨ [Ûô·Ý"¥Jùm>¿Ïô-¸<ÂJÙÍ ]ì»™õ%ÝØ¾`\QvtvƒŸo(¯œéŒR7ô½¢ÃJo1‘ê?¿Ëð&æ‚"[á’FËÛiÕø´pÐÁŽè††7‰Ú€¥ÂË"…aQd·×EÝø]ôFî£7ße'y`[éSÙ'-a`ûž¾Œ‡ÊbT„‰ gvÚUÁ¦7yírKÚâMÛ–zÔÇKÁ„ Gz›RTO”ø*Òñª‘¶RÁ0@ÌÎ,ŠJ™&ÖI¾iM¥í–ZmRâ;~j±¬'°WwèJ0wJ$2yØ€ pH@xø ï¹¼ÐèG„Ÿ˜ê|U‰X˜6!ìKŽÿ³¯þsÀÿÑLâ¿•ÿ9„Žø?úlÇð¶ØoᦟœÕš¿Ò•ƒ²&è_'güZâ Þ… 32Å0¨.ŠqªÐ(ñIÎTs1Á™AÚrtÌN›…``GÛü€û¢Ç„âðþ1–ÂNÆùñ8œ‹³Ãq “€¬ ÿwÌÅ€tƒÿ;(ØP u„p#y=“É?A÷µ×Ý§í¾‚½û„WR^þäW8 í0ÖÝãœÜ¹Â¯ûÈÉûñ8¹øCEjœücvߌœÜm#¿À Œ“á#ã⣜’ƒ&îÀÏǘ>òœí»Wøµ€líKs7o$Ìãf!M÷ÿK”ò׿þ55NþnzdFæ<ôVi‡ñ˜jo>]èÉ€™:Q;ÁÅxýœžèü!zkæ5˜±ä­óðmYó(kîÿkþ½5;kBÚ}Ÿ7áë2çq<æ<øº;5æü!zËÀœ`m*æ}îÚd´#ÈŠA¡g Äcâ¢î¸¼Xp`ëòKÙ`·štp(Ÿ\cȾÓòu£‹ã“¡L—“w”çëUQø„#àöΆBN.ÆzSß‹ñЬˆ~.|ÃC©ˆî#ŠÑJïxäŽHPªØÏFðÆ|pÐß` 6墊¼p‰ã® Úxé€_QM¶e[•0¦luG»³ïKqµûqÅÌlþÓšYê 
1³åÇF™°cÿD…ƒ¤Ë/_ƒÞø'†¥FSÑNYBŸ×H×Rž&¦W)Ix¡°ÌdBûÓùU…‡¨Ùi’ñ†‘®ËÞŒfJ¶`Þì…ÉSÍo}ŒxšvïEÏIñ4Ú•Ñ仨…/¾ƒ¢¹„õ\Œ`á¬XoʾŠUÉS1™Ë/ê+00ìfú±Òhú'®‘â(AÀ…nGÿ¿¸¦ÿ€””ãßÈÂ0xæþÀû¦h*È@%ÛEþ§?@y×poá“rDô-$„Û¯!âý–ÒäH‘rû >Á ;Y5½@ØwÌS Vµ-äôº)ñ_:ëa*3ø†áeiMþżg5~Æ"|NUï‡ìE©©Ëþ“z/c íu ê%WGT½§)º’Ò6‹%8ì,*' WäêrZåX€*‡œ4Ë+ç=ê•?»w¨WT¯OÊ(»­nÕÉ ‘eΰšMrü39fhȾÀc±9 Ms»Áë9é?€z˜8î¬áÊÄrâ#›7Y}ü ÎXÀ+‰hÆ8¼)ê6¥W™H†)|_• 48]ƒø&ó Q‚OáeZ-­¿üÊù> ~‡¾iíy¸Q•‰Uôßð®¨7l6–5Ö²EõkaA³¥Ÿ»hÈ Ð¥% [u‚÷ã™z—· 0GŠ£ÑßΆ j²°UU\rvq-܂娢fFúIwMÞECôMðÆ‡¨*®¢pèð‚±`ÝK Æ‹@d£)¬Q€‰+§ Ç?õ†äi0<½Ûƒæƒøˆ¿ô&dÚûÜ’Áðn4†cßÜD:b^^¸r¯-ÞHÄã9†} µ7ÔBf˜ÀãØo~Ÿ|Ál`ë¾ûÔ~Ä«4&ƒ{yÁ˜8t?óÀÕÎÛ•+ç/–¬l‡ÕÄ|6¥g)aV•mœb/i¬Š=¦K5°•ÍϺÏB1€?3œ™áÌ)—]¸‚pÀ (cåvÝËÏyèW ç‚ ûF‹€&ÿæG ï%³+Q~ÞcÌ[a=æ U¾16~eT³õZŠHÐÌ\Zj¶ßØjjöXÚßÅÚŠ‰7ƒŒà¹¥-¡S[MVµÀ#šZÃ#†ß¶¦%]÷åIvÁXƺË"Fw]˜Ï¶í"‰•hRÎ:©Oêã'5çàd³šóļ¦uqnÓºøãLëRüií¦FÔ˜*—¤l …"GƶÍR €èÆ}5®†éá`Q'µI 'Øö.ãåc°ð®à]XwšnBƒô3ÅÜÖ„âadݽ٠r×Y™¹ŠêH³$IlüHÔ(NXÈZÑÀQ, ŽûAïñâå({Ÿøû¦‚ª„Ó‰WNTìˆÇlàé¾€¬a,û(PÁðq±zTÜbÄáÉ—þXH˜óöR„P°ÒêyD9Î7gV…EÅJfùÑ$Øs¿ÇnÄó÷/ƒé'ÒÜyìïäÉhl/¥…IÝçY!Î<Û7ˆ?<¤:Ð¥R“†’¨ïò/ó†7Ï·Mý9ç SâòªÑAƒ3Ø‚1. 
*z8(ýð†df-áÑ5˜GBä€w5®’½`Ì^gWUgBŠRIKš€ ’¹E¿àìÆýö Ž~{Ç"b¡4º]¸1°oÐ3{•¹n0ÆY6(Æ@ÑÆú¹±P©;d€ÖÐáÏÆBÄ­Ôld|tŸ)æØÍÐx߀+‹ï^ˆóCvúó=v6˜£È&8º€r Ë\gKõ(ª\w™ÃþzóºÉ|¥B˜Û w „RÞÎuçªVíÊŽ'A/JÝŠã²Ð]ÐÑ î”öÎf®;~¥¢aKA ×8{êê‰èçz‚Ž›z"ßkƒ{d¢/¤ðZª¾ y}µÎjè§wÙnÓ¤;2ÕçHqòôÙ‚CvM•ZªÒuÈqœÅ0«eè‡<è1*Š÷膻j)Ñ—-¦².òœ$÷e_/FýÏ^ÞM×½\R’#{T—ð3œàŸîíá@³„.`‰r»T0Ím|ȉဘ ’ÞäBŽ£ô÷-²öKµRVŒE‰ ~V³nyÂÕ£¬dåHßQ{ÓÞÿµÜ¨Ÿ°1ز‡?œA6ðMsð_+þèþk,®ÆTóé­ÝؼgíÆ¶vc f[»±¥âÆ6»ýOv[S½Ó"&…ê«vßíª\½€ß$—´ð\\_5¨˜£\Ñà}èµwQûá‰&{ž?Z‡û¥±ŸU\«f÷Hû1œÑ®iø¢}sóEciؤЦyNÙ_Í»ÀÑ_-ä߯i5¤?ïÖñõ îmW÷6ÉNy ö }=Ä‚Fžo˜Ö5Ìvœ{áù„£UiÎà.´>^®ä\6z¹ëVÈiKG.KÛÒ¬2ÒQËì¥ÃEË꟥wÎ2³9§ =·¨R°Í<Ù E{ß<¾4ý£x€N`Tw©b°Xm®R`ä„t(b®®LU^–ïÆtýü܇üL>LáüÑ]~ÃiiÖîmL¿@cR+þçâÝf÷BrpA ûióbøÙl^GîNG!#W‡#DBX`[au42x¹8Eû˜9¹¹ù…‹ZnM®NEq<Šâ¹Åó%JâHäìEdçU~)û)rÐ}HaZ×BÜ\‡œý†ftrveÀ=¥IÚ[é ·:] gOT G¯ž˜.=±üy\` nž<ñÜxbùð¸éê½ãæºÏo'–ÓŽccÝuâûêÄB5Çò²Ð»X”â¹XT™j¢óÍq÷—ˆã,áî)ÁÝ$À¤à»IɵèO꣺ÙÄòtHŸMa4c:\ù£yaP{) ñŠvĽ&¹¸Æ¹Êu^Ì_á!qX jËxR`˜>S¡ ½Tsù*µ¹œt;àeÓòUŠë¨d¼õØ’/þ“["Š‘ 4—^JÉ3IqK¦Ù•«èê^o–}—bø (¾±>I®ÞO¤x®ŠËD·ÅUÂÝå ®¿Aد"†¯›£A°Š°“AlÇ^È8ÞN®!?7?DÊ‹< .˜ÿNþ?ozã%tÅ›É/±^B¼„ó&¡ï]¬‰åuæd?ubzÎ%Rú–aýHÄÐIúp> }”=?­ÒBp“™Ô:GO1›_ˆ“›˜³Xl±ÄÞa ]Ã’ù…9Ù¦b{„e¸p¡3†X|0Ùÿk£å§ ï/Ï¡+ÆDU]¿o®¥˜¯QQˆ£ÓWr¯ܽÜ|½b9z¹1¡ÝÅ+¿‹9m¹M†h‡-«·–ÍU+ÒOËì¤eðÐÒ¹g…|³Ö~Xó÷Ã*åÃnp,A~X%ÕÊlÔW&c:>X{†ºU´R±ÍL¯l;¹€iÍŸ–VK_a¯¹µÇ×Úãkíñ¥Tùx| O.ßGª|l§Õj9º4Î\Iº#äÄæDŒB@q¾Þd.·¡•æâMð3–bôc¶ï¶Y­_X·~aÝ ûsÅþ´;ìÇwë^u~e‘(s1|±r<ÞC ¥•³°ãÞ]‡ÒÈ׈u<ï­.÷Þ‚Õÿâ"Éš*.ø üÐY{x9yx]3/d½.¿{ ¿œÐ­4|¿ÆËéûuäfM¹r¹y~†wíø:®çþ¾L/ÏäßýñH ÅAÂâ‹n ÜRlGÿ@TWmm~þµoÙÚ·LiçÚ·Œù–][Ë®Á»ìúgL»2þeß¹kVµK\n'—j÷gL¿¾|L)ÆêU=10OE5EÖÉ'ÐÉ'?cã Td2Ø»šÁ‹ö;zx=Lu­ H‚TìQT䡾«g#{¼ ¬^ŽL·nŽæ­ˆxé=?G_O^4Êh<ø8¬;ä³ pɧ¯%1‘ DâÄ ¾ã×vŒ®r–¯ì ×¥)ÀRñ[⩘n¯#Î`:²ò¿’²ò‡³òi™2ª—­k_B`ï_ÿ^wç^ùkqëµúôºÈÿ^Yþ¯}u×¾º òÕ ÿB+hÐMÏWWg?Vª7V=³Ÿ®½Ùrµk?ݵŸîÚO÷ðÓ-m¢o*se—- ÿÓòUŽú!‘?®®Å37Ðz—Ü8þ¶QͲºÛ.ί¶äàW»—¥_­t9c:žµ¡›嘜oϨ²öª]j¯ZÎ0²oíwàV›ôêÅ|lCî³æìþm‹é]ª8£·î¬—*&ºI±iºAQº(ÑÜpƒâ¹ñöD¸æ¼Ù‘|wÍ!óŸD;õ27^Ù}7ÐaAXM½ÇÃas«æ/{¸ÙkvvÏWúO½µ’î¯F/U–ÂÍwÔwúLãʾ¢«[¥ÿ;øûƒûûCÍ4›:÷!ØêÖ›hq[¼šÂ N0Éò랟»hÈ]o1<œ¢LÀ©†êän‘̱"hyü¶9“¡âL¸sÙÞiÜ Ñ§5Â#ÈæòÿiZî‹‹pû‰ôö±8ù4%'Å'ÊäJ­¦Ó•g0:ÍhwÊÒÍh/½sNÓêœá&yl½ã¬n¿àL{»Ùv(YŒ{ÂlŽqéÝ 
yØì7ØÝ¸¸^ê%ëMZ7عÜ$§»F.5ǛԢOmŽc®÷¦9Þ˜MŒâ,f"Çéæ3Ç;ϬäXï9‹ã¸æ•vº³)e,A¹ÇˆmžŒ£ë‡¶¢ 9ø™ªÞ>ö¹)‡õor¹ä‘QÌ¥€Y˜ÍŽYv,Ï¥Œ97\¥Û3·Fã¯ÊæöËèä&¢Á âÌýö ˜W9„op°ƒòÀ Ž- ü³_…ÄÊÙ® pD"†¥XÇHø¡ EÊça'ÆealÖéI‰ZîòѸî´Að1À ±aq1–e*0eÁMÓÕAø¹=¾< Éí7þÉætSôø¼l¨€Ì ÂÎ{|„™¯ekÙ0Ä<0G|Â@I®³pD+ŒÈq%0CŢŜ; óøˆÜ8ŽFPÐÔ6i ³B±åÒ`ëpîwޱT+~,tFw§\¿ù—8)y¿ºIYÂz›PÎ9ì˜ q/Ñ¥ŸÏ9E7;Qt-$ÎsC®ž®„rì0>Ö:mµk"ÔŽŒÆVÐq'KÚ£(8ݱ NÇqs¡Ÿ 8]q §[œn?oi¡aç,Ç^2ž ØMÙÂ7“XùþS¶Æ”­1e«Ž)caó q)·kŒYjëmÚ)Ž! (vnÉ,¦¹²A†g© ”¯C¯rØ"¡G› ¦kÏÓµÿƒaº:7mvgBçj îZƒ»àYƒ»4Ù×à.îª.`8§ÿÑ÷çºeîÓu®½ˆ!¯e¹|âµ\y<¯¹ƒÁýž…Ðkùj…5J,x«]=ðÏM{}O‚Ë= ìš„ÙoCø6ÿ›¼»Ü®.8P÷Ìi àòÎ ˆßbÉB1ñ£Qd™+~,<¦‰x¢Ãë>d‡Õ¢Ð§"н4èG°‡Hãh<„ ´Ç®w \¯³";E¬×ŠqÑ ~t'JXbw”2ÂNsœ2„]+!ÂÎ ’å€Ç²ÇÁNþ:awhÅ`…Xá4!8&fŽá"j;”%\+ÜN„‡XÄ wBjy0­Vl˜– £•@Ë5ÜЊFg¹A³\pYÇ‘ˆ,'8– +‚kÈngÌ“Kà¸ä (Ã%ÎÆR[ö< QT¥ †ƒÔ¡¢ü÷_‰| #¸VZP³X½š}‡^9u¨L§šMæ%‹õÒ«0$¢½³E·Œ×Ò<96 3ææŠ¶Ð ]ÐÓ®Ò-沬ÍðDÊàÁãcã}1‚ÓBÙ ` l8ÿüù û*¹/Ю€¯Ô$H vWr„Wx— ñ3&#Z`W‚0|‘X$÷hf21-Ì¥E›YˆŠ€2ã.FÿŽáϬø.\õ¯h¨ŒÛ[`ççfªØÓ€çÆ6LØytvQ‡3W  aÄÀD-Ä6’-öó{7|V!ç¡ÄÀ8ž&s–0›#>´;cαç`³„õuÀ*eEŽ3qºé"’åüº"øÖRGˆ]*ø:Gp‚[‹§i™S§ÔQ­<DÇ G`W‰ Xé''HZåQ鎴0:Ž» !îÖ0ºÅE¥+ĆÑ-.Ž]bç ºDAã ›wÍ&}n ä[ùÖ@>°ŒOD^Û ù” ßUœ(o‰j0‚‹ ó¶ï <øñ EX\ƒ× @²Î˜#þ¡ÂŠâã}“„:  €‹8Åú!P„>S|—(Â"‡×8ÂïGxì¶GÉÂ{^|=ŽpïÆî-ŽŒ?8®‘„+Š$Ä€™ë`}ßk°>Ä–'ˆÖ§M–/JæáOµŒ>4‹Y¢°£öB–' eJ֑בõ幘;Þ­ž<–Ä HœÄíu0[txÚE!hÉ ÆÔ&ŒweíLC[K4C2ÇѸÊf4,áå–6àcÔõôñâªdðÑ|%}Üûè×qW*î£3ì4E}¢F€tû-Ç‹ŒªwK2ü¬J,I«Ì‹¦g¹cIÚŒ€òÍqÁ;ïp’1ikÍ'¢dÌ8‘Ü3N‹Š" $ïmmø?§–Ø{û()mÞ>ʨ·£°ßDøe»„÷Tàò1£)uQJm‰ÃlŽ+vœ¸A‡­…g9In1‹*”{Ó9̧ŸT)w-†¡B"PåÎô8‡uÇ–“¸Db ̉§K´±qº¼<¸=šÙ¿»›Ò÷Q«8ž_dW[1…ðcµ{Gg<œÛ¼  Ñš«¹dªl]Ç]E€!rìÆ{*/^óJ½Ê_ãpšY…$#oàwZÌ~”ph# ιJ@TÑH.¹4ú°}žwlx*ܦ‚V’L·6Z¦ÂÅÍ„'<›àù"HTÔ !׉sáªÌxéµlí6u°i æ8kv1Þdr!Òo—/æ 9£p7̱½4æ˜v)UªÙŸmމeç›lûÖ*Ò™lº·pè8Ùž“í94Ù`×J|wBN»I“HºéV íú`åq¤ö‘m¨y“ñf>â˜À»HõŸí4h§Fï44Ú´¾"Ù°naA§}ôÚ‡Ü+Þ1ÓÐ…‘  V#)Â8pϸTÑ3z‹+µnìÀúvqÂÆa€eÕ‡õ#Í_…ù!q 7ÊáëÝèéùe*ŒyxĽ6+Š'Ú¬èUËȇ ²8±SÓ†éð¼0|à ažË…£ù+O¢1"/¦aÿ;Ûd?ð¿7âo—}0²1ºâ¸Xu!NÚízD§±©Ô`xxJóü Ó¤aîBèŸÙCPÆþÏv×ÙÒ•¢½¬Ì€ö]¶äx߫²Æ=PZAyÙl¨’½ßÅÎ]]$šò+‘TdUŸŠmh`šjIÊv=͵ÕbúB8y!¢æ}ÍàÛTk¾‰ªù&Tó¦f!Jª(û¤ÿõ¹'9N<RÞ&/À £%¸Âà±ÒÍN 
÷ÇÞËGvªt&™d€’‹¿e+‹Ô˜Bà²t~?‡¾½P@+z6!Â|*“ò-Õí}|íß¶ét?׸iø”ù™ÀÉ6”3œ©§ÿ¼f<¯#va!Gµ»7ˆ©$I¤ætjn·'ÐÀ$‚qÑØ–Ȇåcû&èîÓÊ(D/ã­íár^972%ýç5›ð¢¤1L Æí½°Èm¦|LB`1À *ŒªøNÈüR]¥¾ñ|µá;Së%¯m &A—Q•åÏ0/˜®qWUF^ üÙ= øa0Zì1ÇÒn©…Óß¶AªoÂòMk9«í€b ‹’Á€‰ °ŽÔ¯ÇÑè5°Gª®Lr|G…~Ö÷IoJû=ªp}ê=>ÞÝÝ œ”xÇBXÂǸNÅÀzT&=²¡áƒ@·WZ7 =öêX:2•<AK»´ËP1û¡äĈ†ËøFÌø¨ÚbÖ‚äÕ·t³ñ@™ ¤&€ë?@ƒuj'ÎBêÒ¨u:¤ûŽJêwåF qJÊÕêu»Ü­yH„îøOíàpü9’*XŸ"ŒfB ËaŽÏ G;0ìatû¡<^·ª€?bÎmpÛ†°[è€Êú_Fì0ƒqùàž*Aªô­úÆÃ¼Ðè¨v2å»]çúuû<ù¹»³Bªh«Hábˆ ;©1 Å.•Çc¦lH,C)ÏWJ}†Ó2•;|yÒïÉiJWNE§I½Äwç<0&sƒTùwÒd ¡|:`êþÿ¼x À÷L¾mÊ¿hOú¡8^€[Ì›ÒÌ+åMV=C¬tüoIa+Tå<À]Ò¡DN6]ô'¶YŽpµ "Ï`óù*_öôY˜ÏK„RëìmžêA *zÑù(/Æiô ØKKVüL}íz”i&Ÿ‰‚çì’KoØSI^½ü?Êëµ,?¿E,4±nÉo9P06Aà阊ÍÑX8!/ÕÉOßÈý`2n_`E¸íO¿ô©0~“ßÙG{ýûý y#01¡P­ÈAªâ3y¡,ýÜò¯µ O‡sÇ5ÙP¦pÕ,Ì ò óF÷¦ƒÞãàß°»ƒ¶ÿÑ /¬%}š0GŽeSñïņ¯rPJ"Å¥ôŸ„ìU]ˆ:DõŽþð®3€ã™é§:h·£—á½Ä¢äOºlA7¥F=—aÅ\)w;eå8‡Öçþx0º'_t£zÛ§óÞwòió÷ yöµÉ ~UhÅË„6a:"·ã~ï3yy–óÜ~#“σçg0Ìx,MÖú!‘`ìØ¹õª8­€í¦gQ†¯ »óùü6•Œ»tR“†¾^…]ÿ~èø/0lâ¾u¼»áb\Ƕ¤ûPT…ç Á¥º£ycJ¶“ú)¥›nñn„d[ y”ðÚᾇ¶Ó®þ¶¤è~…”jÇþeý¬êS­4Ê]`h*vï~»¿}ìMK;iÖAÃÁÞ¿‡ø7_dßé‡ýâáÞ)J{ŽýÃRä ûûùÒ_H>M"L“½1%å÷—ÇA?"MöÕ+ؘ<ñþ®ÈÃ&ñUûò¬]n‚ -wKþN^r°ˆ„Ä “|%YòÁ+ò=Ñî…ê~<ø£Ñezdò °x(ô0xìï`†‡Á˜¦,”ȸ7ßO  ~ú‰Ð\@W³{~¤ôHÕÉ7÷ƒ131 †Ï/S¶2>‚%ø@.%K¤:ioJ6Ñÿ‘4ŠTÁ'å!]ê‡#÷¤ÿµöfŒ¸”BAHÇ]oíÝN{ÂWê¾ÿØŸzEc;JPiÜ»Ãóõ‡1Õˆé;,côø8ú‚kï1mİ?ù~Þ`¿Ói•¯˜Å|zÉå)8÷^uÞ]vÉåu÷êºKNëL{ã™ă¸îK)g„d³E?]ƒ—gý”À9!Éã®þTäö¬> pŠéÂ’Ìãß©•Oáð¿V®¾£ï«—í“[1îå8×Kã5ÛmP‰È€.•Ì îy,ƒgßÈ{Id_U‰ß=_Yº"~¬\vñ*€ÂÁÛœ1üËÆ xý]c_û¾œò ØÁRçb–|B­6Ok/ÒAß'Ç{½I·¢â;œ¦®µk´³Ç_úÆÝ=Õ§ÇØy´G”žOOrFuþgÒ˜Þ‹\XÃüasÈ ¸Òìð[p¿‹„[Ò®Zx¡Â>KH?D]aÓê\W:‘¥A¥”yÝÜd¦Ù÷ëÔ“Ì"ïìGR?—7±¿•š­X̤"XDNøœˆåA­Nљҡr‘X-ïâ© 8{‹ë§—MyO£|ã‚wk€,ëŠÛ§ØU<þŽÝ°¦±nò§Ûéb3ª<Òm[¾èÊ„Ÿ+s?õJ8rÉUØU·$½¿ê{Þá&Ÿð…WïÚ§më­Qâv,‘7p –9c¥¨–Žß¦à¢ÈŒÁ›¶ª+¼ÌY«Õ`ÖPY.7eIW{AHºmùn“È ²ätºûŽ€ë„¬Ù²pBÇO(Ù/âù&ƒ•"‚œÏ¯ñ\†¼xñ’¦zç[—ªï. 
9üSdJìÏû³W0•臚<™OšMùʪٽ¦³¸” ®R2 퇦dÜžýÖ&s¬«º¬2ÎíNŒßv7dÝΟeÅv#b¾lÈꬔuÕ܆¬‡º”ƒÚ§Ó]S> ‰îm‡T·@¥òe8˜Šû¢`Ó>ymå¡,KÕ;¾—CÞ3Uepûí_ò=Gx°Š8•=ì=ÓÍüÔ¥ðv­|²É´†AÚÆx[&M÷^мÎâÈ¿ÅŬÊÂô+ŘW­Ì—Wµ–T6®oÙ"B׺n¹{ úÎF«ö~ÓEâœÀo7nç{°¯‡Hâhlàݾ߃ÒÎÍ^/°›› ®÷ú=ØÏázº³; ­uO¦#f¸±âyè _Æ‘•²•Ý\4´åî¾|êÓWcW„ ñ ¤ç}4y÷ï£jbºAœšÆ£/ožz¿ÓÚÐôØûFɹ>®îê›~÷éh<ÞûGø3³‚Õ¦œ) Øù΋•ü,”x9ÂŽ(_Ú‹ýê*µ¢ø[º ƒ!Âw0ÎÏξÁõb|qu GT ƒÊòø©9Ȩ¨=eÑѶ)nfÃk/‰}›Œ{ôQO›ÝðøGäGX:´ÈÿÅ»4ÍI·&-¥=š&)­*„ФþÈÑöµÃâßHTç×£úƒ_€z®ºŽ Ö¹!v3 »I¼¿5ÔL´êD´²â)ÜØJúÕo%|VrÕµ•PåF`£•¬‘¨0+¬øpâ¡xýÓ[ofJw:CCF¾ßsa~cHóøDe,FµÙ$ ïÎä¬Ã l˰`m½G{ÁåÏ¡0#ZÙÛŠƒÎnÄžÕWõ?CPQ&íUÜ Ä›ñ Dׯš^pâ¶qMEW0–\gÁ¹N]Ñ…¨¢óÁ¢cQÀm¥ëC #Né6\ì`Á†%zp÷É[²yF±nÓ¾iT5‘àaÉúD¼m L#rÇŠ˜Õ€\ ìz”# ev Âáv…=ï{ÏT7†áÁLÐïb£i䆽¢Øß£‡§‚Cø¸ŸW›¬ëy ð¯jã’² 3÷ù¢O¸–€_Å&»½îî ž©’ˆÛ-étþ 4]n° ƒÅ K8ê,s¯™¥ 5ÄzrçÈÈ3¯p0ðmûÎ%2î(T!Ëÿb]‘}zD¶ ‰×néÆŠÇAïvð8˜~c˜{ºgæ§{ý¯½;v¶(íkP* ;Ь¢fî) ’& }`ê"”}Žðé¨õ’æyÏ9 ¯ùíçêë‚÷ú<øº*nð…€ú@5<¼G¸M` âeno¢û?’°}ÍÒ&(¿m{áèÛÒ+ê# QU«mæ1Í:ž{5‘Ë!n ØÑ'Ÿj—¥æ™•›G¡ñ,ÓÊŽž2îÔ6äŸ*ÊOü €¦ç;îÙтʮÝd‘ü+Ü1X³ Wû. à†7fø®Š¯UæP^­ê aI ŽæþüÂt¡¥aÁ­ðjâP•Œ–ç{j9ècG° zƒÙz}OjâË)X@ü~î®û9Ã~æ!r³U'*4wfèD†œ¬v1†}B ²Èkšñð‹ )…ˆñ§xw=Å×Süûêgÿd~2%}ª¥IÊ6zC)׫¡ÚœgÇZýÕ¶Åó*4N,i+¾R=ü …áŽ{Ñ-ªK¹¾Ûñªñ3…¼F˜}žY`•,2M¬“|Û£JÚyµ_öð{8ñrÕa<®Úûq¦qj|÷õÙìœYÖ<@Ö<ŠÇšû+ÑÍ~Ÿûî}2ï{ê”xÌõÊ JˆC%B\:†Ë*ùápAwŒ_ ›·õÛQºÌ‹ã‡*Åp@}û¦Âö3ýaEOhdÔ²pŒ’ßxïl‡¾ññâG€u{Ö °+l‡n·Snj£,]‡07vÂØa—‚ÕèÖùDwל6„ZR³äMsc<ñ ¯¼lN{[]vÀáÑž‚¡™Xô'~Ӛ檹v­AsÿZ#ÍrõÄ”õ!¡@ÞóEG·R£E®¨²w¼SÐ\§s¤Á›Æè;% Un™:k×Ê]¼ÀN%Sà=ešß<§£–Zb^.1T‚|ñë5p[zWÓøåÕ~­5À¤Qî@¬Ý Lw$ò²Ñ¸|_oI€xÞæ÷u*Î*5ôónžÛSnžCçUÎкŽë¡»ßDjmON}xzàA1”%Ę^ædJ<Ȉ÷£™ë­*íH¼¯]¾!ú¿h³ê-áíè é–?Öµå V—­ÚIðæ>y6t®+Ây]ö¼KÁá<äÎZ´ÚõÁnp"ë;’çó‹£`§joÀìô¦á“i…””“ë*Ÿõ•Œk †3t³!Ä8{ß¾l5nvm¬<¼{‘®cð®9ôíóЮà݆ v|32¦€¢P¡ï8-†gájn¸Ó_†šaZŠ;p“§©ûŽÎ&:ʆ'tºÐ©{ZÆ« i;‡ÚEeó;ÊÅõ*Œ¿¸ò‘²C{'0×ßöç€/c`p|:*qáØ¸CÂØ+¢ðÈŸú”`TînØ öåú€xÑ\y´ä¼ÜÏc ¨"/¡` èj Ð-»¬ Ó\Œúˆ³Fc8ÈoióHäY[Spðþ§ÓÎã ä .7œ~ê ÉÓ`8xzyâ>-ƒª¥ŒžÔ3cÈõVsÃ'ýpý²ìü?Lð/À…B>ž™W Tþpôòñ“¦fQË™©áK=§RŽÕ½­šÑ@úŠ .þD·‘&~’{˜xÍ SÓçÂïæ u„¼"àâžj™•Q ¾'<©šôgL¯»™eá×R”ƒ÷R”ÅÅ!*ˆÿ@9êFÏ&~¡‡‡ÚOŒWn‰âtYDqºª4˜ö0¶-'`[ö@^@Ó:‡¾6ó°\üädN¡;?«õ¹š=H´BR™“´Ý*{š´’WÜêËPÜìvö-‹pdÞ"¿qæ« "þi–Ù 
ºEÞ¬àÃxòÖÕÊïÖ¬VÌ3„H@2>I*?cÝ$á©+bžT‚ó¤5Oˆ?U*QS…>M¨Âü)0L&óœå2Šr Õ'ó§’xþ‚Ù骘û¦°Í"¯øW?‘H‚¹$`™NDu ÐWáämSET7©ˆ~^UôóŠX¦V…iÌíˬ"„éëöÙE¼»#ʾ£˜‘µ˜åô-w/+›Ë$"÷‡d0¼!ôÅã7Þ@ìûå…E3dÑõÛ’O{C”éáûÞ¢A•aOÀW@ûtú}ò€ñ䎪†1êédp/ÏŸ‰­“¸Á §»¿-eËCÆ¢Ÿ±"2V\2–˜³$Þι2ñô{Ì ÓGŽ$O¿é/0}äªÅÓðûâ0ƒu ã™™/8™ t«Œ Z4Çð¬G^c¯ƒØëãY¡×á~JVäEM "‰f@bì~Š€êûÉúÀ bh‡½É¸B6h\J´87dï0¦/¾ d‚‚ÁsìSíüŒiCG°üÀaM;Ά…‹ýéL'`z(ºá‡JÑeg‚˜3tx+t¤í.D–ǃº`ÊM€òš<ðV µ¯ ,@Ð \\ÿŒÉõø…½á¤>åð»8õÎ͆PIß[Y‚Ô0´ÎÚèÙÐÙØ¸YwЬ!„jø‰…˜uƒË΂•uCþ¡zËå’1ª½4ü5&² c-°XÄk¸«;ÖÕŠOp@¹Æ¸ºã[(³"[`­q0­î€Vâ ¬1q¬® Vÿ°³h„°j7¥‡m™fò"dÌïŸ-ÈÖØq«. UŽX…=šX55ÁIØ…Pª1©©ÀRcaRRÝ!Fü¾`ÆqXq¨i€P* v¼Š%J²+ØSó%RÎÇBÆ‚œ*xS'ü¨‚8 €NíÈÑÔÔ6€›Ú!£*ÈÔ/êU±¨1¢6˜h¨à0¢v€¨®º@Cq¡‘h1;(Ô š ššjÕbâ?3‘ÌVÚ\1Ÿ1ŸÖM¿R¶ Å«(ñ6½š¾ I4¦ÛcHÓ€Bñ4!F±A¡ö¨.óÕ6ÏSïá{ìí2+¨>!ÌvVŒ­ÀÖ]oâ‡qµ‘ Z3¢Ö§ÕaiC@Ú5hvþ Ùb>ŒYf b‚f‹fZäV1ÌÆè1¡z×hÙ5Zv–ÕãËŠ›€Ž•q²€ª(Ê 2…Ú8HY Eí Fm]Z¶¨¢euˆ¶¸rʴĦ”M %ëÓNáŒíäª×<…·Ô’MéÄ.ˆ×ìˆ(ƒa6úS\ÓKÊÀƒŸ¦F„(4k¼Õx­sýüÜ7Dդї¾º¼˜Ti<"©ó‡£G-Í Ž*v7kj,GÆ Š+]øü&*ø.BY¼ÿg CM†š×P,*»„š*Ìs¦®!©ñ!©3 P%…­žxÒnEz.IÍM×cH›ÚؼQÇø©GOêe* áßNNE‘Ê!|Á©•-3Uv¸)öŸ: ¦”CwÕHG£¡Úñx¹{ £—fBÒ~—QpbW– ¢Jï0(mÓƒÒ¶ QŒÁn–t3b Ü P•p3ÈʼnQÆ“A7Ãip¾Ú%Ö ½<²5€«ÝŸ1O*€õÕ3™*â5„ÀŒI½9”­†úHع+ýtújF[u‘G()‰½mšþF¥K#t«{`ÖÒ¬Yg ê˜uoÖÀ¬³Dý’à«3Es ©~F`®ÜOOn1õœƒ¡ù°´ID%*¢Tì™HŒÎfP— pNQQ‰s`Tâ!Maë¥Gšò¦¹ GB4to`¡Q… µ#FŒü1Yó‡ÒÆ ù£3þ IÄ!dìXw)ëºÁšý\qÍ~®’‹—AƒíJˆtFòÙTñìÁ÷ fßÃ$y₪ý¬Çθj’0Øm>.›Ä v›2Wb»#w¨+„Ðäi»u…ç¥ìv…ƒÝjq’e vë(¾]ètŸVµÛ,—$‰—Kb‚¨y=š(» š:Ö‡Çh7vAì@7n7v)ž]! 
¬˜=I‚ô'IËž6[ „€g'Î^þaÇÀºœçÍˬ==¿Lû¤÷ü<}ÜÉgV'Ÿû­Ù<«0»°Ìâ¬]|Ö.>kGŸ½t]|¶„Z”ÿNÉÍ§äæ¿c9EÑ™…b»ðð|zï!JÍ~;ªÔ §áÅCG\—lqk€·¨þî<‘!ýgŽè# œxþ0ì’O’‡×Ã20á”^Ê)¿§'ÆÙ†é›…ÇS‘yOëÐûk?'²½o½_ë&нïyM]x^SçT Ÿã=-QçÔÌ»ŠÅž‘œ¨¼èû ZF³,¼¨Ìå8†ßùO1…”¿+ofhœ©PáÊaŒ8ükg¨P|þ¶ä´v‚Ò;A)ŽO3èvËYôëýƒLÎA cì³XùŠa1*º¾!´¾«ÏGtP}n²ËK†_m<}%˜¾æŠ‚ “cÕ…ßwÆXï&ï”B:·M:]ç»(ðRò¿Wµ.D˜Ít6Ö„Í”îþuƒ»þx§©M,J Ÿ™ÓQsuœŽ¬Ñât¸úÄDz’$ ènñ™!н.%]µ9²’7~Üøñ¡pü¹ ïèUáä¸Ã´4!ùÌ´GûÙ\t ’Ž=«*4}bÈ^œ)"{Òpò ‹ÈíEœ±YÂ%x›…ð-•¡UzJSŽ¶Þ¾ØTi×è$ÊjCV)Êz)å(ëÅeв+dzÑó*èø^¶L¥8¡Ó™ûÁ&6 ÐU‰³¢2ýðçÇE??V}$€«1œR ~ž_Ýàç¥Õ ~žì"”¬ƒŸ»L+mIƒŸ“»ÑãËÓÜ~ãŸ,øÞsPk Q)˜×²#öèg[GÈéù¹­~ö4 ݹ%ÄòÜ¥(Qe‹¼¡mm%Ñ 5hµ9 Ú¡ÔÁP\ˆ«ÐxU¹ `°Û{(Ïîåf R¼2¨Þ s©$PRP¿)÷nÍË$Cû»±Y8~7Áƒž+$<%÷9x®5ž1zÖ?®®‘7)%—˜–úNaËÌüŸ84Æúf‚5h"]n&ÀØ­ 2Kê5SºÇ;7] ü¼·IŒÐCãFÐ #@òkÈóò¼†<« =†`„®èÔH¹]c_ë\7(7œbŸ@!±¥\9Ã;Ì÷ ˆÇƒO—æ ŸÞsƒOï™áÓNÀi˲pš•Âa7³\{ ÓNã ™F¸4Oâ-2kà²#pyypÄ‚¥Ã£j€G°³#¸$ã‚îJH*bÜPÃûˆ®H°a#fø½§JÜŸ€?aÄÅ5Žx#^q6\¡³}Û ~+CŒ5HâŠ+Œ78Â›Ë ‡#ñÁæbÎ| ñ{Ñ«">?/GÝ¿À@ÆþM @±¸•Á\ˆt¼Æ¯ ¶¸¸Ï \,{³€‹Í—|ÿàâÒ,àb¼Q 1º˜£_ËE)kx±îŽâtðÅÖ!w Óå›é2¸“1/⢃Ö@ ߸’â†&AChCx7ê š‘µÔƒ¢Ø¤Â‚‹S_'d®ð9šBSQï°§äqÛ;’üü9¥Ç–ðßÏðwIg1,Kwð˜mr_‡-òZt =õVÂŽ/IðŒùdôÔ÷Wºü»?iŒPž}¡„äŸ#d·h²ÉôJî…Áq",¤,6·ÎËw÷ï gd ·ò Té-”š_ýŸ ª}Li ¤t£á\´‰~-É}žÙœ—§/j q¨6øƒê3x>tPB;l‚4߃Ë=µçÍ ®0t×4Ãeª—:—Ãã\ Y$Up°D Ü9JÓ\‘·(îé*Ù× ªÏg’¨C$I8MúÈrLk‡îÄÝç«Ãr–yhqˆ•õ§"‹ çkJ°“ÄÚ¢=ž7‘ij8ä7@­[“T9œÄ*4äÀ56ÖÓVÃkÒ™—|z¼Ì4;þà•vñ‘›¡àJ›×>Ù$}r¦„}Â2údÏÐ'û}`–ˆ>9Œî“ƒÙøä?ÍÆ'çüE« 4?B ÔðIDwGwÇÑl,’ ;B,"wÇ‘¡;Ž#º# ò 'ÙhÑ…ëý¶ß*Xø·«Z“ƒ×ÖôrÝÚVÛ…~ª¼I^)X‡ß„öìÄ—Í Y¢£ƒ>Ç4ÇOƒIYCp ^bA×bXr颵o˜7§q—j;¡M*kX“dåÿ,þìž…•ΕcÉ·BµÐŸ·aÞ‹VwVÛ].T›¢PР“tF‘:SÙG£gÔÛ©î9Éð0Z}o܇uõ±ß£jú§ÞãéÝݽ€¥Ð³€zžÐÓ]wÙ¨íx)­Ç‚`s,™ïÞ=è!Ø”„fµÙ7#'Æ8' ±*9ÜKò ðJÕLº‘I‡È$o£ÉúË`àÚ GGz‡4jé¾+·È»r#8%åjõº]îÖ<à‚i$.þÈ¿Dt 䧃Mðô0GŠg…£þú01º#´é¦jݦ[(ϧ׭* e˜œ0´©´m´EQ]ÙcúeÄl†lR î©ò ÛºõîÖqwkÞlêNÅÚÉvtí:ß‚µÏ“Q9ïxTü¶*˜€¾B1a—2&9Ù5,ápÞ”ƒr’e`aŠÂq»y†Ó2a;\‘Ö[¢hJ×)Œò±??ðçÁ+Ñb˜(“þ#Û¢ÉÖöÀìéÿÏ‹7säÓtb’@(:ÖÄ8ÀJz8£;HÑ™Ž7bd”¬3õµëé[äÉÃLÄ8š“Ü#ÂØq&ÜõKÌ‘ðË0æñW™£ˆU†&Ö-4!ùà­8Ê9~6î ïGOdøòt &ò—ဠÿ§Çoä~0™Ž·/°èÜö§_úTž¾Éïì£þ]¯&3¯&FêÃÁtÐ{üöÀ@ý½ñŠ™HÎNM˜ÎGÇ’¥¯r¸M"EÜôç ?éBªõ‡w}˜,ËH?ÕAº½ ï%n. 
+Ó‘Ç:T«B깸+æJ¹ƒÜ!(PÇ9ÜÖ?ÓQÝ“/º¿íSaÈ»yŽ´yŠû†<ûÚ俊*´âeB›0‘Ûq¿÷™P~“òÜ~#“σçg°ÐxìFÖú!9ÐйuÏY/¤õí¦wJ_AÌçóùm*Dw è% }½ V“ýU`ظ#¯ŽûrÂÖ%æ »åC5xœ܉«»Oš/¦<©ŸRšéžøFÀµØJ.¶4š(t­û†ƒŽü:î7ÑC )Õ±þ‹ñ©VåÎ. !mêîÝo“âݧÏý;sŽø{{þàß|‘}g?å÷I¡PÚ;(îí– $_ØßÏïý…äÓ$Âô¼PY?¦¤üþò8èG¤£É"Þ³ÆïïŠ< ï_îû䟓éý`´óé?^É?)W(¿ýtŽ ;Ÿ~ þôðüþj@—MöÓèsŽzìÇâó`ø0úSÑéõù·¯ý1ýù-¨~¿¼ºûDgÏ6K2æ—W´ž‡ûþ9=<ü­Jß¾úc4¸Ço,ã¦x¿MÆÃœºb¢í? ª­_^ý­ÿ8é‡2²ºL9†÷ƒ‡W,4Ë&|Þ"ÿ Óª(_âÖR5½ÔfùU¸áôÝàlú¥’·o1˜Øÿò‰ýLgú°ù³¯ÖÚU‹~-7®käò4ì"÷÷{Òºì’úJ+7äïfNý¿ÃŸdšÅg dë^h(¬Uüó•÷3£’÷Ë«?_ù=ͤ ïi,QÏ»þW2ø6 ‹|xõ¦t¡ù¯âÓwÿ›ßÉçèÿæ$ò!êåMÔK<z ¯ð >‡'Ü ò€Ž>Ó ÷ŸÎtü1z<Å€J °Ev·14(¨#ºšÆtô€4d{²ü¯Òý‚u}*ý*r?ýä –Ìæ~‚øù¶øúCˆ# ³„2®ÜÇÓo• û§çM2yÈñù?ùØúã§\¡°õöm~+@7Žñ[¢$U ˆRÅ”›õºæ'±ÉãÁ--!4îz?à¿ +tƒw¹zU°y½¥%¤ø‹2¶ôT¢uä)¾]p?§6SúJ‰éS;Q¦6#R÷R"5/Q›©‡)‘Z̲?ÓëO7"ó ˆ,$Ÿ@)Ìò¼ŽJ·iŽŠˆöGå"í⦾#yK0óQº‰Ÿ”Òâç}Rç:í³%Ò:ëé—? lÌŠË­»ârkV\œÄÝ­MY¥æÖªÔ,ƒj£¡Ò,à2%ØQÁ±œŸ× ²ŸÁù¹,$)\ ѼXͱ‡ Yöm!†èr 5Ûå%Å©–T*˜V™ ­n+á­U‹\]RCäÒˆÚDô.PÒ&¥wQ‚6½ ”³3ôï¼Åì ¤Î[Ê΃ÔTùÉ7gU“ΠÊCþ€*³¯¿#eúûBÕvqrçe-0Ü4I•ÙmòR¡/CÙcvÌL`fj—Ož^íZ¤¶¥£m±3$‘óœ È›ëüÈŒ¾4LwÛ×dFÛ×$lûò[»ð5ÓnóZì²iÚÑ…¢­\6B—æÏЂÍC$.ü/5³[ëí&–….÷¦mÔ"gv:—åÎÎyÎëd.ú€.5ÓPQžÝU”çU”ç¤{úl•“ç6ôÙª%ÊâïU²]ç-$:ô_¢y–ŸuÙ-œÏ3l’3]25„-ëP8?¦Ë†¸Äútì,Ö1é bòëÅ:~öìÕšVóÃëá`Š=ã¯l©Šx$Ñ$âç@¥›¸×Qé:ñf§Ñmj£ÑÚ™²ˆ‹M£²bfÕÎô(ŽM®Ûš2¹Å„ºm˜S&Tf€TWVŸNÍʺÙîHä"'{ç=׳ 1»©ž=µéÌôìét™è6uÆÝ>Ñ>5Òã7;C&Ê¢>B•WÓzVd:ê52cɺ U›YÉ4È»$»õ§Lq~!ívúŸ>Å…´:Z8Ò'6;µ,ê¤`Q"Ë‘ÎK¬T.@`eAe¦ò*{‚SWs!8%i5ZÓP-Ý@¦3€L ‰4êô•ʨ“¬ItT'g8™‘@GEÒB`ÜMsú*䬚·Ìé«béÒêõhºzMÔAÎüg#… ›4 è›ëœÉ‚¾Œ¦Lö¤ZgŒõjâ~ 5™ñj²üÇP“•8† S9‹´Èæ*šÆ¸#›c¨i4KlŽ¡R%7Ëc¨T Íðj’²^É1T˜ÈENö$Î{®gAbvS={j3<†J•ÎTŽ¡ÜÕ™ÛÕ™ÛôÔ™,¡RÕh²<†JQ©Éò*E½&Ëc¨lT›,¡ÒV²?†J›âL¡Ò&6Ãc¨TÕ² ¡RÔÌ2<†JQ9Ëð*ý,Ãc¨´UŸÌ¡Ò&8Ëc¨´iMåÊ]µ|žQµ|NÕR–Á1TÚf² Ž¡Òµ‘ep •®,ƒc¨Ì¬cCe`qJÿ*msSúÇPéÚšÒ?†J×Дþ1TfV¦ô¡20Ý$_\?öÇtmÍ;ÅÎÏ–RÔh™gsëºñôštcÊÒü¦sf\cá‡HP´Î™Hp Pª%Ai>Aånk„Cå¬åùX•;î¿5µ3á>m¸éq”kÚÌçFA–¼çLA¬—¬î”8ϹòbÜN·Ò¤3HpÈo R0YŽ,X˜ëhÐjjóë6bæ$àc‘‘¡¨·Ñ{îÙÙTbMP±0©¯#a.Lš€–ù¬±¨ÈnM°‘‘úêðì¾:<ϸ:<Ç\RŽTiH9 Í ëBº‹‚…ÄKwEˆCƒ?ÝÒ "_ §+fœª¶’6?ÌJ‚•ì±£ERΦ» ¼,aga—…_Ðe¡úÆrYè¼qTÍ ”Lgá–v_°e VÆjè“qô¸™48Wiö<›4{v•fi+nî¢,m-K[U‹%ÄÖ=¿d=ÿ'Ðø×eÃ]ÆL^lñäÞmÙäïo 
ʼn¸Ñ¸û®FºµN·7f×ÚíËö›Ú‡z·#]ˆí_‚·ˆŠÛfܤ}Z®7"Š$<)­”üç«Þÿ^­4Ê]¸»}0ü¸{÷Û¤D;¶¿s—fù|þ`oÀßÃü›/²ïù|á Ÿ/B¡´wPÜÛ?,H¾°¿w¸ÿ’O“Óó2™öÆ””ß_ýˆt4ÙÃCÄ{lLžxWäùÛ`x÷ørß'ÿœLï£OÿñJþ‰rùGå7¶Êí|ú)øÓoÀCðû+ÿõÑgÝ%ê ~ÁTÒUèoÙ⮹6ýoƒ‡ûþÞh^¥oýËåYÆMñ‚]8?öžú9‚‰¶ÿ€ªè ûÎÜ`F~ ½!Çð~ðÀo²ç¢>óµ?ta|TKÕôR›åWá†+ÝÃþ´OûPD©7µ³rƒP™}]ÑsUn—›µn­MZ×Í ýó÷{Òºì’ú[µKU外'“$šÃ¢Ï“ØxK|ž]ÿç+ïg~O|€8ó{šIÞÓX£žwýͲIPÚbE><ŽzSBÊÿUüoúîó;ùýÿOå¾õ²õ×’·ð £—§àð„;AÐÑç`zy‹O1 l‘Ýmr×{|$ÓO}ª|> Æ“)™ŽIÈö.dQuMÁº>•~¹Ÿ~òKfs?Á?ü|[bý1Ä¡…iȆBX¾\ú­Ðcžž~¢ï¯Òå© º²ª'@âM_#  3ÔoyñŸ4ò* ,d%粭ư1Ç'n©H[â^‹è³¬èr¾§žRPUMèµÕ!yN»tt«]ºhtiIž7ÁŽ7ã:ôqT¯õ«Eû*÷ûÂzÝñóØüžW°"Ô¯í«Üï ëuW„i~—°"Ô¯í«Üï ëu·P+ñù½˜p]*.†îU¡:‚Ç‹NTÁ#Fîv£9Ñ>ØÕO#™ _fÂg »¸4ýí“]Ìr¹O··çEtV›P7ñ7“d³ÿ\~²S]"CvD»m•µÞ&Í>$e)í+³¿R¤¯p¯/‘q%^_Ô?^_ ÒW¸×—Ȱ’¯/dŸ¯¯é+Üë‹4ªdÈìñ÷žÉÍ©±y|ëļˆžY7OǤ23kg¾o•ÓbêyoñÓ‘€T§jO™Û\\ Õ‹°f´ãŒÃ×IDHÊt/Îv5·Ù¸À¾Îº§á«§`Ü H­7ÒQf:„2k>~šò67F_¸§Á¢ñZŒT$ʈ×òý"|ÒS˺´ÿ0].üŠ•X¹sç°åœ™j™ƒWˆl©³ù¼[ª5âbi*±8{Ì™©Öpö*æìU Z¿fM¶«gw ÎN¤c¹îãg¦WÏÓÙmÒì`ΫÓÁ>g¸3s „•. ϰý™` g»K³‹Ý™xYº8î´KDqJš~".N´Ù™àY¸x1ÏÀÅ‹ïâYtqÁA È’ìã½,ÀÄ]½Èý_ò®^äöoV®^Ȇdf®^ª—dï73W/dë7+WÏyc’?Ïsã7+'¯L÷.hÛ—‚PÎðÐsVz“ñï ÖŠâ%¦7Ï@°cdÁt98ÕýÓü8x!'âà%éà¸ì´Ý³ÝJ7Žsx,R»CêÐáqxàgqâ¯k7O ÞÆ\R=Kf”÷çöõ²¦Þñî:-õRïëã®dM¼ã½v6âu{ö¬Iw¼ñ.éÖ®×ß2›tÇÛ“S%]ízM8Î¥iÅJµA³¯âP˜š±Rë«8†V¬T4Ô*…©ólÄáÂ× ÙeiÙZ±Rm˜}­Xêf¬T#f^+–¹+Õ†Ù׊¥nÆ<áˆ×Èr±(j;Wëyf XòSY"Š‹ä V‡…Ò?ó°hšqMX4ù©,K3®³Ð?‹55›5 ¸ Xòg_ôsxñ-XúS_8ÒX9Ò\„ß‚ èw³|iN@%òíGâ1û?ó]*X¥&,ëéB:­X¥6,éÙB*X¥&,ëÉB:­XìÁœ׈lÌvs]#–¡ Ëzª0ß5bÚ°¤g s]#–¡ Ëz¢0ß5"£…9/3nä2¢E¨Ïê4akðÜW…E’ŸÑYÂ"¸?c#FÖÌ“æIÂB¸?½ƒgòÃF°ùoÒ=™÷Þ`‘Ô§/ûg¶/VöÏ™ü46KÂýKqˆ¹hÙ¿@îO£ÿ#ȇ¬I“XI“XI“•õHš¬´G’Žz©÷—Û#ÉB¼ÜùËæ‘ƒtk×ÏÙ#)MÒÕ®Ÿ'à?n+Vª Ëzf˜R3VªKzj˜N+Vª Ëzn˜R3–Ñ#)»µbž€ÿ¬ÖŠ¥hòžÎy­XŠF,ééá|׊¥hòžÎy­˜«GRv‹…þ"u‡†¬Ö‰E“¿2IY® ¥U<’²ZMþÊx$e¹¤kÌ_ô0³1±kÀ¼É_¤ÌÖ€EÓ¿*IóZ9–Ú#)³5 ýn–/íy¨Gþ*{$ÅmÄ*5aYOÒiÅ*µaIÏRiÄ*5aYOÒiÅz$e·FÌöŸÕ± MXÖS…ù®ËІ%=S˜ë± MXÖ…ù®óôHÊn‘˜q#—ý+Býªx$e¹*,’üñHÊj-X"î_f¤,W€YÈwÕgºAH÷ dÞ{ƒER¿*Is“ýs&E<’2Ü, ÷/³GÒÜdùðƒÅ#iòmüÙÝ#I¤vóH‚Ô!$W‰¥óÈ{ƒ•Ûa# 2ºeM©ëÕÊ:JånrÑIƒN7¯";ÖN5ÛAJòJì*—”äyìæb–›m5 %y%våã%%y;^“ëÑTFb°qHéËèJ_'ZÍ”êLóîÕŒ»ð^uãØlz5#™ElònÍJÚ9`nýêÊ®KѯÎvæ~-æ$\Ó˜\©ëNjq¤¦Á¯ 
!vîý:'»xjçÎs±‹éÙŒt7sT‘Û£Šnö¨b<ƒT1¹EÊ$KÃTq&ËTŠ;¨tËÝe¡J‘^g‹„^{'GBöNúJî*&—œô9îl±˜Ï#aG '}¥wæóå&}Ž„»š9гnÆLØŠ M3N4GP¬SÄ’©Ž3S‡±—¥—9:Ë^ÎLfë‰ÎÔp“‚´Öá ÒÏÎì¼Lýì.¡Sêg78\&¹˜¸ŸÓ%:k„Iž_?'Ï‹ìç„ò9Õ~ž›€Nµ£ç$¡ÚÓ‰EôB{:¡Œž¡§Ãœ¬d´}-\ ѱy#‘ñuf’SÔ9Cô¢úyÎ2z ¨^sÌYF/¨§gÚºôôŸðœ„0bFŸÉ[6ñÈ–¨éy<N6"S(NÈU¹Ó©î»éÖ:ݹ<%µvû²ý¦ö¡ÞíüßáO9é „Õç-Áâ¶á!ŸÐ>©7" $Ûì‚•rÿ|õ—õó?ÕJ£ÜÙö'ÓÁðãîÝo`€ÂÎ]šuäóùƒ½=ðo¾È¾ÃSÚ;8$…ýSÜÛ?,H¾°¿Ÿ?ø ɧI„éy™L{cJÊï/ƒ~D:šìá!â=k ñþ®È³»ýŠlÜé7ø¡û©OåÈèã¸÷DÒ#Uòe܃U‰<ŒÆdrbZØá©ßÓiHn¿‘‹þ`Ú#Ý>¥ƒÉ§Á»…Ânáøøˆ%Þ}õ·Áðîñå¾/xùv>ýü~zxõ¦äôðð·Ioòò´y7N¦„Š7²ÝÊövû‡þï´[¯ð@yÜŸ¾Œ‡bÀ MÏR„{õÇhpÏkøúü-PûÆëé=>ê~û _.ôx¤ÞI½á¤ ¨XT&hÍ‘›Ïð‹ß4l€ÔGwšNÂö¹öÑ¡“ä:†ã§¢¶›âÕ„ÅD ÇÝ(z8Âåˆv0ûë“ rëóûÑ4*}¶±Ñ«v$Ôo¤Z%Õ8rñ&’u䯣éÇMoÖxÌë}ºó>MÔ1Àœ½Ümî.7 uºRÁ&q”ÒÜ{¥ëm¯wÕÎ@"Õ´B/|ȱjnr¬šo;Šv:ǤL´|Ñ •:¬F'I¢HûÒ{vïW3a®3ë waˆBÈ&½§Þ×D øv‹ü“ªéÿïÿ±7ðe‹W³™—+º:¯Oaõ×…ïK1ÖêÅyê{ù½b)¬ÿå×úß<¦ÿÁ#)q';W;¤Ù>ö¿åÈÉà#UéIí^ÏO}:±ª£ñóhÜ›FÑûjÜ|íß“Ÿª¿ýD¦#|E`#9ö©9¼'ÓOýÁ˜Ü÷ï{,óDèð4G÷ƒ‡-€ÖßÝ!ïvÈɈîªw‹¥Ýã£éœÕw«íò i÷'ýÞøîÓŽªKþs2½Üî|úiR9%á÷±ÿôÇ&ŠÑøLwŸzc²=›ÀsŽ ”'þw˜ HëWU%_'|¼÷Eùל'•¾úkbªÉþÍOú:ò•÷ª r ^r¿ç'øµ?|y"8‘ë¶Ë­ÎÕe§FpDL~ìOcÍMú¿M¿=÷7E«þPö¡„Ħ£¢ÖéþÖ¾|ÿ[ó¼íÁ3iuä-mýk¹,Óÿß¾Ôo“­§Þããèn“ô´µMSoOÿî„.°å™Hèb“ Þæ!ƒn?Ñ_¿öÍ!ìíïðö÷né¿Ê[¨÷¿€–íÁëßÉ¿íý­vôÖöï¯ä¿y |‰ÁQ Z3±ÑÐë´ÛaLa<ÅH–sPtŽ|͉‘âcô-'ÆC´aÜïo–ñØx<Äk «— ìDY¾AŠ‘¥¥ŠþÇxÉB–oR*ºnÔNë­ÚI •ü©Nš±2g¢g‰n"pÖ×0~N3™¬“aŽŒÁÉžÙÆÊŒüç«À€Ž7ìGøÄ¸ ¶v›šd¾•Û`ž¡F¹ß¥œCCl8ke÷ËóãèKP³o÷ƒÞÇ¡`¾»Z¤7ß„Yy{}Õ¸$@fà÷“zùŒ.ä7á© (–€µýðÇ %²×¬ýÿ€?ð:õi3Œ!ÿ‡ñøÏ]ø#[& tˆ·@ó×Å~BÞ¯ÞçrCõ!ÑëIsÑ÷r™’8UÞŸÁY3I4k|u%æ¼!Ë7q¢çMô´Y®Y“¦Ö„œ‘lâèÔ#vZüZâ϶H¦‹oQ+@¤:"éßqtvOcQ2Ã:‡Ë%>¶°âwËl8tZf‹ÔËA…uPÍ ¼çWä½hØP«jFû6NÈŠ_=V s$\× “ÁB*sx¨}u“qZšJ! ]ŒIñù…3™gø ÐÜ·¼Æ7ƒ_¼ß¡ýo7é‹-ùWΔðÒ66ɶ^C fFÀÒR=x•¿G×Å»Š.ðîÀ~.Äë”Áìc§ˆa1w†N¿·‚]#l1·f ©g“©ÓH2xÓ 1“­ÔDDÐj&iH`´œêeIlk“-³Z@*àøÓ¤ôvé÷oCú,­Êf- 6  êòQ³’ñ5 £B³ÑŸò\”dÔVàw)WPù’‹«_‚ÒB#'­Ê†EbñeÁŽze¬ÚV³[ÅÞ˜pA©‘‘†Aa"RþE+uÒ1¦|ôºG”(6ÜŒ" ,Š&»È­³Ý*ÒÙúò.™o-ï¾3y§™WZI—º2F–FÆ)B.Ãʉ“ ‹Z~"Wž(YWØ™¤³ ØQÚi?¿štdÝ$R·3I2®Nò®ž ¡Ùš lK¨ßfdùìbYEÕk-¾o41ª_B8duT`TÇ‚'<.xN~tõ”Z.V&É”Û._ÉÒéó´™ì%ÊWY¶¹I³½. 
4ÛÚ-nÍ*ÑèD̑ϳQ–{¿ó9Fs˜g7 ?ÿù¬™¬¢6áÙ§i ‘c¨ì Ë×Ysõb)Vk—̼±Hͺv†Öйý¬1~i¯ôö%£ÓÊþáʬúHuGžA>»Ô­nø"%J|8Ô³ÃþŽ’&KØØ}þAövkáµ^‹^S³ð Ãt®" :vY55˪0Vç9ÓQDS:¶¨µhZ‹¦åMkÙ„ó|^²i’ŽlšE¯²3l³,I+¶¡¬ ®E0`ÿ©¤š¦Ò2<)¿õ›b[ ’ÄØ~ JÿXí'!+^ðcE»R²J‚±¸—Hæ¯EþZä¯EþÒŠüÉ.ò'óù“ÔEþ¢á~¡G‡ÿœv¢Ž±Ÿhüç!ü¾ÿmoÿœÇÃ&ïUûò¬]n’°C·ðjíR®ðn¦œ¤]+7!ÛèÿÑ$¶Nïð¬•Þ¤OçÊhˆ™FãÁÇÁ°÷È`7 %Ó]f?ÑYÿe0ýô3Ïš?¨•OIíkïéù±O®8Ýþ×)O±³C®zô·þ´?žÐo\Õ[ÝÚY­í“׺¼îòwWeÚÆZ×» /ßlùEvîz=ZÞ`Hª£§'J¾¹äzµÜ©åèïÕøïMŽ4/Oè/-ž£qyFÓH½7»ûu5F´2¯F¯žPwNËU3 ~yµ¯´+ “;/·ðÖ+¶ö6¼%]}W«^äsìoÿ-ò¿¥yW+ŸÔÚ~ñ¼?néRøY*·zÙl^¶$rwé/t”wy÷´t=¤vÄIoÚ#t~Mû/–Ê>)wË¡®Ø=Þ9<Ø?(î×ÞìíÊ­ïß½L{·”[:š¢Þ·ëÝðÜ1¸›ªÿ6ÇåÜ÷§½Á#„„²ÂóË4 loi´¨zBh>ª¿ÜÈpW¦þ1zü#T-š.ÎÏÊTÛö;šJûh§Û¾®íH}GI¦?C”oB?bsB)OÉ&vÿNí?wJ[¤û®Öòߊq`¼ L!©"µ5¥€C²sÙÞ!Þ÷£À÷cüÎJx-×â¥(ä#i(Øi(ê,¾ï[i8ˆ$¡h'a/²€R¨€Ö Íïó›7T ÓÍŸ:J0þ[¡Ùu„åÐyU½luë­ëÏÔé^^QîÁ HN/ÛÍr—lnvß[pÍâ¾ÝŸ¼/f“²÷rÉyJÒÖî· œ\vÉÆ®9iÑOZþpu™¶ä§m_vÏ"Óî)i#“îûI«—ü´÷å«È´‡~ÚV»YŒL{$uC纙öX¢8(*-e‘¸Þ)7Ë6fYuW7˜\ó±]ë^·[!Ѱ»ÁÂèL^n…*:|yºí7rõRîøC®\ØœÕÊ*onGNnãì¦SîãK #BJÓêÉ…‚!¡æI:åéTèÿUúÇHÐ…Mè„K>)(Çæ€á¨æÀ>VøOÿ§*þ¤Yy)xŸX&(ÀÔ@› ñÅ`¦rºi7¿SªQ==¿³Þð¯oßó;yúÇØSì5)Àãd‡!Øå媵*µTV`•å-•U¡²–òˆ/åß ©´¬•ÉÅšêL£eÈÔ´¾}–XþóFýëÔ,‡qãæ•æÕ:w£×@»œ› :]è&ƒîæÇ/|ÿ2!_> î>ѭƶ·t1¦²|4îßó6 ÁL•ï{ÈTqÌTÍœ‡ì%y'b’wtÉaã£d–•í-²A:ŸF/÷à äÊOýq×]Ô6Ô…ýŠ_2Û‘ÐÎf///¡A)¬õ»¢©‰*Œ5º²¶® T `Õ9ö±äÜß<ÊíçŠðõWñÑ(!;eÐ…QSh^QÉŸ„¢Òù°y´e&ÒiµMåôex‡ÎðÑzÕ蹞øh® µt)‰FïÑ”—“3&Ö¨ê8IA[¤/iVa«>œŽÃÉàN×´Uíz«S¯zSÊ–M9ë”…nöF^6¥E´äÿ‰XxK¡ÄìOÄzÿ+­¹ ‹¬ªˆ?ÆŠÕļâû¦ûc,H›' 1îEÁ^GþÉïÿ?{oßF®ì ïÏ| =ûÞsìмøeæä®;Ø€sOÂ>÷¬½°MöØÆìÉdŸ{¾ûSU’º¥nI­nÀN&ôLlLë¥$•¤RÕ¯J®"1‡ê­ñL¦_Ö"j²¢@¬¥¼F_ï»Ëà¯c¿DÖCþ—öËZ)±ü%(;â¹ ÒR™‰òRøEå,ÇÇP²ˆ²Z*â\(,"‡,eŒ¤~æ Bªs޶‘HÆV™£´w Þ)%T%ð%#ð‘óiÓ¹¿¢Y_Ö;n‰ùŸýʯef}ʺ ²’ìÕÜ?äÃ’\ºJG‚ÉäPr.åÆÉ­ËN{¿…CR>ï=— Y`Ä+A9/£…Ó¶´ïÁhXÕ¡>2G¶2]ýQÑ×^È~Ôe5pÑRŠæˆ}ŠrYï5Îe þ¯ø+LádzX’Ö3, :}À«ýJÀüMM;¨àñ_í²@û¤fxSÞé©Ð+Û‰3(wŸ\?¢M2yZ+F )¯¶3 Á²oOÚ B·‹@DÉ— þ„‡¥-³n¥3œ’‘RDS†• aëçr>PÇéçGYtÈà'¶HÅÈŸ‰ãgÒ–vh'VŠÖÄVÒ‰•¥-Cì‘X)¥ˆådÈX½P@vƒíÚ+¤<$)ž÷U–¡¾À³S;ì‹Zœyª¥(qÌŒh語*Vþôè›Ð²›ìœè8dë~Ø)D ÕQ‡>ì§$ñâ²è(‚­$_©j ¦j*öKñþ;LXe=uåî"Ÿ Å¥Ö8+²søu'ÓóAûo-š.è'’Ÿº£<* ô¡pP¬«¤êJ#寃}²nuÇÃÃ(õ‡ pàJ;¢÷°PñA¡*?– A•+ƒáž`8r4É0•½Ã`¸T ÿ=¨.ü_Çr|1t+Md‚ÜʲO mÚ‰ÅG4¯RURo 
€mÚ½oU_âžp|_sÁiÿδ#J‹ é—=-ò&å%Wéü_©Ïž ²„R\Õq&û ˜±¤æ@œ …IéH9c¥¯õŒ‚ב.â(y„Œ4 NE Qå:b('T©y Äñ¶ìqª“ÙPbÇâÚv( ߥôÈAÕ~Wøa¹|ànÊadƒKK=©‘Qípé"´’ y‡ù ±•¤˜ªŽfÔòb k¹"Ò˜,6ªÖáI'£r¨—U;ÐçÕAJŸŠiqt˜èÅU îREÈÊÞ:HY<¦â[\†* +âôÏ34žó6½ Ôy›qPVßJŽBRÊ:H[kÑêB÷žràk°T¬l1MÞ%¦Š}GÌ<ÛV5mW6ÿåT r‚[@kßkº3€IÜ¿V´×ˆ¡w‘ì>Û¯® ´QÑ&°ÞM®ÕhUËÚÊÖÇšÖªd„Uy¶{5­T¢[qAÁJ J#²/‹KÑâÙIží&VêÜ ‰}E¥,zw–µúW'ò®l=[Í’rÒ”ý¦tL²àg”V;#ÞHNÊl¬Ÿ()MBÓ¹ÌyØæZ³˜E8ض±rjÖòk†Zog((Ø <~[‹ñÌŸ³?L«èô  NTØ1ª‘Š]è¸:ò¼½­½ñ7£Ø›.æ¨7…˜Å¤;оm«2¬¿õ÷õ3añƒ*zºq”ÈðªÛîÊÅø[Ô-ÃKR™ž÷ŠÝ‰×#õõ(a/T¼Çè(´ôðAWI‡ ˆ,‡ «…´~ í„ i0oGòm&kb`·y 5®Õä%,o"YdÒ)FE£"ªÐ¡`xíìÇäìÆ!ò2E•6E¡7¶Ò,™.Ap<7Ï…æ¹Q<?θuî°pVäÍÅtÙír#$Ê+JË@‘¸Ô¯Wjö^‘*tC¯pc¹óDó„ñ#m¥ûÞ ”x$kÛEÂþ¤uǾÖ+Ìv[®HàA®À!¸úïCÔ´þóïéÃ̬ôÛx?¤uÜR=/)‹q¶œÁ8‹‹»Þò é2ëiœ­lŒ³¹Œ³nÇ6´Ì®Å@‹+Ç¡㧑@¤©6Û£ÐdkÝc„q64Âvß¿k‚€DŸ†T•µJÂi€8Šò‘òGj)¼ §ýWØy­¥,eÿÚ½Ôn™íÞ(ì›ÿd³gð#TL·:ËíÆ.ûâvY¾Ri>Œ.MÙÑJÕþ+3}H­i5²ÁY|(;{8¨Óõêo]ËŸ·] [[éTÅŠ:ÐÕ¶Giš¤*]Nôâ*FwyÒ¦´ÑÕ–4ýmjDÝX.kÝHVÕºky_zǤNÝÌ‹@yù!Ö‹ ¹.àÈò,Ãì\Wj,(¹pÙ¸Sj¯WÁâñ²jÕ”R\ü°2]x) oöNÓ¶Ê=<(§”åšlš9a7ÂÉý;üê.§œ†ÿÈÇ)+YM©%6ü9ÇGôVÀg¯£š ¾ì*Í'ÂrÓ7mÞf\{À2Ó×RV»«œˆÙœbàÆFð6‚ý—5T_ÚB`WÂWíÊʾU ï6°4»3™â "å呯îòHW]–½U—}›Áƒ¥+<­ºóDÞŒz÷eÔž•,>)Õ„OÊ~)iÔáÊ, áÍ5¡”FÍ‚âèFIiœJB­® °Ð¶& Û×Ê𲡢•%¬‚˜ó A†Èº˜•¹RA¥¶J|W6|W1|W ËW¾¬î›¾;2eF_©DÊ H&Z82´p$#øè_š8âM,ëßíJL¶pÄ[/ÐÐÄ‘hb¬š Î…AäÜÿtÎÄDÉáïñ†TÔox¥¥üÈvUÓŠËV[ûrdúµ¸EÓ÷#Ë÷À–ôâûÝD† ±Ïv“™Ä»6¼Ûç«Z*ªÚ*ÚwT´ï¨èÀ\àH¾HTth-l$߈€éb«èÈRQ8j’/eUÁ¾‰“P©_ŒÏ3ú²ã0ú²"ùRý’fZ5öe-žr$+Rg }[ŽÍú’Ïéø·‰ªF²ª]Ô d¨Œ¾.›Sóújñ¯ mƒo “ño ­“Õ%¾54OÖ¦LìrɸpYQ¬¨YQ,Dê\&à fº78·.oóœáÀÈä$_⯅ÅH¾’„dH1É$Áabouj²X)‹D(Šˆ(.‰í¼(öÓíx)TDMdæÔÊ_1É&‘~Ć¿dúrrŸO7¼*ŽÇÍ òÈ&¯c²ªÖØ´‡—äH†Ù¾mˆ´½«÷[Vï÷ë£à›Â6«ƒœÅ«·ÙÅ Šê§Ã·`x!J´U²9¶ú]ÖñßÇí““V¿Õk¶Ð uÙ¶ßuÚ­c|í)B]-QD¯uÚiŸ¶–puàÿúûÁE¯ÈÎÞu€Ëªì¸ÕÄÀê;Óy"+{™¦]õ;-ÅøFv\¿"©ó!³´ÊÏ4øù¿càØJANÚ2“œ=qƒ)œÍJ áçˆVísô·9ל%`#ß c(ë:“© x¬ç+Ý+ú Øsgp¼]Ô^mãšÔ'¢©Î#DîÄÏÙ±ç-Lm¢NKIïbÈš¼V Êdjþ‘R¶éöõÎë+—Rj{ƒ6d'õv‡u[ƒAý´E+±®úå'u8‚*ß[/l0¼;TŽäñwaÈi㘷‹ahj"ÙðúXNéÿfÞ)’qíý죭xxû×[Œ®Ÿ`=. 
@w%Ç9ž­˜²k‹ÉÑv=2‡²$ ¢náý Dä?I`'¯bPîJ»†¿Ê­ÊþÞ!ü Ê{Ugˆî0(ðm4ȲŠÜ­¾‡Âÿõ­v}¨¡¥RÜ]o)Çuà$Ü$¡…xÿœÈßoýŸËv¿ÕmßBËß¿…͵Ύ/»Ýl‘§—ôŠ×@»sK¹ëÍá%¬„a²î%ìüÀû¬Ñ 3°‹~ô¹Õ¡ºÔ}Ö°ÍÒ6»¯î²™ÐZ¼ïÅ>χ ÏF(¦ÊÎvvLŸ‡Q\Í`@Ný}›íW6f‹Ï³ù#»AôùÇéíd2|œÎ!epÀæ“ëÙüfÍ>R˜ˆçx5¾aO `Tv;]<îÞL!ñãä¤ ºàP̾ZL,+9›ßaw¬å"ë”Ù6œqîÝT2Æ'ââQ aL¤bADÈõø4»zÃ!ä›t3¹<†ESC*t3ûøš³óÙ¾£2f··³/˜¸RVÀ1†®<Ýâ=¿7èÕßñݨR æøÍà-ìò°‚àÎNÚ’³vƒøÖpÙk… ¯eĸ‘Ùå …6x¶×‘€ßùv¤±N:õS8¯¢ Ðo½o£2Y+¨~‚²c«Þ| ï›ýã½´b À@¤ÂÝó¤bÊ0td ex«ß‡CpëC{È3”e†R˜¤yѹìöv»õ³‹>Èá÷ý‹÷òËrøeãbøË ö÷8tcøhy{Ñ9 ½sÉA”»g0L¹ŸØ|£Î¥,üDZÔ^†A¯±#‘4– êUÖyT–š!–À¯,’Eè¬ò5J¦Ù-ÊM$®xÕQï¼{['š¡OøWÚ;H”&ò+³ÑÆŠLv©L#.êþ4¹û9ЦÊ< ÓX¢çl·%ø„³‘Ìr•9Ëâkö,9jyÈœåqž=KvÂó¶ÈNXö,™³|šÌYæÑÏžå!O-åìµxf‘Æ¿ÉägõŒy<»ÿ„îûìlþ/²ã'ÖœÏþ)ÿ|;¾»ÏoqÇBÍ^cüv|¿€“s(—à ûæãäþo^LèÚÆØëç°eâàÓkÖ5 ·Ÿ&WPå ¼Ôq±§Ò4œ\¾Ÿâ¡žu'w3(àf ËÅl±Çª+Ì'¿Ox%We›: Šp [,ìêÓëET¾•w“ÍÙàz:¹¿ž@›§¼ÈVŸšÝßÃw=ºå ªìŒ¯ ÆÇÙük1*è脺Áìéñ3kŽ VqæiR”ÙáÈy }4›.Ø~©Z9*Â^«µèb®þÕ«ŸªìO ›p¸«¼>< ¢‡ý*|úzOw“9¥‚N›Í§Ÿï”öÎgO°‰>ÞìE±ÍÉ=ô8ôÜÅÇÓk Œ•kûØÿ÷WOó¯pžoŠìânú>”Ù¤/Ï‹¢ÛL5‹ Û÷×X_”@ ©‹Ç¨ Áã|2a Ævú8A]yìË=JBûw¤Hé¬ZPÛ ÊûøörP§>#ñf—½‡*›ŒsPÚ­?}Yr78:<<ן¢%ù±È@öt¬ äB¦=›Ì'w_%s©«dOñŽÚË®Vc½6^JåR®Œ ‰s¬ÇîàÊzƒËÆÀY&@›cTŒAÕú·Vÿæn½sCÝk¹ ĤŒÜ+ñ ô¹FÙ]Ÿç$·[犼žÉE5¼Ç¦×dbÄÐ8 nó¢{ícþû¼Á×;üw£•Ve[Tz KÏG²$úžgÿŸ,ùðjDfª ?³„UÃÃÅœµÞ Š(Î…Èiotïࡈ‚÷¨±'TÿšÐîâ¯^“Úà"~دé¦>Š3©Aq|Xï@2Ü+ |ö‹´eˆÞA9Í–£¢á`HíÂc@ ~÷é·T¤FM‡éûõÞ ùZJ¤<ó ï+Êûw©÷[ ^|J ± ‹¶øfG|ao! 
@A2fèQ-Hq•sZ+ÿ |”ʼn9á*áÇxZ6 K²g%ª`£¡O¾Q"ßß ¬,î`aàÒ‚œoÜO?œŠðçy£ 'à¶5ùb©ÛNa˜A”PQÀg‰ü®aUL䄨ø¤¡‘ç-RšdõžÖJ6h¾=Gã+ü*ó_þ«ÊÕø¯}Ç€5eH“·ûkómî"Î-A&ƒ|ur1ä^ÓÞ¯玡…ê*\ok {•7_«$•ª ¨®æëx•â#N|½¥žH·ŠlK=nn9FoK=dn·Ô¤òçƒg1t$Œò=êÅ<ú³Ð‹Ñÿ|XøCÇ-¥_õ?æÌ³oणå+3¯»è¥=Œx¢p_”Â^5ƒ÷ ì‘Ðût?}÷Ë“Îsq?~X|ž=Ê›¹.Î@2%õ¥b毸HxÒEAo‡mó-Ëþ·jñš6[¤>ì¶ö¤§Jv þn›œ©Â~¹x×êÄfOª¹7|‡dŠa=n”v’‚òvŒšÊñ#ãúÓ›O¾LAr†žÐ¸†­æ ´“BCøú=Báð×^½w¼G$§×½xœ=‡±œü1… Qü8C.=&7®š¸|“¥¦ùìËîÝøPé·oÇ_q¼ë"[¬¾ÇÏó ŒÆí ¿W[ÇæxÖqµ€Kœ³#û^ž\ÿFÅ<È“Ž¼µ[ªú±|åTþ+ •÷\üŒû¿Â¸ô7ª›ƒ½‹þÿët¸'å¡Éä[ é)‹$+æöVoKÊðQjaÌ®”tn·ÓVÂI›äÛ$ŠÂ CNaÚ«¨:lM˜[TÂEß`« m²6 Žo=­=†&i­ T¹Øœ»†ä¼¡Èy#øŒd yÌw0ηä!j©Á@QPŠó†:‘hÔÑ‘“Á»¿klÛ¿¯Ë޾&]9Î!Ò•;úÞk3¡×T&üAóÀÄ3ö®×ôô4'zKÏ Ðõ)ÑkªQ)©obBdæØdhB–Ãoù¬Ïqª²mMà1ZÇhqëƒc”à„§Žüþ£$ξ£DunI]ÅR£CçOmtPC³h&YD­l„çjj%üµÿÀVŠS¬o+±Ê­˜"&_#é­5¾ÁF†»ªGåþ­ ÷¬E$}™ ­°mçû²Ï¶`Τ-bæl¥ôIgÌxxäâWÆCWGLA@1.ñx0Á‹ãñõ–₨Gaæ>; g’ÉH •À¦5Í_#HŽË]j Þ°iQa[¾hÚ^âv$e¢]¢®]£:ï:ME®¢Kñ¢3QíÀµë§)š§ Eý;&JÎ(+O¯?‡²³È(høñÕ £}nT]9 µF¸ñhƒƒÖJÉe¸µZ¤V¾ÁèÛ\)•£QÝû¶ºqñLØíR/E{ v°å«ã-ŽI¯^ó<Ú—ôˆT2²ÂAIo²F,¯¨Vâ=—ŒˆÀ~¸q">a)C•5+à!àn|ý†=Ài,fŠs&z#­ G‰¨KÈN¦„W\ŽI\ ³ƒÒ9œ``;­t4Hì´Â#4Ot„i ‘xüI•9—X1IØðð<4ŸÜNÇWÓÛéãWdaÒ¦ äÐäñ5.):Æ#©°JYê—•ÃD9Œ²­ÌA|¡349§^ KAi<ñÎfr?ƒö@J>1+3yØBbžï,ún$¿ÃÞþÉ’©©˜È…™Ïv ¬À©`Û;ðÝë2P¥}·ƒŸwñ3¼E‡ç )”F O± êfÞ—p°}º}Üc÷t:çH)Þå£!O-2ó¡ÁÔìš3)A¯(­\ÆÈ¼ôÂqQr9¹i`šËÍ*ĆEV/ »À 'Êmf#ú<^95é¤Fð>2è; ç-¾¤†ãOÖ÷7¨.P.,²—Üq1ŠaPÞ^¯%¦…‡ vÑ$n´AME˜E‰¾¢­Ýa ³wÑ®ÚG»ÊN :P·2µOÙÂPõýø4¿×6£2Nd4Ùw…4¹)lŸ\Ç©¤mÑ´¥G# ›.Øý̬SrõDEì"ƒ°å(þÃUá\¡¨eQiâ©Öt*Hmi”&¾F‹B¼žØYÝ£+YLÝ©È× ÄiD‹‚ñçâÜ 1a05¹ª2¹10èKQü¨é?öÅÃtaÑ[õÇþ8?ÌÙŽð-)¨ß’Mœ¶º¿Âά‰ò×à´A–‘ø ¼ÈeF.ÆÅE9NdŸJŒ’˜ä\4—œë£S,ÇÜØ,nÙöíóZ ¥íXKá†qaG³8·+ZÀêøo`/W7ü7 …V¡\ÜXÉÌdêWØ}ýM÷»/Hï>é”T2°ï`DüŠÆÐgݪàäÁ;ú³Fœ\ËÆÉår(VÆÉ?f÷-ÉÉÃ>ñë>:7sN>Àœ‹‹úf?ìðë#JŒƒ’`ûá;ú3 ¶ßÏÆö•g·p$ìãf!C÷ÿM–ò/ÿò/+ãä?M,Éœ§Þ ¸Tp©ªÏÓ…á°T''¸µ˜°ŸW·tþ½µôžÌX ÷yü«J¬y˜5k?Bg¯Œ5ˆÞZž51òl-âMüsŸ˜ó(sîÿݽ2æü!zËœ¨m*—"î*pÚ dÅ¡P1 ™¸l2——O»Cd´ÅU¹5¡œT"› lT< À=fHFrPPqQãC£¼‰pÃòKô¾õލúÛù‰Ûæx”„E`ˆO¤>S—©5 “žúð(‘š5zÙ'û’ÉéšÂ¬éU¦„Ž"h„–“}K¾†ñ¼`õdŒ„X7CïËpQ|Øê hÖE¯u,¿|¨2êà²!C.ŸÿV‘lQ8t«?íAµÇ8¼Jùñv×§²5w¦‡FpÛÚ.ª¾w;8—¡B åø²)–µF y¦…¬¯¸øVê‹Þig´Çiã彯h¯Æ×íáÛÈÊÓ;ˆ—UŠÃÉ…ýv‚¢x‰&úêޤpØ¢º¢R¶x¼­. 
0ññ%Eì;©_v†4N8ê°¼žo7qDñ\a€û{[9Å Ný0hÄOܰ"û9ø ‡;‡R[º£)•"߉™ PHªøÏGp7ÆR|Ðw)‚ŸòHQCݸ¤¹«…€ö"ú‡bà7ÆjTtÙ©B¶¬²§Þë÷¬¥øêý„`fWÿÕ,mŽ˜ÙŽ‚¢,¸Y%²¨týééít<ÿʤŠÔñ0*†ˆ)ß ³«Ó§T¤ÉàNÊr^"¬2˜”üLîFMéj÷—ä|a}{Ûª#£Ýw’%¸§asR‹ZŸ!”fºãbèŸxâöb´¹-ñÈ7Ñ^Âwê´è`é§ØîªnŠMÅI1Ÿ·/É*80êføØèH$ü á¤ý±ã B.l`\;øw~ ?p…TCßárEðìýAWLA*Ì«ÚyÿÁXÞùå}„ÑQø¸î(ÞbB¼ñƒíÁÿPJW DêýSü„3ìg™kzáB?°O)ÜÑZPÈÉeSÒO˜õ8•9tƒ"ðò´6×bѳc3§ivACÓ´U×I³ƒ±ö6RûÅ×ÕìdJ^¤Ðf¹ý&ýDÕDÉŠ|½M›Ðð€A·þÝ9ކå/ï•ÅáåõVoØôr@äY³%¤¦ÀŽ~fG ¹5‘X,>Œi®¶D=Ç“(Z Nø)bŒ2¹D¨æ¯/†½¡‹X%ÈX€† ²n[zy‰‰¢”¢÷M>CÓ5Žákž„ *Ð)ºD«gôµÂo(?ƒá÷Ø7½×eDÆnÁ#D¥@a•£7¢+Ú]Z'e]‰³ìlkX\eå.[r#ø³Ñ Èwø•x¶Þ­B¼‘æäÄ̲QÂ@O–ԨʛÍÎ/¥KB¼œ4‚à¡ÌäÜŸL7ãwdÏA‚Ýžª=¦FȱÃkÁ¦—|ÃpF¸G!.`w3DŽß³»éýôîéN¸LN?‚L>»KÜ5èÑXö¤êÒýÊ@–¹‘<_×Ò}rT ±¶òñ'q¬~›>pMl+â'÷³§OŸíÍÆêNÕÅñØuAÇN’“ù ×Ûë´¸;X—>%#ŸNî'sX!ǘÃFó«ïÅæ »z'Â/k@®ÖCü`Ò‚‰'ð-ñ£K‚Jc­»Dêc!&]†Ò‡ !>«Í´#˜hM>Ï*&Š6¢kÛÍŸ1±i6¡„ÒÒZßÛ²1#“ *Hà3zÂ÷fÃ9œ 02Å#$*{è[¶(Ý]vZ1TDsvÿ{1ÆùÀx×A*åÈQÑßͭ䈰Ð0гNhxºÄ¥æw½ŽiÚ2ë<—Å™²ÈâLUÅ6Rõ÷\~0y]ªOtÅlS› žŽ¼T–§ßÞéul!Ƶiùûäúq6g Ó’?áäTT±G¢[Ÿ&[êÌĦu$öÞ¥— ¥°Ý`v¾óÄ'ª¹³ð ÜÁ%/p±ãgÝÛ–u'J)XÕÎy…+„³0b‚`‚Q0õBáf°t<‘ p§Û±UÏB§zähhMÊĹ‹'źi½Œ^ƒÂ‘3¢+5c\§ÿFø16å¥í11óÈÊÌáãÜrbO {ãI™ÜIÒY;|Pû¨Œñy—óy†RR·®ØCb<üàrµ»_ãß'lòûdþ]¡@ĺš|D± Îy·"rJ ‘O…çp äF’¸;°/ÄáC;“}öóç¼3 ©/-Ù%%»LIV§ÒTWtÛ£E I+%9ãº=îÿ‡ÊÁ6ŸýAÉšÞvYuìAµóÒ)ŽB5t>¤ü!$øÃ‡àòÒ“FŒ¯‚iD4h8—yÛ£E?èŒÒÛ6 Û†ªHѶÊ*Ú6m¥ÎÏ&ÌCÅ'Ã4³é \éð Ç#ñ"[”µÆÃ0 /3ÚÌhæÔë>\! 
Øç€è…•§Ë^Q΃¨j´ âÚß±j ù)?ÉPt%Yºå=¢¼ ÞcR‡ñ cU’mØRBiŒP2ói©]“VSqÇÊù.ÓQL>täçÈ­ ½ÚjÓªÅÙÔ@ÿHÚž–wßW'Ù9g™ÔS‹Í08uQ¾´c‹M¬\“ú`ÙI}˜{R-9©ç›Õ‚'žkZ—ŸmZ—œi]É>­ýĈöG.@ È¥[R (²9…lK)tó‰SÃö ¨—ؤ‰B vz—‰ò)ÐzVˆ.l{M7)AF™2k±‰(ªnuyÅ@ÅwV®]Dõ¤YYIÒø‘éœ(¨PjEÌüFÓ,xžÃ'Œ•£}²Ÿ›F!üT ¥“­WÜÇ6Ⱥ/!kÇ%}`aø4øNã[X®(ÚðâËd.Wœóé¥ÈE!•ÖÐÊs¾y³jÎßñÔŸMq{˜Œùõdÿ2}ü̺{·“½›ÍÓKéQRÿyd™g5Ë…æÂTû¦Tz’ƒDýýaÉñò¨dy„~mú×±gô“W ŸñÈœqQÁÑÃñÕ.GæÚ)Ѱ‹(ˆì®G¿Õ²ÖìA2».²xRVJ’XÒUìÅhò…`7á³xúìÉh…ÊP˜NáÖà¿‘BÏ®ìÕæºE—r@±‰¶Ö/”…ZÝ ´Žh6Ž ©ùÈDÎä:S.òK¡;-ô¼!o°Ðµæ‡êðyëlq'‘:¹í£p‹ÛÜ`[÷&j\¹³ »{Ùå~‚J!ÜeP¸b)»lp9x×jU§“¸¥…nÍ¿ñ[¡;0Ñ®”éÍÝv¢JcDã‘78zšêqtŽw=q§M³÷Pä±!¼1ÉRz,5ß&<¾z§-òÑ»è÷¡áD¦ûiž¡[|è“n©J+Uú9³f½ óǽEeñ!ÝxA-}Ñãr!ï¢ÐA²¦úyqêóh\«E¼Ÿ¤Èª°Wè3ZðOª{q'8”,± x¢âkX˜ÿ€Šr<ðåô£bÂk¾ßf{I§ÔTÒÊ™HcŠðk=«BQ%•#ï4áB Ýþk½Ó>æ¿ç‹hã^kèæá´VþÑÖx05 ¤ÅPÛø®…ÏÆwm㻦fÛø®­Äwmy·5ø_õUÓ]Ò“BwP;Ïîö®Þ<Çï?´ä\‘\¾ëÀƒ?]þgø>ñ:¼’ý‡p?SÝÍÐ m œÑxHÏ&íSË»¡ýh—U8 }õs@ãiø¤0¦yX±“Zx€§“Z©MÐjI”<¢Óë%|Ú_Ÿ6E9í´¢&ï¸[XÜJ:„ýÁÒl¸ç¡#X<<•Áðvntì28nå÷ÚJ£Wøk%<µLäò´=±ñ*ÞYv׬ ~Y©NYf,;› ÚÈ] „‚î¦È¯h(§÷ÅÍËÐ?šÛWÌì¢ûH•ãµàv˜æ…šMLGKÌ¥Õ©)ÊŠ|—.&ó„Ó‘Íq)™¿3ûbÊo1‘®Û§Ë¤Aêåpj£ÿ}\ÚÒ]<üŽ’NGF^‡£4o£4W#O£„›‘¯—ÁR=ŒðX‘ê]dq-òñ,r;–yùù9%¼‰z_&_O¢,nDÙ|ˆ²9åñòvJçUq ¿é8î3¤1­o!~þBÞÎBKz yû/Й hRÎVfÂS=ÎÉÇ•ÂÓ•'£O&'쇟ûN6ßLŽ;>DúºìøùëdsÖÉä©ãÙOì:™ Ì™\+Ì~•l~M.š˜rü$²xHø»GßT)D¾qrSä'ýÑ}k2¹7äÁÌ®`43zYE£ynÐôR^|ÄÆ›ä?àªhÔE1ÿ‚Ë®#[ìY dt”JLèoj.¿[Ù\Î;=@²«rPÊêd½æ8%_&ЧÐD”H4Ÿ^Z‘;’æ‹$Uª&Þ•«ìëS”mV–28h™½tG$o—Ýý(£¿æ'‘Å×@óð÷3Èêdt¦Èà`àç]¯"éYÙ›7$2‹‹€—@Â9ÄÏ9 —ð¢N‚sî´“…ÿ³Ï›œ.x9ýï–r¾Ëíy—Óí.ç¼Éép—iâ¸\í²ÏœõOŒîr¹„¾o`ÿÈÇÐûyúàyúpýü|ø=mG™ÙC¬ótKsñò óv Ëì–Û%,§?X>g0/ÝTf7°5|èÌàú•ÝïKuúÚêEæ4éòzqe˜¨º¿—æÂ•¡û½é ñôôÊïæµ/?¯LÞ]~L˜î×åpêâžZ~“Áí¥•ê¢•æŸåtβ{fYܲL>Y ‡¬óÕó;_UJIß7ž` ÎWÝùɤÔOÃUú^U-ÕŸÛ)H«Þ,o{¹~5 ZåNO¯¤·ÜÆÓkãéµñôÒªüxzI®È5ªþ.ÞÎT/ªïÆ—+Ow$|×¼ˆÑ(?¯3™Ï h•gq&‹¹‡YK±ºqõMxìÑ-lØ'·°aƒÿzÇõüËð¾üx¾bEà#¹gp£l(ð핞ÉwÌì/¦²£à4S/bö3_Â',rS¼½ìù“n`0I|ò%]Âþ&³9Fi]îZ™ý²šÇøcˆ?.m®YPâ¶wÍë™­ŽÛuXÒðç ŒuÊcO38bE„‡&”H«zeQǽ½L¤Q/þx®[Cẅûý¦ÝCYkš´qÐ+ÚfèÃ`ãÞååÞuÉý»ˆõ†â¶1úã^ôVáø5ÿ6¿ýô ?.?·¯ƒäy^guû¢ïБééýs2ŸI˜8 ?4CÂÑUÁÀ=Mqô7‰B•q]Ñåß8–mË´vn˸cÙeªgÙ%º–]þLi¿ç²Ø÷Â/«9d>w¡‡Ksø3¥ß\7¦“êÕ<¶0”H’"ïäcìäãŸ)‡õÎ)VÀ éÝŒÍEG}y?}4µ‚¥à”¢˜dWGQN‹¾¯[#<\ 
R]¹l/}íGù„N™<ù#<ÓR¯Zc^^üñô}äO¶+ט׭kl•ÞüqúDz•à¾ùzOòǡR$ÍïIÉŸTà‚Ÿ?œ‡3œ§C—»›ç½Yt‚†TÊØÐ@·ß\ŠÓœîdæv…3úÁ]ÌRa/pºÇ˜¥fô}3º‹¥¦y½Ùù%£—Î±š¥Ÿ ¢H»ÎÝï[éÓéûð/“á8›áº%y¼6TlÄýQ¥D¨×êÈŸžáú¦?‘KYrÏ¿•14x‹åÃóüÃø=¸…%‡ñŇÏà¶Üô67Ì6UÓ¼Ø,Ìk_˜›…ù»_˜W9†›…ÙLíæUOÁÕ,Ìi> +ñÞÍ些Ëo×¥ŸŽŽNí“UøêæpÔÕ¼t}ïÔ¼§¤ŽÎºiYª^ÞÀþûò~‰yœ3ùDåw_ôð†Šnt… 3¬D›¢ÈYjˆhÌâ‚™y_×o®K÷Múbge˜ƒu3Ìá:Æí;÷m0ÌQ†ñ• –Y1r f§ÿÁÜÿ3Kà‚ÔMržén~>Ùóy;æquÌá瘮TÍæá¸Á)•6¿©™®É]¯K¤Ï¹ýí¢×ZÎ%’ƒiÿ&Q†²ºD✗HOeMjßÏîž'lüð0Ÿý1½#Ø2¢Qfóé§)bí¸‘/}LDtK1}S3J$-^ø7Ù¸v2t•÷úÊŸd]†R*~Ã"“Ší¾:æ}å¥'+ÿ-/+xaV>©£¦ñrêÞ—Óû×Çõ×ǹ×ß³×cýMñéMuèõYÿî¼êú¿qÔÝ8ê¾£nBù—˜Xq…îê¼tÓë6è“W祛^½ZíÆKw㥻ñÒý¼t+òLåî±âŠEá}ZWdñ†~ÈåkªDóËõ„Ù!7‹·­«Y©Î¶/çU[ñ𪭮ӫV¹™q5~µ‰k­åØ\oOA ÙøÔ~£>µ‚UTÏÚ?SmÞ[Wàa›pžµg.Z\Ý}ŠKúê.{Ÿb®K»¶Ë•;í]€—'žY/NÄkÍ»Ås×^1ÿ±Û¥—;ñªÎ»±‹{Ãúë=™†íu|gÞ²M\à‹šÓìòޝð£Ýû.½_­Nª<…Ÿëhäó¹Šëúʾ^•Ñ÷è„Þí~/}4»&ï!<å¶»¤pH*¼ºR ‡>8ËJ1å^”»lÉÝîq<š£|…ÓõÔù½"¹b7Aзã¶SÍÙÜ 5_½‹þ^×âUH.­‡ 4? „ûO7å®8‡×ÓÙ'Åǧ«øøhþ;.+È\]¯ëÎptºno`é®ÛIÇì›ÓMõÍqxI¥ÞoÖN¿ÜÌx³ÙN"Y†;ÂÒüâVw#˜ó"°åo¯ùx^¡ì¥ŽëhU·×úÜ"gºB.9xìy]£áìj—7¦Œ&S®ŸÓ2 ª=œÓVä™Ö$6Šˆ ‡YêjÞw©yÞ¢æ6$¦ùùÞ™æy[š›ÍWÌFŽ×­gž÷¥’“zÇY¿5¤´ÎO)k ÚFüôd]4t*Z`ŸAôŽ Ï]5–htëÏœb± سå!Ëþxe«YÊšÃp#DºªÅÚê†_y”ííWÁÉ]ƒY–3ÿ›ƒ2^ã¼½!“§á=`lIÜ_ú5q¨\ÚUž@Ä$žÔêèD¦HÅ<d¸(ŒÏ:3)œ…~'Ƶpoä`BИ˜˜˜˜²Me®xarÓãu­A½Ç®g·Ow÷ìê«ø”æs‚SåøR:ÖPØY»ð‰f©p­T¬V„XÄåÈN Éms%9¾º!T¿,aðnƒëÚàº6¸®Ä³Áuq\WÓróÂüïÏjËÞ *œëÌxƒª•rëjyò¼áòÿ ¯Õ;6±øu °oЯQsA‚Ï ü~„å¯AøúüW„—øÝY°¯Ÿ–W}+yß( ¿Ï‰"KÃwãÇ,à1_ä˜6fuïŒÍ‘ä>¡5âÏ’ö¾ÞŒB×cˆqŒ0ž@¦­÷ŠXoÒ{…ª7¨ˆiÓ`QX' , ®;\1¶Î`Hyl]/'¶ÎŒåÄJ€7îµ[wоJB¯’i@LÊ„#–¨D– @­,@;côyb|Þ £´z™Ziè¬U@³RÂ…[¡Yn\–(Ë‘å088±X^@,–ƒ€ÔXÝÞh'ŸˆqùáP–«›­¥hZìçCHiÂCEïa ä)Yl†¨Z«™eêÕõwè;¯µÀh@ª%qdž ²Œá.}P®H!Žö.Ö2S@KOØXÌXfÀX&´˜Œ+{¼B;hÁ5θK÷$Ž+eoÆÇ¹ŽØ !{,0þa|¸*üWøK ;_Ya_Ù0_nÀWŃö…z­lqÐïÊíÊìÒÁ}ÖdÌéÊωBòc¦ÓÓa\FœY QÓbÌï8ò,Ùªþæiá¸ñ“q*ôëìÌN:˜âÌš" væÎ.+QböŠÜ±: UÔ <¬€”ló¯ßû!³‚bˆCåx€@ÎB8XÎl©X° ¡ýƒ Ž=C•XJ<_D V–sœ™×N–‹êrðmJ9Àu+AÖyÂê4Äš#œ¡eV4JR¹>ÇG¨WqPl¾íS¤ìrÖpt‡F@Ü%°vÝË…£ 2è^.€]\-t¹"ÅYï†Cú&"Ü·ðm |i>Œº†>nm+†ãÓZ(±{ﲄvËUƒ‚|©Øn50àþ,‡hÀò¸nà€ñg­pÀ"‹Ì ß)"0k¤7;h‚bÔ7‡ýê‡ÀFLñ§Ä–€°¼AþÉ„G~§c™ü0yÚ¥×K «?0‚°ú- Q]ñÃA)hãCøb)Hæ&@ߟ5@¡ÊsDè3&ËeÏÕÏM}l×;¸P£é…|;qÿRð¥lýoýÏ\žŸ­Üó:õÕcY¶pÌ ÓÉünN ²ehgúÁé0ãñ 
³ÝR»ÔdEÕ2Ã<ãh¼[Ïh¤„”ûfƒ<ºn¤ÏKmAí·Ðg½‚~ëñ»ŠõhG {Mш¨œQýaÃQ ’1"]õnâG&Ÿï%~dêšç¦çÛŽ™†aD|oQ,¼ÏB2#m½ç‰"™16¤pÁÌÒ¢² ý(zÛòÏ«%é½}˜—¶Xo®©·]¨o&=²}BzjØoÕÌhK]VR§ÄÞÌ N…Œ³TÔ8ó §¼áã,WˆQ4b–u9õ¦whO%&© Lî[ ǃ8ðäÞôx‡õG•³¬Ä2`Ë™§ÄKLcæu_y&X»›Ùÿt—£×Hª8z¾h®i1\½pè,KTí(hk¬]JèVÓ›²9ÖkÅU¯n ê/U?(Ù‚§¦AÕ÷¿¨ºÆâqáºÀëš"L)û¾°o ì+˜k¡h×g·¾})hû~´Û…‡£w-©m‚—,²9¹.ÿ¾á¸ºùÓ5ȯ¨Zúϧé|rƒe@¡¤mâš÷"Ÿî¿Œï¡ÊÉ­Ñ-&ˆûšÏ>=‰ÊÂûŽMVñé .¾ÒOþù>ÎéçÕƒQÞz|Øú¡0õÐô*‚ÞZjÿâôÒY"%€rv¡ VŸ€†ù@|ϯ8äºúHb4Gi‚ˆ×p1Ÿ}nð0rÜ’Àû‰ºßñŒ[pß_ép…k7Z@u»…7ø¶/NNÎ}~nï…ÓtTçâ=]0=êâ‚E^öÚC ¿‰rá— M)rÅ3@ŽñmXtVhqòþêÀ-4xÇ v™ŸQÝC¸ÀOJM7½_È÷@éý£|/‡, !F˜a›ì*QÃûÔİd ¬ºß¦È–¤‘7?D*Ñâ ­Xt£-‡ÃM’:I “¼œz Éz nuÁãøPKßSKÄ÷§ô½á nÒaép ¤Â(?^µCž_òÂM›æ8•9Ú¨¤¬Ö¢£ª“ŠøŒ§ÈsNÞf¯ô%%|,*"µ VÑÐÎö^ºÆB!&AöshÙê¤ñ $I‡!ixF$†ò+Ï ÏKÿ6ú*vñÖ@"HRNÙl“øüJÙ DVšö¼"½é†J"Á!šül‘DP I ލ 0ùù¢1ÙŠw.·_uþ´*q‡˜%s%¡~ŠU!Œ¨]T’qFöŠÂ¼jª¤fÔˆÏÂ>¹2õ‰Í"Lj3m6»I³m¸ŠK¥z?­kÐÔUFBPä7ÜÃzñJTVþŠ.|3Ì*"¡l!|ƒßC1»âÆ1,á (´qU¨²}´*î†'£g°Dì ÛpTLûŽüdbÃð¡zÓ8~)¦BLö¤h6Û–B\LÞð&{.Ë2‹ÏJe(!$Þ£!ÊF’Èâ¹Cðèu’JP4ªÃÐKéÓÜpmY¼!D²d¾U+R§[kXêƒü+—A;ŸÔÍ‹ŽMN…«ØT0®dí–Æ”©p~™hÁIÎÎ&dÂ…•Í‹ïÄ9÷fÂôF¶ö›:Ô´sœ7»œm2ùµ+Z惢uq·Ì±ê*æ˜q+Õª©-7Çä¶sN¶Zj•û«™l¦ 78ðœl±Éö˜lxje‘+¡ Ý&Iä݉L»†q°¬åYVíô¡M¦_†ùHc‚ïœâ??i@§ºOiÚ\‘ªØI¶00IYúP{%4!q ]*HQù™ª %Ùb—(†ÊÑ0\q£5¬£ØÜN&­k‚8Vs0?ÖýUªN ·Pðp ¤ÿ¼žÝ=<=JE™·7*EùØUŠae™Ô€bÈùËA“§4cxŽÐû"R òѹìÑ7º¿Š$õ ñá*t§þûƒø=’¿‡üƒ•…ÉÇGkh mÒï·ƧQ‡wþa!€æsü‡‰S¤cïBìŸåCSÿŸþÐ[ˇ•’®¬Îö[C¾Ý„7¥VMxžôâke·£¯ê=ü[žÚõ ¢«¾’IeVmÑ Á°<óB=Ep£®'…¹±ZJ$“ŽšGæšÑùµ«×O â5FÕ|&T~ÇU]OwCmü›‹ôŠ·6‘€mTgùSÊ‹jkÚ¼uqQÔ‚¿^ŸÆü/8­îXäi·õÂá»\Õ ¸}C-§­= LÑ18 QÔñb¸àu;›=ôu ¢Ê¢ÈÐgTÊfãù„Ùíd ÂÖçñíG6¾¾~B+Ih¢*6&ä)ÒûkÒ-1p´2ºg˜1WGaƒ5Ïm¡…n@É2±¨ØýOŠrDAÊø:f¼«¶ŒµyímÓlÜ×&#å¨ XÆû/ ˜ÁA:­ccaíë´6| +õÛz'Ì8aõfó²_¶B„ Av¢§õñ7Q‰A/(J‘D1vå ÈʧÁáÞ¾‚çÓG O'—½&⎸SZaûn+p ¬ÿeÆ œË§7 MAèûÞös€AFíôó ßý¶¯ûgùmîÞ©t ¯¯HŠXf‘˜¤s_¡ø5Ę́\‰-HNæÎª¦CVð^žyHâîyr?}œŽo§ÿĶý÷ñ|ŠÅ„!,áéâ¼<ݰ/S8œ^M`®[òV1Ï¡1O¹fÉS3&¢*šØŠ§4áqÆ®æ“ñoìéAÍsõ•-~›>< 2.q3yëï™Y§Îm7¥uJvº¡ÿÄõºT*íÀjø: ‡4eèÛM<é׿¾Ø°É[eôñLÌ7ä:~ ­aXU°…5˜ §ÇZvÜ>Já 7’kÙ7±å4•`;¼g'µ:÷jŒÜ}‰)õÎüË_þÒltêƒ×Ø1ÀJ¯¯ÿ¾¸º?Vö>þe…°Ô~µÊð÷Áþ>ý.•ùßð¡V)_A¥º_®Ö*+µZ©öVZ%¶Æv<Rþñt;8ÒA²®^¡Æ”Xøû;y83¼ë_œöë]˜oú°û´É‚ËO‚]ù ª¨3g™ïº×¶›ùô÷ Å ³ÅgTב 
áãôv²G>Nç2¨°ùäz6¿Y`AÂN‹‰èp ëà0@Ü‚ ²{3óéôþá鑯©·¨7ÜWK‰å¤Aš?²yɱNÄAV¿‡Mâ~†S N¸ŒQ;Iq™‰R,ˆè¸ßc{fWcéQsçáǰhjGÃîÌÇ×d‰ý8Y ÞQ³ÛÛÙZµ ÷“ÅÏøõïø½A¯þŽë·Ðó“]œ  è»ÁÛ‹!»¸¾»²“v‡ƒ»‹=„wjFÌÁ =øt‰¾€í†V%V¢3àI8ŽRGÐA÷‰!.æ"Jš^PýÍÄ­zó-¼o^ô÷ÒŠ /Þ¡ :¾]rÙxÄ‘ï¸Î¸x†ð4\ “¨’߇•šC~Ù¸RÀø`‹òÜkû¢sŒ¾a—ÔבǦÜw°Ò¹”e€ŸH‚å ªª°;Ic *^eñ`aXЈÆöÒÞA¢¼0‘_™hõP‹ ö*‰"eškŒÔü÷ŧÉÝŽ±¹öx'îñXN¼ßù°ˆ,‹¯™³<ÎsdYd'lþ[ö,åß¼²Ýdòs¸ÓÏî?¡2ŒíÁÿEyìˆþü"³GòèÛñÝÝxN¡VR²¨Sˆt<ÇÉuuŒb}V ”n?M® øÁÓ•X`1c8¹þ|údÖÜÍð÷tÇz³½ÃCVèO~Ÿ.¦(ÏlÃÉa ‹¬ÓëÒ¡Û·æ «LI¬>žòß-Ø)ÈŽ¬óx#s p‹†Í#›šc8öðåb„æô(ì~'„Oû®‹NzƒËÆÀY&ÐJYÿÍ>vj#ß?3±<2Ë~ÍYHûX^îÃ7ZiÅR4¤É\xJ•9ñsNÀNOK½æÈVpBìô‹‰è ,¯'R.#bɧSù £:¸– åEü–xÇoà²W1 ©1Mµ¯^‡dC[Õ…7s#™N» )éÔYQÞ¿Äûßã¾—äµHðr Ô»…äJ2oì²${ÆF#V-ŒbAò’3cü>¦fì¢'{Öf3ž5Q–Ï}JÊP¸lG½Ãy’šÎt+r\k¶S8a%TÎÁÙ¼Xñ´ë 'Tƒ›|\1/]Ïc¨ÞûnžæÛó H¿ÊüW…ÿªò_5¡ |pÍ·A·«Þs´¼»íšnòá ÃÉÐ-]EKºüU?ö TWóu¼Ê,Wqö{½¥ŠÂh¼TåÜ-Ç€m©Ò­’‘D×­â–*–ú”C¨×Eá…>ØQh¶Á½@¨«ðô{Ò‰O÷ÓGyïëO(Ï|eÀÔ ŠÓ6ªŒ@Ø¡zX6̳ãQ8ìd‡B+…0$Ó{%È6þ†ö1T*JÝŸN\×0³‹w­^@ŸoøVShX^¢Ô²Õk½ßÒ0h¶La"©LŽ:‡Nî·c<ÂchiÒ+ ¢ Nê7(Ÿ MÄâ~ü°ø<{Ýbo4î­áë÷mR~¾çŽàüRÀÔº3®Ý¡^â+û8žÞ>Í•òMÜ^4¶åšì;_>OàÕ\àØ0Š8·Þ£²yBZÒÉ«&.d©i>û²{7þÔFzŸÛñW`7ïú„d«ïñó|£q{é¿ékÔ[6ÏÕ.í¨e"Ó_‡Ás¤¼®†\‘å+Ç®_CÓCÏ5 ”û±(:þz˜Îü/¼oJì£>1Â@"¥¸˜¸/ðëÌ®?£1û„DRÎn+Ýw( Ý\ED] 娈{ÅUS„à—7ª©£B’ÂÂQ›·y¨©ÃD…< þ¤ºœ}zȶ1IØnå ƒÛéøjz;}üÊÁØp†¼ÉãknFT?L8‘WÊRe¢Š0–PJG–Àõ¨, c‘sD[Æ6/21®³"ᕦ¿Â×gñ×M*>ò AêcÕˆ˜É6¡.H”¹S Ÿp"aç o…&hßí„>ãð¶òÿ“¨šM†‚6w£å/Ü]ØÅ= ¸•“OsÈS‹ÌÚUÄÐd6¥´ªà Nǹ-õ«†ö•ˆ ß ŠCÕ"©Lù=Œ<´{CxŒŽØzßcy¡cã:îðø&÷2n6M…ð$ïãXLwq ù@iTp/¹ÛÓ ) ’Ay‘ ‡[ž3ËÈšûÄš‡ÙX³ö]tsÔ§ñ¾{ÿ +⟸²±ÞOaÌIÆ¢ë 3°Ü|( Ë PNªÆÍGÙá,Š™r2J‡¹€š­€¤îÍlèȺÃ0£É@#PPQãC»¼‰`“òKD‡šÔIì$®JÓ®ý‚Vî#ø·Nø S-8v›..3fÂø=z–’­bk®xà 5t@7—oõj«~:"„P<Þ)$®í2Ü[Öou ÷¯-Ö­7ßbÒwýV³=@_R¤Ž.‚Ñm´ ˆàLÒ£½Àp™É߆®­‚wZl£¯,:í·êCº M'–RÐ¥W†ëÃBߤž^bI-1Q‚z‹ï5ôn¢ûÃâî{­_[t3éÔ\éõ¥;‰yÑé\¼o÷Nô¼hóû6¬n9—…טUµkÌȇF_/ ®£ö~â"1™š‡pSSœì‡cPN¤G4 ³¦W9ˆ‡I@üHØ@ÈÜî5¡_è–µ~}Äí¿A³Ú=éë åÊ8ÞµõL«‹^ë8~ œ:— écIž}á }è—‡Žt§=¨öX¿’.ÆÊUß¡:¸û4‚[ävQeµÛAV?† ”ã˦˜õŽk ‡3qMÎzß¿èvF{œ6^]äÛ¾Æ;ó"Ý>¶+~Q.;‘ šR`Q´‘«2#»xÏ3^G/ãäð(u8­ƒò^ 'h¾…¹Ñ…¡6<†éS÷¤N÷äA ;'Úkó[àâvÇ_ÞìÐßÓG Þãæ«ãˆÖÜŽ˜Ž‡Z°1>îØƒ8öÚRxM} Ux%d‚ššÀe-…Kªq.¡›òÄø¡‰Ÿà8Në^¯Èàã9æ«;]qeëàšFÞàÅòkÐúH¯š‰ß^L#ÖA¦î 
È]ñ_ÿÿBc0šuv®ï]v:á=¦ÜzŠ»¿0©¼eFJ5ÉÄo‡÷`èßV}¤Ì¾2ÛHZCNxz™™ÝÈŠ"ä–Ü“ž`j¢dE¾þdMaì£;Š+ð [»SXXþò>_aQY|¹€-^oõ†ÍlîSÈçh“¯È/Ô@Gap*ÕÈ˧¶Dê)6~ºIøu¯ üVÏ»B%À¢|ˆ èó1ŽqY{1Ã:Æ€Ù&þ€Ýá%zŸÇ÷ìnz?½{ºn2Ó £Ìît34æzc¸I¾GWuéüŸ±LøH!¿M¸£ V‡¨ƒÉýìéÓgCͲ–S[-2²”nk‡EŽ×½£+ÛpñA`^0Iž(]ú¤ö0 AŸç¶>†ç<êæs}„Â"ð‚˜f—Q‰¿g â©(Hú3¥7ÝIKW Ôãw Ôå% *Xë¾îº9²‹…Ÿ›ßQʼnö3ëÕN²8SYœ©*Ì‚?œmë9Ø–?˜º–Î×vVKÀŸqNúó³^¯•«ùCYk$ÕI;½z(Gkyåí±Îoß3²ˆ{ÑMå[§¨ÕÚbòG·Î7øÄMåvñ€±¿ ܸ©Vq‡c³aŸ!LÁ¦‰IÒø™²˜&‰HÝó¤Ÿ' ׃Ʉ}¡¨#¨V¹¡ñES]LoÔ™µHë$¡I£… :°ò Ê#c9ÊØ>+Ü3“îtîY"}•;MQzçHŠô5JNéû™H¿/n-£ ©»›ÈtÀ½…Ñ£…`uÚêÜ#²ò¬°Ka…)Û”ÈsU×h”Š}ªƒ­”××õ97™‰³EƒwJÚþCñ)P…ã3Ùd å¨IdÊkÒÖån’óœf)žîÌQ$P!|¼—Az¢ÌEžñšlu1&M1&)gû#Ÿì¸*Ë œüE6§p®"0tÀ|¢{4aÓO_ÓtW~ÔÀ¦ (™Ü¼5&:£ºÎ†ëy”#u×d®×^‘‡fu ‘GsZóâû5n>t*óÓÉjLwä×oG´æpø%J)*Ãt‹ß4ÅQšÈPÁ•‘!Ñšœ¡œTGGFne·4r¯É~?fH>0å?ÍÐadæ¯ìŠ"]-¾Lær®+A€mœînC$§Ï”ÊÌ—5ÓÓo(ÜO$IÜ`hyX²½9J\þCÖR)ù]`¸9T0ŠÏq§& #êÕ8!qËÆ “¸ cX‰Dpq@é5­Ùkö ™]ß ½ )+%ŬÞYªØ‹Ñ¶FÁE†kp·ãp 1®ê‰ã;'¾Äj”dqÓ)/qŽ#:“èßðÎÞú£"¢Öi!"œ!"—±N6© Ä`±¡û(´`CÛ:Æ­q9ä >èâv÷²ËáµJ!i+дXÊ.\ðâM[[èÖ`Áß Ý‰nD §w6GçE•ƈFQ7à£Mõ8:Ç»ž8ÖÙ Œ°UÄLða, oˆÉÞ)ÞÑk^ôûÐQìºo¨2l"^ýÙ/z|¦ñ*C°pMƒGR›~ó¶¶b?èè4V`eAP(Ô· c^ÝÃd¸õcÓèìþáç-%I¼±Í0Ùöž ÑlîÒL)äÐÜ_ëö1oôŽ;jY"~RÇXîÐüoi™qÉrêlRÖ-±€Çñ QݲÒpŒŠÁS‰\æb¨)Q¥r.Lƒü‚ˆkëyô²Ê¿èS¬ˆz±9”„»0+(6öÞVln’uc >`@ÞÞl8áIÑc¼if{[ ¢Œd ‰´”®9»ÿ‡žVÝøÂZ6º¾t7lt72ÐÝÈ@·*Äêì7©N*¨?ûÒű¶:Õ´ÍÙ­ž6"/r˜–Ïw³T·=vøŸ|Ö4üPËPòS|Å)— Œ¬Ë›œæÊ*gXÙdgWÅ­ü;ÃqÛ•ÊF 7+ع7@nžcäÞ¹WäàËÄÝ!í¾iÛ{À½­€³íåHÔv b»Ó:Á©ÇmÛ2`ñö1báç@Þ?‰á¤h7ßRg¼½üóCµUˆ6îJ¸½l Ú)PíÁë­Në5ç‘×[—|°í²üb Øv)lûhƒÚŽ£¶–m'û)ÌQ  -p`íPÆÌýä.šû)‚)ÆúÀ œ˜˜S‰*àк“r?àL_þ)– ÷ìE˜Psð3¥M:p÷Ac¤¦“hǀߥ~Ät6û˜Än±òRâD)fˆíRàtŽ+ï%LÞ>°t'&=ˆÖnƒ¢Çpè±·ã}iaxP0à\p‰\pù3%7ãw4öÆË–&Àá×YÐíƒÑ–”Hˆß{ë„·{¡o½¡·p·@·™·þp[K<×ä“ kë´]eë‡$éV¬KÖD$õB"ümM”‚©MÔú`e³eýQ²©è|lp¬?2Öƒ²TL¬ 6 Ö ëA¼6#Öþ™BËVð«²Éφ| æëEBYêÌÙ˜³5éˆW¸«Àºâ-ÂºÚšàµØ%®þÈÕ•ÀV3aVsVýHÂ>ØQ©8ÕU€T5„*-ìt/Œke×°©´Ìcç:Ÿ •š ’ªáQ½ð¥"5¸T¥z Ku(ª?¬T‡£z@JuªžÔLªcU3 IÓ`¤‰‚W€!M¦ GcÀV/è¨'nÔ‰%Kú!F³ÁEóaEóEs¡DS¥ŒøÐµ¬Í©´ùbB3BSýZÙ t”nƤ|‚u#”,hMh4³·!’f’9!C™A£wW—E‚›×jøˆSöwÚe©°ûœ0Üe1¸\ôm¶‰ŸÄÝ:A·vÄ­nkÂÚ&€¶PíóƒjË¥$¦™'Ȫ-ÛjÎÃb’c 4fCýnд4íMkFž• ˆžUq´«(«ð2Ú,HZ_†íŒ‚})4m9ަåŸãHÚ,ë”m‹]!„vUøÙˆöd oìì 
Ø¼)­–›]É9àQÐìL>>Ú€œaRûœ~úühÅŽb3¨ÆËD—Z1—+tTOÚ™}™˜‘£ß.ZU'VõùªåªVž ¨*O7…j-GE§Ê+eÌàS¼ˆ|jé:¢ uõ0Ô¢¼›¥bQùØ .?,uHÍH]ƒú‚@ÒLèÑæqÆ0¤g ŒÔÞt3‚´kŒéë²â¯6zÜ®ÃBˆ?ECª†þE_’´ìT¥ƒM©ÿBÈi<¥:xøãRùxMÚp6ÀÔ`ª£' E@aúã^ôÄg^̓X•W|gF¬ò¡<2Øÿ9™Ï$ºê°]„Á·€kº# 8¢¿b´~‚į—þ® c„ÐÐux¢ÜjÎèÇ•¥p´J|(:¸k+Q­w8¶i{–èǨ3[p3–bÛŒQµ&Øfœ?˜n“¢“çn&ÓJØ|sÈRƒýŠˆØ]l¦<.L$Âê›Çv2EB*x¹ÇXîñÏ”)%Ìé¡gôTfFqîØ“gÎ>ðrØq¬\jŠ€¬v±Ht¼k™‘z{\õNй/ý1lú÷¥Õ EÊ<¤,SÔW’³!%þv¥[EÈWÿ€®•eº.^Ô3 kuÙ€®ËDSÀ«k‚JÚÄQýL°$Ú¹ïî¶ü"îyQeª°R`9©2O*‡|IÌÍ”*PŸq^ÑT™w@UâLñäeÆ™Š¦ù.ŽŒ:Á×¹A}Ä*›%±2Ïböø½™ âvàXßÌ¥„*‰*ùÌÚÙÝÃÓã„æ³?¦wÎ@…öl>ý4½ߦÁuw^Ó{òK#‘« xžÅ)f©jTâ “W¬ï¢@º“y,K—Ô'Y%e—Ú«,4dK0O2ötìQ÷±gâ„5Íñ|«ôE¯¥¬Ñ9œ=øÃׇƷ¶>œ€œÿÍ/Ò4Í-Ñßóx­Ñ^‰þœ÷ý…ØÃô€ûGnß "ßò2°ùesÀýŠÁ7¨ºñ z ß }áT[Ö7ȡݵ+o5f7âœEœýýgПË3( ²n× Šþ„¾AÕÕúmËeè¥*~Ž??ÇŸ+‹Á¤7eöý W-“Û\Lí?úº™LŸÁý‡!¹‰¾ÜÇ"i¸­þ~@Î[–¾$ ÃY®Àa·|¬‘|y?Õ&™2ìQ5efo© ÎRÏç+ÅùÚ2y×á(UæŽRá{÷›Ê´Ìú¹HU¿ÏXþÖü–PþýóÕ&”?ϱñœÚ„òO„òo sÅòý°ÎC?¬3X¨ÎÎèB˜s—›ûkñP6Š[VÍ¿etëÒ/Ë^Žg8ÿ„GQ$|N ƒ{ (BdÌ×ã^•ˆ÷ßWÜ6nUF·*Í•j‰€ÿR¹lÀ³Ç‘ÍÝ(gÌ~{_Ó7º¢õ[Bõûz‘¸ƒô M^IÑãókÁù WœÛ\µÎ£¾;·ÆŽ·ù‹Ð"dr1¤“hxq´B¿§èï¦Ñ)‰²ÙLf]]cbYJ¢øµ¹1u¿7¦Ôèó1:|½lœ¾)yħx¤,ß”6m¹7}"¼ñ; Hïé§áå Ä…4¹òÙiw{îœ,+¾ïÐó(è³ôïå¥"¼ç OÿbÞ#Ó[ÕazK‰¿ž’·a&â]™)]qôöþyA§ÝD ×ÒP×ò=Em¯¬8j{ù[ŠÚž){9t7Dîi™*YB±s¿„sJl_@¿—¸í¸T®>œúÑš¢©éΊL Ä Þ+ ¦^ú~ƒ©W¾Ç`êù®VYw0uŸ%3•¶¼ÁÔÙõìöéîž]}ŸR€¿g(Ø®ÜÚŒôóÊ'õì,­~þt0ÝYJ:?ìå™OQ²ÊÛ…¶¿J%Ñ N˜ªuˆ=Dt(C³£¦8È*Ò„Ýj¬Qùž*²‡¹¹ˆ”­ œQc®’cŠ8pïö±áY>7?6KÆÕ®bð°ç‚üƒ§åþ!Ïw¢f[Æp=Cý@§×Jß*¥–¸ªèOŠgæ€Üá667lÐÌ=ËrÓyPs¨,i–Má”wf» #*ºZ`VD¢‡z#®ˆ‘èù úÏ„…+ÝÜ’°š±+-Vï·xç·—à†êì "¶’@1¯ñŽû½ ò QÕ•çEUWýPÕU;ªÚ Oí¡[öÀSóRîf™kTš“i¼‘Ô„¢IÂMfƒgöÄ3;—1H–NŽªcì`gO„qE {ܽw‰ñƒ×¾58qCÁÛÀÄì}vës*mφö?«7Òðö^þaÂ7®'«G€vÇ}ËSŸ÷áÁÄÿ(âyYjÎÙ+ÿÅ“hö ´þž«L\•g«X©ÜvQ|® )]åó¬{aløTÛµxÙÊyiÿ~Dˆ¾×¤ÕnÈ58wrŽÌÃgù'Â2Õ*U»/È9 z«:p7ÆG›*Wg+F1®ÎVP84®ÎVÆî¥ÂÔ-Ï«ç„ÈB¿^ –XEçýI]aª$ŽÚîÉä “Œýï üoúo ùoŠ÷Ÿö¿q…y¹ÀþÕ¹Â,}A@m®0qéæn€?ÌÆæ;ó‡Q^;|câöbN25?'™ÚÊdÌö ¬ž2»ÌÆ]Æ”öåÜetÆ~)ŸÖXû ‡} GïÂ{:´ËCâ +é°ˆµ:r’P”‡í-È)e|Ϻ¨*î©ð5 êqÒ4i•Eâé=ý5æž uù÷ ÇÝΟ®áô‚*èÿ|šÎ'tÖRI+Í ŠEöeúø™=Ýß?NnØäVÀj“G„ÎgŸžDeÂu»Më4m1 xkØßÚøÝ¬Þ麗WŠ$)Î7”ä‡c-µqzé,‘@9»PP+ÈïÓÃÙܶ¡ Y_Ü r ØtšN¶K^䣘¹ . 
È´K^ °#oØñðE±¸“4Z@i–™bhÃ0´÷´…W ]¼§ F]Ü–!óe¯=”îaõ.Cc@€\ë > øV)Ú‚¶‰DÈô+í‘°È…sÔU_Àú ^@+Ô°`ˆ[XÀ¼¦ÝfÛdÂTᆲ¿Þ§&î„%CŸ°¡M Âëu¼È["iÉ„…B,¥‘<È\Iš†9ÓAîxø†Bò„Þd.ÔÒ÷Ôñý)}oP™´…uåJ^³‚pmØf¯ôI§ÑÓÞëQ%ÜtÉâBʳ»I´ÀêÿÏÉ|fP@…º… ‘F×e›îK¥Wñ%Œ—ˆŒãÐŽòˆHB3¯^å{‰1üʳPeÖN¾¾ tݘÖ@"Hë"NÙl“øüJYóEVbd{^‘¾lP‘È jÄàül‘PHH@ê¨ Ê|w.7xmÍŒ0qõ4‡aêw<דã\Ih#cU¨D•ÛE(Ó^Q¸)VM•ÔLƒñ™²ÔÑ‚¤¬p†>:²ôQЉ÷“µ=â «ãvƒZylq‚•Í–™Å‡ó %¤“ÄÛb4ÍÛHY(؉Ràkä`´u““Vô§ñÓ'®F:ÕNO@‹4盄ܩÑ,?½î9l‹èµg;f„M: ó¡ûN˜²ÓaIoµùáV9ÐøöIÍÝ'Õ<}rN‡¥œ}Â3[ú¤j铚GŸÄ˜ÅÑ'î>Ù_ŽOÎèÓr|r&¾Ð$ÊXó OÝqäîŽÃåX$Gw$XDíŽCKw9º#&îKX÷ÒEûýNÔ*ÜøwšFuCØÞrhÝÞÑÛE.©¢Ia)TŽ€Ý$Îë,Z›Qª$)6GYŽKŸ§‹¢†ÑšXxe4S‹qË•¤ËÖîrÇ=AãkvTÞ:Ô$©ÎÈÿ3^,þz}št=àTJW‘'ßNÔ_ïà&\@‰ ª;mí¡$—¨MVT’I!Òæâúílö@2û-È‹"CgB)ÑçÜWo'cÑ?o?²ñõõj CígèQ;}„}÷–Ú^ˆŒ2z*˜5GawDÌ0zØCx IÌj»WFQŽqQb}åð/), ,Õ0é–D$8Iá!“÷WL€ƒBZÇÁÚÖi lø¶Þcoë8Là„Õ›ÍË~}Ø ¢Q¸ø"b¢À%^P”" 2!óçA‘•OƒÃ½}ñú 7ª#qà†Ã´éÀ-…ç“Ë^Q2Ü ­ }Xm;±hzS˜)_f\_È'Åô„_<ÒmN¶ž'[ûaÓdëç;ÑõÛâÖ?Ëožò>ñè¾ö}}aBRÄÄ$ÉøÊÉ/^IðF&¶Nò <"Q2R·ÈpR‡{B6k¡(ß” ßEÕÜ/ ý"\%iËbe1¹åG4UÓ›=“ÿ| gŽjE'û9*7“j“Jš™íZ•µÙŽ5U Æ­×€œ0;ç³)´‹¨1Ýâc´Œ2u‘°›c°øz¨ÞÆ%ƒBs('Cc8¨¹vˆS„²·¡&ƒñ*¯aÆdD1šK,¦ FŒJªc©¾öµ¼9­KQÁ,§¸E$1ã|q7o1‡ÒÚ'Úe» $6m4´>„»Œf·Áæãû›Ù»º»B•øÓýü»Û¯ìfºxœO¯žp£¹š<~™Àº[Ú«‘n~ov¥w+þ(犺E¶¢Ma@Ì(÷ þk+>a’¹SVr3†XJGy´ï§ÓñíôŸxîEêϧXÌBqlêâ>ÿƀߔØß§ß¥2ÿŸƒr©Ì‚ RÝ/Wk•€•‚Zm¿öVZ%¶ç öÎ9ò§ÛéÄ‘’}üèxÏÛÂÂßßÉó?¦÷×·O7öo‹Ç›élïóÿþ)úêúñëÃDÿ RçŸâßÍy´ïþz¾"{ŸÿÿêïÈjøýO¿Ï¦7ŒÇ™®0ùezÿqVd׳{X±¯?ÃBµƒ ¹þ Ê®EXwö¶ú/\B&„Yf˜—w;™Ï‹â³(>Ï~û%––ÏßDõñWÒ)fð8ÿ¤¾ã4ð‘6z‡ÿ^ïH±èö+Ôø÷¿W®?ÿ6á[ü5-ˆ¦±/‘—-€;ìÓíìJ‘¡TªÞÀÙ˜“ƒñ /½)Ôd¥=YØkÿoЦÿˆFžjˆÉ'slÛ „¶ñß¹ÐÂÁ±Ó9t´(k >Óš=»œChÑ`d<‚b'ŸÆ×_}u;½šqó£ì¯ñ×ô#+(„n ÿ÷_~’/£ždÿßuìý/„^ß=(/‹ÔsÛ˜¦´ý_bçxÆ{üXø+WÓhõ î=Yï ·#ù¦ú=Çêßþuñ¿Aª [õcvqÂÿ*ùÿ{ÿW^ƒäªíQd(„DSúþ¿Ã&¨¬Bò“¤Xb8CÉD7Šø…!œn'¼ïo¦°.@pÀ0ìwÔ¿¼ª¿šÜ™„nX;ì~Âe›õûÇÙííì "lý¿í±÷Ÿ'Zvdä»ÉÂlT’æôUþ>R¨¿5Ø2]f·ãëhä9Ï”²>cRu{ÀyO ö©ºIæBY ƒ$8𬔉EåEv{3Fú¡Ú‡ÙbŠâ†‚SøñˆÏ‹Û›+‘.*(Ìq¤f‚ðhÂ9S pü_ ±ÿ+6䯂ÉD9–<9=\TØ›7Wf¬úK”pr» péaª’°æJQÂ#W£¨Ä ÿû§Xj­q‹¯aãØÿûL{÷ù9î&óqn'>g!ó@©}ßEæ¾’ðÀL`•ß3RÝf²†Š‹’J”®êIÈÕ³bìº(“³kQº}ßžghW%Á²‹Œò¶Wë·ÄZ/’ZÛÿŦڼl†Ñ»Ú…o‹zA¿Aê7jjW-Ù¨hã×@d$beÛ’ñ~(þ_oÔíø_1<-û×…º%«ÒÿlØ—Q܃Òx÷–Í«eã~ù 
Z÷?¦o&IÖj‚”ÇEÔHò*ÈôÝb~?F$%Úù:á§ÿå'òq‘Ñ’áþfúäØxÝØ%Û/2eø;âßÿð_[×[Å­+øw ÿÆðoÿþÿþo)åÇÿ¢5º}ÿ8I‡U(²$õ üKüÝTÕšdÍ»™ëxq$ö;sÑtCñÎÁQ,7ÿ>ö êÈ| ì ”u&÷ÛZ‚\†ñ‘Êž§¡è%Æl›¯f¿O,R¬*ä)ߣ^L2 ”såìñ¦0}Sú cìß`²é«WÛ|§¯öq$g·ƒ ÀðïÓÿàŒÎsÂ.9ÿ7ìüð{ww›hâ¹ÿgíÀÿf8¼êÑäýƒÏι–˜†‰[LQIõÐù“ùGùØøæfAÀ`¼F¤˜ˆ2 Š‚·¤H+“(V‘g›…*ÿkJõ¾ Š|ýúë6κ—>ÿØO\ÿóÏ2ZŸQÿ”‚ýZRÿS*oô?Ïñ8õ?ù;‘fö[Ñ¢‘ùå§Ø"ÿͧ¿ü”T®,¿)ÿ’yWŽö ®¤úüd)àçmfÖ8¹ZêØØ˜[«©G¸¬ò†Bñ›%­6HW§õ,ºl¡8•/òUïbÈŽá;6F¡°EÚ¤¾Ì¦)±h[Ód©€‘ö4_M ‘"OP/ºþ#Y†Hõ³Í‹¼™=¡ŒÕÿ½L¢ :]Á¿ÿ.*áhØ×Ë‘ë%aªã ”÷ˆ®Æ×Öúòš!xÙdÿ©¼Ô=ú+z4ØÓ…VUx\m‡š=H#¶ÞÿÒFNr}D¥¢\£½˜§VgH”à)¢¾¢ýR˜'0DƒT΢FÀ¢rª=¶øXKÇ?é,] ‚í7oJÛ1º‰=Þ0-©Î“R ¢K) íEð*j±™Š¬DÿÇXŒ)^¡AÍ_tÕqò…Uð%aÛHHùmlµHŒ<-¤^+¨]+}•Üôé¨R»&R«+"µ¤P»&RVDjyýy´ºþô#²”ƒÈ ÿZÁ,/™¨ô›æ$ÿ´Q)á_jê{’÷ Ìü\”¾ÈÄÏKiùç}^ŸuÚ¯—ÈÔYÏB¡Up¹ò\®ì‚‹×rw•&Î| BÍUªPó-ˆ6*í ÜZ öpR .=×R[Á¥gÙHöWDp9AóËÊhž=¬³oƒ K—­ëÝ^V8Õò® ¶]&N«ßNx•*E~²¤Èof©ÍEï ®´yé}©…6½/¸Î.ѿϽÌ.Aês¯²ÏAê DùÏþ:ÈϤÏó9©™¤8õ//¿NQJ¾¨Øn"Ní¼u/~’d•ë;䮄¾5® fÇÒ®Mìúœ¢¼{IiËDÛËÎD>çÉAÞ³ÎµÑ·Š Ó_÷õyIÝ×ç¤î+jí‹ï™é:¯—Ý6m'Úø¢ðÒZ®4B¿#ž¡Á «‡÷\_‰ë›êë§vf¨•Ò¹3”¿8sµ¤8sµ:qff¨•J4ë4C­P¨Y§j…rÍ:ÍPëmÖi†ZµÐ°~3Ôª)^«jÕÄ®Ñ µR±lf¨Jfk4C­P8[£j=òÙÍP«}Ön†Z5Áë4C­šÖ•˜¡üEˇ%Eˇ•jÊÖ`†Zµšl f¨ÕêÈÖ`†Z­‚l f¨µiÇÖ`†ZƒÆiõf¨U«›Vo†Z­®iõf¨Õ*šVo†Z›–iõf¨5¨nòo®Ÿ&ó'Ø\K^Áó!é›+æ/Ä…ZîÜÅ=»F¡h³Ú°²X¯mÓ\šßxøI4Ñs)<£”šiÐz ”£v¿Â§vÞöR¦Ú=á¦êËkaAcØiY¹Ù¡uÕèIÂ:П„5ð_ÎÊWÄ~þµ—³ö{úb~í¿˜_/¹˜_¿Ðb~ý ,æ ^`1ÿÿÙ{Ó®Fr&QøùÚõ+ôöÜy0”1N/,ýLÝsŒ1`° c›®¢æÎécÀî›±M-}oÿ÷7BK¦2SR*Óé­ Ÿ*¼¤–P( E„T0,Ž™Gö>_fê~‘ÌüvùÌ<Â♹ „…1óÈÎçÊÌU½§ÍÌéM¿¶)e¡è ÌëkÓÏ,‚«S–ÌÕU0(÷à‹`ïQÀ,ˆÏÇcŽ ? 
ŽØk/šL•É3Kãü*B¤ `YŒ,ˆÅü„B©K‡g{éð<£txŽ)RÎA_4¤œjl¹®Pˆ€D³àÒ•q`ð–[ºéÁâ³át³€ÍÀƒSeÀ€¤M³‚Iѱ¥uKΦk^Û7žyhÂ&n—¸wkØŠÕ%ÃZûà1maÏCïP7S$µ6aâp)R]çó&‚øP¤KñûCѪœ-[{ž­=[³µ´U¸<-må- CK[m‹ÇÍ^‘¿zÈÿ›Åãf ã­õŒolòâäy<Nï3¿’ßv qw}÷´FºµN·C.ŽI­Ý¾ho×>Ô»ÿ3ü5+­rÖ ò‰`s[ìEþ}¬7 M^›öµü÷›ÌåU=lT:;Óþd:~Ú¹ýã¯"ଟ»M³|>¿[*|ßÛÝ¥ïùû å‚“'ŽS,íJå½¢CòN¹œÏÿƒäÓB÷z™L{cåÏ—ÇAßPŠÝßžÓÁä‰û¾&¯ o_îúä?&Ó»Á(÷ð¿ßÈ?òýÆäXîá×àO áïo€æ‰Xab}<öÇ㬴øÿEK=†Ÿÿ€ ?¿còûö¡7&[åÿë ôs׿'Ç{{Táé›/£ÁýÆ*fÄúÛd<ì=õ³„Úú‚]Áâù7º(ƒY_ºûÁýV‡süÌ¥{ÿÛ´?»‘úËKc–…îãUØ*r+ÐO\ýÂÇ]H½Ñ¨TòU ¹Êe¥]iÖºµ6i]5áíßïHë¢KŽà·jxÐá52$—ÝH0‡¹šË‘i’!•ÿ*ý7íøù¿ù\>+ýÿ;+8,¾m Vm Rqò_…ÿ– º¬ués_Ÿ§I´/,@ÚX…} RDï2 >ËË"ÓLBÞüÐiÃ%Æpì›4.åXq$9‰D7Éιí=>’éCàûÁx2%Ó%c²µƒUüú®X\Þ <ˆ²¿þê’“¼½ÿôêq¡íB¥ö–xzúž[ÞÀÎK³‘•' v`錧á¼úüßòâŸL>j>ô‡ì/M€^aÁŽÜJ¶ÂX3àl^pÙúÉ ´ÍWçõ¼> /`[? À~Ô†.¨^Y °åê86a|M _/Ø×ïKú]ðO|zÏû°&ЯìëŒ÷¥aÝÖ5½KXè× öuÆûÒ°n—¢+>½Ê¥Ârà^¨ 4^°‚º° ÑR·̉öÁ¶!Éù*>Ø…•Á·važâ>]l/ èymBíØß D2Ÿýçꃪˆ\Ø Ú*°_ÙósYIóÊ,À¯èkŒõ²­¤BëËÚâ§@ëëúc}…ì*iÑúR¶÷éÐú€¾ÆX_¦MeŽÄë™Ü:‘™Ç7N, è™Uót,*3“öÜwárZD½è~:ÐêTÍ) [‹K„z†Â9í8ãÐu’2ÜË3]-l5.׋Á´xá×È D¡<¢Pxb_•ãY{ðéaÊñÐ}å± Ëváz°páò"/–í ¥†–¡´?]-—–H`eä.`:3Ô2¯Ø²ùº[¨ìbeœWbQö63C­ ìu;LÙëµZÎlÛ0ò”Hï²ÝÛÏ ¯š¦ç·™HÁœ,ÖÁÏq·f›y+]žaK<3À žï-MÛñª 8î²KqJš~"*N´žàY¨x9Ï@ÅËGñ,Â#Õs÷½QdE6€ñ€^•ý_bT/sû—ÕËÜýÍJÕKÙÌLÕkõŠlýf¦ê¥ìüf¥êïKÒ çEîûf¥äµAï’v})0å9žƒÎ o2ú]"ÀJV¼Âð&£à¶Ìa˜.§º}Z/àD¼"ŽKÁ‘ã'È“ïñOE»d,½&'È Ôà®|UOÕÐJ«`•N#]úV<&Ô«bAHŽìU;A¶Döª Ç¢ì•9ÔŒGÙëöŠ˜f§ì•9AŽEÙ+pÀ›¦—}‚‹š×Á+t‚œ6 Ïýx3 ¯Â rÚD¼*(^¡äùPñ7çDÅËx5OçDÅs;A–à]ŸdЫ²ÿKŒê;A¶DõŠ Ç¢êU9ÖŒGÕkõŠlýf¦êU9AŽEÕË?âŒMÏK>AŽEÉkƒÞÕ9AŽË”—}À™:ý.à•V»…ʈUª*,VF¬ÂVôHa¡2b†°ª ‹•s:OX°˜q7'ø×úy&Ì` ^¸TX&øs:JXõ/Þ†‘*ñ¤y°êOïÁü° lñ„tÏA½7X&ôéóþ™ÀËåý ?ÍÀŠPÿJœa.›÷/‘úÓÀ¿|ü!2,i’ ,i+,i²¶aI“µKRA/aµÃ’"€—‘¿jaI1@Dý‚Ã’ÒÝúEzýÇÅZaUOSÆZ bEÏÓÅZaUOSÆ*†%ÍOV,Òë^²b%ưª'ˆ –+1ˆ=C\¬¬X‰1¬ê)â‚eÅBÃ’æ',Ôg©G5ÌKN,üµ Kš§tX*üë–4/™°lð×&,iž’ ]“þ²eÀÌ&ýåÊ€Eƒ¿6aIs“ˆ]Â’%–¹V:,in2`6ø-Ýæ]è×9,)î Öi«z¸Î(Öi +z´Ê Öi«z°Î(V0,i~2b¾ÿó’«0„U=TX¬ŒX…1¬è‘ÂBeÄ* aU+#–4?!1ã>nNð¯ ôë–4O©°Lð×$,i^²`…¨•Ã’æ)fßÒ³~®„tÏA½7X&ôë–´0Þ¿`ð×$,iŽû€U¡þUKZï7€?D„%=ôÇŸã†%‰:vaIX:ˆJR±-x¡ÛGkíØ7ZÙøP0)@Ú¤¹Cjo¤„TÃLçª]t‘¨.^Í·è®ÈÕÑðϵZÅ"×ä…áÙ.ä,&=K†šYÂój­¤çõyax¶ºI׃9¨¤èÈYy£ÖToýµƒUÂoaNœbFhu¼Š˜ÕPîâ0›Ž 
lrÔ¦Å}cQ¢p«$ÛUÅ­žãηŽ3f;¯E–&°ñxBaà¦B·Ëv9¸] Ã]>´ f3Û”t—ƒÝyê v¬É÷øv,QÇÎŽ…¥g²cùM¦ó±bQ ƒóÓŠ5œv6,%œzÑ€µ3UØki]Y€×ÜÖ«ÕxàÚ™&bÒ¯Ù0±:¯¸ ¬U«ðÀµ²OxÐÎd¥RCšª%ÅÒÍh©eZ*F”º(Œ¦Àcãš®Ý$.w1÷‹Âi¬­Ñrqªæ¨sÀ©ñ!&3Ï’JTk@ K4:]¨‹ÆéBêÒa]ôô/„¥.«ó‘ýV¦‡þ¸ÀÍL…8îR…xþR…Ù Maç¼¹ºMf¶8¥°µ÷”`ƒú3Gˆ­=el ¶´¬ äÖN>+{«Ô B¾`¬[{ÑÄ¢u³qb!·vZØX°VòcÝÖí¦†]+4OO!-ŽÿÌ@/ ϱ¬]«ç˜Ç¬óÂs¼ÛæùúÅçÚvä1_7­XüzÅ1Ó]+LÛÅpÎI’c:U˜ãqÔ ÃtR6½LL'çÓébzAŒ:]T/ŒS/×IYõRqœWÏ‚ë°i'^L*.æ$$‘¦73Ô)êKy™˜^(¯^ “ì_Ò¢…òê%ázÖ½bZ޹ñLÜJÖ®¹s82™Ÿ‹î|NLæçª_ñšÐÖšs>3Y.àkvJ§%Ë|q`[{pÎùœd¹€¯Ø),ðÅmëö9·³‘¹9ªÚ@œÀ\¿p Ï|*²p Çðš†ÓàÑqôòù¹±ÆãÎÖúÅãxfKÄâqÃW8ÛY¹çÀŒ ‰qœ*È l®‹Çņ̃‹Çq2~œ*ŽÄSEòB8ò2±<ó‰Ç°œŒ'Ï€e+kv\žœHî-ä¶ÕÅœžn±—„ã…òäåü$²X(O^–gÙïEcù›ž;à™ƒfô™¼{‡åEÏãÁpzŸù•üû¶S˜ËJ§S;"ÝÓéÖ:ݹ8&µvû¢½]ûPïvþÏð׬tÒÁzÅc“`s[ø"ÿ>ŒÔ†É{aþvÿ~ó××R^ÕÃF¥³3íO¦ƒá§Û?þÂiqr·iö‘ÏçwK%‚ï{»»ô=_`ßñU,” ÄqŠ¥ÝB©¼WtHÞ)—ó¥|š@è^/“io  üùò8èÊA±û{Ãs6⾯Ékgë Ù"Þ¼Ã7ü¡ûЇÕ=ú4î=‘Á„ôH•|÷PLûјü…«wêäxé÷ãÁtÚ’›ïä¼?˜öH·üm0yä)ì8ÎŽsp°Ï ï¼ù·Áðöñå®/ÎM‘ør¿ÇŸ¾Œwäxoï¿zßž¿gnGÃÉ”Ï![-àLô-²Õ{|~èeù—J–J¤ÚÐÓQú:xr½ù†örŽN{ÆþxP‘׿Îòò”1ŽûÓ—ñy™öí(¶€1 j; i?Øz,èîFÓ[R­aôÕ ƒše°7?ÔøË“— äYB«á™ »& {`¿„À~‰ûÅöä¶÷¨ÄûÝèåæ±oGÎ*šÀIûÁÖ·´kN &-Ø\Ðb@öµ÷œ!–Ó­ËrEÑÞ ‹YÖàuV½š°:‚7ø«÷Ôûk5ñQ_Ûj‘ÿEíÿý?ö¿lò^2y¹O®ÙñÞdÜ|ëPMê '"êî¯áø©(ÖGoÆ×[¸«ÞäåÉr5ª;¡ „:Y¶„ü±_Jý¯°Hýo·¸_‚ú_!ÿªÿ-âÅô?|IJÜQî2Gš½ácÿ{– >J÷Hjÿó2x~êÃ®ŽÆÏ£qo: s¢öå¸?øÖ¿#¿Vÿø•LGéŠàön0ìƒ9¼#Ó‡þ`Lîú·=Vy"ôG|5Gwƒû4ýwsä4GŽFYRÚÉïïìgI社SmW®I»?é÷Æ·9¿.ù“éÝãà&÷ð¿Uz¤N唤˧þÓ— eP£ñîÓozc²5Åús–±®'þ>ôia=TzH×׸þããEó²Qû…x3w=?7þ–uYä7m7ý©+~¿{¾£‹âu‡ Xpý3Û8¢_û×'Q¸]iu./:5BGK½?õ§°±&ý?¦ßŸûŠÒT(QÑEñ…†Úiûâýͳ¶ë0 Ý‘w€²·Âý±Bà{FææSïñqt›6Ÿ6· âÖdðWtŸQ jÓ5‚À~$Cïòÿ"ƒÿØz‚¿oßzöôO|úçl á/>ý¿²vPù/„qkðöOòß¹q¿÷ø®÷_LÎæÖŸoüÇi« žzŸBUðG¯Êßì×,(¥‘€¹sÊ BHbH^\+ªd±£,ù–åDñælˆ‘Àwöãwã~?S¡ß°G×Á50CÕ‹!PžÝŠ¥P¥`Â?FÌ.œJ=Ó‘¯Ó«ÖQí¸Þª¥Ü¡OyütkyÓ÷Ïâà ¬Æ_ šA¹ÖMmÖ¾¡$©êIv0¾fÿ¼=.‰1|~y ˆ~[ÜájµoÁ2ÀI ¼=(¸ƒ’9|~áÜÁ[ÿ8úw†í·b!ï2ð`Sþ•3|ÉO2nc¹Í·ØE\άlÏcøÛÅ*ÇÁ9Æ€>X% æ¿}3„ð9|–(A8ñæj°MgK;W‚n 3¦¬7†Ó ÛWÙ4c‚%ÞD ʘ(S ‰‰7¿ã0°`2£”¸ÑJ =t³Ž/,!n”"…ÎüÒ¡?~ñI‡ <0p}`ÕÑü<Š[ËÚ^„ÙŒ¡æ%UÑÞ)xê–Ç‚Ÿ±ßÔÍ‚ŠPApí4OáÄ$–_j ©jˆïB˜ø—© w!Ö3›Âè"ËcÈâ T«b«Òz×bpQ߮͢¦kq ‹šØ¯j²Ö{. 
…×¥m½´oç²´oÓZÚjcÍËó#lf,Ö÷«E©nᛨÁ ,·iI6W— ‚vweø…mÈþ‰o›ÉùE”™TêDzÀpõø…ŸY<¨ 4ˆÌ%ÙfT¶™< ›…ÈJÏ~Zb,š×´î_¨j³K`¡2# _W3X´-DKn:}ÁE«Y²lÁ2£ŠdV™ïÚÅ"Xüï$‡>òÏ߯ÔMZwbîqŽÔ.³iºÍRÒî°Ì›MJçÞR6×ÜÃøŒy·w[F##㌌@¿ß.25ð–˜ÁA¶5°—°m†éLD$Ø8™ÑRdlÂB?b¯¿ßè¾ IÆWZ %ï 69I]Ñn`Þ@¼OJ¤#̯ÿ‰KËÜz¦0œ™†G„ý,‚ù2Œ¦ædV4+dli ´¨ˆ@æxz/q»°ç-ý ô¶G|ß…þ‰'ú]Þ¬’ü¶ì‡Ô{) «Ø¥R˜¹;¿j𜆯|þ¬ÓÜÃÇ«sÑÜaÈ•KY}ÿ<“¿à9R3ÿÒ0UÓmV†>ñ¯Úfx† äv iÙU.£Ú#™ ¶ùÖ¡:ÅæNasË…þÀ™$Ó1@rdÉç€øP±Z®ŒüÉù7TŠ`´òfèù¿ÈçH¶*o…x…H&*@ãcPíÞ|ƒHã4 áÒ8Âþá¯eýÁ$ÿãÎÏŸÒø×tzè0Hb¾ùñqÔÄ„K&Ÿ™õDï¶/U‡ýadâ+J‹.Å/'Ú•?[íÊŸ-á6îΟ#wç±»ñÉâ©q›îw•`ßî½OCÞA`?›YÍ•Ð6&qÜfSýZ‡TY‹òaÐ aßïGõÊ Á1Fø®F‚šBÂMûT»i7²Yâç²!-`ªÜ˜[7ø“~ºß/ò_e~Š2’Xæÿæ>éOø'Üp¿JÿŸYú§iÔ_¨° &}is¿>¶|“³P¸ünlˆJˆmð`šAPÈÿIzòßh|·–ÿI­ï†½}ÀvŽþó«Œ×Ëx•õÌ$äeKQÄ1½l¥7ÊóH½ßBoÖ TÖù_R6¼Zå_­òþé ɾ%šãµx^ U^ÉW4G¶ÍÏÝì>Õ›Ý-x]X¨ªMë‘M¤é,»éWaº:Âtòã S YúË:HÒ¥pì”$iš {eD¨¹ÂO,ILe5$é$]I:IA’ŽÓ8ˆNÑNBu§0´w1âBÓ‰Ñç~I3lÌB©ÑråEÓˆûðº^Ÿ–&¤±Õ’Ú|$¯Ò™NRZ§Dníu½òõ ⊈úŒZ³º¨O»(.¿ ¶]¾¿ÄX¸¿—¬µ98¸dU±ZINÇÖÆßÐ:ÓùdÓ6¬[…Óõ8ÚçÚŸ…’ëDÉ‚ªzÏö»ÓYݤ±CDüE„J…ÂY{Ï1¶†)xCßþ4fÔQúCÿ²jŽÐk¶Qì`ƒëšmâU›ÅÕ˜=—‘¢pB yRW‰”é94é2­V¤ŒÃèIOŸ©ÈTj5é2•¦º$ç@Ð+<=v\e ³3§Ø±!ªO«A(ÆšP=MAe°B42嘚úuÈ‚é”A—…»¬¸¹Q⨕Fà ‰­Tš›S)”«¦OÎ÷Œø— !©ˆÜùZ´C@ÏÿŒAƒÑ¥ã~ñÇ 䛨F(L™Äj)ú¡Ý [ Ä–KDé ¿„J®šj˜dfLëo‘Já²gd^ê`!:”T™ Í Ì!£úæ©Öâ[ [ “%D\p^´X‡`:…çLçþ…ìf\îA˜Ÿœúñ,†´‰1Žûâ§KŒ™-1…d‰ˆýÒUìò¦$‰´5Û]V—1mIgg÷‹UòÓ•]ìÖsÛ´õ[ïý‚O˜0õÇ[ùI¥6[¹ZDr»MÆ»1‹Òkðþ§â"ï*Á«ºÿÉÙ}½ÿi¯•¿ÿÉ)Ïñþ§»¨‡}â[å¿ä¿s)ñ‹ãýî.O û¶¸IêI{ÕLÏÿõFw³”1²Ú.ê­¿qKݘrë¸u¸%&é LéþWReÄ!þ©šqzÜÇáÆìaÓWÏô\7e¹Qî&–¥—5íšKøuã›aƒëgWm )Þ¡”¢¡´ãAIúY°;À>ßþéªn1™ùv ‹ÔukpB®¡‹aýÛd•~ŠF’ÏÙ#²`és–ž–¥‰Ý˜IìЯ™Âë0Ê·‹†Ø°G£O~H‘qè¢ï†#ãF‹>©EŸ¿F\"; ­Ä4!†¨ŸÈT¦7»¥¸8,‰MÕO>U«Û ª¶¾¼A²0³.?:ªtdU·|d·j\HÅ)"üÅ•—!x×Í=©ïVrMVºY(˜6lx$‡\’ñŒ¡T¤‘ÔFÄ»®þ¥/NqÀU1~k&ývèûVM¶5’0¸'ÊŒi7ãXÕ7Y®¯0\ßf¹ö¡ÜC±þƒ‘[sèûoÿ¥*½o<}¼WÙ`<•Ïíqeï—䊞±ÿy©z:¯êNý¨F&ƒ»¾EZ\|ŠEÙSŠèâ÷XêVp%o£?Uhv¥Åì¢9OÝn´ú†+.ìpCµ‹åxDS¤Ü5ä¬Y(šBZ[„.B8ít¶Ùnaò#$¦Gq¶ÞÿZh*ÍkÝSßû+ô •LzU9t(öTŠÝX W¸k‘lÖ5‚ºFêÊJÆäû¬JÆŒjÆ,¥^U˜ö‘EhZqè—„’ûYõ ÑØâÊt‘>ƒDW h¿lþ×*Ô’Õ/Tÿåã´”÷­¦Õ EIÊBnPÂÎb ëx¾ÐïÞðÙ6øXyÁœßYÉöR9QK9ŒB|ÉÒ‰Ë%D³Z¡Xe+Ñ™l/ÂhtÎBÄìWé8dF™2wý…o™ã ©aÌã,ì Ü1—3ű ëåV@؉³[Ïé5¹t®Î1ñ®P QŒD£+ ÂxŒÚBOºýd½øÄÞäûŒbo†]_¢»Ò^Å!cgs‡s8|†¯Âpe…!å}6Â0 
_E¡‰¢0´U‹'i?Q‚0N|ÛgƒdÞǬ¾¢±Ï „ ÈÃ(!yøƒ J•7O {II,DÚî†Ö¶;ÇXAYL–ï¦ø*·ˆÉ&Ñ‚~- êø•ù‚Ù·—L«pãžÀ‰Ž«±Tea‚*ÎB³<‹õÖ¬öéR[R:åuÑS‹aÙ3\³ó–ÝÑ.öW—‰ìÉž›‚B3ã‰nº¯úͪè7©ª7¯ÚÍò´›xÑV^îó ¥xÕké5vÙ°WJ«aÒjV­æU© YfÒQjìNëSÐiŒ*Ítlë¢f™)\ ŽŒ¯èøZÒ&—Ô¥¦bÔRšˆäV¦Ë¾<Æœeü'ös[¶©.sÉ•¹Õ9IÛ÷n×»E;Ó{—8<e°ìråO ï÷¿B>îw‹KÏw<<àwœüK_œâ €{œäj§ ÄƒCòdSK[ÚÆ5.QÛr&©IÚ4eí«´]giK¢Å­Á þ*p_î'p'¯—Ì*p' ¸“´î²S4ÍõEh ÿ×ÔÉ™2]Å~™óí•‹N)”ÿ«à¼æÿZÄ‹-÷ËöÅI»Ò$‘ºÎ›-úk¨‚üÆëçwO*Ǥö­÷ôüØ'—¼ûnÿÛ”—ÈåÈe~ëOûã |ãL«ÞêÖNjmßµ.®ºüÙeFWëzO3øðÝî¦×dç¶÷؃öCR==øú–ëÕJ§–…ß«èßë,i^Á/-^£qqe^ËJ§ãõÕAgnn?GW‡LF­ZïÔ/Z¤s\©êaðÚ«}T ’;/7nÊ5·ÙÚxK‚¦zZ«ž;Yö^È’ÓZå¨Ööšãã¿þYjf¾ @y¯øfu‡££¥ÂˆàG½iÀJšö1‘œ<ôJ·Bä}ç ··[Þ-”¶K;òhû·/Ó:YuM½o×»56ÁÙ|mºè%…<à ¼“÷d#ü\¯z¿Áô5\Ìðαèp0ôA»¡Có³Üá°ébé‘aÿ+¹…åóªwaI|é=¾ô BFËy ÀbaÕ¿Iïù¹Û´]G^ wýioðKŒÀD?¿L³(‚{ÃïYÚv7 ½)¹‘á¶UÃ/£Ç/RÐÈígwtÞ¨qL€ˆ\·}UËIág„Ù÷ãuèG ¨ä1ÉPçµ\y“tOk-ï©@6#Ç Î¼¤×05^'µÜ®±'Ô@ëê{ãßÞ¦N?x8ðÍíìÓv€jª­n½uUã•:Ý‹K@%/r|ÑnVº$³ xÖ7Îë(ûü«ÝŸ¼\^›Š–¼¢Õ sѲW´ó¾ri*ºë=úØj7 ¦²{RÙJçªi*»/$a*z ¡+ª,†(\ÿXiV>lÌ"RE7[k±]ë^µ[!ΰ³Á˜ÁèžL^n„F5|yºé7²õböàC¶â”+ÚÇÜâ­kãÂÖ®lXnŸ^‚ÔÔP˜Š’ˆ'$` vj”¤¡òWµ¢‡¿¢‡/KÎ`†kÀZ—N$3 ÷ŠB´œÙÏ–³…Í,ûºë}ýÝûøˆžbߒЫùþ²Ú•™¥Ÿº™ò¦~4ØG–¥K^aÅø°DK„Éã—á-MÑkš{¶–qÏŸ%Œàg}tÙIEBŠ©¦Å¬\5©ÒK(7aÍ1nÁ>Wyøæ ÚðÝë¥>œ‚²0ܪPcl×[P)ݹª|ˆ/~æ*€Âa•|®x”Ï’L>W‚÷í|nï8¢¶V&ZÏžeÍlý“=ƒ7Ђ-пªÍ8¹<+à_ Gã×Å6Â0±Mß‹ôq)ª_1ó×97Â&`›Žƒ ¨Ì¾;ì{™6³ÕŒ¯˜ùkÒF,I@ßõÊ…¸G[Ý‚ÅWÌüÕºF,È8é˜f¨¤ªU`_)%f‹QøŠ¿Z,ãBÒeL©1[Ýš¾.¶‘à2¦D—Ý‹jÅWÌüuΩœÒ\öÀŸ¼ª£ÈÄE‹ékÒF,I¥è_üÁU]J†–=?÷ö}ZÆžà‘¼À·ÚżJT²ÁʾÝxÁÄòõ˘éu ñ0a”Az •äü-¢…’×­Ã#¼Ù›±`%L¡ý1µ\h»œðöÖT1À1ìq¦qà’g“Ń¥«–Ìœä‹Þäì±¥¸©èÉa@d9Pœè@òÓ‚O\¡äNå>«j%lËF,™Â§c(˜ÂÚ©ŠáÓ4ŒÔëãóQ., 3Ÿº=~Á;¬-+™Y 2kuÄÇ; IVâÓIŒ8—9Ž­¶Â™Âîª)0üÓù+¬©¢À]LžžP–—×zè y¦ÕËj¾ôK,UK åÃ#0ã-¨kæwKûl Þ²ÿñtð²_;U Lõöš€;,Y,ûŠÀ¾¯¢µEBÍ,fÖËÞZ ðq…]ÂÇbX&¼ÃNÚ³c¤4 dIý³?Z=º »yá_‚ ¼q†G¥<½±'å7r¯-ømÇ÷{£†O [ÍʇL ”ÏïƒÓéOÉ—þítÂç›nÇè(ĺmõq >ÁÚÔ;Ì×wAÃK”EÈüaWr/®x‚!НFºv©Õû©rwGzÌÛ–ú"¢“;éÿÏKïïæý«?I(¬bèK†™†é_ AÀ ÷]^ègÁÌrBMe"œX!¶rÜBû!TH“é-Á²z JZÆÇú·ãcëÇ[ 1ØA ÒÑû0çäç„:lØ€º# wÊ5;K<Œ†ý œ0 Ñh 2U7|ñÐ)¬È°Ÿ׉jðÛŠÑ—¢G¿m¾ûÔ 3”ÇÏÊq~žpŸ6÷ ÿtM+¼º¦%sMË~ìÖš—zP¨Ú9¼·à§þ±F•Òô/Ÿš× ÜÓŽ.ºP¥HÛt2%ñ±Ùcniø¥˜qJ©^í:ùP¾”¥ÕáÛµôm7#>í±O†vö½zÐ }sØû5ïz4ÁTuèpºØ'À| cÁ–~n 
Ï1tÛžcÝ«ýÐÐ]—·†î¸ü#ºÛJi‰=Ð*‡ ‡zkÕÜЪ•[ß3:—;l7¹ÿcÙkZ Ò7}Y¤,ÊZ.Ñð¯˜%{úJPÅÝÚ–ôÅ€Zq|ü¼`ßµGJŽ0M°‚®9¶ µd¬ÈRÞ!á‘ä²àú`ì1‹ ¡çG]»n]fŸ(GÙ#ö|õܱ”=«,?~ˆ ·2¹ÖÌ}zAÃã¾|Ø°Ç¬Ž¥Íà ŒFW Ëš¿Î½zÊšDÉY`µü°æÒÉ•Ëõ¬FÐÙ˜þö\j.rª›ew´Å"ûi¿1Á»¡ f]ºÙó§Õ=>½Ð_ž«#qד¯Ol©Ä´û½Èé*°šº¥i9¤ÖeÁÛ»Z:^G&ÏDÚ^DCNîaà«Ã,yÐl§Û1ËFŒõ=~ÃÅ*Iž¤&•àÖNÅË“µgþ*“ \Õvø•pÓkd/H¸v–ÌÅ iß –Ð1,‘~ó«Žè-Â@%dØ‘µj«ûJÖ)nt}ÌlÅÈÚZy÷“u˜:cH¬Ýwv€zU;ÙWê—6áŸ÷µÐ&rEæÈö¼==uâú:9ò?®å&<]rí8”A›ˆåLy^ìhúÇ81ZUšÙÇ>/Ÿã|çþ1T?•R†­=QB‡aÿçAkCØ+¤ÃŤëmÙÆ–Y§=/v4ýc­*M++^­Øk-U1í^ì%UÆp2@]JU‡ôk¤{MçNnÏ 6¤&ñ=í궨èv‰tçûê°Œü7¡ÀWpjF ½°Òh§NìÍŒ_ÁU¨7”òœÇ”Þ¸]ð`"æ"}.ÇðÞÌeÚ÷¤‰5*‡L „ yíý~½)§gášç­P÷„GkB-Œõf‡GÍz+SÈŸ¢“;<¤^Üç­lóCèñµüø:ë9ðrµ÷0¡:€~ßx~/NeÏ÷ˆH1Ÿœ`¤>½æO¯ÅÓ‘ Ž!œ;žë£ùx/Ò-[½ÎR‡mV†”yý¬rØ¿UÖ¢“ŲYôâôqdy Ì“UüeÁ8®«¨q]Í{\{ ÇUÔK¸ökÇ% øâJÅÐ"‚0¯±ú 9Yð~ÇvЗôЋh-ô¢@xVì@G]×|Q 4]6„òò¸ }(//è,êmÛ@§0æ`{q¢P iF¡²òÑ_7>¸ƒñE,’;Û¡1Jr4ØVÔ‹µd(ç^ÁÑ=­w(4¢©Ò®uH¥Ý®\w0ü%³I*0N |¾8FqÒ=¥‰º¼•NÐéÖÚMrxÍÞêÇǵv­U­aÍU£[¿lÔkGøÇ“…¾j¼‰Ví¤Q?©6j9¸jŽ4à_å}碕%g—,qöJä¨VÅûF¶bÇ%¹Á*4Ît¯L£&…Ñ(2»&)ò¡2h0”Äg:øyůÚ9ÒB0=z‰I¬ž`{è6/Æ Ô¯EÜq¡U7·Ü!“’:OFç~d$zÑ6§Ÿæ ì"!ƒÊ$£˜úææ[ú Ès«s´™õ=ÚDA—8§P¦O.`a$ò5=Ê×),m XIë¢Kª ¼öÚ`ú®ÄAHIÊå»$†õWÈGôv “Ö%Ç•zƒ4kNå¤F9»™E’UâÊã ˆéwí=FŠgû’Î|¶ç^V¡œózÖŇo“"pÄ’fXÒIŠð…8v/ÄÑFðRœ Boe‚½ݿЭ ½](ÎðÚlµ6ˆ˜mÓ‹–H ÜE ÃtÏž ²êE ¯ó(ÖËøV8*îæöáÍ)€Ú)H ‡‰Q'Žå ˜­_†Â¿J‹r»6ôP“¡ê®T” Øêר„âý¸¼~»öŸWõv­YúîžÂÈ߃nF*äèª ÛÃJûäŠ>b=PéœÅVxíJµ{œÐ-ּɴAknrÑö>×´/YÎ*Äì>³»²”-ÎpÏå<›‚$‚/•­Í  Fdk@ !Ùlñ9ªˆeË(ãN0H9‚—‹¾ß™G$ø;à ûÝKëÚ­Vû3k’! _µª]Wvf:•lç0†‰•Œ:$ƒ`Ž¥´ÔQM쾿 ­«æa­ÝÉ)49¦È׉SNJbþÞËÑ9´¶î°Ñ½Ã4ÛXψY99ƒ—ÇÎÍÙÃ6"Ö::kÄ’»Ìaqή¯†ÒT%}µ*é«U…¾ªÑ!Mš¢å’g«®w•fö‘ÂPÈ'_þYRÕ›•“,9PãØ5)Óø·êÛ”‰`«™*É›Šb´B ,Å0ãoŒËnU1©1E)†qcBÛT““Ké›$îXØ2íYñèU,R[Ñfh*+{¾XÐ Â{kÁ:ÄU€ß+ízPšþNuY *¼Ð *z÷:粂T÷Ãé%‹Qe|A̲­XÒEOG Éî±æ™…І3F“Ö{1ºc„Ä7a”v°rÝ=º%õ2·í—rï¥Ýxiw]õ#zy,Žf›“ºFÔ^L"Â,Î2YÝ–ñ¥ÞmE@Ù¾ô»­¨íU¡^ÜÍÖ‚¼­Zö]ÚëøRÞÿ^XàýïE§\„C÷¿ï¾Þÿ¾ˆ[=îýïxý{1ï¨ÛßÝ»¾@{¾GÇk®EÕ'À0WÜ_ú ¨¾“™<ŒÆSr‡Ù£îý­p?CIgŒû·£ñÝÝÓn±éûð¨wG^&@«äq0™nß  ð´šÞ……Œ`3ª´ÉÑø©7ÙRq  ²È&l†¤7ŽPÖÝ‘>¿@èQP±! 
ÈmoˆÝL{ ‡ßáîúý©Û4H‘Ü>€VsK•šûñèIÜã}?z|}ÅÂÅŒ4ùßðgz9m·ë´*—Œ—·@ÃÁ­)þÒ9 b¶®ä¸Î4óm'ÈX¯Zõ.ßÅú*b C]rÕ©á)É5@ ²Ʊ;‘\?9n þßE1Û®½¯ãqН¡Ê1ê]µJõžW/ÚG¹¨fð,ŠàdÏh”´B×Pî7kíöôó¡Þe ¢BÞ-R½h\5[ÛÍÊÙE”K÷÷öÅ{ñcÁýñð¢{Ší8»9v:ß=XN/G Ý6®˜€Ö®tëô¨H¢rþòK«tðÝ€æ¡÷Lz™ð¢%«¶Î½¶ä vmQINõtætàÐ*ÍvAÑn¨pѪJãò´‚ÅÑç=n6iÀ2ÿ”ÏíQ/³ÍPwn=»nk]}/N®HØÃ½ˆj· Õ&üõ©ÿôéŽ."Ô+@ó¤3š>®5FMŒØD•›ØUâ÷ò —çØU¦ãøUâ6MØ$>`ñ«<Ç®ò©?¾_å%6ÁŒIìÙ_¥?.ÄïŲŠ8Wï÷“·~G£á'ô¶"g9ø—%G/¤:ý%¾žöžžzãG…htí ïà§á¶³m,óÉ yÚÞ¼%“>½Ký”ˆ1ÈbrØCÛVD)n÷?õo ËŽ{Óú$'ÃÔíß> ¸Ó&ÍþÓ¸ëÓMr¤ä̸ÿe0ÅM о“…‚=Ý . n'^Cø·ß xÀvnýámÆ<` @µÊøÓh8„ßZ=4ºA—Þ ô8¿g½†ö@aìŒ^¦¤Ú›LHô¤—~VT‡à#àh4˜Ý|©x!îÑÅXþÖªœÈƒl÷ŸMØswö÷(ÐÝv >|­—§þ˜–¤ÆƒéÓ4¾“ñèå¤óô.—åÍVûCÀ8`îâþ~p ‘Byñ?¼y‡­é¨w—%ß@¹º“àûP {¸¿½:Ïr´©zæÖ‡·ØŸãäA½):“©×Pg:î÷A˃¹Lû˜;‹4öuˆ*Ôþ‚IÈ*;åm§°[„_¯:Š3ª7m“÷Ð%PyvòÛ•—O ¤n;û{‚æÚÔÞ=fIÀô|9eÏúãþÓwAÜYŠ*)†¨\|[iÕñây“Å ”W(DÞ‘2+Ÿv‰>E.iu®;ÆÖ°Úp÷Üf ÿÇZû&¹U35……0q9ËŠžeod3KX¥ÿE/h’Vq|U\P*mKû´Ò8†·˜Ú(­E¿ÒëTÛ¬{‡õjÀb³ÂÌ6x×» ™ðg¤, ã½$Ɔë¬éVýˆ½Ÿ²÷Jƒ½Ö¢º¬óN÷D+ðù@´D×£ŸuÃ+Ó®ð3‰>QÌKí²“Eõ=ËUlý ë€h|4Ëu‹ö˜öߥC¨7ñ­U¥c0ßmW`W¹öÖKØÚ Ûn¥ÅP„Aû¸¹ÉRIF}é3h§Z3tÔíté¸pÛSk#Õà»X,§0+U(°åîÌðؼ~,4ðîñº¡çEéùeÈBÛ›T2œN)AÀ²«ð_¶øúÂd1`ÅŽ¿"êÔA ¦v>dÜÎ?ˆ6øB0Vû,K®Ý–ôÕ®C]ë®M½ëP½Rà£ÕSø‰hÛ@Ú@½±7)p—(|=?̈…¹©'Y–ÏŸsæÍBêx%#~ Ÿ5øIòZSóQpذË«›¥eÂÝ[ú-ÕÓs'Kß ì­ÈÞJì­ÌÞv sZýX€°¨?6¯ž&ΤÎS©ã*òåSu(?_t™|®êñzqn˜ZÀPS:͵>ñ!;ØyuG!+u¥¨@ûªî»Œ‘çSâΆ¼çÞÈ’ yC½a˜½ y½‘Ý·ÈÒ×gËfè¦×«7õ73µofâoÆÿõyb× ÝPzõèfQn­póL nÈ[;7øÕ&pH²QÒÀŸBUm£Átª*UÙ_†ƒ)¾<ÝŒh ž¼àÞí;5  n¡…r£gP«©QWrÿ©e˜>{ÜD-u‹l2Á¦Ž?¦"ƒ¹“ZN\¯R9ÅžmÒàK—µV†«Ô`ùŽÉQ"QOxo˜ÔlûØCûmoJ˜UyBÆý¯Pûa{ÊíГaïyò0šòêÁGÕÂ}üý"ð-Wiå(ÈÑ}O¦£çgÚ5¥¿“ûÞàñelì”é0ú¦q,·0ýòõ¡?EbëÇ0•ýoèmäÓ¶ñ4¨gê‰iAqz¾n?õþ„Þ¨Õÿ±÷Ǻ?®Øú›>Œû0wäKïñ¥/&‰^Ú3ÆšiL/õÑ:’ïíCÿö3mæYlÓXë÷ Û—L ¿³§ÐyËDϨ H„K¿£ÞÉ]´sìÛI7'´?AÓcä ]¢Iz2º¹ÑÚš¾Wšó~j×ÖáªJj)̇‡B$)@°Mö=òºÃѸõqDy‘÷ ŽJ1&í°` ÛòG1$ߨÙ'0%ç¦)9?”'äüЛøŒ`síÌv2Î7ÄVk¦É@ÅÐ7ç‡òDx~VÞøxM: Öø.“M{\ ¸¦'¸†è ‚ïðÜ·ZUi!ÀºT4£G½ïô‚®‰ÖÌkÕuÿ’hUå‰(æå'Á+³˜†2¸¿²U!v–óT"›>£ƒÅl ³Å` ³û@y–à«7Køg‰ïmg‰ö¹!,3ÍÝ¥úf9›¡aÒ Ã(ÝÝ7%|õF‰_p”|¯k;Jìr#`®I6HºÕö ~ÁAºR m»B>Áh]™5ñ´/5 E²i|^° êªNSWËG/:eÅý=˜*î›pìu>clÉìTÝo>¨ˆZ4¦ÆÙ«ÈX4ÁLU!w·ªúgô»cj—ËíŽÌkŒ¹ë¹ªº¶-˜ƒ^@&D̯קcݧªiÇÔt>Øt,¨Íó·%c>´—¢Ët"ÙBP`¢âŒªòàöÁUyE¡?ßï 
]Š'Ûù‰kBB¹ã›ä€ga9æB@¬¥,å;¼­u“™+…Õë{W×7®V Ñ.ŒTuϪÏw¹Œ9îQÞŽDV/£]/%ž÷òþ!û€e•ó sû¡®X, ;Xñ¦Ÿò¡]›dpðÔ»}€é#ϰY£§}RÌ zçF„º@œPSY†9¼eærú™* ìœBBHÏßž_lÕÜm4+u€…ð~Ø@¯¬þ¥ýÙ 0F,âŽÞÝûƒÞÍàq0ýŽtLíkܧªÿ­wË\º$³ÏíAŠ…, XTée®gœh<‚ÁÒVçëT2ô6L:©x¿;”Ú†¯o±$[“X•¨rn@aVïÌûíZü†è~Ãn— ÔDhŠ*0¯ÝÊg[’aPÍ-øm§Pù~ÛÂÏÛøž±§°bv}Mp';¡hf¸„ÍíËã4G.†t‡Î|Ȫå×]VšWfSƒ¥É-£Tê”FË ^F"^Ø2 R§é°L†=e0ôh/K*Y~‚ðÞ~ÈÚ®éçk •••êž{G?üD…Ñã«îüS÷wh*ÎÐZØd+,u±ªbÐ^®U ÃBÛ‡] ;ßAs…[EJàãà.Žºñq´-#iûÇÄ’(‚–>°@I’¡9|ú2údR—2ï)…C”öäP°sÚÒ&Ú–„RÏ׃ &d8R[–L˜(raÒq»‘6TÁÜÌ<ÎÌŠ¾*2L IžÍ5^*uJ‰ðøAF&ØO`ÇnJ0zJ 2ŽAý‚0â¹û~½8W¨I„®ªÙ‰M¼À:Ëÿ”ývùÅz!ÞSùÏžûg_WmŸ°?°{Fä…Çÿñ¤ÖüÐ'êbOé׎€_¸M˜:̔Ġê(&‹\…¦Š/u¦¥‹eÎLÓGÍìtZ°vÎÄÏñøP–ãm+ì$§ã9:;L—|(«àÿ޾äqø¿CÙåEÁâH^MiòOˆ¾ö+ú”ès¢Ñ'bžò ò=eä‹§¤§ŒtKœ’O/é×2¥ärÞ› !»Qø)'"@Çáy0QñOŽ;' wFî¶)¥î w ïáGF¿ûY_ (ÜÁŸhaüèä9Áw/éW‡ün‚/.xÒÜiÐOšfz¸ÿèzÕýòË/éñƒÙó¤ÖÀ0B?^ÁÇ"%®½8ÄUZ 2]F0:•«\ÛŒ‡ñtèî§@Õl$yŠn‡Ž'ãññûqˆ²ü3`:¢ü)P53QpcæQ%~Ý£dy‡,w\§C–?ªÔd‰&& D[:õ¯b^Póâ’oAuR^p,ϼ+Þ\‰$¶cuŽ®¢eÔ#+„CHqÞàÝ xâ9‹1jĵ6¢½oë ;—cI24¡ƒbK],ÜØZæýÉNÕ0 kƆՎ6 Ivçµ¢¬„¹?üUòºŽµµˆ”#ƒýÐÜ5–ÚƒåúÂh^t2ÆJš÷Õ òŶµNÀçrSÞTзêx?WàìË£©5 öï5Ò¬TO1UŽçBÀUÎi²YØrAÎ%,˃œÃ›Ùó`UºnÐU³ÁábpÀ" œ´kŒ÷ÂJK@µºtWÔknù[ÌË-†Z(«Z8´ _’ W;0iÝšW‹:æ@_4¡4 êØ7ˆ¢‡(6q,ao0Ú¡Qû½ÖÀô Ì#ÙÙÙ`) ƒxºh4.Þ×['Rè Gûû:pÔÃÿM K^„Å>úyôuPßå¥PéóPé=·t>TÚç¿ä/½*Mýhä¶}#ÝËldtK9'‹^ÁîÇMÚðoÌ~Lý‚ £ë ™½á=™$šÈeéò¥¾¸l°@À€XzGL‡˲%²Pz…ù™¬TQ­Ú‘øOÙþ®G1«C‘Ó‚fQØÈÒ44Õ&-8iA¯GH/RóA’1wWöºc™ `ìxnmçÛ d GÐ@rtUå,ò°†4XCÒ vkâ¸þEë¤qc ±öÞW:Tìããz÷Ô;§fáa,š¦‚âü 6p¢èCâfäxÃNfÄïÎ1‹ˆœIŸØ÷–,ôŠTÇó9tDG. Àbê›Áí( å•K«lÓôŒ… D‡²çe5ô‹¿çÙU$GU¦7Ù©Ï1½M«FY™PÛŠ­áëxzû¡ÒXSgž7›^Ò™ ©â‘ ÝITñ4†ûZW^¾ s Ž©Gv0M"Í ü†cöØáñ©çv©Hq— ¬–¹fD˜ktTm›ûCè`ð…Ñ’dè‰LÙB}UOUE¬>*”¤¾eS—Ã5õ¢l;Ä"5ø|iBî’äµÏt£0ͱšºàÌ. 
»´ˆ ‡f*"0õÕ½ÐÌô"0õ4'è²Þ”c.«RÄ¥EH³jÊPçÂyéP,ÃÇCüHEqÃ@æèãÓªCÁÓѯ4”°5häøª%é_`¸Â™/ M8ΧCWÍѯˆ¯ ’ªêøk7éP]ŽUGW+`¯#„ mmÕ‘´4TÆ,Ôp0¬\(Ü‘mHm•û4T¹«ÃÇfe½¢c1fÕí`öðW·©8a­@);­nÕ*®’UAÒNBrð9`þ}á^ƲÌÍï稺úøñðKÌH'ÄŒç®aý¼‰è’Eï+‘¸{CgDߺòEOIuo ÂçUÉ!ˆ®× 7cvèŸJõÉ ,ë-eþÊ£ <7ü‘ÓÚ) +eà²9ê&ÊË >C5 ÀiRò°°)œG[ í# z¯xµ šÚèÑÚˆ(dòh+Ô„½|TèBå‹Þ"ê hAÇ_,l˜§Ñyy"Ö"ØN@ð¢•iÔ| ù—a†ÀØö|n}/Õ DãˆpGÛ°ê¡Âe`’©)J)tñsÈÓ}âzCò4ž^žx,èàv £§Ð`˜À¶ß!Toî· °ÔƒdõššA(à€ l$«4>IÊÄ¿”úXø¨ÂIå—w“kK« ^ qw++¿¹Õjèò±û–å—þít4&Ë’½ÜÅ)Y)/y‰n|êoÈ+‡Ö&K‘xIciƒµ™à ®S5®pÝ@Žç˜(ƒ°LE Ð165lÇ+%`Æ2ny]ÞalÁ@„ÒÀÐÀu ùÚÞ VÐ ^k‰ àV³¡ëž¸\/€R[”ð 16O#/+*vh¼ zA–¦Ò„°³ˆw<>óP²¦ê^*Z¾ÖÒ²û2JœÀ+‚ºq«LCd")Û{¡9Tš!JçMFç1Z‰”\UãáS«Íxíô¾ôIÿKüû@úéߣÖ½Gžò:¢/JÄr::BDnvô|Ø}QÁ¤_ýìuÞèP¥/ªØ-vQ¬B[“Cìu/_f P´"Šfóch4¬³Õïäµå•P‡_h÷aZ^4ÄÒU?¢þàüრpaf€©IŒqÁ( étH©t/_V‡Æuôخݱ¡“­˜ÆØ®ùØ®#×g/7§Ù‰b,3½ÀT>ä[bx ç–„IÊaÆCv¸eÓN•Ûù¸RÏl°TggvQj‰µHJJ1Lš°Én”<=¾”¬Ê4Ò…6¸ ?ˆÈy|1ø½gøøÀ­ÄñZa–l”ALüDŒ"Âx¤{¥C<å¸Ä ±Ë“¨È–r¥­ê¨Ç‹d¤Èä¯Köôä+Õ¼XüùàžESnÓ`诽 ™ö>÷‡d0¼1gÚãw;¶GCJ/Îm‰7*Šáå†D†£%•/n)†,0—Ú0;ùJsÊ¡ûö¡7üDoJ™ îdaáOb›y %†ýüÁM8ÉÐíî¦HžP?K6Ãì—M榪±¸/qnB¦»¯÷áDTe›´&­¥÷ñ*EZ…u5ݱ¥"ë¢ÄÔ±íÃFëóª–YŠ ŒðÆs7ºþ*ââ ì²@{ÃΣÕ7¯æž×5ž-¢ihm Šúû´>UÃè pÑz˜W÷€Ö=d³PÌßÏǘ¯3'ï”zž\£rg3R½(ª§6ºeK[ÄX»9ñ¢ûIp‚ÚÒ®Òj¬:»\à%†êˆé¿Ó%“jò";g$¹Q#7Z/jçF +Ñ¢Þ›uQï'^Ô3.jNÁÉV5§‰E-ë–uáçYÖÅøËÚN©ß3=57IgzI–Œi6»ˆVÐ)pÜ÷'ѽ¸ó«•öåO …‡àÑ(ãíÓô+¨qÖ­–›PD½J17G¡”M4ápivÛƒœVÆvUÎ]Óµ„Yâ$QôHü‰­h®¥ÈŽˆ2%Ï:a¹«t_n!ß*þîëÚu©õ%Š×Ž)‘áåú|Pÿövȉ£†ýž|aº«Þ#°ƒšˆyòµ?×|t+‚)DÂêF‹Y®7kRE¡ fåËh€ì¹ßc÷EÒü¯ƒéiæû¹<£[iÑ¢öë̱]gÑ%°¡²†ÉøïAßU•òÙ ñ?ßÏä5O72Ðÿs 9ýÍ{;?O«ä H8Œ´Ñkƒ¹.Ûe…=àÉ¥Ðu’—õ« Úd²nµ€©OªóIæ‘„Že$áÈŸ( Oµ÷Õf#öŒqz3­o…i iÛmÖjmÿÜÐçë[c:öÁáQ¸c¸u;pèFaÀ!@/¢a‚a‡9UßÅ‚(@B‡›"îlßkZ¡â¹äóøA¹'¢bª§¡¨¢ÖIÆ•]´Ûµj4dL‹‘è†9@wíÆê±Ñ€ñøb¯º,t H¸Þ¼jJ!˜<Ê2Æ´M:WK8®Þ(šwáÆ fè‹Ó CnL_Y%bÐÿæÖÍÐhªl+JV¤Ÿ©wQ&0FE JþìpŸoY^¤º˜œ¬U ×nùM’ XFB]˜7Ô1À.F²?Å#faÖ~¯4êGlî¶‚då‘A0® C¦,ª ?{XKVÅ4a,º 3„ýÑU6áU†ø*½¦?¾*"Àê5Âê5ÂJz½FXÍ%Âjöà*ø'GTù§ kÅFu?Jê²R=Çߤh©ð \]6€íá_S”>=vo‰ÿI‚¤Ny—’<}fºÁR?IœÔUcaR¬ [Ê2Ï)‡R¹iø-C©B¡WVMùƒð~”>ž%òʱ¼’  Æ“ºpSèP%¼<‰pÖ”QKQç„çn¸R0}®âpç\~¤0z%-Š‚—G…â‰Tಲ-°Á.1Dú¢ÑC‘¡Cê¸!=sØhP¨ [,˜ŽÝŽPˆÆ&I_pRÀ´ïä){AŃa6XŽò˜+m”M•·åEØ\=?÷Ç¡Ð]xM¸~côUU_s 
7ïÈ+¦aÐÓÐV‚Ð+úÏ&ð*:@Æ":&£<'ŠcІ± †¡Gì‘0¸'‰ ‚ÑDÀØÀ˜ÃŸ¬B_ìâ^BA/-CÈmÀKœh—x¡.ñâ\’¹XG¸DÓ*¿Æþx2Úâ#ZÛFìÂZ¬cZf h±v³§».€IÚ]© P9§Q'¦–'1ÃMbÅšØøØE™Ä 1‰_b¤md‰]XI¼˜’X%–ƒ± %‰GË]6V€: / ÊT}àˆ;_~{G~îÅê‹ ‚¡?^þ(Ï?FÑ>ûI\8S˜ø˜qCÞÄŸÏ4´tâ8T‡;¤ŽŠ9Å^v1&å+úˆb +Å.Óã v`3‰iÅäÄ ÈÑÞVQ/–ƒ"·hŒ^S6XŠcèÊ7#L²MßT«`ÿOšËÁ51œä}Îü±=äýA3ÖîñþP™˜¾ñ>Ÿþ8~ñ>_~{Ÿø¸ñaÇÿÎðvžðÁ.Â^ðq£W‰ dwv+_öP ƒ#{"ÍF^ç,À$ýÇ_7 ÃÅÆŠÍ(–8J,aˆXÂu“08,ÖÂ1……Å_9ó_:1C»)}+ ?’ônR‚Þ[ AïÏŸž÷×IÄ$f µÎ2”)*pÁ*ŽÉ:ˆ)vSâ𥄱KÉ—¬l\±C–渰3F˜Rü%9@i£åˉð$7â(žeHŠMò…Å´!¨o>шeTRò¤â‘ì‚‘bE"Ùat ’!‰EÙ-sDQd°PT¤1LH#¤ RE…Bƒ~”0 b>Ùdj~¦0 ¢? Gop÷x:!@%MßçÉ#Ô ¬U’Ò¤1j)à(´õpôS±ØÿýZ>¥ˆK£#¢v ášì£„KŒ/’#‹‚PG·`/šµö°F6€!%¸……ECU?žžÛ\2U\H4T ¾I׊6ì‰mÐÝë@UaMüöx~s<¿5žßOoˆ÷.‡ÿéb²@FB Ì7àÉP@ _k!¥ŽŠ’Œë¥A(Ê‹}R„8éÛùЕê…â ÈÇ@ÒŠÅ(¥ŒT=Â?]üs¥‹Gú€º¬“ºð£GÕ+À›ñoÇMDÉRúecDey ••C‘(âN¯Beä;º?~¼’—„ÑJ]½„¾£ÔO•ÊCZ°Í+‚À| p² pºbN”»üV(úå´Ò}¯fèÓ¾Ý~=Édø´ÞÓDZŸèÊóòLþêGÂQ•:º`<@r‡myL^…¦PEü&"¯þkhÕkh•oœ¯¡U,´ê*2¶ê ƒ«®~£e×&¼*ð;Lªv‰ÍPãQíþFË¿^ åk&2<¨z¤!(^ G†ä#DòÑo´†ör ’Á ÑhÆað¦=D¯†ƒ©j$â$]jŠr54e<‹¶ ìc/‹  È ?¦ê‹(?½r)^n`¬X?öâ±Y‘Wb«¸?ö²Œþc¯xWc«Û±Hšñ€ìeŒ ´jÁ|±d/«(B^4y,!{E¹ÛE„Y„ƒY†ÄX|Y^pD7ÔPJÚ+hŽ‹ó‡Y™ƒÁ”‘`Ê «H‡ ‹0̔ѿJý¥ ˜ŠÌ÷¥§—˜qR¸z87-ý†®gTæ<}Ùˆ^Lë_%’/V]ñàûZ€<·8 ´kã(8¤P+ÞÈ^-Å=;?PœTxÏWe!PÉçð<ù4®C8Sx—>}ŠP¦Ù– å´™ÝCÝY ³åÉ+[ž;[î¼²åµgËiÎá+[VC;G¶œöL‡-GyÞ§s+à4Q´©½oy´Oo$NÒˆ0M^ê‹-µ½ÉÍó#¬otŸU¥dÃj/—gˆ¦KJ+’'yÐE O°qevƒPØ$#ÎLLÀƒ1Nà`l¹î¿,:r3A—`öæM0ûó$sÄ×jÌA‚±Õ fá &3A¨ºýdî®ÁdÆ ·’iÄëEëÙEêÅ ÓK£—$@/At^´A5^\Þ\§HØì–f”w¾a|\;g'o­Úla|Ì=ø£p7ü7ŒWüBÂø¬ 5‘˜==¿Lû¤÷ü<}4€Í`À¥™ïé‘£¾§5 ȬR™–eïW©^w„H?¡ñ•ð§ÞZÏ ËˆXH£ÃXöL}·ñb½àÃTnN+؆÷y¿cœ º±3·s÷w,ØT…±à6±ÞÄñ:a LS˜…йh>`mòj4µë- F—7ã™~Ãiòð< × ´:ñs[‘.ÎÍÔ–»hçššð6[iˆL‰ H Å¡4#®í2„Ÿ£N"‚MšR°‰/ÄdE±iuóÎNÓ6$Ý4G‹¨ƒDš‘A"†p½ƒÈ«¦êÑ÷L)/™Ú ‹q]ST€Vz—3ïdšý"±k› Tøäy½Në"±}› ½T·y…'Ì7Æ.bçÍfÓw^Ädév¾.5ó©Ú"H*¥©f'$'<( ‘L‘w[YßjeyŸ•ù`+*~Éöö*Ë{«ÌÀøb–tàXÝ?eyóT$8‘·MʼnŸ²ñÚµð¹fKJÛ‚ï¶íÒÎ.BDUU³Ë€¯ë.wÚÅÍ…ç´Û”SMº·¼D³ â]äãz]ëN.EóßÕ‚ÇgtšÝÙØÞÓX{¦­aé*Ãõß’æ¬ÔŒK×býøe·â&uãÒ0û›jb^¾- Ú›Îçégá€öØ‹N»tr‹JMoéBöµpR4/ó÷O¾N;1.¦Š^‘&HBn´jêáòåÚgá×é/G_lw¾¸¾|2=†_Ê|É Uý ã¹=¾< ÉÍwþ)*XW>îyòÑN‚>1 ¯s_žwX¤¯U¤£U”ï/O¦0ÜQÔY !¬ö¦±ÐìÿÄ2¼œYxÖñ¹¶œ"Į̀Öf ‡Y]¨åÖpŸuæ!GÛj´Ï³«SF°P·wfΈŒý‡îƒhR;W®>ŠÓ²÷ø« 
'Üu/ƒíœáæÕ‰{›âwS¤rÛ6rÛ³ó*]3O»"å’{ñ=°~x'¼&uÂ#£q¤;^„/žÌÇ÷MŽv:G;îQúÙQ8Ú~G»r^ç¾¥u;c5JÉ\ólÜà|{²à^™v^~õ6û‘½ÍxÖ2\§F*í&¬« ÁWÍÅmÑÒ,|¥€ñL²ï¹tÉwÈÙûUNe qõ*Y¸z•2W¯Sø…zz^¶_½^½^½^½ÖÄÑ+†P¯ï-œÂ᫪Iøÿàù÷çÒ#Rvô:S&þ78qE$ûǤ —îZQäü·Ïëz,§òu«¢tDŸ1)»Œê>\ÇŽ€~×Ów,f‚~–Ÿö4üýŧØw“àÛåÌßõï"SqyË[g´¿'t %c7»i|ÆlÆÌÞbŠTëÆü‘IÖCÆD¥ÛYØÔ-Ò§«¼½Ôé˜â3\‡£“¦[eLW™B­R¥+¬T˜/µö©ÛOÙ¥Nq&°—ºVB—:;, ¬èÌIó.\êö"®ÚÂå ePÆs¼RšjݲTˆe<ÍŠî9T5B®›¬ R›ào[¡J1œ»â8ç)S§Sú™Ó­»\¯®Vl¯®(—®4ü¹"r]ký¹ÚÌ›«­;µ·óå²qä2Ç.\Vþ[6Î["SM[;IÙ$=KîE¥¹ßWÛJÛg6:QExEºq¿!Пƒ¹ [Þ^Ì.‹>ƒÖä.#_TZ.kñ=<_ðlc׉‹h\Q ÎH3úbY:bÅLöhãê`Ê•aïlIc¥s´t½òù]µ¹ã•–‡Çv»Šåseã ?_Ÿöì_™4M)ã[ÂÊb9™ÎaÀ%d .dŸuÐ@ˆiyQE¹Pù|” þSq§âyN™Ý¦ŠöÂÝÖa*5b€!ÚI*¹‡T÷(¿‹œ¶Q:F%H?gôå±Ïä%Óò;C)½µ"€2¸Ñ ”úóߊô ¹}¤.O®…l{éJuv¦‡½XâL["Ê»êÌ\]tàz]é;2;\…ü¡Ð~.9De’Möó{;_$'ëzD¡å>KncÎ\¨„ É"½ŸbdH¶' Âi÷ ír‰m-<ì|mç™XÝô`$9¯/ÝFô‘À,_2KG2Ÿ–!šbdZÿ1µó(èF‡1f½B bõ•— IÞíis³í+]ƸYÈ»ìGq¹ÙœØ.cËËæ&mÔƒƒD¹Ó4hÕFù5IÚ«ÛÚ,nk"™È?&¹­Qÿ4ß…Ú¥Ê/-…Lþo‹KuVýxz^¶ðÛýùüß ®\áÕîÕîÕîÕNá—%Aʯ…;Ä‘xÉÏäW¬DhJ¿8ÛDh"šìA§r”ìh†s•ŸÂ‹Î#µÄ‹®ÀÝè óJ½öê9·Úžsv[GQ|?¼¤gñœ+ýÄžs¥UðœÃ½üOç:Gs¾úέ©ïÍ ùšîGÍGG½©$¤S ¹9º^ŽZ'G2ß$vÞŽ³Ãbö“Ãct#«“ç.Â5’¼f»{Ív§nÏîxÖò*ët<:I¼ôxd©áps§ð=œö;ûBgš’¸~ŸD1% œËùÌÆº¦é3Ý/Öî<Óôéo{ý÷k¶¾µÊÖ§wUµZ¢P óöÙû¬z#gù3õûš0ôZ— €‘,Ï ÏjgŒòšCßÒ,绋N¶ÖbòÆÌîÇ#㌨ ’÷ql+ÓªY$ÛûIa `{NØ6yˆl“”Ñõ;¦Â}Kö>f£2æã{/t QC6íDæ9Œé²éÎL"=š‰Ckd#ÄÚµ™Ø¤“T:8¸‹òGæ¢ìD˜Uå—”RåælÙ óž18:[ƒcéÑÞÝ™Ä%Ãé™X‘I°Å(: V·JÇò·6Súw…u™*‹K¬•NÓÊAšÄIŸìåÏ ŒKÊ¢©zRP§Ý,*|¨K?Žõ^^—Ç2ʇzw™>Ô²1ÈG6A[]ªNÕ:”L}{¯žÕ¯žÕ3zVû3‚ÊÃ㿚2‚Îâq=SÇËuŦnÖQ®ØLAä7Ëû’ûŠ %ÄÙ?¬u+8B­Tb¬†ëxhNS‡±‘æïZº-ŸHûò BŽ_oGOÏ/Sá«LåçOä5ÍŸÆq•ŽtcÎ’v„¶Ï™™Çý‘½k³>‰"ÒŽ>A'B"*ŸÝs'F¦žÏ3¯¥wfnþ.à û_Rҟчy‹;oq·á-áÌ>°_ô£?aÏ]ìr«”¶Ûu:Ùšn0”âwš…ÑœãD3®×†¹UDs͘տ²Þ¬œ(ÒÂêûÏvW"<á|¨j\hÜ1Õ&wès84„µ2¤Êlè ²UU©w¡ó߬ý$SÐ&¥8êÇÕ®_ÕУªxÄ[èèÁжfÃïFÐÂïÂéЯ7åG¢¨¨êÓ…]‹gCaƒ;ÇíK„-NljÛQvKË;á⎡çkuÏèàÔô÷|mêù:Ôóµ¢gÁ£«TæôIÿÛsÿvÚ¿¶âÁV3y™àNŽ'ÀxÌI7¥×¹@íO½—O}ê|"íz’ók—V¥ƒy<È4й+äysâþÞö?8CäžK]‚` æ4"Œy ß3çîÇ·ÔΓEKép¸3ŜډW‰®!·*Tf¿ð&ÂÐaÀŸ·Œ:ð±Ú~º¥Ý=ŒrEás¤®éQ{Áco¹}cޱ˜d6ëÔh—hd¬j¢‘ # •$È™-Y‚õˆ­ë ¤4•ƒCÀÚ.ô¼¹ºJáÏ[Æ)ðʲ!V:ê©Ô倌Q=dzèÃ`²y ùË·¢ý—DIs‡b[È:lpÛuLÄïì¢ÉE‘zNƒåg'´n®UËù&þ/_7ø¶s8vdÀ 
ó]–•Ýô·¿m¡lΠêÝœ@7¨-ªœÂÙñžlNaX¬3Eÿq4z¦Úþ#¨Æ“,AW)±èû¤7%ý(÷½Ç{Ò»½}A7F!SYûù ÕéØô#›> »yõ±¤ÖÖäMÕžrªèÕÖ¿+˜þÜ5+æ4´‡Š˜a0ô³:ÕÒÜËûM~x eiÖQïs™!¤vÄM¤Þ!Z§Cº§ÀÑO+³ÎÆ1©T«WíJ·æ1¨UDýª}@sm<ÐíKñ €2!j0Ø£w•@¯h8qʹ]n-ŸÑö¤ØŠã6[µjõñU«Š&!æÒBic˜Z Šx+æëVíÂÅ1¸s;…ußë÷”ŠÝK£lÃÖ®ó-YûÌnË¥Ú9Y«û~ݶŸq (‚?Ÿ©|9cc¿Âé<€L‚|ŒU`÷„…³yð Ç`9.Ýõê£U(iK©Ôµ¨{}YEÜ€Ä虳Ÿ~'ýG¦ZÃ;ÃèÉ¢îÿÏ‹KÐØ:‰Êƒ úx%ÙÆÃóÆÏEñôfwï^Hzꆈ"Z9*Ö=]9èN.¾%Z>ÀDk°É¯ieÝq…yÅÍÚ.6UŒïñ˜Ñ(ý¨((°ý[Å~ Wpb•ÏØ|ÅUq=Sû¬œsÍg*eëtO,lwoµòÁ_ˆ´ãq‡ò JY3”ˆƒmœ•+ÎRÈqAužÌóü*N‘}.û´±m4³àÚ°!òŽœB<—Ù.© (+g[ò| Ÿ*2ΫæÿûâžHmOìDVI‰?¸‚à#ÌV†GÉrv/‚&\±ÿÆCz¿OÈso0¦fþ10ÓÑ“ûèe8Nýôø[¸L¦ãÁÍ ŠŠ›þôk¸ôv>W¦&xÿq¹¿‚RÜ&å¶UjØmÂÇæ™Ô?F9¹á^#Êïµ Ík‰kìe©ü­y:‹Z dż;]ˆÐfÛ{ü…»GÄÅ—Þx€Í¸aâðjâbÞ?˜H9c©¼çý‚E¤¯gþ¯u9 œHàÞ"!b£|@ƒéoûHä4x›’{5—3TfnF/Ã;‰°JÕ°ÛÄ[㣨…|±-fw³{øÃA–îŸûãÁèŽ|Àžù¦¼ÄÔ@ ?ì++ʦŠeeÇ묊ƒz™Àˆ¦#r3î÷>“—g¹ÎÍw2ùŸß޼ƒÜ,-ì==ó?¥&ç*Ú0ÊAé$‘˜Ð?÷ìËG&Å}l“]Æ~°;'—Ïò‹Žåß\ ÔœÛTñáÐa‡ËŽêÇ0ج^ n¼L2af$BGb¿çÂÑ£·2ÍŒ˜¥%#OÇit«¼Ã9ªãIUÓw‡‡´znp`<¿RÝÛá–œWàdô† s²i¹±ºÔ{ 7K°¬nÇ_wÓ**iâ ºÈΔ“œ_ÑÉéLGã>;\ç-óê=¦þWÄ÷;–dür;}ãIüÿ¼ Æý;l¥‡óŒp²Ì…íeøµ7ômâ&ý)®} ¦Þ™zÿöé†ïâð/ûüŒ§cú÷fƒ\´á}ýww±2¤¥Ÿ-"Ó™Ê6rqrevÀ´ÑmlÕfÙ{ÊÔô-Eƒ(`Ü%tZðe¶RèU°Îézv·åŒDip¼¿ðÞq•¹íà}~ ¸}™aóî0÷&tk’7òõ‹ãcæAÿÏÏõ˜8©²ï1n´sÝDfÍ^µêÝlx˯Ï5¥ZKŽàæáôPô錚kÀE¡‰IÊü×)Fˆ'<žòÃLÂÒü -ðÌ*!l€Õóñ¼KOmBϧ⹰‚d°%ôÀ ›4À,l¡“Q¸á¶ Dê´¬—І ßÍ0!ŽÈ¹˜ñ„¬ÎÔâ³”Jþ „eÕ¨÷7`°p9tÆ8ýŽ„ÿ~BWìÅ•fcª§ÑÆh}TÏè×ó+Ö¸ŠÅbQ•¼3Z£Ýj¢¼³Ówþý2(¸>fã¾4Nû¾†ô9f㬣µâL !Aò3Ä=xGÛïä³r¿õ#4îP·mÙœ`%úWï'GeZ‘‡ê›\ÃÓáù=uî“MÅõsîð´­yŸßJ¢žW¥ K_——/(N¡©zç³£ó×ÁϺP ©Î®è³Û5ƒÓÃR޼åû$n¶WÅ™é@ÒXú5’ÇÞþO“d™¢—¹뤣MGØ’\í5×r þiÄKÉu¢`1$ƒS‰Ü,ì4ؼ †3¥äPð±ÍÕ¦z&ƒQaüqxiÞ–¦’³bÌP’¥y~ŽÎ 5s²¥nMȱPAÍmò¹­Òæ–W.3»¥L‡–€ç°al!Õð6ãâöÄŽ“Õ Íš/¥±æ•¢Ý×My¶5/Ä๻øË‘]øÍ @ôâºYÀêß³\ýÏÕÿZýhHðÛÜ™:Uk ޵ý(ÚãC¦o A‰Ÿ…ôæ &êˆTóO±ÝPw$Û‚„7Oê6ÂäÄéJF¢£Òµ(#^s‚¦|¿¹ÿ¯/þª6*´ª†Ÿvnÿøëæ±7-æîÓì#ŸÏï–Jß÷vwé{¾À¾çóÅ|©°ç€JU,íJå½¢CòN¹\ÈÿƒäÓB÷z™L{cåÏ—ÇAßPŠÝ›°B“'îûš¼Øš¸„Ù®4ÉG †nÑóÛó´àaêfvS”O`ð$\T} ã¡ñÝxð¥O³.õÈ䵩â~ðØÏÑ ÷ƒ1”tŠdÜ¿ï&ØÅBÔ?pÜïÝñhœÇÁdº}73ŸÂÁðùeÊ«Ñcü@n%P“¶8?õ¦ 1þ·Q ›9R’Þp8BÛíéë¡÷ MDOÅv(·½!gt3퉳·»þcê¶L‡QÄ 144ûO#ôõ{y"­QnŸdÚý/ƒ à:  6{Àn'‡?ÆjL:·êKu4`u²xê9û¾szè`:Ï’ƒ½|^ÈG/ÓRíM&¤Rç¥ïV=“y²›/²À•~ûÛÇ0Â—Þø;:¸þYg½ÛÏ.v¹>R 
'(¡t­]d¿öµÞ/Oý1Eàxð<hR_rÝÏ毰›ç©§»ÂÂáõ]ÒõMOüi%Ä»Øbê2–”E¨»U*l^ O‰ðc9ï²X7Ó‰&>¾Ú¸rafIõ‰¤]˜°*Ã.Á¼}Àˆ²gP©eySò¸ìPfÌ ‡Ó#Nx¤%µÙ½e6,– ÑJº%‡ç{ ùoËËÝ%g çWbuð/íΈØ}²‰EÜÁK×ß<z7ƒÇÁô;ó‹€m4?íëݲYi{#ü»Š…, X<¨³¥2šã@sé –ÜðêŠW·]d _“J^÷ŒÃð–_5ræì¸Ï‚«îõ!n„B膻‡†Ç„v#ÞæV†ºSQ¶ÎðÒj‚ï·-×Ý žß0WÑ€ZU­Ô¶Yt&C>|“" <êü«ƒÝzá¸CeÛÐàæT`˜žkeèåë”ÙÄe&xóQ3;œK›óCpz..Žã]3<‰É¡—¹QÁ‚á1æêWr6@d—/åÕô ÿ„øj¿â‹áˉÆÏ>tWPä)£H<-Eÿ9øX¤ÄYŽGœ…ŸÙ©çO‚¯‰4éxx.˜ð±L‰s7qŒlJ]+6æJšhã—_xÝÔ(ðGBÊÌ<°}´µ›eþ¸ðq’Ù^<2+ýk:5 üIð5;q$÷³Ü§>PòÜGžåÕG·‡Û ßÏþ`x‰GnoÜØHB¼Kp©“LGs.?^vTŽ…°m\½õ\½ysüX¦ÎY¯n ¬k lzSŸt8ôŒ‡`EÕ) ÷ròïžÁÏ—Rüˆì®] [oØ)§wÛ¥pDÁ^¤ó•cg”vf öÜGè-Å4Ý;1”uð ¼WÃ×­¶Ràê žÉ²CoTUÞ=Z—žø%¡Ué¥ürÑàÅÐI­•¯‘f¥zŠy3=WR„^b 3{XƒœKX•9‡„îKUÅ(ÑK4å¾»AÌyÒ®UºôÂV?¨´½‚“„¯8uC¼Zþór‹¡Ü›N7Æ0DŒ_äêw¤nÔ~¯50F¦Qéàå$;´àqÆ‹Fãâ}½u"9Ùó!¿¯/;¬Ñ=÷¦Õ’ï¦Uäç3Ð×A}—.8uK³Ë¤Ò>hö2ìâSï’S÷fSÚòoÌü#ß\ê¶L¯SJ¡eGjKŠÒ7õ?„nê­*Ì ½\¶]¹&úG@U½%VÝJ7å²ùªÂ"½hÕŽ‚·ßJ‹«su(Â_iÈ%½ ¶ª¼èÒÇ“ôz俈·œXcwûÒ²da0v¼·¶¯í.#è 9ºªrrXCJ©!„îÆëjÞ·/Z'ëµG//±ˆñ¢`ºÓøH²eÓØel4”ZábxÚÀù£}Ðlð[£pŽBï¼åuOa©5aÚ€¬`õ#8®Ð«aø¸<÷óT€ÉŸÂ¢¨WqêÅBJhçØ`[}/®‹}ï"ìû†Ùaåû}Øœ#úpÞ}\ußc$Ò(íˆe¹€úÎ#3AîË+Ý»˜³„èP–(¡TÑ4v8"£1µ§‰{¼#Ô#¦é´{ýH{ÒÁ^I?2©HÚêÌóÇwÕ µÐ&I"\Ô'þ¹‚g¼™˜ÆÎ¥—Ø.•¯.¶,*¬LFæ^/,E]ãÒGޱÍóLç˜Ôê̘Í×Ö …oñˆ¬csÜV0òé0а¾ª6r«K¿XG€ã¶BáY†ªú¸-=-ĉȪ7I¢+™Åí鎸0ý°Ñ1'¿aÑsüÕD="âMø¡ŠŸÎá?2ÞV–ÀÇs¼p«bŒ|DÖÚ@¦JÓVÃzG×!ýC?ÒGUz¼¾™&¦¼†JÍN–E`âÿö ~ÃCm<žîèIëªÑpóà³C`T>øÉ°"øQT¤¥:¼7Mº9êr”˜:$RfAË`Ë :u”\–_¥#Dr8ÐM.îÈ6\®Ê$i–£,™=6nAaqn/i…½¹ Æ gÙÙhu«ñâ¾æÑÍ (~¯ q¯Š‘Ï­Ù2'â gÉoæ¤N þü•{Õ7¹¥ƒxõÐß¡¥¬G˜ï&¯Ú 8 `VXêè¶à'Ì#<}è ÉÓ`8xzyâ1@ƒ{P˜FOÿ?{oÞÞÆô‹¾ÿŽ>&3Ç–ZI-I<š{¹8‰¬Å¾¢ôÆò{æÉ#‘”ÄXyØÔXäœ|÷‹¥,…n ÙO‘Íj] ª ¿*ˆ[ëä® I7¾Nâ xùúwé&ò‚«:òµ7`Q@¤9‚¤è>öŸn£V~ѵÎ"â°ÄcmoŠ~C"‰Ü‹äî¦a6'ôÏaƒYt<Æå(aó‘ø†â*H ¸FÕQ•GŽë@‡0éO”J©Må Š¯…Çȳìíñ…Ækµ(v‡áÛSRqŸÊàßhÃÊó#m6ɨ:è–¨:¨): ¶µÖr/k˜ƒÖa¾òy$‡=4Ïb»ÚQÍŠºT »´yZ‹•záÞ(1?C½³ãd¶À!"Øè±7á‘-(úßI­öʱ7z]–p±ñP«aëF]?C¸ 'Iý'z 4IBêz4Oêò<©§Í”L•zÚTÁå„4¡ÿ™ö@3™ôs&ªWscT¯¦YíäAÑü©çž?ÑÛñªëù†)²fQ\ù?<‘P޹Äu c:!1Öè^=ìÞ&VJ¡I…àyU‡çʘZu¦=G¨PâɘU1Ý={v¡8#}- ¬Ó-æ->Ãñjú:Q|Š}= »Ó’âB½Åa¸^s`bnCË\VRYL­•F–3J®±ÚEFlf…*‚§¡ª“öˆô Æî¿»Ã1 ºÁóïº{Cæ$Ö¸ïÃD‚i·'XØ 
¶0Ó)Ö²Hë1©f’³rÒү쬜¶R…#.G­T1‡ÈplÑÁ˜„ƒEÉx2±|¦ÓÜ5†ë¦òæÓYã´¼Î|„Äý:×™»0KÒȸx&¦ŽJ‘É@¿¢Ës5anhÔ¡äÑh¬' “~r™Dô `·õèáG2¸É“LJ*Ö롛ʴÆ‹Ptg­3,2ë:ÿ†FÞ}» Ðèêk÷õÛý!I"r?654xàãQ†ÎÈŠ6ˆ>ã¾ö®"âÕZÄ’Êl–V·‹¾Ñ”*Ä¯ÒÆJã-Í·ô:üÌ ²˜ºÕ¨ H V¶@ÜXIn¬G7ÖMn¬²ˆÓ¹)uÍ éwX(¥O}“!ý.¥?¢ô©ëYH¿ÇŸczCæêÞ´ÏB¡I˜Å´ÕèR—>bÂ[`·âUŠ4˜±L…÷ü˜4W±KD+6iŽ Iš°½Ô'º‰ ‹:cJÖúC“oŽÉd‹Z¨$D7téJ¤T; ®ýŒ€Ð84Ô=[ÆR–D€¥õ<˜ê]5|%ð•d¬›‡7l®YÈIÁhî—ЦI«‚¤EvÅ -°„ð‚l‘&¦) ØŒÖLC×èi}Œ‡™b6çÉ™‹&J Ƨ¬v¦Ðw„0<£A?Õ¤ŸÜäLhHÌN@càÓ›H´Œ4(ó¹)±ñ Š„y=@d&3É´y5™ñš)7²ýšY}£›‹’ó_-x²ßöI¬,Ñ5Íß|ë£YÎåeÖ•h¢§w1F¦šL&ë1¹ Í/îÜDħ§ù};åǶu¿ü¨œUB¹ÛÛêµ2p”c˜–Iý„[ì5‡1(çåüŒ"Õ;L‹È 0åMh™œ6´·—µ·—ÕÛŕѸ#®&iïÛ¦CU}5¢Ž¢V DôkS§1Fø^ySBSOEê‡Ì;Å€£ýTÑÌñÁ6ìé¨X Vdp‚ $¸¹bŒýM°r!¦•à4÷ˆºB´µ!Bíêç HˆY|xrqÂÃ\% <„I-oPë¢õé}㜇¸É@jM¿ ó¢ô» õ›€ª³™Í@‚I£R§‰’ˆ;.¾5í¤0Ǹ ¾ C@VÌ~ÍþfðÈÆ¯ õôr¬ÈiããÙæßñ¥Œ¶åØQ“ñ’SAðíOÙDcMÆè]¤Ié§øÞõׯCÔ*ù ~úX]§§–ãË/¤ƒ&=àu)¤ÄªûÌ×󺤽µߺ±¥À·uœNž!Vsá¿kLJMÆ‹MØ !@ ¸S€FŸy¨¥KußdH²P¤Ë8‘¤í¨ÑX.&Õ …‘‡°©l…b…3ÀYNuAÂ'?î° g4F­Ô8*ˆ×e´Îm·Ÿ¾–ÖäÝèx?¬Â§ýó!V§¢$!q.;a_WCX‡ZƒBKéýÇ?DZ~)Œ[­ëú]7íw]×ïºE¿ëýæU©Í³m“VtÖÿvBRãêÚäiý{‘VM(çÀÜÀƒ–Í`t€·ìl¬ùk&bŽ|Ø EŽúIŽ)oã:x‘‡JlE ÒŠÜÊ>àëTâE3\|䳿.EØEL¥• ÀszÔq}Ð< ‚>?ºÍ.iu g¶x zT<Þúbè-ò¿‹„~L7$"¹žcès\®¯'Bg αý ƒ&óGFŸëÔG<1›äÈüܯŧãìÿ-åŽøHÂÃÆ¯”%¿^¼Àù¯ à}ºäyà¹ð¼õöõñÙë·lܼ}}qlB:yú!J ·Ž~ø‰ìرc¶» °1Æ©GGÀO…Sß·SÿÑÃÔe˜úS¢Ô.åAu†`N ÿ)N=rÓšK)8M˜K *SÚT2Âbfãy&¯Ô®B 0Bü·Ø¯¬I7‘U…ᘠ*ªÑú‰Ò*Û;dÍ"óSÓi} À•) nGÆìk6µ)±R Œ(ž ‹Ï`ô§Ê¿ ?‚o‡6ßë÷ì^ú5‚´_h†.D`£à‚Œ‚‹Ÿ(9 W†÷]wøÐõ0³ÉaRãl6a#ËÙ/,‘rC'Á©ñ×+_´Ä-#ˆ¿Ù~l=¶[£ŽÍ!ÇšD½j±Â›§A›á&©¢ +-U 1ù«%ÊÀg€ŠMðÂ6`as¤p&ˆÃ#l6Gô,l ¶A›Ã :o¶D›B€“]ኜ[ef&ÁL^(^âÔû“Í”ú4Ù¨_È/©‰Z™ø£ü«€ VÖH"¤‡[!t²uYX¿ð©ˆ•»LO•ð-m€Î…~6´…˜²9”É“"ÏÜ™®ô¥´%R:Óõ’Ü’º`ZAœ­ð͸٬,À›-Ê"ÂÙ¦,âšÍ1Ê"¶ÙŸ,"šMÀÉæÈdølKÎ’SJÅ ’åLV§*@dCr*>1‚l†?¶çCççÀg*U–hã™HæÌ¾™"Œ-áÅ™.¡n‰LO¥ÿcÓë$‰e²AþC dëÀUÒ5…L·y,ê°†!gGK¤²,ÑM ãæ#XR–e†pävO‹ê6€t›ã¹í&¾ŠäN…që1Ü7„ÞV Û¦í¦]ÙVQòŒÀ¦]Ñ#SmnuK F¤Ã‘{|¶Çg¿p|¶·XY'pl˜Mö*<[ì¬6›d‹ÕhÁÙä×9´+fmñ¥[y DfËNú®RC²[¥ÆEH¯ÀhÛ&•ÂÄ?Ì@É ø{ܽé -Ã÷nïFZP2yÚæ…Òæ…XÏÅ`ÐÕ‚’%Úãþ·. 
J^\ 4÷NRaÐî1Ђ˜¡BWÄ2 r%Î ¸‰HlktÕ:²ˆB„³¶ùË›É!^ ØLÎðòÀæÂ€Íì\×ð¬y$ƒµ˜fz ½^6Å:ë¡Îó@9{|³=¾y HóÂá’SF‘9Â"7š1ù\B$à@ÉzNÁxä0Ãu„# „l ?nÖ°$&ÿo•D(2Ÿ›¸Q=Pß©lÌ2e_Œ\–)ù<Úç«k–ñÌìàÊ]ñ<®Ùל@™é :“kÓ/MüÃiø9Ä>çIº]Ž.Xƒ™éE‚]ÄKõ¤;ìG(/¢¥Ðמ,nMó!Üîœ_€Hü QÉ‹eӺ˭·eÑÈ(RGtB†õîäÌ^ b½’àa’óAè0‡¸Ã0Ö'1ÆúT“ œ8ýfê•(<¦WêÕŒ0½òø@ ^š©?ªW¥"*ç(3ñu˜ž@Xç?Ñ{Ò°±$â¢ÑÔw3$Ä «·IêmþDoÊHùûƒa&a£y7õä‰Êã"pGÃבgŒiP  Y¯"‰ELm®àp-{¯O ô>5"Á´ÿRàÂrf,6B#C@1²Ê€LunLIþ¦Ñ‘þØ<¹quÚäÆÓ¥Ú5Ln¼3mrãiäq æ$c&ÊÛ?QT]¹^›eŸ4N(ŒøÌê:Ê™T‰¸âÈTd"‘÷Ãf×9jMò%eFÆÉ…QŒ·%VØ:´ý>™©lDàé1| …l6F;v?v„gœáØi4v :f#Ç #óKÈ[½»y«Í€ÜÉ]6Xîä®jœêš¡³5ÚMÕr_–#`¼ÍdïY&ÈÞÏ“ðú[yrëÆHr”3—ôvŽôØæ¹¤·Ó‘ÛuË\Ò6bÇ we>_D.iSµm=l?ZÊfm[Iì‡Èƒ¡¦%O ld„@§E¡Gï B¢çd Ü7 ¢WÊ’³?sAƦ‘É\í? žF]t5 ûϽ !þëþ°wÛ{¼ºÏ®‚¹êŒ&9Ëêí ™Œ²(ñ»%U³â ¿‡Äü#ߣuüý ÑA7m_%)™«?_Ô&é푳ʦõèIÈ=jöu©ð«×~*±²y–Rìesñ™€u8Ùœ#8…&êÓI‡ă$~ƪ}±ò¹xÍ^AcZù¼¯ÀHF­æ‘æªëÙGNü˜Ë¡ù±¬‹e*Wà#'ª@,ÓŽešG,Ó^Ë´;m,SŠ7Wï¬;¾OšØó‘L>’i%#™ dÚ± d²cÚˆE’üÌìÏœB–ªf!KU³¥Œ}`—EÞG²ŽZ "i«U«Jo¸„<¹A‚Ó›!i¼î¾œ¦Ô£3jaMùΨ‰}I=‚‚ÑAAÞ¾¦×M±×=áý¤1_yÒU öbC\3géÿŽ$±Œ¸Ã/t2P¦X6‹ìÚYÎã-´÷kN·8;ŠN·8;òA`‹ºuzdŸqÁbÉÂH2âÅòD„ù/b¶ª'^¼?7‹2Ãc'¼ûŒÞ~Æî?ã*È:#>;ŠƒÏ>`Aûá=ûé(mÞ©±¬C\,Z| Æ1®ã¤£éë±:C Fcj.Qy?Ås 8ƒê^¡Jý=7?"ãü%‘qv‘qÆÅiÙ†’5xØ&Ãÿ¯‰GôK ,‹¢ÉN̈|ÀS˜‡eéb²ržyÁή¼´i§]hŽº0 µI?ä"ôns^tþ| õl#]àÚQ¤#í1 çš0*š °€.Š  Ã¡†d‘ß)ð‡BRXd@G­bËTW‹vò„Ø£GÀš .x+í$ŽjQªŸY(ØÉò„‚eï õÃ4R)5¾G:lÁMœÕ3ű%VBèjè#*ølÌ0ÉøYTY×ÃÊõ›Ÿ15]@×}/d%xS:Hmš#(t„æÖÏÂ:ÙãÓ/©[¿yôü¡†›öø(sX8žÈà¡Ò†vTþC‚p×±Uz‡ä²×EFaZa Š kúœä>jáÎìì”3UXªe³Ýýø¤9bewÇ,„haÕ÷==LYY±˜/,Úî,|ž…N1 Ù˘3‹{üˆð¡ÜLÍ„žÅ̰’ô:2™Y™êБ¼'¦ÌæÐ3î›…%‚½ÜaÜX)<ÉúŒ÷蟙WÁ%Ô>’Q ˜aX|þÈÙQrR‡ú$FB¡økâCŸé„Д'äyå;E¾rþ+y$µ]žÁ+%þ“"^©rúJôBWå4•JÁ§©Té4«£Q*q,]+‰¥Ëº©jsD º;¢ÄE(+BTÞY67¥êbš‹Ù{±å ¤J eÜTZŠPgTå4:‹õ1?Îè|˜… CЄ±@YŸ–’ÞqNÌöòžS]¾sbò¾7ësbL֜̾å='µû÷Oèz~2¥7S(´ŽÉ;¼ÍÂcf‡K$U©.g¡ªLDÿâG(§Q°,/†P„!̉hxóáCVû¬ºtf&LªŠš=îM‰.E‹¶À¦Ô–’¹FÂ}0šF[Á`´)ŽÕŠâ¿0'ZïQíì=ãýûÖÅ1 ?S–þÐÎVå±båŠ"'ø . B>]‹+p,)q<[ÕÑbHè>D$V£ž&u¼â,¯A"LÌñvôxF!x>!x¬–Ð<Í™a|ŸUãà;x’Ä «€³ €[œh´h\«¯V–2¦ ÃѪ|tYöIcJÀýˆJQ˜™4ù’ƒÆP|Ò|v¡Œ¦{ /’` /¼XM1 Ì. 
%Ê,ætwÑbNë\Щ.âýftZI¢N+>ìt ÏÌŠCIªIj¨ñ§Ó„Ÿ*ѦõuTÙ4‹Úä#SÔºiôgj¼©Vª¯GŽ7M +M©æ·ˆË|Zh¸é‡µ´óY4jr2"y‚¨¯DêƒP.µG¡V Cm’×ø‹GuŽÅrMŽª?%põÃQ«Fá¨ôŒ¿Üñ¨ìx8 ÔâRmRËåE¤f¾r£U¬ 0µ‹š{z5!b Ê&QÀãÆ´'æÑ³H •û¡Á±›ínªŒ³ðèQ5Ø« ×±Ž%Ó÷Å“ñøÑ[Ó(óžÏÆ}bš¹QǬº¥~Æçiºþ1_ø³>®øWì&®Ø&¬ØGCe£ŠM¢|7ŒWŒ‰M?‰<†3oLl|§Žd9Œ«Sç ¬X¾ãýYE›} K©›kD*u±’ªÆ¤Æê*Û kñÆ»sóIžõ`ÏWäx©aÌ)g{ø0f³0fë@bw‘ÂÊÁ¡fgsJ§†&çZjo° IÃY‹I+KPrtháQÉIÅììÓ;â`ˆ¢‘Ã(ccAX\8reyÑw–/ Ü™{8²ÉÚ‘Ù7ŽV•Žü¡VÏ 6 Y6?´Ñ‡,§ÛeáŒe½BÁ—ßbO =i’¸‘“S(+–G_†îgâÏ>¦™«YÃ÷:l„ºBɦxœ’2×qhºKøü¦Ïn|4#’žØúTФÌå9ã±®;E›“³´CÊŒØüʦet7;-´¾ÎdàF²Ý–‚K¼áÃÒ[%‘ÚVÕðQÝR ¶]wâ¨n!PÛ®ŽB£ò…óyÍDTnÁO–XHÐñ§’õÀ? çL8ø?pèK“ÎhäU® ÈBìP®‰lþ² \RØãç—EFú)âµO#6†Öýó}8 Š`ÞŠ&ÌØ¡¶­îÐu«„ê¡È©'"ëCÖœ… „¬œ‚ìfÌïÄã‚fä<9Y§ ‰›•æy,„9$oL›Ò‰ÂYQ­¦fÑðG:û,>‹ÆfѰ>Z“˜B—oÃyF m¿×5×…µ ½w—VƒÑš¥ÕØ-<žä¶AõuŸ`ÃGÖ/`d½8º]fÙHS<#•È*í}yrmì/¯lLBT½ÄˆkòãJ ÊaYjäOÖÁýç—ŸÞG"¡yX#‘³tÆ*g† }­÷†Ž¶ãHŸ‰‚ мWè„€Nùp0Ü©Qˆ)® $î=ÒoW,A-úÞa‘°Ã§öèiH@ÿç©7ìR®•âÜ­„¾õFwèéñÛÕã¨ÛAÝû0Ò5èŽH¸Þ°û6f§ l@½ ¢Ÿ×Áø5ÂÝ~=¾ö >Š?W^Ÿåƒ}ª½1#Б³¿\¤v‚ÐJßZ˸Zö7ã”ûÌóí¥t"úš²»õñMÔ3“ä$3ØÖ­ìaÜ$“œï‹§²B·V£PÐpS )™‘9³’D)?6ifüuÓ ƒ†&½Eý=îé!–¡,¡Åý3ÿò³åW¢ðüüi]ž•×rqzxeˆû‘–£š1¸'‘DWÓ*Ð_㪧Mž æMÈH€Ÿ›€€ñ뢪 á¿Ò`Åø‡»øÌþ,fÃpÇøFñŠE>®“JI”®‚ü!7°ø7>&2zE¿™Þs·ƒ_E4R2N“@KÆ8|Ž®FXú†«Tb²³h'Þ%Äg‰QüÄ?ŒùÈ¢éÃ2¬#‰ ü€{K¯Ó ¯ÿB¯›)Ðî‰ð¢û‡¸i2PÖÃ, è{qÊ ý9Ü:¥0\ ’ ‰~úÝdqÅ ë¤;ìËu!Î×^¥Ýÿ@¡ßo+º½ ¡¿‰•kü5}·#  âš¡]ËPs^§Ô°æ¸Æ´úè3«Oó¼ð> p5¹Tw¡bŠ˜uóY>„Üb‘³á9Æñ/@müxÚÚ’Ïßs ~x+pú{Cú °yFµÆdð%±E>kTE¢'*qiô8ñ%¿*†ášB,Þ‰2ïhhå÷šŒälPFMUÙE”šÑÒ'dSºZêŠu‡¨‘]hˆ$£–“ÜTpr[øŽå4À³5<ËØê'+.`MF‹f"‘Ešì®(FD·˜Ì+ †ì.±gÑ®º.…·t)y±m€TÀ”Ñ&&Œ¢½¬¡ºÏZIÔ¦œsb?«¡†ÈÃN1-Ô#/ö¦­2 ô,Wõá(V£ˆTe ü†æ5þNSþ-Ç›©L6F[ ÄÄ·Ø'Á @Dk¡­)ùG‰;$ŽÿË~ !K!Ì(ÆgÒï<î(„iëaΙ“ÿŽ 4ö à‚ò„äk»ÿ0Àêih"ÓÌ7ÞZ/ÞZÏa¨gÚÇ%šbÓØJ. 
9s[¹ê” 1¨zëðHvjGc<4ñÖ‹!îä¿õF²8%s›Â´‹Ô®¯³®n†y1i—73sb²‡ÚŒ`nfäµ4±µž6ŽSÞÅÙÙaª•þ!JC‰ ‰P"댡!_ëùmæ‰ãlžO†’tÿÿßÙ97>#»ªxb\y9ínÔéÞô{REä®uÌI¶œ’Dl©¡WˆúÍ®‘>G­|Ê´(Ø7ÚT`æt|¡ñš#®GÄu™¸.7€ªÅ¥ª‰1Þ ²›σn›xzÙÚPÂR»E§]Ðõº%šùñ=$ ±ÂZðe"h>çÆ–TõíÕÓ-sIÿ"šø¼/eìÓ˜dúh¬¼_¢_b¿ašùF£ö(Û’l{ŒQª¦ÂL”0à‘O ¤fQ,¡ÓѧïCUík<$§–ÎüŸé—ø>:@ã»ñýìJ\‹ší…g@СęÚF\¡€<6ð@åŽÙ|Z®!)Šbœ†wœ+âˆýé\Lgd\…^†ÓòyGƒzEb¨Cæ Óÿ3˜¶—‡iñ“΋Uºõæ<=U’=UigŽUèGÙÚlÿ!'ÛÃyÚ:uÎéƒbL&½é à[úAÿ~Ì9V |;ƒk4yßl‹Â)³ĺȔr9÷hµ¨¡š5Qœ8íµ $+nê›Aæò7ƒÇ´IÓ € BHófÕDhüË­ê' _4®§&Í'`ÂûÌ…ÊûŒY„fÊq5ïÏñ]CŽ›; &HÆ"Re.´(k­]ži¡Æò/iêi‘±¾†T™k,2Ðnæ7ÒS‚¤eæ'¡¤nIÁß© ’i2±Õ„iÄ}³Ùwßb…¬ ³DƒÙ²„f— Í’¸vZe[ëÚŒ{ÚŠÄI““¡!q2wã]/PB¼uj(† á„Îx$Nø‘"6¼ayÃN¾}ÿIL¥C·•)%ÙËöîþ.WLþ¼ýEM¬ÃúE–ù†Ò ¾¼I|ëÄs†Ûûå}”‰VjŽfUhîûJY”«@›}‡ÌÕ{ß憎÷¾;ê%D²MFÞà«a—˜ú÷Ý«`„î®îoÐU»ýDvêc kyw[À»¹]ݳÇ^öú‘s‰¡ ¸8g¶"ÕõI‡JÑ[.q/YÔËÌkŠ+ˆkæceº(¹ý(J®¢FÉÅŒ_jìŒMÁBÞ7Ã`tØBÇï[-tþkíýZ;–£)~FµFãâ¬vþ>ŽÓ¡¡Spyÿ™ÄDÑÚåŠhk@‘:¤Ä1,è> cAÐùKywk/„hFŸIÀRxë~î( e—ç¤ñ+´Ë¹B¾8m *–à‘ΰØ?–Î4ëá)ö­Ï6ÑÙlêuº#²à·S ·SôÛ˜ŽÃ‚ÙøôÏ×™7þìC~d›±ßWL*}&J4Ò•hqù%Y E. äVϬÇãQ°ìv0Šz8}xÃÏ5,ûèXNÃ[Ÿ˜ÒtJМ~< :@‡‡Ò-ci¢Ý{ænæá'ÒìéþŸ§xæðèbŠ(¾ ÿ$…Ÿ\³˜\R5šGêþÚÌà§Â¦I´}+ƒ* d®(ðÒÔy %¶˜ ‡áˆz¬©¾c=ˆè ¹èTpüÌ~$C"¾'‚¡’ÅdôÊ‹Z‚F é¯ÁýX |ª™Ÿ©xo SK…äLÕ —ûCM=À„>¼ôü%ÑÞ“¬>?¤¬>˜Z€$y¯C,W¡aÏ3z|z¸&P« ®zCŠ€^=vúñOO=¼L<ÜI ^0ö®ŸÈJuÝ}ëbáûf{k—ntâ¿~é™zéÑÆ>äõ f8ÁO>pí“Mâ#“JZµÿ~/O0m%ù‚Œ–ÉžŽ_1~Hp W÷½ ±ß /þ}5ì‘j.iÐ ‘?üÈI¬"†wv÷9_à¾~¿ò§"î Âd¾"%¡ú99ªkjÝÇv—̦0þ :$úÙ¢´]÷Ÿ;Ü *Óéƒ-¸x¸Þ {Æè."„+¥ji¯´O.üX¢Þ‹nýúÖ»¿Ç•aÁ•VÁùðxce7íÆ]ðžrÒXƒ<ÔS€ŸhÔG×ÃîÕW„‡üËÚh<èf°(ôtsß¿b™ß…Ÿ{W·ïПb~_c¯œ-ieDj¤/Uº)öG·ìùéG28~‡ˆnc¢=JªÒ×”Ð<€D¤OmŸ:IŸ:ú>u’>ut}ê$}êhûß®aÇÉG˜¨ÝŒC"ò&ºzމÈG¨\=\=S"öîyÒ©Ž¶S¤Sm§:I§:úNu’Nu´j'jk;ÕN:ÕÖvªtª­ïT;éT[Û©IÒ©‰¶S“¤Sm§&I§&úNM’NM´ :É„éègBÓÑÒ$u´ÑÃŒ?jjЉ:Z¢vBÔÖM¢IJM1U[ß© ׫IJ·ð“?…Ý"µD턨­i0®i¢­i’Ô4ÑÖ<B8ý¨ÁS$ÉGøÍ$5u´5u’š:Úš‚v\û¨¡Šëbá'q]죆*®‹}Ô̆xÜ0æ—±Š~«´ï¾vÑD?«½àH:0I›#iÃ$Žd“ÁøáßÑü ŸŠkŽâ¤pˆâ¶;Dþ u#!ÀŸ¡68‚\C…«¡VQáªP):3: 3:3: 3:3: 3:3:3:3:3:3:3:3: 3:3: 3ÚwÝøAèg€âš£€˜Ñ¾p3Ú˜‘ ¤ŸAŠ6Gцz³ƒ|†ºÁü"wU¸ n 8 ˆ_Ž__Ž__Ž__Ž__Ž__Ž_ˆ_Ž_ˆ_Ž__Ž__X$RãâWpËIž[PòŒ† ù 
PpuŒà:8é5¥×hp­`+Å5H1à(E‡ãGäG‡ãGäG‡ãGäG‡ãGäG‡ãGäG‡ãGäG‡ãGäG‡ãGäG›ãGäG›ãGäG›ãGäG›ãGäG›ãGäG›ãGäG›ãGäG›ãGäÇ„ãÇäÇ„ãÇäÇ„ãÇäÇ„ãÇäÇ„ãÇäÇ„ãÇäÇ„ãÇäÇ„ãÇ„ñƒW®ªzåªÊ)WUX¹âH:0I›#iÃ$Žd¢©/ÑK%ŸÁ…çkB1ü /M_ŠŠJ2áš™€ÍL¸f&`3®™ Ü ÒQ3ô3¨LÆä3¨¥}M(€ŽPÝíkBuܸò”ØE Rt¸‡é€Óá¦>L‡{˜ø0îa:ðÃt¸‡é€Óá¦>L›{˜6ø0mîaÚàô¹‡iƒÓæ¦ ?L›{˜6ø0mîaÚàÃL¸‡™€3áf>Ì„{˜ ø0îa&ðÃL¸‡™€3áfÂæoÝ{Îõyñ©Qk½×9?Ñç÷gXŒÙ;?[gÏ!Wˆ&ô3š-J*jh+JhN ÎóÙÔuˆs|6µâüž„H[OBö‡óz¶Z¿Õ>4‰×¬Õøøé¤Iœf­Úg˜†sy’3†kŸÁN'jê:Ä9<›ºqþΦ¶C‰»ó°©ëçílè:Ä9;ºq¾Î†¶C‰«ó°¡ëçéü¢ëçèü¢ëççü¢íPâæ<ü¢ëçål55C1!iêH¸jšºz8g«Õ¨ÃÅ4M çálèh8ç}=|EÚš¸.}Ñ÷)qJ6ðã_èhÚ (†8çæ]=œoó‹®εÙ:=;©ÀÓ>öG¶j­ Xä%õ4uõp~ͦ®έ‰å¶Gm®K mŸ&\§¾h{5áºõEÛ¯Ž0â¡kí×l´*_Þ+½à(š çÕl4@ Î©ÙøRp>Mà~ÿÞ_áî¯@T¸ ΗÙhBLà\™˜`çÉÄ8Gf£ 0óc’.¿¸ßU&p^Lz?ðŒŽ`çÃl4~}¯>çÂÄ8&&˜À90 Ì„ ˆ Í4 %TØÀy/I€ßcg½à€#ØÄ¹._ 6qžKL°‰s\b€Mœß²ñb綤 ›8¯%é¤ÂÎiIºüž°é Ä&ÎeI+Pç\â!“’ œ\ù’+‰ÿ¥Ñ:?ƒ¸ÎÁ8ÉtI¦ÄùBšhAMpuˆ`À|R 8O%™÷`àD ÄÎOÙhB|àÜ”˜¬N8'%ix ŽMˆœ‹’4¡pJ2³!ÙÀIˆœ²Ñ€øÀ¹'1X'Ÿ >pÎIÒð8×$iB%à<“dêB“Ÿ8¿dã ÄÎ-‰ À8ñsJ’&€§àøðâç’$M`kd£UÕ"Ž¢ RpþÈF¤àÜ‘/p‰›.†'ÐBò5!8;Wš¯ AE¡à\‘t%QÚà<‘T C5Äm|ÛàüT +mpnHªÿAq'ˆv¥6‘x‡˜ú¥R$Î!*ÉÕ6ß•à ç¤RT%࣠=ç¤ú‘ÒIÎýÈ(•‚{Œ&ôœó‘ŠQuÐpÑ€ƒs=6ÐcpžGB ;î1àcp~G*GÕ6¸Çh@Áy© S ¸Çø=çs$ÀÈæã øœÇ‘ŠAµ î1¾ÐÇý§¿’ÃKµGöw ¸%D£ -!"”T¤­'!y€h ¨%D£ -!"”T¤­'!ûÁ,U^EYªžd)Ó€Ku¾«K…@Xª. 
`©tÂWª3W…W*4ºR¡À•J‡ l¥:Uh¥B +X©tÂU*ï^…U*oCEUª/ž£ÑÕ*ÕŠTL¥úRUH¥ÊgQ©ÖÃW¤­ T*u1GaŒ¥„~os0JµzÿDwèdœèîŒ1nRâÔûC&¾³û;ºûC§bGwäPL@’*EØ…¶¶¡#1Gªa7&Ú~D#1AEæE*­+˜Hå-+Hå=*ˆHHy‘*„‡T $8$ À hHHûDI€ßE,$¨½r÷§9R= ¤J á U  ¨¿ Ò]QÒàw ê¾ÜýiÎC†€„ì%©HøG•@‚?ʳˆ~„ð#d’¡¤“ dè#hÒq÷§9ð2ùÜ£J ÁU õ(ç"è"0U‰’N*l UÊÝŸæ/z»)$ RwÞ)|ኔÿ£÷X)¾ôüÛÕòNYÊÿ±³SÞóù?\”×dœW¶Z§µO,=òiíä=úø3"WZ¿~<ÇÔ•,곿½9©}Àw¬ãÇb‰GXæèðòa‹¾on¬•÷¶X.Åó_qW~ýxÜDÿ];¾ f•ÕÎ?®íÉlåXJé[äÓéÚ6*£ VawÑ!ÿëŽQ-Ga-<5ÿ«Y-‡§Ï¨F}6.×H}xl¼©5Š”U£ÚéIkëÛ[Û%üo­—ãOÛ[û¥7Û[?n( ±›Ì gR‚õ—·ª¥7å­²Z?½‡¥Ê -ÊsDgÍ~ÊFÇj‘ ÄÆMýµýeýw¶õìè™ÝgAoÙŸ‘uËþXÒìè™SÌŠþÉn<Ÿ™Íûµ¤§>7«úÍèç½Àù’Zý¯Z|ú_e{oWÑÿ*U¯ÿ¹(Lÿ«zýÐÿ¶ãßyí®„•'E,aý'ºXÿxþ«[½outªKÉŠž9†¬tKúÀ²ÿÄeÓ;zæ µªßŒ~ÞB˗ЏþwæâÿÁÿ©þŸ¿þ»(¯›Þÿ³ôëÿªù}°Ò€°ò€°Z¢Ôj­$•©¬ã´Ž¥G§céÑ a^ô¶õÛyP:–Ž¥G§céÑéXzt:–Ž¥G'ÄÌÙ¼_Kz;Mˆ±³ªß{h|É(ŠþçÞÿSÞÝßWô¿ª÷ÿ8)LÿóþŸ×ÿ"íojÿÏLt«òVU©LÒ­l<#KOMÇÒSÓ±ôÔt,=/åñ¤ˆòÿäêk÷¦wŸ&s” ù¿_)W$ù¿[ÝöòßIùÛÚßèÙlÑ›§G â¿j£oëÁ€ƒøØ¡gb‘Ïä8,:h¶ZûÛÚZçþß÷äð·­·[kÑa/_'W7âá´Õ{\ÃíÔ†í»Þ¿»è†ÖÿØmwƒ€½5êG­®Öqe_Ç-Ö7ð‡ú1ý@nÿxýG·= ï&Ýð OJAƒa·Ý zýǵ€ôô¾ÜÇõ´È|,oõ×ÂËîre«O>ÒÈ$üñ*: Œ^fÁ»ÉUîÆ*»±šq#Ðcîl®Ç¾Ç¸Ç¾Ç¤Ç¬wøw’wrô8äqt0fÒé6ßévÜé6ßévÒévV§Û|§ÛI§ÛùÙ¬vzÂwzv® ßëIÒëIV¯'|¯'I¯'Y½¾º¿ÿ áÿ•Éÿ*äUD.–BtÀ•Q‡ýi³?ö‡TB’ #©0’ #©ŠjHQeUFQeUÌëûîÕãOk> 77˜MÃ.Ú$}Ýê?Ðó&û½ý‡Ž„tŸ»1Áó& g ~€ç€¼¿Q™ Dr¡^hGÚá…Ita^`•TâJ*¬’J\I…UR‰+©°J*q%•°’j\I•UR+©²Jªq%UVI5®s‰ˆÆ“K¨{Õ¾ãÆÑßȰk%âg-z䟰ȊÐF(/FdbüåïëÇkÍ÷gD¨‘ODmm 7ý˜]¹P ¦Ç"0j¢’4QIš¨5QáÈ…ZÄ&ªIÕ¤‰ªQUŽ\¨%lâo¨™ÈÀµh`;Û:&lëÄlë$lë(lë$lëpl똰­³­“°­£°­“°­Ã±­c¶N̶N¶ŽÌ6”Œ·Pª­Esˆ´Úæx×6á];æ];á][á];á]›ã]Û„wí˜wí„wm…wí„wmŽwmÞµcÞµÞµeÞ%c.ZÖ"qCZp¼›˜ðnón’ðn¢ðn’ðnÂñnb»IÌ»I»‰Â»I» Ç»‰ ï&1ï& ï&ʸCTõ:»8¥æKkmøDŽÿ·Û¾ë£ïÞ¼ù'½|xú ªÂ–ñÔÂß„cAÿñæÍwø–­·± ûg¸Æ‘Å$½2åŒÑ¤²húãÚ:†µ…‡îÕµ“êÚvK«5+ÁµN j­˜ð¯‚þAÔ²JÄÆJv™l$uvXÃ:³™I*m³JÛvÍfiå/ÿ@\÷_BÆfÔ]5al•1¶1¶š]g&c«Œ±Õˆ±uf3¶Ê[kÞÑlÆVc«Ñˆ¥uÿeí/k[­‹Ÿ>üü¾õ²bëmµ×Ö¶n¶ú?9ósƒÈŒŸ#ÓFßܺYÛj‡?7ðÏo±‘˜˜ˆ ¶=WÈ‹,¢ÿ'˜þ£¬â?ªÿ᤼nyü‡ÿñ8x¯"°Ä–8ÀXâ@KH`‰ ,q %$°Ä–8ÀXâ@KH`‰ ,q Çø¢/Šþ· øÝŠ×ÿ\¦ÿyüÇ‚ã?Š‹ÿ™þ#°Ä–øÀÿXâ?KüG°¬øÉÂäÿØÙõòßEyýÅÛÿÞþ_PûqcU'–^‚‰¥—`b™ÿcb™ÿcb™ÿcbé%˜Xz &–^‚‰¥—`bé%˜Xz &–ù?&–ù?&–ù?&–ù?&–ù?&>ÿÇjEÿ[üÞþwS˜þçíÿcÿ/¶Nec•O,óL,½ 
K¯ÂÄÒ«0±Ìÿ1±ôBL,óL–ÇkáKQ…­ÿÁ°ý6LCNXÚjÚFÆú/2üǾZ­ì“øŸí}¿þ»(,3¾ðîñrD †ýÛáÕêè 5PïqÔÞ\µ»4d‡R‡Ä¿ {£Q÷]ÑQ·7ºBçÝáÕc/¸ëm!Ty[.¿-ÿøãŒøíÚß"Øw´á­»ïäK¿ßìï“Ë4û4×»uÔî?#ÒtZ ¿P¢Í«ûÁÝ•xésiMY¸XIjÁí~.…ô—%ñ‡ËµÿàžÝtº74«÷á)=Å=üHÿžàn¤žVYøõ’|½|GŽ ºHÎ~Š×mé¹›öøá’ö'Ê"v"dÉ+Z_ …À=xUWB—É·Ëwk"Qz+óŸœÂæzþWöÕù¿íç¿‹"ÌöîMç?¥v8ÿI{)ó_;ÙWd®³Çæzú$7×ДùONÝû=xº.PdíÿoïîIó¯RñøO'E˜ÿÉ»7•ä"Ðá+÷÷á[núCrXE@Œ®»7$€ôo“»\IŽè¹R¥‡,'@iÂߢH»NZ[ta’p$[ ”}"+±âË’Hþ?¹–ÿ{ÛŠüß){ùï¢ÈòŸ½û ùÿËÿnÆðäxxÊ\`õQÖ W€§%Xž¬V€'¿¬`Qä?*8µÿËÛåªbÿWw=þËIä?{÷™‚¿ƒÂCR7ù'‰m„vßV¶ß2ûÁ¨ÓëoÝýS¼tß»®Y‰vÒ‘u&…¹³è>ž5ߟ¡þ°Ór]!ONÙ;'‡ìÕx¹~R ]mä×£cžöèBK¬óX¢M¡éûÎUzÝ"4qÝAU„7ƒ>Ò9àï¼öŽ_À¿Ö΢Ń|¦( Y£äe缆^á$E[vҗ“ƒ“’¸&âG? Ÿ±@Jõàw@?]]h:w‚N –TÜ:yòå£cüš•‹ømëÖäg“5™¼ÇƒíêÑÿá»èÓ'dïèæ¾5B›ÏÏh=ü¼BFw%SäÅ#î2´.vîêþOå_%²ýŠÿ¾‹º0bË=ùóÌÑo>mŽ“¯˜z3‘NoŽž¶éMÝg,*i…l~5°â÷ó°ÿÐ>ë;¹ú£?l†·ô7þ~\¹*Õ‚(³.ß u:ÉÑÁjÐèßÓûiêŠÿ„#Ÿ±‰Óöé… <Ìqm¯O_¿ i©ÆÜ Ÿg’7ú·4Ä[ÂÏÿIæ&“g,CÔz¥‰<*à¾+}wxß½½º º#‚Ø)¡ÿÕùßß•ÂËï’úTžms¿ª|ŽvGOÃÇèŸá_@°NwüÜ•ŸéOëxæÇIFxHÊ6»)T1OJ±úŠM±èÃE¼©’ùUAE…¨ÀRÑ'{F£Š_›8z"®£GbUùñ¨Ê&–ƒ0Uêøaszû_T$¼“(ãÞàðî7:ÿé=˜]òuz ¾.Þ"÷5äÄ ú'÷Ä×þ#¼°G|Ï úÇ?î±òŒ¯?à!Úo¯?n½I·Ã$ËÆ†H8"”ÏLpí®Ó546¤&Éïøú;i½“¦R“T”_°@CÏß?Š×ÿd¯EßÊæú›Ê†¶•7P3ä¡€žÑ÷ëJm ß:}á«Ò¯MÒ*^/ÔÚñ %z~ßlÒvØiõ7úc/­WßîH¾´õgô×ü|Ò<Œäw¨*|‰×ÑyäÄ+;_BV+¤ìèe’ K6t¤zè*ÈÞê¥XÐhDü†Q ¿Ž¿ÿß/5ŒÖO¥¹‚€·GF«Z5ˆM¨Ö½OÕ—DGÕ}•Ÿ:}µ!\6Ǥÿë›cuô’‡Þ9"Ó¾êõ1ø¦é}oä>pc…ûHçÕ3¯¯ Ï„“ÒôrN“¥i××Si%Ëj¼ÐÒÕ¡¤Y^q©¡R²Â>—¸–¬Ð‹äi’§[Ჟ£[ºußÜÀ"äTŽ}Þ@7Ãnwý™&ÐD'W¦€ÖO;hÎr˜}ò0Ñv„iR–§ ŠçÉGªÉÓ„êI¬ÝüPg†w.|Qý݇·øŸÝ½ÝŠ÷ÿÍ©Pÿ勞Ÿ¹{Aúæ¥MßÂ!¿î¼ýAØÂÉëèíé}Tf• üzêz’uO=¯/0 H|}iЕM ¯‹¾Áë ÷"[Ã{â½mÑÓ‡Õ ú;D¼u™?æ‚ÃäéŽ?åz_¯'þ@k‡àÑÁàÄ\À_®G ~B|½=•ðÉ—4>AÜò>ÔËmÂê5Îþ(Ô3®Åef‚ûæ#èC’kLë’8€üH§?!vK#û–ÐI‚H0¹å4·g K^sq¯áKL¬+L¬§0±3±žÂÄ:Ìĺ-«L¬ÃL¬Ï’‰9í¤ˆ .ÔeCj-¡eK‰(Ît®ÈØ„5)‰R}‘בÒˆ/µCçcòF?NãyÔMÚ´ñ¦™´iãM3i­ÇÛBMZ‘‡êœM|š9›&ø4s¶pÁçtβ¿ 1e7Bq^¸U[á|I+Êúç:ÿËöÞ>þ¢¬ÿ>þÃIÖÿ;³ü/dý¿KÏÿRþ!×òoè(ⵄ»"ò¿\|:þˆ.÷ý¯DðÁ¶ Å"Á~.޳a?”Ò'.ŽÑ+üÏö“¯a©ÌL•aŸeɳ¯8„ÿ +ô™Y1ÕqÿÖ$ð $0ƒc‚!_ôñE^Ï N[zm^{Œôñ’=Æ‹ãÏ]G}q,íÞi<MÑe²8;‰65 ˜—DMJ’×™™Ká¼™ò¶x$%ŽCQT ”b$~¢ž€iE ”"<ÚDëÅ1&z|‰[¢âJT&€( á$6L9YèÀ”Ó„DMxÕ 4E“DnDJ _—(Zx ,!% 
˜D·¯­úõ)5w‡}âŽDØó]ç6¸œé6Hg²ŽÃ{ÿŒ´¯CÈ4vîó_žåí€gn¿@¿˜dlõÍ9Eêɳ/Á8ð´¨ö¿ûüÿ×Çÿ8)rþ‡;‹üwNó?Ü™æàn6ߣV¯Óµó¸±`çPs<;RS<´š%â@)iªG ÕD¯ð?%ÅCä)ÈâÁYf‡•Lã@‡y²(önïFø½¼>"“ÚãîÍ1ÚãÜ:—KµrqÆy¬|ÈÚ$傪{"Fldžªž nuÏé”ÏOmž ÕL ˾‹Â²©aVë¬NS(h' `¥h'ÌÙÊNˆ Ú cg«ù L„¯ÎÛt#±I§·ã–:/ÓÈþs½ÿ»·à¿üù¿NÊ[Éø3ÞÿMÏÿïÿÎþWDþÐò›½±·b[¾ïô¶­»ôÇjÌ`cWku¢ü;»i›µVÛÀVZôêììvWÿ"¦dð{¹~/×ïå¢UÝËåó$‰zG»ºÙ¢~yvu!QEþ?§‹ú‚vr»Kº“‹+øL†û3ZÜí\§»¹ö°ÿ‡›ÿ™û¿Û%þ¿¼_õö¿‹"ÙÿCcó¸Ñÿ¸#F1b ±¯ÈSàth¶0šDŒ)µ•ôa¨8;Ýp»5ÃJ/Ð,WAÕ¡å+à°ü,}=Ï*ìÍž¼¹;\jXÔŒ†*Ü Þãô¤8”»°sç¯ìf½ÒeÐu ¶å£beɧf Ô¤Ôó`’ 03 .- &' ˜Ð,ɘ0% 6ûßtf}ü]oÕ›õ)6½Ð-øÜÀ¤­Tiòœ"MPüÆÍì§á<Ì'†c† oö,¬µáË¢Èþ«8ÞÿÝÝS쿊ßÿuSdû¯bnVô`õm¥êЬhB6à|²† E³ïäŽA[òØœL’É=‡ìAyå2 Lq×5yjºç$ 3JÇÞ*M3a*«d–Vt{³Û±iEɸÒmÖpxÁ&mZvŘ(ÅÖ¥],Ð$~ž¥MüœË(VÎ2ù²Ï296K3Wm•f’¾,Ó<“ ëxºË‚ù‡ÉöÛXèsQ$õØ&Å”]É/Fé2eo2H¢©•°‡Bj®M‘I™+™˜qóÙx1+ÎR™Ÿ#ð£H©C…¼¡+â_íÿ¯Å:²öwöweûggÏÛÿ.ŠÿK_¿i0!vŒ›³=>ÍÔÏ<°yt Á ‡…3¯‡ÄÁ~fvÜûYF,ðùz…ÿM <“SÞÿHw5ÎÔ>•N@è"=='œÁ+;ˆ86næ?+Y‰_çHøÀŽj–Ï;C)‡CÃgCŸ¥œ  }6Åqî Y½à™J‹RIøÌºt–å›ù«´‘Ø2I&„P¾ši¼±Á ›aÀíâ̰„ ú œË˜1Á²ï'˜Ý ‹4ÁÂyàd~YFz¨ÓQ7!¬³ál\‹ÞYªùÕ‡4Èþ+ØüËÎÿ´_Uó?ùý_'°ÿlÌ?·Ö_¡Æ_¡¶_VØ8.˜³íVÌ’sk¼-GƦ¹ÛZÞÔšÎÔzy–V¶¡õ °´?.³µ¸!·Ñ<'XÂ…åµ´–~Y&‡*j~Mi>yëé¥Õþ8Ïÿ´»§äÚÙöñŸNŠˆÿåêß JºùŸ3;ÿGwcÊ™?Ÿü)?… †í“<ùO†g÷ f¥¦ãˆµ‰?æ$¬PާAJާOþ°{mB—?ÁÓ;þŸßÉçw ˪æwÒ ÷y„d w´ØÒ=S¼§ŸÒóIŽD.(§Ó`ár: ý^ÚãyÁŸ«ƒì×ùŸvö·üo¥ì÷Éþ7Îÿ4HÉÿä4úw0¿üO)·3QnþÎ9˜7GŽ'ŸÌiŽÖî %s'sZÁ€×Ìù‚ã]§Há$*p>ƒ“ðÃ4†{üuÆf»Ð/8…“©yÖ 3iŽ©›ØÏ¼}ãÞ¤€ô×ùªeEÿ¯no{ýßE‘õãü?ƒ´ü?N3ÀŠÍÿÃägdçʬ„Ê'³D@ƒµù™ ®vïæØ'yG>¯Ï´zí åõ˜æõ‘Rõ VÒ`ñzreèI±k晘§K'ÊË£±t;CyFÙxàšs%ã.³qÁ¬¹g¾£ ˜Š‡ã˜‹‡û}±’ñD*o6ž¹¥ÍÆ“²þ(ÙxxSw`Ÿ]g°$ÙuR^ª­µ¥(Šý´¯îÝÚÿøG%ÿou§âíE°ÿÙ»B3]”úf+$×Ç€n!Ty[. 
1 ùL}Òâzf‚Þ,³\²ËÉî\Î4%X2ÅþÌ™¼Tô³‡I±96\ˆ4uþ/²ç¿‚ÿß­øý'Eœÿ–€’»œÿZ ìÒk÷çkþ‹Úu Ôùÿíjàxý߯¨ç¿ûóßÜqþÓwo<ý µËÙÛ“&¿öõÈËn$.óˆØ9n'Ô”Ø&*è“G’Bï—Ì!/Ôù?~xpÿE=ÿcgÛ¯ÿNŠœÿ…½~³ü/”ÖYþÒšmþ—Öaó=jõ:]» @N<œˆi`tÕ¤„š¦~ éuY?MâÓrÅ´š%²›ˆ rÅÒÔÅV½Âÿ¦ÍsrpR¥kY?OЉ žgÖÏÙå“!#=ÙêÝÞ6ð{?x}$mh»7#Ähs§“!ó“O@ý‹äâ¬ÓÉÌ!«NUφ¹dÕq˜³‚ŠeQj„ɉᘙFL8óü0º å‡ÑM˜³•0n²äø ÃÉcÿÁúˆœ¶ ‰1¸L‰CWûˆ¢ÁùÚA‚vëOr˜*½¨Â@çù Ñ­Œ2 )Å‘ø18Êa!¬ÉyÛQþ`‡3eN/Ó|c–aºÙš)v§?ªa®²ÿ 6ÿòœÿ°Söçÿ9)€ýgcþ¹µþýü‡¢v7¹ VÇsk»y#ÍÌHó6Út6Ú 4ÑŠ±ÐVßóÖØTÖØË3Æ t}L³èæøo’-BQì¿Ñµëóö÷¶ü·ßÿsRü7{÷FàoJjÿÅÊà#M™Ø%Ëà«õ5k¿ fïêV[`Z›~º³!xKï¼ö޳ú’ÏÍÃì 1$¡0¿²Ð{@°†MÀlÊW›‡èþg}d„°­Ûˆ9’Tš[ƒP hĈ6L—Æ0’;<—O(#[‰³;¡‘ù¤åx±ÊcnD’y:ç¼!äÉØ–5Q¬¸ÖÈÓª«tƒ@Xy¬G·ä>çLâ_بêÖfÌC"é’¡ôØm`ù¢Iíiÿ‘'·3¨w´¼ mÈœ /MH@ǧò×dÊ „ y˜rZ]ÉIòæ!p¦6¿A£~äƒ £§^• Fs•DÆ¢([¶XJ®D©¬’ë{ˆN¥³2þ# vžÆgå< RFa–8ós\”d+õ•<:NJ럨!^~&rðX.Vé $¦ÀQž…IzÞ¬o ys¢6Äœ;HHmƒÒò¨’»…³8¸{ÿ´Ò‹´8,ðê°DÊPQ«ƒËåA/ÑÒ$Ž’ ÉóošÉ)gÄBæ³S˜‰Éº§óƒ±Á–åë/92ðÿÎý?euÿÛûœÉÿ˜û‚ÿîKÿOàý?„Øûæãÿñî3£+˜·R¶äîŸÀ»`^¼x÷OàÝ?Ë"‰ŒEÑR¸Ÿ¤ZJÓy€„ßiBîÍÊæú(“¢;E<Ñ“}h¶k±Ã)®¤é\A‹ä šó:á áõÍ{¡p¹Rè…Ûü™ÁÜ7Ê–Û'3ð /^¢ª²E)SÓ& €”È­š³ež"ÆXÆ,…³åcm¦ØÎG3p±Ì[ì{¹/¼¾yËý¼‚_’ûhÁ<'14ƒ—€¡Yæøœãª5ÿ£Ïÿá¦HþsüÏ`öøŸÁÒá¼ÿgÖþŸ$&ËûÒŒ3É™Îÿã19^¼D; !e‹’÷ÿÌYĢ¥ðÿÌl#zS<Öfaä¿_„×7ï ï °è—ž Ñ,qQý?C×ç?îïìøÿå¤Èù_Ùë7ËÿJiå%­¹=ÿ1ÕWdPaâŸÏ<%±Z ˆÑb†˜å({þŸò¿H'ÈßwÊ‹R6ïUÀ/Âë[ŒeÍ~%@s€¥ÏK‡`£¡;3,€ÿ'pÿÙ«î«þŸŠ÷ÿ¸(*þ'°ÀÿNñ?cüÏËÿÌ û#_…½J÷ãâ¤iugÚ)î'XÔi·¸Ÿ`q÷¥—e[:ðÛÒ^øméìmé` ‘²ÔÎRd ¹AŠ\r»ByÉí%·—Ü ÉOtCˆ";Ñmàa V`ù\ãvËþÇçÿqR$üyþŸáìóÿ —6ÿì£Ià>iÕ¬lz ÿYFøO!4üǧÒðâåîûFЬ‡ÿ,º2AKÿ™ãq^IJÓôBb/"½Ð¼×¿8¯ïe,èå&úäCš"û:WÁ“cü9LöÿT÷¼ÿÇIáý?á»AY. FM(G¨§J€Fø–›þp„Å ú6¼¢aß×]|¥K ðdKîJÇ m!Ty[. 
¸!s/R§ÿt}ßåŸjòÖ„d@b^˜€Tð.o*d%Ëa.drvý÷àéz=–ü|VQü‘lpR(쫹(Rçÿó`ìzþï—ÿïö¾Ÿÿ.Š8ÿé»7žÿ„zÆ3™óÓöÒæ±€–Kæ¶F9’¦|)¾ãR’—Ä%k;ù㯗äëe.Y þpIû£ Ê—xç+dƒ((.“o—X^ü‰"¡Ìÿvßýü¯V•õ߯ÿNŠ0ÿÙ»7ÿ”Úáü'íe¬ãš¹¾2S±Öä)n¦(ó¿Ó<ý³æ+òü¯øóÝaþÓwo:ý ±µöÞäNùÇ #3ÄÛ@É‘X ¸U{ƒÁ¥ ÁL³+xY‚?bjÁÈ ÷'þ¾Œ_ø¢ÈÿÛk×ùvª»;ªÿÇÇ9)RüWøúâ¿­Iü×^Úi, ¤ rcÄEdùõ蘧=ºÐ§X¦F‘\ÒŠ£Jªóº;‚j‰ï×­Z1Ú(U”‘êÙ”®¥_¥zŽŽð» Ÿ.Ž.fßutŒ_·rñ¿õ©VáE„-Ö&A PÙ°X›ÔmDý.¡² Èd]´÷'G£¼b£2úp‘¸|4¥ÍBÁL%†l:¢‚$sPÀ·øq•Œ«Si|Ñá°p\i†*ñÃÊnTivjCŽÏd§v)·gg^Tý¿ëúü—ݽÿ_ÝÛöú¿‹¢èÿ]ãó_­«ü´5ÛüyÒ6H÷ÔóÚ»SÄH«ƒŠVé9«C–ÕÀdq|{C¼½-Zx1«[ƒÉíÎ~9¯ãëõÄN°6ŽŽÒáÿ¤åÔðB Dæ°¾ZÈUeþÇÿዊÿ¿R-ûùï¢óhÿ7œ}(oàæÖ…I¬¾%nîBj¹0ûÛÒ÷ ¸``æ+Ò žúÐ/LéF²T œ0 ÛÃÏö*ØH׸ ùëxý¯ç¿oûùï¤ÈóÿÖJÜ:–·ëÉÌNP›×ÉÇvò1·Â¢Is»~Uº.µKxJ“È[¤@óßuþ¯Ý}%ÿ×ζÿwRrOE9¥–~9æ×b{"}Þü4ÓEyªíãH¸<˜…ÒÚX°Àypþ¬dÍÿ=eþïîúõßI‘×ÿ;àÁµð€U€H,tʉléT8u ¬4º†»k © ¸%Ü®W÷ Ì“ê,QQæà<ÿCu¿ªæØ÷ùÿœyÿ/°ÈÿäËÿsòE$€PÎø<‚,nk丘,ä¥t"vø ²>æÍš^!3ÀZzäßœO}¡“,ãÌzmñ’±«à« Šv¿8–ÀWGzðžFqpž3a·o˜ „Yÿãv Àï}&I‚Ù%9@¶›ëªþ×¾ºwìÿÙÛ©(úߎßÿuRû½{SóR;´þH{ùŸ9G”‘·ésçR»»ìyÒàH,NuþŸ6cþ—ËÛ@þ?ŸÿÙIç¿UØ@É{c’6p—–.Pr„Ç@Z& ïZö°Oë‹*ÿÎý»;êùeïÿwRÿßÀÂÿ7ÈåÿC¹=€ƒÙyó9ý>½D7ŸãV寮3ƒrÓ ïLäžûä½rvï}y¼riï]ãûT n°8N¸9@ÿsÿY-ï+ö¥êóÿ;)ªþgÿIHÝíþÌâ?AMO‘&Ó"§Ã¸Õëg†‰2µ.ÊœJ' *Q~¥+M²ÒÐlßáRë\ââ;Ô®½RLé ŠÍV£P.=*M5²Rº^Ì«Œõáï±w™¡ùàßR ýÏqüoµ àÿÊ^ÿsRÞJÊ_%Òþ²6i¨:JZB»o+Ûo§ÞîÅ-8Qù´)@2•>› ‹¢:pÚyýQ§tT–YëÈŠ¡Ö!¥TYMur©ß¬¢Oæ{µäݪ—¯_.FQõ¿oWçù_öÔøOþ“"êôÝãµKün/OÐ)¼— Çœ.úÑïìé‹<úuþŸÿUÞSì¿þ»›¢øÿÇæçQZgç‘ÖlÏÿj6ߣV¯ÓÍ ™OVxå/;ÌHÚ!_­f‰ØŸÈà/Bšjжšèþ§¼Y’û/gg{­äA^tÄ'æTïön´ßûÁë3à¨ö¸{3BŒ6¿14Ψ¡õ˺=¹8㓸lç@&!R HĈí¸PÕrÁ-Rc:›0$À›'h5SÝãð˜žDf˜%Ì×(gnÔÒNÀ1¤0g«:a &h'ŒwÌOuÂdne2WÖP"J|©‡Y¡ün/°H²ÿ\ãÿ÷vªŠýWññŸN `ÿ™ãÿÇNóŒ ýïS~éçõ˜Q¼Àx…âÆiñ>GÞ°<i~ß³Iã1^@ÿsŽÿ¯n+ùŸ+UŸÿÍIQõ?süÿØ%þìÿŸ –zÚZäW¨±€û"Êüò¨/på]j¨¹ä´2Fš'zØ*â½–ûJp¯|/Õè5vGàŠSHÿsÿßÝUÏÿòñŸn  ÿU,ÀŠS p•âÒj[8¥1v屯*‘‰¥k—^½Ô©"K =—õËœAÇÏjë Kýâ%4oÌAæ«7ÔT_fH¨ÿ}uzþ 9ìA9ÿeÛûÿœHÿûj£~u«~µ…§¨€ÀþðùYí´õécë=:Çý¬N‰(hϸ,°˜HÛó3#0ðy- |~†^áÓ‚3N“ÈžêP‰¥ÇšÊ\€°’žòuÞÀ>Â:¡ãg£ß6ðàG¯ÏFˆÔþãì ¹åàu#óŽÓ~Dè§ùq‘"ÿXµ2ÙãÌ„ƒÓ){!á3Gp~–e|µAv»Ù  &èM¦3À\Ø_Þ›sQì¿‘óóß÷ªUõüÏ}ÿé¤ù¿FÜáïÉ¿FáÙï³Ëÿ:Ê<ï½X{¯Ô×<¬ý‚š½«[m}€u˜ÃðÓGˆòfÞyígò%Ÿ›‡Ùø°dûŽý¥÷€ö_ 
Û5Øú“¯6Ñ+üoº¨ÑØ@ÌqrAž“á}¤(½[p4ÿs¿É“±)k”X­‘§TPé°%òXÇniäUY%þ…í€:kmÆ<$r+J½Ñ–š‘$Оöyr;ÃxGË Ú†Ì rqñ j! œî^“ (ƒ‚æaJØ*]ÏÉäæ¡ªäK̰~/œV,dy®’ÈXeËKÉ¥ˆ¢Ü~©¹‹¢–EKŠ’E.…‘(tNöRf‚ºõ[»f°ÿçöÿŽšÿ×ïÿº)’ý˜ÛÿÁÌíÿÀÛÿÞþ÷ö¿cû?˜·ž´äöàí˜/Þþ¼ý¿,’h5ìÿ9‹"/‹BF°¿ó–EsFzó?XþzK^Tûà|ÿ{·¬žÿSöö¿‹"ÚÿóýÿÁÌ÷ÿK·ÿŸ˜üƒÒK³ésXïÞ&Gfšð`î{Ëm“üž¼†/QNHÙ“©iÎç YEc{ž"f%ŒíyË/dBF°¿ó2y¥Œ½=X,ô¾Ì«ö¿óýÿí]ÿ¿Sñç;)’ýo¾ÿ?˜ùþÿ`éöÿ½ýïíÿ™Øÿ~O~:ûßïÉkxñUó„”­1ÞþŸ³ˆY ûßo¶{!ƒ4ö¿¹”É¢ø]t_òÕþ><¸µÿ÷÷¶Õýÿíoÿ»(rþ7öúÍò¿QZgÙ¿Ik¶ùßZ‡Í÷¨Õëts ê,0¨0ñp€7'OkÓ‡+yãpe­f‰8Upö—Üjà{HÉ'']o5Ñ«VöJæž;98)¥Ä)ÄÁþž ×EFªðÅL8ÇF¡#S"1Vz·wXçk5^Ÿ¹ÚãîÍ1Úüî <‘9íÖ/«+äâŒsbÍ!ç^UË„ùæÜí'}Ò=À~"¹Áj)Y÷“Ž%«Ù¦ÝÛÑr0%9ØŒ ®u¤\ˆ˜PðP‚ ®ÜŽ»ZV¸µ·Ü\xYà ZÍé,2¢KðË—Ö0;É8cžK-_tæB†2ê„ôÙR i)í&}£—ÒÈKi/¥#R:!µÒ§œ°6ÒÙn5ú¶²ÜjKäÿqÿ¨ìñþü7'ðÿüÛÐÿÜvY ˆ¥ÿåw-/ dº«|29Ð#>ÑÃt ’¡*™ T2ôA%^¼ÄýÞ„”­SÙÛ½>ÁÃ<%±ZdÌɼE—A!#Øß"ƒ2ífŽÒ÷p”E.€ý¸ÆìW”óßw*Uoÿ»(ªýXà?§øÀ1þc5áÿñÒñìïÜQ ÁüQ '¦Ã‚L  `“1bdÜ_ØÖ¡Ín£õfcØÆ&@°{/ÕGd¹ç8õ–cà·í€!An`ˆ² ïAN ‰Üœ!#s”èÆ"Ý 825nÄ‹t/Ò½H_.‘ÃHd±>-¬Dx}sG•@þçøÝªêÿñç:)o%çqþáÌó —8ÿ‡ì¡áÑiUy¸‡‡{ÌîásˆL÷ð9D4¼xñ[­FÑýî1O d,‚îáSŒx„æ&ƒRLZŸ€dY‹bÿO®‚§‚ öÿö~U±ÿw÷<þÃIìÿð݇f}¦€‘ÒÃjãé ¾ç¦?áE}^-]wñ•.%À»-:²…Påm¹,@GÌ= ¡½Í?Ø:d²Sr˜ á0ðíïòfÂT †©0©d§=ÿ=xº^Å8ŸŠŽx*1Á'*ÃÎÊKuþ?*sŸÿ;û>ÿ“"Îöîç?%·ŸÿÑmç?irŠùOnŸÓü'M§ÎB ÌÖÙœóÿö¾}u*²æÿ¶êÿßÛ÷ç;)€“ô+Þ²s”\›wW}™A‘ç¯}õpõìxýßÝUôÿŠŸÿN ¿þGïÞpýÉm×ÿä¶Y®ÿL¦ž6ßžÎJ `:ýJî‹ÀºªÐÎ *½‚þö÷MýDœ) F ÌÿÎ<æ¿ÿ±[õç:)ÂüïØÍÿN¾ùß™Óüïhç¨å/ªèÌP(ó?˜ÇüßSæÿŽÏÿâ¤ó?°›ÿA¾ùÌiþÚùsß¿-ìôf©(ó²ëÿ®ŸÿNŠ0ÿ'vó’oþOæ4ÿ'Ë©ÿOrLÿu`þo­ÿAñÛÙó¿¢Øÿ;e¯ÿ;)üü¬¶ÿ‚\»‹Í?¶žse·ê‡¾öûܶþ‚ïü±¢ÎÿçÁØíü/oïî¨ø?ÿqþÓwo<ÿ µýüïšåüçÂh{)Ó_ȰK 4J¥è†KI„\ÍÁVdÄ_/É×Ë\Dýá’öG-”)rbsQ¼\&ß.‰î€üÐ Eþ·çÿRòÿîîzùï¤ò¿m§¶ói€mç*`;/ükÞ*`{æ: :ÿçÿRóïUüüwQÄùo…ÿ Éíç¿ ü—8ÿó¿X-sC±ŽÏýÍÿ¾kûœÿeÿvRÄùß·±ÿ(µCKŽ´—îÈјm«bµ1À‚@¶ÖrÏÿNTðôÏöÿì+øÏJÅïÿ8)Âü§ïÞtúbëµ?¼ÉÙÊÛ+Ddw‚#Öp“öÊ‚K1‚;˜¦Rð’ÄÔ‚~AïOüÝ»ƒV¡¨ò¿ø Kþ—w¶½ÿgNE”ÿ–+@Î5Àý*±Xm¬ôÒ .Ù›~‘Xâ¢ÈÿÛk×çÿíT÷ÔøÏm/ÿ)ÿøú Ïÿ»Îuþ_NóŸ´–žNØÆ9ß,Ró“_ŽyÚ£ -±~}1HâÏÓNJ±Âëîª#º[·0ÅI沓Ée$ø7L—‘œ_¿ìIõà·@?]]Ì"›ÿÑ1~ÑÊÅ ü¾§Zi1=ÝråeK²JQ©°yÙRséSý(™|˜”c7‰ÆÙ¨Œ>\$Ú™’¥–)ã°s@†ì7D'[®d[/j\Jã‹§x€…ãJ3¬PiŠa¥É#²Üç‘rZTý¿ûàÿ½»¯žÿ]Ý÷þ'EÑÿéë7Ôÿ 
­‰þÿC!ú?nÍöü¯<ÇvI÷Ôóš™.&b¤Õ@¥«xƒ|šW†ÑÀdqtsC¼¹-Zx1«[†ÉíNü:¯ãëõÄN°6ŽŽÒŽó’îÇOˆ¯·§2 ŽÐ‘¡­ü¥\nVsÚá¸F€é‘`Z½Ëî|d}@È&²?!„WÔ°XøN§¨ ´éRsë së)Ì­ÃÌ­§0·3·nËÜjNæÖaæÖçvIfŠU,‚º¬@ 4YD¨³Ð„óCÀóBbE¹_k ÇëN}$”nF§ :ÍŒNtšm=è–bF‹¼U'tš´ÔLè4i©™Ð«)-Ùß…˜ÏŠeœÌga “;c>‡,×Ú¾ô½À¯e‡ÅV0dÿ¹ÞÿÁWÕýþ“Øæû?]Ûý4ü“´¶û?©öœßÓ±ÚÓ)~ÏÆ|'¦pC,Ý{[1]—.óâ=äÝ´HŸ=ßÊ<õ;,ÃeiUN“(™zGA[)]¿•â‹Iôÿ¡ãøJ¹ªäÿ©”}þ_'EÕÿ‡æêÿÐ!ú«;4: ÔD‘tpP‘WõòôJTU¼¤5 ²£Ìæ¡NèÍVÀX—åáºVŸKC5'»sÑŠ]°ú&uÐĉå¦ÖuP¯ ½zàËëñéòäÿ«V|þ'Eˆÿ±Kÿ/ûƒóäÚÜfùÿæ—üaæ¹HQæÿÐyü÷öþ¾ÿWÙÝöóßEæÿÐ&úo8û >Þ*|ºùË'xÐltʺº Ì·¥ïAqy!ŠQÄ%9A8cÊìU@Sù%qyÐü¿u¼þWUüçNÙŸÿç¤ÈóÿÖJÜ:–·ëѲ¼7¯ãOíøSn…Eävýªt]j—ðð'óßÛ&Qæ¿óüß»5ÿ¯ßÿwRäùo‘þ‘R»ÿrÇ)T€T`óÓâ¯ùf \>m¤Fâƒó¿` kþï¨çîî{ûßIQæ¿ðàZxÀ*@8G;åxšw*‰6PÖOy\“Ü3´Ü®WŠ+{…é6Õ–«(ó?pžÿ¡ºWôŸÿÁI‘÷ÿ‹üA¾üùæ~™þAEû]|:þˆ.÷}aûpšlIñZ)‘Zˆº­tDßÅq6¢Ò@ZÇÅ1z…ÿG_e]M¯ó˜‡V-or¦EFaLyA<ãø…Յйˆûßâã¼h«@Œg§-H~ôÚ¼€Vôñ ÕÅq Ð*ˆbÙ/Ž% •.ò&ž~F𪢠yúw~lóÎ/føÎéEÿÎ ¶ÌƒÕÌN êí«{ÇþŸ=5ÿcÕçÿwSû½{SóR;´þH{ÙIÌÈñó9£§ˆÝ]ö4ix$'F§:ÿÎí¿Ý²ÿUõóßIQì¿…ý7piÿ ÿJ ©C½>Í3¸«XSp1ýÜ eÉ¡±â+dÅ >Òä?ѵP³†;Oc7wK.û½/%—öÞá÷ÍFCAvÜ`fvÜ2núŸëøŸjy_9ÿ·âóÿ»)ªþgÿCHÅÿàÆŠñþ39«‘vGhàµAÁZš­·^6QóX¨K¡|¥éSVššÍ"<\jÝK\„‡Ú5X 3"›ßtiÍV§P.}*ME²R¾^Ì«Œõâï±w™¡#ùð°R ýÏqüWµªÆW½ÿÏMô¿Š…XqªVf«¦G„gê€æá‹¢$:pä®L‚ËÍ2ê’•eÖ@de²b¨Hö«©Z.õ›UtË|¯–¼[CU³âÝqó+ªþ÷íjà<þ_ñÿí”}ü“"â?è»7Æj—øÜž}ü¼£ ‡ÿ,úÑïìÙ‹=úuþŸÿRòÿVýù/NŠbÿÍÏ¡´ÎÎ!­ÙžÿÒ:l¾G­^§›2‡¤ÀÊ!/6°‘´C^ZÍ17‘Á!/„4Õòl5Ñ+üO‘v¸$÷!/ÎÎv™ê gf-³kq¡ƒ=±žz·w£ üÞ^Ÿ‡ ´ÇÝ›b´ùmŸ±Ùé´aYÇ'g| ‚í½È4Dª!‰±{ªöìq ï˜Îh °(à ZÍTÁ8ð;ò^¼D­8!e+L¦RL„9Ÿ”fMïyŠ˜•0½ç-c¼ ÁþÎ[Èä•2ö&õÀo¡ûbSûßùþÿöî¶ÿžS÷ÿæûÿƒ™ïÿ–nÿßüÞàŸÁï·ä§3øý–¼†/QOHÙ ã þ9‹˜Õ0øý^»2Hcð›K^ȘZü~Ý—üEµÿ‡níÿýÝ]Õþ÷çÿ9)Êþ?}ý†ûÿ„ÖYöoÒšmþ·Öaó=jõ:Ý\gC§: *L¼œ àdÊ“§Íã ”äáJÚ8\U«Y"Þd•œý%·8RÒÉI×[MôªÕ„]†©çNNJ)Ñq q°¿'èÄÀo‘‘*|1óͱqAèÈlH,•ÞíVøZ̓×g@~&ö¸{3BŒ6¿/ÏaN5¤õ˺ ¹8ã¼VsH¹WÕ2a¾™õ@ãIŸZ0žH~¯ZJn=Åžc ¾j¶Éõv´LÉã5ckk)Ö"ö<” k+$·cÅ®Ì 4'k˹…מ ÕœÎ#z¿~iͲ“ŒC湤ÕEç%ÔIi(/¡NJŸ-¥”6Ón’3Ê3ËËéœrÚ(áâ Ôs’Ó€N“Íib9M"ë„1à{7–§œ4Ž¥pXUª,Ž}gzç™4>_àÑÿÇ5þ£²ïñó*€ÿÇ<þcè2þc¸¼ñSe€XE¸ˆà”ÉYÍ4î %CC2¤dècH4¼x‰»½ )[¥²7{}v‡yJ c´Èˆ“y‹ /ƒBF°¿/M¥˜Ô²D°ÿ×ø½í²jÿ—½ýö`ÿœâ?ÇøE„XyTÈê#@N=d± àr©`Ž{‹ –yk1˜ïÖbÂÄùï,s„€°¿K† <DRK$¥]!@¼”öRÚKi´àRDˆL 
^6òÿ8ÇìªþŸÿ㤼•œ?Æù?†3Ïÿ1\Þü²‹†Ãw¤Uäð1#À‡Ï!2àÃçÑðbõX»ÍV£è~ø˜§2A øð)F¼ Bs“A)v²|,o‘íö‚‹udØÿÕÝ$þcwwo›Øÿ»Õ}oÿ»(±!þ`Ôéõ·îþ¹&\ºï]Ë׆xb+tWCñZ¦©ÏÛú¡Ta&ðMŒ¿ ¦îæ°ÿ4¯Üô‡%´µµ&´H3éè݇Áhü?åa±òÝwôò¿¯~¿ïá q·£!³óð52Fëì"6Úq;Ñþ:ZçëV,<"GÃuÚÙïn»ßm ¿b)¶¡.j˜œ–uò¬diÜEˆ}Fh‡[â54"ÜáwÓËå„ðÇ4“Ëey‰‹©…‡ Æñáÿû‘ðÛËOï&íÓt“úvÍ»¹Ïµ¾—ÖÍ=Žp߬›x¨üÛ¢+;(j¡šÖ“jB·cØ‘kgY—Ð¥ŽìÝ„nÏ”ÃC‹çªÆo°’ÖʆÑÓÿ°¡tVóð!ÙÚg’žê®;¬hgá þm#i5*¯^ÉsvXùŠÉHU&,šò¡þŒä)ù5jïf€W•Ñ ~F¬'bÑûݧ«áÕCËu¬A‡3é+ñúý¯}» ˆÏ¯?vÛ#ª]²Eƒ0óßrulÅ`r}#Z°½Î_Š;9ôWÚ¹È"!:q¸FÑ¥kÒô÷wèí&ú¹?lwÑ}ïñ+êß þÓ#Ülˆî®;÷ø6ß²uª7ZSÞX$uVÖÿ&Wσ±Ûý|Q=ÿÝŸÿæ¦ðû?á»wu²¶€uH¬Go!Ty[.OLÛ[‡¶](Ѧ–e—´V®´çR é/¥èÛK¢`Úf@¿^’¯—á‡uM͈®þpIûÃmsë!K" ?䟬¸„.“o—Dò Qô(ó¿Ýw?ÿñœ—çÅÛNŠ0ÿÙ»7ÿ”Úáü'í¥Ìíd_‘¹Î?šëé“Ü\½Pæ§?jÿ<](2æ iþHˆŸÿ.Š0ÿ“wo*È4èp„ÚW÷÷á[°º=ÂÓ}^‘&tÝÅWº” ÷x›ÜåJrDÏ•*=d9JþE‚D¢…4·èÒ$aI¶D)!úDÞ!¿‚’ÿOÎåÿŽ"ÿ÷}þ''E–ÿOÖòÿ)—ür,ÿŸŠ‘ÿæ+ÀÓ¬OV+À“_V¯¨ò?ÀÓÔéþ/ýQ²ÿww¼þ龜òŸ½{cáOÉ]ÊqÒ (Ä;ý§ëûnä£÷˜»ÿòHê\¢Y–ÀìqäCé°±ÙÊYeþ“}@§þ¿2V÷ÔüŸÛÛ~þ»(Âügï>sî÷o%%„Vñ†+™@:’&¢ ãP‹Œä×£cžöèBK¬Û±0Êî!Ыz«ÐÄuwUÞ ú@{’c’‘øÃ0N$ãØ½~,Õst|€ßýtqpt1‹,GÇø5+/ðÛÖÉýg•œ¼Çêl—è=á"¿îpÛ|~>@ëÑ— Ì2ŠøKá "oñ×ÉH õ…WjÇŸ~­ýOå_¥úûsò÷]Ô‹SøÉŸgþ†Íç±á1÷ß°ŒHç7GÏrœÍ„á,WüI‚ž§BnâOR1ÏzH³‚XfB;*+™Ø4‹>\Ä›(i/°™·a©ø“wGPÁ¡s )PÇ›ÔÛÿ: báüCÿðÿ€?p¿Q@ïÁì’¯Ó[ðuñ¹¯!'NÐ?ÈÑ„öˆï9AÿøGÂ=Vžñõpc…ûHçÕ³ ³ Ï„•Ò"ôr—N“ÅiWØSi¥% k¼ÔÒõ¡¤Y`q©¡R²Æ>—¸%–¬Ð‹äy’§[៣[º•ß"þ• ²ÏèfØí®?óÚÉâìÊ”ÐúyM:PÐÓïO~f†q…Óćq½´¢úÿº®ÏÞÝÛQñeïÿsR¤ü¿áë7ÊÿËhMòÿþP@þ_Úšmþß·#„3³%`É"Ôej-¡e ‰(ÌtNÈØD5)`’ÁÈúhÄ—Úg‡ÕLٴᦙ²iÃM3e­‡Û"MY‘…êŒM“zš›&õ43¶h©çtƲ¿ 1açAþËIy+Æøî¢à?º‹ÿðŽ‚ ÅC6ÌÔròHŒ¯‘s1³Ô©Œ:þÊ&YÇrÝlœëÎdÝzeÚŒ¼–Ï‚C—²w›wm¤î·sŸ¤þðJÙäûÕè·cX2V0v’1zÈ€î1ºoLQ‹f†÷‹`ÒT_Ò<_R¼^`ÝÛ 3’âåbðpC‹@aED§£)te1»±˜\Xt÷ÝŒ 8®D^+y¬˜ªÃ hÅT]UbJ“>5è¥b¢v¢„IÞ+N)FOÐÇô.ÑüJL›àº!ÙÆCvù¦Â§¹‹ ‘×|ë´ Ã_õ½÷Õ0üÔøùƒwpv/òt“AÁr¦!É& èM®C°$B¶çè>þŸ“[uOàVØ40‹“Œ +¢ÓØžJ~ÜËde6]ÿ¯>þCûP¿ÿ­UÛÿ+Ijü‡ëñ®+ÿpmÿAÌîŸõ^ þÈæ³”¸±Ï  Çx°6¤†xè÷šÔ €,B<Т©~=&ÿ´±¥ pˆ‡Ê";ÜË0lšsQ8ú|½Ø%ßýä‡sàp²TöÕðj²¯ ¯´”ƒ°¬u @_®8‚—œ'ê’aA_q¢°p>4tÌh¨vŹܒ3*@H[,Ðï¥;Žfs®awŠ^©VDÁH0€nb$˜ó{K0Œ“OC«  ÈG_§’¼GEά½UÂÕÀ›˜ ý¯êýß#Èÿ«Þÿ­$=Q”?ëýßôøðþï*Üÿ¯Ëˆÿj~«WöîÙ–oÉ;½õ¶îÊ·uº'*¾¯›¶U›k8×Zúþìë3ÿ&Æa¨wrëÜz'—¦{º“+FL°böíéf3ûíÙÓ…˜= 
•Ù—´;ÜÒ}\ÒÀG:ßoÑænæV°—[$ú¿_²úŸ¥ÿ·´óÿN§SëÿU$Eÿ÷­Õ3Nÿ“X•}fÞ,œª½kÍ5ÍGÂPyŠºå~k†š^¢^®«ß‘Þ«êà&øV}±¸]Õ¹´ x®ÍC«5`yUäëŽnà >a”œä.í^Çõ/t³>é6¬sËÖäã”KO DhˆBhVåÁ(`ðASäACØA0æ ]” 0Ì`JŒAc€Áå”úäÙ¬ÓÛªô)½4¬Ô¨u™üä6…Ÿ ÌÀŒDs]NãÏ+ê(ëQ  õ¿SõþY/«ñ¿êøÕ$uýïØ+ŽYèÐô¿N½ÿWI’ýçVñfWˆÝŒø?ó•Ýÿbª˜rçË»ú–—R<†ë ?+¿»…ºjº+±1ðÃ:|‰ç÷(ÊÏ<%ÊÏ»ú²–.´¼`ñ?ÏÄœ:ÂOá'J÷6‰½¯ãh@6{G›Íß3|ú--ïÔÓ¨%Eõ™o\TiÜ[{= -ðm›}iôÿêãÿ´µû_œƒZÿ¯$)ú¿uüŸyJüŸJOÿÎ×ÿ'¥~ÈÁ£Íß5Ÿæ-ã§æ³^M÷ó™[ó¹‡Ç]3?äC>ìºDyíVGð‘2–QÚ“Ç«ìÒ¸ !|lÙÈ­äP‘üuhH¢~#ê6UªÐú¿êø?Íÿ³ÓqêõI]ÿ[Çÿ™§Åÿ©4è¼Üø?!§ì@™0Îdh¾»³>m¡ªÝ;c`Ÿ²Cùð/£Gò©ã÷X-`û¤ŠX†ïQ"òÌï¥f’õa²j’7O¸FÙ䉼(oe-ÇJÇÑW֪ʢ5ÿм)þÖ¼1šMÙú‹¢Uª kÆf4…BÍ(£ÉeF€L܆C|_ÎJ» ·0¡ÙÒ¢ÐhßP.ýS[š$HÚ*5ö£Æ´Iïç7½@'LŽu«åùÀbí˜Û¿‹ÛçÀˆ"¥x\žÈ›Ë–'ELiayDÅw^,ÌŽ, 67ÊNJÇ[°Ñ¶¡IÓÿw0®VÿouŽ4ýÿø¨Öÿ«H’þ~ûè@h¦ €•Ž ›O€î#ä® ghÞí!»`°ÇìÂl¬ÈÍ5tú¿›LªŽÿpÔÖÏÿÔ÷W’Ôøáç·‹ÿÀÊVÿö–7þCÿ¬÷õGÞ°ð%ௗ»<ï•àæ¨6çƒÒ"Eô{Mº›€,"EТ© ýzLþ-)âõÉë¦Ì[Kú÷½¶àÀë ú·ºht¦s“ôèóõb—|÷“Î#ÛRÙWë Ë&AéS<4Ì:Píô媃I¬!¦FÇŒ†µÄÔ¨ðÄ:cË2׈Ö#¯Sï/7¬Ë¤`+a"(:„‰`Îï-ÁT#£&Ñ¥)"k‚±£—Ä l´«_aE!Pm‹Ý¬è•Çïj÷¿ÖöŸj’®ÿåˆÿ®PÜæøï¹ÀÔm”Ám x¿#À—­ ®W’V·u÷¥‚ j¬<úY)kã\áC/ŠcöcÆ}#´ÉuëQuP÷s¦·‰ÞÂÛ@n%ªu¨öµ&Hÿ+Yý³ˆÿ®ÿ?¨ÏÿW“ý/úW­ö·éñßËÚÜ¿ßE®ZÝ­VÒì”´ZG[NG{€*Z9ÚýWÇjml)mìá)c%š>–ÓÉj•ì¡$Mÿ[\Vÿýø¨ø×ç?*I’ÿwøí­œ¿YÑÜñr)|´‹2;d)|§@{½³ÓŸQo4øllPs«~&7vQÓ{úLÐúøïÞYv„FT”,¬¨žðVÕ·½3ô˜üË2^ÚÖKtÄ1êìµAèŽ4~Á:f¢1>¶d—¢K¨zâêb³‡jd±(ir…”°W#)¥®9˜…,¤bu-J–®§:`ñªTŠz§R0â‚õ¡b‚¾Ü¼ÀQ6„Eÿ©Z€!ˆè¥ÜÀd Ä»c^Þ;nÔ·h€ð¤aÆPß—°&kåDÖ¬(›·ää\œé¨RÛˆFˆÞ(Aòÿ’¦‹¦A‹¢°ˆbJ$¡_“ERˆKÿ¤ãèC1“Púòë[‰_|bTB¬ 96ˆŠâ•üÔØ“ŠÄ}È1Hø¥QD@ôo;òßm,¶(l*K:T)Ì-ãhA™J˧!ÙS§D‰\î™ aádË2„ͶÚØ‚Êí?mýþ÷Nmÿ©$)öŸÀÞþ¤ØL7ä´ÿµý‡®í?k²ÿÔæK¥+X÷¢lËÍ?AmþqñàÍ?AmþÙNdÍŠ¶ÂüƒŒAjѲ )ŸEéÝsö‹dNÊæùêEÄ®õ`·zÈN1%-g Ú$KКåD-(¤Ï·nAQ¥¤03·õY‚*4÷Õ¤Ûæ•ûÿtÚÚùÿÚþSQ’í?s{ÿŸùêýæ[çÿ›|ææÐ”÷Ò¦³Ä “µMf56ËÛ´k›Œa­=¯]r ¸xˆKm^4K™+m*”pÞè¾[ÖÉb¬yÌV[°¯ÍÛùh&–u³ýšïKŸoÝ|¿(ãWø>Ú0ËI…>4óûïC³Í °ÿTîÿÓq=þGÿ¿’¤Øìýæ«÷ÿ™oÿOmÿY¹ýG8“UÛÒ”³Ú'g9ûOí“cÀÅCÔxÑP,ÕöŸ5³k³öŸõ9ÛÈÖ”Ú×fcø-¤Ï·nPTlºM•– {ëD³ÅI·ÿøUßÿx|ÔÖí?úþJ’ÿ5üüvñ_YÙÊâ¿ÒÞª½ÿ1ÕVdÑ 7U|¡¤áú-p,iŠ^BÕ;³ «Xœâû«2,O)e•÷áµ’êÛÐ&Uò•’«»:r3#Άó¾ò¾Çn5—DR"^ßw ºî Çû\Ñ%‘*ÖwWQ…Q¨ ›#ïëVV'%ô®¦ž‡BOóÆÞ=0ã0%BèŠ5´Ò44Du.x6AZT<.͸¨VC«FE3_²ˆŠèp>p #¨Ê¥_c*Þ„Qvc#«®äzÒõ²j^]M 
çšW×¼ºæÕRåÕé7èB¼:1[™íV~%7ã®ÉnÙªöÿ9âÿÔþ?Õ¤'ŠñÇÚÿÇ_ýù/kÏ-òç>úIF˜ÎBu\Ÿµøùõ²¥|ˆüú ™q ™ E[örÏgÈšm…‹Q}ÄL|·NÏ¢uK…Z,HŸo3ÄZ½d@×õȯ¡`ÿ *÷ÿ9ÐãÿtjûOI÷ÿ røÿ•úÿûÿ<÷ŸUùþ¨oa«Rí÷SÅMÓúnr¥~?Á¦n&Wë÷lî^ò¶l%õV²õVröVr°„Û&jWèdÁ¹+rÚpÎ]•P͹kÎ]sn^  Î]ŒuC^@ùXwbQJ1)÷Â+²ÿTïÿs¬ÙkÿŸJ’âÿcÿÇ_}ükãÿ¨6îî“Ö̽ T»ÿl§ûOBh9÷Ÿ:„wŸ7^ÊÖî?›Î¬YÐV¸ÿ¬ñ:/žo^HnâA„Z·l¨…ƒôù†p@ب>&ÁþãÍnps¹Ÿfå(”2ì?­Ã–êÿspÜ>¬í?U$—Qÿö;îNøJ³]ÍüáèOÀÎ`“ç°Þ~Tãß&' }„œ'ívèäF„Kzóg7 ªËǽ7¦ÍÛ&áV·Í;úç®Ißï&5B%œ/+½tg“ùxx¾k Ô\õ™ s¶HÛSªÜ6öv›wäI´ú kIJÌþBJ4õ¶à•$ÓÿÍÆÐ§¶ÿV’8ýߤÿ›RèÿÆ@ÿ7ú¿èÿ¦)d¤ÿFÿ7þ½Ap3Y ýj×è¿öÿ­&…¤}ûi^úgõö—fñÔãTG_”Ï &ÄÊ,øs莂Ñlf6…2 0U»9­}ÂJÆroIŸ%þ ß[ ùgËÿv[¥ÿÃÃzý_IŠè?þö9ÉŸT[žøÃ¾!áqòÚÈxžYúƒôÏ–´Gq)pBÛ»ï þ§þÄY“üWãÿÖò¿’Ñòís2ZoyõÎi¾€ˆŸ¾7R?Íl % ˆZû„5ô@Ä¿HÿÁªäÿñA«¦ÿ*RDÿAÁ@PÊ È·ÒÖ4³i³ð‡¤-Ðp±•üámeýãU²åÿ‘êÿٮ鿚Ñ?ÿö9«¸<ÀÖœjÀyl¢!P±à‡béU @ú'k‚šþ«H1ýUp9¶Vpª€ó( ý‡*~(:§ÿ‘;˜ n×BÿGÚý?­c§¦ÿ*RH¸Â·ÏGÿaÅ¥é?éŸÓÿˆ¾@XT¡îèmX)òA’gµNÂ&Í‹I omô¯íÿú¯íÿ•¤eéß³§Ç1Ó¿gOÿHÿžý§›ÿ"Fà= F а6ú?ÔζÛG5ýW‘"úŠÒPŽüìé?é?°£ÿÄè§Ò|ð€h^LýãõÉÕþØvjý¿’Ñ?.Jÿ¸úÇöôAúǹ䚀'àôÿzðex5§Ñ@Á”NÿíÎq›®ÿÛv§uèÑý¿ƒVíÿ[Iú.Œó|Fäo€„üÙä@‘;ßíxã¯ct‚ö÷Ÿìïľoз»ÉüÙMwHÑïÐ  Ñ`ãá×á9è1êìì }Ÿ–"à Ïü|Ï.〴( "ñÚúÛpdl4hvÅF›À; í“æ¾oÐíÝè‡ÿèÐQ¯»ˆ‚LÔPözÅFÙf¿CÌœrœ„=S`ùl…’è'¤x@;O  üÙâs}šÈryBÿQOàÁŸƒ9/¸ƒ1-=º³ùÏܲ'½ u•} Ââìù®¡Æùé›(7,F;î|Ì‹;Ž@ËîÏKí¥o"f7îÈÍ !T#B½o÷-{Ä©ýй’4NEý˜4žˆzCëbóƒ´æÓ—Î2K‡ÓÈçáä+'‹Ï—ÒÓÐ$|˜•<ÎÙ#§ëä½/•ò܉­wRÉ;ßÚ[H#Z\âÓ\Ê›âÙå‚Pƒ~Xl§ ¨ç¸ó$Üyî<wžŒ;Ï€;OÂ'áΓpçI¸ó ¸ó$Üyî< wžŒ;Ï€;/Á]a!αçJØs%ì¹×RÞµœcÏ?Iu$hÝŒ¤d·7K‘ïy#=¹âh…¯AŸ ¯AÆ-• ¿nñåÇ$–0‰%Lb “øZ΃1‰%Lb “XÂ$–0‰ ˜Ä&±„I,aK˜ÄLb “8ÆdŠôvéíĒˉŠóyg·¸ÈvžŠíÇúœÌÖªd mEj;OEìû«äåÎSö½ƒUr sçiòñrõV±ñN"âmúT¥}v§r¿³ŒÜïHrŸ¯à©d–žü/Ò“#<Ò«¬÷gX%¯‹ ÕŽ$TùPî½\ï8îçï¬bǽ; ÷ÎÙgT%­'fI˜…ZîôiTŸˆK1aêhg¿ÿáå˳/úO!­}í_‘—dõ”VèvYá¨ÑŸÎö÷ŸÄöŸ\ôýÞ¾»³•}ÉʾŒË²ì«b;R|ÿ'X_ü‡íüçñaíÿQI 7nƒ‚Ç?‚RN¶‡?á`ãÇÕqÝéƒòÊ´$п»¶ó_‡šÿgÛ©ÏW’"úw‹2·àZ³Wã®0ö x Œ@¤ÿµÿÒé¿Õ9¨é¿ŠÓÁó_ÑfÜòôo{þ+,)Ó¿Õ™/óa¯°šþ×ÿéàHõÿ>8jÕëÿJRDÿÅÂ?”ý!_ð!öCHýùC=hqžN´=‰ô¿*EÿDÚëëÿZþW’bú/ÌJây¹€ÎÊü< È/aè}ñßtûßÑQíÿ]IŠè¿èò¿œÕ¿õâ_[û[-ýuû_¸îÀËþ$qúÜâ˽ú“¥ úoÄñ_:Gí£ãðüG}þ«’”ÜÃùÏ`áfû×ÿâWsþÓ]ÜÍ‡Ò«Ì E—¸H·Z„ L†Ñuš{_GÓ«Ùnt‹¤ZÝ”¹g¨Þ.üt‹¦XïÙŽp…<Ë ,ë÷ÿ¢ô×îÍ.É¿1ù7 ÿòïù÷ŸVÆÿ¾=ÛI¹8sŽñ„ô™ø~$\—Iï¡T˱ D¥§Ýi 
AÂL`{ét?õߟ7DŒ‹ßmWê†^¤´/]0GäDctÒz†Fô±òêÇwCœŽ~<¢h]ÌÆôæ¸FØíï£ÿ&wþ°Rbí°Eh\$œˆÑ÷gøù±Ý¤õš)·ãIœûGW BCßo¢GïDè òÐ?7ùZ Îÿ“kˆÿ©Æÿ:8®ãÿV“Â¥ÿöùÖ¸”ûâÞmïÿÁÂ5?ŠÃ¶¿8 Ü„ïùm@2ý¯éþ€þëø?Õ$NÿEîÿÁ¥Üÿ÷n{ÿ®ùèßò ú¿aôŸo é?ZË?‰V÷%÷‘NÿÇÇDë‹éÿðð¨Æÿ¬í¿•$¢L©"®àIô´äYÔIÕwžìí =ôbz3!$¾zÌo’,žG_Éoª3$ÿ‰ÒÖÙ›Þ‹(áá§BOöЂ˜ îÐׇ.‡‹?‡„}Ìǃa-“€¶°3$}Dõßž÷^œ£¿ºtŠÆ·pž´[íf|‹é8~å K¨øþüôMÿÝÛþ‹¨rt}óI»ן¦r#*yבûðîÕÛ¨v!öIÛ‰[aw^“g¥ûÞÙéÏIÏìÂÜ“v'®=*Uúg½x°¯†W¤ÀA\þ|ôùš>;‚ùé“;ßôßN¤ó=ê>"êM¬¤…Ÿë¤¼D›{çÏ3ö­…P mÄÎ>\ÝLÝõ E˜½|êBí‚"*T|8b·ü=Ù¹Ï 92Æ”ôLUá7M>„Åãùõ@~µg¸N—·AoÔUêüÖ”ó#Ê_,a¢#©ãØûØÌê”Ö§|Z—PŸ@r•l8 }F…òôWúŒ&õËd¦]¾Òø sŸðœ[2)/Sê¢Ùt|Ǧ3%'1âCôÐPCc‘Õ@…цµ´±Æ¶'¶–y¦Â­z.Åž4 ¼ÈÀkCFæ\ê£êwQïà/l!Yk4Êœó5 PœiF’ÓhÖ4†´0\Ó( ŸNÃk„¹ œJ†`š·péaÆœyZe)žÛ¾øX N¶où‘ anm%ð¶pbþÚö\C#€­Ã‚,3ÿ·%¶ó_2HmÊ2ĸ1`½1îaˆÙ†ˆÝæÐ6,?D<¦cÊ´ö°ßÑk‰Ç-\v˜PfšJYvàm_r”Ží[nD(˜Ûšþ·p©aúʶ,B›ô[‡›I_|Îo:læ¼´GU//ø|Èò™¨—¥,/p½¼ÀõòBÁǽ_^࿼ÀõòB™ó[‡«å…«½Ürö£7³•³#øŠ|Ù´ ¯U$¦{1äò:V"!²v 7-+¶džß§§DØ?}+Ðm–’Y Z"•Ü ûÎê½³Êî{æúœF˜ø×†ù6À0×1ïlð°­§WJK৺@kçfV/7Óý,EDžCÇR|ûæU Ï­ÂÍê%h*:Ì2´"”/FÓOrY Ò”ƒ¦íSõË®Z”æ<±¦27[0V-MËc®ãŲeÉ‘ç˜hy„ª·z¡Zàf¶žµIW`ÙeÛ+æ®)¦rKIc2•§Œóì _W,€·'«¼f4˜¥îê§—”—&q³ Í ˆ1Iº”b·Öß; ®-—Fe*GÖÚA°UÜhWÅPŽô%¤§Íút™yUÊ´Zj}°ÎÕóZÒ‡h¨%½Ž“*éñ}”ôø>Jz¼rIil¥Šz¼rQŸB1YャÇ÷FÖW{¡³±&¦oúÖðM5çƒü Ï 0Öíi?—'ýežmÝ®\×Õ¶áò#—Å# VAêª<Ú…†Aaz ¶ô¿äÅà2Ž42꤉·r<æ›Y¾S*bÊÅL¨)aŽY"sáWM¦K2SîÚU1Ù>†Y¢Æç’øTãl¥.uïÜZNƒ;Çé,ÀÛhI]j˜#µ‹‘‰Æ×e"3çÛ‘½$zJ™l¶}Pr{Écú"zL†¥Ë£T=æ¿•Ò»,s¼µàÖÒé ÀÝh©‚>K)c@ŸŽ-½Kôm¼´. 
‰¹&Õ6Èée³üô²Ã䃒ÏË|øKè1 jl.…Mõt|-•W!•q-•—‘ʸ–ʬÔR¹ ©Œk©\¢TƵT^Z*Ã;ÙÉöuìœoòɯeMq ðz¸ñ²Æè”[hwÕ+µ¬ÉÚZ5bRõ¤­é³øZp èÓìIWl7Å/5…fn£X‘èíÐ'4s—t¯>Ú£7tÊoè]ñM´¿¿¿ ߈þ-¾=úó·:‰‰M¬'£©;¾ñ†OBô_ï_—ØG«Õ::8@ôïñÑûÛrÂgúªux|ˆÚíÎÁAË9<Ê7ãÑ0¥)vu•’ƒ’¿[’B·Sá»ÓÇû£Åb8E—wè×áh1@ߌ‚ëÉ¥>̽ÁbèÑÿoxu…~™ùÓ¡Os^ýÏCOl-•£ç™¿ }º;d}=aD;¥Tò¸—ÇÇŸ~Ùù޼ ëîèÝ{Sw÷üô7ÊY¾‹f-úgÔîþõ¿Øû¨.m©ûËé9útåzbF×yÉ2ƒ]Ô@„W,f¤D‘ç&j£]´«–î¿'…›h”o—J Ú1©ñêÅÖí~<œîÆ(†ãßÑï½yÑU«ž½yƳégµ|TA˜ä|7C=?d•jƒQgRqÚyÈ^ã¡ì©ùŽ¥÷Ǡ쨾†‚Âl¯qÚë}Ú¡_ ÷2³i"b(”(üóIÅ»Â3”6ì'X`ò9)0K ÞÜÏŸh<>ƒÇÇàÁcðø‡ÇÇáÁãðø8<Ó8\>‡ËÇá‚ãpù8\x.‡kæãÀà80Çù80<ÌÇMã<2«hòuÉOðÓñ"ž©ˆÐŒgj‡]sOEòh%)à\^À `^Z›€Û† ã ބಟàD#./ã‚“€·ƒMí`Þ6µLý‰–a?Á2ƒàf•¡?ÁÏÈÛñLíx¼ÏÔNà r#r…!¹Æ1aaPØ8*, ÇåIÓ=™¨¢´q¸´ÑæïÝäk<ÉO`‚_ò—`9/0‡ |ú(*@~ Ü%ùä'ÔÏŸÃõ^ßpxz#Á‘àq$x <ŽD‚Ç‘àAHð8< G‚!ÁãHð@$x ˆq%b"ô'Pà’€ÀsÄ $°Qú,àò.4È ô'4ž ‰Yy} s^Bæh š0GÑ„9š0ˆ&ÌÑ„A4aŽ& ¢ s4aM˜£ ChÂMDæh šØ5Ãaö(ÀùÊg¯ÐèæQú(À[XÀ-pδ9 %t€]ð—`9/0 xˆãÁñàqÃÁð8†ÇÁð@0<†ƒáq0< ƒá`¸ Ãå`¸ .ÃÁp9. †ËÁpA0\† ‚9s00æ`` ÌÁÀ0˜ƒA00‡`|7 ¶°ﺧýy¬aèã‹s’¯Í^n‰êŸ¿}ÿ3”¢hþk°â €õyökmÚñþ{Pÿ‚!¬ö/ØÁz@ÿ‚¬õ/XÁúýŸ¾ÓUÑÄØÒï¾}÷›®<'¶–þéG=_0õO_Ÿ~ìx®So2†4ÁÖƒÆ Ø¿zà¸ùë¬gƒ`ýêBcŒ_]h ‚í« Ž›¾Îº†1–¯ h ‚áëƒ`÷ºÇÀÍ^g†1f€~ïí{èkñ=C ¡‘ž¡ÁÚÔïwO_ém$ù=(_0xu¡|ÁÞu×[†p©ºÊдâ6*Z¤ }uÞÊ…¡ÁÒuahE0tõßœ¿v "ÜÔ?íx }9ÞJÏЊ`åêZ\ý®i4‚‹ ×0ÁÄÕ»0H°põ.Lcò¤™ÏÊÜö­~ÿ·×ÿ«ç_òüçPþœç¿ò¹]§ÿó‹s û$»ÿ=çÙïÀÚ¯í@Õ^]ËìZ=xÁ¬Õƒ€¬Z=xÁ¨Õ€lZ=xÁ¤Õ€,Z=xÁ Õƒ€ìYÝ_^èƒÌYÝ_àkV÷xÁ˜Õ%À€ò]ž¯m† ¦,2> |Á’Eº² ­ @?çùv3Ö„ÁŠuaG0b]@ØlXvÖ„Á‚u`G0`]ØìWvóÕ„ÁzE¨¢zÎ5~†¸×ÑûïÏ¡|^ÿ=XŸs÷×á :i¿µÏóŸCùsžÿNϬV=~ÁhÕƒàlV=~ÁdÕƒà,V=~Á`ÕƒàìU=~Á\Õƒà¬U]~ÁXÕ…àlU]~ÁTÕ…à,U]~ÁPÕ…àìT]~ÁLÕ…à¬Tü‚‘ê‚_°Q]@ð &ª ~ÁBuÁ/¨. øûÔ¿`žº`ðç¶NQ‘¢id‚qŠ2å_A¦ý%Éw´‚iêê@°L]@†© °Á.E S&º\‚òãЉÞ>·°‹^€›(gÔ;àÖÊÙt¥›Ѓ R=ÁÕƒÌQ=ÁÕƒŒQ=ÁÕ…LQ]ÁÕ… Q]ÁÕ…ÌP]Á u ¡. 
Ô€`‚º,P‚ê‚ ÛŸÞ¼íþrúæç|¨ð¯6Y,(_rÇ‚ ÞXŸgO´¹¦øbAù’+TñÀú<[ë_õÃÒEÙ KWke/,5rÂÒ VöÁÒò,])–=°´1X:ÍÉþWZ¾â~¥å+ÞWÚç+ldß+-_q½ÒòÏ+m €ã•®©«~W¶5·+ý{ê^Wú¼“®ô6dŸ+ý{É.W:.e+½¾êp¥µ ù[imèîVPÅÛJïGs¶‚Š(¾V>uW+T5O+ 뺣•NlšŸ•Î37+½àe¥u9Yé…+}^C.V=¬ô|ÙÁJÏ—ý«€%è^­ÈPÒ=-9Wë9^;ÍÅ~BË1ѳJÏ—«ô|Ù¯ XŠnUÐj%ÝÙ’S¸äµÓ,P¼êQ¥çËUz¾ìO,%%w*(_ô¦‚Ô”ŒO_ñ¥•^;ÍÅ~BºŽèH¥çË~Tz¾ìF¬S%/*(_t¢‚t)”ŒO_q¡51^;Í…!ì¨þSz¾ì>éY¢÷”ž/;Oéù²ï¤§‰®S@û’çоä8éy¢ß”ž/»MAjšè5¥çËNSz¾ì3©y¢Ëоä1´/9LAj¢è/¥çËîR–'zKéù²³”ž/ûJAZ¢è*´/yJíKŽR–)úIéù²›¤$Š^Rz¾ì$¥çË>R’)ºHíKR@û’ƒTAÿ(ˆ)‹îQ Ó½£ ®,:GA|Môùžè16Ñ3 Z.‰ŽQЊDô‹W,¢[ÄE¯(ˆ³‰NQg}¢ %è­*D(pÕ!:DA¬Mô‡‚X“è±ÑJ×ndg( _ò…‚ò%W(ˆ7‰žPo¡ Þ úAéê›ìäK^PP¾ä1Ñ b¢ ”zgãÒy[*6öØ6~óH€ÖnÊkMŸ¡q­ˆøžLÚ âé!÷—"æRzä–ñ´ RÎ…ÉÁTWTÙÓšx‰éŒK›ÎÇ^êtÆ¥NgœÊqSQQ„ø.KÚèMLÛLKÆy(PÙ­YJa¢©X!ã½,±γ֥Ö.¨ó”¬h$tCGIé×€JÐOw¡ò½t‡ÔzÙ@qòŸL…ÅÐäeF“…öƒòÇšÑdá†çùðšÑX>ÀÓ)|è7Ê™Øäëç§Þ Ú(Þ"Ñ:µÆœ¥¹KøÒÜÇ]é}$˜±SöÓEI–^i÷6]˲—'¼Ç$Tò¢¢Xç©’¥Š¯a/æ¾ËèÕRÆä3Nä’29›¶“3yÇ›CÒälÚNÖX.;icÍ`ySlºgQ7(!–´ŸÁbǶQU*x&ÉS !)ý¤Ï’ýHÈÞ&•.‰ÒÔëìwi–!{ù·²‚A\kÀ¯ìëTQ³Â~ îq¹¾,ÅŠ½1—HÉѬ{¶ÙŠ5ç9‡TÊr®qFAúú.$>òNáT1ôoÊìÃÀl´ûAS÷—2xJ:53Ð’*”Ã(™Ï†¼&ñƒ7AüàÊÅ^“øÁŠ\¶øÁ«?¸<ñƒË?x5â—&~pâW!~°Aü¨Ê€ý¨+?Ø l 4e¥ å“@¢Sí²{>“†™RDæ ¶]ÒFRÑü/åŽ!«;§äþò‚]峬b#ÊÚJYmoÖöiÛi_‰yÔfîW3 (×HkGU_€–³ð †Üæ¶–m†8p7@„ƒ®y ÕŠ¤ð2ÏêºÛ ]Õ¹æAT*†Ý*Ű•dƒÄ1ÞqŒ­P5©z]€­8Q¹]nÌRÛ±¤*FRõ¢W²(J»w4ú‹¯8¤\kÝ—?þ-¾ÿsx; L5ˆ.Ûûn)}¤ßÿÙrŽœV|ÿçAÇ9F­öqËiÕ÷V‘žÄ—Fß|ßîäŒïÖ ÞhFoÖ”^G—Ò»G¬‘ýëG;;ô~ÇÉ`4E ´»ó%Fí6ä½ ù™’Å/7¦æ§`ðŒ õ˜ÖM´wKþÝ=ÞF×Ó»†ÙkÖuM›ìcòèÞ²ÿß‘ÿ‡Í±žÑ êÒávgã׃?fáHÂNã¬73vÃrXiBÞÕjÏþÔª¸¬%^‹ <©÷j8ðˆª…¼Ñd8e+Ráí¡‚à…?ºE£€>? 
awoIÍv„÷.y`XJž(²ÈC+þLŽ™íNãñÌmd¼³«èõîÞdo2Õ[‹ÂQÑ;û¢Ð÷%Ãñ@¹(ŒEW#Ÿ°Þ=ˆƒß[ÿåPýÞ¦NôàЇNôÐù/ÃaJëÁ°vOm~"6?ù±-?:òc‡?:Y\|½=i¨ä1ì‡|rÄ_±2‡BÖÛQ*Âf7þâZï®#7Õ »;^°ÎŽ…¬Êÿh-¦¬³[v£ïé‚ øVü*·âW¹ýÝs„Ý…uZÑC[|pć ÿBVºƒ†´ÜŒ·~Ä ¸¹|ù]ÈP2ßù”‡ÜE軚ù 4b# þ‰¦äÏ?ZšÓÒWGèî)õ«ÿL‘›dà£ÿ†m^ùÃac ü¾~ß…¿ýáâÆŸRÔ|«|I&ÿjäÿÁaËÑå¿SËÿ*’*ÿ¥Ê«ðp².ˆC?œ½ùßÓWg=ôS[\+Ð_ÿ³ÛŒîªÞ#_ãÍôg7‹“ŸÚÍÑôjvÒjNšÓæ—&¥ñ±wIþ¹âºà4d"µö[Mòï[ ”ÿ<-³›–yúêÝ/§'4 =ñžýz&d³‘^¡ý 9»E™tè¤ÑÁb6¢™_ ¿Û+Rp¤<'Êû¶#4¶@¸ç.Š¥¯.NZü¥À“º×C÷ ]=‚œmNÐÕ`4&l|Šæ@x:Y†¶µ+@"o„Ë!²ra+¥& ŸÙâ¨)-•šñ‡m¢ýOãµ1›ˆü×&( ÿOIb÷®ÇCø¦Ø^ÄÃÜ &`‰·$(KÑYÄÿ»$ˆÉû|ÈÉ ž“^gYðÚíásŠÀ§~¾odU d²iAÚQœDsîšž•Èú?ÂùNÚôÏeøÇ=i Uø¦$zôž,I½!ÿ{LVHÉéñôÇöÇlíôœý¸$ÿOP@ž]{:ÐNùè-žY*ï!§@3ð+€<›|SÁøí›;é_ÛIƒùÍ#T ÐÙD t»¥AÝJ vòÌñUCýMhkÝ+ör“²þ=øBViã´…pþ”±þovÚÊúÿð Ó©×ÿU$oüuLÖŸûOöwâuû÷ úr÷Éþ~2öGÓÁxüÅ¦áø‡³³¿zŠvþþ}£ÛÝE?¹¤‘îËW§?÷ÉÃi(ð]¤Ú–iéWoO{/ÎI¡Y’ÝV ÎhcÏ_=ß%?ž¿¢?’N\:ÆNµ Ô)%~w<L)&vþîOÐOWhoº³ål¡NuªSêT§:Õ©NuªSêT§:Õ©NuªSêT§:Õ©N[šþ?™6È;¸blis-0.6.1/frame/compat/cblas/f77_sub/000077500000000000000000000000001360743507500173765ustar00rootroot00000000000000blis-0.6.1/frame/compat/cblas/f77_sub/f77_amax_sub.c000066400000000000000000000041451360743507500220300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "f77_amax_sub.h" // // Define CBLAS subrotine wrapper interfaces. // #undef GENTFUNC #define GENTFUNC( ftype_x, chx, blasname, blisname ) \ \ void PASTEF773(i,chx,blasname,sub) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx, \ f77_int* rval \ ) \ { \ *rval = PASTEF772(i,chx,blasname) \ ( \ n, \ x, incx \ ); \ } #ifdef BLIS_ENABLE_CBLAS INSERT_GENTFUNC_BLAS( amax, NULL ) #endif blis-0.6.1/frame/compat/cblas/f77_sub/f77_amax_sub.h000066400000000000000000000037601360743507500220370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype CBLAS subroutine wrapper interfaces. // #undef GENTPROT #define GENTPROT( ftype_x, chx, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF773(i,chx,blasname,sub) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx, \ f77_int* rval \ ); #ifdef BLIS_ENABLE_CBLAS INSERT_GENTPROT_BLAS( amax ) #endif blis-0.6.1/frame/compat/cblas/f77_sub/f77_asum_sub.c000066400000000000000000000041751360743507500220520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "f77_asum_sub.h" // // Define CBLAS subrotine wrapper interfaces. // #undef GENTFUNCR2 #define GENTFUNCR2( ftype_x, ftype_r, chx, chr, blasname, blisname ) \ \ void PASTEF773(chr,chx,blasname,sub) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx, \ ftype_r* rval \ ) \ { \ *rval = PASTEF772(chr,chx,blasname) \ ( \ n, \ x, incx \ ); \ } #ifdef BLIS_ENABLE_CBLAS INSERT_GENTFUNCR2_BLAS( asum, NULL ) #endif blis-0.6.1/frame/compat/cblas/f77_sub/f77_asum_sub.h000066400000000000000000000040061360743507500220500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype CBLAS subroutine wrapper interfaces. 
// #undef GENTPROTR2 #define GENTPROTR2( ftype_x, ftype_r, chx, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF773(chr,chx,blasname,sub) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx, \ ftype_r* rval \ ); #ifdef BLIS_ENABLE_CBLAS INSERT_GENTPROTR2_BLAS( asum ) #endif blis-0.6.1/frame/compat/cblas/f77_sub/f77_dot_sub.c000066400000000000000000000057771360743507500217040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #include "f77_dot_sub.h" // // Define CBLAS subrotine wrapper interfaces. // #undef GENTFUNCDOT #define GENTFUNCDOT( ftype, ch, chc, blis_conjx, blasname, blisname ) \ \ void PASTEF773(ch,blasname,chc,sub) \ ( \ const f77_int* n, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy, \ ftype* rval \ ) \ { \ *rval = PASTEF772(ch,blasname,chc) \ ( \ n, \ x, incx, \ y, incy \ ); \ } #ifdef BLIS_ENABLE_CBLAS INSERT_GENTFUNCDOT_BLAS( dot, NULL ) // -- "Black sheep" dot product function definitions -- // Input vectors stored in single precision, computed in double precision, // with result returned in single precision. void PASTEF772(sds,dot,sub) ( const f77_int* n, const float* sb, const float* x, const f77_int* incx, const float* y, const f77_int* incy, float* rval ) { *rval = PASTEF77(sds,dot) ( n, sb, x, incx, y, incy ); } // Input vectors stored in single precision, computed in double precision, // with result returned in double precision. void PASTEF772(ds,dot,sub) ( const f77_int* n, const float* x, const f77_int* incx, const float* y, const f77_int* incy, double* rval ) { *rval = PASTEF77(ds,dot) ( n, x, incx, y, incy ); } #endif blis-0.6.1/frame/compat/cblas/f77_sub/f77_dot_sub.h000066400000000000000000000050261360743507500216740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype CBLAS subroutine wrapper interfaces. // #undef GENTPROTDOT #define GENTPROTDOT( ftype, ch, chc, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF773(ch,blasname,chc,sub) \ ( \ const f77_int* n, \ const ftype* x, const f77_int* incx, \ const ftype* y, const f77_int* incy, \ ftype* rval \ ); #ifdef BLIS_ENABLE_CBLAS INSERT_GENTPROTDOT_BLAS( dot ) // -- "Black sheep" dot product function prototypes -- BLIS_EXPORT_BLAS void PASTEF772(sds,dot,sub) ( const f77_int* n, const float* sb, const float* x, const f77_int* incx, const float* y, const f77_int* incy, float* rval ); BLIS_EXPORT_BLAS void PASTEF772(ds,dot,sub) ( const f77_int* n, const float* x, const f77_int* incx, const float* y, const f77_int* incy, double* rval ); #endif blis-0.6.1/frame/compat/cblas/f77_sub/f77_nrm2_sub.c000066400000000000000000000041751360743507500217630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "blis.h"
#include "f77_nrm2_sub.h"

//
// Define CBLAS subroutine wrapper interfaces.
//
// GENTFUNCR2 emits one void function, named via
// PASTEF773(chr,chx,blasname,sub), that forwards n, x and incx to the
// function named via PASTEF772(chr,chx,blasname) and stores the value
// that function returns in *rval. The blisname argument is accepted but
// not referenced by the macro body.
//
#undef  GENTFUNCR2
#define GENTFUNCR2( ftype_x, ftype_r, chx, chr, blasname, blisname ) \
\
void PASTEF773(chr,chx,blasname,sub) \
     ( \
       const f77_int* n, \
       const ftype_x* x, \
       const f77_int* incx, \
       ftype_r*       rval \
     ) \
{ \
	const ftype_r result = PASTEF772(chr,chx,blasname)( n, x, incx ); \
\
	*rval = result; \
}

// The nrm2 wrappers are instantiated only when BLIS_ENABLE_CBLAS is defined.
#ifdef BLIS_ENABLE_CBLAS
INSERT_GENTFUNCR2_BLAS( nrm2, NULL )
#endif
blis-0.6.1/frame/compat/cblas/f77_sub/f77_nrm2_sub.h000066400000000000000000000040061360743507500217610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype CBLAS subroutine wrapper interfaces. // #undef GENTPROTR2 #define GENTPROTR2( ftype_x, ftype_r, chx, chr, blasname ) \ \ BLIS_EXPORT_BLAS void PASTEF773(chr,chx,blasname,sub) \ ( \ const f77_int* n, \ const ftype_x* x, const f77_int* incx, \ ftype_r* rval \ ); #ifdef BLIS_ENABLE_CBLAS INSERT_GENTPROTR2_BLAS( nrm2 ) #endif blis-0.6.1/frame/compat/cblas/integrate-cblas-tarball.sh000077500000000000000000000204211360743507500231430ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

#
# integrate-cblas-tarball.sh
#
# Field G. Van Zee
#

# print_usage
#
# Prints the help text to stdout and then terminates the script with exit
# status 1. Uses the global script_name when it is set and otherwise falls
# back to the basename of $0.
print_usage()
{
	local name=${script_name:-${0##*/}}

	# Echo usage info
	echo " "
	echo " " "${name}"
	echo " "
	echo " Field G. Van Zee"
	echo " "
	echo " Unpacks a CBLAS tarball and performs whatever preprocessing is"
	echo " necessary and appropriate in order to integrate the CBLAS source"
	echo " code into BLIS."
	echo " "
	echo " IMPORTANT: This script is designed to be run from the following"
	echo " directory:"
	echo " "
	echo " frame/compat/cblas"
	echo " "
	echo " Usage:"
	echo " ${name} tarball"
	echo " "
	echo " Arguments:"
	echo " "
	echo " tarball The name of the CBLAS package that will be unpacked."
	echo " If tarball is not in the current directory, the full"
	echo " directory path should be given."
	echo " "

	# Exit with non-zero exit status
	exit 1
}

# blisify_file src dst
#
# Writes dst as a copy of src preceded by the four BLIS #include lines and
# '#ifdef BLIS_ENABLE_CBLAS', and followed by '#endif'. Returns non-zero
# if src cannot be read or dst cannot be written.
blisify_file()
{
	local src="$1"
	local dst="$2"

	# The steps are chained with && so that a failing cat is not masked
	# by the final printf succeeding.
	{
		printf '%s\n' \
			'#include "bli_config.h"' \
			'#include "bli_system.h"' \
			'#include "bli_type_defs.h"' \
			'#include "bli_cblas.h"' \
			'#ifdef BLIS_ENABLE_CBLAS' &&
		cat -- "${src}" &&
		printf '%s\n' '#endif'
	} > "${dst}"
}

# main tarball
#
# Unpacks the given CBLAS tarball in the current directory, copies the two
# CBLAS headers and every cblas_*.c source (wrapped by blisify_file) into
# a freshly created '${src_dir}' directory, applies a fixed list of sed
# fixes to the copies, and finally removes the unpacked '${cblas_dir}'
# directory.
#
# Returns 0 on success. Returns 1 if '${src_dir}' already exists, if
# extraction or any copy step fails, or if any of the sed fixes could not
# be applied. With -h, an unknown option, or the wrong number of
# arguments, the usage text is printed and the script exits with status 1.
main()
{
	# -- BEGIN GLOBAL VARIABLE DECLARATIONS --

	# The name of the script, stripped of any preceding path.
	script_name=${0##*/}

	# The name and path of the CBLAS tarball.
	tarball_path=

	# The name of the CBLAS directory after it is unpacked.
	cblas_dir=CBLAS

	# The name of the sub-directory that we will create and into which
	# we will copy the source code for CBLAS wrappers.
	src_dir=src

	# -- END GLOBAL VARIABLE DECLARATIONS --

	# Overall exit status; set to 1 when a non-fatal fix step fails.
	local status=0

	# Process our command line options. OPTIND is made local and reset so
	# that option parsing always starts at the first argument of main.
	local opt OPTIND=1
	while getopts ":h" opt; do
		case "${opt}" in
			h  ) print_usage ;;
			\? ) print_usage ;;
		esac
	done
	shift $((OPTIND - 1))

	# Check the number of arguments after command line option processing.
	if [ "$#" -ne 1 ]; then
		print_usage
	fi
	tarball_path=$1
	echo "${script_name}: preparing to extract from '${tarball_path}'."

	# Check that src_dir does not already exist. If it does, abort with a
	# failure status so that callers can tell nothing was integrated.
	if [ -d "${src_dir}" ]; then
		echo "${script_name}: found '${src_dir}' directory; please remove before proceeding." >&2
		return 1
	fi

	# Un-tar and un-gzip the tarball.
	echo "${script_name}: extracting '${tarball_path}'."
	echo "${script_name}: expecting unpacked directory to be named '${cblas_dir}'."
	if ! tar xzf "${tarball_path}"; then
		echo "${script_name}: failed to extract '${tarball_path}'." >&2
		return 1
	fi
	if [ ! -d "${cblas_dir}" ]; then
		echo "${script_name}: '${cblas_dir}' directory not found after extraction." >&2
		return 1
	fi

	# Create the directory into which we will copy the source code for the
	# CBLAS wrappers.
	echo "${script_name}: creating local '${src_dir}' directory."
	if ! mkdir -p -- "${src_dir}"; then
		echo "${script_name}: failed to create '${src_dir}'." >&2
		return 1
	fi

	# Copy the cblas.h header file.
	echo "${script_name}: copying cblas.h from '${cblas_dir}/include' to '${src_dir}'."
	if ! cp -- "${cblas_dir}/include/cblas.h" "${src_dir}/cblas.h"; then
		echo "${script_name}: failed to copy cblas.h." >&2
		return 1
	fi

	# Copy the cblas_f77.h header file. The file is copied verbatim; this
	# script does not strip any prototypes from it.
	echo "${script_name}: copying cblas_f77.h from '${cblas_dir}/include' to '${src_dir}'"
	if ! cp -- "${cblas_dir}/include/cblas_f77.h" "${src_dir}/cblas_f77.h"; then
		echo "${script_name}: failed to copy cblas_f77.h." >&2
		return 1
	fi

	# Process each CBLAS source file.
	echo "${script_name}: copying source from '${cblas_dir}/src' to '${src_dir}' with"
	echo "${script_name}: '#ifdef BLIS_ENABLE_CBLAS' guard:"
	local cbl_src_filepath cbl_src_file
	for cbl_src_filepath in "${cblas_dir}"/src/cblas_*.c; do

		# An unmatched glob is left as the literal pattern; skip it.
		[ -e "${cbl_src_filepath}" ] || continue

		# Strip the path to obtain just the filename.
		cbl_src_file=${cbl_src_filepath##*/}

		# Prepend the BLIS includes and the ifdef, append the endif, and
		# write the result to its new location in ${src_dir}.
		echo "${script_name}: ...copying/BLIS-ifying ${cbl_src_file}"
		if ! blisify_file "${cbl_src_filepath}" "${src_dir}/${cbl_src_file}"; then
			echo "${script_name}: failed to copy '${cbl_src_filepath}'." >&2
			return 1
		fi
	done

	# Process some bugfixes to syntax errors present in the CBLAS source.
	echo "${script_name}: fixing syntax errors in CBLAS source:"
	fix_file "${src_dir}/cblas_chpmv.c" "s/ F77_K=K,//g"       || status=1
	fix_file "${src_dir}/cblas_chpmv.c" "s/ F77_lda=lda,//g"   || status=1
	fix_file "${src_dir}/cblas_zhpmv.c" "s/ F77_K=K,//g"       || status=1
	fix_file "${src_dir}/cblas_zhpmv.c" "s/ F77_lda=lda,//g"   || status=1
	fix_file "${src_dir}/cblas_ssyr2.c" "s/F77__lda/F77_lda/g" || status=1
	fix_file "${src_dir}/cblas_dsyr2.c" "s/F77__lda/F77_lda/g" || status=1
	fix_file "${src_dir}/cblas_strsm.c" "s/F77_N=M/F77_M=M/g"  || status=1

	# Now process some optional fixes that eliminate compiler warnings.
	# Each entry is 'routine:x' or 'routine:y', selecting the incx or the
	# incy substitution for cblas_<routine>.c.
	echo "${script_name}: fixing compiler warnings in CBLAS source:"
	local incx_string="s/, incx=incX//g"
	local incy_string="s/, incy=incY//g"
	local fix sedstring
	for fix in \
		cgbmv:x cgemv:x cgerc:y chbmv:x chemv:x cher:x \
		cher2:x cher2:y chpmv:x chpr:x  chpr2:x chpr2:y \
		zgbmv:x zgemv:x zgerc:y zhbmv:x zhemv:x zher:x \
		zher2:x zher2:y zhpmv:x zhpr:x  zhpr2:x zhpr2:y
	do
		case "${fix##*:}" in
			x ) sedstring=${incx_string} ;;
			y ) sedstring=${incy_string} ;;
		esac
		fix_file "${src_dir}/cblas_${fix%%:*}.c" "${sedstring}" || status=1
	done

	# Now that we're done with everything, we can remove the CBLAS directory.
	# The ':?' guard aborts instead of expanding to an empty path.
	echo "${script_name}: removing '${cblas_dir}' directory."
	rm -rf -- "${cblas_dir:?}"

	# Exit peacefully unless one of the fixes above could not be applied.
	return "${status}"
}

# fix_file filepath sedstring
#
# Applies the sed expression 'sedstring' to the file at 'filepath' and
# replaces the file with the result. The output is first written to
# '<filepath>.new' and only moved into place when sed succeeds. Returns
# non-zero, leaving 'filepath' untouched, when the file does not exist or
# sed fails.
fix_file()
{
	# Get the first function argument: the filename and path to fix.
	local filepath="$1"

	# Get the second function argument: the sed command to apply.
	local sedstring="$2"

	local filename=${filepath##*/}
	local name=${script_name:-${0##*/}}

	# Refuse to "fix" a missing file; redirecting into it would otherwise
	# create an empty source file.
	if [ ! -f "${filepath}" ]; then
		echo "${name}: ...cannot fix ${filename}: file not found." >&2
		return 1
	fi

	echo "${name}: ...fixing ${filename} with 'sed -e ${sedstring}'"

	if sed -e "${sedstring}" -- "${filepath}" > "${filepath}.new"; then
		mv -- "${filepath}.new" "${filepath}"
	else
		rm -f -- "${filepath}.new"
		return 1
	fi
}

# The script's main entry point, passing all parameters given.
main "$@" blis-0.6.1/frame/compat/cblas/src/000077500000000000000000000000001360743507500167115ustar00rootroot00000000000000blis-0.6.1/frame/compat/cblas/src/cblas.h000066400000000000000000000765051360743507500201630ustar00rootroot00000000000000#ifndef CBLAS_H #define CBLAS_H #include // We need to #include "bli_type_defs.h" in order to pull in the // definition of f77_int. But in order to #include that header, we // also need to pull in the headers that precede it in blis.h. #include "bli_system.h" #include "bli_config.h" #include "bli_config_macro_defs.h" #include "bli_type_defs.h" /* * Enumerated and derived types */ enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; #ifdef __cplusplus extern "C" { #endif /* * =========================================================================== * Prototypes for level 1 BLAS functions (complex are recast as routines) * =========================================================================== */ BLIS_EXPORT_BLAS float cblas_sdsdot(f77_int N, float alpha, const float *X, f77_int incX, const float *Y, f77_int incY); BLIS_EXPORT_BLAS double cblas_dsdot(f77_int N, const float *X, f77_int incX, const float *Y, f77_int incY); BLIS_EXPORT_BLAS float cblas_sdot(f77_int N, const float *X, f77_int incX, const float *Y, f77_int incY); BLIS_EXPORT_BLAS double cblas_ddot(f77_int N, const double *X, f77_int incX, const double *Y, f77_int incY); /* * Functions having prefixes Z and C only */ BLIS_EXPORT_BLAS void cblas_cdotu_sub(f77_int N, const void *X, f77_int incX, const void *Y, f77_int incY, void *dotu); BLIS_EXPORT_BLAS void cblas_cdotc_sub(f77_int N, const void *X, f77_int incX, const void *Y, f77_int incY, void *dotc); BLIS_EXPORT_BLAS void cblas_zdotu_sub(f77_int N, const void *X, f77_int incX, 
const void *Y, f77_int incY, void *dotu); BLIS_EXPORT_BLAS void cblas_zdotc_sub(f77_int N, const void *X, f77_int incX, const void *Y, f77_int incY, void *dotc); /* * Functions having prefixes S D SC DZ */ BLIS_EXPORT_BLAS float cblas_snrm2(f77_int N, const float *X, f77_int incX); BLIS_EXPORT_BLAS float cblas_sasum(f77_int N, const float *X, f77_int incX); BLIS_EXPORT_BLAS double cblas_dnrm2(f77_int N, const double *X, f77_int incX); BLIS_EXPORT_BLAS double cblas_dasum(f77_int N, const double *X, f77_int incX); BLIS_EXPORT_BLAS float cblas_scnrm2(f77_int N, const void *X, f77_int incX); BLIS_EXPORT_BLAS float cblas_scasum(f77_int N, const void *X, f77_int incX); BLIS_EXPORT_BLAS double cblas_dznrm2(f77_int N, const void *X, f77_int incX); BLIS_EXPORT_BLAS double cblas_dzasum(f77_int N, const void *X, f77_int incX); /* * Functions having standard 4 prefixes (S D C Z) */ BLIS_EXPORT_BLAS f77_int cblas_isamax(f77_int N, const float *X, f77_int incX); BLIS_EXPORT_BLAS f77_int cblas_idamax(f77_int N, const double *X, f77_int incX); BLIS_EXPORT_BLAS f77_int cblas_icamax(f77_int N, const void *X, f77_int incX); BLIS_EXPORT_BLAS f77_int cblas_izamax(f77_int N, const void *X, f77_int incX); /* * =========================================================================== * Prototypes for level 1 BLAS routines * =========================================================================== */ /* * Routines with standard 4 prefixes (s, d, c, z) */ void BLIS_EXPORT_BLAS cblas_sswap(f77_int N, float *X, f77_int incX, float *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_scopy(f77_int N, const float *X, f77_int incX, float *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_saxpy(f77_int N, float alpha, const float *X, f77_int incX, float *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_dswap(f77_int N, double *X, f77_int incX, double *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_dcopy(f77_int N, const double *X, f77_int incX, double *Y, f77_int incY); void BLIS_EXPORT_BLAS 
cblas_daxpy(f77_int N, double alpha, const double *X, f77_int incX, double *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_cswap(f77_int N, void *X, f77_int incX, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_ccopy(f77_int N, const void *X, f77_int incX, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_caxpy(f77_int N, const void *alpha, const void *X, f77_int incX, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_zswap(f77_int N, void *X, f77_int incX, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_zcopy(f77_int N, const void *X, f77_int incX, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_zaxpy(f77_int N, const void *alpha, const void *X, f77_int incX, void *Y, f77_int incY); /* * Routines with S and D prefix only */ void BLIS_EXPORT_BLAS cblas_srotg(float *a, float *b, float *c, float *s); void BLIS_EXPORT_BLAS cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); void BLIS_EXPORT_BLAS cblas_srot(f77_int N, float *X, f77_int incX, float *Y, f77_int incY, const float c, const float s); void BLIS_EXPORT_BLAS cblas_srotm(f77_int N, float *X, f77_int incX, float *Y, f77_int incY, const float *P); void BLIS_EXPORT_BLAS cblas_drotg(double *a, double *b, double *c, double *s); void BLIS_EXPORT_BLAS cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); void BLIS_EXPORT_BLAS cblas_drot(f77_int N, double *X, f77_int incX, double *Y, f77_int incY, const double c, const double s); void BLIS_EXPORT_BLAS cblas_drotm(f77_int N, double *X, f77_int incX, double *Y, f77_int incY, const double *P); /* * Routines with S D C Z CS and ZD prefixes */ void BLIS_EXPORT_BLAS cblas_sscal(f77_int N, float alpha, float *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_dscal(f77_int N, double alpha, double *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_cscal(f77_int N, const void *alpha, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_zscal(f77_int N, const void *alpha, void *X, f77_int incX); void BLIS_EXPORT_BLAS 
cblas_csscal(f77_int N, float alpha, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_zdscal(f77_int N, double alpha, void *X, f77_int incX); /* * =========================================================================== * Prototypes for level 2 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ void BLIS_EXPORT_BLAS cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, float alpha, const float *A, f77_int lda, const float *X, f77_int incX, float beta, float *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, f77_int KL, f77_int KU, float alpha, const float *A, f77_int lda, const float *X, f77_int incX, float beta, float *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const float *A, f77_int lda, float *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const float *A, f77_int lda, float *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const float *Ap, float *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const float *A, f77_int lda, float *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const float *A, f77_int lda, float *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const float 
*Ap, float *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, double alpha, const double *A, f77_int lda, const double *X, f77_int incX, double beta, double *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, f77_int KL, f77_int KU, double alpha, const double *A, f77_int lda, const double *X, f77_int incX, double beta, double *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const double *A, f77_int lda, double *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const double *A, f77_int lda, double *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const double *Ap, double *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const double *A, f77_int lda, double *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const double *A, f77_int lda, double *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const double *Ap, double *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, 
f77_int M, f77_int N, f77_int KL, f77_int KU, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *A, f77_int lda, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const void *A, f77_int lda, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *Ap, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *A, f77_int lda, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const void *A, f77_int lda, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *Ap, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, f77_int KL, f77_int KU, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *A, f77_int lda, void *X, f77_int incX); 
void BLIS_EXPORT_BLAS cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const void *A, f77_int lda, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *Ap, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *A, f77_int lda, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const void *A, f77_int lda, void *X, f77_int incX); void BLIS_EXPORT_BLAS cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *Ap, void *X, f77_int incX); /* * Routines with S and D prefixes only */ void BLIS_EXPORT_BLAS cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const float *A, f77_int lda, const float *X, f77_int incX, float beta, float *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, f77_int K, float alpha, const float *A, f77_int lda, const float *X, f77_int incX, float beta, float *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const float *Ap, const float *X, f77_int incX, float beta, float *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_sger(enum CBLAS_ORDER order, f77_int M, f77_int N, float alpha, const float *X, f77_int incX, const float *Y, f77_int incY, float *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const float *X, f77_int incX, float *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_sspr(enum CBLAS_ORDER 
order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const float *X, f77_int incX, float *Ap); void BLIS_EXPORT_BLAS cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const float *X, f77_int incX, const float *Y, f77_int incY, float *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const float *X, f77_int incX, const float *Y, f77_int incY, float *A); void BLIS_EXPORT_BLAS cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *A, f77_int lda, const double *X, f77_int incX, double beta, double *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, f77_int K, double alpha, const double *A, f77_int lda, const double *X, f77_int incX, double beta, double *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *Ap, const double *X, f77_int incX, double beta, double *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_dger(enum CBLAS_ORDER order, f77_int M, f77_int N, double alpha, const double *X, f77_int incX, const double *Y, f77_int incY, double *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *X, f77_int incX, double *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *X, f77_int incX, double *Ap); void BLIS_EXPORT_BLAS cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *X, f77_int incX, const double *Y, f77_int incY, double *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *X, f77_int incX, const double *Y, f77_int incY, double *A); /* * Routines with C and Z prefixes only */ void BLIS_EXPORT_BLAS 
cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *Ap, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_cgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_cgerc(enum CBLAS_ORDER order, f77_int M, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const void *X, f77_int incX, void *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const void *X, f77_int incX, void *A); void BLIS_EXPORT_BLAS cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *Ap); void BLIS_EXPORT_BLAS cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void 
*X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *Ap, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY); void BLIS_EXPORT_BLAS cblas_zgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_zgerc(enum CBLAS_ORDER order, f77_int M, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const void *X, f77_int incX, void *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const void *X, f77_int incX, void *A); void BLIS_EXPORT_BLAS cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda); void BLIS_EXPORT_BLAS cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *Ap); /* * =========================================================================== * Prototypes for level 3 BLAS * =========================================================================== */ /* * Routines with standard 4 prefixes (S, D, C, Z) */ void BLIS_EXPORT_BLAS cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, f77_int K, float alpha, const float *A, f77_int lda, const float *B, f77_int ldb, float beta, float *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, float alpha, const float *A, f77_int lda, const float *B, f77_int ldb, float beta, 
float *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, float alpha, const float *A, f77_int lda, float beta, float *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, float alpha, const float *A, f77_int lda, const float *B, f77_int ldb, float beta, float *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, float alpha, const float *A, f77_int lda, float *B, f77_int ldb); void BLIS_EXPORT_BLAS cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, float alpha, const float *A, f77_int lda, float *B, f77_int ldb); void BLIS_EXPORT_BLAS cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, f77_int K, double alpha, const double *A, f77_int lda, const double *B, f77_int ldb, double beta, double *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, double alpha, const double *A, f77_int lda, const double *B, f77_int ldb, double beta, double *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, double alpha, const double *A, f77_int lda, double beta, double *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, double alpha, const double *A, f77_int lda, const double *B, f77_int ldb, double beta, double *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO 
Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, double alpha, const double *A, f77_int lda, double *B, f77_int ldb); void BLIS_EXPORT_BLAS cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, double alpha, const double *A, f77_int lda, double *B, f77_int ldb); void BLIS_EXPORT_BLAS cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, void *B, f77_int ldb); void BLIS_EXPORT_BLAS cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, void *B, f77_int ldb); void BLIS_EXPORT_BLAS cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, f77_int 
K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, void *B, f77_int ldb); void BLIS_EXPORT_BLAS cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, void *B, f77_int ldb); /* * Routines with prefixes C and Z only */ void BLIS_EXPORT_BLAS cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, float alpha, const void *A, f77_int lda, float beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, 
f77_int ldb, float beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, double alpha, const void *A, f77_int lda, double beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, double beta, void *C, f77_int ldc); void BLIS_EXPORT_BLAS cblas_xerbla(f77_int p, const char *rout, const char *form, ...); #ifdef __cplusplus } #endif #endif blis-0.6.1/frame/compat/cblas/src/cblas_caxpy.c000066400000000000000000000011001360743507500213350ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_caxpy.c * * The program is a C interface to caxpy. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_caxpy( f77_int N, const void *alpha, const void *X, f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_caxpy( &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ccopy.c000066400000000000000000000010311360743507500213310ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ccopy.c * * The program is a C interface to ccopy. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ccopy( f77_int N, const void *X, f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_ccopy( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cdotc_sub.c000066400000000000000000000011731360743507500221700ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cdotc_sub.c * * The program is a C interface to cdotc. * It calls the fortran wrapper before calling cdotc. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_cdotc_sub( f77_int N, const void *X, f77_int incX, const void *Y, f77_int incY,void *dotc) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_cdotc_sub( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)dotc); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cdotu_sub.c000066400000000000000000000011751360743507500222140ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cdotu_sub.f * * The program is a C interface to cdotu. * It calls the forteran wrapper before calling cdotu. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_cdotu_sub( f77_int N, const void *X, f77_int incX, const void *Y, f77_int incY,void *dotu) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_cdotu_sub( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)dotu); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cgbmv.c000066400000000000000000000103761360743507500213260ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cgbmv.c * The program is a C interface of cgbmv * * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, f77_int KL, f77_int KU, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char TA; #ifdef F77_CHAR F77_CHAR F77_TA; #else #define F77_TA &TA #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; F77_INT F77_KL=KL,F77_KU=KU; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_KL KL #define F77_KU KU #define F77_incX incX #define F77_incY incY #endif int n=0, i=0; const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta; float ALPHA[2],BETA[2]; int tincY, tincx; float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_cgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)X, 
&F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; TA = 'N'; if (M > 0) { n = M << 1; x = malloc(n*sizeof(float)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif if( incY > 0 ) tincY = incY; else tincY = -incY; y++; if (N > 0) { i = tincY << 1; n = i * N ; st = y + n; do { *y = -(*y); y += i; } while(y != st); y -= n; } } else x = (float *) X; } else { cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif if (TransA == CblasConjTrans) F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, (scomplex*)ALPHA, (scomplex*)A ,&F77_lda, (scomplex*)x,&F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); else F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, (scomplex*)alpha, (scomplex*)A ,&F77_lda, (scomplex*)x,&F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); if (TransA == CblasConjTrans) { if (x != X) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } } else cblas_xerbla(1, "cblas_cgbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cgemm.c000066400000000000000000000060541360743507500213160ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_cgemm.c * This program is a C interface to cgemm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc) { char TA, TB; #ifdef F77_CHAR F77_CHAR F77_TA, F77_TB; #else #define F77_TA &TA #define F77_TB &TB #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if(TransA == CblasTrans) TA='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(TransB == CblasTrans) TB='T'; else if ( TransB == CblasConjTrans ) TB='C'; else if ( TransB == CblasNoTrans ) TB='N'; else { cblas_xerbla(3, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); F77_TB = C2F_CHAR(&TB); #endif F77_cgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if(TransA == CblasTrans) TB='T'; else if ( TransA == CblasConjTrans ) TB='C'; else if ( TransA == CblasNoTrans ) TB='N'; else { cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(TransB == CblasTrans) TA='T'; else if ( TransB == CblasConjTrans ) TA='C'; else if ( TransB == CblasNoTrans ) TA='N'; else { cblas_xerbla(2, "cblas_cgemm", "Illegal TransB setting, 
%d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); F77_TB = C2F_CHAR(&TB); #endif F77_cgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, (scomplex*)alpha, (scomplex*)B, &F77_ldb, (scomplex*)A, &F77_lda, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_cgemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cgemv.c000066400000000000000000000102371360743507500213250ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cgemv.c * The program is a C interface of cgemv * * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char TA; #ifdef F77_CHAR F77_CHAR F77_TA; #else #define F77_TA &TA #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif int n=0, i=0; const float *xx= (const float *)X; float ALPHA[2],BETA[2]; int tincY, tincx; float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0; const float *stx = x; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_cgemv(F77_TA, &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { 
RowMajorStrg = 1; if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { ALPHA[0]= *( (const float *) alpha ); ALPHA[1]= -( *( (const float *) alpha+1) ); BETA[0]= *( (const float *) beta ); BETA[1]= -( *( (const float *) beta+1 ) ); TA = 'N'; if (M > 0) { n = M << 1; x = malloc(n*sizeof(float)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; F77_incX = 1; if(incY > 0) tincY = incY; else tincY = -incY; y++; if (N > 0) { i = tincY << 1; n = i * N ; st = y + n; do { *y = -(*y); y += i; } while(y != st); y -= n; } stx = x; } else stx = (const float *)X; } else { cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif if (TransA == CblasConjTrans) F77_cgemv(F77_TA, &F77_N, &F77_M, (scomplex*)ALPHA, (scomplex*)A, &F77_lda, (scomplex*)stx, &F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); else F77_cgemv(F77_TA, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)x, &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); if (TransA == CblasConjTrans) { if (x != (const float *)X) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } } else cblas_xerbla(1, "cblas_cgemv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cgerc.c000066400000000000000000000037651360743507500213170ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cgerc.c * The program is a C interface to cgerc. 
* * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_cgerc(enum CBLAS_ORDER order, f77_int M, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_incX incX #define F77_incY incY #define F77_lda lda #endif int n, i, tincy; float *y=(float *)Y, *yy=(float *)Y, *ty, *st; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { F77_cgerc( &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (N > 0) { n = N << 1; y = malloc(n*sizeof(float)); ty = y; if( incY > 0 ) { i = incY << 1; tincy = 2; st= y+n; } else { i = incY *(-2); tincy = -2; st = y-2; y +=(n-2); } do { *y = *yy; y[1] = -yy[1]; y += tincy ; yy += i; } while (y != st); y = ty; #ifdef F77_INT F77_incY = 1; #else incY = 1; #endif } else y = (float *) Y; F77_cgeru( &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)y, &F77_incY, (scomplex*)X, &F77_incX, (scomplex*)A, &F77_lda); if(Y!=y) free(y); } else cblas_xerbla(1, "cblas_cgerc", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cgeru.c000066400000000000000000000023431360743507500213300ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cgeru.c * The program is a C interface to cgeru. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_cgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_incX incX #define F77_incY incY #define F77_lda lda #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { F77_cgeru( &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; F77_cgeru( &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)Y, &F77_incY, (scomplex*)X, &F77_incX, (scomplex*)A, &F77_lda); } else cblas_xerbla(1, "cblas_cgeru","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_chbmv.c000066400000000000000000000071131360743507500213220ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chbmv.c * The program is a C interface to chbmv * * Keita Teranishi 5/18/98 * */ #include "cblas.h" #include "cblas_f77.h" #include #include void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,f77_int N,f77_int K, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif int n, i=0; const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta; float ALPHA[2],BETA[2]; int tincY, tincx; float *x=(float *)X, *y=(float *)Y, *st=0, *tx; extern int CBLAS_CallFromC; extern int RowMajorStrg; 
RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_chbmv(F77_UL, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; if (N > 0) { n = N << 1; x = malloc(n*sizeof(float)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif if(incY > 0) tincY = incY; else tincY = -incY; y++; i = tincY << 1; n = i * N ; st = y + n; do { *y = -(*y); y += i; } while(y != st); y -= n; } else x = (float *) X; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_chbmv(F77_UL, &F77_N, &F77_K, (scomplex*)ALPHA, (scomplex*)A ,&F77_lda, (scomplex*)x,&F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); } else { cblas_xerbla(1, "cblas_chbmv","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( order == CblasRowMajor ) { RowMajorStrg = 1; if(X!=x) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_chemm.c000066400000000000000000000053561360743507500213230ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_chemm.c * This program is a C interface to chemm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_chemm(enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR F77_CHAR F77_SD, F77_UL; #else #define F77_SD &SD #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_chemm(F77_SD, F77_UL, &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_chemm(F77_SD, F77_UL, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, 
(scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_chemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_chemv.c000066400000000000000000000070411360743507500213250ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chemv.c * The program is a C interface to chemv * * Keita Teranishi 5/18/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif int n=0, i=0; const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta; float ALPHA[2],BETA[2]; int tincY, tincx; float *x=(float *)X, *y=(float *)Y, *st=0, *tx; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_chemv(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; if (N > 0) { n = N << 1; x = malloc(n*sizeof(float)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; 
#else incX = 1; #endif if(incY > 0) tincY = incY; else tincY = -incY; y++; i = tincY << 1; n = i * N ; st = y + n; do { *y = -(*y); y += i; } while(y != st); y -= n; } else x = (float *) X; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_chemv(F77_UL, &F77_N, (scomplex*)ALPHA, (scomplex*)A, &F77_lda, (scomplex*)x, &F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); } else { cblas_xerbla(1, "cblas_chemv","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( order == CblasRowMajor ) { RowMajorStrg = 1; if ( X != x ) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cher.c000066400000000000000000000051131360743507500211420ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cher.c * The program is a C interface to cher. 
* * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const void *X, f77_int incX ,void *A, f77_int lda) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif int n, i, tincx; float *x=(float *)X, *xx=(float *)X, *tx, *st; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_cher(F77_UL, &F77_N, &alpha, (scomplex*)X, &F77_incX, (scomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif if (N > 0) { n = N << 1; x = malloc(n*sizeof(float)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif } else x = (float *) X; F77_cher(F77_UL, &F77_N, &alpha, (scomplex*)x, &F77_incX, (scomplex*)A, &F77_lda); } else { cblas_xerbla(1, "cblas_cher","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(X!=x) free(x); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cher2.c000066400000000000000000000066621360743507500212360ustar00rootroot00000000000000#include 
"blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cher2.c * The program is a C interface to cher2. * * Keita Teranishi 3/23/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif int n, i, j, tincx, tincy; float *x=(float *)X, *xx=(float *)X, *y=(float *)Y, *yy=(float *)Y, *tx, *ty, *stx, *sty; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_cher2(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif if (N > 0) { n = N << 1; x = malloc(n*sizeof(float)); y = malloc(n*sizeof(float)); tx = x; ty = y; if( incX > 0 ) { i = incX << 1 ; tincx = 2; stx= x+n; } else { i = incX *(-2); tincx = -2; stx = x-2; x +=(n-2); } if( incY > 0 ) { j = incY << 1; tincy = 2; sty= y+n; } else { j = incY *(-2); tincy = -2; sty = y-2; y +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != stx); do { *y = *yy; y[1] = -yy[1]; y += tincy ; yy += j; } while (y != sty); x=tx; y=ty; #ifdef 
F77_INT F77_incX = 1; F77_incY = 1; #else incX = 1; incY = 1; #endif } else { x = (float *) X; y = (float *) Y; } F77_cher2(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)y, &F77_incY, (scomplex*)x, &F77_incX, (scomplex*)A, &F77_lda); } else { cblas_xerbla(1, "cblas_cher2","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(X!=x) free(x); if(Y!=y) free(y); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cher2k.c000066400000000000000000000056451360743507500214110ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_cher2k.c * This program is a C interface to cher2k. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, float beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; float ALPHA[2]; const float *alp=(float *)alpha; CBLAS_CallFromC = 1; RowMajorStrg = 0; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif 
F77_cher2k(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, &beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='C'; else { cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif ALPHA[0]= *alp; ALPHA[1]= -alp[1]; F77_cher2k(F77_UL,F77_TR, &F77_N, &F77_K, (scomplex*)ALPHA, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, &beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_cher2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cherk.c000066400000000000000000000052631360743507500213230ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_cherk.c * This program is a C interface to cherk. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, float alpha, const void *A, f77_int lda, float beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, (scomplex*)A, &F77_lda, &beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='C'; else { cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, (scomplex*)A, &F77_lda, &beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_cherk", 
"Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_chpmv.c000066400000000000000000000067271360743507500213520ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chpmv.c * The program is a C interface of chpmv * * Keita Teranishi 5/18/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,f77_int N, const void *alpha, const void *AP, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif int n, i=0; const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta; float ALPHA[2],BETA[2]; int tincY, tincx; float *x=(float *)X, *y=(float *)Y, *st=0, *tx; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_chpmv(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)AP, (scomplex*)X, &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; if (N > 0) { n = N << 1; x = malloc(n*sizeof(float)); tx = x; if( incX > 0 ) { i = incX << 1; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif if(incY > 0) tincY = incY; else tincY = -incY; y++; i = tincY << 1; n = i * N ; st = y + n; do { *y 
= -(*y); y += i; } while(y != st); y -= n; } else x = (float *) X; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n", Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_chpmv(F77_UL, &F77_N, (scomplex*)ALPHA, (scomplex*)AP, (scomplex*)x, &F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); } else { cblas_xerbla(1, "cblas_chpmv","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( order == CblasRowMajor ) { RowMajorStrg = 1; if(X!=x) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_chpr.c000066400000000000000000000050021360743507500211520ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chpr.c * The program is a C interface to chpr. * * Keita Teranishi 3/23/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, float alpha, const void *X, f77_int incX, void *A) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif int n, i, tincx; float *x=(float *)X, *xx=(float *)X, *tx, *st; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_chpr(F77_UL, &F77_N, &alpha, (scomplex*)X, &F77_incX, (scomplex*)A); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; 
else { cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif if (N > 0) { n = N << 1; x = malloc(n*sizeof(float)); tx = x; if( incX > 0 ) { i = incX << 1; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif } else x = (float *) X; F77_chpr(F77_UL, &F77_N, &alpha, (scomplex*)x, &F77_incX, (scomplex*)A); } else { cblas_xerbla(1, "cblas_chpr","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(X!=x) free(x); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_chpr2.c000066400000000000000000000064571360743507500212530ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chpr2.c * The program is a C interface to chpr2. * * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N,const void *alpha, const void *X, f77_int incX,const void *Y, f77_int incY, void *Ap) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif int n, i, j, tincx, tincy; float *x=(float *)X, *xx=(float *)X, *y=(float *)Y, *yy=(float *)Y, *tx, *ty, *stx, *sty; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_chpr2(F77_UL, &F77_N, (scomplex*)alpha, 
(scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)Ap); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif if (N > 0) { n = N << 1; x = malloc(n*sizeof(float)); y = malloc(n*sizeof(float)); tx = x; ty = y; if( incX > 0 ) { i = incX << 1 ; tincx = 2; stx= x+n; } else { i = incX *(-2); tincx = -2; stx = x-2; x +=(n-2); } if( incY > 0 ) { j = incY << 1; tincy = 2; sty= y+n; } else { j = incY *(-2); tincy = -2; sty = y-2; y +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != stx); do { *y = *yy; y[1] = -yy[1]; y += tincy ; yy += j; } while (y != sty); x=tx; y=ty; #ifdef F77_INT F77_incX = 1; F77_incY = 1; #else incX = 1; incY = 1; #endif } else { x = (float *) X; y = (void *) Y; } F77_chpr2(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)y, &F77_incY, (scomplex*)x, &F77_incX, (scomplex*)Ap); } else { cblas_xerbla(1, "cblas_chpr2","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(X!=x) free(x); if(Y!=y) free(y); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cscal.c000066400000000000000000000007441360743507500213130ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cscal.c * * The program is a C interface to cscal.f. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_cscal( f77_int N, const void *alpha, void *X, f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_cscal( &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_csscal.c000066400000000000000000000007251360743507500214750ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_csscal.c * * The program is a C interface to csscal. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_csscal( f77_int N, float alpha, void *X, f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_csscal( &F77_N, &alpha, (scomplex*)X, &F77_incX); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_cswap.c000066400000000000000000000010241360743507500213330ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cswap.c * * The program is a C interface to cswap. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_cswap( f77_int N, void *X, f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_cswap( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_csymm.c000066400000000000000000000053511360743507500213550ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_csymm.c * This program is a C interface to csymm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR F77_CHAR F77_SD, F77_UL; #else #define F77_SD &SD #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_csymm(F77_SD, F77_UL, &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_csymm(F77_SD, F77_UL, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, 
&F77_ldc); } else cblas_xerbla(1, "cblas_csymm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_csyr2k.c000066400000000000000000000055471360743507500214510ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_csyr2k.c * This program is a C interface to csyr2k. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo); 
CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='T'; else { cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_csyr2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_csyrk.c000066400000000000000000000053461360743507500213640ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_csyrk.c * This program is a C interface to csyrk. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; 
return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='T'; else { cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_csyrk", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ctbmv.c000066400000000000000000000077001360743507500213400ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctbmv.c * The program is a C interface to ctbmv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const void *A, f77_int lda, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #endif int n, i=0, tincX; float *st=0, *x=(float *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctbmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if(incX > 0) tincX = incX; else 
tincX = -incX; i = tincX << 1; n = i * N; x++; st = x + n; do { *x = -(*x); x+= i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ctbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ctbsv.c000066400000000000000000000077111360743507500213500ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctbsv.c * The program is a C interface to ctbsv. 
* * Keita Teranishi 3/23/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const void *A, f77_int lda, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #endif int n, i=0, tincX; float *st=0,*x=(float *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if ( incX > 0 ) tincX = incX; else 
tincX = -incX; n = N*2*(tincX); x++; st=x+n; i = tincX << 1; do { *x = -(*x); x+=i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x+= i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ctbsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ctpmv.c000066400000000000000000000074101360743507500213540ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctpmv.c * The program is a C interface to ctpmv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *Ap, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif int n, i=0, tincX; float *st=0,*x=(float *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ctpmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ctpmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctpmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)Ap, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ctpmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if(incX > 0) tincX = incX; else tincX = -incX; i = tincX << 1; n = i * N; x++; st = x + n; do { *x = -(*x); x += i; } while (x != 
st); x -= n; } } else { cblas_xerbla(3, "cblas_ctpmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctpmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)Ap, (scomplex*)X,&F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ctpmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ctpsv.c000066400000000000000000000074221360743507500213650ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctpsv.c * The program is a C interface to ctpsv. * * Keita Teranishi 3/23/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *Ap, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif int n, i=0, tincX; float *st=0, *x=(float*)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ctpsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ctpsv","Illegal 
TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctpsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctpsv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)Ap, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ctpsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if ( incX > 0 ) tincX = incX; else tincX = -incX; n = N*2*(tincX); x++; st=x+n; i = tincX << 1; do { *x = -(*x); x+=i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ctpsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctpsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctpsv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)Ap, (scomplex*)X,&F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ctpsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ctrmm.c000066400000000000000000000076221360743507500213520ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_ctrmm.c * This program is a C interface to ctrmm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, void *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight ) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_ctrmm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper ) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_ctrmm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans ) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_ctrmm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else cblas_xerbla(5, "cblas_ctrmm", "Illegal Diag setting, %d\n", Diag); #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_ctrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight ) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_ctrmm", "Illegal Side setting, %d\n", Side); 
CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper ) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_ctrmm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans ) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_ctrmm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_ctrmm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_ctrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb); } else cblas_xerbla(1, "cblas_ctrmm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ctrmv.c000066400000000000000000000075751360743507500213720ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctrmv.c * The program is a C interface to ctrmv. 
* * Keita Teranishi 3/23/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *A, f77_int lda, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif int n, i=0, tincX; float *st=0,*x=(float *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ctrmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ctrmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctrmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctrmv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ctrmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if(incX > 0) tincX = incX; else tincX = -incX; i = tincX << 1; n = i * N; st = 
x + n; do { x[1] = -x[1]; x+= i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ctrmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctrmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctrmv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { x[1] = -x[1]; x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ctrmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ctrsm.c000066400000000000000000000077141360743507500213620ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_ctrsm.c * This program is a C interface to ctrsm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, void *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_ctrsm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_ctrsm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_ctrsm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_ctrsm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_ctrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, alpha, A, &F77_lda, B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_ctrsm", "Illegal Side setting, %d\n", 
Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_ctrsm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_ctrsm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_ctrsm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_ctrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb); } else cblas_xerbla(1, "cblas_ctrsm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ctrsv.c000066400000000000000000000076151360743507500213730ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctrsv.c * The program is a C interface to ctrsv. 
* * Keita Teranishi 3/23/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *A, f77_int lda, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif int n, i=0, tincX; float *st=0,*x=(float *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ctrsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ctrsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctrsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctrsv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ctrsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if ( incX > 0 ) tincX = incX; else tincX = -incX; n = N*2*(tincX); x++; 
st=x+n; i = tincX << 1; do { *x = -(*x); x+=i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ctrsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ctrsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ctrsv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ctrsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dasum.c000066400000000000000000000010061360743507500213270ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dasum.c * * The program is a C interface to dasum. * It calls the fortran wrapper before calling dasum. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" double cblas_dasum( f77_int N, const double *X, f77_int incX) { double asum; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_dasum_sub( &F77_N, X, &F77_incX, &asum); return asum; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_daxpy.c000066400000000000000000000010371360743507500213470ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_daxpy.c * * The program is a C interface to daxpy. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_daxpy( f77_int N, double alpha, const double *X, f77_int incX, double *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_daxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dcopy.c000066400000000000000000000010071360743507500213350ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dcopy.c * * The program is a C interface to dcopy. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dcopy( f77_int N, const double *X, f77_int incX, double *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_dcopy( &F77_N, X, &F77_incX, Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ddot.c000066400000000000000000000011531360743507500211530ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ddot.c * * The program is a C interface to ddot. * It calls the fortran wrapper before calling ddot. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" double cblas_ddot( f77_int N, const double *X, f77_int incX, const double *Y, f77_int incY) { double dot; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_ddot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot); return dot; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dgbmv.c000066400000000000000000000043701360743507500213240ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dgbmv.c * This program is a C interface to dgbmv. 
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, f77_int KL, f77_int KU, double alpha, const double *A, f77_int lda, const double *X, f77_int incX, double beta, double *Y, f77_int incY) { char TA; #ifdef F77_CHAR F77_CHAR F77_TA; #else #define F77_TA &TA #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; F77_INT F77_KL=KL,F77_KU=KU; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_KL KL #define F77_KU KU #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(2, "cblas_dgbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_dgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(2, "cblas_dgbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_dgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, &alpha, A ,&F77_lda, X,&F77_incX, &beta, Y, &F77_incY); } else cblas_xerbla(1, "cblas_dgbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dgemm.c000066400000000000000000000056551360743507500213250ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dgemm.c * This program is a C interface to dgemm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, f77_int K, double alpha, const double *A, f77_int lda, const double *B, f77_int ldb, double beta, double *C, f77_int ldc) { char TA, TB; #ifdef F77_CHAR F77_CHAR F77_TA, F77_TB; #else #define F77_TA &TA #define F77_TB &TB #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if(TransA == CblasTrans) TA='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(2, "cblas_dgemm","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(TransB == CblasTrans) TB='T'; else if ( TransB == CblasConjTrans ) TB='C'; else if ( TransB == CblasNoTrans ) TB='N'; else { cblas_xerbla(3, "cblas_dgemm","Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); F77_TB = C2F_CHAR(&TB); #endif F77_dgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if(TransA == CblasTrans) TB='T'; else if ( TransA == CblasConjTrans ) TB='C'; else if ( TransA == CblasNoTrans ) TB='N'; else { cblas_xerbla(2, "cblas_dgemm","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(TransB == CblasTrans) TA='T'; else if ( TransB == CblasConjTrans ) TA='C'; else if ( TransB == CblasNoTrans ) TA='N'; else { cblas_xerbla(2, "cblas_dgemm","Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; 
} #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); F77_TB = C2F_CHAR(&TB); #endif F77_dgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, &alpha, B, &F77_ldb, A, &F77_lda, &beta, C, &F77_ldc); } else cblas_xerbla(1, "cblas_dgemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dgemv.c000066400000000000000000000041411360743507500213230ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dgemv.c * This program is a C interface to dgemv. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, double alpha, const double *A, f77_int lda, const double *X, f77_int incX, double beta, double *Y, f77_int incY) { char TA; #ifdef F77_CHAR F77_CHAR F77_TA; #else #define F77_TA &TA #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(2, "cblas_dgemv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_dgemv(F77_TA, &F77_M, &F77_N, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(2, "cblas_dgemv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_dgemv(F77_TA, &F77_N, 
&F77_M, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else cblas_xerbla(1, "cblas_dgemv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dger.c000066400000000000000000000022331360743507500211420ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dger.c * This program is a C interface to dger. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dger(enum CBLAS_ORDER order, f77_int M, f77_int N, double alpha, const double *X, f77_int incX, const double *Y, f77_int incY, double *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_incX incX #define F77_incY incY #define F77_lda lda #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { F77_dger( &F77_M, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; F77_dger( &F77_N, &F77_M ,&alpha, Y, &F77_incY, X, &F77_incX, A, &F77_lda); } else cblas_xerbla(1, "cblas_dger", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dnrm2.c000066400000000000000000000010051360743507500212370ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dnrm2.c * * The program is a C interface to dnrm2. * It calls the fortranwrapper before calling dnrm2. * * Written by Keita Teranishi. 
2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"

/*
 * cblas_dnrm2
 *
 * Hands N, X and incX to F77_dnrm2_sub, which stores its result in the
 * local `nrm2`; that value is then returned to the caller.
 *
 * When F77_INT is defined, N and incX are first copied into F77_INT locals.
 * Otherwise F77_N and F77_incX are macro aliases for the arguments.
 */
double cblas_dnrm2( f77_int N, const double *X, f77_int incX)
{
   double nrm2;
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   /* The result comes back through the last argument. */
   F77_dnrm2_sub( &F77_N, X, &F77_incX, &nrm2);
   return nrm2;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_drot.c000066400000000000000000000010351360743507500211700ustar00rootroot00000000000000
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_drot.c
 *
 * The program is a C interface to drot.
 *
 * Written by Keita Teranishi.  2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"

/*
 * cblas_drot
 *
 * Forwards its arguments to F77_drot. N, incX, incY, c and s are passed
 * by address; X and Y are passed through as given.
 *
 * When F77_INT is defined, the integer arguments are copied into F77_INT
 * locals first. Otherwise the F77_* names are macro aliases for them.
 */
void cblas_drot(f77_int N, double *X, f77_int incX,
   double *Y, f77_int incY, const double c, const double s)
{
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
   #define F77_N N
   #define F77_incX incX
   #define F77_incY incY
#endif
   F77_drot(&F77_N, X, &F77_incX, Y, &F77_incY, &c, &s);
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_drotg.c000066400000000000000000000004571360743507500213460ustar00rootroot00000000000000
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_drotg.c
 *
 * The program is a C interface to drotg.
 *
 * Written by Keita Teranishi.
2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"

/*
 * cblas_drotg
 *
 * Passes the four pointers straight through to F77_drotg. No argument is
 * copied or converted here.
 */
void cblas_drotg(  double *a, double *b, double *c, double *s)
{
   F77_drotg(a,b,c,s);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_drotm.c000066400000000000000000000006441360743507500213520ustar00rootroot00000000000000
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
#include "cblas.h"
#include "cblas_f77.h"

/*
 * cblas_drotm
 *
 * Forwards its arguments to F77_drotm. N, incX and incY are passed by
 * address; X, Y and P are passed through as given.
 *
 * When F77_INT is defined, the integer arguments are copied into F77_INT
 * locals first. Otherwise the F77_* names are macro aliases for them.
 */
void cblas_drotm( f77_int N, double *X, f77_int incX, double *Y,
                       f77_int incY, const double *P)
{
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
   #define F77_N N
   #define F77_incX incX
   #define F77_incY incY
#endif
   F77_drotm( &F77_N, X, &F77_incX, Y, &F77_incY, P);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_drotmg.c000066400000000000000000000005421360743507500215160ustar00rootroot00000000000000
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_drotmg.c
 *
 * The program is a C interface to drotmg.
 *
 * Written by Keita Teranishi.  2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"

/*
 * cblas_drotmg
 *
 * Forwards its arguments to F77_drotmg. b2 is received by value and
 * passed by address; the other four arguments are already pointers.
 */
void cblas_drotmg( double *d1, double *d2, double *b1,
                        const double b2, double *p)
{
   F77_drotmg(d1,d2,b1,&b2,p);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_dsbmv.c000066400000000000000000000037541360743507500213450ustar00rootroot00000000000000
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_dsbmv.c
 * This program is a C interface to dsbmv.
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, f77_int K, double alpha, const double *A, f77_int lda, const double *X, f77_int incX, double beta, double *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dsbmv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dsbmv(F77_UL, &F77_N, &F77_K, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dsbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dsbmv(F77_UL, &F77_N, &F77_K, &alpha, A ,&F77_lda, X,&F77_incX, &beta, Y, &F77_incY); } else cblas_xerbla(1, "cblas_dsbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dscal.c000066400000000000000000000007121360743507500213070ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dscal.c * * The program is a C interface to dscal. * * Written by Keita Teranishi. 
2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"

/*
 * cblas_dscal
 *
 * Forwards its arguments to F77_dscal. N, alpha and incX are passed by
 * address; X is passed through as given.
 *
 * When F77_INT is defined, N and incX are copied into F77_INT locals
 * first. Otherwise F77_N and F77_incX are macro aliases for them.
 */
void cblas_dscal( f77_int N, double alpha, double *X,
                       f77_int incX)
{
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   F77_dscal( &F77_N, &alpha, X, &F77_incX);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_dsdot.c000066400000000000000000000011601360743507500213340ustar00rootroot00000000000000
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_dsdot.c
 *
 * The program is a C interface to dsdot.
 * It calls the Fortran wrapper before calling dsdot.
 *
 * Written by Keita Teranishi.  2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"

/*
 * cblas_dsdot
 *
 * Hands N, X, incX, Y and incY to F77_dsdot_sub, which stores its result
 * in the local `dot`; that value is then returned to the caller. The
 * input vectors are float while the returned value is double.
 *
 * When F77_INT is defined, the integer arguments are copied into F77_INT
 * locals first. Otherwise the F77_* names are macro aliases for them.
 */
double  cblas_dsdot( f77_int N, const float *X, f77_int incX,
                        const float *Y, f77_int incY)
{
   double dot;
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
   #define F77_N N
   #define F77_incX incX
   #define F77_incY incY
#endif
   /* The result comes back through the last argument. */
   F77_dsdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
   return dot;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_dspmv.c000066400000000000000000000035651360743507500213630ustar00rootroot00000000000000
#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_dspmv.c
 * This program is a C interface to dspmv.
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *AP, const double *X, f77_int incX, double beta, double *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dspmv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dspmv(F77_UL, &F77_N, &alpha, AP, X, &F77_incX, &beta, Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dspmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dspmv(F77_UL, &F77_N, &alpha, AP, X,&F77_incX, &beta, Y, &F77_incY); } else cblas_xerbla(1, "cblas_dspmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dspr.c000066400000000000000000000032301360743507500211670ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dspr.c * This program is a C interface to dspr. 
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *X, f77_int incX, double *Ap) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_dspr","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dspr(F77_UL, &F77_N, &alpha, X, &F77_incX, Ap); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasLower) UL = 'U'; else if (Uplo == CblasUpper) UL = 'L'; else { cblas_xerbla(2, "cblas_dspr","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dspr(F77_UL, &F77_N, &alpha, X, &F77_incX, Ap); } else cblas_xerbla(1, "cblas_dspr", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dspr2.c000066400000000000000000000033741360743507500212620ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dspr2.c * The program is a C interface to dspr2. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const double alpha, const double *X, f77_int incX, const double *Y, f77_int incY, double *A) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_dspr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dspr2(F77_UL, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasLower) UL = 'U'; else if (Uplo == CblasUpper) UL = 'L'; else { cblas_xerbla(2, "cblas_dspr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dspr2(F77_UL, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A); } else cblas_xerbla(1, "cblas_dspr2", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dswap.c000066400000000000000000000010021360743507500213300ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dswap.c * * The program is a C interface to dswap. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_dswap( &F77_N, X, &F77_incX, Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dsymm.c000066400000000000000000000051761360743507500213630ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dsymm.c * This program is a C interface to dsymm. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, double alpha, const double *A, f77_int lda, const double *B, f77_int ldb, double beta, double *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR F77_CHAR F77_SD, F77_UL; #else #define F77_SD &SD #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_dsymm","Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_dsymm","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_dsymm(F77_SD, F77_UL, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, 
"cblas_dsymm","Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_dsymm","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_dsymm(F77_SD, F77_UL, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); } else cblas_xerbla(1, "cblas_dsymm","Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dsymv.c000066400000000000000000000036651360743507500213750ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dsymv.c * This program is a C interface to dsymv. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const double *A, f77_int lda, const double *X, f77_int incX, double beta, double *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dsymv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dsymv(F77_UL, &F77_N, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dsymv","Illegal Uplo setting, %d\n", Uplo); 
CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dsymv(F77_UL, &F77_N, &alpha, A ,&F77_lda, X,&F77_incX, &beta, Y, &F77_incY); } else cblas_xerbla(1, "cblas_dsymv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dsyr.c000066400000000000000000000033451360743507500212070ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dsyr.c * This program is a C interface to dsyr. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const double alpha, const double *X, f77_int incX, double *A, f77_int lda) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_lda=lda; #else #define F77_N N #define F77_incX incX #define F77_lda lda #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_dsyr","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dsyr(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasLower) UL = 'U'; else if (Uplo == CblasUpper) UL = 'L'; else { cblas_xerbla(2, "cblas_dsyr","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dsyr(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda); } else cblas_xerbla(1, "cblas_dsyr", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif 
blis-0.6.1/frame/compat/cblas/src/cblas_dsyr2.c000066400000000000000000000036141360743507500212700ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dsyr2.c * This program is a C interface to dsyr2. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const double alpha, const double *X, f77_int incX, const double *Y, f77_int incY, double *A, f77_int lda) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY, F77_lda=lda; #else #define F77_N N #define F77_incX incX #define F77_incY incY #define F77_lda lda #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_dsyr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dsyr2(F77_UL, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasLower) UL = 'U'; else if (Uplo == CblasUpper) UL = 'L'; else { cblas_xerbla(2, "cblas_dsyr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_dsyr2(F77_UL, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A, &F77_lda); } else cblas_xerbla(1, "cblas_dsyr2", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dsyr2k.c000066400000000000000000000054111360743507500214400ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dsyr2k.c * This program is a C interface to dsyr2k. 
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, double alpha, const double *A, f77_int lda, const double *B, f77_int ldb, double beta, double *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_dsyr2k","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_dsyr2k","Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_dsyr2k(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_dsyr2k","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='T'; else { cblas_xerbla(3, "cblas_dsyr2k","Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_dsyr2k(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, 
&F77_ldc); } else cblas_xerbla(1, "cblas_dsyr2k","Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dsyrk.c000066400000000000000000000052141360743507500213570ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dsyrk.c * This program is a C interface to dsyrk. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, double alpha, const double *A, f77_int lda, double beta, double *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_dsyrk","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_dsyrk","Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_dsyrk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, &beta, C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_dsyrk","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == 
CblasNoTrans ) TR='T'; else { cblas_xerbla(3, "cblas_dsyrk","Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_dsyrk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, &beta, C, &F77_ldc); } else cblas_xerbla(1, "cblas_dsyrk","Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dtbmv.c000066400000000000000000000063541360743507500213450ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtbmv.c * The program is a C interface to dtbmv. * * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const double *A, f77_int lda, double *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dtbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_dtbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtbmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; 
} #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dtbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_dtbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, &F77_incX); } else cblas_xerbla(1, "cblas_dtbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dtbsv.c000066400000000000000000000063661360743507500213560ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtbsv.c * The program is a C interface to dtbsv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const double *A, f77_int lda, double *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dtbsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_dtbsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtbsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dtbsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_dtbsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 
0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtbsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, &F77_incX); } else cblas_xerbla(1, "cblas_dtbsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dtpmv.c000066400000000000000000000060771360743507500213650ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtpmv.c * The program is a C interface to dtpmv. * * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const double *Ap, double *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dtpmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_dtpmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtpmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; 
} #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dtpmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_dtpmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtpmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX); } else cblas_xerbla(1, "cblas_dtpmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dtpsv.c000066400000000000000000000061001360743507500213560ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtpsv.c * The program is a C interface to dtpsv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const double *Ap, double *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dtpsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_dtpsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtpsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtpsv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dtpsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_dtpsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { 
cblas_xerbla(4, "cblas_dtpsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtpsv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX); } else cblas_xerbla(1, "cblas_dtpsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dtrmm.c000066400000000000000000000076151360743507500213550ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dtrmm.c * This program is a C interface to dtrmm. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, double alpha, const double *A, f77_int lda, double *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_dtrmm","Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_dtrmm","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_dtrmm","Illegal Trans setting, %d\n", TransA); 
CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_dtrmm","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_dtrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_dtrmm","Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_dtrmm","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_dtrmm","Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_dtrmm","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_dtrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb); } else cblas_xerbla(1, "cblas_dtrmm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dtrmv.c000066400000000000000000000063151360743507500213620ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dtrmv.c * This program is a C interface to sgemv. 
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const double *A, f77_int lda, double *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dtrmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_dtrmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtrmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtrmv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dtrmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_dtrmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == 
CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtrmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtrmv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, &F77_incX); } else cblas_xerbla(1, "cblas_dtrmv", "Illegal order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dtrsm.c000066400000000000000000000077351360743507500213660ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_dtrsm.c * This program is a C interface to dtrsm. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, double alpha, const double *A, f77_int lda, double *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if ( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_dtrsm","Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower) UL='L'; else { cblas_xerbla(3, "cblas_dtrsm","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( TransA == CblasTrans ) TA='T'; else if ( TransA == CblasConjTrans) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { 
cblas_xerbla(4, "cblas_dtrsm","Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit) DI='N'; else { cblas_xerbla(5, "cblas_dtrsm","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_dtrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if ( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_dtrsm","Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower) UL='U'; else { cblas_xerbla(3, "cblas_dtrsm","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( TransA == CblasTrans ) TA='T'; else if ( TransA == CblasConjTrans) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_dtrsm","Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit) DI='N'; else { cblas_xerbla(5, "cblas_dtrsm","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_dtrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb); } else cblas_xerbla(1, "cblas_dtrsm","Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dtrsv.c000066400000000000000000000062771360743507500213770ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtrsv.c * The program is a C interface to dtrsv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const double *A, f77_int lda, double *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_dtrsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_dtrsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtrsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtrsv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_dtrsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_dtrsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 
'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_dtrsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_dtrsv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, &F77_incX); } else cblas_xerbla(1, "cblas_dtrsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dzasum.c000066400000000000000000000010111360743507500215150ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dzasum.c * * The program is a C interface to dzasum. * It calls the fortran wrapper before calling dzasum. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" double cblas_dzasum( f77_int N, const void *X, f77_int incX) { double asum; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_dzasum_sub( &F77_N, X, &F77_incX, &asum); return asum; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_dznrm2.c000066400000000000000000000010111360743507500214260ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dznrm2.c * * The program is a C interface to dznrm2. * It calls the fortran wrapper before calling dznrm2. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" double cblas_dznrm2( f77_int N, const void *X, f77_int incX) { double nrm2; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_dznrm2_sub( &F77_N, X, &F77_incX, &nrm2); return nrm2; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_f77.h000066400000000000000000000101701360743507500206300ustar00rootroot00000000000000/* * cblas_f77.h * Written by Keita Teranishi * * Updated by Jeff Horner * Merged cblas_f77.h and cblas_fortran_header.h * * (Heavily hacked down from the original) */ #ifndef CBLAS_F77_H #define CBLAS_F77_H /* * Level 1 BLAS */ #define F77_xerbla xerbla_ #define F77_srotg srotg_ #define F77_srotmg srotmg_ #define F77_srot srot_ #define F77_srotm srotm_ #define F77_drotg drotg_ #define F77_drotmg drotmg_ #define F77_drot drot_ #define F77_drotm drotm_ #define F77_sswap sswap_ #define F77_scopy scopy_ #define F77_saxpy saxpy_ #define F77_isamax_sub isamaxsub_ #define F77_dswap dswap_ #define F77_dcopy dcopy_ #define F77_daxpy daxpy_ #define F77_idamax_sub idamaxsub_ #define F77_cswap cswap_ #define F77_ccopy ccopy_ #define F77_caxpy caxpy_ #define F77_icamax_sub icamaxsub_ #define F77_zswap zswap_ #define F77_zcopy zcopy_ #define F77_zaxpy zaxpy_ #define F77_izamax_sub izamaxsub_ #define F77_sdot_sub sdotsub_ #define F77_ddot_sub ddotsub_ #define F77_dsdot_sub dsdotsub_ #define F77_sscal sscal_ #define F77_dscal dscal_ #define F77_cscal cscal_ #define F77_zscal zscal_ #define F77_csscal csscal_ #define F77_zdscal zdscal_ #define F77_cdotu_sub cdotusub_ #define F77_cdotc_sub cdotcsub_ #define F77_zdotu_sub zdotusub_ #define F77_zdotc_sub zdotcsub_ #define F77_snrm2_sub snrm2sub_ #define F77_sasum_sub sasumsub_ #define F77_dnrm2_sub dnrm2sub_ #define F77_dasum_sub dasumsub_ #define F77_scnrm2_sub scnrm2sub_ #define F77_scasum_sub scasumsub_ #define F77_dznrm2_sub dznrm2sub_ #define F77_dzasum_sub dzasumsub_ #define F77_sdsdot_sub sdsdotsub_ /* * Level 
2 BLAS */ #define F77_ssymv ssymv_ #define F77_ssbmv ssbmv_ #define F77_sspmv sspmv_ #define F77_sger sger_ #define F77_ssyr ssyr_ #define F77_sspr sspr_ #define F77_ssyr2 ssyr2_ #define F77_sspr2 sspr2_ #define F77_dsymv dsymv_ #define F77_dsbmv dsbmv_ #define F77_dspmv dspmv_ #define F77_dger dger_ #define F77_dsyr dsyr_ #define F77_dspr dspr_ #define F77_dsyr2 dsyr2_ #define F77_dspr2 dspr2_ #define F77_chemv chemv_ #define F77_chbmv chbmv_ #define F77_chpmv chpmv_ #define F77_cgeru cgeru_ #define F77_cgerc cgerc_ #define F77_cher cher_ #define F77_chpr chpr_ #define F77_cher2 cher2_ #define F77_chpr2 chpr2_ #define F77_zhemv zhemv_ #define F77_zhbmv zhbmv_ #define F77_zhpmv zhpmv_ #define F77_zgeru zgeru_ #define F77_zgerc zgerc_ #define F77_zher zher_ #define F77_zhpr zhpr_ #define F77_zher2 zher2_ #define F77_zhpr2 zhpr2_ #define F77_sgemv sgemv_ #define F77_sgbmv sgbmv_ #define F77_strmv strmv_ #define F77_stbmv stbmv_ #define F77_stpmv stpmv_ #define F77_strsv strsv_ #define F77_stbsv stbsv_ #define F77_stpsv stpsv_ #define F77_dgemv dgemv_ #define F77_dgbmv dgbmv_ #define F77_dtrmv dtrmv_ #define F77_dtbmv dtbmv_ #define F77_dtpmv dtpmv_ #define F77_dtrsv dtrsv_ #define F77_dtbsv dtbsv_ #define F77_dtpsv dtpsv_ #define F77_cgemv cgemv_ #define F77_cgbmv cgbmv_ #define F77_ctrmv ctrmv_ #define F77_ctbmv ctbmv_ #define F77_ctpmv ctpmv_ #define F77_ctrsv ctrsv_ #define F77_ctbsv ctbsv_ #define F77_ctpsv ctpsv_ #define F77_zgemv zgemv_ #define F77_zgbmv zgbmv_ #define F77_ztrmv ztrmv_ #define F77_ztbmv ztbmv_ #define F77_ztpmv ztpmv_ #define F77_ztrsv ztrsv_ #define F77_ztbsv ztbsv_ #define F77_ztpsv ztpsv_ /* * Level 3 BLAS */ #define F77_chemm chemm_ #define F77_cherk cherk_ #define F77_cher2k cher2k_ #define F77_zhemm zhemm_ #define F77_zherk zherk_ #define F77_zher2k zher2k_ #define F77_sgemm sgemm_ #define F77_ssymm ssymm_ #define F77_ssyrk ssyrk_ #define F77_ssyr2k ssyr2k_ #define F77_strmm strmm_ #define F77_strsm strsm_ #define F77_dgemm dgemm_ #define 
F77_dsymm dsymm_ #define F77_dsyrk dsyrk_ #define F77_dsyr2k dsyr2k_ #define F77_dtrmm dtrmm_ #define F77_dtrsm dtrsm_ #define F77_cgemm cgemm_ #define F77_csymm csymm_ #define F77_csyrk csyrk_ #define F77_csyr2k csyr2k_ #define F77_ctrmm ctrmm_ #define F77_ctrsm ctrsm_ #define F77_zgemm zgemm_ #define F77_zsymm zsymm_ #define F77_zsyrk zsyrk_ #define F77_zsyr2k zsyr2k_ #define F77_ztrmm ztrmm_ #define F77_ztrsm ztrsm_ #endif /* CBLAS_F77_H */ blis-0.6.1/frame/compat/cblas/src/cblas_globals.c000066400000000000000000000001351360743507500216430ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS int CBLAS_CallFromC=0; int RowMajorStrg=0; #endif blis-0.6.1/frame/compat/cblas/src/cblas_icamax.c000066400000000000000000000010461360743507500214640ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_icamax.c * * The program is a C interface to icamax. * It calls the fortran wrapper before calling icamax. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" f77_int cblas_icamax( f77_int N, const void *X, f77_int incX) { f77_int iamax; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_icamax_sub( &F77_N, (scomplex*)X, &F77_incX, &iamax); return iamax ? iamax-1 : 0; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_idamax.c000066400000000000000000000010351360743507500214630ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_idamax.c * * The program is a C interface to idamax. * It calls the fortran wrapper before calling idamax. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" f77_int cblas_idamax( f77_int N, const double *X, f77_int incX) { f77_int iamax; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_idamax_sub( &F77_N, X, &F77_incX, &iamax); return iamax ? 
iamax-1 : 0; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_isamax.c000066400000000000000000000010341360743507500215010ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_isamax.c * * The program is a C interface to isamax. * It calls the fortran wrapper before calling isamax. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" f77_int cblas_isamax( f77_int N, const float *X, f77_int incX) { f77_int iamax; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_isamax_sub( &F77_N, X, &F77_incX, &iamax); return iamax ? iamax-1 : 0; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_izamax.c000066400000000000000000000010501360743507500215060ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_izamax.c * * The program is a C interface to izamax. * It calls the fortran wrapper before calling izamax. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" f77_int cblas_izamax( f77_int N, const void *X, f77_int incX) { f77_int iamax; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_izamax_sub( &F77_N, (dcomplex*)X, &F77_incX, &iamax); return (iamax ? iamax-1 : 0); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_sasum.c000066400000000000000000000010031360743507500213430ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sasum.c * * The program is a C interface to sasum. * It calls the fortran wrapper before calling sasum. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" float cblas_sasum( f77_int N, const float *X, f77_int incX) { float asum; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_sasum_sub( &F77_N, X, &F77_incX, &asum); return asum; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_saxpy.c000066400000000000000000000011221360743507500213610ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_saxpy.c * * The program is a C interface to saxpy. * It calls the fortran wrapper before calling saxpy. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_saxpy( f77_int N, float alpha, const float *X, f77_int incX, float *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_saxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_scasum.c000066400000000000000000000010071360743507500215120ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_scasum.c * * The program is a C interface to scasum. * It calls the fortran wrapper before calling scasum. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" float cblas_scasum( f77_int N, const void *X, f77_int incX) { float asum; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_scasum_sub( &F77_N, X, &F77_incX, &asum); return asum; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_scnrm2.c000066400000000000000000000010071360743507500214230ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_scnrm2.c * * The program is a C interface to scnrm2. * It calls the fortran wrapper before calling scnrm2. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" float cblas_scnrm2( f77_int N, const void *X, f77_int incX) { float nrm2; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_scnrm2_sub( &F77_N, X, &F77_incX, &nrm2); return nrm2; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_scopy.c000066400000000000000000000010051360743507500213520ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_scopy.c * * The program is a C interface to scopy. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_scopy( f77_int N, const float *X, f77_int incX, float *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_scopy( &F77_N, X, &F77_incX, Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_sdot.c000066400000000000000000000011471360743507500211750ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sdot.c * * The program is a C interface to sdot. * It calls the fortran wrapper before calling sdot. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" float cblas_sdot( f77_int N, const float *X, f77_int incX, const float *Y, f77_int incY) { float dot; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_sdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot); return dot; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_sdsdot.c000066400000000000000000000012061360743507500215200ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sdsdot.c * * The program is a C interface to sdsdot. * It calls the fortran wrapper before calling sdsdot. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" float cblas_sdsdot( f77_int N, float alpha, const float *X, f77_int incX, const float *Y, f77_int incY) { float dot; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_sdsdot_sub( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, &dot); return dot; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_sgbmv.c000066400000000000000000000043771360743507500213520ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_sgbmv.c * This program is a C interface to sgbmv. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, f77_int KL, f77_int KU, float alpha, const float *A, f77_int lda, const float *X, f77_int incX, float beta, float *Y, f77_int incY) { char TA; #ifdef F77_CHAR F77_CHAR F77_TA; #else #define F77_TA &TA #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; F77_INT F77_KL=KL,F77_KU=KU; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_KL KL #define F77_KU KU #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(2, "cblas_sgbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_sgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { 
cblas_xerbla(2, "cblas_sgbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_sgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, &alpha, A ,&F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else cblas_xerbla(1, "cblas_sgbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_sgemm.c000066400000000000000000000060011360743507500213260ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_sgemm.c * This program is a C interface to sgemm. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, f77_int K, float alpha, const float *A, f77_int lda, const float *B, f77_int ldb, float beta, float *C, f77_int ldc) { char TA, TB; #ifdef F77_CHAR F77_CHAR F77_TA, F77_TB; #else #define F77_TA &TA #define F77_TB &TB #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if(TransA == CblasTrans) TA='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(2, "cblas_sgemm", "Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(TransB == CblasTrans) TB='T'; else if ( TransB == CblasConjTrans ) TB='C'; else if ( TransB == CblasNoTrans ) TB='N'; else { cblas_xerbla(3, "cblas_sgemm", "Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); F77_TB = C2F_CHAR(&TB); #endif 
F77_sgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if(TransA == CblasTrans) TB='T'; else if ( TransA == CblasConjTrans ) TB='C'; else if ( TransA == CblasNoTrans ) TB='N'; else { cblas_xerbla(2, "cblas_sgemm", "Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(TransB == CblasTrans) TA='T'; else if ( TransB == CblasConjTrans ) TA='C'; else if ( TransB == CblasNoTrans ) TA='N'; else { cblas_xerbla(2, "cblas_sgemm", "Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); F77_TB = C2F_CHAR(&TB); #endif F77_sgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, &alpha, B, &F77_ldb, A, &F77_lda, &beta, C, &F77_ldc); } else cblas_xerbla(1, "cblas_sgemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_sgemv.c000066400000000000000000000041161360743507500213440ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_sgemv.c * This program is a C interface to sgemv. 
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, float alpha, const float *A, f77_int lda, const float *X, f77_int incX, float beta, float *Y, f77_int incY) { char TA; #ifdef F77_CHAR F77_CHAR F77_TA; #else #define F77_TA &TA #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(2, "cblas_sgemv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_sgemv(F77_TA, &F77_M, &F77_N, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(2, "cblas_sgemv", "Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_sgemv(F77_TA, &F77_N, &F77_M, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else cblas_xerbla(1, "cblas_sgemv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_sger.c000066400000000000000000000021771360743507500211700ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_sger.c * This program is a C interface to sger. 
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_sger(enum CBLAS_ORDER order, f77_int M, f77_int N, const float alpha, const float *X, f77_int incX, const float *Y, f77_int incY, float *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_incX incX #define F77_incY incY #define F77_lda lda #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { F77_sger( &F77_M, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; F77_sger( &F77_N, &F77_M, &alpha, Y, &F77_incY, X, &F77_incX, A, &F77_lda); } else cblas_xerbla(1, "cblas_sger", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_snrm2.c000066400000000000000000000010031360743507500212540ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_snrm2.c * * The program is a C interface to snrm2. * It calls the fortran wrapper before calling snrm2. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" float cblas_snrm2( f77_int N, const float *X, f77_int incX) { float nrm2; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_snrm2_sub( &F77_N, X, &F77_incX, &nrm2); return nrm2; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_srot.c000066400000000000000000000010441360743507500212070ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_srot.c * * The program is a C interface to srot. * * Written by Keita Teranishi. 
2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_srot: forwards its arguments to F77_srot.  Integer arguments go
 * through F77_INT copies when that type is defined, otherwise the F77_*
 * names are macros for the parameters themselves.  The by-value scalars
 * c and s are passed by address.
 */
void cblas_srot( f77_int N, float *X, f77_int incX, float *Y, f77_int incY,
   const float c, const float s)
{
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
   #define F77_N N
   #define F77_incX incX
   #define F77_incY incY
#endif
   F77_srot(&F77_N, X, &F77_incX, Y, &F77_incY, &c, &s);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_srotg.c000066400000000000000000000004531360743507500213610ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_srotg.c
 *
 * The program is a C interface to srotg.
 *
 * Written by Keita Teranishi. 2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_srotg: passes the four pointers straight through to F77_srotg.
 */
void cblas_srotg( float *a, float *b, float *c, float *s)
{
   F77_srotg(a,b,c,s);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_srotm.c000066400000000000000000000010261360743507500213640ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_srotm.c
 *
 * The program is a C interface to srotm.
 *
 * Written by Keita Teranishi. 2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_srotm: forwards to F77_srotm; N, incX and incY are passed by
 * address (via F77_INT copies when that type is defined), P is passed
 * through unchanged.
 */
void cblas_srotm( f77_int N, float *X, f77_int incX, float *Y,
   f77_int incY, const float *P)
{
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
   #define F77_N N
   #define F77_incX incX
   #define F77_incY incY
#endif
   F77_srotm( &F77_N, X, &F77_incX, Y, &F77_incY, P);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_srotmg.c000066400000000000000000000005351360743507500215370ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_srotmg.c
 *
 * The program is a C interface to srotmg.
 *
 * Written by Keita Teranishi.
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_srotmg( float *d1, float *d2, float *b1, const float b2, float *p) { F77_srotmg(d1,d2,b1,&b2,p); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ssbmv.c000066400000000000000000000036411360743507500213570ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_ssbmv.c * This program is a C interface to ssbmv. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, f77_int K, float alpha, const float *A, f77_int lda, const float *X, f77_int incX, float beta, float *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ssbmv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_ssbmv(F77_UL, &F77_N, &F77_K, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); }else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ssbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_ssbmv(F77_UL, &F77_N, &F77_K, &alpha, A, &F77_lda, X, &F77_incX, &beta, Y, &F77_incY); } else cblas_xerbla(1, "cblas_ssbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif 
blis-0.6.1/frame/compat/cblas/src/cblas_sscal.c000066400000000000000000000007101360743507500213240ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_sscal.c
 *
 * The program is a C interface to sscal.
 *
 * Written by Keita Teranishi. 2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_sscal: forwards to F77_sscal, passing N, alpha and incX by
 * address and X unchanged.
 */
void cblas_sscal( f77_int N, float alpha, float *X,
   f77_int incX)
{
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   F77_sscal( &F77_N, &alpha, X, &F77_incX);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_sspmv.c000066400000000000000000000035421360743507500213750ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_sspmv.c
 * This program is a C interface to sspmv.
 * Written by Keita Teranishi
 * 4/6/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_sspmv: validates order and Uplo, maps Uplo to a character and
 * forwards to F77_sspmv.  In the row-major branch the triangle selector
 * is flipped (Upper -> 'L', Lower -> 'U').  An unrecognised Uplo is
 * reported via cblas_xerbla as parameter 2, an unrecognised order as
 * parameter 1; F77_sspmv is not called in either case.
 *
 * CBLAS_CallFromC / RowMajorStrg are globals defined elsewhere, set for
 * the duration of the call and cleared on every exit path.
 */
void cblas_sspmv(enum CBLAS_ORDER order,
   enum CBLAS_UPLO Uplo, f77_int N,
   float alpha, const float *AP,
   const float *X, f77_int incX, float beta,
   float *Y, f77_int incY)
{
   char UL;
#ifdef F77_CHAR
   F77_CHAR F77_UL;
#else
   #define F77_UL &UL
#endif
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
   #define F77_N N
   #define F77_incX incX
   #define F77_incY incY
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if (order == CblasColMajor)
   {
      if (Uplo == CblasUpper) UL = 'U';
      else if (Uplo == CblasLower) UL = 'L';
      else
      {
         cblas_xerbla(2, "cblas_sspmv","Illegal Uplo setting, %d\n",Uplo );
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
#endif
      F77_sspmv(F77_UL, &F77_N, &alpha, AP, X,
         &F77_incX, &beta, Y, &F77_incY);
   }
   else if (order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: select the opposite triangle. */
      if (Uplo == CblasUpper) UL = 'L';
      else if (Uplo == CblasLower) UL = 'U';
      else
      {
         cblas_xerbla(2, "cblas_sspmv","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
#endif
      F77_sspmv(F77_UL,
         &F77_N, &alpha,
         AP, X,&F77_incX, &beta, Y, &F77_incY);
   }
   else cblas_xerbla(1, "cblas_sspmv", "Illegal Order setting, %d\n", order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_sspr.c000066400000000000000000000032361360743507500212140ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_sspr.c
 * This program is a C interface to sspr.
 * Written by Keita Teranishi
 * 4/6/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_sspr: validates order and Uplo, maps Uplo to a character
 * (flipped in the row-major branch) and forwards to F77_sspr.  Errors
 * are reported via cblas_xerbla (order = parameter 1, Uplo =
 * parameter 2) and F77_sspr is then not called.
 */
void cblas_sspr(enum CBLAS_ORDER order,
   enum CBLAS_UPLO Uplo, f77_int N,
   const float alpha, const float *X,
   f77_int incX, float *Ap)
{
   char UL;
#ifdef F77_CHAR
   F77_CHAR F77_UL;
#else
   #define F77_UL &UL
#endif
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if (order == CblasColMajor)
   {
      if (Uplo == CblasLower) UL = 'L';
      else if (Uplo == CblasUpper) UL = 'U';
      else
      {
         cblas_xerbla(2, "cblas_sspr","Illegal Uplo setting, %d\n",Uplo );
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
#endif
      F77_sspr(F77_UL, &F77_N, &alpha, X, &F77_incX, Ap);
   }
   else if (order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: select the opposite triangle. */
      if (Uplo == CblasLower) UL = 'U';
      else if (Uplo == CblasUpper) UL = 'L';
      else
      {
         cblas_xerbla(2, "cblas_sspr","Illegal Uplo setting, %d\n",Uplo );
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
#endif
      F77_sspr(F77_UL, &F77_N, &alpha, X, &F77_incX, Ap);
   }
   else cblas_xerbla(1, "cblas_sspr", "Illegal Order setting, %d\n", order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_sspr2.c000066400000000000000000000033751360743507500213020ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_sspr2.c
 * This program is a C interface to sspr2.
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const float alpha, const float *X, f77_int incX, const float *Y, f77_int incY, float *A) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_sspr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_sspr2(F77_UL, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasLower) UL = 'U'; else if (Uplo == CblasUpper) UL = 'L'; else { cblas_xerbla(2, "cblas_sspr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_sspr2(F77_UL, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A); } else cblas_xerbla(1, "cblas_sspr2", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_sswap.c000066400000000000000000000010001360743507500213450ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sswap.c * * The program is a C interface to sswap. * * Written by Keita Teranishi. 
2/11/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_sswap: forwards to F77_sswap, passing N, incX and incY by
 * address and the two vectors unchanged.
 */
void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
   f77_int incY)
{
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
   #define F77_N N
   #define F77_incX incX
   #define F77_incY incY
#endif
   F77_sswap( &F77_N, X, &F77_incX, Y, &F77_incY);
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_ssymm.c000066400000000000000000000053101360743507500213700ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_ssymm.c
 * This program is a C interface to ssymm.
 * Written by Keita Teranishi
 * 4/8/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_ssymm: validates Order, Side and Uplo, maps Side/Uplo to
 * characters and forwards to F77_ssymm.
 *
 * In the row-major branch both selectors are flipped (Right <-> Left,
 * Upper <-> Lower) and N and M are passed swapped.  Errors are reported
 * via cblas_xerbla (Order = 1, Side = 2, Uplo = 3) and F77_ssymm is
 * then not called.
 */
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
   enum CBLAS_UPLO Uplo, f77_int M, f77_int N,
   float alpha, const float *A, f77_int lda,
   const float *B, f77_int ldb, float beta,
   float *C, f77_int ldc)
{
   char SD, UL;
#ifdef F77_CHAR
   F77_CHAR F77_SD, F77_UL;
#else
   #define F77_SD &SD
   #define F77_UL &UL
#endif
#ifdef F77_INT
   F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
   F77_INT F77_ldc=ldc;
#else
   #define F77_M M
   #define F77_N N
   #define F77_lda lda
   #define F77_ldb ldb
   #define F77_ldc ldc
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if( Order == CblasColMajor )
   {
      if( Side == CblasRight) SD='R';
      else if ( Side == CblasLeft ) SD='L';
      else
      {
         cblas_xerbla(2, "cblas_ssymm", "Illegal Side setting, %d\n", Side);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if( Uplo == CblasUpper) UL='U';
      else if ( Uplo == CblasLower ) UL='L';
      else
      {
         cblas_xerbla(3, "cblas_ssymm", "Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_SD = C2F_CHAR(&SD);
#endif
      F77_ssymm(F77_SD, F77_UL, &F77_M, &F77_N, &alpha, A, &F77_lda,
         B, &F77_ldb, &beta, C, &F77_ldc);
   }
   else if (Order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: flip side and triangle, swap M/N in the call. */
      if( Side == CblasRight) SD='L';
      else if ( Side == CblasLeft ) SD='R';
      else
      {
         cblas_xerbla(2,
            "cblas_ssymm", "Illegal Side setting, %d\n", Side);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if( Uplo == CblasUpper) UL='L';
      else if ( Uplo == CblasLower ) UL='U';
      else
      {
         cblas_xerbla(3, "cblas_ssymm", "Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_SD = C2F_CHAR(&SD);
#endif
      F77_ssymm(F77_SD, F77_UL, &F77_N, &F77_M, &alpha, A, &F77_lda,
         B, &F77_ldb, &beta, C, &F77_ldc);
   }
   else cblas_xerbla(1, "cblas_ssymm", "Illegal Order setting, %d\n", Order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_ssymv.c000066400000000000000000000036601360743507500214070ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_ssymv.c
 * This program is a C interface to ssymv.
 * Written by Keita Teranishi
 * 4/6/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_ssymv: validates order and Uplo, maps Uplo to a character
 * (flipped in the row-major branch) and forwards to F77_ssymv.  Errors
 * are reported via cblas_xerbla (order = 1, Uplo = 2) and F77_ssymv is
 * then not called.
 */
void cblas_ssymv(enum CBLAS_ORDER order,
   enum CBLAS_UPLO Uplo, f77_int N,
   float alpha, const float *A, f77_int lda,
   const float *X, f77_int incX, float beta,
   float *Y, f77_int incY)
{
   char UL;
#ifdef F77_CHAR
   F77_CHAR F77_UL;
#else
   #define F77_UL &UL
#endif
#ifdef F77_INT
   F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
#else
   #define F77_N N
   #define F77_lda lda
   #define F77_incX incX
   #define F77_incY incY
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if (order == CblasColMajor)
   {
      if (Uplo == CblasUpper) UL = 'U';
      else if (Uplo == CblasLower) UL = 'L';
      else
      {
         cblas_xerbla(2, "cblas_ssymv","Illegal Uplo setting, %d\n",Uplo );
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
#endif
      F77_ssymv(F77_UL, &F77_N, &alpha, A, &F77_lda, X,
         &F77_incX, &beta, Y, &F77_incY);
   }
   else if (order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: select the opposite triangle. */
      if (Uplo == CblasUpper) UL = 'L';
      else if (Uplo == CblasLower) UL = 'U';
      else
      {
         cblas_xerbla(2, "cblas_ssymv","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
#endif
      F77_ssymv(F77_UL, &F77_N, &alpha,
         A ,&F77_lda, X,&F77_incX, &beta, Y, &F77_incY);
   }
   else cblas_xerbla(1, "cblas_ssymv", "Illegal Order setting, %d\n", order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_ssyr.c000066400000000000000000000033411360743507500212220ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_ssyr.c
 * This program is a C interface to ssyr.
 * Written by Keita Teranishi
 * 4/6/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_ssyr: validates order and Uplo, maps Uplo to a character
 * (flipped in the row-major branch) and forwards to F77_ssyr.  Errors
 * are reported via cblas_xerbla (order = 1, Uplo = 2) and F77_ssyr is
 * then not called.
 */
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
   f77_int N, const float alpha, const float *X,
   f77_int incX, float *A, f77_int lda)
{
   char UL;
#ifdef F77_CHAR
   F77_CHAR F77_UL;
#else
   #define F77_UL &UL
#endif
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX, F77_lda=lda;
#else
   #define F77_N N
   #define F77_incX incX
   #define F77_lda lda
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if (order == CblasColMajor)
   {
      if (Uplo == CblasLower) UL = 'L';
      else if (Uplo == CblasUpper) UL = 'U';
      else
      {
         cblas_xerbla(2, "cblas_ssyr","Illegal Uplo setting, %d\n",Uplo );
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
#endif
      F77_ssyr(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda);
   }
   else if (order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: select the opposite triangle. */
      if (Uplo == CblasLower) UL = 'U';
      else if (Uplo == CblasUpper) UL = 'L';
      else
      {
         cblas_xerbla(2, "cblas_ssyr","Illegal Uplo setting, %d\n",Uplo );
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
#endif
      F77_ssyr(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda);
   }
   else cblas_xerbla(1, "cblas_ssyr", "Illegal Order setting, %d\n", order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_ssyr2.c000066400000000000000000000036101360743507500213030ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_ssyr2.c * This program is a C interface to ssyr2. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const float alpha, const float *X, f77_int incX, const float *Y, f77_int incY, float *A, f77_int lda) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY, F77_lda=lda; #else #define F77_N N #define F77_incX incX #define F77_incY incY #define F77_lda lda #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_ssyr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_ssyr2(F77_UL, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasLower) UL = 'U'; else if (Uplo == CblasUpper) UL = 'L'; else { cblas_xerbla(2, "cblas_ssyr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_ssyr2(F77_UL, &F77_N, &alpha, X, &F77_incX, Y, &F77_incY, A, &F77_lda); } else cblas_xerbla(1, "cblas_ssyr2", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ssyr2k.c000066400000000000000000000055241360743507500214640ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_ssyr2k.c * This program is a C interface to ssyr2k. 
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, float alpha, const float *A, f77_int lda, const float *B, f77_int ldb, float beta, float *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_ssyr2k", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_ssyr2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_ssyr2k(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_ssyr2k", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='T'; else { cblas_xerbla(3, "cblas_ssyr2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_ssyr2k(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, B, &F77_ldb, &beta, C, 
&F77_ldc); } else cblas_xerbla(1, "cblas_ssyr2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ssyrk.c000066400000000000000000000053321360743507500213770ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_ssyrk.c * This program is a C interface to ssyrk. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, float alpha, const float *A, f77_int lda, float beta, float *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_ssyrk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_ssyrk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_ssyrk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, &beta, C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_ssyrk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == 
CblasNoTrans ) TR='T'; else { cblas_xerbla(3, "cblas_ssyrk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_ssyrk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, &beta, C, &F77_ldc); } else cblas_xerbla(1, "cblas_ssyrk", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_stbmv.c000066400000000000000000000064051360743507500213610ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_stbmv.c * This program is a C interface to stbmv. * Written by Keita Teranishi * 3/3/1998 */ #include "cblas.h" #include "cblas_f77.h" void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const float *A, f77_int lda, float *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_stbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_stbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_stbmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 
0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_stbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_stbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_stbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_stbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_stbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, &F77_incX); } else cblas_xerbla(1, "cblas_stbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_stbsv.c000066400000000000000000000063651360743507500213740ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_stbsv.c * The program is a C interface to stbsv. 
*
 * Keita Teranishi 5/20/98
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_stbsv: validates order, Uplo, TransA and Diag, maps them to
 * characters and forwards to F77_stbsv.
 *
 * In the row-major branch Uplo is flipped (Upper -> 'L', Lower -> 'U')
 * and TransA is flipped (NoTrans -> 'T', Trans/ConjTrans -> 'N'); Diag
 * maps the same way in both branches.  Errors are reported via
 * cblas_xerbla (order = 1, Uplo = 2, TransA = 3, Diag = 4) and
 * F77_stbsv is then not called.
 *
 * CBLAS_CallFromC / RowMajorStrg are globals defined elsewhere, set for
 * the duration of the call and cleared on every exit path.
 */
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
   enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
   f77_int N, f77_int K, const float *A, f77_int lda,
   float *X, f77_int incX)
{
   char TA;
   char UL;
   char DI;
#ifdef F77_CHAR
   F77_CHAR F77_TA, F77_UL, F77_DI;
#else
   #define F77_TA &TA
   #define F77_UL &UL
   #define F77_DI &DI
#endif
#ifdef F77_INT
   F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX;
#else
   #define F77_N N
   #define F77_K K
   #define F77_lda lda
   #define F77_incX incX
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if (order == CblasColMajor)
   {
      if (Uplo == CblasUpper) UL = 'U';
      else if (Uplo == CblasLower) UL = 'L';
      else
      {
         cblas_xerbla(2, "cblas_stbsv","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (TransA == CblasNoTrans) TA = 'N';
      else if (TransA == CblasTrans) TA = 'T';
      else if (TransA == CblasConjTrans) TA = 'C';
      else
      {
         cblas_xerbla(3, "cblas_stbsv","Illegal TransA setting, %d\n", TransA);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (Diag == CblasUnit) DI = 'U';
      else if (Diag == CblasNonUnit) DI = 'N';
      else
      {
         cblas_xerbla(4, "cblas_stbsv","Illegal Diag setting, %d\n", Diag);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_TA = C2F_CHAR(&TA);
      F77_DI = C2F_CHAR(&DI);
#endif
      F77_stbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
         &F77_incX);
   }
   else if (order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: flip the triangle and the transpose flag. */
      if (Uplo == CblasUpper) UL = 'L';
      else if (Uplo == CblasLower) UL = 'U';
      else
      {
         cblas_xerbla(2, "cblas_stbsv","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (TransA == CblasNoTrans) TA = 'T';
      else if (TransA == CblasTrans) TA = 'N';
      else if (TransA == CblasConjTrans) TA = 'N';
      else
      {
         cblas_xerbla(3, "cblas_stbsv","Illegal TransA setting, %d\n", TransA);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (Diag == CblasUnit) DI = 'U';
      else if (Diag == CblasNonUnit) DI = 'N';
      else
      {
         cblas_xerbla(4, "cblas_stbsv","Illegal Diag setting, %d\n", Diag);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_TA = C2F_CHAR(&TA);
      F77_DI = C2F_CHAR(&DI);
#endif
      F77_stbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
         &F77_incX);
   }
   else cblas_xerbla(1, "cblas_stbsv", "Illegal Order setting, %d\n", order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_stpmv.c000066400000000000000000000061141360743507500213740ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_stpmv.c
 * This program is a C interface to stpmv.
 * Written by Keita Teranishi
 * 4/6/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_stpmv: validates order, Uplo, TransA and Diag, maps them to
 * characters and forwards to F77_stpmv.
 *
 * In the row-major branch Uplo is flipped (Upper -> 'L', Lower -> 'U')
 * and TransA is flipped (NoTrans -> 'T', Trans/ConjTrans -> 'N'); Diag
 * maps the same way in both branches.  Errors are reported via
 * cblas_xerbla (order = 1, Uplo = 2, TransA = 3, Diag = 4) and
 * F77_stpmv is then not called.
 */
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
   enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
   f77_int N, const float *Ap, float *X, f77_int incX)
{
   char TA;
   char UL;
   char DI;
#ifdef F77_CHAR
   F77_CHAR F77_TA, F77_UL, F77_DI;
#else
   #define F77_TA &TA
   #define F77_UL &UL
   #define F77_DI &DI
#endif
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if (order == CblasColMajor)
   {
      if (Uplo == CblasUpper) UL = 'U';
      else if (Uplo == CblasLower) UL = 'L';
      else
      {
         cblas_xerbla(2, "cblas_stpmv","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (TransA == CblasNoTrans) TA = 'N';
      else if (TransA == CblasTrans) TA = 'T';
      else if (TransA == CblasConjTrans) TA = 'C';
      else
      {
         cblas_xerbla(3, "cblas_stpmv","Illegal TransA setting, %d\n", TransA);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (Diag == CblasUnit) DI = 'U';
      else if (Diag == CblasNonUnit) DI = 'N';
      else
      {
         cblas_xerbla(4, "cblas_stpmv","Illegal Diag setting, %d\n", Diag);
         CBLAS_CallFromC = 0;
         RowMajorStrg =
            0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_TA = C2F_CHAR(&TA);
      F77_DI = C2F_CHAR(&DI);
#endif
      F77_stpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX);
   }
   else if (order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: flip the triangle and the transpose flag. */
      if (Uplo == CblasUpper) UL = 'L';
      else if (Uplo == CblasLower) UL = 'U';
      else
      {
         cblas_xerbla(2, "cblas_stpmv","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (TransA == CblasNoTrans) TA = 'T';
      else if (TransA == CblasTrans) TA = 'N';
      else if (TransA == CblasConjTrans) TA = 'N';
      else
      {
         cblas_xerbla(3, "cblas_stpmv","Illegal TransA setting, %d\n", TransA);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (Diag == CblasUnit) DI = 'U';
      else if (Diag == CblasNonUnit) DI = 'N';
      else
      {
         cblas_xerbla(4, "cblas_stpmv","Illegal Diag setting, %d\n", Diag);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_TA = C2F_CHAR(&TA);
      F77_DI = C2F_CHAR(&DI);
#endif
      F77_stpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX);
   }
   else cblas_xerbla(1, "cblas_stpmv", "Illegal Order setting, %d\n", order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_stpsv.c000066400000000000000000000060761360743507500214110ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 * cblas_stpsv.c
 * The program is a C interface to stpsv.
*
 * Keita Teranishi 5/20/98
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_stpsv: validates order, Uplo, TransA and Diag, maps them to
 * characters and forwards to F77_stpsv.
 *
 * In the row-major branch Uplo is flipped (Upper -> 'L', Lower -> 'U')
 * and TransA is flipped (NoTrans -> 'T', Trans/ConjTrans -> 'N'); Diag
 * maps the same way in both branches.  Errors are reported via
 * cblas_xerbla (order = 1, Uplo = 2, TransA = 3, Diag = 4) and
 * F77_stpsv is then not called.
 *
 * CBLAS_CallFromC / RowMajorStrg are globals defined elsewhere, set for
 * the duration of the call and cleared on every exit path.
 */
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
   enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag,
   f77_int N, const float *Ap, float *X, f77_int incX)
{
   char TA;
   char UL;
   char DI;
#ifdef F77_CHAR
   F77_CHAR F77_TA, F77_UL, F77_DI;
#else
   #define F77_TA &TA
   #define F77_UL &UL
   #define F77_DI &DI
#endif
#ifdef F77_INT
   F77_INT F77_N=N, F77_incX=incX;
#else
   #define F77_N N
   #define F77_incX incX
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if (order == CblasColMajor)
   {
      if (Uplo == CblasUpper) UL = 'U';
      else if (Uplo == CblasLower) UL = 'L';
      else
      {
         cblas_xerbla(2, "cblas_stpsv","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (TransA == CblasNoTrans) TA = 'N';
      else if (TransA == CblasTrans) TA = 'T';
      else if (TransA == CblasConjTrans) TA = 'C';
      else
      {
         cblas_xerbla(3, "cblas_stpsv","Illegal TransA setting, %d\n", TransA);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (Diag == CblasUnit) DI = 'U';
      else if (Diag == CblasNonUnit) DI = 'N';
      else
      {
         cblas_xerbla(4, "cblas_stpsv","Illegal Diag setting, %d\n", Diag);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_TA = C2F_CHAR(&TA);
      F77_DI = C2F_CHAR(&DI);
#endif
      F77_stpsv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX);
   }
   else if (order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: flip the triangle and the transpose flag. */
      if (Uplo == CblasUpper) UL = 'L';
      else if (Uplo == CblasLower) UL = 'U';
      else
      {
         cblas_xerbla(2, "cblas_stpsv","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (TransA == CblasNoTrans) TA = 'T';
      else if (TransA == CblasTrans) TA = 'N';
      else if (TransA == CblasConjTrans) TA = 'N';
      else
      {
         cblas_xerbla(3, "cblas_stpsv","Illegal TransA setting, %d\n", TransA);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if (Diag == CblasUnit) DI = 'U';
      else if (Diag == CblasNonUnit) DI = 'N';
      else
      {
         cblas_xerbla(4, "cblas_stpsv","Illegal Diag setting, %d\n", Diag);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_TA = C2F_CHAR(&TA);
      F77_DI = C2F_CHAR(&DI);
#endif
      F77_stpsv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX);
   }
   else cblas_xerbla(1, "cblas_stpsv", "Illegal Order setting, %d\n", order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_strmm.c000066400000000000000000000075611360743507500213740ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_strmm.c
 * This program is a C interface to strmm.
 * Written by Keita Teranishi
 * 4/6/1998
 *
 */
#include "cblas.h"
#include "cblas_f77.h"
/*
 * cblas_strmm: validates Order, Side, Uplo, TransA and Diag, maps them
 * to characters and forwards to F77_strmm.
 *
 * In the row-major branch Side and Uplo are flipped (Right <-> Left,
 * Upper <-> Lower) and N and M are passed swapped; TransA and Diag map
 * the same way in both branches.  Errors are reported via cblas_xerbla
 * (Order = 1, Side = 2, Uplo = 3, TransA = 4, Diag = 5) and F77_strmm
 * is then not called.
 */
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side,
   enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
   enum CBLAS_DIAG Diag, f77_int M, f77_int N,
   float alpha, const float *A, f77_int lda,
   float *B, f77_int ldb)
{
   char UL, TA, SD, DI;
#ifdef F77_CHAR
   F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI;
#else
   #define F77_TA &TA
   #define F77_UL &UL
   #define F77_SD &SD
   #define F77_DI &DI
#endif
#ifdef F77_INT
   F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
#else
   #define F77_M M
   #define F77_N N
   #define F77_lda lda
   #define F77_ldb ldb
#endif
   extern int CBLAS_CallFromC;
   extern int RowMajorStrg;
   RowMajorStrg = 0;
   CBLAS_CallFromC = 1;
   if( Order == CblasColMajor )
   {
      if( Side == CblasRight) SD='R';
      else if ( Side == CblasLeft ) SD='L';
      else
      {
         cblas_xerbla(2, "cblas_strmm","Illegal Side setting, %d\n", Side);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if( Uplo == CblasUpper) UL='U';
      else if ( Uplo == CblasLower ) UL='L';
      else
      {
         cblas_xerbla(3, "cblas_strmm","Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if( TransA == CblasTrans) TA ='T';
      else if ( TransA == CblasConjTrans ) TA='C';
      else if ( TransA == CblasNoTrans ) TA='N';
      else
      {
         cblas_xerbla(4, "cblas_strmm","Illegal Trans setting, %d\n", TransA);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if( Diag == CblasUnit ) DI='U';
      else if ( Diag == CblasNonUnit ) DI='N';
      else
      {
         cblas_xerbla(5, "cblas_strmm", "Illegal Diag setting, %d\n", Diag);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_TA = C2F_CHAR(&TA);
      F77_SD = C2F_CHAR(&SD);
      F77_DI = C2F_CHAR(&DI);
#endif
      F77_strmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, &alpha, A,
         &F77_lda, B, &F77_ldb);
   }
   else if (Order == CblasRowMajor)
   {
      RowMajorStrg = 1;
      /* Row-major: flip side and triangle, swap M/N in the call. */
      if( Side == CblasRight) SD='L';
      else if ( Side == CblasLeft ) SD='R';
      else
      {
         cblas_xerbla(2, "cblas_strmm","Illegal Side setting, %d\n", Side);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if( Uplo == CblasUpper) UL='L';
      else if ( Uplo == CblasLower ) UL='U';
      else
      {
         cblas_xerbla(3, "cblas_strmm", "Illegal Uplo setting, %d\n", Uplo);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if( TransA == CblasTrans) TA ='T';
      else if ( TransA == CblasConjTrans ) TA='C';
      else if ( TransA == CblasNoTrans ) TA='N';
      else
      {
         cblas_xerbla(4, "cblas_strmm", "Illegal Trans setting, %d\n", TransA);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
      if( Diag == CblasUnit ) DI='U';
      else if ( Diag == CblasNonUnit ) DI='N';
      else
      {
         cblas_xerbla(5, "cblas_strmm","Illegal Diag setting, %d\n", Diag);
         CBLAS_CallFromC = 0;
         RowMajorStrg = 0;
         return;
      }
#ifdef F77_CHAR
      F77_UL = C2F_CHAR(&UL);
      F77_TA = C2F_CHAR(&TA);
      F77_SD = C2F_CHAR(&SD);
      F77_DI = C2F_CHAR(&DI);
#endif
      F77_strmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, &alpha, A,
         &F77_lda, B, &F77_ldb);
   }
   else cblas_xerbla(1, "cblas_strmm", "Illegal Order setting, %d\n", Order);
   CBLAS_CallFromC = 0;
   RowMajorStrg = 0;
   return;
}
#endif
blis-0.6.1/frame/compat/cblas/src/cblas_strmv.c000066400000000000000000000063141360743507500214000ustar00rootroot00000000000000#include "blis.h"
#ifdef BLIS_ENABLE_CBLAS
/*
 *
 * cblas_strmv.c
 * This program is a C interface to strmv.
* Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const float *A, f77_int lda, float *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_strmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_strmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_strmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_strmv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_strmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_strmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == 
CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_strmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_strmv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, &F77_incX); } else cblas_xerbla(1, "cblas_strmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_strsm.c000066400000000000000000000076071360743507500214030ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_strsm.c * This program is a C interface to strsm. * Written by Keita Teranishi * 4/6/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, float alpha, const float *A, f77_int lda, float *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_strsm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_strsm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { 
cblas_xerbla(4, "cblas_strsm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_strsm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_strsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, &alpha, A, &F77_lda, B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_strsm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_strsm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_strsm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_strsm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_strsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, &alpha, A, &F77_lda, B, &F77_ldb); } else cblas_xerbla(1, "cblas_strsm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_strsv.c000066400000000000000000000062751360743507500214140ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_strsv.c * The program is a C interface to strsv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const float *A, f77_int lda, float *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_strsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_strsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_strsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_strsv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_strsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) TA = 'N'; else { cblas_xerbla(3, "cblas_strsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; 
else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_strsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_strsv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, &F77_incX); } else cblas_xerbla(1, "cblas_strsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_xerbla.c000066400000000000000000000036761360743507500215120ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS #include #include #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_xerbla(f77_int info, const char *rout, const char *form, ...) { extern int RowMajorStrg; char empty[1] = ""; va_list argptr; va_start(argptr, form); if (RowMajorStrg) { if (strstr(rout,"gemm") != 0) { if (info == 5 ) info = 4; else if (info == 4 ) info = 5; else if (info == 11) info = 9; else if (info == 9 ) info = 11; } else if (strstr(rout,"symm") != 0 || strstr(rout,"hemm") != 0) { if (info == 5 ) info = 4; else if (info == 4 ) info = 5; } else if (strstr(rout,"trmm") != 0 || strstr(rout,"trsm") != 0) { if (info == 7 ) info = 6; else if (info == 6 ) info = 7; } else if (strstr(rout,"gemv") != 0) { if (info == 4) info = 3; else if (info == 3) info = 4; } else if (strstr(rout,"gbmv") != 0) { if (info == 4) info = 3; else if (info == 3) info = 4; else if (info == 6) info = 5; else if (info == 5) info = 6; } else if (strstr(rout,"ger") != 0) { if (info == 3) info = 2; else if (info == 2) info = 3; else if (info == 8) info = 6; else if (info == 6) info = 8; } else if ( (strstr(rout,"her2") != 0 || strstr(rout,"hpr2") != 0) && strstr(rout,"her2k") == 0 ) { if (info == 8) info = 6; else if (info == 6) info = 8; } } if (info) fprintf(stderr, "Parameter %jd to routine %s was incorrect\n", ( intmax_t )info, rout); vfprintf(stderr, form, argptr); va_end(argptr); 
if (info && !info) F77_xerbla(empty, &info, 0); /* Force link of our F77 error handler */ exit(-1); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zaxpy.c000066400000000000000000000011001360743507500213640ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zaxpy.c * * The program is a C interface to zaxpy. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zaxpy( f77_int N, const void *alpha, const void *X, f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_zaxpy( &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zcopy.c000066400000000000000000000010311360743507500213600ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zcopy.c * * The program is a C interface to zcopy. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zcopy( f77_int N, const void *X, f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_zcopy( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zdotc_sub.c000066400000000000000000000012071360743507500222150ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zdotc_sub.c * * The program is a C interface to zdotc. * It calls the fortran wrapper before calling zdotc. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zdotc_sub( f77_int N, const void *X, f77_int incX, const void *Y, f77_int incY, void *dotc) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_zdotc_sub( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)dotc); return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zdotu_sub.c000066400000000000000000000012111360743507500222320ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zdotu_sub.c * * The program is a C interface to zdotu. * It calls the fortran wrapper before calling zdotu. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zdotu_sub( f77_int N, const void *X, f77_int incX, const void *Y, f77_int incY, void *dotu) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_zdotu_sub( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)dotu); return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zdscal.c000066400000000000000000000007271360743507500215070ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zdscal.c * * The program is a C interface to zdscal. * * Written by Keita Teranishi. 
2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zdscal( f77_int N, double alpha, void *X, f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_zdscal( &F77_N, &alpha, (dcomplex*)X, &F77_incX); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zgbmv.c000066400000000000000000000104171360743507500213510ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zgbmv.c * The program is a C interface of zgbmv * * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, f77_int KL, f77_int KU, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char TA; #ifdef F77_CHAR F77_CHAR F77_TA; #else #define F77_TA &TA #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; F77_INT F77_KL=KL,F77_KU=KU; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_KL KL #define F77_KU KU #define F77_incX incX #define F77_incY incY #endif int n, i=0; const double *xx= (double *)X, *alp= (double *)alpha, *bet = (double *)beta; double ALPHA[2],BETA[2]; int tincY, tincx; double *x=(double *)X, *y=(double *)Y, *st=0, *tx; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(2, "cblas_zgbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_zgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if 
(TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; TA = 'N'; if (M > 0) { n = M << 1; x = malloc(n*sizeof(double)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif if( incY > 0 ) tincY = incY; else tincY = -incY; y++; if (N > 0) { i = tincY << 1; n = i * N ; st = y + n; do { *y = -(*y); y += i; } while(y != st); y -= n; } } else x = (double *) X; } else { cblas_xerbla(2, "cblas_zgbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif if (TransA == CblasConjTrans) F77_zgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, (dcomplex*)ALPHA, (dcomplex*)A ,&F77_lda, (dcomplex*)x,&F77_incX, (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); else F77_zgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, (dcomplex*)alpha, (dcomplex*)A ,&F77_lda, (dcomplex*)x,&F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); if (TransA == CblasConjTrans) { if (x != X) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } } else cblas_xerbla(1, "cblas_zgbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zgemm.c000066400000000000000000000060511360743507500213420ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_zgemm.c * This program is a C interface to zgemm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc) { char TA, TB; #ifdef F77_CHAR F77_CHAR F77_TA, F77_TB; #else #define F77_TA &TA #define F77_TB &TB #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if(TransA == CblasTrans) TA='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(2, "cblas_zgemm","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(TransB == CblasTrans) TB='T'; else if ( TransB == CblasConjTrans ) TB='C'; else if ( TransB == CblasNoTrans ) TB='N'; else { cblas_xerbla(3, "cblas_zgemm","Illegal TransB setting, %d\n", TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); F77_TB = C2F_CHAR(&TB); #endif F77_zgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if(TransA == CblasTrans) TB='T'; else if ( TransA == CblasConjTrans ) TB='C'; else if ( TransA == CblasNoTrans ) TB='N'; else { cblas_xerbla(2, "cblas_zgemm","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(TransB == CblasTrans) TA='T'; else if ( TransB == CblasConjTrans ) TA='C'; else if ( TransB == CblasNoTrans ) TA='N'; else { cblas_xerbla(2, "cblas_zgemm","Illegal TransB setting, %d\n", 
TransB); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); F77_TB = C2F_CHAR(&TB); #endif F77_zgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, (dcomplex*)alpha, (dcomplex*)B, &F77_ldb, (dcomplex*)A, &F77_lda, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zgemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zgemv.c000066400000000000000000000101731360743507500213530ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zgemv.c * The program is a C interface of zgemv * * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char TA; #ifdef F77_CHAR F77_CHAR F77_TA; #else #define F77_TA &TA #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif int n, i=0; const double *xx= (double *)X, *alp= (double *)alpha, *bet = (double *)beta; double ALPHA[2],BETA[2]; int tincY, tincx; double *x=(double *)X, *y=(double *)Y, *st=0, *tx; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(2, "cblas_zgemv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif F77_zgemv(F77_TA, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == 
CblasRowMajor) { RowMajorStrg = 1; if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; TA = 'N'; if (M > 0) { n = M << 1; x = malloc(n*sizeof(double)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif if(incY > 0) tincY = incY; else tincY = -incY; y++; if (N > 0) { i = tincY << 1; n = i * N ; st = y + n; do { *y = -(*y); y += i; } while(y != st); y -= n; } } else x = (double *) X; } else { cblas_xerbla(2, "cblas_zgemv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif if (TransA == CblasConjTrans) F77_zgemv(F77_TA, &F77_N, &F77_M, (dcomplex*)ALPHA, (dcomplex*)A, &F77_lda, (dcomplex*)x, &F77_incX, (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); else F77_zgemv(F77_TA, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)x, &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); if (TransA == CblasConjTrans) { if (x != (double *)X) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } } else cblas_xerbla(1, "cblas_zgemv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zgerc.c000066400000000000000000000037721360743507500213440ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zgerc.c * The program is a C interface to zgerc. 
* * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zgerc(enum CBLAS_ORDER order, f77_int M, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_incX incX #define F77_incY incY #define F77_lda lda #endif int n, i, tincy; double *y=(double *)Y, *yy=(double *)Y, *ty, *st; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { F77_zgerc( &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (N > 0) { n = N << 1; y = malloc(n*sizeof(double)); ty = y; if( incY > 0 ) { i = incY << 1; tincy = 2; st= y+n; } else { i = incY *(-2); tincy = -2; st = y-2; y +=(n-2); } do { *y = *yy; y[1] = -yy[1]; y += tincy ; yy += i; } while (y != st); y = ty; #ifdef F77_INT F77_incY = 1; #else incY = 1; #endif } else y = (double *) Y; F77_zgeru( &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)y, &F77_incY, (dcomplex*)X, &F77_incX, (dcomplex*)A, &F77_lda); if(Y!=y) free(y); } else cblas_xerbla(1, "cblas_zgerc", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zgeru.c000066400000000000000000000023431360743507500213570ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zgeru.c * The program is a C interface to zgeru. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_M M #define F77_N N #define F77_incX incX #define F77_incY incY #define F77_lda lda #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { F77_zgeru( &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; F77_zgeru( &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)Y, &F77_incY, (dcomplex*)X, &F77_incX, (dcomplex*)A, &F77_lda); } else cblas_xerbla(1, "cblas_zgeru", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zhbmv.c000066400000000000000000000071251360743507500213540ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhbmv.c * The program is a C interface to zhbmv * * Keita Teranishi 5/18/98 * */ #include "cblas.h" #include "cblas_f77.h" #include #include void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,f77_int N,f77_int K, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif int n, i=0; const double *xx= (double *)X, *alp= (double *)alpha, *bet = (double *)beta; double ALPHA[2],BETA[2]; int tincY, tincx; double *x=(double *)X, *y=(double *)Y, *st=0, *tx; extern int CBLAS_CallFromC; extern int 
RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_zhbmv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zhbmv(F77_UL, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; if (N > 0) { n = N << 1; x = malloc(n*sizeof(double)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif if(incY > 0) tincY = incY; else tincY = -incY; y++; i = tincY << 1; n = i * N ; st = y + n; do { *y = -(*y); y += i; } while(y != st); y -= n; } else x = (double *) X; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_zhbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zhbmv(F77_UL, &F77_N, &F77_K, (dcomplex*)ALPHA, (dcomplex*)A ,&F77_lda, (dcomplex*)x,&F77_incX, (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); } else { cblas_xerbla(1, "cblas_zhbmv","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( order == CblasRowMajor ) { RowMajorStrg = 1; if(X!=x) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zhemm.c000066400000000000000000000053571360743507500213530ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_zhemm.c * This program is a C interface to zhemm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zhemm(enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR F77_CHAR F77_SD, F77_UL; #else #define F77_SD &SD #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_zhemm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_zhemm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_zhemm(F77_SD, F77_UL, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_zhemm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_zhemm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_zhemm(F77_SD, F77_UL, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, 
(dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zhemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zhemv.c000066400000000000000000000070511360743507500213550ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhemv.c * The program is a C interface to zhemv * * Keita Teranishi 5/18/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *A, f77_int lda, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif int n, i=0; const double *xx= (double *)X, *alp= (double *)alpha, *bet = (double *)beta; double ALPHA[2],BETA[2]; int tincY, tincx; double *x=(double *)X, *y=(double *)Y, *st=0, *tx; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_zhemv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zhemv(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; if (N > 0) { n = N << 1; x = malloc(n*sizeof(double)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX 
= 1; #else incX = 1; #endif if(incY > 0) tincY = incY; else tincY = -incY; y++; i = tincY << 1; n = i * N ; st = y + n; do { *y = -(*y); y += i; } while(y != st); y -= n; } else x = (double *) X; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_zhemv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zhemv(F77_UL, &F77_N, (dcomplex*)ALPHA, (dcomplex*)A, &F77_lda, (dcomplex*)x, &F77_incX, (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); } else { cblas_xerbla(1, "cblas_zhemv","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( order == CblasRowMajor ) { RowMajorStrg = 1; if ( X != x ) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zher.c000066400000000000000000000050001360743507500211640ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zher.c * The program is a C interface to zher. 
* * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const void *X, f77_int incX ,void *A, f77_int lda) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif int n, i, tincx; double *x=(double *)X, *xx=(double *)X, *tx, *st; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_zher","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zher(F77_UL, &F77_N, &alpha, (dcomplex*)X, &F77_incX, (dcomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_zher","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif if (N > 0) { n = N << 1; x = malloc(n*sizeof(double)); tx = x; if( incX > 0 ) { i = incX << 1 ; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif } else x = (double *) X; F77_zher(F77_UL, &F77_N, &alpha, (dcomplex*)x, &F77_incX, (dcomplex*)A, &F77_lda); } else cblas_xerbla(1, "cblas_zher", "Illegal Order setting, %d\n", order); if(X!=x) free(x); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zher2.c000066400000000000000000000067021360743507500212600ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * 
cblas_zher2.c * The program is a C interface to zher2. * * Keita Teranishi 3/23/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, const void *alpha, const void *X, f77_int incX, const void *Y, f77_int incY, void *A, f77_int lda) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_lda lda #define F77_incX incX #define F77_incY incY #endif int n, i, j, tincx, tincy; double *x=(double *)X, *xx=(double *)X, *y=(double *)Y, *yy=(double *)Y, *tx, *ty, *stx, *sty; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_zher2", "Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zher2(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_zher2", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif if (N > 0) { n = N << 1; x = malloc(n*sizeof(double)); y = malloc(n*sizeof(double)); tx = x; ty = y; if( incX > 0 ) { i = incX << 1 ; tincx = 2; stx= x+n; } else { i = incX *(-2); tincx = -2; stx = x-2; x +=(n-2); } if( incY > 0 ) { j = incY << 1; tincy = 2; sty= y+n; } else { j = incY *(-2); tincy = -2; sty = y-2; y +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != stx); do { *y = *yy; y[1] = -yy[1]; y += tincy ; yy += j; } while (y != sty); x=tx; y=ty; #ifdef F77_INT F77_incX = 1; F77_incY = 1; 
#else incX = 1; incY = 1; #endif } else { x = (double *) X; y = (double *) Y; } F77_zher2(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)y, &F77_incY, (dcomplex*)x, &F77_incX, (dcomplex*)A, &F77_lda); } else { cblas_xerbla(1, "cblas_zher2", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(X!=x) free(x); if(Y!=y) free(y); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zher2k.c000066400000000000000000000056451360743507500214400ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_zher2k.c * This program is a C interface to zher2k. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, double beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; double ALPHA[2]; const double *alp=(double *)alpha; CBLAS_CallFromC = 1; RowMajorStrg = 0; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_zher2k", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_zher2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_zher2k(F77_UL, F77_TR, &F77_N, &F77_K, 
(dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, &beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(2, "cblas_zher2k", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='C'; else { cblas_xerbla(3, "cblas_zher2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif ALPHA[0]= *alp; ALPHA[1]= -alp[1]; F77_zher2k(F77_UL,F77_TR, &F77_N, &F77_K, (dcomplex*)ALPHA, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, &beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zher2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zherk.c000066400000000000000000000052651360743507500213540ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_zherk.c * This program is a C interface to zherk. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, double alpha, const void *A, f77_int lda, double beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_zherk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_zherk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_zherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, (dcomplex*)A, &F77_lda, &beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_zherk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='C'; else { cblas_xerbla(3, "cblas_zherk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_zherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, (dcomplex*)A, &F77_lda, &beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zherk", 
"Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zhpmv.c000066400000000000000000000067411360743507500213750ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhpmv.c * The program is a C interface of zhpmv * * Keita Teranishi 5/18/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,f77_int N, const void *alpha, const void *AP, const void *X, f77_int incX, const void *beta, void *Y, f77_int incY) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif int n, i=0; const double *xx= (double *)X, *alp= (double *)alpha, *bet = (double *)beta; double ALPHA[2],BETA[2]; int tincY, tincx; double *x=(double *)X, *y=(double *)Y, *st=0, *tx; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_zhpmv","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zhpmv(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)AP, (dcomplex*)X, &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { RowMajorStrg = 1; ALPHA[0]= *alp; ALPHA[1]= -alp[1]; BETA[0]= *bet; BETA[1]= -bet[1]; if (N > 0) { n = N << 1; x = malloc(n*sizeof(double)); tx = x; if( incX > 0 ) { i = incX << 1; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif if(incY > 0) tincY = incY; else tincY = -incY; y++; i = tincY << 1; n = i * N ; st = y + n; 
do { *y = -(*y); y += i; } while(y != st); y -= n; } else x = (double *) X; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_zhpmv","Illegal Uplo setting, %d\n", Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zhpmv(F77_UL, &F77_N, (dcomplex*)ALPHA, (dcomplex*)AP, (dcomplex*)x, &F77_incX, (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); } else { cblas_xerbla(1, "cblas_zhpmv","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if ( order == CblasRowMajor ) { RowMajorStrg = 1; if(X!=x) free(x); if (N > 0) { do { *y = -(*y); y += i; } while (y != st); } } CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zhpr.c000066400000000000000000000050101360743507500212000ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhpr.c * The program is a C interface to zhpr. * * Keita Teranishi 3/23/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, double alpha, const void *X, f77_int incX, void *A) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif int n, i, tincx; double *x=(double *)X, *xx=(double *)X, *tx, *st; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_zhpr","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zhpr(F77_UL, &F77_N, &alpha, (dcomplex*)X, &F77_incX, (dcomplex*)A); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) 
UL = 'U'; else { cblas_xerbla(2, "cblas_zhpr","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif if (N > 0) { n = N << 1; x = malloc(n*sizeof(double)); tx = x; if( incX > 0 ) { i = incX << 1; tincx = 2; st= x+n; } else { i = incX *(-2); tincx = -2; st = x-2; x +=(n-2); } do { *x = *xx; x[1] = -xx[1]; x += tincx ; xx += i; } while (x != st); x=tx; #ifdef F77_INT F77_incX = 1; #else incX = 1; #endif } else x = (double *) X; F77_zhpr(F77_UL, &F77_N, &alpha, (dcomplex*)x, &F77_incX, (dcomplex*)A); } else { cblas_xerbla(1, "cblas_zhpr","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(X!=x) free(x); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zhpr2.c000066400000000000000000000065071360743507500212760ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhpr2.c * The program is a C interface to zhpr2. 
* * Keita Teranishi 5/20/98 * */ #include #include #include "cblas.h" #include "cblas_f77.h" void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N,const void *alpha, const void *X, f77_int incX,const void *Y, f77_int incY, void *Ap) { char UL; #ifdef F77_CHAR F77_CHAR F77_UL; #else #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif int n, i, j; double *x=(double *)X, *xx=(double *)X, *y=(double *)Y, *yy=(double *)Y, *stx, *sty; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasLower) UL = 'L'; else if (Uplo == CblasUpper) UL = 'U'; else { cblas_xerbla(2, "cblas_zhpr2","Illegal Uplo setting, %d\n",Uplo ); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif F77_zhpr2(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)Ap); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_zhpr2","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif if (N > 0) { n = N << 1; x = malloc(n*sizeof(double)); y = malloc(n*sizeof(double)); stx = x + n; sty = y + n; if( incX > 0 ) i = incX << 1; else i = incX *(-2); if( incY > 0 ) j = incY << 1; else j = incY *(-2); do { *x = *xx; x[1] = -xx[1]; x += 2; xx += i; } while (x != stx); do { *y = *yy; y[1] = -yy[1]; y += 2; yy += j; } while (y != sty); x -= n; y -= n; #ifdef F77_INT if(incX > 0 ) F77_incX = 1; else F77_incX = -1; if(incY > 0 ) F77_incY = 1; else F77_incY = -1; #else if(incX > 0 ) incX = 1; else incX = -1; if(incY > 0 ) incY = 1; else incY = -1; #endif } else { x = (double *) X; y = (void *) Y; } F77_zhpr2(F77_UL, &F77_N, (dcomplex*)alpha, 
(dcomplex*)y, &F77_incY, (dcomplex*)x, &F77_incX, (dcomplex*)Ap); } else { cblas_xerbla(1, "cblas_zhpr2","Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if(X!=x) free(x); if(Y!=y) free(y); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zscal.c000066400000000000000000000007421360743507500213400ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zscal.c * * The program is a C interface to zscal. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zscal( f77_int N, const void *alpha, void *X, f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif F77_zscal( &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zswap.c000066400000000000000000000010261360743507500213640ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zswap.c * * The program is a C interface to zswap. * * Written by Keita Teranishi. 2/11/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zswap( f77_int N, void *X, f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N #define F77_incX incX #define F77_incY incY #endif F77_zswap( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zsymm.c000066400000000000000000000053521360743507500214050ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_zsymm.c * This program is a C interface to zsymm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR F77_CHAR F77_SD, F77_UL; #else #define F77_SD &SD #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_zsymm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_zsymm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_zsymm(F77_SD, F77_UL, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, "cblas_zsymm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_zsymm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_SD = C2F_CHAR(&SD); #endif F77_zsymm(F77_SD, F77_UL, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, 
&F77_ldc); } else cblas_xerbla(1, "cblas_zsymm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zsyr2k.c000066400000000000000000000055471360743507500215000ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_zsyr2k.c * This program is a C interface to zsyr2k. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *B, f77_int ldb, const void *beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldb ldb #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_zsyr2k", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_zsyr2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_zsyr2k(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_zsyr2k", "Illegal Uplo setting, %d\n", Uplo); 
CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='T'; else { cblas_xerbla(3, "cblas_zsyr2k", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_zsyr2k(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zsyr2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_zsyrk.c000066400000000000000000000053461360743507500214130ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_zsyrk.c * This program is a C interface to zsyrk. * Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, const void *alpha, const void *A, f77_int lda, const void *beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR F77_CHAR F77_TR, F77_UL; #else #define F77_TR &TR #define F77_UL &UL #endif #ifdef F77_INT F77_INT F77_N=N, F77_K=K, F77_lda=lda; F77_INT F77_ldc=ldc; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_ldc ldc #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(2, "cblas_zsyrk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='T'; else if ( Trans == CblasConjTrans ) TR='C'; else if ( Trans == CblasNoTrans ) TR='N'; else { cblas_xerbla(3, "cblas_zsyrk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; 
return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_zsyrk(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_zsyrk", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Trans == CblasTrans) TR ='N'; else if ( Trans == CblasConjTrans ) TR='N'; else if ( Trans == CblasNoTrans ) TR='T'; else { cblas_xerbla(3, "cblas_zsyrk", "Illegal Trans setting, %d\n", Trans); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TR = C2F_CHAR(&TR); #endif F77_zsyrk(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zsyrk", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ztbmv.c000066400000000000000000000077021360743507500213710ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztbmv.c * The program is a C interface to ztbmv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const void *A, f77_int lda, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #endif int n, i=0, tincX; double *st=0, *x=(double *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ztbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ztbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztbmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ztbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if(incX > 0) tincX = incX; else 
tincX = -incX; i = tincX << 1; n = i * N; x++; st = x + n; do { *x = -(*x); x+= i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ztbmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztbmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ztbmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ztbsv.c000066400000000000000000000077131360743507500214010ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztbsv.c * The program is a C interface to ztbsv. 
* * Keita Teranishi 3/23/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, f77_int K, const void *A, f77_int lda, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX; #else #define F77_N N #define F77_K K #define F77_lda lda #define F77_incX incX #endif int n, i=0, tincX; double *st=0,*x=(double *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ztbsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ztbsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztbsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ztbsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if ( incX > 0 ) tincX = incX; 
else tincX = -incX; n = N*2*(tincX); x++; st=x+n; i = tincX << 1; do { *x = -(*x); x+=i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ztbsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztbsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x+= i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ztbsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ztpmv.c000066400000000000000000000074121360743507500214050ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztpmv.c * The program is a C interface to ztpmv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *Ap, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif int n, i=0, tincX; double *st=0,*x=(double *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ztpmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ztpmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztpmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztpmv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)Ap, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ztpmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if(incX > 0) tincX = incX; else tincX = -incX; i = tincX << 1; n = i * N; x++; st = x + n; do { *x = -(*x); x += i; } while (x != 
st); x -= n; } } else { cblas_xerbla(3, "cblas_ztpmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztpmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztpmv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)Ap, (dcomplex*)X,&F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ztpmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ztpsv.c000066400000000000000000000074241360743507500214160ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztpsv.c * The program is a C interface to ztpsv. * * Keita Teranishi 3/23/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *Ap, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N #define F77_incX incX #endif int n, i=0, tincX; double *st=0, *x=(double*)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ztpsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ztpsv","Illegal 
TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztpsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztpsv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)Ap, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ztpsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if ( incX > 0 ) tincX = incX; else tincX = -incX; n = N*2*(tincX); x++; st=x+n; i = tincX << 1; do { *x = -(*x); x+=i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ztpsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztpsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztpsv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)Ap, (dcomplex*)X,&F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ztpsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ztrmm.c000066400000000000000000000077361360743507500214070ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_ztrmm.c * This program is a C interface to ztrmm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, void *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight ) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_ztrmm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper ) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_ztrmm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans ) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_ztrmm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_ztrmm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_ztrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight ) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, 
"cblas_ztrmm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper ) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_ztrmm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans ) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_ztrmm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_ztrmm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_ztrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb); } else cblas_xerbla(1, "cblas_ztrmm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ztrmv.c000066400000000000000000000076151360743507500214140ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztrmv.c * The program is a C interface to ztrmv. 
* * Keita Teranishi 5/20/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *A, f77_int lda, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif int n, i=0, tincX; double *st=0,*x=(double *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ztrmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ztrmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztrmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztrmv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ztrmv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if(incX > 0) tincX = incX; else tincX = -incX; i = tincX << 1; n = i * N; 
x++; st = x + n; do { *x = -(*x); x += i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ztrmv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztrmv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztrmv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ztrmv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ztrsm.c000066400000000000000000000077561360743507500214170ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * * cblas_ztrsm.c * This program is a C interface to ztrsm. 
* Written by Keita Teranishi * 4/8/1998 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int M, f77_int N, const void *alpha, const void *A, f77_int lda, void *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_SD, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_SD &SD #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb; #else #define F77_M M #define F77_N N #define F77_lda lda #define F77_ldb ldb #endif extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if( Order == CblasColMajor ) { if( Side == CblasRight) SD='R'; else if ( Side == CblasLeft ) SD='L'; else { cblas_xerbla(2, "cblas_ztrsm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='U'; else if ( Uplo == CblasLower ) UL='L'; else { cblas_xerbla(3, "cblas_ztrsm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_ztrsm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_ztrsm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_ztrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; if( Side == CblasRight) SD='L'; else if ( Side == CblasLeft ) SD='R'; else { cblas_xerbla(2, 
"cblas_ztrsm", "Illegal Side setting, %d\n", Side); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Uplo == CblasUpper) UL='L'; else if ( Uplo == CblasLower ) UL='U'; else { cblas_xerbla(3, "cblas_ztrsm", "Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( TransA == CblasTrans) TA ='T'; else if ( TransA == CblasConjTrans ) TA='C'; else if ( TransA == CblasNoTrans ) TA='N'; else { cblas_xerbla(4, "cblas_ztrsm", "Illegal Trans setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if( Diag == CblasUnit ) DI='U'; else if ( Diag == CblasNonUnit ) DI='N'; else { cblas_xerbla(5, "cblas_ztrsm", "Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_SD = C2F_CHAR(&SD); F77_DI = C2F_CHAR(&DI); #endif F77_ztrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb); } else cblas_xerbla(1, "cblas_ztrsm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/cblas/src/cblas_ztrsv.c000066400000000000000000000076171360743507500214240ustar00rootroot00000000000000#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztrsv.c * The program is a C interface to ztrsv. 
* * Keita Teranishi 3/23/98 * */ #include "cblas.h" #include "cblas_f77.h" void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, f77_int N, const void *A, f77_int lda, void *X, f77_int incX) { char TA; char UL; char DI; #ifdef F77_CHAR F77_CHAR F77_TA, F77_UL, F77_DI; #else #define F77_TA &TA #define F77_UL &UL #define F77_DI &DI #endif #ifdef F77_INT F77_INT F77_N=N, F77_lda=lda, F77_incX=incX; #else #define F77_N N #define F77_lda lda #define F77_incX incX #endif int n, i=0, tincX; double *st=0,*x=(double *)X; extern int CBLAS_CallFromC; extern int RowMajorStrg; RowMajorStrg = 0; CBLAS_CallFromC = 1; if (order == CblasColMajor) { if (Uplo == CblasUpper) UL = 'U'; else if (Uplo == CblasLower) UL = 'L'; else { cblas_xerbla(2, "cblas_ztrsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'N'; else if (TransA == CblasTrans) TA = 'T'; else if (TransA == CblasConjTrans) TA = 'C'; else { cblas_xerbla(3, "cblas_ztrsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztrsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztrsv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { RowMajorStrg = 1; if (Uplo == CblasUpper) UL = 'L'; else if (Uplo == CblasLower) UL = 'U'; else { cblas_xerbla(2, "cblas_ztrsv","Illegal Uplo setting, %d\n", Uplo); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (TransA == CblasNoTrans) TA = 'T'; else if (TransA == CblasTrans) TA = 'N'; else if (TransA == CblasConjTrans) { TA = 'N'; if ( N > 0) { if ( incX > 0 ) tincX = incX; else tincX = -incX; n = N*2*(tincX); x++; 
st=x+n; i = tincX << 1; do { *x = -(*x); x+=i; } while (x != st); x -= n; } } else { cblas_xerbla(3, "cblas_ztrsv","Illegal TransA setting, %d\n", TransA); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } if (Diag == CblasUnit) DI = 'U'; else if (Diag == CblasNonUnit) DI = 'N'; else { cblas_xerbla(4, "cblas_ztrsv","Illegal Diag setting, %d\n", Diag); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif F77_ztrsv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { if (N > 0) { do { *x = -(*x); x += i; } while (x != st); } } } else cblas_xerbla(1, "cblas_ztrsv", "Illegal Order setting, %d\n", order); CBLAS_CallFromC = 0; RowMajorStrg = 0; return; } #endif blis-0.6.1/frame/compat/check/000077500000000000000000000000001360743507500161135ustar00rootroot00000000000000blis-0.6.1/frame/compat/check/bla_gemm_check.h000066400000000000000000000057701360743507500211750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #define bla_gemm_check( dt_str, op_str, transa, transb, m, n, k, lda, ldb, ldc ) \ { \ f77_int info = 0; \ f77_int nota, notb; \ f77_int conja, conjb; \ f77_int ta, tb; \ f77_int nrowa, nrowb; \ \ nota = PASTEF770(lsame)( transa, "N", (ftnlen)1, (ftnlen)1 ); \ notb = PASTEF770(lsame)( transb, "N", (ftnlen)1, (ftnlen)1 ); \ conja = PASTEF770(lsame)( transa, "C", (ftnlen)1, (ftnlen)1 ); \ conjb = PASTEF770(lsame)( transb, "C", (ftnlen)1, (ftnlen)1 ); \ ta = PASTEF770(lsame)( transa, "T", (ftnlen)1, (ftnlen)1 ); \ tb = PASTEF770(lsame)( transb, "T", (ftnlen)1, (ftnlen)1 ); \ \ if ( nota ) { nrowa = *m; } \ else { nrowa = *k; } \ if ( notb ) { nrowb = *k; } \ else { nrowb = *n; } \ \ if ( !nota && !conja && !ta ) \ info = 1; \ else if ( !notb && !conjb && !tb ) \ info = 2; \ else if ( *m < 0 ) \ info = 3; \ else if ( *n < 0 ) \ info = 4; \ else if ( *k < 0 ) \ info = 5; \ else if ( *lda < bli_max( 1, nrowa ) ) \ info = 8; \ else if ( *ldb < bli_max( 1, nrowb ) ) \ info = 10; \ else if ( *ldc < bli_max( 1, *m ) ) \ info = 13; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ 
PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_gemv_check.h000066400000000000000000000047531360743507500212060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS #define bla_gemv_check( dt_str, op_str, transa, m, n, lda, incx, incy ) \ { \ f77_int info = 0; \ f77_int nota, ta, conja; \ \ nota = PASTEF770(lsame)( transa, "N", (ftnlen)1, (ftnlen)1 ); \ ta = PASTEF770(lsame)( transa, "T", (ftnlen)1, (ftnlen)1 ); \ conja = PASTEF770(lsame)( transa, "C", (ftnlen)1, (ftnlen)1 ); \ \ if ( !nota && !ta && !conja ) \ info = 1; \ else if ( *m < 0 ) \ info = 2; \ else if ( *n < 0 ) \ info = 3; \ else if ( *lda < bli_max( 1, *m ) ) \ info = 6; \ else if ( *incx == 0 ) \ info = 8; \ else if ( *incy == 0 ) \ info = 11; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_ger_check.h000066400000000000000000000045021360743507500210150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #define bla_ger_check( dt_str, op_str, conj_str, m, n, incx, incy, lda ) \ { \ f77_int info = 0; \ \ if ( *m < 0 ) \ info = 1; \ else if ( *n < 0 ) \ info = 2; \ else if ( *incx == 0 ) \ info = 5; \ else if ( *incy == 0 ) \ info = 7; \ else if ( *lda < bli_max( 1, *m ) ) \ info = 9; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ /* We have to append an extra character to denote whether we are testing geru or gerc. */ \ sprintf( func_str, "%s%s%-2s", dt_str, op_str, conj_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_hemm_check.h000066400000000000000000000053361360743507500211740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #define bla_hemm_check( dt_str, op_str, sidea, uploa, m, n, lda, ldb, ldc ) \ { \ f77_int info = 0; \ f77_int left, right; \ f77_int lower, upper; \ f77_int nrowa; \ \ left = PASTEF770(lsame)( sidea, "L", (ftnlen)1, (ftnlen)1 ); \ right = PASTEF770(lsame)( sidea, "R", (ftnlen)1, (ftnlen)1 ); \ lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); \ \ if ( left ) { nrowa = *m; } \ else { nrowa = *n; } \ \ if ( !left && !right ) \ info = 1; \ else if ( !lower && !upper ) \ info = 2; \ else if ( *m < 0 ) \ info = 3; \ else if ( *n < 0 ) \ info = 4; \ else if ( *lda < bli_max( 1, nrowa ) ) \ info = 7; \ else if ( *ldb < bli_max( 1, *m ) ) \ info = 9; \ else if ( *ldc < bli_max( 1, *m ) ) \ info = 12; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif 
blis-0.6.1/frame/compat/check/bla_hemv_check.h000066400000000000000000000045661360743507500212110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS #define bla_hemv_check( dt_str, op_str, uploa, m, lda, incx, incy ) \ { \ f77_int info = 0; \ f77_int lower, upper; \ \ lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); \ \ if ( !lower && !upper ) \ info = 1; \ else if ( *m < 0 ) \ info = 2; \ else if ( *lda < bli_max( 1, *m ) ) \ info = 5; \ else if ( *incx == 0 ) \ info = 7; \ else if ( *incy == 0 ) \ info = 10; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_her2_check.h000066400000000000000000000045651360743507500211110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #define bla_her2_check( dt_str, op_str, uploa, m, incx, incy, lda ) \ { \ f77_int info = 0; \ f77_int lower, upper; \ \ lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); \ \ if ( !lower && !upper ) \ info = 1; \ else if ( *m < 0 ) \ info = 2; \ else if ( *incx == 0 ) \ info = 5; \ else if ( *incy == 0 ) \ info = 7; \ else if ( *lda < bli_max( 1, *m ) ) \ info = 9; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_her2k_check.h000066400000000000000000000053371360743507500212620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #define bla_her2k_check( dt_str, op_str, uploa, trans, m, k, lda, ldb, ldc ) \ { \ f77_int info = 0; \ f77_int nota, conja; \ f77_int lower, upper; \ f77_int nrowa; \ \ nota = PASTEF770(lsame)( trans, "N", (ftnlen)1, (ftnlen)1 ); \ conja = PASTEF770(lsame)( trans, "C", (ftnlen)1, (ftnlen)1 ); \ lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); \ \ if ( nota ) { nrowa = *m; } \ else { nrowa = *k; } \ \ if ( !lower && !upper ) \ info = 1; \ else if ( !nota && !conja ) \ info = 2; \ else if ( *m < 0 ) \ info = 3; \ else if ( *k < 0 ) \ info = 4; \ else if ( *lda < bli_max( 1, nrowa ) ) \ info = 7; \ else if ( *ldb < bli_max( 1, nrowa ) ) \ info = 9; \ else if ( *ldc < bli_max( 1, *m ) ) \ info = 12; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } 
#endif blis-0.6.1/frame/compat/check/bla_her_check.h000066400000000000000000000045061360743507500210220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS #define bla_her_check( dt_str, op_str, uploa, m, incx, lda ) \ { \ f77_int info = 0; \ f77_int lower, upper; \ \ lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); \ \ if ( !lower && !upper ) \ info = 1; \ else if ( *m < 0 ) \ info = 2; \ else if ( *incx == 0 ) \ info = 5; \ else if ( *lda < bli_max( 1, *m ) ) \ info = 7; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_herk_check.h000066400000000000000000000052461360743507500211770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #define bla_herk_check( dt_str, op_str, uploa, transa, m, k, lda, ldc ) \ { \ f77_int info = 0; \ f77_int nota, conja; \ f77_int lower, upper; \ f77_int nrowa; \ \ nota = PASTEF770(lsame)( transa, "N", (ftnlen)1, (ftnlen)1 ); \ conja = PASTEF770(lsame)( transa, "C", (ftnlen)1, (ftnlen)1 ); \ lower = PASTEF770(lsame)( uploc, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploc, "U", (ftnlen)1, (ftnlen)1 ); \ \ if ( nota ) { nrowa = *m; } \ else { nrowa = *k; } \ \ if ( !lower && !upper ) \ info = 1; \ else if ( !nota && !conja ) \ info = 2; \ else if ( *m < 0 ) \ info = 3; \ else if ( *k < 0 ) \ info = 4; \ else if ( *lda < bli_max( 1, nrowa ) ) \ info = 7; \ else if ( *ldc < bli_max( 1, *m ) ) \ info = 10; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_symm_check.h000066400000000000000000000033201360743507500212220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
#ifdef BLIS_ENABLE_BLAS
/* Object-like alias: every use of bla_symm_check expands to bla_hemm_check,
   so both names take the same argument list and run the same checks. */
#define bla_symm_check bla_hemm_check
#endif
blis-0.6.1/frame/compat/check/bla_symv_check.h000066400000000000000000000033201360743507500212330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
#ifdef BLIS_ENABLE_BLAS
/* Object-like alias: every use of bla_symv_check expands to bla_hemv_check,
   so both names take the same argument list and run the same checks. */
#define bla_symv_check bla_hemv_check
#endif
blis-0.6.1/frame/compat/check/bla_syr2_check.h000066400000000000000000000033201360743507500211340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
#ifdef BLIS_ENABLE_BLAS
/* Object-like alias: every use of bla_syr2_check expands to bla_her2_check,
   so both names take the same argument list and run the same checks. */
#define bla_syr2_check bla_her2_check
#endif
blis-0.6.1/frame/compat/check/bla_syr2k_check.h000066400000000000000000000056351360743507500213220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS #define bla_syr2k_check( dt_str, op_str, uploa, trans, m, k, lda, ldb, ldc ) \ { \ f77_int info = 0; \ f77_int is_r; \ f77_int nota, ta, cta; \ f77_int lower, upper; \ f77_int nrowa; \ \ static char* dt_cst = dt_str; \ \ is_r = ( dt_cst[0] == 's' || dt_cst[0] == 'd' ); \ nota = PASTEF770(lsame)( trans, "N", (ftnlen)1, (ftnlen)1 ); \ ta = PASTEF770(lsame)( trans, "T", (ftnlen)1, (ftnlen)1 ); \ cta = PASTEF770(lsame)( trans, "C", (ftnlen)1, (ftnlen)1 ); \ lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); \ \ if ( nota ) { nrowa = *m; } \ else { nrowa = *k; } \ \ if ( !lower && !upper ) \ info = 1; \ else if ( !nota && !ta && (is_r ? !cta : 1) ) \ info = 2; \ else if ( *m < 0 ) \ info = 3; \ else if ( *k < 0 ) \ info = 4; \ else if ( *lda < bli_max( 1, nrowa ) ) \ info = 7; \ else if ( *ldb < bli_max( 1, nrowa ) ) \ info = 9; \ else if ( *ldc < bli_max( 1, *m ) ) \ info = 12; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_syr_check.h000066400000000000000000000033161360743507500210570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
#ifdef BLIS_ENABLE_BLAS
/* Object-like alias: every use of bla_syr_check expands to bla_her_check,
   so both names take the same argument list and run the same checks. */
#define bla_syr_check bla_her_check
#endif
blis-0.6.1/frame/compat/check/bla_syrk_check.h000066400000000000000000000055451360743507500212400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #define bla_syrk_check( dt_str, op_str, uploa, transa, m, k, lda, ldc ) \ { \ f77_int info = 0; \ f77_int is_r; \ f77_int nota, ta, cta; \ f77_int lower, upper; \ f77_int nrowa; \ \ static char* dt_cst = dt_str; \ \ is_r = ( dt_cst[0] == 's' || dt_cst[0] == 'd' ); \ nota = PASTEF770(lsame)( transa, "N", (ftnlen)1, (ftnlen)1 ); \ ta = PASTEF770(lsame)( transa, "T", (ftnlen)1, (ftnlen)1 ); \ cta = PASTEF770(lsame)( transa, "C", (ftnlen)1, (ftnlen)1 ); \ lower = PASTEF770(lsame)( uploc, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploc, "U", (ftnlen)1, (ftnlen)1 ); \ \ if ( nota ) { nrowa = *m; } \ else { nrowa = *k; } \ \ if ( !lower && !upper ) \ info = 1; \ else if ( !nota && !ta && (is_r ? 
!cta : 1) ) \ info = 2; \ else if ( *m < 0 ) \ info = 3; \ else if ( *k < 0 ) \ info = 4; \ else if ( *lda < bli_max( 1, nrowa ) ) \ info = 7; \ else if ( *ldc < bli_max( 1, *m ) ) \ info = 10; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_trmm_check.h000066400000000000000000000062251360743507500212230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #define bla_trmm_check( dt_str, op_str, sidea, uploa, transa, diaga, m, n, lda, ldb ) \ { \ f77_int info = 0; \ f77_int left, right; \ f77_int lower, upper; \ f77_int nota, ta, conja; \ f77_int unita, nonua; \ f77_int nrowa; \ \ left = PASTEF770(lsame)( sidea, "L", (ftnlen)1, (ftnlen)1 ); \ right = PASTEF770(lsame)( sidea, "R", (ftnlen)1, (ftnlen)1 ); \ lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); \ nota = PASTEF770(lsame)( transa, "N", (ftnlen)1, (ftnlen)1 ); \ ta = PASTEF770(lsame)( transa, "T", (ftnlen)1, (ftnlen)1 ); \ conja = PASTEF770(lsame)( transa, "C", (ftnlen)1, (ftnlen)1 ); \ unita = PASTEF770(lsame)( diaga, "U", (ftnlen)1, (ftnlen)1 ); \ nonua = PASTEF770(lsame)( diaga, "N", (ftnlen)1, (ftnlen)1 ); \ \ if ( left ) { nrowa = *m; } \ else { nrowa = *n; } \ \ if ( !left && !right ) \ info = 1; \ else if ( !lower && !upper ) \ info = 2; \ else if ( !nota && !ta && !conja ) \ info = 3; \ else if ( !unita && !nonua ) \ info = 4; \ else if ( *m < 0 ) \ info = 5; \ else if ( *n < 0 ) \ info = 6; \ else if ( *lda < bli_max( 1, nrowa ) ) \ info = 9; \ else if ( *ldb < bli_max( 1, *m ) ) \ info = 11; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif 
blis-0.6.1/frame/compat/check/bla_trmv_check.h000066400000000000000000000054711360743507500212360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS #define bla_trmv_check( dt_str, op_str, uploa, transa, diaga, m, lda, incx ) \ { \ f77_int info = 0; \ f77_int lower, upper; \ f77_int nota, ta, conja; \ f77_int unita, nonua; \ \ lower = PASTEF770(lsame)( uploa, "L", (ftnlen)1, (ftnlen)1 ); \ upper = PASTEF770(lsame)( uploa, "U", (ftnlen)1, (ftnlen)1 ); \ nota = PASTEF770(lsame)( transa, "N", (ftnlen)1, (ftnlen)1 ); \ ta = PASTEF770(lsame)( transa, "T", (ftnlen)1, (ftnlen)1 ); \ conja = PASTEF770(lsame)( transa, "C", (ftnlen)1, (ftnlen)1 ); \ unita = PASTEF770(lsame)( diaga, "U", (ftnlen)1, (ftnlen)1 ); \ nonua = PASTEF770(lsame)( diaga, "N", (ftnlen)1, (ftnlen)1 ); \ \ if ( !lower && !upper ) \ info = 1; \ else if ( !nota && !ta && !conja ) \ info = 2; \ else if ( !unita && !nonua ) \ info = 3; \ else if ( *m < 0 ) \ info = 4; \ else if ( *lda < bli_max( 1, *m ) ) \ info = 6; \ else if ( *incx == 0 ) \ info = 8; \ \ if ( info != 0 ) \ { \ char func_str[ BLIS_MAX_BLAS_FUNC_STR_LENGTH ]; \ \ sprintf( func_str, "%s%-5s", dt_str, op_str ); \ \ bli_string_mkupper( func_str ); \ \ PASTEF770(xerbla)( func_str, &info, (ftnlen)6 ); \ \ return; \ } \ } #endif blis-0.6.1/frame/compat/check/bla_trsm_check.h000066400000000000000000000033201360743507500212220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
#ifdef BLIS_ENABLE_BLAS
/* Object-like alias: every use of bla_trsm_check expands to bla_trmm_check,
   so both names take the same argument list and run the same checks. */
#define bla_trsm_check bla_trmm_check
#endif
blis-0.6.1/frame/compat/check/bla_trsv_check.h000066400000000000000000000033201360743507500212330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifdef BLIS_ENABLE_BLAS

/* The trsv argument check is a plain alias: any use of bla_trsv_check */
/* expands to bla_trmv_check, so it takes the same argument list */
/* ( dt_str, op_str, uploa, transa, diaga, m, lda, incx ) and reports */
/* the same info codes as that macro. */
#define bla_trsv_check bla_trmv_check

#endif
blis-0.6.1/frame/compat/f2c/000077500000000000000000000000001360743507500155105ustar00rootroot00000000000000blis-0.6.1/frame/compat/f2c/bla_cabs1.c000066400000000000000000000044531360743507500174710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* scabs1.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ bla_real PASTEF77(s,cabs1)(bla_scomplex *z) { return bli_fabs( bli_creal( *z ) ) + bli_fabs( bli_cimag( *z ) ); } /* scabs1_ */ /* dcabs1.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ bla_double PASTEF77(d,cabs1)(bla_dcomplex *z) { return bli_fabs( bli_zreal( *z ) ) + bli_fabs( bli_zimag( *z ) ); } /* dcabs1_ */ #endif blis-0.6.1/frame/compat/f2c/bla_cabs1.h000066400000000000000000000034521360743507500174740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifdef BLIS_ENABLE_BLAS

/* Prototypes for the cabs1 helpers implemented in bla_cabs1.c. Each */
/* returns |Re(z)| + |Im(z)| of the complex value pointed to by z. */

/* Single-precision complex input, bla_real result. */
BLIS_EXPORT_BLAS bla_real PASTEF77(s,cabs1)(bla_scomplex *z);
/* Double-precision complex input, bla_double result. */
BLIS_EXPORT_BLAS bla_double PASTEF77(d,cabs1)(bla_dcomplex *z);

#endif
blis-0.6.1/frame/compat/f2c/bla_gbmv.c000066400000000000000000001450771360743507500174430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* cgbmv.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */

/*
   CGBMV (single-precision complex), f2c translation of the reference
   BLAS routine.

   Purpose
   =======
   Performs one of the matrix-vector operations

       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
       y := alpha*conjg( A' )*x + beta*y,

   where alpha and beta are scalars, x and y are vectors and A is an
   m by n band matrix, with kl sub-diagonals and ku super-diagonals.

   Parameters (all passed by pointer; only y is written)
   ==========
   trans  - CHARACTER*1. Selects the operation:
              'N' or 'n'   y := alpha*A*x + beta*y.
              'T' or 't'   y := alpha*A'*x + beta*y.
              'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
   m      - INTEGER. Number of rows of A. Must be at least zero.
   n      - INTEGER. Number of columns of A. Must be at least zero.
   kl     - INTEGER. Number of sub-diagonals of A. Must satisfy 0 .le. KL.
   ku     - INTEGER. Number of super-diagonals of A. Must satisfy 0 .le. KU.
   alpha  - COMPLEX. The scalar alpha.
   a      - COMPLEX array of DIMENSION ( LDA, n ). The leading
            ( kl + ku + 1 ) by n part must contain the matrix of
            coefficients, supplied column by column, with the leading
            diagonal of the matrix in row ( ku + 1 ) of the array, the
            first super-diagonal starting at position 2 in row ku, the
            first sub-diagonal starting at position 1 in row ( ku + 2 ),
            and so on. Elements of the array that do not correspond to
            elements of the band matrix (such as the top left ku by ku
            triangle) are not referenced.
            The following program segment transfers a band matrix from
            conventional full matrix storage to band storage:

                  DO 20, J = 1, N
                     K = KU + 1 - J
                     DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
                        A( K + I, J ) = matrix( I, J )
               10    CONTINUE
               20 CONTINUE

   lda    - INTEGER. First dimension of A as declared in the calling
            (sub) program. Must be at least ( kl + ku + 1 ).
   x      - COMPLEX array of DIMENSION at least
            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
            and at least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
   incx   - INTEGER. Increment for the elements of X. Must not be zero.
   beta   - COMPLEX. The scalar beta. When BETA is supplied as zero then
            Y need not be set on input.
   y      - COMPLEX array of DIMENSION at least
            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
            and at least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
            On exit, Y is overwritten by the updated vector y.
   incy   - INTEGER. Increment for the elements of Y. Must not be zero.

   Returns
   =======
   Always 0. If an argument is invalid, xerbla is called with the name
   "CGBMV " and info set to the 1-based position of the offending
   argument in the list above (1, 2, 3, 4, 5, 8, 10 or 13), and the
   routine returns without touching y.

   Level 2 Blas routine.

   -- Written on 22-October-1986.
      Jack Dongarra, Argonne National Lab.
      Jeremy Du Croz, Nag Central Office.
      Sven Hammarling, Nag Central Office.
      Richard Hanson, Sandia National Labs.
*/
/* Subroutine */ int PASTEF77(c,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_scomplex *alpha, const bla_scomplex *a, const bla_integer *lda, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy)
{
    /* System generated locals */
    /* NOTE: the i__N scratch integers are reused by f2c both as loop */
    /* bounds and as array indices inside the loops; each loop only */
    /* overwrites scratch values it no longer needs as a bound. */
    bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
    bla_scomplex q__1, q__2, q__3;

    /* Builtin functions */
    //void bla_r_cnjg(bla_scomplex *, bla_scomplex *);

    /* Local variables */
    bla_integer info;
    bla_scomplex temp;
    bla_integer lenx, leny, i__, j, k;
    //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen);
    bla_integer ix, iy, jx, jy, kx, ky;
    //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen);
    bla_logical noconj;
    bla_integer kup1;

/*     Test the input parameters. */

    /* Parameter adjustments */
    /* Shift the local pointers so that the Fortran-style 1-based */
    /* indices used below (a[i + j*a_dim1], x[1..], y[1..]) address the */
    /* caller's first element. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --x;
    --y;

    /* Function Body */
    info = 0;
    if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (
	    ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, (ftnlen)1)
	    ) {
	info = 1;
    } else if (*m < 0) {
	info = 2;
    } else if (*n < 0) {
	info = 3;
    } else if (*kl < 0) {
	info = 4;
    } else if (*ku < 0) {
	info = 5;
    } else if (*lda < *kl + *ku + 1) {
	info = 8;
    } else if (*incx == 0) {
	info = 10;
    } else if (*incy == 0) {
	info = 13;
    }
    if (info != 0) {
	PASTEF770(xerbla)("CGBMV ", &info, (ftnlen)6);
	return 0;
    }

/*     Quick return if possible. */
    /* Nothing to do for an empty matrix, or when alpha == 0 and */
    /* beta == 1 (y would be left unchanged). */

    if (*m == 0 || *n == 0 || (bli_creal(*alpha) == 0.f && bli_cimag(*alpha) == 0.f && (bli_creal(*beta) == 1.f && bli_cimag(*beta) == 0.f))) {
	return 0;
    }

    /* noconj is true only for TRANS = 'T' (transpose without */
    /* conjugation); it is consulted only in the transposed branch. */
    noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1);

/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
/*     up the start points in  X  and  Y. */

    if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) {
	lenx = *n;
	leny = *m;
    } else {
	lenx = *m;
	leny = *n;
    }
    /* For a negative increment the traversal starts at the far end of */
    /* the vector's storage. */
    if (*incx > 0) {
	kx = 1;
    } else {
	kx = 1 - (lenx - 1) * *incx;
    }
    if (*incy > 0) {
	ky = 1;
    } else {
	ky = 1 - (leny - 1) * *incy;
    }

/*     Start the operations. In this version the elements of A are */
/*     accessed sequentially with one pass through the band part of A. */

/*     First form  y := beta*y. */

    if (bli_creal(*beta) != 1.f || bli_cimag(*beta) != 0.f) {
	if (*incy == 1) {
	    if (bli_creal(*beta) == 0.f && bli_cimag(*beta) == 0.f) {
		/* beta == 0: overwrite y with zeros without reading it. */
		i__1 = leny;
		for (i__ = 1; i__ <= i__1; ++i__) {
		    i__2 = i__;
		    bli_csets( (0.f), (0.f), y[i__2] );
/* L10: */
		}
	    } else {
		i__1 = leny;
		for (i__ = 1; i__ <= i__1; ++i__) {
		    i__2 = i__;
		    i__3 = i__;
		    /* q__1 = beta * y[i]; then store q__1 back into y[i]. */
		    bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 );
		    bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] );
/* L20: */
		}
	    }
	} else {
	    iy = ky;
	    if (bli_creal(*beta) == 0.f && bli_cimag(*beta) == 0.f) {
		i__1 = leny;
		for (i__ = 1; i__ <= i__1; ++i__) {
		    i__2 = iy;
		    bli_csets( (0.f), (0.f), y[i__2] );
		    iy += *incy;
/* L30: */
		}
	    } else {
		i__1 = leny;
		for (i__ = 1; i__ <= i__1; ++i__) {
		    i__2 = iy;
		    i__3 = iy;
		    bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 );
		    bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] );
		    iy += *incy;
/* L40: */
		}
	    }
	}
    }
    /* With alpha == 0 the product term vanishes; y := beta*y is done. */
    if (bli_creal(*alpha) == 0.f && bli_cimag(*alpha) == 0.f) {
	return 0;
    }
    /* kup1 is the band-storage row that holds the main diagonal. */
    kup1 = *ku + 1;
    if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) {

/*        Form  y := alpha*A*x + y. */

	jx = kx;
	if (*incy == 1) {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = jx;
		/* Skip columns whose x element is exactly zero. */
		if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) {
		    i__2 = jx;
		    /* temp = alpha * x[jx] */
		    bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 );
		    bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp );
		    /* k + i is the band-storage row of matrix row i in column j. */
		    k = kup1 - j;
/* Computing MAX */
		    i__2 = 1, i__3 = j - *ku;
/* Computing MIN */
		    i__5 = *m, i__6 = j + *kl;
		    i__4 = f2c_min(i__5,i__6);
		    /* Rows max(1, j-ku) .. min(m, j+kl) lie inside the band. */
		    for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) {
			i__2 = i__;
			i__3 = i__;
			i__5 = k + i__ + j * a_dim1;
			/* y[i] += temp * A(i,j) */
			bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 );
			bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 );
			bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] );
/* L50: */
		    }
		}
		jx += *incx;
/* L60: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		i__4 = jx;
		if (bli_creal(x[i__4]) != 0.f || bli_cimag(x[i__4]) != 0.f) {
		    i__4 = jx;
		    bli_csets( (bli_creal(*alpha) * bli_creal(x[i__4]) - bli_cimag(*alpha) * bli_cimag(x[i__4])), (bli_creal(*alpha) * bli_cimag(x[i__4]) + bli_cimag(*alpha) * bli_creal(x[i__4])), q__1 );
		    bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp );
		    iy = ky;
		    k = kup1 - j;
/* Computing MAX */
		    i__4 = 1, i__2 = j - *ku;
/* Computing MIN */
		    i__5 = *m, i__6 = j + *kl;
		    i__3 = f2c_min(i__5,i__6);
		    for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) {
			i__4 = iy;
			i__2 = iy;
			i__5 = k + i__ + j * a_dim1;
			bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 );
			bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 );
			bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] );
			iy += *incy;
/* L70: */
		    }
		}
		jx += *incx;
		/* Once j exceeds ku the first in-band row moves down by */
		/* one per column, so the y start position advances too. */
		if (j > *ku) {
		    ky += *incy;
		}
/* L80: */
	    }
	}
    } else {

/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */

	jy = ky;
	if (*incx == 1) {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		/* temp accumulates the dot product of column j with x. */
		bli_csets( (0.f), (0.f), temp );
		k = kup1 - j;
		if (noconj) {
/* Computing MAX */
		    i__3 = 1, i__4 = j - *ku;
/* Computing MIN */
		    i__5 = *m, i__6 = j + *kl;
		    i__2 = f2c_min(i__5,i__6);
		    for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) {
			i__3 = k + i__ + j * a_dim1;
			i__4 = i__;
			/* temp += A(i,j) * x[i] */
			bli_csets( (bli_creal(a[i__3]) * bli_creal(x[i__4]) - bli_cimag(a[i__3]) * bli_cimag(x[i__4])), (bli_creal(a[i__3]) * bli_cimag(x[i__4]) + bli_cimag(a[i__3]) * bli_creal(x[i__4])), q__2 );
			bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 );
			bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp );
/* L90: */
		    }
		} else {
/* Computing MAX */
		    i__2 = 1, i__3 = j - *ku;
/* Computing MIN */
		    i__5 = *m, i__6 = j + *kl;
		    i__4 = f2c_min(i__5,i__6);
		    for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) {
			/* q__3 receives the result of bla_r_cnjg applied to A(i,j). */
			bla_r_cnjg(&q__3, &a[k + i__ + j * a_dim1]);
			i__2 = i__;
			/* temp += q__3 * x[i] */
			bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 );
			bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 );
			bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp );
/* L100: */
		    }
		}
		i__4 = jy;
		i__2 = jy;
		/* y[jy] += alpha * temp */
		bli_csets( (bli_creal(*alpha) * bli_creal(temp) - bli_cimag(*alpha) * bli_cimag(temp)), (bli_creal(*alpha) * bli_cimag(temp) + bli_cimag(*alpha) * bli_creal(temp)), q__2 );
		bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 );
		bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] );
		jy += *incy;
/* L110: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		bli_csets( (0.f), (0.f), temp );
		ix = kx;
		k = kup1 - j;
		if (noconj) {
/* Computing MAX */
		    i__4 = 1, i__2 = j - *ku;
/* Computing MIN */
		    i__5 = *m, i__6 = j + *kl;
		    i__3 = f2c_min(i__5,i__6);
		    for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) {
			i__4 = k + i__ + j * a_dim1;
			i__2 = ix;
			bli_csets( (bli_creal(a[i__4]) * bli_creal(x[i__2]) - bli_cimag(a[i__4]) * bli_cimag(x[i__2])), (bli_creal(a[i__4]) * bli_cimag(x[i__2]) + bli_cimag(a[i__4]) * bli_creal(x[i__2])), q__2 );
			bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 );
			bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp );
			ix += *incx;
/* L120: */
		    }
		} else {
/* Computing MAX */
		    i__3 = 1, i__4 = j - *ku;
/* Computing MIN */
		    i__5 = *m, i__6 = j + *kl;
		    i__2 = f2c_min(i__5,i__6);
		    for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) {
			bla_r_cnjg(&q__3, &a[k + i__ + j * a_dim1]);
			i__3 = ix;
			bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 );
			bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 );
			bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp );
			ix += *incx;
/* L130: */
		    }
		}
		i__2 = jy;
		i__3 = jy;
		bli_csets( (bli_creal(*alpha) * bli_creal(temp) - bli_cimag(*alpha) * bli_cimag(temp)), (bli_creal(*alpha) * bli_cimag(temp) + bli_cimag(*alpha) * bli_creal(temp)), q__2 );
		bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 );
		bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] );
		jy += *incy;
		/* Same band-window shift as above, applied to the x start. */
		if (j > *ku) {
		    kx += *incx;
		}
/* L140: */
	    }
	}
    }

    return 0;

/*     End of CGBMV . */

} /* cgbmv_ */

/* dgbmv.f -- translated by f2c (version 19991025).
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_double *alpha, const bla_double *a, const bla_integer *lda, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; /* Local variables */ bla_integer info; bla_double temp; bla_integer lenx, leny, i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_integer kup1; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DGBMV performs one of the matrix-vector operations */ /* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* m by n band matrix, with kl sub-diagonals and ku super-diagonals. */ /* Parameters */ /* ========== */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ /* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ /* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* KL - INTEGER. */ /* On entry, KL specifies the number of sub-diagonals of the */ /* matrix A. KL must satisfy 0 .le. KL. */ /* Unchanged on exit. */ /* KU - INTEGER. 
*/ /* On entry, KU specifies the number of super-diagonals of the */ /* matrix A. KU must satisfy 0 .le. KU. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry, the leading ( kl + ku + 1 ) by n part of the */ /* array A must contain the matrix of coefficients, supplied */ /* column by column, with the leading diagonal of the matrix in */ /* row ( ku + 1 ) of the array, the first super-diagonal */ /* starting at position 2 in row ku, the first sub-diagonal */ /* starting at position 1 in row ( ku + 2 ), and so on. */ /* Elements in the array A that do not correspond to elements */ /* in the band matrix (such as the top left ku by ku triangle) */ /* are not referenced. */ /* The following program segment will transfer a band matrix */ /* from conventional full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* K = KU + 1 - J */ /* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */ /* A( K + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( kl + ku + 1 ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. 
*/ /* Y - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", ( ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, (ftnlen)1) ) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*kl < 0) { info = 4; } else if (*ku < 0) { info = 5; } else if (*lda < *kl + *ku + 1) { info = 8; } else if (*incx == 0) { info = 10; } else if (*incy == 0) { info = 13; } if (info != 0) { PASTEF770(xerbla)("DGBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || (*alpha == 0. && *beta == 1.)) { return 0; } /* Set LENX and LENY, the lengths of the vectors x and y, and set */ /* up the start points in X and Y. 
*/ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through the band part of A. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } kup1 = *ku + 1; if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form y := alpha*A*x + y. */ jx = kx; if (*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { y[i__] += temp * a[k + i__ + j * a_dim1]; /* L50: */ } } jx += *incx; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; iy = ky; k = kup1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__3 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { y[iy] += temp * a[k + i__ + j * a_dim1]; iy += *incy; /* L70: */ } } jx += *incx; if (j > *ku) { ky += *incy; } /* L80: */ } } } else { /* Form y := alpha*A'*x + y. 
*/ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.; k = kup1 - j; /* Computing MAX */ i__3 = 1, i__4 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__2 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { temp += a[k + i__ + j * a_dim1] * x[i__]; /* L90: */ } y[jy] += *alpha * temp; jy += *incy; /* L100: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.; ix = kx; k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { temp += a[k + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } y[jy] += *alpha * temp; jy += *incy; if (j > *ku) { kx += *incx; } /* L120: */ } } } return 0; /* End of DGBMV . */ } /* dgbmv_ */ /* sgbmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_real *alpha, const bla_real *a, const bla_integer *lda, const bla_real *x, const bla_integer * incx, const bla_real *beta, bla_real *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; /* Local variables */ bla_integer info; bla_real temp; bla_integer lenx, leny, i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_integer kup1; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. 
*/ /* Purpose */ /* ======= */ /* SGBMV performs one of the matrix-vector operations */ /* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* m by n band matrix, with kl sub-diagonals and ku super-diagonals. */ /* Parameters */ /* ========== */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ /* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ /* TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* KL - INTEGER. */ /* On entry, KL specifies the number of sub-diagonals of the */ /* matrix A. KL must satisfy 0 .le. KL. */ /* Unchanged on exit. */ /* KU - INTEGER. */ /* On entry, KU specifies the number of super-diagonals of the */ /* matrix A. KU must satisfy 0 .le. KU. */ /* Unchanged on exit. */ /* ALPHA - REAL . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - REAL array of DIMENSION ( LDA, n ). */ /* Before entry, the leading ( kl + ku + 1 ) by n part of the */ /* array A must contain the matrix of coefficients, supplied */ /* column by column, with the leading diagonal of the matrix in */ /* row ( ku + 1 ) of the array, the first super-diagonal */ /* starting at position 2 in row ku, the first sub-diagonal */ /* starting at position 1 in row ( ku + 2 ), and so on. */ /* Elements in the array A that do not correspond to elements */ /* in the band matrix (such as the top left ku by ku triangle) */ /* are not referenced. 
*/ /* The following program segment will transfer a band matrix */ /* from conventional full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* K = KU + 1 - J */ /* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */ /* A( K + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( kl + ku + 1 ). */ /* Unchanged on exit. */ /* X - REAL array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - REAL . */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - REAL array of DIMENSION at least */ /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. 
*/ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", ( ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, (ftnlen)1) ) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*kl < 0) { info = 4; } else if (*ku < 0) { info = 5; } else if (*lda < *kl + *ku + 1) { info = 8; } else if (*incx == 0) { info = 10; } else if (*incy == 0) { info = 13; } if (info != 0) { PASTEF770(xerbla)("SGBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) { return 0; } /* Set LENX and LENY, the lengths of the vectors x and y, and set */ /* up the start points in X and Y. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through the band part of A. */ /* First form y := beta*y. */ if (*beta != 1.f) { if (*incy == 1) { if (*beta == 0.f) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.f; /* L10: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.f) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.f; iy += *incy; /* L30: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.f) { return 0; } kup1 = *ku + 1; if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form y := alpha*A*x + y. 
*/ jx = kx; if (*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f) { temp = *alpha * x[jx]; k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { y[i__] += temp * a[k + i__ + j * a_dim1]; /* L50: */ } } jx += *incx; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f) { temp = *alpha * x[jx]; iy = ky; k = kup1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__3 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { y[iy] += temp * a[k + i__ + j * a_dim1]; iy += *incy; /* L70: */ } } jx += *incx; if (j > *ku) { ky += *incy; } /* L80: */ } } } else { /* Form y := alpha*A'*x + y. */ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.f; k = kup1 - j; /* Computing MAX */ i__3 = 1, i__4 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__2 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { temp += a[k + i__ + j * a_dim1] * x[i__]; /* L90: */ } y[jy] += *alpha * temp; jy += *incy; /* L100: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = 0.f; ix = kx; k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { temp += a[k + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } y[jy] += *alpha * temp; jy += *incy; if (j > *ku) { kx += *incx; } /* L120: */ } } } return 0; /* End of SGBMV . */ } /* sgbmv_ */ /* zgbmv.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_dcomplex *alpha, const bla_dcomplex *a, const bla_integer *lda, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex * y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer lenx, leny, i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj; bla_integer kup1; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZGBMV performs one of the matrix-vector operations */ /* y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or */ /* y := alpha*conjg( A' )*x + beta*y, */ /* where alpha and beta are scalars, x and y are vectors and A is an */ /* m by n band matrix, with kl sub-diagonals and ku super-diagonals. */ /* Parameters */ /* ========== */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' y := alpha*A*x + beta*y. */ /* TRANS = 'T' or 't' y := alpha*A'*x + beta*y. */ /* TRANS = 'C' or 'c' y := alpha*conjg( A' )*x + beta*y. */ /* Unchanged on exit. */ /* M - INTEGER. */ /* On entry, M specifies the number of rows of the matrix A. */ /* M must be at least zero. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the number of columns of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. 
*/ /* KL - INTEGER. */ /* On entry, KL specifies the number of sub-diagonals of the */ /* matrix A. KL must satisfy 0 .le. KL. */ /* Unchanged on exit. */ /* KU - INTEGER. */ /* On entry, KU specifies the number of super-diagonals of the */ /* matrix A. KU must satisfy 0 .le. KU. */ /* Unchanged on exit. */ /* ALPHA - COMPLEX*16 . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ /* Before entry, the leading ( kl + ku + 1 ) by n part of the */ /* array A must contain the matrix of coefficients, supplied */ /* column by column, with the leading diagonal of the matrix in */ /* row ( ku + 1 ) of the array, the first super-diagonal */ /* starting at position 2 in row ku, the first sub-diagonal */ /* starting at position 1 in row ( ku + 2 ), and so on. */ /* Elements in the array A that do not correspond to elements */ /* in the band matrix (such as the top left ku by ku triangle) */ /* are not referenced. */ /* The following program segment will transfer a band matrix */ /* from conventional full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* K = KU + 1 - J */ /* DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */ /* A( K + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( kl + ku + 1 ). */ /* Unchanged on exit. */ /* X - COMPLEX*16 array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - COMPLEX*16 . 
*/ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - COMPLEX*16 array of DIMENSION at least */ /* ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */ /* and at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", ( ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, (ftnlen)1) ) { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*kl < 0) { info = 4; } else if (*ku < 0) { info = 5; } else if (*lda < *kl + *ku + 1) { info = 8; } else if (*incx == 0) { info = 10; } else if (*incy == 0) { info = 13; } if (info != 0) { PASTEF770(xerbla)("ZGBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*m == 0 || *n == 0 || (bli_zreal(*alpha) == 0. && bli_zimag(*alpha) == 0. && (bli_zreal(*beta) == 1. 
&& bli_zimag(*beta) == 0.))) { return 0; } noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1); /* Set LENX and LENY, the lengths of the vectors x and y, and set */ /* up the start points in X and Y. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { lenx = *n; leny = *m; } else { lenx = *m; leny = *n; } if (*incx > 0) { kx = 1; } else { kx = 1 - (lenx - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (leny - 1) * *incy; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through the band part of A. */ /* First form y := beta*y. */ if (bli_zreal(*beta) != 1. || bli_zimag(*beta) != 0.) { if (*incy == 1) { if (bli_zreal(*beta) == 0. && bli_zimag(*beta) == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; bli_zsets( (0.), (0.), y[i__2] ); /* L10: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); /* L20: */ } } } else { iy = ky; if (bli_zreal(*beta) == 0. && bli_zimag(*beta) == 0.) { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; bli_zsets( (0.), (0.), y[i__2] ); iy += *incy; /* L30: */ } } else { i__1 = leny; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); iy += *incy; /* L40: */ } } } } if (bli_zreal(*alpha) == 0. && bli_zimag(*alpha) == 0.) { return 0; } kup1 = *ku + 1; if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form y := alpha*A*x + y. 
*/ jx = kx; if (*incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { i__2 = jx; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); k = kup1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = i__; i__3 = i__; i__5 = k + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); /* L50: */ } } jx += *incx; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__4 = jx; if (bli_zreal(x[i__4]) != 0. || bli_zimag(x[i__4]) != 0.) 
{ i__4 = jx; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__4]) - bli_zimag(*alpha) * bli_zimag(x[i__4])), (bli_zreal(*alpha) * bli_zimag(x[i__4]) + bli_zimag(*alpha) * bli_zreal(x[i__4])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); iy = ky; k = kup1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__3 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = iy; i__2 = iy; i__5 = k + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); iy += *incy; /* L70: */ } } jx += *incx; if (j > *ku) { ky += *incy; } /* L80: */ } } } else { /* Form y := alpha*A'*x + y or y := alpha*conjg( A' )*x + y. */ jy = ky; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { bli_zsets( (0.), (0.), temp ); k = kup1 - j; if (noconj) { /* Computing MAX */ i__3 = 1, i__4 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__2 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { i__3 = k + i__ + j * a_dim1; i__4 = i__; bli_zsets( (bli_zreal(a[i__3]) * bli_zreal(x[i__4]) - bli_zimag(a[i__3]) * bli_zimag(x[i__4])), (bli_zreal(a[i__3]) * bli_zimag(x[i__4]) + bli_zimag(a[i__3]) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L90: */ } } else { /* Computing MAX */ i__2 = 1, i__3 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__4 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { bla_d_cnjg(&z__3, &a[k + i__ + j * a_dim1]); i__2 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * 
bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L100: */ } } i__4 = jy; i__2 = jy; bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp) - bli_zimag(*alpha) * bli_zimag(temp)), (bli_zreal(*alpha) * bli_zimag(temp) + bli_zimag(*alpha) * bli_zreal(temp)), z__2 ); bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); jy += *incy; /* L110: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { bli_zsets( (0.), (0.), temp ); ix = kx; k = kup1 - j; if (noconj) { /* Computing MAX */ i__4 = 1, i__2 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__3 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = k + i__ + j * a_dim1; i__2 = ix; bli_zsets( (bli_zreal(a[i__4]) * bli_zreal(x[i__2]) - bli_zimag(a[i__4]) * bli_zimag(x[i__2])), (bli_zreal(a[i__4]) * bli_zimag(x[i__2]) + bli_zimag(a[i__4]) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L120: */ } } else { /* Computing MAX */ i__3 = 1, i__4 = j - *ku; /* Computing MIN */ i__5 = *m, i__6 = j + *kl; i__2 = f2c_min(i__5,i__6); for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { bla_d_cnjg(&z__3, &a[k + i__ + j * a_dim1]); i__3 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L130: */ } } i__2 = jy; i__3 = jy; bli_zsets( (bli_zreal(*alpha) * 
bli_zreal(temp) - bli_zimag(*alpha) * bli_zimag(temp)), (bli_zreal(*alpha) * bli_zimag(temp) + bli_zimag(*alpha) * bli_zreal(temp)), z__2 ); bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); jy += *incy; if (j > *ku) { kx += *incx; } /* L140: */ } } } return 0; /* End of ZGBMV . */ } /* zgbmv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_gbmv.h000066400000000000000000000057721360743507500174450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/* Prototypes for the f2c-translated GBMV (general band matrix-vector
   product) routines.  They are only declared when BLIS_ENABLE_BLAS is
   defined.  All four share one argument list and differ only in the
   element type of alpha, a, x, beta and y:

     trans     operation selector
     m, n      number of rows / columns of the band matrix
     kl, ku    number of sub- / super-diagonals
     alpha     scalar multiplying the matrix-vector product
     a, lda    band-storage array and its first dimension
     x, incx   input vector and its increment
     beta      scalar multiplying y
     y, incy   vector that is updated in place, and its increment

   Every argument is passed by pointer; y is the only one that is not
   const-qualified. */
#ifdef BLIS_ENABLE_BLAS

/* bla_scomplex variant -- NOTE(review): presumably the single-precision
   complex GBMV; its definition is not visible from this header. */
BLIS_EXPORT_BLAS int PASTEF77(c,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_scomplex *alpha, const bla_scomplex *a, const bla_integer *lda, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy);

/* bla_double variant -- NOTE(review): presumably the double-precision
   real GBMV; its definition is not visible from this header. */
BLIS_EXPORT_BLAS int PASTEF77(d,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_double *alpha, const bla_double *a, const bla_integer *lda, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy);

/* bla_real variant (SGBMV): y := alpha*A*x + beta*y or
   y := alpha*A'*x + beta*y. */
BLIS_EXPORT_BLAS int PASTEF77(s,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_real *alpha, const bla_real *a, const bla_integer *lda, const bla_real *x, const bla_integer * incx, const bla_real *beta, bla_real *y, const bla_integer *incy);

/* bla_dcomplex variant (ZGBMV): y := alpha*A*x + beta*y,
   y := alpha*A'*x + beta*y or y := alpha*conjg( A' )*x + beta*y. */
BLIS_EXPORT_BLAS int PASTEF77(z,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_dcomplex *alpha, const bla_dcomplex *a, const bla_integer *lda, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex * y, const bla_integer *incy);

#endif
blis-0.6.1/frame/compat/f2c/bla_hbmv.c000066400000000000000000001072731360743507500174400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* chbmv.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(c,hbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_scomplex * alpha, const bla_scomplex *a, const bla_integer *lda, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_real r__1; bla_scomplex q__1, q__2, q__3, q__4; /* Builtin functions */ //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp1, temp2; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* CHBMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n hermitian band matrix, with k super-diagonals. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the band matrix A is being supplied as */ /* follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* being supplied. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* being supplied. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry, K specifies the number of super-diagonals of the */ /* matrix A. K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* ALPHA - COMPLEX . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. 
*/ /* A - COMPLEX array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the hermitian matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer the upper */ /* triangular part of a hermitian band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the hermitian matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer the lower */ /* triangular part of a hermitian band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that the imaginary parts of the diagonal elements need */ /* not be set and are assumed to be zero. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - COMPLEX array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). 
*/ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - COMPLEX . */ /* On entry, BETA specifies the scalar beta. */ /* Unchanged on exit. */ /* Y - COMPLEX array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*k < 0) { info = 3; } else if (*lda < *k + 1) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { PASTEF770(xerbla)("CHBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (bli_creal(*alpha) == 0.f && bli_cimag(*alpha) == 0.f && (bli_creal(*beta) == 1.f && bli_cimag(*beta) == 0.f))) { return 0; } /* Set up the start points in X and Y. 
*/ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array A */ /* are accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if (bli_creal(*beta) != 1.f || bli_cimag(*beta) != 0.f) { if (*incy == 1) { if (bli_creal(*beta) == 0.f && bli_cimag(*beta) == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; bli_csets( (0.f), (0.f), y[i__2] ); /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); /* L20: */ } } } else { iy = ky; if (bli_creal(*beta) == 0.f && bli_cimag(*beta) == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; bli_csets( (0.f), (0.f), y[i__2] ); iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); iy += *incy; /* L40: */ } } } } if (bli_creal(*alpha) == 0.f && bli_cimag(*alpha) == 0.f) { return 0; } if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form y when upper triangle of A is stored. 
*/ kplus1 = *k + 1; if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); bli_csets( (0.f), (0.f), temp2 ); l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = i__; i__3 = i__; i__5 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__2 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); /* L50: */ } i__4 = j; i__2 = j; i__3 = kplus1 + j * a_dim1; r__1 = bli_creal(a[i__3]); bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); bli_csets( (bli_creal(y[i__2]) + bli_creal(q__3)), (bli_cimag(y[i__2]) + bli_cimag(q__3)), q__2 ); bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__4 = jx; bli_csets( (bli_creal(*alpha) 
* bli_creal(x[i__4]) - bli_cimag(*alpha) * bli_cimag(x[i__4])), (bli_creal(*alpha) * bli_cimag(x[i__4]) + bli_cimag(*alpha) * bli_creal(x[i__4])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); bli_csets( (0.f), (0.f), temp2 ); ix = kx; iy = ky; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = iy; i__2 = iy; i__5 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ix += *incx; iy += *incy; /* L70: */ } i__3 = jy; i__4 = jy; i__2 = kplus1 + j * a_dim1; r__1 = bli_creal(a[i__2]); bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__3)), (bli_cimag(y[i__4]) + bli_cimag(q__3)), q__2 ); bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); jx += *incx; jy += *incy; if (j > *k) { kx += *incx; ky += *incy; } /* L80: */ } } } else { /* Form y when lower triangle of A is stored. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = j; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__3]) - bli_cimag(*alpha) * bli_cimag(x[i__3])), (bli_creal(*alpha) * bli_cimag(x[i__3]) + bli_cimag(*alpha) * bli_creal(x[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); bli_csets( (0.f), (0.f), temp2 ); i__3 = j; i__4 = j; i__2 = j * a_dim1 + 1; r__1 = bli_creal(a[i__2]); bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = f2c_min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { i__4 = i__; i__2 = i__; i__5 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); /* L90: */ } i__3 = j; i__4 = j; bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 
1; j <= i__1; ++j) { i__3 = jx; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__3]) - bli_cimag(*alpha) * bli_cimag(x[i__3])), (bli_creal(*alpha) * bli_cimag(x[i__3]) + bli_cimag(*alpha) * bli_creal(x[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); bli_csets( (0.f), (0.f), temp2 ); i__3 = jy; i__4 = jy; i__2 = j * a_dim1 + 1; r__1 = bli_creal(a[i__2]); bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); l = 1 - j; ix = jx; iy = jy; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = f2c_min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { ix += *incx; iy += *incy; i__4 = iy; i__2 = iy; i__5 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp1) * bli_creal(a[i__5]) - bli_cimag(temp1) * bli_cimag(a[i__5])), (bli_creal(temp1) * bli_cimag(a[i__5]) + bli_cimag(temp1) * bli_creal(a[i__5])), q__2 ); bli_csets( (bli_creal(y[i__2]) + bli_creal(q__2)), (bli_cimag(y[i__2]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__4] ); bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); /* L110: */ } i__3 = jy; i__4 = jy; bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); jx += *incx; jy += *incy; /* L120: */ } } } return 0; /* End of CHBMV 
. */ } /* chbmv_ */ /* zhbmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* ZHBMV compatibility routine: updates y := alpha*A*x + beta*y in place for an n by n Hermitian band matrix A given in band storage (bla_dcomplex operands). Every exit path returns 0. Invalid arguments are reported by calling xerbla with info set to 1, 2, 3, 6, 8 or 11, which match the positions of uplo, n, k, lda, incx and incy in the argument list; y is not modified in that case. */ /* Subroutine */ int PASTEF77(z,hbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_dcomplex *alpha, const bla_dcomplex *a, const bla_integer *lda, const bla_dcomplex *x, const bla_integer * incx, const bla_dcomplex *beta, bla_dcomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_double d__1; bla_dcomplex z__1, z__2, z__3, z__4; /* Builtin functions */ //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp1, temp2; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZHBMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n hermitian band matrix, with k super-diagonals. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the band matrix A is being supplied as */ /* follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* being supplied. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* being supplied. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry, K specifies the number of super-diagonals of the */ /* matrix A. K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* ALPHA - COMPLEX*16 .
*/ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the hermitian matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer the upper */ /* triangular part of a hermitian band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the hermitian matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer the lower */ /* triangular part of a hermitian band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that the imaginary parts of the diagonal elements need */ /* not be set and are assumed to be zero. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit.
*/ /* X - COMPLEX*16 array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - COMPLEX*16 . */ /* On entry, BETA specifies the scalar beta. */ /* Unchanged on exit. */ /* Y - COMPLEX*16 array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ /* Shift the local base pointers so the 1-based indices used below (a[i + j*a_dim1], x[i], y[i]) land on the caller's first elements. */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; --y; /* Function Body */ /* Only the first failing check is recorded in info. */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*k < 0) { info = 3; } else if (*lda < *k + 1) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { PASTEF770(xerbla)("ZHBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ /* That is: n is zero, or alpha is zero while beta is one, so y would be left unchanged. */ if (*n == 0 || (bli_zreal(*alpha) == 0. && bli_zimag(*alpha) == 0. && (bli_zreal(*beta) == 1. && bli_zimag(*beta) == 0.))) { return 0; } /* Set up the start points in X and Y.
*/ /* kx and ky are the 1-based indices of the first element visited; with a negative increment they evaluate to 1 + (n-1)*abs(inc), the far end of the vector. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array A */ /* are accessed sequentially with one pass through A. */ /* First form y := beta*y. */ /* Skipped entirely when beta is one. When beta is zero, zeros are stored instead of multiplying; otherwise each complex product is formed in z__1 and then copied into y. */ if (bli_zreal(*beta) != 1. || bli_zimag(*beta) != 0.) { if (*incy == 1) { if (bli_zreal(*beta) == 0. && bli_zimag(*beta) == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; bli_zsets( (0.), (0.), y[i__2] ); /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); /* L20: */ } } } else { iy = ky; if (bli_zreal(*beta) == 0. && bli_zimag(*beta) == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; bli_zsets( (0.), (0.), y[i__2] ); iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); iy += *incy; /* L40: */ } } } } /* With alpha equal to zero, y := beta*y is already the final result. */ if (bli_zreal(*alpha) == 0. && bli_zimag(*alpha) == 0.) { return 0; } if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form y when upper triangle of A is stored.
*/ /* kplus1 is the band-storage row that holds the diagonal in the upper layout. */ kplus1 = *k + 1; if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { /* temp1 = alpha*x(j). temp2 accumulates, over the in-band rows i above the diagonal, the product of x(i) with the value bla_d_cnjg writes for A(i,j) (presumably its complex conjugate; confirm against that helper). */ i__2 = j; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); bli_zsets( (0.), (0.), temp2 ); /* Matrix row i of column j is read from band row l + i. */ l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = i__; i__3 = i__; i__5 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__2 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); /* L50: */ } /* y(j) += real(A(j,j))*temp1 + alpha*temp2; only the real part of the stored diagonal entry is read. */ i__4 = j; i__2 = j; i__3 = kplus1 + j * a_dim1; d__1 = bli_zreal(a[i__3]); bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__3)), (bli_zimag(y[i__2]) + bli_zimag(z__3)), z__2 ); bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); /* L60: */ } } else { /* General increments: same computation, with jx/jy tracking column j and ix/iy tracking row i in x and y. */ jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__4 = jx; bli_zsets( (bli_zreal(*alpha) *
bli_zreal(x[i__4]) - bli_zimag(*alpha) * bli_zimag(x[i__4])), (bli_zreal(*alpha) * bli_zimag(x[i__4]) + bli_zimag(*alpha) * bli_zreal(x[i__4])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); bli_zsets( (0.), (0.), temp2 ); /* Restart ix/iy at kx/ky, which are kept at the position of the first in-band row of the current column. */ ix = kx; iy = ky; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = iy; i__2 = iy; i__5 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ix += *incx; iy += *incy; /* L70: */ } i__3 = jy; i__4 = jy; i__2 = kplus1 + j * a_dim1; d__1 = bli_zreal(a[i__2]); bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__3)), (bli_zimag(y[i__4]) + bli_zimag(z__3)), z__2 ); bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); jx += *incx; jy += *incy; /* For j > k the next column's first in-band row, max(1, j+1-k), is one further down, so the start positions advance with it. */ if (j > *k) { kx += *incx; ky += *incy; } /* L80: */ } } } else { /* Form y when lower triangle of A is stored.
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = j; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__3]) - bli_zimag(*alpha) * bli_zimag(x[i__3])), (bli_zreal(*alpha) * bli_zimag(x[i__3]) + bli_zimag(*alpha) * bli_zreal(x[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); bli_zsets( (0.), (0.), temp2 ); /* Lower layout: the diagonal sits in band row 1 and is applied first, using only its real part. */ i__3 = j; i__4 = j; i__2 = j * a_dim1 + 1; d__1 = bli_zreal(a[i__2]); bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); /* Matrix row i of column j is read from band row l + i; the loop covers the in-band rows below the diagonal. */ l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = f2c_min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { i__4 = i__; i__2 = i__; i__5 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); /* L90: */ } /* y(j) += alpha*temp2. */ i__3 = j; i__4 = j; bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1;
j <= i__1; ++j) { i__3 = jx; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__3]) - bli_zimag(*alpha) * bli_zimag(x[i__3])), (bli_zreal(*alpha) * bli_zimag(x[i__3]) + bli_zimag(*alpha) * bli_zreal(x[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); bli_zsets( (0.), (0.), temp2 ); i__3 = jy; i__4 = jy; i__2 = j * a_dim1 + 1; d__1 = bli_zreal(a[i__2]); bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); l = 1 - j; /* ix/iy begin at the diagonal position and are stepped at the top of each iteration, so the first element touched belongs to row j + 1. */ ix = jx; iy = jy; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = f2c_min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { ix += *incx; iy += *incy; i__4 = iy; i__2 = iy; i__5 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp1) * bli_zreal(a[i__5]) - bli_zimag(temp1) * bli_zimag(a[i__5])), (bli_zreal(temp1) * bli_zimag(a[i__5]) + bli_zimag(temp1) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__2]) + bli_zreal(z__2)), (bli_zimag(y[i__2]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__4] ); bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); /* L110: */ } i__3 = jy; i__4 = jy; bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); jx += *incx; jy += *incy; /* L120: */ } } } return 0; /* End of ZHBMV .
*/ } /* zhbmv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_hbmv.h000066400000000000000000000044021360743507500174330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ #ifdef BLIS_ENABLE_BLAS /* Prototypes for the hbmv BLAS compatibility routines, declared only when BLIS_ENABLE_BLAS is defined. Both take every argument by pointer and return int; y is the only argument not declared const. */ /* hbmv on bla_scomplex operands. */ BLIS_EXPORT_BLAS int PASTEF77(c,hbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_scomplex *alpha, const bla_scomplex *a, const bla_integer *lda, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy); /* hbmv on bla_dcomplex operands. */ BLIS_EXPORT_BLAS int PASTEF77(z,hbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_dcomplex *alpha, const bla_dcomplex *a, const bla_integer *lda, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex *y, const bla_integer *incy); #endif blis-0.6.1/frame/compat/f2c/bla_hpmv.c000066400000000000000000000770761360743507500174650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* chpmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(c,hpmv)(const bla_character *uplo, const bla_integer *n, const bla_scomplex *alpha, const bla_scomplex * ap, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_real r__1; bla_scomplex q__1, q__2, q__3, q__4; /* Builtin functions */ //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp1, temp2; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* CHPMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n hermitian matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. 
*/ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - COMPLEX . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* AP - COMPLEX array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. */ /* Note that the imaginary parts of the diagonal elements need */ /* not be set and are assumed to be zero. */ /* Unchanged on exit. */ /* X - COMPLEX array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - COMPLEX . */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - COMPLEX array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). 
*/ /* Before entry, the incremented array Y must contain the n */ /* element vector y. On exit, Y is overwritten by the updated */ /* vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --y; --x; --ap; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 6; } else if (*incy == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("CHPMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (bli_creal(*alpha) == 0.f && bli_cimag(*alpha) == 0.f && (bli_creal(*beta) == 1.f && bli_cimag(*beta) == 0.f))) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ /* First form y := beta*y. 
*/ if (bli_creal(*beta) != 1.f || bli_cimag(*beta) != 0.f) { if (*incy == 1) { if (bli_creal(*beta) == 0.f && bli_cimag(*beta) == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; bli_csets( (0.f), (0.f), y[i__2] ); /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); /* L20: */ } } } else { iy = ky; if (bli_creal(*beta) == 0.f && bli_cimag(*beta) == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; bli_csets( (0.f), (0.f), y[i__2] ); iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; bli_csets( (bli_creal(*beta) * bli_creal(y[i__3]) - bli_cimag(*beta) * bli_cimag(y[i__3])), (bli_creal(*beta) * bli_cimag(y[i__3]) + bli_cimag(*beta) * bli_creal(y[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); iy += *incy; /* L40: */ } } } } if (bli_creal(*alpha) == 0.f && bli_cimag(*alpha) == 0.f) { return 0; } kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form y when AP contains the upper triangle. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); bli_csets( (0.f), (0.f), temp2 ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; bli_csets( (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); bla_r_cnjg(&q__3, &ap[k]); i__3 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ++k; /* L50: */ } i__2 = j; i__3 = j; i__4 = kk + j - 1; r__1 = bli_creal(ap[i__4]); bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); bli_csets( (bli_creal(y[i__3]) + bli_creal(q__3)), (bli_cimag(y[i__3]) + bli_cimag(q__3)), q__2 ); bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); kk += j; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + 
bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); bli_csets( (0.f), (0.f), temp2 ); ix = kx; iy = ky; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = iy; i__4 = iy; i__5 = k; bli_csets( (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); bla_r_cnjg(&q__3, &ap[k]); i__3 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ix += *incx; iy += *incy; /* L70: */ } i__2 = jy; i__3 = jy; i__4 = kk + j - 1; r__1 = bli_creal(ap[i__4]); bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__3 ); bli_csets( (bli_creal(y[i__3]) + bli_creal(q__3)), (bli_cimag(y[i__3]) + bli_cimag(q__3)), q__2 ); bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__4 ); bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); jx += *incx; jy += *incy; kk += j; /* L80: */ } } } else { /* Form y when AP contains the lower triangle. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); bli_csets( (0.f), (0.f), temp2 ); i__2 = j; i__3 = j; i__4 = kk; r__1 = bli_creal(ap[i__4]); bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; bli_csets( (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); bla_r_cnjg(&q__3, &ap[k]); i__3 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ++k; /* L90: */ } i__2 = j; i__3 = j; bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); kk += *n - j + 1; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) 
- bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); bli_csets( (0.f), (0.f), temp2 ); i__2 = jy; i__3 = jy; i__4 = kk; r__1 = bli_creal(ap[i__4]); bli_csets( (r__1 * bli_creal(temp1)), (r__1 * bli_cimag(temp1)), q__2 ); bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; iy += *incy; i__3 = iy; i__4 = iy; i__5 = k; bli_csets( (bli_creal(temp1) * bli_creal(ap[i__5]) - bli_cimag(temp1) * bli_cimag(ap[i__5])), (bli_creal(temp1) * bli_cimag(ap[i__5]) + bli_cimag(temp1) * bli_creal(ap[i__5])), q__2 ); bli_csets( (bli_creal(y[i__4]) + bli_creal(q__2)), (bli_cimag(y[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__3] ); bla_r_cnjg(&q__3, &ap[k]); i__3 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); bli_csets( (bli_creal(temp2) + bli_creal(q__2)), (bli_cimag(temp2) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); /* L110: */ } i__2 = jy; i__3 = jy; bli_csets( (bli_creal(*alpha) * bli_creal(temp2) - bli_cimag(*alpha) * bli_cimag(temp2)), (bli_creal(*alpha) * bli_cimag(temp2) + bli_cimag(*alpha) * bli_creal(temp2)), q__2 ); bli_csets( (bli_creal(y[i__3]) + bli_creal(q__2)), (bli_cimag(y[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), y[i__2] ); jx += *incx; jy += *incy; kk += *n - j + 1; /* L120: */ } } } return 0; /* End of CHPMV . */ } /* chpmv_ */ /* zhpmv.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,hpmv)(const bla_character *uplo, const bla_integer *n, const bla_dcomplex *alpha, const bla_dcomplex *ap, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_double d__1; bla_dcomplex z__1, z__2, z__3, z__4; /* Builtin functions */ //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp1, temp2; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZHPMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n hermitian matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - COMPLEX*16 . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* AP - COMPLEX*16 array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). 
*/ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. */ /* Note that the imaginary parts of the diagonal elements need */ /* not be set and are assumed to be zero. */ /* Unchanged on exit. */ /* X - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - COMPLEX*16 . */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. On exit, Y is overwritten by the updated */ /* vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. 
*/ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --y; --x; --ap; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 6; } else if (*incy == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("ZHPMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (bli_zreal(*alpha) == 0. && bli_zimag(*alpha) == 0. && (bli_zreal(*beta) == 1. && bli_zimag(*beta) == 0.))) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ /* First form y := beta*y. */ if (bli_zreal(*beta) != 1. || bli_zimag(*beta) != 0.) { if (*incy == 1) { if (bli_zreal(*beta) == 0. && bli_zimag(*beta) == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; bli_zsets( (0.), (0.), y[i__2] ); /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; i__3 = i__; bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); /* L20: */ } } } else { iy = ky; if (bli_zreal(*beta) == 0. && bli_zimag(*beta) == 0.) 
{ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; bli_zsets( (0.), (0.), y[i__2] ); iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = iy; i__3 = iy; bli_zsets( (bli_zreal(*beta) * bli_zreal(y[i__3]) - bli_zimag(*beta) * bli_zimag(y[i__3])), (bli_zreal(*beta) * bli_zimag(y[i__3]) + bli_zimag(*beta) * bli_zreal(y[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); iy += *incy; /* L40: */ } } } } if (bli_zreal(*alpha) == 0. && bli_zimag(*alpha) == 0.) { return 0; } kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form y when AP contains the upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); bli_zsets( (0.), (0.), temp2 ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; bli_zsets( (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); bla_d_cnjg(&z__3, &ap[k]); i__3 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ++k; /* L50: */ } i__2 = j; i__3 = j; i__4 = kk + j - 1; d__1 = bli_zreal(ap[i__4]); bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); bli_zsets( 
(bli_zreal(y[i__3]) + bli_zreal(z__3)), (bli_zimag(y[i__3]) + bli_zimag(z__3)), z__2 ); bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); kk += j; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); bli_zsets( (0.), (0.), temp2 ); ix = kx; iy = ky; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = iy; i__4 = iy; i__5 = k; bli_zsets( (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); bla_d_cnjg(&z__3, &ap[k]); i__3 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ix += *incx; iy += *incy; /* L70: */ } i__2 = jy; i__3 = jy; i__4 = kk + j - 1; d__1 = bli_zreal(ap[i__4]); bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__3 ); bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__3)), (bli_zimag(y[i__3]) + bli_zimag(z__3)), z__2 ); bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * 
bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__4 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); jx += *incx; jy += *incy; kk += j; /* L80: */ } } } else { /* Form y when AP contains the lower triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); bli_zsets( (0.), (0.), temp2 ); i__2 = j; i__3 = j; i__4 = kk; d__1 = bli_zreal(ap[i__4]); bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; bli_zsets( (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); bla_d_cnjg(&z__3, &ap[k]); i__3 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ++k; /* L90: */ } i__2 = j; i__3 = j; bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 
); bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); kk += *n - j + 1; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); bli_zsets( (0.), (0.), temp2 ); i__2 = jy; i__3 = jy; i__4 = kk; d__1 = bli_zreal(ap[i__4]); bli_zsets( (d__1 * bli_zreal(temp1)), (d__1 * bli_zimag(temp1)), z__2 ); bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; iy += *incy; i__3 = iy; i__4 = iy; i__5 = k; bli_zsets( (bli_zreal(temp1) * bli_zreal(ap[i__5]) - bli_zimag(temp1) * bli_zimag(ap[i__5])), (bli_zreal(temp1) * bli_zimag(ap[i__5]) + bli_zimag(temp1) * bli_zreal(ap[i__5])), z__2 ); bli_zsets( (bli_zreal(y[i__4]) + bli_zreal(z__2)), (bli_zimag(y[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__3] ); bla_d_cnjg(&z__3, &ap[k]); i__3 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp2) + bli_zreal(z__2)), (bli_zimag(temp2) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); /* L110: */ } i__2 = jy; i__3 = jy; bli_zsets( (bli_zreal(*alpha) * bli_zreal(temp2) - bli_zimag(*alpha) * bli_zimag(temp2)), (bli_zreal(*alpha) * bli_zimag(temp2) + bli_zimag(*alpha) * bli_zreal(temp2)), z__2 ); bli_zsets( (bli_zreal(y[i__3]) + bli_zreal(z__2)), (bli_zimag(y[i__3]) + bli_zimag(z__2)), z__1 ); 
bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), y[i__2] ); jx += *incx; jy += *incy; kk += *n - j + 1; /* L120: */ } } } return 0; /* End of ZHPMV . */ } /* zhpmv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_hpmv.h000066400000000000000000000042501360743507500174520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS BLIS_EXPORT_BLAS int PASTEF77(c,hpmv)(const bla_character *uplo, const bla_integer *n, const bla_scomplex *alpha, const bla_scomplex *ap, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy); BLIS_EXPORT_BLAS int PASTEF77(z,hpmv)(const bla_character *uplo, const bla_integer *n, const bla_dcomplex *alpha, const bla_dcomplex *ap, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex *y, const bla_integer *incy); #endif blis-0.6.1/frame/compat/f2c/bla_hpr.c000066400000000000000000000546501360743507500172750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* chpr.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(c,hpr)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_scomplex *x, const bla_integer *incx, bla_scomplex *ap) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_real r__1; bla_scomplex q__1, q__2; /* Builtin functions */ //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* CHPR performs the hermitian rank 1 operation */ /* A := alpha*x*conjg( x' ) + A, */ /* where alpha is a bla_real scalar, x is an n element vector and A is an */ /* n by n hermitian matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. 
*/ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - REAL . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - COMPLEX array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* AP - COMPLEX array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Note that the imaginary parts of the diagonal elements need */ /* not be set, they are assumed to be zero, and on exit they */ /* are set to zero. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. 
External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } if (info != 0) { PASTEF770(xerbla)("CHPR ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.f) { return 0; } /* Set the start point in X if the increment is not unity. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { bla_r_cnjg(&q__2, &x[j]); bli_csets( (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; bli_csets( (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ++k; /* L10: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = j; bli_csets( (bli_creal(x[i__4]) * bli_creal(temp) - bli_cimag(x[i__4]) * bli_cimag(temp)), (bli_creal(x[i__4]) * bli_cimag(temp) + bli_cimag(x[i__4]) * bli_creal(temp)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); bli_csets( (r__1), (0.f), 
ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; r__1 = bli_creal(ap[i__3]); bli_csets( (r__1), (0.f), ap[i__2] ); } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { bla_r_cnjg(&q__2, &x[jx]); bli_csets( (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = k; i__5 = ix; bli_csets( (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ix += *incx; /* L30: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = jx; bli_csets( (bli_creal(x[i__4]) * bli_creal(temp) - bli_cimag(x[i__4]) * bli_cimag(temp)), (bli_creal(x[i__4]) * bli_cimag(temp) + bli_cimag(x[i__4]) * bli_creal(temp)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); bli_csets( (r__1), (0.f), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; r__1 = bli_creal(ap[i__3]); bli_csets( (r__1), (0.f), ap[i__2] ); } jx += *incx; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. 
*/ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { bla_r_cnjg(&q__2, &x[j]); bli_csets( (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); i__2 = kk; i__3 = kk; i__4 = j; bli_csets( (bli_creal(temp) * bli_creal(x[i__4]) - bli_cimag(temp) * bli_cimag(x[i__4])), (bli_creal(temp) * bli_cimag(x[i__4]) + bli_cimag(temp) * bli_creal(x[i__4])), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); bli_csets( (r__1), (0.f), ap[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; bli_csets( (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ++k; /* L50: */ } } else { i__2 = kk; i__3 = kk; r__1 = bli_creal(ap[i__3]); bli_csets( (r__1), (0.f), ap[i__2] ); } kk = kk + *n - j + 1; /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { bla_r_cnjg(&q__2, &x[jx]); bli_csets( (*alpha * bli_creal(q__2)), (*alpha * bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); i__2 = kk; i__3 = kk; i__4 = jx; bli_csets( (bli_creal(temp) * bli_creal(x[i__4]) - bli_cimag(temp) * bli_cimag(x[i__4])), (bli_creal(temp) * bli_cimag(x[i__4]) + bli_cimag(temp) * bli_creal(x[i__4])), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); bli_csets( (r__1), (0.f), ap[i__2] ); ix = jx; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; i__3 = k; i__4 = k; i__5 = ix; bli_csets( (bli_creal(x[i__5]) * bli_creal(temp) - bli_cimag(x[i__5]) * bli_cimag(temp)), (bli_creal(x[i__5]) * bli_cimag(temp) + 
bli_cimag(x[i__5]) * bli_creal(temp)), q__2 ); bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__2)), (bli_cimag(ap[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); /* L70: */ } } else { i__2 = kk; i__3 = kk; r__1 = bli_creal(ap[i__3]); bli_csets( (r__1), (0.f), ap[i__2] ); } jx += *incx; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of CHPR . */ } /* chpr_ */ /* zhpr.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,hpr)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_dcomplex *x, const bla_integer *incx, bla_dcomplex *ap) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_double d__1; bla_dcomplex z__1, z__2; /* Builtin functions */ //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZHPR performs the hermitian rank 1 operation */ /* A := alpha*x*conjg( x' ) + A, */ /* where alpha is a bla_real scalar, x is an n element vector and A is an */ /* n by n hermitian matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. 
*/ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* AP - COMPLEX*16 array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Note that the imaginary parts of the diagonal elements need */ /* not be set, they are assumed to be zero, and on exit they */ /* are set to zero. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. 
*/ /* Parameter adjustments */ --ap; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } if (info != 0) { PASTEF770(xerbla)("ZHPR ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.) { return 0; } /* Set the start point in X if the increment is not unity. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { bla_d_cnjg(&z__2, &x[j]); bli_zsets( (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ++k; /* L10: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = j; bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp) - bli_zimag(x[i__4]) * bli_zimag(temp)), (bli_zreal(x[i__4]) * bli_zimag(temp) + bli_zimag(x[i__4]) * bli_zreal(temp)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); bli_zsets( (d__1), (0.), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; d__1 = bli_zreal(ap[i__3]); bli_zsets( (d__1), (0.), ap[i__2] ); } kk += j; /* L20: */ } } else { jx = kx; 
i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { bla_d_cnjg(&z__2, &x[jx]); bli_zsets( (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = k; i__5 = ix; bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ix += *incx; /* L30: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = jx; bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp) - bli_zimag(x[i__4]) * bli_zimag(temp)), (bli_zreal(x[i__4]) * bli_zimag(temp) + bli_zimag(x[i__4]) * bli_zreal(temp)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); bli_zsets( (d__1), (0.), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; d__1 = bli_zreal(ap[i__3]); bli_zsets( (d__1), (0.), ap[i__2] ); } jx += *incx; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ bla_d_cnjg(&z__2, &x[j]); bli_zsets( (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); i__2 = kk; i__3 = kk; i__4 = j; bli_zsets( (bli_zreal(temp) * bli_zreal(x[i__4]) - bli_zimag(temp) * bli_zimag(x[i__4])), (bli_zreal(temp) * bli_zimag(x[i__4]) + bli_zimag(temp) * bli_zreal(x[i__4])), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); bli_zsets( (d__1), (0.), ap[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ++k; /* L50: */ } } else { i__2 = kk; i__3 = kk; d__1 = bli_zreal(ap[i__3]); bli_zsets( (d__1), (0.), ap[i__2] ); } kk = kk + *n - j + 1; /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ bla_d_cnjg(&z__2, &x[jx]); bli_zsets( (*alpha * bli_zreal(z__2)), (*alpha * bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); i__2 = kk; i__3 = kk; i__4 = jx; bli_zsets( (bli_zreal(temp) * bli_zreal(x[i__4]) - bli_zimag(temp) * bli_zimag(x[i__4])), (bli_zreal(temp) * bli_zimag(x[i__4]) + bli_zimag(temp) * bli_zreal(x[i__4])), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); bli_zsets( (d__1), (0.), ap[i__2] ); ix = jx; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; i__3 = k; i__4 = k; i__5 = ix; bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp) - bli_zimag(x[i__5]) * bli_zimag(temp)), (bli_zreal(x[i__5]) * bli_zimag(temp) + bli_zimag(x[i__5]) * bli_zreal(temp)), z__2 ); bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__2)), (bli_zimag(ap[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); /* L70: */ } } else { i__2 = kk; i__3 = kk; d__1 = bli_zreal(ap[i__3]); bli_zsets( (d__1), (0.), ap[i__2] ); } jx += *incx; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of ZHPR . */ } /* zhpr_ */ #endif blis-0.6.1/frame/compat/f2c/bla_hpr.h000066400000000000000000000040141360743507500172670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Prototypes for the packed hermitian rank 1 update routines; they are declared only when BLIS_ENABLE_BLAS is defined. PASTEF77(z,hpr) (ZHPR) performs A := alpha*x*conjg( x' ) + A, where alpha is a real scalar, x is an n element vector read with increment incx, and A is an n by n hermitian matrix whose upper or lower triangle (selected by uplo) is packed column by column in ap. PASTEF77(c,hpr) takes the same parameter list with bla_real and bla_scomplex in place of bla_double and bla_dcomplex; presumably it is the single precision counterpart (CHPR) -- confirm against its definition. */ #ifdef BLIS_ENABLE_BLAS BLIS_EXPORT_BLAS int PASTEF77(c,hpr)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_scomplex *x, const bla_integer *incx, bla_scomplex *ap); BLIS_EXPORT_BLAS int PASTEF77(z,hpr)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_dcomplex *x, const bla_integer *incx, bla_dcomplex *ap); #endif blis-0.6.1/frame/compat/f2c/bla_hpr2.c000066400000000000000000001011571360743507500173520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* chpr2.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(c,hpr2)(const bla_character *uplo, const bla_integer *n, const bla_scomplex *alpha, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *y, const bla_integer *incy, bla_scomplex *ap) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5, i__6; bla_real r__1; bla_scomplex q__1, q__2, q__3, q__4; /* Builtin functions */ //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp1, temp2; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx = 0, jy = 0, kx = 0, ky = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* CHPR2 performs the hermitian rank 2 operation */ /* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */ /* where alpha is a scalar, x and y are n element vectors and A is an */ /* n by n hermitian matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - COMPLEX . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - COMPLEX array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. 
*/ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Y - COMPLEX array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ /* Unchanged on exit. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* AP - COMPLEX array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Note that the imaginary parts of the diagonal elements need */ /* not be set, they are assumed to be zero, and on exit they */ /* are set to zero. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. 
*/ /* Parameter adjustments */ --ap; --y; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("CHPR2 ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (bli_creal(*alpha) == 0.f && bli_cimag(*alpha) == 0.f)) { return 0; } /* Set up the start points in X and Y if the increments are not both */ /* unity. */ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form A when upper triangle is stored in AP. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; i__3 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f || (bli_creal(y[i__3]) != 0.f || bli_cimag(y[i__3]) != 0.f)) { bla_r_cnjg(&q__2, &y[j]); bli_csets( (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); i__2 = j; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); bla_r_cnjg(&q__1, &q__2); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; bli_csets( (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); i__6 = i__; bli_csets( (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ++k; /* L10: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = j; bli_csets( (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); i__5 = j; bli_csets( (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), 
q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); bli_csets( (r__1), (0.f), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; r__1 = bli_creal(ap[i__3]); bli_csets( (r__1), (0.f), ap[i__2] ); } kk += j; /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; i__3 = jy; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f || (bli_creal(y[i__3]) != 0.f || bli_cimag(y[i__3]) != 0.f)) { bla_r_cnjg(&q__2, &y[jy]); bli_csets( (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); i__2 = jx; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); bla_r_cnjg(&q__1, &q__2); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); ix = kx; iy = ky; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = k; i__5 = ix; bli_csets( (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); i__6 = iy; bli_csets( (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ix += *incx; iy += *incy; /* L30: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = jx; bli_csets( (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); i__5 = jy; bli_csets( (bli_creal(y[i__5]) 
* bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); bli_csets( (r__1), (0.f), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; r__1 = bli_creal(ap[i__3]); bli_csets( (r__1), (0.f), ap[i__2] ); } jx += *incx; jy += *incy; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; i__3 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f || (bli_creal(y[i__3]) != 0.f || bli_cimag(y[i__3]) != 0.f)) { bla_r_cnjg(&q__2, &y[j]); bli_csets( (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); i__2 = j; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); bla_r_cnjg(&q__1, &q__2); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); i__2 = kk; i__3 = kk; i__4 = j; bli_csets( (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); i__5 = j; bli_csets( (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); bli_csets( (r__1), (0.f), ap[i__2] ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; bli_csets( 
(bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); i__6 = i__; bli_csets( (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); ++k; /* L50: */ } } else { i__2 = kk; i__3 = kk; r__1 = bli_creal(ap[i__3]); bli_csets( (r__1), (0.f), ap[i__2] ); } kk = kk + *n - j + 1; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; i__3 = jy; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f || (bli_creal(y[i__3]) != 0.f || bli_cimag(y[i__3]) != 0.f)) { bla_r_cnjg(&q__2, &y[jy]); bli_csets( (bli_creal(*alpha) * bli_creal(q__2) - bli_cimag(*alpha) * bli_cimag(q__2)), (bli_creal(*alpha) * bli_cimag(q__2) + bli_cimag(*alpha) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp1 ); i__2 = jx; bli_csets( (bli_creal(*alpha) * bli_creal(x[i__2]) - bli_cimag(*alpha) * bli_cimag(x[i__2])), (bli_creal(*alpha) * bli_cimag(x[i__2]) + bli_cimag(*alpha) * bli_creal(x[i__2])), q__2 ); bla_r_cnjg(&q__1, &q__2); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp2 ); i__2 = kk; i__3 = kk; i__4 = jx; bli_csets( (bli_creal(x[i__4]) * bli_creal(temp1) - bli_cimag(x[i__4]) * bli_cimag(temp1)), (bli_creal(x[i__4]) * bli_cimag(temp1) + bli_cimag(x[i__4]) * bli_creal(temp1)), q__2 ); i__5 = jy; bli_csets( (bli_creal(y[i__5]) * bli_creal(temp2) - bli_cimag(y[i__5]) * bli_cimag(temp2)), (bli_creal(y[i__5]) * bli_cimag(temp2) + bli_cimag(y[i__5]) * bli_creal(temp2)), q__3 ); bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 
); r__1 = bli_creal(ap[i__3]) + bli_creal(q__1); bli_csets( (r__1), (0.f), ap[i__2] ); ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; iy += *incy; i__3 = k; i__4 = k; i__5 = ix; bli_csets( (bli_creal(x[i__5]) * bli_creal(temp1) - bli_cimag(x[i__5]) * bli_cimag(temp1)), (bli_creal(x[i__5]) * bli_cimag(temp1) + bli_cimag(x[i__5]) * bli_creal(temp1)), q__3 ); bli_csets( (bli_creal(ap[i__4]) + bli_creal(q__3)), (bli_cimag(ap[i__4]) + bli_cimag(q__3)), q__2 ); i__6 = iy; bli_csets( (bli_creal(y[i__6]) * bli_creal(temp2) - bli_cimag(y[i__6]) * bli_cimag(temp2)), (bli_creal(y[i__6]) * bli_cimag(temp2) + bli_cimag(y[i__6]) * bli_creal(temp2)), q__4 ); bli_csets( (bli_creal(q__2) + bli_creal(q__4)), (bli_cimag(q__2) + bli_cimag(q__4)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ap[i__3] ); /* L70: */ } } else { i__2 = kk; i__3 = kk; r__1 = bli_creal(ap[i__3]); bli_csets( (r__1), (0.f), ap[i__2] ); } jx += *incx; jy += *incy; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of CHPR2 . */ } /* chpr2_ */ /* zhpr2.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,hpr2)(const bla_character *uplo, const bla_integer *n, const bla_dcomplex *alpha, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *y, const bla_integer *incy, bla_dcomplex *ap) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5, i__6; bla_double d__1; bla_dcomplex z__1, z__2, z__3, z__4; /* Builtin functions */ //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp1, temp2; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx = 0, jy = 0, kx = 0, ky = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. 
*/ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZHPR2 performs the hermitian rank 2 operation */ /* A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */ /* where alpha is a scalar, x and y are n element vectors and A is an */ /* n by n hermitian matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - COMPLEX*16 . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Y - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ /* Unchanged on exit. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* AP - COMPLEX*16 array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. 
On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the hermitian matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Note that the imaginary parts of the diagonal elements need */ /* not be set, they are assumed to be zero, and on exit they */ /* are set to zero. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --y; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("ZHPR2 ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (bli_zreal(*alpha) == 0. && bli_zimag(*alpha) == 0.)) { return 0; } /* Set up the start points in X and Y if the increments are not both */ /* unity. */ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. 
*/ kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; i__3 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0. || (bli_zreal(y[i__3]) != 0. || bli_zimag(y[i__3]) != 0.)) { bla_d_cnjg(&z__2, &y[j]); bli_zsets( (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); i__2 = j; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); bla_d_cnjg(&z__1, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); i__6 = i__; bli_zsets( (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ++k; /* L10: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = j; bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); i__5 = j; bli_zsets( (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * 
bli_zreal(temp2)), z__3 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); bli_zsets( (d__1), (0.), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; d__1 = bli_zreal(ap[i__3]); bli_zsets( (d__1), (0.), ap[i__2] ); } kk += j; /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; i__3 = jy; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0. || (bli_zreal(y[i__3]) != 0. || bli_zimag(y[i__3]) != 0.)) { bla_d_cnjg(&z__2, &y[jy]); bli_zsets( (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); i__2 = jx; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); bla_d_cnjg(&z__1, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); ix = kx; iy = ky; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = k; i__5 = ix; bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); i__6 = iy; bli_zsets( (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ix += *incx; iy += *incy; /* L30: */ } i__2 = kk + j - 1; i__3 = kk + j - 1; i__4 = jx; bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * 
bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); i__5 = jy; bli_zsets( (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); bli_zsets( (d__1), (0.), ap[i__2] ); } else { i__2 = kk + j - 1; i__3 = kk + j - 1; d__1 = bli_zreal(ap[i__3]); bli_zsets( (d__1), (0.), ap[i__2] ); } jx += *incx; jy += *incy; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; i__3 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0. || (bli_zreal(y[i__3]) != 0. || bli_zimag(y[i__3]) != 0.)) { bla_d_cnjg(&z__2, &y[j]); bli_zsets( (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); i__2 = j; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); bla_d_cnjg(&z__1, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); i__2 = kk; i__3 = kk; i__4 = j; bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); i__5 = j; bli_zsets( (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); bli_zsets( (d__1), (0.), ap[i__2] ); k = kk + 1; 
i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = k; i__5 = i__; bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); i__6 = i__; bli_zsets( (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); ++k; /* L50: */ } } else { i__2 = kk; i__3 = kk; d__1 = bli_zreal(ap[i__3]); bli_zsets( (d__1), (0.), ap[i__2] ); } kk = kk + *n - j + 1; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; i__3 = jy; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0. || (bli_zreal(y[i__3]) != 0. 
|| bli_zimag(y[i__3]) != 0.)) { bla_d_cnjg(&z__2, &y[jy]); bli_zsets( (bli_zreal(*alpha) * bli_zreal(z__2) - bli_zimag(*alpha) * bli_zimag(z__2)), (bli_zreal(*alpha) * bli_zimag(z__2) + bli_zimag(*alpha) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp1 ); i__2 = jx; bli_zsets( (bli_zreal(*alpha) * bli_zreal(x[i__2]) - bli_zimag(*alpha) * bli_zimag(x[i__2])), (bli_zreal(*alpha) * bli_zimag(x[i__2]) + bli_zimag(*alpha) * bli_zreal(x[i__2])), z__2 ); bla_d_cnjg(&z__1, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp2 ); i__2 = kk; i__3 = kk; i__4 = jx; bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(temp1) - bli_zimag(x[i__4]) * bli_zimag(temp1)), (bli_zreal(x[i__4]) * bli_zimag(temp1) + bli_zimag(x[i__4]) * bli_zreal(temp1)), z__2 ); i__5 = jy; bli_zsets( (bli_zreal(y[i__5]) * bli_zreal(temp2) - bli_zimag(y[i__5]) * bli_zimag(temp2)), (bli_zreal(y[i__5]) * bli_zimag(temp2) + bli_zimag(y[i__5]) * bli_zreal(temp2)), z__3 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); d__1 = bli_zreal(ap[i__3]) + bli_zreal(z__1); bli_zsets( (d__1), (0.), ap[i__2] ); ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; iy += *incy; i__3 = k; i__4 = k; i__5 = ix; bli_zsets( (bli_zreal(x[i__5]) * bli_zreal(temp1) - bli_zimag(x[i__5]) * bli_zimag(temp1)), (bli_zreal(x[i__5]) * bli_zimag(temp1) + bli_zimag(x[i__5]) * bli_zreal(temp1)), z__3 ); bli_zsets( (bli_zreal(ap[i__4]) + bli_zreal(z__3)), (bli_zimag(ap[i__4]) + bli_zimag(z__3)), z__2 ); i__6 = iy; bli_zsets( (bli_zreal(y[i__6]) * bli_zreal(temp2) - bli_zimag(y[i__6]) * bli_zimag(temp2)), (bli_zreal(y[i__6]) * bli_zimag(temp2) + bli_zimag(y[i__6]) * bli_zreal(temp2)), z__4 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__4)), (bli_zimag(z__2) + bli_zimag(z__4)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ap[i__3] ); /* L70: */ } } else { i__2 = kk; i__3 = kk; d__1 = bli_zreal(ap[i__3]); bli_zsets( 
(d__1), (0.), ap[i__2] ); } jx += *incx; jy += *incy; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of ZHPR2 . */ } /* zhpr2_ */ #endif blis-0.6.1/frame/compat/f2c/bla_hpr2.h000066400000000000000000000041641360743507500173570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifdef BLIS_ENABLE_BLAS

/*
   Prototypes for the f2c-derived, BLAS-compatible ?hpr2 routines (packed
   Hermitian matrix update, Level 2 BLAS). Only compiled when the BLAS
   compatibility layer is enabled.

   Arguments (all passed by reference, Fortran style):
     uplo  - "U"/"L": whether AP holds the upper or lower triangle.
     n     - order of the matrix; n == 0 is a quick return.
     alpha - complex scalar; alpha == 0 is a quick return.
     x,incx / y,incy - input vectors and their (nonzero) increments.
     ap    - packed triangle, updated in place.
   Both routines return int.
*/

/* Single-precision complex variant.
   NOTE(review): presumably mirrors the z variant below in single precision;
   its definition is not part of this header -- confirm against bla_hpr2.c. */
BLIS_EXPORT_BLAS int PASTEF77(c,hpr2)(const bla_character *uplo, const bla_integer *n, const bla_scomplex *alpha, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *y, const bla_integer *incy, bla_scomplex *ap);

/* Double-precision complex variant (ZHPR2): adds alpha*x*conj(y)' and
   conj(alpha)*y*conj(x)' to the packed triangle and forces the imaginary
   parts of the diagonal elements to zero. Invalid uplo/n/incx/incy are
   reported through xerbla with info = 1, 2, 5 or 7 respectively. */
BLIS_EXPORT_BLAS int PASTEF77(z,hpr2)(const bla_character *uplo, const bla_integer *n, const bla_dcomplex *alpha, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *y, const bla_integer *incy, bla_dcomplex *ap);

#endif
blis-0.6.1/frame/compat/f2c/bla_lsame.c000066400000000000000000000107621360743507500176010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* lsame.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ #ifdef LAPACK_ILP64 long PASTEF770(lsame)(const char *ca, const char *cb, long ca_len, long cb_len) #else int PASTEF770(lsame)(const char *ca, const char *cb, int ca_len, int cb_len) #endif { /* System generated locals */ bla_logical ret_val; /* Local variables */ bla_integer inta, intb, zcode; /* -- LAPACK auxiliary routine (version 2.0) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* January 31, 1994 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* LSAME returns .TRUE. if CA is the same letter as CB regardless of */ /* case. */ /* Arguments */ /* ========= */ /* CA (input) CHARACTER*1 */ /* CB (input) CHARACTER*1 */ /* CA and CB specify the single bla_characters to be compared. */ /* ===================================================================== */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Executable Statements .. */ /* Test if the bla_characters are equal */ ret_val = *(unsigned char *)ca == *(unsigned char *)cb; if (ret_val) { return ret_val; } /* Now test for equivalence if both bla_characters are alphabetic. 
*/ zcode = 'Z'; /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime */ /* machines, on which ICHAR returns a value with bit 8 set. */ /* ICHAR('A') on Prime machines returns 193 which is the same as */ /* ICHAR('A') on an EBCDIC machine. */ inta = *(unsigned char *)ca; intb = *(unsigned char *)cb; if (zcode == 90 || zcode == 122) { /* ASCII is assumed - ZCODE is the ASCII code of either lower or */ /* upper case 'Z'. */ if (inta >= 97 && inta <= 122) { inta += -32; } if (intb >= 97 && intb <= 122) { intb += -32; } } else if (zcode == 233 || zcode == 169) { /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or */ /* upper case 'Z'. */ if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || (inta >= 162 && inta <= 169)) { inta += 64; } if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || (intb >= 162 && intb <= 169)) { intb += 64; } } else if (zcode == 218 || zcode == 250) { /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code */ /* plus 128 of either lower or upper case 'Z'. */ if (inta >= 225 && inta <= 250) { inta += -32; } if (intb >= 225 && intb <= 250) { intb += -32; } } ret_val = inta == intb; /* RETURN */ /* End of LSAME */ return ret_val; } /* lsame */ #endif blis-0.6.1/frame/compat/f2c/bla_lsame.h000066400000000000000000000035731360743507500176100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS #ifdef LAPACK_ILP64 long PASTEF770(lsame)(const char *ca, const char *cb, long ca_len, long cb_len); #else BLIS_EXPORT_BLAS int PASTEF770(lsame)(const char *ca, const char *cb, int ca_len, int cb_len); #endif #endif blis-0.6.1/frame/compat/f2c/bla_rot.c000066400000000000000000000244561360743507500173110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* srot.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,rot)(const bla_integer *n, bla_real *sx, const bla_integer *incx, bla_real *sy, const bla_integer *incy, const bla_real *c__, const bla_real *s) { /* System generated locals */ bla_integer i__1; /* Local variables */ bla_integer i__; bla_real stemp; bla_integer ix, iy; /* applies a plane rotation. */ /* jack dongarra, linpack, 3/11/78. 
*/ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* Parameter adjustments */ --sy; --sx; /* Function Body */ if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal */ /* to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { stemp = *c__ * sx[ix] + *s * sy[iy]; sy[iy] = *c__ * sy[iy] - *s * sx[ix]; sx[ix] = stemp; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { stemp = *c__ * sx[i__] + *s * sy[i__]; sy[i__] = *c__ * sy[i__] - *s * sx[i__]; sx[i__] = stemp; /* L30: */ } return 0; } /* srot_ */ /* drot.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,rot)(const bla_integer *n, bla_double *dx, const bla_integer *incx, bla_double *dy, const bla_integer *incy, const bla_double *c__, const bla_double *s) { /* System generated locals */ bla_integer i__1; /* Local variables */ bla_integer i__; bla_double dtemp; bla_integer ix, iy; /* applies a plane rotation. */ /* jack dongarra, linpack, 3/11/78. 
*/ /* modified 12/3/93, array(1) declarations changed to array(*) */ /* Parameter adjustments */ --dy; --dx; /* Function Body */ if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal */ /* to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dtemp = *c__ * dx[ix] + *s * dy[iy]; dy[iy] = *c__ * dy[iy] - *s * dx[ix]; dx[ix] = dtemp; ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dtemp = *c__ * dx[i__] + *s * dy[i__]; dy[i__] = *c__ * dy[i__] - *s * dx[i__]; dx[i__] = dtemp; /* L30: */ } return 0; } /* drot_ */ /* csrot.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(cs,rot)(const bla_integer *n, bla_scomplex *cx, const bla_integer *incx, bla_scomplex *cy, const bla_integer *incy, const bla_real *c__, const bla_real *s) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4; bla_scomplex q__1, q__2, q__3; /* Local variables */ bla_integer i__; bla_scomplex ctemp; bla_integer ix, iy; /* applies a plane rotation, where the cos and sin (c and s) are bla_real */ /* and the vectors cx and cy are complex. */ /* jack dongarra, linpack, 3/11/78. 
*/ /* Parameter adjustments */ --cy; --cx; /* Function Body */ if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal */ /* to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = ix; bli_csets( (*c__ * bli_creal(cx[i__2])), (*c__ * bli_cimag(cx[i__2])), q__2 ); i__3 = iy; bli_csets( (*s * bli_creal(cy[i__3])), (*s * bli_cimag(cy[i__3])), q__3 ); bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ctemp ); i__2 = iy; i__3 = iy; bli_csets( (*c__ * bli_creal(cy[i__3])), (*c__ * bli_cimag(cy[i__3])), q__2 ); i__4 = ix; bli_csets( (*s * bli_creal(cx[i__4])), (*s * bli_cimag(cx[i__4])), q__3 ); bli_csets( (bli_creal(q__2) - bli_creal(q__3)), (bli_cimag(q__2) - bli_cimag(q__3)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), cy[i__2] ); i__2 = ix; bli_csets( (bli_creal(ctemp)), (bli_cimag(ctemp)), cx[i__2] ); ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; bli_csets( (*c__ * bli_creal(cx[i__2])), (*c__ * bli_cimag(cx[i__2])), q__2 ); i__3 = i__; bli_csets( (*s * bli_creal(cy[i__3])), (*s * bli_cimag(cy[i__3])), q__3 ); bli_csets( (bli_creal(q__2) + bli_creal(q__3)), (bli_cimag(q__2) + bli_cimag(q__3)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), ctemp ); i__2 = i__; i__3 = i__; bli_csets( (*c__ * bli_creal(cy[i__3])), (*c__ * bli_cimag(cy[i__3])), q__2 ); i__4 = i__; bli_csets( (*s * bli_creal(cx[i__4])), (*s * bli_cimag(cx[i__4])), q__3 ); bli_csets( (bli_creal(q__2) - bli_creal(q__3)), (bli_cimag(q__2) - bli_cimag(q__3)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), cy[i__2] ); i__2 = i__; bli_csets( (bli_creal(ctemp)), (bli_cimag(ctemp)), cx[i__2] 
); /* L30: */ } return 0; } /* csrot_ */ /* zdrot.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(zd,rot)(const bla_integer *n, bla_dcomplex *zx, const bla_integer *incx, bla_dcomplex *zy, const bla_integer *incy, const bla_double *c__, const bla_double *s) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4; bla_dcomplex z__1, z__2, z__3; /* Local variables */ bla_integer i__; bla_dcomplex ztemp; bla_integer ix, iy; /* applies a plane rotation, where the cos and sin (c and s) are */ /* double precision and the vectors zx and zy are double complex. */ /* jack dongarra, linpack, 3/11/78. */ /* Parameter adjustments */ --zy; --zx; /* Function Body */ if (*n <= 0) { return 0; } if (*incx == 1 && *incy == 1) { goto L20; } /* code for unequal increments or equal increments not equal */ /* to 1 */ ix = 1; iy = 1; if (*incx < 0) { ix = (-(*n) + 1) * *incx + 1; } if (*incy < 0) { iy = (-(*n) + 1) * *incy + 1; } i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = ix; bli_zsets( (*c__ * bli_zreal(zx[i__2])), (*c__ * bli_zimag(zx[i__2])), z__2 ); i__3 = iy; bli_zsets( (*s * bli_zreal(zy[i__3])), (*s * bli_zimag(zy[i__3])), z__3 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ztemp ); i__2 = iy; i__3 = iy; bli_zsets( (*c__ * bli_zreal(zy[i__3])), (*c__ * bli_zimag(zy[i__3])), z__2 ); i__4 = ix; bli_zsets( (*s * bli_zreal(zx[i__4])), (*s * bli_zimag(zx[i__4])), z__3 ); bli_zsets( (bli_zreal(z__2) - bli_zreal(z__3)), (bli_zimag(z__2) - bli_zimag(z__3)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), zy[i__2] ); i__2 = ix; bli_zsets( (bli_zreal(ztemp)), (bli_zimag(ztemp)), zx[i__2] ); ix += *incx; iy += *incy; /* L10: */ } return 0; /* code for both increments equal to 1 */ L20: i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = i__; 
bli_zsets( (*c__ * bli_zreal(zx[i__2])), (*c__ * bli_zimag(zx[i__2])), z__2 ); i__3 = i__; bli_zsets( (*s * bli_zreal(zy[i__3])), (*s * bli_zimag(zy[i__3])), z__3 ); bli_zsets( (bli_zreal(z__2) + bli_zreal(z__3)), (bli_zimag(z__2) + bli_zimag(z__3)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), ztemp ); i__2 = i__; i__3 = i__; bli_zsets( (*c__ * bli_zreal(zy[i__3])), (*c__ * bli_zimag(zy[i__3])), z__2 ); i__4 = i__; bli_zsets( (*s * bli_zreal(zx[i__4])), (*s * bli_zimag(zx[i__4])), z__3 ); bli_zsets( (bli_zreal(z__2) - bli_zreal(z__3)), (bli_zimag(z__2) - bli_zimag(z__3)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), zy[i__2] ); i__2 = i__; bli_zsets( (bli_zreal(ztemp)), (bli_zimag(ztemp)), zx[i__2] ); /* L30: */ } return 0; } /* zdrot_ */ #endif blis-0.6.1/frame/compat/f2c/bla_rot.h000066400000000000000000000046201360743507500173050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifdef BLIS_ENABLE_BLAS

/*
   Prototypes for the f2c-derived plane-rotation routines defined in
   bla_rot.c. Each routine rotates two n-element vectors in place,

       x(i) <- c*x(i) + s*y(i)
       y(i) <- c*y(i) - s*x(i)      (right-hand sides use the old x(i))

   returns immediately when n <= 0, and always returns 0. All arguments are
   passed by reference; incx/incy are the vector increments, and a negative
   increment walks that vector backwards.
*/

/* Single-precision real vectors, real c and s. */
BLIS_EXPORT_BLAS int PASTEF77(s,rot)(const bla_integer *n, bla_real *sx, const bla_integer *incx, bla_real *sy, const bla_integer *incy, const bla_real *c__, const bla_real *s);

/* Double-precision real vectors, real c and s. */
BLIS_EXPORT_BLAS int PASTEF77(d,rot)(const bla_integer *n, bla_double *dx, const bla_integer *incx, bla_double *dy, const bla_integer *incy, const bla_double *c__, const bla_double *s);

/* Single-precision complex vectors rotated by a real (c, s) pair. */
BLIS_EXPORT_BLAS int PASTEF77(cs,rot)(const bla_integer *n, bla_scomplex *cx, const bla_integer *incx, bla_scomplex *cy, const bla_integer *incy, const bla_real *c__, const bla_real *s);

/* Double-precision complex vectors rotated by a real (c, s) pair. */
BLIS_EXPORT_BLAS int PASTEF77(zd,rot)(const bla_integer *n, bla_dcomplex *zx, const bla_integer *incx, bla_dcomplex *zy, const bla_integer *incy, const bla_double *c__, const bla_double *s);

#endif
blis-0.6.1/frame/compat/f2c/bla_rotg.c000066400000000000000000000175331360743507500174560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* srotg.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Table of constant values */ static bla_real sc_b4 = 1.f; /* Subroutine */ int PASTEF77(s,rotg)(bla_real *sa, bla_real *sb, bla_real *c__, bla_real *s) { /* System generated locals */ bla_real r__1, r__2; /* Builtin functions */ //double sqrt(bla_double), bla_r_sign(bla_real *, bla_real *); /* Local variables */ bla_real r__, scale, z__, roe; /* construct givens plane rotation. */ /* jack dongarra, linpack, 3/11/78. 
*/ roe = *sb; if (bli_fabs(*sa) > bli_fabs(*sb)) { roe = *sa; } scale = bli_fabs(*sa) + bli_fabs(*sb); if (scale != 0.f) { goto L10; } *c__ = 1.f; *s = 0.f; r__ = 0.f; z__ = 0.f; goto L20; L10: /* Computing 2nd power */ r__1 = *sa / scale; /* Computing 2nd power */ r__2 = *sb / scale; r__ = scale * sqrt(r__1 * r__1 + r__2 * r__2); r__ = bla_r_sign(&sc_b4, &roe) * r__; *c__ = *sa / r__; *s = *sb / r__; z__ = 1.f; if (bli_fabs(*sa) > bli_fabs(*sb)) { z__ = *s; } if (bli_fabs(*sb) >= bli_fabs(*sa) && *c__ != 0.f) { z__ = 1.f / *c__; } L20: *sa = r__; *sb = z__; return 0; } /* srotg_ */ /* drotg.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Table of constant values */ static bla_double dc_b4 = 1.; /* Subroutine */ int PASTEF77(d,rotg)(bla_double *da, bla_double *db, bla_double *c__, bla_double *s) { /* System generated locals */ bla_double d__1, d__2; /* Builtin functions */ //double sqrt(bla_double), bla_d_sign(bla_double *, bla_double *); /* Local variables */ bla_double r__, scale, z__, roe; /* construct givens plane rotation. */ /* jack dongarra, linpack, 3/11/78. */ roe = *db; if (bli_fabs(*da) > bli_fabs(*db)) { roe = *da; } scale = bli_fabs(*da) + bli_fabs(*db); if (scale != 0.) { goto L10; } *c__ = 1.; *s = 0.; r__ = 0.; z__ = 0.; goto L20; L10: /* Computing 2nd power */ d__1 = *da / scale; /* Computing 2nd power */ d__2 = *db / scale; r__ = scale * sqrt(d__1 * d__1 + d__2 * d__2); r__ = bla_d_sign(&dc_b4, &roe) * r__; *c__ = *da / r__; *s = *db / r__; z__ = 1.; if (bli_fabs(*da) > bli_fabs(*db)) { z__ = *s; } if (bli_fabs(*db) >= bli_fabs(*da) && *c__ != 0.) { z__ = 1. / *c__; } L20: *da = r__; *db = z__; return 0; } /* drotg_ */ /* crotg.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(c,rotg)(bla_scomplex *ca, bla_scomplex *cb, bla_real *c__, bla_scomplex *s) { /* System generated locals */ bla_real r__1, r__2; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ //double bla_c_abs(bla_scomplex *), sqrt(bla_double); //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_real norm; bla_scomplex alpha; bla_real scale; if (bla_c_abs(ca) != 0.f) { goto L10; } *c__ = 0.f; bli_csets( 1.f, 0.f, *s ); bli_csets( bli_creal(*cb), bli_cimag(*cb), *ca ); goto L20; L10: scale = bla_c_abs(ca) + bla_c_abs(cb); bli_csets( (bli_creal(*ca) / scale), (bli_cimag(*ca) / scale), q__1 ); /* Computing 2nd power */ r__1 = bla_c_abs(&q__1); bli_csets( (bli_creal(*cb) / scale), (bli_cimag(*cb) / scale), q__2 ); /* Computing 2nd power */ r__2 = bla_c_abs(&q__2); norm = scale * sqrt(r__1 * r__1 + r__2 * r__2); r__1 = bla_c_abs(ca); bli_csets( (bli_creal(*ca) / r__1), (bli_cimag(*ca) / r__1), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), alpha ); *c__ = bla_c_abs(ca) / norm; bla_r_cnjg(&q__3, cb); bli_csets( (bli_creal(alpha) * bli_creal(q__3) - bli_cimag(alpha) * bli_cimag(q__3)), (bli_creal(alpha) * bli_cimag(q__3) + bli_cimag(alpha) * bli_creal(q__3)), q__2 ); bli_csets( (bli_creal(q__2) / norm), (bli_cimag(q__2) / norm), q__1 ); bli_csets( bli_creal(q__1), bli_cimag(q__1), *s ); bli_csets( (norm * bli_creal(alpha)), (norm * bli_cimag(alpha)), q__1 ); bli_csets( bli_creal(q__1), bli_cimag(q__1), *ca ); L20: return 0; } /* crotg_ */ /* zrotg.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,rotg)(bla_dcomplex *ca, bla_dcomplex *cb, bla_double *c__, bla_dcomplex *s) { /* System generated locals */ bla_double d__1, d__2; bla_dcomplex z__1, z__2, z__3, z__4; /* Builtin functions */ //double bla_z_abs(bla_dcomplex *); //void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *); //double sqrt(bla_double); //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_double norm; bla_dcomplex alpha; bla_double scale; if (bla_z_abs(ca) != 0.) { goto L10; } *c__ = 0.; bli_zsets( 1., 0., *s ); bli_zsets( bli_zreal(*cb), bli_zimag(*cb), *ca ); goto L20; L10: scale = bla_z_abs(ca) + bla_z_abs(cb); bli_zsets( (scale), (0.), z__2 ); bla_z_div(&z__1, ca, &z__2); /* Computing 2nd power */ d__1 = bla_z_abs(&z__1); bli_zsets( (scale), (0.), z__4 ); bla_z_div(&z__3, cb, &z__4); /* Computing 2nd power */ d__2 = bla_z_abs(&z__3); norm = scale * sqrt(d__1 * d__1 + d__2 * d__2); d__1 = bla_z_abs(ca); bli_zsets( (bli_zreal(*ca) / d__1), (bli_zimag(*ca) / d__1), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), alpha ); *c__ = bla_z_abs(ca) / norm; bla_d_cnjg(&z__3, cb); bli_zsets( (bli_zreal(alpha) * bli_zreal(z__3) - bli_zimag(alpha) * bli_zimag(z__3)), (bli_zreal(alpha) * bli_zimag(z__3) + bli_zimag(alpha) * bli_zreal(z__3)), z__2 ); bli_zsets( (bli_zreal(z__2) / norm), (bli_zimag(z__2) / norm), z__1 ); bli_zsets( bli_zreal(z__1), bli_zimag(z__1), *s ); bli_zsets( (norm * bli_zreal(alpha)), (norm * bli_zimag(alpha)), z__1 ); bli_zsets( bli_zreal(z__1), bli_zimag(z__1), *ca ); L20: return 0; } /* zrotg_ */ #endif blis-0.6.1/frame/compat/f2c/bla_rotg.h000066400000000000000000000041101360743507500174460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifdef BLIS_ENABLE_BLAS

/*
   Prototypes for the rotg routines (Givens plane rotation generators), one
   per datatype prefix: s and d take real arguments; c and z take complex
   ca, cb and s with a real c__. Every routine returns int.
   These declarations are compiled only when BLIS_ENABLE_BLAS is defined.
*/

/* Single-precision real. */
BLIS_EXPORT_BLAS int PASTEF77(s,rotg)(bla_real *sa, bla_real *sb, bla_real *c__, bla_real *s);

/* Double-precision real. */
BLIS_EXPORT_BLAS int PASTEF77(d,rotg)(bla_double *da, bla_double *db, bla_double *c__, bla_double *s);

/* Single-precision complex; c__ is real. */
BLIS_EXPORT_BLAS int PASTEF77(c,rotg)(bla_scomplex *ca, bla_scomplex *cb, bla_real *c__, bla_scomplex *s);

/* Double-precision complex; c__ is real. */
BLIS_EXPORT_BLAS int PASTEF77(z,rotg)(bla_dcomplex *ca, bla_dcomplex *cb, bla_double *c__, bla_dcomplex *s);

#endif
blis-0.6.1/frame/compat/f2c/bla_rotm.c000066400000000000000000000215401360743507500174550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* srotm.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,rotm)(const bla_integer *n, bla_real *sx, const bla_integer *incx, bla_real *sy, const bla_integer *incy, const bla_real *sparam) { /* Initialized data */ static bla_real zero = 0.f; static bla_real two = 2.f; /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer i__; bla_real w, z__, sflag; bla_integer kx, ky, nsteps; bla_real sh11, sh12, sh21, sh22; /* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ /* (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */ /* (DX**T) */ /* SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ /* LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */ /* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ /* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ /* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ /* H=( ) ( ) ( ) ( ) */ /* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ /* SEE SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */ /* Parameter adjustments */ --sparam; --sy; --sx; /* Function Body */ sflag = sparam[1]; if (*n <= 0 || sflag + two == zero) { goto L140; } if (! 
(*incx == *incy && *incx > 0)) { goto L70; } nsteps = *n * *incx; if (sflag < 0.f) { goto L50; } else if (sflag == 0) { goto L10; } else { goto L30; } L10: sh12 = sparam[4]; sh21 = sparam[3]; i__1 = nsteps; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { w = sx[i__]; z__ = sy[i__]; sx[i__] = w + z__ * sh12; sy[i__] = w * sh21 + z__; /* L20: */ } goto L140; L30: sh11 = sparam[2]; sh22 = sparam[5]; i__2 = nsteps; i__1 = *incx; for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { w = sx[i__]; z__ = sy[i__]; sx[i__] = w * sh11 + z__; sy[i__] = -w + sh22 * z__; /* L40: */ } goto L140; L50: sh11 = sparam[2]; sh12 = sparam[4]; sh21 = sparam[3]; sh22 = sparam[5]; i__1 = nsteps; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { w = sx[i__]; z__ = sy[i__]; sx[i__] = w * sh11 + z__ * sh12; sy[i__] = w * sh21 + z__ * sh22; /* L60: */ } goto L140; L70: kx = 1; ky = 1; if (*incx < 0) { kx = (1 - *n) * *incx + 1; } if (*incy < 0) { ky = (1 - *n) * *incy + 1; } if (sflag < 0.f) { goto L120; } else if (sflag == 0) { goto L80; } else { goto L100; } L80: sh12 = sparam[4]; sh21 = sparam[3]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = sx[kx]; z__ = sy[ky]; sx[kx] = w + z__ * sh12; sy[ky] = w * sh21 + z__; kx += *incx; ky += *incy; /* L90: */ } goto L140; L100: sh11 = sparam[2]; sh22 = sparam[5]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = sx[kx]; z__ = sy[ky]; sx[kx] = w * sh11 + z__; sy[ky] = -w + sh22 * z__; kx += *incx; ky += *incy; /* L110: */ } goto L140; L120: sh11 = sparam[2]; sh12 = sparam[4]; sh21 = sparam[3]; sh22 = sparam[5]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = sx[kx]; z__ = sy[ky]; sx[kx] = w * sh11 + z__ * sh12; sy[ky] = w * sh21 + z__ * sh22; kx += *incx; ky += *incy; /* L130: */ } L140: return 0; } /* srotm_ */ /* drotm.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,rotm)(const bla_integer *n, bla_double *dx, const bla_integer *incx, bla_double *dy, const bla_integer *incy, const bla_double *dparam) { /* Initialized data */ static bla_double zero = 0.; static bla_double two = 2.; /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer i__; bla_double dflag, w, z__; bla_integer kx, ky, nsteps; bla_double dh11, dh12, dh22, dh21; /* APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */ /* (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */ /* (DY**T) */ /* DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */ /* LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */ /* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ /* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ /* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ /* H=( ) ( ) ( ) ( ) */ /* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ /* SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */ /* Parameter adjustments */ --dparam; --dy; --dx; /* Function Body */ dflag = dparam[1]; if (*n <= 0 || dflag + two == zero) { goto L140; } if (! (*incx == *incy && *incx > 0)) { goto L70; } nsteps = *n * *incx; if (dflag < 0.) { goto L50; } else if (dflag == 0) { goto L10; } else { goto L30; } L10: dh12 = dparam[4]; dh21 = dparam[3]; i__1 = nsteps; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { w = dx[i__]; z__ = dy[i__]; dx[i__] = w + z__ * dh12; dy[i__] = w * dh21 + z__; /* L20: */ } goto L140; L30: dh11 = dparam[2]; dh22 = dparam[5]; i__2 = nsteps; i__1 = *incx; for (i__ = 1; i__1 < 0 ? 
i__ >= i__2 : i__ <= i__2; i__ += i__1) { w = dx[i__]; z__ = dy[i__]; dx[i__] = w * dh11 + z__; dy[i__] = -w + dh22 * z__; /* L40: */ } goto L140; L50: dh11 = dparam[2]; dh12 = dparam[4]; dh21 = dparam[3]; dh22 = dparam[5]; i__1 = nsteps; i__2 = *incx; for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { w = dx[i__]; z__ = dy[i__]; dx[i__] = w * dh11 + z__ * dh12; dy[i__] = w * dh21 + z__ * dh22; /* L60: */ } goto L140; L70: kx = 1; ky = 1; if (*incx < 0) { kx = (1 - *n) * *incx + 1; } if (*incy < 0) { ky = (1 - *n) * *incy + 1; } if (dflag < 0.) { goto L120; } else if (dflag == 0) { goto L80; } else { goto L100; } L80: dh12 = dparam[4]; dh21 = dparam[3]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = dx[kx]; z__ = dy[ky]; dx[kx] = w + z__ * dh12; dy[ky] = w * dh21 + z__; kx += *incx; ky += *incy; /* L90: */ } goto L140; L100: dh11 = dparam[2]; dh22 = dparam[5]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = dx[kx]; z__ = dy[ky]; dx[kx] = w * dh11 + z__; dy[ky] = -w + dh22 * z__; kx += *incx; ky += *incy; /* L110: */ } goto L140; L120: dh11 = dparam[2]; dh12 = dparam[4]; dh21 = dparam[3]; dh22 = dparam[5]; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { w = dx[kx]; z__ = dy[ky]; dx[kx] = w * dh11 + z__ * dh12; dy[ky] = w * dh21 + z__ * dh22; kx += *incx; ky += *incy; /* L130: */ } L140: return 0; } /* drotm_ */ #endif blis-0.6.1/frame/compat/f2c/bla_rotm.h000066400000000000000000000037661360743507500174740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifdef BLIS_ENABLE_BLAS

/*
   Prototypes for the rotm routines (application of a modified Givens
   transformation to a pair of strided vectors). n, incx, incy and the
   parameter array are read-only; the two vectors are modified in place.
   These declarations are compiled only when BLIS_ENABLE_BLAS is defined.
*/

/* Single-precision real. */
BLIS_EXPORT_BLAS int PASTEF77(s,rotm)(const bla_integer *n, bla_real *sx, const bla_integer *incx, bla_real *sy, const bla_integer *incy, const bla_real *sparam);

/* Double-precision real. */
BLIS_EXPORT_BLAS int PASTEF77(d,rotm)(const bla_integer *n, bla_double *dx, const bla_integer *incx, bla_double *dy, const bla_integer *incy, const bla_double *dparam);

#endif
blis-0.6.1/frame/compat/f2c/bla_rotmg.c000066400000000000000000000264261360743507500176340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* srotmg.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,rotmg)(bla_real *sd1, bla_real *sd2, bla_real *sx1, const bla_real *sy1, bla_real *sparam) { /* Initialized data */ static bla_real zero = 0.f; static bla_real one = 1.f; static bla_real two = 2.f; static bla_real gam = 4096.f; static bla_real gamsq = 16777200.f; static bla_real rgamsq = 5.96046e-8f; /* Format strings */ /* System generated locals */ bla_real r__1; /* Local variables */ bla_real sflag, stemp, su, sp1, sp2, sq2, sq1, sh11 = 0.f, sh21 = 0.f, sh12 = 0.f, sh22 = 0.f; bla_integer igo; /* Assigned format variables */ /* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ /* THE SECOND COMPONENT OF THE 2-VECTOR (SQRT(SD1)*SX1,SQRT(SD2)* */ /* SY2)**T. */ /* WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ /* SFLAG=-1.E0 SFLAG=0.E0 SFLAG=1.E0 SFLAG=-2.E0 */ /* (SH11 SH12) (1.E0 SH12) (SH11 1.E0) (1.E0 0.E0) */ /* H=( ) ( ) ( ) ( ) */ /* (SH21 SH22), (SH21 1.E0), (-1.E0 SH22), (0.E0 1.E0). */ /* LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */ /* RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */ /* VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */ /* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ /* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ /* OF SD1 AND SD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */ /* Parameter adjustments */ --sparam; /* Function Body */ if (! (*sd1 < zero)) { goto L10; } /* GO ZERO-H-D-AND-SX1.. */ goto L60; L10: /* CASE-SD1-NONNEGATIVE */ sp2 = *sd2 * *sy1; if (! (sp2 == zero)) { goto L20; } sflag = -two; goto L260; /* REGULAR-CASE.. */ L20: sp1 = *sd1 * *sx1; sq2 = sp2 * *sy1; sq1 = sp1 * *sx1; if (! (bli_fabs(sq1) > bli_fabs(sq2))) { goto L40; } sh21 = -(*sy1) / *sx1; sh12 = sp2 / sp1; su = one - sh12 * sh21; if (! (su <= zero)) { goto L30; } /* GO ZERO-H-D-AND-SX1.. 
*/ goto L60; L30: sflag = zero; *sd1 /= su; *sd2 /= su; *sx1 *= su; /* GO SCALE-CHECK.. */ goto L100; L40: if (! (sq2 < zero)) { goto L50; } /* GO ZERO-H-D-AND-SX1.. */ goto L60; L50: sflag = one; sh11 = sp1 / sp2; sh22 = *sx1 / *sy1; su = one + sh11 * sh22; stemp = *sd2 / su; *sd2 = *sd1 / su; *sd1 = stemp; *sx1 = *sy1 * su; /* GO SCALE-CHECK */ goto L100; /* PROCEDURE..ZERO-H-D-AND-SX1.. */ L60: sflag = -one; sh11 = zero; sh12 = zero; sh21 = zero; sh22 = zero; *sd1 = zero; *sd2 = zero; *sx1 = zero; /* RETURN.. */ goto L220; /* PROCEDURE..FIX-H.. */ L70: if (! (sflag >= zero)) { goto L90; } if (! (sflag == zero)) { goto L80; } sh11 = one; sh22 = one; sflag = -one; goto L90; L80: sh21 = -one; sh12 = one; sflag = -one; L90: switch (igo) { case 0: goto L120; case 1: goto L150; case 2: goto L180; case 3: goto L210; } /* PROCEDURE..SCALE-CHECK */ L100: L110: if (! (*sd1 <= rgamsq)) { goto L130; } if (*sd1 == zero) { goto L160; } igo = 0; /* FIX-H.. */ goto L70; L120: /* Computing 2nd power */ r__1 = gam; *sd1 *= r__1 * r__1; *sx1 /= gam; sh11 /= gam; sh12 /= gam; goto L110; L130: L140: if (! (*sd1 >= gamsq)) { goto L160; } igo = 1; /* FIX-H.. */ goto L70; L150: /* Computing 2nd power */ r__1 = gam; *sd1 /= r__1 * r__1; *sx1 *= gam; sh11 *= gam; sh12 *= gam; goto L140; L160: L170: if (! (bli_fabs(*sd2) <= rgamsq)) { goto L190; } if (*sd2 == zero) { goto L220; } igo = 2; /* FIX-H.. */ goto L70; L180: /* Computing 2nd power */ r__1 = gam; *sd2 *= r__1 * r__1; sh21 /= gam; sh22 /= gam; goto L170; L190: L200: if (! (bli_fabs(*sd2) >= gamsq)) { goto L220; } igo = 3; /* FIX-H.. 
*/ goto L70; L210: /* Computing 2nd power */ r__1 = gam; *sd2 /= r__1 * r__1; sh21 *= gam; sh22 *= gam; goto L200; L220: if (sflag < 0.f) { goto L250; } else if (sflag == 0) { goto L230; } else { goto L240; } L230: sparam[3] = sh21; sparam[4] = sh12; goto L260; L240: sparam[2] = sh11; sparam[5] = sh22; goto L260; L250: sparam[2] = sh11; sparam[3] = sh21; sparam[4] = sh12; sparam[5] = sh22; L260: sparam[1] = sflag; return 0; } /* srotmg_ */ /* drotmg.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,rotmg)(bla_double *dd1, bla_double *dd2, bla_double *dx1, const bla_double *dy1, bla_double *dparam) { /* Initialized data */ static bla_double zero = 0.; static bla_double one = 1.; static bla_double two = 2.; static bla_double gam = 4096.; static bla_double gamsq = 16777216.; static bla_double rgamsq = 5.9604645e-8; /* Format strings */ /* System generated locals */ bla_double d__1; /* Local variables */ bla_double dflag, dtemp, du, dp1, dp2, dq2, dq1, dh11 = 0.f, dh21 = 0.f, dh12 = 0.f, dh22 = 0.f; bla_integer igo; /* Assigned format variables */ /* CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */ /* THE SECOND COMPONENT OF THE 2-VECTOR (DSQRT(DD1)*DX1,DSQRT(DD2)* */ /* DY2)**T. */ /* WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */ /* DFLAG=-1.D0 DFLAG=0.D0 DFLAG=1.D0 DFLAG=-2.D0 */ /* (DH11 DH12) (1.D0 DH12) (DH11 1.D0) (1.D0 0.D0) */ /* H=( ) ( ) ( ) ( ) */ /* (DH21 DH22), (DH21 1.D0), (-1.D0 DH22), (0.D0 1.D0). */ /* LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */ /* RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */ /* VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */ /* THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */ /* INEXACT. THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */ /* OF DD1 AND DD2. ALL ACTUAL SCALING OF DATA IS DONE USING GAM. 
*/ /* Parameter adjustments */ --dparam; /* Function Body */ if (! (*dd1 < zero)) { goto L10; } /* GO ZERO-H-D-AND-DX1.. */ goto L60; L10: /* CASE-DD1-NONNEGATIVE */ dp2 = *dd2 * *dy1; if (! (dp2 == zero)) { goto L20; } dflag = -two; goto L260; /* REGULAR-CASE.. */ L20: dp1 = *dd1 * *dx1; dq2 = dp2 * *dy1; dq1 = dp1 * *dx1; if (! (bli_fabs(dq1) > bli_fabs(dq2))) { goto L40; } dh21 = -(*dy1) / *dx1; dh12 = dp2 / dp1; du = one - dh12 * dh21; if (! (du <= zero)) { goto L30; } /* GO ZERO-H-D-AND-DX1.. */ goto L60; L30: dflag = zero; *dd1 /= du; *dd2 /= du; *dx1 *= du; /* GO SCALE-CHECK.. */ goto L100; L40: if (! (dq2 < zero)) { goto L50; } /* GO ZERO-H-D-AND-DX1.. */ goto L60; L50: dflag = one; dh11 = dp1 / dp2; dh22 = *dx1 / *dy1; du = one + dh11 * dh22; dtemp = *dd2 / du; *dd2 = *dd1 / du; *dd1 = dtemp; *dx1 = *dy1 * du; /* GO SCALE-CHECK */ goto L100; /* PROCEDURE..ZERO-H-D-AND-DX1.. */ L60: dflag = -one; dh11 = zero; dh12 = zero; dh21 = zero; dh22 = zero; *dd1 = zero; *dd2 = zero; *dx1 = zero; /* RETURN.. */ goto L220; /* PROCEDURE..FIX-H.. */ L70: if (! (dflag >= zero)) { goto L90; } if (! (dflag == zero)) { goto L80; } dh11 = one; dh22 = one; dflag = -one; goto L90; L80: dh21 = -one; dh12 = one; dflag = -one; L90: switch (igo) { case 0: goto L120; case 1: goto L150; case 2: goto L180; case 3: goto L210; } /* PROCEDURE..SCALE-CHECK */ L100: L110: if (! (*dd1 <= rgamsq)) { goto L130; } if (*dd1 == zero) { goto L160; } igo = 0; /* FIX-H.. */ goto L70; L120: /* Computing 2nd power */ d__1 = gam; *dd1 *= d__1 * d__1; *dx1 /= gam; dh11 /= gam; dh12 /= gam; goto L110; L130: L140: if (! (*dd1 >= gamsq)) { goto L160; } igo = 1; /* FIX-H.. */ goto L70; L150: /* Computing 2nd power */ d__1 = gam; *dd1 /= d__1 * d__1; *dx1 *= gam; dh11 *= gam; dh12 *= gam; goto L140; L160: L170: if (! (bli_fabs(*dd2) <= rgamsq)) { goto L190; } if (*dd2 == zero) { goto L220; } igo = 2; /* FIX-H.. 
*/ goto L70; L180: /* Computing 2nd power */ d__1 = gam; *dd2 *= d__1 * d__1; dh21 /= gam; dh22 /= gam; goto L170; L190: L200: if (! (bli_fabs(*dd2) >= gamsq)) { goto L220; } igo = 3; /* FIX-H.. */ goto L70; L210: /* Computing 2nd power */ d__1 = gam; *dd2 /= d__1 * d__1; dh21 *= gam; dh22 *= gam; goto L200; L220: if (dflag < 0.) { goto L250; } else if (dflag == 0) { goto L230; } else { goto L240; } L230: dparam[3] = dh21; dparam[4] = dh12; goto L260; L240: dparam[2] = dh11; dparam[5] = dh22; goto L260; L250: dparam[2] = dh11; dparam[3] = dh21; dparam[4] = dh12; dparam[5] = dh22; L260: dparam[1] = dflag; return 0; } /* drotmg_ */ #endif blis-0.6.1/frame/compat/f2c/bla_rotmg.h000066400000000000000000000036541360743507500176370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifdef BLIS_ENABLE_BLAS

/*
   Prototypes for the rotmg routines (construction of a modified Givens
   transformation). The first three scalars and the parameter array are
   writable; the fourth scalar (sy1/dy1) is read-only.
   These declarations are compiled only when BLIS_ENABLE_BLAS is defined.
*/

/* Single-precision real. */
BLIS_EXPORT_BLAS int PASTEF77(s,rotmg)(bla_real *sd1, bla_real *sd2, bla_real *sx1, const bla_real *sy1, bla_real *sparam);

/* Double-precision real. */
BLIS_EXPORT_BLAS int PASTEF77(d,rotmg)(bla_double *dd1, bla_double *dd2, bla_double *dx1, const bla_double *dy1, bla_double *dparam);

#endif
blis-0.6.1/frame/compat/f2c/bla_sbmv.c000066400000000000000000000522451360743507500174510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* dsbmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,sbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_double *alpha, const bla_double *a, const bla_integer *lda, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ bla_integer info; bla_double temp1, temp2; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSBMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n symmetric band matrix, with k super-diagonals. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the band matrix A is being supplied as */ /* follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* being supplied. 
*/ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* being supplied. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry, K specifies the number of super-diagonals of the */ /* matrix A. K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the symmetric matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer the upper */ /* triangular part of a symmetric band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the symmetric matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. 
*/ /* The following program segment will transfer the lower */ /* triangular part of a symmetric band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. */ /* On entry, BETA specifies the scalar beta. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! 
PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*k < 0) { info = 3; } else if (*lda < *k + 1) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { PASTEF770(xerbla)("DSBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (*alpha == 0. && *beta == 1.)) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array A */ /* are accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form y when upper triangle of A is stored. 
*/ kplus1 = *k + 1; if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { y[i__] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[i__]; /* L50: */ } y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; ix = kx; iy = ky; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { y[iy] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; iy += *incy; /* L70: */ } y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; jx += *incx; jy += *incy; if (j > *k) { kx += *incx; ky += *incy; } /* L80: */ } } } else { /* Form y when lower triangle of A is stored. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; y[j] += temp1 * a[j * a_dim1 + 1]; l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = f2c_min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { y[i__] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } y[j] += *alpha * temp2; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; y[jy] += temp1 * a[j * a_dim1 + 1]; l = 1 - j; ix = jx; iy = jy; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = f2c_min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { ix += *incx; iy += *incy; y[iy] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[ix]; /* L110: */ } y[jy] += *alpha * temp2; jx += *incx; jy += *incy; /* L120: */ } } } return 0; /* End of DSBMV . */ } /* dsbmv_ */ /* ssbmv.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,sbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_real *alpha, const bla_real *a, const bla_integer *lda, const bla_real *x, const bla_integer *incx, const bla_real *beta, bla_real *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ bla_integer info; bla_real temp1, temp2; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSBMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n symmetric band matrix, with k super-diagonals. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the band matrix A is being supplied as */ /* follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* being supplied. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* being supplied. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry, K specifies the number of super-diagonals of the */ /* matrix A. K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* ALPHA - REAL . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* A - REAL array of DIMENSION ( LDA, n ). 
*/ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the symmetric matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer the upper */ /* triangular part of a symmetric band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the symmetric matrix, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer the lower */ /* triangular part of a symmetric band matrix from conventional */ /* full matrix storage to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - REAL array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the */ /* vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. 
*/ /* Unchanged on exit. */ /* BETA - REAL . */ /* On entry, BETA specifies the scalar beta. */ /* Unchanged on exit. */ /* Y - REAL array of DIMENSION at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the */ /* vector y. On exit, Y is overwritten by the updated vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; --y; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*k < 0) { info = 3; } else if (*lda < *k + 1) { info = 6; } else if (*incx == 0) { info = 8; } else if (*incy == 0) { info = 11; } if (info != 0) { PASTEF770(xerbla)("SSBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array A */ /* are accessed sequentially with one pass through A. */ /* First form y := beta*y. 
*/ if (*beta != 1.f) { if (*incy == 1) { if (*beta == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.f; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.f; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.f) { return 0; } if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form y when upper triangle of A is stored. */ kplus1 = *k + 1; if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.f; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { y[i__] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[i__]; /* L50: */ } y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.f; ix = kx; iy = ky; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { y[iy] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; iy += *incy; /* L70: */ } y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2; jx += *incx; jy += *incy; if (j > *k) { kx += *incx; ky += *incy; } /* L80: */ } } } else { /* Form y when lower triangle of A is stored. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.f; y[j] += temp1 * a[j * a_dim1 + 1]; l = 1 - j; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = f2c_min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { y[i__] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } y[j] += *alpha * temp2; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.f; y[jy] += temp1 * a[j * a_dim1 + 1]; l = 1 - j; ix = jx; iy = jy; /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__3 = f2c_min(i__4,i__2); for (i__ = j + 1; i__ <= i__3; ++i__) { ix += *incx; iy += *incy; y[iy] += temp1 * a[l + i__ + j * a_dim1]; temp2 += a[l + i__ + j * a_dim1] * x[ix]; /* L110: */ } y[jy] += *alpha * temp2; jx += *incx; jy += *incy; /* L120: */ } } } return 0; /* End of SSBMV . */ } /* ssbmv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_sbmv.h000066400000000000000000000043441360743507500174530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifdef BLIS_ENABLE_BLAS

/* Prototypes for the symmetric band matrix-vector product
   y := alpha*A*x + beta*y (the BLAS "sbmv" operation), declared only
   when BLIS_ENABLE_BLAS is defined.  Every argument is passed by
   pointer and only y is writable.
   NOTE(review): PASTEF77 presumably builds the Fortran-77 style symbol
   name from the type letter and the operation name -- confirm against
   its definition. */

/* Double precision variant (bla_double data). */
BLIS_EXPORT_BLAS int PASTEF77(d,sbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_double *alpha, const bla_double *a, const bla_integer *lda, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy);

/* Single precision variant (bla_real data). */
BLIS_EXPORT_BLAS int PASTEF77(s,sbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_real *alpha, const bla_real *a, const bla_integer *lda, const bla_real *x, const bla_integer *incx, const bla_real *beta, bla_real *y, const bla_integer *incy);

#endif
blis-0.6.1/frame/compat/f2c/bla_spmv.c000066400000000000000000000416511360743507500174660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* dspmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,spmv)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *ap, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy) { /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer info; bla_double temp1, temp2; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. 
Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n symmetric matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - DOUBLE PRECISION. 
*/ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. On exit, Y is overwritten by the updated */ /* vector y. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --y; --x; --ap; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 6; } else if (*incy == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("DSPMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (*alpha == 0. && *beta == 1.)) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ /* First form y := beta*y. */ if (*beta != 1.) { if (*incy == 1) { if (*beta == 0.) 
{ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.) { return 0; } kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form y when AP contains the upper triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { y[i__] += temp1 * ap[k]; temp2 += ap[k] * x[i__]; ++k; /* L50: */ } y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; kk += j; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; ix = kx; iy = ky; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { y[iy] += temp1 * ap[k]; temp2 += ap[k] * x[ix]; ix += *incx; iy += *incy; /* L70: */ } y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; jx += *incx; jy += *incy; kk += j; /* L80: */ } } } else { /* Form y when AP contains the lower triangle. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.; y[j] += temp1 * ap[kk]; k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { y[i__] += temp1 * ap[k]; temp2 += ap[k] * x[i__]; ++k; /* L90: */ } y[j] += *alpha * temp2; kk += *n - j + 1; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.; y[jy] += temp1 * ap[kk]; ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; iy += *incy; y[iy] += temp1 * ap[k]; temp2 += ap[k] * x[ix]; /* L110: */ } y[jy] += *alpha * temp2; jx += *incx; jy += *incy; kk += *n - j + 1; /* L120: */ } } } return 0; /* End of DSPMV . */ } /* dspmv_ */ /* sspmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,spmv)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *ap, const bla_real *x, const bla_integer *incx, const bla_real *beta, bla_real *y, const bla_integer *incy) { /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer info; bla_real temp1, temp2; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx, jy, kx, ky; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSPMV performs the matrix-vector operation */ /* y := alpha*A*x + beta*y, */ /* where alpha and beta are scalars, x and y are n element vectors and */ /* A is an n by n symmetric matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. 
*/ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - REAL . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* AP - REAL array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. */ /* Unchanged on exit. */ /* X - REAL array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* BETA - REAL . */ /* On entry, BETA specifies the scalar beta. When BETA is */ /* supplied as zero then Y need not be set on input. */ /* Unchanged on exit. */ /* Y - REAL array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. On exit, Y is overwritten by the updated */ /* vector y. */ /* INCY - INTEGER. 
*/ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --y; --x; --ap; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 6; } else if (*incy == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("SSPMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || (*alpha == 0.f && *beta == 1.f)) { return 0; } /* Set up the start points in X and Y. */ if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ /* First form y := beta*y. */ if (*beta != 1.f) { if (*incy == 1) { if (*beta == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = 0.f; /* L10: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[i__] = *beta * y[i__]; /* L20: */ } } } else { iy = ky; if (*beta == 0.f) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = 0.f; iy += *incy; /* L30: */ } } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { y[iy] = *beta * y[iy]; iy += *incy; /* L40: */ } } } } if (*alpha == 0.f) { return 0; } kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form y when AP contains the upper triangle. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.f; k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { y[i__] += temp1 * ap[k]; temp2 += ap[k] * x[i__]; ++k; /* L50: */ } y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2; kk += j; /* L60: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.f; ix = kx; iy = ky; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { y[iy] += temp1 * ap[k]; temp2 += ap[k] * x[ix]; ix += *incx; iy += *incy; /* L70: */ } y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2; jx += *incx; jy += *incy; kk += j; /* L80: */ } } } else { /* Form y when AP contains the lower triangle. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[j]; temp2 = 0.f; y[j] += temp1 * ap[kk]; k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { y[i__] += temp1 * ap[k]; temp2 += ap[k] * x[i__]; ++k; /* L90: */ } y[j] += *alpha * temp2; kk += *n - j + 1; /* L100: */ } } else { jx = kx; jy = ky; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp1 = *alpha * x[jx]; temp2 = 0.f; y[jy] += temp1 * ap[kk]; ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; iy += *incy; y[iy] += temp1 * ap[k]; temp2 += ap[k] * x[ix]; /* L110: */ } y[jy] += *alpha * temp2; jx += *incx; jy += *incy; kk += *n - j + 1; /* L120: */ } } } return 0; /* End of SSPMV . */ } /* sspmv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_spmv.h000066400000000000000000000042121360743507500174630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifdef BLIS_ENABLE_BLAS

/* Prototypes for the packed symmetric matrix-vector product
   y := alpha*A*x + beta*y (the BLAS "spmv" operation), declared only
   when BLIS_ENABLE_BLAS is defined.  Every argument is passed by
   pointer and only y is writable; ap is the packed triangle of A.
   NOTE(review): PASTEF77 presumably builds the Fortran-77 style symbol
   name from the type letter and the operation name -- confirm against
   its definition. */

/* Double precision variant (bla_double data). */
BLIS_EXPORT_BLAS int PASTEF77(d,spmv)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *ap, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy);

/* Single precision variant (bla_real data). */
BLIS_EXPORT_BLAS int PASTEF77(s,spmv)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *ap, const bla_real *x, const bla_integer *incx, const bla_real *beta, bla_real *y, const bla_integer *incy);

#endif
blis-0.6.1/frame/compat/f2c/bla_spr.c000066400000000000000000000331001360743507500172730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* dspr.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,spr)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *x, const bla_integer *incx, bla_double *ap) { /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer info; bla_double temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DSPR performs the symmetric rank 1 operation */ /* A := alpha*x*x' + A, */ /* where alpha is a bla_real scalar, x is an n element vector and A is an */ /* n by n symmetric matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. 
*/ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! 
PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } if (info != 0) { PASTEF770(xerbla)("DSPR ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.) { return 0; } /* Set the start point in X if the increment is not unity. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = *alpha * x[j]; k = kk; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ap[k] += x[i__] * temp; ++k; /* L10: */ } } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; ix = kx; i__2 = kk + j - 1; for (k = kk; k <= i__2; ++k) { ap[k] += x[ix] * temp; ix += *incx; /* L30: */ } } jx += *incx; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = *alpha * x[j]; k = kk; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ap[k] += x[i__] * temp; ++k; /* L50: */ } } kk = kk + *n - j + 1; /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = *alpha * x[jx]; ix = jx; i__2 = kk + *n - j; for (k = kk; k <= i__2; ++k) { ap[k] += x[ix] * temp; ix += *incx; /* L70: */ } } jx += *incx; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of DSPR . */ } /* dspr_ */ /* sspr.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,spr)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *x, const bla_integer *incx, bla_real *ap) { /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer info; bla_real temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSPR performs the symmetric rank 1 operation */ /* A := alpha*x*x' + A, */ /* where alpha is a bla_real scalar, x is an n element vector and A is an */ /* n by n symmetric matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - REAL . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - REAL array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* AP - REAL array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). 
*/ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } if (info != 0) { PASTEF770(xerbla)("SSPR ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.f) { return 0; } /* Set the start point in X if the increment is not unity. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. 
*/ kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.f) { temp = *alpha * x[j]; k = kk; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ap[k] += x[i__] * temp; ++k; /* L10: */ } } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f) { temp = *alpha * x[jx]; ix = kx; i__2 = kk + j - 1; for (k = kk; k <= i__2; ++k) { ap[k] += x[ix] * temp; ix += *incx; /* L30: */ } } jx += *incx; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.f) { temp = *alpha * x[j]; k = kk; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ap[k] += x[i__] * temp; ++k; /* L50: */ } } kk = kk + *n - j + 1; /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f) { temp = *alpha * x[jx]; ix = jx; i__2 = kk + *n - j; for (k = kk; k <= i__2; ++k) { ap[k] += x[ix] * temp; ix += *incx; /* L70: */ } } jx += *incx; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of SSPR . */ } /* sspr_ */ #endif blis-0.6.1/frame/compat/f2c/bla_spr.h000066400000000000000000000040001360743507500172750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS BLIS_EXPORT_BLAS int PASTEF77(d,spr)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *x, const bla_integer *incx, bla_double *ap); BLIS_EXPORT_BLAS int PASTEF77(s,spr)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *x, const bla_integer *incx, bla_real *ap); #endif blis-0.6.1/frame/compat/f2c/bla_spr2.c000066400000000000000000000372721360743507500173730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* dspr2.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,spr2)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *x, const bla_integer *incx, const bla_double *y, const bla_integer *incy, bla_double *ap) { /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer info; bla_double temp1, temp2; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx = 0, jy = 0, kx = 0, ky = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ /* .. */ /* Purpose */ /* ======= */ /* DSPR2 performs the symmetric rank 2 operation */ /* A := alpha*x*y' + alpha*y*x' + A, */ /* where alpha is a scalar, x and y are n element vectors and A is an */ /* n by n symmetric matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - DOUBLE PRECISION. */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Y - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ /* Unchanged on exit. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. 
On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --y; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("DSPR2 ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.) { return 0; } /* Set up the start points in X and Y if the increments are not both */ /* unity. */ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0. || y[j] != 0.) 
{ temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; k = kk; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2; ++k; /* L10: */ } } kk += j; /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0. || y[jy] != 0.) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = kx; iy = ky; i__2 = kk + j - 1; for (k = kk; k <= i__2; ++k) { ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L30: */ } } jx += *incx; jy += *incy; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0. || y[j] != 0.) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; k = kk; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2; ++k; /* L50: */ } } kk = kk + *n - j + 1; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0. || y[jy] != 0.) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk; k <= i__2; ++k) { ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L70: */ } } jx += *incx; jy += *incy; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of DSPR2 . */ } /* dspr2_ */ /* sspr2.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,spr2)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *x, const bla_integer *incx, const bla_real *y, const bla_integer *incy, bla_real *ap) { /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer info; bla_real temp1, temp2; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx = 0, jy = 0, kx = 0, ky = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSPR2 performs the symmetric rank 2 operation */ /* A := alpha*x*y' + alpha*y*x' + A, */ /* where alpha is a scalar, x and y are n element vectors and A is an */ /* n by n symmetric matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the upper or lower */ /* triangular part of the matrix A is supplied in the packed */ /* array AP as follows: */ /* UPLO = 'U' or 'u' The upper triangular part of A is */ /* supplied in AP. */ /* UPLO = 'L' or 'l' The lower triangular part of A is */ /* supplied in AP. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* ALPHA - REAL . */ /* On entry, ALPHA specifies the scalar alpha. */ /* Unchanged on exit. */ /* X - REAL array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. */ /* Unchanged on exit. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. 
*/ /* Y - REAL array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCY ) ). */ /* Before entry, the incremented array Y must contain the n */ /* element vector y. */ /* Unchanged on exit. */ /* INCY - INTEGER. */ /* On entry, INCY specifies the increment for the elements of */ /* Y. INCY must not be zero. */ /* Unchanged on exit. */ /* AP - REAL array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */ /* and a( 2, 2 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the upper triangular part of the */ /* updated matrix. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular part of the symmetric matrix */ /* packed sequentially, column by column, so that AP( 1 ) */ /* contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */ /* and a( 3, 1 ) respectively, and so on. On exit, the array */ /* AP is overwritten by the lower triangular part of the */ /* updated matrix. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --ap; --y; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! 
PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (*n < 0) { info = 2; } else if (*incx == 0) { info = 5; } else if (*incy == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("SSPR2 ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0 || *alpha == 0.f) { return 0; } /* Set up the start points in X and Y if the increments are not both */ /* unity. */ if (*incx != 1 || *incy != 1) { if (*incx > 0) { kx = 1; } else { kx = 1 - (*n - 1) * *incx; } if (*incy > 0) { ky = 1; } else { ky = 1 - (*n - 1) * *incy; } jx = kx; jy = ky; } /* Start the operations. In this version the elements of the array AP */ /* are accessed sequentially with one pass through AP. */ kk = 1; if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { /* Form A when upper triangle is stored in AP. */ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.f || y[j] != 0.f) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; k = kk; i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2; ++k; /* L10: */ } } kk += j; /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f || y[jy] != 0.f) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = kx; iy = ky; i__2 = kk + j - 1; for (k = kk; k <= i__2; ++k) { ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L30: */ } } jx += *incx; jy += *incy; kk += j; /* L40: */ } } } else { /* Form A when lower triangle is stored in AP. 
*/ if (*incx == 1 && *incy == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.f || y[j] != 0.f) { temp1 = *alpha * y[j]; temp2 = *alpha * x[j]; k = kk; i__2 = *n; for (i__ = j; i__ <= i__2; ++i__) { ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2; ++k; /* L50: */ } } kk = kk + *n - j + 1; /* L60: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f || y[jy] != 0.f) { temp1 = *alpha * y[jy]; temp2 = *alpha * x[jx]; ix = jx; iy = jy; i__2 = kk + *n - j; for (k = kk; k <= i__2; ++k) { ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2; ix += *incx; iy += *incy; /* L70: */ } } jx += *incx; jy += *incy; kk = kk + *n - j + 1; /* L80: */ } } } return 0; /* End of SSPR2 . */ } /* sspr2_ */ #endif blis-0.6.1/frame/compat/f2c/bla_spr2.h000066400000000000000000000041341360743507500173670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS BLIS_EXPORT_BLAS int PASTEF77(d,spr2)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *x, const bla_integer *incx, const bla_double *y, const bla_integer *incy, bla_double *ap); BLIS_EXPORT_BLAS int PASTEF77(s,spr2)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *x, const bla_integer *incx, const bla_real *y, const bla_integer *incy, bla_real *ap); #endif blis-0.6.1/frame/compat/f2c/bla_tbmv.c000066400000000000000000002041231360743507500174440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* ctbmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(c,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_scomplex *a, const bla_integer *lda, bla_scomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. 
*/ /* Purpose */ /* ======= */ /* CTBMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := conjg( A' )*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - COMPLEX array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. 
*/ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - COMPLEX array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. 
*/ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("CTBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1); nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := A*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { i__2 = j; bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = i__; i__3 = i__; i__5 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); bli_csets( (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); /* L10: */ } if (nounit) { i__4 = j; i__2 = j; i__3 = kplus1 + j * a_dim1; bli_csets( (bli_creal(x[i__2]) * bli_creal(a[i__3]) - bli_cimag(x[i__2]) * bli_cimag(a[i__3])), (bli_creal(x[i__2]) * bli_cimag(a[i__3]) + bli_cimag(x[i__2]) * bli_creal(a[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); } } /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__4 = jx; if (bli_creal(x[i__4]) != 0.f || bli_cimag(x[i__4]) != 0.f) { i__4 = jx; bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = ix; i__2 = ix; i__5 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); bli_csets( (bli_creal(x[i__2]) + bli_creal(q__2)), (bli_cimag(x[i__2]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); ix += *incx; /* L30: */ } if (nounit) { i__3 = jx; i__4 = jx; i__2 = kplus1 + j * a_dim1; bli_csets( (bli_creal(x[i__4]) * 
bli_creal(a[i__2]) - bli_cimag(x[i__4]) * bli_cimag(a[i__2])), (bli_creal(x[i__4]) * bli_cimag(a[i__2]) + bli_cimag(x[i__4]) * bli_creal(a[i__2])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); } } jx += *incx; if (j > *k) { kx += *incx; } /* L40: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; if (bli_creal(x[i__1]) != 0.f || bli_cimag(x[i__1]) != 0.f) { i__1 = j; bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = f2c_min(i__1,i__3); i__ >= i__4; --i__) { i__1 = i__; i__3 = i__; i__2 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__2]) - bli_cimag(temp) * bli_cimag(a[i__2])), (bli_creal(temp) * bli_cimag(a[i__2]) + bli_cimag(temp) * bli_creal(a[i__2])), q__2 ); bli_csets( (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); /* L50: */ } if (nounit) { i__4 = j; i__1 = j; i__3 = j * a_dim1 + 1; bli_csets( (bli_creal(x[i__1]) * bli_creal(a[i__3]) - bli_cimag(x[i__1]) * bli_cimag(a[i__3])), (bli_creal(x[i__1]) * bli_cimag(a[i__3]) + bli_cimag(x[i__1]) * bli_creal(a[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); } } /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { i__4 = jx; if (bli_creal(x[i__4]) != 0.f || bli_cimag(x[i__4]) != 0.f) { i__4 = jx; bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = f2c_min(i__4,i__1); i__ >= i__3; --i__) { i__4 = ix; i__1 = ix; i__2 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__2]) - bli_cimag(temp) * bli_cimag(a[i__2])), (bli_creal(temp) * bli_cimag(a[i__2]) + bli_cimag(temp) * bli_creal(a[i__2])), q__2 ); bli_csets( (bli_creal(x[i__1]) + bli_creal(q__2)), (bli_cimag(x[i__1]) + bli_cimag(q__2)), q__1 
); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__4] ); ix -= *incx; /* L70: */ } if (nounit) { i__3 = jx; i__4 = jx; i__1 = j * a_dim1 + 1; bli_csets( (bli_creal(x[i__4]) * bli_creal(a[i__1]) - bli_cimag(x[i__4]) * bli_cimag(a[i__1])), (bli_creal(x[i__4]) * bli_cimag(a[i__1]) + bli_cimag(x[i__4]) * bli_creal(a[i__1])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); } } jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L80: */ } } } } else { /* Form x := A'*x or x := conjg( A' )*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { i__3 = j; bli_csets( (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); l = kplus1 - j; if (noconj) { if (nounit) { i__3 = kplus1 + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__3]) - bli_cimag(temp) * bli_cimag(a[i__3])), (bli_creal(temp) * bli_cimag(a[i__3]) + bli_cimag(temp) * bli_creal(a[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { i__4 = l + i__ + j * a_dim1; i__1 = i__; bli_csets( (bli_creal(a[i__4]) * bli_creal(x[i__1]) - bli_cimag(a[i__4]) * bli_cimag(x[i__1])), (bli_creal(a[i__4]) * bli_cimag(x[i__1]) + bli_cimag(a[i__4]) * bli_creal(x[i__1])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L90: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = i__; bli_csets( 
(bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L100: */ } } i__3 = j; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); /* L110: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { i__3 = jx; bli_csets( (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); kx -= *incx; ix = kx; l = kplus1 - j; if (noconj) { if (nounit) { i__3 = kplus1 + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__3]) - bli_cimag(temp) * bli_cimag(a[i__3])), (bli_creal(temp) * bli_cimag(a[i__3]) + bli_cimag(temp) * bli_creal(a[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { i__4 = l + i__ + j * a_dim1; i__1 = ix; bli_csets( (bli_creal(a[i__4]) * bli_creal(x[i__1]) - bli_cimag(a[i__4]) * bli_cimag(x[i__1])), (bli_creal(a[i__4]) * bli_cimag(x[i__1]) + bli_cimag(a[i__4]) * bli_creal(x[i__1])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L120: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + 
bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L130: */ } } i__3 = jx; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); jx -= *incx; /* L140: */ } } } else { if (*incx == 1) { i__3 = *n; for (j = 1; j <= i__3; ++j) { i__4 = j; bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); l = 1 - j; if (noconj) { if (nounit) { i__4 = j * a_dim1 + 1; bli_csets( (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { i__1 = l + i__ + j * a_dim1; i__2 = i__; bli_csets( (bli_creal(a[i__1]) * bli_creal(x[i__2]) - bli_cimag(a[i__1]) * bli_cimag(x[i__2])), (bli_creal(a[i__1]) * bli_cimag(x[i__2]) + bli_cimag(a[i__1]) * bli_creal(x[i__2])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L150: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &a[j * a_dim1 + 1]); bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__1 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); 
bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L160: */ } } i__4 = j; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); /* L170: */ } } else { jx = kx; i__3 = *n; for (j = 1; j <= i__3; ++j) { i__4 = jx; bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); kx += *incx; ix = kx; l = 1 - j; if (noconj) { if (nounit) { i__4 = j * a_dim1 + 1; bli_csets( (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { i__1 = l + i__ + j * a_dim1; i__2 = ix; bli_csets( (bli_creal(a[i__1]) * bli_creal(x[i__2]) - bli_cimag(a[i__1]) * bli_cimag(x[i__2])), (bli_creal(a[i__1]) * bli_cimag(x[i__2]) + bli_cimag(a[i__1]) * bli_creal(x[i__2])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L180: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &a[j * a_dim1 + 1]); bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__1 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L190: */ } } i__4 = jx; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), 
x[i__4] ); jx += *incx; /* L200: */ } } } } return 0; /* End of CTBMV . */ } /* ctbmv_ */ /* dtbmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_double *a, const bla_integer *lda, bla_double *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ bla_integer info; bla_double temp; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTBMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := A'*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. 
*/ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. 
*/ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! 
PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("DTBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := A*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = x[j]; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { x[i__] += temp * a[l + i__ + j * a_dim1]; /* L10: */ } if (nounit) { x[j] *= a[kplus1 + j * a_dim1]; } } /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { x[ix] += temp * a[l + i__ + j * a_dim1]; ix += *incx; /* L30: */ } if (nounit) { x[jx] *= a[kplus1 + j * a_dim1]; } } jx += *incx; if (j > *k) { kx += *incx; } /* L40: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) 
{ temp = x[j]; l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = f2c_min(i__1,i__3); i__ >= i__4; --i__) { x[i__] += temp * a[l + i__ + j * a_dim1]; /* L50: */ } if (nounit) { x[j] *= a[j * a_dim1 + 1]; } } /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = f2c_min(i__4,i__1); i__ >= i__3; --i__) { x[ix] += temp * a[l + i__ + j * a_dim1]; ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= a[j * a_dim1 + 1]; } } jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L80: */ } } } } else { /* Form x := A'*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; l = kplus1 - j; if (nounit) { temp *= a[kplus1 + j * a_dim1]; } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { temp += a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } x[j] = temp; /* L100: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; kx -= *incx; ix = kx; l = kplus1 - j; if (nounit) { temp *= a[kplus1 + j * a_dim1]; } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { temp += a[l + i__ + j * a_dim1] * x[ix]; ix -= *incx; /* L110: */ } x[jx] = temp; jx -= *incx; /* L120: */ } } } else { if (*incx == 1) { i__3 = *n; for (j = 1; j <= i__3; ++j) { temp = x[j]; l = 1 - j; if (nounit) { temp *= a[j * a_dim1 + 1]; } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { temp += a[l + i__ + j * a_dim1] * x[i__]; /* L130: */ } x[j] = temp; /* L140: */ } } else { jx = kx; i__3 = *n; for (j = 1; j <= i__3; ++j) { temp = x[jx]; kx += *incx; ix = kx; l = 1 - j; if (nounit) { temp *= a[j * a_dim1 + 1]; } /* Computing MIN */ i__1 = *n, i__2 = 
j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { temp += a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L150: */ } x[jx] = temp; jx += *incx; /* L160: */ } } } } return 0; /* End of DTBMV . */ } /* dtbmv_ */ /* stbmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_real *a, const bla_integer *lda, bla_real *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ bla_integer info; bla_real temp; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* STBMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := A'*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. 
*/ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - REAL array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. 
*/ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - REAL array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! 
PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("STBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := A*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.f) { temp = x[j]; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { x[i__] += temp * a[l + i__ + j * a_dim1]; /* L10: */ } if (nounit) { x[j] *= a[kplus1 + j * a_dim1]; } } /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f) { temp = x[jx]; ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { x[ix] += temp * a[l + i__ + j * a_dim1]; ix += *incx; /* L30: */ } if (nounit) { x[jx] *= a[kplus1 + j * a_dim1]; } } jx += *incx; if (j > *k) { kx += *incx; } /* L40: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.f) { temp = x[j]; l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = f2c_min(i__1,i__3); i__ >= i__4; --i__) { x[i__] += temp * a[l + i__ + j * a_dim1]; /* L50: */ } if (nounit) { x[j] *= a[j * a_dim1 + 1]; } } /* L60: */ } 
} else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.f) { temp = x[jx]; ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = f2c_min(i__4,i__1); i__ >= i__3; --i__) { x[ix] += temp * a[l + i__ + j * a_dim1]; ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= a[j * a_dim1 + 1]; } } jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L80: */ } } } } else { /* Form x := A'*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; l = kplus1 - j; if (nounit) { temp *= a[kplus1 + j * a_dim1]; } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { temp += a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } x[j] = temp; /* L100: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; kx -= *incx; ix = kx; l = kplus1 - j; if (nounit) { temp *= a[kplus1 + j * a_dim1]; } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { temp += a[l + i__ + j * a_dim1] * x[ix]; ix -= *incx; /* L110: */ } x[jx] = temp; jx -= *incx; /* L120: */ } } } else { if (*incx == 1) { i__3 = *n; for (j = 1; j <= i__3; ++j) { temp = x[j]; l = 1 - j; if (nounit) { temp *= a[j * a_dim1 + 1]; } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { temp += a[l + i__ + j * a_dim1] * x[i__]; /* L130: */ } x[j] = temp; /* L140: */ } } else { jx = kx; i__3 = *n; for (j = 1; j <= i__3; ++j) { temp = x[jx]; kx += *incx; ix = kx; l = 1 - j; if (nounit) { temp *= a[j * a_dim1 + 1]; } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { temp += a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L150: */ } x[jx] = temp; jx += *incx; /* L160: */ } } } } return 0; /* End of STBMV . 
*/ } /* stbmv_ */ /* ztbmv_: BLAS level-2 compatibility routine (f2c translation) that overwrites the double-complex vector x with A*x, A'*x or conjg( A' )*x, where A is a triangular band matrix held in band storage. Every path returns 0; an invalid argument is reported through xerbla and leaves x untouched. */ /* ztbmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_dcomplex *a, const bla_integer *lda, bla_dcomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZTBMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular band matrix, with ( k + 1 ) diagonals. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := conjg( A' )*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. 
*/ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. 
*/ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* transformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments: shift the local a and x pointers back so that both can be indexed with Fortran-style 1-based subscripts (a[i + j * a_dim1], x[i]) in the rest of the routine. */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ /* info records the 1-based position, within the argument list, of the first invalid argument found (it stays 0 when all checks pass). */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! 
PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! 
PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("ZTBMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } /* noconj is set only by the TRANS = 'T' comparison; inside the transposed branch below a false value selects the conjugating (TRANS = 'C') code path. nounit is set by the DIAG = 'N' comparison and gates every use of the stored diagonal of A. */ noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1); nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ /* For incx == 1 neither branch runs: kx keeps its initial value 0 and is not read by the unit-stride loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed sequentially with one pass through A. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := A*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { /* Upper triangular, unit stride: columns are visited from first to last, and a column whose x(j) is exactly zero is skipped. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ i__2 = j; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = i__; i__3 = i__; i__5 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); /* L10: */ } /* x(j) is scaled by the diagonal element only after the off-diagonal updates, which used the copy of the original x(j) held in temp. */ if (nounit) { i__4 = j; i__2 = j; i__3 = kplus1 + j * a_dim1; bli_zsets( (bli_zreal(x[i__2]) * bli_zreal(a[i__3]) - bli_zimag(x[i__2]) * bli_zimag(a[i__3])), (bli_zreal(x[i__2]) * bli_zimag(a[i__3]) + bli_zimag(x[i__2]) * bli_zreal(a[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); } } /* L20: */ } } else { /* General stride: jx is the offset of x(j); ix walks the in-band rows of column j starting from kx. */ jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__4 = jx; if (bli_zreal(x[i__4]) != 0. || bli_zimag(x[i__4]) != 0.) 
{ i__4 = jx; bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { i__4 = ix; i__2 = ix; i__5 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(x[i__2]) + bli_zreal(z__2)), (bli_zimag(x[i__2]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); ix += *incx; /* L30: */ } if (nounit) { i__3 = jx; i__4 = jx; i__2 = kplus1 + j * a_dim1; bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(a[i__2]) - bli_zimag(x[i__4]) * bli_zimag(a[i__2])), (bli_zreal(x[i__4]) * bli_zimag(a[i__2]) + bli_zimag(x[i__4]) * bli_zreal(a[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); } } jx += *incx; /* When j > k the first in-band row of the next column, f2c_max(1, j + 1 - k), is one row further down, so the starting offset kx advances by one stride. */ if (j > *k) { kx += *incx; } /* L40: */ } } } else { /* Lower triangular: columns are visited from last to first. */ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; if (bli_zreal(x[i__1]) != 0. || bli_zimag(x[i__1]) != 0.) 
{ i__1 = j; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = f2c_min(i__1,i__3); i__ >= i__4; --i__) { i__1 = i__; i__3 = i__; i__2 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__2]) - bli_zimag(temp) * bli_zimag(a[i__2])), (bli_zreal(temp) * bli_zimag(a[i__2]) + bli_zimag(temp) * bli_zreal(a[i__2])), z__2 ); bli_zsets( (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); /* L50: */ } if (nounit) { i__4 = j; i__1 = j; i__3 = j * a_dim1 + 1; bli_zsets( (bli_zreal(x[i__1]) * bli_zreal(a[i__3]) - bli_zimag(x[i__1]) * bli_zimag(a[i__3])), (bli_zreal(x[i__1]) * bli_zimag(a[i__3]) + bli_zimag(x[i__1]) * bli_zreal(a[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); } } /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { i__4 = jx; if (bli_zreal(x[i__4]) != 0. || bli_zimag(x[i__4]) != 0.) 
{ i__4 = jx; bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = f2c_min(i__4,i__1); i__ >= i__3; --i__) { i__4 = ix; i__1 = ix; i__2 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__2]) - bli_zimag(temp) * bli_zimag(a[i__2])), (bli_zreal(temp) * bli_zimag(a[i__2]) + bli_zimag(temp) * bli_zreal(a[i__2])), z__2 ); bli_zsets( (bli_zreal(x[i__1]) + bli_zreal(z__2)), (bli_zimag(x[i__1]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__4] ); ix -= *incx; /* L70: */ } if (nounit) { i__3 = jx; i__4 = jx; i__1 = j * a_dim1 + 1; bli_zsets( (bli_zreal(x[i__4]) * bli_zreal(a[i__1]) - bli_zimag(x[i__4]) * bli_zimag(a[i__1])), (bli_zreal(x[i__4]) * bli_zimag(a[i__1]) + bli_zimag(x[i__4]) * bli_zreal(a[i__1])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); } } jx -= *incx; /* When j + k <= n the last in-band row of the next column visited (j - 1), f2c_min(n, j - 1 + k), is one row higher up, so kx steps back by one stride. */ if (*n - j >= *k) { kx -= *incx; } /* L80: */ } } } } else { /* Form x := A'*x or x := conjg( A' )*x. 
*/ /* In the transposed forms each x(j) is accumulated in temp from the in-band entries of column j and written back to x once per column. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { i__3 = j; bli_zsets( (bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); l = kplus1 - j; if (noconj) { if (nounit) { i__3 = kplus1 + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__3]) - bli_zimag(temp) * bli_zimag(a[i__3])), (bli_zreal(temp) * bli_zimag(a[i__3]) + bli_zimag(temp) * bli_zreal(a[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { i__4 = l + i__ + j * a_dim1; i__1 = i__; bli_zsets( (bli_zreal(a[i__4]) * bli_zreal(x[i__1]) - bli_zimag(a[i__4]) * bli_zimag(x[i__1])), (bli_zreal(a[i__4]) * bli_zimag(x[i__1]) + bli_zimag(a[i__4]) * bli_zreal(x[i__1])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L90: */ } } else { /* TRANS = 'C' path. NOTE(review): bla_d_cnjg is defined elsewhere; it presumably stores the complex conjugate of its second argument in its first - confirm. */ if (nounit) { bla_d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L100: */ } } i__3 = j; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); /* L110: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { i__3 = jx; bli_zsets( 
(bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); kx -= *incx; ix = kx; l = kplus1 - j; if (noconj) { if (nounit) { i__3 = kplus1 + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__3]) - bli_zimag(temp) * bli_zimag(a[i__3])), (bli_zreal(temp) * bli_zimag(a[i__3]) + bli_zimag(temp) * bli_zreal(a[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { i__4 = l + i__ + j * a_dim1; i__1 = ix; bli_zsets( (bli_zreal(a[i__4]) * bli_zreal(x[i__1]) - bli_zimag(a[i__4]) * bli_zimag(x[i__1])), (bli_zreal(a[i__4]) * bli_zimag(x[i__1]) + bli_zimag(a[i__4]) * bli_zreal(x[i__1])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L120: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MAX */ i__4 = 1, i__1 = j - *k; i__3 = f2c_max(i__4,i__1); for (i__ = j - 1; i__ >= i__3; --i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), (bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L130: */ } } i__3 = jx; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); jx -= *incx; /* L140: */ } } } else { if (*incx == 1) { i__3 = *n; for (j = 1; j <= i__3; ++j) { i__4 = j; bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); l = 1 - j; if (noconj) { if (nounit) { i__4 
= j * a_dim1 + 1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { i__1 = l + i__ + j * a_dim1; i__2 = i__; bli_zsets( (bli_zreal(a[i__1]) * bli_zreal(x[i__2]) - bli_zimag(a[i__1]) * bli_zimag(x[i__2])), (bli_zreal(a[i__1]) * bli_zimag(x[i__2]) + bli_zimag(a[i__1]) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L150: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &a[j * a_dim1 + 1]); bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__1 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L160: */ } } i__4 = j; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); /* L170: */ } } else { jx = kx; i__3 = *n; for (j = 1; j <= i__3; ++j) { i__4 = jx; bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); kx += *incx; ix = kx; l = 1 - j; if (noconj) { if (nounit) { i__4 = j * a_dim1 + 1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + 
bli_zimag(temp) * bli_zreal(a[i__4])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { i__1 = l + i__ + j * a_dim1; i__2 = ix; bli_zsets( (bli_zreal(a[i__1]) * bli_zreal(x[i__2]) - bli_zimag(a[i__1]) * bli_zimag(x[i__2])), (bli_zreal(a[i__1]) * bli_zimag(x[i__2]) + bli_zimag(a[i__1]) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L180: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &a[j * a_dim1 + 1]); bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } /* Computing MIN */ i__1 = *n, i__2 = j + *k; i__4 = f2c_min(i__1,i__2); for (i__ = j + 1; i__ <= i__4; ++i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__1 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L190: */ } } i__4 = jx; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); jx += *incx; /* L200: */ } } } } return 0; /* End of ZTBMV . */ } /* ztbmv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_tbmv.h000066400000000000000000000052261360743507500174540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS /* Prototypes for the BLAS-compatibility xTBMV routines (triangular band matrix-vector multiply), one per datatype prefix: c (bla_scomplex), d (bla_double), s (bla_real) and z (bla_dcomplex). All four take the same argument list and are declared only when BLIS_ENABLE_BLAS is defined. */ BLIS_EXPORT_BLAS int PASTEF77(c,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_scomplex *a, const bla_integer *lda, bla_scomplex *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(d,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_double *a, const bla_integer *lda, bla_double *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(s,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_real *a, const bla_integer *lda, bla_real *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(z,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_dcomplex *a, const bla_integer *lda, bla_dcomplex *x, const bla_integer *incx); #endif blis-0.6.1/frame/compat/f2c/bla_tbsv.c000066400000000000000000001761311360743507500174610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* ctbsv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(c,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_scomplex *a, const bla_integer *lda, bla_scomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ //void bla_c_div(bla_scomplex *, bla_scomplex *, bla_scomplex *), bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. 
*/ /* Purpose */ /* ======= */ /* CTBSV solves one of the systems of equations */ /* A*x = b, or A'*x = b, or conjg( A' )*x = b, */ /* where b and x are n element vectors and A is an n by n unit, or */ /* non-unit, upper or lower triangular band matrix, with ( k + 1 ) */ /* diagonals. */ /* No test for singularity or near-singularity is included in this */ /* routine. Such tests must be performed before calling this routine. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the equations to be solved as */ /* follows: */ /* TRANS = 'N' or 'n' A*x = b. */ /* TRANS = 'T' or 't' A'*x = b. */ /* TRANS = 'C' or 'c' conjg( A' )*x = b. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - COMPLEX array of DIMENSION ( LDA, n ). 
*/ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - COMPLEX array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). 
*/ /* Before entry, the incremented array X must contain the n */ /* element right-hand side vector b. On exit, X is overwritten */ /* with the solution vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("CTBSV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1); nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. 
In this version the elements of A are */ /* accessed by sequentially with one pass through A. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := inv( A )*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; if (bli_creal(x[i__1]) != 0.f || bli_cimag(x[i__1]) != 0.f) { l = kplus1 - j; if (nounit) { i__1 = j; bla_c_div(&q__1, &x[j], &a[kplus1 + j * a_dim1]); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } i__1 = j; bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { i__2 = i__; i__3 = i__; i__4 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__2 ); bli_csets( (bli_creal(x[i__3]) - bli_creal(q__2)), (bli_cimag(x[i__3]) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); /* L10: */ } } /* L20: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { kx -= *incx; i__1 = jx; if (bli_creal(x[i__1]) != 0.f || bli_cimag(x[i__1]) != 0.f) { ix = kx; l = kplus1 - j; if (nounit) { i__1 = jx; bla_c_div(&q__1, &x[jx], &a[kplus1 + j * a_dim1]); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } i__1 = jx; bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { i__2 = ix; i__3 = ix; i__4 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__4]) - bli_cimag(temp) * bli_cimag(a[i__4])), (bli_creal(temp) * bli_cimag(a[i__4]) + bli_cimag(temp) * bli_creal(a[i__4])), q__2 ); bli_csets( (bli_creal(x[i__3]) - bli_creal(q__2)), (bli_cimag(x[i__3]) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), 
(bli_cimag(q__1)), x[i__2] ); ix -= *incx; /* L30: */ } } jx -= *incx; /* L40: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { l = 1 - j; if (nounit) { i__2 = j; bla_c_div(&q__1, &x[j], &a[j * a_dim1 + 1]); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } i__2 = j; bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); bli_csets( (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); /* L50: */ } } /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { kx += *incx; i__2 = jx; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { ix = kx; l = 1 - j; if (nounit) { i__2 = jx; bla_c_div(&q__1, &x[jx], &a[j * a_dim1 + 1]); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } i__2 = jx; bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = ix; i__4 = ix; i__5 = l + i__ + j * a_dim1; bli_csets( (bli_creal(temp) * bli_creal(a[i__5]) - bli_cimag(temp) * bli_cimag(a[i__5])), (bli_creal(temp) * bli_cimag(a[i__5]) + bli_cimag(temp) * bli_creal(a[i__5])), q__2 ); bli_csets( (bli_creal(x[i__4]) - bli_creal(q__2)), (bli_cimag(x[i__4]) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); ix += *incx; /* L70: */ } } jx += *incx; /* L80: */ } } } } else { /* Form x := inv( A' )*x or x := inv( conjg( A') )*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); l = kplus1 - j; if (noconj) { /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = l + i__ + j * a_dim1; i__3 = i__; bli_csets( (bli_creal(a[i__2]) * bli_creal(x[i__3]) - bli_cimag(a[i__2]) * bli_cimag(x[i__3])), (bli_creal(a[i__2]) * bli_cimag(x[i__3]) + bli_cimag(a[i__2]) * bli_creal(x[i__3])), q__2 ); bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L90: */ } if (nounit) { bla_c_div(&q__1, &temp, &a[kplus1 + j * a_dim1]); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__4 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__4]) - bli_cimag(q__3) * bli_cimag(x[i__4])), (bli_creal(q__3) * bli_cimag(x[i__4]) + bli_cimag(q__3) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L100: */ } if (nounit) { bla_r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); bla_c_div(&q__1, &temp, &q__2); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__3 = j; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__3] ); /* L110: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = jx; bli_csets( (bli_creal(x[i__3])), (bli_cimag(x[i__3])), temp ); ix = kx; l = kplus1 - j; if (noconj) { /* Computing MAX */ i__3 = 1, i__4 = j - *k; i__2 = j - 1; for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { i__3 = l + i__ + j * a_dim1; i__4 = ix; bli_csets( (bli_creal(a[i__3]) * bli_creal(x[i__4]) - 
bli_cimag(a[i__3]) * bli_cimag(x[i__4])), (bli_creal(a[i__3]) * bli_cimag(x[i__4]) + bli_cimag(a[i__3]) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L120: */ } if (nounit) { bla_c_div(&q__1, &temp, &a[kplus1 + j * a_dim1]); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__2 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix += *incx; /* L130: */ } if (nounit) { bla_r_cnjg(&q__2, &a[kplus1 + j * a_dim1]); bla_c_div(&q__1, &temp, &q__2); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__4 = jx; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); jx += *incx; if (j > *k) { kx += *incx; } /* L140: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); l = 1 - j; if (noconj) { /* Computing MIN */ i__1 = *n, i__4 = j + *k; i__2 = j + 1; for (i__ = f2c_min(i__1,i__4); i__ >= i__2; --i__) { i__1 = l + i__ + j * a_dim1; i__4 = i__; bli_csets( (bli_creal(a[i__1]) * bli_creal(x[i__4]) - bli_cimag(a[i__1]) * bli_cimag(x[i__4])), (bli_creal(a[i__1]) * bli_cimag(x[i__4]) + bli_cimag(a[i__1]) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L150: */ } if (nounit) { bla_c_div(&q__1, &temp, &a[j * a_dim1 + 1]); bli_csets( (bli_creal(q__1)), 
(bli_cimag(q__1)), temp ); } } else { /* Computing MIN */ i__2 = *n, i__1 = j + *k; i__4 = j + 1; for (i__ = f2c_min(i__2,i__1); i__ >= i__4; --i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__2 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L160: */ } if (nounit) { bla_r_cnjg(&q__2, &a[j * a_dim1 + 1]); bla_c_div(&q__1, &temp, &q__2); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__4 = j; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__4] ); /* L170: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { i__4 = jx; bli_csets( (bli_creal(x[i__4])), (bli_cimag(x[i__4])), temp ); ix = kx; l = 1 - j; if (noconj) { /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__1 = j + 1; for (i__ = f2c_min(i__4,i__2); i__ >= i__1; --i__) { i__4 = l + i__ + j * a_dim1; i__2 = ix; bli_csets( (bli_creal(a[i__4]) * bli_creal(x[i__2]) - bli_cimag(a[i__4]) * bli_cimag(x[i__2])), (bli_creal(a[i__4]) * bli_cimag(x[i__2]) + bli_cimag(a[i__4]) * bli_creal(x[i__2])), q__2 ); bli_csets( (bli_creal(temp) - bli_creal(q__2)), (bli_cimag(temp) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L180: */ } if (nounit) { bla_c_div(&q__1, &temp, &a[j * a_dim1 + 1]); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } else { /* Computing MIN */ i__1 = *n, i__4 = j + *k; i__2 = j + 1; for (i__ = f2c_min(i__1,i__4); i__ >= i__2; --i__) { bla_r_cnjg(&q__3, &a[l + i__ + j * a_dim1]); i__1 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); bli_csets( (bli_creal(temp) - bli_creal(q__2)), 
(bli_cimag(temp) - bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ix -= *incx; /* L190: */ } if (nounit) { bla_r_cnjg(&q__2, &a[j * a_dim1 + 1]); bla_c_div(&q__1, &temp, &q__2); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } } i__2 = jx; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L200: */ } } } } return 0; /* End of CTBSV . */ } /* ctbsv_ */ /* dtbsv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_double *a, const bla_integer *lda, bla_double *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ bla_integer info; bla_double temp; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DTBSV solves one of the systems of equations */ /* A*x = b, or A'*x = b, */ /* where b and x are n element vectors and A is an n by n unit, or */ /* non-unit, upper or lower triangular band matrix, with ( k + 1 ) */ /* diagonals. */ /* No test for singularity or near-singularity is included in this */ /* routine. Such tests must be performed before calling this routine. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. 
*/ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the equations to be solved as */ /* follows: */ /* TRANS = 'N' or 'n' A*x = b. */ /* TRANS = 'T' or 't' A'*x = b. */ /* TRANS = 'C' or 'c' A'*x = b. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. 
*/ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. */ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element right-hand side vector b. On exit, X is overwritten */ /* with the solution vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. 
*/ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("DTBSV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed by sequentially with one pass through A. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := inv( A )*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) 
{ l = kplus1 - j; if (nounit) { x[j] /= a[kplus1 + j * a_dim1]; } temp = x[j]; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { x[i__] -= temp * a[l + i__ + j * a_dim1]; /* L10: */ } } /* L20: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { kx -= *incx; if (x[jx] != 0.) { ix = kx; l = kplus1 - j; if (nounit) { x[jx] /= a[kplus1 + j * a_dim1]; } temp = x[jx]; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { x[ix] -= temp * a[l + i__ + j * a_dim1]; ix -= *incx; /* L30: */ } } jx -= *incx; /* L40: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { l = 1 - j; if (nounit) { x[j] /= a[j * a_dim1 + 1]; } temp = x[j]; /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { x[i__] -= temp * a[l + i__ + j * a_dim1]; /* L50: */ } } /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { kx += *incx; if (x[jx] != 0.) { ix = kx; l = 1 - j; if (nounit) { x[jx] /= a[j * a_dim1 + 1]; } temp = x[jx]; /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { x[ix] -= temp * a[l + i__ + j * a_dim1]; ix += *incx; /* L70: */ } } jx += *incx; /* L80: */ } } } } else { /* Form x := inv( A')*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { temp -= a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } if (nounit) { temp /= a[kplus1 + j * a_dim1]; } x[j] = temp; /* L100: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { temp -= a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } if (nounit) { temp /= a[kplus1 + j * a_dim1]; } x[jx] = temp; jx += *incx; if (j > *k) { kx += *incx; } /* L120: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = f2c_min(i__1,i__3); i__ >= i__4; --i__) { temp -= a[l + i__ + j * a_dim1] * x[i__]; /* L130: */ } if (nounit) { temp /= a[j * a_dim1 + 1]; } x[j] = temp; /* L140: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = f2c_min(i__4,i__1); i__ >= i__3; --i__) { temp -= a[l + i__ + j * a_dim1] * x[ix]; ix -= *incx; /* L150: */ } if (nounit) { temp /= a[j * a_dim1 + 1]; } x[jx] = temp; jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L160: */ } } } } return 0; /* End of DTBSV . */ } /* dtbsv_ */ /* stbsv.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_real *a, const bla_integer *lda, bla_real *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ bla_integer info; bla_real temp; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* STBSV solves one of the systems of equations */ /* A*x = b, or A'*x = b, */ /* where b and x are n element vectors and A is an n by n unit, or */ /* non-unit, upper or lower triangular band matrix, with ( k + 1 ) */ /* diagonals. */ /* No test for singularity or near-singularity is included in this */ /* routine. Such tests must be performed before calling this routine. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the equations to be solved as */ /* follows: */ /* TRANS = 'N' or 'n' A*x = b. */ /* TRANS = 'T' or 't' A'*x = b. */ /* TRANS = 'C' or 'c' A'*x = b. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. 
*/ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - REAL array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. 
*/ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - REAL array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element right-hand side vector b. On exit, X is overwritten */ /* with the solution vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! 
PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("STBSV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed by sequentially with one pass through A. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := inv( A )*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.f) { l = kplus1 - j; if (nounit) { x[j] /= a[kplus1 + j * a_dim1]; } temp = x[j]; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { x[i__] -= temp * a[l + i__ + j * a_dim1]; /* L10: */ } } /* L20: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { kx -= *incx; if (x[jx] != 0.f) { ix = kx; l = kplus1 - j; if (nounit) { x[jx] /= a[kplus1 + j * a_dim1]; } temp = x[jx]; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { x[ix] -= temp * a[l + i__ + j * a_dim1]; ix -= *incx; /* L30: */ } } jx -= *incx; /* L40: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.f) { l = 1 - j; if (nounit) { x[j] /= a[j * a_dim1 + 1]; } temp = x[j]; /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { x[i__] -= temp * a[l + i__ + j * a_dim1]; /* L50: */ } } /* L60: 
*/ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { kx += *incx; if (x[jx] != 0.f) { ix = kx; l = 1 - j; if (nounit) { x[jx] /= a[j * a_dim1 + 1]; } temp = x[jx]; /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { x[ix] -= temp * a[l + i__ + j * a_dim1]; ix += *incx; /* L70: */ } } jx += *incx; /* L80: */ } } } } else { /* Form x := inv( A')*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; l = kplus1 - j; /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { temp -= a[l + i__ + j * a_dim1] * x[i__]; /* L90: */ } if (nounit) { temp /= a[kplus1 + j * a_dim1]; } x[j] = temp; /* L100: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = kx; l = kplus1 - j; /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { temp -= a[l + i__ + j * a_dim1] * x[ix]; ix += *incx; /* L110: */ } if (nounit) { temp /= a[kplus1 + j * a_dim1]; } x[jx] = temp; jx += *incx; if (j > *k) { kx += *incx; } /* L120: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; l = 1 - j; /* Computing MIN */ i__1 = *n, i__3 = j + *k; i__4 = j + 1; for (i__ = f2c_min(i__1,i__3); i__ >= i__4; --i__) { temp -= a[l + i__ + j * a_dim1] * x[i__]; /* L130: */ } if (nounit) { temp /= a[j * a_dim1 + 1]; } x[j] = temp; /* L140: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = kx; l = 1 - j; /* Computing MIN */ i__4 = *n, i__1 = j + *k; i__3 = j + 1; for (i__ = f2c_min(i__4,i__1); i__ >= i__3; --i__) { temp -= a[l + i__ + j * a_dim1] * x[ix]; ix -= *incx; /* L150: */ } if (nounit) { temp /= a[j * a_dim1 + 1]; } x[jx] = temp; jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L160: */ } } } } return 0; /* End of STBSV . 
*/ } /* stbsv_ */ /* ztbsv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_dcomplex *a, const bla_integer *lda, bla_dcomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ //void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *), bla_d_cnjg( // bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, l; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZTBSV solves one of the systems of equations */ /* A*x = b, or A'*x = b, or conjg( A' )*x = b, */ /* where b and x are n element vectors and A is an n by n unit, or */ /* non-unit, upper or lower triangular band matrix, with ( k + 1 ) */ /* diagonals. */ /* No test for singularity or near-singularity is included in this */ /* routine. Such tests must be performed before calling this routine. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the equations to be solved as */ /* follows: */ /* TRANS = 'N' or 'n' A*x = b. */ /* TRANS = 'T' or 't' A'*x = b. 
*/ /* TRANS = 'C' or 'c' conjg( A' )*x = b. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* K - INTEGER. */ /* On entry with UPLO = 'U' or 'u', K specifies the number of */ /* super-diagonals of the matrix A. */ /* On entry with UPLO = 'L' or 'l', K specifies the number of */ /* sub-diagonals of the matrix A. */ /* K must satisfy 0 .le. K. */ /* Unchanged on exit. */ /* A - COMPLEX*16 array of DIMENSION ( LDA, n ). */ /* Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */ /* by n part of the array A must contain the upper triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row */ /* ( k + 1 ) of the array, the first super-diagonal starting at */ /* position 2 in row k, and so on. The top left k by k triangle */ /* of the array A is not referenced. */ /* The following program segment will transfer an upper */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = K + 1 - J */ /* DO 10, I = MAX( 1, J - K ), J */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */ /* by n part of the array A must contain the lower triangular */ /* band part of the matrix of coefficients, supplied column by */ /* column, with the leading diagonal of the matrix in row 1 of */ /* the array, the first sub-diagonal starting at position 1 in */ /* row 2, and so on. The bottom right k by k triangle of the */ /* array A is not referenced. 
*/ /* The following program segment will transfer a lower */ /* triangular band matrix from conventional full matrix storage */ /* to band storage: */ /* DO 20, J = 1, N */ /* M = 1 - J */ /* DO 10, I = J, MIN( N, J + K ) */ /* A( M + I, J ) = matrix( I, J ) */ /* 10 CONTINUE */ /* 20 CONTINUE */ /* Note that when DIAG = 'U' or 'u' the elements of the array A */ /* corresponding to the diagonal elements of the matrix are not */ /* referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* LDA - INTEGER. */ /* On entry, LDA specifies the first dimension of A as declared */ /* in the calling (sub) program. LDA must be at least */ /* ( k + 1 ). */ /* Unchanged on exit. */ /* X - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element right-hand side vector b. On exit, X is overwritten */ /* with the solution vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --x; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! 
PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*k < 0) { info = 5; } else if (*lda < *k + 1) { info = 7; } else if (*incx == 0) { info = 9; } if (info != 0) { PASTEF770(xerbla)("ZTBSV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1); nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of A are */ /* accessed by sequentially with one pass through A. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := inv( A )*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; if (bli_zreal(x[i__1]) != 0. || bli_zimag(x[i__1]) != 0.) { l = kplus1 - j; if (nounit) { i__1 = j; bla_z_div(&z__1, &x[j], &a[kplus1 + j * a_dim1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } i__1 = j; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { i__2 = i__; i__3 = i__; i__4 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__2 ); bli_zsets( (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); /* L10: */ } } /* L20: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { kx -= *incx; i__1 = jx; if (bli_zreal(x[i__1]) != 0. 
|| bli_zimag(x[i__1]) != 0.) { ix = kx; l = kplus1 - j; if (nounit) { i__1 = jx; bla_z_div(&z__1, &x[jx], &a[kplus1 + j * a_dim1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } i__1 = jx; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__1 = f2c_max(i__2,i__3); for (i__ = j - 1; i__ >= i__1; --i__) { i__2 = ix; i__3 = ix; i__4 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__4]) - bli_zimag(temp) * bli_zimag(a[i__4])), (bli_zreal(temp) * bli_zimag(a[i__4]) + bli_zimag(temp) * bli_zreal(a[i__4])), z__2 ); bli_zsets( (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); ix -= *incx; /* L30: */ } } jx -= *incx; /* L40: */ } } } else { if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { l = 1 - j; if (nounit) { i__2 = j; bla_z_div(&z__1, &x[j], &a[j * a_dim1 + 1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } i__2 = j; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); /* L50: */ } } /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { kx += *incx; i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ ix = kx; l = 1 - j; if (nounit) { i__2 = jx; bla_z_div(&z__1, &x[jx], &a[j * a_dim1 + 1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } i__2 = jx; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); /* Computing MIN */ i__3 = *n, i__4 = j + *k; i__2 = f2c_min(i__3,i__4); for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = ix; i__4 = ix; i__5 = l + i__ + j * a_dim1; bli_zsets( (bli_zreal(temp) * bli_zreal(a[i__5]) - bli_zimag(temp) * bli_zimag(a[i__5])), (bli_zreal(temp) * bli_zimag(a[i__5]) + bli_zimag(temp) * bli_zreal(a[i__5])), z__2 ); bli_zsets( (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); ix += *incx; /* L70: */ } } jx += *incx; /* L80: */ } } } } else { /* Form x := inv( A' )*x or x := inv( conjg( A') )*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kplus1 = *k + 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); l = kplus1 - j; if (noconj) { /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { i__2 = l + i__ + j * a_dim1; i__3 = i__; bli_zsets( (bli_zreal(a[i__2]) * bli_zreal(x[i__3]) - bli_zimag(a[i__2]) * bli_zimag(x[i__3])), (bli_zreal(a[i__2]) * bli_zimag(x[i__3]) + bli_zimag(a[i__2]) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L90: */ } if (nounit) { bla_z_div(&z__1, &temp, &a[kplus1 + j * a_dim1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { /* Computing MAX */ i__4 = 1, i__2 = j - *k; i__3 = j - 1; for (i__ = f2c_max(i__4,i__2); i__ <= i__3; ++i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__4 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__4]) - bli_zimag(z__3) * bli_zimag(x[i__4])), 
(bli_zreal(z__3) * bli_zimag(x[i__4]) + bli_zimag(z__3) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L100: */ } if (nounit) { bla_d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); bla_z_div(&z__1, &temp, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__3 = j; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__3] ); /* L110: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__3 = jx; bli_zsets( (bli_zreal(x[i__3])), (bli_zimag(x[i__3])), temp ); ix = kx; l = kplus1 - j; if (noconj) { /* Computing MAX */ i__3 = 1, i__4 = j - *k; i__2 = j - 1; for (i__ = f2c_max(i__3,i__4); i__ <= i__2; ++i__) { i__3 = l + i__ + j * a_dim1; i__4 = ix; bli_zsets( (bli_zreal(a[i__3]) * bli_zreal(x[i__4]) - bli_zimag(a[i__3]) * bli_zimag(x[i__4])), (bli_zreal(a[i__3]) * bli_zimag(x[i__4]) + bli_zimag(a[i__3]) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L120: */ } if (nounit) { bla_z_div(&z__1, &temp, &a[kplus1 + j * a_dim1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { /* Computing MAX */ i__2 = 1, i__3 = j - *k; i__4 = j - 1; for (i__ = f2c_max(i__2,i__3); i__ <= i__4; ++i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__2 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L130: */ } if (nounit) { bla_d_cnjg(&z__2, &a[kplus1 + j * a_dim1]); bla_z_div(&z__1, &temp, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__4 = jx; 
bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); jx += *incx; if (j > *k) { kx += *incx; } /* L140: */ } } } else { if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); l = 1 - j; if (noconj) { /* Computing MIN */ i__1 = *n, i__4 = j + *k; i__2 = j + 1; for (i__ = f2c_min(i__1,i__4); i__ >= i__2; --i__) { i__1 = l + i__ + j * a_dim1; i__4 = i__; bli_zsets( (bli_zreal(a[i__1]) * bli_zreal(x[i__4]) - bli_zimag(a[i__1]) * bli_zimag(x[i__4])), (bli_zreal(a[i__1]) * bli_zimag(x[i__4]) + bli_zimag(a[i__1]) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L150: */ } if (nounit) { bla_z_div(&z__1, &temp, &a[j * a_dim1 + 1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { /* Computing MIN */ i__2 = *n, i__1 = j + *k; i__4 = j + 1; for (i__ = f2c_min(i__2,i__1); i__ >= i__4; --i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__2 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L160: */ } if (nounit) { bla_d_cnjg(&z__2, &a[j * a_dim1 + 1]); bla_z_div(&z__1, &temp, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__4 = j; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__4] ); /* L170: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { i__4 = jx; bli_zsets( (bli_zreal(x[i__4])), (bli_zimag(x[i__4])), temp ); ix = kx; l = 1 - j; if (noconj) { /* Computing MIN */ i__4 = *n, i__2 = j + *k; i__1 = j + 1; for (i__ = f2c_min(i__4,i__2); i__ >= i__1; --i__) { i__4 = l + i__ + j * a_dim1; i__2 = ix; bli_zsets( 
(bli_zreal(a[i__4]) * bli_zreal(x[i__2]) - bli_zimag(a[i__4]) * bli_zimag(x[i__2])), (bli_zreal(a[i__4]) * bli_zimag(x[i__2]) + bli_zimag(a[i__4]) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L180: */ } if (nounit) { bla_z_div(&z__1, &temp, &a[j * a_dim1 + 1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { /* Computing MIN */ i__1 = *n, i__4 = j + *k; i__2 = j + 1; for (i__ = f2c_min(i__1,i__4); i__ >= i__2; --i__) { bla_d_cnjg(&z__3, &a[l + i__ + j * a_dim1]); i__1 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L190: */ } if (nounit) { bla_d_cnjg(&z__2, &a[j * a_dim1 + 1]); bla_z_div(&z__1, &temp, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__2 = jx; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); jx -= *incx; if (*n - j >= *k) { kx -= *incx; } /* L200: */ } } } } return 0; /* End of ZTBSV . */ } /* ztbsv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_tbsv.h000066400000000000000000000052261360743507500174620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS BLIS_EXPORT_BLAS int PASTEF77(c,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_scomplex *a, const bla_integer *lda, bla_scomplex *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(d,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_double *a, const bla_integer *lda, bla_double *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(s,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_real *a, const bla_integer *lda, bla_real *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(z,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_dcomplex *a, const bla_integer *lda, bla_dcomplex *x, const bla_integer *incx); #endif blis-0.6.1/frame/compat/f2c/bla_tpmv.c000066400000000000000000001550311360743507500174650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* ctpmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* PASTEF77(c,tpmv): overwrites x in place with A*x, A'*x or conjg( A' )*x for a triangular matrix A held in packed form in ap. Invalid arguments are reported through xerbla; every exit path returns 0. */ /* Subroutine */ int PASTEF77(c,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_scomplex *ap, bla_scomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* CTPMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular matrix, supplied in packed form. 
*/ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := conjg( A' )*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* AP - COMPLEX array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ /* respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ /* respectively, and so on. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* X - COMPLEX array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* transformed vector x. */ /* INCX - INTEGER. 
*/ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments: step x and ap back by one element so that the 1-based indices used below address the caller's arrays. */ --x; --ap; /* Function Body */ info = 0; /* info holds the 1-based position of the first argument that fails its check, or 0 if all checks pass. */ if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*incx == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("CTPMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } /* noconj: TRANS matched "T" (plain transpose, no conjugation); nounit: DIAG matched "N" (diagonal of A is read from ap). */ noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1); nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. 
In this version the elements of AP are */ /* accessed sequentially with one pass through AP. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x:= A*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = 1; /* kk tracks the ap index of the first element of column j. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { i__2 = j; bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; bli_csets( (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); bli_csets( (bli_creal(x[i__4]) + bli_creal(q__2)), (bli_cimag(x[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); ++k; /* L10: */ } if (nounit) { i__2 = j; i__3 = j; i__4 = kk + j - 1; bli_csets( (bli_creal(x[i__3]) * bli_creal(ap[i__4]) - bli_cimag(x[i__3]) * bli_cimag(ap[i__4])), (bli_creal(x[i__3]) * bli_cimag(ap[i__4]) + bli_cimag(x[i__3]) * bli_creal(ap[i__4])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); } } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; if (bli_creal(x[i__2]) != 0.f || bli_cimag(x[i__2]) != 0.f) { i__2 = jx; bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = ix; i__4 = ix; i__5 = k; bli_csets( (bli_creal(temp) * bli_creal(ap[i__5]) - bli_cimag(temp) * bli_cimag(ap[i__5])), (bli_creal(temp) * bli_cimag(ap[i__5]) + bli_cimag(temp) * bli_creal(ap[i__5])), q__2 ); bli_csets( (bli_creal(x[i__4]) + bli_creal(q__2)), (bli_cimag(x[i__4]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__3] ); ix += *incx; /* L30: */ } if (nounit) { i__2 = jx; i__3 = jx; i__4 = kk + j - 1; bli_csets( (bli_creal(x[i__3]) * bli_creal(ap[i__4]) - bli_cimag(x[i__3]) * bli_cimag(ap[i__4])), (bli_creal(x[i__3]) * bli_cimag(ap[i__4]) + bli_cimag(x[i__3]) * bli_creal(ap[i__4])), q__1 ); bli_csets( (bli_creal(q__1)), 
(bli_cimag(q__1)), x[i__2] ); } } jx += *incx; kk += j; /* L40: */ } } } else { kk = *n * (*n + 1) / 2; /* kk tracks the ap index of the last element of column j. */ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; if (bli_creal(x[i__1]) != 0.f || bli_cimag(x[i__1]) != 0.f) { i__1 = j; bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); k = kk; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { i__2 = i__; i__3 = i__; i__4 = k; bli_csets( (bli_creal(temp) * bli_creal(ap[i__4]) - bli_cimag(temp) * bli_cimag(ap[i__4])), (bli_creal(temp) * bli_cimag(ap[i__4]) + bli_cimag(temp) * bli_creal(ap[i__4])), q__2 ); bli_csets( (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); --k; /* L50: */ } if (nounit) { i__1 = j; i__2 = j; i__3 = kk - *n + j; bli_csets( (bli_creal(x[i__2]) * bli_creal(ap[i__3]) - bli_cimag(x[i__2]) * bli_cimag(ap[i__3])), (bli_creal(x[i__2]) * bli_cimag(ap[i__3]) + bli_cimag(x[i__2]) * bli_creal(ap[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } } kk -= *n - j + 1; /* L60: */ } } else { kx += (*n - 1) * *incx; /* advance kx to the n-th logical element of x for the descending sweep */ jx = kx; for (j = *n; j >= 1; --j) { i__1 = jx; if (bli_creal(x[i__1]) != 0.f || bli_cimag(x[i__1]) != 0.f) { i__1 = jx; bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); ix = kx; i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { i__2 = ix; i__3 = ix; i__4 = k; bli_csets( (bli_creal(temp) * bli_creal(ap[i__4]) - bli_cimag(temp) * bli_cimag(ap[i__4])), (bli_creal(temp) * bli_cimag(ap[i__4]) + bli_cimag(temp) * bli_creal(ap[i__4])), q__2 ); bli_csets( (bli_creal(x[i__3]) + bli_creal(q__2)), (bli_cimag(x[i__3]) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__2] ); ix -= *incx; /* L70: */ } if (nounit) { i__1 = jx; i__2 = jx; i__3 = kk - *n + j; bli_csets( (bli_creal(x[i__2]) * bli_creal(ap[i__3]) - bli_cimag(x[i__2]) * bli_cimag(ap[i__3])), (bli_creal(x[i__2]) * bli_cimag(ap[i__3]) + bli_cimag(x[i__2]) * 
bli_creal(ap[i__3])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), x[i__1] ); } } jx -= *incx; kk -= *n - j + 1; /* L80: */ } } } } else { /* Form x := A'*x or x := conjg( A' )*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = *n * (*n + 1) / 2; /* kk tracks the ap index of the diagonal element of column j. */ if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; bli_csets( (bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); k = kk - 1; if (noconj) { if (nounit) { i__1 = kk; bli_csets( (bli_creal(temp) * bli_creal(ap[i__1]) - bli_cimag(temp) * bli_cimag(ap[i__1])), (bli_creal(temp) * bli_cimag(ap[i__1]) + bli_cimag(temp) * bli_creal(ap[i__1])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } for (i__ = j - 1; i__ >= 1; --i__) { i__1 = k; i__2 = i__; bli_csets( (bli_creal(ap[i__1]) * bli_creal(x[i__2]) - bli_cimag(ap[i__1]) * bli_cimag(x[i__2])), (bli_creal(ap[i__1]) * bli_cimag(x[i__2]) + bli_cimag(ap[i__1]) * bli_creal(x[i__2])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); --k; /* L90: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &ap[kk]); bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } for (i__ = j - 1; i__ >= 1; --i__) { bla_r_cnjg(&q__3, &ap[k]); i__1 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__1]) - bli_cimag(q__3) * bli_cimag(x[i__1])), (bli_creal(q__3) * bli_cimag(x[i__1]) + bli_cimag(q__3) * bli_creal(x[i__1])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); --k; /* L100: */ } } i__1 = j; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); kk -= j; /* L110: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { i__1 = jx; bli_csets( 
(bli_creal(x[i__1])), (bli_cimag(x[i__1])), temp ); ix = jx; if (noconj) { if (nounit) { i__1 = kk; bli_csets( (bli_creal(temp) * bli_creal(ap[i__1]) - bli_cimag(temp) * bli_cimag(ap[i__1])), (bli_creal(temp) * bli_cimag(ap[i__1]) + bli_cimag(temp) * bli_creal(ap[i__1])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; i__2 = k; i__3 = ix; bli_csets( (bli_creal(ap[i__2]) * bli_creal(x[i__3]) - bli_cimag(ap[i__2]) * bli_cimag(x[i__3])), (bli_creal(ap[i__2]) * bli_cimag(x[i__3]) + bli_cimag(ap[i__2]) * bli_creal(x[i__3])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L120: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &ap[kk]); bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; bla_r_cnjg(&q__3, &ap[k]); i__2 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__2]) - bli_cimag(q__3) * bli_cimag(x[i__2])), (bli_creal(q__3) * bli_cimag(x[i__2]) + bli_cimag(q__3) * bli_creal(x[i__2])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L130: */ } } i__1 = jx; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__1] ); jx -= *incx; kk -= j; /* L140: */ } } } else { kk = 1; /* kk tracks the ap index of the diagonal element of column j. */ if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); k = kk + 1; if (noconj) { if (nounit) { i__2 = kk; bli_csets( (bli_creal(temp) * bli_creal(ap[i__2]) - bli_cimag(temp) * bli_cimag(ap[i__2])), (bli_creal(temp) * bli_cimag(ap[i__2]) + bli_cimag(temp) * bli_creal(ap[i__2])), q__1 ); 
bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = i__; bli_csets( (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ++k; /* L150: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &ap[kk]); bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { bla_r_cnjg(&q__3, &ap[k]); i__3 = i__; bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); ++k; /* L160: */ } } i__2 = j; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); kk += *n - j + 1; /* L170: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; bli_csets( (bli_creal(x[i__2])), (bli_cimag(x[i__2])), temp ); ix = jx; if (noconj) { if (nounit) { i__2 = kk; bli_csets( (bli_creal(temp) * bli_creal(ap[i__2]) - bli_cimag(temp) * bli_cimag(ap[i__2])), (bli_creal(temp) * bli_cimag(ap[i__2]) + bli_cimag(temp) * bli_creal(ap[i__2])), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; i__3 = k; i__4 = ix; bli_csets( (bli_creal(ap[i__3]) * bli_creal(x[i__4]) - bli_cimag(ap[i__3]) * bli_cimag(x[i__4])), (bli_creal(ap[i__3]) * bli_cimag(x[i__4]) + bli_cimag(ap[i__3]) * 
bli_creal(x[i__4])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L180: */ } } else { if (nounit) { bla_r_cnjg(&q__2, &ap[kk]); bli_csets( (bli_creal(temp) * bli_creal(q__2) - bli_cimag(temp) * bli_cimag(q__2)), (bli_creal(temp) * bli_cimag(q__2) + bli_cimag(temp) * bli_creal(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; bla_r_cnjg(&q__3, &ap[k]); i__3 = ix; bli_csets( (bli_creal(q__3) * bli_creal(x[i__3]) - bli_cimag(q__3) * bli_cimag(x[i__3])), (bli_creal(q__3) * bli_cimag(x[i__3]) + bli_cimag(q__3) * bli_creal(x[i__3])), q__2 ); bli_csets( (bli_creal(temp) + bli_creal(q__2)), (bli_cimag(temp) + bli_cimag(q__2)), q__1 ); bli_csets( (bli_creal(q__1)), (bli_cimag(q__1)), temp ); /* L190: */ } } i__2 = jx; bli_csets( (bli_creal(temp)), (bli_cimag(temp)), x[i__2] ); jx += *incx; kk += *n - j + 1; /* L200: */ } } } } return 0; /* End of CTPMV . */ } /* ctpmv_ */ /* dtpmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(d,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_double *ap, bla_double *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer info; bla_double temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. 
*/ /* Purpose */ /* ======= */ /* DTPMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := A'*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* AP - DOUBLE PRECISION array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ /* respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ /* respectively, and so on. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced, but are assumed to be unity. */ /* Unchanged on exit. 
*/ /* X - DOUBLE PRECISION array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; --ap; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*incx == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("DTPMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of AP are */ /* accessed sequentially with one pass through AP. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x:= A*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.) { temp = x[j]; k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { x[i__] += temp * ap[k]; ++k; /* L10: */ } if (nounit) { x[j] *= ap[kk + j - 1]; } } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { x[ix] += temp * ap[k]; ix += *incx; /* L30: */ } if (nounit) { x[jx] *= ap[kk + j - 1]; } } jx += *incx; kk += j; /* L40: */ } } } else { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.) { temp = x[j]; k = kk; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { x[i__] += temp * ap[k]; --k; /* L50: */ } if (nounit) { x[j] *= ap[kk - *n + j]; } } kk -= *n - j + 1; /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.) { temp = x[jx]; ix = kx; i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { x[ix] += temp * ap[k]; ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= ap[kk - *n + j]; } } jx -= *incx; kk -= *n - j + 1; /* L80: */ } } } } else { /* Form x := A'*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; if (nounit) { temp *= ap[kk]; } k = kk - 1; for (i__ = j - 1; i__ >= 1; --i__) { temp += ap[k] * x[i__]; --k; /* L90: */ } x[j] = temp; kk -= j; /* L100: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = jx; if (nounit) { temp *= ap[kk]; } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; temp += ap[k] * x[ix]; /* L110: */ } x[jx] = temp; jx -= *incx; kk -= j; /* L120: */ } } } else { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; if (nounit) { temp *= ap[kk]; } k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { temp += ap[k] * x[i__]; ++k; /* L130: */ } x[j] = temp; kk += *n - j + 1; /* L140: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = jx; if (nounit) { temp *= ap[kk]; } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; temp += ap[k] * x[ix]; /* L150: */ } x[jx] = temp; jx += *incx; kk += *n - j + 1; /* L160: */ } } } } return 0; /* End of DTPMV . */ } /* dtpmv_ */ /* stpmv.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(s,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_real *ap, bla_real *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2; /* Local variables */ bla_integer info; bla_real temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. 
*/ /* Purpose */ /* ======= */ /* STPMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := A'*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* AP - REAL array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ /* respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ /* respectively, and so on. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced, but are assumed to be unity. */ /* Unchanged on exit. 
*/ /* X - REAL array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; --ap; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*incx == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("STPMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of AP are */ /* accessed sequentially with one pass through AP. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x:= A*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[j] != 0.f) { temp = x[j]; k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { x[i__] += temp * ap[k]; ++k; /* L10: */ } if (nounit) { x[j] *= ap[kk + j - 1]; } } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (x[jx] != 0.f) { temp = x[jx]; ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { x[ix] += temp * ap[k]; ix += *incx; /* L30: */ } if (nounit) { x[jx] *= ap[kk + j - 1]; } } jx += *incx; kk += j; /* L40: */ } } } else { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { if (x[j] != 0.f) { temp = x[j]; k = kk; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { x[i__] += temp * ap[k]; --k; /* L50: */ } if (nounit) { x[j] *= ap[kk - *n + j]; } } kk -= *n - j + 1; /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { if (x[jx] != 0.f) { temp = x[jx]; ix = kx; i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { x[ix] += temp * ap[k]; ix -= *incx; /* L70: */ } if (nounit) { x[jx] *= ap[kk - *n + j]; } } jx -= *incx; kk -= *n - j + 1; /* L80: */ } } } } else { /* Form x := A'*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { temp = x[j]; if (nounit) { temp *= ap[kk]; } k = kk - 1; for (i__ = j - 1; i__ >= 1; --i__) { temp += ap[k] * x[i__]; --k; /* L90: */ } x[j] = temp; kk -= j; /* L100: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { temp = x[jx]; ix = jx; if (nounit) { temp *= ap[kk]; } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; temp += ap[k] * x[ix]; /* L110: */ } x[jx] = temp; jx -= *incx; kk -= j; /* L120: */ } } } else { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[j]; if (nounit) { temp *= ap[kk]; } k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { temp += ap[k] * x[i__]; ++k; /* L130: */ } x[j] = temp; kk += *n - j + 1; /* L140: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { temp = x[jx]; ix = jx; if (nounit) { temp *= ap[kk]; } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; temp += ap[k] * x[ix]; /* L150: */ } x[jx] = temp; jx += *incx; kk += *n - j + 1; /* L160: */ } } } } return 0; /* End of STPMV . */ } /* stpmv_ */ /* ztpmv.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_dcomplex *ap, bla_dcomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZTPMV performs one of the matrix-vector operations */ /* x := A*x, or x := A'*x, or x := conjg( A' )*x, */ /* where x is an n element vector and A is an n by n unit, or non-unit, */ /* upper or lower triangular matrix, supplied in packed form. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the operation to be performed as */ /* follows: */ /* TRANS = 'N' or 'n' x := A*x. */ /* TRANS = 'T' or 't' x := A'*x. */ /* TRANS = 'C' or 'c' x := conjg( A' )*x. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. */ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. 
*/ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* AP - COMPLEX*16 array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ /* respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ /* respectively, and so on. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* X - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element vector x. On exit, X is overwritten with the */ /* tranformed vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --x; --ap; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! 
PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*incx == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("ZTPMV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1); nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of AP are */ /* accessed sequentially with one pass through AP. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x:= A*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ i__2 = j; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); k = kk; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); bli_zsets( (bli_zreal(x[i__4]) + bli_zreal(z__2)), (bli_zimag(x[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); ++k; /* L10: */ } if (nounit) { i__2 = j; i__3 = j; i__4 = kk + j - 1; bli_zsets( (bli_zreal(x[i__3]) * bli_zreal(ap[i__4]) - bli_zimag(x[i__3]) * bli_zimag(ap[i__4])), (bli_zreal(x[i__3]) * bli_zimag(ap[i__4]) + bli_zimag(x[i__3]) * bli_zreal(ap[i__4])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } } kk += j; /* L20: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { i__2 = jx; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); ix = kx; i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = ix; i__4 = ix; i__5 = k; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); bli_zsets( (bli_zreal(x[i__4]) + bli_zreal(z__2)), (bli_zimag(x[i__4]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); ix += *incx; /* L30: */ } if (nounit) { i__2 = jx; i__3 = jx; i__4 = kk + j - 1; bli_zsets( (bli_zreal(x[i__3]) * bli_zreal(ap[i__4]) - bli_zimag(x[i__3]) * bli_zimag(ap[i__4])), (bli_zreal(x[i__3]) * bli_zimag(ap[i__4]) + bli_zimag(x[i__3]) * bli_zreal(ap[i__4])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } } jx += *incx; kk += j; /* L40: */ } } } else { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; if (bli_zreal(x[i__1]) != 0. 
|| bli_zimag(x[i__1]) != 0.) { i__1 = j; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); k = kk; i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { i__2 = i__; i__3 = i__; i__4 = k; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); bli_zsets( (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); --k; /* L50: */ } if (nounit) { i__1 = j; i__2 = j; i__3 = kk - *n + j; bli_zsets( (bli_zreal(x[i__2]) * bli_zreal(ap[i__3]) - bli_zimag(x[i__2]) * bli_zimag(ap[i__3])), (bli_zreal(x[i__2]) * bli_zimag(ap[i__3]) + bli_zimag(x[i__2]) * bli_zreal(ap[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } } kk -= *n - j + 1; /* L60: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { i__1 = jx; if (bli_zreal(x[i__1]) != 0. || bli_zimag(x[i__1]) != 0.) { i__1 = jx; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); ix = kx; i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { i__2 = ix; i__3 = ix; i__4 = k; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); bli_zsets( (bli_zreal(x[i__3]) + bli_zreal(z__2)), (bli_zimag(x[i__3]) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); ix -= *incx; /* L70: */ } if (nounit) { i__1 = jx; i__2 = jx; i__3 = kk - *n + j; bli_zsets( (bli_zreal(x[i__2]) * bli_zreal(ap[i__3]) - bli_zimag(x[i__2]) * bli_zimag(ap[i__3])), (bli_zreal(x[i__2]) * bli_zimag(ap[i__3]) + bli_zimag(x[i__2]) * bli_zreal(ap[i__3])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } } jx -= *incx; kk -= *n - j + 1; /* L80: */ } } } } else { /* Form x := A'*x or x := conjg( A' )*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); k = kk - 1; if (noconj) { if (nounit) { i__1 = kk; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__1]) - bli_zimag(temp) * bli_zimag(ap[i__1])), (bli_zreal(temp) * bli_zimag(ap[i__1]) + bli_zimag(temp) * bli_zreal(ap[i__1])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } for (i__ = j - 1; i__ >= 1; --i__) { i__1 = k; i__2 = i__; bli_zsets( (bli_zreal(ap[i__1]) * bli_zreal(x[i__2]) - bli_zimag(ap[i__1]) * bli_zimag(x[i__2])), (bli_zreal(ap[i__1]) * bli_zimag(x[i__2]) + bli_zimag(ap[i__1]) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); --k; /* L90: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &ap[kk]); bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } for (i__ = j - 1; i__ >= 1; --i__) { bla_d_cnjg(&z__3, &ap[k]); i__1 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__1]) - bli_zimag(z__3) * bli_zimag(x[i__1])), (bli_zreal(z__3) * bli_zimag(x[i__1]) + bli_zimag(z__3) * bli_zreal(x[i__1])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); --k; /* L100: */ } } i__1 = j; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); kk -= j; /* L110: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { i__1 = jx; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); ix = jx; if (noconj) { if (nounit) { i__1 = kk; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__1]) - bli_zimag(temp) * bli_zimag(ap[i__1])), 
(bli_zreal(temp) * bli_zimag(ap[i__1]) + bli_zimag(temp) * bli_zreal(ap[i__1])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; i__2 = k; i__3 = ix; bli_zsets( (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L120: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &ap[kk]); bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; bla_d_cnjg(&z__3, &ap[k]); i__2 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L130: */ } } i__1 = jx; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); jx -= *incx; kk -= j; /* L140: */ } } } else { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); k = kk + 1; if (noconj) { if (nounit) { i__2 = kk; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__2]) - bli_zimag(temp) * bli_zimag(ap[i__2])), (bli_zreal(temp) * bli_zimag(ap[i__2]) + bli_zimag(temp) * bli_zreal(ap[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = i__; bli_zsets( (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - 
bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ++k; /* L150: */ } } else { if (nounit) { bla_d_cnjg(&z__2, &ap[kk]); bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { bla_d_cnjg(&z__3, &ap[k]); i__3 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ++k; /* L160: */ } } i__2 = j; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); kk += *n - j + 1; /* L170: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); ix = jx; if (noconj) { if (nounit) { i__2 = kk; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__2]) - bli_zimag(temp) * bli_zimag(ap[i__2])), (bli_zreal(temp) * bli_zimag(ap[i__2]) + bli_zimag(temp) * bli_zreal(ap[i__2])), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; i__3 = k; i__4 = ix; bli_zsets( (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L180: */ } } else { 
if (nounit) { bla_d_cnjg(&z__2, &ap[kk]); bli_zsets( (bli_zreal(temp) * bli_zreal(z__2) - bli_zimag(temp) * bli_zimag(z__2)), (bli_zreal(temp) * bli_zimag(z__2) + bli_zimag(temp) * bli_zreal(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; bla_d_cnjg(&z__3, &ap[k]); i__3 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) + bli_zreal(z__2)), (bli_zimag(temp) + bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); /* L190: */ } } i__2 = jx; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); jx += *incx; kk += *n - j + 1; /* L200: */ } } } } return 0; /* End of ZTPMV . */ } /* ztpmv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_tpmv.h000066400000000000000000000047421360743507500174740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS BLIS_EXPORT_BLAS int PASTEF77(c,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_scomplex *ap, bla_scomplex *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(d,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_double *ap, bla_double *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(s,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_real *ap, bla_real *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(z,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_dcomplex *ap, bla_dcomplex *x, const bla_integer *incx); #endif blis-0.6.1/frame/compat/f2c/bla_tpsv.c000066400000000000000000001471011360743507500174720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* ctpsv.f -- translated by f2c (version 19991025). 
You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

/*
   acc := acc - lhs * rhs for single-precision complex operands.

   The product is staged in `prod`, the difference in `diff`, and only then
   copied into `acc`, so `acc` is never read after it has been partially
   written. The real/imaginary formulas are spelled out in the same operand
   order for every use.
*/
#define CTPSV_SUBMUL( acc, lhs, rhs, prod, diff ) \
{ \
	bli_csets( (bli_creal(lhs) * bli_creal(rhs) - bli_cimag(lhs) * bli_cimag(rhs)), \
	           (bli_creal(lhs) * bli_cimag(rhs) + bli_cimag(lhs) * bli_creal(rhs)), \
	           prod ); \
	bli_csets( (bli_creal(acc) - bli_creal(prod)), \
	           (bli_cimag(acc) - bli_cimag(prod)), \
	           diff ); \
	bli_csets( (bli_creal(diff)), (bli_cimag(diff)), acc ); \
}

/*
   CTPSV -- packed triangular solve, single-precision complex.

   Solves one of

       A*x = b,   A'*x = b,   conjg( A' )*x = b

   in place: on entry x holds b, on exit it holds the solution. A is an
   n-by-n unit or non-unit, upper or lower triangular matrix stored in
   packed form, column by column, in ap (at least n*(n+1)/2 elements).
   No singularity check is performed.

   Arguments
     uplo   "U": A is upper triangular; "L": A is lower triangular.
     trans  "N": A*x = b; "T": A'*x = b; "C": conjg( A' )*x = b.
     diag   "U": unit diagonal assumed (diagonal of ap not referenced);
            "N": diagonal taken from ap.
     n      order of A; must be >= 0.
     ap     packed triangle of A (read only).
     x      right-hand side / solution, stride incx.
     incx   stride of x; must be non-zero.

   Error handling
     An invalid argument is reported through xerbla with the routine name
     "CTPSV " and the position of the offending argument (1 = uplo,
     2 = trans, 3 = diag, 4 = n, 7 = incx); x is left untouched.

   Returns 0 in every case.
*/
/* Subroutine */ int PASTEF77(c,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_scomplex *ap, bla_scomplex *x, const bla_integer *incx)
{
	/* One-based aliases so the index arithmetic mirrors the Fortran source. */
	const bla_scomplex* a = ap - 1;
	bla_scomplex*       v = x - 1;

	bla_scomplex xj;          /* element currently being solved for      */
	bla_scomplex cj;          /* conjugated entry of A                   */
	bla_scomplex quot;        /* result of a complex division            */
	bla_scomplex prod, diff;  /* scratch for CTPSV_SUBMUL                */

	bla_integer info = 0;
	bla_integer order, step;
	bla_integer col, row, p, last;
	bla_integer dpos;         /* running position inside the packed array */
	bla_integer ix, jx, kx = 0;
	bla_logical noconj, nonunit, notrans, upper;

	/* Argument validation; the first failing check decides the error code. */
	if ( !( PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) ||
	        PASTEF770(lsame)(uplo, "L", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 1;
	}
	else if ( !( PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(trans, "C", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 2;
	}
	else if ( !( PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 3;
	}
	else if ( *n < 0 )
	{
		info = 4;
	}
	else if ( *incx == 0 )
	{
		info = 7;
	}

	if ( info != 0 )
	{
		PASTEF770(xerbla)("CTPSV ", &info, (ftnlen)6);
		return 0;
	}

	/* Nothing to do for an empty system. */
	if ( *n == 0 )
	{
		return 0;
	}

	noconj  = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1);
	nonunit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1);

	order = *n;
	step  = *incx;

	/* First logical element of x for a non-unit stride. Descending sweeps
	   add ( order - 1 ) * step to this themselves. */
	if ( step <= 0 )
	{
		kx = 1 - ( order - 1 ) * step;
	}
	else if ( step != 1 )
	{
		kx = 1;
	}

	notrans = PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1);
	upper   = PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1);

	/* Every variant walks the packed array exactly once, sequentially. */
	if ( notrans )
	{
		/* x := inv( A )*x, column-oriented substitution. */
		if ( upper )
		{
			/* Back substitution, starting at the last diagonal entry. */
			dpos = order * ( order + 1 ) / 2;

			if ( step == 1 )
			{
				for ( col = order; col >= 1; --col )
				{
					if ( bli_creal(v[col]) != 0.f || bli_cimag(v[col]) != 0.f )
					{
						if ( nonunit )
						{
							bla_c_div(&quot, &v[col], &a[dpos]);
							bli_csets( (bli_creal(quot)), (bli_cimag(quot)), v[col] );
						}
						bli_csets( (bli_creal(v[col])), (bli_cimag(v[col])), xj );

						p = dpos - 1;
						for ( row = col - 1; row >= 1; --row )
						{
							CTPSV_SUBMUL( v[row], xj, a[p], prod, diff );
							--p;
						}
					}
					dpos -= col;
				}
			}
			else
			{
				jx = kx + ( order - 1 ) * step;
				for ( col = order; col >= 1; --col )
				{
					if ( bli_creal(v[jx]) != 0.f || bli_cimag(v[jx]) != 0.f )
					{
						if ( nonunit )
						{
							bla_c_div(&quot, &v[jx], &a[dpos]);
							bli_csets( (bli_creal(quot)), (bli_cimag(quot)), v[jx] );
						}
						bli_csets( (bli_creal(v[jx])), (bli_cimag(v[jx])), xj );

						ix   = jx;
						last = dpos - col + 1;
						for ( p = dpos - 1; p >= last; --p )
						{
							ix -= step;
							CTPSV_SUBMUL( v[ix], xj, a[p], prod, diff );
						}
					}
					jx   -= step;
					dpos -= col;
				}
			}
		}
		else
		{
			/* Forward substitution, starting at the first diagonal entry. */
			dpos = 1;

			if ( step == 1 )
			{
				for ( col = 1; col <= order; ++col )
				{
					if ( bli_creal(v[col]) != 0.f || bli_cimag(v[col]) != 0.f )
					{
						if ( nonunit )
						{
							bla_c_div(&quot, &v[col], &a[dpos]);
							bli_csets( (bli_creal(quot)), (bli_cimag(quot)), v[col] );
						}
						bli_csets( (bli_creal(v[col])), (bli_cimag(v[col])), xj );

						p = dpos + 1;
						for ( row = col + 1; row <= order; ++row )
						{
							CTPSV_SUBMUL( v[row], xj, a[p], prod, diff );
							++p;
						}
					}
					dpos += order - col + 1;
				}
			}
			else
			{
				jx = kx;
				for ( col = 1; col <= order; ++col )
				{
					if ( bli_creal(v[jx]) != 0.f || bli_cimag(v[jx]) != 0.f )
					{
						if ( nonunit )
						{
							bla_c_div(&quot, &v[jx], &a[dpos]);
							bli_csets( (bli_creal(quot)), (bli_cimag(quot)), v[jx] );
						}
						bli_csets( (bli_creal(v[jx])), (bli_cimag(v[jx])), xj );

						ix   = jx;
						last = dpos + order - col;
						for ( p = dpos + 1; p <= last; ++p )
						{
							ix += step;
							CTPSV_SUBMUL( v[ix], xj, a[p], prod, diff );
						}
					}
					jx   += step;
					dpos += order - col + 1;
				}
			}
		}
	}
	else
	{
		/* x := inv( A' )*x or x := inv( conjg( A' ) )*x, dot-product
		   oriented. With noconj set the entries of A are used as stored,
		   otherwise each one is conjugated first. */
		if ( upper )
		{
			dpos = 1;

			if ( step == 1 )
			{
				for ( col = 1; col <= order; ++col )
				{
					bli_csets( (bli_creal(v[col])), (bli_cimag(v[col])), xj );

					p = dpos;
					for ( row = 1; row <= col - 1; ++row )
					{
						if ( noconj )
						{
							CTPSV_SUBMUL( xj, a[p], v[row], prod, diff );
						}
						else
						{
							bla_r_cnjg(&cj, &a[p]);
							CTPSV_SUBMUL( xj, cj, v[row], prod, diff );
						}
						++p;
					}

					if ( nonunit )
					{
						if ( noconj )
						{
							bla_c_div(&quot, &xj, &a[dpos + col - 1]);
						}
						else
						{
							bla_r_cnjg(&cj, &a[dpos + col - 1]);
							bla_c_div(&quot, &xj, &cj);
						}
						bli_csets( (bli_creal(quot)), (bli_cimag(quot)), xj );
					}

					bli_csets( (bli_creal(xj)), (bli_cimag(xj)), v[col] );
					dpos += col;
				}
			}
			else
			{
				jx = kx;
				for ( col = 1; col <= order; ++col )
				{
					bli_csets( (bli_creal(v[jx])), (bli_cimag(v[jx])), xj );

					ix   = kx;
					last = dpos + col - 2;
					for ( p = dpos; p <= last; ++p )
					{
						if ( noconj )
						{
							CTPSV_SUBMUL( xj, a[p], v[ix], prod, diff );
						}
						else
						{
							bla_r_cnjg(&cj, &a[p]);
							CTPSV_SUBMUL( xj, cj, v[ix], prod, diff );
						}
						ix += step;
					}

					if ( nonunit )
					{
						if ( noconj )
						{
							bla_c_div(&quot, &xj, &a[dpos + col - 1]);
						}
						else
						{
							bla_r_cnjg(&cj, &a[dpos + col - 1]);
							bla_c_div(&quot, &xj, &cj);
						}
						bli_csets( (bli_creal(quot)), (bli_cimag(quot)), xj );
					}

					bli_csets( (bli_creal(xj)), (bli_cimag(xj)), v[jx] );
					jx   += step;
					dpos += col;
				}
			}
		}
		else
		{
			dpos = order * ( order + 1 ) / 2;

			if ( step == 1 )
			{
				for ( col = order; col >= 1; --col )
				{
					bli_csets( (bli_creal(v[col])), (bli_cimag(v[col])), xj );

					p = dpos;
					for ( row = order; row >= col + 1; --row )
					{
						if ( noconj )
						{
							CTPSV_SUBMUL( xj, a[p], v[row], prod, diff );
						}
						else
						{
							bla_r_cnjg(&cj, &a[p]);
							CTPSV_SUBMUL( xj, cj, v[row], prod, diff );
						}
						--p;
					}

					if ( nonunit )
					{
						if ( noconj )
						{
							bla_c_div(&quot, &xj, &a[dpos - order + col]);
						}
						else
						{
							bla_r_cnjg(&cj, &a[dpos - order + col]);
							bla_c_div(&quot, &xj, &cj);
						}
						bli_csets( (bli_creal(quot)), (bli_cimag(quot)), xj );
					}

					bli_csets( (bli_creal(xj)), (bli_cimag(xj)), v[col] );
					dpos -= order - col + 1;
				}
			}
			else
			{
				/* Move the start point to the last logical element. */
				kx += ( order - 1 ) * step;
				jx  = kx;
				for ( col = order; col >= 1; --col )
				{
					bli_csets( (bli_creal(v[jx])), (bli_cimag(v[jx])), xj );

					ix   = kx;
					last = dpos - ( order - ( col + 1 ) );
					for ( p = dpos; p >= last; --p )
					{
						if ( noconj )
						{
							CTPSV_SUBMUL( xj, a[p], v[ix], prod, diff );
						}
						else
						{
							bla_r_cnjg(&cj, &a[p]);
							CTPSV_SUBMUL( xj, cj, v[ix], prod, diff );
						}
						ix -= step;
					}

					if ( nonunit )
					{
						if ( noconj )
						{
							bla_c_div(&quot, &xj, &a[dpos - order + col]);
						}
						else
						{
							bla_r_cnjg(&cj, &a[dpos - order + col]);
							bla_c_div(&quot, &xj, &cj);
						}
						bli_csets( (bli_creal(quot)), (bli_cimag(quot)), xj );
					}

					bli_csets( (bli_creal(xj)), (bli_cimag(xj)), v[jx] );
					jx   -= step;
					dpos -= order - col + 1;
				}
			}
		}
	}

	return 0;

	/* End of CTPSV . */

} /* ctpsv_ */

#undef CTPSV_SUBMUL

/* dtpsv.f -- translated by f2c (version 19991025).
You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

/*
   DTPSV -- packed triangular solve, double-precision real.

   Solves one of

       A*x = b,   A'*x = b

   in place: on entry x holds b, on exit it holds the solution. A is an
   n-by-n unit or non-unit, upper or lower triangular matrix stored in
   packed form, column by column, in ap (at least n*(n+1)/2 elements).
   No singularity check is performed.

   Arguments
     uplo   "U": A is upper triangular; "L": A is lower triangular.
     trans  "N": A*x = b; "T" or "C": A'*x = b.
     diag   "U": unit diagonal assumed (diagonal of ap not referenced);
            "N": diagonal taken from ap.
     n      order of A; must be >= 0.
     ap     packed triangle of A (read only).
     x      right-hand side / solution, stride incx.
     incx   stride of x; must be non-zero.

   Error handling
     An invalid argument is reported through xerbla with the routine name
     "DTPSV " and the position of the offending argument (1 = uplo,
     2 = trans, 3 = diag, 4 = n, 7 = incx); x is left untouched.

   Returns 0 in every case.
*/
/* Subroutine */ int PASTEF77(d,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_double *ap, bla_double *x, const bla_integer *incx)
{
	/* One-based aliases so the index arithmetic mirrors the Fortran source. */
	const bla_double* a = ap - 1;
	bla_double*       v = x - 1;

	bla_double  xj;           /* element currently being solved for       */
	bla_integer info = 0;
	bla_integer order, step;
	bla_integer col, row, p, last;
	bla_integer dpos;         /* running position inside the packed array */
	bla_integer ix, jx, kx = 0;
	bla_logical nonunit, notrans, upper;

	/* Argument validation; the first failing check decides the error code. */
	if ( !( PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) ||
	        PASTEF770(lsame)(uplo, "L", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 1;
	}
	else if ( !( PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(trans, "C", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 2;
	}
	else if ( !( PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 3;
	}
	else if ( *n < 0 )
	{
		info = 4;
	}
	else if ( *incx == 0 )
	{
		info = 7;
	}

	if ( info != 0 )
	{
		PASTEF770(xerbla)("DTPSV ", &info, (ftnlen)6);
		return 0;
	}

	/* Nothing to do for an empty system. */
	if ( *n == 0 )
	{
		return 0;
	}

	nonunit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1);

	order = *n;
	step  = *incx;

	/* First logical element of x for a non-unit stride. Descending sweeps
	   add ( order - 1 ) * step to this themselves. */
	if ( step <= 0 )
	{
		kx = 1 - ( order - 1 ) * step;
	}
	else if ( step != 1 )
	{
		kx = 1;
	}

	notrans = PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1);
	upper   = PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1);

	/* Every variant walks the packed array exactly once, sequentially. */
	if ( notrans )
	{
		/* x := inv( A )*x, column-oriented substitution. */
		if ( upper )
		{
			/* Back substitution, starting at the last diagonal entry. */
			dpos = order * ( order + 1 ) / 2;

			if ( step == 1 )
			{
				for ( col = order; col >= 1; --col )
				{
					if ( v[col] != 0. )
					{
						if ( nonunit )
						{
							v[col] /= a[dpos];
						}
						xj = v[col];

						p = dpos - 1;
						for ( row = col - 1; row >= 1; --row )
						{
							v[row] -= xj * a[p];
							--p;
						}
					}
					dpos -= col;
				}
			}
			else
			{
				jx = kx + ( order - 1 ) * step;
				for ( col = order; col >= 1; --col )
				{
					if ( v[jx] != 0. )
					{
						if ( nonunit )
						{
							v[jx] /= a[dpos];
						}
						xj = v[jx];

						ix   = jx;
						last = dpos - col + 1;
						for ( p = dpos - 1; p >= last; --p )
						{
							ix    -= step;
							v[ix] -= xj * a[p];
						}
					}
					jx   -= step;
					dpos -= col;
				}
			}
		}
		else
		{
			/* Forward substitution, starting at the first diagonal entry. */
			dpos = 1;

			if ( step == 1 )
			{
				for ( col = 1; col <= order; ++col )
				{
					if ( v[col] != 0. )
					{
						if ( nonunit )
						{
							v[col] /= a[dpos];
						}
						xj = v[col];

						p = dpos + 1;
						for ( row = col + 1; row <= order; ++row )
						{
							v[row] -= xj * a[p];
							++p;
						}
					}
					dpos += order - col + 1;
				}
			}
			else
			{
				jx = kx;
				for ( col = 1; col <= order; ++col )
				{
					if ( v[jx] != 0. )
					{
						if ( nonunit )
						{
							v[jx] /= a[dpos];
						}
						xj = v[jx];

						ix   = jx;
						last = dpos + order - col;
						for ( p = dpos + 1; p <= last; ++p )
						{
							ix    += step;
							v[ix] -= xj * a[p];
						}
					}
					jx   += step;
					dpos += order - col + 1;
				}
			}
		}
	}
	else
	{
		/* x := inv( A' )*x, dot-product oriented substitution. */
		if ( upper )
		{
			dpos = 1;

			if ( step == 1 )
			{
				for ( col = 1; col <= order; ++col )
				{
					xj = v[col];

					p = dpos;
					for ( row = 1; row <= col - 1; ++row )
					{
						xj -= a[p] * v[row];
						++p;
					}

					if ( nonunit )
					{
						xj /= a[dpos + col - 1];
					}
					v[col] = xj;
					dpos  += col;
				}
			}
			else
			{
				jx = kx;
				for ( col = 1; col <= order; ++col )
				{
					xj = v[jx];

					ix   = kx;
					last = dpos + col - 2;
					for ( p = dpos; p <= last; ++p )
					{
						xj -= a[p] * v[ix];
						ix += step;
					}

					if ( nonunit )
					{
						xj /= a[dpos + col - 1];
					}
					v[jx] = xj;
					jx   += step;
					dpos += col;
				}
			}
		}
		else
		{
			dpos = order * ( order + 1 ) / 2;

			if ( step == 1 )
			{
				for ( col = order; col >= 1; --col )
				{
					xj = v[col];

					p = dpos;
					for ( row = order; row >= col + 1; --row )
					{
						xj -= a[p] * v[row];
						--p;
					}

					if ( nonunit )
					{
						xj /= a[dpos - order + col];
					}
					v[col] = xj;
					dpos  -= order - col + 1;
				}
			}
			else
			{
				/* Move the start point to the last logical element. */
				kx += ( order - 1 ) * step;
				jx  = kx;
				for ( col = order; col >= 1; --col )
				{
					xj = v[jx];

					ix   = kx;
					last = dpos - ( order - ( col + 1 ) );
					for ( p = dpos; p >= last; --p )
					{
						xj -= a[p] * v[ix];
						ix -= step;
					}

					if ( nonunit )
					{
						xj /= a[dpos - order + col];
					}
					v[jx] = xj;
					jx   -= step;
					dpos -= order - col + 1;
				}
			}
		}
	}

	return 0;

	/* End of DTPSV . */

} /* dtpsv_ */

/* stpsv.f -- translated by f2c (version 19991025).
You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

/*
   STPSV -- packed triangular solve, single-precision real.

   Solves one of

       A*x = b,   A'*x = b

   in place: on entry x holds b, on exit it holds the solution. A is an
   n-by-n unit or non-unit, upper or lower triangular matrix stored in
   packed form, column by column, in ap (at least n*(n+1)/2 elements).
   No singularity check is performed.

   Arguments
     uplo   "U": A is upper triangular; "L": A is lower triangular.
     trans  "N": A*x = b; "T" or "C": A'*x = b.
     diag   "U": unit diagonal assumed (diagonal of ap not referenced);
            "N": diagonal taken from ap.
     n      order of A; must be >= 0.
     ap     packed triangle of A (read only).
     x      right-hand side / solution, stride incx.
     incx   stride of x; must be non-zero.

   Error handling
     An invalid argument is reported through xerbla with the routine name
     "STPSV " and the position of the offending argument (1 = uplo,
     2 = trans, 3 = diag, 4 = n, 7 = incx); x is left untouched.

   Returns 0 in every case.
*/
/* Subroutine */ int PASTEF77(s,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_real *ap, bla_real *x, const bla_integer *incx)
{
	/* One-based aliases so the index arithmetic mirrors the Fortran source. */
	const bla_real* a = ap - 1;
	bla_real*       v = x - 1;

	bla_real    xj;           /* element currently being solved for       */
	bla_integer info = 0;
	bla_integer order, step;
	bla_integer col, row, p, last;
	bla_integer dpos;         /* running position inside the packed array */
	bla_integer ix, jx, kx = 0;
	bla_logical nonunit, notrans, upper;

	/* Argument validation; the first failing check decides the error code. */
	if ( !( PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) ||
	        PASTEF770(lsame)(uplo, "L", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 1;
	}
	else if ( !( PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(trans, "C", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 2;
	}
	else if ( !( PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) ||
	             PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1) ) )
	{
		info = 3;
	}
	else if ( *n < 0 )
	{
		info = 4;
	}
	else if ( *incx == 0 )
	{
		info = 7;
	}

	if ( info != 0 )
	{
		PASTEF770(xerbla)("STPSV ", &info, (ftnlen)6);
		return 0;
	}

	/* Nothing to do for an empty system. */
	if ( *n == 0 )
	{
		return 0;
	}

	nonunit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1);

	order = *n;
	step  = *incx;

	/* First logical element of x for a non-unit stride. Descending sweeps
	   add ( order - 1 ) * step to this themselves. */
	if ( step <= 0 )
	{
		kx = 1 - ( order - 1 ) * step;
	}
	else if ( step != 1 )
	{
		kx = 1;
	}

	notrans = PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1);
	upper   = PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1);

	/* Every variant walks the packed array exactly once, sequentially. */
	if ( notrans )
	{
		/* x := inv( A )*x, column-oriented substitution. */
		if ( upper )
		{
			/* Back substitution, starting at the last diagonal entry. */
			dpos = order * ( order + 1 ) / 2;

			if ( step == 1 )
			{
				for ( col = order; col >= 1; --col )
				{
					if ( v[col] != 0.f )
					{
						if ( nonunit )
						{
							v[col] /= a[dpos];
						}
						xj = v[col];

						p = dpos - 1;
						for ( row = col - 1; row >= 1; --row )
						{
							v[row] -= xj * a[p];
							--p;
						}
					}
					dpos -= col;
				}
			}
			else
			{
				jx = kx + ( order - 1 ) * step;
				for ( col = order; col >= 1; --col )
				{
					if ( v[jx] != 0.f )
					{
						if ( nonunit )
						{
							v[jx] /= a[dpos];
						}
						xj = v[jx];

						ix   = jx;
						last = dpos - col + 1;
						for ( p = dpos - 1; p >= last; --p )
						{
							ix    -= step;
							v[ix] -= xj * a[p];
						}
					}
					jx   -= step;
					dpos -= col;
				}
			}
		}
		else
		{
			/* Forward substitution, starting at the first diagonal entry. */
			dpos = 1;

			if ( step == 1 )
			{
				for ( col = 1; col <= order; ++col )
				{
					if ( v[col] != 0.f )
					{
						if ( nonunit )
						{
							v[col] /= a[dpos];
						}
						xj = v[col];

						p = dpos + 1;
						for ( row = col + 1; row <= order; ++row )
						{
							v[row] -= xj * a[p];
							++p;
						}
					}
					dpos += order - col + 1;
				}
			}
			else
			{
				jx = kx;
				for ( col = 1; col <= order; ++col )
				{
					if ( v[jx] != 0.f )
					{
						if ( nonunit )
						{
							v[jx] /= a[dpos];
						}
						xj = v[jx];

						ix   = jx;
						last = dpos + order - col;
						for ( p = dpos + 1; p <= last; ++p )
						{
							ix    += step;
							v[ix] -= xj * a[p];
						}
					}
					jx   += step;
					dpos += order - col + 1;
				}
			}
		}
	}
	else
	{
		/* x := inv( A' )*x, dot-product oriented substitution. */
		if ( upper )
		{
			dpos = 1;

			if ( step == 1 )
			{
				for ( col = 1; col <= order; ++col )
				{
					xj = v[col];

					p = dpos;
					for ( row = 1; row <= col - 1; ++row )
					{
						xj -= a[p] * v[row];
						++p;
					}

					if ( nonunit )
					{
						xj /= a[dpos + col - 1];
					}
					v[col] = xj;
					dpos  += col;
				}
			}
			else
			{
				jx = kx;
				for ( col = 1; col <= order; ++col )
				{
					xj = v[jx];

					ix   = kx;
					last = dpos + col - 2;
					for ( p = dpos; p <= last; ++p )
					{
						xj -= a[p] * v[ix];
						ix += step;
					}

					if ( nonunit )
					{
						xj /= a[dpos + col - 1];
					}
					v[jx] = xj;
					jx   += step;
					dpos += col;
				}
			}
		}
		else
		{
			dpos = order * ( order + 1 ) / 2;

			if ( step == 1 )
			{
				for ( col = order; col >= 1; --col )
				{
					xj = v[col];

					p = dpos;
					for ( row = order; row >= col + 1; --row )
					{
						xj -= a[p] * v[row];
						--p;
					}

					if ( nonunit )
					{
						xj /= a[dpos - order + col];
					}
					v[col] = xj;
					dpos  -= order - col + 1;
				}
			}
			else
			{
				/* Move the start point to the last logical element. */
				kx += ( order - 1 ) * step;
				jx  = kx;
				for ( col = order; col >= 1; --col )
				{
					xj = v[jx];

					ix   = kx;
					last = dpos - ( order - ( col + 1 ) );
					for ( p = dpos; p >= last; --p )
					{
						xj -= a[p] * v[ix];
						ix -= step;
					}

					if ( nonunit )
					{
						xj /= a[dpos - order + col];
					}
					v[jx] = xj;
					jx   -= step;
					dpos -= order - col + 1;
				}
			}
		}
	}

	return 0;

	/* End of STPSV . */

} /* stpsv_ */

/* ztpsv.f -- translated by f2c (version 19991025).
You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Subroutine */ int PASTEF77(z,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_dcomplex *ap, bla_dcomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ //void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *), bla_d_cnjg( // bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, k; //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* ZTPSV solves one of the systems of equations */ /* A*x = b, or A'*x = b, or conjg( A' )*x = b, */ /* where b and x are n element vectors and A is an n by n unit, or */ /* non-unit, upper or lower triangular matrix, supplied in packed form. */ /* No test for singularity or near-singularity is included in this */ /* routine. Such tests must be performed before calling this routine. */ /* Parameters */ /* ========== */ /* UPLO - CHARACTER*1. */ /* On entry, UPLO specifies whether the matrix is an upper or */ /* lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' A is an upper triangular matrix. */ /* UPLO = 'L' or 'l' A is a lower triangular matrix. */ /* Unchanged on exit. */ /* TRANS - CHARACTER*1. */ /* On entry, TRANS specifies the equations to be solved as */ /* follows: */ /* TRANS = 'N' or 'n' A*x = b. */ /* TRANS = 'T' or 't' A'*x = b. */ /* TRANS = 'C' or 'c' conjg( A' )*x = b. */ /* Unchanged on exit. */ /* DIAG - CHARACTER*1. 
*/ /* On entry, DIAG specifies whether or not A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* N - INTEGER. */ /* On entry, N specifies the order of the matrix A. */ /* N must be at least zero. */ /* Unchanged on exit. */ /* AP - COMPLEX*16 array of DIMENSION at least */ /* ( ( n*( n + 1 ) )/2 ). */ /* Before entry with UPLO = 'U' or 'u', the array AP must */ /* contain the upper triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */ /* respectively, and so on. */ /* Before entry with UPLO = 'L' or 'l', the array AP must */ /* contain the lower triangular matrix packed sequentially, */ /* column by column, so that AP( 1 ) contains a( 1, 1 ), */ /* AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */ /* respectively, and so on. */ /* Note that when DIAG = 'U' or 'u', the diagonal elements of */ /* A are not referenced, but are assumed to be unity. */ /* Unchanged on exit. */ /* X - COMPLEX*16 array of dimension at least */ /* ( 1 + ( n - 1 )*abs( INCX ) ). */ /* Before entry, the incremented array X must contain the n */ /* element right-hand side vector b. On exit, X is overwritten */ /* with the solution vector x. */ /* INCX - INTEGER. */ /* On entry, INCX specifies the increment for the elements of */ /* X. INCX must not be zero. */ /* Unchanged on exit. */ /* Level 2 Blas routine. */ /* -- Written on 22-October-1986. */ /* Jack Dongarra, Argonne National Lab. */ /* Jeremy Du Croz, Nag Central Office. */ /* Sven Hammarling, Nag Central Office. */ /* Richard Hanson, Sandia National Labs. */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Functions .. */ /* .. External Subroutines .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. 
*/ /* Parameter adjustments */ --x; --ap; /* Function Body */ info = 0; if (! PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(uplo, "L", ( ftnlen)1, (ftnlen)1)) { info = 1; } else if (! PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(trans, "C", (ftnlen)1, ( ftnlen)1)) { info = 2; } else if (! PASTEF770(lsame)(diag, "U", (ftnlen)1, (ftnlen)1) && ! PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1)) { info = 3; } else if (*n < 0) { info = 4; } else if (*incx == 0) { info = 7; } if (info != 0) { PASTEF770(xerbla)("ZTPSV ", &info, (ftnlen)6); return 0; } /* Quick return if possible. */ if (*n == 0) { return 0; } noconj = PASTEF770(lsame)(trans, "T", (ftnlen)1, (ftnlen)1); nounit = PASTEF770(lsame)(diag, "N", (ftnlen)1, (ftnlen)1); /* Set up the start point in X if the increment is not unity. This */ /* will be ( N - 1 )*INCX too small for descending loops. */ if (*incx <= 0) { kx = 1 - (*n - 1) * *incx; } else if (*incx != 1) { kx = 1; } /* Start the operations. In this version the elements of AP are */ /* accessed sequentially with one pass through AP. */ if (PASTEF770(lsame)(trans, "N", (ftnlen)1, (ftnlen)1)) { /* Form x := inv( A )*x. */ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; if (bli_zreal(x[i__1]) != 0. || bli_zimag(x[i__1]) != 0.) 
{ if (nounit) { i__1 = j; bla_z_div(&z__1, &x[j], &ap[kk]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } i__1 = j; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); k = kk - 1; for (i__ = j - 1; i__ >= 1; --i__) { i__1 = i__; i__2 = i__; i__3 = k; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__3]) - bli_zimag(temp) * bli_zimag(ap[i__3])), (bli_zreal(temp) * bli_zimag(ap[i__3]) + bli_zimag(temp) * bli_zreal(ap[i__3])), z__2 ); bli_zsets( (bli_zreal(x[i__2]) - bli_zreal(z__2)), (bli_zimag(x[i__2]) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); --k; /* L10: */ } } kk -= j; /* L20: */ } } else { jx = kx + (*n - 1) * *incx; for (j = *n; j >= 1; --j) { i__1 = jx; if (bli_zreal(x[i__1]) != 0. || bli_zimag(x[i__1]) != 0.) { if (nounit) { i__1 = jx; bla_z_div(&z__1, &x[jx], &ap[kk]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__1] ); } i__1 = jx; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); ix = jx; i__1 = kk - j + 1; for (k = kk - 1; k >= i__1; --k) { ix -= *incx; i__2 = ix; i__3 = ix; i__4 = k; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__4]) - bli_zimag(temp) * bli_zimag(ap[i__4])), (bli_zreal(temp) * bli_zimag(ap[i__4]) + bli_zimag(temp) * bli_zreal(ap[i__4])), z__2 ); bli_zsets( (bli_zreal(x[i__3]) - bli_zreal(z__2)), (bli_zimag(x[i__3]) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); /* L30: */ } } jx -= *incx; kk -= j; /* L40: */ } } } else { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) 
{ if (nounit) { i__2 = j; bla_z_div(&z__1, &x[j], &ap[kk]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } i__2 = j; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); k = kk + 1; i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { i__3 = i__; i__4 = i__; i__5 = k; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); bli_zsets( (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); ++k; /* L50: */ } } kk += *n - j + 1; /* L60: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; if (bli_zreal(x[i__2]) != 0. || bli_zimag(x[i__2]) != 0.) { if (nounit) { i__2 = jx; bla_z_div(&z__1, &x[jx], &ap[kk]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__2] ); } i__2 = jx; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); ix = jx; i__2 = kk + *n - j; for (k = kk + 1; k <= i__2; ++k) { ix += *incx; i__3 = ix; i__4 = ix; i__5 = k; bli_zsets( (bli_zreal(temp) * bli_zreal(ap[i__5]) - bli_zimag(temp) * bli_zimag(ap[i__5])), (bli_zreal(temp) * bli_zimag(ap[i__5]) + bli_zimag(temp) * bli_zreal(ap[i__5])), z__2 ); bli_zsets( (bli_zreal(x[i__4]) - bli_zreal(z__2)), (bli_zimag(x[i__4]) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), x[i__3] ); /* L70: */ } } jx += *incx; kk += *n - j + 1; /* L80: */ } } } } else { /* Form x := inv( A' )*x or x := inv( conjg( A' ) )*x. 
*/ if (PASTEF770(lsame)(uplo, "U", (ftnlen)1, (ftnlen)1)) { kk = 1; if (*incx == 1) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); k = kk; if (noconj) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = k; i__4 = i__; bli_zsets( (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ++k; /* L90: */ } if (nounit) { bla_z_div(&z__1, &temp, &ap[kk + j - 1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { bla_d_cnjg(&z__3, &ap[k]); i__3 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ++k; /* L100: */ } if (nounit) { bla_d_cnjg(&z__2, &ap[kk + j - 1]); bla_z_div(&z__1, &temp, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__2 = j; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); kk += j; /* L110: */ } } else { jx = kx; i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = jx; bli_zsets( (bli_zreal(x[i__2])), (bli_zimag(x[i__2])), temp ); ix = kx; if (noconj) { i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { i__3 = k; i__4 = ix; bli_zsets( (bli_zreal(ap[i__3]) * bli_zreal(x[i__4]) - bli_zimag(ap[i__3]) * bli_zimag(x[i__4])), (bli_zreal(ap[i__3]) * bli_zimag(x[i__4]) + bli_zimag(ap[i__3]) * bli_zreal(x[i__4])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), 
(bli_zimag(z__1)), temp ); ix += *incx; /* L120: */ } if (nounit) { bla_z_div(&z__1, &temp, &ap[kk + j - 1]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { i__2 = kk + j - 2; for (k = kk; k <= i__2; ++k) { bla_d_cnjg(&z__3, &ap[k]); i__3 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__3]) - bli_zimag(z__3) * bli_zimag(x[i__3])), (bli_zreal(z__3) * bli_zimag(x[i__3]) + bli_zimag(z__3) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix += *incx; /* L130: */ } if (nounit) { bla_d_cnjg(&z__2, &ap[kk + j - 1]); bla_z_div(&z__1, &temp, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__2 = jx; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__2] ); jx += *incx; kk += j; /* L140: */ } } } else { kk = *n * (*n + 1) / 2; if (*incx == 1) { for (j = *n; j >= 1; --j) { i__1 = j; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); k = kk; if (noconj) { i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { i__2 = k; i__3 = i__; bli_zsets( (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); --k; /* L150: */ } if (nounit) { bla_z_div(&z__1, &temp, &ap[kk - *n + j]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { i__1 = j + 1; for (i__ = *n; i__ >= i__1; --i__) { bla_d_cnjg(&z__3, &ap[k]); i__2 = i__; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), 
(bli_zimag(z__1)), temp ); --k; /* L160: */ } if (nounit) { bla_d_cnjg(&z__2, &ap[kk - *n + j]); bla_z_div(&z__1, &temp, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__1 = j; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); kk -= *n - j + 1; /* L170: */ } } else { kx += (*n - 1) * *incx; jx = kx; for (j = *n; j >= 1; --j) { i__1 = jx; bli_zsets( (bli_zreal(x[i__1])), (bli_zimag(x[i__1])), temp ); ix = kx; if (noconj) { i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { i__2 = k; i__3 = ix; bli_zsets( (bli_zreal(ap[i__2]) * bli_zreal(x[i__3]) - bli_zimag(ap[i__2]) * bli_zimag(x[i__3])), (bli_zreal(ap[i__2]) * bli_zimag(x[i__3]) + bli_zimag(ap[i__2]) * bli_zreal(x[i__3])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L180: */ } if (nounit) { bla_z_div(&z__1, &temp, &ap[kk - *n + j]); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } else { i__1 = kk - (*n - (j + 1)); for (k = kk; k >= i__1; --k) { bla_d_cnjg(&z__3, &ap[k]); i__2 = ix; bli_zsets( (bli_zreal(z__3) * bli_zreal(x[i__2]) - bli_zimag(z__3) * bli_zimag(x[i__2])), (bli_zreal(z__3) * bli_zimag(x[i__2]) + bli_zimag(z__3) * bli_zreal(x[i__2])), z__2 ); bli_zsets( (bli_zreal(temp) - bli_zreal(z__2)), (bli_zimag(temp) - bli_zimag(z__2)), z__1 ); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); ix -= *incx; /* L190: */ } if (nounit) { bla_d_cnjg(&z__2, &ap[kk - *n + j]); bla_z_div(&z__1, &temp, &z__2); bli_zsets( (bli_zreal(z__1)), (bli_zimag(z__1)), temp ); } } i__1 = jx; bli_zsets( (bli_zreal(temp)), (bli_zimag(temp)), x[i__1] ); jx -= *incx; kk -= *n - j + 1; /* L200: */ } } } } return 0; /* End of ZTPSV . 
*/ } /* ztpsv_ */ #endif blis-0.6.1/frame/compat/f2c/bla_tpsv.h000066400000000000000000000047421360743507500175020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS BLIS_EXPORT_BLAS int PASTEF77(c,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_scomplex *ap, bla_scomplex *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(d,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_double *ap, bla_double *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(s,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_real *ap, bla_real *x, const bla_integer *incx); BLIS_EXPORT_BLAS int PASTEF77(z,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_dcomplex *ap, bla_dcomplex *x, const bla_integer *incx); #endif blis-0.6.1/frame/compat/f2c/bla_xerbla.c000066400000000000000000000062171360743507500177550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS /* xerbla.f -- translated by f2c (version 19991025). You must link the resulting object file with the libraries: -lf2c -lm (in that order) */ /* Table of constant values */ /* Subroutine */ int PASTEF770(xerbla)(const bla_character *srname, const bla_integer *info, ftnlen srname_len) { /* -- LAPACK auxiliary routine (preliminary version) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* February 29, 1992 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* XERBLA is an error handler for the LAPACK routines. */ /* It is called by an LAPACK routine if an input parameter has an */ /* invalid value. A message is printed and execution stops. */ /* Installers may consider modifying the STOP statement in order to */ /* call system-specific exception-handling facilities. */ /* Arguments */ /* ========= */ /* SRNAME (input) CHARACTER*6 */ /* The name of the routine which called XERBLA. */ /* INFO (input) INTEGER */ /* The position of the invalid parameter in the parameter list */ /* of the calling routine. 
*/ //int i; //for ( i = 0; i < srname_len; ++i ) // srname[i] = toupper( srname[i] ); printf("** On entry to %6s, parameter number %2i had an illegal value\n", srname, (int)*info); //bli_abort(); /* End of XERBLA */ return 0; } /* xerbla */ #endif blis-0.6.1/frame/compat/f2c/bla_xerbla.h000066400000000000000000000034331360743507500177570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS BLIS_EXPORT_BLAS int PASTEF770(xerbla)(const bla_character *srname, const bla_integer *info, ftnlen srname_len); #endif blis-0.6.1/frame/compat/f2c/util/000077500000000000000000000000001360743507500164655ustar00rootroot00000000000000blis-0.6.1/frame/compat/f2c/util/bla_c_abs.c000066400000000000000000000034741360743507500205260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_BLAS double bla_c_abs(const bla_scomplex *z) { return( bla_f__cabs( bli_creal( *z ), bli_cimag( *z ) ) ); } #endif blis-0.6.1/frame/compat/f2c/util/bla_c_abs.h000066400000000000000000000033241360743507500205250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS double bla_c_abs(const bla_scomplex *z); #endif blis-0.6.1/frame/compat/f2c/util/bla_c_div.c000066400000000000000000000035101360743507500205320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_BLAS void bla_c_div(bla_scomplex *cp, const bla_scomplex *ap, const bla_scomplex *bp) { bli_ccopys( *ap, *cp ); bli_cinvscals( *bp, *cp ); } #endif blis-0.6.1/frame/compat/f2c/util/bla_c_div.h000066400000000000000000000033751360743507500205500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS void bla_c_div(bla_scomplex *cp, const bla_scomplex *ap, const bla_scomplex *bp); #endif blis-0.6.1/frame/compat/f2c/util/bla_d_abs.c000066400000000000000000000034251360743507500205230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_BLAS double bla_d_abs(const bla_double *x) { if(*x >= 0.0) return(*x); return(- *x); } #endif blis-0.6.1/frame/compat/f2c/util/bla_d_abs.h000066400000000000000000000033221360743507500205240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS double bla_d_abs(const bla_double *x); #endif blis-0.6.1/frame/compat/f2c/util/bla_d_cnjg.c000066400000000000000000000034341360743507500206770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_BLAS void bla_d_cnjg(bla_dcomplex *dest, const bla_dcomplex *src) { bli_zcopyjs( *src, *dest ); } #endif blis-0.6.1/frame/compat/f2c/util/bla_d_cnjg.h000066400000000000000000000033511360743507500207020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS void bla_d_cnjg(bla_dcomplex *dest, const bla_dcomplex *src); #endif blis-0.6.1/frame/compat/f2c/util/bla_d_imag.c000066400000000000000000000034041360743507500206700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_BLAS double bla_d_imag(const bla_dcomplex *z) { return bli_zimag( *z ); } #endif blis-0.6.1/frame/compat/f2c/util/bla_d_imag.h000066400000000000000000000033251360743507500206770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS double bla_d_imag(const bla_dcomplex *z); #endif blis-0.6.1/frame/compat/f2c/util/bla_d_sign.c000066400000000000000000000035011360743507500207110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS double bla_d_sign(const bla_double *a, const bla_double *b) { double x = (*a >= 0.0 ? *a : - *a); return(*b >= 0.0 ? 
x : -x); } #endif blis-0.6.1/frame/compat/f2c/util/bla_d_sign.h000066400000000000000000000033501360743507500207200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifdef BLIS_ENABLE_BLAS double bla_d_sign(const bla_double *a, const bla_double *b); #endif blis-0.6.1/frame/compat/f2c/util/bla_f__cabs.c000066400000000000000000000041041360743507500210220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_BLAS double bla_f__cabs(double real_val, double imag_val) { double temp; if(real_val < 0) real_val = -real_val; if(imag_val < 0) imag_val = -imag_val; if(imag_val > real_val) { temp = real_val; real_val = imag_val; imag_val = temp; } if((real_val+imag_val) == real_val) return(real_val); temp = imag_val/real_val; temp = real_val*sqrt(1.0 + temp*temp); return(temp); } #endif blis-0.6.1/frame/compat/f2c/util/bla_f__cabs.h000066400000000000000000000033311360743507500210300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS double bla_f__cabs(double real, double imag); #endif blis-0.6.1/frame/compat/f2c/util/bla_r_abs.c000066400000000000000000000034231360743507500205370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS double bla_r_abs(const bla_real *x) { if(*x >= 0.0) return(*x); return(- *x); } #endif blis-0.6.1/frame/compat/f2c/util/bla_r_abs.h000066400000000000000000000033201360743507500205400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS double bla_r_abs(const bla_real *x); #endif blis-0.6.1/frame/compat/f2c/util/bla_r_cnjg.c000066400000000000000000000034341360743507500207150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS void bla_r_cnjg(bla_scomplex *dest, const bla_scomplex *src) { bli_ccopyjs( *src, *dest ); } #endif blis-0.6.1/frame/compat/f2c/util/bla_r_cnjg.h000066400000000000000000000033511360743507500207200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS void bla_r_cnjg(bla_scomplex *dest, const bla_scomplex *src); #endif blis-0.6.1/frame/compat/f2c/util/bla_r_imag.c000066400000000000000000000034061360743507500207100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS bla_real bla_r_imag(const bla_scomplex *z) { return bli_cimag( *z ); } #endif blis-0.6.1/frame/compat/f2c/util/bla_r_imag.h000066400000000000000000000033271360743507500207170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS bla_real bla_r_imag(const bla_scomplex *z); #endif blis-0.6.1/frame/compat/f2c/util/bla_r_sign.c000066400000000000000000000034751360743507500207410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS double bla_r_sign(const bla_real *a, const bla_real *b) { double x = (*a >= 0.0 ? *a : - *a); return(*b >= 0.0 ? x : -x); } #endif blis-0.6.1/frame/compat/f2c/util/bla_r_sign.h000066400000000000000000000033441360743507500207410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS double bla_r_sign(const bla_real *a, const bla_real *b); #endif blis-0.6.1/frame/compat/f2c/util/bla_z_abs.c000066400000000000000000000034741360743507500205550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS double bla_z_abs(const bla_dcomplex *z) { return( bla_f__cabs( bli_zreal( *z ), bli_zimag( *z ) ) ); } #endif blis-0.6.1/frame/compat/f2c/util/bla_z_abs.h000066400000000000000000000033241360743507500205540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS double bla_z_abs(const bla_dcomplex *z); #endif blis-0.6.1/frame/compat/f2c/util/bla_z_div.c000066400000000000000000000035101360743507500205610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_BLAS void bla_z_div(bla_dcomplex *cp, const bla_dcomplex *ap, const bla_dcomplex *bp) { bli_zcopys( *ap, *cp ); bli_zinvscals( *bp, *cp ); } #endif blis-0.6.1/frame/compat/f2c/util/bla_z_div.h000066400000000000000000000033751360743507500205770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef BLIS_ENABLE_BLAS void bla_z_div(bla_dcomplex *cp, const bla_dcomplex *ap, const bla_dcomplex *bp); #endif blis-0.6.1/frame/include/000077500000000000000000000000001360743507500151765ustar00rootroot00000000000000blis-0.6.1/frame/include/bli_arch_config.h000066400000000000000000000146671360743507500204550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ARCH_CONFIG_H #define BLIS_ARCH_CONFIG_H // // -- Context initialization prototypes ---------------------------------------- // // -- Intel64 architectures -- #ifdef BLIS_CONFIG_SKX CNTX_INIT_PROTS( skx ) #endif #ifdef BLIS_CONFIG_KNL CNTX_INIT_PROTS( knl ) #endif #ifdef BLIS_CONFIG_KNC CNTX_INIT_PROTS( knc ) #endif #ifdef BLIS_CONFIG_HASWELL CNTX_INIT_PROTS( haswell ) #endif #ifdef BLIS_CONFIG_SANDYBRIDGE CNTX_INIT_PROTS( sandybridge ) #endif #ifdef BLIS_CONFIG_PENRYN CNTX_INIT_PROTS( penryn ) #endif // -- AMD64 architectures -- #ifdef BLIS_CONFIG_ZEN2 CNTX_INIT_PROTS( zen2 ) #endif #ifdef BLIS_CONFIG_ZEN CNTX_INIT_PROTS( zen ) #endif #ifdef BLIS_CONFIG_EXCAVATOR CNTX_INIT_PROTS( excavator ) #endif #ifdef BLIS_CONFIG_STEAMROLLER CNTX_INIT_PROTS( steamroller ) #endif #ifdef BLIS_CONFIG_PILEDRIVER CNTX_INIT_PROTS( piledriver ) #endif #ifdef BLIS_CONFIG_BULLDOZER CNTX_INIT_PROTS( bulldozer ) #endif // -- ARM architectures -- #ifdef BLIS_CONFIG_THUNDERX2 CNTX_INIT_PROTS( thunderx2 ) #endif #ifdef BLIS_CONFIG_CORTEXA57 CNTX_INIT_PROTS( cortexa57 ) #endif #ifdef BLIS_CONFIG_CORTEXA53 CNTX_INIT_PROTS( cortexa53 ) #endif #ifdef BLIS_CONFIG_CORTEXA15 CNTX_INIT_PROTS( cortexa15 ) #endif #ifdef BLIS_CONFIG_CORTEXA9 
CNTX_INIT_PROTS( cortexa9 ) #endif // -- IBM Power -- #ifdef BLIS_CONFIG_POWER9 CNTX_INIT_PROTS( power9 ) #endif #ifdef BLIS_CONFIG_POWER7 CNTX_INIT_PROTS( power7 ) #endif // -- IBM BG/Q -- #ifdef BLIS_CONFIG_BGQ CNTX_INIT_PROTS( bgq ) #endif // -- Generic -- #ifdef BLIS_CONFIG_GENERIC CNTX_INIT_PROTS( generic ) #endif // // -- Architecture family-specific headers ------------------------------------- // // -- x86_64 families -- #ifdef BLIS_FAMILY_INTEL64 #include "bli_family_intel64.h" #endif #ifdef BLIS_FAMILY_AMD64 #include "bli_family_amd64.h" #endif #ifdef BLIS_FAMILY_X86_64 #include "bli_family_x86_64.h" #endif // -- Intel64 architectures -- #ifdef BLIS_FAMILY_SKX #include "bli_family_skx.h" #endif #ifdef BLIS_FAMILY_KNL #include "bli_family_knl.h" #endif #ifdef BLIS_FAMILY_KNC #include "bli_family_knc.h" #endif #ifdef BLIS_FAMILY_HASWELL #include "bli_family_haswell.h" #endif #ifdef BLIS_FAMILY_SANDYBRIDGE #include "bli_family_sandybridge.h" #endif #ifdef BLIS_FAMILY_PENRYN #include "bli_family_penryn.h" #endif // -- AMD64 architectures -- #ifdef BLIS_FAMILY_ZEN2 #include "bli_family_zen2.h" #endif #ifdef BLIS_FAMILY_ZEN #include "bli_family_zen.h" #endif #ifdef BLIS_FAMILY_EXCAVATOR #include "bli_family_excavator.h" #endif #ifdef BLIS_FAMILY_STEAMROLLER #include "bli_family_steamroller.h" #endif #ifdef BLIS_FAMILY_PILEDRIVER #include "bli_family_piledriver.h" #endif #ifdef BLIS_FAMILY_BULLDOZER #include "bli_family_bulldozer.h" #endif // -- ARM architectures -- #ifdef BLIS_FAMILY_THUNDERX2 #include "bli_family_thunderx2.h" #endif #ifdef BLIS_FAMILY_CORTEXA57 #include "bli_family_cortexa57.h" #endif #ifdef BLIS_FAMILY_CORTEXA53 #include "bli_family_cortexa53.h" #endif #ifdef BLIS_FAMILY_CORTEXA15 #include "bli_family_cortexa15.h" #endif #ifdef BLIS_FAMILY_CORTEXA9 #include "bli_family_cortexa9.h" #endif // -- IBM Power -- #ifdef BLIS_FAMILY_POWER9 #include "bli_family_power9.h" #endif #ifdef BLIS_FAMILY_POWER7 #include "bli_family_power7.h" #endif // -- IBM 
BG/Q -- #ifdef BLIS_FAMILY_BGQ #include "bli_family_bgq.h" #endif // -- Generic -- #ifdef BLIS_FAMILY_GENERIC #include "bli_family_generic.h" #endif // // -- kernel set prototypes ---------------------------------------------------- // // -- Intel64 architectures -- #ifdef BLIS_KERNELS_SKX #include "bli_kernels_skx.h" #endif #ifdef BLIS_KERNELS_KNL #include "bli_kernels_knl.h" #endif #ifdef BLIS_KERNELS_KNC #include "bli_kernels_knc.h" #endif #ifdef BLIS_KERNELS_HASWELL #include "bli_kernels_haswell.h" #endif #ifdef BLIS_KERNELS_SANDYBRIDGE #include "bli_kernels_sandybridge.h" #endif #ifdef BLIS_KERNELS_PENRYN #include "bli_kernels_penryn.h" #endif // -- AMD64 architectures -- //#ifdef BLIS_KERNELS_ZEN2 //#include "bli_kernels_zen2.h" //#endif #ifdef BLIS_KERNELS_ZEN #include "bli_kernels_zen.h" #endif //#ifdef BLIS_KERNELS_EXCAVATOR //#include "bli_kernels_excavator.h" //#endif //#ifdef BLIS_KERNELS_STEAMROLLER //#include "bli_kernels_steamroller.h" //#endif #ifdef BLIS_KERNELS_PILEDRIVER #include "bli_kernels_piledriver.h" #endif #ifdef BLIS_KERNELS_BULLDOZER #include "bli_kernels_bulldozer.h" #endif // -- ARM architectures -- #ifdef BLIS_KERNELS_ARMV8A #include "bli_kernels_armv8a.h" #endif #ifdef BLIS_KERNELS_ARMV7A #include "bli_kernels_armv7a.h" #endif // -- IBM Power -- #ifdef BLIS_KERNELS_POWER9 #include "bli_kernels_power9.h" #endif #ifdef BLIS_KERNELS_POWER7 #include "bli_kernels_power7.h" #endif // -- IBM BG/Q -- #ifdef BLIS_KERNELS_BGQ #include "bli_kernels_bgq.h" #endif #endif blis-0.6.1/frame/include/bli_arch_config_pre.h000066400000000000000000000054151360743507500213120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ARCH_CONFIG_PRE_H #define BLIS_ARCH_CONFIG_PRE_H // -- Naming-related kernel definitions ---------------------------------------- // The default suffix appended to reference kernels. #define BLIS_REF_SUFFIX _ref // A suffix used for labeling certain induced method aware functions. #define BLIS_IND_SUFFIX _ind // Add an underscore to the BLIS kernel set string, if it was defined. 
#ifdef BLIS_CNAME #define BLIS_CNAME_INFIX PASTECH(_,BLIS_CNAME) #endif // Combine the CNAME and _ref for convenience to the code that defines // reference kernels. //#define BLIS_CNAME_REF_SUFFIX PASTECH2(_,BLIS_CNAME,BLIS_REF_SUFFIX) // -- Prototype-generating macro definitions ----------------------------------- // Prototype-generating macro for bli_cntx_init_*() functions. #define CNTX_INIT_PROTS( archname ) \ \ void PASTEMAC(cntx_init_,archname) \ ( \ cntx_t* cntx \ ); \ void PASTEMAC2(cntx_init_,archname,BLIS_REF_SUFFIX) \ ( \ cntx_t* cntx \ ); \ void PASTEMAC2(cntx_init_,archname,BLIS_IND_SUFFIX) \ ( \ ind_t method, \ num_t dt, \ cntx_t* cntx \ ); #endif blis-0.6.1/frame/include/bli_blas_macro_defs.h000066400000000000000000000061501360743507500213020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_BLAS_MACRO_DEFS_H #define BLIS_BLAS_MACRO_DEFS_H // -- Various Fortran compatibility macros -- // Macro to treat negative dimensions as zero. #define bli_convert_blas_dim1( n_blas, n_blis )\ { \ if ( n_blas < 0 ) n_blis = ( dim_t )0; \ else n_blis = ( dim_t )n_blas; \ } // Macro to flip signs of increments if input increments are negative. #define bli_convert_blas_incv( n, x_blas, incx_blas, \ x_blis, incx_blis ) \ { \ if ( incx_blas < 0 ) \ { \ /* The semantics of negative stride in BLAS are that the vector operand be traversed in reverse order. (Another way to think of this is that negative strides effectively reverse the order of the vector, but without any explicit data movements.) This is also how BLIS interprets negative strides. The differences is that with BLAS, the caller *always* passes in the 0th (i.e., top-most or left-most) element of the vector, even when the stride is negative. By contrast, in BLIS, negative strides are used *relative* to the vector address as it is given. Thus, in BLIS, if this backwards traversal is desired, the caller *must* pass in the address to the (n-1)th (i.e., the bottom-most or right-most) element along with a negative stride. 
*/ \ x_blis = (x_blas) + (n-1)*(-incx_blas); \ incx_blis = ( inc_t )(incx_blas); \ } \ else \ { \ x_blis = (x_blas); \ incx_blis = ( inc_t )(incx_blas); \ } \ } #endif blis-0.6.1/frame/include/bli_builtin_macro_defs.h000066400000000000000000000040041360743507500220230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_BUILTIN_MACRO_DEFS_H #define BLIS_BUILTIN_MACRO_DEFS_H #if defined(__ICC) || defined(__INTEL_COMPILER) // icc #define bli_prefetch( addr, rw, loc ) #elif defined(__clang__) // clang #define bli_prefetch( addr, rw, loc ) #elif defined(__GNUC__) // gcc #define bli_prefetch( addr, rw, loc ) __builtin_prefetch( addr, rw, loc ); #endif #endif blis-0.6.1/frame/include/bli_complex_macro_defs.h000066400000000000000000000044411360743507500220310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COMPLEX_MACRO_DEFS_H #define BLIS_COMPLEX_MACRO_DEFS_H // -- Real and imaginary accessor macros -- #define bli_sreal( x ) ( x ) #define bli_simag( x ) ( 0.0F ) #define bli_dreal( x ) ( x ) #define bli_dimag( x ) ( 0.0 ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_creal( x ) ( (x).real ) #define bli_cimag( x ) ( (x).imag ) #define bli_zreal( x ) ( (x).real ) #define bli_zimag( x ) ( (x).imag ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_creal( x ) ( crealf(x) ) #define bli_cimag( x ) ( cimagf(x) ) #define bli_zreal( x ) ( creal(x) ) #define bli_zimag( x ) ( cimag(x) ) #endif // BLIS_ENABLE_C99_COMPLEX #endif blis-0.6.1/frame/include/bli_config_macro_defs.h000066400000000000000000000205461360743507500216330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_CONFIG_MACRO_DEFS_H #define BLIS_CONFIG_MACRO_DEFS_H // -- INTEGER PROPERTIES ------------------------------------------------------- // The bit size of the integer type used to track values such as dimensions, // strides, diagonal offsets. A value of 32 results in BLIS using 32-bit signed // integers while 64 results in 64-bit integers. Any other value results in use // of the C99 type "long int". Note that this ONLY affects integers used // internally within BLIS as well as those exposed in the native BLAS-like BLIS // interface. #ifndef BLIS_INT_TYPE_SIZE #ifdef BLIS_ARCH_64 #define BLIS_INT_TYPE_SIZE 64 #else #define BLIS_INT_TYPE_SIZE 32 #endif #endif // -- FLOATING-POINT PROPERTIES ------------------------------------------------ // Enable use of built-in C99 "float complex" and "double complex" types and // associated overloaded operations and functions? Disabling results in // scomplex and dcomplex being defined in terms of simple structs. // NOTE: AVOID USING THIS FEATURE. IT IS PROBABLY BROKEN. 
#ifdef BLIS_ENABLE_C99_COMPLEX // No additional definitions needed. #else // Default behavior is disabled. #endif // -- MULTITHREADING ----------------------------------------------------------- // Enable multithreading via POSIX threads. #ifdef BLIS_ENABLE_PTHREADS // No additional definitions needed. #else // Default behavior is disabled. #endif // Enable multithreading via OpenMP. #ifdef BLIS_ENABLE_OPENMP // No additional definitions needed. #else // Default behavior is disabled. #endif // Perform a sanity check to make sure the user doesn't try to enable // both OpenMP and pthreads. #if defined ( BLIS_ENABLE_OPENMP ) && \ defined ( BLIS_ENABLE_PTHREADS ) #error "BLIS_ENABLE_OPENMP and BLIS_ENABLE_PTHREADS may not be simultaneously defined." #endif // Here, we define BLIS_ENABLE_MULTITHREADING if either OpenMP // or pthreads are enabled. This macro is useful in situations when // we want to detect use of either OpenMP or pthreads (as opposed // to neither being used). #if defined ( BLIS_ENABLE_OPENMP ) || \ defined ( BLIS_ENABLE_PTHREADS ) #define BLIS_ENABLE_MULTITHREADING #endif // -- MIXED DATATYPE SUPPORT --------------------------------------------------- // Enable mixed datatype support? #ifdef BLIS_DISABLE_MIXED_DT #undef BLIS_ENABLE_GEMM_MD #else // Default behavior is enabled. #define BLIS_ENABLE_GEMM_MD #endif // Enable memory-intensive optimizations for mixed datatype support? #ifdef BLIS_DISABLE_MIXED_DT_EXTRA_MEM #undef BLIS_ENABLE_GEMM_MD_EXTRA_MEM #else // Default behavior is enabled. #define BLIS_ENABLE_GEMM_MD_EXTRA_MEM #endif // -- MISCELLANEOUS OPTIONS ---------------------------------------------------- // Do NOT require the cross-blocksize constraints. That is, do not enforce // MC % NR = 0 and NC % MR = 0 in bli_kernel_macro_defs.h. These are ONLY // needed when implementing trsm_r by allowing the right-hand matrix B to // be triangular. 
#ifndef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS #define BLIS_RELAX_MCNR_NCMR_CONSTRAINTS #endif // Stay initialized after auto-initialization, unless and until the user // explicitly calls bli_finalize(). #ifdef BLIS_DISABLE_STAY_AUTO_INITIALIZED #undef BLIS_ENABLE_STAY_AUTO_INITIALIZED #else // Default behavior is enabled. #undef BLIS_ENABLE_STAY_AUTO_INITIALIZED // In case user explicitly enabled. #define BLIS_ENABLE_STAY_AUTO_INITIALIZED #endif // -- BLAS COMPATIBILITY LAYER ------------------------------------------------- // Enable the BLAS compatibility layer? #ifdef BLIS_DISABLE_BLAS #undef BLIS_ENABLE_BLAS #else // Default behavior is enabled. #undef BLIS_ENABLE_BLAS // In case user explicitly enabled. #define BLIS_ENABLE_BLAS #endif // The bit size of the integer type used to track values such as dimensions and // leading dimensions (ie: column strides) within the BLAS compatibility layer. // A value of 32 results in the compatibility layer using 32-bit signed integers // while 64 results in 64-bit integers. Any other value results in use of the // C99 type "long int". Note that this ONLY affects integers used within the // BLAS compatibility layer. #ifndef BLIS_BLAS_INT_TYPE_SIZE #define BLIS_BLAS_INT_TYPE_SIZE 32 #endif // By default, the level-3 BLAS routines are implemented by directly calling // the BLIS object API. Alternatively, they may first call the typed BLIS // API, which will then call the object API. //#define BLIS_BLAS3_CALLS_TAPI #ifdef BLIS_BLAS3_CALLS_TAPI #undef BLIS_BLAS3_CALLS_OAPI #else // Default behavior is to call object API directly. #undef BLIS_BLAS3_CALLS_OAPI // In case user explicitly enabled. #define BLIS_BLAS3_CALLS_OAPI #endif // -- CBLAS COMPATIBILITY LAYER ------------------------------------------------ // Enable the CBLAS compatibility layer? // NOTE: Enabling CBLAS will automatically enable the BLAS compatibility layer // regardless of whether or not it was explicitly enabled above. 
Furthermore, // the CBLAS compatibility layer will use the integer type size definition // specified above when defining the size of its own integers (regardless of // whether the BLAS layer was enabled directly or indirectly). #ifdef BLIS_ENABLE_CBLAS // No additional definitions needed. #else // Default behavior is disabled. #endif // -- SHARED LIBRARY SYMBOL EXPORT --------------------------------------------- // When building shared libraries, we can control which symbols are exported for // linking by external applications. BLIS annotates all function prototypes that // are meant to be "public" with BLIS_EXPORT_BLIS (with BLIS_EXPORT_BLAS playing // a similar role for BLAS compatibility routines). Which symbols are exported // is controlled by the default symbol visibility, as specifed by the gcc option // -fvisibility=[default|hidden]. The default for this option is 'default', or, // "public", which, if allowed to stand, causes all symbols in BLIS to be // linkable from the outside. But when compiling with -fvisibility=hidden, all // symbols start out hidden (that is, restricted only for internal use by BLIS), // with that setting overridden only for function prototypes or variable // declarations that are annotated with BLIS_EXPORT_BLIS. #ifndef BLIS_EXPORT #if !defined(BLIS_ENABLE_SHARED) #define BLIS_EXPORT #else #if defined(_WIN32) || defined(__CYGWIN__) #ifdef BLIS_IS_BUILDING_LIBRARY #define BLIS_EXPORT __declspec(dllexport) #else #define BLIS_EXPORT __declspec(dllimport) #endif #elif defined(__GNUC__) && __GNUC__ >= 4 #define BLIS_EXPORT __attribute__ ((visibility ("default"))) #else #define BLIS_EXPORT #endif #endif #endif #define BLIS_EXPORT_BLIS BLIS_EXPORT #define BLIS_EXPORT_BLAS BLIS_EXPORT #endif blis-0.6.1/frame/include/bli_error_macro_defs.h000066400000000000000000000040511360743507500215100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ERROR_MACRO_DEFS_H #define BLIS_ERROR_MACRO_DEFS_H // -- Error-related macros -- // Used to determine the size of the array of error strings. #define BLIS_MAX_NUM_ERR_MSGS 200 #define BLIS_MAX_ERR_MSG_LENGTH 200 // Used to insert filenames and line numbers into error-checking code. 
#define bli_check_error_code( code ) \ bli_check_error_code_helper( code, __FILE__, __LINE__ ) #endif blis-0.6.1/frame/include/bli_extern_defs.h000066400000000000000000000042611360743507500205060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_EXTERN_DEFS_H #define BLIS_EXTERN_DEFS_H BLIS_EXPORT_BLIS extern obj_t BLIS_TWO; BLIS_EXPORT_BLIS extern obj_t BLIS_ONE; //BLIS_EXPORT_BLIS extern obj_t BLIS_ONE_HALF; BLIS_EXPORT_BLIS extern obj_t BLIS_ZERO; //BLIS_EXPORT_BLIS extern obj_t BLIS_MINUS_ONE_HALF; BLIS_EXPORT_BLIS extern obj_t BLIS_MINUS_ONE; BLIS_EXPORT_BLIS extern obj_t BLIS_MINUS_TWO; BLIS_EXPORT_BLIS extern thrcomm_t BLIS_SINGLE_COMM; BLIS_EXPORT_BLIS extern thrinfo_t BLIS_PACKM_SINGLE_THREADED; BLIS_EXPORT_BLIS extern thrinfo_t BLIS_GEMM_SINGLE_THREADED; #endif blis-0.6.1/frame/include/bli_f2c.h000066400000000000000000000043751360743507500166600ustar00rootroot00000000000000// f2c.h -- Standard Fortran to C header file // barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." // - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) #ifndef BLIS_F2C_H #define BLIS_F2C_H typedef f77_int bla_integer; typedef f77_char bla_character; //typedef char *address; //typedef short int shortint; typedef float bla_real; typedef double bla_double; typedef scomplex bla_scomplex; typedef dcomplex bla_dcomplex; typedef f77_int bla_logical; //typedef short int shortlogical; //typedef char logical1; //typedef char integer1; #ifdef INTEGER_STAR_8 // Adjust for integer*8. typedef long long longint; // system-dependent typedef unsigned long long ulongint; // system-dependent #define qbit_clear(a,b) ((a) & ~((ulongint)1 << (b))) #define qbit_set(a,b) ((a) | ((ulongint)1 << (b))) #endif #ifndef TRUE_ #define TRUE_ (1) #endif #ifndef FALSE_ #define FALSE_ (0) #endif // Extern is for use with -E #ifndef Extern #define Extern extern #endif // I/O stuff #ifdef f2c_i2 // for -i2 //typedef short flag; //typedef short ftnlen; typedef bla_integer ftnlen; //typedef short ftnint; #else //typedef long int flag; //typedef long int ftnlen; typedef bla_integer ftnlen; //typedef long int ftnint; #endif #ifndef VOID #define VOID void #endif #ifndef f2c_abs #define f2c_abs(x) ((x) >= 0 ? 
(x) : -(x)) #endif #ifndef f2c_dabs #define f2c_dabs(x) (doublereal)f2c_abs(x) #endif #ifndef f2c_min #define f2c_min(a,b) ((a) <= (b) ? (a) : (b)) #endif #ifndef f2c_max #define f2c_max(a,b) ((a) >= (b) ? (a) : (b)) #endif #ifndef f2c_dmin #define f2c_dmin(a,b) (doublereal)f2c_min(a,b) #endif #ifndef f2c_dmax #define f2c_dmax(a,b) (doublereal)f2c_max(a,b) #endif #ifndef bit_test #define bit_test(a,b) ((a) >> (b) & 1) #endif #ifndef bit_clear #define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) #endif #ifndef bit_set #define bit_set(a,b) ((a) | ((uinteger)1 << (b))) #endif // undef any lower-case symbols that your C compiler predefines, e.g.: #ifndef Skip_f2c_Undefs #undef cray #undef gcos #undef mc68010 #undef mc68020 #undef mips #undef pdp11 #undef sgi #undef sparc #undef sun #undef sun2 #undef sun3 #undef sun4 #undef u370 #undef u3b #undef u3b2 #undef u3b5 #undef unix #undef vax #endif #endif blis-0.6.1/frame/include/bli_genarray_macro_defs.h000066400000000000000000000244421360743507500221750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_GENARRAY_MACRO_DEFS_H #define BLIS_GENARRAY_MACRO_DEFS_H // -- Macros to generate function arrays --------------------------------------- // -- "Smart" one-operand macro -- #define GENARRAY_FPA(tname,opname) \ \ static tname PASTECH(opname,_fpa)[BLIS_NUM_FP_TYPES] = \ { \ ( tname )PASTEMAC(s,opname), \ ( tname )PASTEMAC(c,opname), \ ( tname )PASTEMAC(d,opname), \ ( tname )PASTEMAC(z,opname) \ } // -- "Smart" one-operand macro (with integer support) -- #define GENARRAY_FPA_I(tname,opname) \ \ static tname PASTECH(opname,_fpa)[BLIS_NUM_FP_TYPES+1] = \ { \ ( tname )PASTEMAC(s,opname), \ ( tname )PASTEMAC(c,opname), \ ( tname )PASTEMAC(d,opname), \ ( tname )PASTEMAC(z,opname), \ ( tname )PASTEMAC(i,opname) \ } // -- "Smart" two-operand macro -- #define GENARRAY_FPA2(tname,op) \ \ static tname PASTECH(op,_fpa2)[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { ( tname )PASTEMAC2(s,s,op), ( tname )PASTEMAC2(s,c,op), ( tname )PASTEMAC2(s,d,op), ( tname )PASTEMAC2(s,z,op) }, \ { ( tname )PASTEMAC2(c,s,op), ( tname )PASTEMAC2(c,c,op), ( tname )PASTEMAC2(c,d,op), ( tname )PASTEMAC2(c,z,op) }, \ { ( tname )PASTEMAC2(d,s,op), ( tname )PASTEMAC2(d,c,op), ( tname )PASTEMAC2(d,d,op), ( tname )PASTEMAC2(d,z,op) }, \ { ( tname 
)PASTEMAC2(z,s,op), ( tname )PASTEMAC2(z,c,op), ( tname )PASTEMAC2(z,d,op), ( tname )PASTEMAC2(z,z,op) } \ } // -- "Smart" two-operand macro -- /* #define GENARRAY2_VFP(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { PASTEMAC2(s,s,op), PASTEMAC2(s,c,op), PASTEMAC2(s,d,op), PASTEMAC2(s,z,op) }, \ { PASTEMAC2(c,s,op), PASTEMAC2(c,c,op), PASTEMAC2(c,d,op), PASTEMAC2(c,z,op) }, \ { PASTEMAC2(d,s,op), PASTEMAC2(d,c,op), PASTEMAC2(d,d,op), PASTEMAC2(d,z,op) }, \ { PASTEMAC2(z,s,op), PASTEMAC2(z,c,op), PASTEMAC2(z,d,op), PASTEMAC2(z,z,op) } \ } */ // -- One-operand macro -- #define GENARRAY(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES] = \ { \ PASTEMAC(s,op), \ PASTEMAC(c,op), \ PASTEMAC(d,op), \ PASTEMAC(z,op) \ } #define GENARRAY_I(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES+1] = \ { \ PASTEMAC(s,op), \ PASTEMAC(c,op), \ PASTEMAC(d,op), \ PASTEMAC(z,op), \ PASTEMAC(i,op) \ } /* #define GENARRAYR(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { PASTEMAC2(s,s,op), NULL, PASTEMAC2(s,d,op), NULL, }, \ { PASTEMAC2(c,s,op), NULL, PASTEMAC2(c,d,op), NULL, }, \ { PASTEMAC2(d,s,op), NULL, PASTEMAC2(d,d,op), NULL, }, \ { PASTEMAC2(z,s,op), NULL, PASTEMAC2(z,d,op), NULL, } \ } */ // -- Two-operand macros -- #define GENARRAY2_ALL(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { PASTEMAC2(s,s,op), PASTEMAC2(s,c,op), PASTEMAC2(s,d,op), PASTEMAC2(s,z,op) }, \ { PASTEMAC2(c,s,op), PASTEMAC2(c,c,op), PASTEMAC2(c,d,op), PASTEMAC2(c,z,op) }, \ { PASTEMAC2(d,s,op), PASTEMAC2(d,c,op), PASTEMAC2(d,d,op), PASTEMAC2(d,z,op) }, \ { PASTEMAC2(z,s,op), PASTEMAC2(z,c,op), PASTEMAC2(z,d,op), PASTEMAC2(z,z,op) } \ } #define GENARRAY2_EXT(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { PASTEMAC2(s,s,op), PASTEMAC2(s,c,op), NULL, NULL, }, \ { PASTEMAC2(c,s,op), PASTEMAC2(c,c,op), NULL, NULL, }, \ { NULL, NULL, PASTEMAC2(d,d,op), PASTEMAC2(d,z,op) }, \ { NULL, NULL, PASTEMAC2(z,d,op), 
PASTEMAC2(z,z,op) } \ } #define GENARRAY2_MIN(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { PASTEMAC2(s,s,op), NULL, NULL, NULL, }, \ { NULL, PASTEMAC2(c,c,op), NULL, NULL, }, \ { NULL, NULL, PASTEMAC2(d,d,op), NULL, }, \ { NULL, NULL, NULL, PASTEMAC2(z,z,op) } \ } // -- Three-operand macros -- #define GENARRAY3_ALL(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { \ { PASTEMAC3(s,s,s,op), PASTEMAC3(s,s,c,op), PASTEMAC3(s,s,d,op), PASTEMAC3(s,s,z,op) }, \ { PASTEMAC3(s,c,s,op), PASTEMAC3(s,c,c,op), PASTEMAC3(s,c,d,op), PASTEMAC3(s,c,z,op) }, \ { PASTEMAC3(s,d,s,op), PASTEMAC3(s,d,c,op), PASTEMAC3(s,d,d,op), PASTEMAC3(s,d,z,op) }, \ { PASTEMAC3(s,z,s,op), PASTEMAC3(s,z,c,op), PASTEMAC3(s,z,d,op), PASTEMAC3(s,z,z,op) } \ }, \ { \ { PASTEMAC3(c,s,s,op), PASTEMAC3(c,s,c,op), PASTEMAC3(c,s,d,op), PASTEMAC3(c,s,z,op) }, \ { PASTEMAC3(c,c,s,op), PASTEMAC3(c,c,c,op), PASTEMAC3(c,c,d,op), PASTEMAC3(c,c,z,op) }, \ { PASTEMAC3(c,d,s,op), PASTEMAC3(c,d,c,op), PASTEMAC3(c,d,d,op), PASTEMAC3(c,d,z,op) }, \ { PASTEMAC3(c,z,s,op), PASTEMAC3(c,z,c,op), PASTEMAC3(c,z,d,op), PASTEMAC3(c,z,z,op) } \ }, \ { \ { PASTEMAC3(d,s,s,op), PASTEMAC3(d,s,c,op), PASTEMAC3(d,s,d,op), PASTEMAC3(d,s,z,op) }, \ { PASTEMAC3(d,c,s,op), PASTEMAC3(d,c,c,op), PASTEMAC3(d,c,d,op), PASTEMAC3(d,c,z,op) }, \ { PASTEMAC3(d,d,s,op), PASTEMAC3(d,d,c,op), PASTEMAC3(d,d,d,op), PASTEMAC3(d,d,z,op) }, \ { PASTEMAC3(d,z,s,op), PASTEMAC3(d,z,c,op), PASTEMAC3(d,z,d,op), PASTEMAC3(d,z,z,op) } \ }, \ { \ { PASTEMAC3(z,s,s,op), PASTEMAC3(z,s,c,op), PASTEMAC3(z,s,d,op), PASTEMAC3(z,s,z,op) }, \ { PASTEMAC3(z,c,s,op), PASTEMAC3(z,c,c,op), PASTEMAC3(z,c,d,op), PASTEMAC3(z,c,z,op) }, \ { PASTEMAC3(z,d,s,op), PASTEMAC3(z,d,c,op), PASTEMAC3(z,d,d,op), PASTEMAC3(z,d,z,op) }, \ { PASTEMAC3(z,z,s,op), PASTEMAC3(z,z,c,op), PASTEMAC3(z,z,d,op), PASTEMAC3(z,z,z,op) } \ } \ } #define GENARRAY3_EXT(arrayname,op) \ \ 
arrayname[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { \ { PASTEMAC3(s,s,s,op), PASTEMAC3(s,s,c,op), NULL, NULL, }, \ { PASTEMAC3(s,c,s,op), PASTEMAC3(s,c,c,op), NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, } \ }, \ { \ { PASTEMAC3(c,s,s,op), PASTEMAC3(c,s,c,op), NULL, NULL, }, \ { PASTEMAC3(c,c,s,op), PASTEMAC3(c,c,c,op), NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, } \ }, \ { \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, PASTEMAC3(d,d,d,op), PASTEMAC3(d,d,z,op) }, \ { NULL, NULL, PASTEMAC3(d,z,d,op), PASTEMAC3(d,z,z,op) } \ }, \ { \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, PASTEMAC3(z,d,d,op), PASTEMAC3(z,d,z,op) }, \ { NULL, NULL, PASTEMAC3(z,z,d,op), PASTEMAC3(z,z,z,op) } \ } \ } #define GENARRAY3_MIN(arrayname,op) \ \ arrayname[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = \ { \ { \ { PASTEMAC3(s,s,s,op), NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, } \ }, \ { \ { NULL, NULL, NULL, NULL, }, \ { NULL, PASTEMAC3(c,c,c,op), NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, } \ }, \ { \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, PASTEMAC3(d,d,d,op), NULL, }, \ { NULL, NULL, NULL, NULL, } \ }, \ { \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, NULL, }, \ { NULL, NULL, NULL, PASTEMAC3(z,z,z,op) } \ } \ } #endif blis-0.6.1/frame/include/bli_gentdef_macro_defs.h000066400000000000000000000054671360743507500220070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_GENTDEF_MACRO_DEFS_H #define BLIS_GENTDEF_MACRO_DEFS_H // // -- MACROS TO INSERT TYPEDEF-GENERATING MACROS ------------------------------- // // -- function typedef macro (both typed and void) -- #define INSERT_GENTDEF( opname ) \ \ GENTDEF( float, s, opname, _ft ) \ GENTDEF( double, d, opname, _ft ) \ GENTDEF( scomplex, c, opname, _ft ) \ GENTDEF( dcomplex, z, opname, _ft ) \ \ GENTDEF( void, s, opname, _vft ) \ GENTDEF( void, d, opname, _vft ) \ GENTDEF( void, c, opname, _vft ) \ GENTDEF( void, z, opname, _vft ) \ \ GENTDEF( void, , opname, _vft ) // -- function typedef macro (both typed and void) with real projection -- #define INSERT_GENTDEFR( opname ) \ \ GENTDEFR( float, float, s, s, opname, _ft ) \ GENTDEFR( double, double, d, d, opname, _ft ) \ GENTDEFR( scomplex, float, c, s, opname, _ft ) \ GENTDEFR( dcomplex, double, z, d, opname, _ft ) \ \ GENTDEFR( void, void, s, s, opname, _vft ) \ GENTDEFR( void, void, d, d, opname, _vft ) \ GENTDEFR( void, void, c, s, opname, _vft ) \ GENTDEFR( void, void, z, d, opname, _vft ) \ \ GENTDEFR( void, void, , , opname, _vft ) #endif blis-0.6.1/frame/include/bli_gentfunc_macro_defs.h000066400000000000000000001514501360743507500221760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_GENTFUNC_MACRO_DEFS_H #define BLIS_GENTFUNC_MACRO_DEFS_H // // -- MACROS TO INSERT FUNCTION-GENERATING MACROS ------------------------------ // // -- Macros for generating BLAS routines -------------------------------------- // -- Basic one-operand macro -- #define INSERT_GENTFUNC_BLAS( blasname, blisname ) \ \ GENTFUNC( float, s, blasname, blisname ) \ GENTFUNC( double, d, blasname, blisname ) \ GENTFUNC( scomplex, c, blasname, blisname ) \ GENTFUNC( dcomplex, z, blasname, blisname ) // -- Basic one-operand macro with real domain only -- #define INSERT_GENTFUNCRO_BLAS( blasname, blisname ) \ \ GENTFUNCRO( float, s, blasname, blisname ) \ GENTFUNCRO( double, d, blasname, blisname ) // -- Basic one-operand macro with complex domain only and real projection -- #define INSERT_GENTFUNCCO_BLAS( blasname, blisname ) \ \ GENTFUNCCO( scomplex, float, c, s, blasname, blisname ) \ GENTFUNCCO( dcomplex, double, z, d, blasname, blisname ) // -- Basic one-operand macro with conjugation (used only for dot, ger) -- #define 
INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \ \ GENTFUNCDOT( float, s, , BLIS_NO_CONJUGATE, blasname, blisname ) \ GENTFUNCDOT( double, d, , BLIS_NO_CONJUGATE, blasname, blisname ) \ GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE, blasname, blisname ) \ GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \ GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE, blasname, blisname ) \ GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname ) // -- Basic one-operand macro with real projection -- #define INSERT_GENTFUNCR_BLAS( rblasname, cblasname, blisname ) \ \ GENTFUNCR( float, float, s, s, rblasname, blisname ) \ GENTFUNCR( double, double, d, d, rblasname, blisname ) \ GENTFUNCR( scomplex, float, c, s, cblasname, blisname ) \ GENTFUNCR( dcomplex, double, z, d, cblasname, blisname ) // -- Alternate two-operand macro (one char for complex, one for real proj) -- #define INSERT_GENTFUNCR2_BLAS( blasname, blisname ) \ \ GENTFUNCR2( float, float, s, , blasname, blisname ) \ GENTFUNCR2( double, double, d, , blasname, blisname ) \ GENTFUNCR2( scomplex, float, c, s, blasname, blisname ) \ GENTFUNCR2( dcomplex, double, z, d, blasname, blisname ) // -- Extended two-operand macro (used only for scal) -- #define INSERT_GENTFUNCSCAL_BLAS( blasname, blisname ) \ \ GENTFUNCSCAL( float, float, s, , blasname, blisname ) \ GENTFUNCSCAL( double, double, d, , blasname, blisname ) \ GENTFUNCSCAL( scomplex, scomplex, c, , blasname, blisname ) \ GENTFUNCSCAL( dcomplex, dcomplex, z, , blasname, blisname ) \ GENTFUNCSCAL( scomplex, float, c, s, blasname, blisname ) \ GENTFUNCSCAL( dcomplex, double, z, d, blasname, blisname ) // -- Macros for functions with one operand ------------------------------------ // -- Basic one-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC_BASIC0( tfuncname ) \ \ GENTFUNC( float, s, tfuncname ) \ GENTFUNC( double, d, tfuncname ) \ GENTFUNC( scomplex, c, tfuncname ) \ GENTFUNC( dcomplex, z, tfuncname ) // -- (one 
auxiliary argument) -- #define INSERT_GENTFUNC_BASIC( tfuncname, varname ) \ \ GENTFUNC( float, s, tfuncname, varname ) \ GENTFUNC( double, d, tfuncname, varname ) \ GENTFUNC( scomplex, c, tfuncname, varname ) \ GENTFUNC( dcomplex, z, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTFUNC_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTFUNC( float, s, tfuncname, varname1, varname2 ) \ GENTFUNC( double, d, tfuncname, varname1, varname2 ) \ GENTFUNC( scomplex, c, tfuncname, varname1, varname2 ) \ GENTFUNC( dcomplex, z, tfuncname, varname1, varname2 ) // -- (three auxiliary arguments) -- #define INSERT_GENTFUNC_BASIC3( tfuncname, varname1, varname2, varname3 ) \ \ GENTFUNC( float, s, tfuncname, varname1, varname2, varname3 ) \ GENTFUNC( double, d, tfuncname, varname1, varname2, varname3 ) \ GENTFUNC( scomplex, c, tfuncname, varname1, varname2, varname3 ) \ GENTFUNC( dcomplex, z, tfuncname, varname1, varname2, varname3 ) // -- (four auxiliary arguments) -- #define INSERT_GENTFUNC_BASIC4( tfuncname, varname1, varname2, varname3, varname4 ) \ \ GENTFUNC( float, s, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTFUNC( double, d, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTFUNC( scomplex, c, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTFUNC( dcomplex, z, tfuncname, varname1, varname2, varname3, varname4 ) // -- Basic one-operand with real projection -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNCR_BASIC0( tfuncname ) \ \ GENTFUNCR( float, float, s, s, tfuncname ) \ GENTFUNCR( double, double, d, d, tfuncname ) \ GENTFUNCR( scomplex, float, c, s, tfuncname ) \ GENTFUNCR( dcomplex, double, z, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNCR_BASIC( tfuncname, varname ) \ \ GENTFUNCR( float, float, s, s, tfuncname, varname ) \ GENTFUNCR( double, double, d, d, tfuncname, varname ) \ GENTFUNCR( scomplex, float, c, s, tfuncname, varname ) \ GENTFUNCR( dcomplex, double, z, d, 
tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTFUNCR_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTFUNCR( float, float, s, s, tfuncname, varname1, varname2 ) \ GENTFUNCR( double, double, d, d, tfuncname, varname1, varname2 ) \ GENTFUNCR( scomplex, float, c, s, tfuncname, varname1, varname2 ) \ GENTFUNCR( dcomplex, double, z, d, tfuncname, varname1, varname2 ) // -- (three auxiliary arguments) -- #define INSERT_GENTFUNCR_BASIC3( tfuncname, varname1, varname2, varname3 ) \ \ GENTFUNCR( float, float, s, s, tfuncname, varname1, varname2, varname3 ) \ GENTFUNCR( double, double, d, d, tfuncname, varname1, varname2, varname3 ) \ GENTFUNCR( scomplex, float, c, s, tfuncname, varname1, varname2, varname3 ) \ GENTFUNCR( dcomplex, double, z, d, tfuncname, varname1, varname2, varname3 ) // -- (four auxiliary arguments) -- #define INSERT_GENTFUNCR_BASIC4( tfuncname, varname1, varname2, varname3, varname4 ) \ \ GENTFUNCR( float, float, s, s, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTFUNCR( double, double, d, d, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTFUNCR( scomplex, float, c, s, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTFUNCR( dcomplex, double, z, d, tfuncname, varname1, varname2, varname3, varname4 ) // -- Basic one-operand macro with real domain only -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNCRO_BASIC0( tfuncname ) \ \ GENTFUNCRO( float, s, tfuncname ) \ GENTFUNCRO( double, d, tfuncname ) \ // -- (one auxiliary argument) -- #define INSERT_GENTFUNCRO_BASIC( tfuncname, varname ) \ \ GENTFUNCRO( float, s, tfuncname, varname ) \ GENTFUNCRO( double, d, tfuncname, varname ) \ // -- Basic one-operand macro with complex domain only and real projection -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNCCO_BASIC0( tfuncname ) \ \ GENTFUNCCO( scomplex, float, c, s, tfuncname ) \ GENTFUNCCO( dcomplex, double, z, d, tfuncname ) // -- (one auxiliary argument) -- #define 
INSERT_GENTFUNCCO_BASIC( tfuncname, varname ) \ \ GENTFUNCCO( scomplex, float, c, s, tfuncname, varname ) \ GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTFUNCCO_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTFUNCCO( scomplex, float, c, s, tfuncname, varname1, varname2 ) \ GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname1, varname2 ) // -- (three auxiliary arguments) -- #define INSERT_GENTFUNCCO_BASIC3( tfuncname, varname1, varname2, varname3 ) \ \ GENTFUNCCO( scomplex, float, c, s, tfuncname, varname1, varname2, varname3 ) \ GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname1, varname2, varname3 ) // -- Basic one-operand macro with integer instance -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC_BASIC0_I( tfuncname ) \ \ GENTFUNC( float, s, tfuncname ) \ GENTFUNC( double, d, tfuncname ) \ GENTFUNC( scomplex, c, tfuncname ) \ GENTFUNC( dcomplex, z, tfuncname ) \ GENTFUNC( gint_t, i, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC_BASIC_I( tfuncname, varname ) \ \ GENTFUNC( float, s, tfuncname, varname ) \ GENTFUNC( double, d, tfuncname, varname ) \ GENTFUNC( scomplex, c, tfuncname, varname ) \ GENTFUNC( dcomplex, z, tfuncname, varname ) \ GENTFUNC( gint_t, i, tfuncname, varname ) // -- Basic one-operand with integer projection -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNCI_BASIC0( tfuncname ) \ \ GENTFUNCI( float, gint_t, s, i, tfuncname ) \ GENTFUNCI( double, gint_t, d, i, tfuncname ) \ GENTFUNCI( scomplex, gint_t, c, i, tfuncname ) \ GENTFUNCI( dcomplex, gint_t, z, i, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNCI_BASIC( tfuncname, varname ) \ \ GENTFUNCI( float, gint_t, s, i, tfuncname, varname ) \ GENTFUNCI( double, gint_t, d, i, tfuncname, varname ) \ GENTFUNCI( scomplex, gint_t, c, i, tfuncname, varname ) \ GENTFUNCI( dcomplex, gint_t, z, i, tfuncname, varname ) // -- Basic one-operand with real and integer 
projections -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNCRI_BASIC0( tfuncname ) \ \ GENTFUNCRI( float, float, gint_t, s, s, i, tfuncname ) \ GENTFUNCRI( double, double, gint_t, d, d, i, tfuncname ) \ GENTFUNCRI( scomplex, float, gint_t, c, s, i, tfuncname ) \ GENTFUNCRI( dcomplex, double, gint_t, z, d, i, tfuncname ) // -- Macros for functions with two primary operands --------------------------- // -- Basic two-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC2_BASIC0( tfuncname ) \ \ GENTFUNC2( float, float, s, s, tfuncname ) \ GENTFUNC2( double, double, d, d, tfuncname ) \ GENTFUNC2( scomplex, scomplex, c, c, tfuncname ) \ GENTFUNC2( dcomplex, dcomplex, z, z, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC2_BASIC( tfuncname, varname ) \ \ GENTFUNC2( float, float, s, s, tfuncname, varname ) \ GENTFUNC2( double, double, d, d, tfuncname, varname ) \ GENTFUNC2( scomplex, scomplex, c, c, tfuncname, varname ) \ GENTFUNC2( dcomplex, dcomplex, z, z, tfuncname, varname ) // -- Mixed domain two-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC2_MIX_D0( tfuncname ) \ \ GENTFUNC2( float, scomplex, s, c, tfuncname ) \ GENTFUNC2( scomplex, float, c, s, tfuncname ) \ \ GENTFUNC2( double, dcomplex, d, z, tfuncname ) \ GENTFUNC2( dcomplex, double, z, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC2_MIX_D( tfuncname, varname ) \ \ GENTFUNC2( float, scomplex, s, c, tfuncname, varname ) \ GENTFUNC2( scomplex, float, c, s, tfuncname, varname ) \ \ GENTFUNC2( double, dcomplex, d, z, tfuncname, varname ) \ GENTFUNC2( dcomplex, double, z, d, tfuncname, varname ) // -- Mixed precision two-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC2_MIX_P0( tfuncname ) \ \ GENTFUNC2( float, double, s, d, tfuncname ) \ GENTFUNC2( float, dcomplex, s, z, tfuncname ) \ \ GENTFUNC2( double, float, d, s, tfuncname ) \ GENTFUNC2( double, scomplex, d, c, tfuncname ) \ \ 
GENTFUNC2( scomplex, double, c, d, tfuncname ) \ GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \ \ GENTFUNC2( dcomplex, float, z, s, tfuncname ) \ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname ) \ // -- (one auxiliary argument) -- #define INSERT_GENTFUNC2_MIX_P( tfuncname, varname ) \ \ GENTFUNC2( float, double, s, d, tfuncname, varname ) \ GENTFUNC2( float, dcomplex, s, z, tfuncname, varname ) \ \ GENTFUNC2( double, float, d, s, tfuncname, varname ) \ GENTFUNC2( double, scomplex, d, c, tfuncname, varname ) \ \ GENTFUNC2( scomplex, double, c, d, tfuncname, varname ) \ GENTFUNC2( scomplex, dcomplex, c, z, tfuncname, varname ) \ \ GENTFUNC2( dcomplex, float, z, s, tfuncname, varname ) \ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) \ // -- Mixed domain/precision (all) two-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC2_MIXDP0( tfuncname ) \ \ GENTFUNC2( float, double, s, d, tfuncname ) \ GENTFUNC2( float, scomplex, s, c, tfuncname ) \ GENTFUNC2( float, dcomplex, s, z, tfuncname ) \ \ GENTFUNC2( double, float, d, s, tfuncname ) \ GENTFUNC2( double, scomplex, d, c, tfuncname ) \ GENTFUNC2( double, dcomplex, d, z, tfuncname ) \ \ GENTFUNC2( scomplex, float, c, s, tfuncname ) \ GENTFUNC2( scomplex, double, c, d, tfuncname ) \ GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \ \ GENTFUNC2( dcomplex, float, z, s, tfuncname ) \ GENTFUNC2( dcomplex, double, z, d, tfuncname ) \ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC2_MIX_DP( tfuncname, varname ) \ \ GENTFUNC2( float, double, s, d, tfuncname, varname ) \ GENTFUNC2( float, scomplex, s, c, tfuncname, varname ) \ GENTFUNC2( float, dcomplex, s, z, tfuncname, varname ) \ \ GENTFUNC2( double, float, d, s, tfuncname, varname ) \ GENTFUNC2( double, scomplex, d, c, tfuncname, varname ) \ GENTFUNC2( double, dcomplex, d, z, tfuncname, varname ) \ \ GENTFUNC2( scomplex, float, c, s, tfuncname, varname ) \ GENTFUNC2( scomplex, 
double, c, d, tfuncname, varname ) \ GENTFUNC2( scomplex, dcomplex, c, z, tfuncname, varname ) \ \ GENTFUNC2( dcomplex, float, z, s, tfuncname, varname ) \ GENTFUNC2( dcomplex, double, z, d, tfuncname, varname ) \ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) // -- Basic two-operand with real projection of second operand -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC2R_BASIC0( tfuncname ) \ \ GENTFUNC2R( float, float, float, s, s, s, tfuncname ) \ GENTFUNC2R( double, double, double, d, d, d, tfuncname ) \ GENTFUNC2R( scomplex, scomplex, float, c, c, s, tfuncname ) \ GENTFUNC2R( dcomplex, dcomplex, double, z, z, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC2R_BASIC( tfuncname, varname ) \ \ GENTFUNC2R( float, float, float, s, s, s, tfuncname, varname ) \ GENTFUNC2R( double, double, double, d, d, d, tfuncname, varname ) \ GENTFUNC2R( scomplex, scomplex, float, c, c, s, tfuncname, varname ) \ GENTFUNC2R( dcomplex, dcomplex, double, z, z, d, tfuncname, varname ) // -- Mixed domain two-operand with real projection of second operand -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC2R_MIX_D0( tfuncname ) \ \ GENTFUNC2R( float, scomplex, float, s, c, s, tfuncname ) \ GENTFUNC2R( scomplex, float, float, c, s, s, tfuncname ) \ \ GENTFUNC2R( double, dcomplex, double, d, z, d, tfuncname ) \ GENTFUNC2R( dcomplex, double, double, z, d, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC2R_MIX_D( tfuncname, varname ) \ \ GENTFUNC2R( float, scomplex, float, s, c, s, tfuncname, varname ) \ GENTFUNC2R( scomplex, float, float, c, s, s, tfuncname, varname ) \ \ GENTFUNC2R( double, dcomplex, double, d, z, d, tfuncname, varname ) \ GENTFUNC2R( dcomplex, double, double, z, d, d, tfuncname, varname ) // -- Mixed precision two-operand with real projection of second operand -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC2R_MIX_P0( tfuncname ) \ \ GENTFUNC2R( float, double, double, s, d, d, 
tfuncname ) \ GENTFUNC2R( float, dcomplex, double, s, z, d, tfuncname ) \ \ GENTFUNC2R( double, float, float, d, s, s, tfuncname ) \ GENTFUNC2R( double, scomplex, float, d, c, s, tfuncname ) \ \ GENTFUNC2R( scomplex, double, double, c, d, d, tfuncname ) \ GENTFUNC2R( scomplex, dcomplex, double, c, z, d, tfuncname ) \ \ GENTFUNC2R( dcomplex, float, float, z, s, s, tfuncname ) \ GENTFUNC2R( dcomplex, scomplex, float, z, c, s, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC2R_MIX_P( tfuncname, varname ) \ \ GENTFUNC2R( float, double, double, s, d, d, tfuncname, varname ) \ GENTFUNC2R( float, dcomplex, double, s, z, d, tfuncname, varname ) \ \ GENTFUNC2R( double, float, float, d, s, s, tfuncname, varname ) \ GENTFUNC2R( double, scomplex, float, d, c, s, tfuncname, varname ) \ \ GENTFUNC2R( scomplex, double, double, c, d, d, tfuncname, varname ) \ GENTFUNC2R( scomplex, dcomplex, double, c, z, d, tfuncname, varname ) \ \ GENTFUNC2R( dcomplex, float, float, z, s, s, tfuncname, varname ) \ GENTFUNC2R( dcomplex, scomplex, float, z, c, s, tfuncname, varname ) // -- Mixed domain/precision (all) two-operand macro with real projection of second operand -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC2R_MIXDP0( tfuncname ) \ \ GENTFUNC2R( float, double, double, s, d, d, tfuncname ) \ GENTFUNC2R( float, scomplex, float, s, c, s, tfuncname ) \ GENTFUNC2R( float, dcomplex, double, s, z, d, tfuncname ) \ \ GENTFUNC2R( double, float, float, d, s, s, tfuncname ) \ GENTFUNC2R( double, scomplex, float, d, c, s, tfuncname ) \ GENTFUNC2R( double, dcomplex, double, d, z, d, tfuncname ) \ \ GENTFUNC2R( scomplex, float, float, c, s, s, tfuncname ) \ GENTFUNC2R( scomplex, double, double, c, d, d, tfuncname ) \ GENTFUNC2R( scomplex, dcomplex, double, c, z, d, tfuncname ) \ \ GENTFUNC2R( dcomplex, float, float, z, s, s, tfuncname ) \ GENTFUNC2R( dcomplex, double, double, z, d, d, tfuncname ) \ GENTFUNC2R( dcomplex, scomplex, float, z, c, s, tfuncname ) \ // -- 
(one auxiliary argument) -- #define INSERT_GENTFUNC2R_MIX_DP( tfuncname, varname ) \ \ GENTFUNC2R( float, double, double, s, d, d, tfuncname, varname ) \ GENTFUNC2R( float, scomplex, float, s, c, s, tfuncname, varname ) \ GENTFUNC2R( float, dcomplex, double, s, z, d, tfuncname, varname ) \ \ GENTFUNC2R( double, float, float, d, s, s, tfuncname, varname ) \ GENTFUNC2R( double, scomplex, float, d, c, s, tfuncname, varname ) \ GENTFUNC2R( double, dcomplex, double, d, z, d, tfuncname, varname ) \ \ GENTFUNC2R( scomplex, float, float, c, s, s, tfuncname, varname ) \ GENTFUNC2R( scomplex, double, double, c, d, d, tfuncname, varname ) \ GENTFUNC2R( scomplex, dcomplex, double, c, z, d, tfuncname, varname ) \ \ GENTFUNC2R( dcomplex, float, float, z, s, s, tfuncname, varname ) \ GENTFUNC2R( dcomplex, double, double, z, d, d, tfuncname, varname ) \ GENTFUNC2R( dcomplex, scomplex, float, z, c, s, tfuncname, varname ) \ // -- Macros for functions with three primary operands ------------------------- // -- Basic three-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC3_BASIC0( tfuncname ) \ \ GENTFUNC3( float, float, float, s, s, s, tfuncname ) \ GENTFUNC3( double, double, double, d, d, d, tfuncname ) \ GENTFUNC3( scomplex, scomplex, scomplex, c, c, c, tfuncname ) \ GENTFUNC3( dcomplex, dcomplex, dcomplex, z, z, z, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC3_BASIC( tfuncname, varname ) \ \ GENTFUNC3( float, float, float, s, s, s, tfuncname, varname ) \ GENTFUNC3( double, double, double, d, d, d, tfuncname, varname ) \ GENTFUNC3( scomplex, scomplex, scomplex, c, c, c, tfuncname, varname ) \ GENTFUNC3( dcomplex, dcomplex, dcomplex, z, z, z, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTFUNC3_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTFUNC3( float, float, float, s, s, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, double, double, d, d, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( 
scomplex, scomplex, scomplex, c, c, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, dcomplex, dcomplex, z, z, z, tfuncname, varname1, varname2 ) // -- Mixed domain three-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC3_MIX_D0( tfuncname ) \ \ GENTFUNC3( float, float, scomplex, s, s, c, tfuncname ) \ GENTFUNC3( float, scomplex, float, s, c, s, tfuncname ) \ GENTFUNC3( float, scomplex, scomplex, s, c, c, tfuncname ) \ \ GENTFUNC3( double, double, dcomplex, d, d, z, tfuncname ) \ GENTFUNC3( double, dcomplex, double, d, z, d, tfuncname ) \ GENTFUNC3( double, dcomplex, dcomplex, d, z, z, tfuncname ) \ \ GENTFUNC3( scomplex, float, float, c, s, s, tfuncname ) \ GENTFUNC3( scomplex, float, scomplex, c, s, c, tfuncname ) \ GENTFUNC3( scomplex, scomplex, float, c, c, s, tfuncname ) \ \ GENTFUNC3( dcomplex, double, double, z, d, d, tfuncname ) \ GENTFUNC3( dcomplex, double, dcomplex, z, d, z, tfuncname ) \ GENTFUNC3( dcomplex, dcomplex, double, z, z, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC3_MIX_D( tfuncname, varname ) \ \ GENTFUNC3( float, float, scomplex, s, s, c, tfuncname, varname ) \ GENTFUNC3( float, scomplex, float, s, c, s, tfuncname, varname ) \ GENTFUNC3( float, scomplex, scomplex, s, c, c, tfuncname, varname ) \ \ GENTFUNC3( double, double, dcomplex, d, d, z, tfuncname, varname ) \ GENTFUNC3( double, dcomplex, double, d, z, d, tfuncname, varname ) \ GENTFUNC3( double, dcomplex, dcomplex, d, z, z, tfuncname, varname ) \ \ GENTFUNC3( scomplex, float, float, c, s, s, tfuncname, varname ) \ GENTFUNC3( scomplex, float, scomplex, c, s, c, tfuncname, varname ) \ GENTFUNC3( scomplex, scomplex, float, c, c, s, tfuncname, varname ) \ \ GENTFUNC3( dcomplex, double, double, z, d, d, tfuncname, varname ) \ GENTFUNC3( dcomplex, double, dcomplex, z, d, z, tfuncname, varname ) \ GENTFUNC3( dcomplex, dcomplex, double, z, z, d, tfuncname, varname ) // -- (two auxiliary arguments) -- #define 
INSERT_GENTFUNC3_MIX_D2( tfuncname, varname1, varname2 ) \ \ GENTFUNC3( float, float, scomplex, s, s, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, scomplex, float, s, c, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, scomplex, scomplex, s, c, c, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( double, double, dcomplex, d, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, dcomplex, double, d, z, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, dcomplex, dcomplex, d, z, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( scomplex, float, float, c, s, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, float, scomplex, c, s, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, scomplex, float, c, c, s, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( dcomplex, double, double, z, d, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, double, dcomplex, z, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, dcomplex, double, z, z, d, tfuncname, varname1, varname2 ) // -- Mixed precision three-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC3_MIX_P0( tfuncname ) \ \ GENTFUNC3( float, float, double, s, s, d, tfuncname ) \ GENTFUNC3( float, float, dcomplex, s, s, z, tfuncname ) \ \ GENTFUNC3( float, double, float, s, d, s, tfuncname ) \ GENTFUNC3( float, double, double, s, d, d, tfuncname ) \ GENTFUNC3( float, double, scomplex, s, d, c, tfuncname ) \ GENTFUNC3( float, double, dcomplex, s, d, z, tfuncname ) \ \ GENTFUNC3( float, scomplex, double, s, c, d, tfuncname ) \ GENTFUNC3( float, scomplex, dcomplex, s, c, z, tfuncname ) \ \ GENTFUNC3( float, dcomplex, float, s, z, s, tfuncname ) \ GENTFUNC3( float, dcomplex, double, s, z, d, tfuncname ) \ GENTFUNC3( float, dcomplex, scomplex, s, z, c, tfuncname ) \ GENTFUNC3( float, dcomplex, dcomplex, s, z, z, tfuncname ) \ \ \ GENTFUNC3( double, float, float, d, s, s, tfuncname ) \ GENTFUNC3( double, float, double, d, s, d, tfuncname ) \ GENTFUNC3( 
double, float, scomplex, d, s, c, tfuncname ) \ GENTFUNC3( double, float, dcomplex, d, s, z, tfuncname ) \ \ GENTFUNC3( double, double, float, d, d, s, tfuncname ) \ GENTFUNC3( double, double, scomplex, d, d, c, tfuncname ) \ \ GENTFUNC3( double, scomplex, float, d, c, s, tfuncname ) \ GENTFUNC3( double, scomplex, double, d, c, d, tfuncname ) \ GENTFUNC3( double, scomplex, scomplex, d, c, c, tfuncname ) \ GENTFUNC3( double, scomplex, dcomplex, d, c, z, tfuncname ) \ \ GENTFUNC3( double, dcomplex, float, d, z, s, tfuncname ) \ GENTFUNC3( double, dcomplex, scomplex, d, z, c, tfuncname ) \ \ \ GENTFUNC3( scomplex, float, double, c, s, d, tfuncname ) \ GENTFUNC3( scomplex, float, dcomplex, c, s, z, tfuncname ) \ \ GENTFUNC3( scomplex, double, float, c, d, s, tfuncname ) \ GENTFUNC3( scomplex, double, double, c, d, d, tfuncname ) \ GENTFUNC3( scomplex, double, scomplex, c, d, c, tfuncname ) \ GENTFUNC3( scomplex, double, dcomplex, c, d, z, tfuncname ) \ \ GENTFUNC3( scomplex, scomplex, double, c, c, d, tfuncname ) \ GENTFUNC3( scomplex, scomplex, dcomplex, c, c, z, tfuncname ) \ \ GENTFUNC3( scomplex, dcomplex, float, c, z, s, tfuncname ) \ GENTFUNC3( scomplex, dcomplex, double, c, z, d, tfuncname ) \ GENTFUNC3( scomplex, dcomplex, scomplex, c, z, c, tfuncname ) \ GENTFUNC3( scomplex, dcomplex, dcomplex, c, z, z, tfuncname ) \ \ \ GENTFUNC3( dcomplex, float, float, z, s, s, tfuncname ) \ GENTFUNC3( dcomplex, float, double, z, s, d, tfuncname ) \ GENTFUNC3( dcomplex, float, scomplex, z, s, c, tfuncname ) \ GENTFUNC3( dcomplex, float, dcomplex, z, s, z, tfuncname ) \ \ GENTFUNC3( dcomplex, double, float, z, d, s, tfuncname ) \ GENTFUNC3( dcomplex, double, scomplex, z, d, c, tfuncname ) \ \ GENTFUNC3( dcomplex, scomplex, float, z, c, s, tfuncname ) \ GENTFUNC3( dcomplex, scomplex, double, z, c, d, tfuncname ) \ GENTFUNC3( dcomplex, scomplex, scomplex, z, c, c, tfuncname ) \ GENTFUNC3( dcomplex, scomplex, dcomplex, z, c, z, tfuncname ) \ \ GENTFUNC3( dcomplex, dcomplex, 
float, z, z, s, tfuncname ) \ GENTFUNC3( dcomplex, dcomplex, scomplex, z, z, c, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC3_MIX_P( tfuncname, varname ) \ \ GENTFUNC3( float, float, double, s, s, d, tfuncname, varname ) \ GENTFUNC3( float, float, dcomplex, s, s, z, tfuncname, varname ) \ \ GENTFUNC3( float, double, float, s, d, s, tfuncname, varname ) \ GENTFUNC3( float, double, double, s, d, d, tfuncname, varname ) \ GENTFUNC3( float, double, scomplex, s, d, c, tfuncname, varname ) \ GENTFUNC3( float, double, dcomplex, s, d, z, tfuncname, varname ) \ \ GENTFUNC3( float, scomplex, double, s, c, d, tfuncname, varname ) \ GENTFUNC3( float, scomplex, dcomplex, s, c, z, tfuncname, varname ) \ \ GENTFUNC3( float, dcomplex, float, s, z, s, tfuncname, varname ) \ GENTFUNC3( float, dcomplex, double, s, z, d, tfuncname, varname ) \ GENTFUNC3( float, dcomplex, scomplex, s, z, c, tfuncname, varname ) \ GENTFUNC3( float, dcomplex, dcomplex, s, z, z, tfuncname, varname ) \ \ \ GENTFUNC3( double, float, float, d, s, s, tfuncname, varname ) \ GENTFUNC3( double, float, double, d, s, d, tfuncname, varname ) \ GENTFUNC3( double, float, scomplex, d, s, c, tfuncname, varname ) \ GENTFUNC3( double, float, dcomplex, d, s, z, tfuncname, varname ) \ \ GENTFUNC3( double, double, float, d, d, s, tfuncname, varname ) \ GENTFUNC3( double, double, scomplex, d, d, c, tfuncname, varname ) \ \ GENTFUNC3( double, scomplex, float, d, c, s, tfuncname, varname ) \ GENTFUNC3( double, scomplex, double, d, c, d, tfuncname, varname ) \ GENTFUNC3( double, scomplex, scomplex, d, c, c, tfuncname, varname ) \ GENTFUNC3( double, scomplex, dcomplex, d, c, z, tfuncname, varname ) \ \ GENTFUNC3( double, dcomplex, float, d, z, s, tfuncname, varname ) \ GENTFUNC3( double, dcomplex, scomplex, d, z, c, tfuncname, varname ) \ \ \ GENTFUNC3( scomplex, float, double, c, s, d, tfuncname, varname ) \ GENTFUNC3( scomplex, float, dcomplex, c, s, z, tfuncname, varname ) \ \ GENTFUNC3( scomplex, 
double, float, c, d, s, tfuncname, varname ) \ GENTFUNC3( scomplex, double, double, c, d, d, tfuncname, varname ) \ GENTFUNC3( scomplex, double, scomplex, c, d, c, tfuncname, varname ) \ GENTFUNC3( scomplex, double, dcomplex, c, d, z, tfuncname, varname ) \ \ GENTFUNC3( scomplex, scomplex, double, c, c, d, tfuncname, varname ) \ GENTFUNC3( scomplex, scomplex, dcomplex, c, c, z, tfuncname, varname ) \ \ GENTFUNC3( scomplex, dcomplex, float, c, z, s, tfuncname, varname ) \ GENTFUNC3( scomplex, dcomplex, double, c, z, d, tfuncname, varname ) \ GENTFUNC3( scomplex, dcomplex, scomplex, c, z, c, tfuncname, varname ) \ GENTFUNC3( scomplex, dcomplex, dcomplex, c, z, z, tfuncname, varname ) \ \ \ GENTFUNC3( dcomplex, float, float, z, s, s, tfuncname, varname ) \ GENTFUNC3( dcomplex, float, double, z, s, d, tfuncname, varname ) \ GENTFUNC3( dcomplex, float, scomplex, z, s, c, tfuncname, varname ) \ GENTFUNC3( dcomplex, float, dcomplex, z, s, z, tfuncname, varname ) \ \ GENTFUNC3( dcomplex, double, float, z, d, s, tfuncname, varname ) \ GENTFUNC3( dcomplex, double, scomplex, z, d, c, tfuncname, varname ) \ \ GENTFUNC3( dcomplex, scomplex, float, z, c, s, tfuncname, varname ) \ GENTFUNC3( dcomplex, scomplex, double, z, c, d, tfuncname, varname ) \ GENTFUNC3( dcomplex, scomplex, scomplex, z, c, c, tfuncname, varname ) \ GENTFUNC3( dcomplex, scomplex, dcomplex, z, c, z, tfuncname, varname ) \ \ GENTFUNC3( dcomplex, dcomplex, float, z, z, s, tfuncname, varname ) \ GENTFUNC3( dcomplex, dcomplex, scomplex, z, z, c, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTFUNC3_MIX_P2( tfuncname, varname1, varname2 ) \ \ GENTFUNC3( float, float, double, s, s, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, float, dcomplex, s, s, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( float, double, float, s, d, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, double, double, s, d, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, double, scomplex, s, d, 
c, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, double, dcomplex, s, d, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( float, scomplex, double, s, c, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, scomplex, dcomplex, s, c, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( float, dcomplex, float, s, z, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, dcomplex, double, s, z, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, dcomplex, scomplex, s, z, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( float, dcomplex, dcomplex, s, z, z, tfuncname, varname1, varname2 ) \ \ \ GENTFUNC3( double, float, float, d, s, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, float, double, d, s, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, float, scomplex, d, s, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, float, dcomplex, d, s, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( double, double, float, d, d, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, double, scomplex, d, d, c, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( double, scomplex, float, d, c, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, scomplex, double, d, c, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, scomplex, scomplex, d, c, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, scomplex, dcomplex, d, c, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( double, dcomplex, float, d, z, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, dcomplex, scomplex, d, z, c, tfuncname, varname1, varname2 ) \ \ \ GENTFUNC3( scomplex, float, double, c, s, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, float, dcomplex, c, s, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( scomplex, double, float, c, d, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, double, double, c, d, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, double, scomplex, c, d, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, double, 
dcomplex, c, d, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( scomplex, scomplex, double, c, c, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, scomplex, dcomplex, c, c, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( scomplex, dcomplex, float, c, z, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, dcomplex, double, c, z, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, dcomplex, scomplex, c, z, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, dcomplex, dcomplex, c, z, z, tfuncname, varname1, varname2 ) \ \ \ GENTFUNC3( dcomplex, float, float, z, s, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, float, double, z, s, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, float, scomplex, z, s, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, float, dcomplex, z, s, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( dcomplex, double, float, z, d, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, double, scomplex, z, d, c, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( dcomplex, scomplex, float, z, c, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, scomplex, double, z, c, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, scomplex, scomplex, z, c, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, scomplex, dcomplex, z, c, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3( dcomplex, dcomplex, float, z, z, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, dcomplex, scomplex, z, z, c, tfuncname, varname1, varname2 ) // -- Basic three-operand with union of operands 1 and 2 -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC3U12_BASIC0( tfuncname ) \ \ GENTFUNC3U12( float, float, float, float, s, s, s, s, tfuncname ) \ GENTFUNC3U12( double, double, double, double, d, d, d, d, tfuncname ) \ GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, tfuncname ) \ GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, tfuncname ) // -- (one 
auxiliary argument) -- #define INSERT_GENTFUNC3U12_BASIC( tfuncname, varname ) \ \ GENTFUNC3U12( float, float, float, float, s, s, s, s, tfuncname, varname ) \ GENTFUNC3U12( double, double, double, double, d, d, d, d, tfuncname, varname ) \ GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTFUNC3U12_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( float, float, float, float, s, s, s, s, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, double, double, double, d, d, d, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, tfuncname, varname1, varname2 ) // -- Mixed domain three-operand with union of operands 1 and 2 -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC3U12_MIX_D0( tfuncname ) \ \ GENTFUNC3U12( float, float, scomplex, float, s, s, c, s, tfuncname ) \ GENTFUNC3U12( float, scomplex, float, scomplex, s, c, s, c, tfuncname ) \ GENTFUNC3U12( float, scomplex, scomplex, scomplex, s, c, c, c, tfuncname ) \ \ GENTFUNC3U12( double, double, dcomplex, double, d, d, z, d, tfuncname ) \ GENTFUNC3U12( double, dcomplex, double, dcomplex, d, z, d, z, tfuncname ) \ GENTFUNC3U12( double, dcomplex, dcomplex, dcomplex, d, z, z, z, tfuncname ) \ \ GENTFUNC3U12( scomplex, float, float, scomplex, c, s, s, c, tfuncname ) \ GENTFUNC3U12( scomplex, float, scomplex, scomplex, c, s, c, c, tfuncname ) \ GENTFUNC3U12( scomplex, scomplex, float, scomplex, c, c, s, c, tfuncname ) \ \ GENTFUNC3U12( dcomplex, double, double, dcomplex, z, d, d, z, tfuncname ) \ GENTFUNC3U12( dcomplex, double, dcomplex, dcomplex, z, d, z, z, tfuncname ) \ GENTFUNC3U12( dcomplex, dcomplex, double, dcomplex, z, z, d, z, tfuncname ) // -- (one auxiliary argument) 
-- #define INSERT_GENTFUNC3U12_MIX_D( tfuncname, varname ) \ \ GENTFUNC3U12( float, float, scomplex, float, s, s, c, s, tfuncname, varname ) \ GENTFUNC3U12( float, scomplex, float, scomplex, s, c, s, c, tfuncname, varname ) \ GENTFUNC3U12( float, scomplex, scomplex, scomplex, s, c, c, c, tfuncname, varname ) \ \ GENTFUNC3U12( double, double, dcomplex, double, d, d, z, d, tfuncname, varname ) \ GENTFUNC3U12( double, dcomplex, double, dcomplex, d, z, d, z, tfuncname, varname ) \ GENTFUNC3U12( double, dcomplex, dcomplex, dcomplex, d, z, z, z, tfuncname, varname ) \ \ GENTFUNC3U12( scomplex, float, float, scomplex, c, s, s, c, tfuncname, varname ) \ GENTFUNC3U12( scomplex, float, scomplex, scomplex, c, s, c, c, tfuncname, varname ) \ GENTFUNC3U12( scomplex, scomplex, float, scomplex, c, c, s, c, tfuncname, varname ) \ \ GENTFUNC3U12( dcomplex, double, double, dcomplex, z, d, d, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, double, dcomplex, dcomplex, z, d, z, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, dcomplex, double, dcomplex, z, z, d, z, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTFUNC3U12_MIX_D2( tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( float, float, scomplex, float, s, s, c, s, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, scomplex, float, scomplex, s, c, s, c, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, scomplex, scomplex, scomplex, s, c, c, c, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( double, double, dcomplex, double, d, d, z, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, dcomplex, double, dcomplex, d, z, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, dcomplex, dcomplex, dcomplex, d, z, z, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( scomplex, float, float, scomplex, c, s, s, c, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, float, scomplex, scomplex, c, s, c, c, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, scomplex, float, 
scomplex, c, c, s, c, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( dcomplex, double, double, dcomplex, z, d, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, double, dcomplex, dcomplex, z, d, z, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, dcomplex, double, dcomplex, z, z, d, z, tfuncname, varname1, varname2 ) // -- Mixed precision three-operand with union of operands 1 and 2 -- // -- (no auxiliary arguments) -- #define INSERT_GENTFUNC3U12_MIX_P0( tfuncname ) \ \ GENTFUNC3U12( float, float, double, float, s, s, d, s, tfuncname ) \ GENTFUNC3U12( float, float, dcomplex, float, s, s, z, s, tfuncname ) \ \ GENTFUNC3U12( float, double, float, double, s, d, s, d, tfuncname ) \ GENTFUNC3U12( float, double, double, double, s, d, d, d, tfuncname ) \ GENTFUNC3U12( float, double, scomplex, double, s, d, c, d, tfuncname ) \ GENTFUNC3U12( float, double, dcomplex, double, s, d, z, d, tfuncname ) \ \ GENTFUNC3U12( float, scomplex, double, scomplex, s, c, d, c, tfuncname ) \ GENTFUNC3U12( float, scomplex, dcomplex, scomplex, s, c, z, c, tfuncname ) \ \ GENTFUNC3U12( float, dcomplex, float, dcomplex, s, z, s, z, tfuncname ) \ GENTFUNC3U12( float, dcomplex, double, dcomplex, s, z, d, z, tfuncname ) \ GENTFUNC3U12( float, dcomplex, scomplex, dcomplex, s, z, c, z, tfuncname ) \ GENTFUNC3U12( float, dcomplex, dcomplex, dcomplex, s, z, z, z, tfuncname ) \ \ \ GENTFUNC3U12( double, float, float, double, d, s, s, d, tfuncname ) \ GENTFUNC3U12( double, float, double, double, d, s, d, d, tfuncname ) \ GENTFUNC3U12( double, float, scomplex, double, d, s, c, d, tfuncname ) \ GENTFUNC3U12( double, float, dcomplex, double, d, s, z, d, tfuncname ) \ \ GENTFUNC3U12( double, double, float, double, d, d, s, d, tfuncname ) \ GENTFUNC3U12( double, double, scomplex, double, d, d, c, d, tfuncname ) \ \ GENTFUNC3U12( double, scomplex, float, dcomplex, d, c, s, z, tfuncname ) \ GENTFUNC3U12( double, scomplex, double, dcomplex, d, c, d, z, tfuncname ) \ GENTFUNC3U12( 
double, scomplex, scomplex, dcomplex, d, c, c, z, tfuncname ) \ GENTFUNC3U12( double, scomplex, dcomplex, dcomplex, d, c, z, z, tfuncname ) \ \ GENTFUNC3U12( double, dcomplex, float, dcomplex, d, z, s, z, tfuncname ) \ GENTFUNC3U12( double, dcomplex, scomplex, dcomplex, d, z, c, z, tfuncname ) \ \ \ GENTFUNC3U12( scomplex, float, double, scomplex, c, s, d, c, tfuncname ) \ GENTFUNC3U12( scomplex, float, dcomplex, scomplex, c, s, z, c, tfuncname ) \ \ GENTFUNC3U12( scomplex, double, float, dcomplex, c, d, s, z, tfuncname ) \ GENTFUNC3U12( scomplex, double, double, dcomplex, c, d, d, z, tfuncname ) \ GENTFUNC3U12( scomplex, double, scomplex, dcomplex, c, d, c, z, tfuncname ) \ GENTFUNC3U12( scomplex, double, dcomplex, dcomplex, c, d, z, z, tfuncname ) \ \ GENTFUNC3U12( scomplex, scomplex, double, scomplex, c, c, d, c, tfuncname ) \ GENTFUNC3U12( scomplex, scomplex, dcomplex, scomplex, c, c, z, c, tfuncname ) \ \ GENTFUNC3U12( scomplex, dcomplex, float, dcomplex, c, z, s, z, tfuncname ) \ GENTFUNC3U12( scomplex, dcomplex, double, dcomplex, c, z, d, z, tfuncname ) \ GENTFUNC3U12( scomplex, dcomplex, scomplex, dcomplex, c, z, c, z, tfuncname ) \ GENTFUNC3U12( scomplex, dcomplex, dcomplex, dcomplex, c, z, z, z, tfuncname ) \ \ \ GENTFUNC3U12( dcomplex, float, float, dcomplex, z, s, s, z, tfuncname ) \ GENTFUNC3U12( dcomplex, float, double, dcomplex, z, s, d, z, tfuncname ) \ GENTFUNC3U12( dcomplex, float, scomplex, dcomplex, z, s, c, z, tfuncname ) \ GENTFUNC3U12( dcomplex, float, dcomplex, dcomplex, z, s, z, z, tfuncname ) \ \ GENTFUNC3U12( dcomplex, double, float, dcomplex, z, d, s, z, tfuncname ) \ GENTFUNC3U12( dcomplex, double, scomplex, dcomplex, z, d, c, z, tfuncname ) \ \ GENTFUNC3U12( dcomplex, scomplex, float, dcomplex, z, c, s, z, tfuncname ) \ GENTFUNC3U12( dcomplex, scomplex, double, dcomplex, z, c, d, z, tfuncname ) \ GENTFUNC3U12( dcomplex, scomplex, scomplex, dcomplex, z, c, c, z, tfuncname ) \ GENTFUNC3U12( dcomplex, scomplex, dcomplex, dcomplex, z, c, 
z, z, tfuncname ) \ \ GENTFUNC3U12( dcomplex, dcomplex, float, dcomplex, z, z, s, z, tfuncname ) \ GENTFUNC3U12( dcomplex, dcomplex, scomplex, dcomplex, z, z, c, z, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTFUNC3U12_MIX_P( tfuncname, varname ) \ \ GENTFUNC3U12( float, float, double, float, s, s, d, s, tfuncname, varname ) \ GENTFUNC3U12( float, float, dcomplex, float, s, s, z, s, tfuncname, varname ) \ \ GENTFUNC3U12( float, double, float, double, s, d, s, d, tfuncname, varname ) \ GENTFUNC3U12( float, double, double, double, s, d, d, d, tfuncname, varname ) \ GENTFUNC3U12( float, double, scomplex, double, s, d, c, d, tfuncname, varname ) \ GENTFUNC3U12( float, double, dcomplex, double, s, d, z, d, tfuncname, varname ) \ \ GENTFUNC3U12( float, scomplex, double, scomplex, s, c, d, c, tfuncname, varname ) \ GENTFUNC3U12( float, scomplex, dcomplex, scomplex, s, c, z, c, tfuncname, varname ) \ \ GENTFUNC3U12( float, dcomplex, float, dcomplex, s, z, s, z, tfuncname, varname ) \ GENTFUNC3U12( float, dcomplex, double, dcomplex, s, z, d, z, tfuncname, varname ) \ GENTFUNC3U12( float, dcomplex, scomplex, dcomplex, s, z, c, z, tfuncname, varname ) \ GENTFUNC3U12( float, dcomplex, dcomplex, dcomplex, s, z, z, z, tfuncname, varname ) \ \ \ GENTFUNC3U12( double, float, float, double, d, s, s, d, tfuncname, varname ) \ GENTFUNC3U12( double, float, double, double, d, s, d, d, tfuncname, varname ) \ GENTFUNC3U12( double, float, scomplex, double, d, s, c, d, tfuncname, varname ) \ GENTFUNC3U12( double, float, dcomplex, double, d, s, z, d, tfuncname, varname ) \ \ GENTFUNC3U12( double, double, float, double, d, d, s, d, tfuncname, varname ) \ GENTFUNC3U12( double, double, scomplex, double, d, d, c, d, tfuncname, varname ) \ \ GENTFUNC3U12( double, scomplex, float, dcomplex, d, c, s, z, tfuncname, varname ) \ GENTFUNC3U12( double, scomplex, double, dcomplex, d, c, d, z, tfuncname, varname ) \ GENTFUNC3U12( double, scomplex, scomplex, dcomplex, d, c, c, z, 
tfuncname, varname ) \ GENTFUNC3U12( double, scomplex, dcomplex, dcomplex, d, c, z, z, tfuncname, varname ) \ \ GENTFUNC3U12( double, dcomplex, float, dcomplex, d, z, s, z, tfuncname, varname ) \ GENTFUNC3U12( double, dcomplex, scomplex, dcomplex, d, z, c, z, tfuncname, varname ) \ \ \ GENTFUNC3U12( scomplex, float, double, scomplex, c, s, d, c, tfuncname, varname ) \ GENTFUNC3U12( scomplex, float, dcomplex, scomplex, c, s, z, c, tfuncname, varname ) \ \ GENTFUNC3U12( scomplex, double, float, dcomplex, c, d, s, z, tfuncname, varname ) \ GENTFUNC3U12( scomplex, double, double, dcomplex, c, d, d, z, tfuncname, varname ) \ GENTFUNC3U12( scomplex, double, scomplex, dcomplex, c, d, c, z, tfuncname, varname ) \ GENTFUNC3U12( scomplex, double, dcomplex, dcomplex, c, d, z, z, tfuncname, varname ) \ \ GENTFUNC3U12( scomplex, scomplex, double, scomplex, c, c, d, c, tfuncname, varname ) \ GENTFUNC3U12( scomplex, scomplex, dcomplex, scomplex, c, c, z, c, tfuncname, varname ) \ \ GENTFUNC3U12( scomplex, dcomplex, float, dcomplex, c, z, s, z, tfuncname, varname ) \ GENTFUNC3U12( scomplex, dcomplex, double, dcomplex, c, z, d, z, tfuncname, varname ) \ GENTFUNC3U12( scomplex, dcomplex, scomplex, dcomplex, c, z, c, z, tfuncname, varname ) \ GENTFUNC3U12( scomplex, dcomplex, dcomplex, dcomplex, c, z, z, z, tfuncname, varname ) \ \ \ GENTFUNC3U12( dcomplex, float, float, dcomplex, z, s, s, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, float, double, dcomplex, z, s, d, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, float, scomplex, dcomplex, z, s, c, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, float, dcomplex, dcomplex, z, s, z, z, tfuncname, varname ) \ \ GENTFUNC3U12( dcomplex, double, float, dcomplex, z, d, s, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, double, scomplex, dcomplex, z, d, c, z, tfuncname, varname ) \ \ GENTFUNC3U12( dcomplex, scomplex, float, dcomplex, z, c, s, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, scomplex, double, dcomplex, z, c, d, 
z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, scomplex, scomplex, dcomplex, z, c, c, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, scomplex, dcomplex, dcomplex, z, c, z, z, tfuncname, varname ) \ \ GENTFUNC3U12( dcomplex, dcomplex, float, dcomplex, z, z, s, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, dcomplex, scomplex, dcomplex, z, z, c, z, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTFUNC3U12_MIX_P2( tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( float, float, double, float, s, s, d, s, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, float, dcomplex, float, s, s, z, s, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( float, double, float, double, s, d, s, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, double, double, double, s, d, d, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, double, scomplex, double, s, d, c, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, double, dcomplex, double, s, d, z, d, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( float, scomplex, double, scomplex, s, c, d, c, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, scomplex, dcomplex, scomplex, s, c, z, c, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( float, dcomplex, float, dcomplex, s, z, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, dcomplex, double, dcomplex, s, z, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, dcomplex, scomplex, dcomplex, s, z, c, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( float, dcomplex, dcomplex, dcomplex, s, z, z, z, tfuncname, varname1, varname2 ) \ \ \ GENTFUNC3U12( double, float, float, double, d, s, s, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, float, double, double, d, s, d, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, float, scomplex, double, d, s, c, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, float, dcomplex, double, d, s, z, d, tfuncname, varname1, varname2 ) \ \ 
GENTFUNC3U12( double, double, float, double, d, d, s, d, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, double, scomplex, double, d, d, c, d, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( double, scomplex, float, dcomplex, d, c, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, scomplex, double, dcomplex, d, c, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, scomplex, scomplex, dcomplex, d, c, c, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, scomplex, dcomplex, dcomplex, d, c, z, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( double, dcomplex, float, dcomplex, d, z, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( double, dcomplex, scomplex, dcomplex, d, z, c, z, tfuncname, varname1, varname2 ) \ \ \ GENTFUNC3U12( scomplex, float, double, scomplex, c, s, d, c, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, float, dcomplex, scomplex, c, s, z, c, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( scomplex, double, float, dcomplex, c, d, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, double, double, dcomplex, c, d, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, double, scomplex, dcomplex, c, d, c, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, double, dcomplex, dcomplex, c, d, z, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( scomplex, scomplex, double, scomplex, c, c, d, c, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, scomplex, dcomplex, scomplex, c, c, z, c, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( scomplex, dcomplex, float, dcomplex, c, z, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, dcomplex, double, dcomplex, c, z, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, dcomplex, scomplex, dcomplex, c, z, c, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( scomplex, dcomplex, dcomplex, dcomplex, c, z, z, z, tfuncname, varname1, varname2 ) \ \ \ GENTFUNC3U12( dcomplex, float, float, 
dcomplex, z, s, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, float, double, dcomplex, z, s, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, float, scomplex, dcomplex, z, s, c, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, float, dcomplex, dcomplex, z, s, z, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( dcomplex, double, float, dcomplex, z, d, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, double, scomplex, dcomplex, z, d, c, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( dcomplex, scomplex, float, dcomplex, z, c, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, scomplex, double, dcomplex, z, c, d, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, scomplex, scomplex, dcomplex, z, c, c, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, scomplex, dcomplex, dcomplex, z, c, z, z, tfuncname, varname1, varname2 ) \ \ GENTFUNC3U12( dcomplex, dcomplex, float, dcomplex, z, z, s, z, tfuncname, varname1, varname2 ) \ GENTFUNC3U12( dcomplex, dcomplex, scomplex, dcomplex, z, z, c, z, tfuncname, varname1, varname2 ) #endif blis-0.6.1/frame/include/bli_gentprot_macro_defs.h000066400000000000000000000646471360743507500222420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_GENTPROT_MACRO_DEFS_H #define BLIS_GENTPROT_MACRO_DEFS_H // // -- MACROS TO INSERT PROTOTYPE-GENERATING MACROS ----------------------------- // // -- Macros for generating BLAS routines -------------------------------------- // -- Basic one-operand macro -- #define INSERT_GENTPROT_BLAS( blasname ) \ \ GENTPROT( float, s, blasname ) \ GENTPROT( double, d, blasname ) \ GENTPROT( scomplex, c, blasname ) \ GENTPROT( dcomplex, z, blasname ) // -- Basic one-operand macro with real domain only -- #define INSERT_GENTPROTRO_BLAS( blasname ) \ \ GENTPROTRO( float, s, blasname ) \ GENTPROTRO( double, d, blasname ) // -- Basic one-operand macro with complex domain only and real projection -- #define INSERT_GENTPROTCO_BLAS( blasname ) \ \ GENTPROTCO( scomplex, float, c, s, blasname ) \ GENTPROTCO( dcomplex, double, z, d, blasname ) // -- Basic one-operand macro with conjugation (used only for dot, ger) -- #define INSERT_GENTPROTDOT_BLAS( blasname ) \ \ GENTPROTDOT( float, s, , blasname ) \ GENTPROTDOT( double, d, , blasname ) \ 
GENTPROTDOT( scomplex, c, c, blasname ) \ GENTPROTDOT( scomplex, c, u, blasname ) \ GENTPROTDOT( dcomplex, z, c, blasname ) \ GENTPROTDOT( dcomplex, z, u, blasname ) // -- Basic one-operand macro with real projection -- #define INSERT_GENTPROTR_BLAS( rblasname, cblasname ) \ \ GENTPROTR( float, float, s, s, rblasname ) \ GENTPROTR( double, double, d, d, rblasname ) \ GENTPROTR( scomplex, float, c, s, cblasname ) \ GENTPROTR( dcomplex, double, z, d, cblasname ) // -- Alternate two-operand macro (one char for complex, one for real proj) -- #define INSERT_GENTPROTR2_BLAS( blasname ) \ \ GENTPROTR2( float, float, , s, blasname ) \ GENTPROTR2( double, double, , d, blasname ) \ GENTPROTR2( scomplex, float, c, s, blasname ) \ GENTPROTR2( dcomplex, double, z, d, blasname ) // -- Extended two-operand macro (used only for scal) -- #define INSERT_GENTPROTSCAL_BLAS( blasname ) \ \ GENTPROTSCAL( float, float, , s, blasname ) \ GENTPROTSCAL( double, double, , d, blasname ) \ GENTPROTSCAL( scomplex, scomplex, , c, blasname ) \ GENTPROTSCAL( dcomplex, dcomplex, , z, blasname ) \ GENTPROTSCAL( float, scomplex, s, c, blasname ) \ GENTPROTSCAL( double, dcomplex, d, z, blasname ) // -- Macros for functions with one operand ------------------------------------ // -- Basic one-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT_BASIC0( tfuncname ) \ \ GENTPROT( float, s, tfuncname ) \ GENTPROT( double, d, tfuncname ) \ GENTPROT( scomplex, c, tfuncname ) \ GENTPROT( dcomplex, z, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROT_BASIC( tfuncname, varname ) \ \ GENTPROT( float, s, tfuncname, varname ) \ GENTPROT( double, d, tfuncname, varname ) \ GENTPROT( scomplex, c, tfuncname, varname ) \ GENTPROT( dcomplex, z, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTPROT_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTPROT( float, s, tfuncname, varname1, varname2 ) \ GENTPROT( double, d, tfuncname, varname1, varname2 ) \ 
GENTPROT( scomplex, c, tfuncname, varname1, varname2 ) \ GENTPROT( dcomplex, z, tfuncname, varname1, varname2 ) // -- (three auxiliary arguments) -- #define INSERT_GENTPROT_BASIC3( tfuncname, varname1, varname2, varname3 ) \ \ GENTPROT( float, s, tfuncname, varname1, varname2, varname3 ) \ GENTPROT( double, d, tfuncname, varname1, varname2, varname3 ) \ GENTPROT( scomplex, c, tfuncname, varname1, varname2, varname3 ) \ GENTPROT( dcomplex, z, tfuncname, varname1, varname2, varname3 ) // -- (four auxiliary arguments) -- #define INSERT_GENTPROT_BASIC4( tfuncname, varname1, varname2, varname3, varname4 ) \ \ GENTPROT( float, s, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTPROT( double, d, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTPROT( scomplex, c, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTPROT( dcomplex, z, tfuncname, varname1, varname2, varname3, varname4 ) // -- Basic one-operand with real projection -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROTR_BASIC0( tfuncname ) \ \ GENTPROTR( float, float, s, s, tfuncname ) \ GENTPROTR( double, double, d, d, tfuncname ) \ GENTPROTR( scomplex, float, c, s, tfuncname ) \ GENTPROTR( dcomplex, double, z, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROTR_BASIC( tfuncname, varname ) \ \ GENTPROTR( float, float, s, s, tfuncname, varname ) \ GENTPROTR( double, double, d, d, tfuncname, varname ) \ GENTPROTR( scomplex, float, c, s, tfuncname, varname ) \ GENTPROTR( dcomplex, double, z, d, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTPROTR_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTPROTR( float, float, s, s, tfuncname, varname1, varname2 ) \ GENTPROTR( double, double, d, d, tfuncname, varname1, varname2 ) \ GENTPROTR( scomplex, float, c, s, tfuncname, varname1, varname2 ) \ GENTPROTR( dcomplex, double, z, d, tfuncname, varname1, varname2 ) // -- (three auxiliary arguments) -- #define INSERT_GENTPROTR_BASIC3( tfuncname, 
varname1, varname2, varname3 ) \ \ GENTPROTR( float, float, s, s, tfuncname, varname1, varname2, varname3 ) \ GENTPROTR( double, double, d, d, tfuncname, varname1, varname2, varname3 ) \ GENTPROTR( scomplex, float, c, s, tfuncname, varname1, varname2, varname3 ) \ GENTPROTR( dcomplex, double, z, d, tfuncname, varname1, varname2, varname3 ) // -- (four auxiliary arguments) -- #define INSERT_GENTPROTR_BASIC4( tfuncname, varname1, varname2, varname3, varname4 ) \ \ GENTPROTR( float, float, s, s, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTPROTR( double, double, d, d, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTPROTR( scomplex, float, c, s, tfuncname, varname1, varname2, varname3, varname4 ) \ GENTPROTR( dcomplex, double, z, d, tfuncname, varname1, varname2, varname3, varname4 ) // -- Basic one-operand macro with complex domain only and real projection -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROTCO_BASIC0( tfuncname ) \ \ GENTPROTCO( scomplex, float, c, s, tfuncname ) \ GENTPROTCO( dcomplex, double, z, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROTCO_BASIC( tfuncname, varname ) \ \ GENTPROTCO( scomplex, float, c, s, tfuncname, varname ) \ GENTPROTCO( dcomplex, double, z, d, tfuncname, varname ) // -- (two auxiliary arguments) -- #define INSERT_GENTPROTCO_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTPROTCO( scomplex, float, c, s, tfuncname, varname1, varname2 ) \ GENTPROTCO( dcomplex, double, z, d, tfuncname, varname1, varname2 ) // -- Basic one-operand macro with integer instance -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT_BASIC0_I( funcname ) \ \ GENTPROT( float, s, funcname ) \ GENTPROT( double, d, funcname ) \ GENTPROT( scomplex, c, funcname ) \ GENTPROT( dcomplex, z, funcname ) \ GENTPROT( gint_t, i, funcname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROT_BASIC_I( tfuncname, varname ) \ \ GENTPROT( float, s, tfuncname, varname ) \ GENTPROT( double, d, tfuncname, 
varname ) \ GENTPROT( scomplex, c, tfuncname, varname ) \ GENTPROT( dcomplex, z, tfuncname, varname ) \ GENTPROT( gint_t, i, tfuncname, varname ) // -- Basic one-operand with integer projection -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROTI_BASIC0( funcname ) \ \ GENTPROTI( float, gint_t, s, i, funcname ) \ GENTPROTI( double, gint_t, d, i, funcname ) \ GENTPROTI( scomplex, gint_t, c, i, funcname ) \ GENTPROTI( dcomplex, gint_t, z, i, funcname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROTI_BASIC( tfuncname, varname ) \ \ GENTPROTI( float, gint_t, s, i, tfuncname, varname ) \ GENTPROTI( double, gint_t, d, i, tfuncname, varname ) \ GENTPROTI( scomplex, gint_t, c, i, tfuncname, varname ) \ GENTPROTI( dcomplex, gint_t, z, i, tfuncname, varname ) // -- Basic one-operand with real and integer projections -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROTRI_BASIC( funcname ) \ \ GENTPROTRI( float, float, gint_t, s, s, i, funcname ) \ GENTPROTRI( double, double, gint_t, d, d, i, funcname ) \ GENTPROTRI( scomplex, float, gint_t, c, s, i, funcname ) \ GENTPROTRI( dcomplex, double, gint_t, z, d, i, funcname ) // -- Macros for functions with two primary operands --------------------------- // -- Basic two-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT2_BASIC0( funcname ) \ \ GENTPROT2( float, float, s, s, funcname ) \ GENTPROT2( double, double, d, d, funcname ) \ GENTPROT2( scomplex, scomplex, c, c, funcname ) \ GENTPROT2( dcomplex, dcomplex, z, z, funcname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROT2_BASIC( tfuncname, varname ) \ \ GENTPROT2( float, float, s, s, tfuncname, varname ) \ GENTPROT2( double, double, d, d, tfuncname, varname ) \ GENTPROT2( scomplex, scomplex, c, c, tfuncname, varname ) \ GENTPROT2( dcomplex, dcomplex, z, z, tfuncname, varname ) // -- Mixed domain two-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT2_MIX_D0( funcname ) \ \ GENTPROT2( float, 
scomplex, s, c, funcname ) \ GENTPROT2( scomplex, float, c, s, funcname ) \ \ GENTPROT2( double, dcomplex, d, z, funcname ) \ GENTPROT2( dcomplex, double, z, d, funcname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROT2_MIX_D( tfuncname, varname ) \ \ GENTPROT2( float, scomplex, s, c, tfuncname, varname ) \ GENTPROT2( scomplex, float, c, s, tfuncname, varname ) \ \ GENTPROT2( double, dcomplex, d, z, tfuncname, varname ) \ GENTPROT2( dcomplex, double, z, d, tfuncname, varname ) // -- Mixed precision two-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT2_MIX_P0( funcname ) \ \ GENTPROT2( float, double, s, d, funcname ) \ GENTPROT2( float, dcomplex, s, z, funcname ) \ \ GENTPROT2( double, float, d, s, funcname ) \ GENTPROT2( double, scomplex, d, c, funcname ) \ \ GENTPROT2( scomplex, double, c, d, funcname ) \ GENTPROT2( scomplex, dcomplex, c, z, funcname ) \ \ GENTPROT2( dcomplex, float, z, s, funcname ) \ GENTPROT2( dcomplex, scomplex, z, c, funcname ) \ // -- (one auxiliary argument) -- #define INSERT_GENTPROT2_MIX_P( tfuncname, varname ) \ \ GENTPROT2( float, double, s, d, tfuncname, varname ) \ GENTPROT2( float, dcomplex, s, z, tfuncname, varname ) \ \ GENTPROT2( double, float, d, s, tfuncname, varname ) \ GENTPROT2( double, scomplex, d, c, tfuncname, varname ) \ \ GENTPROT2( scomplex, double, c, d, tfuncname, varname ) \ GENTPROT2( scomplex, dcomplex, c, z, tfuncname, varname ) \ \ GENTPROT2( dcomplex, float, z, s, tfuncname, varname ) \ GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) \ // -- Mixed domain/precision (all) two-operand macro -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT2_MIXDP0( funcname ) \ \ GENTPROT2( float, double, s, d, funcname ) \ GENTPROT2( float, scomplex, s, c, funcname ) \ GENTPROT2( float, dcomplex, s, z, funcname ) \ \ GENTPROT2( double, float, d, s, funcname ) \ GENTPROT2( double, scomplex, d, c, funcname ) \ GENTPROT2( double, dcomplex, d, z, funcname ) \ \ GENTPROT2( 
scomplex, float, c, s, funcname ) \ GENTPROT2( scomplex, double, c, d, funcname ) \ GENTPROT2( scomplex, dcomplex, c, z, funcname ) \ \ GENTPROT2( dcomplex, float, z, s, funcname ) \ GENTPROT2( dcomplex, double, z, d, funcname ) \ GENTPROT2( dcomplex, scomplex, z, c, funcname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROT2_MIX_DP( tfuncname, varname ) \ \ GENTPROT2( float, double, s, d, tfuncname, varname ) \ GENTPROT2( float, scomplex, s, c, tfuncname, varname ) \ GENTPROT2( float, dcomplex, s, z, tfuncname, varname ) \ \ GENTPROT2( double, float, d, s, tfuncname, varname ) \ GENTPROT2( double, scomplex, d, c, tfuncname, varname ) \ GENTPROT2( double, dcomplex, d, z, tfuncname, varname ) \ \ GENTPROT2( scomplex, float, c, s, tfuncname, varname ) \ GENTPROT2( scomplex, double, c, d, tfuncname, varname ) \ GENTPROT2( scomplex, dcomplex, c, z, tfuncname, varname ) \ \ GENTPROT2( dcomplex, float, z, s, tfuncname, varname ) \ GENTPROT2( dcomplex, double, z, d, tfuncname, varname ) \ GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) // -- Basic two-operand with real projection of first operand -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT2R_BASIC0( funcname ) \ \ GENTPROT2R( float, float, float, s, s, s, funcname ) \ GENTPROT2R( double, double, double, d, d, d, funcname ) \ GENTPROT2R( scomplex, scomplex, float, c, c, s, funcname ) \ GENTPROT2R( dcomplex, dcomplex, double, z, z, d, funcname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROT2R_BASIC( tfuncname, varname ) \ \ GENTPROT2R( float, float, float, s, s, s, tfuncname, varname ) \ GENTPROT2R( double, double, double, d, d, d, tfuncname, varname ) \ GENTPROT2R( scomplex, scomplex, float, c, c, s, tfuncname, varname ) \ GENTPROT2R( dcomplex, dcomplex, double, z, z, d, tfuncname, varname ) // -- Mixed domain two-operand with real projection of first operand -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT2R_MIX_D0( tfuncname ) \ \ GENTPROT2R( float, scomplex, 
float, s, c, s, tfuncname ) \ GENTPROT2R( scomplex, float, float, c, s, s, tfuncname ) \ \ GENTPROT2R( double, dcomplex, double, d, z, d, tfuncname ) \ GENTPROT2R( dcomplex, double, double, z, d, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROT2R_MIX_D( tfuncname, varname ) \ \ GENTPROT2R( float, scomplex, float, s, c, s, tfuncname, varname ) \ GENTPROT2R( scomplex, float, float, c, s, s, tfuncname, varname ) \ \ GENTPROT2R( double, dcomplex, double, d, z, d, tfuncname, varname ) \ GENTPROT2R( dcomplex, double, double, z, d, d, tfuncname, varname ) // -- Mixed precision two-operand with real projection of first operand -- // -- (no auxiliary arguments) -- #define INSERT_GENTPROT2R_MIX_P0( tfuncname ) \ \ GENTPROT2R( float, double, float, s, d, s, tfuncname ) \ GENTPROT2R( float, dcomplex, float, s, z, s, tfuncname ) \ \ GENTPROT2R( double, float, double, d, s, d, tfuncname ) \ GENTPROT2R( double, scomplex, double, d, c, d, tfuncname ) \ \ GENTPROT2R( scomplex, double, float, c, d, s, tfuncname ) \ GENTPROT2R( scomplex, dcomplex, float, c, z, s, tfuncname ) \ \ GENTPROT2R( dcomplex, float, double, z, s, d, tfuncname ) \ GENTPROT2R( dcomplex, scomplex, double, z, c, d, tfuncname ) // -- (one auxiliary argument) -- #define INSERT_GENTPROT2R_MIX_P( tfuncname, varname ) \ \ GENTPROT2R( float, double, float, s, d, s, tfuncname, varname ) \ GENTPROT2R( float, dcomplex, float, s, z, s, tfuncname, varname ) \ \ GENTPROT2R( double, float, double, d, s, d, tfuncname, varname ) \ GENTPROT2R( double, scomplex, double, d, c, d, tfuncname, varname ) \ \ GENTPROT2R( scomplex, double, float, c, d, s, tfuncname, varname ) \ GENTPROT2R( scomplex, dcomplex, float, c, z, s, tfuncname, varname ) \ \ GENTPROT2R( dcomplex, float, double, z, s, d, tfuncname, varname ) \ GENTPROT2R( dcomplex, scomplex, double, z, c, d, tfuncname, varname ) // -- Macros for functions with three primary operands ------------------------- // -- Basic three-operand macro -- #define 
INSERT_GENTPROT3_BASIC( funcname ) \ \ GENTPROT3( float, float, float, s, s, s, funcname ) \ GENTPROT3( double, double, double, d, d, d, funcname ) \ GENTPROT3( scomplex, scomplex, scomplex, c, c, c, funcname ) \ GENTPROT3( dcomplex, dcomplex, dcomplex, z, z, z, funcname ) // -- Mixed domain three-operand macro -- #define INSERT_GENTPROT3_MIX_D( funcname ) \ \ GENTPROT3( float, float, scomplex, s, s, c, funcname ) \ GENTPROT3( float, scomplex, float, s, c, s, funcname ) \ GENTPROT3( float, scomplex, scomplex, s, c, c, funcname ) \ \ GENTPROT3( double, double, dcomplex, d, d, z, funcname ) \ GENTPROT3( double, dcomplex, double, d, z, d, funcname ) \ GENTPROT3( double, dcomplex, dcomplex, d, z, z, funcname ) \ \ GENTPROT3( scomplex, float, float, c, s, s, funcname ) \ GENTPROT3( scomplex, float, scomplex, c, s, c, funcname ) \ GENTPROT3( scomplex, scomplex, float, c, c, s, funcname ) \ \ GENTPROT3( dcomplex, double, double, z, d, d, funcname ) \ GENTPROT3( dcomplex, double, dcomplex, z, d, z, funcname ) \ GENTPROT3( dcomplex, dcomplex, double, z, z, d, funcname ) // -- Mixed precision three-operand macro -- #define INSERT_GENTPROT3_MIX_P( funcname ) \ \ GENTPROT3( float, float, double, s, s, d, funcname ) \ GENTPROT3( float, float, dcomplex, s, s, z, funcname ) \ \ GENTPROT3( float, double, float, s, d, s, funcname ) \ GENTPROT3( float, double, double, s, d, d, funcname ) \ GENTPROT3( float, double, scomplex, s, d, c, funcname ) \ GENTPROT3( float, double, dcomplex, s, d, z, funcname ) \ \ GENTPROT3( float, scomplex, double, s, c, d, funcname ) \ GENTPROT3( float, scomplex, dcomplex, s, c, z, funcname ) \ \ GENTPROT3( float, dcomplex, float, s, z, s, funcname ) \ GENTPROT3( float, dcomplex, double, s, z, d, funcname ) \ GENTPROT3( float, dcomplex, scomplex, s, z, c, funcname ) \ GENTPROT3( float, dcomplex, dcomplex, s, z, z, funcname ) \ \ \ GENTPROT3( double, float, float, d, s, s, funcname ) \ GENTPROT3( double, float, double, d, s, d, funcname ) \ GENTPROT3( 
double, float, scomplex, d, s, c, funcname ) \ GENTPROT3( double, float, dcomplex, d, s, z, funcname ) \ \ GENTPROT3( double, double, float, d, d, s, funcname ) \ GENTPROT3( double, double, scomplex, d, d, c, funcname ) \ \ GENTPROT3( double, scomplex, float, d, c, s, funcname ) \ GENTPROT3( double, scomplex, double, d, c, d, funcname ) \ GENTPROT3( double, scomplex, scomplex, d, c, c, funcname ) \ GENTPROT3( double, scomplex, dcomplex, d, c, z, funcname ) \ \ GENTPROT3( double, dcomplex, float, d, z, s, funcname ) \ GENTPROT3( double, dcomplex, scomplex, d, z, c, funcname ) \ \ \ GENTPROT3( scomplex, float, double, c, s, d, funcname ) \ GENTPROT3( scomplex, float, dcomplex, c, s, z, funcname ) \ \ GENTPROT3( scomplex, double, float, c, d, s, funcname ) \ GENTPROT3( scomplex, double, double, c, d, d, funcname ) \ GENTPROT3( scomplex, double, scomplex, c, d, c, funcname ) \ GENTPROT3( scomplex, double, dcomplex, c, d, z, funcname ) \ \ GENTPROT3( scomplex, scomplex, double, c, c, d, funcname ) \ GENTPROT3( scomplex, scomplex, dcomplex, c, c, z, funcname ) \ \ GENTPROT3( scomplex, dcomplex, float, c, z, s, funcname ) \ GENTPROT3( scomplex, dcomplex, double, c, z, d, funcname ) \ GENTPROT3( scomplex, dcomplex, scomplex, c, z, c, funcname ) \ GENTPROT3( scomplex, dcomplex, dcomplex, c, z, z, funcname ) \ \ \ GENTPROT3( dcomplex, float, float, z, s, s, funcname ) \ GENTPROT3( dcomplex, float, double, z, s, d, funcname ) \ GENTPROT3( dcomplex, float, scomplex, z, s, c, funcname ) \ GENTPROT3( dcomplex, float, dcomplex, z, s, z, funcname ) \ \ GENTPROT3( dcomplex, double, float, z, d, s, funcname ) \ GENTPROT3( dcomplex, double, scomplex, z, d, c, funcname ) \ \ GENTPROT3( dcomplex, scomplex, float, z, c, s, funcname ) \ GENTPROT3( dcomplex, scomplex, double, z, c, d, funcname ) \ GENTPROT3( dcomplex, scomplex, scomplex, z, c, c, funcname ) \ GENTPROT3( dcomplex, scomplex, dcomplex, z, c, z, funcname ) \ \ GENTPROT3( dcomplex, dcomplex, float, z, z, s, funcname ) \ 
GENTPROT3( dcomplex, dcomplex, scomplex, z, z, c, funcname ) \ // -- Basic three-operand with union of operands 1 and 2 -- #define INSERT_GENTPROT3U12_BASIC( funcname ) \ \ GENTPROT3U12( float, float, float, float, s, s, s, s, funcname ) \ GENTPROT3U12( double, double, double, double, d, d, d, d, funcname ) \ GENTPROT3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, funcname ) \ GENTPROT3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, funcname ) // -- Mixed domain three-operand with union of operands 1 and 2 -- #define INSERT_GENTPROT3U12_MIX_D( funcname ) \ \ GENTPROT3U12( float, float, scomplex, float, s, s, c, s, funcname ) \ GENTPROT3U12( float, scomplex, float, scomplex, s, c, s, c, funcname ) \ GENTPROT3U12( float, scomplex, scomplex, scomplex, s, c, c, c, funcname ) \ \ GENTPROT3U12( double, double, dcomplex, double, d, d, z, d, funcname ) \ GENTPROT3U12( double, dcomplex, double, dcomplex, d, z, d, z, funcname ) \ GENTPROT3U12( double, dcomplex, dcomplex, dcomplex, d, z, z, z, funcname ) \ \ GENTPROT3U12( scomplex, float, float, scomplex, c, s, s, c, funcname ) \ GENTPROT3U12( scomplex, float, scomplex, scomplex, c, s, c, c, funcname ) \ GENTPROT3U12( scomplex, scomplex, float, scomplex, c, c, s, c, funcname ) \ \ GENTPROT3U12( dcomplex, double, double, dcomplex, z, d, d, z, funcname ) \ GENTPROT3U12( dcomplex, double, dcomplex, dcomplex, z, d, z, z, funcname ) \ GENTPROT3U12( dcomplex, dcomplex, double, dcomplex, z, z, d, z, funcname ) // -- Mixed precision three-operand with union of operands 1 and 2 -- #define INSERT_GENTPROT3U12_MIX_P( funcname ) \ \ GENTPROT3U12( float, float, double, float, s, s, d, s, funcname ) \ GENTPROT3U12( float, float, dcomplex, float, s, s, z, s, funcname ) \ \ GENTPROT3U12( float, double, float, double, s, d, s, d, funcname ) \ GENTPROT3U12( float, double, double, double, s, d, d, d, funcname ) \ GENTPROT3U12( float, double, scomplex, double, s, d, c, d, funcname ) \ GENTPROT3U12( float, double, dcomplex, 
double, s, d, z, d, funcname ) \ \ GENTPROT3U12( float, scomplex, double, scomplex, s, c, d, c, funcname ) \ GENTPROT3U12( float, scomplex, dcomplex, scomplex, s, c, z, c, funcname ) \ \ GENTPROT3U12( float, dcomplex, float, dcomplex, s, z, s, z, funcname ) \ GENTPROT3U12( float, dcomplex, double, dcomplex, s, z, d, z, funcname ) \ GENTPROT3U12( float, dcomplex, scomplex, dcomplex, s, z, c, z, funcname ) \ GENTPROT3U12( float, dcomplex, dcomplex, dcomplex, s, z, z, z, funcname ) \ \ \ GENTPROT3U12( double, float, float, double, d, s, s, d, funcname ) \ GENTPROT3U12( double, float, double, double, d, s, d, d, funcname ) \ GENTPROT3U12( double, float, scomplex, double, d, s, c, d, funcname ) \ GENTPROT3U12( double, float, dcomplex, double, d, s, z, d, funcname ) \ \ GENTPROT3U12( double, double, float, double, d, d, s, d, funcname ) \ GENTPROT3U12( double, double, scomplex, double, d, d, c, d, funcname ) \ \ GENTPROT3U12( double, scomplex, float, dcomplex, d, c, s, z, funcname ) \ GENTPROT3U12( double, scomplex, double, dcomplex, d, c, d, z, funcname ) \ GENTPROT3U12( double, scomplex, scomplex, dcomplex, d, c, c, z, funcname ) \ GENTPROT3U12( double, scomplex, dcomplex, dcomplex, d, c, z, z, funcname ) \ \ GENTPROT3U12( double, dcomplex, float, dcomplex, d, z, s, z, funcname ) \ GENTPROT3U12( double, dcomplex, scomplex, dcomplex, d, z, c, z, funcname ) \ \ \ GENTPROT3U12( scomplex, float, double, scomplex, c, s, d, c, funcname ) \ GENTPROT3U12( scomplex, float, dcomplex, scomplex, c, s, z, c, funcname ) \ \ GENTPROT3U12( scomplex, double, float, dcomplex, c, d, s, z, funcname ) \ GENTPROT3U12( scomplex, double, double, dcomplex, c, d, d, z, funcname ) \ GENTPROT3U12( scomplex, double, scomplex, dcomplex, c, d, c, z, funcname ) \ GENTPROT3U12( scomplex, double, dcomplex, dcomplex, c, d, z, z, funcname ) \ \ GENTPROT3U12( scomplex, scomplex, double, scomplex, c, c, d, c, funcname ) \ GENTPROT3U12( scomplex, scomplex, dcomplex, scomplex, c, c, z, c, funcname ) \ \ 
GENTPROT3U12( scomplex, dcomplex, float, dcomplex, c, z, s, z, funcname ) \ GENTPROT3U12( scomplex, dcomplex, double, dcomplex, c, z, d, z, funcname ) \ GENTPROT3U12( scomplex, dcomplex, scomplex, dcomplex, c, z, c, z, funcname ) \ GENTPROT3U12( scomplex, dcomplex, dcomplex, dcomplex, c, z, z, z, funcname ) \ \ \ GENTPROT3U12( dcomplex, float, float, dcomplex, z, s, s, z, funcname ) \ GENTPROT3U12( dcomplex, float, double, dcomplex, z, s, d, z, funcname ) \ GENTPROT3U12( dcomplex, float, scomplex, dcomplex, z, s, c, z, funcname ) \ GENTPROT3U12( dcomplex, float, dcomplex, dcomplex, z, s, z, z, funcname ) \ \ GENTPROT3U12( dcomplex, double, float, dcomplex, z, d, s, z, funcname ) \ GENTPROT3U12( dcomplex, double, scomplex, dcomplex, z, d, c, z, funcname ) \ \ GENTPROT3U12( dcomplex, scomplex, float, dcomplex, z, c, s, z, funcname ) \ GENTPROT3U12( dcomplex, scomplex, double, dcomplex, z, c, d, z, funcname ) \ GENTPROT3U12( dcomplex, scomplex, scomplex, dcomplex, z, c, c, z, funcname ) \ GENTPROT3U12( dcomplex, scomplex, dcomplex, dcomplex, z, c, z, z, funcname ) \ \ GENTPROT3U12( dcomplex, dcomplex, float, dcomplex, z, z, s, z, funcname ) \ GENTPROT3U12( dcomplex, dcomplex, scomplex, dcomplex, z, z, c, z, funcname ) #endif blis-0.6.1/frame/include/bli_kernel_macro_defs.h000066400000000000000000000165401360743507500216450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_KERNEL_MACRO_DEFS_H #define BLIS_KERNEL_MACRO_DEFS_H // -- Define default threading parameters -------------------------------------- #ifndef BLIS_THREAD_RATIO_M #define BLIS_THREAD_RATIO_M 2 #endif #ifndef BLIS_THREAD_RATIO_N #define BLIS_THREAD_RATIO_N 1 #endif #ifndef BLIS_THREAD_MAX_IR #define BLIS_THREAD_MAX_IR 1 #endif #ifndef BLIS_THREAD_MAX_JR #define BLIS_THREAD_MAX_JR 4 #endif // -- Memory allocation -------------------------------------------------------- // hbwmalloc.h provides hbw_malloc() and hbw_free() on systems with // libmemkind. But disable use of libmemkind if BLIS_DISABLE_MEMKIND // was explicitly defined. #ifdef BLIS_DISABLE_MEMKIND #undef BLIS_ENABLE_MEMKIND #endif #ifdef BLIS_ENABLE_MEMKIND #include #endif // Memory allocation functions. These macros define the three types of // malloc()-style functions, and their free() counterparts: one for each // type of memory to be allocated. 
// NOTE: ANY ALTERNATIVE TO malloc()/free() USED FOR ANY OF THE FOLLOWING
// THREE PAIRS OF MACROS MUST USE THE SAME FUNCTION PROTOTYPE AS malloc()
// and free():
//
//   void* malloc( size_t size );
//   void  free( void* p );
//

// This allocation function is called to allocate memory for blocks within
// BLIS's internal memory pools.
#ifndef BLIS_MALLOC_POOL
  // If use of libmemkind was enabled at configure-time, the default
  // memory allocation function for memory pools should be hbw_malloc()
  // instead of malloc().
  #ifdef BLIS_ENABLE_MEMKIND
  #define BLIS_MALLOC_POOL  hbw_malloc
  #else
  #define BLIS_MALLOC_POOL  malloc
  #endif
#endif

#ifndef BLIS_FREE_POOL
  // If use of libmemkind was enabled at configure-time, the default
  // memory deallocation function for memory pools should be hbw_free()
  // instead of free().
  #ifdef BLIS_ENABLE_MEMKIND
  #define BLIS_FREE_POOL    hbw_free
  #else
  #define BLIS_FREE_POOL    free
  #endif
#endif

// This allocation function is called to allocate memory for internally-
// used objects and structures, such as control tree nodes.
#ifndef BLIS_MALLOC_INTL
#define BLIS_MALLOC_INTL    malloc
#endif

#ifndef BLIS_FREE_INTL
#define BLIS_FREE_INTL      free
#endif

// This allocation function is called to allocate memory for objects
// created by user-level API functions, such as bli_obj_create().
#ifndef BLIS_MALLOC_USER
#define BLIS_MALLOC_USER    malloc
#endif

#ifndef BLIS_FREE_USER
#define BLIS_FREE_USER      free
#endif


// -- Other system-related definitions -----------------------------------------

// Size of a virtual memory page. This is used to align blocks within the
// memory pools.
#ifndef BLIS_PAGE_SIZE
#define BLIS_PAGE_SIZE      4096
#endif

// The maximum number of named SIMD vector registers available for use.
// When configuring with umbrella configuration families, this should be
// set to the maximum number of registers across all sub-configurations in
// the family.
#ifndef BLIS_SIMD_NUM_REGISTERS
#define BLIS_SIMD_NUM_REGISTERS      32
#endif

// The maximum size (in bytes) of each SIMD vector.
// When configuring with umbrella configuration families, this should be
// set to the maximum SIMD size across all sub-configurations in the family.
#ifndef BLIS_SIMD_SIZE
#define BLIS_SIMD_SIZE               64
#endif

// Alignment size (in bytes) needed by the instruction set for aligned
// SIMD/vector instructions.
#ifndef BLIS_SIMD_ALIGN_SIZE
#define BLIS_SIMD_ALIGN_SIZE         BLIS_SIMD_SIZE
#endif

// The maximum size in bytes of local stack buffers within macro-kernel
// functions. These buffers are usually used to store a temporary copy
// of a single microtile. The reason we multiply by 2 is to handle induced
// methods, where we use real domain register blocksizes in units of
// complex elements. Specifically, the macro-kernels will need this larger
// micro-tile footprint, even though the virtual micro-kernels will only
// ever be writing to half (real or imaginary part) at a time.
#ifndef BLIS_STACK_BUF_MAX_SIZE
#define BLIS_STACK_BUF_MAX_SIZE      ( BLIS_SIMD_NUM_REGISTERS * \
                                       BLIS_SIMD_SIZE * 2 )
#endif

// Alignment size used to align local stack buffers within macro-kernel
// functions.
#ifndef BLIS_STACK_BUF_ALIGN_SIZE
#define BLIS_STACK_BUF_ALIGN_SIZE    BLIS_SIMD_ALIGN_SIZE
#endif

// Alignment size used when allocating memory via BLIS_MALLOC_USER.
// To disable heap alignment, set this to 1.
#ifndef BLIS_HEAP_ADDR_ALIGN_SIZE
#define BLIS_HEAP_ADDR_ALIGN_SIZE    BLIS_SIMD_ALIGN_SIZE
#endif

// Alignment size used when sizing leading dimensions of memory allocated
// via BLIS_MALLOC_USER.
#ifndef BLIS_HEAP_STRIDE_ALIGN_SIZE
#define BLIS_HEAP_STRIDE_ALIGN_SIZE  BLIS_SIMD_ALIGN_SIZE
#endif

// Alignment sizes used when allocating blocks to the internal memory
// pool, via BLIS_MALLOC_POOL.
#ifndef BLIS_POOL_ADDR_ALIGN_SIZE_A #define BLIS_POOL_ADDR_ALIGN_SIZE_A BLIS_PAGE_SIZE #endif #ifndef BLIS_POOL_ADDR_ALIGN_SIZE_B #define BLIS_POOL_ADDR_ALIGN_SIZE_B BLIS_PAGE_SIZE #endif #ifndef BLIS_POOL_ADDR_ALIGN_SIZE_C #define BLIS_POOL_ADDR_ALIGN_SIZE_C BLIS_PAGE_SIZE #endif #ifndef BLIS_POOL_ADDR_ALIGN_SIZE_GEN #define BLIS_POOL_ADDR_ALIGN_SIZE_GEN BLIS_PAGE_SIZE #endif // Offsets from alignment specified by BLIS_POOL_ADDR_ALIGN_SIZE_*. #ifndef BLIS_POOL_ADDR_OFFSET_SIZE_A #define BLIS_POOL_ADDR_OFFSET_SIZE_A 0 #endif #ifndef BLIS_POOL_ADDR_OFFSET_SIZE_B #define BLIS_POOL_ADDR_OFFSET_SIZE_B 0 #endif #ifndef BLIS_POOL_ADDR_OFFSET_SIZE_C #define BLIS_POOL_ADDR_OFFSET_SIZE_C 0 #endif #ifndef BLIS_POOL_ADDR_OFFSET_SIZE_GEN #define BLIS_POOL_ADDR_OFFSET_SIZE_GEN 0 #endif #endif blis-0.6.1/frame/include/bli_macro_defs.h000066400000000000000000000151251360743507500203030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MACRO_DEFS_H #define BLIS_MACRO_DEFS_H // -- Undefine restrict for C++ and C89/90 -- #ifdef __cplusplus // Language is C++; define restrict as nothing. #ifndef restrict #define restrict #endif #elif __STDC_VERSION__ >= 199901L // Language is C99 (or later); do nothing since restrict is recognized. #else // Language is pre-C99; define restrict as nothing. #ifndef restrict #define restrict #endif #endif // -- Define typeof() operator if using non-GNU compiler -- #ifndef __GNUC__ #define typeof __typeof__ #else #ifndef typeof #define typeof __typeof__ #endif #endif // -- BLIS Thread Local Storage Keyword -- // __thread for TLS is supported by GCC, CLANG, ICC, and IBMC. // There is a small risk here as __GNUC__ can also be defined by some other // compiler (other than ICC and CLANG which we know define it) that // doesn't support __thread, as __GNUC__ is not quite unique to GCC. // But the possibility of someone using such non-main-stream compiler // for building BLIS is low. 
#if !defined(__GNUC__) && !defined(__clang__) && !defined(__ICC) && !defined(__IBMC__)
  // No compiler known to provide __thread: the keyword expands to nothing.
  #define BLIS_THREAD_LOCAL
#else
  #define BLIS_THREAD_LOCAL __thread
#endif

// -- BLIS constructor/destructor function attribute --

// __attribute__((constructor/destructor)) is used for GCC and for clang
// (clang's documentation doesn't mention support for it, but compiling with
// clang and testing shows that it does support it). ICC defines __GNUC__
// but doesn't support the attribute, so it is excluded explicitly.
// There is a small risk here as __GNUC__ can also be defined by some other
// compiler (other than ICC and CLANG which we know define it) that
// doesn't support this, as __GNUC__ is not quite unique to GCC.
// But the possibility of someone using such non-main-stream compiler
// for building BLIS is low.
#if ( defined(__clang__) || defined(__GNUC__) ) && \
    !defined(__ICC) && !defined(__INTEL_COMPILER)
  #define BLIS_ATTRIB_CTOR __attribute__((constructor))
  #define BLIS_ATTRIB_DTOR __attribute__((destructor))
#else
  #define BLIS_ATTRIB_CTOR
  #define BLIS_ATTRIB_DTOR
#endif

// -- Concatenation macros --

#define BLIS_FUNC_PREFIX_STR "bli"

// We add an extra layer to the definitions of these string-pasting macros
// because sometimes it is needed if, for example, one of the PASTE
// macros is invoked with an "op" argument that is itself a macro.
// PASTEMACn: build a "bli_"-prefixed identifier from n type characters
// followed by an operation name. Each public macro forwards to an
// underscore-suffixed worker so that arguments which are themselves macros
// are expanded before being pasted.
#define PASTEMAC0_(name)                          bli_ ## name
#define PASTEMAC0(name)                           PASTEMAC0_(name)

#define PASTEMAC_(c1,name)                        bli_ ## c1 ## name
#define PASTEMAC(c1,name)                         PASTEMAC_(c1,name)

#define PASTEMAC2_(c1,c2,name)                    bli_ ## c1 ## c2 ## name
#define PASTEMAC2(c1,c2,name)                     PASTEMAC2_(c1,c2,name)

#define PASTEMAC3_(c1,c2,c3,name)                 bli_ ## c1 ## c2 ## c3 ## name
#define PASTEMAC3(c1,c2,c3,name)                  PASTEMAC3_(c1,c2,c3,name)

#define PASTEMAC4_(c1,c2,c3,c4,name)              bli_ ## c1 ## c2 ## c3 ## c4 ## name
#define PASTEMAC4(c1,c2,c3,c4,name)               PASTEMAC4_(c1,c2,c3,c4,name)

#define PASTEMAC5_(c1,c2,c3,c4,c5,name)           bli_ ## c1 ## c2 ## c3 ## c4 ## c5 ## name
#define PASTEMAC5(c1,c2,c3,c4,c5,name)            PASTEMAC5_(c1,c2,c3,c4,c5,name)

#define PASTEMAC6_(c1,c2,c3,c4,c5,c6,name)        bli_ ## c1 ## c2 ## c3 ## c4 ## c5 ## c6 ## name
#define PASTEMAC6(c1,c2,c3,c4,c5,c6,name)         PASTEMAC6_(c1,c2,c3,c4,c5,c6,name)

// PASTEBLACHK: build the identifier "bla_<name>_check".
#define PASTEBLACHK_(name)                        bla_ ## name ## _check
#define PASTEBLACHK(name)                         PASTEBLACHK_(name)

// PASTECHn: paste n type characters in front of a name, with no prefix.
#define PASTECH0_(name)                           name
#define PASTECH0(name)                            PASTECH0_(name)

#define PASTECH_(c1,name)                         c1 ## name
#define PASTECH(c1,name)                          PASTECH_(c1,name)

#define PASTECH2_(c1,c2,name)                     c1 ## c2 ## name
#define PASTECH2(c1,c2,name)                      PASTECH2_(c1,c2,name)

#define PASTECH3_(c1,c2,c3,name)                  c1 ## c2 ## c3 ## name
#define PASTECH3(c1,c2,c3,name)                   PASTECH3_(c1,c2,c3,name)

// MKSTR stringifies its argument as written; STRINGIFY_INT expands the
// argument first and then stringifies the result.
#define MKSTR(text)                               #text
#define STRINGIFY_INT( value )                    MKSTR( value )

// Fortran-77 name-mangling macros.
#define PASTEF770(name) name ## _ #define PASTEF77(ch1,name) ch1 ## name ## _ #define PASTEF772(ch1,ch2,name) ch1 ## ch2 ## name ## _ #define PASTEF773(ch1,ch2,ch3,name) ch1 ## ch2 ## ch3 ## name ## _ // -- Include other groups of macros #include "bli_genarray_macro_defs.h" #include "bli_gentdef_macro_defs.h" #include "bli_gentfunc_macro_defs.h" #include "bli_gentprot_macro_defs.h" #include "bli_misc_macro_defs.h" #include "bli_param_macro_defs.h" #include "bli_obj_macro_defs.h" #include "bli_complex_macro_defs.h" #include "bli_scalar_macro_defs.h" #include "bli_error_macro_defs.h" #include "bli_blas_macro_defs.h" #include "bli_builtin_macro_defs.h" #include "bli_oapi_macro_defs.h" #include "bli_tapi_macro_defs.h" #endif blis-0.6.1/frame/include/bli_misc_macro_defs.h000066400000000000000000000104461360743507500213170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MISC_MACRO_DEFS_H #define BLIS_MISC_MACRO_DEFS_H // -- Miscellaneous macros -- // min, max, abs // NOTE: These must remain macros since we don't know the types of a and b. #define bli_min( a, b ) ( (a) < (b) ? (a) : (b) ) #define bli_max( a, b ) ( (a) > (b) ? (a) : (b) ) #define bli_abs( a ) ( (a) <= 0 ? -(a) : (a) ) // fmin, fmax, fabs // NOTE: These must remain macros since we don't know the types of a and b. #define bli_fmin( a, b ) bli_min( a, b ) #define bli_fmax( a, b ) bli_max( a, b ) #define bli_fabs( a ) ( (a) <= 0.0 ? -(a) : (a) ) // fminabs, fmaxabs // NOTE: These must remain macros since we don't know the types of a and b. #define bli_fminabs( a, b ) \ \ bli_fmin( bli_fabs( a ), \ bli_fabs( b ) ) #define bli_fmaxabs( a, b ) \ \ bli_fmax( bli_fabs( a ), \ bli_fabs( b ) ) // round static double bli_round( double a ) { return round( a ); } // round_to_mult static guint_t bli_round_to_mult( guint_t val, guint_t mult ) { return ( guint_t ) ( ( ( ( guint_t )val + ( guint_t )mult / 2 ) / mult ) * mult ); } // isnan, isinf // NOTE: These must remain macros, since isinf() and isnan() are macros // (defined in math.h) that likely depend on the type of the argument 'a' // below. 
#define bli_isinf( a ) isinf( a ) #define bli_isnan( a ) isnan( a ) // is_odd, is_even static bool_t bli_is_odd( gint_t a ) { return ( a % 2 == 1 ); } static bool_t bli_is_even( gint_t a ) { return ( a % 2 == 0 ); } // swap_dims static void bli_swap_dims( dim_t* dim1, dim_t* dim2 ) { dim_t temp = *dim1; *dim1 = *dim2; *dim2 = temp; } // swap_incs static void bli_swap_incs( inc_t* inc1, inc_t* inc2 ) { inc_t temp = *inc1; *inc1 = *inc2; *inc2 = temp; } // toggle_bool static void bli_toggle_bool( bool_t* b ) { if ( *b == TRUE ) *b = FALSE; else *b = TRUE; } // return datatype for char #define bli_stype ( BLIS_FLOAT ) #define bli_dtype ( BLIS_DOUBLE ) #define bli_ctype ( BLIS_SCOMPLEX ) #define bli_ztype ( BLIS_DCOMPLEX ) // return C type for char #define bli_sctype float #define bli_dctype double #define bli_cctype scomplex #define bli_zctype dcomplex // return real proj of C type for char #define bli_sctyper float #define bli_dctyper double #define bli_cctyper float #define bli_zctyper double // return default format specifier for char // NOTE: These must remain macros due to the way they are used to initialize // local char arrays. #define bli_sformatspec() "%9.2e" #define bli_dformatspec() "%9.2e" #define bli_cformatspec() "%9.2e + %9.2e " #define bli_zformatspec() "%9.2e + %9.2e " #define bli_iformatspec() "%6d" #endif blis-0.6.1/frame/include/bli_oapi_ba.h000066400000000000000000000047231360743507500175750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// This file defines macros used to allow the _oapi.c files to produce
// object APIs that omit expert parameters.
//
// Every macro is #undef'd before being redefined, so including this file
// replaces whatever definitions of these macros were in effect before.

// Define the macro to remove the function name suffix (in function
// definitions). It expands to nothing.
#undef  EX_SUF
#define EX_SUF

// Define the macro to omit expert arguments from function signatures
// and prototypes. It expands to nothing.
#undef  BLIS_OAPI_EX_PARAMS
#define BLIS_OAPI_EX_PARAMS

// Define the macro to declare local expert variables that are initialized
// to NULL. The "( void )" statements are to prevent unused variable
// warnings by the compiler. The expansion declares two locals named
// 'cntx' and 'rntm'.
#undef  BLIS_OAPI_EX_DECLS
#define BLIS_OAPI_EX_DECLS   cntx_t* cntx = NULL; ( void )cntx; \
                             rntm_t* rntm = NULL; ( void )rntm;

// Define the macro to pass the local expert variables to another function.
//#undef BLIS_TAPI_EX_VARS //#define BLIS_TAPI_EX_VARS blis-0.6.1/frame/include/bli_oapi_ex.h000066400000000000000000000046341360743507500176300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // This file defines macros used to allow the _oapi.c files to produce // object APIs that contain context parameters. // Define the macro to add a suffix to the object API function names // (in function definitions). 
#undef EX_SUF #define EX_SUF BLIS_OAPI_EX_SUF // Define the macro to add expert arguments to function signatures // and prototypes. #undef BLIS_OAPI_EX_PARAMS #define BLIS_OAPI_EX_PARAMS ,cntx_t* cntx, rntm_t* rntm // Define the macro to omit the expert variable declaration block, since // it is not needed when expert parameters are passed in through the API. #undef BLIS_OAPI_EX_DECLS #define BLIS_OAPI_EX_DECLS // Define the macro to pass the local expert variables to another function. //#undef BLIS_TAPI_EX_VARS //#define BLIS_TAPI_EX_VARS ,cntx, rntm blis-0.6.1/frame/include/bli_oapi_macro_defs.h000066400000000000000000000034201360743507500213060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Define the suffix to add to object API function names that include
// additional "expert" parameters. The value is the bare token _ex (not a
// string literal), so it can be token-pasted onto a function name.
#define BLIS_OAPI_EX_SUF  _ex

blis-0.6.1/frame/include/bli_obj_macro_defs.h000066400000000000000000001122301360743507500211300ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2016, Hewlett Packard Enterprise Development LP
   Copyright (C) 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_OBJ_MACRO_DEFS_H #define BLIS_OBJ_MACRO_DEFS_H // -- Object query/modification macros -- // Info query static num_t bli_obj_dt( obj_t* obj ) { return ( num_t ) ( obj->info & BLIS_DATATYPE_BITS ); } static bool_t bli_obj_is_float( obj_t* obj ) { return ( bool_t ) ( bli_obj_dt( obj ) == BLIS_BITVAL_FLOAT_TYPE ); } static bool_t bli_obj_is_double( obj_t* obj ) { return ( bool_t ) ( bli_obj_dt( obj ) == BLIS_BITVAL_DOUBLE_TYPE ); } static bool_t bli_obj_is_scomplex( obj_t* obj ) { return ( bool_t ) ( bli_obj_dt( obj ) == BLIS_BITVAL_SCOMPLEX_TYPE ); } static bool_t bli_obj_is_dcomplex( obj_t* obj ) { return ( bool_t ) ( bli_obj_dt( obj ) == BLIS_BITVAL_DCOMPLEX_TYPE ); } static bool_t bli_obj_is_int( obj_t* obj ) { return ( bool_t ) ( bli_obj_dt( obj ) == BLIS_BITVAL_INT_TYPE ); } static bool_t bli_obj_is_const( obj_t* obj ) { return ( bool_t ) ( bli_obj_dt( obj ) == BLIS_BITVAL_CONST_TYPE ); } static dom_t bli_obj_domain( obj_t* obj ) { return ( dom_t ) ( obj->info & BLIS_DOMAIN_BIT ); } static prec_t bli_obj_prec( obj_t* obj ) { return ( prec_t ) ( obj->info & BLIS_PRECISION_BIT ); } static bool_t bli_obj_is_single_prec( obj_t* obj ) { return ( bool_t ) ( bli_obj_prec( obj ) == BLIS_BITVAL_SINGLE_PREC ); } static bool_t bli_obj_is_double_prec( obj_t* obj ) { return ( bool_t ) ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC ); } static num_t bli_obj_dt_proj_to_single_prec( obj_t* obj ) { return ( num_t ) ( bli_obj_dt( obj ) & 
~BLIS_BITVAL_SINGLE_PREC ); } static num_t bli_obj_dt_proj_to_double_prec( obj_t* obj ) { return ( num_t ) ( bli_obj_dt( obj ) | BLIS_BITVAL_DOUBLE_PREC ); } static bool_t bli_obj_is_real( obj_t* obj ) { return ( bool_t ) ( bli_obj_domain( obj ) == BLIS_BITVAL_REAL && !bli_obj_is_const( obj ) ); } static bool_t bli_obj_is_complex( obj_t* obj ) { return ( bool_t ) ( bli_obj_domain( obj ) == BLIS_BITVAL_COMPLEX && !bli_obj_is_const( obj ) ); } static num_t bli_obj_dt_proj_to_real( obj_t* obj ) { return ( num_t ) ( bli_obj_dt( obj ) & ~BLIS_BITVAL_COMPLEX ); } static num_t bli_obj_dt_proj_to_complex( obj_t* obj ) { return ( num_t ) ( bli_obj_dt( obj ) | BLIS_BITVAL_COMPLEX ); } static num_t bli_obj_target_dt( obj_t* obj ) { return ( num_t ) ( ( obj->info & BLIS_TARGET_DT_BITS ) >> BLIS_TARGET_DT_SHIFT ); } static dom_t bli_obj_target_domain( obj_t* obj ) { return ( dom_t ) ( ( obj->info & BLIS_TARGET_DOMAIN_BIT ) >> BLIS_TARGET_DT_SHIFT ); } static prec_t bli_obj_target_prec( obj_t* obj ) { return ( prec_t ) ( ( obj->info & BLIS_TARGET_PREC_BIT ) >> BLIS_TARGET_DT_SHIFT ); } static num_t bli_obj_exec_dt( obj_t* obj ) { return ( num_t ) ( ( obj->info & BLIS_EXEC_DT_BITS ) >> BLIS_EXEC_DT_SHIFT ); } static dom_t bli_obj_exec_domain( obj_t* obj ) { return ( dom_t ) ( ( obj->info & BLIS_EXEC_DOMAIN_BIT ) >> BLIS_EXEC_DT_SHIFT ); } static prec_t bli_obj_exec_prec( obj_t* obj ) { return ( prec_t ) ( ( obj->info & BLIS_EXEC_PREC_BIT ) >> BLIS_EXEC_DT_SHIFT ); } static num_t bli_obj_comp_dt( obj_t* obj ) { return ( num_t ) ( ( obj->info & BLIS_COMP_DT_BITS ) >> BLIS_COMP_DT_SHIFT ); } static dom_t bli_obj_comp_domain( obj_t* obj ) { return ( dom_t ) ( ( obj->info & BLIS_COMP_DOMAIN_BIT ) >> BLIS_COMP_DT_SHIFT ); } static prec_t bli_obj_comp_prec( obj_t* obj ) { return ( prec_t ) ( ( obj->info & BLIS_COMP_PREC_BIT ) >> BLIS_COMP_DT_SHIFT ); } // NOTE: This function queries info2. 
static num_t bli_obj_scalar_dt( obj_t* obj ) { return ( num_t ) ( ( obj->info2 & BLIS_SCALAR_DT_BITS ) >> BLIS_SCALAR_DT_SHIFT ); } // NOTE: This function queries info2. static dom_t bli_obj_scalar_domain( obj_t* obj ) { return ( dom_t ) ( ( obj->info2 & BLIS_SCALAR_DOMAIN_BIT ) >> BLIS_SCALAR_DT_SHIFT ); } // NOTE: This function queries info2. static prec_t bli_obj_scalar_prec( obj_t* obj ) { return ( prec_t ) ( ( obj->info2 & BLIS_SCALAR_PREC_BIT ) >> BLIS_SCALAR_DT_SHIFT ); } static trans_t bli_obj_conjtrans_status( obj_t* obj ) { return ( trans_t ) ( obj->info & BLIS_CONJTRANS_BITS ); } static trans_t bli_obj_onlytrans_status( obj_t* obj ) { return ( trans_t ) ( obj->info & BLIS_TRANS_BIT ); } static bool_t bli_obj_has_trans( obj_t* obj ) { return ( bool_t ) ( bli_obj_onlytrans_status( obj ) == BLIS_BITVAL_TRANS ); } static bool_t bli_obj_has_notrans( obj_t* obj ) { return ( bool_t ) ( bli_obj_onlytrans_status( obj ) == BLIS_BITVAL_NO_TRANS ); } static conj_t bli_obj_conj_status( obj_t* obj ) { return ( conj_t ) ( obj->info & BLIS_CONJ_BIT ); } static bool_t bli_obj_has_conj( obj_t* obj ) { return ( bool_t ) ( bli_obj_conj_status( obj ) == BLIS_BITVAL_CONJ ); } static bool_t bli_obj_has_noconj( obj_t* obj ) { return ( bool_t ) ( bli_obj_conj_status( obj ) == BLIS_BITVAL_NO_CONJ ); } static uplo_t bli_obj_uplo( obj_t* obj ) { return ( uplo_t ) ( obj->info & BLIS_UPLO_BITS ); } static bool_t bli_obj_is_upper( obj_t* obj ) { return ( bool_t ) ( bli_obj_uplo( obj ) == BLIS_BITVAL_UPPER ); } static bool_t bli_obj_is_lower( obj_t* obj ) { return ( bool_t ) ( bli_obj_uplo( obj ) == BLIS_BITVAL_LOWER ); } static bool_t bli_obj_is_upper_or_lower( obj_t* obj ) { return ( bool_t ) ( bli_obj_is_upper( obj ) || bli_obj_is_lower( obj ) ); } static bool_t bli_obj_is_dense( obj_t* obj ) { return ( bool_t ) ( bli_obj_uplo( obj ) == BLIS_BITVAL_DENSE ); } static bool_t bli_obj_is_zeros( obj_t* obj ) { return ( bool_t ) ( bli_obj_uplo( obj ) == BLIS_BITVAL_ZEROS ); } static 
diag_t bli_obj_diag( obj_t* obj ) { return ( diag_t ) ( obj->info & BLIS_UNIT_DIAG_BIT ); } static bool_t bli_obj_has_nonunit_diag( obj_t* obj ) { return ( bool_t ) ( bli_obj_diag( obj ) == BLIS_BITVAL_NONUNIT_DIAG ); } static bool_t bli_obj_has_unit_diag( obj_t* obj ) { return ( bool_t ) ( bli_obj_diag( obj ) == BLIS_BITVAL_UNIT_DIAG ); } static bool_t bli_obj_has_inverted_diag( obj_t* obj ) { return ( bool_t ) ( ( obj->info & BLIS_INVERT_DIAG_BIT ) == BLIS_BITVAL_INVERT_DIAG ); } static bool_t bli_obj_is_pack_rev_if_upper( obj_t* obj ) { return ( bool_t ) ( ( obj->info & BLIS_PACK_REV_IF_UPPER_BIT ) == BLIS_BITVAL_PACK_REV_IF_UPPER ); } static bool_t bli_obj_is_pack_rev_if_lower( obj_t* obj ) { return ( bool_t ) ( ( obj->info & BLIS_PACK_REV_IF_LOWER_BIT ) == BLIS_BITVAL_PACK_REV_IF_LOWER ); } static pack_t bli_obj_pack_schema( obj_t* obj ) { return ( pack_t ) ( obj->info & BLIS_PACK_SCHEMA_BITS ); } static bool_t bli_obj_is_packed( obj_t* obj ) { return ( bool_t ) ( obj->info & BLIS_PACK_BIT ); } static bool_t bli_obj_is_row_packed( obj_t* obj ) { return ( bool_t ) ( obj->info & BLIS_PACK_RC_BIT ) == ( BLIS_BITVAL_PACKED_UNSPEC ^ BLIS_BITVAL_PACKED_ROWS ); } static bool_t bli_obj_is_col_packed( obj_t* obj ) { return ( bool_t ) ( obj->info & BLIS_PACK_RC_BIT ) == ( BLIS_BITVAL_PACKED_UNSPEC ^ BLIS_BITVAL_PACKED_COLUMNS ); } static bool_t bli_obj_is_panel_packed( obj_t* obj ) { return ( bool_t ) ( obj->info & BLIS_PACK_PANEL_BIT ); } static packbuf_t bli_obj_pack_buffer_type( obj_t* obj ) { return ( packbuf_t ) ( obj->info & BLIS_PACK_BUFFER_BITS ); } static struc_t bli_obj_struc( obj_t* obj ) { return ( struc_t ) ( obj->info & BLIS_STRUC_BITS ); } static bool_t bli_obj_is_general( obj_t* obj ) { return ( bool_t ) ( bli_obj_struc( obj ) == BLIS_BITVAL_GENERAL ); } static bool_t bli_obj_is_hermitian( obj_t* obj ) { return ( bool_t ) ( bli_obj_struc( obj ) == BLIS_BITVAL_HERMITIAN ); } static bool_t bli_obj_is_symmetric( obj_t* obj ) { return ( bool_t ) ( 
bli_obj_struc( obj ) == BLIS_BITVAL_SYMMETRIC ); } static bool_t bli_obj_is_triangular( obj_t* obj ) { return ( bool_t ) ( bli_obj_struc( obj ) == BLIS_BITVAL_TRIANGULAR ); } // Info modification static void bli_obj_apply_trans( trans_t trans, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info ^ trans ); } static void bli_obj_apply_conj( conj_t conj, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info ^ conj ); } static void bli_obj_set_conjtrans( trans_t trans, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_CONJTRANS_BITS ) | trans; } static void bli_obj_set_onlytrans( trans_t trans, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_TRANS_BIT ) | trans; } static void bli_obj_set_conj( conj_t conj, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_CONJ_BIT ) | conj; } static void bli_obj_set_uplo( uplo_t uplo, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_UPLO_BITS ) | uplo; } static void bli_obj_set_diag( diag_t diag, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_UNIT_DIAG_BIT ) | diag; } static void bli_obj_set_invert_diag( invdiag_t invdiag, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_INVERT_DIAG_BIT ) | invdiag; } static void bli_obj_set_dt( num_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_DATATYPE_BITS ) | dt; } static void bli_obj_set_target_dt( num_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_TARGET_DT_BITS ) | ( dt << BLIS_TARGET_DT_SHIFT ); } static void bli_obj_set_target_domain( dom_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_TARGET_DOMAIN_BIT ) | ( dt << BLIS_TARGET_DT_SHIFT ); } static void bli_obj_set_target_prec( prec_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_TARGET_PREC_BIT ) | ( dt << BLIS_TARGET_DT_SHIFT ); } static void bli_obj_set_exec_dt( num_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_EXEC_DT_BITS ) | ( dt << BLIS_EXEC_DT_SHIFT ); } static void 
bli_obj_set_exec_domain( dom_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_EXEC_DOMAIN_BIT ) | ( dt << BLIS_EXEC_DT_SHIFT ); } static void bli_obj_set_exec_prec( prec_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_EXEC_PREC_BIT ) | ( dt << BLIS_EXEC_DT_SHIFT ); } static void bli_obj_set_comp_dt( num_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_COMP_DT_BITS ) | ( dt << BLIS_COMP_DT_SHIFT ); } static void bli_obj_set_comp_domain( dom_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_COMP_DOMAIN_BIT ) | ( dt << BLIS_COMP_DT_SHIFT ); } static void bli_obj_set_comp_prec( prec_t dt, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_COMP_PREC_BIT ) | ( dt << BLIS_COMP_DT_SHIFT ); } // NOTE: This function queries and modifies info2. static void bli_obj_set_scalar_dt( num_t dt, obj_t* obj ) { obj->info2 = ( objbits_t ) ( obj->info2 & ~BLIS_SCALAR_DT_BITS ) | ( dt << BLIS_SCALAR_DT_SHIFT ); } // NOTE: This function queries and modifies info2. static void bli_obj_set_scalar_domain( dom_t dt, obj_t* obj ) { obj->info2 = ( objbits_t ) ( obj->info2 & ~BLIS_SCALAR_DOMAIN_BIT ) | ( dt << BLIS_SCALAR_DT_SHIFT ); } // NOTE: This function queries and modifies info2. static void bli_obj_set_scalar_prec( prec_t dt, obj_t* obj ) { obj->info2 = ( objbits_t ) ( obj->info2 & ~BLIS_SCALAR_PREC_BIT ) | ( dt << BLIS_SCALAR_DT_SHIFT ); } static void bli_obj_set_pack_schema( pack_t schema, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_PACK_SCHEMA_BITS ) | schema; } static void bli_obj_set_pack_order_if_upper( packord_t ordif, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_PACK_REV_IF_UPPER_BIT ) | ordif; } static void bli_obj_set_pack_order_if_lower( packord_t ordif, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_PACK_REV_IF_LOWER_BIT ) | ordif; } // NOTE: The packbuf_t bitfield in the obj_t is currently unused. 
Instead, // packbuf_t is stored/used from the context in order to support various // induced methods. (Though ideally the packbuf_t field would only be // present in the control tree). static void bli_obj_set_pack_buffer_type( packbuf_t buf_type, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_PACK_BUFFER_BITS ) | buf_type; } static void bli_obj_set_struc( struc_t struc, obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info & ~BLIS_STRUC_BITS ) | struc; } static void bli_obj_toggle_trans( obj_t* obj ) { bli_obj_apply_trans( BLIS_TRANSPOSE, obj ); } static void bli_obj_toggle_conj( obj_t* obj ) { bli_obj_apply_conj( BLIS_CONJUGATE, obj ); } static void bli_obj_toggle_uplo( obj_t* obj ) { obj->info = ( objbits_t ) ( obj->info ^ BLIS_LOWER_BIT ) ^ BLIS_UPPER_BIT; } // Root matrix query static obj_t* bli_obj_root( obj_t* obj ) { return ( obj->root ); } static bool_t bli_obj_root_is_general( obj_t* obj ) { return bli_obj_is_general( bli_obj_root( obj ) ); } static bool_t bli_obj_root_is_hermitian( obj_t* obj ) { return bli_obj_is_hermitian( bli_obj_root( obj ) ); } static bool_t bli_obj_root_is_symmetric( obj_t* obj ) { return bli_obj_is_symmetric( bli_obj_root( obj ) ); } static bool_t bli_obj_root_is_triangular( obj_t* obj ) { return bli_obj_is_triangular( bli_obj_root( obj ) ); } static bool_t bli_obj_root_is_herm_or_symm( obj_t* obj ) { return bli_obj_is_hermitian( bli_obj_root( obj ) ) || bli_obj_is_symmetric( bli_obj_root( obj ) ); } static bool_t bli_obj_root_is_upper( obj_t* obj ) { return bli_obj_is_upper( bli_obj_root( obj ) ); } static bool_t bli_obj_root_is_lower( obj_t* obj ) { return bli_obj_is_lower( bli_obj_root( obj ) ); } // Root matrix modification static void bli_obj_set_as_root( obj_t* obj ) { obj->root = obj; } // Diagonal offset query static doff_t bli_obj_diag_offset( obj_t* obj ) { return ( doff_t ) ( obj->diag_off ); } static doff_t bli_obj_diag_offset_after_trans( obj_t* obj ) { return ( doff_t ) ( bli_obj_has_trans( obj ) ? 
-bli_obj_diag_offset( obj ) : bli_obj_diag_offset( obj ) ); } // Diagonal offset modification static void bli_obj_set_diag_offset( doff_t offset, obj_t* obj ) { obj->diag_off = ( doff_t )offset; } static void bli_obj_negate_diag_offset( obj_t* obj ) { obj->diag_off = -(obj->diag_off); } static void bli_obj_inc_diag_offset( doff_t offset, obj_t* obj ) { obj->diag_off += ( doff_t )offset; } // Dimension query static dim_t bli_obj_length( obj_t* obj ) { return ( obj->dim[ BLIS_M ] ); } static dim_t bli_obj_width( obj_t* obj ) { return ( obj->dim[ BLIS_N ] ); } static dim_t bli_obj_dim( mdim_t mdim, obj_t* obj ) { return ( obj->dim[ mdim ] ); } static dim_t bli_obj_min_dim( obj_t* obj ) { return bli_min( bli_obj_length( obj ), bli_obj_width( obj ) ); } static dim_t bli_obj_max_dim( obj_t* obj ) { return bli_max( bli_obj_length( obj ), bli_obj_width( obj ) ); } static dim_t bli_obj_length_after_trans( obj_t* obj ) { return ( bli_obj_has_trans( obj ) ? bli_obj_width( obj ) : bli_obj_length( obj ) ); } static dim_t bli_obj_width_after_trans( obj_t* obj ) { return ( bli_obj_has_trans( obj ) ? bli_obj_length( obj ) : bli_obj_width( obj ) ); } static bool_t bli_obj_is_1x1( obj_t* x ) { return ( bool_t ) ( bli_obj_length( x ) == 1 && bli_obj_width( x ) == 1 ); } // Stride/increment query static inc_t bli_obj_row_stride( obj_t* obj ) { return ( obj->rs ); } static inc_t bli_obj_col_stride( obj_t* obj ) { return ( obj->cs ); } static inc_t bli_obj_imag_stride( obj_t* obj ) { return ( obj->is ); } static inc_t bli_obj_row_stride_mag( obj_t* obj ) { return ( bli_abs( obj->rs ) ); } static inc_t bli_obj_col_stride_mag( obj_t* obj ) { return ( bli_abs( obj->cs ) ); } static inc_t bli_obj_imag_stride_mag( obj_t* obj ) { return ( bli_abs( obj->is ) ); } // Note: The purpose of these functions is to obtain the length and width // of the smallest submatrices of an object that could still encompass // the stored data above (if obj is upper) or below (if obj is lower) // the diagonal. 
static dim_t bli_obj_length_stored( obj_t* obj ) { return ( dim_t ) ( bli_obj_is_upper( obj ) ? bli_min( bli_obj_length( obj ), bli_obj_width( obj ) - bli_obj_diag_offset( obj ) ) : bli_min( bli_obj_length( obj ), bli_obj_length( obj ) + bli_obj_diag_offset( obj ) ) ); } static dim_t bli_obj_width_stored( obj_t* obj ) { return ( dim_t ) ( bli_obj_is_lower( obj ) ? bli_min( bli_obj_width( obj ), bli_obj_length( obj ) + bli_obj_diag_offset( obj ) ) : bli_min( bli_obj_width( obj ), bli_obj_width( obj ) - bli_obj_diag_offset( obj ) ) ); } static dim_t bli_obj_length_stored_after_trans( obj_t* obj ) { return ( bli_obj_has_trans( obj ) ? bli_obj_width_stored( obj ) : bli_obj_length_stored( obj ) ); } static dim_t bli_obj_width_stored_after_trans( obj_t* obj ) { return ( bli_obj_has_trans( obj ) ? bli_obj_length_stored( obj ) : bli_obj_width_stored( obj ) ); } static dim_t bli_obj_vector_dim( obj_t* x ) { return ( bli_obj_length( x ) == 1 ? bli_obj_width( x ) : bli_obj_length( x ) ); } static inc_t bli_obj_vector_inc( obj_t* x ) { return ( bli_obj_is_1x1( x ) ? 1 : \ ( bli_obj_length( x ) == 1 ? 
bli_obj_col_stride( x ) : bli_obj_row_stride( x ) ) ); } static bool_t bli_obj_is_vector( obj_t* x ) { return ( bool_t ) ( bli_obj_length( x ) == 1 || bli_obj_width( x ) == 1 ); } static bool_t bli_obj_is_row_vector( obj_t* x ) { return ( bool_t ) ( bli_obj_length( x ) == 1 ); } static bool_t bli_obj_is_col_vector( obj_t* x ) { return ( bool_t ) ( bli_obj_width( x ) == 1 ); } static bool_t bli_obj_has_zero_dim( obj_t* x ) { return ( bool_t ) ( bli_obj_length( x ) == 0 || bli_obj_width( x ) == 0 ); } // Dimension modification static void bli_obj_set_length( dim_t m, obj_t* obj ) { obj->dim[ BLIS_M ] = m; } static void bli_obj_set_width( dim_t n, obj_t* obj ) { obj->dim[ BLIS_N ] = n; } static void bli_obj_set_dim( mdim_t mdim, dim_t dim_val, obj_t* obj ) { obj->dim[ mdim ] = dim_val; } static void bli_obj_set_dims( dim_t m, dim_t n, obj_t* obj ) { bli_obj_set_length( m, obj ); bli_obj_set_width( n, obj ); } static void bli_obj_set_dims_with_trans( trans_t trans, dim_t m, dim_t n, obj_t* obj ) { //if ( bli_does_notrans( trans ) ) if ( ( ~trans & BLIS_TRANS_BIT ) == BLIS_BITVAL_TRANS ) { bli_obj_set_length( m, obj ); bli_obj_set_width( n, obj ); } else { bli_obj_set_length( n, obj ); bli_obj_set_width( m, obj ); } } // Stride/increment predicates // // NOTE: The following two macros differ from their non-obj counterparts // in that they do not identify m x 1 and 1 x n objects as row-stored and // column-stored, respectively, which is needed when considering packed // objects. But this is okay, since none of the invocations of these // "obj" macros are used on packed matrices. 
// static bool_t bli_obj_is_row_stored( obj_t* obj ) { return ( bool_t ) ( bli_obj_col_stride_mag( obj ) == 1 ); } static bool_t bli_obj_is_col_stored( obj_t* obj ) { return ( bool_t ) ( bli_obj_row_stride_mag( obj ) == 1 ); } static bool_t bli_obj_is_gen_stored( obj_t* obj ) { return ( bool_t ) ( bli_obj_row_stride_mag( obj ) != 1 && bli_obj_col_stride_mag( obj ) != 1 ); } static bool_t bli_obj_is_row_tilted( obj_t* obj ) { return ( bool_t ) ( bli_obj_col_stride_mag( obj ) < bli_obj_row_stride_mag( obj ) ); } static bool_t bli_obj_is_col_tilted( obj_t* obj ) { return ( bool_t ) ( bli_obj_row_stride_mag( obj ) < bli_obj_col_stride_mag( obj ) ); } // Stride/increment modification static void bli_obj_set_row_stride( inc_t rs, obj_t* obj ) { obj->rs = rs; } static void bli_obj_set_col_stride( inc_t cs, obj_t* obj ) { obj->cs = cs; } static void bli_obj_set_strides( inc_t rs, inc_t cs, obj_t* obj ) { bli_obj_set_row_stride( rs, obj ); bli_obj_set_col_stride( cs, obj ); } static void bli_obj_set_imag_stride( inc_t is, obj_t* obj ) { obj->is = is; } // Offset query static dim_t bli_obj_row_off( obj_t* obj ) { return ( obj->off[ BLIS_M ] ); } static dim_t bli_obj_col_off( obj_t* obj ) { return ( obj->off[ BLIS_N ] ); } static dim_t bli_obj_off( mdim_t mdim, obj_t* obj ) { return ( obj->off[ mdim ] ); } // Offset modification static void bli_obj_set_off( mdim_t mdim, dim_t offset, obj_t* obj ) { obj->off[ mdim ] = offset; } static void bli_obj_set_offs( dim_t offm, dim_t offn, obj_t* obj ) { bli_obj_set_off( BLIS_M, offm, obj ); bli_obj_set_off( BLIS_N, offn, obj ); } static void bli_obj_inc_off( mdim_t mdim, dim_t offset, obj_t* obj ) { obj->off[ mdim ] += offset; } static void bli_obj_inc_offs( dim_t offm, dim_t offn, obj_t* obj ) { bli_obj_inc_off( BLIS_M, offm, obj ); bli_obj_inc_off( BLIS_N, offn, obj ); } // Diagonal offset predicates static bool_t bli_obj_is_strictly_above_diag( obj_t* obj ) { return ( bool_t ) ( ( doff_t )bli_obj_length( obj ) <= 
-bli_obj_diag_offset( obj ) ); } static bool_t bli_obj_is_strictly_below_diag( obj_t* obj ) { return ( bool_t ) ( ( doff_t )bli_obj_width( obj ) <= bli_obj_diag_offset( obj ) ); } static bool_t bli_obj_is_outside_diag( obj_t* obj ) { return ( bool_t ) ( bli_obj_is_strictly_above_diag( obj ) || bli_obj_is_strictly_below_diag( obj ) ); } static bool_t bli_obj_intersects_diag( obj_t* obj ) { return ( bool_t ) ( !bli_obj_is_strictly_above_diag( obj ) && !bli_obj_is_strictly_below_diag( obj ) ); } static bool_t bli_obj_is_unstored_subpart( obj_t* obj ) { return ( bool_t ) ( ( bli_obj_root_is_lower( obj ) && bli_obj_is_strictly_above_diag( obj ) ) || ( bli_obj_root_is_upper( obj ) && bli_obj_is_strictly_below_diag( obj ) ) ); } // Buffer address query static void* bli_obj_buffer( obj_t* obj ) { return ( obj->buffer ); } // Buffer address modification static void bli_obj_set_buffer( void* p, obj_t* obj ) { obj->buffer = p; } // Bufferless scalar field query static void* bli_obj_internal_scalar_buffer( obj_t* obj ) { return ( void* ) ( &( obj->scalar ) ); } // Bufferless scalar field modification static void bli_obj_copy_internal_scalar( obj_t* a, obj_t* b ) { b->scalar = a->scalar; } // Element size query static siz_t bli_obj_elem_size( obj_t* obj ) { return ( obj->elem_size ); } // Element size modification static void bli_obj_set_elem_size( siz_t size, obj_t* obj ) { obj->elem_size = size; } // Packed matrix info query static dim_t bli_obj_padded_length( obj_t* obj ) { return ( obj->m_padded ); } static dim_t bli_obj_padded_width( obj_t* obj ) { return ( obj->n_padded ); } // Packed matrix info modification static void bli_obj_set_padded_length( dim_t m, obj_t* obj ) { obj->m_padded = m; } static void bli_obj_set_padded_width( dim_t n, obj_t* obj ) { obj->n_padded = n; } static void bli_obj_set_padded_dims( dim_t m, dim_t n, obj_t* obj ) { bli_obj_set_padded_length( m, obj ); bli_obj_set_padded_width( n, obj ); } // Packed panel info query static dim_t 
bli_obj_panel_length( obj_t* obj ) { return ( obj->m_panel ); } static dim_t bli_obj_panel_width( obj_t* obj ) { return ( obj->n_panel ); } static inc_t bli_obj_panel_dim( obj_t* obj ) { return ( obj->pd ); } static inc_t bli_obj_panel_stride( obj_t* obj ) { return ( obj->ps ); } // Packed panel info modification static void bli_obj_set_panel_length( dim_t m, obj_t* obj ) { obj->m_panel = m; } static void bli_obj_set_panel_width( dim_t n, obj_t* obj ) { obj->n_panel = n; } static void bli_obj_set_panel_dims( dim_t m, dim_t n, obj_t* obj ) { bli_obj_set_panel_length( m, obj ); bli_obj_set_panel_width( n, obj ); } static void bli_obj_set_panel_dim( inc_t pd, obj_t* obj ) { obj->pd = pd; } static void bli_obj_set_panel_stride( inc_t ps, obj_t* obj ) { obj->ps = ps; } // stor3_t-related static stor3_t bli_obj_stor3_from_strides( obj_t* c, obj_t* a, obj_t* b ) { const inc_t rs_c = bli_obj_row_stride( c ); const inc_t cs_c = bli_obj_col_stride( c ); inc_t rs_a, cs_a; inc_t rs_b, cs_b; if ( bli_obj_has_notrans( a ) ) { rs_a = bli_obj_row_stride( a ); cs_a = bli_obj_col_stride( a ); } else { rs_a = bli_obj_col_stride( a ); cs_a = bli_obj_row_stride( a ); } if ( bli_obj_has_notrans( b ) ) { rs_b = bli_obj_row_stride( b ); cs_b = bli_obj_col_stride( b ); } else { rs_b = bli_obj_col_stride( b ); cs_b = bli_obj_row_stride( b ); } return bli_stor3_from_strides( rs_c, cs_c, rs_a, cs_a, rs_b, cs_b ); } // -- Initialization-related macros -- // Finish the initialization started by the matrix-specific static initializer // (e.g. BLIS_OBJECT_PREINITIALIZER) // NOTE: This is intended only for use in the BLAS compatibility API and typed // BLIS API. 
static void bli_obj_init_finish( num_t dt, dim_t m, dim_t n, void* p, inc_t rs, inc_t cs, obj_t* obj ) { bli_obj_set_as_root( obj ); bli_obj_set_dt( dt, obj ); bli_obj_set_target_dt( dt, obj ); bli_obj_set_exec_dt( dt, obj ); bli_obj_set_comp_dt( dt, obj ); bli_obj_set_dims( m, n, obj ); bli_obj_set_strides( rs, cs, obj ); siz_t elem_size = sizeof( float ); if ( bli_dt_prec_is_double( dt ) ) elem_size *= 2; if ( bli_dt_dom_is_complex( dt ) ) elem_size *= 2; bli_obj_set_elem_size( elem_size, obj ); bli_obj_set_buffer( p, obj ); bli_obj_set_scalar_dt( dt, obj ); void* restrict s = bli_obj_internal_scalar_buffer( obj ); if ( bli_dt_prec_is_single( dt ) ) { (( scomplex* )s)->real = 1.0F; (( scomplex* )s)->imag = 0.0F; } else if ( bli_dt_prec_is_double( dt ) ) { (( dcomplex* )s)->real = 1.0; (( dcomplex* )s)->imag = 0.0; } } // Finish the initialization started by the 1x1-specific static initializer // (e.g. BLIS_OBJECT_PREINITIALIZER_1X1) // NOTE: This is intended only for use in the BLAS compatibility API and typed // BLIS API. static void bli_obj_init_finish_1x1( num_t dt, void* p, obj_t* obj ) { bli_obj_set_as_root( obj ); bli_obj_set_dt( dt, obj ); bli_obj_set_buffer( p, obj ); } // -- Miscellaneous object macros -- // Toggle the region referenced (or "stored"). static void bli_obj_toggle_region_ref( obj_t* obj ) { if ( bli_obj_is_upper( obj ) ) bli_obj_inc_diag_offset( -1, obj ); else if ( bli_obj_is_lower( obj ) ) bli_obj_inc_diag_offset( 1, obj ); bli_obj_toggle_uplo( obj ); } static void bli_obj_toggle_uplo_if_trans( trans_t trans, obj_t* obj ) { //if ( bli_does_trans( trans ) && if ( ( trans & BLIS_TRANS_BIT ) == BLIS_BITVAL_TRANS && bli_obj_is_upper_or_lower( obj ) ) { bli_obj_toggle_uplo( obj ); bli_obj_negate_diag_offset( obj ); } } // Initialize object with default properties (info field). 
static void bli_obj_set_defaults( obj_t* obj ) { obj->info = 0x0; obj->info = obj->info | BLIS_BITVAL_DENSE | BLIS_BITVAL_GENERAL; } // Acquire buffer at object's submatrix offset (offset-aware buffer query). static void* bli_obj_buffer_at_off( obj_t* obj ) { return ( void* ) ( ( ( char* )( bli_obj_buffer ( obj ) ) + ( dim_t )( bli_obj_elem_size( obj ) ) * ( bli_obj_col_off( obj ) * bli_obj_col_stride( obj ) + bli_obj_row_off( obj ) * bli_obj_row_stride( obj ) ) ) ); } // Acquire buffer from BLIS_CONSTANT object. static void* bli_obj_buffer_for_const( num_t dt, obj_t* obj ) { void* p; if ( dt == BLIS_FLOAT ) p = &((( constdata_t* )bli_obj_buffer( obj ))->s); else if ( dt == BLIS_DOUBLE ) p = &((( constdata_t* )bli_obj_buffer( obj ))->d); else if ( dt == BLIS_SCOMPLEX ) p = &((( constdata_t* )bli_obj_buffer( obj ))->c); else if ( dt == BLIS_DCOMPLEX ) p = &((( constdata_t* )bli_obj_buffer( obj ))->z); else p = &((( constdata_t* )bli_obj_buffer( obj ))->i); return p; } // Acquire buffer from scalar (1x1) object, including BLIS_CONSTANT objects. static void* bli_obj_buffer_for_1x1( num_t dt, obj_t* obj ) { return ( void* ) ( bli_obj_is_const( obj ) ? bli_obj_buffer_for_const( dt, obj ) : bli_obj_buffer_at_off( obj ) ); } // Make a full alias (shallow copy). static void bli_obj_alias_to( obj_t* a, obj_t* b ) { bli_obj_init_full_shallow_copy_of( a, b ); } // Check if two objects are aliases of one another. static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b ) { return ( bool_t ) ( bli_obj_buffer( a ) == bli_obj_buffer( b ) ); } // Create an alias with a trans value applied. // (Note: trans may include a conj component.) static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b ) { bli_obj_alias_to( a, b ); bli_obj_apply_trans( trans, b ); } // Create an alias with a conj value applied. static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b ) { bli_obj_alias_to( a, b ); bli_obj_apply_conj( conja, b ); } // Alias only the real part. 
static void bli_obj_real_part( obj_t* c, obj_t* r ) { bli_obj_alias_to( c, r ); if ( bli_obj_is_complex( c ) ) { // Change the datatypes. const num_t dt_stor_r = bli_dt_proj_to_real( bli_obj_dt( c ) ); const num_t dt_targ_r = bli_dt_proj_to_real( bli_obj_target_dt( c ) ); const num_t dt_exec_r = bli_dt_proj_to_real( bli_obj_exec_dt( c ) ); const num_t dt_comp_r = bli_dt_proj_to_real( bli_obj_comp_dt( c ) ); bli_obj_set_dt( dt_stor_r, r ); bli_obj_set_target_dt( dt_targ_r, r ); bli_obj_set_exec_dt( dt_exec_r, r ); bli_obj_set_comp_dt( dt_comp_r, r ); // Don't touch the attached scalar datatype. // Update the element size. siz_t es_c = bli_obj_elem_size( c ); bli_obj_set_elem_size( es_c/2, r ); // Update the strides. inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); bli_obj_set_strides( 2*rs_c, 2*cs_c, r ); // Buffer is left unchanged. } } // Alias only the imaginary part. static void bli_obj_imag_part( obj_t* c, obj_t* i ) { if ( bli_obj_is_complex( c ) ) { bli_obj_alias_to( c, i ); // Change the datatype. const num_t dt_stor_r = bli_dt_proj_to_real( bli_obj_dt( c ) ); const num_t dt_targ_r = bli_dt_proj_to_real( bli_obj_target_dt( c ) ); const num_t dt_exec_r = bli_dt_proj_to_real( bli_obj_exec_dt( c ) ); const num_t dt_comp_r = bli_dt_proj_to_real( bli_obj_comp_dt( c ) ); bli_obj_set_dt( dt_stor_r, i ); bli_obj_set_target_dt( dt_targ_r, i ); bli_obj_set_exec_dt( dt_exec_r, i ); bli_obj_set_comp_dt( dt_comp_r, i ); // Don't touch the attached scalar datatype. // Update the element size. siz_t es_c = bli_obj_elem_size( c ); bli_obj_set_elem_size( es_c/2, i ); // Update the strides. inc_t rs_c = bli_obj_row_stride( c ); inc_t cs_c = bli_obj_col_stride( c ); bli_obj_set_strides( 2*rs_c, 2*cs_c, i ); // Update the buffer. 
inc_t is_c = bli_obj_imag_stride( c ); char* p = ( char* )bli_obj_buffer_at_off( c ); bli_obj_set_buffer( p + is_c * es_c/2, i ); } } // Given a 1x1 object, acquire an address to the buffer depending on whether // the object is a BLIS_CONSTANT, and also set a datatype associated with the // chosen buffer (possibly using an auxiliary datatype if the object is // BLIS_CONSTANT). static void bli_obj_scalar_set_dt_buffer( obj_t* obj, num_t dt_aux, num_t* dt, void** buf ) { if ( bli_obj_is_const( obj ) ) { *dt = dt_aux; *buf = bli_obj_buffer_for_1x1( dt_aux, obj ); } else { *dt = bli_obj_dt( obj ); *buf = bli_obj_buffer_at_off( obj ); } } // Swap all object fields (metadata/properties). static void bli_obj_swap( obj_t* a, obj_t* b ) { obj_t t = *b; *b = *a; *a = t; } // Swap object pack schemas. static void bli_obj_swap_pack_schemas( obj_t* a, obj_t* b ) { const pack_t schema_a = bli_obj_pack_schema( a ); const pack_t schema_b = bli_obj_pack_schema( b ); bli_obj_set_pack_schema( schema_b, a ); bli_obj_set_pack_schema( schema_a, b ); } // Induce a transposition on an object: swap dimensions, increments, and // offsets, then clear the trans bit. static void bli_obj_induce_trans( obj_t* obj ) { // Induce transposition among basic fields. dim_t m = bli_obj_length( obj ); dim_t n = bli_obj_width( obj ); inc_t rs = bli_obj_row_stride( obj ); inc_t cs = bli_obj_col_stride( obj ); dim_t offm = bli_obj_row_off( obj ); dim_t offn = bli_obj_col_off( obj ); doff_t diag_off = bli_obj_diag_offset( obj ); bli_obj_set_dims( n, m, obj ); bli_obj_set_strides( cs, rs, obj ); bli_obj_set_offs( offn, offm, obj ); bli_obj_set_diag_offset( -diag_off, obj ); if ( bli_obj_is_upper_or_lower( obj ) ) bli_obj_toggle_uplo( obj ); // Induce transposition among packed fields. 
dim_t m_padded = bli_obj_padded_length( obj ); dim_t n_padded = bli_obj_padded_width( obj ); dim_t m_panel = bli_obj_panel_length( obj ); dim_t n_panel = bli_obj_panel_width( obj ); bli_obj_set_padded_dims( n_padded, m_padded, obj ); bli_obj_set_panel_dims( n_panel, m_panel, obj ); // Note that this macro DOES NOT touch the transposition bit! If // the calling code is using this function to handle an object whose // transposition bit is set prior to computation, that code needs // to manually clear or toggle the bit, via // bli_obj_set_onlytrans() or bli_obj_toggle_trans(), // respectively. } static void bli_obj_induce_fast_trans( obj_t* obj ) { // NOTE: This function is only used in situations where the matrices // are guaranteed to not have structure or be packed. // Induce transposition among basic fields. dim_t m = bli_obj_length( obj ); dim_t n = bli_obj_width( obj ); inc_t rs = bli_obj_row_stride( obj ); inc_t cs = bli_obj_col_stride( obj ); dim_t offm = bli_obj_row_off( obj ); dim_t offn = bli_obj_col_off( obj ); bli_obj_set_dims( n, m, obj ); bli_obj_set_strides( cs, rs, obj ); bli_obj_set_offs( offn, offm, obj ); // Note that this macro DOES NOT touch the transposition bit! If // the calling code is using this function to handle an object whose // transposition bit is set prior to computation, that code needs // to manually clear or toggle the bit, via // bli_obj_set_onlytrans() or bli_obj_toggle_trans(), // respectively. } // Sometimes we need to "reflect" a partition because the data we want is // actually stored on the other side of the diagonal. The nuts and bolts of // this macro look a lot like an induced transposition, except that the row // and column strides are left unchanged (which, of course, drastically // changes the effect of the macro). 
// Reflect the object about the diagonal: swap its dimensions and its
// row/column offsets, negate its diagonal offset, and toggle its
// transposition via bli_obj_toggle_trans(). The row and column strides
// are not modified here.
static void bli_obj_reflect_about_diag( obj_t* obj )
{
	// Read everything that is about to be overwritten.
	dim_t m = bli_obj_length( obj );
	dim_t n = bli_obj_width( obj );
	dim_t offm = bli_obj_row_off( obj );
	dim_t offn = bli_obj_col_off( obj );
	doff_t diag_off = bli_obj_diag_offset( obj );

	// Write the swapped/negated values back.
	bli_obj_set_dims( n, m, obj );
	bli_obj_set_offs( offn, offm, obj );
	bli_obj_set_diag_offset( -diag_off, obj );

	bli_obj_toggle_trans( obj );
}

#endif
// NOTE(review): the text up to the next "/*" looks like a tar (ustar)
// member header for the next file in the archive, not C source -- confirm
// before compiling this dump as a single translation unit.
blis-0.6.1/frame/include/bli_param_macro_defs.h000066400000000000000000001033331360743507500214620ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_PARAM_MACRO_DEFS_H #define BLIS_PARAM_MACRO_DEFS_H // -- Parameter query macros -- // buffer static bool_t bli_is_aligned_to( siz_t p, siz_t size ) { return ( bool_t ) ( p % size == 0 ); } static bool_t bli_is_unaligned_to( siz_t p, siz_t size ) { return ( bool_t ) ( p % size != 0 ); } static siz_t bli_offset_past_alignment( siz_t p, siz_t size ) { return ( siz_t ) ( p % size ); } // datatype static bool_t bli_is_float( num_t dt ) { return ( bool_t ) ( dt == BLIS_FLOAT ); } static bool_t bli_is_double( num_t dt ) { return ( bool_t ) ( dt == BLIS_DOUBLE ); } static bool_t bli_is_scomplex( num_t dt ) { return ( bool_t ) ( dt == BLIS_SCOMPLEX ); } static bool_t bli_is_dcomplex( num_t dt ) { return ( bool_t ) ( dt == BLIS_DCOMPLEX ); } static bool_t bli_is_constant( num_t dt ) { return ( bool_t ) ( dt == BLIS_CONSTANT ); } static bool_t bli_is_int( num_t dt ) { return ( bool_t ) ( dt == BLIS_INT ); } static bool_t bli_is_real( num_t dt ) { return ( bool_t ) ( bli_is_float( dt ) || bli_is_double( dt ) ); } static bool_t bli_is_complex( num_t dt ) { return ( bool_t ) ( bli_is_scomplex( dt ) || bli_is_dcomplex( dt ) ); } static bool_t bli_is_single_prec( num_t dt ) { return ( bool_t ) ( bli_is_float( dt ) || bli_is_scomplex( dt ) ); } static bool_t bli_is_double_prec( num_t dt ) { return ( bool_t ) ( bli_is_double( dt ) || bli_is_dcomplex( dt ) ); } static dom_t bli_dt_domain( num_t dt ) { return ( dom_t ) ( dt & BLIS_DOMAIN_BIT ); } static bool_t 
bli_dt_dom_is_real( num_t dt ) { return ( bool_t ) ( ( dt & BLIS_DOMAIN_BIT ) == BLIS_REAL ); } static bool_t bli_dt_dom_is_complex( num_t dt ) { return ( bool_t ) ( ( dt & BLIS_DOMAIN_BIT ) == BLIS_COMPLEX ); } static prec_t bli_dt_prec( num_t dt ) { return ( prec_t ) ( dt & BLIS_PRECISION_BIT ); } static bool_t bli_dt_prec_is_single( num_t dt ) { return ( bool_t ) ( ( dt & BLIS_PRECISION_BIT ) == BLIS_SINGLE_PREC ); } static bool_t bli_dt_prec_is_double( num_t dt ) { return ( bool_t ) ( ( dt & BLIS_PRECISION_BIT ) == BLIS_DOUBLE_PREC ); } static num_t bli_dt_proj_to_real( num_t dt ) { return ( num_t ) ( dt & ~BLIS_BITVAL_COMPLEX ); } static num_t bli_dt_proj_to_complex( num_t dt ) { return ( num_t ) ( dt | BLIS_BITVAL_COMPLEX ); } static num_t bli_dt_proj_to_single_prec( num_t dt ) { return ( num_t ) ( dt & ~BLIS_BITVAL_DOUBLE_PREC ); } static num_t bli_dt_proj_to_double_prec( num_t dt ) { return ( num_t ) ( dt | BLIS_BITVAL_DOUBLE_PREC ); } // trans static bool_t bli_is_notrans( trans_t trans ) { return ( bool_t ) ( trans == BLIS_NO_TRANSPOSE ); } static bool_t bli_is_trans( trans_t trans ) { return ( bool_t ) ( trans == BLIS_TRANSPOSE ); } static bool_t bli_is_conjnotrans( trans_t trans ) { return ( bool_t ) ( trans == BLIS_CONJ_NO_TRANSPOSE ); } static bool_t bli_is_conjtrans( trans_t trans ) { return ( bool_t ) ( trans == BLIS_CONJ_TRANSPOSE ); } static bool_t bli_does_notrans( trans_t trans ) { return ( bool_t ) ( (~trans & BLIS_TRANS_BIT ) == BLIS_BITVAL_TRANS ); } static bool_t bli_does_trans( trans_t trans ) { return ( bool_t ) ( ( trans & BLIS_TRANS_BIT ) == BLIS_BITVAL_TRANS ); } static bool_t bli_does_noconj( trans_t trans ) { return ( bool_t ) ( (~trans & BLIS_CONJ_BIT ) == BLIS_BITVAL_CONJ ); } static bool_t bli_does_conj( trans_t trans ) { return ( bool_t ) ( ( trans & BLIS_CONJ_BIT ) == BLIS_BITVAL_CONJ ); } static trans_t bli_extract_trans( trans_t trans ) { return ( trans_t ) ( trans & BLIS_TRANS_BIT ); } static conj_t bli_extract_conj( trans_t 
trans ) { return ( conj_t ) ( trans & BLIS_CONJ_BIT ); } static trans_t bli_trans_toggled( trans_t trans ) { return ( trans_t ) ( trans ^ BLIS_TRANS_BIT ); } static trans_t bli_trans_toggled_conj( trans_t trans ) { return ( trans_t ) ( trans ^ BLIS_CONJ_BIT ); } static void bli_toggle_trans( trans_t* trans ) { *trans = bli_trans_toggled( *trans ); } // side static bool_t bli_is_left( side_t side ) { return ( bool_t ) ( side == BLIS_LEFT ); } static bool_t bli_is_right( side_t side ) { return ( bool_t ) ( side == BLIS_RIGHT ); } static side_t bli_side_toggled( side_t side ) { return ( bli_is_left( side ) ? BLIS_RIGHT : BLIS_LEFT ); } static void bli_toggle_side( side_t* side ) { *side = bli_side_toggled( *side ); } // uplo static bool_t bli_is_lower( uplo_t uplo ) { return ( bool_t ) ( uplo == BLIS_LOWER ); } static bool_t bli_is_upper( uplo_t uplo ) { return ( bool_t ) ( uplo == BLIS_UPPER ); } static bool_t bli_is_upper_or_lower( uplo_t uplo ) { return ( bool_t ) ( bli_is_upper( uplo ) || bli_is_lower( uplo ) ); } static bool_t bli_is_dense( uplo_t uplo ) { return ( bool_t ) ( uplo == BLIS_DENSE ); } static bool_t bli_is_zeros( uplo_t uplo ) { return ( bool_t ) ( uplo == BLIS_ZEROS ); } static uplo_t bli_uplo_toggled( uplo_t uplo ) { return ( uplo_t ) ( bli_is_upper_or_lower( uplo ) ? 
( ( uplo ^ BLIS_LOWER_BIT ) ^ BLIS_UPPER_BIT ) : uplo ); } static void bli_toggle_uplo( uplo_t* uplo ) { *uplo = bli_uplo_toggled( *uplo ); } // structure static bool_t bli_is_general( struc_t struc ) { return ( bool_t ) ( struc == BLIS_GENERAL ); } static bool_t bli_is_hermitian( struc_t struc ) { return ( bool_t ) ( struc == BLIS_HERMITIAN ); } static bool_t bli_is_symmetric( struc_t struc ) { return ( bool_t ) ( struc == BLIS_SYMMETRIC ); } static bool_t bli_is_triangular( struc_t struc ) { return ( bool_t ) ( struc == BLIS_TRIANGULAR ); } static bool_t bli_is_herm_or_symm( struc_t struc ) { return ( bool_t ) ( bli_is_hermitian( struc ) || bli_is_symmetric( struc ) ); } // conj static bool_t bli_is_noconj( conj_t conj ) { return ( bool_t ) ( conj == BLIS_NO_CONJUGATE ); } static bool_t bli_is_conj( conj_t conj ) { return ( bool_t ) ( conj == BLIS_CONJUGATE ); } static conj_t bli_conj_toggled( conj_t conj ) { return ( conj_t ) ( conj ^ BLIS_CONJ_BIT ); } static conj_t bli_apply_conj( conj_t conjapp, conj_t conj ) { return ( conj_t ) ( conj ^ conjapp ); } static void bli_toggle_conj( conj_t* conj ) { *conj = bli_conj_toggled( *conj ); } // diag static bool_t bli_is_nonunit_diag( diag_t diag ) { return ( bool_t ) ( diag == BLIS_NONUNIT_DIAG ); } static bool_t bli_is_unit_diag( diag_t diag ) { return ( bool_t ) ( diag == BLIS_UNIT_DIAG ); } // dimension-related static bool_t bli_zero_dim1( dim_t m ) { return ( bool_t ) ( m == 0 ); } static bool_t bli_zero_dim2( dim_t m, dim_t n ) { return ( bool_t ) ( m == 0 || n == 0 ); } static bool_t bli_zero_dim3( dim_t m, dim_t n, dim_t k ) { return ( bool_t ) ( m == 0 || n == 0 || k == 0 ); } static bool_t bli_nonzero_dim( dim_t m ) { return ( bool_t ) ( m > 0 ); } static bool_t bli_vector_dim( dim_t m, dim_t n ) { return ( bool_t ) ( m == 1 ? 
n : m ); } static bool_t bli_is_vector( dim_t m, dim_t n ) { return ( bool_t ) ( m == 1 || n == 1 ); } static bool_t bli_is_row_vector( dim_t m, dim_t n ) { return ( bool_t ) ( m == 1 ); } static bool_t bli_is_col_vector( dim_t m, dim_t n ) { return ( bool_t ) ( n == 1 ); } static void bli_set_dim_with_side( side_t side, dim_t m, dim_t n, dim_t* dim ) { if ( bli_is_left( side ) ) *dim = m; else *dim = n; } static void bli_set_dims_with_trans( trans_t trans, dim_t m, dim_t n, dim_t* mt, dim_t* nt ) { if ( bli_does_notrans( trans ) ) { *mt = m; *nt = n; } else { *mt = n; *nt = m; } } static void bli_set_dims_incs_with_trans( trans_t trans, dim_t m, dim_t n, inc_t rs, inc_t cs, dim_t* mt, dim_t* nt, inc_t* rst, inc_t* cst ) { if ( bli_does_notrans( trans ) ) { *mt = m; *nt = n; *rst = rs; *cst = cs; } else { *mt = n; *nt = m; *rst = cs; *cst = rs; } } // blocksize-related static dim_t bli_determine_blocksize_dim_f( dim_t i, dim_t dim, dim_t b_alg ) { return ( dim_t ) ( bli_min( b_alg, dim - i ) ); } static dim_t bli_determine_blocksize_dim_b( dim_t i, dim_t dim, dim_t b_alg ) { return ( dim_t ) ( i == 0 && dim % b_alg != 0 ? dim % b_alg : b_alg ); } // stride-related static inc_t bli_vector_inc( trans_t trans, dim_t m, dim_t n, inc_t rs, inc_t cs ) { return ( inc_t ) ( bli_does_notrans( trans ) ? ( m == 1 ? cs : rs ) : ( m == 1 ? 
rs : cs ) ); } static bool_t bli_is_row_stored( inc_t rs, inc_t cs ) { return ( bool_t ) ( bli_abs( cs ) == 1 ); } static bool_t bli_is_col_stored( inc_t rs, inc_t cs ) { return ( bool_t ) ( bli_abs( rs ) == 1 ); } static bool_t bli_is_row_stored_f( dim_t m, dim_t n, inc_t rs, inc_t cs ) { return ( bool_t ) ( cs == 1 && ( rs > 1 || n == 1 ) ); } static bool_t bli_is_col_stored_f( dim_t m, dim_t n, inc_t rs, inc_t cs ) { return ( bool_t ) ( rs == 1 && ( cs > 1 || m == 1 ) ); } static bool_t bli_is_gen_stored( inc_t rs, inc_t cs ) { return ( bool_t ) ( bli_abs( rs ) != 1 && bli_abs( cs ) != 1 ); } static bool_t bli_is_row_tilted( dim_t m, dim_t n, inc_t rs, inc_t cs ) { return ( bool_t ) ( bli_abs( cs ) == bli_abs( rs ) ? n < m : bli_abs( cs ) < bli_abs( rs ) ); } static bool_t bli_is_col_tilted( dim_t m, dim_t n, inc_t rs, inc_t cs ) { return ( bool_t ) ( bli_abs( rs ) == bli_abs( cs ) ? m < n : bli_abs( rs ) < bli_abs( cs ) ); } static bool_t bli_has_nonunit_inc1( inc_t s1 ) { return ( bool_t ) ( s1 != 1 ); } static bool_t bli_has_nonunit_inc2( inc_t s1, inc_t s2 ) { return ( bool_t ) ( s1 != 1 || s2 != 1 ); } static bool_t bli_has_nonunit_inc3( inc_t s1, inc_t s2, inc_t s3 ) { return ( bool_t ) ( s1 != 1 || s2 != 1 || s3 != 1 ); } // diag offset-related static void bli_negate_diag_offset( doff_t* diagoff ) { *diagoff = -(*diagoff); } static void bli_shift_diag_offset_to_grow_uplo( uplo_t uplo, doff_t* diagoff ) { if ( bli_is_upper( uplo ) ) *diagoff -= 1; else if ( bli_is_lower( uplo ) ) *diagoff += 1; } static void bli_shift_diag_offset_to_shrink_uplo( uplo_t uplo, doff_t* diagoff ) { if ( bli_is_upper( uplo ) ) *diagoff += 1; else if ( bli_is_lower( uplo ) ) *diagoff -= 1; } static bool_t bli_diag_offset_with_trans( trans_t trans, doff_t diagoff ) { return ( bool_t ) ( bli_does_trans( trans ) ? -diagoff : diagoff ); } static bool_t bli_is_strictly_above_diag( doff_t diagoff, trans_t trans, dim_t m, dim_t n ) { return ( bool_t ) ( bli_does_trans( trans ) ? 
( ( doff_t )n <= -diagoff ) : ( ( doff_t )m <= -diagoff ) ); } static bool_t bli_is_strictly_below_diag( doff_t diagoff, trans_t trans, dim_t m, dim_t n ) { return ( bool_t ) ( bli_does_trans( trans ) ? ( ( doff_t )m <= diagoff ) : ( ( doff_t )n <= diagoff ) ); } static bool_t bli_is_outside_diag( doff_t diagoff, trans_t trans, dim_t m, dim_t n ) { return ( bool_t ) ( bli_is_strictly_above_diag( diagoff, trans, m, n ) || bli_is_strictly_below_diag( diagoff, trans, m, n ) ); } static bool_t bli_is_stored_subpart( doff_t diagoff, trans_t trans, uplo_t uplo, dim_t m, dim_t n ) { return ( bool_t ) ( ( bli_is_upper( uplo ) && bli_is_strictly_above_diag( diagoff, trans, m, n ) ) || ( bli_is_lower( uplo ) && bli_is_strictly_below_diag( diagoff, trans, m, n ) ) ); } static bool_t bli_is_unstored_subpart( doff_t diagoff, trans_t trans, uplo_t uplo, dim_t m, dim_t n ) { return ( bool_t ) ( ( bli_is_upper( uplo ) && bli_is_strictly_below_diag( diagoff, trans, m, n ) ) || ( bli_is_lower( uplo ) && bli_is_strictly_above_diag( diagoff, trans, m, n ) ) ); } static bool_t bli_is_strictly_above_diag_n( doff_t diagoff, dim_t m, dim_t n ) { return ( bool_t ) ( ( doff_t )m <= -diagoff ); } static bool_t bli_is_strictly_below_diag_n( doff_t diagoff, dim_t m, dim_t n ) { return ( bool_t ) ( ( doff_t )n <= diagoff ); } static bool_t bli_intersects_diag_n( doff_t diagoff, dim_t m, dim_t n ) { return ( bool_t ) ( !bli_is_strictly_above_diag_n( diagoff, m, n ) && !bli_is_strictly_below_diag_n( diagoff, m, n ) ); } static bool_t bli_is_outside_diag_n( doff_t diagoff, dim_t m, dim_t n ) { return ( bool_t ) ( bli_is_strictly_above_diag_n( diagoff, m, n ) || bli_is_strictly_below_diag_n( diagoff, m, n ) ); } static bool_t bli_is_stored_subpart_n( doff_t diagoff, uplo_t uplo, dim_t m, dim_t n ) { return ( bool_t ) ( ( bli_is_upper( uplo ) && bli_is_strictly_above_diag_n( diagoff, m, n ) ) || ( bli_is_lower( uplo ) && bli_is_strictly_below_diag_n( diagoff, m, n ) ) ); } static bool_t 
bli_is_unstored_subpart_n( doff_t diagoff, uplo_t uplo, dim_t m, dim_t n ) { return ( bool_t ) ( ( bli_is_upper( uplo ) && bli_is_strictly_below_diag_n( diagoff, m, n ) ) || ( bli_is_lower( uplo ) && bli_is_strictly_above_diag_n( diagoff, m, n ) ) ); } // pruning-related static void bli_prune_unstored_region_top_l( doff_t* diagoff, dim_t* m, dim_t* n, dim_t* offm_inc ) { *offm_inc = 0; // If the diagonal intersects the left side of the matrix, // ignore the area above that intersection. if ( *diagoff < 0 ) { *m = *m + *diagoff; *offm_inc = - *diagoff; *diagoff = 0; } } static void bli_prune_unstored_region_right_l( doff_t* diagoff, dim_t* m, dim_t* n, dim_t* offn_inc ) { *offn_inc = 0; // If the diagonal intersects the bottom side of the matrix, // ignore the area to the right of that intersection. if ( *n > *diagoff + *m ) { *n = *diagoff + *m; } } static void bli_prune_unstored_region_left_u( doff_t* diagoff, dim_t* m, dim_t* n, dim_t* offn_inc ) { *offn_inc = 0; // If the diagonal intersects the top side of the matrix, // ignore the area to the left of that intersection. if ( *diagoff > 0 ) { *n = *n - *diagoff; *offn_inc = + *diagoff; *diagoff = 0; } } static void bli_prune_unstored_region_bottom_u( doff_t* diagoff, dim_t* m, dim_t* n, dim_t* offm_inc ) { *offm_inc = 0; // If the diagonal intersects the right side of the matrix, // ignore the area below that intersection. 
if ( *m > -(*diagoff) + *n ) { *m = -(*diagoff) + *n; } } // thread range-related static void bli_rotate180_trapezoid( doff_t* diagoff, uplo_t* uplo, dim_t* m, dim_t* n ) { *diagoff = *n - *diagoff - *m; bli_toggle_uplo( uplo ); } static void bli_reflect_about_diag( doff_t* diagoff, uplo_t* uplo, dim_t* m, dim_t* n ) { bli_swap_dims( m, n ); bli_negate_diag_offset( diagoff ); bli_toggle_uplo( uplo ); } static void bli_reverse_index_direction( dim_t n, dim_t* start, dim_t* end ) { dim_t start2 = n - *start; dim_t end2 = n - *end; *start = end2; *end = start2; } // mdim_t-related static bool_t bli_is_m_dim( mdim_t mdim ) { return ( bool_t ) ( mdim == BLIS_M ); } static bool_t bli_is_n_dim( mdim_t mdim ) { return ( bool_t ) ( mdim == BLIS_N ); } static mdim_t bli_dim_toggled( mdim_t mdim ) { return ( mdim == BLIS_M ? BLIS_N : BLIS_M ); } static void bli_toggle_dim( mdim_t* mdim ) { *mdim = bli_dim_toggled( *mdim ); } // stor3_t-related static stor3_t bli_stor3_from_strides( inc_t rs_c, inc_t cs_c, inc_t rs_a, inc_t cs_a, inc_t rs_b, inc_t cs_b ) { // If any matrix is general-stored, return the stor3_t id for the // general-purpose sup microkernel. if ( bli_is_gen_stored( rs_c, cs_c ) || bli_is_gen_stored( rs_a, cs_a ) || bli_is_gen_stored( rs_b, cs_b ) ) return BLIS_XXX; // Otherwise, compute and return the stor3_t id as follows. 
const bool_t c_is_col = bli_is_col_stored( rs_c, cs_c ); const bool_t a_is_col = bli_is_col_stored( rs_a, cs_a ); const bool_t b_is_col = bli_is_col_stored( rs_b, cs_b ); return ( stor3_t )( 4 * c_is_col + 2 * a_is_col + 1 * b_is_col ); } static stor3_t bli_stor3_trans( stor3_t id ) { #if 1 stor3_t map[ BLIS_NUM_3OP_RC_COMBOS ] = { ( stor3_t )7, // BLIS_RRR = 0 -> BLIS_CCC = 7 ( stor3_t )5, // BLIS_RRC = 1 -> BLIS_CRC = 5 ( stor3_t )6, // BLIS_RCR = 2 -> BLIS_CCR = 6 ( stor3_t )4, // BLIS_RCC = 3 -> BLIS_CRR = 4 ( stor3_t )3, // BLIS_CRR = 4 -> BLIS_RCC = 3 ( stor3_t )1, // BLIS_CRC = 5 -> BLIS_RRC = 1 ( stor3_t )2, // BLIS_CCR = 6 -> BLIS_RCR = 2 ( stor3_t )0, // BLIS_CCC = 7 -> BLIS_RRR = 0 }; return map[id]; #else return ( ( id & 0x4 ) ^ 0x4 ) | // flip c bit ( ( ( id & 0x1 ) ^ 0x1 ) << 1 ) | // flip b bit and move to a position ( ( ( id & 0x2 ) ^ 0x2 ) >> 1 ); // flip a bit and move to b position #endif } static stor3_t bli_stor3_transa( stor3_t id ) { #if 0 stor3_t map[ BLIS_NUM_3OP_RC_COMBOS ] = { ( stor3_t )1, // BLIS_RRR = 0 -> BLIS_RRC = 1 ( stor3_t )0, // BLIS_RRC = 1 -> BLIS_RRR = 0 ( stor3_t )3, // BLIS_RCR = 2 -> BLIS_RCC = 3 ( stor3_t )2, // BLIS_RCC = 3 -> BLIS_RCR = 2 ( stor3_t )5, // BLIS_CRR = 4 -> BLIS_CRC = 5 ( stor3_t )4, // BLIS_CRC = 5 -> BLIS_CRR = 4 ( stor3_t )7, // BLIS_CCR = 6 -> BLIS_CCC = 7 ( stor3_t )6, // BLIS_CCC = 7 -> BLIS_CCR = 6 }; return map[id]; #else return ( stor3_t )( id ^ 0x1 ); #endif } static stor3_t bli_stor3_transb( stor3_t id ) { #if 0 stor3_t map[ BLIS_NUM_3OP_RC_COMBOS ] = { ( stor3_t )2, // BLIS_RRR = 0 -> BLIS_RCR = 2 ( stor3_t )3, // BLIS_RRC = 1 -> BLIS_RCC = 3 ( stor3_t )0, // BLIS_RCR = 2 -> BLIS_RRR = 0 ( stor3_t )1, // BLIS_RCC = 3 -> BLIS_RRC = 1 ( stor3_t )6, // BLIS_CRR = 4 -> BLIS_CCR = 6 ( stor3_t )7, // BLIS_CRC = 5 -> BLIS_CCC = 7 ( stor3_t )4, // BLIS_CCR = 6 -> BLIS_CRR = 4 ( stor3_t )5, // BLIS_CCC = 7 -> BLIS_CRC = 5 }; return map[id]; #else return ( stor3_t )( id ^ 0x2 ); #endif } // index-related 
static bool_t bli_is_edge_f( dim_t i, dim_t n_iter, dim_t n_left ) { return ( bool_t ) ( i == n_iter - 1 && n_left != 0 ); } static bool_t bli_is_not_edge_f( dim_t i, dim_t n_iter, dim_t n_left ) { return ( bool_t ) ( i != n_iter - 1 || n_left == 0 ); } static bool_t bli_is_edge_b( dim_t i, dim_t n_iter, dim_t n_left ) { return ( bool_t ) ( i == 0 && n_left != 0 ); } static bool_t bli_is_not_edge_b( dim_t i, dim_t n_iter, dim_t n_left ) { return ( bool_t ) ( i != 0 || n_left == 0 ); } static bool_t bli_is_last_iter_sl( dim_t i, dim_t end_iter, dim_t tid, dim_t nth ) { return ( bool_t ) ( i == end_iter - 1 ); } static bool_t bli_is_last_iter_rr( dim_t i, dim_t end_iter, dim_t tid, dim_t nth ) { return ( bool_t ) ( i == end_iter - 1 - ( ( end_iter - tid - 1 ) % nth ) ); } static bool_t bli_is_last_iter( dim_t i, dim_t end_iter, dim_t tid, dim_t nth ) { #ifdef BLIS_ENABLE_JRIR_SLAB return bli_is_last_iter_sl( i, end_iter, tid, nth ); #else // BLIS_ENABLE_JRIR_RR return bli_is_last_iter_rr( i, end_iter, tid, nth ); #endif } // packbuf_t-related static guint_t bli_packbuf_index( packbuf_t buf_type ) { return ( guint_t ) ( ( buf_type & BLIS_PACK_BUFFER_BITS ) >> BLIS_PACK_BUFFER_SHIFT ); } // pack_t-related static bool_t bli_is_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_BIT ); } static bool_t bli_is_row_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_RC_BIT ) == ( BLIS_BITVAL_PACKED_UNSPEC ^ BLIS_BITVAL_PACKED_ROWS ); } static bool_t bli_is_col_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_RC_BIT ) == ( BLIS_BITVAL_PACKED_UNSPEC ^ BLIS_BITVAL_PACKED_COLUMNS ); } static bool_t bli_is_panel_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_PANEL_BIT ); } static bool_t bli_is_4mi_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4MI; } static bool_t bli_is_3mi_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3MI; } 
static bool_t bli_is_3ms_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3MS; } static bool_t bli_is_ro_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_RO; } static bool_t bli_is_io_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_IO; } static bool_t bli_is_rpi_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_RPI; } static bool_t bli_is_rih_packed( pack_t schema ) { return ( bool_t ) ( bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ); } static bool_t bli_is_1r_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_1R; } static bool_t bli_is_1e_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_1E; } static bool_t bli_is_1m_packed( pack_t schema ) { return ( bool_t ) ( bli_is_1r_packed( schema ) || bli_is_1e_packed( schema ) ); } static bool_t bli_is_nat_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) == 0; } static bool_t bli_is_ind_packed( pack_t schema ) { return ( bool_t ) ( schema & BLIS_PACK_FORMAT_BITS ) != 0; } static guint_t bli_pack_schema_index( pack_t schema ) { return ( guint_t ) ( schema & BLIS_PACK_FORMAT_BITS ) >> BLIS_PACK_FORMAT_SHIFT; } // pointer-related // Increment a pointer by an integer fraction: // p0 + (num/dem) // where p0 is a pointer to a datatype of size sizeof_p0. static void_fp bli_ptr_inc_by_frac( void_fp p0, siz_t sizeof_p0, dim_t num, dim_t den ) { return ( void_fp ) ( ( char* )p0 + ( ( num * ( dim_t )sizeof_p0 ) / den ) ); } // Set dimensions, increments, effective uplo/diagoff, etc for ONE matrix // argument. 
static void bli_set_dims_incs_uplo_1m
     (
       doff_t  diagoffa, diag_t diaga,
       uplo_t  uploa,
       dim_t   m,          dim_t  n,      inc_t  rs_a, inc_t  cs_a,
       uplo_t* uplo_eff,   dim_t* n_elem_max, dim_t* n_iter, inc_t* inca, inc_t* lda,
       dim_t*  ij0,        dim_t* n_shift
     )
{
	// Outputs:
	//   uplo_eff   - BLIS_ZEROS, BLIS_DENSE, or uploa (toggled if the
	//                dimensions were swapped).
	//   n_elem_max - m, or n if the dimensions were swapped, possibly
	//                reduced by the diagonal offset.
	//   n_iter     - number of iterations implied by the stored region.
	//   inca, lda  - rs_a/cs_a, exchanged if the dimensions were swapped.
	//   ij0,
	//   n_shift    - offsets derived from the effective diagonal offset.
	// When *uplo_eff comes back as BLIS_ZEROS, only uplo_eff, ij0 and
	// n_shift have been written.

	// Give these two a defined value on every path so that the compiler
	// does not warn about uninitialized variables.
	*ij0     = 0;
	*n_shift = 0;

	// A is entirely "unstored" if it is lower-stored and entirely above
	// the diagonal, or upper-stored and entirely below the diagonal. Such
	// a region is implicitly zero and needs no further setup.
	if ( bli_is_unstored_subpart( diagoffa, BLIS_NO_TRANSPOSE, uploa, m, n ) )
	{
		*uplo_eff = BLIS_ZEROS;
		return;
	}

	doff_t diagoff_use = diagoffa;
	doff_t diagoff_eff;
	dim_t  n_iter_cap;

	// A unit diagonal is excluded by shifting the offset so that the
	// stored region shrinks.
	if ( bli_is_unit_diag( diaga ) )
		bli_shift_diag_offset_to_shrink_uplo( uploa, &diagoff_use );

	// A is entirely "stored" if it is upper-stored and entirely above the
	// diagonal, or lower-stored and entirely below the diagonal. Such a
	// region is treated as dense.
	if ( bli_is_stored_subpart( diagoff_use, BLIS_NO_TRANSPOSE, uploa, m, n ) )
		uploa = BLIS_DENSE;

	n_iter_cap  = n;
	*n_elem_max = m;
	*inca       = rs_a;
	*lda        = cs_a;
	*uplo_eff   = uploa;
	diagoff_eff = diagoff_use;

	// For a row-tilted A, exchange the roles of the two dimensions and
	// reflect the structure information accordingly.
	if ( bli_is_row_tilted( *n_elem_max, n_iter_cap, *inca, *lda ) )
	{
		bli_swap_dims( &n_iter_cap, n_elem_max );
		bli_swap_incs( inca, lda );
		bli_toggle_uplo( uplo_eff );
		bli_negate_diag_offset( &diagoff_eff );
	}

	// Dense: every iteration is needed.
	if ( bli_is_dense( *uplo_eff ) )
	{
		*n_iter = n_iter_cap;
		return;
	}

	// Upper-stored.
	if ( bli_is_upper( *uplo_eff ) )
	{
		if ( diagoff_eff >= 0 )
		{
			*ij0     = diagoff_eff;
			*n_shift = 0;
			*n_iter  = n_iter_cap - diagoff_eff;
		}
		else
		{
			*ij0        = 0;
			*n_shift    = -diagoff_eff;
			*n_elem_max = bli_min( *n_elem_max, *n_shift + bli_min( m, n ) );
			*n_iter     = n_iter_cap;
		}
		return;
	}

	// Anything else is handled as lower-stored.
	if ( diagoff_eff >= 0 )
	{
		*ij0     = 0;
		*n_shift = diagoff_eff;
		*n_iter  = bli_min( n_iter_cap, *n_shift + bli_min( m, n ) );
	}
	else
	{
		*ij0         = -diagoff_eff;
		*n_shift     = 0;
		*n_elem_max += diagoff_eff;
		*n_iter      = bli_min( *n_elem_max, bli_min( m, n ) );
	}
}

// Set dimensions, increments, effective uplo/diagoff, etc for ONE matrix
// argument (without column-wise stride optimization).

static void bli_set_dims_incs_uplo_1m_noswap
     (
       doff_t  diagoffa, diag_t diaga,
       uplo_t  uploa,
       dim_t   m,          dim_t  n,      inc_t  rs_a, inc_t  cs_a,
       uplo_t* uplo_eff,   dim_t* n_elem_max, dim_t* n_iter, inc_t* inca, inc_t* lda,
       dim_t*  ij0,        dim_t* n_shift
     )
{
	// Same contract as bli_set_dims_incs_uplo_1m(), except that the
	// dimensions and increments are never exchanged: n_elem_max always
	// starts from m, the iteration count from n, and inca/lda are always
	// rs_a/cs_a.

	// Give these two a defined value on every path so that the compiler
	// does not warn about uninitialized variables.
	*ij0     = 0;
	*n_shift = 0;

	// A is entirely "unstored" if it is lower-stored and entirely above
	// the diagonal, or upper-stored and entirely below the diagonal. Such
	// a region is implicitly zero and needs no further setup.
	if ( bli_is_unstored_subpart( diagoffa, BLIS_NO_TRANSPOSE, uploa, m, n ) )
	{
		*uplo_eff = BLIS_ZEROS;
		return;
	}

	doff_t diagoff_use = diagoffa;
	doff_t diagoff_eff;
	dim_t  n_iter_cap;

	// A unit diagonal is excluded by shifting the offset so that the
	// stored region shrinks.
	if ( bli_is_unit_diag( diaga ) )
		bli_shift_diag_offset_to_shrink_uplo( uploa, &diagoff_use );

	// A is entirely "stored" if it is upper-stored and entirely above the
	// diagonal, or lower-stored and entirely below the diagonal. Such a
	// region is treated as dense.
	if ( bli_is_stored_subpart( diagoff_use, BLIS_NO_TRANSPOSE, uploa, m, n ) )
		uploa = BLIS_DENSE;

	n_iter_cap  = n;
	*n_elem_max = m;
	*inca       = rs_a;
	*lda        = cs_a;
	*uplo_eff   = uploa;
	diagoff_eff = diagoff_use;

	// Dense: every iteration is needed.
	if ( bli_is_dense( *uplo_eff ) )
	{
		*n_iter = n_iter_cap;
		return;
	}

	// Upper-stored.
	if ( bli_is_upper( *uplo_eff ) )
	{
		if ( diagoff_eff >= 0 )
		{
			*ij0     = diagoff_eff;
			*n_shift = 0;
			*n_iter  = n_iter_cap - diagoff_eff;
		}
		else
		{
			*ij0        = 0;
			*n_shift    = -diagoff_eff;
			*n_elem_max = bli_min( *n_elem_max, *n_shift + bli_min( m, n ) );
			*n_iter     = n_iter_cap;
		}
		return;
	}

	// Anything else is handled as lower-stored.
	if ( diagoff_eff >= 0 )
	{
		*ij0     = 0;
		*n_shift = diagoff_eff;
		*n_iter  = bli_min( n_iter_cap, *n_shift + bli_min( m, n ) );
	}
	else
	{
		*ij0         = -diagoff_eff;
		*n_shift     = 0;
		*n_elem_max += diagoff_eff;
		*n_iter      = bli_min( *n_elem_max, bli_min( m, n ) );
	}
}

// Set dimensions and increments for TWO matrix arguments.
static void bli_set_dims_incs_2m ( trans_t transa, dim_t m, dim_t n, inc_t rs_a, inc_t cs_a, inc_t rs_b, inc_t cs_b, dim_t* n_elem, dim_t* n_iter, inc_t* inca, inc_t* lda, inc_t* incb, inc_t* ldb ) { { *n_iter = n; *n_elem = m; *inca = rs_a; *lda = cs_a; *incb = rs_b; *ldb = cs_b; if ( bli_does_trans( transa ) ) { bli_swap_incs( inca, lda ); } if ( bli_is_row_tilted( *n_elem, *n_iter, *incb, *ldb ) && bli_is_row_tilted( *n_elem, *n_iter, *inca, *lda ) ) { bli_swap_dims( n_iter, n_elem ); bli_swap_incs( inca, lda ); bli_swap_incs( incb, ldb ); } } } // Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix // arguments. static void bli_set_dims_incs_uplo_2m ( doff_t diagoffa, diag_t diaga, trans_t transa, uplo_t uploa, dim_t m, dim_t n, inc_t rs_a, inc_t cs_a, inc_t rs_b, inc_t cs_b, uplo_t* uplo_eff, dim_t* n_elem_max, dim_t* n_iter, inc_t* inca, inc_t* lda, inc_t* incb, inc_t* ldb, dim_t* ij0, dim_t* n_shift ) { // This is to prevent the compiler from warning about uninitialized // variables. *ij0 = 0; *n_shift = 0; // If matrix A is entirely "unstored", that is, if either: // - A is lower-stored and entirely above the diagonal, or // - A is upper-stored and entirely below the diagonal // then we mark the storage as implicitly zero. if ( bli_is_unstored_subpart( diagoffa, transa, uploa, m, n ) ) { *uplo_eff = BLIS_ZEROS; } else { doff_t diagoffa_use_ = diagoffa; doff_t diagoff_eff_; dim_t n_iter_max_; if ( bli_is_unit_diag( diaga ) ) bli_shift_diag_offset_to_shrink_uplo( uploa, &diagoffa_use_ ); // If matrix A is entirely "stored", that is, if either: // - A is upper-stored and entirely above the diagonal, or // - A is lower-stored and entirely below the diagonal // then we mark the storage as dense. 
if ( bli_is_stored_subpart( diagoffa_use_, transa, uploa, m, n ) ) uploa = BLIS_DENSE; n_iter_max_ = n; *n_elem_max = m; *inca = rs_a; *lda = cs_a; *incb = rs_b; *ldb = cs_b; *uplo_eff = uploa; diagoff_eff_ = diagoffa_use_; if ( bli_does_trans( transa ) ) { bli_swap_incs( inca, lda ); bli_toggle_uplo( uplo_eff ); bli_negate_diag_offset( &diagoff_eff_ ); } if ( bli_is_row_tilted( *n_elem_max, n_iter_max_, *incb, *ldb ) && bli_is_row_tilted( *n_elem_max, n_iter_max_, *inca, *lda ) ) { bli_swap_dims( &n_iter_max_, n_elem_max ); bli_swap_incs( inca, lda ); bli_swap_incs( incb, ldb ); bli_toggle_uplo( uplo_eff ); bli_negate_diag_offset( &diagoff_eff_ ); } if ( bli_is_dense( *uplo_eff ) ) { *n_iter = n_iter_max_; } else if ( bli_is_upper( *uplo_eff ) ) { if ( diagoff_eff_ < 0 ) { *ij0 = 0; *n_shift = -diagoff_eff_; *n_elem_max = bli_min( *n_elem_max, *n_shift + bli_min( m, n ) ); *n_iter = n_iter_max_; } else { *ij0 = diagoff_eff_; *n_shift = 0; *n_iter = n_iter_max_ - diagoff_eff_; } } else // if ( bli_is_lower( *uplo_eff ) ) { if ( diagoff_eff_ < 0 ) { *ij0 = -diagoff_eff_; *n_shift = 0; *n_elem_max = *n_elem_max + diagoff_eff_; *n_iter = bli_min( *n_elem_max, bli_min( m, n ) ); } else { *ij0 = 0; *n_shift = diagoff_eff_; *n_iter = bli_min( n_iter_max_, *n_shift + bli_min( m, n ) ); } } } } // Set dimensions, increments, etc for ONE matrix argument when operating // on the diagonal. static void bli_set_dims_incs_1d ( doff_t diagoffx, dim_t m, dim_t n, inc_t rs_x, inc_t cs_x, dim_t* offx, dim_t* n_elem, inc_t* incx ) { if ( diagoffx < 0 ) { *n_elem = bli_min( m - ( dim_t )(-diagoffx), n ); *offx = ( dim_t )(-diagoffx) * rs_x; } else { *n_elem = bli_min( n - ( dim_t )( diagoffx), m ); *offx = ( dim_t )( diagoffx) * cs_x; } *incx = rs_x + cs_x; \ } // Set dimensions, increments, etc for TWO matrix arguments when operating // on diagonals. 
static void bli_set_dims_incs_2d ( doff_t diagoffx, trans_t transx, dim_t m, dim_t n, inc_t rs_x, inc_t cs_x, inc_t rs_y, inc_t cs_y, dim_t* offx, dim_t* offy, dim_t* n_elem, inc_t* incx, inc_t* incy ) { doff_t diagoffy_ = bli_diag_offset_with_trans( transx, diagoffx ); if ( diagoffx < 0 ) *offx = -diagoffx * rs_x; else *offx = diagoffx * cs_x; if ( diagoffy_ < 0 ) { *n_elem = bli_min( m - ( dim_t )(-diagoffy_), n ); *offy = -diagoffy_ * rs_y; } else { *n_elem = bli_min( n - ( dim_t )( diagoffy_), m ); *offy = diagoffy_ * cs_y; } *incx = rs_x + cs_x; *incy = rs_y + cs_y; } #endif blis-0.6.1/frame/include/bli_pragma_macro_defs.h000066400000000000000000000051771360743507500216400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2019, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* NOTE: The following code is based on [1]. [1] https://github.com/jeffhammond/nwchem-tce-triples-kernels/blob/master/src/pragma_vendor.h */ #ifndef BLIS_PRAGMA_MACRO_DEFS_H #define BLIS_PRAGMA_MACRO_DEFS_H // Generally speaking, if BLIS_ENABLE_PRAGMA_OMP_SIMD is set, then we define // all instances of PRAGMA_SIMD as _Pragma("omp simd"). #ifdef BLIS_ENABLE_PRAGMA_OMP_SIMD #define PRAGMA_OMP_SIMD _Pragma("omp simd") #else #define PRAGMA_OMP_SIMD #endif // Require ISO C99 or later for SIMD-related pragmas. #if (( __STDC_VERSION__ >= 199901L )) #define GEN_PRAGMA(x) _Pragma(#x) #if defined(__ICC) || defined(__INTEL_COMPILER) // Intel icc. //#define PRAGMA_SIMD GEN_PRAGMA(simd) #define PRAGMA_SIMD PRAGMA_OMP_SIMD #elif defined(__clang__) // clang/llvm. #define PRAGMA_SIMD PRAGMA_OMP_SIMD #elif defined(__GNUC__) // GNU gcc. #define PRAGMA_SIMD PRAGMA_OMP_SIMD #else // Unknown compiler. #define PRAGMA_SIMD #endif #endif #endif blis-0.6.1/frame/include/bli_sbox.h000066400000000000000000000037631360743507500171610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SBOX_H #define BLIS_SBOX_H // Each sandbox must have a bli_sandbox.h file present somewhere inside. // If a sandbox was enabled at configure-time, we need to #include its // header file here so that it will get pulled into blis.h when it is // flattened into a monolithic header. #ifdef BLIS_ENABLE_SANDBOX #include "bli_sandbox.h" #endif #endif blis-0.6.1/frame/include/bli_scalar_macro_defs.h000066400000000000000000000142401360743507500216250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCALAR_MACRO_DEFS_H #define BLIS_SCALAR_MACRO_DEFS_H // -- Assignment/Accessor macros -- // NOTE: This macro is defined first since some of the other scalar macros // use it to abstract away the method used to assign complex values (ie: // whether fields of a struct are set directly or whether native C99 // assignment is used). 
#include "bli_sets.h" // sets both real and imaginary components // NOTE: These macros are not used by other scalar macros, but they are // related to those defined in bli_sets.h, and so we #include them here. #include "bli_setrs.h" // sets real component only #include "bli_setis.h" // sets imaginary component only // NOTE: This macro also needs to be defined early on since it determines // how real and imaginary components are accessed (ie: whether the fields // of a struct are read directly or whether native C99 functions are used.) #include "bli_gets.h" // -- Scalar constant initialization macros -- #include "bli_constants.h" // -- Separated scalar macros (separated real/imaginary values) -- #include "bli_absq2ris.h" #include "bli_abval2ris.h" #include "bli_addris.h" #include "bli_addjris.h" #include "bli_add3ris.h" #include "bli_axpbyris.h" #include "bli_axpbyjris.h" #include "bli_axpyris.h" #include "bli_axpyjris.h" #include "bli_axmyris.h" #include "bli_conjris.h" #include "bli_copyris.h" #include "bli_copyjris.h" #include "bli_copycjris.h" #include "bli_eqris.h" #include "bli_invertris.h" #include "bli_invscalris.h" #include "bli_invscaljris.h" #include "bli_neg2ris.h" #include "bli_scalris.h" #include "bli_scaljris.h" #include "bli_scalcjris.h" #include "bli_scal2ris.h" #include "bli_scal2jris.h" #include "bli_set0ris.h" #include "bli_sqrt2ris.h" #include "bli_subris.h" #include "bli_subjris.h" #include "bli_swapris.h" #include "bli_xpbyris.h" #include "bli_xpbyjris.h" // Inlined scalar macros in loops #include "bli_scal2ris_mxn.h" #include "bli_scalris_mxn_uplo.h" // -- Conventional scalar macros (paired real/imaginary values) -- #include "bli_absq2s.h" #include "bli_abval2s.h" #include "bli_adds.h" #include "bli_addjs.h" #include "bli_add3s.h" #include "bli_axpbys.h" #include "bli_axpbyjs.h" #include "bli_axpys.h" #include "bli_axpyjs.h" #include "bli_axmys.h" #include "bli_conjs.h" #include "bli_copys.h" #include "bli_copyjs.h" #include "bli_copycjs.h" 
#include "bli_copynzs.h" #include "bli_copyjnzs.h" #include "bli_dots.h" #include "bli_dotjs.h" #include "bli_eq.h" #include "bli_fprints.h" #include "bli_inverts.h" #include "bli_invscals.h" #include "bli_invscaljs.h" #include "bli_neg2s.h" #include "bli_rands.h" #include "bli_randnp2s.h" #include "bli_scals.h" #include "bli_scaljs.h" #include "bli_scalcjs.h" #include "bli_scal2s.h" #include "bli_scal2js.h" #include "bli_set0s.h" #include "bli_set1s.h" #include "bli_seti0s.h" #include "bli_sqrt2s.h" #include "bli_subs.h" #include "bli_subjs.h" #include "bli_swaps.h" #include "bli_xpbys.h" #include "bli_xpbyjs.h" // Inlined scalar macros in loops #include "bli_adds_mxn.h" #include "bli_adds_mxn_uplo.h" #include "bli_set0s_mxn.h" #include "bli_copys_mxn.h" #include "bli_scal2s_mxn.h" #include "bli_xpbys_mxn.h" #include "bli_xpbys_mxn_uplo.h" // -- "broadcast B" scalar macros -- #include "bli_bcastbbs_mxn.h" #include "bli_scal2bbs_mxn.h" #include "bli_set0bbs_mxn.h" // -- 3m-specific scalar macros -- #include "bli_copyri3s.h" #include "bli_copyjri3s.h" #include "bli_scal2ri3s.h" #include "bli_scal2jri3s.h" #include "bli_scal2ri3s_mxn.h" // -- 4mh/3mh-specific scalar macros -- // ro #include "bli_scal2ros.h" #include "bli_scal2jros.h" // io #include "bli_scal2ios.h" #include "bli_scal2jios.h" // rpi #include "bli_scal2rpis.h" #include "bli_scal2jrpis.h" #include "bli_scal2rihs_mxn.h" #include "bli_scal2rihs_mxn_diag.h" #include "bli_scal2rihs_mxn_uplo.h" #include "bli_setrihs_mxn_diag.h" // -- 1m-specific scalar macros -- // 1e #include "bli_copy1es.h" #include "bli_copyj1es.h" #include "bli_invert1es.h" #include "bli_scal1es.h" #include "bli_scal21es.h" #include "bli_scal2j1es.h" // 1r #include "bli_copy1rs.h" #include "bli_copyj1rs.h" #include "bli_invert1rs.h" #include "bli_scal1rs.h" #include "bli_scal21rs.h" #include "bli_scal2j1rs.h" // 1m (1e or 1r) #include "bli_invert1ms_mxn_diag.h" #include "bli_scal1ms_mxn.h" #include "bli_scal21ms_mxn.h" #include 
"bli_scal21ms_mxn_diag.h" #include "bli_scal21ms_mxn_uplo.h" #include "bli_set1ms_mxn.h" #include "bli_set1ms_mxn_diag.h" #include "bli_set1ms_mxn_uplo.h" #include "bli_seti01ms_mxn_diag.h" #endif blis-0.6.1/frame/include/bli_system.h000066400000000000000000000101071360743507500175200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_SYSTEM_H #define BLIS_SYSTEM_H // NOTE: If not yet defined, we define _POSIX_C_SOURCE to make sure that // various parts of POSIX are defined and made available. #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 200809L #endif #include #include #include #include #include #include #include #include // Determine the compiler (hopefully) and define conveniently named macros // accordingly. #if defined(__ICC) || defined(__INTEL_COMPILER) #define BLIS_ICC #elif defined(__clang__) #define BLIS_CLANG #elif defined(__GNUC__) #define BLIS_GCC #endif // Determine if we are on a 64-bit or 32-bit architecture. #if defined(_M_X64) || defined(__x86_64) || defined(__aarch64__) || \ defined(_ARCH_PPC64) #define BLIS_ARCH_64 #else #define BLIS_ARCH_32 #endif // Determine the target operating system. #if defined(_WIN32) || defined(__CYGWIN__) #define BLIS_OS_WINDOWS 1 #elif defined(__gnu_hurd__) #define BLIS_OS_GNU 1 #elif defined(__APPLE__) || defined(__MACH__) #define BLIS_OS_OSX 1 #elif defined(__ANDROID__) #define BLIS_OS_ANDROID 1 #elif defined(__linux__) #define BLIS_OS_LINUX 1 #elif defined(__bgq__) #define BLIS_OS_BGQ 1 #elif defined(__bg__) #define BLIS_OS_BGP 1 #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__bsdi__) || defined(__DragonFly__) || \ defined(__FreeBSD_kernel__) || defined(__HAIKU__) #define BLIS_OS_BSD 1 #elif defined(EMSCRIPTEN) #define BLIS_OS_EMSCRIPTEN #else #error "Cannot determine operating system" #endif // A few changes that may be necessary in Windows environments. #if BLIS_OS_WINDOWS // Include Windows header file. #define WIN32_LEAN_AND_MEAN #define VC_EXTRALEAN #include #if !defined(__clang__) && !defined(__GNUC__) // Undefine attribute specifiers in Windows. #define __attribute__(x) // Undefine restrict. #define restrict #endif #endif // time.h provides clock_gettime(). 
#if BLIS_OS_WINDOWS #include #elif BLIS_OS_OSX #include #else //#include #include #endif // POSIX threads are unconditionally required, regardless of whether // multithreading is enabled via pthreads or OpenMP (or disabled). // If pthreads is not available (Windows), then fake it. //#include "bli_pthread_wrap.h" #endif blis-0.6.1/frame/include/bli_tapi_ba.h000066400000000000000000000047221360743507500176010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // This file defines macros used to allow the _tapi.c files to produce // typed APIs that omit expert parameters. // Define the macro to remove the function name suffix (in function // definitions). #undef EX_SUF #define EX_SUF // Define the macro to omit expert arguments from function signatures // and prototypes. #undef BLIS_TAPI_EX_PARAMS #define BLIS_TAPI_EX_PARAMS // Define the macro to declare local expert variables that are initialized // to NULL. The "( void )" statements are to prevent unused variable // warnings by the compiler. #undef BLIS_TAPI_EX_DECLS #define BLIS_TAPI_EX_DECLS cntx_t* cntx = NULL; ( void )cntx; \ rntm_t* rntm = NULL; ( void )rntm; // Define the macro to pass the local expert variables to another function. //#undef BLIS_TAPI_EX_VARS //#define BLIS_TAPI_EX_VARS blis-0.6.1/frame/include/bli_tapi_ex.h000066400000000000000000000046321360743507500176330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // This file defines macros used to allow the _tapi.c files to produce // typed APIs that contain context parameters. // Define the macro to add a suffix to the typed API function names // (in function definitions). #undef EX_SUF #define EX_SUF BLIS_TAPI_EX_SUF // Define the macro to add expert arguments to function signatures // and prototypes. #undef BLIS_TAPI_EX_PARAMS #define BLIS_TAPI_EX_PARAMS ,cntx_t* cntx, rntm_t* rntm // Define the macro to omit the expert variable declaration block, since // it is not needed when expert parameters are passed in through the API. #undef BLIS_TAPI_EX_DECLS #define BLIS_TAPI_EX_DECLS // Define the macro to pass the local expert variables to another function. 
//#undef BLIS_TAPI_EX_VARS //#define BLIS_TAPI_EX_VARS ,cntx, rntm blis-0.6.1/frame/include/bli_tapi_macro_defs.h000066400000000000000000000034171360743507500213210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Define the suffix to add to typed API function names that include // additional "expert" parameters. 
#define BLIS_TAPI_EX_SUF _ex blis-0.6.1/frame/include/bli_type_defs.h000066400000000000000000001315561360743507500201720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018-2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_TYPE_DEFS_H #define BLIS_TYPE_DEFS_H // // -- BLIS basic types --------------------------------------------------------- // #ifdef __cplusplus // For C++, include stdint.h. #include #elif __STDC_VERSION__ >= 199901L // For C99 (or later), include stdint.h. #include #else // When stdint.h is not available, manually typedef the types we will use. #ifdef _WIN32 typedef __int32 int32_t; typedef unsigned __int32 uint32_t; typedef __int64 int64_t; typedef unsigned __int64 uint64_t; #else #error "Attempting to compile on pre-C99 system without stdint.h." #endif #endif // -- General-purpose integers -- // If BLAS integers are 64 bits, mandate that BLIS integers also be 64 bits. // NOTE: This cpp guard will only meaningfully change BLIS's behavior on // systems where the BLIS integer size would have been automatically selected // to be 32 bits, since explicit selection of 32 bits is prohibited at // configure-time (and explicit or automatic selection of 64 bits is fine // and would have had the same result). #if BLIS_BLAS_INT_SIZE == 64 #undef BLIS_INT_TYPE_SIZE #define BLIS_INT_TYPE_SIZE 64 #endif // Define integer types depending on what size integer was requested. #if BLIS_INT_TYPE_SIZE == 32 typedef int32_t gint_t; typedef uint32_t guint_t; #elif BLIS_INT_TYPE_SIZE == 64 typedef int64_t gint_t; typedef uint64_t guint_t; #else typedef signed long int gint_t; typedef unsigned long int guint_t; #endif // -- Boolean type -- typedef gint_t bool_t; // -- Boolean values -- #ifndef TRUE #define TRUE 1 #endif #ifndef FALSE #define FALSE 0 #endif // -- Special-purpose integers -- // This cpp guard provides a temporary hack to allow libflame // interoperability with BLIS. 
#ifndef _DEFINED_DIM_T #define _DEFINED_DIM_T typedef gint_t dim_t; // dimension type #endif typedef gint_t inc_t; // increment/stride type typedef gint_t doff_t; // diagonal offset type typedef guint_t siz_t; // byte size type typedef uint32_t objbits_t; // object information bit field // -- Real types -- // Define the number of floating-point types supported, and the size of the // largest type. #define BLIS_NUM_FP_TYPES 4 #define BLIS_MAX_TYPE_SIZE sizeof(dcomplex) // There are some places where we need to use sizeof() inside of a C // preprocessor #if conditional, and so here we define the various sizes // for those purposes. #define BLIS_SIZEOF_S 4 // sizeof(float) #define BLIS_SIZEOF_D 8 // sizeof(double) #define BLIS_SIZEOF_C 8 // sizeof(scomplex) #define BLIS_SIZEOF_Z 16 // sizeof(dcomplex) // -- Complex types -- #ifdef BLIS_ENABLE_C99_COMPLEX #if __STDC_VERSION__ >= 199901L #include // Typedef official complex types to BLIS complex type names. typedef float complex scomplex; typedef double complex dcomplex; #else #error "Configuration requested C99 complex types, but C99 does not appear to be supported." #endif #else // ifndef BLIS_ENABLE_C99_COMPLEX // This cpp guard provides a temporary hack to allow libflame // interoperability with BLIS. #ifndef _DEFINED_SCOMPLEX #define _DEFINED_SCOMPLEX typedef struct { float real; float imag; } scomplex; #endif // This cpp guard provides a temporary hack to allow libflame // interoperability with BLIS. #ifndef _DEFINED_DCOMPLEX #define _DEFINED_DCOMPLEX typedef struct { double real; double imag; } dcomplex; #endif #endif // BLIS_ENABLE_C99_COMPLEX // -- Atom type -- // Note: atom types are used to hold "bufferless" scalar object values. Note // that it needs to be as large as the largest possible scalar value we might // want to hold. Thus, for now, it is a dcomplex. 
typedef dcomplex atom_t; // -- Fortran-77 types -- // Note: These types are typically only used by BLAS compatibility layer, but // we must define them even when the compatibility layer isn't being built // because they also occur in bli_slamch() and bli_dlamch(). // Define f77_int depending on what size of integer was requested. #if BLIS_BLAS_INT_TYPE_SIZE == 32 typedef int32_t f77_int; #elif BLIS_BLAS_INT_TYPE_SIZE == 64 typedef int64_t f77_int; #else typedef long int f77_int; #endif typedef char f77_char; typedef float f77_float; typedef double f77_double; typedef scomplex f77_scomplex; typedef dcomplex f77_dcomplex; // -- Void function pointer types -- // Note: This type should be used in any situation where the address of a // *function* will be conveyed or stored prior to it being typecast back // to the correct function type. It does not need to be used when conveying // or storing the address of *data* (such as an array of float or double). //typedef void (*void_fp)( void ); typedef void* void_fp; // // -- BLIS info bit field offsets ---------------------------------------------- // /* info field description bit(s) purpose ------- ------- 2 ~ 0 Stored numerical datatype - 0: domain (0 == real, 1 == complex) - 1: precision (0 == single, 1 == double) - 2: special (100 = int; 101 = const) 3 Transposition required [during pack]? 4 Conjugation required [during pack]? 7 ~ 5 Part of matrix stored: - 5: strictly upper triangular - 6: diagonal - 7: strictly lower triangular 8 Implicit unit diagonal? 9 Invert diagonal required [during pack]? 
12 ~ 10 Target numerical datatype - 10: domain (0 == real, 1 == complex) - 11: precision (0 == single, 1 == double) - 12: used to encode integer, constant types 15 ~ 13 Execution numerical datatype - 13: domain (0 == real, 1 == complex) - 14: precision (0 == single, 1 == double) - 15: used to encode integer, constant types 22 ~ 16 Packed type/status - 0 0000 00: not packed - 1 0000 00: packed (unspecified; by rows, columns, or vector) - 1 0000 00: packed by rows - 1 0000 01: packed by columns - 1 0000 10: packed by row panels - 1 0000 11: packed by column panels - 1 0001 10: packed by 4m interleaved row panels - 1 0001 11: packed by 4m interleaved column panels - 1 0010 10: packed by 3m interleaved row panels - 1 0010 11: packed by 3m interleaved column panels - 1 0011 10: packed by 4m separated row panels (not used) - 1 0011 11: packed by 4m separated column panels (not used) - 1 0100 10: packed by 3m separated row panels - 1 0100 11: packed by 3m separated column panels - 1 0101 10: packed real-only row panels - 1 0101 11: packed real-only column panels - 1 0110 10: packed imag-only row panels - 1 0110 11: packed imag-only column panels - 1 0111 10: packed real+imag row panels - 1 0111 11: packed real+imag column panels - 1 1000 10: packed by 1m expanded row panels - 1 1000 11: packed by 1m expanded column panels - 1 1001 10: packed by 1m reordered row panels - 1 1001 11: packed by 1m reordered column panels 23 Packed panel order if upper-stored - 0 == forward order if upper - 1 == reverse order if upper 24 Packed panel order if lower-stored - 0 == forward order if lower - 1 == reverse order if lower 26 ~ 25 Packed buffer type - 0 == block of A - 1 == panel of B - 2 == panel of C - 3 == general use 28 ~ 27 Structure type - 0 == general - 1 == Hermitian - 2 == symmetric - 3 == triangular 31 ~ 29 Computation numerical datatype - 29: domain (0 == real, 1 == complex) - 30: precision (0 == single, 1 == double) - 31: used to encode integer, constant types info2 field 
description bit(s) purpose ------- ------- 2 ~ 0 Scalar storage numerical datatype - 0: domain (0 == real, 1 == complex) - 1: precision (0 == single, 1 == double) - 2: used to encode integer, constant types */ // info #define BLIS_DATATYPE_SHIFT 0 #define BLIS_DOMAIN_SHIFT 0 #define BLIS_PRECISION_SHIFT 1 #define BLIS_CONJTRANS_SHIFT 3 #define BLIS_TRANS_SHIFT 3 #define BLIS_CONJ_SHIFT 4 #define BLIS_UPLO_SHIFT 5 #define BLIS_UPPER_SHIFT 5 #define BLIS_DIAG_SHIFT 6 #define BLIS_LOWER_SHIFT 7 #define BLIS_UNIT_DIAG_SHIFT 8 #define BLIS_INVERT_DIAG_SHIFT 9 #define BLIS_TARGET_DT_SHIFT 10 #define BLIS_TARGET_DOMAIN_SHIFT 10 #define BLIS_TARGET_PREC_SHIFT 11 #define BLIS_EXEC_DT_SHIFT 13 #define BLIS_EXEC_DOMAIN_SHIFT 13 #define BLIS_EXEC_PREC_SHIFT 14 #define BLIS_PACK_SCHEMA_SHIFT 16 #define BLIS_PACK_RC_SHIFT 16 #define BLIS_PACK_PANEL_SHIFT 17 #define BLIS_PACK_FORMAT_SHIFT 18 #define BLIS_PACK_SHIFT 22 #define BLIS_PACK_REV_IF_UPPER_SHIFT 23 #define BLIS_PACK_REV_IF_LOWER_SHIFT 24 #define BLIS_PACK_BUFFER_SHIFT 25 #define BLIS_STRUC_SHIFT 27 #define BLIS_COMP_DT_SHIFT 29 #define BLIS_COMP_DOMAIN_SHIFT 29 #define BLIS_COMP_PREC_SHIFT 30 // info2 #define BLIS_SCALAR_DT_SHIFT 0 #define BLIS_SCALAR_DOMAIN_SHIFT 0 #define BLIS_SCALAR_PREC_SHIFT 1 // // -- BLIS info bit field masks ------------------------------------------------ // // info #define BLIS_DATATYPE_BITS ( 0x7 << BLIS_DATATYPE_SHIFT ) #define BLIS_DOMAIN_BIT ( 0x1 << BLIS_DOMAIN_SHIFT ) #define BLIS_PRECISION_BIT ( 0x1 << BLIS_PRECISION_SHIFT ) #define BLIS_CONJTRANS_BITS ( 0x3 << BLIS_CONJTRANS_SHIFT ) #define BLIS_TRANS_BIT ( 0x1 << BLIS_TRANS_SHIFT ) #define BLIS_CONJ_BIT ( 0x1 << BLIS_CONJ_SHIFT ) #define BLIS_UPLO_BITS ( 0x7 << BLIS_UPLO_SHIFT ) #define BLIS_UPPER_BIT ( 0x1 << BLIS_UPPER_SHIFT ) #define BLIS_DIAG_BIT ( 0x1 << BLIS_DIAG_SHIFT ) #define BLIS_LOWER_BIT ( 0x1 << BLIS_LOWER_SHIFT ) #define BLIS_UNIT_DIAG_BIT ( 0x1 << BLIS_UNIT_DIAG_SHIFT ) #define BLIS_INVERT_DIAG_BIT ( 0x1 << 
BLIS_INVERT_DIAG_SHIFT ) #define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT ) #define BLIS_TARGET_DOMAIN_BIT ( 0x1 << BLIS_TARGET_DOMAIN_SHIFT ) #define BLIS_TARGET_PREC_BIT ( 0x1 << BLIS_TARGET_PREC_SHIFT ) #define BLIS_EXEC_DT_BITS ( 0x7 << BLIS_EXEC_DT_SHIFT ) #define BLIS_EXEC_DOMAIN_BIT ( 0x1 << BLIS_EXEC_DOMAIN_SHIFT ) #define BLIS_EXEC_PREC_BIT ( 0x1 << BLIS_EXEC_PREC_SHIFT ) #define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT ) #define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT ) #define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT ) #define BLIS_PACK_FORMAT_BITS ( 0xF << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_PACK_BIT ( 0x1 << BLIS_PACK_SHIFT ) #define BLIS_PACK_REV_IF_UPPER_BIT ( 0x1 << BLIS_PACK_REV_IF_UPPER_SHIFT ) #define BLIS_PACK_REV_IF_LOWER_BIT ( 0x1 << BLIS_PACK_REV_IF_LOWER_SHIFT ) #define BLIS_PACK_BUFFER_BITS ( 0x3 << BLIS_PACK_BUFFER_SHIFT ) #define BLIS_STRUC_BITS ( 0x3 << BLIS_STRUC_SHIFT ) #define BLIS_COMP_DT_BITS ( 0x7 << BLIS_COMP_DT_SHIFT ) #define BLIS_COMP_DOMAIN_BIT ( 0x1 << BLIS_COMP_DOMAIN_SHIFT ) #define BLIS_COMP_PREC_BIT ( 0x1 << BLIS_COMP_PREC_SHIFT ) // info2 #define BLIS_SCALAR_DT_BITS ( 0x7 << BLIS_SCALAR_DT_SHIFT ) #define BLIS_SCALAR_DOMAIN_BIT ( 0x1 << BLIS_SCALAR_DOMAIN_SHIFT ) #define BLIS_SCALAR_PREC_BIT ( 0x1 << BLIS_SCALAR_PREC_SHIFT ) // // -- BLIS enumerated type value definitions ----------------------------------- // #define BLIS_BITVAL_REAL 0x0 #define BLIS_BITVAL_COMPLEX BLIS_DOMAIN_BIT #define BLIS_BITVAL_SINGLE_PREC 0x0 #define BLIS_BITVAL_DOUBLE_PREC BLIS_PRECISION_BIT #define BLIS_BITVAL_FLOAT_TYPE 0x0 #define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT #define BLIS_BITVAL_DOUBLE_TYPE BLIS_PRECISION_BIT #define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT ) #define BLIS_BITVAL_INT_TYPE 0x04 #define BLIS_BITVAL_CONST_TYPE 0x05 #define BLIS_BITVAL_NO_TRANS 0x0 #define BLIS_BITVAL_TRANS BLIS_TRANS_BIT #define BLIS_BITVAL_NO_CONJ 0x0 #define BLIS_BITVAL_CONJ 
BLIS_CONJ_BIT #define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT ) #define BLIS_BITVAL_ZEROS 0x0 #define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT ) #define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT ) #define BLIS_BITVAL_DENSE BLIS_UPLO_BITS #define BLIS_BITVAL_NONUNIT_DIAG 0x0 #define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT #define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT #define BLIS_BITVAL_NOT_PACKED 0x0 #define BLIS_BITVAL_4MI ( 0x1 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_3MI ( 0x2 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_4MS ( 0x3 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_3MS ( 0x4 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_RO ( 0x5 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_IO ( 0x6 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_RPI ( 0x7 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_1E ( 0x8 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_1R ( 0x9 << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_BITVAL_PACKED_UNSPEC ( BLIS_PACK_BIT ) #define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT ) #define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_4MS ( BLIS_PACK_BIT | BLIS_BITVAL_4MS | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_4MS ( BLIS_PACK_BIT | BLIS_BITVAL_4MS | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define 
BLIS_BITVAL_PACKED_ROW_PANELS_3MS ( BLIS_PACK_BIT | BLIS_BITVAL_3MS | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_3MS ( BLIS_PACK_BIT | BLIS_BITVAL_3MS | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_1E ( BLIS_PACK_BIT | BLIS_BITVAL_1E | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_1E ( BLIS_PACK_BIT | BLIS_BITVAL_1E | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_1R ( BLIS_PACK_BIT | BLIS_BITVAL_1R | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_1R ( BLIS_PACK_BIT | BLIS_BITVAL_1R | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0 #define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT #define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0 #define BLIS_BITVAL_PACK_REV_IF_LOWER BLIS_PACK_REV_IF_LOWER_BIT #define BLIS_BITVAL_BUFFER_FOR_A_BLOCK 0x0 #define BLIS_BITVAL_BUFFER_FOR_B_PANEL ( 0x1 << BLIS_PACK_BUFFER_SHIFT ) #define BLIS_BITVAL_BUFFER_FOR_C_PANEL ( 0x2 << BLIS_PACK_BUFFER_SHIFT ) #define BLIS_BITVAL_BUFFER_FOR_GEN_USE ( 0x3 << BLIS_PACK_BUFFER_SHIFT ) #define BLIS_BITVAL_GENERAL 0x0 #define BLIS_BITVAL_HERMITIAN ( 0x1 << BLIS_STRUC_SHIFT ) #define BLIS_BITVAL_SYMMETRIC ( 0x2 << BLIS_STRUC_SHIFT ) #define BLIS_BITVAL_TRIANGULAR ( 0x3 << BLIS_STRUC_SHIFT ) // // -- BLIS enumerated 
type definitions ----------------------------------------- // // -- Operational parameter types -- typedef enum { BLIS_NO_TRANSPOSE = 0x0, BLIS_TRANSPOSE = BLIS_BITVAL_TRANS, BLIS_CONJ_NO_TRANSPOSE = BLIS_BITVAL_CONJ, BLIS_CONJ_TRANSPOSE = BLIS_BITVAL_CONJ_TRANS } trans_t; typedef enum { BLIS_NO_CONJUGATE = 0x0, BLIS_CONJUGATE = BLIS_BITVAL_CONJ } conj_t; typedef enum { BLIS_ZEROS = BLIS_BITVAL_ZEROS, BLIS_LOWER = BLIS_BITVAL_LOWER, BLIS_UPPER = BLIS_BITVAL_UPPER, BLIS_DENSE = BLIS_BITVAL_DENSE } uplo_t; typedef enum { BLIS_LEFT = 0x0, BLIS_RIGHT } side_t; typedef enum { BLIS_NONUNIT_DIAG = 0x0, BLIS_UNIT_DIAG = BLIS_BITVAL_UNIT_DIAG } diag_t; typedef enum { BLIS_NO_INVERT_DIAG = 0x0, BLIS_INVERT_DIAG = BLIS_BITVAL_INVERT_DIAG } invdiag_t; typedef enum { BLIS_GENERAL = BLIS_BITVAL_GENERAL, BLIS_HERMITIAN = BLIS_BITVAL_HERMITIAN, BLIS_SYMMETRIC = BLIS_BITVAL_SYMMETRIC, BLIS_TRIANGULAR = BLIS_BITVAL_TRIANGULAR } struc_t; // -- Data type -- typedef enum { BLIS_FLOAT = BLIS_BITVAL_FLOAT_TYPE, BLIS_DOUBLE = BLIS_BITVAL_DOUBLE_TYPE, BLIS_SCOMPLEX = BLIS_BITVAL_SCOMPLEX_TYPE, BLIS_DCOMPLEX = BLIS_BITVAL_DCOMPLEX_TYPE, BLIS_INT = BLIS_BITVAL_INT_TYPE, BLIS_CONSTANT = BLIS_BITVAL_CONST_TYPE, BLIS_DT_LO = BLIS_FLOAT, BLIS_DT_HI = BLIS_DCOMPLEX } num_t; typedef enum { BLIS_REAL = BLIS_BITVAL_REAL, BLIS_COMPLEX = BLIS_BITVAL_COMPLEX } dom_t; typedef enum { BLIS_SINGLE_PREC = BLIS_BITVAL_SINGLE_PREC, BLIS_DOUBLE_PREC = BLIS_BITVAL_DOUBLE_PREC } prec_t; // -- Pack schema type -- typedef enum { BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED, BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC, BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC, BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS, BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS, BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS, BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS, BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI, BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI, 
BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI, BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI, BLIS_PACKED_ROW_PANELS_4MS = BLIS_BITVAL_PACKED_ROW_PANELS_4MS, BLIS_PACKED_COL_PANELS_4MS = BLIS_BITVAL_PACKED_COL_PANELS_4MS, BLIS_PACKED_ROW_PANELS_3MS = BLIS_BITVAL_PACKED_ROW_PANELS_3MS, BLIS_PACKED_COL_PANELS_3MS = BLIS_BITVAL_PACKED_COL_PANELS_3MS, BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO, BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO, BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO, BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO, BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI, BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI, BLIS_PACKED_ROW_PANELS_1E = BLIS_BITVAL_PACKED_ROW_PANELS_1E, BLIS_PACKED_COL_PANELS_1E = BLIS_BITVAL_PACKED_COL_PANELS_1E, BLIS_PACKED_ROW_PANELS_1R = BLIS_BITVAL_PACKED_ROW_PANELS_1R, BLIS_PACKED_COL_PANELS_1R = BLIS_BITVAL_PACKED_COL_PANELS_1R } pack_t; // We combine row and column packing into one "type", and we start // with BLIS_PACKED_ROW_PANELS, _COLUMN_PANELS. We also count the // schema pair for "4ms" (4m separated), because its bit value has // been reserved, even though we don't use it. 
#define BLIS_NUM_PACK_SCHEMA_TYPES 10 // -- Pack order type -- typedef enum { BLIS_PACK_FWD_IF_UPPER = BLIS_BITVAL_PACK_FWD_IF_UPPER, BLIS_PACK_REV_IF_UPPER = BLIS_BITVAL_PACK_REV_IF_UPPER, BLIS_PACK_FWD_IF_LOWER = BLIS_BITVAL_PACK_FWD_IF_LOWER, BLIS_PACK_REV_IF_LOWER = BLIS_BITVAL_PACK_REV_IF_LOWER } packord_t; // -- Pack buffer type -- typedef enum { BLIS_BUFFER_FOR_A_BLOCK = BLIS_BITVAL_BUFFER_FOR_A_BLOCK, BLIS_BUFFER_FOR_B_PANEL = BLIS_BITVAL_BUFFER_FOR_B_PANEL, BLIS_BUFFER_FOR_C_PANEL = BLIS_BITVAL_BUFFER_FOR_C_PANEL, BLIS_BUFFER_FOR_GEN_USE = BLIS_BITVAL_BUFFER_FOR_GEN_USE } packbuf_t; // -- Partitioning direction -- typedef enum { BLIS_FWD, BLIS_BWD } dir_t; // -- Subpartition type -- typedef enum { BLIS_SUBPART0, BLIS_SUBPART1, BLIS_SUBPART2, BLIS_SUBPART1AND0, BLIS_SUBPART1AND2, BLIS_SUBPART1A, BLIS_SUBPART1B, BLIS_SUBPART00, BLIS_SUBPART10, BLIS_SUBPART20, BLIS_SUBPART01, BLIS_SUBPART11, BLIS_SUBPART21, BLIS_SUBPART02, BLIS_SUBPART12, BLIS_SUBPART22 } subpart_t; // -- Matrix dimension type -- typedef enum { BLIS_M = 0, BLIS_N = 1 } mdim_t; // -- Machine parameter types -- typedef enum { BLIS_MACH_EPS = 0, BLIS_MACH_SFMIN, BLIS_MACH_BASE, BLIS_MACH_PREC, BLIS_MACH_NDIGMANT, BLIS_MACH_RND, BLIS_MACH_EMIN, BLIS_MACH_RMIN, BLIS_MACH_EMAX, BLIS_MACH_RMAX, BLIS_MACH_EPS2 } machval_t; #define BLIS_NUM_MACH_PARAMS 11 #define BLIS_MACH_PARAM_FIRST BLIS_MACH_EPS #define BLIS_MACH_PARAM_LAST BLIS_MACH_EPS2 // -- Induced method types -- typedef enum { BLIS_3MH = 0, BLIS_3M1, BLIS_4MH, BLIS_4M1B, BLIS_4M1A, BLIS_1M, BLIS_NAT, BLIS_IND_FIRST = 0, BLIS_IND_LAST = BLIS_NAT } ind_t; #define BLIS_NUM_IND_METHODS (BLIS_NAT+1) // These are used in bli_*_oapi.c to construct the ind_t values from // the induced method substrings that go into function names. 
#define bli_3mh  BLIS_3MH
#define bli_3m1  BLIS_3M1
#define bli_4mh  BLIS_4MH
#define bli_4mb  BLIS_4M1B
#define bli_4m1  BLIS_4M1A
#define bli_1m   BLIS_1M
#define bli_nat  BLIS_NAT


// -- Kernel ID types --

// Level-1v kernel IDs.
typedef enum
{
	BLIS_ADDV_KER    = 0,
	BLIS_AMAXV_KER   = 1,
	BLIS_AXPBYV_KER  = 2,
	BLIS_AXPYV_KER   = 3,
	BLIS_COPYV_KER   = 4,
	BLIS_DOTV_KER    = 5,
	BLIS_DOTXV_KER   = 6,
	BLIS_INVERTV_KER = 7,
	BLIS_SCALV_KER   = 8,
	BLIS_SCAL2V_KER  = 9,
	BLIS_SETV_KER    = 10,
	BLIS_SUBV_KER    = 11,
	BLIS_SWAPV_KER   = 12,
	BLIS_XPBYV_KER   = 13
} l1vkr_t;

#define BLIS_NUM_LEVEL1V_KERS 14


// Level-1f kernel IDs.
typedef enum
{
	BLIS_AXPY2V_KER    = 0,
	BLIS_DOTAXPYV_KER  = 1,
	BLIS_AXPYF_KER     = 2,
	BLIS_DOTXF_KER     = 3,
	BLIS_DOTXAXPYF_KER = 4
} l1fkr_t;

#define BLIS_NUM_LEVEL1F_KERS 5


// Level-1m (packm/unpackm) kernel IDs. The PACKM and UNPACKM enumerators
// deliberately share the same 0..31 value range.
typedef enum
{
	BLIS_PACKM_0XK_KER  = 0,
	BLIS_PACKM_1XK_KER  = 1,
	BLIS_PACKM_2XK_KER  = 2,
	BLIS_PACKM_3XK_KER  = 3,
	BLIS_PACKM_4XK_KER  = 4,
	BLIS_PACKM_5XK_KER  = 5,
	BLIS_PACKM_6XK_KER  = 6,
	BLIS_PACKM_7XK_KER  = 7,
	BLIS_PACKM_8XK_KER  = 8,
	BLIS_PACKM_9XK_KER  = 9,
	BLIS_PACKM_10XK_KER = 10,
	BLIS_PACKM_11XK_KER = 11,
	BLIS_PACKM_12XK_KER = 12,
	BLIS_PACKM_13XK_KER = 13,
	BLIS_PACKM_14XK_KER = 14,
	BLIS_PACKM_15XK_KER = 15,
	BLIS_PACKM_16XK_KER = 16,
	BLIS_PACKM_17XK_KER = 17,
	BLIS_PACKM_18XK_KER = 18,
	BLIS_PACKM_19XK_KER = 19,
	BLIS_PACKM_20XK_KER = 20,
	BLIS_PACKM_21XK_KER = 21,
	BLIS_PACKM_22XK_KER = 22,
	BLIS_PACKM_23XK_KER = 23,
	BLIS_PACKM_24XK_KER = 24,
	BLIS_PACKM_25XK_KER = 25,
	BLIS_PACKM_26XK_KER = 26,
	BLIS_PACKM_27XK_KER = 27,
	BLIS_PACKM_28XK_KER = 28,
	BLIS_PACKM_29XK_KER = 29,
	BLIS_PACKM_30XK_KER = 30,
	BLIS_PACKM_31XK_KER = 31,

	BLIS_UNPACKM_0XK_KER  = 0,
	BLIS_UNPACKM_1XK_KER  = 1,
	BLIS_UNPACKM_2XK_KER  = 2,
	BLIS_UNPACKM_3XK_KER  = 3,
	BLIS_UNPACKM_4XK_KER  = 4,
	BLIS_UNPACKM_5XK_KER  = 5,
	BLIS_UNPACKM_6XK_KER  = 6,
	BLIS_UNPACKM_7XK_KER  = 7,
	BLIS_UNPACKM_8XK_KER  = 8,
	BLIS_UNPACKM_9XK_KER  = 9,
	BLIS_UNPACKM_10XK_KER = 10,
	BLIS_UNPACKM_11XK_KER = 11,
	BLIS_UNPACKM_12XK_KER = 12,
	BLIS_UNPACKM_13XK_KER = 13,
	BLIS_UNPACKM_14XK_KER = 14,
	BLIS_UNPACKM_15XK_KER = 15,
	BLIS_UNPACKM_16XK_KER = 16,
	BLIS_UNPACKM_17XK_KER = 17,
	BLIS_UNPACKM_18XK_KER = 18,
	BLIS_UNPACKM_19XK_KER = 19,
	BLIS_UNPACKM_20XK_KER = 20,
	BLIS_UNPACKM_21XK_KER = 21,
	BLIS_UNPACKM_22XK_KER = 22,
	BLIS_UNPACKM_23XK_KER = 23,
	BLIS_UNPACKM_24XK_KER = 24,
	BLIS_UNPACKM_25XK_KER = 25,
	BLIS_UNPACKM_26XK_KER = 26,
	BLIS_UNPACKM_27XK_KER = 27,
	BLIS_UNPACKM_28XK_KER = 28,
	BLIS_UNPACKM_29XK_KER = 29,
	BLIS_UNPACKM_30XK_KER = 30,
	BLIS_UNPACKM_31XK_KER = 31
} l1mkr_t;

#define BLIS_NUM_PACKM_KERS   32
#define BLIS_NUM_UNPACKM_KERS 32


// Level-3 micro-kernel IDs.
typedef enum
{
	BLIS_GEMM_UKR       = 0,
	BLIS_GEMMTRSM_L_UKR = 1,
	BLIS_GEMMTRSM_U_UKR = 2,
	BLIS_TRSM_L_UKR     = 3,
	BLIS_TRSM_U_UKR     = 4
} l3ukr_t;

#define BLIS_NUM_LEVEL3_UKRS 5


// Micro-kernel implementation kinds.
typedef enum
{
	BLIS_REFERENCE_UKERNEL = 0,
	BLIS_VIRTUAL_UKERNEL   = 1,
	BLIS_OPTIMIZED_UKERNEL = 2,
	BLIS_NOTAPPLIC_UKERNEL = 3
} kimpl_t;

#define BLIS_NUM_UKR_IMPL_TYPES 4


#if 0
typedef enum
{
	// RV = row-stored, contiguous vector-loading
	// RG = row-stored, non-contiguous gather-loading
	// CV = column-stored, contiguous vector-loading
	// CG = column-stored, non-contiguous gather-loading

	// RD = row-stored, dot-based
	// CD = col-stored, dot-based

	// RC = row-stored, column-times-column
	// CR = column-stored, row-times-row

	// GX = general-stored generic implementation

	BLIS_GEMMSUP_RV_UKR = 0,
	BLIS_GEMMSUP_RG_UKR,
	BLIS_GEMMSUP_CV_UKR,
	BLIS_GEMMSUP_CG_UKR,

	BLIS_GEMMSUP_RD_UKR,
	BLIS_GEMMSUP_CD_UKR,

	BLIS_GEMMSUP_RC_UKR,
	BLIS_GEMMSUP_CR_UKR,

	BLIS_GEMMSUP_GX_UKR,
} l3sup_t;

#define BLIS_NUM_LEVEL3_SUP_UKRS 9
#endif


typedef enum
{
	// 3-operand storage combinations
	BLIS_RRR = 0,
	BLIS_RRC = 1,
	BLIS_RCR = 2,
	BLIS_RCC = 3,
	BLIS_CRR = 4,
	BLIS_CRC = 5,
	BLIS_CCR = 6,
	BLIS_CCC = 7,
	BLIS_XXX = 8,

#if 0
	BLIS_RRG,
	BLIS_RCG,
	BLIS_RGR,
	BLIS_RGC,
	BLIS_RGG,
	BLIS_CRG,
	BLIS_CCG,
	BLIS_CGR,
	BLIS_CGC,
	BLIS_CGG,
	BLIS_GRR,
	BLIS_GRC,
	BLIS_GRG,
	BLIS_GCR,
	BLIS_GCC,
	BLIS_GCG,
	BLIS_GGR,
	BLIS_GGC,
	BLIS_GGG,
#endif
} stor3_t;

#define BLIS_NUM_3OP_RC_COMBOS 9
//#define BLIS_NUM_3OP_RCG_COMBOS 27


#if 0
typedef enum
{
	BLIS_JC_IDX = 0,
	BLIS_PC_IDX,
	BLIS_IC_IDX,
	BLIS_JR_IDX,
	BLIS_IR_IDX,
	BLIS_PR_IDX
} thridx_t;
#endif

#define BLIS_NUM_LOOPS 6


// -- Operation ID type --

typedef enum
{
//
// NOTE: If/when additional type values are added to this enum,
// you must either:
// - keep the level-3 values (starting with _GEMM) beginning at
//   index 0; or
// - if the value range is moved such that it does not begin at
//   index 0, implement something like a BLIS_OPID_LEVEL3_RANGE_START
//   value that can be subtracted from the opid_t value to map it
//   to a zero-based range.
// This is needed because these level-3 opid_t values are used in
// bli_l3_ind.c to index into arrays.
//
	BLIS_GEMM  = 0,
	BLIS_HEMM  = 1,
	BLIS_HERK  = 2,
	BLIS_HER2K = 3,
	BLIS_SYMM  = 4,
	BLIS_SYRK  = 5,
	BLIS_SYR2K = 6,
	BLIS_TRMM3 = 7,
	BLIS_TRMM  = 8,
	BLIS_TRSM  = 9,

	BLIS_NOID  = 10
} opid_t;

#define BLIS_NUM_LEVEL3_OPS 10


// -- Blocksize ID type --

typedef enum
{
	// NOTE: the level-3 blocksizes MUST be indexed starting at zero.
	// At one point, we made this assumption in bli_cntx_set_blkszs()
	// and friends.

	BLIS_KR = 0,
	BLIS_MR = 1,
	BLIS_NR = 2,
	BLIS_MC = 3,
	BLIS_KC = 4,
	BLIS_NC = 5,

	BLIS_M2 = 6, // level-2 blocksize in m dimension
	BLIS_N2 = 7, // level-2 blocksize in n dimension

	BLIS_AF = 8, // level-1f axpyf fusing factor
	BLIS_DF = 9, // level-1f dotxf fusing factor
	BLIS_XF = 10, // level-1f dotxaxpyf fusing factor

	BLIS_NO_PART = 11 // used as a placeholder when blocksizes are not applicable.
} bszid_t;

#define BLIS_NUM_BLKSZS 11


// -- Threshold ID type --

typedef enum
{
	BLIS_MT = 0, // level-3 small/unpacked matrix threshold in m dimension
	BLIS_NT = 1, // level-3 small/unpacked matrix threshold in n dimension
	BLIS_KT = 2  // level-3 small/unpacked matrix threshold in k dimension

} threshid_t;

#define BLIS_NUM_THRESH 3


// -- Architecture ID type --

// NOTE: This typedef enum must be kept up-to-date with the arch_t
// string array in bli_arch.c. Whenever values are added/inserted
// OR if values are rearranged, be sure to update the string array
// in bli_arch.c.
// Enumerators are implicitly numbered from 0 in the order listed, so
// BLIS_ARCH_GENERIC (the last entry) has the value 20.
typedef enum
{
	// Intel
	BLIS_ARCH_SKX = 0,
	BLIS_ARCH_KNL,
	BLIS_ARCH_KNC,
	BLIS_ARCH_HASWELL,
	BLIS_ARCH_SANDYBRIDGE,
	BLIS_ARCH_PENRYN,

	// AMD
	BLIS_ARCH_ZEN2,
	BLIS_ARCH_ZEN,
	BLIS_ARCH_EXCAVATOR,
	BLIS_ARCH_STEAMROLLER,
	BLIS_ARCH_PILEDRIVER,
	BLIS_ARCH_BULLDOZER,

	// ARM
	BLIS_ARCH_THUNDERX2,
	BLIS_ARCH_CORTEXA57,
	BLIS_ARCH_CORTEXA53,
	BLIS_ARCH_CORTEXA15,
	BLIS_ARCH_CORTEXA9,

	// IBM/Power
	BLIS_ARCH_POWER9,
	BLIS_ARCH_POWER7,
	BLIS_ARCH_BGQ,

	// Generic architecture/configuration
	BLIS_ARCH_GENERIC

} arch_t;

// NOTE: This value must be updated to reflect the number of enum values
// listed above for arch_t!
#define BLIS_NUM_ARCHS 21


//
// -- BLIS misc. structure types -----------------------------------------------
//

// These headers must be included here (or earlier) because definitions they
// provide are needed in the pool_t and related structs.
#include "bli_pthread.h"
#include "bli_malloc.h"

// -- Pool block type --

// A buffer pointer paired with a block size.
typedef struct
{
	void*     buf;
	siz_t     block_size;

} pblk_t;


// -- Pool type --

// Bookkeeping for a set of blocks plus the malloc/free function pointers
// stored alongside them.
// NOTE(review): block_ptrs is presumably an array of pblk_t -- confirm
// against the pool implementation.
typedef struct
{
	void*     block_ptrs;
	dim_t     block_ptrs_len;

	dim_t     top_index;
	dim_t     num_blocks;

	siz_t     block_size;
	siz_t     align_size;
	siz_t     offset_size;

	malloc_ft malloc_fp;
	free_ft   free_fp;

} pool_t;


// -- Array type --

// A buffer described by an element count and a per-element size.
typedef struct
{
	void*     buf;

	siz_t     num_elem;
	siz_t     elem_size;

} array_t;


// -- Locked pool-of-arrays-of-pools type --

// A pool_t bundled with a mutex and a default array length.
typedef struct
{
	bli_pthread_mutex_t mutex;
	pool_t              pool;

	siz_t               def_array_len;

} apool_t;


// -- packing block allocator: Locked set of pools type --

// Three pools bundled with a single mutex.
// NOTE(review): the three pools presumably correspond to the A-block,
// B-panel and C-panel packbuf_t kinds -- confirm against the allocator.
typedef struct membrk_s
{
	pool_t              pools[3];
	bli_pthread_mutex_t mutex;

	// These fields are used for general-purpose allocation.
	siz_t               align_size;
	malloc_ft           malloc_fp;
	free_ft             free_fp;

} membrk_t;


// -- Memory object type --

// A pool block together with its buffer type, a pointer to a pool, and a
// size.
typedef struct mem_s
{
	pblk_t    pblk;
	packbuf_t buf_type;
	pool_t*   pool;
	siz_t     size;
} mem_t;


// -- Control tree node type --

// The struct is self-referential (sub_prenode/sub_node), hence the
// separate tag declaration followed by the typedef.
struct cntl_s
{
	// Basic fields (usually required).
	opid_t         family;
	bszid_t        bszid;
	void_fp        var_func;
	struct cntl_s* sub_prenode;
	struct cntl_s* sub_node;

	// Optional fields (needed only by some operations such as packm).
	// NOTE: first field of params must be a uint64_t containing the size
	// of the struct.
	void*          params;

	// Internal fields that track "cached" data.
	mem_t          pack_mem;
};
typedef struct cntl_s cntl_t;


// -- Blocksize object type --

// One value per floating-point type (BLIS_NUM_FP_TYPES entries) in each
// array.
typedef struct blksz_s
{
	// Primary blocksize values.
	dim_t  v[BLIS_NUM_FP_TYPES];

	// Blocksize extensions.
	dim_t  e[BLIS_NUM_FP_TYPES];

} blksz_t;


// -- Function pointer object type --

// One function address per floating-point type.
typedef struct func_s
{
	// Kernel function address.
	void_fp ptr[BLIS_NUM_FP_TYPES];

} func_t;


// -- Multi-boolean object type --

// One boolean per floating-point type.
typedef struct mbool_s
{
	bool_t v[BLIS_NUM_FP_TYPES];

} mbool_t;


// -- Auxiliary kernel info type --

// Note: This struct is used by macro-kernels to package together extra
// parameter values that may be of use to the micro-kernel without
// cluttering up the micro-kernel interface itself.

typedef struct
{
	// The pack schemas of A and B.
	pack_t schema_a;
	pack_t schema_b;

	// Pointers to the micro-panels of A and B which will be used by the
	// next call to the micro-kernel.
	void*  a_next;
	void*  b_next;

	// The imaginary strides of A and B.
	inc_t  is_a;
	inc_t  is_b;

	// The panel strides of A and B.
	// NOTE: These are only used in situations where iteration over the
	// micropanels takes place in part within the kernel code (e.g. sup
	// millikernels).
	inc_t  ps_a;
	inc_t  ps_b;

	// The type to convert to on output.
	//num_t  dt_on_output;

} auxinfo_t;


// -- Global scalar constant data struct --

// Note: This struct is used only when statically initializing the
// global scalar constants in bli_const.c.
// Holds one value in each representation: float (s), double (d),
// single-precision complex (c), double-precision complex (z), and
// integer (i).
typedef struct constdata_s
{
	float    s;
	double   d;
	scomplex c;
	dcomplex z;
	gint_t   i;

} constdata_t;


//
// -- BLIS object type definitions ---------------------------------------------
//

// NOTE: The initializer macros and the two copy functions defined after this
// struct name every field explicitly, so they must be updated whenever a
// field is added to or removed from obj_t.
typedef struct obj_s
{
	// Basic fields
	struct obj_s* root;

	dim_t         off[2];
	dim_t         dim[2];
	doff_t        diag_off;

	objbits_t     info;
	objbits_t     info2;
	siz_t         elem_size;

	void*         buffer;
	inc_t         rs;
	inc_t         cs;
	inc_t         is;

	// Bufferless scalar storage
	atom_t        scalar;

	// Pack-related fields
	dim_t         m_padded; // m dimension of matrix, including any padding
	dim_t         n_padded; // n dimension of matrix, including any padding
	inc_t         ps;       // panel stride (distance to next panel)
	inc_t         pd;       // panel dimension (the "width" of a panel:
	                        // usually MR or NR)
	dim_t         m_panel;  // m dimension of a "full" panel
	dim_t         n_panel;  // n dimension of a "full" panel
} obj_t;

// Pre-initializers. Things that must be set afterwards:
// - root object pointer
// - info bitfields: dt, target_dt, exec_dt, comp_dt
// - info2 bitfields: scalar_dt
// - elem_size
// - dims, strides
// - buffer
// - internal scalar buffer (must always set imaginary component)

// Brace-enclosed initializer for an obj_t with 0 x 0 dimensions, a NULL
// root and buffer, and the DENSE and GENERAL bits set in info.
#define BLIS_OBJECT_INITIALIZER \
{ \
	.root      = NULL, \
\
	.off       = { 0, 0 }, \
	.dim       = { 0, 0 }, \
	.diag_off  = 0, \
\
	.info      = 0x0 | BLIS_BITVAL_DENSE      | \
	                   BLIS_BITVAL_GENERAL, \
	.info2     = 0x0, \
	.elem_size = sizeof( float ), /* this is changed later. */ \
\
	.buffer    = NULL, \
	.rs        = 0, \
	.cs        = 0, \
	.is        = 1, \
\
	.scalar    = { 0.0, 0.0 }, \
\
	.m_padded  = 0, \
	.n_padded  = 0, \
	.ps        = 0, \
	.pd        = 0, \
	.m_panel   = 0, \
	.n_panel   = 0  \
}

// Same as BLIS_OBJECT_INITIALIZER, except that the dimensions are 1 x 1.
#define BLIS_OBJECT_INITIALIZER_1X1 \
{ \
	.root      = NULL, \
\
	.off       = { 0, 0 }, \
	.dim       = { 1, 1 }, \
	.diag_off  = 0, \
\
	.info      = 0x0 | BLIS_BITVAL_DENSE      | \
	                   BLIS_BITVAL_GENERAL, \
	.info2     = 0x0, \
	.elem_size = sizeof( float ), /* this is changed later. */ \
\
	.buffer    = NULL, \
	.rs        = 0, \
	.cs        = 0, \
	.is        = 1, \
\
	.scalar    = { 0.0, 0.0 }, \
\
	.m_padded  = 0, \
	.n_padded  = 0, \
	.ps        = 0, \
	.pd        = 0, \
	.m_panel   = 0, \
	.n_panel   = 0  \
}

// Define these functions here since they must be updated if contents of
// obj_t changes.

// Copies every field of obj_t from a into b, one field at a time. Pointer
// fields (root, buffer) are copied as pointers, so b refers to the same
// root object and buffer as a.
static void bli_obj_init_full_shallow_copy_of( obj_t* a, obj_t* b )
{
	b->root      = a->root;

	b->off[0]    = a->off[0];
	b->off[1]    = a->off[1];
	b->dim[0]    = a->dim[0];
	b->dim[1]    = a->dim[1];
	b->diag_off  = a->diag_off;

	b->info      = a->info;
	b->info2     = a->info2;
	b->elem_size = a->elem_size;

	b->buffer    = a->buffer;
	b->rs        = a->rs;
	b->cs        = a->cs;
	b->is        = a->is;

	b->scalar    = a->scalar;

	//b->pack_mem  = a->pack_mem;
	b->m_padded  = a->m_padded;
	b->n_padded  = a->n_padded;
	b->ps        = a->ps;
	b->pd        = a->pd;
	b->m_panel   = a->m_panel;
	b->n_panel   = a->n_panel;
}

// Copies every field of obj_t from a into b except dim[0] and dim[1],
// which are left untouched in b for the caller to set.
static void bli_obj_init_subpart_from( obj_t* a, obj_t* b )
{
	b->root      = a->root;

	b->off[0]    = a->off[0];
	b->off[1]    = a->off[1];
	// Avoid copying m and n since they will be overwritten.
	//b->dim[0]    = a->dim[0];
	//b->dim[1]    = a->dim[1];
	b->diag_off  = a->diag_off;

	b->info      = a->info;
	b->info2     = a->info2;
	b->elem_size = a->elem_size;

	b->buffer    = a->buffer;
	b->rs        = a->rs;
	b->cs        = a->cs;
	b->is        = a->is;

	b->scalar    = a->scalar;

	// Avoid copying pack_mem entry.
	// FGVZ: You should probably make sure this is right.
	//b->pack_mem  = a->pack_mem;
	b->m_padded  = a->m_padded;
	b->n_padded  = a->n_padded;
	b->ps        = a->ps;
	b->pd        = a->pd;
	b->m_panel   = a->m_panel;
	b->n_panel   = a->n_panel;
}

// Initializers for global scalar constants.
// NOTE: These must remain cpp macros since they are initializer
// expressions, not functions.
// Brace-enclosed initializer for a 1 x 1 obj_t with the CONST_TYPE, DENSE
// and GENERAL bits set in info, unit strides, and buffer0 as its buffer.
// Fields not named here (e.g. scalar and the pack-related fields) are left
// to the language's default zero-initialization.
#define bli_obj_init_const( buffer0 ) \
{ \
	.root      = NULL, \
\
	.off       = { 0, 0 }, \
	.dim       = { 1, 1 }, \
	.diag_off  = 0, \
\
	.info      = 0x0 | BLIS_BITVAL_CONST_TYPE | \
	                   BLIS_BITVAL_DENSE      | \
	                   BLIS_BITVAL_GENERAL, \
	.info2     = 0x0, \
	.elem_size = sizeof( constdata_t ), \
\
	.buffer    = buffer0, \
	.rs        = 1, \
	.cs        = 1, \
	.is        = 1  \
}

// Brace-enclosed initializer that stores val, cast to each member's type,
// in a constdata_t. The imaginary parts of the complex members are zero.
#define bli_obj_init_constdata( val ) \
{ \
	.s =           ( float  )val, \
	.d =           ( double )val, \
	.c = { .real = ( float  )val, .imag = 0.0f }, \
	.z = { .real = ( double )val, .imag = 0.0 }, \
	.i =           ( gint_t )val, \
}


// -- Context type --

// Tables of blocksizes, kernel function pointers and kernel preferences,
// each indexed by the id range named in its array bound, followed by the
// method and pack schema fields.
typedef struct cntx_s
{
	blksz_t   blkszs[ BLIS_NUM_BLKSZS ];
	bszid_t   bmults[ BLIS_NUM_BLKSZS ];

	func_t    l3_vir_ukrs[ BLIS_NUM_LEVEL3_UKRS ];
	func_t    l3_nat_ukrs[ BLIS_NUM_LEVEL3_UKRS ];
	mbool_t   l3_nat_ukrs_prefs[ BLIS_NUM_LEVEL3_UKRS ];

	blksz_t   l3_sup_thresh[ BLIS_NUM_THRESH ];
	void*     l3_sup_handlers[ BLIS_NUM_LEVEL3_OPS ];
	blksz_t   l3_sup_blkszs[ BLIS_NUM_BLKSZS ];
	func_t    l3_sup_kers[ BLIS_NUM_3OP_RC_COMBOS ];
	mbool_t   l3_sup_kers_prefs[ BLIS_NUM_3OP_RC_COMBOS ];

	func_t    l1f_kers[ BLIS_NUM_LEVEL1F_KERS ];
	func_t    l1v_kers[ BLIS_NUM_LEVEL1V_KERS ];

	func_t    packm_kers[ BLIS_NUM_PACKM_KERS ];
	func_t    unpackm_kers[ BLIS_NUM_UNPACKM_KERS ];

	ind_t     method;
	pack_t    schema_a_block;
	pack_t    schema_b_panel;
	pack_t    schema_c_panel;

} cntx_t;


// -- Runtime type --

// NOTE: The order of these fields must be kept consistent with the definition
// of the BLIS_RNTM_INITIALIZER macro in bli_rntm.h.
typedef struct rntm_s
{
	// "External" fields: these may be queried by the end-user.
	dim_t     num_threads;
	dim_t     thrloop[ BLIS_NUM_LOOPS ];
	bool_t    pack_a; // enable/disable packing of left-hand matrix A.
	bool_t    pack_b; // enable/disable packing of right-hand matrix B.
	bool_t    l3_sup; // enable/disable small matrix handling in level-3 ops.

	// "Internal" fields: these should not be exposed to the end-user.

	// The small block pool, which is attached in the l3 thread decorator.
	pool_t*   sba_pool;

	// The packing block allocator, which is attached in the l3 thread decorator.
	membrk_t* membrk;

} rntm_t;


// -- Error types --

// Error-checking levels.
typedef enum
{
	BLIS_NO_ERROR_CHECKING = 0,
	BLIS_FULL_ERROR_CHECKING
} errlev_t;

// Error codes. All values are negative and grouped by category in blocks of
// ten. BLIS_ERROR_CODE_MIN (-9) and BLIS_ERROR_CODE_MAX (-170) bracket the
// category-specific codes (-10 through -165).
typedef enum
{
	// Generic error codes
	BLIS_SUCCESS                               = (  -1),
	BLIS_FAILURE                               = (  -2),

	BLIS_ERROR_CODE_MIN                        = (  -9),

	// General errors
	BLIS_INVALID_ERROR_CHECKING_LEVEL          = ( -10),
	BLIS_UNDEFINED_ERROR_CODE                  = ( -11),
	BLIS_NULL_POINTER                          = ( -12),
	BLIS_NOT_YET_IMPLEMENTED                   = ( -13),

	// Parameter-specific errors
	BLIS_INVALID_SIDE                          = ( -20),
	BLIS_INVALID_UPLO                          = ( -21),
	BLIS_INVALID_TRANS                         = ( -22),
	BLIS_INVALID_CONJ                          = ( -23),
	BLIS_INVALID_DIAG                          = ( -24),
	BLIS_INVALID_MACHVAL                       = ( -25),
	BLIS_EXPECTED_NONUNIT_DIAG                 = ( -26),

	// Datatype-specific errors
	BLIS_INVALID_DATATYPE                      = ( -30),
	BLIS_EXPECTED_FLOATING_POINT_DATATYPE      = ( -31),
	BLIS_EXPECTED_NONINTEGER_DATATYPE          = ( -32),
	BLIS_EXPECTED_NONCONSTANT_DATATYPE         = ( -33),
	BLIS_EXPECTED_REAL_DATATYPE                = ( -34),
	BLIS_EXPECTED_INTEGER_DATATYPE             = ( -35),
	BLIS_INCONSISTENT_DATATYPES                = ( -36),
	BLIS_EXPECTED_REAL_PROJ_OF                 = ( -37),
	BLIS_EXPECTED_REAL_VALUED_OBJECT           = ( -38),
	BLIS_INCONSISTENT_PRECISIONS               = ( -39),

	// Dimension-specific errors
	BLIS_NONCONFORMAL_DIMENSIONS               = ( -40),
	BLIS_EXPECTED_SCALAR_OBJECT                = ( -41),
	BLIS_EXPECTED_VECTOR_OBJECT                = ( -42),
	BLIS_UNEQUAL_VECTOR_LENGTHS                = ( -43),
	BLIS_EXPECTED_SQUARE_OBJECT                = ( -44),
	BLIS_UNEXPECTED_OBJECT_LENGTH              = ( -45),
	BLIS_UNEXPECTED_OBJECT_WIDTH               = ( -46),
	BLIS_UNEXPECTED_VECTOR_DIM                 = ( -47),
	BLIS_UNEXPECTED_DIAG_OFFSET                = ( -48),
	BLIS_NEGATIVE_DIMENSION                    = ( -49),

	// Stride-specific errors
	BLIS_INVALID_ROW_STRIDE                    = ( -50),
	BLIS_INVALID_COL_STRIDE                    = ( -51),
	BLIS_INVALID_DIM_STRIDE_COMBINATION        = ( -52),

	// Structure-specific errors
	BLIS_EXPECTED_GENERAL_OBJECT               = ( -60),
	BLIS_EXPECTED_HERMITIAN_OBJECT             = ( -61),
	BLIS_EXPECTED_SYMMETRIC_OBJECT             = ( -62),
	BLIS_EXPECTED_TRIANGULAR_OBJECT            = ( -63),

	// Storage-specific errors
	BLIS_EXPECTED_UPPER_OR_LOWER_OBJECT        = ( -70),

	// Partitioning-specific errors
	BLIS_INVALID_3x1_SUBPART                   = ( -80),
	BLIS_INVALID_1x3_SUBPART                   = ( -81),
	BLIS_INVALID_3x3_SUBPART                   = ( -82),

	// Control tree-specific errors
	BLIS_UNEXPECTED_NULL_CONTROL_TREE          = ( -90),

	// Packing-specific errors
	BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK  = (-100),

	// Buffer-specific errors
	BLIS_EXPECTED_NONNULL_OBJECT_BUFFER        = (-110),

	// Memory errors
	BLIS_MALLOC_RETURNED_NULL                  = (-120),

	// Internal memory pool errors
	BLIS_INVALID_PACKBUF                       = (-130),
	BLIS_EXHAUSTED_CONTIG_MEMORY_POOL          = (-131),
	BLIS_INSUFFICIENT_STACK_BUF_SIZE           = (-132),
	BLIS_ALIGNMENT_NOT_POWER_OF_TWO            = (-133),
	BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE        = (-134),

	// Object-related errors
	BLIS_EXPECTED_OBJECT_ALIAS                 = (-140),

	// Architecture-related errors
	BLIS_INVALID_ARCH_ID                       = (-150),

	// Blocksize-related errors
	BLIS_MC_DEF_NONMULTIPLE_OF_MR              = (-160),
	BLIS_MC_MAX_NONMULTIPLE_OF_MR              = (-161),
	BLIS_NC_DEF_NONMULTIPLE_OF_NR              = (-162),
	BLIS_NC_MAX_NONMULTIPLE_OF_NR              = (-163),
	BLIS_KC_DEF_NONMULTIPLE_OF_KR              = (-164),
	BLIS_KC_MAX_NONMULTIPLE_OF_KR              = (-165),

	BLIS_ERROR_CODE_MAX                        = (-170)
} err_t;

#endif
blis-0.6.1/frame/include/bli_x86_asm_macros.h000066400000000000000000001320451360743507500210330ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2018, The University of Texas at Austin
   Copyright (C) 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_X86_ASM_MACROS_H #define BLIS_X86_ASM_MACROS_H // // Assembly macros to make inline x86 with AT&T syntax somewhat less painful // // "Private" macros end with _ // // Default syntax is Intel #if !defined(BLIS_ASM_SYNTAX_ATT) && !defined(BLIS_ASM_SYNTAX_INTEL) #define BLIS_ASM_SYNTAX_INTEL #endif #define STRINGIFY_(...) #__VA_ARGS__ #define GET_MACRO_(_1_,_2_,_3_,_4_,NAME,...) NAME #if (defined(_WIN32) && !defined(__clang__) && !defined(__MINGW32__)) || defined(__MIC__) // Intel-style assembly blocks #define BEGIN_ASM __asm { #define END_ASM(...) 
} #ifdef BLIS_ASM_SYNTAX_INTEL #define INSTR_4_(name,_0,_1,_2,_3) name _0,_1,_2,_3 #define INSTR_3_(name,_0,_1,_2) name _0,_1,_2 #define INSTR_2_(name,_0,_1) name _0,_1 #define INSTR_1_(name,_0) name _0 #define INSTR_0_(name) name #else #define INSTR_4_(name,_0,_1,_2,_3) name _3,_2,_1,_0 #define INSTR_3_(name,_0,_1,_2) name _2,_1,_0 #define INSTR_2_(name,_0,_1) name _1,_0 #define INSTR_1_(name,_0) name _0 #define INSTR_0_(name) name #endif #define LABEL(label) label: #define REGISTER_(r) r #define IMM(x) x #define VAR(x) x #define MASK_(x) {x} #define JMP_(insn, target) insn target #define MEM_4_(reg,off,scale,disp) [reg + off*scale + disp] #define MEM_3_(reg,off,scale) [reg + off*scale] #define MEM_2_(reg,disp) [reg + disp] #define MEM_1_(reg) [reg] #define ALIGN4 align 4 #define ALIGN8 align 8 #define ALIGN16 align 16 #define ALIGN32 align 32 #else // GCC extended assembly with AT&T syntax #define COMMENT_BEGIN "#" #define COMMENT_END #define BEGIN_ASM() __asm__ volatile ( #define END_ASM(...) 
__VA_ARGS__ ); #ifdef BLIS_ASM_SYNTAX_ATT #define INSTR_4_(name,_0,_1,_2,_3) STRINGIFY_(name) " " STRINGIFY_(_0,_1,_2,_3) "\n\t" #define INSTR_3_(name,_0,_1,_2) STRINGIFY_(name) " " STRINGIFY_(_0,_1,_2) "\n\t" #define INSTR_2_(name,_0,_1) STRINGIFY_(name) " " STRINGIFY_(_0,_1) "\n\t" #define INSTR_1_(name,_0) STRINGIFY_(name) " " STRINGIFY_(_0) "\n\t" #define INSTR_0_(name) STRINGIFY_(name) "\n\t" #else #define INSTR_4_(name,_0,_1,_2,_3) STRINGIFY_(name) " " STRINGIFY_(_3,_2,_1,_0) "\n\t" #define INSTR_3_(name,_0,_1,_2) STRINGIFY_(name) " " STRINGIFY_(_2,_1,_0) "\n\t" #define INSTR_2_(name,_0,_1) STRINGIFY_(name) " " STRINGIFY_(_1,_0) "\n\t" #define INSTR_1_(name,_0) STRINGIFY_(name) " " STRINGIFY_(_0) "\n\t" #define INSTR_0_(name) STRINGIFY_(name) "\n\t" #endif #if BLIS_OS_OSX #define LABEL_(label) "L" STRINGIFY_(label) "%=" #else #define LABEL_(label) ".L" STRINGIFY_(label) "%=" #endif #define REGISTER_(r) %%r #define IMM(x) $##x #define VAR(x) %[x] #define MASK_(x) %{x%} #define LABEL(target) LABEL_(target) ":\n\t" #define JMP_(insn, target) STRINGIFY_(insn) " " LABEL_(target) "\n\t" #define MEM_4_(reg,off,scale,disp) disp(reg,off,scale) #define MEM_3_(reg,off,scale) (reg,off,scale) #define MEM_2_(reg,disp) disp(reg) #define MEM_1_(reg) (reg) #define ALIGN4 ".p2align 2 \n\t" #define ALIGN8 ".p2align 3 \n\t" #define ALIGN16 ".p2align 4 \n\t" #define ALIGN32 ".p2align 5 \n\t" #endif #define begin_asm() BEGIN_ASM() #define end_asm(...) END_ASM(__VA_ARGS__) #define label(...) LABEL(__VA_ARGS__) #define imm(...) IMM(__VA_ARGS__) #define var(...) 
VAR(__VA_ARGS__) #define align16 ALIGN16 #define align32 ALIGN32 // General-purpose registers #define AL REGISTER_(al) #define AH REGISTER_(ah) #define BL REGISTER_(bl) #define BH REGISTER_(bh) #define CL REGISTER_(cl) #define CH REGISTER_(ch) #define DL REGISTER_(dl) #define DH REGISTER_(dh) #define R8B REGISTER_(r8b) #define R9B REGISTER_(r9b) #define R10B REGISTER_(r10b) #define R11B REGISTER_(r11b) #define R12B REGISTER_(r12b) #define R13B REGISTER_(r13b) #define R14B REGISTER_(r14b) #define R15B REGISTER_(r15b) #define al AL #define ah AH #define bl BL #define bh BH #define cl CL #define ch CH #define dl DL #define dh DH #define r8b R8B #define r9b R9B #define r10b R10B #define r11b R11B #define r12b R12B #define r13b R13B #define r14b R14B #define r15b R15B #define AX REGISTER_(ax) #define BX REGISTER_(bx) #define CX REGISTER_(cx) #define DX REGISTER_(dx) #define SI REGISTER_(si) #define DI REGISTER_(di) #define BP REGISTER_(bp) #define SP REGISTER_(sp) #define R8W REGISTER_(r8w) #define R9W REGISTER_(r9w) #define R10W REGISTER_(r10w) #define R11W REGISTER_(r11w) #define R12W REGISTER_(r12w) #define R13W REGISTER_(r13w) #define R14W REGISTER_(r14w) #define R15W REGISTER_(r15w) #define ax AX #define bx BX #define cx CX #define dx DX #define si SI #define di DI #define bp BP #define sp SP #define r8w R8W #define r9w R9W #define r10w R10W #define r11w R11W #define r12w R12W #define r13w R13W #define r14w R14W #define r15w R15W #define EAX REGISTER_(eax) #define EBX REGISTER_(ebx) #define ECX REGISTER_(ecx) #define EDX REGISTER_(edx) #define ESP REGISTER_(esp) #define EBP REGISTER_(ebp) #define EDI REGISTER_(edi) #define ESI REGISTER_(esi) #define R8D REGISTER_(r8d) #define R9D REGISTER_(r9d) #define R10D REGISTER_(r10d) #define R11D REGISTER_(r11d) #define R12D REGISTER_(r12d) #define R13D REGISTER_(r13d) #define R14D REGISTER_(r14d) #define R15D REGISTER_(r15d) #define eax EAX #define ebx EBX #define ecx ECX #define edx EDX #define esp ESP #define ebp EBP 
#define edi EDI #define esi ESI #define r8d R8D #define r9d R9D #define r10d R10D #define r11d R11D #define r12d R12D #define r13d R13D #define r14d R14D #define r15d R15D #define RAX REGISTER_(rax) #define RBX REGISTER_(rbx) #define RCX REGISTER_(rcx) #define RDX REGISTER_(rdx) #define RSP REGISTER_(rsp) #define RBP REGISTER_(rbp) #define RDI REGISTER_(rdi) #define RSI REGISTER_(rsi) #define R8 REGISTER_(r8) #define R9 REGISTER_(r9) #define R10 REGISTER_(r10) #define R11 REGISTER_(r11) #define R12 REGISTER_(r12) #define R13 REGISTER_(r13) #define R14 REGISTER_(r14) #define R15 REGISTER_(r15) #define rax RAX #define rbx RBX #define rcx RCX #define rdx RDX #define rsp RSP #define rbp RBP #define rdi RDI #define rsi RSI #define r8 R8 #define r9 R9 #define r10 R10 #define r11 R11 #define r12 R12 #define r13 R13 #define r14 R14 #define r15 R15 // Vector registers #define XMM(x) REGISTER_(Xmm##x) #define YMM(x) REGISTER_(Ymm##x) #define ZMM(x) REGISTER_(Zmm##x) #define K(x) REGISTER_(k##x) #define MASK_K(n) MASK_(K(n)) #define MASK_KZ(n) MASK_(K(n))MASK_(z) #define xmm(x) XMM(x) #define ymm(x) YMM(x) #define zmm(x) ZMM(x) #define k(x) K(x) #define mask_k(x) MASK_K(x) #define mask_kz(x) MASK_KZ(x) #define XMM0 XMM(0) #define XMM1 XMM(1) #define XMM2 XMM(2) #define XMM3 XMM(3) #define XMM4 XMM(4) #define XMM5 XMM(5) #define XMM6 XMM(6) #define XMM7 XMM(7) #define XMM8 XMM(8) #define XMM9 XMM(9) #define XMM10 XMM(10) #define XMM11 XMM(11) #define XMM12 XMM(12) #define XMM13 XMM(13) #define XMM14 XMM(14) #define XMM15 XMM(15) #define XMM16 XMM(16) #define XMM17 XMM(17) #define XMM18 XMM(18) #define XMM19 XMM(19) #define XMM20 XMM(20) #define XMM21 XMM(21) #define XMM22 XMM(22) #define XMM23 XMM(23) #define XMM24 XMM(24) #define XMM25 XMM(25) #define XMM26 XMM(26) #define XMM27 XMM(27) #define XMM28 XMM(28) #define XMM29 XMM(29) #define XMM30 XMM(30) #define XMM31 XMM(31) #define YMM0 YMM(0) #define YMM1 YMM(1) #define YMM2 YMM(2) #define YMM3 YMM(3) #define YMM4 YMM(4) 
#define YMM5 YMM(5) #define YMM6 YMM(6) #define YMM7 YMM(7) #define YMM8 YMM(8) #define YMM9 YMM(9) #define YMM10 YMM(10) #define YMM11 YMM(11) #define YMM12 YMM(12) #define YMM13 YMM(13) #define YMM14 YMM(14) #define YMM15 YMM(15) #define YMM16 YMM(16) #define YMM17 YMM(17) #define YMM18 YMM(18) #define YMM19 YMM(19) #define YMM20 YMM(20) #define YMM21 YMM(21) #define YMM22 YMM(22) #define YMM23 YMM(23) #define YMM24 YMM(24) #define YMM25 YMM(25) #define YMM26 YMM(26) #define YMM27 YMM(27) #define YMM28 YMM(28) #define YMM29 YMM(29) #define YMM30 YMM(30) #define YMM31 YMM(31) #define ZMM0 ZMM(0) #define ZMM1 ZMM(1) #define ZMM2 ZMM(2) #define ZMM3 ZMM(3) #define ZMM4 ZMM(4) #define ZMM5 ZMM(5) #define ZMM6 ZMM(6) #define ZMM7 ZMM(7) #define ZMM8 ZMM(8) #define ZMM9 ZMM(9) #define ZMM10 ZMM(10) #define ZMM11 ZMM(11) #define ZMM12 ZMM(12) #define ZMM13 ZMM(13) #define ZMM14 ZMM(14) #define ZMM15 ZMM(15) #define ZMM16 ZMM(16) #define ZMM17 ZMM(17) #define ZMM18 ZMM(18) #define ZMM19 ZMM(19) #define ZMM20 ZMM(20) #define ZMM21 ZMM(21) #define ZMM22 ZMM(22) #define ZMM23 ZMM(23) #define ZMM24 ZMM(24) #define ZMM25 ZMM(25) #define ZMM26 ZMM(26) #define ZMM27 ZMM(27) #define ZMM28 ZMM(28) #define ZMM29 ZMM(29) #define ZMM30 ZMM(30) #define ZMM31 ZMM(31) #define xmm0 xmm(0) #define xmm1 xmm(1) #define xmm2 xmm(2) #define xmm3 xmm(3) #define xmm4 xmm(4) #define xmm5 xmm(5) #define xmm6 xmm(6) #define xmm7 xmm(7) #define xmm8 xmm(8) #define xmm9 xmm(9) #define xmm10 xmm(10) #define xmm11 xmm(11) #define xmm12 xmm(12) #define xmm13 xmm(13) #define xmm14 xmm(14) #define xmm15 xmm(15) #define xmm16 xmm(16) #define xmm17 xmm(17) #define xmm18 xmm(18) #define xmm19 xmm(19) #define xmm20 xmm(20) #define xmm21 xmm(21) #define xmm22 xmm(22) #define xmm23 xmm(23) #define xmm24 xmm(24) #define xmm25 xmm(25) #define xmm26 xmm(26) #define xmm27 xmm(27) #define xmm28 xmm(28) #define xmm29 xmm(29) #define xmm30 xmm(30) #define xmm31 xmm(31) #define ymm0 ymm(0) #define ymm1 ymm(1) #define 
ymm2 ymm(2) #define ymm3 ymm(3) #define ymm4 ymm(4) #define ymm5 ymm(5) #define ymm6 ymm(6) #define ymm7 ymm(7) #define ymm8 ymm(8) #define ymm9 ymm(9) #define ymm10 ymm(10) #define ymm11 ymm(11) #define ymm12 ymm(12) #define ymm13 ymm(13) #define ymm14 ymm(14) #define ymm15 ymm(15) #define ymm16 ymm(16) #define ymm17 ymm(17) #define ymm18 ymm(18) #define ymm19 ymm(19) #define ymm20 ymm(20) #define ymm21 ymm(21) #define ymm22 ymm(22) #define ymm23 ymm(23) #define ymm24 ymm(24) #define ymm25 ymm(25) #define ymm26 ymm(26) #define ymm27 ymm(27) #define ymm28 ymm(28) #define ymm29 ymm(29) #define ymm30 ymm(30) #define ymm31 ymm(31) #define zmm0 zmm(0) #define zmm1 zmm(1) #define zmm2 zmm(2) #define zmm3 zmm(3) #define zmm4 zmm(4) #define zmm5 zmm(5) #define zmm6 zmm(6) #define zmm7 zmm(7) #define zmm8 zmm(8) #define zmm9 zmm(9) #define zmm10 zmm(10) #define zmm11 zmm(11) #define zmm12 zmm(12) #define zmm13 zmm(13) #define zmm14 zmm(14) #define zmm15 zmm(15) #define zmm16 zmm(16) #define zmm17 zmm(17) #define zmm18 zmm(18) #define zmm19 zmm(19) #define zmm20 zmm(20) #define zmm21 zmm(21) #define zmm22 zmm(22) #define zmm23 zmm(23) #define zmm24 zmm(24) #define zmm25 zmm(25) #define zmm26 zmm(26) #define zmm27 zmm(27) #define zmm28 zmm(28) #define zmm29 zmm(29) #define zmm30 zmm(30) #define zmm31 zmm(31) // Memory access // MEM(rax) -> (%rax) or [rax] // MEM(rax,0x80) -> 0x80(%rax) or [rax + 0x80] // MEM(rax,rsi,4) -> (%rax,%rsi,4) or [rax + rsi*4] // MEM(rax,rsi,4,0x80) -> 0x80(%rax,%rsi,4) or [rax + rsi*4 + 0x80] #define MEM(...) GET_MACRO_(__VA_ARGS__,MEM_4_,MEM_3_,MEM_2_,MEM_1_)(__VA_ARGS__) #define MEM_1TO8(...) MEM(__VA_ARGS__) MASK_(1to8) #define MEM_1TO16(...) MEM(__VA_ARGS__) MASK_(1to16) #define MEM_BCAST(...) MEM(__VA_ARGS__) MASK_(b) #define mem(...) MEM(__VA_ARGS__) #define mem_1to8(...) MEM_1TO8(__VA_ARGS__) #define mem_1to16(...) MEM_1TO16(__VA_ARGS__) #define mem_bcast(...) MEM_BCAST(__VA_ARGS__) #define VAR_1TO8(...) 
VAR(__VA_ARGS__) MASK_(1to8) #define VAR_1TO16(...) VAR(__VA_ARGS__) MASK_(1to16) #define VAR_BCAST(...) VAR(__VA_ARGS__) MASK_(b) #define var_1to8(...) VAR_1TO8(__VA_ARGS__) #define var_1to16(...) VAR_1TO16(__VA_ARGS__) #define var_bcast(...) VAR_BCAST(__VA_ARGS__) // Instructions #define INSTR_(name,...) GET_MACRO_(__VA_ARGS__,INSTR_4_,INSTR_3_,INSTR_2_, \ INSTR_1_,INSTR_0_)(name,__VA_ARGS__) // Jumps #define JC(_0) JMP_(jc, _0) #define JB(_0) JC(_0) #define JNAE(_0) JC(_0) #define JNC(_0) JMP_(jnc, _0) #define JNB(_0) JNC(_0) #define JAE(_0) JNC(_0) #define jc(_0) JC(_0) #define jb(_0) JB(_0) #define jnae(_0) JNAE(_0) #define jnc(_0) JNC(_0) #define jnb(_0) JNB(_0) #define jae(_0) JAE(_0) #define JO(_0) JMP_(jo, _0) #define JNO(_0) JMP_(jno, _0) #define jo(_0) JO(_0) #define jno(_0) JNO(_0) #define JP(_0) JMP_(jp, _0) #define JPE(_0) JP(_0) #define JNP(_0) JMP_(jnp, _0) #define JPO(_0) JNP(_0) #define jp(_0) JP(_0) #define jpe(_0) JPE(_0) #define jnp(_0) JNP(_0) #define jpo(_0) JPO(_0) #define JS(_0) JMP_(js, _0) #define JNS(_0) JMP_(jns, _0) #define js(_0) JS(_0) #define jns(_0) JNS(_0) #define JA(_0) JMP_(ja, _0) #define JNBE(_0) JA(_0) #define JNA(_0) JMP_(jna, _0) #define JBE(_0) JNA(_0) #define ja(_0) JA(_0) #define jnbe(_0) JNBE(_0) #define jna(_0) JNA(_0) #define jbe(_0) JBE(_0) #define JL(_0) JMP_(jl, _0) #define JNGE(_0) JL(_0) #define JNL(_0) JMP_(jnl, _0) #define JGE(_0) JNL(_0) #define jl(_0) JL(_0) #define jnge(_0) JNGE(_0) #define jnl(_0) JNL(_0) #define jge(_0) JGE(_0) #define JG(_0) JMP_(jg, _0) #define JNLE(_0) JG(_0) #define JNG(_0) JMP_(jng, _0) #define JLE(_0) JNG(_0) #define jg(_0) JG(_0) #define jnle(_0) JNLE(_0) #define jng(_0) JNG(_0) #define jle(_0) JLE(_0) #define JE(_0) JMP_(je, _0) #define JZ(_0) JE(_0) #define JNE(_0) JMP_(jne, _0) #define JNZ(_0) JNE(_0) #define je(_0) JE(_0) #define jz(_0) JZ(_0) #define jne(_0) JNE(_0) #define jnz(_0) JNZ(_0) #define JMP(_0) JMP_(jmp, _0) #define jmp(_0) JMP(_0) #define SETE(_0) INSTR_(sete, _0) 
#define SETZ(_0) SETE(_0) #define sete(_0) SETE(_0) #define setz(_0) SETZ(_0) // Comparisons #define CMP(_0, _1) INSTR_(cmp, _0, _1) #define TEST(_0, _1) INSTR_(test, _0, _1) #define cmp(_0, _1) CMP(_0, _1) #define test(_0, _1) TEST(_0, _1) // Integer math #define AND(_0, _1) INSTR_(and, _0, _1) #define OR(_0, _1) INSTR_(or, _0, _1) #define XOR(_0, _1) INSTR_(xor, _0, _1) #define ADD(_0, _1) INSTR_(add, _0, _1) #define SUB(_0, _1) INSTR_(sub, _0, _1) #define IMUL(_0, _1) INSTR_(imul, _0, _1) #define SAL(...) INSTR_(sal, __VA_ARGS__) #define SAR(...) INSTR_(sar, __VA_ARGS__) #define SHLX(_0, _1, _2) INSTR_(shlx, _0, _1, _2) #define SHRX(_0, _1, _2) INSTR_(shrx, _0, _1, _2) #define RORX(_0, _1, _2) INSTR_(rorx, _0, _1, _2) #define DEC(_0) INSTR_(dec, _0) #define INC(_0) INSTR_(inc, _0) #define and(_0, _1) AND(_0, _1) #define or(_0, _1) OR(_0, _1) #define xor(_0, _1) XOR(_0, _1) #define add(_0, _1) ADD(_0, _1) #define sub(_0, _1) SUB(_0, _1) #define imul(_0, _1) IMUL(_0, _1) #define sal(...) SAL(__VA_ARGS__) #define sar(...) 
SAR(__VA_ARGS__) #define shlx(_0, _1, _2) SHLX(_0, _1, _2) #define shrx(_0, _1, _2) SHRX(_0, _1, _2) #define rorx(_0, _1, _2) RORX(_0, _1, _2) #define dec(_0) DEC(_0) #define inc(_0) INC(_0) // Memory access #define LEA(_0, _1) INSTR_(lea, _0, _1) #define MOV(_0, _1) INSTR_(mov, _0, _1) #define MOVD(_0, _1) INSTR_(movd, _0, _1) #define MOVL(_0, _1) INSTR_(movl, _0, _1) #define MOVQ(_0, _1) INSTR_(movq, _0, _1) #define CMOVA(_0, _1) INSTR_(cmova, _0, _1) #define CMOVAE(_0, _1) INSTR_(cmovae, _0, _1) #define CMOVB(_0, _1) INSTR_(cmovb, _0, _1) #define CMOVBE(_0, _1) INSTR_(cmovbe, _0, _1) #define CMOVC(_0, _1) INSTR_(cmovc, _0, _1) #define CMOVP(_0, _1) INSTR_(cmovp, _0, _1) #define CMOVO(_0, _1) INSTR_(cmovo, _0, _1) #define CMOVS(_0, _1) INSTR_(cmovs, _0, _1) #define CMOVE(_0, _1) INSTR_(cmove, _0, _1) #define CMOVZ(_0, _1) INSTR_(cmovz, _0, _1) #define CMOVG(_0, _1) INSTR_(cmovg, _0, _1) #define CMOVGE(_0, _1) INSTR_(cmovge, _0, _1) #define CMOVL(_0, _1) INSTR_(cmovl, _0, _1) #define CMOVLE(_0, _1) INSTR_(cmovle, _0, _1) #define CMOVNA(_0, _1) INSTR_(cmovna, _0, _1) #define CMOVNAE(_0, _1) INSTR_(cmovnae, _0, _1) #define CMOVNB(_0, _1) INSTR_(cmovnb, _0, _1) #define CMOVNBE(_0, _1) INSTR_(cmovnbe, _0, _1) #define CMOVNC(_0, _1) INSTR_(cmovnc, _0, _1) #define CMOVNP(_0, _1) INSTR_(cmovnp, _0, _1) #define CMOVNO(_0, _1) INSTR_(cmovno, _0, _1) #define CMOVNS(_0, _1) INSTR_(cmovns, _0, _1) #define CMOVNE(_0, _1) INSTR_(cmovne, _0, _1) #define CMOVNZ(_0, _1) INSTR_(cmovnz, _0, _1) #define CMOVNG(_0, _1) INSTR_(cmovng, _0, _1) #define CMOVNGE(_0, _1) INSTR_(cmovnge, _0, _1) #define CMOVNL(_0, _1) INSTR_(cmovnl, _0, _1) #define CMOVNLE(_0, _1) INSTR_(cmovnle, _0, _1) #define lea(_0, _1) LEA(_0, _1) #define mov(_0, _1) MOV(_0, _1) #define movd(_0, _1) MOVD(_0, _1) #define movl(_0, _1) MOVL(_0, _1) #define movq(_0, _1) MOVQ(_0, _1) #define cmova(_0, _1) CMOVA(_0, _1) #define cmovae(_0, _1) CMOVAE(_0, _1) #define cmovb(_0, _1) CMOVB(_0, _1) #define cmovbe(_0, _1) CMOVBE(_0, 
_1) #define cmovc(_0, _1) CMOVC(_0, _1) #define cmovp(_0, _1) CMOVP(_0, _1) #define cmovo(_0, _1) CMOVO(_0, _1) #define cmovs(_0, _1) CMOVS(_0, _1) #define cmove(_0, _1) CMOVE(_0, _1) #define cmovz(_0, _1) CMOVZ(_0, _1) #define cmovg(_0, _1) CMOVG(_0, _1) #define cmovge(_0, _1) CMOVGE(_0, _1) #define cmovl(_0, _1) CMOVL(_0, _1) #define cmovle(_0, _1) CMOVLE(_0, _1) #define cmovna(_0, _1) CMOVNA(_0, _1) #define cmovnae(_0, _1) CMOVNAE(_0, _1) #define cmovnb(_0, _1) CMOVNB(_0, _1) #define cmovnbe(_0, _1) CMOVNBE(_0, _1) #define cmovnc(_0, _1) CMOVNC(_0, _1) #define cmovnp(_0, _1) CMOVNP(_0, _1) #define cmovno(_0, _1) CMOVNO(_0, _1) #define cmovns(_0, _1) CMOVNS(_0, _1) #define cmovne(_0, _1) CMOVNE(_0, _1) #define cmovnz(_0, _1) CMOVNZ(_0, _1) #define cmovng(_0, _1) CMOVNG(_0, _1) #define cmovnge(_0, _1) CMOVNGE(_0, _1) #define cmovnl(_0, _1) CMOVNL(_0, _1) #define cmovnle(_0, _1) CMOVNLE(_0, _1) // Vector moves #define MOVSS(_0, _1) INSTR_(movss, _0, _1) #define MOVSD(_0, _1) INSTR_(movsd, _0, _1) #define MOVAPS(_0, _1) INSTR_(movaps, _0, _1) #define MOVAPD(_0, _1) INSTR_(movaps, _0, _1) //use movaps because it is shorter #define MOVDDUP(_0, _1) INSTR_(movddup, _0, _1) #define MOVLPS(_0, _1) INSTR_(movlps, _0, _1) #define MOVHPS(_0, _1) INSTR_(movhps, _0, _1) #define MOVLPD(_0, _1) INSTR_(movlpd, _0, _1) #define MOVHPD(_0, _1) INSTR_(movhpd, _0, _1) #define movss(_0, _1) MOVSS(_0, _1) #define movsd(_0, _1) MOVSD(_0, _1) #define movaps(_0, _1) MOVAPS(_0, _1) #define movapd(_0, _1) MOVAPD(_0, _1) #define movddup(_0, _1) MOVDDUP(_0, _1) #define movlps(_0, _1) MOVLPS(_0, _1) #define movhps(_0, _1) MOVHPS(_0, _1) #define movlpd(_0, _1) MOVLPD(_0, _1) #define movhpd(_0, _1) MOVHPD(_0, _1) #define VMOVDDUP(_0, _1) INSTR_(vmovddup, _0, _1) #define VMOVSLDUP(_0, _1) INSTR_(vmovsldup, _0, _1) #define VMOVSHDUP(_0, _1) INSTR_(vmovshdup, _0, _1) #define VMOVD(_0, _1) INSTR_(vmovd, _0, _1) #define VMOVQ(_0, _1) INSTR_(vmovq, _0, _1) #define VMOVSS(_0, _1) INSTR_(vmovss, _0, _1) 
// v-prefixed vector moves (uppercase forms, continued). Every uppercase macro
// hands its mnemonic plus operands to INSTR_; the variadic ones forward
// whatever operand list the caller supplies.
#define VMOVSD(_0, _1) INSTR_(vmovsd, _0, _1)
#define VMOVAPS(_0, _1) INSTR_(vmovaps, _0, _1)
#define VMOVUPS(_0, _1) INSTR_(vmovups, _0, _1)
#define VMOVAPD(_0, _1) INSTR_(vmovapd, _0, _1)
#define VMOVUPD(_0, _1) INSTR_(vmovupd, _0, _1)
#define VMOVLPS(...) INSTR_(vmovlps, __VA_ARGS__)
#define VMOVHPS(...) INSTR_(vmovhps, __VA_ARGS__)
#define VMOVLPD(...) INSTR_(vmovlpd, __VA_ARGS__)
#define VMOVHPD(...) INSTR_(vmovhpd, __VA_ARGS__)
#define VMOVDQA(_0, _1) INSTR_(vmovdqa, _0, _1)
#define VMOVDQA32(_0, _1) INSTR_(vmovdqa32, _0, _1)
#define VMOVDQA64(_0, _1) INSTR_(vmovdqa64, _0, _1)
#define VBROADCASTSS(_0, _1) INSTR_(vbroadcastss, _0, _1)
#define VBROADCASTSD(_0, _1) INSTR_(vbroadcastsd, _0, _1)
#define VPBROADCASTD(_0, _1) INSTR_(vpbroadcastd, _0, _1)
#define VPBROADCASTQ(_0, _1) INSTR_(vpbroadcastq, _0, _1)
#define VBROADCASTF128(_0, _1) INSTR_(vbroadcastf128, _0, _1)
#define VBROADCASTF64X4(_0, _1) INSTR_(vbroadcastf64x4, _0, _1)
#define VGATHERDPS(...) INSTR_(vgatherdps, __VA_ARGS__)
#define VSCATTERDPS(_0, _1) INSTR_(vscatterdps, _0, _1)
#define VGATHERDPD(...) INSTR_(vgatherdpd, __VA_ARGS__)
#define VSCATTERDPD(_0, _1) INSTR_(vscatterdpd, _0, _1)
#define VGATHERQPS(...) INSTR_(vgatherqps, __VA_ARGS__)
#define VSCATTERQPS(_0, _1) INSTR_(vscatterqps, _0, _1)
#define VGATHERQPD(...) INSTR_(vgatherqpd, __VA_ARGS__)
#define VSCATTERQPD(_0, _1) INSTR_(vscatterqpd, _0, _1)

// Lowercase aliases for the v-prefixed moves.
#define vmovddup(_0, _1) VMOVDDUP(_0, _1)
#define vmovsldup(_0, _1) VMOVSLDUP(_0, _1)
#define vmovshdup(_0, _1) VMOVSHDUP(_0, _1)
#define vmovd(_0, _1) VMOVD(_0, _1)
#define vmovq(_0, _1) VMOVQ(_0, _1)
#define vmovss(_0, _1) VMOVSS(_0, _1)
#define vmovsd(_0, _1) VMOVSD(_0, _1)
#define vmovaps(_0, _1) VMOVAPS(_0, _1)
#define vmovups(_0, _1) VMOVUPS(_0, _1)
#define vmovapd(_0, _1) VMOVAPD(_0, _1)
#define vmovupd(_0, _1) VMOVUPD(_0, _1)
#define vmovlps(...) VMOVLPS(__VA_ARGS__)
#define vmovhps(...) VMOVHPS(__VA_ARGS__)
#define vmovlpd(...) VMOVLPD(__VA_ARGS__)
#define vmovhpd(...) VMOVHPD(__VA_ARGS__)
#define vmovdqa(_0, _1) VMOVDQA(_0, _1)
#define vmovdqa32(_0, _1) VMOVDQA32(_0, _1)
#define vmovdqa64(_0, _1) VMOVDQA64(_0, _1)
#define vbroadcastss(_0, _1) VBROADCASTSS(_0, _1)
#define vbroadcastsd(_0, _1) VBROADCASTSD(_0, _1)
#define vpbroadcastd(_0, _1) VPBROADCASTD(_0, _1)
#define vpbroadcastq(_0, _1) VPBROADCASTQ(_0, _1)
#define vbroadcastf128(_0, _1) VBROADCASTF128(_0, _1)
#define vbroadcastf64x4(_0, _1) VBROADCASTF64X4(_0, _1)
#define vgatherdps(...) VGATHERDPS(__VA_ARGS__)
#define vscatterdps(_0, _1) VSCATTERDPS(_0, _1)
#define vgatherdpd(...) VGATHERDPD(__VA_ARGS__)
#define vscatterdpd(_0, _1) VSCATTERDPD(_0, _1)
#define vgatherqps(...) VGATHERQPS(__VA_ARGS__)
#define vscatterqps(_0, _1) VSCATTERQPS(_0, _1)
#define vgatherqpd(...) VGATHERQPD(__VA_ARGS__)
#define vscatterqpd(_0, _1) VSCATTERQPD(_0, _1)

// Vector comparisons

#define VPCMPEQB(_0, _1, _2) INSTR_(vpcmpeqb, _0, _1, _2)
#define VPCMPEQW(_0, _1, _2) INSTR_(vpcmpeqw, _0, _1, _2)
#define VPCMPEQD(_0, _1, _2) INSTR_(vpcmpeqd, _0, _1, _2)

#define vpcmpeqb(_0, _1, _2) VPCMPEQB(_0, _1, _2)
#define vpcmpeqw(_0, _1, _2) VPCMPEQW(_0, _1, _2)
#define vpcmpeqd(_0, _1, _2) VPCMPEQD(_0, _1, _2)

// Vector integer math

#define VPADDB(_0, _1, _2) INSTR_(vpaddb, _0, _1, _2)
#define VPADDW(_0, _1, _2) INSTR_(vpaddw, _0, _1, _2)
#define VPADDD(_0, _1, _2) INSTR_(vpaddd, _0, _1, _2)
#define VPADDQ(_0, _1, _2) INSTR_(vpaddq, _0, _1, _2)

#define vpaddb(_0, _1, _2) VPADDB(_0, _1, _2)
#define vpaddw(_0, _1, _2) VPADDW(_0, _1, _2)
#define vpaddd(_0, _1, _2) VPADDD(_0, _1, _2)
#define vpaddq(_0, _1, _2) VPADDQ(_0, _1, _2)

// Vector math

// Two-operand forms.
#define ADDPS(_0, _1) INSTR_(addps, _0, _1)
#define ADDPD(_0, _1) INSTR_(addpd, _0, _1)
#define SUBPS(_0, _1) INSTR_(subps, _0, _1)
#define SUBPD(_0, _1) INSTR_(subpd, _0, _1)
#define MULPS(_0, _1) INSTR_(mulps, _0, _1)
#define MULPD(_0, _1) INSTR_(mulpd, _0, _1)
#define XORPS(_0, _1) INSTR_(xorps, _0, _1)
#define XORPD(_0, _1) INSTR_(xorpd, _0, _1)
#define UCOMISS(_0, _1) INSTR_(ucomiss, _0, _1)
#define UCOMISD(_0, _1) INSTR_(ucomisd, _0, _1)
#define COMISS(_0, _1) INSTR_(comiss, _0, _1)
#define COMISD(_0, _1) INSTR_(comisd, _0, _1)

#define addps(_0, _1) ADDPS(_0, _1)
#define addpd(_0, _1) ADDPD(_0, _1)
#define subps(_0, _1) SUBPS(_0, _1)
#define subpd(_0, _1) SUBPD(_0, _1)
#define mulps(_0, _1) MULPS(_0, _1)
#define mulpd(_0, _1) MULPD(_0, _1)
#define xorps(_0, _1) XORPS(_0, _1)
#define xorpd(_0, _1) XORPD(_0, _1)
#define ucomiss(_0, _1) UCOMISS(_0, _1)
#define ucomisd(_0, _1) UCOMISD(_0, _1)
// The lowercase alias for COMISS was originally misspelled "cmoiss", which
// left comiss() undefined. The correct spelling is provided here; the
// misspelled name is retained only so that existing uses keep compiling.
#define comiss(_0, _1) COMISS(_0, _1)
#define cmoiss(_0, _1) COMISS(_0, _1)
#define comisd(_0, _1) COMISD(_0, _1)

// v-prefixed forms.
#define VADDSUBPS(_0, _1, _2) INSTR_(vaddsubps, _0, _1, _2)
#define VADDSUBPD(_0, _1, _2) INSTR_(vaddsubpd, _0, _1, _2)
#define VHADDPD(_0, _1, _2) INSTR_(vhaddpd, _0, _1, _2)
#define VHADDPS(_0, _1, _2) INSTR_(vhaddps, _0, _1, _2)
#define VUCOMISS(_0, _1) INSTR_(vucomiss, _0, _1)
#define VUCOMISD(_0, _1) INSTR_(vucomisd, _0, _1)
#define VCOMISS(_0, _1) INSTR_(vcomiss, _0, _1)
#define VCOMISD(_0, _1) INSTR_(vcomisd, _0, _1)
#define VADDPS(_0, _1, _2) INSTR_(vaddps, _0, _1, _2)
#define VADDPD(_0, _1, _2) INSTR_(vaddpd, _0, _1, _2)
#define VSUBPS(_0, _1, _2) INSTR_(vsubps, _0, _1, _2)
#define VSUBPD(_0, _1, _2) INSTR_(vsubpd, _0, _1, _2)
#define VMULSS(_0, _1, _2) INSTR_(vmulss, _0, _1, _2)
#define VMULSD(_0, _1, _2) INSTR_(vmulsd, _0, _1, _2)
#define VMULPS(_0, _1, _2) INSTR_(vmulps, _0, _1, _2)
#define VMULPD(_0, _1, _2) INSTR_(vmulpd, _0, _1, _2)
#define VPMULLD(_0, _1, _2) INSTR_(vpmulld, _0, _1, _2)
#define VPMULLQ(_0, _1, _2) INSTR_(vpmullq, _0, _1, _2)
// Identical to the VPADDD definition in the integer-math section above;
// a token-for-token redefinition is permitted and has no effect.
#define VPADDD(_0, _1, _2) INSTR_(vpaddd, _0, _1, _2)
#define VPSLLD(_0, _1, _2) INSTR_(vpslld, _0, _1, _2)
#define VXORPS(_0, _1, _2) INSTR_(vxorps, _0, _1, _2)
#define VXORPD(_0, _1, _2) INSTR_(vxorpd, _0, _1, _2)
#define VPXORD(_0, _1, _2) INSTR_(vpxord, _0, _1, _2)

// Three-operand fused multiply-add family (the table continues below).
#define VFMADD132SS(_0, _1, _2) INSTR_(vfmadd132ss, _0, _1, _2)
#define VFMADD213SS(_0, _1, _2) INSTR_(vfmadd213ss, _0, _1, _2)
#define VFMADD231SS(_0, _1, _2) INSTR_(vfmadd231ss, _0, _1, _2)
#define VFMADD132SD(_0, _1, _2) INSTR_(vfmadd132sd, _0, _1, _2)
#define VFMADD213SD(_0, _1, _2) INSTR_(vfmadd213sd, _0, _1, _2)
#define VFMADD231SD(_0, _1, _2) INSTR_(vfmadd231sd, _0, _1, _2)
#define VFMADD132PS(_0, _1, _2) INSTR_(vfmadd132ps, _0, _1, _2)
#define VFMADD213PS(_0, _1, _2) INSTR_(vfmadd213ps, _0, _1, _2)
#define VFMADD231PS(_0, _1, _2) INSTR_(vfmadd231ps, _0, _1, _2)
#define VFMADD132PD(_0, _1, _2) INSTR_(vfmadd132pd, _0, _1, _2)
#define VFMADD213PD(_0, _1, _2) INSTR_(vfmadd213pd, _0, _1, _2)
#define VFMADD231PD(_0, _1, _2) INSTR_(vfmadd231pd, _0, _1, _2)
#define VFMSUB132SS(_0, _1, _2) INSTR_(vfmsub132ss, _0, _1, _2)
#define VFMSUB213SS(_0, _1, _2) INSTR_(vfmsub213ss, _0, _1, _2)
#define VFMSUB231SS(_0, _1, _2) INSTR_(vfmsub231ss, _0, _1, _2)
#define VFMSUB132SD(_0, _1, _2) INSTR_(vfmsub132sd, _0, _1, _2)
#define VFMSUB213SD(_0, _1, _2) INSTR_(vfmsub213sd, _0, _1, _2)
#define VFMSUB231SD(_0, _1, _2) INSTR_(vfmsub231sd, _0, _1, _2)
#define VFMSUB132PS(_0, _1, _2) INSTR_(vfmsub132ps, _0, _1, _2)
#define VFMSUB213PS(_0, _1, _2) INSTR_(vfmsub213ps, _0, _1, _2)
#define VFMSUB231PS(_0, _1, _2) INSTR_(vfmsub231ps, _0, _1, _2)
#define VFMSUB132PD(_0, _1, _2) INSTR_(vfmsub132pd, _0, _1, _2)
#define VFMSUB213PD(_0, _1, _2) INSTR_(vfmsub213pd, _0, _1, _2)
#define VFMSUB231PD(_0, _1, _2) INSTR_(vfmsub231pd, _0, _1, _2)
#define VFNMADD132SS(_0, _1, _2) INSTR_(vfnmadd132ss, _0, _1, _2)
#define VFNMADD213SS(_0, _1, _2) INSTR_(vfnmadd213ss, _0, _1, _2)
#define VFNMADD231SS(_0, _1, _2) INSTR_(vfnmadd231ss, _0, _1, _2)
#define VFNMADD132SD(_0, _1, _2) INSTR_(vfnmadd132sd, _0, _1, _2)
#define VFNMADD213SD(_0, _1, _2) INSTR_(vfnmadd213sd, _0, _1, _2)
#define VFNMADD231SD(_0, _1, _2) INSTR_(vfnmadd231sd, _0, _1, _2)
#define VFNMADD132PS(_0, _1, _2) INSTR_(vfnmadd132ps, _0, _1, _2)
#define VFNMADD213PS(_0, _1, _2) INSTR_(vfnmadd213ps, _0, _1, _2)
#define VFNMADD231PS(_0, _1, _2) INSTR_(vfnmadd231ps, _0, _1, _2)
#define VFNMADD132PD(_0, _1, _2) INSTR_(vfnmadd132pd, _0, _1, _2) #define VFNMADD213PD(_0, _1, _2) INSTR_(vfnmadd213pd, _0, _1, _2) #define VFNMADD231PD(_0, _1, _2) INSTR_(vfnmadd231pd, _0, _1, _2) #define VFNMSUB132SS(_0, _1, _2) INSTR_(vfnmsub132ss, _0, _1, _2) #define VFNMSUB213SS(_0, _1, _2) INSTR_(vfnmsub213ss, _0, _1, _2) #define VFNMSUB231SS(_0, _1, _2) INSTR_(vfnmsub231ss, _0, _1, _2) #define VFNMSUB132SD(_0, _1, _2) INSTR_(vfnmsub132sd, _0, _1, _2) #define VFNMSUB213SD(_0, _1, _2) INSTR_(vfnmsub213sd, _0, _1, _2) #define VFNMSUB231SD(_0, _1, _2) INSTR_(vfnmsub231sd, _0, _1, _2) #define VFNMSUB132PS(_0, _1, _2) INSTR_(vfnmsub132ps, _0, _1, _2) #define VFNMSUB213PS(_0, _1, _2) INSTR_(vfnmsub213ps, _0, _1, _2) #define VFNMSUB231PS(_0, _1, _2) INSTR_(vfnmsub231ps, _0, _1, _2) #define VFNMSUB132PD(_0, _1, _2) INSTR_(vfnmsub132pd, _0, _1, _2) #define VFNMSUB213PD(_0, _1, _2) INSTR_(vfnmsub213pd, _0, _1, _2) #define VFNMSUB231PD(_0, _1, _2) INSTR_(vfnmsub231pd, _0, _1, _2) #define VFMADDSUB132SS(_0, _1, _2) INSTR_(vfmaddsub132ss, _0, _1, _2) #define VFMADDSUB213SS(_0, _1, _2) INSTR_(vfmaddsub213ss, _0, _1, _2) #define VFMADDSUB231SS(_0, _1, _2) INSTR_(vfmaddsub231ss, _0, _1, _2) #define VFMADDSUB132SD(_0, _1, _2) INSTR_(vfmaddsub132sd, _0, _1, _2) #define VFMADDSUB213SD(_0, _1, _2) INSTR_(vfmaddsub213sd, _0, _1, _2) #define VFMADDSUB231SD(_0, _1, _2) INSTR_(vfmaddsub231sd, _0, _1, _2) #define VFMADDSUB132PS(_0, _1, _2) INSTR_(vfmaddsub132ps, _0, _1, _2) #define VFMADDSUB213PS(_0, _1, _2) INSTR_(vfmaddsub213ps, _0, _1, _2) #define VFMADDSUB231PS(_0, _1, _2) INSTR_(vfmaddsub231ps, _0, _1, _2) #define VFMADDSUB132PD(_0, _1, _2) INSTR_(vfmaddsub132pd, _0, _1, _2) #define VFMADDSUB213PD(_0, _1, _2) INSTR_(vfmaddsub213pd, _0, _1, _2) #define VFMADDSUB231PD(_0, _1, _2) INSTR_(vfmaddsub231pd, _0, _1, _2) #define VFMSUBADD132SS(_0, _1, _2) INSTR_(vfmsubadd132ss, _0, _1, _2) #define VFMSUBADD213SS(_0, _1, _2) INSTR_(vfmsubadd213ss, _0, _1, _2) #define VFMSUBADD231SS(_0, 
_1, _2) INSTR_(vfmsubadd231ss, _0, _1, _2) #define VFMSUBADD132SD(_0, _1, _2) INSTR_(vfmsubadd132sd, _0, _1, _2) #define VFMSUBADD213SD(_0, _1, _2) INSTR_(vfmsubadd213sd, _0, _1, _2) #define VFMSUBADD231SD(_0, _1, _2) INSTR_(vfmsubadd231sd, _0, _1, _2) #define VFMSUBADD132PS(_0, _1, _2) INSTR_(vfmsubadd132ps, _0, _1, _2) #define VFMSUBADD213PS(_0, _1, _2) INSTR_(vfmsubadd213ps, _0, _1, _2) #define VFMSUBADD231PS(_0, _1, _2) INSTR_(vfmsubadd231ps, _0, _1, _2) #define VFMSUBADD132PD(_0, _1, _2) INSTR_(vfmsubadd132pd, _0, _1, _2) #define VFMSUBADD213PD(_0, _1, _2) INSTR_(vfmsubadd213pd, _0, _1, _2) #define VFMSUBADD231PD(_0, _1, _2) INSTR_(vfmsubadd231pd, _0, _1, _2) #define VFMADDSS(_0, _1, _2, _3) INSTR_(vfmaddss, _0, _1, _2, _3) #define VFMADDSD(_0, _1, _2, _3) INSTR_(vfmaddsd, _0, _1, _2, _3) #define VFMADDPS(_0, _1, _2, _3) INSTR_(vfmaddps, _0, _1, _2, _3) #define VFMADDPD(_0, _1, _2, _3) INSTR_(vfmaddpd, _0, _1, _2, _3) #define VFMSUBSS(_0, _1, _2, _3) INSTR_(vfmsubss, _0, _1, _2, _3) #define VFMSUBSD(_0, _1, _2, _3) INSTR_(vfmsubsd, _0, _1, _2, _3) #define VFMSUBPS(_0, _1, _2, _3) INSTR_(vfmsubps, _0, _1, _2, _3) #define VFMSUBPD(_0, _1, _2, _3) INSTR_(vfmsubpd, _0, _1, _2, _3) #define VFNMADDSS(_0, _1, _2, _3) INSTR_(vfnmaddss, _0, _1, _2, _3) #define VFNMADDSD(_0, _1, _2, _3) INSTR_(vfnmaddsd, _0, _1, _2, _3) #define VFNMADDPS(_0, _1, _2, _3) INSTR_(vfnmaddps, _0, _1, _2, _3) #define VFNMADDPD(_0, _1, _2, _3) INSTR_(vfnmaddpd, _0, _1, _2, _3) #define VFNMSUBSS(_0, _1, _2, _3) INSTR_(vfnmsubss, _0, _1, _2, _3) #define VFNMSUBSD(_0, _1, _2, _3) INSTR_(vfnmsubsd, _0, _1, _2, _3) #define VFNMSUBPS(_0, _1, _2, _3) INSTR_(vfnmsubps, _0, _1, _2, _3) #define VFNMSUBPD(_0, _1, _2, _3) INSTR_(vfnmsubpd, _0, _1, _2, _3) #define VFMADDSUBSS(_0, _1, _2, _3) INSTR_(vfmaddsubss, _0, _1, _2, _3) #define VFMADDSUBSD(_0, _1, _2, _3) INSTR_(vfmaddsubsd, _0, _1, _2, _3) #define VFMADDSUBPS(_0, _1, _2, _3) INSTR_(vfmaddsubps, _0, _1, _2, _3) #define VFMADDSUBPD(_0, _1, _2, _3) 
INSTR_(vfmaddsubpd, _0, _1, _2, _3) #define VFMSUBADDSS(_0, _1, _2, _3) INSTR_(vfmsubaddss, _0, _1, _2, _3) #define VFMSUBADDSD(_0, _1, _2, _3) INSTR_(vfmsubaddsd, _0, _1, _2, _3) #define VFMSUBADDPS(_0, _1, _2, _3) INSTR_(vfmsubaddps, _0, _1, _2, _3) #define VFMSUBADDPD(_0, _1, _2, _3) INSTR_(vfmsubaddpd, _0, _1, _2, _3) #define V4FMADDSS(_0, _1, _2) INSTR_(v4fmaddss, _0, _1, _2) #define V4FMADDPS(_0, _1, _2) INSTR_(v4fmaddps, _0, _1, _2) #define V4FNMADDSS(_0, _1, _2) INSTR_(v4fnmaddss, _0, _1, _2) #define V4FNMADDPS(_0, _1, _2) INSTR_(v4fnmaddps, _0, _1, _2) #define vaddsubps(_0, _1, _2) VADDSUBPS(_0, _1, _2) #define vaddsubpd(_0, _1, _2) VADDSUBPD(_0, _1, _2) #define vhaddpd(_0, _1, _2) VHADDPD(_0, _1, _2) #define vhaddps(_0, _1, _2) VHADDPS(_0, _1, _2) #define vucomiss(_0, _1) VUCOMISS(_0, _1) #define vucomisd(_0, _1) VUCOMISD(_0, _1) #define vcomiss(_0, _1) VCOMISS(_0, _1) #define vcomisd(_0, _1) VCOMISD(_0, _1) #define vaddps(_0, _1, _2) VADDPS(_0, _1, _2) #define vaddpd(_0, _1, _2) VADDPD(_0, _1, _2) #define vsubps(_0, _1, _2) VSUBPS(_0, _1, _2) #define vsubpd(_0, _1, _2) VSUBPD(_0, _1, _2) #define vmulss(_0, _1, _2) VMULSS(_0, _1, _2) #define vmulps(_0, _1, _2) VMULPS(_0, _1, _2) #define vmulsd(_0, _1, _2) VMULSD(_0, _1, _2) #define vmulpd(_0, _1, _2) VMULPD(_0, _1, _2) #define vpmulld(_0, _1, _2) VPMULLD(_0, _1, _2) #define vpmullq(_0, _1, _2) VPMULLQ(_0, _1, _2) #define vpaddd(_0, _1, _2) VPADDD(_0, _1, _2) #define vpslld(_0, _1, _2) VPSLLD(_0, _1, _2) #define vxorps(_0, _1, _2) VXORPS(_0, _1, _2) #define vxorpd(_0, _1, _2) VXORPD(_0, _1, _2) #define vpxord(_0, _1, _2) VPXORD(_0, _1, _2) #define vfmadd132ss(_0, _1, _2) VFMADD132SS(_0, _1, _2) #define vfmadd213ss(_0, _1, _2) VFMADD213SS(_0, _1, _2) #define vfmadd231ss(_0, _1, _2) VFMADD231SS(_0, _1, _2) #define vfmadd132sd(_0, _1, _2) VFMADD132SD(_0, _1, _2) #define vfmadd213sd(_0, _1, _2) VFMADD213SD(_0, _1, _2) #define vfmadd231sd(_0, _1, _2) VFMADD231SD(_0, _1, _2) #define vfmadd132ps(_0, _1, _2) 
VFMADD132PS(_0, _1, _2) #define vfmadd213ps(_0, _1, _2) VFMADD213PS(_0, _1, _2) #define vfmadd231ps(_0, _1, _2) VFMADD231PS(_0, _1, _2) #define vfmadd132pd(_0, _1, _2) VFMADD132PD(_0, _1, _2) #define vfmadd213pd(_0, _1, _2) VFMADD213PD(_0, _1, _2) #define vfmadd231pd(_0, _1, _2) VFMADD231PD(_0, _1, _2) #define vfmadd132ss(_0, _1, _2) VFMADD132SS(_0, _1, _2) #define vfmsub213ss(_0, _1, _2) VFMSUB213SS(_0, _1, _2) #define vfmsub231ss(_0, _1, _2) VFMSUB231SS(_0, _1, _2) #define vfmsub132sd(_0, _1, _2) VFMSUB132SD(_0, _1, _2) #define vfmsub213sd(_0, _1, _2) VFMSUB213SD(_0, _1, _2) #define vfmsub231sd(_0, _1, _2) VFMSUB231SD(_0, _1, _2) #define vfmsub132ps(_0, _1, _2) VFMSUB132PS(_0, _1, _2) #define vfmsub213ps(_0, _1, _2) VFMSUB213PS(_0, _1, _2) #define vfmsub231ps(_0, _1, _2) VFMSUB231PS(_0, _1, _2) #define vfmsub132pd(_0, _1, _2) VFMSUB132PD(_0, _1, _2) #define vfmsub213pd(_0, _1, _2) VFMSUB213PD(_0, _1, _2) #define vfmsub231pd(_0, _1, _2) VFMSUB231PD(_0, _1, _2) #define vfnmadd132ss(_0, _1, _2) VFNMADD132SS(_0, _1, _2) #define vfnmadd213ss(_0, _1, _2) VFNMADD213SS(_0, _1, _2) #define vfnmadd231ss(_0, _1, _2) VFNMADD231SS(_0, _1, _2) #define vfnmadd132sd(_0, _1, _2) VFNMADD132SD(_0, _1, _2) #define vfnmadd213sd(_0, _1, _2) VFNMADD213SD(_0, _1, _2) #define vfnmadd231sd(_0, _1, _2) VFNMADD231SD(_0, _1, _2) #define vfnmadd132ps(_0, _1, _2) VFNMADD132PS(_0, _1, _2) #define vfnmadd213ps(_0, _1, _2) VFNMADD213PS(_0, _1, _2) #define vfnmadd231ps(_0, _1, _2) VFNMADD231PS(_0, _1, _2) #define vfnmadd132pd(_0, _1, _2) VFNMADD132PD(_0, _1, _2) #define vfnmadd213pd(_0, _1, _2) VFNMADD213PD(_0, _1, _2) #define vfnmadd231pd(_0, _1, _2) VFNMADD231PD(_0, _1, _2) #define vfnmadd132ss(_0, _1, _2) VFNMADD132SS(_0, _1, _2) #define vfnmsub213ss(_0, _1, _2) VFNMSUB213SS(_0, _1, _2) #define vfnmsub231ss(_0, _1, _2) VFNMSUB231SS(_0, _1, _2) #define vfnmsub132sd(_0, _1, _2) VFNMSUB132SD(_0, _1, _2) #define vfnmsub213sd(_0, _1, _2) VFNMSUB213SD(_0, _1, _2) #define vfnmsub231sd(_0, _1, _2) 
VFNMSUB231SD(_0, _1, _2) #define vfnmsub132ps(_0, _1, _2) VFNMSUB132PS(_0, _1, _2) #define vfnmsub213ps(_0, _1, _2) VFNMSUB213PS(_0, _1, _2) #define vfnmsub231ps(_0, _1, _2) VFNMSUB231PS(_0, _1, _2) #define vfnmsub132pd(_0, _1, _2) VFNMSUB132PD(_0, _1, _2) #define vfnmsub213pd(_0, _1, _2) VFNMSUB213PD(_0, _1, _2) #define vfnmsub231pd(_0, _1, _2) VFNMSUB231PD(_0, _1, _2) #define vfmaddsub132ss(_0, _1, _2) VFMADDSUB132SS(_0, _1, _2) #define vfmaddsub213ss(_0, _1, _2) VFMADDSUB213SS(_0, _1, _2) #define vfmaddsub231ss(_0, _1, _2) VFMADDSUB231SS(_0, _1, _2) #define vfmaddsub132sd(_0, _1, _2) VFMADDSUB132SD(_0, _1, _2) #define vfmaddsub213sd(_0, _1, _2) VFMADDSUB213SD(_0, _1, _2) #define vfmaddsub231sd(_0, _1, _2) VFMADDSUB231SD(_0, _1, _2) #define vfmaddsub132ps(_0, _1, _2) VFMADDSUB132PS(_0, _1, _2) #define vfmaddsub213ps(_0, _1, _2) VFMADDSUB213PS(_0, _1, _2) #define vfmaddsub231ps(_0, _1, _2) VFMADDSUB231PS(_0, _1, _2) #define vfmaddsub132pd(_0, _1, _2) VFMADDSUB132PD(_0, _1, _2) #define vfmaddsub213pd(_0, _1, _2) VFMADDSUB213PD(_0, _1, _2) #define vfmaddsub231pd(_0, _1, _2) VFMADDSUB231PD(_0, _1, _2) #define vfmsubadd132ss(_0, _1, _2) VFMSUBADD132SS(_0, _1, _2) #define vfmsubadd213ss(_0, _1, _2) VFMSUBADD213SS(_0, _1, _2) #define vfmsubadd231ss(_0, _1, _2) VFMSUBADD231SS(_0, _1, _2) #define vfmsubadd132sd(_0, _1, _2) VFMSUBADD132SD(_0, _1, _2) #define vfmsubadd213sd(_0, _1, _2) VFMSUBADD213SD(_0, _1, _2) #define vfmsubadd231sd(_0, _1, _2) VFMSUBADD231SD(_0, _1, _2) #define vfmsubadd132ps(_0, _1, _2) VFMSUBADD132PS(_0, _1, _2) #define vfmsubadd213ps(_0, _1, _2) VFMSUBADD213PS(_0, _1, _2) #define vfmsubadd231ps(_0, _1, _2) VFMSUBADD231PS(_0, _1, _2) #define vfmsubadd132pd(_0, _1, _2) VFMSUBADD132PD(_0, _1, _2) #define vfmsubadd213pd(_0, _1, _2) VFMSUBADD213PD(_0, _1, _2) #define vfmsubadd231pd(_0, _1, _2) VFMSUBADD231PD(_0, _1, _2) #define vfmaddss(_0, _1, _2, _3) VFMADDSS(_0, _1, _2, _3) #define vfmaddsd(_0, _1, _2, _3) VFMADDSD(_0, _1, _2, _3) #define vfmaddps(_0, 
_1, _2, _3) VFMADDPS(_0, _1, _2, _3) #define vfmaddpd(_0, _1, _2, _3) VFMADDPD(_0, _1, _2, _3) #define vfmsubss(_0, _1, _2, _3) VFMSUBSS(_0, _1, _2, _3) #define vfmsubsd(_0, _1, _2, _3) VFMSUBSD(_0, _1, _2, _3) #define vfmsubps(_0, _1, _2, _3) VFMSUBPS(_0, _1, _2, _3) #define vfmsubpd(_0, _1, _2, _3) VFMSUBPD(_0, _1, _2, _3) #define vfnmaddss(_0, _1, _2, _3) VFNMADDSS(_0, _1, _2, _3) #define vfnmaddsd(_0, _1, _2, _3) VFNMADDSD(_0, _1, _2, _3) #define vfnmaddps(_0, _1, _2, _3) VFNMADDPS(_0, _1, _2, _3) #define vfnmaddpd(_0, _1, _2, _3) VFNMADDPD(_0, _1, _2, _3) #define vfnmsubss(_0, _1, _2, _3) VFNMSUBSS(_0, _1, _2, _3) #define vfnmsubsd(_0, _1, _2, _3) VFNMSUBSD(_0, _1, _2, _3) #define vfnmsubps(_0, _1, _2, _3) VFNMSUBPS(_0, _1, _2, _3) #define vfnmsubpd(_0, _1, _2, _3) VFNMSUBPD(_0, _1, _2, _3) #define vfmaddsubss(_0, _1, _2, _3) VFMADDSUBSS(_0, _1, _2, _3) #define vfmaddsubsd(_0, _1, _2, _3) VFMADDSUBSD(_0, _1, _2, _3) #define vfmaddsubps(_0, _1, _2, _3) VFMADDSUBPS(_0, _1, _2, _3) #define vfmaddsubpd(_0, _1, _2, _3) VFMADDSUBPD(_0, _1, _2, _3) #define vfmsubaddss(_0, _1, _2, _3) VFMSUBADDSS(_0, _1, _2, _3) #define vfmsubaddsd(_0, _1, _2, _3) VFMSUBADDSD(_0, _1, _2, _3) #define vfmsubaddps(_0, _1, _2, _3) VFMSUBADDPS(_0, _1, _2, _3) #define vfmsubaddpd(_0, _1, _2, _3) VFMSUBADDPD(_0, _1, _2, _3) #define v4fmaddss(_0, _1, _2) V4FMADDSS(_0, _1, _2) #define v4fmaddps(_0, _1, _2) V4FMADDPS(_0, _1, _2) #define v4fnmaddss(_0, _1, _2) V4FNMADDSS(_0, _1, _2) #define v4fnmaddps(_0, _1, _2) V4FNMADDPS(_0, _1, _2) // Conversions #define CVTSS2SD(_0, _1) INSTR_(cvtss2sd, _0, _1) #define CVTSD2SS(_0, _1) INSTR_(cvtsd2ss, _0, _1) #define CVTPS2PD(_0, _1) INSTR_(cvtps2pd, _0, _1) #define CVTPD2PS(_0, _1) INSTR_(cvtpd2ps, _0, _1) #define cvtss2sd(_0, _1) CVTSS2SD(_0, _1) #define cvtsd2ss(_0, _1) CVTSD2SS(_0, _1) #define cvtps2pd(_0, _1) CVTPS2PD(_0, _1) #define cvtpd2ps(_0, _1) CVTPD2PS(_0, _1) #define VCVTSS2SD(_0, _1) INSTR_(vcvtss2sd, _0, _1) #define VCVTSD2SS(_0, _1) 
INSTR_(vcvtsd2ss, _0, _1) #define VCVTPS2PD(_0, _1) INSTR_(vcvtps2pd, _0, _1) #define VCVTPD2PS(_0, _1) INSTR_(vcvtpd2ps, _0, _1) #define vcvtss2sd(_0, _1) VCVTSS2SD(_0, _1) #define vcvtsd2ss(_0, _1) VCVTSD2SS(_0, _1) #define vcvtps2pd(_0, _1) VCVTPS2PD(_0, _1) #define vcvtpd2ps(_0, _1) VCVTPD2PS(_0, _1) // Vector shuffles #define PSHUFD(_0, _1, _2) INSTR_(pshufd, _0, _1, _2) #define SHUFPS(_0, _1, _2) INSTR_(shufps, _0, _1, _2) #define SHUFPD(_0, _1, _2) INSTR_(shufpd, _0, _1, _2) #define UNPCKLPS(_0, _1) INSTR_(unpcklps, _0, _1) #define UNPCKHPS(_0, _1) INSTR_(unpckhps, _0, _1) #define UNPCKLPD(_0, _1) INSTR_(unpcklpd, _0, _1) #define UNPCKHPD(_0, _1) INSTR_(unpckhpd, _0, _1) #define pshufd(_0, _1, _2) PSHUFD(_0, _1, _2) #define shufps(_0, _1, _2) SHUFPS(_0, _1, _2) #define shufpd(_0, _1, _2) SHUFPD(_0, _1, _2) #define unpcklps(_0, _1) UNPCKLPS(_0, _1) #define unpckhps(_0, _1) UNPCKHPS(_0, _1) #define unpcklpd(_0, _1) UNPCKLPD(_0, _1) #define unpckhpd(_0, _1) UNPCKHPD(_0, _1) #define VSHUFPS(_0, _1, _2, _3) INSTR_(vshufps, _0, _1, _2, _3) #define VSHUFPD(_0, _1, _2, _3) INSTR_(vshufpd, _0, _1, _2, _3) #define VPERMILPS(_0, _1, _2) INSTR_(vpermilps, _0, _1, _2) #define VPERMILPD(_0, _1, _2) INSTR_(vpermilpd, _0, _1, _2) #define VPERM2F128(_0, _1, _2, _3) INSTR_(vperm2f128, _0, _1, _2, _3) #define VPERMPD(_0, _1, _2) INSTR_(vpermpd, _0, _1, _2) #define VUNPCKLPS(_0, _1, _2) INSTR_(vunpcklps, _0, _1, _2) #define VUNPCKHPS(_0, _1, _2) INSTR_(vunpckhps, _0, _1, _2) #define VUNPCKLPD(_0, _1, _2) INSTR_(vunpcklpd, _0, _1, _2) #define VUNPCKHPD(_0, _1, _2) INSTR_(vunpckhpd, _0, _1, _2) #define VSHUFF32X4(_0, _1, _2, _3) INSTR_(vshuff32x4, _0, _1, _2, _3) #define VSHUFF64X2(_0, _1, _2, _3) INSTR_(vshuff64x2, _0, _1, _2, _3) #define VINSERTF128(_0, _1, _2, _3) INSTR_(vinsertf128, _0, _1, _2, _3) #define VINSERTF32X4(_0, _1, _2, _3) INSTR_(vinsertf32x4, _0, _1, _2, _3) #define VINSERTF32X8(_0, _1, _2, _3) INSTR_(vinsertf32x8, _0, _1, _2, _3) #define VINSERTF64X2(_0, _1, _2, 
_3) INSTR_(vinsertf64x2, _0, _1, _2, _3) #define VINSERTF64X4(_0, _1, _2, _3) INSTR_(vinsertf64x4, _0, _1, _2, _3) #define VEXTRACTF128(_0, _1, _2) INSTR_(vextractf128, _0, _1, _2) #define VEXTRACTF32X4(_0, _1, _2) INSTR_(vextractf32x4, _0, _1, _2) #define VEXTRACTF32X8(_0, _1, _2) INSTR_(vextractf32x8, _0, _1, _2) #define VEXTRACTF64X2(_0, _1, _2) INSTR_(vextractf64x4, _0, _1, _2) #define VEXTRACTF64X4(_0, _1, _2) INSTR_(vextractf64x4, _0, _1, _2) #define VBLENDPS(_0, _1, _2, _3) INSTR_(vblendps, _0, _1, _2, _3) #define VBLENDPD(_0, _1, _2, _3) INSTR_(vblendpd, _0, _1, _2, _3) #define VBLENDMPS(_0, _1, _2) INSTR_(vblendmps, _0, _1, _2) #define VBLENDMPD(_0, _1, _2) INSTR_(vblendmpd, _0, _1, _2) #define vshufps(_0, _1, _2, _3) VSHUFPS(_0, _1, _2, _3) #define vshufpd(_0, _1, _2, _3) VSHUFPD(_0, _1, _2, _3) #define vpermilps(_0, _1, _2) VPERMILPS(_0, _1, _2) #define vpermilpd(_0, _1, _2) VPERMILPD(_0, _1, _2) #define vperm2f128(_0, _1, _2, _3) VPERM2F128(_0, _1, _2, _3) #define vpermpd(_0, _1, _2) VPERMPD(_0, _1, _2) #define vunpcklps(_0, _1, _2) VUNPCKLPS(_0, _1, _2) #define vunpckhps(_0, _1, _2) VUNPCKHPS(_0, _1, _2) #define vunpcklpd(_0, _1, _2) VUNPCKLPD(_0, _1, _2) #define vunpckhpd(_0, _1, _2) VUNPCKHPD(_0, _1, _2) #define vshuff32x4(_0, _1, _2, _3) VSHUFF32x4(_0, _1, _2, _3) #define vshuff64x2(_0, _1, _2, _3) VSHUFF64x2(_0, _1, _2, _3) #define vinsertf128(_0, _1, _2, _3) VINSERTF128(_0, _1, _2, _3) #define vinsertf32x4(_0, _1, _2, _3) VINSERTF32x4(_0, _1, _2, _3) #define vinsertf32x8(_0, _1, _2, _3) VINSERTF32x8(_0, _1, _2, _3) #define vinsertf64x2(_0, _1, _2, _3) VINSERTF64x2(_0, _1, _2, _3) #define vinsertf64x4(_0, _1, _2, _3) VINSERTF64x4(_0, _1, _2, _3) #define vextractf128(_0, _1, _2) VEXTRACTF128(_0, _1, _2) #define vextractf32x4(_0, _1, _2) VEXTRACTF32x4(_0, _1, _2) #define vextractf32x8(_0, _1, _2) VEXTRACTF32x8(_0, _1, _2) #define vextractf64x2(_0, _1, _2) VEXTRACTF64x2(_0, _1, _2) #define vextractf64x4(_0, _1, _2) VEXTRACTF64x4(_0, _1, _2) #define 
vblendps(_0, _1, _2, _3) VBLENDPS(_0, _1, _2, _3) #define vblendpd(_0, _1, _2, _3) VBLENDPD(_0, _1, _2, _3) #define vblendmps(_0, _1, _2) VBLENDMSD(_0, _1, _2) #define vblendmpd(_0, _1, _2) VBLENDMPD(_0, _1, _2) // Prefetches #define PREFETCH(_0, _1) INSTR_(prefetcht##_0, _1) #define PREFETCHW0(_0) INSTR_(prefetchw, _0) #define PREFETCHW1(_0) INSTR_(prefetchwt1, _0) #define VGATHERPFDPS(_0, _1) INSTR_(vgatherpf##_0##dps, _1) #define VSCATTERPFDPS(_0, _1) INSTR_(vscatterpf##_0##dps, _1) #define VGATHERPFDPD(_0, _1) INSTR_(vgatherpf##_0##dpd, _1) #define VSCATTERPFDPD(_0, _1) INSTR_(vscatterpf##_0##dpd, _1) #define VGATHERPFQPS(_0, _1) INSTR_(vgatherpf##_0##qps, _1) #define VSCATTERPFQPS(_0, _1) INSTR_(vscatterpf##_0##qps, _1) #define VGATHERPFQPD(_0, _1) INSTR_(vgatherpf##_0##qpd, _1) #define VSCATTERPFQPD(_0, _1) INSTR_(vscatterpf##_0##qpd, _1) #define prefetch(_0, _1) PREFETCH(_0, _1) #define prefetchw0(_0) PREFETCHW0(_0) #define prefetchw1(_0) PREFETCHW1(_0) #define vgatherpfdps(_0, _1) VGATHERPFDPS(_0, _1) #define vscatterpfdps(_0, _1) VSCATTERPFDPS(_0, _1) #define vgatherpfdpd(_0, _1) VGATHERPFDPD(_0, _1) #define vscatterpfdpd(_0, _1) VSCATTERPFDPD(_0, _1) #define vgatherpfqps(_0, _1) VGATHERPFQPS(_0, _1) #define vscatterpfqps(_0, _1) VSCATTERPFQPS(_0, _1) #define vgatherpfqpd(_0, _1) VGATHERPFQPD(_0, _1) #define vscatterpfqpd(_0, _1) VSCATTERPFQPD(_0, _1) // Mask operations #ifdef __MIC__ #define KMOVW(_0, _1) INSTR_(kmov, _0, _1) #define JKNZD(_0, _1) INSTR_(jknzd, _0, _1) #else #define KMOVW(_0, _1) INSTR_(kmovw, _0, _1) #define JKNZD(_0, _1) INSTR_(kortestw, _0, _0) INSTR_(jnz, _1) #endif #define KXNORW(_0, _1, _2) INSTR_(kxnorw, _0, _1, _2) #define KSHIFTRW(_0, _1, _2) INSTR_(kshiftrw, _0, _1, _2) #define kmovw(_0, _1) KMOVW(_0, _1) #define jknzd(_0, _1) JKNZD(_0, _1) #define kxnorw(_0, _1, _2) KXNORW(_0, _1, _2) #define kshiftrw(_0, _1, _2) KSHIFTRW(_0, _1, _2) // Other #define RDTSC() INSTR_(rdtsc) #define VZEROALL() INSTR_(vzeroall) #define VZEROUPPER() 
INSTR_(vzeroupper) #define rdtsc() RDTSC() #define vzeroall() VZEROALL() #define vzeroupper() VZEROUPPER() #endif blis-0.6.1/frame/include/blis.h000066400000000000000000000113331360743507500163010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_H #define BLIS_H // Allow C++ users to include this header file in their source code. However, // we make the extern "C" conditional on whether we're using a C++ compiler, // since regular C compilers don't understand the extern "C" construct. #ifdef __cplusplus extern "C" { #endif // NOTE: PLEASE DON'T CHANGE THE ORDER IN WHICH HEADERS ARE INCLUDED UNLESS // YOU ARE SURE THAT IT DOESN'T BREAK INTER-HEADER MACRO DEPENDENCIES. // -- System headers -- // NOTE: This header must be included before bli_config_macro_defs.h. #include "bli_system.h" // -- configure definitions -- #include "bli_config.h" #include "bli_config_macro_defs.h" // -- Common BLIS definitions -- #include "bli_type_defs.h" #include "bli_macro_defs.h" // -- pragma definitions -- #include "bli_pragma_macro_defs.h" // -- Threading definitions -- #include "bli_thread.h" #include "bli_pthread.h" // -- Constant definitions -- #include "bli_extern_defs.h" // -- BLIS architecture/kernel definitions -- #include "bli_l1v_ker_prot.h" #include "bli_l1f_ker_prot.h" #include "bli_l1m_ker_prot.h" #include "bli_l3_ukr_prot.h" #include "bli_l3_sup_ker_prot.h" #include "bli_arch_config_pre.h" #include "bli_arch_config.h" #include "bli_kernel_macro_defs.h" // -- Base operation prototypes -- #include "bli_init.h" #include "bli_const.h" #include "bli_obj.h" #include "bli_obj_scalar.h" #include "bli_blksz.h" #include "bli_func.h" #include "bli_mbool.h" #include "bli_cntx.h" #include "bli_rntm.h" #include "bli_gks.h" #include "bli_ind.h" #include "bli_membrk.h" #include "bli_pool.h" #include "bli_array.h" #include "bli_apool.h" #include "bli_sba.h" #include "bli_memsys.h" #include "bli_mem.h" #include "bli_part.h" #include "bli_prune.h" #include "bli_query.h" #include "bli_auxinfo.h" #include "bli_param_map.h" #include "bli_clock.h" #include "bli_check.h" #include "bli_error.h" #include "bli_f2c.h" #include "bli_machval.h" #include "bli_getopt.h" #include "bli_opid.h" #include "bli_cntl.h" #include 
"bli_env.h" #include "bli_pack.h" #include "bli_info.h" #include "bli_arch.h" #include "bli_cpuid.h" #include "bli_string.h" #include "bli_setgetij.h" #include "bli_setri.h" #include "bli_castm.h" #include "bli_castnzm.h" #include "bli_castv.h" #include "bli_projm.h" #include "bli_projv.h" // -- Level-0 operations -- #include "bli_l0.h" // -- Level-1v operations -- #include "bli_l1v.h" // -- Level-1d operations -- #include "bli_l1d.h" // -- Level-1f operations -- #include "bli_l1f.h" // -- Level-1m operations -- #include "bli_l1m.h" // -- Level-2 operations -- #include "bli_l2.h" // -- Level-3 operations -- #include "bli_l3.h" // -- Utility operations -- #include "bli_util.h" // -- sandbox implementation -- #include "bli_sbox.h" // -- BLAS compatibility layer -- #include "bli_blas.h" // -- CBLAS compatibility layer -- #include "bli_cblas.h" // -- Windows definitions #include "bli_winsys.h" // End extern "C" construct block. #ifdef __cplusplus } #endif #endif blis-0.6.1/frame/include/level0/000077500000000000000000000000001360743507500163655ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/1e/000077500000000000000000000000001360743507500166725ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/1e/bli_copy1es.h000066400000000000000000000063221360743507500212570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPY1ES_H #define BLIS_COPY1ES_H // copy1es // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. 
// Each macro takes one input (a) and two outputs (bri, bir).

// -- y is s or d --
// Every variant with a real destination expands to an empty block.

#define bli_sscopy1es( a, bri, bir ) {}
#define bli_dscopy1es( a, bri, bir ) {}
#define bli_cscopy1es( a, bri, bir ) {}
#define bli_zscopy1es( a, bri, bir ) {}

#define bli_sdcopy1es( a, bri, bir ) {}
#define bli_ddcopy1es( a, bri, bir ) {}
#define bli_cdcopy1es( a, bri, bir ) {}
#define bli_zdcopy1es( a, bri, bir ) {}

// -- y is c --
// Real sources (s, d) expand to an empty block. Complex sources pass
// ( real(a), imag(a) ) to ??copyris with the parts of bri as destination, and
// ( -imag(a), real(a) ) with the parts of bir as destination.
// NOTE(review): presumably ??copyris( ar, ai, br, bi ) stores ar into br and
// ai into bi; the helper is defined elsewhere -- confirm.

#define bli_sccopy1es( a, bri, bir ) {}
#define bli_dccopy1es( a, bri, bir ) {}
#define bli_cccopy1es( a, bri, bir ) \
{ \
	bli_cccopyris( bli_creal(a), bli_cimag(a), bli_creal(bri), bli_cimag(bri) ); \
	bli_cccopyris( -bli_cimag(a), bli_creal(a), bli_creal(bir), bli_cimag(bir) ); \
}
#define bli_zccopy1es( a, bri, bir ) \
{ \
	bli_zccopyris( bli_zreal(a), bli_zimag(a), bli_creal(bri), bli_cimag(bri) ); \
	bli_zccopyris( -bli_zimag(a), bli_zreal(a), bli_creal(bir), bli_cimag(bir) ); \
}

// -- y is z --
// Same layout as the c-destination group, using the z accessors on bri/bir.

#define bli_szcopy1es( a, bri, bir ) {}
#define bli_dzcopy1es( a, bri, bir ) {}
#define bli_czcopy1es( a, bri, bir ) \
{ \
	bli_czcopyris( bli_creal(a), bli_cimag(a), bli_zreal(bri), bli_zimag(bri) ); \
	bli_czcopyris( -bli_cimag(a), bli_creal(a), bli_zreal(bir), bli_zimag(bir) ); \
}
#define bli_zzcopy1es( a, bri, bir ) \
{ \
	bli_zzcopyris( bli_zreal(a), bli_zimag(a), bli_zreal(bri), bli_zimag(bri) ); \
	bli_zzcopyris( -bli_zimag(a), bli_zreal(a), bli_zreal(bir), bli_zimag(bir) ); \
}

// Single-type shorthands: forward to the variant whose x and y types match.

#define bli_ccopy1es( a, bri, bir ) bli_cccopy1es( a, bri, bir )
#define bli_zcopy1es( a, bri, bir ) bli_zzcopy1es( a, bri, bir )

#endif
blis-0.6.1/frame/include/level0/1e/bli_copyj1es.h000066400000000000000000000063511360743507500214330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYJ1ES_H #define BLIS_COPYJ1ES_H // copyj1es // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. 
// Each macro takes one input (a) and two outputs (bri, bir).

// -- y is s or d --
// Every variant with a real destination expands to an empty block.

#define bli_sscopyj1es( a, bri, bir ) {}
#define bli_dscopyj1es( a, bri, bir ) {}
#define bli_cscopyj1es( a, bri, bir ) {}
#define bli_zscopyj1es( a, bri, bir ) {}

#define bli_sdcopyj1es( a, bri, bir ) {}
#define bli_ddcopyj1es( a, bri, bir ) {}
#define bli_cdcopyj1es( a, bri, bir ) {}
#define bli_zdcopyj1es( a, bri, bir ) {}

// -- y is c --
// Real sources (s, d) expand to an empty block. Complex sources pass
// ( real(a), -imag(a) ) to ??copyris with the parts of bri as destination, and
// ( imag(a), real(a) ) with the parts of bir as destination.
// NOTE(review): presumably ??copyris( ar, ai, br, bi ) stores ar into br and
// ai into bi; the helper is defined elsewhere -- confirm.

#define bli_sccopyj1es( a, bri, bir ) {}
#define bli_dccopyj1es( a, bri, bir ) {}
#define bli_cccopyj1es( a, bri, bir ) \
{ \
	bli_cccopyris( bli_creal(a), -bli_cimag(a), bli_creal(bri), bli_cimag(bri) ); \
	bli_cccopyris( bli_cimag(a), bli_creal(a), bli_creal(bir), bli_cimag(bir) ); \
}
#define bli_zccopyj1es( a, bri, bir ) \
{ \
	bli_zccopyris( bli_zreal(a), -bli_zimag(a), bli_creal(bri), bli_cimag(bri) ); \
	bli_zccopyris( bli_zimag(a), bli_zreal(a), bli_creal(bir), bli_cimag(bir) ); \
}

// -- y is z --
// Same layout as the c-destination group, using the z accessors on bri/bir.

#define bli_szcopyj1es( a, bri, bir ) {}
#define bli_dzcopyj1es( a, bri, bir ) {}
#define bli_czcopyj1es( a, bri, bir ) \
{ \
	bli_czcopyris( bli_creal(a), -bli_cimag(a), bli_zreal(bri), bli_zimag(bri) ); \
	bli_czcopyris( bli_cimag(a), bli_creal(a), bli_zreal(bir), bli_zimag(bir) ); \
}
#define bli_zzcopyj1es( a, bri, bir ) \
{ \
	bli_zzcopyris( bli_zreal(a), -bli_zimag(a), bli_zreal(bri), bli_zimag(bri) ); \
	bli_zzcopyris( bli_zimag(a), bli_zreal(a), bli_zreal(bir), bli_zimag(bir) ); \
}

// Single-type shorthands: forward to the variant whose x and y types match.

#define bli_ccopyj1es( a, bri, bir ) bli_cccopyj1es( a, bri, bir )
#define bli_zcopyj1es( a, bri, bir ) bli_zzcopyj1es( a, bri, bir )

#endif
blis-0.6.1/frame/include/level0/1e/bli_invert1es.h000066400000000000000000000040741360743507500216160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_INVERT1ES_H
#define BLIS_INVERT1ES_H

// invert1es

// Notes:
// - Both macros update bri in place via ?invertris and then fill bir from the
//   updated bri via ?copyris.
// - The ?copyris call lists its destination as ( imag(bir), real(bir) ), i.e.
//   imag-first, paired with the source ( real(bri), -imag(bri) ).
// NOTE(review): presumably ?copyris( ar, ai, br, bi ) stores ar into br and ai
// into bi, which would leave bir holding ( -imag(bri), real(bri) ); the helper
// is defined elsewhere -- confirm before reordering any argument.

#define bli_cinvert1es( bri, bir ) \
{ \
	bli_cinvertris( bli_creal(bri), bli_cimag(bri) ); \
	bli_ccopyris( bli_creal(bri), -bli_cimag(bri), bli_cimag(bir), bli_creal(bir) ); \
}

#define bli_zinvert1es( bri, bir ) \
{ \
	bli_zinvertris( bli_zreal(bri), bli_zimag(bri) ); \
	bli_zcopyris( bli_zreal(bri), -bli_zimag(bri), bli_zimag(bir), bli_zreal(bir) ); \
}

#endif
blis-0.6.1/frame/include/level0/1e/bli_scal1es.h000066400000000000000000000041661360743507500212330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_SCAL1ES_H
#define BLIS_SCAL1ES_H

// scal1es

// Notes:
// - Both macros first apply ?scalris to yri in place, passing the real and
//   imaginary parts of a, and then fill yir from the updated yri via ?copyris
//   using the source pair ( -imag(yri), real(yri) ).
// - The second statement reads the yri that the first statement just wrote, so
//   the two statements must stay in this order.
// NOTE(review): presumably ?copyris( ar, ai, br, bi ) stores ar into br and ai
// into bi; the helper is defined elsewhere -- confirm.

#define bli_cscal1es( a, yri, yir ) \
{ \
	bli_cscalris( bli_creal(a), bli_cimag(a), bli_creal(yri), bli_cimag(yri) ); \
	bli_ccopyris( -bli_cimag(yri), bli_creal(yri), bli_creal(yir), bli_cimag(yir) ); \
}

#define bli_zscal1es( a, yri, yir ) \
{ \
	bli_zscalris( bli_zreal(a), bli_zimag(a), bli_zreal(yri), bli_zimag(yri) ); \
	bli_zcopyris( -bli_zimag(yri), bli_zreal(yri), bli_zreal(yir), bli_zimag(yir) ); \
}

#endif
blis-0.6.1/frame/include/level0/1e/bli_scal21es.h000066400000000000000000000246431360743507500213170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL21ES_H #define BLIS_SCAL21ES_H // scal21es // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. // -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssscal21es( a, x, yri, yir ) {} #define bli_sdsscal21es( a, x, yri, yir ) {} #define bli_scsscal21es( a, x, yri, yir ) {} #define bli_szsscal21es( a, x, yri, yir ) {} #define bli_dssscal21es( a, x, yri, yir ) {} #define bli_ddsscal21es( a, x, yri, yir ) {} #define bli_dcsscal21es( a, x, yri, yir ) {} #define bli_dzsscal21es( a, x, yri, yir ) {} #define bli_cssscal21es( a, x, yri, yir ) {} #define bli_cdsscal21es( a, x, yri, yir ) {} #define bli_ccsscal21es( a, x, yri, yir ) {} #define bli_czsscal21es( a, x, yri, yir ) {} #define bli_zssscal21es( a, x, yri, yir ) {} #define bli_zdsscal21es( a, x, yri, yir ) {} #define bli_zcsscal21es( a, x, yri, yir ) {} #define bli_zzsscal21es( a, x, yri, yir ) {} // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdscal21es( a, x, yri, yir ) {} #define bli_sddscal21es( a, x, yri, yir ) {} #define bli_scdscal21es( a, x, yri, yir ) {} #define bli_szdscal21es( a, x, yri, yir ) {} #define bli_dsdscal21es( a, x, yri, yir ) {} #define bli_dddscal21es( a, x, yri, yir ) {} #define bli_dcdscal21es( a, x, yri, yir ) {} #define bli_dzdscal21es( a, x, yri, yir ) {} #define bli_csdscal21es( a, x, yri, yir 
) {} #define bli_cddscal21es( a, x, yri, yir ) {} #define bli_ccdscal21es( a, x, yri, yir ) {} #define bli_czdscal21es( a, x, yri, yir ) {} #define bli_zsdscal21es( a, x, yri, yir ) {} #define bli_zddscal21es( a, x, yri, yir ) {} #define bli_zcdscal21es( a, x, yri, yir ) {} #define bli_zzdscal21es( a, x, yri, yir ) {} // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscscal21es( a, x, yri, yir ) {} #define bli_sdcscal21es( a, x, yri, yir ) {} #define bli_sccscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_szcscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_dscscal21es( a, x, yri, yir ) {} #define bli_ddcscal21es( a, x, yri, yir ) {} #define bli_dccscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_dzcscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cscscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cdcscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), 
bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cccscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_czcscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zscscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zdcscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zccscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zzcscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszscal21es( a, x, yri, yir ) {} #define bli_sdzscal21es( a, x, yri, yir ) {} #define bli_sczscal21es( a, x, yri, 
yir ) \ { \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_szzscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_dszscal21es( a, x, yri, yir ) {} #define bli_ddzscal21es( a, x, yri, yir ) {} #define bli_dczscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_dzzscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cszscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cdzscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cczscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_czzscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( 
bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zszscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zdzscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zczscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zzzscal21es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), -bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cscal21es( a, x, yri, yir ) bli_cccscal21es( a, x, yri, yir ) #define bli_zscal21es( a, x, yri, yir ) bli_zzzscal21es( a, x, yri, yir ) #endif blis-0.6.1/frame/include/level0/1e/bli_scal2j1es.h000066400000000000000000000247521360743507500214720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2J1ES_H #define BLIS_SCAL2J1ES_H // scal2j1es // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssscal2j1es( a, x, yri, yir ) {} #define bli_sdsscal2j1es( a, x, yri, yir ) {} #define bli_scsscal2j1es( a, x, yri, yir ) {} #define bli_szsscal2j1es( a, x, yri, yir ) {} #define bli_dssscal2j1es( a, x, yri, yir ) {} #define bli_ddsscal2j1es( a, x, yri, yir ) {} #define bli_dcsscal2j1es( a, x, yri, yir ) {} #define bli_dzsscal2j1es( a, x, yri, yir ) {} #define bli_cssscal2j1es( a, x, yri, yir ) {} #define bli_cdsscal2j1es( a, x, yri, yir ) {} #define bli_ccsscal2j1es( a, x, yri, yir ) {} #define bli_czsscal2j1es( a, x, yri, yir ) {} #define bli_zssscal2j1es( a, x, yri, yir ) {} #define bli_zdsscal2j1es( a, x, yri, yir ) {} #define bli_zcsscal2j1es( a, x, yri, yir ) {} #define bli_zzsscal2j1es( a, x, yri, yir ) {} // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdscal2j1es( a, x, yri, yir ) {} #define bli_sddscal2j1es( a, x, yri, yir ) {} #define bli_scdscal2j1es( a, x, yri, yir ) {} #define bli_szdscal2j1es( a, x, yri, yir ) {} #define bli_dsdscal2j1es( a, x, yri, yir ) {} #define bli_dddscal2j1es( a, x, yri, yir ) {} #define bli_dcdscal2j1es( a, x, yri, yir ) {} #define bli_dzdscal2j1es( a, x, yri, yir ) {} #define bli_csdscal2j1es( a, x, yri, yir ) {} #define bli_cddscal2j1es( a, x, yri, yir ) {} #define bli_ccdscal2j1es( a, x, yri, yir ) {} #define bli_czdscal2j1es( a, x, yri, yir ) {} #define bli_zsdscal2j1es( a, x, yri, yir ) {} #define bli_zddscal2j1es( a, x, yri, yir ) {} #define bli_zcdscal2j1es( a, x, yri, yir ) {} #define bli_zzdscal2j1es( a, x, yri, yir ) {} // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscscal2j1es( a, x, yri, yir ) {} #define bli_sdcscal2j1es( a, x, yri, yir ) {} #define bli_sccscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_sreal(a), 
bli_simag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_szcscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_dscscal2j1es( a, x, yri, yir ) {} #define bli_ddcscal2j1es( a, x, yri, yir ) {} #define bli_dccscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_dzcscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cscscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), -bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cdcscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), -bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cccscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_czcscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zimag(x), 
bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zscscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), -bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zdcscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), -bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zccscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zzcscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszscal2j1es( a, x, yri, yir ) {} #define bli_sdzscal2j1es( a, x, yri, yir ) {} #define bli_sczscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_szzscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_sreal(a), bli_simag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_dszscal2j1es( a, x, yri, yir ) {} #define bli_ddzscal2j1es( a, x, yri, yir ) {} #define bli_dczscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( 
bli_dreal(a), bli_dimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_dzzscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_dreal(a), bli_dimag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cszscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), -bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cdzscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), -bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cczscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_czzscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zszscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), -bli_simag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_simag(x), bli_sreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zdzscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), -bli_dimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), 
bli_zimag(a), bli_dimag(x), bli_dreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zczscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), -bli_cimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_cimag(x), bli_creal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_zzzscal2j1es( a, x, yri, yir ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), -bli_zimag(x), bli_zreal(yri), bli_zimag(yri) ); \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zimag(x), bli_zreal(x), bli_zreal(yir), bli_zimag(yir) ); \ } #define bli_cscal2j1es( a, x, yri, yir ) bli_cccscal2j1es( a, x, yri, yir ) #define bli_zscal2j1es( a, x, yri, yir ) bli_zzzscal2j1es( a, x, yri, yir ) #endif blis-0.6.1/frame/include/level0/1m/000077500000000000000000000000001360743507500167025ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/1m/bli_invert1ms_mxn_diag.h000066400000000000000000000106271360743507500235050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_INVERT1MS_MXN_DIAG_H #define BLIS_INVERT1MS_MXN_DIAG_H // invert1ms_mxn_diag #define bli_cinvert1ms_mxn_diag( schema, offm, offn, m, n, y, rs_y, cs_y, ld_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t i; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ scomplex* restrict y_off_ri = y + (offm )*rs_y \ + (offn )*cs_y; \ scomplex* restrict y_off_ir = y + (offm )*rs_y \ + (offn )*cs_y + ld_y/2; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_cinvert1es( *(y_off_ri + i*rs_y + i*cs_y), \ *(y_off_ir + i*rs_y + i*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. 
*/ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ float* restrict y_cast = ( float* )y; \ float* restrict y_off_r = y_cast + (offm )*rs_y2 \ + (offn )*cs_y2; \ float* restrict y_off_i = y_cast + (offm )*rs_y2 \ + (offn )*cs_y2 + ld_y; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_cinvert1rs( *(y_off_r + i*rs_y2 + i*cs_y2), \ *(y_off_i + i*rs_y2 + i*cs_y2) ); \ } \ } \ } #define bli_zinvert1ms_mxn_diag( schema, offm, offn, m, n, y, rs_y, cs_y, ld_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t i; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ dcomplex* restrict y_off_ri = y + (offm )*rs_y \ + (offn )*cs_y; \ dcomplex* restrict y_off_ir = y + (offm )*rs_y \ + (offn )*cs_y + ld_y/2; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_zinvert1es( *(y_off_ri + i*rs_y + i*cs_y), \ *(y_off_ir + i*rs_y + i*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ double* restrict y_cast = ( double* )y; \ double* restrict y_off_r = y_cast + (offm )*rs_y2 \ + (offn )*cs_y2; \ double* restrict y_off_i = y_cast + (offm )*rs_y2 \ + (offn )*cs_y2 + ld_y; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_zinvert1rs( *(y_off_r + i*rs_y2 + i*cs_y2), \ *(y_off_i + i*rs_y2 + i*cs_y2) ); \ } \ } \ } #endif blis-0.6.1/frame/include/level0/1m/bli_scal1ms_mxn.h000066400000000000000000000075461360743507500221420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL1MS_MXN_H #define BLIS_SCAL1MS_MXN_H // scal1ms_mxn #define bli_cscal1ms_mxn( schema, m, n, a, y, rs_y, cs_y, ld_y ) \ { \ dim_t i, j; \ \ /* Handle 1e and 1r separately. 
*/ \ if ( bli_is_1e_packed( schema ) ) \ { \ scomplex* restrict y_ri = y; \ scomplex* restrict y_ir = y + ld_y/2; \ \ for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ { \ bli_cscal1es( *(a), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ float* restrict y_cast = ( float* )y; \ float* restrict y_r = y_cast; \ float* restrict y_i = y_cast + ld_y; \ \ for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ { \ bli_cscal1rs( *(a), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ } #define bli_zscal1ms_mxn( schema, m, n, a, y, rs_y, cs_y, ld_y ) \ { \ dim_t i, j; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ dcomplex* restrict y_ri = y; \ dcomplex* restrict y_ir = y + ld_y/2; \ \ for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ { \ bli_zscal1es( *(a), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. 
*/ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ double* restrict y_cast = ( double* )y; \ double* restrict y_r = y_cast; \ double* restrict y_i = y_cast + ld_y; \ \ for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ { \ bli_zscal1rs( *(a), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ } #endif blis-0.6.1/frame/include/level0/1m/bli_scal21ms_mxn.h000066400000000000000000000134431360743507500222150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL21MS_MXN_H #define BLIS_SCAL21MS_MXN_H // scal21ms_mxn static void bli_cscal21ms_mxn ( const pack_t schema, const conj_t conjx, const dim_t m, const dim_t n, scomplex* restrict alpha, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t ld_y ) { dim_t i, j; /* Handle 1e and 1r separately. */ if ( bli_is_1e_packed( schema ) ) { scomplex* restrict y_ri = y; scomplex* restrict y_ir = y + ld_y/2; if ( bli_is_conj( conjx ) ) { for ( j = 0; j < n; ++j ) for ( i = 0; i < m; ++i ) { bli_cscal2j1es( *(alpha), *(x + i*rs_x + j*cs_x), *(y_ri + i*rs_y + j*cs_y), *(y_ir + i*rs_y + j*cs_y) ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( j = 0; j < n; ++j ) for ( i = 0; i < m; ++i ) { bli_cscal21es( *(alpha), *(x + i*rs_x + j*cs_x), *(y_ri + i*rs_y + j*cs_y), *(y_ir + i*rs_y + j*cs_y) ); } } } else /* if ( bli_is_1r_packed( schema ) ) */ { inc_t rs_y2 = rs_y; inc_t cs_y2 = cs_y; /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. 
*/ if ( rs_y2 == 1 ) { cs_y2 *= 2; } else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } float* restrict y_cast = ( float* )y; float* restrict y_r = y_cast; float* restrict y_i = y_cast + ld_y; if ( bli_is_conj( conjx ) ) { for ( j = 0; j < n; ++j ) for ( i = 0; i < m; ++i ) { bli_cscal2j1rs( *(alpha), *(x + i*rs_x + j*cs_x ), *(y_r + i*rs_y2 + j*cs_y2), *(y_i + i*rs_y2 + j*cs_y2) ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( j = 0; j < n; ++j ) for ( i = 0; i < m; ++i ) { bli_cscal21rs( *(alpha), *(x + i*rs_x + j*cs_x ), *(y_r + i*rs_y2 + j*cs_y2), *(y_i + i*rs_y2 + j*cs_y2) ); } } } } static void bli_zscal21ms_mxn ( const pack_t schema, const conj_t conjx, const dim_t m, const dim_t n, dcomplex* restrict alpha, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t ld_y ) { dim_t i, j; /* Handle 1e and 1r separately. */ if ( bli_is_1e_packed( schema ) ) { dcomplex* restrict y_ri = y; dcomplex* restrict y_ir = y + ld_y/2; if ( bli_is_conj( conjx ) ) { for ( j = 0; j < n; ++j ) for ( i = 0; i < m; ++i ) { bli_zscal2j1es( *(alpha), *(x + i*rs_x + j*cs_x), *(y_ri + i*rs_y + j*cs_y), *(y_ir + i*rs_y + j*cs_y) ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( j = 0; j < n; ++j ) for ( i = 0; i < m; ++i ) { bli_zscal21es( *(alpha), *(x + i*rs_x + j*cs_x), *(y_ri + i*rs_y + j*cs_y), *(y_ir + i*rs_y + j*cs_y) ); } } } else /* if ( bli_is_1r_packed( schema ) ) */ { inc_t rs_y2 = rs_y; inc_t cs_y2 = cs_y; /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. 
*/ if ( rs_y2 == 1 ) { cs_y2 *= 2; } else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } double* restrict y_cast = ( double* )y; double* restrict y_r = y_cast; double* restrict y_i = y_cast + ld_y; if ( bli_is_conj( conjx ) ) { for ( j = 0; j < n; ++j ) for ( i = 0; i < m; ++i ) { bli_zscal2j1rs( *(alpha), *(x + i*rs_x + j*cs_x ), *(y_r + i*rs_y2 + j*cs_y2), *(y_i + i*rs_y2 + j*cs_y2) ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( j = 0; j < n; ++j ) for ( i = 0; i < m; ++i ) { bli_zscal21rs( *(alpha), *(x + i*rs_x + j*cs_x ), *(y_r + i*rs_y2 + j*cs_y2), *(y_i + i*rs_y2 + j*cs_y2) ); } } } } #endif blis-0.6.1/frame/include/level0/1m/bli_scal21ms_mxn_diag.h000066400000000000000000000102701360743507500231740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL21MS_MXN_DIAG_H #define BLIS_SCAL21MS_MXN_DIAG_H // scal21ms_mxn_diag #define bli_cscscal21ms_mxn_diag( schema, m, n, a, x, rs_x, cs_x, y, rs_y, cs_y, ld_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t i; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ scomplex* restrict y_off_ri = y; \ scomplex* restrict y_off_ir = y + ld_y/2; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_cscscal21es( *(a), \ *(x + i*rs_x + i*cs_x), \ *(y_off_ri + i*rs_y + i*cs_y), \ *(y_off_ir + i*rs_y + i*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ float* restrict y_cast = ( float* )y; \ float* restrict y_off_r = y_cast; \ float* restrict y_off_i = y_cast + ld_y; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_cscscal21rs( *(a), \ *(x + i*rs_x + i*cs_x ), \ *(y_off_r + i*rs_y2 + i*cs_y2), \ *(y_off_i + i*rs_y2 + i*cs_y2) ); \ } \ } \ } #define bli_zdzscal21ms_mxn_diag( schema, m, n, a, x, rs_x, cs_x, y, rs_y, cs_y, ld_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t i; \ \ /* Handle 1e and 1r separately. 
*/ \ if ( bli_is_1e_packed( schema ) ) \ { \ dcomplex* restrict y_off_ri = y; \ dcomplex* restrict y_off_ir = y + ld_y/2; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_zdzscal21es( *(a), \ *(x + i*rs_x + i*cs_x), \ *(y_off_ri + i*rs_y + i*cs_y), \ *(y_off_ir + i*rs_y + i*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ double* restrict y_cast = ( double* )y; \ double* restrict y_off_r = y_cast; \ double* restrict y_off_i = y_cast + ld_y; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_zdzscal21rs( *(a), \ *(x + i*rs_x + i*cs_x ), \ *(y_off_r + i*rs_y2 + i*cs_y2), \ *(y_off_i + i*rs_y2 + i*cs_y2) ); \ } \ } \ } #endif blis-0.6.1/frame/include/level0/1m/bli_scal21ms_mxn_uplo.h000066400000000000000000000210021360743507500232420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL21MS_MXN_UPLO_H #define BLIS_SCAL21MS_MXN_UPLO_H // scal21ms_mxn_uplo #define bli_cscal21ms_mxn_uplo( schema, uplo, conjx, m, a, x, rs_x, cs_x, y, rs_y, cs_y, ld_y ) \ { \ dim_t i, j; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ scomplex* restrict y_ri = y; \ scomplex* restrict y_ir = y + ld_y/2; \ \ if ( bli_is_lower( uplo ) ) \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( j = 0; j < m; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_cscal2j1es( *(a), \ *(x + i*rs_x + j*cs_x), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( j = 0; j < m; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_cscal21es( *(a), \ *(x + i*rs_x + j*cs_x), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( j = 0; j < m; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_cscal2j1es( *(a), \ *(x + i*rs_x + j*cs_x), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( j = 0; j < m; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_cscal21es( *(a), \ *(x + i*rs_x + 
j*cs_x), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ float* restrict y_cast = ( float* )y; \ float* restrict y_r = y_cast; \ float* restrict y_i = y_cast + ld_y; \ \ if ( bli_is_lower( uplo ) ) \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( j = 0; j < m; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_cscal2j1rs( *(a), \ *(x + i*rs_x + j*cs_x ), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( j = 0; j < m; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_cscal21rs( *(a), \ *(x + i*rs_x + j*cs_x ), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( j = 0; j < m; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_cscal2j1rs( *(a), \ *(x + i*rs_x + j*cs_x ), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( j = 0; j < m; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_cscal21rs( *(a), \ *(x + i*rs_x + j*cs_x ), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ } \ } \ } #define bli_zscal21ms_mxn_uplo( schema, uplo, conjx, m, a, x, rs_x, cs_x, y, rs_y, cs_y, ld_y ) \ { \ dim_t i, j; \ \ /* Handle 1e and 1r separately. 
*/ \ if ( bli_is_1e_packed( schema ) ) \ { \ dcomplex* restrict y_ri = y; \ dcomplex* restrict y_ir = y + ld_y/2; \ \ if ( bli_is_lower( uplo ) ) \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( j = 0; j < m; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_zscal2j1es( *(a), \ *(x + i*rs_x + j*cs_x), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( j = 0; j < m; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_zscal21es( *(a), \ *(x + i*rs_x + j*cs_x), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( j = 0; j < m; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_zscal2j1es( *(a), \ *(x + i*rs_x + j*cs_x), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( j = 0; j < m; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_zscal21es( *(a), \ *(x + i*rs_x + j*cs_x), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. 
*/ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ double* restrict y_cast = ( double* )y; \ double* restrict y_r = y_cast; \ double* restrict y_i = y_cast + ld_y; \ \ if ( bli_is_lower( uplo ) ) \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( j = 0; j < m; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_zscal2j1rs( *(a), \ *(x + i*rs_x + j*cs_x ), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( j = 0; j < m; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_zscal21rs( *(a), \ *(x + i*rs_x + j*cs_x ), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( j = 0; j < m; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_zscal2j1rs( *(a), \ *(x + i*rs_x + j*cs_x ), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( j = 0; j < m; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_zscal21rs( *(a), \ *(x + i*rs_x + j*cs_x ), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ } \ } \ } #endif blis-0.6.1/frame/include/level0/1m/bli_set1ms_mxn.h000066400000000000000000000145171360743507500220070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SET1MS_MXN_H #define BLIS_SET1MS_MXN_H // set1ms_mxn #define bli_sset1ms_mxn( schema, offm, offn, m, n, a, y, rs_y, cs_y, ld_y ) \ { \ /* Include real domain version to facilitate macro-izing mixed-datatype components of packm. */ \ } #define bli_dset1ms_mxn( schema, offm, offn, m, n, a, y, rs_y, cs_y, ld_y ) \ { \ /* Include real domain version to facilitate macro-izing mixed-datatype components of packm. */ \ } static void bli_cset1ms_mxn ( const pack_t schema, const dim_t offm, const dim_t offn, const dim_t m, const dim_t n, scomplex* restrict alpha, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t ld_y ) { inc_t offm_local = offm; inc_t offn_local = offn; dim_t m_local = m; dim_t n_local = n; inc_t rs_y1 = rs_y; inc_t cs_y1 = cs_y; inc_t rs_y2 = rs_y; inc_t cs_y2 = cs_y; dim_t i, j; /* Optimization: The loops walk through y with unit stride if y is column-stored. If y is row-stored, swap the dimensions and strides to preserve unit stride movement. 
*/ if ( cs_y == 1 ) { bli_swap_incs( &offm_local, &offn_local ); bli_swap_dims( &m_local, &n_local ); bli_swap_incs( &rs_y1, &cs_y1 ); bli_swap_incs( &rs_y2, &cs_y2 ); } /* Handle 1e and 1r separately. */ if ( bli_is_1e_packed( schema ) ) { scomplex* restrict y_off_ri = y + (offm_local )*rs_y1 + (offn_local )*cs_y1; scomplex* restrict y_off_ir = y + (offm_local )*rs_y1 + (offn_local )*cs_y1 + ld_y/2; for ( j = 0; j < n_local; ++j ) for ( i = 0; i < m_local; ++i ) { bli_ccopy1es( *(alpha), *(y_off_ri + i*rs_y1 + j*cs_y1), *(y_off_ir + i*rs_y1 + j*cs_y1) ); } } else /* if ( bli_is_1r_packed( schema ) ) */ { /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ if ( rs_y2 == 1 ) { cs_y2 *= 2; } else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } float* restrict y_cast = ( float* )y; float* restrict y_off_r = y_cast + (offm_local )*rs_y2 + (offn_local )*cs_y2; float* restrict y_off_i = y_cast + (offm_local )*rs_y2 + (offn_local )*cs_y2 + ld_y; for ( j = 0; j < n_local; ++j ) for ( i = 0; i < m_local; ++i ) { bli_ccopy1rs( *(alpha), *(y_off_r + i*rs_y2 + j*cs_y2), *(y_off_i + i*rs_y2 + j*cs_y2) ); } } } static void bli_zset1ms_mxn ( const pack_t schema, const dim_t offm, const dim_t offn, const dim_t m, const dim_t n, dcomplex* restrict alpha, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t ld_y ) { inc_t offm_local = offm; inc_t offn_local = offn; dim_t m_local = m; dim_t n_local = n; inc_t rs_y1 = rs_y; inc_t cs_y1 = cs_y; inc_t rs_y2 = rs_y; inc_t cs_y2 = cs_y; dim_t i, j; /* Optimization: The loops walk through y with unit stride if y is column-stored. If y is row-stored, swap the dimensions and strides to preserve unit stride movement. */ if ( cs_y == 1 ) { bli_swap_incs( &offm_local, &offn_local ); bli_swap_dims( &m_local, &n_local ); bli_swap_incs( &rs_y1, &cs_y1 ); bli_swap_incs( &rs_y2, &cs_y2 ); } /* Handle 1e and 1r separately. 
*/ if ( bli_is_1e_packed( schema ) ) { dcomplex* restrict y_off_ri = y + (offm_local )*rs_y1 + (offn_local )*cs_y1; dcomplex* restrict y_off_ir = y + (offm_local )*rs_y1 + (offn_local )*cs_y1 + ld_y/2; for ( j = 0; j < n_local; ++j ) for ( i = 0; i < m_local; ++i ) { bli_zcopy1es( *(alpha), *(y_off_ri + i*rs_y1 + j*cs_y1), *(y_off_ir + i*rs_y1 + j*cs_y1) ); } } else /* if ( bli_is_1r_packed( schema ) ) */ { /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ if ( rs_y2 == 1 ) { cs_y2 *= 2; } else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } double* restrict y_cast = ( double* )y; double* restrict y_off_r = y_cast + (offm_local )*rs_y2 + (offn_local )*cs_y2; double* restrict y_off_i = y_cast + (offm_local )*rs_y2 + (offn_local )*cs_y2 + ld_y; for ( j = 0; j < n_local; ++j ) for ( i = 0; i < m_local; ++i ) { bli_zcopy1rs( *(alpha), *(y_off_r + i*rs_y2 + j*cs_y2), *(y_off_i + i*rs_y2 + j*cs_y2) ); } } } #endif blis-0.6.1/frame/include/level0/1m/bli_set1ms_mxn_diag.h000066400000000000000000000107421360743507500227670ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SET1MS_MXN_DIAG_H #define BLIS_SET1MS_MXN_DIAG_H // set1ms_mxn_diag #define bli_cset1ms_mxn_diag( schema, offm, offn, m, n, a, y, rs_y, cs_y, ld_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t i; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ scomplex* restrict y_off_ri = y + (offm )*rs_y \ + (offn )*cs_y; \ scomplex* restrict y_off_ir = y + (offm )*rs_y \ + (offn )*cs_y + ld_y/2; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_ccopy1es( *(a), \ *(y_off_ri + i*rs_y + i*cs_y), \ *(y_off_ir + i*rs_y + i*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. 
*/ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ float* restrict y_cast = ( float* )y; \ float* restrict y_off_r = y_cast + (offm )*rs_y2 \ + (offn )*cs_y2; \ float* restrict y_off_i = y_cast + (offm )*rs_y2 \ + (offn )*cs_y2 + ld_y; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_ccopy1rs( *(a), \ *(y_off_r + i*rs_y2 + i*cs_y2), \ *(y_off_i + i*rs_y2 + i*cs_y2) ); \ } \ } \ } #define bli_zset1ms_mxn_diag( schema, offm, offn, m, n, a, y, rs_y, cs_y, ld_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t i; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ dcomplex* restrict y_off_ri = y + (offm )*rs_y \ + (offn )*cs_y; \ dcomplex* restrict y_off_ir = y + (offm )*rs_y \ + (offn )*cs_y + ld_y/2; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_zcopy1es( *(a), \ *(y_off_ri + i*rs_y + i*cs_y), \ *(y_off_ir + i*rs_y + i*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ double* restrict y_cast = ( double* )y; \ double* restrict y_off_r = y_cast + (offm )*rs_y2 \ + (offn )*cs_y2; \ double* restrict y_off_i = y_cast + (offm )*rs_y2 \ + (offn )*cs_y2 + ld_y; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_zcopy1rs( *(a), \ *(y_off_r + i*rs_y2 + i*cs_y2), \ *(y_off_i + i*rs_y2 + i*cs_y2) ); \ } \ } \ } #endif blis-0.6.1/frame/include/level0/1m/bli_set1ms_mxn_uplo.h000066400000000000000000000137641360743507500230510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SET1MS_MXN_UPLO_H #define BLIS_SET1MS_MXN_UPLO_H // set1ms_mxn_uplo #define bli_cset1ms_mxn_uplo( schema, diagoff, uplo, m, n, a, y, rs_y, cs_y, ld_y ) \ { \ doff_t diagoff_abs = bli_abs( diagoff ); \ inc_t offdiag_inc; \ dim_t i, j; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ /* Set the off-diagonal increment. 
*/ \ if ( diagoff > 0 ) offdiag_inc = cs_y; \ else /* if ( diagoff < 0 ) */ offdiag_inc = rs_y; \ \ scomplex* restrict y0 = y + (diagoff_abs )*offdiag_inc; \ scomplex* restrict y_ri = y0; \ scomplex* restrict y_ir = y0 + ld_y/2; \ \ if ( bli_is_lower( uplo ) ) \ { \ for ( j = 0; j < n; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_ccopy1es( *(a), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ for ( j = 0; j < n; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_ccopy1es( *(a), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ /* Set the off-diagonal increment. */ \ if ( diagoff > 0 ) offdiag_inc = cs_y2; \ else /* if ( diagoff < 0 ) */ offdiag_inc = rs_y2; \ \ float* restrict y0 = ( float* )y + (diagoff_abs )*offdiag_inc; \ float* restrict y_r = y0; \ float* restrict y_i = y0 + ld_y; \ \ if ( bli_is_lower( uplo ) ) \ { \ for ( j = 0; j < n; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_ccopy1rs( *(a), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ for ( j = 0; j < n; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_ccopy1rs( *(a), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ } \ } #define bli_zset1ms_mxn_uplo( schema, diagoff, uplo, m, n, a, y, rs_y, cs_y, ld_y ) \ { \ doff_t diagoff_abs = bli_abs( diagoff ); \ inc_t offdiag_inc; \ dim_t i, j; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ /* Set the off-diagonal increment. 
*/ \ if ( diagoff > 0 ) offdiag_inc = cs_y; \ else /* if ( diagoff < 0 ) */ offdiag_inc = rs_y; \ \ dcomplex* restrict y0 = y + (diagoff_abs )*offdiag_inc; \ dcomplex* restrict y_ri = y0; \ dcomplex* restrict y_ir = y0 + ld_y/2; \ \ if ( bli_is_lower( uplo ) ) \ { \ for ( j = 0; j < n; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_zcopy1es( *(a), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ for ( j = 0; j < n; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_zcopy1es( *(a), \ *(y_ri + i*rs_y + j*cs_y), \ *(y_ir + i*rs_y + j*cs_y) ); \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ /* Set the off-diagonal increment. */ \ if ( diagoff > 0 ) offdiag_inc = cs_y2; \ else /* if ( diagoff < 0 ) */ offdiag_inc = rs_y2; \ \ double* restrict y0 = ( double* )y + (diagoff_abs )*offdiag_inc; \ double* restrict y_r = y0; \ double* restrict y_i = y0 + ld_y; \ \ if ( bli_is_lower( uplo ) ) \ { \ for ( j = 0; j < n; ++j ) \ for ( i = j; i < m; ++i ) \ { \ bli_zcopy1rs( *(a), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ else /* if ( bli_is_upper( uplo ) ) */ \ { \ for ( j = 0; j < n; ++j ) \ for ( i = 0; i < j + 1; ++i ) \ { \ bli_zcopy1rs( *(a), \ *(y_r + i*rs_y2 + j*cs_y2), \ *(y_i + i*rs_y2 + j*cs_y2) ); \ } \ } \ } \ } #endif blis-0.6.1/frame/include/level0/1m/bli_seti01ms_mxn_diag.h000066400000000000000000000071661360743507500232260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SETI01MS_MXN_DIAG_H #define BLIS_SETI01MS_MXN_DIAG_H // seti01ms_mxn_diag #define bli_cseti01ms_mxn_diag( schema, m, n, y, rs_y, cs_y, ld_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t i; \ \ /* Handle 1e and 1r separately. 
*/ \ if ( bli_is_1e_packed( schema ) ) \ { \ scomplex* restrict y_off_ri = y; \ scomplex* restrict y_off_ir = y + ld_y/2; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_cseti0s( *(y_off_ri + i*rs_y + i*cs_y) ); \ bli_csetr0s( *(y_off_ir + i*rs_y + i*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ float* restrict y_cast = ( float* )y; \ float* restrict y_off_i = y_cast + ld_y; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_sset0s( *(y_off_i + i*rs_y2 + i*cs_y2) ); \ } \ } \ } #define bli_zseti01ms_mxn_diag( schema, m, n, y, rs_y, cs_y, ld_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t i; \ \ /* Handle 1e and 1r separately. */ \ if ( bli_is_1e_packed( schema ) ) \ { \ dcomplex* restrict y_off_ri = y; \ dcomplex* restrict y_off_ir = y + ld_y/2; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_zseti0s( *(y_off_ri + i*rs_y + i*cs_y) ); \ bli_zsetr0s( *(y_off_ir + i*rs_y + i*cs_y) ); \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ inc_t rs_y2 = rs_y; \ inc_t cs_y2 = cs_y; \ \ /* Scale the non-unit stride by two for the 1r loop, which steps in units of real (not complex) values. */ \ if ( rs_y2 == 1 ) { cs_y2 *= 2; } \ else /* if ( cs_y2 == 1 ) */ { rs_y2 *= 2; } \ \ double* restrict y_cast = ( double* )y; \ double* restrict y_off_i = y_cast + ld_y; \ \ for ( i = 0; i < min_m_n; ++i ) \ { \ bli_dset0s( *(y_off_i + i*rs_y2 + i*cs_y2) ); \ } \ } \ } #endif blis-0.6.1/frame/include/level0/1r/000077500000000000000000000000001360743507500167075ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/1r/bli_copy1rs.h000066400000000000000000000036201360743507500213070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPY1RS_H #define BLIS_COPY1RS_H // copy1rs #define bli_ccopy1rs( a, br, bi ) \ { \ bli_ccopyris( bli_creal(a), bli_cimag(a), br, bi ); \ } #define bli_zcopy1rs( a, br, bi ) \ { \ bli_zcopyris( bli_zreal(a), bli_zimag(a), br, bi ); \ } #endif blis-0.6.1/frame/include/level0/1r/bli_copyj1rs.h000066400000000000000000000036271360743507500214700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYJ1RS_H #define BLIS_COPYJ1RS_H // copyj1rs #define bli_ccopyj1rs( a, br, bi ) \ { \ bli_ccopyjris( bli_creal(a), bli_cimag(a), br, bi ); \ } #define bli_zcopyj1rs( a, br, bi ) \ { \ bli_zcopyjris( bli_zreal(a), bli_zimag(a), br, bi ); \ } #endif blis-0.6.1/frame/include/level0/1r/bli_invert1rs.h000066400000000000000000000035101360743507500216420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_INVERT1RS_H #define BLIS_INVERT1RS_H // invert1rs #define bli_cinvert1rs( xr, xi ) bli_cinvertris( xr, xi ) #define bli_zinvert1rs( xr, xi ) bli_zinvertris( xr, xi ) #endif blis-0.6.1/frame/include/level0/1r/bli_scal1rs.h000066400000000000000000000041301360743507500212540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_SCAL1RS_H #define BLIS_SCAL1RS_H // scal1rs #define bli_cscal1rs( a, yr, yi ) \ { \ bli_cscalris( bli_creal(a), bli_cimag(a), yr, yi ); \ } #define bli_zscal1rs( a, yr, yi ) \ { \ bli_zscalris( bli_zreal(a), bli_zimag(a), yr, yi ); \ } #define bli_scscal1rs( a, yr, yi ) \ { \ bli_scscalris( bli_sreal(a), bli_simag(a), yr, yi ); \ } #define bli_dzscal1rs( a, yr, yi ) \ { \ bli_dzscalris( bli_dreal(a), bli_dimag(a), yr, yi ); \ } #endif blis-0.6.1/frame/include/level0/1r/bli_scal21rs.h000066400000000000000000000045651360743507500213520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL21RS_H #define BLIS_SCAL21RS_H // scal21rs #define bli_cscscal21rs( a, x, yr, yi ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), yr, yi ); \ } #define bli_cccscal21rs( a, x, yr, yi ) \ { \ bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), yr, yi ); \ } #define bli_zdzscal21rs( a, x, yr, yi ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), yr, yi ); \ } #define bli_zzzscal21rs( a, x, yr, yi ) \ { \ bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), yr, yi ); \ } #define bli_cscal21rs( a, x, yr, yi ) bli_cccscal21rs( a, x, yr, yi ) #define bli_zscal21rs( a, x, yr, yi ) bli_zzzscal21rs( a, x, yr, yi ) #endif blis-0.6.1/frame/include/level0/1r/bli_scal2j1rs.h000066400000000000000000000046001360743507500215120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2J1RS_H #define BLIS_SCAL2J1RS_H // scal2j1rs #define bli_cscscal2j1rs( a, x, yr, yi ) \ { \ bli_cscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), yr, yi ); \ } #define bli_cccscal2j1rs( a, x, yr, yi ) \ { \ bli_cscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), yr, yi ); \ } #define bli_zdzscal2j1rs( a, x, yr, yi ) \ { \ bli_zscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), yr, yi ); \ } #define bli_zzzscal2j1rs( a, x, yr, yi ) \ { \ bli_zscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), yr, yi ); \ } #define bli_cscal2j1rs( a, x, yr, yi ) bli_cccscal2j1rs( a, x, yr, yi ) #define bli_zscal2j1rs( a, x, yr, yi ) bli_zzzscal2j1rs( a, x, yr, yi ) #endif blis-0.6.1/frame/include/level0/bb/000077500000000000000000000000001360743507500167505ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/bb/bli_bcastbbs_mxn.h000066400000000000000000000046531360743507500224240ustar00rootroot00000000000000/* BLIS An object-based framework for developing 
high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_BCASTBBS_MXN_H #define BLIS_BCASTBBS_MXN_H // bcastbbs_mxn #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ static void PASTEMAC(ch,opname) \ ( \ const dim_t m, \ const dim_t n, \ ctype* restrict y, const inc_t incy, const inc_t ldy \ ) \ { \ /* Assume that the duplication factor is the column stride of y. 
*/ \ const dim_t d = ldy; \ const dim_t ds_y = 1; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype* restrict yi = y + i*incy; \ \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype* restrict yij = yi + j*ldy; \ \ for ( dim_t p = 1; p < d; ++p ) \ { \ ctype* restrict yijd = yij + p*ds_y; \ \ PASTEMAC(ch,copys)( *yij, *yijd ); \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( bcastbbs_mxn ) #endif blis-0.6.1/frame/include/level0/bb/bli_scal2bbs_mxn.h000066400000000000000000000142731360743507500223330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2BBS_MXN_H #define BLIS_SCAL2BBS_MXN_H // scal2bbs_mxn #undef GENTFUNCRO #define GENTFUNCRO( ctype, ch, opname ) \ \ static void PASTEMAC(ch,opname) \ ( \ const conj_t conjx, \ const dim_t m, \ const dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, const inc_t incx, const inc_t ldx, \ ctype* restrict y, const inc_t incy, const inc_t ldy \ ) \ { \ /* Assume that the duplication factor is the row stride of y. */ \ const dim_t d = incy; \ const dim_t ds_y = 1; \ \ if ( bli_is_conj( conjx ) ) \ { \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype* restrict xj = x + j*ldx; \ ctype* restrict yj = y + j*ldy; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype* restrict xij = xj + i*incx; \ ctype* restrict yij = yj + i*incy; \ \ PASTEMAC(ch,scal2js)( *alpha, *xij, *yij ); \ \ for ( dim_t p = 1; p < d; ++p ) \ { \ ctype* restrict yijd = yij + p*ds_y; \ \ PASTEMAC(ch,copys)( *yij, *yijd ); \ } \ } \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype* restrict xj = x + j*ldx; \ ctype* restrict yj = y + j*ldy; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype* restrict xij = xj + i*incx; \ ctype* restrict yij = yj + i*incy; \ \ PASTEMAC(ch,scal2s)( *alpha, *xij, *yij ); \ \ for ( dim_t p = 1; p < d; ++p ) \ { \ ctype* restrict yijd = yij + p*ds_y; \ \ PASTEMAC(ch,copys)( *yij, *yijd ); \ } \ } \ } \ } \ } INSERT_GENTFUNCRO_BASIC0( scal2bbs_mxn ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, 
opname ) \ \ static void PASTEMAC(ch,opname) \ ( \ const conj_t conjx, \ const dim_t m, \ const dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, const inc_t incx, const inc_t ldx, \ ctype* restrict y, const inc_t incy, const inc_t ldy \ ) \ { \ /* Assume that the duplication factor is the row stride of y. */ \ const dim_t d = incy; \ const dim_t ds_y = 1; \ \ const inc_t incx2 = 2 * incx; \ const inc_t ldx2 = 2 * ldx; \ \ const inc_t incy2 = 2 * incy; \ const inc_t ldy2 = 2 * ldy; \ \ ctype_r* restrict alpha_r = ( ctype_r* )alpha; \ ctype_r* restrict alpha_i = ( ctype_r* )alpha + 1; \ ctype_r* restrict chi_r = ( ctype_r* )x; \ ctype_r* restrict chi_i = ( ctype_r* )x + 1; \ ctype_r* restrict psi_r = ( ctype_r* )y; \ ctype_r* restrict psi_i = ( ctype_r* )y + 1*d; \ \ if ( bli_is_conj( conjx ) ) \ { \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype_r* restrict chij_r = chi_r + j*ldx2; \ ctype_r* restrict chij_i = chi_i + j*ldx2; \ ctype_r* restrict psij_r = psi_r + j*ldy2; \ ctype_r* restrict psij_i = psi_i + j*ldy2; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype_r* restrict chiij_r = chij_r + i*incx2; \ ctype_r* restrict chiij_i = chij_i + i*incx2; \ ctype_r* restrict psiij_r = psij_r + i*incy2; \ ctype_r* restrict psiij_i = psij_i + i*incy2; \ \ PASTEMAC(ch,scal2jris)( *alpha_r, *alpha_i, \ *chiij_r, *chiij_i, \ *psiij_r, *psiij_i ); \ \ for ( dim_t p = 1; p < d; ++p ) \ { \ ctype_r* restrict psiijd_r = psiij_r + p*ds_y; \ ctype_r* restrict psiijd_i = psiij_i + p*ds_y; \ \ PASTEMAC(ch,copyris)( *psiij_r, *psiij_i, \ *psiijd_r, *psiijd_i ); \ } \ } \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype_r* restrict chij_r = chi_r + j*ldx2; \ ctype_r* restrict chij_i = chi_i + j*ldx2; \ ctype_r* restrict psij_r = psi_r + j*ldy2; \ ctype_r* restrict psij_i = psi_i + j*ldy2; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype_r* restrict chiij_r = chij_r + i*incx2; \ ctype_r* restrict chiij_i = chij_i + i*incx2; \ ctype_r* 
restrict psiij_r = psij_r + i*incy2; \ ctype_r* restrict psiij_i = psij_i + i*incy2; \ \ PASTEMAC(ch,scal2ris)( *alpha_r, *alpha_i, \ *chiij_r, *chiij_i, \ *psiij_r, *psiij_i ); \ \ for ( dim_t p = 1; p < d; ++p ) \ { \ ctype_r* restrict psiijd_r = psiij_r + p*ds_y; \ ctype_r* restrict psiijd_i = psiij_i + p*ds_y; \ \ PASTEMAC(ch,copyris)( *psiij_r, *psiij_i, \ *psiijd_r, *psiijd_i ); \ } \ } \ } \ } \ } INSERT_GENTFUNCCO_BASIC0( scal2bbs_mxn ) #endif blis-0.6.1/frame/include/level0/bb/bli_set0bbs_mxn.h000066400000000000000000000046371360743507500222050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SET0BBS_MXN_H #define BLIS_SET0BBS_MXN_H // set0bbs_mxn #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ static void PASTEMAC(ch,opname) \ ( \ const dim_t m, \ const dim_t n, \ ctype* restrict y, const inc_t incy, const inc_t ldy \ ) \ { \ /* Assume that the duplication factor is the row stride of y. */ \ const dim_t d = incy; \ const dim_t ds_y = 1; \ \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype* restrict yj = y + j*ldy; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype* restrict yij = yj + i*incy; \ \ for ( dim_t p = 0; p < d; ++p ) \ { \ ctype* restrict yijd = yij + p*ds_y; \ \ PASTEMAC(ch,set0s)( *yijd ); \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( set0bbs_mxn ) #endif blis-0.6.1/frame/include/level0/bli_absq2s.h000066400000000000000000000115721360743507500205650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ABSQR2_H #define BLIS_ABSQR2_H // absq2s // Notes: // - The first char encodes the type of x. // - The second char encodes the type of a. 
#define bli_ssabsq2s( x, a ) bli_sabsq2ris( bli_sreal(x), bli_simag(x), bli_sreal(a), 0.0F ) #define bli_dsabsq2s( x, a ) bli_dabsq2ris( bli_dreal(x), bli_dimag(x), bli_sreal(a), 0.0F ) #define bli_csabsq2s( x, a ) { float ti; bli_cabsq2ris( bli_creal(x), bli_cimag(x), bli_sreal(a), ti ); ( void )ti; } #define bli_zsabsq2s( x, a ) { float ti; bli_zabsq2ris( bli_zreal(x), bli_zimag(x), bli_sreal(a), ti ); ( void )ti; } #define bli_sdabsq2s( x, a ) bli_sabsq2ris( bli_sreal(x), bli_simag(x), bli_dreal(a), 0.0 ) #define bli_ddabsq2s( x, a ) bli_dabsq2ris( bli_dreal(x), bli_dimag(x), bli_dreal(a), 0.0 ) #define bli_cdabsq2s( x, a ) { double ti; bli_cabsq2ris( bli_creal(x), bli_cimag(x), bli_dreal(a), ti ); ( void )ti; } #define bli_zdabsq2s( x, a ) { double ti; bli_zabsq2ris( bli_zreal(x), bli_zimag(x), bli_dreal(a), ti ); ( void )ti; } #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scabsq2s( x, a ) bli_sabsq2ris( bli_sreal(x), bli_simag(x), bli_creal(a), bli_cimag(a) ) #define bli_dcabsq2s( x, a ) bli_dabsq2ris( bli_dreal(x), bli_dimag(x), bli_creal(a), bli_cimag(a) ) #define bli_ccabsq2s( x, a ) bli_cabsq2ris( bli_creal(x), bli_cimag(x), bli_creal(a), bli_cimag(a) ) #define bli_zcabsq2s( x, a ) bli_zabsq2ris( bli_zreal(x), bli_zimag(x), bli_creal(a), bli_cimag(a) ) #define bli_szabsq2s( x, a ) bli_sabsq2ris( bli_sreal(x), bli_simag(x), bli_zreal(a), bli_zimag(a) ) #define bli_dzabsq2s( x, a ) bli_dabsq2ris( bli_dreal(x), bli_dimag(x), bli_zreal(a), bli_zimag(a) ) #define bli_czabsq2s( x, a ) bli_cabsq2ris( bli_creal(x), bli_cimag(x), bli_zreal(a), bli_zimag(a) ) #define bli_zzabsq2s( x, a ) bli_zabsq2ris( bli_zreal(x), bli_zimag(x), bli_zreal(a), bli_zimag(a) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scabsq2s( x, a ) bli_scsets( (x) * (x), 0.0, (a) ) #define bli_dcabsq2s( x, a ) bli_dcsets( (x) * (x), 0.0, (a) ) #define bli_ccabsq2s( x, a ) bli_ccsets( bli_creal(x) * bli_creal(x) + \ bli_cimag(x) * bli_cimag(x), 0.0, (a) ) #define bli_zcabsq2s( x, a ) 
bli_zcsets( bli_zreal(x) * bli_zreal(x) + \ bli_zimag(x) * bli_zimag(x), 0.0, (a) ) #define bli_szabsq2s( x, a ) bli_szsets( (x) * (x), 0.0, (a) ) #define bli_dzabsq2s( x, a ) bli_dzsets( (x) * (x), 0.0, (a) ) #define bli_czabsq2s( x, a ) bli_czsets( bli_creal(x) * bli_creal(x) + \ bli_cimag(x) * bli_cimag(x), 0.0, (a) ) #define bli_zzabsq2s( x, a ) bli_zzsets( bli_zreal(x) * bli_zreal(x) + \ bli_zimag(x) * bli_zimag(x), 0.0, (a) ) #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sabsq2s( x, a ) bli_ssabsq2s( x, a ) #define bli_dabsq2s( x, a ) bli_ddabsq2s( x, a ) #define bli_cabsq2s( x, a ) bli_ccabsq2s( x, a ) #define bli_zabsq2s( x, a ) bli_zzabsq2s( x, a ) #endif blis-0.6.1/frame/include/level0/bli_abval2s.h000066400000000000000000000117601360743507500207230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ABVAL2S_H #define BLIS_ABVAL2S_H // abval2s // Notes: // - The first char encodes the type of x. // - The second char encodes the type of a. #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_ssabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_sreal(a), 0.0F ) #define bli_dsabval2s( x, a ) bli_dabval2ris( bli_dreal(x), bli_dimag(x), bli_sreal(a), 0.0F ) #define bli_csabval2s( x, a ) { float ti; bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_sreal(a), ti ); ( void )ti; } #define bli_zsabval2s( x, a ) { float ti; bli_zabval2ris( bli_zreal(x), bli_zimag(x), bli_sreal(a), ti ); ( void )ti; } #define bli_sdabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_dreal(a), 0.0 ) #define bli_ddabval2s( x, a ) bli_dabval2ris( bli_dreal(x), bli_dimag(x), bli_dreal(a), 0.0 ) #define bli_cdabval2s( x, a ) { double ti; bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_dreal(a), ti ); ( void )ti; } #define bli_zdabval2s( x, a ) { double ti; bli_zabval2ris( bli_zreal(x), bli_zimag(x), bli_dreal(a), ti ); ( void )ti; } #define bli_scabval2s( x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_creal(a), bli_cimag(a) ) #define bli_dcabval2s( x, a ) bli_dabval2ris( bli_dreal(x), bli_dimag(x), bli_creal(a), bli_cimag(a) ) #define bli_ccabval2s( x, a ) bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_creal(a), bli_cimag(a) ) #define bli_zcabval2s( x, a ) bli_zabval2ris( bli_zreal(x), bli_zimag(x), bli_creal(a), bli_cimag(a) ) #define bli_szabval2s( 
x, a ) bli_sabval2ris( bli_sreal(x), bli_simag(x), bli_zreal(a), bli_zimag(a) ) #define bli_dzabval2s( x, a ) bli_dabval2ris( bli_dreal(x), bli_dimag(x), bli_zreal(a), bli_zimag(a) ) #define bli_czabval2s( x, a ) bli_cabval2ris( bli_creal(x), bli_cimag(x), bli_zreal(a), bli_zimag(a) ) #define bli_zzabval2s( x, a ) bli_zabval2ris( bli_zreal(x), bli_zimag(x), bli_zreal(a), bli_zimag(a) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_ssabval2s( x, a ) bli_sssets( fabsf(x), 0.0, (a) ) #define bli_dsabval2s( x, a ) bli_dssets( fabs (x), 0.0, (a) ) #define bli_csabval2s( x, a ) bli_cssets( cabsf(x), 0.0, (a) ) #define bli_zsabval2s( x, a ) bli_zssets( cabs (x), 0.0, (a) ) #define bli_sdabval2s( x, a ) bli_sdsets( fabsf(x), 0.0, (a) ) #define bli_ddabval2s( x, a ) bli_ddsets( fabs (x), 0.0, (a) ) #define bli_cdabval2s( x, a ) bli_cdsets( cabsf(x), 0.0, (a) ) #define bli_zdabval2s( x, a ) bli_zdsets( cabs (x), 0.0, (a) ) #define bli_scabval2s( x, a ) bli_scsets( fabsf(x), 0.0, (a) ) #define bli_dcabval2s( x, a ) bli_dcsets( fabs (x), 0.0, (a) ) #define bli_ccabval2s( x, a ) bli_ccsets( cabsf(x), 0.0, (a) ) #define bli_zcabval2s( x, a ) bli_zcsets( cabs (x), 0.0, (a) ) #define bli_szabval2s( x, a ) bli_szsets( fabsf(x), 0.0, (a) ) #define bli_dzabval2s( x, a ) bli_dzsets( fabs (x), 0.0, (a) ) #define bli_czabval2s( x, a ) bli_czsets( cabsf(x), 0.0, (a) ) #define bli_zzabval2s( x, a ) bli_zzsets( cabs (x), 0.0, (a) ) #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sabval2s( x, a ) bli_ssabval2s( x, a ) #define bli_dabval2s( x, a ) bli_ddabval2s( x, a ) #define bli_cabval2s( x, a ) bli_ccabval2s( x, a ) #define bli_zabval2s( x, a ) bli_zzabval2s( x, a ) #endif blis-0.6.1/frame/include/level0/bli_add3s.h000066400000000000000000000311431360743507500203640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ADD3S_H #define BLIS_ADD3S_H // add3s // Notes: // - The first char encodes the type of a. // - The second char encodes the type of b. // - The third char encodes the type of c. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) ) #define bli_dssadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) ) #define bli_cssadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) ) #define bli_zssadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_sreal(c), bli_simag(c) ) #define bli_sdsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) ) #define bli_ddsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) ) #define bli_cdsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) ) #define bli_zdsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_sreal(c), bli_simag(c) ) #define bli_scsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) ) #define bli_dcsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) ) #define bli_ccsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) ) #define bli_zcsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_sreal(c), bli_simag(c) ) #define bli_szsadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) ) #define bli_dzsadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) ) #define bli_czsadd3s( a, b, c ) bli_sadd3ris( bli_creal(a), bli_cimag(a), 
bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) ) #define bli_zzsadd3s( a, b, c ) bli_sadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_sreal(c), bli_simag(c) ) // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) ) #define bli_dsdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) ) #define bli_csdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) ) #define bli_zsdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_dreal(c), bli_dimag(c) ) #define bli_sddadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_dddadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_cddadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_zddadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_scdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_dcdadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_ccdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_zcdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_szdadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_dzdadd3s( a, b, 
c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_czdadd3s( a, b, c ) bli_dadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) ) #define bli_zzdadd3s( a, b, c ) bli_dadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_dreal(c), bli_dimag(c) ) #ifndef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) ) #define bli_dscadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) ) #define bli_cscadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) ) #define bli_zscadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_creal(c), bli_cimag(c) ) #define bli_sdcadd3s( a, b, c ) bli_sadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) ) #define bli_ddcadd3s( a, b, c ) bli_sadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) ) #define bli_cdcadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) ) #define bli_zdcadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_creal(c), bli_cimag(c) ) #define bli_sccadd3s( a, b, c ) bli_cadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) ) #define bli_dccadd3s( a, b, c ) bli_cadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) ) #define bli_cccadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) ) #define bli_zccadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), 
bli_creal(b), bli_cimag(b), bli_creal(c), bli_cimag(c) ) #define bli_szcadd3s( a, b, c ) bli_cadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) ) #define bli_dzcadd3s( a, b, c ) bli_cadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) ) #define bli_czcadd3s( a, b, c ) bli_cadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) ) #define bli_zzcadd3s( a, b, c ) bli_cadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_creal(c), bli_cimag(c) ) // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) ) #define bli_dszadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) ) #define bli_cszadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) ) #define bli_zszadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_sreal(b), bli_simag(b), bli_zreal(c), bli_zimag(c) ) #define bli_sdzadd3s( a, b, c ) bli_dadd3ris( bli_sreal(a), bli_simag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_ddzadd3s( a, b, c ) bli_dadd3ris( bli_dreal(a), bli_dimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_cdzadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_zdzadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_dreal(b), bli_dimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_sczadd3s( a, b, c ) bli_zadd3ris( bli_sreal(a), bli_simag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_dczadd3s( a, b, c ) bli_zadd3ris( bli_dreal(a), bli_dimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_cczadd3s( a, b, 
c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_zczadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_creal(b), bli_cimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_szzadd3s( a, b, c ) bli_zadd3ris( bli_sreal(a), bli_simag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_dzzadd3s( a, b, c ) bli_zadd3ris( bli_dreal(a), bli_dimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_czzadd3s( a, b, c ) bli_zadd3ris( bli_creal(a), bli_cimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) ) #define bli_zzzadd3s( a, b, c ) bli_zadd3ris( bli_zreal(a), bli_zimag(a), bli_zreal(b), bli_zimag(b), bli_zreal(c), bli_zimag(c) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_dscadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_cscadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_zscadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_sdcadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_ddcadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_cdcadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_zdcadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_sccadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_dccadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_cccadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_zccadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_szcadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_dzcadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_czcadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_zzcadd3s( a, b, c ) { (c) = (a) + (b); } // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_dszadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_cszadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_zszadd3s( a, b, c ) { 
(c) = (a) + (b); } #define bli_sdzadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_ddzadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_cdzadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_zdzadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_sczadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_dczadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_cczadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_zczadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_szzadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_dzzadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_czzadd3s( a, b, c ) { (c) = (a) + (b); } #define bli_zzzadd3s( a, b, c ) { (c) = (a) + (b); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sadd3s( a, b, c ) bli_sssadd3s( a, b, c ) #define bli_dadd3s( a, b, c ) bli_dddadd3s( a, b, c ) #define bli_cadd3s( a, b, c ) bli_cccadd3s( a, b, c ) #define bli_zadd3s( a, b, c ) bli_zzzadd3s( a, b, c ) #endif blis-0.6.1/frame/include/level0/bli_addjs.h000066400000000000000000000100551360743507500204520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ADDJS_H #define BLIS_ADDJS_H // addjs // Notes: // - The first char encodes the type of a. // - The second char encodes the type of y. #define bli_ssaddjs( a, y ) bli_saddjris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) #define bli_dsaddjs( a, y ) bli_saddjris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) #define bli_csaddjs( a, y ) bli_saddjris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) #define bli_zsaddjs( a, y ) bli_saddjris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) #define bli_sdaddjs( a, y ) bli_daddjris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) #define bli_ddaddjs( a, y ) bli_daddjris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_cdaddjs( a, y ) bli_daddjris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_zdaddjs( a, y ) bli_daddjris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scaddjs( a, y ) bli_caddjris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) #define bli_dcaddjs( a, y ) bli_caddjris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) #define bli_ccaddjs( a, y ) bli_caddjris( bli_creal(a), bli_cimag(a), 
bli_creal(y), bli_cimag(y) ) #define bli_zcaddjs( a, y ) bli_caddjris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) #define bli_szaddjs( a, y ) bli_zaddjris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) #define bli_dzaddjs( a, y ) bli_zaddjris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_czaddjs( a, y ) bli_zaddjris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_zzaddjs( a, y ) bli_zaddjris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scaddjs( a, y ) { (y) += (a); } #define bli_dcaddjs( a, y ) { (y) += (a); } #define bli_ccaddjs( a, y ) { (y) += conjf(a); } #define bli_zcaddjs( a, y ) { (y) += conj (a); } #define bli_szaddjs( a, y ) { (y) += (a); } #define bli_dzaddjs( a, y ) { (y) += (a); } #define bli_czaddjs( a, y ) { (y) += conjf(a); } #define bli_zzaddjs( a, y ) { (y) += conj (a); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_saddjs( a, y ) bli_ssaddjs( a, y ) #define bli_daddjs( a, y ) bli_ddaddjs( a, y ) #define bli_caddjs( a, y ) bli_ccaddjs( a, y ) #define bli_zaddjs( a, y ) bli_zzaddjs( a, y ) #endif blis-0.6.1/frame/include/level0/bli_adds.h000066400000000000000000000077221360743507500203070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ADDS_H #define BLIS_ADDS_H // adds // Notes: // - The first char encodes the type of a. // - The second char encodes the type of y. 
#define bli_ssadds( a, y ) bli_saddris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) #define bli_dsadds( a, y ) bli_saddris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) #define bli_csadds( a, y ) bli_saddris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) #define bli_zsadds( a, y ) bli_saddris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) #define bli_sdadds( a, y ) bli_daddris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) #define bli_ddadds( a, y ) bli_daddris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_cdadds( a, y ) bli_daddris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_zdadds( a, y ) bli_daddris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scadds( a, y ) bli_caddris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) #define bli_dcadds( a, y ) bli_caddris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) #define bli_ccadds( a, y ) bli_caddris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) #define bli_zcadds( a, y ) bli_caddris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) #define bli_szadds( a, y ) bli_zaddris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) #define bli_dzadds( a, y ) bli_zaddris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_czadds( a, y ) bli_zaddris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_zzadds( a, y ) bli_zaddris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scadds( a, y ) { (y) += (a); } #define bli_dcadds( a, y ) { (y) += (a); } #define bli_ccadds( a, y ) { (y) += (a); } #define bli_zcadds( a, y ) { (y) += (a); } #define bli_szadds( a, y ) { (y) += (a); } #define bli_dzadds( a, y ) { (y) += (a); } #define bli_czadds( a, y ) { (y) += (a); } #define bli_zzadds( a, y ) { (y) += (a); } #endif // 
BLIS_ENABLE_C99_COMPLEX #define bli_sadds( a, y ) bli_ssadds( a, y ) #define bli_dadds( a, y ) bli_ddadds( a, y ) #define bli_cadds( a, y ) bli_ccadds( a, y ) #define bli_zadds( a, y ) bli_zzadds( a, y ) #endif blis-0.6.1/frame/include/level0/bli_adds_mxn.h000066400000000000000000000372411360743507500211700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_ADDS_MXN_H #define BLIS_ADDS_MXN_H // adds_mxn // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. // xy = ?s static void bli_ssadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_ssadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_ssadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_ssadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dsadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dsadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dsadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dsadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_csadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_csadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) 
bli_csadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_csadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zsadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zsadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zsadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zsadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } // xy = ?d static void bli_sdadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sdadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_sdadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sdadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_ddadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_ddadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 
0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_ddadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_ddadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_cdadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cdadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_cdadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cdadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zdadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zdadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zdadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zdadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } // xy = ?c static void bli_scadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_scadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); 
} else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_scadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_scadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dcadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dcadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dcadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dcadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_ccadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_ccadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_ccadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_ccadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zcadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) 
bli_zcadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zcadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zcadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } // xy = ?z static void bli_szadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_szadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_szadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_szadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dzadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dzadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dzadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dzadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_czadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t 
jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_czadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_czadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_czadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zzadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zzadds( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zzadds( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zzadds( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_sadds_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_ssadds_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); } static void bli_dadds_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_ddadds_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); } static void bli_cadds_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_ccadds_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); } static void bli_zadds_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_zzadds_mxn( m, 
n, x, rs_x, cs_x, y, rs_y, cs_y ); } #endif blis-0.6.1/frame/include/level0/bli_adds_mxn_uplo.h000066400000000000000000000130231360743507500222170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_ADDS_MXN_UPLO_H #define BLIS_ADDS_MXN_UPLO_H // adds_mxn_u #define bli_ssadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ { \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_ssadds( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } \ } #define bli_ddadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ { \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_ddadds( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } \ } #define bli_ccadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ { \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_ccadds( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } \ } #define bli_zzadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ { \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_zzadds( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } \ } // adds_mxn_l #define bli_ssadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ { \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_ssadds( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } \ } #define bli_ddadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ { \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_ddadds( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } \ } #define bli_ccadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j 
) \ { \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_ccadds( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } \ } #define bli_zzadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ { \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_zzadds( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } \ } #define bli_sadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ bli_ssadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ } #define bli_dadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ bli_ddadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ } #define bli_cadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ bli_ccadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ } #define bli_zadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ bli_zzadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ } #define bli_sadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ bli_ssadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ } #define bli_dadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ bli_ddadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ } #define bli_cadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ bli_ccadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ } #define bli_zadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \ { \ bli_zzadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \ } #endif blis-0.6.1/frame/include/level0/bli_axmys.h000066400000000000000000000312231360743507500205260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXMYS_H #define BLIS_AXMYS_H // axmys // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dssaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_cssaxmys( a, x, y ) bli_saxmyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_zssaxmys( a, x, y ) bli_saxmyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdsaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_ddsaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cdsaxmys( a, x, y ) bli_saxmyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zdsaxmys( a, x, y ) bli_saxmyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_scsaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_dcsaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_ccsaxmys( a, x, y ) bli_saxmyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zcsaxmys( a, x, y ) bli_saxmyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_szsaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_dzsaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_czsaxmys( a, x, y ) bli_saxmyris( bli_creal(a), bli_cimag(a), 
bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zzsaxmys( a, x, y ) bli_saxmyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dsdaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_csdaxmys( a, x, y ) bli_daxmyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zsdaxmys( a, x, y ) bli_daxmyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_sddaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dddaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cddaxmys( a, x, y ) bli_daxmyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zddaxmys( a, x, y ) bli_daxmyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_scdaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dcdaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ccdaxmys( a, x, y ) bli_daxmyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zcdaxmys( a, x, y ) bli_daxmyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_szdaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dzdaxmys( a, x, 
y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_czdaxmys( a, x, y ) bli_daxmyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zzdaxmys( a, x, y ) bli_daxmyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dscaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_cscaxmys( a, x, y ) bli_caxmyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_zscaxmys( a, x, y ) bli_caxmyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_sdcaxmys( a, x, y ) bli_saxmyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_ddcaxmys( a, x, y ) bli_saxmyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cdcaxmys( a, x, y ) bli_caxmyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zdcaxmys( a, x, y ) bli_caxmyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_sccaxmys( a, x, y ) bli_scaxmyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccaxmys( a, x, y ) bli_scaxmyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccaxmys( a, x, y ) bli_caxmyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zccaxmys( a, x, y ) bli_caxmyris( bli_zreal(a), bli_zimag(a), 
bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_szcaxmys( a, x, y ) bli_scaxmyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_dzcaxmys( a, x, y ) bli_scaxmyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_czcaxmys( a, x, y ) bli_caxmyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zzcaxmys( a, x, y ) bli_caxmyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dszaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cszaxmys( a, x, y ) bli_zaxmyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zszaxmys( a, x, y ) bli_zaxmyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_sdzaxmys( a, x, y ) bli_daxmyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_ddzaxmys( a, x, y ) bli_daxmyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cdzaxmys( a, x, y ) bli_zaxmyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zdzaxmys( a, x, y ) bli_zaxmyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_sczaxmys( a, x, y ) bli_dzaxmyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dczaxmys( a, x, y ) bli_dzaxmyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cczaxmys( 
a, x, y ) bli_zaxmyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zczaxmys( a, x, y ) bli_zaxmyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_szzaxmys( a, x, y ) bli_dzaxmyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzzaxmys( a, x, y ) bli_dzaxmyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czzaxmys( a, x, y ) bli_zaxmyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzzaxmys( a, x, y ) bli_zaxmyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_dscaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_cscaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_zscaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_sdcaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_ddcaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_cdcaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_zdcaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_sccaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_dccaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_cccaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_zccaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_szcaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_dzcaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_czcaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_zzcaxmys( a, x, y ) { (y) -= (a) * (x); } // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_dszaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_cszaxmys( a, x, y ) { (y) -= (a) * (x); } 
#define bli_zszaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_sdzaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_ddzaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_cdzaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_zdzaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_sczaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_dczaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_cczaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_zczaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_szzaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_dzzaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_czzaxmys( a, x, y ) { (y) -= (a) * (x); } #define bli_zzzaxmys( a, x, y ) { (y) -= (a) * (x); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_saxmys( a, x, y ) bli_sssaxmys( a, x, y ) #define bli_daxmys( a, x, y ) bli_dddaxmys( a, x, y ) #define bli_caxmys( a, x, y ) bli_cccaxmys( a, x, y ) #define bli_zaxmys( a, x, y ) bli_zzzaxmys( a, x, y ) #endif blis-0.6.1/frame/include/level0/bli_axpbyjs.h000066400000000000000000001552001360743507500210470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXPBYJS_H #define BLIS_AXPBYJS_H // axpbyjs // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of b. // - The fourth char encodes the type of y. // -- (axby) = (???s) ---------------------------------------------------------- #define bli_ssssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_dsssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_csssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zsssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_sdssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_ddssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), 
bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_cdssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zdssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_scssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_dcssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_ccssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zcssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_szssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_czssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzssaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_ssdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dsdsaxpbyjs( a, x, b, y ) 
bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_csdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zsdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sddsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dddsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cddsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zddsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_scdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dcdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ccdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zcdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_szdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define 
bli_dzdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_czdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzdsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sscsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dscsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cscsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zscsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sdcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ddcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cdcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zdcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sccsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), 
bli_simag(y) ) #define bli_dccsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cccsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zccsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_szcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_czcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzcsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sszsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dszsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cszsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zszsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sdzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), 
bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ddzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cdzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zdzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sczsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dczsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cczsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zczsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_szzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_czzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzzsaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) // -- (axby) = (???d) ---------------------------------------------------------- #define bli_sssdaxpbyjs( a, 
x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dssdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cssdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zssdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sdsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ddsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cdsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zdsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_scsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dcsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ccsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zcsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define 
bli_szsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzsdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ssddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dsddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_csddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zsddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sdddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ddddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cdddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zdddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), 
bli_dimag(y) ) #define bli_scddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dcddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ccddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zcddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_szddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzddaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sscdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dscdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cscdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zscdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), 
bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sdcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ddcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cdcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zdcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sccdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dccdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cccdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zccdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_szcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzcdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), 
bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sszdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dszdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cszdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zszdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sdzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ddzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cdzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zdzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sczdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dczdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cczdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zczdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), 
bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_szzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzzdaxpbyjs( a, x, b, y ) bli_rxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX // -- (axby) = (???c) ---------------------------------------------------------- #define bli_ssscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_dsscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_csscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zsscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_sdscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_ddscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_cdscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), 
bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zdscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_scscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_dcscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_ccscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zcscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_szscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_czscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzscaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_ssdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dsdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_csdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), 
bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zsdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sddcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dddcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cddcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zddcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_scdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dcdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ccdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zcdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_szdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_czdcaxpbyjs( a, x, b, y ) 
bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzdcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ssccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dsccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_csccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zsccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sdccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ddccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cdccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zdccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_scccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dcccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define 
bli_ccccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zcccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_szccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_czccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzccaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sszcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dszcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cszcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zszcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sdzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ddzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), 
bli_cimag(y) ) #define bli_cdzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zdzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sczcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dczcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cczcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zczcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_szzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_czzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzzcaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) // -- (axby) = (???z) ---------------------------------------------------------- #define bli_ssszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dsszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( 
bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_csszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zsszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sdszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ddszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cdszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zdszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_scszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dcszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ccszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zcszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzszaxpbyjs( a, x, b, y ) 
bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzszaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ssdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dsdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_csdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zsdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sddzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dddzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cddzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zddzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_scdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define 
bli_dcdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ccdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zcdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzdzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ssczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dsczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_csczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zsczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sdczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), 
bli_zimag(y) ) #define bli_ddczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cdczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zdczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_scczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dcczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ccczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zcczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzczaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sszzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), 
bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dszzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cszzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zszzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sdzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ddzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cdzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zdzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sczzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dczzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cczzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zczzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), 
bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzzzaxpbyjs( a, x, b, y ) bli_cxaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX // -- (axby) = (???c) ---------------------------------------------------------- #define bli_ssscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_dcscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_ccscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_zcscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_szscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_dzscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_czscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_zzscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_ssdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) 
* (y); } #define bli_csdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_dcdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_ccdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_zcdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_szdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_dzdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_czdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_zzdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_ssccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_dcccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_ccccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_zcccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_szccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define 
bli_dzccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_czccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_zzccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_sszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_dczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_cczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_zczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_szzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_dzzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_czzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_zzzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } // -- (axby) = (???z) ---------------------------------------------------------- #define bli_ssszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdszaxpbyjs( a, x, 
b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_dcszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_ccszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_zcszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_szszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_dzszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_czszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_zzszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_ssdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_dcdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_ccdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_zcdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_szdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_dzdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_czdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_zzdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_ssczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csczaxpbyjs( a, x, b, y ) { 
(y) = (a) * (x) + (b) * (y); } #define bli_zsczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_dcczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_ccczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_zcczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_szczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_dzczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_czczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_zzczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_sszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_dczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_cczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_zczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } #define bli_szzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_dzzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + 
(b) * (y); } #define bli_czzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #define bli_zzzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } #endif // BLIS_ENABLE_C99_COMPLEX /* Same-type shorthands: each one-letter macro below forwards its arguments unchanged to the four-letter variant whose type letters are all equal (s, d, c or z). */ #define bli_saxpbyjs( a, x, b, y ) bli_ssssaxpbyjs( a, x, b, y ) #define bli_daxpbyjs( a, x, b, y ) bli_ddddaxpbyjs( a, x, b, y ) #define bli_caxpbyjs( a, x, b, y ) bli_ccccaxpbyjs( a, x, b, y ) #define bli_zaxpbyjs( a, x, b, y ) bli_zzzzaxpbyjs( a, x, b, y ) #endif blis-0.6.1/frame/include/level0/bli_axpbys.h000066400000000000000000001525651360743507500207100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXPBYS_H #define BLIS_AXPBYS_H // axpbys // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of b. // - The fourth char encodes the type of y. // -- (axby) = (???s) ---------------------------------------------------------- #define bli_ssssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_dsssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_csssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zsssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_sdssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_ddssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_cdssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) 
#define bli_zdssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_scssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_dcssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_ccssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zcssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_szssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_czssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzssaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_ssdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dsdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_csdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define 
bli_zsdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sddsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dddsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cddsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zddsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_scdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dcdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ccdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zcdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_szdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_czdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define 
bli_zzdsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sscsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dscsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cscsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zscsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sdcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ddcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cdcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zdcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sccsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dccsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cccsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define 
bli_zccsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_szcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_czcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzcsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sszsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dszsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cszsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zszsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sdzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ddzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cdzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define 
bli_zdzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_sczsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dczsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cczsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zczsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_szzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_czzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzzsaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) /* The rows that follow repeat the table with y read through bli_dreal and bli_dimag, for every s, d, c, z choice of the a, x and b accessors. They forward to bli_rxaxpbyris exactly as the rows above do; only the accessors applied to y differ. NOTE(review): s and d presumably denote the real single- and double-precision types - confirm against the type definitions. */ // -- (axby) = (???d) ---------------------------------------------------------- #define bli_sssdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dssdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cssdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), 
bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zssdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sdsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ddsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cdsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zdsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_scsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dcsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ccsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zcsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_szsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), 
bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzsdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ssddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dsddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_csddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zsddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sdddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ddddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cdddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zdddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_scddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dcddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ccddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), 
bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zcddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_szddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzddaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sscdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dscdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cscdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zscdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sdcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ddcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cdcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), 
bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zdcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sccdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dccdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cccdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zccdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_szcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzcdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sszdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dszdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cszdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), 
bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zszdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sdzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ddzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cdzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zdzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sczdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dczdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cczdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zczdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_szzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), 
bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzzdaxpbys( a, x, b, y ) bli_rxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) /* From here on y is read through the c accessors (bli_creal and bli_cimag). While BLIS_ENABLE_C99_COMPLEX is not defined, each row still splits a, x, b and y into real and imaginary parts but forwards them to bli_cxaxpbyris rather than bli_rxaxpbyris; both helpers are defined outside this span. The matching #else branch redefines the same (???c) names with inline arithmetic. */ #ifndef BLIS_ENABLE_C99_COMPLEX // -- (axby) = (???c) ---------------------------------------------------------- #define bli_ssscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_dsscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_csscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zsscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_sdscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_ddscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_cdscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zdscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_scscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_dcscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), 
bli_cimag(y) ) #define bli_ccscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zcscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_szscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_czscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzscaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_ssdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dsdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_csdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zsdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sddcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dddcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), 
bli_cimag(y) ) #define bli_cddcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zddcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_scdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dcdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ccdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zcdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_szdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_czdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzdcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ssccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dsccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), 
bli_cimag(y) ) #define bli_csccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zsccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sdccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ddccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cdccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zdccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_scccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dcccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ccccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zcccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_szccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), 
bli_cimag(y) ) #define bli_czccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzccaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sszcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dszcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cszcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zszcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sdzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ddzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cdzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zdzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sczcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dczcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), 
bli_cimag(y) ) #define bli_cczcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zczcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_szzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_czzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzzcaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) /* The rows that follow repeat the table with y read through bli_zreal and bli_zimag. Like the (???c) rows before them they pass eight separate real and imaginary parts to bli_cxaxpbyris, which is defined outside this span. NOTE(review): these rows appear to sit inside a preprocessor branch taken when BLIS_ENABLE_C99_COMPLEX is not defined - confirm against the enclosing directives. */ // -- (axby) = (???z) ---------------------------------------------------------- #define bli_ssszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dsszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_csszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zsszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sdszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ddszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), 
bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cdszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zdszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_scszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dcszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ccszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zcszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzszaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ssdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dsdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), 
bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_csdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zsdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sddzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dddzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cddzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zddzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_scdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dcdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ccdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zcdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), 
bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzdzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ssczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dsczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_csczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zsczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sdczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ddczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cdczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zdczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_scczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dcczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), 
bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ccczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zcczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzczaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sszzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dszzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cszzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zszzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sdzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ddzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), 
bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cdzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zdzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sczzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dczzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cczzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zczzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzzzaxpbys( a, x, b, y ) bli_cxaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) /* Alternative definitions of the (???c) names for builds where BLIS_ENABLE_C99_COMPLEX is defined. Every row has the same body: a brace-enclosed statement that assigns (a) * (x) + (b) * (y) to y, applying the operators to the arguments directly instead of splitting them into parts. The y argument appears twice in the expansion, so it is evaluated twice, and because the expansion is a brace block a semicolon written after the call becomes a separate empty statement. NOTE(review): presumably the c and z operands are native C99 complex values in this configuration - confirm against the type definitions. */ #else // ifdef BLIS_ENABLE_C99_COMPLEX // -- (axby) = (???c) ---------------------------------------------------------- #define bli_ssscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsscaxpbys( a, x, b, y ) { (y) = (a) * 
(x) + (b) * (y); } #define bli_csscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dcscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ccscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zcscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_szscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dzscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_czscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zzscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ssdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dcdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ccdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zcdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_szdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dzdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define 
bli_czdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zzdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ssccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dcccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ccccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zcccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_szccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dzccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_czccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zzccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cczcaxpbys( a, x, b, y ) { (y) = 
(a) * (x) + (b) * (y); } #define bli_zczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_szzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dzzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_czzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zzzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } // -- (axby) = (???z) ---------------------------------------------------------- #define bli_ssszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dcszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ccszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zcszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_szszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dzszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_czszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zzszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ssdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } 
#define bli_cddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dcdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ccdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zcdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_szdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dzdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_czdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zzdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ssczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dsczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_csczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zsczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_scczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dcczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ccczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zcczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_szczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dzczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_czczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zzczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cszzaxpbys( a, x, b, y ) { 
(y) = (a) * (x) + (b) * (y); } #define bli_zszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_ddzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_sczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_cczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_szzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_dzzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_czzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #define bli_zzzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_saxpbys( a, x, b, y ) bli_ssssaxpbys( a, x, b, y ) #define bli_daxpbys( a, x, b, y ) bli_ddddaxpbys( a, x, b, y ) #define bli_caxpbys( a, x, b, y ) bli_ccccaxpbys( a, x, b, y ) #define bli_zaxpbys( a, x, b, y ) bli_zzzzaxpbys( a, x, b, y ) #endif blis-0.6.1/frame/include/level0/bli_axpyjs.h000066400000000000000000000316061360743507500207100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXPYJS_H #define BLIS_AXPYJS_H // axpyjs // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dssaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_cssaxpyjs( a, x, y ) bli_saxpyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_zssaxpyjs( a, x, y ) bli_saxpyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdsaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_ddsaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cdsaxpyjs( a, x, y ) bli_saxpyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zdsaxpyjs( a, x, y ) bli_saxpyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_scsaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_dcsaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_ccsaxpyjs( a, x, y ) bli_saxpyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zcsaxpyjs( a, x, y ) bli_saxpyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_szsaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_dzsaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_czsaxpyjs( a, x, y ) bli_saxpyjris( 
bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zzsaxpyjs( a, x, y ) bli_saxpyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dsdaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_csdaxpyjs( a, x, y ) bli_daxpyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zsdaxpyjs( a, x, y ) bli_daxpyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_sddaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dddaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cddaxpyjs( a, x, y ) bli_daxpyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zddaxpyjs( a, x, y ) bli_daxpyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_scdaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dcdaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ccdaxpyjs( a, x, y ) bli_daxpyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zcdaxpyjs( a, x, y ) bli_daxpyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_szdaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), 
bli_dreal(y), bli_dimag(y) ) #define bli_dzdaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_czdaxpyjs( a, x, y ) bli_daxpyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zzdaxpyjs( a, x, y ) bli_daxpyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dscaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_cscaxpyjs( a, x, y ) bli_caxpyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_zscaxpyjs( a, x, y ) bli_caxpyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_sdcaxpyjs( a, x, y ) bli_saxpyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_ddcaxpyjs( a, x, y ) bli_saxpyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cdcaxpyjs( a, x, y ) bli_caxpyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zdcaxpyjs( a, x, y ) bli_caxpyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_sccaxpyjs( a, x, y ) bli_scaxpyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccaxpyjs( a, x, y ) bli_scaxpyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccaxpyjs( a, x, y ) bli_caxpyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), 
bli_cimag(y) ) #define bli_zccaxpyjs( a, x, y ) bli_caxpyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_szcaxpyjs( a, x, y ) bli_scaxpyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_dzcaxpyjs( a, x, y ) bli_scaxpyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_czcaxpyjs( a, x, y ) bli_caxpyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zzcaxpyjs( a, x, y ) bli_caxpyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dszaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cszaxpyjs( a, x, y ) bli_zaxpyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zszaxpyjs( a, x, y ) bli_zaxpyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_sdzaxpyjs( a, x, y ) bli_daxpyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_ddzaxpyjs( a, x, y ) bli_daxpyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cdzaxpyjs( a, x, y ) bli_zaxpyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zdzaxpyjs( a, x, y ) bli_zaxpyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_sczaxpyjs( a, x, y ) bli_dzaxpyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dczaxpyjs( a, x, y ) 
bli_dzaxpyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cczaxpyjs( a, x, y ) bli_zaxpyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zczaxpyjs( a, x, y ) bli_zaxpyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_szzaxpyjs( a, x, y ) bli_dzaxpyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzzaxpyjs( a, x, y ) bli_dzaxpyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czzaxpyjs( a, x, y ) bli_zaxpyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzzaxpyjs( a, x, y ) bli_zaxpyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_dscaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_cscaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_zscaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_sdcaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_ddcaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_cdcaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_zdcaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_sccaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } #define bli_dccaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } #define bli_cccaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } #define bli_zccaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } #define bli_szcaxpyjs( a, x, y ) { (y) += (a) * conj(x); } #define bli_dzcaxpyjs( a, x, y ) { (y) += (a) * conj(x); } #define bli_czcaxpyjs( a, x, y ) { (y) += (a) * conj(x); } #define bli_zzcaxpyjs( a, x, y ) { (y) += (a) * conj(x); } // -- (axy) = (??z) 
------------------------------------------------------------ #define bli_sszaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_dszaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_cszaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_zszaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_sdzaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_ddzaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_cdzaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_zdzaxpyjs( a, x, y ) { (y) += (a) * (x); } #define bli_sczaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } #define bli_dczaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } #define bli_cczaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } #define bli_zczaxpyjs( a, x, y ) { (y) += (a) * conjf(x); } #define bli_szzaxpyjs( a, x, y ) { (y) += (a) * conj(x); } #define bli_dzzaxpyjs( a, x, y ) { (y) += (a) * conj(x); } #define bli_czzaxpyjs( a, x, y ) { (y) += (a) * conj(x); } #define bli_zzzaxpyjs( a, x, y ) { (y) += (a) * conj(x); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_saxpyjs( a, x, y ) bli_sssaxpyjs( a, x, y ) #define bli_daxpyjs( a, x, y ) bli_dddaxpyjs( a, x, y ) #define bli_caxpyjs( a, x, y ) bli_cccaxpyjs( a, x, y ) #define bli_zaxpyjs( a, x, y ) bli_zzzaxpyjs( a, x, y ) #endif blis-0.6.1/frame/include/level0/bli_axpys.h000066400000000000000000000312231360743507500205310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXPYS_H #define BLIS_AXPYS_H // axpys // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dssaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_cssaxpys( a, x, y ) bli_saxpyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_zssaxpys( a, x, y ) bli_saxpyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdsaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_ddsaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cdsaxpys( a, x, y ) bli_saxpyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zdsaxpys( a, x, y ) bli_saxpyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_scsaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_dcsaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_ccsaxpys( a, x, y ) bli_saxpyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zcsaxpys( a, x, y ) bli_saxpyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_szsaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_dzsaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_czsaxpys( a, x, y ) bli_saxpyris( bli_creal(a), bli_cimag(a), 
bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zzsaxpys( a, x, y ) bli_saxpyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dsdaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_csdaxpys( a, x, y ) bli_daxpyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zsdaxpys( a, x, y ) bli_daxpyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_sddaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dddaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cddaxpys( a, x, y ) bli_daxpyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zddaxpys( a, x, y ) bli_daxpyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_scdaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dcdaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ccdaxpys( a, x, y ) bli_daxpyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zcdaxpys( a, x, y ) bli_daxpyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_szdaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dzdaxpys( a, x, 
y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_czdaxpys( a, x, y ) bli_daxpyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zzdaxpys( a, x, y ) bli_daxpyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dscaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_cscaxpys( a, x, y ) bli_caxpyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_zscaxpys( a, x, y ) bli_caxpyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_sdcaxpys( a, x, y ) bli_saxpyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_ddcaxpys( a, x, y ) bli_saxpyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cdcaxpys( a, x, y ) bli_caxpyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zdcaxpys( a, x, y ) bli_caxpyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_sccaxpys( a, x, y ) bli_scaxpyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccaxpys( a, x, y ) bli_scaxpyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccaxpys( a, x, y ) bli_caxpyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zccaxpys( a, x, y ) bli_caxpyris( bli_zreal(a), bli_zimag(a), 
bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_szcaxpys( a, x, y ) bli_scaxpyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_dzcaxpys( a, x, y ) bli_scaxpyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_czcaxpys( a, x, y ) bli_caxpyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zzcaxpys( a, x, y ) bli_caxpyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dszaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cszaxpys( a, x, y ) bli_zaxpyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zszaxpys( a, x, y ) bli_zaxpyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_sdzaxpys( a, x, y ) bli_daxpyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_ddzaxpys( a, x, y ) bli_daxpyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cdzaxpys( a, x, y ) bli_zaxpyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zdzaxpys( a, x, y ) bli_zaxpyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_sczaxpys( a, x, y ) bli_dzaxpyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dczaxpys( a, x, y ) bli_dzaxpyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_cczaxpys( 
a, x, y ) bli_zaxpyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zczaxpys( a, x, y ) bli_zaxpyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_szzaxpys( a, x, y ) bli_dzaxpyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzzaxpys( a, x, y ) bli_dzaxpyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czzaxpys( a, x, y ) bli_zaxpyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzzaxpys( a, x, y ) bli_zaxpyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_dscaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_cscaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_zscaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_sdcaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_ddcaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_cdcaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_zdcaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_sccaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_dccaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_cccaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_zccaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_szcaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_dzcaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_czcaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_zzcaxpys( a, x, y ) { (y) += (a) * (x); } // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_dszaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_cszaxpys( a, x, y ) { (y) += (a) * (x); } 
#define bli_zszaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_sdzaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_ddzaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_cdzaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_zdzaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_sczaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_dczaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_cczaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_zczaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_szzaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_dzzaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_czzaxpys( a, x, y ) { (y) += (a) * (x); } #define bli_zzzaxpys( a, x, y ) { (y) += (a) * (x); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_saxpys( a, x, y ) bli_sssaxpys( a, x, y ) #define bli_daxpys( a, x, y ) bli_dddaxpys( a, x, y ) #define bli_caxpys( a, x, y ) bli_cccaxpys( a, x, y ) #define bli_zaxpys( a, x, y ) bli_zzzaxpys( a, x, y ) #endif blis-0.6.1/frame/include/level0/bli_conjs.h000066400000000000000000000042401360743507500205000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_CONJS_H #define BLIS_CONJS_H // conjs #define bli_sconjs( x ) bli_sconjris( bli_sreal(x), bli_simag(x) ) #define bli_dconjs( x ) bli_dconjris( bli_dreal(x), bli_dimag(x) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_cconjs( x ) bli_cconjris( bli_creal(x), bli_cimag(x) ) #define bli_zconjs( x ) bli_zconjris( bli_zreal(x), bli_zimag(x) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_cconjs( x ) { (x) = conjf(x); } #define bli_zconjs( x ) { (x) = conj (x); } #endif // BLIS_ENABLE_C99_COMPLEX #endif blis-0.6.1/frame/include/level0/bli_constants.h000066400000000000000000000062051360743507500214030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_CONSTANTS_H #define BLIS_CONSTANTS_H // return pointers to constants // 1 #define bli_s1 \ \ ( ( float* ) bli_obj_buffer_for_const( BLIS_FLOAT, &BLIS_ONE ) ) #define bli_d1 \ \ ( ( double* ) bli_obj_buffer_for_const( BLIS_DOUBLE, &BLIS_ONE ) ) #define bli_c1 \ \ ( ( scomplex* ) bli_obj_buffer_for_const( BLIS_SCOMPLEX, &BLIS_ONE ) ) #define bli_z1 \ \ ( ( dcomplex* ) bli_obj_buffer_for_const( BLIS_DCOMPLEX, &BLIS_ONE ) ) #define bli_i1 \ \ ( ( gint_t* ) bli_obj_buffer_for_const( BLIS_INT, &BLIS_ONE ) ) // 0 #define bli_s0 \ \ ( ( float* ) bli_obj_buffer_for_const( BLIS_FLOAT, &BLIS_ZERO ) ) #define bli_d0 \ \ ( ( double* ) bli_obj_buffer_for_const( BLIS_DOUBLE, &BLIS_ZERO ) ) #define bli_c0 \ \ ( ( scomplex* ) bli_obj_buffer_for_const( BLIS_SCOMPLEX, &BLIS_ZERO ) ) #define bli_z0 \ \ ( ( dcomplex* ) bli_obj_buffer_for_const( BLIS_DCOMPLEX, &BLIS_ZERO ) ) #define bli_i0 \ \ ( ( gint_t* ) bli_obj_buffer_for_const( BLIS_INT, &BLIS_ZERO ) ) // -1 #define bli_sm1 \ \ ( ( float* ) bli_obj_buffer_for_const( BLIS_FLOAT, &BLIS_MINUS_ONE ) ) #define bli_dm1 \ \ ( ( double* ) bli_obj_buffer_for_const( BLIS_DOUBLE, &BLIS_MINUS_ONE ) ) #define bli_cm1 \ \ ( ( scomplex* ) bli_obj_buffer_for_const( BLIS_SCOMPLEX, &BLIS_MINUS_ONE ) ) #define bli_zm1 \ \ ( ( dcomplex* ) bli_obj_buffer_for_const( BLIS_DCOMPLEX, &BLIS_MINUS_ONE ) ) #define bli_im1 \ \ ( ( gint_t* ) bli_obj_buffer_for_const( BLIS_INT, &BLIS_MINUS_ONE ) ) #endif blis-0.6.1/frame/include/level0/bli_copycjs.h000066400000000000000000000113171360743507500210410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYCJS_H #define BLIS_COPYCJS_H // copycjs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. 
#define bli_sscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zscopycjs( conjx, x, y ) bli_scopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ddcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zdcopycjs( conjx, x, y ) bli_dcopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_sccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zccopycjs( conjx, x, y ) bli_ccopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_szcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzcopycjs( conjx, x, y ) bli_zcopycjris( conjx, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_sccopycjs( conjx, x, y ) { (y) = (x); } 
#define bli_dccopycjs( conjx, x, y ) { (y) = (x); } #define bli_cccopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conjf(x) : (x) ); } #define bli_zccopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conj (x) : (x) ); } #define bli_szcopycjs( conjx, x, y ) { (y) = (x); } #define bli_dzcopycjs( conjx, x, y ) { (y) = (x); } #define bli_czcopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conjf(x) : (x) ); } #define bli_zzcopycjs( conjx, x, y ) { (y) = ( bli_is_conj( conjx ) ? conj (x) : (x) ); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_iicopycjs( conjx, x, y ) { (y) = ( gint_t ) (x); } #define bli_scopycjs( conjx, x, y ) bli_sscopycjs( conjx, x, y ) #define bli_dcopycjs( conjx, x, y ) bli_ddcopycjs( conjx, x, y ) #define bli_ccopycjs( conjx, x, y ) bli_cccopycjs( conjx, x, y ) #define bli_zcopycjs( conjx, x, y ) bli_zzcopycjs( conjx, x, y ) #define bli_icopycjs( conjx, x, y ) bli_iicopycjs( conjx, x, y ) #endif blis-0.6.1/frame/include/level0/bli_copyjnzs.h000066400000000000000000000077501360743507500212540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYJNZS_H #define BLIS_COPYJNZS_H // copyjnzs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_sscopyjnzs( x, y ) bli_scopyjris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dscopyjnzs( x, y ) bli_scopyjris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cscopyjnzs( x, y ) bli_scopyjris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zscopyjnzs( x, y ) bli_scopyjris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdcopyjnzs( x, y ) bli_dcopyjris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ddcopyjnzs( x, y ) bli_dcopyjris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cdcopyjnzs( x, y ) bli_dcopyjris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zdcopyjnzs( x, y ) bli_dcopyjris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) // NOTE: Use of scopyjris() (implemented in terms of scopyris()), is so we // don't touch the imaginary part of y. 
#define bli_sccopyjnzs( x, y ) bli_scopyjris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccopyjnzs( x, y ) bli_scopyjris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccopyjnzs( x, y ) bli_ccopyjris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zccopyjnzs( x, y ) bli_ccopyjris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) // NOTE: Use of dcopyjris() (implemented in terms of dcopyris()), is so we // don't touch the imaginary part of y. #define bli_szcopyjnzs( x, y ) bli_dcopyjris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzcopyjnzs( x, y ) bli_dcopyjris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czcopyjnzs( x, y ) bli_zcopyjris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzcopyjnzs( x, y ) bli_zcopyjris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_iicopyjnzs( x, y ) { (y) = ( gint_t ) (x); } #define bli_scopyjnzs( x, y ) bli_sscopyjnzs( x, y ) #define bli_dcopyjnzs( x, y ) bli_ddcopyjnzs( x, y ) #define bli_ccopyjnzs( x, y ) bli_cccopyjnzs( x, y ) #define bli_zcopyjnzs( x, y ) bli_zzcopyjnzs( x, y ) #define bli_icopyjnzs( x, y ) bli_iicopyjnzs( x, y ) #endif blis-0.6.1/frame/include/level0/bli_copyjs.h000066400000000000000000000103041360743507500206710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYJS_H #define BLIS_COPYJS_H // copyjs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. 
#define bli_sscopyjs( x, y ) bli_scopyjris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dscopyjs( x, y ) bli_scopyjris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cscopyjs( x, y ) bli_scopyjris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zscopyjs( x, y ) bli_scopyjris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdcopyjs( x, y ) bli_dcopyjris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ddcopyjs( x, y ) bli_dcopyjris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cdcopyjs( x, y ) bli_dcopyjris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zdcopyjs( x, y ) bli_dcopyjris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_sccopyjs( x, y ) bli_ccopyjris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccopyjs( x, y ) bli_ccopyjris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccopyjs( x, y ) bli_ccopyjris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zccopyjs( x, y ) bli_ccopyjris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_szcopyjs( x, y ) bli_zcopyjris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzcopyjs( x, y ) bli_zcopyjris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czcopyjs( x, y ) bli_zcopyjris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzcopyjs( x, y ) bli_zcopyjris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_sccopyjs( x, y ) { (y) = (x); } #define bli_dccopyjs( x, y ) { (y) = (x); } #define bli_cccopyjs( x, y ) { (y) = conjf(x); } #define bli_zccopyjs( x, y ) { (y) = conj (x); } #define bli_szcopyjs( x, y ) { (y) = (x); } #define bli_dzcopyjs( x, y ) { (y) = (x); } #define bli_czcopyjs( x, y ) { (y) = 
conjf(x); } #define bli_zzcopyjs( x, y ) { (y) = conj (x); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_iicopyjs( x, y ) { (y) = ( gint_t ) (x); } #define bli_scopyjs( x, y ) bli_sscopyjs( x, y ) #define bli_dcopyjs( x, y ) bli_ddcopyjs( x, y ) #define bli_ccopyjs( x, y ) bli_cccopyjs( x, y ) #define bli_zcopyjs( x, y ) bli_zzcopyjs( x, y ) #define bli_icopyjs( x, y ) bli_iicopyjs( x, y ) #endif blis-0.6.1/frame/include/level0/bli_copynzs.h000066400000000000000000000075461360743507500211050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYNZS_H #define BLIS_COPYNZS_H // copynzs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_sscopynzs( x, y ) bli_scopyris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dscopynzs( x, y ) bli_scopyris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cscopynzs( x, y ) bli_scopyris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zscopynzs( x, y ) bli_scopyris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdcopynzs( x, y ) bli_dcopyris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ddcopynzs( x, y ) bli_dcopyris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cdcopynzs( x, y ) bli_dcopyris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zdcopynzs( x, y ) bli_dcopyris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) // NOTE: Use of scopyris() is so we don't touch the imaginary part of y. 
#define bli_sccopynzs( x, y ) bli_scopyris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccopynzs( x, y ) bli_scopyris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccopynzs( x, y ) bli_ccopyris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zccopynzs( x, y ) bli_ccopyris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) // NOTE: Use of dcopyris() is so we don't touch the imaginary part of y. #define bli_szcopynzs( x, y ) bli_dcopyris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzcopynzs( x, y ) bli_dcopyris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czcopynzs( x, y ) bli_zcopyris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzcopynzs( x, y ) bli_zcopyris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_iicopynzs( x, y ) { (y) = ( gint_t ) (x); } #define bli_scopynzs( x, y ) bli_sscopynzs( x, y ) #define bli_dcopynzs( x, y ) bli_ddcopynzs( x, y ) #define bli_ccopynzs( x, y ) bli_cccopynzs( x, y ) #define bli_zcopynzs( x, y ) bli_zzcopynzs( x, y ) #define bli_icopynzs( x, y ) bli_iicopynzs( x, y ) #endif blis-0.6.1/frame/include/level0/bli_copys.h000066400000000000000000000075101360743507500205240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYS_H #define BLIS_COPYS_H // copys // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_sscopys( x, y ) bli_scopyris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dscopys( x, y ) bli_scopyris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cscopys( x, y ) bli_scopyris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zscopys( x, y ) bli_scopyris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdcopys( x, y ) bli_dcopyris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ddcopys( x, y ) bli_dcopyris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cdcopys( x, y ) bli_dcopyris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zdcopys( x, y ) bli_dcopyris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) // NOTE: Use of ccopyris() means the imaginary part of y will be overwritten with zero. 
#define bli_sccopys( x, y ) bli_ccopyris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccopys( x, y ) bli_ccopyris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccopys( x, y ) bli_ccopyris( bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zccopys( x, y ) bli_ccopyris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) // NOTE: Use of zcopyris() means the imaginary part of y will be overwritten with zero. #define bli_szcopys( x, y ) bli_zcopyris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzcopys( x, y ) bli_zcopyris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czcopys( x, y ) bli_zcopyris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzcopys( x, y ) bli_zcopyris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_iicopys( x, y ) { (y) = ( gint_t ) (x); } #define bli_scopys( x, y ) bli_sscopys( x, y ) #define bli_dcopys( x, y ) bli_ddcopys( x, y ) #define bli_ccopys( x, y ) bli_cccopys( x, y ) #define bli_zcopys( x, y ) bli_zzcopys( x, y ) #define bli_icopys( x, y ) bli_iicopys( x, y ) #endif blis-0.6.1/frame/include/level0/bli_copys_mxn.h000066400000000000000000000374551360743507500214210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYS_MXN_H #define BLIS_COPYS_MXN_H // copys_mxn // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. 
// xy = ?s static void bli_sscopys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sscopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_sscopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sscopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dscopys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dscopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dscopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dscopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_cscopys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cscopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_cscopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) 
bli_cscopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zscopys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zscopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zscopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zscopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } // xy = ?d static void bli_sdcopys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sdcopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_sdcopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sdcopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_ddcopys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_ddcopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_ddcopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( 
dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_ddcopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_cdcopys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cdcopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_cdcopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cdcopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zdcopys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zdcopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zdcopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zdcopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } // xy = ?c static void bli_sccopys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sccopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) 
bli_sccopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sccopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dccopys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dccopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dccopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dccopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_cccopys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cccopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_cccopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cccopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zccopys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zccopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( 
dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zccopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zccopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } // xy = ?c static void bli_szcopys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_szcopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_szcopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_szcopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dzcopys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dzcopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dzcopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dzcopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_czcopys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_czcopys( *(x + ii + 
jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_czcopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_czcopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zzcopys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zzcopys( *(x + ii + jj*cs_x), *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zzcopys( *(x + ii*rs_x + jj), *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zzcopys( *(x + ii*rs_x + jj*cs_x), *(y + ii*rs_y + jj*cs_y) ); } } static void bli_scopys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_sscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); } static void bli_dcopys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_ddcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); } static void bli_ccopys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_cccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); } static void bli_zcopys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_zzcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); } #endif 
blis-0.6.1/frame/include/level0/bli_dotjs.h000066400000000000000000000133561360743507500205170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_DOTJS_H #define BLIS_DOTJS_H // dotjs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. // - The third char encodes the type of rho. // - x is used in conjugated form. 
#define bli_sssdotjs( x, y, a ) bli_sssaxpyjs( y, x, a ) #define bli_dssdotjs( x, y, a ) bli_sdsaxpyjs( y, x, a ) #define bli_cssdotjs( x, y, a ) bli_scsaxpyjs( y, x, a ) #define bli_zssdotjs( x, y, a ) bli_szsaxpyjs( y, x, a ) #define bli_sdsdotjs( x, y, a ) bli_dssaxpyjs( y, x, a ) #define bli_ddsdotjs( x, y, a ) bli_ddsaxpyjs( y, x, a ) #define bli_cdsdotjs( x, y, a ) bli_dcsaxpyjs( y, x, a ) #define bli_zdsdotjs( x, y, a ) bli_dzsaxpyjs( y, x, a ) #define bli_scsdotjs( x, y, a ) bli_cssaxpyjs( y, x, a ) #define bli_dcsdotjs( x, y, a ) bli_cdsaxpyjs( y, x, a ) #define bli_ccsdotjs( x, y, a ) bli_ccsaxpyjs( y, x, a ) #define bli_zcsdotjs( x, y, a ) bli_czsaxpyjs( y, x, a ) #define bli_szsdotjs( x, y, a ) bli_zssaxpyjs( y, x, a ) #define bli_dzsdotjs( x, y, a ) bli_zdsaxpyjs( y, x, a ) #define bli_czsdotjs( x, y, a ) bli_zcsaxpyjs( y, x, a ) #define bli_zzsdotjs( x, y, a ) bli_zzsaxpyjs( y, x, a ) #define bli_ssddotjs( x, y, a ) bli_ssdaxpyjs( y, x, a ) #define bli_dsddotjs( x, y, a ) bli_sddaxpyjs( y, x, a ) #define bli_csddotjs( x, y, a ) bli_scdaxpyjs( y, x, a ) #define bli_zsddotjs( x, y, a ) bli_szdaxpyjs( y, x, a ) #define bli_sdddotjs( x, y, a ) bli_dsdaxpyjs( y, x, a ) #define bli_ddddotjs( x, y, a ) bli_dddaxpyjs( y, x, a ) #define bli_cdddotjs( x, y, a ) bli_dcdaxpyjs( y, x, a ) #define bli_zdddotjs( x, y, a ) bli_dzdaxpyjs( y, x, a ) #define bli_scddotjs( x, y, a ) bli_csdaxpyjs( y, x, a ) #define bli_dcddotjs( x, y, a ) bli_cddaxpyjs( y, x, a ) #define bli_ccddotjs( x, y, a ) bli_ccdaxpyjs( y, x, a ) #define bli_zcddotjs( x, y, a ) bli_czdaxpyjs( y, x, a ) #define bli_szddotjs( x, y, a ) bli_zsdaxpyjs( y, x, a ) #define bli_dzddotjs( x, y, a ) bli_zddaxpyjs( y, x, a ) #define bli_czddotjs( x, y, a ) bli_zcdaxpyjs( y, x, a ) #define bli_zzddotjs( x, y, a ) bli_zzdaxpyjs( y, x, a ) #define bli_sscdotjs( x, y, a ) bli_sscaxpyjs( y, x, a ) #define bli_dscdotjs( x, y, a ) bli_sdcaxpyjs( y, x, a ) #define bli_cscdotjs( x, y, a ) bli_sccaxpyjs( y, x, a ) 
#define bli_zscdotjs( x, y, a ) bli_szcaxpyjs( y, x, a ) #define bli_sdcdotjs( x, y, a ) bli_dscaxpyjs( y, x, a ) #define bli_ddcdotjs( x, y, a ) bli_ddcaxpyjs( y, x, a ) #define bli_cdcdotjs( x, y, a ) bli_dccaxpyjs( y, x, a ) #define bli_zdcdotjs( x, y, a ) bli_dzcaxpyjs( y, x, a ) #define bli_sccdotjs( x, y, a ) bli_cscaxpyjs( y, x, a ) #define bli_dccdotjs( x, y, a ) bli_cdcaxpyjs( y, x, a ) #define bli_cccdotjs( x, y, a ) bli_cccaxpyjs( y, x, a ) #define bli_zccdotjs( x, y, a ) bli_czcaxpyjs( y, x, a ) #define bli_szcdotjs( x, y, a ) bli_zscaxpyjs( y, x, a ) #define bli_dzcdotjs( x, y, a ) bli_zdcaxpyjs( y, x, a ) #define bli_czcdotjs( x, y, a ) bli_zccaxpyjs( y, x, a ) #define bli_zzcdotjs( x, y, a ) bli_zzcaxpyjs( y, x, a ) #define bli_sszdotjs( x, y, a ) bli_sszaxpyjs( y, x, a ) #define bli_dszdotjs( x, y, a ) bli_sdzaxpyjs( y, x, a ) #define bli_cszdotjs( x, y, a ) bli_sczaxpyjs( y, x, a ) #define bli_zszdotjs( x, y, a ) bli_szzaxpyjs( y, x, a ) #define bli_sdzdotjs( x, y, a ) bli_dszaxpyjs( y, x, a ) #define bli_ddzdotjs( x, y, a ) bli_ddzaxpyjs( y, x, a ) #define bli_cdzdotjs( x, y, a ) bli_dczaxpyjs( y, x, a ) #define bli_zdzdotjs( x, y, a ) bli_dzzaxpyjs( y, x, a ) #define bli_sczdotjs( x, y, a ) bli_cszaxpyjs( y, x, a ) #define bli_dczdotjs( x, y, a ) bli_cdzaxpyjs( y, x, a ) #define bli_cczdotjs( x, y, a ) bli_cczaxpyjs( y, x, a ) #define bli_zczdotjs( x, y, a ) bli_czzaxpyjs( y, x, a ) #define bli_szzdotjs( x, y, a ) bli_zszaxpyjs( y, x, a ) #define bli_dzzdotjs( x, y, a ) bli_zdzaxpyjs( y, x, a ) #define bli_czzdotjs( x, y, a ) bli_zczaxpyjs( y, x, a ) #define bli_zzzdotjs( x, y, a ) bli_zzzaxpyjs( y, x, a ) #define bli_sdotjs( x, y, a ) bli_sssdotjs( x, y, a ) #define bli_ddotjs( x, y, a ) bli_ddddotjs( x, y, a ) #define bli_cdotjs( x, y, a ) bli_cccdotjs( x, y, a ) #define bli_zdotjs( x, y, a ) bli_zzzdotjs( x, y, a ) #endif 
blis-0.6.1/frame/include/level0/bli_dots.h000066400000000000000000000131011360743507500203310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_DOTS_H #define BLIS_DOTS_H // dots // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. // - The third char encodes the type of rho. 
#define bli_sssdots( x, y, a ) bli_sssaxpys( x, y, a ) #define bli_dssdots( x, y, a ) bli_dssaxpys( x, y, a ) #define bli_cssdots( x, y, a ) bli_cssaxpys( x, y, a ) #define bli_zssdots( x, y, a ) bli_zssaxpys( x, y, a ) #define bli_sdsdots( x, y, a ) bli_sdsaxpys( x, y, a ) #define bli_ddsdots( x, y, a ) bli_ddsaxpys( x, y, a ) #define bli_cdsdots( x, y, a ) bli_cdsaxpys( x, y, a ) #define bli_zdsdots( x, y, a ) bli_zdsaxpys( x, y, a ) #define bli_scsdots( x, y, a ) bli_scsaxpys( x, y, a ) #define bli_dcsdots( x, y, a ) bli_dcsaxpys( x, y, a ) #define bli_ccsdots( x, y, a ) bli_ccsaxpys( x, y, a ) #define bli_zcsdots( x, y, a ) bli_zcsaxpys( x, y, a ) #define bli_szsdots( x, y, a ) bli_szsaxpys( x, y, a ) #define bli_dzsdots( x, y, a ) bli_dzsaxpys( x, y, a ) #define bli_czsdots( x, y, a ) bli_czsaxpys( x, y, a ) #define bli_zzsdots( x, y, a ) bli_zzsaxpys( x, y, a ) #define bli_ssddots( x, y, a ) bli_ssdaxpys( x, y, a ) #define bli_dsddots( x, y, a ) bli_dsdaxpys( x, y, a ) #define bli_csddots( x, y, a ) bli_csdaxpys( x, y, a ) #define bli_zsddots( x, y, a ) bli_zsdaxpys( x, y, a ) #define bli_sdddots( x, y, a ) bli_sddaxpys( x, y, a ) #define bli_ddddots( x, y, a ) bli_dddaxpys( x, y, a ) #define bli_cdddots( x, y, a ) bli_cddaxpys( x, y, a ) #define bli_zdddots( x, y, a ) bli_zddaxpys( x, y, a ) #define bli_scddots( x, y, a ) bli_scdaxpys( x, y, a ) #define bli_dcddots( x, y, a ) bli_dcdaxpys( x, y, a ) #define bli_ccddots( x, y, a ) bli_ccdaxpys( x, y, a ) #define bli_zcddots( x, y, a ) bli_zcdaxpys( x, y, a ) #define bli_szddots( x, y, a ) bli_szdaxpys( x, y, a ) #define bli_dzddots( x, y, a ) bli_dzdaxpys( x, y, a ) #define bli_czddots( x, y, a ) bli_czdaxpys( x, y, a ) #define bli_zzddots( x, y, a ) bli_zzdaxpys( x, y, a ) #define bli_sscdots( x, y, a ) bli_sscaxpys( x, y, a ) #define bli_dscdots( x, y, a ) bli_dscaxpys( x, y, a ) #define bli_cscdots( x, y, a ) bli_cscaxpys( x, y, a ) #define bli_zscdots( x, y, a ) bli_zscaxpys( x, y, a ) #define 
bli_sdcdots( x, y, a ) bli_sdcaxpys( x, y, a ) #define bli_ddcdots( x, y, a ) bli_ddcaxpys( x, y, a ) #define bli_cdcdots( x, y, a ) bli_cdcaxpys( x, y, a ) #define bli_zdcdots( x, y, a ) bli_zdcaxpys( x, y, a ) #define bli_sccdots( x, y, a ) bli_sccaxpys( x, y, a ) #define bli_dccdots( x, y, a ) bli_dccaxpys( x, y, a ) #define bli_cccdots( x, y, a ) bli_cccaxpys( x, y, a ) #define bli_zccdots( x, y, a ) bli_zccaxpys( x, y, a ) #define bli_szcdots( x, y, a ) bli_szcaxpys( x, y, a ) #define bli_dzcdots( x, y, a ) bli_dzcaxpys( x, y, a ) #define bli_czcdots( x, y, a ) bli_czcaxpys( x, y, a ) #define bli_zzcdots( x, y, a ) bli_zzcaxpys( x, y, a ) #define bli_sszdots( x, y, a ) bli_sszaxpys( x, y, a ) #define bli_dszdots( x, y, a ) bli_dszaxpys( x, y, a ) #define bli_cszdots( x, y, a ) bli_cszaxpys( x, y, a ) #define bli_zszdots( x, y, a ) bli_zszaxpys( x, y, a ) #define bli_sdzdots( x, y, a ) bli_sdzaxpys( x, y, a ) #define bli_ddzdots( x, y, a ) bli_ddzaxpys( x, y, a ) #define bli_cdzdots( x, y, a ) bli_cdzaxpys( x, y, a ) #define bli_zdzdots( x, y, a ) bli_zdzaxpys( x, y, a ) #define bli_sczdots( x, y, a ) bli_sczaxpys( x, y, a ) #define bli_dczdots( x, y, a ) bli_dczaxpys( x, y, a ) #define bli_cczdots( x, y, a ) bli_cczaxpys( x, y, a ) #define bli_zczdots( x, y, a ) bli_zczaxpys( x, y, a ) #define bli_szzdots( x, y, a ) bli_szzaxpys( x, y, a ) #define bli_dzzdots( x, y, a ) bli_dzzaxpys( x, y, a ) #define bli_czzdots( x, y, a ) bli_czzaxpys( x, y, a ) #define bli_zzzdots( x, y, a ) bli_zzzaxpys( x, y, a ) #define bli_sdots( x, y, a ) bli_sssdots( x, y, a ) #define bli_ddots( x, y, a ) bli_ddddots( x, y, a ) #define bli_cdots( x, y, a ) bli_cccdots( x, y, a ) #define bli_zdots( x, y, a ) bli_zzzdots( x, y, a ) #endif blis-0.6.1/frame/include/level0/bli_eq.h000066400000000000000000000077561360743507500200100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_EQ_H #define BLIS_EQ_H // eq (passed by value) #define bli_seq( a, b ) ( (a) == (b) ) #define bli_deq( a, b ) ( (a) == (b) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_ceq( a, b ) ( ( bli_creal(a) == bli_creal(b) ) && ( bli_cimag(a) == bli_cimag(b) ) ) #define bli_zeq( a, b ) ( ( bli_zreal(a) == bli_zreal(b) ) && ( bli_zimag(a) == bli_zimag(b) ) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_ceq( a, b ) ( (a) == (b) ) #define bli_zeq( a, b ) ( (a) == (b) ) #endif // BLIS_ENABLE_C99_COMPLEX #define bli_ieq( a, b ) ( (a) == (b) ) // eqtori (passed by value) #define bli_seqtori( a, br, bi ) ( (a) == (br) ) #define bli_deqtori( a, br, bi ) ( (a) == (br) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_ceqtori( a, br, bi ) ( ( bli_creal(a) == (br) ) && ( bli_cimag(a) == (bi) ) ) #define bli_zeqtori( a, br, bi ) ( ( bli_zreal(a) == (br) ) && ( bli_zimag(a) == (bi) ) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_ceqtori( a, br, bi ) ( (a) == (br) + (bi) * (I) ) #define bli_zeqtori( a, br, bi ) ( (a) == (br) + (bi) * (I) ) #endif // BLIS_ENABLE_C99_COMPLEX // eqa (passed by address) #define bli_seqa( a, b ) bli_seq( *(( float* )(a)), *(( float* )(b)) ) #define bli_deqa( a, b ) bli_deq( *(( double* )(a)), *(( double* )(b)) ) #define bli_ceqa( a, b ) bli_ceq( *(( scomplex* )(a)), *(( scomplex* )(b)) ) #define bli_zeqa( a, b ) bli_zeq( *(( dcomplex* )(a)), *(( dcomplex* )(b)) ) #define bli_ieqa( a, b ) bli_ieq( *(( gint_t* )(a)), *(( gint_t* )(b)) ) // eq1 #define bli_seq1( a ) bli_seqtori( (a), 1.0F, 0.0F ) #define bli_deq1( a ) bli_deqtori( (a), 1.0, 0.0 ) #define bli_ceq1( a ) bli_ceqtori( (a), 1.0F, 0.0F ) #define bli_zeq1( a ) bli_zeqtori( (a), 1.0, 0.0 ) #define bli_ieq1( a ) bli_ieq ( (a), 1 ) // eq0 #define bli_seq0( a ) bli_seqtori( (a), 0.0F, 0.0F ) #define bli_deq0( a ) bli_deqtori( (a), 0.0, 0.0 ) #define bli_ceq0( a ) bli_ceqtori( (a), 0.0F, 0.0F ) #define bli_zeq0( a ) bli_zeqtori( (a), 0.0, 0.0 ) #define bli_ieq0( a ) bli_ieq ( (a), 0 
) // eqm1 #define bli_seqm1( a ) bli_seqtori( (a), -1.0F, 0.0F ) #define bli_deqm1( a ) bli_deqtori( (a), -1.0, 0.0 ) #define bli_ceqm1( a ) bli_ceqtori( (a), -1.0F, 0.0F ) #define bli_zeqm1( a ) bli_zeqtori( (a), -1.0, 0.0 ) #define bli_ieqm1( a ) bli_ieq ( (a), -1 ) #endif blis-0.6.1/frame/include/level0/bli_fprints.h000066400000000000000000000044271360743507500210600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_FPRINTS_H #define BLIS_FPRINTS_H // prints #define bli_sfprints( file, spec, x ) \ { \ fprintf( file, spec, (x) ); \ } #define bli_dfprints( file, spec, x ) \ { \ fprintf( file, spec, (x) ); \ } #define bli_cfprints( file, spec, x ) \ { \ fprintf( file, spec, bli_creal(x) ); \ fprintf( file, " + " ); \ fprintf( file, spec, bli_cimag(x) ); \ fprintf( file, " " ); \ } #define bli_zfprints( file, spec, x ) \ { \ fprintf( file, spec, bli_zreal(x) ); \ fprintf( file, " + " ); \ fprintf( file, spec, bli_zimag(x) ); \ fprintf( file, " " ); \ } #define bli_ifprints( file, spec, x ) \ { \ fprintf( file, spec, (x) ); \ } #endif blis-0.6.1/frame/include/level0/bli_gets.h000066400000000000000000000076451360743507500203420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_GETS_H #define BLIS_GETS_H // gets // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_ssgets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = bli_simag(x); } #define bli_dsgets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = bli_dimag(x); } #define bli_csgets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = bli_cimag(x); } #define bli_zsgets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = bli_zimag(x); } #define bli_isgets( x, yr, yi ) { (yr) = ( float )(x); (yi) = 0.0F; } #define bli_sdgets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = bli_simag(x); } #define bli_ddgets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = bli_dimag(x); } #define bli_cdgets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = bli_cimag(x); } #define bli_zdgets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = bli_zimag(x); } #define bli_idgets( x, yr, yi ) { (yr) = ( double )(x); (yi) = 0.0; } #define bli_scgets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = bli_simag(x); } #define bli_dcgets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = bli_dimag(x); } #define bli_ccgets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = bli_cimag(x); } #define bli_zcgets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = bli_zimag(x); } #define bli_icgets( x, yr, yi ) { (yr) = ( float )(x); (yi) = 0.0F; } #define bli_szgets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = bli_simag(x); } #define bli_dzgets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = bli_dimag(x); } #define bli_czgets( x, yr, yi ) { 
(yr) = bli_creal(x); (yi) = bli_cimag(x); } #define bli_zzgets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = bli_zimag(x); } #define bli_izgets( x, yr, yi ) { (yr) = ( double )(x); (yi) = 0.0; } #define bli_sigets( x, yr, yi ) { (yr) = bli_sreal(x); (yi) = 0; } #define bli_digets( x, yr, yi ) { (yr) = bli_dreal(x); (yi) = 0; } #define bli_cigets( x, yr, yi ) { (yr) = bli_creal(x); (yi) = 0; } #define bli_zigets( x, yr, yi ) { (yr) = bli_zreal(x); (yi) = 0; } #define bli_iigets( x, yr, yi ) { (yr) = (x); (yi) = 0; } #define bli_sgets( x, yr, yi ) bli_ssgets( x, yr, yi ) #define bli_dgets( x, yr, yi ) bli_ddgets( x, yr, yi ) #define bli_cgets( x, yr, yi ) bli_csgets( x, yr, yi ) #define bli_zgets( x, yr, yi ) bli_zdgets( x, yr, yi ) #define bli_igets( x, yr, yi ) bli_idgets( x, yr, yi ) #endif blis-0.6.1/frame/include/level0/bli_inverts.h000066400000000000000000000043641360743507500210650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_INVERTS_H #define BLIS_INVERTS_H // inverts // Notes: // - The first char encodes the type of x. #define bli_sinverts( x ) bli_sinvertris( bli_sreal(x), bli_simag(x) ) #define bli_dinverts( x ) bli_dinvertris( bli_dreal(x), bli_dimag(x) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_cinverts( x ) bli_cinvertris( bli_creal(x), bli_cimag(x) ) #define bli_zinverts( x ) bli_zinvertris( bli_zreal(x), bli_zimag(x) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_cinverts( x ) { (x) = 1.0F / (x); } #define bli_zinverts( x ) { (x) = 1.0 / (x); } #endif // BLIS_ENABLE_C99_COMPLEX #endif blis-0.6.1/frame/include/level0/bli_invscaljs.h000066400000000000000000000104011360743507500213540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_INVSCALJS_H #define BLIS_INVSCALJS_H // invscaljs // Notes: // - The first char encodes the type of a. // - The second char encodes the type of y. 
#define bli_ssinvscaljs( a, y ) bli_sinvscaljris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) #define bli_dsinvscaljs( a, y ) bli_sinvscaljris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) #define bli_csinvscaljs( a, y ) bli_sinvscaljris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) #define bli_zsinvscaljs( a, y ) bli_sinvscaljris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) #define bli_sdinvscaljs( a, y ) bli_dinvscaljris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) #define bli_ddinvscaljs( a, y ) bli_dinvscaljris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_cdinvscaljs( a, y ) bli_dinvscaljris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_zdinvscaljs( a, y ) bli_dinvscaljris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scinvscaljs( a, y ) bli_scinvscaljris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) #define bli_dcinvscaljs( a, y ) bli_scinvscaljris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) #define bli_ccinvscaljs( a, y ) bli_cinvscaljris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) #define bli_zcinvscaljs( a, y ) bli_cinvscaljris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) #define bli_szinvscaljs( a, y ) bli_dzinvscaljris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) #define bli_dzinvscaljs( a, y ) bli_dzinvscaljris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_czinvscaljs( a, y ) bli_zinvscaljris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_zzinvscaljs( a, y ) bli_zinvscaljris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scinvscaljs( a, y ) { (y) /= (a); } #define bli_dcinvscaljs( a, y ) { (y) /= (a); } #define bli_ccinvscaljs( a, y ) { (y) /= conjf(a); } #define bli_zcinvscaljs( a, y ) { (y) /= conj (a); } #define 
bli_szinvscaljs( a, y ) { (y) /= (a); } #define bli_dzinvscaljs( a, y ) { (y) /= (a); } #define bli_czinvscaljs( a, y ) { (y) /= conjf(a); } #define bli_zzinvscaljs( a, y ) { (y) /= conj (a); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sinvscaljs( a, y ) bli_ssinvscaljs( a, y ) #define bli_dinvscaljs( a, y ) bli_ddinvscaljs( a, y ) #define bli_cinvscaljs( a, y ) bli_ccinvscaljs( a, y ) #define bli_zinvscaljs( a, y ) bli_zzinvscaljs( a, y ) #endif blis-0.6.1/frame/include/level0/bli_invscals.h000066400000000000000000000102461360743507500212110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_INVSCALS_H #define BLIS_INVSCALS_H // invscals // Notes: // - The first char encodes the type of a. // - The second char encodes the type of y. #define bli_ssinvscals( a, y ) bli_sinvscalris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) #define bli_dsinvscals( a, y ) bli_sinvscalris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) #define bli_csinvscals( a, y ) bli_sinvscalris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) #define bli_zsinvscals( a, y ) bli_sinvscalris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) #define bli_sdinvscals( a, y ) bli_dinvscalris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) #define bli_ddinvscals( a, y ) bli_dinvscalris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_cdinvscals( a, y ) bli_dinvscalris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_zdinvscals( a, y ) bli_dinvscalris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scinvscals( a, y ) bli_scinvscalris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) #define bli_dcinvscals( a, y ) bli_scinvscalris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) #define bli_ccinvscals( a, y ) bli_cinvscalris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) #define bli_zcinvscals( a, y ) bli_cinvscalris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) #define bli_szinvscals( a, y ) 
bli_dzinvscalris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) #define bli_dzinvscals( a, y ) bli_dzinvscalris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_czinvscals( a, y ) bli_zinvscalris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_zzinvscals( a, y ) bli_zinvscalris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scinvscals( a, y ) { (y) /= (a); } #define bli_dcinvscals( a, y ) { (y) /= (a); } #define bli_ccinvscals( a, y ) { (y) /= (a); } #define bli_zcinvscals( a, y ) { (y) /= (a); } #define bli_szinvscals( a, y ) { (y) /= (a); } #define bli_dzinvscals( a, y ) { (y) /= (a); } #define bli_czinvscals( a, y ) { (y) /= (a); } #define bli_zzinvscals( a, y ) { (y) /= (a); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sinvscals( a, y ) bli_ssinvscals( a, y ) #define bli_dinvscals( a, y ) bli_ddinvscals( a, y ) #define bli_cinvscals( a, y ) bli_ccinvscals( a, y ) #define bli_zinvscals( a, y ) bli_zzinvscals( a, y ) #endif blis-0.6.1/frame/include/level0/bli_neg2s.h000066400000000000000000000100051360743507500203760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_NEG2S_H #define BLIS_NEG2S_H // neg2s // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_ssneg2s( x, y ) bli_sneg2ris( bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dsneg2s( x, y ) bli_sneg2ris( bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_csneg2s( x, y ) bli_sneg2ris( bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zsneg2s( x, y ) bli_sneg2ris( bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdneg2s( x, y ) bli_dneg2ris( bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ddneg2s( x, y ) bli_dneg2ris( bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cdneg2s( x, y ) bli_dneg2ris( bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zdneg2s( x, y ) bli_dneg2ris( bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scneg2s( x, y ) bli_cneg2ris( bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dcneg2s( x, y ) bli_cneg2ris( bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_ccneg2s( x, y ) bli_cneg2ris( bli_creal(x), bli_cimag(x), 
bli_creal(y), bli_cimag(y) ) #define bli_zcneg2s( x, y ) bli_cneg2ris( bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_szneg2s( x, y ) bli_zneg2ris( bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) #define bli_dzneg2s( x, y ) bli_zneg2ris( bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_czneg2s( x, y ) bli_zneg2ris( bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) ) #define bli_zzneg2s( x, y ) bli_zneg2ris( bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scneg2s( x, y ) { (y) = -(x); } #define bli_dcneg2s( x, y ) { (y) = -(x); } #define bli_ccneg2s( x, y ) { (y) = -(x); } #define bli_zcneg2s( x, y ) { (y) = -(x); } #define bli_szneg2s( x, y ) { (y) = -(x); } #define bli_dzneg2s( x, y ) { (y) = -(x); } #define bli_czneg2s( x, y ) { (y) = -(x); } #define bli_zzneg2s( x, y ) { (y) = -(x); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sneg2s( x, y ) bli_ssneg2s( x, y ) #define bli_dneg2s( x, y ) bli_ddneg2s( x, y ) #define bli_cneg2s( x, y ) bli_ccneg2s( x, y ) #define bli_zneg2s( x, y ) bli_zzneg2s( x, y ) #endif blis-0.6.1/frame/include/level0/bli_randnp2s.h000066400000000000000000000121141360743507500211120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_RANDNP2S_H #define BLIS_RANDNP2S_H // randnp2s #define bli_srandnp2s( a ) \ { \ bli_drandnp2s( a ); \ } #define bli_drandnp2s_prev( a ) \ { \ const double m_max = 3.0; \ const double m_max2 = m_max + 2.0; \ double t; \ double r_val; \ \ /* Compute a narrow-range power of two. For the purposes of commentary, we'll assume that m_max = 4. This represents the largest power of two we will use to generate the random numbers. */ \ \ /* Generate a random real number t on the interval: [0.0, 6.0]. */ \ t = ( ( double ) rand() / ( double ) RAND_MAX ) * m_max2; \ \ /* Modify t to guarantee that is never equal to the upper bound of the interval (in this case, 6.0). */ \ if ( t == m_max2 ) t = t - 1.0; \ \ /* Transform the interval into the set of integers, {0,1,2,3,4,5}. */ \ t = floor( t ); \ \ /* Map values of t == 0 to a final value of 0. */ \ if ( t == 0.0 ) r_val = 0.0; \ else \ { \ /* This case handles values of t = {1,2,3,4,5}. 
*/ \ \ double s_exp, s_val; \ \ /* Compute two random numbers to determine the signs of the exponent and the end result. */ \ PASTEMAC(d,rands)( s_exp ); \ PASTEMAC(d,rands)( s_val ); \ \ /* Compute r_val = 2^s where s = +/-(t-1) = {-4,-3,-2,-1,0,1,2,3,4}. */ \ if ( s_exp < 0.0 ) r_val = pow( 2.0, -(t - 1.0) ); \ else r_val = pow( 2.0, t - 1.0 ); \ \ /* If our sign value is negative, our random power of two will be negative. */ \ if ( s_val < 0.0 ) r_val = -r_val; \ } \ \ /* Normalize by the largest possible positive value. */ \ r_val = r_val / pow( 2.0, m_max ); \ \ /* r_val = 0, or +/-{2^-4, 2^-3, 2^-2, 2^-1, 2^0, 2^1, 2^2, 2^3, 2^4}. */ \ /* NOTE: For single-precision macros, this assignment results in typecast down to float. */ \ a = r_val; \ } #define bli_drandnp2s( a ) \ { \ const double m_max = 6.0; \ const double m_max2 = m_max + 2.0; \ double t; \ double r_val; \ \ /* Compute a narrow-range power of two. For the purposes of commentary, we'll assume that m_max = 4. This represents the largest power of two we will use to generate the random numbers. */ \ \ /* Generate a random real number t on the interval: [0.0, 6.0]. */ \ t = ( ( double ) rand() / ( double ) RAND_MAX ) * m_max2; \ \ /* Modify t to guarantee that is never equal to the upper bound of the interval (in this case, 6.0). */ \ if ( t == m_max2 ) t = t - 1.0; \ \ /* Transform the interval into the set of integers, {0,1,2,3,4,5}. */ \ t = floor( t ); \ \ /* Map values of t == 0 to a final value of 0. */ \ if ( t == 0.0 ) r_val = 0.0; \ else \ { \ /* This case handles values of t = {1,2,3,4,5}. */ \ \ double s_val; \ \ /* Compute r_val = 2^s where s = +/-(t-1) = {-4,-3,-2,-1,0}. */ \ r_val = pow( 2.0, -(t - 1.0) ); \ \ /* Compute a random number to determine the sign of the final result. */ \ PASTEMAC(d,rands)( s_val ); \ \ /* If our sign value is negative, our random power of two will be negative. */ \ if ( s_val < 0.0 ) r_val = -r_val; \ } \ \ /* r_val = 0, or +/-{2^0, 2^-1, 2^-2, 2^-3, 2^-4}. 
*/ \ /* NOTE: For single-precision macros, this assignment results in typecast down to float. */ \ a = r_val; \ } #define bli_crandnp2s( a ) \ { \ float ar, ai; \ \ bli_srandnp2s( ar ); \ bli_srandnp2s( ai ); \ \ bli_csets( ar, ai, (a) ); \ } #define bli_zrandnp2s( a ) \ { \ double ar, ai; \ \ bli_drandnp2s( ar ); \ bli_drandnp2s( ai ); \ \ bli_zsets( ar, ai, (a) ); \ } #endif blis-0.6.1/frame/include/level0/bli_rands.h000066400000000000000000000043761360743507500205050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_RANDS_H #define BLIS_RANDS_H // rands #define bli_srands( a ) \ { \ (a) = ( float ) ( ( double ) rand() / \ ( ( double ) RAND_MAX / 2.0 ) \ ) - 1.0F; \ } #define bli_drands( a ) \ { \ (a) = ( double ) ( ( double ) rand() / \ ( ( double ) RAND_MAX / 2.0 ) \ ) - 1.0; \ } #define bli_crands( a ) \ { \ float ar, ai; \ \ bli_srands( ar ); \ bli_srands( ai ); \ \ bli_csets( ar, ai, (a) ); \ } #define bli_zrands( a ) \ { \ double ar, ai; \ \ bli_drands( ar ); \ bli_drands( ai ); \ \ bli_zsets( ar, ai, (a) ); \ } #endif blis-0.6.1/frame/include/level0/bli_scal2js.h000066400000000000000000000321011360743507500207220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2JS_H #define BLIS_SCAL2JS_H // scal2js // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. // -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dssscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_cssscal2js( a, x, y ) bli_rxscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_zssscal2js( a, x, y ) bli_rxscal2jris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdsscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_ddsscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_cdsscal2js( a, x, y ) bli_rxscal2jris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define 
bli_zdsscal2js( a, x, y ) bli_rxscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_scsscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_dcsscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_ccsscal2js( a, x, y ) bli_roscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zcsscal2js( a, x, y ) bli_roscal2jris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_szsscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_dzsscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_czsscal2js( a, x, y ) bli_roscal2jris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zzsscal2js( a, x, y ) bli_roscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dsdscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_csdscal2js( a, x, y ) bli_rxscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zsdscal2js( a, x, y ) bli_rxscal2jris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_sddscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define 
bli_dddscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cddscal2js( a, x, y ) bli_rxscal2jris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zddscal2js( a, x, y ) bli_rxscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_scdscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dcdscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ccdscal2js( a, x, y ) bli_roscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zcdscal2js( a, x, y ) bli_roscal2jris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_szdscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_dzdscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_czdscal2js( a, x, y ) bli_roscal2jris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zzdscal2js( a, x, y ) bli_roscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_dscscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_cscscal2js( a, x, y ) bli_rcscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), 
bli_cimag(y) ) #define bli_zscscal2js( a, x, y ) bli_rcscal2jris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) #define bli_sdcscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_ddcscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cdcscal2js( a, x, y ) bli_rcscal2jris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zdcscal2js( a, x, y ) bli_rcscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) ) #define bli_sccscal2js( a, x, y ) bli_crscal2jris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_dccscal2js( a, x, y ) bli_crscal2jris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_cccscal2js( a, x, y ) bli_cxscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zccscal2js( a, x, y ) bli_cxscal2jris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) ) #define bli_szcscal2js( a, x, y ) bli_crscal2jris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_dzcscal2js( a, x, y ) bli_crscal2jris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_czcscal2js( a, x, y ) bli_cxscal2jris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) #define bli_zzcscal2js( a, x, y ) bli_cxscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) ) // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) ) 
// Continuation of the non-C99 (axy) = (??z) group of scal2js: every macro
// forwards the real/imaginary parts of a, x, and y to a bli_*scal2jris helper
// (not defined in this excerpt); y uses bli_zreal()/bli_zimag().
#define bli_dszscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_cszscal2js( a, x, y ) bli_rcscal2jris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zszscal2js( a, x, y ) bli_rcscal2jris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_sdzscal2js( a, x, y ) bli_rxscal2jris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_ddzscal2js( a, x, y ) bli_rxscal2jris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_cdzscal2js( a, x, y ) bli_rcscal2jris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zdzscal2js( a, x, y ) bli_rcscal2jris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_sczscal2js( a, x, y ) bli_crscal2jris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dczscal2js( a, x, y ) bli_crscal2jris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_cczscal2js( a, x, y ) bli_cxscal2jris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zczscal2js( a, x, y ) bli_cxscal2jris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_szzscal2js( a, x, y ) bli_crscal2jris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dzzscal2js( a, x, y ) bli_crscal2jris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_czzscal2js( a, x, y ) bli_cxscal2jris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zzzscal2js( a, x, y ) bli_cxscal2jris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )

#else // ifdef BLIS_ENABLE_C99_COMPLEX

// C99 branch: the macros operate on whole values instead of split parts.
// y is overwritten with a * x. When the second character (x) is c or z, x is
// first conjugated with conjf() or conj() respectively; for s or d it is used
// as-is. Each body is a bare { ... } compound statement.

// -- (axy) = (??c) ------------------------------------------------------------

#define bli_sscscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_dscscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_cscscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_zscscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_sdcscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_ddcscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_cdcscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_zdcscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_sccscal2js( a, x, y ) { (y) = (a) * conjf(x); }
#define bli_dccscal2js( a, x, y ) { (y) = (a) * conjf(x); }
#define bli_cccscal2js( a, x, y ) { (y) = (a) * conjf(x); }
#define bli_zccscal2js( a, x, y ) { (y) = (a) * conjf(x); }
#define bli_szcscal2js( a, x, y ) { (y) = (a) * conj(x); }
#define bli_dzcscal2js( a, x, y ) { (y) = (a) * conj(x); }
#define bli_czcscal2js( a, x, y ) { (y) = (a) * conj(x); }
#define bli_zzcscal2js( a, x, y ) { (y) = (a) * conj(x); }

// -- (axy) = (??z) ------------------------------------------------------------

#define bli_sszscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_dszscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_cszscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_zszscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_sdzscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_ddzscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_cdzscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_zdzscal2js( a, x, y ) { (y) = (a) * (x); }
#define bli_sczscal2js( a, x, y ) { (y) = (a) * conjf(x); }
#define bli_dczscal2js( a, x, y ) { (y) = (a) * conjf(x); }
#define bli_cczscal2js( a, x, y ) { (y) = (a) * conjf(x); }
#define bli_zczscal2js( a, x, y ) { (y) = (a) * conjf(x); }
#define bli_szzscal2js( a, x, y ) { (y) = (a) * conj(x); }
#define bli_dzzscal2js( a, x, y ) { (y) = (a) * conj(x); }
#define bli_czzscal2js( a, x, y ) { (y) = (a) * conj(x); }
#define bli_zzzscal2js( a, x, y ) { (y) = (a) * conj(x); }

#endif // BLIS_ENABLE_C99_COMPLEX

// Single-character shorthands: bli_?scal2js maps to the variant in which a, x,
// and y all carry the same type character.
#define bli_sscal2js( a, x, y ) bli_sssscal2js( a, x, y )
#define bli_dscal2js( a, x, y ) bli_dddscal2js( a, x, y )
#define bli_cscal2js( a, x, y ) bli_cccscal2js( a, x, y )
#define bli_zscal2js( a, x, y ) bli_zzzscal2js( a, x, y )

#endif
// NOTE(review): the next line is tar-archive member-header residue (path,
// mode/size/checksum fields), not C source; it is preserved verbatim.
blis-0.6.1/frame/include/level0/bli_scal2s.h000066400000000000000000000315151360743507500205600ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef BLIS_SCAL2S_H
#define BLIS_SCAL2S_H

// scal2s

// Notes:
// - The first char encodes the type of a.
// - The second char encodes the type of x.
// - The third char encodes the type of y.

// -- (axy) = (??s) ------------------------------------------------------------

// Each macro forwards the real/imaginary parts of a, x, and y to a
// bli_*scal2ris helper (not defined in this excerpt); y uses
// bli_sreal()/bli_simag() throughout this group.
#define bli_sssscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
#define bli_dssscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
#define bli_cssscal2s( a, x, y ) bli_rxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
#define bli_zssscal2s( a, x, y ) bli_rxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) )
#define bli_sdsscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
#define bli_ddsscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
#define bli_cdsscal2s( a, x, y ) bli_rxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
#define bli_zdsscal2s( a, x, y ) bli_rxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) )
#define bli_scsscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
// Continuation of the scal2s (axy) = (??s) group. Name layout: first char is
// the type of a, second the type of x, third the type of y. In the (??s) and
// (??d) groups below, the four combinations where both a and x are c/z forward
// to bli_roscal2ris; every other combination forwards to bli_rxscal2ris.
// Neither helper is defined in this excerpt.
#define bli_dcsscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
#define bli_ccsscal2s( a, x, y ) bli_roscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
#define bli_zcsscal2s( a, x, y ) bli_roscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) )
#define bli_szsscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
#define bli_dzsscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
#define bli_czsscal2s( a, x, y ) bli_roscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )
#define bli_zzsscal2s( a, x, y ) bli_roscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) )

// -- (axy) = (??d) ------------------------------------------------------------

#define bli_ssdscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
#define bli_dsdscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
#define bli_csdscal2s( a, x, y ) bli_rxscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
#define bli_zsdscal2s( a, x, y ) bli_rxscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) )
#define bli_sddscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_dddscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_cddscal2s( a, x, y ) bli_rxscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_zddscal2s( a, x, y ) bli_rxscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_scdscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_dcdscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_ccdscal2s( a, x, y ) bli_roscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_zcdscal2s( a, x, y ) bli_roscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_szdscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_dzdscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_czdscal2s( a, x, y ) bli_roscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )
#define bli_zzdscal2s( a, x, y ) bli_roscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) )

#ifndef BLIS_ENABLE_C99_COMPLEX

// Without C99 complex support the (??c) and (??z) groups also forward split
// real/imaginary parts. The helper prefix follows the (a,x) pair as listed:
// s/d with s/d -> rx, c/z with s/d -> rc, s/d with c/z -> cr, c/z with c/z -> cx.

// -- (axy) = (??c) ------------------------------------------------------------

#define bli_sscscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_dscscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_cscscal2s( a, x, y ) bli_rcscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_zscscal2s( a, x, y ) bli_rcscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) )
#define bli_sdcscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_ddcscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_cdcscal2s( a, x, y ) bli_rcscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zdcscal2s( a, x, y ) bli_rcscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_sccscal2s( a, x, y ) bli_crscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_dccscal2s( a, x, y ) bli_crscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_cccscal2s( a, x, y ) bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zccscal2s( a, x, y ) bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_szcscal2s( a, x, y ) bli_crscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
#define bli_dzcscal2s( a, x, y ) bli_crscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
#define bli_czcscal2s( a, x, y ) bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zzcscal2s( a, x, y ) bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )

// -- (axy) = (??z) ------------------------------------------------------------

#define bli_sszscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dszscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_cszscal2s( a, x, y ) bli_rcscal2ris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zszscal2s( a, x, y ) bli_rcscal2ris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_sdzscal2s( a, x, y ) bli_rxscal2ris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_ddzscal2s( a, x, y ) bli_rxscal2ris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_cdzscal2s( a, x, y ) bli_rcscal2ris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zdzscal2s( a, x, y ) bli_rcscal2ris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_sczscal2s( a, x, y ) bli_crscal2ris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dczscal2s( a, x, y ) bli_crscal2ris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_cczscal2s( a, x, y ) bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zczscal2s( a, x, y ) bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_szzscal2s( a, x, y ) bli_crscal2ris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dzzscal2s( a, x, y ) bli_crscal2ris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_czzscal2s( a, x, y ) bli_cxscal2ris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zzzscal2s( a, x, y ) bli_cxscal2ris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )

#else // ifdef BLIS_ENABLE_C99_COMPLEX

// C99 branch: every variant overwrites y with the product a * x, with no
// conjugation. Each body is a bare { ... } compound statement.

// -- (axy) = (??c) ------------------------------------------------------------

#define bli_sscscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_dscscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_cscscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_zscscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_sdcscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_ddcscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_cdcscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_zdcscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_sccscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_dccscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_cccscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_zccscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_szcscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_dzcscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_czcscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_zzcscal2s( a, x, y ) { (y) = (a) * (x); }

// -- (axy) = (??z) ------------------------------------------------------------

#define bli_sszscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_dszscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_cszscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_zszscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_sdzscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_ddzscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_cdzscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_zdzscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_sczscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_dczscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_cczscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_zczscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_szzscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_dzzscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_czzscal2s( a, x, y ) { (y) = (a) * (x); }
#define bli_zzzscal2s( a, x, y ) { (y) = (a) * (x); }

#endif // BLIS_ENABLE_C99_COMPLEX

// Single-character shorthands: bli_?scal2s maps to the variant in which a, x,
// and y all carry the same type character.
#define bli_sscal2s( a, x, y ) bli_sssscal2s( a, x, y )
#define bli_dscal2s( a, x, y ) bli_dddscal2s( a, x, y )
#define bli_cscal2s( a, x, y ) bli_cccscal2s( a, x, y )
#define bli_zscal2s( a, x, y ) bli_zzzscal2s( a, x, y )

#endif
blis-0.6.1/frame/include/level0/bli_scal2s_mxn.h000066400000000000000000000055061360743507500214430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_SCAL2S_MXN_H #define BLIS_SCAL2S_MXN_H // scal2s_mxn #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ static void PASTEMAC(ch,opname) \ ( \ const conj_t conjx, \ const dim_t m, \ const dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, const inc_t rs_x, const inc_t cs_x, \ ctype* restrict y, const inc_t rs_y, const inc_t cs_y \ ) \ { \ if ( bli_is_conj( conjx ) ) \ { \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype* restrict xj = x + j*cs_x; \ ctype* restrict yj = y + j*cs_y; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype* restrict xij = xj + i*rs_x; \ ctype* restrict yij = yj + i*rs_y; \ \ PASTEMAC(ch,scal2js)( *alpha, *xij, *yij ); \ } \ } \ } \ else /* if ( bli_is_noconj( conjx ) ) */ \ { \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype* restrict xj = x + j*cs_x; \ ctype* restrict yj = y + j*cs_y; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype* restrict xij = xj + i*rs_x; \ ctype* restrict yij = yj + i*rs_y; \ \ PASTEMAC(ch,scal2s)( *alpha, *xij, *yij ); \ } \ } \ } \ } INSERT_GENTFUNC_BASIC0( scal2s_mxn ) #endif blis-0.6.1/frame/include/level0/bli_scalcjs.h000066400000000000000000000111331360743507500210050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCALCJS_H #define BLIS_SCALCJS_H // scalcjs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_ssscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_sreal(y), bli_simag(y) ) #define bli_dsscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_sreal(y), bli_simag(y) ) #define bli_csscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_sreal(y), bli_simag(y) ) #define bli_zsscalcjs( conjx, x, y ) bli_sscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_sreal(y), bli_simag(y) ) #define bli_sdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_dreal(y), bli_dimag(y) ) #define bli_ddscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_cdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_dreal(y), bli_dimag(y) ) #define bli_zdscalcjs( conjx, x, y ) bli_dscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scscalcjs( conjx, x, y ) bli_scscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_creal(y), bli_cimag(y) ) 
// Continuation of the scalcjs variants for the build without
// BLIS_ENABLE_C99_COMPLEX. Name layout: first char is the type of x, second the
// type of y. Each macro passes conjx plus the real/imaginary parts of x and y
// to a bli_*scalcjris helper (not defined in this excerpt).
#define bli_dcscalcjs( conjx, x, y ) bli_scscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_creal(y), bli_cimag(y) )
#define bli_ccscalcjs( conjx, x, y ) bli_cscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_creal(y), bli_cimag(y) )
#define bli_zcscalcjs( conjx, x, y ) bli_cscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_creal(y), bli_cimag(y) )

#define bli_szscalcjs( conjx, x, y ) bli_dzscalcjris( conjx, bli_sreal(x), bli_simag(x), bli_zreal(y), bli_zimag(y) )
#define bli_dzscalcjs( conjx, x, y ) bli_dzscalcjris( conjx, bli_dreal(x), bli_dimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_czscalcjs( conjx, x, y ) bli_zscalcjris( conjx, bli_creal(x), bli_cimag(x), bli_zreal(y), bli_zimag(y) )
#define bli_zzscalcjs( conjx, x, y ) bli_zscalcjris( conjx, bli_zreal(x), bli_zimag(x), bli_zreal(y), bli_zimag(y) )

#else // ifdef BLIS_ENABLE_C99_COMPLEX

// C99 branch: y is multiplied in place by x. For x of type c or z the factor is
// conjf(x) / conj(x) when bli_is_conj( conjx ) is true and x otherwise; for x
// of type s or d, conjx is not referenced and x is used directly.
#define bli_scscalcjs( conjx, x, y ) { (y) *= (x); }
#define bli_dcscalcjs( conjx, x, y ) { (y) *= (x); }
#define bli_ccscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
#define bli_zcscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conj (x) : (x) ); }

#define bli_szscalcjs( conjx, x, y ) { (y) *= (x); }
#define bli_dzscalcjs( conjx, x, y ) { (y) *= (x); }
#define bli_czscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conjf(x) : (x) ); }
#define bli_zzscalcjs( conjx, x, y ) { (y) *= ( bli_is_conj( conjx ) ? conj (x) : (x) ); }

#endif // BLIS_ENABLE_C99_COMPLEX

// Single-character shorthands: bli_?scalcjs maps to the variant in which x and
// y carry the same type character.
#define bli_sscalcjs( conjx, x, y ) bli_ssscalcjs( conjx, x, y )
#define bli_dscalcjs( conjx, x, y ) bli_ddscalcjs( conjx, x, y )
#define bli_cscalcjs( conjx, x, y ) bli_ccscalcjs( conjx, x, y )
#define bli_zscalcjs( conjx, x, y ) bli_zzscalcjs( conjx, x, y )

#endif
// NOTE(review): the next line is tar-archive member-header residue (path,
// mode/size/checksum fields), not C source; it is preserved verbatim.
blis-0.6.1/frame/include/level0/bli_scaljs.h000066400000000000000000000101501360743507500206400ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef BLIS_SCALJS_H
#define BLIS_SCALJS_H

// scaljs

// Notes:
// - The first char encodes the type of a.
// - The second char encodes the type of y.
#define bli_ssscaljs( a, y ) bli_sscaljris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) #define bli_dsscaljs( a, y ) bli_sscaljris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) #define bli_csscaljs( a, y ) bli_sscaljris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) #define bli_zsscaljs( a, y ) bli_sscaljris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) #define bli_sdscaljs( a, y ) bli_dscaljris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) #define bli_ddscaljs( a, y ) bli_dscaljris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_cdscaljs( a, y ) bli_dscaljris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_zdscaljs( a, y ) bli_dscaljris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scscaljs( a, y ) bli_scscaljris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) #define bli_dcscaljs( a, y ) bli_scscaljris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) #define bli_ccscaljs( a, y ) bli_cscaljris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) #define bli_zcscaljs( a, y ) bli_cscaljris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) #define bli_szscaljs( a, y ) bli_dzscaljris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) #define bli_dzscaljs( a, y ) bli_dzscaljris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_czscaljs( a, y ) bli_zscaljris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_zzscaljs( a, y ) bli_zscaljris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scscaljs( a, y ) { (y) *= (a); } #define bli_dcscaljs( a, y ) { (y) *= (a); } #define bli_ccscaljs( a, y ) { (y) *= conjf(a); } #define bli_zcscaljs( a, y ) { (y) *= conj (a); } #define bli_szscaljs( a, y ) { (y) *= (a); } #define bli_dzscaljs( a, y ) { (y) *= (a); } #define bli_czscaljs( a, y ) 
{ (y) *= conjf(a); } #define bli_zzscaljs( a, y ) { (y) *= conj (a); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sscaljs( a, y ) bli_ssscaljs( a, y ) #define bli_dscaljs( a, y ) bli_ddscaljs( a, y ) #define bli_cscaljs( a, y ) bli_ccscaljs( a, y ) #define bli_zscaljs( a, y ) bli_zzscaljs( a, y ) #endif blis-0.6.1/frame/include/level0/bli_scals.h000066400000000000000000000100151360743507500204660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_SCALS_H #define BLIS_SCALS_H // scals // Notes: // - The first char encodes the type of a. // - The second char encodes the type of y. #define bli_ssscals( a, y ) bli_sscalris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) #define bli_dsscals( a, y ) bli_sscalris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) #define bli_csscals( a, y ) bli_sscalris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) #define bli_zsscals( a, y ) bli_sscalris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) #define bli_sdscals( a, y ) bli_dscalris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) #define bli_ddscals( a, y ) bli_dscalris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_cdscals( a, y ) bli_dscalris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_zdscals( a, y ) bli_dscalris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scscals( a, y ) bli_scscalris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) #define bli_dcscals( a, y ) bli_scscalris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) #define bli_ccscals( a, y ) bli_cscalris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) #define bli_zcscals( a, y ) bli_cscalris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) #define bli_szscals( a, y ) bli_dzscalris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) #define bli_dzscals( a, y ) bli_dzscalris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_czscals( a, y ) bli_zscalris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_zzscals( a, y ) bli_zscalris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scscals( a, y ) { (y) *= (a); } #define bli_dcscals( a, y ) { (y) *= (a); } #define bli_ccscals( a, y ) { (y) *= (a); } #define bli_zcscals( a, y ) { (y) *= (a); } #define 
bli_szscals( a, y ) { (y) *= (a); } #define bli_dzscals( a, y ) { (y) *= (a); } #define bli_czscals( a, y ) { (y) *= (a); } #define bli_zzscals( a, y ) { (y) *= (a); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sscals( a, y ) bli_ssscals( a, y ) #define bli_dscals( a, y ) bli_ddscals( a, y ) #define bli_cscals( a, y ) bli_ccscals( a, y ) #define bli_zscals( a, y ) bli_zzscals( a, y ) #endif blis-0.6.1/frame/include/level0/bli_set0s.h000066400000000000000000000036251360743507500204300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SET0S_H #define BLIS_SET0S_H #define bli_sset0s( a ) bli_ssets( 0.0F, 0.0F, (a) ) #define bli_dset0s( a ) bli_dsets( 0.0 , 0.0 , (a) ) #define bli_cset0s( a ) bli_csets( 0.0F, 0.0F, (a) ) #define bli_zset0s( a ) bli_zsets( 0.0 , 0.0 , (a) ) #endif blis-0.6.1/frame/include/level0/bli_set0s_mxn.h000066400000000000000000000054571360743507500213170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SET0S_MXN_H #define BLIS_SET0S_MXN_H // set0s_mxn // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. static void bli_sset0s_mxn( const dim_t m, const dim_t n, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) bli_sset0s( *(y + i*rs_y + j*cs_y) ); } static void bli_dset0s_mxn( const dim_t m, const dim_t n, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) bli_dset0s( *(y + i*rs_y + j*cs_y) ); } static void bli_cset0s_mxn( const dim_t m, const dim_t n, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) bli_cset0s( *(y + i*rs_y + j*cs_y) ); } static void bli_zset0s_mxn( const dim_t m, const dim_t n, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) bli_zset0s( *(y + i*rs_y + j*cs_y) ); } #endif blis-0.6.1/frame/include/level0/bli_set1s.h000066400000000000000000000036251360743507500204310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SET1S_H #define BLIS_SET1S_H #define bli_sset1s( a ) bli_ssets( 1.0F, 0.0F, (a) ) #define bli_dset1s( a ) bli_dsets( 1.0 , 0.0 , (a) ) #define bli_cset1s( a ) bli_csets( 1.0F, 0.0F, (a) ) #define bli_zset1s( a ) bli_zsets( 1.0 , 0.0 , (a) ) #endif blis-0.6.1/frame/include/level0/bli_seti0s.h000066400000000000000000000036071360743507500206010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SETI0S_H #define BLIS_SETI0S_H #define bli_sseti0s( a ) bli_ssetis( 0.0F, (a) ) #define bli_dseti0s( a ) bli_dsetis( 0.0 , (a) ) #define bli_cseti0s( a ) bli_csetis( 0.0F, (a) ) #define bli_zseti0s( a ) bli_zsetis( 0.0 , (a) ) #endif blis-0.6.1/frame/include/level0/bli_setis.h000066400000000000000000000053351360743507500205210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SETIS_H #define BLIS_SETIS_H // setis // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. 
#define bli_sssetis( xi, y ) { ; } #define bli_dssetis( xi, y ) { ; } #define bli_sdsetis( xi, y ) { ; } #define bli_ddsetis( xi, y ) { ; } #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scsetis( xi, y ) { bli_cimag(y) = (xi); } #define bli_dcsetis( xi, y ) { bli_cimag(y) = (xi); } #define bli_szsetis( xi, y ) { bli_zimag(y) = (xi); } #define bli_dzsetis( xi, y ) { bli_zimag(y) = (xi); } #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scsetis( xi, y ) { (y) = bli_creal(y) + (xi) * (I); } #define bli_dcsetis( xi, y ) { (y) = bli_creal(y) + (xi) * (I); } #define bli_szsetis( xi, y ) { (y) = bli_zreal(y) + (xi) * (I); } #define bli_dzsetis( xi, y ) { (y) = bli_zreal(y) + (xi) * (I); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_ssetis( xi, y ) bli_sssetis( xi, y ) #define bli_dsetis( xi, y ) bli_ddsetis( xi, y ) #define bli_csetis( xi, y ) bli_scsetis( xi, y ) #define bli_zsetis( xi, y ) bli_dzsetis( xi, y ) #endif blis-0.6.1/frame/include/level0/bli_setrs.h000066400000000000000000000054051360743507500205300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SETRS_H #define BLIS_SETRS_H // setrs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_sssetrs( xr, y ) { (y) = (xr); } #define bli_dssetrs( xr, y ) { (y) = (xr); } #define bli_sdsetrs( xr, y ) { (y) = (xr); } #define bli_ddsetrs( xr, y ) { (y) = (xr); } #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scsetrs( xr, y ) { bli_creal(y) = (xr); } #define bli_dcsetrs( xr, y ) { bli_creal(y) = (xr); } #define bli_szsetrs( xr, y ) { bli_zreal(y) = (xr); } #define bli_dzsetrs( xr, y ) { bli_zreal(y) = (xr); } #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scsetrs( xr, y ) { (y) = (xr) + bli_cimag(y) * (I); } #define bli_dcsetrs( xr, y ) { (y) = (xr) + bli_cimag(y) * (I); } #define bli_szsetrs( xr, y ) { (y) = (xr) + bli_zimag(y) * (I); } #define bli_dzsetrs( xr, y ) { (y) = (xr) + bli_zimag(y) * (I); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_ssetrs( xr, y ) bli_sssetrs( xr, y ) #define bli_dsetrs( xr, y ) bli_ddsetrs( xr, y ) #define bli_csetrs( xr, y ) bli_scsetrs( xr, y ) #define bli_zsetrs( xr, y ) bli_dzsetrs( xr, y ) #endif blis-0.6.1/frame/include/level0/bli_sets.h000066400000000000000000000102671360743507500203500ustar00rootroot00000000000000/* 
BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SETS_H #define BLIS_SETS_H // sets // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. 
#define bli_sssets( xr, xi, y ) { (y) = (xr); } #define bli_dssets( xr, xi, y ) { (y) = (xr); } #define bli_cssets( xr, xi, y ) { (y) = (xr); } #define bli_zssets( xr, xi, y ) { (y) = (xr); } #define bli_issets( xr, xi, y ) { (y) = (xr); } #define bli_sdsets( xr, xi, y ) { (y) = (xr); } #define bli_ddsets( xr, xi, y ) { (y) = (xr); } #define bli_cdsets( xr, xi, y ) { (y) = (xr); } #define bli_zdsets( xr, xi, y ) { (y) = (xr); } #define bli_idsets( xr, xi, y ) { (y) = (xr); } #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } #define bli_dcsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } #define bli_ccsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } #define bli_zcsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } #define bli_icsets( xr, xi, y ) { bli_creal(y) = (xr); bli_cimag(y) = (xi); } #define bli_szsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } #define bli_dzsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } #define bli_czsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } #define bli_zzsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } #define bli_izsets( xr, xi, y ) { bli_zreal(y) = (xr); bli_zimag(y) = (xi); } #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } #define bli_dcsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } #define bli_ccsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } #define bli_zcsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } #define bli_szsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } #define bli_dzsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } #define bli_czsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } #define bli_zzsets( xr, xi, y ) { (y) = (xr) + (xi) * (I); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sisets( xr, xi, y ) { (y) = bli_sreal(xr); } #define bli_disets( xr, xi, y ) { (y) = bli_dreal(xr); } #define bli_cisets( xr, xi, y ) 
{ (y) = bli_creal(xr); } #define bli_zisets( xr, xi, y ) { (y) = bli_zreal(xr); } #define bli_iisets( xr, xi, y ) { (y) = (xr); } #define bli_ssets( xr, xi, y ) bli_sssets( xr, xi, y ) #define bli_dsets( xr, xi, y ) bli_ddsets( xr, xi, y ) #define bli_csets( xr, xi, y ) bli_scsets( xr, xi, y ) #define bli_zsets( xr, xi, y ) bli_dzsets( xr, xi, y ) #define bli_isets( xr, xi, y ) bli_disets( xr, xi, y ) #endif blis-0.6.1/frame/include/level0/bli_sqrt2s.h000066400000000000000000000117211360743507500206240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SQRT2S_H #define BLIS_SQRT2S_H // sqrt2s // Notes: // - The first char encodes the type of x. // - The second char encodes the type of a. #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_sssqrt2s( x, a ) bli_ssqrt2ris( bli_sreal(x), bli_simag(x), bli_sreal(a), bli_simag(a) ) #define bli_dssqrt2s( x, a ) bli_ssqrt2ris( bli_dreal(x), bli_dimag(x), bli_sreal(a), bli_simag(a) ) #define bli_cssqrt2s( x, a ) bli_ssqrt2ris( bli_creal(x), bli_cimag(x), bli_sreal(a), bli_simag(a) ) #define bli_zssqrt2s( x, a ) bli_ssqrt2ris( bli_zreal(x), bli_zimag(x), bli_sreal(a), bli_simag(a) ) #define bli_sdsqrt2s( x, a ) bli_dsqrt2ris( bli_sreal(x), bli_simag(x), bli_dreal(a), bli_dimag(a) ) #define bli_ddsqrt2s( x, a ) bli_dsqrt2ris( bli_dreal(x), bli_dimag(x), bli_dreal(a), bli_dimag(a) ) #define bli_cdsqrt2s( x, a ) bli_dsqrt2ris( bli_creal(x), bli_cimag(x), bli_dreal(a), bli_dimag(a) ) #define bli_zdsqrt2s( x, a ) bli_dsqrt2ris( bli_zreal(x), bli_zimag(x), bli_dreal(a), bli_dimag(a) ) #define bli_scsqrt2s( x, a ) bli_scsqrt2ris( bli_sreal(x), bli_simag(x), bli_creal(a), bli_cimag(a) ) #define bli_dcsqrt2s( x, a ) bli_scsqrt2ris( bli_dreal(x), bli_dimag(x), bli_creal(a), bli_cimag(a) ) #define bli_ccsqrt2s( x, a ) bli_csqrt2ris( bli_creal(x), bli_cimag(x), bli_creal(a), bli_cimag(a) ) #define bli_zcsqrt2s( x, a ) bli_csqrt2ris( bli_zreal(x), bli_zimag(x), bli_creal(a), bli_cimag(a) ) #define bli_szsqrt2s( x, a ) bli_dzsqrt2ris( bli_sreal(x), bli_simag(x), 
bli_zreal(a), bli_zimag(a) ) #define bli_dzsqrt2s( x, a ) bli_dzsqrt2ris( bli_dreal(x), bli_dimag(x), bli_zreal(a), bli_zimag(a) ) #define bli_czsqrt2s( x, a ) bli_zsqrt2ris( bli_creal(x), bli_cimag(x), bli_zreal(a), bli_zimag(a) ) #define bli_zzsqrt2s( x, a ) bli_zsqrt2ris( bli_zreal(x), bli_zimag(x), bli_zreal(a), bli_zimag(a) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_sssqrt2s( x, a ) { (a) = ( float ) sqrtf( (x) ) ; } #define bli_dssqrt2s( x, a ) { (a) = ( float ) sqrt ( (x) ) ; } #define bli_cssqrt2s( x, a ) { (a) = ( float )bli_creal( csqrtf( (x) ) ); } #define bli_zssqrt2s( x, a ) { (a) = ( float )bli_zreal( csqrt ( (x) ) ); } #define bli_sdsqrt2s( x, a ) { (a) = ( double ) sqrtf( (x) ) ; } #define bli_ddsqrt2s( x, a ) { (a) = ( double ) sqrt ( (x) ) ; } #define bli_cdsqrt2s( x, a ) { (a) = ( double )bli_creal( csqrtf( (x) ) ); } #define bli_zdsqrt2s( x, a ) { (a) = ( double )bli_zreal( csqrt ( (x) ) ); } #define bli_scsqrt2s( x, a ) { (a) = ( scomplex ) sqrtf( (x) ) ; } #define bli_dcsqrt2s( x, a ) { (a) = ( scomplex ) sqrt ( (x) ) ; } #define bli_ccsqrt2s( x, a ) { (a) = ( scomplex ) csqrtf( (x) ) ; } #define bli_zcsqrt2s( x, a ) { (a) = ( scomplex ) csqrt ( (x) ) ; } #define bli_szsqrt2s( x, a ) { (a) = ( dcomplex ) sqrtf( (x) ) ; } #define bli_dzsqrt2s( x, a ) { (a) = ( dcomplex ) sqrt ( (x) ) ; } #define bli_czsqrt2s( x, a ) { (a) = ( dcomplex ) csqrtf( (x) ) ; } #define bli_zzsqrt2s( x, a ) { (a) = ( dcomplex ) csqrt ( (x) ) ; } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_ssqrt2s( x, a ) bli_sssqrt2s( x, a ) #define bli_dsqrt2s( x, a ) bli_ddsqrt2s( x, a ) #define bli_csqrt2s( x, a ) bli_ccsqrt2s( x, a ) #define bli_zsqrt2s( x, a ) bli_zzsqrt2s( x, a ) #endif blis-0.6.1/frame/include/level0/bli_subjs.h000066400000000000000000000100551360743507500205130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SUBJS_H #define BLIS_SUBJS_H // subjs // Notes: // - The first char encodes the type of a. // - The second char encodes the type of y. 
#define bli_sssubjs( a, y ) bli_ssubjris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) #define bli_dssubjs( a, y ) bli_ssubjris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) #define bli_cssubjs( a, y ) bli_ssubjris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) #define bli_zssubjs( a, y ) bli_ssubjris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) #define bli_sdsubjs( a, y ) bli_dsubjris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) #define bli_ddsubjs( a, y ) bli_dsubjris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_cdsubjs( a, y ) bli_dsubjris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_zdsubjs( a, y ) bli_dsubjris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scsubjs( a, y ) bli_csubjris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) #define bli_dcsubjs( a, y ) bli_csubjris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) #define bli_ccsubjs( a, y ) bli_csubjris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) #define bli_zcsubjs( a, y ) bli_csubjris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) #define bli_szsubjs( a, y ) bli_zsubjris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) #define bli_dzsubjs( a, y ) bli_zsubjris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_czsubjs( a, y ) bli_zsubjris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_zzsubjs( a, y ) bli_zsubjris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scsubjs( a, y ) { (y) -= (a); } #define bli_dcsubjs( a, y ) { (y) -= (a); } #define bli_ccsubjs( a, y ) { (y) -= conjf(a); } #define bli_zcsubjs( a, y ) { (y) -= conj (a); } #define bli_szsubjs( a, y ) { (y) -= (a); } #define bli_dzsubjs( a, y ) { (y) -= (a); } #define bli_czsubjs( a, y ) { (y) -= conjf(a); } #define bli_zzsubjs( 
a, y ) { (y) -= conj (a); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_ssubjs( a, y ) bli_sssubjs( a, y ) #define bli_dsubjs( a, y ) bli_ddsubjs( a, y ) #define bli_csubjs( a, y ) bli_ccsubjs( a, y ) #define bli_zsubjs( a, y ) bli_zzsubjs( a, y ) #endif blis-0.6.1/frame/include/level0/bli_subs.h000066400000000000000000000077221360743507500203500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_SUBS_H #define BLIS_SUBS_H // subs // Notes: // - The first char encodes the type of a. // - The second char encodes the type of y. #define bli_sssubs( a, y ) bli_ssubris( bli_sreal(a), bli_simag(a), bli_sreal(y), bli_simag(y) ) #define bli_dssubs( a, y ) bli_ssubris( bli_dreal(a), bli_dimag(a), bli_sreal(y), bli_simag(y) ) #define bli_cssubs( a, y ) bli_ssubris( bli_creal(a), bli_cimag(a), bli_sreal(y), bli_simag(y) ) #define bli_zssubs( a, y ) bli_ssubris( bli_zreal(a), bli_zimag(a), bli_sreal(y), bli_simag(y) ) #define bli_sdsubs( a, y ) bli_dsubris( bli_sreal(a), bli_simag(a), bli_dreal(y), bli_dimag(y) ) #define bli_ddsubs( a, y ) bli_dsubris( bli_dreal(a), bli_dimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_cdsubs( a, y ) bli_dsubris( bli_creal(a), bli_cimag(a), bli_dreal(y), bli_dimag(y) ) #define bli_zdsubs( a, y ) bli_dsubris( bli_zreal(a), bli_zimag(a), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX #define bli_scsubs( a, y ) bli_csubris( bli_sreal(a), bli_simag(a), bli_creal(y), bli_cimag(y) ) #define bli_dcsubs( a, y ) bli_csubris( bli_dreal(a), bli_dimag(a), bli_creal(y), bli_cimag(y) ) #define bli_ccsubs( a, y ) bli_csubris( bli_creal(a), bli_cimag(a), bli_creal(y), bli_cimag(y) ) #define bli_zcsubs( a, y ) bli_csubris( bli_zreal(a), bli_zimag(a), bli_creal(y), bli_cimag(y) ) #define bli_szsubs( a, y ) bli_zsubris( bli_sreal(a), bli_simag(a), bli_zreal(y), bli_zimag(y) ) #define bli_dzsubs( a, y ) bli_zsubris( bli_dreal(a), bli_dimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_czsubs( a, y ) bli_zsubris( bli_creal(a), bli_cimag(a), bli_zreal(y), bli_zimag(y) ) #define bli_zzsubs( a, y ) bli_zsubris( bli_zreal(a), bli_zimag(a), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX #define bli_scsubs( a, y ) { (y) -= (a); } #define bli_dcsubs( a, y ) { (y) -= (a); } #define bli_ccsubs( a, y ) { (y) -= (a); } #define bli_zcsubs( a, y ) { (y) -= (a); } #define bli_szsubs( a, y ) { (y) -= (a); } #define 
bli_dzsubs( a, y ) { (y) -= (a); } #define bli_czsubs( a, y ) { (y) -= (a); } #define bli_zzsubs( a, y ) { (y) -= (a); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_ssubs( a, y ) bli_sssubs( a, y ) #define bli_dsubs( a, y ) bli_ddsubs( a, y ) #define bli_csubs( a, y ) bli_ccsubs( a, y ) #define bli_zsubs( a, y ) bli_zzsubs( a, y ) #endif blis-0.6.1/frame/include/level0/bli_swaps.h000066400000000000000000000101421360743507500205170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SWAPS_H #define BLIS_SWAPS_H // swaps // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_ssswaps( x, y ) \ { \ float w; \ bli_sscopys( (y), (w) ); \ bli_sscopys( (x), (y) ); \ bli_sscopys( (w), (x) ); \ } #define bli_dsswaps( x, y ) \ { \ double w; \ bli_sdcopys( (y), (w) ); \ bli_dscopys( (x), (y) ); \ bli_ddcopys( (w), (x) ); \ } #define bli_csswaps( x, y ) \ { \ scomplex w; \ bli_sccopys( (y), (w) ); \ bli_cscopys( (x), (y) ); \ bli_cccopys( (w), (x) ); \ } #define bli_zsswaps( x, y ) \ { \ dcomplex w; \ bli_szcopys( (y), (w) ); \ bli_zscopys( (x), (y) ); \ bli_zzcopys( (w), (x) ); \ } #define bli_sdswaps( x, y ) \ { \ float w; \ bli_dscopys( (y), (w) ); \ bli_sdcopys( (x), (y) ); \ bli_sscopys( (w), (x) ); \ } #define bli_ddswaps( x, y ) \ { \ double w; \ bli_ddcopys( (y), (w) ); \ bli_ddcopys( (x), (y) ); \ bli_ddcopys( (w), (x) ); \ } #define bli_cdswaps( x, y ) \ { \ scomplex w; \ bli_dccopys( (y), (w) ); \ bli_cdcopys( (x), (y) ); \ bli_cccopys( (w), (x) ); \ } #define bli_zdswaps( x, y ) \ { \ dcomplex w; \ bli_dzcopys( (y), (w) ); \ bli_zdcopys( (x), (y) ); \ bli_zzcopys( (w), (x) ); \ } #define bli_scswaps( x, y ) \ { \ float w; \ bli_cscopys( (y), (w) ); \ bli_sccopys( (x), (y) ); \ bli_sscopys( (w), (x) ); \ } #define bli_dcswaps( x, y ) \ { \ double w; \ bli_cdcopys( (y), (w) ); \ bli_dccopys( (x), (y) ); \ bli_ddcopys( (w), (x) ); \ } #define bli_ccswaps( x, y ) \ { \ 
scomplex w; \ bli_cccopys( (y), (w) ); \ bli_cccopys( (x), (y) ); \ bli_cccopys( (w), (x) ); \ } #define bli_zcswaps( x, y ) \ { \ dcomplex w; \ bli_czcopys( (y), (w) ); \ bli_zccopys( (x), (y) ); \ bli_zzcopys( (w), (x) ); \ } #define bli_szswaps( x, y ) \ { \ float w; \ bli_zscopys( (y), (w) ); \ bli_szcopys( (x), (y) ); \ bli_sscopys( (w), (x) ); \ } #define bli_dzswaps( x, y ) \ { \ double w; \ bli_zdcopys( (y), (w) ); \ bli_dzcopys( (x), (y) ); \ bli_ddcopys( (w), (x) ); \ } #define bli_czswaps( x, y ) \ { \ scomplex w; \ bli_zccopys( (y), (w) ); \ bli_czcopys( (x), (y) ); \ bli_cccopys( (w), (x) ); \ } #define bli_zzswaps( x, y ) \ { \ dcomplex w; \ bli_zzcopys( (y), (w) ); \ bli_zzcopys( (x), (y) ); \ bli_zzcopys( (w), (x) ); \ } #define bli_sswaps( x, y ) bli_ssswaps( x, y ) #define bli_dswaps( x, y ) bli_ddswaps( x, y ) #define bli_cswaps( x, y ) bli_ccswaps( x, y ) #define bli_zswaps( x, y ) bli_zzswaps( x, y ) #endif blis-0.6.1/frame/include/level0/bli_xpbyjs.h000066400000000000000000000320151360743507500207040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_XPBYJS_H #define BLIS_XPBYJS_H // xpbyjs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of b. // - The third char encodes the type of y. // -- (xby) = (??s) ------------------------------------------------------------ #define bli_sssxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_dssxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_cssxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zssxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_sdsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ddsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cdsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zdsxpbyjs( x, 
b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_scsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dcsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ccsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zcsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_szsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_czsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzsxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) // -- (xby) = (??d) ------------------------------------------------------------ #define bli_ssdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dsdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_csdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zsdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sddxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dddxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), 
bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cddxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zddxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_scdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dcdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_ccdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zcdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_szdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dzdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_czdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zzdxpbyjs( x, b, y ) bli_rxxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) #ifndef BLIS_ENABLE_C99_COMPLEX // -- (xby) = (??c) ------------------------------------------------------------ #define bli_sscxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_dscxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_cscxpbyjs( x, b, y ) bli_crxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_zscxpbyjs( x, b, y ) bli_crxpbyjris( bli_zreal(x), 
bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) #define bli_sdcxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_ddcxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cdcxpbyjs( x, b, y ) bli_crxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zdcxpbyjs( x, b, y ) bli_crxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) #define bli_sccxpbyjs( x, b, y ) bli_cxxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dccxpbyjs( x, b, y ) bli_cxxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_cccxpbyjs( x, b, y ) bli_cxxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zccxpbyjs( x, b, y ) bli_cxxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) #define bli_szcxpbyjs( x, b, y ) bli_cxxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_dzcxpbyjs( x, b, y ) bli_cxxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_czcxpbyjs( x, b, y ) bli_cxxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) #define bli_zzcxpbyjs( x, b, y ) bli_cxxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) // -- (xby) = (??z) ------------------------------------------------------------ #define bli_sszxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dszxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), 
bli_zreal(y), bli_zimag(y) ) #define bli_cszxpbyjs( x, b, y ) bli_crxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zszxpbyjs( x, b, y ) bli_crxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sdzxpbyjs( x, b, y ) bli_rxxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_ddzxpbyjs( x, b, y ) bli_rxxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cdzxpbyjs( x, b, y ) bli_crxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zdzxpbyjs( x, b, y ) bli_crxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_sczxpbyjs( x, b, y ) bli_cxxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dczxpbyjs( x, b, y ) bli_cxxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_cczxpbyjs( x, b, y ) bli_cxxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zczxpbyjs( x, b, y ) bli_cxxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_szzxpbyjs( x, b, y ) bli_cxxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_dzzxpbyjs( x, b, y ) bli_cxxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_czzxpbyjs( x, b, y ) bli_cxxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #define bli_zzzxpbyjs( x, b, y ) bli_cxxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) #else // ifdef BLIS_ENABLE_C99_COMPLEX // -- (xby) = (??c) 
------------------------------------------------------------ #define bli_sscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_dscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_cscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_zscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_sdcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_ddcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_cdcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_zdcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_sccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_dccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_cccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_zccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_szcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_dzcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_czcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_zzcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } // -- (xby) = (??z) ------------------------------------------------------------ #define bli_sszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_dszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_cszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_zszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_sdzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_ddzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_cdzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_zdzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_sczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_dczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_cczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_zczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_szzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_dzzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #define bli_czzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); 
} #define bli_zzzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } #endif // BLIS_ENABLE_C99_COMPLEX #define bli_sxpbyjs( x, b, y ) bli_sssxpbyjs( x, b, y ) #define bli_dxpbyjs( x, b, y ) bli_dddxpbyjs( x, b, y ) #define bli_cxpbyjs( x, b, y ) bli_cccxpbyjs( x, b, y ) #define bli_zxpbyjs( x, b, y ) bli_zzzxpbyjs( x, b, y ) #endif blis-0.6.1/frame/include/level0/bli_xpbys.h000066400000000000000000000315421360743507500205360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_XPBYS_H #define BLIS_XPBYS_H // xpbys // Notes: // - The first char encodes the type of x. // - The second char encodes the type of b. // - The third char encodes the type of y. // -- (xby) = (??s) ------------------------------------------------------------ #define bli_sssxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_dssxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_cssxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_zssxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) #define bli_sdsxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ddsxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_cdsxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zdsxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) #define bli_scsxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dcsxpbys( 
x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_ccsxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zcsxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) #define bli_szsxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_dzsxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_czsxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) #define bli_zzsxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) // -- (xby) = (??d) ------------------------------------------------------------ #define bli_ssdxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dsdxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_csdxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zsdxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) #define bli_sddxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_dddxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_cddxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) #define bli_zddxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_dreal(b), 
bli_dimag(b), bli_dreal(y), bli_dimag(y) )

// Naming: in bli_<x><b><y>xpbys the three type letters give, in order, the
// types of x, b and y; each macro below picks the matching bli_?real/bli_?imag
// accessor for every operand.
//
// Remaining (?cd) and (?zd) variants: y is read through bli_dreal/bli_dimag,
// and every combination of x and b is forwarded to bli_rxxpbyris.
#define bli_scdxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
#define bli_dcdxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
#define bli_ccdxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
#define bli_zcdxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
#define bli_szdxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
#define bli_dzdxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
#define bli_czdxpbys( x, b, y ) bli_rxxpbyris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
#define bli_zzdxpbys( x, b, y ) bli_rxxpbyris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )

// The variants with a complex y come in two flavours. In this branch (C99
// complex disabled) every operand is split into its real and imaginary parts
// and handed to one of three *xpbyris helpers, chosen as follows:
//   - b complex (c or z)            -> bli_cxxpbyris
//   - b real, x complex             -> bli_crxpbyris
//   - b real, x real                -> bli_rxxpbyris
// NOTE(review): the helpers are defined elsewhere; presumably the prefix
// selects which real/imaginary products are actually computed -- confirm
// against their definitions before relying on it.
#ifndef BLIS_ENABLE_C99_COMPLEX

// -- (xby) = (??c) ------------------------------------------------------------

#define bli_sscxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
#define bli_dscxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
#define bli_cscxpbys( x, b, y ) bli_crxpbyris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
#define bli_zscxpbys( x, b, y ) bli_crxpbyris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
#define bli_sdcxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
#define bli_ddcxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
#define bli_cdcxpbys( x, b, y ) bli_crxpbyris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
#define bli_zdcxpbys( x, b, y ) bli_crxpbyris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
#define bli_sccxpbys( x, b, y ) bli_cxxpbyris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
#define bli_dccxpbys( x, b, y ) bli_cxxpbyris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
#define bli_cccxpbys( x, b, y ) bli_cxxpbyris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
#define bli_zccxpbys( x, b, y ) bli_cxxpbyris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
#define bli_szcxpbys( x, b, y ) bli_cxxpbyris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
#define bli_dzcxpbys( x, b, y ) bli_cxxpbyris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
#define bli_czcxpbys( x, b, y ) bli_cxxpbyris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
#define bli_zzcxpbys( x, b, y ) bli_cxxpbyris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )

// -- (xby) = (??z) ------------------------------------------------------------

#define bli_sszxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
#define bli_dszxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
#define bli_cszxpbys( x, b, y ) bli_crxpbyris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
#define bli_zszxpbys( x, b, y ) bli_crxpbyris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
#define bli_sdzxpbys( x, b, y ) bli_rxxpbyris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_ddzxpbys( x, b, y ) bli_rxxpbyris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_cdzxpbys( x, b, y ) bli_crxpbyris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_zdzxpbys( x, b, y ) bli_crxpbyris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_sczxpbys( x, b, y ) bli_cxxpbyris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_dczxpbys( x, b, y ) bli_cxxpbyris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_cczxpbys( x, b, y ) bli_cxxpbyris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_zczxpbys( x, b, y ) bli_cxxpbyris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_szzxpbys( x, b, y ) bli_cxxpbyris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_dzzxpbys( x, b, y ) bli_cxxpbyris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_czzxpbys( x, b, y ) bli_cxxpbyris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
#define bli_zzzxpbys( x, b, y ) bli_cxxpbyris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )

#else // ifdef BLIS_ENABLE_C99_COMPLEX

// With C99 complex enabled, every complex-y variant is the same plain C
// statement, y = x + b * y, applied to the operands exactly as passed.

// -- (xby) = (??c) ------------------------------------------------------------

#define bli_sscxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_dscxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_cscxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_zscxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_sdcxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_ddcxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_cdcxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_zdcxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_sccxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_dccxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_cccxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_zccxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_szcxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_dzcxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_czcxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_zzcxpbys( x, b, y ) { (y) = (x) + (b) * (y); }

// -- (xby) = (??z) ------------------------------------------------------------

#define bli_sszxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_dszxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_cszxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_zszxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_sdzxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_ddzxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_cdzxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_zdzxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_sczxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_dczxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_cczxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_zczxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_szzxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_dzzxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_czzxpbys( x, b, y ) { (y) = (x) + (b) * (y); }
#define bli_zzzxpbys( x, b, y ) { (y) = (x) + (b) * (y); }

#endif // BLIS_ENABLE_C99_COMPLEX

// Same-type shorthands: a single type letter stands for x, b and y alike, so
// bli_?xpbys simply forwards to the variant with that letter repeated.
#define bli_sxpbys( x, b, y ) bli_sssxpbys( x, b, y )
#define bli_dxpbys( x, b, y ) bli_dddxpbys( x, b, y )
#define bli_cxpbys( x, b, y ) bli_cccxpbys( x, b, y )
#define bli_zxpbys( x, b, y ) bli_zzzxpbys( x, b, y )

#endif
blis-0.6.1/frame/include/level0/bli_xpbys_mxn.h000066400000000000000000000516331360743507500214230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_XPBYS_MXN_H #define BLIS_XPBYS_MXN_H // xpbys_mxn // Notes: // - The first char encodes the type of x. // - The second char encodes the type of b. // - The third char encodes the type of y. 
// -- (xby) = (?ss) ------------------------------------------------------------ static void bli_sssxpbys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict beta, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). if ( bli_seq0( *beta ) ) { bli_sscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sssxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_sssxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sssxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dssxpbys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict beta, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
if ( bli_seq0( *beta ) ) { bli_dscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dssxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dssxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dssxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_cssxpbys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict beta, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). if ( bli_seq0( *beta ) ) { bli_cscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cssxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_cssxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cssxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zssxpbys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict beta, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
if ( bli_seq0( *beta ) ) { bli_zscopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zssxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zssxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zssxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } // -- (xby) = (?dd) ------------------------------------------------------------ static void bli_sddxpbys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict beta, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). if ( bli_deq0( *beta ) ) { bli_sdcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sddxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_sddxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sddxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dddxpbys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict beta, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
if ( bli_deq0( *beta ) ) { bli_ddcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dddxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dddxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dddxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_cddxpbys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict beta, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). if ( bli_deq0( *beta ) ) { bli_cdcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cddxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_cddxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cddxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zddxpbys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict beta, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
if ( bli_deq0( *beta ) ) { bli_zdcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zddxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zddxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zddxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } // -- (xby) = (?cc) ------------------------------------------------------------ static void bli_sccxpbys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict beta, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). if ( bli_ceq0( *beta ) ) { bli_sccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sccxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_sccxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_sccxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dccxpbys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict beta, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
if ( bli_ceq0( *beta ) ) { bli_dccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dccxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dccxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dccxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_cccxpbys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict beta, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). if ( bli_ceq0( *beta ) ) { bli_cccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cccxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_cccxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_cccxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zccxpbys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict beta, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
if ( bli_ceq0( *beta ) ) { bli_zccopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zccxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zccxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zccxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } // -- (xby) = (?zz) ------------------------------------------------------------ static void bli_szzxpbys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict beta, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). if ( bli_zeq0( *beta ) ) { bli_szcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_szzxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_szzxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_szzxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_dzzxpbys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict beta, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
if ( bli_zeq0( *beta ) ) { bli_dzcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dzzxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_dzzxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_dzzxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_czzxpbys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict beta, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). if ( bli_zeq0( *beta ) ) { bli_czcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_czzxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_czzxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_czzxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_zzzxpbys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict beta, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { // If beta is zero, overwrite y with x (in case y has infs or NaNs). 
if ( bli_zeq0( *beta ) ) { bli_zzcopys_mxn( m, n, x, rs_x, cs_x, y, rs_y, cs_y ); return; } #ifdef BLIS_ENABLE_CR_CASES if ( rs_x == 1 && rs_y == 1 ) { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zzzxpbys( *(x + ii + jj*cs_x), *beta, *(y + ii + jj*cs_y) ); } else if ( cs_x == 1 && cs_y == 1 ) { for ( dim_t ii = 0; ii < m; ++ii ) for ( dim_t jj = 0; jj < n; ++jj ) bli_zzzxpbys( *(x + ii*rs_x + jj), *beta, *(y + ii*rs_y + jj) ); } else #endif { for ( dim_t jj = 0; jj < n; ++jj ) for ( dim_t ii = 0; ii < m; ++ii ) bli_zzzxpbys( *(x + ii*rs_x + jj*cs_x), *beta, *(y + ii*rs_y + jj*cs_y) ); } } static void bli_sxpbys_mxn( const dim_t m, const dim_t n, float* restrict x, const inc_t rs_x, const inc_t cs_x, float* restrict beta, float* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_sssxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); } static void bli_dxpbys_mxn( const dim_t m, const dim_t n, double* restrict x, const inc_t rs_x, const inc_t cs_x, double* restrict beta, double* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_dddxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); } static void bli_cxpbys_mxn( const dim_t m, const dim_t n, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict beta, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_cccxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); } static void bli_zxpbys_mxn( const dim_t m, const dim_t n, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict beta, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { bli_zzzxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); } #endif blis-0.6.1/frame/include/level0/bli_xpbys_mxn_uplo.h000066400000000000000000000206771360743507500224660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_XPBYS_MXN_UPLO_H #define BLIS_XPBYS_MXN_UPLO_H // xpbys_mxn_u #define bli_sssxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ /* If beta is zero, overwrite y with x (in case y has infs or NaNs). 
*/ \ if ( bli_seq0( *beta ) ) \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_sscopys( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ else \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_sssxpbys( *(x + _i*rs_x + _j*cs_x), \ *(beta), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } #define bli_dddxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ if ( bli_deq0( *beta ) ) \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_ddcopys( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ else \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_dddxpbys( *(x + _i*rs_x + _j*cs_x), \ *(beta), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } #define bli_cccxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ if ( bli_ceq0( *beta ) ) \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_cccopys( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ else \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_cccxpbys( *(x + _i*rs_x + _j*cs_x), \ *(beta), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } #define bli_zzzxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ /* If beta is zero, overwrite y with x (in case y has infs or NaNs). 
*/ \ if ( bli_zeq0( *beta ) ) \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_zzcopys( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ else \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_zzzxpbys( *(x + _i*rs_x + _j*cs_x), \ *(beta), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } // xpbys_mxn_l #define bli_sssxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ if ( bli_seq0( *beta ) ) \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_sscopys( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ else \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_sssxpbys( *(x + _i*rs_x + _j*cs_x), \ *(beta), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } #define bli_dddxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ if ( bli_deq0( *beta ) ) \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_ddcopys( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ else \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_dddxpbys( *(x + _i*rs_x + _j*cs_x), \ *(beta), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } #define bli_cccxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ /* If beta is zero, overwrite y with x (in case y has infs or NaNs). 
*/ \ if ( bli_ceq0( *beta ) ) \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_cccopys( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ else \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_cccxpbys( *(x + _i*rs_x + _j*cs_x), \ *(beta), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } #define bli_zzzxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t _i, _j; \ \ /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ if ( bli_zeq0( *beta ) ) \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_zzcopys( *(x + _i*rs_x + _j*cs_x), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ else \ { \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_zzzxpbys( *(x + _i*rs_x + _j*cs_x), \ *(beta), \ *(y + _i*rs_y + _j*cs_y) ); \ } \ } \ } #define bli_sxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ {\ bli_sssxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ } #define bli_dxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ {\ bli_dddxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ } #define bli_cxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ {\ bli_cccxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ } #define bli_zxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ {\ bli_zzzxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ } #define bli_sxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ {\ bli_sssxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ } #define bli_dxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ {\ bli_dddxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, 
cs_y ); \ } #define bli_cxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ {\ bli_cccxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ } #define bli_zxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ {\ bli_zzzxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \ } #endif blis-0.6.1/frame/include/level0/io/000077500000000000000000000000001360743507500167745ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/io/bli_scal2ios.h000066400000000000000000000041251360743507500215140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyiight notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyiight notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2IOS_H #define BLIS_SCAL2IOS_H // scal2ios #define bli_cscal2ios( a, x, yi ) \ { \ (yi) = bli_cimag(a) * bli_creal(x) + bli_creal(a) * bli_cimag(x); \ } #define bli_zscal2ios( a, x, yi ) \ { \ (yi) = bli_zimag(a) * bli_zreal(x) + bli_zreal(a) * bli_zimag(x); \ } #define bli_scscal2ios( a, x, yi ) \ { \ (yi) = bli_creal(a) * bli_cimag(x); \ } #define bli_dzscal2ios( a, x, yi ) \ { \ (yi) = bli_zreal(a) * bli_zimag(x); \ } #endif blis-0.6.1/frame/include/level0/io/bli_scal2jios.h000066400000000000000000000036651360743507500216760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyiight notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyiight notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2JIOS_H #define BLIS_SCAL2JIOS_H // scal2jios #define bli_cscal2jios( a, x, yi ) \ { \ (yi) = bli_cimag(a) * bli_creal(x) - bli_creal(a) * bli_cimag(x); \ } #define bli_zscal2jios( a, x, yi ) \ { \ (yi) = bli_zimag(a) * bli_zreal(x) - bli_zreal(a) * bli_zimag(x); \ } #endif blis-0.6.1/frame/include/level0/old/000077500000000000000000000000001360743507500171435ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/old/bli_cast.h000066400000000000000000000110021360743507500210660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_CAST_H
#define BLIS_CAST_H

// cast

// Notes:
// - The first char encodes the type of *ap.
// - The second char encodes the type of b.
// - ap is cast to a pointer to the source type and dereferenced; the value
//   read is then converted and stored into b.
// - When the source is complex and the destination is real, only the real
//   component is kept.

// -- Real destinations -------------------------------------------------------

#define bli_sscast( ap, b ) { (b) = ( float ) *(( float* )(ap)); }
#define bli_dscast( ap, b ) { (b) = ( float ) *(( double* )(ap)); }
#define bli_cscast( ap, b ) { (b) = ( float ) bli_creal( *(( scomplex* )(ap)) ); }
#define bli_zscast( ap, b ) { (b) = ( float ) bli_zreal( *(( dcomplex* )(ap)) ); }

#define bli_sdcast( ap, b ) { (b) = ( double ) *(( float* )(ap)); }
#define bli_ddcast( ap, b ) { (b) = ( double ) *(( double* )(ap)); }
#define bli_cdcast( ap, b ) { (b) = ( double ) bli_creal( *(( scomplex* )(ap)) ); }
#define bli_zdcast( ap, b ) { (b) = ( double ) bli_zreal( *(( dcomplex* )(ap)) ); }

// -- Complex destinations ----------------------------------------------------

#ifndef BLIS_ENABLE_C99_COMPLEX

// Without C99 complex support, b is populated through the bli_??sets macros.
// Real sources supply 0.0 as the imaginary component.

#define bli_sccast( ap, b ) \
{ \
	bli_scsets( bli_sreal( *(( float* )(ap)) ), \
	            0.0, (b) ); \
}
#define bli_dccast( ap, b ) \
{ \
	bli_dcsets( bli_dreal( *(( double* )(ap)) ), \
	            0.0, (b) ); \
}
#define bli_cccast( ap, b ) \
{ \
	bli_ccsets( bli_creal( *(( scomplex* )(ap)) ), \
	            bli_cimag( *(( scomplex* )(ap)) ), (b) ); \
}
#define bli_zccast( ap, b ) \
{ \
	bli_zcsets( bli_zreal( *(( dcomplex* )(ap)) ), \
	            bli_zimag( *(( dcomplex* )(ap)) ), (b) ); \
}

#define bli_szcast( ap, b ) \
{ \
	bli_szsets( bli_sreal( *(( float* )(ap)) ), \
	            0.0, (b) ); \
}
#define bli_dzcast( ap, b ) \
{ \
	bli_dzsets( bli_dreal( *(( double* )(ap)) ), \
	            0.0, (b) ); \
}
#define bli_czcast( ap, b ) \
{ \
	bli_czsets( bli_creal( *(( scomplex* )(ap)) ), \
	            bli_cimag( *(( scomplex* )(ap)) ), (b) ); \
}
#define bli_zzcast( ap, b ) \
{ \
	bli_zzsets( bli_zreal( *(( dcomplex* )(ap)) ), \
	            bli_zimag( *(( dcomplex* )(ap)) ), (b) ); \
}

#else // ifdef BLIS_ENABLE_C99_COMPLEX

// With C99 complex support, a plain cast to the destination type is used.

#define bli_sccast( ap, b ) { (b) = ( scomplex ) *(( float* )(ap)); }
#define bli_dccast( ap, b ) { (b) = ( scomplex ) *(( double* )(ap)); }
#define bli_cccast( ap, b ) { (b) = ( scomplex ) *(( scomplex* )(ap)); }
#define bli_zccast( ap, b ) { (b) = ( scomplex ) *(( dcomplex* )(ap)); }

#define bli_szcast( ap, b ) { (b) = ( dcomplex ) *(( float* )(ap)); }
#define bli_dzcast( ap, b ) { (b) = ( dcomplex ) *(( double* )(ap)); }
#define bli_czcast( ap, b ) { (b) = ( dcomplex ) *(( scomplex* )(ap)); }
#define bli_zzcast( ap, b ) { (b) = ( dcomplex ) *(( dcomplex* )(ap)); }

#endif // BLIS_ENABLE_C99_COMPLEX

// -- Same-type shorthands ----------------------------------------------------

#define bli_scast( ap, b ) bli_sscast( ap, b )
#define bli_dcast( ap, b ) bli_ddcast( ap, b )
#define bli_ccast( ap, b ) bli_cccast( ap, b )
#define bli_zcast( ap, b ) bli_zzcast( ap, b )

#endif
blis-0.6.1/frame/include/level0/old/bli_castfrom.h000066400000000000000000000032101360743507500217540ustar00rootroot00000000000000
/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ blis-0.6.1/frame/include/level0/old/bli_castto.h000066400000000000000000000032101360743507500214330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ blis-0.6.1/frame/include/level0/old/bli_copynzjs.h000066400000000000000000000070061360743507500220240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYNZJS_H #define BLIS_COPYNZJS_H // copynzjs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. // - x is copied in conjugated form. #define bli_sscopynzjs( x, y ) \ { \ (y) = ( float ) (x); \ } #define bli_dscopynzjs( x, y ) \ { \ (y) = ( float ) (x); \ } #define bli_cscopynzjs( x, y ) \ { \ (y) = ( float ) (x).real; \ } #define bli_zscopynzjs( x, y ) \ { \ (y) = ( float ) (x).real; \ } #define bli_sdcopynzjs( x, y ) \ { \ (y) = ( double ) (x); \ } #define bli_ddcopynzjs( x, y ) \ { \ (y) = ( double ) (x); \ } #define bli_cdcopynzjs( x, y ) \ { \ (y) = ( double ) (x).real; \ } #define bli_zdcopynzjs( x, y ) \ { \ (y) = ( double ) (x).real; \ } #define bli_sccopynzjs( x, y ) \ { \ (y).real = ( float ) (x); \ /* (y).imag = 0.0F; (SKIP COPYING OF ZERO) */ \ } #define bli_dccopynzjs( x, y ) \ { \ (y).real = ( float ) (x); \ /* (y).imag = 0.0F; (SKIP COPYING OF ZERO) */ \ } #define bli_cccopynzjs( x, y ) \ { \ (y).real = ( float ) (x).real; \ (y).imag = ( float ) -(x).imag; \ } #define bli_zccopynzjs( x, y ) \ { \ (y).real = ( float ) (x).real; \ (y).imag = ( float ) -(x).imag; \ } #define bli_szcopynzjs( x, y ) \ { \ (y).real = ( double ) (x); \ /* (y).imag = 0.0; (SKIP COPYING OF ZERO) */ \ } #define bli_dzcopynzjs( x, y ) \ { \ (y).real = ( double ) (x); \ /* (y).imag = 0.0; (SKIP COPYING OF ZERO) */ \ } #define bli_czcopynzjs( x, y ) \ { \ (y).real = ( double ) (x).real; \ (y).imag = ( double ) 
-(x).imag; \ } #define bli_zzcopynzjs( x, y ) \ { \ (y).real = ( double ) (x).real; \ (y).imag = ( double ) -(x).imag; \ } #define bli_scopynzjs( x, y ) \ { \ bli_sscopynzjs( x, y ); \ } #define bli_dcopynzjs( x, y ) \ { \ bli_ddcopynzjs( x, y ); \ } #define bli_ccopynzjs( x, y ) \ { \ bli_cccopynzjs( x, y ); \ } #define bli_zcopynzjs( x, y ) \ { \ bli_zzcopynzjs( x, y ); \ } #endif blis-0.6.1/frame/include/level0/old/bli_copynzs.h000066400000000000000000000066761360743507500216660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYNZS_H #define BLIS_COPYNZS_H // copynzs // Notes: // - The first char encodes the type of x. // - The second char encodes the type of y. #define bli_sscopynzs( x, y ) \ { \ (y) = ( float ) (x); \ } #define bli_dscopynzs( x, y ) \ { \ (y) = ( float ) (x); \ } #define bli_cscopynzs( x, y ) \ { \ (y) = ( float ) (x).real; \ } #define bli_zscopynzs( x, y ) \ { \ (y) = ( float ) (x).real; \ } #define bli_sdcopynzs( x, y ) \ { \ (y) = ( double ) (x); \ } #define bli_ddcopynzs( x, y ) \ { \ (y) = ( double ) (x); \ } #define bli_cdcopynzs( x, y ) \ { \ (y) = ( double ) (x).real; \ } #define bli_zdcopynzs( x, y ) \ { \ (y) = ( double ) (x).real; \ } #define bli_sccopynzs( x, y ) \ { \ (y).real = ( float ) (x); \ /* (y).imag = 0.0F; (SKIP COPYING OF ZERO) */ \ } #define bli_dccopynzs( x, y ) \ { \ (y).real = ( float ) (x); \ /* (y).imag = 0.0F (SKIP COPYING OF ZERO) */; \ } #define bli_cccopynzs( x, y ) \ { \ (y).real = ( float ) (x).real; \ (y).imag = ( float ) (x).imag; \ } #define bli_zccopynzs( x, y ) \ { \ (y).real = ( float ) (x).real; \ (y).imag = ( float ) (x).imag; \ } #define bli_szcopynzs( x, y ) \ { \ (y).real = ( double ) (x); \ /* (y).imag = 0.0; (SKIP COPYING OF ZERO) */ \ } #define bli_dzcopynzs( x, y ) \ { \ (y).real = ( double ) (x); \ /* (y).imag = 0.0; (SKIP COPYING OF ZERO) */ \ } #define bli_czcopynzs( x, y ) \ { \ (y).real = ( double ) (x).real; \ (y).imag = ( double ) (x).imag; \ } #define bli_zzcopynzs( x, y ) \ { \ (y).real 
= ( double ) (x).real; \ (y).imag = ( double ) (x).imag; \ } #define bli_scopynzs( x, y ) \ { \ bli_sscopynzs( x, y ); \ } #define bli_dcopynzs( x, y ) \ { \ bli_ddcopynzs( x, y ); \ } #define bli_ccopynzs( x, y ) \ { \ bli_cccopynzs( x, y ); \ } #define bli_zcopynzs( x, y ) \ { \ bli_zzcopynzs( x, y ); \ } #endif blis-0.6.1/frame/include/level0/old/bli_invscalcjs.h000066400000000000000000000122311360743507500223000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_INVSCALCJS_H #define BLIS_INVSCALCJS_H // invscalcjs // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. #define bli_ssinvscalcjs( conj, a, x ) \ { \ (x) /= ( float ) (a); \ } #define bli_dsinvscalcjs( conj, a, x ) \ { \ (x) /= ( float ) (a); \ } #define bli_csinvscalcjs( conj, a, x ) \ { \ (x) /= ( float ) (a).real; \ } #define bli_zsinvscalcjs( conj, a, x ) \ { \ (x) /= ( float ) (a).real; \ } #define bli_sdinvscalcjs( conj, a, x ) \ { \ (x) /= ( double ) (a); \ } #define bli_ddinvscalcjs( conj, a, x ) \ { \ (x) /= ( double ) (a); \ } #define bli_cdinvscalcjs( conj, a, x ) \ { \ (x) /= ( double ) (a).real; \ } #define bli_zdinvscalcjs( conj, a, x ) \ { \ (x) /= ( double ) (a).real; \ } #define bli_scinvscalcjs( conj, a, x ) \ { \ (x).real /= ( float ) (a); \ (x).imag /= ( float ) (a); \ } #define bli_dcinvscalcjs( conj, a, x ) \ { \ (x).real /= ( float ) (a); \ (x).imag /= ( float ) (a); \ } #define bli_ccinvscalcjs( conj, a, x ) \ { \ float aimag = ( bli_is_conj( conj ) ? ( float ) -(a).imag : \ ( float ) (a).imag ); \ float temp = ( float ) (a).real * (a).real + ( float ) aimag * (a).imag; \ float xr = ( float ) ( ( float ) (a).real * (x).real + ( float ) aimag * (x).imag ) / temp; \ float xi = ( float ) ( ( float ) (a).real * (x).imag - ( float ) aimag * (x).real ) / temp; \ (x).real = xr; \ (x).imag = xi; \ } #define bli_zcinvscalcjs( conj, a, x ) \ { \ float aimag = ( bli_is_conj( conj ) ? 
( float ) -(a).imag : \ ( float ) (a).imag ); \ float temp = ( float ) (a).real * (a).real + ( float ) aimag * (a).imag; \ float xr = ( float ) ( ( float ) (a).real * (x).real + ( float ) aimag * (x).imag ) / temp; \ float xi = ( float ) ( ( float ) (a).real * (x).imag - ( float ) aimag * (x).real ) / temp; \ (x).real = xr; \ (x).imag = xi; \ } #define bli_szinvscalcjs( conj, a, x ) \ { \ (x).real /= ( double ) (a); \ (x).imag /= ( double ) (a); \ } #define bli_dzinvscalcjs( conj, a, x ) \ { \ (x).real /= ( double ) (a); \ (x).imag /= ( double ) (a); \ } #define bli_czinvscalcjs( conj, a, x ) \ { \ double aimag = ( bli_is_conj( conj ) ? ( double ) -(a).imag : \ ( double ) (a).imag ); \ double temp = ( double ) (a).real * (a).real + ( double ) aimag * (a).imag; \ double xr = ( double ) ( ( double ) (a).real * (x).real + ( double ) aimag * (x).imag ) / temp; \ double xi = ( double ) ( ( double ) (a).real * (x).imag - ( double ) aimag * (x).real ) / temp; \ (x).real = xr; \ (x).imag = xi; \ } #define bli_zzinvscalcjs( conj, a, x ) \ { \ double aimag = ( bli_is_conj( conj ) ? ( double ) -(a).imag : \ ( double ) (a).imag ); \ double temp = ( double ) (a).real * (a).real + ( double ) aimag * (a).imag; \ double xr = ( double ) ( ( double ) (a).real * (x).real + ( double ) aimag * (x).imag ) / temp; \ double xi = ( double ) ( ( double ) (a).real * (x).imag - ( double ) aimag * (x).real ) / temp; \ (x).real = xr; \ (x).imag = xi; \ } #define bli_sinvscalcjs( conj, a, x ) \ { \ bli_ssinvscalcjs( conj, a, x ); \ } #define bli_dinvscalcjs( conj, a, x ) \ { \ bli_ddinvscalcjs( conj, a, x ); \ } #define bli_cinvscalcjs( conj, a, x ) \ { \ bli_ccinvscalcjs( conj, a, x ); \ } #define bli_zinvscalcjs( conj, a, x ) \ { \ bli_zzinvscalcjs( conj, a, x ); \ } #endif blis-0.6.1/frame/include/level0/old/bli_scalcjs.h000066400000000000000000000111431360743507500215640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_SCALCJS_H
#define BLIS_SCALCJS_H

// scalcjs

// Notes:
// - The first char encodes the type of a.
// - The second char encodes the type of x.
// - a is (conditionally) used in conjugated form.
// - x is overwritten with a * x, or conj(a) * x when bli_is_conj( conj ) is
//   true. conj is consulted only by the variants in which both a and x are
//   complex.
// - When a is complex and x is real, only the real part of a is used.
// - The complex/complex variants compute both result components into
//   temporaries before storing, because each component reads both
//   components of x.

// -- Real x ------------------------------------------------------------------

#define bli_ssscalcjs( conj, a, x ) { (x) *= ( float ) (a); }
#define bli_dsscalcjs( conj, a, x ) { (x) *= ( float ) (a); }
#define bli_csscalcjs( conj, a, x ) { (x) *= ( float ) (a).real; }
#define bli_zsscalcjs( conj, a, x ) { (x) *= ( float ) (a).real; }

#define bli_sdscalcjs( conj, a, x ) { (x) *= ( double ) (a); }
#define bli_ddscalcjs( conj, a, x ) { (x) *= ( double ) (a); }
#define bli_cdscalcjs( conj, a, x ) { (x) *= ( double ) (a).real; }
#define bli_zdscalcjs( conj, a, x ) { (x) *= ( double ) (a).real; }

// -- Single-precision complex x ----------------------------------------------

#define bli_scscalcjs( conj, a, x ) \
{ \
	(x).real *= ( float ) (a); \
	(x).imag *= ( float ) (a); \
}
#define bli_dcscalcjs( conj, a, x ) \
{ \
	(x).real *= ( float ) (a); \
	(x).imag *= ( float ) (a); \
}
#define bli_ccscalcjs( conj, a, x ) \
{ \
	float aimag = ( bli_is_conj( conj ) ? ( float ) -(a).imag : \
	                                      ( float )  (a).imag ); \
	float tempr = ( float ) (a).real * (x).real - ( float ) aimag * (x).imag; \
	float tempi = ( float ) (a).real * (x).imag + ( float ) aimag * (x).real; \
	(x).real = tempr; \
	(x).imag = tempi; \
}
#define bli_zcscalcjs( conj, a, x ) \
{ \
	float aimag = ( bli_is_conj( conj ) ? ( float ) -(a).imag : \
	                                      ( float )  (a).imag ); \
	float tempr = ( float ) (a).real * (x).real - ( float ) aimag * (x).imag; \
	float tempi = ( float ) (a).real * (x).imag + ( float ) aimag * (x).real; \
	(x).real = tempr; \
	(x).imag = tempi; \
}

// -- Double-precision complex x ----------------------------------------------

#define bli_szscalcjs( conj, a, x ) \
{ \
	(x).real *= ( double ) (a); \
	(x).imag *= ( double ) (a); \
}
#define bli_dzscalcjs( conj, a, x ) \
{ \
	(x).real *= ( double ) (a); \
	(x).imag *= ( double ) (a); \
}
#define bli_czscalcjs( conj, a, x ) \
{ \
	double aimag = ( bli_is_conj( conj ) ? ( double ) -(a).imag : \
	                                       ( double )  (a).imag ); \
	double tempr = ( double ) (a).real * (x).real - ( double ) aimag * (x).imag; \
	double tempi = ( double ) (a).real * (x).imag + ( double ) aimag * (x).real; \
	(x).real = tempr; \
	(x).imag = tempi; \
}
#define bli_zzscalcjs( conj, a, x ) \
{ \
	double aimag = ( bli_is_conj( conj ) ? ( double ) -(a).imag : \
	                                       ( double )  (a).imag ); \
	double tempr = ( double ) (a).real * (x).real - ( double ) aimag * (x).imag; \
	double tempi = ( double ) (a).real * (x).imag + ( double ) aimag * (x).real; \
	(x).real = tempr; \
	(x).imag = tempi; \
}

// -- Same-type shorthands ----------------------------------------------------

#define bli_sscalcjs( conj, a, x ) { bli_ssscalcjs( conj, a, x ); }
#define bli_dscalcjs( conj, a, x ) { bli_ddscalcjs( conj, a, x ); }
#define bli_cscalcjs( conj, a, x ) { bli_ccscalcjs( conj, a, x ); }
#define bli_zscalcjs( conj, a, x ) { bli_zzscalcjs( conj, a, x ); }

#endif
blis-0.6.1/frame/include/level0/old/bli_set0ris_mxn.h000066400000000000000000000052041360743507500224160ustar00rootroot00000000000000
/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SET0RIS_MXN_H #define BLIS_SET0RIS_MXN_H // set0ris_mxn #define bli_sset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ bli_sset0ris( *(ar + _i*rs_a + _j*cs_a), \ *(ai + _i*rs_a + _j*cs_a) ); \ } #define bli_dset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ bli_dset0ris( *(ar + _i*rs_a + _j*cs_a), \ *(ai + _i*rs_a + _j*cs_a) ); \ } #define bli_cset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ bli_cset0ris( *(ar + _i*rs_a + _j*cs_a), \ *(ai + _i*rs_a + _j*cs_a) ); \ } #define bli_zset0ris_mxn( m, n, ar, ai, rs_a, cs_a ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ bli_zset0ris( *(ar + _i*rs_a + _j*cs_a), \ *(ai + _i*rs_a + _j*cs_a) ); \ } #endif blis-0.6.1/frame/include/level0/ri/000077500000000000000000000000001360743507500167775ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/ri/bli_absq2ris.h000066400000000000000000000040521360743507500215250ustar00rootroot00000000000000/* BLIS An object-based framework for developing 
high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_ABSQ2RIS_H #define BLIS_ABSQ2RIS_H // absq2ris #define bli_sabsq2ris( ar, ai, br, bi ) \ { \ (br) = (ar) * (ar); \ } #define bli_dabsq2ris( ar, ai, br, bi ) \ { \ (br) = (ar) * (ar); \ } #define bli_cabsq2ris( ar, ai, br, bi ) \ { \ (br) = (ar) * (ar) + (ai) * (ai); \ (bi) = 0.0F; \ } #define bli_zabsq2ris( ar, ai, br, bi ) \ { \ (br) = (ar) * (ar) + (ai) * (ai); \ (bi) = 0.0; \ } #endif blis-0.6.1/frame/include/level0/ri/bli_abval2ris.h000066400000000000000000000047021360743507500216660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ABVAL2RIS_H #define BLIS_ABVAL2RIS_H // abval2ris #define bli_sabval2ris( xr, xi, ar, ai ) \ { \ (ar) = fabsf(xr); \ } #define bli_dabval2ris( xr, xi, ar, ai ) \ { \ (ar) = fabs(xr); \ } #define bli_cabval2ris( xr, xi, ar, ai ) \ { \ float s = bli_fmaxabs( (xr), (xi) ); \ float mag; \ if ( s == 0.0F ) mag = 0.0F; \ else \ { \ mag = sqrtf( s ) * \ sqrtf( ( (xr) / s ) * (xr) + \ ( (xi) / s ) * (xi) ); \ } \ (ar) = mag; \ (ai) = 0.0F; \ } #define bli_zabval2ris( xr, xi, ar, ai ) \ { \ double s = bli_fmaxabs( (xr), (xi) ); \ double mag; \ if ( s == 0.0 ) mag = 0.0; \ else \ { \ mag = sqrt( s ) * \ sqrt( ( (xr) / s ) * (xr) + \ ( (xi) / s ) * (xi) ); \ } \ (ar) = mag; \ (ai) = 0.0; \ } #endif blis-0.6.1/frame/include/level0/ri/bli_add3ris.h000066400000000000000000000040661360743507500213350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ADD3RIS_H #define BLIS_ADD3RIS_H // add3ris #define bli_sadd3ris( ar, ai, br, bi, cr, ci ) \ { \ (cr) = (ar) + (br); \ } #define bli_dadd3ris( ar, ai, br, bi, cr, ci ) \ { \ (cr) = (ar) + (br); \ } #define bli_cadd3ris( ar, ai, br, bi, cr, ci ) \ { \ (cr) = (ar) + (br); \ (ci) = (ai) + (bi); \ } #define bli_zadd3ris( ar, ai, br, bi, cr, ci ) \ { \ (cr) = (ar) + (br); \ (ci) = (ai) + (bi); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_addjris.h000066400000000000000000000040111360743507500214120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_ADDJRIS_H #define BLIS_ADDJRIS_H // addjris #define bli_saddjris( ar, ai, xr, xi ) bli_saddris( (ar), -(ai), (xr), (xi) ) #define bli_daddjris( ar, ai, xr, xi ) bli_daddris( (ar), -(ai), (xr), (xi) ) #define bli_caddjris( ar, ai, xr, xi ) bli_caddris( (ar), -(ai), (xr), (xi) ) #define bli_zaddjris( ar, ai, xr, xi ) bli_zaddris( (ar), -(ai), (xr), (xi) ) #endif blis-0.6.1/frame/include/level0/ri/bli_addris.h000066400000000000000000000040171360743507500212460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_ADDRIS_H #define BLIS_ADDRIS_H // addris #define bli_saddris( ar, ai, xr, xi ) \ { \ (xr) = (xr) + (ar); \ } #define bli_daddris( ar, ai, xr, xi ) \ { \ (xr) = (xr) + (ar); \ } #define bli_caddris( ar, ai, xr, xi ) \ { \ (xr) = (xr) + (ar); \ (xi) = (xi) + (ai); \ } #define bli_zaddris( ar, ai, xr, xi ) \ { \ (xr) = (xr) + (ar); \ (xi) = (xi) + (ai); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_axmyris.h000066400000000000000000000045061360743507500214770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXMYRIS_H #define BLIS_AXMYRIS_H // axmyris #define bli_saxmyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) -= (ar) * (xr); \ } #define bli_daxmyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) -= (ar) * (xr); \ } #define bli_caxmyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) -= (ar) * (xr) - (ai) * (xi); \ (yi) -= (ai) * (xr) + (ar) * (xi); \ } #define bli_zaxmyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) -= (ar) * (xr) - (ai) * (xi); \ (yi) -= (ai) * (xr) + (ar) * (xi); \ } #define bli_scaxmyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) -= (ar) * (xr); \ (yi) -= (ar) * (xi); \ } #define bli_dzaxmyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) -= (ar) * (xr); \ (yi) -= (ar) * (xi); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_axpbyjris.h000066400000000000000000000065041360743507500220160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXPBYJRIS_H #define BLIS_AXPBYJRIS_H // axpbyjris #define bli_rxaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ { \ (yr) = (ar) * (xr) + (br) * (yr); \ } #define bli_cxaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ { \ const __typeof__(yr) yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ const __typeof__(yi) yt_i = (ai) * (xr) - (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ (yr) = yt_r; \ (yi) = yt_i; \ } // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of b. // - The fourth char encodes the type of y. 
// -- (axby) = (??ss) ---------------------------------------------------------- #define bli_ssssxpbyjris bli_rxxpbyjris #define bli_dsssxpbyjris bli_rxxpbyjris #define bli_csssxpbyjris bli_rxxpbyjris #define bli_zsssxpbyjris bli_rxxpbyjris #define bli_sdssxpbyjris bli_rxxpbyjris #define bli_ddssxpbyjris bli_rxxpbyjris #define bli_cdssxpbyjris bli_rxxpbyjris #define bli_zdssxpbyjris bli_rxxpbyjris #define bli_scssxpbyjris bli_rxxpbyjris #define bli_dcssxpbyjris bli_rxxpbyjris #define bli_ccssxpbyjris bli_rxxpbyjris #define bli_zcssxpbyjris bli_rxxpbyjris #define bli_szssxpbyjris bli_rxxpbyjris #define bli_dzssxpbyjris bli_rxxpbyjris #define bli_czssxpbyjris bli_rxxpbyjris #define bli_zzssxpbyjris bli_rxxpbyjris // NOTE: This series needs to be finished for all other char values for (by), but // not until something in BLIS actually needs mixed-datatype axpbyjris. #define bli_saxpbyjris bli_ssssaxpbyjris #define bli_daxpbyjris bli_ddddaxpbyjris #define bli_caxpbyjris bli_ccccaxpbyjris #define bli_zaxpbyjris bli_zzzzaxpbyjris #endif blis-0.6.1/frame/include/level0/ri/bli_axpbyris.h000066400000000000000000000064261360743507500216470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXPBYRIS_H #define BLIS_AXPBYRIS_H // axpbyris #define bli_rxaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ { \ (yr) = (ar) * (xr) + (br) * (yr); \ } #define bli_cxaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ { \ const __typeof__(yr) yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ const __typeof__(yi) yt_i = (ai) * (xr) + (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ (yr) = yt_r; \ (yi) = yt_i; \ } // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of b. // - The fourth char encodes the type of y. 
// -- (axby) = (??ss) ---------------------------------------------------------- #define bli_ssssxpbyris bli_rxxpbyris #define bli_dsssxpbyris bli_rxxpbyris #define bli_csssxpbyris bli_rxxpbyris #define bli_zsssxpbyris bli_rxxpbyris #define bli_sdssxpbyris bli_rxxpbyris #define bli_ddssxpbyris bli_rxxpbyris #define bli_cdssxpbyris bli_rxxpbyris #define bli_zdssxpbyris bli_rxxpbyris #define bli_scssxpbyris bli_rxxpbyris #define bli_dcssxpbyris bli_rxxpbyris #define bli_ccssxpbyris bli_rxxpbyris #define bli_zcssxpbyris bli_rxxpbyris #define bli_szssxpbyris bli_rxxpbyris #define bli_dzssxpbyris bli_rxxpbyris #define bli_czssxpbyris bli_rxxpbyris #define bli_zzssxpbyris bli_rxxpbyris // NOTE: This series needs to be finished for all other char values for (by), but // not until something in BLIS actually needs mixed-datatype axpbyris. #define bli_saxpbyris bli_ssssaxpbyris #define bli_daxpbyris bli_ddddaxpbyris #define bli_caxpbyris bli_ccccaxpbyris #define bli_zaxpbyris bli_zzzzaxpbyris #endif blis-0.6.1/frame/include/level0/ri/bli_axpyjris.h000066400000000000000000000125511360743507500216530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXPYJRIS_H #define BLIS_AXPYJRIS_H // axpyjris #define bli_rxaxpyjris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr); \ } #define bli_cxaxpyjris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr) + (ai) * (xi); \ (yi) += (ai) * (xr) - (ar) * (xi); \ } #define bli_roaxpyjris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr) + (ai) * (xi); \ } #define bli_craxpyjris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr); \ (yi) += (ar) * -(xi); \ } #define bli_rcaxpyjris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr); \ (yi) += (ai) * (xr); \ } // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssaxpyjris bli_rxaxpyjris #define bli_dssaxpyjris bli_rxaxpyjris #define bli_cssaxpyjris bli_rxaxpyjris #define bli_zssaxpyjris bli_rxaxpyjris #define bli_sdsaxpyjris bli_rxaxpyjris #define bli_ddsaxpyjris bli_rxaxpyjris #define bli_cdsaxpyjris bli_rxaxpyjris #define bli_zdsaxpyjris bli_rxaxpyjris #define bli_scsaxpyjris bli_rxaxpyjris #define bli_dcsaxpyjris bli_rxaxpyjris #define bli_ccsaxpyjris bli_roaxpyjris #define bli_zcsaxpyjris bli_roaxpyjris #define bli_szsaxpyjris bli_rxaxpyjris #define bli_dzsaxpyjris bli_rxaxpyjris #define bli_czsaxpyjris bli_roaxpyjris #define bli_zzsaxpyjris bli_roaxpyjris // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdaxpyjris bli_rxaxpyjris #define bli_dsdaxpyjris bli_rxaxpyjris #define bli_csdaxpyjris bli_rxaxpyjris #define bli_zsdaxpyjris bli_rxaxpyjris #define bli_sddaxpyjris bli_rxaxpyjris #define bli_dddaxpyjris bli_rxaxpyjris #define bli_cddaxpyjris bli_rxaxpyjris #define bli_zddaxpyjris bli_rxaxpyjris #define bli_scdaxpyjris bli_rxaxpyjris #define bli_dcdaxpyjris bli_rxaxpyjris #define bli_ccdaxpyjris bli_roaxpyjris #define bli_zcdaxpyjris bli_roaxpyjris #define bli_szdaxpyjris bli_rxaxpyjris #define bli_dzdaxpyjris bli_rxaxpyjris #define bli_czdaxpyjris bli_roaxpyjris #define bli_zzdaxpyjris bli_roaxpyjris // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscaxpyjris bli_rxaxpyjris #define bli_dscaxpyjris bli_rxaxpyjris #define bli_cscaxpyjris bli_rcaxpyjris #define bli_zscaxpyjris bli_rcaxpyjris #define bli_sdcaxpyjris bli_rxaxpyjris #define bli_ddcaxpyjris bli_rxaxpyjris #define bli_cdcaxpyjris bli_rcaxpyjris #define bli_zdcaxpyjris bli_rcaxpyjris #define bli_sccaxpyjris bli_craxpyjris #define bli_dccaxpyjris bli_craxpyjris #define bli_cccaxpyjris bli_cxaxpyjris #define bli_zccaxpyjris bli_cxaxpyjris #define bli_szcaxpyjris bli_craxpyjris 
#define bli_dzcaxpyjris bli_craxpyjris #define bli_czcaxpyjris bli_cxaxpyjris #define bli_zzcaxpyjris bli_cxaxpyjris // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszaxpyjris bli_rxaxpyjris #define bli_dszaxpyjris bli_rxaxpyjris #define bli_cszaxpyjris bli_rcaxpyjris #define bli_zszaxpyjris bli_rcaxpyjris #define bli_sdzaxpyjris bli_rxaxpyjris #define bli_ddzaxpyjris bli_rxaxpyjris #define bli_cdzaxpyjris bli_rcaxpyjris #define bli_zdzaxpyjris bli_rcaxpyjris #define bli_sczaxpyjris bli_craxpyjris #define bli_dczaxpyjris bli_craxpyjris #define bli_cczaxpyjris bli_cxaxpyjris #define bli_zczaxpyjris bli_cxaxpyjris #define bli_szzaxpyjris bli_craxpyjris #define bli_dzzaxpyjris bli_craxpyjris #define bli_czzaxpyjris bli_cxaxpyjris #define bli_zzzaxpyjris bli_cxaxpyjris #define bli_saxpyjris bli_sssaxpyjris #define bli_daxpyjris bli_dddaxpyjris #define bli_caxpyjris bli_cccaxpyjris #define bli_zaxpyjris bli_zzzaxpyjris #endif blis-0.6.1/frame/include/level0/ri/bli_axpyris.h000066400000000000000000000123271360743507500215020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_AXPYRIS_H #define BLIS_AXPYRIS_H // axpyris #define bli_rxaxpyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr); \ } #define bli_cxaxpyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr) - (ai) * (xi); \ (yi) += (ai) * (xr) + (ar) * (xi); \ } #define bli_roaxpyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr) - (ai) * (xi); \ } #define bli_craxpyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr); \ (yi) += (ar) * (xi); \ } #define bli_rcaxpyris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) += (ar) * (xr); \ (yi) += (ai) * (xr); \ } // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssaxpyris bli_rxaxpyris #define bli_dssaxpyris bli_rxaxpyris #define bli_cssaxpyris bli_rxaxpyris #define bli_zssaxpyris bli_rxaxpyris #define bli_sdsaxpyris bli_rxaxpyris #define bli_ddsaxpyris bli_rxaxpyris #define bli_cdsaxpyris bli_rxaxpyris #define bli_zdsaxpyris bli_rxaxpyris #define bli_scsaxpyris bli_rxaxpyris #define bli_dcsaxpyris bli_rxaxpyris #define bli_ccsaxpyris bli_roaxpyris #define bli_zcsaxpyris bli_roaxpyris #define bli_szsaxpyris bli_rxaxpyris #define bli_dzsaxpyris bli_rxaxpyris #define bli_czsaxpyris bli_roaxpyris #define bli_zzsaxpyris bli_roaxpyris // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdaxpyris bli_rxaxpyris #define bli_dsdaxpyris bli_rxaxpyris #define bli_csdaxpyris bli_rxaxpyris #define bli_zsdaxpyris bli_rxaxpyris #define bli_sddaxpyris bli_rxaxpyris #define bli_dddaxpyris bli_rxaxpyris #define bli_cddaxpyris bli_rxaxpyris #define bli_zddaxpyris bli_rxaxpyris #define bli_scdaxpyris bli_rxaxpyris #define bli_dcdaxpyris bli_rxaxpyris #define bli_ccdaxpyris bli_roaxpyris #define bli_zcdaxpyris bli_roaxpyris #define bli_szdaxpyris bli_rxaxpyris #define bli_dzdaxpyris bli_rxaxpyris #define bli_czdaxpyris bli_roaxpyris #define bli_zzdaxpyris bli_roaxpyris // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscaxpyris bli_rxaxpyris #define bli_dscaxpyris bli_rxaxpyris #define bli_cscaxpyris bli_rcaxpyris #define bli_zscaxpyris bli_rcaxpyris #define bli_sdcaxpyris bli_rxaxpyris #define bli_ddcaxpyris bli_rxaxpyris #define bli_cdcaxpyris bli_rcaxpyris #define bli_zdcaxpyris bli_rcaxpyris #define bli_sccaxpyris bli_craxpyris #define bli_dccaxpyris bli_craxpyris #define bli_cccaxpyris bli_cxaxpyris #define bli_zccaxpyris bli_cxaxpyris #define bli_szcaxpyris bli_craxpyris #define bli_dzcaxpyris bli_craxpyris #define bli_czcaxpyris bli_cxaxpyris #define 
bli_zzcaxpyris bli_cxaxpyris // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszaxpyris bli_rxaxpyris #define bli_dszaxpyris bli_rxaxpyris #define bli_cszaxpyris bli_rcaxpyris #define bli_zszaxpyris bli_rcaxpyris #define bli_sdzaxpyris bli_rxaxpyris #define bli_ddzaxpyris bli_rxaxpyris #define bli_cdzaxpyris bli_rcaxpyris #define bli_zdzaxpyris bli_rcaxpyris #define bli_sczaxpyris bli_craxpyris #define bli_dczaxpyris bli_craxpyris #define bli_cczaxpyris bli_cxaxpyris #define bli_zczaxpyris bli_cxaxpyris #define bli_szzaxpyris bli_craxpyris #define bli_dzzaxpyris bli_craxpyris #define bli_czzaxpyris bli_cxaxpyris #define bli_zzzaxpyris bli_cxaxpyris #define bli_saxpyris bli_sssaxpyris #define bli_daxpyris bli_dddaxpyris #define bli_caxpyris bli_cccaxpyris #define bli_zaxpyris bli_zzzaxpyris #endif blis-0.6.1/frame/include/level0/ri/bli_conjris.h000066400000000000000000000036301360743507500214470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_CONJRIS_H #define BLIS_CONJRIS_H // conjris #define bli_sconjris( xr, xi ) \ { \ ; \ } #define bli_dconjris( xr, xi ) \ { \ ; \ } #define bli_cconjris( xr, xi ) \ { \ (xi) = -(xi); \ } #define bli_zconjris( xr, xi ) \ { \ (xi) = -(xi); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_copycjris.h000066400000000000000000000045421360743507500220100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYCJRIS_H #define BLIS_COPYCJRIS_H // copycjris #define bli_scopycjris( conj, xr, xi, yr, yi ) \ { \ bli_scopyris( (xr), (xi), (yr), (yi) ); \ } #define bli_dcopycjris( conj, xr, xi, yr, yi ) \ { \ bli_dcopyris( (xr), (xi), (yr), (yi) ); \ } #define bli_ccopycjris( conj, xr, xi, yr, yi ) \ { \ (yr) = (xr); \ (yi) = ( bli_is_conj( conj ) ? -(xi) \ : (xi) ); \ } #define bli_zcopycjris( conj, xr, xi, yr, yi ) \ { \ (yr) = (xr); \ (yi) = ( bli_is_conj( conj ) ? -(xi) \ : (xi) ); \ } #define bli_icopycjris( conj, xr, xi, yr, yi ) \ { \ bli_icopyris( (xr), (xi), (yr), (yi) ); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_copyjris.h000066400000000000000000000063301360743507500216420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_COPYJRIS_H #define BLIS_COPYJRIS_H // copyjris #define bli_scopyjris( ar, ai, br, bi ) bli_scopyris( (ar), -(ai), (br), (bi) ) #define bli_dcopyjris( ar, ai, br, bi ) bli_dcopyris( (ar), -(ai), (br), (bi) ) #define bli_ccopyjris( ar, ai, br, bi ) bli_ccopyris( (ar), -(ai), (br), (bi) ) #define bli_zcopyjris( ar, ai, br, bi ) bli_zcopyris( (ar), -(ai), (br), (bi) ) #define bli_sscopyjris( ar, ai, br, bi ) bli_scopyjris( ar, 0.0F, br, bi ) #define bli_dscopyjris( ar, ai, br, bi ) bli_scopyjris( ar, 0.0, br, bi ) #define bli_cscopyjris( ar, ai, br, bi ) bli_scopyjris( ar, ai, br, bi ) #define bli_zscopyjris( ar, ai, br, bi ) bli_scopyjris( ar, ai, br, bi ) #define bli_sdcopyjris( ar, ai, br, bi ) bli_dcopyjris( ar, 0.0F, br, bi ) #define bli_ddcopyjris( ar, ai, br, bi ) bli_dcopyjris( ar, 0.0, br, bi ) #define bli_cdcopyjris( ar, ai, br, bi ) bli_dcopyjris( ar, ai, br, bi ) #define bli_zdcopyjris( ar, ai, br, bi ) bli_dcopyjris( ar, ai, br, bi ) #define bli_sccopyjris( ar, ai, br, bi ) bli_ccopyjris( ar, 0.0F, br, bi ) #define bli_dccopyjris( ar, ai, br, bi ) bli_ccopyjris( ar, 0.0, br, bi ) #define bli_cccopyjris( ar, ai, br, bi ) bli_ccopyjris( ar, ai, br, bi ) #define bli_zccopyjris( ar, ai, br, bi ) bli_ccopyjris( ar, ai, br, bi ) #define bli_szcopyjris( ar, ai, br, bi ) bli_zcopyjris( ar, 0.0F, br, bi ) #define bli_dzcopyjris( ar, ai, br, bi ) bli_zcopyjris( ar, 0.0, br, bi ) #define bli_czcopyjris( ar, ai, br, bi ) bli_zcopyjris( ar, ai, br, bi ) #define bli_zzcopyjris( ar, ai, br, bi ) bli_zcopyjris( ar, ai, br, bi ) #endif blis-0.6.1/frame/include/level0/ri/bli_copyris.h000066400000000000000000000062171360743507500214740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_COPYRIS_H #define BLIS_COPYRIS_H // copyris #define bli_scopyris( ar, ai, br, bi ) \ { \ (br) = (ar); \ } #define bli_dcopyris( ar, ai, br, bi ) \ { \ (br) = (ar); \ } #define bli_ccopyris( ar, ai, br, bi ) \ { \ (br) = (ar); \ (bi) = (ai); \ } #define bli_zcopyris( ar, ai, br, bi ) \ { \ (br) = (ar); \ (bi) = (ai); \ } #define bli_sscopyris( ar, ai, br, bi ) bli_scopyris( ar, 0.0F, br, bi ) #define bli_dscopyris( ar, ai, br, bi ) bli_scopyris( ar, 0.0, br, bi ) #define bli_cscopyris( ar, ai, br, bi ) bli_scopyris( ar, ai, br, bi ) #define bli_zscopyris( ar, ai, br, bi ) bli_scopyris( ar, ai, br, bi ) #define bli_sdcopyris( ar, ai, br, bi ) bli_dcopyris( ar, 0.0F, br, bi ) #define bli_ddcopyris( ar, ai, br, bi ) bli_dcopyris( ar, 0.0, br, bi ) #define bli_cdcopyris( ar, ai, br, bi ) bli_dcopyris( ar, ai, br, bi ) #define bli_zdcopyris( ar, ai, br, bi ) bli_dcopyris( ar, ai, br, bi ) #define bli_sccopyris( ar, ai, br, bi ) bli_ccopyris( ar, 0.0F, br, bi ) #define bli_dccopyris( ar, ai, br, bi ) bli_ccopyris( ar, 0.0, br, bi ) #define bli_cccopyris( ar, ai, br, bi ) bli_ccopyris( ar, ai, br, bi ) #define bli_zccopyris( ar, ai, br, bi ) bli_ccopyris( ar, ai, br, bi ) #define bli_szcopyris( ar, ai, br, bi ) bli_zcopyris( ar, 0.0F, br, bi ) #define bli_dzcopyris( ar, ai, br, bi ) bli_zcopyris( ar, 0.0, br, bi ) #define bli_czcopyris( ar, ai, br, bi ) bli_zcopyris( ar, ai, br, bi ) #define bli_zzcopyris( ar, ai, br, bi ) bli_zcopyris( ar, ai, br, bi ) #endif blis-0.6.1/frame/include/level0/ri/bli_eqris.h000066400000000000000000000060741360743507500211300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_EQRIS_H #define BLIS_EQRIS_H // eqris (passed by value) #define bli_seqris( ar, ai, br, bi ) ( (ar) == (br) ) #define bli_deqris( ar, ai, br, bi ) ( (ar) == (br) ) #define bli_ceqris( ar, ai, br, bi ) ( (ar) == (br) && (ai) == (bi) ) #define bli_zeqris( ar, ai, br, bi ) ( (ar) == (br) && (ai) == (bi) ) #define bli_ieqris( ar, ai, br, bi ) ( (ar) == (br) ) // eq1ris #define bli_seq1ris( ar, ai ) bli_seqris( (ar), (ai), 1.0F, 0.0F ) #define bli_deq1ris( ar, ai ) bli_deqris( (ar), (ai), 1.0, 0.0 ) #define bli_ceq1ris( ar, ai ) bli_ceqris( (ar), (ai), 1.0F, 0.0F ) #define bli_zeq1ris( ar, ai ) bli_zeqris( (ar), (ai), 1.0, 0.0 ) #define bli_ieq1ris( ar, ai ) bli_ieqris( (ar), (ai), 1, 0 ) // eq0ris #define bli_seq0ris( ar, ai ) bli_seqris( (ar), (ai), 0.0F, 0.0F ) #define bli_deq0ris( ar, ai ) bli_deqris( (ar), (ai), 0.0, 0.0 ) #define bli_ceq0ris( ar, ai ) bli_ceqris( (ar), (ai), 0.0F, 0.0F ) #define bli_zeq0ris( ar, ai ) bli_zeqris( (ar), (ai), 0.0, 0.0 ) #define bli_ieq0ris( ar, ai ) bli_ieqris( (ar), (ai), 0, 0 ) // eqm1ris #define bli_seqm1ris( ar, ai ) bli_seqris( (ar), (ai), -1.0F, 0.0F ) #define bli_deqm1ris( ar, ai ) bli_deqris( (ar), (ai), -1.0, 0.0 ) #define bli_ceqm1ris( ar, ai ) bli_ceqris( (ar), (ai), -1.0F, 0.0F ) #define bli_zeqm1ris( ar, ai ) bli_zeqris( (ar), (ai), -1.0, 0.0 ) #define bli_ieqm1ris( ar, ai ) bli_ieqris( (ar), (ai), -1, 0 ) #endif blis-0.6.1/frame/include/level0/ri/bli_invertris.h000066400000000000000000000045071360743507500220310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_INVERTRIS_H #define BLIS_INVERTRIS_H // invertris #define bli_sinvertris( xr, xi ) \ { \ (xr) = 1.0F / (xr); \ } #define bli_dinvertris( xr, xi ) \ { \ (xr) = 1.0 / (xr); \ } #define bli_cinvertris( xr, xi ) \ { \ float s = bli_fmaxabs( (xr), (xi) ); \ float xr_s = (xr) / s; \ float xi_s = (xi) / s; \ float temp = ( xr_s * (xr) + xi_s * (xi) ); \ (xr) = xr_s / temp; \ (xi) = -xi_s / temp; \ } #define bli_zinvertris( xr, xi ) \ { \ double s = bli_fmaxabs( (xr), (xi) ); \ double xr_s = (xr) / s; \ double xi_s = (xi) / s; \ double temp = ( xr_s * (xr) + xi_s * (xi) ); \ (xr) = xr_s / temp; \ (xi) = -xi_s / temp; \ } #endif blis-0.6.1/frame/include/level0/ri/bli_invscaljris.h000066400000000000000000000043501360743507500223270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_INVSCALJRIS_H #define BLIS_INVSCALJRIS_H // invscaljris #define bli_sinvscaljris( ar, ai, xr, xi ) bli_sinvscalris( (ar), -(ai), (xr), (xi) ) #define bli_dinvscaljris( ar, ai, xr, xi ) bli_dinvscalris( (ar), -(ai), (xr), (xi) ) #define bli_cinvscaljris( ar, ai, xr, xi ) bli_cinvscalris( (ar), -(ai), (xr), (xi) ) #define bli_zinvscaljris( ar, ai, xr, xi ) bli_zinvscalris( (ar), -(ai), (xr), (xi) ) #define bli_scinvscaljris( ar, ai, xr, xi ) bli_scinvscalris( (ar), -(ai), (xr), (xi) ) #define bli_dzinvscaljris( ar, ai, xr, xi ) bli_dzinvscalris( (ar), -(ai), (xr), (xi) ) #endif blis-0.6.1/frame/include/level0/ri/bli_invscalris.h000066400000000000000000000052621360743507500221600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_INVSCALRIS_H #define BLIS_INVSCALRIS_H // invscalris #define bli_sinvscalris( ar, ai, xr, xi ) \ { \ (xr) /= (ar); \ } #define bli_dinvscalris( ar, ai, xr, xi ) \ { \ (xr) /= (ar); \ } #define bli_cinvscalris( ar, ai, xr, xi ) \ { \ float s = bli_fmaxabs( (ar), (ai) ); \ float ar_s = (ar) / s; \ float ai_s = (ai) / s; \ float xrt = (xr); \ float temp = ( ar_s * (ar) + ai_s * (ai) ); \ (xr) = ( (xrt) * ar_s + (xi) * ai_s ) / temp; \ (xi) = ( (xi) * ar_s - (xrt) * ai_s ) / temp; \ } #define bli_zinvscalris( ar, ai, xr, xi ) \ { \ double s = bli_fmaxabs( (ar), (ai) ); \ double ar_s = (ar) / s; \ double ai_s = (ai) / s; \ double xrt = (xr); \ double temp = ( ar_s * (ar) + ai_s * (ai) ); \ (xr) = ( (xrt) * ar_s + (xi) * ai_s ) / temp; \ (xi) = ( (xi) * ar_s - (xrt) * ai_s ) / temp; \ } #define bli_scinvscalris( ar, ai, xr, xi ) \ { \ (xr) /= (ar); \ (xi) /= (ar); \ } #define bli_dzinvscalris( ar, ai, xr, xi ) \ { \ (xr) /= (ar); \ (xi) /= (ar); \ } #endif 
blis-0.6.1/frame/include/level0/ri/bli_neg2ris.h000066400000000000000000000037621360743507500213570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_NEG2RIS_H #define BLIS_NEG2RIS_H // neg2ris #define bli_sneg2ris( ar, ai, br, bi ) \ { \ (br) = -(ar); \ } #define bli_dneg2ris( ar, ai, br, bi ) \ { \ (br) = -(ar); \ } #define bli_cneg2ris( ar, ai, br, bi ) \ { \ (br) = -(ar); \ (bi) = -(ai); \ } #define bli_zneg2ris( ar, ai, br, bi ) \ { \ (br) = -(ar); \ (bi) = -(ai); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_scal2jris.h000066400000000000000000000215651360743507500217030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2JRIS_H #define BLIS_SCAL2JRIS_H // scal2jris #define bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr); \ } #define bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr) + (ai) * (xi); \ (yi) = (ai) * (xr) - (ar) * (xi); \ } #define bli_roscal2jris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr) + (ai) * (xi); \ } #define bli_crscal2jris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr); \ (yi) = (ar) * -(xi); \ } #define bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr); \ (yi) = (ai) * (xr); \ } // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dssscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cssscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zssscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_sdsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_ddsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cdsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zdsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_scsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dcsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_ccsscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zcsscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_szsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dzsscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_czsscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zzsscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dsdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_csdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zsdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( 
ar, ai, xr, xi, yr, yi ) #define bli_sddscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dddscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cddscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zddscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_scdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dcdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_ccdscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zcdscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_szdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dzdscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_czdscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zzdscal2jris( ar, ai, xr, xi, yr, yi ) bli_roscal2jris( ar, ai, xr, xi, yr, yi ) // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dscscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cscscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zscscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_sdcscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_ddcscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cdcscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zdcscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_sccscal2jris( ar, ai, 
xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dccscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cccscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zccscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_szcscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dzcscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_czcscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zzcscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dszscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cszscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zszscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_sdzscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_ddzscal2jris( ar, ai, xr, xi, yr, yi ) bli_rxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cdzscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zdzscal2jris( ar, ai, xr, xi, yr, yi ) bli_rcscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_sczscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dczscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cczscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zczscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_szzscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) 
#define bli_dzzscal2jris( ar, ai, xr, xi, yr, yi ) bli_crscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_czzscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zzzscal2jris( ar, ai, xr, xi, yr, yi ) bli_cxscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_sscal2jris( ar, ai, xr, xi, yr, yi ) bli_sssscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_dscal2jris( ar, ai, xr, xi, yr, yi ) bli_dddscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_cscal2jris( ar, ai, xr, xi, yr, yi ) bli_cccscal2jris( ar, ai, xr, xi, yr, yi ) #define bli_zscal2jris( ar, ai, xr, xi, yr, yi ) bli_zzzscal2jris( ar, ai, xr, xi, yr, yi ) #endif blis-0.6.1/frame/include/level0/ri/bli_scal2ris.h000066400000000000000000000125371360743507500215300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2RIS_H #define BLIS_SCAL2RIS_H // scal2ris #define bli_rxscal2ris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr); \ } #define bli_cxscal2ris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr) - (ai) * (xi); \ (yi) = (ai) * (xr) + (ar) * (xi); \ } #define bli_roscal2ris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr) - (ai) * (xi); \ } #define bli_crscal2ris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr); \ (yi) = (ar) * (xi); \ } #define bli_rcscal2ris( ar, ai, xr, xi, yr, yi ) \ { \ (yr) = (ar) * (xr); \ (yi) = (ai) * (xr); \ } // Notes: // - The first char encodes the type of a. // - The second char encodes the type of x. // - The third char encodes the type of y. 
// -- (axy) = (??s) ------------------------------------------------------------ #define bli_sssscal2ris bli_rxscal2ris #define bli_dssscal2ris bli_rxscal2ris #define bli_cssscal2ris bli_rxscal2ris #define bli_zssscal2ris bli_rxscal2ris #define bli_sdsscal2ris bli_rxscal2ris #define bli_ddsscal2ris bli_rxscal2ris #define bli_cdsscal2ris bli_rxscal2ris #define bli_zdsscal2ris bli_rxscal2ris #define bli_scsscal2ris bli_rxscal2ris #define bli_dcsscal2ris bli_rxscal2ris #define bli_ccsscal2ris bli_roscal2ris #define bli_zcsscal2ris bli_roscal2ris #define bli_szsscal2ris bli_rxscal2ris #define bli_dzsscal2ris bli_rxscal2ris #define bli_czsscal2ris bli_roscal2ris #define bli_zzsscal2ris bli_roscal2ris // -- (axy) = (??d) ------------------------------------------------------------ #define bli_ssdscal2ris bli_rxscal2ris #define bli_dsdscal2ris bli_rxscal2ris #define bli_csdscal2ris bli_rxscal2ris #define bli_zsdscal2ris bli_rxscal2ris #define bli_sddscal2ris bli_rxscal2ris #define bli_dddscal2ris bli_rxscal2ris #define bli_cddscal2ris bli_rxscal2ris #define bli_zddscal2ris bli_rxscal2ris #define bli_scdscal2ris bli_rxscal2ris #define bli_dcdscal2ris bli_rxscal2ris #define bli_ccdscal2ris bli_roscal2ris #define bli_zcdscal2ris bli_roscal2ris #define bli_szdscal2ris bli_rxscal2ris #define bli_dzdscal2ris bli_rxscal2ris #define bli_czdscal2ris bli_roscal2ris #define bli_zzdscal2ris bli_roscal2ris // -- (axy) = (??c) ------------------------------------------------------------ #define bli_sscscal2ris bli_rxscal2ris #define bli_dscscal2ris bli_rxscal2ris #define bli_cscscal2ris bli_rcscal2ris #define bli_zscscal2ris bli_rcscal2ris #define bli_sdcscal2ris bli_rxscal2ris #define bli_ddcscal2ris bli_rxscal2ris #define bli_cdcscal2ris bli_rcscal2ris #define bli_zdcscal2ris bli_rcscal2ris #define bli_sccscal2ris bli_crscal2ris #define bli_dccscal2ris bli_crscal2ris #define bli_cccscal2ris bli_cxscal2ris #define bli_zccscal2ris bli_cxscal2ris #define bli_szcscal2ris bli_crscal2ris 
#define bli_dzcscal2ris bli_crscal2ris #define bli_czcscal2ris bli_cxscal2ris #define bli_zzcscal2ris bli_cxscal2ris // -- (axy) = (??z) ------------------------------------------------------------ #define bli_sszscal2ris bli_rxscal2ris #define bli_dszscal2ris bli_rxscal2ris #define bli_cszscal2ris bli_rcscal2ris #define bli_zszscal2ris bli_rcscal2ris #define bli_sdzscal2ris bli_rxscal2ris #define bli_ddzscal2ris bli_rxscal2ris #define bli_cdzscal2ris bli_rcscal2ris #define bli_zdzscal2ris bli_rcscal2ris #define bli_sczscal2ris bli_crscal2ris #define bli_dczscal2ris bli_crscal2ris #define bli_cczscal2ris bli_cxscal2ris #define bli_zczscal2ris bli_cxscal2ris #define bli_szzscal2ris bli_crscal2ris #define bli_dzzscal2ris bli_crscal2ris #define bli_czzscal2ris bli_cxscal2ris #define bli_zzzscal2ris bli_cxscal2ris #define bli_sscal2ris bli_sssscal2ris #define bli_dscal2ris bli_dddscal2ris #define bli_cscal2ris bli_cccscal2ris #define bli_zscal2ris bli_zzzscal2ris #endif blis-0.6.1/frame/include/level0/ri/bli_scal2ris_mxn.h000066400000000000000000000122071360743507500224040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2RIS_MXN_H #define BLIS_SCAL2RIS_MXN_H // scal2ris_mxn static void bli_cscal2ris_mxn ( const conj_t conjx, const dim_t m, const dim_t n, scomplex* restrict alpha, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t is_y ) { float* restrict alpha_r = ( float* )alpha; \ float* restrict alpha_i = ( float* )alpha + 1; \ float* restrict x_r = ( float* )x; \ float* restrict x_i = ( float* )x + 1; \ float* restrict y_r = ( float* )y; \ float* restrict y_i = ( float* )y + is_y; \ const dim_t incx2 = 2*rs_x; \ const dim_t ldx2 = 2*cs_x; \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). 
*/ \ if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { float* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; float* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; float* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; float* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; bli_cscal2jris ( *alpha_r, *alpha_i, *chi11_r, *chi11_i, *psi11_r, *psi11_i ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { float* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; float* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; float* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; float* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; bli_cscal2ris ( *alpha_r, *alpha_i, *chi11_r, *chi11_i, *psi11_r, *psi11_i ); } } } static void bli_zscal2ris_mxn ( const conj_t conjx, const dim_t m, const dim_t n, dcomplex* restrict alpha, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t is_y ) { double* restrict alpha_r = ( double* )alpha; \ double* restrict alpha_i = ( double* )alpha + 1; \ double* restrict x_r = ( double* )x; \ double* restrict x_i = ( double* )x + 1; \ double* restrict y_r = ( double* )y; \ double* restrict y_i = ( double* )y + is_y; \ const dim_t incx2 = 2*rs_x; \ const dim_t ldx2 = 2*cs_x; \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). 
*/ \ if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { double* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; double* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; double* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; double* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; bli_zscal2jris ( *alpha_r, *alpha_i, *chi11_r, *chi11_i, *psi11_r, *psi11_i ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { double* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; double* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; double* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; double* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; bli_zscal2ris ( *alpha_r, *alpha_i, *chi11_r, *chi11_i, *psi11_r, *psi11_i ); } } } #endif blis-0.6.1/frame/include/level0/ri/bli_scalcjris.h000066400000000000000000000051401360743507500217530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCALCJRIS_H #define BLIS_SCALCJRIS_H // scalcjris #define bli_sscalcjris( conj, ar, ai, xr, xi ) \ { \ bli_sscalris( (ar), (ai), (xr), (xi) ); \ } #define bli_dscalcjris( conj, ar, ai, xr, xi ) \ { \ bli_dscalris( (ar), (ai), (xr), (xi) ); \ } #define bli_cscalcjris( conj, ar, ai, xr, xi ) \ { \ if ( bli_is_conj( conj ) ) { bli_cscaljris( (ar), (ai), (xr), (xi) ); } \ else { bli_cscalris( (ar), (ai), (xr), (xi) ); } \ } #define bli_zscalcjris( conj, ar, ai, xr, xi ) \ { \ if ( bli_is_conj( conj ) ) { bli_zscaljris( (ar), (ai), (xr), (xi) ); } \ else { bli_zscalris( (ar), (ai), (xr), (xi) ); } \ } #define bli_iscalcjris( conj, ar, ai, xr, xi ) \ { \ bli_iscalris( (ar), (xi), (xr), (xi) ); \ } #define bli_scscalcjris( conj, ar, ai, xr, xi ) \ { \ bli_scscalris( (ar), (ai), (xr), (xi) ); \ } #define bli_dzscalcjris( conj, ar, ai, xr, xi ) \ { \ bli_dzscalris( (ar), (ai), (xr), (xi) ); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_scaljris.h000066400000000000000000000042731360743507500216160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_SCALJRIS_H
#define BLIS_SCALJRIS_H

// scaljris
//
// x := conj(a) * x. Each macro forwards to the bli_?scalris macro of the same
// type prefix with the sign of the scalar's imaginary part, ai, negated. The
// expansion is whatever that bli_?scalris macro expands to.

#define bli_sscaljris( ar, ai, xr, xi ) bli_sscalris( (ar), -(ai), (xr), (xi) )
#define bli_dscaljris( ar, ai, xr, xi ) bli_dscalris( (ar), -(ai), (xr), (xi) )
#define bli_cscaljris( ar, ai, xr, xi ) bli_cscalris( (ar), -(ai), (xr), (xi) )
#define bli_zscaljris( ar, ai, xr, xi ) bli_zscalris( (ar), -(ai), (xr), (xi) )

#define bli_scscaljris( ar, ai, xr, xi ) bli_scscalris( (ar), -(ai), (xr), (xi) )
#define bli_dzscaljris( ar, ai, xr, xi ) bli_dzscalris( (ar), -(ai), (xr), (xi) )

#endif
blis-0.6.1/frame/include/level0/ri/bli_scalris.h000066400000000000000000000045661360743507500214510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCALRIS_H #define BLIS_SCALRIS_H // scalris #define bli_sscalris( ar, ai, xr, xi ) \ { \ (xr) = (ar) * (xr); \ } #define bli_dscalris( ar, ai, xr, xi ) \ { \ (xr) = (ar) * (xr); \ } #define bli_cscalris( ar, ai, xr, xi ) \ { \ float yr = (ar) * (xr) - (ai) * (xi); \ float yi = (ai) * (xr) + (ar) * (xi); \ (xr) = yr; \ (xi) = yi; \ } #define bli_zscalris( ar, ai, xr, xi ) \ { \ double yr = (ar) * (xr) - (ai) * (xi); \ double yi = (ai) * (xr) + (ar) * (xi); \ (xr) = yr; \ (xi) = yi; \ } #define bli_scscalris( ar, ai, xr, xi ) \ { \ (xr) = (ar) * (xr); \ (xi) = (ar) * (xi); \ } #define bli_dzscalris( ar, ai, xr, xi ) \ { \ (xr) = (ar) * (xr); \ (xi) = (ar) * (xi); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_scalris_mxn_uplo.h000066400000000000000000000063551360743507500233700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCALRIS_MXN_UPLO_H #define BLIS_SCALRIS_MXN_UPLO_H // scalris_mxn_u #define bli_cscalris_mxn_u( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_cscalris( *(ar), \ *(ai), \ *((xr) + _i*rs_x + _j*cs_x), \ *((xi) + _i*rs_x + _j*cs_x) ); \ } \ } \ } #define bli_zscalris_mxn_u( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i >= diagoff ) \ { \ bli_zscalris( *(ar), \ *(ai), \ *((xr) + _i*rs_x + _j*cs_x), \ *((xi) + _i*rs_x + _j*cs_x) ); \ } \ } \ } // scalris_mxn_l #define bli_cscalris_mxn_l( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_cscalris( *(ar), \ *(ai), \ *((xr) + _i*rs_x + _j*cs_x), \ *((xi) + _i*rs_x + _j*cs_x) ); \ } \ } \ 
} #define bli_zscalris_mxn_l( diagoff, m, n, ar, ai, xr, xi, rs_x, cs_x ) \ { \ dim_t _i, _j; \ \ for ( _j = 0; _j < n; ++_j ) \ for ( _i = 0; _i < m; ++_i ) \ { \ if ( (doff_t)_j - (doff_t)_i <= diagoff ) \ { \ bli_zscalris( *(ar), \ *(ai), \ *((xr) + _i*rs_x + _j*cs_x), \ *((xi) + _i*rs_x + _j*cs_x) ); \ } \ } \ } #endif blis-0.6.1/frame/include/level0/ri/bli_set0ris.h000066400000000000000000000037251360743507500213760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_SET0RIS_H
#define BLIS_SET0RIS_H

// set0ris
//
// Sets x to zero by forwarding to the bli_?copyris macro of the same type
// prefix with literal zeros as the source real and imaginary parts. The s and
// c variants pass float literals (0.0F); the d and z variants pass double
// literals (0.0). xr and xi are forwarded as given.

#define bli_sset0ris( xr, xi ) bli_scopyris( 0.0F, 0.0F, xr, xi )
#define bli_dset0ris( xr, xi ) bli_dcopyris( 0.0 , 0.0 , xr, xi )
#define bli_cset0ris( xr, xi ) bli_ccopyris( 0.0F, 0.0F, xr, xi )
#define bli_zset0ris( xr, xi ) bli_zcopyris( 0.0 , 0.0 , xr, xi )

#endif
blis-0.6.1/frame/include/level0/ri/bli_sqrt2ris.h000066400000000000000000000053731360743507500215770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SQRT2RIS_H #define BLIS_SQRT2RIS_H // sqrt2ris #define bli_ssqrt2ris( xr, xi, ar, ai ) \ { \ (ar) = sqrtf( (xr) ); \ } #define bli_dsqrt2ris( xr, xi, ar, ai ) \ { \ (ar) = sqrt( (xr) ); \ } #define bli_csqrt2ris( xr, xi, ar, ai ) \ { \ float s = bli_fmaxabs( (xr), (xi) ); \ float mag; \ if ( s == 0.0F ) mag = 0.0F; \ else \ { \ mag = sqrtf( s ) * \ sqrtf( ( (xr) / s ) * (xr) + \ ( (xi) / s ) * (xi) ); \ } \ \ (ar) = sqrtf( ( mag + (xr) ) / 2.0F ); \ (ai) = sqrtf( ( mag - (xi) ) / 2.0F ); \ } #define bli_zsqrt2ris( xr, xi, ar, ai ) \ { \ double s = bli_fmaxabs( (xr), (xi) ); \ double mag; \ if ( s == 0.0 ) mag = 0.0; \ else \ { \ mag = sqrt( s ) * \ sqrt( ( (xr) / s ) * (xr) + \ ( (xi) / s ) * (xi) ); \ } \ \ (ar) = sqrt( ( mag + (xr) ) / 2.0 ); \ (ai) = sqrt( ( mag - (xi) ) / 2.0 ); \ } #define bli_scsqrt2ris( xr, xi, ar, ai ) \ { \ (ar) = sqrtf( (xr) ); \ (ai) = 0.0F; \ } #define bli_dzsqrt2ris( xr, xi, ar, ai ) \ { \ (ar) = sqrt( (xr) ); \ (ai) = 0.0; \ } #endif blis-0.6.1/frame/include/level0/ri/bli_subjris.h000066400000000000000000000040111360743507500214530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_SUBJRIS_H
#define BLIS_SUBJRIS_H

// subjris
//
// x := x - conj(a). Each macro forwards to the bli_?subris macro of the same
// type prefix with the sign of ai negated.

#define bli_ssubjris( ar, ai, xr, xi ) bli_ssubris( (ar), -(ai), (xr), (xi) )
#define bli_dsubjris( ar, ai, xr, xi ) bli_dsubris( (ar), -(ai), (xr), (xi) )
#define bli_csubjris( ar, ai, xr, xi ) bli_csubris( (ar), -(ai), (xr), (xi) )
#define bli_zsubjris( ar, ai, xr, xi ) bli_zsubris( (ar), -(ai), (xr), (xi) )

#endif
blis-0.6.1/frame/include/level0/ri/bli_subris.h000066400000000000000000000040171360743507500213070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_SUBRIS_H
#define BLIS_SUBRIS_H

// subris
//
// x := x - a, with a and x passed as separate real (?r) and imaginary (?i)
// parts. xr (and xi, where written) must be lvalues.

// Real variants: only xr is updated; ai and xi are not referenced.
#define bli_ssubris( ar, ai, xr, xi ) \
{ \
	(xr) = (xr) - (ar); \
}

#define bli_dsubris( ar, ai, xr, xi ) \
{ \
	(xr) = (xr) - (ar); \
}

// Complex variants: the real and imaginary parts are updated independently.
#define bli_csubris( ar, ai, xr, xi ) \
{ \
	(xr) = (xr) - (ar); \
	(xi) = (xi) - (ai); \
}

#define bli_zsubris( ar, ai, xr, xi ) \
{ \
	(xr) = (xr) - (ar); \
	(xi) = (xi) - (ai); \
}

#endif
blis-0.6.1/frame/include/level0/ri/bli_swapris.h000066400000000000000000000047431360743507500214760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SWAPRIS_H #define BLIS_SWAPRIS_H // swapris #define bli_sswapris( ar, ai, br, bi ) \ { \ float tr, ti; \ \ bli_scopyris( (br), (bi), (tr), (ti) ); \ bli_scopyris( (ar), (ai), (br), (bi) ); \ bli_scopyris( (tr), (ti), (ar), (ai) ); \ } #define bli_dswapris( ar, ai, br, bi ) \ { \ double tr, ti; \ \ bli_dcopyris( (br), (bi), (tr), (ti) ); \ bli_dcopyris( (ar), (ai), (br), (bi) ); \ bli_dcopyris( (tr), (ti), (ar), (ai) ); \ } #define bli_cswapris( ar, ai, br, bi ) \ { \ scomplex tr, ti; \ \ bli_ccopyris( (br), (bi), (tr), (ti) ); \ bli_ccopyris( (ar), (ai), (br), (bi) ); \ bli_ccopyris( (tr), (ti), (ar), (ai) ); \ } #define bli_zswapris( ar, ai, br, bi ) \ { \ dcomplex tr, ti; \ \ bli_zcopyris( (br), (bi), (tr), (ti) ); \ bli_zcopyris( (ar), (ai), (br), (bi) ); \ bli_zcopyris( (tr), (ti), (ar), (ai) ); \ } #endif blis-0.6.1/frame/include/level0/ri/bli_xpbyjris.h000066400000000000000000000125231360743507500216530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_XPBYJRIS_H
#define BLIS_XPBYJRIS_H

// xpbyjris
//
// y := conj(x) + b * y, with every operand passed as a separate real (?r)
// and imaginary (?i) part. yr (and yi, where written) must be lvalues.

// rx: updates only yr = xr + br * yr. xi, bi and yi are not referenced.
#define bli_rxxpbyjris( xr, xi, br, bi, yr, yi ) \
{ \
	(yr) = (xr) + (br) * (yr); \
}

// cx: complex b. Both parts of the result are formed in temporaries (typed
// after yr/yi via __typeof__) before y is overwritten, because each part
// reads both yr and yi. xi enters with a negative sign (conjugation).
#define bli_cxxpbyjris( xr, xi, br, bi, yr, yi ) \
{ \
	const __typeof__(yr) yt_r =  (xr) + (br) * (yr) - (bi) * (yi); \
	const __typeof__(yi) yt_i = -(xi) + (bi) * (yr) + (br) * (yi); \
	(yr) = yt_r; \
	(yi) = yt_i; \
}

// cr: only br scales y; bi is not referenced. xi enters with a negative sign.
#define bli_crxpbyjris( xr, xi, br, bi, yr, yi ) \
{ \
	const __typeof__(yr) yt_r =  (xr) + (br) * (yr); \
	const __typeof__(yi) yt_i = -(xi) + (br) * (yi); \
	(yr) = yt_r; \
	(yi) = yt_i; \
}

// Notes:
// - The first char encodes the type of x.
// - The second char encodes the type of b.
// - The third char encodes the type of y.

// -- (xby) = (??s) ------------------------------------------------------------
// Every entry in this group resolves to the rx kernel (only yr is updated).

#define bli_sssxpbyjris bli_rxxpbyjris
#define bli_dssxpbyjris bli_rxxpbyjris
#define bli_cssxpbyjris bli_rxxpbyjris
#define bli_zssxpbyjris bli_rxxpbyjris

#define bli_sdsxpbyjris bli_rxxpbyjris
#define bli_ddsxpbyjris bli_rxxpbyjris
#define bli_cdsxpbyjris bli_rxxpbyjris
#define bli_zdsxpbyjris bli_rxxpbyjris

#define bli_scsxpbyjris bli_rxxpbyjris
#define bli_dcsxpbyjris bli_rxxpbyjris
#define bli_ccsxpbyjris bli_rxxpbyjris
#define bli_zcsxpbyjris bli_rxxpbyjris

#define bli_szsxpbyjris bli_rxxpbyjris
#define bli_dzsxpbyjris bli_rxxpbyjris
#define bli_czsxpbyjris bli_rxxpbyjris
#define bli_zzsxpbyjris bli_rxxpbyjris

// -- (xby) = (??d) ------------------------------------------------------------
// Every entry in this group resolves to the rx kernel (only yr is updated).

#define bli_ssdxpbyjris bli_rxxpbyjris
#define bli_dsdxpbyjris bli_rxxpbyjris
#define bli_csdxpbyjris bli_rxxpbyjris
#define bli_zsdxpbyjris bli_rxxpbyjris

#define bli_sddxpbyjris bli_rxxpbyjris
#define bli_dddxpbyjris bli_rxxpbyjris
#define bli_cddxpbyjris bli_rxxpbyjris
#define bli_zddxpbyjris bli_rxxpbyjris

#define bli_scdxpbyjris bli_rxxpbyjris
#define bli_dcdxpbyjris bli_rxxpbyjris
#define bli_ccdxpbyjris bli_rxxpbyjris
#define bli_zcdxpbyjris bli_rxxpbyjris

#define bli_szdxpbyjris bli_rxxpbyjris
#define bli_dzdxpbyjris bli_rxxpbyjris
#define bli_czdxpbyjris bli_rxxpbyjris
#define bli_zzdxpbyjris bli_rxxpbyjris

// -- (xby) = (??c) ------------------------------------------------------------
// NOTE(review): the ss?/ds?/sd?/dd? entries of this group and the next map to
// the rx kernel, which never touches yi -- confirm that leaving the imaginary
// part of y unscaled by b is intended for those combinations.

#define bli_sscxpbyjris bli_rxxpbyjris
#define bli_dscxpbyjris bli_rxxpbyjris
#define bli_cscxpbyjris bli_crxpbyjris
#define bli_zscxpbyjris bli_crxpbyjris

#define bli_sdcxpbyjris bli_rxxpbyjris
#define bli_ddcxpbyjris bli_rxxpbyjris
#define bli_cdcxpbyjris bli_crxpbyjris
#define bli_zdcxpbyjris bli_crxpbyjris

#define bli_sccxpbyjris bli_cxxpbyjris
#define bli_dccxpbyjris bli_cxxpbyjris
#define bli_cccxpbyjris bli_cxxpbyjris
#define bli_zccxpbyjris bli_cxxpbyjris

#define bli_szcxpbyjris bli_cxxpbyjris
#define bli_dzcxpbyjris bli_cxxpbyjris
#define bli_czcxpbyjris bli_cxxpbyjris
#define bli_zzcxpbyjris bli_cxxpbyjris

// -- (xby) = (??z) ------------------------------------------------------------

#define bli_sszxpbyjris bli_rxxpbyjris
#define bli_dszxpbyjris bli_rxxpbyjris
#define bli_cszxpbyjris bli_crxpbyjris
#define bli_zszxpbyjris bli_crxpbyjris

#define bli_sdzxpbyjris bli_rxxpbyjris
#define bli_ddzxpbyjris bli_rxxpbyjris
#define bli_cdzxpbyjris bli_crxpbyjris
#define bli_zdzxpbyjris bli_crxpbyjris

#define bli_sczxpbyjris bli_cxxpbyjris
#define bli_dczxpbyjris bli_cxxpbyjris
#define bli_cczxpbyjris bli_cxxpbyjris
#define bli_zczxpbyjris bli_cxxpbyjris

#define bli_szzxpbyjris bli_cxxpbyjris
#define bli_dzzxpbyjris bli_cxxpbyjris
#define bli_czzxpbyjris bli_cxxpbyjris
#define bli_zzzxpbyjris bli_cxxpbyjris

// Single-letter names select the combination in which x, b and y share a type.
#define bli_sxpbyjris bli_sssxpbyjris
#define bli_dxpbyjris bli_dddxpbyjris
#define bli_cxpbyjris bli_cccxpbyjris
#define bli_zxpbyjris bli_zzzxpbyjris

#endif
blis-0.6.1/frame/include/level0/ri/bli_xpbyris.h000066400000000000000000000123011360743507500214730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_XPBYRIS_H #define BLIS_XPBYRIS_H // xpbyris #define bli_rxxpbyris( xr, xi, br, bi, yr, yi ) \ { \ (yr) = (xr) + (br) * (yr); \ } #define bli_cxxpbyris( xr, xi, br, bi, yr, yi ) \ { \ const __typeof__(yr) yt_r = (xr) + (br) * (yr) - (bi) * (yi); \ const __typeof__(yi) yt_i = (xi) + (bi) * (yr) + (br) * (yi); \ (yr) = yt_r; \ (yi) = yt_i; \ } #define bli_crxpbyris( xr, xi, br, bi, yr, yi ) \ { \ const __typeof__(yr) yt_r = (xr) + (br) * (yr); \ const __typeof__(yi) yt_i = (xi) + (br) * (yi); \ (yr) = yt_r; \ (yi) = yt_i; \ } // Notes: // - The first char encodes the type of x. // - The second char encodes the type of b. // - The third char encodes the type of y. // -- (xby) = (??s) ------------------------------------------------------------ #define bli_sssxpbyris bli_rxxpbyris #define bli_dssxpbyris bli_rxxpbyris #define bli_cssxpbyris bli_rxxpbyris #define bli_zssxpbyris bli_rxxpbyris #define bli_sdsxpbyris bli_rxxpbyris #define bli_ddsxpbyris bli_rxxpbyris #define bli_cdsxpbyris bli_rxxpbyris #define bli_zdsxpbyris bli_rxxpbyris #define bli_scsxpbyris bli_rxxpbyris #define bli_dcsxpbyris bli_rxxpbyris #define bli_ccsxpbyris bli_rxxpbyris #define bli_zcsxpbyris bli_rxxpbyris #define bli_szsxpbyris bli_rxxpbyris #define bli_dzsxpbyris bli_rxxpbyris #define bli_czsxpbyris bli_rxxpbyris #define bli_zzsxpbyris bli_rxxpbyris // -- (xby) = (??d) ------------------------------------------------------------ #define bli_ssdxpbyris bli_rxxpbyris #define bli_dsdxpbyris bli_rxxpbyris #define bli_csdxpbyris bli_rxxpbyris #define bli_zsdxpbyris bli_rxxpbyris #define bli_sddxpbyris bli_rxxpbyris #define bli_dddxpbyris bli_rxxpbyris #define bli_cddxpbyris bli_rxxpbyris #define bli_zddxpbyris bli_rxxpbyris #define bli_scdxpbyris bli_rxxpbyris #define bli_dcdxpbyris bli_rxxpbyris #define bli_ccdxpbyris bli_rxxpbyris #define bli_zcdxpbyris bli_rxxpbyris #define bli_szdxpbyris bli_rxxpbyris #define bli_dzdxpbyris bli_rxxpbyris #define bli_czdxpbyris bli_rxxpbyris 
#define bli_zzdxpbyris bli_rxxpbyris // -- (xby) = (??c) ------------------------------------------------------------ #define bli_sscxpbyris bli_rxxpbyris #define bli_dscxpbyris bli_rxxpbyris #define bli_cscxpbyris bli_crxpbyris #define bli_zscxpbyris bli_crxpbyris #define bli_sdcxpbyris bli_rxxpbyris #define bli_ddcxpbyris bli_rxxpbyris #define bli_cdcxpbyris bli_crxpbyris #define bli_zdcxpbyris bli_crxpbyris #define bli_sccxpbyris bli_cxxpbyris #define bli_dccxpbyris bli_cxxpbyris #define bli_cccxpbyris bli_cxxpbyris #define bli_zccxpbyris bli_cxxpbyris #define bli_szcxpbyris bli_cxxpbyris #define bli_dzcxpbyris bli_cxxpbyris #define bli_czcxpbyris bli_cxxpbyris #define bli_zzcxpbyris bli_cxxpbyris // -- (xby) = (??z) ------------------------------------------------------------ #define bli_sszxpbyris bli_rxxpbyris #define bli_dszxpbyris bli_rxxpbyris #define bli_cszxpbyris bli_crxpbyris #define bli_zszxpbyris bli_crxpbyris #define bli_sdzxpbyris bli_rxxpbyris #define bli_ddzxpbyris bli_rxxpbyris #define bli_cdzxpbyris bli_crxpbyris #define bli_zdzxpbyris bli_crxpbyris #define bli_sczxpbyris bli_cxxpbyris #define bli_dczxpbyris bli_cxxpbyris #define bli_cczxpbyris bli_cxxpbyris #define bli_zczxpbyris bli_cxxpbyris #define bli_szzxpbyris bli_cxxpbyris #define bli_dzzxpbyris bli_cxxpbyris #define bli_czzxpbyris bli_cxxpbyris #define bli_zzzxpbyris bli_cxxpbyris #define bli_sxpbyris bli_sssxpbyris #define bli_dxpbyris bli_dddxpbyris #define bli_cxpbyris bli_cccxpbyris #define bli_zxpbyris bli_zzzxpbyris #endif blis-0.6.1/frame/include/level0/ri3/000077500000000000000000000000001360743507500170625ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/ri3/bli_copyjri3s.h000066400000000000000000000041171360743507500220110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_COPYJRI3S_H
#define BLIS_COPYJRI3S_H

// copyjri3s
//
// Conjugating counterparts of the copyri3s macros. Each macro forwards to
// the bli_?copyri3s macro of the same precision, passing the imaginary
// input negated (-(ai)); ar, br, bi, and bri are passed through unchanged.
// Every forwarded argument is parenthesized so that operator expressions
// supplied by the caller keep their grouping.

#define bli_scopyjri3s( ar, ai, br, bi, bri ) bli_scopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_dcopyjri3s( ar, ai, br, bi, bri ) bli_dcopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_ccopyjri3s( ar, ai, br, bi, bri ) bli_ccopyri3s( (ar), -(ai), (br), (bi), (bri) )
#define bli_zcopyjri3s( ar, ai, br, bi, bri ) bli_zcopyri3s( (ar), -(ai), (br), (bi), (bri) )

#endif
blis-0.6.1/frame/include/level0/ri3/bli_copyri3s.h000066400000000000000000000040731360743507500216400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_COPYRI3S_H #define BLIS_COPYRI3S_H // copyri3s #define bli_scopyri3s( ar, ai, br, bi, bri ) \ { \ (br) = (ar); \ } #define bli_dcopyri3s( ar, ai, br, bi, bri ) \ { \ (br) = (ar); \ } #define bli_ccopyri3s( ar, ai, br, bi, bri ) \ { \ (br) = (ar); \ (bi) = (ai); \ (bri) = (ar) + (ai); \ } #define bli_zcopyri3s( ar, ai, br, bi, bri ) \ { \ (br) = (ar); \ (bi) = (ai); \ (bri) = (ar) + (ai); \ } #endif blis-0.6.1/frame/include/level0/ri3/bli_scal2jri3s.h000066400000000000000000000047311360743507500220450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2JRI3S_H #define BLIS_SCAL2JRI3S_H // scal2jri3s #define bli_sscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr); \ } #define bli_dscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr); \ } #define bli_cscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr) + (ai) * (xi); \ (yi) = (ai) * (xr) - (ar) * (xi); \ (yri) = (yr) + (yi); \ } #define bli_zscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr) + (ai) * (xi); \ (yi) = (ai) * (xr) - (ar) * (xi); \ (yri) = (yr) + (yi); \ } #define bli_scscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr); \ (yi) = (ar) * -(xi); \ (yri) = (yr) + (yi); \ } #define bli_dzscal2jri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr); \ (yi) = (ar) * -(xi); \ (yri) = (yr) + (yi); \ } #endif blis-0.6.1/frame/include/level0/ri3/bli_scal2ri3s.h000066400000000000000000000047141360743507500216740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef BLIS_SCAL2RI3S_H #define BLIS_SCAL2RI3S_H // scal2ri3s #define bli_sscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr); \ } #define bli_dscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr); \ } #define bli_cscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr) - (ai) * (xi); \ (yi) = (ai) * (xr) + (ar) * (xi); \ (yri) = (yr) + (yi); \ } #define bli_zscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr) - (ai) * (xi); \ (yi) = (ai) * (xr) + (ar) * (xi); \ (yri) = (yr) + (yi); \ } #define bli_scscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr); \ (yi) = (ar) * (xi); \ (yri) = (yr) + (yi); \ } #define bli_dzscal2ri3s( ar, ai, xr, xi, yr, yi, yri ) \ { \ (yr) = (ar) * (xr); \ (yi) = (ar) * (xi); \ (yri) = (yr) + (yi); \ } #endif blis-0.6.1/frame/include/level0/ri3/bli_scal2ri3s_mxn.h000066400000000000000000000132121360743507500225470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2RI3S_MXN_H #define BLIS_SCAL2RI3S_MXN_H // scal2ri3s_mxn static void bli_cscal2ri3s_mxn ( const conj_t conjx, const dim_t m, const dim_t n, scomplex* restrict alpha, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t is_y ) { float* restrict alpha_r = ( float* )alpha; \ float* restrict alpha_i = ( float* )alpha + 1; \ float* restrict x_r = ( float* )x; \ float* restrict x_i = ( float* )x + 1; \ float* restrict y_r = ( float* )y; \ float* restrict y_i = ( float* )y + is_y; \ float* restrict y_rpi = ( float* )y + 2*is_y; \ const dim_t incx2 = 2*rs_x; \ const dim_t ldx2 = 2*cs_x; \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). 
*/ \ if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { float* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; float* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; float* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; float* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; float* restrict psi11_rpi = y_rpi + (i )*1 + (j )*cs_y; bli_cscal2jri3s ( *alpha_r, *alpha_i, *chi11_r, *chi11_i, *psi11_r, *psi11_i, *psi11_rpi ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { float* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; float* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; float* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; float* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; float* restrict psi11_rpi = y_rpi + (i )*1 + (j )*cs_y; bli_cscal2ri3s ( *alpha_r, *alpha_i, *chi11_r, *chi11_i, *psi11_r, *psi11_i, *psi11_rpi ); } } } static void bli_zscal2ri3s_mxn ( const conj_t conjx, const dim_t m, const dim_t n, dcomplex* restrict alpha, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y, const inc_t is_y ) { double* restrict alpha_r = ( double* )alpha; \ double* restrict alpha_i = ( double* )alpha + 1; \ double* restrict x_r = ( double* )x; \ double* restrict x_i = ( double* )x + 1; \ double* restrict y_r = ( double* )y; \ double* restrict y_i = ( double* )y + is_y; \ double* restrict y_rpi = ( double* )y + 2*is_y; \ const dim_t incx2 = 2*rs_x; \ const dim_t ldx2 = 2*cs_x; \ /* Treat the micro-panel as panel_dim x panel_len and column-stored (unit row stride). 
*/ \ if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { double* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; double* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; double* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; double* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; double* restrict psi11_rpi = y_rpi + (i )*1 + (j )*cs_y; bli_zscal2jri3s ( *alpha_r, *alpha_i, *chi11_r, *chi11_i, *psi11_r, *psi11_i, *psi11_rpi ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { double* restrict chi11_r = x_r + (i )*incx2 + (j )*ldx2; double* restrict chi11_i = x_i + (i )*incx2 + (j )*ldx2; double* restrict psi11_r = y_r + (i )*1 + (j )*cs_y; double* restrict psi11_i = y_i + (i )*1 + (j )*cs_y; double* restrict psi11_rpi = y_rpi + (i )*1 + (j )*cs_y; bli_zscal2ri3s ( *alpha_r, *alpha_i, *chi11_r, *chi11_i, *psi11_r, *psi11_i, *psi11_rpi ); } } } #endif blis-0.6.1/frame/include/level0/rih/000077500000000000000000000000001360743507500171475ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/rih/bli_scal2rihs_mxn.h000066400000000000000000000152521360743507500227270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2RIHS_MXN_H #define BLIS_SCAL2RIHS_MXN_H // scal2rihs_mxn static void bli_cscal2rihs_mxn ( const pack_t schema, const conj_t conjx, const dim_t m, const dim_t n, scomplex* restrict alpha, scomplex* restrict x, const inc_t rs_x, const inc_t cs_x, scomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { scomplex* restrict x_r = x; float* restrict y_r = ( float* )y; if ( bli_is_ro_packed( schema ) ) { if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_cscal2jros ( *alpha, *chi11, *psi11_r ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_cscal2ros ( *alpha, *chi11, *psi11_r ); } } } else if ( bli_is_io_packed( schema ) ) { if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < 
n; ++j ) for ( dim_t i = 0; i < m; ++i ) { scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_cscal2jios ( *alpha, *chi11, *psi11_r ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_cscal2ios ( *alpha, *chi11, *psi11_r ); } } } else /* if ( bli_is_rpi_packed( schema ) ) */ { if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_cscal2jrpis ( *alpha, *chi11, *psi11_r ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { scomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; float* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_cscal2rpis ( *alpha, *chi11, *psi11_r ); } } } } static void bli_zscal2rihs_mxn ( const pack_t schema, const conj_t conjx, const dim_t m, const dim_t n, dcomplex* restrict alpha, dcomplex* restrict x, const inc_t rs_x, const inc_t cs_x, dcomplex* restrict y, const inc_t rs_y, const inc_t cs_y ) { dcomplex* restrict x_r = x; double* restrict y_r = ( double* )y; if ( bli_is_ro_packed( schema ) ) { if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_zscal2jros ( *alpha, *chi11, *psi11_r ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_zscal2ros ( *alpha, *chi11, *psi11_r ); } } } else if ( bli_is_io_packed( schema ) ) { if ( bli_is_conj( conjx ) ) { 
for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_zscal2jios ( *alpha, *chi11, *psi11_r ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_zscal2ios ( *alpha, *chi11, *psi11_r ); } } } else /* if ( bli_is_rpi_packed( schema ) ) */ { if ( bli_is_conj( conjx ) ) { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_zscal2jrpis ( *alpha, *chi11, *psi11_r ); } } else /* if ( bli_is_noconj( conjx ) ) */ { for ( dim_t j = 0; j < n; ++j ) for ( dim_t i = 0; i < m; ++i ) { dcomplex* restrict chi11 = x_r + (i )*rs_x + (j )*cs_x; double* restrict psi11_r = y_r + (i )*rs_y + (j )*cs_y; bli_zscal2rpis ( *alpha, *chi11, *psi11_r ); } } } } #endif blis-0.6.1/frame/include/level0/rih/bli_scal2rihs_mxn_diag.h000066400000000000000000000067241360743507500237170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SCAL2RIHS_MXN_DIAG_H #define BLIS_SCAL2RIHS_MXN_DIAG_H // scal2rihs_mxn_diag #define bli_cscscal2rihs_mxn_diag( schema, m, n, a, x, rs_x, cs_x, y_r, rs_y, cs_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t _i; \ \ /* Handle ro, io, and rpi separately. */ \ if ( bli_is_ro_packed( schema ) ) \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_scscal2ros( *(x + _i*rs_x + _i*cs_x), \ *(a), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_scscal2ios( *(x + _i*rs_x + _i*cs_x), \ *(a), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_scscal2rpis( *(x + _i*rs_x + _i*cs_x), \ *(a), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ } #define bli_zdzscal2rihs_mxn_diag( schema, m, n, a, x, rs_x, cs_x, y_r, rs_y, cs_y ) \ { \ dim_t min_m_n = bli_min( m, n ); \ dim_t _i; \ \ /* Handle ro, io, and rpi separately. 
*/ \ if ( bli_is_ro_packed( schema ) ) \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_dzscal2ros( *(x + _i*rs_x + _i*cs_x), \ *(a), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_dzscal2ios( *(x + _i*rs_x + _i*cs_x), \ *(a), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_dzscal2rpis( *(x + _i*rs_x + _i*cs_x), \ *(a), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ } #endif blis-0.6.1/frame/include/level0/rih/bli_scal2rihs_mxn_uplo.h000066400000000000000000000226141360743507500237660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_SCAL2RIHS_MXN_UPLO_H
#define BLIS_SCAL2RIHS_MXN_UPLO_H

// scal2rihs_mxn_uplo
//
// Statement-like macros that walk one triangle (diagonal included) of an
// m-by-m region and, for every visited element, store a scaled real-valued
// quantity derived from x into y_r.
//
//   schema  - selects the per-element macro: the "ro" variant when
//             bli_is_ro_packed( schema ), the "io" variant when
//             bli_is_io_packed( schema ), and the "rpi" variant for any
//             other value (no further check is made).
//   uplo    - bli_is_lower( uplo ) visits rows _j..m-1 of column _j; any
//             other value is treated as upper and visits rows 0.._j.
//   conjx   - bli_is_conj( conjx ) selects the "j" (conjugating) per-element
//             macro; any other value selects the non-conjugating one.
//   m       - dimension of the square region.
//   a       - pointer to the scalar; passed on as *(a).
//   x       - source; element (i,j) is *(x + i*rs_x + j*cs_x).
//   y_r     - destination; element (i,j) is *(y_r + i*rs_y + j*cs_y).
//
// The macros declare locals _i and _j inside their own braces and evaluate
// their arguments more than once, so arguments must be free of side effects.
// The strides and m are used unparenthesized, so pass simple expressions.
// NOTE(review): the "io" per-element macros (bli_?scal2ios/bli_?scal2jios)
// are defined in another header; presumably they produce the imaginary part
// of the product -- confirm there.

#define bli_cscal2rihs_mxn_uplo( schema, uplo, conjx, m, a, x, rs_x, cs_x, y_r, rs_y, cs_y ) \
{ \
	dim_t _i, _j; \
\
	/* Handle ro, io, and rpi separately. */ \
	if ( bli_is_ro_packed( schema ) ) \
	{ \
		if ( bli_is_lower( uplo ) ) \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_cscal2jros( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_cscal2ros( *(a), \
					               *(x + _i*rs_x + _j*cs_x), \
					               *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
		else /* if ( bli_is_upper( uplo ) ) */ \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_cscal2jros( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_cscal2ros( *(a), \
					               *(x + _i*rs_x + _j*cs_x), \
					               *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
	} \
	else if ( bli_is_io_packed( schema ) ) \
	{ \
		if ( bli_is_lower( uplo ) ) \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_cscal2jios( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_cscal2ios( *(a), \
					               *(x + _i*rs_x + _j*cs_x), \
					               *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
		else /* if ( bli_is_upper( uplo ) ) */ \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_cscal2jios( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_cscal2ios( *(a), \
					               *(x + _i*rs_x + _j*cs_x), \
					               *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
	} \
	else /* if ( bli_is_rpi_packed( schema ) ) */ \
	{ \
		if ( bli_is_lower( uplo ) ) \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_cscal2jrpis( *(a), \
					                 *(x + _i*rs_x + _j*cs_x), \
					                 *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_cscal2rpis( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
		else /* if ( bli_is_upper( uplo ) ) */ \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_cscal2jrpis( *(a), \
					                 *(x + _i*rs_x + _j*cs_x), \
					                 *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_cscal2rpis( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
	} \
}

// Double-precision counterpart of bli_cscal2rihs_mxn_uplo: identical control
// flow, but dispatching to the bli_z* per-element macros.
#define bli_zscal2rihs_mxn_uplo( schema, uplo, conjx, m, a, x, rs_x, cs_x, y_r, rs_y, cs_y ) \
{ \
	dim_t _i, _j; \
\
	/* Handle ro, io, and rpi separately. */ \
	if ( bli_is_ro_packed( schema ) ) \
	{ \
		if ( bli_is_lower( uplo ) ) \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_zscal2jros( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_zscal2ros( *(a), \
					               *(x + _i*rs_x + _j*cs_x), \
					               *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
		else /* if ( bli_is_upper( uplo ) ) */ \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_zscal2jros( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_zscal2ros( *(a), \
					               *(x + _i*rs_x + _j*cs_x), \
					               *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
	} \
	else if ( bli_is_io_packed( schema ) ) \
	{ \
		if ( bli_is_lower( uplo ) ) \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_zscal2jios( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_zscal2ios( *(a), \
					               *(x + _i*rs_x + _j*cs_x), \
					               *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
		else /* if ( bli_is_upper( uplo ) ) */ \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_zscal2jios( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_zscal2ios( *(a), \
					               *(x + _i*rs_x + _j*cs_x), \
					               *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
	} \
	else /* if ( bli_is_rpi_packed( schema ) ) */ \
	{ \
		if ( bli_is_lower( uplo ) ) \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_zscal2jrpis( *(a), \
					                 *(x + _i*rs_x + _j*cs_x), \
					                 *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = _j; _i < m; ++_i ) \
				{ \
					bli_zscal2rpis( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
		else /* if ( bli_is_upper( uplo ) ) */ \
		{ \
			if ( bli_is_conj( conjx ) ) \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_zscal2jrpis( *(a), \
					                 *(x + _i*rs_x + _j*cs_x), \
					                 *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
			else /* if ( bli_is_noconj( conjx ) ) */ \
			{ \
				for ( _j = 0; _j < m; ++_j ) \
				for ( _i = 0; _i < _j + 1; ++_i ) \
				{ \
					bli_zscal2rpis( *(a), \
					                *(x + _i*rs_x + _j*cs_x), \
					                *(y_r + _i*rs_y + _j*cs_y) ); \
				} \
			} \
		} \
	} \
}

#endif
blis-0.6.1/frame/include/level0/rih/bli_setrihs_mxn_diag.h000066400000000000000000000065221360743507500235020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SETRIHS_MXN_DIAG_H #define BLIS_SETRIHS_MXN_DIAG_H // setrihs_mxn_diag #define bli_csetrihs_mxn_diag( schema, m, n, a, y_r, rs_y, cs_y ) \ { \ const float a_r = bli_zreal( *a ); \ const float a_i = bli_zimag( *a ); \ dim_t min_m_n = bli_min( m, n ); \ dim_t _i; \ \ /* Handle ro, io, and rpi separately. */ \ if ( bli_is_ro_packed( schema ) ) \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_scopys( (a_r), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_scopys( (a_i), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_sadd3s( (a_r), \ (a_i), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ } #define bli_zsetrihs_mxn_diag( schema, m, n, a, y_r, rs_y, cs_y ) \ { \ const double a_r = bli_zreal( *a ); \ const double a_i = bli_zimag( *a ); \ dim_t min_m_n = bli_min( m, n ); \ dim_t _i; \ \ /* Handle ro, io, and rpi separately. 
*/ \ if ( bli_is_ro_packed( schema ) ) \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_dcopys( (a_r), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_dcopys( (a_i), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ for ( _i = 0; _i < min_m_n; ++_i ) \ { \ bli_dadd3s( (a_r), \ (a_i), \ *(y_r + _i*rs_y + _i*cs_y) ); \ } \ } \ } #endif blis-0.6.1/frame/include/level0/ro/000077500000000000000000000000001360743507500170055ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/ro/bli_scal2jros.h000066400000000000000000000036641360743507500217170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_SCAL2JROS_H
#define BLIS_SCAL2JROS_H

// scal2jros
//
// yr := real(a)*real(x) + imag(a)*imag(x)
//
// This is the real part of a * conj(x). Both a and x are read through the
// complex accessors of the matching precision; yr is a real-valued lvalue
// that is overwritten (not accumulated into). a and x are each expanded
// twice, so they must be free of side effects.

// Single-precision complex a and x.
#define bli_cscal2jros( a, x, yr ) \
{ \
	(yr) = bli_creal(a) * bli_creal(x) + bli_cimag(a) * bli_cimag(x); \
}

// Double-precision complex a and x.
#define bli_zscal2jros( a, x, yr ) \
{ \
	(yr) = bli_zreal(a) * bli_zreal(x) + bli_zimag(a) * bli_zimag(x); \
}

#endif
blis-0.6.1/frame/include/level0/ro/bli_scal2ros.h000066400000000000000000000041261360743507500215370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_SCAL2ROS_H
#define BLIS_SCAL2ROS_H

// scal2ros
//
// yr := real part of a * x, stored into the real-valued lvalue yr
// (overwritten, not accumulated into). a and x are each expanded more than
// once, so they must be free of side effects.

// Complex a, complex x (single precision):
//   yr := real(a)*real(x) - imag(a)*imag(x)
#define bli_cscal2ros( a, x, yr ) \
{ \
	(yr) = bli_creal(a) * bli_creal(x) - bli_cimag(a) * bli_cimag(x); \
}

// Complex a, complex x (double precision).
#define bli_zscal2ros( a, x, yr ) \
{ \
	(yr) = bli_zreal(a) * bli_zreal(x) - bli_zimag(a) * bli_zimag(x); \
}

// Mixed-domain variant (single precision): only the real part of a is read,
// so the imag(a)*imag(x) term is dropped.
// NOTE(review): the "sc" prefix suggests a is a real (float) scalar here --
// confirm that bli_creal() accepts a real operand.
#define bli_scscal2ros( a, x, yr ) \
{ \
	(yr) = bli_creal(a) * bli_creal(x); \
}

// Mixed-domain variant (double precision); see bli_scscal2ros.
#define bli_dzscal2ros( a, x, yr ) \
{ \
	(yr) = bli_zreal(a) * bli_zreal(x); \
}

#endif
blis-0.6.1/frame/include/level0/rpi/000077500000000000000000000000001360743507500171575ustar00rootroot00000000000000blis-0.6.1/frame/include/level0/rpi/bli_scal2jrpis.h000066400000000000000000000040311360743507500222300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_SCAL2JRPIS_H
#define BLIS_SCAL2JRPIS_H

// scal2jrpis
//
// yrpi := (real(a)+imag(a))*real(x) + (imag(a)-real(a))*imag(x)
//
// This equals real(p) + imag(p) for p = a * conj(x), i.e. the "real plus
// imaginary" value of the conjugated product. yrpi is a real-valued lvalue
// that is overwritten. a and x are each expanded several times, so they must
// be free of side effects.

// Single-precision complex a and x.
#define bli_cscal2jrpis( a, x, yrpi ) \
{ \
	(yrpi) = (bli_creal(a)+bli_cimag(a)) * bli_creal(x) + \
	         (bli_cimag(a)-bli_creal(a)) * bli_cimag(x); \
}

// Double-precision complex a and x.
#define bli_zscal2jrpis( a, x, yrpi ) \
{ \
	(yrpi) = (bli_zreal(a)+bli_zimag(a)) * bli_zreal(x) + \
	         (bli_zimag(a)-bli_zreal(a)) * bli_zimag(x); \
}

#endif
blis-0.6.1/frame/include/level0/rpi/bli_scal2rpis.h000066400000000000000000000044251360743507500220650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_SCAL2RPIS_H
#define BLIS_SCAL2RPIS_H

// scal2rpis
//
// yrpi := real(p) + imag(p) for p = a * x, stored into the real-valued
// lvalue yrpi (overwritten, not accumulated into). a and x are each expanded
// several times, so they must be free of side effects.

// Complex a, complex x (single precision):
//   yrpi := (real(a)+imag(a))*real(x) + (real(a)-imag(a))*imag(x)
#define bli_cscal2rpis( a, x, yrpi ) \
{ \
	(yrpi) = (bli_creal(a)+bli_cimag(a)) * bli_creal(x) + \
	         (bli_creal(a)-bli_cimag(a)) * bli_cimag(x); \
}

// Complex a, complex x (double precision).
#define bli_zscal2rpis( a, x, yrpi ) \
{ \
	(yrpi) = (bli_zreal(a)+bli_zimag(a)) * bli_zreal(x) + \
	         (bli_zreal(a)-bli_zimag(a)) * bli_zimag(x); \
}

// Mixed-domain variant (single precision): only the real part of a is read,
// giving real(a)*real(x) + real(a)*imag(x).
// NOTE(review): the "sc" prefix suggests a is a real (float) scalar here --
// confirm that bli_creal() accepts a real operand.
#define bli_scscal2rpis( a, x, yrpi ) \
{ \
	(yrpi) = bli_creal(a) * bli_creal(x) + \
	         bli_creal(a) * bli_cimag(x); \
}

// Mixed-domain variant (double precision); see bli_scscal2rpis.
#define bli_dzscal2rpis( a, x, yrpi ) \
{ \
	(yrpi) = bli_zreal(a) * bli_zreal(x) + \
	         bli_zreal(a) * bli_zimag(x); \
}

#endif
blis-0.6.1/frame/ind/000077500000000000000000000000001360743507500143255ustar00rootroot00000000000000blis-0.6.1/frame/ind/bli_ind.c000066400000000000000000000145351360743507500161010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" static char* bli_ind_impl_str[BLIS_NUM_IND_METHODS] = { /* 3mh */ "3mh", /* 3m1 */ "3m1", /* 4mh */ "4mh", /* 4m1b */ "4m1b", /* 4m1a */ "4m1a", /* 1m */ "1m", /* nat */ "native", }; // ----------------------------------------------------------------------------- void bli_ind_init( void ) { // Enable the default induced method (1m) if one or both complex domain // gemm micro-kernels are unoptimized in the native context. // NOTE: Instead of calling bli_gks_query_cntx(), we call // bli_gks_query_cntx_noinit() to avoid the call to bli_init_once(). 
cntx_t* cntx = bli_gks_query_cntx_noinit(); bool_t c_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref ( BLIS_SCOMPLEX, BLIS_GEMM_UKR, cntx ); bool_t z_is_ref = bli_gks_cntx_l3_nat_ukr_is_ref ( BLIS_DCOMPLEX, BLIS_GEMM_UKR, cntx ); if ( c_is_ref ) bli_ind_enable_dt( BLIS_1M, BLIS_SCOMPLEX ); if ( z_is_ref ) bli_ind_enable_dt( BLIS_1M, BLIS_DCOMPLEX ); } void bli_ind_finalize( void ) { } // ----------------------------------------------------------------------------- void bli_ind_enable( ind_t method ) { bli_ind_enable_dt( method, BLIS_SCOMPLEX ); bli_ind_enable_dt( method, BLIS_DCOMPLEX ); } void bli_ind_disable( ind_t method ) { bli_ind_disable_dt( method, BLIS_SCOMPLEX ); bli_ind_disable_dt( method, BLIS_DCOMPLEX ); } void bli_ind_disable_all( void ) { bli_ind_disable_all_dt( BLIS_SCOMPLEX ); bli_ind_disable_all_dt( BLIS_DCOMPLEX ); } // ----------------------------------------------------------------------------- void bli_ind_enable_dt( ind_t method, num_t dt ) { if ( !bli_is_complex( dt ) ) return; bli_l3_ind_set_enable_dt( method, dt, TRUE ); } void bli_ind_disable_dt( ind_t method, num_t dt ) { if ( !bli_is_complex( dt ) ) return; bli_l3_ind_set_enable_dt( method, dt, FALSE ); } void bli_ind_disable_all_dt( num_t dt ) { ind_t im; for ( im = 0; im < BLIS_NUM_IND_METHODS; ++im ) { // Never disable native execution. if ( im != BLIS_NAT ) bli_ind_disable_dt( im, dt ); } } // ----------------------------------------------------------------------------- void bli_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt ) { if ( !bli_is_complex( dt ) ) return; if ( bli_opid_is_level3( oper ) ) { bli_l3_ind_oper_enable_only( oper, method, dt ); } else { // Other operations are not implemented, so requests to enable // them for any given induced method are currently no-ops. 
; } } // ----------------------------------------------------------------------------- bool_t bli_ind_oper_is_impl( opid_t oper, ind_t method ) { bool_t is_impl = FALSE; if ( bli_opid_is_level3( oper ) ) { // Look up whether its func_t pointer in the table is NULL. is_impl = ( bli_l3_ind_oper_get_func( oper, method ) != NULL ); } else { // All other operations should be reported as not implemented, // unless the requested check was for BLIS_NAT, in which case // all operations are implemented. if ( method == BLIS_NAT ) is_impl = TRUE; else is_impl = FALSE; } return is_impl; } #if 0 bool_t bli_ind_oper_has_avail( opid_t oper, num_t dt ) { ind_t method = bli_ind_oper_find_avail( oper, dt ); if ( method == BLIS_NAT ) return FALSE; else return TRUE; } #endif void_fp bli_ind_oper_get_avail( opid_t oper, num_t dt ) { void_fp func_p; if ( bli_opid_is_level3( oper ) ) { ind_t method = bli_ind_oper_find_avail( oper, dt ); func_p = bli_l3_ind_oper_get_func( oper, method ); } else { // Currently, any operation that is not level-3 does not // have induced method implementations. (This should actually // assign the pointer to be the native front-end, but for // now there are no calls to bli_ind_oper_get_avail() in the // context of level-2 operations. func_p = NULL; } return func_p; } ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt ) { ind_t method; if ( bli_opid_is_level3( oper ) ) { method = bli_l3_ind_oper_find_avail( oper, dt ); } else { // Currently, any operation that is not level-3 is guaranteed // to be native. 
method = BLIS_NAT; } return method; } char* bli_ind_oper_get_avail_impl_string( opid_t oper, num_t dt ) { ind_t method = bli_ind_oper_find_avail( oper, dt ); return bli_ind_get_impl_string( method ); } // ----------------------------------------------------------------------------- char* bli_ind_get_impl_string( ind_t method ) { return bli_ind_impl_str[ method ]; } num_t bli_ind_map_cdt_to_index( num_t dt ) { // A non-complex datatype should never be passed in. if ( !bli_is_complex( dt ) ) bli_abort(); // Map the complex datatype to a zero-based index. if ( bli_is_scomplex( dt ) ) return 0; else /* if ( bli_is_dcomplex( dt ) ) */ return 1; } blis-0.6.1/frame/ind/bli_ind.h000066400000000000000000000056001360743507500160770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_IND_H
#define BLIS_IND_H

// level-3 induced method management
#include "bli_l3_ind.h"

// level-3 object APIs
#include "bli_l3_ind_oapi.h"

// level-3 typed APIs
#include "bli_l3_ind_tapi.h"

// level-3 cntx initialization
#include "bli_cntx_ind_stage.h"

// Library-internal setup/teardown of the induced-method state (not exported).
void bli_ind_init( void );
void bli_ind_finalize( void );

// Enable/disable an induced method for both complex datatypes.
BLIS_EXPORT_BLIS void bli_ind_enable( ind_t method );
BLIS_EXPORT_BLIS void bli_ind_disable( ind_t method );
BLIS_EXPORT_BLIS void bli_ind_disable_all( void );

// Enable/disable an induced method for a single datatype. The
// implementations return without doing anything when dt is not complex.
BLIS_EXPORT_BLIS void bli_ind_enable_dt( ind_t method, num_t dt );
BLIS_EXPORT_BLIS void bli_ind_disable_dt( ind_t method, num_t dt );
BLIS_EXPORT_BLIS void bli_ind_disable_all_dt( num_t dt );

// Per-operation control; only level-3 operations are acted upon.
BLIS_EXPORT_BLIS void bli_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt );

// Per-operation queries: whether a method is implemented, and which
// method/function is currently selected for an operation and datatype.
BLIS_EXPORT_BLIS bool_t bli_ind_oper_is_impl( opid_t oper, ind_t method );
//bool_t bli_ind_oper_has_avail( opid_t oper, num_t dt );
BLIS_EXPORT_BLIS void_fp bli_ind_oper_get_avail( opid_t oper, num_t dt );
BLIS_EXPORT_BLIS ind_t bli_ind_oper_find_avail( opid_t oper, num_t dt );
BLIS_EXPORT_BLIS char* bli_ind_oper_get_avail_impl_string( opid_t oper, num_t dt );

// Helpers (not exported): printable method name, and the mapping of a
// complex datatype to a zero-based index (0 = scomplex, 1 = dcomplex).
char* bli_ind_get_impl_string( ind_t method );
num_t bli_ind_map_cdt_to_index( num_t dt );

#endif
blis-0.6.1/frame/ind/bli_l3_ind.c000066400000000000000000000225551360743507500165000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

// Table of level-3 front-end function pointers, one row per induced method
// and one column per level-3 operation. A NULL entry means the method has no
// implementation of that operation; the lookup code in this file treats NULL
// as "not implemented".
static void_fp bli_l3_ind_oper_fp[BLIS_NUM_IND_METHODS][BLIS_NUM_LEVEL3_OPS] =
{
        /*   gemm   hemm   herk   her2k  symm   syrk   syr2k  trmm3  trmm   trsm  */
/* 3mh  */ { bli_gemm3mh,  bli_hemm3mh,  bli_herk3mh,  bli_her2k3mh,  bli_symm3mh,
             bli_syrk3mh,  bli_syr2k3mh, bli_trmm33mh, NULL,          NULL         },
/* 3m1  */ { bli_gemm3m1,  bli_hemm3m1,  bli_herk3m1,  bli_her2k3m1,  bli_symm3m1,
             bli_syrk3m1,  bli_syr2k3m1, bli_trmm33m1, bli_trmm3m1,   bli_trsm3m1  },
/* 4mh  */ { bli_gemm4mh,  bli_hemm4mh,  bli_herk4mh,  bli_her2k4mh,  bli_symm4mh,
             bli_syrk4mh,  bli_syr2k4mh, bli_trmm34mh, NULL,          NULL         },
/* 4mb  */ { bli_gemm4mb,  NULL,         NULL,         NULL,          NULL,
             NULL,         NULL,         NULL,         NULL,          NULL         },
/* 4m1  */ { bli_gemm4m1,  bli_hemm4m1,  bli_herk4m1,  bli_her2k4m1,  bli_symm4m1,
             bli_syrk4m1,  bli_syr2k4m1, bli_trmm34m1, bli_trmm4m1,   bli_trsm4m1  },
/* 1m   */ { bli_gemm1m,   bli_hemm1m,   bli_herk1m,   bli_her2k1m,   bli_symm1m,
             bli_syrk1m,   bli_syr2k1m,  bli_trmm31m,  bli_trmm1m,    bli_trsm1m   },
/* nat  */ { bli_gemmnat,  bli_hemmnat,  bli_herknat,  bli_her2knat,  bli_symmnat,
             bli_syrknat,  bli_syr2knat, bli_trmm3nat, bli_trmmnat,   bli_trsmnat  },
};

//
// NOTE: "2" is used instead of BLIS_NUM_FP_TYPES/2.
//
// BLIS provides APIs to modify this state at runtime, so one application
// thread could change the state just before another thread starts the
// corresponding BLIS operation. This is solved by making the induced method
// status array local to each thread.
static BLIS_THREAD_LOCAL bool_t bli_l3_ind_oper_st[BLIS_NUM_IND_METHODS][BLIS_NUM_LEVEL3_OPS][2] = { /* gemm hemm herk her2k symm syrk, syr2k trmm3 trmm trsm */ /* c z */ /* 3mh */ { {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE} }, /* 3m1 */ { {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE} }, /* 4mh */ { {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE} }, /* 4mb */ { {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE} }, /* 4m1 */ { {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE} }, /* 1m */ { {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE}, {FALSE,FALSE} }, /* nat */ { {TRUE,TRUE}, {TRUE,TRUE}, {TRUE,TRUE}, {TRUE,TRUE}, {TRUE,TRUE}, {TRUE,TRUE}, {TRUE,TRUE}, {TRUE,TRUE}, {TRUE,TRUE}, {TRUE,TRUE} }, }; // ----------------------------------------------------------------------------- #undef GENFUNC #define GENFUNC( opname, optype ) \ \ void_fp PASTEMAC(opname,ind_get_avail)( num_t dt ) \ { \ return bli_ind_oper_get_avail( optype, dt ); \ } /* bool_t PASTEMAC(opname,ind_has_avail)( num_t dt ) { return bli_ind_oper_has_avail( optype, dt ); } */ GENFUNC( gemm, BLIS_GEMM ) GENFUNC( hemm, BLIS_HEMM ) GENFUNC( herk, BLIS_HERK ) GENFUNC( her2k, BLIS_HER2K ) GENFUNC( symm, BLIS_SYMM ) GENFUNC( syrk, BLIS_SYRK ) GENFUNC( syr2k, BLIS_SYR2K ) GENFUNC( trmm3, BLIS_TRMM3 ) GENFUNC( trmm, BLIS_TRMM ) GENFUNC( trsm, BLIS_TRSM ) // 
----------------------------------------------------------------------------- #if 0 bool_t bli_l3_ind_oper_is_avail( opid_t oper, ind_t method, num_t dt ) { void_fp func; bool_t stat; // If the datatype is real, it is never available. if ( !bli_is_complex( dt ) ) return FALSE; func = bli_l3_ind_oper_get_func( oper, method ); stat = bli_l3_ind_oper_get_enable( oper, method, dt ); return ( func != NULL && stat == TRUE ); } #endif // ----------------------------------------------------------------------------- ind_t bli_l3_ind_oper_find_avail( opid_t oper, num_t dt ) { bli_init_once(); ind_t im; // If the datatype is real, return native execution. if ( !bli_is_complex( dt ) ) return BLIS_NAT; // If the operation is not level-3, return native execution. if ( !bli_opid_is_level3( oper ) ) return BLIS_NAT; // Iterate over all induced methods and search for the first one // that is available (ie: both implemented and enabled) for the // current operation and datatype. for ( im = 0; im < BLIS_NUM_IND_METHODS; ++im ) { void_fp func = bli_l3_ind_oper_get_func( oper, im ); bool_t stat = bli_l3_ind_oper_get_enable( oper, im, dt ); if ( func != NULL && stat == TRUE ) return im; } // This return statement should never execute since the native index // should be found even if all induced methods are unavailable. We // include it simply to avoid a compiler warning. return BLIS_NAT; } // ----------------------------------------------------------------------------- void bli_l3_ind_set_enable_dt( ind_t method, num_t dt, bool_t status ) { opid_t iop; if ( !bli_is_complex( dt ) ) return; // Iterate over all level-3 operation ids. 
for ( iop = 0; iop < BLIS_NUM_LEVEL3_OPS; ++iop ) { bli_l3_ind_oper_set_enable( iop, method, dt, status ); } } // ----------------------------------------------------------------------------- void bli_l3_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt ) { ind_t im; if ( !bli_is_complex( dt ) ) return; if ( !bli_opid_is_level3( oper ) ) return; for ( im = 0; im < BLIS_NUM_IND_METHODS; ++im ) { // Native execution should always stay enabled. if ( im == BLIS_NAT ) continue; // When we come upon the requested method, enable it for the given // operation and datatype. Otherwise, disable it. if ( im == method ) bli_l3_ind_oper_set_enable( oper, im, dt, TRUE ); else bli_l3_ind_oper_set_enable( oper, im, dt, FALSE ); } } void bli_l3_ind_oper_set_enable_all( opid_t oper, num_t dt, bool_t status ) { ind_t im; if ( !bli_is_complex( dt ) ) return; if ( !bli_opid_is_level3( oper ) ) return; for ( im = 0; im < BLIS_NUM_IND_METHODS; ++im ) { // Native execution should always stay enabled. if ( im != BLIS_NAT ) bli_l3_ind_oper_set_enable( oper, im, dt, status ); } } // ----------------------------------------------------------------------------- // A mutex to allow synchronous access to the bli_l3_ind_oper_st array. static bli_pthread_mutex_t oper_st_mutex = BLIS_PTHREAD_MUTEX_INITIALIZER; void bli_l3_ind_oper_set_enable( opid_t oper, ind_t method, num_t dt, bool_t status ) { num_t idt; if ( !bli_is_complex( dt ) ) return; if ( !bli_opid_is_level3( oper ) ) return; // Disallow changing status of native execution. if ( method == BLIS_NAT ) return; idt = bli_ind_map_cdt_to_index( dt ); // Acquire the mutex protecting bli_l3_ind_oper_st. bli_pthread_mutex_lock( &oper_st_mutex ); // BEGIN CRITICAL SECTION { bli_l3_ind_oper_st[ method ][ oper ][ idt ] = status; } // END CRITICAL SECTION // Release the mutex protecting bli_l3_ind_oper_st. 
bli_pthread_mutex_unlock( &oper_st_mutex ); } bool_t bli_l3_ind_oper_get_enable( opid_t oper, ind_t method, num_t dt ) { num_t idt = bli_ind_map_cdt_to_index( dt ); bool_t r_val; { r_val = bli_l3_ind_oper_st[ method ][ oper ][ idt ]; } return r_val; } // ----------------------------------------------------------------------------- void_fp bli_l3_ind_oper_get_func( opid_t oper, ind_t method ) { return bli_l3_ind_oper_fp[ method ][ oper ]; } blis-0.6.1/frame/ind/bli_l3_ind.h000066400000000000000000000053721360743507500165030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef BLIS_L3_IND_H
#define BLIS_L3_IND_H

// -----------------------------------------------------------------------------

// GENPROT declares, for one level-3 operation, a query function that takes a
// datatype and returns a void_fp. The function name is built by PASTEMAC from
// the operation name and "ind_get_avail".
#undef  GENPROT
#define GENPROT( opname ) \
\
void_fp PASTEMAC(opname,ind_get_avail)( num_t dt );
/*bool_t PASTEMAC(opname,ind_has_avail)( num_t dt ); */

GENPROT( gemm )
GENPROT( hemm )
GENPROT( herk )
GENPROT( her2k )
GENPROT( symm )
GENPROT( syrk )
GENPROT( syr2k )
GENPROT( trmm3 )
GENPROT( trmm )
GENPROT( trsm )

// -----------------------------------------------------------------------------

// Query and update which method is used for a given operation and datatype.
// The "set"/"enable" functions return nothing; the "find"/"get" functions
// return a method id, an enable flag, or an implementation pointer.

//bool_t bli_l3_ind_oper_is_avail( opid_t oper, ind_t method, num_t dt );

ind_t   bli_l3_ind_oper_find_avail( opid_t oper, num_t dt );

void    bli_l3_ind_set_enable_dt( ind_t method, num_t dt, bool_t status );

void    bli_l3_ind_oper_enable_only( opid_t oper, ind_t method, num_t dt );
void    bli_l3_ind_oper_set_enable_all( opid_t oper, num_t dt, bool_t status );

void    bli_l3_ind_oper_set_enable( opid_t oper, ind_t method, num_t dt, bool_t status );
bool_t  bli_l3_ind_oper_get_enable( opid_t oper, ind_t method, num_t dt );

void_fp bli_l3_ind_oper_get_func( opid_t oper, ind_t method );

#endif

blis-0.6.1/frame/ind/cntx/000077500000000000000000000000001360743507500153015ustar00rootroot00000000000000blis-0.6.1/frame/ind/cntx/bli_cntx_ind_stage.c000066400000000000000000000115751360743507500212750ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" typedef void (*cntx_stage_ft)( dim_t stage, cntx_t* cntx ); static void_fp bli_cntx_ind_stage_fp[BLIS_NUM_IND_METHODS] = { /* 3mh */ bli_cntx_3mh_stage, /* 3m1 */ bli_cntx_3m1_stage, /* 4mh */ bli_cntx_4mh_stage, /* 4mb */ bli_cntx_4mb_stage, /* 4m1 */ bli_cntx_4m1_stage, /* 1m */ bli_cntx_1m_stage, /* nat */ bli_cntx_nat_stage }; // ----------------------------------------------------------------------------- // Execute the context initialization/finalization function associated // with a given induced method. void bli_cntx_ind_stage( ind_t method, dim_t stage, cntx_t* cntx ) { cntx_stage_ft func = bli_cntx_ind_stage_fp[ method ]; func( stage, cntx ); } // ----------------------------------------------------------------------------- // These functions modify a context, if needed, for the particular "stage" of // the induced method execution. Some induced methods do not make use of this // feature. NOTE: ANY INDUCED METHOD THAT HAS A NON-EMPTY _stage() FUNCTION // IS NOT THREAT-SAFE FOR APPLICATION-LEVEL THREADING. // ----------------------------------------------------------------------------- void bli_cntx_3mh_stage( dim_t stage, cntx_t* cntx ) { // Set the pack_t schemas as a function of the stage of execution. 
if ( stage == 0 ) { bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); } else if ( stage == 1 ) { bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); } else // if ( stage == 2 ) { bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RPI, cntx ); bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RPI, cntx ); } } // ----------------------------------------------------------------------------- void bli_cntx_3m1_stage( dim_t stage, cntx_t* cntx ) { } // ----------------------------------------------------------------------------- void bli_cntx_4mh_stage( dim_t stage, cntx_t* cntx ) { // Set the pack_t schemas as a function of the stage of execution. if ( stage == 0 ) { bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); } else if ( stage == 1 ) { bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); } else if ( stage == 2 ) { bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_RO, cntx ); bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_IO, cntx ); } else // if ( stage == 3 ) { bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_IO, cntx ); bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_RO, cntx ); } } // ----------------------------------------------------------------------------- void bli_cntx_4mb_stage( dim_t stage, cntx_t* cntx ) { } // ----------------------------------------------------------------------------- void bli_cntx_4m1_stage( dim_t stage, cntx_t* cntx ) { } // ----------------------------------------------------------------------------- void bli_cntx_1m_stage( dim_t stage, cntx_t* cntx ) { } // ----------------------------------------------------------------------------- void bli_cntx_nat_stage( dim_t stage, cntx_t* cntx ) { } 
blis-0.6.1/frame/ind/cntx/bli_cntx_ind_stage.h000066400000000000000000000041101360743507500212650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

// Dispatcher: takes a method id, a stage number and a context.
void  bli_cntx_ind_stage( ind_t method, dim_t stage, cntx_t* cntx );

// Per-method staging functions; all share the same (stage, context)
// signature and return nothing.
void  bli_cntx_3mh_stage( dim_t stage, cntx_t* cntx );
void  bli_cntx_3m1_stage( dim_t stage, cntx_t* cntx );
void  bli_cntx_4mh_stage( dim_t stage, cntx_t* cntx );
void  bli_cntx_4mb_stage( dim_t stage, cntx_t* cntx );
void  bli_cntx_4m1_stage( dim_t stage, cntx_t* cntx );
void  bli_cntx_1m_stage( dim_t stage, cntx_t* cntx );
void  bli_cntx_nat_stage( dim_t stage, cntx_t* cntx );

blis-0.6.1/frame/ind/oapi/000077500000000000000000000000001360743507500152555ustar00rootroot00000000000000blis-0.6.1/frame/ind/oapi/bli_l3_3m4m1m_oapi.c000066400000000000000000000336261360743507500207050ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// Every GENFRONT variant in this file generates one object-API function per
// (operation, induced method) pair and follows the same outline:
//   1. If the output object is real, forward to the operation's "nat"
//      function with the caller's arguments and return.
//   2. Otherwise replace the caller-supplied cntx with the one returned by
//      bli_gks_query_ind_cntx( ind, dt ).
//   3. Work on a local copy of the runtime (initialized from the global
//      settings when rntm is NULL).
//   4. Call the operation's _front function, once per stage where the
//      variant has a stage loop.
// The cname macro parameter is not referenced by any of the macro bodies.
// NOTE(review): PASTEMAC0(imeth) presumably expands to the ind_t constant of
// the method -- confirm against its definition.

// -- gemm/her2k/syr2k ---------------------------------------------------------

// Operands: alpha, a, b, beta, c. beta is passed as given for stage 0 and
// replaced by &BLIS_ONE for every later stage.
#undef GENFRONT
#define GENFRONT( opname, cname, imeth, nstage ) \
\
void PASTEMAC(opname,imeth) \
     ( \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  b, \
       obj_t*  beta, \
       obj_t*  c, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	bli_init_once(); \
\
	ind_t   ind      = PASTEMAC0(imeth); \
	num_t   dt       = bli_obj_dt( c ); \
	obj_t*  beta_use = beta; \
\
	dim_t   i; \
\
	/* If the objects are in the real domain, execute the native
	   implementation. */ \
	if ( bli_obj_is_real( c ) ) \
	{ \
		PASTEMAC(opname,nat)( alpha, a, b, beta, c, cntx, rntm ); \
		return; \
	} \
\
	/* A temporary hack to easily specify the 1m algorithm (block-panel or
	   panel-block). */ \
/*
	if ( PASTEMAC(opname,imeth) == bli_gemm1m ) \
	{ \
		bli_gemm1mbp( alpha, a, b, beta, c ); \
		return; \
	} \
	else if ( PASTEMAC(opname,imeth) == bli_gemm3m1 ) \
	{ \
		bli_gemm1mpb( alpha, a, b, beta, c ); \
		return; \
	} \
*/ \
\
	/* Query a context for the current induced method. This context is
	   managed and cached by the gks and should not be freed by the caller.
	   Note that the datatype argument is needed because it will be passed
	   in when bli_gks_query_ind_cntx() eventually calls the induced method's
	   _cntx_init() function. */ \
	cntx = bli_gks_query_ind_cntx( ind, dt ); \
\
	/* 3mh and 4mh change the context for each stage, and so in order to
	   remain thread-safe, we must make a local copy of the context for
	   those induced methods. */ \
	cntx_t cntx_l; \
	if ( ind == BLIS_3MH || ind == BLIS_4MH ) { cntx_l = *cntx; cntx = &cntx_l; } \
\
	/* Initialize a local runtime with global settings if necessary. Note
	   that in the case that a runtime is passed in, we make a local copy. */ \
	rntm_t rntm_l; \
	if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \
	else                { rntm_l = *rntm;                       rntm = &rntm_l; } \
\
	/* Some induced methods execute in multiple "stages". */ \
	for ( i = 0; i < nstage; ++i ) \
	{ \
		/* Prepare the context for the ith stage of computation. */ \
		bli_cntx_ind_stage( ind, i, cntx ); \
\
		/* For multi-stage methods, use BLIS_ONE as beta after the first
		   stage. */ \
		if ( i > 0 ) beta_use = &BLIS_ONE; \
\
		/* Invoke the operation's front end and request the default control
		   tree. */ \
		PASTEMAC(opname,_front)( alpha, a, b, beta_use, c, cntx, rntm, NULL ); \
	} \
}

// gemm
GENFRONT( gemm, gemm, 3mh, 3 )
GENFRONT( gemm, gemm, 3m1, 1 )
GENFRONT( gemm, gemm, 4mh, 4 )
GENFRONT( gemm, gemm, 4mb, 1 )
GENFRONT( gemm, gemm, 4m1, 1 )
GENFRONT( gemm, gemm, 1m, 1 )

// her2k
GENFRONT( her2k, gemm, 3mh, 3 )
GENFRONT( her2k, gemm, 3m1, 1 )
GENFRONT( her2k, gemm, 4mh, 4 )
//GENFRONT( her2k, gemm, 4mb, 1 ) // Not implemented.
GENFRONT( her2k, gemm, 4m1, 1 )
GENFRONT( her2k, gemm, 1m, 1 )

// syr2k
GENFRONT( syr2k, gemm, 3mh, 3 )
GENFRONT( syr2k, gemm, 3m1, 1 )
GENFRONT( syr2k, gemm, 4mh, 4 )
//GENFRONT( syr2k, gemm, 4mb, 1 ) // Not implemented.
GENFRONT( syr2k, gemm, 4m1, 1 )
GENFRONT( syr2k, gemm, 1m, 1 )


// -- hemm/symm/trmm3 ----------------------------------------------------------

// Same outline as the gemm variant, with an additional leading side argument
// that is forwarded unchanged to the native and _front functions.
#undef GENFRONT
#define GENFRONT( opname, cname, imeth, nstage ) \
\
void PASTEMAC(opname,imeth) \
     ( \
       side_t  side, \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  b, \
       obj_t*  beta, \
       obj_t*  c, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	bli_init_once(); \
\
	ind_t   ind      = PASTEMAC0(imeth); \
	num_t   dt       = bli_obj_dt( c ); \
	obj_t*  beta_use = beta; \
\
	dim_t   i; \
\
	/* If the objects are in the real domain, execute the native
	   implementation. */ \
	if ( bli_obj_is_real( c ) ) \
	{ \
		PASTEMAC(opname,nat)( side, alpha, a, b, beta, c, cntx, rntm ); \
		return; \
	} \
\
	/* Query a context for the current induced method. This context is
	   managed and cached by the gks and should not be freed by the caller.
	   Note that the datatype argument is needed because it will be passed
	   in when bli_gks_query_ind_cntx() eventually calls the induced method's
	   _cntx_init() function. */ \
	cntx = bli_gks_query_ind_cntx( ind, dt ); \
\
	/* 3mh and 4mh change the context for each stage, and so in order to
	   remain thread-safe, we must make a local copy of the context for
	   those induced methods. */ \
	cntx_t cntx_l; \
	if ( ind == BLIS_3MH || ind == BLIS_4MH ) { cntx_l = *cntx; cntx = &cntx_l; } \
\
	/* Initialize a local runtime with global settings if necessary. Note
	   that in the case that a runtime is passed in, we make a local copy. */ \
	rntm_t rntm_l; \
	if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \
	else                { rntm_l = *rntm;                       rntm = &rntm_l; } \
\
	/* Some induced methods execute in multiple "stages". */ \
	for ( i = 0; i < nstage; ++i ) \
	{ \
		/* Prepare the context for the ith stage of computation. */ \
		bli_cntx_ind_stage( ind, i, cntx ); \
\
		/* For multi-stage methods, use BLIS_ONE as beta after the first
		   stage. */ \
		if ( i > 0 ) beta_use = &BLIS_ONE; \
\
		/* Invoke the operation's front end and request the default control
		   tree. */ \
		PASTEMAC(opname,_front)( side, alpha, a, b, beta_use, c, cntx, rntm, NULL ); \
	} \
}

// hemm
GENFRONT( hemm, gemm, 3mh, 3 )
GENFRONT( hemm, gemm, 3m1, 1 )
GENFRONT( hemm, gemm, 4mh, 4 )
//GENFRONT( hemm, gemm, 4mb, 1 ) // Not implemented.
GENFRONT( hemm, gemm, 4m1, 1 )
GENFRONT( hemm, gemm, 1m, 1 )

// symm
GENFRONT( symm, gemm, 3mh, 3 )
GENFRONT( symm, gemm, 3m1, 1 )
GENFRONT( symm, gemm, 4mh, 4 )
//GENFRONT( symm, gemm, 4mb, 1 ) // Not implemented.
GENFRONT( symm, gemm, 4m1, 1 )
GENFRONT( symm, gemm, 1m, 1 )

// trmm3
GENFRONT( trmm3, gemm, 3mh, 3 )
GENFRONT( trmm3, gemm, 3m1, 1 )
GENFRONT( trmm3, gemm, 4mh, 4 )
//GENFRONT( trmm3, gemm, 4mb, 1 ) // Not implemented.
GENFRONT( trmm3, gemm, 4m1, 1 )
GENFRONT( trmm3, gemm, 1m, 1 )


// -- herk/syrk ----------------------------------------------------------------

// Same outline as the gemm variant, but with a single input matrix a (there
// is no b operand).
#undef GENFRONT
#define GENFRONT( opname, cname, imeth, nstage ) \
\
void PASTEMAC(opname,imeth) \
     ( \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  beta, \
       obj_t*  c, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	bli_init_once(); \
\
	ind_t   ind      = PASTEMAC0(imeth); \
	num_t   dt       = bli_obj_dt( c ); \
	obj_t*  beta_use = beta; \
\
	dim_t   i; \
\
	/* If the objects are in the real domain, execute the native
	   implementation. */ \
	if ( bli_obj_is_real( c ) ) \
	{ \
		PASTEMAC(opname,nat)( alpha, a, beta, c, cntx, rntm ); \
		return; \
	} \
\
	/* Query a context for the current induced method. This context is
	   managed and cached by the gks and should not be freed by the caller.
	   Note that the datatype argument is needed because it will be passed
	   in when bli_gks_query_ind_cntx() eventually calls the induced method's
	   _cntx_init() function. */ \
	cntx = bli_gks_query_ind_cntx( ind, dt ); \
\
	/* 3mh and 4mh change the context for each stage, and so in order to
	   remain thread-safe, we must make a local copy of the context for
	   those induced methods. */ \
	cntx_t cntx_l; \
	if ( ind == BLIS_3MH || ind == BLIS_4MH ) { cntx_l = *cntx; cntx = &cntx_l; } \
\
	/* Initialize a local runtime with global settings if necessary. Note
	   that in the case that a runtime is passed in, we make a local copy. */ \
	rntm_t rntm_l; \
	if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \
	else                { rntm_l = *rntm;                       rntm = &rntm_l; } \
\
	/* Some induced methods execute in multiple "stages". */ \
	for ( i = 0; i < nstage; ++i ) \
	{ \
		/* Prepare the context for the ith stage of computation. */ \
		bli_cntx_ind_stage( ind, i, cntx ); \
\
		/* For multi-stage methods, use BLIS_ONE as beta after the first
		   stage. */ \
		if ( i > 0 ) beta_use = &BLIS_ONE; \
\
		/* Invoke the operation's front end and request the default control
		   tree. */ \
		PASTEMAC(opname,_front)( alpha, a, beta_use, c, cntx, rntm, NULL ); \
	} \
}

// herk
GENFRONT( herk, gemm, 3mh, 3 )
GENFRONT( herk, gemm, 3m1, 1 )
GENFRONT( herk, gemm, 4mh, 4 )
//GENFRONT( herk, gemm, 4mb, 1 ) // Not implemented.
GENFRONT( herk, gemm, 4m1, 1 )
GENFRONT( herk, gemm, 1m, 1 )

// syrk
GENFRONT( syrk, gemm, 3mh, 3 )
GENFRONT( syrk, gemm, 3m1, 1 )
GENFRONT( syrk, gemm, 4mh, 4 )
//GENFRONT( syrk, gemm, 4mb, 1 ) // Not implemented.
GENFRONT( syrk, gemm, 4m1, 1 )
GENFRONT( syrk, gemm, 1m, 1 )


// -- trmm ---------------------------------------------------------------------

// Operands: side, alpha, a, b; the real-domain test and the datatype are
// taken from b. There is no beta, and no local copy of the context is made
// before bli_cntx_ind_stage() is called. Only single-stage methods are
// instantiated below.
#undef GENFRONT
#define GENFRONT( opname, cname, imeth, nstage ) \
\
void PASTEMAC(opname,imeth) \
     ( \
       side_t  side, \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  b, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	bli_init_once(); \
\
	ind_t   ind      = PASTEMAC0(imeth); \
	num_t   dt       = bli_obj_dt( b ); \
\
	dim_t   i; \
\
	/* If the objects are in the real domain, execute the native
	   implementation. */ \
	if ( bli_obj_is_real( b ) ) \
	{ \
		PASTEMAC(opname,nat)( side, alpha, a, b, cntx, rntm ); \
		return; \
	} \
\
	/* Query a context for the current induced method. This context is
	   managed and cached by the gks and should not be freed by the caller.
	   Note that the datatype argument is needed because it will be passed
	   in when bli_gks_query_ind_cntx() eventually calls the induced method's
	   _cntx_init() function. */ \
	cntx = bli_gks_query_ind_cntx( ind, dt ); \
\
	/* Initialize a local runtime with global settings if necessary. Note
	   that in the case that a runtime is passed in, we make a local copy. */ \
	rntm_t rntm_l; \
	if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \
	else                { rntm_l = *rntm;                       rntm = &rntm_l; } \
\
	/* Some induced methods execute in multiple "stages". */ \
	for ( i = 0; i < nstage; ++i ) \
	{ \
		/* Prepare the context for the ith stage of computation. */ \
		bli_cntx_ind_stage( ind, i, cntx ); \
\
		/* Invoke the operation's front end and request the default control
		   tree. */ \
		PASTEMAC(opname,_front)( side, alpha, a, b, cntx, rntm, NULL ); \
	} \
}

// trmm
//GENFRONT( trmm, gemm, 3mh, 3 ) // Unimplementable.
GENFRONT( trmm, gemm, 3m1, 1 )
//GENFRONT( trmm, gemm, 4mh, 4 ) // Unimplementable.
//GENFRONT( trmm, gemm, 4mb, 1 ) // Unimplementable.
GENFRONT( trmm, gemm, 4m1, 1 )
GENFRONT( trmm, gemm, 1m, 1 )


// -- trsm ---------------------------------------------------------------------

// Operands as for trmm. This variant has no stage loop and never calls
// bli_cntx_ind_stage(): the _front function is invoked exactly once, and the
// nstage macro parameter is not referenced by the body.
#undef GENFRONT
#define GENFRONT( opname, cname, imeth, nstage ) \
\
void PASTEMAC(opname,imeth) \
     ( \
       side_t  side, \
       obj_t*  alpha, \
       obj_t*  a, \
       obj_t*  b, \
       cntx_t* cntx, \
       rntm_t* rntm  \
     ) \
{ \
	bli_init_once(); \
\
	ind_t   ind      = PASTEMAC0(imeth); \
	num_t   dt       = bli_obj_dt( b ); \
\
	/* If the objects are in the real domain, execute the native
	   implementation. */ \
	if ( bli_obj_is_real( b ) ) \
	{ \
		PASTEMAC(opname,nat)( side, alpha, a, b, cntx, rntm ); \
		return; \
	} \
\
	/* Query a context for the current induced method. This context is
	   managed and cached by the gks and should not be freed by the caller.
	   Note that the datatype argument is needed because it will be passed
	   in when bli_gks_query_ind_cntx() eventually calls the induced method's
	   _cntx_init() function. */ \
	cntx = bli_gks_query_ind_cntx( ind, dt ); \
\
	/* Initialize a local runtime with global settings if necessary. Note
	   that in the case that a runtime is passed in, we make a local copy. */ \
	rntm_t rntm_l; \
	if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \
	else                { rntm_l = *rntm;                       rntm = &rntm_l; } \
\
	{ \
		/* NOTE: trsm cannot be implemented via any induced method that
		   needs to execute in stages (e.g. 3mh, 4mh). */ \
\
		/* Invoke the operation's front end and request the default control
		   tree. */ \
		PASTEMAC(opname,_front)( side, alpha, a, b, cntx, rntm, NULL ); \
	} \
}

// trsm
//GENFRONT( trsm, trsm, 3mh, 3 ) // Unimplementable.
GENFRONT( trsm, trsm, 3m1, 1 )
//GENFRONT( trsm, trsm, 4mh, 4 ) // Unimplementable.
//GENFRONT( trsm, trsm, 4mb, 1 ) // Unimplementable.
GENFRONT( trsm, trsm, 4m1, 1 )
GENFRONT( trsm, trsm, 1m, 1 )

blis-0.6.1/frame/ind/oapi/bli_l3_ind_oapi.c000066400000000000000000000124411360743507500204310ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // -- gemm/her2k/syr2k --------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( c ); \ PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \ \ /* Initialize a local runtime with global settings if necessary. Note that in the case that a runtime is passed in, we make a local copy. 
*/ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ func( alpha, a, b, beta, c, cntx, rntm ); \ } GENFRONT( gemm, ind ) GENFRONT( her2k, ind ) GENFRONT( syr2k, ind ) // -- hemm/symm/trmm3 ---------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( c ); \ PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \ \ /* Initialize a local runtime with global settings if necessary. Note that in the case that a runtime is passed in, we make a local copy. */ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ func( side, alpha, a, b, beta, c, cntx, rntm ); \ } GENFRONT( hemm, ind ) GENFRONT( symm, ind ) GENFRONT( trmm3, ind ) // -- herk/syrk ---------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( c ); \ PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \ \ /* Initialize a local runtime with global settings if necessary. Note that in the case that a runtime is passed in, we make a local copy. 
*/ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ func( alpha, a, beta, c, cntx, rntm ); \ } GENFRONT( herk, ind ) GENFRONT( syrk, ind ) // -- trmm/trsm ---------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ num_t dt = bli_obj_dt( b ); \ PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \ \ /* Initialize a local runtime with global settings if necessary. Note that in the case that a runtime is passed in, we make a local copy. */ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ func( side, alpha, a, b, cntx, rntm ); \ } GENFRONT( trmm, ind ) GENFRONT( trsm, ind ) blis-0.6.1/frame/ind/oapi/bli_l3_ind_oapi.h000066400000000000000000000120471360743507500204400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Generate object-based prototypes for induced methods that work for // trmm and trsm (ie: two-operand operations). // #undef GENPROT #define GENPROT( imeth ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(gemm,imeth) ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(hemm,imeth) ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(herk,imeth) ( obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(her2k,imeth)( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(symm,imeth) ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(syrk,imeth) ( obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(syr2k,imeth)( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(trmm3,imeth)( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, 
rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(trmm,imeth) ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(trsm,imeth) ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, rntm_t* rntm ); GENPROT( nat ) GENPROT( ind ) GENPROT( 3m1 ) GENPROT( 4m1 ) GENPROT( 1m ) // // Generate object-based prototypes for induced methods that do NOT work // for trmm and trsm (ie: two-operand operations). // #undef GENPROT_NO2OP #define GENPROT_NO2OP( imeth ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(gemm,imeth) ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(hemm,imeth) ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(herk,imeth) ( obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(her2k,imeth)( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(symm,imeth) ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(syrk,imeth) ( obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(syr2k,imeth)( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); \ BLIS_EXPORT_BLIS void PASTEMAC(trmm3,imeth)( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); GENPROT_NO2OP( 3mh ) GENPROT_NO2OP( 4mh ) GENPROT_NO2OP( 4mb ) // // Generate object-based prototypes for 1m methods that specify an algorithm // (e.g., block-panel or panel-block). 
// /* #undef GENPROT #define GENPROT( imeth, alg ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(gemm,imeth,alg) ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); \ */ //GENPROT( 1m, bp ) //GENPROT( 1m, pb ) blis-0.6.1/frame/ind/oapi/bli_l3_nat_oapi.c000066400000000000000000000161601360743507500204430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // NOTE: The function definitions in this file can be consolidated with the // definitions for the other induced methods. The only advantage of keeping // them separate is that it allows us to avoid the very small loop overhead // of executing one iteration of a for loop, plus the overhead of calling a // function that does nothing (ie: the _cntx_init_stage() function). // -- gemm/her2k/syr2k --------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, cname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Initialize a local runtime with global settings if necessary. Note that in the case that a runtime is passed in, we make a local copy. */ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ alpha, a, b, beta, c, cntx, rntm, NULL \ ); \ } // If a sandbox was enabled, do not define bli_gemmnat() since it will be // defined in the sandbox environment. #ifndef BLIS_ENABLE_SANDBOX GENFRONT( gemm, gemm, nat ) #endif GENFRONT( her2k, gemm, nat ) GENFRONT( syr2k, gemm, nat ) // -- hemm/symm/trmm3 ---------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, cname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Initialize a local runtime with global settings if necessary. 
Note that in the case that a runtime is passed in, we make a local copy. */ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ side, alpha, a, b, beta, c, cntx, rntm, NULL \ ); \ } GENFRONT( hemm, gemm, nat ) GENFRONT( symm, gemm, nat ) GENFRONT( trmm3, gemm, nat ) // -- herk/syrk ---------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, cname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ obj_t* alpha, \ obj_t* a, \ obj_t* beta, \ obj_t* c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Initialize a local runtime with global settings if necessary. Note that in the case that a runtime is passed in, we make a local copy. */ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ alpha, a, beta, c, cntx, rntm, NULL \ ); \ } GENFRONT( herk, gemm, nat ) GENFRONT( syrk, gemm, nat ) // -- trmm --------------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, cname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Initialize a local runtime with global settings if necessary. Note that in the case that a runtime is passed in, we make a local copy. 
*/ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ side, alpha, a, b, cntx, rntm, NULL \ ); \ } GENFRONT( trmm, gemm, nat ) // -- trsm --------------------------------------------------------------------- #undef GENFRONT #define GENFRONT( opname, cname, imeth ) \ \ void PASTEMAC(opname,imeth) \ ( \ side_t side, \ obj_t* alpha, \ obj_t* a, \ obj_t* b, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Initialize a local runtime with global settings if necessary. Note that in the case that a runtime is passed in, we make a local copy. */ \ rntm_t rntm_l; \ if ( rntm == NULL ) { bli_rntm_init_from_global( &rntm_l ); rntm = &rntm_l; } \ else { rntm_l = *rntm; rntm = &rntm_l; } \ \ /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ side, alpha, a, b, cntx, rntm, NULL \ ); \ } GENFRONT( trsm, trsm, nat ) blis-0.6.1/frame/ind/tapi/000077500000000000000000000000001360743507500152625ustar00rootroot00000000000000blis-0.6.1/frame/ind/tapi/bli_l3_ind_tapi.c000066400000000000000000000406641360743507500204530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // -- gemm --------------------------------------------------------------------- #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, bo, betao, co; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, k, n, &m_b, &n_b ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_create_with_attached_buffer( dt, m, n, c, rs_c, cs_c, &co ); \ \ 
bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ PASTEMAC0(opname) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( gemm3mh ) INSERT_GENTFUNC_BASIC0( gemm3m1 ) INSERT_GENTFUNC_BASIC0( gemm4mh ) INSERT_GENTFUNC_BASIC0( gemm4mb ) INSERT_GENTFUNC_BASIC0( gemm4m1 ) INSERT_GENTFUNC_BASIC0( gemm1m ) // -- hemm --------------------------------------------------------------------- #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ conj_t conja, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, bo, betao, co; \ \ dim_t mn_a; \ dim_t m_b, n_b; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ bli_set_dims_with_trans( transb, m, n, &m_b, &n_b ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_create_with_attached_buffer( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_conj( conja, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( BLIS_HERMITIAN, &ao ); \ \ PASTEMAC0(opname) \ ( \ side, \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( hemm3mh ) INSERT_GENTFUNC_BASIC0( hemm3m1 ) INSERT_GENTFUNC_BASIC0( hemm4mh ) INSERT_GENTFUNC_BASIC0( hemm4m1 ) INSERT_GENTFUNC_BASIC0( hemm1m ) // -- herk --------------------------------------------------------------------- #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) 
\ ( \ uplo_t uploc, \ trans_t transa, \ dim_t m, \ dim_t k, \ ctype_r* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, betao, co; \ \ dim_t m_a, n_a; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ \ bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt_r, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ \ bli_obj_set_struc( BLIS_HERMITIAN, &co ); \ \ PASTEMAC0(opname) \ ( \ &alphao, \ &ao, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( herk3mh ) INSERT_GENTFUNCR_BASIC0( herk3m1 ) INSERT_GENTFUNCR_BASIC0( herk4mh ) INSERT_GENTFUNCR_BASIC0( herk4m1 ) INSERT_GENTFUNCR_BASIC0( herk1m ) // -- her2k -------------------------------------------------------------------- #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt_r = PASTEMAC(chr,type); \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, bo, betao, co; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, m, k, &m_b, &n_b ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt_r, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, 
cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( BLIS_HERMITIAN, &co ); \ \ PASTEMAC0(opname) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( her2k3mh ) INSERT_GENTFUNCR_BASIC0( her2k3m1 ) INSERT_GENTFUNCR_BASIC0( her2k4mh ) INSERT_GENTFUNCR_BASIC0( her2k4m1 ) INSERT_GENTFUNCR_BASIC0( her2k1m ) // -- symm --------------------------------------------------------------------- #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ conj_t conja, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, bo, betao, co; \ \ dim_t mn_a; \ dim_t m_b, n_b; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ bli_set_dims_with_trans( transb, m, n, &m_b, &n_b ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_create_with_attached_buffer( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_conj( conja, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( BLIS_SYMMETRIC, &ao ); \ \ PASTEMAC0(opname) \ ( \ side, \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( symm3mh ) INSERT_GENTFUNC_BASIC0( symm3m1 ) INSERT_GENTFUNC_BASIC0( symm4mh ) INSERT_GENTFUNC_BASIC0( 
symm4m1 ) INSERT_GENTFUNC_BASIC0( symm1m ) // -- syrk --------------------------------------------------------------------- #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ trans_t transa, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, betao, co; \ \ dim_t m_a, n_a; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ \ bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \ \ PASTEMAC0(opname) \ ( \ &alphao, \ &ao, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( syrk3mh ) INSERT_GENTFUNC_BASIC0( syrk3m1 ) INSERT_GENTFUNC_BASIC0( syrk4mh ) INSERT_GENTFUNC_BASIC0( syrk4m1 ) INSERT_GENTFUNC_BASIC0( syrk1m ) // -- syr2k -------------------------------------------------------------------- #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, bo, betao, co; \ \ dim_t m_a, n_a; \ dim_t m_b, n_b; \ \ bli_set_dims_with_trans( transa, m, k, &m_a, &n_a ); \ bli_set_dims_with_trans( transb, m, k, &m_b, &n_b ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ 
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploc, &co ); \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \ \ PASTEMAC0(opname) \ ( \ &alphao, \ &ao, \ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( syr2k3mh ) INSERT_GENTFUNC_BASIC0( syr2k3m1 ) INSERT_GENTFUNC_BASIC0( syr2k4mh ) INSERT_GENTFUNC_BASIC0( syr2k4m1 ) INSERT_GENTFUNC_BASIC0( syr2k1m ) // -- trmm3 -------------------------------------------------------------------- #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, bo, betao, co; \ \ dim_t mn_a; \ dim_t m_b, n_b; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ bli_set_dims_with_trans( transb, m, n, &m_b, &n_b ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ bli_obj_create_with_attached_buffer( dt, m, n, c, rs_c, cs_c, &co ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ bli_obj_set_conjtrans( transa, &ao ); \ bli_obj_set_conjtrans( transb, &bo ); \ \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \ \ PASTEMAC0(opname) \ ( \ side, \ &alphao, \ &ao, 
\ &bo, \ &betao, \ &co, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( trmm33mh ) INSERT_GENTFUNC_BASIC0( trmm33m1 ) INSERT_GENTFUNC_BASIC0( trmm34mh ) INSERT_GENTFUNC_BASIC0( trmm34m1 ) INSERT_GENTFUNC_BASIC0( trmm31m ) // -- trmm --------------------------------------------------------------------- #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, bo; \ \ dim_t mn_a; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, n, b, rs_b, cs_b, &bo ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ bli_obj_set_conjtrans( transa, &ao ); \ \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \ \ PASTEMAC0(opname) \ ( \ side, \ &alphao, \ &ao, \ &bo, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( trmm3m1 ) INSERT_GENTFUNC_BASIC0( trmm4m1 ) INSERT_GENTFUNC_BASIC0( trmm1m ) // -- trsm --------------------------------------------------------------------- #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ bli_init_once(); \ \ const num_t dt = PASTEMAC(ch,type); \ \ obj_t alphao, ao, bo; \ \ dim_t mn_a; \ \ bli_set_dim_with_side( side, m, n, &mn_a ); \ \ bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ 
bli_obj_create_with_attached_buffer( dt, m, n, b, rs_b, cs_b, &bo ); \ \ bli_obj_set_uplo( uploa, &ao ); \ bli_obj_set_diag( diaga, &ao ); \ bli_obj_set_conjtrans( transa, &ao ); \ \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \ \ PASTEMAC0(opname) \ ( \ side, \ &alphao, \ &ao, \ &bo, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( trsm3m1 ) INSERT_GENTFUNC_BASIC0( trsm4m1 ) INSERT_GENTFUNC_BASIC0( trsm1m ) blis-0.6.1/frame/ind/tapi/bli_l3_ind_tapi.h000066400000000000000000000167411360743507500204570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( gemm3mh ) INSERT_GENTPROT_BASIC0( gemm3m1 ) INSERT_GENTPROT_BASIC0( gemm4mh ) INSERT_GENTPROT_BASIC0( gemm4mb ) INSERT_GENTPROT_BASIC0( gemm4m1 ) INSERT_GENTPROT_BASIC0( gemm1m ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ conj_t conja, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( hemm3mh ) INSERT_GENTPROT_BASIC0( hemm3m1 ) INSERT_GENTPROT_BASIC0( hemm4mh ) INSERT_GENTPROT_BASIC0( hemm4m1 ) INSERT_GENTPROT_BASIC0( hemm1m ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ uplo_t uploc, \ trans_t transa, \ trans_t transb, \ dim_t m, \ dim_t k, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype_r* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntmx \ ); INSERT_GENTPROTR_BASIC0( her2k3mh ) INSERT_GENTPROTR_BASIC0( her2k3m1 ) 
INSERT_GENTPROTR_BASIC0( her2k4mh )
INSERT_GENTPROTR_BASIC0( her2k4m1 )
INSERT_GENTPROTR_BASIC0( her2k1m )


// -- herk ---------------------------------------------------------------------

// Both alpha and beta are declared with the ctype_r type. The final
// parameter is named rntm to match every other prototype in this header.

#undef  GENTPROTR
#define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       uplo_t   uploc, \
       trans_t  transa, \
       dim_t    m, \
       dim_t    k, \
       ctype_r* alpha, \
       ctype*   a, inc_t rs_a, inc_t cs_a, \
       ctype_r* beta, \
       ctype*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t*  cntx, \
       rntm_t*  rntm \
     );

INSERT_GENTPROTR_BASIC0( herk3mh )
INSERT_GENTPROTR_BASIC0( herk3m1 )
INSERT_GENTPROTR_BASIC0( herk4mh )
INSERT_GENTPROTR_BASIC0( herk4m1 )
INSERT_GENTPROTR_BASIC0( herk1m )


// -- symm ---------------------------------------------------------------------

#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       side_t  side, \
       uplo_t  uploa, \
       conj_t  conja, \
       trans_t transb, \
       dim_t   m, \
       dim_t   n, \
       ctype*  alpha, \
       ctype*  a, inc_t rs_a, inc_t cs_a, \
       ctype*  b, inc_t rs_b, inc_t cs_b, \
       ctype*  beta, \
       ctype*  c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm \
     );

INSERT_GENTPROT_BASIC0( symm3mh )
INSERT_GENTPROT_BASIC0( symm3m1 )
INSERT_GENTPROT_BASIC0( symm4mh )
INSERT_GENTPROT_BASIC0( symm4m1 )
INSERT_GENTPROT_BASIC0( symm1m )


// -- syr2k --------------------------------------------------------------------

#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       uplo_t  uploc, \
       trans_t transa, \
       trans_t transb, \
       dim_t   m, \
       dim_t   k, \
       ctype*  alpha, \
       ctype*  a, inc_t rs_a, inc_t cs_a, \
       ctype*  b, inc_t rs_b, inc_t cs_b, \
       ctype*  beta, \
       ctype*  c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm \
     );

INSERT_GENTPROT_BASIC0( syr2k3mh )
INSERT_GENTPROT_BASIC0( syr2k3m1 )
INSERT_GENTPROT_BASIC0( syr2k4mh )
INSERT_GENTPROT_BASIC0( syr2k4m1 )
INSERT_GENTPROT_BASIC0( syr2k1m )


// -- syrk ---------------------------------------------------------------------

#undef  GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
     ( \
       uplo_t  uploc, \
       trans_t transa, \
       dim_t   m, \
       dim_t   k, \
       ctype*  alpha, \
       ctype*  a, inc_t rs_a, inc_t cs_a, \
       ctype*  beta, \
       ctype*  c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm \
     );

INSERT_GENTPROT_BASIC0( syrk3mh )
INSERT_GENTPROT_BASIC0( syrk3m1 )
INSERT_GENTPROT_BASIC0( syrk4mh )
INSERT_GENTPROT_BASIC0( syrk4m1 ) INSERT_GENTPROT_BASIC0( syrk1m ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ trans_t transb, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ ctype* beta, \ ctype* c, inc_t rs_c, inc_t cs_c, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( trmm33mh ) INSERT_GENTPROT_BASIC0( trmm33m1 ) INSERT_GENTPROT_BASIC0( trmm34mh ) INSERT_GENTPROT_BASIC0( trmm34m1 ) INSERT_GENTPROT_BASIC0( trmm31m ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( trmm3m1 ) INSERT_GENTPROT_BASIC0( trmm4m1 ) INSERT_GENTPROT_BASIC0( trmm1m ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ side_t side, \ uplo_t uploa, \ trans_t transa, \ diag_t diaga, \ dim_t m, \ dim_t n, \ ctype* alpha, \ ctype* a, inc_t rs_a, inc_t cs_a, \ ctype* b, inc_t rs_b, inc_t cs_b, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( trsm3m1 ) INSERT_GENTPROT_BASIC0( trsm4m1 ) INSERT_GENTPROT_BASIC0( trsm1m ) blis-0.6.1/frame/ind/ukernels/000077500000000000000000000000001360743507500161555ustar00rootroot00000000000000blis-0.6.1/frame/ind/ukernels/bli_l3_ind_ukr.h000066400000000000000000000071741360743507500212160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Define template prototypes for level-3 micro-kernels. 
// // 1m micro-kernels #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); INSERT_GENTPROT_BASIC0( gemm3mh_ukr_name ) INSERT_GENTPROT_BASIC0( gemm3m1_ukr_name ) INSERT_GENTPROT_BASIC0( gemm4mh_ukr_name ) INSERT_GENTPROT_BASIC0( gemm4mb_ukr_name ) INSERT_GENTPROT_BASIC0( gemm4m1_ukr_name ) INSERT_GENTPROT_BASIC0( gemm1m_ukr_name ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); INSERT_GENTPROT_BASIC0( gemmtrsm3m1_l_ukr_name ) INSERT_GENTPROT_BASIC0( gemmtrsm3m1_u_ukr_name ) INSERT_GENTPROT_BASIC0( gemmtrsm4m1_l_ukr_name ) INSERT_GENTPROT_BASIC0( gemmtrsm4m1_u_ukr_name ) INSERT_GENTPROT_BASIC0( gemmtrsm1m_l_ukr_name ) INSERT_GENTPROT_BASIC0( gemmtrsm1m_u_ukr_name ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ); INSERT_GENTPROT_BASIC0( trsm3m1_l_ukr_name ) INSERT_GENTPROT_BASIC0( trsm3m1_u_ukr_name ) INSERT_GENTPROT_BASIC0( trsm4m1_l_ukr_name ) INSERT_GENTPROT_BASIC0( trsm4m1_u_ukr_name ) INSERT_GENTPROT_BASIC0( trsm1m_l_ukr_name ) INSERT_GENTPROT_BASIC0( trsm1m_u_ukr_name ) blis-0.6.1/frame/thread/000077500000000000000000000000001360743507500150225ustar00rootroot00000000000000blis-0.6.1/frame/thread/bli_l3_decor.h000066400000000000000000000051141360743507500175140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_L3_DECOR_H
#define BLIS_L3_DECOR_H

// -- conventional definitions -------------------------------------------------

// Level-3 internal function type.
// This is the signature of the function passed as `func` to
// bli_l3_thread_decorator(). The decorator implementations call it once per
// thread, supplying that thread's control tree (cntl) and thrinfo_t (thread)
// as the last two arguments.
typedef void (*l3int_t)
     (
       obj_t*     alpha,
       obj_t*     a,
       obj_t*     b,
       obj_t*     beta,
       obj_t*     c,
       cntx_t*    cntx,
       rntm_t*    rntm,
       cntl_t*    cntl,
       thrinfo_t* thread
     );

// Level-3 thread decorator prototype.
// One definition is provided by each of the _single.c, _openmp.c and
// _pthreads.c implementations, selected by the BLIS_ENABLE_* macros those
// files test.
//   func   - the level-3 function to run in each thread.
//   family - operation family id, forwarded to bli_l3_cntl_create_if().
//   alpha, a, b, beta, c, cntx - forwarded to func.
//   rntm   - runtime object; the threaded implementations read the thread
//            count from it.
//   cntl   - control tree, forwarded to bli_l3_cntl_create_if().
void bli_l3_thread_decorator
     (
       l3int_t func,
       opid_t  family,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     );

// Include definitions specific to the method of multithreading for the
// conventional code path.
#include "bli_l3_decor_single.h"
#include "bli_l3_decor_openmp.h"
#include "bli_l3_decor_pthreads.h"

#endif

blis-0.6.1/frame/thread/bli_l3_decor_openmp.c000066400000000000000000000211061360743507500210640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

#ifdef BLIS_ENABLE_OPENMP

// Define a dummy function bli_l3_thread_entry(), which is needed in the
// pthreads version, so that when building Windows DLLs (with OpenMP enabled
// or no multithreading) we don't risk having an unresolved symbol.
void* bli_l3_thread_entry( void* data_void ) { return NULL; }

// Uncomment to collect every thread's thrinfo_t, print the thread paths after
// the parallel region, and then exit(1) instead of returning.
//#define PRINT_THRINFO

// OpenMP implementation of the level-3 thread decorator.
// Opens an OpenMP parallel region requesting n_threads threads (the count is
// read from rntm) and has each thread call func with a thread-local rntm_t
// copy, thread-local aliases of a, b and c, its own control tree, and its own
// root thrinfo_t.
// Side effect on the caller's objects: the pack schemas of a and b are read
// and then reset to BLIS_NOT_PACKED, and rntm has its sba pool and membrk
// fields set.
void bli_l3_thread_decorator
     (
       l3int_t func,
       opid_t  family,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	// This is part of a hack to support mixed domain in bli_gemm_front().
	// Sometimes we need to specify a non-standard schema for A and B, and
	// we decided to transmit them via the schema field in the obj_t's
	// rather than pass them in as function parameters. Once the values
	// have been read, we immediately reset them back to their expected
	// values for unpacked objects.
	pack_t schema_a = bli_obj_pack_schema( a );
	pack_t schema_b = bli_obj_pack_schema( b );
	bli_obj_set_pack_schema( BLIS_NOT_PACKED, a );
	bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );

	// Query the total number of threads from the rntm_t object.
	const dim_t n_threads = bli_rntm_num_threads( rntm );

	#ifdef PRINT_THRINFO
	thrinfo_t** threads = bli_malloc_intl( n_threads * sizeof( thrinfo_t* ) );
	#endif

	// NOTE: The sba was initialized in bli_init().

	// Check out an array_t from the small block allocator. This is done
	// with an internal lock to ensure only one application thread accesses
	// the sba at a time. bli_sba_checkout_array() will also automatically
	// resize the array_t, if necessary.
	array_t* restrict array = bli_sba_checkout_array( n_threads );

	// Access the pool_t* for thread 0 and embed it into the rntm. We do
	// this up-front only so that we have the rntm_t.sba_pool field
	// initialized and ready for the global communicator creation below.
	bli_sba_rntm_set_pool( 0, array, rntm );

	// Set the packing block allocator field of the rntm. This will be
	// inherited by all of the child threads when they make local copies of
	// the rntm below.
	bli_membrk_rntm_set_membrk( rntm );

	// Allocate a global communicator for the root thrinfo_t structures.
	thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

	_Pragma( "omp parallel num_threads(n_threads)" )
	{
		// Create a thread-local copy of the master thread's rntm_t. This is
		// necessary since we want each thread to be able to track its own
		// small block pool_t as it executes down the function stack.
		rntm_t           rntm_l = *rntm;
		rntm_t* restrict rntm_p = &rntm_l;

		// Query the thread's id from OpenMP.
		const dim_t tid = omp_get_thread_num();

		// Check for a somewhat obscure OpenMP thread-mismatch issue.
		bli_l3_thread_decorator_thread_check( n_threads, tid, gl_comm, rntm_p );

		// Use the thread id to access the appropriate pool_t* within the
		// array_t, and use it to set the sba_pool field within the rntm_t.
		// If the pool_t* element within the array_t is NULL, it will first
		// be allocated/initialized.
		bli_sba_rntm_set_pool( tid, array, rntm_p );

		obj_t           a_t, b_t, c_t;
		cntl_t*         cntl_use;
		thrinfo_t*      thread;

		// Alias thread-local copies of A, B, and C. These will be the objects
		// we pass down the algorithmic function stack. Making thread-local
		// aliases is highly recommended in case a thread needs to change any
		// of the properties of an object without affecting other threads'
		// objects.
		bli_obj_alias_to( a, &a_t );
		bli_obj_alias_to( b, &b_t );
		bli_obj_alias_to( c, &c_t );

		// Create a default control tree for the operation, if needed.
		bli_l3_cntl_create_if( family, schema_a, schema_b,
		                       &a_t, &b_t, &c_t, rntm_p, cntl, &cntl_use );

		// Create the root node of the current thread's thrinfo_t structure.
		bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread );

		// The #else branch below is disabled; it only grows the thrinfo_t
		// tree instead of running the operation.
#if 1
		func
		(
		  alpha,
		  &a_t,
		  &b_t,
		  beta,
		  &c_t,
		  cntx,
		  rntm_p,
		  cntl_use,
		  thread
		);
#else
		bli_thrinfo_grow_tree
		(
		  rntm_p,
		  cntl_use,
		  thread
		);
#endif

		// Free the thread's local control tree.
		bli_l3_cntl_free( rntm_p, cntl_use, thread );

#ifdef PRINT_THRINFO
		// Keep the thrinfo_t alive so it can be printed after the region.
		threads[tid] = thread;
#else
		// Free the current thread's thrinfo_t structure.
		bli_l3_thrinfo_free( rntm_p, thread );
#endif
	}

	// We shouldn't free the global communicator since it was already freed
	// by the global communicator's chief thread in bli_l3_thrinfo_free()
	// (called above).

#ifdef PRINT_THRINFO
	if ( family != BLIS_TRSM ) bli_l3_thrinfo_print_gemm_paths( threads );
	else                       bli_l3_thrinfo_print_trsm_paths( threads );
	exit(1);
#endif

	// Check the array_t back into the small block allocator. Similar to the
	// check-out, this is done using a lock embedded within the sba to ensure
	// mutual exclusion.
	bli_sba_checkin_array( array );
}

// -----------------------------------------------------------------------------

// Compares the number of threads OpenMP reports for the current parallel
// region against the number BLIS requested (n_threads).
//   - Equal: nothing is done.
//   - Region has exactly one thread: gl_comm is re-initialized for one thread
//     and rntm is reset to one thread and 1x1x1x1x1 ways, so the operation
//     proceeds sequentially in this thread.
//   - Any other count: a message is printed and bli_abort() is called.
// NOTE: tid is not used by the active code; it is only referenced by the
// commented-out "if ( tid == 0 )" guard.
void bli_l3_thread_decorator_thread_check
     (
       dim_t      n_threads,
       dim_t      tid,
       thrcomm_t* gl_comm,
       rntm_t*    rntm
     )
{
	dim_t n_threads_real = omp_get_num_threads();

	// Check if the number of OpenMP threads created within this parallel
	// region is different from the number of threads that were requested
	// of BLIS. This inequality may trigger when, for example, the
	// following conditions are satisfied:
	// - an application is executing an OpenMP parallel region in which
	//   BLIS is invoked,
	// - BLIS is configured for multithreading via OpenMP,
	// - OMP_NUM_THREADS = t > 1,
	// - the number of threads requested of BLIS (regardless of method)
	//   is p <= t,
	// - OpenMP nesting is disabled.
	// In this situation, the application spawns t threads. Each application
	// thread calls gemm (for example). Each gemm will attempt to spawn p
	// threads via OpenMP. However, since nesting is disabled, the OpenMP
	// implementation finds that t >= p threads are already spawned, and
	// thus it doesn't spawn *any* additional threads for each gemm.
	if ( n_threads_real != n_threads )
	{
		// If the number of threads active in the current region is not
		// equal to the number requested of BLIS, we then only continue
		// if the number of threads in the current region is 1. If, for
		// example, BLIS requested 4 threads but only got 3, then we
		// abort().
		//if ( tid == 0 )
		//{
			if ( n_threads_real != 1 )
			{
				bli_print_msg( "A different number of threads was "
				               "created than was requested.",
				               __FILE__, __LINE__ );
				bli_abort();
			}

			//n_threads = 1; // not needed since it has no effect?
			bli_thrcomm_init( 1, gl_comm );
			bli_rntm_set_num_threads_only( 1, rntm );
			bli_rntm_set_ways_only( 1, 1, 1, 1, 1, rntm );
		//}

		// Synchronize all threads and continue.
		_Pragma( "omp barrier" )
	}
}

#endif

blis-0.6.1/frame/thread/bli_l3_decor_openmp.h000066400000000000000000000040211360743507500210660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_L3_DECOR_OPENMP_H
#define BLIS_L3_DECOR_OPENMP_H

// Definitions specific to situations when OpenMP multithreading is enabled.
#ifdef BLIS_ENABLE_OPENMP

// Checks, from inside the decorator's OpenMP parallel region, whether the
// region has the number of threads that was requested (n_threads). The
// definition lives in bli_l3_decor_openmp.c.
void bli_l3_thread_decorator_thread_check
     (
       dim_t      n_threads,
       dim_t      tid,
       thrcomm_t* gl_comm,
       rntm_t*    rntm
     );

#endif

#endif

blis-0.6.1/frame/thread/bli_l3_decor_pthreads.c000066400000000000000000000206611360743507500214050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

#ifdef BLIS_ENABLE_PTHREADS

// A data structure to assist in passing operands to additional threads.
// bli_l3_thread_decorator() fills in one instance per thread id and passes
// its address to bli_l3_thread_entry(), which unpacks it again.
typedef struct thread_data
{
	l3int_t    func;
	opid_t     family;
	pack_t     schema_a;
	pack_t     schema_b;
	obj_t*     alpha;
	obj_t*     a;
	obj_t*     b;
	obj_t*     beta;
	obj_t*     c;
	cntx_t*    cntx;
	rntm_t*    rntm;
	cntl_t*    cntl;
	dim_t      tid;
	thrcomm_t* gl_comm;
	array_t*   array;
} thread_data_t;

// Entry point for additional threads.
// Also called directly (not via bli_pthread_create()) by the decorator for
// thread id 0.
//   data_void - pointer to this thread's thread_data_t.
// Always returns NULL.
void* bli_l3_thread_entry( void* data_void )
{
	thread_data_t* data     = data_void;

	l3int_t        func     = data->func;
	opid_t         family   = data->family;
	pack_t         schema_a = data->schema_a;
	pack_t         schema_b = data->schema_b;
	obj_t*         alpha    = data->alpha;
	obj_t*         a        = data->a;
	obj_t*         b        = data->b;
	obj_t*         beta     = data->beta;
	obj_t*         c        = data->c;
	cntx_t*        cntx     = data->cntx;
	rntm_t*        rntm     = data->rntm;
	cntl_t*        cntl     = data->cntl;
	dim_t          tid      = data->tid;
	array_t*       array    = data->array;
	thrcomm_t*     gl_comm  = data->gl_comm;

	// Create a thread-local copy of the master thread's rntm_t. This is
	// necessary since we want each thread to be able to track its own
	// small block pool_t as it executes down the function stack.
	rntm_t           rntm_l = *rntm;
	rntm_t* restrict rntm_p = &rntm_l;

	// Use the thread id to access the appropriate pool_t* within the
	// array_t, and use it to set the sba_pool field within the rntm_t.
	// If the pool_t* element within the array_t is NULL, it will first
	// be allocated/initialized.
	bli_sba_rntm_set_pool( tid, array, rntm_p );

	obj_t           a_t, b_t, c_t;
	cntl_t*         cntl_use;
	thrinfo_t*      thread;

	// Alias thread-local copies of A, B, and C. These will be the objects
	// we pass down the algorithmic function stack. Making thread-local
	// aliases is highly recommended in case a thread needs to change any
	// of the properties of an object without affecting other threads'
	// objects.
	bli_obj_alias_to( a, &a_t );
	bli_obj_alias_to( b, &b_t );
	bli_obj_alias_to( c, &c_t );

	// Create a default control tree for the operation, if needed.
	bli_l3_cntl_create_if( family, schema_a, schema_b,
	                       &a_t, &b_t, &c_t, rntm_p, cntl, &cntl_use );

	// Create the root node of the current thread's thrinfo_t structure.
	bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread );

	func
	(
	  alpha,
	  &a_t,
	  &b_t,
	  beta,
	  &c_t,
	  cntx,
	  rntm_p,
	  cntl_use,
	  thread
	);

	// Free the thread's local control tree.
	bli_l3_cntl_free( rntm_p, cntl_use, thread );

	// Free the current thread's thrinfo_t structure.
	bli_l3_thrinfo_free( rntm_p, thread );

	return NULL;
}

// POSIX-threads implementation of the level-3 thread decorator.
// Spawns n_threads - 1 additional threads (the count is read from rntm), runs
// thread id 0 in the calling thread, and joins the spawned threads before
// returning. Every thread executes bli_l3_thread_entry().
// Side effect on the caller's objects: the pack schemas of a and b are read
// and then reset to BLIS_NOT_PACKED, and rntm has its sba pool and membrk
// fields set.
// Aborts via bli_abort() if a thread cannot be created.
void bli_l3_thread_decorator
     (
       l3int_t func,
       opid_t  family,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	// This is part of a hack to support mixed domain in bli_gemm_front().
	// Sometimes we need to specify a non-standard schema for A and B, and
	// we decided to transmit them via the schema field in the obj_t's
	// rather than pass them in as function parameters. Once the values
	// have been read, we immediately reset them back to their expected
	// values for unpacked objects.
	pack_t schema_a = bli_obj_pack_schema( a );
	pack_t schema_b = bli_obj_pack_schema( b );
	bli_obj_set_pack_schema( BLIS_NOT_PACKED, a );
	bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );

	// Query the total number of threads from the rntm_t object.
	const dim_t n_threads = bli_rntm_num_threads( rntm );

	// NOTE: The sba was initialized in bli_init().

	// Check out an array_t from the small block allocator. This is done
	// with an internal lock to ensure only one application thread accesses
	// the sba at a time. bli_sba_checkout_array() will also automatically
	// resize the array_t, if necessary.
	array_t* restrict array = bli_sba_checkout_array( n_threads );

	// Access the pool_t* for thread 0 and embed it into the rntm. We do
	// this up-front only so that we have the rntm_t.sba_pool field
	// initialized and ready for the global communicator creation below.
	bli_sba_rntm_set_pool( 0, array, rntm );

	// Set the packing block allocator field of the rntm. This will be
	// inherited by all of the child threads when they make local copies of
	// the rntm below.
	bli_membrk_rntm_set_membrk( rntm );

	// Allocate a global communicator for the root thrinfo_t structures.
	thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

	// Allocate an array of pthread objects and auxiliary data structs to pass
	// to the thread entry functions.

	#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_l3_thread_decorator().pth: " );
	#endif
	bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads );

	#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_l3_thread_decorator().pth: " );
	#endif
	thread_data_t* datas    = bli_malloc_intl( sizeof( thread_data_t ) * n_threads );

	// NOTE: We must iterate backwards so that the chief thread (thread id 0)
	// can spawn all other threads before proceeding with its own computation.
	for ( dim_t tid = n_threads - 1; 0 <= tid; tid-- )
	{
		// Set up thread data for every thread, including thread 0.
		datas[tid].func     = func;
		datas[tid].family   = family;
		datas[tid].schema_a = schema_a;
		datas[tid].schema_b = schema_b;
		datas[tid].alpha    = alpha;
		datas[tid].a        = a;
		datas[tid].b        = b;
		datas[tid].beta     = beta;
		datas[tid].c        = c;
		datas[tid].cntx     = cntx;
		datas[tid].rntm     = rntm;
		datas[tid].cntl     = cntl;
		datas[tid].tid      = tid;
		datas[tid].gl_comm  = gl_comm;
		datas[tid].array    = array;

		// Spawn additional threads for ids greater than 0; thread 0 runs the
		// entry function directly in the calling thread.
		if ( tid != 0 )
		{
			// A failed spawn cannot be recovered from here: the join loop
			// below would operate on an uninitialized pthreads[tid], and the
			// communicator was created for n_threads participants. Treat it
			// as fatal, as the OpenMP decorator does for a thread-count
			// mismatch.
			if ( bli_pthread_create( &pthreads[tid], NULL,
			                         &bli_l3_thread_entry, &datas[tid] ) != 0 )
			{
				bli_print_msg( "Failed to create a thread.",
				               __FILE__, __LINE__ );
				bli_abort();
			}
		}
		else
		{
			bli_l3_thread_entry( ( void* )(&datas[0]) );
		}
	}

	// We shouldn't free the global communicator since it was already freed
	// by the global communicator's chief thread in bli_l3_thrinfo_free()
	// (called from the thread entry function).

	// Thread 0 waits for additional threads to finish.
	for ( dim_t tid = 1; tid < n_threads; tid++ )
	{
		bli_pthread_join( pthreads[tid], NULL );
	}

	// Check the array_t back into the small block allocator. Similar to the
	// check-out, this is done using a lock embedded within the sba to ensure
	// mutual exclusion.
	bli_sba_checkin_array( array );

	#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_l3_thread_decorator().pth: " );
	#endif
	bli_free_intl( pthreads );

	#ifdef BLIS_ENABLE_MEM_TRACING
	printf( "bli_l3_thread_decorator().pth: " );
	#endif
	bli_free_intl( datas );
}

#endif

blis-0.6.1/frame/thread/bli_l3_decor_pthreads.h000066400000000000000000000036251360743507500214130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef BLIS_L3_DECOR_PTHREADS_H
#define BLIS_L3_DECOR_PTHREADS_H

// Definitions specific to situations when POSIX multithreading is enabled.
#ifdef BLIS_ENABLE_PTHREADS

// Thread entry point prototype.
// data_void is an untyped pointer to the per-thread argument struct that the
// pthreads decorator builds; the function has the void* (*)(void*) shape
// required of a thread start routine.
void* bli_l3_thread_entry( void* data_void );

#endif

#endif

blis-0.6.1/frame/thread/bli_l3_decor_single.c000066400000000000000000000126431360743507500210550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

#ifndef BLIS_ENABLE_MULTITHREADING

// Single-threaded implementation of the level-3 thread decorator.
// Runs func once, in the calling thread, with thread id 0 and n_threads fixed
// at 1. Unlike the OpenMP and pthreads variants it passes the caller's rntm
// and the caller's a, b and c objects straight through instead of making
// local copies/aliases.
// Side effect on the caller's objects: the pack schemas of a and b are read
// and then reset to BLIS_NOT_PACKED, and rntm has its sba pool and membrk
// fields set.
void bli_l3_thread_decorator
     (
       l3int_t func,
       opid_t  family,
       obj_t*  alpha,
       obj_t*  a,
       obj_t*  b,
       obj_t*  beta,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl
     )
{
	// This is part of a hack to support mixed domain in bli_gemm_front().
	// Sometimes we need to specify a non-standard schema for A and B, and
	// we decided to transmit them via the schema field in the obj_t's
	// rather than pass them in as function parameters. Once the values
	// have been read, we immediately reset them back to their expected
	// values for unpacked objects.
	pack_t schema_a = bli_obj_pack_schema( a );
	pack_t schema_b = bli_obj_pack_schema( b );
	bli_obj_set_pack_schema( BLIS_NOT_PACKED, a );
	bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );

	// For sequential execution, we use only one thread.
	const dim_t n_threads = 1;

	// NOTE: The sba was initialized in bli_init().

	// Check out an array_t from the small block allocator. This is done
	// with an internal lock to ensure only one application thread accesses
	// the sba at a time. bli_sba_checkout_array() will also automatically
	// resize the array_t, if necessary.
	array_t* restrict array = bli_sba_checkout_array( n_threads );

	// Access the pool_t* for thread 0 and embed it into the rntm. We do
	// this up-front only so that we can create the global comm below.
	bli_sba_rntm_set_pool( 0, array, rntm );

	// Set the packing block allocator field of the rntm.
	bli_membrk_rntm_set_membrk( rntm );

	// Allocate a global communicator for the root thrinfo_t structures.
	thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );

	{
		// NOTE: We don't need to create another copy of the rntm_t since
		// it was already copied in one of the high-level oapi functions.
		rntm_t* restrict rntm_p = rntm;

		cntl_t*          cntl_use;
		thrinfo_t*       thread;

		const dim_t      tid = 0;

		// Use the thread id to access the appropriate pool_t* within the
		// array_t, and use it to set the sba_pool field within the rntm_t.
		// If the pool_t* element within the array_t is NULL, it will first
		// be allocated/initialized.
		// NOTE: This is commented out because, in the single-threaded case,
		// this is redundant since it's already been done above.
		//bli_sba_rntm_set_pool( tid, array, rntm_p );

		// NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't
		// need to alias objects for A, B, and C since they were already aliased
		// in bli_*_front(). However, we may add aliasing here in the future so
		// that, with all three (_single.c, _openmp.c, _pthreads.c) implementations
		// consistently providing local aliases, we can then eliminate aliasing
		// elsewhere.

		// Create a default control tree for the operation, if needed.
		bli_l3_cntl_create_if( family, schema_a, schema_b,
		                       a, b, c, rntm_p, cntl, &cntl_use );

		// Create the root node of the thread's thrinfo_t structure.
		bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread );

		func
		(
		  alpha,
		  a,
		  b,
		  beta,
		  c,
		  cntx,
		  rntm_p,
		  cntl_use,
		  thread
		);

		// Free the thread's local control tree.
		bli_l3_cntl_free( rntm_p, cntl_use, thread );

		// Free the current thread's thrinfo_t structure.
		bli_l3_thrinfo_free( rntm_p, thread );
	}

	// We shouldn't free the global communicator since it was already freed
	// by the global communicator's chief thread in bli_l3_thrinfo_free()
	// (called above).

	// Check the array_t back into the small block allocator. Similar to the
	// check-out, this is done using a lock embedded within the sba to ensure
	// mutual exclusion.
	bli_sba_checkin_array( array );
}

#endif

blis-0.6.1/frame/thread/bli_l3_decor_single.h000066400000000000000000000035031360743507500210550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_L3_DECOR_SINGLE_H
#define BLIS_L3_DECOR_SINGLE_H

// Definitions specific to situations when multithreading is disabled.
#ifndef BLIS_ENABLE_MULTITHREADING #endif #endif blis-0.6.1/frame/thread/bli_l3_sup_decor.h000066400000000000000000000052521360743507500204060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_L3_SUP_DECOR_H #define BLIS_L3_SUP_DECOR_H // -- sup definitions ---------------------------------------------------------- // Level-3 sup internal function type. 
typedef err_t (*l3supint_t) ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); // Level-3 sup thread decorator prototype. err_t bli_l3_sup_thread_decorator ( l3supint_t func, opid_t family, //pack_t schema_a, //pack_t schema_b, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ); // Include definitions specific to the method of multithreading for the // sup code path. #include "bli_l3_sup_decor_single.h" #include "bli_l3_sup_decor_openmp.h" #include "bli_l3_sup_decor_pthreads.h" #endif blis-0.6.1/frame/thread/bli_l3_sup_decor_openmp.c000066400000000000000000000141711360743507500217570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_OPENMP // Define a dummy function bli_l3_sup_thread_entry(), which is needed in the // pthreads version, so that when building Windows DLLs (with OpenMP enabled // or no multithreading) we don't risk having an unresolved symbol. //void* bli_l3_sup_thread_entry( void* data_void ) { return NULL; } err_t bli_l3_sup_thread_decorator ( l3supint_t func, opid_t family, //pack_t schema_a, //pack_t schema_b, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { #if 0 return bli_gemmsup_int ( alpha, a, b, beta, c, cntx, rntm, 0 ); #else // This is part of a hack to support mixed domain in bli_gemm_front(). // Sometimes we need to specify a non-standard schema for A and B, and // we decided to transmit them via the schema field in the obj_t's // rather than pass them in as function parameters. Once the values // have been read, we immediately reset them back to their expected // values for unpacked objects. //pack_t schema_a = bli_obj_pack_schema( a ); //pack_t schema_b = bli_obj_pack_schema( b ); //bli_obj_set_pack_schema( BLIS_NOT_PACKED, a ); //bli_obj_set_pack_schema( BLIS_NOT_PACKED, b ); // For sequential execution, we use only one thread. const dim_t n_threads = 1; // NOTE: The sba was initialized in bli_init(). // Check out an array_t from the small block allocator. This is done // with an internal lock to ensure only one application thread accesses // the sba at a time. 
bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. array_t* restrict array = bli_sba_checkout_array( n_threads ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we can create the global comm below. bli_sba_rntm_set_pool( 0, array, rntm ); // Set the packing block allocator field of the rntm. bli_membrk_rntm_set_membrk( rntm ); #if 0 // Allcoate a global communicator for the root thrinfo_t structures. thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads ); #endif { // NOTE: We don't need to create another copy of the rntm_t since // it was already copied in one of the high-level oapi functions. rntm_t* restrict rntm_p = rntm; cntl_t* cntl_use = NULL; //thrinfo_t* thread = NULL; thrinfo_t* thread = &BLIS_PACKM_SINGLE_THREADED; const dim_t tid = 0; // Use the thread id to access the appropriate pool_t* within the // array_t, and use it to set the sba_pool field within the rntm_t. // If the pool_t* element within the array_t is NULL, it will first // be allocated/initialized. // NOTE: This is commented out because, in the single-threaded case, // this is redundant since it's already been done above. //bli_sba_rntm_set_pool( tid, array, rntm_p ); // NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't // need to alias objects for A, B, and C since they were already aliased // in bli_*_front(). However, we may add aliasing here in the future so // that, with all three (_single.c, _openmp.c, _pthreads.c) implementations // consistently providing local aliases, we can then eliminate aliasing // elsewhere. // Create a default control tree for the operation, if needed. //bli_l3_cntl_create_if( family, schema_a, schema_b, // a, b, c, rntm_p, cntl, &cntl_use ); #if 0 cntl_use = bli_gemm_cntl_create( rntm_p, family, schema_a, schema_b ); // Create the root node of the thread's thrinfo_t structure. 
bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread ); #endif ( void )tid; func ( alpha, a, b, beta, c, cntx, rntm_p, cntl_use, thread ); #if 0 // Free the thread's local control tree. //bli_l3_cntl_free( rntm_p, cntl_use, thread ); bli_gemm_cntl_free( rntm_p, cntl_use, thread ); // Free the current thread's thrinfo_t structure. bli_l3_thrinfo_free( rntm_p, thread ); #endif } // We shouldn't free the global communicator since it was already freed // by the global communicator's chief thread in bli_l3_thrinfo_free() // (called above). // Check the array_t back into the small block allocator. Similar to the // check-out, this is done using a lock embedded within the sba to ensure // mutual exclusion. bli_sba_checkin_array( array ); return BLIS_SUCCESS; #endif } #endif blis-0.6.1/frame/thread/bli_l3_sup_decor_openmp.h000066400000000000000000000035101360743507500217570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_L3_SUP_DECOR_OPENMP_H #define BLIS_L3_SUP_DECOR_OPENMP_H // Definitions specific to situations when OpenMP multithreading is enabled. #ifdef BLIS_ENABLE_OPENMP #endif #endif blis-0.6.1/frame/thread/bli_l3_sup_decor_pthreads.c000066400000000000000000000135261360743507500222760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_PTHREADS err_t bli_l3_sup_thread_decorator ( l3supint_t func, opid_t family, //pack_t schema_a, //pack_t schema_b, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { #if 0 return bli_gemmsup_int ( alpha, a, b, beta, c, cntx, rntm, 0 ); #else // This is part of a hack to support mixed domain in bli_gemm_front(). // Sometimes we need to specify a non-standard schema for A and B, and // we decided to transmit them via the schema field in the obj_t's // rather than pass them in as function parameters. Once the values // have been read, we immediately reset them back to their expected // values for unpacked objects. //pack_t schema_a = bli_obj_pack_schema( a ); //pack_t schema_b = bli_obj_pack_schema( b ); //bli_obj_set_pack_schema( BLIS_NOT_PACKED, a ); //bli_obj_set_pack_schema( BLIS_NOT_PACKED, b ); // For sequential execution, we use only one thread. const dim_t n_threads = 1; // NOTE: The sba was initialized in bli_init(). // Check out an array_t from the small block allocator. This is done // with an internal lock to ensure only one application thread accesses // the sba at a time. bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. array_t* restrict array = bli_sba_checkout_array( n_threads ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we can create the global comm below. 
bli_sba_rntm_set_pool( 0, array, rntm ); // Set the packing block allocator field of the rntm. bli_membrk_rntm_set_membrk( rntm ); #if 0 // Allcoate a global communicator for the root thrinfo_t structures. thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads ); #endif { // NOTE: We don't need to create another copy of the rntm_t since // it was already copied in one of the high-level oapi functions. rntm_t* restrict rntm_p = rntm; cntl_t* cntl_use = NULL; //thrinfo_t* thread = NULL; thrinfo_t* thread = &BLIS_PACKM_SINGLE_THREADED; const dim_t tid = 0; // Use the thread id to access the appropriate pool_t* within the // array_t, and use it to set the sba_pool field within the rntm_t. // If the pool_t* element within the array_t is NULL, it will first // be allocated/initialized. // NOTE: This is commented out because, in the single-threaded case, // this is redundant since it's already been done above. //bli_sba_rntm_set_pool( tid, array, rntm_p ); // NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't // need to alias objects for A, B, and C since they were already aliased // in bli_*_front(). However, we may add aliasing here in the future so // that, with all three (_single.c, _openmp.c, _pthreads.c) implementations // consistently providing local aliases, we can then eliminate aliasing // elsewhere. // Create a default control tree for the operation, if needed. //bli_l3_cntl_create_if( family, schema_a, schema_b, // a, b, c, rntm_p, cntl, &cntl_use ); #if 0 cntl_use = bli_gemm_cntl_create( rntm_p, family, schema_a, schema_b ); // Create the root node of the thread's thrinfo_t structure. bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread ); #endif ( void )tid; func ( alpha, a, b, beta, c, cntx, rntm_p, cntl_use, thread ); #if 0 // Free the thread's local control tree. //bli_l3_cntl_free( rntm_p, cntl_use, thread ); bli_gemm_cntl_free( rntm_p, cntl_use, thread ); // Free the current thread's thrinfo_t structure. 
bli_l3_thrinfo_free( rntm_p, thread ); #endif } // We shouldn't free the global communicator since it was already freed // by the global communicator's chief thread in bli_l3_thrinfo_free() // (called above). // Check the array_t back into the small block allocator. Similar to the // check-out, this is done using a lock embedded within the sba to ensure // mutual exclusion. bli_sba_checkin_array( array ); return BLIS_SUCCESS; #endif } #endif blis-0.6.1/frame/thread/bli_l3_sup_decor_pthreads.h000066400000000000000000000036411360743507500223000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_L3_SUP_DECOR_PTHREADS_H #define BLIS_L3_SUP_DECOR_PTHREADS_H // Definitions specific to situations when POSIX multithreading is enabled. #ifdef BLIS_ENABLE_PTHREADS // Thread entry point prototype. void* bli_l3_sup_thread_entry( void* data_void ); #endif #endif blis-0.6.1/frame/thread/bli_l3_sup_decor_single.c000066400000000000000000000135351360743507500217450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifndef BLIS_ENABLE_MULTITHREADING err_t bli_l3_sup_thread_decorator ( l3supint_t func, opid_t family, //pack_t schema_a, //pack_t schema_b, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { #if 0 return bli_gemmsup_int ( alpha, a, b, beta, c, cntx, rntm, 0 ); #else // This is part of a hack to support mixed domain in bli_gemm_front(). // Sometimes we need to specify a non-standard schema for A and B, and // we decided to transmit them via the schema field in the obj_t's // rather than pass them in as function parameters. Once the values // have been read, we immediately reset them back to their expected // values for unpacked objects. //pack_t schema_a = bli_obj_pack_schema( a ); //pack_t schema_b = bli_obj_pack_schema( b ); //bli_obj_set_pack_schema( BLIS_NOT_PACKED, a ); //bli_obj_set_pack_schema( BLIS_NOT_PACKED, b ); // For sequential execution, we use only one thread. const dim_t n_threads = 1; // NOTE: The sba was initialized in bli_init(). // Check out an array_t from the small block allocator. This is done // with an internal lock to ensure only one application thread accesses // the sba at a time. 
bli_sba_checkout_array() will also automatically // resize the array_t, if necessary. array_t* restrict array = bli_sba_checkout_array( n_threads ); // Access the pool_t* for thread 0 and embed it into the rntm. We do // this up-front only so that we can create the global comm below. bli_sba_rntm_set_pool( 0, array, rntm ); // Set the packing block allocator field of the rntm. bli_membrk_rntm_set_membrk( rntm ); #if 0 // Allcoate a global communicator for the root thrinfo_t structures. thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads ); #endif { // NOTE: We don't need to create another copy of the rntm_t since // it was already copied in one of the high-level oapi functions. rntm_t* restrict rntm_p = rntm; cntl_t* cntl_use = NULL; //thrinfo_t* thread = NULL; thrinfo_t* thread = &BLIS_PACKM_SINGLE_THREADED; const dim_t tid = 0; // Use the thread id to access the appropriate pool_t* within the // array_t, and use it to set the sba_pool field within the rntm_t. // If the pool_t* element within the array_t is NULL, it will first // be allocated/initialized. // NOTE: This is commented out because, in the single-threaded case, // this is redundant since it's already been done above. //bli_sba_rntm_set_pool( tid, array, rntm_p ); // NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't // need to alias objects for A, B, and C since they were already aliased // in bli_*_front(). However, we may add aliasing here in the future so // that, with all three (_single.c, _openmp.c, _pthreads.c) implementations // consistently providing local aliases, we can then eliminate aliasing // elsewhere. // Create a default control tree for the operation, if needed. //bli_l3_cntl_create_if( family, schema_a, schema_b, // a, b, c, rntm_p, cntl, &cntl_use ); #if 0 cntl_use = bli_gemm_cntl_create( rntm_p, family, schema_a, schema_b ); // Create the root node of the thread's thrinfo_t structure. 
bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread ); #endif ( void )tid; func ( alpha, a, b, beta, c, cntx, rntm_p, cntl_use, thread ); #if 0 // Free the thread's local control tree. //bli_l3_cntl_free( rntm_p, cntl_use, thread ); bli_gemm_cntl_free( rntm_p, cntl_use, thread ); // Free the current thread's thrinfo_t structure. bli_l3_thrinfo_free( rntm_p, thread ); #endif } // We shouldn't free the global communicator since it was already freed // by the global communicator's chief thread in bli_l3_thrinfo_free() // (called above). // Check the array_t back into the small block allocator. Similar to the // check-out, this is done using a lock embedded within the sba to ensure // mutual exclusion. bli_sba_checkin_array( array ); return BLIS_SUCCESS; #endif } #endif blis-0.6.1/frame/thread/bli_l3_sup_decor_single.h000066400000000000000000000035131360743507500217450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_L3_SUP_DECOR_SINGLE_H #define BLIS_L3_SUP_DECOR_SINGLE_H // Definitions specific to situations when multithreading is disabled. #ifndef BLIS_ENABLE_MULTITHREADING #endif #endif blis-0.6.1/frame/thread/bli_pthread.c000066400000000000000000000223111360743507500174420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018, Southern Methodist University Copyright (C) 2018, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include #if defined(_MSC_VER) // This branch defines a pthread-like API, bli_pthread_*(), and implements it // in terms of Windows API calls. int bli_pthread_mutex_init ( bli_pthread_mutex_t* mutex, const bli_pthread_mutexattr_t* attr ) { if ( attr ) return EINVAL; InitializeSRWLock( mutex ); return 0; } int bli_pthread_mutex_destroy ( bli_pthread_mutex_t* mutex ) { return 0; } int bli_pthread_mutex_lock ( bli_pthread_mutex_t* mutex ) { AcquireSRWLockExclusive( mutex ); return 0; } int bli_pthread_mutex_trylock ( bli_pthread_mutex_t* mutex ) { return TryAcquireSRWLockExclusive( mutex ) ? 
0 : EBUSY; } int bli_pthread_mutex_unlock ( bli_pthread_mutex_t* mutex ) { ReleaseSRWLockExclusive( mutex ); return 0; } static BOOL bli_init_once_wrapper ( bli_pthread_once_t* once, void* param, void** context ) { ( void )once; ( void )context; typedef void (*callback)( void ); ((callback)param)(); return TRUE; } void bli_pthread_once ( bli_pthread_once_t* once, void (*init)(void) ) { InitOnceExecuteOnce( once, bli_init_once_wrapper, init, NULL ); } int bli_pthread_cond_init ( bli_pthread_cond_t* cond, const bli_pthread_condattr_t* attr ) { if ( attr ) return EINVAL; InitializeConditionVariable( cond ); return 0; } int bli_pthread_cond_destroy ( bli_pthread_cond_t* cond ) { ( void )cond; return 0; } int bli_pthread_cond_wait ( bli_pthread_cond_t* cond, bli_pthread_mutex_t* mutex ) { if ( !SleepConditionVariableSRW( cond, mutex, INFINITE, 0 ) ) return EAGAIN; return 0; } int bli_pthread_cond_broadcast ( bli_pthread_cond_t* cond ) { WakeAllConditionVariable( cond ); return 0; } typedef struct { void* (*start_routine)( void* ); void* param; void** retval; } bli_thread_param; static DWORD bli_thread_func ( void* param_ ) { bli_thread_param* param = param_; *param->retval = param->start_routine( param->param ); return 0; } int bli_pthread_create ( bli_pthread_t* thread, const bli_pthread_attr_t* attr, void* (*start_routine)(void*), void* arg ) { if ( attr ) return EINVAL; bli_thread_param param = { start_routine, arg, &thread->retval }; thread->handle = CreateThread( NULL, 0, bli_thread_func, ¶m, 0, NULL ); if ( !thread->handle ) return EAGAIN; return 0; } int bli_pthread_join ( bli_pthread_t thread, void** retval ) { if ( !WaitForSingleObject( thread.handle, INFINITE ) ) return EAGAIN; if ( retval ) *retval = thread.retval; return 0; } #else // !defined(_MSC_VER) // This branch defines a pthreads-like API, bli_pthreads_*(), and implements it // in terms of the corresponding pthreads_*() types, macros, and function calls. 
// This branch is compiled for Linux and other non-Windows environments where // we assume that *some* implementation of pthreads is provided (although it // may lack barriers--see below). // -- pthread_create(), pthread_join() -- int bli_pthread_create ( bli_pthread_t* thread, const bli_pthread_attr_t* attr, void* (*start_routine)(void*), void* arg ) { return pthread_create( thread, attr, start_routine, arg ); } int bli_pthread_join ( bli_pthread_t thread, void** retval ) { return pthread_join( thread, retval ); } // -- pthread_mutex_*() -- int bli_pthread_mutex_init ( bli_pthread_mutex_t* mutex, const bli_pthread_mutexattr_t* attr ) { return pthread_mutex_init( mutex, attr ); } int bli_pthread_mutex_destroy ( bli_pthread_mutex_t* mutex ) { return pthread_mutex_destroy( mutex ); } int bli_pthread_mutex_lock ( bli_pthread_mutex_t* mutex ) { return pthread_mutex_lock( mutex ); } int bli_pthread_mutex_trylock ( bli_pthread_mutex_t* mutex ) { return pthread_mutex_trylock( mutex ); } int bli_pthread_mutex_unlock ( bli_pthread_mutex_t* mutex ) { return pthread_mutex_unlock( mutex ); } // -- pthread_cond_*() -- int bli_pthread_cond_init ( bli_pthread_cond_t* cond, const bli_pthread_condattr_t* attr ) { return pthread_cond_init( cond, attr ); } int bli_pthread_cond_destroy ( bli_pthread_cond_t* cond ) { return pthread_cond_destroy( cond ); } int bli_pthread_cond_wait ( bli_pthread_cond_t* cond, bli_pthread_mutex_t* mutex ) { return pthread_cond_wait( cond, mutex ); } int bli_pthread_cond_broadcast ( bli_pthread_cond_t* cond ) { return pthread_cond_broadcast( cond ); } // -- pthread_once() -- void bli_pthread_once ( bli_pthread_once_t* once, void (*init)(void) ) { pthread_once( once, init ); } #endif // _MSC_VER // -- pthread_barrier_*() -- #if defined(__APPLE__) || defined(_MSC_VER) // For OS X and Windows, we define barriers ourselves in terms of the rest // of the API, though for slightly different reasons: For Windows, we must // define barriers because we are defining 
*everything* from scratch. For // OS X, we must define barriers because Apple chose to omit barriers from // their implementation of POSIX threads (since barriers are actually // optional to the POSIX standard). int bli_pthread_barrier_init ( bli_pthread_barrier_t* barrier, const bli_pthread_barrierattr_t* attr, unsigned int count ) { if ( attr ) return EINVAL; if ( count == 0 ) return EINVAL; int err; if ( (err = bli_pthread_mutex_init( &barrier->mutex, 0 )) != 0 ) return err; if ( (err = bli_pthread_cond_init( &barrier->cond, 0 )) != 0 ) { bli_pthread_mutex_destroy( &barrier->mutex ); return err; } barrier->tripCount = count; barrier->count = 0; return 0; } int bli_pthread_barrier_destroy ( bli_pthread_barrier_t *barrier ) { bli_pthread_cond_destroy( &barrier->cond ); bli_pthread_mutex_destroy( &barrier->mutex ); return 0; } int bli_pthread_barrier_wait ( bli_pthread_barrier_t *barrier ) { bli_pthread_mutex_lock( &barrier->mutex ); ++(barrier->count); if ( barrier->count >= barrier->tripCount ) { barrier->count = 0; bli_pthread_cond_broadcast( &barrier->cond ); bli_pthread_mutex_unlock( &barrier->mutex ); return 1; } else { bli_pthread_cond_wait( &barrier->cond, &(barrier->mutex) ); bli_pthread_mutex_unlock( &barrier->mutex ); return 0; } } #else // !( defined(__APPLE__) || defined(_MSC_VER) ) // Linux environments implement the pthread_barrier* sub-API. So, if we're // on Linux, we can simply call those functions, just as we did before for // the other functions. 
int bli_pthread_barrier_init ( bli_pthread_barrier_t* barrier, const bli_pthread_barrierattr_t* attr, unsigned int count ) { return pthread_barrier_init( barrier, attr, count ); } int bli_pthread_barrier_destroy ( bli_pthread_barrier_t* barrier ) { return pthread_barrier_destroy( barrier ); } int bli_pthread_barrier_wait ( bli_pthread_barrier_t* barrier ) { return pthread_barrier_wait( barrier ); } #endif // defined(__APPLE__) || defined(_MSC_VER) blis-0.6.1/frame/thread/bli_pthread.h000066400000000000000000000200001360743507500174400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018, Southern Methodist University Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_PTHREAD_H #define BLIS_PTHREAD_H #if defined(_MSC_VER) // This branch defines a pthread-like API, bli_pthread_*(), and implements it // in terms of Windows API calls. // -- pthread_mutex_*() -- typedef SRWLOCK bli_pthread_mutex_t; typedef void bli_pthread_mutexattr_t; #define BLIS_PTHREAD_MUTEX_INITIALIZER SRWLOCK_INIT BLIS_EXPORT_BLIS int bli_pthread_mutex_init ( bli_pthread_mutex_t* mutex, const bli_pthread_mutexattr_t* attr ); BLIS_EXPORT_BLIS int bli_pthread_mutex_destroy ( bli_pthread_mutex_t* mutex ); BLIS_EXPORT_BLIS int bli_pthread_mutex_lock ( bli_pthread_mutex_t* mutex ); BLIS_EXPORT_BLIS int bli_pthread_mutex_trylock ( bli_pthread_mutex_t* mutex ); BLIS_EXPORT_BLIS int bli_pthread_mutex_unlock ( bli_pthread_mutex_t* mutex ); // -- pthread_once_*() -- typedef INIT_ONCE bli_pthread_once_t; #define BLIS_PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT BLIS_EXPORT_BLIS void bli_pthread_once ( bli_pthread_once_t* once, void (*init)(void) ); // -- pthread_cond_*() -- typedef CONDITION_VARIABLE bli_pthread_cond_t; typedef void bli_pthread_condattr_t; #define BLIS_PTHREAD_COND_INITIALIZER CONDITION_VARIABLE_INIT BLIS_EXPORT_BLIS int bli_pthread_cond_init ( bli_pthread_cond_t* cond, const bli_pthread_condattr_t* attr ); BLIS_EXPORT_BLIS int bli_pthread_cond_destroy ( bli_pthread_cond_t* cond ); BLIS_EXPORT_BLIS int bli_pthread_cond_wait ( bli_pthread_cond_t* cond, bli_pthread_mutex_t* mutex ); BLIS_EXPORT_BLIS int bli_pthread_cond_broadcast ( 
bli_pthread_cond_t* cond ); // -- pthread_create(), pthread_join() -- typedef struct { HANDLE handle; void* retval; } bli_pthread_t; typedef void bli_pthread_attr_t; BLIS_EXPORT_BLIS int bli_pthread_create ( bli_pthread_t* thread, const bli_pthread_attr_t* attr, void* (*start_routine)(void*), void* arg ); BLIS_EXPORT_BLIS int bli_pthread_join ( bli_pthread_t thread, void** retval ); // -- pthread_barrier_*() -- typedef void bli_pthread_barrierattr_t; typedef struct { bli_pthread_mutex_t mutex; bli_pthread_cond_t cond; int count; int tripCount; } bli_pthread_barrier_t; BLIS_EXPORT_BLIS int bli_pthread_barrier_init ( bli_pthread_barrier_t* barrier, const bli_pthread_barrierattr_t* attr, unsigned int count ); BLIS_EXPORT_BLIS int bli_pthread_barrier_destroy ( bli_pthread_barrier_t* barrier ); BLIS_EXPORT_BLIS int bli_pthread_barrier_wait ( bli_pthread_barrier_t* barrier ); #else // !defined(_MSC_VER) #include // This branch defines a pthreads-like API, bli_pthreads_*(), and implements it // in terms of the corresponding pthreads_*() types, macros, and function calls. // -- pthread types -- typedef pthread_t bli_pthread_t; typedef pthread_attr_t bli_pthread_attr_t; typedef pthread_mutex_t bli_pthread_mutex_t; typedef pthread_mutexattr_t bli_pthread_mutexattr_t; typedef pthread_cond_t bli_pthread_cond_t; typedef pthread_condattr_t bli_pthread_condattr_t; typedef pthread_once_t bli_pthread_once_t; #if defined(__APPLE__) // For OS X, we must define the barrier types ourselves since Apple does // not implement barriers in their variant of pthreads. typedef void bli_pthread_barrierattr_t; typedef struct { bli_pthread_mutex_t mutex; bli_pthread_cond_t cond; int count; int tripCount; } bli_pthread_barrier_t; #else // For other non-Windows OSes (primarily Linux), we can define the barrier // types in terms of existing pthreads barrier types since we expect they // will be provided by the pthreads implementation. 
typedef pthread_barrier_t bli_pthread_barrier_t; typedef pthread_barrierattr_t bli_pthread_barrierattr_t; #endif // -- pthreads macros -- #define BLIS_PTHREAD_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER #define BLIS_PTHREAD_COND_INITIALIZER PTHREAD_COND_INITIALIZER #define BLIS_PTHREAD_ONCE_INIT PTHREAD_ONCE_INIT // -- pthread_create(), pthread_join() -- BLIS_EXPORT_BLIS int bli_pthread_create ( bli_pthread_t* thread, const bli_pthread_attr_t* attr, void* (*start_routine)(void*), void* arg ); BLIS_EXPORT_BLIS int bli_pthread_join ( bli_pthread_t thread, void** retval ); // -- pthread_mutex_*() -- BLIS_EXPORT_BLIS int bli_pthread_mutex_init ( bli_pthread_mutex_t* mutex, const bli_pthread_mutexattr_t* attr ); BLIS_EXPORT_BLIS int bli_pthread_mutex_destroy ( bli_pthread_mutex_t* mutex ); BLIS_EXPORT_BLIS int bli_pthread_mutex_lock ( bli_pthread_mutex_t* mutex ); BLIS_EXPORT_BLIS int bli_pthread_mutex_trylock ( bli_pthread_mutex_t* mutex ); BLIS_EXPORT_BLIS int bli_pthread_mutex_unlock ( bli_pthread_mutex_t* mutex ); // -- pthread_cond_*() -- BLIS_EXPORT_BLIS int bli_pthread_cond_init ( bli_pthread_cond_t* cond, const bli_pthread_condattr_t* attr ); BLIS_EXPORT_BLIS int bli_pthread_cond_destroy ( bli_pthread_cond_t* cond ); BLIS_EXPORT_BLIS int bli_pthread_cond_wait ( bli_pthread_cond_t* cond, bli_pthread_mutex_t* mutex ); BLIS_EXPORT_BLIS int bli_pthread_cond_broadcast ( bli_pthread_cond_t* cond ); // -- pthread_once_*() -- BLIS_EXPORT_BLIS void bli_pthread_once ( bli_pthread_once_t* once, void (*init)(void) ); // -- pthread_barrier_*() -- BLIS_EXPORT_BLIS int bli_pthread_barrier_init ( bli_pthread_barrier_t* barrier, const bli_pthread_barrierattr_t* attr, unsigned int count ); BLIS_EXPORT_BLIS int bli_pthread_barrier_destroy ( bli_pthread_barrier_t* barrier ); BLIS_EXPORT_BLIS int bli_pthread_barrier_wait ( bli_pthread_barrier_t* barrier ); #endif // _MSC_VER #endif // BLIS_PTHREAD_H 
blis-0.6.1/frame/thread/bli_thrcomm.c000066400000000000000000000111711360743507500174660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void* bli_thrcomm_bcast ( dim_t id, void* to_send, thrcomm_t* comm ) { if ( comm == NULL || comm->n_threads == 1 ) return to_send; if ( id == 0 ) comm->sent_object = to_send; bli_thrcomm_barrier( id, comm ); void* object = comm->sent_object; bli_thrcomm_barrier( id, comm ); return object; } // Use __sync_* builtins (assumed available) if __atomic_* ones are not present. #ifndef __ATOMIC_RELAXED #define __ATOMIC_RELAXED #define __ATOMIC_ACQUIRE #define __ATOMIC_RELEASE #define __ATOMIC_ACQ_REL #define __atomic_load_n(ptr, constraint) \ __sync_fetch_and_add(ptr, 0) #define __atomic_add_fetch(ptr, value, constraint) \ __sync_add_and_fetch(ptr, value) #define __atomic_fetch_add(ptr, value, constraint) \ __sync_fetch_and_add(ptr, value) #define __atomic_fetch_xor(ptr, value, constraint) \ __sync_fetch_and_xor(ptr, value) #endif void bli_thrcomm_barrier_atomic( dim_t t_id, thrcomm_t* comm ) { // Return early if the comm is NULL or if there is only one // thread participating. if ( comm == NULL || comm->n_threads == 1 ) return; // Read the "sense" variable. This variable is akin to a unique ID for // the current barrier. The first n-1 threads will spin on this variable // until it changes. The sense variable gets incremented by the last // thread to enter the barrier, just before it exits. But it turns out // that you don't need many unique IDs before you can wrap around. In // fact, if everything else is working, a binary variable is sufficient, // which is what we do here (i.e., 0 is incremented to 1, which is then // decremented back to 0, and so forth). bool_t orig_sense = __atomic_load_n( &comm->barrier_sense, __ATOMIC_RELAXED ); // Register ourselves (the current thread) as having arrived by // incrementing the barrier_threads_arrived variable. We must perform // this increment (and a subsequent read) atomically. 
dim_t my_threads_arrived = __atomic_add_fetch( &comm->barrier_threads_arrived, 1, __ATOMIC_ACQ_REL ); // If the current thread was the last thread to have arrived, then // it will take actions that effectively ends and resets the barrier. if ( my_threads_arrived == comm->n_threads ) { // Reset the variable tracking the number of threads that have arrived // to zero (which returns the barrier to the "empty" state. Then // atomically toggle the barrier sense variable. This will signal to // the other threads (which are spinning in the branch elow) that it // is now safe to exit the barrier. comm->barrier_threads_arrived = 0; __atomic_fetch_xor( &comm->barrier_sense, 1, __ATOMIC_RELEASE ); } else { // If the current thread is NOT the last thread to have arrived, then // it spins on the sense variable until that sense variable changes at // which time these threads will exit the barrier. while ( __atomic_load_n( &comm->barrier_sense, __ATOMIC_ACQUIRE ) == orig_sense ) ; // Empty loop body. } } blis-0.6.1/frame/thread/bli_thrcomm.h000066400000000000000000000050761360743507500175020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef BLIS_THRCOMM_H
#define BLIS_THRCOMM_H

// Include definitions (mostly thrcomm_t) specific to the method of
// multithreading. Each of these headers guards its contents with the
// corresponding threading macro, so only one of them is expected to
// contribute a definition of thrcomm_t.
#include "bli_thrcomm_single.h"
#include "bli_thrcomm_openmp.h"
#include "bli_thrcomm_pthreads.h"

// thrcomm_t query (field only)

// Returns the n_threads field of the communicator. comm must not be NULL.
static dim_t bli_thrcomm_num_threads( thrcomm_t* comm )
{
	return comm->n_threads;
}

// Thread communicator prototypes.

// Allocates a communicator and initializes it for n_threads threads.
thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads );

// Cleans up and releases a communicator obtained from bli_thrcomm_create().
void       bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm );

// Initializes the fields of an already-allocated communicator.
void       bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm );

// Releases any resources held by the fields of a communicator (but not the
// communicator itself).
void       bli_thrcomm_cleanup( thrcomm_t* comm );

// Blocks the calling thread until all threads of the communicator have
// reached the barrier.
BLIS_EXPORT_BLIS void  bli_thrcomm_barrier( dim_t thread_id, thrcomm_t* comm );

// Distributes the pointer passed by the thread with id 0 to all threads of
// the communicator, and returns it.
BLIS_EXPORT_BLIS void* bli_thrcomm_bcast( dim_t inside_id, void* to_send, thrcomm_t* comm );

// Barrier implementation based on atomic operations and spin-waiting.
void       bli_thrcomm_barrier_atomic( dim_t thread_id, thrcomm_t* comm );

#endif

blis-0.6.1/frame/thread/bli_thrcomm_openmp.c000066400000000000000000000126541360743507500210530ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #ifdef BLIS_ENABLE_OPENMP thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif thrcomm_t* comm = bli_sba_acquire( rntm, sizeof(thrcomm_t) ); bli_thrcomm_init( n_threads, comm ); return comm; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) { if ( comm == NULL ) return; bli_thrcomm_cleanup( comm ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_free(): " ); #endif bli_sba_release( rntm, comm ); } #ifndef BLIS_TREE_BARRIER void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ) { if ( comm == NULL ) return; comm->sent_object = NULL; comm->n_threads = n_threads; comm->barrier_sense = 0; comm->barrier_threads_arrived = 0; } void bli_thrcomm_cleanup( thrcomm_t* comm ) { if ( comm == NULL ) return; } //'Normal' barrier for openmp //barrier routine taken from art of multicore programming void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm ) { #if 0 if ( comm == NULL || comm->n_threads == 1 ) return; bool_t my_sense = comm->barrier_sense; dim_t my_threads_arrived; _Pragma( "omp atomic capture" ) my_threads_arrived = ++(comm->barrier_threads_arrived); if ( my_threads_arrived == comm->n_threads ) { comm->barrier_threads_arrived = 0; comm->barrier_sense = !comm->barrier_sense; } else { volatile bool_t* listener = &comm->barrier_sense; while ( *listener == my_sense ) {} } #endif bli_thrcomm_barrier_atomic( t_id, comm ); } #else void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ) { if ( comm == NULL ) return; comm->sent_object = NULL; comm->n_threads = n_threads; comm->barriers = bli_malloc_intl( sizeof( barrier_t* ) * n_threads ); bli_thrcomm_tree_barrier_create( n_threads, BLIS_TREE_BARRIER_ARITY, comm->barriers, 0 ); } //Tree barrier used for Intel Xeon Phi barrier_t* bli_thrcomm_tree_barrier_create( int num_threads, int arity, barrier_t** leaves, int leaf_index ) { barrier_t* me = bli_malloc_intl( sizeof(barrier_t) ); me->dad = NULL; me->signal = 
0; // Base Case if ( num_threads <= arity ) { //Now must be registered as a leaf for ( int i = 0; i < num_threads; i++ ) { leaves[ leaf_index + i ] = me; } me->count = num_threads; me->arity = num_threads; } else { // Otherwise this node has children int threads_per_kid = num_threads / arity; int defecit = num_threads - threads_per_kid * arity; for ( int i = 0; i < arity; i++ ) { int threads_this_kid = threads_per_kid; if ( i < defecit ) threads_this_kid++; barrier_t* kid = bli_thrcomm_tree_barrier_create( threads_this_kid, arity, leaves, leaf_index ); kid->dad = me; leaf_index += threads_this_kid; } me->count = arity; me->arity = arity; } return me; } void bli_thrcomm_cleanup( thrcomm_t* comm ) { if ( comm == NULL ) return; for ( dim_t i = 0; i < comm->n_threads; i++ ) { bli_thrcomm_tree_barrier_free( comm->barriers[i] ); } bli_free_intl( comm->barriers ); } void bli_thrcomm_tree_barrier_free( barrier_t* barrier ) { if ( barrier == NULL ) return; barrier->count--; if ( barrier->count == 0 ) { bli_thrcomm_tree_barrier_free( barrier->dad ); bli_free_intl( barrier ); } return; } void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm ) { bli_thrcomm_tree_barrier( comm->barriers[t_id] ); } void bli_thrcomm_tree_barrier( barrier_t* barack ) { int my_signal = barack->signal; int my_count; _Pragma( "omp atomic capture" ) my_count = barack->count--; if ( my_count == 1 ) { if ( barack->dad != NULL ) { bli_thrcomm_tree_barrier( barack->dad ); } barack->count = barack->arity; barack->signal = !barack->signal; } else { volatile int* listener = &barack->signal; while ( *listener == my_signal ) {} } } #endif #endif blis-0.6.1/frame/thread/bli_thrcomm_openmp.h000066400000000000000000000053511360743507500210540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_THRCOMM_OPENMP_H #define BLIS_THRCOMM_OPENMP_H // Define thrcomm_t for situations when OpenMP multithreading is enabled. #ifdef BLIS_ENABLE_OPENMP #include // Define thrcomm_t for tree barriers and non-tree barriers. 
#ifdef BLIS_TREE_BARRIER struct barrier_s { int arity; int count; struct barrier_s* dad; volatile int signal; }; typedef struct barrier_s barrier_t; struct thrcomm_s { void* sent_object; dim_t n_threads; barrier_t** barriers; }; #else struct thrcomm_s { void* sent_object; dim_t n_threads; //volatile bool_t barrier_sense; bool_t barrier_sense; dim_t barrier_threads_arrived; }; #endif typedef struct thrcomm_s thrcomm_t; // Prototypes specific to tree barriers. #ifdef BLIS_TREE_BARRIER barrier_t* bli_thrcomm_tree_barrier_create( int num_threads, int arity, barrier_t** leaves, int leaf_index ); void bli_thrcomm_tree_barrier_free( barrier_t* barrier ); void bli_thrcomm_tree_barrier( barrier_t* barack ); #endif #endif #endif blis-0.6.1/frame/thread/bli_thrcomm_pthreads.c000066400000000000000000000075231360743507500213660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifdef BLIS_ENABLE_PTHREADS thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif thrcomm_t* comm = bli_sba_acquire( rntm, sizeof(thrcomm_t) ); bli_thrcomm_init( n_threads, comm ); return comm; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) { if ( comm == NULL ) return; bli_thrcomm_cleanup( comm ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_free(): " ); #endif bli_sba_release( rntm, comm ); } #ifdef BLIS_USE_PTHREAD_BARRIER void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ) { if ( comm == NULL ) return; comm->sent_object = NULL; comm->n_threads = n_threads; bli_pthread_barrier_init( &comm->barrier, NULL, n_threads ); } void bli_thrcomm_cleanup( thrcomm_t* comm ) { if ( comm == NULL ) return; bli_pthread_barrier_destroy( &comm->barrier ); } void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm ) { bli_pthread_barrier_wait( &comm->barrier ); } #else void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ) { if ( comm == NULL ) return; comm->sent_object = NULL; comm->n_threads = n_threads; comm->barrier_sense = 0; comm->barrier_threads_arrived = 0; //#ifdef BLIS_USE_PTHREAD_MUTEX // bli_pthread_mutex_init( &comm->mutex, NULL ); //#endif } void bli_thrcomm_cleanup( thrcomm_t* comm ) { //#ifdef BLIS_USE_PTHREAD_MUTEX // if ( comm == NULL ) return; // bli_pthread_mutex_destroy( &comm->mutex ); //#endif } void bli_thrcomm_barrier( dim_t 
t_id, thrcomm_t* comm ) { #if 0 if ( comm == NULL || comm->n_threads == 1 ) return; bool_t my_sense = comm->sense; dim_t my_threads_arrived; #ifdef BLIS_USE_PTHREAD_MUTEX bli_pthread_mutex_lock( &comm->mutex ); my_threads_arrived = ++(comm->threads_arrived); bli_pthread_mutex_unlock( &comm->mutex ); #else my_threads_arrived = __sync_add_and_fetch(&(comm->threads_arrived), 1); #endif if ( my_threads_arrived == comm->n_threads ) { comm->threads_arrived = 0; comm->sense = !comm->sense; } else { volatile bool_t* listener = &comm->sense; while( *listener == my_sense ) {} } #endif bli_thrcomm_barrier_atomic( t_id, comm ); } #endif #endif blis-0.6.1/frame/thread/bli_thrcomm_pthreads.h000066400000000000000000000043631360743507500213720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
   IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#ifndef BLIS_THRCOMM_PTHREADS_H
#define BLIS_THRCOMM_PTHREADS_H

// Define thrcomm_t for situations when POSIX multithreading is enabled.
#ifdef BLIS_ENABLE_PTHREADS

#ifdef BLIS_USE_PTHREAD_BARRIER

// Variant whose barrier is a bli_pthread_barrier_t.
struct thrcomm_s
{
	// Pointer being distributed by bli_thrcomm_bcast().
	void*                 sent_object;
	// Number of threads in the communicator.
	dim_t                 n_threads;

	// Barrier shared by the threads of the communicator.
	bli_pthread_barrier_t barrier;
};

#else

// Variant whose barrier state is a sense flag and an arrival counter.
struct thrcomm_s
{
	// Pointer being distributed by bli_thrcomm_bcast().
	void*  sent_object;
	// Number of threads in the communicator.
	dim_t  n_threads;

//#ifdef BLIS_USE_PTHREAD_MUTEX
//	bli_pthread_mutex_t mutex;
//#endif

	// Current sense of the barrier, and the number of threads that have
	// arrived at it in the current cycle.
	//volatile bool_t  barrier_sense;
	bool_t barrier_sense;
	dim_t  barrier_threads_arrived;
};

#endif

typedef struct thrcomm_s thrcomm_t;

#endif

#endif

blis-0.6.1/frame/thread/bli_thrcomm_single.c000066400000000000000000000052551360743507500210350ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #ifndef BLIS_ENABLE_MULTITHREADING //Constructors and destructors for constructors thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_create(): " ); #endif thrcomm_t* comm = bli_sba_acquire( rntm, sizeof( thrcomm_t ) ); bli_thrcomm_init( n_threads, comm ); return comm; } void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm ) { if ( comm == NULL ) return; bli_thrcomm_cleanup( comm ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrcomm_free(): " ); #endif bli_sba_release( rntm, comm ); } void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm ) { if ( comm == NULL ) return; comm->sent_object = NULL; comm->n_threads = n_threads; comm->barrier_sense = 0; comm->barrier_threads_arrived = 0; } void bli_thrcomm_cleanup( thrcomm_t* comm ) { if ( comm == NULL ) return; } void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm ) { return; } #endif 
blis-0.6.1/frame/thread/bli_thrcomm_single.h000066400000000000000000000045061360743507500210400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_THRCOMM_SINGLE_H #define BLIS_THRCOMM_SINGLE_H // Define thrcomm_t for situations when multithreading is disabled. 
#ifndef BLIS_ENABLE_MULTITHREADING

//thread communicators may be implementation dependent
#ifdef BLIS_TREE_BARRIER

	// One node of a barrier structure used when BLIS_TREE_BARRIER is
	// defined.
	// NOTE(review): the field meanings below are inferred from their names
	// only; confirm against the code that reads and writes them.
	struct barrier_s
	{
		// Presumably the fan-in of this node -- TODO confirm.
		int               arity;
		// Presumably a counter of arrivals at this node -- TODO confirm.
		int               count;
		// Pointer to another barrier node (presumably the parent).
		struct barrier_s* dad;
		// Presumably a flag toggled to release waiters -- TODO confirm.
		int               signal;
	};
	typedef struct barrier_s barrier_t;

	// Thread communicator layout used with the tree barrier.
	struct thrcomm_s
	{
		// Untyped pointer slot held by the communicator.
		void*       sent_object;
		// Thread count associated with this communicator.
		dim_t       n_threads;
		// Array of pointers to barrier nodes.
		barrier_t** barriers;
	};

#else

	// Thread communicator layout used when BLIS_TREE_BARRIER is not
	// defined.
	struct thrcomm_s
	{
		// Untyped pointer slot held by the communicator.
		void*   sent_object;
		// Thread count associated with this communicator.
		dim_t   n_threads;

		// Barrier bookkeeping: a boolean sense value and a count of
		// arrived threads.
		bool_t  barrier_sense;
		dim_t   barrier_threads_arrived;
	};

#endif

typedef struct thrcomm_s thrcomm_t;

#endif

#endif
blis-0.6.1/frame/thread/bli_thread.c000066400000000000000000001157651360743507500173020ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" thrinfo_t BLIS_PACKM_SINGLE_THREADED = {}; thrinfo_t BLIS_GEMM_SINGLE_THREADED = {}; thrcomm_t BLIS_SINGLE_COMM = {}; // The global rntm_t structure. (The definition resides in bli_rntm.c.) extern rntm_t global_rntm; // A mutex to allow synchronous access to global_rntm. (The definition // resides in bli_rntm.c.) extern bli_pthread_mutex_t global_rntm_mutex; // ----------------------------------------------------------------------------- void bli_thread_init( void ) { bli_thrcomm_init( 1, &BLIS_SINGLE_COMM ); bli_packm_thrinfo_init_single( &BLIS_PACKM_SINGLE_THREADED ); bli_l3_thrinfo_init_single( &BLIS_GEMM_SINGLE_THREADED ); // Read the environment variables and use them to initialize the // global runtime object. 
bli_thread_init_rntm_from_env( &global_rntm ); } void bli_thread_finalize( void ) { } // ----------------------------------------------------------------------------- void bli_thread_range_sub ( thrinfo_t* thread, dim_t n, dim_t bf, bool_t handle_edge_low, dim_t* start, dim_t* end ) { dim_t n_way = bli_thread_n_way( thread ); if ( n_way == 1 ) { *start = 0; *end = n; return; } dim_t work_id = bli_thread_work_id( thread ); dim_t all_start = 0; dim_t all_end = n; dim_t size = all_end - all_start; dim_t n_bf_whole = size / bf; dim_t n_bf_left = size % bf; dim_t n_bf_lo = n_bf_whole / n_way; dim_t n_bf_hi = n_bf_whole / n_way; // In this function, we partition the space between all_start and // all_end into n_way partitions, each a multiple of block_factor // with the exception of the one partition that recieves the // "edge" case (if applicable). // // Here are examples of various thread partitionings, in units of // the block_factor, when n_way = 4. (A '+' indicates the thread // that receives the leftover edge case (ie: n_bf_left extra // rows/columns in its sub-range). // (all_start ... all_end) // n_bf_whole _left hel n_th_lo _hi thr0 thr1 thr2 thr3 // 12 =0 f 0 4 3 3 3 3 // 12 >0 f 0 4 3 3 3 3+ // 13 >0 f 1 3 4 3 3 3+ // 14 >0 f 2 2 4 4 3 3+ // 15 >0 f 3 1 4 4 4 3+ // 15 =0 f 3 1 4 4 4 3 // // 12 =0 t 4 0 3 3 3 3 // 12 >0 t 4 0 3+ 3 3 3 // 13 >0 t 3 1 3+ 3 3 4 // 14 >0 t 2 2 3+ 3 4 4 // 15 >0 t 1 3 3+ 4 4 4 // 15 =0 t 1 3 3 4 4 4 // As indicated by the table above, load is balanced as equally // as possible, even in the presence of an edge case. // First, we must differentiate between cases where the leftover // "edge" case (n_bf_left) should be allocated to a thread partition // at the low end of the index range or the high end. if ( handle_edge_low == FALSE ) { // Notice that if all threads receive the same number of // block_factors, those threads are considered "high" and // the "low" thread group is empty. 
dim_t n_th_lo = n_bf_whole % n_way; //dim_t n_th_hi = n_way - n_th_lo; // If some partitions must have more block_factors than others // assign the slightly larger partitions to lower index threads. if ( n_th_lo != 0 ) n_bf_lo += 1; // Compute the actual widths (in units of rows/columns) of // individual threads in the low and high groups. dim_t size_lo = n_bf_lo * bf; dim_t size_hi = n_bf_hi * bf; // Precompute the starting indices of the low and high groups. dim_t lo_start = all_start; dim_t hi_start = all_start + n_th_lo * size_lo; // Compute the start and end of individual threads' ranges // as a function of their work_ids and also the group to which // they belong (low or high). if ( work_id < n_th_lo ) { *start = lo_start + (work_id ) * size_lo; *end = lo_start + (work_id+1) * size_lo; } else // if ( n_th_lo <= work_id ) { *start = hi_start + (work_id-n_th_lo ) * size_hi; *end = hi_start + (work_id-n_th_lo+1) * size_hi; // Since the edge case is being allocated to the high // end of the index range, we have to advance the last // thread's end. if ( work_id == n_way - 1 ) *end += n_bf_left; } } else // if ( handle_edge_low == TRUE ) { // Notice that if all threads receive the same number of // block_factors, those threads are considered "low" and // the "high" thread group is empty. dim_t n_th_hi = n_bf_whole % n_way; dim_t n_th_lo = n_way - n_th_hi; // If some partitions must have more block_factors than others // assign the slightly larger partitions to higher index threads. if ( n_th_hi != 0 ) n_bf_hi += 1; // Compute the actual widths (in units of rows/columns) of // individual threads in the low and high groups. dim_t size_lo = n_bf_lo * bf; dim_t size_hi = n_bf_hi * bf; // Precompute the starting indices of the low and high groups. 
dim_t lo_start = all_start; dim_t hi_start = all_start + n_th_lo * size_lo + n_bf_left; // Compute the start and end of individual threads' ranges // as a function of their work_ids and also the group to which // they belong (low or high). if ( work_id < n_th_lo ) { *start = lo_start + (work_id ) * size_lo; *end = lo_start + (work_id+1) * size_lo; // Since the edge case is being allocated to the low // end of the index range, we have to advance the // starts/ends accordingly. if ( work_id == 0 ) *end += n_bf_left; else { *start += n_bf_left; *end += n_bf_left; } } else // if ( n_th_lo <= work_id ) { *start = hi_start + (work_id-n_th_lo ) * size_hi; *end = hi_start + (work_id-n_th_lo+1) * size_hi; } } } siz_t bli_thread_range_l2r ( thrinfo_t* thr, obj_t* a, blksz_t* bmult, dim_t* start, dim_t* end ) { num_t dt = bli_obj_dt( a ); dim_t m = bli_obj_length_after_trans( a ); dim_t n = bli_obj_width_after_trans( a ); dim_t bf = bli_blksz_get_def( dt, bmult ); bli_thread_range_sub( thr, n, bf, FALSE, start, end ); return m * ( *end - *start ); } siz_t bli_thread_range_r2l ( thrinfo_t* thr, obj_t* a, blksz_t* bmult, dim_t* start, dim_t* end ) { num_t dt = bli_obj_dt( a ); dim_t m = bli_obj_length_after_trans( a ); dim_t n = bli_obj_width_after_trans( a ); dim_t bf = bli_blksz_get_def( dt, bmult ); bli_thread_range_sub( thr, n, bf, TRUE, start, end ); return m * ( *end - *start ); } siz_t bli_thread_range_t2b ( thrinfo_t* thr, obj_t* a, blksz_t* bmult, dim_t* start, dim_t* end ) { num_t dt = bli_obj_dt( a ); dim_t m = bli_obj_length_after_trans( a ); dim_t n = bli_obj_width_after_trans( a ); dim_t bf = bli_blksz_get_def( dt, bmult ); bli_thread_range_sub( thr, m, bf, FALSE, start, end ); return n * ( *end - *start ); } siz_t bli_thread_range_b2t ( thrinfo_t* thr, obj_t* a, blksz_t* bmult, dim_t* start, dim_t* end ) { num_t dt = bli_obj_dt( a ); dim_t m = bli_obj_length_after_trans( a ); dim_t n = bli_obj_width_after_trans( a ); dim_t bf = bli_blksz_get_def( dt, bmult ); 
bli_thread_range_sub( thr, m, bf, TRUE, start, end ); return n * ( *end - *start ); } // ----------------------------------------------------------------------------- dim_t bli_thread_range_width_l ( doff_t diagoff_j, dim_t m, dim_t n_j, dim_t j, dim_t n_way, dim_t bf, dim_t bf_left, double area_per_thr, bool_t handle_edge_low ) { dim_t width; // In this function, we assume that we are somewhere in the process of // partitioning an m x n lower-stored region (with arbitrary diagonal // offset) n_ways along the n dimension (into column panels). The value // j identifies the left-to-right subpartition index (from 0 to n_way-1) // of the subpartition whose width we are about to compute using the // area per thread determined by the caller. n_j is the number of // columns in the remaining region of the matrix being partitioned, // and diagoff_j is that region's diagonal offset. // If this is the last subpartition, the width is simply equal to n_j. // Note that this statement handles cases where the "edge case" (if // one exists) is assigned to the high end of the index range (ie: // handle_edge_low == FALSE). if ( j == n_way - 1 ) return n_j; // At this point, we know there are at least two subpartitions left. // We also know that IF the submatrix contains a completely dense // rectangular submatrix, it will occur BEFORE the triangular (or // trapezoidal) part. // Here, we implement a somewhat minor load balancing optimization // that ends up getting employed only for relatively small matrices. // First, recall that all subpartition widths will be some multiple // of the blocking factor bf, except perhaps either the first or last // subpartition, which will receive the edge case, if it exists. // Also recall that j represents the current thread (or thread group, // or "caucus") for which we are computing a subpartition width. // If n_j is sufficiently small that we can only allocate bf columns // to each of the remaining threads, then we set the width to bf. 
We // do not allow the subpartition width to be less than bf, so, under // some conditions, if n_j is small enough, some of the reamining // threads may not get any work. For the purposes of this lower bound // on work (ie: width >= bf), we allow the edge case to count as a // "full" set of bf columns. { dim_t n_j_bf = n_j / bf + ( bf_left > 0 ? 1 : 0 ); if ( n_j_bf <= n_way - j ) { if ( j == 0 && handle_edge_low ) width = ( bf_left > 0 ? bf_left : bf ); else width = bf; // Make sure that the width does not exceed n_j. This would // occur if and when n_j_bf < n_way - j; that is, when the // matrix being partitioned is sufficiently small relative to // n_way such that there is not even enough work for every // (remaining) thread to get bf (or bf_left) columns. The // net effect of this safeguard is that some threads may get // assigned empty ranges (ie: no work), which of course must // happen in some situations. if ( width > n_j ) width = n_j; return width; } } // This block computes the width assuming that we are entirely within // a dense rectangle that precedes the triangular (or trapezoidal) // part. { // First compute the width of the current panel under the // assumption that the diagonal offset would not intersect. width = ( dim_t )bli_round( ( double )area_per_thr / ( double )m ); // Adjust the width, if necessary. Specifically, we may need // to allocate the edge case to the first subpartition, if // requested; otherwise, we just need to ensure that the // subpartition is a multiple of the blocking factor. if ( j == 0 && handle_edge_low ) { if ( width % bf != bf_left ) width += bf_left - ( width % bf ); } else // if interior case { // Round up to the next multiple of the blocking factor. //if ( width % bf != 0 ) width += bf - ( width % bf ); // Round to the nearest multiple of the blocking factor. 
if ( width % bf != 0 ) width = bli_round_to_mult( width, bf ); } } // We need to recompute width if the panel, according to the width // as currently computed, would intersect the diagonal. if ( diagoff_j < width ) { dim_t offm_inc, offn_inc; // Prune away the unstored region above the diagonal, if it exists. // Note that the entire region was pruned initially, so we know that // we don't need to try to prune the right side. (Also, we discard // the offset deltas since we don't need to actually index into the // subpartition.) bli_prune_unstored_region_top_l( &diagoff_j, &m, &n_j, &offm_inc ); //bli_prune_unstored_region_right_l( &diagoff_j, &m, &n_j, &offn_inc ); // We don't need offm_inc, offn_inc here. These statements should // prevent compiler warnings. ( void )offm_inc; ( void )offn_inc; // Prepare to solve a quadratic equation to find the width of the // current (jth) subpartition given the m dimension, diagonal offset, // and area. // NOTE: We know that the +/- in the quadratic formula must be a + // here because we know that the desired solution (the subpartition // width) will be smaller than (m + diagoff), not larger. If you // don't believe me, draw a picture! const double a = -0.5; const double b = ( double )m + ( double )diagoff_j + 0.5; const double c = -0.5 * ( ( double )diagoff_j * ( ( double )diagoff_j + 1.0 ) ) - area_per_thr; const double r = b * b - 4.0 * a * c; // If the quadratic solution is not imaginary, round it and use that // as our width, but make sure it didn't round to zero. Otherwise, // discard the quadratic solution and leave width, as previously // computed, unchanged. if ( r >= 0.0 ) { const double x = ( -b + sqrt( r ) ) / ( 2.0 * a ); width = ( dim_t )bli_round( x ); if ( width == 0 ) width = 1; } // Adjust the width, if necessary. if ( j == 0 && handle_edge_low ) { if ( width % bf != bf_left ) width += bf_left - ( width % bf ); } else // if interior case { // Round up to the next multiple of the blocking factor. 
//if ( width % bf != 0 ) width += bf - ( width % bf ); // Round to the nearest multiple of the blocking factor. if ( width % bf != 0 ) width = bli_round_to_mult( width, bf ); } } // Make sure that the width, after being adjusted, does not cause the // subpartition to exceed n_j. if ( width > n_j ) width = n_j; return width; } siz_t bli_find_area_trap_l ( dim_t m, dim_t n, doff_t diagoff ) { dim_t offm_inc = 0; dim_t offn_inc = 0; double tri_area; double area; // Prune away any rectangular region above where the diagonal // intersects the left edge of the subpartition, if it exists. bli_prune_unstored_region_top_l( &diagoff, &m, &n, &offm_inc ); // Prune away any rectangular region to the right of where the // diagonal intersects the bottom edge of the subpartition, if // it exists. (This shouldn't ever be needed, since the caller // would presumably have already performed rightward pruning, // but it's here just in case.) bli_prune_unstored_region_right_l( &diagoff, &m, &n, &offn_inc ); ( void )offm_inc; ( void )offn_inc; // Compute the area of the empty triangle so we can subtract it // from the area of the rectangle that bounds the subpartition. if ( bli_intersects_diag_n( diagoff, m, n ) ) { double tri_dim = ( double )( n - diagoff - 1 ); tri_area = tri_dim * ( tri_dim + 1.0 ) / 2.0; } else { // If the diagonal does not intersect the trapezoid, then // we can compute the area as a simple rectangle. 
tri_area = 0.0; } area = ( double )m * ( double )n - tri_area; return ( siz_t )area; } // ----------------------------------------------------------------------------- siz_t bli_thread_range_weighted_sub ( thrinfo_t* restrict thread, doff_t diagoff, uplo_t uplo, dim_t m, dim_t n, dim_t bf, bool_t handle_edge_low, dim_t* restrict j_start_thr, dim_t* restrict j_end_thr ) { dim_t n_way = bli_thread_n_way( thread ); dim_t my_id = bli_thread_work_id( thread ); dim_t bf_left = n % bf; dim_t j; dim_t off_j; doff_t diagoff_j; dim_t n_left; dim_t width_j; dim_t offm_inc, offn_inc; double tri_dim, tri_area; double area_total, area_per_thr; siz_t area = 0; // In this function, we assume that the caller has already determined // that (a) the diagonal intersects the submatrix, and (b) the submatrix // is either lower- or upper-stored. if ( bli_is_lower( uplo ) ) { // Prune away the unstored region above the diagonal, if it exists, // and then to the right of where the diagonal intersects the bottom, // if it exists. (Also, we discard the offset deltas since we don't // need to actually index into the subpartition.) bli_prune_unstored_region_top_l( &diagoff, &m, &n, &offm_inc ); bli_prune_unstored_region_right_l( &diagoff, &m, &n, &offn_inc ); // We don't need offm_inc, offn_inc here. These statements should // prevent compiler warnings. ( void )offm_inc; ( void )offn_inc; // Now that pruning has taken place, we know that diagoff >= 0. // Compute the total area of the submatrix, accounting for the // location of the diagonal, and divide it by the number of ways // of parallelism. tri_dim = ( double )( n - diagoff - 1 ); tri_area = tri_dim * ( tri_dim + 1.0 ) / 2.0; area_total = ( double )m * ( double )n - tri_area; area_per_thr = area_total / ( double )n_way; // Initialize some variables prior to the loop: the offset to the // current subpartition, the remainder of the n dimension, and // the diagonal offset of the current subpartition. 
off_j = 0; diagoff_j = diagoff; n_left = n; // Iterate over the subpartition indices corresponding to each // thread/caucus participating in the n_way parallelism. for ( j = 0; j < n_way; ++j ) { // Compute the width of the jth subpartition, taking the // current diagonal offset into account, if needed. width_j = bli_thread_range_width_l ( diagoff_j, m, n_left, j, n_way, bf, bf_left, area_per_thr, handle_edge_low ); // If the current thread belongs to caucus j, this is his // subpartition. So we compute the implied index range and // end our search. if ( j == my_id ) { *j_start_thr = off_j; *j_end_thr = off_j + width_j; area = bli_find_area_trap_l( m, width_j, diagoff_j ); break; } // Shift the current subpartition's starting and diagonal offsets, // as well as the remainder of the n dimension, according to the // computed width, and then iterate to the next subpartition. off_j += width_j; diagoff_j -= width_j; n_left -= width_j; } } else // if ( bli_is_upper( uplo ) ) { // Express the upper-stored case in terms of the lower-stored case. // First, we convert the upper-stored trapezoid to an equivalent // lower-stored trapezoid by rotating it 180 degrees. bli_rotate180_trapezoid( &diagoff, &uplo, &m, &n ); // Now that the trapezoid is "flipped" in the n dimension, negate // the bool that encodes whether to handle the edge case at the // low (or high) end of the index range. bli_toggle_bool( &handle_edge_low ); // Compute the appropriate range for the rotated trapezoid. area = bli_thread_range_weighted_sub ( thread, diagoff, uplo, m, n, bf, handle_edge_low, j_start_thr, j_end_thr ); // Reverse the indexing basis for the subpartition ranges so that // the indices, relative to left-to-right iteration through the // unrotated upper-stored trapezoid, map to the correct columns // (relative to the diagonal). This amounts to subtracting the // range from n. 
bli_reverse_index_direction( n, j_start_thr, j_end_thr ); } return area; } siz_t bli_thread_range_mdim ( dir_t direct, thrinfo_t* thr, obj_t* a, obj_t* b, obj_t* c, cntl_t* cntl, cntx_t* cntx, dim_t* start, dim_t* end ) { bszid_t bszid = bli_cntl_bszid( cntl ); opid_t family = bli_cntl_family( cntl ); // This is part of trsm's current implementation, whereby right side // cases are implemented in left-side micro-kernels, which requires // we swap the usage of the register blocksizes for the purposes of // packing A and B. if ( family == BLIS_TRSM ) { if ( bli_obj_root_is_triangular( a ) ) bszid = BLIS_MR; else bszid = BLIS_NR; } blksz_t* bmult = bli_cntx_get_bmult( bszid, cntx ); obj_t* x; bool_t use_weighted; // Use the operation family to choose the one of the two matrices // being partitioned that potentially has structure, and also to // decide whether or not we need to use weighted range partitioning. // NOTE: It's important that we use non-weighted range partitioning // for hemm and symm (ie: the gemm family) because the weighted // function will mistakenly skip over unstored regions of the // structured matrix, even though they represent part of that matrix // that will be dense and full (after packing). 
if ( family == BLIS_GEMM ) { x = a; use_weighted = FALSE; } else if ( family == BLIS_HERK ) { x = c; use_weighted = TRUE; } else if ( family == BLIS_TRMM ) { x = a; use_weighted = TRUE; } else /*family == BLIS_TRSM*/ { x = a; use_weighted = FALSE; } if ( use_weighted ) { if ( direct == BLIS_FWD ) return bli_thread_range_weighted_t2b( thr, x, bmult, start, end ); else return bli_thread_range_weighted_b2t( thr, x, bmult, start, end ); } else { if ( direct == BLIS_FWD ) return bli_thread_range_t2b( thr, x, bmult, start, end ); else return bli_thread_range_b2t( thr, x, bmult, start, end ); } } siz_t bli_thread_range_ndim ( dir_t direct, thrinfo_t* thr, obj_t* a, obj_t* b, obj_t* c, cntl_t* cntl, cntx_t* cntx, dim_t* start, dim_t* end ) { bszid_t bszid = bli_cntl_bszid( cntl ); opid_t family = bli_cntl_family( cntl ); // This is part of trsm's current implementation, whereby right side // cases are implemented in left-side micro-kernels, which requires // we swap the usage of the register blocksizes for the purposes of // packing A and B. if ( family == BLIS_TRSM ) { if ( bli_obj_root_is_triangular( b ) ) bszid = BLIS_MR; else bszid = BLIS_NR; } blksz_t* bmult = bli_cntx_get_bmult( bszid, cntx ); obj_t* x; bool_t use_weighted; // Use the operation family to choose the one of the two matrices // being partitioned that potentially has structure, and also to // decide whether or not we need to use weighted range partitioning. // NOTE: It's important that we use non-weighted range partitioning // for hemm and symm (ie: the gemm family) because the weighted // function will mistakenly skip over unstored regions of the // structured matrix, even though they represent part of that matrix // that will be dense and full (after packing). 
if ( family == BLIS_GEMM ) { x = b; use_weighted = FALSE; } else if ( family == BLIS_HERK ) { x = c; use_weighted = TRUE; } else if ( family == BLIS_TRMM ) { x = b; use_weighted = TRUE; } else /*family == BLIS_TRSM*/ { x = b; use_weighted = FALSE; } if ( use_weighted ) { if ( direct == BLIS_FWD ) return bli_thread_range_weighted_l2r( thr, x, bmult, start, end ); else return bli_thread_range_weighted_r2l( thr, x, bmult, start, end ); } else { if ( direct == BLIS_FWD ) return bli_thread_range_l2r( thr, x, bmult, start, end ); else return bli_thread_range_r2l( thr, x, bmult, start, end ); } } siz_t bli_thread_range_weighted_l2r ( thrinfo_t* thr, obj_t* a, blksz_t* bmult, dim_t* start, dim_t* end ) { siz_t area; // This function assigns area-weighted ranges in the n dimension // where the total range spans 0 to n-1 with 0 at the left end and // n-1 at the right end. if ( bli_obj_intersects_diag( a ) && bli_obj_is_upper_or_lower( a ) ) { num_t dt = bli_obj_dt( a ); doff_t diagoff = bli_obj_diag_offset( a ); uplo_t uplo = bli_obj_uplo( a ); dim_t m = bli_obj_length( a ); dim_t n = bli_obj_width( a ); dim_t bf = bli_blksz_get_def( dt, bmult ); // Support implicit transposition. if ( bli_obj_has_trans( a ) ) { bli_reflect_about_diag( &diagoff, &uplo, &m, &n ); } area = bli_thread_range_weighted_sub ( thr, diagoff, uplo, m, n, bf, FALSE, start, end ); } else // if dense or zeros { area = bli_thread_range_l2r ( thr, a, bmult, start, end ); } return area; } siz_t bli_thread_range_weighted_r2l ( thrinfo_t* thr, obj_t* a, blksz_t* bmult, dim_t* start, dim_t* end ) { siz_t area; // This function assigns area-weighted ranges in the n dimension // where the total range spans 0 to n-1 with 0 at the right end and // n-1 at the left end. 
if ( bli_obj_intersects_diag( a ) && bli_obj_is_upper_or_lower( a ) ) { num_t dt = bli_obj_dt( a ); doff_t diagoff = bli_obj_diag_offset( a ); uplo_t uplo = bli_obj_uplo( a ); dim_t m = bli_obj_length( a ); dim_t n = bli_obj_width( a ); dim_t bf = bli_blksz_get_def( dt, bmult ); // Support implicit transposition. if ( bli_obj_has_trans( a ) ) { bli_reflect_about_diag( &diagoff, &uplo, &m, &n ); } bli_rotate180_trapezoid( &diagoff, &uplo, &m, &n ); area = bli_thread_range_weighted_sub ( thr, diagoff, uplo, m, n, bf, TRUE, start, end ); } else // if dense or zeros { area = bli_thread_range_r2l ( thr, a, bmult, start, end ); } return area; } siz_t bli_thread_range_weighted_t2b ( thrinfo_t* thr, obj_t* a, blksz_t* bmult, dim_t* start, dim_t* end ) { siz_t area; // This function assigns area-weighted ranges in the m dimension // where the total range spans 0 to m-1 with 0 at the top end and // m-1 at the bottom end. if ( bli_obj_intersects_diag( a ) && bli_obj_is_upper_or_lower( a ) ) { num_t dt = bli_obj_dt( a ); doff_t diagoff = bli_obj_diag_offset( a ); uplo_t uplo = bli_obj_uplo( a ); dim_t m = bli_obj_length( a ); dim_t n = bli_obj_width( a ); dim_t bf = bli_blksz_get_def( dt, bmult ); // Support implicit transposition. if ( bli_obj_has_trans( a ) ) { bli_reflect_about_diag( &diagoff, &uplo, &m, &n ); } bli_reflect_about_diag( &diagoff, &uplo, &m, &n ); area = bli_thread_range_weighted_sub ( thr, diagoff, uplo, m, n, bf, FALSE, start, end ); } else // if dense or zeros { area = bli_thread_range_t2b ( thr, a, bmult, start, end ); } return area; } siz_t bli_thread_range_weighted_b2t ( thrinfo_t* thr, obj_t* a, blksz_t* bmult, dim_t* start, dim_t* end ) { siz_t area; // This function assigns area-weighted ranges in the m dimension // where the total range spans 0 to m-1 with 0 at the bottom end and // m-1 at the top end. 
if ( bli_obj_intersects_diag( a ) && bli_obj_is_upper_or_lower( a ) ) { num_t dt = bli_obj_dt( a ); doff_t diagoff = bli_obj_diag_offset( a ); uplo_t uplo = bli_obj_uplo( a ); dim_t m = bli_obj_length( a ); dim_t n = bli_obj_width( a ); dim_t bf = bli_blksz_get_def( dt, bmult ); // Support implicit transposition. if ( bli_obj_has_trans( a ) ) { bli_reflect_about_diag( &diagoff, &uplo, &m, &n ); } bli_reflect_about_diag( &diagoff, &uplo, &m, &n ); bli_rotate180_trapezoid( &diagoff, &uplo, &m, &n ); area = bli_thread_range_weighted_sub ( thr, diagoff, uplo, m, n, bf, TRUE, start, end ); } else // if dense or zeros { area = bli_thread_range_b2t ( thr, a, bmult, start, end ); } return area; } // ----------------------------------------------------------------------------- void bli_prime_factorization( dim_t n, bli_prime_factors_t* factors ) { factors->n = n; factors->sqrt_n = (dim_t)sqrt(n); factors->f = 2; } dim_t bli_next_prime_factor( bli_prime_factors_t* factors ) { // Return the prime factorization of the original number n one-by-one. // Return 1 after all factors have been exhausted. // Looping over possible factors in increasing order assures we will // only return prime factors (a la the Sieve of Eratosthenes). while ( factors->f <= factors->sqrt_n ) { // Special cases for factors 2-7 handle all numbers not divisible by 11 // or another larger prime. The slower loop version is used after that. // If you use a number of threads with large prime factors you get // what you deserve. 
if ( factors->f == 2 ) { if ( factors->n % 2 == 0 ) { factors->n /= 2; return 2; } factors->f = 3; } else if ( factors->f == 3 ) { if ( factors->n % 3 == 0 ) { factors->n /= 3; return 3; } factors->f = 5; } else if ( factors->f == 5 ) { if ( factors->n % 5 == 0 ) { factors->n /= 5; return 5; } factors->f = 7; } else if ( factors->f == 7 ) { if ( factors->n % 7 == 0 ) { factors->n /= 7; return 7; } factors->f = 11; } else { if ( factors->n % factors->f == 0 ) { factors->n /= factors->f; return factors->f; } factors->f++; } } // To get here we must be out of prime factors, leaving only n (if it is // prime) or an endless string of 1s. dim_t tmp = factors->n; factors->n = 1; return tmp; } void bli_partition_2x2( dim_t nthread, dim_t work1, dim_t work2, dim_t* nt1, dim_t* nt2 ) { // Partition a number of threads into two factors nt1 and nt2 such that // nt1/nt2 ~= work1/work2. There is a fast heuristic algorithm and a // slower optimal algorithm (which minimizes |nt1*work2 - nt2*work1|). // Return early small prime numbers of threads. if (nthread < 4) { *nt1 = ( work1 >= work2 ? nthread : 1 ); *nt2 = ( work1 < work2 ? nthread : 1 ); } *nt1 = 1; *nt2 = 1; // Both algorithms need the prime factorization of nthread. bli_prime_factors_t factors; bli_prime_factorization( nthread, &factors ); #if 1 // Fast algorithm: assign prime factors in increasing order to whichever // partition has more work to do. The work is divided by the number of // threads assigned at each iteration. This algorithm is sub-optimal, // for example in the partitioning of 12 with equal work (optimal solution // is 4x3, this algorithm finds 6x2). dim_t f; while ( ( f = bli_next_prime_factor( &factors ) ) > 1 ) { if ( work1 > work2 ) { work1 /= f; *nt1 *= f; } else { work2 /= f; *nt2 *= f; } } #else // Slow algorithm: exhaustively constructs all factor pairs of nthread and // chooses the best one. // Eight prime factors handles nthread up to 223092870. 
dim_t fact[8]; dim_t mult[8]; // There is always at least one prime factor, so use if for initialization. dim_t nfact = 1; fact[0] = bli_next_prime_factor( &factors ); mult[0] = 1; // Collect the remaining prime factors, accounting for multiplicity of // repeated factors. dim_t f; while ( ( f = bli_next_prime_factor( &factors ) ) > 1 ) { if ( f == fact[nfact-1] ) { mult[nfact-1]++; } else { nfact++; fact[nfact-1] = f; mult[nfact-1] = 1; } } // Now loop over all factor pairs. A single factor pair is denoted by how // many of each prime factor are included in the first factor (ntaken). dim_t ntake[8] = {0}; dim_t min_diff = INT_MAX; // Loop over how many prime factors to assign to the first factor in the // pair, for each prime factor. The total number of iterations is // \Prod_{i=0}^{nfact-1} mult[i]. bool done = false; while ( !done ) { dim_t x = 1; dim_t y = 1; // Form the factors by integer exponentiation and accumulation. for (dim_t i = 0 ; i < nfact ; i++ ) { x *= bli_ipow( fact[i], ntake[i] ); y *= bli_ipow( fact[i], mult[i]-ntake[i] ); } // Check if this factor pair is optimal by checking // |nt1*work2 - nt2*work1|. dim_t diff = llabs( x*work2 - y*work1 ); if ( diff < min_diff ) { min_diff = diff; *nt1 = x; *nt2 = y; } // Go to the next factor pair by doing an "odometer loop". 
for ( dim_t i = 0 ; i < nfact ; i++ ) { if ( ++ntake[i] > mult[i] ) { ntake[i] = 0; if ( i == nfact-1 ) done = true; else continue; } break; } } #endif } // ----------------------------------------------------------------------------- dim_t bli_gcd( dim_t x, dim_t y ) { while ( y != 0 ) { dim_t t = y; y = x % y; x = t; } return x; } dim_t bli_lcm( dim_t x, dim_t y) { return x * y / bli_gcd( x, y ); } dim_t bli_ipow( dim_t base, dim_t power ) { dim_t p = 1; for ( dim_t mask = 0x1 ; mask <= power ; mask <<= 1 ) { if ( power & mask ) p *= base; base *= base; } return p; } // ----------------------------------------------------------------------------- dim_t bli_thread_get_jc_nt( void ) { // We must ensure that global_rntm has been initialized. bli_init_once(); return bli_rntm_jc_ways( &global_rntm ); } dim_t bli_thread_get_pc_nt( void ) { // We must ensure that global_rntm has been initialized. bli_init_once(); return bli_rntm_pc_ways( &global_rntm ); } dim_t bli_thread_get_ic_nt( void ) { // We must ensure that global_rntm has been initialized. bli_init_once(); return bli_rntm_ic_ways( &global_rntm ); } dim_t bli_thread_get_jr_nt( void ) { // We must ensure that global_rntm has been initialized. bli_init_once(); return bli_rntm_jr_ways( &global_rntm ); } dim_t bli_thread_get_ir_nt( void ) { // We must ensure that global_rntm has been initialized. bli_init_once(); return bli_rntm_ir_ways( &global_rntm ); } dim_t bli_thread_get_num_threads( void ) { // We must ensure that global_rntm has been initialized. bli_init_once(); return bli_rntm_num_threads( &global_rntm ); } // ---------------------------------------------------------------------------- void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ) { // We must ensure that global_rntm has been initialized. bli_init_once(); // Acquire the mutex protecting global_rntm. 
bli_pthread_mutex_lock( &global_rntm_mutex ); bli_rntm_set_ways_only( jc, pc, ic, jr, ir, &global_rntm ); // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); } void bli_thread_set_num_threads( dim_t n_threads ) { // We must ensure that global_rntm has been initialized. bli_init_once(); // Acquire the mutex protecting global_rntm. bli_pthread_mutex_lock( &global_rntm_mutex ); bli_rntm_set_num_threads_only( n_threads, &global_rntm ); // Release the mutex protecting global_rntm. bli_pthread_mutex_unlock( &global_rntm_mutex ); } // ---------------------------------------------------------------------------- void bli_thread_init_rntm_from_env ( rntm_t* rntm ) { // NOTE: We don't need to acquire the global_rntm_mutex here because this // function is only called from bli_thread_init(), which is only called // by bli_init_once(). dim_t nt; dim_t jc, pc, ic, jr, ir; #ifdef BLIS_ENABLE_MULTITHREADING // Try to read BLIS_NUM_THREADS first. nt = bli_env_get_var( "BLIS_NUM_THREADS", -1 ); // If BLIS_NUM_THREADS was not set, try to read OMP_NUM_THREADS. if ( nt == -1 ) nt = bli_env_get_var( "OMP_NUM_THREADS", -1 ); // Read the environment variables for the number of threads (ways // of parallelism) for each individual loop. jc = bli_env_get_var( "BLIS_JC_NT", -1 ); pc = bli_env_get_var( "BLIS_PC_NT", -1 ); ic = bli_env_get_var( "BLIS_IC_NT", -1 ); jr = bli_env_get_var( "BLIS_JR_NT", -1 ); ir = bli_env_get_var( "BLIS_IR_NT", -1 ); // If any BLIS_*_NT environment variable was set, then we ignore the // value of BLIS_NUM_THREADS or OMP_NUM_THREADS and use the // BLIS_*_NT values instead (with unset variables being assumed to // contain 1). if ( jc != -1 || pc != -1 || ic != -1 || jr != -1 || ir != -1 ) { if ( jc == -1 ) jc = 1; if ( pc == -1 ) pc = 1; if ( ic == -1 ) ic = 1; if ( jr == -1 ) jr = 1; if ( ir == -1 ) ir = 1; // Unset the value for nt. 
nt = -1; } // By this time, either nt is set and the ways for each loop // are all unset, OR nt is unset and the ways for each loop // are all set. #else // When multithreading is disabled, always set the rntm_t ways // values to 1. nt = -1; jc = pc = ic = jr = ir = 1; #endif // Save the results back in the runtime object. bli_rntm_set_num_threads_only( nt, rntm ); bli_rntm_set_ways_only( jc, pc, ic, jr, ir, rntm ); #if 0 printf( "bli_thread_init_rntm_from_env()\n" ); bli_rntm_print( rntm ); #endif } blis-0.6.1/frame/thread/bli_thread.h000066400000000000000000000174621360743507500173020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_THREAD_H #define BLIS_THREAD_H // Include thread communicator (thrcomm_t) object definitions and prototypes. #include "bli_thrcomm.h" // Include thread info (thrinfo_t) object definitions and prototypes. #include "bli_thrinfo.h" // Include some operation-specific thrinfo_t prototypes. // Note that the bli_packm_thrinfo.h must be included before the others! #include "bli_packm_thrinfo.h" #include "bli_l3_thrinfo.h" // Include the level-3 thread decorator and related definitions and prototypes // for the conventional code path. #include "bli_l3_decor.h" // Include the level-3 thread decorator and related definitions and prototypes // for the sup code path. #include "bli_l3_sup_decor.h" // Initialization-related prototypes. void bli_thread_init( void ); void bli_thread_finalize( void ); #ifdef _MSC_VER #define strerror_r(errno,buf,len) strerror_s(buf,len,errno) #endif // Thread range-related prototypes. 
BLIS_EXPORT_BLIS void bli_thread_range_sub
     (
       thrinfo_t* thread,
       dim_t      n,
       dim_t      bf,
       bool_t     handle_edge_low,
       dim_t*     start,
       dim_t*     end
     );

// Prototype generator for the object-based range functions that take the
// three operands of a level-3 operation plus control/context trees.
// NOTE(review): PASTEMAC0 is defined elsewhere; presumably it forms the
// final function name from opname -- confirm.
#undef  GENPROT
#define GENPROT( opname ) \
\
siz_t PASTEMAC0( opname ) \
     ( \
       dir_t      direct, \
       thrinfo_t* thr, \
       obj_t*     a, \
       obj_t*     b, \
       obj_t*     c, \
       cntl_t*    cntl, \
       cntx_t*    cntx, \
       dim_t*     start, \
       dim_t*     end \
     );

GENPROT( thread_range_mdim )
GENPROT( thread_range_ndim )

// Prototype generator for the single-object range functions, one per
// traversal direction (l2r, r2l, t2b, b2t), in plain and weighted variants.
#undef  GENPROT
#define GENPROT( opname ) \
\
siz_t PASTEMAC0( opname ) \
     ( \
       thrinfo_t* thr, \
       obj_t*     a, \
       blksz_t*   bmult, \
       dim_t*     start, \
       dim_t*     end \
     );

GENPROT( thread_range_l2r )
GENPROT( thread_range_r2l )
GENPROT( thread_range_t2b )
GENPROT( thread_range_b2t )

GENPROT( thread_range_weighted_l2r )
GENPROT( thread_range_weighted_r2l )
GENPROT( thread_range_weighted_t2b )
GENPROT( thread_range_weighted_b2t )

// Helpers for the weighted partitioning functions.

dim_t bli_thread_range_width_l
     (
       doff_t diagoff_j,
       dim_t  m,
       dim_t  n_j,
       dim_t  j,
       dim_t  n_way,
       dim_t  bf,
       dim_t  bf_left,
       double area_per_thr,
       bool_t handle_edge_low
     );
siz_t bli_find_area_trap_l
     (
       dim_t  m,
       dim_t  n,
       doff_t diagoff
     );
siz_t bli_thread_range_weighted_sub
     (
       thrinfo_t* restrict thread,
       doff_t              diagoff,
       uplo_t              uplo,
       dim_t               m,
       dim_t               n,
       dim_t               bf,
       bool_t              handle_edge_low,
       dim_t*     restrict j_start_thr,
       dim_t*     restrict j_end_thr
     );

// -----------------------------------------------------------------------------

// Factorization and partitioning prototypes

// State carried between successive calls to bli_next_prime_factor().
typedef struct
{
	dim_t n;
	dim_t sqrt_n;
	dim_t f;
} bli_prime_factors_t;

void bli_prime_factorization(dim_t n, bli_prime_factors_t* factors);

dim_t bli_next_prime_factor(bli_prime_factors_t* factors);

// Writes a factor pair to *nt1 and *nt2, chosen to minimize
// |nt1*work2 - nt2*work1|.
void bli_partition_2x2(dim_t nthread, dim_t work1, dim_t work2, dim_t* nt1, dim_t* nt2);

// -----------------------------------------------------------------------------

// Integer helpers: greatest common divisor, least common multiple, and
// integer exponentiation.
dim_t bli_gcd( dim_t x, dim_t y );
dim_t bli_lcm( dim_t x, dim_t y );
dim_t bli_ipow( dim_t base, dim_t power );

// -----------------------------------------------------------------------------

// Query the jc-loop ways of parallelism stored in the global runtime object.
BLIS_EXPORT_BLIS dim_t bli_thread_get_jc_nt( void );
BLIS_EXPORT_BLIS dim_t bli_thread_get_pc_nt( void ); BLIS_EXPORT_BLIS dim_t bli_thread_get_ic_nt( void ); BLIS_EXPORT_BLIS dim_t bli_thread_get_jr_nt( void ); BLIS_EXPORT_BLIS dim_t bli_thread_get_ir_nt( void ); BLIS_EXPORT_BLIS dim_t bli_thread_get_num_threads( void ); BLIS_EXPORT_BLIS void bli_thread_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir ); BLIS_EXPORT_BLIS void bli_thread_set_num_threads( dim_t value ); void bli_thread_init_rntm_from_env( rntm_t* rntm ); // ----------------------------------------------------------------------------- static void bli_thread_range_jrir_rr ( thrinfo_t* thread, dim_t n, dim_t bf, bool_t handle_edge_low, dim_t* start, dim_t* end, dim_t* inc ) { // Use interleaved partitioning of jr/ir loops. *start = bli_thread_work_id( thread ); *inc = bli_thread_n_way( thread ); *end = n; } static void bli_thread_range_jrir_sl ( thrinfo_t* thread, dim_t n, dim_t bf, bool_t handle_edge_low, dim_t* start, dim_t* end, dim_t* inc ) { // Use contiguous slab partitioning of jr/ir loops. bli_thread_range_sub( thread, n, bf, handle_edge_low, start, end ); *inc = 1; } static void bli_thread_range_jrir ( thrinfo_t* thread, dim_t n, dim_t bf, bool_t handle_edge_low, dim_t* start, dim_t* end, dim_t* inc ) { // Define a general-purpose version of bli_thread_range_jrir() whose // definition depends on whether slab or round-robin partitioning was // requested at configure-time. #ifdef BLIS_ENABLE_JRIR_SLAB bli_thread_range_jrir_sl( thread, n, bf, handle_edge_low, start, end, inc ); #else bli_thread_range_jrir_rr( thread, n, bf, handle_edge_low, start, end, inc ); #endif } #if 0 static void bli_thread_range_weighted_jrir ( thrinfo_t* thread, doff_t diagoff, uplo_t uplo, dim_t m, dim_t n, dim_t bf, bool_t handle_edge_low, dim_t* start, dim_t* end, dim_t* inc ) { #ifdef BLIS_ENABLE_JRIR_SLAB // Use contiguous slab partitioning for jr/ir loops. 
bli_thread_range_weighted_sub( thread, diagoff, uplo, m, n, bf, handle_edge_low, start, end ); *start = *start / bf; *inc = 1; if ( *end % bf ) *end = *end / bf + 1; else *end = *end / bf; #else // Use interleaved partitioning of jr/ir loops. *start = bli_thread_work_id( thread ); *inc = bli_thread_n_way( thread ); *end = n; #endif } #endif #endif blis-0.6.1/frame/thread/bli_thrinfo.c000066400000000000000000000461661360743507500175020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" thrinfo_t* bli_thrinfo_create ( rntm_t* rntm, thrcomm_t* ocomm, dim_t ocomm_id, dim_t n_way, dim_t work_id, bool_t free_comm, bszid_t bszid, thrinfo_t* sub_node ) { #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrinfo_create(): " ); #endif thrinfo_t* thread = bli_sba_acquire( rntm, sizeof( thrinfo_t ) ); bli_thrinfo_init ( thread, ocomm, ocomm_id, n_way, work_id, free_comm, bszid, sub_node ); return thread; } void bli_thrinfo_init ( thrinfo_t* thread, thrcomm_t* ocomm, dim_t ocomm_id, dim_t n_way, dim_t work_id, bool_t free_comm, bszid_t bszid, thrinfo_t* sub_node ) { thread->ocomm = ocomm; thread->ocomm_id = ocomm_id; thread->n_way = n_way; thread->work_id = work_id; thread->free_comm = free_comm; thread->bszid = bszid; thread->sub_prenode = NULL; thread->sub_node = sub_node; } void bli_thrinfo_init_single ( thrinfo_t* thread ) { bli_thrinfo_init ( thread, &BLIS_SINGLE_COMM, 0, 1, 0, FALSE, BLIS_NO_PART, thread ); } void bli_thrinfo_free ( rntm_t* rntm, thrinfo_t* thread ) { if ( thread == NULL || thread == &BLIS_PACKM_SINGLE_THREADED || thread == &BLIS_GEMM_SINGLE_THREADED ) return; thrinfo_t* thrinfo_sub_prenode = bli_thrinfo_sub_prenode( thread ); thrinfo_t* thrinfo_sub_node = bli_thrinfo_sub_node( thread ); // Recursively free all children of the current thrinfo_t. if ( thrinfo_sub_prenode != NULL ) { bli_thrinfo_free( rntm, thrinfo_sub_prenode ); } // Recursively free all children of the current thrinfo_t. 
if ( thrinfo_sub_node != NULL ) { bli_thrinfo_free( rntm, thrinfo_sub_node ); } // Free the communicators, but only if the current thrinfo_t struct // is marked as needing them to be freed. The most common example of // thrinfo_t nodes NOT marked as needing their comms freed are those // associated with packm thrinfo_t nodes. if ( bli_thrinfo_needs_free_comm( thread ) ) { // The ochief always frees his communicator. if ( bli_thread_am_ochief( thread ) ) bli_thrcomm_free( rntm, bli_thrinfo_ocomm( thread ) ); } #ifdef BLIS_ENABLE_MEM_TRACING printf( "bli_thrinfo_free(): " ); #endif // Free the thrinfo_t struct. bli_sba_release( rntm, thread ); } // ----------------------------------------------------------------------------- void bli_thrinfo_grow ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { // First, consider the prenode branch of the thrinfo_t tree, which should be // expanded only if there exists a prenode branch in the cntl_t tree. if ( bli_cntl_sub_prenode( cntl ) != NULL ) { // We only need to take action if the thrinfo_t sub-node is NULL; if it // is non-NULL, then it has already been created and we'll use it as-is. if ( bli_thrinfo_sub_prenode( thread ) == NULL ) { // Assertion / sanity check. if ( bli_cntl_bszid( cntl ) != BLIS_MC ) { printf( "Assertion failed: Expanding prenode for non-IC loop?\n" ); bli_abort(); } // Now we must create the packa, jr, and ir nodes that make up // the prenode branch of current cntl_t node. // Create a new node (or, if needed, multiple nodes) along the // prenode branch of the tree and return the pointer to the // (highest) child. thrinfo_t* thread_prenode = bli_thrinfo_rgrow_prenode ( rntm, cntl, bli_cntl_sub_prenode( cntl ), thread ); // Attach the child thrinfo_t node for the secondary branch to its // parent structure. bli_thrinfo_set_sub_prenode( thread_prenode, thread ); } } // Now, grow the primary branch of the thrinfo_t tree. 
// NOTE: If bli_thrinfo_rgrow() is being called, the sub_node field will // always be non-NULL, and so there's no need to check it. //if ( bli_cntl_sub_node( cntl ) != NULL ) { // We only need to take action if the thrinfo_t sub-node is NULL; if it // is non-NULL, then it has already been created and we'll use it as-is. if ( bli_thrinfo_sub_node( thread ) == NULL ) { // Create a new node (or, if needed, multiple nodes) along the // main sub-node branch of the tree and return the pointer to the // (highest) child. thrinfo_t* thread_child = bli_thrinfo_rgrow ( rntm, cntl, bli_cntl_sub_node( cntl ), thread ); // Attach the child thrinfo_t node for the primary branch to its // parent structure. bli_thrinfo_set_sub_node( thread_child, thread ); } } } // ----------------------------------------------------------------------------- thrinfo_t* bli_thrinfo_rgrow ( rntm_t* rntm, cntl_t* cntl_par, cntl_t* cntl_cur, thrinfo_t* thread_par ) { thrinfo_t* thread_cur; // We must handle two cases: those where the next node in the // control tree is a partitioning node, and those where it is // a non-partitioning (ie: packing) node. if ( bli_cntl_bszid( cntl_cur ) != BLIS_NO_PART ) { // Create the child thrinfo_t node corresponding to cntl_cur, // with cntl_par being the parent. thread_cur = bli_thrinfo_create_for_cntl ( rntm, cntl_par, cntl_cur, thread_par ); } else // if ( bli_cntl_bszid( cntl_cur ) == BLIS_NO_PART ) { // Recursively grow the thread structure and return the top-most // thrinfo_t node of that segment. thrinfo_t* thread_seg = bli_thrinfo_rgrow ( rntm, cntl_par, bli_cntl_sub_node( cntl_cur ), thread_par ); // Create a thrinfo_t node corresponding to cntl_cur. Since the // corresponding cntl node, cntl_cur, is a non-partitioning node // (bszid = BLIS_NO_PART), this means it's a packing node. Packing // thrinfo_t nodes are formed differently than those corresponding to // partitioning nodes; specifically, their work_id's are set equal to // the their comm_id's. 
Also, notice that the free_comm field is set // to FALSE since cntl_cur is a non-partitioning node. The reason: // the communicator used here will be freed when thread_seg, or one // of its descendents, is freed. thread_cur = bli_thrinfo_create ( rntm, // rntm bli_thrinfo_ocomm( thread_seg ), // ocomm bli_thread_ocomm_id( thread_seg ), // ocomm_id bli_cntl_calc_num_threads_in( rntm, cntl_cur ), // n_way bli_thread_ocomm_id( thread_seg ), // work_id FALSE, // free_comm BLIS_NO_PART, // bszid thread_seg // sub_node ); } return thread_cur; } #define BLIS_NUM_STATIC_COMMS 80 thrinfo_t* bli_thrinfo_create_for_cntl ( rntm_t* rntm, cntl_t* cntl_par, cntl_t* cntl_chl, thrinfo_t* thread_par ) { thrcomm_t* static_comms[ BLIS_NUM_STATIC_COMMS ]; thrcomm_t** new_comms = NULL; const bszid_t bszid_chl = bli_cntl_bszid( cntl_chl ); const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); const dim_t parent_n_way = bli_thread_n_way( thread_par ); const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); const dim_t parent_work_id = bli_thread_work_id( thread_par ); // Sanity check: make sure the number of threads in the parent's // communicator is divisible by the number of new sub-groups. if ( parent_nt_in % parent_n_way != 0 ) { printf( "Assertion failed: parent_nt_in parent_n_way != 0\n" ); bli_abort(); } // Compute: // - the number of threads inside the new child comm, // - the current thread's id within the new communicator, // - the current thread's work id, given the ways of parallelism // to be obtained within the next loop. 
const dim_t child_nt_in = bli_cntl_calc_num_threads_in( rntm, cntl_chl ); const dim_t child_n_way = bli_rntm_ways_for( bszid_chl, rntm ); const dim_t child_comm_id = parent_comm_id % child_nt_in; const dim_t child_work_id = child_comm_id / ( child_nt_in / child_n_way ); //printf( "thread %d: child_n_way = %d child_nt_in = %d parent_n_way = %d (bszid = %d->%d)\n", (int)child_comm_id, (int)child_nt_in, (int)child_n_way, (int)parent_n_way, (int)bli_cntl_bszid( cntl_par ), (int)bszid_chl ); // The parent's chief thread creates a temporary array of thrcomm_t // pointers. if ( bli_thread_am_ochief( thread_par ) ) { if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) new_comms = bli_malloc_intl( parent_n_way * sizeof( thrcomm_t* ) ); else new_comms = static_comms; } // Broadcast the temporary array to all threads in the parent's // communicator. new_comms = bli_thread_obroadcast( thread_par, new_comms ); // Chiefs in the child communicator allocate the communicator // object and store it in the array element corresponding to the // parent's work id. if ( child_comm_id == 0 ) new_comms[ parent_work_id ] = bli_thrcomm_create( rntm, child_nt_in ); bli_thread_obarrier( thread_par ); // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. thrinfo_t* thread_chl = bli_thrinfo_create ( rntm, // rntm new_comms[ parent_work_id ], // ocomm child_comm_id, // ocomm_id child_n_way, // n_way child_work_id, // work_id TRUE, // free_comm bszid_chl, // bszid NULL // sub_node ); bli_thread_obarrier( thread_par ); // The parent's chief thread frees the temporary array of thrcomm_t // pointers. 
if ( bli_thread_am_ochief( thread_par ) ) { if ( parent_n_way > BLIS_NUM_STATIC_COMMS ) bli_free_intl( new_comms ); } return thread_chl; } // ----------------------------------------------------------------------------- thrinfo_t* bli_thrinfo_rgrow_prenode ( rntm_t* rntm, cntl_t* cntl_par, cntl_t* cntl_cur, thrinfo_t* thread_par ) { thrinfo_t* thread_cur; // We must handle two cases: those where the next node in the // control tree is a partitioning node, and those where it is // a non-partitioning (ie: packing) node. if ( bli_cntl_bszid( cntl_cur ) != BLIS_NO_PART ) { // Create the child thrinfo_t node corresponding to cntl_cur, // with cntl_par being the parent. thread_cur = bli_thrinfo_create_for_cntl_prenode ( rntm, cntl_par, cntl_cur, thread_par ); } else // if ( bli_cntl_bszid( cntl_cur ) == BLIS_NO_PART ) { // Recursively grow the thread structure and return the top-most // thrinfo_t node of that segment. thrinfo_t* thread_seg = bli_thrinfo_rgrow_prenode ( rntm, cntl_par, bli_cntl_sub_node( cntl_cur ), thread_par ); // Create a thrinfo_t node corresponding to cntl_cur. Since the // corresponding cntl node, cntl_cur, is a non-partitioning node // (bszid = BLIS_NO_PART), this means it's a packing node. Packing // thrinfo_t nodes are formed differently than those corresponding to // partitioning nodes; specifically, their work_id's are set equal to // the their comm_id's. Also, notice that the free_comm field is set // to FALSE since cntl_cur is a non-partitioning node. The reason: // the communicator used here will be freed when thread_seg, or one // of its descendents, is freed. 
thread_cur = bli_thrinfo_create ( rntm, // rntm bli_thrinfo_ocomm( thread_seg ), // ocomm bli_thread_ocomm_id( thread_seg ), // ocomm_id bli_cntl_calc_num_threads_in( rntm, cntl_par ), // n_way bli_thread_ocomm_id( thread_seg ), // work_id FALSE, // free_comm BLIS_NO_PART, // bszid thread_seg // sub_node ); } return thread_cur; } thrinfo_t* bli_thrinfo_create_for_cntl_prenode ( rntm_t* rntm, cntl_t* cntl_par, cntl_t* cntl_chl, thrinfo_t* thread_par ) { // NOTE: This function only has to work for the ic -> (pa -> jr) // thrinfo_t tree branch extension. After that, the function // bli_thrinfo_create_for_cntl() will be called for the last jr->ir // branch extension. const bszid_t bszid_chl = bli_cntl_bszid( cntl_chl ); const dim_t parent_nt_in = bli_thread_num_threads( thread_par ); const dim_t parent_n_way = bli_thread_n_way( thread_par ); const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par ); //const dim_t parent_work_id = bli_thread_work_id( thread_par ); // Sanity check: make sure the number of threads in the parent's // communicator is divisible by the number of new sub-groups. if ( parent_nt_in % parent_n_way != 0 ) { printf( "Assertion failed: parent_nt_in (%d) parent_n_way (%d) != 0\n", ( int )parent_nt_in, ( int )parent_n_way ); bli_abort(); } //dim_t child_nt_in = bli_cntl_calc_num_threads_in( rntm, cntl_chl ); //dim_t child_n_way = bli_rntm_ways_for( bszid_chl, rntm ); const dim_t child_nt_in = parent_nt_in; const dim_t child_n_way = parent_nt_in; const dim_t child_comm_id = parent_comm_id % child_nt_in; const dim_t child_work_id = child_comm_id / ( child_nt_in / child_n_way ); bli_thread_obarrier( thread_par ); // NOTE: Recall that parent_comm_id == child_comm_id, so checking for the // parent's chief-ness is equivalent to checking for chief-ness in the new // about-to-be-created communicator group. 
thrcomm_t* new_comm = NULL; if ( bli_thread_am_ochief( thread_par ) ) new_comm = bli_thrcomm_create( rntm, child_nt_in ); // Broadcast the new thrcomm_t address to the other threads in the // parent's group. new_comm = bli_thread_obroadcast( thread_par, new_comm ); // All threads create a new thrinfo_t node using the communicator // that was created by their chief, as identified by parent_work_id. thrinfo_t* thread_chl = bli_thrinfo_create ( rntm, // rntm new_comm, // ocomm child_comm_id, // ocomm_id child_n_way, // n_way child_work_id, // work_id TRUE, // free_comm bszid_chl, // bszid NULL // sub_node ); bli_thread_obarrier( thread_par ); return thread_chl; } // ----------------------------------------------------------------------------- #if 0 void bli_thrinfo_grow_tree ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { cntl_t* cntl_jc = cntl; thrinfo_t* thrinfo_jc = thread; bli_thrinfo_grow( rntm, cntl_jc, thrinfo_jc ); // inside jc loop: cntl_t* cntl_pc = bli_cntl_sub_node( cntl_jc ); thrinfo_t* thrinfo_pc = bli_thrinfo_sub_node( thrinfo_jc ); bli_thrinfo_grow( rntm, cntl_pc, thrinfo_pc ); // inside pc loop: cntl_t* cntl_pb = bli_cntl_sub_node( cntl_pc ); thrinfo_t* thrinfo_pb = bli_thrinfo_sub_node( thrinfo_pc ); bli_thrinfo_grow( rntm, cntl_pb, thrinfo_pb ); // after pb packing: cntl_t* cntl_ic = bli_cntl_sub_node( cntl_pb ); thrinfo_t* thrinfo_ic = bli_thrinfo_sub_node( thrinfo_pb ); bli_thrinfo_grow( rntm, cntl_ic, thrinfo_ic ); // -- main branch -- // inside ic loop: cntl_t* cntl_pa = bli_cntl_sub_node( cntl_ic ); thrinfo_t* thrinfo_pa = bli_thrinfo_sub_node( thrinfo_ic ); bli_thrinfo_grow( rntm, cntl_pa, thrinfo_pa ); // after pa packing: cntl_t* cntl_jr = bli_cntl_sub_node( cntl_pa ); thrinfo_t* thrinfo_jr = bli_thrinfo_sub_node( thrinfo_pa ); bli_thrinfo_grow( rntm, cntl_jr, thrinfo_jr ); // inside jr loop: //cntl_t* cntl_ir = bli_cntl_sub_node( cntl_jr ); //thrinfo_t* thrinfo_ir = bli_thrinfo_sub_node( thrinfo_jr ); // -- trsm branch -- // inside ic 
loop: cntl_t* cntl_pa0 = bli_cntl_sub_prenode( cntl_ic ); thrinfo_t* thrinfo_pa0 = bli_thrinfo_sub_prenode( thrinfo_ic ); bli_thrinfo_grow( rntm, cntl_pa0, thrinfo_pa0 ); // after pa packing: cntl_t* cntl_jr0 = bli_cntl_sub_node( cntl_pa0 ); thrinfo_t* thrinfo_jr0 = bli_thrinfo_sub_node( thrinfo_pa0 ); bli_thrinfo_grow( rntm, cntl_jr0, thrinfo_jr0 ); // inside jr loop: //cntl_t* cntl_ir0 = bli_cntl_sub_node( cntl_jr0 ); //thrinfo_t* thrinfo_ir0= bli_thrinfo_sub_node( thrinfo_jr0 ); } void bli_thrinfo_grow_tree_ic ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { cntl_t* cntl_ic = cntl; thrinfo_t* thrinfo_ic = thread; bli_thrinfo_grow( rntm, cntl_ic, thrinfo_ic ); // -- main branch -- // inside ic loop: cntl_t* cntl_pa = bli_cntl_sub_node( cntl_ic ); thrinfo_t* thrinfo_pa = bli_thrinfo_sub_node( thrinfo_ic ); bli_thrinfo_grow( rntm, cntl_pa, thrinfo_pa ); // after pa packing: cntl_t* cntl_jr = bli_cntl_sub_node( cntl_pa ); thrinfo_t* thrinfo_jr = bli_thrinfo_sub_node( thrinfo_pa ); bli_thrinfo_grow( rntm, cntl_jr, thrinfo_jr ); // inside jr loop: //cntl_t* cntl_ir = bli_cntl_sub_node( cntl_jr ); //thrinfo_t* thrinfo_ir = bli_thrinfo_sub_node( thrinfo_jr ); // -- trsm branch -- // inside ic loop: cntl_t* cntl_pa0 = bli_cntl_sub_prenode( cntl_ic ); thrinfo_t* thrinfo_pa0 = bli_thrinfo_sub_prenode( thrinfo_ic ); bli_thrinfo_grow( rntm, cntl_pa0, thrinfo_pa0 ); // after pa packing: cntl_t* cntl_jr0 = bli_cntl_sub_node( cntl_pa0 ); thrinfo_t* thrinfo_jr0 = bli_thrinfo_sub_node( thrinfo_pa0 ); bli_thrinfo_grow( rntm, cntl_jr0, thrinfo_jr0 ); // inside jr loop: //cntl_t* cntl_ir0 = bli_cntl_sub_node( cntl_jr0 ); //thrinfo_t* thrinfo_ir0= bli_thrinfo_sub_node( thrinfo_jr0 ); } #endif blis-0.6.1/frame/thread/bli_thrinfo.h000066400000000000000000000140141360743507500174720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#ifndef BLIS_THRINFO_H
#define BLIS_THRINFO_H

// Thread info structure definition. One node describes a thread's role at
// one level of the thread tree.
struct thrinfo_s
{
	// The thread communicator for the other threads sharing the same work
	// at this level.
	thrcomm_t*         ocomm;

	// Our thread id within the ocomm thread communicator.
	dim_t              ocomm_id;

	// The number of distinct threads used to parallelize the loop.
	dim_t              n_way;

	// What we're working on.
	dim_t              work_id;

	// When freeing, should the communicators in this node be freed? Usually,
	// this field is true, but when nodes are created that share the same
	// communicators as other nodes (such as with packm nodes), this is set
	// to false.
	bool_t             free_comm;

	// The bszid_t to help identify the node. This is mostly only useful when
	// debugging or tracing the allocation and release of thrinfo_t nodes.
	bszid_t            bszid;

	// Child along the prenode (secondary) branch of the tree.
	struct thrinfo_s*  sub_prenode;

	// Child along the primary branch of the tree.
	struct thrinfo_s*  sub_node;
};
typedef struct thrinfo_s thrinfo_t;

//
// thrinfo_t functions
// NOTE: The naming of these should be made consistent at some point.
// (ie: bli_thrinfo_ vs. bli_thread_)
//

// thrinfo_t query (field only)

// Number of threads in this node's communicator (read from the thrcomm_t,
// not from the thrinfo_t itself).
static dim_t bli_thread_num_threads( thrinfo_t* t )
{
	return (t->ocomm)->n_threads;
}

// This thread's id within its communicator.
static dim_t bli_thread_ocomm_id( thrinfo_t* t )
{
	return t->ocomm_id;
}

// Ways of parallelism at this node.
static dim_t bli_thread_n_way( thrinfo_t* t )
{
	return t->n_way;
}

// This thread's work id at this node.
static dim_t bli_thread_work_id( thrinfo_t* t )
{
	return t->work_id;
}

// The communicator attached to this node.
static thrcomm_t* bli_thrinfo_ocomm( thrinfo_t* t )
{
	return t->ocomm;
}

// Whether this node is marked as needing its communicator freed.
static bool_t bli_thrinfo_needs_free_comm( thrinfo_t* t )
{
	return t->free_comm;
}

// The blocksize id stored in this node.
// NOTE(review): the field is a bszid_t but the return type is dim_t.
static dim_t bli_thread_bszid( thrinfo_t* t )
{
	return t->bszid;
}

// Child along the primary branch (may be NULL).
static thrinfo_t* bli_thrinfo_sub_node( thrinfo_t* t )
{
	return t->sub_node;
}

// Child along the prenode branch (may be NULL).
static thrinfo_t* bli_thrinfo_sub_prenode( thrinfo_t* t )
{
	return t->sub_prenode;
}

// thrinfo_t query (complex)

// A thread is the chief of its communicator when its id within it is 0.
static bool_t bli_thread_am_ochief( thrinfo_t* t )
{
	return t->ocomm_id == 0;
}

// thrinfo_t modification

// Attach sub_node as the primary-branch child of t. Note the argument
// order: the child comes first, the parent second.
static void bli_thrinfo_set_sub_node( thrinfo_t* sub_node, thrinfo_t* t )
{
	t->sub_node = sub_node;
}

// Attach sub_prenode as the prenode-branch child of t (child first, parent
// second).
static void bli_thrinfo_set_sub_prenode( thrinfo_t* sub_prenode, thrinfo_t* t )
{
	t->sub_prenode = sub_prenode;
}

// other thrinfo_t-related functions

// Forward p to bli_thrcomm_bcast() on this node's communicator and return
// its result.
static void* bli_thread_obroadcast( thrinfo_t* t, void* p )
{
	return bli_thrcomm_bcast( t->ocomm_id, p, t->ocomm );
}

// Call bli_thrcomm_barrier() on this node's communicator.
static void bli_thread_obarrier( thrinfo_t* t )
{
	bli_thrcomm_barrier( t->ocomm_id, t->ocomm );
}

//
// Prototypes
for level-3 thrinfo functions not specific to any operation. // thrinfo_t* bli_thrinfo_create ( rntm_t* rntm, thrcomm_t* ocomm, dim_t ocomm_id, dim_t n_way, dim_t work_id, bool_t free_comm, bszid_t bszid, thrinfo_t* sub_node ); void bli_thrinfo_init ( thrinfo_t* thread, thrcomm_t* ocomm, dim_t ocomm_id, dim_t n_way, dim_t work_id, bool_t free_comm, bszid_t bszid, thrinfo_t* sub_node ); void bli_thrinfo_init_single ( thrinfo_t* thread ); void bli_thrinfo_free ( rntm_t* rntm, thrinfo_t* thread ); // ----------------------------------------------------------------------------- void bli_thrinfo_grow ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); thrinfo_t* bli_thrinfo_rgrow ( rntm_t* rntm, cntl_t* cntl_par, cntl_t* cntl_cur, thrinfo_t* thread_par ); thrinfo_t* bli_thrinfo_create_for_cntl ( rntm_t* rntm, cntl_t* cntl_par, cntl_t* cntl_chl, thrinfo_t* thread_par ); thrinfo_t* bli_thrinfo_rgrow_prenode ( rntm_t* rntm, cntl_t* cntl_par, cntl_t* cntl_cur, thrinfo_t* thread_par ); thrinfo_t* bli_thrinfo_create_for_cntl_prenode ( rntm_t* rntm, cntl_t* cntl_par, cntl_t* cntl_chl, thrinfo_t* thread_par ); // ----------------------------------------------------------------------------- #if 0 void bli_thrinfo_grow_tree ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); void bli_thrinfo_grow_tree_ic ( rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); #endif #endif blis-0.6.1/frame/thread/old/000077500000000000000000000000001360743507500156005ustar00rootroot00000000000000blis-0.6.1/frame/thread/old/bli_mutex.h000066400000000000000000000040061360743507500177410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MUTEX_H #define BLIS_MUTEX_H // Include definitions (mostly mtx_t) specific to the method of // multithreading. #include "bli_mutex_single.h" #include "bli_mutex_openmp.h" #include "bli_mutex_pthreads.h" // Thread mutex prototypes. #endif blis-0.6.1/frame/thread/old/bli_mutex_openmp.h000066400000000000000000000045651360743507500213310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MUTEX_OPENMP_H #define BLIS_MUTEX_OPENMP_H // Define mutex_t for situations when OpenMP multithreading is enabled. #ifdef BLIS_ENABLE_OPENMP #include // Define mtx_t. typedef struct mtx_s { omp_lock_t mutex; } mtx_t; // Define functions to operate on OpenMP-based mtx_t. 
static void bli_mutex_init( mtx_t* m ) { omp_init_lock( &(m->mutex) ); } static void bli_mutex_finalize( mtx_t* m ) { omp_destroy_lock( &(m->mutex) ); } static void bli_mutex_lock( mtx_t* m ) { omp_set_lock( &(m->mutex) ); } static void bli_mutex_unlock( mtx_t* m ) { omp_unset_lock( &(m->mutex) ); } #endif #endif blis-0.6.1/frame/thread/old/bli_mutex_pthreads.h000066400000000000000000000046451360743507500216440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MUTEX_PTHREADS_H #define BLIS_MUTEX_PTHREADS_H // Define mutex_t for situations when POSIX multithreading is enabled. #ifdef BLIS_ENABLE_PTHREADS #include // Define mtx_t. typedef struct mtx_s { pthread_mutex_t mutex; } mtx_t; // Define macros to operate on pthread-based mtx_t. static void bli_mutex_init( mtx_t* m ) { pthread_mutex_init( &(m->mutex), NULL ); \ } static void bli_mutex_finalize( mtx_t* m ) { pthread_mutex_destroy( &(m->mutex) ); \ } static void bli_mutex_lock( mtx_t* m ) { pthread_mutex_lock( &(m->mutex) ); \ } static void bli_mutex_unlock( mtx_t* m ) { pthread_mutex_unlock( &(m->mutex) ); \ } #endif #endif blis-0.6.1/frame/thread/old/bli_mutex_single.h000066400000000000000000000043201360743507500213010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_MUTEX_SINGLE_H #define BLIS_MUTEX_SINGLE_H // Define mtx_t for situations when multithreading is disabled. #ifndef BLIS_ENABLE_MULTITHREADING // Define mtx_t. typedef struct mtx_s { } mtx_t; // Define macros to operate on pthread-based mtx_t. static void bli_mutex_init( mtx_t* m ) { } static void bli_mutex_finalize( mtx_t* m ) { } static void bli_mutex_lock( mtx_t* m ) { } static void bli_mutex_unlock( mtx_t* m ) { } #endif #endif blis-0.6.1/frame/util/000077500000000000000000000000001360743507500145305ustar00rootroot00000000000000blis-0.6.1/frame/util/bli_util.h000066400000000000000000000042671360743507500165150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "bli_util_check.h" // Prototype object APIs (expert and non-expert). #include "bli_oapi_ex.h" #include "bli_util_oapi.h" #include "bli_oapi_ba.h" #include "bli_util_oapi.h" // Prototype typed APIs (expert and non-expert). #include "bli_tapi_ex.h" #include "bli_util_tapi.h" #include "bli_util_ft.h" #include "bli_tapi_ba.h" #include "bli_util_tapi.h" #include "bli_util_ft.h" // Generate function pointer arrays for tapi functions (expert only). 
#include "bli_util_fpa.h" // Prototype level-1m implementations. #include "bli_util_unb_var1.h" blis-0.6.1/frame/util/bli_util_check.c000066400000000000000000000202051360743507500176330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define object-based check functions. 
// #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* asum \ ) \ { \ bli_utilv_xa_check( x, asum ); \ } GENFRONT( asumv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x \ ) \ { \ bli_utilm_mkhst_check( x ); \ } GENFRONT( mkherm ) GENFRONT( mksymm ) GENFRONT( mktrim ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* norm \ ) \ { \ bli_utilv_norm_check( x, norm ); \ } GENFRONT( norm1v ) GENFRONT( normfv ) GENFRONT( normiv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* norm \ ) \ { \ bli_utilm_norm_check( x, norm ); \ } GENFRONT( norm1m ) GENFRONT( normfm ) GENFRONT( normim ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ FILE* file, \ char* s1, \ obj_t* x, \ char* format, \ char* s2 \ ) \ { \ bli_utilm_fprint_check( file, s1, x, format, s2 ); \ } GENFRONT( fprintv ) GENFRONT( fprintm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x \ ) \ { \ bli_utilm_rand_check( x ); \ } GENFRONT( randv ) GENFRONT( randnv ) GENFRONT( randm ) GENFRONT( randnm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* scale, \ obj_t* sumsq \ ) \ { \ bli_utilv_sumsqv_check( x, scale, sumsq ); \ } GENFRONT( sumsqv ) // ----------------------------------------------------------------------------- void bli_utilv_xa_check ( obj_t* x, obj_t* asum ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( asum ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( asum ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). 
e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( asum ); bli_check_error_code( e_val ); } void bli_utilm_mkhst_check ( obj_t* a ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( a ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_matrix_object( a ); bli_check_error_code( e_val ); e_val = bli_check_square_object( a ); bli_check_error_code( e_val ); e_val = bli_check_object_diag_offset_equals( a, 0 ); bli_check_error_code( e_val ); // Check matrix storage. e_val = bli_check_upper_or_lower_object( a ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( a ); bli_check_error_code( e_val ); } void bli_utilv_norm_check ( obj_t* x, obj_t* norm ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( norm ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( norm ); bli_check_error_code( e_val ); e_val = bli_check_object_real_proj_of( x, norm ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( norm ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( norm ); bli_check_error_code( e_val ); } void bli_utilm_norm_check ( obj_t* x, obj_t* norm ) { err_t e_val; // Check object datatypes. 
e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_noninteger_object( norm ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( norm ); bli_check_error_code( e_val ); e_val = bli_check_object_real_proj_of( x, norm ); bli_check_error_code( e_val ); // Check object dimensions. e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( norm ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( norm ); bli_check_error_code( e_val ); } void bli_utilm_fprint_check ( FILE* file, char* s1, obj_t* x, char* format, char* s2 ) { err_t e_val; // Check argument pointers. e_val = bli_check_null_pointer( file ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( s1 ); bli_check_error_code( e_val ); e_val = bli_check_null_pointer( s2 ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); } void bli_utilm_rand_check ( obj_t* x ) { err_t e_val; // Check object datatypes. e_val = bli_check_noninteger_object( x ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( x ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); } void bli_utilv_sumsqv_check ( obj_t* x, obj_t* scale, obj_t* sumsq ) { err_t e_val; // Check object datatypes. e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( scale ); bli_check_error_code( e_val ); e_val = bli_check_nonconstant_object( sumsq ); bli_check_error_code( e_val ); // Check object dimensions. 
e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( scale ); bli_check_error_code( e_val ); e_val = bli_check_scalar_object( sumsq ); bli_check_error_code( e_val ); // Check object buffers (for non-NULLness). e_val = bli_check_object_buffer( x ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( scale ); bli_check_error_code( e_val ); e_val = bli_check_object_buffer( sumsq ); bli_check_error_code( e_val ); } blis-0.6.1/frame/util/bli_util_check.h000066400000000000000000000073421360743507500176470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based check functions. // #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* asum \ ); GENPROT( asumv ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x \ ); GENPROT( mkherm ) GENPROT( mksymm ) GENPROT( mktrim ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* norm \ ); GENPROT( norm1v ) GENPROT( normfv ) GENPROT( normiv ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* norm \ ); GENPROT( norm1m ) GENPROT( normfm ) GENPROT( normim ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ FILE* file, \ char* s1, \ obj_t* x, \ char* format, \ char* s2 \ ); GENPROT( fprintv ) GENPROT( fprintm ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x \ ); GENPROT( randv ) GENPROT( randnv ) GENPROT( randm ) GENPROT( randnm ) #undef GENPROT #define GENPROT( opname ) \ \ void PASTEMAC(opname,_check) \ ( \ obj_t* x, \ obj_t* scale, \ obj_t* sumsq \ ); GENPROT( sumsqv ) // ----------------------------------------------------------------------------- void bli_utilv_xi_check ( obj_t* x, obj_t* index ); void bli_utilv_xa_check ( obj_t* x, obj_t* asum ); void bli_utilm_mkhst_check ( obj_t* a ); void bli_utilv_norm_check ( obj_t* x, obj_t* norm ); void bli_utilm_norm_check ( obj_t* x, obj_t* norm ); void 
bli_utilm_fprint_check ( FILE* file, char* s1, obj_t* x, char* format, char* s2 ); void bli_utilm_rand_check ( obj_t* x ); void bli_utilv_sumsqv_check ( obj_t* x, obj_t* scale, obj_t* sumsq ); blis-0.6.1/frame/util/bli_util_fpa.c000066400000000000000000000052141360743507500173270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define function pointer query interfaces. 
// #undef GENFRONT #define GENFRONT( opname ) \ \ GENARRAY_FPA( PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft), \ PASTECH(opname,BLIS_TAPI_EX_SUF) ); \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ) \ { \ return PASTECH2(opname,BLIS_TAPI_EX_SUF,_fpa)[ dt ]; \ } GENFRONT( asumv ) GENFRONT( mkherm ) GENFRONT( mksymm ) GENFRONT( mktrim ) GENFRONT( norm1v ) GENFRONT( normfv ) GENFRONT( normiv ) GENFRONT( norm1m ) GENFRONT( normfm ) GENFRONT( normim ) GENFRONT( randv ) GENFRONT( randnv ) GENFRONT( randm ) GENFRONT( randnm ) GENFRONT( sumsqv ) #undef GENFRONT #define GENFRONT( opname ) \ \ /* GENARRAY_FPA( void_fp, opname ); \ */ \ \ GENARRAY_FPA( PASTECH(opname,_vft), \ PASTECH0(opname) ); \ \ PASTECH(opname,_vft) \ PASTEMAC(opname,_qfp)( num_t dt ) \ { \ return PASTECH(opname,_fpa)[ dt ]; \ } GENFRONT( fprintv ) GENFRONT( fprintm ) //GENFRONT( printv ) //GENFRONT( printm ) blis-0.6.1/frame/util/bli_util_fpa.h000066400000000000000000000045361360743507500173420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype function pointer query interface. // #undef GENPROT #define GENPROT( opname ) \ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( num_t dt ); GENPROT( asumv ) GENPROT( mkherm ) GENPROT( mksymm ) GENPROT( mktrim ) GENPROT( norm1v ) GENPROT( normfv ) GENPROT( normiv ) GENPROT( norm1m ) GENPROT( normfm ) GENPROT( normim ) GENPROT( fprintv ) GENPROT( fprintm ) //GENPROT( printv ) //GENPROT( printm ) GENPROT( randv ) GENPROT( randnv ) GENPROT( randm ) GENPROT( randnm ) GENPROT( sumsqv ) #undef GENPROT #define GENPROT( opname ) \ \ PASTECH(opname,_vft) \ PASTEMAC(opname,_qfp)( num_t dt ); GENPROT( fprintv ) GENPROT( fprintm ) //GENPROT( printv ) //GENPROT( printm ) blis-0.6.1/frame/util/bli_util_ft.h000066400000000000000000000113551360743507500172020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // // -- Utility function types --------------------------------------------------- // // asumv #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* asum \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEFR( asumv ) // mkherm, mksymm, mktrim #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ uplo_t uploa, \ dim_t m, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( mkherm ) INSERT_GENTDEF( mksymm ) INSERT_GENTDEF( mktrim ) // norm1v, normfv, normiv #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEFR( norm1v ) INSERT_GENTDEFR( normfv ) INSERT_GENTDEFR( normiv ) // norm1m, normfm, normim #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype_r* norm \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEFR( norm1m ) INSERT_GENTDEFR( normfm ) INSERT_GENTDEFR( normim ) // fprintv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ FILE* file, \ char* s1, \ dim_t n, \ ctype* x, inc_t incx, \ char* format, \ char* s2 \ ); INSERT_GENTDEF( fprintv ) // fprintm #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ FILE* file, \ char* s1, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ char* format, \ char* s2 \ ); INSERT_GENTDEF( fprintm ) // randv, randnv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ dim_t n, \ ctype* x, 
inc_t incx \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( randv ) INSERT_GENTDEF( randnv ) // randm, randnm #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ doff_t diagoffx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEF( randm ) INSERT_GENTDEF( randnm ) // sumsqv #undef GENTDEFR #define GENTDEFR( ctype, ctype_r, ch, chr, opname, tsuf ) \ \ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* scale, \ ctype_r* sumsq \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTDEFR( sumsqv ) blis-0.6.1/frame/util/bli_util_oapi.c000066400000000000000000000277631360743507500175260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the object API macros. #ifdef BLIS_ENABLE_OAPI // // Define object-based interfaces. // #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* asum \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ dim_t n = bli_obj_vector_dim( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_asum = bli_obj_buffer_at_off( asum ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x, asum ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ n, \ buf_x, incx, \ buf_asum, \ cntx, \ rntm \ ); \ } GENFRONT( asumv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* a \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( a ); \ \ uplo_t uploa = bli_obj_uplo( a ); \ dim_t m = bli_obj_length( a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \ inc_t rs_a = bli_obj_row_stride( a ); \ inc_t cs_a = bli_obj_col_stride( a ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( a ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ uploa, \ m, \ buf_a, rs_a, cs_a, \ cntx, \ rntm \ ); \ } GENFRONT( mkherm ) GENFRONT( mksymm ) GENFRONT( mktrim ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* norm \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ dim_t n = bli_obj_vector_dim( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ void* buf_norm = bli_obj_buffer_at_off( norm ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x, norm ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ n, \ buf_x, incx, \ buf_norm, \ cntx, \ rntm \ ); \ } GENFRONT( norm1v ) GENFRONT( normfv ) GENFRONT( normiv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* norm \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ diag_t diagx = bli_obj_diag( x ); \ uplo_t uplox = bli_obj_uplo( x ); \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ void* buf_norm = bli_obj_buffer_at_off( norm ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x, norm ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ diagx, \ uplox, \ m, \ n, \ buf_x, rs_x, cs_x, \ buf_norm, \ cntx, \ rntm \ ); \ } GENFRONT( norm1m ) GENFRONT( normfm ) GENFRONT( normim ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ FILE* file, \ char* s1, \ obj_t* x, \ char* format, \ char* s2 \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ dim_t n = bli_obj_vector_dim( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \ \ /* Handle constants up front. */ \ if ( dt == BLIS_CONSTANT ) \ { \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = \ PASTEMAC(opname,_qfp)( dt ); \ \ f \ ( \ file, \ s1, \ n, \ buf_x, incx, \ format, \ s2 \ ); \ } GENFRONT( fprintv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ FILE* file, \ char* s1, \ obj_t* x, \ char* format, \ char* s2 \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( file, s1, x, format, s2 ); \ \ /* Handle constants up front. */ \ if ( dt == BLIS_CONSTANT ) \ { \ float* sp = bli_obj_buffer_for_const( BLIS_FLOAT, x ); \ double* dp = bli_obj_buffer_for_const( BLIS_DOUBLE, x ); \ scomplex* cp = bli_obj_buffer_for_const( BLIS_SCOMPLEX, x ); \ dcomplex* zp = bli_obj_buffer_for_const( BLIS_DCOMPLEX, x ); \ gint_t* ip = bli_obj_buffer_for_const( BLIS_INT, x ); \ \ fprintf( file, "%s\n", s1 ); \ fprintf( file, " float: %9.2e\n", bli_sreal( *sp ) ); \ fprintf( file, " double: %9.2e\n", bli_dreal( *dp ) ); \ fprintf( file, " scomplex: %9.2e + %9.2e\n", bli_creal( *cp ), \ bli_cimag( *cp ) ); \ fprintf( file, " dcomplex: %9.2e + %9.2e\n", bli_zreal( *zp ), \ bli_zimag( *zp ) ); \ fprintf( file, " int: %ld\n", ( long )(*ip) ); \ fprintf( file, "\n" ); \ return; \ } \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH(opname,_vft) f = \ PASTEMAC(opname,_qfp)( dt ); \ \ f \ ( \ file, \ s1, \ m, \ n, \ buf_x, rs_x, cs_x, \ format, \ s2 \ ); \ } GENFRONT( fprintm ) #undef GENFRONT #define GENFRONT( opname, varname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ char* s1, \ obj_t* x, \ char* format, \ char* s2 \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ /* Suppress compiler warning about unused variables. */ \ ( void )cntx; \ \ /* Invoke the typed function. */ \ PASTEMAC0(varname) \ ( \ stdout, \ s1, \ x, \ format, \ s2 \ ); \ } GENFRONT( printv, fprintv ) GENFRONT( printm, fprintm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ dim_t n = bli_obj_vector_dim( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ n, \ buf_x, incx, \ cntx, \ rntm \ ); \ } GENFRONT( randv ) GENFRONT( randnv ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ doff_t diagoffx = bli_obj_diag_offset( x ); \ uplo_t uplox = bli_obj_uplo( x ); \ dim_t m = bli_obj_length( x ); \ dim_t n = bli_obj_width( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t rs_x = bli_obj_row_stride( x ); \ inc_t cs_x = bli_obj_col_stride( x ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. 
*/ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ diagoffx, \ uplox, \ m, \ n, \ buf_x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } GENFRONT( randm ) GENFRONT( randnm ) #undef GENFRONT #define GENFRONT( opname ) \ \ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* scale, \ obj_t* sumsq \ BLIS_OAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_OAPI_EX_DECLS \ \ num_t dt = bli_obj_dt( x ); \ \ dim_t n = bli_obj_vector_dim( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ void* buf_scale = bli_obj_buffer_at_off( scale ); \ void* buf_sumsq = bli_obj_buffer_at_off( sumsq ); \ \ if ( bli_error_checking_is_enabled() ) \ PASTEMAC(opname,_check)( x, scale, sumsq ); \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \ PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \ \ f \ ( \ n, \ buf_x, incx, \ buf_scale, \ buf_sumsq, \ cntx, \ rntm \ ); \ } GENFRONT( sumsqv ) #endif blis-0.6.1/frame/util/bli_util_oapi.h000066400000000000000000000073411360743507500175210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype object-based interfaces. // #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* asum \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( asumv ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* a \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( mkherm ) GENPROT( mksymm ) GENPROT( mktrim ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* norm \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( norm1v ) GENPROT( normfv ) GENPROT( normiv ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* norm \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( norm1m ) GENPROT( normfm ) GENPROT( normim ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ FILE* file, \ char* s1, \ obj_t* x, \ char* format, \ char* s2 \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( fprintv ) GENPROT( fprintm ) #undef GENPROT 
#define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ char* s1, \ obj_t* x, \ char* format, \ char* s2 \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( printv ) GENPROT( printm ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( randv ) GENPROT( randnv ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( randm ) GENPROT( randnm ) #undef GENPROT #define GENPROT( opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* x, \ obj_t* scale, \ obj_t* sumsq \ BLIS_OAPI_EX_PARAMS \ ); GENPROT( sumsqv ) blis-0.6.1/frame/util/bli_util_oapi_ba.c000066400000000000000000000036711360743507500201600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_oapi_ba.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_util_oapi.c" blis-0.6.1/frame/util/bli_util_oapi_ex.c000066400000000000000000000036671360743507500202170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_oapi_ex.h" // Define the macro protecting the object API definitions. #define BLIS_ENABLE_OAPI // Include the object API definitions here. #include "bli_util_oapi.c" blis-0.6.1/frame/util/bli_util_tapi.c000066400000000000000000000210641360743507500175170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // Guard the function definitions so that they are only compiled when // #included from files that define the typed API macros. #ifdef BLIS_ENABLE_TAPI // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* asum \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If the vector length is zero, set the absolute sum return value to zero and return early. */ \ if ( bli_zero_dim1( n ) ) \ { \ PASTEMAC(chr,set0s)( *asum ); \ return; \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ n, \ x, incx, \ asum, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( asumv ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploa, \ dim_t m, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If either dimension is zero, return early. */ \ if ( bli_zero_dim2( m, m ) ) return; \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ uploa, \ m, \ a, rs_a, cs_a, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( mkherm ) INSERT_GENTFUNC_BASIC0( mksymm ) INSERT_GENTFUNC_BASIC0( mktrim ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If the vector length is zero, set the norm to zero and return early. */ \ if ( bli_zero_dim1( n ) ) \ { \ PASTEMAC(chr,set0s)( *norm ); \ return; \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ n, \ x, incx, \ norm, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( norm1v ) INSERT_GENTFUNCR_BASIC0( normfv ) INSERT_GENTFUNCR_BASIC0( normiv ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype_r* norm \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If either dimension is zero, set the norm to zero and return early. */ \ if ( bli_zero_dim2( m, n ) ) \ { \ PASTEMAC(chr,set0s)( *norm ); \ return; \ } \ \ /* Obtain a valid context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. 
*/ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ diagoffx, \ diagx, \ uplox, \ m, \ n, \ x, rs_x, cs_x, \ norm, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( norm1m ) INSERT_GENTFUNCR_BASIC0( normfm ) INSERT_GENTFUNCR_BASIC0( normim ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, varname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ char* s1, \ dim_t n, \ void* x, inc_t incx, \ char* format, \ char* s2 \ ) \ { \ bli_init_once(); \ \ PASTEMAC(ch,varname) \ ( \ stdout, \ s1, \ n, \ x, incx, \ format, \ s2 \ ); \ } INSERT_GENTFUNC_BASIC_I( printv, fprintv ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, varname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ char* s1, \ dim_t m, \ dim_t n, \ void* x, inc_t rs_x, inc_t cs_x, \ char* format, \ char* s2 \ ) \ { \ bli_init_once(); \ \ PASTEMAC(ch,varname) \ ( \ stdout, \ s1, \ m, \ n, \ x, rs_x, cs_x, \ format, \ s2 \ ); \ } INSERT_GENTFUNC_BASIC_I( printm, fprintm ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If the vector length is zero, return early. */ \ if ( bli_zero_dim1( n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ n, \ x, incx, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( randv ) INSERT_GENTFUNC_BASIC0( randnv ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If either dimension is zero, return early. */ \ if ( bli_zero_dim2( m, n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. 
*/ \ /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ diagoffx, \ uplox, \ m, \ n, \ x, rs_x, cs_x, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( randm ) INSERT_GENTFUNC_BASIC0( randnm ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname ) \ \ void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* scale, \ ctype_r* sumsq \ BLIS_TAPI_EX_PARAMS \ ) \ { \ bli_init_once(); \ \ BLIS_TAPI_EX_DECLS \ \ /* If x is zero length, return with scale and sumsq unchanged. */ \ if ( bli_zero_dim1( n ) ) return; \ \ /* Obtain a valid context from the gks if necessary. */ \ /*if ( cntx == NULL ) cntx = bli_gks_query_cntx();*/ \ \ /* Invoke the helper variant, which loops over the appropriate kernel to implement the current operation. */ \ PASTEMAC2(ch,opname,_unb_var1) \ ( \ n, \ x, incx, \ scale, \ sumsq, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( sumsqv ) #endif blis-0.6.1/frame/util/bli_util_tapi.h000066400000000000000000000112101360743507500175140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces with typed operands. // #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* asum \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROTR_BASIC0( asumv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ uplo_t uploa, \ dim_t m, \ ctype* a, inc_t rs_a, inc_t cs_a \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( mkherm ) INSERT_GENTPROT_BASIC0( mksymm ) INSERT_GENTPROT_BASIC0( mktrim ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROTR_BASIC0( norm1v ) INSERT_GENTPROTR_BASIC0( normfv ) INSERT_GENTPROTR_BASIC0( normiv ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype_r* norm \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROTR_BASIC0( norm1m ) 
INSERT_GENTPROTR_BASIC0( normfm ) INSERT_GENTPROTR_BASIC0( normim ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ char* s1, \ dim_t n, \ void* x, inc_t incx, \ char* format, \ char* s2 \ ); INSERT_GENTPROT_BASIC0_I( printv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ char* s1, \ dim_t m, \ dim_t n, \ void* x, inc_t rs_x, inc_t cs_x, \ char* format, \ char* s2 \ ); INSERT_GENTPROT_BASIC0_I( printm ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( randv ) INSERT_GENTPROT_BASIC0( randnv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ doff_t diagoffx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROT_BASIC0( randm ) INSERT_GENTPROT_BASIC0( randnm ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* scale, \ ctype_r* sumsq \ BLIS_TAPI_EX_PARAMS \ ); INSERT_GENTPROTR_BASIC0( sumsqv ) blis-0.6.1/frame/util/bli_util_tapi_ba.c000066400000000000000000000036671360743507500201720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // omitting expert parameters. #include "bli_tapi_ba.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_util_tapi.c" blis-0.6.1/frame/util/bli_util_tapi_ex.c000066400000000000000000000036651360743507500202220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // Include cpp macros that instantiate the API definition templates as // having expert parameters. #include "bli_tapi_ex.h" // Define the macro protecting the typed API definitions. #define BLIS_ENABLE_TAPI // Include the typed API definitions here. #include "bli_util_tapi.c" blis-0.6.1/frame/util/bli_util_unb_var1.c000066400000000000000000000706341360743507500203060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* asum, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype* chi1; \ ctype_r chi1_r; \ ctype_r chi1_i; \ ctype_r absum; \ dim_t i; \ \ /* Initialize the absolute sum accumulator to zero. */ \ PASTEMAC(chr,set0s)( absum ); \ \ for ( i = 0; i < n; ++i ) \ { \ chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. 
*/ \ PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ chi1_r = bli_fabs( chi1_r ); \ chi1_i = bli_fabs( chi1_i ); \ \ /* Accumulate the real and imaginary components into absum. */ \ PASTEMAC(chr,adds)( chi1_r, absum ); \ PASTEMAC(chr,adds)( chi1_i, absum ); \ } \ \ /* Store the final value of absum to the output variable. */ \ PASTEMAC(chr,copys)( absum, *asum ); \ } INSERT_GENTFUNCR_BASIC0( asumv_unb_var1 ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ dim_t m, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype_r* zeror = PASTEMAC(chr,0); \ doff_t diagoffa; \ \ /* If the dimension is zero, return early. */ \ if ( bli_zero_dim1( m ) ) return; \ \ /* In order to avoid the main diagonal, we must nudge the diagonal either up or down by one, depending on which triangle is currently stored. */ \ if ( bli_is_upper( uploa ) ) diagoffa = 1; \ else /*if ( bli_is_lower( uploa ) )*/ diagoffa = -1; \ \ /* We will be reflecting the stored region over the diagonal into the unstored region, so a transposition is necessary. Furthermore, since we are creating a Hermitian matrix, we must also conjugate. */ \ PASTEMAC2(ch,copym,BLIS_TAPI_EX_SUF) \ ( \ diagoffa, \ BLIS_NONUNIT_DIAG, \ uploa, \ BLIS_CONJ_TRANSPOSE, \ m, \ m, \ a, rs_a, cs_a, \ a, rs_a, cs_a, \ cntx, \ rntm \ ); \ \ /* Set the imaginary parts of the diagonal elements to zero. */ \ PASTEMAC2(ch,setid,BLIS_TAPI_EX_SUF) \ ( \ 0, \ m, \ m, \ zeror, \ a, rs_a, cs_a, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC0( mkherm_unb_var1 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ dim_t m, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ doff_t diagoffa; \ \ /* If the dimension is zero, return early. 
*/ \ if ( bli_zero_dim1( m ) ) return; \ \ /* In order to avoid the main diagonal, we must nudge the diagonal either up or down by one, depending on which triangle is currently stored. */ \ if ( bli_is_upper( uploa ) ) diagoffa = 1; \ else /*if ( bli_is_lower( uploa ) )*/ diagoffa = -1; \ \ /* We will be reflecting the stored region over the diagonal into the unstored region, so a transposition is necessary. */ \ PASTEMAC2(ch,copym,BLIS_TAPI_EX_SUF) \ ( \ diagoffa, \ BLIS_NONUNIT_DIAG, \ uploa, \ BLIS_TRANSPOSE, \ m, \ m, \ a, rs_a, cs_a, \ a, rs_a, cs_a, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( mksymm_unb_var1 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ dim_t m, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype* zero = PASTEMAC(ch,0); \ doff_t diagoffa; \ \ /* If the dimension is zero, return early. */ \ if ( bli_zero_dim1( m ) ) return; \ \ /* Toggle uplo so that it refers to the unstored triangle. */ \ bli_toggle_uplo( &uploa ); \ \ /* In order to avoid the main diagonal, we must nudge the diagonal either up or down by one, depending on which triangle is to be zeroed. */ \ if ( bli_is_upper( uploa ) ) diagoffa = 1; \ else /*if ( bli_is_lower( uploa ) )*/ diagoffa = -1; \ \ /* Set the unstored triangle to zero. */ \ PASTEMAC2(ch,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ diagoffa, \ BLIS_NONUNIT_DIAG, \ uploa, \ m, \ m, \ zero, \ a, rs_a, cs_a, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNC_BASIC0( mktrim_unb_var1 ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype* chi1; \ ctype_r abs_chi1; \ ctype_r absum; \ dim_t i; \ \ /* Initialize the absolute sum accumulator to zero. 
*/ \ PASTEMAC(chr,set0s)( absum ); \ \ for ( i = 0; i < n; ++i ) \ { \ chi1 = x + (i )*incx; \ \ /* Compute the absolute value (or complex magnitude) of chi1. */ \ PASTEMAC2(ch,chr,abval2s)( *chi1, abs_chi1 ); \ \ /* Accumulate the absolute value of chi1 into absum. */ \ PASTEMAC(chr,adds)( abs_chi1, absum ); \ } \ \ /* Store final value of absum to the output variable. */ \ PASTEMAC(chr,copys)( absum, *norm ); \ } INSERT_GENTFUNCR_BASIC0( norm1v_unb_var1 ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype_r* zero = PASTEMAC(chr,0); \ ctype_r* one = PASTEMAC(chr,1); \ ctype_r scale; \ ctype_r sumsq; \ ctype_r sqrt_sumsq; \ \ /* Initialize scale and sumsq to begin the summation. */ \ PASTEMAC(chr,copys)( *zero, scale ); \ PASTEMAC(chr,copys)( *one, sumsq ); \ \ /* Compute the sum of the squares of the vector. */ \ PASTEMAC(ch,kername) \ ( \ n, \ x, incx, \ &scale, \ &sumsq, \ cntx, \ rntm \ ); \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ PASTEMAC(chr,sqrt2s)( sumsq, sqrt_sumsq ); \ PASTEMAC(chr,scals)( scale, sqrt_sumsq ); \ \ /* Store the final value to the output variable. */ \ PASTEMAC(chr,copys)( sqrt_sumsq, *norm ); \ } //INSERT_GENTFUNCR_BASIC( normfv_unb_var1, sumsqv_unb_var1 ) GENTFUNCR( scomplex, float, c, s, normfv_unb_var1, sumsqv_unb_var1 ) GENTFUNCR( dcomplex, double, z, d, normfv_unb_var1, sumsqv_unb_var1 ) #undef GENTFUNCR // We've disabled the dotv-based implementation because that method of // computing the sum of the squares of x inherently does not check for // overflow. Instead, we use the fallback method based on sumsqv, which // takes care to not overflow unnecessarily (ie: takes care for the // sqrt( sum of the squares of x ) to not overflow if the sum of the // squares of x would normally overflow. See GitHub issue #332 for // discussion. 
#if 0 //defined(FE_OVERFLOW) && !defined(__APPLE__) #define GENTFUNCR( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype_r* zero = PASTEMAC(chr,0); \ ctype_r* one = PASTEMAC(chr,1); \ ctype_r scale; \ ctype_r sumsq; \ ctype_r sqrt_sumsq; \ \ /* Initialize scale and sumsq to begin the summation. */ \ PASTEMAC(chr,copys)( *zero, scale ); \ PASTEMAC(chr,copys)( *one, sumsq ); \ \ /* An optimization: first try to use dotv to compute the sum of the squares of the vector. If no floating-point exceptions (specifically, overflow and invalid exceptions) were produced, then we accept the computed value and returne early. The cost of this optimization is the "sunk" cost of the initial dotv when sumsqv must be used instead. However, we expect that the vast majority of use cases will not produce exceptions, and therefore only one pass through the data, via dotv, will be required. */ \ if ( TRUE ) \ { \ int f_exp_raised;\ ctype sumsqc; \ \ feclearexcept( FE_ALL_EXCEPT );\ \ PASTEMAC2(ch,dotv,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ n,\ x, incx, \ x, incx, \ &sumsqc, \ cntx, \ rntm \ ); \ \ PASTEMAC2(ch,chr,copys)( sumsqc, sumsq ); \ \ f_exp_raised = fetestexcept( FE_OVERFLOW | FE_INVALID );\ \ if ( !f_exp_raised ) \ { \ PASTEMAC(chr,sqrt2s)( sumsq, *norm ); \ return; \ } \ } \ \ /* Compute the sum of the squares of the vector. */ \ PASTEMAC(ch,kername) \ ( \ n, \ x, incx, \ &scale, \ &sumsq, \ cntx, \ rntm \ ); \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ PASTEMAC(chr,sqrt2s)( sumsq, sqrt_sumsq ); \ PASTEMAC(chr,scals)( scale, sqrt_sumsq ); \ \ /* Store the final value to the output variable. 
*/ \ PASTEMAC(chr,copys)( sqrt_sumsq, *norm ); \ } #else #define GENTFUNCR( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype_r* zero = PASTEMAC(chr,0); \ ctype_r* one = PASTEMAC(chr,1); \ ctype_r scale; \ ctype_r sumsq; \ ctype_r sqrt_sumsq; \ \ /* Initialize scale and sumsq to begin the summation. */ \ PASTEMAC(chr,copys)( *zero, scale ); \ PASTEMAC(chr,copys)( *one, sumsq ); \ \ /* Compute the sum of the squares of the vector. */ \ \ PASTEMAC(ch,kername) \ ( \ n, \ x, incx, \ &scale, \ &sumsq, \ cntx, \ rntm \ ); \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ PASTEMAC(chr,sqrt2s)( sumsq, sqrt_sumsq ); \ PASTEMAC(chr,scals)( scale, sqrt_sumsq ); \ \ /* Store the final value to the output variable. */ \ PASTEMAC(chr,copys)( sqrt_sumsq, *norm ); \ } #endif GENTFUNCR( float, float, s, s, normfv_unb_var1, sumsqv_unb_var1 ) GENTFUNCR( double, double, d, d, normfv_unb_var1, sumsqv_unb_var1 ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype* chi1; \ ctype_r abs_chi1; \ ctype_r abs_chi1_max; \ dim_t i; \ \ /* Initialize the maximum absolute value to zero. */ \ PASTEMAC(chr,set0s)( abs_chi1_max ); \ \ for ( i = 0; i < n; ++i ) \ { \ chi1 = x + (i )*incx; \ \ /* Compute the absolute value (or complex magnitude) of chi1. */ \ PASTEMAC2(ch,chr,abval2s)( *chi1, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was larger than any previously seen. This behavior mimics that of LAPACK's ?lange(). 
*/ \ if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ PASTEMAC(chr,copys)( abs_chi1, abs_chi1_max ); \ } \ } \ \ /* Store the final value to the output variable. */ \ PASTEMAC(chr,copys)( abs_chi1_max, *norm ); \ } INSERT_GENTFUNCR_BASIC0( normiv_unb_var1 ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype* one = PASTEMAC(ch,1); \ ctype* x0; \ ctype* chi1; \ ctype* x2; \ ctype_r absum_max; \ ctype_r absum_j; \ ctype_r abval_chi1; \ uplo_t uplox_eff; \ dim_t n_iter; \ dim_t n_elem, n_elem_max; \ inc_t ldx, incx; \ dim_t j, i; \ dim_t ij0, n_shift; \ \ /* Initialize the maximum absolute column sum to zero. */ \ PASTEMAC(chr,set0s)( absum_max ); \ \ /* If either dimension is zero, return with absum_max equal to zero. */ \ if ( bli_zero_dim2( m, n ) ) \ { \ PASTEMAC(chr,copys)( absum_max, *norm ); \ return; \ } \ \ /* Set various loop parameters. */ \ bli_set_dims_incs_uplo_1m_noswap \ ( \ diagoffx, BLIS_NONUNIT_DIAG, \ uplox, m, n, rs_x, cs_x, \ &uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, \ &ij0, &n_shift \ ); \ \ /* If the matrix is zeros, return with absum_max equal to zero. */ \ if ( bli_is_zeros( uplox_eff ) ) \ { \ PASTEMAC(chr,copys)( absum_max, *norm ); \ return; \ } \ \ \ /* Handle dense and upper/lower storage cases separately. */ \ if ( bli_is_dense( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ n_elem = n_elem_max; \ \ x0 = x + (j )*ldx + (0 )*incx; \ \ /* Compute the norm of the current column. */ \ PASTEMAC(ch,kername) \ ( \ n_elem, \ x0, incx, \ &absum_j, \ cntx, \ rntm \ ); \ \ /* If absum_j is greater than the previous maximum value, then save it. 
*/ \ if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ { \ PASTEMAC(chr,copys)( absum_j, absum_max ); \ } \ } \ } \ else \ { \ if ( bli_is_upper( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ n_elem = bli_min( n_shift + j + 1, n_elem_max ); \ \ x0 = x + (ij0+j )*ldx + (0 )*incx; \ chi1 = x + (ij0+j )*ldx + (n_elem-1)*incx; \ \ /* Compute the norm of the super-diagonal elements. */ \ PASTEMAC(ch,kername) \ ( \ n_elem - 1, \ x0, incx, \ &absum_j, \ cntx, \ rntm \ ); \ \ if ( bli_is_unit_diag( diagx ) ) chi1 = one; \ \ /* Handle the diagonal element separately in case it's unit. */ \ PASTEMAC2(ch,chr,abval2s)( *chi1, abval_chi1 ); \ PASTEMAC(chr,adds)( abval_chi1, absum_j ); \ \ /* If absum_j is greater than the previous maximum value, then save it. */ \ if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ { \ PASTEMAC(chr,copys)( absum_j, absum_max ); \ } \ } \ } \ else if ( bli_is_lower( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \ n_elem = n_elem_max - i; \ \ chi1 = x + (j )*ldx + (ij0+i )*incx; \ x2 = x + (j )*ldx + (ij0+i+1)*incx; \ \ /* Compute the norm of the sub-diagonal elements. */ \ PASTEMAC(ch,kername) \ ( \ n_elem - 1, \ x2, incx, \ &absum_j, \ cntx, \ rntm \ ); \ \ if ( bli_is_unit_diag( diagx ) ) chi1 = one; \ \ /* Handle the diagonal element separately in case it's unit. */ \ PASTEMAC2(ch,chr,abval2s)( *chi1, abval_chi1 ); \ PASTEMAC(chr,adds)( abval_chi1, absum_j ); \ \ /* If absum_j is greater than the previous maximum value, then save it. */ \ if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ { \ PASTEMAC(chr,copys)( absum_j, absum_max ); \ } \ } \ } \ } \ \ /* Store final value of absum_max to the output variable. 
*/ \ PASTEMAC(chr,copys)( absum_max, *norm ); \ } INSERT_GENTFUNCR_BASIC( norm1m_unb_var1, norm1v_unb_var1 ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype* one = PASTEMAC(ch,1); \ ctype_r* one_r = PASTEMAC(chr,1); \ ctype_r* zero_r = PASTEMAC(chr,0); \ ctype* x0; \ ctype* chi1; \ ctype* x2; \ ctype_r scale; \ ctype_r sumsq; \ ctype_r sqrt_sumsq; \ uplo_t uplox_eff; \ dim_t n_iter; \ dim_t n_elem, n_elem_max; \ inc_t ldx, incx; \ dim_t j, i; \ dim_t ij0, n_shift; \ \ /* Return a norm of zero if either dimension is zero. */ \ if ( bli_zero_dim2( m, n ) ) \ { \ PASTEMAC(chr,set0s)( *norm ); \ return; \ } \ \ /* Set various loop parameters. Here, we pretend that diagx is equal to BLIS_NONUNIT_DIAG because we handle the unit diagonal case manually. */ \ bli_set_dims_incs_uplo_1m \ ( \ diagoffx, BLIS_NONUNIT_DIAG, \ uplox, m, n, rs_x, cs_x, \ &uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, \ &ij0, &n_shift \ ); \ \ /* Check the effective uplo; if it's zeros, then our norm is zero. */ \ if ( bli_is_zeros( uplox_eff ) ) \ { \ PASTEMAC(chr,set0s)( *norm ); \ return; \ } \ \ /* Initialize scale and sumsq to begin the summation. */ \ PASTEMAC(chr,copys)( *zero_r, scale ); \ PASTEMAC(chr,copys)( *one_r, sumsq ); \ \ /* Handle dense and upper/lower storage cases separately. */ \ if ( bli_is_dense( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ n_elem = n_elem_max; \ \ x0 = x + (j )*ldx + (0 )*incx; \ \ /* Compute the norm of the current column. 
*/ \ PASTEMAC(ch,kername) \ ( \ n_elem, \ x0, incx, \ &scale, \ &sumsq, \ cntx, \ rntm \ ); \ } \ } \ else \ { \ if ( bli_is_upper( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ n_elem = bli_min( n_shift + j + 1, n_elem_max ); \ \ x0 = x + (ij0+j )*ldx + (0 )*incx; \ chi1 = x + (ij0+j )*ldx + (n_elem-1)*incx; \ \ /* Sum the squares of the super-diagonal elements. */ \ PASTEMAC(ch,kername) \ ( \ n_elem - 1, \ x0, incx, \ &scale, \ &sumsq, \ cntx, \ rntm \ ); \ \ if ( bli_is_unit_diag( diagx ) ) chi1 = one; \ \ /* Handle the diagonal element separately in case it's unit. */ \ PASTEMAC(ch,kername) \ ( \ 1, \ chi1, incx, \ &scale, \ &sumsq, \ cntx, \ rntm \ ); \ } \ } \ else if ( bli_is_lower( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \ n_elem = n_elem_max - i; \ \ chi1 = x + (j )*ldx + (ij0+i )*incx; \ x2 = x + (j )*ldx + (ij0+i+1)*incx; \ \ /* Sum the squares of the sub-diagonal elements. */ \ PASTEMAC(ch,kername) \ ( \ n_elem - 1, \ x2, incx, \ &scale, \ &sumsq, \ cntx, \ rntm \ ); \ \ if ( bli_is_unit_diag( diagx ) ) chi1 = one; \ \ /* Handle the diagonal element separately in case it's unit. */ \ PASTEMAC(ch,kername) \ ( \ 1, \ chi1, incx, \ &scale, \ &sumsq, \ cntx, \ rntm \ ); \ } \ } \ } \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ PASTEMAC(chr,sqrt2s)( sumsq, sqrt_sumsq ); \ PASTEMAC(chr,scals)( scale, sqrt_sumsq ); \ \ /* Store the final value to the output variable. */ \ PASTEMAC(chr,copys)( sqrt_sumsq, *norm ); \ } INSERT_GENTFUNCR_BASIC( normfm_unb_var1, sumsqv_unb_var1 ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ /* Induce a transposition so that rows become columns. 
*/ \ bli_swap_dims( &m, &n ); \ bli_swap_incs( &rs_x, &cs_x ); \ bli_toggle_uplo( &uplox ); \ bli_negate_diag_offset( &diagoffx ); \ \ /* Now we can simply compute the 1-norm of this transposed matrix, which will be equivalent to the infinity-norm of the original matrix. */ \ PASTEMAC(ch,kername) \ ( \ diagoffx, \ diagx, \ uplox, \ m, \ n, \ x, rs_x, cs_x, \ norm, \ cntx, \ rntm \ ); \ } INSERT_GENTFUNCR_BASIC( normim_unb_var1, norm1m_unb_var1 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ FILE* file, \ char* s1, \ dim_t n, \ ctype* x, inc_t incx, \ char* format, \ char* s2 \ ) \ { \ dim_t i; \ ctype* chi1; \ char default_spec[32] = PASTEMAC(ch,formatspec)(); \ \ if ( format == NULL ) format = default_spec; \ \ chi1 = x; \ \ fprintf( file, "%s\n", s1 ); \ \ for ( i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,fprints)( file, format, *chi1 ); \ fprintf( file, "\n" ); \ \ chi1 += incx; \ } \ \ fprintf( file, "%s\n", s2 ); \ } INSERT_GENTFUNC_BASIC0_I( fprintv ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ FILE* file, \ char* s1, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ char* format, \ char* s2 \ ) \ { \ dim_t i, j; \ ctype* chi1; \ char default_spec[32] = PASTEMAC(ch,formatspec)(); \ \ if ( format == NULL ) format = default_spec; \ \ fprintf( file, "%s\n", s1 ); \ \ for ( i = 0; i < m; ++i ) \ { \ for ( j = 0; j < n; ++j ) \ { \ chi1 = (( ctype* ) x) + i*rs_x + j*cs_x; \ \ PASTEMAC(ch,fprints)( file, format, *chi1 ); \ fprintf( file, " " ); \ } \ \ fprintf( file, "\n" ); \ } \ \ fprintf( file, "%s\n", s2 ); \ fflush( file ); \ } INSERT_GENTFUNC_BASIC0_I( fprintm ) #undef GENTFUNC #define GENTFUNC( ctype, ch, varname, randmac ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype* chi1; \ dim_t i; \ \ chi1 = x; \ \ for ( i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,randmac)( *chi1 ); \ \ chi1 += incx; \ } \ } 
INSERT_GENTFUNC_BASIC( randv_unb_var1, rands ) INSERT_GENTFUNC_BASIC( randnv_unb_var1, randnp2s ) #undef GENTFUNC #define GENTFUNC( ctype, ch, varname, kername ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ ctype* one = PASTEMAC(ch,1); \ ctype* x0; \ ctype* x1; \ ctype* x2; \ ctype* chi1; \ ctype beta; \ ctype omega; \ double max_m_n; \ uplo_t uplox_eff; \ dim_t n_iter; \ dim_t n_elem, n_elem_max; \ inc_t ldx, incx; \ dim_t j, i; \ dim_t ij0, n_shift; \ \ /* Set various loop parameters. Here, we pretend that diagx is equal to BLIS_NONUNIT_DIAG because we handle the unit diagonal case manually. */ \ bli_set_dims_incs_uplo_1m \ ( \ diagoffx, BLIS_NONUNIT_DIAG, \ uplox, m, n, rs_x, cs_x, \ &uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, \ &ij0, &n_shift \ ); \ \ if ( bli_is_zeros( uplox_eff ) ) return; \ \ /* Handle dense and upper/lower storage cases separately. */ \ if ( bli_is_dense( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ n_elem = n_elem_max; \ \ x1 = x + (j )*ldx + (0 )*incx; \ \ PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF) \ ( \ n_elem, \ x1, incx, \ cntx, \ rntm \ ); \ } \ } \ else \ { \ max_m_n = bli_max( m, n ); \ \ PASTEMAC2(d,ch,sets)( max_m_n, 0.0, omega ); \ PASTEMAC(ch,copys)( *one, beta ); \ PASTEMAC(ch,invscals)( omega, beta ); \ \ if ( bli_is_upper( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ n_elem = bli_min( n_shift + j + 1, n_elem_max ); \ \ x1 = x + (ij0+j )*ldx + (0 )*incx; \ x0 = x1; \ chi1 = x1 + (n_elem-1)*incx; \ \ PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF) \ ( \ n_elem, \ x1, incx, \ cntx, \ rntm \ ); \ \ ( void )x0; \ ( void )chi1; \ /* We want positive diagonal elements between 1 and 2. */ \ /* PASTEMAC(ch,abval2s)( *chi1, *chi1 ); \ PASTEMAC(ch,adds)( *one, *chi1 ); \ */ \ \ /* Scale the super-diagonal elements by 1/max(m,n). 
*/ \ /* PASTEMAC(ch,scalv) \ ( \ BLIS_NO_CONJUGATE, \ n_elem - 1, \ &beta, \ x0, incx, \ cntx \ ); \ */ \ } \ } \ else if ( bli_is_lower( uplox_eff ) ) \ { \ for ( j = 0; j < n_iter; ++j ) \ { \ i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \ n_elem = n_elem_max - i; \ \ x1 = x + (j )*ldx + (ij0+i )*incx; \ x2 = x1 + incx; \ chi1 = x1; \ \ PASTEMAC2(ch,kername,BLIS_TAPI_EX_SUF) \ ( \ n_elem, \ x1, incx, \ cntx, \ rntm \ ); \ \ ( void )x2; \ ( void )chi1; \ /* We want positive diagonal elements between 1 and 2. */ \ /* PASTEMAC(ch,abval2s)( *chi1, *chi1 ); \ PASTEMAC(ch,adds)( *one, *chi1 ); \ */ \ \ /* Scale the sub-diagonal elements by 1/max(m,n). */ \ /* PASTEMAC(ch,scalv) \ ( \ BLIS_NO_CONJUGATE, \ n_elem - 1, \ &beta, \ x2, incx, \ cntx \ ); \ */ \ } \ } \ } \ } INSERT_GENTFUNC_BASIC( randm_unb_var1, randv ) INSERT_GENTFUNC_BASIC( randnm_unb_var1, randnv ) #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* scale, \ ctype_r* sumsq, \ cntx_t* cntx, \ rntm_t* rntm \ ) \ { \ const ctype_r zero_r = *PASTEMAC(chr,0); \ const ctype_r one_r = *PASTEMAC(chr,1); \ \ ctype* chi1; \ ctype_r chi1_r; \ ctype_r chi1_i; \ ctype_r scale_r; \ ctype_r sumsq_r; \ ctype_r abs_chi1_r; \ dim_t i; \ \ /* NOTE: This function attempts to mimic the algorithm for computing the Frobenius norm in netlib LAPACK's ?lassq(). */ \ \ /* Copy scale and sumsq to local variables. */ \ PASTEMAC(chr,copys)( *scale, scale_r ); \ PASTEMAC(chr,copys)( *sumsq, sumsq_r ); \ \ chi1 = x; \ \ for ( i = 0; i < n; ++i ) \ { \ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ \ abs_chi1_r = bli_fabs( chi1_r ); \ \ /* Accumulate real component into sumsq, adjusting scale if needed. 
*/ \ if ( abs_chi1_r > zero_r || bli_isnan( abs_chi1_r) ) \ { \ if ( scale_r < abs_chi1_r ) \ { \ sumsq_r = one_r + \ sumsq_r * ( scale_r / abs_chi1_r ) * \ ( scale_r / abs_chi1_r ); \ \ PASTEMAC(chr,copys)( abs_chi1_r, scale_r ); \ } \ else \ { \ sumsq_r = sumsq_r + ( abs_chi1_r / scale_r ) * \ ( abs_chi1_r / scale_r ); \ } \ } \ \ abs_chi1_r = bli_fabs( chi1_i ); \ \ /* Accumulate imaginary component into sumsq, adjusting scale if needed. */ \ if ( abs_chi1_r > zero_r || bli_isnan( abs_chi1_r) ) \ { \ if ( scale_r < abs_chi1_r ) \ { \ sumsq_r = one_r + \ sumsq_r * ( scale_r / abs_chi1_r ) * \ ( scale_r / abs_chi1_r ); \ \ PASTEMAC(chr,copys)( abs_chi1_r, scale_r ); \ } \ else \ { \ sumsq_r = sumsq_r + ( abs_chi1_r / scale_r ) * \ ( abs_chi1_r / scale_r ); \ } \ } \ \ chi1 += incx; \ } \ \ /* Store final values of scale and sumsq to output variables. */ \ PASTEMAC(chr,copys)( scale_r, *scale ); \ PASTEMAC(chr,copys)( sumsq_r, *sumsq ); \ } INSERT_GENTFUNCR_BASIC0( sumsqv_unb_var1 ) blis-0.6.1/frame/util/bli_util_unb_var1.h000066400000000000000000000114011360743507500202760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // Prototype BLAS-like interfaces with typed operands. // #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* asum, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROTR_BASIC0( asumv_unb_var1 ) #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ uplo_t uploa, \ dim_t m, \ ctype* a, inc_t rs_a, inc_t cs_a, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( mkherm_unb_var1 ) INSERT_GENTPROT_BASIC0( mksymm_unb_var1 ) INSERT_GENTPROT_BASIC0( mktrim_unb_var1 ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROTR_BASIC0( norm1v_unb_var1 ) INSERT_GENTPROTR_BASIC0( normfv_unb_var1 ) INSERT_GENTPROTR_BASIC0( normiv_unb_var1 ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffx, \ diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ ctype_r* norm, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROTR_BASIC0( norm1m_unb_var1 
) INSERT_GENTPROTR_BASIC0( normfm_unb_var1 ) INSERT_GENTPROTR_BASIC0( normim_unb_var1 ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ FILE* file, \ char* s1, \ dim_t n, \ ctype* x, inc_t incx, \ char* format, \ char* s2 \ ); INSERT_GENTPROT_BASIC0_I( fprintv ) #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ BLIS_EXPORT_BLIS void PASTEMAC(ch,opname) \ ( \ FILE* file, \ char* s1, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ char* format, \ char* s2 \ ); INSERT_GENTPROT_BASIC0_I( fprintm ) #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( randv_unb_var1 ) INSERT_GENTPROT_BASIC0( randnv_unb_var1 ) #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ doff_t diagoffx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype* x, inc_t rs_x, inc_t cs_x, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROT_BASIC0( randm_unb_var1 ) INSERT_GENTPROT_BASIC0( randnm_unb_var1 ) #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ ctype_r* scale, \ ctype_r* sumsq, \ cntx_t* cntx, \ rntm_t* rntm \ ); INSERT_GENTPROTR_BASIC0( sumsqv_unb_var1 ) blis-0.6.1/kernels/000077500000000000000000000000001360743507500141245ustar00rootroot00000000000000blis-0.6.1/kernels/armv7a/000077500000000000000000000000001360743507500153215ustar00rootroot00000000000000blis-0.6.1/kernels/armv7a/3/000077500000000000000000000000001360743507500154635ustar00rootroot00000000000000blis-0.6.1/kernels/armv7a/3/bli_cgemm_armv7a_asm_2x2.S000066400000000000000000000233231360743507500223400ustar00rootroot00000000000000 #define REALNAME bli_cgemm_armv7a_ker_2x2 #define STACKSIZE 256 #define K r0 #define PTR_ALPHA r1 #define OLD_A r2 #define OLD_B r3 #define PTR_BETA [fp, #0 ] #define OLD_C 
[fp, #4 ]
#define OLD_RSC [fp, #8 ]
#define OLD_CSC [fp, #12 ]
#define AUX [fp, #16 ]

/******************************************************
* [fp, #-128] - [fp, #-32] is reserved
* for store and restore of floating point
* registers (s8 - s31, 24 words = 96 bytes)
*******************************************************/

// Register aliases used by the macros and the kernel body below.
// (K, PTR_ALPHA, PTR_BETA, OLD_A, OLD_B, OLD_C, REALNAME and STACKSIZE are
// defined earlier in this file.)
#define L r2
#define AO r5
#define BO r6
#define CO1 r7
#define CO2 r8

// Prefetch distances (bytes) ahead of the A, B and C pointers.
#define A_PRE 96
#define B_PRE 96
#define C_PRE 0

/**************************************************************************************
* Macro definitions
**************************************************************************************/

// Multiply-accumulate flavours used when scaling C by the complex beta.
#define FMAC_BR fnmacs
#define FMAC_BI fmacs

// NN is hard-coded, so the first branch of the chain below is always taken.
#define NN 1

#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
#define FADD_R fsubs
#define FADD_I fadds
#define FMAC_R1 fnmacs
#define FMAC_R2 fnmacs
#define FMAC_I1 fmacs
#define FMAC_I2 fnmacs
#elif defined(CN) || defined(CT)
#define FADD_R fadds
#define FADD_I fsubs
#define FMAC_R1 fmacs
#define FMAC_R2 fmacs
#define FMAC_I1 fnmacs
#define FMAC_I2 fmacs
#elif defined(NC) || defined(TC)
#define FADD_R fadds
#define FADD_I fsubs
#define FMAC_R1 fmacs
#define FMAC_R2 fnmacs
#define FMAC_I1 fmacs
#define FMAC_I2 fmacs
#else
#define FADD_R fsubs
#define FADD_I fadds
#define FMAC_R1 fnmacs
#define FMAC_R2 fmacs
#define FMAC_I1 fnmacs
#define FMAC_I2 fnmacs
#endif

// INIT2x2: set the sixteen accumulators s16-s31 to +0.0.
// Clobbers r3 (the entry code is done with r3 by the time the accumulators are
// initialised, and SAVE2x2 reloads it before use).
// The zero is moved in from a core register instead of being computed as
// s16 - s16: s16 is a caller-saved register with arbitrary contents on entry,
// and x - x is NaN whenever x is NaN or +/-Inf, which would poison every
// accumulator.
.macro INIT2x2
    mov r3, #0
    vmov s16, r3
    vmov.f32 s17, s16
    vmov.f32 s18, s16
    vmov.f32 s19, s16
    vmov.f32 s20, s16
    vmov.f32 s21, s16
    vmov.f32 s22, s16
    vmov.f32 s23, s16
    vmov.f32 s24, s16
    vmov.f32 s25, s16
    vmov.f32 s26, s16
    vmov.f32 s27, s16
    vmov.f32 s28, s16
    vmov.f32 s29, s16
    vmov.f32 s30, s16
    vmov.f32 s31, s16
.endm

// KERNEL2x2_I: first step of a pipelined sequence. Loads s0-s3 / s8-s11 from
// AO / BO, initialises s16-s31 with products (fmuls, so no prior zeroing is
// needed), preloads the next step into s4-s7 / s12-s15 and advances AO and BO
// by 32 bytes in total.
.macro KERNEL2x2_I
    pld [ AO , #A_PRE ]
    pld [ BO , #B_PRE ]
    flds s0 , [ AO ]
    flds s1 , [ AO, #4 ]
    flds s8 , [ BO ]
    flds s9 , [ BO, #4 ]

    fmuls s16 , s0, s8
    flds s2 , [ AO, #8 ]
    fmuls s24 , s1, s9
    flds s3 , [ AO, #12 ]
    fmuls s17 , s0, s9
    flds s10, [ BO, #8 ]
    fmuls s25 , s1, s8
    flds s11, [ BO, #12 ]
    fmuls s18 , s2, s8
    add BO , BO, #16
    fmuls s26 , s3, s9
    add AO , AO, #16
    fmuls s19 , s2, s9
    pld [ BO , #B_PRE ]
    fmuls s27 , s3, s8
    pld [ AO , #A_PRE ]
    fmuls s20 , s0, s10
    flds s4 , [ AO, #0 ]
    fmuls s28 , s1, s11
    flds s5 , [ AO, #4 ]
    fmuls s21 , s0, s11
    flds s12, [ BO ]
    fmuls s29 , s1, s10
    flds s13, [ BO, #4 ]
    fmuls s22 , s2, s10
    flds s6 , [ AO, #8 ]
    fmuls s30 , s3, s11
    flds s7 , [ AO, #12 ]
    fmuls s23 , s2, s11
    flds s14, [ BO, #8 ]
    fmuls s31 , s3, s10
    flds s15, [ BO, #12 ]
    add BO , BO, #16
    add AO , AO, #16
.endm

// KERNEL2x2_M1: accumulate with the operands in s0-s3 / s8-s11 while loading
// the next step into s4-s7 / s12-s15; advances AO and BO by 16 bytes.
.macro KERNEL2x2_M1
    pld [ AO , #A_PRE ]
    fmacs s16 , s0, s8
    pld [ BO , #B_PRE ]
    fmacs s24 , s1, s9
    flds s4 , [ AO, #0 ]
    fmacs s17 , s0, s9
    flds s5 , [ AO, #4 ]
    fmacs s25 , s1, s8
    flds s12, [ BO ]
    fmacs s18 , s2, s8
    flds s13, [ BO, #4 ]
    fmacs s26 , s3, s9
    flds s6 , [ AO, #8 ]
    fmacs s19 , s2, s9
    flds s7 , [ AO, #12 ]
    fmacs s27 , s3, s8
    fmacs s20 , s0, s10
    flds s14, [ BO, #8 ]
    fmacs s28 , s1, s11
    fmacs s21 , s0, s11
    flds s15, [ BO, #12 ]
    fmacs s29 , s1, s10
    fmacs s22 , s2, s10
    add BO , BO, #16
    fmacs s30 , s3, s11
    fmacs s23 , s2, s11
    add AO , AO, #16
    fmacs s31 , s3, s10
.endm

// KERNEL2x2_M2: accumulate with the operands in s4-s7 / s12-s15 while loading
// the next step into s0-s3 / s8-s11; advances AO and BO by 16 bytes.
.macro KERNEL2x2_M2
    fmacs s16 , s4, s12
    fmacs s24 , s5, s13
    flds s0 , [ AO, #0 ]
    fmacs s17 , s4, s13
    flds s1 , [ AO, #4 ]
    fmacs s25 , s5, s12
    fmacs s18 , s6, s12
    flds s8 , [ BO ]
    fmacs s26 , s7, s13
    flds s9 , [ BO, #4 ]
    fmacs s19 , s6, s13
    fmacs s27 , s7, s12
    flds s2 , [ AO, #8 ]
    fmacs s20 , s4, s14
    flds s3 , [ AO, #12 ]
    fmacs s28 , s5, s15
    fmacs s21 , s4, s15
    flds s10, [ BO, #8 ]
    fmacs s29 , s5, s14
    flds s11, [ BO, #12 ]
    fmacs s22 , s6, s14
    fmacs s30 , s7, s15
    add BO , BO, #16
    fmacs s23 , s6, s15
    add AO , AO, #16
    fmacs s31 , s7, s14
.endm

// KERNEL2x2_E: last step of a pipelined sequence. Accumulates with
// s4-s7 / s12-s15 and loads nothing further.
.macro KERNEL2x2_E
    fmacs s16 , s4, s12
    fmacs s24 , s5, s13
    fmacs s17 , s4, s13
    fmacs s25 , s5, s12
    fmacs s18 , s6, s12
    fmacs s26 , s7, s13
    fmacs s19 , s6, s13
    fmacs s27 , s7, s12
    fmacs s20 , s4, s14
    fmacs s28 , s5, s15
    fmacs s21 , s4, s15
    fmacs s29 , s5, s14
    fmacs s22 , s6, s14
    fmacs s30 , s7, s15
    fmacs s23 , s6, s15
    fmacs s31 , s7, s14
.endm

// KERNEL2x2_SUB: one self-contained (non-pipelined) step: load, accumulate,
// advance AO and BO by 16 bytes.
.macro KERNEL2x2_SUB
    flds s0 , [ AO ]
    flds s1 , [ AO, #4 ]
    flds s8 , [ BO ]
    flds s9 , [ BO, #4 ]

    fmacs s16 , s0, s8
    flds s2 , [ AO, #8 ]
    fmacs s24 , s1, s9
    flds s3 , [ AO, #12 ]
    fmacs s17 , s0, s9
    flds s10, [ BO, #8 ]
    fmacs s25 , s1, s8
    flds s11, [ BO, #12 ]
    fmacs s18 , s2, s8
    fmacs s26 , s3, s9
    fmacs s19 , s2, s9
    fmacs s27 , s3, s8
    fmacs s20 , s0, s10
    fmacs s28 , s1, s11
    fmacs s21 , s0, s11
    fmacs s29 , s1, s10
    fmacs s22 , s2, s10
    add BO , BO, #16
    fmacs s30 , s3, s11
    fmacs s23 , s2, s11
    add AO , AO, #16
    fmacs s31 , s3, s10
.endm

// SAVE2x2: combine the partial sums into real/imaginary parts, then write
// beta*C + alpha*(accumulated product) back to the four complex elements
// addressed through CO1, CO2 and the row stride.
// Clobbers r2, r3, r4, s0-s5, s8, s9 and s16-s31; CO1/CO2 end one row further.
.macro SAVE2x2
    ldr r3, OLD_RSC                 // Row stride size
    lsl r3, r3, #3                  // multiply with size of complex float

    flds s0, [ PTR_ALPHA ]          // load real part of alpha
    flds s1, [ PTR_ALPHA, #4 ]      // load imag part of alpha
    ldr r4, PTR_BETA
    flds s2, [ r4 ]                 // load real part of beta
    flds s3, [ r4, #4 ]             // load imag part of beta

    // Add/Sub the real and the imag parts
    FADD_R s16, s24 , s16
    FADD_I s17, s25 , s17
    FADD_R s18, s26 , s18
    FADD_I s19, s27 , s19
    FADD_R s20, s28 , s20
    FADD_I s21, s29 , s21
    FADD_R s22, s30 , s22
    FADD_I s23, s31 , s23

    mov r4, CO1                     // save pointer
    fldmias CO1, { s4 - s5 }        // read real and imag part from C
    add CO1, CO1, r3

    mov r2, CO2                     // save pointer
    fldmias CO2, { s8 - s9 }        // read real and imag part from C
    add CO2, CO2, r3

    fmuls s24, s4, s2               // multiply Beta-real with C-real
    fmuls s25, s5, s2               // multiply Beta-real with C-imag
    fmuls s28, s8, s2               // multiply Beta-real with C-real
    fmuls s29, s9, s2               // multiply Beta-real with C-imag

    FMAC_BR s24, s3, s5             // multiply beta-imag with C-imag and add
    FMAC_BI s25, s3, s4             // multiply beta-imag with C-real and add
    FMAC_BR s28, s3, s9             // multiply beta-imag with C-imag and add
    FMAC_BI s29, s3, s8             // multiply beta-imag with C-real and add

    FMAC_R1 s24 , s0 , s16
    FMAC_I1 s25 , s0 , s17
    FMAC_R2 s24 , s1 , s17
    FMAC_I2 s25 , s1 , s16

    FMAC_R1 s28 , s0 , s20
    FMAC_I1 s29 , s0 , s21
    FMAC_R2 s28 , s1 , s21
    FMAC_I2 s29 , s1 , s20

    fldmias CO1, { s4 - s5 }        // read real and imag part from C
    fldmias CO2, { s8 - s9 }        // read real and imag part from C

    fmuls s26, s4, s2               // multiply Beta-real with C-real
    fmuls s27, s5, s2               // multiply Beta-real with C-imag
    fmuls s30, s8, s2               // multiply Beta-real with C-real
    fmuls s31, s9, s2               // multiply Beta-real with C-imag

    FMAC_BR s26, s3, s5             // multiply beta-imag with C-imag and add
    FMAC_BI s27, s3, s4             // multiply beta-imag with C-real and add
    FMAC_BR s30, s3, s9             // multiply beta-imag with C-imag and add
    FMAC_BI s31, s3, s8             // multiply beta-imag with C-real and add

    FMAC_R1 s26 , s0 , s18
    FMAC_I1 s27 , s0 , s19
    FMAC_R2 s26 , s1 , s19
    FMAC_I2 s27 , s1 , s18

    FMAC_R1 s30, s0 , s22
    FMAC_I1 s31, s0 , s23
    FMAC_R2 s30, s1 , s23
    FMAC_I2 s31, s1 , s22

    mov CO1, r4                     // restore pointer
    mov CO2, r2                     // restore pointer

    fstmias CO1, { s24 - s25 }
    fstmias CO2, { s28 - s29 }

    add CO1, CO1, r3
    add CO2, CO2, r3

    fstmias CO1, { s26 - s27 }
    fstmias CO2, { s30 - s31 }
.endm

/**************************************************************************************
* End of macro definitions
**************************************************************************************/

    .arm
    .global REALNAME
    .func REALNAME

REALNAME:

    push {r4 - r9, fp}              // save register
    add fp, sp, #28                 // add number of saved register multiplied by size of int
    sub sp, sp, #STACKSIZE          // reserve stack

    mov AO, OLD_A                   // pointer matrix A
    mov BO, OLD_B                   // pointer matrix B

    sub r3, fp, #128
    vstm r3, { s8 - s31}            // store floating point registers

    ldr r2, OLD_C                   // pointer matrix C
    ldr r3, OLD_CSC                 // Col stride size of C
    lsl r3, r3, #3                  // multiply with size of complex float

    mov CO1, r2                     // first line of C
    add CO2, CO1, r3                // second line of C

    pld [ CO1, #C_PRE ]             // prefetch the lines of C
    pld [ CO2, #C_PRE ]             // prefetch the lines of C

cgemm_kernel_L2_M2_20:

    asrs L , K, #3                  // L = K / 8
    cmp L , #2
    blt cgemm_kernel_L2_M2_32

    KERNEL2x2_I
    KERNEL2x2_M2
    KERNEL2x2_M1
    KERNEL2x2_M2
    KERNEL2x2_M1
    KERNEL2x2_M2
    KERNEL2x2_M1
    KERNEL2x2_M2

    subs L, L, #2
    ble cgemm_kernel_L2_M2_22a
    .align 5

cgemm_kernel_L2_M2_22:

    KERNEL2x2_M1
    KERNEL2x2_M2
    KERNEL2x2_M1
    KERNEL2x2_M2
    KERNEL2x2_M1
    KERNEL2x2_M2
KERNEL2x2_M1 KERNEL2x2_M2 subs L, L, #1 bgt cgemm_kernel_L2_M2_22 cgemm_kernel_L2_M2_22a: KERNEL2x2_M1 KERNEL2x2_M2 KERNEL2x2_M1 KERNEL2x2_M2 KERNEL2x2_M1 KERNEL2x2_M2 KERNEL2x2_M1 KERNEL2x2_E b cgemm_kernel_L2_M2_44 cgemm_kernel_L2_M2_32: tst L, #1 ble cgemm_kernel_L2_M2_40 KERNEL2x2_I KERNEL2x2_M2 KERNEL2x2_M1 KERNEL2x2_M2 KERNEL2x2_M1 KERNEL2x2_M2 KERNEL2x2_M1 KERNEL2x2_E b cgemm_kernel_L2_M2_44 cgemm_kernel_L2_M2_40: INIT2x2 cgemm_kernel_L2_M2_44: ands L , K, #7 // L = K % 8 ble cgemm_kernel_L2_M2_100 cgemm_kernel_L2_M2_46: KERNEL2x2_SUB subs L, L, #1 bne cgemm_kernel_L2_M2_46 cgemm_kernel_L2_M2_100: SAVE2x2 cgemm_kernel_L999: sub r3, fp, #128 vldm r3, { s8 - s31} // restore floating point registers sub sp, fp, #28 pop {r4 - r9, fp} bx lr blis-0.6.1/kernels/armv7a/3/bli_dgemm_armv7a_asm_4x4.S000066400000000000000000000270251360743507500223500ustar00rootroot00000000000000 #define REALNAME bli_dgemm_armv7a_ker_4x4 #define STACKSIZE 256 #define K r0 #define PTR_ALPHA r1 #define OLD_A r2 #define OLD_B r3 #define PTR_BETA [fp, #0 ] #define OLD_C [fp, #4 ] #define OLD_RSC [fp, #8 ] #define OLD_CSC [fp, #12 ] #define AUX [fp, #16 ] /****************************************************** * [fp, #-128] - [fp, #-64] is reserved * for store and restore of floating point * register *******************************************************/ #define L r2 #define AO r5 #define BO r6 #define CO1 r7 #define CO2 r8 #define CO3 r9 #define CO4 r12 #define A_PRE 96 #define B_PRE 96 #define C_PRE 0 /************************************************************************************** * Macro definitions **************************************************************************************/ .macro INIT4x4 vsub.f64 d16 , d16 , d16 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 vmov.f64 d22, d16 vmov.f64 d23, d16 vmov.f64 d24, d16 vmov.f64 d25, d16 vmov.f64 d26, d16 vmov.f64 d27, d16 vmov.f64 d28, d16 vmov.f64 d29, d16 vmov.f64 d30, d16 
vmov.f64 d31, d16 .endm .macro KERNEL4x4_I pld [ BO , #B_PRE ] fldd d8 , [ BO ] fldd d0 , [ AO ] pld [ AO , #A_PRE ] fldd d1 , [ AO, #8 ] fmuld d16 , d0, d8 fldd d2 , [ AO, #16 ] fmuld d17 , d1, d8 fldd d3 , [ AO, #24 ] fmuld d18 , d2, d8 fldd d9 , [ BO, #8 ] fmuld d19 , d3, d8 fldd d10, [ BO, #16 ] fmuld d20 , d0, d9 fldd d11, [ BO, #24 ] fmuld d21 , d1, d9 add BO , BO, #32 add AO , AO, #32 fmuld d22 , d2, d9 pld [ BO , #B_PRE ] fldd d12, [ BO ] fmuld d23 , d3, d9 pld [ AO , #A_PRE ] fldd d4 , [ AO, #0 ] fmuld d24 , d0, d10 fldd d5 , [ AO, #8 ] fmuld d25 , d1, d10 fldd d6 , [ AO, #16 ] fmuld d26 , d2, d10 fldd d7 , [ AO, #24 ] fmuld d27 , d3, d10 fldd d13, [ BO, #8 ] fmuld d28 , d0, d11 fldd d14, [ BO, #16 ] fmuld d29 , d1, d11 fldd d15, [ BO, #24 ] fmuld d30 , d2, d11 fmuld d31 , d3, d11 .endm .macro KERNEL4x4_M2 fmacd d16 , d4, d12 pld [ AO , #A_PRE+32 ] fmacd d17 , d5, d12 fldd d0 , [ AO , #32 ] fmacd d18 , d6, d12 pld [ BO , #B_PRE+32 ] fmacd d19 , d7, d12 fldd d8 , [ BO , #32 ] fmacd d20 , d4, d13 fldd d1 , [ AO, #40 ] fmacd d21 , d5, d13 fldd d2 , [ AO, #48 ] fmacd d22 , d6, d13 fldd d3 , [ AO, #56 ] fmacd d23 , d7, d13 fmacd d24 , d4, d14 fmacd d25 , d5, d14 fldd d9 , [ BO, #40 ] fmacd d26 , d6, d14 fldd d10, [ BO, #48 ] fmacd d27 , d7, d14 fldd d11, [ BO, #56 ] fmacd d28 , d4, d15 fmacd d29 , d5, d15 add AO , AO, #64 fmacd d30 , d6, d15 add BO , BO, #64 fmacd d31 , d7, d15 .endm .macro KERNEL4x4_M1 fmacd d16 , d0, d8 pld [ AO , #A_PRE ] fmacd d17 , d1, d8 fldd d4 , [ AO ] fmacd d18 , d2, d8 pld [ BO , #B_PRE ] fmacd d19 , d3, d8 fldd d12, [ BO ] fmacd d20 , d0, d9 fldd d5 , [ AO, #8 ] fmacd d21 , d1, d9 fldd d6 , [ AO, #16 ] fmacd d22 , d2, d9 fldd d7 , [ AO, #24 ] fmacd d23 , d3, d9 fmacd d24 , d0, d10 fmacd d25 , d1, d10 fldd d13, [ BO, #8 ] fmacd d26 , d2, d10 fldd d14, [ BO, #16 ] fmacd d27 , d3, d10 fldd d15, [ BO, #24 ] fmacd d28 , d0, d11 fmacd d29 , d1, d11 fmacd d30 , d2, d11 fmacd d31 , d3, d11 .endm .macro KERNEL4x4_E fmacd d16 , d4, d12 fmacd 
d17 , d5, d12 add BO , BO, #32 fmacd d18 , d6, d12 add AO , AO, #32 fmacd d19 , d7, d12 fmacd d20 , d4, d13 fmacd d21 , d5, d13 fmacd d22 , d6, d13 fmacd d23 , d7, d13 fmacd d24 , d4, d14 fmacd d25 , d5, d14 fmacd d26 , d6, d14 fmacd d27 , d7, d14 fmacd d28 , d4, d15 fmacd d29 , d5, d15 fmacd d30 , d6, d15 fmacd d31 , d7, d15 .endm .macro KERNEL4x4_SUB fldd d8 , [ BO ] pld [ BO , #B_PRE ] fldd d0 , [ AO ] pld [ AO , #A_PRE ] fldd d1 , [ AO, #8 ] fmacd d16 , d0, d8 fldd d2 , [ AO, #16 ] fmacd d17 , d1, d8 fldd d3 , [ AO, #24 ] fmacd d18 , d2, d8 fldd d9 , [ BO, #8 ] fmacd d19 , d3, d8 fldd d10, [ BO, #16 ] fmacd d20 , d0, d9 fldd d11, [ BO, #24 ] fmacd d21 , d1, d9 fmacd d22 , d2, d9 fmacd d23 , d3, d9 fmacd d24 , d0, d10 fmacd d25 , d1, d10 fmacd d26 , d2, d10 fmacd d27 , d3, d10 fmacd d28 , d0, d11 fmacd d29 , d1, d11 add AO , AO, #32 fmacd d30 , d2, d11 add BO , BO, #32 fmacd d31 , d3, d11 .endm .macro SAVE4x4 ldr r3, OLD_RSC // Row stride size lsl r3, r3, #3 // multiply with size of double fldd d0, [ PTR_ALPHA ] // load alpha ldr r4, PTR_BETA fldd d1, [ r4 ] // load beta //----------------------------------------------------------- mov r2, CO1 // save pointer mov r4, CO2 // save pointer fldd d8, [ CO1 ] // load value from C fldd d12, [ CO2 ] // load value from C fmuld d8, d8, d1 // multiply with beta add CO1, CO1, r3 // compute next pointer fmacd d8, d0, d16 // multiply sum with alpha and add to value of C add CO2, CO2, r3 // compute next pointer fldd d9, [ CO1 ] // load value from C fldd d13, [ CO2 ] // load value from C fmuld d9, d9, d1 // multiply with beta add CO1, CO1, r3 // compute next pointer fmacd d9, d0, d17 // multiply sum with alpha and add to value of C add CO2, CO2, r3 // compute next pointer fldd d10, [ CO1 ] // load value from C fldd d14, [ CO2 ] // load value from C fmuld d10, d10, d1 // multiply with beta add CO1, CO1, r3 // compute next pointer fmacd d10, d0, d18 // multiply sum with alpha and add to value of C add CO2, CO2, r3 // compute next 
pointer fldd d11, [ CO1 ] // load value from C fldd d15, [ CO2 ] // load value from C fmuld d11, d11, d1 // multiply with beta mov CO1, r2 // restore pointer fmacd d11, d0, d19 // multiply sum with alpha and add to value of C mov CO2, r4 // restore pointer fstd d8, [ CO1 ] // store value in C add CO1 , CO1, r3 // compute next pointer fstd d9, [ CO1 ] // store value in C add CO1 , CO1, r3 // compute next pointer fstd d10, [ CO1 ] // store value in C add CO1 , CO1, r3 // compute next pointer fstd d11, [ CO1 ] // store value in C //----------------------------------------------------------- mov r2, CO3 // save pointer fldd d8, [ CO3 ] // load value from C fmuld d12, d12, d1 // multiply with beta add CO3, CO3, r3 // compute next pointer fmacd d12, d0, d20 // multiply sum with alpha and add to value of C fldd d9, [ CO3 ] // load value from C fmuld d13, d13, d1 // multiply with beta add CO3, CO3, r3 // compute next pointer fmacd d13, d0, d21 // multiply sum with alpha and add to value of C fldd d10, [ CO3 ] // load value from C fmuld d14, d14, d1 // multiply with beta add CO3, CO3, r3 // compute next pointer fmacd d14, d0, d22 // multiply sum with alpha and add to value of C fldd d11, [ CO3 ] // load value from C fmuld d15, d15, d1 // multiply with beta mov CO3, r2 // restore pointer fmacd d15, d0, d23 // multiply sum with alpha and add to value of C fstd d12, [ CO2 ] // store value in C add CO2 , CO2, r3 // compute next pointer fstd d13, [ CO2 ] // store value in C add CO2 , CO2, r3 // compute next pointer fstd d14, [ CO2 ] // store value in C add CO2 , CO2, r3 // compute next pointer fstd d15, [ CO2 ] // store value in C //----------------------------------------------------------- mov r4, CO4 // save pointer fldd d12, [ CO4 ] // load value from C fmuld d8, d8, d1 // multiply with beta add CO4, CO4, r3 // compute next pointer fmacd d8, d0, d24 // multiply sum with alpha and add to value of C fldd d13, [ CO4 ] // load value from C fmuld d9, d9, d1 // multiply with beta 
add CO4, CO4, r3 // compute next pointer fmacd d9, d0, d25 // multiply sum with alpha and add to value of C fldd d14, [ CO4 ] // load value from C fmuld d10, d10, d1 // multiply with beta add CO4, CO4, r3 // compute next pointer fmacd d10, d0, d26 // multiply sum with alpha and add to value of C fldd d15, [ CO4 ] // load value from C fmuld d11, d11, d1 // multiply with beta mov CO4, r4 // restore pointer fmacd d11, d0, d27 // multiply sum with alpha and add to value of C //----------------------------------------------------------- fstd d8, [ CO3 ] // store value in C fmuld d12, d12, d1 // multiply with beta add CO3 , CO3, r3 // compute next pointer fmacd d12, d0, d28 // multiply sum with alpha and add to value of C fstd d9, [ CO3 ] // store value in C fmuld d13, d13, d1 // multiply with beta add CO3 , CO3, r3 // compute next pointer fmacd d13, d0, d29 // multiply sum with alpha and add to value of C fstd d10, [ CO3 ] // store value in C fmuld d14, d14, d1 // multiply with beta add CO3 , CO3, r3 // compute next pointer fmacd d14, d0, d30 // multiply sum with alpha and add to value of C fstd d11, [ CO3 ] // store value in C fmuld d15, d15, d1 // multiply with beta fstd d12, [ CO4 ] // store value in C fmacd d15, d0, d31 // multiply sum with alpha and add to value of C add CO4 , CO4, r3 // compute next pointer fstd d13, [ CO4 ] // store value in C add CO4 , CO4, r3 // compute next pointer fstd d14, [ CO4 ] // store value in C add CO4 , CO4, r3 // compute next pointer fstd d15, [ CO4 ] // store value in C .endm /************************************************************************************** * End of macro definitions **************************************************************************************/ .arm .global REALNAME .func REALNAME REALNAME: push {r4 - r9, fp} // save register add fp, sp, #28 // add number of saved register multiplied by size of int sub sp, sp, #STACKSIZE // reserve stack mov AO, OLD_A // pointer matrix A mov BO, OLD_B // pointer matrix 
B sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers ldr r2, OLD_C // pointer matrix C ldr r3, OLD_CSC // Col stride size of C lsl r3, r3, #3 // multiply with size of double mov CO1, r2 // first line of C add CO2, CO1, r3 // second line of C add CO3, CO2, r3 // third line of C add CO4, CO3, r3 // fourth line of C pld [ CO1, #C_PRE ] // prefetch the lines of C pld [ CO2, #C_PRE ] // prefetch the lines of C pld [ CO3, #C_PRE ] // prefetch the lines of C pld [ CO3, #C_PRE ] // prefetch the lines of C dgemm_kernel_L4_M4_20: asrs L , K, #3 // L = K / 8 cmp L , #2 blt dgemm_kernel_L4_M4_32 KERNEL4x4_I KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 subs L, L, #2 ble dgemm_kernel_L4_M4_22a .align 5 dgemm_kernel_L4_M4_22: KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 subs L, L, #1 bgt dgemm_kernel_L4_M4_22 dgemm_kernel_L4_M4_22a: KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_E b dgemm_kernel_L4_M4_44 dgemm_kernel_L4_M4_32: tst L, #1 ble dgemm_kernel_L4_M4_40 KERNEL4x4_I KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_M2 KERNEL4x4_M1 KERNEL4x4_E b dgemm_kernel_L4_M4_44 dgemm_kernel_L4_M4_40: INIT4x4 dgemm_kernel_L4_M4_44: ands L , K, #7 // L = K % 8 ble dgemm_kernel_L4_M4_100 dgemm_kernel_L4_M4_46: KERNEL4x4_SUB subs L, L, #1 bne dgemm_kernel_L4_M4_46 dgemm_kernel_L4_M4_100: SAVE4x4 dgemm_kernel_L999: sub r3, fp, #128 vldm r3, { d8 - d15} // restore floating point registers sub sp, fp, #28 pop {r4 - r9, fp} bx lr blis-0.6.1/kernels/armv7a/3/bli_gemm_armv7a_asm_d4x4.c000066400000000000000000000122521360743507500223640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" extern void bli_sgemm_armv7a_ker_4x4 ( uint32_t k, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, uint32_t rs_c, uint32_t cs_c, auxinfo_t* restrict data ); void bli_sgemm_armv7a_asm_4x4 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint32_t k = k0; uint32_t rs_c = rs_c0; uint32_t cs_c = cs_c0; bli_sgemm_armv7a_ker_4x4( k, alpha, a, b, beta, c, rs_c, cs_c, data ); } extern void bli_dgemm_armv7a_ker_4x4 ( uint32_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, uint32_t rs_c, uint32_t cs_c, auxinfo_t* restrict data ); void bli_dgemm_armv7a_asm_4x4 ( dim_t k0, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint32_t k = k0; uint32_t rs_c = rs_c0; uint32_t cs_c = cs_c0; bli_dgemm_armv7a_ker_4x4( k, alpha, a, b, beta, c, rs_c, cs_c, data ); } extern void bli_cgemm_armv7a_ker_2x2 ( uint32_t k, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, uint32_t rs_c, uint32_t cs_c, auxinfo_t* restrict data ); void bli_cgemm_armv7a_asm_2x2 ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint32_t k = k0; uint32_t rs_c = rs_c0; uint32_t cs_c = cs_c0; bli_cgemm_armv7a_ker_2x2( k, alpha, a, b, beta, c, rs_c, cs_c, data ); } extern void bli_zgemm_armv7a_ker_2x2 ( uint32_t k, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, uint32_t rs_c, uint32_t cs_c, auxinfo_t* restrict data ); void bli_zgemm_armv7a_asm_2x2 ( dim_t k0, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint32_t k = k0; uint32_t rs_c = rs_c0; uint32_t cs_c = cs_c0; bli_zgemm_armv7a_ker_2x2( k, alpha, a, b, beta, c, rs_c, cs_c, data ); } blis-0.6.1/kernels/armv7a/3/bli_gemm_armv7a_int_d4x4.c000066400000000000000000000314411360743507500223770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "arm_neon.h" void bli_sgemm_armv7a_int_4x4 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint32_t k_iter = k0 / 4; uint32_t k_left = k0 % 4; uint32_t rs_c = rs_c0; uint32_t cs_c = cs_c0; uint32_t i; void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); float32x4_t alphav; alphav = vmovq_n_f32( *alpha ); float32x4_t av1; float32x4_t av2; float32x4_t av3; float32x4_t av4; float32x4_t bv1; float32x4_t bv2; float32x4_t bv3; float32x4_t bv4; // Vector for column 0 float32x4_t cv0; // Vector for column 1 float32x4_t cv1; // Vector for column 2 float32x4_t cv2; // Vector for column 3 float32x4_t cv3; if( rs_c == 1 ) { // Load column 0 cv0 = vld1q_f32( c + 0*rs_c + 0*cs_c ); // Load column 1 cv1 = vld1q_f32( c + 0*rs_c + 1*cs_c ); // Load column 2 cv2 = vld1q_f32( c + 0*rs_c + 2*cs_c ); // Load column 3 cv3 = vld1q_f32( c + 0*rs_c + 3*cs_c ); } else { // Load column 0 cv0 = vld1q_lane_f32( c + 0*rs_c + 0*cs_c, cv0, 0); cv0 = vld1q_lane_f32( c + 1*rs_c + 0*cs_c, cv0, 1); cv0 = vld1q_lane_f32( c + 2*rs_c + 0*cs_c, cv0, 2); cv0 = vld1q_lane_f32( c + 3*rs_c + 0*cs_c, cv0, 3); // Load column 1 cv1 = vld1q_lane_f32( c + 0*rs_c + 1*cs_c, cv1, 0); cv1 = vld1q_lane_f32( c + 1*rs_c + 1*cs_c, cv1, 1); cv1 = vld1q_lane_f32( c + 2*rs_c + 1*cs_c, cv1, 2); cv1 = vld1q_lane_f32( c + 3*rs_c + 1*cs_c, cv1, 3); // Load column 2 cv2 = vld1q_lane_f32( c + 0*rs_c + 2*cs_c, cv2, 0); cv2 = vld1q_lane_f32( c + 1*rs_c + 2*cs_c, cv2, 1); cv2 = vld1q_lane_f32( c + 2*rs_c + 2*cs_c, cv2, 2); cv2 = vld1q_lane_f32( c + 3*rs_c + 2*cs_c, cv2, 3); // Load column 3 cv3 = vld1q_lane_f32( c + 0*rs_c + 3*cs_c, cv3, 0); cv3 = vld1q_lane_f32( c + 1*rs_c + 3*cs_c, cv3, 1); cv3 = vld1q_lane_f32( c + 2*rs_c + 3*cs_c, cv3, 2); cv3 = vld1q_lane_f32( c + 3*rs_c + 3*cs_c, cv3, 3); } // Vector for accummulating column 0 float32x4_t abv0; // Initialize vector to 0.0 abv0 = vmovq_n_f32( 0.0 ); // Vector for accummulating column 1 float32x4_t abv1; // Initialize vector to 0.0 abv1 = vmovq_n_f32( 0.0 ); // Vector for accummulating column 2 float32x4_t abv2; // Initialize vector to 
0.0 abv2 = vmovq_n_f32( 0.0 ); // Vector for accummulating column 3 float32x4_t abv3; // Initialize vector to 0.0 abv3 = vmovq_n_f32( 0.0 ); for ( i = 0; i < k_iter; ++i ) { // Begin iter 0 av1 = vld1q_f32( a ); __builtin_prefetch( a + 224 ); __builtin_prefetch( b + 224 ); bv1 = vld1q_f32( b ); abv0 = vmlaq_lane_f32( abv0, av1, vget_low_f32(bv1), 0 ); abv1 = vmlaq_lane_f32( abv1, av1, vget_low_f32(bv1), 1 ); abv2 = vmlaq_lane_f32( abv2, av1, vget_high_f32(bv1), 0 ); abv3 = vmlaq_lane_f32( abv3, av1, vget_high_f32(bv1), 1 ); av2 = vld1q_f32( a+4 ); //__builtin_prefetch( a + 116 ); //__builtin_prefetch( b + 116 ); bv2 = vld1q_f32( b+4 ); abv0 = vmlaq_lane_f32( abv0, av2, vget_low_f32(bv2), 0 ); abv1 = vmlaq_lane_f32( abv1, av2, vget_low_f32(bv2), 1 ); abv2 = vmlaq_lane_f32( abv2, av2, vget_high_f32(bv2), 0 ); abv3 = vmlaq_lane_f32( abv3, av2, vget_high_f32(bv2), 1 ); av3 = vld1q_f32( a+8 ); //__builtin_prefetch( a + 120 ); //__builtin_prefetch( b + 120 ); bv3 = vld1q_f32( b+8 ); abv0 = vmlaq_lane_f32( abv0, av3, vget_low_f32(bv3), 0 ); abv1 = vmlaq_lane_f32( abv1, av3, vget_low_f32(bv3), 1 ); abv2 = vmlaq_lane_f32( abv2, av3, vget_high_f32(bv3), 0 ); abv3 = vmlaq_lane_f32( abv3, av3, vget_high_f32(bv3), 1 ); av4 = vld1q_f32( a+12); //__builtin_prefetch( a + 124 ); //__builtin_prefetch( b + 124 ); bv4 = vld1q_f32( b+12); abv0 = vmlaq_lane_f32( abv0, av4, vget_low_f32(bv4), 0 ); abv1 = vmlaq_lane_f32( abv1, av4, vget_low_f32(bv4), 1 ); abv2 = vmlaq_lane_f32( abv2, av4, vget_high_f32(bv4), 0 ); abv3 = vmlaq_lane_f32( abv3, av4, vget_high_f32(bv4), 1 ); a += 16; b += 16; } for ( i = 0; i < k_left; ++i ) { av1 = vld1q_f32( a ); __builtin_prefetch( a + 112 ); __builtin_prefetch( b + 112 ); bv1 = vld1q_f32( b ); abv0 = vmlaq_lane_f32( abv0, av1, vget_low_f32(bv1), 0 ); abv1 = vmlaq_lane_f32( abv1, av1, vget_low_f32(bv1), 1 ); abv2 = vmlaq_lane_f32( abv2, av1, vget_high_f32(bv1), 0 ); abv3 = vmlaq_lane_f32( abv3, av1, vget_high_f32(bv1), 1 ); a += 4; b += 4; } 
__builtin_prefetch( a_next ); __builtin_prefetch( b_next ); cv0 = vmulq_n_f32( cv0, *beta ); cv1 = vmulq_n_f32( cv1, *beta ); cv2 = vmulq_n_f32( cv2, *beta ); cv3 = vmulq_n_f32( cv3, *beta ); cv0 = vmlaq_f32( cv0, abv0, alphav ); cv1 = vmlaq_f32( cv1, abv1, alphav ); cv2 = vmlaq_f32( cv2, abv2, alphav ); cv3 = vmlaq_f32( cv3, abv3, alphav ); if( rs_c == 1 ) { // Store column 0 vst1q_f32( c + 0*rs_c + 0*cs_c, cv0 ); // Store column 1 vst1q_f32( c + 0*rs_c + 1*cs_c, cv1 ); // Store column 2 vst1q_f32( c + 0*rs_c + 2*cs_c, cv2 ); // Store column 3 vst1q_f32( c + 0*rs_c + 3*cs_c, cv3 ); } else{ // Store column 0 vst1q_lane_f32( c + 0*rs_c + 0*cs_c, cv0, 0); vst1q_lane_f32( c + 1*rs_c + 0*cs_c, cv0, 1); vst1q_lane_f32( c + 2*rs_c + 0*cs_c, cv0, 2); vst1q_lane_f32( c + 3*rs_c + 0*cs_c, cv0, 3); // Store column 1 vst1q_lane_f32( c + 0*rs_c + 1*cs_c, cv1, 0); vst1q_lane_f32( c + 1*rs_c + 1*cs_c, cv1, 1); vst1q_lane_f32( c + 2*rs_c + 1*cs_c, cv1, 2); vst1q_lane_f32( c + 3*rs_c + 1*cs_c, cv1, 3); // Store column 2 vst1q_lane_f32( c + 0*rs_c + 2*cs_c, cv2, 0); vst1q_lane_f32( c + 1*rs_c + 2*cs_c, cv2, 1); vst1q_lane_f32( c + 2*rs_c + 2*cs_c, cv2, 2); vst1q_lane_f32( c + 3*rs_c + 2*cs_c, cv2, 3); // Store column 3 vst1q_lane_f32( c + 0*rs_c + 3*cs_c, cv3, 0); vst1q_lane_f32( c + 1*rs_c + 3*cs_c, cv3, 1); vst1q_lane_f32( c + 2*rs_c + 3*cs_c, cv3, 2); vst1q_lane_f32( c + 3*rs_c + 3*cs_c, cv3, 3); } } void bli_dgemm_armv7a_int_4x4 ( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
//uint32_t k_iter = k0 / 4; uint32_t k_left = k % 4; uint32_t rs_c = rs_c0; uint32_t cs_c = cs_c0; uint32_t i; //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); register double a0; register double a1; register double a2; register double a3; register double A0; register double A1; register double A2; register double A3; double b0, b1, b2, b3; double B0, B1, B2, B3; double ab00, ab01, ab02, ab03; double ab10, ab11, ab12, ab13; double ab20, ab21, ab22, ab23; double ab30, ab31, ab32, ab33; double* restrict c00, * restrict c01, * restrict c02, * restrict c03; double* restrict c10, * restrict c11, * restrict c12, * restrict c13; double* restrict c20, * restrict c21, * restrict c22, * restrict c23; double* restrict c30, * restrict c31, * restrict c32, * restrict c33; double* restrict ap = a; double* restrict bp = b; double* restrict Ap = a + 4; double* restrict Bp = b + 4; c00 = (c + 0*rs_c + 0*cs_c); c10 = (c + 1*rs_c + 0*cs_c); c20 = (c + 2*rs_c + 0*cs_c); c30 = (c + 3*rs_c + 0*cs_c); c01 = (c + 0*rs_c + 1*cs_c); c11 = (c + 1*rs_c + 1*cs_c); c21 = (c + 2*rs_c + 1*cs_c); c31 = (c + 3*rs_c + 1*cs_c); c02 = (c + 0*rs_c + 2*cs_c); c12 = (c + 1*rs_c + 2*cs_c); c22 = (c + 2*rs_c + 2*cs_c); c32 = (c + 3*rs_c + 2*cs_c); c03 = (c + 0*rs_c + 3*cs_c); c13 = (c + 1*rs_c + 3*cs_c); c23 = (c + 2*rs_c + 3*cs_c); c33 = (c + 3*rs_c + 3*cs_c); ab00 = 0.0; ab10 = 0.0; ab20 = 0.0; ab30 = 0.0; ab01 = 0.0; ab11 = 0.0; ab21 = 0.0; ab31 = 0.0; ab02 = 0.0; ab12 = 0.0; ab22 = 0.0; ab32 = 0.0; ab03 = 0.0; ab13 = 0.0; ab23 = 0.0; ab33 = 0.0; A0 = *(Ap + 0); A1 = *(Ap + 1); A2 = *(Ap + 2); A3 = *(Ap + 3); a0 = *(ap + 0); a1 = *(ap + 1); a2 = *(ap + 2); B0 = *(Bp + 0); B1 = *(Bp + 1); B2 = *(Bp + 2); B3 = *(Bp + 3); b0 = *(bp + 0); b1 = *(bp + 1); b2 = *(bp + 2); double *Aplast = (Ap + 4*(k-k_left)); //for ( i = 0; i < k_iter; ++i ) // Unroll by factor 4. for ( ; Ap != Aplast ; ) // Unroll by factor 4. 
{ /* Prefetch */ //__asm__ ("pld\t[%0],#100\n\t" : :"r"(Ap) : ); __builtin_prefetch( ap + 112 ); __builtin_prefetch( Ap + 112 ); __builtin_prefetch( bp + 112 ); __builtin_prefetch( Bp + 112 ); // Iteration 0. ab00 += A0 * B0; a3 = *(ap + 3); ab10 += A1 * B0; b3 = *(bp + 3); ab20 += A2 * B0; ab30 += A3 * B0; ab01 += A0 * B1; ab11 += A1 * B1; B0 = *(Bp + 8); // Prefetch. ab21 += A2 * B1; ab31 += A3 * B1; ab02 += A0 * B2; B1 = *(Bp + 9); ab12 += A1 * B2; ab22 += A2 * B2; ab32 += A3 * B2; B2 = *(Bp + 10); ab03 += A0 * B3; A0 = *(Ap + 8); // Prefetch. ab13 += A1 * B3; A1 = *(Ap + 9); // Prefetch. ab23 += A2 * B3; ab33 += A3 * B3; A2 = *(Ap + 10); // Prefetch. // Iteration 1. //__asm__ ("pld\t[%0],#200\n\t" : :"r"(Ap) : ); ab00 += a0 * b0; ab10 += a1 * b0; A3 = *(Ap + 11); // Prefetch. ab20 += a2 * b0; ab30 += a3 * b0; B3 = *(Bp + 11); ab01 += a0 * b1; b0 = *(bp + 8); ab11 += a1 * b1; ab21 += a2 * b1; ab31 += a3 * b1; b1 = *(bp + 9); ab02 += a0 * b2; ab12 += a1 * b2; ab22 += a2 * b2; ab32 += a3 * b2; b2 = *(bp + 10); ab03 += a0 * b3; a0 = *(ap + 8); ab13 += a1 * b3; a1 = *(ap + 9); ab23 += a2 * b3; a2 = *(ap + 10); ab33 += a3 * b3; //a3 = *(ap + 11); ap += 8; Ap += 8; bp += 8; Bp += 8; } for ( i = 0; i < k_left; ++i ) { a0 = *(ap + 0); a1 = *(ap + 1); a2 = *(ap + 2); a3 = *(ap + 3); b0 = *(bp + 0); b1 = *(bp + 1); b2 = *(bp + 2); b3 = *(bp + 3); ab00 += a0 * b0; ab10 += a1 * b0; ab20 += a2 * b0; ab30 += a3 * b0; ab01 += a0 * b1; ab11 += a1 * b1; ab21 += a2 * b1; ab31 += a3 * b1; ab02 += a0 * b2; ab12 += a1 * b2; ab22 += a2 * b2; ab32 += a3 * b2; ab03 += a0 * b3; ab13 += a1 * b3; ab23 += a2 * b3; ab33 += a3 * b3; ap += 4; bp += 4; } *c00 = *c00 * *beta; *c10 = *c10 * *beta; *c20 = *c20 * *beta; *c30 = *c30 * *beta; *c01 = *c01 * *beta; *c11 = *c11 * *beta; *c21 = *c21 * *beta; *c31 = *c31 * *beta; *c02 = *c02 * *beta; *c12 = *c12 * *beta; *c22 = *c22 * *beta; *c32 = *c32 * *beta; *c03 = *c03 * *beta; *c13 = *c13 * *beta; *c23 = *c23 * *beta; *c33 = *c33 * *beta; *c00 += 
ab00 * *alpha; *c10 += ab10 * *alpha; *c20 += ab20 * *alpha; *c30 += ab30 * *alpha; *c01 += ab01 * *alpha; *c11 += ab11 * *alpha; *c21 += ab21 * *alpha; *c31 += ab31 * *alpha; *c02 += ab02 * *alpha; *c12 += ab12 * *alpha; *c22 += ab22 * *alpha; *c32 += ab32 * *alpha; *c03 += ab03 * *alpha; *c13 += ab13 * *alpha; *c23 += ab23 * *alpha; *c33 += ab33 * *alpha; } blis-0.6.1/kernels/armv7a/3/bli_sgemm_armv7a_asm_4x4.S000066400000000000000000000260351360743507500223670ustar00rootroot00000000000000 #define REALNAME bli_sgemm_armv7a_ker_4x4 #define STACKSIZE 256 #define K r0 #define PTR_ALPHA r1 #define OLD_A r2 #define OLD_B r3 #define PTR_BETA [fp, #0 ] #define OLD_C [fp, #4 ] #define OLD_RSC [fp, #8 ] #define OLD_CSC [fp, #12 ] #define AUX [fp, #16 ] /****************************************************** * [fp, #-128] - [fp, #-64] is reserved * for store and restore of floating point * register *******************************************************/ #define L r2 #define AO r5 #define BO r6 #define CO1 r7 #define CO2 r8 #define CO3 r9 #define CO4 r12 #define A_PRE 96 #define B_PRE 96 #define C_PRE 0 /************************************************************************************** * Macro definitions **************************************************************************************/ .macro INIT4x4 vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 vmov.f32 s22, s16 vmov.f32 s23, s16 vmov.f32 s24, s16 vmov.f32 s25, s16 vmov.f32 s26, s16 vmov.f32 s27, s16 vmov.f32 s28, s16 vmov.f32 s29, s16 vmov.f32 s30, s16 vmov.f32 s31, s16 .endm .macro KERNEL4x4_I pld [ AO , #A_PRE ] fldmias AO!, { s0 - s1 } pld [ BO , #B_PRE ] fldmias BO!, { s8 - s9 } fmuls s16 , s0, s8 fldmias AO!, { s2 - s3 } fmuls s17 , s1, s8 fmuls s18 , s2, s8 fldmias BO!, { s10 - s11 } fmuls s19 , s3, s8 fmuls s20 , s0, s9 fldmias AO!, { s4 - s5 } fmuls s21 , s1, s9 fmuls s22 , s2, s9 fldmias AO!, { s6 - s7 } fmuls s23 , s3, s9 fmuls s24 , 
s0, s10
	fldmias BO!, { s12 - s13 }
	fmuls s25 , s1, s10
	fmuls s26 , s2, s10
	fldmias BO!, { s14 - s15 }
	fmuls s27 , s3, s10
	fmuls s28 , s0, s11
	fmuls s29 , s1, s11
	fmuls s30 , s2, s11
	fmuls s31 , s3, s11
.endm

// Steady-state k step on the "odd" operand set: accumulates
// s4-s7 (A) x s12-s15 (B) while loading the next s0-s3 / s8-s11.
.macro KERNEL4x4_M2
	pld [ AO , #A_PRE ]
	fmacs s16 , s4, s12
	fmacs s17 , s5, s12
	fldmias AO!, { s0 - s3 }
	fmacs s18 , s6, s12
	pld [ BO , #B_PRE ]
	fmacs s19 , s7, s12
	fmacs s20 , s4, s13
	fldmias BO!, { s8 - s11 }
	fmacs s21 , s5, s13
	fmacs s22 , s6, s13
	//fldmias AO!, { s2 - s3 }
	fmacs s23 , s7, s13
	fmacs s24 , s4, s14
	//fldmias BO!, { s10 - s11 }
	fmacs s25 , s5, s14
	fmacs s26 , s6, s14
	fmacs s27 , s7, s14
	fmacs s28 , s4, s15
	fmacs s29 , s5, s15
	fmacs s30 , s6, s15
	fmacs s31 , s7, s15
.endm

// Steady-state k step on the "even" operand set: accumulates
// s0-s3 (A) x s8-s11 (B) while loading the next s4-s7 / s12-s15.
.macro KERNEL4x4_M1
	fmacs s16 , s0, s8
	fldmias AO!, { s4 - s7 }
	fmacs s17 , s1, s8
	fmacs s18 , s2, s8
	fldmias BO!, { s12 - s15 }
	//fldmias AO!, { s6 - s7 }
	fmacs s19 , s3, s8
	fmacs s20 , s0, s9
	fmacs s21 , s1, s9
	fmacs s22 , s2, s9
	//fldmias BO!, { s14 - s15 }
	fmacs s23 , s3, s9
	fmacs s24 , s0, s10
	fmacs s25 , s1, s10
	fmacs s26 , s2, s10
	fmacs s27 , s3, s10
	fmacs s28 , s0, s11
	fmacs s29 , s1, s11
	fmacs s30 , s2, s11
	fmacs s31 , s3, s11
.endm

// Last k step of an unrolled group: like KERNEL4x4_M2 but without
// loading further operands.
.macro KERNEL4x4_E
	fmacs s16 , s4, s12
	fmacs s17 , s5, s12
	fmacs s18 , s6, s12
	fmacs s19 , s7, s12
	fmacs s20 , s4, s13
	fmacs s21 , s5, s13
	fmacs s22 , s6, s13
	fmacs s23 , s7, s13
	fmacs s24 , s4, s14
	fmacs s25 , s5, s14
	fmacs s26 , s6, s14
	fmacs s27 , s7, s14
	fmacs s28 , s4, s15
	fmacs s29 , s5, s15
	fmacs s30 , s6, s15
	fmacs s31 , s7, s15
.endm

// One stand-alone k step (used for the K % 8 remainder): loads 4 floats
// of A and 4 of B, accumulates, and advances AO/BO by 16 bytes.
.macro KERNEL4x4_SUB
	flds s8 , [ BO ]
	flds s0 , [ AO ]
	flds s1 , [ AO, #4 ]
	fmacs s16 , s0, s8
	flds s2 , [ AO, #8 ]
	fmacs s17 , s1, s8
	flds s3 , [ AO, #12 ]
	fmacs s18 , s2, s8
	flds s9 , [ BO, #4 ]
	fmacs s19 , s3, s8
	flds s10, [ BO, #8 ]
	fmacs s20 , s0, s9
	flds s11, [ BO, #12 ]
	fmacs s21 , s1, s9
	fmacs s22 , s2, s9
	fmacs s23 , s3, s9
	fmacs s24 , s0, s10
	fmacs s25 , s1, s10
	fmacs s26 , s2, s10
	fmacs s27 , s3, s10
	fmacs s28 , s0, s11
	fmacs s29 , s1, s11
	add AO , AO, #16
	fmacs s30 , s2, s11
	add BO , BO, #16
	fmacs s31 , s3, s11
.endm

// C := beta * C + alpha * (accumulators). CO1..CO4 address the four
// columns; r3 holds the row stride in bytes. Clobbers r2, r3, r4,
// s0, s1 and s8-s15.
// NOTE(review): C is always loaded and multiplied by beta, so NaN/Inf
// already in C propagates even when beta == 0 -- confirm whether the
// caller special-cases beta == 0.
.macro SAVE4x4
	ldr r3, OLD_RSC // Row stride size
	lsl r3, r3, #2 // multiply with size of float
	flds s0, [ PTR_ALPHA ] // load alpha
	ldr r4, PTR_BETA
	flds s1, [ r4 ] // load beta
	//-----------------------------------------------------------
	mov r2, CO1 // save pointer
	mov r4, CO2 // save pointer
	flds s8, [ CO1 ] // load value from C
	flds s12, [ CO2 ] // load value from C
	fmuls s8, s8, s1 // multiply with beta
	add CO1, CO1, r3 // compute next pointer
	fmacs s8, s0, s16 // multiply sum with alpha and add to value of C
	add CO2, CO2, r3 // compute next pointer
	flds s9, [ CO1 ] // load value from C
	flds s13, [ CO2 ] // load value from C
	fmuls s9, s9, s1 // multiply with beta
	add CO1, CO1, r3 // compute next pointer
	fmacs s9, s0, s17 // multiply sum with alpha and add to value of C
	add CO2, CO2, r3 // compute next pointer
	flds s10, [ CO1 ] // load value from C
	flds s14, [ CO2 ] // load value from C
	fmuls s10, s10, s1 // multiply with beta
	add CO1, CO1, r3 // compute next pointer
	fmacs s10, s0, s18 // multiply sum with alpha and add to value of C
	add CO2, CO2, r3 // compute next pointer
	flds s11, [ CO1 ] // load value from C
	flds s15, [ CO2 ] // load value from C
	fmuls s11, s11, s1 // multiply with beta
	mov CO1, r2 // restore pointer
	fmacs s11, s0, s19 // multiply sum with alpha and add to value of C
	mov CO2, r4 // restore pointer
	fsts s8, [ CO1 ] // store value in C
	add CO1 , CO1, r3 // compute next pointer
	fsts s9, [ CO1 ] // store value in C
	add CO1 , CO1, r3 // compute next pointer
	fsts s10, [ CO1 ] // store value in C
	add CO1 , CO1, r3 // compute next pointer
	fsts s11, [ CO1 ] // store value in C
	//-----------------------------------------------------------
	mov r2, CO3 // save pointer
	flds s8, [ CO3 ] // load value from C
	fmuls s12, s12, s1 // multiply with beta
	add CO3, CO3, r3 // compute next pointer
	fmacs s12, s0, s20 // multiply sum with alpha and add to value of C
	flds s9, [ CO3 ] // load value from C
	fmuls s13, s13, s1 // multiply with beta
	add CO3, CO3, r3 // compute next pointer
	fmacs s13, s0, s21 // multiply sum with alpha and add to value of C
	flds s10, [ CO3 ] // load value from C
	fmuls s14, s14, s1 // multiply with beta
	add CO3, CO3, r3 // compute next pointer
	fmacs s14, s0, s22 // multiply sum with alpha and add to value of C
	flds s11, [ CO3 ] // load value from C
	fmuls s15, s15, s1 // multiply with beta
	mov CO3, r2 // restore pointer
	fmacs s15, s0, s23 // multiply sum with alpha and add to value of C
	fsts s12, [ CO2 ] // store value in C
	add CO2 , CO2, r3 // compute next pointer
	fsts s13, [ CO2 ] // store value in C
	add CO2 , CO2, r3 // compute next pointer
	fsts s14, [ CO2 ] // store value in C
	add CO2 , CO2, r3 // compute next pointer
	fsts s15, [ CO2 ] // store value in C
	//-----------------------------------------------------------
	mov r4, CO4 // save pointer
	flds s12, [ CO4 ] // load value from C
	fmuls s8, s8, s1 // multiply with beta
	add CO4, CO4, r3 // compute next pointer
	fmacs s8, s0, s24 // multiply sum with alpha and add to value of C
	flds s13, [ CO4 ] // load value from C
	fmuls s9, s9, s1 // multiply with beta
	add CO4, CO4, r3 // compute next pointer
	fmacs s9, s0, s25 // multiply sum with alpha and add to value of C
	flds s14, [ CO4 ] // load value from C
	fmuls s10, s10, s1 // multiply with beta
	add CO4, CO4, r3 // compute next pointer
	fmacs s10, s0, s26 // multiply sum with alpha and add to value of C
	flds s15, [ CO4 ] // load value from C
	fmuls s11, s11, s1 // multiply with beta
	mov CO4, r4 // restore pointer
	fmacs s11, s0, s27 // multiply sum with alpha and add to value of C
	//-----------------------------------------------------------
	fsts s8, [ CO3 ] // store value in C
	fmuls s12, s12, s1 // multiply with beta
	add CO3 , CO3, r3 // compute next pointer
	fmacs s12, s0, s28 // multiply sum with alpha and add to value of C
	fsts s9, [ CO3 ] // store value in C
	fmuls s13, s13, s1 // multiply with beta
	add CO3 , CO3, r3 // compute next pointer
	fmacs s13, s0, s29 // multiply sum with alpha and add to value of C
	fsts s10, [ CO3 ] // store value in C
	fmuls s14, s14, s1 // multiply with beta
	add CO3 , CO3, r3 // compute next pointer
	fmacs s14, s0, s30 // multiply sum with alpha and add to value of C
	fsts s11, [ CO3 ] // store value in C
	fmuls s15, s15, s1 // multiply with beta
	fsts s12, [ CO4 ] // store value in C
	fmacs s15, s0, s31 // multiply sum with alpha and add to value of C
	add CO4 , CO4, r3 // compute next pointer
	fsts s13, [ CO4 ] // store value in C
	add CO4 , CO4, r3 // compute next pointer
	fsts s14, [ CO4 ] // store value in C
	add CO4 , CO4, r3 // compute next pointer
	fsts s15, [ CO4 ] // store value in C
.endm

/**************************************************************************************
* End of macro definitions
**************************************************************************************/

// Entry point. Register/stack arguments are named by the K, PTR_ALPHA,
// OLD_A, OLD_B, PTR_BETA, OLD_C, OLD_RSC and OLD_CSC defines above.
// Runs K/8 groups of eight unrolled k steps, then K%8 single steps, then
// SAVE4x4.
	.arm
	.global REALNAME
	.func REALNAME
REALNAME:
	push {r4 - r9, fp} // save register
	add fp, sp, #28 // add number of saved register multiplied by size of int
	sub sp, sp, #STACKSIZE // reserve stack
	mov AO, OLD_A // pointer matrix A
	mov BO, OLD_B // pointer matrix B
	sub r3, fp, #128
	vstm r3, { s8 - s31 } // store floating point registers (96 bytes)
	ldr r2, OLD_C // pointer matrix C
	ldr r3, OLD_CSC // Col stride size of C
	lsl r3, r3, #2 // multiply with size of float
	mov CO1, r2 // first column of C
	add CO2, CO1, r3 // second column of C
	add CO3, CO2, r3 // third column of C
	add CO4, CO3, r3 // fourth column of C
	pld [ CO1, #C_PRE ] // prefetch the columns of C
	pld [ CO2, #C_PRE ] // prefetch the columns of C
	pld [ CO3, #C_PRE ] // prefetch the columns of C
	pld [ CO4, #C_PRE ] // prefetch the columns of C (was CO3 twice)

sgemm_kernel_L4_M4_20:
	asrs L , K, #3 // L = K / 8
	cmp L , #2
	blt sgemm_kernel_L4_M4_32 // fewer than two groups of 8

	// First group of 8: _I initialises the accumulators.
	KERNEL4x4_I
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	subs L, L, #2 // first and last group are outside the loop
	ble sgemm_kernel_L4_M4_22a

	.align 5
sgemm_kernel_L4_M4_22:
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	subs L, L, #1
	bgt sgemm_kernel_L4_M4_22

sgemm_kernel_L4_M4_22a:
	// Last group of 8: ends with _E, which loads nothing further.
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_E
	b sgemm_kernel_L4_M4_44

sgemm_kernel_L4_M4_32:
	tst L, #1
	ble sgemm_kernel_L4_M4_40 // L == 0: no full group of 8

	// Exactly one group of 8.
	KERNEL4x4_I
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_M2
	KERNEL4x4_M1
	KERNEL4x4_E
	b sgemm_kernel_L4_M4_44

sgemm_kernel_L4_M4_40:
	INIT4x4 // no _I was run, so clear the accumulators explicitly

sgemm_kernel_L4_M4_44:
	ands L , K, #7 // L = K % 8
	ble sgemm_kernel_L4_M4_100

sgemm_kernel_L4_M4_46:
	KERNEL4x4_SUB
	subs L, L, #1
	bne sgemm_kernel_L4_M4_46

sgemm_kernel_L4_M4_100:
	SAVE4x4

sgemm_kernel_L999:
	sub r3, fp, #128
	vldm r3, { s8 - s31 } // restore floating point registers
	sub sp, fp, #28
	pop {r4 - r9, fp}
	bx lr
blis-0.6.1/kernels/armv7a/3/bli_zgemm_armv7a_asm_2x2.S000066400000000000000000000234631360743507500223740ustar00rootroot00000000000000
#define REALNAME bli_zgemm_armv7a_ker_2x2

#define STACKSIZE 256

#define K r0
#define PTR_ALPHA r1
#define OLD_A r2
#define OLD_B r3
#define PTR_BETA [fp, #0 ]
#define OLD_C [fp, #4 ]
#define OLD_RSC [fp, #8 ]
#define OLD_CSC [fp, #12 ]
#define AUX [fp, #16 ]

/******************************************************
* [fp, #-128] - [fp, #-64] is reserved
* for store and restore of floating point
* register
*******************************************************/

#define L r2
#define AO r5
#define BO r6
#define CO1 r7
#define CO2 r8

#define A_PRE 96
#define B_PRE 96
#define C_PRE 0

/**************************************************************************************
* Macro definitions
**************************************************************************************/

#define FMAC_BR fnmacd
#define FMAC_BI fmacd

#define NN 1

#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
#define FADD_R fsubd
#define FADD_I faddd
#define FMAC_R1 fnmacd
#define FMAC_R2 fnmacd
#define FMAC_I1 fmacd
#define FMAC_I2 fnmacd
#elif defined(CN) || defined(CT)
#define FADD_R faddd
#define FADD_I fsubd
#define FMAC_R1
fmacd
#define FMAC_R2 fmacd
#define FMAC_I1 fnmacd
#define FMAC_I2 fmacd
#elif defined(NC) || defined(TC)
#define FADD_R faddd
#define FADD_I fsubd
#define FMAC_R1 fmacd
#define FMAC_R2 fnmacd
#define FMAC_I1 fmacd
#define FMAC_I2 fmacd
#else
#define FADD_R fsubd
#define FADD_I faddd
#define FMAC_R1 fnmacd
#define FMAC_R2 fmacd
#define FMAC_I1 fnmacd
#define FMAC_I2 fnmacd
#endif

// Zero the 16 accumulators d16..d31. The zero is moved in from a core
// register rather than computed as d16 - d16, which would produce NaN
// whenever d16 happens to hold NaN or Inf on entry. Clobbers r4, which
// the entry routine has pushed and does not use before SAVE2x2.
.macro INIT2x2
	mov r4, #0
	vmov d16, r4, r4
	vmov.f64 d17, d16
	vmov.f64 d18, d16
	vmov.f64 d19, d16
	vmov.f64 d20, d16
	vmov.f64 d21, d16
	vmov.f64 d22, d16
	vmov.f64 d23, d16
	vmov.f64 d24, d16
	vmov.f64 d25, d16
	vmov.f64 d26, d16
	vmov.f64 d27, d16
	vmov.f64 d28, d16
	vmov.f64 d29, d16
	vmov.f64 d30, d16
	vmov.f64 d31, d16
.endm

// First k step: loads d0-d3 (A) and d8-d11 (B), initialises the sixteen
// partial-product accumulators with fmuld, and preloads the next k step
// into d4-d7 / d12-d15. AO and BO advance by 64 bytes in total.
.macro KERNEL2x2_I
	pld [ AO , #A_PRE ]
	pld [ BO , #B_PRE ]
	fldd d0 , [ AO ]
	fldd d1 , [ AO, #8 ]
	fldd d8 , [ BO ]
	fldd d9 , [ BO, #8 ]
	fmuld d16 , d0, d8
	fldd d2 , [ AO, #16 ]
	fmuld d24 , d1, d9
	fldd d3 , [ AO, #24 ]
	fmuld d17 , d0, d9
	fldd d10, [ BO, #16 ]
	fmuld d25 , d1, d8
	fldd d11, [ BO, #24 ]
	fmuld d18 , d2, d8
	add BO , BO, #32
	fmuld d26 , d3, d9
	add AO , AO, #32
	fmuld d19 , d2, d9
	pld [ BO , #B_PRE ]
	fmuld d27 , d3, d8
	pld [ AO , #A_PRE ]
	fmuld d20 , d0, d10
	fldd d4 , [ AO, #0 ]
	fmuld d28 , d1, d11
	fldd d5 , [ AO, #8 ]
	fmuld d21 , d0, d11
	fldd d12, [ BO ]
	fmuld d29 , d1, d10
	fldd d13, [ BO, #8 ]
	fmuld d22 , d2, d10
	fldd d6 , [ AO, #16 ]
	fmuld d30 , d3, d11
	fldd d7 , [ AO, #24 ]
	fmuld d23 , d2, d11
	fldd d14, [ BO, #16 ]
	fmuld d31 , d3, d10
	fldd d15, [ BO, #24 ]
	add BO , BO, #32
	add AO , AO, #32
.endm

// Steady-state k step on d0-d3 x d8-d11 while loading the next
// d4-d7 / d12-d15.
.macro KERNEL2x2_M1
	pld [ AO , #A_PRE ]
	fmacd d16 , d0, d8
	pld [ BO , #B_PRE ]
	fmacd d24 , d1, d9
	fldd d4 , [ AO, #0 ]
	fmacd d17 , d0, d9
	fldd d5 , [ AO, #8 ]
	fmacd d25 , d1, d8
	fldd d12, [ BO ]
	fmacd d18 , d2, d8
	fldd d13, [ BO, #8 ]
	fmacd d26 , d3, d9
	fldd d6 , [ AO, #16 ]
	fmacd d19 , d2, d9
	fldd d7 , [ AO, #24 ]
	fmacd d27 , d3, d8
	fmacd d20 , d0, d10
	fldd d14, [ BO, #16 ]
	fmacd d28 , d1, d11
	fmacd d21 , d0, d11
	fldd d15, [ BO, #24 ]
	fmacd d29 , d1, d10
	fmacd d22 , d2, d10
	add BO , BO, #32
	fmacd d30 , d3, d11
	fmacd d23 , d2, d11
	add AO , AO, #32
	fmacd d31 , d3, d10
.endm

// Steady-state k step on d4-d7 x d12-d15 while loading the next
// d0-d3 / d8-d11.
.macro KERNEL2x2_M2
	pld [ AO , #A_PRE ]
	fmacd d16 , d4, d12
	pld [ BO , #B_PRE ]
	fmacd d24 , d5, d13
	fldd d0 , [ AO, #0 ]
	fmacd d17 , d4, d13
	fldd d1 , [ AO, #8 ]
	fmacd d25 , d5, d12
	fmacd d18 , d6, d12
	fldd d8 , [ BO ]
	fmacd d26 , d7, d13
	fldd d9 , [ BO, #8 ]
	fmacd d19 , d6, d13
	fmacd d27 , d7, d12
	fldd d2 , [ AO, #16 ]
	fmacd d20 , d4, d14
	fldd d3 , [ AO, #24 ]
	fmacd d28 , d5, d15
	fmacd d21 , d4, d15
	fldd d10, [ BO, #16 ]
	fmacd d29 , d5, d14
	fldd d11, [ BO, #24 ]
	fmacd d22 , d6, d14
	fmacd d30 , d7, d15
	add BO , BO, #32
	fmacd d23 , d6, d15
	add AO , AO, #32
	fmacd d31 , d7, d14
.endm

// Last k step of an unrolled group: like KERNEL2x2_M2 but without
// loading further operands.
.macro KERNEL2x2_E
	fmacd d16 , d4, d12
	fmacd d24 , d5, d13
	fmacd d17 , d4, d13
	fmacd d25 , d5, d12
	fmacd d18 , d6, d12
	fmacd d26 , d7, d13
	fmacd d19 , d6, d13
	fmacd d27 , d7, d12
	fmacd d20 , d4, d14
	fmacd d28 , d5, d15
	fmacd d21 , d4, d15
	fmacd d29 , d5, d14
	fmacd d22 , d6, d14
	fmacd d30 , d7, d15
	fmacd d23 , d6, d15
	fmacd d31 , d7, d14
.endm

// One stand-alone k step (used for the K % 8 remainder): loads two
// complex doubles of A and two of B, accumulates, and advances AO/BO by
// 32 bytes.
.macro KERNEL2x2_SUB
	pld [ AO , #A_PRE ]
	pld [ BO , #B_PRE ]
	fldd d0 , [ AO ]
	fldd d1 , [ AO, #8 ]
	fldd d8 , [ BO ]
	fldd d9 , [ BO, #8 ]
	fmacd d16 , d0, d8
	fldd d2 , [ AO, #16 ]
	fmacd d24 , d1, d9
	fldd d3 , [ AO, #24 ]
	fmacd d17 , d0, d9
	fldd d10, [ BO, #16 ]
	fmacd d25 , d1, d8
	fldd d11, [ BO, #24 ]
	fmacd d18 , d2, d8
	fmacd d26 , d3, d9
	fmacd d19 , d2, d9
	fmacd d27 , d3, d8
	fmacd d20 , d0, d10
	fmacd d28 , d1, d11
	fmacd d21 , d0, d11
	fmacd d29 , d1, d10
	fmacd d22 , d2, d10
	add BO , BO, #32
	fmacd d30 , d3, d11
	fmacd d23 , d2, d11
	add AO , AO, #32
	fmacd d31 , d3, d10
.endm

// C := beta * C + alpha * (A * B) for the 2x2 complex block. First folds
// the sixteen partial products into eight real/imag sums with
// FADD_R/FADD_I, then combines them with beta * C using the FMAC_*
// variants selected above. CO1/CO2 address the two columns; r3 holds the
// row stride in bytes. Clobbers r2, r3, r4, d0-d5, d8, d9 and d24-d31.
// NOTE(review): C is always loaded and multiplied by beta, so NaN/Inf
// already in C propagates even when beta == 0 -- confirm whether the
// caller special-cases beta == 0.
.macro SAVE2x2
	ldr r3, OLD_RSC // Row stride size
	lsl r3, r3, #4 // multiply with size of complex double
	fldd d0, [ PTR_ALPHA ] // load real part of alpha
	fldd d1, [ PTR_ALPHA, #8 ] // load imag part of alpha
	ldr r4, PTR_BETA
	fldd d2, [ r4 ] // load real part of beta
	fldd d3, [ r4, #8 ] // load imag part of beta

	// Add/Sub the real and the imag parts
	FADD_R d16, d24 , d16
	FADD_I d17, d25 , d17
	FADD_R d18, d26 , d18
	FADD_I d19, d27 , d19
	FADD_R d20, d28 , d20
	FADD_I d21, d29 , d21
	FADD_R d22, d30 , d22
	FADD_I d23, d31 , d23

	mov r4, CO1 // save pointer
	fldmiad CO1, { d4 - d5 } // read real and imag part from C
	add CO1, CO1, r3
	mov r2, CO2 // save pointer
	fldmiad CO2, { d8 - d9 } // read real and imag part from C
	add CO2, CO2, r3

	fmuld d24, d4, d2 // multiply Beta-real with C-real
	fmuld d25, d5, d2 // multiply Beta-real with C-imag
	fmuld d28, d8, d2 // multiply Beta-real with C-real
	fmuld d29, d9, d2 // multiply Beta-real with C-imag
	FMAC_BR d24, d3, d5 // multiply beta-imag with C-imag and subtract (fnmacd)
	FMAC_BI d25, d3, d4 // multiply beta-imag with C-real and add
	FMAC_BR d28, d3, d9 // multiply beta-imag with C-imag and subtract (fnmacd)
	FMAC_BI d29, d3, d8 // multiply beta-imag with C-real and add

	FMAC_R1 d24 , d0 , d16
	FMAC_I1 d25 , d0 , d17
	FMAC_R2 d24 , d1 , d17
	FMAC_I2 d25 , d1 , d16
	FMAC_R1 d28 , d0 , d20
	FMAC_I1 d29 , d0 , d21
	FMAC_R2 d28 , d1 , d21
	FMAC_I2 d29 , d1 , d20

	fldmiad CO1, { d4 - d5 } // read real and imag part from C
	fldmiad CO2, { d8 - d9 } // read real and imag part from C

	fmuld d26, d4, d2 // multiply Beta-real with C-real
	fmuld d27, d5, d2 // multiply Beta-real with C-imag
	fmuld d30, d8, d2 // multiply Beta-real with C-real
	fmuld d31, d9, d2 // multiply Beta-real with C-imag
	FMAC_BR d26, d3, d5 // multiply beta-imag with C-imag and subtract (fnmacd)
	FMAC_BI d27, d3, d4 // multiply beta-imag with C-real and add
	FMAC_BR d30, d3, d9 // multiply beta-imag with C-imag and subtract (fnmacd)
	FMAC_BI d31, d3, d8 // multiply beta-imag with C-real and add

	FMAC_R1 d26 , d0 , d18
	FMAC_I1 d27 , d0 , d19
	FMAC_R2 d26 , d1 , d19
	FMAC_I2 d27 , d1 , d18
	FMAC_R1 d30, d0 , d22
	FMAC_I1 d31, d0 , d23
	FMAC_R2 d30, d1 , d23
	FMAC_I2 d31, d1 , d22

	mov CO1, r4 // restore pointer
	mov CO2, r2 // restore pointer
	fstmiad CO1, { d24 - d25 }
	fstmiad CO2, { d28 - d29 }
	add CO1, CO1, r3
	add CO2, CO2, r3
	fstmiad CO1, { d26 - d27 }
	fstmiad CO2, { d30 - d31 }
.endm
/**************************************************************************************
* End of macro definitions
**************************************************************************************/

// Entry point of the double-complex 2x2 kernel. Register/stack arguments
// are named by the K, PTR_ALPHA, OLD_A, OLD_B, PTR_BETA, OLD_C, OLD_RSC
// and OLD_CSC defines at the top of this file.
// Runs K/8 groups of eight unrolled k steps, then K%8 single steps, then
// SAVE2x2 to update C.
	.arm
	.global REALNAME
	.func REALNAME
REALNAME:
	push {r4 - r9, fp} // save registers (7 x 4 = 28 bytes)
	add fp, sp, #28 // fp = sp before the push, so stack arguments start at [fp, #0]
	sub sp, sp, #STACKSIZE // reserve stack
	mov AO, OLD_A // pointer matrix A
	mov BO, OLD_B // pointer matrix B
	sub r3, fp, #128
	vstm r3, { d8 - d15} // store floating point registers at [fp,#-128]..[fp,#-64]
	ldr r2, OLD_C // pointer matrix C
	ldr r3, OLD_CSC // Col stride size of C
	lsl r3, r3, #4 // multiply with size of complex double
	mov CO1, r2 // first column of C
	add CO2, CO1, r3 // second column of C (one column stride further)
	pld [ CO1, #C_PRE ] // prefetch the columns of C
	pld [ CO2, #C_PRE ] // prefetch the columns of C

zgemm_kernel_L2_M2_20:
	asrs L , K, #3 // L = K / 8
	cmp L , #2
	blt zgemm_kernel_L2_M2_32 // fewer than two groups of 8

	// First group of 8: _I initialises the accumulators.
	KERNEL2x2_I
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	subs L, L, #2 // first and last group are outside the loop
	ble zgemm_kernel_L2_M2_22a

	.align 5
zgemm_kernel_L2_M2_22:
	// Middle groups of 8, executed L - 2 times.
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	subs L, L, #1
	bgt zgemm_kernel_L2_M2_22

zgemm_kernel_L2_M2_22a:
	// Last group of 8: ends with _E, which loads nothing further.
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_E
	b zgemm_kernel_L2_M2_44

zgemm_kernel_L2_M2_32:
	// Reached with L < 2; bit 0 distinguishes L == 1 from L == 0.
	tst L, #1
	ble zgemm_kernel_L2_M2_40

	// Exactly one group of 8.
	KERNEL2x2_I
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_M2
	KERNEL2x2_M1
	KERNEL2x2_E
	b zgemm_kernel_L2_M2_44

zgemm_kernel_L2_M2_40:
	INIT2x2 // no _I was run, so clear the accumulators explicitly

zgemm_kernel_L2_M2_44:
	ands L , K, #7 // L = K % 8
	ble zgemm_kernel_L2_M2_100 // no remainder steps

zgemm_kernel_L2_M2_46:
	KERNEL2x2_SUB
	subs L, L, #1
	bne zgemm_kernel_L2_M2_46

zgemm_kernel_L2_M2_100:
	SAVE2x2

zgemm_kernel_L999:
	sub r3, fp, #128
	vldm r3, { d8 - d15} // restore floating point registers
	sub sp, fp, #28 // back to the sp value right after the push
	pop {r4 - r9, fp}
	bx lr
blis-0.6.1/kernels/armv7a/bli_kernels_armv7a.h000066400000000000000000000036661360743507500212530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ GEMM_UKR_PROT( float, s, gemm_armv7a_asm_4x4 ) GEMM_UKR_PROT( double, d, gemm_armv7a_asm_4x4 ) GEMM_UKR_PROT( scomplex, c, gemm_armv7a_asm_2x2 ) GEMM_UKR_PROT( dcomplex, z, gemm_armv7a_asm_2x2 ) GEMM_UKR_PROT( float, s, gemm_armv7a_int_4x4 ) GEMM_UKR_PROT( double, d, gemm_armv7a_int_4x4 ) blis-0.6.1/kernels/armv8a/000077500000000000000000000000001360743507500153225ustar00rootroot00000000000000blis-0.6.1/kernels/armv8a/3/000077500000000000000000000000001360743507500154645ustar00rootroot00000000000000blis-0.6.1/kernels/armv8a/3/bli_gemm_armv8a_asm_d6x8.c000066400000000000000000004037601360743507500224040ustar00rootroot00000000000000 /* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" /* o 4x4 Single precision micro-kernel fully functional. o Runnable on ARMv8, compiled with aarch64 GCC. o Use it together with the armv8 BLIS configuration. o Tested on Juno board. Around 7.3 GFLOPS @ 1.1 GHz. December 2014. * UPDATE NOVEMBER 2015 * Micro-kernel changed to 8x12 * Tested on Juno Board. Around 8.1 GFLOPS, 1 x A57 core @ 1.1 GHz. * Tested on Juno Board. Around 15.9 GFLOPS, 2 x A57 cores @ 1.1 GHz. * Tested on Juno board. Around 3.1 GFLOPS, 1 x A53 core @ 850 MHz. * Tested on Juno board. Around 12 GFLOPS, 4 x A53 cores @ 850 MHz. */ void bli_sgemm_armv8a_asm_8x12 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; __asm__ volatile ( " \n\t" " \n\t" " ldr x0,%[aaddr] \n\t" // Load address of A. " ldr x1,%[baddr] \n\t" // Load address of B. " ldr x2,%[caddr] \n\t" // Load address of C. " \n\t" " ldr x3,%[a_next] \n\t" // Pointer to next block of A. " ldr x4,%[b_next] \n\t" // Pointer to next pointer of B. " \n\t" " ldr x5,%[k_iter] \n\t" // Number of unrolled iterations (k_iter). 
" ldr x6,%[k_left] \n\t" // Number of remaining iterations (k_left). " \n\t" " ldr x7,%[alpha] \n\t" // Alpha address. " ldr x8,%[beta] \n\t" // Beta address. " \n\t" " ldr x9,%[cs_c] \n\t" // Load cs_c. " lsl x10,x9,#2 \n\t" // cs_c * sizeof(float) -- AUX. " \n\t" " ldr x13,%[rs_c] \n\t" // Load rs_c. " lsl x14,x13,#2 \n\t" // rs_c * sizeof(float). " \n\t" " add x16,x2,x10 \n\t" //Load address Column 1 of C " add x17,x16,x10 \n\t" //Load address Column 2 of C " add x18,x17,x10 \n\t" //Load address Column 3 of C " add x19,x18,x10 \n\t" //Load address Column 4 of C " add x20,x19,x10 \n\t" //Load address Column 5 of C " add x21,x20,x10 \n\t" //Load address Column 6 of C " add x22,x21,x10 \n\t" //Load address Column 7 of C " add x23,x22,x10 \n\t" //Load address Column 8 of C " add x24,x23,x10 \n\t" //Load address Column 9 of C " add x25,x24,x10 \n\t" //Load address Column 10 of C " add x26,x25,x10 \n\t" //Load address Column 11 of C " \n\t" " ldr q0, [x0] \n\t" " ldr q1, [x0, #16] \n\t" // Load a " \n\t" " ldr q2, [x1] \n\t" // Load b " ldr q3, [x1, #16] \n\t" " ldr q4, [x1, #32] \n\t" " \n\t" " prfm pldl1keep,[x2] \n\t" // Prefetch c. " prfm pldl1keep,[x16] \n\t" // Prefetch c. " prfm pldl1keep,[x17] \n\t" // Prefetch c. " prfm pldl1keep,[x18] \n\t" // Prefetch c. " prfm pldl1keep,[x19] \n\t" // Prefetch c. " prfm pldl1keep,[x20] \n\t" // Prefetch c. " prfm pldl1keep,[x21] \n\t" // Prefetch c. " prfm pldl1keep,[x22] \n\t" // Prefetch c. " prfm pldl1keep,[x23] \n\t" // Prefetch c. " prfm pldl1keep,[x24] \n\t" // Prefetch c. " prfm pldl1keep,[x25] \n\t" // Prefetch c. " prfm pldl1keep,[x26] \n\t" // Prefetch c. 
" \n\t" " dup v8.4s, wzr \n\t" // Vector for accummulating column 0 " prfm PLDL1KEEP, [x1, #192] \n\t" " dup v9.4s, wzr \n\t" // Vector for accummulating column 0 " prfm PLDL1KEEP, [x1, #256] \n\t" " dup v10.4s, wzr \n\t" // Vector for accummulating column 1 " prfm PLDL1KEEP, [x1, #320] \n\t" " dup v11.4s, wzr \n\t" // Vector for accummulating column 1 " dup v12.4s, wzr \n\t" // Vector for accummulating column 2 " dup v13.4s, wzr \n\t" // Vector for accummulating column 2 " \n\t" " dup v14.4s, wzr \n\t" // Vector for accummulating column 3 " prfm PLDL1KEEP, [x0, #128] \n\t" " dup v15.4s, wzr \n\t" // Vector for accummulating column 3 " prfm PLDL1KEEP, [x0, #192] \n\t" " dup v16.4s, wzr \n\t" // Vector for accummulating column 4 " dup v17.4s, wzr \n\t" // Vector for accummulating column 4 " dup v18.4s, wzr \n\t" // Vector for accummulating column 5 " dup v19.4s, wzr \n\t" // Vector for accummulating column 5 " \n\t" " dup v20.4s, wzr \n\t" // Vector for accummulating column 6 " dup v21.4s, wzr \n\t" // Vector for accummulating column 6 " dup v22.4s, wzr \n\t" // Vector for accummulating column 7 " dup v23.4s, wzr \n\t" // Vector for accummulating column 7 " dup v24.4s, wzr \n\t" // Vector for accummulating column 8 " dup v25.4s, wzr \n\t" // Vector for accummulating column 8 " \n\t" " dup v26.4s, wzr \n\t" // Vector for accummulating column 9 " dup v27.4s, wzr \n\t" // Vector for accummulating column 9 " dup v28.4s, wzr \n\t" // Vector for accummulating column 10 " dup v29.4s, wzr \n\t" // Vector for accummulating column 10 " dup v30.4s, wzr \n\t" // Vector for accummulating column 11 " dup v31.4s, wzr \n\t" // Vector for accummulating column 11 " \n\t" " cmp x5,#0 \n\t" // If k_iter == 0, jump to k_left. " beq .SCONSIDERKLEFT \n\t" " \n\t" "add x0, x0, #32 \n\t" //update address of A "add x1, x1, #48 \n\t" //update address of B " \n\t" " cmp x5,1 \n\t" // If there is just one k_iter, jump to that one. " beq .SLASTITER \n\t" // (as loop is do-while-like). 
" \n\t" " .SLOOPKITER: \n\t" // Body of the k_iter loop. " \n\t" " ldr q5, [x0] \n\t" " fmla v8.4s, v0.4s,v2.s[0] \n\t" // Accummulate. " fmla v9.4s, v1.4s,v2.s[0] \n\t" // Accummulate. " ldr q6, [x0, #16] \n\t" " fmla v10.4s,v0.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v1.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v0.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v1.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v0.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v1.4s,v2.s[3] \n\t" // Accummulate. " ldr q2, [x1] \n\t" " \n\t" " fmla v16.4s,v0.4s,v3.s[0] \n\t" // Accummulate. " prfm PLDL1KEEP, [x1, #336] \n\t" " fmla v17.4s,v1.4s,v3.s[0] \n\t" // Accummulate. " prfm PLDL1KEEP, [x1, #400] \n\t" " fmla v18.4s,v0.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v1.4s,v3.s[1] \n\t" // Accummulate. " prfm PLDL1KEEP, [x1, #464] \n\t" " fmla v20.4s,v0.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v1.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v0.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v1.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v0.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v0.4s,v4.s[1] \n\t" // Accummulate. " fmla v28.4s,v0.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v0.4s,v4.s[3] \n\t" // Accummulate. " ldr q3, [x1, #16] \n\t" " \n\t" " fmla v25.4s,v1.4s,v4.s[0] \n\t" // Accummulate. " fmla v27.4s,v1.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v1.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v1.4s,v4.s[3] \n\t" // Accummulate. " ldr q4, [x1, #32] \n\t" " \n\t" //End It 1 " \n\t" " ldr q0, [x0, #32] \n\t" " fmla v8.4s,v5.4s,v2.s[0] \n\t" // Accummulate. " fmla v9.4s,v6.4s,v2.s[0] \n\t" // Accummulate. " ldr q1, [x0, #48] \n\t" " fmla v10.4s,v5.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v6.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v5.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v6.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v5.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v6.4s,v2.s[3] \n\t" // Accummulate. 
" ldr q2, [x1, #48] \n\t" " \n\t" " fmla v16.4s,v5.4s,v3.s[0] \n\t" // Accummulate. " prfm PLDL1KEEP, [x0, #224] \n\t" " fmla v17.4s,v6.4s,v3.s[0] \n\t" // Accummulate. " prfm PLDL1KEEP, [x0, #288] \n\t" " fmla v18.4s,v5.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v6.4s,v3.s[1] \n\t" // Accummulate. " fmla v20.4s,v5.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v6.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v5.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v6.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v5.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v5.4s,v4.s[1] \n\t" // Accummulate. " fmla v28.4s,v5.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v5.4s,v4.s[3] \n\t" // Accummulate. " ldr q3, [x1, #64] \n\t" " \n\t" " fmla v25.4s,v6.4s,v4.s[0] \n\t" // Accummulate. " fmla v27.4s,v6.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v6.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v6.4s,v4.s[3] \n\t" // Accummulate. " ldr q4, [x1, #80] \n\t" " \n\t" //End It 2 " \n\t" " ldr q5, [x0, #64] \n\t" " fmla v8.4s,v0.4s,v2.s[0] \n\t" // Accummulate. " fmla v9.4s,v1.4s,v2.s[0] \n\t" // Accummulate. " ldr q6, [x0, #80] \n\t" " fmla v10.4s,v0.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v1.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v0.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v1.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v0.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v1.4s,v2.s[3] \n\t" // Accummulate. " ldr q2, [x1, #96] \n\t" " \n\t" " fmla v16.4s,v0.4s,v3.s[0] \n\t" // Accummulate. " fmla v17.4s,v1.4s,v3.s[0] \n\t" // Accummulate. " fmla v18.4s,v0.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v1.4s,v3.s[1] \n\t" // Accummulate. " fmla v20.4s,v0.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v1.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v0.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v1.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v0.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v0.4s,v4.s[1] \n\t" // Accummulate. 
" fmla v28.4s,v0.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v0.4s,v4.s[3] \n\t" // Accummulate. " ldr q3, [x1, #112] \n\t" " \n\t" " fmla v25.4s,v1.4s,v4.s[0] \n\t" // Accummulate. " fmla v27.4s,v1.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v1.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v1.4s,v4.s[3] \n\t" // Accummulate. " ldr q4, [x1, #128] \n\t" " \n\t" //End It 3 " \n\t" " ldr q0, [x0, #96] \n\t" " fmla v8.4s,v5.4s,v2.s[0] \n\t" // Accummulate. " fmla v9.4s,v6.4s,v2.s[0] \n\t" // Accummulate. " ldr q1, [x0, #112] \n\t" " fmla v10.4s,v5.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v6.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v5.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v6.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v5.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v6.4s,v2.s[3] \n\t" // Accummulate. " ldr q2, [x1, #144] \n\t" " \n\t" " fmla v16.4s,v5.4s,v3.s[0] \n\t" // Accummulate. " fmla v17.4s,v6.4s,v3.s[0] \n\t" // Accummulate. " fmla v18.4s,v5.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v6.4s,v3.s[1] \n\t" // Accummulate. " fmla v20.4s,v5.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v6.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v5.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v6.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v5.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v5.4s,v4.s[1] \n\t" // Accummulate. " fmla v28.4s,v5.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v5.4s,v4.s[3] \n\t" // Accummulate. " ldr q3, [x1, #160] \n\t" " \n\t" " fmla v25.4s,v6.4s,v4.s[0] \n\t" // Accummulate. " fmla v27.4s,v6.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v6.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v6.4s,v4.s[3] \n\t" // Accummulate. " ldr q4, [x1, #176] \n\t" " add x1, x1, #192 \n\t" " add x0, x0, #128 \n\t" " \n\t" //End It 4 " sub x5,x5,1 \n\t" // i-=1. " cmp x5,1 \n\t" // Iterate again if we are not in k_iter == 1. " bne .SLOOPKITER \n\t" " \n\t" " .SLASTITER: \n\t" // Last iteration of k_iter loop. 
" \n\t" " \n\t" " ldr q5, [x0] \n\t" " fmla v8.4s,v0.4s,v2.s[0] \n\t" // Accummulate. " fmla v9.4s,v1.4s,v2.s[0] \n\t" // Accummulate. " ldr q6, [x0, #16] \n\t" " fmla v10.4s,v0.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v1.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v0.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v1.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v0.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v1.4s,v2.s[3] \n\t" // Accummulate. " ldr q2, [x1] \n\t" " \n\t" " fmla v16.4s,v0.4s,v3.s[0] \n\t" // Accummulate. " fmla v17.4s,v1.4s,v3.s[0] \n\t" // Accummulate. " fmla v18.4s,v0.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v1.4s,v3.s[1] \n\t" // Accummulate. " fmla v20.4s,v0.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v1.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v0.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v1.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v0.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v0.4s,v4.s[1] \n\t" // Accummulate. " fmla v28.4s,v0.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v0.4s,v4.s[3] \n\t" // Accummulate. " ldr q3, [x1, #16] \n\t" " \n\t" " fmla v25.4s,v1.4s,v4.s[0] \n\t" // Accummulate. " fmla v27.4s,v1.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v1.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v1.4s,v4.s[3] \n\t" // Accummulate. " ldr q4, [x1, #32] \n\t" " \n\t" //End It 1 " \n\t" " ldr q0, [x0, #32] \n\t" " fmla v8.4s,v5.4s,v2.s[0] \n\t" // Accummulate. " fmla v9.4s,v6.4s,v2.s[0] \n\t" // Accummulate. " ldr q1, [x0, #48] \n\t" " fmla v10.4s,v5.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v6.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v5.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v6.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v5.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v6.4s,v2.s[3] \n\t" // Accummulate. " ldr q2, [x1, #48] \n\t" " \n\t" " fmla v16.4s,v5.4s,v3.s[0] \n\t" // Accummulate. " fmla v17.4s,v6.4s,v3.s[0] \n\t" // Accummulate. 
" fmla v18.4s,v5.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v6.4s,v3.s[1] \n\t" // Accummulate. " fmla v20.4s,v5.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v6.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v5.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v6.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v5.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v5.4s,v4.s[1] \n\t" // Accummulate. " fmla v28.4s,v5.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v5.4s,v4.s[3] \n\t" // Accummulate. " ldr q3, [x1, #64] \n\t" " \n\t" " fmla v25.4s,v6.4s,v4.s[0] \n\t" // Accummulate. " fmla v27.4s,v6.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v6.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v6.4s,v4.s[3] \n\t" // Accummulate. " ldr q4, [x1, #80] \n\t" " \n\t" //End It 2 " \n\t" " ldr q5, [x0, #64] \n\t" " fmla v8.4s,v0.4s,v2.s[0] \n\t" // Accummulate. " fmla v9.4s,v1.4s,v2.s[0] \n\t" // Accummulate. " ldr q6, [x0, #80] \n\t" " fmla v10.4s,v0.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v1.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v0.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v1.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v0.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v1.4s,v2.s[3] \n\t" // Accummulate. " ldr q2, [x1, #96] \n\t" " \n\t" " fmla v16.4s,v0.4s,v3.s[0] \n\t" // Accummulate. " fmla v17.4s,v1.4s,v3.s[0] \n\t" // Accummulate. " fmla v18.4s,v0.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v1.4s,v3.s[1] \n\t" // Accummulate. " fmla v20.4s,v0.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v1.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v0.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v1.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v0.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v0.4s,v4.s[1] \n\t" // Accummulate. " fmla v28.4s,v0.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v0.4s,v4.s[3] \n\t" // Accummulate. " ldr q3, [x1, #112] \n\t" " \n\t" " fmla v25.4s,v1.4s,v4.s[0] \n\t" // Accummulate. 
" fmla v27.4s,v1.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v1.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v1.4s,v4.s[3] \n\t" // Accummulate. " ldr q4, [x1, #128] \n\t" " \n\t" //End It 3 " \n\t" " fmla v8.4s,v5.4s,v2.s[0] \n\t" // Accummulate. " fmla v9.4s,v6.4s,v2.s[0] \n\t" // Accummulate. " fmla v10.4s,v5.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v6.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v5.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v6.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v5.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v6.4s,v2.s[3] \n\t" // Accummulate. " \n\t" " fmla v16.4s,v5.4s,v3.s[0] \n\t" // Accummulate. " fmla v17.4s,v6.4s,v3.s[0] \n\t" // Accummulate. " fmla v18.4s,v5.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v6.4s,v3.s[1] \n\t" // Accummulate. " fmla v20.4s,v5.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v6.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v5.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v6.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v5.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v5.4s,v4.s[1] \n\t" // Accummulate. " fmla v28.4s,v5.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v5.4s,v4.s[3] \n\t" // Accummulate. " \n\t" " fmla v25.4s,v6.4s,v4.s[0] \n\t" // Accummulate. " fmla v27.4s,v6.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v6.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v6.4s,v4.s[3] \n\t" // Accummulate. " add x1, x1, #144 \n\t" " add x0, x0, #96 \n\t" " \n\t" //End It 4 " \n\t" " .SCONSIDERKLEFT: \n\t" " cmp x6,0 \n\t" // If k_left == 0, we are done. " beq .SPOSTACCUM \n\t" // else, we enter the k_left loop. " \n\t" " .SLOOPKLEFT: \n\t" // Body of the left iterations " \n\t" " ldr q0, [x0],#16 \n\t" " ldr q1, [x0],#16 \n\t" // Load a " \n\t" " ldr q2, [x1],#16 \n\t" // Load b " ldr q3, [x1],#16 \n\t" " ldr q4, [x1],#16 \n\t" " \n\t" " sub x6,x6,1 \n\t" // i = i-1. " \n\t" " fmla v8.4s,v0.4s,v2.s[0] \n\t" // Accummulate. 
" fmla v9.4s,v1.4s,v2.s[0] \n\t" // Accummulate. " fmla v10.4s,v0.4s,v2.s[1] \n\t" // Accummulate. " fmla v11.4s,v1.4s,v2.s[1] \n\t" // Accummulate. " fmla v12.4s,v0.4s,v2.s[2] \n\t" // Accummulate. " fmla v13.4s,v1.4s,v2.s[2] \n\t" // Accummulate. " fmla v14.4s,v0.4s,v2.s[3] \n\t" // Accummulate. " fmla v15.4s,v1.4s,v2.s[3] \n\t" // Accummulate. " \n\t" " fmla v16.4s,v0.4s,v3.s[0] \n\t" // Accummulate. " fmla v17.4s,v1.4s,v3.s[0] \n\t" // Accummulate. " fmla v18.4s,v0.4s,v3.s[1] \n\t" // Accummulate. " fmla v19.4s,v1.4s,v3.s[1] \n\t" // Accummulate. " fmla v20.4s,v0.4s,v3.s[2] \n\t" // Accummulate. " fmla v21.4s,v1.4s,v3.s[2] \n\t" // Accummulate. " fmla v22.4s,v0.4s,v3.s[3] \n\t" // Accummulate. " fmla v23.4s,v1.4s,v3.s[3] \n\t" // Accummulate. " \n\t" " fmla v24.4s,v0.4s,v4.s[0] \n\t" // Accummulate. " fmla v26.4s,v0.4s,v4.s[1] \n\t" // Accummulate. " fmla v28.4s,v0.4s,v4.s[2] \n\t" // Accummulate. " fmla v30.4s,v0.4s,v4.s[3] \n\t" // Accummulate. " fmla v25.4s,v1.4s,v4.s[0] \n\t" // Accummulate. " fmla v27.4s,v1.4s,v4.s[1] \n\t" // Accummulate. " fmla v29.4s,v1.4s,v4.s[2] \n\t" // Accummulate. " fmla v31.4s,v1.4s,v4.s[3] \n\t" // Accummulate. " \n\t" " cmp x6,0 \n\t" // Iterate again. " bne .SLOOPKLEFT \n\t" // if i!=0. " \n\t" " .SPOSTACCUM: \n\t" " \n\t" " ld1r {v6.4s},[x7] \n\t" // Load alpha. " ld1r {v7.4s},[x8] \n\t" // Load beta " \n\t" " cmp x13,#1 \n\t" // If rs_c != 1 (column-major) " bne .SGENSTORED \n\t" " \n\t" " .SCOLSTORED: \n\t" // C is column-major. " \n\t" " dup v0.4s, wzr \n\t" " dup v1.4s, wzr \n\t" " dup v2.4s, wzr \n\t" " dup v3.4s, wzr \n\t" " dup v4.4s, wzr \n\t" " dup v5.4s, wzr \n\t" " \n\t" " fcmp s7,#0.0 \n\t" " beq .SBETAZEROCOLSTOREDS1 \n\t" // Taking care of the beta==0 case. 
" \n\t" " ldr q0, [x2] \n\t" //Load column 0 of C " ldr q1, [x2, #16] \n\t" " ldr q2, [x16] \n\t" //Load column 1 of C " ldr q3, [x16, #16] \n\t" " ldr q4, [x17] \n\t" //Load column 2 of C " ldr q5, [x17, #16] \n\t" " \n\t" " fmul v0.4s,v0.4s,v7.s[0] \n\t" // Scale by beta " fmul v1.4s,v1.4s,v7.s[0] \n\t" // Scale by beta " fmul v2.4s,v2.4s,v7.s[0] \n\t" // Scale by beta " fmul v3.4s,v3.4s,v7.s[0] \n\t" // Scale by beta " fmul v4.4s,v4.4s,v7.s[0] \n\t" // Scale by beta " fmul v5.4s,v5.4s,v7.s[0] \n\t" // Scale by beta " \n\t" " .SBETAZEROCOLSTOREDS1: \n\t" " \n\t" " fmla v0.4s,v8.4s,v6.s[0] \n\t" // Scale by alpha " fmla v1.4s,v9.4s,v6.s[0] \n\t" // Scale by alpha " fmla v2.4s,v10.4s,v6.s[0] \n\t" // Scale by alpha " fmla v3.4s,v11.4s,v6.s[0] \n\t" // Scale by alpha " fmla v4.4s,v12.4s,v6.s[0] \n\t" // Scale by alpha " fmla v5.4s,v13.4s,v6.s[0] \n\t" // Scale by alpha " \n\t" " str q0, [x2] \n\t" //Store column 0 of C " str q1, [x2, #16] \n\t" " str q2, [x16] \n\t" //Store column 1 of C " str q3, [x16, #16] \n\t" " str q4, [x17] \n\t" //Store column 2 of C " str q5, [x17, #16] \n\t" " \n\t" " dup v8.4s, wzr \n\t" " dup v9.4s, wzr \n\t" " dup v10.4s, wzr \n\t" " dup v11.4s, wzr \n\t" " dup v12.4s, wzr \n\t" " dup v13.4s, wzr \n\t" " \n\t" " fcmp s7,#0.0 \n\t" " beq .SBETAZEROCOLSTOREDS2 \n\t" // Taking care of the beta==0 case. 
" \n\t" " ldr q8, [x18] \n\t" //Load column 3 of C " ldr q9, [x18, #16] \n\t" " ldr q10, [x19] \n\t" //Load column 4 of C " ldr q11, [x19, #16] \n\t" " ldr q12, [x20] \n\t" //Load column 5 of C " ldr q13, [x20, #16] \n\t" " \n\t" " fmul v8.4s, v8.4s, v7.s[0] \n\t" // Scale by beta " fmul v9.4s, v9.4s, v7.s[0] \n\t" // Scale by beta " fmul v10.4s,v10.4s,v7.s[0] \n\t" // Scale by beta " fmul v11.4s,v11.4s,v7.s[0] \n\t" // Scale by beta " fmul v12.4s,v12.4s,v7.s[0] \n\t" // Scale by beta " fmul v13.4s,v13.4s,v7.s[0] \n\t" // Scale by beta " \n\t" " .SBETAZEROCOLSTOREDS2: \n\t" " \n\t" " fmla v8.4s, v14.4s,v6.s[0] \n\t" // Scale by alpha " fmla v9.4s, v15.4s,v6.s[0] \n\t" // Scale by alpha " fmla v10.4s,v16.4s,v6.s[0] \n\t" // Scale by alpha " fmla v11.4s,v17.4s,v6.s[0] \n\t" // Scale by alpha " fmla v12.4s,v18.4s,v6.s[0] \n\t" // Scale by alpha " fmla v13.4s,v19.4s,v6.s[0] \n\t" // Scale by alpha " \n\t" " str q8, [x18] \n\t" //Store column 3 of C " str q9, [x18, #16] \n\t" " str q10, [x19] \n\t" //Store column 4 of C " str q11, [x19, #16] \n\t" " str q12, [x20] \n\t" //Store column 5 of C " str q13, [x20, #16] \n\t" " \n\t" " dup v0.4s, wzr \n\t" " dup v1.4s, wzr \n\t" " dup v2.4s, wzr \n\t" " dup v3.4s, wzr \n\t" " dup v4.4s, wzr \n\t" " dup v5.4s, wzr \n\t" " \n\t" " fcmp s7,#0.0 \n\t" " beq .SBETAZEROCOLSTOREDS3 \n\t" // Taking care of the beta==0 case. 
" \n\t" " ldr q0, [x21] \n\t" //Load column 6 of C " ldr q1, [x21, #16] \n\t" " ldr q2, [x22] \n\t" //Load column 7 of C " ldr q3, [x22, #16] \n\t" " ldr q4, [x23] \n\t" //Load column 8 of C " ldr q5, [x23, #16] \n\t" " \n\t" " fmul v0.4s,v0.4s,v7.s[0] \n\t" // Scale by beta " fmul v1.4s,v1.4s,v7.s[0] \n\t" // Scale by beta " fmul v2.4s,v2.4s,v7.s[0] \n\t" // Scale by beta " fmul v3.4s,v3.4s,v7.s[0] \n\t" // Scale by beta " fmul v4.4s,v4.4s,v7.s[0] \n\t" // Scale by beta " fmul v5.4s,v5.4s,v7.s[0] \n\t" // Scale by beta " \n\t" " .SBETAZEROCOLSTOREDS3: \n\t" " \n\t" " fmla v0.4s,v20.4s,v6.s[0] \n\t" // Scale by alpha " fmla v1.4s,v21.4s,v6.s[0] \n\t" // Scale by alpha " fmla v2.4s,v22.4s,v6.s[0] \n\t" // Scale by alpha " fmla v3.4s,v23.4s,v6.s[0] \n\t" // Scale by alpha " fmla v4.4s,v24.4s,v6.s[0] \n\t" // Scale by alpha " fmla v5.4s,v25.4s,v6.s[0] \n\t" // Scale by alpha " \n\t" " str q0, [x21] \n\t" //Store column 6 of C " str q1, [x21, #16] \n\t" " str q2, [x22] \n\t" //Store column 7 of C " str q3, [x22, #16] \n\t" " str q4, [x23] \n\t" //Store column 8 of C " str q5, [x23, #16] \n\t" " \n\t" " dup v8.4s, wzr \n\t" " dup v9.4s, wzr \n\t" " dup v10.4s, wzr \n\t" " dup v11.4s, wzr \n\t" " dup v12.4s, wzr \n\t" " dup v13.4s, wzr \n\t" " \n\t" " fcmp s7,#0.0 \n\t" " beq .SBETAZEROCOLSTOREDS4 \n\t" // Taking care of the beta==0 case. 
" \n\t" " ldr q8, [x24] \n\t" //Load column 9 of C " ldr q9, [x24, #16] \n\t" " ldr q10, [x25] \n\t" //Load column 10 of C " ldr q11, [x25, #16] \n\t" " ldr q12, [x26] \n\t" //Load column 11 of C " ldr q13, [x26, #16] \n\t" " \n\t" " fmul v8.4s, v8.4s, v7.s[0] \n\t" // Scale by beta " fmul v9.4s, v9.4s, v7.s[0] \n\t" // Scale by beta " fmul v10.4s,v10.4s,v7.s[0] \n\t" // Scale by beta " fmul v11.4s,v11.4s,v7.s[0] \n\t" // Scale by beta " fmul v12.4s,v12.4s,v7.s[0] \n\t" // Scale by beta " fmul v13.4s,v13.4s,v7.s[0] \n\t" // Scale by beta " \n\t" " .SBETAZEROCOLSTOREDS4: \n\t" " \n\t" " prfm pldl2keep,[x3] \n\t" " prfm pldl2keep,[x4] \n\t" " \n\t" " fmla v8.4s, v26.4s,v6.s[0] \n\t" // Scale by alpha " fmla v9.4s, v27.4s,v6.s[0] \n\t" // Scale by alpha " fmla v10.4s,v28.4s,v6.s[0] \n\t" // Scale by alpha " fmla v11.4s,v29.4s,v6.s[0] \n\t" // Scale by alpha " fmla v12.4s,v30.4s,v6.s[0] \n\t" // Scale by alpha " fmla v13.4s,v31.4s,v6.s[0] \n\t" // Scale by alpha " \n\t" " str q8, [x24] \n\t" //Store column 9 of C " str q9, [x24, #16] \n\t" " str q10, [x25] \n\t" //Store column 10 of C " str q11, [x25, #16] \n\t" " str q12, [x26] \n\t" //Store column 11 of C " str q13, [x26, #16] \n\t" " \n\t" " \n\t" " b .SEND \n\t" // Done (TODO: this obviously needs to be moved down to remove jump). " \n\t" " \n\t" " .SGENSTORED: \n\t" // C is general-stride stored. " \n\t" " \n\t" " dup v0.4s, wzr \n\t" " dup v1.4s, wzr \n\t" " dup v2.4s, wzr \n\t" " dup v3.4s, wzr \n\t" " dup v4.4s, wzr \n\t" " dup v5.4s, wzr \n\t" " \n\t" " fcmp s7,#0.0 \n\t" " beq .SBETAZEROGENSTOREDS1 \n\t" // Taking care of the beta==0 case. " \n\t" " mov x27, x2 \n\t" " \n\t" " ld1 {v0.s}[0],[x27],x14 \n\t" // Load c00 into quad and increment by rs_c. " ld1 {v0.s}[1],[x27],x14 \n\t" // Load c01 into quad and increment by rs_c. " ld1 {v0.s}[2],[x27],x14 \n\t" // Load c02 into quad and increment by rs_c. " ld1 {v0.s}[3],[x27],x14 \n\t" // Load c03 into quad and increment by rs_c. 
" ld1 {v1.s}[0],[x27],x14 \n\t" // Load c04 into quad and increment by rs_c. " ld1 {v1.s}[1],[x27],x14 \n\t" // Load c05 into quad and increment by rs_c. " ld1 {v1.s}[2],[x27],x14 \n\t" // Load c06 into quad and increment by rs_c. " ld1 {v1.s}[3],[x27],x14 \n\t" // Load c07 into quad and increment by rs_c. " \n\t" " mov x27, x16 \n\t" " \n\t" " ld1 {v2.s}[0],[x27],x14 \n\t" // Load c10 into quad and increment by rs_c. " ld1 {v2.s}[1],[x27],x14 \n\t" // Load c11 into quad and increment by rs_c. " ld1 {v2.s}[2],[x27],x14 \n\t" // Load c12 into quad and increment by rs_c. " ld1 {v2.s}[3],[x27],x14 \n\t" // Load c13 into quad and increment by rs_c. " ld1 {v3.s}[0],[x27],x14 \n\t" // Load c14 into quad and increment by rs_c. " ld1 {v3.s}[1],[x27],x14 \n\t" // Load c15 into quad and increment by rs_c. " ld1 {v3.s}[2],[x27],x14 \n\t" // Load c16 into quad and increment by rs_c. " ld1 {v3.s}[3],[x27],x14 \n\t" // Load c17 into quad and increment by rs_c. " \n\t" " mov x27, x17 \n\t" " \n\t" " ld1 {v4.s}[0],[x27],x14 \n\t" // Load c20 into quad and increment by rs_c. " ld1 {v4.s}[1],[x27],x14 \n\t" // Load c21 into quad and increment by rs_c. " ld1 {v4.s}[2],[x27],x14 \n\t" // Load c22 into quad and increment by rs_c. " ld1 {v4.s}[3],[x27],x14 \n\t" // Load c23 into quad and increment by rs_c. " ld1 {v5.s}[0],[x27],x14 \n\t" // Load c24 into quad and increment by rs_c. " ld1 {v5.s}[1],[x27],x14 \n\t" // Load c25 into quad and increment by rs_c. " ld1 {v5.s}[2],[x27],x14 \n\t" // Load c26 into quad and increment by rs_c. " ld1 {v5.s}[3],[x27],x14 \n\t" // Load c27 into quad and increment by rs_c. 
" \n\t" " fmul v0.4s,v0.4s,v7.s[0] \n\t" // Scale by beta " fmul v1.4s,v1.4s,v7.s[0] \n\t" // Scale by beta " fmul v2.4s,v2.4s,v7.s[0] \n\t" // Scale by beta " fmul v3.4s,v3.4s,v7.s[0] \n\t" // Scale by beta " fmul v4.4s,v4.4s,v7.s[0] \n\t" // Scale by beta " fmul v5.4s,v5.4s,v7.s[0] \n\t" // Scale by beta " \n\t" " .SBETAZEROGENSTOREDS1: \n\t" " \n\t" " fmla v0.4s, v8.4s,v6.s[0] \n\t" // Scale by alpha " fmla v1.4s, v9.4s,v6.s[0] \n\t" // Scale by alpha " fmla v2.4s,v10.4s,v6.s[0] \n\t" // Scale by alpha " fmla v3.4s,v11.4s,v6.s[0] \n\t" // Scale by alpha " fmla v4.4s,v12.4s,v6.s[0] \n\t" // Scale by alpha " fmla v5.4s,v13.4s,v6.s[0] \n\t" // Scale by alpha " \n\t" " mov x27, x2 \n\t" " \n\t" " st1 {v0.s}[0],[x27],x14 \n\t" // Store c00 into quad and increment by rs_c. " st1 {v0.s}[1],[x27],x14 \n\t" // Store c01 into quad and increment by rs_c. " st1 {v0.s}[2],[x27],x14 \n\t" // Store c02 into quad and increment by rs_c. " st1 {v0.s}[3],[x27],x14 \n\t" // Store c03 into quad and increment by rs_c. " st1 {v1.s}[0],[x27],x14 \n\t" // Store c04 into quad and increment by rs_c. " st1 {v1.s}[1],[x27],x14 \n\t" // Store c05 into quad and increment by rs_c. " st1 {v1.s}[2],[x27],x14 \n\t" // Store c06 into quad and increment by rs_c. " st1 {v1.s}[3],[x27],x14 \n\t" // Store c07 into quad and increment by rs_c. " \n\t" " mov x27, x16 \n\t" " \n\t" " st1 {v2.s}[0],[x27],x14 \n\t" // Store c10 into quad and increment by rs_c. " st1 {v2.s}[1],[x27],x14 \n\t" // Store c11 into quad and increment by rs_c. " st1 {v2.s}[2],[x27],x14 \n\t" // Store c12 into quad and increment by rs_c. " st1 {v2.s}[3],[x27],x14 \n\t" // Store c13 into quad and increment by rs_c. " st1 {v3.s}[0],[x27],x14 \n\t" // Store c14 into quad and increment by rs_c. " st1 {v3.s}[1],[x27],x14 \n\t" // Store c15 into quad and increment by rs_c. " st1 {v3.s}[2],[x27],x14 \n\t" // Store c16 into quad and increment by rs_c. " st1 {v3.s}[3],[x27],x14 \n\t" // Store c17 into quad and increment by rs_c. 
" \n\t" " mov x27, x17 \n\t" " \n\t" " st1 {v4.s}[0],[x27],x14 \n\t" // Store c20 into quad and increment by rs_c. " st1 {v4.s}[1],[x27],x14 \n\t" // Store c21 into quad and increment by rs_c. " st1 {v4.s}[2],[x27],x14 \n\t" // Store c22 into quad and increment by rs_c. " st1 {v4.s}[3],[x27],x14 \n\t" // Store c23 into quad and increment by rs_c. " st1 {v5.s}[0],[x27],x14 \n\t" // Store c24 into quad and increment by rs_c. " st1 {v5.s}[1],[x27],x14 \n\t" // Store c25 into quad and increment by rs_c. " st1 {v5.s}[2],[x27],x14 \n\t" // Store c26 into quad and increment by rs_c. " st1 {v5.s}[3],[x27],x14 \n\t" // Store c27 into quad and increment by rs_c. " \n\t" " dup v8.4s, wzr \n\t" " dup v9.4s, wzr \n\t" " dup v10.4s, wzr \n\t" " dup v11.4s, wzr \n\t" " dup v12.4s, wzr \n\t" " dup v13.4s, wzr \n\t" " \n\t" " fcmp s7,#0.0 \n\t" " beq .SBETAZEROGENSTOREDS2 \n\t" // Taking care of the beta==0 case. " \n\t" " mov x27, x18 \n\t" " \n\t" " ld1 {v8.s}[0],[x27],x14 \n\t" // Load c30 into quad and increment by rs_c. " ld1 {v8.s}[1],[x27],x14 \n\t" // Load c31 into quad and increment by rs_c. " ld1 {v8.s}[2],[x27],x14 \n\t" // Load c32 into quad and increment by rs_c. " ld1 {v8.s}[3],[x27],x14 \n\t" // Load c33 into quad and increment by rs_c. " ld1 {v9.s}[0],[x27],x14 \n\t" // Load c34 into quad and increment by rs_c. " ld1 {v9.s}[1],[x27],x14 \n\t" // Load c35 into quad and increment by rs_c. " ld1 {v9.s}[2],[x27],x14 \n\t" // Load c36 into quad and increment by rs_c. " ld1 {v9.s}[3],[x27],x14 \n\t" // Load c37 into quad and increment by rs_c. " \n\t" " mov x27, x19 \n\t" " \n\t" " ld1 {v10.s}[0],[x27],x14 \n\t" // Load c40 into quad and increment by rs_c. " ld1 {v10.s}[1],[x27],x14 \n\t" // Load c41 into quad and increment by rs_c. " ld1 {v10.s}[2],[x27],x14 \n\t" // Load c42 into quad and increment by rs_c. " ld1 {v10.s}[3],[x27],x14 \n\t" // Load c43 into quad and increment by rs_c. " ld1 {v11.s}[0],[x27],x14 \n\t" // Load c44 into quad and increment by rs_c. 
" ld1 {v11.s}[1],[x27],x14 \n\t" // Load c45 into quad and increment by rs_c. " ld1 {v11.s}[2],[x27],x14 \n\t" // Load c46 into quad and increment by rs_c. " ld1 {v11.s}[3],[x27],x14 \n\t" // Load c47 into quad and increment by rs_c. " \n\t" " mov x27, x20 \n\t" " \n\t" " ld1 {v12.s}[0],[x27],x14 \n\t" // Load c50 into quad and increment by rs_c. " ld1 {v12.s}[1],[x27],x14 \n\t" // Load c51 into quad and increment by rs_c. " ld1 {v12.s}[2],[x27],x14 \n\t" // Load c52 into quad and increment by rs_c. " ld1 {v12.s}[3],[x27],x14 \n\t" // Load c53 into quad and increment by rs_c. " ld1 {v13.s}[0],[x27],x14 \n\t" // Load c54 into quad and increment by rs_c. " ld1 {v13.s}[1],[x27],x14 \n\t" // Load c55 into quad and increment by rs_c. " ld1 {v13.s}[2],[x27],x14 \n\t" // Load c56 into quad and increment by rs_c. " ld1 {v13.s}[3],[x27],x14 \n\t" // Load c57 into quad and increment by rs_c. " \n\t" " fmul v8.4s, v8.4s, v7.s[0] \n\t" // Scale by beta " fmul v9.4s, v9.4s, v7.s[0] \n\t" // Scale by beta " fmul v10.4s,v10.4s,v7.s[0] \n\t" // Scale by beta " fmul v11.4s,v11.4s,v7.s[0] \n\t" // Scale by beta " fmul v12.4s,v12.4s,v7.s[0] \n\t" // Scale by beta " fmul v13.4s,v13.4s,v7.s[0] \n\t" // Scale by beta " \n\t" " .SBETAZEROGENSTOREDS2: \n\t" " \n\t" " fmla v8.4s, v14.4s,v6.s[0] \n\t" // Scale by alpha " fmla v9.4s, v15.4s,v6.s[0] \n\t" // Scale by alpha " fmla v10.4s,v16.4s,v6.s[0] \n\t" // Scale by alpha " fmla v11.4s,v17.4s,v6.s[0] \n\t" // Scale by alpha " fmla v12.4s,v18.4s,v6.s[0] \n\t" // Scale by alpha " fmla v13.4s,v19.4s,v6.s[0] \n\t" // Scale by alpha " \n\t" " mov x27, x18 \n\t" " \n\t" " st1 {v8.s}[0],[x27],x14 \n\t" // Store c30 into quad and increment by rs_c. " st1 {v8.s}[1],[x27],x14 \n\t" // Store c31 into quad and increment by rs_c. " st1 {v8.s}[2],[x27],x14 \n\t" // Store c32 into quad and increment by rs_c. " st1 {v8.s}[3],[x27],x14 \n\t" // Store c33 into quad and increment by rs_c. 
" st1 {v9.s}[0],[x27],x14 \n\t" // Store c34 into quad and increment by rs_c. " st1 {v9.s}[1],[x27],x14 \n\t" // Store c35 into quad and increment by rs_c. " st1 {v9.s}[2],[x27],x14 \n\t" // Store c36 into quad and increment by rs_c. " st1 {v9.s}[3],[x27],x14 \n\t" // Store c37 into quad and increment by rs_c. " \n\t" " mov x27, x19 \n\t" " \n\t" " st1 {v10.s}[0],[x27],x14 \n\t" // Store c40 into quad and increment by rs_c. " st1 {v10.s}[1],[x27],x14 \n\t" // Store c41 into quad and increment by rs_c. " st1 {v10.s}[2],[x27],x14 \n\t" // Store c42 into quad and increment by rs_c. " st1 {v10.s}[3],[x27],x14 \n\t" // Store c43 into quad and increment by rs_c. " st1 {v11.s}[0],[x27],x14 \n\t" // Store c44 into quad and increment by rs_c. " st1 {v11.s}[1],[x27],x14 \n\t" // Store c45 into quad and increment by rs_c. " st1 {v11.s}[2],[x27],x14 \n\t" // Store c46 into quad and increment by rs_c. " st1 {v11.s}[3],[x27],x14 \n\t" // Store c47 into quad and increment by rs_c. " \n\t" " mov x27, x20 \n\t" " \n\t" " st1 {v12.s}[0],[x27],x14 \n\t" // Store c50 into quad and increment by rs_c. " st1 {v12.s}[1],[x27],x14 \n\t" // Store c51 into quad and increment by rs_c. " st1 {v12.s}[2],[x27],x14 \n\t" // Store c52 into quad and increment by rs_c. " st1 {v12.s}[3],[x27],x14 \n\t" // Store c53 into quad and increment by rs_c. " st1 {v13.s}[0],[x27],x14 \n\t" // Store c54 into quad and increment by rs_c. " st1 {v13.s}[1],[x27],x14 \n\t" // Store c55 into quad and increment by rs_c. " st1 {v13.s}[2],[x27],x14 \n\t" // Store c56 into quad and increment by rs_c. " st1 {v13.s}[3],[x27],x14 \n\t" // Store c57 into quad and increment by rs_c. " \n\t" " dup v0.4s, wzr \n\t" " dup v1.4s, wzr \n\t" " dup v2.4s, wzr \n\t" " dup v3.4s, wzr \n\t" " dup v4.4s, wzr \n\t" " dup v5.4s, wzr \n\t" " \n\t" " fcmp s7,#0.0 \n\t" " beq .SBETAZEROGENSTOREDS3 \n\t" // Taking care of the beta==0 case. 
" \n\t" " mov x27, x21 \n\t" " \n\t" " ld1 {v0.s}[0],[x27],x14 \n\t" // Load c60 into quad and increment by rs_c. " ld1 {v0.s}[1],[x27],x14 \n\t" // Load c61 into quad and increment by rs_c. " ld1 {v0.s}[2],[x27],x14 \n\t" // Load c62 into quad and increment by rs_c. " ld1 {v0.s}[3],[x27],x14 \n\t" // Load c63 into quad and increment by rs_c. " ld1 {v1.s}[0],[x27],x14 \n\t" // Load c64 into quad and increment by rs_c. " ld1 {v1.s}[1],[x27],x14 \n\t" // Load c65 into quad and increment by rs_c. " ld1 {v1.s}[2],[x27],x14 \n\t" // Load c66 into quad and increment by rs_c. " ld1 {v1.s}[3],[x27],x14 \n\t" // Load c67 into quad and increment by rs_c. " \n\t" " mov x27, x22 \n\t" " \n\t" " ld1 {v2.s}[0],[x27],x14 \n\t" // Load c70 into quad and increment by rs_c. " ld1 {v2.s}[1],[x27],x14 \n\t" // Load c71 into quad and increment by rs_c. " ld1 {v2.s}[2],[x27],x14 \n\t" // Load c72 into quad and increment by rs_c. " ld1 {v2.s}[3],[x27],x14 \n\t" // Load c73 into quad and increment by rs_c. " ld1 {v3.s}[0],[x27],x14 \n\t" // Load c74 into quad and increment by rs_c. " ld1 {v3.s}[1],[x27],x14 \n\t" // Load c75 into quad and increment by rs_c. " ld1 {v3.s}[2],[x27],x14 \n\t" // Load c76 into quad and increment by rs_c. " ld1 {v3.s}[3],[x27],x14 \n\t" // Load c77 into quad and increment by rs_c. " \n\t" " mov x27, x23 \n\t" " \n\t" " ld1 {v4.s}[0],[x27],x14 \n\t" // Load c80 into quad and increment by rs_c. " ld1 {v4.s}[1],[x27],x14 \n\t" // Load c81 into quad and increment by rs_c. " ld1 {v4.s}[2],[x27],x14 \n\t" // Load c82 into quad and increment by rs_c. " ld1 {v4.s}[3],[x27],x14 \n\t" // Load c83 into quad and increment by rs_c. " ld1 {v5.s}[0],[x27],x14 \n\t" // Load c84 into quad and increment by rs_c. " ld1 {v5.s}[1],[x27],x14 \n\t" // Load c85 into quad and increment by rs_c. " ld1 {v5.s}[2],[x27],x14 \n\t" // Load c86 into quad and increment by rs_c. " ld1 {v5.s}[3],[x27],x14 \n\t" // Load c87 into quad and increment by rs_c. 
" \n\t" " fmul v0.4s,v0.4s,v7.s[0] \n\t" // Scale by beta " fmul v1.4s,v1.4s,v7.s[0] \n\t" // Scale by beta " fmul v2.4s,v2.4s,v7.s[0] \n\t" // Scale by beta " fmul v3.4s,v3.4s,v7.s[0] \n\t" // Scale by beta " fmul v4.4s,v4.4s,v7.s[0] \n\t" // Scale by beta " fmul v5.4s,v5.4s,v7.s[0] \n\t" // Scale by beta " \n\t" " .SBETAZEROGENSTOREDS3: \n\t" " \n\t" " fmla v0.4s,v20.4s,v6.s[0] \n\t" // Scale by alpha " fmla v1.4s,v21.4s,v6.s[0] \n\t" // Scale by alpha " fmla v2.4s,v22.4s,v6.s[0] \n\t" // Scale by alpha " fmla v3.4s,v23.4s,v6.s[0] \n\t" // Scale by alpha " fmla v4.4s,v24.4s,v6.s[0] \n\t" // Scale by alpha " fmla v5.4s,v25.4s,v6.s[0] \n\t" // Scale by alpha " \n\t" " mov x27, x21 \n\t" " \n\t" " st1 {v0.s}[0],[x27],x14 \n\t" // Store c60 into quad and increment by rs_c. " st1 {v0.s}[1],[x27],x14 \n\t" // Store c61 into quad and increment by rs_c. " st1 {v0.s}[2],[x27],x14 \n\t" // Store c62 into quad and increment by rs_c. " st1 {v0.s}[3],[x27],x14 \n\t" // Store c63 into quad and increment by rs_c. " st1 {v1.s}[0],[x27],x14 \n\t" // Store c64 into quad and increment by rs_c. " st1 {v1.s}[1],[x27],x14 \n\t" // Store c65 into quad and increment by rs_c. " st1 {v1.s}[2],[x27],x14 \n\t" // Store c66 into quad and increment by rs_c. " st1 {v1.s}[3],[x27],x14 \n\t" // Store c67 into quad and increment by rs_c. " \n\t" " mov x27, x22 \n\t" " \n\t" " st1 {v2.s}[0],[x27],x14 \n\t" // Store c70 into quad and increment by rs_c. " st1 {v2.s}[1],[x27],x14 \n\t" // Store c71 into quad and increment by rs_c. " st1 {v2.s}[2],[x27],x14 \n\t" // Store c72 into quad and increment by rs_c. " st1 {v2.s}[3],[x27],x14 \n\t" // Store c73 into quad and increment by rs_c. " st1 {v3.s}[0],[x27],x14 \n\t" // Store c74 into quad and increment by rs_c. " st1 {v3.s}[1],[x27],x14 \n\t" // Store c75 into quad and increment by rs_c. " st1 {v3.s}[2],[x27],x14 \n\t" // Store c76 into quad and increment by rs_c. " st1 {v3.s}[3],[x27],x14 \n\t" // Store c77 into quad and increment by rs_c. 
" \n\t" " mov x27, x23 \n\t" " \n\t" " st1 {v4.s}[0],[x27],x14 \n\t" // Store c80 into quad and increment by rs_c. " st1 {v4.s}[1],[x27],x14 \n\t" // Store c81 into quad and increment by rs_c. " st1 {v4.s}[2],[x27],x14 \n\t" // Store c82 into quad and increment by rs_c. " st1 {v4.s}[3],[x27],x14 \n\t" // Store c83 into quad and increment by rs_c. " st1 {v5.s}[0],[x27],x14 \n\t" // Store c84 into quad and increment by rs_c. " st1 {v5.s}[1],[x27],x14 \n\t" // Store c85 into quad and increment by rs_c. " st1 {v5.s}[2],[x27],x14 \n\t" // Store c86 into quad and increment by rs_c. " st1 {v5.s}[3],[x27],x14 \n\t" // Store c87 into quad and increment by rs_c. " \n\t" " dup v8.4s, wzr \n\t" " dup v9.4s, wzr \n\t" " dup v10.4s, wzr \n\t" " dup v11.4s, wzr \n\t" " dup v12.4s, wzr \n\t" " dup v13.4s, wzr \n\t" " \n\t" " fcmp s7,#0.0 \n\t" " beq .SBETAZEROGENSTOREDS4 \n\t" // Taking care of the beta==0 case. " \n\t" " mov x27, x24 \n\t" " \n\t" " ld1 {v8.s}[0],[x27],x14 \n\t" // Load c90 into quad and increment by rs_c. " ld1 {v8.s}[1],[x27],x14 \n\t" // Load c91 into quad and increment by rs_c. " ld1 {v8.s}[2],[x27],x14 \n\t" // Load c92 into quad and increment by rs_c. " ld1 {v8.s}[3],[x27],x14 \n\t" // Load c93 into quad and increment by rs_c. " ld1 {v9.s}[0],[x27],x14 \n\t" // Load c94 into quad and increment by rs_c. " ld1 {v9.s}[1],[x27],x14 \n\t" // Load c95 into quad and increment by rs_c. " ld1 {v9.s}[2],[x27],x14 \n\t" // Load c96 into quad and increment by rs_c. " ld1 {v9.s}[3],[x27],x14 \n\t" // Load c97 into quad and increment by rs_c. " \n\t" " mov x27, x25 \n\t" " \n\t" " ld1 {v10.s}[0],[x27],x14 \n\t" // Load c100 into quad and increment by rs_c. " ld1 {v10.s}[1],[x27],x14 \n\t" // Load c101 into quad and increment by rs_c. " ld1 {v10.s}[2],[x27],x14 \n\t" // Load c102 into quad and increment by rs_c. " ld1 {v10.s}[3],[x27],x14 \n\t" // Load c103 into quad and increment by rs_c. " ld1 {v11.s}[0],[x27],x14 \n\t" // Load c104 into quad and increment by rs_c. 
" ld1 {v11.s}[1],[x27],x14 \n\t" // Load c105 into quad and increment by rs_c. " ld1 {v11.s}[2],[x27],x14 \n\t" // Load c106 into quad and increment by rs_c. " ld1 {v11.s}[3],[x27],x14 \n\t" // Load c107 into quad and increment by rs_c. " \n\t" " mov x27, x26 \n\t" " \n\t" " ld1 {v12.s}[0],[x27],x14 \n\t" // Load c110 into quad and increment by rs_c. " ld1 {v12.s}[1],[x27],x14 \n\t" // Load c111 into quad and increment by rs_c. " ld1 {v12.s}[2],[x27],x14 \n\t" // Load c112 into quad and increment by rs_c. " ld1 {v12.s}[3],[x27],x14 \n\t" // Load c113 into quad and increment by rs_c. " ld1 {v13.s}[0],[x27],x14 \n\t" // Load c114 into quad and increment by rs_c. " ld1 {v13.s}[1],[x27],x14 \n\t" // Load c115 into quad and increment by rs_c. " ld1 {v13.s}[2],[x27],x14 \n\t" // Load c116 into quad and increment by rs_c. " ld1 {v13.s}[3],[x27],x14 \n\t" // Load c117 into quad and increment by rs_c. " \n\t" " fmul v8.4s, v8.4s, v7.s[0] \n\t" // Scale by beta " fmul v9.4s, v9.4s, v7.s[0] \n\t" // Scale by beta " fmul v10.4s,v10.4s,v7.s[0] \n\t" // Scale by beta " fmul v11.4s,v11.4s,v7.s[0] \n\t" // Scale by beta " fmul v12.4s,v12.4s,v7.s[0] \n\t" // Scale by beta " fmul v13.4s,v13.4s,v7.s[0] \n\t" // Scale by beta " \n\t" " .SBETAZEROGENSTOREDS4: \n\t" " \n\t" " prfm pldl2keep,[x3] \n\t" " prfm pldl2keep,[x4] \n\t" " \n\t" " fmla v8.4s, v26.4s,v6.s[0] \n\t" // Scale by alpha " fmla v9.4s, v27.4s,v6.s[0] \n\t" // Scale by alpha " fmla v10.4s,v28.4s,v6.s[0] \n\t" // Scale by alpha " fmla v11.4s,v29.4s,v6.s[0] \n\t" // Scale by alpha " fmla v12.4s,v30.4s,v6.s[0] \n\t" // Scale by alpha " fmla v13.4s,v31.4s,v6.s[0] \n\t" // Scale by alpha " \n\t" " mov x27, x24 \n\t" " \n\t" " st1 {v8.s}[0],[x27],x14 \n\t" // Store c90 into quad and increment by rs_c. " st1 {v8.s}[1],[x27],x14 \n\t" // Store c91 into quad and increment by rs_c. " st1 {v8.s}[2],[x27],x14 \n\t" // Store c92 into quad and increment by rs_c. 
" st1 {v8.s}[3],[x27],x14 \n\t" // Store c93 into quad and increment by rs_c. " st1 {v9.s}[0],[x27],x14 \n\t" // Store c94 into quad and increment by rs_c. " st1 {v9.s}[1],[x27],x14 \n\t" // Store c95 into quad and increment by rs_c. " st1 {v9.s}[2],[x27],x14 \n\t" // Store c96 into quad and increment by rs_c. " st1 {v9.s}[3],[x27],x14 \n\t" // Store c97 into quad and increment by rs_c. " \n\t" " mov x27, x25 \n\t" " \n\t" " st1 {v10.s}[0],[x27],x14 \n\t" // Store c100 into quad and increment by rs_c. " st1 {v10.s}[1],[x27],x14 \n\t" // Store c101 into quad and increment by rs_c. " st1 {v10.s}[2],[x27],x14 \n\t" // Store c102 into quad and increment by rs_c. " st1 {v10.s}[3],[x27],x14 \n\t" // Store c103 into quad and increment by rs_c. " st1 {v11.s}[0],[x27],x14 \n\t" // Store c104 into quad and increment by rs_c. " st1 {v11.s}[1],[x27],x14 \n\t" // Store c105 into quad and increment by rs_c. " st1 {v11.s}[2],[x27],x14 \n\t" // Store c106 into quad and increment by rs_c. " st1 {v11.s}[3],[x27],x14 \n\t" // Store c107 into quad and increment by rs_c. " \n\t" " mov x27, x26 \n\t" " \n\t" " st1 {v12.s}[0],[x27],x14 \n\t" // Store c110 into quad and increment by rs_c. " st1 {v12.s}[1],[x27],x14 \n\t" // Store c111 into quad and increment by rs_c. " st1 {v12.s}[2],[x27],x14 \n\t" // Store c112 into quad and increment by rs_c. " st1 {v12.s}[3],[x27],x14 \n\t" // Store c113 into quad and increment by rs_c. " st1 {v13.s}[0],[x27],x14 \n\t" // Store c114 into quad and increment by rs_c. " st1 {v13.s}[1],[x27],x14 \n\t" // Store c115 into quad and increment by rs_c. " st1 {v13.s}[2],[x27],x14 \n\t" // Store c116 into quad and increment by rs_c. " st1 {v13.s}[3],[x27],x14 \n\t" // Store c147 into quad and increment by rs_c. " \n\t" " .SEND: \n\t" // Done! 
" \n\t"
:// output operands (none)
:// input operands
 [aaddr]  "m" (a),      // 0
 [baddr]  "m" (b),      // 1
 [caddr]  "m" (c),      // 2
 [k_iter] "m" (k_iter), // 3
 [k_left] "m" (k_left), // 4
 [alpha]  "m" (alpha),  // 5
 [beta]   "m" (beta),   // 6
 [rs_c]   "m" (rs_c),   // 7
 [cs_c]   "m" (cs_c),   // 8
 [a_next] "m" (a_next), // 9
 [b_next] "m" (b_next)  // 10
:// Register clobber list
 "x0", "x1", "x2","x3","x4",
 "x5", "x6", "x7", "x8",
 "x9", "x10","x11","x12",
 "x13","x14","x15",
 "x16","x17","x18","x19",
 "x20","x21","x22","x23",
 "x24","x25","x26","x27",
 "v0", "v1", "v2", "v3",
 "v4", "v5", "v6", "v7",
 "v8", "v9", "v10","v11",
 "v12","v13","v14","v15",
 "v16","v17","v18","v19",
 "v20","v21","v22","v23",
 "v24","v25","v26","v27",
 "v28","v29","v30","v31"
);

}


/*
   History of this double-precision micro-kernel (oldest entry first; the
   later updates supersede the original note).

   o Originally a 4x4 double-precision micro-kernel, NOT fully functional
     at the time (see the updates below).
   o Runnable on ARMv8, compiled with aarch64 GCC.
   o Use it together with the armv8 BLIS configuration.
   o Tested on Juno board. Around 3 GFLOPS @ 1.1 GHz.

   December 2014.

 * UPDATE OCTOBER 2015: Now fully functional.
 * Tested on Juno board. Around 5.6 GFLOPS, 2 x A57 cores @ 1.1 GHz.
 * Tested on Juno board. Around 4 GFLOPS, 4 x A53 cores @ 850 MHz.

 * UPDATE NOVEMBER 2015
 * Micro-kernel changed to 6x8.
 * Tested on Juno board. Around 4 GFLOPS, 1 x A57 core @ 1.1 GHz.
 * Tested on Juno board. Around 7.6 GFLOPS, 2 x A57 cores @ 1.1 GHz.
 * Tested on Juno board. Around 1.5 GFLOPS, 1 x A53 core @ 850 MHz.
 * Tested on Juno board. Around 5.5 GFLOPS, 4 x A53 cores @ 850 MHz.
*/
void bli_dgemm_armv8a_asm_6x8
     (
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a,
       double*    restrict b,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	void* a_next = bli_auxinfo_next_a( data );
	void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; __asm__ volatile ( " \n\t" " ldr x0,%[aaddr] \n\t" // Load address of A " ldr x1,%[baddr] \n\t" // Load address of B " ldr x2,%[caddr] \n\t" // Load address of C " \n\t" " ldr x3,%[a_next] \n\t" // Move pointer " ldr x4,%[b_next] \n\t" // Move pointer " \n\t" " ldr x5,%[k_iter] \n\t" // Init guard (k_iter) " ldr x6,%[k_left] \n\t" // Init guard (k_iter) " \n\t" " ldr x7,%[alpha] \n\t" // Alpha address " ldr x8,%[beta] \n\t" // Beta address " \n\t" " ldr x9,%[cs_c] \n\t" // Load cs_c " lsl x10,x9,#3 \n\t" // cs_c * sizeof(double) " \n\t" " ldr x13,%[rs_c] \n\t" // Load rs_c. " lsl x14,x13,#3 \n\t" // rs_c * sizeof(double). " \n\t" " add x20,x2,x10 \n\t" //Load address Column 1 of C " add x21,x20,x10 \n\t" //Load address Column 2 of C " add x22,x21,x10 \n\t" //Load address Column 3 of C " add x23,x22,x10 \n\t" //Load address Column 4 of C " add x24,x23,x10 \n\t" //Load address Column 5 of C " add x25,x24,x10 \n\t" //Load address Column 6 of C " add x26,x25,x10 \n\t" //Load address Column 7 of C " \n\t" " prfm pldl1keep,[x2] \n\t" // Prefetch c. " prfm pldl1keep,[x20] \n\t" // Prefetch c. " prfm pldl1keep,[x21] \n\t" // Prefetch c. " prfm pldl1keep,[x22] \n\t" // Prefetch c. " prfm pldl1keep,[x23] \n\t" // Prefetch c. " prfm pldl1keep,[x24] \n\t" // Prefetch c. " prfm pldl1keep,[x25] \n\t" // Prefetch c. " prfm pldl1keep,[x26] \n\t" // Prefetch c. 
" \n\t" " ldr q0, [x0] \n\t" " ldr q1, [x0, #16] \n\t" // Load a " ldr q2, [x0, #32] \n\t" " \n\t" " ldr q3, [x1] \n\t" // Load b " ldr q4, [x1, #16] \n\t" " ldr q5, [x1, #32] \n\t" " ldr q6, [x1, #48] \n\t" " \n\t" " dup v8.2d, xzr \n\t" // Vector for accummulating column 0 " prfm PLDL1KEEP, [x1, #256] \n\t" " dup v9.2d, xzr \n\t" // Vector for accummulating column 0 " prfm PLDL1KEEP, [x1, #320] \n\t" " dup v10.2d, xzr \n\t" // Vector for accummulating column 0 " prfm PLDL1KEEP, [x1, #384] \n\t" " dup v11.2d, xzr \n\t" // Vector for accummulating column 1 " prfm PLDL1KEEP, [x1, #448] \n\t" " dup v12.2d, xzr \n\t" // Vector for accummulating column 1 " dup v13.2d, xzr \n\t" // Vector for accummulating column 1 " \n\t" " dup v14.2d, xzr \n\t" // Vector for accummulating column 2 " prfm PLDL1KEEP, [x0, #192] \n\t" " dup v15.2d, xzr \n\t" // Vector for accummulating column 2 " prfm PLDL1KEEP, [x0, #256] \n\t" " dup v16.2d, xzr \n\t" // Vector for accummulating column 2 " prfm PLDL1KEEP, [x0, #320] \n\t" " dup v17.2d, xzr \n\t" // Vector for accummulating column 3 " dup v18.2d, xzr \n\t" // Vector for accummulating column 3 " dup v19.2d, xzr \n\t" // Vector for accummulating column 3 " \n\t" " dup v20.2d, xzr \n\t" // Vector for accummulating column 4 " dup v21.2d, xzr \n\t" // Vector for accummulating column 4 " dup v22.2d, xzr \n\t" // Vector for accummulating column 4 " dup v23.2d, xzr \n\t" // Vector for accummulating column 5 " dup v24.2d, xzr \n\t" // Vector for accummulating column 5 " dup v25.2d, xzr \n\t" // Vector for accummulating column 5 " \n\t" " dup v26.2d, xzr \n\t" // Vector for accummulating column 6 " dup v27.2d, xzr \n\t" // Vector for accummulating column 6 " dup v28.2d, xzr \n\t" // Vector for accummulating column 6 " dup v29.2d, xzr \n\t" // Vector for accummulating column 7 " dup v30.2d, xzr \n\t" // Vector for accummulating column 7 " dup v31.2d, xzr \n\t" // Vector for accummulating column 7 " \n\t" " \n\t" " cmp x5,#0 \n\t" // If k_iter == 0, 
jump to k_left. " beq .DCONSIDERKLEFT \n\t" " \n\t" "add x0, x0, #48 \n\t" //update address of A "add x1, x1, #64 \n\t" //update address of B " \n\t" " cmp x5,1 \n\t" // If there is just one k_iter, jump to that one. " beq .DLASTITER \n\t" // (as loop is do-while-like). " \n\t" " DLOOP: \n\t" // Body " \n\t" " fmla v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " prfm PLDL1KEEP, [x1, #448] \n\t" //512-64=448 " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " prfm PLDL1KEEP, [x1, #512] \n\t" " fmla v10.2d,v2.2d,v3.d[0] \n\t" // Accummulate " prfm PLDL1KEEP, [x1, #576] \n\t" " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v2.2d,v3.d[1] \n\t" // Accummulate " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v2.2d,v4.d[0] \n\t" // Accummulate " ldr q3, [x1] \n\t" " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // Accummulate " fmla v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v2.2d,v4.d[1] \n\t" // Accummulate " ldr q7, [x0, #32] \n\t" " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v2.2d,v5.d[0] \n\t" // Accummulate " ldr q4, [x1, #16] \n\t" " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v2.2d,v5.d[1] \n\t" // Accummulate " ldr q5, [x1, #32] \n\t" " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " fmla v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " ldr q0, [x0] \n\t" " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " ldr q1, [x0, #16] \n\t" " \n\t" " fmla v28.2d,v2.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v2.2d,v6.d[1] \n\t" // Accummulate " ldr q6, [x1, #48] \n\t" " \n\t" // End it 1 " fmla v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " prfm PLDL1KEEP, [x1, #640] \n\t" " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " prfm 
PLDL1KEEP, [x0, #336] \n\t" " fmla v10.2d,v7.2d,v3.d[0] \n\t" // Accummulate " prfm PLDL1KEEP, [x0, #400] \n\t" " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v7.2d,v3.d[1] \n\t" // Accummulate " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v7.2d,v4.d[0] \n\t" // Accummulate " ldr q3, [x1, #64] \n\t" " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // Accummulate " fmla v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v7.2d,v4.d[1] \n\t" // Accummulate " ldr q2, [x0, #80] \n\t" " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v7.2d,v5.d[0] \n\t" // Accummulate " ldr q4, [x1, #80] \n\t" " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v7.2d,v5.d[1] \n\t" // Accummulate " ldr q5, [x1, #96] \n\t" " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " fmla v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " ldr q0, [x0, #48] \n\t" " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " ldr q1, [x0, #64] \n\t" " \n\t" " fmla v28.2d,v7.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v7.2d,v6.d[1] \n\t" // Accummulate " ldr q6, [x1, #112] \n\t" " \n\t" //End it 2 " fmla v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " prfm PLDL1KEEP, [x0, #464] \n\t" " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " fmla v10.2d,v2.2d,v3.d[0] \n\t" // Accummulate " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v2.2d,v3.d[1] \n\t" // Accummulate " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v2.2d,v4.d[0] \n\t" // Accummulate " ldr q3, [x1, #128] \n\t" " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // Accummulate " fmla 
v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v2.2d,v4.d[1] \n\t" // Accummulate " ldr q7, [x0, #128] \n\t" " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v2.2d,v5.d[0] \n\t" // Accummulate " ldr q4, [x1, #144] \n\t" " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v2.2d,v5.d[1] \n\t" // Accummulate " ldr q5, [x1, #160] \n\t" " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " fmla v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " ldr q0, [x0, #96] \n\t" " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " ldr q1, [x0, #112] \n\t" " \n\t" " fmla v28.2d,v2.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v2.2d,v6.d[1] \n\t" // Accummulate " ldr q6, [x1, #176] \n\t" " \n\t" // End it 3 " fmla v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " fmla v10.2d,v7.2d,v3.d[0] \n\t" // Accummulate " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v7.2d,v3.d[1] \n\t" // Accummulate " ldr q3, [x1, #192] \n\t" " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v7.2d,v4.d[0] \n\t" // Accummulate " ldr q2, [x0, #176] \n\t" " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // Accummulate " fmla v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v7.2d,v4.d[1] \n\t" // Accummulate " ldr q4, [x1, #208] \n\t" " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v7.2d,v5.d[0] \n\t" // Accummulate " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v7.2d,v5.d[1] \n\t" // Accummulate " ldr q5, [x1, #224] \n\t" " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " fmla 
v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " ldr q0, [x0, #144] \n\t" " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " ldr q1, [x0, #160] \n\t" " \n\t" " fmla v28.2d,v7.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v7.2d,v6.d[1] \n\t" // Accummulate " ldr q6, [x1, #240] \n\t" " \n\t" //End it 4 " add x0, x0, #192 \n\t" " add x1, x1, #256 \n\t" " \n\t" " sub x5,x5,1 \n\t" // i-=1 " cmp x5,1 \n\t" // Iterate again if we are not in k_iter == 1. " bne DLOOP \n\t" " \n\t" ".DLASTITER: \n\t" " \n\t" " fmla v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " fmla v10.2d,v2.2d,v3.d[0] \n\t" // Accummulate " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v2.2d,v3.d[1] \n\t" // Accummulate " ldr q3, [x1] \n\t" " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v2.2d,v4.d[0] \n\t" // Accummulate " ldr q7, [x0, #32] \n\t" " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // Accummulate " fmla v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v2.2d,v4.d[1] \n\t" // Accummulate " ldr q4, [x1, #16] \n\t" " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v2.2d,v5.d[0] \n\t" // Accummulate " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v2.2d,v5.d[1] \n\t" // Accummulate " ldr q5, [x1, #32] \n\t" " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " fmla v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " ldr q0, [x0] \n\t" " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " ldr q1, [x0, #16] \n\t" " \n\t" " fmla v28.2d,v2.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v2.2d,v6.d[1] \n\t" // Accummulate " ldr q6, [x1, #48] \n\t" " \n\t" // End it 1 " fmla 
v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " fmla v10.2d,v7.2d,v3.d[0] \n\t" // Accummulate " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v7.2d,v3.d[1] \n\t" // Accummulate " ldr q3, [x1, #64] \n\t" " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v7.2d,v4.d[0] \n\t" // Accummulate " ldr q2, [x0, #80] \n\t" " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // Accummulate " fmla v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v7.2d,v4.d[1] \n\t" // Accummulate " ldr q4, [x1, #80] \n\t" " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v7.2d,v5.d[0] \n\t" // Accummulate " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v7.2d,v5.d[1] \n\t" // Accummulate " ldr q5, [x1, #96] \n\t" " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " fmla v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " ldr q0, [x0, #48] \n\t" " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " ldr q1, [x0, #64] \n\t" " \n\t" " fmla v28.2d,v7.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v7.2d,v6.d[1] \n\t" // Accummulate " ldr q6, [x1, #112] \n\t" " \n\t" //End it 2 " fmla v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " fmla v10.2d,v2.2d,v3.d[0] \n\t" // Accummulate " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v2.2d,v3.d[1] \n\t" // Accummulate " ldr q3, [x1, #128] \n\t" " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v2.2d,v4.d[0] \n\t" // Accummulate " ldr q7, [x0, #128] \n\t" " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // 
Accummulate " fmla v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v2.2d,v4.d[1] \n\t" // Accummulate " ldr q4, [x1, #144] \n\t" " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v2.2d,v5.d[0] \n\t" // Accummulate " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v2.2d,v5.d[1] \n\t" // Accummulate " ldr q5, [x1, #160] \n\t" " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " fmla v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " ldr q0, [x0, #96] \n\t" " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " ldr q1, [x0, #112] \n\t" " \n\t" " fmla v28.2d,v2.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v2.2d,v6.d[1] \n\t" // Accummulate " ldr q6, [x1, #176] \n\t" " \n\t" // End it 3 " fmla v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " fmla v10.2d,v7.2d,v3.d[0] \n\t" // Accummulate " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v7.2d,v3.d[1] \n\t" // Accummulate " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v7.2d,v4.d[0] \n\t" // Accummulate " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // Accummulate " fmla v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v7.2d,v4.d[1] \n\t" // Accummulate " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v7.2d,v5.d[0] \n\t" // Accummulate " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v7.2d,v5.d[1] \n\t" // Accummulate " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " add x1, x1, #192 \n\t" " fmla v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // 
Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " \n\t" " fmla v28.2d,v7.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v7.2d,v6.d[1] \n\t" // Accummulate " \n\t" //End it 4 " add x0, x0, #144 \n\t" " \n\t" " .DCONSIDERKLEFT: \n\t" " cmp x6,0 \n\t" // If k_left == 0, we are done. " beq .DPOSTACCUM \n\t" // else, we enter the k_left loop. " \n\t" ".DLOOPKLEFT: \n\t" " \n\t" " ldr q0, [x0],#16 \n\t" " ldr q1, [x0],#16 \n\t" // Load a " ldr q2, [x0],#16 \n\t" " \n\t" " ldr q3, [x1],#16 \n\t" // Load b " ldr q4, [x1],#16 \n\t" " ldr q5, [x1],#16 \n\t" " ldr q6, [x1],#16 \n\t" " \n\t" " sub x6,x6,1 \n\t" " \n\t" " fmla v8.2d ,v0.2d,v3.d[0] \n\t" // Accummulate " fmla v9.2d ,v1.2d,v3.d[0] \n\t" // Accummulate " fmla v10.2d,v2.2d,v3.d[0] \n\t" // Accummulate " \n\t" " fmla v11.2d,v0.2d,v3.d[1] \n\t" // Accummulate " fmla v12.2d,v1.2d,v3.d[1] \n\t" // Accummulate " fmla v13.2d,v2.2d,v3.d[1] \n\t" // Accummulate " \n\t" " fmla v14.2d,v0.2d,v4.d[0] \n\t" // Accummulate " fmla v15.2d,v1.2d,v4.d[0] \n\t" // Accummulate " fmla v16.2d,v2.2d,v4.d[0] \n\t" // Accummulate " \n\t" " fmla v17.2d,v0.2d,v4.d[1] \n\t" // Accummulate " fmla v18.2d,v1.2d,v4.d[1] \n\t" // Accummulate " fmla v19.2d,v2.2d,v4.d[1] \n\t" // Accummulate " \n\t" " fmla v20.2d,v0.2d,v5.d[0] \n\t" // Accummulate " fmla v21.2d,v1.2d,v5.d[0] \n\t" // Accummulate " fmla v22.2d,v2.2d,v5.d[0] \n\t" // Accummulate " \n\t" " fmla v23.2d,v0.2d,v5.d[1] \n\t" // Accummulate " fmla v24.2d,v1.2d,v5.d[1] \n\t" // Accummulate " fmla v25.2d,v2.2d,v5.d[1] \n\t" // Accummulate " \n\t" " fmla v26.2d,v0.2d,v6.d[0] \n\t" // Accummulate " fmla v29.2d,v0.2d,v6.d[1] \n\t" // Accummulate " \n\t" " fmla v27.2d,v1.2d,v6.d[0] \n\t" // Accummulate " fmla v30.2d,v1.2d,v6.d[1] \n\t" // Accummulate " \n\t" " fmla v28.2d,v2.2d,v6.d[0] \n\t" // Accummulate " fmla v31.2d,v2.2d,v6.d[1] \n\t" // Accummulate " \n\t" " cmp x6,0 \n\t" // Iterate again. " bne .DLOOPKLEFT \n\t" // if i!=0. 
" \n\t" " .DPOSTACCUM: \n\t" " \n\t" " ld1r {v6.2d},[x7] \n\t" // Load alpha. " ld1r {v7.2d},[x8] \n\t" // Load beta " \n\t" " cmp x13,#1 \n\t" // If rs_c != 1 (column-major) " bne .DGENSTORED \n\t" " \n\t" " .DCOLSTORED: \n\t" // C is column-major. " \n\t" " dup v0.2d, xzr \n\t" " dup v1.2d, xzr \n\t" " dup v2.2d, xzr \n\t" " dup v3.2d, xzr \n\t" " dup v4.2d, xzr \n\t" " dup v5.2d, xzr \n\t" " \n\t" " fcmp d7,#0.0 \n\t" " beq .DBETAZEROCOLSTOREDS1 \n\t" // Taking care of the beta==0 case. " \n\t" " ldr q0, [x2] \n\t" //Load column 0 of C " ldr q1, [x2, #16] \n\t" " ldr q2, [x2, #32] \n\t" " \n\t" " ldr q3, [x20] \n\t" //Load column 1 of C " ldr q4, [x20, #16] \n\t" " ldr q5, [x20, #32] \n\t" " \n\t" " fmul v0.2d,v0.2d,v7.d[0] \n\t" // Scale by beta " fmul v1.2d,v1.2d,v7.d[0] \n\t" // Scale by beta " fmul v2.2d,v2.2d,v7.d[0] \n\t" // Scale by beta " fmul v3.2d,v3.2d,v7.d[0] \n\t" // Scale by beta " fmul v4.2d,v4.2d,v7.d[0] \n\t" // Scale by beta " fmul v5.2d,v5.2d,v7.d[0] \n\t" // Scale by beta " \n\t" " .DBETAZEROCOLSTOREDS1: \n\t" " \n\t" " fmla v0.2d,v8.2d,v6.d[0] \n\t" // Scale by alpha " fmla v1.2d,v9.2d,v6.d[0] \n\t" // Scale by alpha " fmla v2.2d,v10.2d,v6.d[0] \n\t" // Scale by alpha " fmla v3.2d,v11.2d,v6.d[0] \n\t" // Scale by alpha " fmla v4.2d,v12.2d,v6.d[0] \n\t" // Scale by alpha " fmla v5.2d,v13.2d,v6.d[0] \n\t" // Scale by alpha " \n\t" " str q0, [x2] \n\t" //Store column 0 of C " str q1, [x2, #16] \n\t" " str q2, [x2, #32] \n\t" " \n\t" " str q3, [x20] \n\t" //Store column 1 of C " str q4, [x20, #16] \n\t" " str q5, [x20, #32] \n\t" " \n\t" " dup v8.2d, xzr \n\t" " dup v9.2d, xzr \n\t" " dup v10.2d, xzr \n\t" " dup v11.2d, xzr \n\t" " dup v12.2d, xzr \n\t" " dup v13.2d, xzr \n\t" " \n\t" " fcmp d7,#0.0 \n\t" " beq .DBETAZEROCOLSTOREDS2 \n\t" // Taking care of the beta==0 case. 
" \n\t" " ldr q8, [x21] \n\t" //Load column 2 of C " ldr q9, [x21, #16] \n\t" " ldr q10, [x21, #32] \n\t" " \n\t" " ldr q11, [x22] \n\t" //Load column 3 of C " ldr q12, [x22, #16] \n\t" " ldr q13, [x22, #32] \n\t" " \n\t" " fmul v8.2d, v8.2d, v7.d[0] \n\t" // Scale by beta " fmul v9.2d, v9.2d, v7.d[0] \n\t" // Scale by beta " fmul v10.2d,v10.2d,v7.d[0] \n\t" // Scale by beta " fmul v11.2d,v11.2d,v7.d[0] \n\t" // Scale by beta " fmul v12.2d,v12.2d,v7.d[0] \n\t" // Scale by beta " fmul v13.2d,v13.2d,v7.d[0] \n\t" // Scale by beta " \n\t" " .DBETAZEROCOLSTOREDS2: \n\t" " \n\t" " fmla v8.2d, v14.2d,v6.d[0] \n\t" // Scale by alpha " fmla v9.2d, v15.2d,v6.d[0] \n\t" // Scale by alpha " fmla v10.2d,v16.2d,v6.d[0] \n\t" // Scale by alpha " fmla v11.2d,v17.2d,v6.d[0] \n\t" // Scale by alpha " fmla v12.2d,v18.2d,v6.d[0] \n\t" // Scale by alpha " fmla v13.2d,v19.2d,v6.d[0] \n\t" // Scale by alpha " \n\t" " str q8, [x21] \n\t" //Store column 2 of C " str q9, [x21, #16] \n\t" " str q10, [x21, #32] \n\t" " \n\t" " str q11, [x22] \n\t" //Store column 3 of C " str q12, [x22, #16] \n\t" " str q13, [x22, #32] \n\t" " \n\t" " dup v0.2d, xzr \n\t" " dup v1.2d, xzr \n\t" " dup v2.2d, xzr \n\t" " dup v3.2d, xzr \n\t" " dup v4.2d, xzr \n\t" " dup v5.2d, xzr \n\t" " \n\t" " fcmp d7,#0.0 \n\t" " beq .DBETAZEROCOLSTOREDS3 \n\t" // Taking care of the beta==0 case. 
" \n\t" " ldr q0, [x23] \n\t" //Load column 4 of C " ldr q1, [x23, #16] \n\t" " ldr q2, [x23, #32] \n\t" " \n\t" " ldr q3, [x24] \n\t" //Load column 5 of C " ldr q4, [x24, #16] \n\t" " ldr q5, [x24, #32] \n\t" " \n\t" " fmul v0.2d,v0.2d,v7.d[0] \n\t" // Scale by beta " fmul v1.2d,v1.2d,v7.d[0] \n\t" // Scale by beta " fmul v2.2d,v2.2d,v7.d[0] \n\t" // Scale by beta " fmul v3.2d,v3.2d,v7.d[0] \n\t" // Scale by beta " fmul v4.2d,v4.2d,v7.d[0] \n\t" // Scale by beta " fmul v5.2d,v5.2d,v7.d[0] \n\t" // Scale by beta " \n\t" " .DBETAZEROCOLSTOREDS3: \n\t" " \n\t" " fmla v0.2d,v20.2d,v6.d[0] \n\t" // Scale by alpha " fmla v1.2d,v21.2d,v6.d[0] \n\t" // Scale by alpha " fmla v2.2d,v22.2d,v6.d[0] \n\t" // Scale by alpha " fmla v3.2d,v23.2d,v6.d[0] \n\t" // Scale by alpha " fmla v4.2d,v24.2d,v6.d[0] \n\t" // Scale by alpha " fmla v5.2d,v25.2d,v6.d[0] \n\t" // Scale by alpha " \n\t" " str q0, [x23] \n\t" //Store column 4 of C " str q1, [x23, #16] \n\t" " str q2, [x23, #32] \n\t" " \n\t" " str q3, [x24] \n\t" //Store column 5 of C " str q4, [x24, #16] \n\t" " str q5, [x24, #32] \n\t" " \n\t" " dup v8.2d, xzr \n\t" " dup v9.2d, xzr \n\t" " dup v10.2d, xzr \n\t" " dup v11.2d, xzr \n\t" " dup v12.2d, xzr \n\t" " dup v13.2d, xzr \n\t" " \n\t" " fcmp d7,#0.0 \n\t" " beq .DBETAZEROCOLSTOREDS4 \n\t" // Taking care of the beta==0 case. 
" \n\t" " ldr q8, [x25] \n\t" //Load column 6 of C " ldr q9, [x25, #16] \n\t" " ldr q10, [x25, #32] \n\t" " \n\t" " ldr q11, [x26] \n\t" //Load column 7 of C " ldr q12, [x26, #16] \n\t" " ldr q13, [x26, #32] \n\t" " \n\t" " fmul v8.2d, v8.2d, v7.d[0] \n\t" // Scale by beta " fmul v9.2d, v9.2d, v7.d[0] \n\t" // Scale by beta " fmul v10.2d,v10.2d,v7.d[0] \n\t" // Scale by beta " fmul v11.2d,v11.2d,v7.d[0] \n\t" // Scale by beta " fmul v12.2d,v12.2d,v7.d[0] \n\t" // Scale by beta " fmul v13.2d,v13.2d,v7.d[0] \n\t" // Scale by beta " \n\t" " .DBETAZEROCOLSTOREDS4: \n\t" " \n\t" " prfm pldl2keep,[x3] \n\t" " prfm pldl2keep,[x4] \n\t" " \n\t" " fmla v8.2d, v26.2d,v6.d[0] \n\t" // Scale by alpha " fmla v9.2d, v27.2d,v6.d[0] \n\t" // Scale by alpha " fmla v10.2d,v28.2d,v6.d[0] \n\t" // Scale by alpha " fmla v11.2d,v29.2d,v6.d[0] \n\t" // Scale by alpha " fmla v12.2d,v30.2d,v6.d[0] \n\t" // Scale by alpha " fmla v13.2d,v31.2d,v6.d[0] \n\t" // Scale by alpha " \n\t" " str q8, [x25] \n\t" //Store column 6 of C " str q9, [x25, #16] \n\t" " str q10, [x25, #32] \n\t" " \n\t" " str q11, [x26] \n\t" //Store column 7 of C " str q12, [x26, #16] \n\t" " str q13, [x26, #32] \n\t" " \n\t" " b .DEND \n\t" " \n\t" " .DGENSTORED: \n\t" // C is general-stride stored. " \n\t" " dup v0.2d, xzr \n\t" " dup v1.2d, xzr \n\t" " dup v2.2d, xzr \n\t" " dup v3.2d, xzr \n\t" " dup v4.2d, xzr \n\t" " dup v5.2d, xzr \n\t" " \n\t" " fcmp d7,#0.0 \n\t" " beq .DBETAZEROGENSTOREDS1 \n\t" // Taking care of the beta==0 case. " \n\t" " mov x27, x2 \n\t" " \n\t" // Load address of C. " ld1 {v0.d}[0],[x27],x14 \n\t" // Load c00 into quad and increment by rs_c. " ld1 {v0.d}[1],[x27],x14 \n\t" // Load c01 into quad and increment by rs_c. " ld1 {v1.d}[0],[x27],x14 \n\t" // Load c02 into quad and increment by rs_c. " ld1 {v1.d}[1],[x27],x14 \n\t" // Load c03 into quad and increment by rs_c. " ld1 {v2.d}[0],[x27],x14 \n\t" // Load c04 into quad and increment by rs_c. 
" ld1 {v2.d}[1],[x27],x14 \n\t" // Load c05 into quad and increment by rs_c. " \n\t" " mov x27, x20 \n\t" // Load address of C. " \n\t" " ld1 {v3.d}[0],[x27],x14 \n\t" // Load c10 into quad and increment by rs_c. " ld1 {v3.d}[1],[x27],x14 \n\t" // Load c11 into quad and increment by rs_c. " ld1 {v4.d}[0],[x27],x14 \n\t" // Load c12 into quad and increment by rs_c. " ld1 {v4.d}[1],[x27],x14 \n\t" // Load c13 into quad and increment by rs_c. " ld1 {v5.d}[0],[x27],x14 \n\t" // Load c14 into quad and increment by rs_c. " ld1 {v5.d}[1],[x27],x14 \n\t" // Load c15 into quad and increment by rs_c. " \n\t" " fmul v0.2d,v0.2d,v7.d[0] \n\t" // Scale by beta " fmul v1.2d,v1.2d,v7.d[0] \n\t" // Scale by beta " fmul v2.2d,v2.2d,v7.d[0] \n\t" // Scale by beta " fmul v3.2d,v3.2d,v7.d[0] \n\t" // Scale by beta " fmul v4.2d,v4.2d,v7.d[0] \n\t" // Scale by beta " fmul v5.2d,v5.2d,v7.d[0] \n\t" // Scale by beta " \n\t" " .DBETAZEROGENSTOREDS1: \n\t" " \n\t" " fmla v0.2d,v8.2d,v6.d[0] \n\t" // Scale by alpha " fmla v1.2d,v9.2d,v6.d[0] \n\t" // Scale by alpha " fmla v2.2d,v10.2d,v6.d[0] \n\t" // Scale by alpha " fmla v3.2d,v11.2d,v6.d[0] \n\t" // Scale by alpha " fmla v4.2d,v12.2d,v6.d[0] \n\t" // Scale by alpha " fmla v5.2d,v13.2d,v6.d[0] \n\t" // Scale by alpha " \n\t" " mov x27, x2 \n\t" // Load address of C. " \n\t" " st1 {v0.d}[0],[x27],x14 \n\t" // Store c00 into quad and increment by rs_c. " st1 {v0.d}[1],[x27],x14 \n\t" // Store c01 into quad and increment by rs_c. " st1 {v1.d}[0],[x27],x14 \n\t" // Store c02 into quad and increment by rs_c. " st1 {v1.d}[1],[x27],x14 \n\t" // Store c03 into quad and increment by rs_c. " st1 {v2.d}[0],[x27],x14 \n\t" // Store c04 into quad and increment by rs_c. " st1 {v2.d}[1],[x27],x14 \n\t" // Store c05 into quad and increment by rs_c. " \n\t" " mov x27, x20 \n\t" // Load address of C. " \n\t" " st1 {v3.d}[0],[x27],x14 \n\t" // Store c10 into quad and increment by rs_c. 
" st1 {v3.d}[1],[x27],x14 \n\t" // Store c11 into quad and increment by rs_c. " st1 {v4.d}[0],[x27],x14 \n\t" // Store c12 into quad and increment by rs_c. " st1 {v4.d}[1],[x27],x14 \n\t" // Store c13 into quad and increment by rs_c. " st1 {v5.d}[0],[x27],x14 \n\t" // Store c14 into quad and increment by rs_c. " st1 {v5.d}[1],[x27],x14 \n\t" // Store c15 into quad and increment by rs_c. " \n\t" " dup v8.2d, xzr \n\t" " dup v9.2d, xzr \n\t" " dup v10.2d, xzr \n\t" " dup v11.2d, xzr \n\t" " dup v12.2d, xzr \n\t" " dup v13.2d, xzr \n\t" " \n\t" " fcmp d7,#0.0 \n\t" " beq .DBETAZEROGENSTOREDS2 \n\t" // Taking care of the beta==0 case. " \n\t" " mov x27, x21 \n\t" // Load address of C. " \n\t" " ld1 {v8.d}[0], [x27],x14 \n\t" // Load c20 into quad and increment by rs_c. " ld1 {v8.d}[1], [x27],x14 \n\t" // Load c21 into quad and increment by rs_c. " ld1 {v9.d}[0], [x27],x14 \n\t" // Load c22 into quad and increment by rs_c. " ld1 {v9.d}[1], [x27],x14 \n\t" // Load c23 into quad and increment by rs_c. " ld1 {v10.d}[0],[x27],x14 \n\t" // Load c24 into quad and increment by rs_c. " ld1 {v10.d}[1],[x27],x14 \n\t" // Load c25 into quad and increment by rs_c. " \n\t" " mov x27, x22 \n\t" // Load address of C. " \n\t" " ld1 {v11.d}[0],[x27],x14 \n\t" // Load c30 into quad and increment by rs_c. " ld1 {v11.d}[1],[x27],x14 \n\t" // Load c31 into quad and increment by rs_c. " ld1 {v12.d}[0],[x27],x14 \n\t" // Load c32 into quad and increment by rs_c. " ld1 {v12.d}[1],[x27],x14 \n\t" // Load c33 into quad and increment by rs_c. " ld1 {v13.d}[0],[x27],x14 \n\t" // Load c34 into quad and increment by rs_c. " ld1 {v13.d}[1],[x27],x14 \n\t" // Load c35 into quad and increment by rs_c. 
" \n\t" " fmul v8.2d, v8.2d, v7.d[0] \n\t" // Scale by beta " fmul v9.2d, v9.2d, v7.d[0] \n\t" // Scale by beta " fmul v10.2d,v10.2d,v7.d[0] \n\t" // Scale by beta " fmul v11.2d,v11.2d,v7.d[0] \n\t" // Scale by beta " fmul v12.2d,v12.2d,v7.d[0] \n\t" // Scale by beta " fmul v13.2d,v13.2d,v7.d[0] \n\t" // Scale by beta " \n\t" " .DBETAZEROGENSTOREDS2: \n\t" " \n\t" " fmla v8.2d, v14.2d,v6.d[0] \n\t" // Scale by alpha " fmla v9.2d, v15.2d,v6.d[0] \n\t" // Scale by alpha " fmla v10.2d,v16.2d,v6.d[0] \n\t" // Scale by alpha " fmla v11.2d,v17.2d,v6.d[0] \n\t" // Scale by alpha " fmla v12.2d,v18.2d,v6.d[0] \n\t" // Scale by alpha " fmla v13.2d,v19.2d,v6.d[0] \n\t" // Scale by alpha " \n\t" " mov x27, x21 \n\t" // Load address of C. " \n\t" " st1 {v8.d}[0], [x27],x14 \n\t" // Store c20 into quad and increment by rs_c. " st1 {v8.d}[1], [x27],x14 \n\t" // Store c21 into quad and increment by rs_c. " st1 {v9.d}[0], [x27],x14 \n\t" // Store c22 into quad and increment by rs_c. " st1 {v9.d}[1], [x27],x14 \n\t" // Store c23 into quad and increment by rs_c. " st1 {v10.d}[0],[x27],x14 \n\t" // Store c24 into quad and increment by rs_c. " st1 {v10.d}[1],[x27],x14 \n\t" // Store c25 into quad and increment by rs_c. " \n\t" " mov x27, x22 \n\t" // Load address of C. " \n\t" " st1 {v11.d}[0],[x27],x14 \n\t" // Store c30 into quad and increment by rs_c. " st1 {v11.d}[1],[x27],x14 \n\t" // Store c31 into quad and increment by rs_c. " st1 {v12.d}[0],[x27],x14 \n\t" // Store c32 into quad and increment by rs_c. " st1 {v12.d}[1],[x27],x14 \n\t" // Store c33 into quad and increment by rs_c. " st1 {v13.d}[0],[x27],x14 \n\t" // Store c34 into quad and increment by rs_c. " st1 {v13.d}[1],[x27],x14 \n\t" // Store c35 into quad and increment by rs_c. " \n\t" " dup v0.2d, xzr \n\t" " dup v1.2d, xzr \n\t" " dup v2.2d, xzr \n\t" " dup v3.2d, xzr \n\t" " dup v4.2d, xzr \n\t" " dup v5.2d, xzr \n\t" " \n\t" " fcmp d7,#0.0 \n\t" " beq .DBETAZEROGENSTOREDS3 \n\t" // Taking care of the beta==0 case. 
" \n\t" " mov x27, x23 \n\t" // Load address of C. " \n\t" " ld1 {v0.d}[0],[x27],x14 \n\t" // Load c40 into quad and increment by rs_c. " ld1 {v0.d}[1],[x27],x14 \n\t" // Load c41 into quad and increment by rs_c. " ld1 {v1.d}[0],[x27],x14 \n\t" // Load c42 into quad and increment by rs_c. " ld1 {v1.d}[1],[x27],x14 \n\t" // Load c43 into quad and increment by rs_c. " ld1 {v2.d}[0],[x27],x14 \n\t" // Load c44 into quad and increment by rs_c. " ld1 {v2.d}[1],[x27],x14 \n\t" // Load c45 into quad and increment by rs_c. " \n\t" " mov x27, x24 \n\t" // Load address of C. " \n\t" " ld1 {v3.d}[0],[x27],x14 \n\t" // Load c50 into quad and increment by rs_c. " ld1 {v3.d}[1],[x27],x14 \n\t" // Load c51 into quad and increment by rs_c. " ld1 {v4.d}[0],[x27],x14 \n\t" // Load c52 into quad and increment by rs_c. " ld1 {v4.d}[1],[x27],x14 \n\t" // Load c53 into quad and increment by rs_c. " ld1 {v5.d}[0],[x27],x14 \n\t" // Load c54 into quad and increment by rs_c. " ld1 {v5.d}[1],[x27],x14 \n\t" // Load c55 into quad and increment by rs_c. " \n\t" " fmul v0.2d,v0.2d,v7.d[0] \n\t" // Scale by beta " fmul v1.2d,v1.2d,v7.d[0] \n\t" // Scale by beta " fmul v2.2d,v2.2d,v7.d[0] \n\t" // Scale by beta " fmul v3.2d,v3.2d,v7.d[0] \n\t" // Scale by beta " fmul v4.2d,v4.2d,v7.d[0] \n\t" // Scale by beta " fmul v5.2d,v5.2d,v7.d[0] \n\t" // Scale by beta " \n\t" " .DBETAZEROGENSTOREDS3: \n\t" " \n\t" " fmla v0.2d,v20.2d,v6.d[0] \n\t" // Scale by alpha " fmla v1.2d,v21.2d,v6.d[0] \n\t" // Scale by alpha " fmla v2.2d,v22.2d,v6.d[0] \n\t" // Scale by alpha " fmla v3.2d,v23.2d,v6.d[0] \n\t" // Scale by alpha " fmla v4.2d,v24.2d,v6.d[0] \n\t" // Scale by alpha " fmla v5.2d,v25.2d,v6.d[0] \n\t" // Scale by alpha " \n\t" " mov x27, x23 \n\t" // Load address of C. " \n\t" " st1 {v0.d}[0],[x27],x14 \n\t" // Store c40 into quad and increment by rs_c. " st1 {v0.d}[1],[x27],x14 \n\t" // Store c41 into quad and increment by rs_c. 
" st1 {v1.d}[0],[x27],x14 \n\t" // Store c42 into quad and increment by rs_c. " st1 {v1.d}[1],[x27],x14 \n\t" // Store c43 into quad and increment by rs_c. " st1 {v2.d}[0],[x27],x14 \n\t" // Store c44 into quad and increment by rs_c. " st1 {v2.d}[1],[x27],x14 \n\t" // Store c45 into quad and increment by rs_c. " \n\t" " mov x27, x24 \n\t" // Load address of C. " \n\t" " st1 {v3.d}[0],[x27],x14 \n\t" // Store c50 into quad and increment by rs_c. " st1 {v3.d}[1],[x27],x14 \n\t" // Store c51 into quad and increment by rs_c. " st1 {v4.d}[0],[x27],x14 \n\t" // Store c52 into quad and increment by rs_c. " st1 {v4.d}[1],[x27],x14 \n\t" // Store c53 into quad and increment by rs_c. " st1 {v5.d}[0],[x27],x14 \n\t" // Store c54 into quad and increment by rs_c. " st1 {v5.d}[1],[x27],x14 \n\t" // Store c55 into quad and increment by rs_c. " \n\t" " dup v8.2d, xzr \n\t" " dup v9.2d, xzr \n\t" " dup v10.2d, xzr \n\t" " dup v11.2d, xzr \n\t" " dup v12.2d, xzr \n\t" " dup v13.2d, xzr \n\t" " \n\t" " fcmp d7,#0.0 \n\t" " beq .DBETAZEROGENSTOREDS4 \n\t" // Taking care of the beta==0 case. " \n\t" " mov x27, x25 \n\t" " \n\t" " ld1 {v8.d}[0], [x27],x14 \n\t" // Load c60 into quad and increment by rs_c. " ld1 {v8.d}[1], [x27],x14 \n\t" // Load c61 into quad and increment by rs_c. " ld1 {v9.d}[0], [x27],x14 \n\t" // Load c62 into quad and increment by rs_c. " ld1 {v9.d}[1], [x27],x14 \n\t" // Load c63 into quad and increment by rs_c. " ld1 {v10.d}[0],[x27],x14 \n\t" // Load c64 into quad and increment by rs_c. " ld1 {v10.d}[1],[x27],x14 \n\t" // Load c65 into quad and increment by rs_c. " \n\t" " mov x27, x26 \n\t" // Load address of C. " \n\t" " ld1 {v11.d}[0],[x27],x14 \n\t" // Load c70 into quad and increment by rs_c. " ld1 {v11.d}[1],[x27],x14 \n\t" // Load c71 into quad and increment by rs_c. " ld1 {v12.d}[0],[x27],x14 \n\t" // Load c72 into quad and increment by rs_c. " ld1 {v12.d}[1],[x27],x14 \n\t" // Load c73 into quad and increment by rs_c. 
" ld1 {v13.d}[0],[x27],x14 \n\t" // Load c74 into quad and increment by rs_c. " ld1 {v13.d}[1],[x27],x14 \n\t" // Load c75 into quad and increment by rs_c. " \n\t" " fmul v8.2d, v8.2d, v7.d[0] \n\t" // Scale by beta " fmul v9.2d, v9.2d, v7.d[0] \n\t" // Scale by beta " fmul v10.2d,v10.2d,v7.d[0] \n\t" // Scale by beta " fmul v11.2d,v11.2d,v7.d[0] \n\t" // Scale by beta " fmul v12.2d,v12.2d,v7.d[0] \n\t" // Scale by beta " fmul v13.2d,v13.2d,v7.d[0] \n\t" // Scale by beta " \n\t" " .DBETAZEROGENSTOREDS4: \n\t" " \n\t" " prfm pldl2keep,[x3] \n\t" " prfm pldl2keep,[x4] \n\t" " \n\t" " fmla v8.2d, v26.2d,v6.d[0] \n\t" // Scale by alpha " fmla v9.2d, v27.2d,v6.d[0] \n\t" // Scale by alpha " fmla v10.2d,v28.2d,v6.d[0] \n\t" // Scale by alpha " fmla v11.2d,v29.2d,v6.d[0] \n\t" // Scale by alpha " fmla v12.2d,v30.2d,v6.d[0] \n\t" // Scale by alpha " fmla v13.2d,v31.2d,v6.d[0] \n\t" // Scale by alpha " \n\t" " mov x27, x25 \n\t" // Load address of C. " \n\t" " st1 {v8.d}[0], [x27],x14 \n\t" // Store c60 into quad and increment by rs_c. " st1 {v8.d}[1], [x27],x14 \n\t" // Store c61 into quad and increment by rs_c. " st1 {v9.d}[0], [x27],x14 \n\t" // Store c62 into quad and increment by rs_c. " st1 {v9.d}[1], [x27],x14 \n\t" // Store c63 into quad and increment by rs_c. " st1 {v10.d}[0],[x27],x14 \n\t" // Store c64 into quad and increment by rs_c. " st1 {v10.d}[1],[x27],x14 \n\t" // Store c65 into quad and increment by rs_c. " \n\t" " mov x27, x26 \n\t" // Load address of C. " \n\t" " st1 {v11.d}[0],[x27],x14 \n\t" // Store c70 into quad and increment by rs_c. " st1 {v11.d}[1],[x27],x14 \n\t" // Store c71 into quad and increment by rs_c. " st1 {v12.d}[0],[x27],x14 \n\t" // Store c72 into quad and increment by rs_c. " st1 {v12.d}[1],[x27],x14 \n\t" // Store c73 into quad and increment by rs_c. " st1 {v13.d}[0],[x27],x14 \n\t" // Store c74 into quad and increment by rs_c. " st1 {v13.d}[1],[x27],x14 \n\t" // Store c75 into quad and increment by rs_c. 
" \n\t" " .DEND: \n\t" // Done! " \n\t" :// output operands (none) :// input operands [aaddr] "m" (a), // 0 [baddr] "m" (b), // 1 [caddr] "m" (c), // 2 [k_iter] "m" (k_iter), // 3 [k_left] "m" (k_left), // 4 [alpha] "m" (alpha), // 5 [beta] "m" (beta), // 6 [rs_c] "m" (rs_c), // 6 [cs_c] "m" (cs_c), // 7 [a_next] "m" (a_next), // 8 [b_next] "m" (b_next) // 9 :// Register clobber list "x0","x1","x2","x3", "x4","x5","x6", "x7","x8","x9", "x10","x11","x12","x13","x14","x16","x17", "x20","x21","x22","x23","x24","x25","x26", "x27", "v0","v1","v2", "v3","v4","v5", "v6","v7","v8", "v9","v10","v11", "v12","v13","v14", "v15","v16","v17","v18","v19", "v20","v21","v22","v23", "v24","v25","v26","v27", "v28","v29","v30","v31" ); } #if 0 void bli_cgemm_armv8a_opt_4x4 ( dim_t k, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { } void bli_zgemm_armv8a_opt_4x4 ( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { } #endif blis-0.6.1/kernels/armv8a/bli_kernels_armv8a.h000066400000000000000000000033561360743507500212510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Declarations for the ARMv8-A gemm micro-kernels, one per datatype.
// NOTE(review): GEMM_UKR_PROT is defined elsewhere; presumably it pastes the
// type character and the kernel name into a bli_<ch><name> prototype
// (e.g. bli_dgemm_armv8a_asm_6x8) -- confirm against the macro definition.
GEMM_UKR_PROT( float,    s, gemm_armv8a_asm_8x12 )
GEMM_UKR_PROT( double,   d, gemm_armv8a_asm_6x8 )

blis-0.6.1/kernels/bgq/000077500000000000000000000000001360743507500146755ustar00rootroot00000000000000blis-0.6.1/kernels/bgq/1/000077500000000000000000000000001360743507500150355ustar00rootroot00000000000000blis-0.6.1/kernels/bgq/1/bli_axpyv_bgq_int.c000066400000000000000000000056201360743507500207040ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_daxpyv_bgq_int ( conj_t conjx, dim_t n, double* restrict alpha, double* restrict x, inc_t incx, double* restrict y, inc_t incy, cntx_t* restrict cntx ) { if ( bli_zero_dim1( n ) ) return; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. bool_t use_ref = FALSE; if ( incx != 1 || incy != 1 || bli_is_unaligned_to( ( siz_t )x, 32 ) || bli_is_unaligned_to( ( siz_t )y, 32 ) ) { use_ref = TRUE; } // Call the reference implementation if needed. 
if ( use_ref == TRUE ) { BLIS_DAXPYV_KERNEL_REF( conjx, n, alpha, x, incx, y, incy, cntx ); return; } dim_t n_run = n / 4; dim_t n_left = n % 4; vector4double xv, yv, zv; vector4double alphav = vec_lds( 0 * sizeof(double), (double*)alpha ); #pragma omp parallel for for ( dim_t i = 0; i < n_run; i++ ) { xv = vec_lda( 0 * sizeof(double), &x[i*4] ); yv = vec_lda( 0 * sizeof(double), &y[i*4] ); zv = vec_madd( alphav, xv, yv ); vec_sta( zv, 0 * sizeof(double), &y[i*4] ); } for ( dim_t i = 0; i < n_left; i++ ) { y[4*n_run + i] += *alpha * x[4*n_run + i]; } } blis-0.6.1/kernels/bgq/1/bli_dotv_bgq_int.c000066400000000000000000000064641360743507500205200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_ddotv_bgq_int ( conj_t conjx, conj_t conjy, dim_t n, double* restrict x, inc_t incx, double* restrict y, inc_t incy, double* restrict rho, cntx_t* restrict cntx ) { bool_t use_ref = FALSE; // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { PASTEMAC(d,set0s)( *rho ); return; } // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( incx != 1 || incy != 1 || bli_is_unaligned_to( ( siz_t )x, 32 ) || bli_is_unaligned_to( ( siz_t )y, 32 ) ) use_ref = TRUE; // Call the reference implementation if needed. 
if ( use_ref ) { BLIS_DDOTV_KERNEL_REF( conjx, conjy, n, x, incx, y, incy, rho, cntx ); return; } dim_t n_run = n / 4; dim_t n_left = n % 4; double rhos = 0.0; #pragma omp parallel reduction(+:rhos) { dim_t n_threads; dim_t t_id = omp_get_thread_num(); n_threads = omp_get_num_threads(); vector4double rhov = vec_splats( 0.0 ); vector4double xv, yv; for ( dim_t i = t_id; i < n_run; i += n_threads ) { xv = vec_lda( 0 * sizeof(double), &x[i*4] ); yv = vec_lda( 0 * sizeof(double), &y[i*4] ); rhov = vec_madd( xv, yv, rhov ); } rhos += vec_extract( rhov, 0 ); rhos += vec_extract( rhov, 1 ); rhos += vec_extract( rhov, 2 ); rhos += vec_extract( rhov, 3 ); } for ( dim_t i = 0; i < n_left; i++ ) { rhos += x[4*n_run + i] * y[4*n_run + i]; } *rho = rhos; } blis-0.6.1/kernels/bgq/1f/000077500000000000000000000000001360743507500152035ustar00rootroot00000000000000blis-0.6.1/kernels/bgq/1f/bli_axpyf_bgq_int.c000066400000000000000000000126611360743507500210350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_daxpyf_bgq_int ( conj_t conja, conj_t conjx, dim_t m, dim_t b_n, double* restrict alpha, double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, cntx_t* restrict cntx ) { const dim_t fusefac = 8; if ( bli_zero_dim2( m, b_n ) ) return; bool_t use_ref = FALSE; // printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\n", b_n, fusefac, inca, incx, incy, bli_is_unaligned_to( ( siz_t )a, 32 ), bli_is_unaligned_to( ( siz_t )y, 32)); // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( ( b_n < fusefac) || inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( ( siz_t )a, 32 ) || bli_is_unaligned_to( ( siz_t )y, 32 ) ) use_ref = TRUE; // Call the reference implementation if needed. 
if ( use_ref == TRUE ) { // printf("%d\t%d\t%d\t%d\t%d\t%d\n", fusefac, inca, incx, incy, bli_is_unaligned_to( ( siz_t )a, 32 ), bli_is_unaligned_to( ( siz_t )y, 32)); // printf("DEFAULTING TO REFERENCE IMPLEMENTATION\n"); BLIS_DAXPYF_KERNEL_REF( conja, conjx, m, b_n, alpha, a, inca, lda, x, incx, y, incy, cntx ); return; } dim_t m_run = m / 4; dim_t m_left = m % 4; double * a0 = a + 0*lda; double * a1 = a + 1*lda; double * a2 = a + 2*lda; double * a3 = a + 3*lda; double * a4 = a + 4*lda; double * a5 = a + 5*lda; double * a6 = a + 6*lda; double * a7 = a + 7*lda; double * y0 = y; double chi0 = *(x + 0*incx); double chi1 = *(x + 1*incx); double chi2 = *(x + 2*incx); double chi3 = *(x + 3*incx); double chi4 = *(x + 4*incx); double chi5 = *(x + 5*incx); double chi6 = *(x + 6*incx); double chi7 = *(x + 7*incx); PASTEMAC2(d,d,scals)( *alpha, chi0 ); PASTEMAC2(d,d,scals)( *alpha, chi1 ); PASTEMAC2(d,d,scals)( *alpha, chi2 ); PASTEMAC2(d,d,scals)( *alpha, chi3 ); PASTEMAC2(d,d,scals)( *alpha, chi4 ); PASTEMAC2(d,d,scals)( *alpha, chi5 ); PASTEMAC2(d,d,scals)( *alpha, chi6 ); PASTEMAC2(d,d,scals)( *alpha, chi7 ); vector4double a0v, a1v, a2v, a3v, a4v, a5v, a6v, a7v; vector4double yv; vector4double chi0v, chi1v, chi2v, chi3v, chi4v, chi5v, chi6v, chi7v; chi0v = vec_splats( chi0 ); chi1v = vec_splats( chi1 ); chi2v = vec_splats( chi2 ); chi3v = vec_splats( chi3 ); chi4v = vec_splats( chi4 ); chi5v = vec_splats( chi5 ); chi6v = vec_splats( chi6 ); chi7v = vec_splats( chi7 ); for ( dim_t i = 0; i < m_run; i += 1 ) { yv = vec_lda( 0 * sizeof(double), &y0[i*4]); a0v = vec_lda( 0 * sizeof(double), &a0[i*4]); a1v = vec_lda( 0 * sizeof(double), &a1[i*4]); a2v = vec_lda( 0 * sizeof(double), &a2[i*4]); a3v = vec_lda( 0 * sizeof(double), &a3[i*4]); a4v = vec_lda( 0 * sizeof(double), &a4[i*4]); a5v = vec_lda( 0 * sizeof(double), &a5[i*4]); a6v = vec_lda( 0 * sizeof(double), &a6[i*4]); a7v = vec_lda( 0 * sizeof(double), &a7[i*4]); yv = vec_madd( chi0v, a0v, yv ); yv = vec_madd( chi1v, 
a1v, yv ); yv = vec_madd( chi2v, a2v, yv ); yv = vec_madd( chi3v, a3v, yv ); yv = vec_madd( chi4v, a4v, yv ); yv = vec_madd( chi5v, a5v, yv ); yv = vec_madd( chi6v, a6v, yv ); yv = vec_madd( chi7v, a7v, yv ); vec_sta( yv, 0 * sizeof(double), &y0[i*4]); } for ( dim_t i = 0; i < m_left; ++i ) { y0[4*m_run + i] += chi0 * a0[4*m_run + i] + chi1 * a1[4*m_run + i] + chi2 * a2[4*m_run + i] + chi3 * a3[4*m_run + i] + chi4 * a4[4*m_run + i] + chi5 * a5[4*m_run + i] + chi6 * a6[4*m_run + i] + chi7 * a7[4*m_run + i]; } } blis-0.6.1/kernels/bgq/3/000077500000000000000000000000001360743507500150375ustar00rootroot00000000000000blis-0.6.1/kernels/bgq/3/bli_gemm_bgq_int_8x8.c000066400000000000000000000322541360743507500211760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef restrict #include #include /* * Here is dgemm kernel for QPX. * Instruction mix was divined by a statement in an email from John Gunnels when asked about the peak performance with a single thread: * "Achievable peak can either be: * 1) 12.8 GF 8 FMAs cycle * 1.6 GHz * 2) 8.53 GF Takes intoo account the instruction mix in DGEMM and the fact that you can only do an FMA or a load/store in a single cycle with just one thread * 3) 7.58 GF (2) + the fact that we can only issue 8 instructions in 9 cycles with one thread" * * Which I have taken to mean: 8.53 GFLOPS implies on average 5.33 flops/cycle. * I know the kernel John uses is 8x8, so 16 flops per loop iteration. * Thus there must be 24 total instructions per iteration because 16/24 = 5.33. * * Here, we have 6 loads per iteration. These are executed on a different pipeline from FMAs so * we could (maybe) theoretically hit 100% of peak with this instruction mix */ void bli_dgemm_bgq_int_8x8 ( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //Registers for storing C. 
//4 4x4 subblocks of C, c00, c01, c10, c11 //4 registers per subblock: a, b, c, d //There is an excel file that details which register ends up storing what vector4double c00a = vec_splats( 0.0 ); vector4double c00b = vec_splats( 0.0 ); vector4double c00c = vec_splats( 0.0 ); vector4double c00d = vec_splats( 0.0 ); vector4double c01a = vec_splats( 0.0 ); vector4double c01b = vec_splats( 0.0 ); vector4double c01c = vec_splats( 0.0 ); vector4double c01d = vec_splats( 0.0 ); vector4double c10a = vec_splats( 0.0 ); vector4double c10b = vec_splats( 0.0 ); vector4double c10c = vec_splats( 0.0 ); vector4double c10d = vec_splats( 0.0 ); vector4double c11a = vec_splats( 0.0 ); vector4double c11b = vec_splats( 0.0 ); vector4double c11c = vec_splats( 0.0 ); vector4double c11d = vec_splats( 0.0 ); vector4double b0a, b1a; vector4double b0b, b1b; vector4double a0, a1; for( dim_t i = 0; i < k; i++ ) { b0a = vec_ld2a( 0 * sizeof(double), &b[8*i] ); b0b = vec_ld2a( 2 * sizeof(double), &b[8*i] ); b1a = vec_ld2a( 4 * sizeof(double), &b[8*i] ); b1b = vec_ld2a( 6 * sizeof(double), &b[8*i] ); a0 = vec_lda ( 0 * sizeof(double), &a[8*i] ); a1 = vec_lda ( 4 * sizeof(double), &a[8*i] ); c00a = vec_xmadd ( b0a, a0, c00a ); c00b = vec_xxmadd( a0, b0a, c00b ); c00c = vec_xmadd ( b0b, a0, c00c ); c00d = vec_xxmadd( a0, b0b, c00d ); c01a = vec_xmadd ( b1a, a0, c01a ); c01b = vec_xxmadd( a0, b1a, c01b ); c01c = vec_xmadd ( b1b, a0, c01c ); c01d = vec_xxmadd( a0, b1b, c01d ); c10a = vec_xmadd ( b0a, a1, c10a ); c10b = vec_xxmadd( a1, b0a, c10b ); c10c = vec_xmadd ( b0b, a1, c10c ); c10d = vec_xxmadd( a1, b0b, c10d ); c11a = vec_xmadd ( b1a, a1, c11a ); c11b = vec_xxmadd( a1, b1a, c11b ); c11c = vec_xmadd ( b1b, a1, c11c ); c11d = vec_xxmadd( a1, b1b, c11d ); } // Create patterns for permuting Cb and Cd vector4double pattern = vec_gpci( 01032 ); vector4double AB; vector4double C = vec_splats( 0.0 ); vector4double betav = vec_lds( 0, ( double* )beta ); vector4double alphav = vec_lds( 0, ( double* 
)alpha ); double ct; //Macro to update 4 elements of C in a column. //REG is the register holding those 4 elements //ADDR is the address to write them to //OFFSET is the number of rows from ADDR to write to #define UPDATE( REG, ADDR, OFFSET ) \ { \ ct = *(ADDR + (OFFSET + 0) * rs_c); \ C = vec_insert( ct, C, 0 ); \ ct = *(ADDR + (OFFSET + 1) * rs_c); \ C = vec_insert( ct, C, 1 ); \ ct = *(ADDR + (OFFSET + 2) * rs_c); \ C = vec_insert( ct, C, 2 ); \ ct = *(ADDR + (OFFSET + 3) * rs_c); \ C = vec_insert( ct, C, 3 ); \ \ AB = vec_mul( REG, alphav ); \ AB = vec_madd( C, betav, AB); \ \ ct = vec_extract( AB, 0 ); \ *(ADDR + (OFFSET + 0) * rs_c) = ct; \ ct = vec_extract( AB, 1 ); \ *(ADDR + (OFFSET + 1) * rs_c) = ct; \ ct = vec_extract( AB, 2 ); \ *(ADDR + (OFFSET + 2) * rs_c) = ct; \ ct = vec_extract( AB, 3 ); \ *(ADDR + (OFFSET + 3) * rs_c) = ct; \ } //Update c00 and c10 sub-blocks UPDATE( c00a, c, 0 ); UPDATE( c10a, c, 4 ); c = c + cs_c; AB = vec_perm( c00b, c00b, pattern ); UPDATE( AB, c, 0 ); AB = vec_perm( c10b, c10b, pattern ); UPDATE( AB, c, 4 ); c = c + cs_c; UPDATE( c00c, c, 0 ); UPDATE( c10c, c, 4 ); c = c + cs_c; AB = vec_perm( c00d, c00d, pattern ); UPDATE( AB, c, 0 ); AB = vec_perm( c10d, c10d, pattern ); UPDATE( AB, c, 4 ); //Update c01 and c11 sub-blocks c = c + cs_c; UPDATE( c01a, c, 0 ); UPDATE( c11a, c, 4 ); c = c + cs_c; AB = vec_perm( c01b, c01b, pattern ); UPDATE( AB, c, 0 ); AB = vec_perm( c11b, c11b, pattern ); UPDATE( AB, c, 4 ); c = c + cs_c; UPDATE( c01c, c, 0 ); UPDATE( c11c, c, 4 ); c = c + cs_c; AB = vec_perm( c01d, c01d, pattern ); UPDATE( AB, c, 0 ); AB = vec_perm( c11d, c11d, pattern ); UPDATE( AB, c, 4 ); } void printvec(vector4double v) { double a = vec_extract(v, 0); double b = vec_extract(v, 1); double c = vec_extract(v, 2); double d = vec_extract(v, 3); printf("%4.3f\t%4.3f\t%4.3f\t%4.3f\n", a, b, c, d); } void bli_zgemm_bgq_int_4x4 ( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict 
beta, dcomplex* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { double* a_d = ( double* )a; double* b_d = ( double* )b; double* c_d = ( double* )c; //Registers for storing C. //2 2x4 subblocks of C, c0, and c1 //Each sub-block has 4 columns, 0, 1, 2, 3 //Each column has 2 partial sum, a and b, and contains 2 complex numbers. vector4double c00a = vec_splats( 0.0 ); vector4double c00b = vec_splats( 0.0 ); vector4double c01a = vec_splats( 0.0 ); vector4double c01b = vec_splats( 0.0 ); vector4double c02a = vec_splats( 0.0 ); vector4double c02b = vec_splats( 0.0 ); vector4double c03a = vec_splats( 0.0 ); vector4double c03b = vec_splats( 0.0 ); vector4double c10a = vec_splats( 0.0 ); vector4double c10b = vec_splats( 0.0 ); vector4double c11a = vec_splats( 0.0 ); vector4double c11b = vec_splats( 0.0 ); vector4double c12a = vec_splats( 0.0 ); vector4double c12b = vec_splats( 0.0 ); vector4double c13a = vec_splats( 0.0 ); vector4double c13b = vec_splats( 0.0 ); vector4double b0, b1, b2, b3; vector4double a0, a1; for( dim_t i = 0; i < k; i++ ) { b0 = vec_ld2a( 0 * sizeof(double), &b_d[8*i] ); b1 = vec_ld2a( 2 * sizeof(double), &b_d[8*i] ); b2 = vec_ld2a( 4 * sizeof(double), &b_d[8*i] ); b3 = vec_ld2a( 6 * sizeof(double), &b_d[8*i] ); a0 = vec_lda ( 0 * sizeof(double), &a_d[8*i] ); a1 = vec_lda ( 4 * sizeof(double), &a_d[8*i] ); c00a = vec_xmadd ( b0, a0, c00a ); c00b = vec_xxcpnmadd( a0, b0, c00b ); c01a = vec_xmadd ( b1, a0, c01a ); c01b = vec_xxcpnmadd( a0, b1, c01b ); c02a = vec_xmadd ( b2, a0, c02a ); c02b = vec_xxcpnmadd( a0, b2, c02b ); c03a = vec_xmadd ( b3, a0, c03a ); c03b = vec_xxcpnmadd( a0, b3, c03b ); c10a = vec_xmadd ( b0, a1, c10a ); c10b = vec_xxcpnmadd( a1, b0, c10b ); c11a = vec_xmadd ( b1, a1, c11a ); c11b = vec_xxcpnmadd( a1, b1, c11b ); c12a = vec_xmadd ( b2, a1, c12a ); c12b = vec_xxcpnmadd( a1, b2, c12b ); c13a = vec_xmadd ( b3, a1, c13a ); c13b = vec_xxcpnmadd( a1, b3, c13b ); } // Create patterns for permuting 
the "b" parts of each vector vector4double pattern = vec_gpci( 01032 ); vector4double zed = vec_splats( 0.0 ); vector4double AB; vector4double C = vec_splats( 0.0 ); vector4double C1 = vec_splats( 0.0 ); vector4double C2 = vec_splats( 0.0 ); double alphar = bli_zreal( *alpha ); double alphai = bli_zimag( *alpha ); double betar = bli_zreal( *beta ); double betai = bli_zimag( *beta ); vector4double alphav = vec_splats( 0.0 ); vector4double betav = vec_splats( 0.0 ); alphav = vec_insert( alphar, alphav, 0); alphav = vec_insert( alphai, alphav, 1); alphav = vec_insert( alphar, alphav, 2); alphav = vec_insert( alphai, alphav, 3); betav = vec_insert( betar, betav, 0); betav = vec_insert( betai, betav, 1); betav = vec_insert( betar, betav, 2); betav = vec_insert( betai, betav, 3); double ct; //Macro to update 2 elements of C in a column. //REG1 is the register holding the first partial sum of those 2 elements //REG2 is the register holding the second partial sum of those 2 elements //ADDR is the address to write them to //OFFSET is the number of rows from ADDR to write to #define ZUPDATE( REG1, REG2, ADDR, OFFSET ) \ { \ ct = *(ADDR + (OFFSET + 0) * rs_c); \ C = vec_insert( ct, C, 0 ); \ ct = *(ADDR + (OFFSET + 0) * rs_c + 1); \ C = vec_insert( ct, C, 1 ); \ ct = *(ADDR + (OFFSET + 2) * rs_c); \ C = vec_insert( ct, C, 2 ); \ ct = *(ADDR + (OFFSET + 2) * rs_c + 1); \ C = vec_insert( ct, C, 3 ); \ \ AB = vec_sub(REG1, REG2 ); \ \ /* Scale by alpha */ \ REG1 = vec_xmadd( alphav, AB, zed ); \ REG2 = vec_xxcpnmadd( AB, alphav, zed ); \ AB = vec_sub(REG1, REG2 ); \ \ \ /* Scale by beta */ \ REG1 = vec_xmadd( betav, C, zed ); \ REG2 = vec_xxcpnmadd( C, betav, zed ); \ C = vec_sub(REG1, REG2 ); \ \ /* Add AB to C */ \ C = vec_add( AB, C ); \ \ ct = vec_extract( C, 0 ); \ *(ADDR + (OFFSET + 0) * rs_c) = ct; \ ct = vec_extract( C, 1 ); \ *(ADDR + (OFFSET + 0) * rs_c + 1) = ct; \ ct = vec_extract( C, 2 ); \ *(ADDR + (OFFSET + 2) * rs_c) = ct; \ ct = vec_extract( C, 3 ); \ *(ADDR + 
(OFFSET + 2) * rs_c + 1) = ct; \ } ZUPDATE( c00a, c00b, c_d, 0 ); ZUPDATE( c10a, c10b, c_d, 4 ); c_d += 2*cs_c; ZUPDATE( c01a, c01b, c_d, 0 ); ZUPDATE( c11a, c11b, c_d, 4 ); c_d += 2*cs_c; ZUPDATE( c02a, c02b, c_d, 0 ); ZUPDATE( c12a, c12b, c_d, 4 ); c_d += 2*cs_c; ZUPDATE( c03a, c03b, c_d, 0 ); ZUPDATE( c13a, c13b, c_d, 4 ); } blis-0.6.1/kernels/bgq/bli_kernels_bgq.h000066400000000000000000000035561360743507500202010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ GEMM_UKR_PROT( double, d, gemm_bgq_int_8x8 ) GEMM_UKR_PROT( dcomplex, z, gemm_bgq_int_4x4 ) AXPYF_KER_PROT( double, d, axpyf_bgq_int ) AXPYV_KER_PROT( double, d, axpyv_bgq_int ) DOTV_KER_PROT( double, d, dotv_bgq_int ) blis-0.6.1/kernels/bulldozer/000077500000000000000000000000001360743507500161265ustar00rootroot00000000000000blis-0.6.1/kernels/bulldozer/3/000077500000000000000000000000001360743507500162705ustar00rootroot00000000000000blis-0.6.1/kernels/bulldozer/3/bli_gemm_bulldozer_asm_d4x6_fma4.c000066400000000000000000002252271360743507500247170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" #define GROUP_YMM_BY_4 \ vmovaps(ymm15, ymm7)\ vshufps(imm(0xe4), ymm13, ymm15, ymm15)\ vshufps(imm(0xe4), ymm7, ymm13, ymm13)\ \ vmovaps(ymm11, ymm7)\ vshufps(imm(0xe4), ymm9, ymm11, ymm11)\ vshufps(imm(0xe4), ymm7, ymm9, ymm9)\ \ vmovaps(ymm14, ymm7)\ vshufps(imm(0xe4), ymm12, ymm14, ymm14)\ vshufps(imm(0xe4), ymm7, ymm12, ymm12)\ \ vmovaps(ymm10, ymm7)\ vshufps(imm(0xe4), ymm8, ymm10, ymm10)\ vshufps(imm(0xe4), ymm7, ymm8, ymm8)\ \ vmovaps(ymm15, ymm7)\ vperm2f128(imm(0x12), ymm15, ymm11, ymm15)\ vperm2f128(imm(0x30), ymm7, ymm11, ymm11)\ \ vmovaps(ymm13, ymm7)\ vperm2f128(imm(0x12), ymm13, ymm9, ymm13)\ vperm2f128(imm(0x30), ymm7, ymm9, ymm9)\ \ vmovaps(ymm14, ymm7)\ vperm2f128(imm(0x12), ymm14, ymm10, ymm14)\ vperm2f128(imm(0x30), ymm7, ymm10, ymm10)\ \ vmovaps(ymm12, ymm7)\ vperm2f128(imm(0x12), ymm12, ymm8, ymm12)\ vperm2f128(imm(0x30), ymm7, ymm8, ymm8) #define STORE_SS \ vextractf128(imm(1), ymm0, xmm2)\ vmovss(xmm0, mem(rcx))\ vpermilps(imm(0x39), xmm0, xmm1)\ vmovss(xmm1, mem(rcx, rsi, 1))\ vpermilps(imm(0x39), xmm1, xmm0)\ vmovss(xmm0, mem(rcx, r12, 1))\ vpermilps(imm(0x39), xmm0, xmm1)\ vmovss(xmm1, mem(rcx, r13, 1))\ vmovss(xmm2, 
mem(rdx))\ vpermilps(imm(0x39), xmm2, xmm3)\ vmovss(xmm3, mem(rdx, rsi, 1))\ vpermilps(imm(0x39), xmm3, xmm2)\ vmovss(xmm2, mem(rdx, r12, 1))\ vpermilps(imm(0x39), xmm2, xmm3)\ vmovss(xmm3, mem(rdx, r13, 1))\ void bli_sgemm_bulldozer_asm_8x8_fma4 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. vmovaps(mem(rax, 0*32), ymm0) // initialize loop by pre-loading vmovsldup(mem(rbx, 0*32), ymm2) // elements of a and b. vpermilps(imm(0x4e), ymm2, ymm3) mov(var(c), rcx) // load address of c mov(var(cs_c), rdi) // load cs_c lea(mem(, rdi, 4), rdi) // cs_c *= sizeof(float) lea(mem(rcx, rdi, 4), r10) // load address of c + 4*cs_c; lea(mem(rdi, rdi, 2), r14) // r14 = 3*cs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*cs_c prefetch(0, mem(rcx, r14, 1, 7*8)) // prefetch c + 3*cs_c prefetch(0, mem(r10, 7*8)) // prefetch c + 4*cs_c prefetch(0, mem(r10, rdi, 1, 7*8)) // prefetch c + 5*cs_c prefetch(0, mem(r10, rdi, 2, 7*8)) // prefetch c + 6*cs_c prefetch(0, mem(r10, r14, 1, 7*8)) // prefetch c + 7*cs_c vxorps(ymm8, ymm8, ymm8) vxorps(ymm9, ymm9, ymm9) vxorps(ymm10, ymm10, ymm10) vxorps(ymm11, ymm11, ymm11) vxorps(ymm12, ymm12, ymm12) vxorps(ymm13, ymm13, ymm13) vxorps(ymm14, ymm14, ymm14) vxorps(ymm15, ymm15, ymm15) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.SCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.SLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 16*32)) vfmaddps(ymm15, ymm0, ymm2, ymm15) vperm2f128(imm(0x03), ymm2, ymm2, ymm4) vmovshdup(mem(rbx, 0*32), ymm2) vfmaddps(ymm13, ymm0, ymm3, ymm13) vperm2f128(imm(0x03), ymm3, ymm3, ymm5) vmovaps(mem(rax, 1*32), ymm1) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm11, ymm0, ymm4, ymm11) vfmaddps(ymm9, ymm0, ymm5, ymm9) vfmaddps(ymm14, ymm0, ymm2, ymm14) vperm2f128(imm(0x03), ymm2, ymm2, ymm4) vmovsldup(mem(rbx, 1*32), ymm2) vfmaddps(ymm12, ymm0, ymm3, ymm12) vperm2f128(imm(0x03), ymm3, ymm3, ymm5) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm10, ymm0, ymm4, ymm10) vfmaddps(ymm8, ymm0, ymm5, ymm8) // iteration 1 vfmaddps(ymm15, ymm1, ymm2, ymm15) vperm2f128(imm(0x03), ymm2, ymm2, ymm4) vmovshdup(mem(rbx, 1*32), ymm2) vfmaddps(ymm13, ymm1, ymm3, ymm13) vperm2f128(imm(0x03), ymm3, ymm3, ymm5) vmovaps(mem(rax, 2*32), ymm0) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm11, ymm1, ymm4, ymm11) vfmaddps(ymm9, ymm1, ymm5, ymm9) vfmaddps(ymm14, ymm1, ymm2, ymm14) vperm2f128(imm(0x03), ymm2, ymm2, ymm4) vmovsldup(mem(rbx, 2*32), ymm2) vfmaddps(ymm12, ymm1, ymm3, ymm12) vperm2f128(imm(0x03), ymm3, ymm3, ymm5) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm10, ymm1, ymm4, ymm10) vfmaddps(ymm8, ymm1, ymm5, ymm8) // iteration 2 prefetch(0, mem(rax, 18*32)) vfmaddps(ymm15, ymm0, ymm2, ymm15) vperm2f128(imm(0x03), ymm2, ymm2, ymm4) vmovshdup(mem(rbx, 2*32), ymm2) vfmaddps(ymm13, ymm0, ymm3, ymm13) vperm2f128(imm(0x03), ymm3, ymm3, ymm5) vmovaps(mem(rax, 3*32), ymm1) add(imm(4*8*4), rax) // a += 4*8 (unroll x mr) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm11, ymm0, ymm4, ymm11) vfmaddps(ymm9, ymm0, ymm5, ymm9) vfmaddps(ymm14, ymm0, ymm2, ymm14) vperm2f128(imm(0x03), ymm2, ymm2, ymm4) vmovsldup(mem(rbx, 3*32), ymm2) vfmaddps(ymm12, ymm0, ymm3, ymm12) vperm2f128(imm(0x03), ymm3, ymm3, ymm5) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm10, ymm0, ymm4, ymm10) vfmaddps(ymm8, ymm0, ymm5, ymm8) // iteration 3 
vfmaddps(ymm15, ymm1, ymm2, ymm15) vperm2f128(imm(0x03), ymm2, ymm2, ymm4) vmovshdup(mem(rbx, 3*32), ymm2) add(imm(4*8*4), rbx) // b += 4*8 (unroll x nr) vfmaddps(ymm13, ymm1, ymm3, ymm13) vperm2f128(imm(0x03), ymm3, ymm3, ymm5) vmovaps(mem(rax, 0*32), ymm0) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm11, ymm1, ymm4, ymm11) vfmaddps(ymm9, ymm1, ymm5, ymm9) vfmaddps(ymm14, ymm1, ymm2, ymm14) vperm2f128(imm(0x03), ymm2, ymm2, ymm4) vmovsldup(mem(rbx, 0*32), ymm2) vfmaddps(ymm12, ymm1, ymm3, ymm12) vperm2f128(imm(0x03), ymm3, ymm3, ymm5) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm10, ymm1, ymm4, ymm10) vfmaddps(ymm8, ymm1, ymm5, ymm8) dec(rsi) // i -= 1; jne(.SLOOPKITER) // iterate again if i != 0. label(.SCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.SPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.SLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 16*32)) vfmaddps(ymm15, ymm0, ymm2, ymm15) vperm2f128(imm(0x3), ymm2, ymm2, ymm4) vmovshdup(mem(rbx, 0*32), ymm2) vfmaddps(ymm13, ymm0, ymm3, ymm13) vperm2f128(imm(0x3), ymm3, ymm3, ymm5) vmovaps(mem(rax, 1*32), ymm1) add(imm(8*1*4), rax) // a += 8 (1 x mr) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm11, ymm0, ymm4, ymm11) vfmaddps(ymm9, ymm0, ymm5, ymm9) vfmaddps(ymm14, ymm0, ymm2, ymm14) vperm2f128(imm(0x3), ymm2, ymm2, ymm4) vmovsldup(mem(rbx, 1*32), ymm2) add(imm(8*1*4), rbx) // b += 8 (1 x nr) vfmaddps(ymm12, ymm0, ymm3, ymm12) vperm2f128(imm(0x3), ymm3, ymm3, ymm5) vpermilps(imm(0x4e), ymm2, ymm3) vfmaddps(ymm10, ymm0, ymm4, ymm10) vfmaddps(ymm8, ymm0, ymm5, ymm8) vmovaps(ymm1, ymm0) dec(rsi) // i -= 1; jne(.SLOOPKLEFT) // iterate again if i != 0. 
label(.SPOSTACCUM) // ymm15: ymm13: ymm11: ymm9: // ( ab00 ( ab02 ( ab04 ( ab06 // ab10 ab12 ab14 ab16 // ab22 ab20 ab26 ab24 // ab32 ab30 ab36 ab34 // ab44 ab46 ab40 ab42 // ab54 ab56 ab50 ab52 // ab66 ab64 ab62 ab60 // ab76 ) ab74 ) ab72 ) ab70 ) // ymm14: ymm12: ymm10: ymm8: // ( ab01 ( ab03 ( ab05 ( ab07 // ab11 ab13 ab15 ab17 // ab23 ab21 ab27 ab25 // ab33 ab31 ab37 ab35 // ab45 ab47 ab41 ab43 // ab55 ab57 ab51 ab53 // ab67 ab65 ab63 ab61 // ab77 ) ab75 ) ab73 ) ab71 ) GROUP_YMM_BY_4 // ymm15: ymm13: ymm11: ymm9: // ( ab00 ( ab02 ( ab04 ( ab06 // ab10 ab12 ab14 ab16 // ab20 ab22 ab24 ab26 // ab30 ab32 ab34 ab36 // ab44 ab46 ab40 ab42 // ab54 ab56 ab50 ab52 // ab64 ab66 ab60 ab62 // ab74 ) ab76 ) ab70 ) ab72 ) // ymm14: ymm12: ymm10: ymm8: // ( ab01 ( ab03 ( ab05 ( ab07 // ab11 ab13 ab15 ab17 // ab21 ab23 ab25 ab27 // ab31 ab33 ab35 ab37 // ab45 ab47 ab41 ab43 // ab55 ab57 ab51 ab53 // ab65 ab67 ab61 ab63 // ab75 ) ab77 ) ab71 ) ab73 ) // ymm15: ymm13: ymm11: ymm9: // ( ab00 ( ab02 ( ab04 ( ab06 // ab10 ab12 ab14 ab16 // ab20 ab22 ab24 ab26 // ab30 ab32 ab34 ab36 // ab40 ab42 ab44 ab46 // ab50 ab52 ab54 ab56 // ab60 ab62 ab64 ab66 // ab70 ) ab72 ) ab74 ) ab76 ) // ymm14: ymm12: ymm10: ymm8: // ( ab01 ( ab03 ( ab05 ( ab07 // ab11 ab13 ab15 ab17 // ab21 ab23 ab25 ab27 // ab31 ab33 ab35 ab37 // ab41 ab43 ab45 ab47 // ab51 ab53 ab55 ab57 // ab61 ab63 ab65 ab67 // ab71 ) ab73 ) ab75 ) ab77 ) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastss(mem(rax), ymm0) // load alpha and duplicate vbroadcastss(mem(rbx), ymm4) // load beta and duplicate vmulps(ymm0, ymm8, ymm8) // scale by alpha vmulps(ymm0, ymm9, ymm9) vmulps(ymm0, ymm10, ymm10) vmulps(ymm0, ymm11, ymm11) vmulps(ymm0, ymm12, ymm12) vmulps(ymm0, ymm13, ymm13) vmulps(ymm0, ymm14, ymm14) vmulps(ymm0, ymm15, ymm15) mov(var(rs_c), rsi) // load rs_c lea(mem(, rsi, 4), rsi) // rsi = rs_c * sizeof(float) lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*rs_c; 
lea(mem(, rsi, 2), r12) // r12 = 2*rs_c; lea(mem(r12, rsi, 1), r13) // r13 = 3*rs_c; // determine if // c % 32 == 0, AND // 4*cs_c % 32 == 0, AND // rs_c == 1 // ie: aligned, ldim aligned, and // column-stored cmp(imm(4), rsi) // set ZF if (4*rs_c) == 4. sete(bl) // bl = ( ZF == 1 ? 1 : 0 ); test(imm(31), rcx) // set ZF if c & 32 is zero. setz(bh) // bh = ( ZF == 0 ? 1 : 0 ); test(imm(31), rdi) // set ZF if (4*cs_c) & 32 is zero. setz(al) // al = ( ZF == 0 ? 1 : 0 ); // and(bl,bh) followed by // and(bh,al) will reveal result // now avoid loading C if beta == 0 vxorps(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomiss(xmm0, xmm4) // set ZF if beta == 0. je(.SBETAZERO) // if ZF = 1, jump to beta == 0 case // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. jne(.SCOLSTORED) // jump to column storage case label(.SGENSTORED) // update c00:c70 vmovlps(mem(rcx), xmm0, xmm0) vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmovlps(mem(rdx), xmm2, xmm2) vmovhps(mem(rdx, rsi, 1), xmm2, xmm2) vmovlps(mem(rdx, r12, 1), xmm3, xmm3) vmovhps(mem(rdx, r13, 1), xmm3, xmm3) vshufps(imm(0x88), xmm3, xmm2, xmm2) vperm2f128(imm(0x20), ymm2, ymm0, ymm0) vfmaddps(ymm15, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c01:c71 vmovlps(mem(rcx), xmm0, xmm0) vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmovlps(mem(rdx), xmm2, xmm2) vmovhps(mem(rdx, rsi, 1), xmm2, xmm2) vmovlps(mem(rdx, r12, 1), xmm3, xmm3) vmovhps(mem(rdx, r13, 1), xmm3, xmm3) vshufps(imm(0x88), xmm3, xmm2, xmm2) vperm2f128(imm(0x20), ymm2, ymm0, ymm0) // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm14, ymm0, ymm0) // add the gemm result, vfmaddps(ymm14, ymm0, ymm4, 
ymm0) // scale by beta and add the gemm result, STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c02:c72 vmovlps(mem(rcx), xmm0, xmm0) vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmovlps(mem(rdx), xmm2, xmm2) vmovhps(mem(rdx, rsi, 1), xmm2, xmm2) vmovlps(mem(rdx, r12, 1), xmm3, xmm3) vmovhps(mem(rdx, r13, 1), xmm3, xmm3) vshufps(imm(0x88), xmm3, xmm2, xmm2) vperm2f128(imm(0x20), ymm2, ymm0, ymm0) // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm13, ymm0, ymm0) // add the gemm result, vfmaddps(ymm13, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c03:c73 vmovlps(mem(rcx), xmm0, xmm0) vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmovlps(mem(rdx), xmm2, xmm2) vmovhps(mem(rdx, rsi, 1), xmm2, xmm2) vmovlps(mem(rdx, r12, 1), xmm3, xmm3) vmovhps(mem(rdx, r13, 1), xmm3, xmm3) vshufps(imm(0x88), xmm3, xmm2, xmm2) vperm2f128(imm(0x20), ymm2, ymm0, ymm0) // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm12, ymm0, ymm0) // add the gemm result, vfmaddps(ymm12, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c04:c74 vmovlps(mem(rcx), xmm0, xmm0) vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmovlps(mem(rdx), xmm2, xmm2) vmovhps(mem(rdx, rsi, 1), xmm2, xmm2) vmovlps(mem(rdx, r12, 1), xmm3, xmm3) vmovhps(mem(rdx, r13, 1), xmm3, xmm3) vshufps(imm(0x88), xmm3, xmm2, xmm2) vperm2f128(imm(0x20), ymm2, ymm0, ymm0) // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm11, ymm0, ymm0) // add the gemm result, vfmaddps(ymm11, ymm0, ymm4, ymm0) // scale by 
beta and add the gemm result, STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c05:c75 vmovlps(mem(rcx), xmm0, xmm0) vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmovlps(mem(rdx), xmm2, xmm2) vmovhps(mem(rdx, rsi, 1), xmm2, xmm2) vmovlps(mem(rdx, r12, 1), xmm3, xmm3) vmovhps(mem(rdx, r13, 1), xmm3, xmm3) vshufps(imm(0x88), xmm3, xmm2, xmm2) vperm2f128(imm(0x20), ymm2, ymm0, ymm0) // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm10, ymm0, ymm0) // add the gemm result, vfmaddps(ymm10, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c06:c76 vmovlps(mem(rcx), xmm0, xmm0) vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmovlps(mem(rdx), xmm2, xmm2) vmovhps(mem(rdx, rsi, 1), xmm2, xmm2) vmovlps(mem(rdx, r12, 1), xmm3, xmm3) vmovhps(mem(rdx, r13, 1), xmm3, xmm3) vshufps(imm(0x88), xmm3, xmm2, xmm2) vperm2f128(imm(0x20), ymm2, ymm0, ymm0) // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm9, ymm0, ymm0) // add the gemm result, vfmaddps(ymm9, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c07:c77 vmovlps(mem(rcx), xmm0, xmm0) vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmovlps(mem(rdx), xmm2, xmm2) vmovhps(mem(rdx, rsi, 1), xmm2, xmm2) vmovlps(mem(rdx, r12, 1), xmm3, xmm3) vmovhps(mem(rdx, r13, 1), xmm3, xmm3) vshufps(imm(0x88), xmm3, xmm2, xmm2) vperm2f128(imm(0x20), ymm2, ymm0, ymm0) // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm8, ymm0, ymm0) // add the gemm result, vfmaddps(ymm8, ymm0, ymm4, ymm0) // scale by beta and add the gemm 
result, STORE_SS jmp(.SDONE) // jump to end. label(.SCOLSTORED) vmovaps(mem(rcx), ymm0) // load c00:c70, // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm15, ymm0, ymm0) // add the gemm result, vfmaddps(ymm15, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, vmovaps(ymm0, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(mem(rcx), ymm1) // load c01:c71, // vmulps(ymm4, ymm1, ymm1) // scale by beta, // vaddps(ymm14, ymm1, ymm1) // add the gemm result, vfmaddps(ymm14, ymm1, ymm4, ymm1) // scale by beta and add the gemm result, vmovaps(ymm1, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(mem(rcx), ymm0) // load c02:c72, // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm13, ymm0, ymm0) // add the gemm result, vfmaddps(ymm13, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, vmovaps(ymm0, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(mem(rcx), ymm1) // load c03:c73, // vmulps(ymm4, ymm1, ymm1) // scale by beta, // vaddps(ymm12, ymm1, ymm1) // add the gemm result, vfmaddps(ymm12, ymm1, ymm4, ymm1) // scale by beta and add the gemm result, vmovaps(ymm1, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(mem(rcx), ymm0) // load c04:c74, // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm11, ymm0, ymm0) // add the gemm result, vfmaddps(ymm11, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, vmovaps(ymm0, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(mem(rcx), ymm1) // load c05:c75, // vmulps(ymm4, ymm1, ymm1) // scale by beta, // vaddps(ymm10, ymm1, ymm1) // add the gemm result, vfmaddps(ymm10, ymm1, ymm4, ymm1) // scale by beta and add the gemm result, vmovaps(ymm1, mem(rcx)) // and store back to memory. 
add(rdi, rcx) // c += cs_c; vmovaps(mem(rcx), ymm0) // load c06:c76, // vmulps(ymm4, ymm0, ymm0) // scale by beta, // vaddps(ymm9, ymm0, ymm0) // add the gemm result, vfmaddps(ymm9, ymm0, ymm4, ymm0) // scale by beta and add the gemm result, vmovaps(ymm0, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(mem(rcx), ymm1) // load c07:c77, // vmulps(ymm4, ymm1, ymm1) // scale by beta, // vaddps(ymm8, ymm1, ymm1) // add the gemm result, vfmaddps(ymm8, ymm1, ymm4, ymm1) // scale by beta and add the gemm result, vmovaps(ymm1, mem(rcx)) // and store back to memory. jmp(.SDONE) // jump to end. label(.SBETAZERO) // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. jne(.SCOLSTORBZ) // jump to column storage case label(.SGENSTORBZ) // update c00:c70 vmovapd(ymm15, ymm0) STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c01:c71 vmovapd(ymm14, ymm0) STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c02:c72 vmovapd(ymm13, ymm0) STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c03:c73 vmovapd(ymm12, ymm0) STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c04:c74 vmovapd(ymm11, ymm0) STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c05:c75 vmovapd(ymm10, ymm0) STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c06:c76 vmovapd(ymm9, ymm0) STORE_SS add(rdi, rcx) // c += cs_c; add(rdi, rdx) // c += cs_c; // update c07:c77 vmovapd(ymm8, ymm0) STORE_SS jmp(.SDONE) // jump to end. label(.SCOLSTORBZ) vmovaps(ymm15, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(ymm14, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(ymm13, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(ymm12, mem(rcx)) // and store back to memory. 
add(rdi, rcx) // c += cs_c; vmovaps(ymm11, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(ymm10, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(ymm9, mem(rcx)) // and store back to memory. add(rdi, rcx) // c += cs_c; vmovaps(ymm8, mem(rcx)) // and store back to memory. label(.SDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c)/*, // 8 [b_next] "m" (b_next), // 9 [a_next] "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } #undef KERNEL4x6_1 #undef KERNEL4x6_2 #undef KERNEL4x6_3 #undef KERNEL4x6_4 #define KERNEL4x6_1(xx) \ ALIGN4\ vmovddup(mem(rax, -8*8), xmm0)\ vfmaddpd(xmm4, xmm1, xmm0, xmm4)\ vfmaddpd(xmm5, xmm2, xmm0, xmm5)\ vfmaddpd(xmm6, xmm3, xmm0, xmm6)\ vmovddup(mem(rax, -7*8), xmm0)\ vfmaddpd(xmm7, xmm1, xmm0, xmm7)\ prefetch(0, mem(rax, 128))\ vfmaddpd(xmm8, xmm2, xmm0, xmm8)\ vfmaddpd(xmm9, xmm3, xmm0, xmm9)\ vmovddup(mem(rax, -6*8), xmm0)\ vfmaddpd(xmm10, xmm1, xmm0, xmm10)\ vfmaddpd(xmm11, xmm2, xmm0, xmm11)\ vfmaddpd(xmm12, xmm3, xmm0, xmm12)\ vmovddup(mem(rax, -5*8), xmm0)\ vfmaddpd(xmm13, xmm1, xmm0, xmm13)\ vmovaps(mem(rbx, -6*8), xmm1)\ vfmaddpd(xmm14, xmm2, xmm0, xmm14)\ vmovaps(mem(rbx, -4*8), xmm2)\ vfmaddpd(xmm15, xmm3, xmm0, xmm15)\ vmovaps(mem(rbx, -2*8), xmm3) #define KERNEL4x6_2(xx) \ vmovddup(mem(rax, -4*8), xmm0)\ vfmaddpd(xmm4, xmm1, xmm0, xmm4)\ prefetch(0, mem(rax, 192))\ vfmaddpd(xmm5, xmm2, xmm0, xmm5)\ vfmaddpd(xmm6, xmm3, xmm0, xmm6)\ vmovddup(mem(rax, -3*8), xmm0)\ vfmaddpd(xmm7, xmm1, xmm0, xmm7)\ vfmaddpd(xmm8, xmm2, xmm0, 
xmm8)\
	vfmaddpd(xmm9,  xmm3, xmm0, xmm9)\
	vmovddup(mem(rax, -2*8), xmm0)\
	vfmaddpd(xmm10, xmm1, xmm0, xmm10)\
	vfmaddpd(xmm11, xmm2, xmm0, xmm11)\
	vfmaddpd(xmm12, xmm3, xmm0, xmm12)\
	vmovddup(mem(rax, -1*8), xmm0)\
	vfmaddpd(xmm13, xmm1, xmm0, xmm13)\
	vmovaps(mem(rbx, 0*8), xmm1)\
	vfmaddpd(xmm14, xmm2, xmm0, xmm14)\
	vmovaps(mem(rbx, 2*8), xmm2)\
	vfmaddpd(xmm15, xmm3, xmm0, xmm15)\
	vmovaps(mem(rbx, 4*8), xmm3)

// Third k-step of the unrolled group: a values at rax+0*8 .. rax+3*8,
// next b vectors preloaded from rbx+6*8 .. rbx+10*8.
#define KERNEL4x6_3(xx) \
	vmovddup(mem(rax, 0*8), xmm0)\
	vfmaddpd(xmm4,  xmm1, xmm0, xmm4)\
	vfmaddpd(xmm5,  xmm2, xmm0, xmm5)\
	vfmaddpd(xmm6,  xmm3, xmm0, xmm6)\
	vmovddup(mem(rax, 1*8), xmm0)\
	vfmaddpd(xmm7,  xmm1, xmm0, xmm7)\
	prefetch(0, mem(rax, 224))\
	vfmaddpd(xmm8,  xmm2, xmm0, xmm8)\
	vfmaddpd(xmm9,  xmm3, xmm0, xmm9)\
	vmovddup(mem(rax, 2*8), xmm0)\
	vfmaddpd(xmm10, xmm1, xmm0, xmm10)\
	vfmaddpd(xmm11, xmm2, xmm0, xmm11)\
	vfmaddpd(xmm12, xmm3, xmm0, xmm12)\
	vmovddup(mem(rax, 3*8), xmm0)\
	vfmaddpd(xmm13, xmm1, xmm0, xmm13)\
	vmovaps(mem(rbx, 6*8), xmm1)\
	vfmaddpd(xmm14, xmm2, xmm0, xmm14)\
	vmovaps(mem(rbx, 8*8), xmm2)\
	vfmaddpd(xmm15, xmm3, xmm0, xmm15)\
	vmovaps(mem(rbx, 10*8), xmm3)

// Fourth k-step of the unrolled group: a values at rax+4*8 .. rax+7*8.
// This step also advances rax by 16 doubles and rbx by 24 doubles, i.e. by
// the four k-steps (4 values of a, 6 values of b each) covered by
// KERNEL4x6_1 .. KERNEL4x6_4.
#define KERNEL4x6_4(xx) \
	vmovddup(mem(rax, 4*8), xmm0)\
	vfmaddpd(xmm4,  xmm1, xmm0, xmm4)\
	prefetch(0, mem(rax, 224))\
	vfmaddpd(xmm5,  xmm2, xmm0, xmm5)\
	vfmaddpd(xmm6,  xmm3, xmm0, xmm6)\
	vmovddup(mem(rax, 5*8), xmm0)\
	vfmaddpd(xmm7,  xmm1, xmm0, xmm7)\
	vfmaddpd(xmm8,  xmm2, xmm0, xmm8)\
	vfmaddpd(xmm9,  xmm3, xmm0, xmm9)\
	vmovddup(mem(rax, 6*8), xmm0)\
	vfmaddpd(xmm10, xmm1, xmm0, xmm10)\
	vfmaddpd(xmm11, xmm2, xmm0, xmm11)\
	vfmaddpd(xmm12, xmm3, xmm0, xmm12)\
	vmovddup(mem(rax, 7*8), xmm0)\
	vfmaddpd(xmm13, xmm1, xmm0, xmm13)\
	vmovaps(mem(rbx, 12*8), xmm1)\
	vfmaddpd(xmm14, xmm2, xmm0, xmm14)\
	vmovaps(mem(rbx, 14*8), xmm2)\
	vfmaddpd(xmm15, xmm3, xmm0, xmm15)\
	add(imm(16*8), rax)\
	vmovaps(mem(rbx, 16*8), xmm3)\
	add(imm(24*8), rbx)

/*
   Double-precision 4x6 gemm micro-kernel using FMA4 instructions.

   Computes the 4x6 tile update  C := beta * C + alpha * (A * B).

   Parameters:
     k0      number of k-steps; each step reads 4 doubles from a and
             6 doubles from b.
     alpha   pointer to the scalar applied to the A*B product.
     a       panel of A; 4 doubles are consumed per k-step.
     b       panel of B; 6 doubles are consumed per k-step. It is read with
             vmovaps, so it must be 16-byte aligned.
     beta    pointer to the scalar applied to C.
     c       output tile; element (i,j) lives at c + i*rs_c0 + j*cs_c0.
     rs_c0   row stride of c, in elements.
     cs_c0   column stride of c, in elements.
     data    not referenced by this kernel.
     cntx    not referenced by this kernel.

   Register use: xmm4..xmm15 are the accumulators. Row i of the tile is held
   in xmm(4+3i), xmm(5+3i), xmm(6+3i), covering columns 0-1, 2-3 and 4-5.
   The B preload always runs one k-step ahead of the step being computed.

   When beta is +0.0 or -0.0 the tile is overwritten with alpha * (A * B)
   and C is never loaded, so NaN/Inf or uninitialised values already present
   in C cannot propagate into the result.
*/
void bli_dgemm_bulldozer_asm_4x6_fma4
     (
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a,
       double*    restrict b,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// The main loop below contains 12 k-steps (three groups of four).
	uint64_t k_iter = k0 / 12;
	uint64_t k_left = k0 % 12;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	begin_asm()

	vzeroall()
	mov(var(b), rbx) // load address of b.
	mov(var(a), rax) // load address of a.
	prefetch(0, mem(rax, 64))

	vmovaps(mem(rbx, 0*8), xmm1) // preload the first k-step of b.
	vmovaps(mem(rbx, 2*8), xmm2)
	vmovaps(mem(rbx, 4*8), xmm3)
	add(imm(12*8), rbx) // bias rbx/rax so the KERNEL4x6_* macros can
	add(imm(8*8), rax)  // use small signed displacements.

	mov(var(k_iter), rsi) // i = k_iter; notice var(k_iter) not $0
	test(rsi, rsi)
	je(.CONSIDERKLEFT)

	ALIGN32
	label(.LOOPKITER) // MAIN LOOP

	KERNEL4x6_1(xx)
	KERNEL4x6_2(xx)
	KERNEL4x6_3(xx)
	KERNEL4x6_4(xx)
	KERNEL4x6_1(xx)
	KERNEL4x6_2(xx)
	KERNEL4x6_3(xx)
	KERNEL4x6_4(xx)
	KERNEL4x6_1(xx)
	KERNEL4x6_2(xx)
	KERNEL4x6_3(xx)
	KERNEL4x6_4(xx)

	dec(rsi)
	jne(.LOOPKITER)

	label(.CONSIDERKLEFT)

	mov(var(k_left), rsi)
	test(rsi, rsi)
	label(.LOOPKLEFT)
	je(.POSTACCUM) // ZF comes from test (first pass) or dec (later passes).

	KERNEL4x6_1(xx)
	add(imm(6*8), rbx)
	add(imm(4*8), rax)

	dec(rsi)
	jmp(.LOOPKLEFT) // iterate again if i != 0.

	label(.POSTACCUM)

	mov(var(rs_c), rsi) // load rs_c
	mov(var(cs_c), rdi) // load cs_c
	vmovddup(mem(var(alpha)), xmm2) // load alpha and duplicate
	vmovddup(mem(var(beta)), xmm3) // load beta and duplicate
	mov(var(c), rcx) // load address of c
	sal(imm(3), rsi) // rs_c *= sizeof(double)
	sal(imm(3), rdi) // cs_c *= sizeof(double)
	lea(mem(rcx, rdi, 2), rdx) // rdx = c + 2*cs_c

	// Avoid loading C if beta == 0. The test is done on the raw bits of
	// beta: shifting out the sign bit leaves zero only for +0.0 and -0.0.
	// rax no longer holds the address of a at this point.
	mov(mem(var(beta)), rax)
	sal(imm(1), rax) // set ZF if beta == +/-0.0
	je(.DBETAZERO)

	// update row 0 (xmm4, xmm5, xmm6)
	vmovlpd(mem(rcx), xmm0, xmm0)
	vmovlpd(mem(rdx), xmm1, xmm1)
	vmovhpd(mem(rcx, rdi, 1), xmm0, xmm0)
	vmovhpd(mem(rdx, rdi, 1), xmm1, xmm1)
	lea(mem(rdx, rdi, 2), r8) // r8 = c + 4*cs_c
	vmulpd(xmm2, xmm4, xmm4) // scale by alpha,
	vmulpd(xmm2, xmm5, xmm5) // scale by alpha,
	vfmaddpd(xmm4, xmm0, xmm3, xmm4) // scale by beta, and add the gemm result
	vmovlpd(mem(r8), xmm0, xmm0)
	vfmaddpd(xmm5, xmm1, xmm3, xmm5) // scale by beta, and add the gemm result
	vmovhpd(mem(r8, rdi, 1), xmm0, xmm0)
	vmovlpd(xmm4, mem(rcx)) // and store back to memory.
	vmovlpd(xmm5, mem(rdx)) // and store back to memory.
	vmovhpd(xmm4, mem(rcx, rdi, 1))
	add(rsi, rcx)
	vmovhpd(xmm5, mem(rdx, rdi, 1))
	add(rsi, rdx)

	vmulpd(xmm2, xmm6, xmm6) // scale by alpha,
	vfmaddpd(xmm6, xmm0, xmm3, xmm6) // scale by beta, and add the gemm result
	vmovlpd(xmm6, mem(r8)) // and store back to memory.
	vmovhpd(xmm6, mem(r8, rdi, 1))
	add(rsi, r8)

	// update row 1 (xmm7, xmm8, xmm9)
	vmovlpd(mem(rcx), xmm0, xmm0)
	vmovlpd(mem(rdx), xmm1, xmm1)
	vmovlpd(mem(r8), xmm4, xmm4)
	vmovhpd(mem(rcx, rdi, 1), xmm0, xmm0)
	vmovhpd(mem(rdx, rdi, 1), xmm1, xmm1)
	vmovhpd(mem(r8, rdi, 1), xmm4, xmm4)
	vmulpd(xmm2, xmm7, xmm7) // scale by alpha,
	vmulpd(xmm2, xmm8, xmm8) // scale by alpha,
	vmulpd(xmm2, xmm9, xmm9) // scale by alpha,
	vfmaddpd(xmm7, xmm0, xmm3, xmm7) // scale by beta, and add the gemm result
	vfmaddpd(xmm8, xmm1, xmm3, xmm8) // scale by beta, and add the gemm result
	vfmaddpd(xmm9, xmm4, xmm3, xmm9) // scale by beta, and add the gemm result
	vmovlpd(xmm7, mem(rcx)) // and store back to memory.
	vmovlpd(xmm8, mem(rdx)) // and store back to memory.
	vmovlpd(xmm9, mem(r8)) // and store back to memory.
	vmovhpd(xmm7, mem(rcx, rdi, 1))
	add(rsi, rcx)
	vmovhpd(xmm8, mem(rdx, rdi, 1))
	add(rsi, rdx)
	vmovhpd(xmm9, mem(r8, rdi, 1))
	add(rsi, r8)

	// update row 2 (xmm10, xmm11, xmm12)
	vmovlpd(mem(rcx), xmm0, xmm0)
	vmovlpd(mem(rdx), xmm1, xmm1)
	vmovlpd(mem(r8), xmm4, xmm4)
	vmovhpd(mem(rcx, rdi, 1), xmm0, xmm0)
	vmovhpd(mem(rdx, rdi, 1), xmm1, xmm1)
	vmovhpd(mem(r8, rdi, 1), xmm4, xmm4)
	vmulpd(xmm2, xmm10, xmm10) // scale by alpha,
	vmulpd(xmm2, xmm11, xmm11) // scale by alpha,
	vmulpd(xmm2, xmm12, xmm12) // scale by alpha,
	vfmaddpd(xmm10, xmm0, xmm3, xmm10) // scale by beta, and add the gemm result
	vfmaddpd(xmm11, xmm1, xmm3, xmm11) // scale by beta, and add the gemm result
	vfmaddpd(xmm12, xmm4, xmm3, xmm12) // scale by beta, and add the gemm result
	vmovlpd(xmm10, mem(rcx)) // and store back to memory.
	vmovlpd(xmm11, mem(rdx)) // and store back to memory.
	vmovlpd(xmm12, mem(r8)) // and store back to memory.
	vmovhpd(xmm10, mem(rcx, rdi, 1))
	add(rsi, rcx)
	vmovhpd(xmm11, mem(rdx, rdi, 1))
	add(rsi, rdx)
	vmovhpd(xmm12, mem(r8, rdi, 1))
	add(rsi, r8)

	// update row 3 (xmm13, xmm14, xmm15)
	vmovlpd(mem(rcx), xmm0, xmm0)
	vmovlpd(mem(rdx), xmm1, xmm1)
	vmovlpd(mem(r8), xmm4, xmm4)
	vmovhpd(mem(rcx, rdi, 1), xmm0, xmm0)
	vmovhpd(mem(rdx, rdi, 1), xmm1, xmm1)
	vmovhpd(mem(r8, rdi, 1), xmm4, xmm4)
	vmulpd(xmm2, xmm13, xmm13) // scale by alpha,
	vmulpd(xmm2, xmm14, xmm14) // scale by alpha,
	vmulpd(xmm2, xmm15, xmm15) // scale by alpha,
	vfmaddpd(xmm13, xmm0, xmm3, xmm13) // scale by beta, and add the gemm result
	vfmaddpd(xmm14, xmm1, xmm3, xmm14) // scale by beta, and add the gemm result
	vfmaddpd(xmm15, xmm4, xmm3, xmm15) // scale by beta, and add the gemm result
	vmovlpd(xmm13, mem(rcx)) // and store back to memory.
	vmovlpd(xmm14, mem(rdx)) // and store back to memory.
	vmovlpd(xmm15, mem(r8)) // and store back to memory.
	vmovhpd(xmm13, mem(rcx, rdi, 1))
	vmovhpd(xmm14, mem(rdx, rdi, 1))
	vmovhpd(xmm15, mem(r8, rdi, 1))

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	// beta == 0: store alpha * (A * B) without reading C.
	lea(mem(rdx, rdi, 2), r8) // r8 = c + 4*cs_c

	// store row 0 (xmm4, xmm5, xmm6)
	vmulpd(xmm2, xmm4, xmm4) // scale by alpha,
	vmulpd(xmm2, xmm5, xmm5) // scale by alpha,
	vmulpd(xmm2, xmm6, xmm6) // scale by alpha,
	vmovlpd(xmm4, mem(rcx)) // and store to memory.
	vmovhpd(xmm4, mem(rcx, rdi, 1))
	vmovlpd(xmm5, mem(rdx))
	vmovhpd(xmm5, mem(rdx, rdi, 1))
	vmovlpd(xmm6, mem(r8))
	vmovhpd(xmm6, mem(r8, rdi, 1))
	add(rsi, rcx)
	add(rsi, rdx)
	add(rsi, r8)

	// store row 1 (xmm7, xmm8, xmm9)
	vmulpd(xmm2, xmm7, xmm7) // scale by alpha,
	vmulpd(xmm2, xmm8, xmm8) // scale by alpha,
	vmulpd(xmm2, xmm9, xmm9) // scale by alpha,
	vmovlpd(xmm7, mem(rcx)) // and store to memory.
	vmovhpd(xmm7, mem(rcx, rdi, 1))
	vmovlpd(xmm8, mem(rdx))
	vmovhpd(xmm8, mem(rdx, rdi, 1))
	vmovlpd(xmm9, mem(r8))
	vmovhpd(xmm9, mem(r8, rdi, 1))
	add(rsi, rcx)
	add(rsi, rdx)
	add(rsi, r8)

	// store row 2 (xmm10, xmm11, xmm12)
	vmulpd(xmm2, xmm10, xmm10) // scale by alpha,
	vmulpd(xmm2, xmm11, xmm11) // scale by alpha,
	vmulpd(xmm2, xmm12, xmm12) // scale by alpha,
	vmovlpd(xmm10, mem(rcx)) // and store to memory.
	vmovhpd(xmm10, mem(rcx, rdi, 1))
	vmovlpd(xmm11, mem(rdx))
	vmovhpd(xmm11, mem(rdx, rdi, 1))
	vmovlpd(xmm12, mem(r8))
	vmovhpd(xmm12, mem(r8, rdi, 1))
	add(rsi, rcx)
	add(rsi, rdx)
	add(rsi, r8)

	// store row 3 (xmm13, xmm14, xmm15)
	vmulpd(xmm2, xmm13, xmm13) // scale by alpha,
	vmulpd(xmm2, xmm14, xmm14) // scale by alpha,
	vmulpd(xmm2, xmm15, xmm15) // scale by alpha,
	vmovlpd(xmm13, mem(rcx)) // and store to memory.
	vmovhpd(xmm13, mem(rcx, rdi, 1))
	vmovlpd(xmm14, mem(rdx))
	vmovhpd(xmm14, mem(rdx, rdi, 1))
	vmovlpd(xmm15, mem(r8))
	vmovhpd(xmm15, mem(r8, rdi, 1))

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "r" (k_iter), // 0
	  [k_left] "r" (k_left), // 1
	  [a]      "r" (a),      // 2
	  [b]      "r" (b),      // 3
	  [alpha]  "r" (alpha),  // 4
	  [beta]   "r" (beta),   // 5
	  [c]      "r" (c),      // 6
	  [rs_c]   "m" (rs_c),   // 7
	  [cs_c]   "m" (cs_c)/*,   // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next)*/  // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

//The parameter "i" is the iteration number, i.e.
the B values to read #define MADD_TO_YMM(i) \ vfmaddps(ymm15, ymm0, ymm2, ymm15)\ vperm2f128(imm(0x3), ymm2, ymm2, ymm4)\ vfmaddps(ymm13, ymm0, ymm3, ymm13)\ vperm2f128(imm(0x3), ymm3, ymm3, ymm5)\ vfmaddps(ymm14, ymm1, ymm2, ymm14)\ vmovshdup(mem(rbx, i*32), ymm2)\ vfmaddps(ymm12, ymm1, ymm3, ymm12)\ vpermilps(imm(0x4e), ymm2, ymm3)\ vfmaddps(ymm11, ymm0, ymm4, ymm11)\ vfmaddps(ymm9, ymm0, ymm5, ymm9)\ vpermilps(imm(0xb1), ymm0, ymm0)\ vfmaddps(ymm10, ymm1, ymm4, ymm10)\ vperm2f128(imm(0x3), ymm2, ymm2, ymm4)\ vfmaddps(ymm8, ymm1, ymm5, ymm8)\ vperm2f128(imm(0x3), ymm3, ymm3, ymm5)\ void bli_cgemm_bulldozer_asm_8x4_fma4 ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. mov(var(b_next), r15) // load address of b_next. //mov(var(a_next), r14) // load address of a_next. 
sub(imm(4*64), r15) vmovaps(mem(rax, 0*32), ymm0) // initialize loop by pre-loading vmovsldup(mem(rbx, 0*32), ymm2) vpermilps(imm(0x4e), ymm2, ymm3) mov(var(c), rcx) // load address of c mov(var(cs_c), rdi) // load cs_c lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(scomplex) lea(mem(rcx, rdi, 2), r10) // load address of c + 2*cs_c; prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*cs_c prefetch(0, mem(r10, 3*8)) // prefetch c + 2*cs_c prefetch(0, mem(r10, rdi, 1, 3*8)) // prefetch c + 3*cs_c vxorps(ymm8, ymm8, ymm8) vxorps(ymm9, ymm9, ymm9) vxorps(ymm10, ymm10, ymm10) vxorps(ymm11, ymm11, ymm11) vxorps(ymm12, ymm12, ymm12) vxorps(ymm13, ymm13, ymm13) vxorps(ymm14, ymm14, ymm14) vxorps(ymm15, ymm15, ymm15) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.CCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.CLOOPKITER) // MAIN LOOP add(imm(4*4*8), r15) // b_next += 4*4 (unroll x nr) // iteration 0 prefetch(0, mem(rax, 8*32)) vmovaps(mem(rax, 1*32), ymm1) MADD_TO_YMM(0) vpermilps(imm(0xb1), ymm1, ymm1) vmulps(ymm0, ymm2, ymm6) vaddsubps(ymm6, ymm15, ymm15) vmulps(ymm0, ymm3, ymm7) vaddsubps(ymm7, ymm13, ymm13) vmulps(ymm1, ymm2, ymm6) vmovsldup(mem(rbx, 1*32), ymm2) vmulps(ymm1, ymm3, ymm7) vpermilps(imm(0x4e), ymm2, ymm3) vaddsubps(ymm6, ymm14, ymm14) vaddsubps(ymm7, ymm12, ymm12) vmulps(ymm0, ymm4, ymm6) vmulps(ymm0, ymm5, ymm7) vmovaps(mem(rax, 2*32), ymm0) vaddsubps(ymm6, ymm11, ymm11) vaddsubps(ymm7, ymm9, ymm9) vmulps(ymm1, ymm4, ymm6) vmulps(ymm1, ymm5, ymm7) vaddsubps(ymm6, ymm10, ymm10) vaddsubps(ymm7, ymm8, ymm8) // iteration 1 prefetch(0, mem(rax, 10*32)) vmovaps(mem(rax, 3*32), ymm1) MADD_TO_YMM(1) vpermilps(imm(0xb1), ymm1, ymm1) vmulps(ymm0, ymm2, ymm6) vmulps(ymm0, ymm3, ymm7) vaddsubps(ymm6, ymm15, ymm15) vaddsubps(ymm7, ymm13, ymm13) vmulps(ymm1, ymm2, ymm6) vmovsldup(mem(rbx, 2*32), ymm2) vmulps(ymm1, ymm3, ymm7) vpermilps(imm(0x4e), ymm2, 
ymm3) vaddsubps(ymm6, ymm14, ymm14) vaddsubps(ymm7, ymm12, ymm12) vmulps(ymm0, ymm4, ymm6) vmulps(ymm0, ymm5, ymm7) vmovaps(mem(rax, 4*32), ymm0) vaddsubps(ymm6, ymm11, ymm11) vaddsubps(ymm7, ymm9, ymm9) vmulps(ymm1, ymm4, ymm6) vmulps(ymm1, ymm5, ymm7) vaddsubps(ymm6, ymm10, ymm10) vaddsubps(ymm7, ymm8, ymm8) // iteration 2 prefetch(0, mem(rax, 12*32)) vmovaps(mem(rax, 5*32), ymm1) MADD_TO_YMM(2) prefetch(0, mem(r15, 2*32)) // prefetch b_next[2*4] vpermilps(imm(0xb1), ymm1, ymm1) vmulps(ymm0, ymm2, ymm6) vmulps(ymm0, ymm3, ymm7) vaddsubps(ymm6, ymm15, ymm15) vaddsubps(ymm7, ymm13, ymm13) vmulps(ymm1, ymm2, ymm6) vmovsldup(mem(rbx, 3*32), ymm2) vmulps(ymm1, ymm3, ymm7) vpermilps(imm(0x4e), ymm2, ymm3) vaddsubps(ymm6, ymm14, ymm14) vaddsubps(ymm7, ymm12, ymm12) vmulps(ymm0, ymm4, ymm6) vmulps(ymm0, ymm5, ymm7) vmovaps(mem(rax, 6*32), ymm0) vaddsubps(ymm6, ymm11, ymm11) vaddsubps(ymm7, ymm9, ymm9) vmulps(ymm1, ymm4, ymm6) vmulps(ymm1, ymm5, ymm7) vaddsubps(ymm6, ymm10, ymm10) vaddsubps(ymm7, ymm8, ymm8) // iteration 3 prefetch(0, mem(rax, 14*32)) vmovaps(mem(rax, 7*32), ymm1) MADD_TO_YMM(3) vpermilps(imm(0xb1), ymm1, ymm1) vmulps(ymm0, ymm2, ymm6) vmulps(ymm0, ymm3, ymm7) vaddsubps(ymm6, ymm15, ymm15) vaddsubps(ymm7, ymm13, ymm13) vmulps(ymm1, ymm2, ymm6) vmovsldup(mem(rbx, 4*32), ymm2) vmulps(ymm1, ymm3, ymm7) vpermilps(imm(0x4e), ymm2, ymm3) vaddsubps(ymm6, ymm14, ymm14) vaddsubps(ymm7, ymm12, ymm12) vmulps(ymm0, ymm4, ymm6) vmulps(ymm0, ymm5, ymm7) vmovaps(mem(rax, 8*32), ymm0) vaddsubps(ymm6, ymm11, ymm11) vaddsubps(ymm7, ymm9, ymm9) vmulps(ymm1, ymm4, ymm6) vmulps(ymm1, ymm5, ymm7) vaddsubps(ymm6, ymm10, ymm10) vaddsubps(ymm7, ymm8, ymm8) add(imm(8*4*8), rax) // a += 8*4 (unroll x mr) add(imm(4*4*8), rbx) // b += 4*4 (unroll x nr) dec(rsi) // i -= 1; jne(.CLOOPKITER) // iterate again if i != 0. label(.CCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.CPOSTACCUM) // if i == 0, we're done; jump to end. 
// else, we prepare to enter k_left loop. label(.CLOOPKLEFT) // EDGE LOOP // iteration 0 prefetch(0, mem(rax, 8*32)) vmovaps(mem(rax, 1*32), ymm1) MADD_TO_YMM(0) vpermilps(imm(0xb1), ymm1, ymm1) vmulps(ymm0, ymm2, ymm6) vmulps(ymm0, ymm3, ymm7) vaddsubps(ymm6, ymm15, ymm15) vaddsubps(ymm7, ymm13, ymm13) vmulps(ymm1, ymm2, ymm6) vmovsldup(mem(rbx, 1*32), ymm2) vmulps(ymm1, ymm3, ymm7) vpermilps(imm(0x4e), ymm2, ymm3) vaddsubps(ymm6, ymm14, ymm14) vaddsubps(ymm7, ymm12, ymm12) vmulps(ymm0, ymm4, ymm6) vmulps(ymm0, ymm5, ymm7) vmovaps(mem(rax, 2*32), ymm0) vaddsubps(ymm6, ymm11, ymm11) vaddsubps(ymm7, ymm9, ymm9) vmulps(ymm1, ymm4, ymm6) vmulps(ymm1, ymm5, ymm7) vaddsubps(ymm6, ymm10, ymm10) vaddsubps(ymm7, ymm8, ymm8) add(imm(8*1*8), rax) // a += 8 (1 x mr) add(imm(4*1*8), rbx) // b += 4 (1 x nr) dec(rsi) // i -= 1; jne(.CLOOPKLEFT) // iterate again if i != 0. label(.CPOSTACCUM) // ymm15: ymm13: ymm11: ymm9: // ( ab00 ( ab01 ( ab02 ( ab03 // ab10 ab11 ab12 ab13 // ab21 ab20 ab23 ab22 // ab31 ab30 ab33 ab32 // ab42 ab43 ab40 ab41 // ab52 ab53 ab50 ab51 // ab63 ab62 ab61 ab60 // ab73 ) ab72 ) ab71 ) ab70 ) // ymm14: ymm12: ymm10: ymm8: // ( ab80 ( ab81 ( ab82 ( ab83 // ab90 ab91 ab92 ab93 // aba1 aba0 aba3 aba2 // abb1 abb0 abb3 abb2 // abc2 abc3 abc0 abc1 // abd2 abd3 abd0 abd1 // abe3 abe2 abe1 abe0 // abf3 abf2 abf1 abf0 ) GROUP_YMM_BY_4 // ymm15: ymm13: ymm11: ymm9: // ( ab00 ( ab01 ( ab02 ( ab03 // ab10 ab11 ab12 ab13 // ab20 ab21 ab22 ab23 // ab30 ab31 ab32 ab33 // ab42 ab43 ab40 ab41 // ab52 ab53 ab50 ab51 // ab62 ab63 ab60 ab61 // ab72 ) ab73 ) ab70 ) ab71 ) // ymm14: ymm12: ymm10: ymm8: // ( ab80 ( ab81 ( ab82 ( ab83 // ab90 ab91 ab92 ab93 // aba0 aba1 aba2 aba3 // abb0 abb1 abb2 abb3 // abc2 abc3 abc0 abc1 // abd2 abd3 abd0 abd1 // abe2 abe3 abe0 abe1 // abf2 ) abf3 ) abf0 ) abf1 ) // ymm15: ymm13: ymm11: ymm9: // ( ab00 ( ab01 ( ab02 ( ab03 // ab10 ab11 ab12 ab13 // ab20 ab21 ab22 ab23 // ab30 ab31 ab32 ab33 // ab40 ab41 ab42 ab43 // ab50 ab51 ab52 ab53 // 
ab60 ab61 ab62 ab63 // ab70 ) ab71 ) ab72 ) ab73 ) // ymm14: ymm12: ymm10: ymm8: // ( ab80 ( ab81 ( ab82 ( ab83 // ab90 ab91 ab92 ab93 // aba0 aba1 aba2 aba3 // abb0 abb1 abb2 abb3 // abc0 abc1 abc2 abc3 // abd0 abd1 abd2 abd3 // abe0 abe1 abe2 abe3 // abf0 ) abf1 ) abf2 ) abf3 ) // scale by alpha mov(var(alpha), rax) // load address of alpha vbroadcastss(mem(rax), ymm7) // load alpha_r and duplicate vbroadcastss(mem(rax, 4), ymm6) // load alpha_i and duplicate vpermilps(imm(0xb1), ymm15, ymm3) vmulps(ymm7, ymm15, ymm15) vmulps(ymm6, ymm3, ymm3) vaddsubps(ymm3, ymm15, ymm15) vpermilps(imm(0xb1), ymm14, ymm2) vmulps(ymm7, ymm14, ymm14) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm14, ymm14) vpermilps(imm(0xb1), ymm13, ymm1) vmulps(ymm7, ymm13, ymm13) vmulps(ymm6, ymm1, ymm1) vaddsubps(ymm1, ymm13, ymm13) vpermilps(imm(0xb1), ymm12, ymm0) vmulps(ymm7, ymm12, ymm12) vmulps(ymm6, ymm0, ymm0) vaddsubps(ymm0, ymm12, ymm12) vpermilps(imm(0xb1), ymm11, ymm3) vmulps(ymm7, ymm11, ymm11) vmulps(ymm6, ymm3, ymm3) vaddsubps(ymm3, ymm11, ymm11) vpermilps(imm(0xb1), ymm10, ymm2) vmulps(ymm7, ymm10, ymm10) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm10, ymm10) vpermilps(imm(0xb1), ymm9, ymm1) vmulps(ymm7, ymm9, ymm9) vmulps(ymm6, ymm1, ymm1) vaddsubps(ymm1, ymm9, ymm9) vpermilps(imm(0xb1), ymm8, ymm0) vmulps(ymm7, ymm8, ymm8) vmulps(ymm6, ymm0, ymm0) vaddsubps(ymm0, ymm8, ymm8) mov(var(beta), rbx) // load address of beta vbroadcastss(mem(rbx), ymm7) // load beta_r and duplicate vbroadcastss(mem(rbx, 4), ymm6) // load beta_i and duplicate mov(var(rs_c), rsi) // load rs_c lea(mem(, rsi, 8), rsi) // rsi = rs_c * sizeof(scomplex) lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*rs_c; lea(mem(, rsi, 2), r12) // r12 = 2*rs_c; lea(mem(r12, rsi, 1), r13) // r13 = 3*rs_c; // determine if // c % 32 == 0, AND // 8*cs_c % 32 == 0, AND // rs_c == 1 // ie: aligned, ldim aligned, and // column-stored cmp(imm(8), rsi) // set ZF if (8*rs_c) == 8. sete(bl) // bl = ( ZF == 1 ? 
1 : 0 ); test(imm(31), rcx) // set ZF if c & 31 is zero (c % 32 == 0). setz(bh) // bh = ( ZF == 1 ? 1 : 0 ); test(imm(31), rdi) // set ZF if (8*cs_c) & 31 is zero. setz(al) // al = ( ZF == 1 ? 1 : 0 ); // and(bl,bh) followed by // and(bh,al) will reveal result // now avoid loading C if beta == 0 vxorps(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomiss(xmm0, xmm7) // set ZF if beta_r == 0. sete(r8b) // r8b = ( ZF == 1 ? 1 : 0 ); vucomiss(xmm0, xmm6) // set ZF if beta_i == 0. sete(r9b) // r9b = ( ZF == 1 ? 1 : 0 ); and(r8b, r9b) // clear ZF if r8b & r9b == 1. jne(.CBETAZERO) // if ZF = 0, jump to beta == 0 case // check if aligned/column-stored and(bl, bh) // clear ZF if bl & bh == 1. and(bh, al) // clear ZF if bh & al == 1. jne(.CCOLSTORED) // if ZF = 0, jump to column storage case label(.CGENSTORED) // update c00:c70 vmovlpd(mem(rcx), xmm0, xmm0) // load (c00,10) into xmm0[0:1] vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) // load (c20,30) into xmm0[2:3] vmovlpd(mem(rcx, r12, 1), xmm2, xmm2) // load (c40,50) into xmm2[0:1] vmovhpd(mem(rcx, r13, 1), xmm2, xmm2) // load (c60,70) into xmm2[2:3] vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:3],xmm2) vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm15, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[4:7] vmovlpd(xmm0, mem(rcx)) // store (c00,c10) vmovhpd(xmm0, mem(rcx, rsi, 1)) // store (c20,c30) vmovlpd(xmm2, mem(rcx, r12, 1)) // store (c40,c50) vmovhpd(xmm2, mem(rcx, r13, 1)) // store (c60,c70) add(rdi, rcx) // c += cs_c; // update c80:cf0 vmovlpd(mem(rdx), xmm0, xmm0) // load (c80,90) into xmm0[0:1] vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) // load (ca0,b0) into xmm0[2:3] vmovlpd(mem(rdx, r12, 1), xmm2, xmm2) // load (cc0,d0) into xmm2[0:1] vmovhpd(mem(rdx, r13, 1), xmm2, xmm2) // load (ce0,f0) into xmm2[2:3] vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:3],xmm2) vpermilps(imm(0xb1), ymm0, ymm2) // scale
ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm14, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[4:7] vmovlpd(xmm0, mem(rdx)) // store (c80,c90) vmovhpd(xmm0, mem(rdx, rsi, 1)) // store (ca0,cb0) vmovlpd(xmm2, mem(rdx, r12, 1)) // store (cc0,cd0) vmovhpd(xmm2, mem(rdx, r13, 1)) // store (ce0,cf0) add(rdi, rdx) // c += cs_c; // update c01:c71 vmovlpd(mem(rcx), xmm0, xmm0) // load (c01,11) into xmm0[0:1] vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) // load (c21,31) into xmm0[2:3] vmovlpd(mem(rcx, r12, 1), xmm2, xmm2) // load (c41,51) into xmm2[0:1] vmovhpd(mem(rcx, r13, 1), xmm2, xmm2) // load (c61,71) into xmm2[2:3] vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:3],xmm2) vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm13, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[4:7] vmovlpd(xmm0, mem(rcx)) // store (c01,c11) vmovhpd(xmm0, mem(rcx, rsi, 1)) // store (c21,c31) vmovlpd(xmm2, mem(rcx, r12, 1)) // store (c41,c51) vmovhpd(xmm2, mem(rcx, r13, 1)) // store (c61,c71) add(rdi, rcx) // c += cs_c; // update c81:cf1 vmovlpd(mem(rdx), xmm0, xmm0) // load (c81,91) into xmm0[0:1] vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) // load (ca1,b1) into xmm0[2:3] vmovlpd(mem(rdx, r12, 1), xmm2, xmm2) // load (cc1,d1) into xmm2[0:1] vmovhpd(mem(rdx, r13, 1), xmm2, xmm2) // load (ce1,f1) into xmm2[2:3] vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:3],xmm2) vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm12, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[4:7] vmovlpd(xmm0, mem(rdx)) // store (c81,c91) vmovhpd(xmm0, mem(rdx, rsi, 1)) // store (ca1,cb1) vmovlpd(xmm2, mem(rdx, r12, 1)) // store (cc1,cd1) 
vmovhpd(xmm2, mem(rdx, r13, 1)) // store (ce1,cf1) add(rdi, rdx) // c += cs_c; // update c02:c72 vmovlpd(mem(rcx), xmm0, xmm0) // load (c02,12) into xmm0[0:1] vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) // load (c22,32) into xmm0[2:3] vmovlpd(mem(rcx, r12, 1), xmm2, xmm2) // load (c42,52) into xmm2[0:1] vmovhpd(mem(rcx, r13, 1), xmm2, xmm2) // load (c62,72) into xmm2[2:3] vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:3],xmm2) vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm11, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[4:7] vmovlpd(xmm0, mem(rcx)) // store (c02,c12) vmovhpd(xmm0, mem(rcx, rsi, 1)) // store (c22,c32) vmovlpd(xmm2, mem(rcx, r12, 1)) // store (c42,c52) vmovhpd(xmm2, mem(rcx, r13, 1)) // store (c62,c72) add(rdi, rcx) // c += cs_c; // update c82:cf2 vmovlpd(mem(rdx), xmm0, xmm0) // load (c82,92) into xmm0[0:1] vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) // load (ca2,b2) into xmm0[2:3] vmovlpd(mem(rdx, r12, 1), xmm2, xmm2) // load (cc2,d2) into xmm2[0:1] vmovhpd(mem(rdx, r13, 1), xmm2, xmm2) // load (ce2,f2) into xmm2[2:3] vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:3],xmm2) vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm10, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[4:7] vmovlpd(xmm0, mem(rdx)) // store (c82,c92) vmovhpd(xmm0, mem(rdx, rsi, 1)) // store (ca2,cb2) vmovlpd(xmm2, mem(rdx, r12, 1)) // store (cc2,cd2) vmovhpd(xmm2, mem(rdx, r13, 1)) // store (ce2,cf2) add(rdi, rdx) // c += cs_c; // update c03:c73 vmovlpd(mem(rcx), xmm0, xmm0) // load (c03,13) into xmm0[0:1] vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) // load (c23,33) into xmm0[2:3] vmovlpd(mem(rcx, r12, 1), xmm2, xmm2) // load (c43,53) into xmm2[0:1] vmovhpd(mem(rcx, r13, 1), xmm2, xmm2) // load 
(c63,73) into xmm2[2:3] vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:3],xmm2) vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm9, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[4:7] vmovlpd(xmm0, mem(rcx)) // store (c03,c13) vmovhpd(xmm0, mem(rcx, rsi, 1)) // store (c23,c33) vmovlpd(xmm2, mem(rcx, r12, 1)) // store (c43,c53) vmovhpd(xmm2, mem(rcx, r13, 1)) // store (c63,c73) add(rdi, rcx) // c += cs_c; // update c83:cf3 vmovlpd(mem(rdx), xmm0, xmm0) // load (c83,93) into xmm0[0:1] vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) // load (ca3,b3) into xmm0[2:3] vmovlpd(mem(rdx, r12, 1), xmm2, xmm2) // load (cc3,d3) into xmm2[0:1] vmovhpd(mem(rdx, r13, 1), xmm2, xmm2) // load (ce3,f3) into xmm2[2:3] vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:3],xmm2) vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm8, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[4:7] vmovlpd(xmm0, mem(rdx)) // store (c83,c93) vmovhpd(xmm0, mem(rdx, rsi, 1)) // store (ca3,cb3) vmovlpd(xmm2, mem(rdx, r12, 1)) // store (cc3,cd3) vmovhpd(xmm2, mem(rdx, r13, 1)) // store (ce3,cf3) add(rdi, rdx) // c += cs_c; jmp(.CDONE) // jump to end. 
label(.CCOLSTORED) // update c00:c70 vmovaps(mem(rcx), ymm0) // load c00:c70 into ymm0 vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm15, ymm0, ymm0) // add the gemm result to ymm0 vmovaps(ymm0, mem(rcx)) // store c00:c70 add(rdi, rcx) // c += cs_c; // update c80:cf0 vmovaps(mem(rdx), ymm0) // load c80:f0 into ymm0 vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm14, ymm0, ymm0) // add the gemm result to ymm0 vmovaps(ymm0, mem(rdx)) // store c80:cf0 add(rdi, rdx) // c += cs_c; // update c00:c70 vmovaps(mem(rcx), ymm0) // load c01:c71 into ymm0 vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm13, ymm0, ymm0) // add the gemm result to ymm0 vmovaps(ymm0, mem(rcx)) // store c01:c71 add(rdi, rcx) // c += cs_c; // update c81:cf1 vmovaps(mem(rdx), ymm0) // load c81:f1 into ymm0 vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm12, ymm0, ymm0) // add the gemm result to ymm0 vmovaps(ymm0, mem(rdx)) // store c81:cf1 add(rdi, rdx) // c += cs_c; // update c02:c72 vmovaps(mem(rcx), ymm0) // load c02:c72 into ymm0 vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm11, ymm0, ymm0) // add the gemm result to ymm0 vmovaps(ymm0, mem(rcx)) // store c02:c72 add(rdi, rcx) // c += cs_c; // update c82:cf2 vmovaps(mem(rdx), ymm0) // load c82:f2 into ymm0 vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm10, ymm0, ymm0) // add the gemm result to ymm0 vmovaps(ymm0, mem(rdx)) // store c82:cf2 add(rdi, rdx) // c += cs_c; // update c03:c73 
vmovaps(mem(rcx), ymm0) // load c03:c73 into ymm0 vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm9, ymm0, ymm0) // add the gemm result to ymm0 vmovaps(ymm0, mem(rcx)) // store c03:c73 add(rdi, rcx) // c += cs_c; // update c83:cf3 vmovaps(mem(rdx), ymm0) // load c83:f3 into ymm0 vpermilps(imm(0xb1), ymm0, ymm2) // scale ymm0 by beta vmulps(ymm7, ymm0, ymm0) vmulps(ymm6, ymm2, ymm2) vaddsubps(ymm2, ymm0, ymm0) vaddps(ymm8, ymm0, ymm0) // add the gemm result to ymm0 vmovaps(ymm0, mem(rdx)) // store c83:cf3 add(rdi, rdx) // c += cs_c; jmp(.CDONE) // jump to end. label(.CBETAZERO) // beta == 0: C is overwritten without being loaded. // check if aligned/column-stored and(bl, bh) // clear ZF if bl & bh == 1. and(bh, al) // clear ZF if bh & al == 1. jne(.CCOLSTORBZ) // if ZF = 0, jump to column storage case label(.CGENSTORBZ) // update c00:c70 vextractf128(imm(1), ymm15, xmm2) // xmm2 := ymm15[4:7] vmovlpd(xmm15, mem(rcx)) // store (c00,c10) vmovhpd(xmm15, mem(rcx, rsi, 1)) // store (c20,c30) vmovlpd(xmm2, mem(rcx, r12, 1)) // store (c40,c50) vmovhpd(xmm2, mem(rcx, r13, 1)) // store (c60,c70) add(rdi, rcx) // c += cs_c; // update c80:cf0 vextractf128(imm(1), ymm14, xmm2) // xmm2 := ymm14[4:7] vmovlpd(xmm14, mem(rdx)) // store (c80,c90) vmovhpd(xmm14, mem(rdx, rsi, 1)) // store (ca0,cb0) vmovlpd(xmm2, mem(rdx, r12, 1)) // store (cc0,cd0) vmovhpd(xmm2, mem(rdx, r13, 1)) // store (ce0,cf0) add(rdi, rdx) // c += cs_c; // update c01:c71 vextractf128(imm(1), ymm13, xmm2) // xmm2 := ymm13[4:7] vmovlpd(xmm13, mem(rcx)) // store (c01,c11) vmovhpd(xmm13, mem(rcx, rsi, 1)) // store (c21,c31) vmovlpd(xmm2, mem(rcx, r12, 1)) // store (c41,c51) vmovhpd(xmm2, mem(rcx, r13, 1)) // store (c61,c71) add(rdi, rcx) // c += cs_c; // update c81:cf1 vextractf128(imm(1), ymm12, xmm2) // xmm2 := ymm12[4:7] vmovlpd(xmm12, mem(rdx)) // store (c81,c91) vmovhpd(xmm12, mem(rdx, rsi, 1)) // store (ca1,cb1) vmovlpd(xmm2, mem(rdx, r12, 1)) // store
(cc1,cd1) vmovhpd(xmm2, mem(rdx, r13, 1)) // store (ce1,cf1) add(rdi, rdx) // c += cs_c; // update c02:c72 vextractf128(imm(1), ymm11, xmm2) // xmm2 := ymm11[4:7] vmovlpd(xmm11, mem(rcx)) // store (c02,c12) vmovhpd(xmm11, mem(rcx, rsi, 1)) // store (c22,c32) vmovlpd(xmm2, mem(rcx, r12, 1)) // store (c42,c52) vmovhpd(xmm2, mem(rcx, r13, 1)) // store (c62,c72) add(rdi, rcx) // c += cs_c; // update c82:cf2 vextractf128(imm(1), ymm10, xmm2) // xmm2 := ymm10[4:7] vmovlpd(xmm10, mem(rdx)) // store (c82,c92) vmovhpd(xmm10, mem(rdx, rsi, 1)) // store (ca2,cb2) vmovlpd(xmm2, mem(rdx, r12, 1)) // store (cc2,cd2) vmovhpd(xmm2, mem(rdx, r13, 1)) // store (ce2,cf2) add(rdi, rdx) // c += cs_c; // update c03:c73 vextractf128(imm(1), ymm9, xmm2) // xmm2 := ymm9[4:7] vmovlpd(xmm9, mem(rcx)) // store (c03,c13) vmovhpd(xmm9, mem(rcx, rsi, 1)) // store (c23,c33) vmovlpd(xmm2, mem(rcx, r12, 1)) // store (c43,c53) vmovhpd(xmm2, mem(rcx, r13, 1)) // store (c63,c73) add(rdi, rcx) // c += cs_c; // update c83:cf3 vextractf128(imm(1), ymm8, xmm2) // xmm2 := ymm8[4:7] vmovlpd(xmm8, mem(rdx)) // store (c83,c93) vmovhpd(xmm8, mem(rdx, rsi, 1)) // store (ca3,cb3) vmovlpd(xmm2, mem(rdx, r12, 1)) // store (cc3,cd3) vmovhpd(xmm2, mem(rdx, r13, 1)) // store (ce3,cf3) add(rdi, rdx) // c += cs_c; jmp(.CDONE) // jump to end.
label(.CCOLSTORBZ) vmovaps(ymm15, mem(rcx)) // store c00:c70 add(rdi, rcx) // c += cs_c; vmovaps(ymm14, mem(rdx)) // store c80:cf0 add(rdi, rdx) // c += cs_c; vmovaps(ymm13, mem(rcx)) // store c01:c71 add(rdi, rcx) // c += cs_c; vmovaps(ymm12, mem(rdx)) // store c81:cf1 add(rdi, rdx) // c += cs_c; vmovaps(ymm11, mem(rcx)) // store c02:c72 add(rdi, rcx) // c += cs_c; vmovaps(ymm10, mem(rdx)) // store c82:cf2 add(rdi, rdx) // c += cs_c; vmovaps(ymm9, mem(rcx)) // store c03:c73 add(rdi, rcx) // c += cs_c; vmovaps(ymm8, mem(rdx)) // store c83:cf3 add(rdi, rdx) // c += cs_c; label(.CDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c), // 8 [b_next] "m" (b_next)/*, // 9 [a_next] "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } #define MADDSUBPD_TO_YMM \ vfmaddpd(ymm13, ymm0, ymm4, ymm13)\ vfmaddpd(ymm9, ymm0, ymm5, ymm9)\ vpermilpd(imm(0x5), ymm0, ymm0)\ \ vfmaddpd(ymm12, ymm1, ymm4, ymm12)\ vperm2f128(imm(0x3), ymm2, ymm2, ymm4)\ vfmaddpd(ymm8, ymm1, ymm5, ymm8)\ vperm2f128(imm(0x3), ymm3, ymm3, ymm5)\ \ vpermilpd(imm(0x5), ymm1, ymm1)\ vmulpd(ymm0, ymm2, ymm6)\ vmulpd(ymm0, ymm3, ymm7)\ vaddsubpd(ymm6, ymm15, ymm15)\ vaddsubpd(ymm7, ymm11, ymm11)\ \ #define Z_ALPHA(i, j) \ vpermilpd(imm(0x5), ymm(i), ymm(j))\ vmulpd(ymm7, ymm(i), ymm(i))\ vmulpd(ymm6, ymm(j), ymm(j))\ vaddsubpd(ymm(j), ymm(i), ymm(i))\ void bli_zgemm_bulldozer_asm_4x4_fma4 ( dim_t k0, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, 
cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. //mov(var(b_next), r15) // load address of b_next. //mov(var(a_next), r14) // load address of a_next. vmovapd(mem(rax, 0*32), ymm0) // initialize loop by pre-loading vmovddup(mem(rbx, 0+0*32), ymm2) vmovddup(mem(rbx, 0+1*32), ymm3) mov(var(c), rcx) // load address of c mov(var(cs_c), rdi) // load cs_c lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(dcomplex) lea(mem(, rdi, 2), rdi) lea(mem(rcx, rdi, 2), r10) // load address of c + 2*cs_c; prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*cs_c prefetch(0, mem(r10, 3*8)) // prefetch c + 2*cs_c prefetch(0, mem(r10, rdi, 1, 3*8)) // prefetch c + 3*cs_c vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.ZCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.ZLOOPKITER) // MAIN LOOP // iteration 0 vmovapd(mem(rax, 1*32), ymm1) vfmaddpd(ymm15, ymm0, ymm2, ymm15) vperm2f128(imm(0x3), ymm2, ymm2, ymm4) vfmaddpd(ymm11, ymm0, ymm3, ymm11) vperm2f128(imm(0x3), ymm3, ymm3, ymm5) prefetch(0, mem(rax, 16*32)) vfmaddpd(ymm14, ymm1, ymm2, ymm14) vmovddup(mem(rbx, 8+0*32), ymm2) vfmaddpd(ymm10, ymm1, ymm3, ymm10) vmovddup(mem(rbx, 8+1*32), ymm3) MADDSUBPD_TO_YMM vmulpd(ymm1, ymm2, ymm6) vmovddup(mem(rbx, 0+2*32), ymm2) vmulpd(ymm1, ymm3, ymm7) vmovddup(mem(rbx, 0+3*32), ymm3) vaddsubpd(ymm6, ymm14, ymm14) vaddsubpd(ymm7, ymm10, ymm10) vmulpd(ymm0, ymm4, ymm6) vmulpd(ymm0, ymm5, ymm7) vmovapd(mem(rax, 2*32), ymm0) vaddsubpd(ymm6, ymm13, ymm13) vaddsubpd(ymm7, ymm9, ymm9) vmulpd(ymm1, ymm4, ymm6) vmulpd(ymm1, ymm5, ymm7) vaddsubpd(ymm6, ymm12, ymm12) vaddsubpd(ymm7, ymm8, ymm8) // iteration 1 vmovapd(mem(rax, 3*32), ymm1) vfmaddpd(ymm15, ymm0, ymm2, ymm15) vperm2f128(imm(0x3), ymm2, ymm2, ymm4) vfmaddpd(ymm11, ymm0, ymm3, ymm11) vperm2f128(imm(0x3), ymm3, ymm3, ymm5) prefetch(0, mem(rax, 18*32)) vfmaddpd(ymm14, ymm1, ymm2, ymm14) vmovddup(mem(rbx, 8+2*32), ymm2) vfmaddpd(ymm10, ymm1, ymm3, ymm10) vmovddup(mem(rbx, 8+3*32), ymm3) MADDSUBPD_TO_YMM vmulpd(ymm1, ymm2, ymm6) vmovddup(mem(rbx, 0+4*32), ymm2) vmulpd(ymm1, ymm3, ymm7) vmovddup(mem(rbx, 0+5*32), ymm3) vaddsubpd(ymm6, ymm14, ymm14) vaddsubpd(ymm7, ymm10, ymm10) vmulpd(ymm0, ymm4, ymm6) vmulpd(ymm0, ymm5, ymm7) vmovapd(mem(rax, 4*32), ymm0) vaddsubpd(ymm6, ymm13, ymm13) vaddsubpd(ymm7, ymm9, ymm9) vmulpd(ymm1, ymm4, ymm6) vmulpd(ymm1, ymm5, ymm7) vaddsubpd(ymm6, ymm12, ymm12) vaddsubpd(ymm7, ymm8, ymm8) // iteration 2 vmovapd(mem(rax, 5*32), ymm1) vfmaddpd(ymm15, ymm0, ymm2, ymm15) vperm2f128(imm(0x3), ymm2, ymm2, ymm4) vfmaddpd(ymm11, ymm0, ymm3, ymm11) vperm2f128(imm(0x3), ymm3, ymm3, ymm5) prefetch(0, mem(rax, 20*32)) vfmaddpd(ymm14, ymm1, ymm2, ymm14) vmovddup(mem(rbx, 8+4*32), ymm2) vfmaddpd(ymm10, ymm1, ymm3, ymm10) vmovddup(mem(rbx, 8+5*32), ymm3) 
MADDSUBPD_TO_YMM vmulpd(ymm1, ymm2, ymm6) vmovddup(mem(rbx, 0+6*32), ymm2) vmulpd(ymm1, ymm3, ymm7) vmovddup(mem(rbx, 0+7*32), ymm3) vaddsubpd(ymm6, ymm14, ymm14) vaddsubpd(ymm7, ymm10, ymm10) vmulpd(ymm0, ymm4, ymm6) vmulpd(ymm0, ymm5, ymm7) vmovapd(mem(rax, 6*32), ymm0) vaddsubpd(ymm6, ymm13, ymm13) vaddsubpd(ymm7, ymm9, ymm9) vmulpd(ymm1, ymm4, ymm6) vmulpd(ymm1, ymm5, ymm7) vaddsubpd(ymm6, ymm12, ymm12) vaddsubpd(ymm7, ymm8, ymm8) // iteration 3 vmovapd(mem(rax, 7*32), ymm1) vfmaddpd(ymm15, ymm0, ymm2, ymm15) vperm2f128(imm(0x3), ymm2, ymm2, ymm4) vfmaddpd(ymm11, ymm0, ymm3, ymm11) vperm2f128(imm(0x3), ymm3, ymm3, ymm5) prefetch(0, mem(rax, 22*32)) vfmaddpd(ymm14, ymm1, ymm2, ymm14) vmovddup(mem(rbx, 8+6*32), ymm2) vfmaddpd(ymm10, ymm1, ymm3, ymm10) vmovddup(mem(rbx, 8+7*32), ymm3) MADDSUBPD_TO_YMM vmulpd(ymm1, ymm2, ymm6) vmovddup(mem(rbx, 0+8*32), ymm2) vmulpd(ymm1, ymm3, ymm7) vmovddup(mem(rbx, 0+9*32), ymm3) vaddsubpd(ymm6, ymm14, ymm14) vaddsubpd(ymm7, ymm10, ymm10) vmulpd(ymm0, ymm4, ymm6) vmulpd(ymm0, ymm5, ymm7) vmovapd(mem(rax, 8*32), ymm0) vaddsubpd(ymm6, ymm13, ymm13) vaddsubpd(ymm7, ymm9, ymm9) vmulpd(ymm1, ymm4, ymm6) vmulpd(ymm1, ymm5, ymm7) vaddsubpd(ymm6, ymm12, ymm12) vaddsubpd(ymm7, ymm8, ymm8) add(imm(4*4*16), rbx) // b += 4*4 (unroll x nr) add(imm(4*4*16), rax) // a += 4*4 (unroll x mr) dec(rsi) // i -= 1; jne(.ZLOOPKITER) // iterate again if i != 0. label(.ZCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.ZPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.ZLOOPKLEFT) // EDGE LOOP // iteration 0 vmovapd(mem(rax, 1*32), ymm1) vfmaddpd(ymm15, ymm0, ymm2, ymm15) vperm2f128(imm(0x3), ymm2, ymm2, ymm4) vfmaddpd(ymm11, ymm0, ymm3, ymm11) vperm2f128(imm(0x3), ymm3, ymm3, ymm5) prefetch(0, mem(rax, 16*32)) vfmaddpd(ymm14, ymm1, ymm2, ymm14) vmovddup(mem(rbx, 8+0*32), ymm2) vfmaddpd(ymm10, ymm1, ymm3, ymm10) vmovddup(mem(rbx, 8+1*32), ymm3) MADDSUBPD_TO_YMM vmulpd(ymm1, ymm2, ymm6) vmovddup(mem(rbx, 0+2*32), ymm2) vmulpd(ymm1, ymm3, ymm7) vmovddup(mem(rbx, 0+3*32), ymm3) vaddsubpd(ymm6, ymm14, ymm14) vaddsubpd(ymm7, ymm10, ymm10) vmulpd(ymm0, ymm4, ymm6) vmulpd(ymm0, ymm5, ymm7) vmovapd(mem(rax, 2*32), ymm0) vaddsubpd(ymm6, ymm13, ymm13) vaddsubpd(ymm7, ymm9, ymm9) vmulpd(ymm1, ymm4, ymm6) vmulpd(ymm1, ymm5, ymm7) vaddsubpd(ymm6, ymm12, ymm12) vaddsubpd(ymm7, ymm8, ymm8) add(imm(4*1*16), rax) // a += 4 (1 x mr) add(imm(4*1*16), rbx) // b += 4 (1 x nr) dec(rsi) // i -= 1; jne(.ZLOOPKLEFT) // iterate again if i != 0. label(.ZPOSTACCUM) // ymm15: ymm13: ymm11: ymm9: // ( ab00 ( ab01 ( ab02 ( ab03 // ab10 ab11 ab12 ab13 // ab21 ab20 ab23 ab22 // ab31 ) ab30 ) ab33 ) ab32 ) // ymm14: ymm12: ymm10: ymm8: // ( ab40 ( ab41 ( ab42 ( ab43 // ab50 ab51 ab52 ab53 // ab61 ab60 ab63 ab62 // ab71 ) ab70 ) ab73 ) ab72 ) vmovapd(ymm15, ymm7) vperm2f128(imm(0x12), ymm15, ymm13, ymm15) vperm2f128(imm(0x30), ymm7, ymm13, ymm13) vmovapd(ymm11, ymm7) vperm2f128(imm(0x12), ymm11, ymm9, ymm11) vperm2f128(imm(0x30), ymm7, ymm9, ymm9) vmovapd(ymm14, ymm7) vperm2f128(imm(0x12), ymm14, ymm12, ymm14) vperm2f128(imm(0x30), ymm7, ymm12, ymm12) vmovapd(ymm10, ymm7) vperm2f128(imm(0x12), ymm10, ymm8, ymm10) vperm2f128(imm(0x30), ymm7, ymm8, ymm8) // ymm15: ymm13: ymm11: ymm9: // ( ab00 ( ab01 ( ab02 ( ab03 // ab10 ab11 ab12 ab13 // ab20 ab21 ab22 ab23 // ab30 ) ab31 ) ab32 ) ab33 ) // ymm14: ymm12: ymm10: ymm8: // ( ab40 ( ab41 ( ab42 ( ab43 // ab50 ab51 ab52 ab53 // ab60 ab61 ab62 ab63 // ab70 ) ab71 ) ab72 ) ab73 ) // scale by alpha mov(var(alpha), 
rax) // load address of alpha vbroadcastsd(mem(rax), ymm7) // load alpha_r and duplicate vbroadcastsd(mem(rax, 8), ymm6) // load alpha_i and duplicate // complex-scale each accumulator by alpha; the second argument names a scratch register Z_ALPHA(15, 3) Z_ALPHA(14, 2) Z_ALPHA(13, 1) Z_ALPHA(12, 0) Z_ALPHA(11, 3) Z_ALPHA(10, 2) Z_ALPHA(9, 1) Z_ALPHA(8, 0) mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rbx), ymm7) // load beta_r and duplicate (replaces alpha_r) vbroadcastsd(mem(rbx, 8), ymm6) // load beta_i and duplicate (replaces alpha_i) mov(var(rs_c), rsi) // load rs_c lea(mem(, rsi, 8), rsi) // rsi = rs_c * sizeof(dcomplex) lea(mem(, rsi, 2), rsi) // (scaling done in two steps: 8 * 2 = 16 bytes) lea(mem(rcx, rsi, 2), rdx) // load address of c + 2*rs_c; // determine if // c % 32 == 0, AND // 16*cs_c % 32 == 0, AND // rs_c == 1 // ie: aligned, ldim aligned, and // column-stored cmp(imm(16), rsi) // set ZF if (16*rs_c) == 16. sete(bl) // bl = ( ZF == 1 ? 1 : 0 ); test(imm(31), rcx) // set ZF if (c & 31) is zero. setz(bh) // bh = ( ZF == 1 ? 1 : 0 ); test(imm(31), rdi) // set ZF if ((16*cs_c) & 31) is zero. setz(al) // al = ( ZF == 1 ? 1 : 0 ); // and(bl,bh) followed by // and(bh,al) will reveal result // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm7) // set ZF if beta_r == 0. sete(r8b) // r8b = ( ZF == 1 ? 1 : 0 ); vucomisd(xmm0, xmm6) // set ZF if beta_i == 0. sete(r9b) // r9b = ( ZF == 1 ? 1 : 0 ); and(r8b, r9b) // r9b &= r8b; ZF is cleared only if both flags are 1. jne(.ZBETAZERO) // if ZF = 0, jump to beta == 0 case // check if aligned/column-stored and(bl, bh) // bh &= bl; ZF is set if the result is 0. and(bh, al) // al &= bh; ZF is cleared only if all three flags are 1. 
jne(.ZCOLSTORED) // jump to column storage case label(.ZGENSTORED) // update c00:c30 vmovupd(mem(rcx), xmm0) // load (c00,c10) into xmm0 vmovupd(mem(rcx, rsi, 1), xmm2) // load (c20,c30) into xmm2 vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:1],xmm2) Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm15, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[2:3] vmovupd(xmm0, mem(rcx)) // store (c00,c10) vmovupd(xmm2, mem(rcx, rsi, 1)) // store (c20,c30) add(rdi, rcx) // c += cs_c; // update c40:c70 vmovupd(mem(rdx), xmm0) // load (c40,c50) into xmm0 vmovupd(mem(rdx, rsi, 1), xmm2) // load (c60,c70) into xmm2 vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:1],xmm2) Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm14, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[2:3] vmovupd(xmm0, mem(rdx)) // store (c40,c50) vmovupd(xmm2, mem(rdx, rsi, 1)) // store (c60,c70) add(rdi, rdx) // c += cs_c; // update c01:c31 vmovupd(mem(rcx), xmm0) // load (c01,c11) into xmm0 vmovupd(mem(rcx, rsi, 1), xmm2) // load (c21,c31) into xmm2 vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:1],xmm2) Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm13, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[2:3] vmovupd(xmm0, mem(rcx)) // store (c01,c11) vmovupd(xmm2, mem(rcx, rsi, 1)) // store (c21,c31) add(rdi, rcx) // c += cs_c; // update c41:c71 vmovupd(mem(rdx), xmm0) // load (c41,c51) into xmm0 vmovupd(mem(rdx, rsi, 1), xmm2) // load (c61,c71) into xmm2 vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:1],xmm2) Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm12, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[2:3] vmovupd(xmm0, mem(rdx)) // store (c41,c51) vmovupd(xmm2, mem(rdx, rsi, 1)) // store (c61,c71) add(rdi, rdx) // c += cs_c; // update c02:c32 vmovupd(mem(rcx), xmm0) // load (c02,c12) into xmm0 
vmovupd(mem(rcx, rsi, 1), xmm2) // load (c22,c32) into xmm2 vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:1],xmm2) Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm11, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[2:3] vmovupd(xmm0, mem(rcx)) // store (c02,c12) vmovupd(xmm2, mem(rcx, rsi, 1)) // store (c22,c32) add(rdi, rcx) // c += cs_c; // update c42:c72 vmovupd(mem(rdx), xmm0) // load (c42,c52) into xmm0 vmovupd(mem(rdx, rsi, 1), xmm2) // load (c62,c72) into xmm2 vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:1],xmm2) Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm10, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[2:3] vmovupd(xmm0, mem(rdx)) // store (c42,c52) vmovupd(xmm2, mem(rdx, rsi, 1)) // store (c62,c72) add(rdi, rdx) // c += cs_c; // update c03:c33 vmovupd(mem(rcx), xmm0) // load (c03,c13) into xmm0 vmovupd(mem(rcx, rsi, 1), xmm2) // load (c23,c33) into xmm2 vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:1],xmm2) Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm9, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[2:3] vmovupd(xmm0, mem(rcx)) // store (c03,c13) vmovupd(xmm2, mem(rcx, rsi, 1)) // store (c23,c33) add(rdi, rcx) // c += cs_c; // update c43:c73 vmovupd(mem(rdx), xmm0) // load (c43,c53) into xmm0 vmovupd(mem(rdx, rsi, 1), xmm2) // load (c63,c73) into xmm2 vinsertf128(imm(1), xmm2, ymm0, ymm0) // ymm0 := (ymm0[0:1],xmm2) Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm8, ymm0, ymm0) // add the gemm result to ymm0 vextractf128(imm(1), ymm0, xmm2) // xmm2 := ymm0[2:3] vmovupd(xmm0, mem(rdx)) // store (c43,c53) vmovupd(xmm2, mem(rdx, rsi, 1)) // store (c63,c73) jmp(.ZDONE) // jump to end. 
label(.ZCOLSTORED) // update c00:c30 vmovapd(mem(rcx), ymm0) // load c00:c30 into ymm0 Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm15, ymm0, ymm0) // add the gemm result to ymm0 vmovapd(ymm0, mem(rcx)) // store c00:c30 add(rdi, rcx) // c += cs_c; // update c40:c70 vmovapd(mem(rdx), ymm0) // load c40:c70 into ymm0 Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm14, ymm0, ymm0) // add the gemm result to ymm0 vmovapd(ymm0, mem(rdx)) // store c40:c70 add(rdi, rdx) // c += cs_c; // update c01:c31 vmovapd(mem(rcx), ymm0) // load c01:c31 into ymm0 Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm13, ymm0, ymm0) // add the gemm result to ymm0 vmovapd(ymm0, mem(rcx)) // store c01:c31 add(rdi, rcx) // c += cs_c; // update c41:c71 vmovapd(mem(rdx), ymm0) // load c41:c71 into ymm0 Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm12, ymm0, ymm0) // add the gemm result to ymm0 vmovapd(ymm0, mem(rdx)) // store c41:c71 add(rdi, rdx) // c += cs_c; // update c02:c32 vmovapd(mem(rcx), ymm0) // load c02:c32 into ymm0 Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm11, ymm0, ymm0) // add the gemm result to ymm0 vmovapd(ymm0, mem(rcx)) // store c02:c32 add(rdi, rcx) // c += cs_c; // update c42:c72 vmovapd(mem(rdx), ymm0) // load c42:c72 into ymm0 Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm10, ymm0, ymm0) // add the gemm result to ymm0 vmovapd(ymm0, mem(rdx)) // store c42:c72 add(rdi, rdx) // c += cs_c; // update c03:c33 vmovapd(mem(rcx), ymm0) // load c03:c33 into ymm0 Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm9, ymm0, ymm0) // add the gemm result to ymm0 vmovapd(ymm0, mem(rcx)) // store c03:c33 add(rdi, rcx) // c += cs_c; // update c43:c73 vmovapd(mem(rdx), ymm0) // load c43:c73 into ymm0 Z_ALPHA(0, 2) // scale ymm0 by beta vaddpd(ymm8, ymm0, ymm0) // add the gemm result to ymm0 vmovapd(ymm0, mem(rdx)) // store c43:c73 jmp(.ZDONE) // jump to end. label(.ZBETAZERO) // check if aligned/column-stored // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. 
and(bh, al) // set ZF if bh & al == 1. jne(.ZCOLSTORBZ) // jump to column storage case label(.ZGENSTORBZ) // update c00:c30 vextractf128(imm(1), ymm15, xmm2) vmovupd(xmm15, mem(rcx)) // store (c00,c10) vmovupd(xmm2, mem(rcx, rsi, 1)) // store (c20,c30) add(rdi, rcx) // c += cs_c; // update c40:c70 vextractf128(imm(1), ymm14, xmm2) vmovupd(xmm14, mem(rdx)) // store (c40,c50) vmovupd(xmm2, mem(rdx, rsi, 1)) // store (c60,c70) add(rdi, rdx) // c += cs_c; // update c01:c31 vextractf128(imm(1), ymm13, xmm2) vmovupd(xmm13, mem(rcx)) // store (c01,c11) vmovupd(xmm2, mem(rcx, rsi, 1)) // store (c21,c31) add(rdi, rcx) // c += cs_c; // update c41:c71 vextractf128(imm(1), ymm12, xmm2) vmovupd(xmm12, mem(rdx)) // store (c41,c51) vmovupd(xmm2, mem(rdx, rsi, 1)) // store (c61,c71) add(rdi, rdx) // c += cs_c; // update c02:c32 vextractf128(imm(1), ymm11, xmm2) vmovupd(xmm11, mem(rcx)) // store (c02,c12) vmovupd(xmm2, mem(rcx, rsi, 1)) // store (c22,c32) add(rdi, rcx) // c += cs_c; // update c42:c72 vextractf128(imm(1), ymm10, xmm2) vmovupd(xmm10, mem(rdx)) // store (c42,c52) vmovupd(xmm2, mem(rdx, rsi, 1)) // store (c62,c72) add(rdi, rdx) // c += cs_c; // update c03:c33 vextractf128(imm(1), ymm9, xmm2) vmovupd(xmm9, mem(rcx)) // store (c03,c13) vmovupd(xmm2, mem(rcx, rsi, 1)) // store (c23,c33) add(rdi, rcx) // c += cs_c; // update c43:c73 vextractf128(imm(1), ymm8, xmm2) vmovupd(xmm8, mem(rdx)) // store (c43,c53) vmovupd(xmm2, mem(rdx, rsi, 1)) // store (c63,c73) jmp(.ZDONE) // jump to end. 
label(.ZCOLSTORBZ) vmovapd(ymm15, mem(rcx)) // store c00:c30 add(rdi, rcx) // c += cs_c; vmovapd(ymm14, mem(rdx)) // store c40:c70 add(rdi, rdx) // c += cs_c; vmovapd(ymm13, mem(rcx)) // store c01:c31 add(rdi, rcx) // c += cs_c; vmovapd(ymm12, mem(rdx)) // store c41:c71 add(rdi, rdx) // c += cs_c; vmovapd(ymm11, mem(rcx)) // store c02:c32 add(rdi, rcx) // c += cs_c; vmovapd(ymm10, mem(rdx)) // store c42:c72 add(rdi, rdx) // c += cs_c; vmovapd(ymm9, mem(rcx)) // store c03:c33 add(rdi, rcx) // c += cs_c; vmovapd(ymm8, mem(rdx)) // store c43:c73 label(.ZDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c)/*, // 8 [b_next] "m" (b_next), // 9 [a_next] "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15", "memory" ) } blis-0.6.1/kernels/bulldozer/bli_kernels_bulldozer.h000066400000000000000000000035621360743507500226600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ GEMM_UKR_PROT( float, s, gemm_bulldozer_asm_8x8_fma4 ) GEMM_UKR_PROT( double, d, gemm_bulldozer_asm_4x6_fma4 ) GEMM_UKR_PROT( scomplex, c, gemm_bulldozer_asm_8x4_fma4 ) GEMM_UKR_PROT( dcomplex, z, gemm_bulldozer_asm_4x4_fma4 ) blis-0.6.1/kernels/generic/000077500000000000000000000000001360743507500155405ustar00rootroot00000000000000blis-0.6.1/kernels/generic/generic.txt000066400000000000000000000012441360743507500177160ustar00rootroot00000000000000 generic.txt ----------- This file in 'kernels/generic' exists only to force 'git' to track what would otherwise be an empty directory. Having this empty directory is necessary because the 'generic' singleton family is defined in the configuration registry as: generic: generic which implies that the 'generic' sub-configuration depends on the 'generic' kernel set (because there were no complementary kernel sets specified via '/'). Thus, we need there to be a kernel set named 'generic', but we don't actually refer to any such kernels in BLIS. 
In other words, this file is simply a workaround to a quirk in the syntax and semantics of the config_registry file. -FGVZ blis-0.6.1/kernels/haswell/000077500000000000000000000000001360743507500155635ustar00rootroot00000000000000blis-0.6.1/kernels/haswell/3/000077500000000000000000000000001360743507500157255ustar00rootroot00000000000000blis-0.6.1/kernels/haswell/3/bli_gemm_haswell_asm_d6x8.c000066400000000000000000001753161360743507500231110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" #define SGEMM_INPUT_GS_BETA_NZ \ vmovlps(mem(rcx), xmm0, xmm0) \ vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) \ vmovlps(mem(rcx, rsi, 2), xmm1, xmm1) \ vmovhps(mem(rcx, r13, 1), xmm1, xmm1) \ vshufps(imm(0x88), xmm1, xmm0, xmm0) \ vmovlps(mem(rcx, rsi, 4), xmm2, xmm2) \ vmovhps(mem(rcx, r15, 1), xmm2, xmm2) \ /* We can't use vmovhps for loading the last element because that might result in reading beyond valid memory. (vmov[lh]ps load pairs of adjacent floats at a time.) So we need to use vmovss instead. But since we're limited to using ymm0 through ymm2 (ymm3 contains beta and ymm4 through ymm15 contain the microtile) and due to the way vmovss zeros out all bits above 31, we have to load element 7 before element 6. 
*/ \ vmovss(mem(rcx, r10, 1), xmm1) \ vpermilps(imm(0xcf), xmm1, xmm1) \ vmovlps(mem(rcx, r13, 2), xmm1, xmm1) \ /*vmovhps(mem(rcx, r10, 1), xmm1, xmm1)*/ \ vshufps(imm(0x88), xmm1, xmm2, xmm2) \ vperm2f128(imm(0x20), ymm2, ymm0, ymm0) #define SGEMM_OUTPUT_GS_BETA_NZ \ vextractf128(imm(1), ymm0, xmm2) \ vmovss(xmm0, mem(rcx)) \ vpermilps(imm(0x39), xmm0, xmm1) \ vmovss(xmm1, mem(rcx, rsi, 1)) \ vpermilps(imm(0x39), xmm1, xmm0) \ vmovss(xmm0, mem(rcx, rsi, 2)) \ vpermilps(imm(0x39), xmm0, xmm1) \ vmovss(xmm1, mem(rcx, r13, 1)) \ vmovss(xmm2, mem(rcx, rsi, 4)) \ vpermilps(imm(0x39), xmm2, xmm1) \ vmovss(xmm1, mem(rcx, r15, 1)) \ vpermilps(imm(0x39), xmm1, xmm2) \ vmovss(xmm2, mem(rcx, r13, 2)) \ vpermilps(imm(0x39), xmm2, xmm1) \ vmovss(xmm1, mem(rcx, r10, 1)) void bli_sgemm_haswell_asm_6x16 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. //mov(%9, r15) // load address of b_next. 
add(imm(32*4), rbx) // initialize loop by pre-loading vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(float) lea(mem(rdi, rdi, 2), r13) // r13 = 3*rs_c; lea(mem(rcx, r13, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.SCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.SLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 64*4)) vbroadcastss(mem(rax, 0*4), ymm2) vbroadcastss(mem(rax, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 2*4), ymm2) vbroadcastss(mem(rax, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 4*4), ymm2) vbroadcastss(mem(rax, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, -2*32), ymm0) vmovaps(mem(rbx, -1*32), ymm1) // iteration 1 vbroadcastss(mem(rax, 6*4), ymm2) vbroadcastss(mem(rax, 7*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 8*4), ymm2) vbroadcastss(mem(rax, 9*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 10*4), ymm2) vbroadcastss(mem(rax, 11*4), ymm3) 
vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, 0*32), ymm0) vmovaps(mem(rbx, 1*32), ymm1) // iteration 2 prefetch(0, mem(rax, 76*4)) vbroadcastss(mem(rax, 12*4), ymm2) vbroadcastss(mem(rax, 13*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 14*4), ymm2) vbroadcastss(mem(rax, 15*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 16*4), ymm2) vbroadcastss(mem(rax, 17*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, 2*32), ymm0) vmovaps(mem(rbx, 3*32), ymm1) // iteration 3 vbroadcastss(mem(rax, 18*4), ymm2) vbroadcastss(mem(rax, 19*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 20*4), ymm2) vbroadcastss(mem(rax, 21*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 22*4), ymm2) vbroadcastss(mem(rax, 23*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(4*6*4), rax) // a += 4*6 (unroll x mr) add(imm(4*16*4), rbx) // b += 4*16 (unroll x nr) vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.SLOOPKITER) // iterate again if i != 0. label(.SCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.SPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.SLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 64*4)) vbroadcastss(mem(rax, 0*4), ymm2) vbroadcastss(mem(rax, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 2*4), ymm2) vbroadcastss(mem(rax, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 4*4), ymm2) vbroadcastss(mem(rax, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(1*6*4), rax) // a += 1*6 (unroll x mr) add(imm(1*16*4), rbx) // b += 1*16 (unroll x nr) vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.SLOOPKLEFT) // iterate again if i != 0. label(.SPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastss(mem(rax), ymm0) // load alpha and duplicate vbroadcastss(mem(rbx), ymm3) // load beta and duplicate vmulps(ymm0, ymm4, ymm4) // scale by alpha vmulps(ymm0, ymm5, ymm5) vmulps(ymm0, ymm6, ymm6) vmulps(ymm0, ymm7, ymm7) vmulps(ymm0, ymm8, ymm8) vmulps(ymm0, ymm9, ymm9) vmulps(ymm0, ymm10, ymm10) vmulps(ymm0, ymm11, ymm11) vmulps(ymm0, ymm12, ymm12) vmulps(ymm0, ymm13, ymm13) vmulps(ymm0, ymm14, ymm14) vmulps(ymm0, ymm15, ymm15) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 4), rsi) // rsi = cs_c * sizeof(float) lea(mem(rcx, rsi, 8), rdx) // load address of c + 8*cs_c; lea(mem(rcx, rdi, 4), r14) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), r13) // r13 = 3*cs_c; lea(mem(rsi, rsi, 4), r15) // r15 = 5*cs_c; lea(mem(r13, rsi, 4), r10) // r10 = 7*cs_c; // now avoid loading C if beta == 0 vxorps(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomiss(xmm0, xmm3) // set ZF if beta == 0. je(.SBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(4), rsi) // set ZF if (4*cs_c) == 4. 
jz(.SROWSTORED) // jump to row storage case cmp(imm(4), rdi) // set ZF if (4*cs_c) == 4. jz(.SCOLSTORED) // jump to column storage case label(.SGENSTORED) SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm4, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm6, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm8, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm10, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm12, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm14, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ //add(rdi, rcx) // c += rs_c; mov(rdx, rcx) // rcx = c + 8*cs_c SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm5, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm7, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm9, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm11, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm13, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm15, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ //add(rdi, rcx) // c += rs_c; jmp(.SDONE) // jump to end. 
label(.SROWSTORED) vfmadd231ps(mem(rcx), ymm3, ymm4) vmovups(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm5) vmovups(ymm5, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm6) vmovups(ymm6, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm7) vmovups(ymm7, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm8) vmovups(ymm8, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm9) vmovups(ymm9, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm10) vmovups(ymm10, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm11) vmovups(ymm11, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm12) vmovups(ymm12, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm13) vmovups(ymm13, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm14) vmovups(ymm14, mem(rcx)) //add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm15) vmovups(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.SDONE) // jump to end. label(.SCOLSTORED) vbroadcastss(mem(rbx), ymm3) vunpcklps(ymm6, ymm4, ymm0) vunpcklps(ymm10, ymm8, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vfmadd231ps(mem(rcx), xmm3, xmm0) vfmadd231ps(mem(rcx, rsi, 4), xmm3, xmm2) vmovups(xmm0, mem(rcx)) // store ( gamma00..gamma30 ) vmovups(xmm2, mem(rcx, rsi, 4)) // store ( gamma04..gamma34 ) vextractf128(imm(0x1), ymm1, xmm2) vfmadd231ps(mem(rcx, rsi, 1), xmm3, xmm1) vfmadd231ps(mem(rcx, r15, 1), xmm3, xmm2) vmovups(xmm1, mem(rcx, rsi, 1)) // store ( gamma01..gamma31 ) vmovups(xmm2, mem(rcx, r15, 1)) // store ( gamma05..gamma35 ) vunpckhps(ymm6, ymm4, ymm0) vunpckhps(ymm10, ymm8, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vfmadd231ps(mem(rcx, rsi, 2), xmm3, xmm0) vfmadd231ps(mem(rcx, r13, 2), xmm3, xmm2) vmovups(xmm0, mem(rcx, rsi, 2)) // store ( gamma02..gamma32 ) vmovups(xmm2, mem(rcx, r13, 2)) 
// store ( gamma06..gamma36 ) vextractf128(imm(0x1), ymm1, xmm2) vfmadd231ps(mem(rcx, r13, 1), xmm3, xmm1) vfmadd231ps(mem(rcx, r10, 1), xmm3, xmm2) vmovups(xmm1, mem(rcx, r13, 1)) // store ( gamma03..gamma33 ) vmovups(xmm2, mem(rcx, r10, 1)) // store ( gamma07..gamma37 ) lea(mem(rcx, rsi, 8), rcx) // rcx += 8*cs_c vunpcklps(ymm14, ymm12, ymm0) vextractf128(imm(0x1), ymm0, xmm2) vmovlpd(mem(r14), xmm1, xmm1) vmovhpd(mem(r14, rsi, 1), xmm1, xmm1) vfmadd231ps(xmm1, xmm3, xmm0) vmovlpd(xmm0, mem(r14)) // store ( gamma40..gamma50 ) vmovhpd(xmm0, mem(r14, rsi, 1)) // store ( gamma41..gamma51 ) vmovlpd(mem(r14, rsi, 4), xmm1, xmm1) vmovhpd(mem(r14, r15, 1), xmm1, xmm1) vfmadd231ps(xmm1, xmm3, xmm2) vmovlpd(xmm2, mem(r14, rsi, 4)) // store ( gamma44..gamma54 ) vmovhpd(xmm2, mem(r14, r15, 1)) // store ( gamma45..gamma55 ) vunpckhps(ymm14, ymm12, ymm0) vextractf128(imm(0x1), ymm0, xmm2) vmovlpd(mem(r14, rsi, 2), xmm1, xmm1) vmovhpd(mem(r14, r13, 1), xmm1, xmm1) vfmadd231ps(xmm1, xmm3, xmm0) vmovlpd(xmm0, mem(r14, rsi, 2)) // store ( gamma42..gamma52 ) vmovhpd(xmm0, mem(r14, r13, 1)) // store ( gamma43..gamma53 ) vmovlpd(mem(r14, r13, 2), xmm1, xmm1) vmovhpd(mem(r14, r10, 1), xmm1, xmm1) vfmadd231ps(xmm1, xmm3, xmm2) vmovlpd(xmm2, mem(r14, r13, 2)) // store ( gamma46..gamma56 ) vmovhpd(xmm2, mem(r14, r10, 1)) // store ( gamma47..gamma57 ) lea(mem(r14, rsi, 8), r14) // r14 += 8*cs_c vunpcklps(ymm7, ymm5, ymm0) vunpcklps(ymm11, ymm9, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vfmadd231ps(mem(rcx), xmm3, xmm0) vfmadd231ps(mem(rcx, rsi, 4), xmm3, xmm2) vmovups(xmm0, mem(rcx)) // store ( gamma00..gamma30 ) vmovups(xmm2, mem(rcx, rsi, 4)) // store ( gamma04..gamma34 ) vextractf128(imm(0x1), ymm1, xmm2) vfmadd231ps(mem(rcx, rsi, 1), xmm3, xmm1) vfmadd231ps(mem(rcx, r15, 1), xmm3, xmm2) vmovups(xmm1, mem(rcx, rsi, 1)) // store ( gamma01..gamma31 ) vmovups(xmm2, mem(rcx, 
r15, 1)) // store ( gamma05..gamma35 ) vunpckhps(ymm7, ymm5, ymm0) vunpckhps(ymm11, ymm9, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vfmadd231ps(mem(rcx, rsi, 2), xmm3, xmm0) vfmadd231ps(mem(rcx, r13, 2), xmm3, xmm2) vmovups(xmm0, mem(rcx, rsi, 2)) // store ( gamma02..gamma32 ) vmovups(xmm2, mem(rcx, r13, 2)) // store ( gamma06..gamma36 ) vextractf128(imm(0x1), ymm1, xmm2) vfmadd231ps(mem(rcx, r13, 1), xmm3, xmm1) vfmadd231ps(mem(rcx, r10, 1), xmm3, xmm2) vmovups(xmm1, mem(rcx, r13, 1)) // store ( gamma03..gamma33 ) vmovups(xmm2, mem(rcx, r10, 1)) // store ( gamma07..gamma37 ) //lea(mem(rcx, rsi, 8), rcx) // rcx += 8*cs_c vunpcklps(ymm15, ymm13, ymm0) vextractf128(imm(0x1), ymm0, xmm2) vmovlpd(mem(r14), xmm1, xmm1) vmovhpd(mem(r14, rsi, 1), xmm1, xmm1) vfmadd231ps(xmm1, xmm3, xmm0) vmovlpd(xmm0, mem(r14)) // store ( gamma40..gamma50 ) vmovhpd(xmm0, mem(r14, rsi, 1)) // store ( gamma41..gamma51 ) vmovlpd(mem(r14, rsi, 4), xmm1, xmm1) vmovhpd(mem(r14, r15, 1), xmm1, xmm1) vfmadd231ps(xmm1, xmm3, xmm2) vmovlpd(xmm2, mem(r14, rsi, 4)) // store ( gamma44..gamma54 ) vmovhpd(xmm2, mem(r14, r15, 1)) // store ( gamma45..gamma55 ) vunpckhps(ymm15, ymm13, ymm0) vextractf128(imm(0x1), ymm0, xmm2) vmovlpd(mem(r14, rsi, 2), xmm1, xmm1) vmovhpd(mem(r14, r13, 1), xmm1, xmm1) vfmadd231ps(xmm1, xmm3, xmm0) vmovlpd(xmm0, mem(r14, rsi, 2)) // store ( gamma42..gamma52 ) vmovhpd(xmm0, mem(r14, r13, 1)) // store ( gamma43..gamma53 ) vmovlpd(mem(r14, r13, 2), xmm1, xmm1) vmovhpd(mem(r14, r10, 1), xmm1, xmm1) vfmadd231ps(xmm1, xmm3, xmm2) vmovlpd(xmm2, mem(r14, r13, 2)) // store ( gamma46..gamma56 ) vmovhpd(xmm2, mem(r14, r10, 1)) // store ( gamma47..gamma57 ) //lea(mem(r14, rsi, 8), r14) // r14 += 8*cs_c jmp(.SDONE) // jump to end. label(.SBETAZERO) cmp(imm(4), rsi) // set ZF if (4*cs_c) == 4. 
jz(.SROWSTORBZ) // jump to row storage case cmp(imm(4), rdi) // set ZF if (4*cs_c) == 4. jz(.SCOLSTORBZ) // jump to column storage case label(.SGENSTORBZ) vmovaps(ymm4, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm6, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm8, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm10, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm12, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm14, ymm0) SGEMM_OUTPUT_GS_BETA_NZ //add(rdi, rcx) // c += rs_c; mov(rdx, rcx) // rcx = c + 8*cs_c vmovaps(ymm5, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm7, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm9, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm11, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm13, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovaps(ymm15, ymm0) SGEMM_OUTPUT_GS_BETA_NZ //add(rdi, rcx) // c += rs_c; jmp(.SDONE) // jump to end. label(.SROWSTORBZ) vmovups(ymm4, mem(rcx)) add(rdi, rcx) vmovups(ymm5, mem(rdx)) add(rdi, rdx) vmovups(ymm6, mem(rcx)) add(rdi, rcx) vmovups(ymm7, mem(rdx)) add(rdi, rdx) vmovups(ymm8, mem(rcx)) add(rdi, rcx) vmovups(ymm9, mem(rdx)) add(rdi, rdx) vmovups(ymm10, mem(rcx)) add(rdi, rcx) vmovups(ymm11, mem(rdx)) add(rdi, rdx) vmovups(ymm12, mem(rcx)) add(rdi, rcx) vmovups(ymm13, mem(rdx)) add(rdi, rdx) vmovups(ymm14, mem(rcx)) //add(rdi, rcx) vmovups(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.SDONE) // jump to end. 
label(.SCOLSTORBZ) vunpcklps(ymm6, ymm4, ymm0) vunpcklps(ymm10, ymm8, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vmovups(xmm0, mem(rcx)) // store ( gamma00..gamma30 ) vmovups(xmm2, mem(rcx, rsi, 4)) // store ( gamma04..gamma34 ) vextractf128(imm(0x1), ymm1, xmm2) vmovups(xmm1, mem(rcx, rsi, 1)) // store ( gamma01..gamma31 ) vmovups(xmm2, mem(rcx, r15, 1)) // store ( gamma05..gamma35 ) vunpckhps(ymm6, ymm4, ymm0) vunpckhps(ymm10, ymm8, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vmovups(xmm0, mem(rcx, rsi, 2)) // store ( gamma02..gamma32 ) vmovups(xmm2, mem(rcx, r13, 2)) // store ( gamma06..gamma36 ) vextractf128(imm(0x1), ymm1, xmm2) vmovups(xmm1, mem(rcx, r13, 1)) // store ( gamma03..gamma33 ) vmovups(xmm2, mem(rcx, r10, 1)) // store ( gamma07..gamma37 ) lea(mem(rcx, rsi, 8), rcx) // rcx += 8*cs_c vunpcklps(ymm14, ymm12, ymm0) vextractf128(imm(0x1), ymm0, xmm2) vmovlpd(xmm0, mem(r14)) // store ( gamma40..gamma50 ) vmovhpd(xmm0, mem(r14, rsi, 1)) // store ( gamma41..gamma51 ) vmovlpd(xmm2, mem(r14, rsi, 4)) // store ( gamma44..gamma54 ) vmovhpd(xmm2, mem(r14, r15, 1)) // store ( gamma45..gamma55 ) vunpckhps(ymm14, ymm12, ymm0) vextractf128(imm(0x1), ymm0, xmm2) vmovlpd(xmm0, mem(r14, rsi, 2)) // store ( gamma42..gamma52 ) vmovhpd(xmm0, mem(r14, r13, 1)) // store ( gamma43..gamma53 ) vmovlpd(xmm2, mem(r14, r13, 2)) // store ( gamma46..gamma56 ) vmovhpd(xmm2, mem(r14, r10, 1)) // store ( gamma47..gamma57 ) lea(mem(r14, rsi, 8), r14) // r14 += 8*cs_c vunpcklps(ymm7, ymm5, ymm0) vunpcklps(ymm11, ymm9, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vmovups(xmm0, mem(rcx)) // store ( gamma00..gamma30 ) vmovups(xmm2, mem(rcx, rsi, 4)) // store ( 
gamma04..gamma34 ) vextractf128(imm(0x1), ymm1, xmm2) vmovups(xmm1, mem(rcx, rsi, 1)) // store ( gamma01..gamma31 ) vmovups(xmm2, mem(rcx, r15, 1)) // store ( gamma05..gamma35 ) vunpckhps(ymm7, ymm5, ymm0) vunpckhps(ymm11, ymm9, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vmovups(xmm0, mem(rcx, rsi, 2)) // store ( gamma02..gamma32 ) vmovups(xmm2, mem(rcx, r13, 2)) // store ( gamma06..gamma36 ) vextractf128(imm(0x1), ymm1, xmm2) vmovups(xmm1, mem(rcx, r13, 1)) // store ( gamma03..gamma33 ) vmovups(xmm2, mem(rcx, r10, 1)) // store ( gamma07..gamma37 ) //lea(mem(rcx, rsi, 8), rcx) // rcx += 8*cs_c vunpcklps(ymm15, ymm13, ymm0) vextractf128(imm(0x1), ymm0, xmm2) vmovlpd(xmm0, mem(r14)) // store ( gamma40..gamma50 ) vmovhpd(xmm0, mem(r14, rsi, 1)) // store ( gamma41..gamma51 ) vmovlpd(xmm2, mem(r14, rsi, 4)) // store ( gamma44..gamma54 ) vmovhpd(xmm2, mem(r14, r15, 1)) // store ( gamma45..gamma55 ) vunpckhps(ymm15, ymm13, ymm0) vextractf128(imm(0x1), ymm0, xmm2) vmovlpd(xmm0, mem(r14, rsi, 2)) // store ( gamma42..gamma52 ) vmovhpd(xmm0, mem(r14, r13, 1)) // store ( gamma43..gamma53 ) vmovlpd(xmm2, mem(r14, r13, 2)) // store ( gamma46..gamma56 ) vmovhpd(xmm2, mem(r14, r10, 1)) // store ( gamma47..gamma57 ) //lea(mem(r14, rsi, 8), r14) // r14 += 8*cs_c label(.SDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c)/*, // 8 [b_next] "m" (b_next), // 9 [a_next] "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } #define 
DGEMM_INPUT_GS_BETA_NZ \ vmovlpd(mem(rcx), xmm0, xmm0) \ vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) \ vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1) \ vmovhpd(mem(rcx, r13, 1), xmm1, xmm1) \ vperm2f128(imm(0x20), ymm1, ymm0, ymm0) /*\ vmovlpd(mem(rcx, rsi, 4), xmm2, xmm2) \ vmovhpd(mem(rcx, r15, 1), xmm2, xmm2) \ vmovlpd(mem(rcx, r13, 2), xmm1, xmm1) \ vmovhpd(mem(rcx, r10, 1), xmm1, xmm1) \ vperm2f128(imm(0x20), ymm1, ymm2, ymm2)*/ /* General-stride helpers for the double-precision kernel. DGEMM_INPUT_GS_BETA_NZ (defined just above) gathers four doubles from rcx, rcx+rsi, rcx+2*rsi and rcx+r13 into ymm0, using xmm1 as scratch. DGEMM_OUTPUT_GS_BETA_NZ (below) scatters the four doubles of ymm0 back to those same addresses, again using xmm1 as scratch. The disabled tails of both macros would cover four more elements through r15 and r10, which the active kernel code never sets up. */ #define DGEMM_OUTPUT_GS_BETA_NZ \ vextractf128(imm(1), ymm0, xmm1) \ vmovlpd(xmm0, mem(rcx)) \ vmovhpd(xmm0, mem(rcx, rsi, 1)) \ vmovlpd(xmm1, mem(rcx, rsi, 2)) \ vmovhpd(xmm1, mem(rcx, r13, 1)) /*\ vextractf128(imm(1), ymm2, xmm1) \ vmovlpd(xmm2, mem(rcx, rsi, 4)) \ vmovhpd(xmm2, mem(rcx, r15, 1)) \ vmovlpd(xmm1, mem(rcx, r13, 2)) \ vmovhpd(xmm1, mem(rcx, r10, 1))*/ /* Double-precision GEMM micro-kernel implemented as a single inline-asm block. It accumulates a 6x8 tile of products of a and b in ymm4..ymm15 over k0 steps (k0/4 passes of a 4x-unrolled main loop plus k0%4 passes of an edge loop), multiplies the tile by *alpha, and writes it to c with row stride rs_c0 and column stride cs_c0; both strides are multiplied by 8 inside the asm, i.e. they are counted in doubles. When *beta compares equal to zero c is only written, otherwise (*beta)*c is added in. Separate store paths exist for cs_c == 1, rs_c == 1 and the general-stride case. Each k step consumes 6 doubles of a and 8 doubles of b; b is read with vmovapd, so it must be 32-byte aligned. data and cntx are not referenced by the active code. */ void bli_dgemm_haswell_asm_6x8 ( dim_t k0, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. //mov(%9, r15) // load address of b_next.
add(imm(32*4), rbx) // initialize loop by pre-loading vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) lea(mem(rdi, rdi, 2), r13) // r13 = 3*rs_c; lea(mem(rcx, r13, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.DLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 64*8)) vbroadcastsd(mem(rax, 0*8), ymm2) vbroadcastsd(mem(rax, 1*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 2*8), ymm2) vbroadcastsd(mem(rax, 3*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 4*8), ymm2) vbroadcastsd(mem(rax, 5*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, -2*32), ymm0) vmovapd(mem(rbx, -1*32), ymm1) // iteration 1 vbroadcastsd(mem(rax, 6*8), ymm2) vbroadcastsd(mem(rax, 7*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 8*8), ymm2) vbroadcastsd(mem(rax, 9*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 10*8), ymm2) vbroadcastsd(mem(rax, 11*8), ymm3)
vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, 0*32), ymm0) vmovapd(mem(rbx, 1*32), ymm1) // iteration 2 prefetch(0, mem(rax, 76*8)) vbroadcastsd(mem(rax, 12*8), ymm2) vbroadcastsd(mem(rax, 13*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 14*8), ymm2) vbroadcastsd(mem(rax, 15*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 16*8), ymm2) vbroadcastsd(mem(rax, 17*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, 2*32), ymm0) vmovapd(mem(rbx, 3*32), ymm1) // iteration 3 vbroadcastsd(mem(rax, 18*8), ymm2) vbroadcastsd(mem(rax, 19*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 20*8), ymm2) vbroadcastsd(mem(rax, 21*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 22*8), ymm2) vbroadcastsd(mem(rax, 23*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) add(imm(4*6*8), rax) // a += 4*6 (unroll x mr) add(imm(4*8*8), rbx) // b += 4*8 (unroll x nr) vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop.
label(.DLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 64*8)) vbroadcastsd(mem(rax, 0*8), ymm2) vbroadcastsd(mem(rax, 1*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 2*8), ymm2) vbroadcastsd(mem(rax, 3*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 4*8), ymm2) vbroadcastsd(mem(rax, 5*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) add(imm(1*6*8), rax) // a += 1*6 (unroll x mr) add(imm(1*8*8), rbx) // b += 1*8 (unroll x nr) vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) /* accumulation is finished: ymm4..ymm15 hold the unscaled 6x8 tile (even registers: first four columns, odd registers: last four columns of each row); rax and rbx are reused below for the alpha and beta pointers */ mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm11, ymm11) vmulpd(ymm0, ymm12, ymm12) vmulpd(ymm0, ymm13, ymm13) vmulpd(ymm0, ymm14, ymm14) vmulpd(ymm0, ymm15, ymm15) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), r14) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), r13) // r13 = 3*cs_c; //lea(mem(rsi, rsi, 4), r15) // r15 = 5*cs_c; //lea(mem(r13, rsi, 4), r10) // r10 = 7*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rsi) // set ZF if (8*cs_c) == 8.
jz(.DROWSTORED) // jump to row storage case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DGENSTORED) DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm4, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm6, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm8, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm10, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm12, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm14, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ mov(rdx, rcx) // rcx = c + 4*cs_c DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm5, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm7, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm9, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm11, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm13, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; DGEMM_INPUT_GS_BETA_NZ vfmadd213pd(ymm15, ymm3, ymm0) DGEMM_OUTPUT_GS_BETA_NZ jmp(.DDONE) // jump to end.
label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rdx), ymm3, ymm5) vmovupd(ymm5, mem(rdx)) add(rdi, rdx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rdx), ymm3, ymm7) vmovupd(ymm7, mem(rdx)) add(rdi, rdx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rdx), ymm3, ymm9) vmovupd(ymm9, mem(rdx)) add(rdi, rdx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rdx), ymm3, ymm11) vmovupd(ymm11, mem(rdx)) add(rdi, rdx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rdx), ymm3, ymm13) vmovupd(ymm13, mem(rdx)) add(rdi, rdx) vfmadd231pd(mem(rcx), ymm3, ymm14) vmovupd(ymm14, mem(rcx)) //add(rdi, rcx) vfmadd231pd(mem(rdx), ymm3, ymm15) vmovupd(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) /* the unpack/insert/permute sequence transposes each 4x4 group of accumulators so that one register holds four consecutive rows of a single column, which is then updated with one contiguous access; ymm3 is used as scratch by the transpose, hence beta is re-broadcast from rbx afterwards; rows 4-5 are written as 2-element columns through r14 */ vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, r13, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(r14), xmm3, xmm0) vfmadd231pd(mem(r14, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(r14, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(r14, r13, 1), xmm3, xmm4) vmovupd(xmm0, mem(r14)) vmovupd(xmm1, mem(r14, rsi, 1)) vmovupd(xmm2, mem(r14, rsi, 2)) vmovupd(xmm4, mem(r14, r13, 1)) lea(mem(r14, rsi, 4), r14)
vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm5) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9) vfmadd231pd(mem(rcx, r13, 1), ymm3, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, r13, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(r14), xmm3, xmm0) vfmadd231pd(mem(r14, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(r14, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(r14, r13, 1), xmm3, xmm4) vmovupd(xmm0, mem(r14)) vmovupd(xmm1, mem(r14, rsi, 1)) vmovupd(xmm2, mem(r14, rsi, 2)) vmovupd(xmm4, mem(r14, r13, 1)) //lea(mem(r14, rsi, 4), r14) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rsi) // set ZF if (8*cs_c) == 8. jz(.DROWSTORBZ) // jump to row storage case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
jz(.DCOLSTORBZ) // jump to column storage case label(.DGENSTORBZ) /* beta == 0 with general stride: each accumulator is copied to ymm0 and scattered with the output macro only (despite its _BETA_NZ name); c is never read on this path */ vmovapd(ymm4, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm6, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm8, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm10, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm12, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm14, ymm0) DGEMM_OUTPUT_GS_BETA_NZ mov(rdx, rcx) // rcx = c + 4*cs_c vmovapd(ymm5, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm7, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm9, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm11, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm13, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += rs_c; vmovapd(ymm15, ymm0) DGEMM_OUTPUT_GS_BETA_NZ jmp(.DDONE) // jump to end. label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rdx)) add(rdi, rdx) vmovupd(ymm6, mem(rcx)) add(rdi, rcx) vmovupd(ymm7, mem(rdx)) add(rdi, rdx) vmovupd(ymm8, mem(rcx)) add(rdi, rcx) vmovupd(ymm9, mem(rdx)) add(rdi, rdx) vmovupd(ymm10, mem(rcx)) add(rdi, rcx) vmovupd(ymm11, mem(rdx)) add(rdi, rdx) vmovupd(ymm12, mem(rcx)) add(rdi, rcx) vmovupd(ymm13, mem(rdx)) add(rdi, rdx) vmovupd(ymm14, mem(rcx)) //add(rdi, rcx) vmovupd(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.DDONE) // jump to end.
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(r14)) vmovupd(xmm1, mem(r14, rsi, 1)) vmovupd(xmm2, mem(r14, rsi, 2)) vmovupd(xmm4, mem(r14, r13, 1)) lea(mem(r14, rsi, 4), r14) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, r13, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(r14)) vmovupd(xmm1, mem(r14, rsi, 1)) vmovupd(xmm2, mem(r14, rsi, 2)) vmovupd(xmm4, mem(r14, r13, 1)) //lea(mem(r14, rsi, 4), r14) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c)/*, // 8 [b_next] "m" (b_next), // 9 [a_next] "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12",
"xmm13", "xmm14", "xmm15", "memory" ) } // assumes beta.r, beta.i have been broadcast into ymm1, ymm2. // outputs to ymm0 #define CGEMM_INPUT_SCALE_GS_BETA_NZ \ vmovlpd(mem(rcx), xmm0, xmm0) \ vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) \ vmovlpd(mem(rcx, rsi, 2), xmm3, xmm3) \ vmovhpd(mem(rcx, r13, 1), xmm3, xmm3) \ vinsertf128(imm(1), xmm3, ymm0, ymm0) \ vpermilps(imm(0xb1), ymm0, ymm3) \ vmulps(ymm1, ymm0, ymm0) \ vmulps(ymm2, ymm3, ymm3) \ vaddsubps(ymm3, ymm0, ymm0) // assumes values to output are in ymm0 #define CGEMM_OUTPUT_GS \ vextractf128(imm(1), ymm0, xmm3) \ vmovlpd(xmm0, mem(rcx)) \ vmovhpd(xmm0, mem(rcx, rsi, 1)) \ vmovlpd(xmm3, mem(rcx, rsi, 2)) \ vmovhpd(xmm3, mem(rcx, r13, 1)) /* Contiguous counterpart of CGEMM_INPUT_SCALE_GS_BETA_NZ: one unaligned 32-byte load from rcx into ymm0 followed by the same ymm1/ymm2 multiply and addsub sequence; ymm3 is overwritten as scratch. */ #define CGEMM_INPUT_SCALE_RS_BETA_NZ \ vmovups(mem(rcx), ymm0) \ vpermilps(imm(0xb1), ymm0, ymm3) \ vmulps(ymm1, ymm0, ymm0) \ vmulps(ymm2, ymm3, ymm3) \ vaddsubps(ymm3, ymm0, ymm0) /* Stores ymm0 to the 32 bytes at rcx with one unaligned store. */ #define CGEMM_OUTPUT_RS \ vmovups(ymm0, mem(rcx)) \ void bli_cgemm_haswell_asm_3x8 ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. //mov(%9, r15) // load address of b_next.
add(imm(32*4), rbx) // initialize loop by pre-loading vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(scomplex) lea(mem(rcx, rdi, 1), r11) // r11 = c + 1*rs_c; lea(mem(rcx, rdi, 2), r12) // r12 = c + 2*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(r11, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(r12, 7*8)) // prefetch c + 2*rs_c mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.CCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.CLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 32*8)) vbroadcastss(mem(rax, 0*4), ymm2) vbroadcastss(mem(rax, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 2*4), ymm2) vbroadcastss(mem(rax, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 4*4), ymm2) vbroadcastss(mem(rax, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, -2*32), ymm0) vmovaps(mem(rbx, -1*32), ymm1) // iteration 1 vbroadcastss(mem(rax, 6*4), ymm2) vbroadcastss(mem(rax, 7*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 8*4), ymm2) vbroadcastss(mem(rax, 9*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 10*4), ymm2) vbroadcastss(mem(rax, 11*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, 0*32), ymm0) vmovaps(mem(rbx, 1*32), ymm1) 
// iteration 2 prefetch(0, mem(rax, 38*8)) vbroadcastss(mem(rax, 12*4), ymm2) vbroadcastss(mem(rax, 13*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 14*4), ymm2) vbroadcastss(mem(rax, 15*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 16*4), ymm2) vbroadcastss(mem(rax, 17*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, 2*32), ymm0) vmovaps(mem(rbx, 3*32), ymm1) // iteration 3 vbroadcastss(mem(rax, 18*4), ymm2) vbroadcastss(mem(rax, 19*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 20*4), ymm2) vbroadcastss(mem(rax, 21*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 22*4), ymm2) vbroadcastss(mem(rax, 23*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(4*3*8), rax) // a += 4*3 (unroll x mr) add(imm(4*8*8), rbx) // b += 4*8 (unroll x nr) vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.CLOOPKITER) // iterate again if i != 0. label(.CCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.CPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.CLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 32*8)) vbroadcastss(mem(rax, 0*4), ymm2) vbroadcastss(mem(rax, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 2*4), ymm2) vbroadcastss(mem(rax, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 4*4), ymm2) vbroadcastss(mem(rax, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(1*3*8), rax) // a += 1*3 (unroll x mr) add(imm(1*8*8), rbx) // b += 1*8 (unroll x nr) vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.CLOOPKLEFT) // iterate again if i != 0. label(.CPOSTACCUM) // permute even and odd elements // of ymm6/7, ymm10/11, ymm14/15 vpermilps(imm(0xb1), ymm6, ymm6) vpermilps(imm(0xb1), ymm7, ymm7) vpermilps(imm(0xb1), ymm10, ymm10) vpermilps(imm(0xb1), ymm11, ymm11) vpermilps(imm(0xb1), ymm14, ymm14) vpermilps(imm(0xb1), ymm15, ymm15) // subtract/add even/odd elements vaddsubps(ymm6, ymm4, ymm4) vaddsubps(ymm7, ymm5, ymm5) vaddsubps(ymm10, ymm8, ymm8) vaddsubps(ymm11, ymm9, ymm9) vaddsubps(ymm14, ymm12, ymm12) vaddsubps(ymm15, ymm13, ymm13) mov(var(alpha), rax) // load address of alpha vbroadcastss(mem(rax), ymm0) // load alpha_r and duplicate vbroadcastss(mem(rax, 4), ymm1) // load alpha_i and duplicate vpermilps(imm(0xb1), ymm4, ymm3) vmulps(ymm0, ymm4, ymm4) vmulps(ymm1, ymm3, ymm3) vaddsubps(ymm3, ymm4, ymm4) vpermilps(imm(0xb1), ymm5, ymm3) vmulps(ymm0, ymm5, ymm5) vmulps(ymm1, ymm3, ymm3) vaddsubps(ymm3, ymm5, ymm5) vpermilps(imm(0xb1), ymm8, ymm3) vmulps(ymm0, ymm8, ymm8) vmulps(ymm1, ymm3, ymm3) vaddsubps(ymm3, ymm8, ymm8) vpermilps(imm(0xb1), ymm9, ymm3) vmulps(ymm0, ymm9, ymm9) vmulps(ymm1, ymm3, ymm3) vaddsubps(ymm3, ymm9, ymm9) vpermilps(imm(0xb1), ymm12, ymm3)
vmulps(ymm0, ymm12, ymm12) vmulps(ymm1, ymm3, ymm3) vaddsubps(ymm3, ymm12, ymm12) vpermilps(imm(0xb1), ymm13, ymm3) vmulps(ymm0, ymm13, ymm13) vmulps(ymm1, ymm3, ymm3) vaddsubps(ymm3, ymm13, ymm13) mov(var(beta), rbx) // load address of beta vbroadcastss(mem(rbx), ymm1) // load beta_r and duplicate vbroadcastss(mem(rbx, 4), ymm2) // load beta_i and duplicate mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(scomplex) lea(mem(, rsi, 4), rdx) // rdx = 4*cs_c; lea(mem(rsi, rsi, 2), r13) // r13 = 3*cs_c; // now avoid loading C if beta == 0 vxorps(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomiss(xmm0, xmm1) // set ZF if beta_r == 0. sete(r8b) // r8b = ( ZF == 1 ? 1 : 0 ); vucomiss(xmm0, xmm2) // set ZF if beta_i == 0. sete(r9b) // r9b = ( ZF == 1 ? 1 : 0 ); and(r8b, r9b) // clear ZF if r8b & r9b == 1 (beta_r and beta_i both zero). jne(.CBETAZERO) // if ZF = 0, jump to beta == 0 case cmp(imm(8), rsi) // set ZF if (8*cs_c) == 8. jz(.CROWSTORED) // jump to row storage case label(.CGENSTORED) CGEMM_INPUT_SCALE_GS_BETA_NZ vaddps(ymm4, ymm0, ymm0) CGEMM_OUTPUT_GS add(rdx, rcx) // c += 4*cs_c; CGEMM_INPUT_SCALE_GS_BETA_NZ vaddps(ymm5, ymm0, ymm0) CGEMM_OUTPUT_GS mov(r11, rcx) // rcx = c + 1*rs_c CGEMM_INPUT_SCALE_GS_BETA_NZ vaddps(ymm8, ymm0, ymm0) CGEMM_OUTPUT_GS add(rdx, rcx) // c += 4*cs_c; CGEMM_INPUT_SCALE_GS_BETA_NZ vaddps(ymm9, ymm0, ymm0) CGEMM_OUTPUT_GS mov(r12, rcx) // rcx = c + 2*rs_c CGEMM_INPUT_SCALE_GS_BETA_NZ vaddps(ymm12, ymm0, ymm0) CGEMM_OUTPUT_GS add(rdx, rcx) // c += 4*cs_c; CGEMM_INPUT_SCALE_GS_BETA_NZ vaddps(ymm13, ymm0, ymm0) CGEMM_OUTPUT_GS jmp(.CDONE) // jump to end.
label(.CROWSTORED) CGEMM_INPUT_SCALE_RS_BETA_NZ vaddps(ymm4, ymm0, ymm0) CGEMM_OUTPUT_RS add(rdx, rcx) // c += 4*cs_c; CGEMM_INPUT_SCALE_RS_BETA_NZ vaddps(ymm5, ymm0, ymm0) CGEMM_OUTPUT_RS mov(r11, rcx) // rcx = c + 1*rs_c CGEMM_INPUT_SCALE_RS_BETA_NZ vaddps(ymm8, ymm0, ymm0) CGEMM_OUTPUT_RS add(rdx, rcx) // c += 4*cs_c; CGEMM_INPUT_SCALE_RS_BETA_NZ vaddps(ymm9, ymm0, ymm0) CGEMM_OUTPUT_RS mov(r12, rcx) // rcx = c + 2*rs_c CGEMM_INPUT_SCALE_RS_BETA_NZ vaddps(ymm12, ymm0, ymm0) CGEMM_OUTPUT_RS add(rdx, rcx) // c += 4*cs_c; CGEMM_INPUT_SCALE_RS_BETA_NZ vaddps(ymm13, ymm0, ymm0) CGEMM_OUTPUT_RS jmp(.CDONE) // jump to end. label(.CBETAZERO) cmp(imm(8), rsi) // set ZF if (8*cs_c) == 8. jz(.CROWSTORBZ) // jump to row storage case label(.CGENSTORBZ) vmovaps(ymm4, ymm0) CGEMM_OUTPUT_GS add(rdx, rcx) // c += 4*cs_c; vmovaps(ymm5, ymm0) CGEMM_OUTPUT_GS mov(r11, rcx) // rcx = c + 1*rs_c vmovaps(ymm8, ymm0) CGEMM_OUTPUT_GS add(rdx, rcx) // c += 4*cs_c; vmovaps(ymm9, ymm0) CGEMM_OUTPUT_GS mov(r12, rcx) // rcx = c + 2*rs_c vmovaps(ymm12, ymm0) CGEMM_OUTPUT_GS add(rdx, rcx) // c += 4*cs_c; vmovaps(ymm13, ymm0) CGEMM_OUTPUT_GS jmp(.CDONE) // jump to end.
label(.CROWSTORBZ) vmovups(ymm4, mem(rcx)) vmovups(ymm5, mem(rcx, rdx, 1)) vmovups(ymm8, mem(r11)) vmovups(ymm9, mem(r11, rdx, 1)) vmovups(ymm12, mem(r12)) vmovups(ymm13, mem(r12, rdx, 1)) label(.CDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c)/*, // 8 [b_next] "m" (b_next), // 9 [a_next] "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } // assumes beta.r, beta.i have been broadcast into ymm1, ymm2. // outputs to ymm0 #define ZGEMM_INPUT_SCALE_GS_BETA_NZ \ vmovupd(mem(rcx), xmm0) \ vmovupd(mem(rcx, rsi, 1), xmm3) \ vinsertf128(imm(1), xmm3, ymm0, ymm0) \ vpermilpd(imm(0x5), ymm0, ymm3) \ vmulpd(ymm1, ymm0, ymm0) \ vmulpd(ymm2, ymm3, ymm3) \ vaddsubpd(ymm3, ymm0, ymm0) // assumes values to output are in ymm0 #define ZGEMM_OUTPUT_GS \ vextractf128(imm(1), ymm0, xmm3) \ vmovupd(xmm0, mem(rcx)) \ vmovupd(xmm3, mem(rcx, rsi, 1)) \ #define ZGEMM_INPUT_SCALE_RS_BETA_NZ \ vmovupd(mem(rcx), ymm0) \ vpermilpd(imm(0x5), ymm0, ymm3) \ vmulpd(ymm1, ymm0, ymm0) \ vmulpd(ymm2, ymm3, ymm3) \ vaddsubpd(ymm3, ymm0, ymm0) #define ZGEMM_OUTPUT_RS \ vmovupd(ymm0, mem(rcx)) \ void bli_zgemm_haswell_asm_3x4 ( dim_t k0, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. //mov(%9, r15) // load address of b_next. add(imm(32*4), rbx) // initialize loop by pre-loading vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(dcomplex) lea(mem(, rdi, 2), rdi) lea(mem(rcx, rdi, 1), r11) // r11 = c + 1*rs_c; lea(mem(rcx, rdi, 2), r12) // r12 = c + 2*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(r11, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(r12, 7*8)) // prefetch c + 2*rs_c mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.ZCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.ZLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 32*16)) vbroadcastsd(mem(rax, 0*8), ymm2) vbroadcastsd(mem(rax, 1*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 2*8), ymm2) vbroadcastsd(mem(rax, 3*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 4*8), ymm2) vbroadcastsd(mem(rax, 5*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, -2*32), ymm0) vmovapd(mem(rbx, -1*32), ymm1) // iteration 1 vbroadcastsd(mem(rax, 6*8), ymm2) vbroadcastsd(mem(rax, 7*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 8*8), ymm2) vbroadcastsd(mem(rax, 9*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, 
ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 10*8), ymm2) vbroadcastsd(mem(rax, 11*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, 0*32), ymm0) vmovapd(mem(rbx, 1*32), ymm1) // iteration 2 prefetch(0, mem(rax, 38*16)) vbroadcastsd(mem(rax, 12*8), ymm2) vbroadcastsd(mem(rax, 13*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 14*8), ymm2) vbroadcastsd(mem(rax, 15*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 16*8), ymm2) vbroadcastsd(mem(rax, 17*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, 2*32), ymm0) vmovapd(mem(rbx, 3*32), ymm1) // iteration 3 vbroadcastsd(mem(rax, 18*8), ymm2) vbroadcastsd(mem(rax, 19*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 20*8), ymm2) vbroadcastsd(mem(rax, 21*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 22*8), ymm2) vbroadcastsd(mem(rax, 23*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) add(imm(4*3*16), rax) // a += 4*3 (unroll x mr) add(imm(4*4*16), rbx) // b += 4*4 (unroll x nr) vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.ZLOOPKITER) // iterate again if i != 0. label(.ZCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.ZPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.ZLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 32*16)) vbroadcastsd(mem(rax, 0*8), ymm2) vbroadcastsd(mem(rax, 1*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 2*8), ymm2) vbroadcastsd(mem(rax, 3*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 4*8), ymm2) vbroadcastsd(mem(rax, 5*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) add(imm(1*3*16), rax) // a += 1*3 (unroll x mr) add(imm(1*4*16), rbx) // b += 1*4 (unroll x nr) vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.ZLOOPKLEFT) // iterate again if i != 0. label(.ZPOSTACCUM) // permute even and odd elements // of ymm6/7, ymm10/11, ymm14/15 vpermilpd(imm(0x5), ymm6, ymm6) vpermilpd(imm(0x5), ymm7, ymm7) vpermilpd(imm(0x5), ymm10, ymm10) vpermilpd(imm(0x5), ymm11, ymm11) vpermilpd(imm(0x5), ymm14, ymm14) vpermilpd(imm(0x5), ymm15, ymm15) // subtract/add even/odd elements vaddsubpd(ymm6, ymm4, ymm4) vaddsubpd(ymm7, ymm5, ymm5) vaddsubpd(ymm10, ymm8, ymm8) vaddsubpd(ymm11, ymm9, ymm9) vaddsubpd(ymm14, ymm12, ymm12) vaddsubpd(ymm15, ymm13, ymm13) mov(var(alpha), rax) // load address of alpha vbroadcastsd(mem(rax), ymm0) // load alpha_r and duplicate vbroadcastsd(mem(rax, 8), ymm1) // load alpha_i and duplicate vpermilpd(imm(0x5), ymm4, ymm3) vmulpd(ymm0, ymm4, ymm4) vmulpd(ymm1, ymm3, ymm3) vaddsubpd(ymm3, ymm4, ymm4) vpermilpd(imm(0x5), ymm5, ymm3) vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm1, ymm3, ymm3) vaddsubpd(ymm3, ymm5, ymm5) vpermilpd(imm(0x5), ymm8, ymm3) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm1, ymm3, ymm3) vaddsubpd(ymm3, ymm8, ymm8) vpermilpd(imm(0x5), ymm9, ymm3) vmulpd(ymm0, ymm9, ymm9) vmulpd(ymm1, ymm3, ymm3) vaddsubpd(ymm3, ymm9, ymm9) vpermilpd(imm(0x5), ymm12, ymm3)
vmulpd(ymm0, ymm12, ymm12) vmulpd(ymm1, ymm3, ymm3) vaddsubpd(ymm3, ymm12, ymm12) vpermilpd(imm(0x5), ymm13, ymm3) vmulpd(ymm0, ymm13, ymm13) vmulpd(ymm1, ymm3, ymm3) vaddsubpd(ymm3, ymm13, ymm13) mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rbx), ymm1) // load beta_r and duplicate vbroadcastsd(mem(rbx, 8), ymm2) // load beta_i and duplicate mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(dcomplex) lea(mem(, rsi, 2), rsi) lea(mem(, rsi, 2), rdx) // rdx = 2*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm1) // set ZF if beta_r == 0. sete(r8b) // r8b = ( ZF == 1 ? 1 : 0 ); vucomisd(xmm0, xmm2) // set ZF if beta_i == 0. sete(r9b) // r9b = ( ZF == 1 ? 1 : 0 ); and(r8b, r9b) // clear ZF if r8b & r9b == 1 (beta_r and beta_i both zero). jne(.ZBETAZERO) // if ZF = 0, jump to beta == 0 case cmp(imm(16), rsi) // set ZF if (16*cs_c) == 16. jz(.ZROWSTORED) // jump to row storage case label(.ZGENSTORED) ZGEMM_INPUT_SCALE_GS_BETA_NZ vaddpd(ymm4, ymm0, ymm0) ZGEMM_OUTPUT_GS add(rdx, rcx) // c += 2*cs_c; ZGEMM_INPUT_SCALE_GS_BETA_NZ vaddpd(ymm5, ymm0, ymm0) ZGEMM_OUTPUT_GS mov(r11, rcx) // rcx = c + 1*rs_c ZGEMM_INPUT_SCALE_GS_BETA_NZ vaddpd(ymm8, ymm0, ymm0) ZGEMM_OUTPUT_GS add(rdx, rcx) // c += 2*cs_c; ZGEMM_INPUT_SCALE_GS_BETA_NZ vaddpd(ymm9, ymm0, ymm0) ZGEMM_OUTPUT_GS mov(r12, rcx) // rcx = c + 2*rs_c ZGEMM_INPUT_SCALE_GS_BETA_NZ vaddpd(ymm12, ymm0, ymm0) ZGEMM_OUTPUT_GS add(rdx, rcx) // c += 2*cs_c; ZGEMM_INPUT_SCALE_GS_BETA_NZ vaddpd(ymm13, ymm0, ymm0) ZGEMM_OUTPUT_GS jmp(.ZDONE) // jump to end.
label(.ZROWSTORED) ZGEMM_INPUT_SCALE_RS_BETA_NZ vaddpd(ymm4, ymm0, ymm0) ZGEMM_OUTPUT_RS add(rdx, rcx) // c += 2*cs_c; ZGEMM_INPUT_SCALE_RS_BETA_NZ vaddpd(ymm5, ymm0, ymm0) ZGEMM_OUTPUT_RS mov(r11, rcx) // rcx = c + 1*rs_c ZGEMM_INPUT_SCALE_RS_BETA_NZ vaddpd(ymm8, ymm0, ymm0) ZGEMM_OUTPUT_RS add(rdx, rcx) // c += 2*cs_c; ZGEMM_INPUT_SCALE_RS_BETA_NZ vaddpd(ymm9, ymm0, ymm0) ZGEMM_OUTPUT_RS mov(r12, rcx) // rcx = c + 2*rs_c ZGEMM_INPUT_SCALE_RS_BETA_NZ vaddpd(ymm12, ymm0, ymm0) ZGEMM_OUTPUT_RS add(rdx, rcx) // c += 2*cs_c; ZGEMM_INPUT_SCALE_RS_BETA_NZ vaddpd(ymm13, ymm0, ymm0) ZGEMM_OUTPUT_RS jmp(.ZDONE) // jump to end. label(.ZBETAZERO) cmp(imm(16), rsi) // set ZF if (16*cs_c) == 16. jz(.ZROWSTORBZ) // jump to row storage case label(.ZGENSTORBZ) vmovapd(ymm4, ymm0) ZGEMM_OUTPUT_GS add(rdx, rcx) // c += 2*cs_c; vmovapd(ymm5, ymm0) ZGEMM_OUTPUT_GS mov(r11, rcx) // rcx = c + 1*rs_c vmovapd(ymm8, ymm0) ZGEMM_OUTPUT_GS add(rdx, rcx) // c += 2*cs_c; vmovapd(ymm9, ymm0) ZGEMM_OUTPUT_GS mov(r12, rcx) // rcx = c + 2*rs_c vmovapd(ymm12, ymm0) ZGEMM_OUTPUT_GS add(rdx, rcx) // c += 2*cs_c; vmovapd(ymm13, ymm0) ZGEMM_OUTPUT_GS jmp(.ZDONE) // jump to end. 
label(.ZROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rdx, 1)) vmovupd(ymm8, mem(r11)) vmovupd(ymm9, mem(r11, rdx, 1)) vmovupd(ymm12, mem(r12)) vmovupd(ymm13, mem(r12, rdx, 1)) label(.ZDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c)/*, // 8 [b_next] "m" (b_next), // 9 [a_next] "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/haswell/3/bli_gemm_haswell_asm_d8x6.c000066400000000000000000001442011360743507500230760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" #define SGEMM_INPUT_GS_BETA_NZ \ vmovlps(mem(rcx), xmm0, xmm0) \ vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) \ vmovlps(mem(rcx, rsi, 2), xmm1, xmm1) \ vmovhps(mem(rcx, r13, 1), xmm1, xmm1) \ vshufps(imm(0x88), xmm1, xmm0, xmm0) \ vmovlps(mem(rcx, rsi, 4), xmm2, xmm2) \ vmovhps(mem(rcx, r15, 1), xmm2, xmm2) \ /* We can't use vmovhps for loading the last element because that might result in reading beyond valid memory. (vmov[lh]psd load pairs of adjacent floats at a time.) So we need to use vmovss instead. But since we're limited to using ymm0 through ymm2 (ymm3 contains beta and ymm4 through ymm15 contain the microtile) and due to the way vmovss zeros out all bits above 31, we have to load element 7 before element 6.
*/ \ vmovss(mem(rcx, r10, 1), xmm1) \ vpermilps(imm(0xcf), xmm1, xmm1) \ vmovlps(mem(rcx, r13, 2), xmm1, xmm1) \ /*vmovhps(mem(rcx, r10, 1), xmm1, xmm1)*/ \ vshufps(imm(0x88), xmm1, xmm2, xmm2) \ vperm2f128(imm(0x20), ymm2, ymm0, ymm0) #define SGEMM_OUTPUT_GS_BETA_NZ \ vextractf128(imm(1), ymm0, xmm2) \ vmovss(xmm0, mem(rcx)) \ vpermilps(imm(0x39), xmm0, xmm1) \ vmovss(xmm1, mem(rcx, rsi, 1)) \ vpermilps(imm(0x39), xmm1, xmm0) \ vmovss(xmm0, mem(rcx, rsi, 2)) \ vpermilps(imm(0x39), xmm0, xmm1) \ vmovss(xmm1, mem(rcx, r13, 1)) \ vmovss(xmm2, mem(rcx, rsi, 4)) \ vpermilps(imm(0x39), xmm2, xmm1) \ vmovss(xmm1, mem(rcx, r15, 1)) \ vpermilps(imm(0x39), xmm1, xmm2) \ vmovss(xmm2, mem(rcx, r13, 2)) \ vpermilps(imm(0x39), xmm2, xmm1) \ vmovss(xmm1, mem(rcx, r10, 1)) void bli_sgemm_haswell_asm_16x6 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. //mov(%9, r15) // load address of b_next. 
add(imm(32*4), rax) // initialize loop by pre-loading vmovaps(mem(rax, -4*32), ymm0) vmovaps(mem(rax, -3*32), ymm1) mov(var(c), rcx) // load address of c mov(var(cs_c), rdi) // load cs_c lea(mem(, rdi, 4), rdi) // cs_c *= sizeof(float) lea(mem(rdi, rdi, 2), r13) // r13 = 3*cs_c; lea(mem(rcx, r13, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*cs_c mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.SCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.SLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 128*4)) vbroadcastss(mem(rbx, 0*4), ymm2) vbroadcastss(mem(rbx, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rbx, 2*4), ymm2) vbroadcastss(mem(rbx, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rbx, 4*4), ymm2) vbroadcastss(mem(rbx, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rax, -2*32), ymm0) vmovaps(mem(rax, -1*32), ymm1) // iteration 1 vbroadcastss(mem(rbx, 6*4), ymm2) vbroadcastss(mem(rbx, 7*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rbx, 8*4), ymm2) vbroadcastss(mem(rbx, 9*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rbx, 10*4), ymm2) vbroadcastss(mem(rbx, 11*4), ymm3) 
vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rax, 0*32), ymm0) vmovaps(mem(rax, 1*32), ymm1) // iteration 2 prefetch(0, mem(rax, 152*4)) vbroadcastss(mem(rbx, 12*4), ymm2) vbroadcastss(mem(rbx, 13*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rbx, 14*4), ymm2) vbroadcastss(mem(rbx, 15*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rbx, 16*4), ymm2) vbroadcastss(mem(rbx, 17*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rax, 2*32), ymm0) vmovaps(mem(rax, 3*32), ymm1) // iteration 3 vbroadcastss(mem(rbx, 18*4), ymm2) vbroadcastss(mem(rbx, 19*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rbx, 20*4), ymm2) vbroadcastss(mem(rbx, 21*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rbx, 22*4), ymm2) vbroadcastss(mem(rbx, 23*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(4*16*4), rax) // a += 4*16 (unroll x mr) add(imm(4*6*4), rbx) // b += 4*6 (unroll x nr) vmovaps(mem(rax, -4*32), ymm0) vmovaps(mem(rax, -3*32), ymm1) dec(rsi) // i -= 1; jne(.SLOOPKITER) // iterate again if i != 0. label(.SCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.SPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.SLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 128*4)) vbroadcastss(mem(rbx, 0*4), ymm2) vbroadcastss(mem(rbx, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rbx, 2*4), ymm2) vbroadcastss(mem(rbx, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rbx, 4*4), ymm2) vbroadcastss(mem(rbx, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(1*16*4), rax) // a += 1*16 (unroll x mr) add(imm(1*6*4), rbx) // b += 1*6 (unroll x nr) vmovaps(mem(rax, -4*32), ymm0) vmovaps(mem(rax, -3*32), ymm1) dec(rsi) // i -= 1; jne(.SLOOPKLEFT) // iterate again if i != 0. label(.SPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastss(mem(rax), ymm0) // load alpha and duplicate vbroadcastss(mem(rbx), ymm3) // load beta and duplicate vmulps(ymm0, ymm4, ymm4) // scale by alpha vmulps(ymm0, ymm5, ymm5) vmulps(ymm0, ymm6, ymm6) vmulps(ymm0, ymm7, ymm7) vmulps(ymm0, ymm8, ymm8) vmulps(ymm0, ymm9, ymm9) vmulps(ymm0, ymm10, ymm10) vmulps(ymm0, ymm11, ymm11) vmulps(ymm0, ymm12, ymm12) vmulps(ymm0, ymm13, ymm13) vmulps(ymm0, ymm14, ymm14) vmulps(ymm0, ymm15, ymm15) mov(var(rs_c), rsi) // load rs_c lea(mem(, rsi, 4), rsi) // rsi = rs_c * sizeof(float) lea(mem(rcx, rsi, 8), rdx) // load address of c + 8*rs_c; lea(mem(rsi, rsi, 2), r13) // r13 = 3*rs_c; lea(mem(rsi, rsi, 4), r15) // r15 = 5*rs_c; lea(mem(r13, rsi, 4), r10) // r10 = 7*rs_c; // now avoid loading C if beta == 0 vxorps(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomiss(xmm0, xmm3) // set ZF if beta == 0. je(.SBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(4), rsi) // set ZF if (4*rs_c) == 4. 
jz(.SCOLSTORED) // jump to column storage case label(.SGENSTORED) SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm4, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm6, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm8, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm10, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm12, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm14, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ //add(rdi, rcx) // c += cs_c; mov(rdx, rcx) // rcx = c + 8*rs_c SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm5, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm7, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm9, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm11, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm13, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; SGEMM_INPUT_GS_BETA_NZ vfmadd213ps(ymm15, ymm3, ymm0) SGEMM_OUTPUT_GS_BETA_NZ //add(rdi, rcx) // c += cs_c; jmp(.SDONE) // jump to end. 
label(.SCOLSTORED) vfmadd231ps(mem(rcx), ymm3, ymm4) vmovups(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm5) vmovups(ymm5, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm6) vmovups(ymm6, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm7) vmovups(ymm7, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm8) vmovups(ymm8, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm9) vmovups(ymm9, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm10) vmovups(ymm10, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm11) vmovups(ymm11, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm12) vmovups(ymm12, mem(rcx)) add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm13) vmovups(ymm13, mem(rdx)) add(rdi, rdx) vfmadd231ps(mem(rcx), ymm3, ymm14) vmovups(ymm14, mem(rcx)) //add(rdi, rcx) vfmadd231ps(mem(rdx), ymm3, ymm15) vmovups(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.SDONE) // jump to end. label(.SBETAZERO) cmp(imm(4), rsi) // set ZF if (4*rs_c) == 4. jz(.SCOLSTORBZ) // jump to column storage case label(.SGENSTORBZ) vmovaps(ymm4, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm6, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm8, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm10, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm12, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm14, ymm0) SGEMM_OUTPUT_GS_BETA_NZ //add(rdi, rcx) // c += cs_c; mov(rdx, rcx) // rcx = c + 8*rs_c vmovaps(ymm5, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm7, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm9, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm11, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm13, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c += cs_c; vmovaps(ymm15, ymm0) SGEMM_OUTPUT_GS_BETA_NZ //add(rdi, rcx) // c += cs_c; jmp(.SDONE) // jump to end. 
// Tail of a kernel whose beginning lies above this point: the beta == 0,
// contiguous-column store path, followed by the asm epilogue.
	label(.SCOLSTORBZ)

	vmovups(ymm4, mem(rcx))
	add(rdi, rcx)
	vmovups(ymm5, mem(rdx))
	add(rdi, rdx)

	vmovups(ymm6, mem(rcx))
	add(rdi, rcx)
	vmovups(ymm7, mem(rdx))
	add(rdi, rdx)

	vmovups(ymm8, mem(rcx))
	add(rdi, rcx)
	vmovups(ymm9, mem(rdx))
	add(rdi, rdx)

	vmovups(ymm10, mem(rcx))
	add(rdi, rcx)
	vmovups(ymm11, mem(rdx))
	add(rdi, rdx)

	vmovups(ymm12, mem(rcx))
	add(rdi, rcx)
	vmovups(ymm13, mem(rdx))
	add(rdi, rdx)

	vmovups(ymm14, mem(rcx))
	//add(rdi, rcx)
	vmovups(ymm15, mem(rdx))
	//add(rdi, rdx)

	label(.SDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a] "m" (a), // 2
	  [b] "m" (b), // 3
	  [alpha] "m" (alpha), // 4
	  [beta] "m" (beta), // 5
	  [c] "m" (c), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c)/*, // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// Gathers four doubles of C located at rcx + {0,1,2,3}*rsi into ymm0
// (r13 is used as 3*rsi). Uses xmm1/ymm1 as scratch. The second half of the
// macro, which would gather four more elements via r15/r10, is commented out.
#define DGEMM_INPUT_GS_BETA_NZ \
	vmovlpd(mem(rcx), xmm0, xmm0) \
	vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) \
	vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1) \
	vmovhpd(mem(rcx, r13, 1), xmm1, xmm1) \
	vperm2f128(imm(0x20), ymm1, ymm0, ymm0) /*\
	vmovlpd(mem(rcx, rsi, 4), xmm2, xmm2) \
	vmovhpd(mem(rcx, r15, 1), xmm2, xmm2) \
	vmovlpd(mem(rcx, r13, 2), xmm1, xmm1) \
	vmovhpd(mem(rcx, r10, 1), xmm1, xmm1) \
	vperm2f128(imm(0x20), ymm1, ymm2, ymm2)*/

// Scatters the four doubles held in ymm0 to rcx + {0,1,2,3}*rsi
// (r13 is used as 3*rsi). Uses xmm1 as scratch. Despite the name it is also
// used on the beta == 0 path below. The second half is commented out.
#define DGEMM_OUTPUT_GS_BETA_NZ \
	vextractf128(imm(1), ymm0, xmm1) \
	vmovlpd(xmm0, mem(rcx)) \
	vmovhpd(xmm0, mem(rcx, rsi, 1)) \
	vmovlpd(xmm1, mem(rcx, rsi, 2)) \
	vmovhpd(xmm1, mem(rcx, r13, 1)) /*\
	vextractf128(imm(1), ymm2, xmm1) \
	vmovlpd(xmm2, mem(rcx, rsi, 4)) \
	vmovhpd(xmm2, mem(rcx, r15, 1)) \
	vmovlpd(xmm1, mem(rcx, r13, 2)) \
	vmovhpd(xmm1, mem(rcx, r10, 1))*/

/*
   bli_dgemm_haswell_asm_8x6: double-precision gemm micro-kernel.

   Accumulates k0 rank-1 updates of an 8x6 tile in ymm4-ymm15, scales the
   tile by *alpha and writes it to c. When beta does not compare equal to
   zero, (*beta) * c is added to the tile before it is stored; otherwise c
   is only written, never read.

   k0           number of rank-1 updates; handled as k0/4 four-way unrolled
                iterations followed by k0%4 single steps.
   alpha, beta  pointers to the scalars.
   a            advanced by 8 doubles per step and read with aligned 32-byte
                loads (vmovapd).
   b            advanced by 6 doubles per step and read with scalar
                broadcasts.
   c            output tile; rs_c0 and cs_c0 are its row and column strides
                in elements. rs_c0 == 1 selects the contiguous-column store
                path, any other value the general-stride path.
   data, cntx   not referenced by this kernel (the a_next/b_next lines are
                commented out).

   Accumulator layout: for column j = 0..5, ymm(4+2j) holds rows 0-3 and
   ymm(5+2j) holds rows 4-7.
*/
void bli_dgemm_haswell_asm_8x6
     (
       dim_t k0,
       double* restrict alpha,
       double* restrict a,
       double* restrict b,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	begin_asm()

	vzeroall() // zero all xmm/ymm registers.

	mov(var(a), rax) // load address of a.
	mov(var(b), rbx) // load address of b.
	//mov(%9, r15) // load address of b_next.

	// rax is biased by 4*32 bytes so the loop can address a with
	// displacements from -4*32 to 3*32.
	add(imm(32*4), rax)
	// initialize loop by pre-loading
	vmovapd(mem(rax, -4*32), ymm0)
	vmovapd(mem(rax, -3*32), ymm1)

	mov(var(c), rcx) // load address of c
	mov(var(cs_c), rdi) // load cs_c
	lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(double)

	lea(mem(rdi, rdi, 2), r13) // r13 = 3*cs_c;
	lea(mem(rcx, r13, 1), rdx) // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*cs_c
	prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*cs_c
	prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*cs_c

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	 // contains the k_left loop.

	label(.DLOOPKITER) // MAIN LOOP

	// iteration 0
	prefetch(0, mem(rax, 64*8))

	vbroadcastsd(mem(rbx, 0*8), ymm2)
	vbroadcastsd(mem(rbx, 1*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 2*8), ymm2)
	vbroadcastsd(mem(rbx, 3*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 4*8), ymm2)
	vbroadcastsd(mem(rbx, 5*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	vmovapd(mem(rax, -2*32), ymm0)
	vmovapd(mem(rax, -1*32), ymm1)

	// iteration 1
	vbroadcastsd(mem(rbx, 6*8), ymm2)
	vbroadcastsd(mem(rbx, 7*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 8*8), ymm2)
	vbroadcastsd(mem(rbx, 9*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 10*8), ymm2)
	vbroadcastsd(mem(rbx, 11*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	vmovapd(mem(rax, 0*32), ymm0)
	vmovapd(mem(rax, 1*32), ymm1)

	// iteration 2
	prefetch(0, mem(rax, 76*8))

	vbroadcastsd(mem(rbx, 12*8), ymm2)
	vbroadcastsd(mem(rbx, 13*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 14*8), ymm2)
	vbroadcastsd(mem(rbx, 15*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 16*8), ymm2)
	vbroadcastsd(mem(rbx, 17*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	vmovapd(mem(rax, 2*32), ymm0)
	vmovapd(mem(rax, 3*32), ymm1)

	// iteration 3
	vbroadcastsd(mem(rbx, 18*8), ymm2)
	vbroadcastsd(mem(rbx, 19*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 20*8), ymm2)
	vbroadcastsd(mem(rbx, 21*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 22*8), ymm2)
	vbroadcastsd(mem(rbx, 23*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	add(imm(4*8*8), rax) // a += 4*8 (unroll x mr)
	add(imm(4*6*8), rbx) // b += 4*6 (unroll x nr)

	vmovapd(mem(rax, -4*32), ymm0)
	vmovapd(mem(rax, -3*32), ymm1)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER) // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	 // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT) // EDGE LOOP

	prefetch(0, mem(rax, 64*8))

	vbroadcastsd(mem(rbx, 0*8), ymm2)
	vbroadcastsd(mem(rbx, 1*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 2*8), ymm2)
	vbroadcastsd(mem(rbx, 3*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 4*8), ymm2)
	vbroadcastsd(mem(rbx, 5*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	add(imm(1*8*8), rax) // a += 1*8 (unroll x mr)
	add(imm(1*6*8), rbx) // b += 1*6 (unroll x nr)

	vmovapd(mem(rax, -4*32), ymm0)
	vmovapd(mem(rax, -3*32), ymm1)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT) // iterate again if i != 0.

	label(.DPOSTACCUM)

	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4) // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(ymm0, ymm7, ymm7)
	vmulpd(ymm0, ymm8, ymm8)
	vmulpd(ymm0, ymm9, ymm9)
	vmulpd(ymm0, ymm10, ymm10)
	vmulpd(ymm0, ymm11, ymm11)
	vmulpd(ymm0, ymm12, ymm12)
	vmulpd(ymm0, ymm13, ymm13)
	vmulpd(ymm0, ymm14, ymm14)
	vmulpd(ymm0, ymm15, ymm15)

	// From here on rsi/r13 hold row-stride quantities and rdx is repointed
	// from c + 3*cs_c (used for the prefetches above) to c + 4*rs_c.
	mov(var(rs_c), rsi) // load rs_c
	lea(mem(, rsi, 8), rsi) // rsi = rs_c * sizeof(double)

	lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), r13) // r13 = 3*rs_c;
	//lea(mem(rsi, rsi, 4), r15) // r15 = 5*rs_c;
	//lea(mem(r13, rsi, 4), r10) // r10 = 7*rs_c;

	// now avoid loading C if beta == 0
	// NOTE(review): vucomisd also sets ZF when the comparison is unordered,
	// so a NaN beta appears to take the beta == 0 path as well -- confirm
	// this is intended.
	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rsi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED) // jump to column storage case

	// General stride, beta != 0: for each column, rows 0-3 first (even
	// accumulators), then rows 4-7 (odd accumulators) starting from rdx.
	label(.DGENSTORED)

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm4, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm6, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm8, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm10, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm12, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm14, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	//add(rdi, rcx) // c += cs_c;

	mov(rdx, rcx) // rcx = c + 4*rs_c

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm5, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm7, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm9, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm11, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm13, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	DGEMM_INPUT_GS_BETA_NZ
	vfmadd213pd(ymm15, ymm3, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	//add(rdi, rcx) // c += cs_c;

	jmp(.DDONE) // jump to end.

	// Contiguous columns, beta != 0: rcx walks rows 0-3 and rdx rows 4-7 of
	// each column, both advancing by cs_c.
	label(.DCOLSTORED)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)
	vfmadd231pd(mem(rdx), ymm3, ymm5)
	vmovupd(ymm5, mem(rdx))
	add(rdi, rdx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)
	vfmadd231pd(mem(rdx), ymm3, ymm7)
	vmovupd(ymm7, mem(rdx))
	add(rdi, rdx)

	vfmadd231pd(mem(rcx), ymm3, ymm8)
	vmovupd(ymm8, mem(rcx))
	add(rdi, rcx)
	vfmadd231pd(mem(rdx), ymm3, ymm9)
	vmovupd(ymm9, mem(rdx))
	add(rdi, rdx)

	vfmadd231pd(mem(rcx), ymm3, ymm10)
	vmovupd(ymm10, mem(rcx))
	add(rdi, rcx)
	vfmadd231pd(mem(rdx), ymm3, ymm11)
	vmovupd(ymm11, mem(rdx))
	add(rdi, rdx)

	vfmadd231pd(mem(rcx), ymm3, ymm12)
	vmovupd(ymm12, mem(rcx))
	add(rdi, rcx)
	vfmadd231pd(mem(rdx), ymm3, ymm13)
	vmovupd(ymm13, mem(rdx))
	add(rdi, rdx)

	vfmadd231pd(mem(rcx), ymm3, ymm14)
	vmovupd(ymm14, mem(rcx))
	//add(rdi, rcx)
	vfmadd231pd(mem(rdx), ymm3, ymm15)
	vmovupd(ymm15, mem(rdx))
	//add(rdi, rdx)

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rsi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ) // jump to column storage case

	// General stride, beta == 0: same traversal as .DGENSTORED, store only.
	label(.DGENSTORBZ)

	vmovapd(ymm4, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm6, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm8, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm10, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm12, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm14, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	//add(rdi, rcx) // c += cs_c;

	mov(rdx, rcx) // rcx = c + 4*rs_c

	vmovapd(ymm5, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm7, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm9, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm11, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm13, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	add(rdi, rcx) // c += cs_c;

	vmovapd(ymm15, ymm0)
	DGEMM_OUTPUT_GS_BETA_NZ
	//add(rdi, rcx) // c += cs_c;

	jmp(.DDONE) // jump to end.

	// Contiguous columns, beta == 0: store only.
	label(.DCOLSTORBZ)

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)
	vmovupd(ymm5, mem(rdx))
	add(rdi, rdx)

	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)
	vmovupd(ymm7, mem(rdx))
	add(rdi, rdx)

	vmovupd(ymm8, mem(rcx))
	add(rdi, rcx)
	vmovupd(ymm9, mem(rdx))
	add(rdi, rdx)

	vmovupd(ymm10, mem(rcx))
	add(rdi, rcx)
	vmovupd(ymm11, mem(rdx))
	add(rdi, rdx)

	vmovupd(ymm12, mem(rcx))
	add(rdi, rcx)
	vmovupd(ymm13, mem(rdx))
	add(rdi, rdx)

	vmovupd(ymm14, mem(rcx))
	//add(rdi, rcx)
	vmovupd(ymm15, mem(rdx))
	//add(rdi, rdx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a] "m" (a), // 2
	  [b] "m" (b), // 3
	  [alpha] "m" (alpha), // 4
	  [beta] "m" (beta), // 5
	  [c] "m" (c), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c)/*, // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// assumes beta.r, beta.i have been broadcast into ymm1, ymm2.
// outputs to ymm0
// General-stride load of four scomplex elements of C from
// rcx + {0,1,2,3}*rsi (r13 is used as 3*rsi), followed by a complex multiply
// by beta. Uses ymm3 as scratch.
#define CGEMM_INPUT_SCALE_GS_BETA_NZ \
	vmovlpd(mem(rcx), xmm0, xmm0) \
	vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) \
	vmovlpd(mem(rcx, rsi, 2), xmm3, xmm3) \
	vmovhpd(mem(rcx, r13, 1), xmm3, xmm3) \
	vinsertf128(imm(1), xmm3, ymm0, ymm0) \
	vpermilps(imm(0xb1), ymm0, ymm3) \
	vmulps(ymm1, ymm0, ymm0) \
	vmulps(ymm2, ymm3, ymm3) \
	vaddsubps(ymm3, ymm0, ymm0)

// assumes values to output are in ymm0
// General-stride store of four scomplex elements to rcx + {0,1,2,3}*rsi
// (r13 is used as 3*rsi). Uses xmm3 as scratch.
#define CGEMM_OUTPUT_GS \
	vextractf128(imm(1), ymm0, xmm3) \
	vmovlpd(xmm0, mem(rcx)) \
	vmovhpd(xmm0, mem(rcx, rsi, 1)) \
	vmovlpd(xmm3, mem(rcx, rsi, 2)) \
	vmovhpd(xmm3, mem(rcx, r13, 1))

// Contiguous load of 32 bytes of C from rcx into ymm0, followed by a complex
// multiply by beta (beta.r in ymm1, beta.i in ymm2). Uses ymm3 as scratch.
#define CGEMM_INPUT_SCALE_CS_BETA_NZ \
	vmovups(mem(rcx), ymm0) \
	vpermilps(imm(0xb1), ymm0, ymm3) \
	vmulps(ymm1, ymm0, ymm0) \
	vmulps(ymm2, ymm3, ymm3) \
	vaddsubps(ymm3, ymm0, ymm0)

// Contiguous store of ymm0 to rcx.
// NOTE(review): the definition ends with a line-continuation backslash, so
// the line directly after it is spliced into the macro and must stay blank.
#define CGEMM_OUTPUT_CS \
	vmovups(ymm0, mem(rcx)) \

/*
   bli_cgemm_haswell_asm_8x3: single-precision complex gemm micro-kernel.

   Accumulates k0 rank-1 updates of an 8x3 scomplex tile, scales it by the
   complex scalar *alpha and writes it to c. Unless both parts of *beta
   compare equal to zero, (*beta) * c is added before the store; otherwise c
   is only written, never read.

   k0           number of rank-1 updates; handled as k0/4 four-way unrolled
                iterations followed by k0%4 single steps.
   alpha, beta  pointers to the complex scalars (real part first).
   a            advanced by 64 bytes per step and read with aligned 32-byte
                loads (vmovaps).
   b            advanced by 24 bytes per step; its six floats are read with
                scalar broadcasts.
   c            output tile; rs_c0 and cs_c0 are its row and column strides
                in elements. rs_c0 == 1 selects the contiguous-column store
                path, any other value the general-stride path.
   data, cntx   not referenced by this kernel.

   Accumulator layout during the k loops, for column j = 0..2:
   ymm(4+4j)/ymm(5+4j) accumulate a times the first broadcast float of b's
   j-th element, ymm(6+4j)/ymm(7+4j) a times the second. After .CPOSTACCUM
   the pairs are combined so that ymm4/5, ymm8/9 and ymm12/13 hold the three
   complex columns (rows 0-3 / rows 4-7).
*/
void bli_cgemm_haswell_asm_8x3
     (
       dim_t k0,
       scomplex* restrict alpha,
       scomplex* restrict a,
       scomplex* restrict b,
       scomplex* restrict beta,
       scomplex* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	begin_asm()

	vzeroall() // zero all xmm/ymm registers.

	mov(var(a), rax) // load address of a.
	mov(var(b), rbx) // load address of b.
	//mov(%9, r15) // load address of b_next.

	// rax is biased by 4*32 bytes so the loop can address a with
	// displacements from -4*32 to 3*32.
	add(imm(32*4), rax)
	// initialize loop by pre-loading
	vmovaps(mem(rax, -4*32), ymm0)
	vmovaps(mem(rax, -3*32), ymm1)

	mov(var(c), rcx) // load address of c
	mov(var(cs_c), rdi) // load cs_c
	lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(scomplex)

	lea(mem(rcx, rdi, 1), r11) // r11 = c + 1*cs_c;
	lea(mem(rcx, rdi, 2), r12) // r12 = c + 2*cs_c;

	prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(r11, 7*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(r12, 7*8)) // prefetch c + 2*cs_c

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.CCONSIDKLEFT) // if i == 0, jump to code that
	 // contains the k_left loop.

	label(.CLOOPKITER) // MAIN LOOP

	// iteration 0
	prefetch(0, mem(rax, 32*8))

	vbroadcastss(mem(rbx, 0*4), ymm2)
	vbroadcastss(mem(rbx, 1*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm4)
	vfmadd231ps(ymm1, ymm2, ymm5)
	vfmadd231ps(ymm0, ymm3, ymm6)
	vfmadd231ps(ymm1, ymm3, ymm7)

	vbroadcastss(mem(rbx, 2*4), ymm2)
	vbroadcastss(mem(rbx, 3*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm8)
	vfmadd231ps(ymm1, ymm2, ymm9)
	vfmadd231ps(ymm0, ymm3, ymm10)
	vfmadd231ps(ymm1, ymm3, ymm11)

	vbroadcastss(mem(rbx, 4*4), ymm2)
	vbroadcastss(mem(rbx, 5*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm12)
	vfmadd231ps(ymm1, ymm2, ymm13)
	vfmadd231ps(ymm0, ymm3, ymm14)
	vfmadd231ps(ymm1, ymm3, ymm15)

	vmovaps(mem(rax, -2*32), ymm0)
	vmovaps(mem(rax, -1*32), ymm1)

	// iteration 1
	vbroadcastss(mem(rbx, 6*4), ymm2)
	vbroadcastss(mem(rbx, 7*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm4)
	vfmadd231ps(ymm1, ymm2, ymm5)
	vfmadd231ps(ymm0, ymm3, ymm6)
	vfmadd231ps(ymm1, ymm3, ymm7)

	vbroadcastss(mem(rbx, 8*4), ymm2)
	vbroadcastss(mem(rbx, 9*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm8)
	vfmadd231ps(ymm1, ymm2, ymm9)
	vfmadd231ps(ymm0, ymm3, ymm10)
	vfmadd231ps(ymm1, ymm3, ymm11)

	vbroadcastss(mem(rbx, 10*4), ymm2)
	vbroadcastss(mem(rbx, 11*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm12)
	vfmadd231ps(ymm1, ymm2, ymm13)
	vfmadd231ps(ymm0, ymm3, ymm14)
	vfmadd231ps(ymm1, ymm3, ymm15)

	vmovaps(mem(rax, 0*32), ymm0)
	vmovaps(mem(rax, 1*32), ymm1)

	// iteration 2
	prefetch(0, mem(rax, 38*8))

	vbroadcastss(mem(rbx, 12*4), ymm2)
	vbroadcastss(mem(rbx, 13*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm4)
	vfmadd231ps(ymm1, ymm2, ymm5)
	vfmadd231ps(ymm0, ymm3, ymm6)
	vfmadd231ps(ymm1, ymm3, ymm7)

	vbroadcastss(mem(rbx, 14*4), ymm2)
	vbroadcastss(mem(rbx, 15*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm8)
	vfmadd231ps(ymm1, ymm2, ymm9)
	vfmadd231ps(ymm0, ymm3, ymm10)
	vfmadd231ps(ymm1, ymm3, ymm11)

	vbroadcastss(mem(rbx, 16*4), ymm2)
	vbroadcastss(mem(rbx, 17*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm12)
	vfmadd231ps(ymm1, ymm2, ymm13)
	vfmadd231ps(ymm0, ymm3, ymm14)
	vfmadd231ps(ymm1, ymm3, ymm15)

	vmovaps(mem(rax, 2*32), ymm0)
	vmovaps(mem(rax, 3*32), ymm1)

	// iteration 3
	vbroadcastss(mem(rbx, 18*4), ymm2)
	vbroadcastss(mem(rbx, 19*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm4)
	vfmadd231ps(ymm1, ymm2, ymm5)
	vfmadd231ps(ymm0, ymm3, ymm6)
	vfmadd231ps(ymm1, ymm3, ymm7)

	vbroadcastss(mem(rbx, 20*4), ymm2)
	vbroadcastss(mem(rbx, 21*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm8)
	vfmadd231ps(ymm1, ymm2, ymm9)
	vfmadd231ps(ymm0, ymm3, ymm10)
	vfmadd231ps(ymm1, ymm3, ymm11)

	vbroadcastss(mem(rbx, 22*4), ymm2)
	vbroadcastss(mem(rbx, 23*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm12)
	vfmadd231ps(ymm1, ymm2, ymm13)
	vfmadd231ps(ymm0, ymm3, ymm14)
	vfmadd231ps(ymm1, ymm3, ymm15)

	add(imm(4*8*8), rax) // a += 4*8 (unroll x mr)
	add(imm(4*3*8), rbx) // b += 4*3 (unroll x nr)

	vmovaps(mem(rax, -4*32), ymm0)
	vmovaps(mem(rax, -3*32), ymm1)

	dec(rsi) // i -= 1;
	jne(.CLOOPKITER) // iterate again if i != 0.

	label(.CCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.CPOSTACCUM) // if i == 0, we're done; jump to end.
	 // else, we prepare to enter k_left loop.

	label(.CLOOPKLEFT) // EDGE LOOP

	prefetch(0, mem(rax, 32*8))

	vbroadcastss(mem(rbx, 0*4), ymm2)
	vbroadcastss(mem(rbx, 1*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm4)
	vfmadd231ps(ymm1, ymm2, ymm5)
	vfmadd231ps(ymm0, ymm3, ymm6)
	vfmadd231ps(ymm1, ymm3, ymm7)

	vbroadcastss(mem(rbx, 2*4), ymm2)
	vbroadcastss(mem(rbx, 3*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm8)
	vfmadd231ps(ymm1, ymm2, ymm9)
	vfmadd231ps(ymm0, ymm3, ymm10)
	vfmadd231ps(ymm1, ymm3, ymm11)

	vbroadcastss(mem(rbx, 4*4), ymm2)
	vbroadcastss(mem(rbx, 5*4), ymm3)
	vfmadd231ps(ymm0, ymm2, ymm12)
	vfmadd231ps(ymm1, ymm2, ymm13)
	vfmadd231ps(ymm0, ymm3, ymm14)
	vfmadd231ps(ymm1, ymm3, ymm15)

	add(imm(1*8*8), rax) // a += 1*8 (unroll x mr)
	add(imm(1*3*8), rbx) // b += 1*3 (unroll x nr)

	vmovaps(mem(rax, -4*32), ymm0)
	vmovaps(mem(rax, -3*32), ymm1)

	dec(rsi) // i -= 1;
	jne(.CLOOPKLEFT) // iterate again if i != 0.

	label(.CPOSTACCUM)

	 // permute even and odd elements
	 // of ymm6/7, ymm10/11, ymm14/15
	vpermilps(imm(0xb1), ymm6, ymm6)
	vpermilps(imm(0xb1), ymm7, ymm7)
	vpermilps(imm(0xb1), ymm10, ymm10)
	vpermilps(imm(0xb1), ymm11, ymm11)
	vpermilps(imm(0xb1), ymm14, ymm14)
	vpermilps(imm(0xb1), ymm15, ymm15)

	 // subtract/add even/odd elements
	vaddsubps(ymm6, ymm4, ymm4)
	vaddsubps(ymm7, ymm5, ymm5)

	vaddsubps(ymm10, ymm8, ymm8)
	vaddsubps(ymm11, ymm9, ymm9)

	vaddsubps(ymm14, ymm12, ymm12)
	vaddsubps(ymm15, ymm13, ymm13)

	mov(var(alpha), rax) // load address of alpha
	vbroadcastss(mem(rax), ymm0) // load alpha_r and duplicate
	vbroadcastss(mem(rax, 4), ymm1) // load alpha_i and duplicate

	// Complex scaling by alpha, one accumulator at a time: swap the
	// real/imaginary lanes into ymm3, multiply the original by alpha_r and
	// the swapped copy by alpha_i, then combine with vaddsubps.
	vpermilps(imm(0xb1), ymm4, ymm3)
	vmulps(ymm0, ymm4, ymm4)
	vmulps(ymm1, ymm3, ymm3)
	vaddsubps(ymm3, ymm4, ymm4)

	vpermilps(imm(0xb1), ymm5, ymm3)
	vmulps(ymm0, ymm5, ymm5)
	vmulps(ymm1, ymm3, ymm3)
	vaddsubps(ymm3, ymm5, ymm5)

	vpermilps(imm(0xb1), ymm8, ymm3)
	vmulps(ymm0, ymm8, ymm8)
	vmulps(ymm1, ymm3, ymm3)
	vaddsubps(ymm3, ymm8, ymm8)

	vpermilps(imm(0xb1), ymm9, ymm3)
	vmulps(ymm0, ymm9, ymm9)
	vmulps(ymm1, ymm3, ymm3)
	vaddsubps(ymm3, ymm9, ymm9)

	vpermilps(imm(0xb1), ymm12, ymm3)
	vmulps(ymm0, ymm12, ymm12)
	vmulps(ymm1, ymm3, ymm3)
	vaddsubps(ymm3, ymm12, ymm12)

	vpermilps(imm(0xb1), ymm13, ymm3)
	vmulps(ymm0, ymm13, ymm13)
	vmulps(ymm1, ymm3, ymm3)
	vaddsubps(ymm3, ymm13, ymm13)

	mov(var(beta), rbx) // load address of beta
	vbroadcastss(mem(rbx), ymm1) // load beta_r and duplicate
	vbroadcastss(mem(rbx, 4), ymm2) // load beta_i and duplicate

	mov(var(rs_c), rsi) // load rs_c
	lea(mem(, rsi, 8), rsi) // rsi = rs_c * sizeof(scomplex)
	lea(mem(, rsi, 4), rdx) // rdx = 4*rs_c;
	lea(mem(rsi, rsi, 2), r13) // r13 = 3*rs_c;

	 // now avoid loading C if beta == 0
	// NOTE(review): vucomiss also sets ZF when the comparison is unordered,
	// so a NaN component of beta appears to count as zero here -- confirm
	// this is intended.
	vxorps(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomiss(xmm0, xmm1) // set ZF if beta_r == 0.
	sete(r8b) // r8b = ( ZF == 1 ? 1 : 0 );
	vucomiss(xmm0, xmm2) // set ZF if beta_i == 0.
	sete(r9b) // r9b = ( ZF == 1 ? 1 : 0 );
	and(r8b, r9b) // r9b &= r8b; ZF is cleared only if both were 1.
	jne(.CBETAZERO) // if ZF = 0 (beta_r == 0 and beta_i == 0), jump to beta == 0 case

	cmp(imm(8), rsi) // set ZF if (8*rs_c) == 8.
	jz(.CCOLSTORED) // jump to column storage case

	// General stride, beta != 0: per column, rows 0-3 then rows 4-7.
	label(.CGENSTORED)

	CGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddps(ymm4, ymm0, ymm0)
	CGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 4*rs_c;

	CGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddps(ymm5, ymm0, ymm0)
	CGEMM_OUTPUT_GS
	mov(r11, rcx) // rcx = c + 1*cs_c

	CGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddps(ymm8, ymm0, ymm0)
	CGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 4*rs_c;

	CGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddps(ymm9, ymm0, ymm0)
	CGEMM_OUTPUT_GS
	mov(r12, rcx) // rcx = c + 2*cs_c

	CGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddps(ymm12, ymm0, ymm0)
	CGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 4*rs_c;

	CGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddps(ymm13, ymm0, ymm0)
	CGEMM_OUTPUT_GS

	jmp(.CDONE) // jump to end.

	// Contiguous columns, beta != 0.
	label(.CCOLSTORED)

	CGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddps(ymm4, ymm0, ymm0)
	CGEMM_OUTPUT_CS
	add(rdx, rcx) // c += 4*rs_c;

	CGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddps(ymm5, ymm0, ymm0)
	CGEMM_OUTPUT_CS
	mov(r11, rcx) // rcx = c + 1*cs_c

	CGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddps(ymm8, ymm0, ymm0)
	CGEMM_OUTPUT_CS
	add(rdx, rcx) // c += 4*rs_c;

	CGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddps(ymm9, ymm0, ymm0)
	CGEMM_OUTPUT_CS
	mov(r12, rcx) // rcx = c + 2*cs_c

	CGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddps(ymm12, ymm0, ymm0)
	CGEMM_OUTPUT_CS
	add(rdx, rcx) // c += 4*rs_c;

	CGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddps(ymm13, ymm0, ymm0)
	CGEMM_OUTPUT_CS

	jmp(.CDONE) // jump to end.

	label(.CBETAZERO)

	cmp(imm(8), rsi) // set ZF if (8*rs_c) == 8.
	jz(.CCOLSTORBZ) // jump to column storage case

	// General stride, beta == 0: store only.
	label(.CGENSTORBZ)

	vmovaps(ymm4, ymm0)
	CGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 4*rs_c;

	vmovaps(ymm5, ymm0)
	CGEMM_OUTPUT_GS
	mov(r11, rcx) // rcx = c + 1*cs_c

	vmovaps(ymm8, ymm0)
	CGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 4*rs_c;

	vmovaps(ymm9, ymm0)
	CGEMM_OUTPUT_GS
	mov(r12, rcx) // rcx = c + 2*cs_c

	vmovaps(ymm12, ymm0)
	CGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 4*rs_c;

	vmovaps(ymm13, ymm0)
	CGEMM_OUTPUT_GS

	jmp(.CDONE) // jump to end.

	// Contiguous columns, beta == 0: store only.
	label(.CCOLSTORBZ)

	vmovups(ymm4, mem(rcx))
	vmovups(ymm5, mem(rcx, rdx, 1))

	vmovups(ymm8, mem(r11))
	vmovups(ymm9, mem(r11, rdx, 1))

	vmovups(ymm12, mem(r12))
	vmovups(ymm13, mem(r12, rdx, 1))

	label(.CDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a] "m" (a), // 2
	  [b] "m" (b), // 3
	  [alpha] "m" (alpha), // 4
	  [beta] "m" (beta), // 5
	  [c] "m" (c), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c)/*, // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// assumes beta.r, beta.i have been broadcast into ymm1, ymm2.
// outputs to ymm0
// General-stride load of two dcomplex elements of C from rcx and rcx + rsi,
// followed by a complex multiply by beta. Uses ymm3 as scratch.
#define ZGEMM_INPUT_SCALE_GS_BETA_NZ \
	vmovupd(mem(rcx), xmm0) \
	vmovupd(mem(rcx, rsi, 1), xmm3) \
	vinsertf128(imm(1), xmm3, ymm0, ymm0) \
	vpermilpd(imm(0x5), ymm0, ymm3) \
	vmulpd(ymm1, ymm0, ymm0) \
	vmulpd(ymm2, ymm3, ymm3) \
	vaddsubpd(ymm3, ymm0, ymm0)

// assumes values to output are in ymm0
// General-stride store of two dcomplex elements to rcx and rcx + rsi.
// Uses xmm3 as scratch.
// NOTE(review): the definition ends with a line-continuation backslash, so
// the line directly after it is spliced into the macro and must stay blank.
#define ZGEMM_OUTPUT_GS \
	vextractf128(imm(1), ymm0, xmm3) \
	vmovupd(xmm0, mem(rcx)) \
	vmovupd(xmm3, mem(rcx, rsi, 1)) \

// Contiguous load of 32 bytes of C from rcx into ymm0, followed by a complex
// multiply by beta (beta.r in ymm1, beta.i in ymm2). Uses ymm3 as scratch.
// NOTE(review): the load is spelled vmovups while every other instruction
// here, and ZGEMM_OUTPUT_CS, use the pd forms; the same 32 bytes are loaded
// either way, but vmovupd would be consistent -- confirm before changing.
#define ZGEMM_INPUT_SCALE_CS_BETA_NZ \
	vmovups(mem(rcx), ymm0) \
	vpermilpd(imm(0x5), ymm0, ymm3) \
	vmulpd(ymm1, ymm0, ymm0) \
	vmulpd(ymm2, ymm3, ymm3) \
	vaddsubpd(ymm3, ymm0, ymm0)

// Contiguous store of ymm0 to rcx.
// NOTE(review): trailing line-continuation backslash; the next line must
// stay blank.
#define ZGEMM_OUTPUT_CS \
	vmovupd(ymm0, mem(rcx)) \

/*
   bli_zgemm_haswell_asm_4x3: double-precision complex gemm micro-kernel.

   k0           number of rank-1 updates; handled as k0/4 four-way unrolled
                iterations followed by k0%4 single steps.
   alpha, beta  pointers to the complex scalars.
   a, b         input operands.
   c            output tile; rs_c0 and cs_c0 are its row and column strides
                in elements.
   data, cntx   not referenced in the part of the body visible here (the
                a_next/b_next lines are commented out).
*/
void bli_zgemm_haswell_asm_4x3
     (
       dim_t k0,
       dcomplex* restrict alpha,
       dcomplex* restrict a,
       dcomplex* restrict b,
       dcomplex* restrict beta,
       dcomplex* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
// k0 is split into k_iter four-way unrolled iterations and k_left single steps.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	begin_asm()

	vzeroall() // zero all xmm/ymm registers.

	mov(var(a), rax) // load address of a.
	mov(var(b), rbx) // load address of b.
	//mov(%9, r15) // load address of b_next.

	// rax is biased by 4*32 bytes so the loop can address a with
	// displacements from -4*32 to 3*32.
	add(imm(32*4), rax)
	// initialize loop by pre-loading
	vmovapd(mem(rax, -4*32), ymm0)
	vmovapd(mem(rax, -3*32), ymm1)

	mov(var(c), rcx) // load address of c
	mov(var(cs_c), rdi) // load cs_c
	lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(dcomplex)
	lea(mem(, rdi, 2), rdi) // (second half of the 16-byte scaling)

	lea(mem(rcx, rdi, 1), r11) // r11 = c + 1*cs_c;
	lea(mem(rcx, rdi, 2), r12) // r12 = c + 2*cs_c;

	prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(r11, 7*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(r12, 7*8)) // prefetch c + 2*cs_c

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.ZCONSIDKLEFT) // if i == 0, jump to code that
	 // contains the k_left loop.

	// During the k loops, for column j = 0..2, ymm(4+4j)/ymm(5+4j) accumulate
	// a times the first broadcast double of b's j-th element and
	// ymm(6+4j)/ymm(7+4j) a times the second.
	label(.ZLOOPKITER) // MAIN LOOP

	// iteration 0
	prefetch(0, mem(rax, 32*16))

	vbroadcastsd(mem(rbx, 0*8), ymm2)
	vbroadcastsd(mem(rbx, 1*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 2*8), ymm2)
	vbroadcastsd(mem(rbx, 3*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 4*8), ymm2)
	vbroadcastsd(mem(rbx, 5*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	vmovapd(mem(rax, -2*32), ymm0)
	vmovapd(mem(rax, -1*32), ymm1)

	// iteration 1
	vbroadcastsd(mem(rbx, 6*8), ymm2)
	vbroadcastsd(mem(rbx, 7*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 8*8), ymm2)
	vbroadcastsd(mem(rbx, 9*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 10*8), ymm2)
	vbroadcastsd(mem(rbx, 11*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	vmovapd(mem(rax, 0*32), ymm0)
	vmovapd(mem(rax, 1*32), ymm1)

	// iteration 2
	prefetch(0, mem(rax, 38*16))

	vbroadcastsd(mem(rbx, 12*8), ymm2)
	vbroadcastsd(mem(rbx, 13*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 14*8), ymm2)
	vbroadcastsd(mem(rbx, 15*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 16*8), ymm2)
	vbroadcastsd(mem(rbx, 17*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	vmovapd(mem(rax, 2*32), ymm0)
	vmovapd(mem(rax, 3*32), ymm1)

	// iteration 3
	vbroadcastsd(mem(rbx, 18*8), ymm2)
	vbroadcastsd(mem(rbx, 19*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 20*8), ymm2)
	vbroadcastsd(mem(rbx, 21*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 22*8), ymm2)
	vbroadcastsd(mem(rbx, 23*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	add(imm(4*4*16), rax) // a += 4*4 (unroll x mr)
	add(imm(4*3*16), rbx) // b += 4*3 (unroll x nr)

	vmovapd(mem(rax, -4*32), ymm0)
	vmovapd(mem(rax, -3*32), ymm1)

	dec(rsi) // i -= 1;
	jne(.ZLOOPKITER) // iterate again if i != 0.

	label(.ZCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.ZPOSTACCUM) // if i == 0, we're done; jump to end.
	 // else, we prepare to enter k_left loop.

	label(.ZLOOPKLEFT) // EDGE LOOP

	prefetch(0, mem(rax, 32*16))

	vbroadcastsd(mem(rbx, 0*8), ymm2)
	vbroadcastsd(mem(rbx, 1*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rbx, 2*8), ymm2)
	vbroadcastsd(mem(rbx, 3*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rbx, 4*8), ymm2)
	vbroadcastsd(mem(rbx, 5*8), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	add(imm(1*4*16), rax) // a += 1*4 (unroll x mr)
	add(imm(1*3*16), rbx) // b += 1*3 (unroll x nr)

	vmovapd(mem(rax, -4*32), ymm0)
	vmovapd(mem(rax, -3*32), ymm1)

	dec(rsi) // i -= 1;
	jne(.ZLOOPKLEFT) // iterate again if i != 0.

	label(.ZPOSTACCUM)

	 // permute even and odd elements
	 // of ymm6/7, ymm10/11, ymm14/15
	vpermilpd(imm(0x5), ymm6, ymm6)
	vpermilpd(imm(0x5), ymm7, ymm7)
	vpermilpd(imm(0x5), ymm10, ymm10)
	vpermilpd(imm(0x5), ymm11, ymm11)
	vpermilpd(imm(0x5), ymm14, ymm14)
	vpermilpd(imm(0x5), ymm15, ymm15)

	 // subtract/add even/odd elements
	vaddsubpd(ymm6, ymm4, ymm4)
	vaddsubpd(ymm7, ymm5, ymm5)

	vaddsubpd(ymm10, ymm8, ymm8)
	vaddsubpd(ymm11, ymm9, ymm9)

	vaddsubpd(ymm14, ymm12, ymm12)
	vaddsubpd(ymm15, ymm13, ymm13)

	mov(var(alpha), rax) // load address of alpha
	vbroadcastsd(mem(rax), ymm0) // load alpha_r and duplicate
	vbroadcastsd(mem(rax, 8), ymm1) // load alpha_i and duplicate

	// Complex scaling by alpha, one accumulator at a time: swap the
	// real/imaginary lanes into ymm3, multiply the original by alpha_r and
	// the swapped copy by alpha_i, then combine with vaddsubpd.
	vpermilpd(imm(0x5), ymm4, ymm3)
	vmulpd(ymm0, ymm4, ymm4)
	vmulpd(ymm1, ymm3, ymm3)
	vaddsubpd(ymm3, ymm4, ymm4)

	vpermilpd(imm(0x5), ymm5, ymm3)
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm1, ymm3, ymm3)
	vaddsubpd(ymm3, ymm5, ymm5)

	vpermilpd(imm(0x5), ymm8, ymm3)
	vmulpd(ymm0, ymm8, ymm8)
	vmulpd(ymm1, ymm3, ymm3)
	vaddsubpd(ymm3, ymm8, ymm8)

	vpermilpd(imm(0x5), ymm9, ymm3)
	vmulpd(ymm0, ymm9, ymm9)
	vmulpd(ymm1, ymm3, ymm3)
	vaddsubpd(ymm3, ymm9, ymm9)

	vpermilpd(imm(0x5), ymm12, ymm3)
	vmulpd(ymm0, ymm12, ymm12)
	vmulpd(ymm1, ymm3, ymm3)
	vaddsubpd(ymm3, ymm12, ymm12)

	vpermilpd(imm(0x5), ymm13, ymm3)
	vmulpd(ymm0, ymm13, ymm13)
	vmulpd(ymm1, ymm3, ymm3)
	vaddsubpd(ymm3, ymm13, ymm13)

	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rbx), ymm1) // load beta_r and duplicate
	vbroadcastsd(mem(rbx, 8), ymm2) // load beta_i and duplicate

	mov(var(rs_c), rsi) // load rs_c
	lea(mem(, rsi, 8), rsi) // rsi = rs_c * sizeof(dcomplex)
	lea(mem(, rsi, 2), rsi) // (second half of the 16-byte scaling)
	lea(mem(, rsi, 2), rdx) // rdx = 2*rs_c;

	 // now avoid loading C if beta == 0
	// NOTE(review): vucomisd also sets ZF when the comparison is unordered,
	// so a NaN component of beta appears to count as zero here -- confirm
	// this is intended.
	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm1) // set ZF if beta_r == 0.
	sete(r8b) // r8b = ( ZF == 1 ? 1 : 0 );
	vucomisd(xmm0, xmm2) // set ZF if beta_i == 0.
	sete(r9b) // r9b = ( ZF == 1 ? 1 : 0 );
	and(r8b, r9b) // r9b &= r8b; ZF is cleared only if both were 1.
	jne(.ZBETAZERO) // if ZF = 0 (beta_r == 0 and beta_i == 0), jump to beta == 0 case

	cmp(imm(16), rsi) // set ZF if (16*rs_c) == 16.
	jz(.ZCOLSTORED) // jump to column storage case

	// General stride, beta != 0: per column, rows 0-1 then rows 2-3.
	label(.ZGENSTORED)

	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddpd(ymm4, ymm0, ymm0)
	ZGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 2*rs_c;

	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddpd(ymm5, ymm0, ymm0)
	ZGEMM_OUTPUT_GS
	mov(r11, rcx) // rcx = c + 1*cs_c

	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddpd(ymm8, ymm0, ymm0)
	ZGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 2*rs_c;

	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddpd(ymm9, ymm0, ymm0)
	ZGEMM_OUTPUT_GS
	mov(r12, rcx) // rcx = c + 2*cs_c

	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddpd(ymm12, ymm0, ymm0)
	ZGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 2*rs_c;

	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	vaddpd(ymm13, ymm0, ymm0)
	ZGEMM_OUTPUT_GS

	jmp(.ZDONE) // jump to end.

	// Contiguous columns, beta != 0.
	label(.ZCOLSTORED)

	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddpd(ymm4, ymm0, ymm0)
	ZGEMM_OUTPUT_CS
	add(rdx, rcx) // c += 2*rs_c;

	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddpd(ymm5, ymm0, ymm0)
	ZGEMM_OUTPUT_CS
	mov(r11, rcx) // rcx = c + 1*cs_c

	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddpd(ymm8, ymm0, ymm0)
	ZGEMM_OUTPUT_CS
	add(rdx, rcx) // c += 2*rs_c;

	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddpd(ymm9, ymm0, ymm0)
	ZGEMM_OUTPUT_CS
	mov(r12, rcx) // rcx = c + 2*cs_c

	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddpd(ymm12, ymm0, ymm0)
	ZGEMM_OUTPUT_CS
	add(rdx, rcx) // c += 2*rs_c;

	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	vaddpd(ymm13, ymm0, ymm0)
	ZGEMM_OUTPUT_CS

	jmp(.ZDONE) // jump to end.

	label(.ZBETAZERO)

	cmp(imm(16), rsi) // set ZF if (16*rs_c) == 16.
	jz(.ZCOLSTORBZ) // jump to column storage case

	// General stride, beta == 0: store only.
	label(.ZGENSTORBZ)

	vmovapd(ymm4, ymm0)
	ZGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 2*rs_c;

	vmovapd(ymm5, ymm0)
	ZGEMM_OUTPUT_GS
	mov(r11, rcx) // rcx = c + 1*cs_c

	vmovapd(ymm8, ymm0)
	ZGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 2*rs_c;

	vmovapd(ymm9, ymm0)
	ZGEMM_OUTPUT_GS
	mov(r12, rcx) // rcx = c + 2*cs_c

	vmovapd(ymm12, ymm0)
	ZGEMM_OUTPUT_GS
	add(rdx, rcx) // c += 2*rs_c;

	vmovapd(ymm13, ymm0)
	ZGEMM_OUTPUT_GS

	jmp(.ZDONE) // jump to end.

	// Contiguous columns, beta == 0: store only.
	label(.ZCOLSTORBZ)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm5, mem(rcx, rdx, 1))

	vmovupd(ymm8, mem(r11))
	vmovupd(ymm9, mem(r11, rdx, 1))

	vmovupd(ymm12, mem(r12))
	vmovupd(ymm13, mem(r12, rdx, 1))

	label(.ZDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a] "m" (a), // 2
	  [b] "m" (b), // 3
	  [alpha] "m" (alpha), // 4
	  [beta] "m" (beta), // 5
	  [c] "m" (c), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c)/*, // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}
blis-0.6.1/kernels/haswell/3/bli_gemmtrsm_l_haswell_asm_d6x8.c000066400000000000000000001165251360743507500243270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" #define SGEMM_OUTPUT_GS_BETA_NZ \ vextractf128(imm(1), ymm0, xmm2) \ vmovss(xmm0, mem(rcx)) \ vpermilps(imm(0x39), xmm0, xmm1) \ vmovss(xmm1, mem(rcx, rsi, 1)) \ vpermilps(imm(0x39), xmm1, xmm0) \ vmovss(xmm0, mem(rcx, rsi, 2)) \ vpermilps(imm(0x39), xmm0, xmm1) \ vmovss(xmm1, mem(rcx, r13, 1)) \ vmovss(xmm2, mem(rcx, rsi, 4)) \ vpermilps(imm(0x39), xmm2, xmm1) \ vmovss(xmm1, mem(rcx, r15, 1)) \ vpermilps(imm(0x39), xmm1, xmm2) \ vmovss(xmm2, mem(rcx, r13, 2)) \ vpermilps(imm(0x39), xmm2, xmm1) \ vmovss(xmm1, mem(rcx, r10, 1)) void bli_sgemmtrsm_l_haswell_asm_6x16 ( dim_t k0, float* restrict alpha, float* restrict a10, float* restrict a11, float* restrict b01, float* restrict b11, float* restrict c11, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; float* beta = bli_sm1; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a10), rax) // load address of a. mov(var(b01), rbx) // load address of b. add(imm(32*4), rbx) // initialize loop by pre-loading vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) mov(var(b11), rcx) // load address of b11 mov(imm(16), rdi) // set rs_b = PACKNR = 16 lea(mem(, rdi, 4), rdi) // rs_b *= sizeof(float) // NOTE: c11, rs_c, and cs_c aren't // needed for a while, but we load // them now to avoid stalling later. mov(var(c11), r8) // load address of c11 mov(var(rs_c), r9) // load rs_c lea(mem(, r9 , 4), r9) // rs_c *= sizeof(float) mov(var(k_left)0, r10) // load cs_c lea(mem(, r10, 4), r10) // cs_c *= sizeof(float) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.SCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.SLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 64*4)) vbroadcastss(mem(rax, 0*4), ymm2) vbroadcastss(mem(rax, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 2*4), ymm2) vbroadcastss(mem(rax, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 4*4), ymm2) vbroadcastss(mem(rax, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, -2*32), ymm0) vmovaps(mem(rbx, -1*32), ymm1) // iteration 1 vbroadcastss(mem(rax, 6*4), ymm2) vbroadcastss(mem(rax, 7*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 8*4), ymm2) vbroadcastss(mem(rax, 9*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) 
vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 10*4), ymm2) vbroadcastss(mem(rax, 11*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, 0*32), ymm0) vmovaps(mem(rbx, 1*32), ymm1) // iteration 2 prefetch(0, mem(rax, 76*4)) vbroadcastss(mem(rax, 12*4), ymm2) vbroadcastss(mem(rax, 13*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 14*4), ymm2) vbroadcastss(mem(rax, 15*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 16*4), ymm2) vbroadcastss(mem(rax, 17*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, 2*32), ymm0) vmovaps(mem(rbx, 3*32), ymm1) // iteration 3 vbroadcastss(mem(rax, 18*4), ymm2) vbroadcastss(mem(rax, 19*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 20*4), ymm2) vbroadcastss(mem(rax, 21*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 22*4), ymm2) vbroadcastss(mem(rax, 23*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(4*6*4), rax) // a += 4*6 (unroll x mr) add(imm(4*16*4), rbx) // b += 4*16 (unroll x nr) vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.SLOOPKITER) // iterate again if i != 0. label(.SCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.SPOSTACCUM) // if i == 0, we're done; jump to end. 
// else, we prepare to enter k_left loop. label(.SLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 64*4)) vbroadcastss(mem(rax, 0*4), ymm2) vbroadcastss(mem(rax, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 2*4), ymm2) vbroadcastss(mem(rax, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 4*4), ymm2) vbroadcastss(mem(rax, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(1*6*4), rax) // a += 1*6 (unroll x mr) add(imm(1*16*4), rbx) // b += 1*16 (unroll x nr) vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.SLOOPKLEFT) // iterate again if i != 0. label(.SPOSTACCUM) // ymm4..ymm15 = -a10 * b01 mov(var(alpha), rbx) // load address of alpha vbroadcastss(mem(rbx), ymm3) // load alpha and duplicate mov(imm(1), rsi) // load cs_b = 1 lea(mem(, rsi, 4), rsi) // cs_b *= sizeof(float) lea(mem(rcx, rsi, 8), rdx) // load address of b11 + 8*cs_b mov(rcx, r11) // save rcx = b11 for later mov(rdx, r14) // save rdx = b11+8*cs_b for later // b11 := alpha * b11 - a10 * b01 vfmsub231ps(mem(rcx), ymm3, ymm4) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm5) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm6) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm7) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm8) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm9) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm10) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm11) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm12) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm13) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm14) //add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm15) //add(rdi, rdx) // prefetch c11 #if 0 mov(r8, rcx) // load address of c11 from r8 // Note: r9 = rs_c * sizeof(float) 
lea(mem(r9 , r9 , 2), r13) // r13 = 3*rs_c; lea(mem(rcx, r13, 1), rdx) // rdx = c11 + 3*rs_c; prefetch(0, mem(rcx, 0*8)) // prefetch c11 + 0*rs_c prefetch(0, mem(rcx, r9, 1, 0*8)) // prefetch c11 + 1*rs_c prefetch(0, mem(rcx, r9 , 2, 0*8)) // prefetch c11 + 2*rs_c prefetch(0, mem(rdx, 0*8)) // prefetch c11 + 3*rs_c prefetch(0, mem(rdx, r9, 1, 0*8)) // prefetch c11 + 4*rs_c prefetch(0, mem(rdx, r9 , 2, 0*8)) // prefetch c11 + 5*rs_c #endif // trsm computation begins here // Note: contents of b11 are stored as // ymm4 ymm5 = ( beta00..07 ) ( beta08..0F ) // ymm6 ymm7 = ( beta10..17 ) ( beta18..1F ) // ymm8 ymm9 = ( beta20..27 ) ( beta28..2F ) // ymm10 ymm11 = ( beta30..37 ) ( beta38..3F ) // ymm12 ymm13 = ( beta40..47 ) ( beta48..4F ) // ymm14 ymm15 = ( beta50..57 ) ( beta58..5F ) mov(var(a11), rax) // load address of a11 mov(r11, rcx) // recall address of b11 mov(r14, rdx) // recall address of b11+8*cs_b // Note: rdi = rs_b // iteration 0 ------------- vbroadcastss(mem(0+0*6)*4(rax), ymm0) // ymm0 = (1/alpha00) vmulps(ymm0, ymm4, ymm4) // ymm4 *= (1/alpha00) vmulps(ymm0, ymm5, ymm5) // ymm5 *= (1/alpha00) vmovups(ymm4, mem(rcx)) // store ( beta00..beta07 ) = ymm4 vmovups(ymm5, mem(rdx)) // store ( beta08..beta0F ) = ymm5 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 1 ------------- vbroadcastss(mem(1+0*6)*4(rax), ymm0) // ymm0 = alpha10 vbroadcastss(mem(1+1*6)*4(rax), ymm1) // ymm1 = (1/alpha11) vmulps(ymm0, ymm4, ymm2) // ymm2 = alpha10 * ymm4 vmulps(ymm0, ymm5, ymm3) // ymm3 = alpha10 * ymm5 vsubps(ymm2, ymm6, ymm6) // ymm6 -= ymm2 vsubps(ymm3, ymm7, ymm7) // ymm7 -= ymm3 vmulps(ymm6, ymm1, ymm6) // ymm6 *= (1/alpha11) vmulps(ymm7, ymm1, ymm7) // ymm7 *= (1/alpha11) vmovups(ymm6, mem(rcx)) // store ( beta10..beta17 ) = ymm6 vmovups(ymm7, mem(rdx)) // store ( beta18..beta1F ) = ymm7 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 2 ------------- vbroadcastss(mem(2+0*6)*4(rax), ymm0) // ymm0 = alpha20 
vbroadcastss(mem(2+1*6)*4(rax), ymm1) // ymm1 = alpha21 vmulps(ymm0, ymm4, ymm2) // ymm2 = alpha20 * ymm4 vmulps(ymm0, ymm5, ymm3) // ymm3 = alpha20 * ymm5 vbroadcastss(mem(2+2*6)*4(rax), ymm0) // ymm0 = (1/alpha22) vfmadd231ps(ymm1, ymm6, ymm2) // ymm2 += alpha21 * ymm6 vfmadd231ps(ymm1, ymm7, ymm3) // ymm3 += alpha21 * ymm7 vsubps(ymm2, ymm8, ymm8) // ymm8 -= ymm2 vsubps(ymm3, ymm9, ymm9) // ymm9 -= ymm3 vmulps(ymm8, ymm0, ymm8) // ymm8 *= (1/alpha22) vmulps(ymm9, ymm0, ymm9) // ymm9 *= (1/alpha22) vmovups(ymm8, mem(rcx)) // store ( beta20..beta27 ) = ymm8 vmovups(ymm9, mem(rdx)) // store ( beta28..beta2F ) = ymm9 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 3 ------------- vbroadcastss(mem(3+0*6)*4(rax), ymm0) // ymm0 = alpha30 vbroadcastss(mem(3+1*6)*4(rax), ymm1) // ymm1 = alpha31 vmulps(ymm0, ymm4, ymm2) // ymm2 = alpha30 * ymm4 vmulps(ymm0, ymm5, ymm3) // ymm3 = alpha30 * ymm5 vbroadcastss(mem(3+2*6)*4(rax), ymm0) // ymm0 = alpha32 vfmadd231ps(ymm1, ymm6, ymm2) // ymm2 += alpha31 * ymm6 vfmadd231ps(ymm1, ymm7, ymm3) // ymm3 += alpha31 * ymm7 vbroadcastss(mem(3+3*6)*4(rax), ymm1) // ymm0 = (1/alpha33) vfmadd231ps(ymm0, ymm8, ymm2) // ymm2 += alpha32 * ymm8 vfmadd231ps(ymm0, ymm9, ymm3) // ymm3 += alpha32 * ymm9 vsubps(ymm2, ymm10, ymm10) // ymm10 -= ymm2 vsubps(ymm3, ymm11, ymm11) // ymm11 -= ymm3 vmulps(ymm10, ymm1, ymm10) // ymm10 *= (1/alpha33) vmulps(ymm11, ymm1, ymm11) // ymm11 *= (1/alpha33) vmovups(ymm10, mem(rcx)) // store ( beta30..beta37 ) = ymm10 vmovups(ymm11, mem(rdx)) // store ( beta38..beta3F ) = ymm11 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 4 ------------- vbroadcastss(mem(4+0*6)*4(rax), ymm0) // ymm0 = alpha40 vbroadcastss(mem(4+1*6)*4(rax), ymm1) // ymm1 = alpha41 vmulps(ymm0, ymm4, ymm2) // ymm2 = alpha40 * ymm4 vmulps(ymm0, ymm5, ymm3) // ymm3 = alpha40 * ymm5 vbroadcastss(mem(4+2*6)*4(rax), ymm0) // ymm0 = alpha42 vfmadd231ps(ymm1, ymm6, ymm2) // ymm2 += alpha41 * ymm6 
vfmadd231ps(ymm1, ymm7, ymm3) // ymm3 += alpha41 * ymm7 vbroadcastss(mem(4+3*6)*4(rax), ymm1) // ymm1 = alpha43 vfmadd231ps(ymm0, ymm8, ymm2) // ymm2 += alpha42 * ymm8 vfmadd231ps(ymm0, ymm9, ymm3) // ymm3 += alpha42 * ymm9 vbroadcastss(mem(4+4*6)*4(rax), ymm0) // ymm0 = (1/alpha44) vfmadd231ps(ymm1, ymm10, ymm2) // ymm2 += alpha43 * ymm10 vfmadd231ps(ymm1, ymm11, ymm3) // ymm3 += alpha43 * ymm11 vsubps(ymm2, ymm12, ymm12) // ymm12 -= ymm2 vsubps(ymm3, ymm13, ymm13) // ymm13 -= ymm3 vmulps(ymm12, ymm0, ymm12) // ymm12 *= (1/alpha44) vmulps(ymm13, ymm0, ymm13) // ymm13 *= (1/alpha44) vmovups(ymm12, mem(rcx)) // store ( beta40..beta47 ) = ymm12 vmovups(ymm13, mem(rdx)) // store ( beta48..beta4F ) = ymm13 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 5 ------------- vbroadcastss(mem(5+0*6)*4(rax), ymm0) // ymm0 = alpha50 vbroadcastss(mem(5+1*6)*4(rax), ymm1) // ymm1 = alpha51 vmulps(ymm0, ymm4, ymm2) // ymm2 = alpha50 * ymm4 vmulps(ymm0, ymm5, ymm3) // ymm3 = alpha50 * ymm5 vbroadcastss(mem(5+2*6)*4(rax), ymm0) // ymm0 = alpha52 vfmadd231ps(ymm1, ymm6, ymm2) // ymm2 += alpha51 * ymm6 vfmadd231ps(ymm1, ymm7, ymm3) // ymm3 += alpha51 * ymm7 vbroadcastss(mem(5+3*6)*4(rax), ymm1) // ymm1 = alpha53 vfmadd231ps(ymm0, ymm8, ymm2) // ymm2 += alpha52 * ymm8 vfmadd231ps(ymm0, ymm9, ymm3) // ymm3 += alpha52 * ymm9 vbroadcastss(mem(5+4*6)*4(rax), ymm0) // ymm0 = alpha54 vfmadd231ps(ymm1, ymm10, ymm2) // ymm2 += alpha53 * ymm10 vfmadd231ps(ymm1, ymm11, ymm3) // ymm3 += alpha53 * ymm11 vbroadcastss(mem(5+5*6)*4(rax), ymm1) // ymm1 = (1/alpha55) vfmadd231ps(ymm0, ymm12, ymm2) // ymm2 += alpha54 * ymm12 vfmadd231ps(ymm0, ymm13, ymm3) // ymm3 += alpha54 * ymm13 vsubps(ymm2, ymm14, ymm14) // ymm14 -= ymm2 vsubps(ymm3, ymm15, ymm15) // ymm15 -= ymm3 vmulps(ymm14, ymm1, ymm14) // ymm14 *= (1/alpha55) vmulps(ymm15, ymm1, ymm15) // ymm15 *= (1/alpha55) vmovups(ymm14, mem(rcx)) // store ( beta50..beta57 ) = ymm14 vmovups(ymm15, mem(rdx)) // store ( beta58..beta5F ) 
= ymm15 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b mov(r8, rcx) // load address of c11 from r8 mov(r9, rdi) // load rs_c (in bytes) from r9 mov(r10, rsi) // load cs_c (in bytes) from r10 lea(mem(rcx, rsi, 8), rdx) // load address of c11 + 8*cs_c; lea(mem(rcx, rdi, 4), r14) // load address of c11 + 4*rs_c; // These are used in the macros below. lea(mem(rsi, rsi, 2), r13) // r13 = 3*cs_c; lea(mem(rsi, rsi, 4), r15) // r15 = 5*cs_c; lea(mem(r13, rsi, 4), r10) // r10 = 7*cs_c; cmp(imm(4), rsi) // set ZF if (4*cs_c) == 4. jz(.SROWSTORED) // jump to row storage case cmp(imm(4), rdi) // set ZF if (4*rs_c) == 4. jz(.SCOLSTORED) // jump to column storage case // if neither row- or column- // stored, use general case. label(.SGENSTORED) vmovaps(ymm4, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm6, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm8, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm10, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm12, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm14, ymm0) SGEMM_OUTPUT_GS_BETA_NZ mov(rdx, rcx) // rcx = c11 + 8*cs_c vmovaps(ymm5, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm7, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm9, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm11, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm13, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm15, ymm0) SGEMM_OUTPUT_GS_BETA_NZ jmp(.SDONE) label(.SROWSTORED) vmovups(ymm4, mem(rcx)) add(rdi, rcx) vmovups(ymm5, mem(rdx)) add(rdi, rdx) vmovups(ymm6, mem(rcx)) add(rdi, rcx) vmovups(ymm7, mem(rdx)) add(rdi, rdx) vmovups(ymm8, mem(rcx)) add(rdi, rcx) vmovups(ymm9, mem(rdx)) add(rdi, rdx) vmovups(ymm10, mem(rcx)) add(rdi, rcx) vmovups(ymm11, mem(rdx)) add(rdi, rdx) vmovups(ymm12, mem(rcx)) add(rdi, rcx) 
vmovups(ymm13, mem(rdx)) add(rdi, rdx) vmovups(ymm14, mem(rcx)) //add(rdi, rcx) vmovups(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.SDONE) label(.SCOLSTORED) vunpcklps(ymm6, ymm4, ymm0) vunpcklps(ymm10, ymm8, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovups(xmm0, mem(rcx)) // store ( gamma00..gamma30 ) vmovups(xmm1, mem(rcx, rsi, 1)) // store ( gamma01..gamma31 ) vmovups(xmm2, mem(rcx, rsi, 4)) // store ( gamma04..gamma34 ) vmovups(xmm3, mem(rcx, r15, 1)) // store ( gamma05..gamma35 ) vunpckhps(ymm6, ymm4, ymm0) vunpckhps(ymm10, ymm8, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovups(xmm0, mem(rcx, rsi, 2)) // store ( gamma02..gamma32 ) vmovups(xmm1, mem(rcx, r13, 1)) // store ( gamma03..gamma33 ) vmovups(xmm2, mem(rcx, r13, 2)) // store ( gamma06..gamma36 ) vmovups(xmm3, mem(rcx, r10, 1)) // store ( gamma07..gamma37 ) lea(mem(rcx, rsi, 8), rcx) // rcx += 8*cs_c vunpcklps(ymm14, ymm12, ymm0) vunpckhps(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovlpd(xmm0, mem(r14)) // store ( gamma40..gamma50 ) vmovhpd(xmm0, mem(r14, rsi, 1)) // store ( gamma41..gamma51 ) vmovlpd(xmm1, mem(r14, rsi, 2)) // store ( gamma42..gamma52 ) vmovhpd(xmm1, mem(r14, r13, 1)) // store ( gamma43..gamma53 ) vmovlpd(xmm2, mem(r14, rsi, 4)) // store ( gamma44..gamma54 ) vmovhpd(xmm2, mem(r14, r15, 1)) // store ( gamma45..gamma55 ) vmovlpd(xmm3, mem(r14, r13, 2)) // store ( gamma46..gamma56 ) vmovhpd(xmm3, mem(r14, r10, 1)) // store ( gamma47..gamma57 ) lea(mem(r14, rsi, 8), r14) // r14 += 8*cs_c vunpcklps(ymm7, ymm5, ymm0) vunpcklps(ymm11, ymm9, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) 
vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovups(xmm0, mem(rcx)) // store ( gamma08..gamma38 ) vmovups(xmm1, mem(rcx, rsi, 1)) // store ( gamma09..gamma39 ) vmovups(xmm2, mem(rcx, rsi, 4)) // store ( gamma0C..gamma3C ) vmovups(xmm3, mem(rcx, r15, 1)) // store ( gamma0D..gamma3D ) vunpckhps(ymm7, ymm5, ymm0) vunpckhps(ymm11, ymm9, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovups(xmm0, mem(rcx, rsi, 2)) // store ( gamma0A..gamma3A ) vmovups(xmm1, mem(rcx, r13, 1)) // store ( gamma0B..gamma3B ) vmovups(xmm2, mem(rcx, r13, 2)) // store ( gamma0E..gamma3E ) vmovups(xmm3, mem(rcx, r10, 1)) // store ( gamma0F..gamma3F ) //lea(mem(rcx, rsi, 8), rcx) // rcx += 8*cs_c vunpcklps(ymm15, ymm13, ymm0) vunpckhps(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovlpd(xmm0, mem(r14)) // store ( gamma48..gamma58 ) vmovhpd(xmm0, mem(r14, rsi, 1)) // store ( gamma49..gamma59 ) vmovlpd(xmm1, mem(r14, rsi, 2)) // store ( gamma4A..gamma5A ) vmovhpd(xmm1, mem(r14, r13, 1)) // store ( gamma4B..gamma5B ) vmovlpd(xmm2, mem(r14, rsi, 4)) // store ( gamma4C..gamma5C ) vmovhpd(xmm2, mem(r14, r15, 1)) // store ( gamma4D..gamma5D ) vmovlpd(xmm3, mem(r14, r13, 2)) // store ( gamma4E..gamma5E ) vmovhpd(xmm3, mem(r14, r10, 1)) // store ( gamma4F..gamma5F ) //lea(mem(r14, rsi, 8), r14) // r14 += 8*cs_c label(.SDONE) vzeroupper() end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a10] "m" (a10), // 2 [b01] "m" (b01), // 3 [beta] "m" (beta), // 4 [alpha] "m" (alpha), // 5 [a11] "m" (a11), // 6 [b11] "m" (b11), // 7 [c11] "m" (c11), // 8 [rs_c] "m" (rs_c), // 9 [cs_c] "m" (cs_c) // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", 
"xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } #define DGEMM_OUTPUT_GS_BETA_NZ \ vextractf128(imm(1), ymm0, xmm1) \ vmovlpd(xmm0, mem(rcx)) \ vmovhpd(xmm0, mem(rcx, rsi, 1)) \ vmovlpd(xmm1, mem(rcx, rsi, 2)) \ vmovhpd(xmm1, mem(rcx, r13, 1)) /*\ vextractf128(imm(1), ymm2, xmm1) \ vmovlpd(xmm2, mem(rcx, rsi, 4)) \ vmovhpd(xmm2, mem(rcx, r15, 1)) \ vmovlpd(xmm1, mem(rcx, r13, 2)) \ vmovhpd(xmm1, mem(rcx, r10, 1))*/ void bli_dgemmtrsm_l_haswell_asm_6x8 ( dim_t k0, double* restrict alpha, double* restrict a10, double* restrict a11, double* restrict b01, double* restrict b11, double* restrict c11, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; double* beta = bli_dm1; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a10), rax) // load address of a. mov(var(b01), rbx) // load address of b. add(imm(32*4), rbx) // initialize loop by pre-loading vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) mov(var(b11), rcx) // load address of b11 mov(imm(8), rdi) // set rs_b = PACKNR = 8 lea(mem(, rdi, 8), rdi) // rs_b *= sizeof(double) // NOTE: c11, rs_c, and cs_c aren't // needed for a while, but we load // them now to avoid stalling later. mov(var(c11), r8) // load address of c11 mov(var(rs_c), r9) // load rs_c lea(mem(, r9 , 8), r9) // rs_c *= sizeof(double) mov(var(k_left)0, r10) // load cs_c lea(mem(, r10, 8), r10) // cs_c *= sizeof(double) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 64*8)) vbroadcastsd(mem(rax, 0*8), ymm2) vbroadcastsd(mem(rax, 1*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 2*8), ymm2) vbroadcastsd(mem(rax, 3*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 4*8), ymm2) vbroadcastsd(mem(rax, 5*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, -2*32), ymm0) vmovapd(mem(rbx, -1*32), ymm1) // iteration 1 vbroadcastsd(mem(rax, 6*8), ymm2) vbroadcastsd(mem(rax, 7*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 8*8), ymm2) vbroadcastsd(mem(rax, 9*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 10*8), ymm2) vbroadcastsd(mem(rax, 11*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, 0*32), ymm0) vmovapd(mem(rbx, 1*32), ymm1) // iteration 2 prefetch(0, mem(rax, 76*8)) vbroadcastsd(mem(rax, 12*8), ymm2) vbroadcastsd(mem(rax, 13*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 14*8), ymm2) vbroadcastsd(mem(rax, 15*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 16*8), ymm2) vbroadcastsd(mem(rax, 17*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, 2*32), 
ymm0) vmovapd(mem(rbx, 3*32), ymm1) // iteration 3 vbroadcastsd(mem(rax, 18*8), ymm2) vbroadcastsd(mem(rax, 19*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 20*8), ymm2) vbroadcastsd(mem(rax, 21*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 22*8), ymm2) vbroadcastsd(mem(rax, 23*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) add(imm(4*6*8), rax) // a += 4*6 (unroll x mr) add(imm(4*8*8), rbx) // b += 4*8 (unroll x nr) vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 64*8)) vbroadcastsd(mem(rax, 0*8), ymm2) vbroadcastsd(mem(rax, 1*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 2*8), ymm2) vbroadcastsd(mem(rax, 3*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 4*8), ymm2) vbroadcastsd(mem(rax, 5*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) add(imm(1*6*8), rax) // a += 1*6 (unroll x mr) add(imm(1*8*8), rbx) // b += 1*8 (unroll x nr) vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4..ymm15 = -a10 * b01 mov(var(alpha), rbx) // load address of alpha vbroadcastsd(mem(rbx), ymm3) // load alpha and duplicate mov(imm(1), rsi) // set cs_b = 1 lea(mem(, rsi, 8), rsi) // cs_b *= sizeof(double) lea(mem(rcx, rsi, 4), rdx) // load address of b11 + 4*cs_b mov(rcx, r11) // save rcx = b11 for later mov(rdx, r14) // save rdx = b11+4*cs_b for later // b11 := alpha * b11 - a10 * b01 vfmsub231pd(mem(rcx), ymm3, ymm4) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm5) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm6) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm7) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm8) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm9) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm10) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm11) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm12) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm13) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm14) //add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm15) //add(rdi, rdx) // prefetch c11 #if 0 mov(r8, rcx) // load address of c11 from r8 // Note: r9 = rs_c * sizeof(double) lea(mem(r9 , r9 , 2), r13) // r13 = 3*rs_c; lea(mem(rcx, r13, 1), rdx) // rdx = c11 + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c11 + 0*rs_c prefetch(0, mem(rcx, r9, 1, 7*8)) // prefetch c11 + 1*rs_c prefetch(0, mem(rcx, r9 , 2, 7*8)) // prefetch c11 + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c11 + 3*rs_c prefetch(0, mem(rdx, r9, 1, 7*8)) // prefetch c11 + 4*rs_c prefetch(0, mem(rdx, r9 , 2, 7*8)) // prefetch c11 + 5*rs_c #endif // trsm computation begins here // Note: contents of b11 are stored as // ymm4 ymm5 = ( beta00..03 ) ( beta04..07 ) // ymm6 ymm7 = ( beta10..13 ) ( beta14..17 ) // ymm8 ymm9 = ( beta20..23 ) ( beta24..27 ) // ymm10 ymm11 = ( beta30..33 ) ( beta34..37 ) // ymm12 ymm13 = ( beta40..43 ) ( beta44..47 ) // ymm14 ymm15 = ( beta50..53 ) ( beta54..57 ) mov(var(a11), rax) // load address of a11 mov(r11, rcx) // recall address of b11 mov(r14, rdx) // recall 
address of b11+4*cs_b // Note: rdi = rs_b // iteration 0 ------------- vbroadcastsd(mem(0+0*6)*8(rax), ymm0) // ymm0 = (1/alpha00) vmulpd(ymm0, ymm4, ymm4) // ymm4 *= (1/alpha00) vmulpd(ymm0, ymm5, ymm5) // ymm5 *= (1/alpha00) vmovupd(ymm4, mem(rcx)) // store ( beta00..beta03 ) = ymm4 vmovupd(ymm5, mem(rdx)) // store ( beta04..beta07 ) = ymm5 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 1 ------------- vbroadcastsd(mem(1+0*6)*8(rax), ymm0) // ymm0 = alpha10 vbroadcastsd(mem(1+1*6)*8(rax), ymm1) // ymm1 = (1/alpha11) vmulpd(ymm0, ymm4, ymm2) // ymm2 = alpha10 * ymm4 vmulpd(ymm0, ymm5, ymm3) // ymm3 = alpha10 * ymm5 vsubpd(ymm2, ymm6, ymm6) // ymm6 -= ymm2 vsubpd(ymm3, ymm7, ymm7) // ymm7 -= ymm3 vmulpd(ymm6, ymm1, ymm6) // ymm6 *= (1/alpha11) vmulpd(ymm7, ymm1, ymm7) // ymm7 *= (1/alpha11) vmovupd(ymm6, mem(rcx)) // store ( beta10..beta13 ) = ymm6 vmovupd(ymm7, mem(rdx)) // store ( beta14..beta17 ) = ymm7 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 2 ------------- vbroadcastsd(mem(2+0*6)*8(rax), ymm0) // ymm0 = alpha20 vbroadcastsd(mem(2+1*6)*8(rax), ymm1) // ymm1 = alpha21 vmulpd(ymm0, ymm4, ymm2) // ymm2 = alpha20 * ymm4 vmulpd(ymm0, ymm5, ymm3) // ymm3 = alpha20 * ymm5 vbroadcastsd(mem(2+2*6)*8(rax), ymm0) // ymm0 = (1/alpha22) vfmadd231pd(ymm1, ymm6, ymm2) // ymm2 += alpha21 * ymm6 vfmadd231pd(ymm1, ymm7, ymm3) // ymm3 += alpha21 * ymm7 vsubpd(ymm2, ymm8, ymm8) // ymm8 -= ymm2 vsubpd(ymm3, ymm9, ymm9) // ymm9 -= ymm3 vmulpd(ymm8, ymm0, ymm8) // ymm8 *= (1/alpha22) vmulpd(ymm9, ymm0, ymm9) // ymm9 *= (1/alpha22) vmovupd(ymm8, mem(rcx)) // store ( beta20..beta23 ) = ymm8 vmovupd(ymm9, mem(rdx)) // store ( beta24..beta27 ) = ymm9 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 3 ------------- vbroadcastsd(mem(3+0*6)*8(rax), ymm0) // ymm0 = alpha30 vbroadcastsd(mem(3+1*6)*8(rax), ymm1) // ymm1 = alpha31 vmulpd(ymm0, ymm4, ymm2) // ymm2 = alpha30 * ymm4 vmulpd(ymm0, ymm5, ymm3) // ymm3 = 
alpha30 * ymm5 vbroadcastsd(mem(3+2*6)*8(rax), ymm0) // ymm0 = alpha32 vfmadd231pd(ymm1, ymm6, ymm2) // ymm2 += alpha31 * ymm6 vfmadd231pd(ymm1, ymm7, ymm3) // ymm3 += alpha31 * ymm7 vbroadcastsd(mem(3+3*6)*8(rax), ymm1) // ymm1 = (1/alpha33) vfmadd231pd(ymm0, ymm8, ymm2) // ymm2 += alpha32 * ymm8 vfmadd231pd(ymm0, ymm9, ymm3) // ymm3 += alpha32 * ymm9 vsubpd(ymm2, ymm10, ymm10) // ymm10 -= ymm2 vsubpd(ymm3, ymm11, ymm11) // ymm11 -= ymm3 vmulpd(ymm10, ymm1, ymm10) // ymm10 *= (1/alpha33) vmulpd(ymm11, ymm1, ymm11) // ymm11 *= (1/alpha33) vmovupd(ymm10, mem(rcx)) // store ( beta30..beta33 ) = ymm10 vmovupd(ymm11, mem(rdx)) // store ( beta34..beta37 ) = ymm11 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 4 ------------- vbroadcastsd(mem(4+0*6)*8(rax), ymm0) // ymm0 = alpha40 vbroadcastsd(mem(4+1*6)*8(rax), ymm1) // ymm1 = alpha41 vmulpd(ymm0, ymm4, ymm2) // ymm2 = alpha40 * ymm4 vmulpd(ymm0, ymm5, ymm3) // ymm3 = alpha40 * ymm5 vbroadcastsd(mem(4+2*6)*8(rax), ymm0) // ymm0 = alpha42 vfmadd231pd(ymm1, ymm6, ymm2) // ymm2 += alpha41 * ymm6 vfmadd231pd(ymm1, ymm7, ymm3) // ymm3 += alpha41 * ymm7 vbroadcastsd(mem(4+3*6)*8(rax), ymm1) // ymm1 = alpha43 vfmadd231pd(ymm0, ymm8, ymm2) // ymm2 += alpha42 * ymm8 vfmadd231pd(ymm0, ymm9, ymm3) // ymm3 += alpha42 * ymm9 vbroadcastsd(mem(4+4*6)*8(rax), ymm0) // ymm4 = (1/alpha44) vfmadd231pd(ymm1, ymm10, ymm2) // ymm2 += alpha43 * ymm10 vfmadd231pd(ymm1, ymm11, ymm3) // ymm3 += alpha43 * ymm11 vsubpd(ymm2, ymm12, ymm12) // ymm12 -= ymm2 vsubpd(ymm3, ymm13, ymm13) // ymm13 -= ymm3 vmulpd(ymm12, ymm0, ymm12) // ymm12 *= (1/alpha44) vmulpd(ymm13, ymm0, ymm13) // ymm13 *= (1/alpha44) vmovupd(ymm12, mem(rcx)) // store ( beta40..beta43 ) = ymm12 vmovupd(ymm13, mem(rdx)) // store ( beta44..beta47 ) = ymm13 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b // iteration 5 ------------- vbroadcastsd(mem(5+0*6)*8(rax), ymm0) // ymm0 = alpha50 vbroadcastsd(mem(5+1*6)*8(rax), ymm1) // ymm1 = alpha51 
vmulpd(ymm0, ymm4, ymm2) // ymm2 = alpha50 * ymm4 vmulpd(ymm0, ymm5, ymm3) // ymm3 = alpha50 * ymm5 vbroadcastsd(mem(5+2*6)*8(rax), ymm0) // ymm0 = alpha52 vfmadd231pd(ymm1, ymm6, ymm2) // ymm2 += alpha51 * ymm6 vfmadd231pd(ymm1, ymm7, ymm3) // ymm3 += alpha51 * ymm7 vbroadcastsd(mem(5+3*6)*8(rax), ymm1) // ymm1 = alpha53 vfmadd231pd(ymm0, ymm8, ymm2) // ymm2 += alpha52 * ymm8 vfmadd231pd(ymm0, ymm9, ymm3) // ymm3 += alpha52 * ymm9 vbroadcastsd(mem(5+4*6)*8(rax), ymm0) // ymm0 = alpha54 vfmadd231pd(ymm1, ymm10, ymm2) // ymm2 += alpha53 * ymm10 vfmadd231pd(ymm1, ymm11, ymm3) // ymm3 += alpha53 * ymm11 vbroadcastsd(mem(5+5*6)*8(rax), ymm1) // ymm1 = (1/alpha55) vfmadd231pd(ymm0, ymm12, ymm2) // ymm2 += alpha54 * ymm12 vfmadd231pd(ymm0, ymm13, ymm3) // ymm3 += alpha54 * ymm13 vsubpd(ymm2, ymm14, ymm14) // ymm14 -= ymm2 vsubpd(ymm3, ymm15, ymm15) // ymm15 -= ymm3 vmulpd(ymm14, ymm1, ymm14) // ymm14 *= (1/alpha55) vmulpd(ymm15, ymm1, ymm15) // ymm15 *= (1/alpha55) vmovupd(ymm14, mem(rcx)) // store ( beta50..beta53 ) = ymm14 vmovupd(ymm15, mem(rdx)) // store ( beta54..beta57 ) = ymm15 add(rdi, rcx) // rcx += rs_b add(rdi, rdx) // rdx += rs_b mov(r8, rcx) // load address of c11 from r8 mov(r9, rdi) // load rs_c (in bytes) from r9 mov(r10, rsi) // load cs_c (in bytes) from r10 lea(mem(rcx, rsi, 4), rdx) // load address of c11 + 4*cs_c; lea(mem(rcx, rdi, 4), r14) // load address of c11 + 4*rs_c; // These are used in the macros below. lea(mem(rsi, rsi, 2), r13) // r13 = 3*cs_c; //lea(mem(rsi, rsi, 4), r15) // r15 = 5*cs_c; //lea(mem(r13, rsi, 4), r10) // r10 = 7*cs_c; cmp(imm(8), rsi) // set ZF if (8*cs_c) == 8. jz(.DROWSTORED) // jump to row storage case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case // if neither row- or column- // stored, use general case. 
label(.DGENSTORED) vmovapd(ymm4, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm6, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm8, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm10, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm12, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm14, ymm0) DGEMM_OUTPUT_GS_BETA_NZ mov(rdx, rcx) // rcx = c11 + 4*cs_c vmovapd(ymm5, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm7, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm9, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm11, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm13, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm15, ymm0) DGEMM_OUTPUT_GS_BETA_NZ jmp(.DDONE) label(.DROWSTORED) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rdx)) add(rdi, rdx) vmovupd(ymm6, mem(rcx)) add(rdi, rcx) vmovupd(ymm7, mem(rdx)) add(rdi, rdx) vmovupd(ymm8, mem(rcx)) add(rdi, rcx) vmovupd(ymm9, mem(rdx)) add(rdi, rdx) vmovupd(ymm10, mem(rcx)) add(rdi, rcx) vmovupd(ymm11, mem(rdx)) add(rdi, rdx) vmovupd(ymm12, mem(rcx)) add(rdi, rcx) vmovupd(ymm13, mem(rdx)) add(rdi, rdx) vmovupd(ymm14, mem(rcx)) //add(rdi, rcx) vmovupd(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.DDONE) label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovupd(xmm0, mem(r14)) 
vmovupd(xmm1, mem(r14, rsi, 1)) vmovupd(xmm2, mem(r14, rsi, 2)) vmovupd(xmm3, mem(r14, r13, 1)) lea(mem(r14, rsi, 4), r14) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, r13, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovupd(xmm0, mem(r14)) vmovupd(xmm1, mem(r14, rsi, 1)) vmovupd(xmm2, mem(r14, rsi, 2)) vmovupd(xmm3, mem(r14, r13, 1)) //lea(mem(r14, rsi, 4), r14) label(.DDONE) vzeroupper() end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a10] "m" (a10), // 2 [b01] "m" (b01), // 3 [beta] "m" (beta), // 4 [alpha] "m" (alpha), // 5 [a11] "m" (a11), // 6 [b11] "m" (b11), // 7 [c11] "m" (c11), // 8 [rs_c] "m" (rs_c), // 9 [cs_c] "m" (cs_c) // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/haswell/3/bli_gemmtrsm_u_haswell_asm_d6x8.c000066400000000000000000001172471360743507500243420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" #define SGEMM_OUTPUT_GS_BETA_NZ \ vextractf128(imm(1), ymm0, xmm2) \ vmovss(xmm0, mem(rcx)) \ vpermilps(imm(0x39), xmm0, xmm1) \ vmovss(xmm1, mem(rcx, rsi, 1)) \ vpermilps(imm(0x39), xmm1, xmm0) \ vmovss(xmm0, mem(rcx, rsi, 2)) \ vpermilps(imm(0x39), xmm0, xmm1) \ vmovss(xmm1, mem(rcx, r13, 1)) \ vmovss(xmm2, mem(rcx, rsi, 4)) \ vpermilps(imm(0x39), xmm2, xmm1) \ vmovss(xmm1, mem(rcx, r15, 1)) \ vpermilps(imm(0x39), xmm1, xmm2) \ vmovss(xmm2, mem(rcx, r13, 2)) \ vpermilps(imm(0x39), xmm2, xmm1) \ vmovss(xmm1, mem(rcx, r10, 1)) void bli_sgemmtrsm_u_haswell_asm_6x16 ( dim_t k0, float* restrict alpha, float* restrict a10, float* restrict a11, float* restrict b01, float* restrict b11, float* restrict c11, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; float* beta = bli_sm1; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a10), rax) // load address of a. mov(var(b01), rbx) // load address of b. add(imm(32*4), rbx) // initialize loop by pre-loading vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) mov(var(b11), rcx) // load address of b11 mov(imm(16), rdi) // set rs_b = PACKNR = 16 lea(mem(, rdi, 4), rdi) // rs_b *= sizeof(float) // NOTE: c11, rs_c, and cs_c aren't // needed for a while, but we load // them now to avoid stalling later. mov(var(c11), r8) // load address of c11 mov(var(rs_c), r9) // load rs_c lea(mem(, r9 , 4), r9) // rs_c *= sizeof(float) mov(var(k_left)0, r10) // load cs_c lea(mem(, r10, 4), r10) // cs_c *= sizeof(float) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. 
je(.SCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.SLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 64*4)) vbroadcastss(mem(rax, 0*4), ymm2) vbroadcastss(mem(rax, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 2*4), ymm2) vbroadcastss(mem(rax, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 4*4), ymm2) vbroadcastss(mem(rax, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, -2*32), ymm0) vmovaps(mem(rbx, -1*32), ymm1) // iteration 1 vbroadcastss(mem(rax, 6*4), ymm2) vbroadcastss(mem(rax, 7*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 8*4), ymm2) vbroadcastss(mem(rax, 9*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 10*4), ymm2) vbroadcastss(mem(rax, 11*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, 0*32), ymm0) vmovaps(mem(rbx, 1*32), ymm1) // iteration 2 prefetch(0, mem(rax, 76*4)) vbroadcastss(mem(rax, 12*4), ymm2) vbroadcastss(mem(rax, 13*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 14*4), ymm2) vbroadcastss(mem(rax, 15*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 16*4), ymm2) vbroadcastss(mem(rax, 17*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) 
vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) vmovaps(mem(rbx, 2*32), ymm0) vmovaps(mem(rbx, 3*32), ymm1) // iteration 3 vbroadcastss(mem(rax, 18*4), ymm2) vbroadcastss(mem(rax, 19*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 20*4), ymm2) vbroadcastss(mem(rax, 21*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 22*4), ymm2) vbroadcastss(mem(rax, 23*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(4*6*4), rax) // a += 4*6 (unroll x mr) add(imm(4*16*4), rbx) // b += 4*16 (unroll x nr) vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.SLOOPKITER) // iterate again if i != 0. label(.SCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.SPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.SLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 64*4)) vbroadcastss(mem(rax, 0*4), ymm2) vbroadcastss(mem(rax, 1*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm4) vfmadd231ps(ymm1, ymm2, ymm5) vfmadd231ps(ymm0, ymm3, ymm6) vfmadd231ps(ymm1, ymm3, ymm7) vbroadcastss(mem(rax, 2*4), ymm2) vbroadcastss(mem(rax, 3*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm8) vfmadd231ps(ymm1, ymm2, ymm9) vfmadd231ps(ymm0, ymm3, ymm10) vfmadd231ps(ymm1, ymm3, ymm11) vbroadcastss(mem(rax, 4*4), ymm2) vbroadcastss(mem(rax, 5*4), ymm3) vfmadd231ps(ymm0, ymm2, ymm12) vfmadd231ps(ymm1, ymm2, ymm13) vfmadd231ps(ymm0, ymm3, ymm14) vfmadd231ps(ymm1, ymm3, ymm15) add(imm(1*6*4), rax) // a += 1*6 (unroll x mr) add(imm(1*16*4), rbx) // b += 1*16 (unroll x nr) vmovaps(mem(rbx, -4*32), ymm0) vmovaps(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.SLOOPKLEFT) // iterate again if i != 0. 
label(.SPOSTACCUM) // ymm4..ymm15 = -a10 * b01 mov(var(alpha), rbx) // load address of alpha vbroadcastss(mem(rbx), ymm3) // load alpha and duplicate mov(imm(1), rsi) // load cs_b = 1 lea(mem(, rsi, 4), rsi) // cs_b *= sizeof(float) lea(mem(rcx, rsi, 8), rdx) // load address of b11 + 8*cs_b mov(rcx, r11) // save rcx = b11 for later mov(rdx, r14) // save rdx = b11+8*cs_b for later // b11 := alpha * b11 - a10 * b01 vfmsub231ps(mem(rcx), ymm3, ymm4) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm5) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm6) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm7) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm8) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm9) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm10) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm11) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm12) add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm13) add(rdi, rdx) vfmsub231ps(mem(rcx), ymm3, ymm14) //add(rdi, rcx) vfmsub231ps(mem(rdx), ymm3, ymm15) //add(rdi, rdx) // prefetch c11 #if 0 mov(r8, rcx) // load address of c11 from r8 // Note: r9 = rs_c * sizeof(float) lea(mem(r9 , r9 , 2), r13) // r13 = 3*rs_c; lea(mem(rcx, r13, 1), rdx) // rdx = c11 + 3*rs_c; prefetch(0, mem(rcx, 0*8)) // prefetch c11 + 0*rs_c prefetch(0, mem(rcx, r9, 1, 0*8)) // prefetch c11 + 1*rs_c prefetch(0, mem(rcx, r9 , 2, 0*8)) // prefetch c11 + 2*rs_c prefetch(0, mem(rdx, 0*8)) // prefetch c11 + 3*rs_c prefetch(0, mem(rdx, r9, 1, 0*8)) // prefetch c11 + 4*rs_c prefetch(0, mem(rdx, r9 , 2, 0*8)) // prefetch c11 + 5*rs_c #endif // trsm computation begins here // Note: contents of b11 are stored as // ymm4 ymm5 = ( beta00..07 ) ( beta08..0F ) // ymm6 ymm7 = ( beta10..17 ) ( beta18..1F ) // ymm8 ymm9 = ( beta20..27 ) ( beta28..2F ) // ymm10 ymm11 = ( beta30..37 ) ( beta38..3F ) // ymm12 ymm13 = ( beta40..47 ) ( beta48..4F ) // ymm14 ymm15 = ( beta50..57 ) ( beta58..5F ) mov(var(a11), rax) // load address of a11 mov(r11, rcx) // recall address of b11 mov(r14, rdx) // recall 
address of b11+8*cs_b lea(mem(rcx, rdi, 4), rcx) // rcx = b11 + (6-1)*rs_b lea(mem(rcx, rdi, 1), rcx) lea(mem(rdx, rdi, 4), rdx) // rdx = b11 + (6-1)*rs_b + 8*cs_b lea(mem(rdx, rdi, 1), rdx) // iteration 0 ------------- vbroadcastss(mem(5+5*6)*4(rax), ymm0) // ymm0 = (1/alpha55) vmulps(ymm0, ymm14, ymm14) // ymm14 *= (1/alpha55) vmulps(ymm0, ymm15, ymm15) // ymm15 *= (1/alpha55) vmovups(ymm14, mem(rcx)) // store ( beta50..beta57 ) = ymm14 vmovups(ymm15, mem(rdx)) // store ( beta58..beta5F ) = ymm15 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 1 ------------- vbroadcastss(mem(4+5*6)*4(rax), ymm0) // ymm0 = alpha45 vbroadcastss(mem(4+4*6)*4(rax), ymm1) // ymm1 = (1/alpha44) vmulps(ymm0, ymm14, ymm2) // ymm2 = alpha45 * ymm14 vmulps(ymm0, ymm15, ymm3) // ymm3 = alpha45 * ymm15 vsubps(ymm2, ymm12, ymm12) // ymm12 -= ymm2 vsubps(ymm3, ymm13, ymm13) // ymm13 -= ymm3 vmulps(ymm12, ymm1, ymm12) // ymm12 *= (1/alpha44) vmulps(ymm13, ymm1, ymm13) // ymm13 *= (1/alpha44) vmovups(ymm12, mem(rcx)) // store ( beta40..beta47 ) = ymm12 vmovups(ymm13, mem(rdx)) // store ( beta48..beta4F ) = ymm13 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 2 ------------- vbroadcastss(mem(3+5*6)*4(rax), ymm0) // ymm0 = alpha35 vbroadcastss(mem(3+4*6)*4(rax), ymm1) // ymm1 = alpha34 vmulps(ymm0, ymm14, ymm2) // ymm2 = alpha35 * ymm14 vmulps(ymm0, ymm15, ymm3) // ymm3 = alpha35 * ymm15 vbroadcastss(mem(3+3*6)*4(rax), ymm0) // ymm0 = (1/alpha33) vfmadd231ps(ymm1, ymm12, ymm2) // ymm2 += alpha34 * ymm12 vfmadd231ps(ymm1, ymm13, ymm3) // ymm3 += alpha34 * ymm13 vsubps(ymm2, ymm10, ymm10) // ymm10 -= ymm2 vsubps(ymm3, ymm11, ymm11) // ymm11 -= ymm3 vmulps(ymm10, ymm0, ymm10) // ymm10 *= (1/alpha33) vmulps(ymm11, ymm0, ymm11) // ymm11 *= (1/alpha33) vmovups(ymm10, mem(rcx)) // store ( beta30..beta37 ) = ymm10 vmovups(ymm11, mem(rdx)) // store ( beta38..beta3F ) = ymm11 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 3 
------------- vbroadcastss(mem(2+5*6)*4(rax), ymm0) // ymm0 = alpha25 vbroadcastss(mem(2+4*6)*4(rax), ymm1) // ymm1 = alpha24 vmulps(ymm0, ymm14, ymm2) // ymm2 = alpha25 * ymm14 vmulps(ymm0, ymm15, ymm3) // ymm3 = alpha25 * ymm15 vbroadcastss(mem(2+3*6)*4(rax), ymm0) // ymm0 = alpha23 vfmadd231ps(ymm1, ymm12, ymm2) // ymm2 += alpha24 * ymm12 vfmadd231ps(ymm1, ymm13, ymm3) // ymm3 += alpha24 * ymm13 vbroadcastss(mem(2+2*6)*4(rax), ymm1) // ymm1 = (1/alpha22) vfmadd231ps(ymm0, ymm10, ymm2) // ymm2 += alpha23 * ymm10 vfmadd231ps(ymm0, ymm11, ymm3) // ymm3 += alpha23 * ymm11 vsubps(ymm2, ymm8, ymm8) // ymm8 -= ymm2 vsubps(ymm3, ymm9, ymm9) // ymm9 -= ymm3 vmulps(ymm8, ymm1, ymm8) // ymm8 *= (1/alpha33) vmulps(ymm9, ymm1, ymm9) // ymm9 *= (1/alpha33) vmovups(ymm8, mem(rcx)) // store ( beta20..beta27 ) = ymm8 vmovups(ymm9, mem(rdx)) // store ( beta28..beta2F ) = ymm9 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 4 ------------- vbroadcastss(mem(1+5*6)*4(rax), ymm0) // ymm0 = alpha15 vbroadcastss(mem(1+4*6)*4(rax), ymm1) // ymm1 = alpha14 vmulps(ymm0, ymm14, ymm2) // ymm2 = alpha15 * ymm14 vmulps(ymm0, ymm15, ymm3) // ymm3 = alpha15 * ymm15 vbroadcastss(mem(1+3*6)*4(rax), ymm0) // ymm0 = alpha13 vfmadd231ps(ymm1, ymm12, ymm2) // ymm2 += alpha14 * ymm12 vfmadd231ps(ymm1, ymm13, ymm3) // ymm3 += alpha14 * ymm13 vbroadcastss(mem(1+2*6)*4(rax), ymm1) // ymm1 = alpha12 vfmadd231ps(ymm0, ymm10, ymm2) // ymm2 += alpha13 * ymm10 vfmadd231ps(ymm0, ymm11, ymm3) // ymm3 += alpha13 * ymm11 vbroadcastss(mem(1+1*6)*4(rax), ymm0) // ymm4 = (1/alpha11) vfmadd231ps(ymm1, ymm8, ymm2) // ymm2 += alpha12 * ymm8 vfmadd231ps(ymm1, ymm9, ymm3) // ymm3 += alpha12 * ymm9 vsubps(ymm2, ymm6, ymm6) // ymm6 -= ymm2 vsubps(ymm3, ymm7, ymm7) // ymm7 -= ymm3 vmulps(ymm6, ymm0, ymm6) // ymm6 *= (1/alpha44) vmulps(ymm7, ymm0, ymm7) // ymm7 *= (1/alpha44) vmovups(ymm6, mem(rcx)) // store ( beta10..beta17 ) = ymm6 vmovups(ymm7, mem(rdx)) // store ( beta18..beta1F ) = ymm7 sub(rdi, 
rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 5 ------------- vbroadcastss(mem(0+5*6)*4(rax), ymm0) // ymm0 = alpha05 vbroadcastss(mem(0+4*6)*4(rax), ymm1) // ymm1 = alpha04 vmulps(ymm0, ymm14, ymm2) // ymm2 = alpha05 * ymm14 vmulps(ymm0, ymm15, ymm3) // ymm3 = alpha05 * ymm15 vbroadcastss(mem(0+3*6)*4(rax), ymm0) // ymm0 = alpha03 vfmadd231ps(ymm1, ymm12, ymm2) // ymm2 += alpha04 * ymm12 vfmadd231ps(ymm1, ymm13, ymm3) // ymm3 += alpha04 * ymm13 vbroadcastss(mem(0+2*6)*4(rax), ymm1) // ymm1 = alpha02 vfmadd231ps(ymm0, ymm10, ymm2) // ymm2 += alpha03 * ymm10 vfmadd231ps(ymm0, ymm11, ymm3) // ymm3 += alpha03 * ymm11 vbroadcastss(mem(0+1*6)*4(rax), ymm0) // ymm0 = alpha01 vfmadd231ps(ymm1, ymm8, ymm2) // ymm2 += alpha02 * ymm8 vfmadd231ps(ymm1, ymm9, ymm3) // ymm3 += alpha02 * ymm9 vbroadcastss(mem(0+0*6)*4(rax), ymm1) // ymm1 = (1/alpha00) vfmadd231ps(ymm0, ymm6, ymm2) // ymm2 += alpha01 * ymm6 vfmadd231ps(ymm0, ymm7, ymm3) // ymm3 += alpha01 * ymm7 vsubps(ymm2, ymm4, ymm4) // ymm4 -= ymm2 vsubps(ymm3, ymm5, ymm5) // ymm5 -= ymm3 vmulps(ymm4, ymm1, ymm4) // ymm4 *= (1/alpha00) vmulps(ymm5, ymm1, ymm5) // ymm5 *= (1/alpha00) vmovups(ymm4, mem(rcx)) // store ( beta00..beta07 ) = ymm4 vmovups(ymm5, mem(rdx)) // store ( beta08..beta0F ) = ymm5 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b mov(r8, rcx) // load address of c11 from r8 mov(r9, rdi) // load rs_c (in bytes) from r9 mov(r10, rsi) // load cs_c (in bytes) from r10 lea(mem(rcx, rsi, 8), rdx) // load address of c11 + 8*cs_c; lea(mem(rcx, rdi, 4), r14) // load address of c11 + 4*rs_c; // These are used in the macros below. lea(mem(rsi, rsi, 2), r13) // r13 = 3*cs_c; lea(mem(rsi, rsi, 4), r15) // r15 = 5*cs_c; lea(mem(r13, rsi, 4), r10) // r10 = 7*cs_c; cmp(imm(4), rsi) // set ZF if (4*cs_c) == 4. jz(.SROWSTORED) // jump to row storage case cmp(imm(4), rdi) // set ZF if (4*rs_c) == 4. jz(.SCOLSTORED) // jump to column storage case // if neither row- or column- // stored, use general case. 
label(.SGENSTORED) vmovaps(ymm4, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm6, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm8, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm10, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm12, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm14, ymm0) SGEMM_OUTPUT_GS_BETA_NZ mov(rdx, rcx) // rcx = c11 + 8*cs_c vmovaps(ymm5, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm7, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm9, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm11, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm13, ymm0) SGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovaps(ymm15, ymm0) SGEMM_OUTPUT_GS_BETA_NZ jmp(.SDONE) label(.SROWSTORED) vmovups(ymm4, mem(rcx)) add(rdi, rcx) vmovups(ymm5, mem(rdx)) add(rdi, rdx) vmovups(ymm6, mem(rcx)) add(rdi, rcx) vmovups(ymm7, mem(rdx)) add(rdi, rdx) vmovups(ymm8, mem(rcx)) add(rdi, rcx) vmovups(ymm9, mem(rdx)) add(rdi, rdx) vmovups(ymm10, mem(rcx)) add(rdi, rcx) vmovups(ymm11, mem(rdx)) add(rdi, rdx) vmovups(ymm12, mem(rcx)) add(rdi, rcx) vmovups(ymm13, mem(rdx)) add(rdi, rdx) vmovups(ymm14, mem(rcx)) //add(rdi, rcx) vmovups(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.SDONE) label(.SCOLSTORED) vunpcklps(ymm6, ymm4, ymm0) vunpcklps(ymm10, ymm8, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovups(xmm0, mem(rcx)) // store ( gamma00..gamma30 ) vmovups(xmm1, mem(rcx, rsi, 1)) // store ( gamma01..gamma31 ) vmovups(xmm2, mem(rcx, rsi, 4)) // store ( gamma04..gamma34 ) vmovups(xmm3, mem(rcx, r15, 1)) // store ( gamma05..gamma35 ) vunpckhps(ymm6, ymm4, ymm0) vunpckhps(ymm10, ymm8, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) 
vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovups(xmm0, mem(rcx, rsi, 2)) // store ( gamma02..gamma32 ) vmovups(xmm1, mem(rcx, r13, 1)) // store ( gamma03..gamma33 ) vmovups(xmm2, mem(rcx, r13, 2)) // store ( gamma06..gamma36 ) vmovups(xmm3, mem(rcx, r10, 1)) // store ( gamma07..gamma37 ) lea(mem(rcx, rsi, 8), rcx) // rcx += 8*cs_c vunpcklps(ymm14, ymm12, ymm0) vunpckhps(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovlpd(xmm0, mem(r14)) // store ( gamma40..gamma50 ) vmovhpd(xmm0, mem(r14, rsi, 1)) // store ( gamma41..gamma51 ) vmovlpd(xmm1, mem(r14, rsi, 2)) // store ( gamma42..gamma52 ) vmovhpd(xmm1, mem(r14, r13, 1)) // store ( gamma43..gamma53 ) vmovlpd(xmm2, mem(r14, rsi, 4)) // store ( gamma44..gamma54 ) vmovhpd(xmm2, mem(r14, r15, 1)) // store ( gamma45..gamma55 ) vmovlpd(xmm3, mem(r14, r13, 2)) // store ( gamma46..gamma56 ) vmovhpd(xmm3, mem(r14, r10, 1)) // store ( gamma47..gamma57 ) lea(mem(r14, rsi, 8), r14) // r14 += 8*cs_c vunpcklps(ymm7, ymm5, ymm0) vunpcklps(ymm11, ymm9, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovups(xmm0, mem(rcx)) // store ( gamma08..gamma38 ) vmovups(xmm1, mem(rcx, rsi, 1)) // store ( gamma09..gamma39 ) vmovups(xmm2, mem(rcx, rsi, 4)) // store ( gamma0C..gamma3C ) vmovups(xmm3, mem(rcx, r15, 1)) // store ( gamma0D..gamma3D ) vunpckhps(ymm7, ymm5, ymm0) vunpckhps(ymm11, ymm9, ymm1) vshufps(imm(0x4e), ymm1, ymm0, ymm2) vblendps(imm(0xcc), ymm2, ymm0, ymm0) vblendps(imm(0x33), ymm2, ymm1, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovups(xmm0, mem(rcx, rsi, 2)) // store ( gamma0A..gamma3A ) vmovups(xmm1, mem(rcx, r13, 1)) // store ( gamma0B..gamma3B ) vmovups(xmm2, mem(rcx, r13, 2)) // store ( 
gamma0E..gamma3E ) vmovups(xmm3, mem(rcx, r10, 1)) // store ( gamma0F..gamma3F ) //lea(mem(rcx, rsi, 8), rcx) // rcx += 8*cs_c vunpcklps(ymm15, ymm13, ymm0) vunpckhps(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovlpd(xmm0, mem(r14)) // store ( gamma48..gamma58 ) vmovhpd(xmm0, mem(r14, rsi, 1)) // store ( gamma49..gamma59 ) vmovlpd(xmm1, mem(r14, rsi, 2)) // store ( gamma4A..gamma5A ) vmovhpd(xmm1, mem(r14, r13, 1)) // store ( gamma4B..gamma5B ) vmovlpd(xmm2, mem(r14, rsi, 4)) // store ( gamma4C..gamma5C ) vmovhpd(xmm2, mem(r14, r15, 1)) // store ( gamma4D..gamma5D ) vmovlpd(xmm3, mem(r14, r13, 2)) // store ( gamma4E..gamma5E ) vmovhpd(xmm3, mem(r14, r10, 1)) // store ( gamma4F..gamma5F ) //lea(mem(r14, rsi, 8), r14) // r14 += 8*cs_c label(.SDONE) vzeroupper() end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a10] "m" (a10), // 2 [b01] "m" (b01), // 3 [beta] "m" (beta), // 4 [alpha] "m" (alpha), // 5 [a11] "m" (a11), // 6 [b11] "m" (b11), // 7 [c11] "m" (c11), // 8 [rs_c] "m" (rs_c), // 9 [cs_c] "m" (cs_c) // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } #define DGEMM_OUTPUT_GS_BETA_NZ \ vextractf128(imm(1), ymm0, xmm1) \ vmovlpd(xmm0, mem(rcx)) \ vmovhpd(xmm0, mem(rcx, rsi, 1)) \ vmovlpd(xmm1, mem(rcx, rsi, 2)) \ vmovhpd(xmm1, mem(rcx, r13, 1)) /*\ vextractf128(imm(1), ymm2, xmm1) \ vmovlpd(xmm2, mem(rcx, rsi, 4)) \ vmovhpd(xmm2, mem(rcx, r15, 1)) \ vmovlpd(xmm1, mem(rcx, r13, 2)) \ vmovhpd(xmm1, mem(rcx, r10, 1))*/ void bli_dgemmtrsm_u_haswell_asm_6x8 ( dim_t k0, double* restrict alpha, double* restrict a10, double* restrict a11, double* restrict b01, double* restrict b11, double* restrict c11, inc_t rs_c0, inc_t cs_c0, auxinfo_t* 
restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; double* beta = bli_dm1; begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a10), rax) // load address of a. mov(var(b01), rbx) // load address of b. add(imm(32*4), rbx) // initialize loop by pre-loading vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) mov(var(b11), rcx) // load address of b11 mov(imm(8), rdi) // set rs_b = PACKNR = 8 lea(mem(, rdi, 8), rdi) // rs_b *= sizeof(double) // NOTE: c11, rs_c, and cs_c aren't // needed for a while, but we load // them now to avoid stalling later. mov(var(c11), r8) // load address of c11 mov(var(rs_c), r9) // load rs_c lea(mem(, r9 , 8), r9) // rs_c *= sizeof(double) mov(var(k_left)0, r10) // load cs_c lea(mem(, r10, 8), r10) // cs_c *= sizeof(double) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // iteration 0 prefetch(0, mem(rax, 64*8)) vbroadcastsd(mem(rax, 0*8), ymm2) vbroadcastsd(mem(rax, 1*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 2*8), ymm2) vbroadcastsd(mem(rax, 3*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 4*8), ymm2) vbroadcastsd(mem(rax, 5*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, -2*32), ymm0) vmovapd(mem(rbx, -1*32), ymm1) // iteration 1 vbroadcastsd(mem(rax, 6*8), ymm2) vbroadcastsd(mem(rax, 7*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 8*8), ymm2) vbroadcastsd(mem(rax, 9*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 10*8), ymm2) vbroadcastsd(mem(rax, 11*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, 0*32), ymm0) vmovapd(mem(rbx, 1*32), ymm1) // iteration 2 prefetch(0, mem(rax, 76*8)) vbroadcastsd(mem(rax, 12*8), ymm2) vbroadcastsd(mem(rax, 13*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 14*8), ymm2) vbroadcastsd(mem(rax, 15*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 16*8), ymm2) vbroadcastsd(mem(rax, 17*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) vmovapd(mem(rbx, 2*32), 
ymm0) vmovapd(mem(rbx, 3*32), ymm1) // iteration 3 vbroadcastsd(mem(rax, 18*8), ymm2) vbroadcastsd(mem(rax, 19*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 20*8), ymm2) vbroadcastsd(mem(rax, 21*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 22*8), ymm2) vbroadcastsd(mem(rax, 23*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) add(imm(4*6*8), rax) // a += 4*6 (unroll x mr) add(imm(4*8*8), rbx) // b += 4*8 (unroll x nr) vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP prefetch(0, mem(rax, 64*8)) vbroadcastsd(mem(rax, 0*8), ymm2) vbroadcastsd(mem(rax, 1*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, 2*8), ymm2) vbroadcastsd(mem(rax, 3*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, 4*8), ymm2) vbroadcastsd(mem(rax, 5*8), ymm3) vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) add(imm(1*6*8), rax) // a += 1*6 (unroll x mr) add(imm(1*8*8), rbx) // b += 1*8 (unroll x nr) vmovapd(mem(rbx, -4*32), ymm0) vmovapd(mem(rbx, -3*32), ymm1) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4..ymm15 = -a10 * b01 mov(var(alpha), rbx) // load address of alpha vbroadcastsd(mem(rbx), ymm3) // load alpha and duplicate mov(imm(1), rsi) // set cs_b = 1 lea(mem(, rsi, 8), rsi) // cs_b *= sizeof(double) lea(mem(rcx, rsi, 4), rdx) // load address of b11 + 4*cs_b mov(rcx, r11) // save rcx = b11 for later mov(rdx, r14) // save rdx = b11+4*cs_b for later // b11 := alpha * b11 - a10 * b01 vfmsub231pd(mem(rcx), ymm3, ymm4) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm5) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm6) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm7) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm8) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm9) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm10) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm11) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm12) add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm13) add(rdi, rdx) vfmsub231pd(mem(rcx), ymm3, ymm14) //add(rdi, rcx) vfmsub231pd(mem(rdx), ymm3, ymm15) //add(rdi, rdx) // prefetch c11 #if 0 mov(r8, rcx) // load address of c11 from r8 // Note: r9 = rs_c * sizeof(double) lea(mem(r9 , r9 , 2), r13) // r13 = 3*rs_c; lea(mem(rcx, r13, 1), rdx) // rdx = c11 + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c11 + 0*rs_c prefetch(0, mem(rcx, r9, 1, 7*8)) // prefetch c11 + 1*rs_c prefetch(0, mem(rcx, r9 , 2, 7*8)) // prefetch c11 + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c11 + 3*rs_c prefetch(0, mem(rdx, r9, 1, 7*8)) // prefetch c11 + 4*rs_c prefetch(0, mem(rdx, r9 , 2, 7*8)) // prefetch c11 + 5*rs_c #endif // trsm computation begins here // Note: contents of b11 are stored as // ymm4 ymm5 = ( beta00..03 ) ( beta04..07 ) // ymm6 ymm7 = ( beta10..13 ) ( beta14..17 ) // ymm8 ymm9 = ( beta20..23 ) ( beta24..27 ) // ymm10 ymm11 = ( beta30..33 ) ( beta34..37 ) // ymm12 ymm13 = ( beta40..43 ) ( beta44..47 ) // ymm14 ymm15 = ( beta50..53 ) ( beta54..57 ) mov(var(a11), rax) // load address of a11 mov(r11, rcx) // recall address of b11 mov(r14, rdx) // recall 
address of b11+4*cs_b lea(mem(rcx, rdi, 4), rcx) // rcx = b11 + (6-1)*rs_b lea(mem(rcx, rdi, 1), rcx) lea(mem(rdx, rdi, 4), rdx) // rdx = b11 + (6-1)*rs_b + 4*cs_b lea(mem(rdx, rdi, 1), rdx) // iteration 0 ------------- vbroadcastsd(mem(5+5*6)*8(rax), ymm0) // ymm0 = (1/alpha55) vmulpd(ymm0, ymm14, ymm14) // ymm14 *= (1/alpha55) vmulpd(ymm0, ymm15, ymm15) // ymm15 *= (1/alpha55) vmovupd(ymm14, mem(rcx)) // store ( beta50..beta53 ) = ymm14 vmovupd(ymm15, mem(rdx)) // store ( beta54..beta57 ) = ymm15 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 1 ------------- vbroadcastsd(mem(4+5*6)*8(rax), ymm0) // ymm0 = alpha45 vbroadcastsd(mem(4+4*6)*8(rax), ymm1) // ymm1 = (1/alpha44) vmulpd(ymm0, ymm14, ymm2) // ymm2 = alpha45 * ymm14 vmulpd(ymm0, ymm15, ymm3) // ymm3 = alpha45 * ymm15 vsubpd(ymm2, ymm12, ymm12) // ymm12 -= ymm2 vsubpd(ymm3, ymm13, ymm13) // ymm13 -= ymm3 vmulpd(ymm12, ymm1, ymm12) // ymm12 *= (1/alpha44) vmulpd(ymm13, ymm1, ymm13) // ymm13 *= (1/alpha44) vmovupd(ymm12, mem(rcx)) // store ( beta40..beta43 ) = ymm12 vmovupd(ymm13, mem(rdx)) // store ( beta44..beta47 ) = ymm13 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 2 ------------- vbroadcastsd(mem(3+5*6)*8(rax), ymm0) // ymm0 = alpha35 vbroadcastsd(mem(3+4*6)*8(rax), ymm1) // ymm1 = alpha34 vmulpd(ymm0, ymm14, ymm2) // ymm2 = alpha35 * ymm14 vmulpd(ymm0, ymm15, ymm3) // ymm3 = alpha35 * ymm15 vbroadcastsd(mem(3+3*6)*8(rax), ymm0) // ymm0 = (1/alpha33) vfmadd231pd(ymm1, ymm12, ymm2) // ymm2 += alpha34 * ymm12 vfmadd231pd(ymm1, ymm13, ymm3) // ymm3 += alpha34 * ymm13 vsubpd(ymm2, ymm10, ymm10) // ymm10 -= ymm2 vsubpd(ymm3, ymm11, ymm11) // ymm11 -= ymm3 vmulpd(ymm10, ymm0, ymm10) // ymm10 *= (1/alpha33) vmulpd(ymm11, ymm0, ymm11) // ymm11 *= (1/alpha33) vmovupd(ymm10, mem(rcx)) // store ( beta30..beta33 ) = ymm10 vmovupd(ymm11, mem(rdx)) // store ( beta34..beta37 ) = ymm11 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 3 
------------- vbroadcastsd(mem(2+5*6)*8(rax), ymm0) // ymm0 = alpha25 vbroadcastsd(mem(2+4*6)*8(rax), ymm1) // ymm1 = alpha24 vmulpd(ymm0, ymm14, ymm2) // ymm2 = alpha25 * ymm14 vmulpd(ymm0, ymm15, ymm3) // ymm3 = alpha25 * ymm15 vbroadcastsd(mem(2+3*6)*8(rax), ymm0) // ymm0 = alpha23 vfmadd231pd(ymm1, ymm12, ymm2) // ymm2 += alpha24 * ymm12 vfmadd231pd(ymm1, ymm13, ymm3) // ymm3 += alpha24 * ymm13 vbroadcastsd(mem(2+2*6)*8(rax), ymm1) // ymm1 = (1/alpha22) vfmadd231pd(ymm0, ymm10, ymm2) // ymm2 += alpha23 * ymm10 vfmadd231pd(ymm0, ymm11, ymm3) // ymm3 += alpha23 * ymm11 vsubpd(ymm2, ymm8, ymm8) // ymm8 -= ymm2 vsubpd(ymm3, ymm9, ymm9) // ymm9 -= ymm3 vmulpd(ymm8, ymm1, ymm8) // ymm8 *= (1/alpha33) vmulpd(ymm9, ymm1, ymm9) // ymm9 *= (1/alpha33) vmovupd(ymm8, mem(rcx)) // store ( beta20..beta23 ) = ymm8 vmovupd(ymm9, mem(rdx)) // store ( beta24..beta27 ) = ymm9 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 4 ------------- vbroadcastsd(mem(1+5*6)*8(rax), ymm0) // ymm0 = alpha15 vbroadcastsd(mem(1+4*6)*8(rax), ymm1) // ymm1 = alpha14 vmulpd(ymm0, ymm14, ymm2) // ymm2 = alpha15 * ymm14 vmulpd(ymm0, ymm15, ymm3) // ymm3 = alpha15 * ymm15 vbroadcastsd(mem(1+3*6)*8(rax), ymm0) // ymm0 = alpha13 vfmadd231pd(ymm1, ymm12, ymm2) // ymm2 += alpha14 * ymm12 vfmadd231pd(ymm1, ymm13, ymm3) // ymm3 += alpha14 * ymm13 vbroadcastsd(mem(1+2*6)*8(rax), ymm1) // ymm1 = alpha12 vfmadd231pd(ymm0, ymm10, ymm2) // ymm2 += alpha13 * ymm10 vfmadd231pd(ymm0, ymm11, ymm3) // ymm3 += alpha13 * ymm11 vbroadcastsd(mem(1+1*6)*8(rax), ymm0) // ymm4 = (1/alpha11) vfmadd231pd(ymm1, ymm8, ymm2) // ymm2 += alpha12 * ymm8 vfmadd231pd(ymm1, ymm9, ymm3) // ymm3 += alpha12 * ymm9 vsubpd(ymm2, ymm6, ymm6) // ymm6 -= ymm2 vsubpd(ymm3, ymm7, ymm7) // ymm7 -= ymm3 vmulpd(ymm6, ymm0, ymm6) // ymm6 *= (1/alpha44) vmulpd(ymm7, ymm0, ymm7) // ymm7 *= (1/alpha44) vmovupd(ymm6, mem(rcx)) // store ( beta10..beta13 ) = ymm6 vmovupd(ymm7, mem(rdx)) // store ( beta14..beta17 ) = ymm7 sub(rdi, 
rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b // iteration 5 ------------- vbroadcastsd(mem(0+5*6)*8(rax), ymm0) // ymm0 = alpha05 vbroadcastsd(mem(0+4*6)*8(rax), ymm1) // ymm1 = alpha04 vmulpd(ymm0, ymm14, ymm2) // ymm2 = alpha05 * ymm14 vmulpd(ymm0, ymm15, ymm3) // ymm3 = alpha05 * ymm15 vbroadcastsd(mem(0+3*6)*8(rax), ymm0) // ymm0 = alpha03 vfmadd231pd(ymm1, ymm12, ymm2) // ymm2 += alpha04 * ymm12 vfmadd231pd(ymm1, ymm13, ymm3) // ymm3 += alpha04 * ymm13 vbroadcastsd(mem(0+2*6)*8(rax), ymm1) // ymm1 = alpha02 vfmadd231pd(ymm0, ymm10, ymm2) // ymm2 += alpha03 * ymm10 vfmadd231pd(ymm0, ymm11, ymm3) // ymm3 += alpha03 * ymm11 vbroadcastsd(mem(0+1*6)*8(rax), ymm0) // ymm0 = alpha01 vfmadd231pd(ymm1, ymm8, ymm2) // ymm2 += alpha02 * ymm8 vfmadd231pd(ymm1, ymm9, ymm3) // ymm3 += alpha02 * ymm9 vbroadcastsd(mem(0+0*6)*8(rax), ymm1) // ymm1 = (1/alpha00) vfmadd231pd(ymm0, ymm6, ymm2) // ymm2 += alpha01 * ymm6 vfmadd231pd(ymm0, ymm7, ymm3) // ymm3 += alpha01 * ymm7 vsubpd(ymm2, ymm4, ymm4) // ymm4 -= ymm2 vsubpd(ymm3, ymm5, ymm5) // ymm5 -= ymm3 vmulpd(ymm4, ymm1, ymm4) // ymm4 *= (1/alpha00) vmulpd(ymm5, ymm1, ymm5) // ymm5 *= (1/alpha00) vmovupd(ymm4, mem(rcx)) // store ( beta00..beta03 ) = ymm4 vmovupd(ymm5, mem(rdx)) // store ( beta04..beta07 ) = ymm5 sub(rdi, rcx) // rcx -= rs_b sub(rdi, rdx) // rdx -= rs_b mov(r8, rcx) // load address of c11 from r8 mov(r9, rdi) // load rs_c (in bytes) from r9 mov(r10, rsi) // load cs_c (in bytes) from r10 lea(mem(rcx, rsi, 4), rdx) // load address of c11 + 4*cs_c; lea(mem(rcx, rdi, 4), r14) // load address of c11 + 4*rs_c; // These are used in the macros below. lea(mem(rsi, rsi, 2), r13) // r13 = 3*cs_c; //lea(mem(rsi, rsi, 4), r15) // r15 = 5*cs_c; //lea(mem(r13, rsi, 4), r10) // r10 = 7*cs_c; cmp(imm(8), rsi) // set ZF if (8*cs_c) == 8. jz(.DROWSTORED) // jump to row storage case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORED) // jump to column storage case // if neither row- or column- // stored, use general case. label(.DGENSTORED) vmovapd(ymm4, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm6, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm8, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm10, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm12, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm14, ymm0) DGEMM_OUTPUT_GS_BETA_NZ mov(rdx, rcx) // rcx = c11 + 4*cs_c vmovapd(ymm5, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm7, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm9, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm11, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm13, ymm0) DGEMM_OUTPUT_GS_BETA_NZ add(rdi, rcx) // c11 += rs_c; vmovapd(ymm15, ymm0) DGEMM_OUTPUT_GS_BETA_NZ jmp(.DDONE) label(.DROWSTORED) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rdx)) add(rdi, rdx) vmovupd(ymm6, mem(rcx)) add(rdi, rcx) vmovupd(ymm7, mem(rdx)) add(rdi, rdx) vmovupd(ymm8, mem(rcx)) add(rdi, rcx) vmovupd(ymm9, mem(rdx)) add(rdi, rdx) vmovupd(ymm10, mem(rcx)) add(rdi, rcx) vmovupd(ymm11, mem(rdx)) add(rdi, rdx) vmovupd(ymm12, mem(rcx)) add(rdi, rcx) vmovupd(ymm13, mem(rdx)) add(rdi, rdx) vmovupd(ymm14, mem(rcx)) //add(rdi, rcx) vmovupd(ymm15, mem(rdx)) //add(rdi, rdx) jmp(.DDONE) label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, 
ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovupd(xmm0, mem(r14)) vmovupd(xmm1, mem(r14, rsi, 1)) vmovupd(xmm2, mem(r14, rsi, 2)) vmovupd(xmm3, mem(r14, r13, 1)) lea(mem(r14, rsi, 4), r14) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, r13, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm3) vmovupd(xmm0, mem(r14)) vmovupd(xmm1, mem(r14, rsi, 1)) vmovupd(xmm2, mem(r14, rsi, 2)) vmovupd(xmm3, mem(r14, r13, 1)) //lea(mem(r14, rsi, 4), r14) label(.DDONE) vzeroupper() end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a10] "m" (a10), // 2 [b01] "m" (b01), // 3 [beta] "m" (beta), // 4 [alpha] "m" (alpha), // 5 [a11] "m" (a11), // 6 [b11] "m" (b11), // 7 [c11] "m" (c11), // 8 [rs_c] "m" (rs_c), // 9 [cs_c] "m" (cs_c) // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/haswell/3/old/000077500000000000000000000000001360743507500165035ustar00rootroot00000000000000blis-0.6.1/kernels/haswell/3/old/bli_gemm_haswell_asm_d12x4.c000066400000000000000000002063101360743507500237250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define SGEMM_INPUT_GS_BETA_NZ \ "vmovlps (%%rcx ), %%xmm0, %%xmm0 \n\t" \ "vmovhps (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ "vmovlps (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ "vmovhps (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ "vshufps $0x88, %%xmm1, %%xmm0, %%xmm0 \n\t" \ "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ /* We can't use vmovhps for loading the last element because that might result in reading beyond valid memory.
(vmov[lh]psd load pairs of adjacent floats at a time.) So we need to use vmovss instead. But since we're limited to using ymm0 through ymm2 (ymm3 contains beta and ymm4 through ymm15 contain the microtile) and due to the way vmovss zeros out all bits above 31, we have to load element 7 before element 6. */ \ "vmovss (%%rcx,%%r10 ), %%xmm1 \n\t" \ "vpermilps $0xcf, %%xmm1, %%xmm1 \n\t" \ "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ /*"vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t"*/ \ "vshufps $0x88, %%xmm1, %%xmm2, %%xmm2 \n\t" \ "vperm2f128 $0x20, %%ymm2, %%ymm0, %%ymm0 \n\t" #define SGEMM_OUTPUT_GS_BETA_NZ \ "vextractf128 $1, %%ymm0, %%xmm2 \n\t" \ "vmovss %%xmm0, (%%rcx ) \n\t" \ "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%rsi,1) \n\t" \ "vpermilps $0x39, %%xmm1, %%xmm0 \n\t" \ "vmovss %%xmm0, (%%rcx,%%rsi,2) \n\t" \ "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r13 ) \n\t" \ "vmovss %%xmm2, (%%rcx,%%rsi,4) \n\t" \ "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r15 ) \n\t" \ "vpermilps $0x39, %%xmm1, %%xmm2 \n\t" \ "vmovss %%xmm2, (%%rcx,%%r13,2) \n\t" \ "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r10 ) \n\t" /* bli_sgemm_haswell_asm_24x4: single-precision gemm micro-kernel written as GCC inline assembly using AVX and FMA instructions. It accumulates a 24x4 micro-tile in ymm4..ymm15 (three 8-float registers per column of the tile) over k0 rank-1 updates, running k0 / 4 passes of a 4x-unrolled loop followed by k0 % 4 single updates. Each update reads 24 floats of a with aligned loads (vmovaps) and broadcasts 4 floats of b. The tile is then scaled by *alpha and written to c: when *beta != 0 the stored value is beta*c + tile, otherwise c is overwritten without being read. Aligned full-register moves are used on c when rs_c == 1 and both c and 4*cs_c are multiples of 32 bytes; every other layout goes through the strided SGEMM_INPUT_GS_BETA_NZ and SGEMM_OUTPUT_GS_BETA_NZ macros. Both loops pre-load the next 24 floats of a, so the final update reads one column of a beyond the last one it uses. data and cntx are not referenced. */ void bli_sgemm_haswell_asm_24x4 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; __asm__ volatile ( " \n\t" "vzeroall \n\t" // zero all xmm/ymm registers. " \n\t" " \n\t" "movq %2, %%rax \n\t" // load address of a. "movq %3, %%rbx \n\t" // load address of b. //"movq %9, %%r15 \n\t" // load address of b_next. 
" \n\t" " \n\t" // initialize loop by pre-loading "vmovaps 0 * 32(%%rax), %%ymm0 \n\t" "vmovaps 1 * 32(%%rax), %%ymm1 \n\t" "vmovaps 2 * 32(%%rax), %%ymm2 \n\t" " \n\t" "movq %6, %%rcx \n\t" // load address of c "movq %8, %%rdi \n\t" // load cs_c "leaq (,%%rdi,4), %%rdi \n\t" // cs_c *= sizeof(float) " \n\t" "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*cs_c; "prefetcht0 7 * 4(%%rcx) \n\t" // prefetch c + 0*cs_c "prefetcht0 7 * 4(%%rcx,%%rdi) \n\t" // prefetch c + 1*cs_c "prefetcht0 7 * 4(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*cs_c "prefetcht0 7 * 4(%%rcx,%%r13) \n\t" // prefetch c + 3*cs_c " \n\t" " \n\t" " \n\t" " \n\t" "movq %0, %%rsi \n\t" // i = k_iter; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .SCONSIDKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. " \n\t" " \n\t" ".SLOOPKITER: \n\t" // MAIN LOOP " \n\t" " \n\t" " \n\t" // iteration 0 "prefetcht0 16 * 32(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 1 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 2 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 3 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 3 * 32(%%rax), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 4 * 32(%%rax), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 5 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 1 "vbroadcastss 4 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 5 * 
4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 6 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 7 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 6 * 32(%%rax), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 7 * 32(%%rax), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 8 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 2 "prefetcht0 22 * 32(%%rax) \n\t" " \n\t" "vbroadcastss 8 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 9 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 10 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 11 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 9 * 32(%%rax), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 10 * 32(%%rax), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 11 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 3 "vbroadcastss 12 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 13 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 14 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 
\n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 15 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 12 * 32(%%rax), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 13 * 32(%%rax), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 14 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" "addq $4 * 24 * 4, %%rax \n\t" // a += 4*24 (unroll x mr) "addq $4 * 4 * 4, %%rbx \n\t" // b += 4*4 (unroll x nr) " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .SLOOPKITER \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".SCONSIDKLEFT: \n\t" " \n\t" "movq %1, %%rsi \n\t" // i = k_left; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .SPOSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. " \n\t" " \n\t" ".SLOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "prefetcht0 16 * 32(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 1 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 2 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 3 * 4(%%rbx), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 3 * 32(%%rax), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 4 * 32(%%rax), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 5 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" "addq $1 * 24 * 4, %%rax \n\t" // a += 1*24 (unroll x mr) "addq $1 * 4 * 4, %%rbx \n\t" // b += 1*4 (unroll x nr) " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne 
.SLOOPKLEFT \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" ".SPOSTACCUM: \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %4, %%rax \n\t" // load address of alpha "movq %5, %%rbx \n\t" // load address of beta "vbroadcastss (%%rax), %%ymm0 \n\t" // load alpha and duplicate "vbroadcastss (%%rbx), %%ymm3 \n\t" // load beta and duplicate " \n\t" "vmulps %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha "vmulps %%ymm0, %%ymm5, %%ymm5 \n\t" "vmulps %%ymm0, %%ymm6, %%ymm6 \n\t" "vmulps %%ymm0, %%ymm7, %%ymm7 \n\t" "vmulps %%ymm0, %%ymm8, %%ymm8 \n\t" "vmulps %%ymm0, %%ymm9, %%ymm9 \n\t" "vmulps %%ymm0, %%ymm10, %%ymm10 \n\t" "vmulps %%ymm0, %%ymm11, %%ymm11 \n\t" "vmulps %%ymm0, %%ymm12, %%ymm12 \n\t" "vmulps %%ymm0, %%ymm13, %%ymm13 \n\t" "vmulps %%ymm0, %%ymm14, %%ymm14 \n\t" "vmulps %%ymm0, %%ymm15, %%ymm15 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %7, %%rsi \n\t" // load rs_c "leaq (,%%rsi,4), %%rsi \n\t" // rsi = rs_c * sizeof(float) " \n\t" "leaq (%%rcx,%%rsi,8), %%rdx \n\t" // rdx = c + 8*rs_c; "leaq (%%rdx,%%rsi,8), %%r12 \n\t" // r12 = c + 16*rs_c; " \n\t" "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*rs_c; "leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*rs_c; "leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*rs_c; " \n\t" " \n\t" " \n\t" " \n\t" // determine if " \n\t" // c % 32 == 0, AND " \n\t" // 4*cs_c % 32 == 0, AND " \n\t" // rs_c == 1 " \n\t" // ie: aligned, ldim aligned, and " \n\t" // column-stored " \n\t" "cmpq $4, %%rsi \n\t" // set ZF if (4*rs_c) == 4. "sete %%bl \n\t" // bl = ( ZF == 1 ? 1 : 0 ); "testq $31, %%rcx \n\t" // set ZF if c & 31 is zero. "setz %%bh \n\t" // bh = ( ZF == 1 ? 1 : 0 ); "testq $31, %%rdi \n\t" // set ZF if (4*cs_c) & 31 is zero. "setz %%al \n\t" // al = ( ZF == 1 ? 1 : 0 ); " \n\t" // and(bl,bh) followed by " \n\t" // and(bh,al) will reveal result " \n\t" " \n\t" // now avoid loading C if beta == 0 " \n\t" "vxorps %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. 
"vucomiss %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0. "je .SBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case " \n\t" " \n\t" " \n\t" // check if aligned/column-stored "andb %%bl, %%bh \n\t" // bh &= bl; ZF is set if the result is 0. "andb %%bh, %%al \n\t" // al &= bh; ZF is set if the result is 0. "jne .SCOLSTORED \n\t" // jump to column storage case (taken when al != 0) " \n\t" " \n\t" " \n\t" ".SGENSTORED: \n\t" " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm7, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm13, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 8*rs_c " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm5, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm8, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm11, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm14, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%r12, %%rcx \n\t" // rcx = c + 16*rs_c " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm6, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm9, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps 
%%ymm12, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm15, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "jmp .SDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".SCOLSTORED: \n\t" " \n\t" " \n\t" "vmovaps (%%rcx), %%ymm0 \n\t" "vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t" "vmovaps %%ymm0, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovaps (%%rdx), %%ymm1 \n\t" "vfmadd213ps %%ymm5, %%ymm3, %%ymm1 \n\t" "vmovaps %%ymm1, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovaps (%%r12), %%ymm2 \n\t" "vfmadd213ps %%ymm6, %%ymm3, %%ymm2 \n\t" "vmovaps %%ymm2, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vmovaps (%%rcx), %%ymm0 \n\t" "vfmadd213ps %%ymm7, %%ymm3, %%ymm0 \n\t" "vmovaps %%ymm0, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovaps (%%rdx), %%ymm1 \n\t" "vfmadd213ps %%ymm8, %%ymm3, %%ymm1 \n\t" "vmovaps %%ymm1, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovaps (%%r12), %%ymm2 \n\t" "vfmadd213ps %%ymm9, %%ymm3, %%ymm2 \n\t" "vmovaps %%ymm2, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vmovaps (%%rcx), %%ymm0 \n\t" "vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t" "vmovaps %%ymm0, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovaps (%%rdx), %%ymm1 \n\t" "vfmadd213ps %%ymm11, %%ymm3, %%ymm1 \n\t" "vmovaps %%ymm1, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovaps (%%r12), %%ymm2 \n\t" "vfmadd213ps %%ymm12, %%ymm3, %%ymm2 \n\t" "vmovaps %%ymm2, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vmovaps (%%rcx), %%ymm0 \n\t" "vfmadd213ps %%ymm13, %%ymm3, %%ymm0 \n\t" "vmovaps %%ymm0, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovaps (%%rdx), %%ymm1 \n\t" "vfmadd213ps %%ymm14, %%ymm3, %%ymm1 \n\t" "vmovaps %%ymm1, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovaps (%%r12), %%ymm2 \n\t" "vfmadd213ps %%ymm15, %%ymm3, %%ymm2 \n\t" "vmovaps %%ymm2, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" " \n\t" 
" \n\t" " \n\t" "jmp .SDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".SBETAZERO: \n\t" " \n\t" // check if aligned/column-stored "andb %%bl, %%bh \n\t" // bh &= bl; ZF is set if the result is 0. "andb %%bh, %%al \n\t" // al &= bh; ZF is set if the result is 0. "jne .SCOLSTORBZ \n\t" // jump to column storage case (taken when al != 0) " \n\t" " \n\t" " \n\t" ".SGENSTORBZ: \n\t" " \n\t" " \n\t" "vmovaps %%ymm4, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm7, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm10, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm13, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 8*rs_c " \n\t" " \n\t" "vmovaps %%ymm5, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm8, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm11, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm14, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%r12, %%rcx \n\t" // rcx = c + 16*rs_c " \n\t" " \n\t" "vmovaps %%ymm6, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm9, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm12, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovaps %%ymm15, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "jmp .SDONE \n\t" // jump to end. 
" \n\t" " \n\t" " \n\t" ".SCOLSTORBZ: \n\t" " \n\t" " \n\t" "vmovaps %%ymm4, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovaps %%ymm5, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovaps %%ymm6, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovaps %%ymm7, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovaps %%ymm8, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovaps %%ymm9, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovaps %%ymm10, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovaps %%ymm11, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovaps %%ymm12, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovaps %%ymm13, (%%rcx) \n\t" //"addq %%rdi, %%rcx \n\t" "vmovaps %%ymm14, (%%rdx) \n\t" //"addq %%rdi, %%rdx \n\t" "vmovaps %%ymm15, (%%r12) \n\t" //"addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".SDONE: \n\t" " \n\t" "vzeroupper \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), // 0 "m" (k_left), // 1 "m" (a), // 2 "m" (b), // 3 "m" (alpha), // 4 "m" (beta), // 5 "m" (c), // 6 "m" (rs_c), // 7 "m" (cs_c)/*, // 8 "m" (b_next), // 9 "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ); } #define DGEMM_INPUT_GS_BETA_NZ \ "vmovlpd (%%rcx ), %%xmm0, %%xmm0 \n\t" \ "vmovhpd (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ "vmovlpd (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ "vmovhpd (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ "vperm2f128 $0x20, %%ymm1, %%ymm0, %%ymm0 \n\t" /*\ "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ "vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t" \ "vperm2f128 $0x20, %%ymm1, %%ymm2, %%ymm2 \n\t"*/ #define DGEMM_OUTPUT_GS_BETA_NZ \ "vextractf128 $1, %%ymm0, %%xmm1 \n\t" \ "vmovlpd %%xmm0, (%%rcx 
) \n\t" \ "vmovhpd %%xmm0, (%%rcx,%%rsi ) \n\t" \ "vmovlpd %%xmm1, (%%rcx,%%rsi,2) \n\t" \ "vmovhpd %%xmm1, (%%rcx,%%r13 ) \n\t" /*\ "vextractf128 $1, %%ymm2, %%xmm1 \n\t" \ "vmovlpd %%xmm2, (%%rcx,%%rsi,4) \n\t" \ "vmovhpd %%xmm2, (%%rcx,%%r15 ) \n\t" \ "vmovlpd %%xmm1, (%%rcx,%%r13,2) \n\t" \ "vmovhpd %%xmm1, (%%rcx,%%r10 ) \n\t"*/ void bli_dgemm_haswell_asm_12x4 ( dim_t k0, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; __asm__ volatile ( " \n\t" "vzeroall \n\t" // zero all xmm/ymm registers. " \n\t" " \n\t" "movq %2, %%rax \n\t" // load address of a. "movq %3, %%rbx \n\t" // load address of b. //"movq %9, %%r15 \n\t" // load address of b_next. " \n\t" " \n\t" // initialize loop by pre-loading "vmovapd 0 * 32(%%rax), %%ymm0 \n\t" "vmovapd 1 * 32(%%rax), %%ymm1 \n\t" "vmovapd 2 * 32(%%rax), %%ymm2 \n\t" " \n\t" "movq %6, %%rcx \n\t" // load address of c "movq %8, %%rdi \n\t" // load cs_c "leaq (,%%rdi,8), %%rdi \n\t" // cs_c *= sizeof(double) " \n\t" "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*cs_c; "prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*cs_c "prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*cs_c "prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*cs_c "prefetcht0 7 * 8(%%rcx,%%r13) \n\t" // prefetch c + 3*cs_c " \n\t" " \n\t" " \n\t" " \n\t" "movq %0, %%rsi \n\t" // i = k_iter; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .DCONSIDKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. 
" \n\t" " \n\t" ".DLOOPKITER: \n\t" // MAIN LOOP " \n\t" " \n\t" " \n\t" // iteration 0 "prefetcht0 16 * 32(%%rax) \n\t" " \n\t" "vbroadcastsd 0 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 1 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 2 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 3 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 3 * 32(%%rax), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 4 * 32(%%rax), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 5 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 1 "vbroadcastsd 4 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 5 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 6 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 7 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 6 * 32(%%rax), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 7 * 32(%%rax), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 8 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 2 "prefetcht0 22 * 32(%%rax) \n\t" " \n\t" "vbroadcastsd 8 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, 
%%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 9 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 10 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 11 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 9 * 32(%%rax), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 10 * 32(%%rax), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 11 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 3 "vbroadcastsd 12 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 13 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 14 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 15 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 12 * 32(%%rax), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 13 * 32(%%rax), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 14 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" "addq $4 * 12 * 8, %%rax \n\t" // a += 4*12 (unroll x mr) "addq $4 * 4 * 8, %%rbx \n\t" // b += 4*4 (unroll x nr) " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .DLOOPKITER \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".DCONSIDKLEFT: \n\t" " \n\t" "movq %1, %%rsi \n\t" // i = k_left; "testq %%rsi, %%rsi \n\t" // check i via logical AND. 
"je .DPOSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. " \n\t" " \n\t" ".DLOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "prefetcht0 16 * 32(%%rax) \n\t" " \n\t" "vbroadcastsd 0 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 1 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 2 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 3 * 8(%%rbx), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 3 * 32(%%rax), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 4 * 32(%%rax), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 5 * 32(%%rax), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" "addq $1 * 12 * 8, %%rax \n\t" // a += 1*12 (unroll x mr) "addq $1 * 4 * 8, %%rbx \n\t" // b += 1*4 (unroll x nr) " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .DLOOPKLEFT \n\t" // iterate again if i != 0. 
" \n\t" " \n\t" " \n\t" ".DPOSTACCUM: \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %4, %%rax \n\t" // load address of alpha "movq %5, %%rbx \n\t" // load address of beta "vbroadcastsd (%%rax), %%ymm0 \n\t" // load alpha and duplicate "vbroadcastsd (%%rbx), %%ymm3 \n\t" // load beta and duplicate " \n\t" "vmulpd %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha "vmulpd %%ymm0, %%ymm5, %%ymm5 \n\t" "vmulpd %%ymm0, %%ymm6, %%ymm6 \n\t" "vmulpd %%ymm0, %%ymm7, %%ymm7 \n\t" "vmulpd %%ymm0, %%ymm8, %%ymm8 \n\t" "vmulpd %%ymm0, %%ymm9, %%ymm9 \n\t" "vmulpd %%ymm0, %%ymm10, %%ymm10 \n\t" "vmulpd %%ymm0, %%ymm11, %%ymm11 \n\t" "vmulpd %%ymm0, %%ymm12, %%ymm12 \n\t" "vmulpd %%ymm0, %%ymm13, %%ymm13 \n\t" "vmulpd %%ymm0, %%ymm14, %%ymm14 \n\t" "vmulpd %%ymm0, %%ymm15, %%ymm15 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %7, %%rsi \n\t" // load rs_c "leaq (,%%rsi,8), %%rsi \n\t" // rsi = rs_c * sizeof(double) " \n\t" "leaq (%%rcx,%%rsi,4), %%rdx \n\t" // rdx = c + 4*rs_c; "leaq (%%rcx,%%rsi,8), %%r12 \n\t" // r12 = c + 8*rs_c; " \n\t" "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*rs_c; //"leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*rs_c; //"leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*rs_c; " \n\t" " \n\t" " \n\t" " \n\t" // determine if " \n\t" // c % 32 == 0, AND " \n\t" // 8*cs_c % 32 == 0, AND " \n\t" // rs_c == 1 " \n\t" // ie: aligned, ldim aligned, and " \n\t" // column-stored " \n\t" "cmpq $8, %%rsi \n\t" // set ZF if (8*rs_c) == 8. "sete %%bl \n\t" // bl = ( ZF == 1 ? 1 : 0 ); "testq $31, %%rcx \n\t" // set ZF if c & 32 is zero. "setz %%bh \n\t" // bh = ( ZF == 0 ? 1 : 0 ); "testq $31, %%rdi \n\t" // set ZF if (8*cs_c) & 32 is zero. "setz %%al \n\t" // al = ( ZF == 0 ? 1 : 0 ); " \n\t" // and(bl,bh) followed by " \n\t" // and(bh,al) will reveal result " \n\t" " \n\t" // now avoid loading C if beta == 0 " \n\t" "vxorpd %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. "vucomisd %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0. 
"je .DBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case " \n\t" " \n\t" " \n\t" // check if aligned/column-stored "andb %%bl, %%bh \n\t" // set ZF if bl & bh == 1. "andb %%bh, %%al \n\t" // set ZF if bh & al == 1. "jne .DCOLSTORED \n\t" // jump to column storage case " \n\t" " \n\t" " \n\t" ".DGENSTORED: \n\t" " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm7, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm13, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 4*rs_c " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm5, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm8, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm11, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm14, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%r12, %%rcx \n\t" // rcx = c + 8*rs_c " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm6, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm9, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm12, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq 
%%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm15, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "jmp .DDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".DCOLSTORED: \n\t" " \n\t" " \n\t" "vmovapd (%%rcx), %%ymm0 \n\t" "vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t" "vmovapd %%ymm0, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovapd (%%rdx), %%ymm1 \n\t" "vfmadd213pd %%ymm5, %%ymm3, %%ymm1 \n\t" "vmovapd %%ymm1, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovapd (%%r12), %%ymm2 \n\t" "vfmadd213pd %%ymm6, %%ymm3, %%ymm2 \n\t" "vmovapd %%ymm2, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vmovapd (%%rcx), %%ymm0 \n\t" "vfmadd213pd %%ymm7, %%ymm3, %%ymm0 \n\t" "vmovapd %%ymm0, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovapd (%%rdx), %%ymm1 \n\t" "vfmadd213pd %%ymm8, %%ymm3, %%ymm1 \n\t" "vmovapd %%ymm1, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovapd (%%r12), %%ymm2 \n\t" "vfmadd213pd %%ymm9, %%ymm3, %%ymm2 \n\t" "vmovapd %%ymm2, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vmovapd (%%rcx), %%ymm0 \n\t" "vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t" "vmovapd %%ymm0, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovapd (%%rdx), %%ymm1 \n\t" "vfmadd213pd %%ymm11, %%ymm3, %%ymm1 \n\t" "vmovapd %%ymm1, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovapd (%%r12), %%ymm2 \n\t" "vfmadd213pd %%ymm12, %%ymm3, %%ymm2 \n\t" "vmovapd %%ymm2, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vmovapd (%%rcx), %%ymm0 \n\t" "vfmadd213pd %%ymm13, %%ymm3, %%ymm0 \n\t" "vmovapd %%ymm0, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovapd (%%rdx), %%ymm1 \n\t" "vfmadd213pd %%ymm14, %%ymm3, %%ymm1 \n\t" "vmovapd %%ymm1, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovapd (%%r12), %%ymm2 \n\t" "vfmadd213pd %%ymm15, %%ymm3, %%ymm2 \n\t" "vmovapd %%ymm2, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "jmp .DDONE \n\t" // jump to end. 
" \n\t" " \n\t" " \n\t" ".DBETAZERO: \n\t" " \n\t" // check if aligned/column-stored "andb %%bl, %%bh \n\t" // set ZF if bl & bh == 1. "andb %%bh, %%al \n\t" // set ZF if bh & al == 1. "jne .DCOLSTORBZ \n\t" // jump to column storage case " \n\t" " \n\t" " \n\t" ".DGENSTORBZ: \n\t" " \n\t" " \n\t" "vmovapd %%ymm4, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm7, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm10, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm13, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 4*rs_c " \n\t" " \n\t" "vmovapd %%ymm5, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm8, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm11, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm14, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%r12, %%rcx \n\t" // rcx = c + 8*rs_c " \n\t" " \n\t" "vmovapd %%ymm6, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm9, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm12, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "vmovapd %%ymm15, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "jmp .DDONE \n\t" // jump to end. 
" \n\t" " \n\t" " \n\t" ".DCOLSTORBZ: \n\t" " \n\t" " \n\t" "vmovapd %%ymm4, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovapd %%ymm5, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovapd %%ymm6, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovapd %%ymm7, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovapd %%ymm8, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovapd %%ymm9, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovapd %%ymm10, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovapd %%ymm11, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovapd %%ymm12, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovapd %%ymm13, (%%rcx) \n\t" //"addq %%rdi, %%rcx \n\t" "vmovapd %%ymm14, (%%rdx) \n\t" //"addq %%rdi, %%rdx \n\t" "vmovapd %%ymm15, (%%r12) \n\t" //"addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".DDONE: \n\t" " \n\t" "vzeroupper \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), // 0 "m" (k_left), // 1 "m" (a), // 2 "m" (b), // 3 "m" (alpha), // 4 "m" (beta), // 5 "m" (c), // 6 "m" (rs_c), // 7 "m" (cs_c)/*, // 8 "m" (b_next), // 9 "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ); } #if 0 void bli_cgemm_haswell_asm_ ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; } void bli_zgemm_haswell_asm_ ( dim_t k0, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; } #endif blis-0.6.1/kernels/haswell/3/old/bli_gemm_haswell_asm_d4x12.c000066400000000000000000001755501360743507500237400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define SGEMM_INPUT_GS_BETA_NZ \ "vmovlps (%%rcx ), %%xmm0, %%xmm0 \n\t" \ "vmovhps (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ "vmovlps (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ "vmovhps (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ "vshufps $0x88, %%xmm1, %%xmm0, %%xmm0 \n\t" \ "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ /* We can't use vmovhps for loading the last element becauase that might result in reading beyond valid memory. (vmov[lh]psd load pairs of adjacent floats at a time.) So we need to use vmovss instead. But since we're limited to using ymm0 through ymm2 (ymm3 contains beta and ymm4 through ymm15 contain the microtile) and due to the way vmovss zeros out all bits above 31, we have to load element 7 before element 6. 
*/ \ "vmovss (%%rcx,%%r10 ), %%xmm1 \n\t" \ "vpermilps $0xcf, %%xmm1, %%xmm1 \n\t" \ "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ /*"vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t"*/ \ "vshufps $0x88, %%xmm1, %%xmm2, %%xmm2 \n\t" \ "vperm2f128 $0x20, %%ymm2, %%ymm0, %%ymm0 \n\t" #define SGEMM_OUTPUT_GS_BETA_NZ \ "vextractf128 $1, %%ymm0, %%xmm2 \n\t" \ "vmovss %%xmm0, (%%rcx ) \n\t" \ "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%rsi,1) \n\t" \ "vpermilps $0x39, %%xmm1, %%xmm0 \n\t" \ "vmovss %%xmm0, (%%rcx,%%rsi,2) \n\t" \ "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r13 ) \n\t" \ "vmovss %%xmm2, (%%rcx,%%rsi,4) \n\t" \ "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r15 ) \n\t" \ "vpermilps $0x39, %%xmm1, %%xmm2 \n\t" \ "vmovss %%xmm2, (%%rcx,%%r13,2) \n\t" \ "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r10 ) \n\t" void bli_sgemm_haswell_asm_4x24 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; __asm__ volatile ( " \n\t" "vzeroall \n\t" // zero all xmm/ymm registers. " \n\t" " \n\t" "movq %2, %%rax \n\t" // load address of a. "movq %3, %%rbx \n\t" // load address of b. //"movq %9, %%r15 \n\t" // load address of b_next. 
" \n\t" " \n\t" // initialize loop by pre-loading "vmovaps 0 * 32(%%rbx), %%ymm0 \n\t" "vmovaps 1 * 32(%%rbx), %%ymm1 \n\t" "vmovaps 2 * 32(%%rbx), %%ymm2 \n\t" " \n\t" "movq %6, %%rcx \n\t" // load address of c "movq %7, %%rdi \n\t" // load rs_c "leaq (,%%rdi,4), %%rdi \n\t" // rs_c *= sizeof(float) " \n\t" "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*rs_c; "prefetcht0 7 * 4(%%rcx) \n\t" // prefetch c + 0*rs_c "prefetcht0 7 * 4(%%rcx,%%rdi) \n\t" // prefetch c + 1*rs_c "prefetcht0 7 * 4(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*rs_c "prefetcht0 7 * 4(%%rcx,%%r13) \n\t" // prefetch c + 3*rs_c " \n\t" " \n\t" " \n\t" " \n\t" "movq %0, %%rsi \n\t" // i = k_iter; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .SCONSIDKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. " \n\t" " \n\t" ".SLOOPKITER: \n\t" // MAIN LOOP " \n\t" " \n\t" " \n\t" // iteration 0 "prefetcht0 16 * 32(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 1 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 2 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 3 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 3 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 4 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 5 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 1 "vbroadcastss 4 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 5 * 
4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 6 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 7 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 6 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 7 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 8 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 2 "prefetcht0 22 * 32(%%rax) \n\t" " \n\t" "vbroadcastss 8 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 9 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 10 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 11 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 9 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 10 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 11 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 3 "vbroadcastss 12 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 13 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 14 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 
\n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 15 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 12 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 13 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 14 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" "addq $4 * 4 * 4, %%rax \n\t" // a += 4*4 (unroll x mr) "addq $4 * 24 * 4, %%rbx \n\t" // b += 4*24 (unroll x nr) " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .SLOOPKITER \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".SCONSIDKLEFT: \n\t" " \n\t" "movq %1, %%rsi \n\t" // i = k_left; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .SPOSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. " \n\t" " \n\t" ".SLOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "prefetcht0 16 * 32(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastss 1 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastss 2 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastss 3 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovaps 3 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovaps 4 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231ps %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovaps 5 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" "addq $1 * 4 * 4, %%rax \n\t" // a += 1*4 (unroll x mr) "addq $1 * 24 * 4, %%rbx \n\t" // b += 1*24 (unroll x nr) " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne 
.SLOOPKLEFT \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" ".SPOSTACCUM: \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %4, %%rax \n\t" // load address of alpha "movq %5, %%rbx \n\t" // load address of beta "vbroadcastss (%%rax), %%ymm0 \n\t" // load alpha and duplicate "vbroadcastss (%%rbx), %%ymm3 \n\t" // load beta and duplicate " \n\t" "vmulps %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha "vmulps %%ymm0, %%ymm5, %%ymm5 \n\t" "vmulps %%ymm0, %%ymm6, %%ymm6 \n\t" "vmulps %%ymm0, %%ymm7, %%ymm7 \n\t" "vmulps %%ymm0, %%ymm8, %%ymm8 \n\t" "vmulps %%ymm0, %%ymm9, %%ymm9 \n\t" "vmulps %%ymm0, %%ymm10, %%ymm10 \n\t" "vmulps %%ymm0, %%ymm11, %%ymm11 \n\t" "vmulps %%ymm0, %%ymm12, %%ymm12 \n\t" "vmulps %%ymm0, %%ymm13, %%ymm13 \n\t" "vmulps %%ymm0, %%ymm14, %%ymm14 \n\t" "vmulps %%ymm0, %%ymm15, %%ymm15 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %8, %%rsi \n\t" // load cs_c "leaq (,%%rsi,4), %%rsi \n\t" // rsi = cs_c * sizeof(float) " \n\t" "leaq (%%rcx,%%rsi,8), %%rdx \n\t" // rdx = c + 8*cs_c; "leaq (%%rdx,%%rsi,8), %%r12 \n\t" // r12 = c + 16*cs_c; " \n\t" "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c; "leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*cs_c; "leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*cs_c; " \n\t" " \n\t" " \n\t" // now avoid loading C if beta == 0 " \n\t" "vxorps %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. "vucomiss %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0. "je .SBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case " \n\t" " \n\t" "cmpq $4, %%rsi \n\t" // set ZF if (4*cs_c) == 4. 
"jz .SROWSTORED \n\t" // jump to row storage case " \n\t" " \n\t" " \n\t" ".SGENSTORED: \n\t" " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm7, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm13, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 8*cs_c " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm5, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm8, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm11, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm14, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%r12, %%rcx \n\t" // rcx = c + 16*cs_c " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm6, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm9, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm12, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm15, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" 
"jmp .SDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".SROWSTORED: \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm4 \n\t" "vmovups %%ymm4, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm5 \n\t" "vmovups %%ymm5, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vfmadd231ps (%%r12), %%ymm3, %%ymm6 \n\t" "vmovups %%ymm6, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm7 \n\t" "vmovups %%ymm7, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm8 \n\t" "vmovups %%ymm8, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vfmadd231ps (%%r12), %%ymm3, %%ymm9 \n\t" "vmovups %%ymm9, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm10 \n\t" "vmovups %%ymm10, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm11 \n\t" "vmovups %%ymm11, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vfmadd231ps (%%r12), %%ymm3, %%ymm12 \n\t" "vmovups %%ymm12, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm13 \n\t" "vmovups %%ymm13, (%%rcx) \n\t" //"addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm14 \n\t" "vmovups %%ymm14, (%%rdx) \n\t" //"addq %%rdi, %%rdx \n\t" "vfmadd231ps (%%r12), %%ymm3, %%ymm15 \n\t" "vmovups %%ymm15, (%%r12) \n\t" //"addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" " \n\t" "jmp .SDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".SBETAZERO: \n\t" " \n\t" "cmpq $4, %%rsi \n\t" // set ZF if (4*cs_c) == 4. 
"jz .SROWSTORBZ \n\t" // jump to row storage case " \n\t" " \n\t" " \n\t" ".SGENSTORBZ: \n\t" " \n\t" " \n\t" "vmovaps %%ymm4, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm7, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm10, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm13, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 8*cs_c " \n\t" " \n\t" "vmovaps %%ymm5, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm8, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm11, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm14, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%r12, %%rcx \n\t" // rcx = c + 16*cs_c " \n\t" " \n\t" "vmovaps %%ymm6, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm9, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm12, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm15, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "jmp .SDONE \n\t" // jump to end. 
" \n\t" " \n\t" " \n\t" ".SROWSTORBZ: \n\t" " \n\t" " \n\t" "vmovups %%ymm4, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovups %%ymm5, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovups %%ymm6, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovups %%ymm7, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovups %%ymm8, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovups %%ymm9, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovups %%ymm10, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovups %%ymm11, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovups %%ymm12, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovups %%ymm13, (%%rcx) \n\t" //"addq %%rdi, %%rcx \n\t" "vmovups %%ymm14, (%%rdx) \n\t" //"addq %%rdi, %%rdx \n\t" "vmovups %%ymm15, (%%r12) \n\t" //"addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".SDONE: \n\t" " \n\t" "vzeroupper \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), // 0 "m" (k_left), // 1 "m" (a), // 2 "m" (b), // 3 "m" (alpha), // 4 "m" (beta), // 5 "m" (c), // 6 "m" (rs_c), // 7 "m" (cs_c)/*, // 8 "m" (b_next), // 9 "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ); } #define DGEMM_INPUT_GS_BETA_NZ \ "vmovlpd (%%rcx ), %%xmm0, %%xmm0 \n\t" \ "vmovhpd (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ "vmovlpd (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ "vmovhpd (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ "vperm2f128 $0x20, %%ymm1, %%ymm0, %%ymm0 \n\t" /*\ "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ "vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t" \ "vperm2f128 $0x20, %%ymm1, %%ymm2, %%ymm2 \n\t"*/ #define DGEMM_OUTPUT_GS_BETA_NZ \ "vextractf128 $1, %%ymm0, %%xmm1 \n\t" \ "vmovlpd %%xmm0, (%%rcx 
) \n\t" \ "vmovhpd %%xmm0, (%%rcx,%%rsi ) \n\t" \ "vmovlpd %%xmm1, (%%rcx,%%rsi,2) \n\t" \ "vmovhpd %%xmm1, (%%rcx,%%r13 ) \n\t" /*\ "vextractf128 $1, %%ymm2, %%xmm1 \n\t" \ "vmovlpd %%xmm2, (%%rcx,%%rsi,4) \n\t" \ "vmovhpd %%xmm2, (%%rcx,%%r15 ) \n\t" \ "vmovlpd %%xmm1, (%%rcx,%%r13,2) \n\t" \ "vmovhpd %%xmm1, (%%rcx,%%r10 ) \n\t"*/ void bli_dgemm_haswell_asm_4x12 ( dim_t k0, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; __asm__ volatile ( " \n\t" "vzeroall \n\t" // zero all xmm/ymm registers. " \n\t" " \n\t" "movq %2, %%rax \n\t" // load address of a. "movq %3, %%rbx \n\t" // load address of b. //"movq %9, %%r15 \n\t" // load address of b_next. " \n\t" " \n\t" // initialize loop by pre-loading "vmovapd 0 * 32(%%rbx), %%ymm0 \n\t" "vmovapd 1 * 32(%%rbx), %%ymm1 \n\t" "vmovapd 2 * 32(%%rbx), %%ymm2 \n\t" " \n\t" "movq %6, %%rcx \n\t" // load address of c "movq %7, %%rdi \n\t" // load rs_c "leaq (,%%rdi,8), %%rdi \n\t" // rs_c *= sizeof(double) " \n\t" "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*rs_c; "prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*rs_c "prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*rs_c "prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*rs_c "prefetcht0 7 * 8(%%rcx,%%r13) \n\t" // prefetch c + 3*rs_c " \n\t" " \n\t" " \n\t" " \n\t" "movq %0, %%rsi \n\t" // i = k_iter; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .DCONSIDKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. 
" \n\t" " \n\t" ".DLOOPKITER: \n\t" // MAIN LOOP " \n\t" " \n\t" " \n\t" // iteration 0 "prefetcht0 16 * 32(%%rax) \n\t" " \n\t" "vbroadcastsd 0 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 1 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 2 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 3 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 3 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 4 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 5 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 1 "vbroadcastsd 4 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 5 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 6 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 7 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 6 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 7 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 8 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 2 "prefetcht0 22 * 32(%%rax) \n\t" " \n\t" "vbroadcastsd 8 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, 
%%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 9 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 10 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 11 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 9 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 10 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 11 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" // iteration 3 "vbroadcastsd 12 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 13 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 14 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 15 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 12 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 13 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 14 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" "addq $4 * 4 * 8, %%rax \n\t" // a += 4*4 (unroll x mr) "addq $4 * 12 * 8, %%rbx \n\t" // b += 4*12 (unroll x nr) " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .DLOOPKITER \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".DCONSIDKLEFT: \n\t" " \n\t" "movq %1, %%rsi \n\t" // i = k_left; "testq %%rsi, %%rsi \n\t" // check i via logical AND. 
"je .DPOSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. " \n\t" " \n\t" ".DLOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "prefetcht0 16 * 32(%%rax) \n\t" " \n\t" "vbroadcastsd 0 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm4 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm5 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm6 \n\t" " \n\t" "vbroadcastsd 1 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm7 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm8 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm9 \n\t" " \n\t" "vbroadcastsd 2 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm12 \n\t" " \n\t" "vbroadcastsd 3 * 8(%%rax), %%ymm3 \n\t" "vfmadd231pd %%ymm0, %%ymm3, %%ymm13 \n\t" "vmovapd 3 * 32(%%rbx), %%ymm0 \n\t" "vfmadd231pd %%ymm1, %%ymm3, %%ymm14 \n\t" "vmovapd 4 * 32(%%rbx), %%ymm1 \n\t" "vfmadd231pd %%ymm2, %%ymm3, %%ymm15 \n\t" "vmovapd 5 * 32(%%rbx), %%ymm2 \n\t" " \n\t" " \n\t" " \n\t" "addq $1 * 4 * 8, %%rax \n\t" // a += 1*4 (unroll x mr) "addq $1 * 12 * 8, %%rbx \n\t" // b += 1*12 (unroll x nr) " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .DLOOPKLEFT \n\t" // iterate again if i != 0. 
" \n\t" " \n\t" " \n\t" ".DPOSTACCUM: \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %4, %%rax \n\t" // load address of alpha "movq %5, %%rbx \n\t" // load address of beta "vbroadcastsd (%%rax), %%ymm0 \n\t" // load alpha and duplicate "vbroadcastsd (%%rbx), %%ymm3 \n\t" // load beta and duplicate " \n\t" "vmulpd %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha "vmulpd %%ymm0, %%ymm5, %%ymm5 \n\t" "vmulpd %%ymm0, %%ymm6, %%ymm6 \n\t" "vmulpd %%ymm0, %%ymm7, %%ymm7 \n\t" "vmulpd %%ymm0, %%ymm8, %%ymm8 \n\t" "vmulpd %%ymm0, %%ymm9, %%ymm9 \n\t" "vmulpd %%ymm0, %%ymm10, %%ymm10 \n\t" "vmulpd %%ymm0, %%ymm11, %%ymm11 \n\t" "vmulpd %%ymm0, %%ymm12, %%ymm12 \n\t" "vmulpd %%ymm0, %%ymm13, %%ymm13 \n\t" "vmulpd %%ymm0, %%ymm14, %%ymm14 \n\t" "vmulpd %%ymm0, %%ymm15, %%ymm15 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %8, %%rsi \n\t" // load cs_c "leaq (,%%rsi,8), %%rsi \n\t" // rsi = cs_c * sizeof(double) " \n\t" "leaq (%%rcx,%%rsi,4), %%rdx \n\t" // rdx = c + 4*cs_c; "leaq (%%rcx,%%rsi,8), %%r12 \n\t" // r12 = c + 8*cs_c; " \n\t" "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c; //"leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*cs_c; //"leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*cs_c; " \n\t" " \n\t" " \n\t" " \n\t" // now avoid loading C if beta == 0 " \n\t" "vxorpd %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. "vucomisd %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0. "je .DBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case " \n\t" " \n\t" "cmpq $8, %%rsi \n\t" // set ZF if (8*cs_c) == 8. 
"jz .DROWSTORED \n\t" // jump to row storage case " \n\t" " \n\t" " \n\t" ".DGENSTORED: \n\t" " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm7, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm13, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 4*cs_c " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm5, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm8, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm11, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm14, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%r12, %%rcx \n\t" // rcx = c + 8*cs_c " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm6, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm9, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm12, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm15, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" 
"jmp .DDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".DROWSTORED: \n\t" " \n\t" " \n\t" "vfmadd231pd (%%rcx), %%ymm3, %%ymm4 \n\t" "vmovupd %%ymm4, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231pd (%%rdx), %%ymm3, %%ymm5 \n\t" "vmovupd %%ymm5, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vfmadd231pd (%%r12), %%ymm3, %%ymm6 \n\t" "vmovupd %%ymm6, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vfmadd231pd (%%rcx), %%ymm3, %%ymm7 \n\t" "vmovupd %%ymm7, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231pd (%%rdx), %%ymm3, %%ymm8 \n\t" "vmovupd %%ymm8, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vfmadd231pd (%%r12), %%ymm3, %%ymm9 \n\t" "vmovupd %%ymm9, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vfmadd231pd (%%rcx), %%ymm3, %%ymm10 \n\t" "vmovupd %%ymm10, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231pd (%%rdx), %%ymm3, %%ymm11 \n\t" "vmovupd %%ymm11, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vfmadd231pd (%%r12), %%ymm3, %%ymm12 \n\t" "vmovupd %%ymm12, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" "vfmadd231pd (%%rcx), %%ymm3, %%ymm13 \n\t" "vmovupd %%ymm13, (%%rcx) \n\t" //"addq %%rdi, %%rcx \n\t" "vfmadd231pd (%%rdx), %%ymm3, %%ymm14 \n\t" "vmovupd %%ymm14, (%%rdx) \n\t" //"addq %%rdi, %%rdx \n\t" "vfmadd231pd (%%r12), %%ymm3, %%ymm15 \n\t" "vmovupd %%ymm15, (%%r12) \n\t" //"addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" " \n\t" "jmp .DDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".DBETAZERO: \n\t" " \n\t" // check if aligned/column-stored "cmpq $8, %%rsi \n\t" // set ZF if (8*cs_c) == 8. 
"jz .DROWSTORBZ \n\t" // jump to row storage case " \n\t" " \n\t" " \n\t" ".DGENSTORBZ: \n\t" " \n\t" " \n\t" "vmovapd %%ymm4, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm7, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm10, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm13, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 4*cs_c " \n\t" " \n\t" "vmovapd %%ymm5, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm8, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm11, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm14, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%r12, %%rcx \n\t" // rcx = c + 8*cs_c " \n\t" " \n\t" "vmovapd %%ymm6, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm9, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm12, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovapd %%ymm15, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "jmp .DDONE \n\t" // jump to end. 
" \n\t" " \n\t" " \n\t" ".DROWSTORBZ: \n\t" " \n\t" " \n\t" "vmovupd %%ymm4, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovupd %%ymm5, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovupd %%ymm6, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovupd %%ymm7, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovupd %%ymm8, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovupd %%ymm9, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovupd %%ymm10, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovupd %%ymm11, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" "vmovupd %%ymm12, (%%r12) \n\t" "addq %%rdi, %%r12 \n\t" " \n\t" "vmovupd %%ymm13, (%%rcx) \n\t" //"addq %%rdi, %%rcx \n\t" "vmovupd %%ymm14, (%%rdx) \n\t" //"addq %%rdi, %%rdx \n\t" "vmovupd %%ymm15, (%%r12) \n\t" //"addq %%rdi, %%r12 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".DDONE: \n\t" " \n\t" "vzeroupper \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), // 0 "m" (k_left), // 1 "m" (a), // 2 "m" (b), // 3 "m" (alpha), // 4 "m" (beta), // 5 "m" (c), // 6 "m" (rs_c), // 7 "m" (cs_c)/*, // 8 "m" (b_next), // 9 "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ); } #if 0 void bli_cgemm_haswell_asm_ ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; } void bli_zgemm_haswell_asm_ ( dim_t k0, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; } #endif blis-0.6.1/kernels/haswell/3/old/bli_gemm_haswell_asm_d6x8.c000066400000000000000000003420721360743507500236620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define SGEMM_INPUT_GS_BETA_NZ \ "vmovlps (%%rcx ), %%xmm0, %%xmm0 \n\t" \ "vmovhps (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ "vmovlps (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ "vmovhps (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ "vshufps $0x88, %%xmm1, %%xmm0, %%xmm0 \n\t" \ "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ /* We can't use vmovhps for loading the last element becauase that might result in reading beyond valid memory. (vmov[lh]psd load pairs of adjacent floats at a time.) So we need to use vmovss instead. But since we're limited to using ymm0 through ymm2 (ymm3 contains beta and ymm4 through ymm15 contain the microtile) and due to the way vmovss zeros out all bits above 31, we have to load element 7 before element 6. 
*/ \ "vmovss (%%rcx,%%r10 ), %%xmm1 \n\t" \ "vpermilps $0xcf, %%xmm1, %%xmm1 \n\t" \ "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ /*"vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t"*/ \ "vshufps $0x88, %%xmm1, %%xmm2, %%xmm2 \n\t" \ "vperm2f128 $0x20, %%ymm2, %%ymm0, %%ymm0 \n\t" #define SGEMM_OUTPUT_GS_BETA_NZ \ "vextractf128 $1, %%ymm0, %%xmm2 \n\t" \ "vmovss %%xmm0, (%%rcx ) \n\t" \ "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%rsi,1) \n\t" \ "vpermilps $0x39, %%xmm1, %%xmm0 \n\t" \ "vmovss %%xmm0, (%%rcx,%%rsi,2) \n\t" \ "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r13 ) \n\t" \ "vmovss %%xmm2, (%%rcx,%%rsi,4) \n\t" \ "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r15 ) \n\t" \ "vpermilps $0x39, %%xmm1, %%xmm2 \n\t" \ "vmovss %%xmm2, (%%rcx,%%r13,2) \n\t" \ "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ "vmovss %%xmm1, (%%rcx,%%r10 ) \n\t" void bli_sgemm_haswell_asm_6x16 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; __asm__ volatile ( " \n\t" "vzeroall \n\t" // zero all xmm/ymm registers. " \n\t" " \n\t" "movq %2, %%rax \n\t" // load address of a. "movq %3, %%rbx \n\t" // load address of b. //"movq %9, %%r15 \n\t" // load address of b_next. 
" \n\t" "addq $32 * 4, %%rbx \n\t" " \n\t" // initialize loop by pre-loading "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" " \n\t" "movq %6, %%rcx \n\t" // load address of c "movq %7, %%rdi \n\t" // load rs_c "leaq (,%%rdi,4), %%rdi \n\t" // rs_c *= sizeof(float) " \n\t" "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*rs_c; "leaq (%%rcx,%%r13,1), %%rdx \n\t" // rdx = c + 3*rs_c; "prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*rs_c "prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*rs_c "prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*rs_c "prefetcht0 7 * 8(%%rdx) \n\t" // prefetch c + 3*rs_c "prefetcht0 7 * 8(%%rdx,%%rdi) \n\t" // prefetch c + 4*rs_c "prefetcht0 7 * 8(%%rdx,%%rdi,2) \n\t" // prefetch c + 5*rs_c " \n\t" " \n\t" " \n\t" " \n\t" "movq %0, %%rsi \n\t" // i = k_iter; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .SCONSIDKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. " \n\t" " \n\t" ".SLOOPKITER: \n\t" // MAIN LOOP " \n\t" " \n\t" " \n\t" // iteration 0 "prefetcht0 64 * 4(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 1 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 2 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 3 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 4 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 5 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "vmovaps -2 * 32(%%rbx), %%ymm0 \n\t" "vmovaps -1 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" // iteration 1 
"vbroadcastss 6 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 7 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 8 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 9 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 10 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 11 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "vmovaps 0 * 32(%%rbx), %%ymm0 \n\t" "vmovaps 1 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" // iteration 2 "prefetcht0 76 * 4(%%rax) \n\t" " \n\t" "vbroadcastss 12 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 13 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 14 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 15 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 16 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 17 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "vmovaps 2 * 32(%%rbx), %%ymm0 \n\t" "vmovaps 3 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" // iteration 3 "vbroadcastss 18 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 19 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps 
%%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 20 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 21 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 22 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 23 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "addq $4 * 6 * 4, %%rax \n\t" // a += 4*6 (unroll x mr) "addq $4 * 16 * 4, %%rbx \n\t" // b += 4*16 (unroll x nr) " \n\t" "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .SLOOPKITER \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".SCONSIDKLEFT: \n\t" " \n\t" "movq %1, %%rsi \n\t" // i = k_left; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .SPOSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. 
" \n\t" " \n\t" ".SLOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "prefetcht0 64 * 4(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 1 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 2 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 3 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 4 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 5 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "addq $1 * 6 * 4, %%rax \n\t" // a += 1*6 (unroll x mr) "addq $1 * 16 * 4, %%rbx \n\t" // b += 1*16 (unroll x nr) " \n\t" "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .SLOOPKLEFT \n\t" // iterate again if i != 0. 
" \n\t" " \n\t" " \n\t" ".SPOSTACCUM: \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %4, %%rax \n\t" // load address of alpha "movq %5, %%rbx \n\t" // load address of beta "vbroadcastss (%%rax), %%ymm0 \n\t" // load alpha and duplicate "vbroadcastss (%%rbx), %%ymm3 \n\t" // load beta and duplicate " \n\t" "vmulps %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha "vmulps %%ymm0, %%ymm5, %%ymm5 \n\t" "vmulps %%ymm0, %%ymm6, %%ymm6 \n\t" "vmulps %%ymm0, %%ymm7, %%ymm7 \n\t" "vmulps %%ymm0, %%ymm8, %%ymm8 \n\t" "vmulps %%ymm0, %%ymm9, %%ymm9 \n\t" "vmulps %%ymm0, %%ymm10, %%ymm10 \n\t" "vmulps %%ymm0, %%ymm11, %%ymm11 \n\t" "vmulps %%ymm0, %%ymm12, %%ymm12 \n\t" "vmulps %%ymm0, %%ymm13, %%ymm13 \n\t" "vmulps %%ymm0, %%ymm14, %%ymm14 \n\t" "vmulps %%ymm0, %%ymm15, %%ymm15 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %8, %%rsi \n\t" // load cs_c "leaq (,%%rsi,4), %%rsi \n\t" // rsi = cs_c * sizeof(float) " \n\t" "leaq (%%rcx,%%rsi,8), %%rdx \n\t" // load address of c + 8*cs_c; " \n\t" "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c; "leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*cs_c; "leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*cs_c; " \n\t" " \n\t" " \n\t" // now avoid loading C if beta == 0 " \n\t" "vxorps %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. "vucomiss %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0. "je .SBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case " \n\t" " \n\t" "cmpq $4, %%rsi \n\t" // set ZF if (4*cs_c) == 4. 
"jz .SROWSTORED \n\t" // jump to row storage case " \n\t" " \n\t" " \n\t" ".SGENSTORED: \n\t" " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm6, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm8, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm12, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm14, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 8*cs_c " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm5, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm7, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm9, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm11, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm13, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm15, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" " \n\t" "jmp .SDONE \n\t" // jump to end. 
" \n\t" " \n\t" " \n\t" ".SROWSTORED: \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm4 \n\t" "vmovups %%ymm4, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm5 \n\t" "vmovups %%ymm5, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm6 \n\t" "vmovups %%ymm6, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm7 \n\t" "vmovups %%ymm7, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm8 \n\t" "vmovups %%ymm8, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm9 \n\t" "vmovups %%ymm9, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm10 \n\t" "vmovups %%ymm10, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm11 \n\t" "vmovups %%ymm11, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm12 \n\t" "vmovups %%ymm12, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm13 \n\t" "vmovups %%ymm13, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vfmadd231ps (%%rcx), %%ymm3, %%ymm14 \n\t" "vmovups %%ymm14, (%%rcx) \n\t" //"addq %%rdi, %%rcx \n\t" "vfmadd231ps (%%rdx), %%ymm3, %%ymm15 \n\t" "vmovups %%ymm15, (%%rdx) \n\t" //"addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" " \n\t" "jmp .SDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".SBETAZERO: \n\t" " \n\t" "cmpq $4, %%rsi \n\t" // set ZF if (4*cs_c) == 4. 
"jz .SROWSTORBZ \n\t" // jump to row storage case " \n\t" " \n\t" " \n\t" ".SGENSTORBZ: \n\t" " \n\t" " \n\t" "vmovaps %%ymm4, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm6, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm8, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm10, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm12, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm14, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 8*cs_c " \n\t" " \n\t" "vmovaps %%ymm5, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm7, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm9, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm11, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm13, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ "addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" "vmovaps %%ymm15, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ //"addq %%rdi, %%rcx \n\t" // c += rs_c; " \n\t" " \n\t" " \n\t" "jmp .SDONE \n\t" // jump to end. 
" \n\t" " \n\t" " \n\t" ".SROWSTORBZ: \n\t" " \n\t" " \n\t" "vmovups %%ymm4, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovups %%ymm5, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" "vmovups %%ymm6, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovups %%ymm7, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vmovups %%ymm8, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovups %%ymm9, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vmovups %%ymm10, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovups %%ymm11, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vmovups %%ymm12, (%%rcx) \n\t" "addq %%rdi, %%rcx \n\t" "vmovups %%ymm13, (%%rdx) \n\t" "addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" "vmovups %%ymm14, (%%rcx) \n\t" //"addq %%rdi, %%rcx \n\t" "vmovups %%ymm15, (%%rdx) \n\t" //"addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".SDONE: \n\t" " \n\t" "vzeroupper \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), // 0 "m" (k_left), // 1 "m" (a), // 2 "m" (b), // 3 "m" (alpha), // 4 "m" (beta), // 5 "m" (c), // 6 "m" (rs_c), // 7 "m" (cs_c)/*, // 8 "m" (b_next), // 9 "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ); } #define DGEMM_INPUT_GS_BETA_NZ \ "vmovlpd (%%rcx ), %%xmm0, %%xmm0 \n\t" \ "vmovhpd (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ "vmovlpd (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ "vmovhpd (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ "vperm2f128 $0x20, %%ymm1, %%ymm0, %%ymm0 \n\t" /*\ "vmovlpd (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ "vmovhpd (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ "vmovlpd (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ "vmovhpd (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t" \ "vperm2f128 $0x20, %%ymm1, %%ymm2, %%ymm2 \n\t"*/ #define DGEMM_OUTPUT_GS_BETA_NZ \ "vextractf128 $1, 
%%ymm0, %%xmm1 \n\t" \
	"vmovlpd %%xmm0, (%%rcx ) \n\t" \
	"vmovhpd %%xmm0, (%%rcx,%%rsi ) \n\t" \
	"vmovlpd %%xmm1, (%%rcx,%%rsi,2) \n\t" \
	"vmovhpd %%xmm1, (%%rcx,%%r13 ) \n\t" /*\
	"vextractf128 $1, %%ymm2, %%xmm1 \n\t" \
	"vmovlpd %%xmm2, (%%rcx,%%rsi,4) \n\t" \
	"vmovhpd %%xmm2, (%%rcx,%%r15 ) \n\t" \
	"vmovlpd %%xmm1, (%%rcx,%%r13,2) \n\t" \
	"vmovhpd %%xmm1, (%%rcx,%%r10 ) \n\t"*/

// Double-precision real GEMM micro-kernel (6 rows x 8 columns) written with
// AVX2/FMA inline assembly. It computes
//
//   C := beta * C + alpha * A * B
//
// on a 6x8 tile of C, where each of the k0 steps reads 6 doubles from a and
// 8 doubles from b.
//
// Parameters:
//   k0      number of rank-1 updates to accumulate.
//   alpha   pointer to the scalar applied to the accumulated product.
//   a       panel of A; 6 consecutive doubles are consumed per step.
//   b       panel of B; 8 consecutive doubles are consumed per step. It is
//           read with vmovapd, so it must be 32-byte aligned.
//   beta    pointer to the scalar applied to C. When it compares equal to
//           zero, C is never read, only written.
//   c       pointer to the top-left element of the output tile.
//   rs_c0   row stride of C, in elements.
//   cs_c0   column stride of C, in elements. cs_c0 == 1 selects the
//           contiguous-row store paths; anything else uses the
//           element-by-element general-stride paths.
//   data    unused here (the a_next/b_next prefetch hooks are commented out).
//   cntx    unused here.
//
// Register usage inside the asm statement:
//   rax / rbx     current position in a / b (rbx is biased by +128 bytes).
//   rcx / rdx     current output addresses within C.
//   rdi / rsi     rs_c / cs_c scaled to bytes (rsi doubles as loop counter
//                 before the strides are needed).
//   ymm0, ymm1    the two 4-double halves of the current row of b.
//   ymm2, ymm3    broadcast elements of a.
//   ymm4..ymm15   accumulators: row i of the tile lives in
//                 ymm(4+2i) (columns 0-3) and ymm(5+2i) (columns 4-7).
void bli_dgemm_haswell_asm_6x8
     (
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a,
       double*    restrict b,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// The main loop is unrolled by four, so k0 is split into full groups
	// of four (k_iter) and a remainder (k_left).
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	__asm__ volatile
	(
	" \n\t"
	"vzeroall \n\t" // zero all xmm/ymm registers.
	" \n\t"
	" \n\t"
	"movq %2, %%rax \n\t" // load address of a.
	"movq %3, %%rbx \n\t" // load address of b.
	//"movq %9, %%r15 \n\t" // load address of b_next.
	" \n\t"
	// Bias rbx by 128 bytes so the loads of b below use displacements
	// in the range -128..+96.
	"addq $32 * 4, %%rbx \n\t"
	" \n\t" // initialize loop by pre-loading
	"vmovapd -4 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	"movq %6, %%rcx \n\t" // load address of c
	"movq %7, %%rdi \n\t" // load rs_c
	"leaq (,%%rdi,8), %%rdi \n\t" // rs_c *= sizeof(double)
	" \n\t"
	"leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*rs_c;
	"leaq (%%rcx,%%r13,1), %%rdx \n\t" // rdx = c + 3*rs_c;
	"prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*rs_c
	"prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*rs_c
	"prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*rs_c
	"prefetcht0 7 * 8(%%rdx) \n\t" // prefetch c + 3*rs_c
	"prefetcht0 7 * 8(%%rdx,%%rdi) \n\t" // prefetch c + 4*rs_c
	"prefetcht0 7 * 8(%%rdx,%%rdi,2) \n\t" // prefetch c + 5*rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %0, %%rsi \n\t" // i = k_iter;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .DCONSIDKLEFT \n\t" // if i == 0, jump to code that
	" \n\t" // contains the k_left loop.
	" \n\t"
	" \n\t"
	".DLOOPKITER: \n\t" // MAIN LOOP
	" \n\t"
	" \n\t"
	" \n\t" // iteration 0
	"prefetcht0 64 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 0 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 1 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 2 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 3 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 4 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 5 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd -2 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd -1 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 1
	"vbroadcastsd 6 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 7 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 8 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 9 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 10 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 11 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd 0 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd 1 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 2
	"prefetcht0 76 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 12 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 13 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 14 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 15 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 16 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 17 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd 2 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd 3 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 3
	"vbroadcastsd 18 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 19 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 20 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 21 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 22 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 23 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $4 * 6 * 8, %%rax \n\t" // a += 4*6 (unroll x mr)
	"addq $4 * 8 * 8, %%rbx \n\t" // b += 4*8 (unroll x nr)
	" \n\t"
	// Pre-load the first row of b for the next pass (or the edge loop).
	"vmovapd -4 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .DLOOPKITER \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".DCONSIDKLEFT: \n\t"
	" \n\t"
	"movq %1, %%rsi \n\t" // i = k_left;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .DPOSTACCUM \n\t" // if i == 0, we're done; jump to end.
	" \n\t" // else, we prepare to enter k_left loop.
	" \n\t"
	" \n\t"
	".DLOOPKLEFT: \n\t" // EDGE LOOP
	" \n\t"
	"prefetcht0 64 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 0 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 1 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 2 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 3 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 4 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 5 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $1 * 6 * 8, %%rax \n\t" // a += 1*6 (unroll x mr)
	"addq $1 * 8 * 8, %%rbx \n\t" // b += 1*8 (unroll x nr)
	" \n\t"
	"vmovapd -4 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .DLOOPKLEFT \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	".DPOSTACCUM: \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %4, %%rax \n\t" // load address of alpha
	"movq %5, %%rbx \n\t" // load address of beta
	"vbroadcastsd (%%rax), %%ymm0 \n\t" // load alpha and duplicate
	"vbroadcastsd (%%rbx), %%ymm3 \n\t" // load beta and duplicate
	" \n\t"
	"vmulpd %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha
	"vmulpd %%ymm0, %%ymm5, %%ymm5 \n\t"
	"vmulpd %%ymm0, %%ymm6, %%ymm6 \n\t"
	"vmulpd %%ymm0, %%ymm7, %%ymm7 \n\t"
	"vmulpd %%ymm0, %%ymm8, %%ymm8 \n\t"
	"vmulpd %%ymm0, %%ymm9, %%ymm9 \n\t"
	"vmulpd %%ymm0, %%ymm10, %%ymm10 \n\t"
	"vmulpd %%ymm0, %%ymm11, %%ymm11 \n\t"
	"vmulpd %%ymm0, %%ymm12, %%ymm12 \n\t"
	"vmulpd %%ymm0, %%ymm13, %%ymm13 \n\t"
	"vmulpd %%ymm0, %%ymm14, %%ymm14 \n\t"
	"vmulpd %%ymm0, %%ymm15, %%ymm15 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %8, %%rsi \n\t" // load cs_c
	"leaq (,%%rsi,8), %%rsi \n\t" // rsi = cs_c * sizeof(double)
	" \n\t"
	"leaq (%%rcx,%%rsi,4), %%rdx \n\t" // load address of c + 4*cs_c;
	" \n\t"
	// r13 is reused here: it held 3*rs_c above and now holds 3*cs_c,
	// which the general-stride load/store macros index with.
	"leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c;
	//"leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*cs_c;
	//"leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*cs_c;
	" \n\t"
	" \n\t"
	" \n\t" // now avoid loading C if beta == 0
	" \n\t"
	"vxorpd %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero.
	"vucomisd %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0.
	"je .DBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case
	" \n\t"
	" \n\t"
	"cmpq $8, %%rsi \n\t" // set ZF if (8*cs_c) == 8.
	"jz .DROWSTORED \n\t" // jump to row storage case
	" \n\t"
	" \n\t"
	" \n\t"
	// General stride, beta != 0: for each row, gather four elements of
	// C into ymm0, compute ymm0 = beta*ymm0 + accumulator, scatter back.
	// Columns 0-3 of all six rows are handled first, then columns 4-7.
	".DGENSTORED: \n\t"
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm6, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm8, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm12, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm14, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	" \n\t"
	" \n\t"
	"movq %%rdx, %%rcx \n\t" // rcx = c + 4*cs_c
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm5, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm7, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm9, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm11, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm13, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm15, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .DDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	// Contiguous rows (cs_c == 1), beta != 0: each accumulator gets
	// beta*C added straight from memory and is stored with one unaligned
	// 32-byte store. rcx walks columns 0-3, rdx walks columns 4-7.
	".DROWSTORED: \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm4 \n\t"
	"vmovupd %%ymm4, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm5 \n\t"
	"vmovupd %%ymm5, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm6 \n\t"
	"vmovupd %%ymm6, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm7 \n\t"
	"vmovupd %%ymm7, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm8 \n\t"
	"vmovupd %%ymm8, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm9 \n\t"
	"vmovupd %%ymm9, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm10 \n\t"
	"vmovupd %%ymm10, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm11 \n\t"
	"vmovupd %%ymm11, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm12 \n\t"
	"vmovupd %%ymm12, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm13 \n\t"
	"vmovupd %%ymm13, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm14 \n\t"
	"vmovupd %%ymm14, (%%rcx) \n\t"
	//"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm15 \n\t"
	"vmovupd %%ymm15, (%%rdx) \n\t"
	//"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .DDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".DBETAZERO: \n\t"
	" \n\t"
	"cmpq $8, %%rsi \n\t" // set ZF if (8*cs_c) == 8.
	"jz .DROWSTORBZ \n\t" // jump to row storage case
	" \n\t"
	" \n\t"
	" \n\t"
	// General stride, beta == 0: copy each accumulator into ymm0 and
	// scatter it to C without reading C first.
	".DGENSTORBZ: \n\t"
	" \n\t"
	" \n\t"
	"vmovapd %%ymm4, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm6, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm8, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm10, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm12, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm14, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	" \n\t"
	" \n\t"
	"movq %%rdx, %%rcx \n\t" // rcx = c + 4*cs_c
	" \n\t"
	" \n\t"
	"vmovapd %%ymm5, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm7, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm9, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm11, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm13, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm15, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .DDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	// Contiguous rows (cs_c == 1), beta == 0: store the accumulators
	// directly; C is never read.
	".DROWSTORBZ: \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm4, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm5, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	"vmovupd %%ymm6, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm7, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm8, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm9, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm10, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm11, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm12, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm13, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm14, (%%rcx) \n\t"
	//"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm15, (%%rdx) \n\t"
	//"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".DDONE: \n\t"
	" \n\t"
	"vzeroupper \n\t"
	" \n\t"

	: // output operands (none)
	: // input operands
	  "m" (k_iter), // 0
	  "m" (k_left), // 1
	  "m" (a),      // 2
	  "m" (b),      // 3
	  "m" (alpha),  // 4
	  "m" (beta),   // 5
	  "m" (c),      // 6
	  "m" (rs_c),   // 7
	  "m" (cs_c)/*,   // 8
	  "m" (b_next), // 9
	  "m" (a_next)*/  // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	);
}



// assumes beta.r, beta.i have been broadcast into ymm1, ymm2.
// outputs to ymm0 #define CGEMM_INPUT_SCALE_GS_BETA_NZ \ "vmovlpd (%%rcx ), %%xmm0, %%xmm0 \n\t" \ "vmovhpd (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ "vmovlpd (%%rcx,%%rsi,2), %%xmm3, %%xmm3 \n\t" \ "vmovhpd (%%rcx,%%r13 ), %%xmm3, %%xmm3 \n\t" \ "vinsertf128 $1, %%xmm3, %%ymm0, %%ymm0 \n\t" \ "vpermilps $0xb1, %%ymm0, %%ymm3 \n\t" \ "vmulps %%ymm1, %%ymm0, %%ymm0 \n\t" \ "vmulps %%ymm2, %%ymm3, %%ymm3 \n\t" \ "vaddsubps %%ymm3, %%ymm0, %%ymm0 \n\t" // assumes values to output are in ymm0 #define CGEMM_OUTPUT_GS \ "vextractf128 $1, %%ymm0, %%xmm3 \n\t" \ "vmovlpd %%xmm0, (%%rcx ) \n\t" \ "vmovhpd %%xmm0, (%%rcx,%%rsi,1) \n\t" \ "vmovlpd %%xmm3, (%%rcx,%%rsi,2) \n\t" \ "vmovhpd %%xmm3, (%%rcx,%%r13 ) \n\t" #define CGEMM_INPUT_SCALE_RS_BETA_NZ \ "vmovups (%%rcx), %%ymm0 \n\t" \ "vpermilps $0xb1, %%ymm0, %%ymm3 \n\t" \ "vmulps %%ymm1, %%ymm0, %%ymm0 \n\t" \ "vmulps %%ymm2, %%ymm3, %%ymm3 \n\t" \ "vaddsubps %%ymm3, %%ymm0, %%ymm0 \n\t" #define CGEMM_OUTPUT_RS \ "vmovups %%ymm0, (%%rcx) \n\t" \ void bli_cgemm_haswell_asm_3x8 ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; __asm__ volatile ( " \n\t" "vzeroall \n\t" // zero all xmm/ymm registers. " \n\t" " \n\t" "movq %2, %%rax \n\t" // load address of a. "movq %3, %%rbx \n\t" // load address of b. //"movq %9, %%r15 \n\t" // load address of b_next. 
" \n\t" "addq $32 * 4, %%rbx \n\t" " \n\t" // initialize loop by pre-loading "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" " \n\t" "movq %6, %%rcx \n\t" // load address of c "movq %7, %%rdi \n\t" // load rs_c "leaq (,%%rdi,8), %%rdi \n\t" // rs_c *= sizeof(scomplex) " \n\t" "leaq (%%rcx,%%rdi,1), %%r11 \n\t" // r11 = c + 1*rs_c; "leaq (%%rcx,%%rdi,2), %%r12 \n\t" // r12 = c + 2*rs_c; " \n\t" "prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*rs_c "prefetcht0 7 * 8(%%r11) \n\t" // prefetch c + 1*rs_c "prefetcht0 7 * 8(%%r12) \n\t" // prefetch c + 2*rs_c " \n\t" " \n\t" " \n\t" " \n\t" "movq %0, %%rsi \n\t" // i = k_iter; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .CCONSIDKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. " \n\t" " \n\t" ".CLOOPKITER: \n\t" // MAIN LOOP " \n\t" " \n\t" " \n\t" // iteration 0 "prefetcht0 32 * 8(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 1 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 2 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 3 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 4 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 5 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "vmovaps -2 * 32(%%rbx), %%ymm0 \n\t" "vmovaps -1 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" // iteration 1 "vbroadcastss 6 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 7 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps 
%%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 8 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 9 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 10 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 11 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "vmovaps 0 * 32(%%rbx), %%ymm0 \n\t" "vmovaps 1 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" // iteration 2 "prefetcht0 38 * 8(%%rax) \n\t" " \n\t" "vbroadcastss 12 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 13 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 14 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 15 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 16 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 17 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "vmovaps 2 * 32(%%rbx), %%ymm0 \n\t" "vmovaps 3 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" // iteration 3 "vbroadcastss 18 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 19 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 20 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 21 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps 
%%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 22 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 23 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "addq $4 * 3 * 8, %%rax \n\t" // a += 4*3 (unroll x mr) "addq $4 * 8 * 8, %%rbx \n\t" // b += 4*8 (unroll x nr) " \n\t" "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .CLOOPKITER \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".CCONSIDKLEFT: \n\t" " \n\t" "movq %1, %%rsi \n\t" // i = k_left; "testq %%rsi, %%rsi \n\t" // check i via logical AND. "je .CPOSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. " \n\t" " \n\t" ".CLOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "prefetcht0 32 * 8(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 1 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" " \n\t" "vbroadcastss 2 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 3 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" " \n\t" "vbroadcastss 4 * 4(%%rax), %%ymm2 \n\t" "vbroadcastss 5 * 4(%%rax), %%ymm3 \n\t" "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" " \n\t" "addq $1 * 3 * 8, %%rax \n\t" // a += 1*3 (unroll x mr) "addq $1 * 8 * 8, %%rbx \n\t" // b += 1*8 (unroll x nr) " \n\t" 
"vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" " \n\t" " \n\t" "decq %%rsi \n\t" // i -= 1; "jne .CLOOPKLEFT \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" ".CPOSTACCUM: \n\t" " \n\t" " \n\t" " \n\t" // permute even and odd elements " \n\t" // of ymm6/7, ymm10/11, ymm/14/15 "vpermilps $0xb1, %%ymm6, %%ymm6 \n\t" "vpermilps $0xb1, %%ymm7, %%ymm7 \n\t" "vpermilps $0xb1, %%ymm10, %%ymm10 \n\t" "vpermilps $0xb1, %%ymm11, %%ymm11 \n\t" "vpermilps $0xb1, %%ymm14, %%ymm14 \n\t" "vpermilps $0xb1, %%ymm15, %%ymm15 \n\t" " \n\t" " \n\t" " \n\t" // subtract/add even/odd elements "vaddsubps %%ymm6, %%ymm4, %%ymm4 \n\t" "vaddsubps %%ymm7, %%ymm5, %%ymm5 \n\t" " \n\t" "vaddsubps %%ymm10, %%ymm8, %%ymm8 \n\t" "vaddsubps %%ymm11, %%ymm9, %%ymm9 \n\t" " \n\t" "vaddsubps %%ymm14, %%ymm12, %%ymm12 \n\t" "vaddsubps %%ymm15, %%ymm13, %%ymm13 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %4, %%rax \n\t" // load address of alpha "vbroadcastss (%%rax), %%ymm0 \n\t" // load alpha_r and duplicate "vbroadcastss 4(%%rax), %%ymm1 \n\t" // load alpha_i and duplicate " \n\t" " \n\t" "vpermilps $0xb1, %%ymm4, %%ymm3 \n\t" "vmulps %%ymm0, %%ymm4, %%ymm4 \n\t" "vmulps %%ymm1, %%ymm3, %%ymm3 \n\t" "vaddsubps %%ymm3, %%ymm4, %%ymm4 \n\t" " \n\t" "vpermilps $0xb1, %%ymm5, %%ymm3 \n\t" "vmulps %%ymm0, %%ymm5, %%ymm5 \n\t" "vmulps %%ymm1, %%ymm3, %%ymm3 \n\t" "vaddsubps %%ymm3, %%ymm5, %%ymm5 \n\t" " \n\t" " \n\t" "vpermilps $0xb1, %%ymm8, %%ymm3 \n\t" "vmulps %%ymm0, %%ymm8, %%ymm8 \n\t" "vmulps %%ymm1, %%ymm3, %%ymm3 \n\t" "vaddsubps %%ymm3, %%ymm8, %%ymm8 \n\t" " \n\t" "vpermilps $0xb1, %%ymm9, %%ymm3 \n\t" "vmulps %%ymm0, %%ymm9, %%ymm9 \n\t" "vmulps %%ymm1, %%ymm3, %%ymm3 \n\t" "vaddsubps %%ymm3, %%ymm9, %%ymm9 \n\t" " \n\t" " \n\t" "vpermilps $0xb1, %%ymm12, %%ymm3 \n\t" "vmulps %%ymm0, %%ymm12, %%ymm12 \n\t" "vmulps %%ymm1, %%ymm3, %%ymm3 \n\t" "vaddsubps %%ymm3, %%ymm12, %%ymm12 \n\t" " \n\t" "vpermilps $0xb1, %%ymm13, %%ymm3 \n\t" "vmulps %%ymm0, %%ymm13, 
%%ymm13 \n\t" "vmulps %%ymm1, %%ymm3, %%ymm3 \n\t" "vaddsubps %%ymm3, %%ymm13, %%ymm13 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "movq %5, %%rbx \n\t" // load address of beta "vbroadcastss (%%rbx), %%ymm1 \n\t" // load beta_r and duplicate "vbroadcastss 4(%%rbx), %%ymm2 \n\t" // load beta_i and duplicate " \n\t" " \n\t" " \n\t" " \n\t" "movq %8, %%rsi \n\t" // load cs_c "leaq (,%%rsi,8), %%rsi \n\t" // rsi = cs_c * sizeof(scomplex) "leaq (,%%rsi,4), %%rdx \n\t" // rdx = 4*cs_c; "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c; " \n\t" " \n\t" " \n\t" " \n\t" // now avoid loading C if beta == 0 "vxorps %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. "vucomiss %%xmm0, %%xmm1 \n\t" // set ZF if beta_r == 0. "sete %%r8b \n\t" // r8b = ( ZF == 1 ? 1 : 0 ); "vucomiss %%xmm0, %%xmm2 \n\t" // set ZF if beta_i == 0. "sete %%r9b \n\t" // r9b = ( ZF == 1 ? 1 : 0 ); "andb %%r8b, %%r9b \n\t" // set ZF if r8b & r9b == 1. "jne .CBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case " \n\t" " \n\t" "cmpq $8, %%rsi \n\t" // set ZF if (8*cs_c) == 8. 
"jz .CROWSTORED \n\t" // jump to row storage case " \n\t" " \n\t" " \n\t" ".CGENSTORED: \n\t" " \n\t" " \n\t" CGEMM_INPUT_SCALE_GS_BETA_NZ "vaddps %%ymm4, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_GS "addq %%rdx, %%rcx \n\t" // c += 4*cs_c; " \n\t" " \n\t" CGEMM_INPUT_SCALE_GS_BETA_NZ "vaddps %%ymm5, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_GS "movq %%r11, %%rcx \n\t" // rcx = c + 1*rs_c " \n\t" " \n\t" " \n\t" CGEMM_INPUT_SCALE_GS_BETA_NZ "vaddps %%ymm8, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_GS "addq %%rdx, %%rcx \n\t" // c += 4*cs_c; " \n\t" " \n\t" CGEMM_INPUT_SCALE_GS_BETA_NZ "vaddps %%ymm9, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_GS "movq %%r12, %%rcx \n\t" // rcx = c + 2*rs_c " \n\t" " \n\t" " \n\t" CGEMM_INPUT_SCALE_GS_BETA_NZ "vaddps %%ymm12, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_GS "addq %%rdx, %%rcx \n\t" // c += 4*cs_c; " \n\t" " \n\t" CGEMM_INPUT_SCALE_GS_BETA_NZ "vaddps %%ymm13, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_GS " \n\t" " \n\t" " \n\t" "jmp .CDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".CROWSTORED: \n\t" " \n\t" " \n\t" CGEMM_INPUT_SCALE_RS_BETA_NZ "vaddps %%ymm4, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_RS "addq %%rdx, %%rcx \n\t" // c += 4*cs_c; " \n\t" " \n\t" CGEMM_INPUT_SCALE_RS_BETA_NZ "vaddps %%ymm5, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_RS "movq %%r11, %%rcx \n\t" // rcx = c + 1*rs_c " \n\t" " \n\t" " \n\t" CGEMM_INPUT_SCALE_RS_BETA_NZ "vaddps %%ymm8, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_RS "addq %%rdx, %%rcx \n\t" // c += 4*cs_c; " \n\t" " \n\t" CGEMM_INPUT_SCALE_RS_BETA_NZ "vaddps %%ymm9, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_RS "movq %%r12, %%rcx \n\t" // rcx = c + 2*rs_c " \n\t" " \n\t" " \n\t" CGEMM_INPUT_SCALE_RS_BETA_NZ "vaddps %%ymm12, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_RS "addq %%rdx, %%rcx \n\t" // c += 4*cs_c; " \n\t" " \n\t" CGEMM_INPUT_SCALE_RS_BETA_NZ "vaddps %%ymm13, %%ymm0, %%ymm0 \n\t" CGEMM_OUTPUT_RS " \n\t" " \n\t" " \n\t" "jmp .CDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".CBETAZERO: \n\t" " \n\t" "cmpq $8, %%rsi \n\t" // set ZF if (8*cs_c) == 8. 
"jz .CROWSTORBZ \n\t" // jump to row storage case " \n\t" " \n\t" " \n\t" ".CGENSTORBZ: \n\t" " \n\t" " \n\t" "vmovaps %%ymm4, %%ymm0 \n\t" CGEMM_OUTPUT_GS "addq %%rdx, %%rcx \n\t" // c += 2*cs_c; " \n\t" " \n\t" "vmovaps %%ymm5, %%ymm0 \n\t" CGEMM_OUTPUT_GS "movq %%r11, %%rcx \n\t" // rcx = c + 1*rs_c " \n\t" " \n\t" " \n\t" "vmovaps %%ymm8, %%ymm0 \n\t" CGEMM_OUTPUT_GS "addq %%rdx, %%rcx \n\t" // c += 2*cs_c; " \n\t" " \n\t" "vmovaps %%ymm9, %%ymm0 \n\t" CGEMM_OUTPUT_GS "movq %%r12, %%rcx \n\t" // rcx = c + 2*rs_c " \n\t" " \n\t" " \n\t" "vmovaps %%ymm12, %%ymm0 \n\t" CGEMM_OUTPUT_GS "addq %%rdx, %%rcx \n\t" // c += 2*cs_c; " \n\t" " \n\t" "vmovaps %%ymm13, %%ymm0 \n\t" CGEMM_OUTPUT_GS " \n\t" " \n\t" " \n\t" "jmp .CDONE \n\t" // jump to end. " \n\t" " \n\t" " \n\t" ".CROWSTORBZ: \n\t" " \n\t" " \n\t" "vmovups %%ymm4, (%%rcx) \n\t" "vmovups %%ymm5, (%%rcx,%%rdx,1) \n\t" " \n\t" "vmovups %%ymm8, (%%r11) \n\t" "vmovups %%ymm9, (%%r11,%%rdx,1) \n\t" " \n\t" "vmovups %%ymm12, (%%r12) \n\t" "vmovups %%ymm13, (%%r12,%%rdx,1) \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" " \n\t" ".CDONE: \n\t" " \n\t" "vzeroupper \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), // 0 "m" (k_left), // 1 "m" (a), // 2 "m" (b), // 3 "m" (alpha), // 4 "m" (beta), // 5 "m" (c), // 6 "m" (rs_c), // 7 "m" (cs_c)/*, // 8 "m" (b_next), // 9 "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ); } // assumes beta.r, beta.i have been broadcast into ymm1, ymm2. 
// outputs to ymm0
//
// ZGEMM_INPUT_SCALE_GS_BETA_NZ: general-stride load of C, scaled by beta.
// Loads one dcomplex from (rcx) and one from (rcx + rsi), packs them into
// ymm0, and multiplies both by the complex beta whose real part has been
// broadcast into ymm1 and imaginary part into ymm2. Uses ymm3 as scratch.
#define ZGEMM_INPUT_SCALE_GS_BETA_NZ \
	"vmovupd (%%rcx), %%xmm0 \n\t" \
	"vmovupd (%%rcx,%%rsi), %%xmm3 \n\t" \
	"vinsertf128 $1, %%xmm3, %%ymm0, %%ymm0 \n\t" \
	"vpermilpd $0x5, %%ymm0, %%ymm3 \n\t" \
	"vmulpd %%ymm1, %%ymm0, %%ymm0 \n\t" \
	"vmulpd %%ymm2, %%ymm3, %%ymm3 \n\t" \
	"vaddsubpd %%ymm3, %%ymm0, %%ymm0 \n\t"

// assumes values to output are in ymm0
//
// ZGEMM_OUTPUT_GS: general-stride store. Writes the low dcomplex of ymm0 to
// (rcx) and the high dcomplex to (rcx + rsi). Uses xmm3 as scratch.
// NOTE: the definition ends with a line continuation, so the blank line that
// follows it must be kept.
#define ZGEMM_OUTPUT_GS \
	"vextractf128 $1, %%ymm0, %%xmm3 \n\t" \
	"vmovupd %%xmm0, (%%rcx) \n\t" \
	"vmovupd %%xmm3, (%%rcx,%%rsi ) \n\t" \

// ZGEMM_INPUT_SCALE_RS_BETA_NZ: contiguous (row-stored) load of C, scaled by
// beta. Loads two adjacent dcomplex values from (rcx) into ymm0 and multiplies
// them by the complex beta held in ymm1 (real) / ymm2 (imaginary). Uses ymm3
// as scratch.
#define ZGEMM_INPUT_SCALE_RS_BETA_NZ \
	"vmovupd (%%rcx), %%ymm0 \n\t" \
	"vpermilpd $0x5, %%ymm0, %%ymm3 \n\t" \
	"vmulpd %%ymm1, %%ymm0, %%ymm0 \n\t" \
	"vmulpd %%ymm2, %%ymm3, %%ymm3 \n\t" \
	"vaddsubpd %%ymm3, %%ymm0, %%ymm0 \n\t"

// ZGEMM_OUTPUT_RS: contiguous store of the two dcomplex values in ymm0 to
// (rcx).
// NOTE: the definition ends with a line continuation, so the blank line that
// follows it must be kept.
#define ZGEMM_OUTPUT_RS \
	"vmovupd %%ymm0, (%%rcx) \n\t" \

// bli_zgemm_haswell_asm_3x4
//
// Double-precision complex GEMM micro-kernel written in AVX2/FMA inline
// assembly. It accumulates a 3x4 tile of dcomplex products over k0 steps,
// scales the tile by alpha, and writes beta*C + alpha*(A*B) back to c (or just
// alpha*(A*B) when beta is zero, in which case c is never read).
//
//   k0      - number of rank-1 updates; split into k0/4 unrolled iterations
//             plus k0%4 single iterations.
//   alpha   - pointer to the complex scalar applied to the accumulated tile.
//   a       - advanced by 3 dcomplex per k step; its real/imaginary parts are
//             read with scalar broadcasts.
//   b       - advanced by 4 dcomplex per k step; read with vmovapd, so the
//             code requires it to be 32-byte aligned.
//   beta    - pointer to the complex scalar applied to the existing C.
//   c       - output tile; rows are rs_c0 elements apart, columns cs_c0
//             elements apart.
//   rs_c0   - row stride of c, in dcomplex elements.
//   cs_c0   - column stride of c, in dcomplex elements. cs_c0 == 1 selects the
//             contiguous-row fast path; anything else uses the general-stride
//             path.
//   data    - not referenced by this body (the a_next/b_next prefetch
//             addresses are commented out).
//   cntx    - not referenced by this body.
//
// Accumulator layout during the k loops (each ymm holds two dcomplex):
//   row 0: ymm4/ymm5   = b * a_real,  ymm6/ymm7   = b * a_imag
//   row 1: ymm8/ymm9   = b * a_real,  ymm10/ymm11 = b * a_imag
//   row 2: ymm12/ymm13 = b * a_real,  ymm14/ymm15 = b * a_imag
// After the loops the a_imag halves are swapped pairwise and folded in with
// vaddsubpd, leaving the complex products in ymm4/5, ymm8/9 and ymm12/13.
void bli_zgemm_haswell_asm_3x4
     (
       dim_t               k0,
       dcomplex*  restrict alpha,
       dcomplex*  restrict a,
       dcomplex*  restrict b,
       dcomplex*  restrict beta,
       dcomplex*  restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	__asm__ volatile
	(
	" \n\t"
	"vzeroall \n\t" // zero all xmm/ymm registers.
	" \n\t"
	" \n\t"
	"movq %2, %%rax \n\t" // load address of a.
	"movq %3, %%rbx \n\t" // load address of b.
	//"movq %9, %%r15 \n\t" // load address of b_next.
	" \n\t"
	"addq $32 * 4, %%rbx \n\t" // bias b by 128 bytes; b loads below use offsets -4*32 .. 3*32.
	" \n\t" // initialize loop by pre-loading
	"vmovapd -4 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	"movq %6, %%rcx \n\t" // load address of c
	"movq %7, %%rdi \n\t" // load rs_c
	"leaq (,%%rdi,8), %%rdi \n\t" // rs_c *= sizeof(dcomplex)
	"leaq (,%%rdi,2), %%rdi \n\t" // (multiply by 8, then by 2, for a total of 16 bytes)
	" \n\t"
	"leaq (%%rcx,%%rdi,1), %%r11 \n\t" // r11 = c + 1*rs_c;
	"leaq (%%rcx,%%rdi,2), %%r12 \n\t" // r12 = c + 2*rs_c;
	" \n\t"
	"prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*rs_c
	"prefetcht0 7 * 8(%%r11) \n\t" // prefetch c + 1*rs_c
	"prefetcht0 7 * 8(%%r12) \n\t" // prefetch c + 2*rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %0, %%rsi \n\t" // i = k_iter;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .ZCONSIDKLEFT \n\t" // if i == 0, jump to code that
	" \n\t" // contains the k_left loop.
	" \n\t"
	" \n\t"
	".ZLOOPKITER: \n\t" // MAIN LOOP
	" \n\t"
	" \n\t"
	" \n\t" // iteration 0
	"prefetcht0 32 * 16(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 0 * 8(%%rax), %%ymm2 \n\t" // real part of a, row 0
	"vbroadcastsd 1 * 8(%%rax), %%ymm3 \n\t" // imag part of a, row 0
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 2 * 8(%%rax), %%ymm2 \n\t" // real part of a, row 1
	"vbroadcastsd 3 * 8(%%rax), %%ymm3 \n\t" // imag part of a, row 1
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 4 * 8(%%rax), %%ymm2 \n\t" // real part of a, row 2
	"vbroadcastsd 5 * 8(%%rax), %%ymm3 \n\t" // imag part of a, row 2
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd -2 * 32(%%rbx), %%ymm0 \n\t" // load b for the next k step
	"vmovapd -1 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 1
	"vbroadcastsd 6 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 7 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 8 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 9 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 10 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 11 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd 0 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd 1 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 2
	"prefetcht0 38 * 16(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 12 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 13 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 14 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 15 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 16 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 17 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd 2 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd 3 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 3
	"vbroadcastsd 18 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 19 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 20 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 21 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 22 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 23 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $4 * 3 * 16, %%rax \n\t" // a += 4*3 (unroll x mr)
	"addq $4 * 4 * 16, %%rbx \n\t" // b += 4*4 (unroll x nr)
	" \n\t"
	"vmovapd -4 * 32(%%rbx), %%ymm0 \n\t" // pre-load b for the next iteration
	"vmovapd -3 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .ZLOOPKITER \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".ZCONSIDKLEFT: \n\t"
	" \n\t"
	"movq %1, %%rsi \n\t" // i = k_left;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .ZPOSTACCUM \n\t" // if i == 0, we're done; jump to end.
	" \n\t" // else, we prepare to enter k_left loop.
	" \n\t"
	" \n\t"
	".ZLOOPKLEFT: \n\t" // EDGE LOOP
	" \n\t"
	"prefetcht0 32 * 16(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 0 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 1 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 2 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 3 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 4 * 8(%%rax), %%ymm2 \n\t"
	"vbroadcastsd 5 * 8(%%rax), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $1 * 3 * 16, %%rax \n\t" // a += 1*3 (unroll x mr)
	"addq $1 * 4 * 16, %%rbx \n\t" // b += 1*4 (unroll x nr)
	" \n\t"
	"vmovapd -4 * 32(%%rbx), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rbx), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .ZLOOPKLEFT \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	".ZPOSTACCUM: \n\t"
	" \n\t"
	" \n\t" // permute even and odd elements
	" \n\t" // of ymm6/7, ymm10/11, ymm14/15
	"vpermilpd $0x5, %%ymm6, %%ymm6 \n\t"
	"vpermilpd $0x5, %%ymm7, %%ymm7 \n\t"
	"vpermilpd $0x5, %%ymm10, %%ymm10 \n\t"
	"vpermilpd $0x5, %%ymm11, %%ymm11 \n\t"
	"vpermilpd $0x5, %%ymm14, %%ymm14 \n\t"
	"vpermilpd $0x5, %%ymm15, %%ymm15 \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // subtract/add even/odd elements
	"vaddsubpd %%ymm6, %%ymm4, %%ymm4 \n\t"
	"vaddsubpd %%ymm7, %%ymm5, %%ymm5 \n\t"
	" \n\t"
	"vaddsubpd %%ymm10, %%ymm8, %%ymm8 \n\t"
	"vaddsubpd %%ymm11, %%ymm9, %%ymm9 \n\t"
	" \n\t"
	"vaddsubpd %%ymm14, %%ymm12, %%ymm12 \n\t"
	"vaddsubpd %%ymm15, %%ymm13, %%ymm13 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %4, %%rax \n\t" // load address of alpha
	"vbroadcastsd (%%rax), %%ymm0 \n\t" // load alpha_r and duplicate
	"vbroadcastsd 8(%%rax), %%ymm1 \n\t" // load alpha_i and duplicate
	" \n\t"
	" \n\t" // complex-scale each accumulator by alpha (ymm3 is scratch)
	"vpermilpd $0x5, %%ymm4, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm4, %%ymm4 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm4, %%ymm4 \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm5, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm5, %%ymm5 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm5, %%ymm5 \n\t"
	" \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm8, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm8, %%ymm8 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm8, %%ymm8 \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm9, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm9, %%ymm9 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm9, %%ymm9 \n\t"
	" \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm12, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm12, %%ymm12 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm12, %%ymm12 \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm13, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm13, %%ymm13 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm13, %%ymm13 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %5, %%rbx \n\t" // load address of beta
	"vbroadcastsd (%%rbx), %%ymm1 \n\t" // load beta_r and duplicate
	"vbroadcastsd 8(%%rbx), %%ymm2 \n\t" // load beta_i and duplicate
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %8, %%rsi \n\t" // load cs_c
	"leaq (,%%rsi,8), %%rsi \n\t" // rsi = cs_c * sizeof(dcomplex)
	"leaq (,%%rsi,2), %%rsi \n\t" // (multiply by 8, then by 2, for a total of 16 bytes)
	"leaq (,%%rsi,2), %%rdx \n\t" // rdx = 2*cs_c;
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // now avoid loading C if beta == 0
	"vxorpd %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero.
	// NOTE(review): per the Intel SDM, vucomisd also sets ZF when the
	// compare is unordered, so a NaN in beta would take the beta == 0
	// path as well -- confirm this is intended.
	"vucomisd %%xmm0, %%xmm1 \n\t" // set ZF if beta_r == 0.
	"sete %%r8b \n\t" // r8b = ( ZF == 1 ? 1 : 0 );
	"vucomisd %%xmm0, %%xmm2 \n\t" // set ZF if beta_i == 0.
	"sete %%r9b \n\t" // r9b = ( ZF == 1 ? 1 : 0 );
	"andb %%r8b, %%r9b \n\t" // r9b &= r8b; ZF is cleared only if both flags were 1.
	"jne .ZBETAZERO \n\t" // if ZF == 0 (beta_r == 0 and beta_i == 0), jump to beta == 0 case
	" \n\t"
	" \n\t"
	"cmpq $16, %%rsi \n\t" // set ZF if (16*cs_c) == 16.
	"jz .ZROWSTORED \n\t" // jump to row storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".ZGENSTORED: \n\t" // beta != 0, general stride: c = beta*c + alpha*ab
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm4, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm5, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm8, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm9, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm12, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm13, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .ZDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".ZROWSTORED: \n\t" // beta != 0, contiguous rows: c = beta*c + alpha*ab
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_RS_BETA_NZ
	"vaddpd %%ymm4, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_RS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_RS_BETA_NZ
	"vaddpd %%ymm5, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_RS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_RS_BETA_NZ
	"vaddpd %%ymm8, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_RS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_RS_BETA_NZ
	"vaddpd %%ymm9, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_RS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_RS_BETA_NZ
	"vaddpd %%ymm12, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_RS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_RS_BETA_NZ
	"vaddpd %%ymm13, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_RS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .ZDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".ZBETAZERO: \n\t" // beta == 0: store alpha*ab without reading c
	" \n\t"
	"cmpq $16, %%rsi \n\t" // set ZF if (16*cs_c) == 16.
	"jz .ZROWSTORBZ \n\t" // jump to row storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".ZGENSTORBZ: \n\t"
	" \n\t"
	" \n\t"
	"vmovapd %%ymm4, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm5, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	"vmovapd %%ymm8, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm9, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	"vmovapd %%ymm12, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm13, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .ZDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".ZROWSTORBZ: \n\t" // beta == 0, contiguous rows: two 32-byte stores per row
	" \n\t"
	" \n\t"
	"vmovupd %%ymm4, (%%rcx) \n\t"
	"vmovupd %%ymm5, (%%rcx,%%rdx,1) \n\t"
	" \n\t"
	"vmovupd %%ymm8, (%%r11) \n\t"
	"vmovupd %%ymm9, (%%r11,%%rdx,1) \n\t"
	" \n\t"
	"vmovupd %%ymm12, (%%r12) \n\t"
	"vmovupd %%ymm13, (%%r12,%%rdx,1) \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".ZDONE: \n\t"
	" \n\t"
	"vzeroupper \n\t"
	" \n\t"

	: // output operands (none)
	: // input operands
	  "m" (k_iter), // 0
	  "m" (k_left), // 1
	  "m" (a), // 2
	  "m" (b), // 3
	  "m" (alpha), // 4
	  "m" (beta), // 5
	  "m" (c), // 6
	  "m" (rs_c), // 7
	  "m" (cs_c)/*, // 8
	  "m" (b_next), // 9
	  "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	);
}

blis-0.6.1/kernels/haswell/3/old/bli_gemm_haswell_asm_d8x6.c000066400000000000000000003426731360743507500236710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

// SGEMM_INPUT_GS_BETA_NZ: general-stride gather of eight floats of C into
// ymm0. The elements are read from rcx + {0,1,2,3,4,5,6,7}*rs_c, using
// rsi = rs_c, r13 = 3*rs_c, r15 = 5*rs_c and r10 = 7*rs_c (all in bytes).
// Uses xmm1 and xmm2 as scratch.
#define SGEMM_INPUT_GS_BETA_NZ \
	"vmovlps (%%rcx ), %%xmm0, %%xmm0 \n\t" \
	"vmovhps (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \
	"vmovlps (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \
	"vmovhps (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \
	"vshufps $0x88, %%xmm1, %%xmm0, %%xmm0 \n\t" \
	"vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \
	"vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \
	/* We can't use vmovhps for loading the last element because that
	   might result in reading beyond valid memory. (vmov[lh]ps load
	   pairs of adjacent floats at a time.) So we need to use vmovss
	   instead. But since we're limited to using ymm0 through ymm2
	   (ymm3 contains beta and ymm4 through ymm15 contain the microtile)
	   and due to the way vmovss zeros out all bits above 31, we have to
	   load element 7 before element 6. */ \
	"vmovss (%%rcx,%%r10 ), %%xmm1 \n\t" \
	"vpermilps $0xcf, %%xmm1, %%xmm1 \n\t" \
	"vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \
	/*"vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t"*/ \
	"vshufps $0x88, %%xmm1, %%xmm2, %%xmm2 \n\t" \
	"vperm2f128 $0x20, %%ymm2, %%ymm0, %%ymm0 \n\t"

// SGEMM_OUTPUT_GS_BETA_NZ: general-stride scatter of the eight floats in ymm0
// to rcx + {0,...,7}*rs_c, one vmovss per element, rotating the next element
// into lane 0 with vpermilps $0x39. Overwrites xmm0, xmm1 and xmm2. Despite
// the name, it is also used by the beta == 0 general-stride path below.
#define SGEMM_OUTPUT_GS_BETA_NZ \
	"vextractf128 $1, %%ymm0, %%xmm2 \n\t" \
	"vmovss %%xmm0, (%%rcx ) \n\t" \
	"vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \
	"vmovss %%xmm1, (%%rcx,%%rsi,1) \n\t" \
	"vpermilps $0x39, %%xmm1, %%xmm0 \n\t" \
	"vmovss %%xmm0, (%%rcx,%%rsi,2) \n\t" \
	"vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \
	"vmovss %%xmm1, (%%rcx,%%r13 ) \n\t" \
	"vmovss %%xmm2, (%%rcx,%%rsi,4) \n\t" \
	"vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \
	"vmovss %%xmm1, (%%rcx,%%r15 ) \n\t" \
	"vpermilps $0x39, %%xmm1, %%xmm2 \n\t" \
	"vmovss %%xmm2, (%%rcx,%%r13,2) \n\t" \
	"vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \
	"vmovss %%xmm1, (%%rcx,%%r10 ) \n\t"

// bli_sgemm_haswell_asm_16x6
//
// Single-precision real GEMM micro-kernel written in AVX2/FMA inline
// assembly. It accumulates a 16x6 tile of products over k0 steps, scales the
// tile by alpha, and writes beta*C + alpha*(A*B) back to c (or just
// alpha*(A*B) when beta is zero, in which case c is never read).
//
//   k0      - number of rank-1 updates; split into k0/4 unrolled iterations
//             plus k0%4 single iterations.
//   alpha   - pointer to the scalar applied to the accumulated tile.
//   a       - advanced by 16 floats per k step; read with vmovaps, so the
//             code requires it to be 32-byte aligned.
//   b       - advanced by 6 floats per k step; read with scalar broadcasts.
//   beta    - pointer to the scalar applied to the existing C.
//   c       - output tile; rows are rs_c0 elements apart, columns cs_c0
//             elements apart.
//   rs_c0   - row stride of c, in float elements. rs_c0 == 1 selects the
//             contiguous-column fast path; anything else uses the
//             general-stride path.
//   cs_c0   - column stride of c, in float elements.
//   data    - not referenced by this body (the a_next/b_next prefetch
//             addresses are commented out).
//   cntx    - not referenced by this body.
//
// Accumulator layout: column j of the tile (j = 0..5) is held in
// ymm(4+2j) for rows 0-7 and ymm(5+2j) for rows 8-15.
void bli_sgemm_haswell_asm_16x6
     (
       dim_t               k0,
       float*     restrict alpha,
       float*     restrict a,
       float*     restrict b,
       float*     restrict beta,
       float*     restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	__asm__ volatile
	(
	" \n\t"
	"vzeroall \n\t" // zero all xmm/ymm registers.
	" \n\t"
	" \n\t"
	"movq %2, %%rax \n\t" // load address of a.
	"movq %3, %%rbx \n\t" // load address of b.
	//"movq %9, %%r15 \n\t" // load address of b_next.
	" \n\t"
	"addq $32 * 4, %%rax \n\t" // bias a by 128 bytes; a loads below use offsets -4*32 .. 3*32.
	" \n\t" // initialize loop by pre-loading
	"vmovaps -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	"movq %6, %%rcx \n\t" // load address of c
	"movq %8, %%rdi \n\t" // load cs_c
	"leaq (,%%rdi,4), %%rdi \n\t" // cs_c *= sizeof(float)
	" \n\t"
	"leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*cs_c; (r13 is reloaded with 3*rs_c after the k loops)
	"leaq (%%rcx,%%r13,1), %%rdx \n\t" // rdx = c + 3*cs_c;
	"prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*cs_c
	"prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*cs_c
	"prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*cs_c
	"prefetcht0 7 * 8(%%rdx) \n\t" // prefetch c + 3*cs_c
	"prefetcht0 7 * 8(%%rdx,%%rdi) \n\t" // prefetch c + 4*cs_c
	"prefetcht0 7 * 8(%%rdx,%%rdi,2) \n\t" // prefetch c + 5*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %0, %%rsi \n\t" // i = k_iter;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .SCONSIDKLEFT \n\t" // if i == 0, jump to code that
	" \n\t" // contains the k_left loop.
	" \n\t"
	" \n\t"
	".SLOOPKITER: \n\t" // MAIN LOOP
	" \n\t"
	" \n\t"
	" \n\t" // iteration 0
	"prefetcht0 128 * 4(%%rax) \n\t"
	" \n\t"
	"vbroadcastss 0 * 4(%%rbx), %%ymm2 \n\t" // b column 0
	"vbroadcastss 1 * 4(%%rbx), %%ymm3 \n\t" // b column 1
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 2 * 4(%%rbx), %%ymm2 \n\t" // b column 2
	"vbroadcastss 3 * 4(%%rbx), %%ymm3 \n\t" // b column 3
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 4 * 4(%%rbx), %%ymm2 \n\t" // b column 4
	"vbroadcastss 5 * 4(%%rbx), %%ymm3 \n\t" // b column 5
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovaps -2 * 32(%%rax), %%ymm0 \n\t" // load a for the next k step
	"vmovaps -1 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 1
	"vbroadcastss 6 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 7 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 8 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 9 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 10 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 11 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovaps 0 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps 1 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 2
	"prefetcht0 152 * 4(%%rax) \n\t"
	" \n\t"
	"vbroadcastss 12 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 13 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 14 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 15 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 16 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 17 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovaps 2 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps 3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 3
	"vbroadcastss 18 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 19 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 20 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 21 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 22 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 23 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $4 * 16 * 4, %%rax \n\t" // a += 4*16 (unroll x mr)
	"addq $4 * 6 * 4, %%rbx \n\t" // b += 4*6 (unroll x nr)
	" \n\t"
	"vmovaps -4 * 32(%%rax), %%ymm0 \n\t" // pre-load a for the next iteration
	"vmovaps -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .SLOOPKITER \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".SCONSIDKLEFT: \n\t"
	" \n\t"
	"movq %1, %%rsi \n\t" // i = k_left;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .SPOSTACCUM \n\t" // if i == 0, we're done; jump to end.
	" \n\t" // else, we prepare to enter k_left loop.
	" \n\t"
	" \n\t"
	".SLOOPKLEFT: \n\t" // EDGE LOOP
	" \n\t"
	"prefetcht0 128 * 4(%%rax) \n\t"
	" \n\t"
	"vbroadcastss 0 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 1 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 2 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 3 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 4 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 5 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $1 * 16 * 4, %%rax \n\t" // a += 1*16 (unroll x mr)
	"addq $1 * 6 * 4, %%rbx \n\t" // b += 1*6 (unroll x nr)
	" \n\t"
	"vmovaps -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .SLOOPKLEFT \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	".SPOSTACCUM: \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %4, %%rax \n\t" // load address of alpha
	"movq %5, %%rbx \n\t" // load address of beta
	"vbroadcastss (%%rax), %%ymm0 \n\t" // load alpha and duplicate
	"vbroadcastss (%%rbx), %%ymm3 \n\t" // load beta and duplicate
	" \n\t"
	"vmulps %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha
	"vmulps %%ymm0, %%ymm5, %%ymm5 \n\t"
	"vmulps %%ymm0, %%ymm6, %%ymm6 \n\t"
	"vmulps %%ymm0, %%ymm7, %%ymm7 \n\t"
	"vmulps %%ymm0, %%ymm8, %%ymm8 \n\t"
	"vmulps %%ymm0, %%ymm9, %%ymm9 \n\t"
	"vmulps %%ymm0, %%ymm10, %%ymm10 \n\t"
	"vmulps %%ymm0, %%ymm11, %%ymm11 \n\t"
	"vmulps %%ymm0, %%ymm12, %%ymm12 \n\t"
	"vmulps %%ymm0, %%ymm13, %%ymm13 \n\t"
	"vmulps %%ymm0, %%ymm14, %%ymm14 \n\t"
	"vmulps %%ymm0, %%ymm15, %%ymm15 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %7, %%rsi \n\t" // load rs_c
	"leaq (,%%rsi,4), %%rsi \n\t" // rsi = rs_c * sizeof(float)
	" \n\t"
	"leaq (%%rcx,%%rsi,8), %%rdx \n\t" // load address of c + 8*rs_c;
	" \n\t"
	"leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*rs_c; (replaces the 3*cs_c value used for prefetching)
	"leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*rs_c;
	"leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*rs_c;
	" \n\t"
	" \n\t"
	" \n\t" // now avoid loading C if beta == 0
	" \n\t"
	"vxorps %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero.
	// NOTE(review): per the Intel SDM, vucomiss also sets ZF when the
	// compare is unordered, so a NaN beta would take the beta == 0 path
	// as well -- confirm this is intended.
	"vucomiss %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0.
	"je .SBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case
	" \n\t"
	" \n\t"
	"cmpq $4, %%rsi \n\t" // set ZF if (4*rs_c) == 4.
	"jz .SCOLSTORED \n\t" // jump to column storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".SGENSTORED: \n\t" // beta != 0, general stride: c = beta*c + alpha*ab
	" \n\t"
	" \n\t" // rows 0-7 of columns 0..5
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm6, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm8, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm12, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm14, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	//"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"movq %%rdx, %%rcx \n\t" // rcx = c + 8*rs_c
	" \n\t"
	" \n\t" // rows 8-15 of columns 0..5
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm5, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm7, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm9, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm11, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm13, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	SGEMM_INPUT_GS_BETA_NZ
	"vfmadd213ps %%ymm15, %%ymm3, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	//"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .SDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".SCOLSTORED: \n\t" // beta != 0, contiguous columns: rcx walks rows 0-7, rdx rows 8-15
	" \n\t"
	" \n\t"
	"vfmadd231ps (%%rcx), %%ymm3, %%ymm4 \n\t"
	"vmovups %%ymm4, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231ps (%%rdx), %%ymm3, %%ymm5 \n\t"
	"vmovups %%ymm5, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231ps (%%rcx), %%ymm3, %%ymm6 \n\t"
	"vmovups %%ymm6, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231ps (%%rdx), %%ymm3, %%ymm7 \n\t"
	"vmovups %%ymm7, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231ps (%%rcx), %%ymm3, %%ymm8 \n\t"
	"vmovups %%ymm8, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231ps (%%rdx), %%ymm3, %%ymm9 \n\t"
	"vmovups %%ymm9, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231ps (%%rcx), %%ymm3, %%ymm10 \n\t"
	"vmovups %%ymm10, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231ps (%%rdx), %%ymm3, %%ymm11 \n\t"
	"vmovups %%ymm11, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231ps (%%rcx), %%ymm3, %%ymm12 \n\t"
	"vmovups %%ymm12, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231ps (%%rdx), %%ymm3, %%ymm13 \n\t"
	"vmovups %%ymm13, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231ps (%%rcx), %%ymm3, %%ymm14 \n\t"
	"vmovups %%ymm14, (%%rcx) \n\t"
	//"addq %%rdi, %%rcx \n\t"
	"vfmadd231ps (%%rdx), %%ymm3, %%ymm15 \n\t"
	"vmovups %%ymm15, (%%rdx) \n\t"
	//"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .SDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".SBETAZERO: \n\t" // beta == 0: store alpha*ab without reading c
	" \n\t"
	"cmpq $4, %%rsi \n\t" // set ZF if (4*rs_c) == 4.
	"jz .SCOLSTORBZ \n\t" // jump to column storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".SGENSTORBZ: \n\t"
	" \n\t"
	" \n\t" // rows 0-7 of columns 0..5
	"vmovaps %%ymm4, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm6, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm8, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm10, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm12, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm14, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	//"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"movq %%rdx, %%rcx \n\t" // rcx = c + 8*rs_c
	" \n\t"
	" \n\t" // rows 8-15 of columns 0..5
	"vmovaps %%ymm5, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm7, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm9, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm11, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm13, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm15, %%ymm0 \n\t"
	SGEMM_OUTPUT_GS_BETA_NZ
	//"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .SDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".SCOLSTORBZ: \n\t" // beta == 0, contiguous columns: rcx walks rows 0-7, rdx rows 8-15
	" \n\t"
	" \n\t"
	"vmovups %%ymm4, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovups %%ymm5, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	"vmovups %%ymm6, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovups %%ymm7, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovups %%ymm8, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovups %%ymm9, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovups %%ymm10, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovups %%ymm11, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovups %%ymm12, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovups %%ymm13, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovups %%ymm14, (%%rcx) \n\t"
	//"addq %%rdi, %%rcx \n\t"
	"vmovups %%ymm15, (%%rdx) \n\t"
	//"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".SDONE: \n\t"
	" \n\t"
	"vzeroupper \n\t"
	" \n\t"

	: // output operands (none)
	: // input operands
	  "m" (k_iter), // 0
	  "m" (k_left), // 1
	  "m" (a), // 2
	  "m" (b), // 3
	  "m" (alpha), // 4
	  "m" (beta), // 5
	  "m" (c), // 6
	  "m" (rs_c), // 7
	  "m" (cs_c)/*, // 8
	  "m" (b_next), // 9
	  "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	);
}

#define DGEMM_INPUT_GS_BETA_NZ \
	"vmovlpd (%%rcx ), %%xmm0, %%xmm0 \n\t" \
	"vmovhpd (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \
	"vmovlpd (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \
	"vmovhpd (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \
	"vperm2f128 $0x20, %%ymm1, %%ymm0, %%ymm0 \n\t" /*\
	"vmovlpd (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \
	"vmovhpd (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \
	"vmovlpd (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \
	"vmovhpd (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t" \
	"vperm2f128 $0x20, %%ymm1, %%ymm2, %%ymm2 \n\t"*/

#define DGEMM_OUTPUT_GS_BETA_NZ \
	"vextractf128 $1,
%%ymm0, %%xmm1 \n\t" \
	"vmovlpd %%xmm0, (%%rcx ) \n\t" \
	"vmovhpd %%xmm0, (%%rcx,%%rsi ) \n\t" \
	"vmovlpd %%xmm1, (%%rcx,%%rsi,2) \n\t" \
	"vmovhpd %%xmm1, (%%rcx,%%r13 ) \n\t" /*\
	"vextractf128 $1, %%ymm2, %%xmm1 \n\t" \
	"vmovlpd %%xmm2, (%%rcx,%%rsi,4) \n\t" \
	"vmovhpd %%xmm2, (%%rcx,%%r15 ) \n\t" \
	"vmovlpd %%xmm1, (%%rcx,%%r13,2) \n\t" \
	"vmovhpd %%xmm1, (%%rcx,%%r10 ) \n\t"*/

// bli_dgemm_haswell_asm_8x6
//
// Double-precision real gemm microkernel, implemented as a single GCC-style
// extended inline assembly statement (AVX/FMA instructions, AT&T syntax).
//
// What the assembly does, in order:
//   1. Zeroes all vector registers and accumulates into ymm4-ymm15. Every
//      step of the k loop loads 8 doubles from a (two 32-byte loads) and
//      broadcasts 6 doubles from b, so the accumulators form an 8x6 tile:
//      for column j = 0..5, ymm(4+2j) holds rows 0-3 and ymm(5+2j) holds
//      rows 4-7.
//   2. Multiplies every accumulator by *alpha.
//   3. Writes the tile to c. If *beta compares equal to zero, c is
//      overwritten without being read; otherwise beta*c is added first.
//      When rs_c0 == 1 each half-column is moved with one unaligned 32-byte
//      access. For any other row stride the four doubles are gathered and
//      scattered one at a time by DGEMM_INPUT_GS_BETA_NZ (loads into ymm0
//      from rcx, rcx+rsi, rcx+2*rsi, rcx+r13) and DGEMM_OUTPUT_GS_BETA_NZ
//      (stores ymm0 to the same four addresses); both overwrite xmm1.
//      Despite its name, DGEMM_OUTPUT_GS_BETA_NZ is also used on the
//      beta == 0 path.
//
// Parameters:
//   k0     - number of k steps; split into k0/4 passes of the 4x-unrolled
//            main loop plus k0%4 passes of the edge loop.
//   alpha  - pointer to the scalar applied to the a*b product.
//   a      - panel read 8 doubles per k step with vmovapd, i.e. the loads
//            require 32-byte alignment.
//   b      - panel read 6 doubles per k step with vbroadcastsd.
//   beta   - pointer to the scalar applied to the existing contents of c.
//   c      - output tile.
//   rs_c0  - row stride of c, in elements (scaled by 8 inside the asm).
//   cs_c0  - column stride of c, in elements (scaled by 8 inside the asm).
//   data   - not referenced by the active code; only the commented-out
//            a_next/b_next lines mention it.
//   cntx   - not referenced in this function.
void bli_dgemm_haswell_asm_8x6
     (
       dim_t k0,
       double* restrict alpha,
       double* restrict a,
       double* restrict b,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// (Every operand below is passed with an "m" constraint and read with
	// a 64-bit movq, hence the uint64_t copies.)
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	__asm__ volatile
	(
	" \n\t"
	"vzeroall \n\t" // zero all xmm/ymm registers.
	" \n\t"
	" \n\t"
	"movq %2, %%rax \n\t" // load address of a.
	"movq %3, %%rbx \n\t" // load address of b.
	//"movq %9, %%r15 \n\t" // load address of b_next.
	" \n\t"
	// Bias rax by 128 bytes; the loads of a below use displacements from
	// -4*32 to 3*32 relative to this biased pointer.
	"addq $32 * 4, %%rax \n\t"
	" \n\t" // initialize loop by pre-loading
	"vmovapd -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	"movq %6, %%rcx \n\t" // load address of c
	"movq %8, %%rdi \n\t" // load cs_c
	"leaq (,%%rdi,8), %%rdi \n\t" // cs_c *= sizeof(double)
	" \n\t"
	// r13 and rdx are temporaries for the prefetches only; both are
	// recomputed from rs_c after the k loops.
	"leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*cs_c;
	"leaq (%%rcx,%%r13,1), %%rdx \n\t" // rdx = c + 3*cs_c;
	"prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*cs_c
	"prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*cs_c
	"prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*cs_c
	"prefetcht0 7 * 8(%%rdx) \n\t" // prefetch c + 3*cs_c
	"prefetcht0 7 * 8(%%rdx,%%rdi) \n\t" // prefetch c + 4*cs_c
	"prefetcht0 7 * 8(%%rdx,%%rdi,2) \n\t" // prefetch c + 5*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %0, %%rsi \n\t" // i = k_iter;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .DCONSIDKLEFT \n\t" // if i == 0, jump to code that
	" \n\t" // contains the k_left loop.
	" \n\t"
	" \n\t"
	".DLOOPKITER: \n\t" // MAIN LOOP
	" \n\t"
	" \n\t"
	// In every iteration ymm0/ymm1 hold rows 0-3/4-7 of the current
	// column of a, and ymm2/ymm3 hold two broadcast elements of b.
	" \n\t" // iteration 0
	"prefetcht0 64 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 0 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 1 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 2 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 3 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 4 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 5 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd -2 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd -1 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 1
	"vbroadcastsd 6 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 7 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 8 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 9 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 10 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 11 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd 0 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd 1 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 2
	"prefetcht0 76 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 12 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 13 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 14 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 15 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 16 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 17 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd 2 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd 3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 3
	"vbroadcastsd 18 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 19 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 20 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 21 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 22 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 23 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $4 * 8 * 8, %%rax \n\t" // a += 4*8 (unroll x mr)
	"addq $4 * 6 * 8, %%rbx \n\t" // b += 4*6 (unroll x nr)
	" \n\t"
	// Pre-load a for the next pass (or for the edge loop).
	"vmovapd -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .DLOOPKITER \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".DCONSIDKLEFT: \n\t"
	" \n\t"
	"movq %1, %%rsi \n\t" // i = k_left;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .DPOSTACCUM \n\t" // if i == 0, we're done; jump to end.
	" \n\t" // else, we prepare to enter k_left loop.
	" \n\t"
	" \n\t"
	".DLOOPKLEFT: \n\t" // EDGE LOOP
	" \n\t"
	"prefetcht0 64 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 0 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 1 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 2 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 3 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 4 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 5 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $1 * 8 * 8, %%rax \n\t" // a += 1*8 (unroll x mr)
	"addq $1 * 6 * 8, %%rbx \n\t" // b += 1*6 (unroll x nr)
	" \n\t"
	"vmovapd -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .DLOOPKLEFT \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	".DPOSTACCUM: \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %4, %%rax \n\t" // load address of alpha
	"movq %5, %%rbx \n\t" // load address of beta
	"vbroadcastsd (%%rax), %%ymm0 \n\t" // load alpha and duplicate
	"vbroadcastsd (%%rbx), %%ymm3 \n\t" // load beta and duplicate
	" \n\t"
	"vmulpd %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha
	"vmulpd %%ymm0, %%ymm5, %%ymm5 \n\t"
	"vmulpd %%ymm0, %%ymm6, %%ymm6 \n\t"
	"vmulpd %%ymm0, %%ymm7, %%ymm7 \n\t"
	"vmulpd %%ymm0, %%ymm8, %%ymm8 \n\t"
	"vmulpd %%ymm0, %%ymm9, %%ymm9 \n\t"
	"vmulpd %%ymm0, %%ymm10, %%ymm10 \n\t"
	"vmulpd %%ymm0, %%ymm11, %%ymm11 \n\t"
	"vmulpd %%ymm0, %%ymm12, %%ymm12 \n\t"
	"vmulpd %%ymm0, %%ymm13, %%ymm13 \n\t"
	"vmulpd %%ymm0, %%ymm14, %%ymm14 \n\t"
	"vmulpd %%ymm0, %%ymm15, %%ymm15 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	// From here on rsi, rdx and r13 are row-stride quantities; rcx still
	// holds c and rdi still holds cs_c in bytes.
	"movq %7, %%rsi \n\t" // load rs_c
	"leaq (,%%rsi,8), %%rsi \n\t" // rsi = rs_c * sizeof(double)
	" \n\t"
	"leaq (%%rcx,%%rsi,4), %%rdx \n\t" // load address of c + 4*rs_c;
	" \n\t"
	"leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*rs_c;
	//"leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*rs_c;
	//"leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*rs_c;
	" \n\t"
	" \n\t"
	" \n\t" // now avoid loading C if beta == 0
	" \n\t"
	// NOTE(review): vucomisd also sets ZF for an unordered compare, so a
	// NaN beta would presumably take the beta == 0 branch as well -
	// confirm that this is intended.
	"vxorpd %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero.
	"vucomisd %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0.
	"je .DBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case
	" \n\t"
	" \n\t"
	"cmpq $8, %%rsi \n\t" // set ZF if (8*rs_c) == 8.
	"jz .DCOLSTORED \n\t" // jump to column storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".DGENSTORED: \n\t"
	" \n\t"
	" \n\t"
	// General stride, beta != 0. For each column: gather four doubles of
	// c into ymm0, compute ymm0 = beta*ymm0 + accumulator, scatter back.
	// Rows 0-3 of all six columns are written first, then rows 4-7.
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm6, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm8, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm12, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm14, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	//"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"movq %%rdx, %%rcx \n\t" // rcx = c + 4*rs_c
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm5, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm7, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm9, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm11, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm13, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	DGEMM_INPUT_GS_BETA_NZ
	"vfmadd213pd %%ymm15, %%ymm3, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	//"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .DDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".DCOLSTORED: \n\t"
	" \n\t"
	" \n\t"
	// Column storage, beta != 0. rcx walks rows 0-3 and rdx walks rows
	// 4-7 of each column: accumulator += beta * c, then store.
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm4 \n\t"
	"vmovupd %%ymm4, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm5 \n\t"
	"vmovupd %%ymm5, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm6 \n\t"
	"vmovupd %%ymm6, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm7 \n\t"
	"vmovupd %%ymm7, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm8 \n\t"
	"vmovupd %%ymm8, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm9 \n\t"
	"vmovupd %%ymm9, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm10 \n\t"
	"vmovupd %%ymm10, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm11 \n\t"
	"vmovupd %%ymm11, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm12 \n\t"
	"vmovupd %%ymm12, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm13 \n\t"
	"vmovupd %%ymm13, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vfmadd231pd (%%rcx), %%ymm3, %%ymm14 \n\t"
	"vmovupd %%ymm14, (%%rcx) \n\t"
	//"addq %%rdi, %%rcx \n\t"
	"vfmadd231pd (%%rdx), %%ymm3, %%ymm15 \n\t"
	"vmovupd %%ymm15, (%%rdx) \n\t"
	//"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .DDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".DBETAZERO: \n\t"
	" \n\t"
	"cmpq $8, %%rsi \n\t" // set ZF if (8*rs_c) == 8.
	"jz .DCOLSTORBZ \n\t" // jump to column storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".DGENSTORBZ: \n\t"
	" \n\t"
	" \n\t"
	// General stride, beta == 0: copy each accumulator into ymm0 and
	// scatter it; c is never read on this path.
	"vmovapd %%ymm4, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm6, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm8, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm10, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm12, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm14, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	//"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"movq %%rdx, %%rcx \n\t" // rcx = c + 4*rs_c
	" \n\t"
	" \n\t"
	"vmovapd %%ymm5, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm7, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm9, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm11, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm13, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm15, %%ymm0 \n\t"
	DGEMM_OUTPUT_GS_BETA_NZ
	//"addq %%rdi, %%rcx \n\t" // c += cs_c;
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .DDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".DCOLSTORBZ: \n\t"
	" \n\t"
	" \n\t"
	// Column storage, beta == 0: plain unaligned stores of the
	// accumulators; rcx walks rows 0-3 and rdx walks rows 4-7.
	"vmovupd %%ymm4, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm5, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	"vmovupd %%ymm6, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm7, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm8, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm9, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm10, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm11, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm12, (%%rcx) \n\t"
	"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm13, (%%rdx) \n\t"
	"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	"vmovupd %%ymm14, (%%rcx) \n\t"
	//"addq %%rdi, %%rcx \n\t"
	"vmovupd %%ymm15, (%%rdx) \n\t"
	//"addq %%rdi, %%rdx \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".DDONE: \n\t"
	" \n\t"
	"vzeroupper \n\t"
	" \n\t"

	: // output operands (none)
	: // input operands
	  "m" (k_iter), // 0
	  "m" (k_left), // 1
	  "m" (a), // 2
	  "m" (b), // 3
	  "m" (alpha), // 4
	  "m" (beta), // 5
	  "m" (c), // 6
	  "m" (rs_c), // 7
	  "m" (cs_c)/*, // 8
	  "m" (b_next), // 9
	  "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	);
}

// assumes beta.r, beta.i have been broadcast into ymm1, ymm2.
// outputs to ymm0
//
// CGEMM_INPUT_SCALE_GS_BETA_NZ: general-stride load of four 8-byte elements
// of c (from rcx, rcx+rsi, rcx+2*rsi and rcx+r13) into ymm0, followed by a
// complex multiply by beta: ymm0*ymm1 is combined, via vaddsubps, with the
// pair-swapped copy of ymm0 multiplied by ymm2. Overwrites ymm3.
#define CGEMM_INPUT_SCALE_GS_BETA_NZ \
	"vmovlpd (%%rcx ), %%xmm0, %%xmm0 \n\t" \
	"vmovhpd (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \
	"vmovlpd (%%rcx,%%rsi,2), %%xmm3, %%xmm3 \n\t" \
	"vmovhpd (%%rcx,%%r13 ), %%xmm3, %%xmm3 \n\t" \
	"vinsertf128 $1, %%xmm3, %%ymm0, %%ymm0 \n\t" \
	"vpermilps $0xb1, %%ymm0, %%ymm3 \n\t" \
	"vmulps %%ymm1, %%ymm0, %%ymm0 \n\t" \
	"vmulps %%ymm2, %%ymm3, %%ymm3 \n\t" \
	"vaddsubps %%ymm3, %%ymm0, %%ymm0 \n\t"

// assumes values to output are in ymm0
//
// CGEMM_OUTPUT_GS: general-stride store of the four 8-byte elements of ymm0
// to rcx, rcx+rsi, rcx+2*rsi and rcx+r13. Overwrites xmm3.
#define CGEMM_OUTPUT_GS \
	"vextractf128 $1, %%ymm0, %%xmm3 \n\t" \
	"vmovlpd %%xmm0, (%%rcx ) \n\t" \
	"vmovhpd %%xmm0, (%%rcx,%%rsi,1) \n\t" \
	"vmovlpd %%xmm3, (%%rcx,%%rsi,2) \n\t" \
	"vmovhpd %%xmm3, (%%rcx,%%r13 ) \n\t"

// CGEMM_INPUT_SCALE_CS_BETA_NZ: contiguous variant of the load above. One
// unaligned 32-byte load from (rcx) into ymm0, then the same multiply by
// beta (ymm1 = beta.r, ymm2 = beta.i). Overwrites ymm3.
#define CGEMM_INPUT_SCALE_CS_BETA_NZ \
	"vmovups (%%rcx), %%ymm0 \n\t" \
	"vpermilps $0xb1, %%ymm0, %%ymm3 \n\t" \
	"vmulps %%ymm1, %%ymm0, %%ymm0 \n\t" \
	"vmulps %%ymm2, %%ymm3, %%ymm3 \n\t" \
	"vaddsubps %%ymm3, %%ymm0, %%ymm0 \n\t"

// CGEMM_OUTPUT_CS: one unaligned 32-byte store of ymm0 to (rcx). The
// definition ends with a line continuation, so the line that follows it
// must stay empty.
#define CGEMM_OUTPUT_CS \
	"vmovups %%ymm0, (%%rcx) \n\t" \

// bli_cgemm_haswell_asm_8x3
//
// Single-precision complex gemm microkernel, implemented as a single
// GCC-style extended inline assembly statement (AVX/FMA, AT&T syntax).
//
// What the assembly does, in order:
//   1. Zeroes all vector registers and accumulates into ymm4-ymm15. Every
//      step of the k loop loads 64 bytes of a (8 scomplex values, as two
//      32-byte loads) and broadcasts six consecutive floats of b, i.e. the
//      two float components of each of 3 scomplex values. For column
//      j = 0..2, ymm(4+4j)/ymm(5+4j) accumulate a times the first component
//      (rows 0-3 / rows 4-7) and ymm(6+4j)/ymm(7+4j) accumulate a times the
//      second component.
//   2. After the loops, the second-component accumulators are pair-swapped
//      (vpermilps $0xb1) and folded into the first-component accumulators
//      with vaddsubps, leaving the complex products in ymm4/5, ymm8/9 and
//      ymm12/13.
//   3. Multiplies those six registers by the complex scalar *alpha using the
//      same permute / multiply / addsub sequence.
//   4. Writes the 8x3 tile to c. If both components of *beta compare equal
//      to zero, c is overwritten without being read; otherwise beta*c is
//      added first. When rs_c0 == 1 each half-column is moved with one
//      32-byte access; otherwise the general-stride macros above are used.
//
// Parameters:
//   k0     - number of k steps; split into k0/4 passes of the 4x-unrolled
//            main loop plus k0%4 passes of the edge loop.
//   alpha  - pointer to the complex scalar applied to the a*b product
//            (components read at byte offsets 0 and 4).
//   a      - panel read 64 bytes per k step with vmovaps, i.e. the loads
//            require 32-byte alignment.
//   b      - panel read 24 bytes per k step with vbroadcastss.
//   beta   - pointer to the complex scalar applied to the existing c.
//   c      - output tile.
//   rs_c0  - row stride of c, in elements (scaled by 8 inside the asm).
//   cs_c0  - column stride of c, in elements (scaled by 8 inside the asm).
//   data   - not referenced by the active code; only the commented-out
//            a_next/b_next lines mention it.
//   cntx   - not referenced in this function.
void bli_cgemm_haswell_asm_8x3
     (
       dim_t k0,
       scomplex* restrict alpha,
       scomplex* restrict a,
       scomplex* restrict b,
       scomplex* restrict beta,
       scomplex* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	__asm__ volatile
	(
	" \n\t"
	"vzeroall \n\t" // zero all xmm/ymm registers.
	" \n\t"
	" \n\t"
	"movq %2, %%rax \n\t" // load address of a.
	"movq %3, %%rbx \n\t" // load address of b.
	//"movq %9, %%r15 \n\t" // load address of b_next.
	" \n\t"
	// Bias rax by 128 bytes; the loads of a below use displacements from
	// -4*32 to 3*32 relative to this biased pointer.
	"addq $32 * 4, %%rax \n\t"
	" \n\t" // initialize loop by pre-loading
	"vmovaps -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	"movq %6, %%rcx \n\t" // load address of c
	"movq %8, %%rdi \n\t" // load cs_c
	"leaq (,%%rdi,8), %%rdi \n\t" // cs_c *= sizeof(scomplex)
	" \n\t"
	// r11 and r12 are kept until the store phase, where they address
	// columns 1 and 2 of c.
	"leaq (%%rcx,%%rdi,1), %%r11 \n\t" // r11 = c + 1*cs_c;
	"leaq (%%rcx,%%rdi,2), %%r12 \n\t" // r12 = c + 2*cs_c;
	" \n\t"
	"prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*cs_c
	"prefetcht0 7 * 8(%%r11) \n\t" // prefetch c + 1*cs_c
	"prefetcht0 7 * 8(%%r12) \n\t" // prefetch c + 2*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %0, %%rsi \n\t" // i = k_iter;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .CCONSIDKLEFT \n\t" // if i == 0, jump to code that
	" \n\t" // contains the k_left loop.
	" \n\t"
	" \n\t"
	".CLOOPKITER: \n\t" // MAIN LOOP
	" \n\t"
	" \n\t"
	// In every iteration ymm0/ymm1 hold the two 32-byte halves of the
	// current column of a, and ymm2/ymm3 hold the two broadcast float
	// components of one element of b.
	" \n\t" // iteration 0
	"prefetcht0 32 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastss 0 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 1 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 2 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 3 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 4 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 5 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovaps -2 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps -1 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 1
	"vbroadcastss 6 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 7 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 8 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 9 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 10 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 11 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovaps 0 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps 1 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 2
	"prefetcht0 38 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastss 12 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 13 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 14 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 15 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 16 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 17 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovaps 2 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps 3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 3
	"vbroadcastss 18 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 19 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 20 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 21 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 22 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 23 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $4 * 8 * 8, %%rax \n\t" // a += 4*8 (unroll x mr)
	"addq $4 * 3 * 8, %%rbx \n\t" // b += 4*3 (unroll x nr)
	" \n\t"
	// Pre-load a for the next pass (or for the edge loop).
	"vmovaps -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .CLOOPKITER \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".CCONSIDKLEFT: \n\t"
	" \n\t"
	"movq %1, %%rsi \n\t" // i = k_left;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .CPOSTACCUM \n\t" // if i == 0, we're done; jump to end.
	" \n\t" // else, we prepare to enter k_left loop.
	" \n\t"
	" \n\t"
	".CLOOPKLEFT: \n\t" // EDGE LOOP
	" \n\t"
	"prefetcht0 32 * 8(%%rax) \n\t"
	" \n\t"
	"vbroadcastss 0 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 1 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastss 2 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 3 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastss 4 * 4(%%rbx), %%ymm2 \n\t"
	"vbroadcastss 5 * 4(%%rbx), %%ymm3 \n\t"
	"vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $1 * 8 * 8, %%rax \n\t" // a += 1*8 (unroll x mr)
	"addq $1 * 3 * 8, %%rbx \n\t" // b += 1*3 (unroll x nr)
	" \n\t"
	"vmovaps -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovaps -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .CLOOPKLEFT \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	".CPOSTACCUM: \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // permute even and odd elements
	" \n\t" // of ymm6/7, ymm10/11, ymm14/15
	"vpermilps $0xb1, %%ymm6, %%ymm6 \n\t"
	"vpermilps $0xb1, %%ymm7, %%ymm7 \n\t"
	"vpermilps $0xb1, %%ymm10, %%ymm10 \n\t"
	"vpermilps $0xb1, %%ymm11, %%ymm11 \n\t"
	"vpermilps $0xb1, %%ymm14, %%ymm14 \n\t"
	"vpermilps $0xb1, %%ymm15, %%ymm15 \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // subtract/add even/odd elements
	"vaddsubps %%ymm6, %%ymm4, %%ymm4 \n\t"
	"vaddsubps %%ymm7, %%ymm5, %%ymm5 \n\t"
	" \n\t"
	"vaddsubps %%ymm10, %%ymm8, %%ymm8 \n\t"
	"vaddsubps %%ymm11, %%ymm9, %%ymm9 \n\t"
	" \n\t"
	"vaddsubps %%ymm14, %%ymm12, %%ymm12 \n\t"
	"vaddsubps %%ymm15, %%ymm13, %%ymm13 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %4, %%rax \n\t" // load address of alpha
	"vbroadcastss (%%rax), %%ymm0 \n\t" // load alpha_r and duplicate
	"vbroadcastss 4(%%rax), %%ymm1 \n\t" // load alpha_i and duplicate
	" \n\t"
	" \n\t"
	// Complex scale by alpha, one register at a time: ymm3 receives the
	// pair-swapped copy, then result = x*alpha_r -/+ swap(x)*alpha_i.
	"vpermilps $0xb1, %%ymm4, %%ymm3 \n\t"
	"vmulps %%ymm0, %%ymm4, %%ymm4 \n\t"
	"vmulps %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubps %%ymm3, %%ymm4, %%ymm4 \n\t"
	" \n\t"
	"vpermilps $0xb1, %%ymm5, %%ymm3 \n\t"
	"vmulps %%ymm0, %%ymm5, %%ymm5 \n\t"
	"vmulps %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubps %%ymm3, %%ymm5, %%ymm5 \n\t"
	" \n\t"
	" \n\t"
	"vpermilps $0xb1, %%ymm8, %%ymm3 \n\t"
	"vmulps %%ymm0, %%ymm8, %%ymm8 \n\t"
	"vmulps %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubps %%ymm3, %%ymm8, %%ymm8 \n\t"
	" \n\t"
	"vpermilps $0xb1, %%ymm9, %%ymm3 \n\t"
	"vmulps %%ymm0, %%ymm9, %%ymm9 \n\t"
	"vmulps %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubps %%ymm3, %%ymm9, %%ymm9 \n\t"
	" \n\t"
	" \n\t"
	"vpermilps $0xb1, %%ymm12, %%ymm3 \n\t"
	"vmulps %%ymm0, %%ymm12, %%ymm12 \n\t"
	"vmulps %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubps %%ymm3, %%ymm12, %%ymm12 \n\t"
	" \n\t"
	"vpermilps $0xb1, %%ymm13, %%ymm3 \n\t"
	"vmulps %%ymm0, %%ymm13, %%ymm13 \n\t"
	"vmulps %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubps %%ymm3, %%ymm13, %%ymm13 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %5, %%rbx \n\t" // load address of beta
	"vbroadcastss (%%rbx), %%ymm1 \n\t" // load beta_r and duplicate
	"vbroadcastss 4(%%rbx), %%ymm2 \n\t" // load beta_i and duplicate
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	// rsi, rdx and r13 are row-stride quantities in bytes; rcx still
	// holds c, and r11/r12 still hold c + 1*cs_c and c + 2*cs_c.
	"movq %7, %%rsi \n\t" // load rs_c
	"leaq (,%%rsi,8), %%rsi \n\t" // rsi = rs_c * sizeof(scomplex)
	"leaq (,%%rsi,4), %%rdx \n\t" // rdx = 4*rs_c;
	"leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*rs_c;
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // now avoid loading C if beta == 0
	"vxorps %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero.
	"vucomiss %%xmm0, %%xmm1 \n\t" // set ZF if beta_r == 0.
	"sete %%r8b \n\t" // r8b = ( ZF == 1 ? 1 : 0 );
	"vucomiss %%xmm0, %%xmm2 \n\t" // set ZF if beta_i == 0.
	"sete %%r9b \n\t" // r9b = ( ZF == 1 ? 1 : 0 );
	"andb %%r8b, %%r9b \n\t" // ZF is cleared only if r8b & r9b == 1.
	"jne .CBETAZERO \n\t" // if ZF = 0 (both parts zero), jump to beta == 0 case
	" \n\t"
	" \n\t"
	"cmpq $8, %%rsi \n\t" // set ZF if (8*rs_c) == 8.
	"jz .CCOLSTORED \n\t" // jump to column storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".CGENSTORED: \n\t"
	" \n\t"
	" \n\t"
	// General stride, beta != 0. For each half-column: ymm0 = beta*c,
	// add the accumulator, scatter back. rcx visits rows 0-3 and then
	// rows 4-7 of column 0, 1 and 2 in turn.
	CGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddps %%ymm4, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddps %%ymm5, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddps %%ymm8, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddps %%ymm9, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddps %%ymm12, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddps %%ymm13, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .CDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".CCOLSTORED: \n\t"
	" \n\t"
	" \n\t"
	// Column storage, beta != 0: same traversal as above, using the
	// contiguous 32-byte load/store macros.
	CGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddps %%ymm4, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_CS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddps %%ymm5, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_CS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddps %%ymm8, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_CS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddps %%ymm9, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_CS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddps %%ymm12, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_CS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	CGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddps %%ymm13, %%ymm0, %%ymm0 \n\t"
	CGEMM_OUTPUT_CS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .CDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".CBETAZERO: \n\t"
	" \n\t"
	"cmpq $8, %%rsi \n\t" // set ZF if (8*rs_c) == 8.
	"jz .CCOLSTORBZ \n\t" // jump to column storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".CGENSTORBZ: \n\t"
	" \n\t"
	" \n\t"
	// General stride, beta == 0: copy each result register into ymm0 and
	// scatter it; c is never read on this path.
	"vmovaps %%ymm4, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm5, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	"vmovaps %%ymm8, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm9, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	"vmovaps %%ymm12, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 4*rs_c;
	" \n\t"
	" \n\t"
	"vmovaps %%ymm13, %%ymm0 \n\t"
	CGEMM_OUTPUT_GS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .CDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".CCOLSTORBZ: \n\t"
	" \n\t"
	" \n\t"
	// Column storage, beta == 0: six unaligned 32-byte stores; rdx
	// (4*rs_c in bytes) offsets to rows 4-7 of each column.
	"vmovups %%ymm4, (%%rcx) \n\t"
	"vmovups %%ymm5, (%%rcx,%%rdx,1) \n\t"
	" \n\t"
	"vmovups %%ymm8, (%%r11) \n\t"
	"vmovups %%ymm9, (%%r11,%%rdx,1) \n\t"
	" \n\t"
	"vmovups %%ymm12, (%%r12) \n\t"
	"vmovups %%ymm13, (%%r12,%%rdx,1) \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".CDONE: \n\t"
	" \n\t"
	"vzeroupper \n\t"
	" \n\t"

	: // output operands (none)
	: // input operands
	  "m" (k_iter), // 0
	  "m" (k_left), // 1
	  "m" (a), // 2
	  "m" (b), // 3
	  "m" (alpha), // 4
	  "m" (beta), // 5
	  "m" (c), // 6
	  "m" (rs_c), // 7
	  "m" (cs_c)/*, // 8
	  "m" (b_next), // 9
	  "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	);
}

// assumes beta.r, beta.i have been broadcast into ymm1, ymm2.
// outputs to ymm0
// Loads two dcomplex elements of C, one from (rcx) and one from (rcx,rsi),
// packs them into ymm0 and multiplies them by the complex scalar whose real
// part is broadcast in ymm1 and whose imaginary part is broadcast in ymm2.
// ymm3 is used as scratch.
#define ZGEMM_INPUT_SCALE_GS_BETA_NZ \
	"vmovupd (%%rcx), %%xmm0 \n\t" \
	"vmovupd (%%rcx,%%rsi), %%xmm3 \n\t" \
	"vinsertf128 $1, %%xmm3, %%ymm0, %%ymm0 \n\t" \
	"vpermilpd $0x5, %%ymm0, %%ymm3 \n\t" \
	"vmulpd %%ymm1, %%ymm0, %%ymm0 \n\t" \
	"vmulpd %%ymm2, %%ymm3, %%ymm3 \n\t" \
	"vaddsubpd %%ymm3, %%ymm0, %%ymm0 \n\t"

// assumes values to output are in ymm0
// Stores the low 128 bits of ymm0 to (rcx) and the high 128 bits to
// (rcx,rsi). ymm3 is used as scratch.
#define ZGEMM_OUTPUT_GS \
	"vextractf128 $1, %%ymm0, %%xmm3 \n\t" \
	"vmovupd %%xmm0, (%%rcx) \n\t" \
	"vmovupd %%xmm3, (%%rcx,%%rsi ) \n\t" \

// Contiguous counterpart of ZGEMM_INPUT_SCALE_GS_BETA_NZ: loads 32 bytes
// from (rcx) into ymm0 and scales them by the complex scalar broadcast in
// ymm1 (real part) and ymm2 (imaginary part). ymm3 is used as scratch.
#define ZGEMM_INPUT_SCALE_CS_BETA_NZ \
	"vmovups (%%rcx), %%ymm0 \n\t" \
	"vpermilpd $0x5, %%ymm0, %%ymm3 \n\t" \
	"vmulpd %%ymm1, %%ymm0, %%ymm0 \n\t" \
	"vmulpd %%ymm2, %%ymm3, %%ymm3 \n\t" \
	"vaddsubpd %%ymm3, %%ymm0, %%ymm0 \n\t"

// Contiguous counterpart of ZGEMM_OUTPUT_GS: stores all of ymm0 to (rcx).
#define ZGEMM_OUTPUT_CS \
	"vmovupd %%ymm0, (%%rcx) \n\t" \

// Double-complex gemm microkernel: accumulates a 4x3 tile of products of
// the panels at a and b over k0 iterations, scales the tile by alpha, and
// writes c := beta * c + alpha * tile (c is not read when beta == 0).
//
//   k0           number of rank-1 updates to accumulate.
//   alpha, beta  pointers to the complex scalars.
//   a            read with aligned 32-byte loads (vmovapd), 4 dcomplex per
//                iteration of k.
//   b            3 dcomplex (6 doubles, each broadcast) per iteration of k.
//   c            output tile; rs_c0 and cs_c0 are its row and column
//                strides in units of dcomplex. When rs_c0 == 1 whole
//                columns are accessed with contiguous 32-byte loads/stores;
//                otherwise each element is accessed individually.
//   data, cntx   not referenced by this function.
//
// Register use: ymm0/ymm1 hold the current elements of a, ymm2/ymm3 the
// broadcast elements of b, and ymm4-ymm15 the accumulators (ymm4/5, ymm8/9
// and ymm12/13 for columns 0, 1 and 2; ymm6/7, ymm10/11 and ymm14/15 for
// the products with the odd-indexed doubles of b, which are permuted and
// combined into the former after the k loops).
void bli_zgemm_haswell_asm_4x3
     (
       dim_t               k0,
       dcomplex*  restrict alpha,
       dcomplex*  restrict a,
       dcomplex*  restrict b,
       dcomplex*  restrict beta,
       dcomplex*  restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// The k loop is unrolled by 4: k_iter counts unrolled iterations and
	// k_left the remaining single iterations.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	__asm__ volatile
	(
	" \n\t"
	"vzeroall \n\t" // zero all xmm/ymm registers.
	" \n\t"
	" \n\t"
	"movq %2, %%rax \n\t" // load address of a.
	"movq %3, %%rbx \n\t" // load address of b.
	//"movq %9, %%r15 \n\t" // load address of b_next.
	" \n\t"
	"addq $32 * 4, %%rax \n\t" // bias rax by 128 bytes; loads below use offsets -4*32 .. 3*32.
	" \n\t" // initialize loop by pre-loading
	"vmovapd -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	"movq %6, %%rcx \n\t" // load address of c
	"movq %8, %%rdi \n\t" // load cs_c
	"leaq (,%%rdi,8), %%rdi \n\t" // cs_c *= sizeof(dcomplex)
	"leaq (,%%rdi,2), %%rdi \n\t" // (multiplied by 8, then by 2)
	" \n\t"
	"leaq (%%rcx,%%rdi,1), %%r11 \n\t" // r11 = c + 1*cs_c;
	"leaq (%%rcx,%%rdi,2), %%r12 \n\t" // r12 = c + 2*cs_c;
	" \n\t"
	"prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*cs_c
	"prefetcht0 7 * 8(%%r11) \n\t" // prefetch c + 1*cs_c
	"prefetcht0 7 * 8(%%r12) \n\t" // prefetch c + 2*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %0, %%rsi \n\t" // i = k_iter;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .ZCONSIDKLEFT \n\t" // if i == 0, jump to code that
	" \n\t" // contains the k_left loop.
	" \n\t"
	" \n\t"
	".ZLOOPKITER: \n\t" // MAIN LOOP
	" \n\t"
	" \n\t"
	" \n\t" // iteration 0
	"prefetcht0 32 * 16(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 0 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 1 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 2 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 3 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 4 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 5 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd -2 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd -1 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 1
	"vbroadcastsd 6 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 7 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 8 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 9 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 10 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 11 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd 0 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd 1 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 2
	"prefetcht0 38 * 16(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 12 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 13 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 14 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 15 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 16 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 17 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"vmovapd 2 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd 3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t" // iteration 3
	"vbroadcastsd 18 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 19 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 20 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 21 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 22 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 23 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $4 * 4 * 16, %%rax \n\t" // a += 4*4 (unroll x mr)
	"addq $4 * 3 * 16, %%rbx \n\t" // b += 4*3 (unroll x nr)
	" \n\t"
	"vmovapd -4 * 32(%%rax), %%ymm0 \n\t" // pre-load a for the next iteration.
	"vmovapd -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .ZLOOPKITER \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".ZCONSIDKLEFT: \n\t"
	" \n\t"
	"movq %1, %%rsi \n\t" // i = k_left;
	"testq %%rsi, %%rsi \n\t" // check i via logical AND.
	"je .ZPOSTACCUM \n\t" // if i == 0, we're done; jump to end.
	" \n\t" // else, we prepare to enter k_left loop.
	" \n\t"
	" \n\t"
	".ZLOOPKLEFT: \n\t" // EDGE LOOP
	" \n\t"
	"prefetcht0 32 * 16(%%rax) \n\t"
	" \n\t"
	"vbroadcastsd 0 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 1 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t"
	" \n\t"
	"vbroadcastsd 2 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 3 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t"
	" \n\t"
	"vbroadcastsd 4 * 8(%%rbx), %%ymm2 \n\t"
	"vbroadcastsd 5 * 8(%%rbx), %%ymm3 \n\t"
	"vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t"
	"vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t"
	"vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t"
	"vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t"
	" \n\t"
	"addq $1 * 4 * 16, %%rax \n\t" // a += 1*4 (unroll x mr)
	"addq $1 * 3 * 16, %%rbx \n\t" // b += 1*3 (unroll x nr)
	" \n\t"
	"vmovapd -4 * 32(%%rax), %%ymm0 \n\t"
	"vmovapd -3 * 32(%%rax), %%ymm1 \n\t"
	" \n\t"
	" \n\t"
	"decq %%rsi \n\t" // i -= 1;
	"jne .ZLOOPKLEFT \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	".ZPOSTACCUM: \n\t"
	" \n\t"
	" \n\t" // permute even and odd elements
	" \n\t" // of ymm6/7, ymm10/11, ymm14/15
	"vpermilpd $0x5, %%ymm6, %%ymm6 \n\t"
	"vpermilpd $0x5, %%ymm7, %%ymm7 \n\t"
	"vpermilpd $0x5, %%ymm10, %%ymm10 \n\t"
	"vpermilpd $0x5, %%ymm11, %%ymm11 \n\t"
	"vpermilpd $0x5, %%ymm14, %%ymm14 \n\t"
	"vpermilpd $0x5, %%ymm15, %%ymm15 \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // subtract/add even/odd elements
	"vaddsubpd %%ymm6, %%ymm4, %%ymm4 \n\t"
	"vaddsubpd %%ymm7, %%ymm5, %%ymm5 \n\t"
	" \n\t"
	"vaddsubpd %%ymm10, %%ymm8, %%ymm8 \n\t"
	"vaddsubpd %%ymm11, %%ymm9, %%ymm9 \n\t"
	" \n\t"
	"vaddsubpd %%ymm14, %%ymm12, %%ymm12 \n\t"
	"vaddsubpd %%ymm15, %%ymm13, %%ymm13 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %4, %%rax \n\t" // load address of alpha
	"vbroadcastsd (%%rax), %%ymm0 \n\t" // load alpha_r and duplicate
	"vbroadcastsd 8(%%rax), %%ymm1 \n\t" // load alpha_i and duplicate
	" \n\t"
	" \n\t" // scale each accumulator by alpha (complex multiply)
	"vpermilpd $0x5, %%ymm4, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm4, %%ymm4 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm4, %%ymm4 \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm5, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm5, %%ymm5 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm5, %%ymm5 \n\t"
	" \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm8, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm8, %%ymm8 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm8, %%ymm8 \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm9, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm9, %%ymm9 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm9, %%ymm9 \n\t"
	" \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm12, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm12, %%ymm12 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm12, %%ymm12 \n\t"
	" \n\t"
	"vpermilpd $0x5, %%ymm13, %%ymm3 \n\t"
	"vmulpd %%ymm0, %%ymm13, %%ymm13 \n\t"
	"vmulpd %%ymm1, %%ymm3, %%ymm3 \n\t"
	"vaddsubpd %%ymm3, %%ymm13, %%ymm13 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %5, %%rbx \n\t" // load address of beta
	"vbroadcastsd (%%rbx), %%ymm1 \n\t" // load beta_r and duplicate
	"vbroadcastsd 8(%%rbx), %%ymm2 \n\t" // load beta_i and duplicate
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movq %7, %%rsi \n\t" // load rs_c
	"leaq (,%%rsi,8), %%rsi \n\t" // rsi = rs_c * sizeof(dcomplex)
	"leaq (,%%rsi,2), %%rsi \n\t" // (multiplied by 8, then by 2)
	"leaq (,%%rsi,2), %%rdx \n\t" // rdx = 2*rs_c;
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // now avoid loading C if beta == 0
	// NOTE(review): vucomisd also sets ZF for an unordered compare, so a
	// NaN component of beta appears to be treated like zero here - confirm.
	"vxorpd %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero.
	"vucomisd %%xmm0, %%xmm1 \n\t" // set ZF if beta_r == 0.
	"sete %%r8b \n\t" // r8b = ( ZF == 1 ? 1 : 0 );
	"vucomisd %%xmm0, %%xmm2 \n\t" // set ZF if beta_i == 0.
	"sete %%r9b \n\t" // r9b = ( ZF == 1 ? 1 : 0 );
	"andb %%r8b, %%r9b \n\t" // r9b &= r8b; ZF is cleared iff both were 1.
	"jne .ZBETAZERO \n\t" // if ZF = 0 (beta_r == 0 and beta_i == 0), jump to beta == 0 case
	" \n\t"
	" \n\t"
	"cmpq $16, %%rsi \n\t" // set ZF if (16*rs_c) == 16.
	"jz .ZCOLSTORED \n\t" // jump to column storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".ZGENSTORED: \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm4, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm5, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm8, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm9, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm12, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_GS_BETA_NZ
	"vaddpd %%ymm13, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .ZDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".ZCOLSTORED: \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddpd %%ymm4, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_CS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddpd %%ymm5, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_CS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddpd %%ymm8, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_CS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddpd %%ymm9, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_CS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddpd %%ymm12, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_CS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	ZGEMM_INPUT_SCALE_CS_BETA_NZ
	"vaddpd %%ymm13, %%ymm0, %%ymm0 \n\t"
	ZGEMM_OUTPUT_CS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .ZDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".ZBETAZERO: \n\t"
	" \n\t"
	"cmpq $16, %%rsi \n\t" // set ZF if (16*rs_c) == 16.
	"jz .ZCOLSTORBZ \n\t" // jump to column storage case
	" \n\t"
	" \n\t"
	" \n\t"
	".ZGENSTORBZ: \n\t"
	" \n\t"
	" \n\t"
	"vmovapd %%ymm4, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm5, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"movq %%r11, %%rcx \n\t" // rcx = c + 1*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	"vmovapd %%ymm8, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm9, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"movq %%r12, %%rcx \n\t" // rcx = c + 2*cs_c
	" \n\t"
	" \n\t"
	" \n\t"
	"vmovapd %%ymm12, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	"addq %%rdx, %%rcx \n\t" // c += 2*rs_c;
	" \n\t"
	" \n\t"
	"vmovapd %%ymm13, %%ymm0 \n\t"
	ZGEMM_OUTPUT_GS
	" \n\t"
	" \n\t"
	" \n\t"
	"jmp .ZDONE \n\t" // jump to end.
	" \n\t"
	" \n\t"
	" \n\t"
	".ZCOLSTORBZ: \n\t"
	" \n\t"
	" \n\t" // rdx = 2*rs_c*sizeof(dcomplex), i.e. 32 bytes on this path.
	"vmovupd %%ymm4, (%%rcx) \n\t"
	"vmovupd %%ymm5, (%%rcx,%%rdx,1) \n\t"
	" \n\t"
	"vmovupd %%ymm8, (%%r11) \n\t"
	"vmovupd %%ymm9, (%%r11,%%rdx,1) \n\t"
	" \n\t"
	"vmovupd %%ymm12, (%%r12) \n\t"
	"vmovupd %%ymm13, (%%r12,%%rdx,1) \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	".ZDONE: \n\t"
	" \n\t"
	"vzeroupper \n\t"
	" \n\t"

	: // output operands (none)
	: // input operands
	  "m" (k_iter), // 0
	  "m" (k_left), // 1
	  "m" (a), // 2
	  "m" (b), // 3
	  "m" (alpha), // 4
	  "m" (beta), // 5
	  "m" (c), // 6
	  "m" (rs_c), // 7
	  "m" (cs_c)/*, // 8
	  "m" (b_next), // 9
	  "m" (a_next)*/ // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	);
}
blis-0.6.1/kernels/haswell/3/sup/000077500000000000000000000000001360743507500165345ustar00rootroot00000000000000blis-0.6.1/kernels/haswell/3/sup/bli_gemmsup_rd_haswell_asm_d6x8.c000066400000000000000000003604641360743507500251350ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrc: -------- ------ | | | | | | | | -------- ------ | | | | | | | | -------- += ------ ... | | | | | | | | -------- ------ | | | | | | | | -------- ------ : -------- ------ : Assumptions: - C is row-stored and B is column-stored; - A is row-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential microkernel is well-suited for a dot-product-based accumulation that performs vector loads from both A and B. */ // Prototype reference microkernels. GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) #if 0 // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4
#define NUM_NR 4
#define FUNCPTR_T dgemmsup_ker_ft

static dim_t mrs[NUM_MR] = { 6, 3, 2, 1 };
static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 };
static FUNCPTR_T kmap[NUM_MR][NUM_NR] =
{ /* 8 4 2 1 */
/* 6 */ { bli_dgemmsup_rd_haswell_asm_6x8m, bli_dgemmsup_rd_haswell_asm_6x4m, bli_dgemmsup_rd_haswell_asm_6x2m, bli_dgemmsup_r_haswell_ref_6x1 },
/* 3 */ { bli_dgemmsup_rd_haswell_asm_3x8m, bli_dgemmsup_rd_haswell_asm_3x4m, bli_dgemmsup_rd_haswell_asm_3x2m, bli_dgemmsup_r_haswell_ref_3x1 },
/* 2 */ { bli_dgemmsup_rd_haswell_asm_2x8m, bli_dgemmsup_rd_haswell_asm_2x4m, bli_dgemmsup_rd_haswell_asm_2x2m, bli_dgemmsup_r_haswell_ref_2x1 },
/* 1 */ { bli_dgemmsup_rd_haswell_asm_1x8m, bli_dgemmsup_rd_haswell_asm_1x4m, bli_dgemmsup_rd_haswell_asm_1x2m, bli_dgemmsup_r_haswell_ref_1x1 }
};
#endif


// Double-precision, dot-product-based gemmsup kernel for a tile of C of up
// to 6 rows and 8 columns: c := beta * c + alpha * a * b.
//
// Structure:
//  - If n0 is not a multiple of 8, the work is handed to the 6x4 and 6x2
//    kernels and, for a last single column, to bli_dgemv_ex(); the function
//    then returns without running the code below.
//  - Otherwise the inline assembly processes m0 / 3 groups of three rows,
//    each as two 3x4 sub-tiles (jj = 0 and jj = 4). For every sub-tile,
//    twelve ymm accumulators (one per element of C) gather products of
//    four consecutive elements of a row of a with four consecutive
//    elements of a column of b, and are reduced horizontally at the end.
//  - The remaining m0 % 3 rows are handled by the 2x8 or 1x8 kernel.
//
// The assembly reads only rs_a, cs_b and rs_c: elements along k are loaded
// contiguously from both a and b, and the four results of a row of C are
// stored contiguously. The loads of cs_a, rs_b and cs_c are commented out.
// conja is passed through to the sub-kernels, and conjb additionally to
// bli_dgemv_ex(); data and cntx are only forwarded.
void bli_dgemmsup_rd_haswell_asm_6x8
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	uint64_t n_left = n0 % 8;

	// First check whether this is a edge case in the n dimension. If so,
	// dispatch other 6x?m kernels, as needed.
	if ( n_left )
	{
		double* restrict cij = c;
		double* restrict bj = b;
		double* restrict ai = a;

		if ( 4 <= n_left )
		{
			const dim_t nr_cur = 4;

			bli_dgemmsup_rd_haswell_asm_6x4
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 2 <= n_left )
		{
			const dim_t nr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_6x2
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 1 == n_left )
		{
#if 0
			const dim_t nr_cur = 1;

			bli_dgemmsup_r_haswell_ref
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
#else
			// A single remaining column of C is a matrix-vector product.
			bli_dgemv_ex
			(
			  BLIS_NO_TRANSPOSE, conjb, m0, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0,
			  beta, cij, rs_c0, cntx, NULL
			);
#endif
		}
		return;
	}

	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// The k dimension is split three ways: k_iter16 passes of the 4x
	// unrolled loop (16 values of k each), k_iter4 passes of the vector
	// edge loop (4 values each), and k_left1 scalar iterations.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t m_iter = m0 / 3;
	uint64_t m_left = m0 % 3;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// Fewer than three rows: nothing for the assembly to do.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall() // zero all xmm/ymm registers.

	//mov(var(a), r14) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	//mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rdx) // load address of b.
	//mov(var(rs_b), r10) // load rs_b
	mov(var(cs_b), r11) // load cs_b
	//lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b
	lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a

	//mov(var(c), r12) // load address of c
	//mov(var(rs_c), rdi) // load rs_c
	//lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

	// r12 = rcx = c
	// r14 = rax = a
	// rdx = rbx = b
	// r9 = m dim index ii
	// r15 = n dim index jj

	mov(imm(0), r15) // jj = 0;

	label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ]
	// (r15 itself takes the values 0 and 4; see the end of the loop.)

	mov(var(a), r14) // load address of a
	mov(var(c), r12) // load address of c

	lea(mem( , r15, 1), rsi) // rsi = r15 = jj;
	imul(imm(1*8), rsi) // rsi *= cs_c = 1*8
	lea(mem(r12, rsi, 1), r12) // r12 = c + jj*cs_c;

	lea(mem( , r15, 1), rsi) // rsi = r15 = jj;
	imul(r11, rsi) // rsi *= cs_b;
	lea(mem(rdx, rsi, 1), rdx) // rdx += jj*cs_b; (rdx is updated in place)

	mov(var(m_iter), r9) // ii = m_iter;

	label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter ... 1 0 ]

#if 0
	vzeroall() // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem(r12), rcx) // rcx = c_iijj;
	lea(mem(r14), rax) // rax = a_ii;
	lea(mem(rdx), rbx) // rbx = b_jj;

#if 1
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)
	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
#endif
	lea(mem(r8, r8, 4), rdi) // rdi = 5*rs_a (only used by the disabled prefetches below)

	mov(var(k_iter16), rsi) // i = k_iter16;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKITER4) // if i == 0, jump to code that
	                   // contains the k_iter4 loop.

	label(.DLOOPKITER16) // MAIN LOOP

	// ---------------------------------- iteration 0

#if 0
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a
	prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*cs_a
#endif

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

#if 0
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a
	prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*cs_a
#endif

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER16) // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi) // i = k_iter4;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT1) // if i == 0, jump to code that
	                   // considers k_left1 loop.
	                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4) // EDGE LOOP (ymm)

#if 0
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a
	prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*cs_a
#endif

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER4) // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi) // i = k_left1;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	                // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1) // EDGE LOOP (scalar)
	// NOTE: We must use ymm registers here bc
	// using the xmm registers would zero out the
	// high bits of the destination registers,
	// which would destroy intermediate results.

	vmovsd(mem(rax ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax) // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx) // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT1) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Each accumulator holds four partial sums of one element of C:
	// ymm4 ymm7 ymm10 ymm13
	// ymm5 ymm8 ymm11 ymm14
	// ymm6 ymm9 ymm12 ymm15
	// Reduce every accumulator to a scalar and gather each row of three
	// rows into a single register.

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	// ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	// ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	// ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4) // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi) // load cs_c
	//lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), ymm3, ymm4) // ymm4 += beta * c(row 0)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	// Advance to the next group of three rows (two lea's each: +2, then +1).
	lea(mem(r12, rdi, 2), r12) //
	lea(mem(r12, rdi, 1), r12) // c_ii = r12 += 3*rs_c

	lea(mem(r14, r8, 2), r14) //
	lea(mem(r14, r8, 1), r14) // a_ii = r14 += 3*rs_a

	dec(r9) // ii -= 1;
	jne(.DLOOP3X4I) // iterate again if ii != 0.

	add(imm(4), r15) // jj += 4;
	cmp(imm(4), r15) // compare jj to 4
	jle(.DLOOP3X4J) // if jj <= 4, jump to beginning
	                // of jj loop; otherwise, loop ends.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [m_iter] "m" (m_iter),
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
	if ( m_left )
	{
		const dim_t nr_cur = 8;
		// First row not covered by the three-row groups above.
		const dim_t i_edge = m0 - ( dim_t )m_left;

		double* restrict cij = c + i_edge*rs_c;
		double* restrict bj = b;
		double* restrict ai = a + i_edge*rs_a;

		// m_left is m0 % 3, so at most one of the two branches below runs.
		if ( 2 == m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_2x8
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			//cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rd_haswell_asm_1x8
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
	}
}

void bli_dgemmsup_rd_haswell_asm_2x8
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall() // zero all xmm/ymm registers.

	mov(var(a), r14) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	//mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rdx) // load address of b.
//mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = unused // r15 = n dim index jj // r10 = unused mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) #endif lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r12, rsi, 1), rcx) // rcx = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(rdx, rsi, 1), rbx) // rbx = b + 4*jj*cs_b; lea(mem(r14), rax) // rax = a; #if 0 prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c #else prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c #endif mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) 
vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. 
vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- /* asm body of the 1x8 kernel: for r15 = 0 and 4, accumulate one row of a against four columns of b into ymm4, ymm7, ymm10 and ymm13, reduce them into one ymm register, scale by alpha and update four consecutive elements of c. */ begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = unused (no ii loop in this kernel) // r15 = n dim index jj mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles.
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm13, ymm13, ymm13) #endif lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r12, rsi, 1), rcx) // rcx = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(rdx, rsi, 1), rbx) // rbx = b + 4*jj*cs_b; lea(mem(r14), rax) // rax = a; #if 0 prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c #else prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c #endif mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 3 vmovupd(mem(rax ),
ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rax ), xmm0) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0.
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends.
label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } /* bli_dgemmsup_rd_haswell_asm_6x4: inline-assembly kernel that updates m0 rows by 4 columns of c. The asm block loops over m_iter = m0 / 3 groups of three rows (it is skipped when m_iter == 0); for each group it accumulates element-wise products of three rows of a (rows rs_a apart) with four columns of b (columns cs_b apart) over k0, split into a 16-unrolled loop, a 4-unrolled loop and a scalar remainder loop, then sums the accumulators horizontally, scales by alpha and stores three rows of c (rows rs_c apart), adding beta*c first unless beta == 0. The m_left = m0 % 3 leftover rows are handled after the asm block by calling bli_dgemmsup_rd_haswell_asm_2x4 or bli_dgemmsup_rd_haswell_asm_1x4. n0 is not referenced; conja, conjb, data and cntx are only forwarded to those calls. */ void bli_dgemmsup_rd_haswell_asm_6x4 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t m_iter = m0 / 3; uint64_t m_left = m0 % 3; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a.
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = m dim index ii // r15 = unused (no jj loop in this kernel) mov(var(m_iter), r9) // ii = m_iter; label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter ... 1 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c + 3*ii*rs_c; lea(mem(r14), rax) // rax = a + 3*ii*rs_a; lea(mem(rdx), rbx) // rbx = b; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r8, r8, 4), rdi) // rdi = 5*rs_a (only read by the disabled #if 0 prefetches; rdi is reloaded with rs_c before the stores) mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop.
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*rs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 #if 0 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*rs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a +=
4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop.
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0.
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end.
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) lea(mem(r12, rdi, 2), r12) // r12 += 2*rs_c lea(mem(r12, rdi, 1), r12) // r12 += 1*rs_c, i.e. c_ii = r12 += 3*rs_c lea(mem(r14, r8, 2), r14) // r14 += 2*rs_a lea(mem(r14, r8, 1), r14) // r14 += 1*rs_a, i.e. a_ii = r14 += 3*rs_a dec(r9) // ii -= 1; jne(.DLOOP3X4I) // iterate again if ii != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist.
if ( m_left ) { const dim_t nr_cur = 4; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; double* restrict bj = b; double* restrict ai = a + i_edge*rs_a; if ( 2 == m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); //cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } void bli_dgemmsup_rd_haswell_asm_2x4 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) #endif mov(var(a), rax) // load address of a.
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, 
ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO)
	
	
	label(.DROWSTORBZ)
	
	
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)
	
	vmovupd(ymm5, mem(rcx))
	//add(rdi, rcx)
	
	
	
	
	label(.DDONE)
	
	

    end_asm(
	: // output operands (none)
	: // input operands
      [k_iter16] "m" (k_iter16),
      [k_iter4]  "m" (k_iter4),
      [k_left1]  "m" (k_left1),
      [a]        "m" (a),
      [rs_a]     "m" (rs_a),
      [cs_a]     "m" (cs_a),
      [b]        "m" (b),
      [rs_b]     "m" (rs_b),
      [cs_b]     "m" (cs_b),
      [alpha]    "m" (alpha),
      [beta]     "m" (beta),
      [c]        "m" (c),
      [rs_c]     "m" (rs_c),
      [cs_c]     "m" (cs_c)/*,
      [a_next]   "m" (a_next),
      [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// Double-precision 1x4 kernel: computes
//
//   c[0:4] := beta * c[0:4] + alpha * ( a[0:k0] . b[0:k0, j] ),  j = 0..3
//
// Each of the four dot products is accumulated four elements at a time in
// its own ymm register (ymm4, ymm7, ymm10, ymm13) and reduced horizontally
// after the k loops.
//
// Operand layout, as read by the assembly below:
//   a - one row of k0 doubles read contiguously; the pointer advances by
//       8 bytes per k element, and rs_a0/cs_a0 are not read by the asm.
//   b - four columns of k0 doubles, each read contiguously; consecutive
//       columns are cs_b0 elements apart, and rs_b0 is not read by the asm.
//   c - four contiguous doubles; rs_c0/cs_c0 are not read by the asm.
//
// The k dimension is split into k0/16 passes of a 4x-unrolled vector loop,
// (k0%16)/4 passes of a single vector step, and k0%4 scalar steps.
//
// conja, conjb, m0, n0, data and cntx are not referenced by this kernel.
// When beta compares equal to zero, c is overwritten without being loaded.
void bli_dgemmsup_rd_haswell_asm_1x4
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	/*
	  rrc:
	    --------        -- -- --      | | | |
	    --------        -- -- --  ... | | | |
	    --------   +=   -- -- --      | | | |
	    --------                      | | | |
	    --------                         :
	    --------                         :
	*/

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	                                   // Only the four accumulators used by
	                                   // this kernel are cleared.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm13, ymm13, ymm13)
#endif

	mov(var(a), rax)                   // load address of a.
	//mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	//lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), rcx)                   // load address of c
	//mov(var(rs_c), rdi)                // load rs_c
	//lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c




	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.


	label(.DLOOPKITER16)               // MAIN LOOP
	                                   // (four vector steps per pass, i.e.
	                                   // 16 k elements per pass)


	// ---------------------------------- iteration 0

	vmovupd(mem(rax        ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)


	// ---------------------------------- iteration 1

	vmovupd(mem(rax        ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)


	// ---------------------------------- iteration 2

	vmovupd(mem(rax        ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)


	// ---------------------------------- iteration 3

	vmovupd(mem(rax        ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)



	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.






	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.


	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax        ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)


	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.




	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.




	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rax        ), xmm0)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)


	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.






	label(.DPOSTACCUM)

	                                   // Accumulators holding partial sums:
	                                   // ymm4  ymm7  ymm10 ymm13

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)



	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha






	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)



	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case



	label(.DROWSTORED)

	                                   // ymm4 := beta * c + ymm4, then store.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)



	jmp(.DDONE)                        // jump to end.




	label(.DBETAZERO)



	label(.DROWSTORBZ)

	                                   // beta == 0: store without reading c.
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)




	label(.DDONE)



    end_asm(
	: // output operands (none)
	: // input operands
      [k_iter16] "m" (k_iter16),
      [k_iter4]  "m" (k_iter4),
      [k_left1]  "m" (k_left1),
      [a]        "m" (a),
      [rs_a]     "m" (rs_a),
      [cs_a]     "m" (cs_a),
      [b]        "m" (b),
      [rs_b]     "m" (rs_b),
      [cs_b]     "m" (cs_b),
      [alpha]    "m" (alpha),
      [beta]     "m" (beta),
      [c]        "m" (c),
      [rs_c]     "m" (rs_c),
      [cs_c]     "m" (cs_c)/*,
      [a_next]   "m" (a_next),
      [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// Double-precision m0 x 2 kernel built from 6x2 panels: for every row i
// and column j = 0..1 it computes
//
//   c[i, j] := beta * c[i, j] + alpha * ( a[i, 0:k0] . b[0:k0, j] )
//
// The assembly loop handles m_iter = m0 / 6 panels of six rows each, with
// one ymm accumulator per output element (ymm4..ymm15). The remaining
// m_left = m0 % 6 rows are forwarded, in C, to
// bli_dgemmsup_rd_haswell_asm_3x2, _2x2 and _1x2.
//
// Operand layout, as read by the assembly below:
//   a - rows rs_a0 elements apart, each read contiguously along k
//       (cs_a0 is not read by the asm).
//   b - two columns cs_b0 elements apart, each read contiguously along k
//       (rs_b0 is not read by the asm).
//   c - rows rs_c0 elements apart, two contiguous doubles per row
//       (cs_c0 is not read by the asm).
//
// n0 is not referenced; conja, conjb, data and cntx are only passed
// through to the edge-case kernels.
void bli_dgemmsup_rd_haswell_asm_6x2
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t m_iter = m0 / 6;
	uint64_t m_left = m0 % 6;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// With fewer than six rows there is no full panel; skip the assembly
	// region entirely and go straight to the edge-case dispatch.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), r12)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)


	// r12 = rcx = c
	// r14 = rax = a
	// rdx = rbx = b
	// r9  = m dim index ii
	// (r12/r14/rdx hold the per-panel base addresses; rcx/rax/rbx are the
	// working copies that are advanced inside the k loops.)

	mov(var(m_iter), r9)               // ii = m_iter;

	label(.DLOOP3X4I)                  // LOOP OVER ii = [ m_iter ... 2 1 ]



#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif


	lea(mem(r12), rcx)                 // rcx = c for the current 6-row panel;
	lea(mem(r14), rax)                 // rax = a for the current 6-row panel;
	lea(mem(rdx), rbx)                 // rbx = b;


	lea(mem(rcx, rdi, 2), r10)         //
	lea(mem(r10, rdi, 1), r10)         // r10 = c + 3*rs_c;
	prefetch(0, mem(rcx, 1*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(r10, 1*8))         // prefetch c + 3*rs_c
	prefetch(0, mem(r10, rdi, 1, 1*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(r10, rdi, 2, 1*8)) // prefetch c + 5*rs_c




	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.


	label(.DLOOPKITER16)               // MAIN LOOP
	                                   // (four vector steps per pass, i.e.
	                                   // 16 k elements per pass)


	// ---------------------------------- iteration 0

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)


	// ---------------------------------- iteration 1

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)


	// ---------------------------------- iteration 2

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)


	// ---------------------------------- iteration 3

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)



	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.






	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.


	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)


	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.




	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.




	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rbx        ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;

	vmovsd(mem(rax        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovsd(mem(rax, r8, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovsd(mem(rax, r13, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovsd(mem(rax, r8, 4), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovsd(mem(rax, r15, 1), xmm3)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)


	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.






	label(.DPOSTACCUM)

	                                   // Accumulators holding partial sums,
	                                   // one row of the 6x2 block per line:
	                                   // ymm4  ymm5
	                                   // ymm6  ymm7
	                                   // ymm8  ymm9
	                                   // ymm10 ymm11
	                                   // ymm12 ymm13
	                                   // ymm14 ymm15

	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	                                   // xmm4[0] = sum(ymm4);  xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )

	vhaddpd( ymm9, ymm8, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm8 )

	vhaddpd( ymm11, ymm10, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm10 )

	vhaddpd( ymm13, ymm12, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm12 )

	vhaddpd( ymm15, ymm14, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm14 )

	                                   // xmm4  = sum(ymm4)  sum(ymm5)
	                                   // xmm6  = sum(ymm6)  sum(ymm7)
	                                   // xmm8  = sum(ymm8)  sum(ymm9)
	                                   // xmm10 = sum(ymm10) sum(ymm11)
	                                   // xmm12 = sum(ymm12) sum(ymm13)
	                                   // xmm14 = sum(ymm14) sum(ymm15)



	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4)           // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)
	vmulpd(xmm0, xmm10, xmm10)
	vmulpd(xmm0, xmm12, xmm12)
	vmulpd(xmm0, xmm14, xmm14)






	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)



	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case



	label(.DROWSTORED)

	                                   // For each of the six rows:
	                                   // xmmN := beta * c_row + xmmN, store,
	                                   // then advance rcx by rs_c.
	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm10)
	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm12)
	vmovupd(xmm12, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm14)
	vmovupd(xmm14, mem(rcx))
	//add(rdi, rcx)



	jmp(.DDONE)                        // jump to end.




	label(.DBETAZERO)



	label(.DROWSTORBZ)

	                                   // beta == 0: store without reading c.
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm12, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm14, mem(rcx))
	//add(rdi, rcx)




	label(.DDONE)




	lea(mem(r12, rdi, 4), r12)         //
	lea(mem(r12, rdi, 2), r12)         // c_ii = r12 += 6*rs_c

	lea(mem(r14, r8,  4), r14)         //
	lea(mem(r14, r8,  2), r14)         // a_ii = r14 += 6*rs_a

	dec(r9)                            // ii -= 1;
	jne(.DLOOP3X4I)                    // iterate again if ii != 0.




	label(.DRETURN)                    // not the target of any jump above.



    end_asm(
	: // output operands (none)
	: // input operands
      [m_iter]   "m" (m_iter),
      [k_iter16] "m" (k_iter16),
      [k_iter4]  "m" (k_iter4),
      [k_left1]  "m" (k_left1),
      [a]        "m" (a),
      [rs_a]     "m" (rs_a),
      [cs_a]     "m" (cs_a),
      [b]        "m" (b),
      [rs_b]     "m" (rs_b),
      [cs_b]     "m" (cs_b),
      [alpha]    "m" (alpha),
      [beta]     "m" (beta),
      [c]        "m" (c),
      [rs_c]     "m" (rs_c),
      [cs_c]     "m" (cs_c)/*,
      [a_next]   "m" (a_next),
      [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
	// m_left is in [1,5] here, so it is covered by at most one 3-row call
	// followed by either one 2-row or one 1-row call.
	if ( m_left )
	{
		const dim_t      nr_cur = 2;
		// Index of the first row not handled by the 6-row panels above.
		const dim_t      i_edge = m0 - ( dim_t )m_left;

		double* restrict cij = c + i_edge*rs_c;
		double* restrict bj  = b;
		double* restrict ai  = a + i_edge*rs_a;

		if ( 3 <= m_left )
		{
			const dim_t mr_cur = 3;

			bli_dgemmsup_rd_haswell_asm_3x2
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			// Step past the rows just computed.
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 2 <= m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_2x2
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			// Step past the rows just computed.
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rd_haswell_asm_1x2
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
	}
}

// Double-precision 3x2 kernel: for rows i = 0..2 and columns j = 0..1 it
// computes
//
//   c[i, j] := beta * c[i, j] + alpha * ( a[i, 0:k0] . b[0:k0, j] )
//
// using one ymm accumulator per output element (ymm4..ymm9), reduced
// horizontally after the k loops.
//
// Operand layout, as read by the assembly below:
//   a - three rows rs_a0 elements apart, each read contiguously along k
//       (cs_a0 is not read by the asm).
//   b - two columns cs_b0 elements apart, each read contiguously along k
//       (rs_b0 is not read by the asm).
//   c - three rows rs_c0 elements apart, two contiguous doubles per row
//       (cs_c0 is not read by the asm).
//
// conja, conjb, m0, n0, data and cntx are not referenced by this kernel.
// When beta compares equal to zero, c is overwritten without being loaded.
void bli_dgemmsup_rd_haswell_asm_3x2
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	/*
	  rrc:
	    --------        -- -- --      | |
	    --------        -- -- --  ... | |
	    --------   +=   -- -- --      | |
	    --------        -- -- --      | |
	    --------        -- -- --       :
	    --------        -- -- --       :
	*/

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	//lea(mem(rcx, rdi, 2), rdx)         //
	//lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	                                   // NOTE(review): the 7*8 byte offset
	                                   // below differs from the 1*8 offset
	                                   // the 6x2 kernel uses for rows of the
	                                   // same width -- confirm it is intended.
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c




	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.


	label(.DLOOPKITER16)               // MAIN LOOP
	                                   // (four vector steps per pass, i.e.
	                                   // 16 k elements per pass)


	// ---------------------------------- iteration 0

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)


	// ---------------------------------- iteration 1

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)


	// ---------------------------------- iteration 2

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)


	// ---------------------------------- iteration 3

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)



	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.






	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.


	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)


	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.




	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.




	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rbx        ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;

	vmovsd(mem(rax        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovsd(mem(rax, r8, 2), xmm3)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)


	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.






	label(.DPOSTACCUM)

	                                   // Accumulators holding partial sums,
	                                   // one row of the 3x2 block per line:
	                                   // ymm4  ymm5
	                                   // ymm6  ymm7
	                                   // ymm8  ymm9

	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	                                   // xmm4[0] = sum(ymm4);  xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )

	vhaddpd( ymm9, ymm8, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm8 )

	                                   // xmm4 = sum(ymm4) sum(ymm5)
	                                   // xmm6 = sum(ymm6) sum(ymm7)
	                                   // xmm8 = sum(ymm8) sum(ymm9)



	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4)           // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)






	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)



	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case



	label(.DROWSTORED)

	                                   // For each of the three rows:
	                                   // xmmN := beta * c_row + xmmN, store,
	                                   // then advance rcx by rs_c.
	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	//add(rdi, rcx)



	jmp(.DDONE)                        // jump to end.




	label(.DBETAZERO)



	label(.DROWSTORBZ)

	                                   // beta == 0: store without reading c.
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	//add(rdi, rcx)




	label(.DDONE)



    end_asm(
	: // output operands (none)
	: // input operands
      [k_iter16] "m" (k_iter16),
      [k_iter4]  "m" (k_iter4),
      [k_left1]  "m" (k_left1),
      [a]        "m" (a),
      [rs_a]     "m" (rs_a),
      [cs_a]     "m" (cs_a),
      [b]        "m" (b),
      [rs_b]     "m" (rs_b),
      [cs_b]     "m" (cs_b),
      [alpha]    "m" (alpha),
      [beta]     "m" (beta),
      [c]        "m" (c),
      [rs_c]     "m" (rs_c),
      [cs_c]     "m" (cs_c)/*,
      [a_next]   "m" (a_next),
      [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rd_haswell_asm_2x2
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	/*
	  rrc:
	    --------        -- -- --      | |
	    --------        -- -- --  ... | |
	    --------   +=   -- -- --      | |
	    --------        -- -- --      | |
	    --------        -- -- --       :
	    --------        -- -- --       :
	*/

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. 
vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x2 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | -------- -- -- -- ... 
| | -------- += -- -- -- | | -------- -- -- -- | | -------- -- -- -- : -------- -- -- -- : */ /* Summary of the asm block below: computes a 1x2 block C := beta*C + alpha*A*B as two dot products of one row of A with two columns of B (column stride cs_b), all read with unit stride along k. k is consumed in chunks of 16 (ymm loop unrolled 4x), 4 (ymm loop) and 1 (scalar loads); the two ymm accumulators are then reduced horizontally, scaled by alpha and stored as two contiguous doubles of C. C is not read when the beta == 0 branch is taken. */ // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) #endif mov(var(a), rax) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. 
label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm5 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5) // xmm4 = sum(ymm4) sum(ymm5) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/haswell/3/sup/bli_gemmsup_rd_haswell_asm_d6x8m.c000066400000000000000000001453111360743507500253020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrc: -------- ------ | | | | | | | | -------- ------ | | | | | | | | -------- += ------ ... | | | | | | | | -------- ------ | | | | | | | | -------- ------ : -------- ------ : Assumptions: - C is row-stored and B is column-stored; - A is row-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential microkernel is well-suited for a dot-product-based accumulation that performs vector loads from both A and B. NOTE: These kernels implicitly support column-oriented IO, implemented via a high-level transposition of the entire operation. A and B will effectively remain row- and column-stored, respectively, but C will then effectively appear column-stored. Thus, this kernel may be used for both rrc and crc cases. */ // Prototype reference microkernels. GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) #if 0 // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4 #define NUM_NR 4 #define FUNCPTR_T dgemmsup_ker_ft static dim_t mrs[NUM_MR] = { 6, 3, 2, 1 }; static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 }; static FUNCPTR_T kmap[NUM_MR][NUM_NR] = { /* 8 4 2 1 */ /* 6 */ { bli_dgemmsup_rd_haswell_asm_6x8m, bli_dgemmsup_rd_haswell_asm_6x4m, bli_dgemmsup_rd_haswell_asm_6x2m, bli_dgemmsup_r_haswell_ref_6x1 }, /* 3 */ { bli_dgemmsup_rd_haswell_asm_3x8m, bli_dgemmsup_rd_haswell_asm_3x4m, bli_dgemmsup_rd_haswell_asm_3x2m, bli_dgemmsup_r_haswell_ref_3x1 }, /* 2 */ { bli_dgemmsup_rd_haswell_asm_2x8m, bli_dgemmsup_rd_haswell_asm_2x4m, bli_dgemmsup_rd_haswell_asm_2x2m, bli_dgemmsup_r_haswell_ref_2x1 }, /* 1 */ { bli_dgemmsup_rd_haswell_asm_1x8m, bli_dgemmsup_rd_haswell_asm_1x4m, bli_dgemmsup_rd_haswell_asm_1x2m, bli_dgemmsup_r_haswell_ref_1x1 } }; #endif /* bli_dgemmsup_rd_haswell_asm_6x8m: dot-product-based dgemm sup kernel that updates an m0 x 8 block of C as C := beta*C + alpha*A*B. If n0 is not a multiple of 8, the work is forwarded to the 6x4m and 6x2m kernels and to bli_dgemv_ex() (for widths 4, 2 and 1, as needed) and the function returns. Otherwise the asm block loops over two 4-column panels (jj = 0, 4) and, inside each, over m0/3 groups of three rows, accumulating a 3x4 tile of dot products in ymm4-ymm15 with k consumed in chunks of 16, 4 and 1. Each tile is reduced horizontally, scaled by alpha and stored as three rows of four contiguous doubles (row stride rs_c); C is not read when the beta == 0 branch is taken. The asm code offsets C by jj*8 bytes, i.e. it hard-codes a column stride of one element. The remaining m0 % 3 rows are handled by the 2x8 or 1x8 kernel. */ void bli_dgemmsup_rd_haswell_asm_6x8m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { uint64_t n_left = n0 % 8; // First check whether this is an edge case in the n dimension. If so, // dispatch other 6x?m kernels, as needed. 
if ( n_left ) { double* restrict cij = c; double* restrict bj = b; double* restrict ai = a; if ( 4 <= n_left ) { const dim_t nr_cur = 4; bli_dgemmsup_rd_haswell_asm_6x4m ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_6x2m ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, m0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } return; } //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t m_iter = m0 / 3; uint64_t m_left = m0 % 3; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. //mov(var(a), r14) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. 
//mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a //mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = m dim index ii // r15 = n dim index jj mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 4 ] mov(var(a), r14) // load address of a mov(var(c), r12) // load address of c lea(mem( , r15, 1), rsi) // rsi = r15 = jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r12, rsi, 1), r12) // r12 = c + jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(rdx, rsi, 1), rdx) // rdx += jj*cs_b (rdx = b + jj*cs_b); mov(var(m_iter), r9) // ii = m_iter; label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(r14), rax) // rax = a_ii; lea(mem(rdx), rbx) // rbx = b_jj; #if 0 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r8, r8, 4), rdi) // rdi = 5*rs_a mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. 
je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*rs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*rs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a #endif vmovupd(mem(rax ), ymm0) 
vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*rs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. 
vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and 
duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) lea(mem(r12, rdi, 2), r12) // lea(mem(r12, rdi, 1), r12) // c_ii = r12 += 3*rs_c lea(mem(r14, r8, 2), r14) // lea(mem(r14, r8, 1), r14) // a_ii = r14 += 3*rs_a dec(r9) // ii -= 1; jne(.DLOOP3X4I) // iterate again if ii != 0. add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( m_left ) { const dim_t nr_cur = 8; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; double* restrict bj = b; double* restrict ai = a + i_edge*rs_a; if ( 2 == m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x8 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); //cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x8 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } void bli_dgemmsup_rd_haswell_asm_6x4m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t m_iter = m0 / 3; uint64_t m_left = m0 % 3; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. 
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = m dim index ii // r15 = n dim index jj // r10 = unused mov(var(m_iter), r9) // ii = m_iter; label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter .. 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c + 3*ii*rs_c; lea(mem(r14), rax) // rax = a + 3*ii*rs_a; lea(mem(rdx), rbx) // rbx = b; #if 0 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r8, r8, 4), rdi) // rdi = 5*rs_a mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*cs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*cs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 
4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) lea(mem(r12, rdi, 2), r12) // lea(mem(r12, rdi, 1), r12) // c_ii = r12 += 3*rs_c lea(mem(r14, r8, 2), r14) // lea(mem(r14, r8, 1), r14) // a_ii = r14 += 3*rs_a dec(r9) // ii -= 1; jne(.DLOOP3X4I) // iterate again if ii != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( m_left ) { const dim_t nr_cur = 4; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; double* restrict bj = b; double* restrict ai = a + i_edge*rs_a; if ( 2 == m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); //cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } /* bli_dgemmsup_rd_haswell_asm_6x2m: updates an m0 x 2 block of C as C := beta*C + alpha*A*B. The assembly loop below handles m0/6 groups of six rows of C, forming each entry as a dot product over k0 (4-wide loads along k from two columns of B and six rows of A, reduced horizontally before the store); the m0 % 6 leftover rows are forwarded to the 3x2, 2x2 and 1x2 kernels. The assembly never loads cs_a, rs_b or cs_c: it steps A and B by 8 bytes per k index and stores each row of C with one 2-wide vector store. n0 is not read (nr_cur is fixed at 2); conja, conjb, data and cntx are only passed through to the edge-case kernels. */ void bli_dgemmsup_rd_haswell_asm_6x2m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. /* k0 is consumed in k_iter16 passes of 16 (four unrolled 4-wide steps), then k_iter4 4-wide steps, then k_left1 scalar steps. */ uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; /* The assembly loop covers six rows of C per pass; the m_left remaining rows go to the edge-case kernels. */ uint64_t m_iter = m0 / 6; uint64_t m_left = m0 % 6; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. 
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = m dim index ii mov(var(m_iter), r9) // ii = m_iter; label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter ... 1 0 ] (six rows per pass despite the 3X4 label name) #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c + 6*ii*rs_c; lea(mem(r14), rax) // rax = a + 6*ii*rs_a; lea(mem(rdx), rbx) // rbx = b; lea(mem(rcx, rdi, 2), r10) // lea(mem(r10, rdi, 1), r10) // r10 = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c prefetch(0, mem(r10, 1*8)) // prefetch c + 3*rs_c prefetch(0, mem(r10, rdi, 1, 1*8)) // prefetch c + 4*rs_c prefetch(0, mem(r10, rdi, 2, 1*8)) // prefetch c + 5*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) 
vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovsd(mem(rax, r8, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovsd(mem(rax, r13, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rax, r8, 4), xmm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovsd(mem(rax, r15, 1), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 // ymm8 ymm9 // ymm10 ymm11 // ymm12 ymm13 // ymm14 ymm15 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) vhaddpd( ymm9, ymm8, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm8 ) vhaddpd( ymm11, ymm10, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm10 ) vhaddpd( ymm13, ymm12, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm12 ) vhaddpd( ymm15, ymm14, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm14 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) // xmm8 = sum(ymm8) sum(ymm9) // xmm10 = sum(ymm10) sum(ymm11) // xmm12 = sum(ymm12) sum(ymm13) // xmm14 = sum(ymm14) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) vmulpd(xmm0, xmm8, xmm8) vmulpd(xmm0, xmm10, xmm10) vmulpd(xmm0, xmm12, xmm12) vmulpd(xmm0, xmm14, xmm14) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm10) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm12) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm14) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) label(.DDONE) lea(mem(r12, rdi, 4), r12) // lea(mem(r12, rdi, 2), r12) // c_ii = r12 += 6*rs_c lea(mem(r14, r8, 4), r14) // lea(mem(r14, r8, 2), r14) // a_ii = r14 += 6*rs_a dec(r9) // ii -= 1; jne(.DLOOP3X4I) // iterate again if ii != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( m_left ) { const dim_t nr_cur = 2; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; double* restrict bj = b; double* restrict ai = a + i_edge*rs_a; if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rd_haswell_asm_3x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } blis-0.6.1/kernels/haswell/3/sup/bli_gemmsup_rd_haswell_asm_d6x8n.c000066400000000000000000001771661360743507500253200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrc: -------- ------ | | | | | | | | -------- ------ | | | | | | | | -------- += ------ ... | | | | | | | | -------- ------ | | | | | | | | -------- ------ : -------- ------ : Assumptions: - C is row-stored and B is column-stored; - A is row-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential microkernel is well-suited for a dot-product-based accumulation that performs vector loads from both A and B. NOTE: These kernels implicitly support column-oriented IO, implemented via a high-level transposition of the entire operation. A and B will effectively remain row- and column-stored, respectively, but C will then effectively appear column-stored. Thus, this kernel may be used for both rrc and crc cases. */ // Prototype reference microkernels. GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) #if 0 // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4 #define NUM_NR 4 #define FUNCPTR_T dgemmsup_ker_ft static dim_t mrs[NUM_MR] = { 6, 3, 2, 1 }; static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 }; static FUNCPTR_T kmap[NUM_MR][NUM_NR] = { /* 8 4 2 1 */ /* 6 */ { bli_dgemmsup_rd_haswell_asm_6x8n, bli_dgemmsup_rd_haswell_asm_6x4n, bli_dgemmsup_rd_haswell_asm_6x2n, bli_dgemmsup_r_haswell_ref_6x1 }, /* 3 */ { bli_dgemmsup_rd_haswell_asm_3x8n, bli_dgemmsup_rd_haswell_asm_3x4n, bli_dgemmsup_rd_haswell_asm_3x2n, bli_dgemmsup_r_haswell_ref_3x1 }, /* 2 */ { bli_dgemmsup_rd_haswell_asm_2x8n, bli_dgemmsup_rd_haswell_asm_2x4n, bli_dgemmsup_rd_haswell_asm_2x2n, bli_dgemmsup_r_haswell_ref_2x1 }, /* 1 */ { bli_dgemmsup_rd_haswell_asm_1x8n, bli_dgemmsup_rd_haswell_asm_1x4n, bli_dgemmsup_rd_haswell_asm_1x2n, bli_dgemmsup_r_haswell_ref_1x1 } }; #endif void bli_dgemmsup_rd_haswell_asm_6x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { uint64_t m_left = m0 % 6; // First check whether this is a edge case in the n dimension. If so, // dispatch other ?x8m kernels, as needed. if ( m_left ) { double* restrict cij = c; double* restrict bj = b; double* restrict ai = a; #if 1 // We add special handling for slightly inflated MR blocksizes // at edge cases, up to a maximum of 9. 
if ( 6 < m0 ) { dgemmsup_ker_ft ker_fp1 = NULL; dgemmsup_ker_ft ker_fp2 = NULL; dim_t mr1, mr2; if ( m0 == 7 ) { mr1 = 6; mr2 = 1; ker_fp1 = bli_dgemmsup_rd_haswell_asm_6x8n; ker_fp2 = bli_dgemmsup_rd_haswell_asm_1x8n; } else if ( m0 == 8 ) { mr1 = 6; mr2 = 2; ker_fp1 = bli_dgemmsup_rd_haswell_asm_6x8n; ker_fp2 = bli_dgemmsup_rd_haswell_asm_2x8n; } else // if ( m0 == 9 ) { mr1 = 6; mr2 = 3; ker_fp1 = bli_dgemmsup_rd_haswell_asm_6x8n; ker_fp2 = bli_dgemmsup_rd_haswell_asm_3x8n; } ker_fp1 ( conja, conjb, mr1, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr1*rs_c0; ai += mr1*rs_a0; ker_fp2 ( conja, conjb, mr2, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; } #endif if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rd_haswell_asm_3x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { #if 0 const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_TRANSPOSE, conja, k0, n0, alpha, bj, rs_b0, cs_b0, ai, cs_a0, beta, cij, cs_c0, cntx, NULL ); #endif } return; } //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t n_iter = n0 / 4; uint64_t n_left = n0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), rdx) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a //mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b //mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // rdx = rax = a // r14 = rbx = b // r9 = m dim index ii // r15 = n dim index jj mov(imm(0), r9) // ii = 0; label(.DLOOP3X4I) // LOOP OVER ii = [ 0 1 ... ] mov(var(b), r14) // load address of b mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) lea(mem( , r9, 1), rsi) // rsi = r9 = 3*ii; imul(rdi, rsi) // rsi *= rs_c lea(mem(r12, rsi, 1), r12) // r12 = c + 3*ii*rs_c; lea(mem( , r9, 1), rsi) // rsi = r9 = 3*ii; imul(r8, rsi) // rsi *= rs_a; lea(mem(rdx, rsi, 1), rdx) // rax = a + 3*ii*rs_a; mov(var(n_iter), r15) // jj = n_iter; label(.DLOOP3X4J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. 
#else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(rdx), rax) // rax = a_ii; lea(mem(r14), rbx) // rbx = b_jj; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r11, r11, 2), rdi) // rdi = 3*cs_b lea(mem(rbx, r11, 4), r10) // r10 = rbx + 4*cs_b mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b add(imm(8*8), r10) // r10 += 8*rs_b = 8*8; #else prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // 
---------------------------------- iteration 2 #if 1 prefetch(0, mem(r10, 8*8)) // prefetch rbx + 4*cs_b + 8*rs_b prefetch(0, mem(r10, r11, 1, 8*8)) // prefetch rbx + 5*cs_b + 8*rs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(r10, r11, 2, 8*8)) // prefetch rbx + 6*cs_b + 8*rs_b prefetch(0, mem(r10, r13, 1, 8*8)) // prefetch rbx + 7*cs_b + 8*rs_b add(imm(16*8), r10) // r10 += 8*rs_b = 8*8; #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. 
// else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4*8), r12) // c_jj = r12 += 4*cs_c lea(mem(r14, r11, 4), r14) // b_jj = r14 += 4*cs_b dec(r15) // jj -= 1; jne(.DLOOP3X4J) // iterate again if jj != 0. add(imm(3), r9) // ii += 3; cmp(imm(3), r9) // compare ii to 3 jle(.DLOOP3X4I) // if ii <= 3, jump to beginning // of ii loop; otherwise, loop ends. label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( n_left ) { const dim_t mr_cur = 6; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; double* restrict bj = b + j_edge*cs_b; if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_6x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; //bli_dgemmsup_rd_haswell_asm_6x1n bli_dgemmsup_r_haswell_ref ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, mr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } } } void bli_dgemmsup_rd_haswell_asm_3x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t n_iter = n0 / 4; uint64_t n_left = n0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), rdx) // load address of a. 
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // rdx = rax = a // r14 = rbx = b // r9 = unused // r15 = n dim index jj mov(var(n_iter), r15) // jj = n_iter; label(.DLOOP3X4J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(rdx), rax) // rax = a_ii; lea(mem(r14), rbx) // rbx = b_jj; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r11, r11, 2), rdi) // rdi = 3*cs_b lea(mem(rbx, r11, 4), r10) // r10 = rbx + 4*cs_b mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b add(imm(8*8), r10) // r10 += 8*rs_b = 8*8; #else prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // 
---------------------------------- iteration 2 #if 1 prefetch(0, mem(r10, 8*8)) // prefetch rbx + 4*cs_b + 8*rs_b prefetch(0, mem(r10, r11, 1, 8*8)) // prefetch rbx + 5*cs_b + 8*rs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(r10, r11, 2, 8*8)) // prefetch rbx + 6*cs_b + 8*rs_b prefetch(0, mem(r10, r13, 1, 8*8)) // prefetch rbx + 7*cs_b + 8*rs_b add(imm(16*8), r10) // r10 += 8*rs_b = 8*8; #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. 
// else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4*8), r12) // c_jj = r12 += 4*cs_c lea(mem(r14, r11, 4), r14) // b_jj = r14 += 4*cs_b dec(r15) // jj -= 1; jne(.DLOOP3X4J) // iterate again if jj != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( n_left ) { const dim_t mr_cur = 3; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; double* restrict bj = b + j_edge*cs_b; if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_3x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref_3x1 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, mr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } } } void bli_dgemmsup_rd_haswell_asm_2x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t n_iter = n0 / 4; uint64_t n_left = n0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), rdx) // load address of a. 
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // rdx = rax = a // r14 = rbx = b // r9 = unused // r15 = n dim index jj mov(var(n_iter), r15) // jj = n_iter; label(.DLOOP3X4J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(rdx), rax) // rax = a_ii; lea(mem(r14), rbx) // rbx = b_jj; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c #endif lea(mem(r11, r11, 2), rdi) // rdi = 3*cs_b lea(mem(rbx, r11, 4), r10) // r10 = rbx + 4*cs_b mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b add(imm(8*8), r10) // r10 += 8*rs_b = 8*8; #else prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(r10, 8*8)) // prefetch rbx + 4*cs_b + 8*rs_b prefetch(0, mem(r10, r11, 1, 8*8)) // prefetch rbx + 5*cs_b + 8*rs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) 
vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(r10, r11, 2, 8*8)) // prefetch rbx + 6*cs_b + 8*rs_b prefetch(0, mem(r10, r13, 1, 8*8)) // prefetch rbx + 7*cs_b + 8*rs_b add(imm(16*8), r10) // r10 += 8*rs_b = 8*8; #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4*8), r12) // c_jj = r12 += 4*cs_c lea(mem(r14, r11, 4), r14) // b_jj = r14 += 4*cs_b dec(r15) // jj -= 1; jne(.DLOOP3X4J) // iterate again if jj != 0. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. if ( n_left ) { const dim_t mr_cur = 2; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; double* restrict bj = b + j_edge*cs_b; if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref_2x1 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, mr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } } } void bli_dgemmsup_rd_haswell_asm_1x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of 
integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t n_iter = n0 / 4; uint64_t n_left = n0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), rdx) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // rdx = rax = a // r14 = rbx = b // r9 = unused // r15 = n dim index jj mov(var(n_iter), r15) // jj = n_iter; label(.DLOOP3X4J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm13, ymm13, ymm13) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(rdx), rax) // rax = a_ii; lea(mem(r14), rbx) // rbx = b_jj; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c #endif lea(mem(r11, r11, 2), rdi) // rdi = 3*cs_b lea(mem(rbx, r11, 4), r10) // r10 = rbx + 4*cs_b mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b add(imm(8*8), r10) // r10 += 8*rs_b = 8*8; #else prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b #endif vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b #endif vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) 
// b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(r10, 8*8)) // prefetch rbx + 4*cs_b + 8*rs_b prefetch(0, mem(r10, r11, 1, 8*8)) // prefetch rbx + 5*cs_b + 8*rs_b #endif vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(r10, r11, 2, 8*8)) // prefetch rbx + 6*cs_b + 8*rs_b prefetch(0, mem(r10, r13, 1, 8*8)) // prefetch rbx + 7*cs_b + 8*rs_b add(imm(16*8), r10) // r10 += 8*rs_b = 8*8; #endif vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. 
label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4*8), r12) // c_jj = r12 += 4*cs_c lea(mem(r14, r11, 4), r14) // b_jj = r14 += 4*cs_b dec(r15) // jj -= 1; jne(.DLOOP3X4J) // iterate again if jj != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( n_left ) { const dim_t mr_cur = 1; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; double* restrict bj = b + j_edge*cs_b; if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_1x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref_1x1 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_ddotxv_ex ( conja, conjb, k0, alpha, ai, cs_a0, bj, rs_b0, beta, cij, cntx, NULL ); #endif } } } blis-0.6.1/kernels/haswell/3/sup/bli_gemmsup_rv_haswell_asm_d6x8.c000066400000000000000000011226561360743507500251570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrr: -------- ------ -------- -------- ------ -------- -------- += ------ ... -------- -------- ------ -------- -------- ------ : -------- ------ : rcr: -------- | | | | -------- -------- | | | | -------- -------- += | | | | ... -------- -------- | | | | -------- -------- | | | | : -------- | | | | : Assumptions: - B is row-stored; - A is row- or column-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential kernel is well-suited for contiguous (v)ector loads on B and single-element broadcasts from A. NOTE: These kernels explicitly support column-oriented IO, implemented via an in-register transpose. And thus they also support the crr and ccr cases, though only crr is ever utilized (because ccr is handled by transposing the operation and executing rcr, which does not incur the cost of the in-register transpose). crr: | | | | | | | | ------ -------- | | | | | | | | ------ -------- | | | | | | | | += ------ ... -------- | | | | | | | | ------ -------- | | | | | | | | ------ : | | | | | | | | ------ : */ // Prototype reference microkernels. GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4 #define NUM_NR 4 #define FUNCPTR_T dgemmsup_ker_ft static dim_t mrs[NUM_MR] = { 6, 4, 2, 1 }; static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 }; static FUNCPTR_T kmap[NUM_MR][NUM_NR] = { /* 8 4 2 1 */ /* 6 */ { bli_dgemmsup_rv_haswell_asm_6x8, bli_dgemmsup_rv_haswell_asm_6x4, bli_dgemmsup_rv_haswell_asm_6x2, bli_dgemmsup_r_haswell_ref_6x1 }, /* 4 */ { bli_dgemmsup_rv_haswell_asm_4x8, bli_dgemmsup_rv_haswell_asm_4x4, bli_dgemmsup_rv_haswell_asm_4x2, bli_dgemmsup_r_haswell_ref_4x1 }, /* 2 */ { bli_dgemmsup_rv_haswell_asm_2x8, bli_dgemmsup_rv_haswell_asm_2x4, bli_dgemmsup_rv_haswell_asm_2x2, bli_dgemmsup_r_haswell_ref_2x1 }, /* 1 */ { bli_dgemmsup_rv_haswell_asm_1x8, bli_dgemmsup_rv_haswell_asm_1x4, bli_dgemmsup_rv_haswell_asm_1x2, bli_dgemmsup_r_haswell_ref_1x1 }, }; void bli_dgemmsup_rv_haswell_asm_6x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif // Use a reference kernel if this is an edge case in the m or n // dimensions. if ( m0 < 6 || n0 < 8 ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif dim_t n_left = n0; double* restrict cj = c; double* restrict bj = b; // Iterate across columns (corresponding to elements of nrs) until // n_left is zero. for ( dim_t j = 0; n_left != 0; ++j ) { const dim_t nr_cur = nrs[ j ]; // Once we find the value of nrs that is less than (or equal to) // n_left, we use the kernels in that column. 
if ( nr_cur <= n_left ) { dim_t m_left = m0; double* restrict cij = cj; double* restrict ai = a; // Iterate down the current column (corresponding to elements // of mrs) until m_left is zero. for ( dim_t i = 0; m_left != 0; ++i ) { const dim_t mr_cur = mrs[ i ]; // Once we find the value of mrs that is less than (or equal // to) m_left, we select that kernel. if ( mr_cur <= m_left ) { FUNCPTR_T ker_fp = kmap[i][j]; //printf( "executing %d x %d sup kernel.\n", (int)mr_cur, (int)nr_cur ); // Call the kernel using current mrs and nrs values. ker_fp ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); // Advance C and A pointers by the mrs and nrs we just // used, and decrement m_left. cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } } // Advance C and B pointers by the mrs and nrs we just used, and // decrement n_left. cj += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } } return; } //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. 
mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 4*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 4*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 4*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 4*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 4*8)) // prefetch c + 4*cs_c 
prefetch(0, mem(rdx, rsi, 2, 4*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 4*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 4*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #endif #if 1 lea(mem(rax, r9, 8), rdx) // lea(mem(rdx, r9, 8), rdx) // rdx = a + 16*cs_a; #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 prefetch(0, mem(rdx, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 1 #if 0 prefetch(0, mem(rdx, r9, 1, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += 
cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(rdx, r9, 2, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 3 #if 1 lea(mem(rdx, r9, 4), rdx) // a_prefetch += 4*cs_a; #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm11, ymm11) vmulpd(ymm0, ymm12, ymm12) vmulpd(ymm0, ymm13, ymm13) vmulpd(ymm0, ymm14, ymm14) vmulpd(ymm0, ymm15, ymm15) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm11) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm13) vmovupd(ymm13, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm14) vmovupd(ymm14, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm15) vmovupd(ymm15, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) 
vmovupd(xmm4, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm5) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm10, mem(rcx)) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm12, mem(rcx)) vmovupd(ymm13, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm14, mem(rcx)) vmovupd(ymm15, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", 
"xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rv_haswell_asm_5x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 4*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 4*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 4*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 4*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 4*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 4*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 4*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 4*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // 
---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. 
label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm11, ymm11) vmulpd(ymm0, ymm12, ymm12) vmulpd(ymm0, ymm13, ymm13) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm11) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm13) vmovupd(ymm13, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovlpd(mem(rdx), xmm0, xmm0) vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rdx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rdx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm12, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm5) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, 
rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovlpd(mem(rdx), xmm0, xmm0) vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rdx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rdx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm13, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm10, mem(rcx)) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm12, mem(rcx)) vmovupd(ymm13, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovupd(ymm12, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) vmovlpd(xmm1, mem(rcx, rsi, 2)) vmovhpd(xmm1, mem(rcx, rax, 1)) #endif lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovupd(ymm13, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] 
"m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rv_haswell_asm_4x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. 
mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 3*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 3*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 3*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 3*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 3*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 3*8)) // prefetch c + 6*cs_c prefetch(0, 
mem(rdx, rsi, 2, 3*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) // ---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), 
ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. 
label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm11, ymm11) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; //lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm11) vmovupd(ymm11, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm5) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm10, mem(rcx)) vmovupd(ymm11, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rv_haswell_asm_3x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different 
size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 2*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 2*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 2*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 2*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 2*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 2*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 2*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 2*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. 
je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 2), rdx) // load address of c + 2*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9) vmovupd(ymm9, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vextractf128(imm(0x1), ymm4, xmm12) vextractf128(imm(0x1), ymm6, xmm13) vextractf128(imm(0x1), ymm8, xmm14) vextractf128(imm(0x1), ymm10, xmm15) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), xmm3, xmm4) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm6) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm8) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm10) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) vmovupd(xmm8, mem(rcx, rsi, 2)) vmovupd(xmm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vfmadd231sd(mem(rdx), xmm3, xmm12) vfmadd231sd(mem(rdx, rsi, 1), xmm3, xmm13) vfmadd231sd(mem(rdx, rsi, 2), xmm3, xmm14) vfmadd231sd(mem(rdx, rax, 1), xmm3, xmm15) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vextractf128(imm(0x1), ymm5, xmm12) vextractf128(imm(0x1), ymm7, xmm13) vextractf128(imm(0x1), ymm9, xmm14) vextractf128(imm(0x1), ymm11, xmm15) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), xmm3, xmm5) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm7) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm9) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm11) vmovupd(xmm5, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 1)) vmovupd(xmm9, mem(rcx, rsi, 2)) vmovupd(xmm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vfmadd231sd(mem(rdx), xmm3, xmm12) vfmadd231sd(mem(rdx, rsi, 1), xmm3, xmm13) vfmadd231sd(mem(rdx, rsi, 2), xmm3, xmm14) vfmadd231sd(mem(rdx, rax, 1), 
xmm3, xmm15) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(ymm9, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vextractf128(imm(0x1), ymm4, xmm12) vextractf128(imm(0x1), ymm6, xmm13) vextractf128(imm(0x1), ymm8, xmm14) vextractf128(imm(0x1), ymm10, xmm15) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) vmovupd(xmm8, mem(rcx, rsi, 2)) vmovupd(xmm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vextractf128(imm(0x1), ymm5, xmm12) vextractf128(imm(0x1), ymm7, xmm13) vextractf128(imm(0x1), ymm9, xmm14) vextractf128(imm(0x1), ymm11, xmm15) vmovupd(xmm5, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 1)) vmovupd(xmm9, mem(rcx, rsi, 2)) vmovupd(xmm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) //lea(mem(rdx, 
rsi, 4), rdx)



	label(.DDONE)



	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a]      "m" (a),
	  [rs_a]   "m" (rs_a),
	  [cs_a]   "m" (cs_a),
	  [b]      "m" (b),
	  [rs_b]   "m" (rs_b),
	  [cs_b]   "m" (cs_b),
	  [alpha]  "m" (alpha),
	  [beta]   "m" (beta),
	  [c]      "m" (c),
	  [rs_c]   "m" (rs_c),
	  [cs_c]   "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rv_haswell_asm_2x8()
//
// Updates a 2x8 block of c as
//
//   c := beta * c + alpha * a * b
//
// using AVX/FMA inline assembly, accumulating k0 rank-1 updates.
//
// - The k dimension is handled by a main loop unrolled four times
//   (k0 / 4 trips) followed by an edge loop (k0 % 4 trips).
// - Each update reads 8 consecutive doubles from the current row of b, so
//   the column stride of b is implicitly 1: cs_b is passed to the asm block
//   as an operand but is never loaded (the loads are commented out).
// - The two elements of a are read at a and a + rs_a; afterwards a advances
//   by cs_a and b advances by rs_b.
// - Accumulators: ymm4/ymm5 hold row 0 (columns 0-3 / 4-7) and ymm6/ymm7
//   hold row 1.
// - c is treated as column-stored when rs_c == 1: the tile is transposed in
//   registers and written one 2-element column at a time with stride cs_c.
//   Otherwise each row is written as two 4-double vectors located at byte
//   offsets 0 and 4*cs_c*sizeof(double) from the start of the row.
// - When beta compares equal to zero, c is overwritten without being read.
// - conja, conjb, m0, n0, data and cntx are not referenced by any live code
//   in this function.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rv_haswell_asm_2x8
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;   // trip count of the 4x-unrolled main loop
	uint64_t k_left = k0 % 4;   // trip count of the edge loop

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;    // only listed as an asm operand; never read
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	mov(var(rs_b), r10)                // load rs_b
	//mov(var(cs_b), r11)                // load cs_b
	lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	                                   // NOTE: We cannot pre-load elements of a or b
	                                   // because it could eventually, in the last
	                                   // unrolled iter or the cleanup loop, result
	                                   // in reading beyond the bounds allocated mem
	                                   // (the likely result: a segmentation fault).

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// The disabled branch below prefetches the rows of c unconditionally;
	// the active branch chooses row or column prefetching based on rs_c.
#if 0
	//lea(mem(rcx, rdi, 2), rdx)         //
	//lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
#else
	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH)                    // jump to column storage case
	label(.DROWPFETCH)                 // row-stored prefetching on c

	//lea(mem(rcx, rdi, 2), rdx)         //
	//lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c

	jmp(.DPOSTPFETCH)                  // jump to end of prefetching c
	label(.DCOLPFETCH)                 // column-stored prefetching c

	mov(var(cs_c), rsi)                // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi)            // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx)         //
	lea(mem(rdx, rsi, 1), rdx)         // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 1*8))         // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 1*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 1*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 1*8))         // prefetch c + 3*cs_c
	prefetch(0, mem(rdx, rsi, 1, 1*8)) // prefetch c + 4*cs_c
	prefetch(0, mem(rdx, rsi, 2, 1*8)) // prefetch c + 5*cs_c
	lea(mem(rdx, rsi, 2), rdx)         // rdx = c + 5*cs_c;
	prefetch(0, mem(rdx, rsi, 1, 1*8)) // prefetch c + 6*cs_c
	prefetch(0, mem(rdx, rsi, 2, 1*8)) // prefetch c + 7*cs_c

	label(.DPOSTPFETCH)                // done prefetching c
#endif

	// rsi was only a temporary above; from here it is the loop counter.
	mov(var(k_iter), rsi)              // i = k_iter;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT)                  // if i == 0, jump to code that
	                                   // contains the k_left loop.

	label(.DLOOPKITER)                 // MAIN LOOP

	// Every iteration performs one rank-1 update:
	//   ymm0/ymm1 = 8 doubles of the current row of b,
	//   ymm2/ymm3 = broadcast of the row-0 / row-1 element of a,
	//   ymm4..ymm7 += products.

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER)                   // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi)              // i = k_left;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT)                 // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT)                   // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused here as pointers to alpha and beta.
	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(ymm0, ymm7, ymm7)

	mov(var(cs_c), rsi)                // load cs_c
	lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx)         // load address of c + 4*cs_c;
	//lea(mem(rcx, rdi, 4), rdx)         // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), rax)         // rax = 3*cs_c;

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED)                    // jump to column storage case

	label(.DROWSTORED)

	// beta != 0, row storage: c_row = beta * c_row + accumulator, with the
	// second half of each row addressed at rcx + 4*rsi.

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx, rsi, 4))
	add(rdi, rcx)                      // advance to the next row of c

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7)
	vmovupd(ymm7, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORED)

	// beta != 0, column storage: interleave rows 0 and 1 so that each xmm
	// register holds one 2-element column, then update columns 0-3.

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vfmadd231pd(mem(rcx), xmm3, xmm0)
	vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm1)
	vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm2)
	vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm4)
	vmovupd(xmm0, mem(rcx))
	vmovupd(xmm1, mem(rcx, rsi, 1))
	vmovupd(xmm2, mem(rcx, rsi, 2))
	vmovupd(xmm4, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)         // advance c by four columns

	// Same sequence for columns 4-7 (held in ymm5/ymm7).

	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vfmadd231pd(mem(rcx), xmm3, xmm0)
	vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm1)
	vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm2)
	vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm4)
	vmovupd(xmm0, mem(rcx))
	vmovupd(xmm1, mem(rcx, rsi, 1))
	vmovupd(xmm2, mem(rcx, rsi, 2))
	vmovupd(xmm4, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ)                    // jump to column storage case

	label(.DROWSTORBZ)

	// beta == 0, row storage: plain stores, c is never read.

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm5, mem(rcx, rsi, 4))
	add(rdi, rcx)                      // advance to the next row of c

	vmovupd(ymm6, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.
	// (Closing part of bli_dgemmsup_rv_haswell_asm_2x8: beta == 0 with
	// column-stored c. Rows 0 and 1 are interleaved into 2-element columns
	// and stored without reading c.)

	label(.DCOLSTORBZ)

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vmovupd(xmm0, mem(rcx))
	vmovupd(xmm1, mem(rcx, rsi, 1))
	vmovupd(xmm2, mem(rcx, rsi, 2))
	vmovupd(xmm4, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vmovupd(xmm0, mem(rcx))
	vmovupd(xmm1, mem(rcx, rsi, 1))
	vmovupd(xmm2, mem(rcx, rsi, 2))
	vmovupd(xmm4, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)



	label(.DDONE)



	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a]      "m" (a),
	  [rs_a]   "m" (rs_a),
	  [cs_a]   "m" (cs_a),
	  [b]      "m" (b),
	  [rs_b]   "m" (rs_b),
	  [cs_b]   "m" (cs_b),
	  [alpha]  "m" (alpha),
	  [beta]   "m" (beta),
	  [c]      "m" (c),
	  [rs_c]   "m" (rs_c),
	  [cs_c]   "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rv_haswell_asm_1x8()
//
// Updates a 1x8 block of c as
//
//   c := beta * c + alpha * a * b
//
// using AVX/FMA inline assembly, accumulating k0 rank-1 updates.
//
// - The k dimension is handled by a main loop unrolled four times
//   (k0 / 4 trips) followed by an edge loop (k0 % 4 trips).
// - Each update reads 8 consecutive doubles from the current row of b, so
//   the column stride of b is implicitly 1: cs_b is passed to the asm block
//   as an operand but is never loaded (the loads are commented out).
// - One element of a is broadcast per update; a then advances by cs_a and
//   b by rs_b. rs_a is loaded and scaled into r8 but r8 is not used by any
//   subsequent instruction in this kernel.
// - Accumulators: ymm4 holds columns 0-3 and ymm5 holds columns 4-7.
// - c is treated as column-stored when rs_c == 1: elements are gathered
//   and scattered one double at a time with stride cs_c. Otherwise the row
//   is accessed as two 4-double vectors at byte offsets 0 and
//   4*cs_c*sizeof(double).
// - When beta compares equal to zero, c is overwritten without being read.
// - conja, conjb, m0, n0, data and cntx are not referenced by any live code
//   in this function.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rv_haswell_asm_1x8
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;   // trip count of the 4x-unrolled main loop
	uint64_t k_left = k0 % 4;   // trip count of the edge loop

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;    // only listed as an asm operand; never read
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	mov(var(rs_b), r10)                // load rs_b
	//mov(var(cs_b), r11)                // load cs_b
	lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	                                   // NOTE: We cannot pre-load elements of a or b
	                                   // because it could eventually, in the last
	                                   // unrolled iter or the cleanup loop, result
	                                   // in reading beyond the bounds allocated mem
	                                   // (the likely result: a segmentation fault).

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// The disabled branch below prefetches the single row of c
	// unconditionally; the active branch chooses row or column prefetching
	// based on rs_c.
#if 0
	//lea(mem(rcx, rdi, 2), rdx)         //
	//lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
#else
	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH)                    // jump to column storage case
	label(.DROWPFETCH)                 // row-stored prefetching on c

	//lea(mem(rcx, rdi, 2), rdx)         //
	//lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c

	jmp(.DPOSTPFETCH)                  // jump to end of prefetching c
	label(.DCOLPFETCH)                 // column-stored prefetching c

	mov(var(cs_c), rsi)                // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi)            // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx)         //
	lea(mem(rdx, rsi, 1), rdx)         // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 0*8))         // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 0*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 0*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 0*8))         // prefetch c + 3*cs_c
	prefetch(0, mem(rdx, rsi, 1, 0*8)) // prefetch c + 4*cs_c
	prefetch(0, mem(rdx, rsi, 2, 0*8)) // prefetch c + 5*cs_c
	lea(mem(rdx, rsi, 2), rdx)         // rdx = c + 5*cs_c;
	prefetch(0, mem(rdx, rsi, 1, 0*8)) // prefetch c + 6*cs_c
	prefetch(0, mem(rdx, rsi, 2, 0*8)) // prefetch c + 7*cs_c

	label(.DPOSTPFETCH)                // done prefetching c
#endif

	// rsi was only a temporary above; from here it is the loop counter.
	mov(var(k_iter), rsi)              // i = k_iter;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT)                  // if i == 0, jump to code that
	                                   // contains the k_left loop.

	label(.DLOOPKITER)                 // MAIN LOOP

	// Every iteration performs one rank-1 update:
	//   ymm0/ymm1 = 8 doubles of the current row of b,
	//   ymm2      = broadcast of the current element of a,
	//   ymm4/ymm5 += products.

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER)                   // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi)              // i = k_left;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT)                 // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT)                   // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused here as pointers to alpha and beta.
	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)

	mov(var(cs_c), rsi)                // load cs_c
	lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx)         // load address of c + 4*cs_c;
	//lea(mem(rcx, rdi, 4), rdx)         // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), rax)         // rax = 3*cs_c;

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED)                    // jump to column storage case

	label(.DROWSTORED)

	// beta != 0, row storage: c_row = beta * c_row + accumulator, with the
	// second half of the row addressed at rcx + 4*rsi.

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORED)

	// beta != 0, column storage: gather c(0,0..3) one double at a time into
	// ymm0, compute ymm0 = beta * ymm0 + ymm4, then scatter it back.

	vmovlpd(mem(rcx), xmm0, xmm0)
	vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0)
	vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1)
	vmovhpd(mem(rcx, rax, 1), xmm1, xmm1)
	vperm2f128(imm(0x20), ymm1, ymm0, ymm0)

	vfmadd213pd(ymm4, ymm3, ymm0)

	vextractf128(imm(1), ymm0, xmm1)
	vmovlpd(xmm0, mem(rcx))
	vmovhpd(xmm0, mem(rcx, rsi, 1))
	vmovlpd(xmm1, mem(rcx, rsi, 2))
	vmovhpd(xmm1, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)         // advance c by four columns

	// Same sequence for c(0,4..7), using the accumulator in ymm5.

	vmovlpd(mem(rcx), xmm0, xmm0)
	vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0)
	vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1)
	vmovhpd(mem(rcx, rax, 1), xmm1, xmm1)
	vperm2f128(imm(0x20), ymm1, ymm0, ymm0)

	vfmadd213pd(ymm5, ymm3, ymm0)

	vextractf128(imm(1), ymm0, xmm1)
	vmovlpd(xmm0, mem(rcx))
	vmovhpd(xmm0, mem(rcx, rsi, 1))
	vmovlpd(xmm1, mem(rcx, rsi, 2))
	vmovhpd(xmm1, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	// (Closing part of bli_dgemmsup_rv_haswell_asm_1x8: the beta == 0 store
	// paths, which write the accumulators in ymm4/ymm5 without reading c.)

	jz(.DCOLSTORBZ)                    // jump to column storage case

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm5, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORBZ)

	vmovupd(ymm4, ymm0)

	vextractf128(imm(1), ymm0, xmm1)
	vmovlpd(xmm0, mem(rcx))
	vmovhpd(xmm0, mem(rcx, rsi, 1))
	vmovlpd(xmm1, mem(rcx, rsi, 2))
	vmovhpd(xmm1, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)

	vmovupd(ymm5, ymm0)

	vextractf128(imm(1), ymm0, xmm1)
	vmovlpd(xmm0, mem(rcx))
	vmovhpd(xmm0, mem(rcx, rsi, 1))
	vmovlpd(xmm1, mem(rcx, rsi, 2))
	vmovhpd(xmm1, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)



	label(.DDONE)



	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a]      "m" (a),
	  [rs_a]   "m" (rs_a),
	  [cs_a]   "m" (cs_a),
	  [b]      "m" (b),
	  [rs_b]   "m" (rs_b),
	  [cs_b]   "m" (cs_b),
	  [alpha]  "m" (alpha),
	  [beta]   "m" (beta),
	  [c]      "m" (c),
	  [rs_c]   "m" (rs_c),
	  [cs_c]   "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rv_haswell_asm_6x6()
//
// Updates a 6x6 block of c as
//
//   c := beta * c + alpha * a * b
//
// using AVX/FMA inline assembly, accumulating k0 rank-1 updates.
//
// - The k dimension is handled by a main loop unrolled four times
//   (k0 / 4 trips) followed by an edge loop (k0 % 4 trips).
// - Each update reads 6 consecutive doubles from the current row of b
//   (4 into ymm0 and 2 into xmm1), so the column stride of b is implicitly
//   1: cs_b is passed to the asm block as an operand but is never loaded.
// - Six elements of a are broadcast per update, at row offsets 0..5 times
//   rs_a (r13 = 3*rs_a and r15 = 5*rs_a are precomputed); a then advances
//   by cs_a and b by rs_b.
// - Accumulators: for row i (0..5), ymm(4+2i) holds columns 0-3 and the low
//   half of ymm(5+2i) holds columns 4-5.
// - c is treated as column-stored when rs_c == 1: the tile is transposed in
//   registers and written as a 4x4, a 2x4, a 4x2 and a 2x2 piece. Otherwise
//   each row is accessed as a 4-double vector at byte offset 0 and a
//   2-double vector at byte offset 4*cs_c*sizeof(double).
// - When beta compares equal to zero, c is overwritten without being read.
// - conja, conjb, m0, n0, data and cntx are not referenced by any live code
//   in this function.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rv_haswell_asm_6x6
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;   // trip count of the 4x-unrolled main loop
	uint64_t k_left = k0 % 4;   // trip count of the edge loop

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;    // only listed as an asm operand; never read
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	mov(var(rs_b), r10)                // load rs_b
	//mov(var(cs_b), r11)                // load cs_b
	lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	                                   // NOTE: We cannot pre-load elements of a or b
	                                   // because it could eventually, in the last
	                                   // unrolled iter or the cleanup loop, result
	                                   // in reading beyond the bounds allocated mem
	                                   // (the likely result: a segmentation fault).

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH)                    // jump to column storage case
	label(.DROWPFETCH)                 // row-stored prefetching on c

	lea(mem(rcx, rdi, 2), rdx)         //
	lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 5*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 5*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 5*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 5*8))         // prefetch c + 3*rs_c
	prefetch(0, mem(rdx, rdi, 1, 5*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(rdx, rdi, 2, 5*8)) // prefetch c + 5*rs_c

	jmp(.DPOSTPFETCH)                  // jump to end of prefetching c
	label(.DCOLPFETCH)                 // column-stored prefetching c

	mov(var(cs_c), rsi)                // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi)            // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx)         //
	lea(mem(rdx, rsi, 1), rdx)         // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 5*8))         // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 5*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 5*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 5*8))         // prefetch c + 3*cs_c
	prefetch(0, mem(rdx, rsi, 1, 5*8)) // prefetch c + 4*cs_c
	prefetch(0, mem(rdx, rsi, 2, 5*8)) // prefetch c + 5*cs_c

	label(.DPOSTPFETCH)                // done prefetching c

	// rsi was only a temporary above; from here it is the loop counter.
	mov(var(k_iter), rsi)              // i = k_iter;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT)                  // if i == 0, jump to code that
	                                   // contains the k_left loop.

	label(.DLOOPKITER)                 // MAIN LOOP

	// Every iteration performs one rank-1 update:
	//   ymm0 = columns 0-3 and xmm1 = columns 4-5 of the current row of b,
	//   ymm2/ymm3 = broadcasts of two elements of a at a time,
	//   ymm4..ymm15 += products (two accumulators per row of c).

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER)                   // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi)              // i = k_left;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT)                 // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT)                   // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused here as pointers to alpha and beta.
	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	// The odd-numbered accumulators only carry two columns, so they are
	// scaled with 128-bit operations.
	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(xmm0, xmm5, xmm5)
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(xmm0, xmm7, xmm7)
	vmulpd(ymm0, ymm8, ymm8)
	vmulpd(xmm0, xmm9, xmm9)
	vmulpd(ymm0, ymm10, ymm10)
	vmulpd(xmm0, xmm11, xmm11)
	vmulpd(ymm0, ymm12, ymm12)
	vmulpd(xmm0, xmm13, xmm13)
	vmulpd(ymm0, ymm14, ymm14)
	vmulpd(xmm0, xmm15, xmm15)

	mov(var(cs_c), rsi)                // load cs_c
	lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx)         // load address of c + 4*cs_c;
	lea(mem(rcx, rdi, 4), rdx)         // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), rax)         // rax = 3*cs_c;

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED)                    // jump to column storage case

	label(.DROWSTORED)

	// beta != 0, row storage: for each of the six rows, update columns 0-3
	// with a 256-bit FMA and columns 4-5 (at rcx + 4*rsi) with a 128-bit FMA,
	// then step rcx to the next row.

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm5)
	vmovupd(xmm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm7)
	vmovupd(xmm7, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm8)
	vmovupd(ymm8, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm9)
	vmovupd(xmm9, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm10)
	vmovupd(ymm10, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm11)
	vmovupd(xmm11, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm12)
	vmovupd(ymm12, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm13)
	vmovupd(xmm13, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm14)
	vmovupd(ymm14, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm15)
	vmovupd(xmm15, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORED)

	// beta != 0, column storage.
	// Piece 1: transpose rows 0-3 x columns 0-3 so that ymm4/6/8/10 each
	// hold one column. ymm3 is used as scratch by the transpose.

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vbroadcastsd(mem(rbx), ymm3)       // reload beta (ymm3 was overwritten)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8)
	vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)         // rcx now addresses column 4

	// Piece 2: rows 4-5 x columns 0-3, addressed through rdx (which was set
	// to c + 4*rs_c above).

	vunpcklpd(ymm14, ymm12, ymm0)
	vunpckhpd(ymm14, ymm12, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vfmadd231pd(mem(rdx), xmm3, xmm0)
	vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1)
	vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2)
	vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4)
	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	vmovupd(xmm2, mem(rdx, rsi, 2))
	vmovupd(xmm4, mem(rdx, rax, 1))

	lea(mem(rdx, rsi, 4), rdx)         // rdx now addresses column 4, row 4

	// Piece 3: rows 0-3 x columns 4-5 (only two columns, hence the
	// commented-out instructions for a third and fourth column).

	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	//vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	//vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	vbroadcastsd(mem(rbx), ymm3)       // reload beta (ymm3 was overwritten)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7)
	//vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9)
	//vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11)
	vmovupd(ymm5, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 1))
	//vmovupd(ymm9, mem(rcx, rsi, 2))
	//vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	// Piece 4: rows 4-5 x columns 4-5.

	vunpcklpd(ymm15, ymm13, ymm0)
	vunpckhpd(ymm15, ymm13, ymm1)
	//vextractf128(imm(0x1), ymm0, xmm2)
	//vextractf128(imm(0x1), ymm1, xmm4)

	vfmadd231pd(mem(rdx), xmm3, xmm0)
	vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1)
	//vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2)
	//vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4)
	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	//vmovupd(xmm2, mem(rdx, rsi, 2))
	//vmovupd(xmm4, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ)                    // jump to column storage case

	label(.DROWSTORBZ)

	// beta == 0, row storage: plain stores, c is never read.

	vmovupd(ymm4, mem(rcx))
	vmovupd(xmm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	vmovupd(xmm7, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm8, mem(rcx))
	vmovupd(xmm9, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm10, mem(rcx))
	vmovupd(xmm11, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm12, mem(rcx))
	vmovupd(xmm13, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm14, mem(rcx))
	vmovupd(xmm15, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORBZ)

	// beta == 0, column storage: same four-piece transpose as the
	// beta != 0 column path, but with stores only.

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(ymm14, ymm12, ymm0)
	vunpckhpd(ymm14, ymm12, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	vmovupd(xmm2, mem(rdx, rsi, 2))
	vmovupd(xmm4, mem(rdx, rax, 1))

	lea(mem(rdx, rsi, 4), rdx)

	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	//vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	//vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	vmovupd(ymm5, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 1))
	//vmovupd(ymm9, mem(rcx, rsi, 2))
	//vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(ymm15, ymm13, ymm0)
	vunpckhpd(ymm15, ymm13, ymm1)
	//vextractf128(imm(0x1), ymm0, xmm2)
	//vextractf128(imm(0x1), ymm1, xmm4)

	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	//vmovupd(xmm2, mem(rdx, rsi, 2))
	//vmovupd(xmm4, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)



	label(.DDONE)



	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a]      "m" (a),
	  [rs_a]   "m" (rs_a),
	  [cs_a]   "m" (cs_a),
	  [b]      "m" (b),
	  [rs_b]   "m" (rs_b),
	  [cs_b]   "m" (cs_b),
	  [alpha]  "m" (alpha),
	  [beta]   "m" (beta),
	  [c]      "m" (c),
	  [rs_c]   "m" (rs_c),
	  [cs_c]   "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rv_haswell_asm_5x6
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	mov(var(rs_b), r10)                // load rs_b
	//mov(var(cs_b), r11)                // load cs_b
	lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	                                   // NOTE: We cannot pre-load elements of a or b
	                                   // because it could eventually, in the last
	                                   // unrolled iter or the cleanup loop, result
	                                   // in reading beyond the bounds allocated mem
	                                   // (the likely result: a segmentation fault).

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 5*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 5*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 5*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 5*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 5*8)) // prefetch c + 4*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 4*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 4*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 4*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 4*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 4*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 4*8)) // prefetch c + 5*cs_c label(.DPOSTPFETCH) // done prefetching c mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // 
---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. 
label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(xmm0, xmm5, xmm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(xmm0, xmm7, xmm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(xmm0, xmm9, xmm9) vmulpd(ymm0, ymm10, ymm10) vmulpd(xmm0, xmm11, xmm11) vmulpd(ymm0, ymm12, ymm12) vmulpd(xmm0, xmm13, xmm13) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm5) vmovupd(xmm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm7) vmovupd(xmm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm9) vmovupd(xmm9, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm11) vmovupd(xmm11, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm13) vmovupd(xmm13, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovlpd(mem(rdx), xmm0, xmm0) vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rdx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rdx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm12, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) //vperm2f128(imm(0x31), ymm2, ymm0, ymm9) //vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm5) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7) //vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9) //vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) 
//vmovupd(ymm9, mem(rcx, rsi, 2)) //vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) //vextractf128(imm(0x1), ymm0, xmm2) //vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) //vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) //vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) //vmovupd(xmm2, mem(rdx, rsi, 2)) //vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovlpd(mem(rdx), xmm0, xmm0) vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) //vmovlpd(mem(rdx, rsi, 2), xmm1, xmm1) //vmovhpd(mem(rdx, rax, 1), xmm1, xmm1) //vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(xmm13, xmm3, xmm0) //vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) //vmovlpd(xmm1, mem(rdx, rsi, 2)) //vmovhpd(xmm1, mem(rdx, rax, 1)) #endif //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(xmm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(xmm9, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm10, mem(rcx)) vmovupd(xmm11, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm12, mem(rcx)) vmovupd(xmm13, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovupd(ymm12, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) //vperm2f128(imm(0x31), ymm2, ymm0, ymm9) //vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) //vmovupd(ymm9, mem(rcx, rsi, 2)) //vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) //vextractf128(imm(0x1), ymm0, xmm2) //vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) //vmovupd(xmm2, mem(rdx, rsi, 2)) //vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovupd(ymm13, ymm0) //vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) //vmovlpd(xmm1, mem(rdx, rsi, 2)) //vmovhpd(xmm1, mem(rdx, rax, 1)) #endif //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), 
[b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rv_haswell_asm_4x6 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. 
mov(var(rs_b), r10)                // load rs_b
	//mov(var(cs_b), r11)                // load cs_b
	lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	// NOTE: We cannot pre-load elements of a or b
	// because it could eventually, in the last
	// unrolled iter or the cleanup loop, result
	// in reading beyond the bounds allocated mem
	// (the likely result: a segmentation fault).

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// Prefetch the tile of C; rs_c == 1 selects the column-stored case.
	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH)                    // jump to column storage case
	label(.DROWPFETCH)                 // row-stored prefetching on c

	lea(mem(rcx, rdi, 2), rdx)         //
	lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 5*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 5*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 5*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 5*8))         // prefetch c + 3*rs_c

	jmp(.DPOSTPFETCH)                  // jump to end of prefetching c
	label(.DCOLPFETCH)                 // column-stored prefetching c

	mov(var(cs_c), rsi)                // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi)            // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx)         //
	lea(mem(rdx, rsi, 1), rdx)         // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 3*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 3*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 3*8))         // prefetch c + 3*cs_c
	prefetch(0, mem(rdx, rsi, 1, 3*8)) // prefetch c + 4*cs_c
	prefetch(0, mem(rdx, rsi, 2, 3*8)) // prefetch c + 5*cs_c

	label(.DPOSTPFETCH)                // done prefetching c

	mov(var(k_iter), rsi)              // i = k_iter;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT)                  // if i == 0, jump to code that
	                                   // contains the k_left loop.

	label(.DLOOPKITER)                 // MAIN LOOP

	// Each iteration is one k step: load 6 doubles of b (ymm0 + xmm1),
	// broadcast 4 elements of a (one per row) and accumulate into
	// ymm4..ymm11 (row i -> ymm(4+2i) for columns 0-3, ymm(5+2i) for 4-5).

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER)                   // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi)              // i = k_left;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT)                 // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT)                   // iterate again if i != 0.

	label(.DPOSTACCUM)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(xmm0, xmm5, xmm5)
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(xmm0, xmm7, xmm7)
	vmulpd(ymm0, ymm8, ymm8)
	vmulpd(xmm0, xmm9, xmm9)
	vmulpd(ymm0, ymm10, ymm10)
	vmulpd(xmm0, xmm11, xmm11)

	mov(var(cs_c), rsi)                // load cs_c
	lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx)         // load address of c + 4*cs_c;
	//lea(mem(rcx, rdi, 4), rdx)         // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), rax)         // rax = 3*cs_c;

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED)                    // jump to column storage case

	label(.DROWSTORED)

	// Row-stored, beta != 0: per row, accumulate beta*C into the result
	// and store 4 + 2 elements, then step rcx to the next row.

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm5)
	vmovupd(xmm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm7)
	vmovupd(xmm7, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm8)
	vmovupd(ymm8, mem(rcx))
	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm9)
	vmovupd(xmm9, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm10)
	vmovupd(ymm10, mem(rcx))
	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm11)
	vmovupd(xmm11, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORED)

	// Column-stored, beta != 0: transpose columns 0-3 in registers so each
	// ymm holds one column of the tile, then update and store.

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vbroadcastsd(mem(rbx), ymm3)       // reload beta (ymm3 was used as scratch above)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8)
	vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)

	// Columns 4-5: only two columns remain, so only ymm5 and ymm7 are
	// assembled and stored.

	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	//vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	//vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	vbroadcastsd(mem(rbx), ymm3)       // reload beta

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7)
	//vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9)
	//vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11)
	vmovupd(ymm5, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 1))
	//vmovupd(ymm9, mem(rcx, rsi, 2))
	//vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ)                    // jump to column storage case

	label(.DROWSTORBZ)

	// Row-stored, beta == 0: store the results without reading C.

	vmovupd(ymm4, mem(rcx))
	vmovupd(xmm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	vmovupd(xmm7, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm8, mem(rcx))
	vmovupd(xmm9, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm10, mem(rcx))
	vmovupd(xmm11, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORBZ)

	// Column-stored, beta == 0: transpose and store without reading C.

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	//vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	//vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	vmovupd(ymm5, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 1))
	//vmovupd(ymm9, mem(rcx, rsi, 2))
	//vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a]      "m" (a),
	  [rs_a]   "m" (rs_a),
	  [cs_a]   "m" (cs_a),
	  [b]      "m" (b),
	  [rs_b]   "m" (rs_b),
	  [cs_b]   "m" (cs_b),
	  [alpha]  "m" (alpha),
	  [beta]   "m" (beta),
	  [c]      "m" (c),
	  [rs_c]   "m" (rs_c),
	  [cs_c]   "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rv_haswell_asm_3x6
     (
       conj_t              conja,
conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). 
mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 5*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 5*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 5*8)) // prefetch c + 2*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 2*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 2*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 2*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 2*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 2*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 2*8)) // prefetch c + 5*cs_c label(.DPOSTPFETCH) // done prefetching c #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. 
je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(xmm0, xmm5, xmm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(xmm0, xmm7, xmm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(xmm0, xmm9, xmm9) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 2), rdx) // load address of c + 2*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm5) vmovupd(xmm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm7) vmovupd(xmm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm9) vmovupd(xmm9, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vextractf128(imm(0x1), ymm4, xmm12) vextractf128(imm(0x1), ymm6, xmm13) vextractf128(imm(0x1), ymm8, xmm14) vextractf128(imm(0x1), ymm10, xmm15) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), xmm3, xmm4) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm6) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm8) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm10) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) vmovupd(xmm8, mem(rcx, rsi, 2)) vmovupd(xmm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vfmadd231sd(mem(rdx), xmm3, xmm12) vfmadd231sd(mem(rdx, rsi, 1), xmm3, xmm13) vfmadd231sd(mem(rdx, rsi, 2), xmm3, xmm14) vfmadd231sd(mem(rdx, rax, 1), xmm3, xmm15) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vextractf128(imm(0x1), ymm5, xmm12) vextractf128(imm(0x1), ymm7, xmm13) //vextractf128(imm(0x1), ymm9, xmm14) //vextractf128(imm(0x1), ymm11, xmm15) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), xmm3, xmm5) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm7) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm9) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm11) vmovupd(xmm5, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 1)) //vmovupd(xmm9, mem(rcx, rsi, 2)) //vmovupd(xmm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vfmadd231sd(mem(rdx), xmm3, xmm12) vfmadd231sd(mem(rdx, rsi, 1), xmm3, xmm13) //vfmadd231sd(mem(rdx, rsi, 2), xmm3, xmm14) 
//vfmadd231sd(mem(rdx, rax, 1), xmm3, xmm15) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) //vmovsd(xmm14, mem(rdx, rsi, 2)) //vmovsd(xmm15, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(xmm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(xmm9, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vextractf128(imm(0x1), ymm4, xmm12) vextractf128(imm(0x1), ymm6, xmm13) vextractf128(imm(0x1), ymm8, xmm14) vextractf128(imm(0x1), ymm10, xmm15) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) vmovupd(xmm8, mem(rcx, rsi, 2)) vmovupd(xmm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vextractf128(imm(0x1), ymm5, xmm12) vextractf128(imm(0x1), ymm7, xmm13) //vextractf128(imm(0x1), ymm9, xmm14) //vextractf128(imm(0x1), ymm11, xmm15) vmovupd(xmm5, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 1)) //vmovupd(xmm9, mem(rcx, rsi, 2)) //vmovupd(xmm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) //vmovsd(xmm14, mem(rdx, rsi, 2)) 
// (Tail of the preceding kernel: its last commented-out stores and asm epilogue.)
	//vmovsd(xmm15, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a]      "m" (a),
	  [rs_a]   "m" (rs_a),
	  [cs_a]   "m" (cs_a),
	  [b]      "m" (b),
	  [rs_b]   "m" (rs_b),
	  [cs_b]   "m" (cs_b),
	  [alpha]  "m" (alpha),
	  [beta]   "m" (beta),
	  [c]      "m" (c),
	  [rs_c]   "m" (rs_c),
	  [cs_c]   "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// bli_dgemmsup_rv_haswell_asm_2x6
//
// Inline-assembly microkernel that updates a 2x6 block of doubles:
//
//   C := beta * C + alpha * A * B
//
// - k0 is split into k0/4 passes of a 4x-unrolled main loop plus k0%4 passes
//   of a single-step edge loop.
// - Each step loads six consecutive doubles from b (four into ymm0, two into
//   xmm1), broadcasts a[0] and a[rs_a], then advances b by rs_b and a by
//   cs_a. cs_b is handed to the asm block but never read, so the six
//   elements of a row of B are presumed contiguous.
// - Accumulators: ymm4/ymm5 hold row 0 (columns 0-3 / 4-5), ymm6/ymm7 hold
//   row 1 (columns 0-3 / 4-5).
// - rs_c == 1 selects the column-stored paths for C; any other rs_c takes
//   the row-stored paths, which store four doubles at the row start and two
//   more at offset 4*cs_c.
// - If beta compares equal to zero, C is overwritten without being read.
//
// conja, conjb, m0, n0, data and cntx are not referenced by this function.
void bli_dgemmsup_rv_haswell_asm_2x6
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	mov(var(rs_b), r10)                // load rs_b
	//mov(var(cs_b), r11)                // load cs_b
	lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	                                   // NOTE: We cannot pre-load elements of a or b
	                                   // because it could eventually, in the last
	                                   // unrolled iter or the cleanup loop, result
	                                   // in reading beyond the bounds of allocated
	                                   // memory (the likely result: a segmentation
	                                   // fault).

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// Prefetch the C tile. The byte offset in each prefetch targets the last
	// element of the row (5*8) or of the column (1*8) being touched.
	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH)                    // jump to column storage case
	label(.DROWPFETCH)                 // row-stored prefetching on c

	//lea(mem(rcx, rdi, 2), rdx)         //
	//lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 5*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 5*8)) // prefetch c + 1*rs_c

	jmp(.DPOSTPFETCH)                  // jump to end of prefetching c
	label(.DCOLPFETCH)                 // column-stored prefetching c

	mov(var(cs_c), rsi)                // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi)            // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx)         //
	lea(mem(rdx, rsi, 1), rdx)         // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 1*8))         // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 1*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 1*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 1*8))         // prefetch c + 3*cs_c
	prefetch(0, mem(rdx, rsi, 1, 1*8)) // prefetch c + 4*cs_c
	prefetch(0, mem(rdx, rsi, 2, 1*8)) // prefetch c + 5*cs_c

	label(.DPOSTPFETCH)                // done prefetching c

	mov(var(k_iter), rsi)              // i = k_iter;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT)                  // if i == 0, jump to code that
	                                   // contains the k_left loop.

	label(.DLOOPKITER)                 // MAIN LOOP

	// Each iteration below is one rank-1 update:
	//   ymm0 = b[0:4], xmm1 = b[4:6], ymm2 = a[0], ymm3 = a[rs_a].

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER)                   // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi)              // i = k_left;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT)                 // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT)                   // iterate again if i != 0.

	label(.DPOSTACCUM)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(xmm0, xmm5, xmm5)
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(xmm0, xmm7, xmm7)

	mov(var(cs_c), rsi)                // load cs_c
	lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx)         // load address of c +  4*cs_c;
	//lea(mem(rcx, rdi, 4), rdx)         // load address of c +  4*rs_c;

	lea(mem(rsi, rsi, 2), rax)         // rax = 3*cs_c;

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED)                    // jump to column storage case

	label(.DROWSTORED)

	// Row 0: columns 0-3 at rcx, columns 4-5 at rcx + 4*cs_c.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm5)
	vmovupd(xmm5, mem(rcx, rsi, 4))
	add(rdi, rcx)                      // advance to row 1

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm7)
	vmovupd(xmm7, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORED)

	// Interleave rows 0 and 1 so that each xmm register holds one
	// two-element column of columns 0-3. ymm8-ymm11 are still zero from
	// vzeroall when this path is entered and only pad the shuffles.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)       // overwrites beta in ymm3
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vbroadcastsd(mem(rbx), ymm3)       // reload beta

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm6)
	vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm8)
	vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm10)
	vmovupd(xmm4, mem(rcx))
	vmovupd(xmm6, mem(rcx, rsi, 1))
	vmovupd(xmm8, mem(rcx, rsi, 2))
	vmovupd(xmm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)         // rcx = c + 4*cs_c

	// Same interleave for columns 4-5.
	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)       // overwrites beta in ymm3
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	//vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	//vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	vbroadcastsd(mem(rbx), ymm3)       // reload beta

	vfmadd231pd(mem(rcx), xmm3, xmm5)
	vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm7)
	//vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9)
	//vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11)
	vmovupd(xmm5, mem(rcx))
	vmovupd(xmm7, mem(rcx, rsi, 1))
	//vmovupd(ymm9, mem(rcx, rsi, 2))
	//vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ)                    // jump to column storage case

	label(.DROWSTORBZ)

	// beta == 0: store the scaled accumulators without reading C.
	vmovupd(ymm4, mem(rcx))
	vmovupd(xmm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	vmovupd(xmm7, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) vmovupd(xmm8, mem(rcx, rsi, 2)) vmovupd(xmm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) //vperm2f128(imm(0x31), ymm2, ymm0, ymm9) //vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(xmm5, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 1)) //vmovupd(ymm9, mem(rcx, rsi, 2)) //vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rv_haswell_asm_1x6 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // 
different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 1 prefetch(0, mem(rcx, 5*8)) // prefetch c + 0*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 5*8)) // prefetch c + 0*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 0*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 0*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 0*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 0*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 0*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 0*8)) // prefetch c + 5*cs_c label(.DPOSTPFETCH) // done prefetching c #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) // ---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), xmm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. 
label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(xmm0, xmm5, xmm5) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; //lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm5) vmovupd(xmm5, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vmovlpd(mem(rcx), xmm0, xmm0) vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rcx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm4, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) vmovlpd(xmm1, mem(rcx, rsi, 2)) vmovhpd(xmm1, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vmovlpd(mem(rcx), xmm0, xmm0) vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) //vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1) //vmovhpd(mem(rcx, rax, 1), xmm1, xmm1) //vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(xmm5, xmm3, xmm0) //vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) //vmovlpd(xmm1, mem(rcx, rsi, 2)) //vmovhpd(xmm1, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(xmm5, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vmovupd(ymm4, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) vmovlpd(xmm1, mem(rcx, rsi, 2)) vmovhpd(xmm1, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vmovupd(xmm5, xmm0) //vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) //vmovlpd(xmm1, mem(rcx, rsi, 2)) //vmovhpd(xmm1, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rv_haswell_asm_6x4 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
// Body of bli_dgemmsup_rv_haswell_asm_6x4 (signature immediately above).
//
// Inline-assembly microkernel that updates a 6x4 block of doubles:
//
//   C := beta * C + alpha * A * B
//
// - k0 is split into k0/4 passes of a 4x-unrolled main loop plus k0%4 passes
//   of a single-step edge loop.
// - Each step loads four consecutive doubles from b into ymm0 and broadcasts
//   a[0], a[rs_a], ..., a[5*rs_a], then advances b by rs_b and a by cs_a.
//   cs_b is handed to the asm block but never read, so the four elements of
//   a row of B are presumed contiguous.
// - Accumulators: ymm4, ymm6, ymm8, ymm10, ymm12, ymm14 hold rows 0..5.
// - rs_c == 1 selects the column-stored paths for C; any other rs_c takes
//   the row-stored paths, which store four contiguous doubles per row.
// - If beta compares equal to zero, C is overwritten without being read.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	mov(var(rs_b), r10)                // load rs_b
	//mov(var(cs_b), r11)                // load cs_b
	lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	                                   // NOTE: We cannot pre-load elements of a or b
	                                   // because it could eventually, in the last
	                                   // unrolled iter or the cleanup loop, result
	                                   // in reading beyond the bounds of allocated
	                                   // memory (the likely result: a segmentation
	                                   // fault).

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

#if 0
	lea(mem(rcx, rdi, 2), rdx)         //
	lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 7*8))         // prefetch c + 3*rs_c
	prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c
#else
	// Prefetch the C tile. The byte offset in each prefetch targets the last
	// element of the row (3*8) or of the column (5*8) being touched.
	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH)                    // jump to column storage case
	label(.DROWPFETCH)                 // row-stored prefetching on c

	lea(mem(rcx, rdi, 2), rdx)         //
	lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 3*8))         // prefetch c + 3*rs_c
	prefetch(0, mem(rdx, rdi, 1, 3*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(rdx, rdi, 2, 3*8)) // prefetch c + 5*rs_c

	jmp(.DPOSTPFETCH)                  // jump to end of prefetching c
	label(.DCOLPFETCH)                 // column-stored prefetching c

	mov(var(cs_c), rsi)                // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi)            // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx)         //
	lea(mem(rdx, rsi, 1), rdx)         // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 5*8))         // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 5*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 5*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 5*8))         // prefetch c + 3*cs_c

	label(.DPOSTPFETCH)                // done prefetching c
#endif

	mov(var(k_iter), rsi)              // i = k_iter;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT)                  // if i == 0, jump to code that
	                                   // contains the k_left loop.

	label(.DLOOPKITER)                 // MAIN LOOP

	// Each iteration below is one rank-1 update: ymm0 = b[0:4]; ymm2/ymm3
	// alternately hold the broadcast elements a[i*rs_a] for i = 0..5.

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm0, ymm3, ymm14)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm0, ymm3, ymm14)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm0, ymm3, ymm14)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm0, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER)                   // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi)              // i = k_left;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT)                 // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8,  1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8,  2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vbroadcastsd(mem(rax, r8,  4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm0, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT)                   // iterate again if i != 0.

	label(.DPOSTACCUM)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(ymm0, ymm8, ymm8)
	vmulpd(ymm0, ymm10, ymm10)
	vmulpd(ymm0, ymm12, ymm12)
	vmulpd(ymm0, ymm14, ymm14)

	mov(var(cs_c), rsi)                // load cs_c
	lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx)         // load address of c +  4*cs_c;
	lea(mem(rcx, rdi, 4), rdx)         // load address of c +  4*rs_c;

	lea(mem(rsi, rsi, 2), rax)         // rax = 3*cs_c;

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED)                    // jump to column storage case

	label(.DROWSTORED)

	// One 4-wide fma + store per row; rcx steps by rs_c between rows.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm8)
	vmovupd(ymm8, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm10)
	vmovupd(ymm10, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm12)
	vmovupd(ymm12, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm14)
	vmovupd(ymm14, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORED)

	// Transpose rows 0-3 (ymm4/6/8/10) in registers so that each ymm holds
	// rows 0-3 of one column of C.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)       // overwrites beta in ymm3
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vbroadcastsd(mem(rbx), ymm3)       // reload beta

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8)
	vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	// Rows 4-5 (ymm12/ymm14): interleave into four two-element columns and
	// update them at rdx = c + 4*rs_c.
	vunpcklpd(ymm14, ymm12, ymm0)
	vunpckhpd(ymm14, ymm12, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vfmadd231pd(mem(rdx), xmm3, xmm0)
	vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1)
	vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2)
	vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4)
	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	vmovupd(xmm2, mem(rdx, rsi, 2))
	vmovupd(xmm4, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ)                    // jump to column storage case

	label(.DROWSTORBZ)

	// beta == 0: store the scaled accumulators without reading C.
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm10, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm12, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm14, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DCOLSTORBZ)

	// Same in-register transposes as .DCOLSTORED, stores only.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(ymm14, ymm12, ymm0)
	vunpckhpd(ymm14, ymm12, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	vmovupd(xmm2, mem(rdx, rsi, 2))
	vmovupd(xmm4, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a]      "m" (a),
	  [rs_a]   "m" (rs_a),
	  [cs_a]   "m" (cs_a),
	  [b]      "m" (b),
	  [rs_b]   "m" (rs_b),
	  [cs_b]   "m" (cs_b),
	  [alpha]  "m" (alpha),
	  [beta]   "m" (beta),
	  [c]      "m" (c),
	  [rs_c]   "m" (rs_c),
	  [cs_c]   "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rv_haswell_asm_5x4
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*
restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 3*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 3*8)) // prefetch c + 4*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 4*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 4*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 4*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 4*8)) // prefetch c + 3*cs_c label(.DPOSTPFETCH) // done prefetching c #endif #if 0 lea(mem(rax, r9, 8), rdx) // use rdx for prefetching b. lea(mem(rdx, r9, 8), rdx) // rdx = b + 16*rs_b; #else #if 1 mov(r9, rsi) // rsi = rs_b; sal(imm(5), rsi) // rsi = 16*rs_b; lea(mem(rax, rsi, 1), rdx) // rdx = b + 16*rs_b; #endif #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) // ---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. 
label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm12, ymm12) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovlpd(mem(rdx), xmm0, xmm0) vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rdx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rdx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm12, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO)

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ) // jump to column storage case

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm10, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm12, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORBZ)

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

#if 0
	vunpcklpd(ymm14, ymm12, ymm0)
	vunpckhpd(ymm14, ymm12, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	vmovupd(xmm2, mem(rdx, rsi, 2))
	vmovupd(xmm4, mem(rdx, rax, 1))
#else
	vmovupd(ymm12, ymm0)

	vextractf128(imm(1), ymm0, xmm1)
	vmovlpd(xmm0, mem(rdx))
	vmovhpd(xmm0, mem(rdx, rsi, 1))
	vmovlpd(xmm1, mem(rdx, rsi, 2))
	vmovhpd(xmm1, mem(rdx, rax, 1))
#endif

	//lea(mem(rdx, rsi, 4), rdx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rv_haswell_asm_4x4
//
// Double-precision kernel, written as inline assembly, that updates a 4x4
// block of C as
//
//   C := beta * C + alpha * A * B
//
// Each k step loads four consecutive doubles of B from b (then b += rs_b),
// broadcasts one double from each of four rows of A (a, a + rs_a,
// a + 2*rs_a, a + 3*rs_a; then a += cs_a), and accumulates the products
// with vfmadd231pd into ymm4, ymm6, ymm8 and ymm10, one register per row
// of C. The k loop runs k0/4 times unrolled by four, followed by k0%4
// single steps.
//
// Parameters:
//   k0               Inner (k) dimension.
//   alpha, beta      Addresses of the two scalars.
//   a, rs_a0, cs_a0  Address of A and its row/column strides, in elements
//                    (the strides are multiplied by sizeof(double) here).
//   b, rs_b0         Address of B and its row stride, in elements.
//   c, rs_c0, cs_c0  Address of C and its row/column strides, in elements.
//   conja, conjb, m0, n0, cs_b0, data, cntx
//                    Not read by the assembly. cs_b0 is copied into cs_b
//                    and listed as an operand, but its load is commented
//                    out.
//
// Storage of C is selected by rs_c0: when rs_c0 == 1 the accumulators are
// transposed and written one column at a time at stride cs_c; otherwise
// each row is written as one contiguous 4-double vector at stride rs_c
// (cs_c is not used on that path). When beta compares equal to zero, C is
// stored without being loaded.
//
// NOTE(review): B is always loaded as a contiguous 32-byte vector, so this
// presumably relies on cs_b0 == 1 -- confirm against the caller.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rv_haswell_asm_4x4
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	// The accumulators ymm4/6/8/10 start at zero because of vzeroall.
	vzeroall() // zero all xmm/ymm registers.

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	mov(var(rs_b), r10) // load rs_b
	//mov(var(cs_b), r11) // load cs_b
	lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	// NOTE: We cannot pre-load elements of a or b
	// because it could eventually, in the last
	// unrolled iter or the cleanup loop, result
	// in reading beyond the bounds allocated mem
	// (the likely result: a segmentation fault).

	mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

#if 0
	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 3*8)) // prefetch c + 3*rs_c
#else
	// Prefetch the four rows (or four columns) of C that will be updated,
	// choosing the stride by the same rs_c == 1 test used for the stores.
	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH) // jump to column storage case
	label(.DROWPFETCH) // row-stored prefetching on c

	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 3*8)) // prefetch c + 3*rs_c

	jmp(.DPOSTPFETCH) // jump to end of prefetching c
	label(.DCOLPFETCH) // column-stored prefetching c

	// rsi is only a scratch register here; it is reloaded with k_iter below.
	mov(var(cs_c), rsi) // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx) //
	lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 3*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 3*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 3*8)) // prefetch c + 3*cs_c

	label(.DPOSTPFETCH) // done prefetching c
#endif

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	// contains the k_left loop.

	label(.DLOOPKITER) // MAIN LOOP

	// Each iteration below is one k step: ymm0 holds a row of B, ymm2/ymm3
	// hold broadcast elements of A, and ymm4/6/8/10 accumulate rows 0..3.

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER) // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	// else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT) // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm0, ymm3, ymm10)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused for the scalar addresses from here on; rbx
	// keeps the address of beta so it can be reloaded later.
	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4) // scale by alpha
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(ymm0, ymm8, ymm8)
	vmulpd(ymm0, ymm10, ymm10)

	mov(var(cs_c), rsi) // load cs_c
	lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c;
	//lea(mem(rcx, rdi, 4), r14) // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c;

	// now avoid loading C if beta == 0

	// NOTE(review): vucomisd also sets ZF when an operand is NaN, so a NaN
	// beta would take the beta == 0 branch -- confirm this is acceptable.
	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED) // jump to column storage case

	label(.DROWSTORED)

	// Row i of the result is beta * C(i,:) + accumulator i, written back
	// in place; rcx steps through the rows by rs_c.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm8)
	vmovupd(ymm8, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm10)
	vmovupd(ymm10, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORED)

	// Transpose the 4x4 block held in ymm4/6/8/10 (one row each) so that
	// afterwards ymm4/6/8/10 each hold one column of the result.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	// ymm3 was overwritten by the transpose above, so beta is reloaded.
	vbroadcastsd(mem(rbx), ymm3)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8)
	vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	// beta == 0: store alpha*A*B directly without reading C.
	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ) // jump to column storage case

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm10, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORBZ)

	// Same 4x4 transpose as in the beta != 0 column path.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rv_haswell_asm_3x4
//
// Double-precision kernel, written as inline assembly, that updates a 3x4
// block of C as
//
//   C := beta * C + alpha * A * B
//
// Each k step loads four consecutive doubles of B from b (then b += rs_b),
// broadcasts one double from each of three rows of A (a, a + rs_a,
// a + 2*rs_a; then a += cs_a), and accumulates into ymm4, ymm6 and ymm8,
// one register per row of C. The k loop runs k0/4 times unrolled by four,
// followed by k0%4 single steps.
//
// Parameters:
//   k0               Inner (k) dimension.
//   alpha, beta      Addresses of the two scalars.
//   a, rs_a0, cs_a0  Address of A and its row/column strides, in elements.
//   b, rs_b0         Address of B and its row stride, in elements.
//   c, rs_c0, cs_c0  Address of C and its row/column strides, in elements.
//   conja, conjb, m0, n0, cs_b0, data, cntx
//                    Not read by the assembly.
//
// rs_c0 == 1 selects the column-stored path; any other value selects the
// row-stored path, which writes each row as one contiguous 4-double vector.
// When beta compares equal to zero, C is stored without being loaded.
//
// NOTE(review): B is always loaded as a contiguous 32-byte vector, so this
// presumably relies on cs_b0 == 1 -- confirm against the caller.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rv_haswell_asm_3x4
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall() // zero all xmm/ymm registers.

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	mov(var(rs_b), r10) // load rs_b
	//mov(var(cs_b), r11) // load cs_b
	lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	// NOTE: We cannot pre-load elements of a or b
	// because it could eventually, in the last
	// unrolled iter or the cleanup loop, result
	// in reading beyond the bounds allocated mem
	// (the likely result: a segmentation fault).
mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

#if 0
	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c
#else
	// Prefetch the part of C this 3x4 kernel will update: three rows when
	// row-stored, four columns when column-stored (rs_c == 1).
	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH) // jump to column storage case
	label(.DROWPFETCH) // row-stored prefetching on c

	//lea(mem(rcx, rdi, 2), rdx) //
	//lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c

	jmp(.DPOSTPFETCH) // jump to end of prefetching c
	label(.DCOLPFETCH) // column-stored prefetching c

	// rsi and rdx are scratch here; both are overwritten again below
	// (rsi with k_iter, rdx with c + 2*rs_c after the k loop).
	mov(var(cs_c), rsi) // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx) //
	lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 2*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 2*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 2*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 2*8)) // prefetch c + 3*cs_c

	label(.DPOSTPFETCH) // done prefetching c
#endif

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	// contains the k_left loop.

	label(.DLOOPKITER) // MAIN LOOP

	// Each iteration below is one k step: ymm0 holds a row of B, ymm2/ymm3
	// hold broadcast elements of A, and ymm4/6/8 accumulate rows 0..2.

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER) // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	// else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT) // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused for the scalar addresses from here on; rbx
	// keeps the address of beta so it can be reloaded later.
	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4) // scale by alpha
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(ymm0, ymm8, ymm8)

	mov(var(cs_c), rsi) // load cs_c
	lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c;
	// rdx is only used by the column-stored paths, where it addresses the
	// third row of each column.
	lea(mem(rcx, rdi, 2), rdx) // load address of c + 2*rs_c;

	lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c;

	// now avoid loading C if beta == 0

	// NOTE(review): vucomisd also sets ZF when an operand is NaN, so a NaN
	// beta would take the beta == 0 branch -- confirm this is acceptable.
	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED) // jump to column storage case

	label(.DROWSTORED)

	// Row i of the result is beta * C(i,:) + accumulator i, written back
	// in place; rcx steps through the rows by rs_c.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm8)
	vmovupd(ymm8, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORED)

	// Transpose rows ymm4/6/8 into columns. ymm10 is never written after
	// vzeroall in this kernel, so it supplies a zero fourth row. After the
	// transpose ymm4/6/8/10 each hold one column: rows 0-1 in the low
	// 128 bits and row 2 in the low element of the high 128 bits.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	// Split off the upper halves (row 2 of each column) into xmm12..xmm15.
	vextractf128(imm(0x1), ymm4, xmm12)
	vextractf128(imm(0x1), ymm6, xmm13)
	vextractf128(imm(0x1), ymm8, xmm14)
	vextractf128(imm(0x1), ymm10, xmm15)

	// ymm3 was overwritten by the transpose above, so beta is reloaded.
	vbroadcastsd(mem(rbx), ymm3)

	// Rows 0-1 of each column: two doubles at c + j*cs_c.
	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm6)
	vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm8)
	vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm10)
	vmovupd(xmm4, mem(rcx))
	vmovupd(xmm6, mem(rcx, rsi, 1))
	vmovupd(xmm8, mem(rcx, rsi, 2))
	vmovupd(xmm10, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	// Row 2 of each column: one double at (c + 2*rs_c) + j*cs_c.
	vfmadd231sd(mem(rdx), xmm3, xmm12)
	vfmadd231sd(mem(rdx, rsi, 1), xmm3, xmm13)
	vfmadd231sd(mem(rdx, rsi, 2), xmm3, xmm14)
	vfmadd231sd(mem(rdx, rax, 1), xmm3, xmm15)
	vmovsd(xmm12, mem(rdx))
	vmovsd(xmm13, mem(rdx, rsi, 1))
	vmovsd(xmm14, mem(rdx, rsi, 2))
	vmovsd(xmm15, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	// beta == 0: store alpha*A*B directly without reading C.
	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ) // jump to column storage case

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm8, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.
label(.DCOLSTORBZ)

	// 3x4 kernel, beta == 0, column-stored C: transpose the row
	// accumulators ymm4/6/8 (ymm10 serves as the fourth input row) into
	// columns, then store rows 0-1 of each column as a pair and row 2 as a
	// single double at rdx + j*cs_c.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vextractf128(imm(0x1), ymm4, xmm12)
	vextractf128(imm(0x1), ymm6, xmm13)
	vextractf128(imm(0x1), ymm8, xmm14)
	vextractf128(imm(0x1), ymm10, xmm15)

	vmovupd(xmm4, mem(rcx))
	vmovupd(xmm6, mem(rcx, rsi, 1))
	vmovupd(xmm8, mem(rcx, rsi, 2))
	vmovupd(xmm10, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	vmovsd(xmm12, mem(rdx))
	vmovsd(xmm13, mem(rdx, rsi, 1))
	vmovsd(xmm14, mem(rdx, rsi, 2))
	vmovsd(xmm15, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rv_haswell_asm_2x4
//
// Double-precision kernel, written as inline assembly, that updates a 2x4
// block of C as
//
//   C := beta * C + alpha * A * B
//
// Each k step loads four consecutive doubles of B from b (then b += rs_b),
// broadcasts one double from each of two rows of A (a and a + rs_a; then
// a += cs_a), and accumulates into ymm4 and ymm6, one register per row of
// C. The k loop runs k0/4 times unrolled by four, followed by k0%4 single
// steps.
//
// Parameters:
//   k0               Inner (k) dimension.
//   alpha, beta      Addresses of the two scalars.
//   a, rs_a0, cs_a0  Address of A and its row/column strides, in elements.
//   b, rs_b0         Address of B and its row stride, in elements.
//   c, rs_c0, cs_c0  Address of C and its row/column strides, in elements.
//   conja, conjb, m0, n0, cs_b0, data, cntx
//                    Not read by the assembly.
//
// rs_c0 == 1 selects the column-stored path (four 2-element columns at
// stride cs_c); any other value selects the row-stored path, which writes
// each row as one contiguous 4-double vector. When beta compares equal to
// zero, C is stored without being loaded.
//
// NOTE(review): B is always loaded as a contiguous 32-byte vector, so this
// presumably relies on cs_b0 == 1 -- confirm against the caller.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rv_haswell_asm_2x4
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	// The accumulators ymm4/ymm6 start at zero because of vzeroall.
	vzeroall() // zero all xmm/ymm registers.

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	mov(var(rs_b), r10) // load rs_b
	//mov(var(cs_b), r11) // load cs_b
	lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	// NOTE: We cannot pre-load elements of a or b
	// because it could eventually, in the last
	// unrolled iter or the cleanup loop, result
	// in reading beyond the bounds allocated mem
	// (the likely result: a segmentation fault).

	mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

#if 0
	lea(mem(rcx, rdi, 2), rdx) //
	//lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
#else
	// Prefetch the part of C this 2x4 kernel will update: two rows when
	// row-stored, four columns when column-stored (rs_c == 1).
	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH) // jump to column storage case
	label(.DROWPFETCH) // row-stored prefetching on c

	//lea(mem(rcx, rdi, 2), rdx) //
	//lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c

	jmp(.DPOSTPFETCH) // jump to end of prefetching c
	label(.DCOLPFETCH) // column-stored prefetching c

	// rsi is only a scratch register here; it is reloaded with k_iter below.
	mov(var(cs_c), rsi) // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx) //
	lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 1*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(rcx, rsi, 2, 1*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 1*8)) // prefetch c + 3*cs_c

	label(.DPOSTPFETCH) // done prefetching c
#endif

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	// contains the k_left loop.

	label(.DLOOPKITER) // MAIN LOOP

	// Each iteration below is one k step: ymm0 holds a row of B, ymm2/ymm3
	// hold broadcast elements of A, and ymm4/ymm6 accumulate rows 0 and 1.

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER) // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	// else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT) // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm0, ymm3, ymm6)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused for the scalar addresses from here on.
	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4) // scale by alpha
	vmulpd(ymm0, ymm6, ymm6)

	mov(var(cs_c), rsi) // load cs_c
	lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c;
	//lea(mem(rcx, rdi, 4), r14) // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c;

	// now avoid loading C if beta == 0

	// NOTE(review): vucomisd also sets ZF when an operand is NaN, so a NaN
	// beta would take the beta == 0 branch -- confirm this is acceptable.
	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED) // jump to column storage case

	label(.DROWSTORED)

	// Row i of the result is beta * C(i,:) + accumulator i, written back
	// in place; rcx steps through the rows by rs_c.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.
label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rcx), xmm3, xmm0) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rcx)) vmovupd(xmm1, mem(rcx, rsi, 1)) vmovupd(xmm2, mem(rcx, rsi, 2)) vmovupd(xmm4, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rcx)) vmovupd(xmm1, mem(rcx, rsi, 1)) vmovupd(xmm2, mem(rcx, rsi, 2)) vmovupd(xmm4, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rv_haswell_asm_1x4 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = 
bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(rcx, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 0*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 0*8)) // prefetch c + 1*cs_c prefetch(0, mem(rcx, rsi, 2, 0*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 0*8)) // prefetch c + 3*cs_c label(.DPOSTPFETCH) // done prefetching c #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) // ---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. 
// else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; //lea(mem(rcx, rdi, 4), r14) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vmovlpd(mem(rcx), xmm0, xmm0) vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rcx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm4, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) vmovlpd(xmm1, mem(rcx, rsi, 2)) vmovhpd(xmm1, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) // (tail of the kernel whose definition begins above this point)
	// beta == 0 with unit rs_c: move the accumulator ymm4 to ymm0/xmm1 and
	// write its four lanes to c, c + rsi, c + 2*rsi and c + rax without
	// reading C first.

	vmovupd(ymm4, ymm0)

	vextractf128(imm(1), ymm0, xmm1)
	vmovlpd(xmm0, mem(rcx))
	vmovhpd(xmm0, mem(rcx, rsi, 1))
	vmovlpd(xmm1, mem(rcx, rsi, 2))
	vmovhpd(xmm1, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rv_haswell_asm_6x2

   Double-precision kernel that updates a 6x2 block of C:

     C := beta * C + alpha * (A * B)

   The product is accumulated over k0 steps; each step multiplies six
   elements of a (one per row, rs_a apart) by two adjacent doubles loaded
   from b.

   Parameters, as used by the code below:
     k0            number of accumulation steps; split into k0/4 passes of
                   a 4x-unrolled loop plus k0%4 single steps.
     alpha, beta   pointers to the scalars; each is read once.
     a, rs_a0,     base pointer and strides (in elements) of a. rs_a
       cs_a0       separates the six rows; cs_a is the advance per step.
     b, rs_b0      base pointer of b and its advance per step (elements).
     cs_b0         copied to cs_b and bound as an asm operand, but never
                   read: the two values of b are always loaded from
                   adjacent addresses.
     c, rs_c0,     base pointer and strides (in elements) of c. When
       cs_c0       rs_c == 1 the column-stored paths are taken; otherwise
                   the row-stored paths are taken, which write two adjacent
                   doubles per row and do not consult cs_c.
     conja, conjb, m0, n0, data, cntx
                   not referenced in this function body.

   When beta compares equal to 0.0, C is overwritten without being read.
*/
void bli_dgemmsup_rv_haswell_asm_6x2
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall() // zero all xmm/ymm registers.
	           // (this also clears the accumulators xmm4..xmm14.)

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	// Byte offsets of rows 3 and 5 of a; rows 1, 2 and 4 are reached
	// through the scale field of the addressing mode (r8*1, r8*2, r8*4).
	lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	mov(var(rs_b), r10) // load rs_b
	//mov(var(cs_b), r11) // load cs_b
	lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	// NOTE: We cannot pre-load elements of a or b
	// because it could eventually, in the last
	// unrolled iter or the cleanup loop, result
	// in reading beyond the bounds of allocated
	// memory (the likely result: a segmentation
	// fault).

	mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

#if 0
	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c
	prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c
#else
	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH) // jump to column storage case
	label(.DROWPFETCH) // row-stored prefetching on c

	// Six rows; the 1*8 byte offset addresses the second (last) element
	// of each two-element row.
	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 1*8)) // prefetch c + 3*rs_c
	prefetch(0, mem(rdx, rdi, 1, 1*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(rdx, rdi, 2, 1*8)) // prefetch c + 5*rs_c

	jmp(.DPOSTPFETCH) // jump to end of prefetching c
	label(.DCOLPFETCH) // column-stored prefetching c

	// Two columns; the 5*8 byte offset addresses the sixth (last) element
	// of each six-element column.
	mov(var(cs_c), rsi) // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double)
	//lea(mem(rcx, rsi, 2), rdx) //
	//lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 5*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 5*8)) // prefetch c + 1*cs_c

	label(.DPOSTPFETCH) // done prefetching c
#endif

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	                  // contains the k_left loop.

	// Register use in the loops below:
	//   xmm0        two adjacent doubles loaded from b
	//   ymm2, ymm3  broadcast elements of a (only the low 128 bits are
	//               consumed by the xmm FMAs)
	//   xmm4, xmm6, xmm8, xmm10, xmm12, xmm14
	//               accumulators for rows 0, 1, 2, 3, 4, 5

	label(.DLOOPKITER) // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)
	vfmadd231pd(xmm0, xmm3, xmm14)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)
	vfmadd231pd(xmm0, xmm3, xmm14)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)
	vfmadd231pd(xmm0, xmm3, xmm14)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)
	vfmadd231pd(xmm0, xmm3, xmm14)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER) // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	                // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT) // EDGE LOOP

	// One accumulation step per pass; same body as a single unrolled
	// iteration of the main loop.

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)
	vfmadd231pd(xmm0, xmm3, xmm14)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused from here on: they no longer point into a/b.
	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4) // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)
	vmulpd(xmm0, xmm10, xmm10)
	vmulpd(xmm0, xmm12, xmm12)
	vmulpd(xmm0, xmm14, xmm14)

	mov(var(cs_c), rsi) // load cs_c
	lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c;
	lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c;
	                           // (only the column-stored paths use rdx.)

	//lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c;

	// now avoid loading C if beta == 0
	// NOTE(review): vucomisd also sets ZF when its operands are unordered,
	// so a NaN beta would take the beta == 0 branch as written -- confirm
	// that this is acceptable.

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED) // jump to column storage case

	label(.DROWSTORED)

	// For each of the six rows: acc += beta * c(row, 0:1), then store the
	// two doubles back and step rcx to the next row.

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm10)
	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm12)
	vmovupd(xmm12, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm14)
	vmovupd(xmm14, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORED)

	// Transpose rows 0-3 so that ymm4 holds column 0 and ymm6 holds
	// column 1 (four rows each).
	vunpcklpd(xmm6, xmm4, xmm0)
	vunpckhpd(xmm6, xmm4, xmm1)
	vunpcklpd(xmm10, xmm8, xmm2)
	vunpckhpd(xmm10, xmm8, xmm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)

	// xmm3 was overwritten by the unpack above, so beta is reloaded.
	vbroadcastsd(mem(rbx), ymm3)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	// Rows 4-5: xmm0 becomes column 0 and xmm1 column 1 (two rows each),
	// updated at rdx = c + 4*rs_c.
	vunpcklpd(xmm14, xmm12, xmm0)
	vunpckhpd(xmm14, xmm12, xmm1)

	vfmadd231pd(mem(rdx), xmm3, xmm0)
	vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1)
	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ) // jump to column storage case

	label(.DROWSTORBZ)

	// beta == 0, row-stored: store each row's accumulator without
	// reading C.

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm12, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm14, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.
label(.DCOLSTORBZ) // (tail of bli_dgemmsup_rv_haswell_asm_6x2)
	// beta == 0 with unit rs_c: transpose the six row accumulators into
	// two columns and store them without reading C.

	vunpcklpd(xmm6, xmm4, xmm0)
	vunpckhpd(xmm6, xmm4, xmm1)
	vunpcklpd(xmm10, xmm8, xmm2)
	vunpckhpd(xmm10, xmm8, xmm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(xmm14, xmm12, xmm0)
	vunpckhpd(xmm14, xmm12, xmm1)

	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rv_haswell_asm_5x2

   Double-precision kernel that updates a 5x2 block of C:

     C := beta * C + alpha * (A * B)

   The product is accumulated over k0 steps; each step multiplies five
   elements of a (one per row, rs_a apart) by two adjacent doubles loaded
   from b.

   Parameters, as used by the code below:
     k0            number of accumulation steps; split into k0/4 passes of
                   a 4x-unrolled loop plus k0%4 single steps.
     alpha, beta   pointers to the scalars; each is read once.
     a, rs_a0,     base pointer and strides (in elements) of a. rs_a
       cs_a0       separates the five rows; cs_a is the advance per step.
     b, rs_b0      base pointer of b and its advance per step (elements).
     cs_b0         copied to cs_b and bound as an asm operand, but never
                   read: the two values of b are always loaded from
                   adjacent addresses.
     c, rs_c0,     base pointer and strides (in elements) of c. When
       cs_c0       rs_c == 1 the column-stored paths are taken; otherwise
                   the row-stored paths are taken, which write two adjacent
                   doubles per row and do not consult cs_c.
     conja, conjb, m0, n0, data, cntx
                   not referenced in this function body.

   When beta compares equal to 0.0, C is overwritten without being read.
*/
void bli_dgemmsup_rv_haswell_asm_5x2
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall() // zero all xmm/ymm registers.
	           // (this also clears the accumulators xmm4..xmm12.)

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	// Byte offset of row 3 of a; rows 1, 2 and 4 are reached through the
	// scale field of the addressing mode (r8*1, r8*2, r8*4).
	lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	mov(var(rs_b), r10) // load rs_b
	//mov(var(cs_b), r11) // load cs_b
	lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	// NOTE: We cannot pre-load elements of a or b
	// because it could eventually, in the last
	// unrolled iter or the cleanup loop, result
	// in reading beyond the bounds of allocated
	// memory (the likely result: a segmentation
	// fault).

	mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

#if 0
	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c
	prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c
#else
	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH) // jump to column storage case
	label(.DROWPFETCH) // row-stored prefetching on c

	// Five rows; the 1*8 byte offset addresses the second (last) element
	// of each two-element row.
	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 1*8)) // prefetch c + 3*rs_c
	prefetch(0, mem(rdx, rdi, 1, 1*8)) // prefetch c + 4*rs_c

	jmp(.DPOSTPFETCH) // jump to end of prefetching c
	label(.DCOLPFETCH) // column-stored prefetching c

	// Two columns; the 4*8 byte offset addresses the fifth (last) element
	// of each five-element column.
	mov(var(cs_c), rsi) // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double)
	prefetch(0, mem(rcx, 4*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 4*8)) // prefetch c + 1*cs_c

	label(.DPOSTPFETCH) // done prefetching c
#endif

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	                  // contains the k_left loop.

	// Register use in the loops below:
	//   xmm0        two adjacent doubles loaded from b
	//   ymm2, ymm3  broadcast elements of a (only the low 128 bits are
	//               consumed by the xmm FMAs)
	//   xmm4, xmm6, xmm8, xmm10, xmm12
	//               accumulators for rows 0, 1, 2, 3, 4

	label(.DLOOPKITER) // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER) // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	                // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT) // EDGE LOOP

	// One accumulation step per pass; same body as a single unrolled
	// iteration of the main loop.

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm12)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused from here on: they no longer point into a/b.
	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4) // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)
	vmulpd(xmm0, xmm10, xmm10)
	vmulpd(xmm0, xmm12, xmm12)

	mov(var(cs_c), rsi) // load cs_c
	lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c;
	lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c;
	                           // (only the column-stored paths use rdx.)

	//lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c;

	// now avoid loading C if beta == 0
	// NOTE(review): vucomisd also sets ZF when its operands are unordered,
	// so a NaN beta would take the beta == 0 branch as written -- confirm
	// that this is acceptable.

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED) // jump to column storage case

	label(.DROWSTORED)

	// For each of the five rows: acc += beta * c(row, 0:1), then store the
	// two doubles back and step rcx to the next row.

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm10)
	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm12)
	vmovupd(xmm12, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORED)

	// Transpose rows 0-3 so that ymm4 holds column 0 and ymm6 holds
	// column 1 (four rows each).
	vunpcklpd(xmm6, xmm4, xmm0)
	vunpckhpd(xmm6, xmm4, xmm1)
	vunpcklpd(xmm10, xmm8, xmm2)
	vunpckhpd(xmm10, xmm8, xmm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)

	// xmm3 was overwritten by the unpack above, so beta is reloaded.
	vbroadcastsd(mem(rbx), ymm3)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	// Row 4 (xmm12 holds its two column values). The disabled variant
	// handles a pair of rows (xmm12 and xmm14); the enabled one gathers
	// c(4,0) and c(4,1) one double at a time, computes beta*c + acc into
	// xmm0, and scatters the two results back.
#if 0
	vunpcklpd(xmm14, xmm12, xmm0)
	vunpckhpd(xmm14, xmm12, xmm1)

	vfmadd231pd(mem(rdx), xmm3, xmm0)
	vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1)
	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
#else
	vmovlpd(mem(rdx), xmm0, xmm0)
	vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0)

	vfmadd213pd(xmm12, xmm3, xmm0)

	vmovlpd(xmm0, mem(rdx))
	vmovhpd(xmm0, mem(rdx, rsi, 1))
#endif

	//lea(mem(rdx, rsi, 4), rdx)

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ) // jump to column storage case

	label(.DROWSTORBZ)

	// beta == 0, row-stored: store each row's accumulator without
	// reading C.

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm12, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.
label(.DCOLSTORBZ) // (tail of bli_dgemmsup_rv_haswell_asm_5x2)
	// beta == 0 with unit rs_c: transpose rows 0-3 into two columns and
	// store them, then scatter row 4, all without reading C.

	vunpcklpd(xmm6, xmm4, xmm0)
	vunpckhpd(xmm6, xmm4, xmm1)
	vunpcklpd(xmm10, xmm8, xmm2)
	vunpckhpd(xmm10, xmm8, xmm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))

	//lea(mem(rcx, rsi, 4), rcx)

#if 0
	vunpcklpd(xmm14, xmm12, xmm0)
	vunpckhpd(xmm14, xmm12, xmm1)

	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
#else
	vmovupd(xmm12, xmm0)

	vmovlpd(xmm0, mem(rdx))
	vmovhpd(xmm0, mem(rdx, rsi, 1))
#endif

	//lea(mem(rdx, rsi, 4), rdx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rv_haswell_asm_4x2

   Double-precision kernel that updates a 4x2 block of C:

     C := beta * C + alpha * (A * B)

   The product is accumulated over k0 steps; each step multiplies four
   elements of a (one per row, rs_a apart) by two adjacent doubles loaded
   from b.

   Parameters, as used by the code below:
     k0            number of accumulation steps; split into k0/4 passes of
                   a 4x-unrolled loop plus k0%4 single steps.
     alpha, beta   pointers to the scalars; each is read once.
     a, rs_a0,     base pointer and strides (in elements) of a. rs_a
       cs_a0       separates the four rows; cs_a is the advance per step.
     b, rs_b0      base pointer of b and its advance per step (elements).
     cs_b0         copied to cs_b and bound as an asm operand, but never
                   read: the two values of b are always loaded from
                   adjacent addresses.
     c, rs_c0,     base pointer and strides (in elements) of c. When
       cs_c0       rs_c == 1 the column-stored paths are taken; otherwise
                   the row-stored paths are taken, which write two adjacent
                   doubles per row and do not consult cs_c.
     conja, conjb, m0, n0, data, cntx
                   not referenced in this function body.

   When beta compares equal to 0.0, C is overwritten without being read.
*/
void bli_dgemmsup_rv_haswell_asm_4x2
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall() // zero all xmm/ymm registers.
	           // (this also clears the accumulators xmm4..xmm10.)

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	// Byte offset of row 3 of a; rows 1 and 2 are reached through the
	// scale field of the addressing mode (r8*1, r8*2).
	lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	mov(var(rs_b), r10) // load rs_b
	//mov(var(cs_b), r11) // load cs_b
	lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	// NOTE: We cannot pre-load elements of a or b
	// because it could eventually, in the last
	// unrolled iter or the cleanup loop, result
	// in reading beyond the bounds of allocated
	// memory (the likely result: a segmentation
	// fault).

	mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

#if 0
	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c
#else
	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH) // jump to column storage case
	label(.DROWPFETCH) // row-stored prefetching on c

	// Four rows; the 1*8 byte offset addresses the second (last) element
	// of each two-element row.
	lea(mem(rcx, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 1*8)) // prefetch c + 3*rs_c

	jmp(.DPOSTPFETCH) // jump to end of prefetching c
	label(.DCOLPFETCH) // column-stored prefetching c

	// Two columns; the 3*8 byte offset addresses the fourth (last) element
	// of each four-element column.
	// NOTE(review): rdx is computed here but is not used by the two
	// prefetches below, nor anywhere later in this kernel -- it appears
	// to be dead in this path; confirm before removing.
	mov(var(cs_c), rsi) // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double)
	lea(mem(rcx, rsi, 2), rdx) //
	lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c;
	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(rcx, rsi, 1, 3*8)) // prefetch c + 1*cs_c

	label(.DPOSTPFETCH) // done prefetching c
#endif

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	                  // contains the k_left loop.

	// Register use in the loops below:
	//   xmm0        two adjacent doubles loaded from b
	//   ymm2, ymm3  broadcast elements of a (only the low 128 bits are
	//               consumed by the xmm FMAs)
	//   xmm4, xmm6, xmm8, xmm10
	//               accumulators for rows 0, 1, 2, 3

	label(.DLOOPKITER) // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER) // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	                // else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT) // EDGE LOOP

	// One accumulation step per pass; same body as a single unrolled
	// iteration of the main loop.

	vmovupd(mem(rbx, 0*32), xmm0)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax        ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(xmm0, xmm2, xmm4)
	vfmadd231pd(xmm0, xmm3, xmm6)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(xmm0, xmm2, xmm8)
	vfmadd231pd(xmm0, xmm3, xmm10)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// rax and rbx are reused from here on: they no longer point into a/b.
	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4) // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)
	vmulpd(xmm0, xmm10, xmm10)

	mov(var(cs_c), rsi) // load cs_c
	lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c;
	//lea(mem(rcx, rdi, 4), r14) // load address of c + 4*rs_c;

	//lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c;

	// now avoid loading C if beta == 0
	// NOTE(review): vucomisd also sets ZF when its operands are unordered,
	// so a NaN beta would take the beta == 0 branch as written -- confirm
	// that this is acceptable.

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED) // jump to column storage case

	label(.DROWSTORED)

	// For each of the four rows: acc += beta * c(row, 0:1), then store the
	// two doubles back and step rcx to the next row.

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm10)
	vmovupd(xmm10, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORED)

	// Transpose the four row accumulators so that ymm4 holds column 0 and
	// ymm6 holds column 1 (four rows each).
	vunpcklpd(xmm6, xmm4, xmm0)
	vunpckhpd(xmm6, xmm4, xmm1)
	vunpcklpd(xmm10, xmm8, xmm2)
	vunpckhpd(xmm10, xmm8, xmm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)

	// xmm3 was overwritten by the unpack above, so beta is reloaded.
	vbroadcastsd(mem(rbx), ymm3)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ) // jump to column storage case

	label(.DROWSTORBZ)

	// beta == 0, row-stored: store each row's accumulator without
	// reading C.

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm10, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORBZ)

	// beta == 0 with unit rs_c: transpose into two columns and store them
	// without reading C.

	vunpcklpd(xmm6, xmm4, xmm0)
	vunpckhpd(xmm6, xmm4, xmm1)
	vunpcklpd(xmm10, xmm8, xmm2)
	vunpckhpd(xmm10, xmm8, xmm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rv_haswell_asm_3x2
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	vzeroall() // zero all xmm/ymm registers.

	mov(var(a), rax) // load address of a.
mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) //lea(mem(rcx, rsi, 2), rdx) // //lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 2*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 2*8)) // prefetch c + 1*cs_c label(.DPOSTPFETCH) // done prefetching c #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm8) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm8) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm8) // ---------------------------------- 
iteration 3 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm8) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm8) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) vmulpd(xmm0, xmm8, xmm8) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 2), rdx) // load address of c + 2*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) //vperm2f128(imm(0x31), ymm2, ymm0, ymm8) //vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vextractf128(imm(0x1), ymm4, xmm12) vextractf128(imm(0x1), ymm6, xmm13) //vextractf128(imm(0x1), ymm8, xmm14) //vextractf128(imm(0x1), ymm10, xmm15) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), xmm3, xmm4) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm6) //vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm8) //vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm10) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) //vmovupd(xmm8, mem(rcx, rsi, 2)) //vmovupd(xmm10, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vfmadd231sd(mem(rdx), xmm3, xmm12) vfmadd231sd(mem(rdx, rsi, 1), xmm3, xmm13) //vfmadd231sd(mem(rdx, rsi, 2), xmm3, xmm14) //vfmadd231sd(mem(rdx, rax, 1), xmm3, xmm15) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) //vmovsd(xmm14, mem(rdx, rsi, 2)) //vmovsd(xmm15, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vmovupd(xmm8, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) //vperm2f128(imm(0x31), ymm2, ymm0, ymm8) //vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vextractf128(imm(0x1), ymm4, xmm12) vextractf128(imm(0x1), ymm6, xmm13) //vextractf128(imm(0x1), ymm8, xmm14) //vextractf128(imm(0x1), ymm10, xmm15) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) //vmovupd(xmm8, mem(rcx, rsi, 2)) //vmovupd(xmm10, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) //vmovsd(xmm14, mem(rdx, rsi, 2)) //vmovsd(xmm15, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } /* bli_dgemmsup_rv_haswell_asm_2x2: inline-assembly kernel that updates a 2x2 block of doubles, c := beta*c + alpha*(a*b). The k dimension is processed as k0/4 passes of a 4x-unrolled loop followed by k0%4 cleanup steps. Each step loads two contiguous doubles of b (then b += rs_b), broadcasts two elements of a spaced rs_a apart (then a += cs_a), and accumulates the products into xmm4 and xmm6. The accumulators are then scaled by alpha. When beta == 0 (tested with vucomisd) c is overwritten without being read. c is handled as column-stored when rs_c == 1, in which case xmm4/xmm6 are transposed with vunpcklpd/vunpckhpd and written at c and c + cs_c, and as row-stored otherwise. conja, conjb, m0, n0, data and cntx are not referenced by the body; cs_b is passed as an asm operand but its load is commented out. */ void bli_dgemmsup_rv_haswell_asm_2x2 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) //lea(mem(rcx, rsi, 2), rdx) // //lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 1*8)) // prefetch c + 1*cs_c label(.DPOSTPFETCH) // done prefetching c #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) // ---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) dec(rsi) // i -= 1; 
jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; //lea(mem(rcx, rdi, 4), r14) // load address of c + 4*rs_c; //lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vunpcklpd(xmm6, xmm4, xmm0) vunpckhpd(xmm6, xmm4, xmm1) vfmadd231pd(mem(rcx), xmm3, xmm0) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm1) vmovupd(xmm0, mem(rcx)) vmovupd(xmm1, mem(rcx, rsi, 1)) //lea(mem(rcx, rsi, 4), rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vunpcklpd(xmm6, xmm4, xmm0) vunpckhpd(xmm6, xmm4, xmm1) vmovupd(xmm0, mem(rcx)) vmovupd(xmm1, mem(rcx, rsi, 1)) //lea(mem(rcx, rsi, 4), rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } /* bli_dgemmsup_rv_haswell_asm_1x2: inline-assembly kernel that updates a 1x2 block of doubles, c := beta*c + alpha*(a*b). The k dimension is processed as k0/4 passes of a 4x-unrolled loop followed by k0%4 cleanup steps. Each step loads two contiguous doubles of b (then b += rs_b), broadcasts one element of a (then a += cs_a), and accumulates the products into xmm4. The accumulator is then scaled by alpha. When beta == 0 (tested with vucomisd) c is overwritten without being read. c is handled as column-stored when rs_c == 1, in which case the two elements at c and c + cs_c are read and written individually with vmovlpd/vmovhpd, and as row-stored (one contiguous two-element access) otherwise. conja, conjb, m0, n0, data and cntx are not referenced by the body; cs_b is passed as an asm operand but its load is commented out. */ void bli_dgemmsup_rv_haswell_asm_1x2 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. 
mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) #if 0 lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) //lea(mem(rcx, rsi, 2), rdx) // //lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(rcx, 0*8)) // prefetch c + 0*cs_c prefetch(0, mem(rcx, rsi, 1, 0*8)) // prefetch c + 1*cs_c label(.DPOSTPFETCH) // done prefetching c #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) // ---------------------------------- iteration 1 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) // ---------------------------------- iteration 2 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) // ---------------------------------- iteration 3 vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm4) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; //lea(mem(rcx, rdi, 4), r14) // load address of c + 4*rs_c; //lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vmovlpd(mem(rcx), xmm0, xmm0) vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) vfmadd213pd(xmm4, xmm3, xmm0) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) //lea(mem(rcx, rsi, 4), rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vmovlpd(xmm4, mem(rcx)) vmovhpd(xmm4, mem(rcx, rsi, 1)) //lea(mem(rcx, rsi, 4), rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } // ----------------------------------------------------------------------------- // NOTE: Normally, for any "?x1" kernel, we would call the reference kernel. // However, at least one other subconfiguration (zen) uses this kernel set, so // we need to be able to call a set of "?x1" kernels that we know will actually // exist regardless of which subconfiguration these kernels were used by. 
Thus, // the compromise employed here is to inline the reference kernel so it gets // compiled as part of the haswell kernel set, and hence can unconditionally be // called by other kernels within that kernel set. #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mdim ) \ \ void PASTEMAC(ch,opname) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ for ( dim_t i = 0; i < mdim; ++i ) \ { \ ctype* restrict ci = &c[ i*rs_c ]; \ ctype* restrict ai = &a[ i*rs_a ]; \ \ /* for ( dim_t j = 0; j < 1; ++j ) */ \ { \ ctype* restrict cij = ci /*[ j*cs_c ]*/ ; \ ctype* restrict bj = b /*[ j*cs_b ]*/ ; \ ctype ab; \ \ PASTEMAC(ch,set0s)( ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ { \ ctype* restrict aij = &ai[ l*cs_a ]; \ ctype* restrict bij = &bj[ l*rs_b ]; \ \ PASTEMAC(ch,dots)( *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ } \ else if ( PASTEMAC(d,eq0)( *beta ) ) \ { \ PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ } \ else \ { \ PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ } \ } \ } \ } GENTFUNC( double, d, gemmsup_r_haswell_ref_6x1, 6 ) GENTFUNC( double, d, gemmsup_r_haswell_ref_5x1, 5 ) GENTFUNC( double, d, gemmsup_r_haswell_ref_4x1, 4 ) GENTFUNC( double, d, gemmsup_r_haswell_ref_3x1, 3 ) GENTFUNC( double, d, gemmsup_r_haswell_ref_2x1, 2 ) GENTFUNC( double, d, gemmsup_r_haswell_ref_1x1, 1 ) blis-0.6.1/kernels/haswell/3/sup/bli_gemmsup_rv_haswell_asm_d6x8m.c000066400000000000000000002560731360743507500253340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrr: -------- ------ -------- -------- ------ -------- -------- += ------ ... -------- -------- ------ -------- -------- ------ : -------- ------ : rcr: -------- | | | | -------- -------- | | | | -------- -------- += | | | | ... -------- -------- | | | | -------- -------- | | | | : -------- | | | | : Assumptions: - B is row-stored; - A is row- or column-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential kernel is well-suited for contiguous (v)ector loads on B and single-element broadcasts from A. NOTE: These kernels explicitly support column-oriented IO, implemented via an in-register transpose. And thus they also support the crr and ccr cases, though only crr is ever utilized (because ccr is handled by transposing the operation and executing rcr, which does not incur the cost of the in-register transpose). crr: | | | | | | | | ------ -------- | | | | | | | | ------ -------- | | | | | | | | += ------ ... -------- | | | | | | | | ------ -------- | | | | | | | | ------ : | | | | | | | | ------ : */ // Prototype reference microkernels. GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) #if 0 // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4 #define NUM_NR 4 #define FUNCPTR_T dgemmsup_ker_ft static dim_t mrs[NUM_MR] = { 6, 4, 2, 1 }; static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 }; static FUNCPTR_T kmap[NUM_MR][NUM_NR] = { /* 8 4 2 1 */ /* 6 */ { bli_dgemmsup_rv_haswell_asm_6x8m, bli_dgemmsup_rv_haswell_asm_6x4m, bli_dgemmsup_rv_haswell_asm_6x2m, bli_dgemmsup_r_haswell_ref_6x1 }, /* 4 */ { bli_dgemmsup_rv_haswell_asm_4x8m, bli_dgemmsup_rv_haswell_asm_4x4m, bli_dgemmsup_rv_haswell_asm_4x2m, bli_dgemmsup_r_haswell_ref_4x1 }, /* 2 */ { bli_dgemmsup_rv_haswell_asm_2x8m, bli_dgemmsup_rv_haswell_asm_2x4m, bli_dgemmsup_rv_haswell_asm_2x2m, bli_dgemmsup_r_haswell_ref_2x1 }, /* 1 */ { bli_dgemmsup_rv_haswell_asm_1x8m, bli_dgemmsup_rv_haswell_asm_1x4m, bli_dgemmsup_rv_haswell_asm_1x2m, bli_dgemmsup_r_haswell_ref_1x1 }, }; #endif void bli_dgemmsup_rv_haswell_asm_6x8m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { uint64_t n_left = n0 % 8; // First check whether this is a edge case in the n dimension. If so, // dispatch other 6x?m kernels, as needed. 
if ( n_left ) { double* restrict cij = c; double* restrict bj = b; double* restrict ai = a; if ( 6 <= n_left ) { const dim_t nr_cur = 6; bli_dgemmsup_rv_haswell_asm_6x6m ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 4 <= n_left ) { const dim_t nr_cur = 4; bli_dgemmsup_rv_haswell_asm_6x4m ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rv_haswell_asm_6x2m ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else dim_t ps_a0 = bli_auxinfo_ps_a( data ); if ( ps_a0 == 6 * rs_a0 ) { // Since A is not packed, we can use one gemv. bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, m0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); } else { const dim_t mr = 6; // Since A is packed into row panels, we must use a loop over // gemv. dim_t m_iter = ( m0 + mr - 1 ) / mr; dim_t m_left = m0 % mr; double* restrict ai_ii = ai; double* restrict cij_ii = cij; for ( dim_t ii = 0; ii < m_iter; ii += 1 ) { dim_t mr_cur = ( bli_is_not_edge_f( ii, m_iter, m_left ) ? 
mr : m_left ); bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, mr_cur, k0, alpha, ai_ii, rs_a0, cs_a0, bj, rs_b0, beta, cij_ii, rs_c0, cntx, NULL ); cij_ii += mr*rs_c0; ai_ii += ps_a0; } } #endif } return; } //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t m_iter = m0 / 6; uint64_t m_left = m0 % 6; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // Query the panel stride of A and convert it to units of bytes. uint64_t ps_a = bli_auxinfo_ps_a( data ); uint64_t ps_a8 = ps_a * sizeof( double ); if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a //mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). 
mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // During preamble and loops: // r12 = rcx = c // r14 = rax = a // read rbx from var(b) near beginning of loop // r11 = m dim index ii mov(var(m_iter), r11) // ii = m_iter; label(.DLOOP6X8I) // LOOP OVER ii = [ m_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif mov(var(b), rbx) // load address of b. //mov(r12, rcx) // reset rcx to current utile of c. mov(r14, rax) // reset rax to current upanel of a. cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(r12, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(r12, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(r12, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(r12, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(r12, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(r12, 5*8)) // prefetch c + 0*cs_c prefetch(0, mem(r12, rsi, 1, 5*8)) // prefetch c + 1*cs_c prefetch(0, mem(r12, rsi, 2, 5*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 5*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 5*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 5*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 5*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 5*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #if 1 lea(mem(r9, r9, 2), rcx) // rcx = 3*cs_a; lea(mem(rax, r8, 4), rdx) // use rdx for prefetching lines lea(mem(rdx, r8, 2), rdx) // from next upanel of a. #else lea(mem(rax, r9, 8), rdx) // use rdx for prefetching a. lea(mem(rdx, r9, 8), rdx) // rdx = a + 16*cs_a; //mov(r9, rsi) // rsi = cs_a; //sal(imm(4), rsi) // rsi = 16*cs_a; //lea(mem(rax, rsi, 1), rdx) // rdx = a + 16*cs_a; #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 prefetch(0, mem(rdx, 5*8)) //prefetch(0, mem(rax, 5*8)) #else prefetch(0, mem(rdx, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(rdx, r9, 1, 5*8)) //prefetch(0, mem(rax, 5*8)) #else #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(rdx, r9, 2, 5*8)) //prefetch(0, mem(rax, 5*8)) #else prefetch(0, mem(rdx, r9, 2, 5*8)) //prefetch(0, mem(rdx, r9, 2)) //lea(mem(rdx, r9, 4), rdx) // rdx += 4*cs_a; #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), 
ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(rdx, rcx, 1, 5*8)) lea(mem(rdx, r9, 4), rdx) // a_prefetch += 4*cs_a; //prefetch(0, mem(rax, 5*8)) #else lea(mem(rdx, r9, 4), rdx) // a_prefetch += 4*cs_a; #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.DLOOPKLEFT) // EDGE LOOP vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(r12, rcx) // reset rcx to current utile of c. mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm11, ymm11) vmulpd(ymm0, ymm12, ymm12) vmulpd(ymm0, ymm13, ymm13) vmulpd(ymm0, ymm14, ymm14) vmulpd(ymm0, ymm15, ymm15) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm11) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm13) vmovupd(ymm13, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm14) vmovupd(ymm14, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm15) vmovupd(ymm15, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) 
vmovupd(xmm4, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm5) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm10, mem(rcx)) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm12, mem(rcx)) vmovupd(ymm13, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm14, mem(rcx)) vmovupd(ymm15, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) lea(mem(r12, rdi, 4), r12) // lea(mem(r12, rdi, 2), r12) // c_ii = r12 += 6*rs_c //lea(mem(r14, r8, 4), r14) // //lea(mem(r14, r8, 2), r14) // a_ii = r14 += 6*rs_a mov(var(ps_a8), rax) // load ps_a8 lea(mem(r14, rax, 1), r14) // a_ii = r14 += ps_a8 dec(r11) // ii -= 1; jne(.DLOOP6X8I) // iterate again if ii != 0. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [ps_a8] "m" (ps_a8), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. if ( m_left ) { const dim_t nr_cur = 8; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; //double* restrict ai = a + i_edge*rs_a; //double* restrict ai = a + ( i_edge / 6 ) * ps_a; double* restrict ai = a + m_iter * ps_a; double* restrict bj = b; #if 0 // We add special handling for slightly inflated MR blocksizes // at edge cases, up to a maximum of 9. 
if ( 6 < m_left ) { dgemmsup_ker_ft ker_fp1 = NULL; dgemmsup_ker_ft ker_fp2 = NULL; dim_t mr1, mr2; if ( m_left == 7 ) { mr1 = 4; mr2 = 3; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x8; ker_fp2 = bli_dgemmsup_rv_haswell_asm_3x8; } else if ( m_left == 8 ) { mr1 = 4; mr2 = 4; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x8; ker_fp2 = bli_dgemmsup_rv_haswell_asm_4x8; } else // if ( m_left == 9 ) { mr1 = 4; mr2 = 5; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x8; ker_fp2 = bli_dgemmsup_rv_haswell_asm_5x8; } ker_fp1 ( conja, conjb, mr1, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr1*rs_c0; ai += mr1*rs_a0; ker_fp2 ( conja, conjb, mr2, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; } #endif #if 1 dgemmsup_ker_ft ker_fps[6] = { NULL, bli_dgemmsup_rv_haswell_asm_1x8, bli_dgemmsup_rv_haswell_asm_2x8, bli_dgemmsup_rv_haswell_asm_3x8, bli_dgemmsup_rv_haswell_asm_4x8, bli_dgemmsup_rv_haswell_asm_5x8 }; dgemmsup_ker_ft ker_fp = ker_fps[ m_left ]; ker_fp ( conja, conjb, m_left, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; #else if ( 5 <= m_left ) { const dim_t mr_cur = 5; bli_dgemmsup_rv_haswell_asm_5x8 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 4 <= m_left ) { const dim_t mr_cur = 4; bli_dgemmsup_rv_haswell_asm_4x8 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rv_haswell_asm_3x8 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; 
bli_dgemmsup_rv_haswell_asm_2x8
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rv_haswell_asm_1x8
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
#endif
	}
}

// bli_dgemmsup_rv_haswell_asm_6x6m
//
// Double-precision kernel that updates an m0 x 6 block of C in place as
//
//   C := beta * C + alpha * A * B
//
// with an inner (k) dimension of k0. The inline assembly handles the
// m0 / 6 full panels of six rows; the m0 % 6 leftover rows are forwarded
// to one of the fixed-size bli_dgemmsup_rv_haswell_asm_{1..5}x6 kernels.
//
// Parameters, as they are used by this function:
//   conja, conjb     - not read here; forwarded to the edge-case kernel.
//   m0               - number of rows of C to update.
//   n0               - not referenced; the column count is fixed at 6.
//   k0               - inner dimension; split into k0 / 4 unrolled
//                      iterations plus k0 % 4 single iterations.
//   alpha, beta      - pointers to the scalars (each is loaded once per
//                      six-row panel and broadcast).
//   a, rs_a0, cs_a0  - matrix A; rows within a panel are rs_a apart and
//                      each k step advances by cs_a. Consecutive six-row
//                      panels are ps_a elements apart, where ps_a is
//                      obtained from bli_auxinfo_ps_a( data ).
//   b, rs_b0, cs_b0  - matrix B; each k step loads six consecutive doubles
//                      and advances by rs_b. cs_b is listed as an asm
//                      operand but is never read by the assembly.
//   c, rs_c0, cs_c0  - matrix C. rs_c == 1 selects the column-stored
//                      store path; any other rs_c selects the row-stored
//                      path.
//   data             - queried for ps_a and forwarded to the edge kernel.
//   cntx             - only forwarded to the edge-case kernel.
//
// NOTE(review): the row-stored path writes four contiguous doubles per
// row and addresses columns 4-5 at an offset of 4*cs_c elements, so it
// presumably relies on cs_c == 1 whenever rs_c != 1 -- confirm against
// the callers.
void bli_dgemmsup_rv_haswell_asm_6x6m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k is processed four steps at a time (k_iter) plus a remainder
	// (k_left); m is processed six rows at a time (m_iter) plus a
	// remainder (m_left) that is handled after the assembly region.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t m_iter = m0 / 6;
	uint64_t m_left = m0 % 6;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// Query the panel stride of A and convert it to units of bytes.
	uint64_t ps_a   = bli_auxinfo_ps_a( data );
	uint64_t ps_a8  = ps_a * sizeof( double );

	// With fewer than six rows there is no full panel, so skip the
	// assembly region entirely and go straight to the edge-case code.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	//mov(var(b), rbx)                   // load address of b.
	mov(var(rs_b), r10)                // load rs_b
	//mov(var(cs_b), r11)                // load cs_b
	lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	                                   // NOTE: We cannot pre-load elements of a or b
	                                   // because it could eventually, in the last
	                                   // unrolled iter or the cleanup loop, result
	                                   // in reading beyond the bounds of allocated
	                                   // memory (the likely result: a segmentation
	                                   // fault).

	mov(var(c), r12)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// During preamble and loops:
	// r12 = rcx = c
	// r14 = rax = a
	// read rbx from var(b) near beginning of loop
	// r11 = m dim index ii

	mov(var(m_iter), r11)              // ii = m_iter;

	// NOTE(review): the label below says "6X8" although this kernel
	// works on six columns (nr_cur = 6 in the edge-case code); the
	// matching jne at the bottom of the loop uses the same name.
	label(.DLOOP6X8I)                  // LOOP OVER ii = [ m_iter ... 1 0 ]

	// Accumulator layout used by the k loops below: row i of the 6x6
	// microtile lives in the register pair ( ymm(4+2i), ymm(5+2i) ).
	// The even register accumulates columns 0-3 (products with ymm0);
	// the odd register accumulates products with ymm1, of which only
	// the lower two lanes (columns 4-5, loaded through xmm1) are used.

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	vxorpd(ymm1,  ymm1,  ymm1)         // zero ymm1 since we only use the lower
	vxorpd(ymm4,  ymm4,  ymm4)         // half (xmm1), and nans/infs may slow us
	vxorpd(ymm5,  ymm5,  ymm5)         // down.
	vxorpd(ymm6,  ymm6,  ymm6)
	vxorpd(ymm7,  ymm7,  ymm7)
	vxorpd(ymm8,  ymm8,  ymm8)
	vxorpd(ymm9,  ymm9,  ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	mov(var(b), rbx)                   // load address of b.
	//mov(r12, rcx)                      // reset rcx to current utile of c.
	mov(r14, rax)                      // reset rax to current upanel of a.

	// rdi holds rs_c in bytes, so equality with 8 means rs_c == 1,
	// which this kernel treats as column-stored C.
	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH)                    // jump to column storage case
	label(.DROWPFETCH)                 // row-stored prefetching on c

	lea(mem(r12, rdi, 2), rdx)         //
	lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(r12, 5*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(r12, rdi, 1, 5*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(r12, rdi, 2, 5*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 5*8))         // prefetch c + 3*rs_c
	prefetch(0, mem(rdx, rdi, 1, 5*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(rdx, rdi, 2, 5*8)) // prefetch c + 5*rs_c

	jmp(.DPOSTPFETCH)                  // jump to end of prefetching c
	label(.DCOLPFETCH)                 // column-stored prefetching c

	mov(var(cs_c), rsi)                // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi)            // cs_c *= sizeof(double)
	lea(mem(r12, rsi, 2), rdx)         //
	lea(mem(rdx, rsi, 1), rdx)         // rdx = c + 3*cs_c;
	prefetch(0, mem(r12, 5*8))         // prefetch c + 0*cs_c
	prefetch(0, mem(r12, rsi, 1, 5*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(r12, rsi, 2, 5*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 5*8))         // prefetch c + 3*cs_c
	prefetch(0, mem(rdx, rsi, 1, 5*8)) // prefetch c + 4*cs_c
	prefetch(0, mem(rdx, rsi, 2, 5*8)) // prefetch c + 5*cs_c

	label(.DPOSTPFETCH)                // done prefetching c

	// From here until .DPOSTACCUM, rcx and rdx are scratch registers
	// for prefetching A: rcx = 3*cs_a and rdx = a + 6*rs_a.
#if 1
	lea(mem(r9, r9, 2), rcx)           // rcx = 3*cs_a;
	lea(mem(rax, r8, 4), rdx)          // use rdx for prefetching lines
	lea(mem(rdx, r8, 2), rdx)          // from next upanel of a.
#else
	lea(mem(rax, r9, 8), rdx)          // use rdx for prefetching a.
	lea(mem(rdx, r9, 8), rdx)          // rdx = a + 16*cs_a;
	//mov(r9, rsi)                       // rsi = cs_a;
	//sal(imm(4), rsi)                   // rsi = 16*cs_a;
	//lea(mem(rax, rsi, 1), rdx)         // rdx = a + 16*cs_a;
#endif

	mov(var(k_iter), rsi)              // i = k_iter;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT)                  // if i == 0, jump to code that
	                                   // contains the k_left loop.

	label(.DLOOPKITER)                 // MAIN LOOP

	// Each of the four unrolled iterations performs one k step:
	// load six doubles of b (ymm0 = columns 0-3, xmm1 = columns 4-5),
	// broadcast the six row elements of a one at a time into
	// ymm2/ymm3, and accumulate the products into ymm4..ymm15.

	// ---------------------------------- iteration 0

#if 1
	prefetch(0, mem(rdx, 5*8))
	//prefetch(0, mem(rax, 5*8))
#else
	prefetch(0, mem(rdx, 5*8))
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 1

#if 1
	prefetch(0, mem(rdx, r9, 1, 5*8))
	//prefetch(0, mem(rax, 5*8))
#else
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 2

#if 1
	prefetch(0, mem(rdx, r9, 2, 5*8))
	//prefetch(0, mem(rax, 5*8))
#else
	prefetch(0, mem(rdx, r9, 2, 5*8))
	//prefetch(0, mem(rdx, r9, 2))
	//lea(mem(rdx, r9, 4), rdx)          // rdx += 4*cs_a;
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 3

#if 1
	prefetch(0, mem(rdx, rcx, 1, 5*8))
	lea(mem(rdx, r9, 4), rdx)          // a_prefetch += 4*cs_a;
	//prefetch(0, mem(rax, 5*8))
#else
	lea(mem(rdx, r9, 4), rdx)          // a_prefetch += 4*cs_a;
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER)                   // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi)              // i = k_left;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left loop.

	// Same k step as in the main loop, executed k_left times and
	// without the prefetches of a.
	label(.DLOOPKLEFT)                 // EDGE LOOP

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), xmm1)
	add(r10, rbx)                      // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vbroadcastsd(mem(rax, r8, 4), ymm2)
	vbroadcastsd(mem(rax, r15, 1), ymm3)
	add(r9, rax)                       // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm12)
	vfmadd231pd(ymm1, ymm2, ymm13)
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT)                   // iterate again if i != 0.

	label(.DPOSTACCUM)

	mov(r12, rcx)                      // reset rcx to current utile of c.
	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	// The odd-numbered accumulators are scaled with 128-bit operations
	// because only their lower two lanes (columns 4-5) are stored.
	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(xmm0, xmm5, xmm5)
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(xmm0, xmm7, xmm7)
	vmulpd(ymm0, ymm8, ymm8)
	vmulpd(xmm0, xmm9, xmm9)
	vmulpd(ymm0, ymm10, ymm10)
	vmulpd(xmm0, xmm11, xmm11)
	vmulpd(ymm0, ymm12, ymm12)
	vmulpd(xmm0, xmm13, xmm13)
	vmulpd(ymm0, ymm14, ymm14)
	vmulpd(xmm0, xmm15, xmm15)

	mov(var(cs_c), rsi)                // load cs_c
	lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx)         // load address of c + 4*cs_c;
	lea(mem(rcx, rdi, 4), rdx)         // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), rax)         // rax = 3*cs_c;

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED)                    // jump to column storage case

	// Row-stored, beta != 0: for each of the six rows, add beta * C to
	// the accumulators and write them back. Columns 0-3 are at rcx and
	// columns 4-5 at rcx + 4*rsi (rsi = cs_c in bytes); rcx advances by
	// rs_c after every row except the last.
	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm5)
	vmovupd(xmm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm7)
	vmovupd(xmm7, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm8)
	vmovupd(ymm8, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm9)
	vmovupd(xmm9, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm10)
	vmovupd(ymm10, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm11)
	vmovupd(xmm11, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm12)
	vmovupd(ymm12, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm13)
	vmovupd(xmm13, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm14)
	vmovupd(ymm14, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), xmm3, xmm15)
	vmovupd(xmm15, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	// Column-stored, beta != 0. The accumulators hold rows, so they are
	// rearranged into columns before being combined with C. rcx walks
	// the columns for rows 0-3; rdx (= c + 4*rs_c) does the same for
	// rows 4-5.
	label(.DCOLSTORED)

	// Rows 0-3, columns 0-3: after the shuffle ymm4, ymm6, ymm8 and
	// ymm10 each hold one column.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	// ymm3 was overwritten by the shuffle above, so beta is reloaded.
	vbroadcastsd(mem(rbx), ymm3)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8)
	vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)

	// Rows 4-5, columns 0-3: xmm0, xmm1, xmm2 and xmm4 each hold the
	// two row elements of one column.
	vunpcklpd(ymm14, ymm12, ymm0)
	vunpckhpd(ymm14, ymm12, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vfmadd231pd(mem(rdx), xmm3, xmm0)
	vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1)
	vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2)
	vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4)
	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	vmovupd(xmm2, mem(rdx, rsi, 2))
	vmovupd(xmm4, mem(rdx, rax, 1))

	lea(mem(rdx, rsi, 4), rdx)

	// Rows 0-3, columns 4-5: only two columns remain, so just ymm5 and
	// ymm7 are produced; the instructions for a third and fourth
	// column are left commented out.
	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	//vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	//vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	// ymm3 was overwritten again; reload beta.
	vbroadcastsd(mem(rbx), ymm3)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7)
	//vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9)
	//vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11)
	vmovupd(ymm5, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 1))
	//vmovupd(ymm9, mem(rcx, rsi, 2))
	//vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	// Rows 4-5, columns 4-5.
	vunpcklpd(ymm15, ymm13, ymm0)
	vunpckhpd(ymm15, ymm13, ymm1)
	//vextractf128(imm(0x1), ymm0, xmm2)
	//vextractf128(imm(0x1), ymm1, xmm4)

	vfmadd231pd(mem(rdx), xmm3, xmm0)
	vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1)
	//vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2)
	//vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4)
	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	//vmovupd(xmm2, mem(rdx, rsi, 2))
	//vmovupd(xmm4, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	jmp(.DDONE)                        // jump to end.

	// beta == 0: C is never loaded; the scaled accumulators are simply
	// stored, using the same row/column distinction as above.
	label(.DBETAZERO)

	cmp(imm(8), rdi)                   // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ)                    // jump to column storage case

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	vmovupd(xmm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	vmovupd(xmm7, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm8, mem(rcx))
	vmovupd(xmm9, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm10, mem(rcx))
	vmovupd(xmm11, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm12, mem(rcx))
	vmovupd(xmm13, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm14, mem(rcx))
	vmovupd(xmm15, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	// Column-stored, beta == 0: same rearrangement as .DCOLSTORED but
	// without reading C.
	label(.DCOLSTORBZ)

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(ymm14, ymm12, ymm0)
	vunpckhpd(ymm14, ymm12, ymm1)
	vextractf128(imm(0x1), ymm0, xmm2)
	vextractf128(imm(0x1), ymm1, xmm4)

	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	vmovupd(xmm2, mem(rdx, rsi, 2))
	vmovupd(xmm4, mem(rdx, rax, 1))

	lea(mem(rdx, rsi, 4), rdx)

	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	//vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	//vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	vmovupd(ymm5, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 1))
	//vmovupd(ymm9, mem(rcx, rsi, 2))
	//vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(ymm15, ymm13, ymm0)
	vunpckhpd(ymm15, ymm13, ymm1)
	//vextractf128(imm(0x1), ymm0, xmm2)
	//vextractf128(imm(0x1), ymm1, xmm4)

	vmovupd(xmm0, mem(rdx))
	vmovupd(xmm1, mem(rdx, rsi, 1))
	//vmovupd(xmm2, mem(rdx, rsi, 2))
	//vmovupd(xmm4, mem(rdx, rax, 1))

	//lea(mem(rdx, rsi, 4), rdx)

	label(.DDONE)

	// Advance to the next six-row panel: c moves down six rows, and a
	// moves by the panel stride ps_a8 (bytes) rather than by 6*rs_a.
	lea(mem(r12, rdi, 4), r12)         //
	lea(mem(r12, rdi, 2), r12)         // c_ii = r12 += 6*rs_c

	//lea(mem(r14, r8,  4), r14)         //
	//lea(mem(r14, r8,  2), r14)         // a_ii = r14 += 6*rs_a
	mov(var(ps_a8), rax)               // load ps_a8
	lea(mem(r14, rax, 1), r14)         // a_ii = r14 += ps_a8

	dec(r11)                           // ii -= 1;
	jne(.DLOOP6X8I)                    // iterate again if ii != 0.

	// No jump in this function targets .DRETURN.
	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
      [m_iter] "m" (m_iter),
      [k_iter] "m" (k_iter),
      [k_left] "m" (k_left),
      [a]      "m" (a),
      [rs_a]   "m" (rs_a),
      [cs_a]   "m" (cs_a),
      [ps_a8]  "m" (ps_a8),
      [b]      "m" (b),
      [rs_b]   "m" (rs_b),
      [cs_b]   "m" (cs_b),
      [alpha]  "m" (alpha),
      [beta]   "m" (beta),
      [c]      "m" (c),
      [rs_c]   "m" (rs_c),
      [cs_c]   "m" (cs_c)/*,
      [a_next] "m" (a_next),
      [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
	if ( m_left )
	{
		// The leftover rows start at row i_edge of C and at panel index
		// m_iter of A; B is used from its beginning.
		const dim_t      nr_cur = 6;
		const dim_t      i_edge = m0 - ( dim_t )m_left;

		double* restrict cij = c + i_edge*rs_c;
		//double* restrict ai  = a + i_edge*rs_a;
		//double* restrict ai  = a + ( i_edge / 6 ) * ps_a;
		double* restrict ai  = a + m_iter * ps_a;
		double* restrict bj  = b;

		// Disabled alternative (compiled out by the #if 0).
#if 0
		// We add special handling for slightly inflated MR blocksizes
		// at edge cases, up to a maximum of 9.
		if ( 6 < m_left )
		{
			dgemmsup_ker_ft ker_fp1 = NULL;
			dgemmsup_ker_ft ker_fp2 = NULL;
			dim_t           mr1, mr2;

			if ( m_left == 7 )
			{
				mr1 = 4; mr2 = 3;
				ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x6;
				ker_fp2 = bli_dgemmsup_rv_haswell_asm_3x6;
			}
			else if ( m_left == 8 )
			{
				mr1 = 4; mr2 = 4;
				ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x6;
				ker_fp2 = bli_dgemmsup_rv_haswell_asm_4x6;
			}
			else // if ( m_left == 9 )
			{
				mr1 = 4; mr2 = 5;
				ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x6;
				ker_fp2 = bli_dgemmsup_rv_haswell_asm_5x6;
			}

			ker_fp1
			(
			  conja, conjb, mr1, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += mr1*rs_c0; ai += mr1*rs_a0;

			ker_fp2
			(
			  conja, conjb, mr2, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);

			return;
		}
#endif

		// Active path: one call to the kernel sized for exactly m_left
		// rows. m_left is m0 % 6 and non-zero here, so the index is in
		// 1..5; slot 0 is a NULL placeholder that is never selected.
#if 1
		dgemmsup_ker_ft ker_fps[6] =
		{
		  NULL,
		  bli_dgemmsup_rv_haswell_asm_1x6,
		  bli_dgemmsup_rv_haswell_asm_2x6,
		  bli_dgemmsup_rv_haswell_asm_3x6,
		  bli_dgemmsup_rv_haswell_asm_4x6,
		  bli_dgemmsup_rv_haswell_asm_5x6
		};

		dgemmsup_ker_ft ker_fp = ker_fps[ m_left ];

		ker_fp
		(
		  conja, conjb, m_left, nr_cur, k0,
		  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
		  beta, cij, rs_c0, cs_c0, data, cntx
		);

		return;
#else
		if ( 5 <= m_left )
		{
			const dim_t mr_cur = 5;

			bli_dgemmsup_rv_haswell_asm_5x6
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 4 <= m_left )
		{
			const dim_t mr_cur = 4;

			bli_dgemmsup_rv_haswell_asm_4x6
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 3 <= m_left )
		{
			const dim_t mr_cur = 3;

			bli_dgemmsup_rv_haswell_asm_3x6
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 2 <= m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rv_haswell_asm_2x6
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rv_haswell_asm_1x6
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
#endif
	}
}

void bli_dgemmsup_rv_haswell_asm_6x4m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t m_iter = m0 / 6;
	uint64_t m_left = m0 % 6;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// Query the panel stride of A and convert it to units of bytes.
	uint64_t ps_a   = bli_auxinfo_ps_a( data );
	uint64_t ps_a8  = ps_a * sizeof( double );

	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	//mov(var(b), rbx)                   // load address of b.
mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // During preamble and loops: // r12 = rcx = c // r14 = rax = a // read rbx from var(b) near beginning of loop // r11 = m dim index ii mov(var(m_iter), r11) // ii = m_iter; label(.DLOOP6X4I) // LOOP OVER ii = [ m_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm14, ymm14, ymm14) #endif mov(var(b), rbx) // load address of b. //mov(r12, rcx) // reset rcx to current utile of c. mov(r14, rax) #if 0 lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(r12, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(r12, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(r12, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(r12, rdi, 2, 3*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 3*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 3*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 3*8)) // prefetch c + 5*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(r12, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(r12, 5*8)) // prefetch c + 0*cs_c prefetch(0, mem(r12, rsi, 1, 5*8)) // prefetch c + 1*cs_c prefetch(0, mem(r12, rsi, 2, 5*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 5*8)) // prefetch c + 3*cs_c label(.DPOSTPFETCH) // done prefetching c #endif #if 1 lea(mem(r9, r9, 2), rcx) // rcx = 3*cs_a; lea(mem(rax, r8, 4), rdx) // use rdx for prefetching lines lea(mem(rdx, r8, 2), rdx) // from next upanel of a. //lea(mem(rax, r9, 8), rdx) // use rdx for prefetching a. //lea(mem(rdx, r9, 8), rdx) // rdx = a + 16*cs_a; #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 prefetch(0, mem(rdx, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm0, ymm3, ymm14) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(rdx, r9, 1, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm0, ymm3, ymm14) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(rdx, r9, 2, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm0, ymm3, ymm14) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(rdx, rcx, 1, 5*8)) lea(mem(rdx, r9, 4), rdx) // rdx += 4*cs_a; #endif vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) 
vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm0, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP #if 1 prefetch(0, mem(rdx, 5*8)) add(r9, rdx) // rdx += cs_a; #endif vmovupd(mem(rbx, 0*32), ymm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm0, ymm3, ymm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm0, ymm3, ymm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm0, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(r12, rcx) // reset rcx to current utile of c. 
mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm12, ymm12) vmulpd(ymm0, ymm14, ymm14) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm14) vmovupd(ymm14, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) add(rdi, rcx) vmovupd(ymm10, mem(rcx)) add(rdi, rcx) vmovupd(ymm12, mem(rcx)) add(rdi, rcx) vmovupd(ymm14, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) lea(mem(r12, rdi, 4), r12) // lea(mem(r12, rdi, 2), r12) // c_ii = r12 += 6*rs_c //lea(mem(r14, r8, 4), r14) // //lea(mem(r14, r8, 2), r14) // a_ii = r14 += 6*rs_a mov(var(ps_a8), rax) // load ps_a8 lea(mem(r14, rax, 1), r14) // a_ii = r14 += ps_a8 dec(r11) // ii -= 1; jne(.DLOOP6X4I) // iterate again if ii != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [ps_a8] "m" (ps_a8), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( m_left ) { const dim_t nr_cur = 4; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; //double* restrict ai = a + i_edge*rs_a; //double* restrict ai = a + ( i_edge / 6 ) * ps_a; double* restrict ai = a + m_iter * ps_a; double* restrict bj = b; #if 0 // We add special handling for slightly inflated MR blocksizes // at edge cases, up to a maximum of 9. if ( 6 < m_left ) { dgemmsup_ker_ft ker_fp1 = NULL; dgemmsup_ker_ft ker_fp2 = NULL; dim_t mr1, mr2; if ( m_left == 7 ) { mr1 = 4; mr2 = 3; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x4; ker_fp2 = bli_dgemmsup_rv_haswell_asm_3x4; } else if ( m_left == 8 ) { mr1 = 4; mr2 = 4; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x4; ker_fp2 = bli_dgemmsup_rv_haswell_asm_4x4; } else // if ( m_left == 9 ) { mr1 = 4; mr2 = 5; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x4; ker_fp2 = bli_dgemmsup_rv_haswell_asm_5x4; } ker_fp1 ( conja, conjb, mr1, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr1*rs_c0; ai += mr1*rs_a0; ker_fp2 ( conja, conjb, mr2, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; } #endif #if 1 dgemmsup_ker_ft ker_fps[6] = { NULL, bli_dgemmsup_rv_haswell_asm_1x4, bli_dgemmsup_rv_haswell_asm_2x4, bli_dgemmsup_rv_haswell_asm_3x4, bli_dgemmsup_rv_haswell_asm_4x4, bli_dgemmsup_rv_haswell_asm_5x4 }; dgemmsup_ker_ft ker_fp = ker_fps[ m_left ]; ker_fp ( conja, conjb, m_left, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; #else if ( 5 <= m_left ) { const dim_t mr_cur = 5; bli_dgemmsup_rv_haswell_asm_5x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 4 <= m_left ) { const dim_t mr_cur = 4; bli_dgemmsup_rv_haswell_asm_4x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, 
rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rv_haswell_asm_3x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rv_haswell_asm_2x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rv_haswell_asm_1x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } #endif } } void bli_dgemmsup_rv_haswell_asm_6x2m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t m_iter = m0 / 6; uint64_t m_left = m0 % 6; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // Query the panel stride of A and convert it to units of bytes. uint64_t ps_a = bli_auxinfo_ps_a( data ); uint64_t ps_a8 = ps_a * sizeof( double ); if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. 
mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a //mov(var(b), rbx) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // During preamble and loops: // r12 = rcx = c // r14 = rax = a // read rbx from var(b) near beginning of loop // r11 = m dim index ii mov(var(m_iter), r11) // ii = m_iter; label(.DLOOP6X2I) // LOOP OVER ii = [ m_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(xmm4, xmm4, xmm4) vxorpd(xmm6, xmm6, xmm6) vxorpd(xmm8, xmm8, xmm8) vxorpd(xmm10, xmm10, xmm10) vxorpd(xmm12, xmm12, xmm12) vxorpd(xmm14, xmm14, xmm14) #endif mov(var(b), rbx) // load address of b. //mov(r12, rcx) // reset rcx to current utile of c. mov(r14, rax) #if 0 lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c #else cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(r12, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(r12, 1*8)) // prefetch c + 0*rs_c prefetch(0, mem(r12, rdi, 1, 1*8)) // prefetch c + 1*rs_c prefetch(0, mem(r12, rdi, 2, 1*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 1*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 1*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 1*8)) // prefetch c + 5*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) prefetch(0, mem(r12, 5*8)) // prefetch c + 0*cs_c prefetch(0, mem(r12, rsi, 1, 5*8)) // prefetch c + 1*cs_c label(.DPOSTPFETCH) // done prefetching c #endif #if 1 lea(mem(r9, r9, 2), rcx) // rcx = 3*cs_a; lea(mem(rax, r8, 4), rdx) // use rdx for prefetching lines lea(mem(rdx, r8, 2), rdx) // from next upanel of a. //lea(mem(rax, r9, 8), rdx) // use rdx for prefetching a. //lea(mem(rdx, r9, 8), rdx) // rdx = a + 16*cs_a; #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 prefetch(0, mem(rdx, 5*8)) #endif vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm8) vfmadd231pd(xmm0, xmm3, xmm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm12) vfmadd231pd(xmm0, xmm3, xmm14) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(rdx, r9, 1, 5*8)) #endif vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm8) vfmadd231pd(xmm0, xmm3, xmm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm12) vfmadd231pd(xmm0, xmm3, xmm14) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(rdx, r9, 2, 5*8)) #endif vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm8) vfmadd231pd(xmm0, xmm3, xmm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm12) vfmadd231pd(xmm0, xmm3, xmm14) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(rdx, rcx, 1, 5*8)) lea(mem(rdx, r9, 4), rdx) // rdx += 4*cs_a; #endif vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) 
vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm8) vfmadd231pd(xmm0, xmm3, xmm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm12) vfmadd231pd(xmm0, xmm3, xmm14) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP #if 1 prefetch(0, mem(rdx, 5*8)) add(r9, rdx) // rdx += cs_a; #endif vmovupd(mem(rbx, 0*32), xmm0) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm4) vfmadd231pd(xmm0, xmm3, xmm6) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(xmm0, xmm2, xmm8) vfmadd231pd(xmm0, xmm3, xmm10) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(xmm0, xmm2, xmm12) vfmadd231pd(xmm0, xmm3, xmm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(r12, rcx) // reset rcx to current utile of c. 
mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) vmulpd(xmm0, xmm8, xmm8) vmulpd(xmm0, xmm10, xmm10) vmulpd(xmm0, xmm12, xmm12) vmulpd(xmm0, xmm14, xmm14) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; //lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm10) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm12) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm14) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORED) vunpcklpd(xmm6, xmm4, xmm0) vunpckhpd(xmm6, xmm4, xmm1) vunpcklpd(xmm10, xmm8, xmm2) vunpckhpd(xmm10, xmm8, xmm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(xmm14, xmm12, xmm0) vunpckhpd(xmm14, xmm12, xmm1) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vunpcklpd(xmm6, xmm4, xmm0) vunpckhpd(xmm6, xmm4, xmm1) vunpcklpd(xmm10, xmm8, xmm2) vunpckhpd(xmm10, xmm8, xmm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(xmm14, xmm12, xmm0) vunpckhpd(xmm14, xmm12, xmm1) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) lea(mem(r12, rdi, 4), r12) // lea(mem(r12, rdi, 2), r12) // c_ii = r12 += 6*rs_c //lea(mem(r14, r8, 4), r14) // //lea(mem(r14, r8, 2), r14) // a_ii = r14 += 6*rs_a mov(var(ps_a8), rax) // load ps_a8 lea(mem(r14, rax, 1), r14) // a_ii = r14 += ps_a8 dec(r11) // ii -= 1; jne(.DLOOP6X2I) // iterate again if ii != 0. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [ps_a8] "m" (ps_a8), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. if ( m_left ) { const dim_t nr_cur = 2; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; //double* restrict ai = a + i_edge*rs_a; //double* restrict ai = a + ( i_edge / 6 ) * ps_a; double* restrict ai = a + m_iter * ps_a; double* restrict bj = b; #if 0 // We add special handling for slightly inflated MR blocksizes // at edge cases, up to a maximum of 9. 
if ( 6 < m_left ) { dgemmsup_ker_ft ker_fp1 = NULL; dgemmsup_ker_ft ker_fp2 = NULL; dim_t mr1, mr2; if ( m_left == 7 ) { mr1 = 4; mr2 = 3; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x2; ker_fp2 = bli_dgemmsup_rv_haswell_asm_3x2; } else if ( m_left == 8 ) { mr1 = 4; mr2 = 4; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x2; ker_fp2 = bli_dgemmsup_rv_haswell_asm_4x2; } else // if ( m_left == 9 ) { mr1 = 4; mr2 = 5; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x2; ker_fp2 = bli_dgemmsup_rv_haswell_asm_5x2; } ker_fp1 ( conja, conjb, mr1, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr1*rs_c0; ai += mr1*rs_a0; ker_fp2 ( conja, conjb, mr2, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; } #endif #if 1 dgemmsup_ker_ft ker_fps[6] = { NULL, bli_dgemmsup_rv_haswell_asm_1x2, bli_dgemmsup_rv_haswell_asm_2x2, bli_dgemmsup_rv_haswell_asm_3x2, bli_dgemmsup_rv_haswell_asm_4x2, bli_dgemmsup_rv_haswell_asm_5x2 }; dgemmsup_ker_ft ker_fp = ker_fps[ m_left ]; ker_fp ( conja, conjb, m_left, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; #else if ( 5 <= m_left ) { const dim_t mr_cur = 5; bli_dgemmsup_rv_haswell_asm_5x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 4 <= m_left ) { const dim_t mr_cur = 4; bli_dgemmsup_rv_haswell_asm_4x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rv_haswell_asm_3x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; 
bli_dgemmsup_rv_haswell_asm_2x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rv_haswell_asm_1x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } #endif } } blis-0.6.1/kernels/haswell/3/sup/bli_gemmsup_rv_haswell_asm_d6x8n.c000066400000000000000000003355051360743507500253330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrr: -------- ------ -------- -------- ------ -------- -------- += ------ ... -------- -------- ------ -------- -------- ------ : -------- ------ : rcr: -------- | | | | -------- -------- | | | | -------- -------- += | | | | ... -------- -------- | | | | -------- -------- | | | | : -------- | | | | : Assumptions: - B is row-stored; - A is row- or column-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential kernel is well-suited for contiguous (v)ector loads on B and single-element broadcasts from A. NOTE: These kernels explicitly support column-oriented IO, implemented via an in-register transpose. And thus they also support the crr and ccr cases, though only crr is ever utilized (because ccr is handled by transposing the operation and executing rcr, which does not incur the cost of the in-register transpose). crr: | | | | | | | | ------ -------- | | | | | | | | ------ -------- | | | | | | | | += ------ ... -------- | | | | | | | | ------ -------- | | | | | | | | ------ : | | | | | | | | ------ : */ // Prototype reference microkernels. GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) #if 0 // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4 #define NUM_NR 4 #define FUNCPTR_T dgemmsup_ker_ft static dim_t mrs[NUM_MR] = { 6, 4, 2, 1 }; static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 }; static FUNCPTR_T kmap[NUM_MR][NUM_NR] = { /* 8 4 2 1 */ /* 6 */ { bli_dgemmsup_rv_haswell_asm_6x8n, bli_dgemmsup_rv_haswell_asm_6x4n, bli_dgemmsup_rv_haswell_asm_6x2n, bli_dgemmsup_r_haswell_ref_6x1 }, /* 4 */ { bli_dgemmsup_rv_haswell_asm_4x8n, bli_dgemmsup_rv_haswell_asm_4x4n, bli_dgemmsup_rv_haswell_asm_4x2n, bli_dgemmsup_r_haswell_ref_4x1 }, /* 2 */ { bli_dgemmsup_rv_haswell_asm_2x8n, bli_dgemmsup_rv_haswell_asm_2x4n, bli_dgemmsup_rv_haswell_asm_2x2n, bli_dgemmsup_r_haswell_ref_2x1 }, /* 1 */ { bli_dgemmsup_rv_haswell_asm_1x8n, bli_dgemmsup_rv_haswell_asm_1x4n, bli_dgemmsup_rv_haswell_asm_1x2n, bli_dgemmsup_r_haswell_ref_1x1 }, }; #endif /* bli_dgemmsup_rv_haswell_asm_6x8n: double-precision kernel that accumulates the products of a (broadcast element-wise) and b (loaded as two 4-wide vectors per row), scales the result by alpha, and stores it to c, adding beta*c unless beta == 0 (in which case c is never read). The assembly loops over n0 in panels of 8 columns, advancing b by the panel stride queried from data and c by 8*cs_c; k0 is processed in an unrolled-by-4 main loop plus a k0 % 4 cleanup loop. c is handled as column-stored when rs_c == 1 (via an in-register transpose) and as row-stored otherwise. If m0 is not a multiple of 6, the call is forwarded to the other ?x8n kernels and returns; when 6 < m0 the work is split into two such calls (the final else branch treats any m0 other than 7 or 8 as 9 -- presumably callers never pass more than 9; TODO confirm). Leftover columns (n0 % 8) are handled afterwards by the 6x4, 6x2 and reference 6x1 kernels. conja, conjb and cntx are not used by the assembly; they are only passed through to the kernels called here. */ void bli_dgemmsup_rv_haswell_asm_6x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { uint64_t m_left = m0 % 6; #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //printf( "rv_6x8n: %d %d %d\n", (int)m0, (int)n0, (int)k0 ); // First check whether this is an edge case in the m dimension. If so, // dispatch other ?x8n kernels, as needed. if ( m_left ) { double* restrict cij = c; double* restrict bj = b; double* restrict ai = a; #if 1 // We add special handling for slightly inflated MR blocksizes // at edge cases, up to a maximum of 9. 
if ( 6 < m0 ) { dgemmsup_ker_ft ker_fp1 = NULL; dgemmsup_ker_ft ker_fp2 = NULL; dim_t mr1, mr2; if ( m0 == 7 ) { mr1 = 4; mr2 = 3; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x8n; ker_fp2 = bli_dgemmsup_rv_haswell_asm_3x8n; } else if ( m0 == 8 ) { mr1 = 4; mr2 = 4; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x8n; ker_fp2 = bli_dgemmsup_rv_haswell_asm_4x8n; } else // if ( m0 == 9 ) { mr1 = 4; mr2 = 5; ker_fp1 = bli_dgemmsup_rv_haswell_asm_4x8n; ker_fp2 = bli_dgemmsup_rv_haswell_asm_5x8n; } ker_fp1 ( conja, conjb, mr1, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr1*rs_c0; ai += mr1*rs_a0; ker_fp2 ( conja, conjb, mr2, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; } #endif #if 1 dgemmsup_ker_ft ker_fps[6] = { NULL, bli_dgemmsup_rv_haswell_asm_1x8n, bli_dgemmsup_rv_haswell_asm_2x8n, bli_dgemmsup_rv_haswell_asm_3x8n, bli_dgemmsup_rv_haswell_asm_4x8n, bli_dgemmsup_rv_haswell_asm_5x8n }; dgemmsup_ker_ft ker_fp = ker_fps[ m_left ]; ker_fp ( conja, conjb, m_left, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); return; #else if ( 5 <= m_left ) { const dim_t mr_cur = 5; bli_dgemmsup_rv_haswell_asm_5x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 4 <= m_left ) { const dim_t mr_cur = 4; bli_dgemmsup_rv_haswell_asm_4x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rv_haswell_asm_3x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; 
bli_dgemmsup_rv_haswell_asm_2x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { #if 1 const dim_t mr_cur = 1; bli_dgemmsup_rv_haswell_asm_1x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_TRANSPOSE, conja, k0, n0, alpha, bj, rs_b0, cs_b0, ai, cs_a0, beta, cij, cs_c0, cntx, NULL ); #endif } return; #endif } //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t n_iter = n0 / 8; uint64_t n_left = n0 % 8; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // Query the panel stride of B and convert it to units of bytes. uint64_t ps_b = bli_auxinfo_ps_b( data ); uint64_t ps_b8 = ps_b * sizeof( double ); if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. //mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. 
mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds of allocated memory // (the likely result: a segmentation fault). mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // During preamble and loops: // r12 = rcx = c // r14 = rbx = b // read rax from var(a) near beginning of loop // r11 = n dim index jj mov(var(n_iter), r11) // jj = n_iter; label(.DLOOP6X8J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif mov(var(a), rax) // load address of a. //mov(r12, rcx) // reset rcx to current utile of c. mov(r14, rbx) // reset rbx to current upanel of b. cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(r12, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(r12, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(r12, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(r12, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(r12, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(r12, 5*8)) // prefetch c + 0*cs_c prefetch(0, mem(r12, rsi, 1, 5*8)) // prefetch c + 1*cs_c prefetch(0, mem(r12, rsi, 2, 5*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 5*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 5*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 5*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 5*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 5*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #if 1 // use byte offsets from rbx to // prefetch lines from next upanel // of b. #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 //prefetch(0, mem(rdx, 11*8)) // prefetch line of next upanel of b prefetch(0, mem(rbx, 11*8)) // prefetch line of next upanel of b #else prefetch(0, mem(rdx, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 1 #if 1 //prefetch(0, mem(rdx, r10, 1, 11*8)) prefetch(0, mem(rbx, 11*8)) #else prefetch(0, mem(rdx, r10, 1, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 //prefetch(0, mem(rdx, r10, 2, 11*8)) prefetch(0, mem(rbx, 11*8)) #else prefetch(0, mem(rdx, r10, 2, 5*8)) #endif 
vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 3 #if 1 //prefetch(0, mem(rdx, rcx, 1, 11*8)) prefetch(0, mem(rbx, 11*8)) //prefetch(0, mem(rdx, r9, 1, 7*8)) //lea(mem(rdx, r10, 4), rdx) // a_prefetch += 4*cs_a; #else prefetch(0, mem(rdx, rcx, 1, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.DLOOPKLEFT) // EDGE LOOP #if 1 prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) vbroadcastsd(mem(rax, r15, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(r12, rcx) // reset rcx to current utile of c. mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm11, ymm11) vmulpd(ymm0, ymm12, ymm12) vmulpd(ymm0, ymm13, ymm13) vmulpd(ymm0, ymm14, ymm14) vmulpd(ymm0, ymm15, ymm15) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm11) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm13) vmovupd(ymm13, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm14) vmovupd(ymm14, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm15) vmovupd(ymm15, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) 
vmovupd(xmm4, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm5) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm10, mem(rcx)) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm12, mem(rcx)) vmovupd(ymm13, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm14, mem(rcx)) vmovupd(ymm15, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) lea(mem(r12, rsi, 8), r12) // c_jj = r12 += 8*cs_c //add(imm(8*8), r14) // b_jj = r14 += 8*cs_b mov(var(ps_b8), rbx) // load ps_b8 lea(mem(r14, rbx, 1), r14) // b_jj = r14 += ps_b8 dec(r11) // jj -= 1; jne(.DLOOP6X8J) // iterate again if jj != 0. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [ps_b8] "m" (ps_b8), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the n dimension, if they exist. if ( n_left ) { const dim_t mr_cur = 6; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; //double* restrict bj = b + j_edge*cs_b; //double* restrict bj = b + ( j_edge / 8 ) * ps_b; double* restrict bj = b + n_iter * ps_b; if ( 4 <= n_left ) { const dim_t nr_cur = 4; bli_dgemmsup_rv_haswell_asm_6x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rv_haswell_asm_6x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 1 const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref_6x1 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, m0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } } } void bli_dgemmsup_rv_haswell_asm_5x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict 
alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t n_iter = n0 / 8; uint64_t n_left = n0 % 8; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // Query the panel stride of B and convert it to units of bytes. uint64_t ps_b = bli_auxinfo_ps_b( data ); uint64_t ps_b8 = ps_b * sizeof( double ); if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. //mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). 
mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // During preamble and loops: // r12 = rcx = c // r14 = rbx = b // read rax from var(a) near beginning of loop // r11 = m dim index ii mov(var(n_iter), r11) // jj = n_iter; label(.DLOOP6X8J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) //vxorpd(ymm14, ymm14, ymm14) //vxorpd(ymm15, ymm15, ymm15) #endif mov(var(a), rax) // load address of a. //mov(r12, rcx) // reset rcx to current utile of c. mov(r14, rbx) // reset rbx to current upanel of b. cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c lea(mem(r12, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(r12, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(r12, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(r12, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(r12, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(r12, 4*8)) // prefetch c + 0*cs_c prefetch(0, mem(r12, rsi, 1, 4*8)) // prefetch c + 1*cs_c prefetch(0, mem(r12, rsi, 2, 4*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 4*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 4*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 4*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 4*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 4*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #if 1 // use byte offsets from rbx to // prefetch lines from next upanel // of b. #else lea(mem(r10, r10, 2), rcx) // rcx = 3*rs_b; lea(mem(rbx, r10, 8), rdx) // use rdx for prefetching b. lea(mem(rdx, r10, 8), rdx) // rdx = b + 16*rs_b; #if 0 mov(r9, rsi) // rsi = rs_b; sal(imm(5), rsi) // rsi = 16*rs_b; lea(mem(rax, rsi, 1), rdx) // rdx = b + 16*rs_b; #endif #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 //prefetch(0, mem(rdx, 11*8)) // prefetch line of next upanel of b prefetch(0, mem(rbx, 11*8)) // prefetch line of next upanel of b #else prefetch(0, mem(rdx, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // ---------------------------------- iteration 1 #if 1 //prefetch(0, mem(rdx, r10, 1, 11*8)) prefetch(0, mem(rbx, 11*8)) #else prefetch(0, mem(rdx, r10, 1, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // ---------------------------------- iteration 2 #if 1 //prefetch(0, mem(rdx, r10, 2, 11*8)) prefetch(0, mem(rbx, 11*8)) #else prefetch(0, mem(rdx, r10, 2, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, 
ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) // ---------------------------------- iteration 3 #if 1 //prefetch(0, mem(rdx, rcx, 1, 11*8)) prefetch(0, mem(rbx, 11*8)) //prefetch(0, mem(rdx, r9, 1, 7*8)) //lea(mem(rdx, r10, 4), rdx) // a_prefetch += 4*cs_a; #else prefetch(0, mem(rdx, rcx, 1, 5*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.DLOOPKLEFT) // EDGE LOOP #if 1 prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) vbroadcastsd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vbroadcastsd(mem(rax, r8, 4), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm12) vfmadd231pd(ymm1, ymm2, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(r12, rcx) // reset rcx to current utile of c. mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) vmulpd(ymm0, ymm10, ymm10) vmulpd(ymm0, ymm11, ymm11) vmulpd(ymm0, ymm12, ymm12) vmulpd(ymm0, ymm13, ymm13) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm10) vmovupd(ymm10, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm11) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm12) vmovupd(ymm12, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm13) vmovupd(ymm13, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm4) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovlpd(mem(rdx), xmm0, xmm0) vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rdx, rsi, 2), xmm1, xmm1) 
vmovhpd(mem(rdx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm12, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), ymm3, ymm5) vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7) vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9) vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rdx), xmm3, xmm0) vfmadd231pd(mem(rdx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rdx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rdx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovlpd(mem(rdx), xmm0, xmm0) vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rdx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rdx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm13, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(ymm9, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm10, mem(rcx)) vmovupd(ymm11, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm12, mem(rcx)) vmovupd(ymm13, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vmovupd(ymm4, mem(rcx)) vmovupd(ymm6, mem(rcx, rsi, 1)) vmovupd(ymm8, mem(rcx, rsi, 2)) vmovupd(ymm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm14, ymm12, ymm0) vunpckhpd(ymm14, ymm12, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovupd(ymm12, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vmovupd(ymm5, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 1)) vmovupd(ymm9, mem(rcx, rsi, 2)) vmovupd(ymm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) #if 0 vunpcklpd(ymm15, ymm13, ymm0) vunpckhpd(ymm15, ymm13, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rdx)) vmovupd(xmm1, mem(rdx, rsi, 1)) vmovupd(xmm2, 
mem(rdx, rsi, 2)) vmovupd(xmm4, mem(rdx, rax, 1)) #else vmovupd(ymm13, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rdx)) vmovhpd(xmm0, mem(rdx, rsi, 1)) vmovlpd(xmm1, mem(rdx, rsi, 2)) vmovhpd(xmm1, mem(rdx, rax, 1)) #endif //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) lea(mem(r12, rsi, 8), r12) // c_jj = r12 += 8*cs_c //add(imm(8*8), r14) // b_jj = r14 += 8*cs_b mov(var(ps_b8), rbx) // load ps_b8 lea(mem(r14, rbx, 1), r14) // a_ii = r14 += ps_b8 dec(r11) // jj -= 1; jne(.DLOOP6X8J) // iterate again if jj != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [ps_b8] "m" (ps_b8), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( n_left )
	{
		const dim_t mr_cur = 5;
		const dim_t j_edge = n0 - ( dim_t )n_left;

		double* restrict cij = c + j_edge*cs_c;
		double* restrict ai = a;
		//double* restrict bj = b + j_edge*cs_b;
		//double* restrict bj = b + ( j_edge / 8 ) * ps_b;
		double* restrict bj = b + n_iter * ps_b;

		if ( 4 <= n_left )
		{
			const dim_t nr_cur = 4;

			bli_dgemmsup_rv_haswell_asm_5x4
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 2 <= n_left )
		{
			const dim_t nr_cur = 2;

			bli_dgemmsup_rv_haswell_asm_5x2
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 1 == n_left )
		{
#if 1
			const dim_t nr_cur = 1;

			bli_dgemmsup_r_haswell_ref_5x1
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
#else
			bli_dgemv_ex
			(
			  BLIS_NO_TRANSPOSE, conjb, m0, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0,
			  beta, cij, rs_c0, cntx, NULL
			);
#endif
		}
	}
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rv_haswell_asm_4x8n
//
// Double-precision kernel that updates a block of C four rows tall, walking
// the n dimension in panels of 8 columns:
//
//   C := beta * C + alpha * A * B
//
// - n0 is split into n_iter = n0 / 8 full panels, processed by the inline
//   assembly below, and n_left = n0 % 8 trailing columns, which are handed
//   to the 4x4, 4x2 and 4x1 kernels after the assembly.
// - k0 is split into k_iter = k0 / 4 passes of the 4x-unrolled main loop and
//   k_left = k0 % 4 passes of the edge loop.
// - A is addressed via rs_a0 (between the 4 rows) and cs_a0 (per k step).
// - B is advanced by rs_b0 per k step and 8 contiguous doubles are loaded
//   from each row, so the assembly does not use cs_b0 (it is only used to
//   step bj between edge-case kernels). Successive 8-column panels of B
//   are ps_b elements apart, with ps_b taken from bli_auxinfo_ps_b( data ).
// - C is treated as column-stored when rs_c0 == 1. Every other case takes
//   the row-stored path, which loads/stores 4 contiguous doubles at c and
//   at c + 4*cs_c per row.
// - If beta is zero, C is written without being read.
// - m0 is not read by this function (mr is fixed at 4); conja and conjb are
//   only forwarded to the edge-case kernels.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rv_haswell_asm_4x8n
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t n_iter = n0 / 8;
	uint64_t n_left = n0 % 8;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// Query the panel stride of B and convert it to units of bytes.
	uint64_t ps_b = bli_auxinfo_ps_b( data );
	uint64_t ps_b8 = ps_b * sizeof( double );

	// With fewer than 8 columns there is no full panel; skip the assembly.
	if ( n_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall() // zero all xmm/ymm registers.

	//mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), r14) // load address of b.
	mov(var(rs_b), r10) // load rs_b
	//mov(var(cs_b), r11) // load cs_b
	lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	//lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	// NOTE: We cannot pre-load elements of a or b
	// because it could eventually, in the last
	// unrolled iter or the cleanup loop, result
	// in reading beyond the bounds allocated mem
	// (the likely result: a segmentation fault).

	mov(var(c), r12) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

	// During preamble and loops:
	// r12 = rcx = c
	// r14 = rbx = b
	// read rax from var(a) near beginning of loop
	// r11 = n dim index jj

	mov(var(n_iter), r11) // jj = n_iter;

	label(.DLOOP4X8J) // LOOP OVER jj = [ n_iter ... 1 0 ]

	// Clear the accumulators for this 4x8 block of C:
	// ymm4/ymm5 = row 0 (cols 0-3 / 4-7), ymm6/ymm7 = row 1,
	// ymm8/ymm9 = row 2, ymm10/ymm11 = row 3.
#if 0
	vzeroall() // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
#endif

	mov(var(a), rax) // load address of a.

	//mov(r12, rcx) // reset rcx to current utile of c.
	mov(r14, rbx) // reset rbx to current upanel of b.

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLPFETCH) // jump to column storage case
	label(.DROWPFETCH) // row-stored prefetching on c

	lea(mem(r12, rdi, 2), rdx) //
	lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(r12, 7*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(r12, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(r12, rdi, 2, 7*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c

	jmp(.DPOSTPFETCH) // jump to end of prefetching c
	label(.DCOLPFETCH) // column-stored prefetching c

	mov(var(cs_c), rsi) // load cs_c to rsi (temporarily)
	lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double)
	lea(mem(r12, rsi, 2), rdx) //
	lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c;
	prefetch(0, mem(r12, 3*8)) // prefetch c + 0*cs_c
	prefetch(0, mem(r12, rsi, 1, 3*8)) // prefetch c + 1*cs_c
	prefetch(0, mem(r12, rsi, 2, 3*8)) // prefetch c + 2*cs_c
	prefetch(0, mem(rdx, 3*8)) // prefetch c + 3*cs_c
	prefetch(0, mem(rdx, rsi, 1, 3*8)) // prefetch c + 4*cs_c
	prefetch(0, mem(rdx, rsi, 2, 3*8)) // prefetch c + 5*cs_c
	lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c;
	prefetch(0, mem(rdx, rsi, 1, 3*8)) // prefetch c + 6*cs_c
	prefetch(0, mem(rdx, rsi, 2, 3*8)) // prefetch c + 7*cs_c

	label(.DPOSTPFETCH) // done prefetching c

#if 1
	//lea(mem(r10, r10, 2), rcx) // rcx = 3*rs_b;
	// use byte offsets from rbx to
	// prefetch lines from next upanel
	// of b.
#endif

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	// contains the k_left loop.

	label(.DLOOPKITER) // MAIN LOOP

	// Each unrolled iteration loads one row of b (8 doubles in ymm0/ymm1),
	// broadcasts the four elements of the current column of a, and
	// accumulates the resulting rank-1 update into ymm4..ymm11.

	// ---------------------------------- iteration 0

#if 1
	//prefetch(0, mem(rdx, 11*8)) // prefetch line of next upanel of b
	prefetch(0, mem(rbx, 11*8)) // prefetch line of next upanel of b
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	// ---------------------------------- iteration 1

#if 1
	//prefetch(0, mem(rdx, r10, 1, 11*8))
	prefetch(0, mem(rbx, 11*8))
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	// ---------------------------------- iteration 2

#if 1
	//prefetch(0, mem(rdx, r10, 2, 11*8))
	prefetch(0, mem(rbx, 11*8))
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	// ---------------------------------- iteration 3

#if 1
	//prefetch(0, mem(rdx, rcx, 1, 11*8))
	//lea(mem(rdx, r10, 4), rdx) // a_prefetch += 4*cs_a;
	prefetch(0, mem(rbx, 11*8))
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER) // iterate again if i != 0.

	label(.DCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	// else, we prepare to enter k_left loop.

	label(.DLOOPKLEFT) // EDGE LOOP

	// Same update as one main-loop iteration, run k_left times.

#if 1
	prefetch(0, mem(rbx, 11*8))
#endif

	vmovupd(mem(rbx, 0*32), ymm0)
	vmovupd(mem(rbx, 1*32), ymm1)
	add(r10, rbx) // b += rs_b;

	vbroadcastsd(mem(rax ), ymm2)
	vbroadcastsd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm2, ymm4)
	vfmadd231pd(ymm1, ymm2, ymm5)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vbroadcastsd(mem(rax, r8, 2), ymm2)
	vbroadcastsd(mem(rax, r13, 1), ymm3)
	add(r9, rax) // a += cs_a;
	vfmadd231pd(ymm0, ymm2, ymm8)
	vfmadd231pd(ymm1, ymm2, ymm9)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT) // iterate again if i != 0.

	label(.DPOSTACCUM)

	mov(r12, rcx) // reset rcx to current utile of c.
	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4) // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)
	vmulpd(ymm0, ymm7, ymm7)
	vmulpd(ymm0, ymm8, ymm8)
	vmulpd(ymm0, ymm9, ymm9)
	vmulpd(ymm0, ymm10, ymm10)
	vmulpd(ymm0, ymm11, ymm11)

	mov(var(cs_c), rsi) // load cs_c
	lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	//lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c;
	//lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c;

	lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c;

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORED) // jump to column storage case

	label(.DROWSTORED)

	// For each of the 4 rows: c := beta*c + acc, for columns 0-3 at rcx
	// and columns 4-7 at rcx + 4*cs_c; rcx then steps down by rs_c.

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7)
	vmovupd(ymm7, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm8)
	vmovupd(ymm8, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9)
	vmovupd(ymm9, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm10)
	vmovupd(ymm10, mem(rcx))

	vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm11)
	vmovupd(ymm11, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORED)

	// Transpose the left 4x4 sub-block (ymm4, ymm6, ymm8, ymm10) so that
	// each register holds one column of C.
	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	// ymm3 was used as a scratch register above; reload beta.
	vbroadcastsd(mem(rbx), ymm3)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm6)
	vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm8)
	vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm10)
	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx) // advance rcx to column 4 of the block

	// Transpose the right 4x4 sub-block (ymm5, ymm7, ymm9, ymm11).
	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	// ymm3 was clobbered again; reload beta.
	vbroadcastsd(mem(rbx), ymm3)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vfmadd231pd(mem(rcx, rsi, 1), ymm3, ymm7)
	vfmadd231pd(mem(rcx, rsi, 2), ymm3, ymm9)
	vfmadd231pd(mem(rcx, rax, 1), ymm3, ymm11)
	vmovupd(ymm5, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 1))
	vmovupd(ymm9, mem(rcx, rsi, 2))
	vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	jmp(.DDONE) // jump to end.

	label(.DBETAZERO)

	cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
	jz(.DCOLSTORBZ) // jump to column storage case

	label(.DROWSTORBZ)

	// beta == 0: store the alpha-scaled accumulators without reading c.

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm5, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm8, mem(rcx))
	vmovupd(ymm9, mem(rcx, rsi, 4))
	add(rdi, rcx)

	vmovupd(ymm10, mem(rcx))
	vmovupd(ymm11, mem(rcx, rsi, 4))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	label(.DCOLSTORBZ)

	// beta == 0, column-stored: transpose each 4x4 sub-block and store.

	vunpcklpd(ymm6, ymm4, ymm0)
	vunpckhpd(ymm6, ymm4, ymm1)
	vunpcklpd(ymm10, ymm8, ymm2)
	vunpckhpd(ymm10, ymm8, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm4)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm6)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm8)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm10)

	vmovupd(ymm4, mem(rcx))
	vmovupd(ymm6, mem(rcx, rsi, 1))
	vmovupd(ymm8, mem(rcx, rsi, 2))
	vmovupd(ymm10, mem(rcx, rax, 1))

	lea(mem(rcx, rsi, 4), rcx)

	vunpcklpd(ymm7, ymm5, ymm0)
	vunpckhpd(ymm7, ymm5, ymm1)
	vunpcklpd(ymm11, ymm9, ymm2)
	vunpckhpd(ymm11, ymm9, ymm3)
	vinsertf128(imm(0x1), xmm2, ymm0, ymm5)
	vinsertf128(imm(0x1), xmm3, ymm1, ymm7)
	vperm2f128(imm(0x31), ymm2, ymm0, ymm9)
	vperm2f128(imm(0x31), ymm3, ymm1, ymm11)

	vmovupd(ymm5, mem(rcx))
	vmovupd(ymm7, mem(rcx, rsi, 1))
	vmovupd(ymm9, mem(rcx, rsi, 2))
	vmovupd(ymm11, mem(rcx, rax, 1))

	//lea(mem(rcx, rsi, 4), rcx)

	label(.DDONE)

	lea(mem(r12, rsi, 8), r12) // c_jj = r12 += 8*cs_c

	//add(imm(8*8), r14) // b_jj = r14 += 8*cs_b
	mov(var(ps_b8), rbx) // load ps_b8
	lea(mem(r14, rbx, 1), r14) // b_jj = r14 += ps_b8

	dec(r11) // jj -= 1;
	jne(.DLOOP4X8J) // iterate again if jj != 0.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [n_iter] "m" (n_iter),
	  [k_iter] "m" (k_iter),
	  [k_left] "m" (k_left),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [ps_b8] "m" (ps_b8),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the n dimension, if they exist: the n_left
	// (= n0 % 8) trailing columns are covered by at most one 4-column, one
	// 2-column and one 1-column kernel call, in that order.
	if ( n_left )
	{
		const dim_t mr_cur = 4;
		const dim_t j_edge = n0 - ( dim_t )n_left;

		double* restrict cij = c + j_edge*cs_c;
		double* restrict ai = a;
		//double* restrict bj = b + j_edge*cs_b;
		//double* restrict bj = b + ( j_edge / 8 ) * ps_b;
		// The leftover columns of B start after the n_iter full panels.
		double* restrict bj = b + n_iter * ps_b;

		if ( 4 <= n_left )
		{
			const dim_t nr_cur = 4;

			bli_dgemmsup_rv_haswell_asm_4x4
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 2 <= n_left )
		{
			const dim_t nr_cur = 2;

			bli_dgemmsup_rv_haswell_asm_4x2
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 1 == n_left )
		{
			const dim_t nr_cur = 1;

			bli_dgemmsup_r_haswell_ref_4x1
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
	}
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rv_haswell_asm_3x8n
//
// Three-row counterpart of the 4x8n kernel: updates a block of C three rows
// tall, walking the n dimension in panels of 8 columns
// (n_iter = n0 / 8 panels in assembly, n_left = n0 % 8 columns delegated to
// the 3x4, 3x2 and 3x1 kernels). k0 is split into k_iter = k0 / 4 unrolled
// passes plus k_left = k0 % 4 edge passes.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rv_haswell_asm_3x8n
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;

	uint64_t n_iter = n0 / 8;
	uint64_t n_left = n0 % 8;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// Query the panel stride of B and convert it to units of bytes.
uint64_t ps_b = bli_auxinfo_ps_b( data ); uint64_t ps_b8 = ps_b * sizeof( double ); if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. //mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds allocated mem // (the likely result: a segmentation fault). mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // During preamble and loops: // r12 = rcx = c // r14 = rbx = b // read rax from var(a) near beginning of loop // r11 = m dim index ii mov(var(n_iter), r11) // jj = n_iter; label(.DLOOP4X8J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif mov(var(a), rax) // load address of a. //mov(r12, rcx) // reset rcx to current utile of c. mov(r14, rbx) // reset rbx to current upanel of b. 
cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(r12, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(r12, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(r12, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(r12, rdi, 2, 7*8)) // prefetch c + 2*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(r12, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(r12, 2*8)) // prefetch c + 0*cs_c prefetch(0, mem(r12, rsi, 1, 2*8)) // prefetch c + 1*cs_c prefetch(0, mem(r12, rsi, 2, 2*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 2*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 2*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 2*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 2*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 2*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #if 1 //lea(mem(r10, r10, 2), rcx) // rcx = 3*rs_b; // use byte offsets from rbx to // prefetch lines from next upanel // of b. #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 //prefetch(0, mem(rdx, 11*8)) // prefetch line of next upanel of b prefetch(0, mem(rbx, 11*8)) // prefetch line of next upanel of b #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 1 #if 1 //prefetch(0, mem(rdx, r10, 1, 11*8)) prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 2 #if 1 //prefetch(0, mem(rdx, r10, 2, 11*8)) prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) // ---------------------------------- iteration 3 #if 1 //prefetch(0, mem(rdx, rcx, 1, 11*8)) //lea(mem(rdx, r10, 4), rdx) // a_prefetch += 4*cs_a; prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, 
r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP #if 1 prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vbroadcastsd(mem(rax, r8, 2), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm8) vfmadd231pd(ymm1, ymm2, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(r12, rcx) // reset rcx to current utile of c. mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) vmulpd(ymm0, ymm8, ymm8) vmulpd(ymm0, ymm9, ymm9) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; lea(mem(rcx, rdi, 2), rdx) // load address of c + 2*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. 
jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm8) vmovupd(ymm8, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm9) vmovupd(ymm9, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORED) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vextractf128(imm(0x1), ymm4, xmm12) vextractf128(imm(0x1), ymm6, xmm13) vextractf128(imm(0x1), ymm8, xmm14) vextractf128(imm(0x1), ymm10, xmm15) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), xmm3, xmm4) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm6) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm8) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm10) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) vmovupd(xmm8, mem(rcx, rsi, 2)) vmovupd(xmm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vfmadd231sd(mem(rdx), xmm3, xmm12) vfmadd231sd(mem(rdx, rsi, 1), xmm3, xmm13) vfmadd231sd(mem(rdx, rsi, 2), xmm3, xmm14) vfmadd231sd(mem(rdx, rax, 1), xmm3, xmm15) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vextractf128(imm(0x1), ymm5, xmm12) vextractf128(imm(0x1), ymm7, xmm13) vextractf128(imm(0x1), ymm9, 
xmm14) vextractf128(imm(0x1), ymm11, xmm15) vbroadcastsd(mem(rbx), ymm3) vfmadd231pd(mem(rcx), xmm3, xmm5) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm7) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm9) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm11) vmovupd(xmm5, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 1)) vmovupd(xmm9, mem(rcx, rsi, 2)) vmovupd(xmm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vfmadd231sd(mem(rdx), xmm3, xmm12) vfmadd231sd(mem(rdx, rsi, 1), xmm3, xmm13) vfmadd231sd(mem(rdx, rsi, 2), xmm3, xmm14) vfmadd231sd(mem(rdx, rax, 1), xmm3, xmm15) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm8, mem(rcx)) vmovupd(ymm9, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vunpcklpd(ymm10, ymm8, ymm2) vunpckhpd(ymm10, ymm8, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm4) vinsertf128(imm(0x1), xmm3, ymm1, ymm6) vperm2f128(imm(0x31), ymm2, ymm0, ymm8) vperm2f128(imm(0x31), ymm3, ymm1, ymm10) vextractf128(imm(0x1), ymm4, xmm12) vextractf128(imm(0x1), ymm6, xmm13) vextractf128(imm(0x1), ymm8, xmm14) vextractf128(imm(0x1), ymm10, xmm15) vmovupd(xmm4, mem(rcx)) vmovupd(xmm6, mem(rcx, rsi, 1)) vmovupd(xmm8, mem(rcx, rsi, 2)) vmovupd(xmm10, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) lea(mem(rdx, rsi, 4), rdx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vunpcklpd(ymm11, ymm9, ymm2) vunpckhpd(ymm11, ymm9, ymm3) vinsertf128(imm(0x1), xmm2, ymm0, ymm5) vinsertf128(imm(0x1), xmm3, ymm1, ymm7) vperm2f128(imm(0x31), ymm2, ymm0, ymm9) vperm2f128(imm(0x31), ymm3, ymm1, ymm11) vextractf128(imm(0x1), ymm5, xmm12) vextractf128(imm(0x1), ymm7, xmm13) vextractf128(imm(0x1), ymm9, xmm14) vextractf128(imm(0x1), ymm11, xmm15) vmovupd(xmm5, mem(rcx)) vmovupd(xmm7, mem(rcx, rsi, 1)) vmovupd(xmm9, mem(rcx, rsi, 2)) vmovupd(xmm11, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) vmovsd(xmm12, mem(rdx)) vmovsd(xmm13, mem(rdx, rsi, 1)) vmovsd(xmm14, mem(rdx, rsi, 2)) vmovsd(xmm15, mem(rdx, rax, 1)) //lea(mem(rdx, rsi, 4), rdx) label(.DDONE) lea(mem(r12, rsi, 8), r12) // c_jj = r12 += 8*cs_c //add(imm(8*8), r14) // b_jj = r14 += 8*cs_b mov(var(ps_b8), rbx) // load ps_b8 lea(mem(r14, rbx, 1), r14) // a_ii = r14 += ps_b8 dec(r11) // jj -= 1; jne(.DLOOP4X8J) // iterate again if jj != 0. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [ps_b8] "m" (ps_b8), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. if ( n_left ) { const dim_t mr_cur = 3; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; //double* restrict bj = b + j_edge*cs_b; //double* restrict bj = b + ( j_edge / 8 ) * ps_b; double* restrict bj = b + n_iter * ps_b; if ( 4 <= n_left ) { const dim_t nr_cur = 4; bli_dgemmsup_rv_haswell_asm_3x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rv_haswell_asm_3x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref_3x1 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } /* bli_dgemmsup_rv_haswell_asm_2x8n: updates a 2 x n0 block of C as C := beta*C + alpha*A*B. The assembly below loops over the n0/8 full 8-column panels (advancing B by the panel stride ps_b queried from the auxinfo and C by 8*cs_c), accumulating over k0 with a 4x-unrolled loop plus a k0%4 cleanup loop; when beta == 0 C is stored without being loaded. The n0%8 leftover columns are then passed to the 2x4, 2x2 and reference 2x1 kernels. m0 is not read in this function; conja, conjb and cntx are only forwarded to the edge-case kernels. */ void bli_dgemmsup_rv_haswell_asm_2x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c,
inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t n_iter = n0 / 8; uint64_t n_left = n0 % 8; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // Query the panel stride of B and convert it to units of bytes. uint64_t ps_b = bli_auxinfo_ps_b( data ); uint64_t ps_b8 = ps_b * sizeof( double ); if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. //mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds of allocated memory // (the likely result: a segmentation fault). mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // During preamble and loops: // r12 = rcx = c // r14 = rbx = b // read rax from var(a) near beginning of loop // r11 = n dim index jj mov(var(n_iter), r11) // jj = n_iter; label(.DLOOP2X8J) // LOOP OVER jj = [ n_iter ... 2 1 ] #if 0 vzeroall() // zero all xmm/ymm registers.
#else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) #endif mov(var(a), rax) // load address of a. //mov(r12, rcx) // reset rcx to current utile of c. mov(r14, rbx) // reset rbx to current upanel of b. cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(r12, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(r12, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(r12, rdi, 1, 7*8)) // prefetch c + 1*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(r12, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(r12, 1*8)) // prefetch c + 0*cs_c prefetch(0, mem(r12, rsi, 1, 1*8)) // prefetch c + 1*cs_c prefetch(0, mem(r12, rsi, 2, 1*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 1*8)) // prefetch c + 3*cs_c prefetch(0, mem(rdx, rsi, 1, 1*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 1*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 1*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 1*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #if 1 //lea(mem(r10, r10, 2), rcx) // rcx = 3*rs_b; // use byte offsets from rbx to // prefetch lines from next upanel // of b. #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop.
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 //prefetch(0, mem(rdx, 11*8)) // prefetch line of next upanel of b prefetch(0, mem(rbx, 11*8)) // prefetch line of next upanel of b #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 1 #if 1 //prefetch(0, mem(rdx, r10, 1, 11*8)) prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 2 #if 1 //prefetch(0, mem(rdx, r10, 2, 11*8)) prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 3 #if 1 //prefetch(0, mem(rdx, rcx, 1, 11*8)) //lea(mem(rdx, r10, 4), rdx) // a_prefetch += 4*cs_a; prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0.
label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP #if 1 prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) vbroadcastsd(mem(rax, r8, 1), ymm3) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(r12, rcx) // reset rcx to current utile of c. mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) vmulpd(ymm0, ymm7, ymm7) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; //lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm7) vmovupd(ymm7, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end.
label(.DCOLSTORED) /* interleave the two accumulated rows so that each xmm register holds one 2-element column of C (columns 0-3 first, then columns 4-7) */ vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rcx), xmm3, xmm0) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rcx)) vmovupd(xmm1, mem(rcx, rsi, 1)) vmovupd(xmm2, mem(rcx, rsi, 2)) vmovupd(xmm4, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vfmadd231pd(mem(rcx), xmm3, xmm0) vfmadd231pd(mem(rcx, rsi, 1), xmm3, xmm1) vfmadd231pd(mem(rcx, rsi, 2), xmm3, xmm2) vfmadd231pd(mem(rcx, rax, 1), xmm3, xmm4) vmovupd(xmm0, mem(rcx)) vmovupd(xmm1, mem(rcx, rsi, 1)) vmovupd(xmm2, mem(rcx, rsi, 2)) vmovupd(xmm4, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) vmovupd(ymm7, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end.
label(.DCOLSTORBZ) vunpcklpd(ymm6, ymm4, ymm0) vunpckhpd(ymm6, ymm4, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rcx)) vmovupd(xmm1, mem(rcx, rsi, 1)) vmovupd(xmm2, mem(rcx, rsi, 2)) vmovupd(xmm4, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vunpcklpd(ymm7, ymm5, ymm0) vunpckhpd(ymm7, ymm5, ymm1) vextractf128(imm(0x1), ymm0, xmm2) vextractf128(imm(0x1), ymm1, xmm4) vmovupd(xmm0, mem(rcx)) vmovupd(xmm1, mem(rcx, rsi, 1)) vmovupd(xmm2, mem(rcx, rsi, 2)) vmovupd(xmm4, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) label(.DDONE) lea(mem(r12, rsi, 8), r12) // c_jj = r12 += 8*cs_c //add(imm(8*8), r14) // b_jj = r14 += 8*cs_b mov(var(ps_b8), rbx) // load ps_b8 lea(mem(r14, rbx, 1), r14) // b_jj = r14 += ps_b8 dec(r11) // jj -= 1; jne(.DLOOP2X8J) // iterate again if jj != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [ps_b8] "m" (ps_b8), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the n dimension, if they exist.
if ( n_left ) { const dim_t mr_cur = 2; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; //double* restrict bj = b + j_edge*cs_b; //double* restrict bj = b + ( j_edge / 8 ) * ps_b; double* restrict bj = b + n_iter * ps_b; if ( 4 <= n_left ) { const dim_t nr_cur = 4; bli_dgemmsup_rv_haswell_asm_2x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rv_haswell_asm_2x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref_2x1 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } /* bli_dgemmsup_rv_haswell_asm_1x8n: updates a 1 x n0 block of C as C := beta*C + alpha*A*B. The assembly below loops over the n0/8 full 8-column panels (advancing B by the panel stride ps_b queried from the auxinfo and C by 8*cs_c), accumulating over k0 with a 4x-unrolled loop plus a k0%4 cleanup loop; when beta == 0 C is stored without being loaded. The n0%8 leftover columns are then passed to the 1x4, 1x2 and reference 1x1 kernels. m0 is not read in this function; conja, conjb and cntx are only forwarded to the edge-case kernels. */ void bli_dgemmsup_rv_haswell_asm_1x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t n_iter = n0 / 8; uint64_t n_left = n0 % 8; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // Query the panel stride of B and convert it to units of bytes.
uint64_t ps_b = bli_auxinfo_ps_b( data ); uint64_t ps_b8 = ps_b * sizeof( double ); if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. //mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. mov(var(rs_b), r10) // load rs_b //mov(var(cs_b), r11) // load cs_b lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) //lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) // NOTE: We cannot pre-load elements of a or b // because it could eventually, in the last // unrolled iter or the cleanup loop, result // in reading beyond the bounds of allocated memory // (the likely result: a segmentation fault). mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // During preamble and loops: // r12 = rcx = c // r14 = rbx = b // read rax from var(a) near beginning of loop // r11 = n dim index jj mov(var(n_iter), r11) // jj = n_iter; label(.DLOOP1X8J) // LOOP OVER jj = [ n_iter ... 2 1 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) #endif mov(var(a), rax) // load address of a. //mov(r12, rcx) // reset rcx to current utile of c. mov(r14, rbx) // reset rbx to current upanel of b. cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8.
jz(.DCOLPFETCH) // jump to column storage case label(.DROWPFETCH) // row-stored prefetching on c //lea(mem(r12, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(r12, 7*8)) // prefetch c + 0*rs_c jmp(.DPOSTPFETCH) // jump to end of prefetching c label(.DCOLPFETCH) // column-stored prefetching c mov(var(cs_c), rsi) // load cs_c to rsi (temporarily) lea(mem(, rsi, 8), rsi) // cs_c *= sizeof(double) lea(mem(r12, rsi, 2), rdx) // lea(mem(rdx, rsi, 1), rdx) // rdx = c + 3*cs_c; prefetch(0, mem(r12, 0*8)) // prefetch c + 0*cs_c prefetch(0, mem(r12, rsi, 1, 0*8)) // prefetch c + 1*cs_c prefetch(0, mem(r12, rsi, 2, 0*8)) // prefetch c + 2*cs_c prefetch(0, mem(rdx, 1*8)) // prefetch c + 3*cs_c; NOTE(review): byte offset is 1*8 here but 0*8 for the other seven columns -- confirm whether this is intended prefetch(0, mem(rdx, rsi, 1, 0*8)) // prefetch c + 4*cs_c prefetch(0, mem(rdx, rsi, 2, 0*8)) // prefetch c + 5*cs_c lea(mem(rdx, rsi, 2), rdx) // rdx = c + 5*cs_c; prefetch(0, mem(rdx, rsi, 1, 0*8)) // prefetch c + 6*cs_c prefetch(0, mem(rdx, rsi, 2, 0*8)) // prefetch c + 7*cs_c label(.DPOSTPFETCH) // done prefetching c #if 1 //lea(mem(r10, r10, 2), rcx) // rcx = 3*rs_b; // use byte offsets from rbx to // prefetch lines from next upanel // of b. #endif mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop.
label(.DLOOPKITER) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 //prefetch(0, mem(rdx, 11*8)) // prefetch line of next upanel of b prefetch(0, mem(rbx, 11*8)) // prefetch line of next upanel of b #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) // ---------------------------------- iteration 1 #if 1 //prefetch(0, mem(rdx, r10, 1, 11*8)) prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) // ---------------------------------- iteration 2 #if 1 //prefetch(0, mem(rdx, r10, 2, 11*8)) prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) // ---------------------------------- iteration 3 #if 1 //prefetch(0, mem(rdx, rcx, 1, 11*8)) //lea(mem(rdx, r10, 4), rdx) // a_prefetch += 4*cs_a; prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop.
label(.DLOOPKLEFT) // EDGE LOOP #if 1 prefetch(0, mem(rbx, 11*8)) #endif vmovupd(mem(rbx, 0*32), ymm0) vmovupd(mem(rbx, 1*32), ymm1) add(r10, rbx) // b += rs_b; vbroadcastsd(mem(rax ), ymm2) add(r9, rax) // a += cs_a; vfmadd231pd(ymm0, ymm2, ymm4) vfmadd231pd(ymm1, ymm2, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. label(.DPOSTACCUM) mov(r12, rcx) // reset rcx to current utile of c. mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*cs_c; //lea(mem(rcx, rdi, 4), rdx) // load address of c + 4*rs_c; lea(mem(rsi, rsi, 2), rax) // rax = 3*cs_c; // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORED) // jump to column storage case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) vfmadd231pd(mem(rcx, rsi, 4), ymm3, ymm5) vmovupd(ymm5, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end.
label(.DCOLSTORED) /* gather four elements of C (one per column) into ymm0, form beta*C + alpha*A*B, then scatter the result back one element per column; done once for columns 0-3 and once for columns 4-7 */ vmovlpd(mem(rcx), xmm0, xmm0) vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rcx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm4, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) vmovlpd(xmm1, mem(rcx, rsi, 2)) vmovhpd(xmm1, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vmovlpd(mem(rcx), xmm0, xmm0) vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0) vmovlpd(mem(rcx, rsi, 2), xmm1, xmm1) vmovhpd(mem(rcx, rax, 1), xmm1, xmm1) vperm2f128(imm(0x20), ymm1, ymm0, ymm0) vfmadd213pd(ymm5, ymm3, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) vmovlpd(xmm1, mem(rcx, rsi, 2)) vmovhpd(xmm1, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) cmp(imm(8), rdi) // set ZF if (8*rs_c) == 8. jz(.DCOLSTORBZ) // jump to column storage case label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) vmovupd(ymm5, mem(rcx, rsi, 4)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DCOLSTORBZ) vmovupd(ymm4, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) vmovlpd(xmm1, mem(rcx, rsi, 2)) vmovhpd(xmm1, mem(rcx, rax, 1)) lea(mem(rcx, rsi, 4), rcx) vmovupd(ymm5, ymm0) vextractf128(imm(1), ymm0, xmm1) vmovlpd(xmm0, mem(rcx)) vmovhpd(xmm0, mem(rcx, rsi, 1)) vmovlpd(xmm1, mem(rcx, rsi, 2)) vmovhpd(xmm1, mem(rcx, rax, 1)) //lea(mem(rcx, rsi, 4), rcx) label(.DDONE) lea(mem(r12, rsi, 8), r12) // c_jj = r12 += 8*cs_c //add(imm(8*8), r14) // b_jj = r14 += 8*cs_b mov(var(ps_b8), rbx) // load ps_b8 lea(mem(r14, rbx, 1), r14) // b_jj = r14 += ps_b8 dec(r11) // jj -= 1; jne(.DLOOP1X8J) // iterate again if jj != 0.
label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter] "m" (k_iter), [k_left] "m" (k_left), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [ps_b8] "m" (ps_b8), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the n dimension, if they exist. if ( n_left ) { const dim_t mr_cur = 1; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; //double* restrict bj = b + j_edge*cs_b; //double* restrict bj = b + ( j_edge / 8 ) * ps_b; double* restrict bj = b + n_iter * ps_b; if ( 4 <= n_left ) { const dim_t nr_cur = 4; bli_dgemmsup_rv_haswell_asm_1x4 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rv_haswell_asm_1x2 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 1 const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref_1x1 ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_ddotxv_ex ( conja, conjb, k0, alpha, ai, cs_a0, bj, rs_b0, beta, cij, cntx, NULL ); #endif } } }
blis-0.6.1/kernels/haswell/3/sup/old/000077500000000000000000000000001360743507500173125ustar00rootroot00000000000000blis-0.6.1/kernels/haswell/3/sup/old/bli_gemmsup_rd_haswell_asm_d6x8.c000066400000000000000000004243301360743507500257040ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrc: -------- ------ | | | | | | | | -------- ------ | | | | | | | | -------- += ------ ... | | | | | | | | -------- ------ | | | | | | | | -------- ------ : -------- ------ : Assumptions: - C is row-stored and B is column-stored; - A is row-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential microkernel is well-suited for a dot-product-based accumulation that performs vector loads from both A and B. */ // Prototype reference microkernels. GEMMSUP_KER_PROT( float, s, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( scomplex, c, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( dcomplex, z, gemmsup_r_haswell_ref ) // Define parameters and variables for edge case kernel map. #define NUM_MR 4 #define NUM_NR 4 #define FUNCPTR_T dgemmsup_ker_ft static dim_t mrs[NUM_MR] = { 6, 3, 2, 1 }; static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 }; static FUNCPTR_T kmap[NUM_MR][NUM_NR] = { /* 8 4 2 1 */ /* 6 */ { bli_dgemmsup_rd_haswell_asm_6x8, bli_dgemmsup_rd_haswell_asm_6x4, bli_dgemmsup_rd_haswell_asm_6x2, bli_dgemmsup_r_haswell_ref }, /* 3 */ { bli_dgemmsup_rd_haswell_asm_3x8, bli_dgemmsup_rd_haswell_asm_3x4, bli_dgemmsup_rd_haswell_asm_3x2, bli_dgemmsup_r_haswell_ref }, /* 2 */ { bli_dgemmsup_rd_haswell_asm_2x8, bli_dgemmsup_rd_haswell_asm_2x4, bli_dgemmsup_rd_haswell_asm_2x2, bli_dgemmsup_r_haswell_ref }, /* 1 */ { bli_dgemmsup_rd_haswell_asm_1x8, bli_dgemmsup_rd_haswell_asm_1x4, bli_dgemmsup_rd_haswell_asm_1x2, bli_dgemmsup_r_haswell_ref } }; void bli_dgemmsup_rd_haswell_asm_6x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Use a reference kernel if this is an edge case in 
the m or n // dimensions. if ( m0 < 6 || n0 < 8 ) { dim_t n_left = n0; double* restrict cj = c; double* restrict bj = b; // Iterate across columns (corresponding to elements of nrs) until // n_left is zero. for ( dim_t j = 0; n_left != 0; ++j ) { const dim_t nr_cur = nrs[ j ]; // Once we find the value of nrs that is less than (or equal to) // n_left, we use the kernels in that column. if ( nr_cur <= n_left ) { dim_t m_left = m0; double* restrict cij = cj; double* restrict ai = a; // Iterate down the current column (corresponding to elements // of mrs) until m_left is zero. for ( dim_t i = 0; m_left != 0; ++i ) { const dim_t mr_cur = mrs[ i ]; // Once we find the value of mrs that is less than (or equal // to) m_left, we select that kernel. if ( mr_cur <= m_left ) { FUNCPTR_T ker_fp = kmap[i][j]; //printf( "executing %d x %d sup kernel.\n", (int)mr_cur, (int)nr_cur ); // Call the kernel using current mrs and nrs values. ker_fp ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); // Advance C and A pointers by the mrs and nrs we just // used, and decrement m_left. cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } } // Advance C and B pointers by the mrs and nrs we just used, and // decrement n_left. cj += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } } return; } //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. 
mov(var(a), r12) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r10) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r10 = rcx = c // r12 = rax = a // r14 = rbx = b // r9 = m dim index ii // r15 = n dim index jj #if 1 mov(imm(0), r9) // ii = 0; label(.DLOOP3X4I) // LOOP OVER ii = [ 0 1 ... ] lea(mem( , r9, 1), rsi) // rsi = r9 = 3*ii; imul(rdi, rsi) // rsi *= rs_c; lea(mem(r10, rsi, 1), rdx) // rdx = c_jj + 3*ii*rs_c; lea(mem( , r9, 1), rsi) // rsi = r9 = 3*ii; imul(r8, rsi) // rsi *= rs_a; lea(mem(r12, rsi, 1), r12) // rax = a + 3*ii*rs_a; mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] vzeroall() // zero all xmm/ymm registers. lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(rdx, rsi, 1), rcx) // rcx = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(r14, rsi, 1), rbx) // rbx = b + 4*jj*cs_b; lea(mem( , r12, 1), rax) // rax = a_ii; #endif #if 0 prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c #else prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. 
je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, 
ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. 
label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) #if 1 add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. add(imm(3), r9) // ii += 3; cmp(imm(3), r9) // compare ii to 3 jle(.DLOOP3X4I) // if ii <= 3, jump to beginning #endif label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_3x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r12) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r10) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r10 = rcx = c // r12 = rax = a // r14 = rbx = b // r9 = m dim index ii // r15 = n dim index jj mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] vzeroall() // zero all xmm/ymm registers. lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r10, rsi, 1), rcx) // rcx = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(r14, rsi, 1), rbx) // rbx = b + 4*jj*cs_b; lea(mem( , r12, 1), rax) // rax = a; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) 
vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. 
label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_2x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r12) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r10) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r10 = rcx = c // r12 = rax = a // r14 = rbx = b // r9 = m dim index ii // r15 = n dim index jj mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] vzeroall() // zero all xmm/ymm registers. lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r10, rsi, 1), rcx) // rcx = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(r14, rsi, 1), rbx) // rbx = b + 4*jj*cs_b; lea(mem( , r12, 1), rax) // rax = a; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) 
vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. 
vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x8 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r12) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r10) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r10 = rcx = c // r12 = rax = a // r14 = rbx = b // r9 = m dim index ii // r15 = n dim index jj mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] vzeroall() // zero all xmm/ymm registers. lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r10, rsi, 1), rcx) // rcx = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(r14, rsi, 1), rbx) // rbx = b + 4*jj*cs_b; lea(mem( , r12, 1), rax) // rax = a; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha mov(var(cs_c), rsi) // load cs_c lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_6x4 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r12) // load address of a. 
/* 6x4 kernel setup: convert rs_a, cs_b and rs_c to byte strides (r8, r11, rdi), form 3*cs_b in r13, and keep the base addresses of b and c in r14 and r10. The .DLOOP3X4I loop that follows handles one 3-row panel per pass: it zeroes the accumulators, derives rcx (c), rax (a) and rbx (b) for the panel from the row offset in r9, prefetches three rows of c, and then enters the k loops. */ mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r10) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r10 = rcx = c // r12 = rax = a // r14 = rbx = b // r9 = m dim index ii // r15 = n dim index jj mov(imm(0), r9) // ii = 0; label(.DLOOP3X4I) // LOOP OVER ii = [ 0 1 ... ] vzeroall() // zero all xmm/ymm registers. lea(mem( , r9, 1), rsi) // rsi = r9 = 3*ii; imul(rdi, rsi) // rsi *= rs_c; lea(mem(r10, rsi, 1), rcx) // rcx = c + 3*ii*rs_c; lea(mem( , r9, 1), rsi) // rsi = r9 = 3*ii; imul(r8, rsi) // rsi *= rs_a; lea(mem(r12, rsi, 1), rax) // rax = a + 3*ii*rs_a; lea(mem( , r14, 1), rbx) // rbx = b; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop.
/* Main k loop, unrolled four times (iterations 0-3). Each unrolled iteration loads 4 doubles from each of 3 rows of a (ymm0-ymm2) and from each of 4 columns of b (ymm3, reloaded per column), and accumulates the elementwise products into the 12 accumulators ymm4-ymm15; both a and b then advance by 4 doubles. */ label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12)
/* Remainder of main-loop iteration 2 (fourth column of b), then iteration 3 and the loop branch. After that the k_iter4 edge loop performs the same 4-wide step once per pass. */ vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0.
/* Scalar k tail: processes the last k_left1 elements one double at a time. Each vmovsd loads a single double into element 0; per the x86 ISA the VEX-encoded load form zeroes the rest of the full register, so the ymm-wide FMAs below add zero to the upper accumulator elements. */ label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0.
/* Post-accumulation: horizontally reduce each of the 12 accumulators to one scalar, pack the four sums belonging to each row into ymm4, ymm5 and ymm6, scale them by alpha, and then either update three rows of c as beta*c + result (.DROWSTORED) or branch to .DBETAZERO when beta == 0 so that c is never read. rcx steps through the rows of c by rdi. */ label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end.
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(3), r9) // ii += 3; cmp(imm(3), r9) // compare ii to 3 jle(.DLOOP3X4I) // if ii <= 3, jump to beginning // of ii loop; otherwise, loop ends. label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_3x4 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
/* k0 is split as 16*k_iter16 + 4*k_iter4 + k_left1, matching the unrolled main loop, the 4-wide edge loop and the scalar edge loop of the asm block; the strides are copied into 64-bit locals. The asm prologue then zeroes the vector registers, loads a and b, converts rs_a, cs_b and rs_c to byte strides, and prefetches three rows of c. */ uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // load c and its row stride, then // prefetch the three rows of c. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop.
/* Main k loop, unrolled four times (iterations 0-3). Each unrolled iteration loads 4 doubles from each of 3 rows of a (ymm0-ymm2) and from each of 4 columns of b (ymm3, reloaded per column), and accumulates the elementwise products into the 12 accumulators ymm4-ymm15; both a and b then advance by 4 doubles. */ label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12)
/* Remainder of main-loop iteration 2 (fourth column of b), then iteration 3 and the loop branch. After that the k_iter4 edge loop performs the same 4-wide step once per pass. */ vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0.
/* Scalar k tail: processes the last k_left1 elements one double at a time. Each vmovsd loads a single double into element 0; per the x86 ISA the VEX-encoded load form zeroes the rest of the full register, so the ymm-wide FMAs below add zero to the upper accumulator elements. */ label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0.
/* Post-accumulation: horizontally reduce each of the 12 accumulators to one scalar, pack the four sums belonging to each row into ymm4, ymm5 and ymm6, scale them by alpha, and then either update three rows of c as beta*c + result (.DROWSTORED) or branch to .DBETAZERO when beta == 0 so that c is never read. rcx steps through the rows of c by rdi. */ label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end.
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_2x4 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. 
/* 2x4 kernel setup: convert rs_a, cs_b and rs_c to byte strides (r8, r11, rdi), form 3*cs_b in r13, load b and c, and prefetch two rows of c. The main k loop that follows is unrolled four times; each unrolled iteration multiplies 4 doubles from each of 2 rows of a (ymm0, ymm1) with 4 columns of b (ymm3) into the accumulators ymm4/5, ymm7/8, ymm10/11 and ymm13/14. */ mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // load c and its row stride, then // prefetch the two rows of c. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8),
rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop.
/* Edge loops of the 2x4 kernel. .DLOOPKITER4 repeats the 4-wide step of the main loop once per pass; .DLOOPKLEFT1 then processes the last k_left1 elements one double at a time. Each vmovsd loads a single double into element 0; per the x86 ISA the VEX-encoded load form zeroes the rest of the full register, so the ymm-wide FMAs in the scalar loop add zero to the upper accumulator elements. */ label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0.
/* Post-accumulation: horizontally reduce each of the 8 accumulators to one scalar, pack the four sums belonging to each row into ymm4 and ymm5, scale them by alpha, and then either update two rows of c as beta*c + result (.DROWSTORED) or branch to .DBETAZERO when beta == 0 so that c is never read. rcx steps from the first to the second row of c by rdi. */ label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end.
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x4 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | | | -------- -- -- -- ... 
| | | | -------- += -- -- -- | | | | -------- | | | | -------- : -------- : */ // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_6x2 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | -------- -- -- -- ... | | -------- += -- -- -- | | -------- -- -- -- | | -------- -- -- -- : -------- -- -- -- : */ // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. 
mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) lea(mem(rcx, rdi, 2), rdx) // lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c prefetch(0, mem(rdx, 7*8)) // prefetch c + 3*rs_c prefetch(0, mem(rdx, rdi, 1, 7*8)) // prefetch c + 4*rs_c prefetch(0, mem(rdx, rdi, 2, 7*8)) // prefetch c + 5*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) 
vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovsd(mem(rax, r8, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovsd(mem(rax, r13, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rax, r8, 4), xmm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovsd(mem(rax, r15, 1), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 // ymm8 ymm9 // ymm10 ymm11 // ymm12 ymm13 // ymm14 ymm15 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) vhaddpd( ymm9, ymm8, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm8 ) vhaddpd( ymm11, ymm10, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm10 ) vhaddpd( ymm13, ymm12, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm12 ) vhaddpd( ymm15, ymm14, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm14 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) // xmm8 = sum(ymm8) sum(ymm9) // xmm10 = sum(ymm10) sum(ymm11) // xmm12 = sum(ymm12) sum(ymm13) // xmm14 = sum(ymm14) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) vmulpd(xmm0, xmm8, xmm8) vmulpd(xmm0, xmm10, xmm10) vmulpd(xmm0, xmm12, xmm12) vmulpd(xmm0, xmm14, xmm14) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm10) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm12) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm14) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_3x2 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data 
); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | -------- -- -- -- ... | | -------- += -- -- -- | | -------- -- -- -- | | -------- -- -- -- : -------- -- -- -- : */ // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. 
label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovsd(mem(rax, r8, 2), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 // ymm8 ymm9 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) vhaddpd( ymm9, ymm8, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm8 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) // xmm8 = sum(ymm8) sum(ymm9) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) vmulpd(xmm0, xmm8, xmm8) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vmovupd(xmm8, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_2x2 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | -------- -- -- -- ... 
| | -------- += -- -- -- | | -------- -- -- -- | | -------- -- -- -- : -------- -- -- -- : */ // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. 
vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x2 ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | -------- -- -- -- ... | | -------- += -- -- -- | | -------- -- -- -- | | -------- -- -- -- : -------- -- -- -- : */ // ------------------------------------------------------------------------- begin_asm() vzeroall() // zero all xmm/ymm registers. mov(var(a), rax) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. 
label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm5 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm5) // xmm4 = sum(ymm4) sum(ymm5) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/haswell/3/sup/old/bli_gemmsup_rd_haswell_asm_d6x8m.c000066400000000000000000004443601360743507500260660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrc: -------- ------ | | | | | | | | -------- ------ | | | | | | | | -------- += ------ ... | | | | | | | | -------- ------ | | | | | | | | -------- ------ : -------- ------ : Assumptions: - C is row-stored and B is column-stored; - A is row-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential microkernel is well-suited for a dot-product-based accumulation that performs vector loads from both A and B. */ // Prototype reference microkernels. GEMMSUP_KER_PROT( float, s, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( scomplex, c, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( dcomplex, z, gemmsup_r_haswell_ref ) // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4
#define NUM_NR 4

#define FUNCPTR_T dgemmsup_ker_ft

// NOTE: The lookup tables below are compiled out (#if 0).
// bli_dgemmsup_rd_haswell_asm_6x8m() dispatches its edge cases with
// explicit if-statements instead of consulting kmap.
#if 0
static dim_t mrs[NUM_MR] = { 6, 3, 2, 1 };
static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 };
static FUNCPTR_T kmap[NUM_MR][NUM_NR] =
{   /* 8 4 2 1 */
/* 6 */ { bli_dgemmsup_rd_haswell_asm_6x8m, bli_dgemmsup_rd_haswell_asm_6x4m, bli_dgemmsup_rd_haswell_asm_6x2m, bli_dgemmsup_r_haswell_ref },
/* 3 */ { bli_dgemmsup_rd_haswell_asm_3x8m, bli_dgemmsup_rd_haswell_asm_3x4m, bli_dgemmsup_rd_haswell_asm_3x2m, bli_dgemmsup_r_haswell_ref },
/* 2 */ { bli_dgemmsup_rd_haswell_asm_2x8m, bli_dgemmsup_rd_haswell_asm_2x4m, bli_dgemmsup_rd_haswell_asm_2x2m, bli_dgemmsup_r_haswell_ref },
/* 1 */ { bli_dgemmsup_rd_haswell_asm_1x8m, bli_dgemmsup_rd_haswell_asm_1x4m, bli_dgemmsup_rd_haswell_asm_1x2m, bli_dgemmsup_r_haswell_ref }
};
#endif


/*
   bli_dgemmsup_rd_haswell_asm_6x8m()

   Dot-product-based double-precision gemmsup kernel that updates a block
   of C as C := beta * C + alpha * A * B.

   Control flow, as implemented below:
   - If n0 % 8 != 0, the leftover columns are handed to
     bli_dgemmsup_rd_haswell_asm_6x4m, bli_dgemmsup_rd_haswell_asm_6x2m
     and/or bli_dgemv_ex (single leftover column), and the function
     returns WITHOUT executing the assembly region. Only the n0 % 8
     leftover columns are computed on that path, so n0 is presumably
     never larger than 8 -- confirm against the caller.
   - Otherwise the assembly region computes m0 / 3 three-row slabs of C,
     each slab as two 3x4 tiles (jj = 0 and jj = 4).
   - The m0 % 3 leftover rows are handed to
     bli_dgemmsup_rd_haswell_asm_2x8m or bli_dgemmsup_rd_haswell_asm_1x8m.

   The k dimension is split as k0 = 16*k_iter16 + 4*k_iter4 + k_left1.

   Storage, as accessed by the assembly region:
   - rows of A are rs_a0 elements apart and each row is read contiguously
     along k (cs_a0 is never loaded);
   - columns of B are cs_b0 elements apart and each column is read
     contiguously along k (rs_b0 is never loaded);
   - rows of C are rs_c0 elements apart and the four elements of a tile
     row are read/written contiguously (cs_c0 is never loaded).
   The strides that are not loaded are still forwarded to the edge-case
   kernels.

   Parameters:
   - conja, conjb: forwarded to the edge-case kernels (conjb also to
     bli_dgemv_ex); not otherwise read here.
   - m0, n0, k0:   block dimensions.
   - alpha, beta:  pointers to the scalars.
   - a, b, c:      base addresses of the A, B and C blocks, with their
                   row/column strides in elements.
   - data, cntx:   forwarded to the edge-case kernels (cntx also to
                   bli_dgemv_ex); not otherwise read here.

   NOTE(review): the instruction macros come from bli_x86_asm_macros.h,
   which is not visible here; comments below assume they expand to the
   like-named x86 instructions with AT&T operand order (destination
   last), as BLIS_ASM_SYNTAX_ATT suggests.
*/
void bli_dgemmsup_rd_haswell_asm_6x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	uint64_t n_left = n0 % 8;

	// First check whether this is an edge case in the n dimension. If so,
	// dispatch other 6x?m kernels, as needed.
	if ( n_left )
	{
		double* restrict cij = c;
		double* restrict bj  = b;
		double* restrict ai  = a;

		if ( 4 <= n_left )
		{
			const dim_t nr_cur = 4;

			bli_dgemmsup_rd_haswell_asm_6x4m
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			// Advance C and B past the columns just computed.
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 2 <= n_left )
		{
			const dim_t nr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_6x2m
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			// Advance C and B past the columns just computed.
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 1 == n_left )
		{
			// A single leftover column is a matrix-vector product, so it is
			// computed with gemv rather than with the reference kernel.
#if 0
			const dim_t nr_cur = 1;

			bli_dgemmsup_r_haswell_ref
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
#else
			bli_dgemv_ex
			(
			  BLIS_NO_TRANSPOSE, conjb, m0, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0,
			  beta, cij, rs_c0, cntx, NULL
			);
#endif
		}
		return;
	}

	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t m_iter = m0 / 3;
	uint64_t m_left = m0 % 3;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// With fewer than three rows there is no full slab for the assembly
	// region to process; go straight to the m edge-case handling.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	//mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b
	lea(mem(r8, r8, 2), r10)           // r10 = 3*rs_a

	//mov(var(c), r12)                   // load address of c
	//mov(var(rs_c), rdi)                // load rs_c
	//lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// Register roles for the loops below:
	// r12 = base of the current C tile;     rcx = working copy
	// r14 = base of the current A slab;     rax = working copy
	// rdx = base of the current B panel;    rbx = working copy
	// r9  = m dim index ii
	// r15 = n dim index jj

	mov(imm(0), r15)                   // jj = 0;

	label(.DLOOP3X4J)                  // LOOP OVER jj = [ 0 4 ]

	mov(var(a), r14)                   // load address of a
	mov(var(c), r12)                   // load address of c

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj;
	imul(imm(1*8), rsi)                // rsi *= 1*8 (C columns are contiguous)
	lea(mem(r12, rsi, 1), r12)         // r12 = c + jj columns;

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj;
	imul(r11, rsi)                     // rsi *= cs_b;
	lea(mem(rdx, rsi, 1), rdx)         // rdx += jj*cs_b. rdx is updated in
	                                   // place (it is loaded from b only once,
	                                   // before the jj loop), so this yields
	                                   // b for jj = 0 and b + 4*cs_b for jj = 4.

	mov(var(m_iter), r9)               // ii = m_iter;

	label(.DLOOP3X4I)                  // LOOP OVER ii = [ m_iter ... 1 ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem(r12), rcx)                 // rcx = c_iijj;
	lea(mem(r14), rax)                 // rax = a_ii;
	lea(mem(rdx), rbx)                 // rbx = b_jj;

#if 0
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
#endif
	lea(mem(r8, r8, 4), rdi)           // rdi = 5*rs_a (used only by the
	                                   // prefetches in the k loops; rdi is
	                                   // reloaded with rs_c after the k loops)

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	// Each unrolled step below loads four consecutive k elements of three
	// rows of A (ymm0-ymm2) and of four columns of B (ymm3, one column at a
	// time) and accumulates the element-wise products into twelve
	// accumulators: ymm(4+i+3*j) holds partial sums for row i, column j.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

#if 1
	                                   // Rows 3..5 relative to rax are the
	                                   // rows used by the next ii iteration.
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8, 4, 0*8))  // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

#if 1
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8, 4, 0*8))  // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

#if 1
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8, 4, 0*8))  // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: The FMAs must use the ymm form
	                                   // here because an xmm-form FMA would
	                                   // zero out the high bits of the
	                                   // destination (accumulator) registers,
	                                   // which would destroy intermediate
	                                   // results. The vmovsd loads zero the
	                                   // upper lanes of ymm0-ymm3, so only
	                                   // lane 0 receives a nonzero product.

	vmovsd(mem(rax ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax)                 // a += 1 element along k = 1*8 bytes;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1 element along k = 1*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // Accumulator layout (row i, column j):
	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14
	                                   // ymm6  ymm9  ymm12 ymm15
	                                   // Each accumulator holds four partial
	                                   // sums; reduce each to a scalar and pack
	                                   // one row of four results per register.

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	                                   // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	mov(var(rs_c), rdi)                // load rs_c (replaces 5*rs_a in rdi)
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0
	                                   // NOTE(review): vucomisd also sets ZF
	                                   // for an unordered compare, so a NaN
	                                   // beta would take the beta == 0 path
	                                   // as well -- confirm this is intended.

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)                 // C(row) = beta*C(row) + alpha*A*B

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)                 // C(row) = alpha*A*B; C is not read

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	lea(mem(r12, rdi, 2), r12)         //
	lea(mem(r12, rdi, 1), r12)         // c_ii = r12 += 3*rs_c

	lea(mem(r14, r8, 2), r14)          //
	lea(mem(r14, r8, 1), r14)          // a_ii = r14 += 3*rs_a

	dec(r9)                            // ii -= 1;
	jne(.DLOOP3X4I)                    // iterate again if ii != 0.

	add(imm(4), r15)                   // jj += 4;
	cmp(imm(4), r15)                   // compare jj to 4
	jle(.DLOOP3X4J)                    // if jj <= 4, jump to beginning
	                                   // of jj loop; otherwise, loop ends.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [m_iter] "m" (m_iter),
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
	if ( m_left )
	{
		const dim_t      nr_cur = 8;
		// Index of the first row not covered by the three-row slabs.
		const dim_t      i_edge = m0 - ( dim_t )m_left;

		double* restrict cij = c + i_edge*rs_c;
		double* restrict bj  = b;
		double* restrict ai  = a + i_edge*rs_a;

		// m_left is m0 % 3, so at most one of the two branches below runs.
		if ( 2 == m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_2x8m
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			//cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rd_haswell_asm_1x8m
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
	}
}

/*
   bli_dgemmsup_rd_haswell_asm_3x8m()

   Dot-product-based double-precision kernel that updates a 3x8 block of
   C as C := beta * C + alpha * A * B, computed as two 3x4 tiles (jj = 0
   and jj = 4).

   The k dimension is split as k0 = 16*k_iter16 + 4*k_iter4 + k_left1.

   Storage, as accessed by the assembly region:
   - rows of A are rs_a0 elements apart and each row is read contiguously
     along k (cs_a0 is never loaded);
   - columns of B are cs_b0 elements apart and each column is read
     contiguously along k (rs_b0 is never loaded);
   - rows of C are rs_c0 elements apart and the four elements of a tile
     row are read/written contiguously (cs_c0 is never loaded).

   NOTE: m0, n0, conja, conjb, data and cntx are not referenced in this
   function body. Despite the "m" suffix there is no loop over m: exactly
   three rows and eight columns are always processed.

   NOTE(review): the instruction macros come from bli_x86_asm_macros.h,
   which is not visible here; comments below assume they expand to the
   like-named x86 instructions with AT&T operand order (destination
   last), as BLIS_ASM_SYNTAX_ATT suggests.
*/
void bli_dgemmsup_rd_haswell_asm_3x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), r12)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// Register roles for the loops below:
	// r12 = base of c;    rcx = working pointer into the current C tile
	// r14 = base of a;    rax = working pointer into A
	// rdx = base of b;    rbx = working pointer into the current B panel
	// r9  = unused
	// r15 = n dim index jj
	// r10 = unused

	mov(imm(0), r15)                   // jj = 0;

	label(.DLOOP3X4J)                  // LOOP OVER jj = [ 0 4 ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj;
	imul(imm(1*8), rsi)                // rsi *= 1*8 (C columns are contiguous)
	lea(mem(r12, rsi, 1), rcx)         // rcx = c + jj columns;

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj;
	imul(r11, rsi)                     // rsi *= cs_b;
	lea(mem(rdx, rsi, 1), rbx)         // rbx = b + jj*cs_b;

	lea(mem(r14), rax)                 // rax = a;

#if 0
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c
#else
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
#endif

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	// Each unrolled step below loads four consecutive k elements of three
	// rows of A (ymm0-ymm2) and of four columns of B (ymm3, one column at a
	// time) and accumulates the element-wise products into twelve
	// accumulators: ymm(4+i+3*j) holds partial sums for row i, column j.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 elements along k = 4*8 bytes;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 elements along k = 4*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: The FMAs must use the ymm form
	                                   // here because an xmm-form FMA would
	                                   // zero out the high bits of the
	                                   // destination (accumulator) registers,
	                                   // which would destroy intermediate
	                                   // results. The vmovsd loads zero the
	                                   // upper lanes of ymm0-ymm3, so only
	                                   // lane 0 receives a nonzero product.

	vmovsd(mem(rax ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax)                 // a += 1 element along k = 1*8 bytes;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1 element along k = 1*8 bytes;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // Accumulator layout (row i, column j):
	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14
	                                   // ymm6  ymm9  ymm12 ymm15
	                                   // Each accumulator holds four partial
	                                   // sums; reduce each to a scalar and pack
	                                   // one row of four results per register.

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	                                   // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0
	                                   // NOTE(review): vucomisd also sets ZF
	                                   // for an unordered compare, so a NaN
	                                   // beta would take the beta == 0 path
	                                   // as well -- confirm this is intended.

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)                 // C(row) = beta*C(row) + alpha*A*B

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)                 // C(row) = alpha*A*B; C is not read

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	add(imm(4), r15)                   // jj += 4;
	cmp(imm(4), r15)                   // compare jj to 4
	jle(.DLOOP3X4J)                    // if jj <= 4, jump to beginning
	                                   // of jj loop; otherwise, loop ends.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rd_haswell_asm_2x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = unused // r15 = n dim index jj // r10 = unused mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) #endif lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r12, rsi, 1), rcx) // rcx = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(rdx, rsi, 1), rbx) // rbx = b + 4*jj*cs_b; lea(mem(r14), rax) // rax = a; #if 0 prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c #else prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c #endif mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) 
vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. 
vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO)  // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: each row becomes beta * c + (alpha-scaled sums).
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)  // advance to the next row of c

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)  // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	// beta == 0: store the results without reading c.
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	add(imm(4), r15)  // jj += 4;
	cmp(imm(4), r15)  // compare jj to 4
	jle(.DLOOP3X4J)  // if jj <= 4, jump to beginning
	                 // of jj loop; otherwise, loop ends.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// bli_dgemmsup_rd_haswell_asm_1x8m
//
// Inline-assembly double-precision kernel that updates a 1x8 block of C as
//   C := beta * C + alpha * A * B.
// The eight columns are handled in two passes of four columns each
// (jj = 0 and jj = 4). Each element is a dot product along k accumulated
// four doubles at a time and then summed horizontally.
//
// What the code below relies on:
// - a and b are both advanced by 8 bytes per step in k, i.e. consecutive k
//   elements are contiguous in memory; rs_a, cs_a and rs_b are never read.
// - C is stored with one four-element contiguous vector store per pass and
//   the jj offset into c is scaled by a fixed 8 bytes; rs_c and cs_c are
//   never read.
// - conja, conjb, m0, n0, data and cntx are not referenced.
// - If beta compares equal to zero, C is overwritten without being loaded.
void bli_dgemmsup_rd_haswell_asm_1x8m
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k is split into blocks of 16 (main loop, unrolled 4x4), then blocks
	// of 4 (vector edge loop), then single elements (scalar edge loop).
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()  // zero all xmm/ymm registers.

	mov(var(a), r14)  // load address of a.
	//mov(var(rs_a), r8)  // load rs_a
	//mov(var(cs_a), r9)  // load cs_a
	//lea(mem(, r8, 8), r8)  // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)  // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)  // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)  // r15 = 5*rs_a

	mov(var(b), rdx)  // load address of b.
	//mov(var(rs_b), r10)  // load rs_b
	mov(var(cs_b), r11)  // load cs_b
	//lea(mem(, r10, 8), r10)  // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)  // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)  // r13 = 3*cs_b

	mov(var(c), r12)  // load address of c
	//mov(var(rs_c), rdi)  // load rs_c
	//lea(mem(, rdi, 8), rdi)  // rs_c *= sizeof(double)

	// r12 = base of c; rcx = c pointer for the current jj pass
	// r14 = base of a; rax = a pointer advanced along k
	// rdx = base of b; rbx = b pointer advanced along k
	// r9  = unused (there is no m loop in this kernel)
	// r15 = n dim index jj

	mov(imm(0), r15)  // jj = 0;

	label(.DLOOP3X4J)  // LOOP OVER jj = [ 0 4 ]

#if 0
	vzeroall()  // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	// Only the four accumulators used by this kernel are cleared.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm13, ymm13, ymm13)
#endif

	lea(mem( , r15, 1), rsi)  // rsi = jj;
	imul(imm(1*8), rsi)  // rsi *= 1*8 (bytes per column of c)
	lea(mem(r12, rsi, 1), rcx)  // rcx = c + jj*8;

	lea(mem( , r15, 1), rsi)  // rsi = jj;
	imul(r11, rsi)  // rsi *= cs_b (already in bytes);
	lea(mem(rdx, rsi, 1), rbx)  // rbx = b + jj*cs_b;

	lea(mem(r14), rax)  // rax = a;

#if 0
	prefetch(0, mem(rcx, 7*8))  // prefetch c + 0*rs_c
#else
	prefetch(0, mem(rcx, 3*8))  // prefetch c + 0*rs_c
#endif

	mov(var(k_iter16), rsi)  // i = k_iter16;
	test(rsi, rsi)  // check i via logical AND.
	je(.DCONSIDKITER4)  // if i == 0, jump to code that
	                    // contains the k_iter4 loop.

	label(.DLOOPKITER16)  // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rax ), ymm0)  // 4 k-elements of the single row of a
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)  // 4 k-elements of column jj+0 of b
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)  // column jj+1 of b
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)  // column jj+2 of b
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)  // column jj+3 of b
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)  // i -= 1;
	jne(.DLOOPKITER16)  // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)  // i = k_iter4;
	test(rsi, rsi)  // check i via logical AND.
	je(.DCONSIDKLEFT1)  // if i == 0, jump to code that
	                    // considers k_left1 loop.
	                    // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)  // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)  // i -= 1;
	jne(.DLOOPKITER4)  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)  // i = k_left1;
	test(rsi, rsi)  // check i via logical AND.
	je(.DPOSTACCUM)  // if i == 0, we're done; jump to end.
	                 // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)  // EDGE LOOP (scalar)

	// NOTE: We must use ymm registers here bc
	// using the xmm registers would zero out the
	// high bits of the destination registers,
	// which would destroy intermediate results.
	// The scalar loads below go to xmm registers, while the
	// multiply-accumulates still operate on the full ymm accumulators.

	vmovsd(mem(rax ), xmm0)
	add(imm(1*8), rax)  // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)  // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)  // i -= 1;
	jne(.DLOOPKLEFT1)  // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (columns jj+0 .. jj+3):
	// ymm4 ymm7 ymm10 ymm13

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	// xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	// xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	// ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)

	mov(var(alpha), rax)  // load address of alpha
	mov(var(beta), rbx)  // load address of beta
	vbroadcastsd(mem(rax), ymm0)  // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)  // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)  // scale by alpha

	//mov(var(cs_c), rsi)  // load cs_c
	//lea(mem(, rsi, 8), rsi)  // rsi = cs_c * sizeof(double)

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)  // set ymm0 to zero.
	vucomisd(xmm0, xmm3)  // set ZF if beta == 0.
	je(.DBETAZERO)  // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: the row becomes beta * c + (alpha-scaled sums).
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)  // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	// beta == 0: store the results without reading c.
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	add(imm(4), r15)  // jj += 4;
	cmp(imm(4), r15)  // compare jj to 4
	jle(.DLOOP3X4J)  // if jj <= 4, jump to beginning
	                 // of jj loop; otherwise, loop ends.
label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// bli_dgemmsup_rd_haswell_asm_6x4m
//
// Inline-assembly double-precision kernel that updates an m0 x 4 panel of C
// as C := beta * C + alpha * A * B. The assembly loop handles m0 / 3 groups
// of three rows each; the m0 % 3 leftover rows (two or one) are delegated to
// bli_dgemmsup_rd_haswell_asm_2x4m or bli_dgemmsup_rd_haswell_asm_1x4m.
// Each element is a dot product along k accumulated four doubles at a time
// and then summed horizontally.
//
// What the code below relies on:
// - a and b are both advanced by 8 bytes per step in k, i.e. consecutive k
//   elements are contiguous in memory; cs_a and rs_b are never read by the
//   assembly (they are only forwarded to the edge-case kernels).
// - Each row of C is written with one four-element contiguous vector store;
//   cs_c is never read by the assembly.
// - conja, conjb, data and cntx are only forwarded to the edge-case kernels;
//   n0 is not referenced.
// - If beta compares equal to zero, C is overwritten without being loaded.
void bli_dgemmsup_rd_haswell_asm_6x4m
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k is split into blocks of 16 (main loop, unrolled 4x4), then blocks
	// of 4 (vector edge loop), then single elements (scalar edge loop).
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	// m is split into groups of three rows plus a remainder of 0, 1 or 2.
	uint64_t m_iter = m0 / 3;
	uint64_t m_left = m0 % 3;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// Fewer than three rows: skip the assembly loop entirely.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()  // zero all xmm/ymm registers.

	mov(var(a), r14)  // load address of a.
	mov(var(rs_a), r8)  // load rs_a
	//mov(var(cs_a), r9)  // load cs_a
	lea(mem(, r8, 8), r8)  // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)  // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)  // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)  // r15 = 5*rs_a

	mov(var(b), rdx)  // load address of b.
	//mov(var(rs_b), r10)  // load rs_b
	mov(var(cs_b), r11)  // load cs_b
	//lea(mem(, r10, 8), r10)  // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)  // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)  // r13 = 3*cs_b
	lea(mem(r8, r8, 2), r10)  // r10 = 3*rs_a

	mov(var(c), r12)  // load address of c
	//mov(var(rs_c), rdi)  // load rs_c
	//lea(mem(, rdi, 8), rdi)  // rs_c *= sizeof(double)

	// r12 = c pointer for the current row group; rcx = working copy
	// r14 = a pointer for the current row group; rax = working copy
	// rdx = base of b; rbx = b pointer advanced along k
	// r9  = m dim index ii
	// r10 = 3*rs_a (used as a prefetch offset)
	// r15 = unused

	mov(var(m_iter), r9)  // ii = m_iter;

	label(.DLOOP3X4I)  // LOOP OVER ii = [ m_iter .. 1 ]

#if 0
	vzeroall()  // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	// r12 and r14 are advanced by three rows at the end of every ii
	// iteration, so these are plain copies of the current group pointers.
	lea(mem(r12), rcx)  // rcx = c for the current row group;
	lea(mem(r14), rax)  // rax = a for the current row group;
	lea(mem(rdx), rbx)  // rbx = b;

#if 0
	mov(var(rs_c), rdi)  // load rs_c
	lea(mem(, rdi, 8), rdi)  // rs_c *= sizeof(double)
	prefetch(0, mem(rcx, 3*8))  // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8))  // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8))  // prefetch c + 2*rs_c
#endif
	// rdi temporarily holds a prefetch offset; it is reloaded with rs_c
	// after the accumulation loops.
	lea(mem(r8, r8, 4), rdi)  // rdi = 5*rs_a

	mov(var(k_iter16), rsi)  // i = k_iter16;
	test(rsi, rsi)  // check i via logical AND.
	je(.DCONSIDKITER4)  // if i == 0, jump to code that
	                    // contains the k_iter4 loop.

	label(.DLOOPKITER16)  // MAIN LOOP

	// ---------------------------------- iteration 0

#if 1
	// Prefetch from the three rows of a that follow the current group.
	prefetch(0, mem(rax, r10, 1, 0*8))  // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8, 4, 0*8))  // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8))  // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax ), ymm0)  // 4 k-elements of row 0 of a
	vmovupd(mem(rax, r8, 1), ymm1)  // 4 k-elements of row 1 of a
	vmovupd(mem(rax, r8, 2), ymm2)  // 4 k-elements of row 2 of a
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)  // 4 k-elements of column 0 of b
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)  // column 1 of b
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)  // column 2 of b
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)  // column 3 of b
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

#if 1
	prefetch(0, mem(rax, r10, 1, 0*8))  // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8, 4, 0*8))  // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8))  // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)  // i -= 1;
	jne(.DLOOPKITER16)  // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)  // i = k_iter4;
	test(rsi, rsi)  // check i via logical AND.
	je(.DCONSIDKLEFT1)  // if i == 0, jump to code that
	                    // considers k_left1 loop.
	                    // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)  // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)  // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)  // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)  // i -= 1;
	jne(.DLOOPKITER4)  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)  // i = k_left1;
	test(rsi, rsi)  // check i via logical AND.
	je(.DPOSTACCUM)  // if i == 0, we're done; jump to end.
	                 // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)  // EDGE LOOP (scalar)

	// NOTE: We must use ymm registers here bc
	// using the xmm registers would zero out the
	// high bits of the destination registers,
	// which would destroy intermediate results.
	// The scalar loads below go to xmm registers, while the
	// multiply-accumulates still operate on the full ymm accumulators.

	vmovsd(mem(rax ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax)  // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)  // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)  // i -= 1;
	jne(.DLOOPKLEFT1)  // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (rows of c by columns 0 .. 3):
	// ymm4 ymm7 ymm10 ymm13
	// ymm5 ymm8 ymm11 ymm14
	// ymm6 ymm9 ymm12 ymm15

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	// xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	// xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	// ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	// ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	// ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	// rdi held 5*rs_a during accumulation; from here on it is rs_c.
	mov(var(rs_c), rdi)  // load rs_c
	lea(mem(, rdi, 8), rdi)  // rs_c *= sizeof(double)

	mov(var(alpha), rax)  // load address of alpha
	mov(var(beta), rbx)  // load address of beta
	vbroadcastsd(mem(rax), ymm0)  // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)  // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)  // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi)  // load cs_c
	//lea(mem(, rsi, 8), rsi)  // rsi = cs_c * sizeof(double)

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)  // set ymm0 to zero.
	vucomisd(xmm0, xmm3)  // set ZF if beta == 0.
	je(.DBETAZERO)  // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: each row becomes beta * c + (alpha-scaled sums).
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)  // advance to the next row of c

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)  // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	// beta == 0: store the results without reading c.
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	// Advance the group pointers by three rows (2*stride, then 1*stride).
	lea(mem(r12, rdi, 2), r12)  //
	lea(mem(r12, rdi, 1), r12)  // c_ii = r12 += 3*rs_c

	lea(mem(r14, r8, 2), r14)  //
	lea(mem(r14, r8, 1), r14)  // a_ii = r14 += 3*rs_a

	dec(r9)  // ii -= 1;
	jne(.DLOOP3X4I)  // iterate again if ii != 0.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [m_iter] "m" (m_iter),
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
	// m_left is m0 % 3, so at most one of the two branches below runs.
	if ( m_left )
	{
		const dim_t nr_cur = 4;
		// Index of the first row not covered by the 3-row groups.
		const dim_t i_edge = m0 - ( dim_t )m_left;

		double* restrict cij = c + i_edge*rs_c;
		double* restrict bj = b;
		double* restrict ai = a + i_edge*rs_a;

		if ( 2 == m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_2x4m
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			//cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rd_haswell_asm_1x4m
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
	}
}

void bli_dgemmsup_rd_haswell_asm_3x4m
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()  // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) 
vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. 
label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_2x4m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
// Clear the eight accumulators of the 2x4 tile. Each element C(i,j) owns one
// ymm register that collects four partial sums of the dot product of row i
// of a with column j of b:
//   row 0: ymm4 ymm7 ymm10 ymm13
//   row 1: ymm5 ymm8 ymm11 ymm14
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
#endif

	// Register roles for the rest of the kernel:
	//   rax = current position in a,  r8  = rs_a in bytes
	//   rbx = current position in b,  r11 = cs_b in bytes, r13 = 3*cs_b
	//   rcx = current row of c,       rdi = rs_c in bytes
	//   rsi = k-loop counter
	// NOTE(review): the k loops below bump rax/rbx by fixed 4*8 and 1*8
	// byte steps and never read cs_a or rs_b, so the k dimension of both
	// operands is presumably contiguous (cs_a == rs_b == 1) -- confirm
	// against the caller.
	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// Touch both rows of the c tile before the k loops start.
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	// Main loop: four unrolled steps of four doubles each, i.e. 16 values
	// of k per pass. ymm0/ymm1 hold rows 0/1 of a; ymm3 is reloaded with
	// each of the four columns of b in turn.
	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	// Edge loop: same step as above, not unrolled (4 values of k per pass).
	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	// Scalar edge loop: one value of k per pass, loaded with vmovsd.
	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rax ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	// Reduction: collapse the four partial sums held in every accumulator
	// into one scalar and pack the four scalars of each row of the tile
	// into a single ymm register.
	label(.DPOSTACCUM)

	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	// NOTE(review): per the ISA reference an unordered compare (NaN beta)
	// also sets ZF, so such a beta would take the beta == 0 branch as
	// well -- confirm this is acceptable.
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x4m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | | | -------- -- -- -- ... 
| | | | -------- += -- -- -- | | | | -------- | | | | -------- : -------- : */ // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm13, ymm13, ymm13) #endif mov(var(a), rax) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), rcx) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
// k loops of the 1x4 kernel. ymm0 holds four consecutive elements of the
// single row of a (rax); ymm3 is reloaded with the matching elements of b
// columns 0..3 (rbx, rbx+r11, rbx+2*r11, rbx+r13), feeding the accumulators
// ymm4, ymm7, ymm10 and ymm13 respectively.
// NOTE(review): rax/rbx advance by fixed 4*8 and 1*8 byte steps without
// reading cs_a or rs_b, so the k dimension of both operands is presumably
// contiguous (cs_a == rs_b == 1) -- confirm against the caller.
	label(.DLOOPKITER16)               // MAIN LOOP (4 x 4 = 16 values of k per pass)

	// ---------------------------------- iteration 0

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	// Edge loop: same step as above, not unrolled (4 values of k per pass).
	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	// Scalar edge loop: one value of k per pass, loaded with vmovsd.
	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rax ), xmm0)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	// Reduction: collapse the four partial sums of each accumulator into
	// one scalar and pack the four results into ymm4.
	label(.DPOSTACCUM)

	                                   // ymm4  ymm7  ymm10 ymm13

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	// NOTE(review): per the ISA reference an unordered compare (NaN beta)
	// also sets ZF, so such a beta would take the beta == 0 branch as
	// well -- confirm this is acceptable.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	// beta != 0: ymm4 += beta * c, then write the four doubles back to the
	// single row of c at rcx.
	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_6x2m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t m_iter = m0 / 6; uint64_t m_left = m0 % 6; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. 
// Remaining setup for the 6x2 kernel. Byte strides are kept in r8 (rs_a),
// r11 (cs_b) and rdi (rs_c); r13 and r15 hold 3*rs_a and 5*rs_a so that all
// six rows of a can be addressed from rax.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), r12)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// r12, r14 and rdx keep the base addresses for the current m iteration;
	// rcx, rax and rbx are the working copies that the k loops and the
	// store sequence advance.
	                                   // r12 = rcx = c
	                                   // r14 = rax = a
	                                   // rdx = rbx = b
	                                   // r9  = m dim index ii

	mov(var(m_iter), r9)               // ii = m_iter;

	// Outer loop over blocks of six rows. ii counts down from m_iter and
	// the loop is left when it reaches 0 (see dec/jne before .DRETURN).
	// NOTE(review): the label says 3X4 although this loop processes a 6x2
	// tile; the name looks like a leftover from another kernel.
	label(.DLOOP3X4I)                  // LOOP OVER ii = [ m_iter ... 1 ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	// Accumulator layout for the 6x2 tile (row i of a, column j of b):
	//   row 0: ymm4  ymm5     row 3: ymm10 ymm11
	//   row 1: ymm6  ymm7     row 4: ymm12 ymm13
	//   row 2: ymm8  ymm9     row 5: ymm14 ymm15
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem(r12), rcx)                 // rcx = c + 6*ii*rs_c;
	lea(mem(r14), rax)                 // rax = a + 6*ii*rs_a;
	lea(mem(rdx), rbx)                 // rbx = b;

	lea(mem(rcx, rdi, 2), r10)         //
	lea(mem(r10, rdi, 1), r10)         // r10 = c + 3*rs_c;
	prefetch(0, mem(rcx, 1*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(r10, 1*8))         // prefetch c + 3*rs_c
	prefetch(0, mem(r10, rdi, 1, 1*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(r10, rdi, 2, 1*8)) // prefetch c + 5*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	// Main loop: four unrolled steps of four doubles each, i.e. 16 values
	// of k per pass. ymm0/ymm1 hold columns 0/1 of b; ymm3 is reloaded with
	// each of the six rows of a in turn.
	// NOTE(review): rax/rbx advance by fixed 4*8 and 1*8 byte steps without
	// reading cs_a or rs_b, so the k dimension of both operands is
	// presumably contiguous (cs_a == rs_b == 1) -- confirm against the
	// caller.
	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	// Edge loop: same step as above, not unrolled (4 values of k per pass).
	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	// Scalar edge loop: one value of k per pass, loaded with vmovsd.
	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rbx ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;

	vmovsd(mem(rax ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovsd(mem(rax, r8, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovsd(mem(rax, r13, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovsd(mem(rax, r8, 4), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovsd(mem(rax, r15, 1), xmm3)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	// Reduction: for every row, collapse the two accumulators into a pair
	// of scalars held in one xmm register (column 0 in element 0, column 1
	// in element 1).
	label(.DPOSTACCUM)

	                                   // ymm4  ymm5
	                                   // ymm6  ymm7
	                                   // ymm8  ymm9
	                                   // ymm10 ymm11
	                                   // ymm12 ymm13
	                                   // ymm14 ymm15

	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	                                   // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )

	vhaddpd( ymm9, ymm8, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm8 )

	vhaddpd( ymm11, ymm10, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm10 )

	vhaddpd( ymm13, ymm12, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm12 )

	vhaddpd( ymm15, ymm14, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm14 )

	                                   // xmm4  = sum(ymm4)  sum(ymm5)
	                                   // xmm6  = sum(ymm6)  sum(ymm7)
	                                   // xmm8  = sum(ymm8)  sum(ymm9)
	                                   // xmm10 = sum(ymm10) sum(ymm11)
	                                   // xmm12 = sum(ymm12) sum(ymm13)
	                                   // xmm14 = sum(ymm14) sum(ymm15)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4)           // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)
	vmulpd(xmm0, xmm10, xmm10)
	vmulpd(xmm0, xmm12, xmm12)
	vmulpd(xmm0, xmm14, xmm14)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	// NOTE(review): per the ISA reference an unordered compare (NaN beta)
	// also sets ZF, so such a beta would take the beta == 0 branch as
	// well -- confirm this is acceptable.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	// beta != 0: for each of the six rows, result += beta * c, then store.
	// Two adjacent doubles are read and written per row; cs_c is never
	// consulted, and rcx steps by rs_c between rows.
	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm10)
	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm12)
	vmovupd(xmm12, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm14)
	vmovupd(xmm14, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	// beta == 0: store the scaled results without reading c.
	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm12, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm14, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	// Advance the per-iteration base pointers by six rows (4 + 2 strides).
	lea(mem(r12, rdi, 4), r12)         //
	lea(mem(r12, rdi, 2), r12)         // c_ii = r12 += 6*rs_c

	lea(mem(r14, r8, 4), r14)          //
	lea(mem(r14, r8, 2), r14)          // a_ii = r14 += 6*rs_a

	dec(r9)                            // ii -= 1;
	jne(.DLOOP3X4I)                    // iterate again if ii != 0.

	// No jump in the visible part of this kernel targets .DRETURN; control
	// simply falls through to the end of the asm block.
	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [m_iter] "m" (m_iter),
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	// Reached either after the asm block above or directly via the goto
	// taken when m_iter == 0.
	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
if ( m_left ) { const dim_t nr_cur = 2; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; double* restrict bj = b; double* restrict ai = a + i_edge*rs_a; if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rd_haswell_asm_3x2m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x2m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x2m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } void bli_dgemmsup_rd_haswell_asm_3x2m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. 
label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovsd(mem(rax, r8, 2), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 // ymm8 ymm9 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) vhaddpd( ymm9, ymm8, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm8 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) // xmm8 = sum(ymm8) sum(ymm9) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) vmulpd(xmm0, xmm8, xmm8) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vmovupd(xmm8, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_2x2m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | -------- -- -- -- ... | | -------- += -- -- -- | | -------- -- -- -- | | -------- -- -- -- : -------- -- -- -- : */ // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. 
#else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. 
vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x2m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | -------- -- -- -- ... 
| | -------- += -- -- -- | | -------- -- -- -- | | -------- -- -- -- : -------- -- -- -- : */ // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) #endif mov(var(a), rax) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. 
label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm5 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5) // xmm4 = sum(ymm4) sum(ymm5) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/haswell/3/sup/old/bli_gemmsup_rd_haswell_asm_d6x8m.c.newji000066400000000000000000004465211360743507500272020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrc: -------- ------ | | | | | | | | -------- ------ | | | | | | | | -------- += ------ ... | | | | | | | | -------- ------ | | | | | | | | -------- ------ : -------- ------ : Assumptions: - C is row-stored and B is column-stored; - A is row-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential microkernel is well-suited for a dot-product-based accumulation that performs vector loads from both A and B. */ // Prototype reference microkernels. GEMMSUP_KER_PROT( float, s, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( scomplex, c, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( dcomplex, z, gemmsup_r_haswell_ref ) // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4 #define NUM_NR 4 #define FUNCPTR_T dgemmsup_ker_ft #if 0 static dim_t mrs[NUM_MR] = { 6, 3, 2, 1 }; static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 }; static FUNCPTR_T kmap[NUM_MR][NUM_NR] = { /* 8 4 2 1 */ /* 6 */ { bli_dgemmsup_rd_haswell_asm_6x8m, bli_dgemmsup_rd_haswell_asm_6x4m, bli_dgemmsup_rd_haswell_asm_6x2m, bli_dgemmsup_r_haswell_ref }, /* 3 */ { bli_dgemmsup_rd_haswell_asm_3x8m, bli_dgemmsup_rd_haswell_asm_3x4m, bli_dgemmsup_rd_haswell_asm_3x2m, bli_dgemmsup_r_haswell_ref }, /* 2 */ { bli_dgemmsup_rd_haswell_asm_2x8m, bli_dgemmsup_rd_haswell_asm_2x4m, bli_dgemmsup_rd_haswell_asm_2x2m, bli_dgemmsup_r_haswell_ref }, /* 1 */ { bli_dgemmsup_rd_haswell_asm_1x8m, bli_dgemmsup_rd_haswell_asm_1x4m, bli_dgemmsup_rd_haswell_asm_1x2m, bli_dgemmsup_r_haswell_ref } }; #endif void bli_dgemmsup_rd_haswell_asm_6x8m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { uint64_t n_left = n0 % 8; // First check whether this is a edge case in the n dimension. If so, // dispatch other 6x?m kernels, as needed. 
if ( n_left ) { double* restrict cij = c; double* restrict bj = b; double* restrict ai = a; if ( 4 <= n_left ) { const dim_t nr_cur = 4; bli_dgemmsup_rd_haswell_asm_6x4m ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_6x2m ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, m0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } return; } //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t m_iter = m0 / 3; uint64_t m_left = m0 % 3; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. //mov(var(a), r14) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. 
//mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a //mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = m dim index ii // r15 = n dim index jj // r10 = unused mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] mov(var(a), r14) // load address of a mov(var(c), r12) // load address of c lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r12, rsi, 1), r12) // r12 = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(rdx, rsi, 1), rdx) // rbx = b + 4*jj*cs_b; mov(var(m_iter), r9) // ii = m_iter; label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(r14), rax) // rax = a_ii; lea(mem(rdx), rbx) // rbx = b_jj; #if 0 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r8, r8, 4), rdi) // rdi = 5*rs_a mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. 
je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*cs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*cs_a #endif vmovupd(mem(rax ), ymm0) 
vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) lea(mem(r12, rdi, 2), r12) // lea(mem(r12, rdi, 1), r12) // c_ii = r12 += 3*rs_c lea(mem(r14, r8, 2), r14) // lea(mem(r14, r8, 1), r14) // a_ii = r14 += 3*rs_a dec(r9) // ii -= 1; jne(.DLOOP3X4I) // iterate again if ii != 0. add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( m_left )
	{
		// The edge kernels called below are always given all 8 columns.
		const dim_t      nr_cur = 8;
		// Index of the first of the trailing m_left rows.
		const dim_t      i_edge = m0 - ( dim_t )m_left;

		// Pointers to the first leftover row of C and A; B is used from its
		// start.
		double* restrict cij = c + i_edge*rs_c;
		double* restrict bj  = b;
		double* restrict ai  = a + i_edge*rs_a;

		if ( 2 == m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_2x8m
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			//cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rd_haswell_asm_1x8m
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
	}
}

// bli_dgemmsup_rd_haswell_asm_3x8m
//
// Double-precision kernel that updates a 3x8 block of C as
//
//   C := beta * C + alpha * A * B
//
// using 256-bit (ymm) FMA instructions in inline assembly.
//
// What the code below does:
// - The 8 columns are processed as two passes of 4 columns (jj = 0, then
//   jj = 4).
// - Every element of C is computed as a dot product along k. Four
//   consecutive doubles are loaded from each of 3 rows of A (rows are rs_a
//   elements apart) and from each of 4 columns of B (columns are cs_b
//   elements apart) and multiplied-accumulated into ymm4..ymm15.
// - k is consumed by a loop unrolled to 16 elements per trip (k_iter16), a
//   loop of 4 elements per trip (k_iter4), and a scalar loop (k_left1).
// - After the k loops, the four lanes of each accumulator are summed
//   horizontally, the sums are scaled by alpha, beta*C is added (C is not
//   read when beta compares equal to zero), and the result is stored as 3
//   rows of 4 doubles (rows are rs_c elements apart).
//
// Strides hard-coded in the assembly: consecutive k elements of A and of B
// are 8 bytes apart, and consecutive columns of C are 8 bytes apart. cs_a,
// rs_b and cs_c are passed as operands but their loads are commented out.
//
// Parameters read by the enabled code: k0, alpha, a, rs_a0, b, cs_b0, beta,
// c, rs_c0. conja, conjb, m0, n0, data and cntx are only referenced inside
// the disabled "#if 0" reference path.
void bli_dgemmsup_rd_haswell_asm_3x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k0 is split as k0 = 16*k_iter16 + 4*k_iter4 + k_left1.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), r12)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// r12 = rcx = c
	// r14 = rax = a
	// rdx = rbx = b
	// r9  = unused
	// r15 = n dim index jj
	// r10 = unused

	mov(imm(0), r15)                   // jj = 0;

	label(.DLOOP3X4J)                  // LOOP OVER jj = [ 0 4 ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	// Zero the twelve accumulators (3 rows x 4 columns).
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj (0 or 4);
	imul(imm(1*8), rsi)                // rsi *= cs_c = 1*8
	lea(mem(r12, rsi, 1), rcx)         // rcx = c + jj*cs_c;

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj (0 or 4);
	imul(r11, rsi)                     // rsi *= cs_b;
	lea(mem(rdx, rsi, 1), rbx)         // rbx = b + jj*cs_b;

	lea(mem(r14), rax)                 // rax = a;

#if 0
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c
#else
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
#endif

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	// ymm0..ymm2 = 4 k-elements from rows 0..2 of a.
	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	// ymm3 = 4 k-elements from column jj+0 of b.
	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	// column jj+1 of b.
	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	// column jj+2 of b.
	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	// column jj+3 of b (r13 = 3*cs_b).
	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	// One k-element per trip: scalar loads into the low lane, full-width FMAs
	// into the ymm accumulators.
	vmovsd(mem(rax ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14
	                                   // ymm6  ymm9  ymm12 ymm15

	// Reduce each accumulator to the sum of its four lanes and pack the four
	// sums belonging to one row of C into a single register.
	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	                                   // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: each row becomes beta*C + alpha*(A*B), then is stored.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.
label(.DBETAZERO)

	label(.DROWSTORBZ)

	// beta == 0: store the three alpha-scaled rows without reading C.
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	add(imm(4), r15)                   // jj += 4;
	cmp(imm(4), r15)                   // compare jj to 4
	jle(.DLOOP3X4J)                    // if jj <= 4, jump to beginning
	                                   // of jj loop; otherwise, loop ends.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// bli_dgemmsup_rd_haswell_asm_2x8m
//
// Double-precision kernel that updates a 2x8 block of C as
//
//   C := beta * C + alpha * A * B
//
// using 256-bit (ymm) FMA instructions in inline assembly.
//
// What the code below does:
// - The 8 columns are processed as two passes of 4 columns (jj = 0, then
//   jj = 4).
// - Every element of C is computed as a dot product along k. Four
//   consecutive doubles are loaded from each of 2 rows of A (rows are rs_a
//   elements apart) and from each of 4 columns of B (columns are cs_b
//   elements apart) and multiplied-accumulated into ymm4, ymm5, ymm7, ymm8,
//   ymm10, ymm11, ymm13 and ymm14.
// - k is consumed by a loop unrolled to 16 elements per trip (k_iter16), a
//   loop of 4 elements per trip (k_iter4), and a scalar loop (k_left1).
// - After the k loops, the four lanes of each accumulator are summed
//   horizontally, the sums are scaled by alpha, beta*C is added (C is not
//   read when beta compares equal to zero), and the result is stored as 2
//   rows of 4 doubles (rows are rs_c elements apart).
//
// Strides hard-coded in the assembly: consecutive k elements of A and of B
// are 8 bytes apart, and consecutive columns of C are 8 bytes apart. cs_a,
// rs_b and cs_c are passed as operands but their loads are commented out.
//
// Parameters read by the enabled code: k0, alpha, a, rs_a0, b, cs_b0, beta,
// c, rs_c0. conja, conjb, m0, n0, data and cntx are only referenced inside
// the disabled "#if 0" reference path.
void bli_dgemmsup_rd_haswell_asm_2x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k0 is split as k0 = 16*k_iter16 + 4*k_iter4 + k_left1.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), r12)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// r12 = rcx = c
	// r14 = rax = a
	// rdx = rbx = b
	// r9  = unused
	// r15 = n dim index jj
	// r10 = unused

	mov(imm(0), r15)                   // jj = 0;

	label(.DLOOP3X4J)                  // LOOP OVER jj = [ 0 4 ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	// Zero the eight accumulators (2 rows x 4 columns).
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
#endif

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj (0 or 4);
	imul(imm(1*8), rsi)                // rsi *= cs_c = 1*8
	lea(mem(r12, rsi, 1), rcx)         // rcx = c + jj*cs_c;

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj (0 or 4);
	imul(r11, rsi)                     // rsi *= cs_b;
	lea(mem(rdx, rsi, 1), rbx)         // rbx = b + jj*cs_b;

	lea(mem(r14), rax)                 // rax = a;

#if 0
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
#else
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
#endif

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	// ymm0, ymm1 = 4 k-elements from rows 0 and 1 of a.
	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	// ymm3 = 4 k-elements from column jj+0 of b.
	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	// column jj+1 of b.
	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	// column jj+2 of b.
	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	// column jj+3 of b (r13 = 3*cs_b).
	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	// One k-element per trip: scalar loads into the low lane, full-width FMAs
	// into the ymm accumulators.
	vmovsd(mem(rax ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14

	// Reduce each accumulator to the sum of its four lanes and pack the four
	// sums belonging to one row of C into a single register.
	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: each row becomes beta*C + alpha*(A*B), then is stored.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	// beta == 0: store the two alpha-scaled rows without reading C.
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	add(imm(4), r15)                   // jj += 4;
	cmp(imm(4), r15)                   // compare jj to 4
	jle(.DLOOP3X4J)                    // if jj <= 4, jump to beginning
	                                   // of jj loop; otherwise, loop ends.
label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// bli_dgemmsup_rd_haswell_asm_1x8m
//
// Double-precision kernel that updates a 1x8 block (one row) of C as
//
//   C := beta * C + alpha * A * B
//
// using 256-bit (ymm) FMA instructions in inline assembly.
//
// What the code below does:
// - The 8 columns are processed as two passes of 4 columns (jj = 0, then
//   jj = 4).
// - Every element of C is computed as a dot product along k. Four
//   consecutive doubles are loaded from the single row of A and from each
//   of 4 columns of B (columns are cs_b elements apart) and
//   multiplied-accumulated into ymm4, ymm7, ymm10 and ymm13.
// - k is consumed by a loop unrolled to 16 elements per trip (k_iter16), a
//   loop of 4 elements per trip (k_iter4), and a scalar loop (k_left1).
// - After the k loops, the four lanes of each accumulator are summed
//   horizontally, the sums are scaled by alpha, beta*C is added (C is not
//   read when beta compares equal to zero), and the 4 results are stored.
//
// Strides hard-coded in the assembly: consecutive k elements of A and of B
// are 8 bytes apart, and consecutive columns of C are 8 bytes apart. Only
// cs_b is loaded; the loads of rs_a, cs_a, rs_b, rs_c and cs_c are
// commented out.
//
// Parameters read by the enabled code: k0, alpha, a, b, cs_b0, beta, c.
// conja, conjb, m0, n0, data and cntx are only referenced inside the
// disabled "#if 0" reference path.
void bli_dgemmsup_rd_haswell_asm_1x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k0 is split as k0 = 16*k_iter16 + 4*k_iter4 + k_left1.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	//mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	//lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), r12)                   // load address of c
	//mov(var(rs_c), rdi)                // load rs_c
	//lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// r12 = rcx = c
	// r14 = rax = a
	// rdx = rbx = b
	// r9  = unused (there is no m dim loop in this kernel)
	// r15 = n dim index jj

	mov(imm(0), r15)                   // jj = 0;

	label(.DLOOP3X4J)                  // LOOP OVER jj = [ 0 4 ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	// Zero the four accumulators (1 row x 4 columns).
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm13, ymm13, ymm13)
#endif

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj (0 or 4);
	imul(imm(1*8), rsi)                // rsi *= cs_c = 1*8
	lea(mem(r12, rsi, 1), rcx)         // rcx = c + jj*cs_c;

	lea(mem( , r15, 1), rsi)           // rsi = r15 = jj (0 or 4);
	imul(r11, rsi)                     // rsi *= cs_b;
	lea(mem(rdx, rsi, 1), rbx)         // rbx = b + jj*cs_b;

	lea(mem(r14), rax)                 // rax = a;

#if 0
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
#else
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
#endif

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	// ymm0 = 4 k-elements from the row of a.
	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	// ymm3 = 4 k-elements from column jj+0 of b.
	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	// column jj+1 of b.
	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	// column jj+2 of b.
	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	// column jj+3 of b (r13 = 3*cs_b).
	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	// One k-element per trip: scalar loads into the low lane, full-width FMAs
	// into the ymm accumulators.
	vmovsd(mem(rax ), xmm0)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // ymm4  ymm7  ymm10 ymm13

	// Reduce each accumulator to the sum of its four lanes and pack the four
	// sums into ymm4.
	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: the row becomes beta*C + alpha*(A*B), then is stored.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	// beta == 0: store the alpha-scaled row without reading C.
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	add(imm(4), r15)                   // jj += 4;
	cmp(imm(4), r15)                   // compare jj to 4
	jle(.DLOOP3X4J)                    // if jj <= 4, jump to beginning
	                                   // of jj loop; otherwise, loop ends.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rd_haswell_asm_6x4m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	// m0 is split into m_iter groups of 3 rows plus m_left leftover rows.
	uint64_t m_iter = m0 / 3;
	uint64_t m_left = m0 % 3;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// With fewer than 3 rows there is no full group; skip the assembly.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = m dim index ii // r15 = n dim index jj // r10 = unused mov(var(m_iter), r9) // ii = m_iter; label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter .. 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c + 3*ii*rs_c; lea(mem(r14), rax) // rax = a + 3*ii*rs_a; lea(mem(rdx), rbx) // rbx = b; #if 0 prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c #else prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) 
vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. 
label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) lea(mem(r12, rdi, 2), r12) // lea(mem(r12, rdi, 1), r12) // c_ii = r12 += 3*rs_c lea(mem(r14, r8, 2), r14) // lea(mem(r14, r8, 1), r14) // a_ii = r14 += 3*rs_a dec(r9) // ii -= 1; jne(.DLOOP3X4I) // iterate again if ii != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( m_left ) { const dim_t nr_cur = 4; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; double* restrict bj = b; double* restrict ai = a + i_edge*rs_a; if ( 2 == m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x4m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); //cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x4m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } void bli_dgemmsup_rd_haswell_asm_3x4m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) 
vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. 
label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_2x4m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. 
#else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) 
vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. 
vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x4m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | | | -------- -- -- -- ... | | | | -------- += -- -- -- | | | | -------- | | | | -------- : -------- : */ // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm13, ymm13, ymm13) #endif mov(var(a), rax) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), rcx) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO)
	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rd_haswell_asm_6x2m

   Double-precision update C := beta*C + alpha*(A*B) for an m0 x 2 block
   of C.

   - The inline assembly loops over the m dimension six rows at a time
     (m_iter = m0 / 6 passes). The m0 % 6 leftover rows are handled after
     the asm block by calling the 3x2m, 2x2m and 1x2m kernels.
   - Every element of C is formed as a dot product along k: for each of
     the six rows and two columns one ymm accumulator (ymm4..ymm15) holds
     four partial sums, which are reduced horizontally after the k loops.
   - k is consumed in three stages: k0/16 passes of a 4x-unrolled loop
     that handles four k values per step, then (k0%16)/4 four-wide steps,
     then (k0%16)%4 scalar steps.
   - Only rs_a, cs_b and rs_c are loaded by the asm. The pointers into a
     and b advance by 8 bytes per k value and two adjacent doubles are
     stored per row of c, so cs_a, rs_b and cs_c are effectively treated
     as 1 (they are passed as operands but never read).
   - alpha and beta are read through their pointers. When the comparison
     of beta with zero takes the .DBETAZERO branch, C is overwritten
     without being loaded.
   - conja, conjb, data and cntx are only forwarded to the edge-case
     kernels (and to the disabled reference call); n0 is not otherwise
     used.
*/
void bli_dgemmsup_rd_haswell_asm_6x2m
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	// Disabled debugging path: forwards everything to the reference kernel.
#if 0
	bli_dgemmsup_r_haswell_ref
	( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx );
	return;
#endif

	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t m_iter = m0 / 6;
	uint64_t m_left = m0 % 6;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// Fewer than six rows: skip the asm block entirely and let the
	// edge-case kernels do all of the work.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall() // zero all xmm/ymm registers.

	mov(var(a), r14) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	//mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rdx) // load address of b.
	//mov(var(rs_b), r10) // load rs_b
	mov(var(cs_b), r11) // load cs_b
	//lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b

	mov(var(c), r12) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

	// Base pointers kept across the m loop (left) and the working copies
	// that the k loops advance (right):
	// r12 = rcx = c
	// r14 = rax = a
	// rdx = rbx = b
	// r9 = m dim index ii

	mov(var(m_iter), r9) // ii = m_iter;

	// NOTE: the label says 3X4, but each pass of this loop computes a
	// 6x2 block of C (six rows of a, two columns of b).
	label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter ... 1 0 ]

#if 0
	vzeroall() // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	// Copy the per-pass base pointers into the working registers.
	lea(mem(r12), rcx) // rcx = c + 6*ii*rs_c;
	lea(mem(r14), rax) // rax = a + 6*ii*rs_a;
	lea(mem(rdx), rbx) // rbx = b;

	lea(mem(rcx, rdi, 2), r10) //
	lea(mem(r10, rdi, 1), r10) // r10 = c + 3*rs_c;
	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(r10, 3*8)) // prefetch c + 3*rs_c
	prefetch(0, mem(r10, rdi, 1, 3*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(r10, rdi, 2, 3*8)) // prefetch c + 5*rs_c

	mov(var(k_iter16), rsi) // i = k_iter16;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKITER4) // if i == 0, jump to code that
	// contains the k_iter4 loop.

	// Each unrolled step below loads four consecutive k values of the two
	// columns of b (ymm0, ymm1), then for each of the six rows of a loads
	// the matching four values (ymm3) and accumulates both products.
	label(.DLOOPKITER16) // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER16) // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi) // i = k_iter4;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT1) // if i == 0, jump to code that
	// considers k_left1 loop.
	// else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4) // EDGE LOOP (ymm)

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER4) // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi) // i = k_left1;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	// else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1) // EDGE LOOP (scalar)

	// NOTE: The FMAs below must use ymm registers because
	// using the xmm registers would zero out the
	// high bits of the destination registers,
	// which would destroy intermediate results.

	vmovsd(mem(rbx        ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx) // b += 1*rs_b = 1*8;

	vmovsd(mem(rax        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovsd(mem(rax, r8, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovsd(mem(rax, r13, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovsd(mem(rax, r8, 4), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovsd(mem(rax, r15, 1), xmm3)
	add(imm(1*8), rax) // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT1) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (row of c, column 0 / column 1):
	// ymm4 ymm5
	// ymm6 ymm7
	// ymm8 ymm9
	// ymm10 ymm11
	// ymm12 ymm13
	// ymm14 ymm15

	// Reduce each pair of accumulators to one xmm register holding the
	// two finished dot products of that row.
	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	// xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )

	vhaddpd( ymm9, ymm8, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm8 )

	vhaddpd( ymm11, ymm10, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm10 )

	vhaddpd( ymm13, ymm12, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm12 )

	vhaddpd( ymm15, ymm14, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm14 )

	// xmm4 = sum(ymm4) sum(ymm5)
	// xmm6 = sum(ymm6) sum(ymm7)
	// xmm8 = sum(ymm8) sum(ymm9)
	// xmm10 = sum(ymm10) sum(ymm11)
	// xmm12 = sum(ymm12) sum(ymm13)
	// xmm14 = sum(ymm14) sum(ymm15)

	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4) // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)
	vmulpd(xmm0, xmm10, xmm10)
	vmulpd(xmm0, xmm12, xmm12)
	vmulpd(xmm0, xmm14, xmm14)

	//mov(var(cs_c), rsi) // load cs_c
	//lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	// beta != 0: c_row := beta*c_row + alpha*ab_row, one row (two
	// doubles) at a time, stepping rcx by rs_c between rows.
	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm10)
	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm12)
	vmovupd(xmm12, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm14)
	vmovupd(xmm14, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	// beta == 0: store alpha*ab without reading c.
	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm10, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm12, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm14, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	// Advance the per-pass base pointers by six rows (4*rs + 2*rs).
	lea(mem(r12, rdi, 4), r12) //
	lea(mem(r12, rdi, 2), r12) // c_ii = r12 += 6*rs_c

	lea(mem(r14, r8, 4), r14) //
	lea(mem(r14, r8, 2), r14) // a_ii = r14 += 6*rs_a

	dec(r9) // ii -= 1;
	jne(.DLOOP3X4I) // iterate again if ii != 0.

	// NOTE: nothing in this asm block jumps to .DRETURN.
	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [m_iter] "m" (m_iter),
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
	// m_left is in [0,5]; it is covered greedily by at most one 3-row
	// call, then at most one 2-row call, then at most one 1-row call
	// (5 = 3+2, 4 = 3+1, 3 = 3, 2 = 2, 1 = 1).
	if ( m_left )
	{
		const dim_t nr_cur = 2;
		// First row not handled by the asm loop above.
		const dim_t i_edge = m0 - ( dim_t )m_left;

		double* restrict cij = c + i_edge*rs_c;
		double* restrict bj = b;
		double* restrict ai = a + i_edge*rs_a;

		if ( 3 <= m_left )
		{
			const dim_t mr_cur = 3;

			bli_dgemmsup_rd_haswell_asm_3x2m
			( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx );
			// Step past the rows just computed.
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 2 <= m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_2x2m
			( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx );
			cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rd_haswell_asm_1x2m
			( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx );
		}
	}
}

/*
   bli_dgemmsup_rd_haswell_asm_3x2m

   Double-precision update C := beta*C + alpha*(A*B) for a 3 x 2 block
   of C. The asm is hard-wired to three rows and two columns: m0 and n0
   are not read outside the disabled reference call.

   Six ymm accumulators (ymm4..ymm9) hold four partial sums each, one
   per element of the block; they are reduced horizontally after the k
   loops. k is consumed 16, then 4, then 1 value(s) at a time.

   Only rs_a, cs_b and rs_c are loaded by the asm; a and b advance by
   8 bytes per k value and two adjacent doubles are stored per row of c,
   so cs_a, rs_b and cs_c are effectively treated as 1. conja, conjb,
   data and cntx are unused here.
*/
void bli_dgemmsup_rd_haswell_asm_3x2m
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	// Disabled debugging path: forwards everything to the reference kernel.
#if 0
	bli_dgemmsup_r_haswell_ref
	( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx );
	return;
#endif

	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall() // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
#endif

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	//mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	//mov(var(rs_b), r10) // load rs_b
	mov(var(cs_b), r11) // load cs_b
	//lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b

	// (No operands are pre-loaded here; a and b are loaded inside the
	// k loops below.)

	mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

	prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c

	mov(var(k_iter16), rsi) // i = k_iter16;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKITER4) // if i == 0, jump to code that
	// contains the k_iter4 loop.

	// Each unrolled step loads four consecutive k values of the two
	// columns of b (ymm0, ymm1), then for each of the three rows of a
	// loads the matching four values (ymm3) and accumulates both products.
	label(.DLOOPKITER16) // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER16) // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi) // i = k_iter4;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT1) // if i == 0, jump to code that
	// considers k_left1 loop.
	// else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4) // EDGE LOOP (ymm)

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER4) // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi) // i = k_left1;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	// else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1) // EDGE LOOP (scalar)

	// NOTE: The FMAs below must use ymm registers because
	// using the xmm registers would zero out the
	// high bits of the destination registers,
	// which would destroy intermediate results.

	vmovsd(mem(rbx        ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx) // b += 1*rs_b = 1*8;

	vmovsd(mem(rax        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovsd(mem(rax, r8, 2), xmm3)
	add(imm(1*8), rax) // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT1) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (row of c, column 0 / column 1):
	// ymm4 ymm5
	// ymm6 ymm7
	// ymm8 ymm9

	// Reduce each pair of accumulators to one xmm register holding the
	// two finished dot products of that row.
	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	// xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )

	vhaddpd( ymm9, ymm8, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm8 )

	// xmm4 = sum(ymm4) sum(ymm5)
	// xmm6 = sum(ymm6) sum(ymm7)
	// xmm8 = sum(ymm8) sum(ymm9)

	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4) // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)

	//mov(var(cs_c), rsi) // load cs_c
	//lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	// beta != 0: c_row := beta*c_row + alpha*ab_row for each row.
	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	// beta == 0: store alpha*ab without reading c.
	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rd_haswell_asm_2x2m

   Double-precision update C := beta*C + alpha*(A*B) for a 2 x 2 block
   of C. The asm is hard-wired to two rows and two columns: m0 and n0
   are not read outside the disabled reference call.

   Four ymm accumulators (ymm4..ymm7) hold four partial sums each, one
   per element of the block; they are reduced horizontally after the k
   loops. k is consumed 16, then 4, then 1 value(s) at a time.

   Only rs_a, cs_b and rs_c are loaded by the asm; a and b advance by
   8 bytes per k value and two adjacent doubles are stored per row of c,
   so cs_a, rs_b and cs_c are effectively treated as 1. conja, conjb,
   data and cntx are unused here.
*/
void bli_dgemmsup_rd_haswell_asm_2x2m
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
	// Disabled debugging path: forwards everything to the reference kernel.
#if 0
	bli_dgemmsup_r_haswell_ref
	( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx );
	return;
#endif

	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	/*
	  rrc:
	    As accessed by the code below, a is read along rows (consecutive
	    k values are adjacent in memory, rows are rs_a apart), b is read
	    along columns (consecutive k values are adjacent, columns are
	    cs_b apart), and c is written one row of two adjacent doubles at
	    a time (rows are rs_c apart).
	*/
	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall() // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
#endif

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	//mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	//mov(var(rs_b), r10) // load rs_b
	mov(var(cs_b), r11) // load cs_b
	//lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b

	// (No operands are pre-loaded here; a and b are loaded inside the
	// k loops below.)

	mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

	//lea(mem(rcx, rdi, 2), rdx) //
	//lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c

	mov(var(k_iter16), rsi) // i = k_iter16;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKITER4) // if i == 0, jump to code that
	// contains the k_iter4 loop.

	// Each unrolled step loads four consecutive k values of the two
	// columns of b (ymm0, ymm1), then for each of the two rows of a
	// loads the matching four values (ymm3) and accumulates both products.
	label(.DLOOPKITER16) // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER16) // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi) // i = k_iter4;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT1) // if i == 0, jump to code that
	// considers k_left1 loop.
	// else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4) // EDGE LOOP (ymm)

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER4) // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi) // i = k_left1;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	// else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1) // EDGE LOOP (scalar)

	// NOTE: The FMAs below must use ymm registers because
	// using the xmm registers would zero out the
	// high bits of the destination registers,
	// which would destroy intermediate results.

	vmovsd(mem(rbx        ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx) // b += 1*rs_b = 1*8;

	vmovsd(mem(rax        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	add(imm(1*8), rax) // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT1) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (row of c, column 0 / column 1):
	// ymm4 ymm5
	// ymm6 ymm7

	// Reduce each pair of accumulators to one xmm register holding the
	// two finished dot products of that row.
	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	// xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )

	// xmm4 = sum(ymm4) sum(ymm5)
	// xmm6 = sum(ymm6) sum(ymm7)

	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4) // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)

	//mov(var(cs_c), rsi) // load cs_c
	//lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	// beta != 0: c_row := beta*c_row + alpha*ab_row for each row.
	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.

	// beta == 0: store alpha*ab without reading c.
	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rd_haswell_asm_1x2m
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx );
	return;
#endif

	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; /* rrc: -------- -- -- -- | | -------- -- -- -- ... | | -------- += -- -- -- | | -------- -- -- -- | | -------- -- -- -- : -------- -- -- -- : */ // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) #endif mov(var(a), rax) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) //lea(mem(rcx, rdi, 2), rdx) // //lea(mem(rdx, rdi, 1), rdx) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. 
label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm5 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm5) // xmm4 = sum(ymm4) sum(ymm5) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/haswell/3/sup/old/bli_gemmsup_rd_haswell_asm_d6x8m.c.worksij000066400000000000000000004471741360743507500275630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrc: -------- ------ | | | | | | | | -------- ------ | | | | | | | | -------- += ------ ... | | | | | | | | -------- ------ | | | | | | | | -------- ------ : -------- ------ : Assumptions: - C is row-stored and B is column-stored; - A is row-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential microkernel is well-suited for a dot-product-based accumulation that performs vector loads from both A and B. */ // Prototype reference microkernels. GEMMSUP_KER_PROT( float, s, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( scomplex, c, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( dcomplex, z, gemmsup_r_haswell_ref ) // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4
#define NUM_NR 4
#define FUNCPTR_T dgemmsup_ker_ft

// NOTE: The dispatch table below is compiled out; the kernels in this file
// select their edge-case kernels with explicit if-statements instead.
#if 0
static dim_t mrs[NUM_MR] = { 6, 3, 2, 1 };
static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 };
static FUNCPTR_T kmap[NUM_MR][NUM_NR] =
{   /*  8                                 4                                 2                                 1  */
/* 6 */ { bli_dgemmsup_rd_haswell_asm_6x8m, bli_dgemmsup_rd_haswell_asm_6x4m, bli_dgemmsup_rd_haswell_asm_6x2m, bli_dgemmsup_r_haswell_ref },
/* 3 */ { bli_dgemmsup_rd_haswell_asm_3x8m, bli_dgemmsup_rd_haswell_asm_3x4m, bli_dgemmsup_rd_haswell_asm_3x2m, bli_dgemmsup_r_haswell_ref },
/* 2 */ { bli_dgemmsup_rd_haswell_asm_2x8m, bli_dgemmsup_rd_haswell_asm_2x4m, bli_dgemmsup_rd_haswell_asm_2x2m, bli_dgemmsup_r_haswell_ref },
/* 1 */ { bli_dgemmsup_rd_haswell_asm_1x8m, bli_dgemmsup_rd_haswell_asm_1x4m, bli_dgemmsup_rd_haswell_asm_1x2m, bli_dgemmsup_r_haswell_ref }
};
#endif


/*
   bli_dgemmsup_rd_haswell_asm_6x8m

   Dot-product based double-precision kernel computing

     C := beta * C + alpha * A * B

   for an m0 x n0 block of C with inner dimension k0.

   Control flow:
   - If n0 is not a multiple of 8, only the n0 % 8 leftover columns are
     processed, starting at column 0 of b and c: four columns via the 6x4m
     kernel, then two via the 6x2m kernel, then a final single column via
     bli_dgemv_ex(). The function then returns.
   - Otherwise the assembly region walks m0 / 3 panels of three rows of A
     (index ii). For each panel it runs two passes over four columns of B
     (jj = 0 and jj = 4), accumulating a 3x4 tile of dot products in
     ymm4..ymm15, reducing each accumulator to a scalar, scaling by alpha,
     and storing three rows of four doubles to C. When *beta == 0.0, C is
     overwritten without being read.
   - The m0 % 3 leftover rows are handed to the 2x8m or 1x8m kernel.

   Parameters:
     conja, conjb  forwarded to the edge-case kernels (conjb also to
                   bli_dgemv_ex); not otherwise read here.
     m0, n0, k0    block dimensions.
     alpha, beta   pointers to the two scalars.
     a, b, c       matrix pointers with row/column strides rs_*0, cs_*0.
     data, cntx    forwarded to the edge-case kernels.

   The assembly advances through A and B in fixed 8-byte steps along k and
   addresses columns of C with a fixed 8-byte stride, so it presumes unit
   cs_a, rs_b and cs_c (these three are passed as operands but never
   loaded); see the layout note near the top of this file.
*/
void bli_dgemmsup_rd_haswell_asm_6x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	uint64_t n_left = n0 % 8;

	// First check whether this is an edge case in the n dimension. If so,
	// dispatch other 6x?m kernels, as needed.
	if ( n_left )
	{
		double* restrict cij = c;
		double* restrict bj  = b;
		double* restrict ai  = a;

		if ( 4 <= n_left )
		{
			const dim_t nr_cur = 4;

			bli_dgemmsup_rd_haswell_asm_6x4m
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 2 <= n_left )
		{
			const dim_t nr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_6x2m
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 1 == n_left )
		{
#if 0
			const dim_t nr_cur = 1;

			bli_dgemmsup_r_haswell_ref
			(
			  conja, conjb, m0, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
#else
			// A single remaining column of C is a matrix-vector product.
			bli_dgemv_ex
			(
			  BLIS_NO_TRANSPOSE, conjb, m0, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0,
			  beta, cij, rs_c0, cntx, NULL
			);
#endif
		}
		return;
	}

	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t m_iter = m0 / 3;
	uint64_t m_left = m0 % 3;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// Fewer than three rows: skip the assembly region entirely.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b
	lea(mem(r8,  r8,  2), r10)         // r10 = 3*rs_a

	mov(var(c), r12)                   // load address of c
	//mov(var(rs_c), rdi)                // load rs_c
	//lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// r12 = rcx = c
	// r14 = rax = a
	// rdx = rbx = b
	// r9  = m dim index ii
	// r15 = n dim index jj

	mov(var(m_iter), r9)               // ii = m_iter;

	label(.DLOOP3X4I)                  // LOOP OVER ii = [ m_iter ... 1 0 ]

	mov(imm(0), r15)                   // jj = 0;

	label(.DLOOP3X4J)                  // LOOP OVER jj = [ 0 1 ... ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
	vxorpd(ymm6,  ymm6,  ymm6)
	vxorpd(ymm7,  ymm7,  ymm7)
	vxorpd(ymm8,  ymm8,  ymm8)
	vxorpd(ymm9,  ymm9,  ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem(   , r15, 1), rsi)         // rsi = r15 = 4*jj;
	imul(imm(1*8), rsi)                // rsi *= cs_c = 1*8
	lea(mem(r12, rsi, 1), rcx)         // rcx = c + 4*jj*cs_c;

	lea(mem(   , r15, 1), rsi)         // rsi = r15 = 4*jj;
	imul(r11, rsi)                     // rsi *= cs_b;
	lea(mem(rdx, rsi, 1), rbx)         // rbx = b + 4*jj*cs_b;

	lea(mem(r14), rax)                 // rax = a_ii;

#if 0
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
#endif
	lea(mem(r8,  r8,  4), rdi)         // rdi = 5*rs_a
	                                   // (rdi is reloaded with rs_c at
	                                   // .DPOSTACCUM before C is touched.)

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

#if 1
	                                   // prefetch the three rows of A that the
	                                   // next ii iteration will read.
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8,  4, 0*8)) // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

#if 1
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8,  4, 0*8)) // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rax       ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14
	                                   // ymm6  ymm9  ymm12 ymm15

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	                                   // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), ymm3, ymm4)  // row += beta * c(row, jj:jj+4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	add(imm(4), r15)                   // jj += 4;
	cmp(imm(4), r15)                   // compare jj to 4
	jle(.DLOOP3X4J)                    // if jj <= 4, jump to beginning
	                                   // of jj loop; otherwise, loop ends.

	lea(mem(r12, rdi, 2), r12)         //
	lea(mem(r12, rdi, 1), r12)         // c_ii = r12 += 3*rs_c

	lea(mem(r14, r8,  2), r14)         //
	lea(mem(r14, r8,  1), r14)         // a_ii = r14 += 3*rs_a

	dec(r9)                            // ii -= 1;
	jne(.DLOOP3X4I)                    // iterate again if ii != 0.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [m_iter]   "m" (m_iter),
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
	if ( m_left )
	{
		const dim_t      nr_cur = 8;
		const dim_t      i_edge = m0 - ( dim_t )m_left;

		// Skip past the rows already handled by the assembly region.
		double* restrict cij = c + i_edge*rs_c;
		double* restrict bj  = b;
		double* restrict ai  = a + i_edge*rs_a;

		// m_left is m0 % 3, so at most one of the two branches below runs.
		if ( 2 == m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_2x8m
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			//cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rd_haswell_asm_1x8m
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
	}
}

/*
   bli_dgemmsup_rd_haswell_asm_3x8m

   Dot-product based double-precision kernel computing

     C := beta * C + alpha * A * B

   for a fixed 3x8 block of C with inner dimension k0. The assembly makes
   two passes over four columns of B (jj = 0 and jj = 4); each pass
   accumulates a 3x4 tile of dot products in ymm4..ymm15, reduces every
   accumulator to a scalar, scales by alpha and stores three rows of four
   doubles to C. When *beta == 0.0, C is overwritten without being read.

   Parameters read by the enabled code: k0, alpha, a, rs_a0, b, cs_b0,
   beta, c, rs_c0. cs_a0, rs_b0 and cs_c0 are passed as asm operands but
   never loaded (the code steps in fixed 8-byte increments instead), and
   conja, conjb, m0, n0, data and cntx are referenced only by the disabled
   (#if 0) reference-kernel call.
*/
void bli_dgemmsup_rd_haswell_asm_3x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), r12)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// r12 = rcx = c
	// r14 = rax = a
	// rdx = rbx = b
	// r9  = unused
	// r15 = n dim index jj
	// r10 = unused

	mov(imm(0), r15)                   // jj = 0;

	label(.DLOOP3X4J)                  // LOOP OVER jj = [ 0 1 ... ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
	vxorpd(ymm6,  ymm6,  ymm6)
	vxorpd(ymm7,  ymm7,  ymm7)
	vxorpd(ymm8,  ymm8,  ymm8)
	vxorpd(ymm9,  ymm9,  ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem(   , r15, 1), rsi)         // rsi = r15 = 4*jj;
	imul(imm(1*8), rsi)                // rsi *= cs_c = 1*8
	lea(mem(r12, rsi, 1), rcx)         // rcx = c + 4*jj*cs_c;

	lea(mem(   , r15, 1), rsi)         // rsi = r15 = 4*jj;
	imul(r11, rsi)                     // rsi *= cs_b;
	lea(mem(rdx, rsi, 1), rbx)         // rbx = b + 4*jj*cs_b;

	lea(mem(r14), rax)                 // rax = a;

#if 0
	prefetch(0, mem(rcx, 7*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 7*8)) // prefetch c + 2*rs_c
#else
	prefetch(0, mem(rcx, 3*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
#endif

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rax       ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14
	                                   // ymm6  ymm9  ymm12 ymm15

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	                                   // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), ymm3, ymm4)  // row += beta * c(row, jj:jj+4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	add(imm(4), r15)                   // jj += 4;
	cmp(imm(4), r15)                   // compare jj to 4
	jle(.DLOOP3X4J)                    // if jj <= 4, jump to beginning
	                                   // of jj loop; otherwise, loop ends.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rd_haswell_asm_2x8m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = unused // r15 = n dim index jj // r10 = unused mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 1 ... ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) #endif lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r12, rsi, 1), rcx) // rcx = c + 4*jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = 4*jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(rdx, rsi, 1), rbx) // rbx = b + 4*jj*cs_b; lea(mem(r14), rax) // rax = a; #if 0 prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 7*8)) // prefetch c + 1*rs_c #else prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c #endif mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; 
vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } /* bli_dgemmsup_rd_haswell_asm_1x8m: updates a 1x8 block of C as C := beta*C + alpha*(A*B), forming each element of A*B as a dot product over k. The 8 columns are processed as two 4-column panels (jj = 0, then jj = 4). For each panel, four ymm accumulators (ymm4, ymm7, ymm10, ymm13: one row of A x 4 columns of B) collect partial sums four k-values at a time, are reduced horizontally, scaled by alpha, and then either added to beta*C or, when beta == 0, stored without reading C. The asm walks A and B in 8-byte steps along k and stores four contiguous doubles of C per panel; rs_a, cs_a, rs_b, rs_c and cs_c are never loaded by the asm. m0 and n0 are not used by the active code, and conja, conjb, data and cntx appear only in the disabled '#if 0' reference call. */ void bli_dgemmsup_rd_haswell_asm_1x8m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. /* k0 is consumed as 16*k_iter16 + 4*k_iter4 + k_left1. */ uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. 
//mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = unused // r15 = n dim index jj mov(imm(0), r15) // jj = 0; label(.DLOOP3X4J) // LOOP OVER jj = [ 0 4 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm13, ymm13, ymm13) #endif lea(mem( , r15, 1), rsi) // rsi = r15 = jj; imul(imm(1*8), rsi) // rsi *= cs_c = 1*8 lea(mem(r12, rsi, 1), rcx) // rcx = c + jj*cs_c; lea(mem( , r15, 1), rsi) // rsi = r15 = jj; imul(r11, rsi) // rsi *= cs_b; lea(mem(rdx, rsi, 1), rbx) // rbx = b + jj*cs_b; lea(mem(r14), rax) // rax = a; #if 0 prefetch(0, mem(rcx, 7*8)) // prefetch c + 0*rs_c #else prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c #endif mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rax ), xmm0) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4), r15) // jj += 4; cmp(imm(4), r15) // compare jj to 4 jle(.DLOOP3X4J) // if jj <= 4, jump to beginning // of jj loop; otherwise, loop ends. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } /* bli_dgemmsup_rd_haswell_asm_6x4m: updates an m0 x 4 block of C as C := beta*C + alpha*(A*B), forming each element of A*B as a dot product over k. Rows are processed three at a time (m_iter = m0 / 3 passes of the ii loop, twelve ymm accumulators = 3 rows of A x 4 columns of B); inside the main k loop the three rows of A that follow the current ones are prefetched. After the asm region, the m0 % 3 leftover rows are handed to bli_dgemmsup_rd_haswell_asm_2x4m or bli_dgemmsup_rd_haswell_asm_1x4m. When beta == 0, C is stored without being read. The asm walks A and B in 8-byte steps along k and stores four contiguous doubles per row of C; cs_a, rs_b and cs_c are never loaded by the asm, and n0 is not used by the active code. */ void bli_dgemmsup_rd_haswell_asm_6x4m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. /* k0 is consumed as 16*k_iter16 + 4*k_iter4 + k_left1; m0 as 3*m_iter + m_left. */ uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t m_iter = m0 / 3; uint64_t m_left = m0 % 3; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. 
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = m dim index ii // r15 = unused // r10 = 3*rs_a (prefetch offset) mov(var(m_iter), r9) // ii = m_iter; label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter ... 1 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c + 3*ii*rs_c; lea(mem(r14), rax) // rax = a + 3*ii*rs_a; lea(mem(rdx), rbx) // rbx = b; #if 0 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r8, r8, 4), rdi) // rdi = 5*rs_a mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*rs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*rs_a prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 
4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here because // using the xmm registers would zero out the // high bits of the destination registers, // which would destroy intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) lea(mem(r12, rdi, 2), r12) // lea(mem(r12, rdi, 1), r12) // c_ii = r12 += 3*rs_c lea(mem(r14, r8, 2), r14) // lea(mem(r14, r8, 1), r14) // a_ii = r14 += 3*rs_a dec(r9) // ii -= 1; jne(.DLOOP3X4I) // iterate again if ii != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( m_left ) { const dim_t nr_cur = 4; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; double* restrict bj = b; double* restrict ai = a + i_edge*rs_a; if ( 2 == m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x4m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); //cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x4m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } } void bli_dgemmsup_rd_haswell_asm_3x4m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) 
vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. 
label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rd_haswell_asm_2x4m

   Inline-assembly kernel that updates one 2x4 block of C. Each of the
   eight results is built as a 4-wide partial dot product over k (one ymm
   accumulator per element of the block), reduced horizontally, scaled by
   alpha, and then either stored directly (beta == 0) or combined with
   beta * C.

   Memory access, as coded below:
     a: two rows, at a and a + rs_a; each is read as contiguous doubles,
        advancing 4 (vector loops) or 1 (scalar loop) doubles per step.
     b: four columns, at b + j*cs_b for j = 0..3; each is read as
        contiguous doubles, advancing in lock-step with a.
     c: two rows, at c and c + rs_c; each is written as 4 contiguous
        doubles.

   k0 is split into k0/16 passes of the 4x-unrolled main loop,
   (k0%16)/4 passes of the vector edge loop and k0%4 scalar iterations.

   conja, conjb, m0, n0, data and cntx are not referenced by the active
   code (they only appear in the disabled "#if 0" reference call). cs_a,
   rs_b and cs_c are handed to the asm statement as operands, but every
   instruction that would load them is commented out.
*/
void bli_dgemmsup_rd_haswell_asm_2x4m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;       // passes of the 4x-unrolled main loop
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;  // passes of the 4-wide edge loop
	uint64_t k_left1  = k_left16 % 4;  // scalar iterations

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	                                   // Only the eight accumulators used by
	                                   // this kernel are cleared.
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
	vxorpd(ymm7,  ymm7,  ymm7)
	vxorpd(ymm8,  ymm8,  ymm8)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
#endif

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	prefetch(0, mem(rcx,         3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rax       ), ymm0)     // 4 doubles of row 0 of a
	vmovupd(mem(rax, r8, 1), ymm1)     // 4 doubles of row 1 of a
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)    // 4 doubles of column 0 of b
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)    // column 1 of b
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)    // column 2 of b
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)    // column 3 of b
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rax       ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	add(imm(1*8), rax)                 // a += 1 double (8 bytes);

	vmovsd(mem(rbx        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1 double (8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // Accumulator layout (row of c x column of c):
	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)                 // c := beta*c + alpha*a*b, row by row

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)                      // advance to the next row of c

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)                 // c := alpha*a*b; c is never read

	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)                      // advance to the next row of c

	vmovupd(ymm5, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rd_haswell_asm_1x4m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
uint64_t k_iter16 = k0 / 16;       // passes of the 4x-unrolled main loop
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;  // passes of the 4-wide edge loop
	uint64_t k_left1  = k_left16 % 4;  // scalar iterations

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	/*
	  rrc:
	    --------        --  --  --        | | | |
	    --------        --  --  --  ...   | | | |
	    --------   +=   --  --  --        | | | |
	    --------                          | | | |
	    --------                             :
	    --------                             :

	  NOTE(review): "rrc" presumably names the storage of c, a and b
	  (row, row, column); confirm against the kernel's callers.

	  What the assembly below does: one row of a (contiguous doubles at
	  a) is multiplied against four columns of b (contiguous doubles at
	  b + j*cs_b, j = 0..3), giving four dot products that are written
	  as 4 contiguous doubles at c. Only cs_b is loaded from the stride
	  operands; rs_a and rs_c are not needed for a single row.
	*/

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	                                   // Only the four accumulators used by
	                                   // this kernel are cleared.
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm7,  ymm7,  ymm7)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm13, ymm13, ymm13)
#endif

	mov(var(a), rax)                   // load address of a.
	//mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	//lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), rcx)                   // load address of c
	//mov(var(rs_c), rdi)                // load rs_c
	//lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	prefetch(0, mem(rcx,         3*8)) // prefetch c + 0*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rax       ), ymm0)     // 4 doubles of the row of a
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)    // 4 doubles of column 0 of b
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)    // column 1 of b
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)    // column 2 of b
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)    // column 3 of b
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax       ), ymm0)
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax       ), ymm0)
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax       ), ymm0)
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax       ), ymm0)
	add(imm(4*8), rax)                 // a += 4 doubles (32 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (32 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	vmovsd(mem(rax       ), xmm0)
	add(imm(1*8), rax)                 // a += 1 double (8 bytes);

	vmovsd(mem(rbx        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1 double (8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // Accumulator layout (one row of c):
	                                   // ymm4  ymm7  ymm10 ymm13

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)                 // c := beta*c + alpha*a*b

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.
label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_6x2m ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t m_iter = m0 / 6; uint64_t m_left = m0 % 6; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( m_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), r14) // load address of a. 
mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rdx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // r14 = rax = a // rdx = rbx = b // r9 = m dim index ii mov(var(m_iter), r9) // ii = m_iter; label(.DLOOP3X4I) // LOOP OVER ii = [ m_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c + 6*ii*rs_c; lea(mem(r14), rax) // rax = a + 6*ii*rs_a; lea(mem(rdx), rbx) // rbx = b; lea(mem(rcx, rdi, 2), r10) // lea(mem(r10, rdi, 1), r10) // rdx = c + 3*rs_c; prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c prefetch(0, mem(r10, 3*8)) // prefetch c + 3*rs_c prefetch(0, mem(r10, rdi, 1, 3*8)) // prefetch c + 4*rs_c prefetch(0, mem(r10, rdi, 2, 3*8)) // prefetch c + 5*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) 
vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. 
label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovupd(mem(rax, r13, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rax, r8, 4), ymm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovupd(mem(rax, r15, 1), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovsd(mem(rax, r8, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) vmovsd(mem(rax, r13, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rax, r8, 4), xmm3) vfmadd231pd(ymm0, ymm3, ymm12) vfmadd231pd(ymm1, ymm3, ymm13) vmovsd(mem(rax, r15, 1), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm14) vfmadd231pd(ymm1, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 // ymm8 ymm9 // ymm10 ymm11 // ymm12 ymm13 // ymm14 ymm15 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) vhaddpd( ymm9, ymm8, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm8 ) vhaddpd( ymm11, ymm10, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm10 ) vhaddpd( ymm13, ymm12, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm12 ) vhaddpd( ymm15, ymm14, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm14 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) // xmm8 = sum(ymm8) sum(ymm9) // xmm10 = sum(ymm10) sum(ymm11) // xmm12 = sum(ymm12) sum(ymm13) // xmm14 = sum(ymm14) sum(ymm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) vmulpd(xmm0, xmm8, xmm8) vmulpd(xmm0, xmm10, xmm10) vmulpd(xmm0, xmm12, xmm12) vmulpd(xmm0, xmm14, xmm14) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm10) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm12) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm14) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) label(.DDONE) lea(mem(r12, rdi, 4), r12) // lea(mem(r12, rdi, 2), r12) // c_ii = r12 += 6*rs_c lea(mem(r14, r8, 4), r14) // lea(mem(r14, r8, 2), r14) // a_ii = r14 += 6*rs_a dec(r9) // ii -= 1; jne(.DLOOP3X4I) // iterate again if ii != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [m_iter] "m" (m_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( m_left ) { const dim_t nr_cur = 2; const dim_t i_edge = m0 - ( dim_t )m_left; double* restrict cij = c + i_edge*rs_c; double* restrict bj = b; double* restrict ai = a + i_edge*rs_a; if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rd_haswell_asm_3x2m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x2m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { const dim_t mr_cur = 1; bli_dgemmsup_rd_haswell_asm_1x2m ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); } } }

/*
   bli_dgemmsup_rd_haswell_asm_3x2m

   Dot-product-based double-precision kernel that updates exactly one
   3x2 tile of C:

     C(i,j) := beta * C(i,j) + alpha * sum_{p<k0} A(i,p) * B(p,j)
     for i in [0,3) and j in [0,2).

   What the enabled code reads:
   - k0:            split into k_iter16 blocks of 16, then k_iter4 blocks of
                    4, then k_left1 single elements.
   - a, rs_a0:      three rows of A at a, a + rs_a0 and a + 2*rs_a0 (in
                    elements); each row is walked contiguously along k.
   - b, cs_b0:      two columns of B at b and b + cs_b0 (in elements); each
                    column is walked contiguously along k.
   - c, rs_c0:      three rows of C, rs_c0 elements apart; two contiguous
                    doubles are stored per row.
   - alpha, beta:   dereferenced once each.

   What the enabled code does not read: conja, conjb, m0, n0, data and
   cntx are only mentioned in the disabled (#if 0) reference call. cs_a,
   rs_b and cs_c are passed as asm operands but no instruction loads them.

   NOTE(review): since A/B are loaded and C is stored as contiguous
   doubles, the result is only meaningful when cs_a0 == 1, rs_b0 == 1 and
   cs_c0 == 1 -- confirm that callers guarantee this.

   If beta compares equal to zero, C is overwritten without being read.
*/
void bli_dgemmsup_rd_haswell_asm_3x2m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k0 == 16*k_iter16 + 4*k_iter4 + k_left1.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	                                   // ymm4..ymm9 are the six accumulators
	                                   // (one per element of the 3x2 tile).
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
	vxorpd(ymm6,  ymm6,  ymm6)
	vxorpd(ymm7,  ymm7,  ymm7)
	vxorpd(ymm8,  ymm8,  ymm8)
	vxorpd(ymm9,  ymm9,  ymm9)
#endif

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	                                   // set up the C pointer and its row
	                                   // stride, then prefetch the three rows
	                                   // of C that are written at the end.

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	prefetch(0, mem(rcx,         1*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP
	                                   // (4x unrolled; each unrolled step
	                                   // consumes 4 elements along k)

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx        ), ymm0)    // ymm0 = 4 elements of column 0 of b
	vmovupd(mem(rbx, r11, 1), ymm1)    // ymm1 = 4 elements of column 1 of b
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)     // ymm3 = 4 elements of row 0 of a
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)     // ymm3 = 4 elements of row 1 of a
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)     // ymm3 = 4 elements of row 2 of a
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.
	                                   // The vmovsd loads leave the upper lanes
	                                   // of ymm0/ymm1/ymm3 zero, so the ymm-wide
	                                   // FMAs only change lane 0 of each
	                                   // accumulator.

	vmovsd(mem(rbx        ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;

	vmovsd(mem(rax       ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovsd(mem(rax, r8, 2), xmm3)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // accumulator layout (row i of C per line):
	                                   // ymm4  ymm5
	                                   // ymm6  ymm7
	                                   // ymm8  ymm9

	                                   // horizontally reduce each accumulator
	                                   // pair to two scalars (destination is
	                                   // the last operand).
	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	                                   // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )
	                                   // xmm6[0] = sum(ymm6); xmm6[1] = sum(ymm7)

	vhaddpd( ymm9, ymm8, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm8 )
	                                   // xmm8[0] = sum(ymm8); xmm8[1] = sum(ymm9)

	                                   // xmm4 = sum(ymm4) sum(ymm5)
	                                   // xmm6 = sum(ymm6) sum(ymm7)
	                                   // xmm8 = sum(ymm8) sum(ymm9)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4)           // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0
	                                   // NOTE: vucomisd also sets ZF when an
	                                   // operand is NaN, so a NaN beta takes
	                                   // the beta == 0 path as well.

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)                 // c(i,0:1) = beta*c(i,0:1) + alpha*ab

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)                      // advance to the next row of c

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), xmm3, xmm8)
	vmovupd(xmm8, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)                 // c(i,0:1) = alpha*ab (c is not read)

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
      [k_iter16] "m" (k_iter16),
      [k_iter4]  "m" (k_iter4),
      [k_left1]  "m" (k_left1),
      [a]        "m" (a),
      [rs_a]     "m" (rs_a),
      [cs_a]     "m" (cs_a),
      [b]        "m" (b),
      [rs_b]     "m" (rs_b),
      [cs_b]     "m" (cs_b),
      [alpha]    "m" (alpha),
      [beta]     "m" (beta),
      [c]        "m" (c),
      [rs_c]     "m" (rs_c),
      [cs_c]     "m" (cs_c)/*,
      [a_next]   "m" (a_next),
      [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rd_haswell_asm_2x2m

   Dot-product-based double-precision kernel that updates exactly one
   2x2 tile of C:

     C(i,j) := beta * C(i,j) + alpha * sum_{p<k0} A(i,p) * B(p,j)
     for i in [0,2) and j in [0,2).

   What the enabled code reads:
   - k0:            split into k_iter16 blocks of 16, then k_iter4 blocks of
                    4, then k_left1 single elements.
   - a, rs_a0:      two rows of A at a and a + rs_a0 (in elements); each
                    row is walked contiguously along k.
   - b, cs_b0:      two columns of B at b and b + cs_b0 (in elements); each
                    column is walked contiguously along k.
   - c, rs_c0:      two rows of C, rs_c0 elements apart; two contiguous
                    doubles are stored per row.
   - alpha, beta:   dereferenced once each.

   What the enabled code does not read: conja, conjb, m0, n0, data and
   cntx are only mentioned in the disabled (#if 0) reference call. cs_a,
   rs_b and cs_c are passed as asm operands but no instruction loads them.

   NOTE(review): since A/B are loaded and C is stored as contiguous
   doubles, the result is only meaningful when cs_a0 == 1, rs_b0 == 1 and
   cs_c0 == 1 -- confirm that callers guarantee this.

   If beta compares equal to zero, C is overwritten without being read.
*/
void bli_dgemmsup_rd_haswell_asm_2x2m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k0 == 16*k_iter16 + 4*k_iter4 + k_left1.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	/*
	  rrc:
	    --------        -- -- --       | |
	    --------        -- -- -- ...   | |
	    --------   +=   -- -- --       | |
	    --------        -- -- --       | |
	    --------        -- -- --        :
	    --------        -- -- --        :
	*/
	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	                                   // ymm4..ymm7 are the four accumulators
	                                   // (one per element of the 2x2 tile).
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
	vxorpd(ymm6,  ymm6,  ymm6)
	vxorpd(ymm7,  ymm7,  ymm7)
#endif

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	                                   // set up the C pointer and its row
	                                   // stride, then prefetch the two rows
	                                   // of C that are written at the end.

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	//lea(mem(rcx, rdi, 2), rdx)         //
	//lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx,         1*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP
	                                   // (4x unrolled; each unrolled step
	                                   // consumes 4 elements along k)

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx        ), ymm0)    // ymm0 = 4 elements of column 0 of b
	vmovupd(mem(rbx, r11, 1), ymm1)    // ymm1 = 4 elements of column 1 of b
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)     // ymm3 = 4 elements of row 0 of a
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)     // ymm3 = 4 elements of row 1 of a
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.
	                                   // The vmovsd loads leave the upper lanes
	                                   // of ymm0/ymm1/ymm3 zero, so the ymm-wide
	                                   // FMAs only change lane 0 of each
	                                   // accumulator.

	vmovsd(mem(rbx        ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;

	vmovsd(mem(rax       ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // accumulator layout (row i of C per line):
	                                   // ymm4  ymm5
	                                   // ymm6  ymm7

	                                   // horizontally reduce each accumulator
	                                   // pair to two scalars (destination is
	                                   // the last operand).
	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	                                   // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )
	                                   // xmm6[0] = sum(ymm6); xmm6[1] = sum(ymm7)

	                                   // xmm4 = sum(ymm4) sum(ymm5)
	                                   // xmm6 = sum(ymm6) sum(ymm7)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4)           // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0
	                                   // NOTE: vucomisd also sets ZF when an
	                                   // operand is NaN, so a NaN beta takes
	                                   // the beta == 0 path as well.

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)                 // c(i,0:1) = beta*c(i,0:1) + alpha*ab

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)                      // advance to the next row of c

	vfmadd231pd(mem(rcx), xmm3, xmm6)
	vmovupd(xmm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)                 // c(i,0:1) = alpha*ab (c is not read)

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
      [k_iter16] "m" (k_iter16),
      [k_iter4]  "m" (k_iter4),
      [k_left1]  "m" (k_left1),
      [a]        "m" (a),
      [rs_a]     "m" (rs_a),
      [cs_a]     "m" (cs_a),
      [b]        "m" (b),
      [rs_b]     "m" (rs_b),
      [cs_b]     "m" (cs_b),
      [alpha]    "m" (alpha),
      [beta]     "m" (beta),
      [c]        "m" (c),
      [rs_c]     "m" (rs_c),
      [cs_c]     "m" (cs_c)/*,
      [a_next]   "m" (a_next),
      [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rd_haswell_asm_1x2m

   Dot-product-based double-precision kernel that updates exactly one
   1x2 tile of C:

     C(0,j) := beta * C(0,j) + alpha * sum_{p<k0} A(0,p) * B(p,j)
     for j in [0,2).

   What the enabled code reads:
   - k0:            split into k_iter16 blocks of 16, then k_iter4 blocks of
                    4, then k_left1 single elements.
   - a:             one row of A, walked contiguously along k.
   - b, cs_b0:      two columns of B at b and b + cs_b0 (in elements); each
                    column is walked contiguously along k.
   - c:             two contiguous doubles are stored at c.
   - alpha, beta:   dereferenced once each.

   What the enabled code does not read: conja, conjb, m0, n0, data and
   cntx are only mentioned in the disabled (#if 0) reference call. rs_a,
   cs_a, rs_b, rs_c and cs_c are passed as asm operands but no
   instruction loads them.

   NOTE(review): since A/B are loaded and C is stored as contiguous
   doubles, the result is only meaningful when cs_a0 == 1, rs_b0 == 1 and
   cs_c0 == 1 -- confirm that callers guarantee this.

   If beta compares equal to zero, C is overwritten without being read.
*/
void bli_dgemmsup_rd_haswell_asm_1x2m
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	// k0 == 16*k_iter16 + 4*k_iter4 + k_left1.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	/*
	  rrc:
	    --------        -- -- --       | |
	    --------        -- -- -- ...   | |
	    --------   +=   -- -- --       | |
	    --------        -- -- --       | |
	    --------        -- -- --        :
	    --------        -- -- --        :
	*/
	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	                                   // ymm4 and ymm5 are the two accumulators
	                                   // (one per element of the 1x2 tile).
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
#endif

	mov(var(a), rax)                   // load address of a.
	//mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	//lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	                                   // set up the C pointer and prefetch
	                                   // the single row of C that is written
	                                   // at the end.

	mov(var(c), rcx)                   // load address of c
	//mov(var(rs_c), rdi)                // load rs_c
	//lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	//lea(mem(rcx, rdi, 2), rdx)         //
	//lea(mem(rdx, rdi, 1), rdx)         // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx,         1*8)) // prefetch c + 0*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP
	                                   // (4x unrolled; each unrolled step
	                                   // consumes 4 elements along k)

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx        ), ymm0)    // ymm0 = 4 elements of column 0 of b
	vmovupd(mem(rbx, r11, 1), ymm1)    // ymm1 = 4 elements of column 1 of b
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)     // ymm3 = 4 elements of the row of a
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rbx        ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4*rs_b = 4*8;

	vmovupd(mem(rax       ), ymm3)
	add(imm(4*8), rax)                 // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.
	                                   // The vmovsd loads leave the upper lanes
	                                   // of ymm0/ymm1/ymm3 zero, so the ymm-wide
	                                   // FMAs only change lane 0 of each
	                                   // accumulator.

	vmovsd(mem(rbx        ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx)                 // b += 1*rs_b = 1*8;

	vmovsd(mem(rax       ), xmm3)
	add(imm(1*8), rax)                 // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // accumulator layout:
	                                   // ymm4  ymm5

	                                   // horizontally reduce the accumulator
	                                   // pair to two scalars (destination is
	                                   // the last operand).
	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	                                   // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	                                   // xmm4 = sum(ymm4) sum(ymm5)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4)           // scale by alpha

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0
	                                   // NOTE: vucomisd also sets ZF when an
	                                   // operand is NaN, so a NaN beta takes
	                                   // the beta == 0 path as well.

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)                 // c(0,0:1) = beta*c(0,0:1) + alpha*ab

	vfmadd231pd(mem(rcx), xmm3, xmm4)
	vmovupd(xmm4, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)                 // c(0,0:1) = alpha*ab (c is not read)

	vmovupd(xmm4, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
      [k_iter16] "m" (k_iter16),
      [k_iter4]  "m" (k_iter4),
      [k_left1]  "m" (k_left1),
      [a]        "m" (a),
      [rs_a]     "m" (rs_a),
      [cs_a]     "m" (cs_a),
      [b]        "m" (b),
      [rs_b]     "m" (rs_b),
      [cs_b]     "m" (cs_b),
      [alpha]    "m" (alpha),
      [beta]     "m" (beta),
      [c]        "m" (c),
      [rs_c]     "m" (rs_c),
      [cs_c]     "m" (cs_c)/*,
      [a_next]   "m" (a_next),
      [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}
blis-0.6.1/kernels/haswell/3/sup/old/bli_gemmsup_rd_haswell_asm_d6x8n.c000066400000000000000000004622051360743507500260650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" /* rrc: -------- ------ | | | | | | | | -------- ------ | | | | | | | | -------- += ------ ... | | | | | | | | -------- ------ | | | | | | | | -------- ------ : -------- ------ : Assumptions: - C is row-stored and B is column-stored; - A is row-stored; - m0 and n0 are at most MR and NR, respectively. Therefore, this (r)ow-preferential microkernel is well-suited for a dot-product-based accumulation that performs vector loads from both A and B. */ // Prototype reference microkernels. GEMMSUP_KER_PROT( float, s, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( double, d, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( scomplex, c, gemmsup_r_haswell_ref ) GEMMSUP_KER_PROT( dcomplex, z, gemmsup_r_haswell_ref ) // Define parameters and variables for edge case kernel map. 
#define NUM_MR 4 #define NUM_NR 4 #define FUNCPTR_T dgemmsup_ker_ft #if 0 static dim_t mrs[NUM_MR] = { 6, 3, 2, 1 }; static dim_t nrs[NUM_NR] = { 8, 4, 2, 1 }; static FUNCPTR_T kmap[NUM_MR][NUM_NR] = { /* 8 4 2 1 */ /* 6 */ { bli_dgemmsup_rd_haswell_asm_6x8n, bli_dgemmsup_rd_haswell_asm_6x4n, bli_dgemmsup_rd_haswell_asm_6x2n, bli_dgemmsup_r_haswell_ref }, /* 3 */ { bli_dgemmsup_rd_haswell_asm_3x8n, bli_dgemmsup_rd_haswell_asm_3x4n, bli_dgemmsup_rd_haswell_asm_3x2n, bli_dgemmsup_r_haswell_ref }, /* 2 */ { bli_dgemmsup_rd_haswell_asm_2x8n, bli_dgemmsup_rd_haswell_asm_2x4n, bli_dgemmsup_rd_haswell_asm_2x2n, bli_dgemmsup_r_haswell_ref }, /* 1 */ { bli_dgemmsup_rd_haswell_asm_1x8n, bli_dgemmsup_rd_haswell_asm_1x4n, bli_dgemmsup_rd_haswell_asm_1x2n, bli_dgemmsup_r_haswell_ref } }; #endif void bli_dgemmsup_rd_haswell_asm_6x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif uint64_t m_left = m0 % 6; // First check whether this is a edge case in the n dimension. If so, // dispatch other ?x8m kernels, as needed. 
if ( m_left ) { double* restrict cij = c; double* restrict bj = b; double* restrict ai = a; if ( 3 <= m_left ) { const dim_t mr_cur = 3; bli_dgemmsup_rd_haswell_asm_3x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 2 <= m_left ) { const dim_t mr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur; } if ( 1 == m_left ) { #if 0 const dim_t mr_cur = 1; //bli_dgemmsup_r_haswell_ref bli_dgemmsup_rd_haswell_asm_1x8n ( conja, conjb, mr_cur, n0, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_TRANSPOSE, conja, k0, n0, alpha, bj, rs_b0, cs_b0, ai, cs_a0, beta, cij, cs_c0, cntx, NULL ); #endif } return; } //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t n_iter = n0 / 4; uint64_t n_left = n0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), rdx) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a //mov(var(b), r14) // load address of b. 
//mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b //mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // rdx = rax = a // r14 = rbx = b // r9 = m dim index ii // r15 = n dim index jj mov(imm(0), r9) // ii = 0; label(.DLOOP3X4I) // LOOP OVER ii = [ 0 1 ... ] mov(var(b), r14) // load address of b mov(var(c), r12) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) lea(mem( , r9, 1), rsi) // rsi = r9 = 3*ii; imul(rdi, rsi) // rsi *= rs_c lea(mem(r12, rsi, 1), r12) // r12 = c + 3*ii*rs_c; lea(mem( , r9, 1), rsi) // rsi = r9 = 3*ii; imul(r8, rsi) // rsi *= rs_a; lea(mem(rdx, rsi, 1), rdx) // rax = a + 3*ii*rs_a; mov(var(n_iter), r15) // jj = n_iter; label(.DLOOP3X4J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(rdx), rax) // rax = a_ii; lea(mem(r14), rbx) // rbx = b_jj; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r11, r11, 2), rdi) // rdi = 3*cs_b lea(mem(rbx, r11, 4), r10) // r10 = rbx + 4*cs_b mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b add(imm(8*8), r10) // r10 += 8*rs_b = 8*8; #else prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 
4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(r10, 8*8)) // prefetch rbx + 4*cs_b + 8*rs_b prefetch(0, mem(r10, r11, 1, 8*8)) // prefetch rbx + 5*cs_b + 8*rs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(r10, r11, 2, 8*8)) // prefetch rbx + 6*cs_b + 8*rs_b prefetch(0, mem(r10, r13, 1, 8*8)) // prefetch rbx + 7*cs_b + 8*rs_b add(imm(16*8), r10) 
// r10 += 8*rs_b = 8*8; #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. 
label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // 
rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4*8), r12) // c_jj = r12 += 4*cs_c lea(mem(r14, r11, 4), r14) // b_jj = r14 += 4*cs_b dec(r15) // jj -= 1; jne(.DLOOP3X4J) // iterate again if jj != 0. add(imm(3), r9) // ii += 3; cmp(imm(3), r9) // compare ii to 3 jle(.DLOOP3X4I) // if ii <= 3, jump to beginning // of ii loop; otherwise, loop ends. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. if ( n_left ) { const dim_t mr_cur = 6; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; double* restrict bj = b + j_edge*cs_b; if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_6x2n ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; //bli_dgemmsup_rd_haswell_asm_6x1n bli_dgemmsup_r_haswell_ref ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, mr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } } } void bli_dgemmsup_rd_haswell_asm_3x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, 
rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t n_iter = n0 / 4; uint64_t n_left = n0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), rdx) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // rdx = rax = a // r14 = rbx = b // r9 = unused // r15 = n dim index jj mov(var(n_iter), r15) // jj = n_iter; label(.DLOOP3X4J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm12, ymm12, ymm12) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) vxorpd(ymm15, ymm15, ymm15) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(rdx), rax) // rax = a_ii; lea(mem(r14), rbx) // rbx = b_jj; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c #endif lea(mem(r11, r11, 2), rdi) // rdi = 3*cs_b lea(mem(rbx, r11, 4), r10) // r10 = rbx + 4*cs_b mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b add(imm(8*8), r10) // r10 += 8*rs_b = 8*8; #else prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 
4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(r10, 8*8)) // prefetch rbx + 4*cs_b + 8*rs_b prefetch(0, mem(r10, r11, 1, 8*8)) // prefetch rbx + 5*cs_b + 8*rs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(r10, r11, 2, 8*8)) // prefetch rbx + 6*cs_b + 8*rs_b prefetch(0, mem(r10, r13, 1, 8*8)) // prefetch rbx + 7*cs_b + 8*rs_b add(imm(16*8), r10) 
// r10 += 8*rs_b = 8*8; #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) vmovupd(mem(rax, r8, 2), ymm2) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. 
label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) vmovsd(mem(rax, r8, 2), xmm2) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vfmadd231pd(ymm2, ymm3, ymm6) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vfmadd231pd(ymm2, ymm3, ymm9) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vfmadd231pd(ymm2, ymm3, ymm12) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) vfmadd231pd(ymm2, ymm3, ymm15) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) vhaddpd( ymm9, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm15, ymm12, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm6 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // 
rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm6) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) add(rdi, rcx) vmovupd(ymm6, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4*8), r12) // c_jj = r12 += 4*cs_c lea(mem(r14, r11, 4), r14) // b_jj = r14 += 4*cs_b dec(r15) // jj -= 1; jne(.DLOOP3X4J) // iterate again if jj != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( n_left ) { const dim_t mr_cur = 3; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; double* restrict bj = b + j_edge*cs_b; if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_3x2n ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; //bli_dgemmsup_rd_haswell_asm_3x1n bli_dgemmsup_r_haswell_ref ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, mr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } } } void bli_dgemmsup_rd_haswell_asm_2x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t n_iter = n0 / 4; uint64_t n_left = n0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), rdx) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // rdx = rax = a // r14 = rbx = b // r9 = unused // r15 = n dim index jj mov(var(n_iter), r15) // jj = n_iter; label(.DLOOP3X4J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(rdx), rax) // rax = a_ii; lea(mem(r14), rbx) // rbx = b_jj; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c #endif lea(mem(r11, r11, 2), rdi) // rdi = 3*cs_b lea(mem(rbx, r11, 4), r10) // r10 = rbx + 4*cs_b mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b add(imm(8*8), r10) // r10 += 8*rs_b = 8*8; #else prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b #endif vmovupd(mem(rax ), ymm0) 
vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(r10, 8*8)) // prefetch rbx + 4*cs_b + 8*rs_b prefetch(0, mem(r10, r11, 1, 8*8)) // prefetch rbx + 5*cs_b + 8*rs_b #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(r10, r11, 2, 8*8)) // prefetch rbx + 6*cs_b + 8*rs_b prefetch(0, mem(r10, r13, 1, 8*8)) // prefetch rbx + 7*cs_b + 8*rs_b add(imm(16*8), r10) // r10 += 8*rs_b = 8*8; #endif vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. 
label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) vmulpd(ymm0, ymm6, ymm6) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4*8), r12) // c_jj = r12 += 4*cs_c lea(mem(r14, r11, 4), r14) // b_jj = r14 += 4*cs_b dec(r15) // jj -= 1; jne(.DLOOP3X4J) // iterate again if jj != 0. 
label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. if ( n_left ) { const dim_t mr_cur = 2; const dim_t j_edge = n0 - ( dim_t )n_left; double* restrict cij = c + j_edge*cs_c; double* restrict ai = a; double* restrict bj = b + j_edge*cs_b; if ( 2 <= n_left ) { const dim_t nr_cur = 2; bli_dgemmsup_rd_haswell_asm_2x2n ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur; } if ( 1 == n_left ) { #if 0 const dim_t nr_cur = 1; //bli_dgemmsup_rd_haswell_asm_2x1n bli_dgemmsup_r_haswell_ref ( conja, conjb, mr_cur, nr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0, beta, cij, rs_c0, cs_c0, data, cntx ); #else bli_dgemv_ex ( BLIS_NO_TRANSPOSE, conjb, mr_cur, k0, alpha, ai, rs_a0, cs_a0, bj, rs_b0, beta, cij, rs_c0, cntx, NULL ); #endif } } } void bli_dgemmsup_rd_haswell_asm_1x8n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, 
rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t n_iter = n0 / 4; uint64_t n_left = n0 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; if ( n_iter == 0 ) goto consider_edge_cases; // ------------------------------------------------------------------------- begin_asm() //vzeroall() // zero all xmm/ymm registers. mov(var(a), rdx) // load address of a. //mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a //lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), r14) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), r12) // load address of c //mov(var(rs_c), rdi) // load rs_c //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) // r12 = rcx = c // rdx = rax = a // r14 = rbx = b // r9 = unused // r15 = n dim index jj mov(var(n_iter), r15) // jj = n_iter; label(.DLOOP3X4J) // LOOP OVER jj = [ n_iter ... 1 0 ] #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. 
vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm13, ymm13, ymm13) #endif lea(mem(r12), rcx) // rcx = c_iijj; lea(mem(rdx), rax) // rax = a_ii; lea(mem(r14), rbx) // rbx = b_jj; #if 1 mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c #endif lea(mem(r11, r11, 2), rdi) // rdi = 3*cs_b lea(mem(rbx, r11, 4), r10) // r10 = rbx + 4*cs_b mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 #if 0 prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b add(imm(8*8), r10) // r10 += 8*rs_b = 8*8; #else prefetch(0, mem(r10, 0*8)) // prefetch rbx + 4*cs_b prefetch(0, mem(r10, r11, 1, 0*8)) // prefetch rbx + 5*cs_b #endif vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 1 #if 1 prefetch(0, mem(r10, r11, 2, 0*8)) // prefetch rbx + 6*cs_b prefetch(0, mem(r10, r13, 1, 0*8)) // prefetch rbx + 7*cs_b #endif vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) 
// b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 2 #if 1 prefetch(0, mem(r10, 8*8)) // prefetch rbx + 4*cs_b + 8*rs_b prefetch(0, mem(r10, r11, 1, 8*8)) // prefetch rbx + 5*cs_b + 8*rs_b #endif vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) // ---------------------------------- iteration 3 #if 1 prefetch(0, mem(r10, r11, 2, 8*8)) // prefetch rbx + 6*cs_b + 8*rs_b prefetch(0, mem(r10, r13, 1, 8*8)) // prefetch rbx + 7*cs_b + 8*rs_b add(imm(16*8), r10) // r10 += 8*rs_b = 8*8; #endif vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. 
label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rax ), xmm0) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 // ymm6 ymm9 ymm12 ymm15 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) //add(rdi, rcx) label(.DDONE) add(imm(4*8), r12) // c_jj = r12 += 4*cs_c lea(mem(r14, r11, 4), r14) // b_jj = r14 += 4*cs_b dec(r15) // jj -= 1; jne(.DLOOP3X4J) // iterate again if jj != 0. label(.DRETURN) end_asm( : // output operands (none) : // input operands [n_iter] "m" (n_iter), [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) consider_edge_cases: // Handle edge cases in the m dimension, if they exist. 
if ( n_left )
	{
		const dim_t      mr_cur = 1;
		const dim_t      j_edge = n0 - ( dim_t )n_left;

		double* restrict cij = c + j_edge*cs_c;
		double* restrict ai  = a;
		double* restrict bj  = b + j_edge*cs_b;

		if ( 2 <= n_left )
		{
			const dim_t nr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_1x2n
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			cij += nr_cur*cs_c0; bj += nr_cur*cs_b0; n_left -= nr_cur;
		}
		if ( 1 == n_left )
		{
			#if 0
			const dim_t nr_cur = 1;

			//bli_dgemmsup_rd_haswell_asm_1x1n
			bli_dgemmsup_r_haswell_ref
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			#else
			bli_ddotxv_ex
			(
			  conja, conjb, k0,
			  alpha, ai, cs_a0, bj, rs_b0,
			  beta, cij, cntx, NULL
			);
			#endif
		}
	}
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rd_haswell_asm_6x4n
//
// Double-precision kernel written with AVX/FMA inline assembly (ymm registers,
// vfmadd231pd). It updates a panel of C that is four columns wide, three rows
// at a time. For every element of the current 3x4 block it accumulates, along
// k, the product of a row of A with a column of B, reduces the four lanes of
// each accumulator to a scalar, scales the result by alpha and, unless
// beta == 0, adds beta times the value already stored in C.
//
// Parameters:
//   conja, conjb    - not referenced by the assembly; only forwarded to the
//                     edge-case kernels at the end of the function.
//   m0              - number of rows to update. m0 / 3 three-row blocks are
//                     handled by the assembly loop; the m0 % 3 leftover rows
//                     are delegated to the 2x4n / 1x4n kernels.
//   n0              - not referenced outside the disabled '#if 0' branch;
//                     four columns are always updated.
//   k0              - length of the dot products. Split into k0 / 16 passes
//                     of the 4x-unrolled main loop, then (k0 % 16) / 4 passes
//                     of the 4-wide loop, then k0 % 4 scalar iterations.
//   alpha, beta     - pointers to the scalars (dereferenced in the assembly).
//   a, rs_a0, cs_a0 - matrix A; only the row stride is loaded by the assembly.
//   b, rs_b0, cs_b0 - matrix B; only the column stride is loaded.
//   c, rs_c0, cs_c0 - matrix C; only the row stride is loaded.
//   data, cntx      - only forwarded to the edge-case kernels.
//
// NOTE(review): despite the "6" in the name, the row loop advances by three
// rows per iteration (m_iter = m0 / 3).
// NOTE(review): A and B are read as contiguous doubles along k, and each row
// of C is read/written as four contiguous doubles; cs_a, rs_b and cs_c are
// passed to the asm statement but never loaded. Presumably the caller
// guarantees cs_a == 1, rs_b == 1 and cs_c == 1 -- TODO confirm.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rd_haswell_asm_6x4n
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t m_iter = m0 / 3;
	uint64_t m_left = m0 % 3;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// Fewer than three rows: skip the assembly entirely and let the
	// edge-case code below handle everything.
	if ( m_iter == 0 ) goto consider_edge_cases;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall()                         // zero all xmm/ymm registers.

	mov(var(a), r14)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rdx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b
	lea(mem(r8,  r8,  2), r10)         // r10 = 3*rs_a

	mov(var(c), r12)                   // load address of c
	//mov(var(rs_c), rdi)                // load rs_c
	//lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	// Register usage in this kernel:
	// r12 = rcx = c   (r12 keeps the block base, rcx is the working copy)
	// r14 = rax = a   (r14 keeps the block base, rax is the working copy)
	// rdx = rbx = b   (rdx keeps the base, rbx is the working copy)
	// r8  = rs_a in bytes, r10 = 3*rs_a in bytes
	// r11 = cs_b in bytes, r13 = 3*cs_b in bytes
	// rdi = 5*rs_a during the k loops, rs_c in bytes afterwards
	// rsi = k loop counter
	// r9  = m dim index ii
	// r15 = not used by this kernel

	mov(var(m_iter), r9)               // ii = m_iter;

	label(.DLOOP3X4I)                  // LOOP OVER ii = [ m_iter .. 1 0 ]

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	// Clear the twelve accumulators (one per element of the 3x4 block).
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
	vxorpd(ymm6,  ymm6,  ymm6)
	vxorpd(ymm7,  ymm7,  ymm7)
	vxorpd(ymm8,  ymm8,  ymm8)
	vxorpd(ymm9,  ymm9,  ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem(r12), rcx)                 // rcx = c + 3*ii*rs_c;
	lea(mem(r14), rax)                 // rax = a + 3*ii*rs_a;
	lea(mem(rdx), rbx)                 // rbx = b;

#if 0
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)
	prefetch(0, mem(rcx,         3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c
#endif
	lea(mem(r8,  r8,  4), rdi)         // rdi = 5*rs_a

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// Each of the four unrolled iterations below consumes four doubles of k:
	// ymm0..ymm2 hold rows 0..2 of a, ymm3 holds one column of b at a time,
	// and ymm(4 + i + 3*j) accumulates row i of a times column j of b.

	// ---------------------------------- iteration 0

#if 1
	// Prefetch the three rows of a that follow the current three-row block.
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8,  4, 0*8)) // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

#if 1
	prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*rs_a
	prefetch(0, mem(rax, r8,  4, 0*8)) // prefetch rax + 4*rs_a
	prefetch(0, mem(rax, rdi, 1, 0*8)) // prefetch rax + 5*rs_a
#endif

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	// One double of k per pass: vmovsd fills only the low lane of each
	// operand, while the FMAs still target the full ymm accumulators.
	vmovsd(mem(rax       ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax)                 // a += 1 double (1*8 bytes);

	vmovsd(mem(rbx        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1 double (1*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (row of c down, column of c across):
	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14
	                                   // ymm6  ymm9  ymm12 ymm15

	// Reduce the four lanes of every accumulator to one scalar and pack the
	// four scalars of each row into a single register.
	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	                                   // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	// rdi held 5*rs_a during the k loops; from here on it holds rs_c.
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: c_row = beta * c_row + alpha * (a * b), one row at a time.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	// beta == 0: overwrite c without reading it.
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	// Advance the block bases of c and a by three rows (2*stride + 1*stride).
	lea(mem(r12, rdi, 2), r12)         //
	lea(mem(r12, rdi, 1), r12)         // c_ii = r12 += 3*rs_c

	lea(mem(r14, r8,  2), r14)         //
	lea(mem(r14, r8,  1), r14)         // a_ii = r14 += 3*rs_a

	dec(r9)                            // ii -= 1;
	jne(.DLOOP3X4I)                    // iterate again if ii != 0.

	label(.DRETURN)

	end_asm(
	: // output operands (none)
	: // input operands
	  [m_iter]   "m" (m_iter),
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

	consider_edge_cases:

	// Handle edge cases in the m dimension, if they exist.
if ( m_left )
	{
		const dim_t      nr_cur = 4;
		const dim_t      i_edge = m0 - ( dim_t )m_left;

		double* restrict cij = c + i_edge*rs_c;
		double* restrict bj  = b;
		double* restrict ai  = a + i_edge*rs_a;

		if ( 2 == m_left )
		{
			const dim_t mr_cur = 2;

			bli_dgemmsup_rd_haswell_asm_2x4n
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
			//cij += mr_cur*rs_c0; ai += mr_cur*rs_a0; m_left -= mr_cur;
		}
		if ( 1 == m_left )
		{
			const dim_t mr_cur = 1;

			bli_dgemmsup_rd_haswell_asm_1x4n
			(
			  conja, conjb, mr_cur, nr_cur, k0,
			  alpha, ai, rs_a0, cs_a0, bj, rs_b0, cs_b0,
			  beta, cij, rs_c0, cs_c0, data, cntx
			);
		}
	}
}

// -----------------------------------------------------------------------------
// bli_dgemmsup_rd_haswell_asm_3x4n
//
// Double-precision kernel written with AVX/FMA inline assembly (ymm registers,
// vfmadd231pd). It updates exactly one 3x4 block of C: every element receives
// the product of a row of A with a column of B accumulated along k, scaled by
// alpha and, unless beta == 0, added to beta times the value already in C.
// There is no loop over rows or columns and no edge-case handling.
//
// Parameters:
//   conja, conjb    - not referenced outside the disabled '#if 0' branch.
//   m0, n0          - not referenced outside the disabled '#if 0' branch;
//                     three rows and four columns are always processed.
//   k0              - length of the dot products. Split into k0 / 16 passes
//                     of the 4x-unrolled main loop, then (k0 % 16) / 4 passes
//                     of the 4-wide loop, then k0 % 4 scalar iterations.
//   alpha, beta     - pointers to the scalars (dereferenced in the assembly).
//   a, rs_a0, cs_a0 - matrix A; only the row stride is loaded by the assembly.
//   b, rs_b0, cs_b0 - matrix B; only the column stride is loaded.
//   c, rs_c0, cs_c0 - matrix C; only the row stride is loaded.
//   data, cntx      - not referenced outside the disabled '#if 0' branch.
//
// NOTE(review): A and B are read as contiguous doubles along k, and each row
// of C is read/written as four contiguous doubles; cs_a, rs_b and cs_c are
// passed to the asm statement but never loaded. Presumably the caller
// guarantees cs_a == 1, rs_b == 1 and cs_c == 1 -- TODO confirm.
// -----------------------------------------------------------------------------
void bli_dgemmsup_rd_haswell_asm_3x4n
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	// Clear the twelve accumulators (one per element of the 3x4 block).
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
	vxorpd(ymm6,  ymm6,  ymm6)
	vxorpd(ymm7,  ymm7,  ymm7)
	vxorpd(ymm8,  ymm8,  ymm8)
	vxorpd(ymm9,  ymm9,  ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)                 // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)              // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)           // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)           // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)                // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)            // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13)         // r13 = 3*cs_b

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	prefetch(0, mem(rcx,         3*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// Each of the four unrolled iterations below consumes four doubles of k:
	// ymm0..ymm2 hold rows 0..2 of a, ymm3 holds one column of b at a time,
	// and ymm(4 + i + 3*j) accumulates row i of a times column j of b.

	// ---------------------------------- iteration 0

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rax       ), ymm0)
	vmovupd(mem(rax, r8, 1), ymm1)
	vmovupd(mem(rax, r8, 2), ymm2)
	add(imm(4*8), rax)                 // a += 4 doubles (4*8 bytes);

	vmovupd(mem(rbx        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx)                 // b += 4 doubles (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: We must use ymm registers here bc
	                                   // using the xmm registers would zero out the
	                                   // high bits of the destination registers,
	                                   // which would destroy intermediate results.

	// One double of k per pass: vmovsd fills only the low lane of each
	// operand, while the FMAs still target the full ymm accumulators.
	vmovsd(mem(rax       ), xmm0)
	vmovsd(mem(rax, r8, 1), xmm1)
	vmovsd(mem(rax, r8, 2), xmm2)
	add(imm(1*8), rax)                 // a += 1 double (1*8 bytes);

	vmovsd(mem(rbx        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)
	vfmadd231pd(ymm2, ymm3, ymm6)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)
	vfmadd231pd(ymm1, ymm3, ymm8)
	vfmadd231pd(ymm2, ymm3, ymm9)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)
	vfmadd231pd(ymm2, ymm3, ymm12)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx)                 // b += 1 double (1*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm13)
	vfmadd231pd(ymm1, ymm3, ymm14)
	vfmadd231pd(ymm2, ymm3, ymm15)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (row of c down, column of c across):
	                                   // ymm4  ymm7  ymm10 ymm13
	                                   // ymm5  ymm8  ymm11 ymm14
	                                   // ymm6  ymm9  ymm12 ymm15

	// Reduce the four lanes of every accumulator to one scalar and pack the
	// four scalars of each row into a single register.
	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	                                   // xmm0[0] = sum(ymm4);  xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	                                   // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	vhaddpd( ymm8, ymm5, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm14, ymm11, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm5 )

	vhaddpd( ymm9, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )

	vhaddpd( ymm15, ymm12, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )

	vperm2f128(imm(0x20), ymm2, ymm0, ymm6 )

	                                   // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)
	                                   // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14)
	                                   // ymm6 = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15)

	// rdi still holds rs_c in bytes, loaded before the k loops.
	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4)           // scale by alpha
	vmulpd(ymm0, ymm5, ymm5)
	vmulpd(ymm0, ymm6, ymm6)

	//mov(var(cs_c), rsi)                // load cs_c
	//lea(mem(, rsi, 8), rsi)            // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: c_row = beta * c_row + alpha * (a * b), one row at a time.
	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm5)
	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vfmadd231pd(mem(rcx), ymm3, ymm6)
	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	// beta == 0: overwrite c without reading it.
	vmovupd(ymm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm5, mem(rcx))
	add(rdi, rcx)

	vmovupd(ymm6, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_dgemmsup_rd_haswell_asm_2x4n
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm10, ymm10, ymm10) vxorpd(ymm11, ymm11, ymm11) vxorpd(ymm13, ymm13, ymm13) vxorpd(ymm14, ymm14, ymm14) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 1 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 2 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) // ---------------------------------- iteration 3 vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) 
vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rax ), ymm0) vmovupd(mem(rax, r8, 1), ymm1) add(imm(4*8), rax) // a += 4*cs_b = 4*8; vmovupd(mem(rbx ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rbx, r11, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovupd(mem(rbx, r11, 2), ymm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovupd(mem(rbx, r13, 1), ymm3) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. 
vmovsd(mem(rax ), xmm0) vmovsd(mem(rax, r8, 1), xmm1) add(imm(1*8), rax) // a += 1*cs_b = 1*8; vmovsd(mem(rbx ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rbx, r11, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm7) vfmadd231pd(ymm1, ymm3, ymm8) vmovsd(mem(rbx, r11, 2), xmm3) vfmadd231pd(ymm0, ymm3, ymm10) vfmadd231pd(ymm1, ymm3, ymm11) vmovsd(mem(rbx, r13, 1), xmm3) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vfmadd231pd(ymm0, ymm3, ymm13) vfmadd231pd(ymm1, ymm3, ymm14) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. label(.DPOSTACCUM) // ymm4 ymm7 ymm10 ymm13 // ymm5 ymm8 ymm11 ymm14 vhaddpd( ymm7, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7) vhaddpd( ymm13, ymm10, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) // xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13) vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) vhaddpd( ymm8, ymm5, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm0 ) vhaddpd( ymm14, ymm11, ymm2 ) vextractf128(imm(1), ymm2, xmm1 ) vaddpd( xmm2, xmm1, xmm2 ) vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) // ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) // ymm5 = sum(ymm5) sum(ymm8) sum(ymm11) sum(ymm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(ymm0, ymm4, ymm4) // scale by alpha vmulpd(ymm0, ymm5, ymm5) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), ymm3, ymm4) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), ymm3, ymm5) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(ymm4, mem(rcx)) add(rdi, rcx) vmovupd(ymm5, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_1x4n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data ); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	/*
	  rrc:
	    --------        -- -- --      | | | |
	    --------        -- -- --  ... | | | |
	    --------   +=   -- -- --      | | | |
	    --------                      | | | |
	    --------                      :
	    --------                      :

	  NOTE(review): column alignment of this diagram was reconstructed;
	  confirm against the upstream source.
	*/
	// -------------------------------------------------------------------------

	begin_asm()

#if 0
	vzeroall() // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	// Only the four accumulators used by this kernel are cleared.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm13, ymm13, ymm13)
#endif

	mov(var(a), rax) // load address of a.
	//mov(var(rs_a), r8) // load rs_a
	//mov(var(cs_a), r9) // load cs_a
	//lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	//mov(var(rs_b), r10) // load rs_b
	mov(var(cs_b), r11) // load cs_b
	//lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b

	mov(var(c), rcx) // load address of c
	//mov(var(rs_c), rdi) // load rs_c
	//lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

	prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c

	mov(var(k_iter16), rsi) // i = k_iter16;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKITER4) // if i == 0, jump to code that
	                   // contains the k_iter4 loop.

	label(.DLOOPKITER16) // MAIN LOOP

	// Each iteration below loads 4 k-values of the single row of A (ymm0)
	// and of each of the four columns of B (ymm3, reused), accumulating
	// elementwise products into ymm4, ymm7, ymm10, ymm13.

	// ---------------------------------- iteration 0

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 1

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 2

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	// ---------------------------------- iteration 3

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER16) // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi) // i = k_iter4;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT1) // if i == 0, jump to code that
	                   // considers k_left1 loop.
	                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4) // EDGE LOOP (ymm)

	vmovupd(mem(rax ), ymm0)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;

	vmovupd(mem(rbx ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovupd(mem(rbx, r11, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovupd(mem(rbx, r11, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovupd(mem(rbx, r13, 1), ymm3)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER4) // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi) // i = k_left1;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	                // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1) // EDGE LOOP (scalar)
	// NOTE: We must use ymm registers here bc
	// using the xmm registers would zero out the
	// high bits of the destination registers,
	// which would destroy intermediate results.
	// (The scalar operands are loaded with vmovsd into xmm0/xmm3; only
	// the accumulating FMAs are ymm-wide.)

	vmovsd(mem(rax ), xmm0)
	add(imm(1*8), rax) // a += 1*cs_a = 1*8;

	vmovsd(mem(rbx ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)

	vmovsd(mem(rbx, r11, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm7)

	vmovsd(mem(rbx, r11, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)

	vmovsd(mem(rbx, r13, 1), xmm3)
	add(imm(1*8), rbx) // b += 1*rs_b = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm13)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT1) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (one row of C, four columns):
	// ymm4 ymm7 ymm10 ymm13
	// Each accumulator holds four partial sums; they are reduced to one
	// scalar each and packed into ymm4.

	vhaddpd( ymm7, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm0 )
	// xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm7)

	vhaddpd( ymm13, ymm10, ymm2 )
	vextractf128(imm(1), ymm2, xmm1 )
	vaddpd( xmm2, xmm1, xmm2 )
	// xmm2[0] = sum(ymm10); xmm2[1] = sum(ymm13)

	vperm2f128(imm(0x20), ymm2, ymm0, ymm4 )

	// ymm4 = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13)

	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(ymm0, ymm4, ymm4) // scale by alpha

	//mov(var(cs_c), rsi) // load cs_c
	//lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	// beta != 0: row := beta * C_row + (alpha-scaled result), then store.

	vfmadd231pd(mem(rcx), ymm3, ymm4)
	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE) // jump to end.
label(.DBETAZERO)
	// beta == 0 path of the preceding kernel: store the single result row
	// (ymm4) without reading C.

	label(.DROWSTORBZ)

	vmovupd(ymm4, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4] "m" (k_iter4),
	  [k_left1] "m" (k_left1),
	  [a] "m" (a),
	  [rs_a] "m" (rs_a),
	  [cs_a] "m" (cs_a),
	  [b] "m" (b),
	  [rs_b] "m" (rs_b),
	  [cs_b] "m" (cs_b),
	  [alpha] "m" (alpha),
	  [beta] "m" (beta),
	  [c] "m" (c),
	  [rs_c] "m" (rs_c),
	  [cs_c] "m" (cs_c)/*,
	  [a_next] "m" (a_next),
	  [b_next] "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

/*
   bli_dgemmsup_rd_haswell_asm_6x2n

   Hand-written AVX/FMA kernel that updates a 6x2 block of C as
   C := beta * C + alpha * (A * B), forming each of the twelve entries as a
   dot product of length k0.

   What the assembly below does:
   - Reads six rows of A (a + i*rs_a, i = 0..5) and two columns of B (b and
     b + cs_b). Both pointers advance by a fixed 8 bytes per k step; cs_a
     and rs_b are never loaded, so unit stride along k is assumed for both
     operands.
   - Writes six rows of C (c + i*rs_c), two contiguous doubles each; cs_c
     is never loaded.
   - Splits k0 into k_iter16 passes of a 4x-unrolled loop (16 k values per
     pass), k_iter4 passes of a 4-wide loop, and k_left1 scalar steps.
   - Skips the load of C when beta compares equal to zero.

   conja, conjb, m0, n0, data and cntx are referenced only by the disabled
   (#if 0) reference call.
*/
void bli_dgemmsup_rd_haswell_asm_6x2n
     (
       conj_t conja,
       conj_t conjb,
       dim_t m0,
       dim_t n0,
       dim_t k0,
       double* restrict alpha,
       double* restrict a, inc_t rs_a0, inc_t cs_a0,
       double* restrict b, inc_t rs_b0, inc_t cs_b0,
       double* restrict beta,
       double* restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t* restrict cntx
     )
{
#if 0
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif

	//void* a_next = bli_auxinfo_next_a( data );
	//void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4 = k_left16 / 4;
	uint64_t k_left1 = k_left16 % 4;

	uint64_t rs_a = rs_a0;
	uint64_t cs_a = cs_a0;
	uint64_t rs_b = rs_b0;
	uint64_t cs_b = cs_b0;
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	// -------------------------------------------------------------------------

	begin_asm()

	//vzeroall() // zero all xmm/ymm registers.

	mov(var(a), rax) // load address of a.
	mov(var(rs_a), r8) // load rs_a
	//mov(var(cs_a), r9) // load cs_a
	lea(mem(, r8, 8), r8) // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9) // cs_a *= sizeof(double)

	lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a
	lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a

	mov(var(b), rbx) // load address of b.
	//mov(var(rs_b), r10) // load rs_b
	mov(var(cs_b), r11) // load cs_b
	//lea(mem(, r10, 8), r10) // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11) // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b

	mov(var(c), rcx) // load address of c
	mov(var(rs_c), rdi) // load rs_c
	lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double)

#if 0
	vzeroall() // zero all xmm/ymm registers.
#else
	// skylake can execute 3 vxorpd ipc with
	// a latency of 1 cycle, while vzeroall
	// has a latency of 12 cycles.
	vxorpd(ymm4, ymm4, ymm4)
	vxorpd(ymm5, ymm5, ymm5)
	vxorpd(ymm6, ymm6, ymm6)
	vxorpd(ymm7, ymm7, ymm7)
	vxorpd(ymm8, ymm8, ymm8)
	vxorpd(ymm9, ymm9, ymm9)
	vxorpd(ymm10, ymm10, ymm10)
	vxorpd(ymm11, ymm11, ymm11)
	vxorpd(ymm12, ymm12, ymm12)
	vxorpd(ymm13, ymm13, ymm13)
	vxorpd(ymm14, ymm14, ymm14)
	vxorpd(ymm15, ymm15, ymm15)
#endif

	lea(mem(rcx, rdi, 2), r10) // r10 = c + 2*rs_c;
	lea(mem(r10, rdi, 1), r10) // r10 = c + 3*rs_c;

	prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c
	prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c
	prefetch(0, mem(r10, 1*8)) // prefetch c + 3*rs_c
	prefetch(0, mem(r10, rdi, 1, 1*8)) // prefetch c + 4*rs_c
	prefetch(0, mem(r10, rdi, 2, 1*8)) // prefetch c + 5*rs_c

	mov(var(k_iter16), rsi) // i = k_iter16;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKITER4) // if i == 0, jump to code that
	                   // contains the k_iter4 loop.

	label(.DLOOPKITER16) // MAIN LOOP

	// Each iteration below loads 4 k-values from both columns of B (ymm0,
	// ymm1) and from each of the six rows of A (ymm3, reused), and
	// accumulates elementwise products into ymm4..ymm15 (two accumulators
	// per row of A).

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER16) // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi) // i = k_iter4;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT1) // if i == 0, jump to code that
	                   // considers k_left1 loop.
	                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4) // EDGE LOOP (ymm)

	vmovupd(mem(rbx ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx) // b += 4*rs_b = 4*8;

	vmovupd(mem(rax ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovupd(mem(rax, r8, 2), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovupd(mem(rax, r13, 1), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovupd(mem(rax, r8, 4), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovupd(mem(rax, r15, 1), ymm3)
	add(imm(4*8), rax) // a += 4*cs_a = 4*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKITER4) // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi) // i = k_left1;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done; jump to end.
	                // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1) // EDGE LOOP (scalar)
	// NOTE: We must use ymm registers here bc
	// using the xmm registers would zero out the
	// high bits of the destination registers,
	// which would destroy intermediate results.
	// (The scalar operands are loaded with vmovsd into xmm0/xmm1/xmm3;
	// only the accumulating FMAs are ymm-wide.)

	vmovsd(mem(rbx ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx) // b += 1*rs_b = 1*8;

	vmovsd(mem(rax ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	vmovsd(mem(rax, r8, 2), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm8)
	vfmadd231pd(ymm1, ymm3, ymm9)

	vmovsd(mem(rax, r13, 1), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm10)
	vfmadd231pd(ymm1, ymm3, ymm11)

	vmovsd(mem(rax, r8, 4), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm12)
	vfmadd231pd(ymm1, ymm3, ymm13)

	vmovsd(mem(rax, r15, 1), xmm3)
	add(imm(1*8), rax) // a += 1*cs_a = 1*8;
	vfmadd231pd(ymm0, ymm3, ymm14)
	vfmadd231pd(ymm1, ymm3, ymm15)

	dec(rsi) // i -= 1;
	jne(.DLOOPKLEFT1) // iterate again if i != 0.

	label(.DPOSTACCUM)

	// Accumulator layout (rows of C by columns of C):
	// ymm4 ymm5
	// ymm6 ymm7
	// ymm8 ymm9
	// ymm10 ymm11
	// ymm12 ymm13
	// ymm14 ymm15
	// Each accumulator holds four partial sums; each row pair is reduced
	// to two scalars held in one xmm register.

	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	// xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )

	vhaddpd( ymm9, ymm8, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm8 )

	vhaddpd( ymm11, ymm10, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm10 )

	vhaddpd( ymm13, ymm12, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm12 )

	vhaddpd( ymm15, ymm14, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm14 )

	// xmm4 = sum(ymm4) sum(ymm5)
	// xmm6 = sum(ymm6) sum(ymm7)
	// xmm8 = sum(ymm8) sum(ymm9)
	// xmm10 = sum(ymm10) sum(ymm11)
	// xmm12 = sum(ymm12) sum(ymm13)
	// xmm14 = sum(ymm14) sum(ymm15)

	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4) // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm8, xmm8)
	vmulpd(xmm0, xmm10, xmm10)
	vmulpd(xmm0, xmm12, xmm12)
	vmulpd(xmm0, xmm14, xmm14)

	//mov(var(cs_c), rsi) // load cs_c
	//lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double)

	// now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero.
	vucomisd(xmm0, xmm3) // set ZF if beta == 0.
je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm10) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm12) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm14) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. label(.DBETAZERO) label(.DROWSTORBZ) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vmovupd(xmm8, mem(rcx)) add(rdi, rcx) vmovupd(xmm10, mem(rcx)) add(rdi, rcx) vmovupd(xmm12, mem(rcx)) add(rdi, rcx) vmovupd(xmm14, mem(rcx)) //add(rdi, rcx) label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter16] "m" (k_iter16), [k_iter4] "m" (k_iter4), [k_left1] "m" (k_left1), [a] "m" (a), [rs_a] "m" (rs_a), [cs_a] "m" (cs_a), [b] "m" (b), [rs_b] "m" (rs_b), [cs_b] "m" (cs_b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c)/*, [a_next] "m" (a_next), [b_next] "m" (b_next)*/ : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemmsup_rd_haswell_asm_3x2n ( conj_t conja, conj_t conjb, dim_t m0, dim_t n0, dim_t k0, double* restrict alpha, double* restrict a, inc_t rs_a0, inc_t cs_a0, double* restrict b, inc_t rs_b0, inc_t cs_b0, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { #if 0 bli_dgemmsup_r_haswell_ref ( conja, conjb, m0, n0, k0, alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0, beta, c, rs_c0, cs_c0, data, cntx ); return; #endif //void* a_next = bli_auxinfo_next_a( data 
); //void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter16 = k0 / 16; uint64_t k_left16 = k0 % 16; uint64_t k_iter4 = k_left16 / 4; uint64_t k_left1 = k_left16 % 4; uint64_t rs_a = rs_a0; uint64_t cs_a = cs_a0; uint64_t rs_b = rs_b0; uint64_t cs_b = cs_b0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; // ------------------------------------------------------------------------- begin_asm() #if 0 vzeroall() // zero all xmm/ymm registers. #else // skylake can execute 3 vxorpd ipc with // a latency of 1 cycle, while vzeroall // has a latency of 12 cycles. vxorpd(ymm4, ymm4, ymm4) vxorpd(ymm5, ymm5, ymm5) vxorpd(ymm6, ymm6, ymm6) vxorpd(ymm7, ymm7, ymm7) vxorpd(ymm8, ymm8, ymm8) vxorpd(ymm9, ymm9, ymm9) #endif mov(var(a), rax) // load address of a. mov(var(rs_a), r8) // load rs_a //mov(var(cs_a), r9) // load cs_a lea(mem(, r8, 8), r8) // rs_a *= sizeof(double) //lea(mem(, r9, 8), r9) // cs_a *= sizeof(double) //lea(mem(r8, r8, 2), r13) // r13 = 3*rs_a //lea(mem(r8, r8, 4), r15) // r15 = 5*rs_a mov(var(b), rbx) // load address of b. //mov(var(rs_b), r10) // load rs_b mov(var(cs_b), r11) // load cs_b //lea(mem(, r10, 8), r10) // rs_b *= sizeof(double) lea(mem(, r11, 8), r11) // cs_b *= sizeof(double) //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b // initialize loop by pre-loading // a column of a. mov(var(c), rcx) // load address of c mov(var(rs_c), rdi) // load rs_c lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c mov(var(k_iter16), rsi) // i = k_iter16; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKITER4) // if i == 0, jump to code that // contains the k_iter4 loop. 
label(.DLOOPKITER16) // MAIN LOOP // ---------------------------------- iteration 0 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 1 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 2 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) // ---------------------------------- iteration 3 vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER16) // iterate again if i != 0. 
label(.DCONSIDKITER4) mov(var(k_iter4), rsi) // i = k_iter4; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT1) // if i == 0, jump to code that // considers k_left1 loop. // else, we prepare to enter k_iter4 loop. label(.DLOOPKITER4) // EDGE LOOP (ymm) vmovupd(mem(rbx ), ymm0) vmovupd(mem(rbx, r11, 1), ymm1) add(imm(4*8), rbx) // b += 4*rs_b = 4*8; vmovupd(mem(rax ), ymm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovupd(mem(rax, r8, 1), ymm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovupd(mem(rax, r8, 2), ymm3) add(imm(4*8), rax) // a += 4*cs_a = 4*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKITER4) // iterate again if i != 0. label(.DCONSIDKLEFT1) mov(var(k_left1), rsi) // i = k_left1; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left1 loop. label(.DLOOPKLEFT1) // EDGE LOOP (scalar) // NOTE: We must use ymm registers here bc // using the xmm registers would zero out the // high bits of the destination registers, // which would destory intermediate results. vmovsd(mem(rbx ), xmm0) vmovsd(mem(rbx, r11, 1), xmm1) add(imm(1*8), rbx) // b += 1*rs_b = 1*8; vmovsd(mem(rax ), xmm3) vfmadd231pd(ymm0, ymm3, ymm4) vfmadd231pd(ymm1, ymm3, ymm5) vmovsd(mem(rax, r8, 1), xmm3) vfmadd231pd(ymm0, ymm3, ymm6) vfmadd231pd(ymm1, ymm3, ymm7) vmovsd(mem(rax, r8, 2), xmm3) add(imm(1*8), rax) // a += 1*cs_a = 1*8; vfmadd231pd(ymm0, ymm3, ymm8) vfmadd231pd(ymm1, ymm3, ymm9) dec(rsi) // i -= 1; jne(.DLOOPKLEFT1) // iterate again if i != 0. 
label(.DPOSTACCUM) // ymm4 ymm5 // ymm6 ymm7 // ymm8 ymm9 vhaddpd( ymm5, ymm4, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm4 ) // xmm0[0] = sum(ymm4); xmm0[1] = sum(ymm5) vhaddpd( ymm7, ymm6, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm6 ) vhaddpd( ymm9, ymm8, ymm0 ) vextractf128(imm(1), ymm0, xmm1 ) vaddpd( xmm0, xmm1, xmm8 ) // xmm4 = sum(ymm4) sum(ymm5) // xmm6 = sum(ymm6) sum(ymm7) // xmm8 = sum(ymm8) sum(ymm9) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate vmulpd(xmm0, xmm4, xmm4) // scale by alpha vmulpd(xmm0, xmm6, xmm6) vmulpd(xmm0, xmm8, xmm8) //mov(var(cs_c), rsi) // load cs_c //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) // now avoid loading C if beta == 0 vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. vucomisd(xmm0, xmm3) // set ZF if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case label(.DROWSTORED) vfmadd231pd(mem(rcx), xmm3, xmm4) vmovupd(xmm4, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm6) vmovupd(xmm6, mem(rcx)) add(rdi, rcx) vfmadd231pd(mem(rcx), xmm3, xmm8) vmovupd(xmm8, mem(rcx)) //add(rdi, rcx) jmp(.DDONE) // jump to end. 
label(.DBETAZERO)
	// NOTE: the statements from this label through the next closing brace
	// finish a kernel whose beginning lies earlier in the file. The code is
	// reproduced unchanged.

	label(.DROWSTORBZ)

	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))
	add(rdi, rcx)

	vmovupd(xmm8, mem(rcx))
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// bli_dgemmsup_rd_haswell_asm_2x2n
//
// Computes a 2x2 block of C := beta * C + alpha * A * B, forming each of the
// four results as a dot product over the k dimension.
//
// What the assembly below actually reads:
// - a, rs_a0:  the two rows of A start at a and a + rs_a0 elements. Successive
//              k elements of a row are taken from adjacent memory (the pointer
//              advances by a fixed 8 bytes per element); cs_a0 is never loaded.
// - b, cs_b0:  the two columns of B start at b and b + cs_b0 elements.
//              Successive k elements of a column are likewise taken from
//              adjacent memory; rs_b0 is never loaded.
// - c, rs_c0:  each row of C is updated as two adjacent doubles, rows being
//              rs_c0 elements apart; cs_c0 is never loaded.
// - alpha, beta: pointers to the scalars, dereferenced once after the k loops.
//              When beta compares equal to zero, C is overwritten without
//              being read.
// - k0:        dot-product length; split below into chunks of 16, 4 and 1.
// - conja, conjb, m0, n0, data, cntx: only mentioned in the disabled `#if 0`
//              block; the active code does not use them.
void bli_dgemmsup_rd_haswell_asm_2x2n
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	// Disabled debugging path: forward everything to the reference kernel.
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.

	// k0 is consumed as k_iter16 passes of 16 elements (four unrolled
	// 4-element steps), then k_iter4 passes of 4 elements, then k_left1
	// single elements.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	/*
	  rrc:  (C and A are walked by rows, B by columns)
	    --------        -- -- --        | |
	    --------        -- -- --  ...   | |
	    --------   +=   -- -- --        | |
	    --------        -- -- --        | |
	    --------        -- -- --         :
	    --------        -- -- --         :
	*/

	// -------------------------------------------------------------------------

	begin_asm()

	// Accumulators (4 partial sums each):
	//   ymm4 = a_row0 . b_col0    ymm5 = a_row0 . b_col1
	//   ymm6 = a_row1 . b_col0    ymm7 = a_row1 . b_col1
#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
	vxorpd(ymm6,  ymm6,  ymm6)
	vxorpd(ymm7,  ymm7,  ymm7)
#endif

	mov(var(a), rax)                   // load address of a.
	mov(var(rs_a), r8)                 // load rs_a
	//mov(var(cs_a), r9)               // load cs_a
	lea(mem(, r8, 8), r8)              // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)            // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)         // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)         // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)              // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)          // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13)       // r13 = 3*cs_b

	                                   // (nothing is pre-loaded here; the
	                                   // loops below load A and B themselves.)

	mov(var(c), rcx)                   // load address of c
	mov(var(rs_c), rdi)                // load rs_c
	lea(mem(, rdi, 8), rdi)            // rs_c *= sizeof(double)

	//lea(mem(rcx, rdi, 2), rdx)       //
	//lea(mem(rdx, rdi, 1), rdx)       // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 1*8))         // prefetch c + 0*rs_c
	prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx         ), ymm0)   // 4 elements of b column 0
	vmovupd(mem(rbx, r11, 1), ymm1)    // 4 elements of b column 1
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)    // 4 elements of a row 0
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)     // 4 elements of a row 1
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx         ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx         ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx         ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rbx         ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovupd(mem(rax, r8, 1), ymm3)
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: Only one element is loaded per
	                                   // operand (vmovsd into xmm), but the
	                                   // FMAs must still name ymm registers:
	                                   // using the xmm registers would zero out
	                                   // the high bits of the destination
	                                   // registers, which would destroy the
	                                   // intermediate results held there.

	vmovsd(mem(rbx         ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx)                 // b += 1 element (1*8 bytes);

	vmovsd(mem(rax        ), xmm3)
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	vmovsd(mem(rax, r8, 1), xmm3)
	add(imm(1*8), rax)                 // a += 1 element (1*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm6)
	vfmadd231pd(ymm1, ymm3, ymm7)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // ymm4  ymm5
	                                   // ymm6  ymm7

	// Reduce each pair of 4-wide accumulators to two scalars: horizontal
	// add within 128-bit halves, then add the upper half to the lower half.
	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	                                   // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	vhaddpd( ymm7, ymm6, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm6 )

	                                   // xmm4 = sum(ymm4) sum(ymm5)
	                                   // xmm6 = sum(ymm6) sum(ymm7)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4)           // scale by alpha
	vmulpd(xmm0, xmm6, xmm6)

	//mov(var(cs_c), rsi)              // load cs_c
	//lea(mem(, rsi, 8), rsi)          // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), xmm3, xmm4)  // row 0: result += beta * c
	vmovupd(xmm4, mem(rcx))
	add(rdi, rcx)                      // advance to row 1 of c

	vfmadd231pd(mem(rcx), xmm3, xmm6)  // row 1: result += beta * c
	vmovupd(xmm6, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(xmm4, mem(rcx))            // row 0: store without reading c
	add(rdi, rcx)

	vmovupd(xmm6, mem(rcx))            // row 1: store without reading c
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// bli_dgemmsup_rd_haswell_asm_1x2n
//
// Computes a 1x2 block of C := beta * C + alpha * A * B, forming both results
// as dot products over the k dimension.
//
// What the assembly below actually reads:
// - a:         the single row of A; successive k elements are taken from
//              adjacent memory. Neither rs_a0 nor cs_a0 is loaded.
// - b, cs_b0:  the two columns of B start at b and b + cs_b0 elements.
//              Successive k elements of a column are taken from adjacent
//              memory; rs_b0 is never loaded.
// - c:         updated as two adjacent doubles. Neither rs_c0 nor cs_c0 is
//              loaded.
// - alpha, beta: pointers to the scalars, dereferenced once after the k loops.
//              When beta compares equal to zero, C is overwritten without
//              being read.
// - k0:        dot-product length; split below into chunks of 16, 4 and 1.
// - conja, conjb, m0, n0, data, cntx: only mentioned in the disabled `#if 0`
//              block; the active code does not use them.
void bli_dgemmsup_rd_haswell_asm_1x2n
     (
       conj_t              conja,
       conj_t              conjb,
       dim_t               m0,
       dim_t               n0,
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a, inc_t rs_a0, inc_t cs_a0,
       double*    restrict b, inc_t rs_b0, inc_t cs_b0,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
#if 0
	// Disabled debugging path: forward everything to the reference kernel.
	bli_dgemmsup_r_haswell_ref
	(
	  conja, conjb, m0, n0, k0,
	  alpha, a, rs_a0, cs_a0, b, rs_b0, cs_b0,
	  beta, c, rs_c0, cs_c0, data, cntx
	);
	return;
#endif
	//void*    a_next = bli_auxinfo_next_a( data );
	//void*    b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.

	// k0 is consumed as k_iter16 passes of 16 elements (four unrolled
	// 4-element steps), then k_iter4 passes of 4 elements, then k_left1
	// single elements.
	uint64_t k_iter16 = k0 / 16;
	uint64_t k_left16 = k0 % 16;
	uint64_t k_iter4  = k_left16 / 4;
	uint64_t k_left1  = k_left16 % 4;

	uint64_t rs_a   = rs_a0;
	uint64_t cs_a   = cs_a0;
	uint64_t rs_b   = rs_b0;
	uint64_t cs_b   = cs_b0;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	/*
	  rrc:  (C and A are walked by rows, B by columns)
	    --------        -- -- --        | |
	    --------        -- -- --  ...   | |
	    --------   +=   -- -- --        | |
	    --------        -- -- --        | |
	    --------        -- -- --         :
	    --------        -- -- --         :
	*/

	// -------------------------------------------------------------------------

	begin_asm()

	// Accumulators (4 partial sums each):
	//   ymm4 = a_row0 . b_col0    ymm5 = a_row0 . b_col1
#if 0
	vzeroall()                         // zero all xmm/ymm registers.
#else
	                                   // skylake can execute 3 vxorpd ipc with
	                                   // a latency of 1 cycle, while vzeroall
	                                   // has a latency of 12 cycles.
	vxorpd(ymm4,  ymm4,  ymm4)
	vxorpd(ymm5,  ymm5,  ymm5)
#endif

	mov(var(a), rax)                   // load address of a.
	//mov(var(rs_a), r8)               // load rs_a
	//mov(var(cs_a), r9)               // load cs_a
	//lea(mem(, r8, 8), r8)            // rs_a *= sizeof(double)
	//lea(mem(, r9, 8), r9)            // cs_a *= sizeof(double)

	//lea(mem(r8, r8, 2), r13)         // r13 = 3*rs_a
	//lea(mem(r8, r8, 4), r15)         // r15 = 5*rs_a

	mov(var(b), rbx)                   // load address of b.
	//mov(var(rs_b), r10)              // load rs_b
	mov(var(cs_b), r11)                // load cs_b
	//lea(mem(, r10, 8), r10)          // rs_b *= sizeof(double)
	lea(mem(, r11, 8), r11)            // cs_b *= sizeof(double)

	//lea(mem(r11, r11, 2), r13)       // r13 = 3*cs_b

	                                   // (nothing is pre-loaded here; the
	                                   // loops below load A and B themselves.)

	mov(var(c), rcx)                   // load address of c
	//mov(var(rs_c), rdi)              // load rs_c
	//lea(mem(, rdi, 8), rdi)          // rs_c *= sizeof(double)

	//lea(mem(rcx, rdi, 2), rdx)       //
	//lea(mem(rdx, rdi, 1), rdx)       // rdx = c + 3*rs_c;
	prefetch(0, mem(rcx, 1*8))         // prefetch c + 0*rs_c

	mov(var(k_iter16), rsi)            // i = k_iter16;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKITER4)                 // if i == 0, jump to code that
	                                   // contains the k_iter4 loop.

	label(.DLOOPKITER16)               // MAIN LOOP

	// ---------------------------------- iteration 0

	vmovupd(mem(rbx         ), ymm0)   // 4 elements of b column 0
	vmovupd(mem(rbx, r11, 1), ymm1)    // 4 elements of b column 1
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)    // 4 elements of the a row
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	// ---------------------------------- iteration 1

	vmovupd(mem(rbx         ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	// ---------------------------------- iteration 2

	vmovupd(mem(rbx         ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	// ---------------------------------- iteration 3

	vmovupd(mem(rbx         ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER16)                 // iterate again if i != 0.

	label(.DCONSIDKITER4)

	mov(var(k_iter4), rsi)             // i = k_iter4;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DCONSIDKLEFT1)                 // if i == 0, jump to code that
	                                   // considers k_left1 loop.
	                                   // else, we prepare to enter k_iter4 loop.

	label(.DLOOPKITER4)                // EDGE LOOP (ymm)

	vmovupd(mem(rbx         ), ymm0)
	vmovupd(mem(rbx, r11, 1), ymm1)
	add(imm(4*8), rbx)                 // b += 4 elements (4*8 bytes);

	vmovupd(mem(rax        ), ymm3)
	add(imm(4*8), rax)                 // a += 4 elements (4*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKITER4)                  // iterate again if i != 0.

	label(.DCONSIDKLEFT1)

	mov(var(k_left1), rsi)             // i = k_left1;
	test(rsi, rsi)                     // check i via logical AND.
	je(.DPOSTACCUM)                    // if i == 0, we're done; jump to end.
	                                   // else, we prepare to enter k_left1 loop.

	label(.DLOOPKLEFT1)                // EDGE LOOP (scalar)
	                                   // NOTE: Only one element is loaded per
	                                   // operand (vmovsd into xmm), but the
	                                   // FMAs must still name ymm registers:
	                                   // using the xmm registers would zero out
	                                   // the high bits of the destination
	                                   // registers, which would destroy the
	                                   // intermediate results held there.

	vmovsd(mem(rbx         ), xmm0)
	vmovsd(mem(rbx, r11, 1), xmm1)
	add(imm(1*8), rbx)                 // b += 1 element (1*8 bytes);

	vmovsd(mem(rax        ), xmm3)
	add(imm(1*8), rax)                 // a += 1 element (1*8 bytes);
	vfmadd231pd(ymm0, ymm3, ymm4)
	vfmadd231pd(ymm1, ymm3, ymm5)

	dec(rsi)                           // i -= 1;
	jne(.DLOOPKLEFT1)                  // iterate again if i != 0.

	label(.DPOSTACCUM)

	                                   // ymm4  ymm5

	// Reduce the two 4-wide accumulators to two scalars: horizontal add
	// within 128-bit halves, then add the upper half to the lower half.
	vhaddpd( ymm5, ymm4, ymm0 )
	vextractf128(imm(1), ymm0, xmm1 )
	vaddpd( xmm0, xmm1, xmm4 )
	                                   // xmm4[0] = sum(ymm4); xmm4[1] = sum(ymm5)

	                                   // xmm4 = sum(ymm4) sum(ymm5)

	mov(var(alpha), rax)               // load address of alpha
	mov(var(beta), rbx)                // load address of beta
	vbroadcastsd(mem(rax), ymm0)       // load alpha and duplicate
	vbroadcastsd(mem(rbx), ymm3)       // load beta and duplicate

	vmulpd(xmm0, xmm4, xmm4)           // scale by alpha

	//mov(var(cs_c), rsi)              // load cs_c
	//lea(mem(, rsi, 8), rsi)          // rsi = cs_c * sizeof(double)

	                                   // now avoid loading C if beta == 0

	vxorpd(ymm0, ymm0, ymm0)           // set ymm0 to zero.
	vucomisd(xmm0, xmm3)               // set ZF if beta == 0.
	je(.DBETAZERO)                     // if ZF = 1, jump to beta == 0 case

	label(.DROWSTORED)

	vfmadd231pd(mem(rcx), xmm3, xmm4)  // result += beta * c
	vmovupd(xmm4, mem(rcx))
	//add(rdi, rcx)

	jmp(.DDONE)                        // jump to end.

	label(.DBETAZERO)

	label(.DROWSTORBZ)

	vmovupd(xmm4, mem(rcx))            // store without reading c
	//add(rdi, rcx)

	label(.DDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter16] "m" (k_iter16),
	  [k_iter4]  "m" (k_iter4),
	  [k_left1]  "m" (k_left1),
	  [a]        "m" (a),
	  [rs_a]     "m" (rs_a),
	  [cs_a]     "m" (cs_a),
	  [b]        "m" (b),
	  [rs_b]     "m" (rs_b),
	  [cs_b]     "m" (cs_b),
	  [alpha]    "m" (alpha),
	  [beta]     "m" (beta),
	  [c]        "m" (c),
	  [rs_c]     "m" (rs_c),
	  [cs_c]     "m" (cs_c)/*,
	  [a_next]   "m" (a_next),
	  [b_next]   "m" (b_next)*/
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

blis-0.6.1/kernels/haswell/bli_kernels_haswell.h000066400000000000000000000145211360743507500217470ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Prototypes for the haswell kernel set. Each line invokes a *_PROT macro
// with a C type, a one-letter type character and a kernel name.
// NOTE(review): the macros are defined elsewhere; presumably each expands to
// a declaration named bli_<char><name> -- confirm against their definitions.

// -- level-3 ------------------------------------------------------------------

// gemm (asm d6x8)
GEMM_UKR_PROT( float,    s, gemm_haswell_asm_6x16 )
GEMM_UKR_PROT( double,   d, gemm_haswell_asm_6x8 )
GEMM_UKR_PROT( scomplex, c, gemm_haswell_asm_3x8 )
GEMM_UKR_PROT( dcomplex, z, gemm_haswell_asm_3x4 )

// gemm (asm d8x6)
GEMM_UKR_PROT( float,    s, gemm_haswell_asm_16x6 )
GEMM_UKR_PROT( double,   d, gemm_haswell_asm_8x6 )
GEMM_UKR_PROT( scomplex, c, gemm_haswell_asm_8x3 )
GEMM_UKR_PROT( dcomplex, z, gemm_haswell_asm_4x3 )

// gemmtrsm_l (asm d6x8)
GEMMTRSM_UKR_PROT( float,    s, gemmtrsm_l_haswell_asm_6x16 )
GEMMTRSM_UKR_PROT( double,   d, gemmtrsm_l_haswell_asm_6x8 )

// gemmtrsm_u (asm d6x8)
GEMMTRSM_UKR_PROT( float,    s, gemmtrsm_u_haswell_asm_6x16 )
GEMMTRSM_UKR_PROT( double,   d, gemmtrsm_u_haswell_asm_6x8 )

// gemm (asm d8x6)
// (disabled duplicate of the d8x6 prototypes already declared above)
//GEMM_UKR_PROT( float,    s, gemm_haswell_asm_16x6 )
//GEMM_UKR_PROT( double,   d, gemm_haswell_asm_8x6 )
//GEMM_UKR_PROT( scomplex, c, gemm_haswell_asm_8x3 )
//GEMM_UKR_PROT( dcomplex, z, gemm_haswell_asm_4x3 )

// -- level-3 sup --------------------------------------------------------------

// gemmsup_rv
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x8 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_5x8 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_4x8 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_3x8 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_2x8 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_1x8 )

GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x6 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_5x6 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_4x6 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_3x6 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_2x6 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_1x6 )

GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x4 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_5x4 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_4x4 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_3x4 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_2x4 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_1x4 )

GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x2 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_5x2 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_4x2 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_3x2 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_2x2 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_1x2 )

// Single-column cases are declared with "ref" rather than "asm" names.
GEMMSUP_KER_PROT( double,   d, gemmsup_r_haswell_ref_6x1 )
GEMMSUP_KER_PROT( double,   d, gemmsup_r_haswell_ref_5x1 )
GEMMSUP_KER_PROT( double,   d, gemmsup_r_haswell_ref_4x1 )
GEMMSUP_KER_PROT( double,   d, gemmsup_r_haswell_ref_3x1 )
GEMMSUP_KER_PROT( double,   d, gemmsup_r_haswell_ref_2x1 )
GEMMSUP_KER_PROT( double,   d, gemmsup_r_haswell_ref_1x1 )

// gemmsup_rv (mkernel in m dim)
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x8m )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x6m )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x4m )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x2m )

// gemmsup_rv (mkernel in n dim)
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_6x8n )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_5x8n )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_4x8n )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_3x8n )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_2x8n )
GEMMSUP_KER_PROT( double,   d, gemmsup_rv_haswell_asm_1x8n )

// gemmsup_rd
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_6x8 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_2x8 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_1x8 )

GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_6x4 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_2x4 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_1x4 )

GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_6x2 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_3x2 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_2x2 )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_1x2 )

// gemmsup_rd (mkernel in m dim)
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_6x8m )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_6x4m )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_6x2m )

// gemmsup_rd (mkernel in n dim)
// NOTE(review): kernels named ..._rd_haswell_asm_2x2n and ..._1x2n are defined
// in the sup sources but have no prototype in this list -- confirm whether
// they are declared elsewhere or are unused.
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_6x8n )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_3x8n )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_2x8n )
GEMMSUP_KER_PROT( double,   d, gemmsup_rd_haswell_asm_1x8n )

blis-0.6.1/kernels/knc/000077500000000000000000000000001360743507500146775ustar00rootroot00000000000000blis-0.6.1/kernels/knc/3/000077500000000000000000000000001360743507500150415ustar00rootroot00000000000000blis-0.6.1/kernels/knc/3/bli_dgemm_knc_asm_30x8.c000066400000000000000000000544011360743507500214050ustar00rootroot00000000000000
/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
   OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
// NOTE(review): the directive below has no header name; it looks like an
// angle-bracket include whose name was lost -- restore it from upstream.
#include

// Prefetch distances used by the per-iteration macros below. The A distance
// is multiplied by 256 bytes and the B distance by 8*8 bytes where they are
// used.
#define A_L1_PREFETCH_DIST 4
#define B_L1_PREFETCH_DIST 2
#define L2_PREFETCH_DIST  16 // Must be greater than 10, because of the way the loop is constructed.

//Alternate code path used if C is not row-major
//
// UPDATE_C_ROW_SCATTERED(REG1, NUM, BASE_DEST)
//   REG1      - vector register holding one accumulated row of results.
//   NUM       - token pasted onto the GATHER/SCATTER labels so that every
//               expansion gets unique label names.
//   BASE_DEST - register holding the address of the current row of C; it is
//               advanced by r11 at the end.
// Register conventions taken from the body: ebx supplies the mask loaded into
// k3, zmm30 supplies the element indices (scaled by 8), r12 and r13 point at
// the scalars the inline comments call alpha and beta, zmm31 is scratch.
// The gather and scatter instructions are each repeated until k3 is clear.
#define UPDATE_C_ROW_SCATTERED(REG1, NUM, BASE_DEST) \
{ \
    __asm kmov k3, ebx \
    __asm GATHER##NUM: \
    __asm vgatherdpd zmm31{k3}, [BASE_DEST + zmm30 * 8] \
    __asm jknzd k3, GATHER##NUM \
    \
    __asm vmulpd REG1, REG1, 0[r12]{1to8} /*scale by alpha*/ \
    __asm vfmadd132pd zmm31, REG1, 0[r13]{1to8} /*scale by beta, add in result*/\
    __asm kmov k3, ebx \
    \
    __asm SCATTER##NUM: \
    __asm vscatterdpd [BASE_DEST + zmm30 * 8]{k3}, zmm31 \
    __asm jknzd k3, SCATTER##NUM \
    __asm add BASE_DEST, r11 \
}

//One iteration of the k_r loop.
//Each iteration, we prefetch A into L1 and into L2
//
// ONE_ITER_MAIN_LOOP(C_ADDR, COUNTER)
//   C_ADDR  - accepted for symmetry with the macros below; not referenced in
//             this body.
//   COUNTER - register holding the loop counter; it is decremented and then
//             compared with 0, so the caller can branch on the flags.
// Register conventions taken from the body: r15 points at A and advances by
// r12; rbx points at B and advances by r9; r14 and r13 are added to the A and
// B pointers to form the vprefetch1 addresses. zmm31 holds the current vector
// of B; zmm0..zmm29 are the thirty accumulators, one per element of A (the
// first four come from zmm30 via the {aaaa}..{dddd} swizzles, the rest are
// broadcast from memory). Elements 22..29 are addressed with negative offsets
// because r15 has already been advanced at that point.
#define ONE_ITER_MAIN_LOOP(C_ADDR, COUNTER) \
{\
    __asm vbroadcastf64x4   zmm30, 0[r15] \
    __asm vmovapd zmm31, 0[rbx] \
    \
    __asm vfmadd231pd zmm0, zmm31, zmm30{aaaa} \
    __asm vfmadd231pd zmm4, zmm31,  4*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256[r15] \
    __asm vfmadd231pd zmm5, zmm31,  5*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+64[r15] \
    __asm vfmadd231pd zmm6, zmm31,  6*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+128[r15]\
    __asm vfmadd231pd zmm7, zmm31,  7*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+192[r15]\
    __asm vfmadd231pd zmm8, zmm31,  8*8[r15]{1to8} \
    \
    __asm vprefetch1 0[r15 + r14] \
    __asm vfmadd231pd zmm9, zmm31,  9*8[r15]{1to8} \
    __asm vfmadd231pd zmm1, zmm31, zmm30{bbbb} \
    __asm vfmadd231pd zmm2, zmm31, zmm30{cccc} \
    __asm vfmadd231pd zmm3, zmm31, zmm30{dddd} \
    __asm vfmadd231pd zmm10, zmm31, 10*8[r15]{1to8} \
    \
    __asm vprefetch1 64[r15 + r14] \
    __asm vfmadd231pd zmm11, zmm31, 11*8[r15]{1to8} \
    __asm vfmadd231pd zmm12, zmm31, 12*8[r15]{1to8} \
    __asm vfmadd231pd zmm13, zmm31, 13*8[r15]{1to8} \
    __asm vfmadd231pd zmm14, zmm31, 14*8[r15]{1to8} \
    __asm vfmadd231pd zmm15, zmm31, 15*8[r15]{1to8} \
    \
    __asm vprefetch1 2*64[r15 + r14] \
    __asm vfmadd231pd zmm16, zmm31, 16*8[r15]{1to8} \
    __asm vfmadd231pd zmm17, zmm31, 17*8[r15]{1to8} \
    __asm vfmadd231pd zmm18, zmm31, 18*8[r15]{1to8} \
    __asm vfmadd231pd zmm19, zmm31, 19*8[r15]{1to8} \
    __asm vfmadd231pd zmm20, zmm31, 20*8[r15]{1to8} \
    \
    __asm vprefetch1 3*64[r15 + r14] \
    __asm vfmadd231pd zmm21, zmm31, 21*8[r15]{1to8} \
    __asm add r15, r12 \
    __asm vfmadd231pd zmm22, zmm31, -10*8[r15]{1to8}\
    __asm vfmadd231pd zmm23, zmm31, -9*8[r15]{1to8} \
    __asm vfmadd231pd zmm24, zmm31, -8*8[r15]{1to8} \
    __asm dec COUNTER \
    __asm vfmadd231pd zmm25, zmm31, -7*8[r15]{1to8} \
    \
    \
    __asm vprefetch1 0[rbx + r13] \
    __asm vfmadd231pd zmm26, zmm31, -6*8[r15]{1to8} \
    __asm vprefetch0 B_L1_PREFETCH_DIST*8*8[rbx] \
    __asm vfmadd231pd zmm27, zmm31, -5*8[r15]{1to8} \
    __asm add rbx, r9 \
    __asm vfmadd231pd zmm28, zmm31, -4*8[r15]{1to8} \
    __asm cmp COUNTER, 0 \
    __asm vfmadd231pd zmm29, zmm31, -3*8[r15]{1to8} \
}

//One iteration of the k_r loop.
//Same as ONE_ITER_MAIN_LOOP, but additionally, we prefetch one line of C into the L2 cache
//Current placement of this prefetch instruction is somewhat arbitrary.
//
// ONE_ITER_PC_L2(C_ADDR)
//   C_ADDR - register holding the address of the C row to prefetch with
//            vprefetch1; it is advanced by r11 once per expansion.
// Unlike ONE_ITER_MAIN_LOOP, the loop counter is fixed to r8.
#define ONE_ITER_PC_L2(C_ADDR) \
{\
    __asm vbroadcastf64x4   zmm30, 0[r15] \
    __asm vmovapd zmm31, 0[rbx] \
    \
    __asm vfmadd231pd zmm0, zmm31, zmm30{aaaa} \
    __asm vfmadd231pd zmm4, zmm31,  4*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256[r15] \
    __asm vfmadd231pd zmm5, zmm31,  5*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+64[r15] \
    __asm vfmadd231pd zmm6, zmm31,  6*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+128[r15]\
    __asm vfmadd231pd zmm7, zmm31,  7*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+192[r15]\
    __asm vfmadd231pd zmm8, zmm31,  8*8[r15]{1to8} \
    \
    __asm vprefetch1 0[r15 + r14] \
    __asm vfmadd231pd zmm9, zmm31,  9*8[r15]{1to8} \
    __asm vfmadd231pd zmm1, zmm31, zmm30{bbbb} \
    __asm vfmadd231pd zmm2, zmm31, zmm30{cccc} \
    __asm vfmadd231pd zmm3, zmm31, zmm30{dddd} \
    __asm vfmadd231pd zmm10, zmm31, 10*8[r15]{1to8} \
    \
    __asm vprefetch1 64[r15 + r14] \
    __asm vfmadd231pd zmm11, zmm31, 11*8[r15]{1to8} \
    __asm vprefetch1 0[C_ADDR] \
    __asm vfmadd231pd zmm12, zmm31, 12*8[r15]{1to8} \
    __asm vfmadd231pd zmm13, zmm31, 13*8[r15]{1to8} \
    __asm vfmadd231pd zmm14, zmm31, 14*8[r15]{1to8} \
    __asm vfmadd231pd zmm15, zmm31, 15*8[r15]{1to8} \
    \
    __asm vprefetch1 2*64[r15 + r14] \
    __asm vfmadd231pd zmm16, zmm31, 16*8[r15]{1to8} \
    __asm vfmadd231pd zmm17, zmm31, 17*8[r15]{1to8} \
    __asm vfmadd231pd zmm18, zmm31, 18*8[r15]{1to8} \
    __asm vfmadd231pd zmm19, zmm31, 19*8[r15]{1to8} \
    __asm vfmadd231pd zmm20, zmm31, 20*8[r15]{1to8} \
    \
    __asm vprefetch1 3*64[r15 + r14] \
    __asm vfmadd231pd zmm21, zmm31, 21*8[r15]{1to8} \
    __asm add r15, r12 \
    __asm vfmadd231pd zmm22, zmm31, -10*8[r15]{1to8}\
    __asm vfmadd231pd zmm23, zmm31, -9*8[r15]{1to8} \
    __asm add C_ADDR, r11 \
    __asm vfmadd231pd zmm24, zmm31, -8*8[r15]{1to8} \
    __asm dec r8 \
    __asm vfmadd231pd zmm25, zmm31, -7*8[r15]{1to8} \
    \
    \
    __asm vprefetch1 0[rbx + r13] \
    __asm vfmadd231pd zmm26, zmm31, -6*8[r15]{1to8} \
    __asm vprefetch0 B_L1_PREFETCH_DIST*8*8[rbx] \
    __asm vfmadd231pd zmm27, zmm31, -5*8[r15]{1to8} \
    __asm add rbx, r9 \
    __asm vfmadd231pd zmm28, zmm31, -4*8[r15]{1to8} \
    __asm cmp r8, 0 \
    __asm vfmadd231pd zmm29, zmm31, -3*8[r15]{1to8} \
    \
}

//One iteration of the k_r loop.
//Same as ONE_ITER_MAIN_LOOP, but additionally, we prefetch 3 cache lines of C into the L1 cache
//Current placement of these prefetch instructions is somewhat arbitrary.
//
// ONE_ITER_PC_L1(C_ADDR)
//   C_ADDR - register holding the address of the next C row to prefetch with
//            vprefetch0; it is advanced by r11 three times per expansion.
// The loop counter is fixed to r8, as in ONE_ITER_PC_L2.
#define ONE_ITER_PC_L1(C_ADDR) \
{\
    __asm vbroadcastf64x4   zmm30, 0[r15] \
    __asm vmovapd zmm31, 0[rbx] \
    \
    __asm vfmadd231pd zmm0, zmm31, zmm30{aaaa} \
    __asm vfmadd231pd zmm4, zmm31,  4*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256[r15] \
    __asm vfmadd231pd zmm5, zmm31,  5*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+64[r15] \
    __asm vfmadd231pd zmm6, zmm31,  6*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+128[r15]\
    __asm vfmadd231pd zmm7, zmm31,  7*8[r15]{1to8} \
    __asm vprefetch0 A_L1_PREFETCH_DIST*256+192[r15]\
    __asm vfmadd231pd zmm8, zmm31,  8*8[r15]{1to8} \
    \
    __asm vprefetch1 0[r15 + r14] \
    __asm vfmadd231pd zmm9, zmm31,  9*8[r15]{1to8} \
    __asm vprefetch0 0[C_ADDR] \
    __asm vfmadd231pd zmm1, zmm31, zmm30{bbbb} \
    __asm add C_ADDR, r11 \
    __asm vfmadd231pd zmm2, zmm31, zmm30{cccc} \
    __asm vfmadd231pd zmm3, zmm31, zmm30{dddd} \
    __asm vfmadd231pd zmm10, zmm31, 10*8[r15]{1to8} \
    \
    __asm vprefetch1 64[r15 + r14] \
    __asm vfmadd231pd zmm11, zmm31, 11*8[r15]{1to8} \
    __asm vprefetch0 0[C_ADDR] \
    __asm vfmadd231pd zmm12, zmm31, 12*8[r15]{1to8} \
    __asm add C_ADDR, r11 \
    __asm vfmadd231pd zmm13, zmm31, 13*8[r15]{1to8} \
    __asm vfmadd231pd zmm14, zmm31, 14*8[r15]{1to8} \
    __asm vfmadd231pd zmm15, zmm31, 15*8[r15]{1to8} \
    \
    __asm vprefetch1 2*64[r15 + r14] \
    __asm vfmadd231pd zmm16, zmm31, 16*8[r15]{1to8} \
    __asm vprefetch0 0[C_ADDR] \
    __asm vfmadd231pd zmm17, zmm31, 17*8[r15]{1to8} \
    __asm add C_ADDR, r11 \
    __asm vfmadd231pd zmm18, zmm31, 18*8[r15]{1to8} \
    __asm vfmadd231pd zmm19, zmm31, 19*8[r15]{1to8} \
    __asm vfmadd231pd zmm20, zmm31, 20*8[r15]{1to8} \
    \
    __asm vprefetch1 3*64[r15 + r14] \
    __asm vfmadd231pd zmm21, zmm31, 21*8[r15]{1to8} \
    __asm add r15, r12 \
    __asm vfmadd231pd zmm22, zmm31, -10*8[r15]{1to8}\
    __asm vfmadd231pd zmm23, zmm31, -9*8[r15]{1to8} \
    __asm vfmadd231pd zmm24, zmm31, -8*8[r15]{1to8} \
    __asm dec r8 \
    __asm vfmadd231pd zmm25, zmm31, -7*8[r15]{1to8} \
    \
    \
    __asm vprefetch1 0[rbx + r13] \
    __asm vfmadd231pd zmm26, zmm31, -6*8[r15]{1to8} \
    __asm vprefetch0 B_L1_PREFETCH_DIST*8*8[rbx] \
    __asm vfmadd231pd zmm27, zmm31, -5*8[r15]{1to8} \
    __asm add rbx, r9 \
    __asm vfmadd231pd zmm28, zmm31, -4*8[r15]{1to8} \
    __asm cmp r8, 0 \
    __asm vfmadd231pd zmm29, zmm31, -3*8[r15]{1to8} \
    \
}

//This is an array used for the scatter/gather instructions.
extern int offsets[16];


//#define MONITORS
//#define LOOPMON
void bli_dgemm_knc_asm_30x8
     (
       dim_t               k,
       double*    restrict alpha,
       double*    restrict a,
       double*    restrict b,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c, inc_t cs_c,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
    double * a_next = bli_auxinfo_next_a( data );
    double * b_next = bli_auxinfo_next_b( data );

    int * offsetPtr = &offsets[0];

    uint64_t k64 = k;

#ifdef MONITORS
    int toph, topl, both, botl, midl, midh, mid2l, mid2h;
#endif
#ifdef LOOPMON
    int tlooph, tloopl, blooph, bloopl;
#endif

    __asm
    {
#ifdef MONITORS
        rdtsc
        mov topl, eax
        mov toph, edx
#endif
        vpxord  zmm0,  zmm0, zmm0
        vmovaps zmm1,  zmm0  //clear out registers
        vmovaps zmm2,  zmm0
        mov rsi, k64    //loop index
        vmovaps zmm3,  zmm0

        mov r11, rs_c           //load row stride
        vmovaps zmm4,  zmm0
        sal r11, 3              //scale row stride
        vmovaps zmm5,  zmm0
        mov r15, a              //load address of a
        vmovaps zmm6,  zmm0
        mov rbx, b              //load address of b
        vmovaps zmm7,  zmm0

        vmovaps zmm8,  zmm0
        lea r10, [r11 + 2*r11 + 0] //r10
has 3 * r11 vmovaps zmm9, zmm0 vmovaps zmm10, zmm0 mov rdi, r11 vmovaps zmm11, zmm0 sal rdi, 2 //rdi has 4*r11 vmovaps zmm12, zmm0 mov rcx, c //load address of c for prefetching vmovaps zmm13, zmm0 vmovaps zmm14, zmm0 mov r8, k64 vmovaps zmm15, zmm0 vmovaps zmm16, zmm0 vmovaps zmm17, zmm0 mov r13, L2_PREFETCH_DIST*8*8 vmovaps zmm18, zmm0 mov r14, L2_PREFETCH_DIST*8*32 vmovaps zmm19, zmm0 vmovaps zmm20, zmm0 vmovaps zmm21, zmm0 vmovaps zmm22, zmm0 vmovaps zmm23, zmm0 sub r8, 30 + L2_PREFETCH_DIST //Check if we have over 40 operations to do. vmovaps zmm24, zmm0 mov r8, 30 vmovaps zmm25, zmm0 mov r9, 8*8 //amount to increment b* by each iteration vmovaps zmm26, zmm0 mov r12, 32*8 //amount to increment a* by each iteration vmovaps zmm27, zmm0 vmovaps zmm28, zmm0 vmovaps zmm29, zmm0 #ifdef MONITORS rdtsc mov midl, eax mov midh, edx #endif jle CONSIDER_UNDER_40 sub rsi, 30 + L2_PREFETCH_DIST //First 30 iterations LOOPREFECHCL2: ONE_ITER_PC_L2(rcx) jne LOOPREFECHCL2 mov rcx, c //Main Loop. LOOPMAIN: ONE_ITER_MAIN_LOOP(rcx, rsi) jne LOOPMAIN //Penultimate 22 iterations. //Break these off from the main loop to avoid prefetching extra shit. mov r14, a_next mov r13, b_next sub r14, r15 sub r13, rbx mov rsi, L2_PREFETCH_DIST-10 LOOPMAIN2: ONE_ITER_MAIN_LOOP(rcx, rsi) jne LOOPMAIN2 //Last 10 iterations mov r8, 10 LOOPREFETCHCL1: ONE_ITER_PC_L1(rcx) jne LOOPREFETCHCL1 jmp POSTACCUM //Alternate main loop, with no prefetching of C //Used when <= 40 iterations CONSIDER_UNDER_40: mov rsi, k64 test rsi, rsi je POSTACCUM LOOP_UNDER_40: ONE_ITER_MAIN_LOOP(rcx, rsi) jne LOOP_UNDER_40 POSTACCUM: #ifdef MONITORS rdtsc mov mid2l, eax mov mid2h, edx #endif mov r9, c //load address of c for update mov r12, alpha //load address of alpha // Check if C is row stride. 
If not, jump to the slow scattered update mov r14, cs_c dec r14 jne SCATTEREDUPDATE mov r14, beta vbroadcastsd zmm31, 0[r14] vmulpd zmm0, zmm0, 0[r12]{1to8} vmulpd zmm1, zmm1, 0[r12]{1to8} vmulpd zmm2, zmm2, 0[r12]{1to8} vmulpd zmm3, zmm3, 0[r12]{1to8} vfmadd231pd zmm0, zmm31, [r9+0] vfmadd231pd zmm1, zmm31, [r9+r11+0] vfmadd231pd zmm2, zmm31, [r9+2*r11+0] vfmadd231pd zmm3, zmm31, [r9+r10+0] vmovapd [r9+0], zmm0 vmovapd [r9+r11+0], zmm1 vmovapd [r9+2*r11+0], zmm2 vmovapd [r9+r10+0], zmm3 add r9, rdi vmulpd zmm4, zmm4, 0[r12]{1to8} vmulpd zmm5, zmm5, 0[r12]{1to8} vmulpd zmm6, zmm6, 0[r12]{1to8} vmulpd zmm7, zmm7, 0[r12]{1to8} vfmadd231pd zmm4, zmm31, [r9+0] vfmadd231pd zmm5, zmm31, [r9+r11+0] vfmadd231pd zmm6, zmm31, [r9+2*r11+0] vfmadd231pd zmm7, zmm31, [r9+r10+0] vmovapd [r9+0], zmm4 vmovapd [r9+r11+0], zmm5 vmovapd [r9+2*r11+0], zmm6 vmovapd [r9+r10+0], zmm7 add r9, rdi vmulpd zmm8, zmm8, 0[r12]{1to8} vmulpd zmm9, zmm9, 0[r12]{1to8} vmulpd zmm10, zmm10, 0[r12]{1to8} vmulpd zmm11, zmm11, 0[r12]{1to8} vfmadd231pd zmm8, zmm31, [r9+0] vfmadd231pd zmm9, zmm31, [r9+r11+0] vfmadd231pd zmm10, zmm31, [r9+2*r11+0] vfmadd231pd zmm11, zmm31, [r9+r10+0] vmovapd [r9+0], zmm8 vmovapd [r9+r11+0], zmm9 vmovapd [r9+2*r11+0], zmm10 vmovapd [r9+r10+0], zmm11 add r9, rdi vmulpd zmm12, zmm12, 0[r12]{1to8} vmulpd zmm13, zmm13, 0[r12]{1to8} vmulpd zmm14, zmm14, 0[r12]{1to8} vmulpd zmm15, zmm15, 0[r12]{1to8} vfmadd231pd zmm12, zmm31, [r9+0] vfmadd231pd zmm13, zmm31, [r9+r11+0] vfmadd231pd zmm14, zmm31, [r9+2*r11+0] vfmadd231pd zmm15, zmm31, [r9+r10+0] vmovapd [r9+0], zmm12 vmovapd [r9+r11+0], zmm13 vmovapd [r9+2*r11+0], zmm14 vmovapd [r9+r10+0], zmm15 add r9, rdi vmulpd zmm16, zmm16, 0[r12]{1to8} vmulpd zmm17, zmm17, 0[r12]{1to8} vmulpd zmm18, zmm18, 0[r12]{1to8} vmulpd zmm19, zmm19, 0[r12]{1to8} vfmadd231pd zmm16, zmm31, [r9+0] vfmadd231pd zmm17, zmm31, [r9+r11+0] vfmadd231pd zmm18, zmm31, [r9+2*r11+0] vfmadd231pd zmm19, zmm31, [r9+r10+0] vmovapd [r9+0], zmm16 vmovapd [r9+r11+0], zmm17 
vmovapd [r9+2*r11+0], zmm18 vmovapd [r9+r10+0], zmm19 add r9, rdi vmulpd zmm20, zmm20, 0[r12]{1to8} vmulpd zmm21, zmm21, 0[r12]{1to8} vmulpd zmm22, zmm22, 0[r12]{1to8} vmulpd zmm23, zmm23, 0[r12]{1to8} vfmadd231pd zmm20, zmm31, [r9+0] vfmadd231pd zmm21, zmm31, [r9+r11+0] vfmadd231pd zmm22, zmm31, [r9+2*r11+0] vfmadd231pd zmm23, zmm31, [r9+r10+0] vmovapd [r9+0], zmm20 vmovapd [r9+r11+0], zmm21 vmovapd [r9+2*r11+0], zmm22 vmovapd [r9+r10+0], zmm23 add r9, rdi vmulpd zmm24, zmm24, 0[r12]{1to8} vmulpd zmm25, zmm25, 0[r12]{1to8} vmulpd zmm26, zmm26, 0[r12]{1to8} vmulpd zmm27, zmm27, 0[r12]{1to8} vfmadd231pd zmm24, zmm31, [r9+0] vfmadd231pd zmm25, zmm31, [r9+r11+0] vfmadd231pd zmm26, zmm31, [r9+2*r11+0] vfmadd231pd zmm27, zmm31, [r9+r10+0] vmovapd [r9+0], zmm24 vmovapd [r9+r11+0], zmm25 vmovapd [r9+2*r11+0], zmm26 vmovapd [r9+r10+0], zmm27 add r9, rdi vmulpd zmm28, zmm28, 0[r12]{1to8} vmulpd zmm29, zmm29, 0[r12]{1to8} vfmadd231pd zmm28, zmm31, [r9+0] vfmadd231pd zmm29, zmm31, [r9+r11+0] vmovapd [r9+0], zmm28 vmovapd [r9+r11+0], zmm29 jmp END SCATTEREDUPDATE: mov r10, offsetPtr vmovapd zmm31, 0[r10] vpbroadcastd zmm30, cs_c mov r13, beta vpmulld zmm30, zmm31, zmm30 mov ebx, 255 UPDATE_C_ROW_SCATTERED(zmm0, 0, r9) UPDATE_C_ROW_SCATTERED(zmm1, 1, r9) UPDATE_C_ROW_SCATTERED(zmm2, 2, r9) UPDATE_C_ROW_SCATTERED(zmm3, 3, r9) UPDATE_C_ROW_SCATTERED(zmm4, 4, r9) UPDATE_C_ROW_SCATTERED(zmm5, 5, r9) UPDATE_C_ROW_SCATTERED(zmm6, 6, r9) UPDATE_C_ROW_SCATTERED(zmm7, 7, r9) UPDATE_C_ROW_SCATTERED(zmm8, 8, r9) UPDATE_C_ROW_SCATTERED(zmm9, 9, r9) UPDATE_C_ROW_SCATTERED(zmm10, 10, r9) UPDATE_C_ROW_SCATTERED(zmm11, 11, r9) UPDATE_C_ROW_SCATTERED(zmm12, 12, r9) UPDATE_C_ROW_SCATTERED(zmm13, 13, r9) UPDATE_C_ROW_SCATTERED(zmm14, 14, r9) UPDATE_C_ROW_SCATTERED(zmm15, 15, r9) UPDATE_C_ROW_SCATTERED(zmm16, 16, r9) UPDATE_C_ROW_SCATTERED(zmm17, 17, r9) UPDATE_C_ROW_SCATTERED(zmm18, 18, r9) UPDATE_C_ROW_SCATTERED(zmm19, 19, r9) UPDATE_C_ROW_SCATTERED(zmm20, 20, r9) UPDATE_C_ROW_SCATTERED(zmm21, 
21, r9) UPDATE_C_ROW_SCATTERED(zmm22, 22, r9) UPDATE_C_ROW_SCATTERED(zmm23, 23, r9) UPDATE_C_ROW_SCATTERED(zmm24, 24, r9) UPDATE_C_ROW_SCATTERED(zmm25, 25, r9) UPDATE_C_ROW_SCATTERED(zmm26, 26, r9) UPDATE_C_ROW_SCATTERED(zmm27, 27, r9) UPDATE_C_ROW_SCATTERED(zmm28, 28, r9) UPDATE_C_ROW_SCATTERED(zmm29, 29, r9) END: #ifdef MONITORS rdtsc mov botl, eax mov both, edx #endif } #ifdef LOOPMON printf("looptime = \t%d\n", bloopl - tloopl); #endif #ifdef MONITORS dim_t top = ((dim_t)toph << 32) | topl; dim_t mid = ((dim_t)midh << 32) | midl; dim_t mid2 = ((dim_t)mid2h << 32) | mid2l; dim_t bot = ((dim_t)both << 32) | botl; printf("setup =\t%u\tmain loop =\t%u\tcleanup=\t%u\ttotal=\t%u\n", mid - top, mid2 - mid, bot - mid2, bot - top); #endif } blis-0.6.1/kernels/knc/3/bli_sgemm_knc_asm_30x16.c000066400000000000000000000547201360743507500215070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include #define A_L1_PREFETCH_DIST 4 #define B_L1_PREFETCH_DIST 2 #define L2_PREFETCH_DIST 16 // Must be greater than 10, because of the way the loop is constructed. //Alternate code path uused if C is not row-major #define UPDATE_C_ROW_SCATTERED(REG1, NUM, BASE_DEST) \ { \ __asm kmov k3, ebx \ __asm GATHER##NUM: \ __asm vgatherdps zmm31{k3}, [BASE_DEST + zmm30 * 4] \ __asm jknzd k3, GATHER##NUM \ \ __asm vmulps REG1, REG1, 0[r12]{1to16} /*scale by alpha*/ \ __asm vfmadd132ps zmm31, REG1, 0[r13]{1to16} /*scale by beta, add in result*/\ __asm kmov k3, ebx \ \ __asm SCATTER##NUM: \ __asm vscatterdps [BASE_DEST + zmm30 * 4]{k3}, zmm31 \ __asm jknzd k3, SCATTER##NUM \ __asm add BASE_DEST, r11 \ } //One iteration of the k_r loop. 
//Each iteration, we prefetch A into L1 and into L2 #define ONE_ITER_MAIN_LOOP(C_ADDR, COUNTER) \ {\ __asm vbroadcastf32x4 zmm30, 0[r15] \ __asm vmovaps zmm31, 0[rbx] \ \ __asm vfmadd231ps zmm0, zmm31, zmm30{aaaa} \ __asm vfmadd231ps zmm4, zmm31, 4*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256[r15] \ __asm vfmadd231ps zmm5, zmm31, 5*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+64[r15] \ __asm vfmadd231ps zmm6, zmm31, 6*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+128[r15]\ __asm vfmadd231ps zmm7, zmm31, 7*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+192[r15]\ __asm vfmadd231ps zmm8, zmm31, 8*4[r15]{1to16} \ \ __asm vprefetch1 0[r15 + r14] \ __asm vfmadd231ps zmm9, zmm31, 9*4[r15]{1to16} \ __asm vfmadd231ps zmm1, zmm31, zmm30{bbbb} \ __asm vfmadd231ps zmm2, zmm31, zmm30{cccc} \ __asm vfmadd231ps zmm3, zmm31, zmm30{dddd} \ __asm vfmadd231ps zmm10, zmm31, 10*4[r15]{1to16} \ \ __asm vprefetch1 64[r15 + r14] \ __asm vfmadd231ps zmm11, zmm31, 11*4[r15]{1to16} \ __asm vfmadd231ps zmm12, zmm31, 12*4[r15]{1to16} \ __asm vfmadd231ps zmm13, zmm31, 13*4[r15]{1to16} \ __asm vfmadd231ps zmm14, zmm31, 14*4[r15]{1to16} \ __asm vfmadd231ps zmm15, zmm31, 15*4[r15]{1to16} \ \ __asm vprefetch1 2*64[r15 + r14] \ __asm vfmadd231ps zmm16, zmm31, 16*4[r15]{1to16} \ __asm vfmadd231ps zmm17, zmm31, 17*4[r15]{1to16} \ __asm vfmadd231ps zmm18, zmm31, 18*4[r15]{1to16} \ __asm vfmadd231ps zmm19, zmm31, 19*4[r15]{1to16} \ __asm vfmadd231ps zmm20, zmm31, 20*4[r15]{1to16} \ \ __asm vprefetch1 3*64[r15 + r14] \ __asm vfmadd231ps zmm21, zmm31, 21*4[r15]{1to16} \ __asm add r15, r12 \ __asm vfmadd231ps zmm22, zmm31, -10*4[r15]{1to16}\ __asm vfmadd231ps zmm23, zmm31, -9*4[r15]{1to16} \ __asm vfmadd231ps zmm24, zmm31, -8*4[r15]{1to16} \ __asm dec COUNTER \ __asm vfmadd231ps zmm25, zmm31, -7*4[r15]{1to16} \ \ \ __asm vprefetch1 0[rbx + r13] \ __asm vfmadd231ps zmm26, zmm31, -6*4[r15]{1to16} \ __asm vprefetch0 B_L1_PREFETCH_DIST*16*4[rbx] \ __asm vfmadd231ps 
zmm27, zmm31, -5*4[r15]{1to16} \ __asm add rbx, r9 \ __asm vfmadd231ps zmm28, zmm31, -4*4[r15]{1to16} \ __asm cmp COUNTER, 0 \ __asm vfmadd231ps zmm29, zmm31, -3*4[r15]{1to16} \ } //One iteration of the k_r loop. //Same as ONE_ITER_MAIN_LOOP, but additionally, we prefetch one line of C into the L2 cache //Current placement of this prefetch instruction is somewhat arbitrary. #define ONE_ITER_PC_L2(C_ADDR) \ {\ __asm vbroadcastf32x4 zmm30, 0[r15] \ __asm vmovaps zmm31, 0[rbx] \ \ __asm vfmadd231ps zmm0, zmm31, zmm30{aaaa} \ __asm vfmadd231ps zmm4, zmm31, 4*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256[r15] \ __asm vfmadd231ps zmm5, zmm31, 5*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+64[r15] \ __asm vfmadd231ps zmm6, zmm31, 6*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+128[r15]\ __asm vfmadd231ps zmm7, zmm31, 7*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+192[r15]\ __asm vfmadd231ps zmm8, zmm31, 8*4[r15]{1to16} \ \ __asm vprefetch1 0[r15 + r14] \ __asm vfmadd231ps zmm9, zmm31, 9*4[r15]{1to16} \ __asm vfmadd231ps zmm1, zmm31, zmm30{bbbb} \ __asm vfmadd231ps zmm2, zmm31, zmm30{cccc} \ __asm vfmadd231ps zmm3, zmm31, zmm30{dddd} \ __asm vfmadd231ps zmm10, zmm31, 10*4[r15]{1to16} \ \ __asm vprefetch1 64[r15 + r14] \ __asm vfmadd231ps zmm11, zmm31, 11*4[r15]{1to16} \ __asm vprefetch1 0[C_ADDR] \ __asm vfmadd231ps zmm12, zmm31, 12*4[r15]{1to16} \ __asm vfmadd231ps zmm13, zmm31, 13*4[r15]{1to16} \ __asm vfmadd231ps zmm14, zmm31, 14*4[r15]{1to16} \ __asm vfmadd231ps zmm15, zmm31, 15*4[r15]{1to16} \ \ __asm vprefetch1 2*64[r15 + r14] \ __asm vfmadd231ps zmm16, zmm31, 16*4[r15]{1to16} \ __asm vfmadd231ps zmm17, zmm31, 17*4[r15]{1to16} \ __asm vfmadd231ps zmm18, zmm31, 18*4[r15]{1to16} \ __asm vfmadd231ps zmm19, zmm31, 19*4[r15]{1to16} \ __asm vfmadd231ps zmm20, zmm31, 20*4[r15]{1to16} \ \ __asm vprefetch1 3*64[r15 + r14] \ __asm vfmadd231ps zmm21, zmm31, 21*4[r15]{1to16} \ __asm add r15, r12 \ __asm vfmadd231ps zmm22, zmm31, 
-10*4[r15]{1to16}\ __asm vfmadd231ps zmm23, zmm31, -9*4[r15]{1to16} \ __asm add C_ADDR, r11 \ __asm vfmadd231ps zmm24, zmm31, -8*4[r15]{1to16} \ __asm dec r8 \ __asm vfmadd231ps zmm25, zmm31, -7*4[r15]{1to16} \ \ \ __asm vprefetch1 0[rbx + r13] \ __asm vfmadd231ps zmm26, zmm31, -6*4[r15]{1to16} \ __asm vprefetch0 B_L1_PREFETCH_DIST*16*4[rbx] \ __asm vfmadd231ps zmm27, zmm31, -5*4[r15]{1to16} \ __asm add rbx, r9 \ __asm vfmadd231ps zmm28, zmm31, -4*4[r15]{1to16} \ __asm cmp r8, 0 \ __asm vfmadd231ps zmm29, zmm31, -3*4[r15]{1to16} \ \ } //One iteration of the k_r loop. //Same as ONE_ITER_MAIN_LOOP, but additionally, we prefetch 3 cache lines of C into the L1 cache //Current placement of these prefetch instructions is somewhat arbitrary. #define ONE_ITER_PC_L1(C_ADDR) \ {\ __asm vbroadcastf32x4 zmm30, 0[r15] \ __asm vmovaps zmm31, 0[rbx] \ \ __asm vfmadd231ps zmm0, zmm31, zmm30{aaaa} \ __asm vfmadd231ps zmm4, zmm31, 4*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256[r15] \ __asm vfmadd231ps zmm5, zmm31, 5*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+64[r15] \ __asm vfmadd231ps zmm6, zmm31, 6*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+128[r15]\ __asm vfmadd231ps zmm7, zmm31, 7*4[r15]{1to16} \ __asm vprefetch0 A_L1_PREFETCH_DIST*256+192[r15]\ __asm vfmadd231ps zmm8, zmm31, 8*4[r15]{1to16} \ \ __asm vprefetch1 0[r15 + r14] \ __asm vfmadd231ps zmm9, zmm31, 9*4[r15]{1to16} \ __asm vprefetch0 0[C_ADDR] \ __asm vfmadd231ps zmm1, zmm31, zmm30{bbbb} \ __asm add C_ADDR, r11 \ __asm vfmadd231ps zmm2, zmm31, zmm30{cccc} \ __asm vfmadd231ps zmm3, zmm31, zmm30{dddd} \ __asm vfmadd231ps zmm10, zmm31, 10*4[r15]{1to16} \ \ __asm vprefetch1 64[r15 + r14] \ __asm vfmadd231ps zmm11, zmm31, 11*4[r15]{1to16} \ __asm vprefetch0 0[C_ADDR] \ __asm vfmadd231ps zmm12, zmm31, 12*4[r15]{1to16} \ __asm add C_ADDR, r11 \ __asm vfmadd231ps zmm13, zmm31, 13*4[r15]{1to16} \ __asm vfmadd231ps zmm14, zmm31, 14*4[r15]{1to16} \ __asm vfmadd231ps zmm15, zmm31, 
15*4[r15]{1to16} \ \ __asm vprefetch1 2*64[r15 + r14] \ __asm vfmadd231ps zmm16, zmm31, 16*4[r15]{1to16} \ __asm vprefetch0 0[C_ADDR] \ __asm vfmadd231ps zmm17, zmm31, 17*4[r15]{1to16} \ __asm add C_ADDR, r11 \ __asm vfmadd231ps zmm18, zmm31, 18*4[r15]{1to16} \ __asm vfmadd231ps zmm19, zmm31, 19*4[r15]{1to16} \ __asm vfmadd231ps zmm20, zmm31, 20*4[r15]{1to16} \ \ __asm vprefetch1 3*64[r15 + r14] \ __asm vfmadd231ps zmm21, zmm31, 21*4[r15]{1to16} \ __asm add r15, r12 \ __asm vfmadd231ps zmm22, zmm31, -10*4[r15]{1to16}\ __asm vfmadd231ps zmm23, zmm31, -9*4[r15]{1to16} \ __asm vfmadd231ps zmm24, zmm31, -8*4[r15]{1to16} \ __asm dec r8 \ __asm vfmadd231ps zmm25, zmm31, -7*4[r15]{1to16} \ \ \ __asm vprefetch1 0[rbx + r13] \ __asm vfmadd231ps zmm26, zmm31, -6*4[r15]{1to16} \ __asm vprefetch0 B_L1_PREFETCH_DIST*16*4[rbx] \ __asm vfmadd231ps zmm27, zmm31, -5*4[r15]{1to16} \ __asm add rbx, r9 \ __asm vfmadd231ps zmm28, zmm31, -4*4[r15]{1to16} \ __asm cmp r8, 0 \ __asm vfmadd231ps zmm29, zmm31, -3*4[r15]{1to16} \ \ } //This is an array used for the scattter/gather instructions. 
int offsets[16] __attribute__((aligned(0x1000))) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; //#define MONITORS //#define LOOPMON void bli_sgemm_knc_asm_30x16 ( dim_t k, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { float * a_next = bli_auxinfo_next_a( data ); float * b_next = bli_auxinfo_next_b( data ); int * offsetPtr = &offsets[0]; uint64_t k64 = k; #ifdef MONITORS int toph, topl, both, botl, midl, midh, mid2l, mid2h; #endif #ifdef LOOPMON int tlooph, tloopl, blooph, bloopl; #endif __asm { #ifdef MONITORS rdtsc mov topl, eax mov toph, edx #endif vpxord zmm0, zmm0, zmm0 vmovaps zmm1, zmm0 //clear out registers vmovaps zmm2, zmm0 mov rsi, k64 //loop index vmovaps zmm3, zmm0 mov r11, rs_c //load row stride vmovaps zmm4, zmm0 sal r11, 2 //scale row stride vmovaps zmm5, zmm0 mov r15, a //load address of a vmovaps zmm6, zmm0 mov rbx, b //load address of b vmovaps zmm7, zmm0 vmovaps zmm8, zmm0 lea r10, [r11 + 2*r11 + 0] //r10 has 3 * r11 vmovaps zmm9, zmm0 vmovaps zmm10, zmm0 mov rdi, r11 vmovaps zmm11, zmm0 sal rdi, 2 //rdi has 4*r11 vmovaps zmm12, zmm0 mov rcx, c //load address of c for prefetching vmovaps zmm13, zmm0 vmovaps zmm14, zmm0 mov r8, k64 vmovaps zmm15, zmm0 vmovaps zmm16, zmm0 vmovaps zmm17, zmm0 mov r13, L2_PREFETCH_DIST*4*16 vmovaps zmm18, zmm0 mov r14, L2_PREFETCH_DIST*4*32 vmovaps zmm19, zmm0 vmovaps zmm20, zmm0 vmovaps zmm21, zmm0 vmovaps zmm22, zmm0 vmovaps zmm23, zmm0 sub r8, 30 + L2_PREFETCH_DIST //Check if we have over 40 operations to do. 
vmovaps zmm24, zmm0 mov r8, 30 vmovaps zmm25, zmm0 mov r9, 16*4 //amount to increment b* by each iteration vmovaps zmm26, zmm0 mov r12, 32*4 //amount to increment a* by each iteration vmovaps zmm27, zmm0 vmovaps zmm28, zmm0 vmovaps zmm29, zmm0 #ifdef MONITORS rdtsc mov midl, eax mov midh, edx #endif jle CONSIDER_UNDER_40 sub rsi, 30 + L2_PREFETCH_DIST //First 30 iterations LOOPREFECHCL2: ONE_ITER_PC_L2(rcx) jne LOOPREFECHCL2 mov rcx, c //Main Loop. LOOPMAIN: ONE_ITER_MAIN_LOOP(rcx, rsi) jne LOOPMAIN //Penultimate 22 iterations. //Break these off from the main loop to avoid prefetching extra shit. mov r14, a_next mov r13, b_next sub r14, r15 sub r13, rbx mov rsi, L2_PREFETCH_DIST-10 LOOPMAIN2: ONE_ITER_MAIN_LOOP(rcx, rsi) jne LOOPMAIN2 //Last 10 iterations mov r8, 10 LOOPREFETCHCL1: ONE_ITER_PC_L1(rcx) jne LOOPREFETCHCL1 jmp POSTACCUM //Alternate main loop, with no prefetching of C //Used when <= 40 iterations CONSIDER_UNDER_40: mov rsi, k64 test rsi, rsi je POSTACCUM LOOP_UNDER_40: ONE_ITER_MAIN_LOOP(rcx, rsi) jne LOOP_UNDER_40 POSTACCUM: #ifdef MONITORS rdtsc mov mid2l, eax mov mid2h, edx #endif mov r9, c //load address of c for update mov r12, alpha //load address of alpha // Check if C is row stride. 
If not, jump to the slow scattered update mov r14, cs_c dec r14 jne SCATTEREDUPDATE mov r14, beta vbroadcastss zmm31, 0[r14] vmulps zmm0, zmm0, 0[r12]{1to16} vmulps zmm1, zmm1, 0[r12]{1to16} vmulps zmm2, zmm2, 0[r12]{1to16} vmulps zmm3, zmm3, 0[r12]{1to16} vfmadd231ps zmm0, zmm31, [r9+0] vfmadd231ps zmm1, zmm31, [r9+r11+0] vfmadd231ps zmm2, zmm31, [r9+2*r11+0] vfmadd231ps zmm3, zmm31, [r9+r10+0] vmovaps [r9+0], zmm0 vmovaps [r9+r11+0], zmm1 vmovaps [r9+2*r11+0], zmm2 vmovaps [r9+r10+0], zmm3 add r9, rdi vmulps zmm4, zmm4, 0[r12]{1to16} vmulps zmm5, zmm5, 0[r12]{1to16} vmulps zmm6, zmm6, 0[r12]{1to16} vmulps zmm7, zmm7, 0[r12]{1to16} vfmadd231ps zmm4, zmm31, [r9+0] vfmadd231ps zmm5, zmm31, [r9+r11+0] vfmadd231ps zmm6, zmm31, [r9+2*r11+0] vfmadd231ps zmm7, zmm31, [r9+r10+0] vmovaps [r9+0], zmm4 vmovaps [r9+r11+0], zmm5 vmovaps [r9+2*r11+0], zmm6 vmovaps [r9+r10+0], zmm7 add r9, rdi vmulps zmm8, zmm8, 0[r12]{1to16} vmulps zmm9, zmm9, 0[r12]{1to16} vmulps zmm10, zmm10, 0[r12]{1to16} vmulps zmm11, zmm11, 0[r12]{1to16} vfmadd231ps zmm8, zmm31, [r9+0] vfmadd231ps zmm9, zmm31, [r9+r11+0] vfmadd231ps zmm10, zmm31, [r9+2*r11+0] vfmadd231ps zmm11, zmm31, [r9+r10+0] vmovaps [r9+0], zmm8 vmovaps [r9+r11+0], zmm9 vmovaps [r9+2*r11+0], zmm10 vmovaps [r9+r10+0], zmm11 add r9, rdi vmulps zmm12, zmm12, 0[r12]{1to16} vmulps zmm13, zmm13, 0[r12]{1to16} vmulps zmm14, zmm14, 0[r12]{1to16} vmulps zmm15, zmm15, 0[r12]{1to16} vfmadd231ps zmm12, zmm31, [r9+0] vfmadd231ps zmm13, zmm31, [r9+r11+0] vfmadd231ps zmm14, zmm31, [r9+2*r11+0] vfmadd231ps zmm15, zmm31, [r9+r10+0] vmovaps [r9+0], zmm12 vmovaps [r9+r11+0], zmm13 vmovaps [r9+2*r11+0], zmm14 vmovaps [r9+r10+0], zmm15 add r9, rdi vmulps zmm16, zmm16, 0[r12]{1to16} vmulps zmm17, zmm17, 0[r12]{1to16} vmulps zmm18, zmm18, 0[r12]{1to16} vmulps zmm19, zmm19, 0[r12]{1to16} vfmadd231ps zmm16, zmm31, [r9+0] vfmadd231ps zmm17, zmm31, [r9+r11+0] vfmadd231ps zmm18, zmm31, [r9+2*r11+0] vfmadd231ps zmm19, zmm31, [r9+r10+0] vmovaps [r9+0], zmm16 
vmovaps [r9+r11+0], zmm17 vmovaps [r9+2*r11+0], zmm18 vmovaps [r9+r10+0], zmm19 add r9, rdi vmulps zmm20, zmm20, 0[r12]{1to16} vmulps zmm21, zmm21, 0[r12]{1to16} vmulps zmm22, zmm22, 0[r12]{1to16} vmulps zmm23, zmm23, 0[r12]{1to16} vfmadd231ps zmm20, zmm31, [r9+0] vfmadd231ps zmm21, zmm31, [r9+r11+0] vfmadd231ps zmm22, zmm31, [r9+2*r11+0] vfmadd231ps zmm23, zmm31, [r9+r10+0] vmovaps [r9+0], zmm20 vmovaps [r9+r11+0], zmm21 vmovaps [r9+2*r11+0], zmm22 vmovaps [r9+r10+0], zmm23 add r9, rdi vmulps zmm24, zmm24, 0[r12]{1to16} vmulps zmm25, zmm25, 0[r12]{1to16} vmulps zmm26, zmm26, 0[r12]{1to16} vmulps zmm27, zmm27, 0[r12]{1to16} vfmadd231ps zmm24, zmm31, [r9+0] vfmadd231ps zmm25, zmm31, [r9+r11+0] vfmadd231ps zmm26, zmm31, [r9+2*r11+0] vfmadd231ps zmm27, zmm31, [r9+r10+0] vmovaps [r9+0], zmm24 vmovaps [r9+r11+0], zmm25 vmovaps [r9+2*r11+0], zmm26 vmovaps [r9+r10+0], zmm27 add r9, rdi vmulps zmm28, zmm28, 0[r12]{1to16} vmulps zmm29, zmm29, 0[r12]{1to16} vfmadd231ps zmm28, zmm31, [r9+0] vfmadd231ps zmm29, zmm31, [r9+r11+0] vmovaps [r9+0], zmm28 vmovaps [r9+r11+0], zmm29 jmp END SCATTEREDUPDATE: mov r10, offsetPtr vmovaps zmm31, 0[r10] vpbroadcastd zmm30, cs_c mov r13, beta vpmulld zmm30, zmm31, zmm30 mov ebx, 0xFFFF UPDATE_C_ROW_SCATTERED(zmm0, 0, r9) UPDATE_C_ROW_SCATTERED(zmm1, 1, r9) UPDATE_C_ROW_SCATTERED(zmm2, 2, r9) UPDATE_C_ROW_SCATTERED(zmm3, 3, r9) UPDATE_C_ROW_SCATTERED(zmm4, 4, r9) UPDATE_C_ROW_SCATTERED(zmm5, 5, r9) UPDATE_C_ROW_SCATTERED(zmm6, 6, r9) UPDATE_C_ROW_SCATTERED(zmm7, 7, r9) UPDATE_C_ROW_SCATTERED(zmm8, 8, r9) UPDATE_C_ROW_SCATTERED(zmm9, 9, r9) UPDATE_C_ROW_SCATTERED(zmm10, 10, r9) UPDATE_C_ROW_SCATTERED(zmm11, 11, r9) UPDATE_C_ROW_SCATTERED(zmm12, 12, r9) UPDATE_C_ROW_SCATTERED(zmm13, 13, r9) UPDATE_C_ROW_SCATTERED(zmm14, 14, r9) UPDATE_C_ROW_SCATTERED(zmm15, 15, r9) UPDATE_C_ROW_SCATTERED(zmm16, 16, r9) UPDATE_C_ROW_SCATTERED(zmm17, 17, r9) UPDATE_C_ROW_SCATTERED(zmm18, 18, r9) UPDATE_C_ROW_SCATTERED(zmm19, 19, r9) UPDATE_C_ROW_SCATTERED(zmm20, 
20, r9) UPDATE_C_ROW_SCATTERED(zmm21, 21, r9) UPDATE_C_ROW_SCATTERED(zmm22, 22, r9) UPDATE_C_ROW_SCATTERED(zmm23, 23, r9) UPDATE_C_ROW_SCATTERED(zmm24, 24, r9) UPDATE_C_ROW_SCATTERED(zmm25, 25, r9) UPDATE_C_ROW_SCATTERED(zmm26, 26, r9) UPDATE_C_ROW_SCATTERED(zmm27, 27, r9) UPDATE_C_ROW_SCATTERED(zmm28, 28, r9) UPDATE_C_ROW_SCATTERED(zmm29, 29, r9) END: #ifdef MONITORS rdtsc mov botl, eax mov both, edx #endif } #ifdef LOOPMON printf("looptime = \t%d\n", bloopl - tloopl); #endif #ifdef MONITORS dim_t top = ((dim_t)toph << 32) | topl; dim_t mid = ((dim_t)midh << 32) | midl; dim_t mid2 = ((dim_t)mid2h << 32) | mid2l; dim_t bot = ((dim_t)both << 32) | botl; printf("setup =\t%u\tmain loop =\t%u\tcleanup=\t%u\ttotal=\t%u\n", mid - top, mid2 - mid, bot - mid2, bot - top); #endif } blis-0.6.1/kernels/knc/bli_kernels_knc.h000066400000000000000000000033531360743507500202000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ GEMM_UKR_PROT( float, s, gemm_knc_asm_30x16 ) GEMM_UKR_PROT( double, d, gemm_knc_asm_30x8 ) blis-0.6.1/kernels/knl/000077500000000000000000000000001360743507500147105ustar00rootroot00000000000000blis-0.6.1/kernels/knl/1m/000077500000000000000000000000001360743507500152255ustar00rootroot00000000000000blis-0.6.1/kernels/knl/1m/bli_dpackm_knl_asm_24x8.c000066400000000000000000000463451360743507500217630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

// Must be defined before the macro header is included.
// NOTE(review): presumably selects Intel operand order (destination first)
// for the instruction macros used below -- confirm in bli_x86_asm_macros.h.
#define BLIS_ASM_SYNTAX_INTEL
#include "bli_x86_asm_macros.h"

// Load eight vectors and multiply each by ZMM(31) in one step:
//   ZMM(zi) = ZMM(31) * [vector i],  i = 0..7
// The eight memory operands are formed from base `a`, displacement `o` and
// the stride registers as s1*1, s1*2, s3*1, s1*4, s5*1, s3*2, s7*1, i.e.
// vectors 1..7 when the caller passes s3 = 3*s1, s5 = 5*s1, s7 = 7*s1
// (vector 0 is at a+o).
// NOTE(review): assumes MEM(base,index,scale,disp) argument order -- confirm
// in bli_x86_asm_macros.h.
#define LOADMUL8x8(a,o,s1,s3,s5,s7, \
                   z0,z1,z2,z3,z4,z5,z6,z7) \
    \
    VMULPD(ZMM(z0), ZMM(31), MEM(a, o)) \
    VMULPD(ZMM(z1), ZMM(31), MEM(a,s1,1,o)) \
    VMULPD(ZMM(z2), ZMM(31), MEM(a,s1,2,o)) \
    VMULPD(ZMM(z3), ZMM(31), MEM(a,s3,1,o)) \
    VMULPD(ZMM(z4), ZMM(31), MEM(a,s1,4,o)) \
    VMULPD(ZMM(z5), ZMM(31), MEM(a,s5,1,o)) \
    VMULPD(ZMM(z6), ZMM(31), MEM(a,s3,2,o)) \
    VMULPD(ZMM(z7), ZMM(31), MEM(a,s7,1,o))

// Same as LOADMUL8x8, but every destination carries MASK_KZ(k), where k is
// the number of the mask register to apply.
// NOTE(review): MASK_KZ presumably means zero-masking (lanes outside the
// mask are written as zero) -- confirm in bli_x86_asm_macros.h.
#define LOADMUL8x8_MASK(a,o,s1,s3,s5,s7, \
                        z0,z1,z2,z3,z4,z5,z6,z7,k) \
    \
    VMULPD(ZMM(z0) MASK_KZ(k), ZMM(31), MEM(a, o)) \
    VMULPD(ZMM(z1) MASK_KZ(k), ZMM(31), MEM(a,s1,1,o)) \
    VMULPD(ZMM(z2) MASK_KZ(k), ZMM(31), MEM(a,s1,2,o)) \
    VMULPD(ZMM(z3) MASK_KZ(k), ZMM(31), MEM(a,s3,1,o)) \
    VMULPD(ZMM(z4) MASK_KZ(k), ZMM(31), MEM(a,s1,4,o)) \
    VMULPD(ZMM(z5) MASK_KZ(k), ZMM(31), MEM(a,s5,1,o)) \
    VMULPD(ZMM(z6) MASK_KZ(k), ZMM(31), MEM(a,s3,2,o)) \
    VMULPD(ZMM(z7) MASK_KZ(k), ZMM(31), MEM(a,s7,1,o))

// Store ZMM(z0)..ZMM(z7) with unaligned moves to the same eight addresses
// that LOADMUL8x8 would form from (a, o, s1, s3, s5, s7).
#define STORE8x8(a,o,s1,s3,s5,s7, \
                 z0,z1,z2,z3,z4,z5,z6,z7) \
    \
    VMOVUPD(MEM(a, o), ZMM(z0)) \
    VMOVUPD(MEM(a,s1,1,o), ZMM(z1)) \
    VMOVUPD(MEM(a,s1,2,o), ZMM(z2)) \
    VMOVUPD(MEM(a,s3,1,o), ZMM(z3)) \
    VMOVUPD(MEM(a,s1,4,o), ZMM(z4)) \
    VMOVUPD(MEM(a,s5,1,o), ZMM(z5)) \
    VMOVUPD(MEM(a,s3,2,o), ZMM(z6)) \
    VMOVUPD(MEM(a,s7,1,o), ZMM(z7))

// 8x8 transpose of doubles (per the macro name) held in ZMM(a0)..ZMM(a7).
// Three stages: unpack lo/hi pairs a -> b, 128-bit-lane shuffle b -> a,
// 128-bit-lane shuffle a -> b. The result is left in ZMM(b0)..ZMM(b7);
// ZMM(a0)..ZMM(a7) are overwritten as scratch, so the a and b register sets
// must not overlap.
#define TRANSPOSE8x8(a0,a1,a2,a3,a4,a5,a6,a7, \
                     b0,b1,b2,b3,b4,b5,b6,b7) \
    \
    VUNPCKLPD(ZMM(b0), ZMM(a0), ZMM(a1)) \
    VUNPCKHPD(ZMM(b1), ZMM(a0), ZMM(a1)) \
    VUNPCKLPD(ZMM(b2), ZMM(a2), ZMM(a3)) \
    VUNPCKHPD(ZMM(b3), ZMM(a2), ZMM(a3)) \
    VUNPCKLPD(ZMM(b4), ZMM(a4), ZMM(a5)) \
    VUNPCKHPD(ZMM(b5), ZMM(a4), ZMM(a5)) \
    VUNPCKLPD(ZMM(b6), ZMM(a6), ZMM(a7)) \
    VUNPCKHPD(ZMM(b7), ZMM(a6), ZMM(a7)) \
    VSHUFF64X2(ZMM(a0), ZMM(b0), ZMM(b2), IMM(0x44)) \
    VSHUFF64X2(ZMM(a1), ZMM(b1), ZMM(b3), IMM(0x44)) \
    VSHUFF64X2(ZMM(a2), ZMM(b0), ZMM(b2), IMM(0xEE)) \
    VSHUFF64X2(ZMM(a3), ZMM(b1), ZMM(b3), IMM(0xEE)) \
    VSHUFF64X2(ZMM(a4), ZMM(b4), ZMM(b6), IMM(0x44)) \
    VSHUFF64X2(ZMM(a5), ZMM(b5), ZMM(b7), IMM(0x44)) \
    VSHUFF64X2(ZMM(a6), ZMM(b4), ZMM(b6), IMM(0xEE)) \
    VSHUFF64X2(ZMM(a7), ZMM(b5), ZMM(b7), IMM(0xEE)) \
    VSHUFF64X2(ZMM(b0), ZMM(a0), ZMM(a4), IMM(0x88)) \
    VSHUFF64X2(ZMM(b1), ZMM(a1), ZMM(a5), IMM(0x88)) \
    VSHUFF64X2(ZMM(b2), ZMM(a0), ZMM(a4), IMM(0xDD)) \
    VSHUFF64X2(ZMM(b3), ZMM(a1), ZMM(a5), IMM(0xDD)) \
    VSHUFF64X2(ZMM(b4), ZMM(a2), ZMM(a6), IMM(0x88)) \
    VSHUFF64X2(ZMM(b5), ZMM(a3), ZMM(a7), IMM(0x88)) \
    VSHUFF64X2(ZMM(b6), ZMM(a2), ZMM(a6), IMM(0xDD)) \
    VSHUFF64X2(ZMM(b7), ZMM(a3), ZMM(a7), IMM(0xDD))

//This is an array used for the scatter/gather instructions.
// The values 0..31; multiplied by inca in the general-stride paths below
// to form the per-row element indices fed to VGATHERDPD.
static int32_t offsets[32] __attribute__((aligned(64))) =
    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
     16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};

// bli_dpackm_knl_asm_8xk: pack an 8 x n panel of doubles from a into p,
// multiplying every element by *kappa.
//
//   conja   - only used (cast to trans_t) by the bli_dscal2m_ex fallback.
//   schema  - not referenced in this function.
//   cdim_   - number of rows present in a; the assembly path is taken only
//             when it equals 8, otherwise bli_dscal2m_ex does the copy and
//             rows cdim_..7 of p are zeroed.
//   n_      - number of columns to pack.
//   n_max_  - when n_ < n_max_, columns n_..n_max_-1 of p are zeroed.
//   kappa_  - pointer to the double scaling factor.
//   a_, inca_, lda_ - source matrix, its row stride and column stride, in
//                     elements (converted to bytes inside the assembly).
//   p_, ldp_        - destination panel and its column stride in elements;
//                     the rows of a column are stored contiguously.
//   cntx    - forwarded to bli_dscal2m_ex.
//
// The assembly has three code paths:
//   PACK8_N - inca == 1: each source column is one contiguous vector.
//   PACK8_T - lda  == 1: each source row is contiguous; 8x8 blocks are
//             loaded by rows and transposed in registers.
//   PACK8_G - any other strides: one VGATHERDPD per column.
void bli_dpackm_knl_asm_8xk
     (
       conj_t           conja,
       pack_t           schema,
       dim_t            cdim_,
       dim_t            n_,
       dim_t            n_max_,
       void*   restrict kappa_,
       void*   restrict a_, inc_t inca_, inc_t lda_,
       void*   restrict p_,              inc_t ldp_,
       cntx_t* restrict cntx
     )
{
    const int32_t* offsetPtr = &offsets[0];
    double*        a         = ( double* )a_;
    double*        p         = ( double* )p_;
    double*        kappa     = ( double* )kappa_;
    const int64_t  cdim      = cdim_;
    const int64_t  mnr       = 8;
    const int64_t  n         = n_;
    const int64_t  n_max     = n_max_;
    const int64_t  inca      = inca_;
    const int64_t  lda       = lda_;
    const int64_t  ldp       = ldp_;

    if ( cdim == mnr )
    {

    BEGIN_ASM()

        MOV(RSI, VAR(n))
        MOV(RAX, VAR(a))
        MOV(RBX, VAR(inca))
        MOV(RCX, VAR(lda))
        MOV(R14, VAR(p))
        MOV(RDI, VAR(ldp))

        // nothing to do for n == 0
        TEST(RSI, RSI)
        JZ(PACK8_DONE)

        LEA(RBX, MEM(,RBX,8))    //inca in bytes
        LEA(RCX, MEM(,RCX,8))    //lda in bytes
        LEA(RDI, MEM(,RDI,8))    //ldp in bytes
        LEA(R11, MEM(RDI,RDI,2)) //ldp*3
        LEA(R12, MEM(RDI,RDI,4)) //ldp*5
        LEA(R13, MEM(R11,RDI,4)) //ldp*7

        VBROADCASTSD(ZMM(31), VAR(kappa))

        // inca == 1 (8 bytes)? otherwise try the transposed path
        CMP(RBX, IMM(8))
        JNE(PACK8_T)

        LABEL(PACK8_N)

            // RDX = n % 8 (leftover columns), RSI = n / 8 (full 8-column blocks)
            MOV(RDX, RSI)
            AND(RDX, IMM(7))
            SAR(RSI, IMM(3))
            JZ(PACK8_N_TAIL)

            LEA(R8,  MEM(RCX,RCX,2)) //lda*3
            LEA(R9,  MEM(RCX,RCX,4)) //lda*5
            LEA(R10, MEM(R8 ,RCX,4)) //lda*7

            LABEL(PACK8_N_LOOP)

                // eight columns per iteration
                LOADMUL8x8(RAX,0,RCX,R8,R9,R10,0,1,2,3,4,5,6,7)
                STORE8x8(R14,0,RDI,R11,R12,R13,0,1,2,3,4,5,6,7)

                LEA(RAX, MEM(RAX,RCX,8))
                LEA(R14, MEM(R14,RDI,8))

                SUB(RSI, IMM(1))

            JNZ(PACK8_N_LOOP)

            TEST(RDX, RDX)
            JZ(PACK8_DONE)

            LABEL(PACK8_N_TAIL)

                // one leftover column per iteration
                VMULPD(ZMM(0), ZMM(31), MEM(RAX))
                VMOVUPD(MEM(R14), ZMM(0))

                LEA(RAX, MEM(RAX,RCX,1))
                LEA(R14, MEM(R14,RDI,1))

                SUB(RDX, IMM(1))

            JNZ(PACK8_N_TAIL)

            JMP(PACK8_DONE)

        LABEL(PACK8_T)

            // lda == 1 (8 bytes)? otherwise fall back to the gather path
            CMP(RCX, IMM(8))
            JNE(PACK8_G)

            LEA(R8,  MEM(RBX,RBX,2)) //inca*3
            LEA(R9,  MEM(RBX,RBX,4)) //inca*5
            LEA(R10, MEM(R8 ,RBX,4)) //inca*7

            // RDX = n % 8, RSI = n / 8
            MOV(RDX, RSI)
            AND(RDX, IMM(7))
            SAR(RSI, IMM(3))
            JZ(PACK8_T_TAIL)

            LABEL(PACK8_T_LOOP)

                // load 8 rows x 8 columns, transpose, store 8 packed columns
                LOADMUL8x8(RAX,0,RBX,R8,R9,R10,0,1,2,3,4,5,6,7)
                TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7,
                             16,17,18,19,20,21,22,23)
                STORE8x8(R14,0,RDI,R11,R12,R13,16,17,18,19,20,21,22,23)

                // lda is 8 bytes on this path, so this advances by 8 columns
                LEA(RAX, MEM(RAX,RCX,8))
                LEA(R14, MEM(R14,RDI,8))

                SUB(RSI, IMM(1))

            JNZ(PACK8_T_LOOP)

            TEST(RDX, RDX)
            JZ(PACK8_DONE)

            LABEL(PACK8_T_TAIL)

                // K(1) = (1 << (n % 8)) - 1
                MOV(RSI, IMM(1))
                SHLX(RSI, RSI, RDX)
                SUB(RSI, IMM(1))
                KMOVW(K(1), ESI)  //mask for n%8 elements

                LOADMUL8x8_MASK(RAX,0,RBX,R8,R9,R10,0,1,2,3,4,5,6,7,1)
                TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7,
                              8, 9,10,11,12,13,14,15)

                // store only the n % 8 (at most 7) leftover columns,
                // leaving as soon as the count reaches zero
                VMOVUPD(MEM(R14      ), ZMM( 8))
                SUB(RDX, IMM(1))
                JZ(PACK8_DONE)
                VMOVUPD(MEM(R14,RDI,1), ZMM( 9))
                SUB(RDX, IMM(1))
                JZ(PACK8_DONE)
                VMOVUPD(MEM(R14,RDI,2), ZMM(10))
                SUB(RDX, IMM(1))
                JZ(PACK8_DONE)
                VMOVUPD(MEM(R14,R11,1), ZMM(11))
                SUB(RDX, IMM(1))
                JZ(PACK8_DONE)
                VMOVUPD(MEM(R14,RDI,4), ZMM(12))
                SUB(RDX, IMM(1))
                JZ(PACK8_DONE)
                VMOVUPD(MEM(R14,R12,1), ZMM(13))
                SUB(RDX, IMM(1))
                JZ(PACK8_DONE)
                VMOVUPD(MEM(R14,R11,2), ZMM(14))

            JMP(PACK8_DONE)

        LABEL(PACK8_G)

            // YMM(0) = inca * {0..7}: element index of each row.
            // Only the low 32 bits of the 64-bit inca are broadcast.
            VPBROADCASTD(ZMM(3), VAR(inca))
            MOV(RBX, VAR(offsetPtr))
            VPMULLD(YMM(0), YMM(3), MEM(RBX))

            LABEL(PACK8_G_LOOP)

                // K(1) = all ones; it is re-armed on every iteration
                // before the gather
                KXNORW(K(1), K(0), K(0))
                VGATHERDPD(ZMM(3) MASK_K(1), MEM(RAX,YMM(0),8))
                VMULPD(ZMM(3), ZMM(3), ZMM(31))
                VMOVUPD(MEM(R14), ZMM(3))

                LEA(RAX, MEM(RAX,RCX,1))
                LEA(R14, MEM(R14,RDI,1))

                SUB(RSI, IMM(1))

            JNZ(PACK8_G_LOOP)

        LABEL(PACK8_DONE)

    END_ASM(
        : //output operands
        : //input operands
          [n]         "m" (n),
          [kappa]     "m" (*kappa),
          [a]         "m" (a),
          [inca]      "m" (inca),
          [lda]       "m" (lda),
          [p]         "m" (p),
          [ldp]       "m" (ldp),
          [offsetPtr] "m" (offsetPtr)
        : //clobbers
          "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5",
          "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11",
          "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17",
          "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23",
          "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29",
          "zmm30", "zmm31",
          "rax", "rbx", "rcx", "rdx", "rdi", "rsi",
          "r8", "r9", "r10", "r11", "r12", "r13", "r14", "memory"
    )

    }
    else // if ( cdim < mnr )
    {
        // Partial panel: scaled copy via the reference routine, then
        // zero the rows that are not present in a.
        bli_dscal2m_ex \
        ( \
          0, \
          BLIS_NONUNIT_DIAG, \
          BLIS_DENSE, \
          ( trans_t )conja, \
          cdim, \
          n, \
          kappa, \
          a, inca, lda, \
          p, 1,    ldp, \
          cntx, \
          NULL  \
        ); \

        // if ( cdim < mnr )
        {
            const dim_t      i      = cdim;
            const dim_t      m_edge = mnr - i;
            const dim_t      n_edge = n_max;
            double* restrict p_edge = p + (i  )*1;

            bli_dset0s_mxn
            (
              m_edge,
              n_edge,
              p_edge, 1, ldp
            );
        }
    }

    // Zero the columns between n and n_max.
    if ( n < n_max )
    {
        const dim_t      j      = n;
        const dim_t      m_edge = mnr;
        const dim_t      n_edge = n_max - j;
        double* restrict p_edge = p + (j  )*ldp;

        bli_dset0s_mxn
        (
          m_edge,
          n_edge,
          p_edge, 1, ldp
        );
    }
}

// bli_dpackm_knl_asm_24xk: same contract as bli_dpackm_knl_asm_8xk, for a
// panel of 24 rows. Every packed column is three 512-bit vectors, at byte
// offsets 0, 64 and 128.
//
//   PACK24_N - inca == 1: contiguous source columns.
//   PACK24_T - lda  == 1: three 8x8 blocks (rows 0-7, 8-15, 16-23) are
//              transposed per 8 packed columns.
//   PACK24_G - any other strides: three VGATHERDPD per column.
void bli_dpackm_knl_asm_24xk
     (
       conj_t           conja,
       pack_t           schema,
       dim_t            cdim_,
       dim_t            n_,
       dim_t            n_max_,
       void*   restrict kappa_,
       void*   restrict a_, inc_t inca_, inc_t lda_,
       void*   restrict p_,              inc_t ldp_,
       cntx_t* restrict cntx
     )
{
    const int32_t* offsetPtr = &offsets[0];
    double*        a         = ( double* )a_;
    double*        p         = ( double* )p_;
    double*        kappa     = ( double* )kappa_;
    const int64_t  cdim      = cdim_;
    const int64_t  mnr       = 24;
    const int64_t  n         = n_;
    const int64_t  n_max     = n_max_;
    const int64_t  inca      = inca_;
    const int64_t  lda       = lda_;
    const int64_t  ldp       = ldp_;

    if ( cdim == mnr )
    {

    BEGIN_ASM()

        MOV(RSI, VAR(n))
        MOV(RAX, VAR(a))
        MOV(RBX, VAR(inca))
        MOV(RCX, VAR(lda))
        MOV(R15, VAR(p))
        MOV(RDI, VAR(ldp))

        LEA(RBX, MEM(,RBX,8))    //inca in bytes
        LEA(RCX, MEM(,RCX,8))    //lda in bytes
        LEA(RDI, MEM(,RDI,8))    //ldp in bytes
        LEA(R11, MEM(RDI,RDI,2)) //ldp*3
        LEA(R12, MEM(RDI,RDI,4)) //ldp*5
        LEA(R13, MEM(R11,RDI,4)) //ldp*7

        VBROADCASTSD(ZMM(31), VAR(kappa))

        // nothing to do for n == 0
        TEST(RSI, RSI)
        JZ(PACK24_DONE)

        // inca == 1 (8 bytes)? otherwise try the transposed path
        CMP(RBX, IMM(8))
        JNE(PACK24_T)

        LABEL(PACK24_N)

            // RSI = n / 8; the remainder is recomputed from n at the tail
            SAR(RSI, IMM(3))
            JZ(PACK24_N_TAIL)

            LEA(R8,  MEM(RCX,RCX,2)) //lda*3
            LEA(R9,  MEM(RCX,RCX,4)) //lda*5
            LEA(R10, MEM(R8 ,RCX,4)) //lda*7

            LABEL(PACK24_N_LOOP)

                // eight columns x (3 vectors of 8 rows) per iteration
                LOADMUL8x8(RAX,  0,RCX,R8,R9,R10, 0, 1, 2, 3, 4, 5, 6, 7)
                LOADMUL8x8(RAX, 64,RCX,R8,R9,R10, 8, 9,10,11,12,13,14,15)
                LOADMUL8x8(RAX,128,RCX,R8,R9,R10,16,17,18,19,20,21,22,23)
                STORE8x8(R15,  0,RDI,R11,R12,R13, 0, 1, 2, 3, 4, 5, 6, 7)
                STORE8x8(R15, 64,RDI,R11,R12,R13, 8, 9,10,11,12,13,14,15)
                STORE8x8(R15,128,RDI,R11,R12,R13,16,17,18,19,20,21,22,23)

                LEA(RAX, MEM(RAX,RCX,8))
                LEA(R15, MEM(R15,RDI,8))

                SUB(RSI, IMM(1))

            JNZ(PACK24_N_LOOP)

            LABEL(PACK24_N_TAIL)

            // RSI = n % 8
            MOV(RSI, VAR(n))
            AND(RSI, IMM(7))
            TEST(RSI, RSI)
            JZ(PACK24_DONE)

            LABEL(PACK24_N_TAIL_LOOP)

                // one leftover column per iteration
                VMULPD(ZMM(0), ZMM(31), MEM(RAX,  0))
                VMULPD(ZMM(1), ZMM(31), MEM(RAX, 64))
                VMULPD(ZMM(2), ZMM(31), MEM(RAX,128))
                VMOVUPD(MEM(R15,  0), ZMM(0))
                VMOVUPD(MEM(R15, 64), ZMM(1))
                VMOVUPD(MEM(R15,128), ZMM(2))

                LEA(RAX, MEM(RAX,RCX,1))
                LEA(R15, MEM(R15,RDI,1))

                SUB(RSI, IMM(1))

            JNZ(PACK24_N_TAIL_LOOP)

            JMP(PACK24_DONE)

        LABEL(PACK24_T)

            // lda == 1 (8 bytes)? otherwise fall back to the gather path
            CMP(RCX, IMM(8))
            JNE(PACK24_G)

            LEA(R8,  MEM(RBX,RBX,2)) //inca*3
            LEA(R9,  MEM(RBX,RBX,4)) //inca*5
            LEA(R10, MEM(R8 ,RBX,4)) //inca*7

            // R14 -> row 8, RCX -> row 16. RCX (lda) is reused as a pointer
            // here; this path advances by the constant 64 bytes instead.
            LEA(R14, MEM(RAX,RBX,8))
            LEA(RCX, MEM(R14,RBX,8))

            SAR(RSI, IMM(3))
            JZ(PACK24_T_TAIL)

            LABEL(PACK24_T_LOOP)

                // ZMM 16-23 are reused as the transpose output for each of
                // the three row blocks; the rows 16-23 load is issued into
                // ZMM 0-7 once the first transpose has consumed them.
                LOADMUL8x8(RAX,0,RBX,R8,R9,R10, 0, 1, 2, 3, 4, 5, 6, 7)
                LOADMUL8x8(R14,0,RBX,R8,R9,R10, 8, 9,10,11,12,13,14,15)
                TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7,
                             16,17,18,19,20,21,22,23)
                STORE8x8(R15,  0,RDI,R11,R12,R13,16,17,18,19,20,21,22,23)
                LOADMUL8x8(RCX,0,RBX,R8,R9,R10, 0, 1, 2, 3, 4, 5, 6, 7)
                TRANSPOSE8x8( 8, 9,10,11,12,13,14,15,
                             16,17,18,19,20,21,22,23)
                STORE8x8(R15, 64,RDI,R11,R12,R13,16,17,18,19,20,21,22,23)
                TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7,
                             16,17,18,19,20,21,22,23)
                STORE8x8(R15,128,RDI,R11,R12,R13,16,17,18,19,20,21,22,23)

                // advance the three row-block pointers by 8 columns
                LEA(RAX, MEM(RAX,64))
                LEA(R14, MEM(R14,64))
                LEA(RCX, MEM(RCX,64))
                LEA(R15, MEM(R15,RDI,8))

                SUB(RSI, IMM(1))

            JNZ(PACK24_T_LOOP)

            LABEL(PACK24_T_TAIL)

            // RSI = n % 8
            MOV(RSI, VAR(n))
            AND(RSI, IMM(7))
            TEST(RSI, RSI)
            JZ(PACK24_DONE)

            // K(1) = (1 << (n % 8)) - 1. R13 (ldp*7) is overwritten here;
            // the stores below use at most row offset ldp*6.
            MOV(R13, IMM(1))
            SHLX(R13, R13, RSI)
            SUB(R13, IMM(1))
            KMOVW(K(1), R13D)  //mask for n%8 elements

            LOADMUL8x8_MASK(RAX,0,RBX,R8,R9,R10, 0, 1, 2, 3, 4, 5, 6, 7,1)
            LOADMUL8x8_MASK(R14,0,RBX,R8,R9,R10, 8, 9,10,11,12,13,14,15,1)
            LOADMUL8x8_MASK(RCX,0,RBX,R8,R9,R10,16,17,18,19,20,21,22,23,1)
            // The first transpose writes ZMM(31); the scaling factor it
            // held is no longer needed because all multiplies are done.
            TRANSPOSE8x8(16,17,18,19,20,21,22,23,
                         24,25,26,27,28,29,30,31)
            TRANSPOSE8x8( 8, 9,10,11,12,13,14,15,
                         16,17,18,19,20,21,22,23)
            TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7,
                          8, 9,10,11,12,13,14,15)

            // store only the n % 8 (at most 7) leftover columns
            VMOVUPD(MEM(R15,        0), ZMM( 8))
            VMOVUPD(MEM(R15,       64), ZMM(16))
            VMOVUPD(MEM(R15,      128), ZMM(24))
            SUB(RSI, IMM(1))
            JZ(PACK24_DONE)
            VMOVUPD(MEM(R15,RDI,1,  0), ZMM( 9))
            VMOVUPD(MEM(R15,RDI,1, 64), ZMM(17))
            VMOVUPD(MEM(R15,RDI,1,128), ZMM(25))
            SUB(RSI, IMM(1))
            JZ(PACK24_DONE)
            VMOVUPD(MEM(R15,RDI,2,  0), ZMM(10))
            VMOVUPD(MEM(R15,RDI,2, 64), ZMM(18))
            VMOVUPD(MEM(R15,RDI,2,128), ZMM(26))
            SUB(RSI, IMM(1))
            JZ(PACK24_DONE)
            VMOVUPD(MEM(R15,R11,1,  0), ZMM(11))
            VMOVUPD(MEM(R15,R11,1, 64), ZMM(19))
            VMOVUPD(MEM(R15,R11,1,128), ZMM(27))
            SUB(RSI, IMM(1))
            JZ(PACK24_DONE)
            VMOVUPD(MEM(R15,RDI,4,  0), ZMM(12))
            VMOVUPD(MEM(R15,RDI,4, 64), ZMM(20))
            VMOVUPD(MEM(R15,RDI,4,128), ZMM(28))
            SUB(RSI, IMM(1))
            JZ(PACK24_DONE)
            VMOVUPD(MEM(R15,R12,1,  0), ZMM(13))
            VMOVUPD(MEM(R15,R12,1, 64), ZMM(21))
            VMOVUPD(MEM(R15,R12,1,128), ZMM(29))
            SUB(RSI, IMM(1))
            JZ(PACK24_DONE)
            VMOVUPD(MEM(R15,R11,2,  0), ZMM(14))
            VMOVUPD(MEM(R15,R11,2, 64), ZMM(22))
            VMOVUPD(MEM(R15,R11,2,128), ZMM(30))

            JMP(PACK24_DONE)

        LABEL(PACK24_G)

            // YMM(0..2) = inca * {0..7}, {8..15}, {16..23}.
            // Only the low 32 bits of the 64-bit inca are broadcast.
            VPBROADCASTD(ZMM(3), VAR(inca))
            MOV(RBX, VAR(offsetPtr))
            VPMULLD(YMM(0), YMM(3), MEM(RBX, 0))
            VPMULLD(YMM(1), YMM(3), MEM(RBX,32))
            VPMULLD(YMM(2), YMM(3), MEM(RBX,64))

            LABEL(PACK24_G_LOOP)

                // all-ones masks, re-armed on every iteration before the
                // gathers
                KXNORW(K(1), K(0), K(0))
                KXNORW(K(2), K(0), K(0))
                KXNORW(K(3), K(0), K(0))
                VGATHERDPD(ZMM(3) MASK_K(1), MEM(RAX,YMM(0),8))
                VGATHERDPD(ZMM(4) MASK_K(2), MEM(RAX,YMM(1),8))
                VGATHERDPD(ZMM(5) MASK_K(3), MEM(RAX,YMM(2),8))
                VMULPD(ZMM(3), ZMM(3), ZMM(31))
                VMULPD(ZMM(4), ZMM(4), ZMM(31))
                VMULPD(ZMM(5), ZMM(5), ZMM(31))
                VMOVUPD(MEM(R15,  0), ZMM(3))
                VMOVUPD(MEM(R15, 64), ZMM(4))
                VMOVUPD(MEM(R15,128), ZMM(5))

                LEA(RAX, MEM(RAX,RCX,1))
                LEA(R15, MEM(R15,RDI,1))

                SUB(RSI, IMM(1))

            JNZ(PACK24_G_LOOP)

        LABEL(PACK24_DONE)

    END_ASM(
        : //output operands
        : //input operands
          [n]         "m" (n),
          [kappa]     "m" (*kappa),
          [a]         "m" (a),
          [inca]      "m" (inca),
          [lda]       "m" (lda),
          [p]         "m" (p),
          [ldp]       "m" (ldp),
          [offsetPtr] "m" (offsetPtr)
        : //clobbers
          "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5",
          "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11",
          "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17",
          "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23",
          "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29",
          "zmm30", "zmm31",
          "rax", "rbx", "rcx", "rdi", "rsi",
          "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "memory"
    )

    }
    else // if ( cdim < mnr )
    {
        // Partial panel: scaled copy via the reference routine, then
        // zero the rows that are not present in a.
        bli_dscal2m_ex \
        ( \
          0, \
          BLIS_NONUNIT_DIAG, \
          BLIS_DENSE, \
          ( trans_t )conja, \
          cdim, \
          n, \
          kappa, \
          a, inca, lda, \
          p, 1,    ldp, \
          cntx, \
          NULL  \
        ); \

        // if ( cdim < mnr )
        {
            const dim_t      i      = cdim;
            const dim_t      m_edge = mnr - i;
            const dim_t      n_edge = n_max;
            double* restrict p_edge = p + (i  )*1;

            bli_dset0s_mxn
            (
              m_edge,
              n_edge,
              p_edge, 1, ldp
            );
        }
    }

    // Zero the columns between n and n_max.
    if ( n < n_max )
    {
        const dim_t      j      = n;
        const dim_t      m_edge = mnr;
        const dim_t      n_edge = n_max - j;
        double* restrict p_edge = p + (j  )*ldp;

        bli_dset0s_mxn
        (
          m_edge,
          n_edge,
          p_edge, 1, ldp
        );
    }
}
blis-0.6.1/kernels/knl/1m/bli_spackm_knl_asm_24x16.c000066400000000000000000000502301360743507500220450ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_INTEL #include "bli_x86_asm_macros.h" #define LOADMUL8x8(a,o,s1,s3,s5,s7, \ z0,z1,z2,z3,z4,z5,z6,z7) \ \ VMULPS(YMM(z0), YMM(15), MEM(a, o)) \ VMULPS(YMM(z1), YMM(15), MEM(a,s1,1,o)) \ VMULPS(YMM(z2), YMM(15), MEM(a,s1,2,o)) \ VMULPS(YMM(z3), YMM(15), MEM(a,s3,1,o)) \ VMULPS(YMM(z4), YMM(15), MEM(a,s1,4,o)) \ VMULPS(YMM(z5), YMM(15), MEM(a,s5,1,o)) \ VMULPS(YMM(z6), YMM(15), MEM(a,s3,2,o)) \ VMULPS(YMM(z7), YMM(15), MEM(a,s7,1,o)) #define STORE8x8(a,o,s, \ z0,z1,z2,z3,z4,z5,z6,z7) \ \ VMOVUPS(MEM(a,(o)+0*(s)), YMM(z0)) \ VMOVUPS(MEM(a,(o)+1*(s)), YMM(z1)) \ VMOVUPS(MEM(a,(o)+2*(s)), YMM(z2)) \ VMOVUPS(MEM(a,(o)+3*(s)), YMM(z3)) \ VMOVUPS(MEM(a,(o)+4*(s)), YMM(z4)) \ VMOVUPS(MEM(a,(o)+5*(s)), YMM(z5)) \ VMOVUPS(MEM(a,(o)+6*(s)), YMM(z6)) \ VMOVUPS(MEM(a,(o)+7*(s)), YMM(z7)) #define STORETRANS8x8(a,o,s, \ a0,a1,a2,a3,a4,a5,a6,a7, \ t0,t1,t2,t3,t4,t5) \ \ VUNPCKLPS(YMM(t0), YMM(a0), YMM(a1)) \ VUNPCKLPS(YMM(t2), YMM(a2), YMM(a3)) \ VUNPCKLPS(YMM(t1), YMM(a4), YMM(a5)) \ VUNPCKLPS(YMM(t3), YMM(a6), YMM(a7)) \ \ VSHUFPS(YMM(t4), YMM(t0), YMM(t2), IMM(0x44)) \ VSHUFPS(YMM(t5), YMM(t1), YMM(t3), IMM(0x44)) \ VMOVUPS(MEM(a,(o )+0*(s)), XMM(t4)) \ VMOVUPS(MEM(a,(o+16)+0*(s)), XMM(t5)) \ VEXTRACTF128(MEM(a,(o )+4*(s)), YMM(t4), IMM(1)) \ VEXTRACTF128(MEM(a,(o+16)+4*(s)), YMM(t5), IMM(1)) \ \ VSHUFPS(YMM(t4), YMM(t0), YMM(t2), IMM(0xEE)) \ VSHUFPS(YMM(t5), YMM(t1), YMM(t3), IMM(0xEE)) \ VMOVUPS(MEM(a,(o )+1*(s)), XMM(t4)) \ 
VMOVUPS(MEM(a,(o+16)+1*(s)), XMM(t5)) \ VEXTRACTF128(MEM(a,(o )+5*(s)), YMM(t4), IMM(1)) \ VEXTRACTF128(MEM(a,(o+16)+5*(s)), YMM(t5), IMM(1)) \ \ VUNPCKHPS(YMM(t0), YMM(a0), YMM(a1)) \ VUNPCKHPS(YMM(t2), YMM(a2), YMM(a3)) \ VUNPCKHPS(YMM(t1), YMM(a4), YMM(a5)) \ VUNPCKHPS(YMM(t3), YMM(a6), YMM(a7)) \ \ VSHUFPS(YMM(t4), YMM(t0), YMM(t2), IMM(0x44)) \ VSHUFPS(YMM(t5), YMM(t1), YMM(t3), IMM(0x44)) \ VMOVUPS(MEM(a,(o )+2*(s)), XMM(t4)) \ VMOVUPS(MEM(a,(o+16)+2*(s)), XMM(t5)) \ VEXTRACTF128(MEM(a,(o )+6*(s)), YMM(t4), IMM(1)) \ VEXTRACTF128(MEM(a,(o+16)+6*(s)), YMM(t5), IMM(1)) \ \ VSHUFPS(YMM(t4), YMM(t0), YMM(t2), IMM(0xEE)) \ VSHUFPS(YMM(t5), YMM(t1), YMM(t3), IMM(0xEE)) \ VMOVUPS(MEM(a,(o )+3*(s)), XMM(t4)) \ VMOVUPS(MEM(a,(o+16)+3*(s)), XMM(t5)) \ VEXTRACTF128(MEM(a,(o )+7*(s)), YMM(t4), IMM(1)) \ VEXTRACTF128(MEM(a,(o+16)+7*(s)), YMM(t5), IMM(1)) //This is an array used for the scatter/gather instructions. static int32_t offsets[32] __attribute__((aligned(64))) = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; void bli_spackm_knl_asm_16xk ( conj_t conja, pack_t schema, dim_t cdim_, dim_t n_, dim_t n_max_, void* restrict kappa_, void* restrict a_, inc_t inca_, inc_t lda_, void* restrict p_, inc_t ldp_, cntx_t* restrict cntx ) { const int32_t* offsetPtr = &offsets[0]; float* a = ( float* )a_; float* p = ( float* )p_; float* kappa = ( float* )kappa_; const int64_t cdim = cdim_; const int64_t mnr = 16; const int64_t n = n_; const int64_t n_max = n_max_; const int64_t inca = inca_; const int64_t lda = lda_; const int64_t ldp = ldp_; if ( cdim == mnr ) { BEGIN_ASM() MOV(RSI, VAR(n)) MOV(RAX, VAR(a)) MOV(RBX, VAR(inca)) MOV(RCX, VAR(lda)) MOV(R14, VAR(p)) TEST(RSI, RSI) JZ(PACK16_DONE) LEA(RBX, MEM(,RBX,4)) //inca in bytes LEA(RCX, MEM(,RCX,4)) //lda in bytes VBROADCASTSS(YMM(15), VAR(kappa)) CMP(RBX, IMM(4)) JNE(PACK16_T) LABEL(PACK16_N) MOV(RDX, RSI) AND(RDX, IMM(7)) SAR(RSI, IMM(3)) JZ(PACK16_N_TAIL) LEA(R8, 
MEM(RCX,RCX,2)) //lda*3 LEA(R9, MEM(RCX,RCX,4)) //lda*5 LEA(R10, MEM(R8 ,RCX,4)) //lda*7 LABEL(PACK16_N_LOOP) LOADMUL8x8(RAX,0,RCX,R8,R9,R10,0,1,2,3,4,5,6,7) STORE8x8(R14,0,16*4,0,1,2,3,4,5,6,7) LOADMUL8x8(RAX,32,RCX,R8,R9,R10,0,1,2,3,4,5,6,7) STORE8x8(R14,32,16*4,0,1,2,3,4,5,6,7) LEA(RAX, MEM(RAX,RCX,8)) LEA(R14, MEM(R14,16*8*4)) SUB(RSI, IMM(1)) JNZ(PACK16_N_LOOP) TEST(RDX, RDX) JZ(PACK16_DONE) LABEL(PACK16_N_TAIL) VMULPS(YMM(0), YMM(15), MEM(RAX )) VMULPS(YMM(1), YMM(15), MEM(RAX,32)) VMOVUPS(MEM(R14 ), YMM(0)) VMOVUPS(MEM(R14,32), YMM(1)) LEA(RAX, MEM(RAX,RCX,1)) LEA(R14, MEM(R14, 16*4)) SUB(RDX, IMM(1)) JNZ(PACK16_N_TAIL) JMP(PACK16_DONE) LABEL(PACK16_T) CMP(RCX, IMM(4)) JNE(PACK16_G) LEA(R8, MEM(RBX,RBX,2)) //inca*3 LEA(R9, MEM(RBX,RBX,4)) //inca*5 LEA(R10, MEM(R8 ,RBX,4)) //inca*7 LEA(R11, MEM(RAX,RBX,8)) MOV(RDX, RSI) AND(RDX, IMM(7)) SAR(RSI, IMM(3)) JZ(PACK16_T_TAIL) LABEL(PACK16_T_LOOP) LOADMUL8x8(RAX,0,RBX,R8,R9,R10,0,1,2,3,4,5,6,7) STORETRANS8x8(R14,0,16*4,0,1,2,3,4,5,6,7,8,9,10,11,12,13) LOADMUL8x8(R11,0,RBX,R8,R9,R10,0,1,2,3,4,5,6,7) STORETRANS8x8(R14,32,16*4,0,1,2,3,4,5,6,7,8,9,10,11,12,13) LEA(RAX, MEM(RAX, 8*4)) LEA(R11, MEM(R11, 8*4)) LEA(R14, MEM(R14,16*8*4)) SUB(RSI, IMM(1)) JNZ(PACK16_T_LOOP) TEST(RDX, RDX) JZ(PACK16_DONE) LABEL(PACK16_T_TAIL) VMULSS(XMM(0), XMM(15), MEM(RAX )) VMULSS(XMM(1), XMM(15), MEM(RAX,RBX,1)) VMULSS(XMM(2), XMM(15), MEM(RAX,RBX,2)) VMULSS(XMM(3), XMM(15), MEM(RAX,R8 ,1)) VMULSS(XMM(4), XMM(15), MEM(RAX,RBX,4)) VMULSS(XMM(5), XMM(15), MEM(RAX,R9 ,1)) VMULSS(XMM(6), XMM(15), MEM(RAX,R8 ,2)) VMULSS(XMM(7), XMM(15), MEM(RAX,R10,1)) VMOVSS(MEM(R14,0*4), XMM(0)) VMOVSS(MEM(R14,1*4), XMM(1)) VMOVSS(MEM(R14,2*4), XMM(2)) VMOVSS(MEM(R14,3*4), XMM(3)) VMOVSS(MEM(R14,4*4), XMM(4)) VMOVSS(MEM(R14,5*4), XMM(5)) VMOVSS(MEM(R14,6*4), XMM(6)) VMOVSS(MEM(R14,7*4), XMM(7)) VMULSS(XMM(0), XMM(15), MEM(R11 )) VMULSS(XMM(1), XMM(15), MEM(R11,RBX,1)) VMULSS(XMM(2), XMM(15), MEM(R11,RBX,2)) VMULSS(XMM(3), XMM(15), MEM(R11,R8 ,1)) 
VMULSS(XMM(4), XMM(15), MEM(R11,RBX,4)) VMULSS(XMM(5), XMM(15), MEM(R11,R9 ,1)) VMULSS(XMM(6), XMM(15), MEM(R11,R8 ,2)) VMULSS(XMM(7), XMM(15), MEM(R11,R10,1)) VMOVSS(MEM(R14, 8*4), XMM(0)) VMOVSS(MEM(R14, 9*4), XMM(1)) VMOVSS(MEM(R14,10*4), XMM(2)) VMOVSS(MEM(R14,11*4), XMM(3)) VMOVSS(MEM(R14,12*4), XMM(4)) VMOVSS(MEM(R14,13*4), XMM(5)) VMOVSS(MEM(R14,14*4), XMM(6)) VMOVSS(MEM(R14,15*4), XMM(7)) LEA(RAX, MEM(RAX, 4)) LEA(R11, MEM(R11, 4)) LEA(R14, MEM(R14,16*4)) SUB(RDX, IMM(1)) JNZ(PACK16_T_TAIL) JMP(PACK16_DONE) LABEL(PACK16_G) VPBROADCASTD(ZMM(3), VAR(inca)) MOV(RBX, VAR(offsetPtr)) VPMULLD(ZMM(0), ZMM(3), MEM(RBX)) LABEL(PACK16_G_LOOP) KXNORW(K(1), K(0), K(0)) VGATHERDPS(ZMM(3) MASK_K(1), MEM(RAX,ZMM(0),8)) VMULPS(ZMM(3), ZMM(3), ZMM(15)) VMOVUPS(MEM(R14), ZMM(3)) LEA(RAX, MEM(RAX,RCX,1)) LEA(R14, MEM(R14, 16*4)) SUB(RSI, IMM(1)) JNZ(PACK16_G_LOOP) LABEL(PACK16_DONE) END_ASM( : //output operands : //input operands [n] "m" (n), [kappa] "m" (*kappa), [a] "m" (a), [inca] "m" (inca), [lda] "m" (lda), [p] "m" (p), [ldp] "m" (ldp), [offsetPtr] "m" (offsetPtr) : //clobbers "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "memory" ) } else // if ( cdim < mnr ) { bli_sscal2m_ex \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ // if ( cdim < mnr ) { const dim_t i = cdim; const dim_t m_edge = mnr - i; const dim_t n_edge = n_max; float* restrict p_edge = p + (i )*1; bli_sset0s_mxn ( m_edge, n_edge, p_edge, 1, ldp ); } } if ( n < n_max ) { const dim_t j = n; const dim_t m_edge = mnr; const dim_t n_edge = n_max - j; float* restrict p_edge = p + (j )*ldp; bli_sset0s_mxn ( 
m_edge, n_edge, p_edge, 1, ldp ); } } void bli_spackm_knl_asm_24xk ( conj_t conja, pack_t schema, dim_t cdim_, dim_t n_, dim_t n_max_, void* restrict kappa_, void* restrict a_, inc_t inca_, inc_t lda_, void* restrict p_, inc_t ldp_, cntx_t* restrict cntx ) { const int32_t* offsetPtr = &offsets[0]; float* a = ( float* )a_; float* p = ( float* )p_; float* kappa = ( float* )kappa_; const int64_t cdim = cdim_; const int64_t mnr = 24; const int64_t n = n_; const int64_t n_max = n_max_; const int64_t inca = inca_; const int64_t lda = lda_; const int64_t ldp = ldp_; if ( cdim == mnr ) { BEGIN_ASM() MOV(RSI, VAR(n)) MOV(RAX, VAR(a)) MOV(RBX, VAR(inca)) MOV(RCX, VAR(lda)) MOV(R14, VAR(p)) MOV(RDI, VAR(ldp)) TEST(RSI, RSI) JZ(PACK24_DONE) LEA(RBX, MEM(,RBX,4)) //inca in bytes LEA(RCX, MEM(,RCX,4)) //lda in bytes LEA(RDI, MEM(,RDI,4)) //ldp in bytes VBROADCASTSS(ZMM(15), VAR(kappa)) CMP(RBX, IMM(4)) JNE(PACK24_T) LABEL(PACK24_N) MOV(RDX, RSI) AND(RDX, IMM(7)) SAR(RSI, IMM(3)) JZ(PACK24_N_TAIL) LEA(R8, MEM(RCX,RCX,2)) //lda*3 LEA(R9, MEM(RCX,RCX,4)) //lda*5 LEA(R10, MEM(R8 ,RCX,4)) //lda*7 LABEL(PACK24_N_LOOP) LOADMUL8x8(RAX,0,RCX,R8,R9,R10,0,1,2,3,4,5,6,7) STORE8x8(R14,0,24*4,0,1,2,3,4,5,6,7) LOADMUL8x8(RAX,32,RCX,R8,R9,R10,0,1,2,3,4,5,6,7) STORE8x8(R14,32,24*4,0,1,2,3,4,5,6,7) LOADMUL8x8(RAX,64,RCX,R8,R9,R10,0,1,2,3,4,5,6,7) STORE8x8(R14,64,24*4,0,1,2,3,4,5,6,7) LEA(RAX, MEM(RAX,RCX,8)) LEA(R14, MEM(R14,RDI,8)) SUB(RSI, IMM(1)) JNZ(PACK24_N_LOOP) TEST(RDX, RDX) JZ(PACK24_DONE) LABEL(PACK24_N_TAIL) VMULPS(ZMM(0), ZMM(15), MEM(RAX)) VMOVUPS(MEM(R14), ZMM(0)) VMULPS(YMM(1), YMM(15), MEM(RAX,64)) VMOVUPS(MEM(R14,64), YMM(1)) LEA(RAX, MEM(RAX,RCX,1)) LEA(R14, MEM(R14,RDI,1)) SUB(RDX, IMM(1)) JNZ(PACK24_N_TAIL) JMP(PACK24_DONE) LABEL(PACK24_T) CMP(RCX, IMM(4)) JNE(PACK24_G) LEA(R8, MEM(RBX,RBX,2)) //inca*3 LEA(R9, MEM(RBX,RBX,4)) //inca*5 LEA(R10, MEM(R8 ,RBX,4)) //inca*7 LEA(R11, MEM(RAX,RBX,8)) LEA(R12, MEM(R11,RBX,8)) MOV(RDX, RSI) AND(RDX, IMM(7)) SAR(RSI, IMM(3)) 
JZ(PACK24_T_TAIL) LABEL(PACK24_T_LOOP) LOADMUL8x8(RAX,0,RBX,R8,R9,R10,0,1,2,3,4,5,6,7) STORETRANS8x8(R14,0,24*4,0,1,2,3,4,5,6,7,8,9,10,11,12,13) LOADMUL8x8(R11,0,RBX,R8,R9,R10,0,1,2,3,4,5,6,7) STORETRANS8x8(R14,32,24*4,0,1,2,3,4,5,6,7,8,9,10,11,12,13) LOADMUL8x8(R12,0,RBX,R8,R9,R10,0,1,2,3,4,5,6,7) STORETRANS8x8(R14,64,24*4,0,1,2,3,4,5,6,7,8,9,10,11,12,13) LEA(RAX, MEM(RAX,RCX,8)) LEA(R11, MEM(R11,RCX,8)) LEA(R12, MEM(R12,RCX,8)) LEA(R14, MEM(R14,RDI,8)) SUB(RSI, IMM(1)) JNZ(PACK24_T_LOOP) TEST(RDX, RDX) JZ(PACK24_DONE) LABEL(PACK24_T_TAIL) VMULSS(XMM(0), XMM(15), MEM(RAX)) VMULSS(XMM(1), XMM(15), MEM(RAX,RBX,1)) VMULSS(XMM(2), XMM(15), MEM(RAX,RBX,2)) VMULSS(XMM(3), XMM(15), MEM(RAX,R8,1)) VMULSS(XMM(4), XMM(15), MEM(RAX,RBX,4)) VMULSS(XMM(5), XMM(15), MEM(RAX,R9,1)) VMULSS(XMM(6), XMM(15), MEM(RAX,R8,2)) VMULSS(XMM(7), XMM(15), MEM(RAX,R10,1)) VMOVSS(MEM(R14,0*4), XMM(0)) VMOVSS(MEM(R14,1*4), XMM(1)) VMOVSS(MEM(R14,2*4), XMM(2)) VMOVSS(MEM(R14,3*4), XMM(3)) VMOVSS(MEM(R14,4*4), XMM(4)) VMOVSS(MEM(R14,5*4), XMM(5)) VMOVSS(MEM(R14,6*4), XMM(6)) VMOVSS(MEM(R14,7*4), XMM(7)) VMULSS(XMM(0), XMM(15), MEM(R11)) VMULSS(XMM(1), XMM(15), MEM(R11,RBX,1)) VMULSS(XMM(2), XMM(15), MEM(R11,RBX,2)) VMULSS(XMM(3), XMM(15), MEM(R11,R8,1)) VMULSS(XMM(4), XMM(15), MEM(R11,RBX,4)) VMULSS(XMM(5), XMM(15), MEM(R11,R9,1)) VMULSS(XMM(6), XMM(15), MEM(R11,R8,2)) VMULSS(XMM(7), XMM(15), MEM(R11,R10,1)) VMOVSS(MEM(R14, 8*4), XMM(0)) VMOVSS(MEM(R14, 9*4), XMM(1)) VMOVSS(MEM(R14,10*4), XMM(2)) VMOVSS(MEM(R14,11*4), XMM(3)) VMOVSS(MEM(R14,12*4), XMM(4)) VMOVSS(MEM(R14,13*4), XMM(5)) VMOVSS(MEM(R14,14*4), XMM(6)) VMOVSS(MEM(R14,15*4), XMM(7)) VMULSS(XMM(0), XMM(15), MEM(R12)) VMULSS(XMM(1), XMM(15), MEM(R12,RBX,1)) VMULSS(XMM(2), XMM(15), MEM(R12,RBX,2)) VMULSS(XMM(3), XMM(15), MEM(R12,R8,1)) VMULSS(XMM(4), XMM(15), MEM(R12,RBX,4)) VMULSS(XMM(5), XMM(15), MEM(R12,R9,1)) VMULSS(XMM(6), XMM(15), MEM(R12,R8,2)) VMULSS(XMM(7), XMM(15), MEM(R12,R10,1)) VMOVSS(MEM(R14,16*4), XMM(0)) 
VMOVSS(MEM(R14,17*4), XMM(1)) VMOVSS(MEM(R14,18*4), XMM(2)) VMOVSS(MEM(R14,19*4), XMM(3)) VMOVSS(MEM(R14,20*4), XMM(4)) VMOVSS(MEM(R14,21*4), XMM(5)) VMOVSS(MEM(R14,22*4), XMM(6)) VMOVSS(MEM(R14,23*4), XMM(7)) LEA(RAX, MEM(RAX,RCX,1)) LEA(R11, MEM(R11,RCX,1)) LEA(R12, MEM(R12,RCX,1)) LEA(R14, MEM(R14,RDI,1)) SUB(RDX, IMM(1)) JNZ(PACK24_T_TAIL) JMP(PACK24_DONE) LABEL(PACK24_G) VPBROADCASTD(ZMM(3), VAR(inca)) MOV(RBX, VAR(offsetPtr)) VPMULLD(ZMM(0), ZMM(3), MEM(RBX)) LEA(R11, MEM(RAX,RBX,8)) LEA(R11, MEM(R11,RBX,8)) LABEL(PACK24_G_LOOP) KXNORW(K(1), K(0), K(0)) KSHIFTRW(K(2), K(1), IMM(8)) VGATHERDPS(ZMM(3) MASK_K(1), MEM(RAX,ZMM(0),8)) VGATHERDPS(ZMM(4) MASK_K(2), MEM(R11,ZMM(0),8)) VMULPS(ZMM(3), ZMM(3), ZMM(15)) VMULPS(YMM(4), YMM(4), YMM(15)) VMOVUPS(MEM(R14), ZMM(3)) VMOVUPS(MEM(R14,64), YMM(4)) LEA(RAX, MEM(RAX,RCX,1)) LEA(R14, MEM(R14,RDI,1)) SUB(RSI, IMM(1)) JNZ(PACK24_G_LOOP) LABEL(PACK24_DONE) END_ASM( : //output operands : //input operands [n] "m" (n), [kappa] "m" (*kappa), [a] "m" (a), [inca] "m" (inca), [lda] "m" (lda), [p] "m" (p), [ldp] "m" (ldp), [offsetPtr] "m" (offsetPtr) : //clobbers "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "memory" ) } else // if ( cdim < mnr ) { bli_sscal2m_ex \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ // if ( cdim < mnr ) { const dim_t i = cdim; const dim_t m_edge = mnr - i; const dim_t n_edge = n_max; float* restrict p_edge = p + (i )*1; bli_sset0s_mxn ( m_edge, n_edge, p_edge, 1, ldp ); } } if ( n < n_max ) { const dim_t j = n; const dim_t m_edge = mnr; const dim_t n_edge = n_max - j; float* restrict p_edge = p + 
(j )*ldp; bli_sset0s_mxn ( m_edge, n_edge, p_edge, 1, ldp ); } } blis-0.6.1/kernels/knl/1m/old/000077500000000000000000000000001360743507500160035ustar00rootroot00000000000000blis-0.6.1/kernels/knl/1m/old/bli_packm_knl_asm_30x8.c000066400000000000000000000365131360743507500223660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "bli_avx512_macros.h"
#include "blis.h"

// LOADMUL8x8: load eight 512-bit vectors and multiply each by ZMM(31).
//
//   a            - base address register
//   o            - constant byte displacement added to every address
//   s1,s3,s5,s7  - index registers; the eight addresses are
//                  a+o, a+s1+o, a+2*s1+o, a+s3+o, a+4*s1+o, a+s5+o,
//                  a+2*s3+o, a+s7+o
//   z0..z7       - numbers of the destination ZMM registers
//
// ZMM(31) is expected to hold the broadcast scaling factor.
#define LOADMUL8x8(a,o,s1,s3,s5,s7, \
                   z0,z1,z2,z3,z4,z5,z6,z7) \
    \
    VMULPD(ZMM(z0), ZMM(31), MEM(a,     o)) \
    VMULPD(ZMM(z1), ZMM(31), MEM(a,s1,1,o)) \
    VMULPD(ZMM(z2), ZMM(31), MEM(a,s1,2,o)) \
    VMULPD(ZMM(z3), ZMM(31), MEM(a,s3,1,o)) \
    VMULPD(ZMM(z4), ZMM(31), MEM(a,s1,4,o)) \
    VMULPD(ZMM(z5), ZMM(31), MEM(a,s5,1,o)) \
    VMULPD(ZMM(z6), ZMM(31), MEM(a,s3,2,o)) \
    VMULPD(ZMM(z7), ZMM(31), MEM(a,s7,1,o))

// LOADMUL6x8: the first six loads of LOADMUL8x8 (six vectors of eight
// doubles); no s7 index register is needed.
#define LOADMUL6x8(a,o,s1,s3,s5, \
                   z0,z1,z2,z3,z4,z5) \
    \
    VMULPD(ZMM(z0), ZMM(31), MEM(a,     o)) \
    VMULPD(ZMM(z1), ZMM(31), MEM(a,s1,1,o)) \
    VMULPD(ZMM(z2), ZMM(31), MEM(a,s1,2,o)) \
    VMULPD(ZMM(z3), ZMM(31), MEM(a,s3,1,o)) \
    VMULPD(ZMM(z4), ZMM(31), MEM(a,s1,4,o)) \
    VMULPD(ZMM(z5), ZMM(31), MEM(a,s5,1,o))

// LOADMUL8x6: eight masked loads in which only the low six lanes of each
// vector are read; the upper two lanes are zeroed. K(7) is overwritten:
// it is set to all ones (KXNORW) and shifted right by 10, leaving the six
// low bits set.
#define LOADMUL8x6(a,o,s1,s3,s5,s7, \
                   z0,z1,z2,z3,z4,z5,z6,z7) \
    \
    KXNORW(K(7), K(0), K(0)) \
    KSHIFTRW(K(7), K(7), IMM(10)) \
    LOADMUL8x8_MASK(a,o,s1,s3,s5,s7,z0,z1,z2,z3,z4,z5,z6,z7,7)

// LOADMUL8x8_MASK: same addressing as LOADMUL8x8, but every multiply is
// issued with zeroing mask register K(k) (MASK_KZ), so lanes whose mask
// bit is clear are written as zero in the destination.
#define LOADMUL8x8_MASK(a,o,s1,s3,s5,s7, \
                        z0,z1,z2,z3,z4,z5,z6,z7,k) \
    \
    VMULPD(ZMM(z0) MASK_KZ(k), ZMM(31), MEM(a,     o)) \
    VMULPD(ZMM(z1) MASK_KZ(k), ZMM(31), MEM(a,s1,1,o)) \
    VMULPD(ZMM(z2) MASK_KZ(k), ZMM(31), MEM(a,s1,2,o)) \
    VMULPD(ZMM(z3) MASK_KZ(k), ZMM(31), MEM(a,s3,1,o)) \
    VMULPD(ZMM(z4) MASK_KZ(k), ZMM(31), MEM(a,s1,4,o)) \
    VMULPD(ZMM(z5) MASK_KZ(k), ZMM(31), MEM(a,s5,1,o)) \
    VMULPD(ZMM(z6) MASK_KZ(k), ZMM(31), MEM(a,s3,2,o)) \
    VMULPD(ZMM(z7) MASK_KZ(k), ZMM(31), MEM(a,s7,1,o))

// LOADMUL6x8_MASK: the first six loads of LOADMUL8x8_MASK.
#define LOADMUL6x8_MASK(a,o,s1,s3,s5, \
                        z0,z1,z2,z3,z4,z5,k) \
    \
    VMULPD(ZMM(z0) MASK_KZ(k), ZMM(31), MEM(a,     o)) \
    VMULPD(ZMM(z1) MASK_KZ(k), ZMM(31), MEM(a,s1,1,o)) \
    VMULPD(ZMM(z2) MASK_KZ(k), ZMM(31), MEM(a,s1,2,o)) \
    VMULPD(ZMM(z3) MASK_KZ(k), ZMM(31), MEM(a,s3,1,o)) \
    VMULPD(ZMM(z4) MASK_KZ(k), ZMM(31), MEM(a,s1,4,o)) \
    VMULPD(ZMM(z5) MASK_KZ(k), ZMM(31), MEM(a,s5,1,o))

// STORE8x8: store ZMM z0..z7 with unaligned moves to the same eight
// addresses that LOADMUL8x8 would read (base a, displacement o, index
// registers s1/s3/s5/s7).
#define STORE8x8(a,o,s1,s3,s5,s7, \
                 z0,z1,z2,z3,z4,z5,z6,z7) \
    \
    VMOVUPD(MEM(a,     o), ZMM(z0)) \
    VMOVUPD(MEM(a,s1,1,o), ZMM(z1)) \
    VMOVUPD(MEM(a,s1,2,o), ZMM(z2)) \
    VMOVUPD(MEM(a,s3,1,o), ZMM(z3)) \
    VMOVUPD(MEM(a,s1,4,o), ZMM(z4)) \
    VMOVUPD(MEM(a,s5,1,o), ZMM(z5)) \
    VMOVUPD(MEM(a,s3,2,o), ZMM(z6)) \
    VMOVUPD(MEM(a,s7,1,o), ZMM(z7))

// TRANSPOSE8x8: transpose the 8x8 block of doubles held in ZMM a0..a7
// (one register per row) into ZMM b0..b7, so that element i of b<j> is
// element j of a<i>.
//
// Three stages:
//   1. VUNPCKLPD/VUNPCKHPD interleave adjacent row pairs into b0..b7.
//   2. VSHUFF64X2 (0x44 / 0xEE) combines 128-bit lanes of those pairs
//      back into a0..a7.
//   3. VSHUFF64X2 (0x88 / 0xDD) combines lanes across the two halves
//      into the final b0..b7.
//
// a0..a7 are used as scratch in stage 2, so their contents are destroyed.
// The a and b register sets must not overlap.
#define TRANSPOSE8x8(a0,a1,a2,a3,a4,a5,a6,a7, \
                     b0,b1,b2,b3,b4,b5,b6,b7) \
    \
    VUNPCKLPD(ZMM(b0), ZMM(a0), ZMM(a1)) \
    VUNPCKHPD(ZMM(b1), ZMM(a0), ZMM(a1)) \
    VUNPCKLPD(ZMM(b2), ZMM(a2), ZMM(a3)) \
    VUNPCKHPD(ZMM(b3), ZMM(a2), ZMM(a3)) \
    VUNPCKLPD(ZMM(b4), ZMM(a4), ZMM(a5)) \
    VUNPCKHPD(ZMM(b5), ZMM(a4), ZMM(a5)) \
    VUNPCKLPD(ZMM(b6), ZMM(a6), ZMM(a7)) \
    VUNPCKHPD(ZMM(b7), ZMM(a6), ZMM(a7)) \
    VSHUFF64X2(ZMM(a0), ZMM(b0), ZMM(b2), IMM(0x44)) \
    VSHUFF64X2(ZMM(a1), ZMM(b1), ZMM(b3), IMM(0x44)) \
    VSHUFF64X2(ZMM(a2), ZMM(b0), ZMM(b2), IMM(0xEE)) \
    VSHUFF64X2(ZMM(a3), ZMM(b1), ZMM(b3), IMM(0xEE)) \
    VSHUFF64X2(ZMM(a4), ZMM(b4), ZMM(b6), IMM(0x44)) \
    VSHUFF64X2(ZMM(a5), ZMM(b5), ZMM(b7), IMM(0x44)) \
    VSHUFF64X2(ZMM(a6), ZMM(b4), ZMM(b6), IMM(0xEE)) \
    VSHUFF64X2(ZMM(a7), ZMM(b5), ZMM(b7), IMM(0xEE)) \
    VSHUFF64X2(ZMM(b0), ZMM(a0), ZMM(a4), IMM(0x88)) \
    VSHUFF64X2(ZMM(b1), ZMM(a1), ZMM(a5), IMM(0x88)) \
    VSHUFF64X2(ZMM(b2), ZMM(a0), ZMM(a4), IMM(0xDD)) \
    VSHUFF64X2(ZMM(b3), ZMM(a1), ZMM(a5), IMM(0xDD)) \
    VSHUFF64X2(ZMM(b4), ZMM(a2), ZMM(a6), IMM(0x88)) \
    VSHUFF64X2(ZMM(b5), ZMM(a3), ZMM(a7), IMM(0x88)) \
    VSHUFF64X2(ZMM(b6), ZMM(a2), ZMM(a6), IMM(0xDD)) \
    VSHUFF64X2(ZMM(b7), ZMM(a3), ZMM(a7), IMM(0xDD))

//This is an array used for the scatter/gather instructions.
extern int32_t offsets[32]; // NOTE: assumes packdim_mr == 32 void bli_dpackm_knl_asm_30xk ( conj_t conja, dim_t n_, void* restrict kappa_, void* restrict a_, inc_t inca_, inc_t lda_, void* restrict p_, inc_t ldp_, cntx_t* restrict cntx ) { (void)conja; const int32_t * offsetPtr = &offsets[0]; double* a = (double*)a_; double* p = (double*)p_; double* kappa = (double*)kappa_; const int64_t n = n_; const int64_t inca = inca_; const int64_t lda = lda_; const int64_t ldp = ldp_; __asm__ volatile ( MOV(RSI, VAR(n)) MOV(RAX, VAR(a)) MOV(RBX, VAR(inca)) MOV(RCX, VAR(lda)) MOV(R15, VAR(p)) MOV(RDI, VAR(ldp)) LEA(RBX, MEM(,RBX,8)) //inca in bytes LEA(RCX, MEM(,RCX,8)) //lda in bytes LEA(RDI, MEM(,RDI,8)) //ldp in bytes LEA(R11, MEM(RDI,RDI,2)) //ldp*3 LEA(R12, MEM(RDI,RDI,4)) //ldp*5 LEA(R13, MEM(R11,RDI,4)) //ldp*7 VBROADCASTSD(ZMM(31), VAR(kappa)) TEST(RSI, RSI) JZ(PACK30_DONE) CMP(RBX, IMM(8)) JNE(PACK30_T) LABEL(PACK30_N) MOV(RDX, RSI) AND(RDX, IMM(7)) SAR(RSI, IMM(3)) JZ(PACK30_N_TAIL) LEA(R8, MEM(RCX,RCX,2)) //lda*3 LEA(R9, MEM(RCX,RCX,4)) //lda*5 LEA(R10, MEM(R8 ,RCX,4)) //lda*7 LABEL(PACK30_N_LOOP) LOADMUL8x8(RAX, 0,RCX,R8, R9, R10, 0, 1, 2, 3, 4, 5, 6, 7) LOADMUL8x8(RAX, 64,RCX,R8, R9, R10, 8, 9,10,11,12,13,14,15) LOADMUL8x8(RAX,128,RCX,R8, R9, R10,16,17,18,19,20,21,22,23) STORE8x8 (R15, 0,RDI,R11,R12,R13, 0, 1, 2, 3, 4, 5, 6, 7) STORE8x8 (R15, 64,RDI,R11,R12,R13, 8, 9,10,11,12,13,14,15) STORE8x8 (R15,128,RDI,R11,R12,R13,16,17,18,19,20,21,22,23) LOADMUL8x6(RAX,192,RCX,R8, R9, R10, 0, 1, 2, 3, 4, 5, 6, 7) STORE8x8 (R15,192,RDI,R11,R12,R13, 0, 1, 2, 3, 4, 5, 6, 7) LEA(RAX, MEM(RAX,RCX,8)) LEA(R15, MEM(R15,RDI,8)) SUB(RSI, IMM(1)) JNZ(PACK30_N_LOOP) TEST(RDX, RDX) JZ(PACK30_DONE) LABEL(PACK30_N_TAIL) KXNORW(K(7), K(0), K(0)) KSHIFTRW(K(7), K(7), IMM(10)) VMULPD(ZMM(0), ZMM(31), MEM(RAX, 0)) VMULPD(ZMM(1), ZMM(31), MEM(RAX, 64)) VMULPD(ZMM(2), ZMM(31), MEM(RAX,128)) VMULPD(ZMM(3) MASK_KZ(7), ZMM(31), MEM(RAX,192)) VMOVUPD(MEM(R15, 0), ZMM(0)) VMOVUPD(MEM(R15, 64), 
ZMM(1)) VMOVUPD(MEM(R15,128), ZMM(2)) VMOVUPD(MEM(R15,192), ZMM(3)) LEA(RAX, MEM(RAX,RCX,1)) LEA(R15, MEM(R15,RDI,1)) SUB(RDX, IMM(1)) JNZ(PACK30_N_TAIL) JMP(PACK30_DONE) LABEL(PACK30_T) CMP(RCX, IMM(8)) JNE(PACK30_G) LEA(R8, MEM(RBX,RBX,2)) //inca*3 LEA(R9, MEM(RBX,RBX,4)) //inca*5 LEA(R10, MEM(R8 ,RBX,4)) //inca*7 LEA(R14, MEM(RAX,RBX,8)) LEA(RCX, MEM(R14,RBX,8)) SAR(RSI, IMM(3)) JZ(PACK30_T_TAIL) LABEL(PACK30_T_LOOP) LOADMUL8x8(RAX,0,RBX,R8,R9,R10, 0, 1, 2, 3, 4, 5, 6, 7) LOADMUL8x8(R14,0,RBX,R8,R9,R10, 8, 9,10,11,12,13,14,15) TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7, 16,17,18,19,20,21,22,23) STORE8x8(R15, 0,RDI,R11,R12,R13,16,17,18,19,20,21,22,23) LOADMUL8x8(RCX,0,RBX,R8,R9,R10, 0, 1, 2, 3, 4, 5, 6, 7) TRANSPOSE8x8( 8, 9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23) STORE8x8(R15, 64,RDI,R11,R12,R13,16,17,18,19,20,21,22,23) LEA(RCX, MEM(RCX,RBX,8)) LOADMUL6x8(RCX,0,RBX,R8,R9, 8, 9,10,11,12,13) TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7, 16,17,18,19,20,21,22,23) STORE8x8(R15,128,RDI,R11,R12,R13,16,17,18,19,20,21,22,23) TRANSPOSE8x8( 8, 9,10,11,12,13,14,15, 0, 1, 2, 3, 4, 5, 6, 7) STORE8x8(R15,192,RDI,R11,R12,R13, 0, 1, 2, 3, 4, 5, 6, 7) LEA(RAX, MEM(RAX,64)) LEA(R14, MEM(R14,64)) LEA(RCX, MEM(R14,RBX,8)) LEA(R15, MEM(R15,RDI,8)) SUB(RSI, IMM(1)) JNZ(PACK30_T_LOOP) LABEL(PACK30_T_TAIL) MOV(RSI, VAR(n)) AND(RSI, IMM(7)) TEST(RSI, RSI) JZ(PACK30_DONE) MOV(R13, IMM(1)) SHLX(R13, R13, RSI) SUB(R13, IMM(1)) KMOV(K(1), R13D) //mask for n%8 elements LOADMUL8x8_MASK(RAX,0,RBX,R8,R9,R10, 0, 1, 2, 3, 4, 5, 6, 7,1) LOADMUL8x8_MASK(R14,0,RBX,R8,R9,R10, 8, 9,10,11,12,13,14,15,1) LOADMUL8x8_MASK(RCX,0,RBX,R8,R9,R10,16,17,18,19,20,21,22,23,1) TRANSPOSE8x8(16,17,18,19,20,21,22,23, 24,25,26,27,28,29,30,31) TRANSPOSE8x8( 8, 9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23) TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15) VMOVUPD(MEM(R15, 0), ZMM( 8)) VMOVUPD(MEM(R15, 64), ZMM(16)) VMOVUPD(MEM(R15, 128), ZMM(24)) SUB(RSI, IMM(1)) JZ(PACK30_T_ALMOST_DONE) VMOVUPD(MEM(R15,RDI,1, 0), ZMM( 
9)) VMOVUPD(MEM(R15,RDI,1, 64), ZMM(17)) VMOVUPD(MEM(R15,RDI,1,128), ZMM(25)) SUB(RSI, IMM(1)) JZ(PACK30_T_ALMOST_DONE) VMOVUPD(MEM(R15,RDI,2, 0), ZMM(10)) VMOVUPD(MEM(R15,RDI,2, 64), ZMM(18)) VMOVUPD(MEM(R15,RDI,2,128), ZMM(26)) SUB(RSI, IMM(1)) JZ(PACK30_T_ALMOST_DONE) VMOVUPD(MEM(R15,R11,1, 0), ZMM(11)) VMOVUPD(MEM(R15,R11,1, 64), ZMM(19)) VMOVUPD(MEM(R15,R11,1,128), ZMM(27)) SUB(RSI, IMM(1)) JZ(PACK30_T_ALMOST_DONE) VMOVUPD(MEM(R15,RDI,4, 0), ZMM(12)) VMOVUPD(MEM(R15,RDI,4, 64), ZMM(20)) VMOVUPD(MEM(R15,RDI,4,128), ZMM(28)) SUB(RSI, IMM(1)) JZ(PACK30_T_ALMOST_DONE) VMOVUPD(MEM(R15,R12,1, 0), ZMM(13)) VMOVUPD(MEM(R15,R12,1, 64), ZMM(21)) VMOVUPD(MEM(R15,R12,1,128), ZMM(29)) SUB(RSI, IMM(1)) JZ(PACK30_T_ALMOST_DONE) VMOVUPD(MEM(R15,R11,2, 0), ZMM(14)) VMOVUPD(MEM(R15,R11,2, 64), ZMM(22)) VMOVUPD(MEM(R15,R11,2,128), ZMM(30)) LABEL(PACK30_T_ALMOST_DONE) MOV(RSI, VAR(n)) AND(RSI, IMM(7)) VBROADCASTSD(ZMM(31), VAR(kappa)) LEA(RAX, MEM(RCX,RBX,8)) LOADMUL6x8_MASK(RAX,0,RBX,R8,R9, 0, 1, 2, 3, 4, 5,1) TRANSPOSE8x8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15) VMOVUPD(MEM(R15, 192), ZMM( 8)) SUB(RSI, IMM(1)) JZ(PACK30_DONE) VMOVUPD(MEM(R15,RDI,1,192), ZMM( 9)) SUB(RSI, IMM(1)) JZ(PACK30_DONE) VMOVUPD(MEM(R15,RDI,2,192), ZMM(10)) SUB(RSI, IMM(1)) JZ(PACK30_DONE) VMOVUPD(MEM(R15,R11,1,192), ZMM(11)) SUB(RSI, IMM(1)) JZ(PACK30_DONE) VMOVUPD(MEM(R15,RDI,4,192), ZMM(12)) SUB(RSI, IMM(1)) JZ(PACK30_DONE) VMOVUPD(MEM(R15,R12,1,192), ZMM(13)) SUB(RSI, IMM(1)) JZ(PACK30_DONE) VMOVUPD(MEM(R15,R11,2,192), ZMM(14)) JMP(PACK30_DONE) LABEL(PACK30_G) VPBROADCASTD(ZMM(4), VAR(inca)) MOV(RBX, VAR(offsetPtr)) VPMULLD(YMM(0), YMM(4), MEM(RBX, 0)) VPMULLD(YMM(1), YMM(4), MEM(RBX,32)) VPMULLD(YMM(2), YMM(4), MEM(RBX,64)) VPMULLD(YMM(3), YMM(4), MEM(RBX,96)) LABEL(PACK30_G_LOOP) KXNORW(K(1), K(0), K(0)) KXNORW(K(2), K(0), K(0)) KXNORW(K(3), K(0), K(0)) KSHIFTRW(K(4), K(3), IMM(10)) VGATHERDPD(ZMM(4) MASK_K(1), MEM(RAX,YMM(0),8)) VGATHERDPD(ZMM(5) MASK_K(2), MEM(RAX,YMM(1),8)) 
VGATHERDPD(ZMM(6) MASK_K(3), MEM(RAX,YMM(2),8)) VGATHERDPD(ZMM(7) MASK_K(4), MEM(RAX,YMM(3),8)) VMULPD(ZMM(4), ZMM(4), ZMM(31)) VMULPD(ZMM(5), ZMM(5), ZMM(31)) VMULPD(ZMM(6), ZMM(6), ZMM(31)) VMULPD(ZMM(7), ZMM(7), ZMM(31)) VMOVUPD(MEM(R15, 0), ZMM(4)) VMOVUPD(MEM(R15, 64), ZMM(5)) VMOVUPD(MEM(R15,128), ZMM(6)) VMOVUPD(MEM(R15,192), ZMM(7)) LEA(RAX, MEM(RAX,RCX,1)) LEA(R15, MEM(R15,RDI,1)) SUB(RSI, IMM(1)) JNZ(PACK30_G_LOOP) LABEL(PACK30_DONE) : //output operands : //input operands [n] "m" (n), [kappa] "m" (*kappa), [a] "m" (a), [inca] "m" (inca), [lda] "m" (lda), [p] "m" (p), [ldp] "m" (ldp), [offsetPtr] "m" (offsetPtr) : //clobbers "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "rax", "rbx", "rcx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "memory" ); } blis-0.6.1/kernels/knl/3/000077500000000000000000000000001360743507500150525ustar00rootroot00000000000000blis-0.6.1/kernels/knl/3/bli_dgemm_knl_asm_24x8.c000066400000000000000000000516721360743507500214410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
   OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
   OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
// NOTE(review): the directive below has no header name in this copy of the
// file (possibly lost during text extraction) -- restore it before building.
#include

#define BLIS_ASM_SYNTAX_INTEL
#include "bli_x86_asm_macros.h"

// Tuning knobs. SCATTER_PREFETCH_C selects between the two "prefetch C"
// code paths in the kernel; PREFETCH_A_L2 / PREFETCH_B_L2 enable or disable
// the L2 prefetch macros defined further down; the *_PREFETCH_DIST values
// are the look-ahead distances (in k iterations) used by those macros.
#define UNROLL_K 32

#define SCATTER_PREFETCH_C 1

#define PREFETCH_A_L2 0
#define PREFETCH_B_L2 0
#define L2_PREFETCH_DIST 64

#define A_L1_PREFETCH_DIST 18
#define B_L1_PREFETCH_DIST 18

#define LOOP_ALIGN ALIGN16

// Update four consecutive rows of C: R := R * zmm0, then R += zmm1 * C_row,
// then store R to C_row. Rows are addressed as RCX, RCX+RAX, RCX+2*RAX and
// RCX+RDI; RCX is advanced by 4*RAX afterwards.
#define UPDATE_C_FOUR_ROWS(R1,R2,R3,R4) \
\
    VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \
    VFMADD231PD(ZMM(R1), ZMM(1), MEM(RCX      )) \
    VFMADD231PD(ZMM(R2), ZMM(1), MEM(RCX,RAX,1)) \
    VFMADD231PD(ZMM(R3), ZMM(1), MEM(RCX,RAX,2)) \
    VFMADD231PD(ZMM(R4), ZMM(1), MEM(RCX,RDI,1)) \
    VMOVUPD(MEM(RCX      ), ZMM(R1)) \
    VMOVUPD(MEM(RCX,RAX,1), ZMM(R2)) \
    VMOVUPD(MEM(RCX,RAX,2), ZMM(R3)) \
    VMOVUPD(MEM(RCX,RDI,1), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,4))

// Same as UPDATE_C_FOUR_ROWS but without reading C (no zmm1 * C term).
#define UPDATE_C_BZ_FOUR_ROWS(R1,R2,R3,R4) \
\
    VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \
    VMOVUPD(MEM(RCX      ), ZMM(R1)) \
    VMOVUPD(MEM(RCX,RAX,1), ZMM(R2)) \
    VMOVUPD(MEM(RCX,RAX,2), ZMM(R3)) \
    VMOVUPD(MEM(RCX,RDI,1), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,4))

// Update one row of C through gather/scatter with the indices in ymm2:
// NUM := NUM * zmm0, gather the row into zmm3, NUM += zmm3 * zmm1, scatter
// NUM back, then advance RCX by RAX. k1/k2 are set to all-ones first because
// the gather and scatter each consume their mask.
#define UPDATE_C_ROW_SCATTERED(NUM) \
\
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPD(ZMM(NUM), ZMM(NUM), ZMM(0)) \
    VGATHERDPD(ZMM(3) MASK_K(1), MEM(RCX,YMM(2),8)) \
    VFMADD231PD(ZMM(NUM), ZMM(3), ZMM(1)) \
    VSCATTERDPD(MEM(RCX,YMM(2),8) MASK_K(2), ZMM(NUM)) \
    ADD(RCX, RAX)

// Scatter-only variant of UPDATE_C_ROW_SCATTERED (C is not read).
#define UPDATE_C_BZ_ROW_SCATTERED(NUM) \
\
    KXNORW(K(1), K(0), K(0)) \
    VMULPD(ZMM(NUM), ZMM(NUM), ZMM(0)) \
    VSCATTERDPD(MEM(RCX,YMM(2),8) MASK_K(1), ZMM(NUM)) \
    ADD(RCX, RAX)

// L1 prefetches relative to RAX, A_L1_PREFETCH_DIST+n iterations ahead; one
// iteration spans 24*8 bytes, covered by three 64-byte lines (_1, _2, _3).
#define PREFETCH_A_L1_1(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*24*8))
#define PREFETCH_A_L1_2(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*24*8+64))
#define PREFETCH_A_L1_3(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*24*8+128))

// PREFETCH_A_L2 starts out as a 0/1 switch and is then redefined as a
// function-like macro: real prefetches when enabled, empty otherwise.
#if PREFETCH_A_L2
#undef PREFETCH_A_L2

#define PREFETCH_A_L2(n) \
\
    PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*24*8)) \
    PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*24*8+64)) \
    PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*24*8+128))

#else
#undef PREFETCH_A_L2
#define PREFETCH_A_L2(...)
#endif

// L1 prefetch relative to RBX; one iteration spans 8*8 bytes.
#define PREFETCH_B_L1(n) PREFETCH(0, MEM(RBX,(B_L1_PREFETCH_DIST+n)*8*8))

// Same switch-then-macro pattern as PREFETCH_A_L2.
#if PREFETCH_B_L2
#undef PREFETCH_B_L2

#define PREFETCH_B_L2(n) PREFETCH(1, MEM(RBX,(L2_PREFETCH_DIST+n)*8*8))

#else
#undef PREFETCH_B_L2
#define PREFETCH_B_L2(...)
#endif

// Hooks expanded inside SUBITER; empty by default.
#define PREFETCH_C_L1_1
#define PREFETCH_C_L1_2
#define PREFETCH_C_L1_3

//
// One k iteration: loads the next 64 bytes from RBX into ZMM(a) and
// accumulates into zmm8..zmm31, each FMA using ZMM(b) and one broadcast
// double from the A address. Prefetch macros are interleaved between
// groups of three FMAs. The doubled percent sign in (n%%4) is the escape
// for a literal % inside the asm template.
//
// n: index in unrolled loop
//
// a: ZMM register to load into
// b: ZMM register to read from
//
// ...: addressing for A, except for offset
//
#define SUBITER(n,a,b,...) \
\
    PREFETCH_A_L2(n) \
\
    VMOVAPD(ZMM(a), MEM(RBX,(n+1)*64)) \
    VFMADD231PD(ZMM( 8), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 0)*8)) \
    VFMADD231PD(ZMM( 9), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 1)*8)) \
    VFMADD231PD(ZMM(10), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 2)*8)) \
    PREFETCH_A_L1_1(n) \
    VFMADD231PD(ZMM(11), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 3)*8)) \
    VFMADD231PD(ZMM(12), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 4)*8)) \
    VFMADD231PD(ZMM(13), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 5)*8)) \
    PREFETCH_C_L1_1 \
    VFMADD231PD(ZMM(14), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 6)*8)) \
    VFMADD231PD(ZMM(15), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 7)*8)) \
    VFMADD231PD(ZMM(16), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 8)*8)) \
    PREFETCH_A_L1_2(n) \
    VFMADD231PD(ZMM(17), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 9)*8)) \
    VFMADD231PD(ZMM(18), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+10)*8)) \
    VFMADD231PD(ZMM(19), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+11)*8)) \
    PREFETCH_C_L1_2 \
    VFMADD231PD(ZMM(20), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+12)*8)) \
    VFMADD231PD(ZMM(21), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+13)*8)) \
    VFMADD231PD(ZMM(22), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+14)*8)) \
    PREFETCH_A_L1_3(n) \
    VFMADD231PD(ZMM(23), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+15)*8)) \
    VFMADD231PD(ZMM(24), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+16)*8)) \
    VFMADD231PD(ZMM(25), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+17)*8)) \
    PREFETCH_C_L1_3 \
    VFMADD231PD(ZMM(26), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+18)*8)) \
    VFMADD231PD(ZMM(27), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+19)*8)) \
    VFMADD231PD(ZMM(28), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+20)*8)) \
    PREFETCH_B_L1(n) \
    VFMADD231PD(ZMM(29), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+21)*8)) \
    VFMADD231PD(ZMM(30), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+22)*8)) \
    VFMADD231PD(ZMM(31), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+23)*8)) \
    PREFETCH_B_L2(n)

//This is an array used for the scatter/gather instructions.
static int32_t offsets[32] __attribute__((aligned(64))) = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; //#define MONITORS //#define LOOPMON void bli_dgemm_knl_asm_24x8 ( dim_t k_, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c_, inc_t cs_c_, auxinfo_t* restrict data, cntx_t* restrict cntx ) { (void)data; (void)cntx; const double * a_next = bli_auxinfo_next_a( data ); const double * b_next = bli_auxinfo_next_b( data ); const int32_t * offsetPtr = &offsets[0]; const int64_t k = k_; const int64_t rs_c = rs_c_; const int64_t cs_c = cs_c_; #ifdef MONITORS int toph, topl, both, botl, midl, midh, mid2l, mid2h; #endif #ifdef LOOPMON int tlooph, tloopl, blooph, bloopl; #endif BEGIN_ASM() #ifdef MONITORS RDTSC MOV(VAR(topl), EAX) MOV(VAR(toph), EDX) #endif VPXORD(ZMM(8), ZMM(8), ZMM(8)) //clear out registers VMOVAPS(ZMM( 9), ZMM(8)) MOV(R12, VAR(rs_c)) VMOVAPS(ZMM(10), ZMM(8)) MOV(RSI, VAR(k)) //loop index VMOVAPS(ZMM(11), ZMM(8)) MOV(RAX, VAR(a)) //load address of a VMOVAPS(ZMM(12), ZMM(8)) MOV(RBX, VAR(b)) //load address of b VMOVAPS(ZMM(13), ZMM(8)) MOV(RCX, VAR(c)) //load address of c VMOVAPS(ZMM(14), ZMM(8)) VMOVAPD(ZMM(0), MEM(RBX)) //pre-load b VMOVAPS(ZMM(15), ZMM(8)) MOV(RDI, VAR(offsetPtr)) VMOVAPS(ZMM(16), ZMM(8)) VMOVAPS(ZMM(4), MEM(RDI)) #if SCATTER_PREFETCH_C VMOVAPS(ZMM(17), ZMM(8)) VMOVAPS(ZMM(18), ZMM(8)) VMOVAPS(ZMM(19), ZMM(8)) VBROADCASTSS(ZMM(5), VAR(rs_c)) VMOVAPS(ZMM(20), ZMM(8)) VMOVAPS(ZMM(21), ZMM(8)) VPMULLD(ZMM(2), ZMM(4), ZMM(5)) VMOVAPS(ZMM(22), ZMM(8)) VMOVAPS(YMM(3), MEM(RDI,64)) VMOVAPS(ZMM(23), ZMM(8)) VPMULLD(YMM(3), YMM(3), YMM(5)) #else VMOVAPS(ZMM(17), ZMM(8)) VMOVAPS(ZMM(18), ZMM(8)) LEA(R13, MEM(R12,R12,2)) VMOVAPS(ZMM(19), ZMM(8)) LEA(R14, MEM(R12,R12,4)) VMOVAPS(ZMM(20), ZMM(8)) LEA(R15, MEM(R13,R12,4)) VMOVAPS(ZMM(21), ZMM(8)) VMOVAPS(ZMM(22), ZMM(8)) VMOVAPS(ZMM(23), ZMM(8)) #endif VMOVAPS(ZMM(24), ZMM(8)) 
VPSLLD(ZMM(4), ZMM(4), IMM(3)) VMOVAPS(ZMM(25), ZMM(8)) MOV(R8, IMM(4*24*8)) //offset for 4 iterations VMOVAPS(ZMM(26), ZMM(8)) LEA(R9, MEM(R8,R8,2)) //*3 VMOVAPS(ZMM(27), ZMM(8)) LEA(R10, MEM(R8,R8,4)) //*5 VMOVAPS(ZMM(28), ZMM(8)) LEA(R11, MEM(R9,R8,4)) //*7 VMOVAPS(ZMM(29), ZMM(8)) VMOVAPS(ZMM(30), ZMM(8)) VMOVAPS(ZMM(31), ZMM(8)) #ifdef MONITORS RDTSC MOV(VAR(midl), EAX) MOV(VAR(midh), EDX) #endif SUB(RSI, IMM(32)) JLE(TAIL) //prefetch C into L2 #if SCATTER_PREFETCH_C ADD(RSI, IMM(24)) KXNORW(K(1), K(0), K(0)) KXNORW(K(2), K(0), K(0)) VSCATTERPFDPS(1, MEM(RCX,ZMM(2),8) MASK_K(1)) VSCATTERPFDPD(1, MEM(RCX,YMM(3),8) MASK_K(2)) #else PREFETCHW1(MEM(RCX )) SUBITER( 0,1,0,RAX ) PREFETCHW1(MEM(RCX,R12,1)) SUBITER( 1,0,1,RAX ) PREFETCHW1(MEM(RCX,R12,2)) SUBITER( 2,1,0,RAX ) PREFETCHW1(MEM(RCX,R13,1)) SUBITER( 3,0,1,RAX ) PREFETCHW1(MEM(RCX,R12,4)) SUBITER( 4,1,0,RAX,R8, 1) PREFETCHW1(MEM(RCX,R14,1)) SUBITER( 5,0,1,RAX,R8, 1) PREFETCHW1(MEM(RCX,R13,2)) SUBITER( 6,1,0,RAX,R8, 1) PREFETCHW1(MEM(RCX,R15,1)) SUBITER( 7,0,1,RAX,R8, 1) LEA(RDX, MEM(RCX,R12,8)) PREFETCHW1(MEM(RDX )) SUBITER( 8,1,0,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,1)) SUBITER( 9,0,1,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,2)) SUBITER(10,1,0,RAX,R8, 2) PREFETCHW1(MEM(RDX,R13,1)) SUBITER(11,0,1,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,4)) SUBITER(12,1,0,RAX,R9, 1) PREFETCHW1(MEM(RDX,R14,1)) SUBITER(13,0,1,RAX,R9, 1) PREFETCHW1(MEM(RDX,R13,2)) SUBITER(14,1,0,RAX,R9, 1) PREFETCHW1(MEM(RDX,R15,1)) SUBITER(15,0,1,RAX,R9, 1) LEA(RDI, MEM(RDX,R12,8)) PREFETCHW1(MEM(RDI )) SUBITER(16,1,0,RAX,R8, 4) PREFETCHW1(MEM(RDI,R12,1)) SUBITER(17,0,1,RAX,R8, 4) PREFETCHW1(MEM(RDI,R12,2)) SUBITER(18,1,0,RAX,R8, 4) PREFETCHW1(MEM(RDI,R13,1)) SUBITER(19,0,1,RAX,R8, 4) PREFETCHW1(MEM(RDI,R12,4)) SUBITER(20,1,0,RAX,R10,1) PREFETCHW1(MEM(RDI,R14,1)) SUBITER(21,0,1,RAX,R10,1) PREFETCHW1(MEM(RDI,R13,2)) SUBITER(22,1,0,RAX,R10,1) PREFETCHW1(MEM(RDI,R15,1)) SUBITER(23,0,1,RAX,R10,1) ADD(RAX, IMM(24*24*8)) ADD(RBX, IMM(24* 8*8)) #endif MOV(RDI, RSI) 
AND(RDI, IMM(31)) SAR(RSI, IMM(5)) JZ(REM_1) LOOP_ALIGN LABEL(MAIN_LOOP) SUBITER( 0,1,0,RAX ) SUBITER( 1,0,1,RAX ) SUBITER( 2,1,0,RAX ) SUBITER( 3,0,1,RAX ) SUBITER( 4,1,0,RAX,R8, 1) SUBITER( 5,0,1,RAX,R8, 1) SUBITER( 6,1,0,RAX,R8, 1) SUBITER( 7,0,1,RAX,R8, 1) SUBITER( 8,1,0,RAX,R8, 2) SUBITER( 9,0,1,RAX,R8, 2) SUBITER(10,1,0,RAX,R8, 2) SUBITER(11,0,1,RAX,R8, 2) SUBITER(12,1,0,RAX,R9, 1) SUBITER(13,0,1,RAX,R9, 1) SUBITER(14,1,0,RAX,R9, 1) SUBITER(15,0,1,RAX,R9, 1) SUBITER(16,1,0,RAX,R8, 4) SUBITER(17,0,1,RAX,R8, 4) SUBITER(18,1,0,RAX,R8, 4) SUBITER(19,0,1,RAX,R8, 4) SUBITER(20,1,0,RAX,R10,1) SUBITER(21,0,1,RAX,R10,1) SUBITER(22,1,0,RAX,R10,1) SUBITER(23,0,1,RAX,R10,1) SUBITER(24,1,0,RAX,R9, 2) SUBITER(25,0,1,RAX,R9, 2) SUBITER(26,1,0,RAX,R9, 2) SUBITER(27,0,1,RAX,R9, 2) SUBITER(28,1,0,RAX,R11,1) SUBITER(29,0,1,RAX,R11,1) SUBITER(30,1,0,RAX,R11,1) SUBITER(31,0,1,RAX,R11,1) ADD(RAX, IMM(32*24*8)) ADD(RBX, IMM(32* 8*8)) SUB(RSI, IMM(1)) JNZ(MAIN_LOOP) LABEL(REM_1) SAR(RDI) JNC(REM_2) SUBITER(0,1,0,RAX) VMOVAPD(ZMM(0), ZMM(1)) ADD(RAX, IMM(24*8)) ADD(RBX, IMM( 8*8)) LABEL(REM_2) SAR(RDI) JNC(REM_4) SUBITER(0,1,0,RAX) SUBITER(1,0,1,RAX) ADD(RAX, IMM(2*24*8)) ADD(RBX, IMM(2* 8*8)) LABEL(REM_4) SAR(RDI) JNC(REM_8) SUBITER(0,1,0,RAX) SUBITER(1,0,1,RAX) SUBITER(2,1,0,RAX) SUBITER(3,0,1,RAX) ADD(RAX, IMM(4*24*8)) ADD(RBX, IMM(4* 8*8)) LABEL(REM_8) SAR(RDI) JNC(REM_16) SUBITER(0,1,0,RAX ) SUBITER(1,0,1,RAX ) SUBITER(2,1,0,RAX ) SUBITER(3,0,1,RAX ) SUBITER(4,1,0,RAX,R8,1) SUBITER(5,0,1,RAX,R8,1) SUBITER(6,1,0,RAX,R8,1) SUBITER(7,0,1,RAX,R8,1) ADD(RAX, IMM(8*24*8)) ADD(RBX, IMM(8* 8*8)) LABEL(REM_16) SAR(RDI) JNC(AFTER_LOOP) SUBITER( 0,1,0,RAX ) SUBITER( 1,0,1,RAX ) SUBITER( 2,1,0,RAX ) SUBITER( 3,0,1,RAX ) SUBITER( 4,1,0,RAX,R8, 1) SUBITER( 5,0,1,RAX,R8, 1) SUBITER( 6,1,0,RAX,R8, 1) SUBITER( 7,0,1,RAX,R8, 1) SUBITER( 8,1,0,RAX,R8, 2) SUBITER( 9,0,1,RAX,R8, 2) SUBITER(10,1,0,RAX,R8, 2) SUBITER(11,0,1,RAX,R8, 2) SUBITER(12,1,0,RAX,R9, 1) SUBITER(13,0,1,RAX,R9, 1) 
SUBITER(14,1,0,RAX,R9, 1) SUBITER(15,0,1,RAX,R9, 1) ADD(RAX, IMM(16*24*8)) ADD(RBX, IMM(16* 8*8)) LABEL(AFTER_LOOP) //prefetch C into L1 #if SCATTER_PREFETCH_C KXNORW(K(1), K(0), K(0)) KXNORW(K(2), K(0), K(0)) VSCATTERPFDPS(0, MEM(RCX,ZMM(2),8) MASK_K(1)) VSCATTERPFDPD(0, MEM(RCX,YMM(3),8) MASK_K(2)) SUBITER(0,1,0,RAX ) SUBITER(1,0,1,RAX ) SUBITER(2,1,0,RAX ) SUBITER(3,0,1,RAX ) SUBITER(4,1,0,RAX,R8,1) SUBITER(5,0,1,RAX,R8,1) SUBITER(6,1,0,RAX,R8,1) SUBITER(7,0,1,RAX,R8,1) #else LEA(RDX, MEM(RCX,R12,8)) LEA(RDI, MEM(RDX,R12,8)) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RCX )) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RCX,R12,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RCX,R12,2)) SUBITER(0,1,0,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RCX,R13,1)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RCX,R12,4)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RCX,R14,1)) SUBITER(1,0,1,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RCX,R13,2)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RCX,R15,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX )) SUBITER(2,1,0,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R12,1)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R12,2)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R13,1)) SUBITER(3,0,1,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R12,4)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R14,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R13,2)) SUBITER(4,1,0,RAX,R8,1) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R15,1)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDI )) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDI,R12,1)) SUBITER(5,0,1,RAX,R8,1) #undef 
PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDI,R12,2)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDI,R13,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDI,R12,4)) SUBITER(6,1,0,RAX,R8,1) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDI,R14,1)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDI,R13,2)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDI,R15,1)) SUBITER(7,0,1,RAX,R8,1) #endif JMP(POSTACCUM) LABEL(TAIL) MOV(RDX, RCX) ADD(RSI, IMM(32)) JZ(POSTACCUM) LABEL(TAIL_LOOP) PREFETCHW0(MEM(RDX)) ADD(RDX, R12) SUBITER(0,1,0,RAX) VMOVAPD(ZMM(0), ZMM(1)) ADD(RAX, IMM(24*8)) ADD(RBX, IMM( 8*8)) SUB(RSI, IMM(1)) JNZ(TAIL_LOOP) LABEL(POSTACCUM) #ifdef MONITORS RDTSC MOV(VAR(mid2l), EAX) MOV(VAR(mid2h), EDX) #endif MOV(RAX, VAR(alpha)) MOV(RBX, VAR(beta)) VBROADCASTSD(ZMM(0), MEM(RAX)) VBROADCASTSD(ZMM(1), MEM(RBX)) // Check if C is row stride. If not, jump to the slow scattered update MOV(RAX, VAR(rs_c)) LEA(RAX, MEM(,RAX,8)) MOV(RBX, VAR(cs_c)) LEA(RDI, MEM(RAX,RAX,2)) CMP(RBX, IMM(1)) JNE(SCATTEREDUPDATE) VMOVQ(RDX, XMM(1)) SAL(RDX) //shift out sign bit JZ(COLSTORBZ) UPDATE_C_FOUR_ROWS( 8, 9,10,11) UPDATE_C_FOUR_ROWS(12,13,14,15) UPDATE_C_FOUR_ROWS(16,17,18,19) UPDATE_C_FOUR_ROWS(20,21,22,23) UPDATE_C_FOUR_ROWS(24,25,26,27) UPDATE_C_FOUR_ROWS(28,29,30,31) JMP(END) LABEL(COLSTORBZ) UPDATE_C_BZ_FOUR_ROWS( 8, 9,10,11) UPDATE_C_BZ_FOUR_ROWS(12,13,14,15) UPDATE_C_BZ_FOUR_ROWS(16,17,18,19) UPDATE_C_BZ_FOUR_ROWS(20,21,22,23) UPDATE_C_BZ_FOUR_ROWS(24,25,26,27) UPDATE_C_BZ_FOUR_ROWS(28,29,30,31) JMP(END) LABEL(SCATTEREDUPDATE) MOV(RDI, VAR(offsetPtr)) VMOVAPS(ZMM(2), MEM(RDI)) /* Note that this ignores the upper 32 bits in cs_c */ VPBROADCASTD(ZMM(3), EBX) VPMULLD(ZMM(2), ZMM(3), ZMM(2)) VMOVQ(RDX, XMM(1)) SAL(RDX) //shift out sign bit JZ(SCATTERBZ) UPDATE_C_ROW_SCATTERED( 8) UPDATE_C_ROW_SCATTERED( 9) UPDATE_C_ROW_SCATTERED(10) UPDATE_C_ROW_SCATTERED(11) 
UPDATE_C_ROW_SCATTERED(12) UPDATE_C_ROW_SCATTERED(13) UPDATE_C_ROW_SCATTERED(14) UPDATE_C_ROW_SCATTERED(15) UPDATE_C_ROW_SCATTERED(16) UPDATE_C_ROW_SCATTERED(17) UPDATE_C_ROW_SCATTERED(18) UPDATE_C_ROW_SCATTERED(19) UPDATE_C_ROW_SCATTERED(20) UPDATE_C_ROW_SCATTERED(21) UPDATE_C_ROW_SCATTERED(22) UPDATE_C_ROW_SCATTERED(23) UPDATE_C_ROW_SCATTERED(24) UPDATE_C_ROW_SCATTERED(25) UPDATE_C_ROW_SCATTERED(26) UPDATE_C_ROW_SCATTERED(27) UPDATE_C_ROW_SCATTERED(28) UPDATE_C_ROW_SCATTERED(29) UPDATE_C_ROW_SCATTERED(30) UPDATE_C_ROW_SCATTERED(31) JMP(END) LABEL(SCATTERBZ) UPDATE_C_BZ_ROW_SCATTERED( 8) UPDATE_C_BZ_ROW_SCATTERED( 9) UPDATE_C_BZ_ROW_SCATTERED(10) UPDATE_C_BZ_ROW_SCATTERED(11) UPDATE_C_BZ_ROW_SCATTERED(12) UPDATE_C_BZ_ROW_SCATTERED(13) UPDATE_C_BZ_ROW_SCATTERED(14) UPDATE_C_BZ_ROW_SCATTERED(15) UPDATE_C_BZ_ROW_SCATTERED(16) UPDATE_C_BZ_ROW_SCATTERED(17) UPDATE_C_BZ_ROW_SCATTERED(18) UPDATE_C_BZ_ROW_SCATTERED(19) UPDATE_C_BZ_ROW_SCATTERED(20) UPDATE_C_BZ_ROW_SCATTERED(21) UPDATE_C_BZ_ROW_SCATTERED(22) UPDATE_C_BZ_ROW_SCATTERED(23) UPDATE_C_BZ_ROW_SCATTERED(24) UPDATE_C_BZ_ROW_SCATTERED(25) UPDATE_C_BZ_ROW_SCATTERED(26) UPDATE_C_BZ_ROW_SCATTERED(27) UPDATE_C_BZ_ROW_SCATTERED(28) UPDATE_C_BZ_ROW_SCATTERED(29) UPDATE_C_BZ_ROW_SCATTERED(30) UPDATE_C_BZ_ROW_SCATTERED(31) LABEL(END) #ifdef MONITORS RDTSC MOV(VAR(botl), EAX) MOV(VAR(both), EDX) #endif END_ASM( : // output operands #ifdef MONITORS [topl] "=m" (topl), [toph] "=m" (toph), [midl] "=m" (midl), [midh] "=m" (midh), [mid2l] "=m" (mid2l), [mid2h] "=m" (mid2h), [botl] "=m" (botl), [both] "=m" (both) #endif : // input operands [k] "m" (k), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), [a_next] "m" (a_next), [b_next] "m" (b_next), [offsetPtr] "m" (offsetPtr) : // register clobber list "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", 
"zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ) #ifdef LOOPMON printf("looptime = \t%d\n", bloopl - tloopl); #endif #ifdef MONITORS dim_t top = ((dim_t)toph << 32) | topl; dim_t mid = ((dim_t)midh << 32) | midl; dim_t mid2 = ((dim_t)mid2h << 32) | mid2l; dim_t bot = ((dim_t)both << 32) | botl; printf("setup =\t%u\tmain loop =\t%u\tcleanup=\t%u\ttotal=\t%u\n", mid - top, mid2 - mid, bot - mid2, bot - top); #endif } blis-0.6.1/kernels/knl/3/bli_sgemm_knl_asm_24x16.c000066400000000000000000000514551360743507500215360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
   IN NO EVENT SHALL THE UNIVERSITY
   OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
   OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
// NOTE(review): the directive below has no header name in this copy of the
// file (possibly lost during text extraction) -- restore it before building.
#include

#define BLIS_ASM_SYNTAX_INTEL
#include "bli_x86_asm_macros.h"

// Tuning knobs. SCATTER_PREFETCH_C selects between the two "prefetch C"
// code paths in the kernel; PREFETCH_A_L2 / PREFETCH_B_L2 enable or disable
// the L2 prefetch macros defined further down; the *_PREFETCH_DIST values
// are the look-ahead distances (in k iterations) used by those macros.
#define UNROLL_K 32

#define SCATTER_PREFETCH_C 1

#define PREFETCH_A_L2 0
#define PREFETCH_B_L2 0
#define L2_PREFETCH_DIST 64

#define A_L1_PREFETCH_DIST 36
#define B_L1_PREFETCH_DIST 18

#define LOOP_ALIGN ALIGN16

// Update four consecutive rows of C (single precision): R := R * zmm0, then
// R += zmm1 * C_row, then store R to C_row. Rows are addressed as RCX,
// RCX+RAX, RCX+2*RAX and RCX+RDI; RCX is advanced by 4*RAX afterwards.
#define UPDATE_C_FOUR_ROWS(R1,R2,R3,R4) \
\
    VMULPS(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPS(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPS(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPS(ZMM(R4), ZMM(R4), ZMM(0)) \
    VFMADD231PS(ZMM(R1), ZMM(1), MEM(RCX      )) \
    VFMADD231PS(ZMM(R2), ZMM(1), MEM(RCX,RAX,1)) \
    VFMADD231PS(ZMM(R3), ZMM(1), MEM(RCX,RAX,2)) \
    VFMADD231PS(ZMM(R4), ZMM(1), MEM(RCX,RDI,1)) \
    VMOVUPS(MEM(RCX      ), ZMM(R1)) \
    VMOVUPS(MEM(RCX,RAX,1), ZMM(R2)) \
    VMOVUPS(MEM(RCX,RAX,2), ZMM(R3)) \
    VMOVUPS(MEM(RCX,RDI,1), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,4))

// Same as UPDATE_C_FOUR_ROWS but without reading C (no zmm1 * C term).
#define UPDATE_C_BZ_FOUR_ROWS(R1,R2,R3,R4) \
\
    VMULPS(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPS(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPS(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPS(ZMM(R4), ZMM(R4), ZMM(0)) \
    VMOVUPS(MEM(RCX      ), ZMM(R1)) \
    VMOVUPS(MEM(RCX,RAX,1), ZMM(R2)) \
    VMOVUPS(MEM(RCX,RAX,2), ZMM(R3)) \
    VMOVUPS(MEM(RCX,RDI,1), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,4))

// Update one row of C through gather/scatter with the indices in zmm2
// (scale 4): NUM := NUM * zmm0, gather the row into zmm3,
// NUM += zmm3 * zmm1, scatter NUM back, then advance RCX by RAX. k1/k2 are
// set to all-ones first because the gather and scatter each consume their
// mask.
#define UPDATE_C_ROW_SCATTERED(NUM) \
\
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPS(ZMM(NUM), ZMM(NUM), ZMM(0)) \
    VGATHERDPS(ZMM(3) MASK_K(1), MEM(RCX,ZMM(2),4)) \
    VFMADD231PS(ZMM(NUM), ZMM(3), ZMM(1)) \
    VSCATTERDPS(MEM(RCX,ZMM(2),4) MASK_K(2), ZMM(NUM)) \
    ADD(RCX, RAX)

// Scatter-only variant of UPDATE_C_ROW_SCATTERED (C is not read).
#define UPDATE_C_BZ_ROW_SCATTERED(NUM) \
\
    KXNORW(K(1), K(0), K(0)) \
    VMULPS(ZMM(NUM), ZMM(NUM), ZMM(0)) \
    VSCATTERDPS(MEM(RCX,ZMM(2),4) MASK_K(1), ZMM(NUM)) \
    ADD(RCX, RAX)

// L1 prefetches relative to RAX, A_L1_PREFETCH_DIST+n iterations ahead; one
// iteration spans 24*4 bytes, covered by two 64-byte lines (_1, _2).
#define PREFETCH_A_L1_1(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*24*4))
#define PREFETCH_A_L1_2(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*24*4+64))

// PREFETCH_A_L2 starts out as a 0/1 switch and is then redefined as a
// function-like macro: real prefetches when enabled, empty otherwise.
#if PREFETCH_A_L2
#undef PREFETCH_A_L2

#define PREFETCH_A_L2(n) \
\
    PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*24*4)) \
    PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*24*4+64))

#else
#undef PREFETCH_A_L2
#define PREFETCH_A_L2(...)
#endif

// L1 prefetch relative to RBX; one iteration spans 16*4 bytes.
#define PREFETCH_B_L1(n) PREFETCH(0, MEM(RBX,(B_L1_PREFETCH_DIST+n)*16*4))

// Same switch-then-macro pattern as PREFETCH_A_L2.
#if PREFETCH_B_L2
#undef PREFETCH_B_L2

#define PREFETCH_B_L2(n) PREFETCH(1, MEM(RBX,(L2_PREFETCH_DIST+n)*16*4))

#else
#undef PREFETCH_B_L2
#define PREFETCH_B_L2(...)
#endif

// Hooks expanded inside SUBITER; empty by default.
#define PREFETCH_C_L1_1
#define PREFETCH_C_L1_2
#define PREFETCH_C_L1_3

//
// One k iteration: loads the next 64 bytes from RBX into ZMM(a) and
// accumulates into zmm8..zmm31, each FMA using ZMM(b) and one broadcast
// float from the A address. Prefetch macros are interleaved between groups
// of three FMAs. The doubled percent sign in (n%%4) is the escape for a
// literal % inside the asm template.
//
// n: index in unrolled loop
//
// a: ZMM register to load into
// b: ZMM register to read from
//
// ...: addressing for A, except for offset
//
#define SUBITER(n,a,b,...) \
\
    PREFETCH_A_L2(n) \
\
    VMOVAPS(ZMM(a), MEM(RBX,(n+1)*64)) \
    VFMADD231PS(ZMM( 8), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 0)*4)) \
    VFMADD231PS(ZMM( 9), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 1)*4)) \
    VFMADD231PS(ZMM(10), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 2)*4)) \
    PREFETCH_A_L1_1(n) \
    VFMADD231PS(ZMM(11), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 3)*4)) \
    VFMADD231PS(ZMM(12), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 4)*4)) \
    VFMADD231PS(ZMM(13), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 5)*4)) \
    PREFETCH_C_L1_1 \
    VFMADD231PS(ZMM(14), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 6)*4)) \
    VFMADD231PS(ZMM(15), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 7)*4)) \
    VFMADD231PS(ZMM(16), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 8)*4)) \
    PREFETCH_A_L1_2(n) \
    VFMADD231PS(ZMM(17), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+ 9)*4)) \
    VFMADD231PS(ZMM(18), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+10)*4)) \
    VFMADD231PS(ZMM(19), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+11)*4)) \
    PREFETCH_C_L1_2 \
    VFMADD231PS(ZMM(20), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+12)*4)) \
    VFMADD231PS(ZMM(21), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+13)*4)) \
    VFMADD231PS(ZMM(22), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+14)*4)) \
    PREFETCH_C_L1_3 \
    VFMADD231PS(ZMM(23), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+15)*4)) \
    VFMADD231PS(ZMM(24), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+16)*4)) \
    VFMADD231PS(ZMM(25), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+17)*4)) \
    PREFETCH_B_L1(n) \
    VFMADD231PS(ZMM(26), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+18)*4)) \
    VFMADD231PS(ZMM(27), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+19)*4)) \
    VFMADD231PS(ZMM(28), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+20)*4)) \
    PREFETCH_B_L2(n) \
    VFMADD231PS(ZMM(29), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+21)*4)) \
    VFMADD231PS(ZMM(30), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+22)*4)) \
    VFMADD231PS(ZMM(31), ZMM(b), MEM_1TO16(__VA_ARGS__,((n%%4)*24+23)*4))

//This is an array used for the scatter/gather instructions.
static int32_t offsets[32] __attribute__((aligned(64))) = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; //#define MONITORS //#define LOOPMON void bli_sgemm_knl_asm_24x16 ( dim_t k_, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c_, inc_t cs_c_, auxinfo_t* data, cntx_t* restrict cntx ) { (void)data; (void)cntx; const double * a_next = bli_auxinfo_next_a( data ); const double * b_next = bli_auxinfo_next_b( data ); const int32_t * offsetPtr = &offsets[0]; const int64_t k = k_; const int64_t rs_c = rs_c_; const int64_t cs_c = cs_c_; #ifdef MONITORS int toph, topl, both, botl, midl, midh, mid2l, mid2h; #endif #ifdef LOOPMON int tlooph, tloopl, blooph, bloopl; #endif BEGIN_ASM() #ifdef MONITORS RDTSC MOV(VAR(topl), EAX) MOV(VAR(toph), EDX) #endif VPXORD(ZMM(8), ZMM(8), ZMM(8)) //clear out registers VMOVAPS(ZMM( 9), ZMM(8)) MOV(R12, VAR(rs_c)) VMOVAPS(ZMM(10), ZMM(8)) MOV(RSI, VAR(k)) //loop index VMOVAPS(ZMM(11), ZMM(8)) MOV(RAX, VAR(a)) //load address of a VMOVAPS(ZMM(12), ZMM(8)) MOV(RBX, VAR(b)) //load address of b VMOVAPS(ZMM(13), ZMM(8)) MOV(RCX, VAR(c)) //load address of c VMOVAPS(ZMM(14), ZMM(8)) VMOVAPD(ZMM(0), MEM(RBX)) //pre-load b VMOVAPS(ZMM(15), ZMM(8)) MOV(RDI, VAR(offsetPtr)) VMOVAPS(ZMM(16), ZMM(8)) VMOVAPS(ZMM(4), MEM(RDI)) #if SCATTER_PREFETCH_C VMOVAPS(ZMM(17), ZMM(8)) VMOVAPS(ZMM(18), ZMM(8)) VMOVAPS(ZMM(19), ZMM(8)) VBROADCASTSS(ZMM(5), VAR(rs_c)) VMOVAPS(ZMM(20), ZMM(8)) VMOVAPS(ZMM(21), ZMM(8)) VPMULLD(ZMM(2), ZMM(4), ZMM(5)) VMOVAPS(ZMM(22), ZMM(8)) VMOVAPS(YMM(3), MEM(RDI,64)) VMOVAPS(ZMM(23), ZMM(8)) VPMULLD(YMM(3), YMM(3), YMM(5)) #else VMOVAPS(ZMM(17), ZMM(8)) VMOVAPS(ZMM(18), ZMM(8)) LEA(R13, MEM(R12,R12,2)) VMOVAPS(ZMM(19), ZMM(8)) LEA(R14, MEM(R12,R12,4)) VMOVAPS(ZMM(20), ZMM(8)) LEA(R15, MEM(R13,R12,4)) VMOVAPS(ZMM(21), ZMM(8)) VMOVAPS(ZMM(22), ZMM(8)) VMOVAPS(ZMM(23), ZMM(8)) #endif VMOVAPS(ZMM(24), ZMM(8)) 
VPSLLD(ZMM(4), ZMM(4), IMM(2)) VMOVAPS(ZMM(25), ZMM(8)) MOV(R8, IMM(4*24*4)) //offset for 4 iterations VMOVAPS(ZMM(26), ZMM(8)) LEA(R9, MEM(R8,R8,2)) //*3 VMOVAPS(ZMM(27), ZMM(8)) LEA(R10, MEM(R8,R8,4)) //*5 VMOVAPS(ZMM(28), ZMM(8)) LEA(R11, MEM(R9,R8,4)) //*7 VMOVAPS(ZMM(29), ZMM(8)) VMOVAPS(ZMM(30), ZMM(8)) VMOVAPS(ZMM(31), ZMM(8)) #ifdef MONITORS RDTSC MOV(VAR(midl), EAX) MOV(VAR(midh), EDX) #endif SUB(RSI, IMM(32)) JLE(TAIL) //prefetch C into L2 #if SCATTER_PREFETCH_C ADD(RSI, IMM(24)) KXNORW(K(1), K(0), K(0)) KXNORW(K(2), K(0), K(0)) VSCATTERPFDPS(1, MEM(RCX,ZMM(2),8) MASK_K(1)) VSCATTERPFDPD(1, MEM(RCX,YMM(3),8) MASK_K(2)) #else PREFETCHW1(MEM(RCX )) SUBITER( 0,1,0,RAX ) PREFETCHW1(MEM(RCX,R12,1)) SUBITER( 1,0,1,RAX ) PREFETCHW1(MEM(RCX,R12,2)) SUBITER( 2,1,0,RAX ) PREFETCHW1(MEM(RCX,R13,1)) SUBITER( 3,0,1,RAX ) PREFETCHW1(MEM(RCX,R12,4)) SUBITER( 4,1,0,RAX,R8, 1) PREFETCHW1(MEM(RCX,R14,1)) SUBITER( 5,0,1,RAX,R8, 1) PREFETCHW1(MEM(RCX,R13,2)) SUBITER( 6,1,0,RAX,R8, 1) PREFETCHW1(MEM(RCX,R15,1)) SUBITER( 7,0,1,RAX,R8, 1) LEA(RDX, MEM(RCX,R12,8)) PREFETCHW1(MEM(RDX )) SUBITER( 8,1,0,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,1)) SUBITER( 9,0,1,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,2)) SUBITER(10,1,0,RAX,R8, 2) PREFETCHW1(MEM(RDX,R13,1)) SUBITER(11,0,1,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,4)) SUBITER(12,1,0,RAX,R9, 1) PREFETCHW1(MEM(RDX,R14,1)) SUBITER(13,0,1,RAX,R9, 1) PREFETCHW1(MEM(RDX,R13,2)) SUBITER(14,1,0,RAX,R9, 1) PREFETCHW1(MEM(RDX,R15,1)) SUBITER(15,0,1,RAX,R9, 1) LEA(RDI, MEM(RDX,R12,8)) PREFETCHW1(MEM(RDI )) SUBITER(16,1,0,RAX,R8, 4) PREFETCHW1(MEM(RDI,R12,1)) SUBITER(17,0,1,RAX,R8, 4) PREFETCHW1(MEM(RDI,R12,2)) SUBITER(18,1,0,RAX,R8, 4) PREFETCHW1(MEM(RDI,R13,1)) SUBITER(19,0,1,RAX,R8, 4) PREFETCHW1(MEM(RDI,R12,4)) SUBITER(20,1,0,RAX,R10,1) PREFETCHW1(MEM(RDI,R14,1)) SUBITER(21,0,1,RAX,R10,1) PREFETCHW1(MEM(RDI,R13,2)) SUBITER(22,1,0,RAX,R10,1) PREFETCHW1(MEM(RDI,R15,1)) SUBITER(23,0,1,RAX,R10,1) ADD(RAX, IMM(24*24*4)) ADD(RBX, IMM(24*16*4)) #endif MOV(RDI, RSI) 
AND(RDI, IMM(31)) SAR(RSI, IMM(5)) JZ(REM_1) LOOP_ALIGN LABEL(MAIN_LOOP) SUBITER( 0,1,0,RAX ) SUBITER( 1,0,1,RAX ) SUBITER( 2,1,0,RAX ) SUBITER( 3,0,1,RAX ) SUBITER( 4,1,0,RAX,R8, 1) SUBITER( 5,0,1,RAX,R8, 1) SUBITER( 6,1,0,RAX,R8, 1) SUBITER( 7,0,1,RAX,R8, 1) SUBITER( 8,1,0,RAX,R8, 2) SUBITER( 9,0,1,RAX,R8, 2) SUBITER(10,1,0,RAX,R8, 2) SUBITER(11,0,1,RAX,R8, 2) SUBITER(12,1,0,RAX,R9, 1) SUBITER(13,0,1,RAX,R9, 1) SUBITER(14,1,0,RAX,R9, 1) SUBITER(15,0,1,RAX,R9, 1) SUBITER(16,1,0,RAX,R8, 4) SUBITER(17,0,1,RAX,R8, 4) SUBITER(18,1,0,RAX,R8, 4) SUBITER(19,0,1,RAX,R8, 4) SUBITER(20,1,0,RAX,R10,1) SUBITER(21,0,1,RAX,R10,1) SUBITER(22,1,0,RAX,R10,1) SUBITER(23,0,1,RAX,R10,1) SUBITER(24,1,0,RAX,R9, 2) SUBITER(25,0,1,RAX,R9, 2) SUBITER(26,1,0,RAX,R9, 2) SUBITER(27,0,1,RAX,R9, 2) SUBITER(28,1,0,RAX,R11,1) SUBITER(29,0,1,RAX,R11,1) SUBITER(30,1,0,RAX,R11,1) SUBITER(31,0,1,RAX,R11,1) ADD(RAX, IMM(32*24*4)) ADD(RBX, IMM(32*16*4)) SUB(RSI, IMM(1)) JNZ(MAIN_LOOP) LABEL(REM_1) SAR(RDI) JNC(REM_2) SUBITER(0,1,0,RAX) VMOVAPD(ZMM(0), ZMM(1)) ADD(RAX, IMM(24*4)) ADD(RBX, IMM(16*4)) LABEL(REM_2) SAR(RDI) JNC(REM_4) SUBITER(0,1,0,RAX) SUBITER(1,0,1,RAX) ADD(RAX, IMM(2*24*4)) ADD(RBX, IMM(2*16*4)) LABEL(REM_4) SAR(RDI) JNC(REM_8) SUBITER(0,1,0,RAX) SUBITER(1,0,1,RAX) SUBITER(2,1,0,RAX) SUBITER(3,0,1,RAX) ADD(RAX, IMM(4*24*4)) ADD(RBX, IMM(4*16*4)) LABEL(REM_8) SAR(RDI) JNC(REM_16) SUBITER(0,1,0,RAX ) SUBITER(1,0,1,RAX ) SUBITER(2,1,0,RAX ) SUBITER(3,0,1,RAX ) SUBITER(4,1,0,RAX,R8,1) SUBITER(5,0,1,RAX,R8,1) SUBITER(6,1,0,RAX,R8,1) SUBITER(7,0,1,RAX,R8,1) ADD(RAX, IMM(8*24*4)) ADD(RBX, IMM(8*16*4)) LABEL(REM_16) SAR(RDI) JNC(AFTER_LOOP) SUBITER( 0,1,0,RAX ) SUBITER( 1,0,1,RAX ) SUBITER( 2,1,0,RAX ) SUBITER( 3,0,1,RAX ) SUBITER( 4,1,0,RAX,R8, 1) SUBITER( 5,0,1,RAX,R8, 1) SUBITER( 6,1,0,RAX,R8, 1) SUBITER( 7,0,1,RAX,R8, 1) SUBITER( 8,1,0,RAX,R8, 2) SUBITER( 9,0,1,RAX,R8, 2) SUBITER(10,1,0,RAX,R8, 2) SUBITER(11,0,1,RAX,R8, 2) SUBITER(12,1,0,RAX,R9, 1) SUBITER(13,0,1,RAX,R9, 1) 
SUBITER(14,1,0,RAX,R9, 1) SUBITER(15,0,1,RAX,R9, 1) ADD(RAX, IMM(16*24*4)) ADD(RBX, IMM(16*16*4)) LABEL(AFTER_LOOP) //prefetch C into L1 #if SCATTER_PREFETCH_C KXNORW(K(1), K(0), K(0)) KXNORW(K(2), K(0), K(0)) VSCATTERPFDPS(0, MEM(RCX,ZMM(2),8) MASK_K(1)) VSCATTERPFDPD(0, MEM(RCX,YMM(3),8) MASK_K(2)) SUBITER(0,1,0,RAX ) SUBITER(1,0,1,RAX ) SUBITER(2,1,0,RAX ) SUBITER(3,0,1,RAX ) SUBITER(4,1,0,RAX,R8,1) SUBITER(5,0,1,RAX,R8,1) SUBITER(6,1,0,RAX,R8,1) SUBITER(7,0,1,RAX,R8,1) #else LEA(RDX, MEM(RCX,R12,8)) LEA(RDI, MEM(RDX,R12,8)) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RCX )) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RCX,R12,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RCX,R12,2)) SUBITER(0,1,0,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RCX,R13,1)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RCX,R12,4)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RCX,R14,1)) SUBITER(1,0,1,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RCX,R13,2)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RCX,R15,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX )) SUBITER(2,1,0,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R12,1)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R12,2)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R13,1)) SUBITER(3,0,1,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R12,4)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R14,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R13,2)) SUBITER(4,1,0,RAX,R8,1) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R15,1)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDI )) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDI,R12,1)) SUBITER(5,0,1,RAX,R8,1) #undef 
PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDI,R12,2)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDI,R13,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDI,R12,4)) SUBITER(6,1,0,RAX,R8,1) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDI,R14,1)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDI,R13,2)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDI,R15,1)) SUBITER(7,0,1,RAX,R8,1) #endif JMP(POSTACCUM) LABEL(TAIL) MOV(RDX, RCX) ADD(RSI, IMM(32)) JZ(POSTACCUM) LABEL(TAIL_LOOP) PREFETCHW0(MEM(RDX)) ADD(RDX, R12) SUBITER(0,1,0,RAX) VMOVAPD(ZMM(0), ZMM(1)) ADD(RAX, IMM(24*4)) ADD(RBX, IMM(16*4)) SUB(RSI, IMM(1)) JNZ(TAIL_LOOP) LABEL(POSTACCUM) #ifdef MONITORS RDTSC MOV(VAR(mid2l), EAX) MOV(VAR(mid2h), EDX) #endif MOV(RAX, VAR(alpha)) MOV(RBX, VAR(beta)) VBROADCASTSS(ZMM(0), MEM(RAX)) VBROADCASTSS(ZMM(1), MEM(RBX)) // Check if C is row stride. If not, jump to the slow scattered update MOV(RAX, VAR(rs_c)) LEA(RAX, MEM(,RAX,4)) MOV(RBX, VAR(cs_c)) LEA(RDI, MEM(RAX,RAX,2)) CMP(RBX, IMM(1)) JNE(SCATTEREDUPDATE) VMOVD(EDX, XMM(1)) SAL(EDX) //shift out sign bit JZ(COLSTORBZ) UPDATE_C_FOUR_ROWS( 8, 9,10,11) UPDATE_C_FOUR_ROWS(12,13,14,15) UPDATE_C_FOUR_ROWS(16,17,18,19) UPDATE_C_FOUR_ROWS(20,21,22,23) UPDATE_C_FOUR_ROWS(24,25,26,27) UPDATE_C_FOUR_ROWS(28,29,30,31) JMP(END) LABEL(COLSTORBZ) UPDATE_C_BZ_FOUR_ROWS( 8, 9,10,11) UPDATE_C_BZ_FOUR_ROWS(12,13,14,15) UPDATE_C_BZ_FOUR_ROWS(16,17,18,19) UPDATE_C_BZ_FOUR_ROWS(20,21,22,23) UPDATE_C_BZ_FOUR_ROWS(24,25,26,27) UPDATE_C_BZ_FOUR_ROWS(28,29,30,31) JMP(END) LABEL(SCATTEREDUPDATE) MOV(RDI, VAR(offsetPtr)) VMOVAPS(ZMM(2), MEM(RDI)) /* Note that this ignores the upper 32 bits in cs_c */ VPBROADCASTD(ZMM(3), EBX) VPMULLD(ZMM(2), ZMM(3), ZMM(2)) VMOVD(EDX, XMM(1)) SAL(EDX) //shift out sign bit JZ(SCATTERBZ) UPDATE_C_ROW_SCATTERED( 8) UPDATE_C_ROW_SCATTERED( 9) UPDATE_C_ROW_SCATTERED(10) UPDATE_C_ROW_SCATTERED(11) 
UPDATE_C_ROW_SCATTERED(12) UPDATE_C_ROW_SCATTERED(13) UPDATE_C_ROW_SCATTERED(14) UPDATE_C_ROW_SCATTERED(15) UPDATE_C_ROW_SCATTERED(16) UPDATE_C_ROW_SCATTERED(17) UPDATE_C_ROW_SCATTERED(18) UPDATE_C_ROW_SCATTERED(19) UPDATE_C_ROW_SCATTERED(20) UPDATE_C_ROW_SCATTERED(21) UPDATE_C_ROW_SCATTERED(22) UPDATE_C_ROW_SCATTERED(23) UPDATE_C_ROW_SCATTERED(24) UPDATE_C_ROW_SCATTERED(25) UPDATE_C_ROW_SCATTERED(26) UPDATE_C_ROW_SCATTERED(27) UPDATE_C_ROW_SCATTERED(28) UPDATE_C_ROW_SCATTERED(29) UPDATE_C_ROW_SCATTERED(30) UPDATE_C_ROW_SCATTERED(31) JMP(END) LABEL(SCATTERBZ) UPDATE_C_BZ_ROW_SCATTERED( 8) UPDATE_C_BZ_ROW_SCATTERED( 9) UPDATE_C_BZ_ROW_SCATTERED(10) UPDATE_C_BZ_ROW_SCATTERED(11) UPDATE_C_BZ_ROW_SCATTERED(12) UPDATE_C_BZ_ROW_SCATTERED(13) UPDATE_C_BZ_ROW_SCATTERED(14) UPDATE_C_BZ_ROW_SCATTERED(15) UPDATE_C_BZ_ROW_SCATTERED(16) UPDATE_C_BZ_ROW_SCATTERED(17) UPDATE_C_BZ_ROW_SCATTERED(18) UPDATE_C_BZ_ROW_SCATTERED(19) UPDATE_C_BZ_ROW_SCATTERED(20) UPDATE_C_BZ_ROW_SCATTERED(21) UPDATE_C_BZ_ROW_SCATTERED(22) UPDATE_C_BZ_ROW_SCATTERED(23) UPDATE_C_BZ_ROW_SCATTERED(24) UPDATE_C_BZ_ROW_SCATTERED(25) UPDATE_C_BZ_ROW_SCATTERED(26) UPDATE_C_BZ_ROW_SCATTERED(27) UPDATE_C_BZ_ROW_SCATTERED(28) UPDATE_C_BZ_ROW_SCATTERED(29) UPDATE_C_BZ_ROW_SCATTERED(30) UPDATE_C_BZ_ROW_SCATTERED(31) LABEL(END) #ifdef MONITORS RDTSC MOV(VAR(botl), EAX) MOV(VAR(both), EDX) #endif END_ASM( : // output operands #ifdef MONITORS [topl] "=m" (topl), [toph] "=m" (toph), [midl] "=m" (midl), [midh] "=m" (midh), [mid2l] "=m" (mid2l), [mid2h] "=m" (mid2h), [botl] "=m" (botl), [both] "=m" (both) #endif : // input operands [k] "m" (k), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), [a_next] "m" (a_next), [b_next] "m" (b_next), [offsetPtr] "m" (offsetPtr) : // register clobber list "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", 
"zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ) #ifdef LOOPMON printf("looptime = \t%d\n", bloopl - tloopl); #endif #ifdef MONITORS dim_t top = ((dim_t)toph << 32) | topl; dim_t mid = ((dim_t)midh << 32) | midl; dim_t mid2 = ((dim_t)mid2h << 32) | mid2l; dim_t bot = ((dim_t)both << 32) | botl; printf("setup =\t%u\tmain loop =\t%u\tcleanup=\t%u\ttotal=\t%u\n", mid - top, mid2 - mid, bot - mid2, bot - top); #endif } blis-0.6.1/kernels/knl/3/other/000077500000000000000000000000001360743507500161735ustar00rootroot00000000000000blis-0.6.1/kernels/knl/3/other/bli_dgemm_knl_asm_12x16.c000066400000000000000000000604751360743507500226370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "bli_avx512_macros.h" extern int32_t offsets[16]; void bli_dgemm_knl_asm_12x16 ( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //const void* a_next = bli_auxinfo_next_a( data ); //const void* b_next = bli_auxinfo_next_b( data ); const int32_t * offsetPtr = &offsets[0]; __asm__ volatile ( VPXORD(ZMM(8), ZMM(8), ZMM(8)) MOV(RAX, VAR(a)) VMOVAPD(ZMM( 9), ZMM(8)) MOV(RBX, VAR(b)) VMOVAPD(ZMM(10), ZMM(8)) //no ADD(RBX, IMM(4*64)) VMOVAPD(ZMM(11), ZMM(8)) //maybe? PREFETCH(0, MEM(RAX, 0)) VMOVAPD(ZMM(12), ZMM(8)) //maybe? 
PREFETCH(0, MEM(RAX,64)) VMOVAPD(ZMM(13), ZMM(8)) VMOVAPD(ZMM(0), MEM(RBX,0*64)) VMOVAPD(ZMM(14), ZMM(8)) VMOVAPD(ZMM(1), MEM(RBX,1*64)) VMOVAPD(ZMM(15), ZMM(8)) MOV(RCX, VAR(c)) VMOVAPD(ZMM(16), ZMM(8)) MOV(RDI, RCX) VMOVAPD(ZMM(17), ZMM(8)) VBROADCASTSS(ZMM(4), VAR(cs_c)) VMOVAPD(ZMM(18), ZMM(8)) VMOVAPS(ZMM(5), VAR(offsetPtr)) VMOVAPD(ZMM(19), ZMM(8)) VPMULLD(ZMM(4), ZMM(5), ZMM(4)) VMOVAPD(ZMM(20), ZMM(8)) MOV(RDX, IMM(0xFFF)) VMOVAPD(ZMM(21), ZMM(8)) KMOV(K(1), EDX) VMOVAPD(ZMM(22), ZMM(8)) KMOV(K(2), EDX) VMOVAPD(ZMM(23), ZMM(8)) KMOV(K(3), EDX) VMOVAPD(ZMM(24), ZMM(8)) VSCATTERPFDPS(0, MEM(RCX,ZMM(4),8, 0) MASK_K(1)) VMOVAPD(ZMM(25), ZMM(8)) VSCATTERPFDPS(0, MEM(RCX,ZMM(4),8, 8*8) MASK_K(2)) VMOVAPD(ZMM(26), ZMM(8)) VSCATTERPFDPS(0, MEM(RCX,ZMM(4),8,15*8) MASK_K(3)) VMOVAPD(ZMM(27), ZMM(8)) MOV(RSI, VAR(k)) VMOVAPD(ZMM(28), ZMM(8)) SAR(RSI, IMM(2)) // rsi = k/4 VMOVAPD(ZMM(29), ZMM(8)) VMOVAPD(ZMM(30), ZMM(8)) VMOVAPD(ZMM(31), ZMM(8)) JZ(.DCONSIDKLEFT) ALIGN16 LABEL(.DLOOPKITER) VBROADCASTSD(ZMM(2), MEM(RAX, 0*8)) // Iteration 0 VBROADCASTSD(ZMM(3), MEM(RAX, 1*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 2*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 3*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 4*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 5*8)) VFMADD231PD(ZMM( 8), ZMM(0), ZMM(2)) VFMADD231PD(ZMM( 9), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(10), ZMM(0), ZMM(3)) VFMADD231PD(ZMM(11), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(12), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(13), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(14), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(15), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(16), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(17), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(18), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(19), ZMM(1), ZMM(7)) VBROADCASTSD(ZMM(2), MEM(RAX, 6*8)) VBROADCASTSD(ZMM(3), MEM(RAX, 7*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 8*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 9*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 10*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 11*8)) VFMADD231PD(ZMM(20), ZMM(0), ZMM(2)) VFMADD231PD(ZMM(21), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(22), ZMM(0), 
ZMM(3)) VFMADD231PD(ZMM(23), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(24), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(25), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(26), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(27), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(28), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(29), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(30), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(31), ZMM(1), ZMM(7)) VMOVAPD(ZMM(0), MEM(RBX,2*64)) VMOVAPD(ZMM(1), MEM(RBX,3*64)) PREFETCH(0, MEM(RAX, 64*8)) PREFETCH(0, MEM(RAX, 72*8)) VBROADCASTSD(ZMM(2), MEM(RAX, 12*8)) // Iteration 1 VBROADCASTSD(ZMM(3), MEM(RAX, 13*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 14*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 15*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 16*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 17*8)) VFMADD231PD(ZMM( 8), ZMM(0), ZMM(2)) VFMADD231PD(ZMM( 9), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(10), ZMM(0), ZMM(3)) VFMADD231PD(ZMM(11), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(12), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(13), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(14), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(15), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(16), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(17), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(18), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(19), ZMM(1), ZMM(7)) VBROADCASTSD(ZMM(2), MEM(RAX, 18*8)) VBROADCASTSD(ZMM(3), MEM(RAX, 19*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 20*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 21*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 22*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 23*8)) VFMADD231PD(ZMM(20), ZMM(0), ZMM(2)) VFMADD231PD(ZMM(21), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(22), ZMM(0), ZMM(3)) VFMADD231PD(ZMM(23), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(24), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(25), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(26), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(27), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(28), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(29), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(30), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(31), ZMM(1), ZMM(7)) VMOVAPD(ZMM(0), MEM(RBX,4*64)) VMOVAPD(ZMM(1), MEM(RBX,5*64)) PREFETCH(0, MEM(RAX, 80*8)) PREFETCH(0, MEM(RAX, 88*8)) VBROADCASTSD(ZMM(2), MEM(RAX, 24*8)) // Iteration 2 VBROADCASTSD(ZMM(3), 
MEM(RAX, 25*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 26*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 27*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 28*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 29*8)) VFMADD231PD(ZMM( 8), ZMM(0), ZMM(2)) VFMADD231PD(ZMM( 9), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(10), ZMM(0), ZMM(3)) VFMADD231PD(ZMM(11), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(12), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(13), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(14), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(15), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(16), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(17), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(18), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(19), ZMM(1), ZMM(7)) VBROADCASTSD(ZMM(2), MEM(RAX, 30*8)) VBROADCASTSD(ZMM(3), MEM(RAX, 31*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 32*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 33*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 34*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 35*8)) VFMADD231PD(ZMM(20), ZMM(0), ZMM(2)) VFMADD231PD(ZMM(21), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(22), ZMM(0), ZMM(3)) VFMADD231PD(ZMM(23), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(24), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(25), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(26), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(27), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(28), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(29), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(30), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(31), ZMM(1), ZMM(7)) VMOVAPD(ZMM(0), MEM(RBX,6*64)) VMOVAPD(ZMM(1), MEM(RBX,7*64)) ADD(RBX, IMM(4*8*16)) PREFETCH(0, MEM(RAX, 96*8)) PREFETCH(0, MEM(RAX, 104*8)) VBROADCASTSD(ZMM(2), MEM(RAX, 36*8)) // Iteration 3 VBROADCASTSD(ZMM(3), MEM(RAX, 37*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 38*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 39*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 40*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 41*8)) VFMADD231PD(ZMM( 8), ZMM(0), ZMM(2)) VFMADD231PD(ZMM( 9), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(10), ZMM(0), ZMM(3)) VFMADD231PD(ZMM(11), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(12), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(13), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(14), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(15), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(16), ZMM(0), ZMM(6)) 
VFMADD231PD(ZMM(17), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(18), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(19), ZMM(1), ZMM(7)) VBROADCASTSD(ZMM(2), MEM(RAX, 42*8)) VBROADCASTSD(ZMM(3), MEM(RAX, 43*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 44*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 45*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 46*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 47*8)) VFMADD231PD(ZMM(20), ZMM(0), ZMM(2)) VFMADD231PD(ZMM(21), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(22), ZMM(0), ZMM(3)) VFMADD231PD(ZMM(23), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(24), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(25), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(26), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(27), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(28), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(29), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(30), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(31), ZMM(1), ZMM(7)) ADD(RAX, IMM(4*8*12)) SUB(RSI, IMM(1)) VMOVAPD(ZMM(0), MEM(RBX,0*64)) VMOVAPD(ZMM(1), MEM(RBX,1*64)) JNZ(.DLOOPKITER) LABEL(.DCONSIDKLEFT) MOV(RSI, VAR(k)) AND(RSI, IMM(3)) // rsi = k%4 JZ(.DPOSTACCUM) ALIGN16 LABEL(.DLOOPKLEFT) VBROADCASTSD(ZMM(2), MEM(RAX, 0*8)) VBROADCASTSD(ZMM(3), MEM(RAX, 1*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 2*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 3*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 4*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 5*8)) VFMADD231PD(ZMM( 8), ZMM(0), ZMM(2)) VFMADD231PD(ZMM( 9), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(10), ZMM(0), ZMM(3)) VFMADD231PD(ZMM(11), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(12), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(13), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(14), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(15), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(16), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(17), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(18), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(19), ZMM(1), ZMM(7)) VBROADCASTSD(ZMM(2), MEM(RAX, 6*8)) VBROADCASTSD(ZMM(3), MEM(RAX, 7*8)) VBROADCASTSD(ZMM(4), MEM(RAX, 8*8)) VBROADCASTSD(ZMM(5), MEM(RAX, 9*8)) VBROADCASTSD(ZMM(6), MEM(RAX, 10*8)) VBROADCASTSD(ZMM(7), MEM(RAX, 11*8)) VFMADD231PD(ZMM(20), ZMM(0), ZMM(2)) VFMADD231PD(ZMM(21), ZMM(1), ZMM(2)) VFMADD231PD(ZMM(22), ZMM(0), ZMM(3)) 
VFMADD231PD(ZMM(23), ZMM(1), ZMM(3)) VFMADD231PD(ZMM(24), ZMM(0), ZMM(4)) VFMADD231PD(ZMM(25), ZMM(1), ZMM(4)) VFMADD231PD(ZMM(26), ZMM(0), ZMM(5)) VFMADD231PD(ZMM(27), ZMM(1), ZMM(5)) VFMADD231PD(ZMM(28), ZMM(0), ZMM(6)) VFMADD231PD(ZMM(29), ZMM(1), ZMM(6)) VFMADD231PD(ZMM(30), ZMM(0), ZMM(7)) VFMADD231PD(ZMM(31), ZMM(1), ZMM(7)) ADD(RAX, IMM(12*8)) ADD(RBX, IMM(16*8)) SUB(RSI, IMM(1)) VMOVAPD(ZMM(0), MEM(RBX,0*64)) VMOVAPD(ZMM(1), MEM(RBX,1*64)) JNZ(.DLOOPKLEFT) LABEL(.DPOSTACCUM) MOV(RAX, VAR(alpha)) MOV(RBX, VAR(beta)) VBROADCASTSD(ZMM(0), MEM(RAX)) VBROADCASTSD(ZMM(1), MEM(RBX)) VMULPD(ZMM( 8), ZMM( 8), ZMM(0)) VMULPD(ZMM( 9), ZMM( 9), ZMM(0)) VMULPD(ZMM(10), ZMM(10), ZMM(0)) VMULPD(ZMM(11), ZMM(11), ZMM(0)) VMULPD(ZMM(12), ZMM(12), ZMM(0)) VMULPD(ZMM(13), ZMM(13), ZMM(0)) VMULPD(ZMM(14), ZMM(14), ZMM(0)) VMULPD(ZMM(15), ZMM(15), ZMM(0)) VMULPD(ZMM(16), ZMM(16), ZMM(0)) VMULPD(ZMM(17), ZMM(17), ZMM(0)) VMULPD(ZMM(18), ZMM(18), ZMM(0)) VMULPD(ZMM(19), ZMM(19), ZMM(0)) VMULPD(ZMM(20), ZMM(20), ZMM(0)) VMULPD(ZMM(21), ZMM(21), ZMM(0)) VMULPD(ZMM(22), ZMM(22), ZMM(0)) VMULPD(ZMM(23), ZMM(23), ZMM(0)) VMULPD(ZMM(24), ZMM(24), ZMM(0)) VMULPD(ZMM(25), ZMM(25), ZMM(0)) VMULPD(ZMM(26), ZMM(26), ZMM(0)) VMULPD(ZMM(27), ZMM(27), ZMM(0)) VMULPD(ZMM(28), ZMM(28), ZMM(0)) VMULPD(ZMM(29), ZMM(29), ZMM(0)) VMULPD(ZMM(30), ZMM(30), ZMM(0)) VMULPD(ZMM(31), ZMM(31), ZMM(0)) MOV(RDI, VAR(rs_c)) SUB(RDI, IMM(1)) JNZ(.DGENSTORED) LABEL(.ROWSTORED) MOV(RSI, VAR(cs_c)) MOV(R(8), MEM(RBX)) LEA(RSI, MEM(,RSI,8)) LEA(RDX, MEM(RCX,RSI,4)) LEA(RDI, MEM(RCX,RSI,8)) LEA(R(13), MEM(RSI,RSI,2)) SAL1(R(8)) // shift out the sign bit to check for +/- zero JZ(.DROWSTORBZ) VFMADD231PD(ZMM( 8), ZMM(1), MEM(RCX)) VFMADD231PD(ZMM( 9), ZMM(1), MEM(RCX,64)) VMOVUPD(MEM(RCX), ZMM( 8)) VMOVUPD(MEM(RCX,64), ZMM( 9)) VFMADD231PD(ZMM(10), ZMM(1), MEM(RCX,RSI,1)) VFMADD231PD(ZMM(11), ZMM(1), MEM(RCX,RSI,1,64)) VMOVUPD(MEM(RCX,RSI,1), ZMM(10)) VMOVUPD(MEM(RCX,RSI,1,64), ZMM(11)) VFMADD231PD(ZMM(12), ZMM(1), 
MEM(RCX,RSI,2)) VFMADD231PD(ZMM(13), ZMM(1), MEM(RCX,RSI,2,64)) VMOVUPD(MEM(RCX,RSI,2), ZMM(12)) VMOVUPD(MEM(RCX,RSI,2,64), ZMM(13)) VFMADD231PD(ZMM(14), ZMM(1), MEM(RCX,R(13),1)) VFMADD231PD(ZMM(15), ZMM(1), MEM(RCX,R(13),1,64)) VMOVUPD(MEM(RCX,R(13),1), ZMM(14)) VMOVUPD(MEM(RCX,R(13),1,64), ZMM(15)) VFMADD231PD(ZMM(16), ZMM(1), MEM(RDX)) VFMADD231PD(ZMM(17), ZMM(1), MEM(RDX,64)) VMOVUPD(MEM(RDX), ZMM(16)) VMOVUPD(MEM(RDX,64), ZMM(17)) VFMADD231PD(ZMM(18), ZMM(1), MEM(RDX,RSI,1)) VFMADD231PD(ZMM(19), ZMM(1), MEM(RDX,RSI,1,64)) VMOVUPD(MEM(RDX,RSI,1), ZMM(18)) VMOVUPD(MEM(RDX,RSI,1,64), ZMM(19)) VFMADD231PD(ZMM(20), ZMM(1), MEM(RDX,RSI,2)) VFMADD231PD(ZMM(21), ZMM(1), MEM(RDX,RSI,2,64)) VMOVUPD(MEM(RDX,RSI,2), ZMM(20)) VMOVUPD(MEM(RDX,RSI,2,64), ZMM(21)) VFMADD231PD(ZMM(22), ZMM(1), MEM(RDX,R(13),1)) VFMADD231PD(ZMM(23), ZMM(1), MEM(RDX,R(13),1,64)) VMOVUPD(MEM(RDX,R(13),1), ZMM(22)) VMOVUPD(MEM(RDX,R(13),1,64), ZMM(23)) VFMADD231PD(ZMM(24), ZMM(1), MEM(RDI)) VFMADD231PD(ZMM(25), ZMM(1), MEM(RDI,64)) VMOVUPD(MEM(RDI), ZMM(24)) VMOVUPD(MEM(RDI,64), ZMM(25)) VFMADD231PD(ZMM(26), ZMM(1), MEM(RDI,RSI,1)) VFMADD231PD(ZMM(27), ZMM(1), MEM(RDI,RSI,1,64)) VMOVUPD(MEM(RDI,RSI,1), ZMM(26)) VMOVUPD(MEM(RDI,RSI,1,64), ZMM(27)) VFMADD231PD(ZMM(28), ZMM(1), MEM(RDI,RSI,2)) VFMADD231PD(ZMM(29), ZMM(1), MEM(RDI,RSI,2,64)) VMOVUPD(MEM(RDI,RSI,2), ZMM(28)) VMOVUPD(MEM(RDI,RSI,2,64), ZMM(29)) VFMADD231PD(ZMM(30), ZMM(1), MEM(RDI,R(13),1)) VFMADD231PD(ZMM(31), ZMM(1), MEM(RDI,R(13),1,64)) VMOVUPD(MEM(RDI,R(13),1), ZMM(30)) VMOVUPD(MEM(RDI,R(13),1,64), ZMM(31)) JMP(.DDONE) LABEL(.DROWSTORBZ) VMOVUPD(MEM(RCX), ZMM( 8)) VMOVUPD(MEM(RCX,64), ZMM( 9)) VMOVUPD(MEM(RCX,RSI,1), ZMM(10)) VMOVUPD(MEM(RCX,RSI,1,64), ZMM(11)) VMOVUPD(MEM(RCX,RSI,2), ZMM(12)) VMOVUPD(MEM(RCX,RSI,2,64), ZMM(13)) VMOVUPD(MEM(RCX,R(13),1), ZMM(14)) VMOVUPD(MEM(RCX,R(13),1,64), ZMM(15)) VMOVUPD(MEM(RDX), ZMM(16)) VMOVUPD(MEM(RDX,64), ZMM(17)) VMOVUPD(MEM(RDX,RSI,1), ZMM(18)) VMOVUPD(MEM(RDX,RSI,1,64), ZMM(19)) 
VMOVUPD(MEM(RDX,RSI,2), ZMM(20)) VMOVUPD(MEM(RDX,RSI,2,64), ZMM(21)) VMOVUPD(MEM(RDX,R(13),1), ZMM(22)) VMOVUPD(MEM(RDX,R(13),1,64), ZMM(23)) VMOVUPD(MEM(RDI), ZMM(24)) VMOVUPD(MEM(RDI,64), ZMM(25)) VMOVUPD(MEM(RDI,RSI,1), ZMM(26)) VMOVUPD(MEM(RDI,RSI,1,64), ZMM(27)) VMOVUPD(MEM(RDI,RSI,2), ZMM(28)) VMOVUPD(MEM(RDI,RSI,2,64), ZMM(29)) VMOVUPD(MEM(RDI,R(13),1), ZMM(30)) VMOVUPD(MEM(RDI,R(13),1,64), ZMM(31)) JMP(.DDONE) LABEL(.DGENSTORED) MOV(RDI, VAR(cs_c)) MOV(RAX, VAR(rs_c)) LEA(RDI, MEM(,RDI,8)) MOV(R(8), MEM(RBX)) VBROADCASTSS(YMM(4), VAR(rs_c)) VMOVAPS(YMM(5), VAR(offsetPtr)) VPMULLD(YMM(4), YMM(5), YMM(4)) LEA(RDX, MEM(RCX,RAX,8)) MOV(RSI, 0x3F) SAL1(R(8)) // shift out the sign bit to check for +/- zero JZ(.DGENSTORBZ) KMOV(K(1), ESI) KMOV(K(2), ESI) VGATHERDPD(ZMM(2) MASK_K(1), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(2), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM( 8)) VFMADD231PD(ZMM(3), ZMM(1), ZMM( 9)) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM( 8)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM( 9)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(3), ESI) KMOV(K(4), ESI) VGATHERDPD(ZMM(2) MASK_K(3), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(4), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(10)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(11)) KMOV(K(3), ESI) KMOV(K(4), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(3), ZMM(10)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(4), ZMM(11)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VGATHERDPD(ZMM(2) MASK_K(1), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(2), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(12)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(13)) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(12)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(13)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(3), ESI) KMOV(K(4), ESI) VGATHERDPD(ZMM(2) MASK_K(3), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(4), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(14)) 
VFMADD231PD(ZMM(3), ZMM(1), ZMM(15)) KMOV(K(3), ESI) KMOV(K(4), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(3), ZMM(14)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(4), ZMM(15)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VGATHERDPD(ZMM(2) MASK_K(1), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(2), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(16)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(17)) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(16)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(17)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(3), ESI) KMOV(K(4), ESI) VGATHERDPD(ZMM(2) MASK_K(3), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(4), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(18)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(19)) KMOV(K(3), ESI) KMOV(K(4), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(3), ZMM(18)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(4), ZMM(19)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VGATHERDPD(ZMM(2) MASK_K(1), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(2), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(20)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(21)) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(20)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(21)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(3), ESI) KMOV(K(4), ESI) VGATHERDPD(ZMM(2) MASK_K(3), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(4), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(22)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(23)) KMOV(K(3), ESI) KMOV(K(4), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(3), ZMM(22)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(4), ZMM(23)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VGATHERDPD(ZMM(2) MASK_K(1), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(2), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(24)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(25)) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(24)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(25)) ADD(RCX, RDI) 
ADD(RDX, RDI) KMOV(K(3), ESI) KMOV(K(4), ESI) VGATHERDPD(ZMM(2) MASK_K(3), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(4), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(26)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(27)) KMOV(K(3), ESI) KMOV(K(4), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(3), ZMM(26)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(4), ZMM(27)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VGATHERDPD(ZMM(2) MASK_K(1), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(2), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(28)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(29)) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(28)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(29)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(3), ESI) KMOV(K(4), ESI) VGATHERDPD(ZMM(2) MASK_K(3), MEM(RCX,YMM(4),8)) VGATHERDPD(ZMM(3) MASK_K(4), MEM(RDX,YMM(4),8)) VFMADD231PD(ZMM(2), ZMM(1), ZMM(30)) VFMADD231PD(ZMM(3), ZMM(1), ZMM(31)) KMOV(K(3), ESI) KMOV(K(4), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(3), ZMM(30)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(4), ZMM(31)) ADD(RCX, RDI) ADD(RDX, RDI) JMP(.DDONE) LABEL(.DGENSTORBZ) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM( 8)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM( 9)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(10)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(11)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(12)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(13)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(14)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(15)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(16)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(17)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) 
MASK_K(1), ZMM(18)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(19)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(20)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(21)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(22)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(23)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(24)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(25)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(26)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(27)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(28)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(29)) ADD(RCX, RDI) ADD(RDX, RDI) KMOV(K(1), ESI) KMOV(K(2), ESI) VSCATTERDPD(MEM(RCX,YMM(4),8) MASK_K(1), ZMM(30)) VSCATTERDPD(MEM(RDX,YMM(4),8) MASK_K(2), ZMM(31)) ADD(RCX, RDI) ADD(RDX, RDI) LABEL(.DDONE) : // output operands (none) : // input operands [k] "m" (k), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), //[a_next] "m" (a_next), //[b_next] "m" (b_next), [offsetPtr] "m" (offsetPtr) : // register clobber list "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ); } blis-0.6.1/kernels/knl/3/other/bli_dgemm_knl_asm_30x8.c000066400000000000000000000563231360743507500225550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #include #include "bli_avx512_macros.h" #define UNROLL_K 32 #define SCATTER_PREFETCH_C 1 #define PREFETCH_A_L2 0 #define PREFETCH_B_L2 0 #define L2_PREFETCH_DIST 64 #define A_L1_PREFETCH_DIST 18 #define B_L1_PREFETCH_DIST 18 #define LOOP_ALIGN ALIGN16 #define UPDATE_C_FOUR_ROWS(R1,R2,R3,R4) \ \ VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \ VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \ VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \ VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \ VFMADD231PD(ZMM(R1), ZMM(1), MEM(RCX )) \ VFMADD231PD(ZMM(R2), ZMM(1), MEM(RCX,RAX,1)) \ VFMADD231PD(ZMM(R3), ZMM(1), MEM(RCX,RAX,2)) \ VFMADD231PD(ZMM(R4), ZMM(1), MEM(RCX,RDI,1)) \ VMOVUPD(MEM(RCX ), ZMM(R1)) \ VMOVUPD(MEM(RCX,RAX,1), ZMM(R2)) \ VMOVUPD(MEM(RCX,RAX,2), ZMM(R3)) \ VMOVUPD(MEM(RCX,RDI,1), ZMM(R4)) \ LEA(RCX, MEM(RCX,RAX,4)) #define UPDATE_C_TWO_ROWS(R1,R2) \ \ VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \ VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \ VFMADD231PD(ZMM(R1), ZMM(1), MEM(RCX )) \ VFMADD231PD(ZMM(R2), ZMM(1), MEM(RCX,RAX,1)) \ VMOVUPD(MEM(RCX ), ZMM(R1)) \ VMOVUPD(MEM(RCX,RAX,1), ZMM(R2)) \ LEA(RCX, MEM(RCX,RAX,2)) #define UPDATE_C_BZ_FOUR_ROWS(R1,R2,R3,R4) \ \ VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \ VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \ VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \ VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \ VMOVUPD(MEM(RCX ), ZMM(R1)) \ VMOVUPD(MEM(RCX,RAX,1), ZMM(R2)) \ VMOVUPD(MEM(RCX,RAX,2), ZMM(R3)) \ VMOVUPD(MEM(RCX,RDI,1), ZMM(R4)) \ LEA(RCX, MEM(RCX,RAX,4)) #define UPDATE_C_BZ_TWO_ROWS(R1,R2) \ \ VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \ VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \ VMOVUPD(MEM(RCX ), ZMM(R1)) \ VMOVUPD(MEM(RCX,RAX,1), ZMM(R2)) \ LEA(RCX, MEM(RCX,RAX,2)) #define UPDATE_C_ROW_SCATTERED(NUM) \ \ KXNORW(K(1), K(0), K(0)) \ KXNORW(K(2), K(0), K(0)) \ VGATHERDPD(ZMM(1) MASK_K(1), MEM(RCX,YMM(0),8)) \ VFMADD231PD(ZMM(NUM), ZMM(1), MEM_1TO8(RBX)) \ VSCATTERDPD(MEM(RCX,YMM(0),8) MASK_K(2), ZMM(NUM)) \ ADD(RCX, RAX) #define UPDATE_C_BZ_ROW_SCATTERED(NUM) \ \ KXNORW(K(1), K(0), K(0)) \ VSCATTERDPD(MEM(RCX,YMM(0),8) 
MASK_K(1), ZMM(NUM)) \ ADD(RCX, RAX) #define PREFETCH_A_L1_1(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*32*8)) #define PREFETCH_A_L1_2(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*32*8+64)) #define PREFETCH_A_L1_3(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*32*8+128)) #define PREFETCH_A_L1_4(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*32*8+192)) #if PREFETCH_A_L2 #undef PREFETCH_A_L2 #define PREFETCH_A_L2(n) \ \ PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*32*8)) \ PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*32*8+64)) \ PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*32*8+128)) \ PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*32*8+192)) #else #undef PREFETCH_A_L2 #define PREFETCH_A_L2(...) #endif #define PREFETCH_B_L1(n) PREFETCH(0, MEM(RBX,(B_L1_PREFETCH_DIST+n)*8*8)) #if PREFETCH_B_L2 #undef PREFETCH_B_L2 #define PREFETCH_B_L2(n) PREFETCH(1, MEM(RBX,(L2_PREFETCH_DIST+n)*8*8)) #else #undef PREFETCH_B_L2 #define PREFETCH_B_L2(...) #endif #define PREFETCH_C_L1_1 #define PREFETCH_C_L1_2 #define PREFETCH_C_L1_3 #define PREFETCH_C_L1_4 // // n: index in unrolled loop // // a: ZMM register to load into // b: ZMM register to read from // // ...: addressing for A, except for offset // #define SUBITER(n,a,b,...) 
\ \ PREFETCH_A_L2(n) \ \ VMOVAPD(ZMM(a), MEM(RBX,(n+1)*64)) \ VFMADD231PD(ZMM( 2), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 0)*8)) \ VFMADD231PD(ZMM( 3), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 1)*8)) \ VFMADD231PD(ZMM( 4), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 2)*8)) \ PREFETCH_A_L1_1(n) \ VFMADD231PD(ZMM( 5), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 3)*8)) \ VFMADD231PD(ZMM( 6), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 4)*8)) \ VFMADD231PD(ZMM( 7), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 5)*8)) \ PREFETCH_C_L1_1 \ VFMADD231PD(ZMM( 8), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 6)*8)) \ VFMADD231PD(ZMM( 9), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 7)*8)) \ VFMADD231PD(ZMM(10), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 8)*8)) \ PREFETCH_A_L1_2(n) \ VFMADD231PD(ZMM(11), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+ 9)*8)) \ VFMADD231PD(ZMM(12), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+10)*8)) \ VFMADD231PD(ZMM(13), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+11)*8)) \ PREFETCH_C_L1_2 \ VFMADD231PD(ZMM(14), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+12)*8)) \ VFMADD231PD(ZMM(15), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+13)*8)) \ VFMADD231PD(ZMM(16), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+14)*8)) \ PREFETCH_A_L1_3(n) \ VFMADD231PD(ZMM(17), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+15)*8)) \ VFMADD231PD(ZMM(18), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+16)*8)) \ VFMADD231PD(ZMM(19), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+17)*8)) \ PREFETCH_C_L1_3 \ VFMADD231PD(ZMM(20), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+18)*8)) \ VFMADD231PD(ZMM(21), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+19)*8)) \ VFMADD231PD(ZMM(22), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+20)*8)) \ PREFETCH_A_L1_4(n) \ VFMADD231PD(ZMM(23), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+21)*8)) \ VFMADD231PD(ZMM(24), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+22)*8)) \ VFMADD231PD(ZMM(25), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+23)*8)) \ PREFETCH_C_L1_4 \ VFMADD231PD(ZMM(26), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+24)*8)) \ 
VFMADD231PD(ZMM(27), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+25)*8)) \ VFMADD231PD(ZMM(28), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+26)*8)) \ PREFETCH_B_L1(n) \ VFMADD231PD(ZMM(29), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+27)*8)) \ VFMADD231PD(ZMM(30), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+28)*8)) \ VFMADD231PD(ZMM(31), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*32+29)*8)) \ PREFETCH_B_L2(n) //This is an array used for the scatter/gather instructions. extern int32_t offsets[32]; //#define MONITORS //#define LOOPMON void bli_dgemm_knl_asm_30x8 ( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { const int32_t * offsetPtr = &offsets[0]; uint64_t k64 = k; __asm__ volatile ( VPXORD(ZMM(2), ZMM(2), ZMM(2)) //clear out registers VMOVAPS(ZMM( 3), ZMM(2)) VMOVAPS(ZMM( 4), ZMM(2)) VMOVAPS(ZMM( 5), ZMM(2)) VMOVAPS(ZMM( 6), ZMM(2)) VMOVAPS(ZMM( 7), ZMM(2)) VMOVAPS(ZMM( 8), ZMM(2)) VMOVAPS(ZMM( 9), ZMM(2)) MOV(R12, VAR(rs_c)) VMOVAPS(ZMM(10), ZMM(2)) MOV(RSI, VAR(k)) //loop index VMOVAPS(ZMM(11), ZMM(2)) MOV(RAX, VAR(a)) //load address of a VMOVAPS(ZMM(12), ZMM(2)) MOV(RBX, VAR(b)) //load address of b VMOVAPS(ZMM(13), ZMM(2)) MOV(RCX, VAR(c)) //load address of c VMOVAPS(ZMM(14), ZMM(2)) VMOVAPD(ZMM(0), MEM(RBX)) //pre-load b VMOVAPS(ZMM(15), ZMM(2)) MOV(RDI, VAR(offsetPtr)) VMOVAPS(ZMM(16), ZMM(2)) VMOVAPS(ZMM(17), ZMM(2)) VMOVAPS(ZMM(18), ZMM(2)) VMOVAPS(ZMM(19), ZMM(2)) LEA(R13, MEM(R12,R12,2)) VMOVAPS(ZMM(20), ZMM(2)) LEA(R14, MEM(R12,R12,4)) VMOVAPS(ZMM(21), ZMM(2)) LEA(R15, MEM(R13,R12,4)) VMOVAPS(ZMM(22), ZMM(2)) VMOVAPS(ZMM(23), ZMM(2)) VMOVAPS(ZMM(24), ZMM(2)) VMOVAPS(ZMM(25), ZMM(2)) MOV(R8, IMM(4*32*8)) //offset for 4 iterations VMOVAPS(ZMM(26), ZMM(2)) LEA(R9, MEM(R8,R8,2)) //*3 VMOVAPS(ZMM(27), ZMM(2)) LEA(R10, MEM(R8,R8,4)) //*5 VMOVAPS(ZMM(28), ZMM(2)) LEA(R11, MEM(R9,R8,4)) //*7 VMOVAPS(ZMM(29), ZMM(2)) VMOVAPS(ZMM(30), ZMM(2)) 
VMOVAPS(ZMM(31), ZMM(2)) SUB(RSI, IMM(38)) JLE(TAIL) //prefetch C into L2 #if SCATTER_PREFETCH_C VPBROADCASTD(ZMM(0), R12D) VPBROADCASTD(ZMM(1), R12D) VPMULLD(ZMM(0), ZMM(0), MEM(RDI)) VPMULLD(ZMM(1), ZMM(1), MEM(RDI,64)) ADD(RSI, IMM(30)) KXNORW(K(1), K(0), K(0)) KSHIFTRW(K(2), K(1), IMM(2)) VSCATTERPFDPS(1, MEM(RCX,ZMM(0),8) MASK_K(1)) VSCATTERPFDPS(1, MEM(RCX,ZMM(1),8) MASK_K(2)) VMOVAPD(ZMM(0), MEM(RBX)) #else PREFETCHW1(MEM(RCX )) SUBITER( 0,1,0,RAX ) PREFETCHW1(MEM(RCX,R12,1)) SUBITER( 1,0,1,RAX ) PREFETCHW1(MEM(RCX,R12,2)) SUBITER( 2,1,0,RAX ) PREFETCHW1(MEM(RCX,R13,1)) SUBITER( 3,0,1,RAX ) PREFETCHW1(MEM(RCX,R12,4)) SUBITER( 4,1,0,RAX,R8, 1) PREFETCHW1(MEM(RCX,R14,1)) SUBITER( 5,0,1,RAX,R8, 1) PREFETCHW1(MEM(RCX,R13,2)) SUBITER( 6,1,0,RAX,R8, 1) PREFETCHW1(MEM(RCX,R15,1)) LEA(RDX, MEM(RCX,R12,8)) SUBITER( 7,0,1,RAX,R8, 1) PREFETCHW1(MEM(RDX )) SUBITER( 8,1,0,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,1)) SUBITER( 9,0,1,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,2)) SUBITER(10,1,0,RAX,R8, 2) PREFETCHW1(MEM(RDX,R13,1)) SUBITER(11,0,1,RAX,R8, 2) PREFETCHW1(MEM(RDX,R12,4)) SUBITER(12,1,0,RAX,R9, 1) PREFETCHW1(MEM(RDX,R14,1)) SUBITER(13,0,1,RAX,R9, 1) PREFETCHW1(MEM(RDX,R13,2)) SUBITER(14,1,0,RAX,R9, 1) PREFETCHW1(MEM(RDX,R15,1)) LEA(RDX, MEM(RDX,R12,8)) SUBITER(15,0,1,RAX,R9, 1) PREFETCHW1(MEM(RDX )) SUBITER(16,1,0,RAX,R8, 4) PREFETCHW1(MEM(RDX,R12,1)) SUBITER(17,0,1,RAX,R8, 4) PREFETCHW1(MEM(RDX,R12,2)) SUBITER(18,1,0,RAX,R8, 4) PREFETCHW1(MEM(RDX,R13,1)) SUBITER(19,0,1,RAX,R8, 4) PREFETCHW1(MEM(RDX,R12,4)) SUBITER(20,1,0,RAX,R10,1) PREFETCHW1(MEM(RDX,R14,1)) SUBITER(21,0,1,RAX,R10,1) PREFETCHW1(MEM(RDX,R13,2)) SUBITER(22,1,0,RAX,R10,1) PREFETCHW1(MEM(RDX,R15,1)) LEA(RDX, MEM(RDX,R12,8)) SUBITER(23,0,1,RAX,R10,1) PREFETCHW1(MEM(RDX )) SUBITER(24,1,0,RAX,R9, 2) PREFETCHW1(MEM(RDX,R12,1)) SUBITER(25,0,1,RAX,R9, 2) PREFETCHW1(MEM(RDX,R12,2)) SUBITER(26,1,0,RAX,R9, 2) PREFETCHW1(MEM(RDX,R13,1)) SUBITER(27,0,1,RAX,R9, 2) PREFETCHW1(MEM(RDX,R12,4)) SUBITER(28,1,0,RAX,R11,1) 
PREFETCHW1(MEM(RDX,R14,1)) SUBITER(29,0,1,RAX,R11,1) ADD(RAX, IMM(30*32*8)) ADD(RBX, IMM(30* 8*8)) #endif MOV(RDI, RSI) AND(RDI, IMM(31)) SAR(RSI, IMM(5)) JZ(REM_1) LOOP_ALIGN LABEL(MAIN_LOOP) SUBITER( 0,1,0,RAX ) SUBITER( 1,0,1,RAX ) SUBITER( 2,1,0,RAX ) SUBITER( 3,0,1,RAX ) SUBITER( 4,1,0,RAX,R8, 1) SUBITER( 5,0,1,RAX,R8, 1) SUBITER( 6,1,0,RAX,R8, 1) SUBITER( 7,0,1,RAX,R8, 1) SUBITER( 8,1,0,RAX,R8, 2) SUBITER( 9,0,1,RAX,R8, 2) SUBITER(10,1,0,RAX,R8, 2) SUBITER(11,0,1,RAX,R8, 2) SUBITER(12,1,0,RAX,R9, 1) SUBITER(13,0,1,RAX,R9, 1) SUBITER(14,1,0,RAX,R9, 1) SUBITER(15,0,1,RAX,R9, 1) SUBITER(16,1,0,RAX,R8, 4) SUBITER(17,0,1,RAX,R8, 4) SUBITER(18,1,0,RAX,R8, 4) SUBITER(19,0,1,RAX,R8, 4) SUBITER(20,1,0,RAX,R10,1) SUBITER(21,0,1,RAX,R10,1) SUBITER(22,1,0,RAX,R10,1) SUBITER(23,0,1,RAX,R10,1) SUBITER(24,1,0,RAX,R9, 2) SUBITER(25,0,1,RAX,R9, 2) SUBITER(26,1,0,RAX,R9, 2) SUBITER(27,0,1,RAX,R9, 2) SUBITER(28,1,0,RAX,R11,1) SUBITER(29,0,1,RAX,R11,1) SUBITER(30,1,0,RAX,R11,1) SUBITER(31,0,1,RAX,R11,1) ADD(RAX, IMM(32*32*8)) ADD(RBX, IMM(32* 8*8)) SUB(RSI, IMM(1)) JNZ(MAIN_LOOP) LABEL(REM_1) SAR1(RDI) JNC(REM_2) SUBITER(0,1,0,RAX) VMOVAPD(ZMM(0), ZMM(1)) ADD(RAX, IMM(32*8)) ADD(RBX, IMM( 8*8)) LABEL(REM_2) SAR1(RDI) JNC(REM_4) SUBITER(0,1,0,RAX) SUBITER(1,0,1,RAX) ADD(RAX, IMM(2*32*8)) ADD(RBX, IMM(2* 8*8)) LABEL(REM_4) SAR1(RDI) JNC(REM_8) SUBITER(0,1,0,RAX) SUBITER(1,0,1,RAX) SUBITER(2,1,0,RAX) SUBITER(3,0,1,RAX) ADD(RAX, IMM(4*32*8)) ADD(RBX, IMM(4* 8*8)) LABEL(REM_8) SAR1(RDI) JNC(REM_16) SUBITER(0,1,0,RAX ) SUBITER(1,0,1,RAX ) SUBITER(2,1,0,RAX ) SUBITER(3,0,1,RAX ) SUBITER(4,1,0,RAX,R8,1) SUBITER(5,0,1,RAX,R8,1) SUBITER(6,1,0,RAX,R8,1) SUBITER(7,0,1,RAX,R8,1) ADD(RAX, IMM(8*32*8)) ADD(RBX, IMM(8* 8*8)) LABEL(REM_16) SAR1(RDI) JNC(AFTER_LOOP) SUBITER( 0,1,0,RAX ) SUBITER( 1,0,1,RAX ) SUBITER( 2,1,0,RAX ) SUBITER( 3,0,1,RAX ) SUBITER( 4,1,0,RAX,R8, 1) SUBITER( 5,0,1,RAX,R8, 1) SUBITER( 6,1,0,RAX,R8, 1) SUBITER( 7,0,1,RAX,R8, 1) SUBITER( 8,1,0,RAX,R8, 2) SUBITER( 
9,0,1,RAX,R8, 2) SUBITER(10,1,0,RAX,R8, 2) SUBITER(11,0,1,RAX,R8, 2) SUBITER(12,1,0,RAX,R9, 1) SUBITER(13,0,1,RAX,R9, 1) SUBITER(14,1,0,RAX,R9, 1) SUBITER(15,0,1,RAX,R9, 1) ADD(RAX, IMM(16*32*8)) ADD(RBX, IMM(16* 8*8)) LABEL(AFTER_LOOP) //prefetch C into L1 #if SCATTER_PREFETCH_C MOV(RDI, VAR(offsetPtr)) VPBROADCASTD(ZMM(0), R12D) VPBROADCASTD(ZMM(1), R12D) VPMULLD(ZMM(0), ZMM(0), MEM(RDI)) VPMULLD(ZMM(1), ZMM(1), MEM(RDI,64)) KXNORW(K(1), K(0), K(0)) KSHIFTRW(K(2), K(1), IMM(2)) VSCATTERPFDPS(0, MEM(RCX,ZMM(0),8) MASK_K(1)) VSCATTERPFDPS(0, MEM(RCX,ZMM(1),8) MASK_K(2)) VMOVAPD(ZMM(0), MEM(RBX)) SUBITER(0,1,0,RAX ) SUBITER(1,0,1,RAX ) SUBITER(2,1,0,RAX ) SUBITER(3,0,1,RAX ) SUBITER(4,1,0,RAX,R8,1) SUBITER(5,0,1,RAX,R8,1) SUBITER(6,1,0,RAX,R8,1) SUBITER(7,0,1,RAX,R8,1) #else #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #undef PREFETCH_C_L1_4 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RCX )) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RCX,R12,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RCX,R12,2)) #define PREFETCH_C_L1_4 PREFETCHW0(MEM(RCX,R13,1)) SUBITER(0,1,0,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #undef PREFETCH_C_L1_4 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RCX,R12,4)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RCX,R14,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RCX,R13,2)) #define PREFETCH_C_L1_4 PREFETCHW0(MEM(RCX,R15,1)) SUBITER(1,0,1,RAX ) LEA(RDX, MEM(RCX,R12,8)) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #undef PREFETCH_C_L1_4 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX )) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R12,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R12,2)) #define PREFETCH_C_L1_4 PREFETCHW0(MEM(RDX,R13,1)) SUBITER(2,1,0,RAX ) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #undef PREFETCH_C_L1_4 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R12,4)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R14,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R13,2)) 
#define PREFETCH_C_L1_4 PREFETCHW0(MEM(RDX,R15,1)) SUBITER(3,0,1,RAX ) LEA(RDX, MEM(RDX,R12,8)) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #undef PREFETCH_C_L1_4 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX )) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R12,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R12,2)) #define PREFETCH_C_L1_4 PREFETCHW0(MEM(RDX,R13,1)) SUBITER(4,1,0,RAX,R8,1) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #undef PREFETCH_C_L1_4 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R12,4)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R14,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R13,2)) #define PREFETCH_C_L1_4 PREFETCHW0(MEM(RDX,R15,1)) SUBITER(5,0,1,RAX,R8,1) LEA(RDX, MEM(RDX,R12,8)) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #undef PREFETCH_C_L1_4 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX )) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R12,1)) #define PREFETCH_C_L1_3 PREFETCHW0(MEM(RDX,R12,2)) #define PREFETCH_C_L1_4 PREFETCHW0(MEM(RDX,R13,1)) SUBITER(6,1,0,RAX,R8,1) #undef PREFETCH_C_L1_1 #undef PREFETCH_C_L1_2 #undef PREFETCH_C_L1_3 #undef PREFETCH_C_L1_4 #define PREFETCH_C_L1_1 PREFETCHW0(MEM(RDX,R12,4)) #define PREFETCH_C_L1_2 PREFETCHW0(MEM(RDX,R14,1)) #define PREFETCH_C_L1_3 #define PREFETCH_C_L1_4 SUBITER(7,0,1,RAX,R8,1) #endif JMP(POSTACCUM) LABEL(TAIL) MOV(RDX, RCX) ADD(RSI, IMM(38)) LABEL(TAIL_LOOP) PREFETCHW0(MEM(RDX)) ADD(RDX, R12) SUBITER(0,1,0,RAX) VMOVAPD(ZMM(0), ZMM(1)) ADD(RAX, IMM(32*8)) ADD(RBX, IMM( 8*8)) SUB(RSI, IMM(1)) JNZ(TAIL_LOOP) LABEL(POSTACCUM) MOV(RAX, VAR(alpha)) MOV(RBX, VAR(beta)) VBROADCASTSD(ZMM(0), MEM(RAX)) VBROADCASTSD(ZMM(1), MEM(RBX)) // Check if C is row stride. 
If not, jump to the slow scattered update MOV(RAX, VAR(rs_c)) LEA(RAX, MEM(,RAX,8)) MOV(RBX, VAR(cs_c)) LEA(RDI, MEM(RAX,RAX,2)) CMP(RBX, IMM(1)) JNE(SCATTEREDUPDATE) VMOVQ(RDX, XMM(1)) SAL1(RDX) //shift out sign bit JZ(COLSTORBZ) UPDATE_C_FOUR_ROWS( 2, 3, 4, 5) UPDATE_C_FOUR_ROWS( 6, 7, 8, 9) UPDATE_C_FOUR_ROWS(10,11,12,13) UPDATE_C_FOUR_ROWS(14,15,16,17) UPDATE_C_FOUR_ROWS(18,19,20,21) UPDATE_C_FOUR_ROWS(22,23,24,25) UPDATE_C_FOUR_ROWS(26,27,28,29) UPDATE_C_TWO_ROWS (30,31) JMP(END) LABEL(COLSTORBZ) UPDATE_C_BZ_FOUR_ROWS( 2, 3, 4, 5) UPDATE_C_BZ_FOUR_ROWS( 6, 7, 8, 9) UPDATE_C_BZ_FOUR_ROWS(10,11,12,13) UPDATE_C_BZ_FOUR_ROWS(14,15,16,17) UPDATE_C_BZ_FOUR_ROWS(18,19,20,21) UPDATE_C_BZ_FOUR_ROWS(22,23,24,25) UPDATE_C_BZ_FOUR_ROWS(26,27,28,29) UPDATE_C_BZ_TWO_ROWS (30,31) JMP(END) LABEL(SCATTEREDUPDATE) VMULPD(ZMM( 2), ZMM( 2), ZMM(0)) VMULPD(ZMM( 3), ZMM( 3), ZMM(0)) VMULPD(ZMM( 4), ZMM( 4), ZMM(0)) VMULPD(ZMM( 5), ZMM( 5), ZMM(0)) VMULPD(ZMM( 6), ZMM( 6), ZMM(0)) VMULPD(ZMM( 7), ZMM( 7), ZMM(0)) VMULPD(ZMM( 8), ZMM( 8), ZMM(0)) VMULPD(ZMM( 9), ZMM( 9), ZMM(0)) VMULPD(ZMM(10), ZMM(10), ZMM(0)) VMULPD(ZMM(11), ZMM(11), ZMM(0)) VMULPD(ZMM(12), ZMM(12), ZMM(0)) VMULPD(ZMM(13), ZMM(13), ZMM(0)) VMULPD(ZMM(14), ZMM(14), ZMM(0)) VMULPD(ZMM(15), ZMM(15), ZMM(0)) VMULPD(ZMM(16), ZMM(16), ZMM(0)) VMULPD(ZMM(17), ZMM(17), ZMM(0)) VMULPD(ZMM(18), ZMM(18), ZMM(0)) VMULPD(ZMM(19), ZMM(19), ZMM(0)) VMULPD(ZMM(20), ZMM(20), ZMM(0)) VMULPD(ZMM(21), ZMM(21), ZMM(0)) VMULPD(ZMM(22), ZMM(22), ZMM(0)) VMULPD(ZMM(23), ZMM(23), ZMM(0)) VMULPD(ZMM(24), ZMM(24), ZMM(0)) VMULPD(ZMM(25), ZMM(25), ZMM(0)) VMULPD(ZMM(26), ZMM(26), ZMM(0)) VMULPD(ZMM(27), ZMM(27), ZMM(0)) VMULPD(ZMM(28), ZMM(28), ZMM(0)) VMULPD(ZMM(29), ZMM(29), ZMM(0)) VMULPD(ZMM(30), ZMM(30), ZMM(0)) VMULPD(ZMM(31), ZMM(31), ZMM(0)) VMOVQ(RDX, XMM(1)) /* Note that this ignores the upper 32 bits in cs_c */ MOV(RDI, VAR(offsetPtr)) VPBROADCASTD(ZMM(0), EBX) VPMULLD(ZMM(0), ZMM(0), MEM(RDI)) MOV(RBX, VAR(beta)) SAL1(RDX) //shift 
out sign bit JZ(SCATTERBZ) UPDATE_C_ROW_SCATTERED( 2) UPDATE_C_ROW_SCATTERED( 3) UPDATE_C_ROW_SCATTERED( 4) UPDATE_C_ROW_SCATTERED( 5) UPDATE_C_ROW_SCATTERED( 6) UPDATE_C_ROW_SCATTERED( 7) UPDATE_C_ROW_SCATTERED( 8) UPDATE_C_ROW_SCATTERED( 9) UPDATE_C_ROW_SCATTERED(10) UPDATE_C_ROW_SCATTERED(11) UPDATE_C_ROW_SCATTERED(12) UPDATE_C_ROW_SCATTERED(13) UPDATE_C_ROW_SCATTERED(14) UPDATE_C_ROW_SCATTERED(15) UPDATE_C_ROW_SCATTERED(16) UPDATE_C_ROW_SCATTERED(17) UPDATE_C_ROW_SCATTERED(18) UPDATE_C_ROW_SCATTERED(19) UPDATE_C_ROW_SCATTERED(20) UPDATE_C_ROW_SCATTERED(21) UPDATE_C_ROW_SCATTERED(22) UPDATE_C_ROW_SCATTERED(23) UPDATE_C_ROW_SCATTERED(24) UPDATE_C_ROW_SCATTERED(25) UPDATE_C_ROW_SCATTERED(26) UPDATE_C_ROW_SCATTERED(27) UPDATE_C_ROW_SCATTERED(28) UPDATE_C_ROW_SCATTERED(29) UPDATE_C_ROW_SCATTERED(30) UPDATE_C_ROW_SCATTERED(31) JMP(END) LABEL(SCATTERBZ) UPDATE_C_BZ_ROW_SCATTERED( 2) UPDATE_C_BZ_ROW_SCATTERED( 3) UPDATE_C_BZ_ROW_SCATTERED( 4) UPDATE_C_BZ_ROW_SCATTERED( 5) UPDATE_C_BZ_ROW_SCATTERED( 6) UPDATE_C_BZ_ROW_SCATTERED( 7) UPDATE_C_BZ_ROW_SCATTERED( 8) UPDATE_C_BZ_ROW_SCATTERED( 9) UPDATE_C_BZ_ROW_SCATTERED(10) UPDATE_C_BZ_ROW_SCATTERED(11) UPDATE_C_BZ_ROW_SCATTERED(12) UPDATE_C_BZ_ROW_SCATTERED(13) UPDATE_C_BZ_ROW_SCATTERED(14) UPDATE_C_BZ_ROW_SCATTERED(15) UPDATE_C_BZ_ROW_SCATTERED(16) UPDATE_C_BZ_ROW_SCATTERED(17) UPDATE_C_BZ_ROW_SCATTERED(18) UPDATE_C_BZ_ROW_SCATTERED(19) UPDATE_C_BZ_ROW_SCATTERED(20) UPDATE_C_BZ_ROW_SCATTERED(21) UPDATE_C_BZ_ROW_SCATTERED(22) UPDATE_C_BZ_ROW_SCATTERED(23) UPDATE_C_BZ_ROW_SCATTERED(24) UPDATE_C_BZ_ROW_SCATTERED(25) UPDATE_C_BZ_ROW_SCATTERED(26) UPDATE_C_BZ_ROW_SCATTERED(27) UPDATE_C_BZ_ROW_SCATTERED(28) UPDATE_C_BZ_ROW_SCATTERED(29) UPDATE_C_BZ_ROW_SCATTERED(30) UPDATE_C_BZ_ROW_SCATTERED(31) LABEL(END) : // output operands : // input operands [k] "m" (k64), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), [offsetPtr] "m" (offsetPtr) : // register clobber 
list "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ); } blis-0.6.1/kernels/knl/3/other/bli_dgemm_knl_asm_30x8_knc.c000066400000000000000000000371001360743507500234000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include #include "bli_avx512_macros.h" #define A_L1_PREFETCH_DIST 4 #define B_L1_PREFETCH_DIST 2 #define L2_PREFETCH_DIST 16 // Must be greater than 10, because of the way the loop is constructed. //Alternate code path uused if C is not row-major // r9 = c // ymm0 = cs_c * 1...8 // r11 = rs_c // r12 = &alpha // r13 = &beta #define UPDATE_C_ROW_SCATTERED_(NUM,BNZ1,BNZ2) \ \ BNZ1 KXNORW(K(2), K(0), K(0)) BNZ2 \ KXNORW(K(3), K(0), K(0)) \ BNZ1 VGATHERDPD(ZMM(31) MASK_K(2), MEM(R(9),YMM(0),8)) BNZ2 \ VMULPD(ZMM(NUM), ZMM(NUM), MEM_1TO8(R(12))) /*scale by alpha*/ \ BNZ1 VFMADD231PD(ZMM(NUM), ZMM(31), MEM_1TO8(R(13))) BNZ2 /*scale by beta, add in result*/ \ VSCATTERDPD(MEM(R(9),YMM(0),8) MASK_K(3), ZMM(NUM)) \ ADD(R(9), R(11)) #define UPDATE_C_ROW_SCATTERED(NUM) UPDATE_C_ROW_SCATTERED_(NUM,,) #define UPDATE_C_BZ_ROW_SCATTERED(NUM) UPDATE_C_ROW_SCATTERED_(NUM,COMMENT_BEGIN,COMMENT_END) // r12 = &alpha // zmm31 = beta // r9 = c // r11 = rs_c // r10 = 3*rs_c // rdi = 4*rs_c #define UPDATE_C_4_ROWS_(R1,R2,R3,R4,BNZ1,BNZ2) \ \ VMULPD(ZMM(R1), ZMM(R1), MEM_1TO8(R(12))) \ VMULPD(ZMM(R2), ZMM(R2), MEM_1TO8(R(12))) \ VMULPD(ZMM(R3), ZMM(R3), MEM_1TO8(R(12))) \ VMULPD(ZMM(R4), ZMM(R4), MEM_1TO8(R(12))) \ BNZ1 VFMADD231PD(ZMM(R1), ZMM(31), MEM(R(9) )) BNZ2 \ BNZ1 VFMADD231PD(ZMM(R2), ZMM(31), MEM(R(9),R(11),1)) BNZ2 \ BNZ1 VFMADD231PD(ZMM(R3), ZMM(31), MEM(R(9),R(11),2)) BNZ2 \ BNZ1 VFMADD231PD(ZMM(R4), ZMM(31), MEM(R(9),R(10),1)) BNZ2 \ VMOVUPD(MEM(R(9) ), ZMM(R1)) \ 
VMOVUPD(MEM(R(9),R(11),1), ZMM(R2)) \ VMOVUPD(MEM(R(9),R(11),2), ZMM(R3)) \ VMOVUPD(MEM(R(9),R(10),1), ZMM(R4)) \ ADD(R(9), RDI) // r12 = &alpha // zmm31 = beta // r9 = c // r11 = rs_c #define UPDATE_C_2_ROWS_(R1,R2,BNZ1,BNZ2) \ \ VMULPD(ZMM(R1), ZMM(R1), MEM_1TO8(R(12))) \ VMULPD(ZMM(R2), ZMM(R2), MEM_1TO8(R(12))) \ BNZ1 VFMADD231PD(ZMM(R1), ZMM(31), MEM(R(9) )) BNZ2 \ BNZ1 VFMADD231PD(ZMM(R2), ZMM(31), MEM(R(9),R(11),1)) BNZ2 \ VMOVUPD(MEM(R(9) ), ZMM(R1)) \ VMOVUPD(MEM(R(9),R(11),1), ZMM(R2)) \ #define UPDATE_C_4_ROWS(R1,R2,R3,R4) UPDATE_C_4_ROWS_(R1,R2,R3,R4,,) #define UPDATE_C_2_ROWS(R1,R2) UPDATE_C_2_ROWS_(R1,R2,,) #define UPDATE_C_BZ_4_ROWS(R1,R2,R3,R4) UPDATE_C_4_ROWS_(R1,R2,R3,R4,COMMENT_BEGIN,COMMENT_END) #define UPDATE_C_BZ_2_ROWS(R1,R2) UPDATE_C_2_ROWS_(R1,R2,COMMENT_BEGIN,COMMENT_END) #define A_TIMES_B_ROW(n) VFMADD231PD(ZMM(n), ZMM(31), MEM_1TO8(R(15),(n-1)*8)) #define A_TIMES_B_ROW_PREV(n) VFMADD231PD(ZMM(n), ZMM(31), MEM_1TO8(R(15),((n-1)-32)*8)) #define PREFETCH_A_L1(n) PREFETCH(0, MEM(R(15),A_L1_PREFETCH_DIST*8*32+n*64)) #define PREFETCH_A_L2(n) PREFETCH(1, MEM(R(15),R(14),1,n*64)) #define PREFETCH_B_L1 PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*8*8)) #define PREFETCH_B_L2 PREFETCH(1, MEM(RBX,R(13),1)) //One iteration of the k_r loop. //Each iteration, we prefetch A into L1 and into L2 // r15 = a // rbx = b // rcx = c // r11 = rs_c // r13 = L2_PREFETCH_DIST*8*8 // r14 = L2_PREFETCH_DIST*8*32 // r12 = 32*8 = dist. to next sliver of a // r9 = 8*8 = dist. to next sliver of b #define MAIN_LOOP_(COUNTER, PC_L1_1, PC_L1_2, PC_L2_1, PC_L2_2) \ \ /* Can this be pre-loaded for next it. in zmm0? 
*/ \ VMOVAPD(ZMM(31), MEM(RBX)) \ \ A_TIMES_B_ROW ( 1) \ A_TIMES_B_ROW ( 2) PREFETCH_A_L1(0) \ A_TIMES_B_ROW ( 3) PREFETCH_A_L1(1) \ A_TIMES_B_ROW ( 4) PREFETCH_A_L1(2) \ A_TIMES_B_ROW ( 5) PREFETCH_A_L1(3) \ A_TIMES_B_ROW ( 6) PREFETCH_A_L2(0) \ A_TIMES_B_ROW ( 7) PC_L1_1 PREFETCH(0, MEM(RCX)) PC_L1_2 \ A_TIMES_B_ROW ( 8) PC_L1_1 ADD(RCX, R(11)) PC_L1_2 \ A_TIMES_B_ROW ( 9) \ A_TIMES_B_ROW (10) PC_L2_1 PREFETCH(1, MEM(RCX)) PC_L2_2 \ A_TIMES_B_ROW (11) PREFETCH_A_L2(1) \ A_TIMES_B_ROW (12) PC_L1_1 PREFETCH(0, MEM(RCX)) PC_L1_2 \ A_TIMES_B_ROW (13) PC_L1_1 ADD(RCX, R(11)) PC_L1_2 \ A_TIMES_B_ROW (14) \ A_TIMES_B_ROW (15) \ A_TIMES_B_ROW (16) PREFETCH_A_L2(2) \ A_TIMES_B_ROW (17) PC_L1_1 PREFETCH(0, MEM(RCX)) PC_L1_2 \ A_TIMES_B_ROW (18) PC_L1_1 ADD(RCX, R(11)) PC_L1_2 \ A_TIMES_B_ROW (19) \ A_TIMES_B_ROW (20) \ A_TIMES_B_ROW (21) PREFETCH_A_L2(3) \ A_TIMES_B_ROW (22) ADD(R(15), R(12)) \ A_TIMES_B_ROW_PREV(23) \ A_TIMES_B_ROW_PREV(24) PC_L2_1 ADD(RCX, R(11)) PC_L2_2 \ A_TIMES_B_ROW_PREV(25) DEC(COUNTER) \ A_TIMES_B_ROW_PREV(26) PREFETCH_B_L2 \ A_TIMES_B_ROW_PREV(27) PREFETCH_B_L1 \ A_TIMES_B_ROW_PREV(28) ADD(RBX, R(9)) \ A_TIMES_B_ROW_PREV(29) CMP(COUNTER, IMM(0)) \ A_TIMES_B_ROW_PREV(30) #define MAIN_LOOP(COUNTER) MAIN_LOOP_(COUNTER,COMMENT_BEGIN,COMMENT_END,COMMENT_BEGIN,COMMENT_END) #define MAIN_LOOP_PC_L1(COUNTER) MAIN_LOOP_(COUNTER,,,COMMENT_BEGIN,COMMENT_END) #define MAIN_LOOP_PC_L2(COUNTER) MAIN_LOOP_(COUNTER,COMMENT_BEGIN,COMMENT_END,,) //This is an array used for the scatter/gather instructions. 
extern int32_t offsets[16]; //#define MONITORS //#define LOOPMON void bli_dgemm_knl_asm_30x8_knc ( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { const double * a_next = bli_auxinfo_next_a( data ); const double * b_next = bli_auxinfo_next_b( data ); const int32_t * offsetPtr = &offsets[0]; #ifdef MONITORS int toph, topl, both, botl, midl, midh, mid2l, mid2h; #endif #ifdef LOOPMON int tlooph, tloopl, blooph, bloopl; #endif __asm__ volatile ( #ifdef MONITORS RDTSC MOV(VAR(topl), EAX) MOV(VAR(toph), EDX) #endif VPXORD(ZMM(1), ZMM(1), ZMM(1)) //clear out registers VMOVAPS(ZMM( 2), ZMM(1)) VMOVAPS(ZMM( 3), ZMM(1)) MOV(RSI, VAR(k)) //loop index VMOVAPS(ZMM( 4), ZMM(1)) MOV(R(11), VAR(rs_c)) //load row stride VMOVAPS(ZMM( 5), ZMM(1)) SAL(R(11), IMM(3)) //scale row stride VMOVAPS(ZMM( 6), ZMM(1)) MOV(R(15), VAR(a)) //load address of a VMOVAPS(ZMM( 7), ZMM(1)) MOV(RBX, VAR(b)) //load address of b VMOVAPS(ZMM( 8), ZMM(1)) VMOVAPS(ZMM( 9), ZMM(1)) LEA(R(10), MEM(R(11),R(11),2)) //r10 has 3 * r11 VMOVAPS(ZMM(10), ZMM(1)) VMOVAPS(ZMM(11), ZMM(1)) MOV(RDI, R(11)) VMOVAPS(ZMM(12), ZMM(1)) SAL(RDI, IMM(2)) //rdi has 4*r11 VMOVAPS(ZMM(13), ZMM(1)) MOV(RCX, VAR(c)) //load address of c for prefetching VMOVAPS(ZMM(14), ZMM(1)) VMOVAPS(ZMM(15), ZMM(1)) MOV(R(8), VAR(k)) VMOVAPS(ZMM(16), ZMM(1)) VMOVAPS(ZMM(17), ZMM(1)) VMOVAPS(ZMM(18), ZMM(1)) MOV(R(13), IMM(8*8*L2_PREFETCH_DIST)) VMOVAPS(ZMM(19), ZMM(1)) MOV(R(14), IMM(8*32*L2_PREFETCH_DIST)) VMOVAPS(ZMM(20), ZMM(1)) VMOVAPS(ZMM(21), ZMM(1)) VMOVAPS(ZMM(22), ZMM(1)) VMOVAPS(ZMM(23), ZMM(1)) VMOVAPS(ZMM(24), ZMM(1)) SUB(R(8), IMM(30+L2_PREFETCH_DIST)) //Check if we have over 40 operations to do. 
VMOVAPS(ZMM(25), ZMM(1)) MOV(R(8), IMM(30)) VMOVAPS(ZMM(26), ZMM(1)) MOV(R(9), IMM(8*8)) //amount to increment b* by each iteration VMOVAPS(ZMM(27), ZMM(1)) MOV(R(12), IMM(8*32)) //amount to increment a* by each iteration VMOVAPS(ZMM(28), ZMM(1)) VMOVAPS(ZMM(29), ZMM(1)) VMOVAPS(ZMM(30), ZMM(1)) #ifdef MONITORS RDTSC MOV(VAR(midl), EAX) MOV(VAR(midh), EDX) #endif JLE(CONSIDER_UNDER_40) SUB(RSI, IMM(30+L2_PREFETCH_DIST)) //First 30 iterations LABEL(LOOPREFECHCL2) MAIN_LOOP_PC_L2(R(8)) JNZ(LOOPREFECHCL2) MOV(RCX, VAR(c)) //Main Loop. LABEL(LOOPMAIN) MAIN_LOOP(RSI) JNZ(LOOPMAIN) //Penultimate 22 iterations. //Break these off from the main loop to avoid prefetching extra shit. MOV(R(14), VAR(a_next)) MOV(R(13), VAR(b_next)) SUB(R(14), R(15)) SUB(R(13), RBX) //Yes, I know 10-20 = -10 MOV(RSI, IMM(10+L2_PREFETCH_DIST-20)) LABEL(LOOPMAIN2) MAIN_LOOP(RSI) JNZ(LOOPMAIN2) //Last 10 iterations MOV(R(8), IMM(10)) LABEL(LOOPREFETCHCL1) MAIN_LOOP_PC_L1(R(8)) JNZ(LOOPREFETCHCL1) JMP(POSTACCUM) //Alternate main loop, with no prefetching of C //Used when <= 40 iterations LABEL(CONSIDER_UNDER_40) MOV(RSI, VAR(k)) TEST(RSI, RSI) JZ(POSTACCUM) LABEL(LOOP_UNDER_40) MAIN_LOOP(RSI) JNZ(LOOP_UNDER_40) LABEL(POSTACCUM) #ifdef MONITORS RDTSC MOV(VAR(mid2l), EAX) MOV(VAR(mid2h), EDX) #endif MOV(R(9), VAR(c)) //load address of c for update MOV(R(12), VAR(alpha)) //load address of alpha // Check if C is row stride. 
If not, jump to the slow scattered update MOV(R(14), VAR(cs_c)) DEC(R(14)) JNZ(SCATTEREDUPDATE) MOV(R(14), VAR(beta)) VBROADCASTSD(ZMM(31), MEM(R(14))) MOV(RBX, MEM(R(14))) TEST(RBX, RBX) JZ(COLSTORBZ) UPDATE_C_4_ROWS( 1, 2, 3, 4) UPDATE_C_4_ROWS( 5, 6, 7, 8) UPDATE_C_4_ROWS( 9,10,11,12) UPDATE_C_4_ROWS(13,14,15,16) UPDATE_C_4_ROWS(17,18,19,20) UPDATE_C_4_ROWS(21,22,23,24) UPDATE_C_4_ROWS(25,26,27,28) UPDATE_C_2_ROWS(29,30) JMP(END) LABEL(COLSTORBZ) UPDATE_C_BZ_4_ROWS( 1, 2, 3, 4) UPDATE_C_BZ_4_ROWS( 5, 6, 7, 8) UPDATE_C_BZ_4_ROWS( 9,10,11,12) UPDATE_C_BZ_4_ROWS(13,14,15,16) UPDATE_C_BZ_4_ROWS(17,18,19,20) UPDATE_C_BZ_4_ROWS(21,22,23,24) UPDATE_C_BZ_4_ROWS(25,26,27,28) UPDATE_C_BZ_2_ROWS(29,30) JMP(END) LABEL(SCATTEREDUPDATE) MOV(R(13), VAR(beta)) MOV(R(10), VAR(offsetPtr)) VMOVAPS(ZMM(0), MEM(R(10))) MOV(RBX, MEM(R(13))) /* Note that this ignores the upper 32 bits in cs_c */ VPBROADCASTD(ZMM(31), VAR(cs_c)) VPMULLD(ZMM(0), ZMM(31), ZMM(0)) TEST(RBX, RBX) JZ(SCATTERBZ) UPDATE_C_ROW_SCATTERED( 1) UPDATE_C_ROW_SCATTERED( 2) UPDATE_C_ROW_SCATTERED( 3) UPDATE_C_ROW_SCATTERED( 4) UPDATE_C_ROW_SCATTERED( 5) UPDATE_C_ROW_SCATTERED( 6) UPDATE_C_ROW_SCATTERED( 7) UPDATE_C_ROW_SCATTERED( 8) UPDATE_C_ROW_SCATTERED( 9) UPDATE_C_ROW_SCATTERED(10) UPDATE_C_ROW_SCATTERED(11) UPDATE_C_ROW_SCATTERED(12) UPDATE_C_ROW_SCATTERED(13) UPDATE_C_ROW_SCATTERED(14) UPDATE_C_ROW_SCATTERED(15) UPDATE_C_ROW_SCATTERED(16) UPDATE_C_ROW_SCATTERED(17) UPDATE_C_ROW_SCATTERED(18) UPDATE_C_ROW_SCATTERED(19) UPDATE_C_ROW_SCATTERED(20) UPDATE_C_ROW_SCATTERED(21) UPDATE_C_ROW_SCATTERED(22) UPDATE_C_ROW_SCATTERED(23) UPDATE_C_ROW_SCATTERED(24) UPDATE_C_ROW_SCATTERED(25) UPDATE_C_ROW_SCATTERED(26) UPDATE_C_ROW_SCATTERED(27) UPDATE_C_ROW_SCATTERED(28) UPDATE_C_ROW_SCATTERED(29) UPDATE_C_ROW_SCATTERED(30) JMP(END) LABEL(SCATTERBZ) UPDATE_C_BZ_ROW_SCATTERED( 1) UPDATE_C_BZ_ROW_SCATTERED( 2) UPDATE_C_BZ_ROW_SCATTERED( 3) UPDATE_C_BZ_ROW_SCATTERED( 4) UPDATE_C_BZ_ROW_SCATTERED( 5) UPDATE_C_BZ_ROW_SCATTERED( 6) 
UPDATE_C_BZ_ROW_SCATTERED( 7) UPDATE_C_BZ_ROW_SCATTERED( 8) UPDATE_C_BZ_ROW_SCATTERED( 9) UPDATE_C_BZ_ROW_SCATTERED(10) UPDATE_C_BZ_ROW_SCATTERED(11) UPDATE_C_BZ_ROW_SCATTERED(12) UPDATE_C_BZ_ROW_SCATTERED(13) UPDATE_C_BZ_ROW_SCATTERED(14) UPDATE_C_BZ_ROW_SCATTERED(15) UPDATE_C_BZ_ROW_SCATTERED(16) UPDATE_C_BZ_ROW_SCATTERED(17) UPDATE_C_BZ_ROW_SCATTERED(18) UPDATE_C_BZ_ROW_SCATTERED(19) UPDATE_C_BZ_ROW_SCATTERED(20) UPDATE_C_BZ_ROW_SCATTERED(21) UPDATE_C_BZ_ROW_SCATTERED(22) UPDATE_C_BZ_ROW_SCATTERED(23) UPDATE_C_BZ_ROW_SCATTERED(24) UPDATE_C_BZ_ROW_SCATTERED(25) UPDATE_C_BZ_ROW_SCATTERED(26) UPDATE_C_BZ_ROW_SCATTERED(27) UPDATE_C_BZ_ROW_SCATTERED(28) UPDATE_C_BZ_ROW_SCATTERED(29) UPDATE_C_BZ_ROW_SCATTERED(30) LABEL(END) #ifdef MONITORS RDTSC MOV(VAR(botl), EAX) MOV(VAR(both), EDX) #endif : // output operands #ifdef MONITORS [topl] "=m" (topl), [toph] "=m" (toph), [midl] "=m" (midl), [midh] "=m" (midh), [mid2l] "=m" (mid2l), [mid2h] "=m" (mid2h), [botl] "=m" (botl), [both] "=m" (both) #endif : // input operands [k] "m" (k), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), [a_next] "m" (a_next), [b_next] "m" (b_next), [offsetPtr] "m" (offsetPtr) : // register clobber list "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ); #ifdef LOOPMON printf("looptime = \t%d\n", bloopl - tloopl); #endif #ifdef MONITORS dim_t top = ((dim_t)toph << 32) | topl; dim_t mid = ((dim_t)midh << 32) | midl; dim_t mid2 = ((dim_t)mid2h << 32) | mid2l; dim_t bot = ((dim_t)both << 32) | botl; printf("setup =\t%u\tmain loop =\t%u\tcleanup=\t%u\ttotal=\t%u\n", mid - top, mid2 - mid, bot - mid2, bot - 
top); #endif } blis-0.6.1/kernels/knl/3/other/bli_dgemm_knl_asm_8x24.c000066400000000000000000000530411360743507500225520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"
// NOTE(review): the next directive has no header name in this copy of the
// file; restore it before compiling.
#include

#include "bli_avx512_macros.h"

// Tuning knobs for the 8x24 kernel.
// UNROLL_K selects which MAIN_LOOP_<n> variant MAIN_LOOP_L2 expands to.
#define UNROLL_K 8

// Feature switches. SCATTER_PREFETCH_AB, PREFETCH_A_L2 and PREFETCH_B_L2 are
// tested with #if further down and then redefined as function-like macros
// (empty when the switch is 0). SCATTER_PREFETCH_C is tested inside the
// kernel function.
#define SCATTER_PREFETCH_AB 0
#define SCATTER_PREFETCH_C 1

#define PREFETCH_A_L2 0
#define PREFETCH_B_L2 0

// Prefetch distances, counted in k iterations: they are multiplied by 8*8
// bytes for A and by 24*8 bytes for B in the macros below.
#define L2_PREFETCH_DIST 64
#define A_L1_PREFETCH_DIST 32
#define B_L1_PREFETCH_DIST 12

#define C_MIN_L2_ITERS 64 //C is not prefetched into L2 for k <= this
#define C_L1_ITERS 8 //number of iterations before the end to prefetch C into L1
                     //make sure there is an unrolled MAIN_LOOP_X for this number

#define LOOP_ALIGN ALIGN16

// Contiguous update of four accumulators R1..R4.
// Register contract, as set up by the kernel before this macro is used:
//   zmm0 = alpha (broadcast), zmm1 = beta (broadcast),
//   rcx  = current position in C, rax = cs_c*8, rdi = 3*rax.
// Each accumulator is multiplied by zmm0, combined with the vector of C at
// rcx + {0,1,2,3}*rax through an FMA with zmm1 (presumably acc += beta*C),
// and stored back to the same location; rcx then advances by 4*rax.
#define UPDATE_C_FOUR_ROWS(R1,R2,R3,R4) \
\
    VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \
    VFMADD231PD(ZMM(R1), ZMM(1), MEM(RCX      )) \
    VFMADD231PD(ZMM(R2), ZMM(1), MEM(RCX,RAX,1)) \
    VFMADD231PD(ZMM(R3), ZMM(1), MEM(RCX,RAX,2)) \
    VFMADD231PD(ZMM(R4), ZMM(1), MEM(RCX,RDI,1)) \
    VMOVUPD(MEM(RCX      ), ZMM(R1)) \
    VMOVUPD(MEM(RCX,RAX,1), ZMM(R2)) \
    VMOVUPD(MEM(RCX,RAX,2), ZMM(R3)) \
    VMOVUPD(MEM(RCX,RDI,1), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,4))

// Same as UPDATE_C_FOUR_ROWS but without reading C: the accumulators are
// only scaled by zmm0 and stored. Used on the beta == 0 path.
#define UPDATE_C_BZ_FOUR_ROWS(R1,R2,R3,R4) \
\
    VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \
    VMOVUPD(MEM(RCX      ), ZMM(R1)) \
    VMOVUPD(MEM(RCX,RAX,1), ZMM(R2)) \
    VMOVUPD(MEM(RCX,RAX,2), ZMM(R3)) \
    VMOVUPD(MEM(RCX,RDI,1), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,4))

// Gather/scatter update of one accumulator.
// ymm2 holds the element indices (offsets * rs_c, computed in the kernel),
// scaled by 8 in the addressing. k1 and k2 are set to all ones for the
// gather and the scatter respectively; zmm3 receives the gathered C values.
// rcx advances by rax afterwards.
#define UPDATE_C_ROW_SCATTERED(NUM) \
\
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPD(ZMM(NUM), ZMM(NUM), ZMM(0)) \
    VGATHERDPD(ZMM(3) MASK_K(1), MEM(RCX,YMM(2),8)) \
    VFMADD231PD(ZMM(NUM), ZMM(3), ZMM(1)) \
    VSCATTERDPD(MEM(RCX,YMM(2),8) MASK_K(2), ZMM(NUM)) \
    ADD(RCX, RAX)

// beta == 0 variant of UPDATE_C_ROW_SCATTERED: scale and scatter only.
#define UPDATE_C_BZ_ROW_SCATTERED(NUM) \
\
    KXNORW(K(1), K(0), K(0)) \
    VMULPD(ZMM(NUM), ZMM(NUM), ZMM(0)) \
    VSCATTERDPD(MEM(RCX,YMM(2),8) MASK_K(1), ZMM(NUM)) \
    ADD(RCX, RAX)

// L1 prefetch of B, B_L1_PREFETCH_DIST+n iterations ahead of rbx. One
// iteration of B spans 24*8 bytes, covered by three prefetches 64 bytes
// apart.
#define PREFETCH_B_L1_1(n) PREFETCH(0, MEM(RBX,(B_L1_PREFETCH_DIST+n)*24*8))
#define PREFETCH_B_L1_2(n) PREFETCH(0, MEM(RBX,(B_L1_PREFETCH_DIST+n)*24*8+64))
#define PREFETCH_B_L1_3(n) PREFETCH(0, MEM(RBX,(B_L1_PREFETCH_DIST+n)*24*8+128))

// PREFETCH_B_L2 starts out as a 0/1 switch and is replaced here by either
// the real L2 prefetch sequence or an empty macro.
#if PREFETCH_B_L2
#undef PREFETCH_B_L2

#define PREFETCH_B_L2(n) \
\
    PREFETCH(1, MEM(RBX,(L2_PREFETCH_DIST+n)*24*8)) \
    PREFETCH(1, MEM(RBX,(L2_PREFETCH_DIST+n)*24*8+64)) \
    PREFETCH(1, MEM(RBX,(L2_PREFETCH_DIST+n)*24*8+128))

#else
#undef PREFETCH_B_L2

#define PREFETCH_B_L2(...)

#endif

// L1 prefetch of A, A_L1_PREFETCH_DIST+n iterations ahead of rax (8*8 bytes
// per iteration).
#define PREFETCH_A_L1(n) PREFETCH(0, MEM(RAX,(A_L1_PREFETCH_DIST+n)*8*8))

// Same switch-to-macro replacement as PREFETCH_B_L2 above, for A.
#if PREFETCH_A_L2
#undef PREFETCH_A_L2

#define PREFETCH_A_L2(n) PREFETCH(1, MEM(RAX,(L2_PREFETCH_DIST+n)*8*8))

#else
#undef PREFETCH_A_L2

#define PREFETCH_A_L2(...)

#endif

// When SCATTER_PREFETCH_AB is enabled, the per-iteration L1 prefetch macros
// above are replaced by empty ones and A/B are instead prefetched with
// gather-prefetch instructions indexed by zmm4, once per 16 iterations
// (see MAIN_LOOP_16 / MAIN_LOOP_32). Otherwise SCATTER_PREFETCH_AB expands
// to nothing.
#if SCATTER_PREFETCH_AB
#undef SCATTER_PREFETCH_AB
#undef PREFETCH_B_L1_1
#undef PREFETCH_B_L1_2
#undef PREFETCH_B_L1_3
#undef PREFETCH_A_L1

#define SCATTER_PREFETCH_AB(n) \
\
    KXNORW(K(1), K(0), K(0)) \
    VGATHERPFDPS(0, MEM(RBX,ZMM(4),8,((3*n  )*16+3*B_L1_PREFETCH_DIST)*64) MASK_K(1)) \
    KXNORW(K(2), K(0), K(0)) \
    VGATHERPFDPS(0, MEM(RBX,ZMM(4),8,((3*n+1)*16+3*B_L1_PREFETCH_DIST)*64) MASK_K(2)) \
    KXNORW(K(3), K(0), K(0)) \
    VGATHERPFDPS(0, MEM(RBX,ZMM(4),8,((3*n+2)*16+3*B_L1_PREFETCH_DIST)*64) MASK_K(3)) \
    KXNORW(K(4), K(0), K(0)) \
    VGATHERPFDPS(0, MEM(RAX,ZMM(4),8,(  n   *16+  A_L1_PREFETCH_DIST)*64) MASK_K(4))

#define PREFETCH_B_L1_1(...)
#define PREFETCH_B_L1_2(...)
#define PREFETCH_B_L1_3(...)
#define PREFETCH_A_L1(...)

#else
#undef SCATTER_PREFETCH_AB

#define SCATTER_PREFETCH_AB(...)

#endif

//
// One k iteration: 24 FMAs into zmm8..zmm31, each combining the A vector in
// ZMM(b) with one element of B (MEM_1TO8, presumably a 1-to-8 broadcast
// operand), interleaved with the prefetch macros above. The A vector for the
// following iteration is loaded into ZMM(a) from rax+(n+1)*64.
//
// n: index in unrolled loop (for prefetching offsets)
//
// a: ZMM register to load into
// b: ZMM register to read from
//
// ...: addressing for B, except for offset
//      (the offset added here is ((n%4)*24 + column)*8; callers supply the
//      base of each group of four iterations, e.g. RBX,R8,1)
//
// NOTE(review): '%' is written as '%%' below, presumably because the text
// ends up inside the asm template — confirm against bli_avx512_macros.h.
//
#define SUBITER(n,a,b,...) \
\
        PREFETCH_B_L2(n) \
\
        VMOVAPD(ZMM(a), MEM(RAX,(n+1)*64)) \
        VFMADD231PD(ZMM( 8), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 0)*8)) \
        VFMADD231PD(ZMM( 9), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 1)*8)) \
        VFMADD231PD(ZMM(10), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 2)*8)) \
        VFMADD231PD(ZMM(11), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 3)*8)) \
        PREFETCH_B_L1_1(n) \
        VFMADD231PD(ZMM(12), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 4)*8)) \
        VFMADD231PD(ZMM(13), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 5)*8)) \
        VFMADD231PD(ZMM(14), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 6)*8)) \
        VFMADD231PD(ZMM(15), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 7)*8)) \
        PREFETCH_B_L1_2(n) \
        VFMADD231PD(ZMM(16), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 8)*8)) \
        VFMADD231PD(ZMM(17), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+ 9)*8)) \
        VFMADD231PD(ZMM(18), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+10)*8)) \
        VFMADD231PD(ZMM(19), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+11)*8)) \
        PREFETCH_B_L1_3(n) \
        VFMADD231PD(ZMM(20), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+12)*8)) \
        VFMADD231PD(ZMM(21), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+13)*8)) \
        VFMADD231PD(ZMM(22), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+14)*8)) \
        VFMADD231PD(ZMM(23), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+15)*8)) \
        PREFETCH_A_L1(n) \
        VFMADD231PD(ZMM(24), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+16)*8)) \
        VFMADD231PD(ZMM(25), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+17)*8)) \
        VFMADD231PD(ZMM(26), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+18)*8)) \
        VFMADD231PD(ZMM(27), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+19)*8)) \
        PREFETCH_A_L2(n) \
        VFMADD231PD(ZMM(28), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+20)*8)) \
        VFMADD231PD(ZMM(29), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+21)*8)) \
        VFMADD231PD(ZMM(30), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+22)*8)) \
        VFMADD231PD(ZMM(31), ZMM(b), MEM_1TO8(__VA_ARGS__,((n%%4)*24+23)*8))

// Remainder loop: one SUBITER per pass, counted down in rdi. SUBITER(0,1,0)
// reads A from zmm0 and loads the next A vector into zmm1, so zmm1 is copied
// back to zmm0 before the next pass. rbx/rax advance by one iteration of
// B (24*8 bytes) and A (8*8 bytes).
// The body runs before rdi is tested, so entering with rdi == 0 makes the
// SUB wrap around instead of terminating.
#define TAIL_LOOP(NAME) \
\
    LOOP_ALIGN \
    LABEL(NAME) \
\
    SUBITER(0,1,0,RBX) \
\
    VMOVAPD(ZMM(0), ZMM(1)) \
\
    LEA(RBX, MEM(RBX,24*8)) \
    LEA(RAX, MEM(RAX, 8*8)) \
\
    SUB(RDI, IMM(1)) \
\
    JNZ(NAME)

// k loop without unrolling: rsi passes of a single SUBITER.
#define MAIN_LOOP_1(NAME) \
\
    LOOP_ALIGN \
    LABEL(NAME##_LOOP) \
\
    SUBITER(0,1,0,RBX) \
\
    VMOVAPD(ZMM(0), ZMM(1)) \
\
    LEA(RBX, MEM(RBX,24*8)) \
    LEA(RAX, MEM(RAX, 8*8)) \
\
    SUB(RSI, IMM(1)) \
\
    JNZ(NAME##_LOOP)

// k loop unrolled by 2: rsi is split into rsi/2 unrolled passes and an
// rsi%2 remainder kept in rdi. The two SUBITERs alternate zmm0/zmm1 as the
// current and next A vector. The remainder here is a single inline SUBITER
// rather than a TAIL_LOOP.
#define MAIN_LOOP_2(NAME) \
\
    MOV(RDI, RSI) \
    AND(RDI, IMM(1)) \
    SAR1(RSI) \
    JZ(NAME##_TAIL) \
\
    LOOP_ALIGN \
    LABEL(NAME##_LOOP) \
\
    SUBITER(0,1,0,RBX) \
    SUBITER(1,0,1,RBX) \
\
    LEA(RBX, MEM(RBX,2*24*8)) \
    LEA(RAX, MEM(RAX,2* 8*8)) \
\
    SUB(RSI, IMM(1)) \
\
    JNZ(NAME##_LOOP) \
\
    TEST(RDI, RDI) \
    JZ(NAME##_DONE) \
\
    LABEL(NAME##_TAIL) \
\
    SUBITER(0,1,0,RBX) \
\
    VMOVAPD(ZMM(0), ZMM(1)) \
\
    LEA(RBX, MEM(RBX,24*8)) \
    LEA(RAX, MEM(RAX, 8*8)) \
\
    LABEL(NAME##_DONE)

// k loop unrolled by 4: rsi/4 unrolled passes, then rsi%4 passes of
// TAIL_LOOP (count in rdi).
// NOTE(review): if rsi is 0 on entry, JZ(NAME##_TAIL) enters TAIL_LOOP with
// rdi == 0. The same applies to MAIN_LOOP_8/16/32 below.
#define MAIN_LOOP_4(NAME) \
\
    MOV(RDI, RSI) \
    AND(RDI, IMM(3)) \
    SAR(RSI, IMM(2)) \
    JZ(NAME##_TAIL) \
\
    LOOP_ALIGN \
    LABEL(NAME##_LOOP) \
\
    SUBITER(0,1,0,RBX) \
    SUBITER(1,0,1,RBX) \
    SUBITER(2,1,0,RBX) \
    SUBITER(3,0,1,RBX) \
\
    LEA(RBX, MEM(RBX,4*24*8)) \
    LEA(RAX, MEM(RAX,4* 8*8)) \
\
    SUB(RSI, IMM(1)) \
\
    JNZ(NAME##_LOOP) \
\
    TEST(RDI, RDI) \
    JZ(NAME##_DONE) \
\
    TAIL_LOOP(NAME##_TAIL) \
\
    LABEL(NAME##_DONE)

// k loop unrolled by 8. Iterations 4..7 address B through RBX+R8; the
// kernel sets r8 to 4*24*8, the size of four iterations of B.
#define MAIN_LOOP_8(NAME) \
\
    MOV(RDI, RSI) \
    AND(RDI, IMM(7)) \
    SAR(RSI, IMM(3)) \
    JZ(NAME##_TAIL) \
\
    LOOP_ALIGN \
    LABEL(NAME##_LOOP) \
\
    SUBITER(0,1,0,RBX) \
    SUBITER(1,0,1,RBX) \
    SUBITER(2,1,0,RBX) \
    SUBITER(3,0,1,RBX) \
    SUBITER(4,1,0,RBX,R8,1) \
    SUBITER(5,0,1,RBX,R8,1) \
    SUBITER(6,1,0,RBX,R8,1) \
    SUBITER(7,0,1,RBX,R8,1) \
\
    LEA(RBX, MEM(RBX,8*24*8)) \
    LEA(RAX, MEM(RAX,8* 8*8)) \
\
    SUB(RSI, IMM(1)) \
\
    JNZ(NAME##_LOOP) \
\
    TEST(RDI, RDI) \
    JZ(NAME##_DONE) \
\
    TAIL_LOOP(NAME##_TAIL) \
\
    LABEL(NAME##_DONE)

// k loop unrolled by 16. Groups of four iterations address B through
// RBX + {0, R8, 2*R8, R9}; the kernel sets r9 = 3*r8.
#define MAIN_LOOP_16(NAME) \
\
    MOV(RDI, RSI) \
    AND(RDI, IMM(15)) \
    SAR(RSI, IMM(4)) \
    JZ(NAME##_TAIL) \
\
    LOOP_ALIGN \
    LABEL(NAME##_LOOP) \
\
    SCATTER_PREFETCH_AB(0) \
\
    SUBITER( 0,1,0,RBX) \
    SUBITER( 1,0,1,RBX) \
    SUBITER( 2,1,0,RBX) \
    SUBITER( 3,0,1,RBX) \
    SUBITER( 4,1,0,RBX,R8,1) \
    SUBITER( 5,0,1,RBX,R8,1) \
    SUBITER( 6,1,0,RBX,R8,1) \
    SUBITER( 7,0,1,RBX,R8,1) \
    SUBITER( 8,1,0,RBX,R8,2) \
    SUBITER( 9,0,1,RBX,R8,2) \
    SUBITER(10,1,0,RBX,R8,2) \
    SUBITER(11,0,1,RBX,R8,2) \
    SUBITER(12,1,0,RBX,R9,1) \
    SUBITER(13,0,1,RBX,R9,1) \
    SUBITER(14,1,0,RBX,R9,1) \
    SUBITER(15,0,1,RBX,R9,1) \
\
    LEA(RBX, MEM(RBX,16*24*8)) \
    LEA(RAX, MEM(RAX,16* 8*8)) \
\
    SUB(RSI, IMM(1)) \
\
    JNZ(NAME##_LOOP) \
\
    TEST(RDI, RDI) \
    JZ(NAME##_DONE) \
\
    SCATTER_PREFETCH_AB(0) \
\
    TAIL_LOOP(NAME##_TAIL) \
\
    LABEL(NAME##_DONE)

// k loop unrolled by 32. Groups of four iterations address B through
// RBX + {0, R8, 2*R8, R9, 4*R8, R10, 2*R9, R11}; the kernel sets
// r9 = 3*r8, r10 = 5*r8 and r11 = 7*r8.
#define MAIN_LOOP_32(NAME) \
\
    MOV(RDI, RSI) \
    AND(RDI, IMM(31)) \
    SAR(RSI, IMM(5)) \
    JZ(NAME##_TAIL) \
\
    LOOP_ALIGN \
    LABEL(NAME##_LOOP) \
\
    SCATTER_PREFETCH_AB(0) \
\
    SUBITER( 0,1,0,RBX) \
    SUBITER( 1,0,1,RBX) \
    SUBITER( 2,1,0,RBX) \
    SUBITER( 3,0,1,RBX) \
    SUBITER( 4,1,0,RBX,R8,1) \
    SUBITER( 5,0,1,RBX,R8,1) \
    SUBITER( 6,1,0,RBX,R8,1) \
    SUBITER( 7,0,1,RBX,R8,1) \
    SUBITER( 8,1,0,RBX,R8,2) \
    SUBITER( 9,0,1,RBX,R8,2) \
    SUBITER(10,1,0,RBX,R8,2) \
    SUBITER(11,0,1,RBX,R8,2) \
    SUBITER(12,1,0,RBX,R9,1) \
    SUBITER(13,0,1,RBX,R9,1) \
    SUBITER(14,1,0,RBX,R9,1) \
    SUBITER(15,0,1,RBX,R9,1) \
\
    SCATTER_PREFETCH_AB(1) \
\
    SUBITER(16,1,0,RBX,R8,4) \
    SUBITER(17,0,1,RBX,R8,4) \
    SUBITER(18,1,0,RBX,R8,4) \
    SUBITER(19,0,1,RBX,R8,4) \
    SUBITER(20,1,0,RBX,R10,1) \
    SUBITER(21,0,1,RBX,R10,1) \
    SUBITER(22,1,0,RBX,R10,1) \
    SUBITER(23,0,1,RBX,R10,1) \
    SUBITER(24,1,0,RBX,R9,2) \
    SUBITER(25,0,1,RBX,R9,2) \
    SUBITER(26,1,0,RBX,R9,2) \
    SUBITER(27,0,1,RBX,R9,2) \
    SUBITER(28,1,0,RBX,R11,1) \
    SUBITER(29,0,1,RBX,R11,1) \
    SUBITER(30,1,0,RBX,R11,1) \
    SUBITER(31,0,1,RBX,R11,1) \
\
    LEA(RBX, MEM(RBX,32*24*8)) \
    LEA(RAX, MEM(RAX,32* 8*8)) \
\
    SUB(RSI, IMM(1)) \
\
    JNZ(NAME##_LOOP) \
\
    TEST(RDI, RDI) \
    JZ(NAME##_DONE) \
\
    SCATTER_PREFETCH_AB(0) \
    SCATTER_PREFETCH_AB(1) \
\
    TAIL_LOOP(NAME##_TAIL) \
\
    LABEL(NAME##_DONE)

// Two-level paste so that UNROLL_K / C_L1_ITERS are expanded to their
// numeric values before being glued onto MAIN_LOOP_. NAME becomes the label
// prefix of the generated loop.
#define LOOP_K_(M,K) M##K
#define LOOP_K(M,K,NAME) LOOP_K_(M,K)(NAME)

// MAIN_LOOP_L2: bulk of the k loop, unrolled by UNROLL_K.
// MAIN_LOOP_L1: final loop, unrolled by C_L1_ITERS.
#define MAIN_LOOP_L2 LOOP_K(MAIN_LOOP_,UNROLL_K,MAIN_LOOP_L2)
#define MAIN_LOOP_L1 LOOP_K(MAIN_LOOP_,C_L1_ITERS,MAIN_LOOP_L1)

//This is an array used for the scatter/gather instructions.
extern int32_t offsets[24]; //#define MONITORS //#define LOOPMON void bli_dgemm_knl_asm_8x24 ( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { const double * a_next = bli_auxinfo_next_a( data ); const double * b_next = bli_auxinfo_next_b( data ); const int32_t * offsetPtr = &offsets[0]; uint64_t k64 = k; #ifdef MONITORS int toph, topl, both, botl, midl, midh, mid2l, mid2h; #endif #ifdef LOOPMON int tlooph, tloopl, blooph, bloopl; #endif __asm__ volatile ( #ifdef MONITORS RDTSC MOV(VAR(topl), EAX) MOV(VAR(toph), EDX) #endif VPXORD(ZMM(8), ZMM(8), ZMM(8)) //clear out registers VMOVAPS(ZMM( 9), ZMM(8)) VMOVAPS(ZMM(10), ZMM(8)) MOV(RSI, VAR(k)) //loop index VMOVAPS(ZMM(11), ZMM(8)) MOV(RAX, VAR(a)) //load address of a VMOVAPS(ZMM(12), ZMM(8)) MOV(RBX, VAR(b)) //load address of b VMOVAPS(ZMM(13), ZMM(8)) MOV(RCX, VAR(c)) //load address of c VMOVAPS(ZMM(14), ZMM(8)) VMOVAPD(ZMM(0), MEM(RAX)) //pre-load a VMOVAPS(ZMM(15), ZMM(8)) MOV(RDI, VAR(offsetPtr)) VMOVAPS(ZMM(16), ZMM(8)) VMOVAPS(ZMM(4), MEM(RDI)) #if SCATTER_PREFETCH_C VMOVAPS(ZMM(17), ZMM(8)) VMOVAPS(ZMM(18), ZMM(8)) VMOVAPS(ZMM(19), ZMM(8)) VBROADCASTSS(ZMM(5), VAR(cs_c)) VMOVAPS(ZMM(20), ZMM(8)) VMOVAPS(ZMM(21), ZMM(8)) VPMULLD(ZMM(2), ZMM(4), ZMM(5)) VMOVAPS(ZMM(22), ZMM(8)) VMOVAPS(YMM(3), MEM(RDI,64)) VMOVAPS(ZMM(23), ZMM(8)) VPMULLD(YMM(3), YMM(3), YMM(5)) #else VMOVAPS(ZMM(17), ZMM(8)) MOV(R12, VAR(cs_c)) VMOVAPS(ZMM(18), ZMM(8)) LEA(R13, MEM(R12,R12,2)) VMOVAPS(ZMM(19), ZMM(8)) LEA(R14, MEM(R12,R12,4)) VMOVAPS(ZMM(20), ZMM(8)) LEA(R15, MEM(R13,R12,4)) VMOVAPS(ZMM(21), ZMM(8)) LEA(RDX, MEM(RCX,R12,8)) VMOVAPS(ZMM(22), ZMM(8)) LEA(RDI, MEM(RDX,R12,8)) VMOVAPS(ZMM(23), ZMM(8)) #endif VMOVAPS(ZMM(24), ZMM(8)) VPSLLD(ZMM(4), ZMM(4), IMM(3)) VMOVAPS(ZMM(25), ZMM(8)) MOV(R8, IMM(4*24*8)) //offset for 4 iterations VMOVAPS(ZMM(26), ZMM(8)) LEA(R9, MEM(R8,R8,2)) //*3 
VMOVAPS(ZMM(27), ZMM(8)) LEA(R10, MEM(R8,R8,4)) //*5 VMOVAPS(ZMM(28), ZMM(8)) LEA(R11, MEM(R9,R8,4)) //*7 VMOVAPS(ZMM(29), ZMM(8)) VMOVAPS(ZMM(30), ZMM(8)) VMOVAPS(ZMM(31), ZMM(8)) #ifdef MONITORS RDTSC MOV(VAR(midl), EAX) MOV(VAR(midh), EDX) #endif //need 0+... to satisfy preprocessor CMP(RSI, IMM(0+C_MIN_L2_ITERS)) JLE(PREFETCH_C_L1) SUB(RSI, IMM(0+C_L1_ITERS)) //prefetch C into L2 #if SCATTER_PREFETCH_C KXNORW(K(1), K(0), K(0)) KXNORW(K(2), K(0), K(0)) VSCATTERPFDPS(1, MEM(RCX,ZMM(2),8) MASK_K(1)) VSCATTERPFDPD(1, MEM(RCX,YMM(3),8) MASK_K(2)) #else PREFETCH(1, MEM(RCX )) PREFETCH(1, MEM(RCX,R12,1)) PREFETCH(1, MEM(RCX,R12,2)) PREFETCH(1, MEM(RCX,R13,1)) PREFETCH(1, MEM(RCX,R12,4)) PREFETCH(1, MEM(RCX,R14,1)) PREFETCH(1, MEM(RCX,R13,2)) PREFETCH(1, MEM(RCX,R15,1)) PREFETCH(1, MEM(RDX )) PREFETCH(1, MEM(RDX,R12,1)) PREFETCH(1, MEM(RDX,R12,2)) PREFETCH(1, MEM(RDX,R13,1)) PREFETCH(1, MEM(RDX,R12,4)) PREFETCH(1, MEM(RDX,R14,1)) PREFETCH(1, MEM(RDX,R13,2)) PREFETCH(1, MEM(RDX,R15,1)) PREFETCH(1, MEM(RDI )) PREFETCH(1, MEM(RDI,R12,1)) PREFETCH(1, MEM(RDI,R12,2)) PREFETCH(1, MEM(RDI,R13,1)) PREFETCH(1, MEM(RDI,R12,4)) PREFETCH(1, MEM(RDI,R14,1)) PREFETCH(1, MEM(RDI,R13,2)) PREFETCH(1, MEM(RDI,R15,1)) #endif MAIN_LOOP_L2 MOV(RSI, IMM(0+C_L1_ITERS)) LABEL(PREFETCH_C_L1) //prefetch C into L1 #if SCATTER_PREFETCH_C KXNORW(K(1), K(0), K(0)) KXNORW(K(2), K(0), K(0)) VSCATTERPFDPS(0, MEM(RCX,ZMM(2),8) MASK_K(1)) VSCATTERPFDPD(0, MEM(RCX,YMM(3),8) MASK_K(2)) #else PREFETCH(0, MEM(RCX )) PREFETCH(0, MEM(RCX,R12,1)) PREFETCH(0, MEM(RCX,R12,2)) PREFETCH(0, MEM(RCX,R13,1)) PREFETCH(0, MEM(RCX,R12,4)) PREFETCH(0, MEM(RCX,R14,1)) PREFETCH(0, MEM(RCX,R13,2)) PREFETCH(0, MEM(RCX,R15,1)) PREFETCH(0, MEM(RDX )) PREFETCH(0, MEM(RDX,R12,1)) PREFETCH(0, MEM(RDX,R12,2)) PREFETCH(0, MEM(RDX,R13,1)) PREFETCH(0, MEM(RDX,R12,4)) PREFETCH(0, MEM(RDX,R14,1)) PREFETCH(0, MEM(RDX,R13,2)) PREFETCH(0, MEM(RDX,R15,1)) PREFETCH(0, MEM(RDI )) PREFETCH(0, MEM(RDI,R12,1)) PREFETCH(0, MEM(RDI,R12,2)) 
PREFETCH(0, MEM(RDI,R13,1)) PREFETCH(0, MEM(RDI,R12,4)) PREFETCH(0, MEM(RDI,R14,1)) PREFETCH(0, MEM(RDI,R13,2)) PREFETCH(0, MEM(RDI,R15,1)) #endif MAIN_LOOP_L1 LABEL(POSTACCUM) #ifdef MONITORS RDTSC MOV(VAR(mid2l), EAX) MOV(VAR(mid2h), EDX) #endif MOV(RAX, VAR(alpha)) MOV(RBX, VAR(beta)) VBROADCASTSD(ZMM(0), MEM(RAX)) VBROADCASTSD(ZMM(1), MEM(RBX)) // Check if C is column stride. If not, jump to the slow scattered update MOV(RAX, VAR(cs_c)) LEA(RAX, MEM(,RAX,8)) MOV(RBX, VAR(rs_c)) LEA(RDI, MEM(RAX,RAX,2)) CMP(RBX, IMM(1)) JNE(SCATTEREDUPDATE) VMOVQ(RDX, XMM(1)) SAL1(RDX) //shift out sign bit JZ(COLSTORBZ) UPDATE_C_FOUR_ROWS( 8, 9,10,11) UPDATE_C_FOUR_ROWS(12,13,14,15) UPDATE_C_FOUR_ROWS(16,17,18,19) UPDATE_C_FOUR_ROWS(20,21,22,23) UPDATE_C_FOUR_ROWS(24,25,26,27) UPDATE_C_FOUR_ROWS(28,29,30,31) JMP(END) LABEL(COLSTORBZ) UPDATE_C_BZ_FOUR_ROWS( 8, 9,10,11) UPDATE_C_BZ_FOUR_ROWS(12,13,14,15) UPDATE_C_BZ_FOUR_ROWS(16,17,18,19) UPDATE_C_BZ_FOUR_ROWS(20,21,22,23) UPDATE_C_BZ_FOUR_ROWS(24,25,26,27) UPDATE_C_BZ_FOUR_ROWS(28,29,30,31) JMP(END) LABEL(SCATTEREDUPDATE) MOV(RDI, VAR(offsetPtr)) VMOVAPS(ZMM(2), MEM(RDI)) /* Note that this ignores the upper 32 bits in rs_c */ VPBROADCASTD(ZMM(3), EBX) VPMULLD(ZMM(2), ZMM(3), ZMM(2)) VMOVQ(RDX, XMM(1)) SAL1(RDX) //shift out sign bit JZ(SCATTERBZ) UPDATE_C_ROW_SCATTERED( 8) UPDATE_C_ROW_SCATTERED( 9) UPDATE_C_ROW_SCATTERED(10) UPDATE_C_ROW_SCATTERED(11) UPDATE_C_ROW_SCATTERED(12) UPDATE_C_ROW_SCATTERED(13) UPDATE_C_ROW_SCATTERED(14) UPDATE_C_ROW_SCATTERED(15) UPDATE_C_ROW_SCATTERED(16) UPDATE_C_ROW_SCATTERED(17) UPDATE_C_ROW_SCATTERED(18) UPDATE_C_ROW_SCATTERED(19) UPDATE_C_ROW_SCATTERED(20) UPDATE_C_ROW_SCATTERED(21) UPDATE_C_ROW_SCATTERED(22) UPDATE_C_ROW_SCATTERED(23) UPDATE_C_ROW_SCATTERED(24) UPDATE_C_ROW_SCATTERED(25) UPDATE_C_ROW_SCATTERED(26) UPDATE_C_ROW_SCATTERED(27) UPDATE_C_ROW_SCATTERED(28) UPDATE_C_ROW_SCATTERED(29) UPDATE_C_ROW_SCATTERED(30) UPDATE_C_ROW_SCATTERED(31) JMP(END) LABEL(SCATTERBZ) 
UPDATE_C_BZ_ROW_SCATTERED( 8) UPDATE_C_BZ_ROW_SCATTERED( 9) UPDATE_C_BZ_ROW_SCATTERED(10) UPDATE_C_BZ_ROW_SCATTERED(11) UPDATE_C_BZ_ROW_SCATTERED(12) UPDATE_C_BZ_ROW_SCATTERED(13) UPDATE_C_BZ_ROW_SCATTERED(14) UPDATE_C_BZ_ROW_SCATTERED(15) UPDATE_C_BZ_ROW_SCATTERED(16) UPDATE_C_BZ_ROW_SCATTERED(17) UPDATE_C_BZ_ROW_SCATTERED(18) UPDATE_C_BZ_ROW_SCATTERED(19) UPDATE_C_BZ_ROW_SCATTERED(20) UPDATE_C_BZ_ROW_SCATTERED(21) UPDATE_C_BZ_ROW_SCATTERED(22) UPDATE_C_BZ_ROW_SCATTERED(23) UPDATE_C_BZ_ROW_SCATTERED(24) UPDATE_C_BZ_ROW_SCATTERED(25) UPDATE_C_BZ_ROW_SCATTERED(26) UPDATE_C_BZ_ROW_SCATTERED(27) UPDATE_C_BZ_ROW_SCATTERED(28) UPDATE_C_BZ_ROW_SCATTERED(29) UPDATE_C_BZ_ROW_SCATTERED(30) UPDATE_C_BZ_ROW_SCATTERED(31) LABEL(END) #ifdef MONITORS RDTSC MOV(VAR(botl), EAX) MOV(VAR(both), EDX) #endif : // output operands #ifdef MONITORS [topl] "=m" (topl), [toph] "=m" (toph), [midl] "=m" (midl), [midh] "=m" (midh), [mid2l] "=m" (mid2l), [mid2h] "=m" (mid2h), [botl] "=m" (botl), [both] "=m" (both) #endif : // input operands [k] "m" (k64), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), [a_next] "m" (a_next), [b_next] "m" (b_next), [offsetPtr] "m" (offsetPtr) : // register clobber list "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ); #ifdef LOOPMON printf("looptime = \t%d\n", bloopl - tloopl); #endif #ifdef MONITORS dim_t top = ((dim_t)toph << 32) | topl; dim_t mid = ((dim_t)midh << 32) | midl; dim_t mid2 = ((dim_t)mid2h << 32) | mid2l; dim_t bot = ((dim_t)both << 32) | botl; printf("setup =\t%u\tmain loop =\t%u\tcleanup=\t%u\ttotal=\t%u\n", mid - top, mid2 - mid, bot - mid2, bot 
- top); #endif } blis-0.6.1/kernels/knl/3/other/bli_sgemm_knl_asm_30x16_knc.c000066400000000000000000000400051360743507500234740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include #include "bli_avx512_macros.h" #define A_L1_PREFETCH_DIST 4 #define B_L1_PREFETCH_DIST 2 #define L2_PREFETCH_DIST 16 // Must be greater than 10, because of the way the loop is constructed. 
//Alternate code path used if C is not row-major
// r9 = c
// zmm30 = cs_c * (0...15), i.e. the offsets table multiplied by cs_c
// r11 = rs_c, already scaled to bytes by the kernel
// r12 = &alpha
// r13 = &beta
//
// BNZ1/BNZ2 bracket the instructions that are only needed when beta is
// non-zero (the gather of C and the FMA with beta). The _BZ_ wrappers pass
// COMMENT_BEGIN/COMMENT_END for them, which presumably turns the bracketed
// text into an assembler comment; the regular wrappers pass nothing.
// k2 and k3 are set to all ones as the masks for the gather and the scatter;
// zmm31 receives the gathered values of C. r9 advances by r11 afterwards.
#define UPDATE_C_ROW_SCATTERED_(NUM,BNZ1,BNZ2) \
\
    BNZ1 KXNORW(K(2), K(0), K(0)) BNZ2 \
    KXNORW(K(3), K(0), K(0)) \
    BNZ1 VGATHERDPS(ZMM(31) MASK_K(2), MEM(R(9),ZMM(30),4)) BNZ2 \
    VMULPS(ZMM(NUM), ZMM(NUM), MEM_1TO16(R(12))) /*scale by alpha*/ \
    BNZ1 VFMADD231PS(ZMM(NUM), ZMM(31), MEM_1TO16(R(13))) BNZ2 /*scale by beta, add in result*/ \
    VSCATTERDPS(MEM(R(9),ZMM(30),4) MASK_K(3), ZMM(NUM)) \
    ADD(R(9), R(11))

// General and beta == 0 forms of the scattered update.
#define UPDATE_C_ROW_SCATTERED(NUM) UPDATE_C_ROW_SCATTERED_(NUM,,)
#define UPDATE_C_BZ_ROW_SCATTERED(NUM) UPDATE_C_ROW_SCATTERED_(NUM,COMMENT_BEGIN,COMMENT_END)

// Contiguous update of four accumulators R1..R4.
// r12 = &alpha
// zmm31 = beta
// r9 = c
// r11 = rs_c   (in bytes)
// r10 = 3*rs_c (in bytes)
// rdi = 4*rs_c (in bytes)
// Each accumulator is multiplied by alpha, optionally combined with the
// vector of C at r9 + {0,1,2,3}*r11 through an FMA with zmm31 (BNZ1/BNZ2 as
// above), and stored to that location; r9 then advances by rdi.
#define UPDATE_C_4_ROWS_(R1,R2,R3,R4,BNZ1,BNZ2) \
\
    VMULPS(ZMM(R1), ZMM(R1), MEM_1TO16(R(12))) \
    VMULPS(ZMM(R2), ZMM(R2), MEM_1TO16(R(12))) \
    VMULPS(ZMM(R3), ZMM(R3), MEM_1TO16(R(12))) \
    VMULPS(ZMM(R4), ZMM(R4), MEM_1TO16(R(12))) \
    BNZ1 VFMADD231PS(ZMM(R1), ZMM(31), MEM(R(9)        )) BNZ2 \
    BNZ1 VFMADD231PS(ZMM(R2), ZMM(31), MEM(R(9),R(11),1)) BNZ2 \
    BNZ1 VFMADD231PS(ZMM(R3), ZMM(31), MEM(R(9),R(11),2)) BNZ2 \
    BNZ1 VFMADD231PS(ZMM(R4), ZMM(31), MEM(R(9),R(10),1)) BNZ2 \
    VMOVUPS(MEM(R(9)        ), ZMM(R1)) \
    VMOVUPS(MEM(R(9),R(11),1), ZMM(R2)) \
    VMOVUPS(MEM(R(9),R(11),2), ZMM(R3)) \
    VMOVUPS(MEM(R(9),R(10),1), ZMM(R4)) \
    ADD(R(9), RDI)

// Two-accumulator form of the contiguous update, used for the last two of
// the 30 accumulators. r9 is not advanced.
// r12 = &alpha
// zmm31 = beta
// r9 = c
// r11 = rs_c (in bytes)
#define UPDATE_C_2_ROWS_(R1,R2,BNZ1,BNZ2) \
\
    VMULPS(ZMM(R1), ZMM(R1), MEM_1TO16(R(12))) \
    VMULPS(ZMM(R2), ZMM(R2), MEM_1TO16(R(12))) \
    BNZ1 VFMADD231PS(ZMM(R1), ZMM(31), MEM(R(9)        )) BNZ2 \
    BNZ1 VFMADD231PS(ZMM(R2), ZMM(31), MEM(R(9),R(11),1)) BNZ2 \
    VMOVUPS(MEM(R(9)        ), ZMM(R1)) \
    VMOVUPS(MEM(R(9),R(11),1), ZMM(R2))

// General and beta == 0 forms of the contiguous updates.
#define UPDATE_C_4_ROWS(R1,R2,R3,R4) UPDATE_C_4_ROWS_(R1,R2,R3,R4,,)
#define UPDATE_C_2_ROWS(R1,R2) UPDATE_C_2_ROWS_(R1,R2,,)
#define UPDATE_C_BZ_4_ROWS(R1,R2,R3,R4) UPDATE_C_4_ROWS_(R1,R2,R3,R4,COMMENT_BEGIN,COMMENT_END)
#define UPDATE_C_BZ_2_ROWS(R1,R2) UPDATE_C_2_ROWS_(R1,R2,COMMENT_BEGIN,COMMENT_END)

// One FMA into accumulator zmm<n>, combining the B vector in zmm31 with
// element n of the current sliver of A at r15 (MEM_1TO16, presumably a
// 1-to-16 broadcast operand).
// The _PREV form is used after r15 has already been advanced by one sliver
// (32 floats) inside MAIN_LOOP_, hence the (n-32) offset.
#define A_TIMES_B_ROW(n) VFMADD231PS(ZMM(n), ZMM(31), MEM_1TO16(R(15),n*4))
#define A_TIMES_B_ROW_PREV(n) VFMADD231PS(ZMM(n), ZMM(31), MEM_1TO16(R(15),(n-32)*4))

// Prefetches of A and B. The L1 variants use fixed distances in slivers;
// the L2 variants use the byte offsets the kernel keeps in r14 (A) and
// r13 (B).
#define PREFETCH_A_L1(n) PREFETCH(0, MEM(R(15),A_L1_PREFETCH_DIST*4*32+n*64))
#define PREFETCH_A_L2(n) PREFETCH(1, MEM(R(15),R(14),1,n*64))
#define PREFETCH_B_L1 PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*4*16))
#define PREFETCH_B_L2 PREFETCH(1, MEM(RBX,R(13),1))

//One iteration of the k_r loop.
//Each iteration, we prefetch A into L1 and into L2
// r15 = a
// rbx = b
// rcx = c
// r11 = rs_c
// r13 = L2_PREFETCH_DIST*4*16
// r14 = L2_PREFETCH_DIST*4*32
// r12 = 32*4 = dist. to next sliver of a
// r9  = 16*4 = dist. to next sliver of b
//
// COUNTER is decremented once per expansion and compared with 0 near the
// end, so the flags left behind are what the JNZ following each use of the
// macro tests.
// PC_L1_1/PC_L1_2 bracket the L1 prefetches of C (and the matching advance
// of rcx); PC_L2_1/PC_L2_2 bracket the L2 prefetch of C. The wrappers below
// disable either group by passing COMMENT_BEGIN/COMMENT_END.
#define MAIN_LOOP_(COUNTER, PC_L1_1, PC_L1_2, PC_L2_1, PC_L2_2) \
\
    /* Can this be pre-loaded for next it. in zmm30? */ \
    VMOVAPS(ZMM(31), MEM(RBX)) \
\
    A_TIMES_B_ROW     ( 0) \
    A_TIMES_B_ROW     ( 1)    PREFETCH_A_L1(0) \
    A_TIMES_B_ROW     ( 2)    PREFETCH_A_L1(1) \
    A_TIMES_B_ROW     ( 3)    PREFETCH_A_L1(2) \
    A_TIMES_B_ROW     ( 4)    PREFETCH_A_L1(3) \
    A_TIMES_B_ROW     ( 5)    PREFETCH_A_L2(0) \
    A_TIMES_B_ROW     ( 6)    PC_L1_1 PREFETCH(0, MEM(RCX)) PC_L1_2 \
    A_TIMES_B_ROW     ( 7)    PC_L1_1 ADD(RCX, R(11)) PC_L1_2 \
    A_TIMES_B_ROW     ( 8) \
    A_TIMES_B_ROW     ( 9)    PC_L2_1 PREFETCH(1, MEM(RCX)) PC_L2_2 \
    A_TIMES_B_ROW     (10)    PREFETCH_A_L2(1) \
    A_TIMES_B_ROW     (11)    PC_L1_1 PREFETCH(0, MEM(RCX)) PC_L1_2 \
    A_TIMES_B_ROW     (12)    PC_L1_1 ADD(RCX, R(11)) PC_L1_2 \
    A_TIMES_B_ROW     (13) \
    A_TIMES_B_ROW     (14) \
    A_TIMES_B_ROW     (15)    PREFETCH_A_L2(2) \
    A_TIMES_B_ROW     (16)    PC_L1_1 PREFETCH(0, MEM(RCX)) PC_L1_2 \
    A_TIMES_B_ROW     (17)    PC_L1_1 ADD(RCX, R(11)) PC_L1_2 \
    A_TIMES_B_ROW     (18) \
    A_TIMES_B_ROW     (19) \
    A_TIMES_B_ROW     (20)    PREFETCH_A_L2(3) \
    A_TIMES_B_ROW     (21)    ADD(R(15), R(12)) \
    A_TIMES_B_ROW_PREV(22) \
    A_TIMES_B_ROW_PREV(23)    PC_L2_1 ADD(RCX, R(11)) PC_L2_2 \
    A_TIMES_B_ROW_PREV(24)    DEC(COUNTER) \
    A_TIMES_B_ROW_PREV(25)    PREFETCH_B_L2 \
    A_TIMES_B_ROW_PREV(26)    PREFETCH_B_L1 \
    A_TIMES_B_ROW_PREV(27)    ADD(RBX, R(9)) \
    A_TIMES_B_ROW_PREV(28)    CMP(COUNTER, IMM(0)) \
    A_TIMES_B_ROW_PREV(29)

// MAIN_LOOP:       no prefetching of C.
// MAIN_LOOP_PC_L1: with the L1 prefetches of C.
// MAIN_LOOP_PC_L2: with the L2 prefetch of C.
#define MAIN_LOOP(COUNTER) MAIN_LOOP_(COUNTER,COMMENT_BEGIN,COMMENT_END,COMMENT_BEGIN,COMMENT_END)
#define MAIN_LOOP_PC_L1(COUNTER) MAIN_LOOP_(COUNTER,,,COMMENT_BEGIN,COMMENT_END)
#define MAIN_LOOP_PC_L2(COUNTER) MAIN_LOOP_(COUNTER,COMMENT_BEGIN,COMMENT_END,,)

//This is an array used for the scatter/gather instructions.
//It holds the indices 0..31 and is aligned to 0x1000 bytes; the kernels load
//it with aligned vector moves and multiply it by a stride of C.
int32_t offsets[32] __attribute__((aligned(0x1000))) = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                                                         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};

//#define MONITORS
//#define LOOPMON
void bli_sgemm_knl_asm_30x16_knc
     (
       dim_t               k_,
       float*     restrict alpha,
       float*     restrict a,
       float*     restrict b,
       float*     restrict beta,
       float*     restrict c, inc_t rs_c_, inc_t cs_c_,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
    (void)data;
    (void)cntx;

    const float * a_next = bli_auxinfo_next_a( data );
    const float * b_next = bli_auxinfo_next_b( data );

    const int32_t * offsetPtr = &offsets[0];
    const int64_t k = k_;
    const int64_t rs_c = rs_c_;
    const int64_t cs_c = cs_c_;

#ifdef MONITORS
    int toph, topl, both, botl, midl, midh, mid2l, mid2h;
#endif
#ifdef LOOPMON
    int tlooph, tloopl, blooph, bloopl;
#endif

    __asm__ volatile
    (
#ifdef MONITORS
    RDTSC
    MOV(VAR(topl), EAX)
    MOV(VAR(toph), EDX)
#endif

    VPXORD(ZMM(0), ZMM(0), ZMM(0)) //clear out registers
    VMOVAPS(ZMM( 1), ZMM(0))
    VMOVAPS(ZMM( 2), ZMM(0))
    MOV(RSI, VAR(k)) //loop index
    VMOVAPS(ZMM( 3), ZMM(0))
    MOV(R(11), VAR(rs_c)) //load row stride
    VMOVAPS(ZMM( 4), ZMM(0))
    SAL(R(11), IMM(2)) //scale row stride
    VMOVAPS(ZMM( 5), ZMM(0))
    MOV(R(15), VAR(a)) //load address of a
    VMOVAPS(ZMM( 6), ZMM(0))
    MOV(RBX, VAR(b)) //load address of b
    VMOVAPS(ZMM( 7), ZMM(0))
    VMOVAPS(ZMM( 8), ZMM(0))
    LEA(R(10), MEM(R(11),R(11),2)) //r10 has 3 * r11
    VMOVAPS(ZMM( 9), ZMM(0))
    VMOVAPS(ZMM(10), ZMM(0))
    MOV(RDI, R(11))
    VMOVAPS(ZMM(11), ZMM(0))
    SAL(RDI, IMM(2)) //rdi has
4*r11 VMOVAPS(ZMM(12), ZMM(0)) MOV(RCX, VAR(c)) //load address of c for prefetching VMOVAPS(ZMM(13), ZMM(0)) VMOVAPS(ZMM(14), ZMM(0)) MOV(R(8), VAR(k)) VMOVAPS(ZMM(15), ZMM(0)) VMOVAPS(ZMM(16), ZMM(0)) VMOVAPS(ZMM(17), ZMM(0)) MOV(R(13), IMM(4*16*L2_PREFETCH_DIST)) VMOVAPS(ZMM(18), ZMM(0)) MOV(R(14), IMM(4*32*L2_PREFETCH_DIST)) VMOVAPS(ZMM(19), ZMM(0)) VMOVAPS(ZMM(20), ZMM(0)) VMOVAPS(ZMM(21), ZMM(0)) VMOVAPS(ZMM(22), ZMM(0)) VMOVAPS(ZMM(23), ZMM(0)) SUB(R(8), IMM(30+L2_PREFETCH_DIST)) //Check if we have over 40 operations to do. VMOVAPS(ZMM(24), ZMM(0)) MOV(R(8), IMM(30)) VMOVAPS(ZMM(25), ZMM(0)) MOV(R(9), IMM(4*16)) //amount to increment b* by each iteration VMOVAPS(ZMM(26), ZMM(0)) MOV(R(12), IMM(4*32)) //amount to increment a* by each iteration VMOVAPS(ZMM(27), ZMM(0)) VMOVAPS(ZMM(28), ZMM(0)) VMOVAPS(ZMM(29), ZMM(0)) #ifdef MONITORS RDTSC MOV(VAR(midl), EAX) MOV(VAR(midh), EDX) #endif JLE(CONSIDER_UNDER_40) SUB(RSI, IMM(30+L2_PREFETCH_DIST)) //First 30 iterations LABEL(LOOPREFECHCL2) MAIN_LOOP_PC_L2(R(8)) JNZ(LOOPREFECHCL2) MOV(RCX, VAR(c)) //Main Loop. LABEL(LOOPMAIN) MAIN_LOOP(RSI) JNZ(LOOPMAIN) //Penultimate 22 iterations. //Break these off from the main loop to avoid prefetching extra shit. MOV(R(14), VAR(a_next)) MOV(R(13), VAR(b_next)) SUB(R(14), R(15)) SUB(R(13), RBX) //Yes, I know 10-20 = -10 MOV(RSI, IMM(10+L2_PREFETCH_DIST-20)) LABEL(LOOPMAIN2) MAIN_LOOP(RSI) JNZ(LOOPMAIN2) //Last 10 iterations MOV(R(8), IMM(10)) LABEL(LOOPREFETCHCL1) MAIN_LOOP_PC_L1(R(8)) JNZ(LOOPREFETCHCL1) JMP(POSTACCUM) //Alternate main loop, with no prefetching of C //Used when <= 40 iterations LABEL(CONSIDER_UNDER_40) MOV(RSI, VAR(k)) TEST(RSI, RSI) JZ(POSTACCUM) LABEL(LOOP_UNDER_40) MAIN_LOOP(RSI) JNZ(LOOP_UNDER_40) LABEL(POSTACCUM) #ifdef MONITORS RDTSC MOV(VAR(mid2l), EAX) MOV(VAR(mid2h), EDX) #endif MOV(R(9), VAR(c)) //load address of c for update MOV(R(12), VAR(alpha)) //load address of alpha // Check if C is row stride. 
If not, jump to the slow scattered update MOV(R(14), VAR(cs_c)) DEC(R(14)) JNZ(SCATTEREDUPDATE) MOV(R(14), VAR(beta)) VBROADCASTSS(ZMM(31), MEM(R(14))) MOV(EBX, MEM(R(14))) TEST(EBX, EBX) JZ(COLSTORBZ) UPDATE_C_4_ROWS( 0, 1, 2, 3) UPDATE_C_4_ROWS( 4, 5, 6, 7) UPDATE_C_4_ROWS( 8, 9,10,11) UPDATE_C_4_ROWS(12,13,14,15) UPDATE_C_4_ROWS(16,17,18,19) UPDATE_C_4_ROWS(20,21,22,23) UPDATE_C_4_ROWS(24,25,26,27) UPDATE_C_2_ROWS(28,29) JMP(END) LABEL(COLSTORBZ) UPDATE_C_BZ_4_ROWS( 0, 1, 2, 3) UPDATE_C_BZ_4_ROWS( 4, 5, 6, 7) UPDATE_C_BZ_4_ROWS( 8, 9,10,11) UPDATE_C_BZ_4_ROWS(12,13,14,15) UPDATE_C_BZ_4_ROWS(16,17,18,19) UPDATE_C_BZ_4_ROWS(20,21,22,23) UPDATE_C_BZ_4_ROWS(24,25,26,27) UPDATE_C_BZ_2_ROWS(28,29) JMP(END) LABEL(SCATTEREDUPDATE) MOV(R(13), VAR(beta)) MOV(R(10), VAR(offsetPtr)) VMOVAPS(ZMM(30), MEM(R(10))) MOV(EBX, MEM(R(13))) /* Note that this ignores the upper 32 bits in cs_c */ VPBROADCASTD(ZMM(31), VAR(cs_c)) VPMULLD(ZMM(30), ZMM(31), ZMM(30)) TEST(EBX, EBX) JZ(SCATTERBZ) UPDATE_C_ROW_SCATTERED( 0) UPDATE_C_ROW_SCATTERED( 1) UPDATE_C_ROW_SCATTERED( 2) UPDATE_C_ROW_SCATTERED( 3) UPDATE_C_ROW_SCATTERED( 4) UPDATE_C_ROW_SCATTERED( 5) UPDATE_C_ROW_SCATTERED( 6) UPDATE_C_ROW_SCATTERED( 7) UPDATE_C_ROW_SCATTERED( 8) UPDATE_C_ROW_SCATTERED( 9) UPDATE_C_ROW_SCATTERED(10) UPDATE_C_ROW_SCATTERED(11) UPDATE_C_ROW_SCATTERED(12) UPDATE_C_ROW_SCATTERED(13) UPDATE_C_ROW_SCATTERED(14) UPDATE_C_ROW_SCATTERED(15) UPDATE_C_ROW_SCATTERED(16) UPDATE_C_ROW_SCATTERED(17) UPDATE_C_ROW_SCATTERED(18) UPDATE_C_ROW_SCATTERED(19) UPDATE_C_ROW_SCATTERED(20) UPDATE_C_ROW_SCATTERED(21) UPDATE_C_ROW_SCATTERED(22) UPDATE_C_ROW_SCATTERED(23) UPDATE_C_ROW_SCATTERED(24) UPDATE_C_ROW_SCATTERED(25) UPDATE_C_ROW_SCATTERED(26) UPDATE_C_ROW_SCATTERED(27) UPDATE_C_ROW_SCATTERED(28) UPDATE_C_ROW_SCATTERED(29) JMP(END) LABEL(SCATTERBZ) UPDATE_C_BZ_ROW_SCATTERED( 0) UPDATE_C_BZ_ROW_SCATTERED( 1) UPDATE_C_BZ_ROW_SCATTERED( 2) UPDATE_C_BZ_ROW_SCATTERED( 3) UPDATE_C_BZ_ROW_SCATTERED( 4) UPDATE_C_BZ_ROW_SCATTERED( 
5) UPDATE_C_BZ_ROW_SCATTERED( 6) UPDATE_C_BZ_ROW_SCATTERED( 7) UPDATE_C_BZ_ROW_SCATTERED( 8) UPDATE_C_BZ_ROW_SCATTERED( 9) UPDATE_C_BZ_ROW_SCATTERED(10) UPDATE_C_BZ_ROW_SCATTERED(11) UPDATE_C_BZ_ROW_SCATTERED(12) UPDATE_C_BZ_ROW_SCATTERED(13) UPDATE_C_BZ_ROW_SCATTERED(14) UPDATE_C_BZ_ROW_SCATTERED(15) UPDATE_C_BZ_ROW_SCATTERED(16) UPDATE_C_BZ_ROW_SCATTERED(17) UPDATE_C_BZ_ROW_SCATTERED(18) UPDATE_C_BZ_ROW_SCATTERED(19) UPDATE_C_BZ_ROW_SCATTERED(20) UPDATE_C_BZ_ROW_SCATTERED(21) UPDATE_C_BZ_ROW_SCATTERED(22) UPDATE_C_BZ_ROW_SCATTERED(23) UPDATE_C_BZ_ROW_SCATTERED(24) UPDATE_C_BZ_ROW_SCATTERED(25) UPDATE_C_BZ_ROW_SCATTERED(26) UPDATE_C_BZ_ROW_SCATTERED(27) UPDATE_C_BZ_ROW_SCATTERED(28) UPDATE_C_BZ_ROW_SCATTERED(29) LABEL(END) #ifdef MONITORS RDTSC MOV(VAR(botl), EAX) MOV(VAR(both), EDX) #endif : // output operands #ifdef MONITORS [topl] "=m" (topl), [toph] "=m" (toph), [midl] "=m" (midl), [midh] "=m" (midh), [mid2l] "=m" (mid2l), [mid2h] "=m" (mid2h), [botl] "=m" (botl), [both] "=m" (both) #endif : // input operands [k] "m" (k), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), [a_next] "m" (a_next), [b_next] "m" (b_next), [offsetPtr] "m" (offsetPtr) : // register clobber list "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ); #ifdef LOOPMON printf("looptime = \t%d\n", bloopl - tloopl); #endif #ifdef MONITORS dim_t top = ((dim_t)toph << 32) | topl; dim_t mid = ((dim_t)midh << 32) | midl; dim_t mid2 = ((dim_t)mid2h << 32) | mid2l; dim_t bot = ((dim_t)both << 32) | botl; printf("setup =\t%u\tmain loop =\t%u\tcleanup=\t%u\ttotal=\t%u\n", mid - top, mid2 - mid, bot - mid2, bot 
- top); #endif } blis-0.6.1/kernels/knl/bli_kernels_knl.h000066400000000000000000000042041360743507500202160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ GEMM_UKR_PROT( double, s, gemm_knl_asm_24x16 ) GEMM_UKR_PROT( double, d, gemm_knl_asm_24x8 ) PACKM_KER_PROT( double, s, packm_knl_asm_24xk ) PACKM_KER_PROT( double, s, packm_knl_asm_16xk ) PACKM_KER_PROT( double, d, packm_knl_asm_24xk ) PACKM_KER_PROT( double, d, packm_knl_asm_8xk ) // unused: GEMM_UKR_PROT( double, d, gemm_knl_asm_12x16 ) GEMM_UKR_PROT( double, d, gemm_knl_asm_30x8 ) GEMM_UKR_PROT( double, d, gemm_knl_asm_8x24 ) PACKM_KER_PROT( double, d, packm_knl_asm_30xk ) blis-0.6.1/kernels/old/000077500000000000000000000000001360743507500147025ustar00rootroot00000000000000blis-0.6.1/kernels/old/c99/000077500000000000000000000000001360743507500153065ustar00rootroot00000000000000blis-0.6.1/kernels/old/c99/3/000077500000000000000000000000001360743507500154505ustar00rootroot00000000000000blis-0.6.1/kernels/old/c99/3/bli_gemm_c99_4x4.c000066400000000000000000000142671360743507500205640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, kername ) \ \ void PASTEMAC(ch,kername) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ ctype a0; \ ctype a1; \ ctype a2; \ ctype a3; \ \ ctype b0, b1, b2, b3; \ \ ctype ab00, ab01, ab02, ab03; \ ctype ab10, ab11, ab12, ab13; \ ctype ab20, ab21, ab22, ab23; \ ctype ab30, ab31, ab32, ab33; \ \ ctype* c00, * c01, * c02, * c03; \ ctype* c10, * c11, * c12, * c13; \ ctype* c20, * c21, * c22, * c23; \ ctype* c30, * c31, * c32, * c33; \ \ dim_t i; \ \ \ c00 = (c + 0*rs_c + 0*cs_c); \ c10 = (c + 1*rs_c + 0*cs_c); \ c20 = (c + 2*rs_c + 0*cs_c); \ c30 = (c + 3*rs_c + 0*cs_c); \ \ c01 = (c + 0*rs_c + 1*cs_c); \ c11 = (c + 1*rs_c + 1*cs_c); \ c21 = (c + 2*rs_c + 1*cs_c); \ c31 = (c + 3*rs_c + 1*cs_c); \ \ c02 = (c + 0*rs_c + 2*cs_c); \ c12 = (c + 1*rs_c + 2*cs_c); \ c22 = (c + 2*rs_c + 2*cs_c); \ c32 = (c + 3*rs_c + 2*cs_c); \ \ c03 = (c + 0*rs_c + 3*cs_c); \ c13 = (c + 1*rs_c + 3*cs_c); \ c23 = (c + 2*rs_c + 3*cs_c); \ c33 = (c + 3*rs_c + 3*cs_c); \ \ PASTEMAC(ch,set0s)( ab00 ); \ PASTEMAC(ch,set0s)( ab10 ); \ PASTEMAC(ch,set0s)( ab20 ); \ PASTEMAC(ch,set0s)( ab30 ); \ \ PASTEMAC(ch,set0s)( ab01 ); \ PASTEMAC(ch,set0s)( ab11 ); \ PASTEMAC(ch,set0s)( ab21 ); \ PASTEMAC(ch,set0s)( ab31 ); \ \ PASTEMAC(ch,set0s)( ab02 ); \ PASTEMAC(ch,set0s)( ab12 
); \ PASTEMAC(ch,set0s)( ab22 ); \ PASTEMAC(ch,set0s)( ab32 ); \ \ PASTEMAC(ch,set0s)( ab03 ); \ PASTEMAC(ch,set0s)( ab13 ); \ PASTEMAC(ch,set0s)( ab23 ); \ PASTEMAC(ch,set0s)( ab33 ); \ \ for ( i = 0; i < k; ++i ) \ { \ a0 = *(a + 0); \ a1 = *(a + 1); \ a2 = *(a + 2); \ a3 = *(a + 3); \ \ b0 = *(b + 0); \ b1 = *(b + 1); \ b2 = *(b + 2); \ b3 = *(b + 3); \ \ PASTEMAC(ch,dots)( a0, b0, ab00 ); \ PASTEMAC(ch,dots)( a1, b0, ab10 ); \ PASTEMAC(ch,dots)( a2, b0, ab20 ); \ PASTEMAC(ch,dots)( a3, b0, ab30 ); \ \ PASTEMAC(ch,dots)( a0, b1, ab01 ); \ PASTEMAC(ch,dots)( a1, b1, ab11 ); \ PASTEMAC(ch,dots)( a2, b1, ab21 ); \ PASTEMAC(ch,dots)( a3, b1, ab31 ); \ \ PASTEMAC(ch,dots)( a0, b2, ab02 ); \ PASTEMAC(ch,dots)( a1, b2, ab12 ); \ PASTEMAC(ch,dots)( a2, b2, ab22 ); \ PASTEMAC(ch,dots)( a3, b2, ab32 ); \ \ PASTEMAC(ch,dots)( a0, b3, ab03 ); \ PASTEMAC(ch,dots)( a1, b3, ab13 ); \ PASTEMAC(ch,dots)( a2, b3, ab23 ); \ PASTEMAC(ch,dots)( a3, b3, ab33 ); \ \ a += 4; \ b += 4; \ } \ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ PASTEMAC(ch,set0s)( *c00 ); \ PASTEMAC(ch,set0s)( *c10 ); \ PASTEMAC(ch,set0s)( *c20 ); \ PASTEMAC(ch,set0s)( *c30 ); \ \ PASTEMAC(ch,set0s)( *c01 ); \ PASTEMAC(ch,set0s)( *c11 ); \ PASTEMAC(ch,set0s)( *c21 ); \ PASTEMAC(ch,set0s)( *c31 ); \ \ PASTEMAC(ch,set0s)( *c02 ); \ PASTEMAC(ch,set0s)( *c12 ); \ PASTEMAC(ch,set0s)( *c22 ); \ PASTEMAC(ch,set0s)( *c32 ); \ \ PASTEMAC(ch,set0s)( *c03 ); \ PASTEMAC(ch,set0s)( *c13 ); \ PASTEMAC(ch,set0s)( *c23 ); \ PASTEMAC(ch,set0s)( *c33 ); \ } \ else \ { \ PASTEMAC(ch,scals)( *beta, *c00 ); \ PASTEMAC(ch,scals)( *beta, *c10 ); \ PASTEMAC(ch,scals)( *beta, *c20 ); \ PASTEMAC(ch,scals)( *beta, *c30 ); \ \ PASTEMAC(ch,scals)( *beta, *c01 ); \ PASTEMAC(ch,scals)( *beta, *c11 ); \ PASTEMAC(ch,scals)( *beta, *c21 ); \ PASTEMAC(ch,scals)( *beta, *c31 ); \ \ PASTEMAC(ch,scals)( *beta, *c02 ); \ PASTEMAC(ch,scals)( *beta, *c12 ); \ PASTEMAC(ch,scals)( *beta, *c22 ); \ PASTEMAC(ch,scals)( *beta, *c32 ); \ \ PASTEMAC(ch,scals)( 
*beta, *c03 ); \ PASTEMAC(ch,scals)( *beta, *c13 ); \ PASTEMAC(ch,scals)( *beta, *c23 ); \ PASTEMAC(ch,scals)( *beta, *c33 ); \ } \ \ PASTEMAC(ch,dots)( *alpha, ab00, *c00 ); \ PASTEMAC(ch,dots)( *alpha, ab10, *c10 ); \ PASTEMAC(ch,dots)( *alpha, ab20, *c20 ); \ PASTEMAC(ch,dots)( *alpha, ab30, *c30 ); \ \ PASTEMAC(ch,dots)( *alpha, ab01, *c01 ); \ PASTEMAC(ch,dots)( *alpha, ab11, *c11 ); \ PASTEMAC(ch,dots)( *alpha, ab21, *c21 ); \ PASTEMAC(ch,dots)( *alpha, ab31, *c31 ); \ \ PASTEMAC(ch,dots)( *alpha, ab02, *c02 ); \ PASTEMAC(ch,dots)( *alpha, ab12, *c12 ); \ PASTEMAC(ch,dots)( *alpha, ab22, *c22 ); \ PASTEMAC(ch,dots)( *alpha, ab32, *c32 ); \ \ PASTEMAC(ch,dots)( *alpha, ab03, *c03 ); \ PASTEMAC(ch,dots)( *alpha, ab13, *c13 ); \ PASTEMAC(ch,dots)( *alpha, ab23, *c23 ); \ PASTEMAC(ch,dots)( *alpha, ab33, *c33 ); \ } INSERT_GENTFUNC_BASIC0( gemm_c99_4x4 ) blis-0.6.1/kernels/old/c99/3/bli_gemmtrsm_l_c99_4x4.c000066400000000000000000000053701360743507500220000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname, gemmkerid, trsmkerid ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a10, \ ctype* restrict a11, \ ctype* restrict b01, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ const inc_t rs_b = 4; \ const inc_t cs_b = 1; \ \ ctype* minus_one = PASTEMAC(ch,m1); \ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, gemmkerid, cntx ); \ PASTECH(ch,trsm_ukr_ft) \ trsm_ukr = bli_cntx_get_l3_ukr_dt( dt, trsmkerid, cntx ); \ \ gemm_ukr \ ( \ k, \ minus_one, \ a10, \ b01, \ alpha, \ b11, rs_b, cs_b, \ data, \ cntx \ ); \ \ trsm_ukr \ ( \ a11, \ b11, \ c11, rs_c, cs_c, \ data, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC2( gemmtrsm_l_c99_4x4, BLIS_GEMM_UKR, BLIS_TRSM_L_UKR ) blis-0.6.1/kernels/old/c99/3/bli_gemmtrsm_u_c99_4x4.c000066400000000000000000000053701360743507500220110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname, gemmkerid, trsmkerid ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a12, \ ctype* restrict a11, \ ctype* restrict b21, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ const inc_t rs_b = 4; \ const inc_t cs_b = 1; \ \ ctype* minus_one = PASTEMAC(ch,m1); \ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, gemmkerid, cntx ); \ PASTECH(ch,trsm_ukr_ft) \ trsm_ukr = bli_cntx_get_l3_ukr_dt( dt, trsmkerid, cntx ); \ \ gemm_ukr \ ( \ k, \ minus_one, \ a12, \ b21, \ alpha, \ b11, rs_b, cs_b, \ data, \ cntx \ ); \ \ trsm_ukr \ ( \ a11, \ b11, \ c11, rs_c, cs_c, \ data, \ cntx \ ); \ } INSERT_GENTFUNC_BASIC2( gemmtrsm_u_c99_4x4, BLIS_GEMM_UKR, BLIS_TRSM_U_UKR ) blis-0.6.1/kernels/old/c99/3/bli_trsm_l_c99_4x4.c000066400000000000000000000135231360743507500211310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const dim_t rs_a = 1; \ const dim_t cs_a = 4; \ \ const dim_t rs_b = 4; \ const dim_t cs_b = 1; \ \ ctype a00; \ ctype a10, a11; \ ctype a20, a21, a22; \ ctype a30, a31, a32, a33; \ \ ctype b00, b01, b02, b03; \ ctype b10, b11, b12, b13; \ ctype b20, b21, b22, b23; \ ctype b30, b31, b32, b33; \ \ \ /* Load contents of B. 
*/ \ \ b00 = *(b + 0*rs_b + 0*cs_b); \ b01 = *(b + 0*rs_b + 1*cs_b); \ b02 = *(b + 0*rs_b + 2*cs_b); \ b03 = *(b + 0*rs_b + 3*cs_b); \ \ b10 = *(b + 1*rs_b + 0*cs_b); \ b11 = *(b + 1*rs_b + 1*cs_b); \ b12 = *(b + 1*rs_b + 2*cs_b); \ b13 = *(b + 1*rs_b + 3*cs_b); \ \ b20 = *(b + 2*rs_b + 0*cs_b); \ b21 = *(b + 2*rs_b + 1*cs_b); \ b22 = *(b + 2*rs_b + 2*cs_b); \ b23 = *(b + 2*rs_b + 3*cs_b); \ \ b30 = *(b + 3*rs_b + 0*cs_b); \ b31 = *(b + 3*rs_b + 1*cs_b); \ b32 = *(b + 3*rs_b + 2*cs_b); \ b33 = *(b + 3*rs_b + 3*cs_b); \ \ \ /* iteration 0 */ \ \ a00 = *(a + 0*rs_a + 0*cs_a); \ \ PASTEMAC(ch,scals)( a00, b00 ); \ PASTEMAC(ch,scals)( a00, b01 ); \ PASTEMAC(ch,scals)( a00, b02 ); \ PASTEMAC(ch,scals)( a00, b03 ); \ \ *(b + 0*rs_b + 0*cs_b) = b00; \ *(b + 0*rs_b + 1*cs_b) = b01; \ *(b + 0*rs_b + 2*cs_b) = b02; \ *(b + 0*rs_b + 3*cs_b) = b03; \ \ *(c + 0*rs_c + 0*cs_c) = b00; \ *(c + 0*rs_c + 1*cs_c) = b01; \ *(c + 0*rs_c + 2*cs_c) = b02; \ *(c + 0*rs_c + 3*cs_c) = b03; \ \ \ /* iteration 1 */ \ \ a10 = *(a + 1*rs_a + 0*cs_a); \ a11 = *(a + 1*rs_a + 1*cs_a); \ \ PASTEMAC(ch,axmys)( a10, b00, b10 ); \ PASTEMAC(ch,axmys)( a10, b01, b11 ); \ PASTEMAC(ch,axmys)( a10, b02, b12 ); \ PASTEMAC(ch,axmys)( a10, b03, b13 ); \ \ PASTEMAC(ch,scals)( a11, b10 ); \ PASTEMAC(ch,scals)( a11, b11 ); \ PASTEMAC(ch,scals)( a11, b12 ); \ PASTEMAC(ch,scals)( a11, b13 ); \ \ *(b + 1*rs_b + 0*cs_b) = b10; \ *(b + 1*rs_b + 1*cs_b) = b11; \ *(b + 1*rs_b + 2*cs_b) = b12; \ *(b + 1*rs_b + 3*cs_b) = b13; \ \ *(c + 1*rs_c + 0*cs_c) = b10; \ *(c + 1*rs_c + 1*cs_c) = b11; \ *(c + 1*rs_c + 2*cs_c) = b12; \ *(c + 1*rs_c + 3*cs_c) = b13; \ \ \ /* iteration 2 */ \ \ a20 = *(a + 2*rs_a + 0*cs_a); \ a21 = *(a + 2*rs_a + 1*cs_a); \ a22 = *(a + 2*rs_a + 2*cs_a); \ \ PASTEMAC(ch,axmys)( a20, b00, b20 ); \ PASTEMAC(ch,axmys)( a20, b01, b21 ); \ PASTEMAC(ch,axmys)( a20, b02, b22 ); \ PASTEMAC(ch,axmys)( a20, b03, b23 ); \ \ PASTEMAC(ch,axmys)( a21, b10, b20 ); \ PASTEMAC(ch,axmys)( a21, b11, b21 ); \ 
PASTEMAC(ch,axmys)( a21, b12, b22 ); \ PASTEMAC(ch,axmys)( a21, b13, b23 ); \ \ PASTEMAC(ch,scals)( a22, b20 ); \ PASTEMAC(ch,scals)( a22, b21 ); \ PASTEMAC(ch,scals)( a22, b22 ); \ PASTEMAC(ch,scals)( a22, b23 ); \ \ *(b + 2*rs_b + 0*cs_b) = b20; \ *(b + 2*rs_b + 1*cs_b) = b21; \ *(b + 2*rs_b + 2*cs_b) = b22; \ *(b + 2*rs_b + 3*cs_b) = b23; \ \ *(c + 2*rs_c + 0*cs_c) = b20; \ *(c + 2*rs_c + 1*cs_c) = b21; \ *(c + 2*rs_c + 2*cs_c) = b22; \ *(c + 2*rs_c + 3*cs_c) = b23; \ \ \ /* iteration 3 */ \ \ a30 = *(a + 3*rs_a + 0*cs_a); \ a31 = *(a + 3*rs_a + 1*cs_a); \ a32 = *(a + 3*rs_a + 2*cs_a); \ a33 = *(a + 3*rs_a + 3*cs_a); \ \ PASTEMAC(ch,axmys)( a30, b00, b30 ); \ PASTEMAC(ch,axmys)( a30, b01, b31 ); \ PASTEMAC(ch,axmys)( a30, b02, b32 ); \ PASTEMAC(ch,axmys)( a30, b03, b33 ); \ \ PASTEMAC(ch,axmys)( a31, b10, b30 ); \ PASTEMAC(ch,axmys)( a31, b11, b31 ); \ PASTEMAC(ch,axmys)( a31, b12, b32 ); \ PASTEMAC(ch,axmys)( a31, b13, b33 ); \ \ PASTEMAC(ch,axmys)( a32, b20, b30 ); \ PASTEMAC(ch,axmys)( a32, b21, b31 ); \ PASTEMAC(ch,axmys)( a32, b22, b32 ); \ PASTEMAC(ch,axmys)( a32, b23, b33 ); \ \ PASTEMAC(ch,scals)( a33, b30 ); \ PASTEMAC(ch,scals)( a33, b31 ); \ PASTEMAC(ch,scals)( a33, b32 ); \ PASTEMAC(ch,scals)( a33, b33 ); \ \ *(b + 3*rs_b + 0*cs_b) = b30; \ *(b + 3*rs_b + 1*cs_b) = b31; \ *(b + 3*rs_b + 2*cs_b) = b32; \ *(b + 3*rs_b + 3*cs_b) = b33; \ \ *(c + 3*rs_c + 0*cs_c) = b30; \ *(c + 3*rs_c + 1*cs_c) = b31; \ *(c + 3*rs_c + 2*cs_c) = b32; \ *(c + 3*rs_c + 3*cs_c) = b33; \ } INSERT_GENTFUNC_BASIC0( trsm_l_c99_4x4 ) blis-0.6.1/kernels/old/c99/3/bli_trsm_u_c99_4x4.c000066400000000000000000000135611360743507500211440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const dim_t rs_a = 1; \ const dim_t cs_a = 4; \ \ const dim_t rs_b = 4; \ const dim_t cs_b = 1; \ \ ctype a00, a01, a02, a03; \ ctype a11, a12, a13; \ ctype a22, a23; \ ctype a33; \ \ ctype b00, b01, b02, b03; \ ctype b10, b11, b12, b13; \ ctype b20, b21, b22, b23; \ ctype b30, b31, b32, b33; \ \ \ /* Load contents of B. */ \ \ b00 = *(b + 0*rs_b + 0*cs_b); \ b01 = *(b + 0*rs_b + 1*cs_b); \ b02 = *(b + 0*rs_b + 2*cs_b); \ b03 = *(b + 0*rs_b + 3*cs_b); \ \ b10 = *(b + 1*rs_b + 0*cs_b); \ b11 = *(b + 1*rs_b + 1*cs_b); \ b12 = *(b + 1*rs_b + 2*cs_b); \ b13 = *(b + 1*rs_b + 3*cs_b); \ \ b20 = *(b + 2*rs_b + 0*cs_b); \ b21 = *(b + 2*rs_b + 1*cs_b); \ b22 = *(b + 2*rs_b + 2*cs_b); \ b23 = *(b + 2*rs_b + 3*cs_b); \ \ b30 = *(b + 3*rs_b + 0*cs_b); \ b31 = *(b + 3*rs_b + 1*cs_b); \ b32 = *(b + 3*rs_b + 2*cs_b); \ b33 = *(b + 3*rs_b + 3*cs_b); \ \ \ /* iteration 0 */ \ \ a33 = *(a + 3*rs_a + 3*cs_a); \ \ PASTEMAC(ch,scals)( a33, b30 ); \ PASTEMAC(ch,scals)( a33, b31 ); \ PASTEMAC(ch,scals)( a33, b32 ); \ PASTEMAC(ch,scals)( a33, b33 ); \ \ *(b + 3*rs_b + 0*cs_b) = b30; \ *(b + 3*rs_b + 1*cs_b) = b31; \ *(b + 3*rs_b + 2*cs_b) = b32; \ *(b + 3*rs_b + 3*cs_b) = b33; \ \ *(c + 3*rs_c + 0*cs_c) = b30; \ *(c + 3*rs_c + 1*cs_c) = b31; \ *(c + 3*rs_c + 2*cs_c) = b32; \ *(c + 3*rs_c + 3*cs_c) = b33; \ \ \ /* iteration 1 */ \ \ a22 = *(a + 2*rs_a + 2*cs_a); \ a23 = *(a + 2*rs_a + 3*cs_a); \ \ PASTEMAC(ch,axmys)( a23, b30, b20 ); \ PASTEMAC(ch,axmys)( a23, b31, b21 ); \ PASTEMAC(ch,axmys)( a23, b32, b22 ); \ PASTEMAC(ch,axmys)( a23, b33, b23 ); \ \ PASTEMAC(ch,scals)( a22, b20 ); \ PASTEMAC(ch,scals)( a22, b21 ); \ PASTEMAC(ch,scals)( a22, b22 ); \ PASTEMAC(ch,scals)( a22, b23 ); \ \ *(b + 2*rs_b + 0*cs_b) = b20; \ *(b + 2*rs_b + 
1*cs_b) = b21; \ *(b + 2*rs_b + 2*cs_b) = b22; \ *(b + 2*rs_b + 3*cs_b) = b23; \ \ *(c + 2*rs_c + 0*cs_c) = b20; \ *(c + 2*rs_c + 1*cs_c) = b21; \ *(c + 2*rs_c + 2*cs_c) = b22; \ *(c + 2*rs_c + 3*cs_c) = b23; \ \ \ /* iteration 2 */ \ \ a11 = *(a + 1*rs_a + 1*cs_a); \ a12 = *(a + 1*rs_a + 2*cs_a); \ a13 = *(a + 1*rs_a + 3*cs_a); \ \ PASTEMAC(ch,axmys)( a12, b20, b10 ); \ PASTEMAC(ch,axmys)( a12, b21, b11 ); \ PASTEMAC(ch,axmys)( a12, b22, b12 ); \ PASTEMAC(ch,axmys)( a12, b23, b13 ); \ \ PASTEMAC(ch,axmys)( a13, b30, b10 ); \ PASTEMAC(ch,axmys)( a13, b31, b11 ); \ PASTEMAC(ch,axmys)( a13, b32, b12 ); \ PASTEMAC(ch,axmys)( a13, b33, b13 ); \ \ PASTEMAC(ch,scals)( a11, b10 ); \ PASTEMAC(ch,scals)( a11, b11 ); \ PASTEMAC(ch,scals)( a11, b12 ); \ PASTEMAC(ch,scals)( a11, b13 ); \ \ *(b + 1*rs_b + 0*cs_b) = b10; \ *(b + 1*rs_b + 1*cs_b) = b11; \ *(b + 1*rs_b + 2*cs_b) = b12; \ *(b + 1*rs_b + 3*cs_b) = b13; \ \ *(c + 1*rs_c + 0*cs_c) = b10; \ *(c + 1*rs_c + 1*cs_c) = b11; \ *(c + 1*rs_c + 2*cs_c) = b12; \ *(c + 1*rs_c + 3*cs_c) = b13; \ \ \ /* iteration 3 */ \ \ a00 = *(a + 0*rs_a + 0*cs_a); \ a01 = *(a + 0*rs_a + 1*cs_a); \ a02 = *(a + 0*rs_a + 2*cs_a); \ a03 = *(a + 0*rs_a + 3*cs_a); \ \ PASTEMAC(ch,axmys)( a01, b10, b00 ); \ PASTEMAC(ch,axmys)( a01, b11, b01 ); \ PASTEMAC(ch,axmys)( a01, b12, b02 ); \ PASTEMAC(ch,axmys)( a01, b13, b03 ); \ \ PASTEMAC(ch,axmys)( a02, b20, b00 ); \ PASTEMAC(ch,axmys)( a02, b21, b01 ); \ PASTEMAC(ch,axmys)( a02, b22, b02 ); \ PASTEMAC(ch,axmys)( a02, b23, b03 ); \ \ PASTEMAC(ch,axmys)( a03, b30, b00 ); \ PASTEMAC(ch,axmys)( a03, b31, b01 ); \ PASTEMAC(ch,axmys)( a03, b32, b02 ); \ PASTEMAC(ch,axmys)( a03, b33, b03 ); \ \ PASTEMAC(ch,scals)( a00, b00 ); \ PASTEMAC(ch,scals)( a00, b01 ); \ PASTEMAC(ch,scals)( a00, b02 ); \ PASTEMAC(ch,scals)( a00, b03 ); \ \ *(b + 0*rs_b + 0*cs_b) = b00; \ *(b + 0*rs_b + 1*cs_b) = b01; \ *(b + 0*rs_b + 2*cs_b) = b02; \ *(b + 0*rs_b + 3*cs_b) = b03; \ \ *(c + 0*rs_c + 0*cs_c) = b00; \ *(c + 0*rs_c + 1*cs_c) 
= b01; \ *(c + 0*rs_c + 2*cs_c) = b02; \ *(c + 0*rs_c + 3*cs_c) = b03; \ } INSERT_GENTFUNC_BASIC0( trsm_u_c99_4x4 ) blis-0.6.1/kernels/old/c99/bli_kernels_c99.h000066400000000000000000000045061360743507500204410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ GEMM_UKR_PROT( gemm_c99_4x4 ) GEMM_UKR_PROT( gemm_c99_4x4 ) GEMM_UKR_PROT( gemm_c99_4x4 ) GEMM_UKR_PROT( gemm_c99_4x4 ) GEMMTRSM_UKR_PROT( gemmtrsm_l_c99_4x4 ) GEMMTRSM_UKR_PROT( gemmtrsm_l_c99_4x4 ) GEMMTRSM_UKR_PROT( gemmtrsm_l_c99_4x4 ) GEMMTRSM_UKR_PROT( gemmtrsm_l_c99_4x4 ) GEMMTRSM_UKR_PROT( gemmtrsm_u_c99_4x4 ) GEMMTRSM_UKR_PROT( gemmtrsm_u_c99_4x4 ) GEMMTRSM_UKR_PROT( gemmtrsm_u_c99_4x4 ) GEMMTRSM_UKR_PROT( gemmtrsm_u_c99_4x4 ) TRSM_UKR_PROT( trsm_l_c99_4x4 ) TRSM_UKR_PROT( trsm_l_c99_4x4 ) TRSM_UKR_PROT( trsm_l_c99_4x4 ) TRSM_UKR_PROT( trsm_l_c99_4x4 ) TRSM_UKR_PROT( trsm_u_c99_4x4 ) TRSM_UKR_PROT( trsm_u_c99_4x4 ) TRSM_UKR_PROT( trsm_u_c99_4x4 ) TRSM_UKR_PROT( trsm_u_c99_4x4 ) blis-0.6.1/kernels/old/loongson3a/000077500000000000000000000000001360743507500167645ustar00rootroot00000000000000blis-0.6.1/kernels/old/loongson3a/3/000077500000000000000000000000001360743507500171265ustar00rootroot00000000000000blis-0.6.1/kernels/old/loongson3a/3/bli_gemm_loongson3a_opt_d4x4.c000066400000000000000000000550131360743507500247400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "blis.h"

/*
   Double-precision 4x4 gemm micro-kernel written in Loongson-3A inline
   assembly.

   Sixteen accumulators ($f16..$f31) are zeroed, then for each of the k
   steps four doubles are read from a and four from b and all sixteen
   products are accumulated. Finally every element of the 4x4 block of c is
   rewritten as

       c(i,j) = beta * c(i,j) + alpha * ab(i,j)

   where element (i,j) of c lives at c + i*rs_c + j*cs_c (strides counted in
   doubles; the asm shifts them left by 3 to get byte offsets).

   Parameters:
     k      number of rank-1 updates; split into k/4 unrolled loop trips
            (k_iter) plus a k%4 tail (k_left).
     alpha  pointer to the scalar applied to the accumulated products.
     a      pointer to the a values, consumed 4 doubles per k step.
     b      pointer to the b values, consumed 4 doubles per k step.
     beta   pointer to the scalar applied to the existing contents of c.
     c      pointer to element (0,0) of the output block.
     rs_c   row stride of c, in elements.
     cs_c   column stride of c, in elements.
     data   not referenced by this kernel.
     cntx   not referenced by this kernel.

   NOTE(review): c is always loaded and multiplied by beta; there is no
   special case for beta == 0, so NaN/Inf already present in c propagate.
   NOTE(review): the first values of a and b are loaded before k is tested,
   and the pipelined loop body always preloads the operands of the *next*
   k step, so a few doubles beyond the last used a/b element are read --
   confirm the packed buffers are padded accordingly.
   NOTE(review): the labels (.MainLoop, .Remain, ...) are ordinary named
   labels, so the asm block can only be emitted once per object file --
   confirm the function is never inlined or duplicated.
*/
void bli_dgemm_loongson3a_opt_4x4
     (
       dim_t               k,
       double*    restrict alpha,
       double*    restrict a,
       double*    restrict b,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c, inc_t cs_c,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	// The main loop is unrolled four times; the remainder is handled by the
	// .Remain / .Remaink1 sections below.
	uint64_t k_iter = k / 4;
	uint64_t k_left = k % 4;

	__asm__ volatile
	(
		//General purpose registers
		//
		//$8=k_iter, $9=k_left
		//$10=a address, $11=b address
		//$12=prefetch a, $13=prefetch b
		//$14=rs_c, $15=cs_c,
		//
		//$16=c00 address, $17=c01 address,
		//$18=c02 address, $19=c03 address,
		//
		//(in the .StoreC section $8..$13 and $20..$25 are reused as the
		// addresses of the remaining rows of c)
		//
		//Floating-point registers
		//
		//$f0=a0, $f1=a1, $f2=a2, $f3=a3
		//$f4=next_a0, $f5=next_a1, $f6=next_a2, $f7=next_a3
		//
		//$f8=b0, $f9=b1, $f10=b2, $f11=b3
		//$f12=next_b0, $f13=next_b1, $f14=next_b2, $f15=next_b3
		//
		//$f16=a0b0, $f17=a0b1, $f18=a0b2, $f19=a0b3
		//$f20=a1b0, $f21=a1b1, $f22=a1b2, $f23=a1b3
		//$f24=a2b0, $f25=a2b1, $f26=a2b2, $f27=a2b3
		//$f28=a3b0, $f29=a3b1, $f30=a3b2, $f31=a3b3
		//
		//Operand numbering: %0=k_iter %1=k_left %2=a %3=b %4=alpha
		//                   %5=beta %6=c %7=rs_c %8=cs_c %9=k
		//
		//Prologue: load the operands, zero the accumulators by moving $0
		//into them, compute the column addresses of c and the prefetch
		//pointers, and preload the first four values of a and b.
		//Loads whose destination is $0 discard the value; they are used
		//purely as prefetches.
		"ld $8, %0 \n\t" //load k_iter
		"dmtc1 $0, $f16 \n\t" //Init
		"ld $9, %1 \n\t" //load k_left
		"dmtc1 $0, $f17 \n\t" //Init
		"ld $14, %7 \n\t" //load rs_c
		"dmtc1 $0, $f18 \n\t" //Init
		"ld $15, %8 \n\t" //load cs_c
		"dmtc1 $0, $f19 \n\t" //Init
		"ld $16, %6 \n\t" //load c
		"dmtc1 $0, $f20 \n\t" //Init
		"ld $10, %2 \n\t" //load a
		"dmtc1 $0, $f21 \n\t" //Init
		"ld $11, %3 \n\t" //load b
		"dmtc1 $0, $f22 \n\t" //Init
		"dsll $14, $14, 3 \n\t" //rs_c * sizeof(double)
		"dmtc1 $0, $f23 \n\t" //Init
		"dsll $15, $15, 3 \n\t" //cs_c * sizeof(double)
		"dmtc1 $0, $f24 \n\t" //Init
		"dadd $17, $16, $15 \n\t" //c01 address
		"ld $12, %9 \n\t" //load kc
		"dmtc1 $0, $f25 \n\t" //Init
		"dmtc1 $0, $f26 \n\t" //Init
		"dadd $18, $17, $15 \n\t" //c02 address
		"dsll $13, $12, 5 \n\t" //B prefetch distance= next panel B(nr*kc = kc*4*8bytes = kc<<5)
		"dmtc1 $0, $f27 \n\t" //Init
		"dmtc1 $0, $f28 \n\t" //Init
		"dadd $19, $18, $15 \n\t" //c03 address
		"dsll $12, $12, 4 \n\t" //A prefetch distance= panel A/2(mr*kc/2 = kc*4*8bytes/2 = kc<<4)
		"dmtc1 $0, $f29 \n\t" //Init
		"dmtc1 $0, $f30 \n\t" //Init
		"dadd $13, $11, $13 \n\t" //B prefetch address
		"ld $0, 0($16) \n\t" //prefetch c00
		"dmtc1 $0, $f31 \n\t" //Init
		"dadd $12, $10, $12 \n\t" //A prefetch address
		"ld $0, 0($17) \n\t" //prefetch c01
		"gsLQC1 $f1, $f0, 0($10) \n\t" //load 2 values from a
		"gsLQC1 $f9, $f8, 0($11) \n\t" //load 2 values from b
		"gsLQC1 $f3, $f2, 1*16($10) \n\t" //load 2 values from a
		"gsLQC1 $f11, $f10, 1*16($11) \n\t" //load 2 values from b
		"ld $0, 0($18) \n\t" //prefetch c02
		"ld $0, 0($19) \n\t" //prefetch c03
		"beqz $8, .Remain \n\t" //skip the unrolled loop when k < 4
		".align 4 \n\t"
		//Main loop: four k steps per trip. Even steps multiply the
		//"current" registers ($f0-$f3, $f8-$f11) while loading the "next"
		//set ($f4-$f7, $f12-$f15); odd steps do the opposite.
		".MainLoop: \n\t"
		" \n\t" //iteration 0
		"daddiu $8, $8, -1 \n\t" //k_iter--
		"gsLQC1 $f5, $f4, 2*16($10) \n\t" //load next 2 values from a
		"madd.d $f16, $f16, $f0, $f8 \n\t" //a0b0
		"madd.d $f20, $f20, $f1, $f8 \n\t" //a1b0
		" \n\t"
		"gsLQC1 $f13, $f12, 2*16($11) \n\t" //load next 2 values from b
		"madd.d $f17, $f17, $f0, $f9 \n\t" //a0b1
		"madd.d $f21, $f21, $f1, $f9 \n\t" //a1b1
		" \n\t"
		"gsLQC1 $f7, $f6, 3*16($10) \n\t" //load next 2 values from a
		"madd.d $f24, $f24, $f2, $f8 \n\t" //a2b0
		"madd.d $f28, $f28, $f3, $f8 \n\t" //a3b0
		" \n\t"
		"gsLQC1 $f15, $f14, 3*16($11) \n\t" //load next 2 values from b
		"madd.d $f25, $f25, $f2, $f9 \n\t" //a2b1
		"madd.d $f29, $f29, $f3, $f9 \n\t" //a3b1
		" \n\t"
		"ld $0, 0($13) \n\t" //prefetch B
		"madd.d $f18, $f18, $f0, $f10 \n\t" //a0b2
		"madd.d $f22, $f22, $f1, $f10 \n\t" //a1b2
		" \n\t"
		"madd.d $f19, $f19, $f0, $f11 \n\t" //a0b3
		"madd.d $f23, $f23, $f1, $f11 \n\t" //a1b3
		" \n\t"
		"ld $0, 0($12) \n\t" //prefetch A
		"madd.d $f26, $f26, $f2, $f10 \n\t" //a2b2
		"madd.d $f30, $f30, $f3, $f10 \n\t" //a3b2
		" \n\t"
		"madd.d $f27, $f27, $f2, $f11 \n\t" //a2b3
		"madd.d $f31, $f31, $f3, $f11 \n\t" //a3b3
		" \n\t" //iteration 1
		"gsLQC1 $f1, $f0, 4*16($10) \n\t" //load next 2 values from a
		"madd.d $f16, $f16, $f4, $f12 \n\t" //a0b0
		"madd.d $f20, $f20, $f5, $f12 \n\t" //a1b0
		" \n\t"
		"gsLQC1 $f9, $f8, 4*16($11) \n\t" //load next 2 values from b
		"madd.d $f17, $f17, $f4, $f13 \n\t" //a0b1
		"madd.d $f21, $f21, $f5, $f13 \n\t" //a1b1
		" \n\t"
		"gsLQC1 $f3, $f2, 5*16($10) \n\t" //load next 2 values from a
		"madd.d $f24, $f24, $f6, $f12 \n\t" //a2b0
		"madd.d $f28, $f28, $f7, $f12 \n\t" //a3b0
		" \n\t"
		"gsLQC1 $f11, $f10, 5*16($11) \n\t" //load next 2 values from b
		"madd.d $f25, $f25, $f6, $f13 \n\t" //a2b1
		"madd.d $f29, $f29, $f7, $f13 \n\t" //a3b1
		" \n\t"
		"ld $0, 4*8($13) \n\t" //prefetch B
		"madd.d $f18, $f18, $f4, $f14 \n\t" //a0b2
		"madd.d $f22, $f22, $f5, $f14 \n\t" //a1b2
		" \n\t"
		"madd.d $f19, $f19, $f4, $f15 \n\t" //a0b3
		"madd.d $f23, $f23, $f5, $f15 \n\t" //a1b3
		" \n\t"
		"ld $0, 4*8($12) \n\t" //prefetch A
		"madd.d $f26, $f26, $f6, $f14 \n\t" //a2b2
		"madd.d $f30, $f30, $f7, $f14 \n\t" //a3b2
		" \n\t"
		"madd.d $f27, $f27, $f6, $f15 \n\t" //a2b3
		"madd.d $f31, $f31, $f7, $f15 \n\t" //a3b3
		" \n\t" //iteration 2
		"gsLQC1 $f5, $f4, 6*16($10) \n\t" //load next 2 values from a
		"madd.d $f16, $f16, $f0, $f8 \n\t" //a0b0
		"madd.d $f20, $f20, $f1, $f8 \n\t" //a1b0
		" \n\t"
		"gsLQC1 $f13, $f12, 6*16($11) \n\t" //load next 2 values from b
		"madd.d $f17, $f17, $f0, $f9 \n\t" //a0b1
		"madd.d $f21, $f21, $f1, $f9 \n\t" //a1b1
		" \n\t"
		"gsLQC1 $f7, $f6, 7*16($10) \n\t" //load next 2 values from a
		"madd.d $f24, $f24, $f2, $f8 \n\t" //a2b0
		"madd.d $f28, $f28, $f3, $f8 \n\t" //a3b0
		"daddu $10, $10, 16*8 \n\t" //move A address
		" \n\t"
		"gsLQC1 $f15, $f14, 7*16($11) \n\t" //load next 2 values from b
		"madd.d $f25, $f25, $f2, $f9 \n\t" //a2b1
		"madd.d $f29, $f29, $f3, $f9 \n\t" //a3b1
		"daddu $11, $11, 16*8 \n\t" //move B address
		" \n\t"
		"ld $0, 8*8($13) \n\t" //prefetch B
		"madd.d $f18, $f18, $f0, $f10 \n\t" //a0b2
		"madd.d $f22, $f22, $f1, $f10 \n\t" //a1b2
		" \n\t"
		"madd.d $f19, $f19, $f0, $f11 \n\t" //a0b3
		"madd.d $f23, $f23, $f1, $f11 \n\t" //a1b3
		" \n\t"
		"ld $0, 8*8($12) \n\t" //prefetch A
		"madd.d $f26, $f26, $f2, $f10 \n\t" //a2b2
		"madd.d $f30, $f30, $f3, $f10 \n\t" //a3b2
		" \n\t"
		"madd.d $f27, $f27, $f2, $f11 \n\t" //a2b3
		"madd.d $f31, $f31, $f3, $f11 \n\t" //a3b3
		" \n\t" //iteration 3
		//a and b were already advanced in iteration 2, so offsets 0 and
		//1*16 now address the first values of the next loop trip (or tail).
		"gsLQC1 $f1, $f0, 0($10) \n\t" //load next 2 values from a
		"madd.d $f16, $f16, $f4, $f12 \n\t" //a0b0
		"madd.d $f20, $f20, $f5, $f12 \n\t" //a1b0
		" \n\t"
		"gsLQC1 $f9, $f8, 0($11) \n\t" //load next 2 values from b
		"madd.d $f17, $f17, $f4, $f13 \n\t" //a0b1
		"madd.d $f21, $f21, $f5, $f13 \n\t" //a1b1
		" \n\t"
		"gsLQC1 $f3, $f2, 1*16($10) \n\t" //load next 2 values from a
		"madd.d $f24, $f24, $f6, $f12 \n\t" //a2b0
		"madd.d $f28, $f28, $f7, $f12 \n\t" //a3b0
		" \n\t"
		"gsLQC1 $f11, $f10, 1*16($11) \n\t" //load next 2 values from b
		"madd.d $f25, $f25, $f6, $f13 \n\t" //a2b1
		"madd.d $f29, $f29, $f7, $f13 \n\t" //a3b1
		" \n\t"
		"ld $0, 12*8($13) \n\t" //prefetch B
		"madd.d $f18, $f18, $f4, $f14 \n\t" //a0b2
		"madd.d $f22, $f22, $f5, $f14 \n\t" //a1b2
		"daddu $13, $13, 16*8 \n\t" //move prefetch B address
		" \n\t"
		"madd.d $f19, $f19, $f4, $f15 \n\t" //a0b3
		"madd.d $f23, $f23, $f5, $f15 \n\t" //a1b3
		" \n\t"
		"ld $0, 12*8($12) \n\t" //prefetch A
		"madd.d $f26, $f26, $f6, $f14 \n\t" //a2b2
		"madd.d $f30, $f30, $f7, $f14 \n\t" //a3b2
		"daddu $12, $12, 16*8 \n\t" //move prefetch A address
		" \n\t"
		"madd.d $f27, $f27, $f6, $f15 \n\t" //a2b3
		"madd.d $f31, $f31, $f7, $f15 \n\t" //a3b3
		"bnez $8, .MainLoop \n\t"
		".align 4 \n\t"
		".Remain: \n\t" //deal with the tail. k%4
		"beqz $9, .StoreC \n\t"
		"andi $8, $9, 2 \n\t" //test bit 1 of k_left (set when k%4 is 2 or 3)
		"nop \n\t"
		"nop \n\t"
		"beqz $8, .Remaink1 \n\t"
		"nop \n\t"
		" \n\t" // k%4=2 (two k steps; also taken when k%4 is 3)
		"gsLQC1 $f5, $f4, 2*16($10) \n\t" //load next 2 values from a
		"madd.d $f16, $f16, $f0, $f8 \n\t" //a0b0
		"madd.d $f20, $f20, $f1, $f8 \n\t" //a1b0
		" \n\t"
		"gsLQC1 $f13, $f12, 2*16($11) \n\t" //load next 2 values from b
		"madd.d $f17, $f17, $f0, $f9 \n\t" //a0b1
		"madd.d $f21, $f21, $f1, $f9 \n\t" //a1b1
		" \n\t"
		"gsLQC1 $f7, $f6, 3*16($10) \n\t" //load next 2 values from a
		"madd.d $f24, $f24, $f2, $f8 \n\t" //a2b0
		"madd.d $f28, $f28, $f3, $f8 \n\t" //a3b0
		"daddu $10, $10, 8*8 \n\t" //move A address
		" \n\t"
		"gsLQC1 $f15, $f14, 3*16($11) \n\t" //load next 2 values from b
		"madd.d $f25, $f25, $f2, $f9 \n\t" //a2b1
		"madd.d $f29, $f29, $f3, $f9 \n\t" //a3b1
		"daddu $11, $11, 8*8 \n\t" //move B address
		" \n\t"
		"ld $0, 0($13) \n\t" //prefetch B
		"madd.d $f18, $f18, $f0, $f10 \n\t" //a0b2
		"madd.d $f22, $f22, $f1, $f10 \n\t" //a1b2
		" \n\t"
		"madd.d $f19, $f19, $f0, $f11 \n\t" //a0b3
		"madd.d $f23, $f23, $f1, $f11 \n\t" //a1b3
		" \n\t"
		"ld $0, 0($12) \n\t" //prefetch A
		"madd.d $f26, $f26, $f2, $f10 \n\t" //a2b2
		"madd.d $f30, $f30, $f3, $f10 \n\t" //a3b2
		" \n\t"
		"madd.d $f27, $f27, $f2, $f11 \n\t" //a2b3
		"madd.d $f31, $f31, $f3, $f11 \n\t" //a3b3
		" \n\t"
		"gsLQC1 $f1, $f0, 0*16($10) \n\t" //load next 2 values from a
		"madd.d $f16, $f16, $f4, $f12 \n\t" //a0b0
		"madd.d $f20, $f20, $f5, $f12 \n\t" //a1b0
		" \n\t"
		"gsLQC1 $f9, $f8, 0*16($11) \n\t" //load next 2 values from b
		"madd.d $f17, $f17, $f4, $f13 \n\t" //a0b1
		"madd.d $f21, $f21, $f5, $f13 \n\t" //a1b1
		" \n\t"
		"gsLQC1 $f3, $f2, 1*16($10) \n\t" //load next 2 values from a
		"madd.d $f24, $f24, $f6, $f12 \n\t" //a2b0
		"madd.d $f28, $f28, $f7, $f12 \n\t" //a3b0
		" \n\t"
		"gsLQC1 $f11, $f10, 1*16($11) \n\t" //load next 2 values from b
		"madd.d $f25, $f25, $f6, $f13 \n\t" //a2b1
		"madd.d $f29, $f29, $f7, $f13 \n\t" //a3b1
		" \n\t"
		"ld $0, 4*8($13) \n\t" //prefetch B
		"madd.d $f18, $f18, $f4, $f14 \n\t" //a0b2
		"madd.d $f22, $f22, $f5, $f14 \n\t" //a1b2
		" \n\t"
		"daddu $13, $13, 8*8 \n\t" //move prefetch B address
		"madd.d $f19, $f19, $f4, $f15 \n\t" //a0b3
		"madd.d $f23, $f23, $f5, $f15 \n\t" //a1b3
		" \n\t"
		"ld $0, 4*8($12) \n\t" //prefetch A
		"madd.d $f26, $f26, $f6, $f14 \n\t" //a2b2
		"madd.d $f30, $f30, $f7, $f14 \n\t" //a3b2
		" \n\t"
		"daddu $12, $12, 8*8 \n\t" //move prefetch A address
		"madd.d $f27, $f27, $f6, $f15 \n\t" //a2b3
		"madd.d $f31, $f31, $f7, $f15 \n\t" //a3b3
		".align 4 \n\t"
		".Remaink1: \n\t" // k%4=1 (one k step; taken when bit 0 of k_left is set)
		"andi $8, $9, 1 \n\t"
		"beqz $8, .StoreC \n\t"
		"nop \n\t"
		" \n\t"
		"ld $0, 0($13) \n\t" //prefetch B
		"madd.d $f16, $f16, $f0, $f8 \n\t" //a0b0
		"madd.d $f20, $f20, $f1, $f8 \n\t" //a1b0
		" \n\t"
		"madd.d $f17, $f17, $f0, $f9 \n\t" //a0b1
		"madd.d $f21, $f21, $f1, $f9 \n\t" //a1b1
		" \n\t"
		"ld $0, 0($12) \n\t" //prefetch A
		"madd.d $f24, $f24, $f2, $f8 \n\t" //a2b0
		"madd.d $f28, $f28, $f3, $f8 \n\t" //a3b0
		" \n\t"
		"madd.d $f25, $f25, $f2, $f9 \n\t" //a2b1
		"madd.d $f29, $f29, $f3, $f9 \n\t" //a3b1
		" \n\t"
		"madd.d $f18, $f18, $f0, $f10 \n\t" //a0b2
		"madd.d $f22, $f22, $f1, $f10 \n\t" //a1b2
		" \n\t"
		"madd.d $f19, $f19, $f0, $f11 \n\t" //a0b3
		"madd.d $f23, $f23, $f1, $f11 \n\t" //a1b3
		" \n\t"
		"madd.d $f26, $f26, $f2, $f10 \n\t" //a2b2
		"madd.d $f30, $f30, $f3, $f10 \n\t" //a3b2
		" \n\t"
		"madd.d $f27, $f27, $f2, $f11 \n\t" //a2b3
		"madd.d $f31, $f31, $f3, $f11 \n\t" //a3b3
		".align 4 \n\t"
		//Epilogue: c(i,j) = beta*c(i,j) + alpha*ab(i,j). The a/b value
		//registers ($f0-$f13) are free now and are reused to hold the old
		//contents of c; $8-$13 and $20-$25 become row addresses of c.
		".StoreC: \n\t" //Write C
		" \n\t" //$f14=alpha, $f15=beta
		" \n\t"
		"ld $8, %4 \n\t" //load alpha address
		"ld $9, %5 \n\t" //load beta address
		"ldc1 $f14, 0($8) \n\t" //load alpha
		"ldc1 $f15, 0($9) \n\t" //load beta
		" \n\t"
		"ldc1 $f0, 0($16) \n\t" //load c00
		"dadd $20, $16, $14 \n\t" //c10 address
		"ldc1 $f1, 0($17) \n\t" //load c01
		"dadd $21, $17, $14 \n\t" //c11 address
		"ldc1 $f2, 0($18) \n\t" //load c02
		"dadd $22, $18, $14 \n\t" //c12 address
		"ldc1 $f3, 0($19) \n\t" //load c03
		"dadd $23, $19, $14 \n\t" //c13 address
		" \n\t"
		"ldc1 $f4, 0($20) \n\t" //load c10
		"dadd $8, $20, $14 \n\t" //c20 address
		"mul.d $f0, $f0, $f15 \n\t" //c00 * beta
		"ldc1 $f5, 0($21) \n\t" //load c11
		"dadd $9, $21, $14 \n\t" //c21 address
		"mul.d $f1, $f1, $f15 \n\t" //c01 * beta
		"ldc1 $f6, 0($22) \n\t" //load c12
		"dadd $10, $22, $14 \n\t" //c22 address
		"mul.d $f2, $f2, $f15 \n\t" //c02 * beta
		"ldc1 $f7, 0($23) \n\t" //load c13
		"dadd $11, $23, $14 \n\t" //c23 address
		"mul.d $f3, $f3, $f15 \n\t" //c03 * beta
		" \n\t"
		"ldc1 $f8, 0($8) \n\t" //load c20
		"dadd $12, $8, $14 \n\t" //c30 address
		"mul.d $f4, $f4, $f15 \n\t" //c10 * beta
		"madd.d $f16, $f0, $f16, $f14\n\t" //c00+=alpha*a0b0
		"ldc1 $f9, 0($9) \n\t" //load c21
		"dadd $13, $9, $14 \n\t" //c31 address
		"mul.d $f5, $f5, $f15 \n\t" //c11 * beta
		"madd.d $f17, $f1, $f17, $f14\n\t" //c01+=alpha*a0b1
		"ldc1 $f10, 0($10) \n\t" //load c22
		"dadd $24, $10, $14 \n\t" //c32 address
		"mul.d $f6, $f6, $f15 \n\t" //c12 * beta
		"madd.d $f18, $f2, $f18, $f14\n\t" //c02+=alpha*a0b2
		"ldc1 $f11, 0($11) \n\t" //load c23
		"dadd $25, $11, $14 \n\t" //c33 address
		"mul.d $f7, $f7, $f15 \n\t" //c13 * beta
		"madd.d $f19, $f3, $f19, $f14\n\t" //c03+=alpha*a0b3
		" \n\t"
		"ldc1 $f12, 0($12) \n\t" //load c30
		"mul.d $f8, $f8, $f15 \n\t" //c20 * beta
		"madd.d $f20, $f4, $f20, $f14 \n\t" //c10+=alpha*a1b0
		"ldc1 $f13, 0($13) \n\t" //load c31
		"mul.d $f9, $f9, $f15 \n\t" //c21 * beta
		"madd.d $f21, $f5, $f21, $f14 \n\t" //c11+=alpha*a1b1
		"ldc1 $f0, 0($24) \n\t" //load c32
		"mul.d $f10, $f10, $f15 \n\t" //c22 * beta
		"madd.d $f22, $f6, $f22, $f14 \n\t" //c12+=alpha*a1b2
		"ldc1 $f1, 0($25) \n\t" //load c33
		"mul.d $f11, $f11, $f15 \n\t" //c23 * beta
		"madd.d $f23, $f7, $f23, $f14 \n\t" //c13+=alpha*a1b3
		" \n\t"
		"sdc1 $f16, 0($16) \n\t" //store c00
		"mul.d $f12, $f12, $f15 \n\t" //c30 * beta
		"madd.d $f24, $f8, $f24, $f14 \n\t" //c20+=alpha*a2b0
		"sdc1 $f17, 0($17) \n\t" //store c01
		"mul.d $f13, $f13, $f15 \n\t" //c31 * beta
		"madd.d $f25, $f9, $f25, $f14 \n\t" //c21+=alpha*a2b1
		"sdc1 $f18, 0($18) \n\t" //store c02
		"mul.d $f0, $f0, $f15 \n\t" //c32 * beta
		"madd.d $f26, $f10, $f26, $f14 \n\t" //c22+=alpha*a2b2
		"sdc1 $f19, 0($19) \n\t" //store c03
		"mul.d $f1, $f1, $f15 \n\t" //c33 * beta
		"madd.d $f27, $f11, $f27, $f14 \n\t" //c23+=alpha*a2b3
		" \n\t"
		"sdc1 $f20, 0($20) \n\t" //store c10
		"madd.d $f28, $f12, $f28, $f14 \n\t" //c30+=alpha*a3b0
		"sdc1 $f21, 0($21) \n\t" //store c11
		"madd.d $f29, $f13, $f29, $f14 \n\t" //c31+=alpha*a3b1
		"sdc1 $f22, 0($22) \n\t" //store c12
		"madd.d $f30, $f0, $f30, $f14 \n\t" //c32+=alpha*a3b2
		"sdc1 $f23, 0($23) \n\t" //store c13
		"madd.d $f31, $f1, $f31, $f14 \n\t" //c33+=alpha*a3b3
		" \n\t"
		"sdc1 $f24, 0($8) \n\t" //store c20
		"sdc1 $f25, 0($9) \n\t" //store c21
		"sdc1 $f26, 0($10) \n\t" //store c22
		"sdc1 $f27, 0($11) \n\t" //store c23
		" \n\t"
		"sdc1 $f28, 0($12) \n\t" //store c30
		"sdc1 $f29, 0($13) \n\t" //store c31
		"sdc1 $f30, 0($24) \n\t" //store c32
		"sdc1 $f31, 0($25) \n\t" //store c33
		" \n\t"
		://output operands (none)
		://input operands (all passed as memory operands and loaded with ld)
		"m" (k_iter),
		"m" (k_left),
		"m" (a),
		"m" (b),
		"m" (alpha),
		"m" (beta),
		"m" (c),
		"m" (rs_c),
		"m" (cs_c),
		"m" (k)
		://register clobber list
		//general purpose registers
		"$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", "$20", "$21", "$22", "$23", "$24", "$25",
		//floating-point registers
		"$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
		"memory"
	);
}
blis-0.6.1/kernels/old/nacl/000077500000000000000000000000001360743507500156175ustar00rootroot00000000000000blis-0.6.1/kernels/old/nacl/pnacl/000077500000000000000000000000001360743507500167145ustar00rootroot00000000000000blis-0.6.1/kernels/old/nacl/pnacl/1/000077500000000000000000000000001360743507500170545ustar00rootroot00000000000000blis-0.6.1/kernels/old/nacl/pnacl/1/bli_axpyv_opt.c000066400000000000000000000115241360743507500221020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #if PPAPI_RELEASE >= 36 typedef float v4sf __attribute__ ((vector_size(16))); inline v4sf v4sf_splat(float x) { return (v4sf) { x, x, x, x }; } inline v4sf v4sf_load(const float* a) { return *((const v4sf*)a); } inline v4sf v4sf_cload(const scomplex* a) { return *((const v4sf*)a); } inline void v4sf_store(float* a, v4sf x) { *((v4sf*)a) = x; } inline void v4sf_cstore(scomplex* a, v4sf x) { *((v4sf*)a) = x; } inline v4sf v4sf_zero() { return (v4sf) { 0.0f, 0.0f, 0.0f, 0.0f }; } #endif void bli_saxpyv_opt( conj_t conjx, dim_t n, float alpha[restrict static 1], float x[restrict static n], inc_t incx, float y[restrict static n], inc_t incy) { if (bli_zero_dim1(n)) { return; } if (bli_seq0(*alpha)) { return; } #if PPAPI_RELEASE >= 36 if (!bli_has_nonunit_inc2(incx, incy)) { const v4sf alphav = v4sf_splat(*alpha); while (n >= 4) { const v4sf xv = v4sf_load(x); v4sf yv = v4sf_load(y); yv += xv * alphav; v4sf_store(y, yv); x += 4; y += 4; n -= 4; } const float alphac = *alpha; while (n--) { (*y++) += (*x++) * alphac; } } #endif /* Just call the reference implementation. 
*/ BLIS_SAXPYV_KERNEL_REF( conjx, n, alpha, x, incx, y, incy); } void bli_caxpyv_opt( conj_t conjx, dim_t n, scomplex alpha[restrict static 1], scomplex x[restrict static n], inc_t incx, scomplex y[restrict static n], inc_t incy) { if (bli_zero_dim1(n)) { return; } if (bli_ceq0(*alpha)) { return; } #if PPAPI_RELEASE >= 36 if (!bli_has_nonunit_inc2(incx, incy)) { if (bli_is_noconj(conjx)) { const v4sf alphav0 = v4sf_splat(alpha->real); const v4sf alphav1 = (v4sf) { -alpha->imag, alpha->imag, -alpha->imag, alpha->imag }; while (n >= 2) { const v4sf xv0 = v4sf_cload(x); v4sf yv = v4sf_cload(y); const v4sf xv1 = __builtin_shufflevector(xv0, xv0, 1, 0, 3, 2); yv += xv0 * alphav0 + xv1 * alphav1; v4sf_cstore(y, yv); x += 2; y += 2; n -= 2; } const float alphar = alpha->real; const float alphai = alpha->imag; while (n--) { const float xr = x->real; const float xi = x->imag; const float yr = y->real; const float yi = y->imag; y->real = yr + xr * alphar - xi * alphai; y->imag = yi + xr * alphai + xi * alphar; x += 1; y += 1; } } else { const v4sf alphav0 = (v4sf) { alpha->real, -alpha->real, alpha->real, -alpha->real }; const v4sf alphav1 = v4sf_splat(alpha->imag); while (n >= 2) { const v4sf xv0 = v4sf_cload(x); v4sf yv = v4sf_cload(y); const v4sf xv1 = __builtin_shufflevector(xv0, xv0, 1, 0, 3, 2); yv += xv0 * alphav0 + xv1 * alphav1; v4sf_cstore(y, yv); x += 2; y += 2; n -= 2; } const float alphar = alpha->real; const float alphai = alpha->imag; while (n--) { const float xr = x->real; const float xi = x->imag; const float yr = y->real; const float yi = y->imag; y->real = yr + xr * alphar + xi * alphai; y->imag = yi + xr * alphai - xi * alphar; x += 1; y += 1; } } } #endif /* Just call the reference implementation. 
*/ BLIS_CAXPYV_KERNEL_REF( conjx, n, alpha, x, incx, y, incy); } blis-0.6.1/kernels/old/nacl/pnacl/1/bli_dotv_opt.c000066400000000000000000000336141360743507500217130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #if PPAPI_RELEASE >= 36 typedef float v4sf __attribute__ ((vector_size(16))); inline v4sf v4sf_splat(float x) { return (v4sf) { x, x, x, x }; } inline v4sf v4sf_load(const float* a) { return *((const v4sf*)a); } inline v4sf v4sf_cload(const scomplex* a) { return *((const v4sf*)a); } inline void v4sf_store(float* a, v4sf x) { *((v4sf*)a) = x; } inline void v4sf_cstore(scomplex* a, v4sf x) { *((v4sf*)a) = x; } inline v4sf v4sf_zero() { return (v4sf) { 0.0f, 0.0f, 0.0f, 0.0f }; } #endif void bli_sdotv_opt( conj_t conjx, conj_t conjy, dim_t n, float x[restrict static n], inc_t incx, float y[restrict static n], inc_t incy, float rho[restrict static 1]) { #if PPAPI_RELEASE >= 36 // If the vector lengths are zero, set rho to zero and return. if (bli_zero_dim1(n)) { *rho = 0.0f; return; } // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if (bli_has_nonunit_inc2(incx, incy)) { float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f, sum3 = 0.0f, sum4 = 0.0f, sum5 = 0.0f; while (n >= 6) { sum0 += (*x) * (*y); x += incx; y += incy; sum1 += (*x) * (*y); x += incx; y += incy; sum2 += (*x) * (*y); x += incx; y += incy; sum3 += (*x) * (*y); x += incx; y += incy; sum4 += (*x) * (*y); x += incx; y += incy; sum5 += (*x) * (*y); x += incx; y += incy; n -= 6; } float sum = (sum0 + sum1 + sum2) + (sum3 + sum4 + sum5); while (n--) { sum += (*x) * (*y); x += incx; y += incy; } *rho = sum; } else { v4sf vsum0 = v4sf_zero(), vsum1 = v4sf_zero(), vsum2 = v4sf_zero(); v4sf vsum3 = v4sf_zero(), vsum4 = v4sf_zero(), vsum5 = v4sf_zero(); while (n >= 24) { vsum0 += v4sf_load(x) * v4sf_load(y); vsum1 += v4sf_load(x+4) * v4sf_load(y+4); vsum2 += v4sf_load(x+8) * v4sf_load(y+8); vsum3 += v4sf_load(x+12) * v4sf_load(y+12); vsum4 += v4sf_load(x+16) * v4sf_load(y+16); vsum5 += v4sf_load(x+20) * v4sf_load(y+20); x += 24; y += 24; n -= 24; } v4sf vsum = (vsum0 + vsum1 + vsum2) + (vsum3 + vsum4 + vsum5); while (n >= 
4) { vsum += v4sf_load(x) * v4sf_load(y); x += 4; y += 4; n -= 4; } float sum = (vsum[0] + vsum[1]) + (vsum[2] + vsum[3]); while (n--) { sum += (*x++) * (*y++); } *rho = sum; } #else float sum0 = 0.0f, sum1 = 0.0f, sum2 = 0.0f, sum3 = 0.0f, sum4 = 0.0f, sum5 = 0.0f; while (n >= 6) { sum0 += (*x) * (*y); x += incx; y += incy; sum1 += (*x) * (*y); x += incx; y += incy; sum2 += (*x) * (*y); x += incx; y += incy; sum3 += (*x) * (*y); x += incx; y += incy; sum4 += (*x) * (*y); x += incx; y += incy; sum5 += (*x) * (*y); x += incx; y += incy; n -= 6; } float sum = (sum0 + sum1 + sum2) + (sum3 + sum4 + sum5); while (n--) { sum += (*x) * (*y); x += incx; y += incy; } *rho = sum; #endif } void bli_ddotv_opt( conj_t conjx, conj_t conjy, dim_t n, double x[restrict static n], inc_t incx, double y[restrict static n], inc_t incy, double rho[restrict static 1]) { double sum0 = 0.0, sum1 = 0.0, sum2 = 0.0, sum3 = 0.0, sum4 = 0.0, sum5 = 0.0; while (n >= 6) { sum0 += (*x) * (*y); x += incx; y += incy; sum1 += (*x) * (*y); x += incx; y += incy; sum2 += (*x) * (*y); x += incx; y += incy; sum3 += (*x) * (*y); x += incx; y += incy; sum4 += (*x) * (*y); x += incx; y += incy; sum5 += (*x) * (*y); x += incx; y += incy; n -= 6; } double sum = (sum0 + sum1 + sum2) + (sum3 + sum4 + sum5); while (n--) { sum += (*x) * (*y); x += incx; y += incy; } *rho = sum; } void bli_cdotv_opt( conj_t conjx, conj_t conjy, dim_t n, scomplex x[restrict static n], inc_t incx, scomplex y[restrict static n], inc_t incy, scomplex rho[restrict static 1]) { if (bli_is_conj(conjy)) { bli_toggle_conj(&conjx); } if (bli_zero_dim1(n)) { rho->real = 0.0f; rho->imag = 0.0f; return; } float sumr; float sumi; #if PPAPI_RELEASE >= 36 if (bli_is_noconj(conjx)) { if (bli_has_nonunit_inc2(incx, incy)) { float sum0r = 0.0f, sum1r = 0.0f; float sum0i = 0.0f, sum1i = 0.0f; while (n >= 2) { const float x0r = x->real; const float x0i = x->imag; const float y0r = y->real; const float y0i = y->imag; sum0r += x0r * y0r - x0i * y0i; 
sum0i += x0r * y0i + x0i * y0r; x += incx; y += incy; const float x1r = x->real; const float x1i = x->imag; const float y1r = y->real; const float y1i = y->imag; sum1r += x1r * y1r - x1i * y1i; sum1i += x1r * y1i + x1i * y1r; x += incx; y += incy; n -= 2; } sumr = sum0r + sum1r; sumi = sum0i + sum1i; } else { v4sf sumv0r = v4sf_zero(), sumv1r = v4sf_zero(); v4sf sumv0i = v4sf_zero(), sumv1i = v4sf_zero(); while (n >= 8) { const v4sf xv0t = v4sf_cload(x); const v4sf xv0b = v4sf_cload(x+2); const v4sf yv0t = v4sf_cload(y); const v4sf yv0b = v4sf_cload(y+2); const v4sf xv0r = __builtin_shufflevector(xv0t, xv0b, 0, 2, 4, 6); const v4sf xv0i = __builtin_shufflevector(xv0t, xv0b, 1, 3, 5, 7); const v4sf yv0r = __builtin_shufflevector(yv0t, yv0b, 0, 2, 4, 6); const v4sf yv0i = __builtin_shufflevector(yv0t, yv0b, 1, 3, 5, 7); sumv0r += xv0r * yv0r - xv0i * yv0i; sumv0i += xv0r * yv0i + xv0i * yv0r; const v4sf xv1t = v4sf_cload(x+4); const v4sf xv1b = v4sf_cload(x+6); const v4sf yv1t = v4sf_cload(y+4); const v4sf yv1b = v4sf_cload(y+6); const v4sf xv1r = __builtin_shufflevector(xv1t, xv1b, 0, 2, 4, 6); const v4sf xv1i = __builtin_shufflevector(xv1t, xv1b, 1, 3, 5, 7); const v4sf yv1r = __builtin_shufflevector(yv1t, yv1b, 0, 2, 4, 6); const v4sf yv1i = __builtin_shufflevector(yv1t, yv1b, 1, 3, 5, 7); sumv1r += xv1r * yv1r - xv1i * yv1i; sumv1i += xv1r * yv1i + xv1i * yv1r; x += 8; y += 8; n -= 8; } const v4sf sumvr = sumv0r + sumv1r; const v4sf sumvi = sumv0i + sumv1i; sumr = (sumvr[0] + sumvr[1]) + (sumvr[2] + sumvr[3]); sumi = (sumvi[0] + sumvi[1]) + (sumvi[2] + sumvi[3]); } while (n--) { const float xr = x->real; const float xi = x->imag; const float yr = y->real; const float yi = y->imag; sumr += xr * yr - xi * yi; sumi += xr * yi + xi * yr; x += incx; y += incy; } } else { if (bli_has_nonunit_inc2(incx, incy)) { float sum0r = 0.0f, sum1r = 0.0f; float sum0i = 0.0f, sum1i = 0.0f; while (n >= 2) { const float x0r = x->real; const float x0i = x->imag; const float y0r = 
y->real; const float y0i = y->imag; sum0r += x0r * y0r + x0i * y0i; sum0i += x0r * y0i - x0i * y0r; x += incx; y += incy; const float x1r = x->real; const float x1i = x->imag; const float y1r = y->real; const float y1i = y->imag; sum1r += x1r * y1r + x1i * y1i; sum1i += x1r * y1i - x1i * y1r; x += incx; y += incy; n -= 2; } sumr = sum0r + sum1r; sumi = sum0i + sum1i; } else { v4sf sumv0r = v4sf_zero(), sumv1r = v4sf_zero(); v4sf sumv0i = v4sf_zero(), sumv1i = v4sf_zero(); while (n >= 8) { const v4sf xv0t = v4sf_cload(x); const v4sf xv0b = v4sf_cload(x+2); const v4sf yv0t = v4sf_cload(y); const v4sf yv0b = v4sf_cload(y+2); const v4sf xv0r = __builtin_shufflevector(xv0t, xv0b, 0, 2, 4, 6); const v4sf xv0i = __builtin_shufflevector(xv0t, xv0b, 1, 3, 5, 7); const v4sf yv0r = __builtin_shufflevector(yv0t, yv0b, 0, 2, 4, 6); const v4sf yv0i = __builtin_shufflevector(yv0t, yv0b, 1, 3, 5, 7); sumv0r += xv0r * yv0r + xv0i * yv0i; sumv0i += xv0r * yv0i - xv0i * yv0r; const v4sf xv1t = v4sf_cload(x+4); const v4sf xv1b = v4sf_cload(x+6); const v4sf yv1t = v4sf_cload(y+4); const v4sf yv1b = v4sf_cload(y+6); const v4sf xv1r = __builtin_shufflevector(xv1t, xv1b, 0, 2, 4, 6); const v4sf xv1i = __builtin_shufflevector(xv1t, xv1b, 1, 3, 5, 7); const v4sf yv1r = __builtin_shufflevector(yv1t, yv1b, 0, 2, 4, 6); const v4sf yv1i = __builtin_shufflevector(yv1t, yv1b, 1, 3, 5, 7); sumv1r += xv1r * yv1r + xv1i * yv1i; sumv1i += xv1r * yv1i - xv1i * yv1r; x += 8; y += 8; n -= 8; } const v4sf sumvr = sumv0r + sumv1r; const v4sf sumvi = sumv0i + sumv1i; sumr = (sumvr[0] + sumvr[1]) + (sumvr[2] + sumvr[3]); sumi = (sumvi[0] + sumvi[1]) + (sumvi[2] + sumvi[3]); } while (n--) { const float xr = x->real; const float xi = x->imag; const float yr = y->real; const float yi = y->imag; sumr += xr * yr + xi * yi; sumi += xr * yi - xi * yr; x += incx; y += incy; } } #else if (bli_is_noconj(conjx)) { float sum0r = 0.0f, sum1r = 0.0f; float sum0i = 0.0f, sum1i = 0.0f; while (n >= 2) { const float x0r = 
x->real; const float x0i = x->imag; const float y0r = y->real; const float y0i = y->imag; sum0r += x0r * y0r - x0i * y0i; sum0i += x0r * y0i + x0i * y0r; x += incx; y += incy; const float x1r = x->real; const float x1i = x->imag; const float y1r = y->real; const float y1i = y->imag; sum1r += x1r * y1r - x1i * y1i; sum1i += x1r * y1i + x1i * y1r; x += incx; y += incy; n -= 2; } sumr = sum0r + sum1r; sumi = sum0i + sum1i; if (n != 0) { const float xr = x->real; const float xi = x->imag; const float yr = y->real; const float yi = y->imag; sumr += xr * yr - xi * yi; sumi += xr * yi + xi * yr; } } else { float sum0r = 0.0f, sum1r = 0.0f; float sum0i = 0.0f, sum1i = 0.0f; while (n >= 2) { const float x0r = x->real; const float x0i = x->imag; const float y0r = y->real; const float y0i = y->imag; sum0r += x0r * y0r + x0i * y0i; sum0i += x0r * y0i - x0i * y0r; x += incx; y += incy; const float x1r = x->real; const float x1i = x->imag; const float y1r = y->real; const float y1i = y->imag; sum1r += x1r * y1r + x1i * y1i; sum1i += x1r * y1i - x1i * y1r; x += incx; y += incy; n -= 2; } sumr = sum0r + sum1r; sumi = sum0i + sum1i; if (n != 0) { const float xr = x->real; const float xi = x->imag; const float yr = y->real; const float yi = y->imag; sumr += xr * yr + xi * yi; sumi += xr * yi - xi * yr; } } #endif rho->real = sumr; rho->imag = bli_is_conj(conjy) ? 
-sumi : sumi; } void bli_zdotv_opt( conj_t conjx, conj_t conjy, dim_t n, dcomplex x[restrict static n], inc_t incx, dcomplex y[restrict static n], inc_t incy, dcomplex rho[restrict static 1]) { if (bli_is_conj(conjy)) { bli_toggle_conj(&conjx); } if (bli_zero_dim1(n)) { rho->real = 0.0; rho->imag = 0.0; return; } double sumr; double sumi; if (bli_is_noconj(conjx)) { double sum0r = 0.0, sum1r = 0.0; double sum0i = 0.0, sum1i = 0.0; while (n >= 2) { const double x0r = x->real; const double x0i = x->imag; const double y0r = y->real; const double y0i = y->imag; sum0r += x0r * y0r - x0i * y0i; sum0i += x0r * y0i + x0i * y0r; x += incx; y += incy; const double x1r = x->real; const double x1i = x->imag; const double y1r = y->real; const double y1i = y->imag; sum1r += x1r * y1r - x1i * y1i; sum1i += x1r * y1i + x1i * y1r; x += incx; y += incy; n -= 2; } sumr = sum0r + sum1r; sumi = sum0i + sum1i; if (n != 0) { const double xr = x->real; const double xi = x->imag; const double yr = y->real; const double yi = y->imag; sumr += xr * yr - xi * yi; sumi += xr * yi + xi * yr; } } else { double sum0r = 0.0, sum1r = 0.0; double sum0i = 0.0, sum1i = 0.0; while (n >= 2) { const double x0r = x->real; const double x0i = x->imag; const double y0r = y->real; const double y0i = y->imag; sum0r += x0r * y0r + x0i * y0i; sum0i += x0r * y0i - x0i * y0r; x += incx; y += incy; const double x1r = x->real; const double x1i = x->imag; const double y1r = y->real; const double y1i = y->imag; sum1r += x1r * y1r + x1i * y1i; sum1i += x1r * y1i - x1i * y1r; x += incx; y += incy; n -= 2; } sumr = sum0r + sum1r; sumi = sum0i + sum1i; if (n != 0) { const double xr = x->real; const double xi = x->imag; const double yr = y->real; const double yi = y->imag; sumr += xr * yr + xi * yi; sumi += xr * yi - xi * yr; } } rho->real = sumr; rho->imag = bli_is_conj(conjy) ? 
-sumi : sumi; } blis-0.6.1/kernels/old/nacl/pnacl/3/000077500000000000000000000000001360743507500170565ustar00rootroot00000000000000blis-0.6.1/kernels/old/nacl/pnacl/3/bli_gemm_opt.c000066400000000000000000000335631360743507500216710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #if PPAPI_RELEASE >= 36 typedef float v4sf __attribute__ ((vector_size(16))); inline v4sf v4sf_splat(float x) { return (v4sf) { x, x, x, x }; } inline v4sf v4sf_load(const float* a) { return *((const v4sf*)a); } inline v4sf v4sf_cload(const scomplex* a) { return *((const v4sf*)a); } inline void v4sf_store(float* a, v4sf x) { *((v4sf*)a) = x; } inline void v4sf_cstore(scomplex* a, v4sf x) { *((v4sf*)a) = x; } inline v4sf v4sf_zero() { return (v4sf) { 0.0f, 0.0f, 0.0f, 0.0f }; } void bli_sgemm_opt ( dim_t k, float alpha[restrict static 1], float a[restrict static 8*k], float b[restrict static k*4], float beta[restrict static 1], float c[restrict static 8*4], inc_t rs_c, inc_t cs_c, auxinfo_t* data, cntx_t* cntx ) { // Vectors for accummulating column 0, 1, 2, 3 (initialize to 0.0) v4sf abv0t = v4sf_zero(), abv1t = v4sf_zero(), abv2t = v4sf_zero(), abv3t = v4sf_zero(); v4sf abv0b = v4sf_zero(), abv1b = v4sf_zero(), abv2b = v4sf_zero(), abv3b = v4sf_zero(); for (dim_t i = 0; i < k; i += 1) { const v4sf avt = v4sf_load(a); const v4sf avb = v4sf_load(a+4); const v4sf bv_xxxx = v4sf_splat(b[0]); abv0t += avt * bv_xxxx; abv0b += avb * bv_xxxx; const v4sf bv_yyyy = v4sf_splat(b[1]); abv1t += avt * bv_yyyy; abv1b += avb * bv_yyyy; const v4sf bv_zzzz = v4sf_splat(b[2]); abv2t += avt * bv_zzzz; abv2b += avb * bv_zzzz; const v4sf bv_wwww = v4sf_splat(b[3]); abv3t += avt * bv_wwww; abv3b += avb * bv_wwww; a += 8; b += 4; } const v4sf alphav = v4sf_splat(*alpha); abv0t *= alphav; abv0b *= alphav; abv1t *= alphav; abv1b *= alphav; abv2t *= alphav; abv2b *= alphav; abv3t *= alphav; abv3b *= alphav; if (rs_c == 1) { v4sf cv0t = v4sf_load(&c[0*rs_c + 0*cs_c]); v4sf cv1t = v4sf_load(&c[0*rs_c + 1*cs_c]); v4sf cv2t = v4sf_load(&c[0*rs_c + 2*cs_c]); v4sf cv3t = v4sf_load(&c[0*rs_c + 3*cs_c]); v4sf cv0b = v4sf_load(&c[4*rs_c + 0*cs_c]); v4sf cv1b = v4sf_load(&c[4*rs_c + 1*cs_c]); v4sf cv2b = v4sf_load(&c[4*rs_c + 2*cs_c]); v4sf cv3b = v4sf_load(&c[4*rs_c + 3*cs_c]); 
const v4sf betav = v4sf_splat(*beta); cv0t = cv0t * betav + abv0t; cv1t = cv1t * betav + abv1t; cv2t = cv2t * betav + abv2t; cv3t = cv3t * betav + abv3t; cv0b = cv0b * betav + abv0b; cv1b = cv1b * betav + abv1b; cv2b = cv2b * betav + abv2b; cv3b = cv3b * betav + abv3b; v4sf_store(&c[0*rs_c + 0*cs_c], cv0t); v4sf_store(&c[0*rs_c + 1*cs_c], cv1t); v4sf_store(&c[0*rs_c + 2*cs_c], cv2t); v4sf_store(&c[0*rs_c + 3*cs_c], cv3t); v4sf_store(&c[4*rs_c + 0*cs_c], cv0b); v4sf_store(&c[4*rs_c + 1*cs_c], cv1b); v4sf_store(&c[4*rs_c + 2*cs_c], cv2b); v4sf_store(&c[4*rs_c + 3*cs_c], cv3b); } else { // Load columns 0, 1, 2, 3 (top part) v4sf cv0t = (v4sf){ c[0*rs_c + 0*cs_c], c[1*rs_c + 0*cs_c], c[2*rs_c + 0*cs_c], c[3*rs_c + 0*cs_c] }; v4sf cv1t = (v4sf){ c[0*rs_c + 1*cs_c], c[1*rs_c + 1*cs_c], c[2*rs_c + 1*cs_c], c[3*rs_c + 1*cs_c] }; v4sf cv2t = (v4sf){ c[0*rs_c + 2*cs_c], c[1*rs_c + 2*cs_c], c[2*rs_c + 2*cs_c], c[3*rs_c + 2*cs_c] }; v4sf cv3t = (v4sf){ c[0*rs_c + 3*cs_c], c[1*rs_c + 3*cs_c], c[2*rs_c + 3*cs_c], c[3*rs_c + 3*cs_c] }; // Load columns 0, 1, 2, 3 (bottom part) v4sf cv0b = (v4sf){ c[4*rs_c + 0*cs_c], c[5*rs_c + 0*cs_c], c[6*rs_c + 0*cs_c], c[7*rs_c + 0*cs_c] }; v4sf cv1b = (v4sf){ c[4*rs_c + 1*cs_c], c[5*rs_c + 1*cs_c], c[6*rs_c + 1*cs_c], c[7*rs_c + 1*cs_c] }; v4sf cv2b = (v4sf){ c[4*rs_c + 2*cs_c], c[5*rs_c + 2*cs_c], c[6*rs_c + 2*cs_c], c[7*rs_c + 2*cs_c] }; v4sf cv3b = (v4sf){ c[4*rs_c + 3*cs_c], c[5*rs_c + 3*cs_c], c[6*rs_c + 3*cs_c], c[7*rs_c + 3*cs_c] }; const v4sf betav = v4sf_splat(*beta); cv0t = cv0t * betav + abv0t; cv1t = cv1t * betav + abv1t; cv2t = cv2t * betav + abv2t; cv3t = cv3t * betav + abv3t; cv0b = cv0b * betav + abv0b; cv1b = cv1b * betav + abv1b; cv2b = cv2b * betav + abv2b; cv3b = cv3b * betav + abv3b; // Store column 0 c[0*rs_c + 0*cs_c] = cv0t[0]; c[1*rs_c + 0*cs_c] = cv0t[1]; c[2*rs_c + 0*cs_c] = cv0t[2]; c[3*rs_c + 0*cs_c] = cv0t[3]; c[4*rs_c + 0*cs_c] = cv0b[0]; c[5*rs_c + 0*cs_c] = cv0b[1]; c[6*rs_c + 0*cs_c] = cv0b[2]; c[7*rs_c + 
0*cs_c] = cv0b[3]; // Store column 1 c[0*rs_c + 1*cs_c] = cv1t[0]; c[1*rs_c + 1*cs_c] = cv1t[1]; c[2*rs_c + 1*cs_c] = cv1t[2]; c[3*rs_c + 1*cs_c] = cv1t[3]; c[4*rs_c + 1*cs_c] = cv1b[0]; c[5*rs_c + 1*cs_c] = cv1b[1]; c[6*rs_c + 1*cs_c] = cv1b[2]; c[7*rs_c + 1*cs_c] = cv1b[3]; // Store column 2 c[0*rs_c + 2*cs_c] = cv2t[0]; c[1*rs_c + 2*cs_c] = cv2t[1]; c[2*rs_c + 2*cs_c] = cv2t[2]; c[3*rs_c + 2*cs_c] = cv2t[3]; c[4*rs_c + 2*cs_c] = cv2b[0]; c[5*rs_c + 2*cs_c] = cv2b[1]; c[6*rs_c + 2*cs_c] = cv2b[2]; c[7*rs_c + 2*cs_c] = cv2b[3]; // Store column 3 c[0*rs_c + 3*cs_c] = cv3t[0]; c[1*rs_c + 3*cs_c] = cv3t[1]; c[2*rs_c + 3*cs_c] = cv3t[2]; c[3*rs_c + 3*cs_c] = cv3t[3]; c[4*rs_c + 3*cs_c] = cv3b[0]; c[5*rs_c + 3*cs_c] = cv3b[1]; c[6*rs_c + 3*cs_c] = cv3b[2]; c[7*rs_c + 3*cs_c] = cv3b[3]; } } void bli_cgemm_opt ( dim_t k, scomplex alpha[restrict static 1], scomplex a[restrict static 4*k], scomplex b[restrict static k*4], scomplex beta[restrict static 1], scomplex c[restrict static 4*4], inc_t rs_c, inc_t cs_c, auxinfo_t* data, cntx_t* cntx ) { // Vectors for accummulating column 0, 1, 2, 3 (initialize to 0.0) v4sf abv0r = v4sf_zero(), abv1r = v4sf_zero(), abv2r = v4sf_zero(), abv3r = v4sf_zero(); v4sf abv0i = v4sf_zero(), abv1i = v4sf_zero(), abv2i = v4sf_zero(), abv3i = v4sf_zero(); for (dim_t i = 0; i < k; i += 1) { const v4sf avt = v4sf_cload(a); const v4sf avb = v4sf_cload(a+2); const v4sf avr = __builtin_shufflevector(avt, avb, 0, 2, 4, 6); const v4sf avi = __builtin_shufflevector(avt, avb, 1, 3, 5, 7); const v4sf bv0r = v4sf_splat(b[0].real); const v4sf bv0i = v4sf_splat(b[0].imag); abv0r += avr * bv0r - avi * bv0i; abv0i += avr * bv0i + avi * bv0r; const v4sf bv1r = v4sf_splat(b[1].real); const v4sf bv1i = v4sf_splat(b[1].imag); abv1r += avr * bv1r - avi * bv1i; abv1i += avr * bv1i + avi * bv1r; const v4sf bv2r = v4sf_splat(b[2].real); const v4sf bv2i = v4sf_splat(b[2].imag); abv2r += avr * bv2r - avi * bv2i; abv2i += avr * bv2i + avi * bv2r; const v4sf bv3r = 
v4sf_splat(b[3].real); const v4sf bv3i = v4sf_splat(b[3].imag); abv3r += avr * bv3r - avi * bv3i; abv3i += avr * bv3i + avi * bv3r; a += 4; b += 4; } const v4sf alphavr = v4sf_splat(alpha->real); const v4sf alphavi = v4sf_splat(alpha->imag); v4sf temp; temp = abv0r * alphavr - abv0i * alphavi; abv0i = abv0r * alphavi + abv0i * alphavr; abv0r = temp; temp = abv1r * alphavr - abv1i * alphavi; abv1i = abv1r * alphavi + abv1i * alphavr; abv1r = temp; temp = abv2r * alphavr - abv2i * alphavi; abv2i = abv2r * alphavi + abv2i * alphavr; abv2r = temp; temp = abv3r * alphavr - abv3i * alphavi; abv3i = abv3r * alphavi + abv3i * alphavr; abv3r = temp; if (rs_c == 1) { const v4sf cv0t = v4sf_cload(&c[0*rs_c + 0*cs_c]); const v4sf cv1t = v4sf_cload(&c[0*rs_c + 1*cs_c]); const v4sf cv2t = v4sf_cload(&c[0*rs_c + 2*cs_c]); const v4sf cv3t = v4sf_cload(&c[0*rs_c + 3*cs_c]); const v4sf cv0b = v4sf_cload(&c[2*rs_c + 0*cs_c]); const v4sf cv1b = v4sf_cload(&c[2*rs_c + 1*cs_c]); const v4sf cv2b = v4sf_cload(&c[2*rs_c + 2*cs_c]); const v4sf cv3b = v4sf_cload(&c[2*rs_c + 3*cs_c]); v4sf cv0r = __builtin_shufflevector(cv0t, cv0b, 0, 2, 4, 6); v4sf cv0i = __builtin_shufflevector(cv0t, cv0b, 1, 3, 5, 7); v4sf cv1r = __builtin_shufflevector(cv1t, cv1b, 0, 2, 4, 6); v4sf cv1i = __builtin_shufflevector(cv1t, cv1b, 1, 3, 5, 7); v4sf cv2r = __builtin_shufflevector(cv2t, cv2b, 0, 2, 4, 6); v4sf cv2i = __builtin_shufflevector(cv2t, cv2b, 1, 3, 5, 7); v4sf cv3r = __builtin_shufflevector(cv3t, cv3b, 0, 2, 4, 6); v4sf cv3i = __builtin_shufflevector(cv3t, cv3b, 1, 3, 5, 7); const v4sf betavr = v4sf_splat(beta->real); const v4sf betavi = v4sf_splat(beta->imag); temp = abv0r + cv0r * betavr - cv0i * betavi; cv0i = abv0i + cv0r * betavi + cv0i * betavr; cv0r = temp; temp = abv1r + cv1r * betavr - cv1i * betavi; cv1i = abv1i + cv1r * betavi + cv1i * betavr; cv1r = temp; temp = abv2r + cv2r * betavr - cv2i * betavi; cv2i = abv2i + cv2r * betavi + cv2i * betavr; cv2r = temp; temp = abv3r + cv3r * betavr - 
cv3i * betavi; cv3i = abv3i + cv3r * betavi + cv3i * betavr; cv3r = temp; v4sf_cstore(&c[0*rs_c + 0*cs_c], __builtin_shufflevector(cv0r, cv0i, 0, 4, 1, 5)); v4sf_cstore(&c[2*rs_c + 0*cs_c], __builtin_shufflevector(cv0r, cv0i, 2, 6, 3, 7)); v4sf_cstore(&c[0*rs_c + 1*cs_c], __builtin_shufflevector(cv1r, cv1i, 0, 4, 1, 5)); v4sf_cstore(&c[2*rs_c + 1*cs_c], __builtin_shufflevector(cv1r, cv1i, 2, 6, 3, 7)); v4sf_cstore(&c[0*rs_c + 2*cs_c], __builtin_shufflevector(cv2r, cv2i, 0, 4, 1, 5)); v4sf_cstore(&c[2*rs_c + 2*cs_c], __builtin_shufflevector(cv2r, cv2i, 2, 6, 3, 7)); v4sf_cstore(&c[0*rs_c + 3*cs_c], __builtin_shufflevector(cv3r, cv3i, 0, 4, 1, 5)); v4sf_cstore(&c[2*rs_c + 3*cs_c], __builtin_shufflevector(cv3r, cv3i, 2, 6, 3, 7)); } else { // Load columns 0, 1, 2, 3 (real part) v4sf cv0r = (v4sf){ c[0*rs_c + 0*cs_c].real, c[1*rs_c + 0*cs_c].real, c[2*rs_c + 0*cs_c].real, c[3*rs_c + 0*cs_c].real }; v4sf cv1r = (v4sf){ c[0*rs_c + 1*cs_c].real, c[1*rs_c + 1*cs_c].real, c[2*rs_c + 1*cs_c].real, c[3*rs_c + 1*cs_c].real }; v4sf cv2r = (v4sf){ c[0*rs_c + 2*cs_c].real, c[1*rs_c + 2*cs_c].real, c[2*rs_c + 2*cs_c].real, c[3*rs_c + 2*cs_c].real }; v4sf cv3r = (v4sf){ c[0*rs_c + 3*cs_c].real, c[1*rs_c + 3*cs_c].real, c[2*rs_c + 3*cs_c].real, c[3*rs_c + 3*cs_c].real }; // Load columns 0, 1, 2, 3 (imaginary part) v4sf cv0i = (v4sf){ c[0*rs_c + 0*cs_c].imag, c[1*rs_c + 0*cs_c].imag, c[2*rs_c + 0*cs_c].imag, c[3*rs_c + 0*cs_c].imag }; v4sf cv1i = (v4sf){ c[0*rs_c + 1*cs_c].imag, c[1*rs_c + 1*cs_c].imag, c[2*rs_c + 1*cs_c].imag, c[3*rs_c + 1*cs_c].imag }; v4sf cv2i = (v4sf){ c[0*rs_c + 2*cs_c].imag, c[1*rs_c + 2*cs_c].imag, c[2*rs_c + 2*cs_c].imag, c[3*rs_c + 2*cs_c].imag }; v4sf cv3i = (v4sf){ c[0*rs_c + 3*cs_c].imag, c[1*rs_c + 3*cs_c].imag, c[2*rs_c + 3*cs_c].imag, c[3*rs_c + 3*cs_c].imag }; const v4sf betavr = v4sf_splat(beta->real); const v4sf betavi = v4sf_splat(beta->imag); temp = abv0r + cv0r * betavr - cv0i * betavi; cv0i = abv0i + cv0r * betavi + cv0i * betavr; cv0r = temp; 
temp = abv1r + cv1r * betavr - cv1i * betavi; cv1i = abv1i + cv1r * betavi + cv1i * betavr; cv1r = temp; temp = abv2r + cv2r * betavr - cv2i * betavi; cv2i = abv2i + cv2r * betavi + cv2i * betavr; cv2r = temp; temp = abv3r + cv3r * betavr - cv3i * betavi; cv3i = abv3i + cv3r * betavi + cv3i * betavr; cv3r = temp; // Store column 0 c[0*rs_c + 0*cs_c].real = cv0r[0]; c[0*rs_c + 0*cs_c].imag = cv0i[0]; c[1*rs_c + 0*cs_c].real = cv0r[1]; c[1*rs_c + 0*cs_c].imag = cv0i[1]; c[2*rs_c + 0*cs_c].real = cv0r[2]; c[2*rs_c + 0*cs_c].imag = cv0i[2]; c[3*rs_c + 0*cs_c].real = cv0r[3]; c[3*rs_c + 0*cs_c].imag = cv0i[3]; // Store column 1 c[0*rs_c + 1*cs_c].real = cv1r[0]; c[0*rs_c + 1*cs_c].imag = cv1i[0]; c[1*rs_c + 1*cs_c].real = cv1r[1]; c[1*rs_c + 1*cs_c].imag = cv1i[1]; c[2*rs_c + 1*cs_c].real = cv1r[2]; c[2*rs_c + 1*cs_c].imag = cv1i[2]; c[3*rs_c + 1*cs_c].real = cv1r[3]; c[3*rs_c + 1*cs_c].imag = cv1i[3]; // Store column 2 c[0*rs_c + 2*cs_c].real = cv2r[0]; c[0*rs_c + 2*cs_c].imag = cv2i[0]; c[1*rs_c + 2*cs_c].real = cv2r[1]; c[1*rs_c + 2*cs_c].imag = cv2i[1]; c[2*rs_c + 2*cs_c].real = cv2r[2]; c[2*rs_c + 2*cs_c].imag = cv2i[2]; c[3*rs_c + 2*cs_c].real = cv2r[3]; c[3*rs_c + 2*cs_c].imag = cv2i[3]; // Store column 3 c[0*rs_c + 3*cs_c].real = cv3r[0]; c[0*rs_c + 3*cs_c].imag = cv3i[0]; c[1*rs_c + 3*cs_c].real = cv3r[1]; c[1*rs_c + 3*cs_c].imag = cv3i[1]; c[2*rs_c + 3*cs_c].real = cv3r[2]; c[2*rs_c + 3*cs_c].imag = cv3i[2]; c[3*rs_c + 3*cs_c].real = cv3r[3]; c[3*rs_c + 3*cs_c].imag = cv3i[3]; } } #endif blis-0.6.1/kernels/old/x86/000077500000000000000000000000001360743507500153275ustar00rootroot00000000000000blis-0.6.1/kernels/old/x86/1m/000077500000000000000000000000001360743507500156445ustar00rootroot00000000000000blis-0.6.1/kernels/old/x86/1m/bli_packm_2xk.c000066400000000000000000000242731360743507500205250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" void bli_spackm_2xk( conj_t conja, dim_t n, void* beta, void* a, inc_t inca, inc_t lda, void* p ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_dpackm_2xk( conj_t conja, dim_t n, void* beta, void* a, inc_t inca, inc_t lda, void* p ) { double* restrict beta_cast = beta; double* restrict alpha1 = a; double* restrict pi1 = p; inc_t off1 = 1 * inca * sizeof(double); inc_t ldas = lda * sizeof(double); if ( bli_deq1( *beta_cast ) ) { dim_t n_iter = n / 4; dim_t n_left = n % 4; __asm__ volatile ( " \n\t" "movl %2, %%edi \n\t" // load a "movl %3, %%ebp \n\t" // load p " \n\t" "movl %4, %%eax \n\t" // load ldas "leal (%%edi,%%eax), %%edx \n\t" // load a + ldas "sall $1, %%eax \n\t" // ldas *= 2; " \n\t" "movl %5, %%ebx \n\t" // load off1 " \n\t" " \n\t" "movl %0, %%esi \n\t" "testl %%esi, %%esi \n\t" "je .DCONSIDERKLEFT \n\t" " \n\t" " \n\t" ".DLOOPKITER: \n\t" " \n\t" "addl $64, %%ebp \n\t" " \n\t" "movlpd (%%edi ), %%xmm0 \n\t" // iteration 0 "movhpd (%%edi,%%ebx, ), %%xmm0 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm0, -8 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edx ), %%xmm1 \n\t" // iteration 1 "movhpd (%%edx,%%ebx, ), %%xmm1 \n\t" "addl %%eax, %%edx \n\t" "movapd %%xmm1, -6 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edi ), %%xmm2 \n\t" // iteration 2 "movhpd (%%edi,%%ebx, ), %%xmm2 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm2, -4 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edx ), %%xmm3 \n\t" // iteration 3 "movhpd (%%edx,%%ebx, ), %%xmm3 \n\t" "addl %%eax, %%edx \n\t" "movapd %%xmm3, -2 * 8(%%ebp) \n\t" " \n\t" "decl %%esi \n\t" "jne .DLOOPKITER \n\t" " \n\t" " \n\t" " \n\t" ".DCONSIDERKLEFT: \n\t" " \n\t" "movl %1, %%esi \n\t" "testl %%esi, %%esi \n\t" "je .DDONE \n\t" " \n\t" " \n\t" " \n\t" ".DLOOPKLEFT: \n\t" " \n\t" "addl $16, %%ebp \n\t" " \n\t" "movlpd (%%edi ), %%xmm0 \n\t" "movhpd (%%edi,%%ebx, ), %%xmm0 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm0, -2 * 8(%%ebp) \n\t" " \n\t" "decl %%esi \n\t" "jne .DLOOPKLEFT \n\t" " \n\t" " \n\t" " 
\n\t" ".DDONE: \n\t" " \n\t" : // output operands : // input operands "m" (n_iter), "m" (n_left), "m" (alpha1), "m" (pi1), "m" (ldas), "m" (off1) : // register clobber list "eax", "ebx", "ecx", "edx", "edi", "ebp", "esi", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); } else { dim_t n_iter = n / 4; dim_t n_left = n % 4; __asm__ volatile ( " \n\t" "movl %2, %%edi \n\t" // load a "movl %3, %%ebp \n\t" // load p " \n\t" "movl %4, %%eax \n\t" // load ldas "leal (%%edi,%%eax), %%edx \n\t" // load a + ldas "sall $1, %%eax \n\t" // ldas *= 2; " \n\t" "movl %5, %%ebx \n\t" // load off1 " \n\t" "movl %6, %%esi \n\t" // load beta "movddup (%%esi), %%xmm7 \n\t" // load and duplicate *beta " \n\t" "movl %0, %%esi \n\t" "testl %%esi, %%esi \n\t" "je .DCONSIDERKLEFT2 \n\t" " \n\t" " \n\t" ".DLOOPKITER2: \n\t" " \n\t" "addl $64, %%ebp \n\t" " \n\t" "movlpd (%%edi ), %%xmm0 \n\t" // iteration 0 "movhpd (%%edi,%%ebx, ), %%xmm0 \n\t" "mulpd %%xmm7, %%xmm0 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm0, -8 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edx ), %%xmm1 \n\t" // iteration 1 "movhpd (%%edx,%%ebx, ), %%xmm1 \n\t" "mulpd %%xmm7, %%xmm1 \n\t" "addl %%eax, %%edx \n\t" "movapd %%xmm1, -6 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edi ), %%xmm2 \n\t" // iteration 2 "movhpd (%%edi,%%ebx, ), %%xmm2 \n\t" "mulpd %%xmm7, %%xmm2 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm2, -4 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edx ), %%xmm3 \n\t" // iteration 3 "movhpd (%%edx,%%ebx, ), %%xmm3 \n\t" "mulpd %%xmm7, %%xmm3 \n\t" "addl %%eax, %%edx \n\t" "movapd %%xmm3, -2 * 8(%%ebp) \n\t" " \n\t" "decl %%esi \n\t" "jne .DLOOPKITER2 \n\t" " \n\t" " \n\t" " \n\t" ".DCONSIDERKLEFT2: \n\t" " \n\t" "movl %1, %%esi \n\t" "testl %%esi, %%esi \n\t" "je .DDONE2 \n\t" " \n\t" " \n\t" " \n\t" ".DLOOPKLEFT2: \n\t" " \n\t" "addl $16, %%ebp \n\t" " \n\t" "movlpd (%%edi ), %%xmm0 \n\t" "movhpd (%%edi,%%ebx, ), %%xmm0 \n\t" "mulpd %%xmm7, %%xmm0 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm0, -2 * 8(%%ebp) 
\n\t" " \n\t" "decl %%esi \n\t" "jne .DLOOPKLEFT2 \n\t" " \n\t" " \n\t" " \n\t" ".DDONE2: \n\t" " \n\t" : // output operands : // input operands "m" (n_iter), "m" (n_left), "m" (alpha1), "m" (pi1), "m" (ldas), "m" (off1), "m" (beta) : // register clobber list "eax", "ebx", "ecx", "edx", "edi", "ebp", "esi", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); } } void bli_cpackm_2xk( conj_t conja, dim_t n, void* beta, void* a, inc_t inca, inc_t lda, void* p ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_zpackm_2xk( conj_t conja, dim_t n, void* beta, void* a, inc_t inca, inc_t lda, void* p ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } blis-0.6.1/kernels/old/x86/1m/bli_packm_2xk.h000066400000000000000000000040101360743507500205150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ conj_t conja, \ dim_t n, \ void* beta, \ void* a, inc_t inca, inc_t lda, \ void* p \ ); INSERT_GENTPROT_BASIC( packm_2xk ) blis-0.6.1/kernels/old/x86/1m/bli_packm_4xk.c000066400000000000000000000261621360743507500205260ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_spackm_4xk( conj_t conja, dim_t n, void* beta, void* a, inc_t inca, inc_t lda, void* p ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_dpackm_4xk( conj_t conja, dim_t n, void* beta, void* a, inc_t inca, inc_t lda, void* p ) { double* restrict beta_cast = beta; double* restrict alpha1 = a; double* restrict pi1 = p; inc_t off1 = 1 * inca * sizeof(double); inc_t off3 = 3 * inca * sizeof(double); inc_t ldas = lda * sizeof(double); if ( bli_deq1( *beta_cast ) ) { dim_t n_iter = n / 4; dim_t n_left = n % 4; __asm__ volatile ( " \n\t" //"movapd 4096(%%ebp), %%xmm7 \n\t" //"movapd %%xmm7, 4096(%%ebp) \n\t" " \n\t" "movl %2, %%edi \n\t" // load a "movl %3, %%ebp \n\t" // load p " \n\t" "movl %4, %%eax \n\t" // load ldas "leal (%%edi,%%eax), %%edx \n\t" // load a + ldas "sall $1, %%eax \n\t" // ldas *= 2; " \n\t" "movl %5, %%ebx \n\t" // load off1 "movl %6, %%ecx \n\t" // load off3 " \n\t" "movl %0, %%esi \n\t" "testl %%esi, %%esi \n\t" "je .DCONSIDERKLEFT \n\t" " \n\t" " \n\t" " \n\t" ".DLOOPKITER: \n\t" " \n\t" "addl $128, %%ebp \n\t" " \n\t" "movlpd (%%edi ), %%xmm0 \n\t" // iteration 0 "movhpd (%%edi,%%ebx, ), %%xmm0 \n\t" "movlpd (%%edi,%%ebx,2), %%xmm1 \n\t" "movhpd (%%edi,%%ecx, ), %%xmm1 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm0, -16 * 8(%%ebp) \n\t" "movapd %%xmm1, -14 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edx ), %%xmm2 \n\t" // iteration 1 "movhpd (%%edx,%%ebx, ), %%xmm2 \n\t" "movlpd (%%edx,%%ebx,2), %%xmm3 
\n\t" "movhpd (%%edx,%%ecx, ), %%xmm3 \n\t" "addl %%eax, %%edx \n\t" "movapd %%xmm2, -12 * 8(%%ebp) \n\t" "movapd %%xmm3, -10 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edi ), %%xmm4 \n\t" // iteration 2 "movhpd (%%edi,%%ebx, ), %%xmm4 \n\t" "movlpd (%%edi,%%ebx,2), %%xmm5 \n\t" "movhpd (%%edi,%%ecx, ), %%xmm5 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm4, -8 * 8(%%ebp) \n\t" "movapd %%xmm5, -6 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edx ), %%xmm6 \n\t" // iteration 3 "movhpd (%%edx,%%ebx, ), %%xmm6 \n\t" "movlpd (%%edx,%%ebx,2), %%xmm7 \n\t" "movhpd (%%edx,%%ecx, ), %%xmm7 \n\t" "addl %%eax, %%edx \n\t" "movapd %%xmm6, -4 * 8(%%ebp) \n\t" "movapd %%xmm7, -2 * 8(%%ebp) \n\t" " \n\t" "decl %%esi \n\t" "jne .DLOOPKITER \n\t" " \n\t" " \n\t" " \n\t" ".DCONSIDERKLEFT: \n\t" " \n\t" "movl %1, %%esi \n\t" "testl %%esi, %%esi \n\t" "je .DDONE \n\t" " \n\t" " \n\t" " \n\t" ".DLOOPKLEFT: \n\t" " \n\t" "addl $32, %%ebp \n\t" " \n\t" "movlpd (%%edi ), %%xmm0 \n\t" "movhpd (%%edi,%%ebx, ), %%xmm0 \n\t" "movlpd (%%edi,%%ebx,2), %%xmm1 \n\t" "movhpd (%%edi,%%ecx, ), %%xmm1 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm0, -4 * 8(%%ebp) \n\t" "movapd %%xmm1, -2 * 8(%%ebp) \n\t" " \n\t" "decl %%esi \n\t" "jne .DLOOPKLEFT \n\t" " \n\t" " \n\t" " \n\t" ".DDONE: \n\t" " \n\t" : // output operands : // input operands "m" (n_iter), "m" (n_left), "m" (alpha1), "m" (pi1), "m" (ldas), "m" (off1), "m" (off3) : // register clobber list "eax", "ebx", "ecx", "edx", "edi", "ebp", "esi", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); } else { dim_t n_iter = n / 2; dim_t n_left = n % 2; __asm__ volatile ( " \n\t" "movl %2, %%edi \n\t" // load a "movl %3, %%ebp \n\t" // load p " \n\t" "movl %4, %%eax \n\t" // load ldas "leal (%%edi,%%eax), %%edx \n\t" // load a + ldas "sall $1, %%eax \n\t" // ldas *= 2; " \n\t" "movl %5, %%ebx \n\t" // load off1 "movl %6, %%ecx \n\t" // load off3 " \n\t" "movl %7, %%esi \n\t" // load beta "movddup (%%esi), %%xmm7 \n\t" // load and duplicate *beta " 
\n\t" "movl %0, %%esi \n\t" "testl %%esi, %%esi \n\t" "je .DCONSIDERKLEFT2 \n\t" " \n\t" " \n\t" ".DLOOPKITER2: \n\t" " \n\t" "addl $64, %%ebp \n\t" " \n\t" "movlpd (%%edi ), %%xmm0 \n\t" // iteration 0 "movhpd (%%edi,%%ebx, ), %%xmm0 \n\t" "movlpd (%%edi,%%ebx,2), %%xmm1 \n\t" "movhpd (%%edi,%%ecx, ), %%xmm1 \n\t" "mulpd %%xmm7, %%xmm0 \n\t" "mulpd %%xmm7, %%xmm1 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm0, -8 * 8(%%ebp) \n\t" "movapd %%xmm1, -6 * 8(%%ebp) \n\t" " \n\t" "movlpd (%%edx ), %%xmm2 \n\t" // iteration 1 "movhpd (%%edx,%%ebx, ), %%xmm2 \n\t" "movlpd (%%edx,%%ebx,2), %%xmm3 \n\t" "movhpd (%%edx,%%ecx, ), %%xmm3 \n\t" "mulpd %%xmm7, %%xmm2 \n\t" "mulpd %%xmm7, %%xmm3 \n\t" "addl %%eax, %%edx \n\t" "movapd %%xmm2, -4 * 8(%%ebp) \n\t" "movapd %%xmm3, -2 * 8(%%ebp) \n\t" " \n\t" "decl %%esi \n\t" "jne .DLOOPKITER2 \n\t" " \n\t" " \n\t" " \n\t" ".DCONSIDERKLEFT2: \n\t" " \n\t" "movl %1, %%esi \n\t" "testl %%esi, %%esi \n\t" "je .DDONE2 \n\t" " \n\t" " \n\t" " \n\t" ".DLOOPKLEFT2: \n\t" " \n\t" "addl $32, %%ebp \n\t" " \n\t" "movlpd (%%edi ), %%xmm0 \n\t" "movhpd (%%edi,%%ebx, ), %%xmm0 \n\t" "movlpd (%%edi,%%ebx,2), %%xmm1 \n\t" "movhpd (%%edi,%%ecx, ), %%xmm1 \n\t" "mulpd %%xmm7, %%xmm0 \n\t" "mulpd %%xmm7, %%xmm1 \n\t" "addl %%eax, %%edi \n\t" "movapd %%xmm0, -4 * 8(%%ebp) \n\t" "movapd %%xmm1, -2 * 8(%%ebp) \n\t" " \n\t" "decl %%esi \n\t" "jne .DLOOPKLEFT2 \n\t" " \n\t" " \n\t" " \n\t" ".DDONE2: \n\t" " \n\t" : // output operands : // input operands "m" (n_iter), "m" (n_left), "m" (alpha1), "m" (pi1), "m" (ldas), "m" (off1), "m" (off3), "m" (beta) : // register clobber list "eax", "ebx", "ecx", "edx", "edi", "ebp", "esi", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); } } void bli_cpackm_4xk( conj_t conja, dim_t n, void* beta, void* a, inc_t inca, inc_t lda, void* p ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_zpackm_4xk( conj_t conja, dim_t n, void* beta, void* a, inc_t inca, inc_t lda, void* p ) { 
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } blis-0.6.1/kernels/old/x86/1m/bli_packm_4xk.h000066400000000000000000000040101360743507500205170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ conj_t conja, \ dim_t n, \ void* beta, \ void* a, inc_t inca, inc_t lda, \ void* p \ ); INSERT_GENTPROT_BASIC( packm_4xk ) blis-0.6.1/kernels/old/x86/3/000077500000000000000000000000001360743507500154715ustar00rootroot00000000000000blis-0.6.1/kernels/old/x86/3/bli_gemm_opt_d2x4.c000066400000000000000000000407361360743507500211450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_sgemm_opt_d2x4( dim_t k, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c, inc_t cs_c ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_dgemm_opt_d2x4( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c, inc_t cs_c ) { dim_t k_iter; dim_t k_left; k_iter = k / 8; k_left = k % 8; __asm__ volatile ( " \n\t" "movl %6, %%ecx \n\t" // load address of c " \n\t" "movl %8, %%edi \n\t" // load cs_c "sall $3, %%edi \n\t" // cs_c *= sizeof(double) " \n\t" "leal (%%ecx,%%edi,2), %%edx \n\t" // load address of c + 2*cs_c " \n\t" "prefetcht0 (%%ecx) \n\t" // give a T0 prefetch hint for c00. "prefetcht0 (%%ecx,%%edi) \n\t" // give a T0 prefetch hint for c01. "prefetcht0 (%%edx) \n\t" // give a T0 prefetch hint for c02. "prefetcht0 (%%edx,%%edi) \n\t" // give a T0 prefetch hint for c03. " \n\t" "movl %2, %%eax \n\t" // load address of a. "movl %3, %%ebx \n\t" // load address of b. " \n\t" "addl $8 * 16, %%eax \n\t" // increment pointers to allow byte "addl $8 * 16, %%ebx \n\t" // offsets in the unrolled iterations. " \n\t" "movapd -8 * 16(%%eax), %%xmm0 \n\t" // initialize loop by pre-loading elements "movapd -8 * 16(%%ebx), %%xmm1 \n\t" // of a and b. 
" \n\t" "pxor %%xmm2, %%xmm2 \n\t" "pxor %%xmm3, %%xmm3 \n\t" " \n\t" "pxor %%xmm4, %%xmm4 \n\t" "pxor %%xmm5, %%xmm5 \n\t" "pxor %%xmm6, %%xmm6 \n\t" "pxor %%xmm7, %%xmm7 \n\t" " \n\t" " \n\t" " \n\t" "movl %0, %%esi \n\t" // i = k_iter; "testl %%esi, %%esi \n\t" // check i via logical AND. "je .CONSIDERKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. " \n\t" " \n\t" ".LOOPKITER: \n\t" // MAIN LOOP " \n\t" "prefetcht0 (8*21+4)*8(%%eax) \n\t" " \n\t" " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration 0 "movapd -7 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd -6 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd -7 * 16(%%eax), %%xmm0 \n\t" " \n\t" " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration 1 "movapd -5 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd -4 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd -6 * 16(%%eax), %%xmm0 \n\t" " \n\t" " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration 2 "movapd -3 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd -2 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd -5 * 16(%%eax), %%xmm0 \n\t" " \n\t" " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration 3 "movapd -1 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 
\n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd 0 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd -4 * 16(%%eax), %%xmm0 \n\t" " \n\t" " \n\t" "prefetcht0 (8*21+12)*8(%%eax) \n\t" " \n\t" " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration 4 "movapd 1 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd 2 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd -3 * 16(%%eax), %%xmm0 \n\t" " \n\t" " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration 5 "movapd 3 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd 4 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd -2 * 16(%%eax), %%xmm0 \n\t" " \n\t" " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration 6 "movapd 5 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd 6 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd -1 * 16(%%eax), %%xmm0 \n\t" " \n\t" " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration 7 "movapd 7 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd 8 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" 
"mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd 0 * 16(%%eax), %%xmm0 \n\t" " \n\t" "addl $256, %%ebx \n\t" // b += 8*4 (unroll x nr) "addl $128, %%eax \n\t" // a += 8*2 (unroll x mr) " \n\t" " \n\t" "decl %%esi \n\t" // i -= 1; "jne .LOOPKITER \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" ".CONSIDERKLEFT: \n\t" " \n\t" "movl %1, %%esi \n\t" // i = k_left; "testl %%esi, %%esi \n\t" // check i via logical AND. "je .POSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. " \n\t" " \n\t" ".LOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "addpd %%xmm3, %%xmm7 \n\t" // iteration i "movapd -7 * 16(%%ebx), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "pshufd $0x4e, %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" " \n\t" "addpd %%xmm1, %%xmm5 \n\t" "movapd -6 * 16(%%ebx), %%xmm1 \n\t" "addpd %%xmm2, %%xmm4 \n\t" "pshufd $0x4e, %%xmm3, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm3 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "movapd -7 * 16(%%eax), %%xmm0 \n\t" " \n\t" " \n\t" " \n\t" " \n\t" "addl $32, %%ebx \n\t" // b += 4 (1 x nr) "addl $16, %%eax \n\t" // a += 2 (1 x mr) " \n\t" "decl %%esi \n\t" // i -= 1; "jne .LOOPKLEFT \n\t" // iterate again if i != 0. 
" \n\t" " \n\t" " \n\t" ".POSTACCUM: \n\t" " \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm3, %%xmm7 \n\t" " \n\t" " \n\t" "movl %4, %%eax \n\t" // load address of alpha "movl %5, %%ebx \n\t" // load address of beta "movddup (%%eax), %%xmm2 \n\t" // load alpha and duplicate "movddup (%%ebx), %%xmm3 \n\t" // load beta and duplicate " \n\t" " \n\t" " \n\t" "movl %7, %%esi \n\t" // load rs_c "sall $3, %%esi \n\t" // rs_c *= sizeof(double) " \n\t" " \n\t" "movapd %%xmm4, %%xmm0 \n\t" "movsd %%xmm5, %%xmm4 \n\t" "movsd %%xmm0, %%xmm5 \n\t" " \n\t" "movapd %%xmm6, %%xmm0 \n\t" "movsd %%xmm7, %%xmm6 \n\t" "movsd %%xmm0, %%xmm7 \n\t" " \n\t" " \n\t" " \n\t" "movlpd (%%ecx), %%xmm0 \n\t" // load c00 and c10, "movhpd (%%ecx,%%esi), %%xmm0 \n\t" "mulpd %%xmm2, %%xmm4 \n\t" // scale by alpha, "mulpd %%xmm3, %%xmm0 \n\t" // scale by beta, "addpd %%xmm4, %%xmm0 \n\t" // add the gemm result, "movlpd %%xmm0, (%%ecx) \n\t" // and store back to memory. "movhpd %%xmm0, (%%ecx,%%esi) \n\t" "addl %%edi, %%ecx \n\t" " \n\t" "movlpd (%%edx), %%xmm1 \n\t" // load c02 and c12, "movhpd (%%edx,%%esi), %%xmm1 \n\t" "mulpd %%xmm2, %%xmm6 \n\t" // scale by alpha, "mulpd %%xmm3, %%xmm1 \n\t" // scale by beta, "addpd %%xmm6, %%xmm1 \n\t" // add the gemm result, "movlpd %%xmm1, (%%edx) \n\t" // and store back to memory. "movhpd %%xmm1, (%%edx,%%esi) \n\t" "addl %%edi, %%edx \n\t" " \n\t" "movlpd (%%ecx), %%xmm0 \n\t" // load c01 and c11, "movhpd (%%ecx,%%esi), %%xmm0 \n\t" "mulpd %%xmm2, %%xmm5 \n\t" // scale by alpha, "mulpd %%xmm3, %%xmm0 \n\t" // scale by beta, "addpd %%xmm5, %%xmm0 \n\t" // add the gemm result, "movlpd %%xmm0, (%%ecx) \n\t" // and store back to memory. 
"movhpd %%xmm0, (%%ecx,%%esi) \n\t" " \n\t" "movlpd (%%edx), %%xmm1 \n\t" // load c03 and c13, "movhpd (%%edx,%%esi), %%xmm1 \n\t" "mulpd %%xmm2, %%xmm7 \n\t" // scale by alpha, "mulpd %%xmm3, %%xmm1 \n\t" // scale by beta, "addpd %%xmm7, %%xmm1 \n\t" // add the gemm result, "movlpd %%xmm1, (%%edx) \n\t" // and store back to memory. "movhpd %%xmm1, (%%edx,%%esi) \n\t" " \n\t" " \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), "m" (k_left), "m" (a), "m" (b), "m" (alpha), "m" (beta), "m" (c), "m" (rs_c), "m" (cs_c) : // register clobber list "eax", "ebx", "ecx", "edx", "esi", "edi", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); } void bli_cgemm_opt_d2x4( dim_t k, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c, inc_t cs_c ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_zgemm_opt_d2x4( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c, inc_t cs_c ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } blis-0.6.1/kernels/old/x86/3/bli_gemm_opt_d4x2.c000066400000000000000000000356511360743507500211450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_sgemm_opt_d4x2( dim_t k, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c, inc_t cs_c, float* restrict a_next, float* restrict b_next ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_dgemm_opt_d4x2( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c, inc_t cs_c, double* restrict a_next, double* restrict b_next ) { dim_t k_iter; dim_t k_left; k_iter = k / 8; k_left = k % 8; __asm__ volatile ( " \n\t" "movl %6, %%ecx \n\t" // load address of c " \n\t" "movl %8, %%edi \n\t" // load cs_c "sall $3, %%edi \n\t" // cs_c *= sizeof(double) " \n\t" "prefetcht0 (%%ecx) \n\t" // give a T0 prefetch hint for c00. "prefetcht0 (%%ecx,%%edi) \n\t" // give a T0 prefetch hint for c01. " \n\t" "movl %2, %%eax \n\t" // load address of a. "movl %3, %%ebx \n\t" // load address of b. 
" \n\t" "addl $8 * 16, %%eax \n\t" // increment pointers to allow byte "addl $8 * 16, %%ebx \n\t" // offsets in the unrolled iterations. " \n\t" "movapd -8 * 16(%%eax), %%xmm0 \n\t" // initialize loop by pre-loading elements "movapd -4 * 16(%%eax), %%xmm3 \n\t" // of a. " \n\t" "pxor %%xmm4, %%xmm4 \n\t" "pxor %%xmm5, %%xmm5 \n\t" "pxor %%xmm6, %%xmm6 \n\t" "pxor %%xmm7, %%xmm7 \n\t" " \n\t" " \n\t" " \n\t" "movl %0, %%esi \n\t" // i = k_iter; "testl %%esi, %%esi \n\t" // check i via logical AND. "je .CONSIDERKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. " \n\t" " \n\t" ".LOOPKITER: \n\t" // MAIN LOOP " \n\t" "movapd -8 * 16(%%ebx), %%xmm1 \n\t" // iteration 0 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -7 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd -7 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd -6 * 16(%%eax), %%xmm0 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd -6 * 16(%%ebx), %%xmm1 \n\t" // iteration 1 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -5 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd -5 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd 0 * 16(%%eax), %%xmm0 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd -4 * 16(%%ebx), %%xmm1 \n\t" // iteration 2 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -3 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm3 \n\t" "addpd %%xmm3, %%xmm5 \n\t" "movapd -3 * 16(%%eax), %%xmm3 \n\t" "mulpd %%xmm3, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "movapd -2 * 16(%%eax), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd -2 * 16(%%ebx), %%xmm1 \n\t" // iteration 3 "movapd %%xmm1, %%xmm2 
\n\t" "mulpd %%xmm3, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -1 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm3 \n\t" "addpd %%xmm3, %%xmm5 \n\t" "movapd -1 * 16(%%eax), %%xmm3 \n\t" "mulpd %%xmm3, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "movapd 4 * 16(%%eax), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd 0 * 16(%%ebx), %%xmm1 \n\t" // iteration 4 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd 1 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd 1 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd 2 * 16(%%eax), %%xmm0 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd 2 * 16(%%ebx), %%xmm1 \n\t" // iteration 5 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd 3 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd 3 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd 8 * 16(%%eax), %%xmm0 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd 4 * 16(%%ebx), %%xmm1 \n\t" // iteration 6 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd 5 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm3 \n\t" "addpd %%xmm3, %%xmm5 \n\t" "movapd 5 * 16(%%eax), %%xmm3 \n\t" "mulpd %%xmm3, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "movapd 6 * 16(%%eax), %%xmm3 \n\t" "addl $8 * 4 * 8, %%eax \n\t" // a += 8*4 (unroll x mr) "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd 6 * 16(%%ebx), %%xmm1 \n\t" // iteration 7 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd 7 * 16(%%ebx), %%xmm1 \n\t" "addl $8 * 2 * 2 * 8, %%ebx \n\t" // b += 8*2*2 (unroll x nr x ndup) "mulpd %%xmm1, %%xmm3 \n\t" "addpd %%xmm3, %%xmm5 \n\t" "movapd -9 * 16(%%eax), %%xmm3 \n\t" 
"mulpd %%xmm3, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "decl %%esi \n\t" // i -= 1; "movapd -4 * 16(%%eax), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "jne .LOOPKITER \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" ".CONSIDERKLEFT: \n\t" " \n\t" "movl %1, %%esi \n\t" // i = k_left; "testl %%esi, %%esi \n\t" // check i via logical AND. "je .POSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. " \n\t" " \n\t" ".LOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "movapd -8 * 16(%%ebx), %%xmm1 \n\t" // iteration i "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -7 * 16(%%ebx), %%xmm1 \n\t" "addl $1 * 2 * 2 * 8, %%ebx \n\t" // b += 2*2 (1 x nr x ndup) "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd -7 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd -6 * 16(%%eax), %%xmm0 \n\t" "addl $1 * 4 * 8, %%eax \n\t" // a += 4 (1 x mr) "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "decl %%esi \n\t" // i -= 1; "jne .LOOPKLEFT \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" ".POSTACCUM: \n\t" " \n\t" "movl %4, %%eax \n\t" // load address of alpha "movl %5, %%ebx \n\t" // load address of beta "movddup (%%eax), %%xmm2 \n\t" // load alpha and duplicate "movddup (%%ebx), %%xmm3 \n\t" // load beta and duplicate " \n\t" " \n\t" " \n\t" "movl %7, %%esi \n\t" // load rs_c "sall $3, %%esi \n\t" // rs_c *= sizeof(double) " \n\t" "leal (%%ecx,%%esi,2), %%edx \n\t" // load address of c + 2*rs_c; " \n\t" " \n\t" " \n\t" "movlpd (%%ecx), %%xmm0 \n\t" // load c00 and c10, "movhpd (%%ecx,%%esi), %%xmm0 \n\t" "mulpd %%xmm2, %%xmm4 \n\t" // scale by alpha, "mulpd %%xmm3, %%xmm0 \n\t" // scale by beta, "addpd %%xmm4, %%xmm0 \n\t" // add the gemm result, "movlpd %%xmm0, (%%ecx) \n\t" // and store back to memory. 
"movhpd %%xmm0, (%%ecx,%%esi) \n\t" "addl %%edi, %%ecx \n\t" " \n\t" "movlpd (%%edx), %%xmm1 \n\t" // load c20 and c30, "movhpd (%%edx,%%esi), %%xmm1 \n\t" "mulpd %%xmm2, %%xmm6 \n\t" // scale by alpha, "mulpd %%xmm3, %%xmm1 \n\t" // scale by beta, "addpd %%xmm6, %%xmm1 \n\t" // add the gemm result, "movlpd %%xmm1, (%%edx) \n\t" // and store back to memory. "movhpd %%xmm1, (%%edx,%%esi) \n\t" "addl %%edi, %%edx \n\t" " \n\t" "movlpd (%%ecx), %%xmm0 \n\t" // load c01 and c11, "movhpd (%%ecx,%%esi), %%xmm0 \n\t" "mulpd %%xmm2, %%xmm5 \n\t" // scale by alpha, "mulpd %%xmm3, %%xmm0 \n\t" // scale by beta, "addpd %%xmm5, %%xmm0 \n\t" // add the gemm result, "movlpd %%xmm0, (%%ecx) \n\t" // and store back to memory. 
"movhpd %%xmm1, (%%edx,%%esi) \n\t" " \n\t" " \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), "m" (k_left), "m" (a), "m" (b), "m" (alpha), "m" (beta), "m" (c), "m" (rs_c), "m" (cs_c) : // register clobber list "eax", "ebx", "ecx", "edx", "esi", "edi", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); } void bli_cgemm_opt_d4x2( dim_t k, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c, inc_t cs_c, scomplex* restrict a_next, scomplex* restrict b_next ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_zgemm_opt_d4x2( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c, inc_t cs_c, dcomplex* restrict a_next, dcomplex* restrict b_next ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } blis-0.6.1/kernels/old/x86/3/bli_gemmtrsm_l_opt_d4x2.c000066400000000000000000000516121360743507500223610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" void bli_sgemmtrsm_l_opt_d4x2( dim_t k, float* restrict alpha, float* restrict a10, float* restrict a11, float* restrict bd01, float* restrict bd11, float* restrict b11, float* restrict c11, inc_t rs_c, inc_t cs_c, float* restrict a_next, float* restrict b_next ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_dgemmtrsm_l_opt_d4x2( dim_t k, double* restrict alpha, double* restrict a10, double* restrict a11, double* restrict bd01, double* restrict bd11, double* restrict b11, double* restrict c11, inc_t rs_c, inc_t cs_c, double* restrict a_next, double* restrict b_next ) { dim_t k_iter; dim_t k_left; k_iter = k / 8; k_left = k % 8; __asm__ volatile ( " \n\t" "movl %2, %%eax \n\t" // load address of a10. "movl %4, %%ebx \n\t" // load address of bd01. " \n\t" "addl $8 * 16, %%eax \n\t" // increment pointers to allow byte "addl $8 * 16, %%ebx \n\t" // offsets in the unrolled iterations. " \n\t" "movapd -8 * 16(%%eax), %%xmm0 \n\t" // initialize loop by pre-loading elements "movapd -4 * 16(%%eax), %%xmm3 \n\t" // and of a. 
" \n\t" "pxor %%xmm4, %%xmm4 \n\t" "pxor %%xmm5, %%xmm5 \n\t" "pxor %%xmm6, %%xmm6 \n\t" "pxor %%xmm7, %%xmm7 \n\t" " \n\t" " \n\t" " \n\t" "movl %0, %%esi \n\t" // i = k_iter; "testl %%esi, %%esi \n\t" // check i via logical AND. "je .CONSIDERKLEFT \n\t" // if i == 0, jump to code that " \n\t" // contains the k_left loop. " \n\t" " \n\t" ".LOOPKITER: \n\t" // MAIN LOOP " \n\t" "movapd -8 * 16(%%ebx), %%xmm1 \n\t" // iteration 0 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -7 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd -7 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd -6 * 16(%%eax), %%xmm0 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd -6 * 16(%%ebx), %%xmm1 \n\t" // iteration 1 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -5 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd -5 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd 0 * 16(%%eax), %%xmm0 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd -4 * 16(%%ebx), %%xmm1 \n\t" // iteration 2 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -3 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm3 \n\t" "addpd %%xmm3, %%xmm5 \n\t" "movapd -3 * 16(%%eax), %%xmm3 \n\t" "mulpd %%xmm3, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "movapd -2 * 16(%%eax), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd -2 * 16(%%ebx), %%xmm1 \n\t" // iteration 3 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -1 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm3 \n\t" "addpd %%xmm3, %%xmm5 \n\t" "movapd -1 * 16(%%eax), %%xmm3 \n\t" "mulpd %%xmm3, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "movapd 4 * 16(%%eax), 
%%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd 0 * 16(%%ebx), %%xmm1 \n\t" // iteration 4 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd 1 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd 1 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd 2 * 16(%%eax), %%xmm0 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd 2 * 16(%%ebx), %%xmm1 \n\t" // iteration 5 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd 3 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd 3 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd 8 * 16(%%eax), %%xmm0 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd 4 * 16(%%ebx), %%xmm1 \n\t" // iteration 6 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd 5 * 16(%%ebx), %%xmm1 \n\t" "mulpd %%xmm1, %%xmm3 \n\t" "addpd %%xmm3, %%xmm5 \n\t" "movapd 5 * 16(%%eax), %%xmm3 \n\t" "mulpd %%xmm3, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "movapd 6 * 16(%%eax), %%xmm3 \n\t" "addl $8 * 4 * 8, %%eax \n\t" // a += 8*4 (unroll x mr) "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "movapd 6 * 16(%%ebx), %%xmm1 \n\t" // iteration 7 "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd 7 * 16(%%ebx), %%xmm1 \n\t" "addl $8 * 2 * 2 * 8, %%ebx \n\t" // b += 8*2*2 (unroll x nr x ndup) "mulpd %%xmm1, %%xmm3 \n\t" "addpd %%xmm3, %%xmm5 \n\t" "movapd -9 * 16(%%eax), %%xmm3 \n\t" "mulpd %%xmm3, %%xmm2 \n\t" "mulpd %%xmm3, %%xmm1 \n\t" "decl %%esi \n\t" // i -= 1; "movapd -4 * 16(%%eax), %%xmm3 \n\t" "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "jne .LOOPKITER \n\t" // iterate again if i != 0. 
" \n\t" " \n\t" " \n\t" ".CONSIDERKLEFT: \n\t" " \n\t" "movl %1, %%esi \n\t" // i = k_left; "testl %%esi, %%esi \n\t" // check i via logical AND. "je .POSTACCUM \n\t" // if i == 0, we're done; jump to end. " \n\t" // else, we prepare to enter k_left loop. " \n\t" " \n\t" ".LOOPKLEFT: \n\t" // EDGE LOOP " \n\t" "movapd -8 * 16(%%ebx), %%xmm1 \n\t" // iteration i "movapd %%xmm1, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "addpd %%xmm1, %%xmm4 \n\t" "movapd -7 * 16(%%ebx), %%xmm1 \n\t" "addl $1 * 2 * 2 * 8, %%ebx \n\t" // b += 2*2 (1 x nr x ndup) "mulpd %%xmm1, %%xmm0 \n\t" "addpd %%xmm0, %%xmm5 \n\t" "movapd -7 * 16(%%eax), %%xmm0 \n\t" "mulpd %%xmm0, %%xmm2 \n\t" "mulpd %%xmm0, %%xmm1 \n\t" "movapd -6 * 16(%%eax), %%xmm0 \n\t" "addl $1 * 4 * 8, %%eax \n\t" // a += 4 (1 x mr) "addpd %%xmm2, %%xmm6 \n\t" "addpd %%xmm1, %%xmm7 \n\t" " \n\t" "decl %%esi \n\t" // i -= 1; "jne .LOOPKLEFT \n\t" // iterate again if i != 0. " \n\t" " \n\t" " \n\t" ".POSTACCUM: \n\t" " \n\t" " \n\t" "movl %6, %%ebx \n\t" // load address of b11. 
" \n\t" " \n\t" // xmm4 == ( ab00 xmm5 == ( ab01 " \n\t" // ab10 ) ab11 ) " \n\t" // xmm6 == ( ab20 xmm7 == ( ab21 " \n\t" // ab30 ) ab31 ) "movapd %%xmm4, %%xmm0 \n\t" "unpcklpd %%xmm5, %%xmm0 \n\t" "unpckhpd %%xmm5, %%xmm4 \n\t" "movapd %%xmm4, %%xmm1 \n\t" " \n\t" "movapd %%xmm6, %%xmm2 \n\t" "unpcklpd %%xmm7, %%xmm2 \n\t" "unpckhpd %%xmm7, %%xmm6 \n\t" "movapd %%xmm6, %%xmm3 \n\t" " \n\t" // xmm0 == ( ab00 ab01 ) " \n\t" // xmm1 == ( ab10 ab11 ) " \n\t" // xmm2 == ( ab20 ab21 ) " \n\t" // xmm3 == ( ab30 ab31 ) " \n\t" "movl %10, %%eax \n\t" // load address of alpha "movddup (%%eax), %%xmm7 \n\t" // load alpha and duplicate " \n\t" "movapd 0 * 16(%%ebx), %%xmm4 \n\t" "movapd 1 * 16(%%ebx), %%xmm5 \n\t" "mulpd %%xmm7, %%xmm4 \n\t" // xmm4 = alpha * ( beta00 beta01 ) "mulpd %%xmm7, %%xmm5 \n\t" // xmm5 = alpha * ( beta10 beta11 ) "movapd 2 * 16(%%ebx), %%xmm6 \n\t" "mulpd %%xmm7, %%xmm6 \n\t" // xmm6 = alpha * ( beta20 beta21 ) "mulpd 3 * 16(%%ebx), %%xmm7 \n\t" // xmm7 = alpha * ( beta30 beta31 ) " \n\t" "subpd %%xmm0, %%xmm4 \n\t" // xmm4 -= xmm0 "subpd %%xmm1, %%xmm5 \n\t" // xmm5 -= xmm1 "subpd %%xmm2, %%xmm6 \n\t" // xmm6 -= xmm2 "subpd %%xmm3, %%xmm7 \n\t" // xmm7 -= xmm3 " \n\t" " \n\t" " \n\t" ".TRSM: \n\t" " \n\t" " \n\t" "movl %3, %%eax \n\t" // load address of a11 "movl %7, %%ecx \n\t" // load address of c11 " \n\t" "movl %8, %%edi \n\t" // load rs_c "movl %9, %%esi \n\t" // load cs_c "sall $3, %%edi \n\t" // rs_c *= sizeof( double ) "sall $3, %%esi \n\t" // cs_c *= sizeof( double ) " \n\t" " \n\t" " \n\t" " \n\t" // iteration 0 " \n\t" "movddup (0+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = (1/alpha00) " \n\t" "mulpd %%xmm0, %%xmm4 \n\t" // xmm4 *= (1/alpha00); " \n\t" "movapd %%xmm4, 0 * 16(%%ebx) \n\t" // store ( beta00 beta01 ) = xmm4 "movlpd %%xmm4, (%%ecx) \n\t" // store ( gamma00 ) = xmm4[0] "movhpd %%xmm4, (%%ecx,%%esi) \n\t" // store ( gamma01 ) = xmm4[1] "addl %%edi, %%ecx \n\t" // c11 += rs_c " \n\t" " \n\t" " \n\t" " \n\t" // iteration 1 " 
\n\t" "movddup (1+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = alpha10 "movddup (1+1*4)*8(%%eax), %%xmm1 \n\t" // load xmm1 = (1/alpha11) " \n\t" "mulpd %%xmm4, %%xmm0 \n\t" // xmm0 = alpha10 * ( beta00 beta01 ) "subpd %%xmm0, %%xmm5 \n\t" // xmm5 -= xmm0 "mulpd %%xmm1, %%xmm5 \n\t" // xmm5 *= (1/alpha11); " \n\t" "movapd %%xmm5, 1 * 16(%%ebx) \n\t" // store ( beta10 beta11 ) = xmm5 "movlpd %%xmm5, (%%ecx) \n\t" // store ( gamma10 ) = xmm5[0] "movhpd %%xmm5, (%%ecx,%%esi) \n\t" // store ( gamma11 ) = xmm5[1] "addl %%edi, %%ecx \n\t" // c11 += rs_c " \n\t" " \n\t" " \n\t" " \n\t" // iteration 2 " \n\t" "movddup (2+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = alpha20 "movddup (2+1*4)*8(%%eax), %%xmm1 \n\t" // load xmm1 = alpha21 "movddup (2+2*4)*8(%%eax), %%xmm2 \n\t" // load xmm2 = (1/alpha22) " \n\t" "mulpd %%xmm4, %%xmm0 \n\t" // xmm0 = alpha20 * ( beta00 beta01 ) "mulpd %%xmm5, %%xmm1 \n\t" // xmm1 = alpha21 * ( beta10 beta11 ) "addpd %%xmm1, %%xmm0 \n\t" // xmm0 += xmm1; "subpd %%xmm0, %%xmm6 \n\t" // xmm6 -= xmm0 "mulpd %%xmm2, %%xmm6 \n\t" // xmm6 *= (1/alpha22); " \n\t" "movapd %%xmm6, 2 * 16(%%ebx) \n\t" // store ( beta20 beta21 ) = xmm6 "movlpd %%xmm6, (%%ecx) \n\t" // store ( gamma20 ) = xmm6[0] "movhpd %%xmm6, (%%ecx,%%esi) \n\t" // store ( gamma21 ) = xmm6[1] "addl %%edi, %%ecx \n\t" // c11 += rs_c " \n\t" " \n\t" " \n\t" " \n\t" // iteration 3 " \n\t" "movddup (3+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = alpha30 "movddup (3+1*4)*8(%%eax), %%xmm1 \n\t" // load xmm1 = alpha31 "movddup (3+2*4)*8(%%eax), %%xmm2 \n\t" // load xmm2 = alpha32 "movddup (3+3*4)*8(%%eax), %%xmm3 \n\t" // load xmm3 = (1/alpha33) " \n\t" "mulpd %%xmm4, %%xmm0 \n\t" // xmm0 = alpha30 * ( beta00 beta01 ) "mulpd %%xmm5, %%xmm1 \n\t" // xmm1 = alpha31 * ( beta10 beta11 ) "mulpd %%xmm6, %%xmm2 \n\t" // xmm2 = alpha32 * ( beta20 beta21 ) "addpd %%xmm1, %%xmm0 \n\t" // xmm0 += xmm1 "addpd %%xmm2, %%xmm0 \n\t" // xmm0 += xmm2 "subpd %%xmm0, %%xmm7 \n\t" // xmm7 -= xmm0 "mulpd %%xmm3, %%xmm7 
\n\t" // xmm7 *= (1/alpha33); " \n\t" "movapd %%xmm7, 3 * 16(%%ebx) \n\t" // store ( beta30 beta31 ) = xmm7 "movlpd %%xmm7, (%%ecx) \n\t" // store ( gamma30 ) = xmm7[0] "movhpd %%xmm7, (%%ecx,%%esi) \n\t" // store ( gamma31 ) = xmm7[1] " \n\t" " \n\t" " \n\t" ".UPDATEBD11: \n\t" " \n\t" " \n\t" "movl %5, %%edx \n\t" " \n\t" "movddup %%xmm4, %%xmm0 \n\t" "movddup %%xmm5, %%xmm1 \n\t" "movddup %%xmm6, %%xmm2 \n\t" "movddup %%xmm7, %%xmm3 \n\t" " \n\t" "unpckhpd %%xmm4, %%xmm4 \n\t" "unpckhpd %%xmm5, %%xmm5 \n\t" "unpckhpd %%xmm6, %%xmm6 \n\t" "unpckhpd %%xmm7, %%xmm7 \n\t" " \n\t" "movapd %%xmm0, 0 * 16(%%edx) \n\t" "movapd %%xmm4, 1 * 16(%%edx) \n\t" "movapd %%xmm1, 2 * 16(%%edx) \n\t" "movapd %%xmm5, 3 * 16(%%edx) \n\t" "movapd %%xmm2, 4 * 16(%%edx) \n\t" "movapd %%xmm6, 5 * 16(%%edx) \n\t" "movapd %%xmm3, 6 * 16(%%edx) \n\t" "movapd %%xmm7, 7 * 16(%%edx) \n\t" " \n\t" " \n\t" : // output operands (none) : // input operands "m" (k_iter), "m" (k_left), "m" (a10), "m" (a11), "m" (bd01), "m" (bd11), "m" (b11), "m" (c11), "m" (rs_c), "m" (cs_c), "m" (alpha) : // register clobber list "eax", "ebx", "ecx", "edx", "esi", "edi", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "memory" ); } void bli_cgemmtrsm_l_opt_d4x2( dim_t k, scomplex* restrict alpha, scomplex* restrict a10, scomplex* restrict a11, scomplex* restrict bd01, scomplex* restrict bd11, scomplex* restrict b11, scomplex* restrict c11, inc_t rs_c, inc_t cs_c, scomplex* restrict a_next, scomplex* restrict b_next ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } void bli_zgemmtrsm_l_opt_d4x2( dim_t k, dcomplex* restrict alpha, dcomplex* restrict a10, dcomplex* restrict a11, dcomplex* restrict bd01, dcomplex* restrict bd11, dcomplex* restrict b11, dcomplex* restrict c11, inc_t rs_c, inc_t cs_c, dcomplex* restrict a_next, dcomplex* restrict b_next ) { bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); } 
blis-0.6.1/kernels/old/x86/3/bli_gemmtrsm_u_opt_d4x2.c000066400000000000000000000522251360743507500223730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

// Single-precision variant: placeholder only. It ignores every argument and
// reports BLIS_NOT_YET_IMPLEMENTED through bli_check_error_code().
void bli_sgemmtrsm_u_opt_d4x2(
                               dim_t k,
                               float* restrict alpha,
                               float* restrict a12,
                               float* restrict a11,
                               float* restrict bd21,
                               float* restrict bd11,
                               float* restrict b11,
                               float* restrict c11, inc_t rs_c, inc_t cs_c,
                               float* restrict a_next,
                               float* restrict b_next
                             )
{
	bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
}

/*
   Double-precision fused gemm + upper-triangular-solve micro-kernel for a
   4x2 block, written as 32-bit x86 inline assembly (SSE2/SSE3 instructions).

   What the assembly does, in order:

   1. Main/edge loops: for each of the k steps (k / 8 trips through an
      8-way unrolled loop, then k % 8 single steps) it reads four doubles
      from a12 and two 16-byte vectors from bd21 and accumulates their
      products into xmm4..xmm7.
   2. .POSTACCUM: regroups the accumulators by row, loads the four
      16-byte rows of b11, scales them by *alpha and subtracts the
      accumulated products.
   3. .TRSM: back substitution from row 3 up to row 0. a11 is read as a
      4x4 block with element (i,j) at index i + 4*j. Diagonal entries are
      multiplied in (never divided by), i.e. the code treats them as
      already inverted, as the "1/alphaXX" comments state. Each solved
      row is stored back into b11 and into c11, where rows are rs_c
      elements apart and the two columns cs_c elements apart.
   4. .UPDATEBD11: writes every solved element twice, side by side, into
      bd11 (eight 16-byte stores).

   Parameters not mentioned above (a_next, b_next) are never referenced.

   NOTE(review): every operand is fetched with movl, so this presumably
   relies on pointers, dim_t and inc_t all being 32 bits wide -- confirm
   against the build configuration that uses this kernel.
   NOTE(review): movapd is used for a12, bd21, b11 and bd11, which requires
   those addresses to be 16-byte aligned -- confirm callers guarantee it.
   NOTE(review): the labels (.LOOPKITER, .TRSM, ...) are plain names rather
   than compiler-generated unique ones; if the compiler ever emits this asm
   statement more than once in a translation unit the assembler would
   presumably reject the duplicates -- verify before enabling inlining.
*/
void bli_dgemmtrsm_u_opt_d4x2(
                               dim_t k,
                               double* restrict alpha,
                               double* restrict a12,
                               double* restrict a11,
                               double* restrict bd21,
                               double* restrict bd11,
                               double* restrict b11,
                               double* restrict c11, inc_t rs_c, inc_t cs_c,
                               double* restrict a_next,
                               double* restrict b_next
                             )
{
	// Trip counts for the 8-way unrolled loop and for the remainder loop.
	dim_t k_iter;
	dim_t k_left;

	k_iter = k / 8;
	k_left = k % 8;

	// Operand map: %0 k_iter, %1 k_left, %2 a12, %3 a11, %4 bd21, %5 bd11,
	// %6 b11, %7 c11, %8 rs_c, %9 cs_c, %10 alpha.
	__asm__ volatile
	(
	" \n\t"
	"movl %2, %%eax \n\t" // load address of a12.
	"movl %4, %%ebx \n\t" // load address of bd21.
	" \n\t"
	"addl $8 * 16, %%eax \n\t" // increment pointers to allow byte
	"addl $8 * 16, %%ebx \n\t" // offsets in the unrolled iterations.
	" \n\t"
	"movapd -8 * 16(%%eax), %%xmm0 \n\t" // initialize loop by pre-loading elements
	"movapd -4 * 16(%%eax), %%xmm3 \n\t" // of a.
	" \n\t"
	// Clear the four accumulators.
	"pxor %%xmm4, %%xmm4 \n\t"
	"pxor %%xmm5, %%xmm5 \n\t"
	"pxor %%xmm6, %%xmm6 \n\t"
	"pxor %%xmm7, %%xmm7 \n\t"
	" \n\t"
	" \n\t"
	" \n\t"
	"movl %0, %%esi \n\t" // i = k_iter;
	"testl %%esi, %%esi \n\t" // check i via logical AND.
	"je .CONSIDERKLEFT \n\t" // if i == 0, jump to code that
	" \n\t" // contains the k_left loop.
	" \n\t"
	" \n\t"
	".LOOPKITER: \n\t" // MAIN LOOP
	" \n\t"
	"movapd -8 * 16(%%ebx), %%xmm1 \n\t" // iteration 0
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd -7 * 16(%%ebx), %%xmm1 \n\t"
	"mulpd %%xmm1, %%xmm0 \n\t"
	"addpd %%xmm0, %%xmm5 \n\t"
	"movapd -7 * 16(%%eax), %%xmm0 \n\t"
	"mulpd %%xmm0, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"movapd -6 * 16(%%eax), %%xmm0 \n\t"
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	"movapd -6 * 16(%%ebx), %%xmm1 \n\t" // iteration 1
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd -5 * 16(%%ebx), %%xmm1 \n\t"
	"mulpd %%xmm1, %%xmm0 \n\t"
	"addpd %%xmm0, %%xmm5 \n\t"
	"movapd -5 * 16(%%eax), %%xmm0 \n\t"
	"mulpd %%xmm0, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"movapd 0 * 16(%%eax), %%xmm0 \n\t"
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	"movapd -4 * 16(%%ebx), %%xmm1 \n\t" // iteration 2
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm3, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd -3 * 16(%%ebx), %%xmm1 \n\t"
	"mulpd %%xmm1, %%xmm3 \n\t"
	"addpd %%xmm3, %%xmm5 \n\t"
	"movapd -3 * 16(%%eax), %%xmm3 \n\t"
	"mulpd %%xmm3, %%xmm2 \n\t"
	"mulpd %%xmm3, %%xmm1 \n\t"
	"movapd -2 * 16(%%eax), %%xmm3 \n\t"
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	"movapd -2 * 16(%%ebx), %%xmm1 \n\t" // iteration 3
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm3, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd -1 * 16(%%ebx), %%xmm1 \n\t"
	"mulpd %%xmm1, %%xmm3 \n\t"
	"addpd %%xmm3, %%xmm5 \n\t"
	"movapd -1 * 16(%%eax), %%xmm3 \n\t"
	"mulpd %%xmm3, %%xmm2 \n\t"
	"mulpd %%xmm3, %%xmm1 \n\t"
	"movapd 4 * 16(%%eax), %%xmm3 \n\t"
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	"movapd 0 * 16(%%ebx), %%xmm1 \n\t" // iteration 4
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd 1 * 16(%%ebx), %%xmm1 \n\t"
	"mulpd %%xmm1, %%xmm0 \n\t"
	"addpd %%xmm0, %%xmm5 \n\t"
	"movapd 1 * 16(%%eax), %%xmm0 \n\t"
	"mulpd %%xmm0, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"movapd 2 * 16(%%eax), %%xmm0 \n\t"
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	"movapd 2 * 16(%%ebx), %%xmm1 \n\t" // iteration 5
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd 3 * 16(%%ebx), %%xmm1 \n\t"
	"mulpd %%xmm1, %%xmm0 \n\t"
	"addpd %%xmm0, %%xmm5 \n\t"
	"movapd 3 * 16(%%eax), %%xmm0 \n\t"
	"mulpd %%xmm0, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	// Pre-load for the next trip: after eax advances by 16*16 bytes below,
	// this address becomes -8 * 16(%eax), matching the loop-entry pre-load.
	"movapd 8 * 16(%%eax), %%xmm0 \n\t"
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	"movapd 4 * 16(%%ebx), %%xmm1 \n\t" // iteration 6
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm3, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd 5 * 16(%%ebx), %%xmm1 \n\t"
	"mulpd %%xmm1, %%xmm3 \n\t"
	"addpd %%xmm3, %%xmm5 \n\t"
	"movapd 5 * 16(%%eax), %%xmm3 \n\t"
	"mulpd %%xmm3, %%xmm2 \n\t"
	"mulpd %%xmm3, %%xmm1 \n\t"
	"movapd 6 * 16(%%eax), %%xmm3 \n\t"
	"addl $8 * 4 * 8, %%eax \n\t" // a += 8*4 (unroll x mr)
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	"movapd 6 * 16(%%ebx), %%xmm1 \n\t" // iteration 7
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm3, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd 7 * 16(%%ebx), %%xmm1 \n\t"
	"addl $8 * 2 * 2 * 8, %%ebx \n\t" // b += 8*2*2 (unroll x nr x ndup)
	"mulpd %%xmm1, %%xmm3 \n\t"
	"addpd %%xmm3, %%xmm5 \n\t"
	// eax was already advanced by 16*16 bytes in iteration 6, so offset
	// -9 * 16 here addresses what was +7 * 16 before the advance.
	"movapd -9 * 16(%%eax), %%xmm3 \n\t"
	"mulpd %%xmm3, %%xmm2 \n\t"
	"mulpd %%xmm3, %%xmm1 \n\t"
	"decl %%esi \n\t" // i -= 1;
	"movapd -4 * 16(%%eax), %%xmm3 \n\t"
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	// The flags tested here are those set by decl above; the SSE
	// instructions in between do not modify them.
	"jne .LOOPKITER \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	".CONSIDERKLEFT: \n\t"
	" \n\t"
	"movl %1, %%esi \n\t" // i = k_left;
	"testl %%esi, %%esi \n\t" // check i via logical AND.
	"je .POSTACCUM \n\t" // if i == 0, we're done; jump to end.
	" \n\t" // else, we prepare to enter k_left loop.
	" \n\t"
	" \n\t"
	".LOOPKLEFT: \n\t" // EDGE LOOP
	" \n\t"
	"movapd -8 * 16(%%ebx), %%xmm1 \n\t" // iteration i
	"movapd %%xmm1, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"addpd %%xmm1, %%xmm4 \n\t"
	"movapd -7 * 16(%%ebx), %%xmm1 \n\t"
	"addl $1 * 2 * 2 * 8, %%ebx \n\t" // b += 2*2 (1 x nr x ndup)
	"mulpd %%xmm1, %%xmm0 \n\t"
	"addpd %%xmm0, %%xmm5 \n\t"
	"movapd -7 * 16(%%eax), %%xmm0 \n\t"
	"mulpd %%xmm0, %%xmm2 \n\t"
	"mulpd %%xmm0, %%xmm1 \n\t"
	"movapd -6 * 16(%%eax), %%xmm0 \n\t"
	"addl $1 * 4 * 8, %%eax \n\t" // a += 4 (1 x mr)
	"addpd %%xmm2, %%xmm6 \n\t"
	"addpd %%xmm1, %%xmm7 \n\t"
	" \n\t"
	"decl %%esi \n\t" // i -= 1;
	"jne .LOOPKLEFT \n\t" // iterate again if i != 0.
	" \n\t"
	" \n\t"
	" \n\t"
	".POSTACCUM: \n\t"
	" \n\t"
	" \n\t"
	"movl %6, %%ebx \n\t" // load address of b11.
	" \n\t"
	" \n\t" // xmm4 == ( ab00 xmm5 == ( ab01
	" \n\t" // ab10 ) ab11 )
	" \n\t" // xmm6 == ( ab20 xmm7 == ( ab21
	" \n\t" // ab30 ) ab31 )
	// Interleave the accumulators so each register holds one row.
	"movapd %%xmm4, %%xmm0 \n\t"
	"unpcklpd %%xmm5, %%xmm0 \n\t"
	"unpckhpd %%xmm5, %%xmm4 \n\t"
	"movapd %%xmm4, %%xmm1 \n\t"
	" \n\t"
	"movapd %%xmm6, %%xmm2 \n\t"
	"unpcklpd %%xmm7, %%xmm2 \n\t"
	"unpckhpd %%xmm7, %%xmm6 \n\t"
	"movapd %%xmm6, %%xmm3 \n\t"
	" \n\t" // xmm0 == ( ab00 ab01 )
	" \n\t" // xmm1 == ( ab10 ab11 )
	" \n\t" // xmm2 == ( ab20 ab21 )
	" \n\t" // xmm3 == ( ab30 ab31 )
	" \n\t"
	"movl %10, %%eax \n\t" // load address of alpha
	"movddup (%%eax), %%xmm7 \n\t" // load alpha and duplicate
	" \n\t"
	"movapd 0 * 16(%%ebx), %%xmm4 \n\t" // load xmm4 = ( beta00 beta01 )
	"movapd 1 * 16(%%ebx), %%xmm5 \n\t" // load xmm5 = ( beta10 beta11 )
	"movapd 2 * 16(%%ebx), %%xmm6 \n\t" // load xmm6 = ( beta20 beta21 )
	"mulpd %%xmm7, %%xmm4 \n\t" // xmm4 *= alpha
	"mulpd %%xmm7, %%xmm5 \n\t" // xmm5 *= alpha
	"mulpd %%xmm7, %%xmm6 \n\t" // xmm6 *= alpha
	// The last row is multiplied straight from memory into the register
	// holding alpha, so the separate load below is left disabled.
	//"movapd 3 * 16(%%ebx), %%xmm7 \n\t" // load xmm7 = ( beta30 beta31 )
	"mulpd 3 * 16(%%ebx), %%xmm7 \n\t" // xmm7 = alpha * ( beta30 beta31 )
	" \n\t"
	"subpd %%xmm0, %%xmm4 \n\t" // xmm4 -= xmm0
	"subpd %%xmm1, %%xmm5 \n\t" // xmm5 -= xmm1
	"subpd %%xmm2, %%xmm6 \n\t" // xmm6 -= xmm2
	"subpd %%xmm3, %%xmm7 \n\t" // xmm7 -= xmm3
	" \n\t"
	" \n\t"
	" \n\t"
	".TRSM: \n\t"
	" \n\t"
	" \n\t"
	"movl %3, %%eax \n\t" // load address of a11
	"movl %7, %%ecx \n\t" // load address of c11
	" \n\t"
	"movl %8, %%edi \n\t" // load rs_c
	"movl %9, %%esi \n\t" // load cs_c
	"sall $3, %%edi \n\t" // rs_c *= sizeof( double )
	"sall $3, %%esi \n\t" // cs_c *= sizeof( double )
	" \n\t"
	// Start at the last row of c11: the upper-triangular solve walks
	// the rows from bottom (row 3) to top (row 0).
	"addl %%edi, %%ecx \n\t" // c11 += (4-1)*rs_c
	"addl %%edi, %%ecx \n\t"
	"addl %%edi, %%ecx \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // iteration 0
	" \n\t"
	"movddup (3+3*4)*8(%%eax), %%xmm3 \n\t" // load xmm3 = (1/alpha33)
	" \n\t"
	"mulpd %%xmm3, %%xmm7 \n\t" // xmm7 *= (1/alpha33);
	" \n\t"
	"movapd %%xmm7, 3 * 16(%%ebx) \n\t" // store ( beta30 beta31 ) = xmm7
	"movlpd %%xmm7, (%%ecx) \n\t" // store ( gamma30 ) = xmm7[0]
	"movhpd %%xmm7, (%%ecx,%%esi) \n\t" // store ( gamma31 ) = xmm7[1]
	"subl %%edi, %%ecx \n\t" // c11 -= rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // iteration 1
	" \n\t"
	"movddup (2+2*4)*8(%%eax), %%xmm2 \n\t" // load xmm2 = (1/alpha22)
	"movddup (2+3*4)*8(%%eax), %%xmm3 \n\t" // load xmm3 = alpha23
	" \n\t"
	"mulpd %%xmm7, %%xmm3 \n\t" // xmm3 = alpha23 * ( beta30 beta31 )
	"subpd %%xmm3, %%xmm6 \n\t" // xmm6 -= xmm3
	"mulpd %%xmm2, %%xmm6 \n\t" // xmm6 *= (1/alpha22);
	" \n\t"
	"movapd %%xmm6, 2 * 16(%%ebx) \n\t" // store ( beta20 beta21 ) = xmm6
	"movlpd %%xmm6, (%%ecx) \n\t" // store ( gamma20 ) = xmm6[0]
	"movhpd %%xmm6, (%%ecx,%%esi) \n\t" // store ( gamma21 ) = xmm6[1]
	"subl %%edi, %%ecx \n\t" // c11 -= rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // iteration 2
	" \n\t"
	"movddup (1+1*4)*8(%%eax), %%xmm1 \n\t" // load xmm1 = (1/alpha11)
	"movddup (1+2*4)*8(%%eax), %%xmm2 \n\t" // load xmm2 = alpha12
	"movddup (1+3*4)*8(%%eax), %%xmm3 \n\t" // load xmm3 = alpha13
	" \n\t"
	"mulpd %%xmm6, %%xmm2 \n\t" // xmm2 = alpha12 * ( beta20 beta21 )
	"mulpd %%xmm7, %%xmm3 \n\t" // xmm3 = alpha13 * ( beta30 beta31 )
	"addpd %%xmm3, %%xmm2 \n\t" // xmm2 += xmm3;
	"subpd %%xmm2, %%xmm5 \n\t" // xmm5 -= xmm2
	"mulpd %%xmm1, %%xmm5 \n\t" // xmm5 *= (1/alpha11);
	" \n\t"
	"movapd %%xmm5, 1 * 16(%%ebx) \n\t" // store ( beta10 beta11 ) = xmm5
	"movlpd %%xmm5, (%%ecx) \n\t" // store ( gamma10 ) = xmm5[0]
	"movhpd %%xmm5, (%%ecx,%%esi) \n\t" // store ( gamma11 ) = xmm5[1]
	"subl %%edi, %%ecx \n\t" // c11 -= rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // iteration 3
	" \n\t"
	"movddup (0+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = (1/alpha00)
	"movddup (0+1*4)*8(%%eax), %%xmm1 \n\t" // load xmm1 = alpha01
	"movddup (0+2*4)*8(%%eax), %%xmm2 \n\t" // load xmm2 = alpha02
	"movddup (0+3*4)*8(%%eax), %%xmm3 \n\t" // load xmm3 = alpha03
	" \n\t"
	"mulpd %%xmm5, %%xmm1 \n\t" // xmm1 = alpha01 * ( beta10 beta11 )
	"mulpd %%xmm6, %%xmm2 \n\t" // xmm2 = alpha02 * ( beta20 beta21 )
	"mulpd %%xmm7, %%xmm3 \n\t" // xmm3 = alpha03 * ( beta30 beta31 )
	"addpd %%xmm2, %%xmm1 \n\t" // xmm1 += xmm2;
	"addpd %%xmm3, %%xmm1 \n\t" // xmm1 += xmm3;
	"subpd %%xmm1, %%xmm4 \n\t" // xmm4 -= xmm1
	"mulpd %%xmm0, %%xmm4 \n\t" // xmm4 *= (1/alpha00);
	" \n\t"
	"movapd %%xmm4, 0 * 16(%%ebx) \n\t" // store ( beta00 beta01 ) = xmm4
	"movlpd %%xmm4, (%%ecx) \n\t" // store ( gamma00 ) = xmm4[0]
	"movhpd %%xmm4, (%%ecx,%%esi) \n\t" // store ( gamma01 ) = xmm4[1]
	" \n\t"
	" \n\t"
	" \n\t"
	".UPDATEBD11: \n\t"
	" \n\t"
	" \n\t"
	"movl %5, %%edx \n\t"
	" \n\t"
	// Broadcast the low element of each solved row ...
	"movddup %%xmm4, %%xmm0 \n\t"
	"movddup %%xmm5, %%xmm1 \n\t"
	"movddup %%xmm6, %%xmm2 \n\t"
	"movddup %%xmm7, %%xmm3 \n\t"
	" \n\t"
	// ... and the high element, so each value appears twice per vector.
	"unpckhpd %%xmm4, %%xmm4 \n\t"
	"unpckhpd %%xmm5, %%xmm5 \n\t"
	"unpckhpd %%xmm6, %%xmm6 \n\t"
	"unpckhpd %%xmm7, %%xmm7 \n\t"
	" \n\t"
	"movapd %%xmm0, 0 * 16(%%edx) \n\t"
	"movapd %%xmm4, 1 * 16(%%edx) \n\t"
	"movapd %%xmm1, 2 * 16(%%edx) \n\t"
	"movapd %%xmm5, 3 * 16(%%edx) \n\t"
	"movapd %%xmm2, 4 * 16(%%edx) \n\t"
	"movapd %%xmm6, 5 * 16(%%edx) \n\t"
	"movapd %%xmm3, 6 * 16(%%edx) \n\t"
	"movapd %%xmm7, 7 * 16(%%edx) \n\t"
	" \n\t"
	" \n\t"

	: // output operands (none)
	: // input operands
	  "m" (k_iter),
	  "m" (k_left),
	  "m" (a12),
	  "m" (a11),
	  "m" (bd21),
	  "m" (bd11),
	  "m" (b11),
	  "m" (c11),
	  "m" (rs_c),
	  "m" (cs_c),
	  "m" (alpha)
	: // register clobber list
	  "eax", "ebx", "ecx", "edx", "esi", "edi",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "memory"
	);
}

// Single-precision complex variant: placeholder only. It ignores every
// argument and reports BLIS_NOT_YET_IMPLEMENTED.
void bli_cgemmtrsm_u_opt_d4x2(
                               dim_t k,
                               scomplex* restrict alpha,
                               scomplex* restrict a12,
                               scomplex* restrict a11,
                               scomplex* restrict bd21,
                               scomplex* restrict bd11,
                               scomplex* restrict b11,
                               scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
                               scomplex* restrict a_next,
                               scomplex* restrict b_next
                             )
{
	bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
}

// Double-precision complex variant: placeholder only. It ignores every
// argument and reports BLIS_NOT_YET_IMPLEMENTED.
void bli_zgemmtrsm_u_opt_d4x2(
                               dim_t k,
                               dcomplex* restrict alpha,
                               dcomplex* restrict a12,
                               dcomplex* restrict a11,
                               dcomplex* restrict bd21,
                               dcomplex* restrict bd11,
                               dcomplex* restrict b11,
                               dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
                               dcomplex* restrict a_next,
                               dcomplex* restrict b_next
                             )
{
	bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
}
blis-0.6.1/kernels/old/x86/3/bli_trsm_l_opt_d4x2.c000066400000000000000000000226171360743507500215160ustar00rootroot00000000000000/*

BLIS
An object-based framework for developing high-performance BLAS-like
libraries.

Copyright (C) 2014, The University of Texas at Austin

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// Single-precision variant: placeholder only. It ignores every argument and
// reports BLIS_NOT_YET_IMPLEMENTED through bli_check_error_code().
void bli_strsm_l_opt_d4x2(
                           float* restrict a11,
                           float* restrict b11,
                           float* restrict bd11,
                           float* restrict c11, inc_t rs_c, inc_t cs_c
                         )
{
	bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
}

/*
   Double-precision lower-triangular-solve micro-kernel for a 4x2 block,
   written as 32-bit x86 inline assembly (SSE2/SSE3 instructions).

   What the assembly does, in order:

   1. Loads the four 16-byte rows of b11 into xmm4..xmm7.
   2. Forward substitution from row 0 down to row 3. a11 is read as a 4x4
      block with element (i,j) at index i + 4*j. Diagonal entries are
      multiplied in (never divided by), i.e. the code treats them as
      already inverted, as the "1/alphaXX" comments state. Each solved
      row is stored back into b11 and into c11, where rows are rs_c
      elements apart and the two columns cs_c elements apart.
   3. .UPDATEBD11: writes every solved element twice, side by side, into
      bd11 (eight 16-byte stores).

   NOTE(review): every operand is fetched with movl, so this presumably
   relies on pointers and inc_t being 32 bits wide -- confirm against the
   build configuration that uses this kernel.
   NOTE(review): movapd is used for b11 and bd11, which requires those
   addresses to be 16-byte aligned -- confirm callers guarantee it.
   NOTE(review): .UPDATEBD11 is a plain label rather than a
   compiler-generated unique one; if the compiler ever emits this asm
   statement more than once in a translation unit the assembler would
   presumably reject the duplicate -- verify before enabling inlining.
*/
void bli_dtrsm_l_opt_d4x2(
                           double* restrict a11,
                           double* restrict b11,
                           double* restrict bd11,
                           double* restrict c11, inc_t rs_c, inc_t cs_c
                         )
{
	// Operand map: %0 a11, %1 b11, %2 bd11, %3 c11, %4 rs_c, %5 cs_c.
	__asm__ volatile
	(
	" \n\t"
	"movl %1, %%ebx \n\t" // load address of b11.
	" \n\t"
	"movapd 0 * 16(%%ebx), %%xmm4 \n\t" // load xmm4 = ( beta00 beta01 )
	"movapd 1 * 16(%%ebx), %%xmm5 \n\t" // load xmm5 = ( beta10 beta11 )
	"movapd 2 * 16(%%ebx), %%xmm6 \n\t" // load xmm6 = ( beta20 beta21 )
	"movapd 3 * 16(%%ebx), %%xmm7 \n\t" // load xmm7 = ( beta30 beta31 )
	" \n\t"
	" \n\t"
	"movl %0, %%eax \n\t" // load address of a11
	"movl %3, %%ecx \n\t" // load address of c11
	" \n\t"
	"movl %4, %%edi \n\t" // load rs_c
	"movl %5, %%esi \n\t" // load cs_c
	"sall $3, %%edi \n\t" // rs_c *= sizeof( double )
	"sall $3, %%esi \n\t" // cs_c *= sizeof( double )
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // iteration 0
	" \n\t"
	"movddup (0+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = (1/alpha00)
	" \n\t"
	"mulpd %%xmm0, %%xmm4 \n\t" // xmm4 *= (1/alpha00);
	" \n\t"
	"movapd %%xmm4, 0 * 16(%%ebx) \n\t" // store ( beta00 beta01 ) = xmm4
	"movlpd %%xmm4, (%%ecx) \n\t" // store ( gamma00 ) = xmm4[0]
	"movhpd %%xmm4, (%%ecx,%%esi) \n\t" // store ( gamma01 ) = xmm4[1]
	"addl %%edi, %%ecx \n\t" // c11 += rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // iteration 1
	" \n\t"
	"movddup (1+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = alpha10
	"movddup (1+1*4)*8(%%eax), %%xmm1 \n\t" // load xmm1 = (1/alpha11)
	" \n\t"
	"mulpd %%xmm4, %%xmm0 \n\t" // xmm0 = alpha10 * ( beta00 beta01 )
	"subpd %%xmm0, %%xmm5 \n\t" // xmm5 -= xmm0
	"mulpd %%xmm1, %%xmm5 \n\t" // xmm5 *= (1/alpha11);
	" \n\t"
	"movapd %%xmm5, 1 * 16(%%ebx) \n\t" // store ( beta10 beta11 ) = xmm5
	"movlpd %%xmm5, (%%ecx) \n\t" // store ( gamma10 ) = xmm5[0]
	"movhpd %%xmm5, (%%ecx,%%esi) \n\t" // store ( gamma11 ) = xmm5[1]
	"addl %%edi, %%ecx \n\t" // c11 += rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // iteration 2
	" \n\t"
	"movddup (2+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = alpha20
	"movddup (2+1*4)*8(%%eax), %%xmm1 \n\t" // load xmm1 = alpha21
	"movddup (2+2*4)*8(%%eax), %%xmm2 \n\t" // load xmm2 = (1/alpha22)
	" \n\t"
	"mulpd %%xmm4, %%xmm0 \n\t" // xmm0 = alpha20 * ( beta00 beta01 )
	"mulpd %%xmm5, %%xmm1 \n\t" // xmm1 = alpha21 * ( beta10 beta11 )
	"addpd %%xmm1, %%xmm0 \n\t" // xmm0 += xmm1;
	"subpd %%xmm0, %%xmm6 \n\t" // xmm6 -= xmm0
	"mulpd %%xmm2, %%xmm6 \n\t" // xmm6 *= (1/alpha22);
	" \n\t"
	"movapd %%xmm6, 2 * 16(%%ebx) \n\t" // store ( beta20 beta21 ) = xmm6
	"movlpd %%xmm6, (%%ecx) \n\t" // store ( gamma20 ) = xmm6[0]
	"movhpd %%xmm6, (%%ecx,%%esi) \n\t" // store ( gamma21 ) = xmm6[1]
	"addl %%edi, %%ecx \n\t" // c11 += rs_c
	" \n\t"
	" \n\t"
	" \n\t"
	" \n\t" // iteration 3
	" \n\t"
	"movddup (3+0*4)*8(%%eax), %%xmm0 \n\t" // load xmm0 = alpha30
	"movddup (3+1*4)*8(%%eax), %%xmm1 \n\t" // load xmm1 = alpha31
	"movddup (3+2*4)*8(%%eax), %%xmm2 \n\t" // load xmm2 = alpha32
	"movddup (3+3*4)*8(%%eax), %%xmm3 \n\t" // load xmm3 = (1/alpha33)
	" \n\t"
	"mulpd %%xmm4, %%xmm0 \n\t" // xmm0 = alpha30 * ( beta00 beta01 )
	"mulpd %%xmm5, %%xmm1 \n\t" // xmm1 = alpha31 * ( beta10 beta11 )
	"mulpd %%xmm6, %%xmm2 \n\t" // xmm2 = alpha32 * ( beta20 beta21 )
	"addpd %%xmm1, %%xmm0 \n\t" // xmm0 += xmm1
	"addpd %%xmm2, %%xmm0 \n\t" // xmm0 += xmm2
	"subpd %%xmm0, %%xmm7 \n\t" // xmm7 -= xmm0
	"mulpd %%xmm3, %%xmm7 \n\t" // xmm7 *= (1/alpha33);
	" \n\t"
	"movapd %%xmm7, 3 * 16(%%ebx) \n\t" // store ( beta30 beta31 ) = xmm7
	"movlpd %%xmm7, (%%ecx) \n\t" // store ( gamma30 ) = xmm7[0]
	"movhpd %%xmm7, (%%ecx,%%esi) \n\t" // store ( gamma31 ) = xmm7[1]
	" \n\t"
	" \n\t"
	" \n\t"
	".UPDATEBD11: \n\t"
	" \n\t"
	" \n\t"
	"movl %2, %%edx \n\t"
	" \n\t"
	// Broadcast the low element of each solved row ...
	"movddup %%xmm4, %%xmm0 \n\t"
	"movddup %%xmm5, %%xmm1 \n\t"
	"movddup %%xmm6, %%xmm2 \n\t"
	"movddup %%xmm7, %%xmm3 \n\t"
	" \n\t"
	// ... and the high element, so each value appears twice per vector.
	"unpckhpd %%xmm4, %%xmm4 \n\t"
	"unpckhpd %%xmm5, %%xmm5 \n\t"
	"unpckhpd %%xmm6, %%xmm6 \n\t"
	"unpckhpd %%xmm7, %%xmm7 \n\t"
	" \n\t"
	"movapd %%xmm0, 0 * 16(%%edx) \n\t"
	"movapd %%xmm4, 1 * 16(%%edx) \n\t"
	"movapd %%xmm1, 2 * 16(%%edx) \n\t"
	"movapd %%xmm5, 3 * 16(%%edx) \n\t"
	"movapd %%xmm2, 4 * 16(%%edx) \n\t"
	"movapd %%xmm6, 5 * 16(%%edx) \n\t"
	"movapd %%xmm3, 6 * 16(%%edx) \n\t"
	"movapd %%xmm7, 7 * 16(%%edx) \n\t"
	" \n\t"
	" \n\t"

	: // output operands (none)
	: // input operands
	  "m" (a11),
	  "m" (b11),
	  "m" (bd11),
	  "m" (c11),
	  "m" (rs_c),
	  "m" (cs_c)
	: // register clobber list
	  "eax", "ebx", "ecx", "edx", "esi", "edi",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "memory"
	);
}

// Single-precision complex variant: placeholder only. It ignores every
// argument and reports BLIS_NOT_YET_IMPLEMENTED.
void bli_ctrsm_l_opt_d4x2(
                           scomplex* restrict a11,
                           scomplex* restrict b11,
                           scomplex* restrict bd11,
                           scomplex* restrict c11, inc_t rs_c, inc_t cs_c
                         )
{
	bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
}

// Double-precision complex variant: placeholder only. It ignores every
// argument and reports BLIS_NOT_YET_IMPLEMENTED.
void bli_ztrsm_l_opt_d4x2(
                           dcomplex* restrict a11,
                           dcomplex* restrict b11,
                           dcomplex* restrict bd11,
                           dcomplex* restrict c11, inc_t rs_c, inc_t cs_c
                         )
{
	bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
}
blis-0.6.1/kernels/penryn/000077500000000000000000000000001360743507500154375ustar00rootroot00000000000000blis-0.6.1/kernels/penryn/1/000077500000000000000000000000001360743507500155775ustar00rootroot00000000000000blis-0.6.1/kernels/penryn/1/bli_axpyv_penryn_int.c000066400000000000000000000106701360743507500222110ustar00rootroot00000000000000/*

BLIS
An object-based framework for developing high-performance BLAS-like
libraries.

Copyright (C) 2014, The University of Texas at Austin

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "pmmintrin.h" #include "blis.h" typedef union { __m128d v; double d[2]; } v2df_t; void bli_daxpyv_penryn_int ( conj_t conjx, dim_t n, double* restrict alpha, double* restrict x, inc_t incx, double* restrict y, inc_t incy, cntx_t* restrict cntx ) { double* restrict alpha_cast = alpha; double* restrict x_cast = x; double* restrict y_cast = y; dim_t i; const dim_t n_elem_per_reg = 2; const dim_t n_iter_unroll = 4; dim_t n_pre; dim_t n_run; dim_t n_left; double* restrict x1; double* restrict y1; double alpha1c, x1c; v2df_t alpha1v; v2df_t x1v, x2v, x3v, x4v; v2df_t y1v, y2v, y3v, y4v; bool_t use_ref = FALSE; if ( bli_zero_dim1( n ) ) return; n_pre = 0; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( incx != 1 || incy != 1 ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( ( siz_t )x, 16 ) || bli_is_unaligned_to( ( siz_t )y, 16 ) ) { use_ref = TRUE; if ( bli_is_unaligned_to( ( siz_t )x, 16 ) && bli_is_unaligned_to( ( siz_t )y, 16 ) ) { use_ref = FALSE; n_pre = 1; } } // Call the reference implementation if needed. 
if ( use_ref == TRUE ) { daxpyv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_AXPYV_KER, cntx ); f ( conjx, n, alpha, x, incx, y, incy, cntx ); return; } n_run = ( n - n_pre ) / ( n_elem_per_reg * n_iter_unroll ); n_left = ( n - n_pre ) % ( n_elem_per_reg * n_iter_unroll ); alpha1c = *alpha_cast; x1 = x_cast; y1 = y_cast; if ( n_pre == 1 ) { x1c = *x1; *y1 += alpha1c * x1c; x1 += incx; y1 += incy; } alpha1v.v = _mm_loaddup_pd( ( double* )&alpha1c ); for ( i = 0; i < n_run; ++i ) { y1v.v = _mm_load_pd( ( double* )y1 ); x1v.v = _mm_load_pd( ( double* )x1 ); y1v.v += alpha1v.v * x1v.v; _mm_store_pd( ( double* )(y1 ), y1v.v ); y2v.v = _mm_load_pd( ( double* )(y1 + 2) ); x2v.v = _mm_load_pd( ( double* )(x1 + 2) ); y2v.v += alpha1v.v * x2v.v; _mm_store_pd( ( double* )(y1 + 2), y2v.v ); y3v.v = _mm_load_pd( ( double* )(y1 + 4) ); x3v.v = _mm_load_pd( ( double* )(x1 + 4) ); y3v.v += alpha1v.v * x3v.v; _mm_store_pd( ( double* )(y1 + 4), y3v.v ); y4v.v = _mm_load_pd( ( double* )(y1 + 6) ); x4v.v = _mm_load_pd( ( double* )(x1 + 6) ); y4v.v += alpha1v.v * x4v.v; _mm_store_pd( ( double* )(y1 + 6), y4v.v ); x1 += n_elem_per_reg * n_iter_unroll; y1 += n_elem_per_reg * n_iter_unroll; } if ( n_left > 0 ) { for ( i = 0; i < n_left; ++i ) { x1c = *x1; *y1 += alpha1c * x1c; x1 += incx; y1 += incy; } } } blis-0.6.1/kernels/penryn/1/bli_dotv_penryn_int.c000066400000000000000000000076341360743507500220240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "pmmintrin.h" #include "blis.h" typedef union { __m128d v; double d[2]; } v2df_t; void bli_ddotv_penryn_int ( conj_t conjx, conj_t conjy, dim_t n, double* restrict x, inc_t incx, double* restrict y, inc_t incy, double* restrict rho, cntx_t* restrict cntx ) { double* restrict x_cast = x; double* restrict y_cast = y; double* restrict rho_cast = rho; dim_t i; dim_t n_pre; dim_t n_run; dim_t n_left; double* restrict x1; double* restrict y1; double rho1; double x1c, y1c; v2df_t rho1v; v2df_t x1v, y1v; bool_t use_ref = FALSE; // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { PASTEMAC(d,set0s)( *rho_cast ); return; } n_pre = 0; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. 
if ( incx != 1 || incy != 1 ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( ( siz_t )x, 16 ) || bli_is_unaligned_to( ( siz_t )y, 16 ) ) { use_ref = TRUE; if ( bli_is_unaligned_to( ( siz_t )x, 16 ) && bli_is_unaligned_to( ( siz_t )y, 16 ) ) { use_ref = FALSE; n_pre = 1; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { ddotv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_DOTV_KER, cntx ); f ( conjx, conjy, n, x, incx, y, incy, rho, cntx ); return; } n_run = ( n - n_pre ) / 2; n_left = ( n - n_pre ) % 2; x1 = x_cast; y1 = y_cast; PASTEMAC(d,set0s)( rho1 ); if ( n_pre == 1 ) { x1c = *x1; y1c = *y1; rho1 += x1c * y1c; x1 += incx; y1 += incy; } rho1v.v = _mm_setzero_pd(); for ( i = 0; i < n_run; ++i ) { x1v.v = _mm_load_pd( ( double* )x1 ); y1v.v = _mm_load_pd( ( double* )y1 ); rho1v.v += x1v.v * y1v.v; //x1 += 2*incx; //y1 += 2*incy; x1 += 2; y1 += 2; } rho1 += rho1v.d[0] + rho1v.d[1]; if ( n_left > 0 ) { for ( i = 0; i < n_left; ++i ) { x1c = *x1; y1c = *y1; rho1 += x1c * y1c; x1 += incx; y1 += incy; } } PASTEMAC(d,copys)( rho1, *rho_cast ); } blis-0.6.1/kernels/penryn/1f/000077500000000000000000000000001360743507500157455ustar00rootroot00000000000000blis-0.6.1/kernels/penryn/1f/bli_axpy2v_penryn_int.c000066400000000000000000000153301360743507500224370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "pmmintrin.h" #include "blis.h" typedef union { __m128d v; double d[2]; } v2df_t; void bli_daxpy2v_penryn_int ( conj_t conjx, conj_t conjy, dim_t n, double* restrict alpha, double* restrict beta, double* restrict x, inc_t incx, double* restrict y, inc_t incy, double* restrict z, inc_t incz, cntx_t* restrict cntx ) { double* restrict alpha_cast = alpha; double* restrict beta_cast = beta; double* restrict x_cast = x; double* restrict y_cast = y; double* restrict z_cast = z; dim_t i; const dim_t n_elem_per_reg = 2; const dim_t n_iter_unroll = 4; dim_t n_pre; dim_t n_run; dim_t n_left; double* restrict x1; double* restrict y1; double* restrict z1; double alphac, betac, x1c, y1c; v2df_t alphav, betav; v2df_t x1v, y1v, z1v; v2df_t x2v, y2v, z2v; bool_t use_ref = FALSE; if ( bli_zero_dim1( n ) ) return; n_pre = 0; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. 
if ( incx != 1 || incy != 1 || incz != 1 ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( ( siz_t )x, 16 ) || bli_is_unaligned_to( ( siz_t )y, 16 ) || bli_is_unaligned_to( ( siz_t )z, 16 ) ) { use_ref = TRUE; if ( bli_is_unaligned_to( ( siz_t )x, 16 ) && bli_is_unaligned_to( ( siz_t )y, 16 ) && bli_is_unaligned_to( ( siz_t )z, 16 ) ) { use_ref = FALSE; n_pre = 1; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { daxpy2v_ker_ft f = bli_cntx_get_l1f_ker_dt( BLIS_DOUBLE, BLIS_AXPY2V_KER, cntx ); f ( conjx, conjy, n, alpha, beta, x, incx, y, incy, z, incz, cntx ); return; } n_run = ( n - n_pre ) / ( n_elem_per_reg * n_iter_unroll ); n_left = ( n - n_pre ) % ( n_elem_per_reg * n_iter_unroll ); alphac = *alpha_cast; betac = *beta_cast; x1 = x_cast; y1 = y_cast; z1 = z_cast; if ( n_pre == 1 ) { x1c = *x1; y1c = *y1; *z1 += alphac * x1c + betac * y1c; x1 += incx; y1 += incy; z1 += incz; } alphav.v = _mm_loaddup_pd( ( double* )alpha_cast ); betav.v = _mm_loaddup_pd( ( double* )beta_cast ); for ( i = 0; i < n_run; ++i ) { /* z1v.v = _mm_load_pd( ( double* )z1 + 0*n_elem_per_reg ); x1v.v = _mm_load_pd( ( double* )x1 + 0*n_elem_per_reg ); y1v.v = _mm_load_pd( ( double* )y1 + 0*n_elem_per_reg ); z1v.v += alphav.v * x1v.v; z1v.v += betav.v * y1v.v; _mm_store_pd( ( double* )(z1 + 0*n_elem_per_reg ), z1v.v ); z1v.v = _mm_load_pd( ( double* )z1 + 1*n_elem_per_reg ); x1v.v = _mm_load_pd( ( double* )x1 + 1*n_elem_per_reg ); y1v.v = _mm_load_pd( ( double* )y1 + 1*n_elem_per_reg ); z1v.v += alphav.v * x1v.v; z1v.v += betav.v * y1v.v; _mm_store_pd( ( double* )(z1 + 1*n_elem_per_reg ), z1v.v ); */ /* z1v.v = _mm_load_pd( ( double* )z1 + 0*n_elem_per_reg ); x1v.v = _mm_load_pd( ( double* )x1 + 0*n_elem_per_reg ); y1v.v = _mm_load_pd( ( double* )y1 + 0*n_elem_per_reg ); z2v.v = _mm_load_pd( ( double* )z1 + 1*n_elem_per_reg ); x2v.v = _mm_load_pd( ( double* )x1 + 1*n_elem_per_reg ); y2v.v = _mm_load_pd( ( double* )y1 + 1*n_elem_per_reg ); z1v.v += alphav.v * 
x1v.v; z1v.v += betav.v * y1v.v; _mm_store_pd( ( double* )(z1 + 0*n_elem_per_reg ), z1v.v ); z2v.v += alphav.v * x2v.v; z2v.v += betav.v * y2v.v; _mm_store_pd( ( double* )(z1 + 1*n_elem_per_reg ), z2v.v ); */ z1v.v = _mm_load_pd( ( double* )z1 + 0*n_elem_per_reg ); x1v.v = _mm_load_pd( ( double* )x1 + 0*n_elem_per_reg ); y1v.v = _mm_load_pd( ( double* )y1 + 0*n_elem_per_reg ); z2v.v = _mm_load_pd( ( double* )z1 + 1*n_elem_per_reg ); x2v.v = _mm_load_pd( ( double* )x1 + 1*n_elem_per_reg ); y2v.v = _mm_load_pd( ( double* )y1 + 1*n_elem_per_reg ); z1v.v += alphav.v * x1v.v; z1v.v += betav.v * y1v.v; _mm_store_pd( ( double* )(z1 + 0*n_elem_per_reg ), z1v.v ); z1v.v = _mm_load_pd( ( double* )z1 + 2*n_elem_per_reg ); x1v.v = _mm_load_pd( ( double* )x1 + 2*n_elem_per_reg ); y1v.v = _mm_load_pd( ( double* )y1 + 2*n_elem_per_reg ); z2v.v += alphav.v * x2v.v; z2v.v += betav.v * y2v.v; _mm_store_pd( ( double* )(z1 + 1*n_elem_per_reg ), z2v.v ); z2v.v = _mm_load_pd( ( double* )z1 + 3*n_elem_per_reg ); x2v.v = _mm_load_pd( ( double* )x1 + 3*n_elem_per_reg ); y2v.v = _mm_load_pd( ( double* )y1 + 3*n_elem_per_reg ); z1v.v += alphav.v * x1v.v; z1v.v += betav.v * y1v.v; _mm_store_pd( ( double* )(z1 + 2*n_elem_per_reg ), z1v.v ); z2v.v += alphav.v * x2v.v; z2v.v += betav.v * y2v.v; _mm_store_pd( ( double* )(z1 + 3*n_elem_per_reg ), z2v.v ); x1 += n_elem_per_reg * n_iter_unroll; y1 += n_elem_per_reg * n_iter_unroll; z1 += n_elem_per_reg * n_iter_unroll; } if ( n_left > 0 ) { for ( i = 0; i < n_left; ++i ) { x1c = *x1; y1c = *y1; *z1 += alphac * x1c + betac * y1c; x1 += incx; y1 += incy; z1 += incz; } } } blis-0.6.1/kernels/penryn/1f/bli_axpyf_penryn_int.c000066400000000000000000000142611360743507500223370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "pmmintrin.h" #include "blis.h" typedef union { __m128d v; double d[2]; } v2df_t; void bli_daxpyf_penryn_int ( conj_t conja, conj_t conjx, dim_t m, dim_t b_n, double* restrict alpha, double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, cntx_t* restrict cntx ) { double* restrict alpha_cast = alpha; double* restrict a_cast = a; double* restrict x_cast = x; double* restrict y_cast = y; dim_t i; const dim_t n_elem_per_reg = 2; const dim_t n_iter_unroll = 2; dim_t m_pre; dim_t m_run; dim_t m_left; double* restrict a0; double* restrict a1; double* restrict a2; double* restrict a3; double* restrict y0; double a0c, a1c, a2c, a3c; double chi0, chi1, chi2, chi3; v2df_t a00v, a01v, a02v, a03v, y0v; v2df_t a10v, a11v, a12v, a13v, y1v; v2df_t chi0v, chi1v, chi2v, chi3v; bool_t use_ref = FALSE; if ( bli_zero_dim2( m, b_n ) ) return; m_pre = 0; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( b_n < bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_AF, cntx ) ) { use_ref = TRUE; } else if ( inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( ( siz_t )(lda*sizeof(double)), 16 ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( ( siz_t )a, 16 ) || bli_is_unaligned_to( ( siz_t )y, 16 ) ) { use_ref = TRUE; if ( bli_is_unaligned_to( ( siz_t )a, 16 ) && bli_is_unaligned_to( ( siz_t )y, 16 ) ) { use_ref = FALSE; m_pre = 1; } } // Call the reference implementation if needed. 
if ( use_ref == TRUE ) { daxpyf_ker_ft f = bli_cntx_get_l1f_ker_dt( BLIS_DOUBLE, BLIS_AXPYF_KER, cntx ); f ( conja, conjx, m, b_n, alpha_cast, a_cast, inca, lda, x_cast, incx, y_cast, incy, cntx ); return; } m_run = ( m - m_pre ) / ( n_elem_per_reg * n_iter_unroll ); m_left = ( m - m_pre ) % ( n_elem_per_reg * n_iter_unroll ); a0 = a_cast + 0*lda; a1 = a_cast + 1*lda; a2 = a_cast + 2*lda; a3 = a_cast + 3*lda; y0 = y_cast; chi0 = *(x_cast + 0*incx); chi1 = *(x_cast + 1*incx); chi2 = *(x_cast + 2*incx); chi3 = *(x_cast + 3*incx); PASTEMAC2(d,d,scals)( *alpha_cast, chi0 ); PASTEMAC2(d,d,scals)( *alpha_cast, chi1 ); PASTEMAC2(d,d,scals)( *alpha_cast, chi2 ); PASTEMAC2(d,d,scals)( *alpha_cast, chi3 ); if ( m_pre == 1 ) { a0c = *a0; a1c = *a1; a2c = *a2; a3c = *a3; *y0 += chi0 * a0c + chi1 * a1c + chi2 * a2c + chi3 * a3c; a0 += inca; a1 += inca; a2 += inca; a3 += inca; y0 += incy; } chi0v.v = _mm_loaddup_pd( ( double* )&chi0 ); chi1v.v = _mm_loaddup_pd( ( double* )&chi1 ); chi2v.v = _mm_loaddup_pd( ( double* )&chi2 ); chi3v.v = _mm_loaddup_pd( ( double* )&chi3 ); for ( i = 0; i < m_run; ++i ) { y0v.v = _mm_load_pd( ( double* )(y0 + 0*n_elem_per_reg) ); a00v.v = _mm_load_pd( ( double* )(a0 + 0*n_elem_per_reg) ); a01v.v = _mm_load_pd( ( double* )(a1 + 0*n_elem_per_reg) ); y0v.v += chi0v.v * a00v.v; y0v.v += chi1v.v * a01v.v; a02v.v = _mm_load_pd( ( double* )(a2 + 0*n_elem_per_reg) ); a03v.v = _mm_load_pd( ( double* )(a3 + 0*n_elem_per_reg) ); y0v.v += chi2v.v * a02v.v; y0v.v += chi3v.v * a03v.v; _mm_store_pd( ( double* )(y0 + 0*n_elem_per_reg), y0v.v ); y1v.v = _mm_load_pd( ( double* )(y0 + 1*n_elem_per_reg) ); a10v.v = _mm_load_pd( ( double* )(a0 + 1*n_elem_per_reg) ); a11v.v = _mm_load_pd( ( double* )(a1 + 1*n_elem_per_reg) ); y1v.v += chi0v.v * a10v.v; y1v.v += chi1v.v * a11v.v; a12v.v = _mm_load_pd( ( double* )(a2 + 1*n_elem_per_reg) ); a13v.v = _mm_load_pd( ( double* )(a3 + 1*n_elem_per_reg) ); y1v.v += chi2v.v * a12v.v; y1v.v += chi3v.v * a13v.v; _mm_store_pd( ( 
double* )(y0 + 1*n_elem_per_reg), y1v.v ); a0 += n_elem_per_reg * n_iter_unroll; a1 += n_elem_per_reg * n_iter_unroll; a2 += n_elem_per_reg * n_iter_unroll; a3 += n_elem_per_reg * n_iter_unroll; y0 += n_elem_per_reg * n_iter_unroll; } if ( m_left > 0 ) { for ( i = 0; i < m_left; ++i ) { a0c = *a0; a1c = *a1; a2c = *a2; a3c = *a3; *y0 += chi0 * a0c + chi1 * a1c + chi2 * a2c + chi3 * a3c; a0 += inca; a1 += inca; a2 += inca; a3 += inca; y0 += incy; } } } blis-0.6.1/kernels/penryn/1f/bli_dotaxpyv_penryn_int.c000066400000000000000000000116111360743507500230620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "pmmintrin.h" #include "blis.h" typedef union { __m128d v; double d[2]; } v2df_t; void bli_ddotaxpyv_penryn_int ( conj_t conjxt, conj_t conjx, conj_t conjy, dim_t n, double* restrict alpha, double* restrict x, inc_t incx, double* restrict y, inc_t incy, double* restrict rho, double* restrict z, inc_t incz, cntx_t* restrict cntx ) { double* restrict alpha_cast = alpha; double* restrict x_cast = x; double* restrict y_cast = y; double* restrict rho_cast = rho; double* restrict z_cast = z; dim_t n_pre; dim_t n_run; dim_t n_left; double* restrict chi1; double* restrict psi1; double* restrict zeta1; double alpha1c, chi1c, psi1c, rho1c; dim_t i; //inc_t stepx, stepy, stepz; v2df_t alphav, rhov; v2df_t x1v, y1v, z1v; bool_t use_ref = FALSE; // If the vector lengths are zero, set rho to zero and return. if ( bli_zero_dim1( n ) ) { PASTEMAC(d,set0s)( *rho_cast ); return; } n_pre = 0; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( incx != 1 || incy != 1 || incz != 1 ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( ( siz_t )x, 16 ) || bli_is_unaligned_to( ( siz_t )y, 16 ) || bli_is_unaligned_to( ( siz_t )z, 16 ) ) { use_ref = TRUE; if ( bli_is_unaligned_to( ( siz_t )x, 16 ) && bli_is_unaligned_to( ( siz_t )y, 16 ) && bli_is_unaligned_to( ( siz_t )z, 16 ) ) { use_ref = FALSE; n_pre = 1; } } // Call the reference implementation if needed. 
if ( use_ref == TRUE ) { ddotaxpyv_ker_ft f = bli_cntx_get_l1f_ker_dt( BLIS_DOUBLE, BLIS_DOTAXPYV_KER, cntx ); f ( conjxt, conjx, conjy, n, alpha, x, incx, y, incy, rho, z, incz, cntx ); return; } n_run = ( n - n_pre ) / ( 2 * 1 ); n_left = ( n - n_pre ) % ( 2 * 1 ); //stepx = 2 * incx; //stepy = 2 * incy; //stepz = 2 * incz; PASTEMAC(d,set0s)( rho1c ); alpha1c = *alpha_cast; chi1 = x_cast; psi1 = y_cast; zeta1 = z_cast; if ( n_pre == 1 ) { chi1c = *chi1; psi1c = *psi1; rho1c += chi1c * psi1c; *zeta1 += alpha1c * chi1c; chi1 += incx; psi1 += incy; zeta1 += incz; } rhov.v = _mm_setzero_pd(); alphav.v = _mm_loaddup_pd( ( double* )alpha_cast ); for ( i = 0; i < n_run; ++i ) { x1v.v = _mm_load_pd( ( double* )chi1 ); y1v.v = _mm_load_pd( ( double* )psi1 ); z1v.v = _mm_load_pd( ( double* )zeta1 ); //y1v.v = _mm_setr_pd( *psi1, *(psi1 + incy) ); //z1v.v = _mm_setr_pd( *zeta1, *(zeta1 + incz) ); rhov.v += x1v.v * y1v.v; z1v.v += alphav.v * x1v.v; _mm_store_pd( ( double* )zeta1, z1v.v ); //chi1 += stepx; //psi1 += stepy; //zeta1 += stepz; chi1 += 2; psi1 += 2; zeta1 += 2; } if ( n_left > 0 ) { for ( i = 0; i < n_left; ++i ) { chi1c = *chi1; psi1c = *psi1; rho1c += chi1c * psi1c; *zeta1 += alpha1c * chi1c; chi1 += incx; psi1 += incy; zeta1 += incz; } } rho1c += rhov.d[0] + rhov.d[1]; *rho_cast = rho1c; } blis-0.6.1/kernels/penryn/1f/bli_dotxaxpyf_penryn_int.c000066400000000000000000000225101360743507500232320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "pmmintrin.h" #include "blis.h" typedef union { __m128d v; double d[2]; } v2df_t; void bli_ddotxaxpyf_penryn_int ( conj_t conjat, conj_t conja, conj_t conjw, conj_t conjx, dim_t m, dim_t b_n, double* restrict alpha, double* restrict a, inc_t inca, inc_t lda, double* restrict w, inc_t incw, double* restrict x, inc_t incx, double* restrict beta, double* restrict y, inc_t incy, double* restrict z, inc_t incz, cntx_t* restrict cntx ) { double* restrict alpha_cast = alpha; double* restrict beta_cast = beta; double* restrict a_cast = a; double* restrict w_cast = w; double* restrict x_cast = x; double* restrict y_cast = y; double* restrict z_cast = z; dim_t i; const dim_t n_elem_per_reg = 2; const dim_t n_iter_unroll = 2; dim_t m_pre; dim_t m_run; dim_t m_left; double* restrict a0; double* restrict a1; double* restrict a2; double* restrict a3; double* restrict w1; double* restrict z1; double rho0, rho1, rho2, rho3; double chi0, chi1, chi2, chi3; double a0c, a1c, a2c, a3c, w1c, z1c; v2df_t rho0v, rho1v, rho2v, rho3v; v2df_t chi0v, chi1v, chi2v, chi3v; //v2df_t a0v, a1v, a2v, a3v, w1v, z1v; v2df_t a00v, a01v, a02v, a03v; v2df_t a10v, a11v, a12v, a13v; v2df_t w1v, z1v; v2df_t w2v, z2v; v2df_t psi0v, psi1v, betav, alphav; bool_t use_ref = FALSE; if ( bli_zero_dim1( b_n ) ) return; // If the vector lengths are zero, scale y by beta and return. if ( bli_zero_dim1( m ) ) { dscalv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_SCALV_KER, cntx ); f ( BLIS_NO_CONJUGATE, b_n, beta, y, incy, cntx ); return; } m_pre = 0; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. 
if ( b_n < bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_XF, cntx ) ) { use_ref = TRUE; } else if ( inca != 1 || incw != 1 || incx != 1 || incy != 1 || incz != 1 || bli_is_unaligned_to( ( siz_t )(lda*sizeof(double)), 16 ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( ( siz_t )a, 16 ) || bli_is_unaligned_to( ( siz_t )w, 16 ) || bli_is_unaligned_to( ( siz_t )z, 16 ) || bli_is_unaligned_to( ( siz_t )y, 16 ) ) { use_ref = TRUE; if ( bli_is_unaligned_to( ( siz_t )a, 16 ) && bli_is_unaligned_to( ( siz_t )w, 16 ) && bli_is_unaligned_to( ( siz_t )z, 16 ) && bli_is_aligned_to( ( siz_t )y, 16 ) ) // Note: y is not affected by a, w, and z being unaligned. { use_ref = FALSE; m_pre = 1; } } if ( use_ref == TRUE ) { ddotxaxpyf_ker_ft f = bli_cntx_get_l1f_ker_dt( BLIS_DOUBLE, BLIS_DOTXAXPYF_KER, cntx ); f ( conjat, conja, conjw, conjx, m, b_n, alpha_cast, a_cast, inca, lda, w_cast, incw, x_cast, incx, beta_cast, y_cast, incy, z_cast, incz, cntx ); return; } m_run = ( m - m_pre ) / ( n_elem_per_reg * n_iter_unroll ); m_left = ( m - m_pre ) % ( n_elem_per_reg * n_iter_unroll ); a0 = a_cast + 0*lda; a1 = a_cast + 1*lda; a2 = a_cast + 2*lda; a3 = a_cast + 3*lda; w1 = w_cast; z1 = z_cast; chi0 = *(x_cast + 0*incx); chi1 = *(x_cast + 1*incx); chi2 = *(x_cast + 2*incx); chi3 = *(x_cast + 3*incx); PASTEMAC2(d,d,scals)( *alpha_cast, chi0 ); PASTEMAC2(d,d,scals)( *alpha_cast, chi1 ); PASTEMAC2(d,d,scals)( *alpha_cast, chi2 ); PASTEMAC2(d,d,scals)( *alpha_cast, chi3 ); PASTEMAC(d,set0s)( rho0 ); PASTEMAC(d,set0s)( rho1 ); PASTEMAC(d,set0s)( rho2 ); PASTEMAC(d,set0s)( rho3 ); if ( m_pre == 1 ) { a0c = *a0; a1c = *a1; a2c = *a2; a3c = *a3; w1c = *w1; z1c = *z1; rho0 += a0c * w1c; rho1 += a1c * w1c; rho2 += a2c * w1c; rho3 += a3c * w1c; z1c += chi0 * a0c + chi1 * a1c + chi2 * a2c + chi3 * a3c; *z1 = z1c; a0 += inca; a1 += inca; a2 += inca; a3 += inca; w1 += incw; z1 += incz; } rho0v.v = _mm_setzero_pd(); rho1v.v = _mm_setzero_pd(); rho2v.v = _mm_setzero_pd(); rho3v.v = _mm_setzero_pd(); 
chi0v.v = _mm_loaddup_pd( ( double* )&chi0 ); chi1v.v = _mm_loaddup_pd( ( double* )&chi1 ); chi2v.v = _mm_loaddup_pd( ( double* )&chi2 ); chi3v.v = _mm_loaddup_pd( ( double* )&chi3 ); /* y = beta * y + alpha * A^T w; */ \ /* z = z + alpha * A x; */ \ //for ( i = 0; i < m_run; ++i ) for ( i = m_run; i != 0; --i ) { z1v.v = _mm_load_pd( ( double* )(z1 + 0*n_elem_per_reg) ); w1v.v = _mm_load_pd( ( double* )(w1 + 0*n_elem_per_reg) ); a00v.v = _mm_load_pd( ( double* )(a0 + 0*n_elem_per_reg) ); //a01v.v = _mm_load_pd( ( double* )(a1 + 0*n_elem_per_reg) ); a01v.v = _mm_load_pd( ( double* )(a0 + 1*lda + 0*n_elem_per_reg) ); rho0v.v += a00v.v * w1v.v; rho1v.v += a01v.v * w1v.v; z1v.v += chi0v.v * a00v.v; z1v.v += chi1v.v * a01v.v; a02v.v = _mm_load_pd( ( double* )(a2 + 0*n_elem_per_reg) ); //a03v.v = _mm_load_pd( ( double* )(a3 + 0*n_elem_per_reg) ); a03v.v = _mm_load_pd( ( double* )(a2 + 1*lda + 0*n_elem_per_reg) ); rho2v.v += a02v.v * w1v.v; rho3v.v += a03v.v * w1v.v; z1v.v += chi2v.v * a02v.v; z1v.v += chi3v.v * a03v.v; _mm_store_pd( ( double* )(z1 + 0*n_elem_per_reg), z1v.v ); z2v.v = _mm_load_pd( ( double* )(z1 + 1*n_elem_per_reg) ); w2v.v = _mm_load_pd( ( double* )(w1 + 1*n_elem_per_reg) ); a10v.v = _mm_load_pd( ( double* )(a0 + 1*n_elem_per_reg) ); //a11v.v = _mm_load_pd( ( double* )(a1 + 1*n_elem_per_reg) ); a11v.v = _mm_load_pd( ( double* )(a0 + 1*lda + 1*n_elem_per_reg) ); rho0v.v += a10v.v * w2v.v; rho1v.v += a11v.v * w2v.v; z2v.v += chi0v.v * a10v.v; z2v.v += chi1v.v * a11v.v; a12v.v = _mm_load_pd( ( double* )(a2 + 1*n_elem_per_reg) ); //a13v.v = _mm_load_pd( ( double* )(a3 + 1*n_elem_per_reg) ); a13v.v = _mm_load_pd( ( double* )(a2 + 1*lda + 1*n_elem_per_reg) ); rho2v.v += a12v.v * w2v.v; rho3v.v += a13v.v * w2v.v; z2v.v += chi2v.v * a12v.v; z2v.v += chi3v.v * a13v.v; _mm_store_pd( ( double* )(z1 + 1*n_elem_per_reg), z2v.v ); a0 += n_elem_per_reg * n_iter_unroll; //a1 += n_elem_per_reg * n_iter_unroll; a2 += n_elem_per_reg * n_iter_unroll; //a3 += 
n_elem_per_reg * n_iter_unroll; w1 += n_elem_per_reg * n_iter_unroll; z1 += n_elem_per_reg * n_iter_unroll; } rho0 += rho0v.d[0] + rho0v.d[1]; rho1 += rho1v.d[0] + rho1v.d[1]; rho2 += rho2v.d[0] + rho2v.d[1]; rho3 += rho3v.d[0] + rho3v.d[1]; if ( m_left > 0 ) { for ( i = 0; i < m_left; ++i ) { a0c = *a0; //a1c = *a1; a1c = *(a0 + lda); a2c = *a2; //a3c = *a3; a3c = *(a2 + lda); w1c = *w1; z1c = *z1; rho0 += a0c * w1c; rho1 += a1c * w1c; rho2 += a2c * w1c; rho3 += a3c * w1c; z1c += chi0 * a0c + chi1 * a1c + chi2 * a2c + chi3 * a3c; *z1 = z1c; a0 += inca; //a1 += inca; a2 += inca; //a3 += inca; w1 += incw; z1 += incz; } } rho0v.d[0] = rho0; rho0v.d[1] = rho1; rho1v.d[0] = rho2; rho1v.d[1] = rho3; betav.v = _mm_loaddup_pd( ( double* ) beta_cast ); alphav.v = _mm_loaddup_pd( ( double* ) alpha_cast ); psi0v.v = _mm_load_pd( ( double* )(y_cast + 0*n_elem_per_reg ) ); psi1v.v = _mm_load_pd( ( double* )(y_cast + 1*n_elem_per_reg ) ); psi0v.v = betav.v * psi0v.v + alphav.v * rho0v.v; psi1v.v = betav.v * psi1v.v + alphav.v * rho1v.v; _mm_store_pd( ( double* )(y_cast + 0*n_elem_per_reg ), psi0v.v ); _mm_store_pd( ( double* )(y_cast + 1*n_elem_per_reg ), psi1v.v ); } blis-0.6.1/kernels/penryn/1f/bli_dotxf_penryn_int.c000066400000000000000000000203251360743507500223320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "pmmintrin.h" #include "blis.h" typedef union { __m128d v; double d[2]; } v2df_t; void bli_ddotxf_penryn_int ( conj_t conjat, conj_t conjx, dim_t m, dim_t b_n, double* restrict alpha, double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict beta, double* restrict y, inc_t incy, cntx_t* restrict cntx ) { double* restrict alpha_cast = alpha; double* restrict beta_cast = beta; double* restrict a_cast = a; double* restrict x_cast = x; double* restrict y_cast = y; dim_t i; const dim_t n_elem_per_reg = 2; const dim_t n_iter_unroll = 4; dim_t m_pre; dim_t m_run; dim_t m_left; double* restrict x0; double* restrict x1; double* restrict x2; double* restrict x3; double* restrict y0; double rho0, rho1, rho2, rho3; double x0c, x1c, x2c, x3c, y0c; v2df_t rho0v, rho1v, rho2v, rho3v; v2df_t x0v, x1v, x2v, x3v, y0v, betav, alphav; bool_t use_ref = FALSE; if ( bli_zero_dim1( b_n ) ) return; // If the vector lengths are zero, scale r by beta and return. 
if ( bli_zero_dim1( m ) ) { dscalv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_SCALV_KER, cntx ); f ( BLIS_NO_CONJUGATE, b_n, beta_cast, y_cast, incy, cntx ); return; } m_pre = 0; // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. if ( b_n < bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_DF, cntx ) ) { use_ref = TRUE; } else if ( inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( ( siz_t )(lda*sizeof(double)), 16 ) ) { use_ref = TRUE; } else if ( bli_is_unaligned_to( ( siz_t )a, 16 ) || bli_is_unaligned_to( ( siz_t )x, 16 ) || bli_is_unaligned_to( ( siz_t )y, 16 ) ) { use_ref = TRUE; if ( bli_is_unaligned_to( ( siz_t )a, 16 ) && bli_is_unaligned_to( ( siz_t )x, 16 ) && bli_is_aligned_to( ( siz_t )y, 16 ) ) // Note: r is not affected by x and y being unaligned. { use_ref = FALSE; m_pre = 1; } } // Call the reference implementation if needed. if ( use_ref == TRUE ) { ddotxf_ker_ft f = bli_cntx_get_l1f_ker_dt( BLIS_DOUBLE, BLIS_DOTXF_KER, cntx ); f ( conjat, conjx, m, b_n, alpha_cast, a_cast, inca, lda, x_cast, incx, beta_cast, y_cast, incy, cntx ); return; } m_run = ( m - m_pre ) / ( n_elem_per_reg * n_iter_unroll ); m_left = ( m - m_pre ) % ( n_elem_per_reg * n_iter_unroll ); x0 = a_cast; x1 = a_cast + lda; x2 = a_cast + 2*lda; x3 = a_cast + 3*lda; y0 = x_cast; PASTEMAC(d,set0s)( rho0 ); PASTEMAC(d,set0s)( rho1 ); PASTEMAC(d,set0s)( rho2 ); PASTEMAC(d,set0s)( rho3 ); if ( m_pre == 1 ) { x0c = *x0; x1c = *x1; x2c = *x2; x3c = *x3; y0c = *y0; rho0 += x0c * y0c; rho1 += x1c * y0c; rho2 += x2c * y0c; rho3 += x3c * y0c; x0 += inca; x1 += inca; x2 += inca; x3 += inca; y0 += incx; } rho0v.v = _mm_setzero_pd(); rho1v.v = _mm_setzero_pd(); rho2v.v = _mm_setzero_pd(); rho3v.v = _mm_setzero_pd(); for ( i = 0; i < m_run; ++i ) { x0v.v = _mm_load_pd( ( double* )(x0 + 0*n_elem_per_reg) ); x1v.v = _mm_load_pd( ( double* )(x1 + 0*n_elem_per_reg) ); x2v.v = _mm_load_pd( ( double* )(x2 + 
0*n_elem_per_reg) ); x3v.v = _mm_load_pd( ( double* )(x3 + 0*n_elem_per_reg) ); y0v.v = _mm_load_pd( ( double* )(y0 + 0*n_elem_per_reg) ); rho0v.v += x0v.v * y0v.v; rho1v.v += x1v.v * y0v.v; rho2v.v += x2v.v * y0v.v; rho3v.v += x3v.v * y0v.v; x0v.v = _mm_load_pd( ( double* )(x0 + 1*n_elem_per_reg) ); x1v.v = _mm_load_pd( ( double* )(x1 + 1*n_elem_per_reg) ); x2v.v = _mm_load_pd( ( double* )(x2 + 1*n_elem_per_reg) ); x3v.v = _mm_load_pd( ( double* )(x3 + 1*n_elem_per_reg) ); y0v.v = _mm_load_pd( ( double* )(y0 + 1*n_elem_per_reg) ); rho0v.v += x0v.v * y0v.v; rho1v.v += x1v.v * y0v.v; rho2v.v += x2v.v * y0v.v; rho3v.v += x3v.v * y0v.v; x0v.v = _mm_load_pd( ( double* )(x0 + 2*n_elem_per_reg) ); x1v.v = _mm_load_pd( ( double* )(x1 + 2*n_elem_per_reg) ); x2v.v = _mm_load_pd( ( double* )(x2 + 2*n_elem_per_reg) ); x3v.v = _mm_load_pd( ( double* )(x3 + 2*n_elem_per_reg) ); y0v.v = _mm_load_pd( ( double* )(y0 + 2*n_elem_per_reg) ); rho0v.v += x0v.v * y0v.v; rho1v.v += x1v.v * y0v.v; rho2v.v += x2v.v * y0v.v; rho3v.v += x3v.v * y0v.v; x0v.v = _mm_load_pd( ( double* )(x0 + 3*n_elem_per_reg) ); x1v.v = _mm_load_pd( ( double* )(x1 + 3*n_elem_per_reg) ); x2v.v = _mm_load_pd( ( double* )(x2 + 3*n_elem_per_reg) ); x3v.v = _mm_load_pd( ( double* )(x3 + 3*n_elem_per_reg) ); y0v.v = _mm_load_pd( ( double* )(y0 + 3*n_elem_per_reg) ); rho0v.v += x0v.v * y0v.v; rho1v.v += x1v.v * y0v.v; rho2v.v += x2v.v * y0v.v; rho3v.v += x3v.v * y0v.v; x0 += n_elem_per_reg * n_iter_unroll; x1 += n_elem_per_reg * n_iter_unroll; x2 += n_elem_per_reg * n_iter_unroll; x3 += n_elem_per_reg * n_iter_unroll; y0 += n_elem_per_reg * n_iter_unroll; } rho0 += rho0v.d[0] + rho0v.d[1]; rho1 += rho1v.d[0] + rho1v.d[1]; rho2 += rho2v.d[0] + rho2v.d[1]; rho3 += rho3v.d[0] + rho3v.d[1]; if ( m_left > 0 ) { for ( i = 0; i < m_left; ++i ) { x0c = *x0; x1c = *x1; x2c = *x2; x3c = *x3; y0c = *y0; rho0 += x0c * y0c; rho1 += x1c * y0c; rho2 += x2c * y0c; rho3 += x3c * y0c; x0 += inca; x1 += inca; x2 += inca; x3 += inca; y0 
+= incx; } } /* PASTEMAC2(d,d,scals)( *beta_cast, *(y_cast ) ); \ PASTEMAC2(d,d,scals)( *beta_cast, *(y_cast+1) ); \ PASTEMAC2(d,d,scals)( *beta_cast, *(y_cast+2) ); \ PASTEMAC2(d,d,scals)( *beta_cast, *(y_cast+3) ); \ PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho1, *(y_cast ) ); \ PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho2, *(y_cast+1) ); \ PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho3, *(y_cast+2) ); \ PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho4, *(y_cast+3) ); \ */ rho1v.d[0] = rho0; rho1v.d[1] = rho1; rho3v.d[0] = rho2; rho3v.d[1] = rho3; betav.v = _mm_loaddup_pd( ( double* ) beta_cast ); alphav.v = _mm_loaddup_pd( ( double* ) alpha_cast ); rho0v.v = _mm_load_pd( ( double* )(y_cast + 0*n_elem_per_reg) ); rho2v.v = _mm_load_pd( ( double* )(y_cast + 1*n_elem_per_reg) ); rho0v.v *= betav.v; rho2v.v *= betav.v; rho0v.v += alphav.v * rho1v.v; rho2v.v += alphav.v * rho3v.v; _mm_store_pd( ( double* )(y_cast + 0*n_elem_per_reg), rho0v.v ); _mm_store_pd( ( double* )(y_cast + 1*n_elem_per_reg), rho2v.v ); } blis-0.6.1/kernels/penryn/3/000077500000000000000000000000001360743507500156015ustar00rootroot00000000000000blis-0.6.1/kernels/penryn/3/bli_gemm_penryn_asm_d4x4.c000066400000000000000000001140231360743507500226170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" void bli_sgemm_penryn_asm_8x4 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. mov(var(b_next), r9) // load address of b_next. sub(imm(0-8*16), rax) // increment pointers to allow byte sub(imm(0-8*16), rbx) // offsets in the unrolled iterations. movaps(mem(rax, -8*16), xmm0) // initialize loop by pre-loading elements movaps(mem(rax, -7*16), xmm1) // of a and b. 
movaps(mem(rbx, -8*16), xmm2) mov(var(c), rcx) // load address of c mov(var(cs_c), rdi) // load cs_c lea(mem(, rdi, 4), rdi) // cs_c *= sizeof(float) mov(rdi, r12) // make a copy of cs_c (in bytes) lea(mem(rcx, rdi, 2), r10) // load address of c + 2*cs_c; prefetch(2, mem(r9, 0*4)) // prefetch b_next xorps(xmm3, xmm3) xorps(xmm4, xmm4) xorps(xmm5, xmm5) xorps(xmm6, xmm6) prefetch(2, mem(rcx, 6*4)) // prefetch c + 0*cs_c xorps(xmm8, xmm8) xorps(xmm9, xmm9) prefetch(2, mem(rcx, rdi, 1, 6*4)) // prefetch c + 1*cs_c xorps(xmm10, xmm10) xorps(xmm11, xmm11) prefetch(2, mem(r10, 6*4)) // prefetch c + 2*cs_c xorps(xmm12, xmm12) xorps(xmm13, xmm13) prefetch(2, mem(r10, rdi, 1, 6*4)) // prefetch c + 3*cs_c xorps(xmm14, xmm14) xorps(xmm15, xmm15) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.SCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.SLOOPKITER) // MAIN LOOP prefetch(0, mem(rax, (4*35+1)*8)) addps(xmm6, xmm10) // iteration 0 addps(xmm3, xmm14) movaps(xmm2, xmm3) pshufd(imm(0x39), xmm2, xmm7) mulps(xmm0, xmm2) mulps(xmm1, xmm3) addps(xmm4, xmm11) addps(xmm5, xmm15) movaps(xmm7, xmm5) pshufd(imm(0x39), xmm7, xmm6) mulps(xmm0, xmm7) mulps(xmm1, xmm5) addps(xmm2, xmm8) movaps(mem(rbx, -7*16), xmm2) addps(xmm3, xmm12) movaps(xmm6, xmm3) pshufd(imm(0x39), xmm6, xmm4) mulps(xmm0, xmm6) mulps(xmm1, xmm3) addps(xmm7, xmm9) addps(xmm5, xmm13) movaps(xmm4, xmm5) mulps(xmm0, xmm4) movaps(mem(rax, -6*16), xmm0) mulps(xmm1, xmm5) movaps(mem(rax, -5*16), xmm1) addps(xmm6, xmm10) // iteration 1 addps(xmm3, xmm14) movaps(xmm2, xmm3) pshufd(imm(0x39), xmm2, xmm7) mulps(xmm0, xmm2) mulps(xmm1, xmm3) addps(xmm4, xmm11) addps(xmm5, xmm15) movaps(xmm7, xmm5) pshufd(imm(0x39), xmm7, xmm6) mulps(xmm0, xmm7) mulps(xmm1, xmm5) addps(xmm2, xmm8) movaps(mem(rbx, -6*16), xmm2) addps(xmm3, xmm12) movaps(xmm6, xmm3) pshufd(imm(0x39), xmm6, xmm4) mulps(xmm0, xmm6) mulps(xmm1, xmm3) addps(xmm7, xmm9) addps(xmm5, xmm13) movaps(xmm4, 
xmm5) mulps(xmm0, xmm4) movaps(mem(rax, -4*16), xmm0) mulps(xmm1, xmm5) movaps(mem(rax, -3*16), xmm1) addps(xmm6, xmm10) // iteration 2 addps(xmm3, xmm14) movaps(xmm2, xmm3) pshufd(imm(0x39), xmm2, xmm7) mulps(xmm0, xmm2) mulps(xmm1, xmm3) addps(xmm4, xmm11) addps(xmm5, xmm15) movaps(xmm7, xmm5) pshufd(imm(0x39), xmm7, xmm6) mulps(xmm0, xmm7) mulps(xmm1, xmm5) addps(xmm2, xmm8) movaps(mem(rbx, -5*16), xmm2) addps(xmm3, xmm12) movaps(xmm6, xmm3) pshufd(imm(0x39), xmm6, xmm4) mulps(xmm0, xmm6) mulps(xmm1, xmm3) addps(xmm7, xmm9) addps(xmm5, xmm13) movaps(xmm4, xmm5) mulps(xmm0, xmm4) movaps(mem(rax, -2*16), xmm0) mulps(xmm1, xmm5) movaps(mem(rax, -1*16), xmm1) addps(xmm6, xmm10) // iteration 3 addps(xmm3, xmm14) movaps(xmm2, xmm3) pshufd(imm(0x39), xmm2, xmm7) mulps(xmm0, xmm2) mulps(xmm1, xmm3) sub(imm(0-4*8*4), rax) // a += 4*8 (unroll x mr) addps(xmm4, xmm11) addps(xmm5, xmm15) movaps(xmm7, xmm5) pshufd(imm(0x39), xmm7, xmm6) mulps(xmm0, xmm7) mulps(xmm1, xmm5) sub(imm(0-4*4*4), r9) // b_next += 4*4 (unroll x nr) addps(xmm2, xmm8) movaps(mem(rbx, -4*16), xmm2) addps(xmm3, xmm12) movaps(xmm6, xmm3) pshufd(imm(0x39), xmm6, xmm4) mulps(xmm0, xmm6) mulps(xmm1, xmm3) sub(imm(0-4*4*4), rbx) // b += 4*4 (unroll x nr) addps(xmm7, xmm9) addps(xmm5, xmm13) movaps(xmm4, xmm5) mulps(xmm0, xmm4) movaps(mem(rax, -8*16), xmm0) mulps(xmm1, xmm5) movaps(mem(rax, -7*16), xmm1) prefetch(2, mem(r9, 0*4)) // prefetch b_next[0] prefetch(2, mem(r9, 16*4)) // prefetch b_next[16] dec(rsi) // i -= 1; jne(.SLOOPKITER) // iterate again if i != 0. label(.SCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.SPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. 
label(.SLOOPKLEFT) // EDGE LOOP addps(xmm6, xmm10) // iteration 0 addps(xmm3, xmm14) movaps(xmm2, xmm3) pshufd(imm(0x39), xmm2, xmm7) mulps(xmm0, xmm2) mulps(xmm1, xmm3) addps(xmm4, xmm11) addps(xmm5, xmm15) movaps(xmm7, xmm5) pshufd(imm(0x39), xmm7, xmm6) mulps(xmm0, xmm7) mulps(xmm1, xmm5) addps(xmm2, xmm8) movaps(mem(rbx, -7*16), xmm2) addps(xmm3, xmm12) movaps(xmm6, xmm3) pshufd(imm(0x39), xmm6, xmm4) mulps(xmm0, xmm6) mulps(xmm1, xmm3) addps(xmm7, xmm9) addps(xmm5, xmm13) movaps(xmm4, xmm5) mulps(xmm0, xmm4) movaps(mem(rax, -6*16), xmm0) mulps(xmm1, xmm5) movaps(mem(rax, -5*16), xmm1) sub(imm(0-1*8*4), rax) // a += 8 (1 x mr) sub(imm(0-1*4*4), rbx) // b += 4 (1 x nr) dec(rsi) // i -= 1; jne(.SLOOPKLEFT) // iterate again if i != 0. label(.SPOSTACCUM) addps(xmm6, xmm10) addps(xmm3, xmm14) addps(xmm4, xmm11) addps(xmm5, xmm15) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta movss(mem(rax), xmm6) // load alpha to bottom 4 bytes of xmm6 movss(mem(rbx), xmm7) // load beta to bottom 4 bytes of xmm7 pshufd(imm(0x00), xmm6, xmm6) // populate xmm6 with four alphas pshufd(imm(0x00), xmm7, xmm7) // populate xmm7 with four betas mov(var(rs_c), rsi) // load rs_c mov(rsi, r8) // make a copy of rs_c lea(mem(, rsi, 4), rsi) // rsi = rs_c * sizeof(float) lea(mem(rsi, rsi, 2), r11) // r11 = 3*(rs_c * sizeof(float)) lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*rs_c; // xmm8: xmm9: xmm10: xmm11: // ( ab00 ( ab01 ( ab02 ( ab03 // ab11 ab12 ab13 ab10 // ab22 ab23 ab20 ab21 // ab33 ) ab30 ) ab31 ) ab32 ) // // xmm12: xmm13: xmm14: xmm15: // ( ab40 ( ab41 ( ab42 ( ab43 // ab51 ab52 ab53 ab50 // ab62 ab63 ab60 ab61 // ab73 ) ab70 ) ab71 ) ab72 ) movaps(xmm9, xmm4) shufps(imm(0xd8), xmm8, xmm9) shufps(imm(0xd8), xmm11, xmm8) shufps(imm(0xd8), xmm10, xmm11) shufps(imm(0xd8), xmm4, xmm10) movaps(xmm8, xmm4) shufps(imm(0xd8), xmm10, xmm8) shufps(imm(0xd8), xmm4, xmm10) movaps(xmm9, xmm5) shufps(imm(0xd8), xmm11, xmm9) shufps(imm(0xd8), xmm5, 
xmm11) movaps(xmm13, xmm4) shufps(imm(0xd8), xmm12, xmm13) shufps(imm(0xd8), xmm15, xmm12) shufps(imm(0xd8), xmm14, xmm15) shufps(imm(0xd8), xmm4, xmm14) movaps(xmm12, xmm4) shufps(imm(0xd8), xmm14, xmm12) shufps(imm(0xd8), xmm4, xmm14) movaps(xmm13, xmm5) shufps(imm(0xd8), xmm15, xmm13) shufps(imm(0xd8), xmm5, xmm15) // xmm8: xmm9: xmm10: xmm11: // ( ab00 ( ab01 ( ab02 ( ab03 // ab10 ab11 ab12 ab13 // ab20 ab21 ab22 ab23 // ab30 ) ab31 ) ab32 ) ab33 ) // // xmm12: xmm13: xmm14: xmm15: // ( ab40 ( ab41 ( ab42 ( ab43 // ab50 ab51 ab52 ab53 // ab60 ab61 ab62 ab63 // ab70 ) ab71 ) ab72 ) ab73 ) // determine if // c % 16 == 0, AND // 8*cs_c % 16 == 0, AND // rs_c == 1 // ie: aligned, ldim aligned, and // column-stored cmp(imm(1), r8) // set ZF if rs_c == 1. sete(bl) // bl = ( ZF == 1 ? 1 : 0 ); test(imm(15), rcx) // set ZF if c & 16 is zero. setz(bh) // bh = ( ZF == 1 ? 1 : 0 ); test(imm(15), r12) // set ZF if (4*cs_c) & 16 is zero. setz(al) // al = ( ZF == 1 ? 1 : 0 ); // and(bl,bh) followed by // and(bh,al) will reveal result // now avoid loading C if beta == 0 xorpd(xmm0, xmm0) // set xmm0 to zero. ucomisd(xmm0, xmm7) // check if beta == 0. je(.SBETAZERO) // if ZF = 1, jump to beta == 0 case // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. jne(.SCOLSTORED) // jump to column storage case label(.SGENSTORED) movlps(mem(rcx), xmm0) // load c00 ~ c30 movhps(mem(rcx, rsi, 1), xmm0) movlps(mem(rcx, rsi, 2), xmm1) movhps(mem(rcx, r11, 1), xmm1) shufps(imm(0x88), xmm1, xmm0) mulps(xmm6, xmm8) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm8, xmm0) // add the gemm result, movss(xmm0, mem(rcx)) // and store back to memory. 
pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rcx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rcx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rcx, r11, 1)) add(rdi, rcx) movlps(mem(rdx), xmm0) // load c40 ~ c70 movhps(mem(rdx, rsi, 1), xmm0) movlps(mem(rdx, rsi, 2), xmm1) movhps(mem(rdx, r11, 1), xmm1) shufps(imm(0x88), xmm1, xmm0) mulps(xmm6, xmm12) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm12, xmm0) // add the gemm result, movss(xmm0, mem(rdx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rdx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rdx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rdx, r11, 1)) add(rdi, rdx) movlps(mem(rcx), xmm0) // load c01 ~ c31 movhps(mem(rcx, rsi, 1), xmm0) movlps(mem(rcx, rsi, 2), xmm1) movhps(mem(rcx, r11, 1), xmm1) shufps(imm(0x88), xmm1, xmm0) mulps(xmm6, xmm9) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm9, xmm0) // add the gemm result, movss(xmm0, mem(rcx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rcx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rcx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rcx, r11, 1)) add(rdi, rcx) movlps(mem(rdx), xmm0) // load c41 ~ c71 movhps(mem(rdx, rsi, 1), xmm0) movlps(mem(rdx, rsi, 2), xmm1) movhps(mem(rdx, r11, 1), xmm1) shufps(imm(0x88), xmm1, xmm0) mulps(xmm6, xmm13) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm13, xmm0) // add the gemm result, movss(xmm0, mem(rdx)) // and store back to memory. 
pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rdx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rdx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rdx, r11, 1)) add(rdi, rdx) movlps(mem(rcx), xmm0) // load c02 ~ c32 movhps(mem(rcx, rsi, 1), xmm0) movlps(mem(rcx, rsi, 2), xmm1) movhps(mem(rcx, r11, 1), xmm1) shufps(imm(0x88), xmm1, xmm0) mulps(xmm6, xmm10) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm10, xmm0) // add the gemm result, movss(xmm0, mem(rcx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rcx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rcx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rcx, r11, 1)) add(rdi, rcx) movlps(mem(rdx), xmm0) // load c42 ~ c72 movhps(mem(rdx, rsi, 1), xmm0) movlps(mem(rdx, rsi, 2), xmm1) movhps(mem(rdx, r11, 1), xmm1) shufps(imm(0x88), xmm1, xmm0) mulps(xmm6, xmm14) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm14, xmm0) // add the gemm result, movss(xmm0, mem(rdx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rdx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rdx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rdx, r11, 1)) add(rdi, rdx) movlps(mem(rcx), xmm0) // load c03 ~ c33 movhps(mem(rcx, rsi, 1), xmm0) movlps(mem(rcx, rsi, 2), xmm1) movhps(mem(rcx, r11, 1), xmm1) shufps(imm(0x88), xmm1, xmm0) mulps(xmm6, xmm11) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm11, xmm0) // add the gemm result, movss(xmm0, mem(rcx)) // and store back to memory. 
pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rcx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rcx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rcx, r11, 1)) movlps(mem(rdx), xmm0) // load c43 ~ c73 movhps(mem(rdx, rsi, 1), xmm0) movlps(mem(rdx, rsi, 2), xmm1) movhps(mem(rdx, r11, 1), xmm1) shufps(imm(0x88), xmm1, xmm0) mulps(xmm6, xmm15) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm15, xmm0) // add the gemm result, movss(xmm0, mem(rdx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rdx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rdx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rdx, r11, 1)) jmp(.SDONE) // jump to end. label(.SCOLSTORED) movaps(mem(rcx), xmm0) // load c00 ~ c30, mulps(xmm6, xmm8) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm8, xmm0) // add the gemm result, movaps(xmm0, mem(rcx)) // and store back to memory. add(rdi, rcx) movaps(mem(rdx), xmm1) // load c40 ~ c70, mulps(xmm6, xmm12) // scale by alpha, mulps(xmm7, xmm1) // scale by beta, addps(xmm12, xmm1) // add the gemm result, movaps(xmm1, mem(rdx)) // and store back to memory. add(rdi, rdx) movaps(mem(rcx), xmm0) // load c01 ~ c31, mulps(xmm6, xmm9) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm9, xmm0) // add the gemm result, movaps(xmm0, mem(rcx)) // and store back to memory. add(rdi, rcx) movaps(mem(rdx), xmm1) // load c41 ~ c71, mulps(xmm6, xmm13) // scale by alpha, mulps(xmm7, xmm1) // scale by beta, addps(xmm13, xmm1) // add the gemm result, movaps(xmm1, mem(rdx)) // and store back to memory. add(rdi, rdx) movaps(mem(rcx), xmm0) // load c02 ~ c32, mulps(xmm6, xmm10) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm10, xmm0) // add the gemm result, movaps(xmm0, mem(rcx)) // and store back to memory. 
add(rdi, rcx) movaps(mem(rdx), xmm1) // load c42 ~ c72, mulps(xmm6, xmm14) // scale by alpha, mulps(xmm7, xmm1) // scale by beta, addps(xmm14, xmm1) // add the gemm result, movaps(xmm1, mem(rdx)) // and store back to memory. add(rdi, rdx) movaps(mem(rcx), xmm0) // load c03 ~ c33, mulps(xmm6, xmm11) // scale by alpha, mulps(xmm7, xmm0) // scale by beta, addps(xmm11, xmm0) // add the gemm result, movaps(xmm0, mem(rcx)) // and store back to memory. movaps(mem(rdx), xmm1) // load c43 ~ c73, mulps(xmm6, xmm15) // scale by alpha, mulps(xmm7, xmm1) // scale by beta, addps(xmm15, xmm1) // add the gemm result, movaps(xmm1, mem(rdx)) // and store back to memory. jmp(.SDONE) // jump to end. label(.SBETAZERO) // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. jne(.SCOLSTORBZ) // jump to column storage case label(.SGENSTORBZ) mulps(xmm6, xmm8) // scale by alpha, movaps(xmm8, xmm0) movss(xmm0, mem(rcx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rcx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rcx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rcx, r11, 1)) add(rdi, rcx) mulps(xmm6, xmm12) // scale by alpha, movaps(xmm12, xmm0) movss(xmm0, mem(rdx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rdx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rdx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rdx, r11, 1)) add(rdi, rdx) mulps(xmm6, xmm9) // scale by alpha, movaps(xmm9, xmm0) movss(xmm0, mem(rcx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rcx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rcx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rcx, r11, 1)) add(rdi, rcx) mulps(xmm6, xmm13) // scale by alpha, movaps(xmm13, xmm0) movss(xmm0, mem(rdx)) // and store back to memory. 
pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rdx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rdx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rdx, r11, 1)) add(rdi, rdx) mulps(xmm6, xmm10) // scale by alpha, movaps(xmm10, xmm0) movss(xmm0, mem(rcx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rcx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rcx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rcx, r11, 1)) add(rdi, rcx) mulps(xmm6, xmm14) // scale by alpha, movaps(xmm14, xmm0) movss(xmm0, mem(rdx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rdx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rdx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rdx, r11, 1)) add(rdi, rdx) mulps(xmm6, xmm11) // scale by alpha, movaps(xmm11, xmm0) movss(xmm0, mem(rcx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rcx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rcx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rcx, r11, 1)) mulps(xmm6, xmm15) // scale by alpha, movaps(xmm15, xmm0) movss(xmm0, mem(rdx)) // and store back to memory. pshufd(imm(0x39), xmm0, xmm1) movss(xmm1, mem(rdx, rsi, 1)) pshufd(imm(0x39), xmm1, xmm2) movss(xmm2, mem(rdx, rsi, 2)) pshufd(imm(0x39), xmm2, xmm3) movss(xmm3, mem(rdx, r11, 1)) jmp(.SDONE) // jump to end. label(.SCOLSTORBZ) // skip loading c00 ~ c30, mulps(xmm6, xmm8) // scale by alpha, movaps(xmm8, mem(rcx)) // and store back to memory. add(rdi, rcx) // skip loading c40 ~ c70, mulps(xmm6, xmm12) // scale by alpha, movaps(xmm12, mem(rdx)) // and store back to memory. add(rdi, rdx) // skip loading c01 ~ c31, mulps(xmm6, xmm9) // scale by alpha, movaps(xmm9, mem(rcx)) // and store back to memory. add(rdi, rcx) // skip loading c41 ~ c71, mulps(xmm6, xmm13) // scale by alpha, movaps(xmm13, mem(rdx)) // and store back to memory. 
add(rdi, rdx) // skip loading c02 ~ c32, mulps(xmm6, xmm10) // scale by alpha, movaps(xmm10, mem(rcx)) // and store back to memory. add(rdi, rcx) // skip loading c42 ~ c72, mulps(xmm6, xmm14) // scale by alpha, movaps(xmm14, mem(rdx)) // and store back to memory. add(rdi, rdx) // skip loading c03 ~ c33, mulps(xmm6, xmm11) // scale by alpha, movaps(xmm11, mem(rcx)) // and store back to memory. // skip loading c43 ~ c73, mulps(xmm6, xmm15) // scale by alpha, movaps(xmm15, mem(rdx)) // and store back to memory. label(.SDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c), // 8 [b_next] "m" (b_next)/*, // 9 [a_next] "m" (a_next)*/ // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } void bli_dgemm_penryn_asm_4x4 ( dim_t k0, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 4; uint64_t k_left = k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. mov(var(b_next), r9) // load address of b_next. mov(var(a_next), r11) // load address of a_next. sub(imm(0-8*16), rax) // increment pointers to allow byte sub(imm(0-8*16), rbx) // offsets in the unrolled iterations. 
movaps(mem(rax, -8*16), xmm0) // initialize loop by pre-loading elements movaps(mem(rax, -7*16), xmm1) // of a and b. movaps(mem(rbx, -8*16), xmm2) mov(var(c), rcx) // load address of c mov(var(cs_c), rdi) // load cs_c lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(double) mov(rdi, r12) // make a copy of cs_c (in bytes) lea(mem(rcx, rdi, 2), r10) // load address of c + 2*cs_c; prefetch(2, mem(r9, 0*8)) // prefetch b_next xorpd(xmm3, xmm3) xorpd(xmm4, xmm4) xorpd(xmm5, xmm5) xorpd(xmm6, xmm6) prefetch(2, mem(rcx, 3*8)) // prefetch c + 0*cs_c xorpd(xmm8, xmm8) xorpd(xmm9, xmm9) prefetch(2, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*cs_c xorpd(xmm10, xmm10) xorpd(xmm11, xmm11) prefetch(2, mem(r10, 3*8)) // prefetch c + 2*cs_c xorpd(xmm12, xmm12) xorpd(xmm13, xmm13) prefetch(2, mem(r10, rdi, 1, 3*8)) // prefetch c + 3*cs_c xorpd(xmm14, xmm14) xorpd(xmm15, xmm15) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.DCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. 
label(.DLOOPKITER) // MAIN LOOP prefetch(0, mem(rax, (4*35+1)*8)) //prefetch(0, mem(rax, (8*97+4)*8)) //prefetch(0, mem(r11, 67*4*8)) // prefetch a_next[0] addpd(xmm3, xmm11) // iteration 0 movaps(mem(rbx, -7*16), xmm3) addpd(xmm4, xmm15) movaps(xmm2, xmm4) pshufd(imm(0x4e), xmm2, xmm7) mulpd(xmm0, xmm2) mulpd(xmm1, xmm4) addpd(xmm5, xmm10) addpd(xmm6, xmm14) movaps(xmm7, xmm6) mulpd(xmm0, xmm7) mulpd(xmm1, xmm6) addpd(xmm2, xmm9) movaps(mem(rbx, -6*16), xmm2) addpd(xmm4, xmm13) movaps(xmm3, xmm4) pshufd(imm(0x4e), xmm3, xmm5) mulpd(xmm0, xmm3) mulpd(xmm1, xmm4) addpd(xmm7, xmm8) addpd(xmm6, xmm12) movaps(xmm5, xmm6) mulpd(xmm0, xmm5) movaps(mem(rax, -6*16), xmm0) mulpd(xmm1, xmm6) movaps(mem(rax, -5*16), xmm1) addpd(xmm3, xmm11) // iteration 1 movaps(mem(rbx, -5*16), xmm3) addpd(xmm4, xmm15) movaps(xmm2, xmm4) pshufd(imm(0x4e), xmm2, xmm7) mulpd(xmm0, xmm2) mulpd(xmm1, xmm4) addpd(xmm5, xmm10) addpd(xmm6, xmm14) movaps(xmm7, xmm6) mulpd(xmm0, xmm7) mulpd(xmm1, xmm6) addpd(xmm2, xmm9) movaps(mem(rbx, -4*16), xmm2) addpd(xmm4, xmm13) movaps(xmm3, xmm4) pshufd(imm(0x4e), xmm3, xmm5) mulpd(xmm0, xmm3) mulpd(xmm1, xmm4) addpd(xmm7, xmm8) addpd(xmm6, xmm12) movaps(xmm5, xmm6) mulpd(xmm0, xmm5) movaps(mem(rax, -4*16), xmm0) mulpd(xmm1, xmm6) movaps(mem(rax, -3*16), xmm1) prefetch(0, mem(rax, (4*37+1)*8)) //prefetch(0, mem(rax, (8*97+12)*8)) //prefetch(0, mem(r11, 69*4*8)) // prefetch a_next[8] //sub(imm(-4*4*8), r11) // a_next += 4*4 (unroll x mr) addpd(xmm3, xmm11) // iteration 2 movaps(mem(rbx, -3*16), xmm3) addpd(xmm4, xmm15) movaps(xmm2, xmm4) pshufd(imm(0x4e), xmm2, xmm7) mulpd(xmm0, xmm2) mulpd(xmm1, xmm4) addpd(xmm5, xmm10) addpd(xmm6, xmm14) movaps(xmm7, xmm6) mulpd(xmm0, xmm7) mulpd(xmm1, xmm6) addpd(xmm2, xmm9) movaps(mem(rbx, -2*16), xmm2) addpd(xmm4, xmm13) movaps(xmm3, xmm4) pshufd(imm(0x4e), xmm3, xmm5) mulpd(xmm0, xmm3) mulpd(xmm1, xmm4) addpd(xmm7, xmm8) addpd(xmm6, xmm12) movaps(xmm5, xmm6) mulpd(xmm0, xmm5) movaps(mem(rax, -2*16), xmm0) mulpd(xmm1, 
xmm6) movaps(mem(rax, -1*16), xmm1) addpd(xmm3, xmm11) // iteration 3 movaps(mem(rbx, -1*16), xmm3) addpd(xmm4, xmm15) movaps(xmm2, xmm4) pshufd(imm(0x4e), xmm2, xmm7) mulpd(xmm0, xmm2) mulpd(xmm1, xmm4) sub(imm(0-4*4*8), rax) // a += 4*4 (unroll x mr) addpd(xmm5, xmm10) addpd(xmm6, xmm14) movaps(xmm7, xmm6) mulpd(xmm0, xmm7) mulpd(xmm1, xmm6) sub(imm(0-4*4*8), r9) // b_next += 4*4 (unroll x nr) addpd(xmm2, xmm9) movaps(mem(rbx, 0*16), xmm2) addpd(xmm4, xmm13) movaps(xmm3, xmm4) pshufd(imm(0x4e), xmm3, xmm5) mulpd(xmm0, xmm3) mulpd(xmm1, xmm4) sub(imm(0-4*4*8), rbx) // b += 4*4 (unroll x nr) addpd(xmm7, xmm8) addpd(xmm6, xmm12) movaps(xmm5, xmm6) mulpd(xmm0, xmm5) movaps(mem(rax, -8*16), xmm0) mulpd(xmm1, xmm6) movaps(mem(rax, -7*16), xmm1) prefetch(2, mem(r9, 0*8)) // prefetch b_next[0] prefetch(2, mem(r9, 8*8)) // prefetch b_next[8] dec(rsi) // i -= 1; jne(.DLOOPKITER) // iterate again if i != 0. //prefetch(2, mem(r9, -8*8)) // prefetch b_next[-8] label(.DCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.DPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.DLOOPKLEFT) // EDGE LOOP addpd(xmm3, xmm11) // iteration 0 movaps(mem(rbx, -7*16), xmm3) addpd(xmm4, xmm15) movaps(xmm2, xmm4) pshufd(imm(0x4e), xmm2, xmm7) mulpd(xmm0, xmm2) mulpd(xmm1, xmm4) addpd(xmm5, xmm10) addpd(xmm6, xmm14) movaps(xmm7, xmm6) mulpd(xmm0, xmm7) mulpd(xmm1, xmm6) addpd(xmm2, xmm9) movaps(mem(rbx, -6*16), xmm2) addpd(xmm4, xmm13) movaps(xmm3, xmm4) pshufd(imm(0x4e), xmm3, xmm5) mulpd(xmm0, xmm3) mulpd(xmm1, xmm4) addpd(xmm7, xmm8) addpd(xmm6, xmm12) movaps(xmm5, xmm6) mulpd(xmm0, xmm5) movaps(mem(rax, -6*16), xmm0) mulpd(xmm1, xmm6) movaps(mem(rax, -5*16), xmm1) sub(imm(0-4*1*8), rax) // a += 4 (1 x mr) sub(imm(0-4*1*8), rbx) // b += 4 (1 x nr) dec(rsi) // i -= 1; jne(.DLOOPKLEFT) // iterate again if i != 0. 
label(.DPOSTACCUM) addpd(xmm3, xmm11) addpd(xmm4, xmm15) addpd(xmm5, xmm10) addpd(xmm6, xmm14) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta movddup(mem(rax), xmm6) // load alpha and duplicate movddup(mem(rbx), xmm7) // load beta and duplicate mov(var(rs_c), rsi) // load rs_c mov(rsi, r8) // make a copy of rs_c lea(mem(, rsi, 8), rsi) // rsi = rs_c * sizeof(double) lea(mem(rcx, rsi, 2), rdx) // load address of c + 2*rs_c; // xmm8: xmm9: xmm10: xmm11: // ( ab01 ( ab00 ( ab03 ( ab02 // ab10 ) ab11 ) ab12 ) ab13 ) // // xmm12: xmm13: xmm14: xmm15: // ( ab21 ( ab20 ( ab23 ( ab22 // ab30 ) ab31 ) ab32 ) ab33 ) movaps(xmm8, xmm0) movsd(xmm9, xmm8) movsd(xmm0, xmm9) movaps(xmm10, xmm0) movsd(xmm11, xmm10) movsd(xmm0, xmm11) movaps(xmm12, xmm0) movsd(xmm13, xmm12) movsd(xmm0, xmm13) movaps(xmm14, xmm0) movsd(xmm15, xmm14) movsd(xmm0, xmm15) // xmm8: xmm9: xmm10: xmm11: // ( ab00 ( ab01 ( ab02 ( ab03 // ab10 ) ab11 ) ab12 ) ab13 ) // // xmm12: xmm13: xmm14: xmm15: // ( ab20 ( ab21 ( ab22 ( ab23 // ab30 ) ab31 ) ab32 ) ab33 ) // determine if // c % 16 == 0, AND // 8*cs_c % 16 == 0, AND // rs_c == 1 // ie: aligned, ldim aligned, and // column-stored cmp(imm(1), r8) // set ZF if rs_c == 1. sete(bl) // bl = ( ZF == 1 ? 1 : 0 ); test(imm(15), rcx) // set ZF if c & 16 is zero. setz(bh) // bh = ( ZF == 1 ? 1 : 0 ); test(imm(15), r12) // set ZF if (8*cs_c) & 16 is zero. setz(al) // al = ( ZF == 1 ? 1 : 0 ); // and(bl,bh) followed by // and(bh,al) will reveal result // now avoid loading C if beta == 0 xorpd(xmm0, xmm0) // set xmm0 to zero. ucomisd(xmm0, xmm7) // check if beta == 0. je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. 
jne(.DCOLSTORED) // jump to column storage case label(.DGENSTORED) movlpd(mem(rcx), xmm0) // load c00 and c10, movhpd(mem(rcx, rsi, 1), xmm0) mulpd(xmm6, xmm8) // scale by alpha, mulpd(xmm7, xmm0) // scale by beta, addpd(xmm8, xmm0) // add the gemm result, movlpd(xmm0, mem(rcx)) // and store back to memory. movhpd(xmm0, mem(rcx, rsi, 1)) add(rdi, rcx) movlpd(mem(rdx), xmm1) // load c20 and c30, movhpd(mem(rdx, rsi, 1), xmm1) mulpd(xmm6, xmm12) // scale by alpha, mulpd(xmm7, xmm1) // scale by beta, addpd(xmm12, xmm1) // add the gemm result, movlpd(xmm1, mem(rdx)) // and store back to memory. movhpd(xmm1, mem(rdx, rsi, 1)) add(rdi, rdx) movlpd(mem(rcx), xmm0) // load c01 and c11, movhpd(mem(rcx, rsi, 1), xmm0) mulpd(xmm6, xmm9) // scale by alpha, mulpd(xmm7, xmm0) // scale by beta, addpd(xmm9, xmm0) // add the gemm result, movlpd(xmm0, mem(rcx)) // and store back to memory. movhpd(xmm0, mem(rcx, rsi, 1)) add(rdi, rcx) movlpd(mem(rdx), xmm1) // load c21 and c31, movhpd(mem(rdx, rsi, 1), xmm1) mulpd(xmm6, xmm13) // scale by alpha, mulpd(xmm7, xmm1) // scale by beta, addpd(xmm13, xmm1) // add the gemm result, movlpd(xmm1, mem(rdx)) // and store back to memory. movhpd(xmm1, mem(rdx, rsi, 1)) add(rdi, rdx) movlpd(mem(rcx), xmm0) // load c02 and c12, movhpd(mem(rcx, rsi, 1), xmm0) mulpd(xmm6, xmm10) // scale by alpha, mulpd(xmm7, xmm0) // scale by beta, addpd(xmm10, xmm0) // add the gemm result, movlpd(xmm0, mem(rcx)) // and store back to memory. movhpd(xmm0, mem(rcx, rsi, 1)) add(rdi, rcx) movlpd(mem(rdx), xmm1) // load c22 and c32, movhpd(mem(rdx, rsi, 1), xmm1) mulpd(xmm6, xmm14) // scale by alpha, mulpd(xmm7, xmm1) // scale by beta, addpd(xmm14, xmm1) // add the gemm result, movlpd(xmm1, mem(rdx)) // and store back to memory. 
movhpd(xmm1, mem(rdx, rsi, 1)) add(rdi, rdx) movlpd(mem(rcx), xmm0) // load c03 and c13, movhpd(mem(rcx, rsi, 1), xmm0) mulpd(xmm6, xmm11) // scale by alpha, mulpd(xmm7, xmm0) // scale by beta, addpd(xmm11, xmm0) // add the gemm result, movlpd(xmm0, mem(rcx)) // and store back to memory. movhpd(xmm0, mem(rcx, rsi, 1)) movlpd(mem(rdx), xmm1) // load c23 and c33, movhpd(mem(rdx, rsi, 1), xmm1) mulpd(xmm6, xmm15) // scale by alpha, mulpd(xmm7, xmm1) // scale by beta, addpd(xmm15, xmm1) // add the gemm result, movlpd(xmm1, mem(rdx)) // and store back to memory. movhpd(xmm1, mem(rdx, rsi, 1)) jmp(.DDONE) // jump to end. label(.DCOLSTORED) movaps(mem(rcx), xmm0) // load c00 and c10, mulpd(xmm6, xmm8) // scale by alpha, mulpd(xmm7, xmm0) // scale by beta, addpd(xmm8, xmm0) // add the gemm result, movaps(xmm0, mem(rcx)) // and store back to memory. add(rdi, rcx) movaps(mem(rdx), xmm1) // load c20 and c30, mulpd(xmm6, xmm12) // scale by alpha, mulpd(xmm7, xmm1) // scale by beta, addpd(xmm12, xmm1) // add the gemm result, movaps(xmm1, mem(rdx)) // and store back to memory. add(rdi, rdx) movaps(mem(rcx), xmm0) // load c01 and c11, mulpd(xmm6, xmm9) // scale by alpha, mulpd(xmm7, xmm0) // scale by beta, addpd(xmm9, xmm0) // add the gemm result, movaps(xmm0, mem(rcx)) // and store back to memory. add(rdi, rcx) movaps(mem(rdx), xmm1) // load c21 and c31, mulpd(xmm6, xmm13) // scale by alpha, mulpd(xmm7, xmm1) // scale by beta, addpd(xmm13, xmm1) // add the gemm result, movaps(xmm1, mem(rdx)) // and store back to memory. add(rdi, rdx) movaps(mem(rcx), xmm0) // load c02 and c12, mulpd(xmm6, xmm10) // scale by alpha, mulpd(xmm7, xmm0) // scale by beta, addpd(xmm10, xmm0) // add the gemm result, movaps(xmm0, mem(rcx)) // and store back to memory. add(rdi, rcx) movaps(mem(rdx), xmm1) // load c22 and c32, mulpd(xmm6, xmm14) // scale by alpha, mulpd(xmm7, xmm1) // scale by beta, addpd(xmm14, xmm1) // add the gemm result, movaps(xmm1, mem(rdx)) // and store back to memory. 
add(rdi, rdx) movaps(mem(rcx), xmm0) // load c03 and c13, mulpd(xmm6, xmm11) // scale by alpha, mulpd(xmm7, xmm0) // scale by beta, addpd(xmm11, xmm0) // add the gemm result, movaps(xmm0, mem(rcx)) // and store back to memory. movaps(mem(rdx), xmm1) // load c23 and c33, mulpd(xmm6, xmm15) // scale by alpha, mulpd(xmm7, xmm1) // scale by beta, addpd(xmm15, xmm1) // add the gemm result, movaps(xmm1, mem(rdx)) // and store back to memory. jmp(.DDONE) // jump to end. label(.DBETAZERO) // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. jne(.DCOLSTORBZ) // jump to column storage case label(.DGENSTORBZ) // skip loading c00 and c10, mulpd(xmm6, xmm8) // scale by alpha, movlpd(xmm8, mem(rcx)) // and store back to memory. movhpd(xmm8, mem(rcx, rsi, 1)) add(rdi, rcx) // skip loading c20 and c30, mulpd(xmm6, xmm12) // scale by alpha, movlpd(xmm12, mem(rdx)) // and store back to memory. movhpd(xmm12, mem(rdx, rsi, 1)) add(rdi, rdx) // skip loading c01 and c11, mulpd(xmm6, xmm9) // scale by alpha, movlpd(xmm9, mem(rcx)) // and store back to memory. movhpd(xmm9, mem(rcx, rsi, 1)) add(rdi, rcx) // skip loading c21 and c31, mulpd(xmm6, xmm13) // scale by alpha, movlpd(xmm13, mem(rdx)) // and store back to memory. movhpd(xmm13, mem(rdx, rsi, 1)) add(rdi, rdx) // skip loading c02 and c12, mulpd(xmm6, xmm10) // scale by alpha, movlpd(xmm10, mem(rcx)) // and store back to memory. movhpd(xmm10, mem(rcx, rsi, 1)) add(rdi, rcx) // skip loading c22 and c32, mulpd(xmm6, xmm14) // scale by alpha, movlpd(xmm14, mem(rdx)) // and store back to memory. movhpd(xmm14, mem(rdx, rsi, 1)) add(rdi, rdx) // skip loading c03 and c13, mulpd(xmm6, xmm11) // scale by alpha, movlpd(xmm11, mem(rcx)) // and store back to memory. movhpd(xmm11, mem(rcx, rsi, 1)) // skip loading c23 and c33, mulpd(xmm6, xmm15) // scale by alpha, movlpd(xmm15, mem(rdx)) // and store back to memory. movhpd(xmm15, mem(rdx, rsi, 1)) jmp(.DDONE) // jump to end. 
label(.DCOLSTORBZ) // skip loading c00 and c10, mulpd(xmm6, xmm8) // scale by alpha, movaps(xmm8, mem(rcx)) // and store back to memory. add(rdi, rcx) // skip loading c20 and c30, mulpd(xmm6, xmm12) // scale by alpha, movaps(xmm12, mem(rdx)) // and store back to memory. add(rdi, rdx) // skip loading c01 and c11, mulpd(xmm6, xmm9) // scale by alpha, movaps(xmm9, mem(rcx)) // and store back to memory. add(rdi, rcx) // skip loading c21 and c31, mulpd(xmm6, xmm13) // scale by alpha, movaps(xmm13, mem(rdx)) // and store back to memory. add(rdi, rdx) // skip loading c02 and c12, mulpd(xmm6, xmm10) // scale by alpha, movaps(xmm10, mem(rcx)) // and store back to memory. add(rdi, rcx) // skip loading c22 and c32, mulpd(xmm6, xmm14) // scale by alpha, movaps(xmm14, mem(rdx)) // and store back to memory. add(rdi, rdx) // skip loading c03 and c13, mulpd(xmm6, xmm11) // scale by alpha, movaps(xmm11, mem(rcx)) // and store back to memory. // skip loading c23 and c33, mulpd(xmm6, xmm15) // scale by alpha, movaps(xmm15, mem(rdx)) // and store back to memory. label(.DDONE) end_asm( : // output operands (none) : // input operands [k_iter] "m" (k_iter), // 0 [k_left] "m" (k_left), // 1 [a] "m" (a), // 2 [b] "m" (b), // 3 [alpha] "m" (alpha), // 4 [beta] "m" (beta), // 5 [c] "m" (c), // 6 [rs_c] "m" (rs_c), // 7 [cs_c] "m" (cs_c), // 8 [b_next] "m" (b_next), // 9 [a_next] "m" (a_next) // 10 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/penryn/3/bli_gemmtrsm_l_penryn_asm_d4x4.c000066400000000000000000000370141360743507500240440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

#define BLIS_ASM_SYNTAX_ATT
#include "bli_x86_asm_macros.h"

// Single-precision counterpart: only an empty stub exists, and it is
// compiled out by the surrounding #if 0.
#if 0
void bli_sgemmtrsm_l_penryn_asm_8x4
     (
       dim_t               k0,
       float*     restrict alpha,
       float*     restrict a10,
       float*     restrict a11,
       float*     restrict b01,
       float*     restrict b11,
       float*     restrict c11,
       inc_t               rs_c0,
       inc_t               cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
}
#endif

//
// bli_dgemmtrsm_l_penryn_asm_4x4
//
// Fused double-precision gemm + lower-triangular solve on one 4x4 block,
// written as AT&T-syntax inline assembly using SSE2/SSE3 instructions.
// The assembly performs, in order:
//
//   1. ab  := a10 * b01, accumulated as k0 rank-1 updates (a main loop
//      unrolled four times plus an edge loop for k0 % 4);
//   2. b11 := alpha * b11 - ab;
//   3. a forward substitution against a11, one row at a time (row 0
//      first); every finished row is stored back into b11 and also
//      scattered into c11.
//
// Parameters:
//   k0      - number of rank-1 updates; split into k0 / 4 unrolled trips
//             and k0 % 4 edge trips.
//   alpha   - pointer to the scalar that multiplies b11 in step 2.
//   a10     - left panel; 4 doubles are consumed per update. Read with
//             movaps, so it must be 16-byte aligned.
//   a11     - 4x4 triangular block; element (i,j) is read from byte
//             offset (i + j*4)*8 and only entries with j <= i are
//             touched. The diagonal entries are multiplied in rather
//             than divided by, so they are expected to already hold the
//             reciprocals (as the inline comments note).
//             NOTE(review): confirm against the packing routine.
//   b01     - right panel; 4 doubles are consumed per update. Read with
//             movaps, so it must be 16-byte aligned.
//   b11     - 4x4 block read and overwritten in place; row i occupies
//             bytes [i*32, i*32+32). Accessed with movaps, so it must be
//             16-byte aligned.
//   c11     - output block; element (i,j) is written at
//             c11 + i*rs_c0 + j*cs_c0 (strides in units of doubles).
//   rs_c0   - row stride of c11, in elements.
//   cs_c0   - column stride of c11, in elements.
//   data    - only used to obtain b_next (see below).
//   cntx    - not referenced by this function.
//
void bli_dgemmtrsm_l_penryn_asm_4x4
     (
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a10,
       double*    restrict a11,
       double*    restrict b01,
       double*    restrict b11,
       double*    restrict c11,
       inc_t               rs_c0,
       inc_t               cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	// b_next is handed to the asm block as an input operand, but every
	// instruction that would use it is currently commented out.
	void*   b_next  = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	begin_asm()

	mov(var(a10), rax) // load address of a10.
	mov(var(b01), rbx) // load address of b01.
	//mov(var(b_next), r9) // load address of b_next.

	// Bias both panel pointers forward by 8*16 = 128 bytes (subtracting
	// a negative immediate adds), so that the unrolled loads below can
	// address the panels with displacements -8*16 .. 0*16.
	// NOTE(review): the sub-of-a-negative form is presumably chosen so
	// the immediate fits a signed byte -- confirm.
	sub(imm(0-8*16), rax) // increment pointers to allow byte
	sub(imm(0-8*16), rbx) // offsets in the unrolled iterations.

	// Software pipelining: the operands of the first update are loaded
	// before the loop; each update loads the operands of the next one.
	movaps(mem(rax, -8*16), xmm0) // initialize loop by pre-loading elements
	movaps(mem(rax, -7*16), xmm1) // of a and b.
	movaps(mem(rbx, -8*16), xmm2)

	//mov(var(c11), rcx) // load address of c11
	//mov(var(rs_c), rdi) // load cs_c
	//lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(double)
	//lea(mem(rcx, rdi, 2), rdx) // load address of c + 2*cs_c;

	//prefetch(2, mem(r9, 0*8)) // prefetch b_next

	// xmm3..xmm6 hold products that have been computed but not yet
	// added to an accumulator; they must start at zero because the
	// first update adds them before anything has been multiplied.
	xorpd(xmm3, xmm3)
	xorpd(xmm4, xmm4)
	xorpd(xmm5, xmm5)
	xorpd(xmm6, xmm6)

	// xmm8..xmm15 are the eight accumulators of the 4x4 product.
	//prefetch(2, mem(rcx, 3*8)) // prefetch c + 0*cs_c
	xorpd(xmm8, xmm8)
	movaps(xmm8, xmm9)
	//prefetch(2, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*cs_c
	movaps(xmm8, xmm10)
	movaps(xmm8, xmm11)
	//prefetch(2, mem(rdx, 3*8)) // prefetch c + 2*cs_c
	movaps(xmm8, xmm12)
	movaps(xmm8, xmm13)
	//prefetch(2, mem(rdx, rdi, 1, 3*8)) // prefetch c + 3*cs_c
	movaps(xmm8, xmm14)
	movaps(xmm8, xmm15)

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.CONSIDERKLEFT) // if i == 0, jump to code that
	                   // contains the k_left loop.

	label(.LOOPKITER) // MAIN LOOP

	// Each update works on
	//   xmm0 = ( a0 a1 ), xmm1 = ( a2 a3 ), xmm2 = ( b0 b1 ), xmm3 = ( b2 b3 ).
	// pshufd with 0x4e swaps the two 64-bit halves of a b register, so
	// every accumulator receives one "straight" or one "swapped"
	// product pair (see the register map after .POSTACCUM). The addpd
	// instructions at the head of each group retire the products of
	// the previous group.

	//prefetch(0, mem(rax, 1264))
	prefetch(0, mem(rax, (4*35+1)*8)) // prefetch a, (4*35+1)*8 bytes ahead

	addpd(xmm3, xmm11) // iteration 0
	movaps(mem(rbx, -7*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, -6*16), xmm2)
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -6*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -5*16), xmm1)

	addpd(xmm3, xmm11) // iteration 1
	movaps(mem(rbx, -5*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, -4*16), xmm2)
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -4*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -3*16), xmm1)

	//prefetch(0, mem(rax, 1328))
	prefetch(0, mem(rax, (4*37+1)*8)) // prefetch a, (4*37+1)*8 bytes ahead

	addpd(xmm3, xmm11) // iteration 2
	movaps(mem(rbx, -3*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, -2*16), xmm2)
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -2*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -1*16), xmm1)

	addpd(xmm3, xmm11) // iteration 3
	movaps(mem(rbx, -1*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	// a is advanced here, before the last loads of this trip, which is
	// why those loads use the -8*16 / -7*16 displacements again.
	sub(imm(0-4*4*8), rax) // a += 4*4 (unroll x mr)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	//sub(imm(-4*4*8), r9) // b_next += 4*4 (unroll x nr)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, 0*16), xmm2) // first b pair of the next trip
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	sub(imm(0-4*4*8), rbx) // b += 4*4 (unroll x nr)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -8*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -7*16), xmm1)

	//prefetch(2, mem(r9, 0*8)) // prefetch b_next[0]
	//prefetch(2, mem(r9, 8*8)) // prefetch b_next[8]

	dec(rsi) // i -= 1;
	jne(.LOOPKITER) // iterate again if i != 0.

	label(.CONSIDERKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.POSTACCUM) // if i == 0, we're done; jump to end.
	               // else, we prepare to enter k_left loop.

	label(.LOOPKLEFT) // EDGE LOOP

	// One rank-1 update per trip; same instruction pattern as a single
	// iteration of the main loop.
	addpd(xmm3, xmm11) // iteration 0
	movaps(mem(rbx, -7*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, -6*16), xmm2)
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -6*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -5*16), xmm1)

	sub(imm(0-4*1*8), rax) // a += 4 (1 x mr)
	sub(imm(0-4*1*8), rbx) // b += 4 (1 x nr)

	dec(rsi) // i -= 1;
	jne(.LOOPKLEFT) // iterate again if i != 0.

	label(.POSTACCUM)

	// Retire the four product pairs still pending from the last update
	// (these registers are zero if no update was executed).
	addpd(xmm3, xmm11)
	addpd(xmm4, xmm15)
	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)

	mov(var(b11), rbx) // load address of b11.

	// Accumulator contents at this point (low element on top):
	//
	// xmm8:   xmm9:   xmm10:  xmm11:
	// ( ab01  ( ab00  ( ab03  ( ab02
	//   ab10 )  ab11 )  ab12 )  ab13 )
	//
	// xmm12:  xmm13:  xmm14:  xmm15:
	// ( ab21  ( ab20  ( ab23  ( ab22
	//   ab30 )  ab31 )  ab32 )  ab33 )

	// Recombine low/high halves so that each register holds two
	// adjacent elements of a single row of ab.
	movaps(xmm9, xmm0)
	movaps(xmm8, xmm1)
	unpcklpd(xmm8, xmm0)
	unpckhpd(xmm9, xmm1)

	movaps(xmm11, xmm4)
	movaps(xmm10, xmm5)
	unpcklpd(xmm10, xmm4)
	unpckhpd(xmm11, xmm5)

	movaps(xmm13, xmm2)
	movaps(xmm12, xmm3)
	unpcklpd(xmm12, xmm2)
	unpckhpd(xmm13, xmm3)

	movaps(xmm15, xmm6)
	movaps(xmm14, xmm7)
	unpcklpd(xmm14, xmm6)
	unpckhpd(xmm15, xmm7)

	// xmm0: ( ab00 ab01 ) xmm4: ( ab02 ab03 )
	// xmm1: ( ab10 ab11 ) xmm5: ( ab12 ab13 )
	// xmm2: ( ab20 ab21 ) xmm6: ( ab22 ab23 )
	// xmm3: ( ab30 ab31 ) xmm7: ( ab32 ab33 )

	// xmm8..xmm15 are free again from here on; they are reloaded with
	// b11 and scaled by alpha (xmm15 carries alpha until its last use).
	mov(var(alpha), rax) // load address of alpha
	movddup(mem(rax), xmm15) // load alpha and duplicate

	movaps(mem(rbx, 0*16), xmm8)
	movaps(mem(rbx, 1*16), xmm12)
	mulpd(xmm15, xmm8) // xmm8 = alpha * ( beta00 beta01 )
	mulpd(xmm15, xmm12) // xmm12 = alpha * ( beta02 beta03 )

	movaps(mem(rbx, 2*16), xmm9)
	movaps(mem(rbx, 3*16), xmm13)
	mulpd(xmm15, xmm9) // xmm9 = alpha * ( beta10 beta11 )
	mulpd(xmm15, xmm13) // xmm13 = alpha * ( beta12 beta13 )

	movaps(mem(rbx, 4*16), xmm10)
	movaps(mem(rbx, 5*16), xmm14)
	mulpd(xmm15, xmm10) // xmm10 = alpha * ( beta20 beta21 )
	mulpd(xmm15, xmm14) // xmm14 = alpha * ( beta22 beta23 )

	movaps(mem(rbx, 6*16), xmm11)
	mulpd(xmm15, xmm11) // xmm11 = alpha * ( beta30 beta31 )
	mulpd(mem(rbx, 7*16), xmm15) // xmm15 = alpha * ( beta32 beta33 )

	// (Now scaled by alpha:)
	// xmm8:  ( beta00 beta01 ) xmm12: ( beta02 beta03 )
	// xmm9:  ( beta10 beta11 ) xmm13: ( beta12 beta13 )
	// xmm10: ( beta20 beta21 ) xmm14: ( beta22 beta23 )
	// xmm11: ( beta30 beta31 ) xmm15: ( beta32 beta33 )

	// b11 := alpha * b11 - a10 * b01
	subpd(xmm0, xmm8) // xmm8 -= xmm0
	subpd(xmm1, xmm9) // xmm9 -= xmm1
	subpd(xmm2, xmm10) // xmm10 -= xmm2
	subpd(xmm3, xmm11) // xmm11 -= xmm3
	subpd(xmm4, xmm12) // xmm12 -= xmm4
	subpd(xmm5, xmm13) // xmm13 -= xmm5
	subpd(xmm6, xmm14) // xmm14 -= xmm6
	subpd(xmm7, xmm15) // xmm15 -= xmm7

	label(.TRSM)

	// Forward substitution, rows 0 -> 3. Row i first subtracts
	// alpha_ij * (row j) for every already-solved row j < i, then is
	// multiplied by the stored diagonal entry. Each solved row is
	// written back to b11 with two aligned 16-byte stores and scattered
	// to c11 one element at a time, so c11 may have arbitrary strides.
	//
	// a11 element (i,j) is addressed with the displacement expression
	// (i+j*4)*8 relative to rax.
	// NOTE(review): the mem(...)*8(rax) spelling presumably relies on
	// the macro emitting its argument verbatim in front of "(%rax)";
	// see bli_x86_asm_macros.h.

	mov(var(a11), rax) // load address of a11
	mov(var(c11), rcx) // load address of c11

	mov(var(rs_c), rsi) // load rs_c
	mov(var(cs_c), rdi) // load cs_c
	sal(imm(3), rsi) // rs_c *= sizeof( double )
	sal(imm(3), rdi) // cs_c *= sizeof( double )

	// rcx addresses columns 0 and 1 of the current row of c11, rdx
	// addresses columns 2 and 3.
	lea(mem(rcx, rdi, 2), rdx) // c11_2 = c11 + 2*cs_c

	// iteration 0

	movddup(mem(0+0*4)*8(rax), xmm0) // load xmm0 = (1/alpha00)

	mulpd(xmm0, xmm8) // xmm8 *= (1/alpha00);
	mulpd(xmm0, xmm12) // xmm12 *= (1/alpha00);

	movaps(xmm8, mem(rbx, 0*16)) // store ( beta00 beta01 ) = xmm8
	movaps(xmm12, mem(rbx, 1*16)) // store ( beta02 beta03 ) = xmm12
	movlpd(xmm8, mem(rcx)) // store ( gamma00 ) = xmm8[0]
	movhpd(xmm8, mem(rcx, rdi, 1)) // store ( gamma01 ) = xmm8[1]
	movlpd(xmm12, mem(rdx)) // store ( gamma02 ) = xmm12[0]
	movhpd(xmm12, mem(rdx, rdi, 1)) // store ( gamma03 ) = xmm12[1]
	add(rsi, rcx) // c11 += rs_c
	add(rsi, rdx) // c11_2 += rs_c

	// iteration 1

	movddup(mem(1+0*4)*8(rax), xmm0) // load xmm0 = alpha10
	movddup(mem(1+1*4)*8(rax), xmm1) // load xmm1 = (1/alpha11)

	movaps(xmm0, xmm4) // xmm4 = xmm0
	mulpd(xmm8, xmm0) // xmm0 = alpha10 * ( beta00 beta01 )
	mulpd(xmm12, xmm4) // xmm4 = alpha10 * ( beta02 beta03 )
	subpd(xmm0, xmm9) // xmm9 -= xmm0
	subpd(xmm4, xmm13) // xmm13 -= xmm4
	mulpd(xmm1, xmm9) // xmm9 *= (1/alpha11);
	mulpd(xmm1, xmm13) // xmm13 *= (1/alpha11);

	movaps(xmm9, mem(rbx, 2*16)) // store ( beta10 beta11 ) = xmm9
	movaps(xmm13, mem(rbx, 3*16)) // store ( beta12 beta13 ) = xmm13
	movlpd(xmm9, mem(rcx)) // store ( gamma10 ) = xmm9[0]
	movhpd(xmm9, mem(rcx, rdi, 1)) // store ( gamma11 ) = xmm9[1]
	movlpd(xmm13, mem(rdx)) // store ( gamma12 ) = xmm13[0]
	movhpd(xmm13, mem(rdx, rdi, 1)) // store ( gamma13 ) = xmm13[1]
	add(rsi, rcx) // c11 += rs_c
	add(rsi, rdx) // c11_2 += rs_c

	// iteration 2

	movddup(mem(2+0*4)*8(rax), xmm0) // load xmm0 = alpha20
	movddup(mem(2+1*4)*8(rax), xmm1) // load xmm1 = alpha21
	movddup(mem(2+2*4)*8(rax), xmm2) // load xmm2 = (1/alpha22)

	movaps(xmm0, xmm4) // xmm4 = xmm0
	movaps(xmm1, xmm5) // xmm5 = xmm1
	mulpd(xmm8, xmm0) // xmm0 = alpha20 * ( beta00 beta01 )
	mulpd(xmm12, xmm4) // xmm4 = alpha20 * ( beta02 beta03 )
	mulpd(xmm9, xmm1) // xmm1 = alpha21 * ( beta10 beta11 )
	mulpd(xmm13, xmm5) // xmm5 = alpha21 * ( beta12 beta13 )
	addpd(xmm1, xmm0) // xmm0 += xmm1;
	addpd(xmm5, xmm4) // xmm4 += xmm5;
	subpd(xmm0, xmm10) // xmm10 -= xmm0
	subpd(xmm4, xmm14) // xmm14 -= xmm4
	mulpd(xmm2, xmm10) // xmm10 *= (1/alpha22);
	mulpd(xmm2, xmm14) // xmm14 *= (1/alpha22);

	movaps(xmm10, mem(rbx, 4*16)) // store ( beta20 beta21 ) = xmm10
	movaps(xmm14, mem(rbx, 5*16)) // store ( beta22 beta23 ) = xmm14
	movlpd(xmm10, mem(rcx)) // store ( gamma20 ) = xmm10[0]
	movhpd(xmm10, mem(rcx, rdi, 1)) // store ( gamma21 ) = xmm10[1]
	movlpd(xmm14, mem(rdx)) // store ( gamma22 ) = xmm14[0]
	movhpd(xmm14, mem(rdx, rdi, 1)) // store ( gamma23 ) = xmm14[1]
	add(rsi, rcx) // c11 += rs_c
	add(rsi, rdx) // c11_2 += rs_c

	// iteration 3

	movddup(mem(3+0*4)*8(rax), xmm0) // load xmm0 = alpha30
	movddup(mem(3+1*4)*8(rax), xmm1) // load xmm1 = alpha31
	movddup(mem(3+2*4)*8(rax), xmm2) // load xmm2 = alpha32
	movddup(mem(3+3*4)*8(rax), xmm3) // load xmm3 = (1/alpha33)

	movaps(xmm0, xmm4) // xmm4 = xmm0
	movaps(xmm1, xmm5) // xmm5 = xmm1
	movaps(xmm2, xmm6) // xmm6 = xmm2
	mulpd(xmm8, xmm0) // xmm0 = alpha30 * ( beta00 beta01 )
	mulpd(xmm12, xmm4) // xmm4 = alpha30 * ( beta02 beta03 )
	mulpd(xmm9, xmm1) // xmm1 = alpha31 * ( beta10 beta11 )
	mulpd(xmm13, xmm5) // xmm5 = alpha31 * ( beta12 beta13 )
	mulpd(xmm10, xmm2) // xmm2 = alpha32 * ( beta20 beta21 )
	mulpd(xmm14, xmm6) // xmm6 = alpha32 * ( beta22 beta23 )
	addpd(xmm1, xmm0) // xmm0 += xmm1;
	addpd(xmm5, xmm4) // xmm4 += xmm5;
	addpd(xmm2, xmm0) // xmm0 += xmm2;
	addpd(xmm6, xmm4) // xmm4 += xmm6;
	subpd(xmm0, xmm11) // xmm11 -= xmm0
	subpd(xmm4, xmm15) // xmm15 -= xmm4
	mulpd(xmm3, xmm11) // xmm11 *= (1/alpha33);
	mulpd(xmm3, xmm15) // xmm15 *= (1/alpha33);

	movaps(xmm11, mem(rbx, 6*16)) // store ( beta30 beta31 ) = xmm11
	movaps(xmm15, mem(rbx, 7*16)) // store ( beta32 beta33 ) = xmm15
	movlpd(xmm11, mem(rcx)) // store ( gamma30 ) = xmm11[0]
	movhpd(xmm11, mem(rcx, rdi, 1)) // store ( gamma31 ) = xmm11[1]
	movlpd(xmm15, mem(rdx)) // store ( gamma32 ) = xmm15[0]
	movhpd(xmm15, mem(rdx, rdi, 1)) // store ( gamma33 ) = xmm15[1]

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a10] "m" (a10), // 2
	  [a11] "m" (a11), // 3
	  [b01] "m" (b01), // 4
	  [b11] "m" (b11), // 5
	  [c11] "m" (c11), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c), // 8
	  [alpha] "m" (alpha), // 9
	  [b_next] "m" (b_next) // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"r8", "r9", "r10",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}
blis-0.6.1/kernels/penryn/3/bli_gemmtrsm_u_penryn_asm_d4x4.c000066400000000000000000000354331360743507500240600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

#define BLIS_ASM_SYNTAX_ATT
#include "bli_x86_asm_macros.h"

// Single-precision counterpart: only an empty stub exists, and it is
// compiled out by the surrounding #if 0.
#if 0
void bli_sgemmtrsm_u_penryn_asm_8x4
     (
       dim_t               k0,
       float*     restrict alpha,
       float*     restrict a12,
       float*     restrict a11,
       float*     restrict b21,
       float*     restrict b11,
       float*     restrict c11,
       inc_t               rs_c0,
       inc_t               cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
}
#endif

//
// bli_dgemmtrsm_u_penryn_asm_4x4
//
// Fused double-precision gemm + upper-triangular solve on one 4x4 block,
// written as AT&T-syntax inline assembly using SSE2/SSE3 instructions.
// The assembly performs, in order:
//
//   1. ab  := a12 * b21, accumulated as k0 rank-1 updates (a main loop
//      unrolled four times plus an edge loop for k0 % 4);
//   2. b11 := alpha * b11 - ab;
//   3. a backward substitution against a11, one row at a time (row 3
//      first); every finished row is stored back into b11 and also
//      scattered into c11.
//
// Parameters:
//   k0      - number of rank-1 updates; split into k0 / 4 unrolled trips
//             and k0 % 4 edge trips.
//   alpha   - pointer to the scalar that multiplies b11 in step 2.
//   a12     - left panel; 4 doubles are consumed per update. Read with
//             movaps, so it must be 16-byte aligned.
//   a11     - 4x4 triangular block; element (i,j) is read from byte
//             offset (i + j*4)*8 and only entries with j >= i are
//             touched. The diagonal entries are multiplied in rather
//             than divided by, so they are expected to already hold the
//             reciprocals (as the inline comments note).
//             NOTE(review): confirm against the packing routine.
//   b21     - right panel; 4 doubles are consumed per update. Read with
//             movaps, so it must be 16-byte aligned.
//   b11     - 4x4 block read and overwritten in place; row i occupies
//             bytes [i*32, i*32+32). Accessed with movaps, so it must be
//             16-byte aligned.
//   c11     - output block; element (i,j) is written at
//             c11 + i*rs_c0 + j*cs_c0 (strides in units of doubles).
//   rs_c0   - row stride of c11, in elements.
//   cs_c0   - column stride of c11, in elements.
//   data    - only used to obtain b_next (see below).
//   cntx    - not referenced by this function.
//
void bli_dgemmtrsm_u_penryn_asm_4x4
     (
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a12,
       double*    restrict a11,
       double*    restrict b21,
       double*    restrict b11,
       double*    restrict c11,
       inc_t               rs_c0,
       inc_t               cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	// b_next is handed to the asm block as an input operand, but the
	// only instruction that would use it is commented out.
	void*   b_next  = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 4;
	uint64_t k_left = k0 % 4;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	begin_asm()

	mov(var(a12), rax) // load address of a12.
	mov(var(b21), rbx) // load address of b21.
	//mov(var(b_next), r9) // load address of b_next.

	// Bias both panel pointers forward by 8*16 = 128 bytes, so that the
	// unrolled loads below can address the panels with displacements
	// -8*16 .. 0*16.
	add(imm(8*16), rax) // increment pointers to allow byte
	add(imm(8*16), rbx) // offsets in the unrolled iterations.

	// Software pipelining: the operands of the first update are loaded
	// before the loop; each update loads the operands of the next one.
	movaps(mem(rax, -8*16), xmm0) // initialize loop by pre-loading elements
	movaps(mem(rax, -7*16), xmm1) // of a and b.
	movaps(mem(rbx, -8*16), xmm2)

	// xmm3..xmm6 hold products that have been computed but not yet
	// added to an accumulator; they must start at zero because the
	// first update adds them before anything has been multiplied.
	xorpd(xmm3, xmm3)
	xorpd(xmm4, xmm4)
	xorpd(xmm5, xmm5)
	xorpd(xmm6, xmm6)

	// xmm8..xmm15 are the eight accumulators of the 4x4 product.
	xorpd(xmm8, xmm8)
	movaps(xmm8, xmm9)
	movaps(xmm8, xmm10)
	movaps(xmm8, xmm11)
	movaps(xmm8, xmm12)
	movaps(xmm8, xmm13)
	movaps(xmm8, xmm14)
	movaps(xmm8, xmm15)

	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.CONSIDERKLEFT) // if i == 0, jump to code that
	                   // contains the k_left loop.

	label(.LOOPKITER) // MAIN LOOP

	// Each update works on
	//   xmm0 = ( a0 a1 ), xmm1 = ( a2 a3 ), xmm2 = ( b0 b1 ), xmm3 = ( b2 b3 ).
	// pshufd with 0x4e swaps the two 64-bit halves of a b register, so
	// every accumulator receives one "straight" or one "swapped"
	// product pair (see the register map after .POSTACCUM). The addpd
	// instructions at the head of each group retire the products of
	// the previous group.

	prefetch(0, mem(rax, 1264)) // prefetch a, 1264 bytes ahead

	addpd(xmm3, xmm11) // iteration 0
	movaps(mem(rbx, -7*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, -6*16), xmm2)
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -6*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -5*16), xmm1)

	addpd(xmm3, xmm11) // iteration 1
	movaps(mem(rbx, -5*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, -4*16), xmm2)
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -4*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -3*16), xmm1)

	prefetch(0, mem(rax, 1328)) // prefetch a, 1328 bytes ahead

	addpd(xmm3, xmm11) // iteration 2
	movaps(mem(rbx, -3*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, -2*16), xmm2)
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -2*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -1*16), xmm1)

	addpd(xmm3, xmm11) // iteration 3
	movaps(mem(rbx, -1*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	// a is advanced here, before the last loads of this trip, which is
	// why those loads use the -8*16 / -7*16 displacements again.
	add(imm(4*4*8), rax) // a += 4*4 (unroll x mr)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, 0*16), xmm2) // first b pair of the next trip
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	add(imm(4*4*8), rbx) // b += 4*4 (unroll x nr)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -8*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -7*16), xmm1)

	dec(rsi) // i -= 1;
	jne(.LOOPKITER) // iterate again if i != 0.

	label(.CONSIDERKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.POSTACCUM) // if i == 0, we're done; jump to end.
	               // else, we prepare to enter k_left loop.

	label(.LOOPKLEFT) // EDGE LOOP

	// One rank-1 update per trip; same instruction pattern as a single
	// iteration of the main loop.
	addpd(xmm3, xmm11) // iteration 0
	movaps(mem(rbx, -7*16), xmm3)
	addpd(xmm4, xmm15)
	movaps(xmm2, xmm4)
	pshufd(imm(0x4e), xmm2, xmm7)
	mulpd(xmm0, xmm2)
	mulpd(xmm1, xmm4)

	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)
	movaps(xmm7, xmm6)
	mulpd(xmm0, xmm7)
	mulpd(xmm1, xmm6)

	addpd(xmm2, xmm9)
	movaps(mem(rbx, -6*16), xmm2)
	addpd(xmm4, xmm13)
	movaps(xmm3, xmm4)
	pshufd(imm(0x4e), xmm3, xmm5)
	mulpd(xmm0, xmm3)
	mulpd(xmm1, xmm4)

	addpd(xmm7, xmm8)
	addpd(xmm6, xmm12)
	movaps(xmm5, xmm6)
	mulpd(xmm0, xmm5)
	movaps(mem(rax, -6*16), xmm0)
	mulpd(xmm1, xmm6)
	movaps(mem(rax, -5*16), xmm1)

	add(imm(4*1*8), rax) // a += 4 (1 x mr)
	add(imm(4*1*8), rbx) // b += 4 (1 x nr)

	dec(rsi) // i -= 1;
	jne(.LOOPKLEFT) // iterate again if i != 0.

	label(.POSTACCUM)

	// Retire the four product pairs still pending from the last update
	// (these registers are zero if no update was executed).
	addpd(xmm3, xmm11)
	addpd(xmm4, xmm15)
	addpd(xmm5, xmm10)
	addpd(xmm6, xmm14)

	mov(var(b11), rbx) // load address of b11.

	// Accumulator contents at this point (low element on top):
	//
	// xmm8:   xmm9:   xmm10:  xmm11:
	// ( ab01  ( ab00  ( ab03  ( ab02
	//   ab10 )  ab11 )  ab12 )  ab13 )
	//
	// xmm12:  xmm13:  xmm14:  xmm15:
	// ( ab21  ( ab20  ( ab23  ( ab22
	//   ab30 )  ab31 )  ab32 )  ab33 )

	// Recombine low/high halves so that each register holds two
	// adjacent elements of a single row of ab.
	movaps(xmm9, xmm0)
	movaps(xmm8, xmm1)
	unpcklpd(xmm8, xmm0)
	unpckhpd(xmm9, xmm1)

	movaps(xmm11, xmm4)
	movaps(xmm10, xmm5)
	unpcklpd(xmm10, xmm4)
	unpckhpd(xmm11, xmm5)

	movaps(xmm13, xmm2)
	movaps(xmm12, xmm3)
	unpcklpd(xmm12, xmm2)
	unpckhpd(xmm13, xmm3)

	movaps(xmm15, xmm6)
	movaps(xmm14, xmm7)
	unpcklpd(xmm14, xmm6)
	unpckhpd(xmm15, xmm7)

	// xmm0: ( ab00 ab01 ) xmm4: ( ab02 ab03 )
	// xmm1: ( ab10 ab11 ) xmm5: ( ab12 ab13 )
	// xmm2: ( ab20 ab21 ) xmm6: ( ab22 ab23 )
	// xmm3: ( ab30 ab31 ) xmm7: ( ab32 ab33 )

	// xmm8..xmm15 are free again from here on; they are reloaded with
	// b11 and scaled by alpha (xmm15 carries alpha until its last use).
	mov(var(alpha), rax) // load address of alpha
	movddup(mem(rax), xmm15) // load alpha and duplicate

	movaps(mem(rbx, 0*16), xmm8)
	movaps(mem(rbx, 1*16), xmm12)
	mulpd(xmm15, xmm8) // xmm8 = alpha * ( beta00 beta01 )
	mulpd(xmm15, xmm12) // xmm12 = alpha * ( beta02 beta03 )

	movaps(mem(rbx, 2*16), xmm9)
	movaps(mem(rbx, 3*16), xmm13)
	mulpd(xmm15, xmm9) // xmm9 = alpha * ( beta10 beta11 )
	mulpd(xmm15, xmm13) // xmm13 = alpha * ( beta12 beta13 )

	movaps(mem(rbx, 4*16), xmm10)
	movaps(mem(rbx, 5*16), xmm14)
	mulpd(xmm15, xmm10) // xmm10 = alpha * ( beta20 beta21 )
	mulpd(xmm15, xmm14) // xmm14 = alpha * ( beta22 beta23 )

	movaps(mem(rbx, 6*16), xmm11)
	mulpd(xmm15, xmm11) // xmm11 = alpha * ( beta30 beta31 )
	mulpd(mem(rbx, 7*16), xmm15) // xmm15 = alpha * ( beta32 beta33 )

	// (Now scaled by alpha:)
	// xmm8:  ( beta00 beta01 ) xmm12: ( beta02 beta03 )
	// xmm9:  ( beta10 beta11 ) xmm13: ( beta12 beta13 )
	// xmm10: ( beta20 beta21 ) xmm14: ( beta22 beta23 )
	// xmm11: ( beta30 beta31 ) xmm15: ( beta32 beta33 )

	// b11 := alpha * b11 - a12 * b21
	subpd(xmm0, xmm8) // xmm8 -= xmm0
	subpd(xmm1, xmm9) // xmm9 -= xmm1
	subpd(xmm2, xmm10) // xmm10 -= xmm2
	subpd(xmm3, xmm11) // xmm11 -= xmm3
	subpd(xmm4, xmm12) // xmm12 -= xmm4
	subpd(xmm5, xmm13) // xmm13 -= xmm5
	subpd(xmm6, xmm14) // xmm14 -= xmm6
	subpd(xmm7, xmm15) // xmm15 -= xmm7

	label(.TRSM)

	// Backward substitution, rows 3 -> 0. Row i first subtracts
	// alpha_ij * (row j) for every already-solved row j > i, then is
	// multiplied by the stored diagonal entry. Each solved row is
	// written back to b11 with two aligned 16-byte stores and scattered
	// to c11 one element at a time, so c11 may have arbitrary strides.
	//
	// a11 element (i,j) is addressed with the displacement expression
	// (i+j*4)*8 relative to rax.
	// NOTE(review): the mem(...)*8(rax) spelling presumably relies on
	// the macro emitting its argument verbatim in front of "(%rax)";
	// see bli_x86_asm_macros.h.

	mov(var(a11), rax) // load address of a11
	mov(var(c11), rcx) // load address of c11

	mov(var(rs_c), rsi) // load rs_c
	mov(var(cs_c), rdi) // load cs_c
	sal(imm(3), rsi) // rs_c *= sizeof( double )
	sal(imm(3), rdi) // cs_c *= sizeof( double )

	// Start at the last row of c11 and walk upwards. rcx addresses
	// columns 0 and 1 of the current row, rdx columns 2 and 3.
	add(rsi, rcx) // c11 += (4-1)*rs_c
	add(rsi, rcx)
	add(rsi, rcx)
	lea(mem(rcx, rdi, 2), rdx) // c11_2 = c11 + 2*cs_c;

	// iteration 0

	movddup(mem(3+3*4)*8(rax), xmm3) // load xmm3 = (1/alpha33)

	mulpd(xmm3, xmm11) // xmm11 *= (1/alpha33);
	mulpd(xmm3, xmm15) // xmm15 *= (1/alpha33);

	movaps(xmm11, mem(rbx, 6*16)) // store ( beta30 beta31 ) = xmm11
	movaps(xmm15, mem(rbx, 7*16)) // store ( beta32 beta33 ) = xmm15
	movlpd(xmm11, mem(rcx)) // store ( gamma30 ) = xmm11[0]
	movhpd(xmm11, mem(rcx, rdi, 1)) // store ( gamma31 ) = xmm11[1]
	movlpd(xmm15, mem(rdx)) // store ( gamma32 ) = xmm15[0]
	movhpd(xmm15, mem(rdx, rdi, 1)) // store ( gamma33 ) = xmm15[1]
	sub(rsi, rcx) // c11 -= rs_c
	sub(rsi, rdx) // c11_2 -= rs_c

	// iteration 1

	movddup(mem(2+2*4)*8(rax), xmm2) // load xmm2 = (1/alpha22)
	movddup(mem(2+3*4)*8(rax), xmm3) // load xmm3 = alpha23

	movaps(xmm3, xmm7) // xmm7 = xmm3
	mulpd(xmm11, xmm3) // xmm3 = alpha23 * ( beta30 beta31 )
	mulpd(xmm15, xmm7) // xmm7 = alpha23 * ( beta32 beta33 )
	subpd(xmm3, xmm10) // xmm10 -= xmm3
	subpd(xmm7, xmm14) // xmm14 -= xmm7
	mulpd(xmm2, xmm10) // xmm10 *= (1/alpha22);
	mulpd(xmm2, xmm14) // xmm14 *= (1/alpha22);

	movaps(xmm10, mem(rbx, 4*16)) // store ( beta20 beta21 ) = xmm10
	movaps(xmm14, mem(rbx, 5*16)) // store ( beta22 beta23 ) = xmm14
	movlpd(xmm10, mem(rcx)) // store ( gamma20 ) = xmm10[0]
	movhpd(xmm10, mem(rcx, rdi, 1)) // store ( gamma21 ) = xmm10[1]
	movlpd(xmm14, mem(rdx)) // store ( gamma22 ) = xmm14[0]
	movhpd(xmm14, mem(rdx, rdi, 1)) // store ( gamma23 ) = xmm14[1]
	sub(rsi, rcx) // c11 -= rs_c
	sub(rsi, rdx) // c11_2 -= rs_c

	// iteration 2

	movddup(mem(1+1*4)*8(rax), xmm1) // load xmm1 = (1/alpha11)
	movddup(mem(1+2*4)*8(rax), xmm2) // load xmm2 = alpha12
	movddup(mem(1+3*4)*8(rax), xmm3) // load xmm3 = alpha13

	movaps(xmm2, xmm6) // xmm6 = xmm2
	movaps(xmm3, xmm7) // xmm7 = xmm3
	mulpd(xmm10, xmm2) // xmm2 = alpha12 * ( beta20 beta21 )
	mulpd(xmm14, xmm6) // xmm6 = alpha12 * ( beta22 beta23 )
	mulpd(xmm11, xmm3) // xmm3 = alpha13 * ( beta30 beta31 )
	mulpd(xmm15, xmm7) // xmm7 = alpha13 * ( beta32 beta33 )
	addpd(xmm3, xmm2) // xmm2 += xmm3;
	addpd(xmm7, xmm6) // xmm6 += xmm7;
	subpd(xmm2, xmm9) // xmm9 -= xmm2
	subpd(xmm6, xmm13) // xmm13 -= xmm6
	mulpd(xmm1, xmm9) // xmm9 *= (1/alpha11);
	mulpd(xmm1, xmm13) // xmm13 *= (1/alpha11);

	movaps(xmm9, mem(rbx, 2*16)) // store ( beta10 beta11 ) = xmm9
	movaps(xmm13, mem(rbx, 3*16)) // store ( beta12 beta13 ) = xmm13
	movlpd(xmm9, mem(rcx)) // store ( gamma10 ) = xmm9[0]
	movhpd(xmm9, mem(rcx, rdi, 1)) // store ( gamma11 ) = xmm9[1]
	movlpd(xmm13, mem(rdx)) // store ( gamma12 ) = xmm13[0]
	movhpd(xmm13, mem(rdx, rdi, 1)) // store ( gamma13 ) = xmm13[1]
	sub(rsi, rcx) // c11 -= rs_c
	sub(rsi, rdx) // c11_2 -= rs_c

	// iteration 3

	movddup(mem(0+0*4)*8(rax), xmm0) // load xmm0 = (1/alpha00)
	movddup(mem(0+1*4)*8(rax), xmm1) // load xmm1 = alpha01
	movddup(mem(0+2*4)*8(rax), xmm2) // load xmm2 = alpha02
	movddup(mem(0+3*4)*8(rax), xmm3) // load xmm3 = alpha03

	movaps(xmm1, xmm5) // xmm5 = xmm1
	movaps(xmm2, xmm6) // xmm6 = xmm2
	movaps(xmm3, xmm7) // xmm7 = xmm3
	mulpd(xmm9, xmm1) // xmm1 = alpha01 * ( beta10 beta11 )
	mulpd(xmm13, xmm5) // xmm5 = alpha01 * ( beta12 beta13 )
	mulpd(xmm10, xmm2) // xmm2 = alpha02 * ( beta20 beta21 )
	mulpd(xmm14, xmm6) // xmm6 = alpha02 * ( beta22 beta23 )
	mulpd(xmm11, xmm3) // xmm3 = alpha03 * ( beta30 beta31 )
	mulpd(xmm15, xmm7) // xmm7 = alpha03 * ( beta32 beta33 )
	addpd(xmm2, xmm1) // xmm1 += xmm2;
	addpd(xmm6, xmm5) // xmm5 += xmm6;
	addpd(xmm3, xmm1) // xmm1 += xmm3;
	addpd(xmm7, xmm5) // xmm5 += xmm7;
	subpd(xmm1, xmm8) // xmm8 -= xmm1
	subpd(xmm5, xmm12) // xmm12 -= xmm5
	mulpd(xmm0, xmm8) // xmm8 *= (1/alpha00);
	mulpd(xmm0, xmm12) // xmm12 *= (1/alpha00);

	movaps(xmm8, mem(rbx, 0*16)) // store ( beta00 beta01 ) = xmm8
	movaps(xmm12, mem(rbx, 1*16)) // store ( beta02 beta03 ) = xmm12
	movlpd(xmm8, mem(rcx)) // store ( gamma00 ) = xmm8[0]
	movhpd(xmm8, mem(rcx, rdi, 1)) // store ( gamma01 ) = xmm8[1]
	movlpd(xmm12, mem(rdx)) // store ( gamma02 ) = xmm12[0]
	movhpd(xmm12, mem(rdx, rdi, 1)) // store ( gamma03 ) = xmm12[1]

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a12] "m" (a12), // 2
	  [a11] "m" (a11), // 3
	  [b21] "m" (b21), // 4
	  [b11] "m" (b11), // 5
	  [c11] "m" (c11), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c), // 8
	  [alpha] "m" (alpha), // 9
	  [b_next] "m" (b_next) // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}
blis-0.6.1/kernels/penryn/3/bli_trsm_l_penryn_asm_d4x4.c000066400000000000000000000174761360743507500232100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" #if 0 void bli_strsm_l_penryn_asm_8x4 ( float* restrict a11, float* restrict b11, float* restrict c11, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { } #endif void bli_dtrsm_l_penryn_asm_4x4 ( double* restrict a11, double* restrict b11, double* restrict c11, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() mov(var(b11), rbx) // load address of b11. 
movaps(mem(rbx, 0*16), xmm8) // xmm8 = ( beta00 beta01 ) movaps(mem(rbx, 1*16), xmm12) // xmm9 = ( beta02 beta03 ) movaps(mem(rbx, 2*16), xmm9) // xmm10 = ( beta10 beta11 ) movaps(mem(rbx, 3*16), xmm13) // xmm11 = ( beta12 beta13 ) movaps(mem(rbx, 4*16), xmm10) // xmm12 = ( beta20 beta21 ) movaps(mem(rbx, 5*16), xmm14) // xmm13 = ( beta22 beta23 ) movaps(mem(rbx, 6*16), xmm11) // xmm14 = ( beta30 beta31 ) movaps(mem(rbx, 7*16), xmm15) // xmm15 = ( beta32 beta33 ) mov(var(a11), rax) // load address of a11 mov(var(c11), rcx) // load address of c11 mov(var(rs_c), rsi) // load rs_c mov(var(cs_c), rdi) // load cs_c sal(imm(3), rsi) // rs_c *= sizeof( double ) sal(imm(3), rdi) // cs_c *= sizeof( double ) lea(mem(rcx, rdi, 2), rdx) // c11_2 = c11 + 2*cs_c // iteration 0 movddup(mem(0+0*4)*8(rax), xmm0) // load xmm0 = (1/alpha00) mulpd(xmm0, xmm8) // xmm8 *= (1/alpha00); mulpd(xmm0, xmm12) // xmm12 *= (1/alpha00); movaps(xmm8, mem(rbx, 0*16)) // store ( beta00 beta01 ) = xmm8 movaps(xmm12, mem(rbx, 1*16)) // store ( beta02 beta03 ) = xmm12 movlpd(xmm8, mem(rcx)) // store ( gamma00 ) = xmm8[0] movhpd(xmm8, mem(rcx, rdi, 1)) // store ( gamma01 ) = xmm8[1] movlpd(xmm12, mem(rdx)) // store ( gamma02 ) = xmm12[0] movhpd(xmm12, mem(rdx, rdi, 1)) // store ( gamma03 ) = xmm12[1] add(rsi, rcx) // c11 += rs_c add(rsi, rdx) // c11_2 += rs_c // iteration 1 movddup(mem(1+0*4)*8(rax), xmm0) // load xmm0 = alpha10 movddup(mem(1+1*4)*8(rax), xmm1) // load xmm1 = (1/alpha11) movaps(xmm0, xmm4) // xmm4 = xmm0 mulpd(xmm8, xmm0) // xmm0 = alpha10 * ( beta00 beta01 ) mulpd(xmm12, xmm4) // xmm4 = alpha10 * ( beta02 beta03 ) subpd(xmm0, xmm9) // xmm9 -= xmm0 subpd(xmm4, xmm13) // xmm13 -= xmm4 mulpd(xmm1, xmm9) // xmm9 *= (1/alpha11); mulpd(xmm1, xmm13) // xmm13 *= (1/alpha11); movaps(xmm9, mem(rbx, 2*16)) // store ( beta10 beta11 ) = xmm9 movaps(xmm13, mem(rbx, 3*16)) // store ( beta12 beta13 ) = xmm13 movlpd(xmm9, mem(rcx)) // store ( gamma10 ) = xmm9[0] movhpd(xmm9, mem(rcx, rdi, 1)) // store 
( gamma11 ) = xmm9[1] movlpd(xmm13, mem(rdx)) // store ( gamma12 ) = xmm13[0] movhpd(xmm13, mem(rdx, rdi, 1)) // store ( gamma13 ) = xmm13[1] add(rsi, rcx) // c11 += rs_c add(rsi, rdx) // c11_2 += rs_c // iteration 2 movddup(mem(2+0*4)*8(rax), xmm0) // load xmm0 = alpha20 movddup(mem(2+1*4)*8(rax), xmm1) // load xmm1 = alpha21 movddup(mem(2+2*4)*8(rax), xmm2) // load xmm2 = (1/alpha22) movaps(xmm0, xmm4) // xmm4 = xmm0 movaps(xmm1, xmm5) // xmm5 = xmm1 mulpd(xmm8, xmm0) // xmm0 = alpha20 * ( beta00 beta01 ) mulpd(xmm12, xmm4) // xmm4 = alpha20 * ( beta02 beta03 ) mulpd(xmm9, xmm1) // xmm1 = alpha21 * ( beta10 beta11 ) mulpd(xmm13, xmm5) // xmm5 = alpha21 * ( beta12 beta13 ) addpd(xmm1, xmm0) // xmm0 += xmm1; addpd(xmm5, xmm4) // xmm4 += xmm5; subpd(xmm0, xmm10) // xmm10 -= xmm0 subpd(xmm4, xmm14) // xmm14 -= xmm4 mulpd(xmm2, xmm10) // xmm10 *= (1/alpha22); mulpd(xmm2, xmm14) // xmm14 *= (1/alpha22); movaps(xmm10, mem(rbx, 4*16)) // store ( beta20 beta21 ) = xmm10 movaps(xmm14, mem(rbx, 5*16)) // store ( beta22 beta23 ) = xmm14 movlpd(xmm10, mem(rcx)) // store ( gamma20 ) = xmm10[0] movhpd(xmm10, mem(rcx, rdi, 1)) // store ( gamma21 ) = xmm10[1] movlpd(xmm14, mem(rdx)) // store ( gamma22 ) = xmm14[0] movhpd(xmm14, mem(rdx, rdi, 1)) // store ( gamma23 ) = xmm14[1] add(rsi, rcx) // c11 += rs_c add(rsi, rdx) // c11_2 += rs_c // iteration 3 movddup(mem(3+0*4)*8(rax), xmm0) // load xmm0 = alpha30 movddup(mem(3+1*4)*8(rax), xmm1) // load xmm1 = alpha31 movddup(mem(3+2*4)*8(rax), xmm2) // load xmm2 = alpha32 movddup(mem(3+3*4)*8(rax), xmm3) // load xmm3 = (1/alpha33) movaps(xmm0, xmm4) // xmm4 = xmm0 movaps(xmm1, xmm5) // xmm5 = xmm1 movaps(xmm2, xmm6) // xmm6 = xmm2 mulpd(xmm8, xmm0) // xmm0 = alpha30 * ( beta00 beta01 ) mulpd(xmm12, xmm4) // xmm4 = alpha30 * ( beta02 beta03 ) mulpd(xmm9, xmm1) // xmm1 = alpha31 * ( beta10 beta11 ) mulpd(xmm13, xmm5) // xmm5 = alpha31 * ( beta12 beta13 ) mulpd(xmm10, xmm2) // xmm2 = alpha32 * ( beta20 beta21 ) mulpd(xmm14, xmm6) // xmm6 = 
alpha32 * ( beta22 beta23 ) addpd(xmm1, xmm0) // xmm0 += xmm1; addpd(xmm5, xmm4) // xmm4 += xmm5; addpd(xmm2, xmm0) // xmm0 += xmm2; addpd(xmm6, xmm4) // xmm4 += xmm6; subpd(xmm0, xmm11) // xmm11 -= xmm0 subpd(xmm4, xmm15) // xmm15 -= xmm4 mulpd(xmm3, xmm11) // xmm11 *= (1/alpha33); mulpd(xmm3, xmm15) // xmm15 *= (1/alpha33); movaps(xmm11, mem(rbx, 6*16)) // store ( beta30 beta31 ) = xmm11 movaps(xmm15, mem(rbx, 7*16)) // store ( beta32 beta33 ) = xmm15 movlpd(xmm11, mem(rcx)) // store ( gamma30 ) = xmm11[0] movhpd(xmm11, mem(rcx, rdi, 1)) // store ( gamma31 ) = xmm11[1] movlpd(xmm15, mem(rdx)) // store ( gamma32 ) = xmm15[0] movhpd(xmm15, mem(rdx, rdi, 1)) // store ( gamma33 ) = xmm15[1] end_asm( : // output operands (none) : // input operands [a11] "m" (a11), // 0 [b11] "m" (b11), // 1 [c11] "m" (c11), // 2 [rs_c] "m" (rs_c), // 3 [cs_c] "m" (cs_c) // 4 : // register clobber list "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"r8", "r9", "r10", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "memory" ) } blis-0.6.1/kernels/penryn/3/bli_trsm_u_penryn_asm_d4x4.c000066400000000000000000000175241360743507500232130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

#define BLIS_ASM_SYNTAX_ATT
#include "bli_x86_asm_macros.h"

// Single-precision counterpart: compiled out (#if 0) and its body is empty,
// so no single-precision upper trsm kernel is provided by this file.
#if 0
void bli_strsm_u_penryn_asm_8x4
     (
       float*     restrict a11,
       float*     restrict b11,
       float*     restrict c11, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
}
#endif

// bli_dtrsm_u_penryn_asm_4x4
//
// Double-precision 4x4 triangular solve, upper-triangular case, done as a
// fully unrolled back substitution: row 3 of the 4x4 block is solved first,
// then rows 2, 1 and 0. For each row i the code computes
//
//   beta(i,:) = ( beta(i,:) - sum_{j>i} alpha(i,j) * beta(j,:) ) * diag(i)
//
// and writes the result both back into b11 and out to c11.
//
// Parameters:
//   a11   - 4x4 coefficient block. Element (i,j) is addressed with the
//           expression (i + j*4)*8 relative to a11, i.e. 4 doubles between
//           successive j. NOTE(review): the diagonal entry is *multiplied*
//           into the row (mulpd), and the existing comments name it
//           (1/alpha_ii) -- presumably the caller stores the diagonal
//           pre-inverted; confirm against the packing code.
//   b11   - 4x4 right-hand-side block, read and overwritten in place. Row i
//           occupies the 32 bytes at b11 + i*32 (two 16-byte halves). It is
//           accessed with movaps, the aligned move, so b11 is assumed to be
//           16-byte aligned.
//   c11   - output block; element (i,j) is stored at
//           c11 + i*rs_c + j*cs_c (in units of doubles).
//   rs_c0 - row stride of c11, in elements.
//   cs_c0 - column stride of c11, in elements.
//   data  - not referenced by this kernel.
//   cntx  - not referenced by this kernel.
//
// Register roles inside the asm block:
//   rax = a11, rbx = b11, rcx = current row of c11 (columns 0,1),
//   rdx = rcx + 2*cs_c (columns 2,3), rsi = rs_c in bytes, rdi = cs_c in bytes
//   xmm8 /xmm12 = row 0 of b11 (columns 0-1 / 2-3)
//   xmm9 /xmm13 = row 1
//   xmm10/xmm14 = row 2
//   xmm11/xmm15 = row 3
//   xmm0-xmm3   = broadcast elements of a11, xmm5-xmm7 = scratch copies
//   (xmm4 appears in the clobber list but is not used by any instruction
//   in this kernel.)
void bli_dtrsm_u_penryn_asm_4x4
     (
       double*    restrict a11,
       double*    restrict b11,
       double*    restrict c11, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t rs_c = rs_c0;
	uint64_t cs_c = cs_c0;

	begin_asm()

	mov(var(b11), rbx) // load address of b11.

	// Load the whole 4x4 block of b11 into xmm8-xmm15 (two registers per row).
	movaps(mem(rbx, 0*16), xmm8) // xmm8 = ( beta00 beta01 )
	movaps(mem(rbx, 1*16), xmm12) // xmm12 = ( beta02 beta03 )
	movaps(mem(rbx, 2*16), xmm9) // xmm9 = ( beta10 beta11 )
	movaps(mem(rbx, 3*16), xmm13) // xmm13 = ( beta12 beta13 )
	movaps(mem(rbx, 4*16), xmm10) // xmm10 = ( beta20 beta21 )
	movaps(mem(rbx, 5*16), xmm14) // xmm14 = ( beta22 beta23 )
	movaps(mem(rbx, 6*16), xmm11) // xmm11 = ( beta30 beta31 )
	movaps(mem(rbx, 7*16), xmm15) // xmm15 = ( beta32 beta33 )

	mov(var(a11), rax) // load address of a11
	mov(var(c11), rcx) // load address of c11

	mov(var(rs_c), rsi) // load rs_c
	mov(var(cs_c), rdi) // load cs_c
	sal(imm(3), rsi) // rs_c *= sizeof( double )
	sal(imm(3), rdi) // cs_c *= sizeof( double )

	// Start at the last row of c11, since the solve proceeds bottom-up.
	add(rsi, rcx) // c11 += (4-1)*rs_c
	add(rsi, rcx)
	add(rsi, rcx)
	lea(mem(rcx, rdi, 2), rdx) // c11_2 = c11 + 2*cs_c;

	// iteration 0
	// Row 3: no off-diagonal terms, only the scaling by the diagonal entry.

	movddup(mem(3+3*4)*8(rax), xmm3) // load xmm3 = (1/alpha33)

	mulpd(xmm3, xmm11) // xmm11 *= (1/alpha33);
	mulpd(xmm3, xmm15) // xmm15 *= (1/alpha33);

	movaps(xmm11, mem(rbx, 6*16)) // store ( beta30 beta31 ) = xmm11
	movaps(xmm15, mem(rbx, 7*16)) // store ( beta32 beta33 ) = xmm15
	movlpd(xmm11, mem(rcx)) // store ( gamma30 ) = xmm11[0]
	movhpd(xmm11, mem(rcx, rdi, 1)) // store ( gamma31 ) = xmm11[1]
	movlpd(xmm15, mem(rdx)) // store ( gamma32 ) = xmm15[0]
	movhpd(xmm15, mem(rdx, rdi, 1)) // store ( gamma33 ) = xmm15[1]
	sub(rsi, rcx) // c11 -= rs_c
	sub(rsi, rdx) // c11_2 -= rs_c

	// iteration 1
	// Row 2: subtract alpha23 * (solved row 3), then scale.

	movddup(mem(2+2*4)*8(rax), xmm2) // load xmm2 = (1/alpha22)
	movddup(mem(2+3*4)*8(rax), xmm3) // load xmm3 = alpha23

	movaps(xmm3, xmm7) // xmm7 = xmm3
	mulpd(xmm11, xmm3) // xmm3 = alpha23 * ( beta30 beta31 )
	mulpd(xmm15, xmm7) // xmm7 = alpha23 * ( beta32 beta33 )
	subpd(xmm3, xmm10) // xmm10 -= xmm3
	subpd(xmm7, xmm14) // xmm14 -= xmm7
	mulpd(xmm2, xmm10) // xmm10 *= (1/alpha22);
	mulpd(xmm2, xmm14) // xmm14 *= (1/alpha22);

	movaps(xmm10, mem(rbx, 4*16)) // store ( beta20 beta21 ) = xmm10
	movaps(xmm14, mem(rbx, 5*16)) // store ( beta22 beta23 ) = xmm14
	movlpd(xmm10, mem(rcx)) // store ( gamma20 ) = xmm10[0]
	movhpd(xmm10, mem(rcx, rdi, 1)) // store ( gamma21 ) = xmm10[1]
	movlpd(xmm14, mem(rdx)) // store ( gamma22 ) = xmm14[0]
	movhpd(xmm14, mem(rdx, rdi, 1)) // store ( gamma23 ) = xmm14[1]
	sub(rsi, rcx) // c11 -= rs_c
	sub(rsi, rdx) // c11_2 -= rs_c

	// iteration 2
	// Row 1: subtract alpha12 * (row 2) + alpha13 * (row 3), then scale.

	movddup(mem(1+1*4)*8(rax), xmm1) // load xmm1 = (1/alpha11)
	movddup(mem(1+2*4)*8(rax), xmm2) // load xmm2 = alpha12
	movddup(mem(1+3*4)*8(rax), xmm3) // load xmm3 = alpha13

	movaps(xmm2, xmm6) // xmm6 = xmm2
	movaps(xmm3, xmm7) // xmm7 = xmm3
	mulpd(xmm10, xmm2) // xmm2 = alpha12 * ( beta20 beta21 )
	mulpd(xmm14, xmm6) // xmm6 = alpha12 * ( beta22 beta23 )
	mulpd(xmm11, xmm3) // xmm3 = alpha13 * ( beta30 beta31 )
	mulpd(xmm15, xmm7) // xmm7 = alpha13 * ( beta32 beta33 )
	addpd(xmm3, xmm2) // xmm2 += xmm3;
	addpd(xmm7, xmm6) // xmm6 += xmm7;
	subpd(xmm2, xmm9) // xmm9 -= xmm2
	subpd(xmm6, xmm13) // xmm13 -= xmm6
	mulpd(xmm1, xmm9) // xmm9 *= (1/alpha11);
	mulpd(xmm1, xmm13) // xmm13 *= (1/alpha11);

	movaps(xmm9, mem(rbx, 2*16)) // store ( beta10 beta11 ) = xmm9
	movaps(xmm13, mem(rbx, 3*16)) // store ( beta12 beta13 ) = xmm13
	movlpd(xmm9, mem(rcx)) // store ( gamma10 ) = xmm9[0]
	movhpd(xmm9, mem(rcx, rdi, 1)) // store ( gamma11 ) = xmm9[1]
	movlpd(xmm13, mem(rdx)) // store ( gamma12 ) = xmm13[0]
	movhpd(xmm13, mem(rdx, rdi, 1)) // store ( gamma13 ) = xmm13[1]
	sub(rsi, rcx) // c11 -= rs_c
	sub(rsi, rdx) // c11_2 -= rs_c

	// iteration 3
	// Row 0: subtract alpha01*(row 1) + alpha02*(row 2) + alpha03*(row 3),
	// then scale. rcx/rdx are not decremented afterwards; this is the last row.

	movddup(mem(0+0*4)*8(rax), xmm0) // load xmm0 = (1/alpha00)
	movddup(mem(0+1*4)*8(rax), xmm1) // load xmm1 = alpha01
	movddup(mem(0+2*4)*8(rax), xmm2) // load xmm2 = alpha02
	movddup(mem(0+3*4)*8(rax), xmm3) // load xmm3 = alpha03

	movaps(xmm1, xmm5) // xmm5 = xmm1
	movaps(xmm2, xmm6) // xmm6 = xmm2
	movaps(xmm3, xmm7) // xmm7 = xmm3
	mulpd(xmm9, xmm1) // xmm1 = alpha01 * ( beta10 beta11 )
	mulpd(xmm13, xmm5) // xmm5 = alpha01 * ( beta12 beta13 )
	mulpd(xmm10, xmm2) // xmm2 = alpha02 * ( beta20 beta21 )
	mulpd(xmm14, xmm6) // xmm6 = alpha02 * ( beta22 beta23 )
	mulpd(xmm11, xmm3) // xmm3 = alpha03 * ( beta30 beta31 )
	mulpd(xmm15, xmm7) // xmm7 = alpha03 * ( beta32 beta33 )
	addpd(xmm2, xmm1) // xmm1 += xmm2;
	addpd(xmm6, xmm5) // xmm5 += xmm6;
	addpd(xmm3, xmm1) // xmm1 += xmm3;
	addpd(xmm7, xmm5) // xmm5 += xmm7;
	subpd(xmm1, xmm8) // xmm8 -= xmm1
	subpd(xmm5, xmm12) // xmm12 -= xmm5
	mulpd(xmm0, xmm8) // xmm8 *= (1/alpha00);
	mulpd(xmm0, xmm12) // xmm12 *= (1/alpha00);

	movaps(xmm8, mem(rbx, 0*16)) // store ( beta00 beta01 ) = xmm8
	movaps(xmm12, mem(rbx, 1*16)) // store ( beta02 beta03 ) = xmm12
	movlpd(xmm8, mem(rcx)) // store ( gamma00 ) = xmm8[0]
	movhpd(xmm8, mem(rcx, rdi, 1)) // store ( gamma01 ) = xmm8[1]
	movlpd(xmm12, mem(rdx)) // store ( gamma02 ) = xmm12[0]
	movhpd(xmm12, mem(rdx, rdi, 1)) // store ( gamma03 ) = xmm12[1]

	end_asm(
	: // output operands (none)
	: // input operands
	  [a11] "m" (a11), // 0
	  [b11] "m" (b11), // 1
	  [c11] "m" (c11), // 2
	  [rs_c] "m" (rs_c), // 3
	  [cs_c] "m" (cs_c) // 4
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)

}
blis-0.6.1/kernels/penryn/bli_kernels_penryn.h000066400000000000000000000037201360743507500214760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.

Copyright (C) 2014, The University of Texas at Austin

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// Declarations of the penryn micro-kernels, one macro invocation per kernel.
// NOTE(review): the *_UKR_PROT macros are defined outside this file.
// Presumably each takes ( C type, type-prefix character, kernel name ) and
// declares a function named bli_<prefix><kernel name> -- e.g. the last line
// would correspond to bli_dtrsm_u_penryn_asm_4x4 -- confirm against the
// macro definitions.

// gemm micro-kernels (single and double precision).
GEMM_UKR_PROT( float, s, gemm_penryn_asm_8x4 )
GEMM_UKR_PROT( double, d, gemm_penryn_asm_4x4 )

// Fused gemm+trsm micro-kernels, lower (_l) and upper (_u) variants.
GEMMTRSM_UKR_PROT( double, d, gemmtrsm_l_penryn_asm_4x4 )
GEMMTRSM_UKR_PROT( double, d, gemmtrsm_u_penryn_asm_4x4 )

// Stand-alone trsm micro-kernels, lower (_l) and upper (_u) variants.
TRSM_UKR_PROT( double, d, trsm_l_penryn_asm_4x4 )
TRSM_UKR_PROT( double, d, trsm_u_penryn_asm_4x4 )
blis-0.6.1/kernels/piledriver/000077500000000000000000000000001360743507500162715ustar00rootroot00000000000000blis-0.6.1/kernels/piledriver/3/000077500000000000000000000000001360743507500164335ustar00rootroot00000000000000blis-0.6.1/kernels/piledriver/3/bli_gemm_piledriver_asm_d8x3.c000066400000000000000000002137401360743507500243140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* NOTE: The micro-kernels in this file were partially inspired by portions of code found in OpenBLAS 0.2.12 (http://www.openblas.net/). 
-FGVZ */ #include "blis.h" #define BLIS_ASM_SYNTAX_ATT #include "bli_x86_asm_macros.h" void bli_sgemm_piledriver_asm_16x3 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 8; uint64_t k_left = k0 % 8; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; begin_asm() mov(var(a), rax) // load address of a. mov(var(b), rbx) // load address of b. mov(var(b_next), r15) // load address of b_next. mov(var(a_next), r14) // load address of a_next. prefetch(0, mem(rbx, 128)) // prefetch b prefetch(0, mem(rbx, 64+128)) // prefetch b prefetch(0, mem(rbx, 128+128)) // prefetch b add(imm(32*4), rax) add(imm(12*4), rbx) mov(var(c), rcx) // load address of c mov(var(cs_c), rdi) // load cs_c lea(mem(, rdi, 4), rdi) // cs_c *= sizeof(float) lea(mem(rcx, rdi, 1), r10) // load address of c + 1*cs_c; lea(mem(rcx, rdi, 2), r11) // load address of c + 2*cs_c; vbroadcastss(mem(rbx, -12*4), xmm1) vbroadcastss(mem(rbx, -11*4), xmm2) vbroadcastss(mem(rbx, -10*4), xmm3) vxorps(xmm4, xmm4, xmm4) vxorps(xmm5, xmm5, xmm5) vxorps(xmm6, xmm6, xmm6) vxorps(xmm7, xmm7, xmm7) vxorps(xmm8, xmm8, xmm8) vxorps(xmm9, xmm9, xmm9) vxorps(xmm10, xmm10, xmm10) vxorps(xmm11, xmm11, xmm11) vxorps(xmm12, xmm12, xmm12) vxorps(xmm13, xmm13, xmm13) vxorps(xmm14, xmm14, xmm14) vxorps(xmm15, xmm15, xmm15) mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.SCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.SLOOPKITER) // MAIN LOOP je(.SCONSIDKLEFT) // if i == 0, jump to k_left code. 
prefetch(0, mem(rbx, 16+192)) // prefetch b // iteration 0 vmovaps(mem(rax, -32*4), xmm0) prefetch(0, mem(rax, 384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, -28*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, -24*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, -20*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, -9*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, -8*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) // iteration 1 vmovaps(mem(rax, -16*4), xmm0) vbroadcastss(mem(rbx, -7*4), xmm3) prefetch(0, mem(rax, 64+384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, -12*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, -8*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, -4*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, -6*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, -5*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) // iteration 2 vmovaps(mem(rax, 0*4), xmm0) vbroadcastss(mem(rbx, -4*4), xmm3) prefetch(0, mem(rax, 128+384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, 4*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, 8*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, 12*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, -3*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, -2*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) // iteration 3 vmovaps(mem(rax, 16*4), xmm0) vbroadcastss(mem(rbx, -1*4), xmm3) 
prefetch(0, mem(rax, 192+384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, 20*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, 24*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, 28*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, 0*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, 1*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) add(imm(4*16*4), rax) // a += 4*16 (unroll x mr) // iteration 4 vmovaps(mem(rax, -32*4), xmm0) vbroadcastss(mem(rbx, 2*4), xmm3) prefetch(0, mem(rax, 384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, -28*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, -24*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, -20*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, 3*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, 4*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) prefetch(0, mem(rbx, 80+192)) // prefetch b // iteration 5 vmovaps(mem(rax, -16*4), xmm0) vbroadcastss(mem(rbx, 5*4), xmm3) prefetch(0, mem(rax, 64+384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, -12*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, -8*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, -4*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, 6*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, 7*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) // iteration 6 vmovaps(mem(rax, 0*4), xmm0) vbroadcastss(mem(rbx, 8*4), xmm3) 
prefetch(0, mem(rax, 128+384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, 4*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, 8*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, 12*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, 9*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, 10*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) // iteration 7 vmovaps(mem(rax, 16*4), xmm0) vbroadcastss(mem(rbx, 11*4), xmm3) add(imm(8*3*4), rbx) // a += 4*3 (unroll x nr) prefetch(0, mem(rax, 192+384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, 20*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, 24*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, 28*4), xmm0) add(imm(4*16*4), rax) // a += 4*16 (unroll x mr) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, -12*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, -11*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) vbroadcastss(mem(rbx, -10*4), xmm3) dec(rsi) // i -= 1; jmp(.SLOOPKITER) // jump to beginning of loop. label(.SCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.SPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.SLOOPKLEFT) // EDGE LOOP je(.SPOSTACCUM) // if i == 0, we're done. 
prefetch(0, mem(rbx, 16+192)) // prefetch b // iteration 0 vmovaps(mem(rax, -32*4), xmm0) prefetch(0, mem(rax, 384)) vfmadd231ps(xmm1, xmm0, xmm4) vfmadd231ps(xmm2, xmm0, xmm5) vfmadd231ps(xmm3, xmm0, xmm6) vmovaps(mem(rax, -28*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm7) vfmadd231ps(xmm2, xmm0, xmm8) vfmadd231ps(xmm3, xmm0, xmm9) vmovaps(mem(rax, -24*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm10) vfmadd231ps(xmm2, xmm0, xmm11) vfmadd231ps(xmm3, xmm0, xmm12) vmovaps(mem(rax, -20*4), xmm0) vfmadd231ps(xmm1, xmm0, xmm13) vbroadcastss(mem(rbx, -9*4), xmm1) vfmadd231ps(xmm2, xmm0, xmm14) vbroadcastss(mem(rbx, -8*4), xmm2) vfmadd231ps(xmm3, xmm0, xmm15) vbroadcastss(mem(rbx, -7*4), xmm3) add(imm(1*16*4), rax) // a += 4*16 (unroll x mr) add(imm(1*3*4), rbx) // a += 4*3 (unroll x nr) dec(rsi) // i -= 1; jmp(.SLOOPKLEFT) // jump to beginning of loop. label(.SPOSTACCUM) prefetchw0(mem(rcx, 0*8)) // prefetch c + 0*cs_c prefetchw0(mem(r10, 0*8)) // prefetch c + 1*cs_c prefetchw0(mem(r11, 0*8)) // prefetch c + 2*cs_c // xmm4: xmm5: xmm6: // ( ab00 ( ab01 ( ab02 // ab10 ab11 ab12 // ab20 ab21 ab22 // ab30 ) ab31 ) ab32 ) // xmm7: xmm8: xmm9: // ( ab40 ( ab41 ( ab42 // ab50 ab51 ab52 // ab60 ab61 ab62 // ab70 ) ab71 ) ab72 ) // xmm10: xmm11: xmm12: // ( ab80 ( ab01 ( ab02 // ab90 ab11 ab12 // abA0 abA1 abA2 // abB0 ) abB1 ) abB2 ) // xmm13: xmm14: xmm15: // ( abC0 ( abC1 ( abC2 // abD0 abD1 abD2 // abE0 abE1 abE2 // abF0 ) abF1 ) abF2 ) mov(var(alpha), rax) // load address of alpha mov(var(beta), rbx) // load address of beta vbroadcastss(mem(rax), xmm0) // load alpha and duplicate vbroadcastss(mem(rbx), xmm2) // load beta and duplicate vmulps(xmm0, xmm4, xmm4) // scale by alpha vmulps(xmm0, xmm5, xmm5) vmulps(xmm0, xmm6, xmm6) vmulps(xmm0, xmm7, xmm7) vmulps(xmm0, xmm8, xmm8) vmulps(xmm0, xmm9, xmm9) vmulps(xmm0, xmm10, xmm10) vmulps(xmm0, xmm11, xmm11) vmulps(xmm0, xmm12, xmm12) vmulps(xmm0, xmm13, xmm13) vmulps(xmm0, xmm14, xmm14) vmulps(xmm0, xmm15, xmm15) prefetch(0, mem(r14)) // 
prefetch a_next prefetch(0, mem(r14, 64)) // prefetch a_next mov(var(rs_c), rsi) // load rs_c lea(mem(, rsi, 4), rsi) // rsi = rs_c * sizeof(float) //lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*rs_c; lea(mem(, rsi, 2), r12) // r12 = 2*rs_c; lea(mem(rsi, rsi, 2), r13) // r13 = 3*rs_c; // determine if // c % 32 == 0, AND // 4*cs_c % 32 == 0, AND // rs_c == 1 // ie: aligned, ldim aligned, and // column-stored cmp(imm(4), rsi) // set ZF if (4*rs_c) == 4. sete(bl) // bl = ( ZF == 1 ? 1 : 0 ); test(imm(31), rcx) // set ZF if c & 32 is zero. setz(bh) // bh = ( ZF == 0 ? 1 : 0 ); test(imm(31), rdi) // set ZF if (4*cs_c) & 32 is zero. setz(al) // al = ( ZF == 0 ? 1 : 0 ); // and(bl,bh) followed by // and(bh,al) will reveal result prefetch(0, mem(r15)) // prefetch b_next prefetch(0, mem(r15, 64)) // prefetch b_next // now avoid loading C if beta == 0 vxorps(xmm0, xmm0, xmm0) // set xmm0 to zero. vucomiss(xmm0, xmm2) // set ZF if beta == 0. je(.SBETAZERO) // if ZF = 1, jump to beta == 0 case // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. 
jne(.SCOLSTORED) // jump to column storage case label(.SGENSTORED) vmovlps(mem(rcx), xmm0, xmm0) // load c00:c30 vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm4, xmm0, xmm0) vmovss(xmm0, mem(rcx)) // store c00:c30 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) // c += 4*rs_c; vmovlps(mem(rcx), xmm0, xmm0) // load c40:c70 vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm7, xmm0, xmm0) vmovss(xmm0, mem(rcx)) // store c40:c70 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) // c += 4*rs_c; vmovlps(mem(rcx), xmm0, xmm0) // load c80:cB0 vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm10, xmm0, xmm0) vmovss(xmm0, mem(rcx)) // store c80:cB0 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) // c += 4*rs_c; vmovlps(mem(rcx), xmm0, xmm0) // load cC0:cF0 vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm1, xmm1) vmovhps(mem(rcx, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm13, xmm0, xmm0) vmovss(xmm0, mem(rcx)) // store cC0:cF0 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, rsi, 1)) 
vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) // c += 4*rs_c; vmovlps(mem(r10), xmm0, xmm0) // load c01:c31 vmovhps(mem(r10, rsi, 1), xmm0, xmm0) vmovlps(mem(r10, r12, 1), xmm1, xmm1) vmovhps(mem(r10, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm5, xmm0, xmm0) vmovss(xmm0, mem(r10)) // store c01:c31 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r13, 1)) lea(mem(r10, rsi, 4), r10) // c += 4*rs_c; vmovlps(mem(r10), xmm0, xmm0) // load c41:c71 vmovhps(mem(r10, rsi, 1), xmm0, xmm0) vmovlps(mem(r10, r12, 1), xmm1, xmm1) vmovhps(mem(r10, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm8, xmm0, xmm0) vmovss(xmm0, mem(r10)) // store c41:c71 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r13, 1)) lea(mem(r10, rsi, 4), r10) // c += 4*rs_c; vmovlps(mem(r10), xmm0, xmm0) // load c81:cB1 vmovhps(mem(r10, rsi, 1), xmm0, xmm0) vmovlps(mem(r10, r12, 1), xmm1, xmm1) vmovhps(mem(r10, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm11, xmm0, xmm0) vmovss(xmm0, mem(r10)) // store c81:cB1 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r13, 1)) lea(mem(r10, rsi, 4), r10) // c += 4*rs_c; vmovlps(mem(r10), xmm0, xmm0) // load cC1:cF1 vmovhps(mem(r10, rsi, 1), xmm0, xmm0) vmovlps(mem(r10, r12, 1), xmm1, xmm1) vmovhps(mem(r10, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm14, xmm0, xmm0) vmovss(xmm0, 
mem(r10)) // store cC1:cF1 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r13, 1)) lea(mem(r10, rsi, 4), r10) // c += 4*rs_c; vmovlps(mem(r11), xmm0, xmm0) // load c02:c32 vmovhps(mem(r11, rsi, 1), xmm0, xmm0) vmovlps(mem(r11, r12, 1), xmm1, xmm1) vmovhps(mem(r11, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm6, xmm0, xmm0) vmovss(xmm0, mem(r11)) // store c02:c32 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r13, 1)) lea(mem(r11, rsi, 4), r11) // c += 4*rs_c; vmovlps(mem(r11), xmm0, xmm0) // load c42:c72 vmovhps(mem(r11, rsi, 1), xmm0, xmm0) vmovlps(mem(r11, r12, 1), xmm1, xmm1) vmovhps(mem(r11, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm9, xmm0, xmm0) vmovss(xmm0, mem(r11)) // store c42:c72 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r13, 1)) lea(mem(r11, rsi, 4), r11) // c += 4*rs_c; vmovlps(mem(r11), xmm0, xmm0) // load c82:cB2 vmovhps(mem(r11, rsi, 1), xmm0, xmm0) vmovlps(mem(r11, r12, 1), xmm1, xmm1) vmovhps(mem(r11, r13, 1), xmm1, xmm1) vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm12, xmm0, xmm0) vmovss(xmm0, mem(r11)) // store c82:cB2 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r13, 1)) lea(mem(r11, rsi, 4), r11) // c += 4*rs_c; vmovlps(mem(r11), xmm0, xmm0) // load cC2:cF2 vmovhps(mem(r11, rsi, 1), xmm0, xmm0) vmovlps(mem(r11, r12, 1), xmm1, xmm1) vmovhps(mem(r11, r13, 1), xmm1, xmm1) 
vshufps(imm(0x88), xmm1, xmm0, xmm0) vmulps(xmm2, xmm0, xmm0) vaddps(xmm15, xmm0, xmm0) vmovss(xmm0, mem(r11)) // store cC2:cF1 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r13, 1)) lea(mem(r11, rsi, 4), r11) // c += 4*rs_c; jmp(.SDONE) // jump to end. label(.SCOLSTORED) vfmadd231ps(mem(rcx, 0*16), xmm2, xmm4) vfmadd231ps(mem(rcx, 1*16), xmm2, xmm7) vfmadd231ps(mem(rcx, 2*16), xmm2, xmm10) vfmadd231ps(mem(rcx, 3*16), xmm2, xmm13) vmovups(xmm4, mem(rcx, 0*16)) vmovups(xmm7, mem(rcx, 1*16)) vmovups(xmm10, mem(rcx, 2*16)) vmovups(xmm13, mem(rcx, 3*16)) vfmadd231ps(mem(r10, 0*16), xmm2, xmm5) vfmadd231ps(mem(r10, 1*16), xmm2, xmm8) vfmadd231ps(mem(r10, 2*16), xmm2, xmm11) vfmadd231ps(mem(r10, 3*16), xmm2, xmm14) vmovups(xmm5, mem(r10, 0*16)) vmovups(xmm8, mem(r10, 1*16)) vmovups(xmm11, mem(r10, 2*16)) vmovups(xmm14, mem(r10, 3*16)) vfmadd231ps(mem(r11, 0*16), xmm2, xmm6) vfmadd231ps(mem(r11, 1*16), xmm2, xmm9) vfmadd231ps(mem(r11, 2*16), xmm2, xmm12) vfmadd231ps(mem(r11, 3*16), xmm2, xmm15) vmovups(xmm6, mem(r11, 0*16)) vmovups(xmm9, mem(r11, 1*16)) vmovups(xmm12, mem(r11, 2*16)) vmovups(xmm15, mem(r11, 3*16)) jmp(.SDONE) // jump to end. label(.SBETAZERO) // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. 
jne(.SCOLSTORBZ) // jump to column storage case label(.SGENSTORBZ) vmovaps(xmm4, xmm0) vmovss(xmm0, mem(rcx)) // store c00:c30 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) // c += 4*rs_c; vmovaps(xmm7, xmm0) vmovss(xmm0, mem(rcx)) // store c40:c70 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) // c += 4*rs_c; vmovaps(xmm10, xmm0) vmovss(xmm0, mem(rcx)) // store c80:cB0 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) // c += 4*rs_c; vmovaps(xmm13, xmm0) vmovss(xmm0, mem(rcx)) // store cC0:cF0 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(rcx, r13, 1)) lea(mem(rcx, rsi, 4), rcx) // c += 4*rs_c; vmovaps(xmm5, xmm0) vmovss(xmm0, mem(r10)) // store c01:c31 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r13, 1)) lea(mem(r10, rsi, 4), r10) // c += 4*rs_c; vmovaps(xmm8, xmm0) vmovss(xmm0, mem(r10)) // store c41:c71 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r13, 1)) lea(mem(r10, rsi, 4), r10) // c += 4*rs_c; vmovaps(xmm11, xmm0) vmovss(xmm0, mem(r10)) // store c81:cB1 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) 
vmovss(xmm0, mem(r10, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r13, 1)) lea(mem(r10, rsi, 4), r10) // c += 4*rs_c; vmovaps(xmm14, xmm0) vmovss(xmm0, mem(r10)) // store cC1:cF1 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r10, r13, 1)) lea(mem(r10, rsi, 4), r10) // c += 4*rs_c; vmovaps(xmm6, xmm0) vmovss(xmm0, mem(r11)) // store c02:c32 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r13, 1)) lea(mem(r11, rsi, 4), r11) // c += 4*rs_c; vmovaps(xmm9, xmm0) vmovss(xmm0, mem(r11)) // store c42:c72 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r13, 1)) lea(mem(r11, rsi, 4), r11) // c += 4*rs_c; vmovaps(xmm12, xmm0) vmovss(xmm0, mem(r11)) // store c82:cB2 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r13, 1)) lea(mem(r11, rsi, 4), r11) // c += 4*rs_c; vmovaps(xmm15, xmm0) vmovss(xmm0, mem(r11)) // store cC2:cF1 vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, rsi, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r12, 1)) vpermilps(imm(0x39), xmm0, xmm0) vmovss(xmm0, mem(r11, r13, 1)) lea(mem(r11, rsi, 4), r11) // c += 4*rs_c; jmp(.SDONE) // jump to end. 
label(.SCOLSTORBZ)
	
	vmovups(xmm4, mem(rcx, 0*16))
	vmovups(xmm7, mem(rcx, 1*16))
	vmovups(xmm10, mem(rcx, 2*16))
	vmovups(xmm13, mem(rcx, 3*16))
	
	vmovups(xmm5, mem(r10, 0*16))
	vmovups(xmm8, mem(r10, 1*16))
	vmovups(xmm11, mem(r10, 2*16))
	vmovups(xmm14, mem(r10, 3*16))
	
	vmovups(xmm6, mem(r11, 0*16))
	vmovups(xmm9, mem(r11, 1*16))
	vmovups(xmm12, mem(r11, 2*16))
	vmovups(xmm15, mem(r11, 3*16))
	
	label(.SDONE)
	
	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a] "m" (a), // 2
	  [b] "m" (b), // 3
	  [alpha] "m" (alpha), // 4
	  [beta] "m" (beta), // 5
	  [c] "m" (c), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c), // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next) // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// Double-precision real 8x3 gemm micro-kernel written with 128-bit FMA
// instructions.
//
// The kernel accumulates, over k0 steps, the product of 8 doubles of a and
// 3 doubles of b per step into xmm4-xmm15, scales the accumulators by
// *alpha, and then stores c := (*beta) * c + alpha * (a * b). When *beta
// compares equal to zero, c is overwritten without being read.
//
//   k0      number of rank-1 update steps; processed as k0 / 8 unrolled
//           iterations followed by k0 % 8 single steps.
//   alpha   pointer to the scalar applied to the a*b product.
//   a       packed panel of a; advanced by 8 doubles per step and read
//           with vmovaps, so it must be 16-byte aligned.
//   b       packed panel of b; advanced by 3 doubles per step.
//   beta    pointer to the scalar applied to c.
//   c       output micro-tile.
//   rs_c0   row stride of c, in elements.
//   cs_c0   column stride of c, in elements.
//   data    supplies the a_next/b_next addresses, which are only
//           prefetched here.
//   cntx    not referenced by this function.
void bli_dgemm_piledriver_asm_8x3
     (
       dim_t               k0,
       double*    restrict alpha,
       double*    restrict a,
       double*    restrict b,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	void*   a_next = bli_auxinfo_next_a( data );
	void*   b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 8;
	uint64_t k_left = k0 % 8;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	begin_asm()
	
	
	mov(var(a), rax) // load address of a.
	mov(var(b), rbx) // load address of b.
	mov(var(b_next), r15) // load address of b_next.
	mov(var(a_next), r14) // load address of a_next.
	
	prefetch(0, mem(rbx, 128)) // prefetch b
	prefetch(0, mem(rbx, 64+128)) // prefetch b
	prefetch(0, mem(rbx, 128+128)) // prefetch b
	
	// Bias both pointers so the unrolled loop below can address a and b
	// with displacements centered around zero.
	add(imm(16*8), rax)
	add(imm(12*8), rbx)
	
	mov(var(c), rcx) // load address of c
	mov(var(cs_c), rdi) // load cs_c
	lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(double)
	lea(mem(rcx, rdi, 1), r10) // load address of c + 1*cs_c;
	lea(mem(rcx, rdi, 2), r11) // load address of c + 2*cs_c;
	
	// Preload the three b elements of the first step, each duplicated
	// into both lanes.
	vmovddup(mem(rbx, -12*8), xmm1)
	vmovddup(mem(rbx, -11*8), xmm2)
	vmovddup(mem(rbx, -10*8), xmm3)
	
	// Zero the twelve accumulators.
	vxorpd(xmm4, xmm4, xmm4)
	vxorpd(xmm5, xmm5, xmm5)
	vxorpd(xmm6, xmm6, xmm6)
	vxorpd(xmm7, xmm7, xmm7)
	vxorpd(xmm8, xmm8, xmm8)
	vxorpd(xmm9, xmm9, xmm9)
	vxorpd(xmm10, xmm10, xmm10)
	vxorpd(xmm11, xmm11, xmm11)
	vxorpd(xmm12, xmm12, xmm12)
	vxorpd(xmm13, xmm13, xmm13)
	vxorpd(xmm14, xmm14, xmm14)
	vxorpd(xmm15, xmm15, xmm15)
	
	
	
	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.DCONSIDKLEFT) // if i == 0, jump to code that
	                  // contains the k_left loop.
	
	
	label(.DLOOPKITER) // MAIN LOOP
	
	
	// On re-entry, ZF still holds the result of the dec(rsi) at the
	// bottom of the loop; the intervening jmp does not modify flags.
	je(.DCONSIDKLEFT) // if i == 0, jump to k_left code.
	
	
	prefetch(0, mem(rbx, -32+256)) // prefetch b
	prefetch(0, mem(rbx, 32+256)) // prefetch b
	
	// iteration 0
	vmovaps(mem(rax, -8*16), xmm0)
	prefetch(0, mem(rax, 384)) // prefetch a
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, -7*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, -6*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, -5*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, -9*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, -8*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	
	// iteration 1
	vmovaps(mem(rax, -4*16), xmm0)
	prefetch(0, mem(rax, 64+384)) // prefetch a
	vmovddup(mem(rbx, -7*8), xmm3)
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, -3*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, -2*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, -1*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, -6*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, -5*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	
	// iteration 2
	vmovaps(mem(rax, 0*16), xmm0)
	prefetch(0, mem(rax, 128+384)) // prefetch a
	vmovddup(mem(rbx, -4*8), xmm3)
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, 1*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, 2*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, 3*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, -3*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, -2*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	
	// iteration 3
	vmovaps(mem(rax, 4*16), xmm0)
	prefetch(0, mem(rax, 192+384)) // prefetch a
	vmovddup(mem(rbx, -1*8), xmm3)
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, 5*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, 6*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, 7*16), xmm0)
	add(imm(4*8*8), rax) // a += 4*8 (unroll x mr)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, 0*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, 1*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	
	// iteration 4
	vmovaps(mem(rax, -8*16), xmm0)
	prefetch(0, mem(rax, 384)) // prefetch a
	vmovddup(mem(rbx, 2*8), xmm3)
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, -7*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, -6*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, -5*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, 3*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, 4*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	
	prefetch(0, mem(rbx, 96+256)) // prefetch b
	
	// iteration 5
	vmovaps(mem(rax, -4*16), xmm0)
	prefetch(0, mem(rax, 64+384)) // prefetch a
	vmovddup(mem(rbx, 5*8), xmm3)
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, -3*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, -2*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, -1*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, 6*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, 7*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	
	// iteration 6
	vmovaps(mem(rax, 0*16), xmm0)
	prefetch(0, mem(rax, 128+384)) // prefetch a
	vmovddup(mem(rbx, 8*8), xmm3)
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, 1*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, 2*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, 3*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, 9*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, 10*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	
	// iteration 7
	vmovaps(mem(rax, 4*16), xmm0)
	prefetch(0, mem(rax, 192+384)) // prefetch a
	vmovddup(mem(rbx, 11*8), xmm3)
	add(imm(8*3*8), rbx) // b += 8*3 (unroll x nr)
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, 5*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, 6*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, 7*16), xmm0)
	add(imm(4*8*8), rax) // a += 4*8 (unroll x mr)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, -12*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, -11*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	vmovddup(mem(rbx, -10*8), xmm3)
	
	
	
	// dec must stay the last flag-writing instruction before the jump:
	// the je at the top of the loop consumes its ZF.
	dec(rsi) // i -= 1;
	jmp(.DLOOPKITER) // jump to beginning of loop.
	
	
	
	
	
	
	label(.DCONSIDKLEFT)
	
	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.DPOSTACCUM) // if i == 0, we're done.
	                // else, we prepare to
	                // enter k_left loop.
	
	
	label(.DLOOPKLEFT) // EDGE LOOP
	
	
	// As in the main loop, ZF on re-entry comes from dec(rsi) below.
	je(.DPOSTACCUM) // if i == 0, we're done.
	
	// iteration 0
	vmovaps(mem(rax, -8*16), xmm0)
	prefetch(0, mem(rax, 512)) // prefetch a
	vfmadd231pd(xmm1, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm0, xmm5)
	vfmadd231pd(xmm3, xmm0, xmm6)
	vmovaps(mem(rax, -7*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm7)
	vfmadd231pd(xmm2, xmm0, xmm8)
	vfmadd231pd(xmm3, xmm0, xmm9)
	vmovaps(mem(rax, -6*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm10)
	vfmadd231pd(xmm2, xmm0, xmm11)
	vfmadd231pd(xmm3, xmm0, xmm12)
	vmovaps(mem(rax, -5*16), xmm0)
	vfmadd231pd(xmm1, xmm0, xmm13)
	vmovddup(mem(rbx, -9*8), xmm1)
	vfmadd231pd(xmm2, xmm0, xmm14)
	vmovddup(mem(rbx, -8*8), xmm2)
	vfmadd231pd(xmm3, xmm0, xmm15)
	vmovddup(mem(rbx, -7*8), xmm3)
	
	
	add(imm(1*8*8), rax) // a += 1*8 (1 x mr)
	add(imm(1*3*8), rbx) // b += 1*3 (1 x nr)
	
	
	dec(rsi) // i -= 1;
	jmp(.DLOOPKLEFT) // jump to beginning of loop.
	
	
	
	label(.DPOSTACCUM)
	
	prefetchw0(mem(rcx, 0*8)) // prefetch c + 0*cs_c
	prefetchw0(mem(r10, 0*8)) // prefetch c + 1*cs_c
	prefetchw0(mem(r11, 0*8)) // prefetch c + 2*cs_c
	
	
	// xmm4:   xmm5:   xmm6:
	// ( ab00  ( ab01  ( ab02
	//   ab10 )  ab11 )  ab12 )
	//
	// xmm7:   xmm8:   xmm9:
	// ( ab20  ( ab21  ( ab22
	//   ab30 )  ab31 )  ab32 )
	//
	// xmm10:  xmm11:  xmm12:
	// ( ab40  ( ab41  ( ab42
	//   ab50 )  ab51 )  ab52 )
	//
	// xmm13:  xmm14:  xmm15:
	// ( ab60  ( ab61  ( ab62
	//   ab70 )  ab71 )  ab72 )
	
	
	
	mov(var(alpha), rax) // load address of alpha
	mov(var(beta), rbx) // load address of beta
	vmovddup(mem(rax), xmm0) // load alpha and duplicate
	vmovddup(mem(rbx), xmm2) // load beta and duplicate
	
	vmulpd(xmm0, xmm4, xmm4) // scale by alpha
	vmulpd(xmm0, xmm5, xmm5)
	vmulpd(xmm0, xmm6, xmm6)
	vmulpd(xmm0, xmm7, xmm7)
	vmulpd(xmm0, xmm8, xmm8)
	vmulpd(xmm0, xmm9, xmm9)
	vmulpd(xmm0, xmm10, xmm10)
	vmulpd(xmm0, xmm11, xmm11)
	vmulpd(xmm0, xmm12, xmm12)
	vmulpd(xmm0, xmm13, xmm13)
	vmulpd(xmm0, xmm14, xmm14)
	vmulpd(xmm0, xmm15, xmm15)
	
	
	prefetch(0, mem(r14)) // prefetch a_next
	prefetch(0, mem(r14, 64)) // prefetch a_next
	
	
	
	mov(var(rs_c), rsi) // load rs_c
	lea(mem(, rsi, 8), rsi) // rsi = rs_c * sizeof(double)
	
	lea(mem(rcx, rsi, 4), rdx) // load address of c + 4*rs_c;
	
	lea(mem(, rsi, 2), r12) // r12 = 2*rs_c; (in bytes)
	lea(mem(rsi, rsi, 2), r13) // r13 = 3*rs_c; (in bytes)
	
	
	
	// determine if
	//    c    % 32 == 0, AND
	//  8*cs_c % 32 == 0, AND
	//    rs_c      == 1
	// ie: aligned, ldim aligned, and
	// column-stored
	
	cmp(imm(8), rsi) // set ZF if (8*rs_c) == 8.
	sete(bl) // bl = ( ZF == 1 ? 1 : 0 );
	test(imm(31), rcx) // set ZF if c & 31 is zero.
	setz(bh) // bh = ( ZF == 1 ? 1 : 0 );
	test(imm(31), rdi) // set ZF if (8*cs_c) & 31 is zero.
	setz(al) // al = ( ZF == 1 ? 1 : 0 );
	         // and(bl,bh) followed by
	         // and(bh,al) will reveal result
	
	prefetch(0, mem(r15)) // prefetch b_next
	prefetch(0, mem(r15, 64)) // prefetch b_next
	
	// now avoid loading C if beta == 0
	
	// NOTE(review): vucomisd also sets ZF when the compare is unordered,
	// so a NaN beta appears to take the beta == 0 path as well -- confirm
	// that this is intended.
	vxorpd(xmm0, xmm0, xmm0) // set xmm0 to zero.
	vucomisd(xmm0, xmm2) // set ZF if beta == 0.
	je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case
	
	
	// check if aligned/column-stored
	and(bl, bh) // bh &= bl.
	and(bh, al) // al &= bh; ZF = 1 if any of the three tests failed.
	je(.DGENSTORED) // if ZF = 1, jump to general storage case
	
	
	
	label(.DCOLSTORED)
	
	// xmm4:   xmm5:   xmm6:
	// ( ab00  ( ab01  ( ab02
	//   ab10 )  ab11 )  ab12 )
	//
	// xmm7:   xmm8:   xmm9:
	// ( ab20  ( ab21  ( ab22
	//   ab30 )  ab31 )  ab32 )
	//
	// xmm10:  xmm11:  xmm12:
	// ( ab40  ( ab41  ( ab42
	//   ab50 )  ab51 )  ab52 )
	//
	// xmm13:  xmm14:  xmm15:
	// ( ab60  ( ab61  ( ab62
	//   ab70 )  ab71 )  ab72 )
	
	// Each accumulator becomes beta * c + alpha * ab for its two rows.
	vfmadd231pd(mem(rcx, 0*16), xmm2, xmm4)
	vfmadd231pd(mem(rcx, 1*16), xmm2, xmm7)
	vfmadd231pd(mem(rcx, 2*16), xmm2, xmm10)
	vfmadd231pd(mem(rcx, 3*16), xmm2, xmm13)
	
	vfmadd231pd(mem(r10, 0*16), xmm2, xmm5)
	vfmadd231pd(mem(r10, 1*16), xmm2, xmm8)
	vfmadd231pd(mem(r10, 2*16), xmm2, xmm11)
	vfmadd231pd(mem(r10, 3*16), xmm2, xmm14)
	
	vfmadd231pd(mem(r11, 0*16), xmm2, xmm6)
	vfmadd231pd(mem(r11, 1*16), xmm2, xmm9)
	vfmadd231pd(mem(r11, 2*16), xmm2, xmm12)
	vfmadd231pd(mem(r11, 3*16), xmm2, xmm15)
	
	
	vmovups(xmm4, mem(rcx, 0*16))
	vmovups(xmm7, mem(rcx, 1*16))
	vmovups(xmm10, mem(rcx, 2*16))
	vmovups(xmm13, mem(rcx, 3*16))
	
	vmovups(xmm5, mem(r10, 0*16))
	vmovups(xmm8, mem(r10, 1*16))
	vmovups(xmm11, mem(r10, 2*16))
	vmovups(xmm14, mem(r10, 3*16))
	
	vmovups(xmm6, mem(r11, 0*16))
	vmovups(xmm9, mem(r11, 1*16))
	vmovups(xmm12, mem(r11, 2*16))
	vmovups(xmm15, mem(r11, 3*16))
	
	
	
	// Disabled alternative column-stored update, kept for reference.
/*
	vmovupd(mem(rcx), xmm0) // load c00:c10
	vmovupd(mem(rcx, r12, 1), xmm1) // load c20:c30
	vfmadd231pd(xmm2, xmm0, xmm4)
	vfmadd231pd(xmm2, xmm1, xmm7)
	vmovupd(xmm4, mem(rcx)) // store c00:c10
	vmovupd(xmm7, mem(rcx, r12, 1)) // store c20:c30
	add(rdi, rcx)
	
	vmovupd(mem(rdx), xmm0) // load c40:c50
	vmovupd(mem(rdx, r12, 1), xmm1) // load c60:c70
	vfmadd213pd(xmm10, xmm2, xmm0)
	vfmadd213pd(xmm13, xmm2, xmm1)
	vmovupd(xmm0, mem(rdx)) // store c40:c50
	vmovupd(xmm1, mem(rdx, r12, 1)) // store c60:c70
	add(rdi, rdx)
	
	
	vmovupd(mem(rcx), xmm0) // load c01:c11
	vmovupd(mem(rcx, r12, 1), xmm1) // load c21:c31
	vfmadd213pd(xmm5, xmm2, xmm0)
	vfmadd213pd(xmm8, xmm2, xmm1)
	vmovupd(xmm0, mem(rcx)) // store c01:c11
	vmovupd(xmm1, mem(rcx, r12, 1)) // store c21:c31
	add(rdi, rcx)
	
	vmovupd(mem(rdx), xmm0) // load c41:c51
	vmovupd(mem(rdx, r12, 1), xmm1) // load c61:c71
	vfmadd213pd(xmm11, xmm2, xmm0)
	vfmadd213pd(xmm14, xmm2, xmm1)
	vmovupd(xmm0, mem(rdx)) // store c41:c51
	vmovupd(xmm1, mem(rdx, r12, 1)) // store c61:c71
	add(rdi, rdx)
	
	
	vmovupd(mem(rcx), xmm0) // load c02:c12
	vmovupd(mem(rcx, r12, 1), xmm1) // load c22:c32
	vfmadd213pd(xmm6, xmm2, xmm0)
	vfmadd213pd(xmm9, xmm2, xmm1)
	vmovupd(xmm0, mem(rcx)) // store c02:c12
	vmovupd(xmm1, mem(rcx, r12, 1)) // store c22:c32
	
	vmovupd(mem(rdx), xmm0) // load c42:c52
	vmovupd(mem(rdx, r12, 1), xmm1) // load c62:c72
	vfmadd213pd(xmm12, xmm2, xmm0)
	vfmadd213pd(xmm15, xmm2, xmm1)
	vmovupd(xmm0, mem(rdx)) // store c42:c52
	vmovupd(xmm1, mem(rdx, r12, 1)) // store c62:c72
*/
	
	
	
	jmp(.DDONE) // jump to end.
	
	
	
	label(.DGENSTORED)
	
	// General strides: gather two rows at a time with scalar-width
	// loads, update, and scatter them back. rcx walks rows 0-3 and rdx
	// rows 4-7 of each column; both advance by cs_c after each column.
	
	vmovlpd(mem(rcx), xmm0, xmm0) // load c00:c10
	vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm4, xmm0, xmm0)
	vmovlpd(xmm0, mem(rcx)) // store c00:c10
	vmovhpd(xmm0, mem(rcx, rsi, 1))
	
	vmovlpd(mem(rcx, r12, 1), xmm0, xmm0) // load c20:c30
	vmovhpd(mem(rcx, r13, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm7, xmm0, xmm0)
	vmovlpd(xmm0, mem(rcx, r12, 1)) // store c20:c30
	vmovhpd(xmm0, mem(rcx, r13, 1))
	add(rdi, rcx)
	
	vmovlpd(mem(rdx), xmm0, xmm0) // load c40:c50
	vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm10, xmm0, xmm0)
	vmovlpd(xmm0, mem(rdx)) // store c40:c50
	vmovhpd(xmm0, mem(rdx, rsi, 1))
	
	vmovlpd(mem(rdx, r12, 1), xmm0, xmm0) // load c60:c70
	vmovhpd(mem(rdx, r13, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm13, xmm0, xmm0)
	vmovlpd(xmm0, mem(rdx, r12, 1)) // store c60:c70
	vmovhpd(xmm0, mem(rdx, r13, 1))
	add(rdi, rdx)
	
	
	vmovlpd(mem(rcx), xmm0, xmm0) // load c01:c11
	vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm5, xmm0, xmm0)
	vmovlpd(xmm0, mem(rcx)) // store c01:c11
	vmovhpd(xmm0, mem(rcx, rsi, 1))
	
	vmovlpd(mem(rcx, r12, 1), xmm0, xmm0) // load c21:c31
	vmovhpd(mem(rcx, r13, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm8, xmm0, xmm0)
	vmovlpd(xmm0, mem(rcx, r12, 1)) // store c21:c31
	vmovhpd(xmm0, mem(rcx, r13, 1))
	add(rdi, rcx)
	
	vmovlpd(mem(rdx), xmm0, xmm0) // load c41:c51
	vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm11, xmm0, xmm0)
	vmovlpd(xmm0, mem(rdx)) // store c41:c51
	vmovhpd(xmm0, mem(rdx, rsi, 1))
	
	vmovlpd(mem(rdx, r12, 1), xmm0, xmm0) // load c61:c71
	vmovhpd(mem(rdx, r13, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm14, xmm0, xmm0)
	vmovlpd(xmm0, mem(rdx, r12, 1)) // store c61:c71
	vmovhpd(xmm0, mem(rdx, r13, 1))
	add(rdi, rdx)
	
	
	vmovlpd(mem(rcx), xmm0, xmm0) // load c02:c12
	vmovhpd(mem(rcx, rsi, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm6, xmm0, xmm0)
	vmovlpd(xmm0, mem(rcx)) // store c02:c12
	vmovhpd(xmm0, mem(rcx, rsi, 1))
	
	vmovlpd(mem(rcx, r12, 1), xmm0, xmm0) // load c22:c32
	vmovhpd(mem(rcx, r13, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm9, xmm0, xmm0)
	vmovlpd(xmm0, mem(rcx, r12, 1)) // store c22:c32
	vmovhpd(xmm0, mem(rcx, r13, 1))
	add(rdi, rcx)
	
	vmovlpd(mem(rdx), xmm0, xmm0) // load c42:c52
	vmovhpd(mem(rdx, rsi, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm12, xmm0, xmm0)
	vmovlpd(xmm0, mem(rdx)) // store c42:c52
	vmovhpd(xmm0, mem(rdx, rsi, 1))
	
	vmovlpd(mem(rdx, r12, 1), xmm0, xmm0) // load c62:c72
	vmovhpd(mem(rdx, r13, 1), xmm0, xmm0)
	vmulpd(xmm2, xmm0, xmm0)
	vaddpd(xmm15, xmm0, xmm0)
	vmovlpd(xmm0, mem(rdx, r12, 1)) // store c62:c72
	vmovhpd(xmm0, mem(rdx, r13, 1))
	add(rdi, rdx)
	
	
	
	jmp(.DDONE) // jump to end.
	
	
	
	label(.DBETAZERO)
	
	// check if aligned/column-stored
	and(bl, bh) // bh &= bl.
	and(bh, al) // al &= bh; ZF = 0 if all three tests passed.
	jne(.DCOLSTORBZ) // if ZF = 0, jump to column storage case
	
	
	
	label(.DGENSTORBZ)
	
	// beta == 0 with general strides: scatter the scaled accumulators
	// without reading c.
	
	vmovlpd(xmm4, mem(rcx))
	vmovhpd(xmm4, mem(rcx, rsi, 1))
	vmovlpd(xmm7, mem(rcx, r12, 1))
	vmovhpd(xmm7, mem(rcx, r13, 1))
	add(rdi, rcx)
	vmovlpd(xmm10, mem(rdx))
	vmovhpd(xmm10, mem(rdx, rsi, 1))
	vmovlpd(xmm13, mem(rdx, r12, 1))
	vmovhpd(xmm13, mem(rdx, r13, 1))
	add(rdi, rdx)
	
	vmovlpd(xmm5, mem(rcx))
	vmovhpd(xmm5, mem(rcx, rsi, 1))
	vmovlpd(xmm8, mem(rcx, r12, 1))
	vmovhpd(xmm8, mem(rcx, r13, 1))
	add(rdi, rcx)
	vmovlpd(xmm11, mem(rdx))
	vmovhpd(xmm11, mem(rdx, rsi, 1))
	vmovlpd(xmm14, mem(rdx, r12, 1))
	vmovhpd(xmm14, mem(rdx, r13, 1))
	add(rdi, rdx)
	
	vmovlpd(xmm6, mem(rcx))
	vmovhpd(xmm6, mem(rcx, rsi, 1))
	vmovlpd(xmm9, mem(rcx, r12, 1))
	vmovhpd(xmm9, mem(rcx, r13, 1))
	add(rdi, rcx)
	vmovlpd(xmm12, mem(rdx))
	vmovhpd(xmm12, mem(rdx, rsi, 1))
	vmovlpd(xmm15, mem(rdx, r12, 1))
	vmovhpd(xmm15, mem(rdx, r13, 1))
	add(rdi, rdx)
	
	
	
	jmp(.DDONE) // jump to end.
	
	
	
	label(.DCOLSTORBZ)
	
	// beta == 0 and column-stored: store two rows per instruction.
	
	vmovupd(xmm4, mem(rcx))
	vmovupd(xmm7, mem(rcx, r12, 1))
	add(rdi, rcx)
	vmovupd(xmm10, mem(rdx))
	vmovupd(xmm13, mem(rdx, r12, 1))
	add(rdi, rdx)
	
	vmovupd(xmm5, mem(rcx))
	vmovupd(xmm8, mem(rcx, r12, 1))
	add(rdi, rcx)
	vmovupd(xmm11, mem(rdx))
	vmovupd(xmm14, mem(rdx, r12, 1))
	add(rdi, rdx)
	
	vmovupd(xmm6, mem(rcx))
	vmovupd(xmm9, mem(rcx, r12, 1))
	add(rdi, rcx)
	vmovupd(xmm12, mem(rdx))
	vmovupd(xmm15, mem(rdx, r12, 1))
	add(rdi, rdx)
	
	
	
	
	
	label(.DDONE)
	
	
	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a] "m" (a), // 2
	  [b] "m" (b), // 3
	  [alpha] "m" (alpha), // 4
	  [beta] "m" (beta), // 5
	  [c] "m" (c), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c), // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next) // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

void bli_cgemm_piledriver_asm_4x2
     (
       dim_t               k0,
       scomplex*  restrict alpha,
       scomplex*  restrict a,
       scomplex*  restrict b,
       scomplex*  restrict beta,
       scomplex*  restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	void*   a_next = bli_auxinfo_next_a( data );
	void*   b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 8;
	uint64_t k_left = k0 % 8;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	begin_asm()
	
	
	mov(var(a), rax) // load address of a.
	mov(var(b), rbx) // load address of b.
	mov(var(b_next), r15) // load address of b_next.
	mov(var(a_next), r14) // load address of a_next.
mov(var(c), rcx) // load address of c mov(var(cs_c), rdi) // load cs_c lea(mem(, rdi, 8), rdi) // cs_c *= sizeof(scomplex) lea(mem(rcx, rdi, 1), r10) // load address of c + 1*cs_c; add(imm(32*4), rax) add(imm(16*4), rbx) vxorps(xmm8, xmm8, xmm8) vxorps(xmm9, xmm9, xmm9) vxorps(xmm10, xmm10, xmm10) vxorps(xmm11, xmm11, xmm11) vxorps(xmm12, xmm12, xmm12) vxorps(xmm13, xmm13, xmm13) vxorps(xmm14, xmm14, xmm14) vxorps(xmm15, xmm15, xmm15) //vzeroall() mov(var(k_iter), rsi) // i = k_iter; test(rsi, rsi) // check i via logical AND. je(.CCONSIDKLEFT) // if i == 0, jump to code that // contains the k_left loop. label(.CLOOPKITER) // MAIN LOOP je(.CCONSIDKLEFT) // if i == 0, jump to k_left code. prefetch(0, mem(rbx, 256)) prefetch(0, mem(rax, 512)) // iteration 0 vmovaps(mem(rax, -32*4), xmm0) vbroadcastss(mem(rbx, -16*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, -28*4), xmm1) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, -15*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, xmm5, xmm13) vbroadcastss(mem(rbx, -14*4), xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, -13*4), xmm7) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) // iteration 1 vmovaps(mem(rax, -24*4), xmm0) vbroadcastss(mem(rbx, -12*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, -20*4), xmm1) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, -11*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, xmm5, xmm13) vbroadcastss(mem(rbx, -10*4), xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, -9*4), xmm7) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) prefetch(0, mem(rbx, 64+256)) prefetch(0, mem(rax, 64+512)) // iteration 2 vmovaps(mem(rax, -16*4), xmm0) vbroadcastss(mem(rbx, -8*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, -12*4), xmm1) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, -7*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, 
xmm5, xmm13) vbroadcastss(mem(rbx, -6*4), xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, -5*4), xmm7) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) // iteration 3 vmovaps(mem(rax, -8*4), xmm0) vbroadcastss(mem(rbx, -4*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, -4*4), xmm1) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, -3*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, xmm5, xmm13) vbroadcastss(mem(rbx, -2*4), xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, -1*4), xmm7) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) prefetch(0, mem(rbx, 128+256)) prefetch(0, mem(rax, 128+512)) // iteration 4 vmovaps(mem(rax, 0*4), xmm0) vbroadcastss(mem(rbx, 0*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, 4*4), xmm1) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, 1*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, xmm5, xmm13) vbroadcastss(mem(rbx, 2*4), xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, 3*4), xmm7) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) // iteration 5 vmovaps(mem(rax, 8*4), xmm0) vbroadcastss(mem(rbx, 4*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, 12*4), xmm1) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, 5*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, xmm5, xmm13) vbroadcastss(mem(rbx, 6*4), xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, 7*4), xmm7) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) prefetch(0, mem(rbx, 128+256)) prefetch(0, mem(rax, 128+512)) // iteration 6 vmovaps(mem(rax, 16*4), xmm0) vbroadcastss(mem(rbx, 8*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, 20*4), xmm1) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, 9*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, xmm5, xmm13) vbroadcastss(mem(rbx, 10*4), 
xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, 11*4), xmm7) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) // iteration 7 vmovaps(mem(rax, 24*4), xmm0) vbroadcastss(mem(rbx, 12*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, 28*4), xmm1) add(imm(8*4*8), rax) // a += 8*2 (unroll x mr) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, 13*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, xmm5, xmm13) vbroadcastss(mem(rbx, 14*4), xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, 15*4), xmm7) add(imm(8*2*8), rbx) // b += 8*2 (unroll x nr) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) dec(rsi) // i -= 1; jmp(.CLOOPKITER) // jump to beginning of loop. label(.CCONSIDKLEFT) mov(var(k_left), rsi) // i = k_left; test(rsi, rsi) // check i via logical AND. je(.CPOSTACCUM) // if i == 0, we're done; jump to end. // else, we prepare to enter k_left loop. label(.CLOOPKLEFT) // EDGE LOOP je(.CPOSTACCUM) // if i == 0, we're done. prefetch(0, mem(rbx, 256)) prefetch(0, mem(rax, 512)) // iteration 0 vmovaps(mem(rax, -32*4), xmm0) vbroadcastss(mem(rbx, -16*4), xmm4) vfmadd231ps(xmm0, xmm4, xmm8) vmovaps(mem(rax, -28*4), xmm1) vfmadd231ps(xmm1, xmm4, xmm12) vbroadcastss(mem(rbx, -15*4), xmm5) vfmadd231ps(xmm0, xmm5, xmm9) vfmadd231ps(xmm1, xmm5, xmm13) vbroadcastss(mem(rbx, -14*4), xmm6) vfmadd231ps(xmm0, xmm6, xmm10) vfmadd231ps(xmm1, xmm6, xmm14) vbroadcastss(mem(rbx, -13*4), xmm7) vfmadd231ps(xmm0, xmm7, xmm11) vfmadd231ps(xmm1, xmm7, xmm15) add(imm(1*4*8), rax) // a += 1*2 (1 x mr) add(imm(1*2*8), rbx) // b += 1*2 (1 x nr) dec(rsi) // i -= 1; jmp(.CLOOPKLEFT) // jump to beginning of loop. 
label(.CPOSTACCUM) prefetchw0(mem(rcx, 0*8)) // prefetch c + 0*cs_c prefetchw0(mem(r10, 0*8)) // prefetch c + 1*cs_c vpermilps(imm(0xb1), xmm9, xmm9) vpermilps(imm(0xb1), xmm11, xmm11) vpermilps(imm(0xb1), xmm13, xmm13) vpermilps(imm(0xb1), xmm15, xmm15) vaddsubps(xmm9, xmm8, xmm8) vaddsubps(xmm11, xmm10, xmm10) vaddsubps(xmm13, xmm12, xmm12) vaddsubps(xmm15, xmm14, xmm14) // xmm8: xmm10: // ( ab00 ( ab01 // ab10 ab11 // ab20 ab21 // ab30 ) ab31 ) // xmm12: xmm14: // ( ab40 ( ab41 // ab50 ab51 // ab60 ab61 // ab70 ) ab71 ) prefetch(0, mem(r14)) // prefetch a_next prefetch(0, mem(r14, 64)) // prefetch a_next // scale by alpha mov(var(alpha), rax) // load address of alpha vbroadcastss(mem(rax), xmm0) // load alpha_r and duplicate vbroadcastss(mem(rax, 4), xmm1) // load alpha_i and duplicate vpermilps(imm(0xb1), xmm8, xmm9) vpermilps(imm(0xb1), xmm10, xmm11) vpermilps(imm(0xb1), xmm12, xmm13) vpermilps(imm(0xb1), xmm14, xmm15) vmulps(xmm8, xmm0, xmm8) vmulps(xmm10, xmm0, xmm10) vmulps(xmm12, xmm0, xmm12) vmulps(xmm14, xmm0, xmm14) vmulps(xmm9, xmm1, xmm9) vmulps(xmm11, xmm1, xmm11) vmulps(xmm13, xmm1, xmm13) vmulps(xmm15, xmm1, xmm15) vaddsubps(xmm9, xmm8, xmm8) vaddsubps(xmm11, xmm10, xmm10) vaddsubps(xmm13, xmm12, xmm12) vaddsubps(xmm15, xmm14, xmm14) mov(var(beta), rbx) // load address of beta vbroadcastss(mem(rbx), xmm6) // load beta_r and duplicate vbroadcastss(mem(rbx, 4), xmm7) // load beta_i and duplicate mov(var(rs_c), rsi) // load rs_c lea(mem(, rsi, 8), rsi) // rsi = rs_c * sizeof(scomplex) lea(mem(, rsi, 2), r12) // r12 = 2*rs_c; lea(mem(rsi, rsi, 2), r13) // r13 = 3*rs_c; prefetch(0, mem(r15)) // prefetch b_next prefetch(0, mem(r15, 64)) // prefetch b_next // determine if // c % 32 == 0, AND // 8*cs_c % 32 == 0, AND // rs_c == 1 // ie: aligned, ldim aligned, and // column-stored cmp(imm(8), rsi) // set ZF if (8*rs_c) == 8. sete(bl) // bl = ( ZF == 1 ? 1 : 0 ); test(imm(31), rcx) // set ZF if c & 32 is zero. setz(bh) // bh = ( ZF == 0 ? 
1 : 0 ); test(imm(31), rdi) // set ZF if (8*cs_c) & 32 is zero. setz(al) // al = ( ZF == 0 ? 1 : 0 ); // and(bl,bh) followed by // and(bh,al) will reveal result // now avoid loading C if beta == 0 vxorps(xmm0, xmm0, xmm0) // set xmm0 to zero. vucomiss(xmm0, xmm6) // set ZF if beta_r == 0. sete(r8b) // r8b = ( ZF == 1 ? 1 : 0 ); vucomiss(xmm0, xmm7) // set ZF if beta_i == 0. sete(r9b) // r9b = ( ZF == 1 ? 1 : 0 ); and(r8b, r9b) // set ZF if r8b & r9b == 1. jne(.CBETAZERO) // if ZF = 0, jump to beta == 0 case // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. jne(.CCOLSTORED) // jump to column storage case label(.CGENSTORED) vmovlps(mem(rcx), xmm0, xmm0) // load c00:c10 vmovhps(mem(rcx, rsi, 1), xmm0, xmm0) vmovlps(mem(rcx, r12, 1), xmm2, xmm2) // load c20:c30 vmovhps(mem(rcx, r13, 1), xmm2, xmm2) vpermilps(imm(0xb1), xmm0, xmm1) vpermilps(imm(0xb1), xmm2, xmm3) vmulps(xmm6, xmm0, xmm0) vmulps(xmm7, xmm1, xmm1) vaddsubps(xmm1, xmm0, xmm0) vaddps(xmm8, xmm0, xmm0) vmovlps(xmm0, mem(rcx)) // store c00:c10 vmovhps(xmm0, mem(rcx, rsi, 1)) vmulps(xmm6, xmm2, xmm2) vmulps(xmm7, xmm3, xmm3) vaddsubps(xmm3, xmm2, xmm2) vaddps(xmm12, xmm2, xmm2) vmovlps(xmm2, mem(rcx, r12, 1)) // store c20:c30 vmovhps(xmm2, mem(rcx, r13, 1)) vmovlps(mem(r10), xmm0, xmm0) // load c01:c11 vmovhps(mem(r10, rsi, 1), xmm0, xmm0) vmovlps(mem(r10, r12, 1), xmm2, xmm2) // load c21:c31 vmovhps(mem(r10, r13, 1), xmm2, xmm2) vpermilps(imm(0xb1), xmm0, xmm1) vpermilps(imm(0xb1), xmm2, xmm3) vmulps(xmm6, xmm0, xmm0) vmulps(xmm7, xmm1, xmm1) vaddsubps(xmm1, xmm0, xmm0) vaddps(xmm10, xmm0, xmm0) vmovlps(xmm0, mem(r10)) // store c01:c11 vmovhps(xmm0, mem(r10, rsi, 1)) vmulps(xmm6, xmm2, xmm2) vmulps(xmm7, xmm3, xmm3) vaddsubps(xmm3, xmm2, xmm2) vaddps(xmm14, xmm2, xmm2) vmovlps(xmm2, mem(r10, r12, 1)) // store c21:c31 vmovhps(xmm2, mem(r10, r13, 1)) jmp(.CDONE) // jump to end. 
label(.CCOLSTORED) vmovups(mem(rcx), xmm0) // load c00:c10 vmovups(mem(rcx, 16), xmm2) // load c20:c30 vpermilps(imm(0xb1), xmm0, xmm1) vpermilps(imm(0xb1), xmm2, xmm3) vmulps(xmm6, xmm0, xmm0) vmulps(xmm7, xmm1, xmm1) vaddsubps(xmm1, xmm0, xmm0) vaddps(xmm8, xmm0, xmm0) vmovups(xmm0, mem(rcx)) // store c00:c10 vmulps(xmm6, xmm2, xmm2) vmulps(xmm7, xmm3, xmm3) vaddsubps(xmm3, xmm2, xmm2) vaddps(xmm12, xmm2, xmm2) vmovups(xmm2, mem(rcx, 16)) // store c20:c30 vmovups(mem(r10), xmm0) // load c01:c11 vmovups(mem(r10, 16), xmm2) // load c21:c31 vpermilps(imm(0xb1), xmm0, xmm1) vpermilps(imm(0xb1), xmm2, xmm3) vmulps(xmm6, xmm0, xmm0) vmulps(xmm7, xmm1, xmm1) vaddsubps(xmm1, xmm0, xmm0) vaddps(xmm10, xmm0, xmm0) vmovups(xmm0, mem(r10)) // store c01:c11 vmulps(xmm6, xmm2, xmm2) vmulps(xmm7, xmm3, xmm3) vaddsubps(xmm3, xmm2, xmm2) vaddps(xmm14, xmm2, xmm2) vmovups(xmm2, mem(r10, 16)) // store c21:c31 jmp(.CDONE) // jump to end. label(.CBETAZERO) // check if aligned/column-stored // check if aligned/column-stored and(bl, bh) // set ZF if bl & bh == 1. and(bh, al) // set ZF if bh & al == 1. jne(.CCOLSTORBZ) // jump to column storage case label(.CGENSTORBZ) vmovlps(xmm8, mem(rcx)) // store c00:c10 vmovhps(xmm8, mem(rcx, rsi, 1)) vmovlps(xmm12, mem(rcx, r12, 1)) // store c20:c30 vmovhps(xmm12, mem(rcx, r13, 1)) vmovlps(xmm10, mem(r10)) // store c01:c11 vmovhps(xmm10, mem(r10, rsi, 1)) vmovlps(xmm14, mem(r10, r12, 1)) // store c21:c31 vmovhps(xmm14, mem(r10, r13, 1)) jmp(.CDONE) // jump to end. 
// NOTE(review): the statements from here to the first closing brace are the
// end of the preceding single-precision complex micro-kernel, whose
// definition starts earlier in the file. They are reproduced unchanged.
	label(.CCOLSTORBZ)

	vmovups(xmm8, mem(rcx)) // store c00:c10
	vmovups(xmm12, mem(rcx, 16)) // store c20:c30

	vmovups(xmm10, mem(r10)) // store c01:c11
	vmovups(xmm14, mem(r10, 16)) // store c21:c31

	label(.CDONE)

	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a] "m" (a), // 2
	  [b] "m" (b), // 3
	  [alpha] "m" (alpha), // 4
	  [beta] "m" (beta), // 5
	  [c] "m" (c), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c), // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next) // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

// bli_zgemm_piledriver_asm_2x2
//
// Double-precision complex gemm micro-kernel implemented as one inline
// assembly block. It accumulates a 2x2 block of dcomplex products over k0
// steps of a and b, scales that block by alpha, and stores
// beta*c + alpha*(a*b) to c. When both parts of beta compare equal to zero,
// c is not loaded and alpha*(a*b) is stored directly.
//
// Parameters:
//   k0           - number of k steps; processed as k0/8 passes of the 8x
//                  unrolled main loop followed by k0%8 single steps.
//   alpha, beta  - pointers to the scalars; the real part is read at byte
//                  offset 0 and the imaginary part at byte offset 8.
//   a, b         - input panels; every k step consumes 32 bytes (two
//                  dcomplex values) from each. a is read with vmovaps, so
//                  it must be 16-byte aligned.
//   c            - output block, addressed as c + i*rs_c + j*cs_c with the
//                  strides scaled by 16 bytes per element.
//   rs_c0, cs_c0 - row and column strides of c, in elements.
//   data         - only queried for the next a/b addresses, which are used
//                  as prefetch targets.
//   cntx         - not referenced in this function.
//
// Complex products are formed without a dedicated complex instruction: for
// each column of b, one accumulator collects a*b_real and another a*b_imag.
// After the loops the b_imag accumulator has its two lanes swapped and is
// combined with vaddsubpd, giving (ar*br - ai*bi, ai*br + ar*bi). The same
// swap/multiply/addsub pattern is used for the alpha and beta scalings.
void bli_zgemm_piledriver_asm_2x2
     (
       dim_t               k0,
       dcomplex*  restrict alpha,
       dcomplex*  restrict a,
       dcomplex*  restrict b,
       dcomplex*  restrict beta,
       dcomplex*  restrict c, inc_t rs_c0, inc_t cs_c0,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     )
{
	void* a_next = bli_auxinfo_next_a( data );
	void* b_next = bli_auxinfo_next_b( data );

	// Typecast local copies of integers in case dim_t and inc_t are a
	// different size than is expected by load instructions.
	uint64_t k_iter = k0 / 8;
	uint64_t k_left = k0 % 8;
	uint64_t rs_c   = rs_c0;
	uint64_t cs_c   = cs_c0;

	begin_asm()


	mov(var(a), rax) // load address of a.
	mov(var(b), rbx) // load address of b.
	mov(var(b_next), r15) // load address of b_next.
	mov(var(a_next), r14) // load address of a_next.

	mov(var(c), rcx) // load address of c
	mov(var(cs_c), rdi) // load cs_c
	lea(mem(, rdi, 8), rdi) // cs_c *= 8 ...
	lea(mem(, rdi, 2), rdi) // ... *= 2, i.e. cs_c *= sizeof(dcomplex)
	lea(mem(rcx, rdi, 1), r10) // load address of c + 1*cs_c;

	// Bias both panel pointers by 128 bytes so that the loop bodies can
	// address the data with displacements in the range -16*8 .. 15*8.
	add(imm(16*8), rax)
	add(imm(16*8), rbx)

	// Clear the eight accumulators.
	vxorpd(xmm8, xmm8, xmm8)
	vxorpd(xmm9, xmm9, xmm9)
	vxorpd(xmm10, xmm10, xmm10)
	vxorpd(xmm11, xmm11, xmm11)
	vxorpd(xmm12, xmm12, xmm12)
	vxorpd(xmm13, xmm13, xmm13)
	vxorpd(xmm14, xmm14, xmm14)
	vxorpd(xmm15, xmm15, xmm15)



	mov(var(k_iter), rsi) // i = k_iter;
	test(rsi, rsi) // check i via logical AND.
	je(.ZCONSIDKLEFT) // if i == 0, jump to code that
	                  // contains the k_left loop.


	label(.ZLOOPKITER) // MAIN LOOP

	// ZF here comes either from the test above (first pass) or from the
	// dec(rsi) at the bottom of the loop; nothing in between touches flags
	// after that dec.
	je(.ZCONSIDKLEFT) // if i == 0, jump to k_left code.


	prefetch(0, mem(rbx, 256))
	prefetch(0, mem(rax, 512))

	// iteration 0
	vmovaps(mem(rax, -16*8), xmm0)
	vmovddup(mem(rbx, -16*8), xmm4)
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, -14*8), xmm1)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, -15*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, -14*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, -13*8), xmm7)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vmovaps(mem(rax, -12*8), xmm0) // preload a for iteration 1
	vmovddup(mem(rbx, -12*8), xmm4) // preload b for iteration 1
	vfmadd231pd(xmm1, xmm7, xmm15)

	// iteration 1
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, -10*8), xmm1)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, -11*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, -10*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, -9*8), xmm7)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vmovaps(mem(rax, -8*8), xmm0)
	vmovddup(mem(rbx, -8*8), xmm4)
	vfmadd231pd(xmm1, xmm7, xmm15)

	prefetch(0, mem(rbx, 64+256))
	prefetch(0, mem(rax, 64+512))

	// iteration 2
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, -6*8), xmm1)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, -7*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, -6*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, -5*8), xmm7)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vmovaps(mem(rax, -4*8), xmm0)
	vmovddup(mem(rbx, -4*8), xmm4)
	vfmadd231pd(xmm1, xmm7, xmm15)

	// iteration 3
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, -2*8), xmm1)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, -3*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, -2*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, -1*8), xmm7)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vmovaps(mem(rax, 0*8), xmm0)
	vmovddup(mem(rbx, 0*8), xmm4)
	vfmadd231pd(xmm1, xmm7, xmm15)

	prefetch(0, mem(rbx, 128+256))
	prefetch(0, mem(rax, 128+512))

	// iteration 4
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, 2*8), xmm1)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, 1*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, 2*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, 3*8), xmm7)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vmovaps(mem(rax, 4*8), xmm0)
	vmovddup(mem(rbx, 4*8), xmm4)
	vfmadd231pd(xmm1, xmm7, xmm15)

	// iteration 5
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, 6*8), xmm1)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, 5*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, 6*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, 7*8), xmm7)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vmovaps(mem(rax, 8*8), xmm0)
	vmovddup(mem(rbx, 8*8), xmm4)
	vfmadd231pd(xmm1, xmm7, xmm15)

	// NOTE(review): these two prefetches use the same 128+ offsets as the
	// pair issued after iteration 3. Given the 0 / 64 / 128 progression,
	// 192+256 and 192+512 may have been intended -- confirm before
	// changing, since this only affects performance.
	prefetch(0, mem(rbx, 128+256))
	prefetch(0, mem(rax, 128+512))

	// iteration 6
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, 10*8), xmm1)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, 9*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, 10*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, 11*8), xmm7)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vmovaps(mem(rax, 12*8), xmm0)
	vmovddup(mem(rbx, 12*8), xmm4)
	vfmadd231pd(xmm1, xmm7, xmm15)

	// iteration 7
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, 14*8), xmm1)
	add(imm(8*2*16), rax) // a += 8*2 (unroll x mr)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, 13*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, 14*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, 15*8), xmm7)
	add(imm(8*2*16), rbx) // b += 8*2 (unroll x nr)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vfmadd231pd(xmm1, xmm7, xmm15)



	dec(rsi) // i -= 1; sets ZF for the je at the loop head.
	jmp(.ZLOOPKITER) // jump to beginning of loop.



	label(.ZCONSIDKLEFT)

	mov(var(k_left), rsi) // i = k_left;
	test(rsi, rsi) // check i via logical AND.
	je(.ZPOSTACCUM) // if i == 0, we're done; jump to end.
	                // else, we prepare to enter k_left loop.


	label(.ZLOOPKLEFT) // EDGE LOOP

	je(.ZPOSTACCUM) // if i == 0, we're done.

	prefetch(0, mem(rbx, 256))
	prefetch(0, mem(rax, 512))

	// single k step (same schedule as iteration 0 of the main loop, but
	// without preloading operands for a following step)
	vmovaps(mem(rax, -16*8), xmm0)
	vmovddup(mem(rbx, -16*8), xmm4)
	vfmadd231pd(xmm0, xmm4, xmm8)
	vmovaps(mem(rax, -14*8), xmm1)
	vfmadd231pd(xmm1, xmm4, xmm12)
	vmovddup(mem(rbx, -15*8), xmm5)
	vfmadd231pd(xmm0, xmm5, xmm9)
	vfmadd231pd(xmm1, xmm5, xmm13)
	vmovddup(mem(rbx, -14*8), xmm6)
	vfmadd231pd(xmm0, xmm6, xmm10)
	vfmadd231pd(xmm1, xmm6, xmm14)
	vmovddup(mem(rbx, -13*8), xmm7)
	vfmadd231pd(xmm0, xmm7, xmm11)
	vfmadd231pd(xmm1, xmm7, xmm15)


	add(imm(1*2*16), rax) // a += 1*2 (1 x mr)
	add(imm(1*2*16), rbx) // b += 1*2 (1 x nr)


	dec(rsi) // i -= 1; sets ZF for the je at the loop head.
	jmp(.ZLOOPKLEFT) // jump to beginning of loop.



	label(.ZPOSTACCUM)


	prefetchw0(mem(rcx, 0*8)) // prefetch c + 0*cs_c
	prefetchw0(mem(r10, 0*8)) // prefetch c + 1*cs_c


	// Swap the lanes of the b_imag accumulators and fold them into the
	// b_real accumulators to complete the complex products.
	vpermilpd(imm(0x1), xmm9, xmm9)
	vpermilpd(imm(0x1), xmm11, xmm11)
	vpermilpd(imm(0x1), xmm13, xmm13)
	vpermilpd(imm(0x1), xmm15, xmm15)

	vaddsubpd(xmm9, xmm8, xmm8)
	vaddsubpd(xmm11, xmm10, xmm10)
	vaddsubpd(xmm13, xmm12, xmm12)
	vaddsubpd(xmm15, xmm14, xmm14)


	// Each register now holds one dcomplex element of a*b; the mapping
	// matches the stores further down:
	//   xmm8:  ab00    xmm10: ab01
	//   xmm12: ab10    xmm14: ab11

	prefetch(0, mem(r14)) // prefetch a_next
	prefetch(0, mem(r14, 64)) // prefetch a_next


	// scale by alpha

	mov(var(alpha), rax) // load address of alpha
	vmovddup(mem(rax), xmm0) // load alpha_r and duplicate
	vmovddup(mem(rax, 8), xmm1) // load alpha_i and duplicate

	vpermilpd(imm(0x1), xmm8, xmm9)
	vpermilpd(imm(0x1), xmm10, xmm11)
	vpermilpd(imm(0x1), xmm12, xmm13)
	vpermilpd(imm(0x1), xmm14, xmm15)

	vmulpd(xmm8, xmm0, xmm8)
	vmulpd(xmm10, xmm0, xmm10)
	vmulpd(xmm12, xmm0, xmm12)
	vmulpd(xmm14, xmm0, xmm14)

	vmulpd(xmm9, xmm1, xmm9)
	vmulpd(xmm11, xmm1, xmm11)
	vmulpd(xmm13, xmm1, xmm13)
	vmulpd(xmm15, xmm1, xmm15)

	vaddsubpd(xmm9, xmm8, xmm8)
	vaddsubpd(xmm11, xmm10, xmm10)
	vaddsubpd(xmm13, xmm12, xmm12)
	vaddsubpd(xmm15, xmm14, xmm14)



	mov(var(beta), rbx) // load address of beta
	vmovddup(mem(rbx), xmm6) // load beta_r and duplicate
	vmovddup(mem(rbx, 8), xmm7) // load beta_i and duplicate



	mov(var(rs_c), rsi) // load rs_c
	lea(mem(, rsi, 8), rsi) // rsi = rs_c * 8 ...
	lea(mem(, rsi, 2), rsi) // ... * 2, i.e. rs_c * sizeof(dcomplex)
	//lea(mem(rcx, rsi, 2), rdx) // load address of c + 2*rs_c;

	prefetch(0, mem(r15)) // prefetch b_next
	prefetch(0, mem(r15, 64)) // prefetch b_next



	// determine if
	//    c % 32 == 0, AND
	//   16*cs_c % 32 == 0, AND
	//    rs_c == 1
	// ie: aligned, ldim aligned, and
	// column-stored

	cmp(imm(16), rsi) // set ZF if (16*rs_c) == 16.
	sete(bl) // bl = ( ZF == 1 ? 1 : 0 );
	test(imm(31), rcx) // set ZF if c & 31 is zero.
	setz(bh) // bh = ( ZF == 1 ? 1 : 0 );
	test(imm(31), rdi) // set ZF if (16*cs_c) & 31 is zero.
	setz(al) // al = ( ZF == 1 ? 1 : 0 );
	         // and(bl,bh) followed by
	         // and(bh,al) will reveal result

	// now avoid loading C if beta == 0

	vxorpd(xmm0, xmm0, xmm0) // set xmm0 to zero.
	vucomisd(xmm0, xmm6) // set ZF if beta_r == 0.
	sete(r8b) // r8b = ( ZF == 1 ? 1 : 0 );
	vucomisd(xmm0, xmm7) // set ZF if beta_i == 0.
	sete(r9b) // r9b = ( ZF == 1 ? 1 : 0 );
	// NOTE(review): vucomisd also sets ZF for unordered operands, so a NaN
	// in beta would be treated as zero here -- confirm this is acceptable.
	and(r8b, r9b) // r9b &= r8b; ZF = 0 iff both parts of beta are zero.
	jne(.ZBETAZERO) // if ZF = 0, jump to beta == 0 case


	// check if aligned/column-stored
	and(bl, bh) // bh &= bl
	and(bh, al) // al &= bh; ZF = 0 iff all three conditions hold.
	jne(.ZCOLSTORED) // jump to column storage case



	label(.ZGENSTORED)

	// General strides: rows are rsi bytes apart. Each element is loaded,
	// multiplied by beta (complex), added to alpha*ab and stored back.

	vmovups(mem(rcx), xmm0) // load c00
	vmovups(mem(rcx, rsi, 1), xmm2) // load c10
	vpermilpd(imm(0x1), xmm0, xmm1)
	vpermilpd(imm(0x1), xmm2, xmm3)

	vmulpd(xmm6, xmm0, xmm0)
	vmulpd(xmm7, xmm1, xmm1)
	vaddsubpd(xmm1, xmm0, xmm0)
	vaddpd(xmm8, xmm0, xmm0)
	vmovups(xmm0, mem(rcx)) // store c00

	vmulpd(xmm6, xmm2, xmm2)
	vmulpd(xmm7, xmm3, xmm3)
	vaddsubpd(xmm3, xmm2, xmm2)
	vaddpd(xmm12, xmm2, xmm2)
	vmovups(xmm2, mem(rcx, rsi, 1)) // store c10



	vmovups(mem(r10), xmm0) // load c01
	vmovups(mem(r10, rsi, 1), xmm2) // load c11
	vpermilpd(imm(0x1), xmm0, xmm1)
	vpermilpd(imm(0x1), xmm2, xmm3)

	vmulpd(xmm6, xmm0, xmm0)
	vmulpd(xmm7, xmm1, xmm1)
	vaddsubpd(xmm1, xmm0, xmm0)
	vaddpd(xmm10, xmm0, xmm0)
	vmovups(xmm0, mem(r10)) // store c01

	vmulpd(xmm6, xmm2, xmm2)
	vmulpd(xmm7, xmm3, xmm3)
	vaddsubpd(xmm3, xmm2, xmm2)
	vaddpd(xmm14, xmm2, xmm2)
	vmovups(xmm2, mem(r10, rsi, 1)) // store c11



	jmp(.ZDONE) // jump to end.



	label(.ZCOLSTORED)

	// Column storage: the second row element sits 16 bytes after the
	// first, so a fixed displacement replaces the rsi index.

	vmovups(mem(rcx), xmm0) // load c00
	vmovups(mem(rcx, 16), xmm2) // load c10
	vpermilpd(imm(0x1), xmm0, xmm1)
	vpermilpd(imm(0x1), xmm2, xmm3)

	vmulpd(xmm6, xmm0, xmm0)
	vmulpd(xmm7, xmm1, xmm1)
	vaddsubpd(xmm1, xmm0, xmm0)
	vaddpd(xmm8, xmm0, xmm0)
	vmovups(xmm0, mem(rcx)) // store c00

	vmulpd(xmm6, xmm2, xmm2)
	vmulpd(xmm7, xmm3, xmm3)
	vaddsubpd(xmm3, xmm2, xmm2)
	vaddpd(xmm12, xmm2, xmm2)
	vmovups(xmm2, mem(rcx, 16)) // store c10



	vmovups(mem(r10), xmm0) // load c01
	vmovups(mem(r10, 16), xmm2) // load c11
	vpermilpd(imm(0x1), xmm0, xmm1)
	vpermilpd(imm(0x1), xmm2, xmm3)

	vmulpd(xmm6, xmm0, xmm0)
	vmulpd(xmm7, xmm1, xmm1)
	vaddsubpd(xmm1, xmm0, xmm0)
	vaddpd(xmm10, xmm0, xmm0)
	vmovups(xmm0, mem(r10)) // store c01

	vmulpd(xmm6, xmm2, xmm2)
	vmulpd(xmm7, xmm3, xmm3)
	vaddsubpd(xmm3, xmm2, xmm2)
	vaddpd(xmm14, xmm2, xmm2)
	vmovups(xmm2, mem(r10, 16)) // store c11



	jmp(.ZDONE) // jump to end.



	label(.ZBETAZERO)
	// beta == 0: c is never loaded; alpha*ab is stored directly.
	// check if aligned/column-stored
	and(bl, bh) // bh &= bl
	and(bh, al) // al &= bh; ZF = 0 iff all three conditions hold.
	jne(.ZCOLSTORBZ) // jump to column storage case



	label(.ZGENSTORBZ)


	vmovups(xmm8, mem(rcx)) // store c00
	vmovups(xmm12, mem(rcx, rsi, 1)) // store c10

	vmovups(xmm10, mem(r10)) // store c01
	vmovups(xmm14, mem(r10, rsi, 1)) // store c11



	jmp(.ZDONE) // jump to end.



	label(.ZCOLSTORBZ)


	vmovups(xmm8, mem(rcx)) // store c00
	vmovups(xmm12, mem(rcx, 16)) // store c10

	vmovups(xmm10, mem(r10)) // store c01
	vmovups(xmm14, mem(r10, 16)) // store c11




	label(.ZDONE)


	end_asm(
	: // output operands (none)
	: // input operands
	  [k_iter] "m" (k_iter), // 0
	  [k_left] "m" (k_left), // 1
	  [a] "m" (a), // 2
	  [b] "m" (b), // 3
	  [alpha] "m" (alpha), // 4
	  [beta] "m" (beta), // 5
	  [c] "m" (c), // 6
	  [rs_c] "m" (rs_c), // 7
	  [cs_c] "m" (cs_c), // 8
	  [b_next] "m" (b_next), // 9
	  [a_next] "m" (a_next) // 10
	: // register clobber list
	  "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
	  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
	  "xmm0", "xmm1", "xmm2", "xmm3",
	  "xmm4", "xmm5", "xmm6", "xmm7",
	  "xmm8", "xmm9", "xmm10", "xmm11",
	  "xmm12", "xmm13", "xmm14", "xmm15",
	  "memory"
	)
}

blis-0.6.1/kernels/piledriver/bli_kernels_piledriver.h000066400000000000000000000035661360743507500231720ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

// gemm micro-kernels for the piledriver configuration (assembly), one per
// datatype. Each line names the C type, the one-letter datatype prefix and
// the kernel name suffix; e.g. the dcomplex entry corresponds to
// bli_zgemm_piledriver_asm_2x2.
// NOTE(review): GEMM_UKR_PROT is defined elsewhere; presumably it expands to
// the function prototype -- confirm against its definition.
GEMM_UKR_PROT( float,    s, gemm_piledriver_asm_16x3 )
GEMM_UKR_PROT( double,   d, gemm_piledriver_asm_8x3 )
GEMM_UKR_PROT( scomplex, c, gemm_piledriver_asm_4x2 )
GEMM_UKR_PROT( dcomplex, z, gemm_piledriver_asm_2x2 )

blis-0.6.1/kernels/power7/000077500000000000000000000000001360743507500153475ustar00rootroot00000000000000blis-0.6.1/kernels/power7/3/000077500000000000000000000000001360743507500155115ustar00rootroot00000000000000blis-0.6.1/kernels/power7/3/bli_gemm_power7_int_8x4.c000066400000000000000000000542101360743507500223120ustar00rootroot00000000000000/*

   (C) Copyright IBM Corporation 2013

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifdef UTEST #include "blis_utest.h" #else #include "blis.h" #endif #include #define COLMAJ_INDEX(row,col,ld) ((col*ld)+row) #define ROWMAJ_INDEX(row,col,ld) ((row*ld)+col) #define BLIS_INDEX(row,col,rs,cs) ((row*rs)+(col*cs)) /* * Perform * c = beta * c + alpha * a * b * where * alpha & beta are scalars * c is mr x nr in blis-format, (col-stride & row-stride) * a is mr x k in packed col-maj format (leading dim is mr) * b is k x nr in packed row-maj format (leading dim is nr) */ void bli_sgemm_power7_int_8x4 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k = k0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; #if 1 || defined(UTEST) const long MR = BLIS_DEFAULT_MR_S, NR = BLIS_DEFAULT_NR_S; const long LDA = MR, LDB = NR; long i, j, kk; float c00; for (i=0; i < MR; i++) { for (j=0; j < NR; j++) { c00 = c[BLIS_INDEX(i,j,rs_c,cs_c)] * *beta; for (kk=0; kk < k; kk++) c00 += *alpha * (a[COLMAJ_INDEX(i,kk,LDA)] * b[ROWMAJ_INDEX(kk,j,LDB)]); c[BLIS_INDEX(i,j,rs_c,cs_c)] = c00; } } #else //BLIS_SGEMM_UKERNEL_REF(k, alpha, a, b, beta, c, rs_c, cs_c, data); #endif } /* * Perform * c = beta * c + alpha * a * b * where * alpha & beta are scalars * c is mr x nr in blis-format, (col-stride & row-stride) * a is mr x k in packed col-maj format (leading dim is mr) * b is k x nr in packed row-maj format (leading dim is nr) */ void bli_dgemm_power7_int_8x4 ( dim_t k0, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k = k0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; #if 1 if (rs_c == 1) { // Optimized code for case where C columns are contiguous (column-major C) vector double vzero = vec_splats( 0.0 ); vector double vc00_10 = vzero; vector double vc20_30 = vzero; vector double vc40_50 = vzero; vector double vc60_70 = vzero; vector double vc01_11 = vzero; vector double vc21_31 = vzero; vector double vc41_51 = vzero; vector double vc61_71 = vzero; vector double vc02_12 = vzero; vector double vc22_32 = vzero; vector double vc42_52 = vzero; vector double vc62_72 = vzero; vector double vc03_13 = vzero; vector double vc23_33 = vzero; vector double vc43_53 = vzero; vector double vc63_73 = vzero; unsigned long long pa = (unsigned long long)a; unsigned long long pb = (unsigned long long)b; #if 0 unsigned long long d1 = 1*sizeof(double); unsigned long long d2 = 2*sizeof(double); unsigned long long d3 = 3*sizeof(double); unsigned long long d4 = 4*sizeof(double); unsigned long long d6 = 6*sizeof(double); #else // ppc64 linux abi: r14-r31 Nonvolatile registers used for local variables register unsigned long long d1 __asm ("r21") = 1*sizeof(double); register unsigned long long d2 __asm ("r22") = 2*sizeof(double); register unsigned long long d3 __asm ("r23") = 3*sizeof(double); register unsigned long long d4 __asm ("r24") = 4*sizeof(double); register unsigned long long d6 __asm ("r26") = 6*sizeof(double); __asm__ volatile (";" : "=r" (d1) : "r" (d1) ); __asm__ volatile (";" : "=r" (d2) : "r" (d2) ); __asm__ volatile (";" : "=r" (d3) : "r" (d3) ); __asm__ volatile (";" : "=r" (d4) : "r" (d4) ); __asm__ volatile (";" : "=r" (d6) : "r" (d6) ); #endif int kk; for (kk=k; kk > 1; kk-=2) { vector double va00_10 = *(vector double *)( pa+0 ); vector double va20_30 = *(vector double *)( pa+d2 ); vector double va40_50 = *(vector double *)( pa+d4 ); vector double va60_70 = *(vector double *)( pa+d6 ); pa += 8*sizeof(double); vector double vb00 = vec_splats( *(double *)( pb+0 ) ); vector 
double vb01 = vec_splats( *(double *)( pb+d1 ) ); vector double vb02 = vec_splats( *(double *)( pb+d2 ) ); vector double vb03 = vec_splats( *(double *)( pb+d3 ) ); pb += 4*sizeof(double); vc00_10 = vec_madd(va00_10, vb00, vc00_10); vc20_30 = vec_madd(va20_30, vb00, vc20_30); vc40_50 = vec_madd(va40_50, vb00, vc40_50); vc60_70 = vec_madd(va60_70, vb00, vc60_70); vc01_11 = vec_madd(va00_10, vb01, vc01_11); vc21_31 = vec_madd(va20_30, vb01, vc21_31); vc41_51 = vec_madd(va40_50, vb01, vc41_51); vc61_71 = vec_madd(va60_70, vb01, vc61_71); vc02_12 = vec_madd(va00_10, vb02, vc02_12); vc22_32 = vec_madd(va20_30, vb02, vc22_32); vc42_52 = vec_madd(va40_50, vb02, vc42_52); vc62_72 = vec_madd(va60_70, vb02, vc62_72); vc03_13 = vec_madd(va00_10, vb03, vc03_13); vc23_33 = vec_madd(va20_30, vb03, vc23_33); vc43_53 = vec_madd(va40_50, vb03, vc43_53); vc63_73 = vec_madd(va60_70, vb03, vc63_73); va00_10 = *(vector double *)( pa+0 ); va20_30 = *(vector double *)( pa+d2 ); va40_50 = *(vector double *)( pa+d4 ); va60_70 = *(vector double *)( pa+d6 ); pa += 8*sizeof(double); vb00 = vec_splats( *(double *)( pb+0 ) ); vb01 = vec_splats( *(double *)( pb+d1 ) ); vb02 = vec_splats( *(double *)( pb+d2 ) ); vb03 = vec_splats( *(double *)( pb+d3 ) ); pb += 4*sizeof(double); vc00_10 = vec_madd(va00_10, vb00, vc00_10); vc20_30 = vec_madd(va20_30, vb00, vc20_30); vc40_50 = vec_madd(va40_50, vb00, vc40_50); vc60_70 = vec_madd(va60_70, vb00, vc60_70); vc01_11 = vec_madd(va00_10, vb01, vc01_11); vc21_31 = vec_madd(va20_30, vb01, vc21_31); vc41_51 = vec_madd(va40_50, vb01, vc41_51); vc61_71 = vec_madd(va60_70, vb01, vc61_71); vc02_12 = vec_madd(va00_10, vb02, vc02_12); vc22_32 = vec_madd(va20_30, vb02, vc22_32); vc42_52 = vec_madd(va40_50, vb02, vc42_52); vc62_72 = vec_madd(va60_70, vb02, vc62_72); vc03_13 = vec_madd(va00_10, vb03, vc03_13); vc23_33 = vec_madd(va20_30, vb03, vc23_33); vc43_53 = vec_madd(va40_50, vb03, vc43_53); vc63_73 = vec_madd(va60_70, vb03, vc63_73); } for (kk=kk; kk > 0; kk--) { 
vector double va00_10 = *(vector double *)( pa+0 ); vector double va20_30 = *(vector double *)( pa+d2 ); vector double va40_50 = *(vector double *)( pa+d4 ); vector double va60_70 = *(vector double *)( pa+d6 ); pa += 8*sizeof(double); vector double vb00 = vec_splats( *(double *)( pb+0 ) ); vector double vb01 = vec_splats( *(double *)( pb+d1 ) ); vector double vb02 = vec_splats( *(double *)( pb+d2 ) ); vector double vb03 = vec_splats( *(double *)( pb+d3 ) ); pb += 4*sizeof(double); vc00_10 = vec_madd(va00_10, vb00, vc00_10); vc20_30 = vec_madd(va20_30, vb00, vc20_30); vc40_50 = vec_madd(va40_50, vb00, vc40_50); vc60_70 = vec_madd(va60_70, vb00, vc60_70); vc01_11 = vec_madd(va00_10, vb01, vc01_11); vc21_31 = vec_madd(va20_30, vb01, vc21_31); vc41_51 = vec_madd(va40_50, vb01, vc41_51); vc61_71 = vec_madd(va60_70, vb01, vc61_71); vc02_12 = vec_madd(va00_10, vb02, vc02_12); vc22_32 = vec_madd(va20_30, vb02, vc22_32); vc42_52 = vec_madd(va40_50, vb02, vc42_52); vc62_72 = vec_madd(va60_70, vb02, vc62_72); vc03_13 = vec_madd(va00_10, vb03, vc03_13); vc23_33 = vec_madd(va20_30, vb03, vc23_33); vc43_53 = vec_madd(va40_50, vb03, vc43_53); vc63_73 = vec_madd(va60_70, vb03, vc63_73); } // The following code is dependent on rs_c == 1 vector double valpha = vec_splats( *alpha ); vector double vbeta = (vector double) { *beta, *beta }; vector double *pc = (vector double *)c; vc00_10 = vec_mul(valpha, vc00_10); vc20_30 = vec_mul(valpha, vc20_30); vc40_50 = vec_mul(valpha, vc40_50); vc60_70 = vec_mul(valpha, vc60_70); pc[0] = vec_madd( pc[0], vbeta, vc00_10); pc[1] = vec_madd( pc[1], vbeta, vc20_30); pc[2] = vec_madd( pc[2], vbeta, vc40_50); pc[3] = vec_madd( pc[3], vbeta, vc60_70); pc += cs_c/2; vc01_11 = vec_mul(valpha, vc01_11); vc21_31 = vec_mul(valpha, vc21_31); vc41_51 = vec_mul(valpha, vc41_51); vc61_71 = vec_mul(valpha, vc61_71); pc[0] = vec_madd( pc[0], vbeta, vc01_11); pc[1] = vec_madd( pc[1], vbeta, vc21_31); pc[2] = vec_madd( pc[2], vbeta, vc41_51); pc[3] = vec_madd( 
pc[3], vbeta, vc61_71); pc += cs_c/2; vc02_12 = vec_mul(valpha, vc02_12); vc22_32 = vec_mul(valpha, vc22_32); vc42_52 = vec_mul(valpha, vc42_52); vc62_72 = vec_mul(valpha, vc62_72); pc[0] = vec_madd( pc[0], vbeta, vc02_12); pc[1] = vec_madd( pc[1], vbeta, vc22_32); pc[2] = vec_madd( pc[2], vbeta, vc42_52); pc[3] = vec_madd( pc[3], vbeta, vc62_72); pc += cs_c/2; vc03_13 = vec_mul(valpha, vc03_13); vc23_33 = vec_mul(valpha, vc23_33); vc43_53 = vec_mul(valpha, vc43_53); vc63_73 = vec_mul(valpha, vc63_73); pc[0] = vec_madd( pc[0], vbeta, vc03_13); pc[1] = vec_madd( pc[1], vbeta, vc23_33); pc[2] = vec_madd( pc[2], vbeta, vc43_53); pc[3] = vec_madd( pc[3], vbeta, vc63_73); } else #endif #if 1 if ( cs_c == 1 ) { // Optimized code for case where C rows are contiguous (i.e. C is row-major) vector double vzero = vec_splats( 0.0 ); vector double vc00_01 = vzero; vector double vc02_03 = vzero; vector double vc10_11 = vzero; vector double vc12_13 = vzero; vector double vc20_21 = vzero; vector double vc22_23 = vzero; vector double vc30_31 = vzero; vector double vc32_33 = vzero; vector double vc40_41 = vzero; vector double vc42_43 = vzero; vector double vc50_51 = vzero; vector double vc52_53 = vzero; vector double vc60_61 = vzero; vector double vc62_63 = vzero; vector double vc70_71 = vzero; vector double vc72_73 = vzero; unsigned long long pa = (unsigned long long)a; unsigned long long pb = (unsigned long long)b; #if 0 unsigned long long d1 = 1*sizeof(double); unsigned long long d2 = 2*sizeof(double); unsigned long long d3 = 3*sizeof(double); unsigned long long d4 = 4*sizeof(double); unsigned long long d6 = 6*sizeof(double); #else // ppc64 linux abi: r14-r31 Nonvolatile registers used for local variables register unsigned long long d1 __asm ("r21") = 1*sizeof(double); register unsigned long long d2 __asm ("r22") = 2*sizeof(double); register unsigned long long d3 __asm ("r23") = 3*sizeof(double); register unsigned long long d4 __asm ("r24") = 4*sizeof(double); register unsigned 
long long d5 __asm ("r25") = 5*sizeof(double); register unsigned long long d6 __asm ("r26") = 6*sizeof(double); register unsigned long long d7 __asm ("r27") = 7*sizeof(double); __asm__ volatile (";" : "=r" (d1) : "r" (d1) ); __asm__ volatile (";" : "=r" (d2) : "r" (d2) ); __asm__ volatile (";" : "=r" (d3) : "r" (d3) ); __asm__ volatile (";" : "=r" (d4) : "r" (d4) ); __asm__ volatile (";" : "=r" (d5) : "r" (d5) ); __asm__ volatile (";" : "=r" (d6) : "r" (d6) ); __asm__ volatile (";" : "=r" (d7) : "r" (d7) ); #endif int kk; for (kk=k; kk > 0; kk--) { vector double va00 = vec_splats( *(double *)( pa+0 ) ); vector double va10 = vec_splats( *(double *)( pa+d1 ) ); vector double va20 = vec_splats( *(double *)( pa+d2 ) ); vector double va30 = vec_splats( *(double *)( pa+d3 ) ); vector double va40 = vec_splats( *(double *)( pa+d4 ) ); vector double va50 = vec_splats( *(double *)( pa+d5 ) ); vector double va60 = vec_splats( *(double *)( pa+d6 ) ); vector double va70 = vec_splats( *(double *)( pa+d7 ) ); pa += 8*sizeof(double); vector double vb00_01 = *(vector double *)( pb+0 ); vector double vb02_03 = *(vector double *)( pb+d2 ); pb += 4*sizeof(double); vc00_01 = vec_madd(va00, vb00_01, vc00_01); vc02_03 = vec_madd(va00, vb02_03, vc02_03); vc10_11 = vec_madd(va10, vb00_01, vc10_11); vc12_13 = vec_madd(va10, vb02_03, vc12_13); vc20_21 = vec_madd(va20, vb00_01, vc20_21); vc22_23 = vec_madd(va20, vb02_03, vc22_23); vc30_31 = vec_madd(va30, vb00_01, vc30_31); vc32_33 = vec_madd(va30, vb02_03, vc32_33); vc40_41 = vec_madd(va40, vb00_01, vc40_41); vc42_43 = vec_madd(va40, vb02_03, vc42_43); vc50_51 = vec_madd(va50, vb00_01, vc50_51); vc52_53 = vec_madd(va50, vb02_03, vc52_53); vc60_61 = vec_madd(va60, vb00_01, vc60_61); vc62_63 = vec_madd(va60, vb02_03, vc62_63); vc70_71 = vec_madd(va70, vb00_01, vc70_71); vc72_73 = vec_madd(va70, vb02_03, vc72_73); } vector double valpha = vec_splats( *alpha ); vector double vbeta = (vector double) { *beta, *beta }; vector double *pc = (vector 
double *)c; vc00_01 = vec_mul(valpha, vc00_01); vc02_03 = vec_mul(valpha, vc02_03); pc[0] = vec_madd( pc[0], vbeta, vc00_01); pc[1] = vec_madd( pc[1], vbeta, vc02_03); pc += rs_c/2; vc10_11 = vec_mul(valpha, vc10_11); vc12_13 = vec_mul(valpha, vc12_13); pc[0] = vec_madd( pc[0], vbeta, vc10_11); pc[1] = vec_madd( pc[1], vbeta, vc12_13); pc += rs_c/2; vc20_21 = vec_mul(valpha, vc20_21); vc22_23 = vec_mul(valpha, vc22_23); pc[0] = vec_madd( pc[0], vbeta, vc20_21); pc[1] = vec_madd( pc[1], vbeta, vc22_23); pc += rs_c/2; vc30_31 = vec_mul(valpha, vc30_31); vc32_33 = vec_mul(valpha, vc32_33); pc[0] = vec_madd( pc[0], vbeta, vc30_31); pc[1] = vec_madd( pc[1], vbeta, vc32_33); pc += rs_c/2; vc40_41 = vec_mul(valpha, vc40_41); vc42_43 = vec_mul(valpha, vc42_43); pc[0] = vec_madd( pc[0], vbeta, vc40_41); pc[1] = vec_madd( pc[1], vbeta, vc42_43); pc += rs_c/2; vc50_51 = vec_mul(valpha, vc50_51); vc52_53 = vec_mul(valpha, vc52_53); pc[0] = vec_madd( pc[0], vbeta, vc50_51); pc[1] = vec_madd( pc[1], vbeta, vc52_53); pc += rs_c/2; vc60_61 = vec_mul(valpha, vc60_61); vc62_63 = vec_mul(valpha, vc62_63); pc[0] = vec_madd( pc[0], vbeta, vc60_61); pc[1] = vec_madd( pc[1], vbeta, vc62_63); pc += rs_c/2; vc70_71 = vec_mul(valpha, vc70_71); vc72_73 = vec_mul(valpha, vc72_73); pc[0] = vec_madd( pc[0], vbeta, vc70_71); pc[1] = vec_madd( pc[1], vbeta, vc72_73); pc += rs_c/2; } else #endif { /* General case. Just do it right. 
*/ #if 1 || defined(UTEST) const long MR = BLIS_DEFAULT_MR_D, NR = BLIS_DEFAULT_NR_D; const long LDA = MR, LDB = NR; int i, j, kk; double c00; for (i=0; i < MR; i++) { for (j=0; j < NR; j++) { c00 = c[BLIS_INDEX(i,j,rs_c,cs_c)] * *beta; for (kk=0; kk < k; kk++) c00 += *alpha * (a[COLMAJ_INDEX(i,kk,LDA)] * b[ROWMAJ_INDEX(kk,j,LDB)]); c[BLIS_INDEX(i,j,rs_c,cs_c)] = c00; } } #else //BLIS_DGEMM_UKERNEL_REF(k, alpha, a, b, beta, c, rs_c, cs_c, data); #endif } } /* * Perform * c = beta * c + alpha * a * b * where * alpha & beta are scalars * c is mr x nr in blis-format, (col-stride & row-stride) * a is mr x k in packed col-maj format (leading dim is mr) * b is k x nr in packed row-maj format (leading dim is nr) */ void bli_cgemm_power7_int_8x4 ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k = k0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; #if 1 || defined(UTEST) const long MR = BLIS_DEFAULT_MR_C, NR = BLIS_DEFAULT_NR_C; const long LDA = MR, LDB = NR; int i, j, kk; scomplex c00; for (i=0; i < MR; i++) { for (j=0; j < NR; j++) { scomplex tmpc, tmpa, tmpb, tmp; //c00 = c[BLIS_INDEX(i,j,rs_c,cs_c)] * *beta; tmpc = c[BLIS_INDEX(i,j,rs_c,cs_c)]; c00.real = tmpc.real * (*beta).real - tmpc.imag * (*beta).imag; c00.imag = tmpc.real * (*beta).imag + tmpc.imag * (*beta).real; for (kk=0; kk < k; kk++) { //c00 += *alpha * (a[COLMAJ_INDEX(i,kk,LDA)] * b[ROWMAJ_INDEX(kk,j,LDB)]); tmpa = a[COLMAJ_INDEX(i,kk,LDA)]; tmpb = b[ROWMAJ_INDEX(kk,j,LDB)]; tmp.real = tmpa.real * tmpb.real - tmpa.imag * tmpb.imag; tmp.imag = tmpa.real * tmpb.imag + tmpa.imag * tmpb.real; c00.real += (*alpha).real * tmp.real - (*alpha).imag * tmp.imag; c00.imag += (*alpha).real * tmp.imag + (*alpha).imag * tmp.real; } c[BLIS_INDEX(i,j,rs_c,cs_c)] = c00; } } #else //BLIS_CGEMM_UKERNEL_REF(k, alpha, a, b, beta, c, rs_c, cs_c, data); #endif } /* * Perform * c = beta * c + alpha * a * b * where * alpha & beta are scalars * c is mr x nr in blis-format, (col-stride & row-stride) * a is mr x k in packed col-maj format (leading dim is mr) * b is k x nr in packed row-maj format (leading dim is nr) */ void bli_zgemm_power7_int_8x4 ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
uint64_t k = k0; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; #if 1 || defined(UTEST) const long MR = BLIS_DEFAULT_MR_Z, NR = BLIS_DEFAULT_NR_Z; const long LDA = MR, LDB = NR; int i, j, kk; dcomplex c00; for (i=0; i < MR; i++) { for (j=0; j < NR; j++) { dcomplex tmpc, tmpa, tmpb, tmp; //c00 = c[BLIS_INDEX(i,j,rs_c,cs_c)] * *beta; tmpc = c[BLIS_INDEX(i,j,rs_c,cs_c)]; c00.real = tmpc.real * (*beta).real - tmpc.imag * (*beta).imag; c00.imag = tmpc.real * (*beta).imag + tmpc.imag * (*beta).real; for (kk=0; kk < k; kk++) { //c00 += *alpha * (a[COLMAJ_INDEX(i,kk,LDA)] * b[ROWMAJ_INDEX(kk,j,LDB)]); tmpa = a[COLMAJ_INDEX(i,kk,LDA)]; tmpb = b[ROWMAJ_INDEX(kk,j,LDB)]; tmp.real = tmpa.real * tmpb.real - tmpa.imag * tmpb.imag; tmp.imag = tmpa.real * tmpb.imag + tmpa.imag * tmpb.real; c00.real += (*alpha).real * tmp.real - (*alpha).imag * tmp.imag; c00.imag += (*alpha).real * tmp.imag + (*alpha).imag * tmp.real; } c[BLIS_INDEX(i,j,rs_c,cs_c)] = c00; } } #else //BLIS_ZGEMM_UKERNEL_REF(k, alpha, a, b, beta, c, rs_c, cs_c, data); #endif } blis-0.6.1/kernels/power7/3/test/000077500000000000000000000000001360743507500164705ustar00rootroot00000000000000blis-0.6.1/kernels/power7/3/test/Makefile000066400000000000000000000003061360743507500201270ustar00rootroot00000000000000 CC = gcc TARGET_ARCH = -m64 -mvsx TGTS = exp KERNEL = bli_gemm_opt_8x4.o CFLAGS = -DUTEST -std=gnu99 -ggdb3 -Wall CFLAGS += -O3 all: $(TGTS) exp: exp.o $(KERNEL) clean: rm -f $(TGTS) *.o blis-0.6.1/kernels/power7/3/test/bli_gemm_power7_int_8x4.c000077700000000000000000000000001360743507500302742../bli_gemm_power7_int_8x4.custar00rootroot00000000000000blis-0.6.1/kernels/power7/3/test/bli_gemm_power7_int_8x4.h000066400000000000000000000057741360743507500233110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#ifndef _BLI_GEMM_OPT_8X4_H_
#define _BLI_GEMM_OPT_8X4_H_

// A stand-alone unit-test build (UTEST defined) takes its types and blocksize
// macros from blis_utest.h; any other build uses the full blis.h.
#ifdef UTEST
#include "blis_utest.h"
#else
#include "blis.h"
#endif

// Prototypes of the four 8x4 gemm micro-kernels (s, d, c, z). Each takes the
// k dimension, pointers to alpha, the a and b micro-panels, beta and the c
// tile, the row/column strides of c, and the auxinfo/context pointers.
// NOTE(review): the complex kernels in the power7 kernel source are named
// bli_cgemm_power7_int_8x4 / bli_zgemm_power7_int_8x4, not bli_?gemm_opt_8x4;
// confirm which names the test program is meant to link against.
void bli_sgemm_opt_8x4
     (
       dim_t               k,
       float*     restrict alpha,
       float*     restrict a,
       float*     restrict b,
       float*     restrict beta,
       float*     restrict c, inc_t rs_c, inc_t cs_c,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     );

void bli_dgemm_opt_8x4
     (
       dim_t               k,
       double*    restrict alpha,
       double*    restrict a,
       double*    restrict b,
       double*    restrict beta,
       double*    restrict c, inc_t rs_c, inc_t cs_c,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     );

void bli_cgemm_opt_8x4
     (
       dim_t               k,
       scomplex*  restrict alpha,
       scomplex*  restrict a,
       scomplex*  restrict b,
       scomplex*  restrict beta,
       scomplex*  restrict c, inc_t rs_c, inc_t cs_c,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     );

void bli_zgemm_opt_8x4
     (
       dim_t               k,
       dcomplex*  restrict alpha,
       dcomplex*  restrict a,
       dcomplex*  restrict b,
       dcomplex*  restrict beta,
       dcomplex*  restrict c, inc_t rs_c, inc_t cs_c,
       auxinfo_t* restrict data,
       cntx_t*    restrict cntx
     );

#endif
blis-0.6.1/kernels/power7/3/test/blis_utest.h000066400000000000000000000010751360743507500210210ustar00rootroot00000000000000

// Minimal stand-in for blis.h used by the stand-alone kernel test: it supplies
// only the blocksize macros, the integer typedefs and the complex structs.
// NOTE(review): auxinfo_t and cntx_t, which the kernel prototypes use, are not
// defined in this header; confirm where a UTEST build is expected to get them.
#ifndef _BLIS_UTEST_H_
#define _BLIS_UTEST_H_

// Micro-tile dimensions: MR = 8 rows, NR = 4 columns for every datatype.
#define BLIS_DEFAULT_MR_S 8
#define BLIS_DEFAULT_NR_S 4

#define BLIS_DEFAULT_MR_D 8
#define BLIS_DEFAULT_NR_D 4

#define BLIS_DEFAULT_MR_C 8
#define BLIS_DEFAULT_NR_C 4

#define BLIS_DEFAULT_MR_Z 8
#define BLIS_DEFAULT_NR_Z 4

// Dimension (unsigned) and stride (signed) integer types.
typedef unsigned long dim_t;
typedef long inc_t;

// Complex types
typedef struct scomplex_s
{
    float real;
    float imag;
} scomplex;

typedef struct dcomplex_s
{
    double real;
    double imag;
} dcomplex;

// Error checking expands to nothing in the test build.
#define bli_check_error_code(x)

#endif
blis-0.6.1/kernels/power7/3/test/exp.c000066400000000000000000000071471360743507500174410ustar00rootroot00000000000000
#ifdef UTEST

// NOTE(review): the header names of the next four includes are missing in
// this copy of the file; only the trailing "fabs" remark survives.
#include
#include
#include
#include /* fabs */

// NOTE(review): the header shipped next to this file is named
// bli_gemm_power7_int_8x4.h, not bli_gemm_power7_opt_8x4.h; confirm.
#include "blis_utest.h"
#include "bli_gemm_power7_opt_8x4.h"

// Linear index of element (row,col) in a column-major / row-major array with
// leading dimension ld.
#define COLMAJ_INDEX(row,col,ld) ((col*ld)+row)
#define ROWMAJ_INDEX(row,col,ld) ((row*ld)+col)
#define
BLIS_INDEX(row,col,rs,cs) ((row*rs)+(col*cs)) #define MR BLIS_DEFAULT_MR_D #define NR BLIS_DEFAULT_NR_D #define LDA MR #define LDB NR #define EPSILON 0.0000001 /* * Perform * c = beta * c + alpha * a * b * where * alpha & beta are scalars * c is mr x nr in blis-format, (col-stride & row-stride) * a is mr x k in packed col-maj format (leading dim is mr) * b is k x nr in packed row-maj format (leading dim is nr) */ void bli_dgemm_check( dim_t k, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* data ) { int i, j, kk; double c00; for (i=0; i < MR; i++) { for (j=0; j < NR; j++) { c00 = c[BLIS_INDEX(i,j,rs_c,cs_c)] * *beta; for (kk=0; kk < k; kk++) c00 += *alpha * (a[COLMAJ_INDEX(i,kk,LDA)] * b[ROWMAJ_INDEX(kk,j,LDB)]); c[BLIS_INDEX(i,j,rs_c,cs_c)] = c00; } } } int main(int argc, char *argv[]) { double *A, *B, *C, *C2; double alpha = 1.0, beta = 1.0; long i, j; long k = 128; int iters = 10; int errors; struct timeval tv_start, tv_end; switch (argc) { case 2: k = atoi(argv[1]); case 1: break; default: printf("Usage: %s [k]\n", argv[0]); return 1; break; } //long rs_c = 1, cs_c = MR; // Column major long rs_c = NR, cs_c = 1; // Row major A = (double*)malloc(LDA * k * sizeof(double)); B = (double*)malloc(LDB * k * sizeof(double)); C = (double*)malloc(MR * NR * sizeof(double)); C2 = (double*)malloc(MR * NR * sizeof(double)); /* Initialize C matrix in blis format */ for (j=0; j EPSILON) { if (errors<20) printf(" %ld expected=%f got=%f\n", i, C2[i], C[i]); errors++; } } printf("Errors = %d\n", errors); if (errors) { return -1; } /* Now get the performance */ gettimeofday(&tv_start, NULL); for (i=0; i #include "blis.h" #if 0 void bli_sgemm_sandybridge_int_8x8 ( dim_t k0, float* restrict alpha, float* restrict a, float* restrict b, float* restrict beta, float* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { } #endif void 
bli_dgemm_sandybridge_int_8x4 ( dim_t k0, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { //void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. uint64_t k_iter = k0 / 2; uint64_t k_left = k0 % 2; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; uint64_t i; double *c00, *c01, *c02, *c03; double *c40, *c41, *c42, *c43; // Quad registers. __m256d va0_3, va4_7; __m256d vA0_3, vA4_7; __m256d vb0, vb1, vb2, vb3; __m256d vb; __m256d vB0; __m256d va0_3b_0, va4_7b_0; __m256d va0_3b_1, va4_7b_1; __m256d va0_3b_2, va4_7b_2; __m256d va0_3b_3, va4_7b_3; __m256d va0_3b0, va4_7b0; __m256d va0_3b1, va4_7b1; __m256d va0_3b2, va4_7b2; __m256d va0_3b3, va4_7b3; __m256d valpha, vbeta, vtmp; __m256d vc0_3_0, vc0_3_1, vc0_3_2, vc0_3_3; __m256d vc4_7_0, vc4_7_1, vc4_7_2, vc4_7_3; __m128d aa, bb; __asm__ volatile( "prefetcht0 0(%0) \n\t" : :"r"(a) ); __asm__ volatile( "prefetcht2 0(%0) \n\t" : :"r"(b_next) ); __asm__ volatile( "prefetcht0 0(%0) \n\t" : :"r"(c) ); va0_3b0 = _mm256_setzero_pd(); va0_3b1 = _mm256_setzero_pd(); va0_3b2 = _mm256_setzero_pd(); va0_3b3 = _mm256_setzero_pd(); va4_7b0 = _mm256_setzero_pd(); va4_7b1 = _mm256_setzero_pd(); va4_7b2 = _mm256_setzero_pd(); va4_7b3 = _mm256_setzero_pd(); va0_3b_0 = _mm256_setzero_pd(); va0_3b_1 = _mm256_setzero_pd(); va0_3b_2 = _mm256_setzero_pd(); va0_3b_3 = _mm256_setzero_pd(); va4_7b_0 = _mm256_setzero_pd(); va4_7b_1 = _mm256_setzero_pd(); va4_7b_2 = _mm256_setzero_pd(); va4_7b_3 = _mm256_setzero_pd(); // Load va0_3 va0_3 = _mm256_load_pd( a ); // Load va4_7 va4_7 = _mm256_load_pd( a + 4 ); // Load vb (b0,b1,b2,b3) vb0 = _mm256_load_pd( b ); for( i = 0; i < k_iter; ++i ) { __asm__ volatile( "prefetcht0 192(%0) \n\t" : :"r"(a) ); // Load va0_3 
(Prefetch) vA0_3 = _mm256_load_pd( a + 8 ); // Iteration 0. vtmp = _mm256_mul_pd( va0_3, vb0 ); va0_3b_0 = _mm256_add_pd( va0_3b_0, vtmp ); vtmp = _mm256_mul_pd( va4_7, vb0 ); va4_7b_0 = _mm256_add_pd( va4_7b_0, vtmp ); // Load va4_7 (Prefetch) vA4_7 = _mm256_load_pd( a + 12 ); // Shuffle vb (b1,b0,b3,b2) vb1 = _mm256_shuffle_pd( vb0, vb0, 0x5 ); vtmp = _mm256_mul_pd( va0_3, vb1 ); va0_3b_1 = _mm256_add_pd( va0_3b_1, vtmp ); vtmp = _mm256_mul_pd( va4_7, vb1 ); va4_7b_1 = _mm256_add_pd( va4_7b_1, vtmp ); // Permute vb (b3,b2,b1,b0) vb2 = _mm256_permute2f128_pd( vb1, vb1, 0x1 ); // Load vb (b0,b1,b2,b3) (Prefetch) vB0 = _mm256_load_pd( b + 4 ); vtmp = _mm256_mul_pd( va0_3, vb2 ); va0_3b_2 = _mm256_add_pd( va0_3b_2, vtmp ); vtmp = _mm256_mul_pd( va4_7, vb2 ); va4_7b_2 = _mm256_add_pd( va4_7b_2, vtmp ); // Shuffle vb (b3,b2,b1,b0) vb3 = _mm256_shuffle_pd( vb2, vb2, 0x5 ); vtmp = _mm256_mul_pd( va0_3, vb3 ); va0_3b_3 = _mm256_add_pd( va0_3b_3, vtmp ); vtmp = _mm256_mul_pd( va4_7, vb3 ); va4_7b_3 = _mm256_add_pd( va4_7b_3, vtmp ); // Iteration 1. 
__asm__ volatile( "prefetcht0 512(%0) \n\t" : :"r"(a) ); // Load va0_3 (Next iteration) va0_3 = _mm256_load_pd( a + 16 ); vtmp = _mm256_mul_pd( vA0_3, vB0 ); va0_3b_0 = _mm256_add_pd( va0_3b_0, vtmp ); vb1 = _mm256_shuffle_pd( vB0, vB0, 0x5 ); vtmp = _mm256_mul_pd( vA4_7, vB0 ); va4_7b_0 = _mm256_add_pd( va4_7b_0, vtmp ); vtmp = _mm256_mul_pd( vA0_3, vb1 ); va0_3b_1 = _mm256_add_pd( va0_3b_1, vtmp ); // Load va4_7 (Next iteration) va4_7 = _mm256_load_pd( a + 20 ); vb2 = _mm256_permute2f128_pd( vb1, vb1, 0x1 ); vtmp = _mm256_mul_pd( vA4_7, vb1 ); va4_7b_1 = _mm256_add_pd( va4_7b_1, vtmp ); vtmp = _mm256_mul_pd( vA0_3, vb2 ); va0_3b_2 = _mm256_add_pd( va0_3b_2, vtmp ); vb3 = _mm256_shuffle_pd( vb2, vb2, 0x5 ); vtmp = _mm256_mul_pd( vA4_7, vb2 ); va4_7b_2 = _mm256_add_pd( va4_7b_2, vtmp ); // Load vb0(Next iteration) vb0 = _mm256_load_pd( b + 8 ); vtmp = _mm256_mul_pd( vA0_3, vb3 ); va0_3b_3 = _mm256_add_pd( va0_3b_3, vtmp ); vtmp = _mm256_mul_pd( vA4_7, vb3 ); va4_7b_3 = _mm256_add_pd( va4_7b_3, vtmp ); a += 16; b += 8; } for( i = 0; i < k_left; ++i ) { // Iteration 0. 
// Load va0_3 va0_3 = _mm256_load_pd( a ); // Load va4_7 va4_7 = _mm256_load_pd( a + 4 ); // Load vb (b0,b1,b2,b3) vb = _mm256_load_pd( b ); vtmp = _mm256_mul_pd( va0_3, vb ); va0_3b_0 = _mm256_add_pd( va0_3b_0, vtmp ); vtmp = _mm256_mul_pd( va4_7, vb ); va4_7b_0 = _mm256_add_pd( va4_7b_0, vtmp ); // Shuffle vb (b1,b0,b3,b2) vb = _mm256_shuffle_pd( vb, vb, 0x5 ); vtmp = _mm256_mul_pd( va0_3, vb ); va0_3b_1 = _mm256_add_pd( va0_3b_1, vtmp ); vtmp = _mm256_mul_pd( va4_7, vb ); va4_7b_1 = _mm256_add_pd( va4_7b_1, vtmp ); // Permute vb (b3,b2,b1,b0) vb = _mm256_permute2f128_pd( vb, vb, 0x1 ); vtmp = _mm256_mul_pd( va0_3, vb ); va0_3b_2 = _mm256_add_pd( va0_3b_2, vtmp ); vtmp = _mm256_mul_pd( va4_7, vb ); va4_7b_2 = _mm256_add_pd( va4_7b_2, vtmp ); // Shuffle vb (b3,b2,b1,b0) vb = _mm256_shuffle_pd( vb, vb, 0x5 ); vtmp = _mm256_mul_pd( va0_3, vb ); va0_3b_3 = _mm256_add_pd( va0_3b_3, vtmp ); vtmp = _mm256_mul_pd( va4_7, vb ); va4_7b_3 = _mm256_add_pd( va4_7b_3, vtmp ); a += 8; b += 4; } vbeta = _mm256_broadcast_sd( beta ); __m256d vtmpa_0_3b_0 = _mm256_blend_pd( va0_3b_0, va0_3b_1, 0x6 ); __m256d vtmpa_0_3b_1 = _mm256_blend_pd( va0_3b_1, va0_3b_0, 0x6 ); __m256d vtmpa_0_3b_2 = _mm256_blend_pd( va0_3b_2, va0_3b_3, 0x6 ); __m256d vtmpa_0_3b_3 = _mm256_blend_pd( va0_3b_3, va0_3b_2, 0x6 ); __m256d vtmpa_4_7b_0 = _mm256_blend_pd( va4_7b_0, va4_7b_1, 0x6 ); __m256d vtmpa_4_7b_1 = _mm256_blend_pd( va4_7b_1, va4_7b_0, 0x6 ); __m256d vtmpa_4_7b_2 = _mm256_blend_pd( va4_7b_2, va4_7b_3, 0x6 ); __m256d vtmpa_4_7b_3 = _mm256_blend_pd( va4_7b_3, va4_7b_2, 0x6 ); valpha = _mm256_broadcast_sd( alpha ); va0_3b0 = _mm256_permute2f128_pd( vtmpa_0_3b_0, vtmpa_0_3b_2, 0x30 ); va0_3b3 = _mm256_permute2f128_pd( vtmpa_0_3b_2, vtmpa_0_3b_0, 0x30 ); va0_3b1 = _mm256_permute2f128_pd( vtmpa_0_3b_1, vtmpa_0_3b_3, 0x30 ); va0_3b2 = _mm256_permute2f128_pd( vtmpa_0_3b_3, vtmpa_0_3b_1, 0x30 ); va4_7b0 = _mm256_permute2f128_pd( vtmpa_4_7b_0, vtmpa_4_7b_2, 0x30 ); va4_7b3 = _mm256_permute2f128_pd( 
vtmpa_4_7b_2, vtmpa_4_7b_0, 0x30 ); va4_7b1 = _mm256_permute2f128_pd( vtmpa_4_7b_1, vtmpa_4_7b_3, 0x30 ); va4_7b2 = _mm256_permute2f128_pd( vtmpa_4_7b_3, vtmpa_4_7b_1, 0x30 ); if( rs_c == 1 ) { // Calculate address c00 = ( c + 0*rs_c + 0*cs_c ); // Load //vc0_3_0 = _mm256_load_pd( c + 0*rs_c + 0*cs_c ); vc0_3_0 = _mm256_load_pd( c00 ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va0_3b0); // Scale by beta vc0_3_0 = _mm256_mul_pd( vbeta, vc0_3_0 ); // Add gemm result vc0_3_0 = _mm256_add_pd( vc0_3_0, vtmp ); // Store back to memory _mm256_store_pd( c00, vc0_3_0 ); // Calculate address c40 = ( c + 4*rs_c + 0*cs_c ); // Load //vc4_7_0 = _mm256_load_pd( c + 4*rs_c + 0*cs_c ); vc4_7_0 = _mm256_load_pd( c40 ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va4_7b0); // Scale by beta vc4_7_0 = _mm256_mul_pd( vbeta, vc4_7_0 ); // Add gemm result vc4_7_0 = _mm256_add_pd( vc4_7_0, vtmp ); // Store back to memory _mm256_store_pd( c40, vc4_7_0 ); // Calculate address c01 = ( c + 0*rs_c + 1*cs_c ); // Load //vc0_3_1 = _mm256_load_pd( c + 0*rs_c + 1*cs_c ); vc0_3_1 = _mm256_load_pd( c01 ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va0_3b1); // Scale by beta vc0_3_1 = _mm256_mul_pd( vbeta, vc0_3_1 ); // Add gemm result vc0_3_1 = _mm256_add_pd( vc0_3_1, vtmp ); // Store back to memory _mm256_store_pd( c01, vc0_3_1 ); // Calculate address c41 = ( c + 4*rs_c + 1*cs_c ); // Load //vc4_7_1 = _mm256_load_pd( c + 4*rs_c + 1*cs_c ); vc4_7_1 = _mm256_load_pd( c41 ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va4_7b1); // Scale by beta vc4_7_1 = _mm256_mul_pd( vbeta, vc4_7_1 ); // Add gemm result vc4_7_1 = _mm256_add_pd( vc4_7_1, vtmp ); // Store back to memory _mm256_store_pd( c41, vc4_7_1 ); // Calculate address c02 = ( c + 0*rs_c + 2*cs_c ); // Load //vc0_3_2 = _mm256_load_pd( c + 0*rs_c + 2*cs_c ); vc0_3_2 = _mm256_load_pd( c02 ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va0_3b2); // Scale by beta vc0_3_2 = _mm256_mul_pd( vbeta, vc0_3_2 ); // Add gemm result 
vc0_3_2 = _mm256_add_pd( vc0_3_2, vtmp ); // Store back to memory _mm256_store_pd( c02, vc0_3_2 ); // Calculate address c42 = ( c + 4*rs_c + 2*cs_c ); // Load //vc4_7_2 = _mm256_load_pd( c + 4*rs_c + 2*cs_c ); vc4_7_2 = _mm256_load_pd( c42 ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va4_7b2); // Scale by beta vc4_7_2 = _mm256_mul_pd( vbeta, vc4_7_2 ); // Add gemm result vc4_7_2 = _mm256_add_pd( vc4_7_2, vtmp ); // Store back to memory _mm256_store_pd( c42, vc4_7_2 ); // Calculate address c03 = ( c + 0*rs_c + 3*cs_c ); // Load //vc0_3_3 = _mm256_load_pd( c + 0*rs_c + 3*cs_c ); vc0_3_3 = _mm256_load_pd( c03 ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va0_3b3); // Scale by beta vc0_3_3 = _mm256_mul_pd( vbeta, vc0_3_3 ); // Add gemm result vc0_3_3 = _mm256_add_pd( vc0_3_3, vtmp ); // Store back to memory _mm256_store_pd( c03, vc0_3_3 ); // Calculate address c43 = ( c + 4*rs_c + 3*cs_c ); // Load //vc4_7_3 = _mm256_load_pd( c + 4*rs_c + 3*cs_c ); vc4_7_3 = _mm256_load_pd( c43 ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va4_7b3); // Scale by beta vc4_7_3 = _mm256_mul_pd( vbeta, vc4_7_3 ); // Add gemm result vc4_7_3 = _mm256_add_pd( vc4_7_3, vtmp ); // Store back to memory _mm256_store_pd( c43, vc4_7_3 ); } else { // Calculate address c00 = ( c + 0*rs_c + 0*cs_c ); // Load //vc0_3_0 = _mm256_load_pd( c + 0*rs_c + 0*cs_c ); vc0_3_0 = _mm256_set_pd( *(c + 3*rs_c + 0*cs_c ), *(c + 2*rs_c + 0*cs_c ), *(c + 1*rs_c + 0*cs_c ), *(c + 0*rs_c + 0*cs_c ) ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va0_3b0); // Scale by beta vc0_3_0 = _mm256_mul_pd( vbeta, vc0_3_0 ); // Add gemm result vc0_3_0 = _mm256_add_pd( vc0_3_0, vtmp ); // Store back to memory //_mm256_store_pd( c00, vc0_3_0 ); aa = _mm256_extractf128_pd( vc0_3_0, 0 ) ; bb = _mm256_extractf128_pd( vc0_3_0, 1 ) ; _mm_storel_pd( c + 0*rs_c + 0*cs_c, aa ); _mm_storeh_pd( c + 1*rs_c + 0*cs_c, aa ); _mm_storel_pd( c + 2*rs_c + 0*cs_c, bb ); _mm_storeh_pd( c + 3*rs_c + 0*cs_c, bb ); // Calculate address 
c40 = ( c + 4*rs_c + 0*cs_c ); // Load //vc4_7_0 = _mm256_load_pd( c + 4*rs_c + 0*cs_c ); vc4_7_0 = _mm256_set_pd( *(c + 7*rs_c + 0*cs_c ), *(c + 6*rs_c + 0*cs_c ), *(c + 5*rs_c + 0*cs_c ), *(c + 4*rs_c + 0*cs_c ) ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va4_7b0); // Scale by beta vc4_7_0 = _mm256_mul_pd( vbeta, vc4_7_0 ); // Add gemm result vc4_7_0 = _mm256_add_pd( vc4_7_0, vtmp ); // Store back to memory //_mm256_store_pd( c40, vc4_7_0 ); aa = _mm256_extractf128_pd( vc4_7_0, 0 ) ; bb = _mm256_extractf128_pd( vc4_7_0, 1 ) ; _mm_storel_pd( c + 4*rs_c + 0*cs_c, aa ); _mm_storeh_pd( c + 5*rs_c + 0*cs_c, aa ); _mm_storel_pd( c + 6*rs_c + 0*cs_c, bb ); _mm_storeh_pd( c + 7*rs_c + 0*cs_c, bb ); // Calculate address c01 = ( c + 0*rs_c + 1*cs_c ); // Load //vc0_3_1 = _mm256_load_pd( c + 0*rs_c + 1*cs_c ); vc0_3_1 = _mm256_set_pd( *(c + 3*rs_c + 1*cs_c ), *(c + 2*rs_c + 1*cs_c ), *(c + 1*rs_c + 1*cs_c ), *(c + 0*rs_c + 1*cs_c ) ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va0_3b1); // Scale by beta vc0_3_1 = _mm256_mul_pd( vbeta, vc0_3_1 ); // Add gemm result vc0_3_1 = _mm256_add_pd( vc0_3_1, vtmp ); // Store back to memory //_mm256_store_pd( c01, vc0_3_1 ); aa = _mm256_extractf128_pd( vc0_3_1, 0 ) ; bb = _mm256_extractf128_pd( vc0_3_1, 1 ) ; _mm_storel_pd( c + 0*rs_c + 1*cs_c, aa ); _mm_storeh_pd( c + 1*rs_c + 1*cs_c, aa ); _mm_storel_pd( c + 2*rs_c + 1*cs_c, bb ); _mm_storeh_pd( c + 3*rs_c + 1*cs_c, bb ); // Calculate address c41 = ( c + 4*rs_c + 1*cs_c ); // Load //vc4_7_1 = _mm256_load_pd( c + 4*rs_c + 1*cs_c ); vc4_7_1 = _mm256_set_pd( *(c + 7*rs_c + 1*cs_c ), *(c + 6*rs_c + 1*cs_c ), *(c + 5*rs_c + 1*cs_c ), *(c + 4*rs_c + 1*cs_c ) ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va4_7b1); // Scale by beta vc4_7_1 = _mm256_mul_pd( vbeta, vc4_7_1 ); // Add gemm result vc4_7_1 = _mm256_add_pd( vc4_7_1, vtmp ); // Store back to memory //_mm256_store_pd( c41, vc4_7_1 ); aa = _mm256_extractf128_pd( vc4_7_1, 0 ) ; bb = _mm256_extractf128_pd( vc4_7_1, 1 ) 
; _mm_storel_pd( c + 4*rs_c + 1*cs_c, aa ); _mm_storeh_pd( c + 5*rs_c + 1*cs_c, aa ); _mm_storel_pd( c + 6*rs_c + 1*cs_c, bb ); _mm_storeh_pd( c + 7*rs_c + 1*cs_c, bb ); // Calculate address c02 = ( c + 0*rs_c + 2*cs_c ); // Load //vc0_3_2 = _mm256_load_pd( c + 0*rs_c + 2*cs_c ); vc0_3_2 = _mm256_set_pd( *(c + 3*rs_c + 2*cs_c ), *(c + 2*rs_c + 2*cs_c ), *(c + 1*rs_c + 2*cs_c ), *(c + 0*rs_c + 2*cs_c ) ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va0_3b2); // Scale by beta vc0_3_2 = _mm256_mul_pd( vbeta, vc0_3_2 ); // Add gemm result vc0_3_2 = _mm256_add_pd( vc0_3_2, vtmp ); // Store back to memory //_mm256_store_pd( c02, vc0_3_2 ); aa = _mm256_extractf128_pd( vc0_3_2, 0 ) ; bb = _mm256_extractf128_pd( vc0_3_2, 1 ) ; _mm_storel_pd( c + 0*rs_c + 2*cs_c, aa ); _mm_storeh_pd( c + 1*rs_c + 2*cs_c, aa ); _mm_storel_pd( c + 2*rs_c + 2*cs_c, bb ); _mm_storeh_pd( c + 3*rs_c + 2*cs_c, bb ); // Calculate address c42 = ( c + 4*rs_c + 2*cs_c ); // Load //vc4_7_2 = _mm256_load_pd( c + 4*rs_c + 2*cs_c ); vc4_7_2 = _mm256_set_pd( *(c + 7*rs_c + 2*cs_c ), *(c + 6*rs_c + 2*cs_c ), *(c + 5*rs_c + 2*cs_c ), *(c + 4*rs_c + 2*cs_c ) ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va4_7b2); // Scale by beta vc4_7_2 = _mm256_mul_pd( vbeta, vc4_7_2 ); // Add gemm result vc4_7_2 = _mm256_add_pd( vc4_7_2, vtmp ); // Store back to memory //_mm256_store_pd( c42, vc4_7_2 ); aa = _mm256_extractf128_pd( vc4_7_2, 0 ) ; bb = _mm256_extractf128_pd( vc4_7_2, 1 ) ; _mm_storel_pd( c + 4*rs_c + 2*cs_c, aa ); _mm_storeh_pd( c + 5*rs_c + 2*cs_c, aa ); _mm_storel_pd( c + 6*rs_c + 2*cs_c, bb ); _mm_storeh_pd( c + 7*rs_c + 2*cs_c, bb ); // Calculate address c03 = ( c + 0*rs_c + 3*cs_c ); // Load //vc0_3_3 = _mm256_load_pd( c + 0*rs_c + 3*cs_c ); vc0_3_3 = _mm256_set_pd( *(c + 3*rs_c + 3*cs_c ), *(c + 2*rs_c + 3*cs_c ), *(c + 1*rs_c + 3*cs_c ), *(c + 0*rs_c + 3*cs_c ) ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va0_3b3); // Scale by beta vc0_3_3 = _mm256_mul_pd( vbeta, vc0_3_3 ); // Add gemm 
result vc0_3_3 = _mm256_add_pd( vc0_3_3, vtmp ); // Store back to memory //_mm256_store_pd( c03, vc0_3_3 ); aa = _mm256_extractf128_pd( vc0_3_3, 0 ) ; bb = _mm256_extractf128_pd( vc0_3_3, 1 ) ; _mm_storel_pd( c + 0*rs_c + 3*cs_c, aa ); _mm_storeh_pd( c + 1*rs_c + 3*cs_c, aa ); _mm_storel_pd( c + 2*rs_c + 3*cs_c, bb ); _mm_storeh_pd( c + 3*rs_c + 3*cs_c, bb ); // Calculate address c43 = ( c + 4*rs_c + 3*cs_c ); // Load //vc4_7_3 = _mm256_load_pd( c + 4*rs_c + 3*cs_c ); vc4_7_3 = _mm256_set_pd( *(c + 7*rs_c + 3*cs_c ), *(c + 6*rs_c + 3*cs_c ), *(c + 5*rs_c + 3*cs_c ), *(c + 4*rs_c + 3*cs_c ) ); // Scale by alpha vtmp = _mm256_mul_pd( valpha, va4_7b3); // Scale by beta vc4_7_3 = _mm256_mul_pd( vbeta, vc4_7_3 ); // Add gemm result vc4_7_3 = _mm256_add_pd( vc4_7_3, vtmp ); // Store back to memory //_mm256_store_pd( c43, vc4_7_3 ); aa = _mm256_extractf128_pd( vc4_7_3, 0 ) ; bb = _mm256_extractf128_pd( vc4_7_3, 1 ) ; _mm_storel_pd( c + 4*rs_c + 3*cs_c, aa ); _mm_storeh_pd( c + 5*rs_c + 3*cs_c, aa ); _mm_storel_pd( c + 6*rs_c + 3*cs_c, bb ); _mm_storeh_pd( c + 7*rs_c + 3*cs_c, bb ); } } #if 0 void bli_cgemm_sandybridge_int_8x4 ( dim_t k0, scomplex* restrict alpha, scomplex* restrict a, scomplex* restrict b, scomplex* restrict beta, scomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { } #endif #if 0 void bli_zgemm_sandybridge_int_4x4 ( dim_t k0, dcomplex* restrict alpha, dcomplex* restrict a, dcomplex* restrict b, dcomplex* restrict beta, dcomplex* restrict c, inc_t rs_c0, inc_t cs_c0, auxinfo_t* restrict data, cntx_t* restrict cntx ) { } #endif blis-0.6.1/kernels/sandybridge/bli_kernels_sandybridge.h000066400000000000000000000041531360743507500234370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

// Prototypes for the Sandy Bridge gemm micro-kernels, generated by the
// GEMM_UKR_PROT macro (defined elsewhere). The suffix of each name gives the
// micro-tile size: 8x8 for s, 8x4 for d and c, 4x4 for z.

// d8x4 (assembly)
GEMM_UKR_PROT( float,    s, gemm_sandybridge_asm_8x8 )
GEMM_UKR_PROT( double,   d, gemm_sandybridge_asm_8x4 )
GEMM_UKR_PROT( scomplex, c, gemm_sandybridge_asm_8x4 )
GEMM_UKR_PROT( dcomplex, z, gemm_sandybridge_asm_4x4 )

// d8x4 (intrinsics)
// NOTE(review): in the intrinsics source only the d kernel has a live
// definition; the s, c and z definitions are wrapped in "#if 0", so these
// three prototypes presumably have no matching symbol -- confirm before use.
GEMM_UKR_PROT( float,    s, gemm_sandybridge_int_8x8 )
GEMM_UKR_PROT( double,   d, gemm_sandybridge_int_8x4 )
GEMM_UKR_PROT( scomplex, c, gemm_sandybridge_int_8x4 )
GEMM_UKR_PROT( dcomplex, z, gemm_sandybridge_int_4x4 )
 blis-0.6.1/kernels/skx/000077500000000000000000000000001360743507500147315ustar00rootroot00000000000000blis-0.6.1/kernels/skx/3/000077500000000000000000000000001360743507500150735ustar00rootroot00000000000000blis-0.6.1/kernels/skx/3/bli_dgemm_skx_asm_16x12_l2.c000066400000000000000000000411721360743507500221460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

// Select Intel operand order (destination first) for the assembly macros
// pulled in by the next include.
#define BLIS_ASM_SYNTAX_INTEL
#include "bli_x86_asm_macros.h"

// Prefetch distance used by PREFETCH_A_L1, in units of 16 doubles.
#define A_L1_PREFETCH_DIST 4 //should be multiple of 2

/* The pointer of B is moved ahead by one iteration of k before the loop
   starts. Therefore, prefetching 3 k iterations ahead. */
#define B_L1_PREFETCH_DIST 4

#define TAIL_NITER 8

#define CACHELINE_SIZE 64 //size of cache line in bytes

/* During each subiteration, prefetching 2 cache lines of B
 * UNROLL factor ahead. 2 cache lines = 16 doubles (NR).
 *
 * NOTE(review): the macro below addresses RAX and steps 16 doubles per unit
 * of distance, while the sub-iteration code reads 12 values per step from
 * RBX; this looks like it prefetches the A panel (16 rows) rather than B --
 * confirm and reword.
 *
 * n is the sub-iteration index and k (0 or 1) selects which of its two cache
 * lines is prefetched.
 * */
#define PREFETCH_A_L1(n, k) \
    PREFETCH(0, MEM(RAX, A_L1_PREFETCH_DIST*16*8 + (2*n+k) * CACHELINE_SIZE))

/* Preloading B for the first iteration of the main loop.
 * for subiter(1), subiter(2), and subiter(3)
 *
 * Issues six prefetches covering the six consecutive cache lines that start
 * at RBX.
 */
#define PREFETCH_B_L1_1ITER \
    PREFETCH(0, MEM(RBX                   )) \
    PREFETCH(0, MEM(RBX,    CACHELINE_SIZE)) \
    PREFETCH(0, MEM(RBX,  2*CACHELINE_SIZE)) \
    PREFETCH(0, MEM(RBX,  3*CACHELINE_SIZE)) \
    PREFETCH(0, MEM(RBX,  4*CACHELINE_SIZE)) \
    PREFETCH(0, MEM(RBX,  5*CACHELINE_SIZE))

// Alignment directive placed in front of loop heads.
#define LOOP_ALIGN ALIGN16

/*
 * Update two 128-byte lines of C from four accumulator registers.
 *
 * Each accumulator is first multiplied by ZMM(0), then ZMM(1) times the
 * current contents of C is added to it, and the result is stored back:
 * R1/R2 go to the two 64-byte halves at RCX, R3/R4 to the two halves at
 * RCX+RAX. RCX is then advanced by 2*RAX.
 * (ZMM(0) and ZMM(1) presumably hold broadcast alpha and beta, and RAX the
 * byte stride between lines of C -- confirm against the kernel body.)
 */
#define UPDATE_C(R1,R2,R3,R4) \
\
    VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \
    VFMADD231PD(ZMM(R1), ZMM(1), MEM(RCX,0*64)) \
    VFMADD231PD(ZMM(R2), ZMM(1), MEM(RCX,1*64)) \
    VFMADD231PD(ZMM(R3), ZMM(1), MEM(RCX,RAX,1,0*64)) \
    VFMADD231PD(ZMM(R4), ZMM(1), MEM(RCX,RAX,1,1*64)) \
    VMOVUPD(MEM(RCX,0*64), ZMM(R1)) \
    VMOVUPD(MEM(RCX,1*64), ZMM(R2)) \
    VMOVUPD(MEM(RCX,RAX,1,0*64), ZMM(R3)) \
    VMOVUPD(MEM(RCX,RAX,1,1*64), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,2))

/*
 * Same as UPDATE_C but without reading C: the accumulators are only
 * multiplied by ZMM(0) and stored. ("BZ" presumably stands for beta == 0.)
 */
#define UPDATE_C_BZ(R1,R2,R3,R4) \
\
    VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \
    VMOVUPD(MEM(RCX,0*64), ZMM(R1)) \
    VMOVUPD(MEM(RCX,1*64), ZMM(R2)) \
    VMOVUPD(MEM(RCX,RAX,1,0*64), ZMM(R3)) \
    VMOVUPD(MEM(RCX,RAX,1,1*64), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,2))

/*
 * Variant of UPDATE_C for C elements that are not contiguous: each
 * accumulator is combined with C through a gather into ZMM(6) and written
 * back with a scatter, using the offset vectors in ZMM(2) (for R1, R3) and
 * ZMM(3) (for R2, R4). K(1) and K(2) are set to all ones by the KXNORW pair
 * before every gather/scatter pair. RCX is advanced by RAX after each pair
 * of accumulators.
 */
#define UPDATE_C_ROW_SCATTERED(R1,R2,R3,R4) \
\
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \
    VGATHERQPD(ZMM(6) MASK_K(1), MEM(RCX,ZMM(2),1)) \
    VFMADD231PD(ZMM(R1), ZMM(6), ZMM(1)) \
    VSCATTERQPD(MEM(RCX,ZMM(2),1) MASK_K(2), ZMM(R1)) \
\
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \
    VGATHERQPD(ZMM(6) MASK_K(1), MEM(RCX,ZMM(3),1)) \
    VFMADD231PD(ZMM(R2), ZMM(6), ZMM(1)) \
    VSCATTERQPD(MEM(RCX,ZMM(3),1) MASK_K(2), ZMM(R2)) \
\
    LEA(RCX, MEM(RCX,RAX,1)) \
\
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \
    VGATHERQPD(ZMM(6) MASK_K(1), MEM(RCX,ZMM(2),1)) \
    VFMADD231PD(ZMM(R3), ZMM(6), ZMM(1)) \
    VSCATTERQPD(MEM(RCX,ZMM(2),1) MASK_K(2), ZMM(R3)) \
\
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \
    VGATHERQPD(ZMM(6) MASK_K(1), MEM(RCX,ZMM(3),1)) \
    VFMADD231PD(ZMM(R4), ZMM(6), ZMM(1)) \
    VSCATTERQPD(MEM(RCX,ZMM(3),1) MASK_K(2), ZMM(R4)) \
\
    LEA(RCX, MEM(RCX,RAX,1))

/*
 * Scatter-only counterpart of UPDATE_C_ROW_SCATTERED: C is not read, so only
 * one mask register, K(1), is needed per scatter.
 */
#define UPDATE_C_BZ_ROW_SCATTERED(R1,R2,R3,R4) \
\
    KXNORW(K(1), K(0), K(0)) \
    VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \
    VSCATTERQPD(MEM(RCX,ZMM(2),1) MASK_K(1), ZMM(R1)) \
\
    KXNORW(K(1), K(0), K(0)) \
    VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \
    VSCATTERQPD(MEM(RCX,ZMM(3),1) MASK_K(1), ZMM(R2)) \
\
    LEA(RCX, MEM(RCX,RAX,1)) \
\
    KXNORW(K(1), K(0), K(0)) \
    VMULPD(ZMM(R3), ZMM(R3), ZMM(0)) \
    VSCATTERQPD(MEM(RCX,ZMM(2),1) MASK_K(1), ZMM(R3)) \
\
    KXNORW(K(1), K(0), K(0)) \
    VMULPD(ZMM(R4), ZMM(R4), ZMM(0)) \
    VSCATTERQPD(MEM(RCX,ZMM(3),1) MASK_K(1), ZMM(R4)) \
\
    LEA(RCX, MEM(RCX,RAX,1))

/*
 * Optional prefetch of the C tile. If PREFETCH_C_L2 is already defined when
 * this point is reached (e.g. on the compiler command line), it is redefined
 * as the prefetch sequence below: two cache lines at each of twelve
 * addresses formed from RCX/RDX plus multiples of R12..R15. Otherwise it
 * expands to nothing.
 * (R12..R15 presumably hold 1x, 3x, 5x and 7x the stride of C, and RDX the
 * address eight strides past RCX -- confirm against the kernel body.)
 */
#ifdef PREFETCH_C_L2
#undef PREFETCH_C_L2
#define PREFETCH_C_L2 \
\
    PREFETCH(1, MEM(RCX,      0*64)) \
    PREFETCH(1, MEM(RCX,      1*64)) \
    \
    PREFETCH(1, MEM(RCX,R12,1,0*64)) \
    PREFETCH(1, MEM(RCX,R12,1,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R12,2,0*64)) \
    PREFETCH(1, MEM(RCX,R12,2,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R13,1,0*64)) \
    PREFETCH(1, MEM(RCX,R13,1,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R12,4,0*64)) \
    PREFETCH(1, MEM(RCX,R12,4,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R14,1,0*64)) \
    PREFETCH(1, MEM(RCX,R14,1,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R13,2,0*64)) \
    PREFETCH(1, MEM(RCX,R13,2,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R15,1,0*64)) \
    PREFETCH(1, MEM(RCX,R15,1,1*64)) \
    \
    PREFETCH(1, MEM(RDX,      0*64)) \
    PREFETCH(1, MEM(RDX,      1*64)) \
    \
    PREFETCH(1, MEM(RDX,R12,1,0*64)) \
    PREFETCH(1, MEM(RDX,R12,1,1*64)) \
    \
    PREFETCH(1, MEM(RDX,R12,2,0*64)) \
    PREFETCH(1, MEM(RDX,R12,2,1*64)) \
    \
    PREFETCH(1, MEM(RDX,R13,1,0*64)) \
    PREFETCH(1, MEM(RDX,R13,1,1*64))

#else
#undef PREFETCH_C_L2
#define PREFETCH_C_L2
#endif

/*
 * Always-on prefetch of the C tile using PREFETCHW0, over the same address
 * pattern as PREFETCH_C_L2.
 */
#define PREFETCH_C_L1 \
\
    PREFETCHW0(MEM(RCX,      0*64)) \
    PREFETCHW0(MEM(RCX,      1*64)) \
    PREFETCHW0(MEM(RCX,R12,1,0*64)) \
    PREFETCHW0(MEM(RCX,R12,1,1*64)) \
    PREFETCHW0(MEM(RCX,R12,2,0*64)) \
PREFETCHW0(MEM(RCX,R12,2,1*64)) \ PREFETCHW0(MEM(RCX,R13,1,0*64)) \ PREFETCHW0(MEM(RCX,R13,1,1*64)) \ PREFETCHW0(MEM(RCX,R12,4,0*64)) \ PREFETCHW0(MEM(RCX,R12,4,1*64)) \ PREFETCHW0(MEM(RCX,R14,1,0*64)) \ PREFETCHW0(MEM(RCX,R14,1,1*64)) \ PREFETCHW0(MEM(RCX,R13,2,0*64)) \ PREFETCHW0(MEM(RCX,R13,2,1*64)) \ PREFETCHW0(MEM(RCX,R15,1,0*64)) \ PREFETCHW0(MEM(RCX,R15,1,1*64)) \ PREFETCHW0(MEM(RDX, 0*64)) \ PREFETCHW0(MEM(RDX, 1*64)) \ PREFETCHW0(MEM(RDX,R12,1,0*64)) \ PREFETCHW0(MEM(RDX,R12,1,1*64)) \ PREFETCHW0(MEM(RDX,R12,2,0*64)) \ PREFETCHW0(MEM(RDX,R12,2,1*64)) \ PREFETCHW0(MEM(RDX,R13,1,0*64)) \ PREFETCHW0(MEM(RDX,R13,1,1*64)) // // n: index in unrolled loop // // a: ZMM register to load into // b: ZMM register to read from // // ...: addressing for A, except for offset // #define SUBITER(n) \ \ PREFETCH_A_L1(n, 0) \ \ VBROADCASTSD(ZMM(3), MEM(RBX,(12*n+ 0)*8)) \ VBROADCASTSD(ZMM(4), MEM(RBX,(12*n+ 1)*8)) \ VFMADD231PD(ZMM( 8), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM( 9), ZMM(1), ZMM(3)) \ VFMADD231PD(ZMM(10), ZMM(0), ZMM(4)) \ VFMADD231PD(ZMM(11), ZMM(1), ZMM(4)) \ \ VBROADCASTSD(ZMM(3), MEM(RBX,(12*n+ 2)*8)) \ VBROADCASTSD(ZMM(4), MEM(RBX,(12*n+ 3)*8)) \ VFMADD231PD(ZMM(12), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(13), ZMM(1), ZMM(3)) \ VFMADD231PD(ZMM(14), ZMM(0), ZMM(4)) \ VFMADD231PD(ZMM(15), ZMM(1), ZMM(4)) \ \ VBROADCASTSD(ZMM(3), MEM(RBX,(12*n+ 4)*8)) \ VBROADCASTSD(ZMM(4), MEM(RBX,(12*n+ 5)*8)) \ VFMADD231PD(ZMM(16), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(17), ZMM(1), ZMM(3)) \ VFMADD231PD(ZMM(18), ZMM(0), ZMM(4)) \ VFMADD231PD(ZMM(19), ZMM(1), ZMM(4)) \ \ PREFETCH_A_L1(n, 1) \ \ VBROADCASTSD(ZMM(3), MEM(RBX,(12*n+ 6)*8)) \ VBROADCASTSD(ZMM(4), MEM(RBX,(12*n+ 7)*8)) \ VFMADD231PD(ZMM(20), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(21), ZMM(1), ZMM(3)) \ VFMADD231PD(ZMM(22), ZMM(0), ZMM(4)) \ VFMADD231PD(ZMM(23), ZMM(1), ZMM(4)) \ \ VBROADCASTSD(ZMM(3), MEM(RBX,(12*n+ 8)*8)) \ VBROADCASTSD(ZMM(4), MEM(RBX,(12*n+ 9)*8)) \ VFMADD231PD(ZMM(24), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(25), 
ZMM(1), ZMM(3)) \ VFMADD231PD(ZMM(26), ZMM(0), ZMM(4)) \ VFMADD231PD(ZMM(27), ZMM(1), ZMM(4)) \ \ VBROADCASTSD(ZMM(3), MEM(RBX,(12*n+10)*8)) \ VBROADCASTSD(ZMM(4), MEM(RBX,(12*n+11)*8)) \ VFMADD231PD(ZMM(28), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(29), ZMM(1), ZMM(3)) \ VFMADD231PD(ZMM(30), ZMM(0), ZMM(4)) \ VFMADD231PD(ZMM(31), ZMM(1), ZMM(4)) \ \ VMOVAPD(ZMM(0), MEM(RAX,(16*n+0)*8)) \ VMOVAPD(ZMM(1), MEM(RAX,(16*n+8)*8)) //This is an array used for the scatter/gather instructions. static int64_t offsets[16] __attribute__((aligned(64))) = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}; void bli_dgemm_skx_asm_16x12_l2( dim_t k_, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c_, inc_t cs_c_, auxinfo_t* data, cntx_t* restrict cntx ) { (void)data; (void)cntx; const int64_t* offsetPtr = &offsets[0]; const int64_t k = k_; const int64_t rs_c = rs_c_; const int64_t cs_c = cs_c_; BEGIN_ASM() VXORPD(YMM(8), YMM(8), YMM(8)) //clear out registers VMOVAPD(YMM( 7), YMM(8)) VMOVAPD(YMM( 9), YMM(8)) VMOVAPD(YMM(10), YMM(8)) MOV(RSI, VAR(k)) //loop index VMOVAPD(YMM(11), YMM(8)) MOV(RAX, VAR(a)) //load address of a VMOVAPD(YMM(12), YMM(8)) MOV(RBX, VAR(b)) //load address of b VMOVAPD(YMM(13), YMM(8)) MOV(RCX, VAR(c)) //load address of c VMOVAPD(YMM(14), YMM(8)) VMOVAPD(YMM(15), YMM(8)) VMOVAPD(ZMM(0), MEM(RAX, 0*8)) //pre-load a VMOVAPD(YMM(16), YMM(8)) VMOVAPD(ZMM(1), MEM(RAX, 8*8)) //pre-load a VMOVAPD(YMM(17), YMM(8)) VMOVAPD(YMM(18), YMM(8)) VMOVAPD(YMM(19), YMM(8)) MOV(R12, VAR(cs_c)) //cs_c VMOVAPD(YMM(20), YMM(8)) LEA(R13, MEM(R12,R12,2)) //*3 VMOVAPD(YMM(21), YMM(8)) LEA(R14, MEM(R12,R12,4)) //*5 VMOVAPD(YMM(22), YMM(8)) LEA(R15, MEM(R14,R12,2)) //*7 VMOVAPD(YMM(23), YMM(8)) LEA(RDX, MEM(RCX,R12,8)) //c + 8*cs_c VMOVAPD(YMM(24), YMM(8)) VMOVAPD(YMM(25), YMM(8)) MOV(R8, IMM(16*8)) //mr*sizeof(double) VMOVAPD(YMM(26), YMM(8)) MOV(R9, IMM(12*8)) //nr*sizeof(double) VMOVAPD(YMM(27), YMM(8)) VMOVAPD(YMM(28), 
YMM(8)) LEA(RAX, MEM(RAX,R8,1)) //adjust a for pre-load VMOVAPD(YMM(29), YMM(8)) VMOVAPD(YMM(30), YMM(8)) VMOVAPD(YMM(31), YMM(8)) TEST(RSI, RSI) JZ(POSTACCUM) #ifdef PREFETCH_A_BEFORE PREFETCH(0, MEM(RAX,0*64)) PREFETCH(0, MEM(RAX,1*64)) PREFETCH(0, MEM(RAX,2*64)) PREFETCH(0, MEM(RAX,3*64)) PREFETCH(0, MEM(RAX,4*64)) PREFETCH(0, MEM(RAX,5*64)) PREFETCH(0, MEM(RAX,6*64)) PREFETCH(0, MEM(RAX,7*64)) #endif #ifdef PREFETCH_B_BEFORE PREFETCH(0, MEM(RBX,0*64)) PREFETCH(0, MEM(RBX,1*64)) PREFETCH(0, MEM(RBX,2*64)) PREFETCH(0, MEM(RBX,3*64)) PREFETCH(0, MEM(RBX,4*64)) PREFETCH(0, MEM(RBX,5*64)) #endif PREFETCH_C_L2 MOV(RDI, RSI) AND(RSI, IMM(3)) SAR(RDI, IMM(2)) SUB(RDI, IMM(0+TAIL_NITER)) JLE(K_SMALL) LOOP_ALIGN LABEL(MAIN_LOOP) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8)) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+64)) SUBITER(0) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+128)) SUBITER(1) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+192)) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+256)) SUBITER(2) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+320)) SUBITER(3) LEA(RAX, MEM(RAX,R8,4)) LEA(RBX, MEM(RBX,R9,4)) DEC(RDI) JNZ(MAIN_LOOP) LABEL(K_SMALL) PREFETCH_C_L1 ADD(RDI, IMM(0+TAIL_NITER)) JZ(TAIL_LOOP) LOOP_ALIGN LABEL(SMALL_LOOP) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8)) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+64)) SUBITER(0) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+128)) SUBITER(1) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+192)) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+256)) SUBITER(2) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+320)) SUBITER(3) LEA(RAX, MEM(RAX,R8,4)) LEA(RBX, MEM(RBX,R9,4)) DEC(RDI) JNZ(SMALL_LOOP) TEST(RSI, RSI) JZ(POSTACCUM) LOOP_ALIGN LABEL(TAIL_LOOP) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8)) PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*8+64)) SUBITER(0) ADD(RAX, R8) ADD(RBX, R9) DEC(RSI) JNZ(TAIL_LOOP) LABEL(POSTACCUM) #ifdef PREFETCH_A_AFTER MOV(R8, VAR(a)) PREFETCH(0, MEM(R8,0*64)) PREFETCH(0, MEM(R8,1*64)) PREFETCH(0, 
MEM(R8,2*64)) PREFETCH(0, MEM(R8,3*64)) PREFETCH(0, MEM(R8,4*64)) PREFETCH(0, MEM(R8,5*64)) PREFETCH(0, MEM(R8,6*64)) PREFETCH(0, MEM(R8,7*64)) #endif #ifdef PREFETCH_B_AFTER MOV(R9, VAR(b)) PREFETCH(0, MEM(R9,0*64)) PREFETCH(0, MEM(R9,1*64)) PREFETCH(0, MEM(R9,2*64)) PREFETCH(0, MEM(R9,3*64)) PREFETCH(0, MEM(R9,4*64)) PREFETCH(0, MEM(R9,5*64)) #endif MOV(RAX, VAR(alpha)) MOV(RBX, VAR(beta)) VBROADCASTSD(ZMM(0), MEM(RAX)) VBROADCASTSD(ZMM(1), MEM(RBX)) MOV(RAX, VAR(cs_c)) LEA(RAX, MEM(,RAX,8)) MOV(RBX, VAR(rs_c)) LEA(RBX, MEM(,RBX,8)) // Check if C is column stride. If not, jump to the slow scattered update CMP(RBX, IMM(1)) JNE(SCATTEREDUPDATE) VCOMISD(XMM(1), XMM(7)) JE(COLSTORBZ) UPDATE_C( 8, 9,10,11) UPDATE_C(12,13,14,15) UPDATE_C(16,17,18,19) UPDATE_C(20,21,22,23) UPDATE_C(24,25,26,27) UPDATE_C(28,29,30,31) JMP(END) LABEL(COLSTORBZ) UPDATE_C_BZ( 8, 9,10,11) UPDATE_C_BZ(12,13,14,15) UPDATE_C_BZ(16,17,18,19) UPDATE_C_BZ(20,21,22,23) UPDATE_C_BZ(24,25,26,27) UPDATE_C_BZ(28,29,30,31) JMP(END) LABEL(SCATTEREDUPDATE) MOV(RDI, VAR(offsetPtr)) VMOVDQA64(ZMM(2), MEM(RDI,0*64)) VMOVDQA64(ZMM(3), MEM(RDI,1*64)) VPBROADCASTQ(ZMM(6), RBX) VPMULLQ(ZMM(2), ZMM(6), ZMM(2)) VPMULLQ(ZMM(3), ZMM(6), ZMM(3)) VCOMISD(XMM(1), XMM(7)) JE(SCATTERBZ) UPDATE_C_ROW_SCATTERED( 8, 9,10,11) UPDATE_C_ROW_SCATTERED(12,13,14,15) UPDATE_C_ROW_SCATTERED(16,17,18,19) UPDATE_C_ROW_SCATTERED(20,21,22,23) UPDATE_C_ROW_SCATTERED(24,25,26,27) UPDATE_C_ROW_SCATTERED(28,29,30,31) JMP(END) LABEL(SCATTERBZ) UPDATE_C_BZ_ROW_SCATTERED( 8, 9,10,11) UPDATE_C_BZ_ROW_SCATTERED(12,13,14,15) UPDATE_C_BZ_ROW_SCATTERED(16,17,18,19) UPDATE_C_BZ_ROW_SCATTERED(20,21,22,23) UPDATE_C_BZ_ROW_SCATTERED(24,25,26,27) UPDATE_C_BZ_ROW_SCATTERED(28,29,30,31) LABEL(END) VZEROUPPER() END_ASM( : // output operands : // input operands [k] "m" (k), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), [offsetPtr] "m" (offsetPtr) : // register clobber list "rax", "rbx", 
"rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ) } blis-0.6.1/kernels/skx/3/bli_dgemm_skx_asm_16x14.c000066400000000000000000000343471360743507500215610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "bli_x86_asm_macros.h" #define A_L1_PREFETCH_DIST 4 // in units of k iterations #define B_L1_PREFETCH_DIST 4 // e.g. 4 k iterations ~= 56 cycles #define TAIL_NITER 5 // in units of 4x unrolled k iterations // e.g. 5 -> 4*5 k iterations ~= 280 cycles #define PREFETCH_A_L1(n, k) \ PREFETCH(0, MEM(RAX, A_L1_PREFETCH_DIST*16*8 + (2*n+k)*64)) #define PREFETCH_B_L1(n, k) \ PREFETCH(0, MEM(RBX, B_L1_PREFETCH_DIST*14*8 + (2*n+k)*56)) #define LOOP_ALIGN ALIGN32 #define UPDATE_C(R1,R2) \ \ VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \ VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \ VFMADD231PD(ZMM(R1), ZMM(1), MEM(RCX)) \ VFMADD231PD(ZMM(R2), ZMM(1), MEM(RCX,64)) \ VMOVUPD(MEM(RCX), ZMM(R1)) \ VMOVUPD(MEM(RCX,64), ZMM(R2)) \ LEA(RCX, MEM(RCX,RBX,1)) #define UPDATE_C_BZ(R1,R2) \ \ VMULPD(ZMM(R1), ZMM(R1), ZMM(0)) \ VMULPD(ZMM(R2), ZMM(R2), ZMM(0)) \ VMOVUPD(MEM(RCX), ZMM(R1)) \ VMOVUPD(MEM(RCX,64), ZMM(R2)) \ LEA(RCX, MEM(RCX,RBX,1)) #define UPDATE_C_COL_SCATTERED(R1,R2) \ \ KXNORW(K(1), K(0), K(0)) \ KXNORW(K(2), K(0), K(0)) \ KXNORW(K(3), K(0), K(0)) \ KXNORW(K(4), K(0), K(0)) \ VGATHERQPD(ZMM(0) MASK_K(1), MEM(RCX,ZMM(2),1)) \ VFMADD231PD(ZMM(R1), ZMM(0), ZMM(1)) \ VGATHERQPD(ZMM(0) MASK_K(2), MEM(RCX,ZMM(3),1)) \ VFMADD231PD(ZMM(R2), ZMM(0), ZMM(1)) \ VSCATTERQPD(MEM(RCX,ZMM(2),1) MASK_K(3), ZMM(R1)) \ VSCATTERQPD(MEM(RCX,ZMM(3),1) MASK_K(4), ZMM(R2)) \ LEA(RCX, MEM(RCX,RBX,1)) #define UPDATE_C_BZ_COL_SCATTERED(R1,R2) \ \ KXNORW(K(1), K(0), K(0)) \ KXNORW(K(2), K(0), K(0)) 
\ VSCATTERQPD(MEM(RCX,ZMM(2),1) MASK_K(1), ZMM(R1)) \ VSCATTERQPD(MEM(RCX,ZMM(3),1) MASK_K(2), ZMM(R2)) \ LEA(RCX, MEM(RCX,RBX,1)) #define SUBITER(n) \ \ PREFETCH_A_L1(n, 0) \ \ VBROADCASTSD(ZMM(2), MEM(RBX,(14*n+ 0)*8)) \ VBROADCASTSD(ZMM(3), MEM(RBX,(14*n+ 1)*8)) \ VFMADD231PD(ZMM( 4), ZMM(0), ZMM(2)) \ VFMADD231PD(ZMM( 5), ZMM(1), ZMM(2)) \ VFMADD231PD(ZMM( 6), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM( 7), ZMM(1), ZMM(3)) \ \ VBROADCASTSD(ZMM(2), MEM(RBX,(14*n+ 2)*8)) \ VBROADCASTSD(ZMM(3), MEM(RBX,(14*n+ 3)*8)) \ VFMADD231PD(ZMM( 8), ZMM(0), ZMM(2)) \ VFMADD231PD(ZMM( 9), ZMM(1), ZMM(2)) \ VFMADD231PD(ZMM(10), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(11), ZMM(1), ZMM(3)) \ \ PREFETCH_B_L1(n, 0) \ \ VBROADCASTSD(ZMM(2), MEM(RBX,(14*n+ 4)*8)) \ VBROADCASTSD(ZMM(3), MEM(RBX,(14*n+ 5)*8)) \ VFMADD231PD(ZMM(12), ZMM(0), ZMM(2)) \ VFMADD231PD(ZMM(13), ZMM(1), ZMM(2)) \ VFMADD231PD(ZMM(14), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(15), ZMM(1), ZMM(3)) \ \ VBROADCASTSD(ZMM(2), MEM(RBX,(14*n+ 6)*8)) \ VBROADCASTSD(ZMM(3), MEM(RBX,(14*n+ 7)*8)) \ VFMADD231PD(ZMM(16), ZMM(0), ZMM(2)) \ VFMADD231PD(ZMM(17), ZMM(1), ZMM(2)) \ VFMADD231PD(ZMM(18), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(19), ZMM(1), ZMM(3)) \ \ PREFETCH_A_L1(n, 1) \ \ VBROADCASTSD(ZMM(2), MEM(RBX,(14*n+ 8)*8)) \ VBROADCASTSD(ZMM(3), MEM(RBX,(14*n+ 9)*8)) \ VFMADD231PD(ZMM(20), ZMM(0), ZMM(2)) \ VFMADD231PD(ZMM(21), ZMM(1), ZMM(2)) \ VFMADD231PD(ZMM(22), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(23), ZMM(1), ZMM(3)) \ \ VBROADCASTSD(ZMM(2), MEM(RBX,(14*n+10)*8)) \ VBROADCASTSD(ZMM(3), MEM(RBX,(14*n+11)*8)) \ VFMADD231PD(ZMM(24), ZMM(0), ZMM(2)) \ VFMADD231PD(ZMM(25), ZMM(1), ZMM(2)) \ VFMADD231PD(ZMM(26), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(27), ZMM(1), ZMM(3)) \ \ PREFETCH_B_L1(n, 1) \ \ VBROADCASTSD(ZMM(2), MEM(RBX,(14*n+12)*8)) \ VBROADCASTSD(ZMM(3), MEM(RBX,(14*n+13)*8)) \ VFMADD231PD(ZMM(28), ZMM(0), ZMM(2)) \ VFMADD231PD(ZMM(29), ZMM(1), ZMM(2)) \ VFMADD231PD(ZMM(30), ZMM(0), ZMM(3)) \ VFMADD231PD(ZMM(31), ZMM(1), ZMM(3)) \ \ 
VMOVAPD(ZMM(0), MEM(RAX,(16*n+0)*8)) \ VMOVAPD(ZMM(1), MEM(RAX,(16*n+8)*8)) //This is an array used for the scatter/gather instructions. static int64_t offsets[16] __attribute__((aligned(64))) = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}; void bli_dgemm_skx_asm_16x14( dim_t k_, double* restrict alpha, double* restrict a, double* restrict b, double* restrict beta, double* restrict c, inc_t rs_c_, inc_t cs_c_, auxinfo_t* data, cntx_t* restrict cntx ) { (void)data; (void)cntx; const int64_t* offsetPtr = &offsets[0]; const int64_t k = k_; const int64_t rs_c = rs_c_*8; const int64_t cs_c = cs_c_*8; BEGIN_ASM() VXORPD(YMM( 4), YMM( 4), YMM( 4)) //clear out registers VXORPD(YMM( 5), YMM( 5), YMM( 5)) VXORPD(YMM( 6), YMM( 6), YMM( 6)) VXORPD(YMM( 7), YMM( 7), YMM( 7)) VXORPD(YMM( 8), YMM( 8), YMM( 8)) VXORPD(YMM( 9), YMM( 9), YMM( 9)) VXORPD(YMM(10), YMM(10), YMM(10)) VXORPD(YMM(11), YMM(11), YMM(11)) VXORPD(YMM(12), YMM(12), YMM(12)) VXORPD(YMM(13), YMM(13), YMM(13)) VXORPD(YMM(14), YMM(14), YMM(14)) VXORPD(YMM(15), YMM(15), YMM(15)) VXORPD(YMM(16), YMM(16), YMM(16)) VXORPD(YMM(17), YMM(17), YMM(17)) VXORPD(YMM(18), YMM(18), YMM(18)) VXORPD(YMM(19), YMM(19), YMM(19)) VXORPD(YMM(20), YMM(20), YMM(20)) VXORPD(YMM(21), YMM(21), YMM(21)) VXORPD(YMM(22), YMM(22), YMM(22)) VXORPD(YMM(23), YMM(23), YMM(23)) VXORPD(YMM(24), YMM(24), YMM(24)) VXORPD(YMM(25), YMM(25), YMM(25)) VXORPD(YMM(26), YMM(26), YMM(26)) VXORPD(YMM(27), YMM(27), YMM(27)) VXORPD(YMM(28), YMM(28), YMM(28)) VXORPD(YMM(29), YMM(29), YMM(29)) VXORPD(YMM(30), YMM(30), YMM(30)) VXORPD(YMM(31), YMM(31), YMM(31)) MOV(RSI, VAR(k)) //loop index MOV(RAX, VAR(a)) //load address of a MOV(RBX, VAR(b)) //load address of b MOV(RCX, VAR(c)) //load address of c LEA(RDX, MEM(RSI,RSI,2)) LEA(RDX, MEM(,RDX,4)) LEA(RDX, MEM(RDX,RSI,2)) // 14*k LEA(RDX, MEM(RBX,RDX,8,-128)) // b_next LEA(R9, MEM(RCX,63)) // c for prefetching VMOVAPD(ZMM(0), MEM(RAX, 0*8)) //pre-load a VMOVAPD(ZMM(1), MEM(RAX, 8*8)) //pre-load a LEA(RAX, 
MEM(RAX,16*8)) //adjust a for pre-load MOV(R12, VAR(rs_c)) MOV(R10, VAR(cs_c)) MOV(RDI, RSI) AND(RSI, IMM(3)) SAR(RDI, IMM(2)) SUB(RDI, IMM(14+TAIL_NITER)) JLE(K_LE_80) LOOP_ALIGN LABEL(LOOP1) SUBITER(0) PREFETCH(1, MEM(RDX)) SUBITER(1) SUB(RDI, IMM(1)) SUBITER(2) PREFETCH(1, MEM(RDX,64)) SUBITER(3) LEA(RAX, MEM(RAX,4*16*8)) LEA(RBX, MEM(RBX,4*14*8)) LEA(RDX, MEM(RDX,16*8)) JNZ(LOOP1) LABEL(K_LE_80) ADD(RDI, IMM(14)) JLE(K_LE_24) LOOP_ALIGN LABEL(LOOP2) PREFETCH(0, MEM(R9)) SUBITER(0) PREFETCH(1, MEM(RDX)) SUBITER(1) PREFETCH(0, MEM(R9,64)) SUB(RDI, IMM(1)) SUBITER(2) PREFETCH(1, MEM(RDX,64)) SUBITER(3) LEA(RAX, MEM(RAX,4*16*8)) LEA(RBX, MEM(RBX,4*14*8)) LEA(RDX, MEM(RDX,16*8)) LEA(R9, MEM(R9,R10,1)) JNZ(LOOP2) LABEL(K_LE_24) ADD(RDI, IMM(0+TAIL_NITER)) JLE(TAIL) LOOP_ALIGN LABEL(LOOP3) SUBITER(0) PREFETCH(1, MEM(RDX)) SUBITER(1) SUB(RDI, IMM(1)) SUBITER(2) PREFETCH(1, MEM(RDX,64)) SUBITER(3) LEA(RAX, MEM(RAX,4*16*8)) LEA(RBX, MEM(RBX,4*14*8)) LEA(RDX, MEM(RDX,16*8)) JNZ(LOOP3) LABEL(TAIL) TEST(RSI, RSI) JZ(POSTACCUM) LOOP_ALIGN LABEL(TAIL_LOOP) SUB(RSI, IMM(1)) SUBITER(0) LEA(RAX, MEM(RAX,16*8)) LEA(RBX, MEM(RBX,14*8)) JNZ(TAIL_LOOP) LABEL(POSTACCUM) MOV(RAX, VAR(alpha)) MOV(RBX, VAR(beta)) VBROADCASTSD(ZMM(0), MEM(RAX)) VBROADCASTSD(ZMM(1), MEM(RBX)) VXORPD(YMM(2), YMM(2), YMM(2)) MOV(RAX, R12) MOV(RBX, R10) // Check if C is column stride. 
CMP(RAX, IMM(8)) JNE(SCATTEREDUPDATE) VCOMISD(XMM(1), XMM(2)) JE(COLSTORBZ) UPDATE_C( 4, 5) UPDATE_C( 6, 7) UPDATE_C( 8, 9) UPDATE_C(10,11) UPDATE_C(12,13) UPDATE_C(14,15) UPDATE_C(16,17) UPDATE_C(18,19) UPDATE_C(20,21) UPDATE_C(22,23) UPDATE_C(24,25) UPDATE_C(26,27) UPDATE_C(28,29) UPDATE_C(30,31) JMP(END) LABEL(COLSTORBZ) UPDATE_C_BZ( 4, 5) UPDATE_C_BZ( 6, 7) UPDATE_C_BZ( 8, 9) UPDATE_C_BZ(10,11) UPDATE_C_BZ(12,13) UPDATE_C_BZ(14,15) UPDATE_C_BZ(16,17) UPDATE_C_BZ(18,19) UPDATE_C_BZ(20,21) UPDATE_C_BZ(22,23) UPDATE_C_BZ(24,25) UPDATE_C_BZ(26,27) UPDATE_C_BZ(28,29) UPDATE_C_BZ(30,31) JMP(END) LABEL(SCATTEREDUPDATE) VMULPD(ZMM( 4), ZMM( 4), ZMM(0)) VMULPD(ZMM( 5), ZMM( 5), ZMM(0)) VMULPD(ZMM( 6), ZMM( 6), ZMM(0)) VMULPD(ZMM( 7), ZMM( 7), ZMM(0)) VMULPD(ZMM( 8), ZMM( 8), ZMM(0)) VMULPD(ZMM( 9), ZMM( 9), ZMM(0)) VMULPD(ZMM(10), ZMM(10), ZMM(0)) VMULPD(ZMM(11), ZMM(11), ZMM(0)) VMULPD(ZMM(12), ZMM(12), ZMM(0)) VMULPD(ZMM(13), ZMM(13), ZMM(0)) VMULPD(ZMM(14), ZMM(14), ZMM(0)) VMULPD(ZMM(15), ZMM(15), ZMM(0)) VMULPD(ZMM(16), ZMM(16), ZMM(0)) VMULPD(ZMM(17), ZMM(17), ZMM(0)) VMULPD(ZMM(18), ZMM(18), ZMM(0)) VMULPD(ZMM(19), ZMM(19), ZMM(0)) VMULPD(ZMM(20), ZMM(20), ZMM(0)) VMULPD(ZMM(21), ZMM(21), ZMM(0)) VMULPD(ZMM(22), ZMM(22), ZMM(0)) VMULPD(ZMM(23), ZMM(23), ZMM(0)) VMULPD(ZMM(24), ZMM(24), ZMM(0)) VMULPD(ZMM(25), ZMM(25), ZMM(0)) VMULPD(ZMM(26), ZMM(26), ZMM(0)) VMULPD(ZMM(27), ZMM(27), ZMM(0)) VMULPD(ZMM(28), ZMM(28), ZMM(0)) VMULPD(ZMM(29), ZMM(29), ZMM(0)) VMULPD(ZMM(30), ZMM(30), ZMM(0)) VMULPD(ZMM(31), ZMM(31), ZMM(0)) VCOMISD(XMM(1), XMM(2)) MOV(RDI, VAR(offsetPtr)) VPBROADCASTQ(ZMM(0), RAX) VPMULLQ(ZMM(2), ZMM(0), MEM(RDI)) VPMULLQ(ZMM(3), ZMM(0), MEM(RDI,64)) JE(SCATTERBZ) UPDATE_C_COL_SCATTERED( 4, 5) UPDATE_C_COL_SCATTERED( 6, 7) UPDATE_C_COL_SCATTERED( 8, 9) UPDATE_C_COL_SCATTERED(10,11) UPDATE_C_COL_SCATTERED(12,13) UPDATE_C_COL_SCATTERED(14,15) UPDATE_C_COL_SCATTERED(16,17) UPDATE_C_COL_SCATTERED(18,19) UPDATE_C_COL_SCATTERED(20,21) 
UPDATE_C_COL_SCATTERED(22,23) UPDATE_C_COL_SCATTERED(24,25) UPDATE_C_COL_SCATTERED(26,27) UPDATE_C_COL_SCATTERED(28,29) UPDATE_C_COL_SCATTERED(30,31) JMP(END) LABEL(SCATTERBZ) UPDATE_C_BZ_COL_SCATTERED( 4, 5) UPDATE_C_BZ_COL_SCATTERED( 6, 7) UPDATE_C_BZ_COL_SCATTERED( 8, 9) UPDATE_C_BZ_COL_SCATTERED(10,11) UPDATE_C_BZ_COL_SCATTERED(12,13) UPDATE_C_BZ_COL_SCATTERED(14,15) UPDATE_C_BZ_COL_SCATTERED(16,17) UPDATE_C_BZ_COL_SCATTERED(18,19) UPDATE_C_BZ_COL_SCATTERED(20,21) UPDATE_C_BZ_COL_SCATTERED(22,23) UPDATE_C_BZ_COL_SCATTERED(24,25) UPDATE_C_BZ_COL_SCATTERED(26,27) UPDATE_C_BZ_COL_SCATTERED(28,29) UPDATE_C_BZ_COL_SCATTERED(30,31) LABEL(END) VZEROUPPER() END_ASM ( : // output operands : // input operands [k] "m" (k), [a] "m" (a), [b] "m" (b), [alpha] "m" (alpha), [beta] "m" (beta), [c] "m" (c), [rs_c] "m" (rs_c), [cs_c] "m" (cs_c), [offsetPtr] "m" (offsetPtr) : // register clobber list "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "memory" ) } blis-0.6.1/kernels/skx/3/bli_sgemm_skx_asm_32x12_l2.c000066400000000000000000000431601360743507500221620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"
#define BLIS_ASM_SYNTAX_INTEL
#include "bli_x86_asm_macros.h"

#define CACHELINE_SIZE 64 //size of cache line in bytes

#define A_L1_PREFETCH_DIST 4 //should be multiple of 2

/* Prefetch distance for B, in k iterations.
 * NOTE(review): the original note here said the pointer of B is moved ahead
 * by one k iteration before the loop starts (hence an effective distance of
 * 3). In the kernel in this file only the A pointer (RAX) is visibly adjusted
 * for the pre-load - confirm which operand was meant. */
#define B_L1_PREFETCH_DIST 4

#define TAIL_NITER 8

/* PREFETCH_A_L1(n, k) prefetches the cache line at
 * RAX + A_L1_PREFETCH_DIST*32*4 + (2*n+k)*CACHELINE_SIZE.
 * SUBITER(n) invokes it with k = 0 and k = 1, i.e. 2 cache lines
 * (= 32 floats) per subiteration.
 * NOTE(review): the original comment described this as prefetching "B" and
 * labelled 32 floats as "(NR)"; the macro addresses RAX with a 32-float
 * stride, which matches the 32-row dimension of this 32x12 kernel - confirm.
 */
#define PREFETCH_A_L1(n, k) \
    PREFETCH(0, MEM(RAX, A_L1_PREFETCH_DIST*32*4 + (2*n+k) * CACHELINE_SIZE))

#define LOOP_ALIGN ALIGN16

/*
 * Contiguous update of two columns of C (32 floats = 2 ZMM each).
 * R1,R2 hold column j, R3,R4 hold column j+1.
 * Uses ZMM0 as the multiplier of the accumulators and ZMM1 as the multiplier
 * of the values loaded from C; RCX is the column address and RAX the column
 * stride in bytes. Advances RCX by two columns.
 */
#define UPDATE_C(R1,R2,R3,R4) \
    \
    VMULPS(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPS(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPS(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPS(ZMM(R4), ZMM(R4), ZMM(0)) \
    VFMADD231PS(ZMM(R1), ZMM(1), MEM(RCX,0*64)) \
    VFMADD231PS(ZMM(R2), ZMM(1), MEM(RCX,1*64)) \
    VFMADD231PS(ZMM(R3), ZMM(1), MEM(RCX,RAX,1,0*64)) \
    VFMADD231PS(ZMM(R4), ZMM(1), MEM(RCX,RAX,1,1*64)) \
    VMOVUPS(MEM(RCX,0*64), ZMM(R1)) \
    VMOVUPS(MEM(RCX,1*64), ZMM(R2)) \
    VMOVUPS(MEM(RCX,RAX,1,0*64), ZMM(R3)) \
    VMOVUPS(MEM(RCX,RAX,1,1*64), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,2))

/* Same as UPDATE_C but without reading C (store only). */
#define UPDATE_C_BZ(R1,R2,R3,R4) \
    \
    VMULPS(ZMM(R1), ZMM(R1), ZMM(0)) \
    VMULPS(ZMM(R2), ZMM(R2), ZMM(0)) \
    VMULPS(ZMM(R3), ZMM(R3), ZMM(0)) \
    VMULPS(ZMM(R4), ZMM(R4), ZMM(0)) \
    VMOVUPS(MEM(RCX,0*64), ZMM(R1)) \
    VMOVUPS(MEM(RCX,1*64), ZMM(R2)) \
    VMOVUPS(MEM(RCX,RAX,1,0*64), ZMM(R3)) \
    VMOVUPS(MEM(RCX,RAX,1,1*64), ZMM(R4)) \
    LEA(RCX, MEM(RCX,RAX,2))

/*
 * Gather/scatter update of two columns of C.
 * Each 16-float accumulator is split into its low half (YMM(Rx)) and high
 * half (extracted into YMM5); the halves are gathered/scattered with the
 * 64-bit index vectors ZMM2 and ZMM3 respectively. R1/R3 use base RCX,
 * R2/R4 use base RDX; both bases advance by RAX after each column.
 * YMM6/YMM7 are scratch for the gathered values. K1..K4 are set to
 * all-ones before each group because gathers/scatters consume their mask.
 * NOTE(review): presumably RDX = RCX + 16 rows on entry - the setup is not
 * visible in this span; confirm against the kernel body.
 */
#define UPDATE_C_ROW_SCATTERED(R1,R2,R3,R4) \
    \
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    KXNORW(K(3), K(0), K(0)) \
    KXNORW(K(4), K(0), K(0)) \
    VMULPS(ZMM(R1), ZMM(R1), ZMM(0)) \
    VEXTRACTF64X4(YMM(5), ZMM(R1), IMM(1)) \
    VGATHERQPS(YMM(6) MASK_K(1), MEM(RCX,ZMM(2),1)) \
    VGATHERQPS(YMM(7) MASK_K(2), MEM(RCX,ZMM(3),1)) \
    VFMADD231PS(YMM(R1), YMM(6), YMM(1)) \
    VFMADD231PS(YMM( 5), YMM(7), YMM(1)) \
    VSCATTERQPS(MEM(RCX,ZMM(2),1) MASK_K(3), YMM(R1)) \
    VSCATTERQPS(MEM(RCX,ZMM(3),1) MASK_K(4), YMM( 5)) \
    \
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    KXNORW(K(3), K(0), K(0)) \
    KXNORW(K(4), K(0), K(0)) \
    VMULPS(ZMM(R2), ZMM(R2), ZMM(0)) \
    VEXTRACTF64X4(YMM(5), ZMM(R2), IMM(1)) \
    VGATHERQPS(YMM(6) MASK_K(1), MEM(RDX,ZMM(2),1)) \
    VGATHERQPS(YMM(7) MASK_K(2), MEM(RDX,ZMM(3),1)) \
    VFMADD231PS(YMM(R2), YMM(6), YMM(1)) \
    VFMADD231PS(YMM( 5), YMM(7), YMM(1)) \
    VSCATTERQPS(MEM(RDX,ZMM(2),1) MASK_K(3), YMM(R2)) \
    VSCATTERQPS(MEM(RDX,ZMM(3),1) MASK_K(4), YMM( 5)) \
    \
    LEA(RCX, MEM(RCX,RAX,1)) \
    LEA(RDX, MEM(RDX,RAX,1)) \
    \
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    KXNORW(K(3), K(0), K(0)) \
    KXNORW(K(4), K(0), K(0)) \
    VMULPS(ZMM(R3), ZMM(R3), ZMM(0)) \
    VEXTRACTF64X4(YMM(5), ZMM(R3), IMM(1)) \
    VGATHERQPS(YMM(6) MASK_K(1), MEM(RCX,ZMM(2),1)) \
    VGATHERQPS(YMM(7) MASK_K(2), MEM(RCX,ZMM(3),1)) \
    VFMADD231PS(YMM(R3), YMM(6), YMM(1)) \
    VFMADD231PS(YMM( 5), YMM(7), YMM(1)) \
    VSCATTERQPS(MEM(RCX,ZMM(2),1) MASK_K(3), YMM(R3)) \
    VSCATTERQPS(MEM(RCX,ZMM(3),1) MASK_K(4), YMM( 5)) \
    \
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    KXNORW(K(3), K(0), K(0)) \
    KXNORW(K(4), K(0), K(0)) \
    VMULPS(ZMM(R4), ZMM(R4), ZMM(0)) \
    VEXTRACTF64X4(YMM(5), ZMM(R4), IMM(1)) \
    VGATHERQPS(YMM(6) MASK_K(1), MEM(RDX,ZMM(2),1)) \
    VGATHERQPS(YMM(7) MASK_K(2), MEM(RDX,ZMM(3),1)) \
    VFMADD231PS(YMM(R4), YMM(6), YMM(1)) \
    VFMADD231PS(YMM( 5), YMM(7), YMM(1)) \
    VSCATTERQPS(MEM(RDX,ZMM(2),1) MASK_K(3), YMM(R4)) \
    VSCATTERQPS(MEM(RDX,ZMM(3),1) MASK_K(4), YMM( 5)) \
    \
    LEA(RCX, MEM(RCX,RAX,1)) \
    LEA(RDX, MEM(RDX,RAX,1))

/* Scatter-only variant of UPDATE_C_ROW_SCATTERED: C is never gathered. */
#define UPDATE_C_BZ_ROW_SCATTERED(R1,R2,R3,R4) \
    \
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPS(ZMM(R1), ZMM(R1), ZMM(0)) \
    VEXTRACTF64X4(YMM(5), ZMM(R1), IMM(1)) \
    VSCATTERQPS(MEM(RCX,ZMM(2),1) MASK_K(1), YMM(R1)) \
    VSCATTERQPS(MEM(RCX,ZMM(3),1) MASK_K(2), YMM( 5)) \
    \
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPS(ZMM(R2), ZMM(R2), ZMM(0)) \
    VEXTRACTF64X4(YMM(5), ZMM(R2), IMM(1)) \
    VSCATTERQPS(MEM(RDX,ZMM(2),1) MASK_K(1), YMM(R2)) \
    VSCATTERQPS(MEM(RDX,ZMM(3),1) MASK_K(2), YMM( 5)) \
    \
    LEA(RCX, MEM(RCX,RAX,1)) \
    LEA(RDX, MEM(RDX,RAX,1)) \
    \
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPS(ZMM(R3), ZMM(R3), ZMM(0)) \
    VEXTRACTF64X4(YMM(5), ZMM(R3), IMM(1)) \
    VSCATTERQPS(MEM(RCX,ZMM(2),1) MASK_K(1), YMM(R3)) \
    VSCATTERQPS(MEM(RCX,ZMM(3),1) MASK_K(2), YMM( 5)) \
    \
    KXNORW(K(1), K(0), K(0)) \
    KXNORW(K(2), K(0), K(0)) \
    VMULPS(ZMM(R4), ZMM(R4), ZMM(0)) \
    VEXTRACTF64X4(YMM(5), ZMM(R4), IMM(1)) \
    VSCATTERQPS(MEM(RDX,ZMM(2),1) MASK_K(1), YMM(R4)) \
    VSCATTERQPS(MEM(RDX,ZMM(3),1) MASK_K(2), YMM( 5)) \
    \
    LEA(RCX, MEM(RCX,RAX,1)) \
    LEA(RDX, MEM(RDX,RAX,1))

/*
 * Optional hint-1 prefetch of 12 locations of C (two cache lines each),
 * addressed as RCX + {0,1,2,3,4,5,6,7}*R12 and RDX + {0,1,2,3}*R12 using
 * R13 (3x), R14 (5x) and R15 (7x). Only active when PREFETCH_C_L2 was
 * already defined before this point; otherwise it expands to nothing.
 */
#ifdef PREFETCH_C_L2
#undef PREFETCH_C_L2
#define PREFETCH_C_L2 \
    \
    PREFETCH(1, MEM(RCX, 0*64)) \
    PREFETCH(1, MEM(RCX, 1*64)) \
    \
    PREFETCH(1, MEM(RCX,R12,1,0*64)) \
    PREFETCH(1, MEM(RCX,R12,1,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R12,2,0*64)) \
    PREFETCH(1, MEM(RCX,R12,2,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R13,1,0*64)) \
    PREFETCH(1, MEM(RCX,R13,1,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R12,4,0*64)) \
    PREFETCH(1, MEM(RCX,R12,4,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R14,1,0*64)) \
    PREFETCH(1, MEM(RCX,R14,1,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R13,2,0*64)) \
    PREFETCH(1, MEM(RCX,R13,2,1*64)) \
    \
    PREFETCH(1, MEM(RCX,R15,1,0*64)) \
    PREFETCH(1, MEM(RCX,R15,1,1*64)) \
    \
    PREFETCH(1, MEM(RDX, 0*64)) \
    PREFETCH(1, MEM(RDX, 1*64)) \
    \
    PREFETCH(1, MEM(RDX,R12,1,0*64)) \
    PREFETCH(1, MEM(RDX,R12,1,1*64)) \
    \
    PREFETCH(1, MEM(RDX,R12,2,0*64)) \
    PREFETCH(1, MEM(RDX,R12,2,1*64)) \
    \
    PREFETCH(1, MEM(RDX,R13,1,0*64)) \
    PREFETCH(1, MEM(RDX,R13,1,1*64))

#else
#undef PREFETCH_C_L2
#define PREFETCH_C_L2
#endif

/* Write-intent prefetch of the same 12 locations of C. */
#define PREFETCH_C_L1 \
    \
    PREFETCHW0(MEM(RCX, 0*64)) \
    PREFETCHW0(MEM(RCX, 1*64)) \
    PREFETCHW0(MEM(RCX,R12,1,0*64)) \
    PREFETCHW0(MEM(RCX,R12,1,1*64)) \
    PREFETCHW0(MEM(RCX,R12,2,0*64)) \
    PREFETCHW0(MEM(RCX,R12,2,1*64)) \
    PREFETCHW0(MEM(RCX,R13,1,0*64)) \
    PREFETCHW0(MEM(RCX,R13,1,1*64)) \
    PREFETCHW0(MEM(RCX,R12,4,0*64)) \
    PREFETCHW0(MEM(RCX,R12,4,1*64)) \
    PREFETCHW0(MEM(RCX,R14,1,0*64)) \
    PREFETCHW0(MEM(RCX,R14,1,1*64)) \
    PREFETCHW0(MEM(RCX,R13,2,0*64)) \
    PREFETCHW0(MEM(RCX,R13,2,1*64)) \
    PREFETCHW0(MEM(RCX,R15,1,0*64)) \
    PREFETCHW0(MEM(RCX,R15,1,1*64)) \
    PREFETCHW0(MEM(RDX, 0*64)) \
    PREFETCHW0(MEM(RDX, 1*64)) \
    PREFETCHW0(MEM(RDX,R12,1,0*64)) \
    PREFETCHW0(MEM(RDX,R12,1,1*64)) \
    PREFETCHW0(MEM(RDX,R12,2,0*64)) \
    PREFETCHW0(MEM(RDX,R12,2,1*64)) \
    PREFETCHW0(MEM(RDX,R13,1,0*64)) \
    PREFETCHW0(MEM(RDX,R13,1,1*64))

//
// SUBITER(n): one rank-1 update, n = index in unrolled loop.
//
// ZMM0/ZMM1 hold the current 32 floats read from RAX. Each of the 12 floats
// at RBX + (12*n+j)*4 is broadcast into ZMM3/ZMM4 and accumulated into
// ZMM(8+2j) / ZMM(9+2j). The last two instructions reload ZMM0/ZMM1 from
// RAX + 32*n floats for the following iteration.
//
#define SUBITER(n) \
    \
    PREFETCH_A_L1(n, 0) \
    \
    VBROADCASTSS(ZMM(3), MEM(RBX,(12*n+ 0)*4)) \
    VBROADCASTSS(ZMM(4), MEM(RBX,(12*n+ 1)*4)) \
    VFMADD231PS(ZMM( 8), ZMM(0), ZMM(3)) \
    VFMADD231PS(ZMM( 9), ZMM(1), ZMM(3)) \
    VFMADD231PS(ZMM(10), ZMM(0), ZMM(4)) \
    VFMADD231PS(ZMM(11), ZMM(1), ZMM(4)) \
    \
    VBROADCASTSS(ZMM(3), MEM(RBX,(12*n+ 2)*4)) \
    VBROADCASTSS(ZMM(4), MEM(RBX,(12*n+ 3)*4)) \
    VFMADD231PS(ZMM(12), ZMM(0), ZMM(3)) \
    VFMADD231PS(ZMM(13), ZMM(1), ZMM(3)) \
    VFMADD231PS(ZMM(14), ZMM(0), ZMM(4)) \
    VFMADD231PS(ZMM(15), ZMM(1), ZMM(4)) \
    \
    VBROADCASTSS(ZMM(3), MEM(RBX,(12*n+ 4)*4)) \
    VBROADCASTSS(ZMM(4), MEM(RBX,(12*n+ 5)*4)) \
    VFMADD231PS(ZMM(16), ZMM(0), ZMM(3)) \
    VFMADD231PS(ZMM(17), ZMM(1), ZMM(3)) \
    VFMADD231PS(ZMM(18), ZMM(0), ZMM(4)) \
    VFMADD231PS(ZMM(19), ZMM(1), ZMM(4)) \
    \
    PREFETCH_A_L1(n, 1) \
    \
    VBROADCASTSS(ZMM(3), MEM(RBX,(12*n+ 6)*4)) \
    VBROADCASTSS(ZMM(4), MEM(RBX,(12*n+ 7)*4)) \
    VFMADD231PS(ZMM(20), ZMM(0), ZMM(3)) \
    VFMADD231PS(ZMM(21), ZMM(1), ZMM(3)) \
    VFMADD231PS(ZMM(22), ZMM(0), ZMM(4)) \
    VFMADD231PS(ZMM(23), ZMM(1), ZMM(4)) \
    \
    VBROADCASTSS(ZMM(3), MEM(RBX,(12*n+ 8)*4)) \
    VBROADCASTSS(ZMM(4), MEM(RBX,(12*n+ 9)*4)) \
    VFMADD231PS(ZMM(24), ZMM(0), ZMM(3)) \
    VFMADD231PS(ZMM(25), ZMM(1), ZMM(3)) \
    VFMADD231PS(ZMM(26), ZMM(0), ZMM(4)) \
    VFMADD231PS(ZMM(27), ZMM(1), ZMM(4)) \
    \
    VBROADCASTSS(ZMM(3), MEM(RBX,(12*n+10)*4)) \
    VBROADCASTSS(ZMM(4), MEM(RBX,(12*n+11)*4)) \
    VFMADD231PS(ZMM(28), ZMM(0), ZMM(3)) \
    VFMADD231PS(ZMM(29), ZMM(1), ZMM(3)) \
    VFMADD231PS(ZMM(30), ZMM(0), ZMM(4)) \
    VFMADD231PS(ZMM(31), ZMM(1), ZMM(4)) \
    \
    VMOVAPD(ZMM(0), MEM(RAX,(32*n+0)*4)) \
    VMOVAPD(ZMM(1), MEM(RAX,(32*n+16)*4))

//This is an array used for the scatter/gather instructions.
static int64_t offsets[16] __attribute__((aligned(64))) =
    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15};

/*
   Single-precision gemm micro-kernel written in AVX-512 inline assembly.

   The k loop applies SUBITER once per step, accumulating into ZMM8..ZMM31
   (24 registers of 16 floats, i.e. a 32x12 block). After the loop, alpha
   and beta are broadcast into ZMM0 and ZMM1 and the block is written to c
   through one of four macro families defined outside this excerpt:
   UPDATE_C / UPDATE_C_BZ when rs_c == 1, and
   UPDATE_C_ROW_SCATTERED / UPDATE_C_BZ_ROW_SCATTERED otherwise. The "BZ"
   variants are selected when beta compares equal to zero.

   k_           number of SUBITER steps to perform
   alpha, beta  pointers to the two scalars
   a, b         operand panels; ZMM0/ZMM1 are loaded from a with aligned
                64-byte loads
   c            output block
   rs_c_, cs_c_ row and column strides of c, in elements (both are scaled by
                4 before the store phase)
   data, cntx   unused
*/
void bli_sgemm_skx_asm_32x12_l2(
                                 dim_t            k_,
                                 float*  restrict alpha,
                                 float*  restrict a,
                                 float*  restrict b,
                                 float*  restrict beta,
                                 float*  restrict c, inc_t rs_c_, inc_t cs_c_,
                                 auxinfo_t*       data,
                                 cntx_t* restrict cntx
                               )
{
    (void)data;
    (void)cntx;

    // The asm operands below are all "m" (memory) operands, so the values
    // are first copied into locals of a known 64-bit width.
    const int64_t* offsetPtr = &offsets[0];
    const int64_t k = k_;
    const int64_t rs_c = rs_c_;
    const int64_t cs_c = cs_c_;

    BEGIN_ASM()

    // Zero YMM7 and the accumulators YMM8..YMM31, interleaved with the
    // scalar setup. YMM7 is kept at zero; it is the comparand for the
    // beta == 0 tests further down.
    VXORPD(YMM(8), YMM(8), YMM(8)) //clear out registers
    VMOVAPD(YMM( 7), YMM(8))
    VMOVAPD(YMM( 9), YMM(8))
    VMOVAPD(YMM(10), YMM(8))   MOV(RSI, VAR(k)) //loop index
    VMOVAPD(YMM(11), YMM(8))   MOV(RAX, VAR(a)) //load address of a
    VMOVAPD(YMM(12), YMM(8))   MOV(RBX, VAR(b)) //load address of b
    VMOVAPD(YMM(13), YMM(8))   MOV(RCX, VAR(c)) //load address of c
    VMOVAPD(YMM(14), YMM(8))
    VMOVAPD(YMM(15), YMM(8))   VMOVAPD(ZMM(0), MEM(RAX, 0*4)) //pre-load a
    VMOVAPD(YMM(16), YMM(8))   VMOVAPD(ZMM(1), MEM(RAX, 16*4)) //pre-load a
    VMOVAPD(YMM(17), YMM(8))
    VMOVAPD(YMM(18), YMM(8))
    // NOTE(review): R12 receives cs_c unscaled (elements), whereas the store
    // phase below scales cs_c by 4 into RAX. R12..R15 and RDX feed the
    // PREFETCH_C_* macros, so those prefetch addresses step by cs_c bytes
    // rather than cs_c floats -- confirm this is intended.
    VMOVAPD(YMM(19), YMM(8))   MOV(R12, VAR(cs_c)) //cs_c
    VMOVAPD(YMM(20), YMM(8))   LEA(R13, MEM(R12,R12,2)) //*3
    VMOVAPD(YMM(21), YMM(8))   LEA(R14, MEM(R12,R12,4)) //*5
    VMOVAPD(YMM(22), YMM(8))   LEA(R15, MEM(R14,R12,2)) //*7
    VMOVAPD(YMM(23), YMM(8))   LEA(RDX, MEM(RCX,R12,8)) //c + 8*cs_c
    VMOVAPD(YMM(24), YMM(8))
    VMOVAPD(YMM(25), YMM(8))   MOV(R8, IMM(32*4)) //mr*sizeof(float)
    VMOVAPD(YMM(26), YMM(8))   MOV(R9, IMM(12*4)) //nr*sizeof(float)
    VMOVAPD(YMM(27), YMM(8))
    VMOVAPD(YMM(28), YMM(8))   LEA(RAX, MEM(RAX,R8,1)) //adjust a for pre-load
    VMOVAPD(YMM(29), YMM(8))
    VMOVAPD(YMM(30), YMM(8))
    VMOVAPD(YMM(31), YMM(8))

    // k == 0: skip the accumulation entirely.
    TEST(RSI, RSI)
    JZ(POSTACCUM)

#ifdef PREFETCH_A_BEFORE
    /* Prefetching 8 cachelines of A (4 iterations worth of data
       (32 (MR) x4 (sizeof(float)) x4 iter /64 = 8 cachelines) */
    PREFETCH(0, MEM(RAX,0*64))
    PREFETCH(0, MEM(RAX,1*64))
    PREFETCH(0, MEM(RAX,2*64))
    PREFETCH(0, MEM(RAX,3*64))
    PREFETCH(0, MEM(RAX,4*64))
    PREFETCH(0, MEM(RAX,5*64))
    PREFETCH(0, MEM(RAX,6*64))
    PREFETCH(0, MEM(RAX,7*64))
#endif

#ifdef PREFETCH_B_BEFORE
    /* Prefetching 3 cachelines of B (4 iterations worth of data
       (12 (NR) x 4 (sizeof(float)) x 4 iter /64 = 3 cachelines) */
    PREFETCH(0, MEM(RBX,0*64))
    PREFETCH(0, MEM(RBX,1*64))
    PREFETCH(0, MEM(RBX,2*64))
#endif

    PREFETCH_C_L2

    // Split k: RDI = k/4 - TAIL_NITER unrolled-by-4 trips for MAIN_LOOP,
    // RSI = k%4 single steps for TAIL_LOOP. The last TAIL_NITER unrolled
    // trips (or all k/4 of them when k/4 <= TAIL_NITER) run in SMALL_LOOP,
    // after PREFETCH_C_L1 has been issued.
    MOV(RDI, RSI)
    AND(RSI, IMM(3))
    SAR(RDI, IMM(2))

    SUB(RDI, IMM(0+TAIL_NITER))
    JLE(K_SMALL)

    LOOP_ALIGN
    LABEL(MAIN_LOOP)

        PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*4))
        SUBITER(0)
        PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*4+64))
        SUBITER(1)
        PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*4+128))
        SUBITER(2)
        SUBITER(3)

        // Advance a and b by four steps (4*mr and 4*nr floats).
        LEA(RAX, MEM(RAX,R8,4))
        LEA(RBX, MEM(RBX,R9,4))

        DEC(RDI)

    JNZ(MAIN_LOOP)

    LABEL(K_SMALL)

    PREFETCH_C_L1

    // Restore the unrolled trip count that was held back for SMALL_LOOP.
    // NOTE(review): the JZ goes straight to TAIL_LOOP without re-testing RSI.
    // That is safe when RDI is zero because k < 4 (then RSI == k != 0), but if
    // TAIL_NITER were 0 and k a multiple of 4, TAIL_LOOP would be entered with
    // RSI == 0 -- confirm TAIL_NITER is always nonzero.
    ADD(RDI, IMM(0+TAIL_NITER))
    JZ(TAIL_LOOP)

    LOOP_ALIGN
    LABEL(SMALL_LOOP)

        PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*4))
        SUBITER(0)
        PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*4+64))
        SUBITER(1)
        PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*4+128))
        SUBITER(2)
        SUBITER(3)

        LEA(RAX, MEM(RAX,R8,4))
        LEA(RBX, MEM(RBX,R9,4))

        DEC(RDI)

    JNZ(SMALL_LOOP)

    // No leftover single steps: go to the store phase.
    TEST(RSI, RSI)
    JZ(POSTACCUM)

    LOOP_ALIGN
    LABEL(TAIL_LOOP)

        PREFETCH(0, MEM(RBX,B_L1_PREFETCH_DIST*12*4))
        SUBITER(0)

        ADD(RAX, R8)
        ADD(RBX, R9)

        DEC(RSI)

    JNZ(TAIL_LOOP)

    LABEL(POSTACCUM)

#ifdef PREFETCH_A_AFTER
    MOV(R8, VAR(a))
    PREFETCH(0, MEM(R8,0*64))
    PREFETCH(0, MEM(R8,1*64))
    PREFETCH(0, MEM(R8,2*64))
    PREFETCH(0, MEM(R8,3*64))
    PREFETCH(0, MEM(R8,4*64))
    PREFETCH(0, MEM(R8,5*64))
    PREFETCH(0, MEM(R8,6*64))
    PREFETCH(0, MEM(R8,7*64))
#endif

#ifdef PREFETCH_B_AFTER
    MOV(R9, VAR(b))
    PREFETCH(0, MEM(R9,0*64))
    PREFETCH(0, MEM(R9,1*64))
    PREFETCH(0, MEM(R9,2*64))
#endif

    // ZMM0 = alpha and ZMM1 = beta, broadcast to all lanes.
    MOV(RAX, VAR(alpha))
    MOV(RBX, VAR(beta))
    VBROADCASTSS(ZMM(0), MEM(RAX))
    VBROADCASTSS(ZMM(1), MEM(RBX))

    // RAX = cs_c and RBX = rs_c, both converted to byte strides.
    MOV(RAX, VAR(cs_c))
    LEA(RAX, MEM(,RAX,4))
    MOV(RBX, VAR(rs_c))
    LEA(RBX, MEM(,RBX,4))

    // Check if C is column major (rs_c = 1, i.e. a byte stride of 4).
    // If not, jump to the slow scattered update.
    CMP(RBX, IMM(4))
    JNE(SCATTEREDUPDATE)

        // beta == 0 (compared against the zeroed XMM7) selects the variant
        // named *_BZ.
        VCOMISS(XMM(1), XMM(7))
        JE(COLSTORBZ)

            UPDATE_C( 8, 9,10,11)
            UPDATE_C(12,13,14,15)
            UPDATE_C(16,17,18,19)
            UPDATE_C(20,21,22,23)
            UPDATE_C(24,25,26,27)
            UPDATE_C(28,29,30,31)

        JMP(END)
        LABEL(COLSTORBZ)

            UPDATE_C_BZ( 8, 9,10,11)
            UPDATE_C_BZ(12,13,14,15)
            UPDATE_C_BZ(16,17,18,19)
            UPDATE_C_BZ(20,21,22,23)
            UPDATE_C_BZ(24,25,26,27)
            UPDATE_C_BZ(28,29,30,31)

    JMP(END)
    LABEL(SCATTEREDUPDATE)

        // RDX = RCX + 16*RBX, i.e. sixteen row strides past c.
        LEA(RDX, MEM(RCX,RBX,8))
        LEA(RDX, MEM(RDX,RBX,8))

        // ZMM2 = {0..7} * RBX and ZMM3 = {8..15} * RBX: 64-bit byte offsets
        // built from the offsets[] table for the scatter/gather macros.
        MOV(RDI, VAR(offsetPtr))
        VMOVDQA64(ZMM(2), MEM(RDI,0*64))
        VMOVDQA64(ZMM(3), MEM(RDI,1*64))
        VPBROADCASTQ(ZMM(6), RBX)
        VPMULLQ(ZMM(2), ZMM(6), ZMM(2))
        VPMULLQ(ZMM(3), ZMM(6), ZMM(3))

        VCOMISS(XMM(1), XMM(7))
        JE(SCATTERBZ)

            UPDATE_C_ROW_SCATTERED( 8, 9,10,11)
            UPDATE_C_ROW_SCATTERED(12,13,14,15)
            UPDATE_C_ROW_SCATTERED(16,17,18,19)
            UPDATE_C_ROW_SCATTERED(20,21,22,23)
            UPDATE_C_ROW_SCATTERED(24,25,26,27)
            UPDATE_C_ROW_SCATTERED(28,29,30,31)

        JMP(END)
        LABEL(SCATTERBZ)

            UPDATE_C_BZ_ROW_SCATTERED( 8, 9,10,11)
            UPDATE_C_BZ_ROW_SCATTERED(12,13,14,15)
            UPDATE_C_BZ_ROW_SCATTERED(16,17,18,19)
            UPDATE_C_BZ_ROW_SCATTERED(20,21,22,23)
            UPDATE_C_BZ_ROW_SCATTERED(24,25,26,27)
            UPDATE_C_BZ_ROW_SCATTERED(28,29,30,31)

    LABEL(END)

    VZEROUPPER()

    // NOTE(review): the scattered-update macros use opmask registers (the
    // visible tail of UPDATE_C_BZ_ROW_SCATTERED passes MASK_K(1) and MASK_K(2)
    // to VSCATTERQPS), but no k register appears in the clobber list below --
    // confirm whether "k1", "k2", ... should be declared as clobbered.
    END_ASM(
        : // output operands
        : // input operands
          [k]         "m" (k),
          [a]         "m" (a),
          [b]         "m" (b),
          [alpha]     "m" (alpha),
          [beta]      "m" (beta),
          [c]         "m" (c),
          [rs_c]      "m" (rs_c),
          [cs_c]      "m" (cs_c),
          [offsetPtr] "m" (offsetPtr)
        : // register clobber list
          "rax", "rbx", "rcx", "rdx", "rdi", "rsi", "r8", "r9", "r10", "r11", "r12",
          "r13", "r14", "r15", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5",
          "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13",
          "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21",
          "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29",
          "zmm30", "zmm31", "memory"
    )
}
blis-0.6.1/kernels/skx/bli_kernels_skx.h000066400000000000000000000035311360743507500202620ustar00rootroot00000000000000/* BLIS An object-based
framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ GEMM_UKR_PROT( float , s, gemm_skx_asm_32x12_l2 ) GEMM_UKR_PROT( float , s, gemm_skx_asm_12x32_l2 ) GEMM_UKR_PROT( double, d, gemm_skx_asm_16x12_l2 ) GEMM_UKR_PROT( double, d, gemm_skx_asm_16x14 ) blis-0.6.1/kernels/zen/000077500000000000000000000000001360743507500147205ustar00rootroot00000000000000blis-0.6.1/kernels/zen/1/000077500000000000000000000000001360743507500150605ustar00rootroot00000000000000blis-0.6.1/kernels/zen/1/bli_amaxv_zen_int.c000066400000000000000000000346131360743507500207230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2016 - 2018 - 2019, Advanced Micro Devices, Inc. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; typedef union { __m128 v; float f[4]; } v4sf_t; /* Union data structure to access AVX registers One 256-bit AVX register holds 4 DP elements. */ typedef union { __m256d v; double d[4] __attribute__((aligned(64))); }v4df_t; typedef union { __m128d v; double d[2]; }v2dd_t; // ----------------------------------------------------------------------------- void bli_samaxv_zen_int ( dim_t n, float* restrict x, inc_t incx, dim_t* restrict i_max, cntx_t* restrict cntx ) { float* minus_one = PASTEMAC(s,m1); dim_t* zero_i = PASTEMAC(i,0); float chi1_r; //float chi1_i; float abs_chi1; float abs_chi1_max; dim_t i_max_l; dim_t i; /* If the vector length is zero, return early. This directly emulates the behavior of netlib BLAS's i?amax() routines. */ if ( bli_zero_dim1( n ) ) { PASTEMAC(i,copys)( *zero_i, *i_max ); return; } /* Initialize the index of the maximum absolute value to zero. */ PASTEMAC(i,copys)( *zero_i, i_max_l ); /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ PASTEMAC(s,copys)( *minus_one, abs_chi1_max ); // For non-unit strides, or very small vector lengths, compute with // scalar code. 
if ( incx != 1 || n < 8 ) { for ( i = 0; i < n; ++i ) { float* chi1 = x + (i )*incx; /* Get the real and imaginary components of chi1. */ chi1_r = *chi1; /* Replace chi1_r and chi1_i with their absolute values. */ chi1_r = fabsf( chi1_r ); /* Add the real and imaginary absolute values together. */ abs_chi1 = chi1_r; /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ if ( abs_chi1_max < abs_chi1 || isnan( abs_chi1 ) ) { abs_chi1_max = abs_chi1; i_max_l = i; } } } else { dim_t n_iter, n_left; dim_t num_vec_elements = 8; v8sf_t x_vec, max_vec, maxInx_vec, mask_vec; v8sf_t idx_vec, inc_vec; v8sf_t sign_mask; v4sf_t max_vec_lo, max_vec_hi, mask_vec_lo; v4sf_t maxInx_vec_lo, maxInx_vec_hi; n_iter = n / num_vec_elements; n_left = n % num_vec_elements; idx_vec.v = _mm256_set_ps( 7, 6, 5, 4, 3, 2, 1, 0 ); inc_vec.v = _mm256_set1_ps( 8 ); max_vec.v = _mm256_set1_ps( -1 ); maxInx_vec.v = _mm256_setzero_ps(); sign_mask.v = _mm256_set1_ps( -0.f ); for ( i = 0; i < n_iter; ++i ) { x_vec.v = _mm256_loadu_ps( x ); // Get the absolute value of the vector element. 
x_vec.v = _mm256_andnot_ps( sign_mask.v, x_vec.v ); mask_vec.v = _mm256_cmp_ps( x_vec.v, max_vec.v, _CMP_GT_OS ); max_vec.v = _mm256_blendv_ps( max_vec.v, x_vec.v, mask_vec.v ); maxInx_vec.v = _mm256_blendv_ps( maxInx_vec.v, idx_vec.v, mask_vec.v ); idx_vec.v += inc_vec.v; x += num_vec_elements; } max_vec_lo.v = _mm256_extractf128_ps( max_vec.v, 0 ); max_vec_hi.v = _mm256_extractf128_ps( max_vec.v, 1 ); mask_vec_lo.v = _mm_cmp_ps( max_vec_hi.v, max_vec_lo.v, _CMP_GT_OS ); max_vec_lo.v = _mm_blendv_ps( max_vec_lo.v, max_vec_hi.v, mask_vec_lo.v ); maxInx_vec_lo.v = _mm256_extractf128_ps( maxInx_vec.v, 0 ); maxInx_vec_hi.v = _mm256_extractf128_ps( maxInx_vec.v, 1 ); maxInx_vec_lo.v = _mm_blendv_ps( maxInx_vec_lo.v, maxInx_vec_hi.v, mask_vec_lo.v ); max_vec_hi.v = _mm_permute_ps( max_vec_lo.v, 14 ); maxInx_vec_hi.v = _mm_permute_ps( maxInx_vec_lo.v, 14 ); mask_vec_lo.v = _mm_cmp_ps( max_vec_hi.v, max_vec_lo.v, _CMP_GT_OS ); max_vec_lo.v = _mm_blendv_ps( max_vec_lo.v, max_vec_hi.v, mask_vec_lo.v ); maxInx_vec_lo.v = _mm_blendv_ps( maxInx_vec_lo.v, maxInx_vec_hi.v, mask_vec_lo.v ); if ( max_vec_lo.f[0] > max_vec_lo.f[1] ) { abs_chi1_max = max_vec_lo.f[0]; i_max_l = maxInx_vec_lo.f[0]; } else { abs_chi1_max = max_vec_lo.f[1]; i_max_l = maxInx_vec_lo.f[1]; } for ( i = n - n_left; i < n; i++ ) { float* chi1 = x; /* Get the real and imaginary components of chi1. */ chi1_r = *chi1; /* Replace chi1_r and chi1_i with their absolute values. */ abs_chi1 = fabsf( chi1_r ); /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ if ( abs_chi1_max < abs_chi1 || isnan( abs_chi1 ) ) { abs_chi1_max = abs_chi1; i_max_l = i; } x += 1; } } // Issue vzeroupper instruction to clear upper lanes of ymm registers. 
// This avoids a performance penalty caused by false dependencies when // transitioning from from AVX to SSE instructions (which may occur // later, especially if BLIS is compiled with -mfpmath=sse). _mm256_zeroupper(); /* Store final index to output variable. */ *i_max = i_max_l; } // ----------------------------------------------------------------------------- void bli_damaxv_zen_int ( dim_t n, double* restrict x, inc_t incx, dim_t* restrict i_max, cntx_t* restrict cntx ) { double* minus_one = PASTEMAC(d,m1); dim_t* zero_i = PASTEMAC(i,0); double chi1_r; //double chi1_i; double abs_chi1; double abs_chi1_max; dim_t i_max_l; dim_t i; /* If the vector length is zero, return early. This directly emulates the behavior of netlib BLAS's i?amax() routines. */ if ( bli_zero_dim1( n ) ) { PASTEMAC(i,copys)( *zero_i, *i_max ); return; } /* Initialize the index of the maximum absolute value to zero. */ \ PASTEMAC(i,copys)( *zero_i, i_max_l ); /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ PASTEMAC(d,copys)( *minus_one, abs_chi1_max ); // For non-unit strides, or very small vector lengths, compute with // scalar code. if ( incx != 1 || n < 4 ) { for ( i = 0; i < n; ++i ) { double* chi1 = x + (i )*incx; /* Get the real and imaginary components of chi1. */ chi1_r = *chi1; /* Replace chi1_r and chi1_i with their absolute values. */ chi1_r = fabs( chi1_r ); /* Add the real and imaginary absolute values together. */ abs_chi1 = chi1_r; /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). 
*/ if ( abs_chi1_max < abs_chi1 || isnan( abs_chi1 ) ) { abs_chi1_max = abs_chi1; i_max_l = i; } } } else { dim_t n_iter, n_left; dim_t num_vec_elements = 4; v4df_t x_vec, max_vec, maxInx_vec, mask_vec; v4df_t idx_vec, inc_vec; v4df_t sign_mask; v2dd_t max_vec_lo, max_vec_hi, mask_vec_lo; v2dd_t maxInx_vec_lo, maxInx_vec_hi; n_iter = n / num_vec_elements; n_left = n % num_vec_elements; idx_vec.v = _mm256_set_pd( 3, 2, 1, 0 ); inc_vec.v = _mm256_set1_pd( 4 ); max_vec.v = _mm256_set1_pd( -1 ); maxInx_vec.v = _mm256_setzero_pd(); sign_mask.v = _mm256_set1_pd( -0.f ); for ( i = 0; i < n_iter; ++i ) { x_vec.v = _mm256_loadu_pd( x ); // Get the absolute value of the vector element. x_vec.v = _mm256_andnot_pd( sign_mask.v, x_vec.v ); mask_vec.v = _mm256_cmp_pd( x_vec.v, max_vec.v, _CMP_GT_OS ); max_vec.v = _mm256_blendv_pd( max_vec.v, x_vec.v, mask_vec.v ); maxInx_vec.v = _mm256_blendv_pd( maxInx_vec.v, idx_vec.v, mask_vec.v ); idx_vec.v += inc_vec.v; x += num_vec_elements; } max_vec_lo.v = _mm256_extractf128_pd( max_vec.v, 0 ); max_vec_hi.v = _mm256_extractf128_pd( max_vec.v, 1 ); mask_vec_lo.v = _mm_cmp_pd( max_vec_hi.v, max_vec_lo.v, _CMP_GT_OS ); max_vec_lo.v = _mm_blendv_pd( max_vec_lo.v, max_vec_hi.v, mask_vec_lo.v ); maxInx_vec_lo.v = _mm256_extractf128_pd( maxInx_vec.v, 0 ); maxInx_vec_hi.v = _mm256_extractf128_pd( maxInx_vec.v, 1 ); maxInx_vec_lo.v = _mm_blendv_pd( maxInx_vec_lo.v, maxInx_vec_hi.v, mask_vec_lo.v ); if ( max_vec_lo.d[0] > max_vec_lo.d[1] ) { abs_chi1_max = max_vec_lo.d[0]; i_max_l = maxInx_vec_lo.d[0]; } else { abs_chi1_max = max_vec_lo.d[1]; i_max_l = maxInx_vec_lo.d[1]; } for ( i = n - n_left; i < n; i++ ) { double* chi1 = x; /* Get the real and imaginary components of chi1. */ chi1_r = *chi1; /* Replace chi1_r and chi1_i with their absolute values. */ abs_chi1 = fabs( chi1_r ); /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. 
If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ if ( abs_chi1_max < abs_chi1 || isnan( abs_chi1 ) ) { abs_chi1_max = abs_chi1; i_max_l = i; } x += 1; } } // Issue vzeroupper instruction to clear upper lanes of ymm registers. // This avoids a performance penalty caused by false dependencies when // transitioning from from AVX to SSE instructions (which may occur // later, especially if BLIS is compiled with -mfpmath=sse). _mm256_zeroupper(); /* Store final index to output variable. */ *i_max = i_max_l; } // ----------------------------------------------------------------------------- #if 0 #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ dim_t* i_max, \ cntx_t* cntx \ ) \ { \ ctype_r* minus_one = PASTEMAC(chr,m1); \ dim_t* zero_i = PASTEMAC(i,0); \ \ ctype_r chi1_r; \ ctype_r chi1_i; \ ctype_r abs_chi1; \ ctype_r abs_chi1_max; \ dim_t i; \ \ /* Initialize the index of the maximum absolute value to zero. */ \ PASTEMAC(i,copys)( zero_i, *i_max ); \ \ /* If the vector length is zero, return early. This directly emulates the behavior of netlib BLAS's i?amax() routines. */ \ if ( bli_zero_dim1( n ) ) return; \ \ /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ \ PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \ \ if ( incx == 1 ) \ { \ for ( i = 0; i < n; ++i ) \ { \ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(ch,chr,gets)( x[i], chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. 
*/ \ PASTEMAC(chr,set0s)( abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ *i_max = i; \ } \ } \ } \ else \ { \ for ( i = 0; i < n; ++i ) \ { \ ctype* chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ PASTEMAC(chr,set0s)( abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ *i_max = i; \ } \ } \ } \ } GENTFUNCR( scomplex, float, c, s, amaxv_zen_int ) GENTFUNCR( dcomplex, double, z, d, amaxv_zen_int ) #endif blis-0.6.1/kernels/zen/1/bli_axpyv_zen_int.c000066400000000000000000000201631360743507500207510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2016 - 2019, Advanced Micro Devices, Inc. 
Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. 
*/ typedef union { __m256d v; double d[4] __attribute__((aligned(64))); } v4df_t; // ----------------------------------------------------------------------------- void bli_saxpyv_zen_int ( conj_t conjx, dim_t n, float* restrict alpha, float* restrict x, inc_t incx, float* restrict y, inc_t incy, cntx_t* restrict cntx ) { const dim_t n_elem_per_reg = 8; const dim_t n_iter_unroll = 4; dim_t i; dim_t n_viter; dim_t n_left; float* restrict x0; float* restrict y0; v8sf_t alphav; v8sf_t x0v, x1v, x2v, x3v; v8sf_t y0v, y1v, y2v, y3v; // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. n_viter = ( n ) / ( n_elem_per_reg * n_iter_unroll ); n_left = ( n ) % ( n_elem_per_reg * n_iter_unroll ); // If there is anything that would interfere with our use of contiguous // vector loads/stores, override n_viter and n_left to use scalar code // for all iterations. if ( incx != 1 || incy != 1 ) { n_viter = 0; n_left = n; } // Initialize local pointers. x0 = x; y0 = y; // Broadcast the alpha scalar to all elements of a vector register. alphav.v = _mm256_broadcast_ss( alpha ); // If there are vectorized iterations, perform them with vector // instructions. for ( i = 0; i < n_viter; ++i ) { // Load the input values. 
// NOTE: the statements down to the first closing brace in column 0 are the
// tail of a single-precision kernel whose signature and loop header precede
// this excerpt; they are kept exactly as they were.
		y0v.v = _mm256_loadu_ps( y0 + 0*n_elem_per_reg );
		x0v.v = _mm256_loadu_ps( x0 + 0*n_elem_per_reg );

		y1v.v = _mm256_loadu_ps( y0 + 1*n_elem_per_reg );
		x1v.v = _mm256_loadu_ps( x0 + 1*n_elem_per_reg );

		y2v.v = _mm256_loadu_ps( y0 + 2*n_elem_per_reg );
		x2v.v = _mm256_loadu_ps( x0 + 2*n_elem_per_reg );

		y3v.v = _mm256_loadu_ps( y0 + 3*n_elem_per_reg );
		x3v.v = _mm256_loadu_ps( x0 + 3*n_elem_per_reg );

		// perform : y += alpha * x;
		y0v.v = _mm256_fmadd_ps( alphav.v, x0v.v, y0v.v );
		y1v.v = _mm256_fmadd_ps( alphav.v, x1v.v, y1v.v );
		y2v.v = _mm256_fmadd_ps( alphav.v, x2v.v, y2v.v );
		y3v.v = _mm256_fmadd_ps( alphav.v, x3v.v, y3v.v );

		// Store the output.
		_mm256_storeu_ps( (y0 + 0*n_elem_per_reg), y0v.v );
		_mm256_storeu_ps( (y0 + 1*n_elem_per_reg), y1v.v );
		_mm256_storeu_ps( (y0 + 2*n_elem_per_reg), y2v.v );
		_mm256_storeu_ps( (y0 + 3*n_elem_per_reg), y3v.v );

		// Advance both pointers past the elements consumed by this pass.
		x0 += n_elem_per_reg * n_iter_unroll;
		y0 += n_elem_per_reg * n_iter_unroll;
	}

	// Issue vzeroupper instruction to clear upper lanes of ymm registers.
	// This avoids a performance penalty caused by false dependencies when
	// transitioning from AVX to SSE instructions (which may occur
	// as soon as the n_left cleanup loop below if BLIS is compiled with
	// -mfpmath=sse).
	_mm256_zeroupper();

	// Read alpha once so the scalar loop works on a local copy.
	const float alphac = *alpha;

	// If there are leftover iterations, perform them with scalar code.
	for ( i = 0; i < n_left; ++i )
	{
		const float x0c = *x0;

		*y0 += alphac * x0c;

		x0 += incx;
		y0 += incy;
	}
}

// -----------------------------------------------------------------------------

// bli_daxpyv_zen_int
//
// Double-precision axpyv kernel: y := y + alpha * x over n elements.
//
//   conjx       not referenced in this function body.
//   n           number of elements to process.
//   alpha       pointer to the scalar multiplier.
//   x, incx     input vector and the pointer step applied per element.
//   y, incy     vector updated in place and the pointer step per element.
//   cntx        not referenced in this function body.
//
// When both increments are 1, the bulk of the vector is handled 16 elements
// per pass (four 256-bit registers of four doubles each) with unaligned
// loads/stores and FMA; whatever is left, or the whole vector when either
// increment is not 1, goes through the scalar loop at the end.
void bli_daxpyv_zen_int
     (
       conj_t           conjx,
       dim_t            n,
       double* restrict alpha,
       double* restrict x, inc_t incx,
       double* restrict y, inc_t incy,
       cntx_t* restrict cntx
     )
{
	const dim_t       n_elem_per_reg = 4;
	const dim_t       n_iter_unroll  = 4;

	dim_t             i;
	dim_t             n_viter;
	dim_t             n_left;

	double*  restrict x0;
	double*  restrict y0;

	v4df_t            alphav;
	v4df_t            x0v, x1v, x2v, x3v;
	v4df_t            y0v, y1v, y2v, y3v;

	// If the vector dimension is zero, or if alpha is zero, return early.
	if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return;

	// Use the unrolling factor and the number of elements per register
	// to compute the number of vectorized and leftover iterations.
	n_viter = ( n ) / ( n_elem_per_reg * n_iter_unroll );
	n_left  = ( n ) % ( n_elem_per_reg * n_iter_unroll );

	// If there is anything that would interfere with our use of contiguous
	// vector loads/stores, override n_viter and n_left to use scalar code
	// for all iterations.
	if ( incx != 1 || incy != 1 )
	{
		n_viter = 0;
		n_left  = n;
	}

	// Initialize local pointers.
	x0 = x;
	y0 = y;

	// Broadcast the alpha scalar to all elements of a vector register.
	alphav.v = _mm256_broadcast_sd( alpha );

	// If there are vectorized iterations, perform them with vector
	// instructions.
	for ( i = 0; i < n_viter; ++i )
	{
		// Load the input values.
		y0v.v = _mm256_loadu_pd( y0 + 0*n_elem_per_reg );
		x0v.v = _mm256_loadu_pd( x0 + 0*n_elem_per_reg );

		y1v.v = _mm256_loadu_pd( y0 + 1*n_elem_per_reg );
		x1v.v = _mm256_loadu_pd( x0 + 1*n_elem_per_reg );

		y2v.v = _mm256_loadu_pd( y0 + 2*n_elem_per_reg );
		x2v.v = _mm256_loadu_pd( x0 + 2*n_elem_per_reg );

		y3v.v = _mm256_loadu_pd( y0 + 3*n_elem_per_reg );
		x3v.v = _mm256_loadu_pd( x0 + 3*n_elem_per_reg );

		// perform : y += alpha * x;
		y0v.v = _mm256_fmadd_pd( alphav.v, x0v.v, y0v.v );
		y1v.v = _mm256_fmadd_pd( alphav.v, x1v.v, y1v.v );
		y2v.v = _mm256_fmadd_pd( alphav.v, x2v.v, y2v.v );
		y3v.v = _mm256_fmadd_pd( alphav.v, x3v.v, y3v.v );

		// Store the output.
		_mm256_storeu_pd( (y0 + 0*n_elem_per_reg), y0v.v );
		_mm256_storeu_pd( (y0 + 1*n_elem_per_reg), y1v.v );
		_mm256_storeu_pd( (y0 + 2*n_elem_per_reg), y2v.v );
		_mm256_storeu_pd( (y0 + 3*n_elem_per_reg), y3v.v );

		// Advance both pointers past the 16 elements just processed.
		x0 += n_elem_per_reg * n_iter_unroll;
		y0 += n_elem_per_reg * n_iter_unroll;
	}

	// Issue vzeroupper instruction to clear upper lanes of ymm registers.
	// This avoids a performance penalty caused by false dependencies when
	// transitioning from AVX to SSE instructions (which may occur
	// as soon as the n_left cleanup loop below if BLIS is compiled with
	// -mfpmath=sse).
	_mm256_zeroupper();

	// Read alpha once so the scalar loop works on a local copy.
	const double alphac = *alpha;

	// If there are leftover iterations, perform them with scalar code.
	for ( i = 0; i < n_left; ++i )
	{
		const double x0c = *x0;

		*y0 += alphac * x0c;

		x0 += incx;
		y0 += incy;
	}
}
blis-0.6.1/kernels/zen/1/bli_axpyv_zen_int10.c000066400000000000000000000363431360743507500211210ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2016 - 2019, Advanced Micro Devices, Inc.
   Copyright (C) 2018, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. */ typedef union { __m256d v; double d[4] __attribute__((aligned(64))); } v4df_t; // ----------------------------------------------------------------------------- void bli_saxpyv_zen_int10 ( conj_t conjx, dim_t n, float* restrict alpha, float* restrict x, inc_t incx, float* restrict y, inc_t incy, cntx_t* restrict cntx ) { const dim_t n_elem_per_reg = 8; dim_t i; float* restrict x0; float* restrict y0; __m256 alphav; __m256 xv[10]; __m256 yv[10]; __m256 zv[10]; // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return; // Initialize local pointers. x0 = x; y0 = y; if ( incx == 1 && incy == 1 ) { // Broadcast the alpha scalar to all elements of a vector register. alphav = _mm256_broadcast_ss( alpha ); for ( i = 0; (i + 79) < n; i += 80 ) { // 80 elements will be processed per loop; 10 FMAs will run per loop. 
xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg ); xv[4] = _mm256_loadu_ps( x0 + 4*n_elem_per_reg ); xv[5] = _mm256_loadu_ps( x0 + 5*n_elem_per_reg ); xv[6] = _mm256_loadu_ps( x0 + 6*n_elem_per_reg ); xv[7] = _mm256_loadu_ps( x0 + 7*n_elem_per_reg ); xv[8] = _mm256_loadu_ps( x0 + 8*n_elem_per_reg ); xv[9] = _mm256_loadu_ps( x0 + 9*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_ps( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_ps( y0 + 3*n_elem_per_reg ); yv[4] = _mm256_loadu_ps( y0 + 4*n_elem_per_reg ); yv[5] = _mm256_loadu_ps( y0 + 5*n_elem_per_reg ); yv[6] = _mm256_loadu_ps( y0 + 6*n_elem_per_reg ); yv[7] = _mm256_loadu_ps( y0 + 7*n_elem_per_reg ); yv[8] = _mm256_loadu_ps( y0 + 8*n_elem_per_reg ); yv[9] = _mm256_loadu_ps( y0 + 9*n_elem_per_reg ); zv[0] = _mm256_fmadd_ps( xv[0], alphav, yv[0] ); zv[1] = _mm256_fmadd_ps( xv[1], alphav, yv[1] ); zv[2] = _mm256_fmadd_ps( xv[2], alphav, yv[2] ); zv[3] = _mm256_fmadd_ps( xv[3], alphav, yv[3] ); zv[4] = _mm256_fmadd_ps( xv[4], alphav, yv[4] ); zv[5] = _mm256_fmadd_ps( xv[5], alphav, yv[5] ); zv[6] = _mm256_fmadd_ps( xv[6], alphav, yv[6] ); zv[7] = _mm256_fmadd_ps( xv[7], alphav, yv[7] ); zv[8] = _mm256_fmadd_ps( xv[8], alphav, yv[8] ); zv[9] = _mm256_fmadd_ps( xv[9], alphav, yv[9] ); _mm256_storeu_ps( (y0 + 0*n_elem_per_reg), zv[0] ); _mm256_storeu_ps( (y0 + 1*n_elem_per_reg), zv[1] ); _mm256_storeu_ps( (y0 + 2*n_elem_per_reg), zv[2] ); _mm256_storeu_ps( (y0 + 3*n_elem_per_reg), zv[3] ); _mm256_storeu_ps( (y0 + 4*n_elem_per_reg), zv[4] ); _mm256_storeu_ps( (y0 + 5*n_elem_per_reg), zv[5] ); _mm256_storeu_ps( (y0 + 6*n_elem_per_reg), zv[6] ); _mm256_storeu_ps( (y0 + 7*n_elem_per_reg), zv[7] ); _mm256_storeu_ps( (y0 + 8*n_elem_per_reg), zv[8] ); _mm256_storeu_ps( (y0 + 
9*n_elem_per_reg), zv[9] ); x0 += 10*n_elem_per_reg; y0 += 10*n_elem_per_reg; } for ( ; (i + 39) < n; i += 40 ) { xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg ); xv[4] = _mm256_loadu_ps( x0 + 4*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_ps( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_ps( y0 + 3*n_elem_per_reg ); yv[4] = _mm256_loadu_ps( y0 + 4*n_elem_per_reg ); zv[0] = _mm256_fmadd_ps( xv[0], alphav, yv[0] ); zv[1] = _mm256_fmadd_ps( xv[1], alphav, yv[1] ); zv[2] = _mm256_fmadd_ps( xv[2], alphav, yv[2] ); zv[3] = _mm256_fmadd_ps( xv[3], alphav, yv[3] ); zv[4] = _mm256_fmadd_ps( xv[4], alphav, yv[4] ); _mm256_storeu_ps( (y0 + 0*n_elem_per_reg), zv[0] ); _mm256_storeu_ps( (y0 + 1*n_elem_per_reg), zv[1] ); _mm256_storeu_ps( (y0 + 2*n_elem_per_reg), zv[2] ); _mm256_storeu_ps( (y0 + 3*n_elem_per_reg), zv[3] ); _mm256_storeu_ps( (y0 + 4*n_elem_per_reg), zv[4] ); x0 += 5*n_elem_per_reg; y0 += 5*n_elem_per_reg; } for ( ; (i + 31) < n; i += 32 ) { xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_ps( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_ps( y0 + 3*n_elem_per_reg ); zv[0] = _mm256_fmadd_ps( xv[0], alphav, yv[0] ); zv[1] = _mm256_fmadd_ps( xv[1], alphav, yv[1] ); zv[2] = _mm256_fmadd_ps( xv[2], alphav, yv[2] ); zv[3] = _mm256_fmadd_ps( xv[3], alphav, yv[3] ); _mm256_storeu_ps( (y0 + 0*n_elem_per_reg), zv[0] ); _mm256_storeu_ps( (y0 + 1*n_elem_per_reg), zv[1] ); _mm256_storeu_ps( (y0 + 2*n_elem_per_reg), zv[2] ); _mm256_storeu_ps( (y0 + 3*n_elem_per_reg), 
zv[3] ); x0 += 4*n_elem_per_reg; y0 += 4*n_elem_per_reg; } for ( ; (i + 15) < n; i += 16 ) { xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); zv[0] = _mm256_fmadd_ps( xv[0], alphav, yv[0] ); zv[1] = _mm256_fmadd_ps( xv[1], alphav, yv[1] ); _mm256_storeu_ps( (y0 + 0*n_elem_per_reg), zv[0] ); _mm256_storeu_ps( (y0 + 1*n_elem_per_reg), zv[1] ); x0 += 2*n_elem_per_reg; y0 += 2*n_elem_per_reg; } for ( ; (i + 7) < n; i += 8 ) { xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); zv[0] = _mm256_fmadd_ps( xv[0], alphav, yv[0] ); _mm256_storeu_ps( (y0 + 0*n_elem_per_reg), zv[0] ); x0 += 1*n_elem_per_reg; y0 += 1*n_elem_per_reg; } // Issue vzeroupper instruction to clear upper lanes of ymm registers. // This avoids a performance penalty caused by false dependencies when // transitioning from from AVX to SSE instructions (which may occur // as soon as the n_left cleanup loop below if BLIS is compiled with // -mfpmath=sse). _mm256_zeroupper(); for ( ; (i + 0) < n; i += 1 ) { *y0 += (*alpha) * (*x0); x0 += 1; y0 += 1; } } else { const float alphac = *alpha; for ( i = 0; i < n; ++i ) { const float x0c = *x0; *y0 += alphac * x0c; x0 += incx; y0 += incy; } } } // ----------------------------------------------------------------------------- void bli_daxpyv_zen_int10 ( conj_t conjx, dim_t n, double* restrict alpha, double* restrict x, inc_t incx, double* restrict y, inc_t incy, cntx_t* restrict cntx ) { const dim_t n_elem_per_reg = 4; dim_t i; double* restrict x0 = x; double* restrict y0 = y; __m256d alphav; __m256d xv[10]; __m256d yv[10]; __m256d zv[10]; // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return; // Initialize local pointers. 
x0 = x; y0 = y; if ( incx == 1 && incy == 1 ) { // Broadcast the alpha scalar to all elements of a vector register. alphav = _mm256_broadcast_sd( alpha ); for ( i = 0; (i + 39) < n; i += 40 ) { // 40 elements will be processed per loop; 10 FMAs will run per loop. xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg ); xv[4] = _mm256_loadu_pd( x0 + 4*n_elem_per_reg ); xv[5] = _mm256_loadu_pd( x0 + 5*n_elem_per_reg ); xv[6] = _mm256_loadu_pd( x0 + 6*n_elem_per_reg ); xv[7] = _mm256_loadu_pd( x0 + 7*n_elem_per_reg ); xv[8] = _mm256_loadu_pd( x0 + 8*n_elem_per_reg ); xv[9] = _mm256_loadu_pd( x0 + 9*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_pd( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_pd( y0 + 3*n_elem_per_reg ); yv[4] = _mm256_loadu_pd( y0 + 4*n_elem_per_reg ); yv[5] = _mm256_loadu_pd( y0 + 5*n_elem_per_reg ); yv[6] = _mm256_loadu_pd( y0 + 6*n_elem_per_reg ); yv[7] = _mm256_loadu_pd( y0 + 7*n_elem_per_reg ); yv[8] = _mm256_loadu_pd( y0 + 8*n_elem_per_reg ); yv[9] = _mm256_loadu_pd( y0 + 9*n_elem_per_reg ); zv[0] = _mm256_fmadd_pd( xv[0], alphav, yv[0] ); zv[1] = _mm256_fmadd_pd( xv[1], alphav, yv[1] ); zv[2] = _mm256_fmadd_pd( xv[2], alphav, yv[2] ); zv[3] = _mm256_fmadd_pd( xv[3], alphav, yv[3] ); zv[4] = _mm256_fmadd_pd( xv[4], alphav, yv[4] ); zv[5] = _mm256_fmadd_pd( xv[5], alphav, yv[5] ); zv[6] = _mm256_fmadd_pd( xv[6], alphav, yv[6] ); zv[7] = _mm256_fmadd_pd( xv[7], alphav, yv[7] ); zv[8] = _mm256_fmadd_pd( xv[8], alphav, yv[8] ); zv[9] = _mm256_fmadd_pd( xv[9], alphav, yv[9] ); _mm256_storeu_pd( (y0 + 0*n_elem_per_reg), zv[0] ); _mm256_storeu_pd( (y0 + 1*n_elem_per_reg), zv[1] ); _mm256_storeu_pd( (y0 + 2*n_elem_per_reg), zv[2] ); _mm256_storeu_pd( (y0 + 3*n_elem_per_reg), zv[3] ); _mm256_storeu_pd( (y0 + 
4*n_elem_per_reg), zv[4] ); _mm256_storeu_pd( (y0 + 5*n_elem_per_reg), zv[5] ); _mm256_storeu_pd( (y0 + 6*n_elem_per_reg), zv[6] ); _mm256_storeu_pd( (y0 + 7*n_elem_per_reg), zv[7] ); _mm256_storeu_pd( (y0 + 8*n_elem_per_reg), zv[8] ); _mm256_storeu_pd( (y0 + 9*n_elem_per_reg), zv[9] ); x0 += 10*n_elem_per_reg; y0 += 10*n_elem_per_reg; } for ( ; (i + 19) < n; i += 20 ) { xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg ); xv[4] = _mm256_loadu_pd( x0 + 4*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_pd( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_pd( y0 + 3*n_elem_per_reg ); yv[4] = _mm256_loadu_pd( y0 + 4*n_elem_per_reg ); zv[0] = _mm256_fmadd_pd( xv[0], alphav, yv[0] ); zv[1] = _mm256_fmadd_pd( xv[1], alphav, yv[1] ); zv[2] = _mm256_fmadd_pd( xv[2], alphav, yv[2] ); zv[3] = _mm256_fmadd_pd( xv[3], alphav, yv[3] ); zv[4] = _mm256_fmadd_pd( xv[4], alphav, yv[4] ); _mm256_storeu_pd( (y0 + 0*n_elem_per_reg), zv[0] ); _mm256_storeu_pd( (y0 + 1*n_elem_per_reg), zv[1] ); _mm256_storeu_pd( (y0 + 2*n_elem_per_reg), zv[2] ); _mm256_storeu_pd( (y0 + 3*n_elem_per_reg), zv[3] ); _mm256_storeu_pd( (y0 + 4*n_elem_per_reg), zv[4] ); x0 += 5*n_elem_per_reg; y0 += 5*n_elem_per_reg; } for ( ; (i + 15) < n; i += 16 ) { xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_pd( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_pd( y0 + 3*n_elem_per_reg ); zv[0] = _mm256_fmadd_pd( xv[0], alphav, yv[0] ); zv[1] = _mm256_fmadd_pd( xv[1], alphav, yv[1] ); zv[2] = _mm256_fmadd_pd( xv[2], 
alphav, yv[2] ); zv[3] = _mm256_fmadd_pd( xv[3], alphav, yv[3] ); _mm256_storeu_pd( (y0 + 0*n_elem_per_reg), zv[0] ); _mm256_storeu_pd( (y0 + 1*n_elem_per_reg), zv[1] ); _mm256_storeu_pd( (y0 + 2*n_elem_per_reg), zv[2] ); _mm256_storeu_pd( (y0 + 3*n_elem_per_reg), zv[3] ); x0 += 4*n_elem_per_reg; y0 += 4*n_elem_per_reg; } for ( ; i + 7 < n; i += 8 ) { xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); zv[0] = _mm256_fmadd_pd( xv[0], alphav, yv[0] ); zv[1] = _mm256_fmadd_pd( xv[1], alphav, yv[1] ); _mm256_storeu_pd( (y0 + 0*n_elem_per_reg), zv[0] ); _mm256_storeu_pd( (y0 + 1*n_elem_per_reg), zv[1] ); x0 += 2*n_elem_per_reg; y0 += 2*n_elem_per_reg; } for ( ; i + 3 < n; i += 4 ) { xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); zv[0] = _mm256_fmadd_pd( xv[0], alphav, yv[0] ); _mm256_storeu_pd( (y0 + 0*n_elem_per_reg), zv[0] ); x0 += 1*n_elem_per_reg; y0 += 1*n_elem_per_reg; } // Issue vzeroupper instruction to clear upper lanes of ymm registers. // This avoids a performance penalty caused by false dependencies when // transitioning from from AVX to SSE instructions (which may occur // as soon as the n_left cleanup loop below if BLIS is compiled with // -mfpmath=sse). _mm256_zeroupper(); for ( ; i < n; i += 1 ) { *y0 += (*alpha) * (*x0); y0 += 1; x0 += 1; } } else { const double alphac = *alpha; for ( i = 0; i < n; ++i ) { const double x0c = *x0; *y0 += alphac * x0c; x0 += incx; y0 += incy; } } } blis-0.6.1/kernels/zen/1/bli_dotv_zen_int.c000066400000000000000000000216431360743507500205620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2016 - 2019, Advanced Micro Devices, Inc. 
Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. 
*/ typedef union { __m256d v; double d[4] __attribute__((aligned(64))); } v4df_t; // ----------------------------------------------------------------------------- void bli_sdotv_zen_int ( conj_t conjx, conj_t conjy, dim_t n, float* restrict x, inc_t incx, float* restrict y, inc_t incy, float* restrict rho, cntx_t* restrict cntx ) { const dim_t n_elem_per_reg = 8; const dim_t n_iter_unroll = 4; dim_t i; dim_t n_viter; dim_t n_left; float* restrict x0; float* restrict y0; float rho0; v8sf_t rho0v, rho1v, rho2v, rho3v; v8sf_t x0v, y0v; v8sf_t x1v, y1v; v8sf_t x2v, y2v; v8sf_t x3v, y3v; // If the vector dimension is zero, set rho to zero and return early. if ( bli_zero_dim1( n ) ) { PASTEMAC(s,set0s)( *rho ); return; } // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. n_viter = ( n ) / ( n_elem_per_reg * n_iter_unroll ); n_left = ( n ) % ( n_elem_per_reg * n_iter_unroll ); // If there is anything that would interfere with our use of contiguous // vector loads/stores, override n_viter and n_left to use scalar code // for all iterations. if ( incx != 1 || incy != 1 ) { n_viter = 0; n_left = n; } // Initialize local pointers. x0 = x; y0 = y; // Initialize the local scalar rho1 to zero. PASTEMAC(s,set0s)( rho0 ); // Initialize the unrolled iterations' rho vectors to zero. rho0v.v = _mm256_setzero_ps(); rho1v.v = _mm256_setzero_ps(); rho2v.v = _mm256_setzero_ps(); rho3v.v = _mm256_setzero_ps(); for ( i = 0; i < n_viter; ++i ) { // Load the x and y input vector elements. 
x0v.v = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); y0v.v = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); x1v.v = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); y1v.v = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); x2v.v = _mm256_loadu_ps( x0 + 2*n_elem_per_reg ); y2v.v = _mm256_loadu_ps( y0 + 2*n_elem_per_reg ); x3v.v = _mm256_loadu_ps( x0 + 3*n_elem_per_reg ); y3v.v = _mm256_loadu_ps( y0 + 3*n_elem_per_reg ); // Compute the element-wise product of the x and y vectors, // storing in the corresponding rho vectors. rho0v.v = _mm256_fmadd_ps( x0v.v, y0v.v, rho0v.v ); rho1v.v = _mm256_fmadd_ps( x1v.v, y1v.v, rho1v.v ); rho2v.v = _mm256_fmadd_ps( x2v.v, y2v.v, rho2v.v ); rho3v.v = _mm256_fmadd_ps( x3v.v, y3v.v, rho3v.v ); x0 += ( n_elem_per_reg * n_iter_unroll ); y0 += ( n_elem_per_reg * n_iter_unroll ); } // Accumulate the unrolled rho vectors into a single vector. rho0v.v += rho1v.v; rho0v.v += rho2v.v; rho0v.v += rho3v.v; // Accumulate the final rho vector into a single scalar result. rho0 += rho0v.f[0] + rho0v.f[1] + rho0v.f[2] + rho0v.f[3] + rho0v.f[4] + rho0v.f[5] + rho0v.f[6] + rho0v.f[7]; // Issue vzeroupper instruction to clear upper lanes of ymm registers. // This avoids a performance penalty caused by false dependencies when // transitioning from from AVX to SSE instructions (which may occur // as soon as the n_left cleanup loop below if BLIS is compiled with // -mfpmath=sse). _mm256_zeroupper(); // If there are leftover iterations, perform them with scalar code. for ( i = 0; i < n_left; ++i ) { const float x0c = *x0; const float y0c = *y0; rho0 += x0c * y0c; x0 += incx; y0 += incy; } // Copy the final result into the output variable. 
PASTEMAC(s,copys)( rho0, *rho ); } // ----------------------------------------------------------------------------- void bli_ddotv_zen_int ( conj_t conjx, conj_t conjy, dim_t n, double* restrict x, inc_t incx, double* restrict y, inc_t incy, double* restrict rho, cntx_t* restrict cntx ) { const dim_t n_elem_per_reg = 4; const dim_t n_iter_unroll = 4; dim_t i; dim_t n_viter; dim_t n_left; double* restrict x0; double* restrict y0; double rho0; v4df_t rho0v, rho1v, rho2v, rho3v; v4df_t x0v, y0v; v4df_t x1v, y1v; v4df_t x2v, y2v; v4df_t x3v, y3v; // If the vector dimension is zero, set rho to zero and return early. if ( bli_zero_dim1( n ) ) { PASTEMAC(d,set0s)( *rho ); return; } // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. n_viter = ( n ) / ( n_elem_per_reg * n_iter_unroll ); n_left = ( n ) % ( n_elem_per_reg * n_iter_unroll ); // If there is anything that would interfere with our use of contiguous // vector loads/stores, override n_viter and n_left to use scalar code // for all iterations. if ( incx != 1 || incy != 1 ) { n_viter = 0; n_left = n; } // Initialize local pointers. x0 = x; y0 = y; // Initialize the local scalar rho1 to zero. PASTEMAC(d,set0s)( rho0 ); // Initialize the unrolled iterations' rho vectors to zero. rho0v.v = _mm256_setzero_pd(); rho1v.v = _mm256_setzero_pd(); rho2v.v = _mm256_setzero_pd(); rho3v.v = _mm256_setzero_pd(); for ( i = 0; i < n_viter; ++i ) { // Load the x and y input vector elements. 
x0v.v = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); y0v.v = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); x1v.v = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); y1v.v = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); x2v.v = _mm256_loadu_pd( x0 + 2*n_elem_per_reg ); y2v.v = _mm256_loadu_pd( y0 + 2*n_elem_per_reg ); x3v.v = _mm256_loadu_pd( x0 + 3*n_elem_per_reg ); y3v.v = _mm256_loadu_pd( y0 + 3*n_elem_per_reg ); // Compute the element-wise product of the x and y vectors, // storing in the corresponding rho vectors. rho0v.v = _mm256_fmadd_pd( x0v.v, y0v.v, rho0v.v ); rho1v.v = _mm256_fmadd_pd( x1v.v, y1v.v, rho1v.v ); rho2v.v = _mm256_fmadd_pd( x2v.v, y2v.v, rho2v.v ); rho3v.v = _mm256_fmadd_pd( x3v.v, y3v.v, rho3v.v ); x0 += ( n_elem_per_reg * n_iter_unroll ); y0 += ( n_elem_per_reg * n_iter_unroll ); } // Accumulate the unrolled rho vectors into a single vector. rho0v.v += rho1v.v; rho0v.v += rho2v.v; rho0v.v += rho3v.v; // Accumulate the final rho vector into a single scalar result. rho0 += rho0v.d[0] + rho0v.d[1] + rho0v.d[2] + rho0v.d[3]; // Issue vzeroupper instruction to clear upper lanes of ymm registers. // This avoids a performance penalty caused by false dependencies when // transitioning from from AVX to SSE instructions (which may occur // as soon as the n_left cleanup loop below if BLIS is compiled with // -mfpmath=sse). _mm256_zeroupper(); // If there are leftover iterations, perform them with scalar code. for ( i = 0; i < n_left; ++i ) { const double x0c = *x0; const double y0c = *y0; rho0 += x0c * y0c; x0 += incx; y0 += incy; } // Copy the final result into the output variable. PASTEMAC(d,copys)( rho0, *rho ); } blis-0.6.1/kernels/zen/1/bli_dotv_zen_int10.c000066400000000000000000000334161360743507500207240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2016 - 2019, Advanced Micro Devices, Inc. 
Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. 
*/ typedef union { __m256d v; double d[4] __attribute__((aligned(64))); } v4df_t; // ----------------------------------------------------------------------------- void bli_sdotv_zen_int10 ( conj_t conjx, conj_t conjy, dim_t n, float* restrict x, inc_t incx, float* restrict y, inc_t incy, float* restrict rho, cntx_t* restrict cntx ) { const dim_t n_elem_per_reg = 8; dim_t i; float* restrict x0; float* restrict y0; float rho0; __m256 xv[10]; __m256 yv[10]; v8sf_t rhov[2]; // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) ) { PASTEMAC(s,set0s)( *rho ); return; } // Initialize local pointers. x0 = x; y0 = y; PASTEMAC(s,set0s)( rho0 ); if ( incx == 1 && incy == 1 ) { rhov[0].v = _mm256_setzero_ps(); rhov[1].v = _mm256_setzero_ps(); for ( i = 0; (i + 79) < n; i += 80 ) { // 80 elements will be processed per loop; 10 FMAs will run per loop. xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg ); xv[4] = _mm256_loadu_ps( x0 + 4*n_elem_per_reg ); xv[5] = _mm256_loadu_ps( x0 + 5*n_elem_per_reg ); xv[6] = _mm256_loadu_ps( x0 + 6*n_elem_per_reg ); xv[7] = _mm256_loadu_ps( x0 + 7*n_elem_per_reg ); xv[8] = _mm256_loadu_ps( x0 + 8*n_elem_per_reg ); xv[9] = _mm256_loadu_ps( x0 + 9*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_ps( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_ps( y0 + 3*n_elem_per_reg ); yv[4] = _mm256_loadu_ps( y0 + 4*n_elem_per_reg ); yv[5] = _mm256_loadu_ps( y0 + 5*n_elem_per_reg ); yv[6] = _mm256_loadu_ps( y0 + 6*n_elem_per_reg ); yv[7] = _mm256_loadu_ps( y0 + 7*n_elem_per_reg ); yv[8] = _mm256_loadu_ps( y0 + 8*n_elem_per_reg ); yv[9] = _mm256_loadu_ps( y0 + 9*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_ps( xv[0], yv[0], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[1], yv[1], rhov[1].v 
); rhov[0].v = _mm256_fmadd_ps( xv[2], yv[2], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[3], yv[3], rhov[1].v ); rhov[0].v = _mm256_fmadd_ps( xv[4], yv[4], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[5], yv[5], rhov[1].v ); rhov[0].v = _mm256_fmadd_ps( xv[6], yv[6], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[7], yv[7], rhov[1].v ); rhov[0].v = _mm256_fmadd_ps( xv[8], yv[8], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[9], yv[9], rhov[1].v ); x0 += 10*n_elem_per_reg; y0 += 10*n_elem_per_reg; } for ( ; (i + 39) < n; i += 40 ) { xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg ); xv[4] = _mm256_loadu_ps( x0 + 4*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_ps( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_ps( y0 + 3*n_elem_per_reg ); yv[4] = _mm256_loadu_ps( y0 + 4*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_ps( xv[0], yv[0], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[1], yv[1], rhov[1].v ); rhov[0].v = _mm256_fmadd_ps( xv[2], yv[2], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[3], yv[3], rhov[1].v ); rhov[0].v = _mm256_fmadd_ps( xv[4], yv[4], rhov[0].v ); x0 += 5*n_elem_per_reg; y0 += 5*n_elem_per_reg; } for ( ; (i + 31) < n; i += 32 ) { xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_ps( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_ps( y0 + 3*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_ps( xv[0], yv[0], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[1], yv[1], rhov[1].v ); rhov[0].v = _mm256_fmadd_ps( xv[2], yv[2], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( 
xv[3], yv[3], rhov[1].v ); x0 += 4*n_elem_per_reg; y0 += 4*n_elem_per_reg; } for ( ; (i + 15) < n; i += 16 ) { xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_ps( y0 + 1*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_ps( xv[0], yv[0], rhov[0].v ); rhov[1].v = _mm256_fmadd_ps( xv[1], yv[1], rhov[1].v ); x0 += 2*n_elem_per_reg; y0 += 2*n_elem_per_reg; } for ( ; (i + 7) < n; i += 8 ) { xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); yv[0] = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_ps( xv[0], yv[0], rhov[0].v ); x0 += 1*n_elem_per_reg; y0 += 1*n_elem_per_reg; } for ( ; (i + 0) < n; i += 1 ) { rhov[0].f[0] += x0[i] * y0[i]; } v8sf_t onev; onev.v = _mm256_set1_ps( 1.0f ); rhov[0].v = _mm256_dp_ps( rhov[0].v, onev.v, 0xf1 ); rhov[1].v = _mm256_dp_ps( rhov[1].v, onev.v, 0xf1 ); // Manually add the results from above to finish the sum. rho0 += rhov[0].f[0] + rhov[0].f[4]; rho0 += rhov[1].f[0] + rhov[1].f[4]; // Issue vzeroupper instruction to clear upper lanes of ymm registers. // This avoids a performance penalty caused by false dependencies when // transitioning from from AVX to SSE instructions (which may occur // later, especially if BLIS is compiled with -mfpmath=sse). _mm256_zeroupper(); } else { for ( i = 0; i < n; ++i ) { const float x0c = *x0; const float y0c = *y0; rho0 += x0c * y0c; x0 += incx; y0 += incy; } } // Copy the final result into the output variable. 
PASTEMAC(s,copys)( rho0, *rho ); } // ----------------------------------------------------------------------------- void bli_ddotv_zen_int10 ( conj_t conjx, conj_t conjy, dim_t n, double* restrict x, inc_t incx, double* restrict y, inc_t incy, double* restrict rho, cntx_t* restrict cntx ) { const dim_t n_elem_per_reg = 4; dim_t i; double* restrict x0; double* restrict y0; double rho0; __m256d xv[10]; __m256d yv[10]; v4df_t rhov[2]; // If the vector dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim1( n ) ) { PASTEMAC(d,set0s)( *rho ); return; } // Initialize local pointers. x0 = x; y0 = y; PASTEMAC(d,set0s)( rho0 ); if ( incx == 1 && incy == 1 ) { rhov[0].v = _mm256_setzero_pd(); rhov[1].v = _mm256_setzero_pd(); for ( i = 0; (i + 39) < n; i += 40 ) { // 80 elements will be processed per loop; 10 FMAs will run per loop. xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg ); xv[4] = _mm256_loadu_pd( x0 + 4*n_elem_per_reg ); xv[5] = _mm256_loadu_pd( x0 + 5*n_elem_per_reg ); xv[6] = _mm256_loadu_pd( x0 + 6*n_elem_per_reg ); xv[7] = _mm256_loadu_pd( x0 + 7*n_elem_per_reg ); xv[8] = _mm256_loadu_pd( x0 + 8*n_elem_per_reg ); xv[9] = _mm256_loadu_pd( x0 + 9*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_pd( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_pd( y0 + 3*n_elem_per_reg ); yv[4] = _mm256_loadu_pd( y0 + 4*n_elem_per_reg ); yv[5] = _mm256_loadu_pd( y0 + 5*n_elem_per_reg ); yv[6] = _mm256_loadu_pd( y0 + 6*n_elem_per_reg ); yv[7] = _mm256_loadu_pd( y0 + 7*n_elem_per_reg ); yv[8] = _mm256_loadu_pd( y0 + 8*n_elem_per_reg ); yv[9] = _mm256_loadu_pd( y0 + 9*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_pd( xv[0], yv[0], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[1], yv[1], rhov[1].v ); rhov[0].v = _mm256_fmadd_pd( xv[2], 
yv[2], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[3], yv[3], rhov[1].v ); rhov[0].v = _mm256_fmadd_pd( xv[4], yv[4], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[5], yv[5], rhov[1].v ); rhov[0].v = _mm256_fmadd_pd( xv[6], yv[6], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[7], yv[7], rhov[1].v ); rhov[0].v = _mm256_fmadd_pd( xv[8], yv[8], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[9], yv[9], rhov[1].v ); x0 += 10*n_elem_per_reg; y0 += 10*n_elem_per_reg; } for ( ; (i + 19) < n; i += 20 ) { xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg ); xv[4] = _mm256_loadu_pd( x0 + 4*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_pd( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_pd( y0 + 3*n_elem_per_reg ); yv[4] = _mm256_loadu_pd( y0 + 4*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_pd( xv[0], yv[0], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[1], yv[1], rhov[1].v ); rhov[0].v = _mm256_fmadd_pd( xv[2], yv[2], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[3], yv[3], rhov[1].v ); rhov[0].v = _mm256_fmadd_pd( xv[4], yv[4], rhov[0].v ); x0 += 5*n_elem_per_reg; y0 += 5*n_elem_per_reg; } for ( ; (i + 15) < n; i += 16 ) { xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg ); xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); yv[2] = _mm256_loadu_pd( y0 + 2*n_elem_per_reg ); yv[3] = _mm256_loadu_pd( y0 + 3*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_pd( xv[0], yv[0], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[1], yv[1], rhov[1].v ); rhov[0].v = _mm256_fmadd_pd( xv[2], yv[2], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[3], yv[3], rhov[1].v ); x0 += 
4*n_elem_per_reg; y0 += 4*n_elem_per_reg; } for ( ; (i + 7) < n; i += 8 ) { xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); yv[1] = _mm256_loadu_pd( y0 + 1*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_pd( xv[0], yv[0], rhov[0].v ); rhov[1].v = _mm256_fmadd_pd( xv[1], yv[1], rhov[1].v ); x0 += 2*n_elem_per_reg; y0 += 2*n_elem_per_reg; } for ( ; (i + 3) < n; i += 4 ) { xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); yv[0] = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); rhov[0].v = _mm256_fmadd_pd( xv[0], yv[0], rhov[0].v ); x0 += 1*n_elem_per_reg; y0 += 1*n_elem_per_reg; } for ( ; (i + 0) < n; i += 1 ) { rhov[0].d[0] += x0[i] * y0[i]; } // Manually add the results from above to finish the sum. rho0 += rhov[0].d[0] + rhov[0].d[1] + rhov[0].d[2] + rhov[0].d[3]; rho0 += rhov[1].d[0] + rhov[1].d[1] + rhov[1].d[2] + rhov[1].d[3]; // Issue vzeroupper instruction to clear upper lanes of ymm registers. // This avoids a performance penalty caused by false dependencies when // transitioning from from AVX to SSE instructions (which may occur // later, especially if BLIS is compiled with -mfpmath=sse). _mm256_zeroupper(); } else { for ( i = 0; i < n; ++i ) { const double x0c = *x0; const double y0c = *y0; rho0 += x0c * y0c; x0 += incx; y0 += incy; } } // Copy the final result into the output variable. PASTEMAC(d,copys)( rho0, *rho ); } blis-0.6.1/kernels/zen/1/bli_dotxv_zen_int.c000066400000000000000000000225321360743507500207500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2016 - 2019, Advanced Micro Devices, Inc. 
Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. 
*/
typedef union
{
	__m256d v;
	double  d[4] __attribute__((aligned(64)));
} v4df_t;

// -----------------------------------------------------------------------------

//
// bli_sdotxv_zen_int
//
// Single-precision "extended" dot product kernel. rho is first scaled by
// beta (or set to zero when beta is zero), and then the dot product of x
// and y, scaled by alpha, is accumulated into rho via the axpys macro.
//
//   conjx, conjy  Not referenced by this kernel.
//   n             Number of elements in x and y.
//   alpha         Scalar applied to the dot product.
//   x, incx       Input vector x and its element stride.
//   y, incy       Input vector y and its element stride.
//   beta          Scalar applied to the incoming value of rho.
//   rho           Input/output scalar.
//   cntx          Not referenced by this kernel.
//
// Vector (AVX/FMA) code is used only when both strides are unit; it
// processes 32 floats per iteration using four independent accumulators.
// Any remaining elements, or all elements for non-unit strides, are handled
// by the scalar loop at the end.
//
void bli_sdotxv_zen_int
     (
       conj_t           conjx,
       conj_t           conjy,
       dim_t            n,
       float*  restrict alpha,
       float*  restrict x, inc_t incx,
       float*  restrict y, inc_t incy,
       float*  restrict beta,
       float*  restrict rho,
       cntx_t* restrict cntx
     )
{
	const dim_t      n_elem_per_reg = 8;
	const dim_t      n_iter_unroll  = 4;

	dim_t            i;
	dim_t            n_viter;
	dim_t            n_left;

	float*  restrict x0;
	float*  restrict y0;
	float            rho0;

	v8sf_t           rho0v, rho1v, rho2v, rho3v;
	v8sf_t           x0v, y0v;
	v8sf_t           x1v, y1v;
	v8sf_t           x2v, y2v;
	v8sf_t           x3v, y3v;

	// If beta is zero, set rho to zero instead of scaling rho by beta
	// (in case rho contains NaN or Inf).
	if ( PASTEMAC(s,eq0)( *beta ) )
	{
		PASTEMAC(s,set0s)( *rho );
	}
	else
	{
		PASTEMAC(s,scals)( *beta, *rho );
	}

	// If the vector dimension is zero, or if alpha is zero, there is nothing
	// to accumulate: rho already holds its final (beta-scaled) value, so
	// return early.
	if ( bli_zero_dim1( n ) || PASTEMAC(s,eq0)( *alpha ) ) return;

	// Use the unrolling factor and the number of elements per register
	// to compute the number of vectorized and leftover iterations.
	n_viter = ( n ) / ( n_elem_per_reg * n_iter_unroll );
	n_left  = ( n ) % ( n_elem_per_reg * n_iter_unroll );

	// If there is anything that would interfere with our use of contiguous
	// vector loads/stores, override n_viter and n_left to use scalar code
	// for all iterations.
	if ( incx != 1 || incy != 1 )
	{
		n_viter = 0;
		n_left  = n;
	}

	// Initialize local pointers.
	x0 = x;
	y0 = y;

	// Initialize the unrolled iterations' rho vectors to zero.
	rho0v.v = _mm256_setzero_ps();
	rho1v.v = _mm256_setzero_ps();
	rho2v.v = _mm256_setzero_ps();
	rho3v.v = _mm256_setzero_ps();

	for ( i = 0; i < n_viter; ++i )
	{
		// Load the x and y input vector elements.
		x0v.v = _mm256_loadu_ps( x0 + 0*n_elem_per_reg );
		y0v.v = _mm256_loadu_ps( y0 + 0*n_elem_per_reg );

		x1v.v = _mm256_loadu_ps( x0 + 1*n_elem_per_reg );
		y1v.v = _mm256_loadu_ps( y0 + 1*n_elem_per_reg );

		x2v.v = _mm256_loadu_ps( x0 + 2*n_elem_per_reg );
		y2v.v = _mm256_loadu_ps( y0 + 2*n_elem_per_reg );

		x3v.v = _mm256_loadu_ps( x0 + 3*n_elem_per_reg );
		y3v.v = _mm256_loadu_ps( y0 + 3*n_elem_per_reg );

		// Compute the element-wise product of the x and y vectors,
		// accumulating into the corresponding rho vectors.
		rho0v.v = _mm256_fmadd_ps( x0v.v, y0v.v, rho0v.v );
		rho1v.v = _mm256_fmadd_ps( x1v.v, y1v.v, rho1v.v );
		rho2v.v = _mm256_fmadd_ps( x2v.v, y2v.v, rho2v.v );
		rho3v.v = _mm256_fmadd_ps( x3v.v, y3v.v, rho3v.v );

		x0 += ( n_elem_per_reg * n_iter_unroll );
		y0 += ( n_elem_per_reg * n_iter_unroll );
	}

	// Accumulate the unrolled rho vectors into a single vector.
	rho0v.v += rho1v.v;
	rho0v.v += rho2v.v;
	rho0v.v += rho3v.v;

	// Accumulate the final rho vector into a single scalar result.
	rho0 = rho0v.f[0] + rho0v.f[1] + rho0v.f[2] + rho0v.f[3] +
	       rho0v.f[4] + rho0v.f[5] + rho0v.f[6] + rho0v.f[7];

	// Issue vzeroupper instruction to clear upper lanes of ymm registers.
	// This avoids a performance penalty caused by false dependencies when
	// transitioning from AVX to SSE instructions (which may occur
	// as soon as the n_left cleanup loop below if BLIS is compiled with
	// -mfpmath=sse).
	_mm256_zeroupper();

	// If there are leftover iterations, perform them with scalar code.
	for ( i = 0; i < n_left; ++i )
	{
		const float x0c = *x0;
		const float y0c = *y0;

		rho0 += x0c * y0c;

		x0 += incx;
		y0 += incy;
	}

	// Accumulate the final result into the output variable.
	PASTEMAC(s,axpys)( *alpha, rho0, *rho );
}

// -----------------------------------------------------------------------------

//
// bli_ddotxv_zen_int
//
// Double-precision counterpart of bli_sdotxv_zen_int. rho is first scaled
// by beta (or set to zero when beta is zero), and then the dot product of x
// and y, scaled by alpha, is accumulated into rho via the axpys macro.
//
//   conjx, conjy  Not referenced by this kernel.
//   n             Number of elements in x and y.
//   alpha         Scalar applied to the dot product.
//   x, incx       Input vector x and its element stride.
//   y, incy       Input vector y and its element stride.
//   beta          Scalar applied to the incoming value of rho.
//   rho           Input/output scalar.
//   cntx          Not referenced by this kernel.
//
// Vector (AVX/FMA) code is used only when both strides are unit; it
// processes 16 doubles per iteration using four independent accumulators.
// Any remaining elements, or all elements for non-unit strides, are handled
// by the scalar loop at the end.
//
void bli_ddotxv_zen_int
     (
       conj_t           conjx,
       conj_t           conjy,
       dim_t            n,
       double* restrict alpha,
       double* restrict x, inc_t incx,
       double* restrict y, inc_t incy,
       double* restrict beta,
       double* restrict rho,
       cntx_t* restrict cntx
     )
{
	const dim_t      n_elem_per_reg = 4;
	const dim_t      n_iter_unroll  = 4;

	dim_t            i;
	dim_t            n_viter;
	dim_t            n_left;

	double* restrict x0;
	double* restrict y0;
	double           rho0;

	v4df_t           rho0v, rho1v, rho2v, rho3v;
	v4df_t           x0v, y0v;
	v4df_t           x1v, y1v;
	v4df_t           x2v, y2v;
	v4df_t           x3v, y3v;

	// If beta is zero, set rho to zero instead of scaling rho by beta
	// (in case rho contains NaN or Inf).
	if ( PASTEMAC(d,eq0)( *beta ) )
	{
		PASTEMAC(d,set0s)( *rho );
	}
	else
	{
		PASTEMAC(d,scals)( *beta, *rho );
	}

	// If the vector dimension is zero, or if alpha is zero, there is nothing
	// to accumulate: rho already holds its final (beta-scaled) value, so
	// return early.
	if ( bli_zero_dim1( n ) || PASTEMAC(d,eq0)( *alpha ) ) return;

	// Use the unrolling factor and the number of elements per register
	// to compute the number of vectorized and leftover iterations.
	n_viter = ( n ) / ( n_elem_per_reg * n_iter_unroll );
	n_left  = ( n ) % ( n_elem_per_reg * n_iter_unroll );

	// If there is anything that would interfere with our use of contiguous
	// vector loads/stores, override n_viter and n_left to use scalar code
	// for all iterations.
	if ( incx != 1 || incy != 1 )
	{
		n_viter = 0;
		n_left  = n;
	}

	// Initialize local pointers.
	x0 = x;
	y0 = y;

	// Initialize the unrolled iterations' rho vectors to zero.
	rho0v.v = _mm256_setzero_pd();
	rho1v.v = _mm256_setzero_pd();
	rho2v.v = _mm256_setzero_pd();
	rho3v.v = _mm256_setzero_pd();

	for ( i = 0; i < n_viter; ++i )
	{
		// Load the x and y input vector elements.
		x0v.v = _mm256_loadu_pd( x0 + 0*n_elem_per_reg );
		y0v.v = _mm256_loadu_pd( y0 + 0*n_elem_per_reg );

		x1v.v = _mm256_loadu_pd( x0 + 1*n_elem_per_reg );
		y1v.v = _mm256_loadu_pd( y0 + 1*n_elem_per_reg );

		x2v.v = _mm256_loadu_pd( x0 + 2*n_elem_per_reg );
		y2v.v = _mm256_loadu_pd( y0 + 2*n_elem_per_reg );

		x3v.v = _mm256_loadu_pd( x0 + 3*n_elem_per_reg );
		y3v.v = _mm256_loadu_pd( y0 + 3*n_elem_per_reg );

		// Compute the element-wise product of the x and y vectors,
		// accumulating into the corresponding rho vectors.
		rho0v.v = _mm256_fmadd_pd( x0v.v, y0v.v, rho0v.v );
		rho1v.v = _mm256_fmadd_pd( x1v.v, y1v.v, rho1v.v );
		rho2v.v = _mm256_fmadd_pd( x2v.v, y2v.v, rho2v.v );
		rho3v.v = _mm256_fmadd_pd( x3v.v, y3v.v, rho3v.v );

		x0 += ( n_elem_per_reg * n_iter_unroll );
		y0 += ( n_elem_per_reg * n_iter_unroll );
	}

	// Accumulate the unrolled rho vectors into a single vector.
	rho0v.v += rho1v.v;
	rho0v.v += rho2v.v;
	rho0v.v += rho3v.v;

	// Accumulate the final rho vector into a single scalar result.
	rho0 = rho0v.d[0] + rho0v.d[1] + rho0v.d[2] + rho0v.d[3];

	// Issue vzeroupper instruction to clear upper lanes of ymm registers.
	// This avoids a performance penalty caused by false dependencies when
	// transitioning from AVX to SSE instructions (which may occur
	// as soon as the n_left cleanup loop below if BLIS is compiled with
	// -mfpmath=sse).
	_mm256_zeroupper();

	// If there are leftover iterations, perform them with scalar code.
	for ( i = 0; i < n_left; ++i )
	{
		const double x0c = *x0;
		const double y0c = *y0;

		rho0 += x0c * y0c;

		x0 += incx;
		y0 += incy;
	}

	// Accumulate the final result into the output variable.
	PASTEMAC(d,axpys)( *alpha, rho0, *rho );
}
blis-0.6.1/kernels/zen/1/bli_scalv_zen_int.c000066400000000000000163031360743507500207130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. 
Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. 
*/
typedef union
{
	__m256d v;
	double  d[4] __attribute__((aligned(64)));
} v4df_t;

// -----------------------------------------------------------------------------

//
// bli_sscalv_zen_int
//
// Single-precision vector scaling kernel: x := alpha * x.
//
//   conjalpha  Not referenced by this kernel.
//   n          Number of elements in x.
//   alpha      Scalar multiplier.
//   x, incx    Vector to scale in place, and its element stride.
//   cntx       Context; queried for the setv kernel when alpha is zero.
//
// Returns immediately when n is zero or alpha is one. When alpha is zero
// the work is delegated to the context's setv kernel. Otherwise, unit-stride
// vectors are processed 32 floats per iteration with AVX multiplies, and
// the remainder (or everything, for non-unit stride) with scalar code.
//
void bli_sscalv_zen_int
     (
       conj_t           conjalpha,
       dim_t            n,
       float*  restrict alpha,
       float*  restrict x, inc_t incx,
       cntx_t* restrict cntx
     )
{
	const dim_t      n_elem_per_reg = 8;
	const dim_t      n_iter_unroll  = 4;

	dim_t            i;
	dim_t            n_viter;
	dim_t            n_left;

	float*  restrict x0;

	v8sf_t           alphav;
	v8sf_t           x0v, x1v, x2v, x3v;

	// If the vector dimension is zero, or if alpha is unit, return early.
	if ( bli_zero_dim1( n ) || PASTEMAC(s,eq1)( *alpha ) ) return;

	// If alpha is zero, use setv (in case x contains NaN or Inf).
	if ( PASTEMAC(s,eq0)( *alpha ) )
	{
		float*       zero = bli_s0;
		ssetv_ker_ft f    = bli_cntx_get_l1v_ker_dt( BLIS_FLOAT, BLIS_SETV_KER, cntx );

		f
		(
		  BLIS_NO_CONJUGATE,
		  n,
		  zero,
		  x, incx,
		  cntx
		);
		return;
	}

	// Use the unrolling factor and the number of elements per register
	// to compute the number of vectorized and leftover iterations.
	n_viter = ( n ) / ( n_elem_per_reg * n_iter_unroll );
	n_left  = ( n ) % ( n_elem_per_reg * n_iter_unroll );

	// If there is anything that would interfere with our use of contiguous
	// vector loads/stores, override n_viter and n_left to use scalar code
	// for all iterations.
	if ( incx != 1 )
	{
		n_viter = 0;
		n_left  = n;
	}

	// Initialize local pointers.
	x0 = x;

	// Broadcast the alpha scalar to all elements of a vector register.
	alphav.v = _mm256_broadcast_ss( alpha );

	// If there are vectorized iterations, perform them with vector
	// instructions.
	for ( i = 0; i < n_viter; ++i )
	{
		// Load the input values.
		x0v.v = _mm256_loadu_ps( x0 + 0*n_elem_per_reg );
		x1v.v = _mm256_loadu_ps( x0 + 1*n_elem_per_reg );
		x2v.v = _mm256_loadu_ps( x0 + 2*n_elem_per_reg );
		x3v.v = _mm256_loadu_ps( x0 + 3*n_elem_per_reg );

		// perform : x := alpha * x;
		x0v.v = _mm256_mul_ps( alphav.v, x0v.v );
		x1v.v = _mm256_mul_ps( alphav.v, x1v.v );
		x2v.v = _mm256_mul_ps( alphav.v, x2v.v );
		x3v.v = _mm256_mul_ps( alphav.v, x3v.v );

		// Store the output.
		_mm256_storeu_ps( (x0 + 0*n_elem_per_reg), x0v.v );
		_mm256_storeu_ps( (x0 + 1*n_elem_per_reg), x1v.v );
		_mm256_storeu_ps( (x0 + 2*n_elem_per_reg), x2v.v );
		_mm256_storeu_ps( (x0 + 3*n_elem_per_reg), x3v.v );

		x0 += n_elem_per_reg * n_iter_unroll;
	}

	// Local copy of alpha for the scalar cleanup loop.
	const float alphac = *alpha;

	// If there are leftover iterations, perform them with scalar code.
	for ( i = 0; i < n_left; ++i )
	{
		*x0 *= alphac;

		x0 += incx;
	}
}

// -----------------------------------------------------------------------------

//
// bli_dscalv_zen_int
//
// Double-precision vector scaling kernel: x := alpha * x.
//
//   conjalpha  Not referenced by this kernel.
//   n          Number of elements in x.
//   alpha      Scalar multiplier.
//   x, incx    Vector to scale in place, and its element stride.
//   cntx       Context; queried for the setv kernel when alpha is zero.
//
// Returns immediately when n is zero or alpha is one. When alpha is zero
// the work is delegated to the context's setv kernel. Otherwise, unit-stride
// vectors are processed 16 doubles per iteration with AVX multiplies, and
// the remainder (or everything, for non-unit stride) with scalar code.
//
void bli_dscalv_zen_int
     (
       conj_t           conjalpha,
       dim_t            n,
       double* restrict alpha,
       double* restrict x, inc_t incx,
       cntx_t* restrict cntx
     )
{
	const dim_t      n_elem_per_reg = 4;
	const dim_t      n_iter_unroll  = 4;

	dim_t            i;
	dim_t            n_viter;
	dim_t            n_left;

	double* restrict x0;

	v4df_t           alphav;
	v4df_t           x0v, x1v, x2v, x3v;

	// If the vector dimension is zero, or if alpha is unit, return early.
	if ( bli_zero_dim1( n ) || PASTEMAC(d,eq1)( *alpha ) ) return;

	// If alpha is zero, use setv (in case x contains NaN or Inf).
	if ( PASTEMAC(d,eq0)( *alpha ) )
	{
		double*      zero = bli_d0;
		dsetv_ker_ft f    = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_SETV_KER, cntx );

		f
		(
		  BLIS_NO_CONJUGATE,
		  n,
		  zero,
		  x, incx,
		  cntx
		);
		return;
	}

	// Use the unrolling factor and the number of elements per register
	// to compute the number of vectorized and leftover iterations.
	n_viter = ( n ) / ( n_elem_per_reg * n_iter_unroll );
	n_left  = ( n ) % ( n_elem_per_reg * n_iter_unroll );

	// If there is anything that would interfere with our use of contiguous
	// vector loads/stores, override n_viter and n_left to use scalar code
	// for all iterations.
	if ( incx != 1 )
	{
		n_viter = 0;
		n_left  = n;
	}

	// Initialize local pointers.
	x0 = x;

	// Broadcast the alpha scalar to all elements of a vector register.
	alphav.v = _mm256_broadcast_sd( alpha );

	// If there are vectorized iterations, perform them with vector
	// instructions.
	for ( i = 0; i < n_viter; ++i )
	{
		// Load the input values.
		x0v.v = _mm256_loadu_pd( x0 + 0*n_elem_per_reg );
		x1v.v = _mm256_loadu_pd( x0 + 1*n_elem_per_reg );
		x2v.v = _mm256_loadu_pd( x0 + 2*n_elem_per_reg );
		x3v.v = _mm256_loadu_pd( x0 + 3*n_elem_per_reg );

		// perform : x := alpha * x;
		x0v.v = _mm256_mul_pd( alphav.v, x0v.v );
		x1v.v = _mm256_mul_pd( alphav.v, x1v.v );
		x2v.v = _mm256_mul_pd( alphav.v, x2v.v );
		x3v.v = _mm256_mul_pd( alphav.v, x3v.v );

		// Store the output.
		_mm256_storeu_pd( (x0 + 0*n_elem_per_reg), x0v.v );
		_mm256_storeu_pd( (x0 + 1*n_elem_per_reg), x1v.v );
		_mm256_storeu_pd( (x0 + 2*n_elem_per_reg), x2v.v );
		_mm256_storeu_pd( (x0 + 3*n_elem_per_reg), x3v.v );

		x0 += n_elem_per_reg * n_iter_unroll;
	}

	// Local copy of alpha for the scalar cleanup loop.
	const double alphac = *alpha;

	// If there are leftover iterations, perform them with scalar code.
	for ( i = 0; i < n_left; ++i )
	{
		*x0 *= alphac;

		x0 += incx;
	}
}
blis-0.6.1/kernels/zen/1/bli_scalv_zen_int10.c000066400000000000000310171360743507500210530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. 
Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. 
*/
typedef union
{
	__m256d v;
	double  d[4] __attribute__((aligned(64)));
} v4df_t;

// -----------------------------------------------------------------------------

//
// bli_sscalv_zen_int10
//
// Single-precision vector scaling kernel: x := alpha * x.
//
//   conjalpha  Not referenced by this kernel.
//   n          Number of elements in x.
//   alpha      Scalar multiplier.
//   x, incx    Vector to scale in place, and its element stride.
//   cntx       Context; queried for the setv kernel when alpha is zero.
//
// Returns immediately when n is zero or alpha is one. When alpha is zero
// the work is delegated to the context's setv kernel. For unit stride, the
// vector is consumed by a cascade of unrolled AVX loops handling 80, 40,
// 32, 16 and 8 floats per iteration, followed by a scalar loop for the
// final elements. For any other stride, a plain scalar loop is used.
//
void bli_sscalv_zen_int10
     (
       conj_t           conjalpha,
       dim_t            n,
       float*  restrict alpha,
       float*  restrict x, inc_t incx,
       cntx_t* restrict cntx
     )
{
	const dim_t      n_elem_per_reg = 8;

	dim_t            i;

	float*  restrict x0;

	__m256           alphav;
	__m256           xv[10];
	__m256           zv[10];

	// If the vector dimension is zero, or if alpha is unit, return early.
	if ( bli_zero_dim1( n ) || PASTEMAC(s,eq1)( *alpha ) ) return;

	// If alpha is zero, use setv.
	if ( PASTEMAC(s,eq0)( *alpha ) )
	{
		float*       zero = bli_s0;
		ssetv_ker_ft f    = bli_cntx_get_l1v_ker_dt( BLIS_FLOAT, BLIS_SETV_KER, cntx );

		f
		(
		  BLIS_NO_CONJUGATE,
		  n,
		  zero,
		  x, incx,
		  cntx
		);
		return;
	}

	// Initialize local pointers.
	x0 = x;

	if ( incx == 1 )
	{
		// Broadcast the alpha scalar to all elements of a vector register.
		alphav = _mm256_broadcast_ss( alpha );

		// 80 elements (ten registers) per iteration.
		for ( i = 0; (i + 79) < n; i += 80 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg );
			xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg );
			xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg );
			xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg );
			xv[4] = _mm256_loadu_ps( x0 + 4*n_elem_per_reg );
			xv[5] = _mm256_loadu_ps( x0 + 5*n_elem_per_reg );
			xv[6] = _mm256_loadu_ps( x0 + 6*n_elem_per_reg );
			xv[7] = _mm256_loadu_ps( x0 + 7*n_elem_per_reg );
			xv[8] = _mm256_loadu_ps( x0 + 8*n_elem_per_reg );
			xv[9] = _mm256_loadu_ps( x0 + 9*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_ps( alphav, xv[0] );
			zv[1] = _mm256_mul_ps( alphav, xv[1] );
			zv[2] = _mm256_mul_ps( alphav, xv[2] );
			zv[3] = _mm256_mul_ps( alphav, xv[3] );
			zv[4] = _mm256_mul_ps( alphav, xv[4] );
			zv[5] = _mm256_mul_ps( alphav, xv[5] );
			zv[6] = _mm256_mul_ps( alphav, xv[6] );
			zv[7] = _mm256_mul_ps( alphav, xv[7] );
			zv[8] = _mm256_mul_ps( alphav, xv[8] );
			zv[9] = _mm256_mul_ps( alphav, xv[9] );

			// Store the output.
			_mm256_storeu_ps( (x0 + 0*n_elem_per_reg), zv[0] );
			_mm256_storeu_ps( (x0 + 1*n_elem_per_reg), zv[1] );
			_mm256_storeu_ps( (x0 + 2*n_elem_per_reg), zv[2] );
			_mm256_storeu_ps( (x0 + 3*n_elem_per_reg), zv[3] );
			_mm256_storeu_ps( (x0 + 4*n_elem_per_reg), zv[4] );
			_mm256_storeu_ps( (x0 + 5*n_elem_per_reg), zv[5] );
			_mm256_storeu_ps( (x0 + 6*n_elem_per_reg), zv[6] );
			_mm256_storeu_ps( (x0 + 7*n_elem_per_reg), zv[7] );
			_mm256_storeu_ps( (x0 + 8*n_elem_per_reg), zv[8] );
			_mm256_storeu_ps( (x0 + 9*n_elem_per_reg), zv[9] );

			x0 += 10*n_elem_per_reg;
		}

		// 40 elements (five registers) per iteration.
		for ( ; (i + 39) < n; i += 40 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg );
			xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg );
			xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg );
			xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg );
			xv[4] = _mm256_loadu_ps( x0 + 4*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_ps( alphav, xv[0] );
			zv[1] = _mm256_mul_ps( alphav, xv[1] );
			zv[2] = _mm256_mul_ps( alphav, xv[2] );
			zv[3] = _mm256_mul_ps( alphav, xv[3] );
			zv[4] = _mm256_mul_ps( alphav, xv[4] );

			// Store the output.
			_mm256_storeu_ps( (x0 + 0*n_elem_per_reg), zv[0] );
			_mm256_storeu_ps( (x0 + 1*n_elem_per_reg), zv[1] );
			_mm256_storeu_ps( (x0 + 2*n_elem_per_reg), zv[2] );
			_mm256_storeu_ps( (x0 + 3*n_elem_per_reg), zv[3] );
			_mm256_storeu_ps( (x0 + 4*n_elem_per_reg), zv[4] );

			x0 += 5*n_elem_per_reg;
		}

		// 32 elements (four registers) per iteration.
		for ( ; (i + 31) < n; i += 32 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg );
			xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg );
			xv[2] = _mm256_loadu_ps( x0 + 2*n_elem_per_reg );
			xv[3] = _mm256_loadu_ps( x0 + 3*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_ps( alphav, xv[0] );
			zv[1] = _mm256_mul_ps( alphav, xv[1] );
			zv[2] = _mm256_mul_ps( alphav, xv[2] );
			zv[3] = _mm256_mul_ps( alphav, xv[3] );

			// Store the output.
			_mm256_storeu_ps( (x0 + 0*n_elem_per_reg), zv[0] );
			_mm256_storeu_ps( (x0 + 1*n_elem_per_reg), zv[1] );
			_mm256_storeu_ps( (x0 + 2*n_elem_per_reg), zv[2] );
			_mm256_storeu_ps( (x0 + 3*n_elem_per_reg), zv[3] );

			x0 += 4*n_elem_per_reg;
		}

		// 16 elements (two registers) per iteration.
		for ( ; (i + 15) < n; i += 16 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg );
			xv[1] = _mm256_loadu_ps( x0 + 1*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_ps( alphav, xv[0] );
			zv[1] = _mm256_mul_ps( alphav, xv[1] );

			// Store the output.
			_mm256_storeu_ps( (x0 + 0*n_elem_per_reg), zv[0] );
			_mm256_storeu_ps( (x0 + 1*n_elem_per_reg), zv[1] );

			x0 += 2*n_elem_per_reg;
		}

		// 8 elements (one register) per iteration.
		for ( ; (i + 7) < n; i += 8 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_ps( x0 + 0*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_ps( alphav, xv[0] );

			// Store the output.
			_mm256_storeu_ps( (x0 + 0*n_elem_per_reg), zv[0] );

			x0 += 1*n_elem_per_reg;
		}

		// Scalar cleanup for the remaining elements.
		for ( ; (i + 0) < n; i += 1 )
		{
			*x0 *= *alpha;

			x0 += 1;
		}
	}
	else
	{
		// General-stride case: scale one element at a time.
		const float alphac = *alpha;

		for ( i = 0; i < n; ++i )
		{
			*x0 *= alphac;

			x0 += incx;
		}
	}
}

// -----------------------------------------------------------------------------

//
// bli_dscalv_zen_int10
//
// Double-precision vector scaling kernel: x := alpha * x.
//
//   conjalpha  Not referenced by this kernel.
//   n          Number of elements in x.
//   alpha      Scalar multiplier.
//   x, incx    Vector to scale in place, and its element stride.
//   cntx       Context; queried for the setv kernel when alpha is zero.
//
// Returns immediately when n is zero or alpha is one. When alpha is zero
// the work is delegated to the context's setv kernel. For unit stride, the
// vector is consumed by a cascade of unrolled AVX loops handling 40, 20,
// 16, 8 and 4 doubles per iteration, followed by a scalar loop for the
// final elements. For any other stride, a plain scalar loop is used.
//
void bli_dscalv_zen_int10
     (
       conj_t           conjalpha,
       dim_t            n,
       double* restrict alpha,
       double* restrict x, inc_t incx,
       cntx_t* restrict cntx
     )
{
	const dim_t      n_elem_per_reg = 4;

	dim_t            i;

	double* restrict x0;

	__m256d          alphav;
	__m256d          xv[10];
	__m256d          zv[10];

	// If the vector dimension is zero, or if alpha is unit, return early.
	if ( bli_zero_dim1( n ) || PASTEMAC(d,eq1)( *alpha ) ) return;

	// If alpha is zero, use setv.
	if ( PASTEMAC(d,eq0)( *alpha ) )
	{
		double*      zero = bli_d0;
		dsetv_ker_ft f    = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_SETV_KER, cntx );

		f
		(
		  BLIS_NO_CONJUGATE,
		  n,
		  zero,
		  x, incx,
		  cntx
		);
		return;
	}

	// Initialize local pointers.
	x0 = x;

	if ( incx == 1 )
	{
		// Broadcast the alpha scalar to all elements of a vector register.
		alphav = _mm256_broadcast_sd( alpha );

		// 40 elements (ten registers) per iteration.
		for ( i = 0; (i + 39) < n; i += 40 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg );
			xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg );
			xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg );
			xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg );
			xv[4] = _mm256_loadu_pd( x0 + 4*n_elem_per_reg );
			xv[5] = _mm256_loadu_pd( x0 + 5*n_elem_per_reg );
			xv[6] = _mm256_loadu_pd( x0 + 6*n_elem_per_reg );
			xv[7] = _mm256_loadu_pd( x0 + 7*n_elem_per_reg );
			xv[8] = _mm256_loadu_pd( x0 + 8*n_elem_per_reg );
			xv[9] = _mm256_loadu_pd( x0 + 9*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_pd( alphav, xv[0] );
			zv[1] = _mm256_mul_pd( alphav, xv[1] );
			zv[2] = _mm256_mul_pd( alphav, xv[2] );
			zv[3] = _mm256_mul_pd( alphav, xv[3] );
			zv[4] = _mm256_mul_pd( alphav, xv[4] );
			zv[5] = _mm256_mul_pd( alphav, xv[5] );
			zv[6] = _mm256_mul_pd( alphav, xv[6] );
			zv[7] = _mm256_mul_pd( alphav, xv[7] );
			zv[8] = _mm256_mul_pd( alphav, xv[8] );
			zv[9] = _mm256_mul_pd( alphav, xv[9] );

			// Store the output.
			_mm256_storeu_pd( (x0 + 0*n_elem_per_reg), zv[0] );
			_mm256_storeu_pd( (x0 + 1*n_elem_per_reg), zv[1] );
			_mm256_storeu_pd( (x0 + 2*n_elem_per_reg), zv[2] );
			_mm256_storeu_pd( (x0 + 3*n_elem_per_reg), zv[3] );
			_mm256_storeu_pd( (x0 + 4*n_elem_per_reg), zv[4] );
			_mm256_storeu_pd( (x0 + 5*n_elem_per_reg), zv[5] );
			_mm256_storeu_pd( (x0 + 6*n_elem_per_reg), zv[6] );
			_mm256_storeu_pd( (x0 + 7*n_elem_per_reg), zv[7] );
			_mm256_storeu_pd( (x0 + 8*n_elem_per_reg), zv[8] );
			_mm256_storeu_pd( (x0 + 9*n_elem_per_reg), zv[9] );

			x0 += 10*n_elem_per_reg;
		}

		// 20 elements (five registers) per iteration.
		for ( ; (i + 19) < n; i += 20 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg );
			xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg );
			xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg );
			xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg );
			xv[4] = _mm256_loadu_pd( x0 + 4*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_pd( alphav, xv[0] );
			zv[1] = _mm256_mul_pd( alphav, xv[1] );
			zv[2] = _mm256_mul_pd( alphav, xv[2] );
			zv[3] = _mm256_mul_pd( alphav, xv[3] );
			zv[4] = _mm256_mul_pd( alphav, xv[4] );

			// Store the output.
			_mm256_storeu_pd( (x0 + 0*n_elem_per_reg), zv[0] );
			_mm256_storeu_pd( (x0 + 1*n_elem_per_reg), zv[1] );
			_mm256_storeu_pd( (x0 + 2*n_elem_per_reg), zv[2] );
			_mm256_storeu_pd( (x0 + 3*n_elem_per_reg), zv[3] );
			_mm256_storeu_pd( (x0 + 4*n_elem_per_reg), zv[4] );

			x0 += 5*n_elem_per_reg;
		}

		// 16 elements (four registers) per iteration.
		for ( ; (i + 15) < n; i += 16 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg );
			xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg );
			xv[2] = _mm256_loadu_pd( x0 + 2*n_elem_per_reg );
			xv[3] = _mm256_loadu_pd( x0 + 3*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_pd( alphav, xv[0] );
			zv[1] = _mm256_mul_pd( alphav, xv[1] );
			zv[2] = _mm256_mul_pd( alphav, xv[2] );
			zv[3] = _mm256_mul_pd( alphav, xv[3] );

			// Store the output.
			_mm256_storeu_pd( (x0 + 0*n_elem_per_reg), zv[0] );
			_mm256_storeu_pd( (x0 + 1*n_elem_per_reg), zv[1] );
			_mm256_storeu_pd( (x0 + 2*n_elem_per_reg), zv[2] );
			_mm256_storeu_pd( (x0 + 3*n_elem_per_reg), zv[3] );

			x0 += 4*n_elem_per_reg;
		}

		// 8 elements (two registers) per iteration.
		for ( ; (i + 7) < n; i += 8 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg );
			xv[1] = _mm256_loadu_pd( x0 + 1*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_pd( alphav, xv[0] );
			zv[1] = _mm256_mul_pd( alphav, xv[1] );

			// Store the output.
			_mm256_storeu_pd( (x0 + 0*n_elem_per_reg), zv[0] );
			_mm256_storeu_pd( (x0 + 1*n_elem_per_reg), zv[1] );

			x0 += 2*n_elem_per_reg;
		}

		// 4 elements (one register) per iteration.
		for ( ; (i + 3) < n; i += 4 )
		{
			// Load the input values.
			xv[0] = _mm256_loadu_pd( x0 + 0*n_elem_per_reg );

			// perform : x := alpha * x;
			zv[0] = _mm256_mul_pd( alphav, xv[0] );

			// Store the output.
			_mm256_storeu_pd( (x0 + 0*n_elem_per_reg), zv[0] );

			x0 += 1*n_elem_per_reg;
		}

		// Scalar cleanup for the remaining elements.
		for ( ; (i + 0) < n; i += 1 )
		{
			*x0 *= *alpha;

			x0 += 1;
		}
	}
	else
	{
		// General-stride case: scale one element at a time.
		const double alphac = *alpha;

		for ( i = 0; i < n; ++i )
		{
			*x0 *= alphac;

			x0 += incx;
		}
	}
}
blis-0.6.1/kernels/zen/1f/000077500000000000000000000000001360743507500152265ustar00rootroot00000000000000blis-0.6.1/kernels/zen/1f/bli_axpyf_zen_int_8.c000066400000000000000315221360743507500213270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. */ typedef union { __m256d v; double d[4] __attribute__((aligned(64))); } v4df_t; // ----------------------------------------------------------------------------- void bli_saxpyf_zen_int_8 ( conj_t conja, conj_t conjx, dim_t m, dim_t b_n, float* restrict alpha, float* restrict a, inc_t inca, inc_t lda, float* restrict x, inc_t incx, float* restrict y, inc_t incy, cntx_t* restrict cntx ) { const dim_t fuse_fac = 8; const dim_t n_elem_per_reg = 8; const dim_t n_iter_unroll = 1; dim_t i; dim_t m_viter; dim_t m_left; float* restrict a0; float* restrict a1; float* restrict a2; float* restrict a3; float* restrict a4; float* restrict a5; float* restrict a6; float* restrict a7; float* restrict y0; v8sf_t chi0v, chi1v, chi2v, chi3v; v8sf_t chi4v, chi5v, chi6v, chi7v; v8sf_t a0v, a1v, a2v, a3v; v8sf_t a4v, a5v, a6v, a7v; v8sf_t y0v; float chi0, chi1, chi2, chi3; float chi4, chi5, chi6, chi7; // If either dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim2( m, b_n ) || PASTEMAC(s,eq0)( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. 
if ( b_n != fuse_fac ) { saxpyv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_FLOAT, BLIS_AXPYV_KER, cntx ); for ( i = 0; i < b_n; ++i ) { float* a1 = a + (0 )*inca + (i )*lda; float* chi1 = x + (i )*incx; float* y1 = y + (0 )*incy; float alpha_chi1; PASTEMAC(s,copycjs)( conjx, *chi1, alpha_chi1 ); PASTEMAC(s,scals)( *alpha, alpha_chi1 ); f ( conja, m, &alpha_chi1, a1, inca, y1, incy, cntx ); } return; } // At this point, we know that b_n is exactly equal to the fusing factor. // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. m_viter = ( m ) / ( n_elem_per_reg * n_iter_unroll ); m_left = ( m ) % ( n_elem_per_reg * n_iter_unroll ); // If there is anything that would interfere with our use of contiguous // vector loads/stores, override m_viter and m_left to use scalar code // for all iterations. if ( inca != 1 || incy != 1 ) { m_viter = 0; m_left = m; } a0 = a + 0*lda; a1 = a + 1*lda; a2 = a + 2*lda; a3 = a + 3*lda; a4 = a + 4*lda; a5 = a + 5*lda; a6 = a + 6*lda; a7 = a + 7*lda; y0 = y; chi0 = *( x + 0*incx ); chi1 = *( x + 1*incx ); chi2 = *( x + 2*incx ); chi3 = *( x + 3*incx ); chi4 = *( x + 4*incx ); chi5 = *( x + 5*incx ); chi6 = *( x + 6*incx ); chi7 = *( x + 7*incx ); // Scale each chi scalar by alpha. PASTEMAC(s,scals)( *alpha, chi0 ); PASTEMAC(s,scals)( *alpha, chi1 ); PASTEMAC(s,scals)( *alpha, chi2 ); PASTEMAC(s,scals)( *alpha, chi3 ); PASTEMAC(s,scals)( *alpha, chi4 ); PASTEMAC(s,scals)( *alpha, chi5 ); PASTEMAC(s,scals)( *alpha, chi6 ); PASTEMAC(s,scals)( *alpha, chi7 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. 
chi0v.v = _mm256_broadcast_ss( &chi0 ); chi1v.v = _mm256_broadcast_ss( &chi1 ); chi2v.v = _mm256_broadcast_ss( &chi2 ); chi3v.v = _mm256_broadcast_ss( &chi3 ); chi4v.v = _mm256_broadcast_ss( &chi4 ); chi5v.v = _mm256_broadcast_ss( &chi5 ); chi6v.v = _mm256_broadcast_ss( &chi6 ); chi7v.v = _mm256_broadcast_ss( &chi7 ); // If there are vectorized iterations, perform them with vector // instructions. for ( i = 0; i < m_viter; ++i ) { // Load the input values. y0v.v = _mm256_loadu_ps( y0 + 0*n_elem_per_reg ); a0v.v = _mm256_loadu_ps( a0 + 0*n_elem_per_reg ); a1v.v = _mm256_loadu_ps( a1 + 0*n_elem_per_reg ); a2v.v = _mm256_loadu_ps( a2 + 0*n_elem_per_reg ); a3v.v = _mm256_loadu_ps( a3 + 0*n_elem_per_reg ); a4v.v = _mm256_loadu_ps( a4 + 0*n_elem_per_reg ); a5v.v = _mm256_loadu_ps( a5 + 0*n_elem_per_reg ); a6v.v = _mm256_loadu_ps( a6 + 0*n_elem_per_reg ); a7v.v = _mm256_loadu_ps( a7 + 0*n_elem_per_reg ); // perform : y += alpha * x; y0v.v = _mm256_fmadd_ps( a0v.v, chi0v.v, y0v.v ); y0v.v = _mm256_fmadd_ps( a1v.v, chi1v.v, y0v.v ); y0v.v = _mm256_fmadd_ps( a2v.v, chi2v.v, y0v.v ); y0v.v = _mm256_fmadd_ps( a3v.v, chi3v.v, y0v.v ); y0v.v = _mm256_fmadd_ps( a4v.v, chi4v.v, y0v.v ); y0v.v = _mm256_fmadd_ps( a5v.v, chi5v.v, y0v.v ); y0v.v = _mm256_fmadd_ps( a6v.v, chi6v.v, y0v.v ); y0v.v = _mm256_fmadd_ps( a7v.v, chi7v.v, y0v.v ); // Store the output. _mm256_storeu_ps( (y0 + 0*n_elem_per_reg), y0v.v ); y0 += n_elem_per_reg; a0 += n_elem_per_reg; a1 += n_elem_per_reg; a2 += n_elem_per_reg; a3 += n_elem_per_reg; a4 += n_elem_per_reg; a5 += n_elem_per_reg; a6 += n_elem_per_reg; a7 += n_elem_per_reg; } // If there are leftover iterations, perform them with scalar code. 
for ( i = 0; i < m_left ; ++i ) { float y0c = *y0; const float a0c = *a0; const float a1c = *a1; const float a2c = *a2; const float a3c = *a3; const float a4c = *a4; const float a5c = *a5; const float a6c = *a6; const float a7c = *a7; y0c += chi0 * a0c; y0c += chi1 * a1c; y0c += chi2 * a2c; y0c += chi3 * a3c; y0c += chi4 * a4c; y0c += chi5 * a5c; y0c += chi6 * a6c; y0c += chi7 * a7c; *y0 = y0c; a0 += inca; a1 += inca; a2 += inca; a3 += inca; a4 += inca; a5 += inca; a6 += inca; a7 += inca; y0 += incy; } } // ----------------------------------------------------------------------------- void bli_daxpyf_zen_int_8 ( conj_t conja, conj_t conjx, dim_t m, dim_t b_n, double* restrict alpha, double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict y, inc_t incy, cntx_t* restrict cntx ) { const dim_t fuse_fac = 8; const dim_t n_elem_per_reg = 4; const dim_t n_iter_unroll = 1; dim_t i; dim_t m_viter; dim_t m_left; double* restrict a0; double* restrict a1; double* restrict a2; double* restrict a3; double* restrict a4; double* restrict a5; double* restrict a6; double* restrict a7; double* restrict y0; v4df_t chi0v, chi1v, chi2v, chi3v; v4df_t chi4v, chi5v, chi6v, chi7v; v4df_t a0v, a1v, a2v, a3v; v4df_t a4v, a5v, a6v, a7v; v4df_t y0v; double chi0, chi1, chi2, chi3; double chi4, chi5, chi6, chi7; // If either dimension is zero, or if alpha is zero, return early. if ( bli_zero_dim2( m, b_n ) || PASTEMAC(d,eq0)( *alpha ) ) return; // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over axpyv. 
if ( b_n != fuse_fac ) { daxpyv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_AXPYV_KER, cntx ); for ( i = 0; i < b_n; ++i ) { double* a1 = a + (0 )*inca + (i )*lda; double* chi1 = x + (i )*incx; double* y1 = y + (0 )*incy; double alpha_chi1; PASTEMAC(d,copycjs)( conjx, *chi1, alpha_chi1 ); PASTEMAC(d,scals)( *alpha, alpha_chi1 ); f ( conja, m, &alpha_chi1, a1, inca, y1, incy, cntx ); } return; } // At this point, we know that b_n is exactly equal to the fusing factor. // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. m_viter = ( m ) / ( n_elem_per_reg * n_iter_unroll ); m_left = ( m ) % ( n_elem_per_reg * n_iter_unroll ); // If there is anything that would interfere with our use of contiguous // vector loads/stores, override m_viter and m_left to use scalar code // for all iterations. if ( inca != 1 || incy != 1 ) { m_viter = 0; m_left = m; } a0 = a + 0*lda; a1 = a + 1*lda; a2 = a + 2*lda; a3 = a + 3*lda; a4 = a + 4*lda; a5 = a + 5*lda; a6 = a + 6*lda; a7 = a + 7*lda; y0 = y; chi0 = *( x + 0*incx ); chi1 = *( x + 1*incx ); chi2 = *( x + 2*incx ); chi3 = *( x + 3*incx ); chi4 = *( x + 4*incx ); chi5 = *( x + 5*incx ); chi6 = *( x + 6*incx ); chi7 = *( x + 7*incx ); // Scale each chi scalar by alpha. PASTEMAC(d,scals)( *alpha, chi0 ); PASTEMAC(d,scals)( *alpha, chi1 ); PASTEMAC(d,scals)( *alpha, chi2 ); PASTEMAC(d,scals)( *alpha, chi3 ); PASTEMAC(d,scals)( *alpha, chi4 ); PASTEMAC(d,scals)( *alpha, chi5 ); PASTEMAC(d,scals)( *alpha, chi6 ); PASTEMAC(d,scals)( *alpha, chi7 ); // Broadcast the (alpha*chi?) scalars to all elements of vector registers. 
chi0v.v = _mm256_broadcast_sd( &chi0 ); chi1v.v = _mm256_broadcast_sd( &chi1 ); chi2v.v = _mm256_broadcast_sd( &chi2 ); chi3v.v = _mm256_broadcast_sd( &chi3 ); chi4v.v = _mm256_broadcast_sd( &chi4 ); chi5v.v = _mm256_broadcast_sd( &chi5 ); chi6v.v = _mm256_broadcast_sd( &chi6 ); chi7v.v = _mm256_broadcast_sd( &chi7 ); // If there are vectorized iterations, perform them with vector // instructions. for ( i = 0; i < m_viter; ++i ) { // Load the input values. y0v.v = _mm256_loadu_pd( y0 + 0*n_elem_per_reg ); a0v.v = _mm256_loadu_pd( a0 + 0*n_elem_per_reg ); a1v.v = _mm256_loadu_pd( a1 + 0*n_elem_per_reg ); a2v.v = _mm256_loadu_pd( a2 + 0*n_elem_per_reg ); a3v.v = _mm256_loadu_pd( a3 + 0*n_elem_per_reg ); a4v.v = _mm256_loadu_pd( a4 + 0*n_elem_per_reg ); a5v.v = _mm256_loadu_pd( a5 + 0*n_elem_per_reg ); a6v.v = _mm256_loadu_pd( a6 + 0*n_elem_per_reg ); a7v.v = _mm256_loadu_pd( a7 + 0*n_elem_per_reg ); // perform : y += alpha * x; y0v.v = _mm256_fmadd_pd( a0v.v, chi0v.v, y0v.v ); y0v.v = _mm256_fmadd_pd( a1v.v, chi1v.v, y0v.v ); y0v.v = _mm256_fmadd_pd( a2v.v, chi2v.v, y0v.v ); y0v.v = _mm256_fmadd_pd( a3v.v, chi3v.v, y0v.v ); y0v.v = _mm256_fmadd_pd( a4v.v, chi4v.v, y0v.v ); y0v.v = _mm256_fmadd_pd( a5v.v, chi5v.v, y0v.v ); y0v.v = _mm256_fmadd_pd( a6v.v, chi6v.v, y0v.v ); y0v.v = _mm256_fmadd_pd( a7v.v, chi7v.v, y0v.v ); // Store the output. _mm256_storeu_pd( (y0 + 0*n_elem_per_reg), y0v.v ); y0 += n_elem_per_reg; a0 += n_elem_per_reg; a1 += n_elem_per_reg; a2 += n_elem_per_reg; a3 += n_elem_per_reg; a4 += n_elem_per_reg; a5 += n_elem_per_reg; a6 += n_elem_per_reg; a7 += n_elem_per_reg; } // If there are leftover iterations, perform them with scalar code. 
for ( i = 0; i < m_left ; ++i ) { double y0c = *y0; const double a0c = *a0; const double a1c = *a1; const double a2c = *a2; const double a3c = *a3; const double a4c = *a4; const double a5c = *a5; const double a6c = *a6; const double a7c = *a7; y0c += chi0 * a0c; y0c += chi1 * a1c; y0c += chi2 * a2c; y0c += chi3 * a3c; y0c += chi4 * a4c; y0c += chi5 * a5c; y0c += chi6 * a6c; y0c += chi7 * a7c; *y0 = y0c; a0 += inca; a1 += inca; a2 += inca; a3 += inca; a4 += inca; a5 += inca; a6 += inca; a7 += inca; y0 += incy; } } blis-0.6.1/kernels/zen/1f/bli_dotxf_zen_int_8.c000066400000000000000000000644211360743507500213300ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "blis.h" /* Union data structure to access AVX registers One 256-bit AVX register holds 8 SP elements. */ typedef union { __m256 v; float f[8] __attribute__((aligned(64))); } v8sf_t; /* Union data structure to access AVX registers * One 256-bit AVX register holds 4 DP elements. */ typedef union { __m256d v; double d[4] __attribute__((aligned(64))); } v4df_t; // ----------------------------------------------------------------------------- void bli_sdotxf_zen_int_8 ( conj_t conjat, conj_t conjx, dim_t m, dim_t b_n, float* restrict alpha, float* restrict a, inc_t inca, inc_t lda, float* restrict x, inc_t incx, float* restrict beta, float* restrict y, inc_t incy, cntx_t* restrict cntx ) { const dim_t fuse_fac = 8; const dim_t n_elem_per_reg = 8; // If the b_n dimension is zero, y is empty and there is no computation. if ( bli_zero_dim1( b_n ) ) return; // If the m dimension is zero, or if alpha is zero, the computation // simplifies to updating y. if ( bli_zero_dim1( m ) || PASTEMAC(s,eq0)( *alpha ) ) { sscalv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_FLOAT, BLIS_SCALV_KER, cntx ); f ( BLIS_NO_CONJUGATE, b_n, beta, y, incy, cntx ); return; } // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over dotxv. 
if ( b_n != fuse_fac ) { sdotxv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_FLOAT, BLIS_DOTXV_KER, cntx ); for ( dim_t i = 0; i < b_n; ++i ) { float* a1 = a + (0 )*inca + (i )*lda; float* x1 = x + (0 )*incx; float* psi1 = y + (i )*incy; f ( conjat, conjx, m, alpha, a1, inca, x1, incx, beta, psi1, cntx ); } return; } // At this point, we know that b_n is exactly equal to the fusing factor. // However, m may not be a multiple of the number of elements per vector. // Going forward, we handle two possible storage formats of A explicitly: // (1) A is stored by columns, or (2) A is stored by rows. Either case is // further split into two subproblems along the m dimension: // (a) a vectorized part, starting at m = 0 and ending at any 0 <= m' <= m. // (b) a scalar part, starting at m' and ending at m. If no vectorization // is possible then m' == 0 and thus the scalar part is the entire // problem. If 0 < m', then the a and x pointers and m variable will // be adjusted accordingly for the second subproblem. // Note: since parts (b) for both (1) and (2) are so similar, they are // factored out into one code block after the following conditional, which // distinguishes between (1) and (2). // Intermediate variables to hold the completed dot products float rho0 = 0, rho1 = 0, rho2 = 0, rho3 = 0, rho4 = 0, rho5 = 0, rho6 = 0, rho7 = 0; if ( inca == 1 && incx == 1 ) { const dim_t n_iter_unroll = 1; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. dim_t m_viter = ( m ) / ( n_elem_per_reg * n_iter_unroll ); // Set up pointers for x and the b_n columns of A (rows of A^T). float* restrict x0 = x; float* restrict a0 = a + 0*lda; float* restrict a1 = a + 1*lda; float* restrict a2 = a + 2*lda; float* restrict a3 = a + 3*lda; float* restrict a4 = a + 4*lda; float* restrict a5 = a + 5*lda; float* restrict a6 = a + 6*lda; float* restrict a7 = a + 7*lda; // Initialize b_n rho vector accumulators to zero. 
v8sf_t rho0v; rho0v.v = _mm256_setzero_ps(); v8sf_t rho1v; rho1v.v = _mm256_setzero_ps(); v8sf_t rho2v; rho2v.v = _mm256_setzero_ps(); v8sf_t rho3v; rho3v.v = _mm256_setzero_ps(); v8sf_t rho4v; rho4v.v = _mm256_setzero_ps(); v8sf_t rho5v; rho5v.v = _mm256_setzero_ps(); v8sf_t rho6v; rho6v.v = _mm256_setzero_ps(); v8sf_t rho7v; rho7v.v = _mm256_setzero_ps(); v8sf_t x0v; v8sf_t a0v, a1v, a2v, a3v, a4v, a5v, a6v, a7v; // If there are vectorized iterations, perform them with vector // instructions. for ( dim_t i = 0; i < m_viter; ++i ) { // Load the input values. x0v.v = _mm256_loadu_ps( x0 + 0*n_elem_per_reg ); a0v.v = _mm256_loadu_ps( a0 + 0*n_elem_per_reg ); a1v.v = _mm256_loadu_ps( a1 + 0*n_elem_per_reg ); a2v.v = _mm256_loadu_ps( a2 + 0*n_elem_per_reg ); a3v.v = _mm256_loadu_ps( a3 + 0*n_elem_per_reg ); a4v.v = _mm256_loadu_ps( a4 + 0*n_elem_per_reg ); a5v.v = _mm256_loadu_ps( a5 + 0*n_elem_per_reg ); a6v.v = _mm256_loadu_ps( a6 + 0*n_elem_per_reg ); a7v.v = _mm256_loadu_ps( a7 + 0*n_elem_per_reg ); // perform: rho?v += a?v * x0v; rho0v.v = _mm256_fmadd_ps( a0v.v, x0v.v, rho0v.v ); rho1v.v = _mm256_fmadd_ps( a1v.v, x0v.v, rho1v.v ); rho2v.v = _mm256_fmadd_ps( a2v.v, x0v.v, rho2v.v ); rho3v.v = _mm256_fmadd_ps( a3v.v, x0v.v, rho3v.v ); rho4v.v = _mm256_fmadd_ps( a4v.v, x0v.v, rho4v.v ); rho5v.v = _mm256_fmadd_ps( a5v.v, x0v.v, rho5v.v ); rho6v.v = _mm256_fmadd_ps( a6v.v, x0v.v, rho6v.v ); rho7v.v = _mm256_fmadd_ps( a7v.v, x0v.v, rho7v.v ); x0 += n_elem_per_reg * n_iter_unroll; a0 += n_elem_per_reg * n_iter_unroll; a1 += n_elem_per_reg * n_iter_unroll; a2 += n_elem_per_reg * n_iter_unroll; a3 += n_elem_per_reg * n_iter_unroll; a4 += n_elem_per_reg * n_iter_unroll; a5 += n_elem_per_reg * n_iter_unroll; a6 += n_elem_per_reg * n_iter_unroll; a7 += n_elem_per_reg * n_iter_unroll; } #if 0 rho0 += rho0v.f[0] + rho0v.f[1] + rho0v.f[2] + rho0v.f[3] + rho0v.f[4] + rho0v.f[5] + rho0v.f[6] + rho0v.f[7]; rho1 += rho1v.f[0] + rho1v.f[1] + rho1v.f[2] + rho1v.f[3] + rho1v.f[4] + 
rho1v.f[5] + rho1v.f[6] + rho1v.f[7]; rho2 += rho2v.f[0] + rho2v.f[1] + rho2v.f[2] + rho2v.f[3] + rho2v.f[4] + rho2v.f[5] + rho2v.f[6] + rho2v.f[7]; rho3 += rho3v.f[0] + rho3v.f[1] + rho3v.f[2] + rho3v.f[3] + rho3v.f[4] + rho3v.f[5] + rho3v.f[6] + rho3v.f[7]; rho4 += rho4v.f[0] + rho4v.f[1] + rho4v.f[2] + rho4v.f[3] + rho4v.f[4] + rho4v.f[5] + rho4v.f[6] + rho4v.f[7]; rho5 += rho5v.f[0] + rho5v.f[1] + rho5v.f[2] + rho5v.f[3] + rho5v.f[4] + rho5v.f[5] + rho5v.f[6] + rho5v.f[7]; rho6 += rho6v.f[0] + rho6v.f[1] + rho6v.f[2] + rho6v.f[3] + rho6v.f[4] + rho6v.f[5] + rho6v.f[6] + rho6v.f[7]; rho7 += rho7v.f[0] + rho7v.f[1] + rho7v.f[2] + rho7v.f[3] + rho7v.f[4] + rho7v.f[5] + rho7v.f[6] + rho7v.f[7]; #else // Now we need to sum the elements within each vector. v8sf_t onev; onev.v = _mm256_set1_ps( 1.0f ); // Sum the elements of a given rho?v by dotting it with 1. The '1' in // '0xf1' stores the sum of the upper four and lower four values to // the low elements of each lane: elements 4 and 0, respectively. (The // 'f' in '0xf1' means include all four elements of each lane in the // summation.) rho0v.v = _mm256_dp_ps( rho0v.v, onev.v, 0xf1 ); rho1v.v = _mm256_dp_ps( rho1v.v, onev.v, 0xf1 ); rho2v.v = _mm256_dp_ps( rho2v.v, onev.v, 0xf1 ); rho3v.v = _mm256_dp_ps( rho3v.v, onev.v, 0xf1 ); rho4v.v = _mm256_dp_ps( rho4v.v, onev.v, 0xf1 ); rho5v.v = _mm256_dp_ps( rho5v.v, onev.v, 0xf1 ); rho6v.v = _mm256_dp_ps( rho6v.v, onev.v, 0xf1 ); rho7v.v = _mm256_dp_ps( rho7v.v, onev.v, 0xf1 ); // Manually add the results from above to finish the sum. rho0 = rho0v.f[0] + rho0v.f[4]; rho1 = rho1v.f[0] + rho1v.f[4]; rho2 = rho2v.f[0] + rho2v.f[4]; rho3 = rho3v.f[0] + rho3v.f[4]; rho4 = rho4v.f[0] + rho4v.f[4]; rho5 = rho5v.f[0] + rho5v.f[4]; rho6 = rho6v.f[0] + rho6v.f[4]; rho7 = rho7v.f[0] + rho7v.f[4]; #endif // Adjust for scalar subproblem. 
m -= n_elem_per_reg * n_iter_unroll * m_viter; a += n_elem_per_reg * n_iter_unroll * m_viter /* * inca */; x += n_elem_per_reg * n_iter_unroll * m_viter /* * incx */; } else if ( lda == 1 ) { const dim_t n_iter_unroll = 4; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. dim_t m_viter = ( m ) / ( n_iter_unroll ); // Initialize pointers for x and A. float* restrict x0 = x; float* restrict a0 = a; // Initialize rho vector accumulators to zero. v8sf_t rho0v; rho0v.v = _mm256_setzero_ps(); v8sf_t rho1v; rho1v.v = _mm256_setzero_ps(); v8sf_t rho2v; rho2v.v = _mm256_setzero_ps(); v8sf_t rho3v; rho3v.v = _mm256_setzero_ps(); v8sf_t x0v, x1v, x2v, x3v; v8sf_t a0v, a1v, a2v, a3v; for ( dim_t i = 0; i < m_viter; ++i ) { // Load the input values. a0v.v = _mm256_loadu_ps( a0 + 0*inca ); a1v.v = _mm256_loadu_ps( a0 + 1*inca ); a2v.v = _mm256_loadu_ps( a0 + 2*inca ); a3v.v = _mm256_loadu_ps( a0 + 3*inca ); x0v.v = _mm256_broadcast_ss( x0 + 0*incx ); x1v.v = _mm256_broadcast_ss( x0 + 1*incx ); x2v.v = _mm256_broadcast_ss( x0 + 2*incx ); x3v.v = _mm256_broadcast_ss( x0 + 3*incx ); // perform : rho?v += a?v * x?v; rho0v.v = _mm256_fmadd_ps( a0v.v, x0v.v, rho0v.v ); rho1v.v = _mm256_fmadd_ps( a1v.v, x1v.v, rho1v.v ); rho2v.v = _mm256_fmadd_ps( a2v.v, x2v.v, rho2v.v ); rho3v.v = _mm256_fmadd_ps( a3v.v, x3v.v, rho3v.v ); x0 += incx * n_iter_unroll; a0 += inca * n_iter_unroll; } // Combine the 8 accumulators into one vector register. rho0v.v = _mm256_add_ps( rho0v.v, rho1v.v ); rho2v.v = _mm256_add_ps( rho2v.v, rho3v.v ); rho0v.v = _mm256_add_ps( rho0v.v, rho2v.v ); // Write vector components to scalar values. rho0 = rho0v.f[0]; rho1 = rho0v.f[1]; rho2 = rho0v.f[2]; rho3 = rho0v.f[3]; rho4 = rho0v.f[4]; rho5 = rho0v.f[5]; rho6 = rho0v.f[6]; rho7 = rho0v.f[7]; // Adjust for scalar subproblem. 
m -= n_iter_unroll * m_viter; a += n_iter_unroll * m_viter * inca; x += n_iter_unroll * m_viter * incx; } else { // No vectorization possible; use scalar iterations for the entire // problem. } // Scalar edge case. { // Initialize pointers for x and the b_n columns of A (rows of A^T). float* restrict x0 = x; float* restrict a0 = a + 0*lda; float* restrict a1 = a + 1*lda; float* restrict a2 = a + 2*lda; float* restrict a3 = a + 3*lda; float* restrict a4 = a + 4*lda; float* restrict a5 = a + 5*lda; float* restrict a6 = a + 6*lda; float* restrict a7 = a + 7*lda; // If there are leftover iterations, perform them with scalar code. for ( dim_t i = 0; i < m ; ++i ) { const float x0c = *x0; const float a0c = *a0; const float a1c = *a1; const float a2c = *a2; const float a3c = *a3; const float a4c = *a4; const float a5c = *a5; const float a6c = *a6; const float a7c = *a7; rho0 += a0c * x0c; rho1 += a1c * x0c; rho2 += a2c * x0c; rho3 += a3c * x0c; rho4 += a4c * x0c; rho5 += a5c * x0c; rho6 += a6c * x0c; rho7 += a7c * x0c; x0 += incx; a0 += inca; a1 += inca; a2 += inca; a3 += inca; a4 += inca; a5 += inca; a6 += inca; a7 += inca; } } // Now prepare the final rho values to output/accumulate back into // the y vector. v8sf_t rho0v, y0v; // Insert the scalar rho values into a single vector. rho0v.f[0] = rho0; rho0v.f[1] = rho1; rho0v.f[2] = rho2; rho0v.f[3] = rho3; rho0v.f[4] = rho4; rho0v.f[5] = rho5; rho0v.f[6] = rho6; rho0v.f[7] = rho7; // Broadcast the alpha scalar. v8sf_t alphav; alphav.v = _mm256_broadcast_ss( alpha ); // We know at this point that alpha is nonzero; however, beta may still // be zero. If beta is indeed zero, we must overwrite y rather than scale // by beta (in case y contains NaN or Inf). if ( PASTEMAC(s,eq0)( *beta ) ) { // Apply alpha to the accumulated dot product in rho: // y := alpha * rho y0v.v = _mm256_mul_ps( alphav.v, rho0v.v ); } else { // Broadcast the beta scalar. v8sf_t betav; betav.v = _mm256_broadcast_ss( beta ); // Load y. 
if ( incy == 1 ) { y0v.v = _mm256_loadu_ps( y + 0*n_elem_per_reg ); } else { y0v.f[0] = *(y + 0*incy); y0v.f[1] = *(y + 1*incy); y0v.f[2] = *(y + 2*incy); y0v.f[3] = *(y + 3*incy); y0v.f[4] = *(y + 4*incy); y0v.f[5] = *(y + 5*incy); y0v.f[6] = *(y + 6*incy); y0v.f[7] = *(y + 7*incy); } // Apply beta to y and alpha to the accumulated dot product in rho: // y := beta * y + alpha * rho y0v.v = _mm256_mul_ps( betav.v, y0v.v ); y0v.v = _mm256_fmadd_ps( alphav.v, rho0v.v, y0v.v ); } // Store the output. if ( incy == 1 ) { _mm256_storeu_ps( (y + 0*n_elem_per_reg), y0v.v ); } else { *(y + 0*incy) = y0v.f[0]; *(y + 1*incy) = y0v.f[1]; *(y + 2*incy) = y0v.f[2]; *(y + 3*incy) = y0v.f[3]; *(y + 4*incy) = y0v.f[4]; *(y + 5*incy) = y0v.f[5]; *(y + 6*incy) = y0v.f[6]; *(y + 7*incy) = y0v.f[7]; } } // ----------------------------------------------------------------------------- void bli_ddotxf_zen_int_8 ( conj_t conjat, conj_t conjx, dim_t m, dim_t b_n, double* restrict alpha, double* restrict a, inc_t inca, inc_t lda, double* restrict x, inc_t incx, double* restrict beta, double* restrict y, inc_t incy, cntx_t* restrict cntx ) { const dim_t fuse_fac = 8; const dim_t n_elem_per_reg = 4; // If the b_n dimension is zero, y is empty and there is no computation. if ( bli_zero_dim1( b_n ) ) return; // If the m dimension is zero, or if alpha is zero, the computation // simplifies to updating y. if ( bli_zero_dim1( m ) || PASTEMAC(d,eq0)( *alpha ) ) { dscalv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_SCALV_KER, cntx ); f ( BLIS_NO_CONJUGATE, b_n, beta, y, incy, cntx ); return; } // If b_n is not equal to the fusing factor, then perform the entire // operation as a loop over dotxv. 
if ( b_n != fuse_fac ) { ddotxv_ker_ft f = bli_cntx_get_l1v_ker_dt( BLIS_DOUBLE, BLIS_DOTXV_KER, cntx ); for ( dim_t i = 0; i < b_n; ++i ) { double* a1 = a + (0 )*inca + (i )*lda; double* x1 = x + (0 )*incx; double* psi1 = y + (i )*incy; f ( conjat, conjx, m, alpha, a1, inca, x1, incx, beta, psi1, cntx ); } return; } // At this point, we know that b_n is exactly equal to the fusing factor. // However, m may not be a multiple of the number of elements per vector. // Going forward, we handle two possible storage formats of A explicitly: // (1) A is stored by columns, or (2) A is stored by rows. Either case is // further split into two subproblems along the m dimension: // (a) a vectorized part, starting at m = 0 and ending at any 0 <= m' <= m. // (b) a scalar part, starting at m' and ending at m. If no vectorization // is possible then m' == 0 and thus the scalar part is the entire // problem. If 0 < m', then the a and x pointers and m variable will // be adjusted accordingly for the second subproblem. // Note: since parts (b) for both (1) and (2) are so similar, they are // factored out into one code block after the following conditional, which // distinguishes between (1) and (2). // Intermediate variables to hold the completed dot products double rho0 = 0, rho1 = 0, rho2 = 0, rho3 = 0, rho4 = 0, rho5 = 0, rho6 = 0, rho7 = 0; if ( inca == 1 && incx == 1 ) { const dim_t n_iter_unroll = 1; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. dim_t m_viter = ( m ) / ( n_elem_per_reg * n_iter_unroll ); // Set up pointers for x and the b_n columns of A (rows of A^T). 
double* restrict x0 = x; double* restrict a0 = a + 0*lda; double* restrict a1 = a + 1*lda; double* restrict a2 = a + 2*lda; double* restrict a3 = a + 3*lda; double* restrict a4 = a + 4*lda; double* restrict a5 = a + 5*lda; double* restrict a6 = a + 6*lda; double* restrict a7 = a + 7*lda; // Initialize b_n rho vector accumulators to zero. v4df_t rho0v; rho0v.v = _mm256_setzero_pd(); v4df_t rho1v; rho1v.v = _mm256_setzero_pd(); v4df_t rho2v; rho2v.v = _mm256_setzero_pd(); v4df_t rho3v; rho3v.v = _mm256_setzero_pd(); v4df_t rho4v; rho4v.v = _mm256_setzero_pd(); v4df_t rho5v; rho5v.v = _mm256_setzero_pd(); v4df_t rho6v; rho6v.v = _mm256_setzero_pd(); v4df_t rho7v; rho7v.v = _mm256_setzero_pd(); v4df_t x0v; v4df_t a0v, a1v, a2v, a3v, a4v, a5v, a6v, a7v; // If there are vectorized iterations, perform them with vector // instructions. for ( dim_t i = 0; i < m_viter; ++i ) { // Load the input values. x0v.v = _mm256_loadu_pd( x0 + 0*n_elem_per_reg ); a0v.v = _mm256_loadu_pd( a0 + 0*n_elem_per_reg ); a1v.v = _mm256_loadu_pd( a1 + 0*n_elem_per_reg ); a2v.v = _mm256_loadu_pd( a2 + 0*n_elem_per_reg ); a3v.v = _mm256_loadu_pd( a3 + 0*n_elem_per_reg ); a4v.v = _mm256_loadu_pd( a4 + 0*n_elem_per_reg ); a5v.v = _mm256_loadu_pd( a5 + 0*n_elem_per_reg ); a6v.v = _mm256_loadu_pd( a6 + 0*n_elem_per_reg ); a7v.v = _mm256_loadu_pd( a7 + 0*n_elem_per_reg ); // perform: rho?v += a?v * x0v; rho0v.v = _mm256_fmadd_pd( a0v.v, x0v.v, rho0v.v ); rho1v.v = _mm256_fmadd_pd( a1v.v, x0v.v, rho1v.v ); rho2v.v = _mm256_fmadd_pd( a2v.v, x0v.v, rho2v.v ); rho3v.v = _mm256_fmadd_pd( a3v.v, x0v.v, rho3v.v ); rho4v.v = _mm256_fmadd_pd( a4v.v, x0v.v, rho4v.v ); rho5v.v = _mm256_fmadd_pd( a5v.v, x0v.v, rho5v.v ); rho6v.v = _mm256_fmadd_pd( a6v.v, x0v.v, rho6v.v ); rho7v.v = _mm256_fmadd_pd( a7v.v, x0v.v, rho7v.v ); x0 += n_elem_per_reg * n_iter_unroll; a0 += n_elem_per_reg * n_iter_unroll; a1 += n_elem_per_reg * n_iter_unroll; a2 += n_elem_per_reg * n_iter_unroll; a3 += n_elem_per_reg * n_iter_unroll; a4 += 
n_elem_per_reg * n_iter_unroll; a5 += n_elem_per_reg * n_iter_unroll; a6 += n_elem_per_reg * n_iter_unroll; a7 += n_elem_per_reg * n_iter_unroll; } #if 0 rho0 += rho0v.d[0] + rho0v.d[1] + rho0v.d[2] + rho0v.d[3]; rho1 += rho1v.d[0] + rho1v.d[1] + rho1v.d[2] + rho1v.d[3]; rho2 += rho2v.d[0] + rho2v.d[1] + rho2v.d[2] + rho2v.d[3]; rho3 += rho3v.d[0] + rho3v.d[1] + rho3v.d[2] + rho3v.d[3]; rho4 += rho4v.d[0] + rho4v.d[1] + rho4v.d[2] + rho4v.d[3]; rho5 += rho5v.d[0] + rho5v.d[1] + rho5v.d[2] + rho5v.d[3]; rho6 += rho6v.d[0] + rho6v.d[1] + rho6v.d[2] + rho6v.d[3]; rho7 += rho7v.d[0] + rho7v.d[1] + rho7v.d[2] + rho7v.d[3]; #else // Sum the elements of a given rho?v. This computes the sum of // elements within lanes and stores the sum to both elements. rho0v.v = _mm256_hadd_pd( rho0v.v, rho0v.v ); rho1v.v = _mm256_hadd_pd( rho1v.v, rho1v.v ); rho2v.v = _mm256_hadd_pd( rho2v.v, rho2v.v ); rho3v.v = _mm256_hadd_pd( rho3v.v, rho3v.v ); rho4v.v = _mm256_hadd_pd( rho4v.v, rho4v.v ); rho5v.v = _mm256_hadd_pd( rho5v.v, rho5v.v ); rho6v.v = _mm256_hadd_pd( rho6v.v, rho6v.v ); rho7v.v = _mm256_hadd_pd( rho7v.v, rho7v.v ); // Manually add the results from above to finish the sum. rho0 = rho0v.d[0] + rho0v.d[2]; rho1 = rho1v.d[0] + rho1v.d[2]; rho2 = rho2v.d[0] + rho2v.d[2]; rho3 = rho3v.d[0] + rho3v.d[2]; rho4 = rho4v.d[0] + rho4v.d[2]; rho5 = rho5v.d[0] + rho5v.d[2]; rho6 = rho6v.d[0] + rho6v.d[2]; rho7 = rho7v.d[0] + rho7v.d[2]; #endif // Adjust for scalar subproblem. m -= n_elem_per_reg * n_iter_unroll * m_viter; a += n_elem_per_reg * n_iter_unroll * m_viter /* * inca */; x += n_elem_per_reg * n_iter_unroll * m_viter /* * incx */; } else if ( lda == 1 ) { const dim_t n_iter_unroll = 3; const dim_t n_reg_per_row = 2; // fuse_fac / n_elem_per_reg; // Use the unrolling factor and the number of elements per register // to compute the number of vectorized and leftover iterations. dim_t m_viter = ( m ) / ( n_reg_per_row * n_iter_unroll ); // Initialize pointers for x and A. 
double* restrict x0 = x; double* restrict a0 = a; // Initialize rho vector accumulators to zero. v4df_t rho0v; rho0v.v = _mm256_setzero_pd(); v4df_t rho1v; rho1v.v = _mm256_setzero_pd(); v4df_t rho2v; rho2v.v = _mm256_setzero_pd(); v4df_t rho3v; rho3v.v = _mm256_setzero_pd(); v4df_t rho4v; rho4v.v = _mm256_setzero_pd(); v4df_t rho5v; rho5v.v = _mm256_setzero_pd(); v4df_t x0v, x1v, x2v; v4df_t a0v, a1v, a2v, a3v, a4v, a5v; for ( dim_t i = 0; i < m_viter; ++i ) { // Load the input values. a0v.v = _mm256_loadu_pd( a0 + 0*inca + 0*n_elem_per_reg ); a1v.v = _mm256_loadu_pd( a0 + 0*inca + 1*n_elem_per_reg ); a2v.v = _mm256_loadu_pd( a0 + 1*inca + 0*n_elem_per_reg ); a3v.v = _mm256_loadu_pd( a0 + 1*inca + 1*n_elem_per_reg ); a4v.v = _mm256_loadu_pd( a0 + 2*inca + 0*n_elem_per_reg ); a5v.v = _mm256_loadu_pd( a0 + 2*inca + 1*n_elem_per_reg ); x0v.v = _mm256_broadcast_sd( x0 + 0*incx ); x1v.v = _mm256_broadcast_sd( x0 + 1*incx ); x2v.v = _mm256_broadcast_sd( x0 + 2*incx ); // perform : rho?v += a?v * x?v; rho0v.v = _mm256_fmadd_pd( a0v.v, x0v.v, rho0v.v ); rho1v.v = _mm256_fmadd_pd( a1v.v, x0v.v, rho1v.v ); rho2v.v = _mm256_fmadd_pd( a2v.v, x1v.v, rho2v.v ); rho3v.v = _mm256_fmadd_pd( a3v.v, x1v.v, rho3v.v ); rho4v.v = _mm256_fmadd_pd( a4v.v, x2v.v, rho4v.v ); rho5v.v = _mm256_fmadd_pd( a5v.v, x2v.v, rho5v.v ); x0 += incx * n_iter_unroll; a0 += inca * n_iter_unroll; } // Combine the 8 accumulators into one vector register. rho0v.v = _mm256_add_pd( rho0v.v, rho2v.v ); rho0v.v = _mm256_add_pd( rho0v.v, rho4v.v ); rho1v.v = _mm256_add_pd( rho1v.v, rho3v.v ); rho1v.v = _mm256_add_pd( rho1v.v, rho5v.v ); // Write vector components to scalar values. rho0 = rho0v.d[0]; rho1 = rho0v.d[1]; rho2 = rho0v.d[2]; rho3 = rho0v.d[3]; rho4 = rho1v.d[0]; rho5 = rho1v.d[1]; rho6 = rho1v.d[2]; rho7 = rho1v.d[3]; // Adjust for scalar subproblem. 
m -= n_iter_unroll * m_viter; a += n_iter_unroll * m_viter * inca; x += n_iter_unroll * m_viter * incx; } else { // No vectorization possible; use scalar iterations for the entire // problem. } // Scalar edge case. { // Initialize pointers for x and the b_n columns of A (rows of A^T). double* restrict x0 = x; double* restrict a0 = a + 0*lda; double* restrict a1 = a + 1*lda; double* restrict a2 = a + 2*lda; double* restrict a3 = a + 3*lda; double* restrict a4 = a + 4*lda; double* restrict a5 = a + 5*lda; double* restrict a6 = a + 6*lda; double* restrict a7 = a + 7*lda; // If there are leftover iterations, perform them with scalar code. for ( dim_t i = 0; i < m ; ++i ) { const double x0c = *x0; const double a0c = *a0; const double a1c = *a1; const double a2c = *a2; const double a3c = *a3; const double a4c = *a4; const double a5c = *a5; const double a6c = *a6; const double a7c = *a7; rho0 += a0c * x0c; rho1 += a1c * x0c; rho2 += a2c * x0c; rho3 += a3c * x0c; rho4 += a4c * x0c; rho5 += a5c * x0c; rho6 += a6c * x0c; rho7 += a7c * x0c; x0 += incx; a0 += inca; a1 += inca; a2 += inca; a3 += inca; a4 += inca; a5 += inca; a6 += inca; a7 += inca; } } // Now prepare the final rho values to output/accumulate back into // the y vector. v4df_t rho0v, rho1v, y0v, y1v; // Insert the scalar rho values into a single vector. rho0v.d[0] = rho0; rho0v.d[1] = rho1; rho0v.d[2] = rho2; rho0v.d[3] = rho3; rho1v.d[0] = rho4; rho1v.d[1] = rho5; rho1v.d[2] = rho6; rho1v.d[3] = rho7; // Broadcast the alpha scalar. v4df_t alphav; alphav.v = _mm256_broadcast_sd( alpha ); // We know at this point that alpha is nonzero; however, beta may still // be zero. If beta is indeed zero, we must overwrite y rather than scale // by beta (in case y contains NaN or Inf). if ( PASTEMAC(d,eq0)( *beta ) ) { // Apply alpha to the accumulated dot product in rho: // y := alpha * rho y0v.v = _mm256_mul_pd( alphav.v, rho0v.v ); y1v.v = _mm256_mul_pd( alphav.v, rho1v.v ); } else { // Broadcast the beta scalar. 
v4df_t betav; betav.v = _mm256_broadcast_sd( beta ); // Load y. if ( incy == 1 ) { y0v.v = _mm256_loadu_pd( y + 0*n_elem_per_reg ); y1v.v = _mm256_loadu_pd( y + 1*n_elem_per_reg ); } else { y0v.d[0] = *(y + 0*incy); y0v.d[1] = *(y + 1*incy); y0v.d[2] = *(y + 2*incy); y0v.d[3] = *(y + 3*incy); y1v.d[0] = *(y + 4*incy); y1v.d[1] = *(y + 5*incy); y1v.d[2] = *(y + 6*incy); y1v.d[3] = *(y + 7*incy); } // Apply beta to y and alpha to the accumulated dot product in rho: // y := beta * y + alpha * rho y0v.v = _mm256_mul_pd( betav.v, y0v.v ); y1v.v = _mm256_mul_pd( betav.v, y1v.v ); y0v.v = _mm256_fmadd_pd( alphav.v, rho0v.v, y0v.v ); y1v.v = _mm256_fmadd_pd( alphav.v, rho1v.v, y1v.v ); } if ( incy == 1 ) { // Store the output. _mm256_storeu_pd( (y + 0*n_elem_per_reg), y0v.v ); _mm256_storeu_pd( (y + 1*n_elem_per_reg), y1v.v ); } else { *(y + 0*incy) = y0v.d[0]; *(y + 1*incy) = y0v.d[1]; *(y + 2*incy) = y0v.d[2]; *(y + 3*incy) = y0v.d[3]; *(y + 4*incy) = y1v.d[0]; *(y + 5*incy) = y1v.d[1]; *(y + 6*incy) = y1v.d[2]; *(y + 7*incy) = y1v.d[3]; } } blis-0.6.1/kernels/zen/3/000077500000000000000000000000001360743507500150625ustar00rootroot00000000000000blis-0.6.1/kernels/zen/3/bli_gemm_small.c000066400000000000000000004513021360743507500201760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "xmmintrin.h" #include "blis.h" #ifdef BLIS_ENABLE_SMALL_MATRIX #define MR 32 #define D_MR (MR >> 1) #define NR 3 #define BLIS_ENABLE_PREFETCH #define F_SCRATCH_DIM (BLIS_SMALL_MATRIX_THRES * BLIS_SMALL_MATRIX_THRES) static float A_pack[F_SCRATCH_DIM] __attribute__((aligned(64))); #define D_BLIS_SMALL_MATRIX_THRES (BLIS_SMALL_MATRIX_THRES / 2 ) #define D_BLIS_SMALL_M_RECT_MATRIX_THRES (BLIS_SMALL_M_RECT_MATRIX_THRES / 2) #define D_BLIS_SMALL_K_RECT_MATRIX_THRES (BLIS_SMALL_K_RECT_MATRIX_THRES / 2) #define D_SCRATCH_DIM (D_BLIS_SMALL_MATRIX_THRES * D_BLIS_SMALL_MATRIX_THRES) static double D_A_pack[D_SCRATCH_DIM] __attribute__((aligned(64))); #define BLIS_ATBN_M_THRES 40 // Threshold value of M for/below which small matrix code is called. #define AT_MR 4 // The kernel dimension of the A transpose GEMM kernel.(AT_MR * NR). 
static err_t bli_sgemm_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); static err_t bli_dgemm_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); static err_t bli_sgemm_small_atbn ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); static err_t bli_dgemm_small_atbn ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); /* * The bli_gemm_small function will use the * custom MRxNR kernels, to perform the computation. * The custom kernels are used if the [M * N] < 240 * 240 */ err_t bli_gemm_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { #ifdef BLIS_ENABLE_MULTITHREADING return BLIS_NOT_YET_IMPLEMENTED; #endif // If alpha is zero, scale by beta and return. if (bli_obj_equals(alpha, &BLIS_ZERO)) { return BLIS_NOT_YET_IMPLEMENTED; } // if row major format return. if ((bli_obj_row_stride( a ) != 1) || (bli_obj_row_stride( b ) != 1) || (bli_obj_row_stride( c ) != 1)) { return BLIS_INVALID_ROW_STRIDE; } num_t dt = ((*c).info & (0x7 << 0)); if (bli_obj_has_trans( a )) { if (bli_obj_has_notrans( b )) { if (dt == BLIS_FLOAT) { return bli_sgemm_small_atbn(alpha, a, b, beta, c, cntx, cntl); } else if (dt == BLIS_DOUBLE) { return bli_dgemm_small_atbn(alpha, a, b, beta, c, cntx, cntl); } } return BLIS_NOT_YET_IMPLEMENTED; } if (dt == BLIS_DOUBLE) { return bli_dgemm_small(alpha, a, b, beta, c, cntx, cntl); } if (dt == BLIS_FLOAT) { return bli_sgemm_small(alpha, a, b, beta, c, cntx, cntl); } return BLIS_NOT_YET_IMPLEMENTED; }; static err_t bli_sgemm_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { gint_t M = bli_obj_length( c ); // number of rows of Matrix C gint_t N = bli_obj_width( c ); // number of columns of Matrix C gint_t K = bli_obj_width( a ); // number of columns of OP(A), will be updated if OP(A) is Transpose(A) . 
gint_t L = M * N; // printf("alpha_cast = %f beta_cast = %f [ Trans = %d %d], [stride = %d %d %d] [m,n,k = %d %d %d]\n",*alpha_cast,*beta_cast, bli_obj_has_trans( a ), bli_obj_has_trans( b ), lda, ldb,ldc, M,N,K); if ((((L) < (BLIS_SMALL_MATRIX_THRES * BLIS_SMALL_MATRIX_THRES)) || ((M < BLIS_SMALL_M_RECT_MATRIX_THRES) && (K < BLIS_SMALL_K_RECT_MATRIX_THRES))) && ((L!=0) && (K!=0))) { guint_t lda = bli_obj_col_stride( a ); // column stride of matrix OP(A), where OP(A) is Transpose(A) if transA enabled. guint_t ldb = bli_obj_col_stride( b ); // column stride of matrix OP(B), where OP(B) is Transpose(B) if transB enabled. guint_t ldc = bli_obj_col_stride( c ); // column stride of matrix C guint_t row_idx, col_idx, k; float *A = a->buffer; // pointer to elements of Matrix A float *B = b->buffer; // pointer to elements of Matrix B float *C = c->buffer; // pointer to elements of Matrix C float *tA = A, *tB = B, *tC = C;//, *tA_pack; float *tA_packed; // temprorary pointer to hold packed A memory pointer guint_t row_idx_packed; //packed A memory row index guint_t lda_packed; //lda of packed A guint_t col_idx_start; //starting index after A matrix is packed. dim_t tb_inc_row = 1; // row stride of matrix B dim_t tb_inc_col = ldb; // column stride of matrix B __m256 ymm4, ymm5, ymm6, ymm7; __m256 ymm8, ymm9, ymm10, ymm11; __m256 ymm12, ymm13, ymm14, ymm15; __m256 ymm0, ymm1, ymm2, ymm3; gint_t n_remainder; // If the N is non multiple of 3.(N%3) gint_t m_remainder; // If the M is non multiple of 32.(M%32) float *alpha_cast, *beta_cast; // alpha, beta multiples alpha_cast = (alpha->buffer); beta_cast = (beta->buffer); gint_t required_packing_A = 1; // when N is equal to 1 call GEMV instead of GEMM if (N == 1) { bli_gemv ( alpha, a, b, beta, c ); return BLIS_SUCCESS; } //update the pointer math if matrix B needs to be transposed. 
if (bli_obj_has_trans( b )) { tb_inc_col = 1; //switch row and column strides tb_inc_row = ldb; } if ((N <= 3) || ((MR * K) > F_SCRATCH_DIM)) { required_packing_A = 0; } /* * The computation loop runs for MRxN columns of C matrix, thus * accessing the MRxK A matrix data and KxNR B matrix data. * The computation is organized as inner loops of dimension MRxNR. */ // Process MR rows of C matrix at a time. for (row_idx = 0; (row_idx + (MR - 1)) < M; row_idx += MR) { col_idx_start = 0; tA_packed = A; row_idx_packed = row_idx; lda_packed = lda; // This is the part of the pack and compute optimization. // During the first column iteration, we store the accessed A matrix into // contiguous static memory. This helps to keep te A matrix in Cache and // aviods the TLB misses. if (required_packing_A) { col_idx = 0; //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; tA_packed = A_pack; #ifdef BLIS_ENABLE_PREFETCH _mm_prefetch((char*)(tC + 0), _MM_HINT_T0); _mm_prefetch((char*)(tC + 16), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc + 16), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc + 16), _MM_HINT_T0); #endif // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm11 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. 
// This loop is processing MR x K ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); _mm256_storeu_ps(tA_packed, ymm3); // the packing of matrix A // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); _mm256_storeu_ps(tA_packed + 8, ymm3); // the packing of matrix A // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_ps(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_ps(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); _mm256_storeu_ps(tA_packed + 16, ymm3); // the packing of matrix A // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_ps(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_ps(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_ps(ymm2, ymm3, ymm14); ymm3 = _mm256_loadu_ps(tA + 24); _mm256_storeu_ps(tA_packed + 24, ymm3); // the packing of matrix A // ymm7 += ymm0 * ymm3; ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); // ymm11 += ymm1 * ymm3; ymm11 = _mm256_fmadd_ps(ymm1, ymm3, ymm11); // ymm15 += ymm2 * ymm3; ymm15 = _mm256_fmadd_ps(ymm2, ymm3, ymm15); tA += lda; tA_packed += MR; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. 
ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm11 = _mm256_mul_ps(ymm11, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); ymm15 = _mm256_mul_ps(ymm15, ymm0); // multiply C by beta and accumulate col 1. ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_ps(tC + 16); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_ps(tC + 24); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7); _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); _mm256_storeu_ps(tC + 16, ymm6); _mm256_storeu_ps(tC + 24, ymm7); // multiply C by beta and accumulate, col 2. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_ps(tC + 24); ymm11 = _mm256_fmadd_ps(ymm2, ymm1, ymm11); _mm256_storeu_ps(tC, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); _mm256_storeu_ps(tC + 24, ymm11); // multiply C by beta and accumulate, col 3. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_ps(tC + 24); ymm15 = _mm256_fmadd_ps(ymm2, ymm1, ymm15); _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); _mm256_storeu_ps(tC + 24, ymm15); // modify the pointer arithematic to use packed A matrix. 
col_idx_start = NR; tA_packed = A_pack; row_idx_packed = 0; lda_packed = MR; } // Process NR columns of C matrix at a time. for (col_idx = col_idx_start; (col_idx + (NR - 1)) < N; col_idx += NR) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = tA_packed + row_idx_packed; #ifdef BLIS_ENABLE_PREFETCH _mm_prefetch((char*)(tC + 0), _MM_HINT_T0); _mm_prefetch((char*)(tC + 16), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc + 16), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc + 16), _MM_HINT_T0); #endif // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm11 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. // This loop is processing MR x K ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_ps(tA); // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_ps(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_ps(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_ps(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_ps(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_ps(ymm2, ymm3, ymm14); ymm3 = _mm256_loadu_ps(tA + 24); // ymm7 += ymm0 * ymm3; ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); // ymm11 += ymm1 * ymm3; ymm11 = _mm256_fmadd_ps(ymm1, ymm3, ymm11); // ymm15 += ymm2 * ymm3; ymm15 = _mm256_fmadd_ps(ymm2, ymm3, ymm15); tA += lda_packed; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm11 = _mm256_mul_ps(ymm11, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); ymm15 = _mm256_mul_ps(ymm15, ymm0); // multiply C by beta and accumulate col 1. 
ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_ps(tC + 16); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_ps(tC + 24); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7); _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); _mm256_storeu_ps(tC + 16, ymm6); _mm256_storeu_ps(tC + 24, ymm7); // multiply C by beta and accumulate, col 2. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_ps(tC + 24); ymm11 = _mm256_fmadd_ps(ymm2, ymm1, ymm11); _mm256_storeu_ps(tC, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); _mm256_storeu_ps(tC + 24, ymm11); // multiply C by beta and accumulate, col 3. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_ps(tC + 24); ymm15 = _mm256_fmadd_ps(ymm2, ymm1, ymm15); _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); _mm256_storeu_ps(tC + 24, ymm15); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm11 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. 
ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm8 = _mm256_fmadd_ps(ymm0, ymm3, ymm8); ymm12 = _mm256_fmadd_ps(ymm1, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); ymm9 = _mm256_fmadd_ps(ymm0, ymm3, ymm9); ymm13 = _mm256_fmadd_ps(ymm1, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm14 = _mm256_fmadd_ps(ymm1, ymm3, ymm14); ymm3 = _mm256_loadu_ps(tA + 24); ymm11 = _mm256_fmadd_ps(ymm0, ymm3, ymm11); ymm15 = _mm256_fmadd_ps(ymm1, ymm3, ymm15); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm11 = _mm256_mul_ps(ymm11, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); ymm15 = _mm256_mul_ps(ymm15, ymm0); // multiply C by beta and accumulate, col 1. ymm2 = _mm256_loadu_ps(tC + 0); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_ps(tC + 24); ymm11 = _mm256_fmadd_ps(ymm2, ymm1, ymm11); _mm256_storeu_ps(tC + 0, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); _mm256_storeu_ps(tC + 24, ymm11); // multiply C by beta and accumulate, col 2. 
tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_ps(tC + 24); ymm15 = _mm256_fmadd_ps(ymm2, ymm1, ymm15); _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); _mm256_storeu_ps(tC + 24, ymm15); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm12 = _mm256_fmadd_ps(ymm0, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); ymm14 = _mm256_fmadd_ps(ymm0, ymm3, ymm14); ymm3 = _mm256_loadu_ps(tA + 24); ymm15 = _mm256_fmadd_ps(ymm0, ymm3, ymm15); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); ymm15 = _mm256_mul_ps(ymm15, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_ps(tC + 0); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_ps(tC + 24); ymm15 = _mm256_fmadd_ps(ymm2, ymm1, ymm15); _mm256_storeu_ps(tC + 0, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); _mm256_storeu_ps(tC + 24, ymm15); } } m_remainder = M - row_idx; if (m_remainder >= 24) { m_remainder -= 24; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_ps(tA); // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_ps(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_ps(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_ps(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_ps(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_ps(ymm2, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_ps(tC + 16); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); _mm256_storeu_ps(tC + 16, ymm6); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10); _mm256_storeu_ps(tC, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); // multiply C by beta and accumulate. 
tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm8 = _mm256_fmadd_ps(ymm0, ymm3, ymm8); ymm12 = _mm256_fmadd_ps(ymm1, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); ymm9 = _mm256_fmadd_ps(ymm0, ymm3, ymm9); ymm13 = _mm256_fmadd_ps(ymm1, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm14 = _mm256_fmadd_ps(ymm1, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_ps(tC + 0); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10); _mm256_storeu_ps(tC + 0, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm12 = _mm256_fmadd_ps(ymm0, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); ymm14 = _mm256_fmadd_ps(ymm0, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_ps(tC + 0); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); _mm256_storeu_ps(tC + 0, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); } row_idx += 24; } if (m_remainder >= 16) { m_remainder -= 16; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm6 = _mm256_fmadd_ps(ymm1, ymm3, ymm6); ymm8 = _mm256_fmadd_ps(ymm2, ymm3, ymm8); ymm3 = _mm256_loadu_ps(tA + 8); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_ps(tC + 8); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7); _mm256_storeu_ps(tC, ymm6); _mm256_storeu_ps(tC + 8, ymm7); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); _mm256_storeu_ps(tC, ymm8); _mm256_storeu_ps(tC + 8, ymm9); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm6 = _mm256_fmadd_ps(ymm1, ymm3, ymm6); ymm3 = _mm256_loadu_ps(tA + 8); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_ps(tC + 8); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7); _mm256_storeu_ps(tC, ymm6); _mm256_storeu_ps(tC + 8, ymm7); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm3 = _mm256_loadu_ps(tA + 8); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); } row_idx += 16; } if (m_remainder >= 8) { m_remainder -= 8; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_ps(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_ps(ymm2, ymm3, ymm6); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); _mm256_storeu_ps(tC, ymm4); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); _mm256_storeu_ps(tC, ymm5); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); _mm256_storeu_ps(tC, ymm6); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_ps(ymm1, ymm3, ymm5); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); _mm256_storeu_ps(tC, ymm4); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_ps(tC); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); _mm256_storeu_ps(tC, ymm5); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); ymm4 = _mm256_mul_ps(ymm4, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); _mm256_storeu_ps(tC, ymm4); } row_idx += 8; } // M is not a multiple of 32. // The handling of edge case where the remainder // dimension is less than 8. The padding takes place // to handle this case. if ((m_remainder) && (lda > 7)) { float f_temp[8]; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm5 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); _mm256_storeu_ps(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7); _mm256_storeu_ps(f_temp, ymm7); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); _mm256_storeu_ps(f_temp, ymm9); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } n_remainder = N - col_idx; // if the N is not multiple of 3. 
// handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm5 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; ymm3 = _mm256_loadu_ps(tA); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); tA += lda; } ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); _mm256_storeu_ps(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7); _mm256_storeu_ps(f_temp, ymm7); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm5 = _mm256_setzero_ps(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. 
ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; ymm3 = _mm256_loadu_ps(tA); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); tA += lda; } ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm0 = _mm256_broadcast_ss(alpha_cast); ymm1 = _mm256_broadcast_ss(beta_cast); // multiply C by beta and accumulate. ymm5 = _mm256_mul_ps(ymm5, ymm0); for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); _mm256_storeu_ps(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } m_remainder = 0; } if (m_remainder) { float result; for (; row_idx < M; row_idx += 1) { for (col_idx = 0; col_idx < N; col_idx += 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; result = 0; for (k = 0; k < K; ++k) { result += (*tA) * (*tB); tA += lda; tB += tb_inc_row; } result *= (*alpha_cast); (*tC) = (*tC) * (*beta_cast) + result; } } } return BLIS_SUCCESS; } else return BLIS_NONCONFORMAL_DIMENSIONS; }; static err_t bli_dgemm_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { gint_t M = bli_obj_length( c ); // number of rows of Matrix C gint_t N = bli_obj_width( c ); // number of columns of Matrix C gint_t K = bli_obj_width( a ); // number of columns of OP(A), will be updated if OP(A) is Transpose(A) . gint_t L = M * N; // If alpha is zero, scale by beta and return. 
// printf("alpha_cast = %f beta_cast = %f [ Trans = %d %d], [stride = %d %d %d] [m,n,k = %d %d %d]\n",*alpha_cast,*beta_cast, bli_obj_has_trans( a ), bli_obj_has_trans( b ), lda, ldb,ldc, M,N,K); #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if( (L != 0) && (K != 0) && (N < BLIS_SMALL_MATRIX_THRES_ROME) && (K < BLIS_SMALL_MATRIX_THRES_ROME)) #else if ((((L) < (D_BLIS_SMALL_MATRIX_THRES * D_BLIS_SMALL_MATRIX_THRES)) || ((M < D_BLIS_SMALL_M_RECT_MATRIX_THRES) && (K < D_BLIS_SMALL_K_RECT_MATRIX_THRES))) && ((L!=0) && (K!=0))) #endif { guint_t lda = bli_obj_col_stride( a ); // column stride of matrix OP(A), where OP(A) is Transpose(A) if transA enabled. guint_t ldb = bli_obj_col_stride( b ); // column stride of matrix OP(B), where OP(B) is Transpose(B) if transB enabled. guint_t ldc = bli_obj_col_stride( c ); // column stride of matrix C guint_t row_idx, col_idx, k; double *A = a->buffer; // pointer to elements of Matrix A double *B = b->buffer; // pointer to elements of Matrix B double *C = c->buffer; // pointer to elements of Matrix C double *tA = A, *tB = B, *tC = C;//, *tA_pack; double *tA_packed; // temprorary pointer to hold packed A memory pointer guint_t row_idx_packed; //packed A memory row index guint_t lda_packed; //lda of packed A guint_t col_idx_start; //starting index after A matrix is packed. 
dim_t tb_inc_row = 1; // row stride of matrix B dim_t tb_inc_col = ldb; // column stride of matrix B __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm0, ymm1, ymm2, ymm3; gint_t n_remainder; // If the N is non multiple of 3.(N%3) gint_t m_remainder; // If the M is non multiple of 16.(M%16) double *alpha_cast, *beta_cast; // alpha, beta multiples alpha_cast = (alpha->buffer); beta_cast = (beta->buffer); gint_t required_packing_A = 1; // when N is equal to 1 call GEMV instead of GEMM if (N == 1) { bli_gemv ( alpha, a, b, beta, c ); return BLIS_SUCCESS; } //update the pointer math if matrix B needs to be transposed. if (bli_obj_has_trans( b )) { tb_inc_col = 1; //switch row and column strides tb_inc_row = ldb; } if ((N <= 3) || ((D_MR * K) > D_SCRATCH_DIM)) { required_packing_A = 0; } /* * The computation loop runs for D_MRxN columns of C matrix, thus * accessing the D_MRxK A matrix data and KxNR B matrix data. * The computation is organized as inner loops of dimension D_MRxNR. */ // Process D_MR rows of C matrix at a time. for (row_idx = 0; (row_idx + (D_MR - 1)) < M; row_idx += D_MR) { col_idx_start = 0; tA_packed = A; row_idx_packed = row_idx; lda_packed = lda; // This is the part of the pack and compute optimization. // During the first column iteration, we store the accessed A matrix into // contiguous static memory. This helps to keep te A matrix in Cache and // aviods the TLB misses. 
if (required_packing_A) { col_idx = 0; //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; tA_packed = D_A_pack; #ifdef BLIS_ENABLE_PREFETCH _mm_prefetch((char*)(tC + 0), _MM_HINT_T0); _mm_prefetch((char*)(tC + 8), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc + 8), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc + 8), _MM_HINT_T0); #endif // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. // This loop is processing D_MR x K ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_pd(tA); _mm256_storeu_pd(tA_packed, ymm3); // the packing of matrix A // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); _mm256_storeu_pd(tA_packed + 4, ymm3); // the packing of matrix A // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_pd(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_pd(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); _mm256_storeu_pd(tA_packed + 8, ymm3); // the packing of matrix A // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_pd(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_pd(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_pd(ymm2, ymm3, ymm14); ymm3 = _mm256_loadu_pd(tA + 12); _mm256_storeu_pd(tA_packed + 12, ymm3); // the packing of matrix A // ymm7 += ymm0 * ymm3; ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); // ymm11 += ymm1 * ymm3; ymm11 = _mm256_fmadd_pd(ymm1, ymm3, ymm11); // ymm15 += ymm2 * ymm3; ymm15 = _mm256_fmadd_pd(ymm2, ymm3, ymm15); tA += lda; tA_packed += D_MR; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm11 = _mm256_mul_pd(ymm11, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); // multiply C by beta and accumulate col 1. 
ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_pd(tC + 8); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_pd(tC + 12); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7); _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); _mm256_storeu_pd(tC + 8, ymm6); _mm256_storeu_pd(tC + 12, ymm7); // multiply C by beta and accumulate, col 2. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_pd(tC + 12); ymm11 = _mm256_fmadd_pd(ymm2, ymm1, ymm11); _mm256_storeu_pd(tC, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); _mm256_storeu_pd(tC + 12, ymm11); // multiply C by beta and accumulate, col 3. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_pd(tC + 12); ymm15 = _mm256_fmadd_pd(ymm2, ymm1, ymm15); _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); _mm256_storeu_pd(tC + 12, ymm15); // modify the pointer arithematic to use packed A matrix. col_idx_start = NR; tA_packed = D_A_pack; row_idx_packed = 0; lda_packed = D_MR; } // Process NR columns of C matrix at a time. 
for (col_idx = col_idx_start; (col_idx + (NR - 1)) < N; col_idx += NR) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = tA_packed + row_idx_packed; #ifdef BLIS_ENABLE_PREFETCH _mm_prefetch((char*)(tC + 0), _MM_HINT_T0); _mm_prefetch((char*)(tC + 8), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc + 8), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc + 8), _MM_HINT_T0); #endif // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. // This loop is processing D_MR x K ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_pd(tA); // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_pd(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_pd(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_pd(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_pd(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_pd(ymm2, ymm3, ymm14); ymm3 = _mm256_loadu_pd(tA + 12); // ymm7 += ymm0 * ymm3; ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); // ymm11 += ymm1 * ymm3; ymm11 = _mm256_fmadd_pd(ymm1, ymm3, ymm11); // ymm15 += ymm2 * ymm3; ymm15 = _mm256_fmadd_pd(ymm2, ymm3, ymm15); tA += lda_packed; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm11 = _mm256_mul_pd(ymm11, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); // multiply C by beta and accumulate col 1. 
ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_pd(tC + 8); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_pd(tC + 12); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7); _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); _mm256_storeu_pd(tC + 8, ymm6); _mm256_storeu_pd(tC + 12, ymm7); // multiply C by beta and accumulate, col 2. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_pd(tC + 12); ymm11 = _mm256_fmadd_pd(ymm2, ymm1, ymm11); _mm256_storeu_pd(tC, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); _mm256_storeu_pd(tC + 12, ymm11); // multiply C by beta and accumulate, col 3. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_pd(tC + 12); ymm15 = _mm256_fmadd_pd(ymm2, ymm1, ymm15); _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); _mm256_storeu_pd(tC + 12, ymm15); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. 
ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm8 = _mm256_fmadd_pd(ymm0, ymm3, ymm8); ymm12 = _mm256_fmadd_pd(ymm1, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); ymm9 = _mm256_fmadd_pd(ymm0, ymm3, ymm9); ymm13 = _mm256_fmadd_pd(ymm1, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm14 = _mm256_fmadd_pd(ymm1, ymm3, ymm14); ymm3 = _mm256_loadu_pd(tA + 12); ymm11 = _mm256_fmadd_pd(ymm0, ymm3, ymm11); ymm15 = _mm256_fmadd_pd(ymm1, ymm3, ymm15); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm11 = _mm256_mul_pd(ymm11, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); // multiply C by beta and accumulate, col 1. ymm2 = _mm256_loadu_pd(tC + 0); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_pd(tC + 12); ymm11 = _mm256_fmadd_pd(ymm2, ymm1, ymm11); _mm256_storeu_pd(tC + 0, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); _mm256_storeu_pd(tC + 12, ymm11); // multiply C by beta and accumulate, col 2. 
tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_pd(tC + 12); ymm15 = _mm256_fmadd_pd(ymm2, ymm1, ymm15); _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); _mm256_storeu_pd(tC + 12, ymm15); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm12 = _mm256_fmadd_pd(ymm0, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); ymm14 = _mm256_fmadd_pd(ymm0, ymm3, ymm14); ymm3 = _mm256_loadu_pd(tA + 12); ymm15 = _mm256_fmadd_pd(ymm0, ymm3, ymm15); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_pd(tC + 0); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_pd(tC + 12); ymm15 = _mm256_fmadd_pd(ymm2, ymm1, ymm15); _mm256_storeu_pd(tC + 0, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); _mm256_storeu_pd(tC + 12, ymm15); } } m_remainder = M - row_idx; if (m_remainder >= 12) { m_remainder -= 12; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_pd(tA); // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_pd(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_pd(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_pd(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_pd(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_pd(ymm2, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_pd(tC + 8); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); _mm256_storeu_pd(tC + 8, ymm6); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10); _mm256_storeu_pd(tC, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); // multiply C by beta and accumulate. 
tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm8 = _mm256_fmadd_pd(ymm0, ymm3, ymm8); ymm12 = _mm256_fmadd_pd(ymm1, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); ymm9 = _mm256_fmadd_pd(ymm0, ymm3, ymm9); ymm13 = _mm256_fmadd_pd(ymm1, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm14 = _mm256_fmadd_pd(ymm1, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_pd(tC + 0); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10); _mm256_storeu_pd(tC + 0, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm12 = _mm256_fmadd_pd(ymm0, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); ymm14 = _mm256_fmadd_pd(ymm0, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_pd(tC + 0); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); _mm256_storeu_pd(tC + 0, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); } row_idx += 12; } if (m_remainder >= 8) { m_remainder -= 8; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm6 = _mm256_fmadd_pd(ymm1, ymm3, ymm6); ymm8 = _mm256_fmadd_pd(ymm2, ymm3, ymm8); ymm3 = _mm256_loadu_pd(tA + 4); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_pd(tC + 4); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7); _mm256_storeu_pd(tC, ymm6); _mm256_storeu_pd(tC + 4, ymm7); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); _mm256_storeu_pd(tC, ymm8); _mm256_storeu_pd(tC + 4, ymm9); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm6 = _mm256_fmadd_pd(ymm1, ymm3, ymm6); ymm3 = _mm256_loadu_pd(tA + 4); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); // multiply C by beta and accumulate. 
ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_pd(tC + 4); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7); _mm256_storeu_pd(tC, ymm6); _mm256_storeu_pd(tC + 4, ymm7); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm3 = _mm256_loadu_pd(tA + 4); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); } row_idx += 8; } if (m_remainder >= 4) { //printf("HERE\n"); m_remainder -= 4; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_pd(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_pd(ymm2, ymm3, ymm6); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); _mm256_storeu_pd(tC, ymm4); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); _mm256_storeu_pd(tC, ymm5); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); _mm256_storeu_pd(tC, ymm6); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_pd(ymm1, ymm3, ymm5); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); _mm256_storeu_pd(tC, ymm4); // multiply C by beta and accumulate. tC += ldc; ymm2 = _mm256_loadu_pd(tC); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); _mm256_storeu_pd(tC, ymm5); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); ymm4 = _mm256_mul_pd(ymm4, ymm0); // multiply C by beta and accumulate. ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); _mm256_storeu_pd(tC, ymm4); } row_idx += 4; } // M is not a multiple of 32. // The handling of edge case where the remainder // dimension is less than 8. The padding takes place // to handle this case. if ((m_remainder) && (lda > 3)) { double f_temp[8]; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm5 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); _mm256_storeu_pd(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7); _mm256_storeu_pd(f_temp, ymm7); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); _mm256_storeu_pd(f_temp, ymm9); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } n_remainder = N - col_idx; // if the N is not multiple of 3. 
// handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm5 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; ymm3 = _mm256_loadu_pd(tA); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); tA += lda; } ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); _mm256_storeu_pd(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7); _mm256_storeu_pd(f_temp, ymm7); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm5 = _mm256_setzero_pd(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. 
ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; ymm3 = _mm256_loadu_pd(tA); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); tA += lda; } ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm0 = _mm256_broadcast_sd(alpha_cast); ymm1 = _mm256_broadcast_sd(beta_cast); // multiply C by beta and accumulate. ymm5 = _mm256_mul_pd(ymm5, ymm0); for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); _mm256_storeu_pd(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } m_remainder = 0; } if (m_remainder) { double result; for (; row_idx < M; row_idx += 1) { for (col_idx = 0; col_idx < N; col_idx += 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; result = 0; for (k = 0; k < K; ++k) { result += (*tA) * (*tB); tA += lda; tB += tb_inc_row; } result *= (*alpha_cast); (*tC) = (*tC) * (*beta_cast) + result; } } } return BLIS_SUCCESS; } else return BLIS_NONCONFORMAL_DIMENSIONS; }; static err_t bli_sgemm_small_atbn ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { gint_t M = bli_obj_length( c ); // number of rows of Matrix C gint_t N = bli_obj_width( c ); // number of columns of Matrix C gint_t K = bli_obj_length( b ); // number of rows of Matrix B guint_t lda = bli_obj_col_stride( a ); // column stride of matrix OP(A), where OP(A) is Transpose(A) if transA enabled. guint_t ldb = bli_obj_col_stride( b ); // column stride of matrix OP(B), where OP(B) is Transpose(B) if transB enabled. 
guint_t ldc = bli_obj_col_stride( c ); // column stride of matrix C int row_idx = 0, col_idx = 0, k; float *A = a->buffer; // pointer to matrix A elements, stored in row major format float *B = b->buffer; // pointer to matrix B elements, stored in column major format float *C = c->buffer; // pointer to matrix C elements, stored in column major format float *tA = A, *tB = B, *tC = C; __m256 ymm4, ymm5, ymm6, ymm7; __m256 ymm8, ymm9, ymm10, ymm11; __m256 ymm12, ymm13, ymm14, ymm15; __m256 ymm0, ymm1, ymm2, ymm3; float result, scratch[8]; float *alpha_cast, *beta_cast; // alpha, beta multiples alpha_cast = (alpha->buffer); beta_cast = (beta->buffer); // The non-copy version of the A^T GEMM gives better performance for the small M cases. // The threshold is controlled by BLIS_ATBN_M_THRES if (M <= BLIS_ATBN_M_THRES) { for (col_idx = 0; (col_idx + (NR - 1)) < N; col_idx += NR) { for (row_idx = 0; (row_idx + (AT_MR - 1)) < M; row_idx += AT_MR) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm11 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); //The inner loop computes the 4x3 values of the matrix. 
//The computation pattern is: // ymm4 ymm5 ymm6 // ymm7 ymm8 ymm9 // ymm10 ymm11 ymm12 // ymm13 ymm14 ymm15 //The Dot operation is performed in the inner loop, 8 float elements fit //in the YMM register hence loop count incremented by 8 for (k = 0; (k + 7) < K; k += 8) { ymm0 = _mm256_loadu_ps(tB + 0); ymm1 = _mm256_loadu_ps(tB + ldb); ymm2 = _mm256_loadu_ps(tB + 2 * ldb); ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_ps(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_ps(ymm2, ymm3, ymm6); ymm3 = _mm256_loadu_ps(tA + lda); ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); ymm3 = _mm256_loadu_ps(tA + 2 * lda); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm11 = _mm256_fmadd_ps(ymm1, ymm3, ymm11); ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 3 * lda); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); ymm14 = _mm256_fmadd_ps(ymm1, ymm3, ymm14); ymm15 = _mm256_fmadd_ps(ymm2, ymm3, ymm15); tA += 8; tB += 8; } // if K is not a multiple of 8, padding is done before load using temproary array. 
if (k < K) { int iter; float data_feeder[8] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter + ldb]; ymm1 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter + 2 * ldb]; ymm2 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_ps(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_ps(ymm2, ymm3, ymm6); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[2 * lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm11 = _mm256_fmadd_ps(ymm1, ymm3, ymm11); ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[3 * lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); ymm14 = _mm256_fmadd_ps(ymm1, ymm3, ymm14); ymm15 = _mm256_fmadd_ps(ymm2, ymm3, ymm15); } //horizontal addition and storage of the data. 
//Results for 4x3 blocks of C is stored here ymm4 = _mm256_hadd_ps(ymm4, ymm4); ymm4 = _mm256_hadd_ps(ymm4, ymm4); _mm256_storeu_ps(scratch, ymm4); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); ymm7 = _mm256_hadd_ps(ymm7, ymm7); ymm7 = _mm256_hadd_ps(ymm7, ymm7); _mm256_storeu_ps(scratch, ymm7); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[1] = result + tC[1] * (*beta_cast); ymm10 = _mm256_hadd_ps(ymm10, ymm10); ymm10 = _mm256_hadd_ps(ymm10, ymm10); _mm256_storeu_ps(scratch, ymm10); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[2] = result + tC[2] * (*beta_cast); ymm13 = _mm256_hadd_ps(ymm13, ymm13); ymm13 = _mm256_hadd_ps(ymm13, ymm13); _mm256_storeu_ps(scratch, ymm13); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[3] = result + tC[3] * (*beta_cast); tC += ldc; ymm5 = _mm256_hadd_ps(ymm5, ymm5); ymm5 = _mm256_hadd_ps(ymm5, ymm5); _mm256_storeu_ps(scratch, ymm5); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); ymm8 = _mm256_hadd_ps(ymm8, ymm8); ymm8 = _mm256_hadd_ps(ymm8, ymm8); _mm256_storeu_ps(scratch, ymm8); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[1] = result + tC[1] * (*beta_cast); ymm11 = _mm256_hadd_ps(ymm11, ymm11); ymm11 = _mm256_hadd_ps(ymm11, ymm11); _mm256_storeu_ps(scratch, ymm11); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[2] = result + tC[2] * (*beta_cast); ymm14 = _mm256_hadd_ps(ymm14, ymm14); ymm14 = _mm256_hadd_ps(ymm14, ymm14); _mm256_storeu_ps(scratch, ymm14); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[3] = result + tC[3] * (*beta_cast); tC += ldc; ymm6 = _mm256_hadd_ps(ymm6, ymm6); ymm6 = _mm256_hadd_ps(ymm6, ymm6); _mm256_storeu_ps(scratch, ymm6); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); ymm9 = _mm256_hadd_ps(ymm9, ymm9); ymm9 = _mm256_hadd_ps(ymm9, ymm9); _mm256_storeu_ps(scratch, 
ymm9); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[1] = result + tC[1] * (*beta_cast); ymm12 = _mm256_hadd_ps(ymm12, ymm12); ymm12 = _mm256_hadd_ps(ymm12, ymm12); _mm256_storeu_ps(scratch, ymm12); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[2] = result + tC[2] * (*beta_cast); ymm15 = _mm256_hadd_ps(ymm15, ymm15); ymm15 = _mm256_hadd_ps(ymm15, ymm15); _mm256_storeu_ps(scratch, ymm15); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[3] = result + tC[3] * (*beta_cast); } } int processed_col = col_idx; int processed_row = row_idx; //The edge case handling where N is not a multiple of 3 if (processed_col < N) { for (col_idx = processed_col; col_idx < N; col_idx += 1) { for (row_idx = 0; (row_idx + (AT_MR - 1)) < M; row_idx += AT_MR) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); //The inner loop computes the 4x1 values of the matrix. //The computation pattern is: // ymm4 // ymm7 // ymm10 // ymm13 for (k = 0; (k + 7) < K; k += 8) { ymm0 = _mm256_loadu_ps(tB + 0); ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm3 = _mm256_loadu_ps(tA + lda); ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); ymm3 = _mm256_loadu_ps(tA + 2 * lda); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm3 = _mm256_loadu_ps(tA + 3 * lda); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); tA += 8; tB += 8; } // if K is not a multiple of 8, padding is done before load using temproary array. 
if (k < K) { int iter; float data_feeder[8] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[2 * lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[3 * lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); } //horizontal addition and storage of the data. //Results for 4x1 blocks of C is stored here ymm4 = _mm256_hadd_ps(ymm4, ymm4); ymm4 = _mm256_hadd_ps(ymm4, ymm4); _mm256_storeu_ps(scratch, ymm4); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); ymm7 = _mm256_hadd_ps(ymm7, ymm7); ymm7 = _mm256_hadd_ps(ymm7, ymm7); _mm256_storeu_ps(scratch, ymm7); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[1] = result + tC[1] * (*beta_cast); ymm10 = _mm256_hadd_ps(ymm10, ymm10); ymm10 = _mm256_hadd_ps(ymm10, ymm10); _mm256_storeu_ps(scratch, ymm10); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[2] = result + tC[2] * (*beta_cast); ymm13 = _mm256_hadd_ps(ymm13, ymm13); ymm13 = _mm256_hadd_ps(ymm13, ymm13); _mm256_storeu_ps(scratch, ymm13); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[3] = result + tC[3] * (*beta_cast); } } processed_row = row_idx; } //The edge case handling where M is not a multiple of 4 if (processed_row < M) { for (row_idx = processed_row; row_idx < M; row_idx += 1) { for (col_idx = 0; col_idx < N; col_idx += 1) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // 
clear scratch registers. ymm4 = _mm256_setzero_ps(); for (k = 0; (k + 7) < K; k += 8) { ymm0 = _mm256_loadu_ps(tB + 0); ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); tA += 8; tB += 8; } // if K is not a multiple of 8, padding is done before load using temproary array. if (k < K) { int iter; float data_feeder[8] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); } //horizontal addition and storage of the data. ymm4 = _mm256_hadd_ps(ymm4, ymm4); ymm4 = _mm256_hadd_ps(ymm4, ymm4); _mm256_storeu_ps(scratch, ymm4); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); } } } return BLIS_SUCCESS; } else return BLIS_NONCONFORMAL_DIMENSIONS; } static err_t bli_dgemm_small_atbn ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { gint_t M = bli_obj_length( c ); // number of rows of Matrix C gint_t N = bli_obj_width( c ); // number of columns of Matrix C gint_t K = bli_obj_length( b ); // number of rows of Matrix B guint_t lda = bli_obj_col_stride( a ); // column stride of matrix OP(A), where OP(A) is Transpose(A) if transA enabled. guint_t ldb = bli_obj_col_stride( b ); // column stride of matrix OP(B), where OP(B) is Transpose(B) if transB enabled. 
guint_t ldc = bli_obj_col_stride( c ); // column stride of matrix C guint_t row_idx = 0, col_idx = 0, k; double *A = a->buffer; // pointer to matrix A elements, stored in row major format double *B = b->buffer; // pointer to matrix B elements, stored in column major format double *C = c->buffer; // pointer to matrix C elements, stored in column major format double *tA = A, *tB = B, *tC = C; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm0, ymm1, ymm2, ymm3; double result, scratch[8]; double *alpha_cast, *beta_cast; // alpha, beta multiples alpha_cast = (alpha->buffer); beta_cast = (beta->buffer); // The non-copy version of the A^T GEMM gives better performance for the small M cases. // The threshold is controlled by BLIS_ATBN_M_THRES if (M <= BLIS_ATBN_M_THRES) { for (col_idx = 0; (col_idx + (NR - 1)) < N; col_idx += NR) { for (row_idx = 0; (row_idx + (AT_MR - 1)) < M; row_idx += AT_MR) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); //The inner loop computes the 4x3 values of the matrix. 
//The computation pattern is: // ymm4 ymm5 ymm6 // ymm7 ymm8 ymm9 // ymm10 ymm11 ymm12 // ymm13 ymm14 ymm15 //The Dot operation is performed in the inner loop, 4 double elements fit //in the YMM register hence loop count incremented by 4 for (k = 0; (k + 3) < K; k += 4) { ymm0 = _mm256_loadu_pd(tB + 0); ymm1 = _mm256_loadu_pd(tB + ldb); ymm2 = _mm256_loadu_pd(tB + 2 * ldb); ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_pd(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_pd(ymm2, ymm3, ymm6); ymm3 = _mm256_loadu_pd(tA + lda); ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); ymm3 = _mm256_loadu_pd(tA + 2 * lda); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm11 = _mm256_fmadd_pd(ymm1, ymm3, ymm11); ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 3 * lda); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); ymm14 = _mm256_fmadd_pd(ymm1, ymm3, ymm14); ymm15 = _mm256_fmadd_pd(ymm2, ymm3, ymm15); tA += 4; tB += 4; } // if K is not a multiple of 4, padding is done before load using temproary array. 
if (k < K) { int iter; double data_feeder[4] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter + ldb]; ymm1 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter + 2 * ldb]; ymm2 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_pd(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_pd(ymm2, ymm3, ymm6); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[2 * lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm11 = _mm256_fmadd_pd(ymm1, ymm3, ymm11); ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[3 * lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); ymm14 = _mm256_fmadd_pd(ymm1, ymm3, ymm14); ymm15 = _mm256_fmadd_pd(ymm2, ymm3, ymm15); } //horizontal addition and storage of the data. 
//Results for 4x3 blocks of C is stored here ymm4 = _mm256_hadd_pd(ymm4, ymm4); _mm256_storeu_pd(scratch, ymm4); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); ymm7 = _mm256_hadd_pd(ymm7, ymm7); _mm256_storeu_pd(scratch, ymm7); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[1] = result + tC[1] * (*beta_cast); ymm10 = _mm256_hadd_pd(ymm10, ymm10); _mm256_storeu_pd(scratch, ymm10); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[2] = result + tC[2] * (*beta_cast); ymm13 = _mm256_hadd_pd(ymm13, ymm13); _mm256_storeu_pd(scratch, ymm13); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[3] = result + tC[3] * (*beta_cast); tC += ldc; ymm5 = _mm256_hadd_pd(ymm5, ymm5); _mm256_storeu_pd(scratch, ymm5); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); ymm8 = _mm256_hadd_pd(ymm8, ymm8); _mm256_storeu_pd(scratch, ymm8); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[1] = result + tC[1] * (*beta_cast); ymm11 = _mm256_hadd_pd(ymm11, ymm11); _mm256_storeu_pd(scratch, ymm11); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[2] = result + tC[2] * (*beta_cast); ymm14 = _mm256_hadd_pd(ymm14, ymm14); _mm256_storeu_pd(scratch, ymm14); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[3] = result + tC[3] * (*beta_cast); tC += ldc; ymm6 = _mm256_hadd_pd(ymm6, ymm6); _mm256_storeu_pd(scratch, ymm6); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); ymm9 = _mm256_hadd_pd(ymm9, ymm9); _mm256_storeu_pd(scratch, ymm9); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[1] = result + tC[1] * (*beta_cast); ymm12 = _mm256_hadd_pd(ymm12, ymm12); _mm256_storeu_pd(scratch, ymm12); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[2] = result + tC[2] * (*beta_cast); ymm15 = _mm256_hadd_pd(ymm15, ymm15); _mm256_storeu_pd(scratch, ymm15); result = 
scratch[0] + scratch[2]; result *= (*alpha_cast); tC[3] = result + tC[3] * (*beta_cast); } } int processed_col = col_idx; int processed_row = row_idx; //The edge case handling where N is not a multiple of 3 if (processed_col < N) { for (col_idx = processed_col; col_idx < N; col_idx += 1) { for (row_idx = 0; (row_idx + (AT_MR - 1)) < M; row_idx += AT_MR) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); //The inner loop computes the 4x1 values of the matrix. //The computation pattern is: // ymm4 // ymm7 // ymm10 // ymm13 for (k = 0; (k + 3) < K; k += 4) { ymm0 = _mm256_loadu_pd(tB + 0); ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm3 = _mm256_loadu_pd(tA + lda); ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); ymm3 = _mm256_loadu_pd(tA + 2 * lda); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm3 = _mm256_loadu_pd(tA + 3 * lda); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); tA += 4; tB += 4; } // if K is not a multiple of 4, padding is done before load using temproary array. 
if (k < K) { int iter; double data_feeder[4] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[2 * lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[3 * lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); } //horizontal addition and storage of the data. //Results for 4x1 blocks of C is stored here ymm4 = _mm256_hadd_pd(ymm4, ymm4); _mm256_storeu_pd(scratch, ymm4); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); ymm7 = _mm256_hadd_pd(ymm7, ymm7); _mm256_storeu_pd(scratch, ymm7); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[1] = result + tC[1] * (*beta_cast); ymm10 = _mm256_hadd_pd(ymm10, ymm10); _mm256_storeu_pd(scratch, ymm10); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[2] = result + tC[2] * (*beta_cast); ymm13 = _mm256_hadd_pd(ymm13, ymm13); _mm256_storeu_pd(scratch, ymm13); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[3] = result + tC[3] * (*beta_cast); } } processed_row = row_idx; } // The edge case handling where M is not a multiple of 4 if (processed_row < M) { for (row_idx = processed_row; row_idx < M; row_idx += 1) { for (col_idx = 0; col_idx < N; col_idx += 1) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. 
ymm4 = _mm256_setzero_pd(); for (k = 0; (k + 3) < K; k += 4) { ymm0 = _mm256_loadu_pd(tB + 0); ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); tA += 4; tB += 4; } // if K is not a multiple of 4, padding is done before load using temproary array. if (k < K) { int iter; double data_feeder[4] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); } //horizontal addition and storage of the data. ymm4 = _mm256_hadd_pd(ymm4, ymm4); _mm256_storeu_pd(scratch, ymm4); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result + tC[0] * (*beta_cast); } } } return BLIS_SUCCESS; } else return BLIS_NONCONFORMAL_DIMENSIONS; } #endif blis-0.6.1/kernels/zen/3/bli_syrk_small.c000066400000000000000000005143531360743507500202470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas at Austin nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "immintrin.h" #include "xmmintrin.h" #include "blis.h" #ifdef BLIS_ENABLE_SMALL_MATRIX #define MR 32 #define D_MR (MR >> 1) #define NR 3 #define BLIS_ENABLE_PREFETCH #define F_SCRATCH_DIM (BLIS_SMALL_MATRIX_THRES * BLIS_SMALL_MATRIX_THRES) static float A_pack[F_SCRATCH_DIM] __attribute__((aligned(64))); static float C_pack[F_SCRATCH_DIM] __attribute__((aligned(64))); #define D_BLIS_SMALL_MATRIX_THRES (BLIS_SMALL_MATRIX_THRES / 2 ) #define D_BLIS_SMALL_M_RECT_MATRIX_THRES (BLIS_SMALL_M_RECT_MATRIX_THRES / 2) #define D_BLIS_SMALL_K_RECT_MATRIX_THRES (BLIS_SMALL_K_RECT_MATRIX_THRES / 2) #define D_SCRATCH_DIM (D_BLIS_SMALL_MATRIX_THRES * D_BLIS_SMALL_MATRIX_THRES) static double D_A_pack[D_SCRATCH_DIM] __attribute__((aligned(64))); static double D_C_pack[D_SCRATCH_DIM] __attribute__((aligned(64))); #define BLIS_ATBN_M_THRES 40 // Threshold value of M for/below which small matrix code is called. #define AT_MR 4 // The kernel dimension of the A transpose SYRK kernel.(AT_MR * NR). 
static err_t bli_ssyrk_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); static err_t bli_dsyrk_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); static err_t bli_ssyrk_small_atbn ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); static err_t bli_dsyrk_small_atbn ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ); /* * The bli_syrk_small function will use the * custom MRxNR kernels, to perform the computation. * The custom kernels are used if the [M * N] < 240 * 240 */ err_t bli_syrk_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { // FGVZ: This code was originally in bli_syrk_front(). However, it really // fits more naturally here within the bli_syrk_small() function. This // becomes a bit more obvious now that the code is here, as it contains // cpp macros such as BLIS_SMALL_MATRIX_A_THRES_M_SYRK, which are specific // to this implementation. if ( bli_obj_has_trans( a ) ) { // Continue with small implementation. ; } else if ( ( bli_obj_length( a ) <= BLIS_SMALL_MATRIX_A_THRES_M_SYRK && bli_obj_width( a ) < BLIS_SMALL_MATRIX_A_THRES_N_SYRK ) || ( bli_obj_length( a ) < BLIS_SMALL_MATRIX_A_THRES_M_SYRK && bli_obj_width( a ) <= BLIS_SMALL_MATRIX_A_THRES_N_SYRK ) ) { // Continue with small implementation. ; } else { // Reject the problem and return to large code path. return BLIS_FAILURE; } #ifdef BLIS_ENABLE_MULTITHREADING return BLIS_NOT_YET_IMPLEMENTED; #endif // If alpha is zero, scale by beta and return. if (bli_obj_equals(alpha, &BLIS_ZERO)) { return BLIS_NOT_YET_IMPLEMENTED; } // if row major format return. 
if ((bli_obj_row_stride( a ) != 1) || (bli_obj_row_stride( b ) != 1) || (bli_obj_row_stride( c ) != 1)) { return BLIS_INVALID_ROW_STRIDE; } num_t dt = ((*c).info & (0x7 << 0)); if (bli_obj_has_trans( a )) { if (bli_obj_has_notrans( b )) { if (dt == BLIS_FLOAT) { return bli_ssyrk_small_atbn(alpha, a, b, beta, c, cntx, cntl); } else if (dt == BLIS_DOUBLE) { return bli_dsyrk_small_atbn(alpha, a, b, beta, c, cntx, cntl); } } return BLIS_NOT_YET_IMPLEMENTED; } if (dt == BLIS_DOUBLE) { return bli_dsyrk_small(alpha, a, b, beta, c, cntx, cntl); } if (dt == BLIS_FLOAT) { return bli_ssyrk_small(alpha, a, b, beta, c, cntx, cntl); } return BLIS_NOT_YET_IMPLEMENTED; }; static err_t bli_ssyrk_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { int M = bli_obj_length( c ); // number of rows of Matrix C int N = bli_obj_width( c ); // number of columns of Matrix C int K = bli_obj_width( a ); // number of columns of OP(A), will be updated if OP(A) is Transpose(A) . int L = M * N; if ((((L) < (BLIS_SMALL_MATRIX_THRES * BLIS_SMALL_MATRIX_THRES)) || ((M < BLIS_SMALL_M_RECT_MATRIX_THRES) && (K < BLIS_SMALL_K_RECT_MATRIX_THRES))) && ((L!=0) && (K!=0))) { int lda = bli_obj_col_stride(a); // column stride of matrix OP(A), where OP(A) is Transpose(A) if transA enabled. int ldb = bli_obj_col_stride(b); // column stride of matrix OP(B), where OP(B) is Transpose(B) if transB enabled. 
int ldc_matC = bli_obj_col_stride( c ); // column stride of matrix C int ldc = M;//bli_obj_col_stride( c ); // column stride of static buffer for matrix C int row_idx, col_idx, k; int rs_matC = bli_obj_row_stride( c ); int rsc = 1; float *A = a->buffer; // pointer to elements of Matrix A float *B = b->buffer; // pointer to elements of Matrix B float *C = C_pack; // pointer to elements of Matrix C float *matCbuf = c->buffer; float *tA = A, *tB = B, *tC = C;//, *tA_pack; float *tA_packed; // temprorary pointer to hold packed A memory pointer int row_idx_packed; //packed A memory row index int lda_packed; //lda of packed A int col_idx_start; //starting index after A matrix is packed. dim_t tb_inc_row = 1; // row stride of matrix B dim_t tb_inc_col = ldb; // column stride of matrix B __m256 ymm4, ymm5, ymm6, ymm7; __m256 ymm8, ymm9, ymm10, ymm11; __m256 ymm12, ymm13, ymm14, ymm15; __m256 ymm0, ymm1, ymm2, ymm3; int n_remainder; // If the N is non multiple of 3.(N%3) int m_remainder; // If the M is non multiple of 32.(M%32) float *alpha_cast, *beta_cast; // alpha, beta multiples alpha_cast = (alpha->buffer); beta_cast = (beta->buffer); int required_packing_A = 1; // when N is equal to 1 call GEMV instead of SYRK if (N == 1) { bli_gemv ( alpha, a, b, beta, c ); return BLIS_SUCCESS; } //update the pointer math if matrix B needs to be transposed. if (bli_obj_has_trans( b )) { tb_inc_col = 1; //switch row and column strides tb_inc_row = ldb; } if ((N <= 3) || ((MR * K) > F_SCRATCH_DIM)) { required_packing_A = 0; } /* * The computation loop runs for MRxN columns of C matrix, thus * accessing the MRxK A matrix data and KxNR B matrix data. * The computation is organized as inner loops of dimension MRxNR. */ // Process MR rows of C matrix at a time. for (row_idx = 0; (row_idx + (MR - 1)) < M; row_idx += MR) { col_idx_start = 0; tA_packed = A; row_idx_packed = row_idx; lda_packed = lda; // This is the part of the pack and compute optimization. 
// During the first column iteration, we store the accessed A matrix into // contiguous static memory. This helps to keep te A matrix in Cache and // aviods the TLB misses. if (required_packing_A) { col_idx = 0; //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; tA_packed = A_pack; #if 0//def BLIS_ENABLE_PREFETCH _mm_prefetch((char*)(tC + 0), _MM_HINT_T0); _mm_prefetch((char*)(tC + 16), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc + 16), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc + 16), _MM_HINT_T0); #endif // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm11 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. // This loop is processing MR x K ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_ps(tA); _mm256_storeu_ps(tA_packed, ymm3); // the packing of matrix A // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); _mm256_storeu_ps(tA_packed + 8, ymm3); // the packing of matrix A // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_ps(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_ps(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); _mm256_storeu_ps(tA_packed + 16, ymm3); // the packing of matrix A // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_ps(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_ps(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_ps(ymm2, ymm3, ymm14); ymm3 = _mm256_loadu_ps(tA + 24); _mm256_storeu_ps(tA_packed + 24, ymm3); // the packing of matrix A // ymm7 += ymm0 * ymm3; ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); // ymm11 += ymm1 * ymm3; ymm11 = _mm256_fmadd_ps(ymm1, ymm3, ymm11); // ymm15 += ymm2 * ymm3; ymm15 = _mm256_fmadd_ps(ymm2, ymm3, ymm15); tA += lda; tA_packed += MR; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm11 = _mm256_mul_ps(ymm11, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); ymm15 = _mm256_mul_ps(ymm15, ymm0); // multiply C by beta and accumulate col 1. 
/*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_ps(tC + 16); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_ps(tC + 24); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7);*/ _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); _mm256_storeu_ps(tC + 16, ymm6); _mm256_storeu_ps(tC + 24, ymm7); // multiply C by beta and accumulate, col 2. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_ps(tC + 24); ymm11 = _mm256_fmadd_ps(ymm2, ymm1, ymm11);*/ _mm256_storeu_ps(tC, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); _mm256_storeu_ps(tC + 24, ymm11); // multiply C by beta and accumulate, col 3. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_ps(tC + 24); ymm15 = _mm256_fmadd_ps(ymm2, ymm1, ymm15);*/ _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); _mm256_storeu_ps(tC + 24, ymm15); // modify the pointer arithematic to use packed A matrix. col_idx_start = NR; tA_packed = A_pack; row_idx_packed = 0; lda_packed = MR; } // Process NR columns of C matrix at a time. 
for (col_idx = col_idx_start; (col_idx + (NR - 1)) < N; col_idx += NR) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = tA_packed + row_idx_packed; #if 0//def BLIS_ENABLE_PREFETCH _mm_prefetch((char*)(tC + 0), _MM_HINT_T0); _mm_prefetch((char*)(tC + 16), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc + 16), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc + 16), _MM_HINT_T0); #endif // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm11 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. // This loop is processing MR x K ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_ps(tA); // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_ps(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_ps(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_ps(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_ps(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_ps(ymm2, ymm3, ymm14); ymm3 = _mm256_loadu_ps(tA + 24); // ymm7 += ymm0 * ymm3; ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); // ymm11 += ymm1 * ymm3; ymm11 = _mm256_fmadd_ps(ymm1, ymm3, ymm11); // ymm15 += ymm2 * ymm3; ymm15 = _mm256_fmadd_ps(ymm2, ymm3, ymm15); tA += lda_packed; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm11 = _mm256_mul_ps(ymm11, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); ymm15 = _mm256_mul_ps(ymm15, ymm0); // multiply C by beta and accumulate col 1. 
/*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_ps(tC + 16); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_ps(tC + 24); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7);*/ _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); _mm256_storeu_ps(tC + 16, ymm6); _mm256_storeu_ps(tC + 24, ymm7); // multiply C by beta and accumulate, col 2. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_ps(tC + 24); ymm11 = _mm256_fmadd_ps(ymm2, ymm1, ymm11);*/ _mm256_storeu_ps(tC, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); _mm256_storeu_ps(tC + 24, ymm11); // multiply C by beta and accumulate, col 3. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_ps(tC + 24); ymm15 = _mm256_fmadd_ps(ymm2, ymm1, ymm15);*/ _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); _mm256_storeu_ps(tC + 24, ymm15); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm11 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm8 = _mm256_fmadd_ps(ymm0, ymm3, ymm8); ymm12 = _mm256_fmadd_ps(ymm1, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); ymm9 = _mm256_fmadd_ps(ymm0, ymm3, ymm9); ymm13 = _mm256_fmadd_ps(ymm1, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm14 = _mm256_fmadd_ps(ymm1, ymm3, ymm14); ymm3 = _mm256_loadu_ps(tA + 24); ymm11 = _mm256_fmadd_ps(ymm0, ymm3, ymm11); ymm15 = _mm256_fmadd_ps(ymm1, ymm3, ymm15); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm11 = _mm256_mul_ps(ymm11, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); ymm15 = _mm256_mul_ps(ymm15, ymm0); // multiply C by beta and accumulate, col 1. /*ymm2 = _mm256_loadu_ps(tC + 0); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_ps(tC + 24); ymm11 = _mm256_fmadd_ps(ymm2, ymm1, ymm11);*/ _mm256_storeu_ps(tC + 0, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); _mm256_storeu_ps(tC + 24, ymm11); // multiply C by beta and accumulate, col 2. 
tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_ps(tC + 24); ymm15 = _mm256_fmadd_ps(ymm2, ymm1, ymm15);*/ _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); _mm256_storeu_ps(tC + 24, ymm15); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm12 = _mm256_fmadd_ps(ymm0, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); ymm14 = _mm256_fmadd_ps(ymm0, ymm3, ymm14); ymm3 = _mm256_loadu_ps(tA + 24); ymm15 = _mm256_fmadd_ps(ymm0, ymm3, ymm15); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); ymm15 = _mm256_mul_ps(ymm15, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_ps(tC + 0); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_ps(tC + 24); ymm15 = _mm256_fmadd_ps(ymm2, ymm1, ymm15);*/ _mm256_storeu_ps(tC + 0, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); _mm256_storeu_ps(tC + 24, ymm15); } } m_remainder = M - row_idx; if (m_remainder >= 24) { m_remainder -= 24; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_ps(tA); // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_ps(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_ps(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_ps(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_ps(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_ps(ymm2, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_ps(tC + 16); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6);*/ _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); _mm256_storeu_ps(tC + 16, ymm6); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10);*/ _mm256_storeu_ps(tC, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); // multiply C by beta and accumulate. 
tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14);*/ _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm8 = _mm256_fmadd_ps(ymm0, ymm3, ymm8); ymm12 = _mm256_fmadd_ps(ymm1, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); ymm9 = _mm256_fmadd_ps(ymm0, ymm3, ymm9); ymm13 = _mm256_fmadd_ps(ymm1, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm14 = _mm256_fmadd_ps(ymm1, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); ymm10 = _mm256_mul_ps(ymm10, ymm0); ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_ps(tC + 0); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_ps(tC + 16); ymm10 = _mm256_fmadd_ps(ymm2, ymm1, ymm10);*/ _mm256_storeu_ps(tC + 0, ymm8); _mm256_storeu_ps(tC + 8, ymm9); _mm256_storeu_ps(tC + 16, ymm10); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14);*/ _mm256_storeu_ps(tC, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm12 = _mm256_fmadd_ps(ymm0, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 8); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); ymm3 = _mm256_loadu_ps(tA + 16); ymm14 = _mm256_fmadd_ps(ymm0, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm12 = _mm256_mul_ps(ymm12, ymm0); ymm13 = _mm256_mul_ps(ymm13, ymm0); ymm14 = _mm256_mul_ps(ymm14, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_ps(tC + 0); ymm12 = _mm256_fmadd_ps(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_ps(tC + 8); ymm13 = _mm256_fmadd_ps(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_ps(tC + 16); ymm14 = _mm256_fmadd_ps(ymm2, ymm1, ymm14);*/ _mm256_storeu_ps(tC + 0, ymm12); _mm256_storeu_ps(tC + 8, ymm13); _mm256_storeu_ps(tC + 16, ymm14); } row_idx += 24; } if (m_remainder >= 16) { m_remainder -= 16; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm6 = _mm256_fmadd_ps(ymm1, ymm3, ymm6); ymm8 = _mm256_fmadd_ps(ymm2, ymm3, ymm8); ymm3 = _mm256_loadu_ps(tA + 8); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); ymm8 = _mm256_mul_ps(ymm8, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5);*/ _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_ps(tC + 8); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7);*/ _mm256_storeu_ps(tC, ymm6); _mm256_storeu_ps(tC + 8, ymm7); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm8 = _mm256_fmadd_ps(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_ps(tC + 8); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9);*/ _mm256_storeu_ps(tC, ymm8); _mm256_storeu_ps(tC + 8, ymm9); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm6 = _mm256_fmadd_ps(ymm1, ymm3, ymm6); ymm3 = _mm256_loadu_ps(tA + 8); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5);*/ _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_ps(tC + 8); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7);*/ _mm256_storeu_ps(tC, ymm6); _mm256_storeu_ps(tC + 8, ymm7); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm3 = _mm256_loadu_ps(tA + 8); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_ps(tC + 8); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5);*/ _mm256_storeu_ps(tC, ymm4); _mm256_storeu_ps(tC + 8, ymm5); } row_idx += 16; } if (m_remainder >= 8) { m_remainder -= 8; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_ps(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_ps(ymm2, ymm3, ymm6); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm6 = _mm256_mul_ps(ymm6, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4);*/ _mm256_storeu_ps(tC, ymm4); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5);*/ _mm256_storeu_ps(tC, ymm5); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm6 = _mm256_fmadd_ps(ymm2, ymm1, ymm6);*/ _mm256_storeu_ps(tC, ymm6); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_ps(ymm1, ymm3, ymm5); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_ps(ymm4, ymm0); ymm5 = _mm256_mul_ps(ymm5, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4);*/ _mm256_storeu_ps(tC, ymm4); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_ps(tC); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5);*/ _mm256_storeu_ps(tC, ymm5); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_ps(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); ymm4 = _mm256_mul_ps(ymm4, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_ps(tC); ymm4 = _mm256_fmadd_ps(ymm2, ymm1, ymm4);*/ _mm256_storeu_ps(tC, ymm4); } row_idx += 8; } // M is not a multiple of 32. // The handling of edge case where the remainder // dimension is less than 8. The padding takes place // to handle this case. if ((m_remainder) && (lda > 7)) { float f_temp[8]; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm5 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_ps(tA); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_ss(tB + tb_inc_col * 2); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); //multiply A*B by alpha. 
ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); ymm9 = _mm256_mul_ps(ymm9, ymm0); /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5);*/ _mm256_storeu_ps(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7);*/ _mm256_storeu_ps(f_temp, ymm7); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm9 = _mm256_fmadd_ps(ymm2, ymm1, ymm9);*/ _mm256_storeu_ps(f_temp, ymm9); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm5 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. 
ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; ymm3 = _mm256_loadu_ps(tA); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); tA += lda; } ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_ss(tB + tb_inc_col * 1); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_ps(ymm1, ymm3, ymm7); ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); ymm5 = _mm256_mul_ps(ymm5, ymm0); ymm7 = _mm256_mul_ps(ymm7, ymm0); /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5);*/ _mm256_storeu_ps(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm7 = _mm256_fmadd_ps(ymm2, ymm1, ymm7);*/ _mm256_storeu_ps(f_temp, ymm7); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm5 = _mm256_setzero_ps(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; ymm3 = _mm256_loadu_ps(tA); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); tA += lda; } ymm0 = _mm256_broadcast_ss(tB + tb_inc_col * 0); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm0, ymm3, ymm5); ymm0 = _mm256_broadcast_ss(alpha_cast); //ymm1 = _mm256_broadcast_ss(beta_cast); // multiply C by beta and accumulate. 
ymm5 = _mm256_mul_ps(ymm5, ymm0); /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_ps(f_temp); ymm5 = _mm256_fmadd_ps(ymm2, ymm1, ymm5);*/ _mm256_storeu_ps(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } m_remainder = 0; } if (m_remainder) { float result; for (; row_idx < M; row_idx += 1) { for (col_idx = 0; col_idx < N; col_idx += 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; result = 0; for (k = 0; k < K; ++k) { result += (*tA) * (*tB); tA += lda; tB += tb_inc_row; } result *= (*alpha_cast); (*tC) = /*(*tC) * (*beta_cast) + */result; } } } //copy/compute sryk values back to C using SIMD if ( bli_seq0( *beta_cast ) ) {//just copy in case of beta = 0 dim_t _i, _j, k, _l; if(bli_obj_is_lower(c)) // c is lower { //first column _j = 0; k = M >> 3; _i = 0; for ( _l = 0; _l < k; _l++ ) { ymm0 = _mm256_loadu_ps((C + _i*rsc)); _mm256_storeu_ps((matCbuf + _i*rs_matC), ymm0); _i += 8; } while (_i < M ) { bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } _j++; while ( _j < N ) //next column { //k = (_j + (8 - (_j & 7))); _l = _j & 7; k = (_l != 0) ? (_j + (8 - _l)) : _j; k = (k <= M) ? 
k : M; for ( _i = _j; _i < k; ++_i ) { bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } k = (M - _i) >> 3; _l = 0; while ( _l < k ) { ymm0 = _mm256_loadu_ps((C + _i*rsc + _j*ldc)); _mm256_storeu_ps((matCbuf + _i*rs_matC + _j*ldc_matC), ymm0); _i += 8; _l++; } while (_i < M ) { bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } _j++; } } else //c is upper { for ( _j = 0; _j < N; ++_j ) { k = (_j + 1) >> 3; _i = 0; _l = 0; while ( _l < k ) { ymm0 = _mm256_loadu_ps((C + _i*rsc + _j*ldc)); _mm256_storeu_ps((matCbuf + _i*rs_matC + _j*ldc_matC), ymm0); _i += 8; _l++; } while (_i <= _j ) { bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; } } } } else {//when beta is non-zero, fmadd and store the results dim_t _i, _j, k, _l; ymm1 = _mm256_broadcast_ss(beta_cast); if(bli_obj_is_lower(c)) //c is lower { //first column _j = 0; k = M >> 3; _i = 0; for ( _l = 0; _l < k; _l++ ) { ymm2 = _mm256_loadu_ps((matCbuf + _i*rs_matC)); ymm0 = _mm256_loadu_ps((C + _i*rsc)); ymm0 = _mm256_fmadd_ps(ymm2, ymm1, ymm0); _mm256_storeu_ps((matCbuf + _i*rs_matC), ymm0); _i += 8; } while (_i < M ) { bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } _j++; while ( _j < N ) //next column { //k = (_j + (8 - (_j & 7))); _l = _j & 7; k = (_l != 0) ? (_j + (8 - _l)) : _j; k = (k <= M) ? 
k : M; for ( _i = _j; _i < k; ++_i ) { bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } k = (M - _i) >> 3; _l = 0; while ( _l < k ) { ymm2 = _mm256_loadu_ps((matCbuf + _i*rs_matC + _j*ldc_matC)); ymm0 = _mm256_loadu_ps((C + _i*rsc + _j*ldc)); ymm0 = _mm256_fmadd_ps(ymm2, ymm1, ymm0); _mm256_storeu_ps((matCbuf + _i*rs_matC + _j*ldc_matC), ymm0); _i += 8; _l++; } while (_i < M ) { bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } _j++; } } else //c is upper { for ( _j = 0; _j < N; ++_j ) { k = (_j + 1) >> 3; _i = 0; _l = 0; while ( _l < k ) { ymm2 = _mm256_loadu_ps((matCbuf + _i*rs_matC + _j*ldc_matC)); ymm0 = _mm256_loadu_ps((C + _i*rsc + _j*ldc)); ymm0 = _mm256_fmadd_ps(ymm2, ymm1, ymm0); _mm256_storeu_ps((matCbuf + _i*rs_matC + _j*ldc_matC), ymm0); _i += 8; _l++; } while (_i <= _j ) { bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; } } } } return BLIS_SUCCESS; } else return BLIS_NONCONFORMAL_DIMENSIONS; }; static err_t bli_dsyrk_small ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { int M = bli_obj_length( c ); // number of rows of Matrix C int N = bli_obj_width( c ); // number of columns of Matrix C int K = bli_obj_width( a ); // number of columns of OP(A), will be updated if OP(A) is Transpose(A) . int L = M * N; // If alpha is zero, scale by beta and return. if ((((L) < (D_BLIS_SMALL_MATRIX_THRES * D_BLIS_SMALL_MATRIX_THRES)) || ((M < D_BLIS_SMALL_M_RECT_MATRIX_THRES) && (K < D_BLIS_SMALL_K_RECT_MATRIX_THRES))) && ((L!=0) && (K!=0))) { int lda = bli_obj_col_stride( a ); // column stride of matrix OP(A), where OP(A) is Transpose(A) if transA enabled. int ldb = bli_obj_col_stride( b ); // column stride of matrix OP(B), where OP(B) is Transpose(B) if transB enabled. 
int ldc_matC = bli_obj_col_stride( c ); // column stride of matrix C int ldc = M;//bli_obj_col_stride( c ); // column stride of static buffer for matrix C int row_idx, col_idx, k; int rs_matC = bli_obj_row_stride( c ); int rsc = 1; double *A = a->buffer; // pointer to elements of Matrix A double *B = b->buffer; // pointer to elements of Matrix B double *C = D_C_pack; // pointer to elements of Matrix C double *matCbuf = c->buffer; double *tA = A, *tB = B, *tC = C;//, *tA_pack; double *tA_packed; // temprorary pointer to hold packed A memory pointer int row_idx_packed; //packed A memory row index int lda_packed; //lda of packed A int col_idx_start; //starting index after A matrix is packed. dim_t tb_inc_row = 1; // row stride of matrix B dim_t tb_inc_col = ldb; // column stride of matrix B __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm0, ymm1, ymm2, ymm3; int n_remainder; // If the N is non multiple of 3.(N%3) int m_remainder; // If the M is non multiple of 16.(M%16) double *alpha_cast, *beta_cast; // alpha, beta multiples alpha_cast = (alpha->buffer); beta_cast = (beta->buffer); int required_packing_A = 1; // when N is equal to 1 call GEMV instead of SYRK if (N == 1) { bli_gemv ( alpha, a, b, beta, c ); return BLIS_SUCCESS; } //update the pointer math if matrix B needs to be transposed. if (bli_obj_has_trans( b )) { tb_inc_col = 1; //switch row and column strides tb_inc_row = ldb; } if ((N <= 3) || ((D_MR * K) > D_SCRATCH_DIM)) { required_packing_A = 0; } /* * The computation loop runs for D_MRxN columns of C matrix, thus * accessing the D_MRxK A matrix data and KxNR B matrix data. * The computation is organized as inner loops of dimension D_MRxNR. */ // Process D_MR rows of C matrix at a time. for (row_idx = 0; (row_idx + (D_MR - 1)) < M; row_idx += D_MR) { col_idx_start = 0; tA_packed = A; row_idx_packed = row_idx; lda_packed = lda; // This is the part of the pack and compute optimization. 
// During the first column iteration, we store the accessed A matrix into // contiguous static memory. This helps to keep te A matrix in Cache and // aviods the TLB misses. if (required_packing_A) { col_idx = 0; //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; tA_packed = D_A_pack; #if 0//def BLIS_ENABLE_PREFETCH _mm_prefetch((char*)(tC + 0), _MM_HINT_T0); _mm_prefetch((char*)(tC + 8), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc + 8), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc + 8), _MM_HINT_T0); #endif // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. // This loop is processing D_MR x K ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_pd(tA); _mm256_storeu_pd(tA_packed, ymm3); // the packing of matrix A // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); _mm256_storeu_pd(tA_packed + 4, ymm3); // the packing of matrix A // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_pd(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_pd(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); _mm256_storeu_pd(tA_packed + 8, ymm3); // the packing of matrix A // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_pd(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_pd(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_pd(ymm2, ymm3, ymm14); ymm3 = _mm256_loadu_pd(tA + 12); _mm256_storeu_pd(tA_packed + 12, ymm3); // the packing of matrix A // ymm7 += ymm0 * ymm3; ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); // ymm11 += ymm1 * ymm3; ymm11 = _mm256_fmadd_pd(ymm1, ymm3, ymm11); // ymm15 += ymm2 * ymm3; ymm15 = _mm256_fmadd_pd(ymm2, ymm3, ymm15); tA += lda; tA_packed += D_MR; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm11 = _mm256_mul_pd(ymm11, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); // multiply C by beta and accumulate col 1. 
/*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_pd(tC + 8); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_pd(tC + 12); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7);*/ _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); _mm256_storeu_pd(tC + 8, ymm6); _mm256_storeu_pd(tC + 12, ymm7); // multiply C by beta and accumulate, col 2. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_pd(tC + 12); ymm11 = _mm256_fmadd_pd(ymm2, ymm1, ymm11);*/ _mm256_storeu_pd(tC, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); _mm256_storeu_pd(tC + 12, ymm11); // multiply C by beta and accumulate, col 3. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_pd(tC + 12); ymm15 = _mm256_fmadd_pd(ymm2, ymm1, ymm15);*/ _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); _mm256_storeu_pd(tC + 12, ymm15); // modify the pointer arithematic to use packed A matrix. col_idx_start = NR; tA_packed = D_A_pack; row_idx_packed = 0; lda_packed = D_MR; } // Process NR columns of C matrix at a time. 
for (col_idx = col_idx_start; (col_idx + (NR - 1)) < N; col_idx += NR) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = tA_packed + row_idx_packed; #if 0//def BLIS_ENABLE_PREFETCH _mm_prefetch((char*)(tC + 0), _MM_HINT_T0); _mm_prefetch((char*)(tC + 8), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + ldc + 8), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc), _MM_HINT_T0); _mm_prefetch((char*)(tC + 2 * ldc + 8), _MM_HINT_T0); #endif // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. // This loop is processing D_MR x K ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_pd(tA); // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_pd(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_pd(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_pd(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_pd(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_pd(ymm2, ymm3, ymm14); ymm3 = _mm256_loadu_pd(tA + 12); // ymm7 += ymm0 * ymm3; ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); // ymm11 += ymm1 * ymm3; ymm11 = _mm256_fmadd_pd(ymm1, ymm3, ymm11); // ymm15 += ymm2 * ymm3; ymm15 = _mm256_fmadd_pd(ymm2, ymm3, ymm15); tA += lda_packed; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm11 = _mm256_mul_pd(ymm11, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); // multiply C by beta and accumulate col 1. 
/*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_pd(tC + 8); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_pd(tC + 12); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7);*/ _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); _mm256_storeu_pd(tC + 8, ymm6); _mm256_storeu_pd(tC + 12, ymm7); // multiply C by beta and accumulate, col 2. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_pd(tC + 12); ymm11 = _mm256_fmadd_pd(ymm2, ymm1, ymm11);*/ _mm256_storeu_pd(tC, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); _mm256_storeu_pd(tC + 12, ymm11); // multiply C by beta and accumulate, col 3. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_pd(tC + 12); ymm15 = _mm256_fmadd_pd(ymm2, ymm1, ymm15);*/ _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); _mm256_storeu_pd(tC + 12, ymm15); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm8 = _mm256_fmadd_pd(ymm0, ymm3, ymm8); ymm12 = _mm256_fmadd_pd(ymm1, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); ymm9 = _mm256_fmadd_pd(ymm0, ymm3, ymm9); ymm13 = _mm256_fmadd_pd(ymm1, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm14 = _mm256_fmadd_pd(ymm1, ymm3, ymm14); ymm3 = _mm256_loadu_pd(tA + 12); ymm11 = _mm256_fmadd_pd(ymm0, ymm3, ymm11); ymm15 = _mm256_fmadd_pd(ymm1, ymm3, ymm15); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm11 = _mm256_mul_pd(ymm11, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); // multiply C by beta and accumulate, col 1. /*ymm2 = _mm256_loadu_pd(tC + 0); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10); ymm2 = _mm256_loadu_pd(tC + 12); ymm11 = _mm256_fmadd_pd(ymm2, ymm1, ymm11);*/ _mm256_storeu_pd(tC + 0, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); _mm256_storeu_pd(tC + 12, ymm11); // multiply C by beta and accumulate, col 2. 
tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_pd(tC + 12); ymm15 = _mm256_fmadd_pd(ymm2, ymm1, ymm15);*/ _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); _mm256_storeu_pd(tC + 12, ymm15); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm12 = _mm256_fmadd_pd(ymm0, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); ymm14 = _mm256_fmadd_pd(ymm0, ymm3, ymm14); ymm3 = _mm256_loadu_pd(tA + 12); ymm15 = _mm256_fmadd_pd(ymm0, ymm3, ymm15); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_pd(tC + 0); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14); ymm2 = _mm256_loadu_pd(tC + 12); ymm15 = _mm256_fmadd_pd(ymm2, ymm1, ymm15);*/ _mm256_storeu_pd(tC + 0, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); _mm256_storeu_pd(tC + 12, ymm15); } } m_remainder = M - row_idx; if (m_remainder >= 12) { m_remainder -= 12; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_pd(tA); // ymm4 += ymm0 * ymm3; ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); // ymm8 += ymm1 * ymm3; ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); // ymm12 += ymm2 * ymm3; ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); // ymm5 += ymm0 * ymm3; ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); // ymm9 += ymm1 * ymm3; ymm9 = _mm256_fmadd_pd(ymm1, ymm3, ymm9); // ymm13 += ymm2 * ymm3; ymm13 = _mm256_fmadd_pd(ymm2, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); // ymm6 += ymm0 * ymm3; ymm6 = _mm256_fmadd_pd(ymm0, ymm3, ymm6); // ymm10 += ymm1 * ymm3; ymm10 = _mm256_fmadd_pd(ymm1, ymm3, ymm10); // ymm14 += ymm2 * ymm3; ymm14 = _mm256_fmadd_pd(ymm2, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5); ymm2 = _mm256_loadu_pd(tC + 8); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6);*/ _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); _mm256_storeu_pd(tC + 8, ymm6); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10);*/ _mm256_storeu_pd(tC, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); // multiply C by beta and accumulate. 
tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14);*/ _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm8 = _mm256_fmadd_pd(ymm0, ymm3, ymm8); ymm12 = _mm256_fmadd_pd(ymm1, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); ymm9 = _mm256_fmadd_pd(ymm0, ymm3, ymm9); ymm13 = _mm256_fmadd_pd(ymm1, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm14 = _mm256_fmadd_pd(ymm1, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_pd(tC + 0); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9); ymm2 = _mm256_loadu_pd(tC + 8); ymm10 = _mm256_fmadd_pd(ymm2, ymm1, ymm10);*/ _mm256_storeu_pd(tC + 0, ymm8); _mm256_storeu_pd(tC + 4, ymm9); _mm256_storeu_pd(tC + 8, ymm10); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14);*/ _mm256_storeu_pd(tC, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm12 = _mm256_fmadd_pd(ymm0, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 4); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); ymm3 = _mm256_loadu_pd(tA + 8); ymm14 = _mm256_fmadd_pd(ymm0, ymm3, ymm14); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm12 = _mm256_mul_pd(ymm12, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_pd(tC + 0); ymm12 = _mm256_fmadd_pd(ymm2, ymm1, ymm12); ymm2 = _mm256_loadu_pd(tC + 4); ymm13 = _mm256_fmadd_pd(ymm2, ymm1, ymm13); ymm2 = _mm256_loadu_pd(tC + 8); ymm14 = _mm256_fmadd_pd(ymm2, ymm1, ymm14);*/ _mm256_storeu_pd(tC + 0, ymm12); _mm256_storeu_pd(tC + 4, ymm13); _mm256_storeu_pd(tC + 8, ymm14); } row_idx += 12; } if (m_remainder >= 8) { m_remainder -= 8; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm6 = _mm256_fmadd_pd(ymm1, ymm3, ymm6); ymm8 = _mm256_fmadd_pd(ymm2, ymm3, ymm8); ymm3 = _mm256_loadu_pd(tA + 4); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); ymm8 = _mm256_mul_pd(ymm8, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5);*/ _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_pd(tC + 4); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7);*/ _mm256_storeu_pd(tC, ymm6); _mm256_storeu_pd(tC + 4, ymm7); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm8 = _mm256_fmadd_pd(ymm2, ymm1, ymm8); ymm2 = _mm256_loadu_pd(tC + 4); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9);*/ _mm256_storeu_pd(tC, ymm8); _mm256_storeu_pd(tC + 4, ymm9); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm6 = _mm256_fmadd_pd(ymm1, ymm3, ymm6); ymm3 = _mm256_loadu_pd(tA + 4); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); // multiply C by beta and accumulate. 
/*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5);*/ _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6); ymm2 = _mm256_loadu_pd(tC + 4); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7);*/ _mm256_storeu_pd(tC, ymm6); _mm256_storeu_pd(tC + 4, ymm7); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm3 = _mm256_loadu_pd(tA + 4); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4); ymm2 = _mm256_loadu_pd(tC + 4); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5);*/ _mm256_storeu_pd(tC, ymm4); _mm256_storeu_pd(tC + 4, ymm5); } row_idx += 8; } if (m_remainder >= 4) { m_remainder -= 4; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_pd(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_pd(ymm2, ymm3, ymm6); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm6 = _mm256_mul_pd(ymm6, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4);*/ _mm256_storeu_pd(tC, ymm4); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5);*/ _mm256_storeu_pd(tC, ymm5); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm6 = _mm256_fmadd_pd(ymm2, ymm1, ymm6);*/ _mm256_storeu_pd(tC, ymm6); } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. 
ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_pd(ymm1, ymm3, ymm5); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. ymm4 = _mm256_mul_pd(ymm4, ymm0); ymm5 = _mm256_mul_pd(ymm5, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4);*/ _mm256_storeu_pd(tC, ymm4); // multiply C by beta and accumulate. tC += ldc; /*ymm2 = _mm256_loadu_pd(tC); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5);*/ _mm256_storeu_pd(tC, ymm5); col_idx += 2; } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm4 = _mm256_setzero_pd(); for (k = 0; k < K; ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); ymm4 = _mm256_mul_pd(ymm4, ymm0); // multiply C by beta and accumulate. /*ymm2 = _mm256_loadu_pd(tC); ymm4 = _mm256_fmadd_pd(ymm2, ymm1, ymm4);*/ _mm256_storeu_pd(tC, ymm4); } row_idx += 4; } // M is not a multiple of 32. // The handling of edge case where the remainder // dimension is less than 8. The padding takes place // to handle this case. if ((m_remainder) && (lda > 3)) { double f_temp[8]; for (col_idx = 0; (col_idx + 2) < N; col_idx += 3) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; // clear scratch registers. 
ymm5 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; //broadcasted matrix B elements are multiplied //with matrix A columns. ymm3 = _mm256_loadu_pd(tA); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); tA += lda; } // alpha, beta multiplication. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); ymm2 = _mm256_broadcast_sd(tB + tb_inc_col * 2); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); //multiply A*B by alpha. 
ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); ymm9 = _mm256_mul_pd(ymm9, ymm0); /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5);*/ _mm256_storeu_pd(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7);*/ _mm256_storeu_pd(f_temp, ymm7); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm9 = _mm256_fmadd_pd(ymm2, ymm1, ymm9);*/ _mm256_storeu_pd(f_temp, ymm9); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } n_remainder = N - col_idx; // if the N is not multiple of 3. // handling edge case. if (n_remainder == 2) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm5 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. 
ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; ymm3 = _mm256_loadu_pd(tA); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); tA += lda; } ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); ymm1 = _mm256_broadcast_sd(tB + tb_inc_col * 1); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm7 = _mm256_fmadd_pd(ymm1, ymm3, ymm7); ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); ymm5 = _mm256_mul_pd(ymm5, ymm0); ymm7 = _mm256_mul_pd(ymm7, ymm0); /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5);*/ _mm256_storeu_pd(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } tC += ldc; /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm7 = _mm256_fmadd_pd(ymm2, ymm1, ymm7);*/ _mm256_storeu_pd(f_temp, ymm7); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } // if the N is not multiple of 3. // handling edge case. if (n_remainder == 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; ymm5 = _mm256_setzero_pd(); for (k = 0; k < (K - 1); ++k) { // The inner loop broadcasts the B matrix data and // multiplies it with the A matrix. ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; ymm3 = _mm256_loadu_pd(tA); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); tA += lda; } ymm0 = _mm256_broadcast_sd(tB + tb_inc_col * 0); tB += tb_inc_row; for (int i = 0; i < m_remainder; i++) { f_temp[i] = tA[i]; } ymm3 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm0, ymm3, ymm5); ymm0 = _mm256_broadcast_sd(alpha_cast); //ymm1 = _mm256_broadcast_sd(beta_cast); // multiply C by beta and accumulate. 
ymm5 = _mm256_mul_pd(ymm5, ymm0); /*for (int i = 0; i < m_remainder; i++) { f_temp[i] = tC[i]; } ymm2 = _mm256_loadu_pd(f_temp); ymm5 = _mm256_fmadd_pd(ymm2, ymm1, ymm5);*/ _mm256_storeu_pd(f_temp, ymm5); for (int i = 0; i < m_remainder; i++) { tC[i] = f_temp[i]; } } m_remainder = 0; } if (m_remainder) { double result; for (; row_idx < M; row_idx += 1) { for (col_idx = 0; col_idx < N; col_idx += 1) { //pointer math to point to proper memory tC = C + ldc * col_idx + row_idx; tB = B + tb_inc_col * col_idx; tA = A + row_idx; result = 0; for (k = 0; k < K; ++k) { result += (*tA) * (*tB); tA += lda; tB += tb_inc_row; } result *= (*alpha_cast); (*tC) = /*(*tC) * (*beta_cast) + */result; } } } //copy/compute sryk values back to C using SIMD if ( bli_seq0( *beta_cast ) ) {//just copy for beta = 0 dim_t _i, _j, k, _l; if(bli_obj_is_lower(c)) //c is lower { //first column _j = 0; k = M >> 2; _i = 0; for ( _l = 0; _l < k; _l++ ) { ymm0 = _mm256_loadu_pd((C + _i*rsc)); _mm256_storeu_pd((matCbuf + _i*rs_matC), ymm0); _i += 4; } while (_i < M ) { bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } _j++; while ( _j < N ) //next column { //k = (_j + (4 - (_j & 3))); _l = _j & 3; k = (_l != 0) ? (_j + (4 - _l)) : _j; k = (k <= M) ? 
k : M; for ( _i = _j; _i < k; ++_i ) { bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } k = (M - _i) >> 2; _l = 0; while ( _l < k ) { ymm0 = _mm256_loadu_pd((C + _i*rsc + _j*ldc)); _mm256_storeu_pd((matCbuf + _i*rs_matC + _j*ldc_matC), ymm0); _i += 4; _l++; } while (_i < M ) { bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } _j++; } } else //c is upper { for ( _j = 0; _j < N; ++_j ) { k = (_j + 1) >> 2; _i = 0; _l = 0; while ( _l < k ) { ymm0 = _mm256_loadu_pd((C + _i*rsc + _j*ldc)); _mm256_storeu_pd((matCbuf + _i*rs_matC + _j*ldc_matC), ymm0); _i += 4; _l++; } while (_i <= _j ) { bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; } } } } else {//when beta is non-zero, fmadd and store the results dim_t _i, _j, k, _l; ymm1 = _mm256_broadcast_sd(beta_cast); if(bli_obj_is_lower(c)) //c is lower { //first column _j = 0; k = M >> 2; _i = 0; for ( _l = 0; _l < k; _l++ ) { ymm2 = _mm256_loadu_pd((matCbuf + _i*rs_matC)); ymm0 = _mm256_loadu_pd((C + _i*rsc)); ymm0 = _mm256_fmadd_pd(ymm2, ymm1, ymm0); _mm256_storeu_pd((matCbuf + _i*rs_matC), ymm0); _i += 4; } while (_i < M ) { bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } _j++; while ( _j < N ) //next column { //k = (_j + (4 - (_j & 3))); _l = _j & 3; k = (_l != 0) ? (_j + (4 - _l)) : _j; k = (k <= M) ? 
k : M; for ( _i = _j; _i < k; ++_i ) { bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } k = (M - _i) >> 2; _l = 0; while ( _l < k ) { ymm2 = _mm256_loadu_pd((matCbuf + _i*rs_matC + _j*ldc_matC)); ymm0 = _mm256_loadu_pd((C + _i*rsc + _j*ldc)); ymm0 = _mm256_fmadd_pd(ymm2, ymm1, ymm0); _mm256_storeu_pd((matCbuf + _i*rs_matC + _j*ldc_matC), ymm0); _i += 4; _l++; } while (_i < M ) { bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); _i++; } _j++; } } else //c is upper { for ( _j = 0; _j < N; ++_j ) { k = (_j + 1) >> 2; _i = 0; _l = 0; while ( _l < k ) { ymm2 = _mm256_loadu_pd((matCbuf + _i*rs_matC + _j*ldc_matC)); ymm0 = _mm256_loadu_pd((C + _i*rsc + _j*ldc)); ymm0 = _mm256_fmadd_pd(ymm2, ymm1, ymm0); _mm256_storeu_pd((matCbuf + _i*rs_matC + _j*ldc_matC), ymm0); _i += 4; _l++; } while (_i <= _j ) { bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); ++_i; } } } } return BLIS_SUCCESS; } else return BLIS_NONCONFORMAL_DIMENSIONS; }; static err_t bli_ssyrk_small_atbn ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { int M = bli_obj_length(c); // number of rows of Matrix C int N = bli_obj_width(c); // number of columns of Matrix C int K = bli_obj_length(b); // number of rows of Matrix B int lda = bli_obj_col_stride(a); // column stride of matrix OP(A), where OP(A) is Transpose(A) if transA enabled. int ldb = bli_obj_col_stride(b); // column stride of matrix OP(B), where OP(B) is Transpose(B) if transB enabled. 
int ldc_matC = bli_obj_col_stride( c ); // column stride of matrix C int ldc = M;//bli_obj_col_stride( c ); // column stride of static buffer for matrix C int row_idx = 0, col_idx = 0, k; int rs_matC = bli_obj_row_stride( c ); int rsc = 1; float *A = a->buffer; // pointer to matrix A elements, stored in row major format float *B = b->buffer; // pointer to matrix B elements, stored in column major format float *C = C_pack; // pointer to matrix C elements, stored in column major format float *matCbuf = c->buffer; float *tA = A, *tB = B, *tC = C; __m256 ymm4, ymm5, ymm6, ymm7; __m256 ymm8, ymm9, ymm10, ymm11; __m256 ymm12, ymm13, ymm14, ymm15; __m256 ymm0, ymm1, ymm2, ymm3; float result, scratch[8]; float *alpha_cast, *beta_cast; // alpha, beta multiples alpha_cast = (alpha->buffer); beta_cast = (beta->buffer); // The non-copy version of the A^T SYRK gives better performance for the small M cases. // The threshold is controlled by BLIS_ATBN_M_THRES if (M <= BLIS_ATBN_M_THRES) { for (col_idx = 0; (col_idx + (NR - 1)) < N; col_idx += NR) { for (row_idx = 0; (row_idx + (AT_MR - 1)) < M; row_idx += AT_MR) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm5 = _mm256_setzero_ps(); ymm6 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm8 = _mm256_setzero_ps(); ymm9 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm11 = _mm256_setzero_ps(); ymm12 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); ymm14 = _mm256_setzero_ps(); ymm15 = _mm256_setzero_ps(); //The inner loop computes the 4x3 values of the matrix. 
//The computation pattern is: // ymm4 ymm5 ymm6 // ymm7 ymm8 ymm9 // ymm10 ymm11 ymm12 // ymm13 ymm14 ymm15 //The Dot operation is performed in the inner loop, 8 float elements fit //in the YMM register hence loop count incremented by 8 for (k = 0; (k + 7) < K; k += 8) { ymm0 = _mm256_loadu_ps(tB + 0); ymm1 = _mm256_loadu_ps(tB + ldb); ymm2 = _mm256_loadu_ps(tB + 2 * ldb); ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_ps(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_ps(ymm2, ymm3, ymm6); ymm3 = _mm256_loadu_ps(tA + lda); ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); ymm3 = _mm256_loadu_ps(tA + 2 * lda); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm11 = _mm256_fmadd_ps(ymm1, ymm3, ymm11); ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_ps(tA + 3 * lda); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); ymm14 = _mm256_fmadd_ps(ymm1, ymm3, ymm14); ymm15 = _mm256_fmadd_ps(ymm2, ymm3, ymm15); tA += 8; tB += 8; } // if K is not a multiple of 8, padding is done before load using temproary array. 
if (k < K) { int iter; float data_feeder[8] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter + ldb]; ymm1 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter + 2 * ldb]; ymm2 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_ps(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_ps(ymm2, ymm3, ymm6); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); ymm8 = _mm256_fmadd_ps(ymm1, ymm3, ymm8); ymm9 = _mm256_fmadd_ps(ymm2, ymm3, ymm9); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[2 * lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm11 = _mm256_fmadd_ps(ymm1, ymm3, ymm11); ymm12 = _mm256_fmadd_ps(ymm2, ymm3, ymm12); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[3 * lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); ymm14 = _mm256_fmadd_ps(ymm1, ymm3, ymm14); ymm15 = _mm256_fmadd_ps(ymm2, ymm3, ymm15); } //horizontal addition and storage of the data. 
//Results for 4x3 blocks of C is stored here ymm4 = _mm256_hadd_ps(ymm4, ymm4); ymm4 = _mm256_hadd_ps(ymm4, ymm4); _mm256_storeu_ps(scratch, ymm4); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; ymm7 = _mm256_hadd_ps(ymm7, ymm7); ymm7 = _mm256_hadd_ps(ymm7, ymm7); _mm256_storeu_ps(scratch, ymm7); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[1] = result/* + tC[1] * (*beta_cast)*/; ymm10 = _mm256_hadd_ps(ymm10, ymm10); ymm10 = _mm256_hadd_ps(ymm10, ymm10); _mm256_storeu_ps(scratch, ymm10); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[2] = result/* + tC[2] * (*beta_cast)*/; ymm13 = _mm256_hadd_ps(ymm13, ymm13); ymm13 = _mm256_hadd_ps(ymm13, ymm13); _mm256_storeu_ps(scratch, ymm13); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[3] = result/* + tC[3] * (*beta_cast)*/; tC += ldc; ymm5 = _mm256_hadd_ps(ymm5, ymm5); ymm5 = _mm256_hadd_ps(ymm5, ymm5); _mm256_storeu_ps(scratch, ymm5); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; ymm8 = _mm256_hadd_ps(ymm8, ymm8); ymm8 = _mm256_hadd_ps(ymm8, ymm8); _mm256_storeu_ps(scratch, ymm8); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[1] = result/* + tC[1] * (*beta_cast)*/; ymm11 = _mm256_hadd_ps(ymm11, ymm11); ymm11 = _mm256_hadd_ps(ymm11, ymm11); _mm256_storeu_ps(scratch, ymm11); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[2] = result/* + tC[2] * (*beta_cast)*/; ymm14 = _mm256_hadd_ps(ymm14, ymm14); ymm14 = _mm256_hadd_ps(ymm14, ymm14); _mm256_storeu_ps(scratch, ymm14); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[3] = result/* + tC[3] * (*beta_cast)*/; tC += ldc; ymm6 = _mm256_hadd_ps(ymm6, ymm6); ymm6 = _mm256_hadd_ps(ymm6, ymm6); _mm256_storeu_ps(scratch, ymm6); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; ymm9 = _mm256_hadd_ps(ymm9, ymm9); ymm9 = 
_mm256_hadd_ps(ymm9, ymm9); _mm256_storeu_ps(scratch, ymm9); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[1] = result/* + tC[1] * (*beta_cast)*/; ymm12 = _mm256_hadd_ps(ymm12, ymm12); ymm12 = _mm256_hadd_ps(ymm12, ymm12); _mm256_storeu_ps(scratch, ymm12); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[2] = result/* + tC[2] * (*beta_cast)*/; ymm15 = _mm256_hadd_ps(ymm15, ymm15); ymm15 = _mm256_hadd_ps(ymm15, ymm15); _mm256_storeu_ps(scratch, ymm15); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[3] = result/* + tC[3] * (*beta_cast)*/; } } int processed_col = col_idx; int processed_row = row_idx; //The edge case handling where N is not a multiple of 3 if (processed_col < N) { for (col_idx = processed_col; col_idx < N; col_idx += 1) { for (row_idx = 0; (row_idx + (AT_MR - 1)) < M; row_idx += AT_MR) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); ymm7 = _mm256_setzero_ps(); ymm10 = _mm256_setzero_ps(); ymm13 = _mm256_setzero_ps(); //The inner loop computes the 4x1 values of the matrix. //The computation pattern is: // ymm4 // ymm7 // ymm10 // ymm13 for (k = 0; (k + 7) < K; k += 8) { ymm0 = _mm256_loadu_ps(tB + 0); ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); ymm3 = _mm256_loadu_ps(tA + lda); ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); ymm3 = _mm256_loadu_ps(tA + 2 * lda); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); ymm3 = _mm256_loadu_ps(tA + 3 * lda); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); tA += 8; tB += 8; } // if K is not a multiple of 8, padding is done before load using temproary array. 
if (k < K) { int iter; float data_feeder[8] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm7 = _mm256_fmadd_ps(ymm0, ymm3, ymm7); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[2 * lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm10 = _mm256_fmadd_ps(ymm0, ymm3, ymm10); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[3 * lda + iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm13 = _mm256_fmadd_ps(ymm0, ymm3, ymm13); } //horizontal addition and storage of the data. //Results for 4x1 blocks of C is stored here ymm4 = _mm256_hadd_ps(ymm4, ymm4); ymm4 = _mm256_hadd_ps(ymm4, ymm4); _mm256_storeu_ps(scratch, ymm4); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; ymm7 = _mm256_hadd_ps(ymm7, ymm7); ymm7 = _mm256_hadd_ps(ymm7, ymm7); _mm256_storeu_ps(scratch, ymm7); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[1] = result/* + tC[1] * (*beta_cast)*/; ymm10 = _mm256_hadd_ps(ymm10, ymm10); ymm10 = _mm256_hadd_ps(ymm10, ymm10); _mm256_storeu_ps(scratch, ymm10); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[2] = result/* + tC[2] * (*beta_cast)*/; ymm13 = _mm256_hadd_ps(ymm13, ymm13); ymm13 = _mm256_hadd_ps(ymm13, ymm13); _mm256_storeu_ps(scratch, ymm13); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[3] = result/* + tC[3] * (*beta_cast)*/; } } processed_row = row_idx; } //The edge case handling where M is not a multiple of 4 if (processed_row < M) { for (row_idx = processed_row; row_idx < M; row_idx += 1) { for (col_idx = 0; col_idx < N; col_idx += 1) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc 
+ row_idx; // clear scratch registers. ymm4 = _mm256_setzero_ps(); for (k = 0; (k + 7) < K; k += 8) { ymm0 = _mm256_loadu_ps(tB + 0); ymm3 = _mm256_loadu_ps(tA); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); tA += 8; tB += 8; } // if K is not a multiple of 8, padding is done before load using temproary array. if (k < K) { int iter; float data_feeder[8] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_ps(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_ps(data_feeder); ymm4 = _mm256_fmadd_ps(ymm0, ymm3, ymm4); } //horizontal addition and storage of the data. ymm4 = _mm256_hadd_ps(ymm4, ymm4); ymm4 = _mm256_hadd_ps(ymm4, ymm4); _mm256_storeu_ps(scratch, ymm4); result = scratch[0] + scratch[4]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; } } } //copy/compute sryk values back to C if ( bli_seq0( *beta_cast ) ) //when beta is 0, just copy result to C { dim_t _i, _j; if(bli_obj_is_lower(c)) //c is lower { for ( _j = 0; _j < N; ++_j ) for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } else //c is upper { for ( _j = 0; _j < N; ++_j ) for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { bli_sscopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } } else //when beta is non-zero, multiply and store result to C { dim_t _i, _j; if(bli_obj_is_lower(c)) //c is lower { for ( _j = 0; _j < N; ++_j ) for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } else //c is upper { for ( _j = 0; _j < N; ++_j ) for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { bli_sssxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } } return BLIS_SUCCESS; } else return 
BLIS_NONCONFORMAL_DIMENSIONS; } static err_t bli_dsyrk_small_atbn ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { int M = bli_obj_length( c ); // number of rows of Matrix C int N = bli_obj_width( c ); // number of columns of Matrix C int K = bli_obj_length( b ); // number of rows of Matrix B int lda = bli_obj_col_stride( a ); // column stride of matrix OP(A), where OP(A) is Transpose(A) if transA enabled. int ldb = bli_obj_col_stride( b ); // column stride of matrix OP(B), where OP(B) is Transpose(B) if transB enabled. int ldc_matC = bli_obj_col_stride( c ); // column stride of matrix C int ldc = M;//bli_obj_col_stride( c ); // column stride of static buffer for matrix C int row_idx = 0, col_idx = 0, k; int rs_matC = bli_obj_row_stride( c ); int rsc = 1; double *A = a->buffer; // pointer to matrix A elements, stored in row major format double *B = b->buffer; // pointer to matrix B elements, stored in column major format double *C = D_C_pack; // pointer to matrix C elements, stored in column major format double *matCbuf = c->buffer; double *tA = A, *tB = B, *tC = C; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm0, ymm1, ymm2, ymm3; double result, scratch[8]; double *alpha_cast, *beta_cast; // alpha, beta multiples alpha_cast = (alpha->buffer); beta_cast = (beta->buffer); // The non-copy version of the A^T SYRK gives better performance for the small M cases. // The threshold is controlled by BLIS_ATBN_M_THRES if (M <= BLIS_ATBN_M_THRES) { for (col_idx = 0; (col_idx + (NR - 1)) < N; col_idx += NR) { for (row_idx = 0; (row_idx + (AT_MR - 1)) < M; row_idx += AT_MR) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. 
ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); //The inner loop computes the 4x3 values of the matrix. //The computation pattern is: // ymm4 ymm5 ymm6 // ymm7 ymm8 ymm9 // ymm10 ymm11 ymm12 // ymm13 ymm14 ymm15 //The Dot operation is performed in the inner loop, 4 double elements fit //in the YMM register hence loop count incremented by 4 for (k = 0; (k + 3) < K; k += 4) { ymm0 = _mm256_loadu_pd(tB + 0); ymm1 = _mm256_loadu_pd(tB + ldb); ymm2 = _mm256_loadu_pd(tB + 2 * ldb); ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_pd(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_pd(ymm2, ymm3, ymm6); ymm3 = _mm256_loadu_pd(tA + lda); ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); ymm3 = _mm256_loadu_pd(tA + 2 * lda); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm11 = _mm256_fmadd_pd(ymm1, ymm3, ymm11); ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); ymm3 = _mm256_loadu_pd(tA + 3 * lda); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); ymm14 = _mm256_fmadd_pd(ymm1, ymm3, ymm14); ymm15 = _mm256_fmadd_pd(ymm2, ymm3, ymm15); tA += 4; tB += 4; } // if K is not a multiple of 4, padding is done before load using temproary array. 
if (k < K) { int iter; double data_feeder[4] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter + ldb]; ymm1 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter + 2 * ldb]; ymm2 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm5 = _mm256_fmadd_pd(ymm1, ymm3, ymm5); ymm6 = _mm256_fmadd_pd(ymm2, ymm3, ymm6); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); ymm8 = _mm256_fmadd_pd(ymm1, ymm3, ymm8); ymm9 = _mm256_fmadd_pd(ymm2, ymm3, ymm9); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[2 * lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm11 = _mm256_fmadd_pd(ymm1, ymm3, ymm11); ymm12 = _mm256_fmadd_pd(ymm2, ymm3, ymm12); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[3 * lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); ymm14 = _mm256_fmadd_pd(ymm1, ymm3, ymm14); ymm15 = _mm256_fmadd_pd(ymm2, ymm3, ymm15); } //horizontal addition and storage of the data. 
//Results for 4x3 blocks of C is stored here ymm4 = _mm256_hadd_pd(ymm4, ymm4); _mm256_storeu_pd(scratch, ymm4); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; ymm7 = _mm256_hadd_pd(ymm7, ymm7); _mm256_storeu_pd(scratch, ymm7); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[1] = result/* + tC[1] * (*beta_cast)*/; ymm10 = _mm256_hadd_pd(ymm10, ymm10); _mm256_storeu_pd(scratch, ymm10); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[2] = result/* + tC[2] * (*beta_cast)*/; ymm13 = _mm256_hadd_pd(ymm13, ymm13); _mm256_storeu_pd(scratch, ymm13); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[3] = result/* + tC[3] * (*beta_cast)*/; tC += ldc; ymm5 = _mm256_hadd_pd(ymm5, ymm5); _mm256_storeu_pd(scratch, ymm5); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; ymm8 = _mm256_hadd_pd(ymm8, ymm8); _mm256_storeu_pd(scratch, ymm8); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[1] = result/* + tC[1] * (*beta_cast)*/; ymm11 = _mm256_hadd_pd(ymm11, ymm11); _mm256_storeu_pd(scratch, ymm11); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[2] = result/* + tC[2] * (*beta_cast)*/; ymm14 = _mm256_hadd_pd(ymm14, ymm14); _mm256_storeu_pd(scratch, ymm14); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[3] = result/* + tC[3] * (*beta_cast)*/; tC += ldc; ymm6 = _mm256_hadd_pd(ymm6, ymm6); _mm256_storeu_pd(scratch, ymm6); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; ymm9 = _mm256_hadd_pd(ymm9, ymm9); _mm256_storeu_pd(scratch, ymm9); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[1] = result/* + tC[1] * (*beta_cast)*/; ymm12 = _mm256_hadd_pd(ymm12, ymm12); _mm256_storeu_pd(scratch, ymm12); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[2] = result/* + tC[2] * (*beta_cast)*/; ymm15 = _mm256_hadd_pd(ymm15, ymm15); 
_mm256_storeu_pd(scratch, ymm15); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[3] = result/* + tC[3] * (*beta_cast)*/; } } int processed_col = col_idx; int processed_row = row_idx; //The edge case handling where N is not a multiple of 3 if (processed_col < N) { for (col_idx = processed_col; col_idx < N; col_idx += 1) { for (row_idx = 0; (row_idx + (AT_MR - 1)) < M; row_idx += AT_MR) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. ymm4 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); //The inner loop computes the 4x1 values of the matrix. //The computation pattern is: // ymm4 // ymm7 // ymm10 // ymm13 for (k = 0; (k + 3) < K; k += 4) { ymm0 = _mm256_loadu_pd(tB + 0); ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); ymm3 = _mm256_loadu_pd(tA + lda); ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); ymm3 = _mm256_loadu_pd(tA + 2 * lda); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); ymm3 = _mm256_loadu_pd(tA + 3 * lda); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); tA += 4; tB += 4; } // if K is not a multiple of 4, padding is done before load using temproary array. 
if (k < K) { int iter; double data_feeder[4] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm7 = _mm256_fmadd_pd(ymm0, ymm3, ymm7); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[2 * lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm10 = _mm256_fmadd_pd(ymm0, ymm3, ymm10); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[3 * lda + iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm13 = _mm256_fmadd_pd(ymm0, ymm3, ymm13); } //horizontal addition and storage of the data. //Results for 4x1 blocks of C is stored here ymm4 = _mm256_hadd_pd(ymm4, ymm4); _mm256_storeu_pd(scratch, ymm4); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; ymm7 = _mm256_hadd_pd(ymm7, ymm7); _mm256_storeu_pd(scratch, ymm7); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[1] = result/* + tC[1] * (*beta_cast)*/; ymm10 = _mm256_hadd_pd(ymm10, ymm10); _mm256_storeu_pd(scratch, ymm10); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[2] = result/* + tC[2] * (*beta_cast)*/; ymm13 = _mm256_hadd_pd(ymm13, ymm13); _mm256_storeu_pd(scratch, ymm13); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[3] = result/* + tC[3] * (*beta_cast)*/; } } processed_row = row_idx; } // The edge case handling where M is not a multiple of 4 if (processed_row < M) { for (row_idx = processed_row; row_idx < M; row_idx += 1) { for (col_idx = 0; col_idx < N; col_idx += 1) { tA = A + row_idx * lda; tB = B + col_idx * ldb; tC = C + col_idx * ldc + row_idx; // clear scratch registers. 
ymm4 = _mm256_setzero_pd(); for (k = 0; (k + 3) < K; k += 4) { ymm0 = _mm256_loadu_pd(tB + 0); ymm3 = _mm256_loadu_pd(tA); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); tA += 4; tB += 4; } // if K is not a multiple of 4, padding is done before load using temproary array. if (k < K) { int iter; double data_feeder[4] = { 0.0 }; for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tB[iter]; ymm0 = _mm256_loadu_pd(data_feeder); for (iter = 0; iter < (K - k); iter++) data_feeder[iter] = tA[iter]; ymm3 = _mm256_loadu_pd(data_feeder); ymm4 = _mm256_fmadd_pd(ymm0, ymm3, ymm4); } //horizontal addition and storage of the data. ymm4 = _mm256_hadd_pd(ymm4, ymm4); _mm256_storeu_pd(scratch, ymm4); result = scratch[0] + scratch[2]; result *= (*alpha_cast); tC[0] = result/* + tC[0] * (*beta_cast)*/; } } } //copy/compute sryk values back to C if ( bli_seq0( *beta_cast ) ) //when beta is 0, just copy result to C { dim_t _i, _j; if(bli_obj_is_lower(c)) //c is lower { for ( _j = 0; _j < N; ++_j ) for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } else //c is upper { for ( _j = 0; _j < N; ++_j ) for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { bli_ddcopys( *(C + _i*rsc + _j*ldc), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } } else //when beta is non-zero, multiply and store result to C { dim_t _i, _j; if(bli_obj_is_lower(c)) //c is lower { for ( _j = 0; _j < N; ++_j ) for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i <= 0 ) { bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } else //c is upper { for ( _j = 0; _j < N; ++_j ) for ( _i = 0; _i < M; ++_i ) if ( (doff_t)_j - (doff_t)_i >= 0 ) { bli_dddxpbys( *(C + _i*rsc + _j*ldc), *(beta_cast), *(matCbuf + _i*rs_matC + _j*ldc_matC) ); } } } return BLIS_SUCCESS; } else return BLIS_NONCONFORMAL_DIMENSIONS; } #endif 
blis-0.6.1/kernels/zen/3/bli_trsm_small.c000066400000000000000000057350661360743507500202570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018-2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas at Austin nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"
#ifdef BLIS_ENABLE_SMALL_MATRIX_TRSM

#include "immintrin.h"

// ---- Tuning switches -------------------------------------------------------
#define GEMM_BLK_V1 8           //Block size to perform gemm and apply trsm
#define GEMM_ACCUM_A 1          //Perform B1=B1-(B0*A0) operation instead of B1'=(B0*A0) and then B1=B1-B1'
#define OPT_CACHE_BLOCKING_L1 1 //Perform trsm block-wise in blocks of GEMM_BLK_V1 instead of all columns of B together.
#define REARRANGE_SHFL 0        //Rearrange operations using blend or shuffle

// ---- Size constants --------------------------------------------------------
// NOTE(review): presumably problem-size thresholds for the AlXB / XAltB /
// AutXB kernels; they are not referenced in this part of the file -- confirm
// at their point of use.
#define BLI_AlXB_M_SP 16
#define BLI_XAltB_N_SP 128
#define BLI_AutXB_M_SP 64
#define BLI_AutXB_N_SP 128

// ---- Object-level double-precision kernels ---------------------------------
// Naming: "XA..B" solves X*op(A) = B (A on the right), "A..XB" solves
// op(A)*X = B (A on the left); l/u = lower/upper triangular A, t = A is
// transposed, _unitDiag = unit-diagonal A.

// XA = B; A is lower-triangular; No transpose; double precision; non-unit diagonal
static err_t bli_dtrsm_small_XAlB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

//XA = B; A is lower triangular; No transpose; double precision; unit-diagonal
static err_t bli_dtrsm_small_XAlB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

//XA = B; A is lower-triangular; A is transposed; double precision; non-unit-diagonal
static err_t bli_dtrsm_small_XAltB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

//XA = B; A is lower-triangular; A is transposed; double precision; unit-diagonal
static err_t bli_dtrsm_small_XAltB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

// XA = B; A is upper triangular; No transpose; double precision; non-unit diagonal
static err_t bli_dtrsm_small_XAuB ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

//XA = B; A is upper triangular; No transpose; double precision; unit-diagonal
static err_t bli_dtrsm_small_XAuB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

//XA = B; A is upper-triangular; A is transposed; double precision; non-unit diagonal
static err_t bli_dtrsm_small_XAutB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

//XA = B; A is upper-triangular; A is transposed; double precision; unit diagonal
static err_t bli_dtrsm_small_XAutB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

//AX = B; A is lower triangular; No transpose; double precision; non-unit diagonal
static err_t bli_dtrsm_small_AlXB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

//AX = B; A is lower triangular; No transpose; double precision; unit diagonal
static err_t bli_dtrsm_small_AlXB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl );

// ---- Single-precision micro-kernels ----------------------------------------
// Function pointer with the signature of the alpha-less micro-kernel variants.
// NOTE(review): presumably set at run time to the plain or _unitDiag kernel;
// the assignment is not visible in this part of the file.
static void (*fp_blis_strsm_microkernel)( float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b );
static void blis_strsm_microkernel( float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b );
static void blis_strsm_microkernel_alpha( float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alphaVal );
static void blis_strsm_microkernel_unitDiag( float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b );
static void blis_strsm_microkernel_alpha_unitDiag( float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alphaVal );
static void trsm_XAtB_block_allSmallSizedMatrices(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b);
static void trsm_XAtB_block_allSmallSizedMatrices_alpha(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alphaVal);
static void trsm_XAtB_block_allSmallSizedMatrices_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b);
static void trsm_XAtB_block_allSmallSizedMatrices_alpha_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alphaVal);

// ---- Double-precision micro-kernels ----------------------------------------
static void blis_dtrsm_microkernel( double *ptr_l, double *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b );
static void blis_dtrsm_microkernel_alpha( double *ptr_l, double *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, double alphaVal );
static void blis_dtrsm_microkernel_unitDiag( double *ptr_l, double *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b );
static void blis_dtrsm_microkernel_alpha_unitDiag( double *ptr_l, double *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, double alphaVal );
static void dtrsm_XAtB_block_allSmallSizedMatrices(double *ptr_l, double *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b);
static void dtrsm_XAtB_block_allSmallSizedMatrices_alpha(double *ptr_l, double *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, double alphaVal);
static void dtrsm_XAtB_block_allSmallSizedMatrices_unitDiag(double *ptr_l, double *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b);
static void dtrsm_XAtB_block_allSmallSizedMatrices_alpha_unitDiag(double *ptr_l, double *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, double alphaVal);

// ---- Single-precision A'X = B block kernels --------------------------------
static void trsm_AutXB_block_allSmallSizedMatrices(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b);
static void trsm_AutXB_block_allSmallSizedMatrices_alpha(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alpha);
static void trsm_AutXB_block_allSmallSizedMatrices_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b);
static void trsm_AutXB_block_allSmallSizedMatrices_alpha_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alpha);

//AX = B; A is lower triangular; No transpose; single
precision static err_t bli_strsm_small_AlXB ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ); //A.'X = B; A is upper triangular; A has to be transposed; single precision static err_t bli_strsm_small_AutXB ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ); //XA.' = B; A is lower triangular; A has to be transposed; single precision static err_t bli_strsm_small_XAltB ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ); //A.'X = B; A is upper triangular; A has to be transposed; double precision static err_t bli_dtrsm_small_AutXB ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ); /* * The bli_trsm_small implements unpacked version of TRSM * Currently only column-major is supported, A & B are column-major * Input: A: MxM (triangular matrix) * B: MxN matrix * Output: X: MxN matrix such that AX = alpha*B or XA = alpha*B or A'X = alpha*B or XA' = alpha*B * Here the output X is stored in B * The custom-kernel will be called only when M*(M+N)* sizeof(Matrix Elements) < L3 cache */ err_t bli_trsm_small ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { #ifdef BLIS_ENABLE_MULTITHREADING return BLIS_NOT_YET_IMPLEMENTED; #endif dim_t m = bli_obj_length(b); dim_t n = bli_obj_width(b); if(!(m && n)) return BLIS_SUCCESS; // If alpha is zero, B matrix will become zero after scaling & hence solution is also zero matrix if (bli_obj_equals(alpha, &BLIS_ZERO)) { return BLIS_NOT_YET_IMPLEMENTED; // scale B by alpha } // We have to call matrix scaling if alpha != 1.0 // if row major format return. Check this again. if ((bli_obj_row_stride(a) != 1) || (bli_obj_row_stride(b) != 1)) { return BLIS_INVALID_ROW_STRIDE; } num_t dt = ((*b).info & (0x7 << 0)); // only float and double datatypes are supported as of now. 
if (dt != BLIS_DOUBLE && dt != BLIS_FLOAT) { return BLIS_EXPECTED_REAL_DATATYPE; } // A is expected to be triangular in trsm if (!bli_obj_is_upper_or_lower (a)) { return BLIS_EXPECTED_TRIANGULAR_OBJECT; } // can use other control structs - even can use array of function pointers, // indexed by a number with bits formed by f('side', 'uplo', 'transa', dt). // In the below implementation, based on the number of finally implemented // cases, can move the checks with more cases higher up. if(side == BLIS_LEFT) { if(bli_obj_has_trans(a)) { if(dt == BLIS_DOUBLE) { if(bli_obj_is_upper(a)) { //return bli_dtrsm_small_AutXB(side, alpha, a, b, cntx, cntl); return BLIS_NOT_YET_IMPLEMENTED; } else { //return bli_dtrsm_small_AltXB(side, alpha, a, b, cntx, cntl); return BLIS_NOT_YET_IMPLEMENTED; } } else { if(bli_obj_is_upper(a)) { return bli_strsm_small_AutXB(side, alpha, a, b, cntx, cntl); } else { //return bli_strsm_small_AltXB(side, alpha, a, b, cntx, cntl); return BLIS_NOT_YET_IMPLEMENTED; } } } else { if(dt == BLIS_DOUBLE) { if(bli_obj_is_upper(a)) { //return bli_dtrsm_small_AuXB(side, alpha, a, b, cntx, cntl); return BLIS_NOT_YET_IMPLEMENTED; } else { if(bli_obj_has_unit_diag(a)) return bli_dtrsm_small_AlXB_unitDiag(side, alpha, a, b, cntx, cntl); else return bli_dtrsm_small_AlXB(side, alpha, a, b, cntx, cntl); } } else { if(bli_obj_is_upper(a)) { //return bli_strsm_small_AuXB(side, alpha, a, b, cntx, cntl); return BLIS_NOT_YET_IMPLEMENTED; } else { return bli_strsm_small_AlXB(side, alpha, a, b, cntx, cntl); } } } } else { if(bli_obj_has_trans(a)) { if(dt == BLIS_DOUBLE) { if(bli_obj_is_upper(a)) { if(bli_obj_has_unit_diag(a)) return bli_dtrsm_small_XAutB_unitDiag(side, alpha, a, b, cntx, cntl); else return bli_dtrsm_small_XAutB(side, alpha, a, b, cntx, cntl); } else { if(bli_obj_has_unit_diag(a)) return bli_dtrsm_small_XAltB_unitDiag(side, alpha, a, b, cntx, cntl); else return bli_dtrsm_small_XAltB(side, alpha, a, b, cntx, cntl); } } else { if(bli_obj_is_upper(a)) { 
//return bli_strsm_small_XAutB(side, alpha, a, b, cntx, cntl); return BLIS_NOT_YET_IMPLEMENTED; } else { return bli_strsm_small_XAltB(side, alpha, a, b, cntx, cntl); } } } else { if(dt == BLIS_DOUBLE) { if(bli_obj_is_upper(a)) { if(bli_obj_has_unit_diag(a)) return bli_dtrsm_small_XAuB_unitDiag(side, alpha, a, b, cntx, cntl); else return bli_dtrsm_small_XAuB(side, alpha, a, b, cntx, cntl); } else { if(bli_obj_has_unit_diag(a)) return bli_dtrsm_small_XAlB_unitDiag(side, alpha, a, b, cntx, cntl); else return bli_dtrsm_small_XAlB(side, alpha, a, b, cntx, cntl); } } else { if(bli_obj_is_upper(a)) { //return bli_strsm_small_XAuB(side, alpha, a, b, cntx, cntl); return BLIS_NOT_YET_IMPLEMENTED; } else { //return bli_strsm_small_XAlB(side, alpha, a, b, cntx, cntl); return BLIS_NOT_YET_IMPLEMENTED; } } } } return BLIS_NOT_YET_IMPLEMENTED; }; /* TRSM scalar code for the case AX = alpha * B * A is lower-triangular, non-unit-diagonal, no transpose * Dimensions: A: mxm X: mxn B:mxn */ static err_t dtrsm_small_AlXB ( double *A, double *B, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for (k = 0; k < M; k++) { double lkk_inv = 1.0/A[k+k*lda]; for (j = 0; j < N; j++) { B[k + j*ldb] *= lkk_inv; for (i = k+1; i < M; i++) { B[i + j*ldb] -= A[i + k*lda] * B[k + j*ldb]; } } }// k -loop return BLIS_SUCCESS; }// end of function /* TRSM scalar code for the case AX = alpha * B * A is lower-triangular, unit-diagonal, no transpose * Dimensions: A: mxm X: mxn B:mxn */ static err_t dtrsm_small_AlXB_unitDiag ( double *A, double *B, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for (k = 0; k < M; k++) { for (j = 0; j < N; j++) { for (i = k+1; i < M; i++) { B[i + j*ldb] -= A[i + k*lda] * B[k + j*ldb]; } } } return BLIS_SUCCESS; }// end of function /* TRSM scalar code for the case XA = alpha * B * A is upper-triangular, non-unit-diagonal no transpose * Dimensions: X:mxn A:nxn B:mxn */ static err_t dtrsm_small_XAuB ( double *A, double *B, dim_t M, dim_t N, dim_t lda, dim_t 
ldb ) { dim_t i, j, k; for(k = 0; k < N; k++) { double lkk_inv = 1.0/A[k+k*lda]; for(i = 0; i < M; i++) { B[i+k*ldb] *= lkk_inv; for(j = k+1; j < N; j++) { B[i+j*ldb] -= B[i+k*ldb] * A[k+j*lda]; } } } return BLIS_SUCCESS; } /* TRSM scalar code for the case XA = alpha * B * A is lower-triangular, non-unit triangular, no transpose * Dimensions: X:mxn A:nxn B:mxn */ static err_t dtrsm_small_XAlB ( double *A, double *B, double alpha, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for(i = 0; i < M; i++) for(j = 0; j < N; j++) B[i+j*ldb] *= alpha; for(k = N-1; k+1 > 0; k--) { double lkk_inv = 1.0/A[k+k*lda]; for(i = M-1; i+1 > 0; i--) { B[i+k*ldb] *= lkk_inv; for(j = k-1; j+1 > 0; j--) { B[i+j*ldb] -= B[i+k*ldb] * A[k+j*lda]; } } } return BLIS_SUCCESS; } /* TRSM scalar code for the case XA = alpha * B * A is lower-triangular, unit-diagonal, no transpose *Dimensions: X:mxn A:nxn B:mxn */ static err_t dtrsm_small_XAlB_unitDiag( double *A, double *B, double alpha, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for(i = 0 ; i < M; i++) for(j = 0; j < N; j++) B[i+j*ldb] *= alpha; for(k = N-1; k+1 > 0; k--) { for(i = M-1; i+1 > 0; i--) { for(j = k-1; j+1 > 0; j--) { B[i+j*ldb] -= B[i+k*ldb] * A[k+j*lda]; } } } return BLIS_SUCCESS; } /* TRSM scalar code for the case XA = alpha * B *A is upper-triangular, non-unit-diagonal, A is transposed * Dimensions: X:mxn A:nxn B:mxn */ static err_t dtrsm_small_XAutB ( double *A, double *B, double alpha, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for(i = 0; i < M; i++) for(j = 0; j < N; j++) B[i+j*ldb] *=alpha; for(k = N-1; k+1 > 0; k--) { double lkk_inv = 1.0/A[k+k*lda]; for(i = M-1; i+1 > 0; i--) { B[i+k*ldb] *= lkk_inv; for(j = k-1; j+1 > 0; j--) { B[i+j*ldb] -= B[i+k*ldb] * A[j+k*lda]; } } } return BLIS_SUCCESS; } /* TRSM scalar code for the case XA = alpha * B * A is upper-triangular, unit-diagonal, A has to be transposed * Dimensions: X:mxn A:nxn B:mxn */ static err_t dtrsm_small_XAutB_unitDiag( 
double *A, double *B, double alpha, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for(i = 0; i< M; i++) for(j = 0; j< N; j++) B[i+j*ldb] *= alpha; for(i = M-1; i+1 > 0; i--) { for(j = N-1; j+1 > 0; j--) { for(k = j-1; k+1 > 0; k--) { B[i+k*ldb] -= B[i+j*ldb] * A[k+j*lda]; } } } return BLIS_SUCCESS; } /* TRSM scalar code for the case XA = alpha * B * A is lower-triangular, non-unit-diagonal, A has to be transposed * Dimensions: X:mxn A:nxn B:mxn */ static err_t dtrsm_small_XAltB ( double *A, double *B, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for(k = 0; k < N; k++) { double lkk_inv = 1.0/A[k+k*lda]; for(i = 0; i < M; i++) { B[i+k*ldb] *= lkk_inv; for(j = k+1; j < N; j++) { B[i+j*ldb] -= B[i+k*ldb] * A[j+k*lda]; } } } return BLIS_SUCCESS; } /* TRSM scalar code for XA = alpha * B * A is lower-triangular, unit-diagonal, A has to be transposed * Dimensions: X:mxn A:nxn B:mxn */ static err_t dtrsm_small_XAltB_unitDiag( double *A, double *B, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for(k = 0; k < N; k++) { for(i = 0; i < M; i++) { for(j = k+1; j < N; j++) { B[i+j*ldb] -= B[i+k*ldb] * A[j+k*lda]; } } } return BLIS_SUCCESS; } /* TRSM scalar code for the case XA = alpha * B * A is upper-triangular, unit-diagonal, no transpose * Dimensions: X:mxn A:nxn B:mxn */ static err_t dtrsm_small_XAuB_unitDiag ( double *A, double *B, dim_t M, dim_t N, dim_t lda, dim_t ldb ) { dim_t i, j, k; for(k = 0; k < N; k++) { for(i = 0; i < M; i++) { for(j = k+1; j < N; j++) { B[i+j*ldb] -= B[i+k*ldb] * A[k+j*lda]; } } } return BLIS_SUCCESS; } /* TRSM for the case AX = alpha * B, Double precision * A is lower-triangular, no-transpose, non-unit diagonal * dimensions A: mxm X: mxn B: mxn b01---> * ***************** ** * * * * * * * * * * * * * * *b01* * * * * * * * * * * a10 ****** b11 ***************** | * * * | * * * * * | * * * | * * * * * | *a10*a11* | *b11* * * * v * * * v * * * * * *********** ***************** * * * * * * * * * * * * * * * * * * * 
* * * * * * * * * * * * * * * * * **************** ***************** a11---> */ static err_t bli_dtrsm_small_AlXB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 4; //size of block along 'M' dimpension dim_t D_NR = 8; //size of block along 'N' dimension dim_t m = bli_obj_length(b); // number of rows of matrix B dim_t n = bli_obj_width(b); // number of columns of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t m_remainder = m % D_MR; //number of remainder rows dim_t n_remainder = n % D_NR; //number of remainder columns dim_t cs_a = bli_obj_col_stride(a); // column stride of A dim_t cs_b = bli_obj_col_stride(b); // column stride of B dim_t i, j, k; //loop variables dim_t k_iter; //number of times GEMM to be performed double AlphaVal = *(double *)AlphaObj->buffer; //value of alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a10, *a11, *b01, *b11; //pointers that point to blocks for GEMM and TRSM double *ptr_b01_dup; double ones = 1.0; //scratch registers __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(j = 0; j+D_NR-1 < n; j += D_NR) //loop along 'N' dimension { for(i = 0;i+D_MR-1 < m; i += D_MR) //loop along 'M' dimension { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of times GEMM to be performed(in blocks of 4x4) ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = 
_mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); ///GEMM code begins/// for(k = 0; k< k_iter; k++) //loop for number of GEMM operations { ptr_b01_dup = b01; ymm16 = _mm256_loadu_pd((double const *)(a10));//A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[0][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[0][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[0][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[0][7] b01 += 1; //mobe to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[0][4]*A10[0][0] B01[0][4]*A10[1][0] B01[0][4]*A10[2][0] B01[0][4]*A10[3][0]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[0][5]*A10[0][0] B01[0][5]*A10[1][0] B01[0][5]*A10[2][0] B01[0][5]*A10[3][0]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[0][6]*A10[0][0] B01[0][6]*A10[1][0] B01[0][6]*A10[2][0] B01[0][6]*A10[3][0]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[0][7]*A10[0][0] 
B01[0][7]*A10[1][0] B01[0][7]*A10[2][0] B01[0][7]*A10[3][0]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a));//A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[1][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[1][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[1][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[1][7] b01 += 1; //mobe to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[1][4]*A10[0][1] B01[1][4]*A10[1][1] B01[1][4]*A10[2][1] B01[1][4]*A10[3][1]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[1][5]*A10[0][1] B01[1][5]*A10[1][1] B01[1][5]*A10[2][1] B01[1][5]*A10[3][1]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[1][6]*A10[0][1] B01[1][6]*A10[1][1] B01[1][6]*A10[2][1] B01[1][6]*A10[3][1]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[1][7]*A10[0][1] B01[1][7]*A10[1][1] B01[1][7]*A10[2][1] B01[1][7]*A10[3][1]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2));//A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm4 = 
_mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[2][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[2][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[2][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[2][7] b01 += 1; //mobe to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[2][4]*A10[0][2] B01[2][4]*A10[1][2] B01[2][4]*A10[2][2] B01[2][4]*A10[3][2]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[2][5]*A10[0][2] B01[2][5]*A10[1][2] B01[2][5]*A10[2][2] B01[2][5]*A10[3][2]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[2][6]*A10[0][2] B01[2][6]*A10[1][2] B01[2][6]*A10[2][2] B01[2][6]*A10[3][2]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[2][7]*A10[0][2] B01[2][7]*A10[1][2] B01[2][7]*A10[2][2] B01[2][7]*A10[3][2]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3));//A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm6 = _mm256_broadcast_sd((double 
const *)(b01 + cs_b * 2)); //B01[3][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[3][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[3][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[3][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[3][7] b01 += 1; //mobe to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[3][0]*A10[0][3] B01[3][0]*A10[3][0] B01[3][0]*A10[2][3] B01[3][0]*A10[3][0]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[3][1]*A10[0][3] B01[3][1]*A10[3][0] B01[3][1]*A10[2][3] B01[3][1]*A10[3][0]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[3][2]*A10[0][3] B01[3][2]*A10[3][0] B01[3][2]*A10[2][3] B01[3][2]*A10[3][0]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[3][3]*A10[0][3] B01[3][3]*A10[3][0] B01[3][3]*A10[2][3] B01[3][3]*A10[3][0]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[3][4]*A10[0][3] B01[3][4]*A10[3][0] B01[3][4]*A10[2][3] B01[3][4]*A10[3][3]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[3][5]*A10[0][3] B01[3][5]*A10[3][0] B01[3][5]*A10[2][3] B01[3][5]*A10[3][3]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[3][6]*A10[0][3] B01[3][6]*A10[3][0] B01[3][6]*A10[2][3] B01[3][6]*A10[3][3]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[3][7]*A10[0][3] B01[3][7]*A10[3][0] B01[3][7]*A10[2][3] B01[3][7]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to calculate next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to calculate next block of B for GEMM } ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to hold alpha ymm0 = _mm256_loadu_pd((double const *)(b11 + cs_b *0)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b *1)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = 
_mm256_loadu_pd((double const *)(b11 + cs_b *2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b *3)); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm4 = _mm256_loadu_pd((double const *)(b11 + cs_b *4)); //B11[0][4] B11[1][4] B11[2][4] B11[3][4] ymm5 = _mm256_loadu_pd((double const *)(b11 + cs_b *5)); //B11[0][5] B11[1][5] B11[2][5] B11[3][5] ymm6 = _mm256_loadu_pd((double const *)(b11 + cs_b *6)); //B11[0][6] B11[1][6] B11[2][6] B11[3][6] ymm7 = _mm256_loadu_pd((double const *)(b11 + cs_b *7)); //B11[0][7] B11[1][7] B11[2][7] B11[3][7] ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm8); //B11[0-3][0] * alpha -= B01[0-3][0] ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm9); //B11[0-3][1] * alpha -= B01[0-3][1] ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm10); //B11[0-3][2] * alpha -= B01[0-3][2] ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm11); //B11[0-3][3] * alpha -= B01[0-3][3] ymm4 = _mm256_fmsub_pd(ymm4, ymm16, ymm12); //B11[0-3][4] * alpha -= B01[0-3][4] ymm5 = _mm256_fmsub_pd(ymm5, ymm16, ymm13); //B11[0-3][5] * alpha -= B01[0-3][5] ymm6 = _mm256_fmsub_pd(ymm6, ymm16, ymm14); //B11[0-3][6] * alpha -= B01[0-3][6] ymm7 = _mm256_fmsub_pd(ymm7, ymm16, ymm15); //B11[0-3][7] * alpha -= B01[0-3][7] ///implement TRSM/// ///transpose of B11// ///unpacklow/// ymm9 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm11 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] ymm13 = _mm256_unpacklo_pd(ymm4, ymm5); //B11[0][4] B11[0][5] B11[2][4] B11[2][5] ymm15 = _mm256_unpacklo_pd(ymm6, ymm7); //B11[0][6] B11[0][7] B11[2][6] B11[2][7] //rearrange low elements ymm8 = _mm256_permute2f128_pd(ymm9,ymm11,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm10 = _mm256_permute2f128_pd(ymm9,ymm11,0x31); //B11[2][0] B11[2][1] B11[2][2] B11[2][3] ymm12 = _mm256_permute2f128_pd(ymm13,ymm15,0x20); //B11[4][0] B11[4][1] B11[4][2] B11[4][3] ymm14 = _mm256_permute2f128_pd(ymm13,ymm15,0x31); //B11[6][0] B11[6][1] B11[6][2] 
B11[6][3] ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] ymm4 = _mm256_unpackhi_pd(ymm4, ymm5); //B11[1][4] B11[1][5] B11[3][4] B11[3][5] ymm5 = _mm256_unpackhi_pd(ymm6, ymm7); //B11[1][6] B11[1][7] B11[3][6] B11[3][7] //rearrange high elements ymm9 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm11 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] B11[3][3] ymm13 = _mm256_permute2f128_pd(ymm4,ymm5,0x20); //B11[5][0] B11[5][1] B11[5][2] B11[5][3] ymm15 = _mm256_permute2f128_pd(ymm4,ymm5,0x31); //B11[7][0] B11[7][1] B11[7][2] B11[7][3] ymm0 = _mm256_broadcast_sd((double const *)&ones); //broadcast diagonal elements of A11 ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_b +1)); //A11[1][1] ymm3 = _mm256_broadcast_sd((double const *)(a11+cs_b*2 + 2)); //A11[2][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+cs_b*3 + 3)); //A11[3][3] ymm5 = _mm256_unpacklo_pd(ymm1, ymm2); //A11[0][0] A11[0][0] A11[1][1] A11[1][1] ymm6 = _mm256_unpacklo_pd(ymm3, ymm4); //A11[2][2] A11[2][2] A11[3][3] A11[3][3] ymm5 = _mm256_blend_pd(ymm5, ymm6, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm0 = _mm256_div_pd(ymm0, ymm5); //1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2] //extract a00 ymm1 = _mm256_permute_pd(ymm0, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm1 = _mm256_permute2f128_pd(ymm1, ymm1, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0] //(Row 0): perform mul operation of reciprocal of L(0,0) element with 1st row elements of B ymm8 = _mm256_mul_pd(ymm8, ymm1); //B11[0-3][0] /= A11[0][0] ymm12 = _mm256_mul_pd(ymm12, ymm1); //B11[0-3][4] /= A11[0][0] //extract a11 ymm1 = _mm256_permute_pd(ymm0, 0x03); //1/A11[1][1] 1/A11[1][1] 1/A11[3][3] 1/A11[3][3] ymm1 = _mm256_permute2f128_pd(ymm1, 
ymm1, 0x00); //1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1] ymm2 = _mm256_broadcast_sd((double const *)(a11 +1)); //A11[1][0] ymm3 = _mm256_broadcast_sd((double const *)(a11 +2)); //A11[2][0] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][0] a11 += cs_a; //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm2, ymm8, ymm9); //B11[1][0-3] -= A11[1][0] * B11[0-3][0] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[2][0-3] -= A11[2][0] * B11[0-3][0] ymm11 = _mm256_fnmadd_pd(ymm4, ymm8, ymm11); //B11[3][0-3] -= A11[3][0] * B11[0-3][0] ymm13 = _mm256_fnmadd_pd(ymm2, ymm12, ymm13); //B11[5][0-3] -= A11[1][0] * B11[0-3][4] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[6][0-3] -= A11[2][0] * B11[0-3][4] ymm15 = _mm256_fnmadd_pd(ymm4, ymm12, ymm15); //B11[7][0-3] -= A11[3][0] * B11[0-3][4] ymm9 = _mm256_mul_pd(ymm9, ymm1); //B11[0-3][1] /= A11[1][1] ymm13 = _mm256_mul_pd(ymm13, ymm1); //B11[0-3][5] /= A11[1][1] ymm3 = _mm256_broadcast_sd((double const *)(a11 +2)); //A11[2][1] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][1] a11 += cs_a; //extract a22 ymm1 = _mm256_permute_pd(ymm0, 0x00); //1/A11[0][0] 1/A110[][0] 1/A11[2][2] 1/A11[2][2] ymm1 = _mm256_permute2f128_pd(ymm1, ymm1, 0x11); //1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2] //(ROw2): FMA operations ymm10 = _mm256_fnmadd_pd(ymm3, ymm9, ymm10); //B11[2][0-3] -= A11[2][1] * B11[0-3][1] ymm11 = _mm256_fnmadd_pd(ymm4, ymm9, ymm11); //B11[3][0-3] -= A11[3][1] * B11[0-3][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm13, ymm14); //B11[6][0-3] -= A11[2][1] * B11[0-3][5] ymm15 = _mm256_fnmadd_pd(ymm4, ymm13, ymm15); //B11[7][0-3] -= A11[3][1] * B11[0-3][5] //perform mul operation ymm10 = _mm256_mul_pd(ymm10, ymm1); //B11[0-3][2] /= A11[2][2] ymm14 = _mm256_mul_pd(ymm14, ymm1); //B11[0-3][6] /= A11[2][2] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][2] a11 += cs_a; //extract a33 ymm1 = _mm256_permute_pd(ymm0, 0x0C); //1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] ymm1 = 
_mm256_permute2f128_pd(ymm1, ymm1, 0x11);//1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3] //(ROw2): FMA operations ymm11 = _mm256_fnmadd_pd(ymm4, ymm10, ymm11); //B11[3][0-3] -= A11[3][2] * B11[0-3][2] ymm15 = _mm256_fnmadd_pd(ymm4, ymm14, ymm15); //B11[7][0-3] -= A11[3][2] * B11[0-3][6] //perform mul operation ymm11 = _mm256_mul_pd(ymm11, ymm1); //B11[0-3][3] /= A11[3][3] ymm15 = _mm256_mul_pd(ymm15, ymm1); //B11[0-3][7] /= A11[3][3] //unpacklow// ymm1 = _mm256_unpacklo_pd(ymm8, ymm9); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm10, ymm11); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] ymm5 = _mm256_unpacklo_pd(ymm12, ymm13); //B11[4][0] B11[5][0] B11[4][2] B11[5][2] ymm7 = _mm256_unpacklo_pd(ymm14, ymm15); //B11[6][0] B11[7][0] B11[6][2] B11[7][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1, ymm3, 0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1, ymm3, 0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm4 = _mm256_permute2f128_pd(ymm5, ymm7, 0x20); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm6 = _mm256_permute2f128_pd(ymm5, ymm7, 0x31); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ///unpack high/// ymm8 = _mm256_unpackhi_pd(ymm8, ymm9); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm9 = _mm256_unpackhi_pd(ymm10, ymm11); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] ymm12 = _mm256_unpackhi_pd(ymm12, ymm13); //B11[4][1] B11[5][1] B11[4][3] B11[5][3] ymm13 = _mm256_unpackhi_pd(ymm14, ymm15); //B11[6][1] B11[7][1] B11[6][3] B11[7][3] //rearrange high elements ymm1 = _mm256_permute2f128_pd(ymm8, ymm9, 0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm8, ymm9, 0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm5 = _mm256_permute2f128_pd(ymm12, ymm13, 0x20); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm7 = _mm256_permute2f128_pd(ymm12, ymm13, 0x31); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] _mm256_storeu_pd((double *)(b11 + cs_b * 0), ymm0); //store B11[0][0-3] 
_mm256_storeu_pd((double *)(b11 + cs_b * 1), ymm1); //store B11[1][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store B11[2][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store B11[3][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 4), ymm4); //store B11[4][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 5), ymm5); //store B11[5][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 6), ymm6); //store B11[6][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 7), ymm7); //store B11[7][0-3] } if(m_remainder) //implementation for reamainder rows(when 'M' is not a multiple of D_MR) { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of times GEMM operation to be done(in blocks of 4x4) ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); ///GEMM code Begins/// for(k = 0; k< k_iter; k++) //loop for number of GEMM operations { ptr_b01_dup = b01; ymm16 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[0][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[0][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[0][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[0][7] b01 += 1; //move to next row of B ymm8 = 
_mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0] ) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[0][4]*A10[0][0] B01[0][4]*A10[1][0] B01[0][4]*A10[2][0] B01[0][4]*A10[3][0]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[0][5]*A10[0][0] B01[0][5]*A10[1][0] B01[0][5]*A10[2][0] B01[0][5]*A10[3][0]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[0][6]*A10[0][0] B01[0][6]*A10[1][0] B01[0][6]*A10[2][0] B01[0][6]*A10[3][0]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm16 += (B01[0][7]*A10[0][0] B01[0][7]*A10[1][0] B01[0][7]*A10[2][0] B01[0][7]*A10[3][0]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 1)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[1][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[1][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[1][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[1][7] b01 += 1; //move to next row of B01 ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); 
//ymm9 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[1][4]*A10[0][1] B01[1][4]*A10[1][1] B01[1][4]*A10[2][1] B01[1][4]*A10[3][1]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[1][5]*A10[0][1] B01[1][5]*A10[1][1] B01[1][5]*A10[2][1] B01[1][5]*A10[3][1]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[1][6]*A10[0][1] B01[1][6]*A10[1][1] B01[1][6]*A10[2][1] B01[1][6]*A10[3][1]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[1][7]*A10[0][1] B01[1][7]*A10[1][1] B01[1][7]*A10[2][1] B01[1][7]*A10[3][1]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2)); //A10[0][2] //A10[1][2] A10[2][2] A10[3][2] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[2][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[2][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[2][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[2][7] b01 += 1; //move to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[2][2]*A10[0][2] 
B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[2][4]*A10[0][2] B01[2][4]*A10[1][2] B01[2][4]*A10[2][2] B01[2][0]*A10[3][2]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[2][5]*A10[0][2] B01[2][5]*A10[1][2] B01[2][5]*A10[2][2] B01[2][1]*A10[3][2]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[2][6]*A10[0][2] B01[2][6]*A10[1][2] B01[2][6]*A10[2][2] B01[2][2]*A10[3][2]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[2][7]*A10[0][2] B01[2][7]*A10[1][2] B01[2][7]*A10[2][2] B01[2][3]*A10[3][2]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[3][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[3][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[3][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[3][7] b01 += 1; //move to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm8 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm8 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm8 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] 
B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm8 += (B01[3][0]*A10[0][3] B01[3][4]*A10[1][3] B01[3][4]*A10[2][3] B01[3][4]*A10[3][3]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm8 += (B01[3][1]*A10[0][3] B01[3][5]*A10[1][3] B01[3][5]*A10[2][3] B01[3][5]*A10[3][3]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm8 += (B01[3][2]*A10[0][3] B01[3][6]*A10[1][3] B01[3][6]*A10[2][3] B01[3][6]*A10[3][3]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm8 += (B01[3][3]*A10[0][3] B01[3][7]*A10[1][3] B01[3][7]*A10[2][3] B01[3][7]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha value ymm0 = _mm256_loadu_pd((double const *)(b11 + cs_b *0)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b *1)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b *2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b *3)); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm4 = _mm256_loadu_pd((double const *)(b11 + cs_b *4)); //B11[0][4] B11[1][4] B11[2][4] B11[3][4] ymm5 = _mm256_loadu_pd((double const *)(b11 + cs_b *5)); //B11[0][5] B11[1][5] B11[2][5] B11[3][5] ymm6 = _mm256_loadu_pd((double const *)(b11 + cs_b *6)); //B11[0][6] B11[1][6] B11[2][6] B11[3][6] ymm7 = _mm256_loadu_pd((double const *)(b11 + cs_b *7)); //B11[0][7] B11[1][7] B11[2][7] B11[3][7] ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm8); //B11[0-3][0] *alpha -= B01[0-3][0] ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm9); //B11[0-3][1] *alpha -= B01[0-3][1] ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm10); //B11[0-3][2] *alpha -= B01[0-3][2] ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm11); //B11[0-3][3] *alpha -= B01[0-3][3] ymm4 = _mm256_fmsub_pd(ymm4, ymm16, ymm12); 
//B11[0-3][4] *alpha -= B01[0-3][4] ymm5 = _mm256_fmsub_pd(ymm5, ymm16, ymm13); //B11[0-3][5] *alpha -= B01[0-3][5] ymm6 = _mm256_fmsub_pd(ymm6, ymm16, ymm14); //B11[0-3][6] *alpha -= B01[0-3][6] ymm7 = _mm256_fmsub_pd(ymm7, ymm16, ymm15); //B11[0-3][7] *alpha -= B01[0-3][7] ///implement TRSM/// ///unpacklow/// ymm9 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm11 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] ymm13 = _mm256_unpacklo_pd(ymm4, ymm5); //B11[0][4] B11[0][5] B11[1][4] B11[1][5] ymm15 = _mm256_unpacklo_pd(ymm6, ymm7); //B11[0][6] B11[0][7] B11[1][6] B11[1][7] //rearrange low elements ymm8 = _mm256_permute2f128_pd(ymm9,ymm11,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm10 = _mm256_permute2f128_pd(ymm9,ymm11,0x31); //B11[2][0] B11[2][1] B11[2][2] B11[2][3] ymm12 = _mm256_permute2f128_pd(ymm13,ymm15,0x20); //B11[4][0] B11[4][1] B11[4][2] B11[4][3] ymm14 = _mm256_permute2f128_pd(ymm13,ymm15,0x31); //B11[6][0] B11[6][1] B11[6][2] B11[6][3] ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] ymm4 = _mm256_unpackhi_pd(ymm4, ymm5); //B11[5][0] B11[5][1] B11[7][0] B11[7][1] ymm5 = _mm256_unpackhi_pd(ymm6, ymm7); //B11[5][2] B11[5][3] B11[7][2] B11[7][3] //rearrange high elements ymm9 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm11 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] B11[3][3] ymm13 = _mm256_permute2f128_pd(ymm4,ymm5,0x20); //B11[5][0] B11[5][1] B11[5][2] B11[5][3] ymm15 = _mm256_permute2f128_pd(ymm4,ymm5,0x31); //B11[7][0] B11[7][1] B11[7][2] B11[7][3] ymm0 = _mm256_broadcast_sd((double const *)&ones); //broadcast diagonal elements of A11 ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_b +1)); //A11[1][1] ymm3 = 
_mm256_broadcast_sd((double const *)(a11+cs_b*2 + 2)); //A11[2][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+cs_b*3 + 3)); //A11[3][3] ymm5 = _mm256_unpacklo_pd(ymm1, ymm2); //A11[0][0] A11[0][0] A11[1][1] A11[1][1] ymm6 = _mm256_unpacklo_pd(ymm3, ymm4); //A11[2][2] A11[2][2] A11[3][3] A11[3][3] ymm5 = _mm256_blend_pd(ymm5, ymm6, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm0 = _mm256_div_pd(ymm0, ymm5); //1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] //extract a00 ymm1 = _mm256_permute_pd(ymm0, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm1 = _mm256_permute2f128_pd(ymm1, ymm1, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0] //(Row 0): perform mul operation of reciprocal of L(0,0) element with 1st row elements of B ymm8 = _mm256_mul_pd(ymm8, ymm1); //B11[0-3][0] /= A11[0][0] ymm12 = _mm256_mul_pd(ymm12, ymm1); //B11[0-3][4] /= A11[0][0] //extract a11 ymm1 = _mm256_permute_pd(ymm0, 0x03); //1/A11[1][1] 1/A11[1][1] 1/A11[3][3] 1/A11[3][3] ymm1 = _mm256_permute2f128_pd(ymm1, ymm1, 0x00); //1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1] ymm2 = _mm256_broadcast_sd((double const *)(a11 +1)); //A11[1][0] ymm3 = _mm256_broadcast_sd((double const *)(a11 +2)); //A11[2][0] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][0] a11 += cs_a; //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm2, ymm8, ymm9); //B11[1][0-3] -= B11[0-3][0]*A11[1][0] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[2][0-3] -= B11[0-3][0]*A11[2][0] ymm11 = _mm256_fnmadd_pd(ymm4, ymm8, ymm11); //B11[3][0-3] -= B11[0-3][0]*A11[3][0] ymm13 = _mm256_fnmadd_pd(ymm2, ymm12, ymm13); //B11[5][0-3] -= B11[0-3][4]*A11[1][4] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[6][0-3] -= B11[0-3][4]*A11[2][4] ymm15 = _mm256_fnmadd_pd(ymm4, ymm12, ymm15); //B11[7][0-3] -= B11[0-3][4]*A11[3][4] ymm9 = _mm256_mul_pd(ymm9, ymm1); //B11[0-3][1] /= A11[1][1] ymm13 = _mm256_mul_pd(ymm13, ymm1); //B11[0-3][5] /= A11[1][1] ymm3 = _mm256_broadcast_sd((double const 
*)(a11 +2)); //A11[2][1] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][1] a11 += cs_a; //extract a22 ymm1 = _mm256_permute_pd(ymm0, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm1 = _mm256_permute2f128_pd(ymm1, ymm1, 0x11); //1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2] //(ROw2): FMA operations ymm10 = _mm256_fnmadd_pd(ymm3, ymm9, ymm10); //B11[2][0-3] -= A11[2][1] * B11[0-3][1] ymm11 = _mm256_fnmadd_pd(ymm4, ymm9, ymm11); //B11[3][0-3] -= A11[3][1] * B11[0-3][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm13, ymm14); //B11[6][0-3] -= A11[2][1] * B11[0-3][5] ymm15 = _mm256_fnmadd_pd(ymm4, ymm13, ymm15); //B11[7][0-3] -= A11[3][1] * B11[0-3][5] //perform mul operation ymm10 = _mm256_mul_pd(ymm10, ymm1); //B11[0-3][2] /=A11[2][2] ymm14 = _mm256_mul_pd(ymm14, ymm1); //B11[0-3][6] /= A11[2][2] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][2] a11 += cs_a; //extract a33 ymm1 = _mm256_permute_pd(ymm0, 0x0C); //1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] ymm1 = _mm256_permute2f128_pd(ymm1, ymm1, 0x11); //1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3] //(ROw2): FMA operations ymm11 = _mm256_fnmadd_pd(ymm4, ymm10, ymm11); //B11[0-3][3] -= A11[3][2]*B11[0-3][2] ymm15 = _mm256_fnmadd_pd(ymm4, ymm14, ymm15); //B11[0-3][7] -= A11[3][2]*B11[0-3][6] //perform mul operation ymm11 = _mm256_mul_pd(ymm11, ymm1); //B11[0-3][3] /= A11[3][3] ymm15 = _mm256_mul_pd(ymm15, ymm1); //B11[0-3][7] /= A11[3][3] //unpacklow// ymm1 = _mm256_unpacklo_pd(ymm8, ymm9); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm10, ymm11); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] ymm5 = _mm256_unpacklo_pd(ymm12, ymm13); //B11[4][0] B11[5][0] B11[4][2] B11[5][2] ymm7 = _mm256_unpacklo_pd(ymm14, ymm15); //B11[6][0] B11[7][0] B11[6][2] B11[7][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1, ymm3, 0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1, ymm3, 0x31); //B11[0][2] B11[1][2] B11[2][2] 
B11[3][2] ymm4 = _mm256_permute2f128_pd(ymm5, ymm7, 0x20); //B11[0][4] B11[1][4] B11[2][4] B11[3][4] ymm6 = _mm256_permute2f128_pd(ymm5, ymm7, 0x31); //B11[0][6] B11[1][6] B11[2][6] B11[3][6] ///unpack high/// ymm8 = _mm256_unpackhi_pd(ymm8, ymm9); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm9 = _mm256_unpackhi_pd(ymm10, ymm11); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] ymm12 = _mm256_unpackhi_pd(ymm12, ymm13); //B11[0][5] B11[1][5] B11[0][7] B11[1][7] ymm13 = _mm256_unpackhi_pd(ymm14, ymm15); //B11[2][5] B11[3][5] B11[2][7] B11[3][7] //rearrange high elements ymm1 = _mm256_permute2f128_pd(ymm8, ymm9, 0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm8, ymm9, 0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm5 = _mm256_permute2f128_pd(ymm12, ymm13, 0x20); //B11[0][5] B11[1][5] B11[2][5] B11[3][5] ymm7 = _mm256_permute2f128_pd(ymm12, ymm13, 0x31); //B11[0][7] B11[1][7] B11[2][7] B11[3][7] ymm8 = _mm256_loadu_pd((double const *)(b11 + cs_b * 0)); //load B11[0-3][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b * 1)); //load B11[0-3][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //load B11[0-3][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //load B11[0-3][3] ymm12 = _mm256_loadu_pd((double const *)(b11 + cs_b * 4)); //load B11[0-3][4] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b * 5)); //load B11[0-3][5] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b * 6)); //load B11[0-3][6] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b * 7)); //load B11[0-3][7] //determine correct values to store if(m_remainder == 3) { ymm0 = _mm256_blend_pd(ymm0, ymm8, 0x08); ymm1 = _mm256_blend_pd(ymm1, ymm9, 0x08); ymm2 = _mm256_blend_pd(ymm2, ymm10, 0x08); ymm3 = _mm256_blend_pd(ymm3, ymm11, 0x08); ymm4 = _mm256_blend_pd(ymm4, ymm12, 0x08); ymm5 = _mm256_blend_pd(ymm5, ymm13, 0x08); ymm6 = _mm256_blend_pd(ymm6, ymm14, 0x08); ymm7 = _mm256_blend_pd(ymm7, ymm15, 0x08); } if(m_remainder == 2) { ymm0 = 
_mm256_permute2f128_pd(ymm0, ymm8, 0x30); ymm1 = _mm256_permute2f128_pd(ymm1, ymm9, 0x30); ymm2 = _mm256_permute2f128_pd(ymm2, ymm10, 0x30); ymm3 = _mm256_permute2f128_pd(ymm3, ymm11, 0x30); ymm4 = _mm256_permute2f128_pd(ymm4, ymm12, 0x30); ymm5 = _mm256_permute2f128_pd(ymm5, ymm13, 0x30); ymm6 = _mm256_permute2f128_pd(ymm6, ymm14, 0x30); ymm7 = _mm256_permute2f128_pd(ymm7, ymm15, 0x30); } if(m_remainder == 1) { ymm0 = _mm256_blend_pd(ymm0, ymm8, 0x0E); ymm1 = _mm256_blend_pd(ymm1, ymm9, 0x0E); ymm2 = _mm256_blend_pd(ymm2, ymm10, 0x0E); ymm3 = _mm256_blend_pd(ymm3, ymm11, 0x0E); ymm4 = _mm256_blend_pd(ymm4, ymm12, 0x0E); ymm5 = _mm256_blend_pd(ymm5, ymm13, 0x0E); ymm6 = _mm256_blend_pd(ymm6, ymm14, 0x0E); ymm7 = _mm256_blend_pd(ymm7, ymm15, 0x0E); } _mm256_storeu_pd((double *)(b11 + cs_b * 0), ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b * 1), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[0-3][3]) _mm256_storeu_pd((double *)(b11 + cs_b * 4), ymm4); //store(B11[0-3][4]) _mm256_storeu_pd((double *)(b11 + cs_b * 5), ymm5); //store(B11[0-3][5]) _mm256_storeu_pd((double *)(b11 + cs_b * 6), ymm6); //store(B11[0-3][6]) _mm256_storeu_pd((double *)(b11 + cs_b * 7), ymm7); //store(B11[0-3][7]) } } if((n & 4)) //implementation for remainder columns(when 'N' is a multiple of 4) { for(i = 0;i+D_MR-1 < m; i += D_MR) //loop along 'M' direction { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of times GEMM to be performed(in block of 4) ///GEMM for previously calculated values /// //load 4x4 block from b11 ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = 
_mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b*2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b*3)); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_b01_dup = b01; ymm8 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm9 = _mm256_loadu_pd((double const *)(a10 + cs_a)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm10 = _mm256_loadu_pd((double const *)(a10 + cs_a*2)); //A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm11 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm14 = 
_mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B01[2][1]*A10[1][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B01[2][2]*A10[2][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B01[2][3]*A10[3][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm5 = 
_mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm4); //B11[0-3][0] *alpha -= ymm4 ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm5); //B01[0-3][1] *alpha -= ymm5 ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm6); //B01[0-3][2] *alpha -= ymm6 ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm7); //B01[0-3][3] *alpha -= ymm7 ///implement TRSM/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11 + 1)); //A11[1][1] ymm9 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][2] ymm12 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm4 = _mm256_unpacklo_pd(ymm4, ymm8); //A11[0][0] A11[0][0] A11[2][2] A11[2][2] ymm8 = _mm256_unpacklo_pd(ymm11, ymm13); //A11[1][1] A11[1][1] A11[3][3] A11[3][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); ymm4 = _mm256_blend_pd(ymm4, ymm8, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm14 = _mm256_div_pd(ymm14, ymm4); 
//1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] ////unpacklow//// ymm8 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm13 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] //rearrange low elements ymm4 = _mm256_permute2f128_pd(ymm8,ymm13,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm11 = _mm256_permute2f128_pd(ymm8,ymm13,0x31);//B11[2][0] B11[2][1] B11[2][2] B11[2][3] /* mat_b_rearr[0] = _mm256_mul_pd(mat_b_rearr[0], alphaReg); mat_b_rearr[2] = _mm256_mul_pd(mat_b_rearr[2], alphaReg); */ ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] //rearrange high elements ymm8 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm13 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] B11[3][3] /* mat_b_rearr[1] = _mm256_mul_pd(mat_b_rearr[1], alphaReg); mat_b_rearr[3] = _mm256_mul_pd(mat_b_rearr[3], alphaReg); */ //extract a00 ymm15 = _mm256_permute_pd(ymm14, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0] //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B ymm4 = _mm256_mul_pd(ymm4, ymm15); //B11[0][0-3] /= A11[0][0] //extract diag a11 from a ymm15 = _mm256_permute_pd(ymm14, 0x03); //1/A11[1][1] 1/A11[1][1] 1/A11[3][3] 1/A11[3][3] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1] //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (3, 0) ymm8 = _mm256_fnmadd_pd(ymm5, ymm4, ymm8);//d = c - (a*b) //B11[1][0-3] -= A11[1][0]*B11[0][0-3] ymm11 = _mm256_fnmadd_pd(ymm6, ymm4, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][0]*B11[0][0-3] ymm13 = _mm256_fnmadd_pd(ymm7, ymm4, ymm13);//d = c - (a*b) //B11[3][0-3] -= 
A11[3][0]*B11[0][0-3] //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B ymm8 = _mm256_mul_pd(ymm8, ymm15); //B11[1][0-3] /= A11[1][1] //extract diag a22 from a ymm15 = _mm256_permute_pd(ymm14, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2] //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) ymm11 = _mm256_fnmadd_pd(ymm9, ymm8, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][1]*B11[1][0-3] ymm13 = _mm256_fnmadd_pd(ymm10, ymm8, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][1]*B11[1][0-3] //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B ymm11 = _mm256_mul_pd(ymm11, ymm15); //B11[2][0-3] /= A11[2][2] //extract diag a33 from a ymm15 = _mm256_permute_pd(ymm14, 0x0C); //1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3] //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) ymm13 = _mm256_fnmadd_pd(ymm12, ymm11, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][2]*B11[2][0-3] //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B ymm13 = _mm256_mul_pd(ymm13, ymm15); //B11[3][0-3] /= A11[3][3] //--> Transpose and store results of columns of B block <--// ////unpacklow//// ymm1 = _mm256_unpacklo_pd(ymm4, ymm8); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm11, ymm13); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1,ymm3,0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1,ymm3,0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ////unpackhigh//// ymm14 = _mm256_unpackhi_pd(ymm4, ymm8); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm15 = _mm256_unpackhi_pd(ymm11, ymm13); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] 
//rearrange high elements ymm1 = _mm256_permute2f128_pd(ymm14,ymm15,0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm14,ymm15,0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b*2), ymm2); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b*3), ymm3); //store(B11[0-3][3]) } if(m_remainder) //implementation for remainder rows(when 'M' is not a multiple of D_MR) { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha k_iter = i / D_MR; //number of GEMM operations to be performed(in blocks of 4x4) ///GEMM for previously calculated values /// //load 4x4 block from b11 ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for(k = 0; k < k_iter; k++) //looop for number of GEMM operations { ptr_b01_dup = b01; ymm8 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm9 = _mm256_loadu_pd((double const *)(a10 + cs_a)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm10 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2)); //A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm11 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); 
//A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm15 = 
_mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm4); //B11[0-3][0] *alpha -= ymm4 ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm5); //B11[0-3][1] *alpha -= ymm5 ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm6); //B11[0-3][2] *alpha -= ymm6 ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm7); //B11[0-3][3] *alpha -= ymm7 ///implement TRSM/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); 
//A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11 + 1)); //A11[1][1] ymm9 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][2] ymm12 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm4 = _mm256_unpacklo_pd(ymm4, ymm8); //A11[0][0] A11[0][0] A11[1][1] A11[1][1] ymm8 = _mm256_unpacklo_pd(ymm11, ymm13); //A11[2][2] A11[2][2] A11[3][3] A11[3][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); ymm4 = _mm256_blend_pd(ymm4, ymm8, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm14 = _mm256_div_pd(ymm14, ymm4); //1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] ////unpacklow//// ymm8 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm13 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] //rearrange low elements ymm4 = _mm256_permute2f128_pd(ymm8,ymm13,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm11 = _mm256_permute2f128_pd(ymm8,ymm13,0x31);//B11[2][0] B11[2][1] B11[2][2] B11[2][3] /* mat_b_rearr[0] = _mm256_mul_pd(mat_b_rearr[0], alphaReg); mat_b_rearr[2] = _mm256_mul_pd(mat_b_rearr[2], alphaReg); */ ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] //rearrange high elements ymm8 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm13 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] 
B11[3][1] B11[3][2] B11[3][3] /* mat_b_rearr[1] = _mm256_mul_pd(mat_b_rearr[1], alphaReg); mat_b_rearr[3] = _mm256_mul_pd(mat_b_rearr[3], alphaReg); */ //extract a00 ymm15 = _mm256_permute_pd(ymm14, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00);//1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0] //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B ymm4 = _mm256_mul_pd(ymm4, ymm15); //B11[0][0-3] /= A11[0][0] //extract diag a11 from a ymm15 = _mm256_permute_pd(ymm14, 0x03); //1/A11[1][1] 1/A11[1][1] 1/A11[3][3] 1/A11[3][3] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //1/A11[][] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1] //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (3, 0) ymm8 = _mm256_fnmadd_pd(ymm5, ymm4, ymm8);//d = c - (a*b) //B11[1][0-3] -= A11[1][0]* B11[0][0-3] ymm11 = _mm256_fnmadd_pd(ymm6, ymm4, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][0]* B11[0][0-3] ymm13 = _mm256_fnmadd_pd(ymm7, ymm4, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][0]* B11[0][0-3] //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B ymm8 = _mm256_mul_pd(ymm8, ymm15); //B11[1][0-3] /= A11[1][1] //extract diag a22 from a ymm15 = _mm256_permute_pd(ymm14, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2] //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) ymm11 = _mm256_fnmadd_pd(ymm9, ymm8, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][1]* B11[1][0-3] ymm13 = _mm256_fnmadd_pd(ymm10, ymm8, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][1]* B11[1][0-3] //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B ymm11 = _mm256_mul_pd(ymm11, ymm15); //B11[2][0-3] /= A11[2][2] //extract diag a33 from a ymm15 = _mm256_permute_pd(ymm14, 0x0C); //1/A11[0][0] 1/A11[0][0] 
1/A11[3][3] 1/A11[3][3] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3] //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) ymm13 = _mm256_fnmadd_pd(ymm12, ymm11, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][2]* B11[2][0-3] //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B ymm13 = _mm256_mul_pd(ymm13, ymm15); //B11[3][0-3] /= A11[3][3] //--> Transpose and store results of columns of B block <--// ////unpacklow//// ymm1 = _mm256_unpacklo_pd(ymm4, ymm8); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm11, ymm13); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1,ymm3,0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1,ymm3,0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ////unpackhigh//// ymm14 = _mm256_unpackhi_pd(ymm4, ymm8); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm15 = _mm256_unpackhi_pd(ymm11, ymm13); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] //rearrange high elements ymm1 = _mm256_permute2f128_pd(ymm14,ymm15,0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm14,ymm15,0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] //load 4x4 block from b11 ymm4 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm5 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm6 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm7 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][3] B11[1][3] B11[2][2] B11[3][3] //determine correct values to store if(m_remainder == 3) { ymm0 = _mm256_blend_pd(ymm0, ymm4, 0x08); ymm1 = _mm256_blend_pd(ymm1, ymm5, 0x08); ymm2 = _mm256_blend_pd(ymm2, ymm6, 0x08); ymm3 = _mm256_blend_pd(ymm3, ymm7, 0x08); } if(m_remainder == 2) { ymm0 = _mm256_permute2f128_pd(ymm0, 
ymm4,0x30); ymm1 = _mm256_permute2f128_pd(ymm1, ymm5,0x30); ymm2 = _mm256_permute2f128_pd(ymm2, ymm6,0x30); ymm3 = _mm256_permute2f128_pd(ymm3, ymm7,0x30); } if(m_remainder == 1) { ymm0 = _mm256_blend_pd(ymm0, ymm4, 0x0E); ymm1 = _mm256_blend_pd(ymm1, ymm5, 0x0E); ymm2 = _mm256_blend_pd(ymm2, ymm6, 0x0E); ymm3 = _mm256_blend_pd(ymm3, ymm7, 0x0E); } _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[0-3][3]) } n_remainder -= 4; j += 4; } if(n_remainder) //implementation fo remaining columns(when 'N' is not a multiple of D_NR) { for(i = 0;i+D_MR-1 < m; i += D_MR) //loop along 'M' direction { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of GEMM operations to be performed(in blocks of 4x4) ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha Value ///GEMM for previously calculated values /// //load 4x4 block from b11 if(n_remainder == 3) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 2) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_broadcast_sd((double const *)&ones); ymm3 = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 
1) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_broadcast_sd((double const *)&ones); ymm2 = _mm256_broadcast_sd((double const *)&ones); ymm3 = _mm256_broadcast_sd((double const*)&ones); } ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for(k = 0; k < k_iter; k++) { ptr_b01_dup = b01; ymm8 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm9 = _mm256_loadu_pd((double const *)(a10 + cs_a)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm10 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2)); //A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm11 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); 
//ymm4 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += 
(B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ///GEMM code ends/// ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm4); //B11[0-3][0] *alpha -= ymm4 ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm5); //B11[0-3][1] *alpha -= ymm5 ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm6); //B11[0-3][2] *alpha -= ymm6 ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm7); //B11[0-3][3] *alpha -= ymm7 ///implement TRSM/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11 + 1)); //A11[1][1] ymm9 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][2] ymm12 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm4 = _mm256_unpacklo_pd(ymm4, ymm8); //A11[0][0] A11[0][0] A11[1][1] A11[1][1] ymm8 = _mm256_unpacklo_pd(ymm11, ymm13); //A11[2][2] A11[2][2] A11[3][3] A11[3][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); ymm4 = _mm256_blend_pd(ymm4, ymm8, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm14 = _mm256_div_pd(ymm14, ymm4); //1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] ////unpacklow//// ymm8 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm13 = 
_mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] //rearrange low elements ymm4 = _mm256_permute2f128_pd(ymm8,ymm13,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm11 = _mm256_permute2f128_pd(ymm8,ymm13,0x31);//B11[2][0] B11[2][1] B11[2][2] B11[2][3] /* mat_b_rearr[0] = _mm256_mul_pd(mat_b_rearr[0], alphaReg); mat_b_rearr[2] = _mm256_mul_pd(mat_b_rearr[2], alphaReg); */ ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] //rearrange high elements ymm8 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm13 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] B11[3][3] /* mat_b_rearr[1] = _mm256_mul_pd(mat_b_rearr[1], alphaReg); mat_b_rearr[3] = _mm256_mul_pd(mat_b_rearr[3], alphaReg); */ //extract a00 ymm15 = _mm256_permute_pd(ymm14, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0] //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B ymm4 = _mm256_mul_pd(ymm4, ymm15); //B11[0][0-3] /= A11[0][0] //extract diag a11 from a ymm15 = _mm256_permute_pd(ymm14, 0x03); //1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1] //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (3, 0) ymm8 = _mm256_fnmadd_pd(ymm5, ymm4, ymm8);//d = c - (a*b) //B11[1][0-3] -= A11[1][0] * B11[0][0-3] ymm11 = _mm256_fnmadd_pd(ymm6, ymm4, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][0] * B11[0][0-3] ymm13 = _mm256_fnmadd_pd(ymm7, ymm4, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][0] * B11[0][0-3] //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B ymm8 = _mm256_mul_pd(ymm8, ymm15); 
//B11[1][0-3] /= A11[1][1] //extract diag a22 from a ymm15 = _mm256_permute_pd(ymm14, 0x00); //1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2] //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) ymm11 = _mm256_fnmadd_pd(ymm9, ymm8, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][1] * B11[1][0-3] ymm13 = _mm256_fnmadd_pd(ymm10, ymm8, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][1] * B11[1][0-3] //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B ymm11 = _mm256_mul_pd(ymm11, ymm15); //B11[2][0-3] /= A11[2][2] //extract diag a33 from a ymm15 = _mm256_permute_pd(ymm14, 0x0C); //1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3] //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) ymm13 = _mm256_fnmadd_pd(ymm12, ymm11, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][2] * B11[2][0-3] //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B ymm13 = _mm256_mul_pd(ymm13, ymm15); //B11[3][0-3] /= A11[3][3] //--> Transpose and store results of columns of B block <--// ////unpacklow//// ymm1 = _mm256_unpacklo_pd(ymm4, ymm8); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm11, ymm13); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1,ymm3,0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1,ymm3,0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ////unpackhigh//// ymm14 = _mm256_unpackhi_pd(ymm4, ymm8); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm15 = _mm256_unpackhi_pd(ymm11, ymm13); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] //rearrange high elements ymm1 = _mm256_permute2f128_pd(ymm14,ymm15,0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = 
_mm256_permute2f128_pd(ymm14,ymm15,0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] if(n_remainder == 3) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store(B11[0-3][2]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) } } if(m_remainder) //implementation for remainder rows(when 'M' is not a multiple of D_MR) { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of times GEMM operations to be performed ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to hold alpha value ///GEMM for previously calculated values /// //load 4x4 block from b11 if(n_remainder == 3) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 2) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_broadcast_sd((double const *)&ones); ymm3 = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 1) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_broadcast_sd((double const *)&ones); ymm2 = _mm256_broadcast_sd((double 
const *)&ones); ymm3 = _mm256_broadcast_sd((double const *)&ones); } ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_b01_dup = b01; ymm8 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm9 = _mm256_loadu_pd((double const *)(a10 + cs_a)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm10 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2)); //A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm11 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B10[0][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B10[0][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B10[0][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B10[0][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B10[1][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B10[1][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B10[1][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B10[1][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) 
ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B10[2][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B10[2][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B10[2][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B10[2][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B10[3][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B10[3][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B10[3][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B10[3][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] 
B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ymm8 = _mm256_fmsub_pd(ymm0, ymm16, ymm4); //B11[0-3][0] * alpha -= ymm4 ymm9 = _mm256_fmsub_pd(ymm1, ymm16, ymm5); //B11[0-3][1] * alpha -= ymm5 ymm10 = _mm256_fmsub_pd(ymm2, ymm16, ymm6); //B11[0-3][2] * alpha -= ymm6 ymm11 = _mm256_fmsub_pd(ymm3, ymm16, ymm7); //B11[0-3][3] * alpha -= ymm7 ///implement TRSM/// //determine correct values to store if(m_remainder == 3) { ymm0 = _mm256_blend_pd(ymm8, ymm0, 0x08); ymm1 = _mm256_blend_pd(ymm9, ymm1, 0x08); ymm2 = _mm256_blend_pd(ymm10, ymm2, 0x08); ymm3 = _mm256_blend_pd(ymm11, ymm3, 0x08); } if(m_remainder == 2) { ymm0 = _mm256_permute2f128_pd(ymm8, ymm0, 0x30); ymm1 = _mm256_permute2f128_pd(ymm9, ymm1, 0x30); ymm2 = _mm256_permute2f128_pd(ymm10, ymm2, 0x30); ymm3 = _mm256_permute2f128_pd(ymm11, ymm3, 0x30); } if(m_remainder == 1) { ymm0 = _mm256_blend_pd(ymm8, ymm0, 0x0E); ymm1 = _mm256_blend_pd(ymm9, ymm1, 0x0E); ymm2 = _mm256_blend_pd(ymm10, ymm2, 0x0E); ymm3 = _mm256_blend_pd(ymm11, ymm3, 0x0E); } if(n_remainder == 3) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store(B11[0-3][2]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) } ///scalar code for trsm without alpha/// dtrsm_small_AlXB(a11, b11, m_remainder, n_remainder, cs_a, cs_b); } } return BLIS_SUCCESS; } /* TRSM for the case AX = alpha * B, Double precision * A is lower-triangular, no-transpose, unit diagonal * 
dimensions A: mxm X: mxn B: mxn b01---> * ***************** ** * * * * * * * * * * * * * * *b01* * * * * * * * * * * a10 ****** b11 ***************** | * * * | * * * * * | * * * | * * * * * | *a10*a11* | *b11* * * * v * * * v * * * * * *********** ***************** * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * **************** ***************** a11---> */ static err_t bli_dtrsm_small_AlXB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 4; //size of block along 'M' dimpension dim_t D_NR = 8; //size of block along 'N' dimension dim_t m = bli_obj_length(b); // number of rows of matrix B dim_t n = bli_obj_width(b); // number of columns of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_ROME) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ALXB_NAPLES) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t m_remainder = m % D_MR; //number of remainder rows dim_t n_remainder = n % D_NR; //number of remainder columns dim_t cs_a = bli_obj_col_stride(a); // column stride of A dim_t cs_b = bli_obj_col_stride(b); // column stride of B dim_t i, j, k; //loop variables dim_t k_iter; //number of times GEMM to be performed double AlphaVal = *(double *)AlphaObj->buffer; //value of alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a10, *a11, *b01, *b11; //pointers that point to blocks for GEMM and TRSM double *ptr_b01_dup; double ones = 1.0; //scratch registers __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(j = 0; j+D_NR-1 < n; j += D_NR) //loop along 'N' dimension { for(i = 0;i+D_MR-1 < m; i += D_MR) //loop along 'M' dimension { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 
= B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of times GEMM to be performed(in blocks of 4x4) ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); ///GEMM code begins/// for(k = 0; k< k_iter; k++) //loop for number of GEMM operations { ptr_b01_dup = b01; ymm16 = _mm256_loadu_pd((double const *)(a10));//A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[0][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[0][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[0][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[0][7] b01 += 1; //mobe to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[0][4]*A10[0][0] B01[0][4]*A10[1][0] B01[0][4]*A10[2][0] B01[0][4]*A10[3][0]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += 
(B01[0][5]*A10[0][0] B01[0][5]*A10[1][0] B01[0][5]*A10[2][0] B01[0][5]*A10[3][0]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[0][6]*A10[0][0] B01[0][6]*A10[1][0] B01[0][6]*A10[2][0] B01[0][6]*A10[3][0]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[0][7]*A10[0][0] B01[0][7]*A10[1][0] B01[0][7]*A10[2][0] B01[0][7]*A10[3][0]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a));//A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[1][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[1][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[1][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[1][7] b01 += 1; //mobe to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[1][4]*A10[0][1] B01[1][4]*A10[1][1] B01[1][4]*A10[2][1] B01[1][4]*A10[3][1]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[1][5]*A10[0][1] B01[1][5]*A10[1][1] B01[1][5]*A10[2][1] B01[1][5]*A10[3][1]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[1][6]*A10[0][1] B01[1][6]*A10[1][1] 
B01[1][6]*A10[2][1] B01[1][6]*A10[3][1]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[1][7]*A10[0][1] B01[1][7]*A10[1][1] B01[1][7]*A10[2][1] B01[1][7]*A10[3][1]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2));//A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[2][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[2][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[2][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[2][7] b01 += 1; //mobe to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[2][4]*A10[0][2] B01[2][4]*A10[1][2] B01[2][4]*A10[2][2] B01[2][4]*A10[3][2]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[2][5]*A10[0][2] B01[2][5]*A10[1][2] B01[2][5]*A10[2][2] B01[2][5]*A10[3][2]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[2][6]*A10[0][2] B01[2][6]*A10[1][2] B01[2][6]*A10[2][2] B01[2][6]*A10[3][2]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[2][7]*A10[0][2] B01[2][7]*A10[1][2] B01[2][7]*A10[2][2] 
B01[2][7]*A10[3][2]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3));//A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[3][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[3][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[3][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[3][7] b01 += 1; //mobe to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[3][0]*A10[0][3] B01[3][0]*A10[3][0] B01[3][0]*A10[2][3] B01[3][0]*A10[3][0]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[3][1]*A10[0][3] B01[3][1]*A10[3][0] B01[3][1]*A10[2][3] B01[3][1]*A10[3][0]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[3][2]*A10[0][3] B01[3][2]*A10[3][0] B01[3][2]*A10[2][3] B01[3][2]*A10[3][0]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[3][3]*A10[0][3] B01[3][3]*A10[3][0] B01[3][3]*A10[2][3] B01[3][3]*A10[3][0]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[3][4]*A10[0][3] B01[3][4]*A10[3][0] B01[3][4]*A10[2][3] B01[3][4]*A10[3][3]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[3][5]*A10[0][3] B01[3][5]*A10[3][0] B01[3][5]*A10[2][3] B01[3][5]*A10[3][3]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[3][6]*A10[0][3] B01[3][6]*A10[3][0] B01[3][6]*A10[2][3] B01[3][6]*A10[3][3]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[3][7]*A10[0][3] B01[3][7]*A10[3][0] B01[3][7]*A10[2][3] B01[3][7]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to calculate next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to calculate next block of B for GEMM } 
ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to hold alpha ymm0 = _mm256_loadu_pd((double const *)(b11 + cs_b *0)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b *1)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b *2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b *3)); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm4 = _mm256_loadu_pd((double const *)(b11 + cs_b *4)); //B11[0][4] B11[1][4] B11[2][4] B11[3][4] ymm5 = _mm256_loadu_pd((double const *)(b11 + cs_b *5)); //B11[0][5] B11[1][5] B11[2][5] B11[3][5] ymm6 = _mm256_loadu_pd((double const *)(b11 + cs_b *6)); //B11[0][6] B11[1][6] B11[2][6] B11[3][6] ymm7 = _mm256_loadu_pd((double const *)(b11 + cs_b *7)); //B11[0][7] B11[1][7] B11[2][7] B11[3][7] ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm8); //B11[0-3][0] * alpha -= B01[0-3][0] ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm9); //B11[0-3][1] * alpha -= B01[0-3][1] ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm10); //B11[0-3][2] * alpha -= B01[0-3][2] ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm11); //B11[0-3][3] * alpha -= B01[0-3][3] ymm4 = _mm256_fmsub_pd(ymm4, ymm16, ymm12); //B11[0-3][4] * alpha -= B01[0-3][4] ymm5 = _mm256_fmsub_pd(ymm5, ymm16, ymm13); //B11[0-3][5] * alpha -= B01[0-3][5] ymm6 = _mm256_fmsub_pd(ymm6, ymm16, ymm14); //B11[0-3][6] * alpha -= B01[0-3][6] ymm7 = _mm256_fmsub_pd(ymm7, ymm16, ymm15); //B11[0-3][7] * alpha -= B01[0-3][7] ///implement TRSM/// ///transpose of B11// ///unpacklow/// ymm9 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm11 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] ymm13 = _mm256_unpacklo_pd(ymm4, ymm5); //B11[0][4] B11[0][5] B11[2][4] B11[2][5] ymm15 = _mm256_unpacklo_pd(ymm6, ymm7); //B11[0][6] B11[0][7] B11[2][6] B11[2][7] //rearrange low elements ymm8 = _mm256_permute2f128_pd(ymm9,ymm11,0x20); //B11[0][0] B11[0][1] 
B11[0][2] B11[0][3] ymm10 = _mm256_permute2f128_pd(ymm9,ymm11,0x31); //B11[2][0] B11[2][1] B11[2][2] B11[2][3] ymm12 = _mm256_permute2f128_pd(ymm13,ymm15,0x20); //B11[4][0] B11[4][1] B11[4][2] B11[4][3] ymm14 = _mm256_permute2f128_pd(ymm13,ymm15,0x31); //B11[6][0] B11[6][1] B11[6][2] B11[6][3] ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] ymm4 = _mm256_unpackhi_pd(ymm4, ymm5); //B11[1][4] B11[1][5] B11[3][4] B11[3][5] ymm5 = _mm256_unpackhi_pd(ymm6, ymm7); //B11[1][6] B11[1][7] B11[3][6] B11[3][7] //rearrange high elements ymm9 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm11 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] B11[3][3] ymm13 = _mm256_permute2f128_pd(ymm4,ymm5,0x20); //B11[5][0] B11[5][1] B11[5][2] B11[5][3] ymm15 = _mm256_permute2f128_pd(ymm4,ymm5,0x31); //B11[7][0] B11[7][1] B11[7][2] B11[7][3] //broadcast diagonal elements of A11 ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_b +1)); //A11[1][1] ymm3 = _mm256_broadcast_sd((double const *)(a11+cs_b*2 + 2)); //A11[2][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+cs_b*3 + 3)); //A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11 +1)); //A11[1][0] ymm3 = _mm256_broadcast_sd((double const *)(a11 +2)); //A11[2][0] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][0] a11 += cs_a; //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm2, ymm8, ymm9); //B11[1][0-3] -= A11[1][0] * B11[0-3][0] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[2][0-3] -= A11[2][0] * B11[0-3][0] ymm11 = _mm256_fnmadd_pd(ymm4, ymm8, ymm11); //B11[3][0-3] -= A11[3][0] * B11[0-3][0] ymm13 = _mm256_fnmadd_pd(ymm2, ymm12, ymm13); //B11[5][0-3] -= A11[1][0] * B11[0-3][4] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[6][0-3] -= A11[2][0] * 
B11[0-3][4] ymm15 = _mm256_fnmadd_pd(ymm4, ymm12, ymm15); //B11[7][0-3] -= A11[3][0] * B11[0-3][4] ymm3 = _mm256_broadcast_sd((double const *)(a11 +2)); //A11[2][1] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][1] a11 += cs_a; //(ROw2): FMA operations ymm10 = _mm256_fnmadd_pd(ymm3, ymm9, ymm10); //B11[2][0-3] -= A11[2][1] * B11[0-3][1] ymm11 = _mm256_fnmadd_pd(ymm4, ymm9, ymm11); //B11[3][0-3] -= A11[3][1] * B11[0-3][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm13, ymm14); //B11[6][0-3] -= A11[2][1] * B11[0-3][5] ymm15 = _mm256_fnmadd_pd(ymm4, ymm13, ymm15); //B11[7][0-3] -= A11[3][1] * B11[0-3][5] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][2] a11 += cs_a; //(ROw2): FMA operations ymm11 = _mm256_fnmadd_pd(ymm4, ymm10, ymm11); //B11[3][0-3] -= A11[3][2] * B11[0-3][2] ymm15 = _mm256_fnmadd_pd(ymm4, ymm14, ymm15); //B11[7][0-3] -= A11[3][2] * B11[0-3][6] //unpacklow// ymm1 = _mm256_unpacklo_pd(ymm8, ymm9); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm10, ymm11); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] ymm5 = _mm256_unpacklo_pd(ymm12, ymm13); //B11[4][0] B11[5][0] B11[4][2] B11[5][2] ymm7 = _mm256_unpacklo_pd(ymm14, ymm15); //B11[6][0] B11[7][0] B11[6][2] B11[7][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1, ymm3, 0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1, ymm3, 0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm4 = _mm256_permute2f128_pd(ymm5, ymm7, 0x20); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm6 = _mm256_permute2f128_pd(ymm5, ymm7, 0x31); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ///unpack high/// ymm8 = _mm256_unpackhi_pd(ymm8, ymm9); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm9 = _mm256_unpackhi_pd(ymm10, ymm11); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] ymm12 = _mm256_unpackhi_pd(ymm12, ymm13); //B11[4][1] B11[5][1] B11[4][3] B11[5][3] ymm13 = _mm256_unpackhi_pd(ymm14, ymm15); //B11[6][1] B11[7][1] B11[6][3] B11[7][3] //rearrange high 
elements ymm1 = _mm256_permute2f128_pd(ymm8, ymm9, 0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm8, ymm9, 0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm5 = _mm256_permute2f128_pd(ymm12, ymm13, 0x20); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm7 = _mm256_permute2f128_pd(ymm12, ymm13, 0x31); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] _mm256_storeu_pd((double *)(b11 + cs_b * 0), ymm0); //store B11[0][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 1), ymm1); //store B11[1][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store B11[2][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store B11[3][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 4), ymm4); //store B11[4][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 5), ymm5); //store B11[5][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 6), ymm6); //store B11[6][0-3] _mm256_storeu_pd((double *)(b11 + cs_b * 7), ymm7); //store B11[7][0-3] } if(m_remainder) //implementation for reamainder rows(when 'M' is not a multiple of D_MR) { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of times GEMM operation to be done(in blocks of 4x4) ymm8 = _mm256_setzero_pd(); ymm9 = _mm256_setzero_pd(); ymm10 = _mm256_setzero_pd(); ymm11 = _mm256_setzero_pd(); ymm12 = _mm256_setzero_pd(); ymm13 = _mm256_setzero_pd(); ymm14 = _mm256_setzero_pd(); ymm15 = _mm256_setzero_pd(); ///GEMM code Begins/// for(k = 0; k< k_iter; k++) //loop for number of GEMM operations { ptr_b01_dup = b01; ymm16 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm6 = _mm256_broadcast_sd((double const 
*)(b01 + cs_b * 2)); //B01[0][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[0][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[0][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[0][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[0][7] b01 += 1; //move to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0] ) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[0][4]*A10[0][0] B01[0][4]*A10[1][0] B01[0][4]*A10[2][0] B01[0][4]*A10[3][0]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[0][5]*A10[0][0] B01[0][5]*A10[1][0] B01[0][5]*A10[2][0] B01[0][5]*A10[3][0]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[0][6]*A10[0][0] B01[0][6]*A10[1][0] B01[0][6]*A10[2][0] B01[0][6]*A10[3][0]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm16 += (B01[0][7]*A10[0][0] B01[0][7]*A10[1][0] B01[0][7]*A10[2][0] B01[0][7]*A10[3][0]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 1)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); 
//B01[1][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[1][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[1][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[1][7] b01 += 1; //move to next row of B01 ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[1][4]*A10[0][1] B01[1][4]*A10[1][1] B01[1][4]*A10[2][1] B01[1][4]*A10[3][1]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[1][5]*A10[0][1] B01[1][5]*A10[1][1] B01[1][5]*A10[2][1] B01[1][5]*A10[3][1]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[1][6]*A10[0][1] B01[1][6]*A10[1][1] B01[1][6]*A10[2][1] B01[1][6]*A10[3][1]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[1][7]*A10[0][1] B01[1][7]*A10[1][1] B01[1][7]*A10[2][1] B01[1][7]*A10[3][1]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2)); //A10[0][2] //A10[1][2] A10[2][2] A10[3][2] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[2][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[2][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[2][6] ymm3 = 
_mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[2][7] b01 += 1; //move to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm9 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm10 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm11 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm12 += (B01[2][4]*A10[0][2] B01[2][4]*A10[1][2] B01[2][4]*A10[2][2] B01[2][0]*A10[3][2]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm13 += (B01[2][5]*A10[0][2] B01[2][5]*A10[1][2] B01[2][5]*A10[2][2] B01[2][1]*A10[3][2]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm14 += (B01[2][6]*A10[0][2] B01[2][6]*A10[1][2] B01[2][6]*A10[2][2] B01[2][2]*A10[3][2]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm15 += (B01[2][7]*A10[0][2] B01[2][7]*A10[1][2] B01[2][7]*A10[2][2] B01[2][3]*A10[3][2]) ymm16 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm4 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm5 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm6 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm7 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] ymm0 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 4)); //B01[3][4] ymm1 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 5)); //B01[3][5] ymm2 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 6)); //B01[3][6] ymm3 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 7)); //B01[3][7] b01 += 1; //move to next row of B ymm8 = _mm256_fmadd_pd(ymm4, ymm16, ymm8); //ymm8 += (B01[3][0]*A10[0][3] 
B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm9 = _mm256_fmadd_pd(ymm5, ymm16, ymm9); //ymm8 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm10 = _mm256_fmadd_pd(ymm6, ymm16, ymm10); //ymm8 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm11 = _mm256_fmadd_pd(ymm7, ymm16, ymm11); //ymm8 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) ymm12 = _mm256_fmadd_pd(ymm0, ymm16, ymm12); //ymm8 += (B01[3][0]*A10[0][3] B01[3][4]*A10[1][3] B01[3][4]*A10[2][3] B01[3][4]*A10[3][3]) ymm13 = _mm256_fmadd_pd(ymm1, ymm16, ymm13); //ymm8 += (B01[3][1]*A10[0][3] B01[3][5]*A10[1][3] B01[3][5]*A10[2][3] B01[3][5]*A10[3][3]) ymm14 = _mm256_fmadd_pd(ymm2, ymm16, ymm14); //ymm8 += (B01[3][2]*A10[0][3] B01[3][6]*A10[1][3] B01[3][6]*A10[2][3] B01[3][6]*A10[3][3]) ymm15 = _mm256_fmadd_pd(ymm3, ymm16, ymm15); //ymm8 += (B01[3][3]*A10[0][3] B01[3][7]*A10[1][3] B01[3][7]*A10[2][3] B01[3][7]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha value ymm0 = _mm256_loadu_pd((double const *)(b11 + cs_b *0)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b *1)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b *2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b *3)); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm4 = _mm256_loadu_pd((double const *)(b11 + cs_b *4)); //B11[0][4] B11[1][4] B11[2][4] B11[3][4] ymm5 = _mm256_loadu_pd((double const *)(b11 + cs_b *5)); //B11[0][5] B11[1][5] B11[2][5] B11[3][5] ymm6 = _mm256_loadu_pd((double const *)(b11 + cs_b *6)); //B11[0][6] B11[1][6] B11[2][6] B11[3][6] ymm7 = _mm256_loadu_pd((double 
const *)(b11 + cs_b *7)); //B11[0][7] B11[1][7] B11[2][7] B11[3][7] ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm8); //B11[0-3][0] *alpha -= B01[0-3][0] ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm9); //B11[0-3][1] *alpha -= B01[0-3][1] ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm10); //B11[0-3][2] *alpha -= B01[0-3][2] ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm11); //B11[0-3][3] *alpha -= B01[0-3][3] ymm4 = _mm256_fmsub_pd(ymm4, ymm16, ymm12); //B11[0-3][4] *alpha -= B01[0-3][4] ymm5 = _mm256_fmsub_pd(ymm5, ymm16, ymm13); //B11[0-3][5] *alpha -= B01[0-3][5] ymm6 = _mm256_fmsub_pd(ymm6, ymm16, ymm14); //B11[0-3][6] *alpha -= B01[0-3][6] ymm7 = _mm256_fmsub_pd(ymm7, ymm16, ymm15); //B11[0-3][7] *alpha -= B01[0-3][7] ///implement TRSM/// ///unpacklow/// ymm9 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm11 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] ymm13 = _mm256_unpacklo_pd(ymm4, ymm5); //B11[0][4] B11[0][5] B11[1][4] B11[1][5] ymm15 = _mm256_unpacklo_pd(ymm6, ymm7); //B11[0][6] B11[0][7] B11[1][6] B11[1][7] //rearrange low elements ymm8 = _mm256_permute2f128_pd(ymm9,ymm11,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm10 = _mm256_permute2f128_pd(ymm9,ymm11,0x31); //B11[2][0] B11[2][1] B11[2][2] B11[2][3] ymm12 = _mm256_permute2f128_pd(ymm13,ymm15,0x20); //B11[4][0] B11[4][1] B11[4][2] B11[4][3] ymm14 = _mm256_permute2f128_pd(ymm13,ymm15,0x31); //B11[6][0] B11[6][1] B11[6][2] B11[6][3] ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] ymm4 = _mm256_unpackhi_pd(ymm4, ymm5); //B11[5][0] B11[5][1] B11[7][0] B11[7][1] ymm5 = _mm256_unpackhi_pd(ymm6, ymm7); //B11[5][2] B11[5][3] B11[7][2] B11[7][3] //rearrange high elements ymm9 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm11 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] 
B11[3][3] ymm13 = _mm256_permute2f128_pd(ymm4,ymm5,0x20); //B11[5][0] B11[5][1] B11[5][2] B11[5][3] ymm15 = _mm256_permute2f128_pd(ymm4,ymm5,0x31); //B11[7][0] B11[7][1] B11[7][2] B11[7][3] //broadcast diagonal elements of A11 ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_b +1)); //A11[1][1] ymm3 = _mm256_broadcast_sd((double const *)(a11+cs_b*2 + 2)); //A11[2][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+cs_b*3 + 3)); //A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11 +1)); //A11[1][0] ymm3 = _mm256_broadcast_sd((double const *)(a11 +2)); //A11[2][0] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][0] a11 += cs_a; //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm2, ymm8, ymm9); //B11[1][0-3] -= B11[0-3][0]*A11[1][0] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[2][0-3] -= B11[0-3][0]*A11[2][0] ymm11 = _mm256_fnmadd_pd(ymm4, ymm8, ymm11); //B11[3][0-3] -= B11[0-3][0]*A11[3][0] ymm13 = _mm256_fnmadd_pd(ymm2, ymm12, ymm13); //B11[5][0-3] -= B11[0-3][4]*A11[1][4] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[6][0-3] -= B11[0-3][4]*A11[2][4] ymm15 = _mm256_fnmadd_pd(ymm4, ymm12, ymm15); //B11[7][0-3] -= B11[0-3][4]*A11[3][4] ymm3 = _mm256_broadcast_sd((double const *)(a11 +2)); //A11[2][1] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][1] a11 += cs_a; //(ROw2): FMA operations ymm10 = _mm256_fnmadd_pd(ymm3, ymm9, ymm10); //B11[2][0-3] -= A11[2][1] * B11[0-3][1] ymm11 = _mm256_fnmadd_pd(ymm4, ymm9, ymm11); //B11[3][0-3] -= A11[3][1] * B11[0-3][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm13, ymm14); //B11[6][0-3] -= A11[2][1] * B11[0-3][5] ymm15 = _mm256_fnmadd_pd(ymm4, ymm13, ymm15); //B11[7][0-3] -= A11[3][1] * B11[0-3][5] ymm4 = _mm256_broadcast_sd((double const *)(a11 +3)); //A11[3][2] a11 += cs_a; //(ROw2): FMA operations ymm11 = _mm256_fnmadd_pd(ymm4, ymm10, ymm11); //B11[0-3][3] -= A11[3][2]*B11[0-3][2] ymm15 = _mm256_fnmadd_pd(ymm4, ymm14, 
ymm15); //B11[0-3][7] -= A11[3][2]*B11[0-3][6] //unpacklow// ymm1 = _mm256_unpacklo_pd(ymm8, ymm9); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm10, ymm11); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] ymm5 = _mm256_unpacklo_pd(ymm12, ymm13); //B11[4][0] B11[5][0] B11[4][2] B11[5][2] ymm7 = _mm256_unpacklo_pd(ymm14, ymm15); //B11[6][0] B11[7][0] B11[6][2] B11[7][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1, ymm3, 0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1, ymm3, 0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm4 = _mm256_permute2f128_pd(ymm5, ymm7, 0x20); //B11[0][4] B11[1][4] B11[2][4] B11[3][4] ymm6 = _mm256_permute2f128_pd(ymm5, ymm7, 0x31); //B11[0][6] B11[1][6] B11[2][6] B11[3][6] ///unpack high/// ymm8 = _mm256_unpackhi_pd(ymm8, ymm9); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm9 = _mm256_unpackhi_pd(ymm10, ymm11); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] ymm12 = _mm256_unpackhi_pd(ymm12, ymm13); //B11[0][5] B11[1][5] B11[0][7] B11[1][7] ymm13 = _mm256_unpackhi_pd(ymm14, ymm15); //B11[2][5] B11[3][5] B11[2][7] B11[3][7] //rearrange high elements ymm1 = _mm256_permute2f128_pd(ymm8, ymm9, 0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm8, ymm9, 0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm5 = _mm256_permute2f128_pd(ymm12, ymm13, 0x20); //B11[0][5] B11[1][5] B11[2][5] B11[3][5] ymm7 = _mm256_permute2f128_pd(ymm12, ymm13, 0x31); //B11[0][7] B11[1][7] B11[2][7] B11[3][7] ymm8 = _mm256_loadu_pd((double const *)(b11 + cs_b * 0)); //load B11[0-3][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b * 1)); //load B11[0-3][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //load B11[0-3][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //load B11[0-3][3] ymm12 = _mm256_loadu_pd((double const *)(b11 + cs_b * 4)); //load B11[0-3][4] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b * 5)); //load B11[0-3][5] ymm14 = 
_mm256_loadu_pd((double const *)(b11 + cs_b * 6)); //load B11[0-3][6] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b * 7)); //load B11[0-3][7] //determine correct values to store if(m_remainder == 3) { ymm0 = _mm256_blend_pd(ymm0, ymm8, 0x08); ymm1 = _mm256_blend_pd(ymm1, ymm9, 0x08); ymm2 = _mm256_blend_pd(ymm2, ymm10, 0x08); ymm3 = _mm256_blend_pd(ymm3, ymm11, 0x08); ymm4 = _mm256_blend_pd(ymm4, ymm12, 0x08); ymm5 = _mm256_blend_pd(ymm5, ymm13, 0x08); ymm6 = _mm256_blend_pd(ymm6, ymm14, 0x08); ymm7 = _mm256_blend_pd(ymm7, ymm15, 0x08); } if(m_remainder == 2) { ymm0 = _mm256_permute2f128_pd(ymm0, ymm8, 0x30); ymm1 = _mm256_permute2f128_pd(ymm1, ymm9, 0x30); ymm2 = _mm256_permute2f128_pd(ymm2, ymm10, 0x30); ymm3 = _mm256_permute2f128_pd(ymm3, ymm11, 0x30); ymm4 = _mm256_permute2f128_pd(ymm4, ymm12, 0x30); ymm5 = _mm256_permute2f128_pd(ymm5, ymm13, 0x30); ymm6 = _mm256_permute2f128_pd(ymm6, ymm14, 0x30); ymm7 = _mm256_permute2f128_pd(ymm7, ymm15, 0x30); } if(m_remainder == 1) { ymm0 = _mm256_blend_pd(ymm0, ymm8, 0x0E); ymm1 = _mm256_blend_pd(ymm1, ymm9, 0x0E); ymm2 = _mm256_blend_pd(ymm2, ymm10, 0x0E); ymm3 = _mm256_blend_pd(ymm3, ymm11, 0x0E); ymm4 = _mm256_blend_pd(ymm4, ymm12, 0x0E); ymm5 = _mm256_blend_pd(ymm5, ymm13, 0x0E); ymm6 = _mm256_blend_pd(ymm6, ymm14, 0x0E); ymm7 = _mm256_blend_pd(ymm7, ymm15, 0x0E); } _mm256_storeu_pd((double *)(b11 + cs_b * 0), ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b * 1), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[0-3][3]) _mm256_storeu_pd((double *)(b11 + cs_b * 4), ymm4); //store(B11[0-3][4]) _mm256_storeu_pd((double *)(b11 + cs_b * 5), ymm5); //store(B11[0-3][5]) _mm256_storeu_pd((double *)(b11 + cs_b * 6), ymm6); //store(B11[0-3][6]) _mm256_storeu_pd((double *)(b11 + cs_b * 7), ymm7); //store(B11[0-3][7]) } } if((n & 4)) //implementation for remainder columns(when 'N' is a 
multiple of 4) { for(i = 0;i+D_MR-1 < m; i += D_MR) //loop along 'M' direction { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of times GEMM to be performed(in block of 4) ///GEMM for previously calculated values /// //load 4x4 block from b11 ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b*2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b*3)); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_b01_dup = b01; ymm8 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm9 = _mm256_loadu_pd((double const *)(a10 + cs_a)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm10 = _mm256_loadu_pd((double const *)(a10 + cs_a*2)); //A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm11 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm5 = 
_mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B01[2][1]*A10[1][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B01[2][2]*A10[2][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, 
ymm10, ymm7); //ymm7 += (B01[2][3]*A10[3][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm4); //B11[0-3][0] *alpha -= ymm4 ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm5); //B01[0-3][1] *alpha -= ymm5 ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm6); //B01[0-3][2] *alpha -= ymm6 ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm7); //B01[0-3][3] *alpha -= ymm7 ///implement TRSM/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11 + 1)); //A11[1][1] ymm9 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][2] 
ymm12 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][3] ////unpacklow//// ymm8 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm13 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] //rearrange low elements ymm4 = _mm256_permute2f128_pd(ymm8,ymm13,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm11 = _mm256_permute2f128_pd(ymm8,ymm13,0x31);//B11[2][0] B11[2][1] B11[2][2] B11[2][3] ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] //rearrange high elements ymm8 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm13 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] B11[3][3] //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (3, 0) ymm8 = _mm256_fnmadd_pd(ymm5, ymm4, ymm8);//d = c - (a*b) //B11[1][0-3] -= A11[1][0]*B11[0][0-3] ymm11 = _mm256_fnmadd_pd(ymm6, ymm4, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][0]*B11[0][0-3] ymm13 = _mm256_fnmadd_pd(ymm7, ymm4, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][0]*B11[0][0-3] //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) ymm11 = _mm256_fnmadd_pd(ymm9, ymm8, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][1]*B11[1][0-3] ymm13 = _mm256_fnmadd_pd(ymm10, ymm8, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][1]*B11[1][0-3] //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) ymm13 = _mm256_fnmadd_pd(ymm12, ymm11, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][2]*B11[2][0-3] //--> Transpose and store results of columns of B block <--// ////unpacklow//// ymm1 = _mm256_unpacklo_pd(ymm4, ymm8); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm11, ymm13); //B11[2][0] B11[3][0] 
B11[2][2] B11[3][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1,ymm3,0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1,ymm3,0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ////unpackhigh//// ymm14 = _mm256_unpackhi_pd(ymm4, ymm8); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm15 = _mm256_unpackhi_pd(ymm11, ymm13); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] //rearrange high elements ymm1 = _mm256_permute2f128_pd(ymm14,ymm15,0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm14,ymm15,0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b*2), ymm2); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b*3), ymm3); //store(B11[0-3][3]) } if(m_remainder) //implementation for remainder rows(when 'M' is not a multiple of D_MR) { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha k_iter = i / D_MR; //number of GEMM operations to be performed(in blocks of 4x4) ///GEMM for previously calculated values /// //load 4x4 block from b11 ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for(k = 0; k < k_iter; k++) 
//looop for number of GEMM operations { ptr_b01_dup = b01; ymm8 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm9 = _mm256_loadu_pd((double const *)(a10 + cs_a)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm10 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2)); //A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm11 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] 
B01[1][2]*A10[3][1]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B 
for GEMM } ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm4); //B11[0-3][0] *alpha -= ymm4 ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm5); //B11[0-3][1] *alpha -= ymm5 ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm6); //B11[0-3][2] *alpha -= ymm6 ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm7); //B11[0-3][3] *alpha -= ymm7 ///implement TRSM/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11 + 1)); //A11[1][1] ymm9 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][2] ymm12 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][3] ////unpacklow//// ymm8 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm13 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] //rearrange low elements ymm4 = _mm256_permute2f128_pd(ymm8,ymm13,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm11 = _mm256_permute2f128_pd(ymm8,ymm13,0x31);//B11[2][0] B11[2][1] B11[2][2] B11[2][3] ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] //rearrange high elements ymm8 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm13 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] B11[3][3] //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (3, 0) ymm8 = _mm256_fnmadd_pd(ymm5, ymm4, ymm8);//d = c - (a*b) //B11[1][0-3] -= 
A11[1][0]* B11[0][0-3] ymm11 = _mm256_fnmadd_pd(ymm6, ymm4, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][0]* B11[0][0-3] ymm13 = _mm256_fnmadd_pd(ymm7, ymm4, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][0]* B11[0][0-3] //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) ymm11 = _mm256_fnmadd_pd(ymm9, ymm8, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][1]* B11[1][0-3] ymm13 = _mm256_fnmadd_pd(ymm10, ymm8, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][1]* B11[1][0-3] //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) ymm13 = _mm256_fnmadd_pd(ymm12, ymm11, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][2]* B11[2][0-3] //--> Transpose and store results of columns of B block <--// ////unpacklow//// ymm1 = _mm256_unpacklo_pd(ymm4, ymm8); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm11, ymm13); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1,ymm3,0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1,ymm3,0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ////unpackhigh//// ymm14 = _mm256_unpackhi_pd(ymm4, ymm8); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm15 = _mm256_unpackhi_pd(ymm11, ymm13); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] //rearrange high elements ymm1 = _mm256_permute2f128_pd(ymm14,ymm15,0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm14,ymm15,0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] //load 4x4 block from b11 ymm4 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm5 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm6 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm7 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][3] B11[1][3] B11[2][2] B11[3][3] //determine correct values to store if(m_remainder == 3) { 
ymm0 = _mm256_blend_pd(ymm0, ymm4, 0x08); ymm1 = _mm256_blend_pd(ymm1, ymm5, 0x08); ymm2 = _mm256_blend_pd(ymm2, ymm6, 0x08); ymm3 = _mm256_blend_pd(ymm3, ymm7, 0x08); } if(m_remainder == 2) { ymm0 = _mm256_permute2f128_pd(ymm0, ymm4,0x30); ymm1 = _mm256_permute2f128_pd(ymm1, ymm5,0x30); ymm2 = _mm256_permute2f128_pd(ymm2, ymm6,0x30); ymm3 = _mm256_permute2f128_pd(ymm3, ymm7,0x30); } if(m_remainder == 1) { ymm0 = _mm256_blend_pd(ymm0, ymm4, 0x0E); ymm1 = _mm256_blend_pd(ymm1, ymm5, 0x0E); ymm2 = _mm256_blend_pd(ymm2, ymm6, 0x0E); ymm3 = _mm256_blend_pd(ymm3, ymm7, 0x0E); } _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[0-3][3]) } n_remainder -= 4; j += 4; } if(n_remainder) //implementation fo remaining columns(when 'N' is not a multiple of D_NR) { for(i = 0;i+D_MR-1 < m; i += D_MR) //loop along 'M' direction { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of GEMM operations to be performed(in blocks of 4x4) ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha Value ///GEMM for previously calculated values /// //load 4x4 block from b11 if(n_remainder == 3) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 2) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] 
B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_broadcast_sd((double const *)&ones); ymm3 = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 1) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_broadcast_sd((double const *)&ones); ymm2 = _mm256_broadcast_sd((double const *)&ones); ymm3 = _mm256_broadcast_sd((double const*)&ones); } ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for(k = 0; k < k_iter; k++) { ptr_b01_dup = b01; ymm8 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm9 = _mm256_loadu_pd((double const *)(a10 + cs_a)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm10 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2)); //A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm11 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[0][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + 
cs_b * 1)); //B01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[1][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[2][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B01[3][3] b01 += 1; ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] 
B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ///GEMM code ends/// ymm0 = _mm256_fmsub_pd(ymm0, ymm16, ymm4); //B11[0-3][0] *alpha -= ymm4 ymm1 = _mm256_fmsub_pd(ymm1, ymm16, ymm5); //B11[0-3][1] *alpha -= ymm5 ymm2 = _mm256_fmsub_pd(ymm2, ymm16, ymm6); //B11[0-3][2] *alpha -= ymm6 ymm3 = _mm256_fmsub_pd(ymm3, ymm16, ymm7); //B11[0-3][3] *alpha -= ymm7 ///implement TRSM/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11 + 1)); //A11[1][1] ymm9 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11 + 2)); //A11[2][2] ymm12 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11 + 3)); //A11[3][3] ////unpacklow//// ymm8 = _mm256_unpacklo_pd(ymm0, ymm1); //B11[0][0] B11[0][1] B11[2][0] B11[2][1] ymm13 = _mm256_unpacklo_pd(ymm2, ymm3); //B11[0][2] B11[0][3] B11[2][2] B11[2][3] //rearrange low elements ymm4 = _mm256_permute2f128_pd(ymm8,ymm13,0x20); //B11[0][0] B11[0][1] B11[0][2] B11[0][3] ymm11 = 
_mm256_permute2f128_pd(ymm8,ymm13,0x31);//B11[2][0] B11[2][1] B11[2][2] B11[2][3] ////unpackhigh//// ymm0 = _mm256_unpackhi_pd(ymm0, ymm1); //B11[1][0] B11[1][1] B11[3][0] B11[3][1] ymm1 = _mm256_unpackhi_pd(ymm2, ymm3); //B11[1][2] B11[1][3] B11[3][2] B11[3][3] //rearrange high elements ymm8 = _mm256_permute2f128_pd(ymm0,ymm1,0x20); //B11[1][0] B11[1][1] B11[1][2] B11[1][3] ymm13 = _mm256_permute2f128_pd(ymm0,ymm1,0x31); //B11[3][0] B11[3][1] B11[3][2] B11[3][3] //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (3, 0) ymm8 = _mm256_fnmadd_pd(ymm5, ymm4, ymm8);//d = c - (a*b) //B11[1][0-3] -= A11[1][0] * B11[0][0-3] ymm11 = _mm256_fnmadd_pd(ymm6, ymm4, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][0] * B11[0][0-3] ymm13 = _mm256_fnmadd_pd(ymm7, ymm4, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][0] * B11[0][0-3] //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) ymm11 = _mm256_fnmadd_pd(ymm9, ymm8, ymm11);//d = c - (a*b) //B11[2][0-3] -= A11[2][1] * B11[1][0-3] ymm13 = _mm256_fnmadd_pd(ymm10, ymm8, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][1] * B11[1][0-3] //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) ymm13 = _mm256_fnmadd_pd(ymm12, ymm11, ymm13);//d = c - (a*b) //B11[3][0-3] -= A11[3][2] * B11[2][0-3] //--> Transpose and store results of columns of B block <--// ////unpacklow//// ymm1 = _mm256_unpacklo_pd(ymm4, ymm8); //B11[0][0] B11[1][0] B11[0][2] B11[1][2] ymm3 = _mm256_unpacklo_pd(ymm11, ymm13); //B11[2][0] B11[3][0] B11[2][2] B11[3][2] //rearrange low elements ymm0 = _mm256_permute2f128_pd(ymm1,ymm3,0x20); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_permute2f128_pd(ymm1,ymm3,0x31); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ////unpackhigh//// ymm14 = _mm256_unpackhi_pd(ymm4, ymm8); //B11[0][1] B11[1][1] B11[0][3] B11[1][3] ymm15 = _mm256_unpackhi_pd(ymm11, ymm13); //B11[2][1] B11[3][1] B11[2][3] B11[3][3] //rearrange high elements ymm1 = 
_mm256_permute2f128_pd(ymm14,ymm15,0x20); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_permute2f128_pd(ymm14,ymm15,0x31); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] if(n_remainder == 3) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store(B11[0-3][2]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) } } if(m_remainder) //implementation for remainder rows(when 'M' is not a multiple of D_MR) { a10 = L +i; //pointer to block of A to be used for GEMM a11 = L + i + (i*cs_a); //pointer to block of A to be used for TRSM b01 = B + j*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j* cs_b; //pointer to block of B to be used for TRSM k_iter = i / D_MR; //number of times GEMM operations to be performed ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to hold alpha value ///GEMM for previously calculated values /// //load 4x4 block from b11 if(n_remainder == 3) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 2) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_broadcast_sd((double const *)&ones); ymm3 = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 1) { ymm0 = _mm256_loadu_pd((double const *)(b11)); //B11[0][0] B11[1][0] B11[2][0] 
B11[3][0] ymm1 = _mm256_broadcast_sd((double const *)&ones); ymm2 = _mm256_broadcast_sd((double const *)&ones); ymm3 = _mm256_broadcast_sd((double const *)&ones); } ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_b01_dup = b01; ymm8 = _mm256_loadu_pd((double const *)(a10)); //A10[0][0] A10[1][0] A10[2][0] A10[3][0] ymm9 = _mm256_loadu_pd((double const *)(a10 + cs_a)); //A10[0][1] A10[1][1] A10[2][1] A10[3][1] ymm10 = _mm256_loadu_pd((double const *)(a10 + cs_a * 2)); //A10[0][2] A10[1][2] A10[2][2] A10[3][2] ymm11 = _mm256_loadu_pd((double const *)(a10 + cs_a * 3)); //A10[0][3] A10[1][3] A10[2][3] A10[3][3] ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B10[0][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B10[0][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B10[0][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B10[0][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B01[0][0]*A10[0][0] B01[0][0]*A10[1][0] B01[0][0]*A10[2][0] B01[0][0]*A10[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B01[0][1]*A10[0][0] B01[0][1]*A10[1][0] B01[0][1]*A10[2][0] B01[0][1]*A10[3][0]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B01[0][2]*A10[0][0] B01[0][2]*A10[1][0] B01[0][2]*A10[2][0] B01[0][2]*A10[3][0]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B01[0][3]*A10[0][0] B01[0][3]*A10[1][0] B01[0][3]*A10[2][0] B01[0][3]*A10[3][0]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B10[1][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B10[1][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B10[1][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B10[1][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm9, 
ymm4); //ymm4 += (B01[1][0]*A10[0][1] B01[1][0]*A10[1][1] B01[1][0]*A10[2][1] B01[1][0]*A10[3][1]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B01[1][1]*A10[0][1] B01[1][1]*A10[1][1] B01[1][1]*A10[2][1] B01[1][1]*A10[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B01[1][2]*A10[0][1] B01[1][2]*A10[1][1] B01[1][2]*A10[2][1] B01[1][2]*A10[3][1]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B01[1][3]*A10[0][1] B01[1][3]*A10[1][1] B01[1][3]*A10[2][1] B01[1][3]*A10[3][1]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B10[2][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B10[2][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B10[2][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B10[2][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B01[2][0]*A10[0][2] B01[2][0]*A10[1][2] B01[2][0]*A10[2][2] B01[2][0]*A10[3][2]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B01[2][1]*A10[0][2] B01[2][1]*A10[1][2] B01[2][1]*A10[2][2] B01[2][1]*A10[3][2]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B01[2][2]*A10[0][2] B01[2][2]*A10[1][2] B01[2][2]*A10[2][2] B01[2][2]*A10[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B01[2][3]*A10[0][2] B01[2][3]*A10[1][2] B01[2][3]*A10[2][2] B01[2][3]*A10[3][2]) ymm12 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 0)); //B10[3][0] ymm13 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 1)); //B10[3][1] ymm14 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 2)); //B10[3][2] ymm15 = _mm256_broadcast_sd((double const *)(b01 + cs_b * 3)); //B10[3][3] b01 += 1; //move to next row of B ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B01[3][0]*A10[0][3] B01[3][0]*A10[1][3] B01[3][0]*A10[2][3] B01[3][0]*A10[3][3]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B01[3][1]*A10[0][3] B01[3][1]*A10[1][3] B01[3][1]*A10[2][3] B01[3][1]*A10[3][3]) ymm6 = 
_mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B01[3][2]*A10[0][3] B01[3][2]*A10[1][3] B01[3][2]*A10[2][3] B01[3][2]*A10[3][3]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B01[3][3]*A10[0][3] B01[3][3]*A10[1][3] B01[3][3]*A10[2][3] B01[3][3]*A10[3][3]) a10 += D_MR * cs_a; //pointer math to find next block of A for GEMM b01 = ptr_b01_dup + D_MR; //pointer math to find next block of B for GEMM } ymm8 = _mm256_fmsub_pd(ymm0, ymm16, ymm4); //B11[0-3][0] * alpha -= ymm4 ymm9 = _mm256_fmsub_pd(ymm1, ymm16, ymm5); //B11[0-3][1] * alpha -= ymm5 ymm10 = _mm256_fmsub_pd(ymm2, ymm16, ymm6); //B11[0-3][2] * alpha -= ymm6 ymm11 = _mm256_fmsub_pd(ymm3, ymm16, ymm7); //B11[0-3][3] * alpha -= ymm7 ///implement TRSM/// //determine correct values to store if(m_remainder == 3) { ymm0 = _mm256_blend_pd(ymm8, ymm0, 0x08); ymm1 = _mm256_blend_pd(ymm9, ymm1, 0x08); ymm2 = _mm256_blend_pd(ymm10, ymm2, 0x08); ymm3 = _mm256_blend_pd(ymm11, ymm3, 0x08); } if(m_remainder == 2) { ymm0 = _mm256_permute2f128_pd(ymm8, ymm0, 0x30); ymm1 = _mm256_permute2f128_pd(ymm9, ymm1, 0x30); ymm2 = _mm256_permute2f128_pd(ymm10, ymm2, 0x30); ymm3 = _mm256_permute2f128_pd(ymm11, ymm3, 0x30); } if(m_remainder == 1) { ymm0 = _mm256_blend_pd(ymm8, ymm0, 0x0E); ymm1 = _mm256_blend_pd(ymm9, ymm1, 0x0E); ymm2 = _mm256_blend_pd(ymm10, ymm2, 0x0E); ymm3 = _mm256_blend_pd(ymm11, ymm3, 0x0E); } if(n_remainder == 3) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b * 2), ymm2); //store(B11[0-3][2]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + (cs_b)), ymm1); //store(B11[0-3][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)b11, ymm0); //store(B11[0-3][0]) } ///scalar code for trsm without alpha/// dtrsm_small_AlXB_unitDiag(a11, b11, m_remainder, n_remainder, cs_a, cs_b); } } return BLIS_SUCCESS; } /*implements 
TRSM for the case XA = alpha * B *A is upper triangular, non-unit diagonal, no transpose *dimensions: X:mxn A:nxn B: mxn */ /* b11---> a01 ----> ***************** *********** *b01*b11* * * * * * * b11 * * * * * **a01 * * a11 | ***************** ********* | | * * * * * *a11* * | | * * * * * * * * | v ***************** ****** v * * * * * * * * * * * * * * ***************** * * * */ static err_t bli_dtrsm_small_XAuB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 8; //block dimension along the rows dim_t D_NR = 4; //block dimension along the columns dim_t m = bli_obj_length(b); //number of rows dim_t n = bli_obj_width(b); //number of columns dim_t m_remainder = m % D_MR; //number of corner rows dim_t n_remainder = n % D_NR; //number of corner columns dim_t cs_a = bli_obj_col_stride(a); //column stride of matrix A dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t i, j, k; //loop variablse dim_t k_iter; //determines the number of GEMM operations to be done dim_t cs_b_offset[2]; //pre-calculated strides double ones = 1.0; double AlphaVal = *(double *)AlphaObj->buffer; //value of Alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a01, *a11, *b10, *b11; //pointers for GEMM and TRSM blocks double *ptr_a01_dup; cs_b_offset[0] = cs_b << 1; //cs_b_offset[0] = cs_b * 2; cs_b_offset[1] = cs_b_offset[0] + cs_b;//cs_b_offset[1] = cs_b * 3; //ymm scratch reginsters __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(i = 0; 
(i+D_MR-1) < m; i += D_MR) //loop along 'M' direction { for(j = 0; (j+D_NR-1) < n; j += D_NR) //loop along 'N' direction { a01 = L + j*cs_a; //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i; //pointer to block of B to be used in GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = j / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, 
ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 
+= (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] 
B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] 
B11[2][1] B11[3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += cs_a; ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][1] //3rd col a11 += cs_a; ymm3 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][2] //4th col a11 += cs_a; ymm6 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm0 = _mm256_unpacklo_pd(ymm0, ymm2); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm2 = _mm256_unpacklo_pd(ymm5, ymm6); //A11[2][2] A11[3][3] 
A11[1][1] A11[3][3] ymm0 = _mm256_blend_pd(ymm0, ymm2, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm7 = _mm256_div_pd(ymm7, ymm0); //(1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3]) ymm2 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][3] //extract a00 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) ymm8 = _mm256_mul_pd(ymm8, ymm0); //B11[0-3][0] /= A11[0][0] ymm12 = _mm256_mul_pd(ymm12, ymm0); //B11[4-7][0] /= A11[0][0] //extract a11 ymm0 = _mm256_permute_pd(ymm7, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00);//(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm1, ymm8, ymm9); //B11[0-3][1] -= B11[0-3][0] * A11[0][1] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[0-3][2] -= B11[0-3][0] * A11[0][2] ymm11 = _mm256_fnmadd_pd(ymm2, ymm8, ymm11); //B11[0-3][3] -= B11[0-3][0] * A11[0][3] ymm13 = _mm256_fnmadd_pd(ymm1, ymm12, ymm13); //B11[4-7][1] -= B11[4-7][0] * A11[0][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[4-7][2] -= B11[4-7][0] * A11[0][2] ymm15 = _mm256_fnmadd_pd(ymm2, ymm12, ymm15); //B11[4-7][3] -= B11[4-7][0] * A11[0][3] ymm9 = _mm256_mul_pd(ymm9, ymm0); //B11[0-3][1] /= A11[1][1] ymm13 = _mm256_mul_pd(ymm13, ymm0); //B11[4-7][1] /= A11[1][1] //extract a22 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row2)FMA operations ymm10 = _mm256_fnmadd_pd(ymm4, ymm9, ymm10); //B11[0-3][2] -= B11[0-3][1] * A11[1][2] ymm11 = _mm256_fnmadd_pd(ymm5, ymm9, ymm11); //B11[0-3][3] -= B11[0-3][1] * A11[1][3] ymm14 = 
_mm256_fnmadd_pd(ymm4, ymm13, ymm14); //B11[4-7][2] -= B11[4-7][1] * A11[1][2] ymm15 = _mm256_fnmadd_pd(ymm5, ymm13, ymm15); //B11[4-7][3] -= B11[4-7][1] * A11[1][3] ymm10 = _mm256_mul_pd(ymm10, ymm0); //B11[0-3][2] /= A11[2][2] ymm14 = _mm256_mul_pd(ymm14, ymm0); //B11[4-7][2] /= A11[2][2] //extract a33 ymm0 = _mm256_permute_pd(ymm7, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) //(Row3)FMA operations ymm11 = _mm256_fnmadd_pd(ymm6, ymm10, ymm11); //B11[0-3][3] -= B11[0-3][2] * A11[2][3] ymm15 = _mm256_fnmadd_pd(ymm6, ymm14, ymm15); //B11[4-7][3] -= B11[4-7][2] * A11[2][3] ymm11 = _mm256_mul_pd(ymm11, ymm0); //B11[0-3][3] /= A11[3][3] ymm15 = _mm256_mul_pd(ymm15, ymm0); //B11[4-7][3] /= A11[3][3] _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b), ymm11); //store(B11[0-3][3]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b + D_NR), ymm15);//store(B11[4-7][3]) } if(n_remainder) //implementation for remainder columns(when n is not multiple of D_NR) { a01 = L + j*cs_a; //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = j / D_NR; //number of GEMM operations to be performed(in blocks of 4x4) ///load 4x4 block of b11 ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = 
_mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation begins/// for(k = 0; k < k_iter; k++) ///loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row of A //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR));//B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, 
ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + 
cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 
= _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //subtract the calculated GEMM block from current TRSM block //load 8x4 block of B11 if(n_remainder == 3) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0-3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0-3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } if(n_remainder == 2) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 
= _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0-3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][1] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } if(n_remainder == 1) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][1] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][1] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= B10[0-3][0] ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha -= B10[4-7][0] ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha -= B10[0-3][1] ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= B10[4-7][1] ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= B10[0-3][2] ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= B10[4-7][2] ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= B10[0-3][3] ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= B10[4-7][3] ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += cs_a; ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][1] //3rd col a11 += cs_a; ymm3 = _mm256_broadcast_sd((double const 
*)(a11+0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][2] //4th col a11 += cs_a; ymm6 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm0 = _mm256_unpacklo_pd(ymm0, ymm2); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm2 = _mm256_unpacklo_pd(ymm5, ymm6); //A11[2][2] A11[3][3] A11[1][1] A11[3][3] ymm0 = _mm256_blend_pd(ymm0, ymm2, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm7 = _mm256_div_pd(ymm7, ymm0); //(1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3]) ymm2 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][3] //extract a00 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00);//(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) ymm8 = _mm256_mul_pd(ymm8, ymm0); //B11[0-3][0] /= A11[0][0] ymm12 = _mm256_mul_pd(ymm12, ymm0); //B11[4-7][0] /= A11[0][0] //extract a11 ymm0 = _mm256_permute_pd(ymm7, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00);//(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm1, ymm8, ymm9); //B11[0-3][1] -= B11[0-3][0] * A11[0][1] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[0-3][2] -= B11[0-3][0] * A11[0][2] ymm11 = _mm256_fnmadd_pd(ymm2, ymm8, ymm11); //B11[0-3][3] -= B11[0-3][0] * A11[0][3] ymm13 = _mm256_fnmadd_pd(ymm1, ymm12, ymm13); //B11[4-7][1] -= B11[4-7][0] * A11[0][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[4-7][2] -= B11[4-7][0] * A11[0][2] ymm15 = _mm256_fnmadd_pd(ymm2, ymm12, ymm15); //B11[4-7][3] -= B11[4-7][0] * A11[0][3] ymm9 = _mm256_mul_pd(ymm9, ymm0); //B11[0-3][1] /= A11[1][1] ymm13 = _mm256_mul_pd(ymm13, ymm0); 
//B11[4-7][1] /= A11[1][1] //extract a22 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row2)FMA operations ymm10 = _mm256_fnmadd_pd(ymm4, ymm9, ymm10); //B11[0-3][2] -= B11[0-3][1] * A11[1][2] ymm11 = _mm256_fnmadd_pd(ymm5, ymm9, ymm11); //B11[0-3][3] -= B11[0-3][1] * A11[1][3] ymm14 = _mm256_fnmadd_pd(ymm4, ymm13, ymm14); //B11[4-7][2] -= B11[4-7][1] * A11[1][2] ymm15 = _mm256_fnmadd_pd(ymm5, ymm13, ymm15); //B11[4-7][3] -= B11[4-7][1] * A11[1][3] ymm10 = _mm256_mul_pd(ymm10, ymm0); //B11[0-3][2] /= A11[2][2] ymm14 = _mm256_mul_pd(ymm14, ymm0); //B11[4-7][2] /= A11[2][2] //extract a33 ymm0 = _mm256_permute_pd(ymm7, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11); //(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) //(Row3)FMA operations ymm11 = _mm256_fnmadd_pd(ymm6, ymm10, ymm11); //B11[0-3][3] -= B11[0-3][2] * A11[2][3] ymm15 = _mm256_fnmadd_pd(ymm6, ymm14, ymm15); //B11[4-7][3] -= B11[4-7][2] * A11[2][3] ymm11 = _mm256_mul_pd(ymm11, ymm0); //B11[0-3][3] /= A11[3][3] ymm15 = _mm256_mul_pd(ymm15, ymm0); //B11[4-7][3] /= A11[3][3] if(n_remainder == 3) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14);//store(B11[4-7][2]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + 
D_NR), ymm13); //store(B11[4-7][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) } } } if((m & 4)) ///implementation for remainder rows(when m_remainder is a multiple of 4) { for(j = 0; (j+D_NR-1) a01 ----> ***************** *********** *b01*b11* * * * * * * b11 * * * * * **a01 * * a11 | ***************** ********* | | * * * * * *a11* * | | * * * * * * * * | v ***************** ****** v * * * * * * * * * * * * * * ***************** * * * */ static err_t bli_dtrsm_small_XAuB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 8; //block dimension along the rows dim_t D_NR = 4; //block dimension along the columns dim_t m = bli_obj_length(b); //number of rows dim_t n = bli_obj_width(b); //number of columns dim_t m_remainder = m % D_MR; //number of corner rows dim_t n_remainder = n % D_NR; //number of corner columns dim_t cs_a = bli_obj_col_stride(a); //column stride of matrix A dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_XAUB_ROME && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n)>D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES && (m/n) < D_BLIS_SMALL_MATRIX_THRES_TRSM_DIM_RATIO) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t i, j, k; //loop variablse dim_t k_iter; //determines the number of GEMM operations to be done dim_t cs_b_offset[2]; //pre-calculated strides double ones = 1.0; double AlphaVal = *(double *)AlphaObj->buffer; //value of Alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a01, *a11, *b10, *b11; //pointers for GEMM and TRSM blocks double *ptr_a01_dup; cs_b_offset[0] = cs_b << 1; //cs_b_offset[0] = cs_b * 2; cs_b_offset[1] = cs_b_offset[0] + cs_b;//cs_b_offset[1] = cs_b * 3; //ymm 
scratch reginsters __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(i = 0; (i+D_MR-1) < m; i += D_MR) //loop along 'M' direction { for(j = 0; (j+D_NR-1) < n; j += D_NR) //loop along 'N' direction { a01 = L + j*cs_a; //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i; //pointer to block of B to be used in GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = j / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] 
B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] 
B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, 
ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] 
B11[2][0] B11[3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += cs_a; ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][1] //3rd col a11 += cs_a; ymm3 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][2] //4th col a11 += cs_a; ymm6 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][3] ymm2 = 
_mm256_broadcast_sd((double const *)(a11+0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][3] //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm1, ymm8, ymm9); //B11[0-3][1] -= B11[0-3][0] * A11[0][1] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[0-3][2] -= B11[0-3][0] * A11[0][2] ymm11 = _mm256_fnmadd_pd(ymm2, ymm8, ymm11); //B11[0-3][3] -= B11[0-3][0] * A11[0][3] ymm13 = _mm256_fnmadd_pd(ymm1, ymm12, ymm13); //B11[4-7][1] -= B11[4-7][0] * A11[0][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[4-7][2] -= B11[4-7][0] * A11[0][2] ymm15 = _mm256_fnmadd_pd(ymm2, ymm12, ymm15); //B11[4-7][3] -= B11[4-7][0] * A11[0][3] //(Row2)FMA operations ymm10 = _mm256_fnmadd_pd(ymm4, ymm9, ymm10); //B11[0-3][2] -= B11[0-3][1] * A11[1][2] ymm11 = _mm256_fnmadd_pd(ymm5, ymm9, ymm11); //B11[0-3][3] -= B11[0-3][1] * A11[1][3] ymm14 = _mm256_fnmadd_pd(ymm4, ymm13, ymm14); //B11[4-7][2] -= B11[4-7][1] * A11[1][2] ymm15 = _mm256_fnmadd_pd(ymm5, ymm13, ymm15); //B11[4-7][3] -= B11[4-7][1] * A11[1][3] //(Row3)FMA operations ymm11 = _mm256_fnmadd_pd(ymm6, ymm10, ymm11); //B11[0-3][3] -= B11[0-3][2] * A11[2][3] ymm15 = _mm256_fnmadd_pd(ymm6, ymm14, ymm15); //B11[4-7][3] -= B11[4-7][2] * A11[2][3] _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b), ymm11); //store(B11[0-3][3]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b + D_NR), ymm15);//store(B11[4-7][3]) } if(n_remainder) //implementation for remainder columns(when n is not multiple 
of D_NR) { a01 = L + j*cs_a; //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = j / D_NR; //number of GEMM operations to be performed(in blocks of 4x4) ///load 4x4 block of b11 ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation begins/// for(k = 0; k < k_iter; k++) ///loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row of A //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR));//B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] 
B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] 
B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of 
A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //subtract the calculated GEMM block from current TRSM block //load 8x4 block of B11 if(n_remainder == 3) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0-3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + 
D_NR)); //B11[4-7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0-3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } if(n_remainder == 2) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0-3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][1] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } if(n_remainder == 1) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][1] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][1] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= B10[0-3][0] ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha -= B10[4-7][0] ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha -= B10[0-3][1] ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= B10[4-7][1] ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= B10[0-3][2] ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= B10[4-7][2] ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= B10[0-3][3] 
ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= B10[4-7][3] ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += cs_a; ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][1] //3rd col a11 += cs_a; ymm3 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][2] //4th col a11 += cs_a; ymm6 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[2][3] //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm1, ymm8, ymm9); //B11[0-3][1] -= B11[0-3][0] * A11[0][1] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[0-3][2] -= B11[0-3][0] * A11[0][2] ymm11 = _mm256_fnmadd_pd(ymm2, ymm8, ymm11); //B11[0-3][3] -= B11[0-3][0] * A11[0][3] ymm13 = _mm256_fnmadd_pd(ymm1, ymm12, ymm13); //B11[4-7][1] -= B11[4-7][0] * A11[0][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[4-7][2] -= B11[4-7][0] * A11[0][2] ymm15 = _mm256_fnmadd_pd(ymm2, ymm12, ymm15); //B11[4-7][3] -= B11[4-7][0] * A11[0][3] //(Row2)FMA operations ymm10 = _mm256_fnmadd_pd(ymm4, ymm9, ymm10); //B11[0-3][2] -= B11[0-3][1] * A11[1][2] ymm11 = _mm256_fnmadd_pd(ymm5, ymm9, ymm11); //B11[0-3][3] -= B11[0-3][1] * A11[1][3] ymm14 = _mm256_fnmadd_pd(ymm4, ymm13, ymm14); //B11[4-7][2] -= B11[4-7][1] * A11[1][2] ymm15 = _mm256_fnmadd_pd(ymm5, ymm13, ymm15); //B11[4-7][3] -= B11[4-7][1] * A11[1][3] //(Row3)FMA operations ymm11 = _mm256_fnmadd_pd(ymm6, ymm10, ymm11); //B11[0-3][3] -= B11[0-3][2] * A11[2][3] ymm15 = _mm256_fnmadd_pd(ymm6, ymm14, ymm15); //B11[4-7][3] -= 
B11[4-7][2] * A11[2][3] ymm11 = _mm256_mul_pd(ymm11, ymm0); //B11[0-3][3] /= A11[3][3] ymm15 = _mm256_mul_pd(ymm15, ymm0); //B11[4-7][3] /= A11[3][3] if(n_remainder == 3) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14);//store(B11[4-7][2]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) } } } if((m & 4)) ///implementation for remainder rows(when m_remainder is a multiple of 4) { for(j = 0; (j+D_NR-1) a01 ----> ***************** *********** *b01*b11* * * * * * * b11 * * * * * **a01 * * a11 | ***************** ********* | | * * * * * *a11* * | | * * * * * * * * | v ***************** ****** v * * * * * * * * * * * * * * ***************** * * * */ static err_t bli_dtrsm_small_XAltB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 8; //block dimension along the rows dim_t D_NR = 4; //block dimension along the columns dim_t m = bli_obj_length(b); //number of rows dim_t n = bli_obj_width(b); //number of columns dim_t m_remainder = m % D_MR; //number of corner rows dim_t n_remainder = n % D_NR; //number of corner columns dim_t cs_a = bli_obj_col_stride(a); //column stride of matrix A dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef 
BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t i, j, k; //loop variablse dim_t k_iter; //determines the number of GEMM operations to be done dim_t cs_b_offset[2]; //pre-calculated strides double ones = 1.0; double AlphaVal = *(double *)AlphaObj->buffer; //value of Alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a01, *a11, *b10, *b11; //pointers for GEMM and TRSM blocks double *ptr_a01_dup; cs_b_offset[0] = cs_b << 1; //cs_b_offset[0] = cs_b * 2; cs_b_offset[1] = cs_b_offset[0] + cs_b;//cs_b_offset[1] = cs_b * 3; //ymm scratch reginsters __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(i = 0; (i+D_MR-1) < m; i += D_MR) //loop along 'M' direction { for(j = 0; (j+D_NR-1) < n; j += D_NR) //loop along 'N' direction { a01 = L + j; //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i; //pointer to block of B to be used in GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = j / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double 
const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] 
B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); 
//ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] 
B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha 
-= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += 1; ymm1 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][1] //3rd col a11 += 1; ymm3 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][2] //4th col a11 += 1; ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm0 = _mm256_unpacklo_pd(ymm0, ymm2); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm2 = _mm256_unpacklo_pd(ymm5, ymm6); //A11[2][2] A11[3][3] A11[1][1] A11[3][3] ymm0 = _mm256_blend_pd(ymm0, ymm2, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm7 = _mm256_div_pd(ymm7, ymm0); //(1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3]) ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][3] //extract a00 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) ymm8 = _mm256_mul_pd(ymm8, ymm0); //B11[0-3][0] /= A11[0][0] ymm12 = _mm256_mul_pd(ymm12, ymm0); //B11[4-7][0] /= A11[0][0] //extract a11 ymm0 = _mm256_permute_pd(ymm7, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00);//(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm1, ymm8, ymm9); //B11[0-3][1] -= B11[0-3][0] * A11[0][1] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[0-3][2] -= 
B11[0-3][0] * A11[0][2] ymm11 = _mm256_fnmadd_pd(ymm2, ymm8, ymm11); //B11[0-3][3] -= B11[0-3][0] * A11[0][3] ymm13 = _mm256_fnmadd_pd(ymm1, ymm12, ymm13); //B11[4-7][1] -= B11[4-7][0] * A11[0][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[4-7][2] -= B11[4-7][0] * A11[0][2] ymm15 = _mm256_fnmadd_pd(ymm2, ymm12, ymm15); //B11[4-7][3] -= B11[4-7][0] * A11[0][3] ymm9 = _mm256_mul_pd(ymm9, ymm0); //B11[0-3][1] /= A11[1][1] ymm13 = _mm256_mul_pd(ymm13, ymm0); //B11[4-7][1] /= A11[1][1] //extract a22 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row2)FMA operations ymm10 = _mm256_fnmadd_pd(ymm4, ymm9, ymm10); //B11[0-3][2] -= B11[0-3][1] * A11[1][2] ymm11 = _mm256_fnmadd_pd(ymm5, ymm9, ymm11); //B11[0-3][3] -= B11[0-3][1] * A11[1][3] ymm14 = _mm256_fnmadd_pd(ymm4, ymm13, ymm14); //B11[4-7][2] -= B11[4-7][1] * A11[1][2] ymm15 = _mm256_fnmadd_pd(ymm5, ymm13, ymm15); //B11[4-7][3] -= B11[4-7][1] * A11[1][3] ymm10 = _mm256_mul_pd(ymm10, ymm0); //B11[0-3][2] /= A11[2][2] ymm14 = _mm256_mul_pd(ymm14, ymm0); //B11[4-7][2] /= A11[2][2] //extract a33 ymm0 = _mm256_permute_pd(ymm7, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) //(Row3)FMA operations ymm11 = _mm256_fnmadd_pd(ymm6, ymm10, ymm11); //B11[0-3][3] -= B11[0-3][2] * A11[2][3] ymm15 = _mm256_fnmadd_pd(ymm6, ymm14, ymm15); //B11[4-7][3] -= B11[4-7][2] * A11[2][3] ymm11 = _mm256_mul_pd(ymm11, ymm0); //B11[0-3][3] /= A11[3][3] ymm15 = _mm256_mul_pd(ymm15, ymm0); //B11[4-7][3] /= A11[3][3] _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); 
//store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b), ymm11); //store(B11[0-3][3]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b + D_NR), ymm15);//store(B11[4-7][3]) } if(n_remainder) //implementation for remainder columns(when n is not multiple of D_NR) { a01 = L + j; //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = j / D_NR; //number of GEMM operations to be performed(in blocks of 4x4) ///load 4x4 block of b11 ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation begins/// for(k = 0; k < k_iter; k++) ///loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row of A //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR));//B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] 
B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] 
B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += 
(B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const 
*)&AlphaVal); //subtract the calculated GEMM block from current TRSM block //load 8x4 block of B11 if(n_remainder == 3) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0-3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0-3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } if(n_remainder == 2) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0-3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][1] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } if(n_remainder == 1) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][1] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][1] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= B10[0-3][0] ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha -= B10[4-7][0] ymm10 = _mm256_fmsub_pd(ymm10, ymm16, 
ymm2); //B11[0-3][1] * alpha -= B10[0-3][1] ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= B10[4-7][1] ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= B10[0-3][2] ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= B10[4-7][2] ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= B10[0-3][3] ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= B10[4-7][3] ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += 1; ymm1 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][1] //3rd col a11 += 1; ymm3 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][2] //4th col a11 += 1; ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm0 = _mm256_unpacklo_pd(ymm0, ymm2); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm2 = _mm256_unpacklo_pd(ymm5, ymm6); //A11[2][2] A11[3][3] A11[1][1] A11[3][3] ymm0 = _mm256_blend_pd(ymm0, ymm2, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm7 = _mm256_div_pd(ymm7, ymm0); //(1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3]) ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][3] //extract a00 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00);//(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) ymm8 = _mm256_mul_pd(ymm8, ymm0); //B11[0-3][0] /= 
A11[0][0] ymm12 = _mm256_mul_pd(ymm12, ymm0); //B11[4-7][0] /= A11[0][0] //extract a11 ymm0 = _mm256_permute_pd(ymm7, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00);//(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm1, ymm8, ymm9); //B11[0-3][1] -= B11[0-3][0] * A11[0][1] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[0-3][2] -= B11[0-3][0] * A11[0][2] ymm11 = _mm256_fnmadd_pd(ymm2, ymm8, ymm11); //B11[0-3][3] -= B11[0-3][0] * A11[0][3] ymm13 = _mm256_fnmadd_pd(ymm1, ymm12, ymm13); //B11[4-7][1] -= B11[4-7][0] * A11[0][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[4-7][2] -= B11[4-7][0] * A11[0][2] ymm15 = _mm256_fnmadd_pd(ymm2, ymm12, ymm15); //B11[4-7][3] -= B11[4-7][0] * A11[0][3] ymm9 = _mm256_mul_pd(ymm9, ymm0); //B11[0-3][1] /= A11[1][1] ymm13 = _mm256_mul_pd(ymm13, ymm0); //B11[4-7][1] /= A11[1][1] //extract a22 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row2)FMA operations ymm10 = _mm256_fnmadd_pd(ymm4, ymm9, ymm10); //B11[0-3][2] -= B11[0-3][1] * A11[1][2] ymm11 = _mm256_fnmadd_pd(ymm5, ymm9, ymm11); //B11[0-3][3] -= B11[0-3][1] * A11[1][3] ymm14 = _mm256_fnmadd_pd(ymm4, ymm13, ymm14); //B11[4-7][2] -= B11[4-7][1] * A11[1][2] ymm15 = _mm256_fnmadd_pd(ymm5, ymm13, ymm15); //B11[4-7][3] -= B11[4-7][1] * A11[1][3] ymm10 = _mm256_mul_pd(ymm10, ymm0); //B11[0-3][2] /= A11[2][2] ymm14 = _mm256_mul_pd(ymm14, ymm0); //B11[4-7][2] /= A11[2][2] //extract a33 ymm0 = _mm256_permute_pd(ymm7, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11); //(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) //(Row3)FMA operations ymm11 = _mm256_fnmadd_pd(ymm6, ymm10, ymm11); //B11[0-3][3] -= B11[0-3][2] * A11[2][3] ymm15 = _mm256_fnmadd_pd(ymm6, 
ymm14, ymm15); //B11[4-7][3] -= B11[4-7][2] * A11[2][3] ymm11 = _mm256_mul_pd(ymm11, ymm0); //B11[0-3][3] /= A11[3][3] ymm15 = _mm256_mul_pd(ymm15, ymm0); //B11[4-7][3] /= A11[3][3] if(n_remainder == 3) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14);//store(B11[4-7][2]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) } } } if((m & 4)) ///implementation for remainder rows(when m_remainder is a multiple of 4) { for(j = 0; (j+D_NR-1) a01 ----> ***************** *********** *b01*b11* * * * * * * b11 * * * * * **a01 * * a11 | ***************** ********* | | * * * * * *a11* * | | * * * * * * * * | v ***************** ****** v * * * * * * * * * * * * * * ***************** * * * */ static err_t bli_dtrsm_small_XAltB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 8; //block dimension along the rows dim_t D_NR = 4; //block dimension along the columns dim_t m = bli_obj_length(b); //number of rows dim_t n = bli_obj_width(b); //number of columns dim_t m_remainder = m % D_MR; //number of corner rows dim_t n_remainder = n % D_NR; //number of corner columns dim_t cs_a = bli_obj_col_stride(a); //column stride of matrix A dim_t cs_b = bli_obj_col_stride(b); 
//column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t i, j, k; //loop variablse dim_t k_iter; //determines the number of GEMM operations to be done dim_t cs_b_offset[2]; //pre-calculated strides double ones = 1.0; double AlphaVal = *(double *)AlphaObj->buffer; //value of Alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a01, *a11, *b10, *b11; //pointers for GEMM and TRSM blocks double *ptr_a01_dup; cs_b_offset[0] = cs_b << 1; //cs_b_offset[0] = cs_b * 2; cs_b_offset[1] = cs_b_offset[0] + cs_b;//cs_b_offset[1] = cs_b * 3; //ymm scratch reginsters __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(i = 0; (i+D_MR-1) < m; i += D_MR) //loop along 'M' direction { for(j = 0; (j+D_NR-1) < n; j += D_NR) //loop along 'N' direction { a01 = L + j; //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i; //pointer to block of B to be used in GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = j / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] 
ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += 
(B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 
= _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); 
//ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, 
ymm16, ymm7); //B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += 1; ymm1 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][1] //3rd col a11 += 1; ymm3 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][2] //4th col a11 += 1; ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 3)); //A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][3] //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm1, ymm8, ymm9); //B11[0-3][1] -= B11[0-3][0] * A11[0][1] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[0-3][2] -= B11[0-3][0] * A11[0][2] ymm11 = _mm256_fnmadd_pd(ymm2, ymm8, ymm11); //B11[0-3][3] -= B11[0-3][0] * A11[0][3] ymm13 = _mm256_fnmadd_pd(ymm1, ymm12, ymm13); //B11[4-7][1] -= B11[4-7][0] * A11[0][1] ymm14 = _mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[4-7][2] -= B11[4-7][0] * A11[0][2] ymm15 = _mm256_fnmadd_pd(ymm2, ymm12, ymm15); //B11[4-7][3] -= B11[4-7][0] * A11[0][3] //(Row2)FMA operations ymm10 = _mm256_fnmadd_pd(ymm4, ymm9, ymm10); //B11[0-3][2] -= B11[0-3][1] * A11[1][2] ymm11 = _mm256_fnmadd_pd(ymm5, ymm9, ymm11); //B11[0-3][3] -= B11[0-3][1] * A11[1][3] ymm14 = _mm256_fnmadd_pd(ymm4, ymm13, ymm14); //B11[4-7][2] -= B11[4-7][1] * A11[1][2] ymm15 = _mm256_fnmadd_pd(ymm5, ymm13, ymm15); //B11[4-7][3] -= B11[4-7][1] * A11[1][3] //(Row3)FMA operations ymm11 = _mm256_fnmadd_pd(ymm6, ymm10, ymm11); //B11[0-3][3] -= B11[0-3][2] * A11[2][3] ymm15 = _mm256_fnmadd_pd(ymm6, ymm14, ymm15); //B11[4-7][3] -= B11[4-7][2] * A11[2][3] 
_mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b), ymm11); //store(B11[0-3][3]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b + D_NR), ymm15);//store(B11[4-7][3]) } if(n_remainder) //implementation for remainder columns(when n is not multiple of D_NR) { a01 = L + j; //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = j / D_NR; //number of GEMM operations to be performed(in blocks of 4x4) ///load 4x4 block of b11 ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation begins/// for(k = 0; k < k_iter; k++) ///loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row of A //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + 
cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR));//B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = 
_mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] 
B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] 
B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //subtract the calculated GEMM block from current TRSM block //load 8x4 block of B11 if(n_remainder == 3) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0-3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0-3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } if(n_remainder == 2) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0-3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][1] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } if(n_remainder == 1) { ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0-3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4-7][0] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][1] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][1] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm11 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm15 = 
_mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] } ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= B10[0-3][0] ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha -= B10[4-7][0] ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha -= B10[0-3][1] ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= B10[4-7][1] ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= B10[0-3][2] ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= B10[4-7][2] ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= B10[0-3][3] ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= B10[4-7][3] ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += 1; ymm1 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][1] //3rd col a11 += 1; ymm3 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][2] //4th col a11 += 1; ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 3)); //A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][3] //(Row1): FMA operations ymm9 = _mm256_fnmadd_pd(ymm1, ymm8, ymm9); //B11[0-3][1] -= B11[0-3][0] * A11[0][1] ymm10 = _mm256_fnmadd_pd(ymm3, ymm8, ymm10); //B11[0-3][2] -= B11[0-3][0] * A11[0][2] ymm11 = _mm256_fnmadd_pd(ymm2, ymm8, ymm11); //B11[0-3][3] -= B11[0-3][0] * A11[0][3] ymm13 = _mm256_fnmadd_pd(ymm1, ymm12, ymm13); //B11[4-7][1] -= B11[4-7][0] * A11[0][1] ymm14 = 
_mm256_fnmadd_pd(ymm3, ymm12, ymm14); //B11[4-7][2] -= B11[4-7][0] * A11[0][2] ymm15 = _mm256_fnmadd_pd(ymm2, ymm12, ymm15); //B11[4-7][3] -= B11[4-7][0] * A11[0][3] //(Row2)FMA operations ymm10 = _mm256_fnmadd_pd(ymm4, ymm9, ymm10); //B11[0-3][2] -= B11[0-3][1] * A11[1][2] ymm11 = _mm256_fnmadd_pd(ymm5, ymm9, ymm11); //B11[0-3][3] -= B11[0-3][1] * A11[1][3] ymm14 = _mm256_fnmadd_pd(ymm4, ymm13, ymm14); //B11[4-7][2] -= B11[4-7][1] * A11[1][2] ymm15 = _mm256_fnmadd_pd(ymm5, ymm13, ymm15); //B11[4-7][3] -= B11[4-7][1] * A11[1][3] //(Row3)FMA operations ymm11 = _mm256_fnmadd_pd(ymm6, ymm10, ymm11); //B11[0-3][3] -= B11[0-3][2] * A11[2][3] ymm15 = _mm256_fnmadd_pd(ymm6, ymm14, ymm15); //B11[4-7][3] -= B11[4-7][2] * A11[2][3] if(n_remainder == 3) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14);//store(B11[4-7][2]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) } } } if((m & 4)) ///implementation for remainder rows(when m_remainder is a multiple of 4) { for(j = 0; (j+D_NR-1) D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t i, j, k; //loop variablse dim_t k_iter; 
//determines the number of GEMM operations to be done dim_t cs_b_offset[2]; //pre-calculated strides double ones = 1.0; double AlphaVal = *(double *)AlphaObj->buffer; //value of Alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a01, *a11, *b10, *b11; //pointers for GEMM and TRSM blocks double *ptr_a01_dup; cs_b_offset[0] = cs_b << 1; //cs_b_offset[0] = cs_b * 2; cs_b_offset[1] = cs_b_offset[0] + cs_b;//cs_b_offset[1] = cs_b * 3; //ymm scratch reginsters __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(i = (m-D_MR); (i+1) > 0; i -= D_MR) //loop along 'M' direction { for(j = (n-D_NR); (j+1) > 0; j -= D_NR) //loop along 'N' direction { a01 = L + j*cs_a +(j+D_NR); //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used in GEMM b11 = B + (i) + (j)*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = 
_mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] 
B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, 
ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); 
//ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); 
//1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += 1; ymm1 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][1] //3rd col a11 += 1; ymm3 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][2] //4th col a11 += 1; ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm0 = _mm256_unpacklo_pd(ymm0, ymm2); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm2 = _mm256_unpacklo_pd(ymm5, ymm6); //A11[2][2] A11[3][3] A11[1][1] A11[3][3] ymm0 = _mm256_blend_pd(ymm0, ymm2, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm7 = _mm256_div_pd(ymm7, ymm0); //(1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3]) ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][3] //extract a33 ymm0 = _mm256_permute_pd(ymm7, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm11 = _mm256_mul_pd(ymm11, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); //extract a22 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(row 3):FMA operations ymm10 = _mm256_fnmadd_pd(ymm11, ymm6, ymm10); ymm9 = _mm256_fnmadd_pd(ymm11, ymm5, ymm9); ymm8 = _mm256_fnmadd_pd(ymm11, ymm2, ymm8); ymm14 = _mm256_fnmadd_pd(ymm15, ymm6, ymm14); ymm13 = _mm256_fnmadd_pd(ymm15, ymm5, ymm13); ymm12 = _mm256_fnmadd_pd(ymm15, ymm2, ymm12); ymm10 = 
_mm256_mul_pd(ymm10, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); //extract a11 ymm0 = _mm256_permute_pd(ymm7, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00);//(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(Row 2): FMA operations ymm9 = _mm256_fnmadd_pd(ymm10, ymm4, ymm9); ymm8 = _mm256_fnmadd_pd(ymm10, ymm3, ymm8); ymm13 = _mm256_fnmadd_pd(ymm14, ymm4, ymm13); ymm12 = _mm256_fnmadd_pd(ymm14, ymm3, ymm12); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); //extract a00 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) //(Row 1): FMA operations ymm8 = _mm256_fnmadd_pd(ymm9, ymm1, ymm8); ymm12 = _mm256_fnmadd_pd(ymm13, ymm1, ymm12); ymm8 = _mm256_mul_pd(ymm8, ymm0); //B11[0-3][0] /= A11[0][0] ymm12 = _mm256_mul_pd(ymm12, ymm0); //B11[4-7][0] /= A11[0][0] _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b), ymm11); //store(B11[0-3][3]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b + D_NR), ymm15);//store(B11[4-7][3]) } if(n_remainder) //implementation for remainder columns(when n is not multiple of D_NR) { a01 = L + j*cs_a + (j+D_NR); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j + D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = 
(n-j-D_NR) / D_NR; //number of GEMM operations to be performed(in blocks of 4x4) ///load 4x4 block of b11 ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation begins/// for(k = 0; k < k_iter; k++) ///loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row of A //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR));//B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] 
B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); 
//A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 
cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //subtract the calculated GEMM block from current TRSM block //load 8x4 block of B11 if(n_remainder == 3) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm9 = _mm256_loadu_pd((double const *)(b11+cs_b)); //B11[0-3][0] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][0] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b*2)); //B11[0-3][1] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b*2 + D_NR)); //B11[4-7][1] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0-3][2] ymm15 = 
_mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4-7][2] } if(n_remainder == 2) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0-3][0] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4-7][0] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0-3][1] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4-7][1] } if(n_remainder == 1) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][1] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][1] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm11 = _mm256_loadu_pd((double const *)(b11+cs_b_offset[1])); //B11[0-3][0] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] +D_NR)); //B11[4-7][0] } ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= B10[0-3][0] ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha -= B10[4-7][0] ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha -= B10[0-3][1] ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= B10[4-7][1] ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= B10[0-3][2] ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= B10[4-7][2] ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= B10[0-3][3] ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= B10[4-7][3] ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = 
_mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += 1; ymm1 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][1] //3rd col a11 += 1; ymm3 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][2] //4th col a11 += 1; ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 3)); //A11[3][3] //compute reciprocals of L(i,i) and broadcast in registers ymm0 = _mm256_unpacklo_pd(ymm0, ymm2); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm2 = _mm256_unpacklo_pd(ymm5, ymm6); //A11[2][2] A11[3][3] A11[1][1] A11[3][3] ymm0 = _mm256_blend_pd(ymm0, ymm2, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm7 = _mm256_div_pd(ymm7, ymm0); //(1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3]) ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][3] //extract a33 ymm0 = _mm256_permute_pd(ymm7, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm11 = _mm256_mul_pd(ymm11, ymm0); ymm15 = _mm256_mul_pd(ymm15, ymm0); //extract a22 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x11);//(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(row 3):FMA operations ymm10 = _mm256_fnmadd_pd(ymm11, ymm6, ymm10); ymm9 = _mm256_fnmadd_pd(ymm11, ymm5, ymm9); ymm8 = _mm256_fnmadd_pd(ymm11, ymm2, ymm8); ymm14 = _mm256_fnmadd_pd(ymm15, ymm6, ymm14); ymm13 = _mm256_fnmadd_pd(ymm15, ymm5, ymm13); ymm12 = 
_mm256_fnmadd_pd(ymm15, ymm2, ymm12); ymm10 = _mm256_mul_pd(ymm10, ymm0); ymm14 = _mm256_mul_pd(ymm14, ymm0); //extract a11 ymm0 = _mm256_permute_pd(ymm7, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00);//(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(Row 2): FMA operations ymm9 = _mm256_fnmadd_pd(ymm10, ymm4, ymm9); ymm8 = _mm256_fnmadd_pd(ymm10, ymm3, ymm8); ymm13 = _mm256_fnmadd_pd(ymm14, ymm4, ymm13); ymm12 = _mm256_fnmadd_pd(ymm14, ymm3, ymm12); ymm9 = _mm256_mul_pd(ymm9, ymm0); ymm13 = _mm256_mul_pd(ymm13, ymm0); //extract a00 ymm0 = _mm256_permute_pd(ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm0 = _mm256_permute2f128_pd(ymm0, ymm0, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) //(Row 1): FMA operations ymm8 = _mm256_fnmadd_pd(ymm9, ymm1, ymm8); ymm12 = _mm256_fnmadd_pd(ymm13, ymm1, ymm12); ymm8 = _mm256_mul_pd(ymm8, ymm0); //B11[0-3][0] /= A11[0][0] ymm12 = _mm256_mul_pd(ymm12, ymm0); //B11[4-7][0] /= A11[0][0] if(n_remainder == 3) { _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14);//store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)(b11+ cs_b_offset[1]), ymm11); //store(B11[0-3][0]) 
_mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } } } if(i<0) i += D_NR; if((m & 4)) ///implementation for remainder rows(when m_remainder is a multiple of 4) { for(j = (n-D_NR); (j+1) > 0; j -=D_NR) //loop along n direction { a01 = L + j*cs_a + (j+D_NR); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of times GEMM operations to be performed(in blocks of 4x4) ymm15 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha ///GEMM for previous blocks /// ///load 4x4 block of b11 ymm0 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] //multiply by alpha ymm0 = _mm256_mul_pd(ymm0, ymm15); //B11[x][0] *= alpha ymm1 = _mm256_mul_pd(ymm1, ymm15); //B11[x][1] *=alpha ymm2 = _mm256_mul_pd(ymm2, ymm15); //B11[x][2] *= alpha ymm3 = _mm256_mul_pd(ymm3, ymm15); //B11[x][3] *= alpha ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //load 4x4 bblock of b10 ymm8 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm9 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm10 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //B10[0][2] B10[1][2] B10[2][2] B10[3][2] ymm11 = _mm256_loadu_pd((double 
const *)(b10 + cs_b_offset[1])); //B10[0][3] B10[1][3] B10[2][3] B10[3][3] //broadcast 1st row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) //broadcast 2nd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B10[0][1]*A01[1][0] B10[1][1]*A01[1][0] B10[2][1]*A01[1][0] B10[3][1]*A01[1][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B10[0][1]*A01[1][1] B10[1][1]*A01[1][1] B10[2][1]*A01[1][1] B10[3][1]*A01[1][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B10[0][1]*A01[1][2] B10[1][1]*A01[1][2] B10[2][1]*A01[1][2] B10[3][1]*A01[1][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B10[0][1]*A01[1][3] B10[1][1]*A01[1][3] B10[2][1]*A01[1][3] B10[3][1]*A01[1][3]) //braodcast 3rd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm13 = 
_mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B10[0][2]*A01[2][0] B10[1][2]*A01[2][0] B10[2][2]*A01[2][0] B10[3][2]*A01[2][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B10[0][2]*A01[2][1] B10[1][2]*A01[2][1] B10[2][2]*A01[2][1] B10[3][2]*A01[2][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B10[0][2]*A01[2][2] B10[1][2]*A01[2][2] B10[2][2]*A01[2][2] B10[3][2]*A01[2][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B10[0][2]*A01[2][3] B10[1][2]*A01[2][3] B10[2][2]*A01[2][3] B10[3][2]*A01[2][3]) //broadcast 4th row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B10[0][3]*A01[3][0] B10[1][3]*A01[3][0] B10[2][3]*A01[3][0] B10[3][3]*A01[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B10[0][3]*A01[3][1] B10[1][3]*A01[3][1] B10[2][3]*A01[3][1] B10[3][3]*A01[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B10[0][3]*A01[3][2] B10[1][3]*A01[3][2] B10[2][3]*A01[3][2] B10[3][3]*A01[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B10[0][3]*A01[3][3] B10[1][3]*A01[3][3] B10[2][3]*A01[3][3] B10[3][3]*A01[3][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code end/// ymm0 = _mm256_sub_pd(ymm0, ymm4); //B11[x][0] -=ymm4 ymm1 = _mm256_sub_pd(ymm1, ymm5); //B11[x][1] -= ymm5 ymm2 = _mm256_sub_pd(ymm2, 
ymm6); //B11[x][2] -= ymm6 ymm3 = _mm256_sub_pd(ymm3, ymm7); //B11[x][3] -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm9 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[1][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][2] ymm12 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[1][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); //compute reciprocals of A(i,i) and broadcast in registers ymm4 = _mm256_unpacklo_pd(ymm4, ymm8); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm8 = _mm256_unpacklo_pd(ymm11, ymm13); //A11[2][2] A11[3][3] A11[2][2] A11[3][3] ymm15 = _mm256_blend_pd(ymm4, ymm8, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm14 = _mm256_div_pd(ymm14, ymm15); // 1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] //extract a33 ymm15 = _mm256_permute_pd(ymm14, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm3 = _mm256_mul_pd(ymm3, ymm15); //extract a22 ymm15 = _mm256_permute_pd(ymm14, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row 3): FMA operations ymm2 = _mm256_fnmadd_pd(ymm3, ymm12, ymm2); ymm1 = _mm256_fnmadd_pd(ymm3, ymm10, ymm1); ymm0 = _mm256_fnmadd_pd(ymm3, ymm7, ymm0); ymm2 = _mm256_mul_pd(ymm2, ymm15); //extract a11 ymm15 = 
_mm256_permute_pd(ymm14, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(ROW 2): FMA operations ymm1 = _mm256_fnmadd_pd(ymm2, ymm9, ymm1); ymm0 = _mm256_fnmadd_pd(ymm2, ymm6, ymm0); ymm1 = _mm256_mul_pd(ymm1, ymm15); //extract A00 ymm15 = _mm256_permute_pd(ymm14, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) //(Row 1):FMA operations ymm0 = _mm256_fnmadd_pd(ymm1, ymm5, ymm0); ymm0 = _mm256_mul_pd(ymm0, ymm15); _mm256_storeu_pd((double *)b11, ymm0); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm1); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm2); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm3); //store(B11[x][3]) } if(n_remainder) //implementation for remainder columns(when n is not a multiple of D_NR) { a01 = L + j*cs_a + (j+D_NR); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointwr to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of times GEMM operations to be performed(in blocks of 4x4) ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha value ///GEMM for previous blocks /// ///load 4x4 block of b11 if(n_remainder == 3) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm1 = _mm256_loadu_pd((double const *)b11+ cs_b); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] } if(n_remainder == 2) { ymm0 = 
_mm256_broadcast_sd((double const *)&ones); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm1 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] } if(n_remainder == 1) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm1 = _mm256_broadcast_sd((double const *)&ones); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm2 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] } //multiply by alpha ymm0 = _mm256_mul_pd(ymm0, ymm16); //B11[x][0] *= alpha ymm1 = _mm256_mul_pd(ymm1, ymm16); //B11[x][1] *=alpha ymm2 = _mm256_mul_pd(ymm2, ymm16); //B11[x][2] *= alpha ymm3 = _mm256_mul_pd(ymm3, ymm16); //B11[x][3] *= alpha ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM processing stars/// for(k = 0; k < k_iter; k++) { ptr_a01_dup = a01; //load 4x4 bblock of b10 ymm8 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm9 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm10 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //B10[0][2] B10[1][2] B10[2][2] B10[3][2] ymm11 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[1])); //B10[0][3] B10[1][3] B10[2][3] B10[3][3] //broadcast 1st row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row of A 
ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) //broadcast 2nd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B10[0][1]*A01[1][0] B10[1][1]*A01[1][0] B10[2][1]*A01[1][0] B10[3][1]*A01[1][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B10[0][1]*A01[1][1] B10[1][1]*A01[1][1] B10[2][1]*A01[1][1] B10[3][1]*A01[1][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B10[0][1]*A01[1][2] B10[1][1]*A01[1][2] B10[2][1]*A01[1][2] B10[3][1]*A01[1][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B10[0][1]*A01[1][3] B10[1][1]*A01[1][3] B10[2][1]*A01[1][3] B10[3][1]*A01[1][3]) //braodcast 3rd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B10[0][2]*A01[2][0] B10[1][2]*A01[2][0] B10[2][2]*A01[2][0] B10[3][2]*A01[2][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += 
(B10[0][2]*A01[2][1] B10[1][2]*A01[2][1] B10[2][2]*A01[2][1] B10[3][2]*A01[2][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B10[0][2]*A01[2][2] B10[1][2]*A01[2][2] B10[2][2]*A01[2][2] B10[3][2]*A01[2][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B10[0][2]*A01[2][3] B10[1][2]*A01[2][3] B10[2][2]*A01[2][3] B10[3][2]*A01[2][3]) //broadcast 4th row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B10[0][3]*A01[3][0] B10[1][3]*A01[3][0] B10[2][3]*A01[3][0] B10[3][3]*A01[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B10[0][3]*A01[3][1] B10[1][3]*A01[3][1] B10[2][3]*A01[3][1] B10[3][3]*A01[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B10[0][3]*A01[3][2] B10[1][3]*A01[3][2] B10[2][3]*A01[3][2] B10[3][3]*A01[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B10[0][3]*A01[3][3] B10[1][3]*A01[3][3] B10[2][3]*A01[3][3] B10[3][3]*A01[3][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm0 = _mm256_sub_pd(ymm0, ymm4); //B11[x][0] -= ymm4 ymm1 = _mm256_sub_pd(ymm1, ymm5); //B11[x][1] -= ymm5 ymm2 = _mm256_sub_pd(ymm2, ymm6); //B11[x][2] -= ymm6 ymm3 = _mm256_sub_pd(ymm3, ymm7); //B11[x][3] -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][0] //2nd col a11 += cs_a; ymm8 = 
_mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm9 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[1][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][2] ymm12 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[1][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); //compute reciprocals of A(i,i) and broadcast in registers ymm4 = _mm256_unpacklo_pd(ymm4, ymm8); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm8 = _mm256_unpacklo_pd(ymm11, ymm13); //A11[2][2] A11[3][3] A11[2][2] A11[3][3] ymm15 = _mm256_blend_pd(ymm4, ymm8, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm14 = _mm256_div_pd(ymm14, ymm15); // 1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] //extract a33 ymm15 = _mm256_permute_pd(ymm14, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm3 = _mm256_mul_pd(ymm3, ymm15); //extract a22 ymm15 = _mm256_permute_pd(ymm14, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row 3): FMA operations ymm2 = _mm256_fnmadd_pd(ymm3, ymm12, ymm2); ymm1 = _mm256_fnmadd_pd(ymm3, ymm10, ymm1); ymm0 = _mm256_fnmadd_pd(ymm3, ymm7, ymm0); ymm2 = _mm256_mul_pd(ymm2, ymm15); //extract a11 ymm15 = _mm256_permute_pd(ymm14, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(ROW 2): FMA operations ymm1 = _mm256_fnmadd_pd(ymm2, ymm9, ymm1); ymm0 = _mm256_fnmadd_pd(ymm2, ymm6, ymm0); ymm1 = _mm256_mul_pd(ymm1, ymm15); //extract A00 ymm15 = _mm256_permute_pd(ymm14, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm15 = 
_mm256_permute2f128_pd(ymm15, ymm15, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) //(Row 1):FMA operations ymm0 = _mm256_fnmadd_pd(ymm1, ymm5, ymm0); ymm0 = _mm256_mul_pd(ymm0, ymm15); if(n_remainder == 3) { _mm256_storeu_pd((double *)(b11 + cs_b), ymm1); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm2); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b*3), ymm3); //store(B11[x][0]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)(b11+ cs_b * 2), ymm2); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[x][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[x][0]) } } m_remainder -= 4; i -= 4; } // if(i < 0) i = 0; if(m_remainder) ///implementation for remainder rows { dtrsm_small_XAlB(L, B, AlphaVal, m_remainder, n, cs_a, cs_b); } return BLIS_SUCCESS; } /*implements TRSM for the case XA = alpha * B *A is lower triangular, unit-diagonal, no transpose *dimensions: X:mxn A:nxn B: mxn */ /* <---b11 <---a11 ***************** * *b01*b11* * * * * ^ * * * * * ^ * * | ***************** | ******* | * * * * * | * * * | * * * * * a01* * * b10 ***************** ************* * * * * * * * * * * * * * * * * * * ***************** ******************* */ static err_t bli_dtrsm_small_XAlB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 8; //block dimension along the rows dim_t D_NR = 4; //block dimension along the columns dim_t m = bli_obj_length(b); //number of rows dim_t n = bli_obj_width(b); //number of columns dim_t m_remainder = m % D_MR; //number of corner rows dim_t n_remainder = n % D_NR; //number of corner columns dim_t cs_a = bli_obj_col_stride(a); //column stride of matrix A dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n) > 
D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t i, j, k; //loop variablse dim_t k_iter; //determines the number of GEMM operations to be done dim_t cs_b_offset[2]; //pre-calculated strides double ones = 1.0; double AlphaVal = *(double *)AlphaObj->buffer; //value of Alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a01, *a11, *b10, *b11; //pointers for GEMM and TRSM blocks double *ptr_a01_dup; cs_b_offset[0] = cs_b << 1; //cs_b_offset[0] = cs_b * 2; cs_b_offset[1] = cs_b_offset[0] + cs_b;//cs_b_offset[1] = cs_b * 3; //ymm scratch reginsters __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(i = (m-D_MR); (i+1) > 0; i -= D_MR) //loop along 'M' direction { for(j = (n-D_NR); (j+1) > 0; j -= D_NR) //loop along 'N' direction { a01 = L + j*cs_a +(j+D_NR); //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used in GEMM b11 = B + (i) + (j)*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row //load 8x2 
block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = 
_mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += 
(B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += 
(B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); 
//B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += 1; ymm1 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][1] //3rd col a11 += 1; ymm3 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][2] //4th col a11 += 1; ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 3)); //A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][3] //(row 3):FMA operations ymm10 = _mm256_fnmadd_pd(ymm11, ymm6, ymm10); ymm9 = _mm256_fnmadd_pd(ymm11, ymm5, ymm9); ymm8 = _mm256_fnmadd_pd(ymm11, ymm2, ymm8); ymm14 = _mm256_fnmadd_pd(ymm15, ymm6, ymm14); ymm13 = _mm256_fnmadd_pd(ymm15, ymm5, ymm13); ymm12 = _mm256_fnmadd_pd(ymm15, ymm2, ymm12); //(Row 2): FMA operations ymm9 = _mm256_fnmadd_pd(ymm10, ymm4, ymm9); ymm8 = _mm256_fnmadd_pd(ymm10, ymm3, ymm8); ymm13 = _mm256_fnmadd_pd(ymm14, ymm4, ymm13); ymm12 = _mm256_fnmadd_pd(ymm14, ymm3, ymm12); //(Row 1): FMA operations ymm8 = _mm256_fnmadd_pd(ymm9, ymm1, ymm8); ymm12 = _mm256_fnmadd_pd(ymm13, ymm1, ymm12); _mm256_storeu_pd((double *)b11, ymm8); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[4-7][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b), ymm11); 
//store(B11[0-3][3]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + cs_b + D_NR), ymm15);//store(B11[4-7][3]) } if(n_remainder) //implementation for remainder columns(when n is not multiple of D_NR) { a01 = L + j*cs_a + (j+D_NR); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j + D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of GEMM operations to be performed(in blocks of 4x4) ///load 4x4 block of b11 ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation begins/// for(k = 0; k < k_iter; k++) ///loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row of A //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR));//B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = 
_mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = 
_mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += 
(B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //subtract the calculated GEMM block from current TRSM block //load 8x4 block of B11 if(n_remainder == 3) { ymm8 = 
_mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm9 = _mm256_loadu_pd((double const *)(b11+cs_b)); //B11[0-3][0] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][0] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b*2)); //B11[0-3][1] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b*2 + D_NR)); //B11[4-7][1] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0-3][2] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4-7][2] } if(n_remainder == 2) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0-3][0] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4-7][0] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0-3][1] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4-7][1] } if(n_remainder == 1) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][1] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][1] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm11 = _mm256_loadu_pd((double const *)(b11+cs_b_offset[1])); //B11[0-3][0] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] +D_NR)); //B11[4-7][0] } ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= B10[0-3][0] ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha -= B10[4-7][0] ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] 
* alpha -= B10[0-3][1] ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= B10[4-7][1] ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= B10[0-3][2] ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= B10[4-7][2] ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= B10[0-3][3] ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= B10[4-7][3] ///implement TRSM/// ///read 4x4 block of A11/// ymm7 = _mm256_broadcast_sd((double const *)(&ones)); //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] //2nd col a11 += 1; ymm1 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][1] //3rd col a11 += 1; ymm3 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][2] ymm4 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][2] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][2] //4th col a11 += 1; ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 3)); //A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 0)); //A11[0][3] ymm5 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 1)); //A11[1][3] ymm6 = _mm256_broadcast_sd((double const *)(a11+ cs_a * 2)); //A11[2][3] //(row 3):FMA operations ymm10 = _mm256_fnmadd_pd(ymm11, ymm6, ymm10); ymm9 = _mm256_fnmadd_pd(ymm11, ymm5, ymm9); ymm8 = _mm256_fnmadd_pd(ymm11, ymm2, ymm8); ymm14 = _mm256_fnmadd_pd(ymm15, ymm6, ymm14); ymm13 = _mm256_fnmadd_pd(ymm15, ymm5, ymm13); ymm12 = _mm256_fnmadd_pd(ymm15, ymm2, ymm12); //(Row 2): FMA operations ymm9 = _mm256_fnmadd_pd(ymm10, ymm4, ymm9); ymm8 = _mm256_fnmadd_pd(ymm10, ymm3, ymm8); ymm13 = _mm256_fnmadd_pd(ymm14, ymm4, ymm13); ymm12 = _mm256_fnmadd_pd(ymm14, ymm3, ymm12); //(Row 1): FMA operations ymm8 = _mm256_fnmadd_pd(ymm9, ymm1, ymm8); ymm12 = _mm256_fnmadd_pd(ymm13, ymm1, ymm12); if(n_remainder == 3) { _mm256_storeu_pd((double *)(b11 
+ cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14);//store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)(b11+ cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } } } if(i<0) i += D_NR; if((m & 4)) ///implementation for remainder rows(when m_remainder is a multiple of 4) { for(j = (n-D_NR); (j+1) > 0; j -=D_NR) //loop along n direction { a01 = L + j*cs_a + (j+D_NR); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of times GEMM operations to be performed(in blocks of 4x4) ymm15 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha ///GEMM for previous blocks /// ///load 4x4 block of b11 ymm0 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = 
_mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] //multiply by alpha ymm0 = _mm256_mul_pd(ymm0, ymm15); //B11[x][0] *= alpha ymm1 = _mm256_mul_pd(ymm1, ymm15); //B11[x][1] *=alpha ymm2 = _mm256_mul_pd(ymm2, ymm15); //B11[x][2] *= alpha ymm3 = _mm256_mul_pd(ymm3, ymm15); //B11[x][3] *= alpha ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //load 4x4 bblock of b10 ymm8 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm9 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm10 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //B10[0][2] B10[1][2] B10[2][2] B10[3][2] ymm11 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[1])); //B10[0][3] B10[1][3] B10[2][3] B10[3][3] //broadcast 1st row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) //broadcast 2nd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] 
ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B10[0][1]*A01[1][0] B10[1][1]*A01[1][0] B10[2][1]*A01[1][0] B10[3][1]*A01[1][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B10[0][1]*A01[1][1] B10[1][1]*A01[1][1] B10[2][1]*A01[1][1] B10[3][1]*A01[1][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B10[0][1]*A01[1][2] B10[1][1]*A01[1][2] B10[2][1]*A01[1][2] B10[3][1]*A01[1][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B10[0][1]*A01[1][3] B10[1][1]*A01[1][3] B10[2][1]*A01[1][3] B10[3][1]*A01[1][3]) //braodcast 3rd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B10[0][2]*A01[2][0] B10[1][2]*A01[2][0] B10[2][2]*A01[2][0] B10[3][2]*A01[2][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B10[0][2]*A01[2][1] B10[1][2]*A01[2][1] B10[2][2]*A01[2][1] B10[3][2]*A01[2][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B10[0][2]*A01[2][2] B10[1][2]*A01[2][2] B10[2][2]*A01[2][2] B10[3][2]*A01[2][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B10[0][2]*A01[2][3] B10[1][2]*A01[2][3] B10[2][2]*A01[2][3] B10[3][2]*A01[2][3]) //broadcast 4th row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm15 = _mm256_broadcast_sd((double const 
*)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B10[0][3]*A01[3][0] B10[1][3]*A01[3][0] B10[2][3]*A01[3][0] B10[3][3]*A01[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B10[0][3]*A01[3][1] B10[1][3]*A01[3][1] B10[2][3]*A01[3][1] B10[3][3]*A01[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B10[0][3]*A01[3][2] B10[1][3]*A01[3][2] B10[2][3]*A01[3][2] B10[3][3]*A01[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B10[0][3]*A01[3][3] B10[1][3]*A01[3][3] B10[2][3]*A01[3][3] B10[3][3]*A01[3][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code end/// ymm0 = _mm256_sub_pd(ymm0, ymm4); //B11[x][0] -=ymm4 ymm1 = _mm256_sub_pd(ymm1, ymm5); //B11[x][1] -= ymm5 ymm2 = _mm256_sub_pd(ymm2, ymm6); //B11[x][2] -= ymm6 ymm3 = _mm256_sub_pd(ymm3, ymm7); //B11[x][3] -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm9 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[1][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][2] ymm12 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[1][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); //(Row 3): FMA operations ymm2 = _mm256_fnmadd_pd(ymm3, ymm12, ymm2); ymm1 = _mm256_fnmadd_pd(ymm3, ymm10, ymm1); ymm0 = _mm256_fnmadd_pd(ymm3, ymm7, ymm0); //(ROW 2): FMA 
operations ymm1 = _mm256_fnmadd_pd(ymm2, ymm9, ymm1); ymm0 = _mm256_fnmadd_pd(ymm2, ymm6, ymm0); //(Row 1):FMA operations ymm0 = _mm256_fnmadd_pd(ymm1, ymm5, ymm0); _mm256_storeu_pd((double *)b11, ymm0); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm1); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm2); //store(B11[x][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm3); //store(B11[x][3]) } if(n_remainder) //implementation for remainder columns(when n is not a multiple of D_NR) { a01 = L + j*cs_a + (j+D_NR); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointwr to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of times GEMM operations to be performed(in blocks of 4x4) ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha value ///GEMM for previous blocks /// ///load 4x4 block of b11 if(n_remainder == 3) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm1 = _mm256_loadu_pd((double const *)b11+ cs_b); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] } if(n_remainder == 2) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm1 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] } if(n_remainder == 1) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm1 
= _mm256_broadcast_sd((double const *)&ones); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm2 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] } //multiply by alpha ymm0 = _mm256_mul_pd(ymm0, ymm16); //B11[x][0] *= alpha ymm1 = _mm256_mul_pd(ymm1, ymm16); //B11[x][1] *=alpha ymm2 = _mm256_mul_pd(ymm2, ymm16); //B11[x][2] *= alpha ymm3 = _mm256_mul_pd(ymm3, ymm16); //B11[x][3] *= alpha ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM processing stars/// for(k = 0; k < k_iter; k++) { ptr_a01_dup = a01; //load 4x4 bblock of b10 ymm8 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm9 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm10 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //B10[0][2] B10[1][2] B10[2][2] B10[3][2] ymm11 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[1])); //B10[0][3] B10[1][3] B10[2][3] B10[3][3] //broadcast 1st row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[0][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] 
B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) //broadcast 2nd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[1][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B10[0][1]*A01[1][0] B10[1][1]*A01[1][0] B10[2][1]*A01[1][0] B10[3][1]*A01[1][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B10[0][1]*A01[1][1] B10[1][1]*A01[1][1] B10[2][1]*A01[1][1] B10[3][1]*A01[1][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B10[0][1]*A01[1][2] B10[1][1]*A01[1][2] B10[2][1]*A01[1][2] B10[3][1]*A01[1][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B10[0][1]*A01[1][3] B10[1][1]*A01[1][3] B10[2][1]*A01[1][3] B10[3][1]*A01[1][3]) //braodcast 3rd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 1)); //A01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[2][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B10[0][2]*A01[2][0] B10[1][2]*A01[2][0] B10[2][2]*A01[2][0] B10[3][2]*A01[2][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B10[0][2]*A01[2][1] B10[1][2]*A01[2][1] B10[2][2]*A01[2][1] B10[3][2]*A01[2][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B10[0][2]*A01[2][2] B10[1][2]*A01[2][2] B10[2][2]*A01[2][2] B10[3][2]*A01[2][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B10[0][2]*A01[2][3] B10[1][2]*A01[2][3] B10[2][2]*A01[2][3] B10[3][2]*A01[2][3]) //broadcast 4th row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 0)); //A01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 
cs_a * 1)); //A01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 2)); //A01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + cs_a * 3)); //A01[3][3] a01 += 1; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B10[0][3]*A01[3][0] B10[1][3]*A01[3][0] B10[2][3]*A01[3][0] B10[3][3]*A01[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B10[0][3]*A01[3][1] B10[1][3]*A01[3][1] B10[2][3]*A01[3][1] B10[3][3]*A01[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B10[0][3]*A01[3][2] B10[1][3]*A01[3][2] B10[2][3]*A01[3][2] B10[3][3]*A01[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B10[0][3]*A01[3][3] B10[1][3]*A01[3][3] B10[2][3]*A01[3][3] B10[3][3]*A01[3][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR; //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm0 = _mm256_sub_pd(ymm0, ymm4); //B11[x][0] -= ymm4 ymm1 = _mm256_sub_pd(ymm1, ymm5); //B11[x][1] -= ymm5 ymm2 = _mm256_sub_pd(ymm2, ymm6); //B11[x][2] -= ymm6 ymm3 = _mm256_sub_pd(ymm3, ymm7); //B11[x][3] -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][0] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][0] ymm7 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][0] //2nd col a11 += cs_a; ymm8 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm9 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[1][1] //3rd col a11 += cs_a; ymm11 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][2] ymm12 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[1][2] //4th col a11 += cs_a; ymm13 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); //(Row 3): FMA operations ymm2 = 
_mm256_fnmadd_pd(ymm3, ymm12, ymm2); ymm1 = _mm256_fnmadd_pd(ymm3, ymm10, ymm1); ymm0 = _mm256_fnmadd_pd(ymm3, ymm7, ymm0); //(ROW 2): FMA operations ymm1 = _mm256_fnmadd_pd(ymm2, ymm9, ymm1); ymm0 = _mm256_fnmadd_pd(ymm2, ymm6, ymm0); //(Row 1):FMA operations ymm0 = _mm256_fnmadd_pd(ymm1, ymm5, ymm0); if(n_remainder == 3) { _mm256_storeu_pd((double *)(b11 + cs_b), ymm1); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm2); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b*3), ymm3); //store(B11[x][0]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)(b11+ cs_b * 2), ymm2); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[x][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[x][0]) } } m_remainder -= 4; i -= 4; } if(m_remainder) { dtrsm_small_XAlB_unitDiag(L, B, AlphaVal, m_remainder, n, cs_a, cs_b); } return BLIS_SUCCESS; } /*implements TRSM for the case XA' = alpha * B (A' denotes the transpose of A) *A is upper triangular, non-unit diagonal, transpose *dimensions: X:mxn A:nxn B: mxn */ /* <---b11 <---a11 ***************** * *b01*b11* * * * * ^ * * * * * ^ * * | ***************** | ******* | * * * * * | * * * | * * * * * a01* * * b10 ***************** ************* * * * * * * * * * * * * * * * * * * ***************** ******************* */ static err_t bli_dtrsm_small_XAutB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 8; //block dimension along the rows dim_t D_NR = 4; //block dimension along the columns dim_t m = bli_obj_length(b); //number of rows dim_t n = bli_obj_width(b); //number of columns dim_t m_remainder = m % D_MR; //number of corner rows dim_t n_remainder = n % D_NR; //number of corner columns dim_t cs_a = bli_obj_col_stride(a); //column stride of matrix A dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) { return
BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t i, j, k; //loop variablse dim_t k_iter; //determines the number of GEMM operations to be done dim_t cs_b_offset[2]; //pre-calculated strides double ones = 1.0; double AlphaVal = *(double *)AlphaObj->buffer; //value of Alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a01, *a11, *b10, *b11; //pointers for GEMM and TRSM blocks double *ptr_a01_dup; cs_b_offset[0] = cs_b << 1; //cs_b_offset[0] = cs_b * 2; cs_b_offset[1] = cs_b_offset[0] + cs_b;//cs_b_offset[1] = cs_b * 3; //ymm scratch reginsters __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(i = (m-D_MR); (i+1) > 0; i -= D_MR) //loop along 'M' direction { for(j = (n-D_NR); (j+1) > 0; j -= D_NR) //loop along 'N' direction { a01 = L + (j+D_NR)*cs_a +(j); //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used in GEMM b11 = B + (i) + (j)*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; 
//move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = 
_mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] 
B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] 
B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm12 = _mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 
block of A11/// //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] a11 += cs_a; //2nd col ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] a11 += cs_a; //3rd col ymm3 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm4 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm5 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] a11 += cs_a; //4th col ymm6 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][1] ymm7 = _mm256_broadcast_sd((double const *)&ones); //compute reciprocals of A(i,i) and broadcast in registers ymm0 = _mm256_unpacklo_pd(ymm0, ymm2); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm2 = _mm256_unpacklo_pd(ymm5, ymm6); //A11[2][2] A11[3][3] A11[2][2] A11[3][3] ymm0 = _mm256_blend_pd(ymm0, ymm2, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm0 = _mm256_div_pd(ymm7, ymm0); // 1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] //extract a33 ymm7 = _mm256_permute_pd(ymm0, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3]) ymm7 = _mm256_permute2f128_pd(ymm7, ymm7, 0x11); //(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm11 = _mm256_mul_pd(ymm11, ymm7); ymm15 = _mm256_mul_pd(ymm15, ymm7); //extract a22 ymm7 = _mm256_permute_pd(ymm0, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm7 = _mm256_permute2f128_pd(ymm7, ymm7, 0x11); //(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row 3): FMA operations ymm10 = _mm256_fnmadd_pd(ymm11, ymm6, ymm10); ymm9 = _mm256_fnmadd_pd(ymm11, ymm5, ymm9); ymm8 = _mm256_fnmadd_pd(ymm11, ymm2, ymm8); //(Row 3): FMA operations ymm14 = _mm256_fnmadd_pd(ymm15, ymm6, ymm14); ymm13 = _mm256_fnmadd_pd(ymm15, ymm5, ymm13); ymm12 = _mm256_fnmadd_pd(ymm15, ymm2, ymm12); ymm10 = 
_mm256_mul_pd(ymm10, ymm7); ymm14 = _mm256_mul_pd(ymm14, ymm7); //extract a11 ymm7 = _mm256_permute_pd(ymm0, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm7 = _mm256_permute2f128_pd(ymm7, ymm7, 0x00); //(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(ROW 2): FMA operations ymm9 = _mm256_fnmadd_pd(ymm10, ymm4, ymm9); ymm8 = _mm256_fnmadd_pd(ymm10, ymm3, ymm8); ymm13 = _mm256_fnmadd_pd(ymm14, ymm4, ymm13); ymm12 = _mm256_fnmadd_pd(ymm14, ymm3, ymm12); ymm9 = _mm256_mul_pd(ymm9, ymm7); ymm13 = _mm256_mul_pd(ymm13, ymm7); //extract A00 ymm7 = _mm256_permute_pd(ymm0, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm7 = _mm256_permute2f128_pd(ymm7, ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) //(Row 1):FMA operations ymm8 = _mm256_fnmadd_pd(ymm9, ymm1, ymm8); ymm12 = _mm256_fnmadd_pd(ymm13, ymm1, ymm12); ymm8 = _mm256_mul_pd(ymm8, ymm7); ymm12 = _mm256_mul_pd(ymm12, ymm7); _mm256_storeu_pd((double *)b11, ymm8); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[x][3]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[x][3]) } if(n_remainder) //implementation for remainder columns(when n is not multiple of D_NR) { a01 = L + (j+D_NR)*cs_a +(j); //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used in GEMM b11 = B + (i) + (j)*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = 
_mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] 
B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const 
*)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = 
_mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 if(n_remainder == 3) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm9 = _mm256_loadu_pd((double const *)(b11+cs_b)); //B11[0-3][0] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][0] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b*2)); //B11[0-3][1] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b*2 + D_NR)); //B11[4-7][1] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0-3][2] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4-7][2] } if(n_remainder == 2) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm9 = _mm256_broadcast_sd((double 
const *)&ones); //B11[0-3][3] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0-3][0] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4-7][0] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0-3][1] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4-7][1] } if(n_remainder == 1) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][1] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][1] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm11 = _mm256_loadu_pd((double const *)(b11+cs_b_offset[1])); //B11[0-3][0] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] +D_NR)); //B11[4-7][0] } ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] a11 += cs_a; //2nd col ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] a11 += cs_a; //3rd col ymm3 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm4 = 
_mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm5 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] a11 += cs_a; //4th col ymm6 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][1] ymm7 = _mm256_broadcast_sd((double const *)&ones); //compute reciprocals of A(i,i) and broadcast in registers ymm0 = _mm256_unpacklo_pd(ymm0, ymm2); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm2 = _mm256_unpacklo_pd(ymm5, ymm6); //A11[2][2] A11[3][3] A11[2][2] A11[3][3] ymm0 = _mm256_blend_pd(ymm0, ymm2, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm0 = _mm256_div_pd(ymm7, ymm0); // 1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] ymm2 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] //extract a33 ymm7 = _mm256_permute_pd(ymm0, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3]) ymm7 = _mm256_permute2f128_pd(ymm7, ymm7, 0x11); //(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm11 = _mm256_mul_pd(ymm11, ymm7); ymm15 = _mm256_mul_pd(ymm15, ymm7); //extract a22 ymm7 = _mm256_permute_pd(ymm0, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm7 = _mm256_permute2f128_pd(ymm7, ymm7, 0x11); //(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row 3): FMA operations ymm10 = _mm256_fnmadd_pd(ymm11, ymm6, ymm10); ymm9 = _mm256_fnmadd_pd(ymm11, ymm5, ymm9); ymm8 = _mm256_fnmadd_pd(ymm11, ymm2, ymm8); //(Row 3): FMA operations ymm14 = _mm256_fnmadd_pd(ymm15, ymm6, ymm14); ymm13 = _mm256_fnmadd_pd(ymm15, ymm5, ymm13); ymm12 = _mm256_fnmadd_pd(ymm15, ymm2, ymm12); ymm10 = _mm256_mul_pd(ymm10, ymm7); ymm14 = _mm256_mul_pd(ymm14, ymm7); //extract a11 ymm7 = _mm256_permute_pd(ymm0, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm7 = _mm256_permute2f128_pd(ymm7, ymm7, 0x00); //(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(ROW 2): FMA operations ymm9 = _mm256_fnmadd_pd(ymm10, ymm4, ymm9); 
ymm8 = _mm256_fnmadd_pd(ymm10, ymm3, ymm8); ymm13 = _mm256_fnmadd_pd(ymm14, ymm4, ymm13); ymm12 = _mm256_fnmadd_pd(ymm14, ymm3, ymm12); ymm9 = _mm256_mul_pd(ymm9, ymm7); ymm13 = _mm256_mul_pd(ymm13, ymm7); //extract A00 ymm7 = _mm256_permute_pd(ymm0, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm7 = _mm256_permute2f128_pd(ymm7, ymm7, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) //(Row 1):FMA operations ymm8 = _mm256_fnmadd_pd(ymm9, ymm1, ymm8); ymm12 = _mm256_fnmadd_pd(ymm13, ymm1, ymm12); ymm8 = _mm256_mul_pd(ymm8, ymm7); ymm12 = _mm256_mul_pd(ymm12, ymm7); if(n_remainder == 3) { _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14);//store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)(b11+ cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } } } if(i<0) i += D_NR; if((m & 4)) ///implementation for remainder rows(when m_remainder is a multiple of 4) { for(j = (n-D_NR); (j+1) > 0; j -=D_NR) //loop along n direction { a01 = L + (j+D_NR)*cs_a + (j); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; 
//pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of times GEMM operations to be performed(in blocks of 4x4) ymm15 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha ///GEMM for previous blocks /// ///load 4x4 block of b11 ymm0 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] //multiply by alpha ymm0 = _mm256_mul_pd(ymm0, ymm15); //B11[x][0] *= alpha ymm1 = _mm256_mul_pd(ymm1, ymm15); //B11[x][1] *=alpha ymm2 = _mm256_mul_pd(ymm2, ymm15); //B11[x][2] *= alpha ymm3 = _mm256_mul_pd(ymm3, ymm15); //B11[x][3] *= alpha ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //load 4x4 bblock of b10 ymm8 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm9 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm10 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //B10[0][2] B10[1][2] B10[2][2] B10[3][2] ymm11 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[1])); //B10[0][3] B10[1][3] B10[2][3] B10[3][3] //broadcast 1st row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row of A ymm4 = 
_mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) //broadcast 2nd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B10[0][1]*A01[1][0] B10[1][1]*A01[1][0] B10[2][1]*A01[1][0] B10[3][1]*A01[1][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B10[0][1]*A01[1][1] B10[1][1]*A01[1][1] B10[2][1]*A01[1][1] B10[3][1]*A01[1][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B10[0][1]*A01[1][2] B10[1][1]*A01[1][2] B10[2][1]*A01[1][2] B10[3][1]*A01[1][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B10[0][1]*A01[1][3] B10[1][1]*A01[1][3] B10[2][1]*A01[1][3] B10[3][1]*A01[1][3]) //braodcast 3rd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B10[0][2]*A01[2][0] B10[1][2]*A01[2][0] B10[2][2]*A01[2][0] B10[3][2]*A01[2][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B10[0][2]*A01[2][1] B10[1][2]*A01[2][1] B10[2][2]*A01[2][1] 
B10[3][2]*A01[2][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B10[0][2]*A01[2][2] B10[1][2]*A01[2][2] B10[2][2]*A01[2][2] B10[3][2]*A01[2][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B10[0][2]*A01[2][3] B10[1][2]*A01[2][3] B10[2][2]*A01[2][3] B10[3][2]*A01[2][3]) //broadcast 4th row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B10[0][3]*A01[3][0] B10[1][3]*A01[3][0] B10[2][3]*A01[3][0] B10[3][3]*A01[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B10[0][3]*A01[3][1] B10[1][3]*A01[3][1] B10[2][3]*A01[3][1] B10[3][3]*A01[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B10[0][3]*A01[3][2] B10[1][3]*A01[3][2] B10[2][3]*A01[3][2] B10[3][3]*A01[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B10[0][3]*A01[3][3] B10[1][3]*A01[3][3] B10[2][3]*A01[3][3] B10[3][3]*A01[3][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR*cs_a; //pointer math to find next block of A for GEMM } ///GEMM code end/// ymm0 = _mm256_sub_pd(ymm0, ymm4); //B11[x][0] -=ymm4 ymm1 = _mm256_sub_pd(ymm1, ymm5); //B11[x][1] -= ymm5 ymm2 = _mm256_sub_pd(ymm2, ymm6); //B11[x][2] -= ymm6 ymm3 = _mm256_sub_pd(ymm3, ymm7); //B11[x][3] -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] a11 += cs_a; //2nd col ymm5 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm8 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] a11 += cs_a; //3rd col ymm6 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm9 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm11 = 
_mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] a11 += cs_a; //4th col ymm7 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm12 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] ymm13 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][1] ymm14 = _mm256_broadcast_sd((double const *)&ones); //compute reciprocals of A(i,i) and broadcast in registers ymm4 = _mm256_unpacklo_pd(ymm4, ymm8); //A11[0][0] A11[1][1] A11[0][0] A11[1][1] ymm8 = _mm256_unpacklo_pd(ymm11, ymm13); //A11[2][2] A11[3][3] A11[2][2] A11[3][3] ymm15 = _mm256_blend_pd(ymm4, ymm8, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm14 = _mm256_div_pd(ymm14, ymm15); // 1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] //extract a33 ymm15 = _mm256_permute_pd(ymm14, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm3 = _mm256_mul_pd(ymm3, ymm15); //extract a22 ymm15 = _mm256_permute_pd(ymm14, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row 3): FMA operations ymm2 = _mm256_fnmadd_pd(ymm3, ymm12, ymm2); ymm1 = _mm256_fnmadd_pd(ymm3, ymm10, ymm1); ymm0 = _mm256_fnmadd_pd(ymm3, ymm7, ymm0); ymm2 = _mm256_mul_pd(ymm2, ymm15); //extract a11 ymm15 = _mm256_permute_pd(ymm14, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(ROW 2): FMA operations ymm1 = _mm256_fnmadd_pd(ymm2, ymm9, ymm1); ymm0 = _mm256_fnmadd_pd(ymm2, ymm6, ymm0); ymm1 = _mm256_mul_pd(ymm1, ymm15); //extract A00 ymm15 = _mm256_permute_pd(ymm14, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 
1/A11[0][0]) //(Row 1):FMA operations ymm0 = _mm256_fnmadd_pd(ymm1, ymm5, ymm0); ymm0 = _mm256_mul_pd(ymm0, ymm15); _mm256_storeu_pd((double *)b11, ymm0); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm1); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm2); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm3); //store(B11[x][3]) } if(n_remainder) //implementation for remainder columns(when n is not a multiple of D_NR) { a01 = L + (j+D_NR)*cs_a + (j); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of times GEMM operations to be performed(in blocks of 4x4) ymm15 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha ///GEMM for previous blocks /// ///load 4x4 block of b11 if(n_remainder == 3) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm1 = _mm256_loadu_pd((double const *)b11+ cs_b); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] } if(n_remainder == 2) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm1 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] } if(n_remainder == 1) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm1 = _mm256_broadcast_sd((double const *)&ones); 
//B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm2 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] } //multiply by alpha ymm0 = _mm256_mul_pd(ymm0, ymm15); //B11[x][0] *= alpha ymm1 = _mm256_mul_pd(ymm1, ymm15); //B11[x][1] *=alpha ymm2 = _mm256_mul_pd(ymm2, ymm15); //B11[x][2] *= alpha ymm3 = _mm256_mul_pd(ymm3, ymm15); //B11[x][3] *= alpha ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //load 4x4 bblock of b10 ymm8 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm9 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm10 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //B10[0][2] B10[1][2] B10[2][2] B10[3][2] ymm11 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[1])); //B10[0][3] B10[1][3] B10[2][3] B10[3][3] //broadcast 1st row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] 
B10[3][0]*A01[0][3]) //broadcast 2nd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B10[0][1]*A01[1][0] B10[1][1]*A01[1][0] B10[2][1]*A01[1][0] B10[3][1]*A01[1][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B10[0][1]*A01[1][1] B10[1][1]*A01[1][1] B10[2][1]*A01[1][1] B10[3][1]*A01[1][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B10[0][1]*A01[1][2] B10[1][1]*A01[1][2] B10[2][1]*A01[1][2] B10[3][1]*A01[1][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B10[0][1]*A01[1][3] B10[1][1]*A01[1][3] B10[2][1]*A01[1][3] B10[3][1]*A01[1][3]) //braodcast 3rd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B10[0][2]*A01[2][0] B10[1][2]*A01[2][0] B10[2][2]*A01[2][0] B10[3][2]*A01[2][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B10[0][2]*A01[2][1] B10[1][2]*A01[2][1] B10[2][2]*A01[2][1] B10[3][2]*A01[2][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B10[0][2]*A01[2][2] B10[1][2]*A01[2][2] B10[2][2]*A01[2][2] B10[3][2]*A01[2][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B10[0][2]*A01[2][3] B10[1][2]*A01[2][3] B10[2][2]*A01[2][3] B10[3][2]*A01[2][3]) //broadcast 4th row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); 
//A01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B10[0][3]*A01[3][0] B10[1][3]*A01[3][0] B10[2][3]*A01[3][0] B10[3][3]*A01[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B10[0][3]*A01[3][1] B10[1][3]*A01[3][1] B10[2][3]*A01[3][1] B10[3][3]*A01[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B10[0][3]*A01[3][2] B10[1][3]*A01[3][2] B10[2][3]*A01[3][2] B10[3][3]*A01[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B10[0][3]*A01[3][3] B10[1][3]*A01[3][3] B10[2][3]*A01[3][3] B10[3][3]*A01[3][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code end/// ymm0 = _mm256_sub_pd(ymm0, ymm4); //B11[x][0] -=ymm4 ymm1 = _mm256_sub_pd(ymm1, ymm5); //B11[x][1] -= ymm5 ymm2 = _mm256_sub_pd(ymm2, ymm6); //B11[x][2] -= ymm6 ymm3 = _mm256_sub_pd(ymm3, ymm7); //B11[x][3] -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] a11 += cs_a; //2nd col ymm5 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm8 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] a11 += cs_a; //3rd col ymm6 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm9 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm11 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] a11 += cs_a; //4th col ymm7 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm12 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] ymm13 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][1] ymm14 = _mm256_broadcast_sd((double const *)&ones); //compute reciprocals of A(i,i) and broadcast in registers ymm4 = _mm256_unpacklo_pd(ymm4, ymm8); //A11[0][0] 
A11[1][1] A11[0][0] A11[1][1] ymm8 = _mm256_unpacklo_pd(ymm11, ymm13); //A11[2][2] A11[3][3] A11[2][2] A11[3][3] ymm15 = _mm256_blend_pd(ymm4, ymm8, 0x0C); //A11[0][0] A11[1][1] A11[2][2] A11[3][3] ymm14 = _mm256_div_pd(ymm14, ymm15); // 1/A11[0][0] 1/A11[1][1] 1/A11[2][2] 1/A11[3][3] //extract a33 ymm15 = _mm256_permute_pd(ymm14, 0x0C); //(1/A11[0][0] 1/A11[0][0] 1/A11[3][3] 1/A11[3][3]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //(1/A11[3][3] 1/A11[3][3] 1/A11[3][3] 1/A11[3][3]) ymm3 = _mm256_mul_pd(ymm3, ymm15); //extract a22 ymm15 = _mm256_permute_pd(ymm14, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x11); //(1/A11[2][2] 1/A11[2][2] 1/A11[2][2] 1/A11[2][2]) //(Row 3): FMA operations ymm2 = _mm256_fnmadd_pd(ymm3, ymm12, ymm2); ymm1 = _mm256_fnmadd_pd(ymm3, ymm10, ymm1); ymm0 = _mm256_fnmadd_pd(ymm3, ymm7, ymm0); ymm2 = _mm256_mul_pd(ymm2, ymm15); //extract a11 ymm15 = _mm256_permute_pd(ymm14, 0x03); //(1/A11[1][1] 1/A11[1][1] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //(1/A11[1][1] 1/A11[1][1] 1/A11[1][1] 1/A11[1][1]) //(ROW 2): FMA operations ymm1 = _mm256_fnmadd_pd(ymm2, ymm9, ymm1); ymm0 = _mm256_fnmadd_pd(ymm2, ymm6, ymm0); ymm1 = _mm256_mul_pd(ymm1, ymm15); //extract A00 ymm15 = _mm256_permute_pd(ymm14, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[2][2] 1/A11[2][2]) ymm15 = _mm256_permute2f128_pd(ymm15, ymm15, 0x00); //(1/A11[0][0] 1/A11[0][0] 1/A11[0][0] 1/A11[0][0]) //(Row 1):FMA operations ymm0 = _mm256_fnmadd_pd(ymm1, ymm5, ymm0); ymm0 = _mm256_mul_pd(ymm0, ymm15); if(n_remainder == 3) { _mm256_storeu_pd((double *)(b11 + cs_b), ymm1); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm2); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b*3), ymm3); //store(B11[x][0]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)(b11+ cs_b * 2), ymm2); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); 
//store(B11[x][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[x][0]) } } m_remainder -= 4; i -= 4; } if(m_remainder) ///implementation for remainder rows { dtrsm_small_XAutB(L, B, AlphaVal, m_remainder, n, cs_a, cs_b); } return BLIS_SUCCESS; } /*implements TRSM for the case XA = alpha * B *A is upper triangular, unit-diagonal, transpose *dimensions: X:mxn A:nxn B: mxn */ /* <---b11 <---a11 ***************** * *b01*b11* * * * * ^ * * * * * ^ * * | ***************** | ******* | * * * * * | * * * | * * * * * a01* * * b10 ***************** ************* * * * * * * * * * * * * * * * * * * ***************** ******************* */ static err_t bli_dtrsm_small_XAutB_unitDiag( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { dim_t D_MR = 8; //block dimension along the rows dim_t D_NR = 4; //block dimension along the columns dim_t m = bli_obj_length(b); //number of rows dim_t n = bli_obj_width(b); //number of columns dim_t m_remainder = m % D_MR; //number of corner rows dim_t n_remainder = n % D_NR; //number of corner columns dim_t cs_a = bli_obj_col_stride(a); //column stride of matrix A dim_t cs_b = bli_obj_col_stride(b); //column stride of matrix B #ifdef BLIS_ENABLE_SMALL_MATRIX_ROME if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_ROME) { return BLIS_NOT_YET_IMPLEMENTED; } #else if(bli_max(m,n) > D_BLIS_SMALL_MATRIX_THRES_TRSM_NAPLES) { return BLIS_NOT_YET_IMPLEMENTED; } #endif dim_t i, j, k; //loop variablse dim_t k_iter; //determines the number of GEMM operations to be done dim_t cs_b_offset[2]; //pre-calculated strides double ones = 1.0; double AlphaVal = *(double *)AlphaObj->buffer; //value of Alpha double *L = a->buffer; //pointer to matrix A double *B = b->buffer; //pointer to matrix B double *a01, *a11, *b10, *b11; //pointers for GEMM and TRSM blocks double *ptr_a01_dup; cs_b_offset[0] = cs_b << 1; //cs_b_offset[0] = cs_b * 2; cs_b_offset[1] = cs_b_offset[0] + cs_b;//cs_b_offset[1]
= cs_b * 3; //ymm scratch reginsters __m256d ymm0, ymm1, ymm2, ymm3; __m256d ymm4, ymm5, ymm6, ymm7; __m256d ymm8, ymm9, ymm10, ymm11; __m256d ymm12, ymm13, ymm14, ymm15; __m256d ymm16; for(i = (m-D_MR); (i+1) > 0; i -= D_MR) //loop along 'M' direction { for(j = (n-D_NR); (j+1) > 0; j -= D_NR) //loop along 'N' direction { a01 = L + (j+D_NR)*cs_a +(j); //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used in GEMM b11 = B + (i) + (j)*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] 
B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) //broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] 
B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += 
(B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 ymm8 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm12 = 
_mm256_loadu_pd((double const *)(b11 + D_NR)); //B11[4][0] B11[5][0] B11[6][0] B11[7][0] ymm9 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4][1] B11[5][1] B11[6][1] B11[7][1] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4][2] B11[5][2] B11[6][2] B11[7][2] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4][3] B11[5][3] B11[6][3] B11[7][3] ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] a11 += cs_a; //2nd col ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] a11 += cs_a; //3rd col ymm3 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm4 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm5 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] a11 += cs_a; //4th col ymm6 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm5 = 
_mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] //(Row 3): FMA operations ymm10 = _mm256_fnmadd_pd(ymm11, ymm6, ymm10); ymm9 = _mm256_fnmadd_pd(ymm11, ymm5, ymm9); ymm8 = _mm256_fnmadd_pd(ymm11, ymm2, ymm8); //(Row 3): FMA operations ymm14 = _mm256_fnmadd_pd(ymm15, ymm6, ymm14); ymm13 = _mm256_fnmadd_pd(ymm15, ymm5, ymm13); ymm12 = _mm256_fnmadd_pd(ymm15, ymm2, ymm12); //(ROW 2): FMA operations ymm9 = _mm256_fnmadd_pd(ymm10, ymm4, ymm9); ymm8 = _mm256_fnmadd_pd(ymm10, ymm3, ymm8); ymm13 = _mm256_fnmadd_pd(ymm14, ymm4, ymm13); ymm12 = _mm256_fnmadd_pd(ymm14, ymm3, ymm12); //(Row 1):FMA operations ymm8 = _mm256_fnmadd_pd(ymm9, ymm1, ymm8); ymm12 = _mm256_fnmadd_pd(ymm13, ymm1, ymm12); _mm256_storeu_pd((double *)b11, ymm8); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + D_NR), ymm12); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[x][3]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[x][3]) } if(n_remainder) //implementation for remainder columns(when n is not multiple of D_NR) { a01 = L + (j+D_NR)*cs_a +(j); //pointer to block of A to be used in GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used in GEMM b11 = B + (i) + (j)*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of GEMM operations to be done(in blocks of 4x4) ymm0 = _mm256_setzero_pd(); ymm1 = _mm256_setzero_pd(); ymm2 = _mm256_setzero_pd(); ymm3 = _mm256_setzero_pd(); ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = 
_mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //broadcast 1st row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row //load 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm13 = _mm256_loadu_pd((double const *)(b10 + D_NR)); //B10[4][0] B10[5][0] B10[6][0] B10[7][0] ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b + D_NR)); //B10[4][1] B10[5][1] B10[6][1] B10[7][1] ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][0]*A01[0][0] B10[5][0]*A01[0][0] B10[6][0]*A01[0][0] B10[7][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][0]*A01[0][1] B10[5][0]*A01[0][1] B10[6][0]*A01[0][1] B10[7][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][0]*A01[0][2] B10[5][0]*A01[0][2] B10[6][0]*A01[0][2] B10[7][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][0]*A01[0][3] B10[5][0]*A01[0][3] B10[6][0]*A01[0][3] B10[7][0]*A01[0][3]) 
//broadcast 2nd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][1]*A01[0][0] B10[1][1]*A01[0][0] B10[2][1]*A01[0][0] B10[3][1]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][1]*A01[0][1] B10[1][1]*A01[0][1] B10[2][1]*A01[0][1] B10[3][1]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += (B10[0][1]*A01[0][2] B10[1][1]*A01[0][2] B10[2][1]*A01[0][2] B10[3][1]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][1]*A01[0][3] B10[1][1]*A01[0][3] B10[2][1]*A01[0][3] B10[3][1]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][1]*A01[0][0] B10[5][1]*A01[0][0] B10[6][1]*A01[0][0] B10[7][1]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][1]*A01[0][1] B10[5][1]*A01[0][1] B10[6][1]*A01[0][1] B10[7][1]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][1]*A01[0][2] B10[5][1]*A01[0][2] B10[6][1]*A01[0][2] B10[7][1]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][1]*A01[0][3] B10[5][1]*A01[0][3] B10[6][1]*A01[0][3] B10[7][1]*A01[0][3]) //broadcast 3rd row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A01 //load next 8x2 block of B10 ymm12 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //(B10[0][2] B10[1][2] B10[2][2] B10[3][2]) ymm13 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + D_NR)); //(B10[4][2] B10[5][2] B10[6][2] 
B10[7][2]) ymm14 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b)); //(B10[0][3] B10[1][3] B10[2][3] B10[3][3]) ymm15 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0] + cs_b + D_NR)); //(B10[4][3] B10[5][3] B10[6][3] B10[7][3]) ymm0 = _mm256_fmadd_pd(ymm8, ymm12, ymm0); //ymm0 += (B10[0][2]*A01[0][0] B10[1][2]*A01[0][0] B10[2][2]*A01[0][0] B10[3][2]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm12, ymm1); //ymm1 += (B10[0][2]*A01[0][1] B10[1][2]*A01[0][1] B10[2][2]*A01[0][1] B10[3][2]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm12, ymm2); //ymm2 += (B10[0][2]*A01[0][2] B10[1][2]*A01[0][2] B10[2][2]*A01[0][2] B10[3][2]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm12, ymm3); //ymm3 += (B10[0][2]*A01[0][3] B10[1][2]*A01[0][3] B10[2][2]*A01[0][3] B10[3][2]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm13, ymm4); //ymm4 += (B10[4][2]*A01[0][0] B10[5][2]*A01[0][0] B10[6][2]*A01[0][0] B10[7][2]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm13, ymm5); //ymm5 += (B10[4][2]*A01[0][1] B10[5][2]*A01[0][1] B10[6][2]*A01[0][1] B10[7][2]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm13, ymm6); //ymm6 += (B10[4][2]*A01[0][2] B10[5][2]*A01[0][2] B10[6][2]*A01[0][2] B10[7][2]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm13, ymm7); //ymm7 += (B10[4][2]*A01[0][3] B10[5][2]*A01[0][3] B10[6][2]*A01[0][3] B10[7][2]*A01[0][3]) //broadcast 4th row of A01 ymm8 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm9 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm10 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm11 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A01 ymm0 = _mm256_fmadd_pd(ymm8, ymm14, ymm0); //ymm0 += (B10[0][3]*A01[0][0] B10[1][3]*A01[0][0] B10[2][3]*A01[0][0] B10[3][3]*A01[0][0]) ymm1 = _mm256_fmadd_pd(ymm9, ymm14, ymm1); //ymm1 += (B10[0][3]*A01[0][1] B10[1][3]*A01[0][1] B10[2][3]*A01[0][1] B10[3][3]*A01[0][1]) ymm2 = _mm256_fmadd_pd(ymm10, ymm14, ymm2); //ymm2 += 
(B10[0][3]*A01[0][2] B10[1][3]*A01[0][2] B10[2][3]*A01[0][2] B10[3][3]*A01[0][2]) ymm3 = _mm256_fmadd_pd(ymm11, ymm14, ymm3); //ymm3 += (B10[0][3]*A01[0][3] B10[1][3]*A01[0][3] B10[2][3]*A01[0][3] B10[3][3]*A01[0][3]) ymm4 = _mm256_fmadd_pd(ymm8, ymm15, ymm4); //ymm4 += (B10[4][3]*A01[0][0] B10[5][3]*A01[0][0] B10[6][3]*A01[0][0] B10[7][3]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm9, ymm15, ymm5); //ymm5 += (B10[4][3]*A01[0][1] B10[5][3]*A01[0][1] B10[6][3]*A01[0][1] B10[7][3]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm10, ymm15, ymm6); //ymm6 += (B10[4][3]*A01[0][2] B10[5][3]*A01[0][2] B10[6][3]*A01[0][2] B10[7][3]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm11, ymm15, ymm7); //ymm7 += (B10[4][3]*A01[0][3] B10[5][3]*A01[0][3] B10[6][3]*A01[0][3] B10[7][3]*A01[0][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code ends/// ymm16 = _mm256_broadcast_sd((double const *)&AlphaVal); //load 8x4 block of B11 if(n_remainder == 3) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm9 = _mm256_loadu_pd((double const *)(b11+cs_b)); //B11[0-3][0] ymm13 = _mm256_loadu_pd((double const *)(b11 + cs_b + D_NR)); //B11[4-7][0] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b*2)); //B11[0-3][1] ymm14 = _mm256_loadu_pd((double const *)(b11 + cs_b*2 + D_NR)); //B11[4-7][1] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0-3][2] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4-7][2] } if(n_remainder == 2) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm10 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0-3][0] ymm14 = 
_mm256_loadu_pd((double const *)(b11 + cs_b_offset[0] + D_NR)); //B11[4-7][0] ymm11 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0-3][1] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] + D_NR)); //B11[4-7][1] } if(n_remainder == 1) { ymm8 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][1] ymm12 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][1] ymm9 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][2] ymm13 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][2] ymm10 = _mm256_broadcast_sd((double const *)&ones); //B11[0-3][3] ymm14 = _mm256_broadcast_sd((double const *)&ones); //B11[4-7][3] ymm11 = _mm256_loadu_pd((double const *)(b11+cs_b_offset[1])); //B11[0-3][0] ymm15 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1] +D_NR)); //B11[4-7][0] } ymm8 = _mm256_fmsub_pd(ymm8, ymm16, ymm0); //B11[0-3][0] * alpha -= ymm0 ymm9 = _mm256_fmsub_pd(ymm9, ymm16, ymm1); //B11[4-7][0] * alpha-= ymm1 ymm10 = _mm256_fmsub_pd(ymm10, ymm16, ymm2); //B11[0-3][1] * alpha-= ymm2 ymm11 = _mm256_fmsub_pd(ymm11, ymm16, ymm3); //B11[4-7][1] * alpha -= ymm3 ymm12 = _mm256_fmsub_pd(ymm12, ymm16, ymm4); //B11[0-3][2] * alpha -= ymm4 ymm13 = _mm256_fmsub_pd(ymm13, ymm16, ymm5); //B11[4-7][2] * alpha -= ymm5 ymm14 = _mm256_fmsub_pd(ymm14, ymm16, ymm6); //B11[0-3][3] * alpha -= ymm6 ymm15 = _mm256_fmsub_pd(ymm15, ymm16, ymm7); //B11[4-7][3] * alpha -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm0 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] a11 += cs_a; //2nd col ymm1 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] a11 += cs_a; //3rd col ymm3 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm4 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm5 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] a11 += cs_a; //4th col ymm6 = _mm256_broadcast_sd((double const *)(a11+3)); 
//A11[0][1] ymm2 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm5 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm6 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] //(Row 3): FMA operations ymm10 = _mm256_fnmadd_pd(ymm11, ymm6, ymm10); ymm9 = _mm256_fnmadd_pd(ymm11, ymm5, ymm9); ymm8 = _mm256_fnmadd_pd(ymm11, ymm2, ymm8); //(Row 3): FMA operations ymm14 = _mm256_fnmadd_pd(ymm15, ymm6, ymm14); ymm13 = _mm256_fnmadd_pd(ymm15, ymm5, ymm13); ymm12 = _mm256_fnmadd_pd(ymm15, ymm2, ymm12); //(ROW 2): FMA operations ymm9 = _mm256_fnmadd_pd(ymm10, ymm4, ymm9); ymm8 = _mm256_fnmadd_pd(ymm10, ymm3, ymm8); ymm13 = _mm256_fnmadd_pd(ymm14, ymm4, ymm13); ymm12 = _mm256_fnmadd_pd(ymm14, ymm3, ymm12); //(Row 1):FMA operations ymm8 = _mm256_fnmadd_pd(ymm9, ymm1, ymm8); ymm12 = _mm256_fnmadd_pd(ymm13, ymm1, ymm12); if(n_remainder == 3) { _mm256_storeu_pd((double *)(b11 + cs_b), ymm9); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b + D_NR), ymm13); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14);//store(B11[4-7][2]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm10); //store(B11[0-3][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0] + D_NR), ymm14); //store(B11[4-7][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)(b11+ cs_b_offset[1]), ymm11); //store(B11[0-3][0]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1] + D_NR), ymm15); //store(B11[4-7][0]) } } } if(i<0) i += D_NR; if((m & 4)) ///implementation for remainder rows(when m_remainder is a 
multiple of 4) { for(j = (n-D_NR); (j+1) > 0; j -=D_NR) //loop along n direction { a01 = L + (j+D_NR)*cs_a + (j); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of times GEMM operations to be performed(in blocks of 4x4) ymm15 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha ///GEMM for previous blocks /// ///load 4x4 block of b11 ymm0 = _mm256_loadu_pd((double const *)b11); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm1 = _mm256_loadu_pd((double const *)(b11 + cs_b)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[0])); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b_offset[1])); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] //multiply by alpha ymm0 = _mm256_mul_pd(ymm0, ymm15); //B11[x][0] *= alpha ymm1 = _mm256_mul_pd(ymm1, ymm15); //B11[x][1] *=alpha ymm2 = _mm256_mul_pd(ymm2, ymm15); //B11[x][2] *= alpha ymm3 = _mm256_mul_pd(ymm3, ymm15); //B11[x][3] *= alpha ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //load 4x4 bblock of b10 ymm8 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm9 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm10 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //B10[0][2] B10[1][2] B10[2][2] B10[3][2] ymm11 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[1])); //B10[0][3] B10[1][3] B10[2][3] B10[3][3] //broadcast 1st row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm13 = 
_mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) //broadcast 2nd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B10[0][1]*A01[1][0] B10[1][1]*A01[1][0] B10[2][1]*A01[1][0] B10[3][1]*A01[1][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B10[0][1]*A01[1][1] B10[1][1]*A01[1][1] B10[2][1]*A01[1][1] B10[3][1]*A01[1][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B10[0][1]*A01[1][2] B10[1][1]*A01[1][2] B10[2][1]*A01[1][2] B10[3][1]*A01[1][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B10[0][1]*A01[1][3] B10[1][1]*A01[1][3] B10[2][1]*A01[1][3] B10[3][1]*A01[1][3]) //braodcast 3rd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[2][3] a01 += cs_a; //move to next row of A ymm4 = 
_mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B10[0][2]*A01[2][0] B10[1][2]*A01[2][0] B10[2][2]*A01[2][0] B10[3][2]*A01[2][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B10[0][2]*A01[2][1] B10[1][2]*A01[2][1] B10[2][2]*A01[2][1] B10[3][2]*A01[2][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B10[0][2]*A01[2][2] B10[1][2]*A01[2][2] B10[2][2]*A01[2][2] B10[3][2]*A01[2][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B10[0][2]*A01[2][3] B10[1][2]*A01[2][3] B10[2][2]*A01[2][3] B10[3][2]*A01[2][3]) //broadcast 4th row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B10[0][3]*A01[3][0] B10[1][3]*A01[3][0] B10[2][3]*A01[3][0] B10[3][3]*A01[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B10[0][3]*A01[3][1] B10[1][3]*A01[3][1] B10[2][3]*A01[3][1] B10[3][3]*A01[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B10[0][3]*A01[3][2] B10[1][3]*A01[3][2] B10[2][3]*A01[3][2] B10[3][3]*A01[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B10[0][3]*A01[3][3] B10[1][3]*A01[3][3] B10[2][3]*A01[3][3] B10[3][3]*A01[3][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + D_NR*cs_a; //pointer math to find next block of A for GEMM } ///GEMM code end/// ymm0 = _mm256_sub_pd(ymm0, ymm4); //B11[x][0] -=ymm4 ymm1 = _mm256_sub_pd(ymm1, ymm5); //B11[x][1] -= ymm5 ymm2 = _mm256_sub_pd(ymm2, ymm6); //B11[x][2] -= ymm6 ymm3 = _mm256_sub_pd(ymm3, ymm7); //B11[x][3] -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] a11 += cs_a; //2nd col ymm5 = _mm256_broadcast_sd((double const 
*)(a11+0)); //A11[0][1] ymm8 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] a11 += cs_a; //3rd col ymm6 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm9 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm11 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] a11 += cs_a; //4th col ymm7 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm12 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] ymm13 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][1] //(Row 3): FMA operations ymm2 = _mm256_fnmadd_pd(ymm3, ymm12, ymm2); ymm1 = _mm256_fnmadd_pd(ymm3, ymm10, ymm1); ymm0 = _mm256_fnmadd_pd(ymm3, ymm7, ymm0); //(ROW 2): FMA operations ymm1 = _mm256_fnmadd_pd(ymm2, ymm9, ymm1); ymm0 = _mm256_fnmadd_pd(ymm2, ymm6, ymm0); //(Row 1):FMA operations ymm0 = _mm256_fnmadd_pd(ymm1, ymm5, ymm0); _mm256_storeu_pd((double *)b11, ymm0); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b), ymm1); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm2); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b_offset[1]), ymm3); //store(B11[x][3]) } if(n_remainder) //implementation for remainder columns(when n is not a multiple of D_NR) { a01 = L + (j+D_NR)*cs_a + (j); //pointer to block of A to be used for GEMM a11 = L + j*cs_a + j; //pointer to block of A to be used for TRSM b10 = B + i + (j+D_NR)*cs_b; //pointer to block of B to be used for GEMM b11 = B + i + j*cs_b; //pointer to block of B to be used for TRSM k_iter = (n-j-D_NR) / D_NR; //number of times GEMM operations to be performed(in blocks of 4x4) ymm15 = _mm256_broadcast_sd((double const *)&AlphaVal); //register to store alpha ///GEMM for previous blocks /// ///load 4x4 block of b11 if(n_remainder == 3) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm1 = _mm256_loadu_pd((double const *)b11+ cs_b); //B11[0][0] 
B11[1][0] B11[2][0] B11[3][0] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] } if(n_remainder == 2) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm1 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm2 = _mm256_loadu_pd((double const *)(b11 + cs_b * 2)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] } if(n_remainder == 1) { ymm0 = _mm256_broadcast_sd((double const *)&ones); //B11[0][1] B11[1][1] B11[2][1] B11[3][1] ymm1 = _mm256_broadcast_sd((double const *)&ones); //B11[0][2] B11[1][2] B11[2][2] B11[3][2] ymm2 = _mm256_broadcast_sd((double const *)&ones); //B11[0][3] B11[1][3] B11[2][3] B11[3][3] ymm3 = _mm256_loadu_pd((double const *)(b11 + cs_b * 3)); //B11[0][0] B11[1][0] B11[2][0] B11[3][0] } //multiply by alpha ymm0 = _mm256_mul_pd(ymm0, ymm15); //B11[x][0] *= alpha ymm1 = _mm256_mul_pd(ymm1, ymm15); //B11[x][1] *=alpha ymm2 = _mm256_mul_pd(ymm2, ymm15); //B11[x][2] *= alpha ymm3 = _mm256_mul_pd(ymm3, ymm15); //B11[x][3] *= alpha ymm4 = _mm256_setzero_pd(); ymm5 = _mm256_setzero_pd(); ymm6 = _mm256_setzero_pd(); ymm7 = _mm256_setzero_pd(); ///GEMM implementation starts/// for(k = 0; k < k_iter; k++) //loop for number of GEMM operations { ptr_a01_dup = a01; //load 4x4 bblock of b10 ymm8 = _mm256_loadu_pd((double const *)b10); //B10[0][0] B10[1][0] B10[2][0] B10[3][0] ymm9 = _mm256_loadu_pd((double const *)(b10 + cs_b)); //B10[0][1] B10[1][1] B10[2][1] B10[3][1] ymm10 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[0])); //B10[0][2] B10[1][2] B10[2][2] B10[3][2] ymm11 = _mm256_loadu_pd((double const *)(b10 + cs_b_offset[1])); //B10[0][3] B10[1][3] B10[2][3] B10[3][3] //broadcast 1st row of A01 ymm12 = 
_mm256_broadcast_sd((double const *)(a01 + 0)); //A01[0][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[0][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[0][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[0][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm8, ymm4); //ymm4 += (B10[0][0]*A01[0][0] B10[1][0]*A01[0][0] B10[2][0]*A01[0][0] B10[3][0]*A01[0][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm8, ymm5); //ymm5 += (B10[0][0]*A01[0][1] B10[1][0]*A01[0][1] B10[2][0]*A01[0][1] B10[3][0]*A01[0][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm8, ymm6); //ymm6 += (B10[0][0]*A01[0][2] B10[1][0]*A01[0][2] B10[2][0]*A01[0][2] B10[3][0]*A01[0][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm8, ymm7); //ymm7 += (B10[0][0]*A01[0][3] B10[1][0]*A01[0][3] B10[2][0]*A01[0][3] B10[3][0]*A01[0][3]) //broadcast 2nd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[1][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[1][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[1][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[1][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm9, ymm4); //ymm4 += (B10[0][1]*A01[1][0] B10[1][1]*A01[1][0] B10[2][1]*A01[1][0] B10[3][1]*A01[1][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm9, ymm5); //ymm5 += (B10[0][1]*A01[1][1] B10[1][1]*A01[1][1] B10[2][1]*A01[1][1] B10[3][1]*A01[1][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm9, ymm6); //ymm6 += (B10[0][1]*A01[1][2] B10[1][1]*A01[1][2] B10[2][1]*A01[1][2] B10[3][1]*A01[1][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm9, ymm7); //ymm7 += (B10[0][1]*A01[1][3] B10[1][1]*A01[1][3] B10[2][1]*A01[1][3] B10[3][1]*A01[1][3]) //braodcast 3rd row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[2][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[2][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[2][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 
3)); //A01[2][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm10, ymm4); //ymm4 += (B10[0][2]*A01[2][0] B10[1][2]*A01[2][0] B10[2][2]*A01[2][0] B10[3][2]*A01[2][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm10, ymm5); //ymm5 += (B10[0][2]*A01[2][1] B10[1][2]*A01[2][1] B10[2][2]*A01[2][1] B10[3][2]*A01[2][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm10, ymm6); //ymm6 += (B10[0][2]*A01[2][2] B10[1][2]*A01[2][2] B10[2][2]*A01[2][2] B10[3][2]*A01[2][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm10, ymm7); //ymm7 += (B10[0][2]*A01[2][3] B10[1][2]*A01[2][3] B10[2][2]*A01[2][3] B10[3][2]*A01[2][3]) //broadcast 4th row of A01 ymm12 = _mm256_broadcast_sd((double const *)(a01 + 0)); //A01[3][0] ymm13 = _mm256_broadcast_sd((double const *)(a01 + 1)); //A01[3][1] ymm14 = _mm256_broadcast_sd((double const *)(a01 + 2)); //A01[3][2] ymm15 = _mm256_broadcast_sd((double const *)(a01 + 3)); //A01[3][3] a01 += cs_a; //move to next row of A ymm4 = _mm256_fmadd_pd(ymm12, ymm11, ymm4); //ymm4 += (B10[0][3]*A01[3][0] B10[1][3]*A01[3][0] B10[2][3]*A01[3][0] B10[3][3]*A01[3][0]) ymm5 = _mm256_fmadd_pd(ymm13, ymm11, ymm5); //ymm5 += (B10[0][3]*A01[3][1] B10[1][3]*A01[3][1] B10[2][3]*A01[3][1] B10[3][3]*A01[3][1]) ymm6 = _mm256_fmadd_pd(ymm14, ymm11, ymm6); //ymm6 += (B10[0][3]*A01[3][2] B10[1][3]*A01[3][2] B10[2][3]*A01[3][2] B10[3][3]*A01[3][2]) ymm7 = _mm256_fmadd_pd(ymm15, ymm11, ymm7); //ymm7 += (B10[0][3]*A01[3][3] B10[1][3]*A01[3][3] B10[2][3]*A01[3][3] B10[3][3]*A01[3][3]) b10 += D_NR * cs_b; //pointer math to find next block of B for GEMM a01 = ptr_a01_dup + (D_NR * cs_a); //pointer math to find next block of A for GEMM } ///GEMM code end/// ymm0 = _mm256_sub_pd(ymm0, ymm4); //B11[x][0] -=ymm4 ymm1 = _mm256_sub_pd(ymm1, ymm5); //B11[x][1] -= ymm5 ymm2 = _mm256_sub_pd(ymm2, ymm6); //B11[x][2] -= ymm6 ymm3 = _mm256_sub_pd(ymm3, ymm7); //B11[x][3] -= ymm7 ///implement TRSM/// ///read 4x4 block of A11/// //1st col ymm4 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][0] a11 += 
cs_a; //2nd col ymm5 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm8 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] a11 += cs_a; //3rd col ymm6 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm9 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm11 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] a11 += cs_a; //4th col ymm7 = _mm256_broadcast_sd((double const *)(a11+0)); //A11[0][1] ymm10 = _mm256_broadcast_sd((double const *)(a11+1)); //A11[0][1] ymm12 = _mm256_broadcast_sd((double const *)(a11+2)); //A11[0][1] ymm13 = _mm256_broadcast_sd((double const *)(a11+3)); //A11[0][1] //(Row 3): FMA operations ymm2 = _mm256_fnmadd_pd(ymm3, ymm12, ymm2); ymm1 = _mm256_fnmadd_pd(ymm3, ymm10, ymm1); ymm0 = _mm256_fnmadd_pd(ymm3, ymm7, ymm0); //(ROW 2): FMA operations ymm1 = _mm256_fnmadd_pd(ymm2, ymm9, ymm1); ymm0 = _mm256_fnmadd_pd(ymm2, ymm6, ymm0); //(Row 1):FMA operations ymm0 = _mm256_fnmadd_pd(ymm1, ymm5, ymm0); if(n_remainder == 3) { _mm256_storeu_pd((double *)(b11 + cs_b), ymm1); //store(B11[x][1]) _mm256_storeu_pd((double *)(b11 + cs_b_offset[0]), ymm2); //(store(B11[x][2])) _mm256_storeu_pd((double *)(b11 + cs_b*3), ymm3); //store(B11[x][0]) } if(n_remainder == 2) { _mm256_storeu_pd((double *)(b11+ cs_b * 2), ymm2); //store(B11[x][0]) _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[x][1]) } if(n_remainder == 1) { _mm256_storeu_pd((double *)(b11 + cs_b * 3), ymm3); //store(B11[x][0]) } } m_remainder -= 4; i -= 4; } if(m_remainder) { dtrsm_small_XAutB_unitDiag(a->buffer, b->buffer,AlphaVal, m_remainder, n, cs_a, cs_b); } return BLIS_SUCCESS; } /* * AX = Alpha*B, Single precision, A: lower triangular * This kernel implementation supports matrices A and B such that m is equal to BLI_AlXB_M_SP and n is multiple of 8 */ static err_t bli_strsm_small_AlXB ( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { obj_t alpha, beta; // gemm parameters obj_t Ga, Gb, Gc; 
// for GEMM int m = bli_obj_length(b); // number of rows of matrix B int n = bli_obj_width(b); // number of columns of matrix B int lda = bli_obj_col_stride(a); // column stride of A int ldb = bli_obj_col_stride(b); // column stride of B int rsa = bli_obj_row_stride(a); // row stride of A int rsb = bli_obj_row_stride(b); // row stride of B int i = 0; int j; int blk_size = 8; int isUnitDiag = bli_obj_has_unit_diag(a); float alphaVal; float *L = a->buffer; float *B = b->buffer; if (m != BLI_AlXB_M_SP || (n&7) != 0) { return BLIS_NOT_YET_IMPLEMENTED; } if ( (m*(m + n)) > BLIS_SMALL_MATRIX_THRES_TRSM ) { return BLIS_NOT_YET_IMPLEMENTED; } alphaVal = *((float *)bli_obj_buffer_for_const(BLIS_FLOAT, AlphaObj)); /* Small _GEMM preparation code */ bli_obj_create( BLIS_FLOAT, 1, 1, 0, 0, &alpha ); bli_obj_create( BLIS_FLOAT, 1, 1, 0, 0, &beta ); /* B = B - A*B */ bli_setsc( -(1.0), 0.0, &alpha ); bli_setsc( (1.0), 0.0, &beta ); bli_obj_create_with_attached_buffer( BLIS_FLOAT, blk_size, blk_size, a->buffer, rsa, lda, &Ga); bli_obj_create_with_attached_buffer( BLIS_FLOAT, blk_size, n, b->buffer, rsb, ldb, &Gb); bli_obj_create_with_attached_buffer( BLIS_FLOAT, blk_size, n, b->buffer, rsb, ldb, &Gc); bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, &Ga ); bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, &Gb ); bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, &Gc ); //first block of trsm Gb.buffer = (void*)(B + i); //trsm of first 8xn block if (alphaVal != 1) { if (isUnitDiag == 0) { blis_strsm_microkernel_alpha((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb, alphaVal); fp_blis_strsm_microkernel = blis_strsm_microkernel; } else { blis_strsm_microkernel_alpha_unitDiag((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb, alphaVal); fp_blis_strsm_microkernel = blis_strsm_microkernel_unitDiag; } bli_setsc( alphaVal, 0.0, &beta ); } else { if (isUnitDiag == 0) { blis_strsm_microkernel((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb); fp_blis_strsm_microkernel = blis_strsm_microkernel; } else { 
blis_strsm_microkernel_unitDiag((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb); fp_blis_strsm_microkernel = blis_strsm_microkernel_unitDiag; } } //gemm update for (j = i + blk_size; j < m; j += blk_size) // for rows upto multiple of BLOCK_HEIGHT { Ga.buffer = (void*)(L + j + i*lda); Gc.buffer = (void*)(B + j); bli_gemm_small(&alpha, &Ga, &Gb, &beta, &Gc, cntx, cntl ); // Gc = beta*Gc + alpha*Ga *Gb } //trsm of remaining blocks for (i = blk_size; i < m; i += blk_size) { Gb.buffer = (void*)(B + i); fp_blis_strsm_microkernel((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb); for (j = i + blk_size; j < m; j += blk_size) // for rows upto multiple of BLOCK_HEIGHT { Ga.buffer = (void*)(L + j + i*lda); Gc.buffer = (void*)(B + j); bli_gemm_small(&alpha, &Ga, &Gb, &beta, &Gc, cntx, cntl ); // Gc = beta*Gc + alpha*Ga *Gb } } // End of for loop - i return BLIS_SUCCESS; } /* * XA' = Alpha*B, Single precision, A: lower triangular * This kernel implementation supports matrices A and B such that * m and n are multiples of 8 and n is less than or equal to BLI_XAltB_N_SP */ static err_t bli_strsm_small_XAltB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { int m = bli_obj_length(a); // number of rows of matrix B int n = bli_obj_length(b); // number of columns of matrix B int lda = bli_obj_col_stride(a); // column stride of A int ldb = bli_obj_col_stride(b); // column stride of B int rsa = bli_obj_row_stride(a); // row stride of A int rsb = bli_obj_row_stride(b); // row stride of B int i = 0; int isUnitDiag = bli_obj_has_unit_diag(a); float alphaVal; float *L = a->buffer; float *B = b->buffer; if ((m&7) != 0 || (n&7) != 0) { return BLIS_NOT_YET_IMPLEMENTED; } if ( n > BLI_XAltB_N_SP || (m*(m + n)) > BLIS_SMALL_MATRIX_THRES_TRSM ) { return BLIS_NOT_YET_IMPLEMENTED; } alphaVal = *((float *)bli_obj_buffer_for_const(BLIS_FLOAT, AlphaObj)); if (alphaVal != 1) { if (isUnitDiag == 0) { trsm_XAtB_block_allSmallSizedMatrices_alpha((L + i * lda + i), 
(B + i), m, n, rsa, rsb, lda, ldb, alphaVal); } else { trsm_XAtB_block_allSmallSizedMatrices_alpha_unitDiag((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb, alphaVal); } } else { if (isUnitDiag == 0) { trsm_XAtB_block_allSmallSizedMatrices((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb); } else { trsm_XAtB_block_allSmallSizedMatrices_unitDiag((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb); } } return BLIS_SUCCESS; } /* * A'X = Alpha*B, Single precision, A: upper triangular * This kernel implementation supports matrices A and B such that * m and n are multiples of 8, m is less than or equal to BLI_AutXB_M_SP and n is less than or equal to BLI_AutXB_N_SP */ static err_t bli_strsm_small_AutXB( side_t side, obj_t* AlphaObj, obj_t* a, obj_t* b, cntx_t* cntx, cntl_t* cntl ) { int m = bli_obj_width(a); // number of rows of matrix A (since At, so width is taken) int n = bli_obj_width(b); // number of columns of matrix B int lda = bli_obj_col_stride(a); // column stride of A int ldb = bli_obj_col_stride(b); // column stride of B int rsa = bli_obj_row_stride(a); // row stride of A int rsb = bli_obj_row_stride(b); // row stride of B int i = 0; int isUnitDiag = bli_obj_has_unit_diag(a); float alphaVal; float *L = a->buffer; float *B = b->buffer; if ((m&7) != 0 || (n&7) != 0) { return BLIS_NOT_YET_IMPLEMENTED; } if ( m > BLI_AutXB_M_SP || n > BLI_AutXB_N_SP || (m*(m + n)) > BLIS_SMALL_MATRIX_THRES_TRSM ) { return BLIS_NOT_YET_IMPLEMENTED; } alphaVal = *((float *)bli_obj_buffer_for_const(BLIS_FLOAT, AlphaObj)); if (alphaVal != 1) { if (isUnitDiag == 0) { trsm_AutXB_block_allSmallSizedMatrices_alpha((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb, alphaVal); } else { trsm_AutXB_block_allSmallSizedMatrices_alpha_unitDiag((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb, alphaVal); } } else { if (isUnitDiag == 0) { trsm_AutXB_block_allSmallSizedMatrices((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb); } else { 
trsm_AutXB_block_allSmallSizedMatrices_unitDiag((L + i * lda + i), (B + i), m, n, rsa, rsb, lda, ldb); } } return BLIS_SUCCESS; } ///////////////////////////// AX=B /////////////////////////////// static void blis_strsm_microkernel_alpha(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alphaVal) { float ones = 1.0; int j; int cs_b_offset[6]; //int row2, row4, row6; float *ptr_b_dup; //70 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_cols[8]; __m256 mat_a_cols_rearr[36]; __m256 mat_a_diag_inv[8]; __m256 reciprocal_diags; __m256 alphaReg; cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; //reciprocal_diags = _mm256_loadu_ps((float const *)ones); reciprocal_diags = _mm256_broadcast_ss((float const *)&ones); alphaReg = _mm256_broadcast_ss((float const *)&alphaVal); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //read first set of 16x8 block of B into registers, where 16 is the blk_height and 8 is the blk_width for B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b); //_mm_prefetch((char*)(ptr_l + 0), _MM_HINT_T0); //row2 = (cs_l << 1); //row4 = (cs_l << 2); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + (cs_b))); //_mm_prefetch((char*)(ptr_l + cs_l), _MM_HINT_T0); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0])); //_mm_prefetch((char*)(ptr_l + row2), _MM_HINT_T0); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1])); //_mm_prefetch((char*)(ptr_l + row2 + cs_l), _MM_HINT_T0); //row6 = row2 + row4; mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2])); //_mm_prefetch((char*)(ptr_l + row4), _MM_HINT_T0); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3])); 
//_mm_prefetch((char*)(ptr_l + row4 + cs_l), _MM_HINT_T0); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4])); //_mm_prefetch((char*)(ptr_l + row6), _MM_HINT_T0); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5])); //_mm_prefetch((char*)(ptr_l + row6 + cs_l), _MM_HINT_T0); //reciprocal_diags = _mm256_loadu_ps((float const *)ones); //read first set of 16x16 block of L, where 16 is the blk_height and 16 is the blk_width for L /*mat_a_cols[0] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[1] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[2] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[3] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[4] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[5] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[6] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[7] = _mm256_loadu_ps((float const *)ptr_l);*/ //Shuffle to rearrange/transpose 16x16 block of L into contiguous row-wise registers //tmpRegs[0] = _mm256_castps256_ps128(mat_a_cols[0]); //zero latency, no instruction added actually. 
//mat_a_cols_rearr[0] = _mm256_broadcastss_ps(tmpRegs[0]); //1st col mat_a_cols_rearr[0] = _mm256_broadcast_ss((float const *)(ptr_l+0)); mat_a_cols_rearr[1] = _mm256_broadcast_ss((float const *)(ptr_l+1)); mat_a_cols_rearr[3] = _mm256_broadcast_ss((float const *)(ptr_l+2)); mat_a_cols_rearr[6] = _mm256_broadcast_ss((float const *)(ptr_l+3)); mat_a_cols_rearr[10] = _mm256_broadcast_ss((float const *)(ptr_l+4)); mat_a_cols_rearr[15] = _mm256_broadcast_ss((float const *)(ptr_l+5)); mat_a_cols_rearr[21] = _mm256_broadcast_ss((float const *)(ptr_l+6)); mat_a_cols_rearr[28] = _mm256_broadcast_ss((float const *)(ptr_l+7)); //2nd col ptr_l += cs_l; mat_a_cols_rearr[2] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_cols_rearr[4] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_cols_rearr[7] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[11] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[16] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[22] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[29] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //3rd col ptr_l += cs_l; mat_a_cols_rearr[5] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_cols_rearr[8] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[12] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[17] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[23] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[30] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //4rth col ptr_l += cs_l; mat_a_cols_rearr[9] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[13] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[18] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[24] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[31] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //5th 
col ptr_l += cs_l; mat_a_cols_rearr[14] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[19] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[25] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[32] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //6th col ptr_l += cs_l; mat_a_cols_rearr[20] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[26] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[33] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //7th col ptr_l += cs_l; mat_a_cols_rearr[27] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[34] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //7th col ptr_l += cs_l; mat_a_cols_rearr[35] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); numCols_b -= 8; // blk_width = 8 //compute reciprocals of L(i,i) and broadcast in registers mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_cols_rearr[0], mat_a_cols_rearr[2]); mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_cols_rearr[5], mat_a_cols_rearr[9]); mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_cols_rearr[14], mat_a_cols_rearr[20]); mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_cols_rearr[27], mat_a_cols_rearr[35]); //mat_a_diag_inv[1] = _mm256_permute_ps(mat_a_diag_inv[1], 0x55); //mat_a_diag_inv[3] = _mm256_permute_ps(mat_a_diag_inv[3], 0x55); mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC); mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0x20); //reciprocal of diagnol elements reciprocal_diags = _mm256_div_ps(reciprocal_diags, mat_a_diag_inv[0]); //Start loop for cols of B to be processed in size of blk_width for (j = 0; j < numCols_b; j += 8) { ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 16x8 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], 
mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], alphaReg); mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], alphaReg); mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], alphaReg); mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], alphaReg); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); 
mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags, 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], alphaReg); mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], alphaReg); mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], alphaReg); mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], alphaReg); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags, 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - 
(a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[10], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[15], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[21], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[28], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags, 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[11], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[16], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[22], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[29], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags, 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = 
_mm256_fnmadd_ps(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[12], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[17], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[23], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[30], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags, 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[13], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[18], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[24], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[31], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags, 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[19], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[25], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = 
_mm256_fnmadd_ps(mat_a_cols_rearr[32], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags, 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[26], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[33], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags, 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[34], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_a_cols[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_a_cols[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_a_cols[4] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x44); mat_a_cols[5] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0xEE); mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x44); mat_a_cols[7] = 
_mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0xEE); #else mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x4E); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x4E); mat_a_cols[4] = _mm256_blend_ps(mat_a_cols[0], mat_a_cols[6], 0xCC); mat_a_cols[5] = _mm256_blend_ps(mat_a_cols[1], mat_a_cols[6], 0x33); mat_a_cols[6] = _mm256_blend_ps(mat_a_cols[2], mat_a_cols[7], 0xCC); mat_a_cols[7] = _mm256_blend_ps(mat_a_cols[3], mat_a_cols[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_a_cols[0] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x20); mat_a_cols[4] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x31); mat_a_cols[1] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x20); mat_a_cols[5] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_a_cols[2] = 
_mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_a_cols[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_a_cols[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_a_cols[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); //Read next set of B columns ptr_b += (cs_b + cs_b_offset[5]); mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + (cs_b))); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5])); //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_a_cols[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_a_cols[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_a_cols[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_a_cols[7]); //end loop of cols } //Last block trsm processing ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 16x8 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = 
_mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], alphaReg); mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], alphaReg); mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], alphaReg); mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], alphaReg); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); 
mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags, 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], alphaReg); mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], alphaReg); mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], alphaReg); mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], alphaReg); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags, 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[10], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[15], 
mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[21], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[28], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags, 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[11], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[16], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[22], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[29], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags, 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[12], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[17], mat_b_rearr[2], mat_b_rearr[5]);//d = c 
- (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[23], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[30], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags, 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[13], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[18], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[24], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[31], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags, 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[19], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[25], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[32], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags, 
0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[26], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[33], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags, 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[34], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_a_cols[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_a_cols[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_a_cols[4] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x44); mat_a_cols[5] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0xEE); mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x44); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0xEE); #else mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x4E); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x4E); mat_a_cols[4] = _mm256_blend_ps(mat_a_cols[0], mat_a_cols[6], 0xCC); mat_a_cols[5] = _mm256_blend_ps(mat_a_cols[1], mat_a_cols[6], 0x33); 
mat_a_cols[6] = _mm256_blend_ps(mat_a_cols[2], mat_a_cols[7], 0xCC); mat_a_cols[7] = _mm256_blend_ps(mat_a_cols[3], mat_a_cols[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_a_cols[0] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x20); mat_a_cols[4] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x31); mat_a_cols[1] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x20); mat_a_cols[5] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_a_cols[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_a_cols[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_a_cols[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_a_cols[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); //Store the computed B columns 
_mm256_storeu_ps((float *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_a_cols[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_a_cols[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_a_cols[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_a_cols[7]); //end loop of cols } static void blis_strsm_microkernel_alpha_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alphaVal) { //float ones = 1.0; int j; int cs_b_offset[6]; //int row2, row4, row6; float *ptr_b_dup; //70 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_cols[8]; __m256 mat_a_cols_rearr[36]; //__m256 mat_a_diag_inv[8]; //__m256 reciprocal_diags; __m256 alphaReg; cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; //reciprocal_diags = _mm256_loadu_ps((float const *)ones); //reciprocal_diags = _mm256_broadcast_ss((float const *)&ones); alphaReg = _mm256_broadcast_ss((float const *)&alphaVal); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //read first set of 16x8 block of B into registers, where 16 is the blk_height and 8 is the blk_width for B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b); //_mm_prefetch((char*)(ptr_l + 0), _MM_HINT_T0); //row2 = (cs_l << 1); //row4 = (cs_l << 2); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + (cs_b))); //_mm_prefetch((char*)(ptr_l + cs_l), _MM_HINT_T0); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0])); 
//_mm_prefetch((char*)(ptr_l + row2), _MM_HINT_T0); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1])); //_mm_prefetch((char*)(ptr_l + row2 + cs_l), _MM_HINT_T0); //row6 = row2 + row4; mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2])); //_mm_prefetch((char*)(ptr_l + row4), _MM_HINT_T0); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3])); //_mm_prefetch((char*)(ptr_l + row4 + cs_l), _MM_HINT_T0); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4])); //_mm_prefetch((char*)(ptr_l + row6), _MM_HINT_T0); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5])); //_mm_prefetch((char*)(ptr_l + row6 + cs_l), _MM_HINT_T0); //reciprocal_diags = _mm256_loadu_ps((float const *)ones); //read first set of 16x16 block of L, where 16 is the blk_height and 16 is the blk_width for L /*mat_a_cols[0] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[1] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[2] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[3] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[4] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[5] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[6] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[7] = _mm256_loadu_ps((float const *)ptr_l);*/ //Shuffle to rearrange/transpose 16x16 block of L into contiguous row-wise registers //tmpRegs[0] = _mm256_castps256_ps128(mat_a_cols[0]); //zero latency, no instruction added actually. 
//mat_a_cols_rearr[0] = _mm256_broadcastss_ps(tmpRegs[0]); //1st col mat_a_cols_rearr[0] = _mm256_broadcast_ss((float const *)(ptr_l+0)); mat_a_cols_rearr[1] = _mm256_broadcast_ss((float const *)(ptr_l+1)); mat_a_cols_rearr[3] = _mm256_broadcast_ss((float const *)(ptr_l+2)); mat_a_cols_rearr[6] = _mm256_broadcast_ss((float const *)(ptr_l+3)); mat_a_cols_rearr[10] = _mm256_broadcast_ss((float const *)(ptr_l+4)); mat_a_cols_rearr[15] = _mm256_broadcast_ss((float const *)(ptr_l+5)); mat_a_cols_rearr[21] = _mm256_broadcast_ss((float const *)(ptr_l+6)); mat_a_cols_rearr[28] = _mm256_broadcast_ss((float const *)(ptr_l+7)); //2nd col ptr_l += cs_l; mat_a_cols_rearr[2] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_cols_rearr[4] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_cols_rearr[7] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[11] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[16] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[22] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[29] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //3rd col ptr_l += cs_l; mat_a_cols_rearr[5] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_cols_rearr[8] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[12] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[17] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[23] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[30] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //4rth col ptr_l += cs_l; mat_a_cols_rearr[9] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[13] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[18] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[24] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[31] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //5th 
col ptr_l += cs_l; mat_a_cols_rearr[14] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[19] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[25] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[32] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //6th col ptr_l += cs_l; mat_a_cols_rearr[20] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[26] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[33] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //7th col ptr_l += cs_l; mat_a_cols_rearr[27] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[34] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //8th col //ptr_l += cs_l; //mat_a_cols_rearr[35] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); numCols_b -= 8; // blk_width = 8 //compute reciprocals of L(i,i) and broadcast in registers //mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_cols_rearr[0], mat_a_cols_rearr[2]); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_cols_rearr[5], mat_a_cols_rearr[9]); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_cols_rearr[14], mat_a_cols_rearr[20]); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_cols_rearr[27], mat_a_cols_rearr[35]); //mat_a_diag_inv[1] = _mm256_permute_ps(mat_a_diag_inv[1], 0x55); //mat_a_diag_inv[3] = _mm256_permute_ps(mat_a_diag_inv[3], 0x55); //mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC); //mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC); //mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0x20); //reciprocal of diagnol elements //reciprocal_diags = _mm256_div_ps(reciprocal_diags, mat_a_diag_inv[0]); //Start loop for cols of B to be processed in size of blk_width for (j = 0; j < numCols_b; j += 8) { ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 16x8 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[0] = 
_mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], alphaReg); mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], alphaReg); mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], alphaReg); mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], alphaReg); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = 
_mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //extract diag a00 from a //mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags, 0x00); //mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B //mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], alphaReg); mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], alphaReg); mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], alphaReg); mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], alphaReg); //extract diag a11 from a //mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags, 0x55); //mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = 
_mm256_fnmadd_ps(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[10], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[15], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[21], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[28], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B //mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a //mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags, 0xAA); //mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[11], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[16], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[22], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[29], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B //mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a //mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags, 0xFF); //mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //(Row3): FMA 
operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[12], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[17], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[23], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[30], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B //mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //extract diag a44 from a //mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags, 0x00); //mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[13], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[18], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[24], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[31], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B //mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //extract diag a55 from a //mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags, 0x55); //mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[19], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[25], 
mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[32], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B //mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //extract diag a66 from a //mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags, 0xAA); //mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[26], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[33], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B //mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //extract diag a77 from a //mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags, 0xFF); //mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[34], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B //mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_a_cols[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_a_cols[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_a_cols[4] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x44); mat_a_cols[5] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0xEE); mat_a_cols[6] = 
_mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x44); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0xEE); #else mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x4E); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x4E); mat_a_cols[4] = _mm256_blend_ps(mat_a_cols[0], mat_a_cols[6], 0xCC); mat_a_cols[5] = _mm256_blend_ps(mat_a_cols[1], mat_a_cols[6], 0x33); mat_a_cols[6] = _mm256_blend_ps(mat_a_cols[2], mat_a_cols[7], 0xCC); mat_a_cols[7] = _mm256_blend_ps(mat_a_cols[3], mat_a_cols[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_a_cols[0] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x20); mat_a_cols[4] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x31); mat_a_cols[1] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x20); mat_a_cols[5] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); 
#endif //Merge rearranged high elements into complete rows mat_a_cols[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_a_cols[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_a_cols[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_a_cols[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); //Read next set of B columns ptr_b += (cs_b + cs_b_offset[5]); mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + (cs_b))); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5])); //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_a_cols[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_a_cols[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_a_cols[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_a_cols[7]); //end loop of cols } //Last block trsm processing ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 16x8 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], 
mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], alphaReg); mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], alphaReg); mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], alphaReg); mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], alphaReg); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 
0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //extract diag a00 from a //mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags, 0x00); //mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B //mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], alphaReg); mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], alphaReg); mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], alphaReg); mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], alphaReg); //extract diag a11 from a //mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags, 0x55); //mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[10], mat_b_rearr[0], 
mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[15], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[21], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[28], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B //mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a //mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags, 0xAA); //mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[11], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[16], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[22], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[29], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B //mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a //mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags, 0xFF); //mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[12], mat_b_rearr[2], mat_b_rearr[4]);//d = c - 
(a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[17], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[23], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[30], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B //mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //extract diag a44 from a //mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags, 0x00); //mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[13], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[18], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[24], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[31], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B //mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //extract diag a55 from a //mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags, 0x55); //mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[19], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[25], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[32], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B //mat_b_rearr[5] = 
_mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //extract diag a66 from a //mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags, 0xAA); //mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[26], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[33], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B //mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //extract diag a77 from a //mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags, 0xFF); //mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[34], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B //mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_a_cols[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_a_cols[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_a_cols[4] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x44); mat_a_cols[5] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0xEE); mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x44); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0xEE); #else mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x4E); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 
0x4E); mat_a_cols[4] = _mm256_blend_ps(mat_a_cols[0], mat_a_cols[6], 0xCC); mat_a_cols[5] = _mm256_blend_ps(mat_a_cols[1], mat_a_cols[6], 0x33); mat_a_cols[6] = _mm256_blend_ps(mat_a_cols[2], mat_a_cols[7], 0xCC); mat_a_cols[7] = _mm256_blend_ps(mat_a_cols[3], mat_a_cols[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_a_cols[0] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x20); mat_a_cols[4] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x31); mat_a_cols[1] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x20); mat_a_cols[5] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_a_cols[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_a_cols[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_a_cols[3] = _mm256_permute2f128_ps(mat_b_rearr[5], 
mat_b_rearr[7], 0x20); mat_a_cols[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_a_cols[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_a_cols[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_a_cols[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_a_cols[7]); //end loop of cols } static void blis_strsm_microkernel_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b) { //float ones = 1.0; int j; int cs_b_offset[6]; //int row2, row4, row6; float *ptr_b_dup; //70 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_cols[8]; __m256 mat_a_cols_rearr[36]; //__m256 mat_a_diag_inv[8]; //__m256 reciprocal_diags; cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; //reciprocal_diags = _mm256_loadu_ps((float const *)ones); //reciprocal_diags = _mm256_broadcast_ss((float const *)&ones); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //read first set of 16x8 block of B into registers, where 16 is the blk_height and 8 is the blk_width for B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b); //_mm_prefetch((char*)(ptr_l + 0), _MM_HINT_T0); //row2 = (cs_l << 1); //row4 = (cs_l << 2); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + (cs_b))); //_mm_prefetch((char*)(ptr_l + cs_l), _MM_HINT_T0); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + 
cs_b_offset[0])); //_mm_prefetch((char*)(ptr_l + row2), _MM_HINT_T0); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1])); //_mm_prefetch((char*)(ptr_l + row2 + cs_l), _MM_HINT_T0); //row6 = row2 + row4; mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2])); //_mm_prefetch((char*)(ptr_l + row4), _MM_HINT_T0); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3])); //_mm_prefetch((char*)(ptr_l + row4 + cs_l), _MM_HINT_T0); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4])); //_mm_prefetch((char*)(ptr_l + row6), _MM_HINT_T0); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5])); //_mm_prefetch((char*)(ptr_l + row6 + cs_l), _MM_HINT_T0); //reciprocal_diags = _mm256_loadu_ps((float const *)ones); //read first set of 16x16 block of L, where 16 is the blk_height and 16 is the blk_width for L /*mat_a_cols[0] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[1] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[2] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[3] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[4] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[5] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[6] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[7] = _mm256_loadu_ps((float const *)ptr_l);*/ //Shuffle to rearrange/transpose 16x16 block of L into contiguous row-wise registers //tmpRegs[0] = _mm256_castps256_ps128(mat_a_cols[0]); //zero latency, no instruction added actually. 
//mat_a_cols_rearr[0] = _mm256_broadcastss_ps(tmpRegs[0]); //1st col mat_a_cols_rearr[0] = _mm256_broadcast_ss((float const *)(ptr_l+0)); mat_a_cols_rearr[1] = _mm256_broadcast_ss((float const *)(ptr_l+1)); mat_a_cols_rearr[3] = _mm256_broadcast_ss((float const *)(ptr_l+2)); mat_a_cols_rearr[6] = _mm256_broadcast_ss((float const *)(ptr_l+3)); mat_a_cols_rearr[10] = _mm256_broadcast_ss((float const *)(ptr_l+4)); mat_a_cols_rearr[15] = _mm256_broadcast_ss((float const *)(ptr_l+5)); mat_a_cols_rearr[21] = _mm256_broadcast_ss((float const *)(ptr_l+6)); mat_a_cols_rearr[28] = _mm256_broadcast_ss((float const *)(ptr_l+7)); //2nd col ptr_l += cs_l; mat_a_cols_rearr[2] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_cols_rearr[4] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_cols_rearr[7] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[11] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[16] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[22] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[29] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //3rd col ptr_l += cs_l; mat_a_cols_rearr[5] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_cols_rearr[8] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[12] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[17] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[23] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[30] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //4rth col ptr_l += cs_l; mat_a_cols_rearr[9] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[13] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[18] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[24] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[31] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //5th 
col ptr_l += cs_l; mat_a_cols_rearr[14] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[19] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[25] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[32] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //6th col ptr_l += cs_l; mat_a_cols_rearr[20] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[26] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[33] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //7th col ptr_l += cs_l; mat_a_cols_rearr[27] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[34] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //8th col //ptr_l += cs_l; //mat_a_cols_rearr[35] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); numCols_b -= 8; // blk_width = 8 //compute reciprocals of L(i,i) and broadcast in registers //mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_cols_rearr[0], mat_a_cols_rearr[2]); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_cols_rearr[5], mat_a_cols_rearr[9]); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_cols_rearr[14], mat_a_cols_rearr[20]); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_cols_rearr[27], mat_a_cols_rearr[35]); //mat_a_diag_inv[1] = _mm256_permute_ps(mat_a_diag_inv[1], 0x55); //mat_a_diag_inv[3] = _mm256_permute_ps(mat_a_diag_inv[3], 0x55); //mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC); //mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC); //mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0x20); //reciprocal of diagnol elements //reciprocal_diags = _mm256_div_ps(reciprocal_diags, mat_a_diag_inv[0]); //Start loop for cols of B to be processed in size of blk_width for (j = 0; j < numCols_b; j += 8) { ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 16x8 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[0] = 
_mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); 
#else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //extract diag a00 from a //mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags, 0x00); //mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B //mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); //extract diag a11 from a //mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags, 0x55); //mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[10], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[15], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[21], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) 
mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[28], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B //mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a //mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags, 0xAA); //mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[11], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[16], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[22], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[29], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B //mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a //mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags, 0xFF); //mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[12], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[17], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[23], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = 
_mm256_fnmadd_ps(mat_a_cols_rearr[30], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B //mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //extract diag a44 from a //mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags, 0x00); //mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[13], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[18], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[24], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[31], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B //mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //extract diag a55 from a //mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags, 0x55); //mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[19], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[25], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[32], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B //mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //extract diag a66 from a //mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags, 0xAA); //mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //(Row6): FMA 
operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[26], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[33], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B //mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //extract diag a77 from a //mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags, 0xFF); //mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[34], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B //mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_a_cols[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_a_cols[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_a_cols[4] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x44); mat_a_cols[5] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0xEE); mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x44); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0xEE); #else mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x4E); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x4E); mat_a_cols[4] = _mm256_blend_ps(mat_a_cols[0], mat_a_cols[6], 0xCC); mat_a_cols[5] = _mm256_blend_ps(mat_a_cols[1], mat_a_cols[6], 0x33); mat_a_cols[6] = _mm256_blend_ps(mat_a_cols[2], mat_a_cols[7], 0xCC); mat_a_cols[7] = 
_mm256_blend_ps(mat_a_cols[3], mat_a_cols[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_a_cols[0] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x20); mat_a_cols[4] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x31); mat_a_cols[1] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x20); mat_a_cols[5] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_a_cols[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_a_cols[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_a_cols[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_a_cols[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); //Read next set of B columns ptr_b += (cs_b + cs_b_offset[5]); mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b); mat_b_col[1] 
= _mm256_loadu_ps((float const *)(ptr_b + (cs_b))); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5])); //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_a_cols[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_a_cols[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_a_cols[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_a_cols[7]); //end loop of cols } //Last block trsm processing ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 16x8 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 
0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //extract diag a00 from a //mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags, 0x00); //mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B //mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], 
mat_a_diag_inv[0]); //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); //extract diag a11 from a //mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags, 0x55); //mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[10], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[15], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[21], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[28], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B //mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a //mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags, 0xAA); //mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = 
_mm256_fnmadd_ps(mat_a_cols_rearr[11], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[16], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[22], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[29], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B //mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a //mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags, 0xFF); //mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[12], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[17], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[23], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[30], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B //mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //extract diag a44 from a //mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags, 0x00); //mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[13], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[18], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fnmadd_ps(mat_a_cols_rearr[24], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[31], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B //mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //extract diag a55 from a //mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags, 0x55); //mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[19], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[25], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[32], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B //mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //extract diag a66 from a //mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags, 0xAA); //mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[26], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[33], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B //mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //extract diag a77 from a //mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags, 0xFF); //mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[34], 
mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B //mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_a_cols[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_a_cols[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_a_cols[4] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x44); mat_a_cols[5] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0xEE); mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x44); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0xEE); #else mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x4E); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x4E); mat_a_cols[4] = _mm256_blend_ps(mat_a_cols[0], mat_a_cols[6], 0xCC); mat_a_cols[5] = _mm256_blend_ps(mat_a_cols[1], mat_a_cols[6], 0x33); mat_a_cols[6] = _mm256_blend_ps(mat_a_cols[2], mat_a_cols[7], 0xCC); mat_a_cols[7] = _mm256_blend_ps(mat_a_cols[3], mat_a_cols[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_a_cols[0] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x20); mat_a_cols[4] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x31); mat_a_cols[1] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x20); mat_a_cols[5] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if 
REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_a_cols[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_a_cols[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_a_cols[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_a_cols[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_a_cols[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_a_cols[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_a_cols[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_a_cols[7]); //end loop of cols } static void blis_strsm_microkernel(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b) { float ones = 1.0; int j; int cs_b_offset[6]; //int row2, row4, row6; float *ptr_b_dup; //70 number of ymm(256 bits) registers used 
__m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_cols[8]; __m256 mat_a_cols_rearr[36]; __m256 mat_a_diag_inv[8]; __m256 reciprocal_diags; cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; //reciprocal_diags = _mm256_loadu_ps((float const *)ones); reciprocal_diags = _mm256_broadcast_ss((float const *)&ones); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //read first set of 16x8 block of B into registers, where 16 is the blk_height and 8 is the blk_width for B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b); //_mm_prefetch((char*)(ptr_l + 0), _MM_HINT_T0); //row2 = (cs_l << 1); //row4 = (cs_l << 2); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + (cs_b))); //_mm_prefetch((char*)(ptr_l + cs_l), _MM_HINT_T0); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0])); //_mm_prefetch((char*)(ptr_l + row2), _MM_HINT_T0); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1])); //_mm_prefetch((char*)(ptr_l + row2 + cs_l), _MM_HINT_T0); //row6 = row2 + row4; mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2])); //_mm_prefetch((char*)(ptr_l + row4), _MM_HINT_T0); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3])); //_mm_prefetch((char*)(ptr_l + row4 + cs_l), _MM_HINT_T0); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4])); //_mm_prefetch((char*)(ptr_l + row6), _MM_HINT_T0); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5])); //_mm_prefetch((char*)(ptr_l + row6 + cs_l), _MM_HINT_T0); //reciprocal_diags = _mm256_loadu_ps((float const *)ones); //read first set of 16x16 block of L, where 16 is the blk_height and 16 is the blk_width for L /*mat_a_cols[0] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[1] = 
_mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[2] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[3] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[4] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[5] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[6] = _mm256_loadu_ps((float const *)ptr_l); ptr_l += cs_l; mat_a_cols[7] = _mm256_loadu_ps((float const *)ptr_l);*/ //Shuffle to rearrange/transpose 16x16 block of L into contiguous row-wise registers //tmpRegs[0] = _mm256_castps256_ps128(mat_a_cols[0]); //zero latency, no instruction added actually. //mat_a_cols_rearr[0] = _mm256_broadcastss_ps(tmpRegs[0]); //1st col mat_a_cols_rearr[0] = _mm256_broadcast_ss((float const *)(ptr_l+0)); mat_a_cols_rearr[1] = _mm256_broadcast_ss((float const *)(ptr_l+1)); mat_a_cols_rearr[3] = _mm256_broadcast_ss((float const *)(ptr_l+2)); mat_a_cols_rearr[6] = _mm256_broadcast_ss((float const *)(ptr_l+3)); mat_a_cols_rearr[10] = _mm256_broadcast_ss((float const *)(ptr_l+4)); mat_a_cols_rearr[15] = _mm256_broadcast_ss((float const *)(ptr_l+5)); mat_a_cols_rearr[21] = _mm256_broadcast_ss((float const *)(ptr_l+6)); mat_a_cols_rearr[28] = _mm256_broadcast_ss((float const *)(ptr_l+7)); //2nd col ptr_l += cs_l; mat_a_cols_rearr[2] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_cols_rearr[4] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_cols_rearr[7] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[11] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[16] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[22] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[29] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //3rd col ptr_l += cs_l; mat_a_cols_rearr[5] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_cols_rearr[8] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[12] = 
_mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[17] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[23] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[30] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //4rth col ptr_l += cs_l; mat_a_cols_rearr[9] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_cols_rearr[13] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[18] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[24] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[31] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //5th col ptr_l += cs_l; mat_a_cols_rearr[14] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_cols_rearr[19] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[25] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[32] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //6th col ptr_l += cs_l; mat_a_cols_rearr[20] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_cols_rearr[26] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[33] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //7th col ptr_l += cs_l; mat_a_cols_rearr[27] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_cols_rearr[34] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //7th col ptr_l += cs_l; mat_a_cols_rearr[35] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); numCols_b -= 8; // blk_width = 8 //compute reciprocals of L(i,i) and broadcast in registers mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_cols_rearr[0], mat_a_cols_rearr[2]); mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_cols_rearr[5], mat_a_cols_rearr[9]); mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_cols_rearr[14], mat_a_cols_rearr[20]); mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_cols_rearr[27], mat_a_cols_rearr[35]); //mat_a_diag_inv[1] = _mm256_permute_ps(mat_a_diag_inv[1], 0x55); //mat_a_diag_inv[3] = 
_mm256_permute_ps(mat_a_diag_inv[3], 0x55); mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC); mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0x20); //reciprocal of diagnol elements reciprocal_diags = _mm256_div_ps(reciprocal_diags, mat_a_diag_inv[0]); //Start loop for cols of B to be processed in size of blk_width for (j = 0; j < numCols_b; j += 8) { ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 16x8 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = 
_mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags, 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags, 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) 
uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[10], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[15], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[21], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[28], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags, 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[11], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[16], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[22], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[29], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = 
_mm256_permute_ps(reciprocal_diags, 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[12], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[17], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[23], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[30], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags, 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[13], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[18], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[24], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[31], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags, 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = 
_mm256_fnmadd_ps(mat_a_cols_rearr[19], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[25], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[32], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags, 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[26], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[33], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags, 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[34], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_a_cols[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_a_cols[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_a_cols[4] = 
_mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x44); mat_a_cols[5] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0xEE); mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x44); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0xEE); #else mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x4E); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x4E); mat_a_cols[4] = _mm256_blend_ps(mat_a_cols[0], mat_a_cols[6], 0xCC); mat_a_cols[5] = _mm256_blend_ps(mat_a_cols[1], mat_a_cols[6], 0x33); mat_a_cols[6] = _mm256_blend_ps(mat_a_cols[2], mat_a_cols[7], 0xCC); mat_a_cols[7] = _mm256_blend_ps(mat_a_cols[3], mat_a_cols[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_a_cols[0] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x20); mat_a_cols[4] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x31); mat_a_cols[1] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x20); mat_a_cols[5] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); 
mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_a_cols[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_a_cols[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_a_cols[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_a_cols[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); //Read next set of B columns ptr_b += (cs_b + cs_b_offset[5]); mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + (cs_b))); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5])); //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_a_cols[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_a_cols[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_a_cols[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_a_cols[7]); //end loop of cols } //Last block trsm processing ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 16x8 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = 
_mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 
0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags, 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags, 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[10], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[15], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[21], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[28], mat_b_rearr[0], 
mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags, 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[11], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[16], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[22], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[29], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags, 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[12], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[17], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[23], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[30], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) 
//Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags, 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_cols_rearr[13], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[18], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[24], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[31], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags, 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_cols_rearr[19], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_cols_rearr[25], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[32], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags, 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = 
_mm256_fnmadd_ps(mat_a_cols_rearr[26], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[33], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags, 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_cols_rearr[34], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_a_cols[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_a_cols[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_a_cols[4] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x44); mat_a_cols[5] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0xEE); mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x44); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0xEE); #else mat_a_cols[6] = _mm256_shuffle_ps(mat_a_cols[0], mat_a_cols[1], 0x4E); mat_a_cols[7] = _mm256_shuffle_ps(mat_a_cols[2], mat_a_cols[3], 0x4E); mat_a_cols[4] = _mm256_blend_ps(mat_a_cols[0], mat_a_cols[6], 0xCC); mat_a_cols[5] = _mm256_blend_ps(mat_a_cols[1], mat_a_cols[6], 0x33); mat_a_cols[6] = _mm256_blend_ps(mat_a_cols[2], mat_a_cols[7], 0xCC); mat_a_cols[7] = _mm256_blend_ps(mat_a_cols[3], mat_a_cols[7], 0x33); #endif //Merge rearranged low elements into complete 
rows mat_a_cols[0] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x20); mat_a_cols[4] = _mm256_permute2f128_ps(mat_a_cols[4], mat_a_cols[6], 0x31); mat_a_cols[1] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x20); mat_a_cols[5] = _mm256_permute2f128_ps(mat_a_cols[5], mat_a_cols[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_a_cols[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_a_cols[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_a_cols[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_a_cols[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); 
_mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_a_cols[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_a_cols[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_a_cols[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_a_cols[7]); //end loop of cols } static void blis_dtrsm_microkernel_alpha(double *ptr_l, double *ptr_b, int m, int n, int rs_l, int rs_b, int cs_l, int cs_b, double alphaVal ) { int j; int n_remainder = n%4; int cs_b_offset[2]; double *ptr_b_dup; double ones = 1.0; __m256d mat_b_col[4]; __m256d mat_b_rearr[4]; __m256d mat_a_cols[4]; __m256d mat_a_cols_rearr[10]; __m256d mat_a_diag_inv[4]; __m256d reciprocal_diags; __m256d alphaReg; cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; reciprocal_diags = _mm256_broadcast_sd((double const *)&ones); alphaReg = _mm256_broadcast_sd((double const *)&alphaVal); //if(m % 4 == 0) //{ //1st col mat_a_cols_rearr[0] = _mm256_broadcast_sd((double const *)(ptr_l+0)); mat_a_cols_rearr[1] = _mm256_broadcast_sd((double const *)(ptr_l+1)); mat_a_cols_rearr[3] = _mm256_broadcast_sd((double const *)(ptr_l+2)); mat_a_cols_rearr[6] = _mm256_broadcast_sd((double const *)(ptr_l+3)); //2nd col ptr_l += cs_l; mat_a_cols_rearr[2] = _mm256_broadcast_sd((double const *)(ptr_l + 1)); mat_a_cols_rearr[4] = _mm256_broadcast_sd((double const *)(ptr_l + 2)); mat_a_cols_rearr[7] = _mm256_broadcast_sd((double const *)(ptr_l + 3)); //3rd col ptr_l += cs_l; mat_a_cols_rearr[5] = _mm256_broadcast_sd((double const *)(ptr_l + 2)); mat_a_cols_rearr[8] = _mm256_broadcast_sd((double const *)(ptr_l + 3)); //4th col ptr_l += cs_l; mat_a_cols_rearr[9] = _mm256_broadcast_sd((double const *)(ptr_l + 3)); //compute reciprocals of L(i,i) and broadcast in registers mat_a_diag_inv[0] = _mm256_unpacklo_pd(mat_a_cols_rearr[0], mat_a_cols_rearr[2]); mat_a_diag_inv[1] = _mm256_unpacklo_pd(mat_a_cols_rearr[5], 
mat_a_cols_rearr[9]); mat_a_diag_inv[0] = _mm256_blend_pd(mat_a_diag_inv[0], mat_a_diag_inv[1], 0x0C); reciprocal_diags = _mm256_div_pd(reciprocal_diags, mat_a_diag_inv[0]); for(j = 0;(j+3) < n; j += 4) { ptr_b_dup = ptr_b; /*Shuffle to rearrange/transpose 8x4 block of B into contiguous row-wise registers*/ //read first set of 4x4 block of B into registers mat_b_col[0] = _mm256_loadu_pd((double const *)ptr_b); mat_b_col[1] = _mm256_loadu_pd((double const *)(ptr_b + (cs_b))); //_mm_prefetch((char*)(ptr_l + cs_l), _MM_HINT_T0); mat_b_col[2] = _mm256_loadu_pd((double const *)(ptr_b + cs_b_offset[0])); //_mm_prefetch((char*)(ptr_l + row2), _MM_HINT_T0); mat_b_col[3] = _mm256_loadu_pd((double const *)(ptr_b + cs_b_offset[1])); ////unpacklow//// mat_b_rearr[1] = _mm256_unpacklo_pd(mat_b_col[0], mat_b_col[1]); mat_b_rearr[3] = _mm256_unpacklo_pd(mat_b_col[2], mat_b_col[3]); //rearrange low elements mat_b_rearr[0] = _mm256_permute2f128_pd(mat_b_rearr[1],mat_b_rearr[3],0x20); mat_b_rearr[2] = _mm256_permute2f128_pd(mat_b_rearr[1],mat_b_rearr[3],0x31); mat_b_rearr[0] = _mm256_mul_pd(mat_b_rearr[0], alphaReg); mat_b_rearr[2] = _mm256_mul_pd(mat_b_rearr[2], alphaReg); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_pd(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_pd(mat_b_col[2], mat_b_col[3]); //rearrange high elements mat_b_rearr[1] = _mm256_permute2f128_pd(mat_b_col[0],mat_b_col[1],0x20); mat_b_rearr[3] = _mm256_permute2f128_pd(mat_b_col[0],mat_b_col[1],0x31); mat_b_rearr[1] = _mm256_mul_pd(mat_b_rearr[1], alphaReg); mat_b_rearr[3] = _mm256_mul_pd(mat_b_rearr[3], alphaReg); //extract a00 mat_a_diag_inv[0] = _mm256_permute_pd(reciprocal_diags, 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_pd(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_pd(mat_b_rearr[0], mat_a_diag_inv[0]); //extract diag a11 from a mat_a_diag_inv[1] = 
_mm256_permute_pd(reciprocal_diags, 0x03); mat_a_diag_inv[1] = _mm256_permute2f128_pd(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (3, 0) mat_b_rearr[1] = _mm256_fnmadd_pd(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_pd(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_pd(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_pd(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_pd(reciprocal_diags, 0x00); mat_a_diag_inv[2] = _mm256_permute2f128_pd(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x11); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_pd(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_pd(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_pd(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_pd(reciprocal_diags, 0x0C); mat_a_diag_inv[3] = _mm256_permute2f128_pd(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x11); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_pd(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_pd(mat_b_rearr[3], mat_a_diag_inv[3]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[1] = _mm256_unpacklo_pd(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[3] = 
_mm256_unpacklo_pd(mat_b_rearr[2], mat_b_rearr[3]); //rearrange low elements mat_a_cols[0] = _mm256_permute2f128_pd(mat_a_cols[1],mat_a_cols[3],0x20); mat_a_cols[2] = _mm256_permute2f128_pd(mat_a_cols[1],mat_a_cols[3],0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_pd(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_pd(mat_b_rearr[2], mat_b_rearr[3]); //rearrange high elements mat_a_cols[1] = _mm256_permute2f128_pd(mat_b_rearr[0],mat_b_rearr[1],0x20); mat_a_cols[3] = _mm256_permute2f128_pd(mat_b_rearr[0],mat_b_rearr[1],0x31); //Read next set of B columns ptr_b += (cs_b+cs_b_offset[1]); _mm256_storeu_pd((double *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_pd((double *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_pd((double *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); _mm256_storeu_pd((double *)(ptr_b_dup + cs_b_offset[1]), mat_a_cols[3]); } ptr_b_dup = ptr_b; if(n_remainder == 3) { //read first set of 4x4 block of B into registers mat_b_col[0] = _mm256_loadu_pd((double const *)ptr_b); mat_b_col[1] = _mm256_loadu_pd((double const *)(ptr_b + (cs_b))); mat_b_col[2] = _mm256_loadu_pd((double const *)(ptr_b + cs_b_offset[0])); mat_b_col[3] = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 2) { //read first set of 4x4 block of B into registers mat_b_col[0] = _mm256_loadu_pd((double const *)ptr_b); mat_b_col[1] = _mm256_loadu_pd((double const *)(ptr_b + (cs_b))); mat_b_col[2] = _mm256_broadcast_sd((double const *)&ones); mat_b_col[3] = _mm256_broadcast_sd((double const *)&ones); } if(n_remainder == 1) { //read first set of 4x4 block of B into registers mat_b_col[0] = _mm256_loadu_pd((double const *)ptr_b); mat_b_col[1] = _mm256_broadcast_sd((double const *)&ones); mat_b_col[2] = _mm256_broadcast_sd((double const *)&ones); mat_b_col[3] = _mm256_broadcast_sd((double const *)&ones); } /*Shuffle to rearrange/transpose 8x4 block of B into contiguous row-wise registers*/ ////unpacklow//// mat_b_rearr[1] = 
_mm256_unpacklo_pd(mat_b_col[0], mat_b_col[1]); mat_b_rearr[3] = _mm256_unpacklo_pd(mat_b_col[2], mat_b_col[3]); //rearrange low elements mat_b_rearr[0] = _mm256_permute2f128_pd(mat_b_rearr[1],mat_b_rearr[3],0x20); mat_b_rearr[2] = _mm256_permute2f128_pd(mat_b_rearr[1],mat_b_rearr[3],0x31); mat_b_rearr[0] = _mm256_mul_pd(mat_b_rearr[0], alphaReg); mat_b_rearr[2] = _mm256_mul_pd(mat_b_rearr[2], alphaReg); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_pd(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_pd(mat_b_col[2], mat_b_col[3]); //rearrange high elements mat_b_rearr[1] = _mm256_permute2f128_pd(mat_b_col[0],mat_b_col[1],0x20); mat_b_rearr[3] = _mm256_permute2f128_pd(mat_b_col[0],mat_b_col[1],0x31); mat_b_rearr[1] = _mm256_mul_pd(mat_b_rearr[1], alphaReg); mat_b_rearr[3] = _mm256_mul_pd(mat_b_rearr[3], alphaReg); //extract a00 mat_a_diag_inv[0] = _mm256_permute_pd(reciprocal_diags, 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_pd(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_pd(mat_b_rearr[0], mat_a_diag_inv[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_pd(reciprocal_diags, 0x03); mat_a_diag_inv[1] = _mm256_permute2f128_pd(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (3, 0) mat_b_rearr[1] = _mm256_fnmadd_pd(mat_a_cols_rearr[1], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_pd(mat_a_cols_rearr[3], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_pd(mat_a_cols_rearr[6], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_pd(mat_b_rearr[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_pd(reciprocal_diags, 0x00); mat_a_diag_inv[2] = 
_mm256_permute2f128_pd(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x11); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_pd(mat_a_cols_rearr[4], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_pd(mat_a_cols_rearr[7], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_pd(mat_b_rearr[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_pd(reciprocal_diags, 0x0C); mat_a_diag_inv[3] = _mm256_permute2f128_pd(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x11); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_pd(mat_a_cols_rearr[8], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_pd(mat_b_rearr[3], mat_a_diag_inv[3]); //--> Transpose and store results of columns of B block <--// ////unpacklow//// mat_a_cols[1] = _mm256_unpacklo_pd(mat_b_rearr[0], mat_b_rearr[1]); mat_a_cols[3] = _mm256_unpacklo_pd(mat_b_rearr[2], mat_b_rearr[3]); //rearrange low elements mat_a_cols[0] = _mm256_permute2f128_pd(mat_a_cols[1],mat_a_cols[3],0x20); mat_a_cols[2] = _mm256_permute2f128_pd(mat_a_cols[1],mat_a_cols[3],0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_pd(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_pd(mat_b_rearr[2], mat_b_rearr[3]); //rearrange high elements mat_a_cols[1] = _mm256_permute2f128_pd(mat_b_rearr[0],mat_b_rearr[1],0x20); mat_a_cols[3] = _mm256_permute2f128_pd(mat_b_rearr[0],mat_b_rearr[1],0x31); //Store the computed B columns if(n_remainder == 3) { _mm256_storeu_pd((double *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_pd((double *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); _mm256_storeu_pd((double *)(ptr_b_dup + cs_b_offset[0]), mat_a_cols[2]); } 
if(n_remainder == 2) { _mm256_storeu_pd((double *)ptr_b_dup, mat_a_cols[0]); _mm256_storeu_pd((double *)(ptr_b_dup + (cs_b)), mat_a_cols[1]); } if(n_remainder == 1) { _mm256_storeu_pd((double *)ptr_b_dup, mat_a_cols[0]); } //} } #if OPT_CACHE_BLOCKING_L1 //new intrinsic kernels static void trsm_XAtB_block_allSmallSizedMatrices(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b) { float ones = 1.0; int i, i1, i2, i3, i4, j, k, l, r; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup, *ptr_l_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_blk_elems[8]; __m256 mat_a_diag_inv[8]; __m256 reciprocal_diags[2]; reciprocal_diags[0] = _mm256_broadcast_ss((float const *)(&ones)); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); //read diag elems of L 16x16 block mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_l); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)ptr_l + cs_l); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[0]); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[1]); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[2]); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[3]); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[4]); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[5]); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b 
+ cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); reciprocal_diags[1] = reciprocal_diags[0]; //pack first 8 diags together mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_blk_elems[4], mat_a_blk_elems[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_blk_elems[6], mat_a_blk_elems[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_diag_inv[0], mat_a_diag_inv[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a 
mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_col[0] = 
_mm256_mul_ps(mat_b_col[0], mat_a_diag_inv[0]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_col[1]);//d = c - (a*b) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], mat_a_diag_inv[1]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = 
_mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], mat_a_diag_inv[2]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], mat_a_diag_inv[3]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const 
*)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], mat_a_diag_inv[4]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], mat_a_diag_inv[5]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = 
_mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], mat_a_diag_inv[6]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_col[7]); //i += cs_b_offset[6]; //ptr_b_dup += cs_b_offset[6]; i += 8; ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += 8; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += cs_b_offset[6]; i1 += cs_b_offset[6]; //Read next 8x8 block of A to get diag elements i3 += cs_l_offset[6]; mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_l + i3); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[0]); mat_a_blk_elems[3] 
= _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[1]); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[2]); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[3]); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[4]); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[5]); //pack 8 diags of A together reciprocal_diags[0] = reciprocal_diags[1]; mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_blk_elems[4], mat_a_blk_elems[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_blk_elems[6], mat_a_blk_elems[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements of A :- 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv2[0] = _mm256_unpacklo_ps(mat_a_diag_inv2[0], mat_a_diag_inv2[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = 
_mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); for (r = 0; r < numCols_b; r += GEMM_BLK_V1) { #if GEMM_ACCUM_A i = i1 + r; //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] 
+ i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); #endif i = 0; i2 = 0; for (l = 0; l < j; l += 8) // move across m { //for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) { /////////////////// Partial Lower 8x8 block trsm of B ptr_l_dup = ptr_l; i4 = i2 + r; //Read current 8 cols of B columns from specified 8x8 current-block of B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); i4 = k >> 3; ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[1]);//d = c - (a*b) 
mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_mul_ps(mat_a_blk_elems[0], mat_b_col[0]); mat_b_rearr[1] = _mm256_mul_ps(mat_a_blk_elems[1], mat_b_col[0]); mat_b_rearr[2] = _mm256_mul_ps(mat_a_blk_elems[2], mat_b_col[0]); mat_b_rearr[3] = _mm256_mul_ps(mat_a_blk_elems[3], mat_b_col[0]); mat_b_rearr[4] = _mm256_mul_ps(mat_a_blk_elems[4], mat_b_col[0]); mat_b_rearr[5] = _mm256_mul_ps(mat_a_blk_elems[5], mat_b_col[0]); mat_b_rearr[6] = _mm256_mul_ps(mat_a_blk_elems[6], mat_b_col[0]); mat_b_rearr[7] = _mm256_mul_ps(mat_a_blk_elems[7], mat_b_col[0]); #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], 
mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[0] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[3], 
mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[4], 
mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = 
c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) 
mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 
i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #endif //end loop of cols } i2 += cs_b_offset[6]; i += cs_l_offset[6]; } //trsm solve k = 0; //for (i2 = 0; i2 < 
numCols_b; i2 += 8) { i2 = i1 + r; /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A #if !GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i2); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i2)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i2)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i2)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i2)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i2)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i2)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i2)); #endif //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; #if GEMM_ACCUM_A //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); #else mat_b_rearr[0] = _mm256_sub_ps(mat_b_col[0], mat_b_rearr[0]); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); #endif #if GEMM_ACCUM_A mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], 
mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[1] = _mm256_sub_ps(mat_b_col[1], mat_b_rearr[1]); mat_b_rearr[2] = _mm256_sub_ps(mat_b_col[2], mat_b_rearr[2]); mat_b_rearr[3] = _mm256_sub_ps(mat_b_col[3], mat_b_rearr[3]); mat_b_rearr[4] = _mm256_sub_ps(mat_b_col[4], mat_b_rearr[4]); mat_b_rearr[5] = _mm256_sub_ps(mat_b_col[5], mat_b_rearr[5]); mat_b_rearr[6] = _mm256_sub_ps(mat_b_col[6], mat_b_rearr[6]); mat_b_rearr[7] = _mm256_sub_ps(mat_b_col[7], mat_b_rearr[7]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const 
*)(ptr_l + i + 5)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A32 to A72 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A43 to A73 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A54 to A74 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A65 to 
A75 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A76 to register mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + r, mat_b_rearr[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)+r), mat_b_rearr[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + r), mat_b_rearr[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + r), mat_b_rearr[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + r), mat_b_rearr[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + r), mat_b_rearr[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + r), mat_b_rearr[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + r), mat_b_rearr[7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; } } } //numRows of A ///////////////////loop 
ends ///////////////////// } static void trsm_XAtB_block_allSmallSizedMatrices_alpha(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alpha) { float ones = 1.0; int i, i1, i2, i3, i4, j, k, l, r; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup, *ptr_l_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_blk_elems[8]; __m256 mat_a_diag_inv[8]; __m256 reciprocal_diags[2]; __m256 alphaReg; reciprocal_diags[0] = _mm256_broadcast_ss((float const *)(&ones)); alphaReg = _mm256_broadcast_ss((float const *)&alpha); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); //read diag elems of L 16x16 block mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_l); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)ptr_l + cs_l); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[0]); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[1]); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[2]); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[3]); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[4]); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[5]); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); reciprocal_diags[1] = reciprocal_diags[0]; //pack first 8 diags together mat_a_diag_inv[0] = 
_mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_blk_elems[4], mat_a_blk_elems[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_blk_elems[6], mat_a_blk_elems[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); #if 0 //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + 
cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); #endif //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_diag_inv[0], mat_a_diag_inv[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], 
mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_col[4] = 
_mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], alphaReg); mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], alphaReg); mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], alphaReg); mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], alphaReg); mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], alphaReg); mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], alphaReg); mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], alphaReg); mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], alphaReg); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], mat_a_diag_inv[0]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_col[1]);//d = c - (a*b) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], 
mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], mat_a_diag_inv[1]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], mat_a_diag_inv[2]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //(Row3): FMA operations 
of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], mat_a_diag_inv[3]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], mat_a_diag_inv[4]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) 
mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], mat_a_diag_inv[5]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], mat_a_diag_inv[6]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), 
mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_col[7]); //i += cs_b_offset[6]; //ptr_b_dup += cs_b_offset[6]; i += 8; ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += 8; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += cs_b_offset[6]; i1 += cs_b_offset[6]; //Read next 8x8 block of A to get diag elements i3 += cs_l_offset[6]; mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_l + i3); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[0]); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[1]); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[2]); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[3]); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[4]); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[5]); //pack 8 diags of A together reciprocal_diags[0] = reciprocal_diags[1]; mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_blk_elems[4], mat_a_blk_elems[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_blk_elems[6], mat_a_blk_elems[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements of A :- 0,1,2,3,4,5,6,7 
reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv2[0] = _mm256_unpacklo_ps(mat_a_diag_inv2[0], mat_a_diag_inv2[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); 
mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); for (r = 0; r < numCols_b; r += GEMM_BLK_V1) { #if GEMM_ACCUM_A i = i1 + r; //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], alphaReg); mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], alphaReg); mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], alphaReg); mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], alphaReg); mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], alphaReg); mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], alphaReg); mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], alphaReg); mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], alphaReg); #endif i = 0; i2 = 0; for (l = 0; l < j; l += 8) // move across m { //for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) { /////////////////// Partial Lower 8x8 block trsm of B ptr_l_dup = ptr_l; i4 = i2 + r; //Read current 8 cols of B columns from specified 8x8 current-block of B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_b_col[4] = 
_mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); i4 = k >> 3; ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_mul_ps(mat_a_blk_elems[0], mat_b_col[0]); mat_b_rearr[1] = 
_mm256_mul_ps(mat_a_blk_elems[1], mat_b_col[0]); mat_b_rearr[2] = _mm256_mul_ps(mat_a_blk_elems[2], mat_b_col[0]); mat_b_rearr[3] = _mm256_mul_ps(mat_a_blk_elems[3], mat_b_col[0]); mat_b_rearr[4] = _mm256_mul_ps(mat_a_blk_elems[4], mat_b_col[0]); mat_b_rearr[5] = _mm256_mul_ps(mat_a_blk_elems[5], mat_b_col[0]); mat_b_rearr[6] = _mm256_mul_ps(mat_a_blk_elems[6], mat_b_col[0]); mat_b_rearr[7] = _mm256_mul_ps(mat_a_blk_elems[7], mat_b_col[0]); #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], 
mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], 
mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row11): FMA operations of b1 with elements of indices from (1, 
0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c 
- (a*b) #endif //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) 
mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #else 
mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = 
_mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #endif //end loop of cols } i2 += cs_b_offset[6]; i += cs_l_offset[6]; } //trsm solve k = 0; //for (i2 = 0; i2 < numCols_b; i2 += 8) { i2 = i1 + r; /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A #if !GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i2); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i2)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i2)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i2)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i2)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i2)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b 
+ cs_b_offset[4] + i2)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i2)); mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], alphaReg); mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], alphaReg); mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], alphaReg); mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], alphaReg); mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], alphaReg); mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], alphaReg); mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], alphaReg); mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], alphaReg); #endif //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; #if GEMM_ACCUM_A //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); #else mat_b_rearr[0] = _mm256_sub_ps(mat_b_col[0], mat_b_rearr[0]); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); #endif #if GEMM_ACCUM_A mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[1] = _mm256_sub_ps(mat_b_col[1], mat_b_rearr[1]); mat_b_rearr[2] = _mm256_sub_ps(mat_b_col[2], mat_b_rearr[2]); mat_b_rearr[3] = _mm256_sub_ps(mat_b_col[3], mat_b_rearr[3]); mat_b_rearr[4] = _mm256_sub_ps(mat_b_col[4], mat_b_rearr[4]); mat_b_rearr[5] = _mm256_sub_ps(mat_b_col[5], mat_b_rearr[5]); mat_b_rearr[6] = _mm256_sub_ps(mat_b_col[6], mat_b_rearr[6]); mat_b_rearr[7] = _mm256_sub_ps(mat_b_col[7], mat_b_rearr[7]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements 
of B mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A32 to A72 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A43 to A73 to registers mat_a_blk_elems[0] = 
_mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A54 to A74 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A65 to A75 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Perform mul operation of reciprocal of L(5, 5) element with 5th 
row elements of B mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A76 to register mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + r, mat_b_rearr[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)+r), mat_b_rearr[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + r), mat_b_rearr[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + r), mat_b_rearr[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + r), mat_b_rearr[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + r), mat_b_rearr[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + r), mat_b_rearr[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + r), mat_b_rearr[7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; } } } //numRows of A ///////////////////loop ends ///////////////////// } static void trsm_XAtB_block_allSmallSizedMatrices_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b) { //float ones = 1.0; int i, i1, i2, i3, i4, j, k, l, 
r; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup, *ptr_l_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_blk_elems[8]; //__m256 mat_a_diag_inv[8]; //__m256 reciprocal_diags[2]; // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); //(Row0) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); 
mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_col[1]);//d = c - (a*b) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_col[7]);//d = c - (a*b) 
mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_col[7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_col[7]); //i += cs_b_offset[6]; 
//ptr_b_dup += cs_b_offset[6]; i += 8; ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += 8; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += cs_b_offset[6]; i1 += cs_b_offset[6]; i3 += cs_l_offset[6]; i = 0; i2 = 0; for (r = 0; r < numCols_b; r += GEMM_BLK_V1) { #if GEMM_ACCUM_A i = i1 + r; //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); #endif i = 0; i2 = 0; for (l = 0; l < j; l += 8) // move across m { //for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) { /////////////////// Partial Lower 8x8 block trsm of B ptr_l_dup = ptr_l; i4 = i2 + r; //Read current 8 cols of B columns from specified 8x8 current-block of B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); 
mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); i4 = k >> 3; ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_mul_ps(mat_a_blk_elems[0], mat_b_col[0]); mat_b_rearr[1] = _mm256_mul_ps(mat_a_blk_elems[1], mat_b_col[0]); mat_b_rearr[2] = _mm256_mul_ps(mat_a_blk_elems[2], mat_b_col[0]); mat_b_rearr[3] = _mm256_mul_ps(mat_a_blk_elems[3], mat_b_col[0]); mat_b_rearr[4] = _mm256_mul_ps(mat_a_blk_elems[4], 
mat_b_col[0]); mat_b_rearr[5] = _mm256_mul_ps(mat_a_blk_elems[5], mat_b_col[0]); mat_b_rearr[6] = _mm256_mul_ps(mat_a_blk_elems[6], mat_b_col[0]); mat_b_rearr[7] = _mm256_mul_ps(mat_a_blk_elems[7], mat_b_col[0]); #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) 
mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = 
_mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = 
_mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #endif //end loop of cols } i2 += cs_b_offset[6]; i += cs_l_offset[6]; } //trsm solve k = 0; //for (i2 = 0; i2 < numCols_b; i2 += 8) { i2 = i1 + r; /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A #if !GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i2); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i2)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i2)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i2)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i2)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i2)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i2)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i2)); #endif //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const 
*)(ptr_l + i + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; #if GEMM_ACCUM_A //(Row0): already done #else mat_b_rearr[0] = _mm256_sub_ps(mat_b_col[0], mat_b_rearr[0]); #endif #if GEMM_ACCUM_A mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[1] = _mm256_sub_ps(mat_b_col[1], mat_b_rearr[1]); mat_b_rearr[2] = _mm256_sub_ps(mat_b_col[2], mat_b_rearr[2]); mat_b_rearr[3] = _mm256_sub_ps(mat_b_col[3], mat_b_rearr[3]); mat_b_rearr[4] = _mm256_sub_ps(mat_b_col[4], mat_b_rearr[4]); mat_b_rearr[5] = _mm256_sub_ps(mat_b_col[5], mat_b_rearr[5]); mat_b_rearr[6] = _mm256_sub_ps(mat_b_col[6], mat_b_rearr[6]); mat_b_rearr[7] = _mm256_sub_ps(mat_b_col[7], mat_b_rearr[7]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], 
mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A32 to A72 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 
i + 5)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A43 to A73 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A54 to A74 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], 
mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A65 to A75 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A76 to register mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + r, mat_b_rearr[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)+r), mat_b_rearr[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + r), mat_b_rearr[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + r), mat_b_rearr[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + r), mat_b_rearr[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + r), mat_b_rearr[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + r), mat_b_rearr[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + r), mat_b_rearr[7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; } } } //numRows of A ///////////////////loop ends ///////////////////// } static void trsm_XAtB_block_allSmallSizedMatrices_alpha_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int 
cs_l, int cs_b, float alpha) { //float ones = 1.0; int i, i1, i2, i3, i4, j, k, l, r; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup, *ptr_l_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_blk_elems[8]; //__m256 mat_a_diag_inv[8]; //__m256 reciprocal_diags[2]; __m256 alphaReg; alphaReg = _mm256_broadcast_ss((float const *)&alpha); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); #if 0 //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 
6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); #endif /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); 
mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], alphaReg); mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], alphaReg); mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], alphaReg); mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], alphaReg); mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], alphaReg); mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], alphaReg); mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], alphaReg); mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], alphaReg); //(Row0) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_col[1]);//d = c - (a*b) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_col[4]);//d = c - 
(a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) 
mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_col[7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_col[7]); //i += cs_b_offset[6]; //ptr_b_dup += cs_b_offset[6]; i += 8; ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += 8; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += cs_b_offset[6]; i1 += cs_b_offset[6]; i3 += cs_l_offset[6]; i = 0; i2 = 0; for (r = 0; r < numCols_b; r += GEMM_BLK_V1) { #if GEMM_ACCUM_A i = i1 + r; //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float 
const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], alphaReg); mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], alphaReg); mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], alphaReg); mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], alphaReg); mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], alphaReg); mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], alphaReg); mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], alphaReg); mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], alphaReg); #endif i = 0; i2 = 0; for (l = 0; l < j; l += 8) // move across m { //for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) { /////////////////// Partial Lower 8x8 block trsm of B ptr_l_dup = ptr_l; i4 = i2 + r; //Read current 8 cols of B columns from specified 8x8 current-block of B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 
3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); i4 = k >> 3; ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_mul_ps(mat_a_blk_elems[0], mat_b_col[0]); mat_b_rearr[1] = _mm256_mul_ps(mat_a_blk_elems[1], mat_b_col[0]); mat_b_rearr[2] = _mm256_mul_ps(mat_a_blk_elems[2], mat_b_col[0]); mat_b_rearr[3] = _mm256_mul_ps(mat_a_blk_elems[3], mat_b_col[0]); mat_b_rearr[4] = _mm256_mul_ps(mat_a_blk_elems[4], mat_b_col[0]); mat_b_rearr[5] = _mm256_mul_ps(mat_a_blk_elems[5], mat_b_col[0]); mat_b_rearr[6] = _mm256_mul_ps(mat_a_blk_elems[6], mat_b_col[0]); mat_b_rearr[7] = _mm256_mul_ps(mat_a_blk_elems[7], mat_b_col[0]); #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = 
_mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = 
_mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + i + 7)); ptr_l_dup += cs_l; #if GEMM_ACCUM_A //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = 
_mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #endif //end loop of cols } i2 += cs_b_offset[6]; i += cs_l_offset[6]; } //trsm solve k = 0; //for (i2 = 0; i2 < numCols_b; i2 += 8) { i2 = i1 + r; /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A #if !GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i2); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i2)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i2)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i2)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i2)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i2)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i2)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i2)); mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], alphaReg); mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], alphaReg); mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], alphaReg); mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], alphaReg); mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], alphaReg); mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], alphaReg); mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], alphaReg); mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], alphaReg); #endif //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = 
_mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; #if GEMM_ACCUM_A //(Row0): already done #else mat_b_rearr[0] = _mm256_sub_ps(mat_b_col[0], mat_b_rearr[0]); #endif #if GEMM_ACCUM_A mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[1] = _mm256_sub_ps(mat_b_col[1], mat_b_rearr[1]); mat_b_rearr[2] = _mm256_sub_ps(mat_b_col[2], mat_b_rearr[2]); mat_b_rearr[3] = _mm256_sub_ps(mat_b_col[3], mat_b_rearr[3]); mat_b_rearr[4] = _mm256_sub_ps(mat_b_col[4], mat_b_rearr[4]); mat_b_rearr[5] = _mm256_sub_ps(mat_b_col[5], mat_b_rearr[5]); mat_b_rearr[6] = _mm256_sub_ps(mat_b_col[6], mat_b_rearr[6]); mat_b_rearr[7] = _mm256_sub_ps(mat_b_col[7], mat_b_rearr[7]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = 
_mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A32 to A72 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[2] = 
_mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A43 to A73 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A54 to A74 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) 
mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A65 to A75 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A76 to register mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + r, mat_b_rearr[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)+r), mat_b_rearr[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + r), mat_b_rearr[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + r), mat_b_rearr[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + r), mat_b_rearr[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + r), mat_b_rearr[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + r), mat_b_rearr[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + r), mat_b_rearr[7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; } } } //numRows of A ///////////////////loop ends ///////////////////// } #else //rel 1.0 intrisic kernels (NOT OPT_CACHE_BLOCKING_L1) static void 
trsm_XAtB_block_allSmallSizedMatrices(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b) { float ones = 1.0; int i, i1, i2, i3, i4, j, k, l; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[16][8]; __m256 mat_a_cols_rearr[8]; __m256 mat_a_blk_elems[64]; __m256 mat_a_diag_inv[8]; __m256 reciprocal_diags[2]; reciprocal_diags[0] = _mm256_broadcast_ss((float const *)(&ones)); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); //read diag elems of L 16x16 block mat_a_cols_rearr[0] = _mm256_loadu_ps((float const *)ptr_l); mat_a_cols_rearr[1] = _mm256_loadu_ps((float const *)ptr_l + cs_l); mat_a_cols_rearr[2] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[0]); mat_a_cols_rearr[3] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[1]); mat_a_cols_rearr[4] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[2]); mat_a_cols_rearr[5] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[3]); mat_a_cols_rearr[6] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[4]); mat_a_cols_rearr[7] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[5]); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); reciprocal_diags[1] = reciprocal_diags[0]; //pack first 8 diags together mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_cols_rearr[0], mat_a_cols_rearr[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = 
_mm256_blend_ps(mat_a_cols_rearr[2], mat_a_cols_rearr[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_cols_rearr[4], mat_a_cols_rearr[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_cols_rearr[6], mat_a_cols_rearr[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + 
cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_diag_inv[0], mat_a_diag_inv[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); 
//extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_rearr[0][0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5][0] = 
_mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_col[0] = _mm256_mul_ps(mat_b_rearr[0][0], mat_a_diag_inv[0]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1][0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[1][0]);//d = c - (a*b) mat_b_rearr[2][0] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[2][0]);//d = c - (a*b) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_col[1] = _mm256_mul_ps(mat_b_rearr[1][0], mat_a_diag_inv[1]); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2][0] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[2][0]);//d = c - (a*b) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_col[1], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_col[1], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_col[1], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_col[1], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = 
_mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_col[1], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_col[2] = _mm256_mul_ps(mat_b_rearr[2][0], mat_a_diag_inv[2]); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_col[2], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_col[2], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_col[2], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_col[2], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_col[2], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_col[3] = _mm256_mul_ps(mat_b_rearr[3][0], mat_a_diag_inv[3]); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_col[3], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_col[3], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_col[3], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_col[3], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_col[4] = _mm256_mul_ps(mat_b_rearr[4][0], mat_a_diag_inv[4]); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_col[4], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_col[4], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = 
_mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_col[4], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_col[5] = _mm256_mul_ps(mat_b_rearr[5][0], mat_a_diag_inv[5]); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_col[5], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_col[5], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_col[6] = _mm256_mul_ps(mat_b_rearr[6][0], mat_a_diag_inv[6]); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_col[6], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_col[7] = _mm256_mul_ps(mat_b_rearr[7][0], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_col[7]); //i += cs_b_offset[6]; //ptr_b_dup += cs_b_offset[6]; i += 8; ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += 8; 
//ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += cs_b_offset[6]; i1 += cs_b_offset[6]; //Read next 8x8 block of A to get diag elements i3 += cs_l_offset[6]; mat_a_cols_rearr[0] = _mm256_loadu_ps((float const *)ptr_l + i3); mat_a_cols_rearr[1] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l); mat_a_cols_rearr[2] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[0]); mat_a_cols_rearr[3] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[1]); mat_a_cols_rearr[4] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[2]); mat_a_cols_rearr[5] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[3]); mat_a_cols_rearr[6] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[4]); mat_a_cols_rearr[7] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[5]); //pack 8 diags of A together reciprocal_diags[0] = reciprocal_diags[1]; mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_cols_rearr[0], mat_a_cols_rearr[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_cols_rearr[2], mat_a_cols_rearr[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_cols_rearr[4], mat_a_cols_rearr[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_cols_rearr[6], mat_a_cols_rearr[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements of A :- 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); i = 0; i2 = 0; for (k = 0; k < numCols_b; k += 8) { i = i1 + k; //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[i2][0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[i2][1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[i2][2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + 
i)); mat_b_rearr[i2][3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[i2][4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[i2][5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[i2][6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[i2][7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); i2++; } i = 0; i2 = 0; for (l = 0; l < j; l += 8) // move across m { //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 1)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 2)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 3)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 4)); mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 5)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 6)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 7)); //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + 
cs_l_offset[0] + i + 1)); mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 2)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 3)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 4)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 5)); mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 6)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 7)); //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i)); mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 1)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 2)); mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 3)); mat_a_blk_elems[28] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 4)); mat_a_blk_elems[29] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 5)); mat_a_blk_elems[30] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 6)); mat_a_blk_elems[31] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 7)); // _mm256_permute2f128_ps() //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[32] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i)); mat_a_blk_elems[33] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 1)); mat_a_blk_elems[34] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 2)); mat_a_blk_elems[35] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 3)); mat_a_blk_elems[36] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 4)); mat_a_blk_elems[37] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 5)); mat_a_blk_elems[38] = 
_mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 6)); mat_a_blk_elems[39] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 7)); //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[40] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i)); mat_a_blk_elems[41] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 1)); mat_a_blk_elems[42] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 2)); mat_a_blk_elems[43] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 3)); mat_a_blk_elems[44] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 4)); mat_a_blk_elems[45] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 5)); mat_a_blk_elems[46] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 6)); mat_a_blk_elems[47] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 7)); //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[48] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i)); mat_a_blk_elems[49] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 1)); mat_a_blk_elems[50] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 2)); mat_a_blk_elems[51] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 3)); mat_a_blk_elems[52] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 4)); mat_a_blk_elems[53] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 5)); mat_a_blk_elems[54] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 6)); mat_a_blk_elems[55] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 7)); //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[56] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i)); mat_a_blk_elems[57] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 1)); mat_a_blk_elems[58] = _mm256_broadcast_ss((float const *)(ptr_l + 
cs_l_offset[5] + i + 2)); mat_a_blk_elems[59] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 3)); mat_a_blk_elems[60] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 4)); mat_a_blk_elems[61] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 5)); mat_a_blk_elems[62] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 6)); mat_a_blk_elems[63] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 7)); i += cs_l_offset[6]; for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) { /////////////////// Partial Lower 8x8 block trsm of B i4 = i2 + k; //Read current 8 cols of B columns from specified 8x8 current-block of B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); i4 = k >> 3; //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = 
_mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_col[1], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_col[1], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_col[1], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_col[1], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_col[1], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_col[1], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_col[1], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_col[1], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_col[2], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_col[2], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_col[2], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_col[2], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_col[2], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_col[2], mat_b_rearr[i4][5]);//d = c - (a*b) 
mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_col[2], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_col[2], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_col[3], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_col[3], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_col[3], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_col[3], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[28], mat_b_col[3], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[29], mat_b_col[3], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[30], mat_b_col[3], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[31], mat_b_col[3], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[32], mat_b_col[4], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[33], mat_b_col[4], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[34], mat_b_col[4], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[35], mat_b_col[4], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[36], mat_b_col[4], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[37], mat_b_col[4], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[38], mat_b_col[4], 
mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[39], mat_b_col[4], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[40], mat_b_col[5], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[41], mat_b_col[5], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[42], mat_b_col[5], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[43], mat_b_col[5], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[44], mat_b_col[5], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[45], mat_b_col[5], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[46], mat_b_col[5], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[47], mat_b_col[5], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[48], mat_b_col[6], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[49], mat_b_col[6], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[50], mat_b_col[6], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[51], mat_b_col[6], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[52], mat_b_col[6], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[53], mat_b_col[6], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[54], mat_b_col[6], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = 
_mm256_fnmadd_ps(mat_a_blk_elems[55], mat_b_col[6], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[56], mat_b_col[7], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[57], mat_b_col[7], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[58], mat_b_col[7], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[59], mat_b_col[7], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[60], mat_b_col[7], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[61], mat_b_col[7], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[62], mat_b_col[7], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[63], mat_b_col[7], mat_b_rearr[i4][7]);//d = c - (a*b) //end loop of cols } i2 += cs_b_offset[6]; } //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv2[0] = _mm256_unpacklo_ps(mat_a_diag_inv2[0], mat_a_diag_inv2[0]); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float 
const *)(ptr_l + i + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //Broadcast A54 to A74 to registers 
mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); k = 0; for (i = 0; i < numCols_b; i+=8) { /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[k][0] = _mm256_mul_ps(mat_b_rearr[k][0], mat_a_diag_inv[0]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[k][1] = 
_mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[k][0], mat_b_rearr[k][1]);//d = c - (a*b) mat_b_rearr[k][2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[k][0], mat_b_rearr[k][2]);//d = c - (a*b) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[k][0], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[k][0], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[k][0], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[k][0], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[k][0], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[k][1] = _mm256_mul_ps(mat_b_rearr[k][1], mat_a_diag_inv[1]); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[k][2] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_rearr[k][1], mat_b_rearr[k][2]);//d = c - (a*b) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_rearr[k][1], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_rearr[k][1], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_rearr[k][1], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_rearr[k][1], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_rearr[k][1], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[k][2] = _mm256_mul_ps(mat_b_rearr[k][2], mat_a_diag_inv[2]); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_rearr[k][2], 
mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_rearr[k][2], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_rearr[k][2], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_rearr[k][2], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_rearr[k][2], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[k][3] = _mm256_mul_ps(mat_b_rearr[k][3], mat_a_diag_inv[3]); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_rearr[k][3], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_rearr[k][3], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_rearr[k][3], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_rearr[k][3], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[k][4] = _mm256_mul_ps(mat_b_rearr[k][4], mat_a_diag_inv[4]); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_rearr[k][4], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_rearr[k][4], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_rearr[k][4], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[k][5] = _mm256_mul_ps(mat_b_rearr[k][5], mat_a_diag_inv[5]); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) 
mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_rearr[k][5], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_rearr[k][5], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[k][6] = _mm256_mul_ps(mat_b_rearr[k][6], mat_a_diag_inv[6]); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_rearr[k][6], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[k][7] = _mm256_mul_ps(mat_b_rearr[k][7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + i, mat_b_rearr[k][0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b) + i), mat_b_rearr[k][1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + i), mat_b_rearr[k][2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + i), mat_b_rearr[k][3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + i), mat_b_rearr[k][4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + i), mat_b_rearr[k][5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + i), mat_b_rearr[k][6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + i), mat_b_rearr[k][7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; } } ///////////////////loop ends ///////////////////// } static void trsm_XAtB_block_allSmallSizedMatrices_alpha(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alpha) { float ones = 1.0; int i, i1, i2, i3, i4, j, k, l; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[16][8]; __m256 mat_a_cols_rearr[8]; __m256 mat_a_blk_elems[64]; 
__m256 mat_a_diag_inv[8]; __m256 reciprocal_diags[2]; __m256 alphaReg; reciprocal_diags[0] = _mm256_broadcast_ss((float const *)(&ones)); alphaReg = _mm256_broadcast_ss((float const *)&alpha); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); //read diag elems of L 16x16 block mat_a_cols_rearr[0] = _mm256_loadu_ps((float const *)ptr_l); mat_a_cols_rearr[1] = _mm256_loadu_ps((float const *)ptr_l + cs_l); mat_a_cols_rearr[2] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[0]); mat_a_cols_rearr[3] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[1]); mat_a_cols_rearr[4] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[2]); mat_a_cols_rearr[5] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[3]); mat_a_cols_rearr[6] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[4]); mat_a_cols_rearr[7] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[5]); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); reciprocal_diags[1] = reciprocal_diags[0]; //pack first 8 diags together mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_cols_rearr[0], mat_a_cols_rearr[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_cols_rearr[2], mat_a_cols_rearr[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_cols_rearr[4], mat_a_cols_rearr[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_cols_rearr[6], mat_a_cols_rearr[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 
0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = 
_mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_diag_inv[0], mat_a_diag_inv[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 
0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_rearr[0][0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); mat_b_rearr[0][0] = _mm256_mul_ps(mat_b_rearr[0][0], alphaReg); mat_b_rearr[1][0] = _mm256_mul_ps(mat_b_rearr[1][0], alphaReg); 
mat_b_rearr[2][0] = _mm256_mul_ps(mat_b_rearr[2][0], alphaReg); mat_b_rearr[3][0] = _mm256_mul_ps(mat_b_rearr[3][0], alphaReg); mat_b_rearr[4][0] = _mm256_mul_ps(mat_b_rearr[4][0], alphaReg); mat_b_rearr[5][0] = _mm256_mul_ps(mat_b_rearr[5][0], alphaReg); mat_b_rearr[6][0] = _mm256_mul_ps(mat_b_rearr[6][0], alphaReg); mat_b_rearr[7][0] = _mm256_mul_ps(mat_b_rearr[7][0], alphaReg); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_col[0] = _mm256_mul_ps(mat_b_rearr[0][0], mat_a_diag_inv[0]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1][0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[1][0]);//d = c - (a*b) mat_b_rearr[2][0] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[2][0]);//d = c - (a*b) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_col[1] = _mm256_mul_ps(mat_b_rearr[1][0], mat_a_diag_inv[1]); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2][0] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[2][0]);//d = c - (a*b) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_col[1], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_col[1], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_col[1], 
mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_col[1], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_col[1], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_col[2] = _mm256_mul_ps(mat_b_rearr[2][0], mat_a_diag_inv[2]); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_col[2], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_col[2], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_col[2], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_col[2], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_col[2], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_col[3] = _mm256_mul_ps(mat_b_rearr[3][0], mat_a_diag_inv[3]); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_col[3], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_col[3], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_col[3], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_col[3], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_col[4] = _mm256_mul_ps(mat_b_rearr[4][0], mat_a_diag_inv[4]); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_col[4], 
mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_col[4], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_col[4], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_col[5] = _mm256_mul_ps(mat_b_rearr[5][0], mat_a_diag_inv[5]); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_col[5], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_col[5], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_col[6] = _mm256_mul_ps(mat_b_rearr[6][0], mat_a_diag_inv[6]); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_col[6], mat_b_rearr[7][0]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_col[7] = _mm256_mul_ps(mat_b_rearr[7][0], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_col[7]); //i += cs_b_offset[6]; //ptr_b_dup += cs_b_offset[6]; i += 8; ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup 
= ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += 8; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += cs_b_offset[6]; i1 += cs_b_offset[6]; //Read next 8x8 block of A to get diag elements i3 += cs_l_offset[6]; mat_a_cols_rearr[0] = _mm256_loadu_ps((float const *)ptr_l + i3); mat_a_cols_rearr[1] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l); mat_a_cols_rearr[2] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[0]); mat_a_cols_rearr[3] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[1]); mat_a_cols_rearr[4] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[2]); mat_a_cols_rearr[5] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[3]); mat_a_cols_rearr[6] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[4]); mat_a_cols_rearr[7] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[5]); //pack 8 diags of A together reciprocal_diags[0] = reciprocal_diags[1]; mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_cols_rearr[0], mat_a_cols_rearr[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_cols_rearr[2], mat_a_cols_rearr[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_cols_rearr[4], mat_a_cols_rearr[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_cols_rearr[6], mat_a_cols_rearr[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements of A :- 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); i = 0; i2 = 0; for (k = 0; k < numCols_b; k += 8) { i = i1 + k; //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[i2][0] = _mm256_loadu_ps((float const *)ptr_b 
+ i); mat_b_rearr[i2][1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[i2][2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[i2][3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[i2][4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[i2][5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[i2][6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[i2][7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); mat_b_rearr[i2][0] = _mm256_mul_ps(mat_b_rearr[i2][0], alphaReg); mat_b_rearr[i2][1] = _mm256_mul_ps(mat_b_rearr[i2][1], alphaReg); mat_b_rearr[i2][2] = _mm256_mul_ps(mat_b_rearr[i2][2], alphaReg); mat_b_rearr[i2][3] = _mm256_mul_ps(mat_b_rearr[i2][3], alphaReg); mat_b_rearr[i2][4] = _mm256_mul_ps(mat_b_rearr[i2][4], alphaReg); mat_b_rearr[i2][5] = _mm256_mul_ps(mat_b_rearr[i2][5], alphaReg); mat_b_rearr[i2][6] = _mm256_mul_ps(mat_b_rearr[i2][6], alphaReg); mat_b_rearr[i2][7] = _mm256_mul_ps(mat_b_rearr[i2][7], alphaReg); i2++; } i = 0; i2 = 0; for (l = 0; l < j; l += 8) // move across m { //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 1)); 
mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 2)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 3)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 4)); mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 5)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 6)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 7)); //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 1)); mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 2)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 3)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 4)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 5)); mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 6)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 7)); //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i)); mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 1)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 2)); mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 3)); mat_a_blk_elems[28] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 4)); mat_a_blk_elems[29] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 5)); mat_a_blk_elems[30] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 6)); mat_a_blk_elems[31] = _mm256_broadcast_ss((float const 
*)(ptr_l + cs_l_offset[1] + i + 7)); // _mm256_permute2f128_ps() //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[32] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i)); mat_a_blk_elems[33] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 1)); mat_a_blk_elems[34] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 2)); mat_a_blk_elems[35] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 3)); mat_a_blk_elems[36] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 4)); mat_a_blk_elems[37] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 5)); mat_a_blk_elems[38] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 6)); mat_a_blk_elems[39] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 7)); //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[40] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i)); mat_a_blk_elems[41] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 1)); mat_a_blk_elems[42] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 2)); mat_a_blk_elems[43] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 3)); mat_a_blk_elems[44] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 4)); mat_a_blk_elems[45] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 5)); mat_a_blk_elems[46] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 6)); mat_a_blk_elems[47] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 7)); //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[48] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i)); mat_a_blk_elems[49] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 1)); mat_a_blk_elems[50] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 2)); mat_a_blk_elems[51] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] 
+ i + 3)); mat_a_blk_elems[52] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 4)); mat_a_blk_elems[53] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 5)); mat_a_blk_elems[54] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 6)); mat_a_blk_elems[55] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 7)); //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[56] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i)); mat_a_blk_elems[57] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 1)); mat_a_blk_elems[58] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 2)); mat_a_blk_elems[59] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 3)); mat_a_blk_elems[60] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 4)); mat_a_blk_elems[61] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 5)); mat_a_blk_elems[62] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 6)); mat_a_blk_elems[63] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 7)); i += cs_l_offset[6]; for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) { /////////////////// Partial Lower 8x8 block trsm of B i4 = i2 + k; //Read current 8 cols of B columns from specified 8x8 current-block of B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); i4 
= k >> 3; //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_col[1], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_col[1], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_col[1], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_col[1], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_col[1], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_col[1], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_col[1], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_col[1], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) 
mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_col[2], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_col[2], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_col[2], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_col[2], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_col[2], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_col[2], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_col[2], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_col[2], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_col[3], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_col[3], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_col[3], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_col[3], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[28], mat_b_col[3], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[29], mat_b_col[3], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[30], mat_b_col[3], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[31], mat_b_col[3], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[32], mat_b_col[4], 
mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[33], mat_b_col[4], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[34], mat_b_col[4], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[35], mat_b_col[4], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[36], mat_b_col[4], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[37], mat_b_col[4], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[38], mat_b_col[4], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[39], mat_b_col[4], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[40], mat_b_col[5], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[41], mat_b_col[5], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[42], mat_b_col[5], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[43], mat_b_col[5], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[44], mat_b_col[5], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[45], mat_b_col[5], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[46], mat_b_col[5], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[47], mat_b_col[5], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[48], mat_b_col[6], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = 
_mm256_fnmadd_ps(mat_a_blk_elems[49], mat_b_col[6], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[50], mat_b_col[6], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[51], mat_b_col[6], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[52], mat_b_col[6], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[53], mat_b_col[6], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[54], mat_b_col[6], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[55], mat_b_col[6], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[56], mat_b_col[7], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[57], mat_b_col[7], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[58], mat_b_col[7], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[59], mat_b_col[7], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[60], mat_b_col[7], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[61], mat_b_col[7], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[62], mat_b_col[7], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[63], mat_b_col[7], mat_b_rearr[i4][7]);//d = c - (a*b) //end loop of cols } i2 += cs_b_offset[6]; } //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); 
mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv2[0] = _mm256_unpacklo_ps(mat_a_diag_inv2[0], mat_a_diag_inv2[0]); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], 
mat_a_diag_inv[2]); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 
from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); k = 0; for (i = 0; i < numCols_b; i+=8) { /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[k][0] = _mm256_mul_ps(mat_b_rearr[k][0], mat_a_diag_inv[0]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[k][1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[k][0], mat_b_rearr[k][1]);//d = c - (a*b) mat_b_rearr[k][2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[k][0], mat_b_rearr[k][2]);//d = c - (a*b) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[k][0], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[k][0], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[k][0], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[k][0], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[k][0], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[k][1] = _mm256_mul_ps(mat_b_rearr[k][1], mat_a_diag_inv[1]); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[k][2] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_rearr[k][1], mat_b_rearr[k][2]);//d = c - (a*b) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_rearr[k][1], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_rearr[k][1], mat_b_rearr[k][4]);//d = c - (a*b) 
mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_rearr[k][1], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_rearr[k][1], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_rearr[k][1], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[k][2] = _mm256_mul_ps(mat_b_rearr[k][2], mat_a_diag_inv[2]); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_rearr[k][2], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_rearr[k][2], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_rearr[k][2], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_rearr[k][2], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_rearr[k][2], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[k][3] = _mm256_mul_ps(mat_b_rearr[k][3], mat_a_diag_inv[3]); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_rearr[k][3], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_rearr[k][3], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_rearr[k][3], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_rearr[k][3], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[k][4] = _mm256_mul_ps(mat_b_rearr[k][4], mat_a_diag_inv[4]); //(Row5): FMA 
operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_rearr[k][4], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_rearr[k][4], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_rearr[k][4], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[k][5] = _mm256_mul_ps(mat_b_rearr[k][5], mat_a_diag_inv[5]); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_rearr[k][5], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_rearr[k][5], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[k][6] = _mm256_mul_ps(mat_b_rearr[k][6], mat_a_diag_inv[6]); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_rearr[k][6], mat_b_rearr[k][7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[k][7] = _mm256_mul_ps(mat_b_rearr[k][7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + i, mat_b_rearr[k][0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b) + i), mat_b_rearr[k][1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + i), mat_b_rearr[k][2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + i), mat_b_rearr[k][3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + i), mat_b_rearr[k][4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + i), mat_b_rearr[k][5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + i), mat_b_rearr[k][6]); 
_mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + i), mat_b_rearr[k][7]); k++; } } ///////////////////loop ends ///////////////////// } static void trsm_XAtB_block_allSmallSizedMatrices_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b) { //float ones = 1.0; int i, i1, i2, i3, i4, j, k, l; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[16][8]; //__m256 mat_a_cols_rearr[8]; __m256 mat_a_blk_elems[64]; //__m256 mat_a_diag_inv[8]; //__m256 reciprocal_diags[2]; // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); 
mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of 
B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_rearr[0][0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); //(Row0) mat_b_col[0] = mat_b_rearr[0][0]; //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[1][0]);//d = c - (a*b) mat_b_rearr[2][0] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[2][0]);//d = c - (a*b) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[7][0]);//d = c - (a*b) //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[2][0]);//d = c - (a*b) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_col[1], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_col[1], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = 
_mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_col[1], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_col[1], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_col[1], mat_b_rearr[7][0]);//d = c - (a*b) //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_col[2], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_col[2], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_col[2], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_col[2], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_col[2], mat_b_rearr[7][0]);//d = c - (a*b) //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_col[3], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_col[3], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_col[3], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_col[3], mat_b_rearr[7][0]);//d = c - (a*b) //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_col[4], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_col[4], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_col[4], mat_b_rearr[7][0]);//d = c - (a*b) //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_col[5], mat_b_rearr[6][0]);//d = c - (a*b) 
mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_col[5], mat_b_rearr[7][0]);//d = c - (a*b) //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_col[6], mat_b_rearr[7][0]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_col[7]); //i += cs_b_offset[6]; //ptr_b_dup += cs_b_offset[6]; i += 8; ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += 8; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += cs_b_offset[6]; i1 += cs_b_offset[6]; i3 += cs_l_offset[6]; i = 0; i2 = 0; for (k = 0; k < numCols_b; k += 8) { i = i1 + k; //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[i2][0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[i2][1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[i2][2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[i2][3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[i2][4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[i2][5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[i2][6] = _mm256_loadu_ps((float const 
*)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[i2][7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); i2++; } i = 0; i2 = 0; for (l = 0; l < j; l += 8) // move across m { //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 1)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 2)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 3)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 4)); mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 5)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 6)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 7)); //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 1)); mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 2)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 3)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 4)); 
mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 5)); mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 6)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 7)); //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i)); mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 1)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 2)); mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 3)); mat_a_blk_elems[28] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 4)); mat_a_blk_elems[29] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 5)); mat_a_blk_elems[30] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 6)); mat_a_blk_elems[31] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 7)); // _mm256_permute2f128_ps() //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[32] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i)); mat_a_blk_elems[33] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 1)); mat_a_blk_elems[34] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 2)); mat_a_blk_elems[35] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 3)); mat_a_blk_elems[36] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 4)); mat_a_blk_elems[37] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 5)); mat_a_blk_elems[38] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 6)); mat_a_blk_elems[39] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 7)); //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[40] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i)); mat_a_blk_elems[41] = 
_mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 1)); mat_a_blk_elems[42] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 2)); mat_a_blk_elems[43] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 3)); mat_a_blk_elems[44] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 4)); mat_a_blk_elems[45] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 5)); mat_a_blk_elems[46] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 6)); mat_a_blk_elems[47] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 7)); //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[48] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i)); mat_a_blk_elems[49] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 1)); mat_a_blk_elems[50] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 2)); mat_a_blk_elems[51] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 3)); mat_a_blk_elems[52] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 4)); mat_a_blk_elems[53] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 5)); mat_a_blk_elems[54] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 6)); mat_a_blk_elems[55] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 7)); //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[56] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i)); mat_a_blk_elems[57] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 1)); mat_a_blk_elems[58] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 2)); mat_a_blk_elems[59] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 3)); mat_a_blk_elems[60] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 4)); mat_a_blk_elems[61] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 5)); mat_a_blk_elems[62] 
= _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 6)); mat_a_blk_elems[63] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 7)); i += cs_l_offset[6]; for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) { /////////////////// Partial Lower 8x8 block trsm of B i4 = i2 + k; //Read current 8 cols of B columns from specified 8x8 current-block of B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); i4 = k >> 3; //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row9): FMA operations of 
b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_col[1], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_col[1], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_col[1], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_col[1], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_col[1], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_col[1], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_col[1], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_col[1], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_col[2], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_col[2], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_col[2], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_col[2], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_col[2], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_col[2], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_col[2], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_col[2], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = 
_mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_col[3], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_col[3], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_col[3], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_col[3], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[28], mat_b_col[3], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[29], mat_b_col[3], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[30], mat_b_col[3], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[31], mat_b_col[3], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[32], mat_b_col[4], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[33], mat_b_col[4], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[34], mat_b_col[4], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[35], mat_b_col[4], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[36], mat_b_col[4], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[37], mat_b_col[4], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[38], mat_b_col[4], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[39], mat_b_col[4], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[40], mat_b_col[5], mat_b_rearr[i4][0]);//d = c - (a*b) 
mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[41], mat_b_col[5], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[42], mat_b_col[5], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[43], mat_b_col[5], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[44], mat_b_col[5], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[45], mat_b_col[5], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[46], mat_b_col[5], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[47], mat_b_col[5], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[48], mat_b_col[6], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[49], mat_b_col[6], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[50], mat_b_col[6], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[51], mat_b_col[6], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[52], mat_b_col[6], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[53], mat_b_col[6], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[54], mat_b_col[6], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[55], mat_b_col[6], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[56], mat_b_col[7], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[57], mat_b_col[7], 
mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[58], mat_b_col[7], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[59], mat_b_col[7], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[60], mat_b_col[7], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[61], mat_b_col[7], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[62], mat_b_col[7], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[63], mat_b_col[7], mat_b_rearr[i4][7]);//d = c - (a*b) //end loop of cols } i2 += cs_b_offset[6]; } //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[15] 
= _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); k = 0; for (i = 0; i < numCols_b; i+=8) { /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A //(Row0): already done //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[k][1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[k][0], mat_b_rearr[k][1]);//d = c - (a*b) mat_b_rearr[k][2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[k][0], mat_b_rearr[k][2]);//d = c - (a*b) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[k][0], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[k][0], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[k][0], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[k][0], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[k][0], mat_b_rearr[k][7]);//d = c - (a*b) //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[k][2] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_rearr[k][1], mat_b_rearr[k][2]);//d = c - (a*b) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_rearr[k][1], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_rearr[k][1], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_rearr[k][1], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_rearr[k][1], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_rearr[k][1], mat_b_rearr[k][7]);//d = c - (a*b) //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_rearr[k][2], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_rearr[k][2], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_rearr[k][2], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_rearr[k][2], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_rearr[k][2], mat_b_rearr[k][7]);//d = c - (a*b) //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_rearr[k][3], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_rearr[k][3], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_rearr[k][3], 
mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_rearr[k][3], mat_b_rearr[k][7]);//d = c - (a*b) //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_rearr[k][4], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_rearr[k][4], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_rearr[k][4], mat_b_rearr[k][7]);//d = c - (a*b) //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_rearr[k][5], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_rearr[k][5], mat_b_rearr[k][7]);//d = c - (a*b) //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_rearr[k][6], mat_b_rearr[k][7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + i, mat_b_rearr[k][0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b) + i), mat_b_rearr[k][1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + i), mat_b_rearr[k][2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + i), mat_b_rearr[k][3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + i), mat_b_rearr[k][4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + i), mat_b_rearr[k][5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + i), mat_b_rearr[k][6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + i), mat_b_rearr[k][7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; } } ///////////////////loop ends ///////////////////// } static void trsm_XAtB_block_allSmallSizedMatrices_alpha_unitDiag(float *ptr_l, float *ptr_b, int 
numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alpha) { //float ones = 1.0; int i, i1, i2, i3, i4, j, k, l; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[16][8]; //__m256 mat_a_cols_rearr[8]; __m256 mat_a_blk_elems[64]; //__m256 mat_a_diag_inv[8]; //__m256 reciprocal_diags[2]; __m256 alphaReg; alphaReg = _mm256_broadcast_ss((float const *)&alpha); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); 
mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_rearr[0][0] = _mm256_loadu_ps((float const *)ptr_b + i); 
mat_b_rearr[1][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7][0] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); mat_b_rearr[0][0] = _mm256_mul_ps(mat_b_rearr[0][0], alphaReg); mat_b_rearr[1][0] = _mm256_mul_ps(mat_b_rearr[1][0], alphaReg); mat_b_rearr[2][0] = _mm256_mul_ps(mat_b_rearr[2][0], alphaReg); mat_b_rearr[3][0] = _mm256_mul_ps(mat_b_rearr[3][0], alphaReg); mat_b_rearr[4][0] = _mm256_mul_ps(mat_b_rearr[4][0], alphaReg); mat_b_rearr[5][0] = _mm256_mul_ps(mat_b_rearr[5][0], alphaReg); mat_b_rearr[6][0] = _mm256_mul_ps(mat_b_rearr[6][0], alphaReg); mat_b_rearr[7][0] = _mm256_mul_ps(mat_b_rearr[7][0], alphaReg); //(Row0) mat_b_col[0] = mat_b_rearr[0][0]; //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[1][0]);//d = c - (a*b) mat_b_rearr[2][0] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[2][0]);//d = c - (a*b) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[7][0]);//d = c - (a*b) //(Row2): FMA operations of b2 with elements of indices from 
(2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[2][0]);//d = c - (a*b) mat_b_rearr[3][0] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_col[1], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_col[1], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_col[1], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_col[1], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_col[1], mat_b_rearr[7][0]);//d = c - (a*b) //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_col[2], mat_b_rearr[3][0]);//d = c - (a*b) mat_b_rearr[4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_col[2], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_col[2], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_col[2], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_col[2], mat_b_rearr[7][0]);//d = c - (a*b) //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_col[3], mat_b_rearr[4][0]);//d = c - (a*b) mat_b_rearr[5][0] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_col[3], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_col[3], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_col[3], mat_b_rearr[7][0]);//d = c - (a*b) //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_col[4], mat_b_rearr[5][0]);//d = c - (a*b) mat_b_rearr[6][0] = 
_mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_col[4], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_col[4], mat_b_rearr[7][0]);//d = c - (a*b) //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_col[5], mat_b_rearr[6][0]);//d = c - (a*b) mat_b_rearr[7][0] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_col[5], mat_b_rearr[7][0]);//d = c - (a*b) //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_col[6], mat_b_rearr[7][0]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_col[7]); //i += cs_b_offset[6]; //ptr_b_dup += cs_b_offset[6]; i += 8; ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += 8; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += cs_b_offset[6]; i1 += cs_b_offset[6]; i3 += cs_l_offset[6]; i = 0; i2 = 0; for (k = 0; k < numCols_b; k += 8) { i = i1 + k; //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[i2][0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[i2][1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); 
mat_b_rearr[i2][2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[i2][3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[i2][4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[i2][5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[i2][6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[i2][7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); mat_b_rearr[i2][0] = _mm256_mul_ps(mat_b_rearr[i2][0], alphaReg); mat_b_rearr[i2][1] = _mm256_mul_ps(mat_b_rearr[i2][1], alphaReg); mat_b_rearr[i2][2] = _mm256_mul_ps(mat_b_rearr[i2][2], alphaReg); mat_b_rearr[i2][3] = _mm256_mul_ps(mat_b_rearr[i2][3], alphaReg); mat_b_rearr[i2][4] = _mm256_mul_ps(mat_b_rearr[i2][4], alphaReg); mat_b_rearr[i2][5] = _mm256_mul_ps(mat_b_rearr[i2][5], alphaReg); mat_b_rearr[i2][6] = _mm256_mul_ps(mat_b_rearr[i2][6], alphaReg); mat_b_rearr[i2][7] = _mm256_mul_ps(mat_b_rearr[i2][7], alphaReg); i2++; } i = 0; i2 = 0; for (l = 0; l < j; l += 8) // move across m { //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 1)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 2)); 
mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 3)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 4)); mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 5)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 6)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + i + 7)); //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 1)); mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 2)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 3)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 4)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 5)); mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 6)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + i + 7)); //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i)); mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 1)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 2)); mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 3)); mat_a_blk_elems[28] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 4)); mat_a_blk_elems[29] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 5)); mat_a_blk_elems[30] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 6)); mat_a_blk_elems[31] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + i + 7)); // _mm256_permute2f128_ps() //Broadcast A8,4 to 
A15,4 to registers mat_a_blk_elems[32] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i)); mat_a_blk_elems[33] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 1)); mat_a_blk_elems[34] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 2)); mat_a_blk_elems[35] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 3)); mat_a_blk_elems[36] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 4)); mat_a_blk_elems[37] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 5)); mat_a_blk_elems[38] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 6)); mat_a_blk_elems[39] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + i + 7)); //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[40] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i)); mat_a_blk_elems[41] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 1)); mat_a_blk_elems[42] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 2)); mat_a_blk_elems[43] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 3)); mat_a_blk_elems[44] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 4)); mat_a_blk_elems[45] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 5)); mat_a_blk_elems[46] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 6)); mat_a_blk_elems[47] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + i + 7)); //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[48] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i)); mat_a_blk_elems[49] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 1)); mat_a_blk_elems[50] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 2)); mat_a_blk_elems[51] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 3)); mat_a_blk_elems[52] = _mm256_broadcast_ss((float const *)(ptr_l + 
cs_l_offset[4] + i + 4)); mat_a_blk_elems[53] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 5)); mat_a_blk_elems[54] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 6)); mat_a_blk_elems[55] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + i + 7)); //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[56] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i)); mat_a_blk_elems[57] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 1)); mat_a_blk_elems[58] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 2)); mat_a_blk_elems[59] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 3)); mat_a_blk_elems[60] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 4)); mat_a_blk_elems[61] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 5)); mat_a_blk_elems[62] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 6)); mat_a_blk_elems[63] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5] + i + 7)); i += cs_l_offset[6]; for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) { /////////////////// Partial Lower 8x8 block trsm of B i4 = i2 + k; //Read current 8 cols of B columns from specified 8x8 current-block of B mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); i4 = k >> 3; //(Row8): FMA operations of b1 with elements of indices from (1, 
0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_col[1], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_col[1], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_col[1], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_col[1], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_col[1], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_col[1], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_col[1], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_col[1], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_col[2], 
mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_col[2], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_col[2], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_col[2], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_col[2], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_col[2], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_col[2], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_col[2], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_col[3], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_col[3], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_col[3], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_col[3], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[28], mat_b_col[3], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[29], mat_b_col[3], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[30], mat_b_col[3], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[31], mat_b_col[3], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[32], mat_b_col[4], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = 
_mm256_fnmadd_ps(mat_a_blk_elems[33], mat_b_col[4], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[34], mat_b_col[4], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[35], mat_b_col[4], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[36], mat_b_col[4], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[37], mat_b_col[4], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[38], mat_b_col[4], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[39], mat_b_col[4], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[40], mat_b_col[5], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[41], mat_b_col[5], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[42], mat_b_col[5], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[43], mat_b_col[5], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[44], mat_b_col[5], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[45], mat_b_col[5], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[46], mat_b_col[5], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[47], mat_b_col[5], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[48], mat_b_col[6], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[49], mat_b_col[6], mat_b_rearr[i4][1]);//d = c - (a*b) 
mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[50], mat_b_col[6], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[51], mat_b_col[6], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[52], mat_b_col[6], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[53], mat_b_col[6], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[54], mat_b_col[6], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[55], mat_b_col[6], mat_b_rearr[i4][7]);//d = c - (a*b) //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[i4][0] = _mm256_fnmadd_ps(mat_a_blk_elems[56], mat_b_col[7], mat_b_rearr[i4][0]);//d = c - (a*b) mat_b_rearr[i4][1] = _mm256_fnmadd_ps(mat_a_blk_elems[57], mat_b_col[7], mat_b_rearr[i4][1]);//d = c - (a*b) mat_b_rearr[i4][2] = _mm256_fnmadd_ps(mat_a_blk_elems[58], mat_b_col[7], mat_b_rearr[i4][2]);//d = c - (a*b) mat_b_rearr[i4][3] = _mm256_fnmadd_ps(mat_a_blk_elems[59], mat_b_col[7], mat_b_rearr[i4][3]);//d = c - (a*b) mat_b_rearr[i4][4] = _mm256_fnmadd_ps(mat_a_blk_elems[60], mat_b_col[7], mat_b_rearr[i4][4]);//d = c - (a*b) mat_b_rearr[i4][5] = _mm256_fnmadd_ps(mat_a_blk_elems[61], mat_b_col[7], mat_b_rearr[i4][5]);//d = c - (a*b) mat_b_rearr[i4][6] = _mm256_fnmadd_ps(mat_a_blk_elems[62], mat_b_col[7], mat_b_rearr[i4][6]);//d = c - (a*b) mat_b_rearr[i4][7] = _mm256_fnmadd_ps(mat_a_blk_elems[63], mat_b_col[7], mat_b_rearr[i4][7]);//d = c - (a*b) //end loop of cols } i2 += cs_b_offset[6]; } //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + i + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); 
mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + i + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + i + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + i + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + i + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + i + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); i += cs_l; //Broadcast A76 to register 
mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + i + 7)); k = 0; for (i = 0; i < numCols_b; i+=8) { /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A //(Row0): already done //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[k][1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[k][0], mat_b_rearr[k][1]);//d = c - (a*b) mat_b_rearr[k][2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[k][0], mat_b_rearr[k][2]);//d = c - (a*b) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[k][0], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[k][0], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[k][0], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[k][0], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[k][0], mat_b_rearr[k][7]);//d = c - (a*b) //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[k][2] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_rearr[k][1], mat_b_rearr[k][2]);//d = c - (a*b) mat_b_rearr[k][3] = _mm256_fnmadd_ps(mat_a_blk_elems[8], mat_b_rearr[k][1], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[9], mat_b_rearr[k][1], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[10], mat_b_rearr[k][1], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[11], mat_b_rearr[k][1], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[12], mat_b_rearr[k][1], mat_b_rearr[k][7]);//d = c - (a*b) //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[k][3] = 
_mm256_fnmadd_ps(mat_a_blk_elems[13], mat_b_rearr[k][2], mat_b_rearr[k][3]);//d = c - (a*b) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[14], mat_b_rearr[k][2], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[15], mat_b_rearr[k][2], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[16], mat_b_rearr[k][2], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[17], mat_b_rearr[k][2], mat_b_rearr[k][7]);//d = c - (a*b) //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[k][4] = _mm256_fnmadd_ps(mat_a_blk_elems[18], mat_b_rearr[k][3], mat_b_rearr[k][4]);//d = c - (a*b) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[19], mat_b_rearr[k][3], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[20], mat_b_rearr[k][3], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[21], mat_b_rearr[k][3], mat_b_rearr[k][7]);//d = c - (a*b) //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[k][5] = _mm256_fnmadd_ps(mat_a_blk_elems[22], mat_b_rearr[k][4], mat_b_rearr[k][5]);//d = c - (a*b) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[23], mat_b_rearr[k][4], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[24], mat_b_rearr[k][4], mat_b_rearr[k][7]);//d = c - (a*b) //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[k][6] = _mm256_fnmadd_ps(mat_a_blk_elems[25], mat_b_rearr[k][5], mat_b_rearr[k][6]);//d = c - (a*b) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[26], mat_b_rearr[k][5], mat_b_rearr[k][7]);//d = c - (a*b) //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[k][7] = _mm256_fnmadd_ps(mat_a_blk_elems[27], mat_b_rearr[k][6], mat_b_rearr[k][7]);//d = c - (a*b) 
//////////////////////////////////////////////////////////////////////////////// //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + i, mat_b_rearr[k][0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b) + i), mat_b_rearr[k][1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + i), mat_b_rearr[k][2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + i), mat_b_rearr[k][3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + i), mat_b_rearr[k][4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + i), mat_b_rearr[k][5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + i), mat_b_rearr[k][6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + i), mat_b_rearr[k][7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; } } ///////////////////loop ends ///////////////////// } #endif //OPT_CACHE_BLOCKING_L1 //////////////////////////// AutX=B /////////////////////// static void trsm_AutXB_block_allSmallSizedMatrices(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b) { float ones = 1.0; int i, i1, i2, i3, i4, j, k, l, r; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup, *ptr_l_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_blk_elems[8]; __m256 mat_a_diag_inv[8]; __m256 reciprocal_diags[2]; reciprocal_diags[0] = _mm256_broadcast_ss((float const *)(&ones)); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); //read diag elems of L 16x16 block mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_l); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)ptr_l + cs_l); 
mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[0]); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[1]); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[2]); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[3]); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[4]); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[5]); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); reciprocal_diags[1] = reciprocal_diags[0]; //pack first 8 diags together mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_blk_elems[4], mat_a_blk_elems[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_blk_elems[6], mat_a_blk_elems[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); #if 0 //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); 
mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + 
cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); #endif //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_diag_inv[0], mat_a_diag_inv[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a 
mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = 
_mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose 
steps end */ //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], mat_a_diag_inv[0]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5])); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_col[1]);//d = c - (a*b) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], mat_a_diag_inv[1]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[0])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[1])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[2])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[3])); 
mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[4])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[5])); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], mat_a_diag_inv[2]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[1])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[2])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[3])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[4])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[5])); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_col[7]);//d = c - (a*b) //Perform mul operation 
of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], mat_a_diag_inv[3]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[2])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[3])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[4])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[5])); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], mat_a_diag_inv[4]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[3])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[4])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[5])); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], mat_a_diag_inv[5]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 5 + cs_l_offset[4])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float 
const *)(ptr_l + 5 + cs_l_offset[5])); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], mat_a_diag_inv[6]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 6 + cs_l_offset[5])); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// /* transpose steps start */ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = 
_mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); /* transpose steps end */ //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_rearr[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_rearr[1]); _mm256_storeu_ps((float *)(ptr_b_dup 
+ cs_b_offset[0]), mat_b_rearr[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_rearr[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_rearr[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_rearr[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_rearr[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_rearr[7]); i += cs_b_offset[6]; ptr_b_dup += cs_b_offset[6]; //i += 8; //ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += cs_l_offset[6]; //Read next 8x8 block of A to get diag elements i3 += 8; mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_l + i3); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[0]); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[1]); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[2]); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[3]); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[4]); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[5]); //pack 8 diags of A together reciprocal_diags[0] = reciprocal_diags[1]; mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_blk_elems[4], mat_a_blk_elems[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_blk_elems[6], mat_a_blk_elems[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = 
_mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements of A :- 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += 8; i1 += 8; i = i1; i2 = 0; //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv2[0] = _mm256_unpacklo_ps(mat_a_diag_inv2[0], mat_a_diag_inv2[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = 
_mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); for (r = 0; r < numCols_b; r += GEMM_BLK_V1) { #if GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = 
_mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 
0x31); /* transpose steps end */ #endif //i = 0; ptr_l_dup = ptr_l; i4 = i2; for (l = 0; l < j; l += 8) // move across m { //for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) //{ /////////////////// Partial Lower 8x8 block trsm of B //Read current 8 cols of B columns from specified 8x8 current-block of B mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_a_blk_elems[0], mat_a_blk_elems[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_a_blk_elems[2], mat_a_blk_elems[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_a_blk_elems[4], mat_a_blk_elems[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_a_blk_elems[6], mat_a_blk_elems[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); 
mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_a_blk_elems[0] = _mm256_unpackhi_ps(mat_a_blk_elems[0], mat_a_blk_elems[1]); mat_a_blk_elems[1] = _mm256_unpackhi_ps(mat_a_blk_elems[2], mat_a_blk_elems[3]); mat_a_blk_elems[2] = _mm256_unpackhi_ps(mat_a_blk_elems[4], mat_a_blk_elems[5]); mat_a_blk_elems[3] = _mm256_unpackhi_ps(mat_a_blk_elems[6], mat_a_blk_elems[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_a_blk_elems[4] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0x44); mat_a_blk_elems[5] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xEE); mat_a_blk_elems[6] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0x44); mat_a_blk_elems[7] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xEE); #else mat_a_blk_elems[6] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0x4E); mat_a_blk_elems[7] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0x4E); mat_a_blk_elems[4] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[6], 0xCC); mat_a_blk_elems[5] = _mm256_blend_ps(mat_a_blk_elems[1], mat_a_blk_elems[6], 0x33); mat_a_blk_elems[6] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[7], 0xCC); mat_a_blk_elems[7] = _mm256_blend_ps(mat_a_blk_elems[3], mat_a_blk_elems[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_a_blk_elems[4], mat_a_blk_elems[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_a_blk_elems[4], mat_a_blk_elems[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_a_blk_elems[5], mat_a_blk_elems[7], 0x20); mat_b_col[7] = 
_mm256_permute2f128_ps(mat_a_blk_elems[5], mat_a_blk_elems[7], 0x31); /* transpose steps end */ //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); //i4 = k >> 3; ptr_l_dup++; #if GEMM_ACCUM_A //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_mul_ps(mat_a_blk_elems[0], mat_b_col[0]); mat_b_rearr[1] = _mm256_mul_ps(mat_a_blk_elems[1], mat_b_col[0]); mat_b_rearr[2] = _mm256_mul_ps(mat_a_blk_elems[2], mat_b_col[0]); mat_b_rearr[3] = _mm256_mul_ps(mat_a_blk_elems[3], 
mat_b_col[0]); mat_b_rearr[4] = _mm256_mul_ps(mat_a_blk_elems[4], mat_b_col[0]); mat_b_rearr[5] = _mm256_mul_ps(mat_a_blk_elems[5], mat_b_col[0]); mat_b_rearr[6] = _mm256_mul_ps(mat_a_blk_elems[6], mat_b_col[0]); mat_b_rearr[7] = _mm256_mul_ps(mat_a_blk_elems[7], mat_b_col[0]); #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c 
- (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], 
mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = 
_mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float 
const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d 
= c - (a*b) #endif //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], 
mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = 
_mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = 
c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #endif //end loop of cols //} //i2 += cs_b_offset[6]; i4 += 8; } //trsm solve k = 0; //for (i2 = 0; i2 < numCols_b; i2 += 8) //{ //i2 = i1 + r; /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A #if !GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); 
mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = 
_mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ #endif //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); //i += cs_l; #if GEMM_ACCUM_A //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); #else mat_b_rearr[0] = _mm256_sub_ps(mat_b_col[0], 
mat_b_rearr[0]); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); #endif #if GEMM_ACCUM_A mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[1] = _mm256_sub_ps(mat_b_col[1], mat_b_rearr[1]); mat_b_rearr[2] = _mm256_sub_ps(mat_b_col[2], mat_b_rearr[2]); mat_b_rearr[3] = _mm256_sub_ps(mat_b_col[3], mat_b_rearr[3]); mat_b_rearr[4] = _mm256_sub_ps(mat_b_col[4], mat_b_rearr[4]); mat_b_rearr[5] = _mm256_sub_ps(mat_b_col[5], mat_b_rearr[5]); mat_b_rearr[6] = _mm256_sub_ps(mat_b_col[6], mat_b_rearr[6]); mat_b_rearr[7] = _mm256_sub_ps(mat_b_col[7], mat_b_rearr[7]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = 
_mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[0])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[1])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[2])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[3])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[4])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A32 to A72 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[1])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[2])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[3])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[4])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 
2 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A43 to A73 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[2])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[3])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[4])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A54 to A74 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[3])); mat_a_blk_elems[1] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[4])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A65 to A75 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 5 + cs_l_offset[4])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 5 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A76 to register mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 6 + cs_l_offset[5])); //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); 
//////////////////////////////////////////////////////////////////////////////// /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = 
_mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + i2, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)+i2), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + i2), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + i2), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + i2), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + i2), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + i2), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + i2), mat_b_col[7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; //} i += cs_b_offset[6]; i2 += cs_b_offset[6]; } } //numRows of A ///////////////////loop ends ///////////////////// } static void trsm_AutXB_block_allSmallSizedMatrices_alpha(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alpha) { float ones = 1.0; int i, i1, i2, i3, i4, j, k, l, r; int 
cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup, *ptr_l_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_blk_elems[8]; __m256 mat_a_diag_inv[8]; __m256 reciprocal_diags[2]; __m256 alphaReg; reciprocal_diags[0] = _mm256_broadcast_ss((float const *)(&ones)); alphaReg = _mm256_broadcast_ss((float const *)&alpha); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); //read diag elems of L 16x16 block mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_l); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)ptr_l + cs_l); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[0]); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[1]); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[2]); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[3]); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[4]); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)ptr_l + cs_l_offset[5]); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); reciprocal_diags[1] = reciprocal_diags[0]; //pack first 8 diags together mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_blk_elems[4], mat_a_blk_elems[5], 0xAA);//diag 4,5 
mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_blk_elems[6], mat_a_blk_elems[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); #if 0 //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + 
cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); #endif //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv[0] = _mm256_unpacklo_ps(mat_a_diag_inv[0], mat_a_diag_inv[0]); //extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 
0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + 
cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = 
_mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], alphaReg); mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], alphaReg); mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], alphaReg); mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], alphaReg); mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], alphaReg); mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], alphaReg); mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], alphaReg); mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], alphaReg); //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], mat_a_diag_inv[0]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4])); mat_a_blk_elems[6] = 
_mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5])); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_col[1]);//d = c - (a*b) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], mat_a_diag_inv[1]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[0])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[1])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[2])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[3])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[4])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[5])); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = 
_mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], mat_a_diag_inv[2]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[1])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[2])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[3])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[4])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[5])); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], mat_a_diag_inv[3]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[2])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[3])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[4])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[5])); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_col[4]);//d = c - (a*b) 
mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], mat_a_diag_inv[4]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[3])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[4])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[5])); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], mat_a_diag_inv[5]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 5 + cs_l_offset[4])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 5 + cs_l_offset[5])); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], mat_a_diag_inv[6]); mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 6 + cs_l_offset[5])); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = 
_mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_col[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// /* transpose steps start */ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = 
_mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); /* transpose steps end */ //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_rearr[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_rearr[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_rearr[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_rearr[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_rearr[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_rearr[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_rearr[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_rearr[7]); i += cs_b_offset[6]; ptr_b_dup += cs_b_offset[6]; //i += 8; //ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i3 = 0; i1 = 0; //Start loop for cols of B to be processed in 
size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += cs_l_offset[6]; //Read next 8x8 block of A to get diag elements i3 += 8; mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_l + i3); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[0]); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[1]); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[2]); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[3]); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[4]); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)ptr_l + i3 + cs_l_offset[5]); //pack 8 diags of A together reciprocal_diags[0] = reciprocal_diags[1]; mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xAA);//diag 0,1 mat_a_diag_inv[1] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xAA);//diag 2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_blk_elems[4], mat_a_blk_elems[5], 0xAA);//diag 4,5 mat_a_diag_inv[3] = _mm256_blend_ps(mat_a_blk_elems[6], mat_a_blk_elems[7], 0xAA);//diag 6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[1], 0xCC);//diag 0,1,2,3 mat_a_diag_inv[2] = _mm256_blend_ps(mat_a_diag_inv[2], mat_a_diag_inv[3], 0xCC);//diag 4,5,6,7 mat_a_diag_inv[0] = _mm256_blend_ps(mat_a_diag_inv[0], mat_a_diag_inv[2], 0xF0);//diag 0,1,2,3,4,5,6,7 //reciprocal of diagnal elements of A :- 0,1,2,3,4,5,6,7 reciprocal_diags[0] = _mm256_div_ps(reciprocal_diags[0], mat_a_diag_inv[0]); //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += 8; i1 += 8; i = i1; i2 = 0; //extract diag a00 from a mat_a_diag_inv[0] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[0] = _mm256_permute2f128_ps(mat_a_diag_inv[0], mat_a_diag_inv[0], 0x00); //mat_a_diag_inv2[0] = _mm256_unpacklo_ps(mat_a_diag_inv2[0], mat_a_diag_inv2[0]); 
//extract diag a11 from a mat_a_diag_inv[1] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[1] = _mm256_permute2f128_ps(mat_a_diag_inv[1], mat_a_diag_inv[1], 0x00); //mat_a_diag_inv[1] = _mm256_unpacklo_ps(mat_a_diag_inv[1], mat_a_diag_inv[1]); //extract diag a22 from a mat_a_diag_inv[2] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[2] = _mm256_permute2f128_ps(mat_a_diag_inv[2], mat_a_diag_inv[2], 0x00); //mat_a_diag_inv[2] = _mm256_unpacklo_ps(mat_a_diag_inv[2], mat_a_diag_inv[2]); //extract diag a33 from a mat_a_diag_inv[3] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[3] = _mm256_permute2f128_ps(mat_a_diag_inv[3], mat_a_diag_inv[3], 0x00); //mat_a_diag_inv[3] = _mm256_unpacklo_ps(mat_a_diag_inv[3], mat_a_diag_inv[3]); //extract diag a44 from a mat_a_diag_inv[4] = _mm256_permute_ps(reciprocal_diags[0], 0x00); mat_a_diag_inv[4] = _mm256_permute2f128_ps(mat_a_diag_inv[4], mat_a_diag_inv[4], 0x11); //mat_a_diag_inv[4] = _mm256_unpacklo_ps(mat_a_diag_inv[4], mat_a_diag_inv[4]); //extract diag a55 from a mat_a_diag_inv[5] = _mm256_permute_ps(reciprocal_diags[0], 0x55); mat_a_diag_inv[5] = _mm256_permute2f128_ps(mat_a_diag_inv[5], mat_a_diag_inv[5], 0x11); //mat_a_diag_inv[5] = _mm256_unpacklo_ps(mat_a_diag_inv[5], mat_a_diag_inv[5]); //extract diag a66 from a mat_a_diag_inv[6] = _mm256_permute_ps(reciprocal_diags[0], 0xAA); mat_a_diag_inv[6] = _mm256_permute2f128_ps(mat_a_diag_inv[6], mat_a_diag_inv[6], 0x11); //mat_a_diag_inv[6] = _mm256_unpacklo_ps(mat_a_diag_inv[6], mat_a_diag_inv[6]); //extract diag a77 from a mat_a_diag_inv[7] = _mm256_permute_ps(reciprocal_diags[0], 0xFF); mat_a_diag_inv[7] = _mm256_permute2f128_ps(mat_a_diag_inv[7], mat_a_diag_inv[7], 0x11); //mat_a_diag_inv[7] = _mm256_unpacklo_ps(mat_a_diag_inv[7], mat_a_diag_inv[7]); for (r = 0; r < numCols_b; r += GEMM_BLK_V1) { #if GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i); 
mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); 
////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); /* transpose steps end */ mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], alphaReg); mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], alphaReg); mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], alphaReg); mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], alphaReg); mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], alphaReg); mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], alphaReg); mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], alphaReg); mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], alphaReg); #endif //i = 0; ptr_l_dup = ptr_l; i4 = i2; for (l = 0; l < j; l += 8) // move across m { //for (k = 0; k < numCols_b; k += 8) // move across n for the 
same value of l (index of m) //{ /////////////////// Partial Lower 8x8 block trsm of B //Read current 8 cols of B columns from specified 8x8 current-block of B mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_a_blk_elems[0], mat_a_blk_elems[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_a_blk_elems[2], mat_a_blk_elems[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_a_blk_elems[4], mat_a_blk_elems[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_a_blk_elems[6], mat_a_blk_elems[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 
0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_a_blk_elems[0] = _mm256_unpackhi_ps(mat_a_blk_elems[0], mat_a_blk_elems[1]); mat_a_blk_elems[1] = _mm256_unpackhi_ps(mat_a_blk_elems[2], mat_a_blk_elems[3]); mat_a_blk_elems[2] = _mm256_unpackhi_ps(mat_a_blk_elems[4], mat_a_blk_elems[5]); mat_a_blk_elems[3] = _mm256_unpackhi_ps(mat_a_blk_elems[6], mat_a_blk_elems[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_a_blk_elems[4] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0x44); mat_a_blk_elems[5] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xEE); mat_a_blk_elems[6] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0x44); mat_a_blk_elems[7] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xEE); #else mat_a_blk_elems[6] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0x4E); mat_a_blk_elems[7] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0x4E); mat_a_blk_elems[4] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[6], 0xCC); mat_a_blk_elems[5] = _mm256_blend_ps(mat_a_blk_elems[1], mat_a_blk_elems[6], 0x33); mat_a_blk_elems[6] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[7], 0xCC); mat_a_blk_elems[7] = _mm256_blend_ps(mat_a_blk_elems[3], mat_a_blk_elems[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_a_blk_elems[4], mat_a_blk_elems[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_a_blk_elems[4], mat_a_blk_elems[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_a_blk_elems[5], mat_a_blk_elems[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_a_blk_elems[5], mat_a_blk_elems[7], 0x31); /* transpose steps end */ //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const 
*)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); //i4 = k >> 3; ptr_l_dup++; #if GEMM_ACCUM_A //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_mul_ps(mat_a_blk_elems[0], mat_b_col[0]); mat_b_rearr[1] = _mm256_mul_ps(mat_a_blk_elems[1], mat_b_col[0]); mat_b_rearr[2] = _mm256_mul_ps(mat_a_blk_elems[2], mat_b_col[0]); mat_b_rearr[3] = _mm256_mul_ps(mat_a_blk_elems[3], mat_b_col[0]); mat_b_rearr[4] = _mm256_mul_ps(mat_a_blk_elems[4], mat_b_col[0]); mat_b_rearr[5] = _mm256_mul_ps(mat_a_blk_elems[5], mat_b_col[0]); mat_b_rearr[6] = _mm256_mul_ps(mat_a_blk_elems[6], 
mat_b_col[0]); mat_b_rearr[7] = _mm256_mul_ps(mat_a_blk_elems[7], mat_b_col[0]); #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c 
- (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[2], 
mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = 
_mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 
cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup 
+ cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], 
mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = 
_mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - 
(a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #endif //end loop of cols //} //i2 += cs_b_offset[6]; i4 += 8; } //trsm solve k = 0; //for (i2 = 0; i2 < numCols_b; i2 += 8) //{ //i2 = i1 + r; /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A #if !GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const 
*)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); 
mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], alphaReg); mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], alphaReg); mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], alphaReg); mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], alphaReg); mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], alphaReg); mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], alphaReg); mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], alphaReg); mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], alphaReg); #endif //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); //i += cs_l; #if 
GEMM_ACCUM_A //(Row0): Perform mul operation of reciprocal of L(0,0) element with 1st row elements of B mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); #else mat_b_rearr[0] = _mm256_sub_ps(mat_b_col[0], mat_b_rearr[0]); mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], mat_a_diag_inv[0]); #endif #if GEMM_ACCUM_A mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[1] = _mm256_sub_ps(mat_b_col[1], mat_b_rearr[1]); mat_b_rearr[2] = _mm256_sub_ps(mat_b_col[2], mat_b_rearr[2]); mat_b_rearr[3] = _mm256_sub_ps(mat_b_col[3], mat_b_rearr[3]); mat_b_rearr[4] = _mm256_sub_ps(mat_b_col[4], mat_b_rearr[4]); mat_b_rearr[5] = _mm256_sub_ps(mat_b_col[5], mat_b_rearr[5]); mat_b_rearr[6] = _mm256_sub_ps(mat_b_col[6], mat_b_rearr[6]); mat_b_rearr[7] = _mm256_sub_ps(mat_b_col[7], mat_b_rearr[7]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = 
_mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[0])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[1])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[2])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[3])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[4])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(1,1) element with 2nd row elements of B mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], mat_a_diag_inv[1]); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A32 to A72 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[1])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[2])); mat_a_blk_elems[2] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[3])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[4])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(2, 2) element with 3rd row elements of B mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], mat_a_diag_inv[2]); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A43 to A73 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[2])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[3])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[4])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(3, 3) element with 4rth row elements of B mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], mat_a_diag_inv[3]); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = 
_mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A54 to A74 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[3])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[4])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(4, 4) element with 4rth row elements of B mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], mat_a_diag_inv[4]); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A65 to A75 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 5 + cs_l_offset[4])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 5 + cs_l_offset[5])); //i += cs_l; //Perform mul operation of reciprocal of L(5, 5) element with 5th row elements of B mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], mat_a_diag_inv[5]); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A76 to register mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 6 + cs_l_offset[5])); //Perform mul operation of reciprocal of L(6, 6) element with 6th row elements of B mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], mat_a_diag_inv[6]); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], 
mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //Perform mul operation of reciprocal of L(7, 7) element with 7th row elements of B mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], mat_a_diag_inv[7]); //////////////////////////////////////////////////////////////////////////////// /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if 
REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + i2, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)+i2), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + i2), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + i2), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + i2), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + i2), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + i2), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + i2), mat_b_col[7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; //} i += cs_b_offset[6]; i2 += cs_b_offset[6]; } } //numRows of A ///////////////////loop ends ///////////////////// } static void 
trsm_AutXB_block_allSmallSizedMatrices_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b) { //float ones = 1.0; int i, i1, i2, i4, j, k, l, r; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup, *ptr_l_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_blk_elems[8]; //__m256 mat_a_diag_inv[8]; //__m256 reciprocal_diags[2]; // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); #if 0 //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = 
_mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); #endif /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = 
_mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], 
mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ //(Row0) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5])); //(Row1): FMA 
operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_col[1]);//d = c - (a*b) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[0])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[1])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[2])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[3])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[4])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[5])); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + 
cs_l_offset[1])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[2])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[3])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[4])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[5])); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[2])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[3])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[4])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[5])); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[3])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[4])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + 
cs_l_offset[5])); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 5 + cs_l_offset[4])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 5 + cs_l_offset[5])); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 6 + cs_l_offset[5])); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_col[7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// /* transpose steps start */ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = 
_mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 
0x31); /* transpose steps end */ //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_rearr[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_rearr[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_rearr[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1]), mat_b_rearr[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_rearr[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_rearr[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_rearr[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_rearr[7]); i += cs_b_offset[6]; ptr_b_dup += cs_b_offset[6]; //i += 8; //ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += cs_l_offset[6]; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += 8; i1 += 8; i = i1; i2 = 0; for (r = 0; r < numCols_b; r += GEMM_BLK_V1) { #if GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = 
_mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], 
mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); /* transpose steps end */ #endif //i = 0; ptr_l_dup = ptr_l; i4 = i2; for (l = 0; l < j; l += 8) // move across m { //for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) //{ /////////////////// Partial Lower 8x8 block trsm of B //Read current 8 cols of B columns from specified 8x8 current-block of B mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_a_blk_elems[0], mat_a_blk_elems[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_a_blk_elems[2], mat_a_blk_elems[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_a_blk_elems[4], mat_a_blk_elems[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_a_blk_elems[6], mat_a_blk_elems[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = 
_mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_a_blk_elems[0] = _mm256_unpackhi_ps(mat_a_blk_elems[0], mat_a_blk_elems[1]); mat_a_blk_elems[1] = _mm256_unpackhi_ps(mat_a_blk_elems[2], mat_a_blk_elems[3]); mat_a_blk_elems[2] = _mm256_unpackhi_ps(mat_a_blk_elems[4], mat_a_blk_elems[5]); mat_a_blk_elems[3] = _mm256_unpackhi_ps(mat_a_blk_elems[6], mat_a_blk_elems[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_a_blk_elems[4] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0x44); mat_a_blk_elems[5] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xEE); mat_a_blk_elems[6] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0x44); mat_a_blk_elems[7] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xEE); #else mat_a_blk_elems[6] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0x4E); mat_a_blk_elems[7] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0x4E); mat_a_blk_elems[4] = _mm256_blend_ps(mat_a_blk_elems[0], 
mat_a_blk_elems[6], 0xCC); mat_a_blk_elems[5] = _mm256_blend_ps(mat_a_blk_elems[1], mat_a_blk_elems[6], 0x33); mat_a_blk_elems[6] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[7], 0xCC); mat_a_blk_elems[7] = _mm256_blend_ps(mat_a_blk_elems[3], mat_a_blk_elems[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_a_blk_elems[4], mat_a_blk_elems[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_a_blk_elems[4], mat_a_blk_elems[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_a_blk_elems[5], mat_a_blk_elems[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_a_blk_elems[5], mat_a_blk_elems[7], 0x31); /* transpose steps end */ //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); //i4 = k >> 3; ptr_l_dup++; #if GEMM_ACCUM_A //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], 
mat_b_col[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_mul_ps(mat_a_blk_elems[0], mat_b_col[0]); mat_b_rearr[1] = _mm256_mul_ps(mat_a_blk_elems[1], mat_b_col[0]); mat_b_rearr[2] = _mm256_mul_ps(mat_a_blk_elems[2], mat_b_col[0]); mat_b_rearr[3] = _mm256_mul_ps(mat_a_blk_elems[3], mat_b_col[0]); mat_b_rearr[4] = _mm256_mul_ps(mat_a_blk_elems[4], mat_b_col[0]); mat_b_rearr[5] = _mm256_mul_ps(mat_a_blk_elems[5], mat_b_col[0]); mat_b_rearr[6] = _mm256_mul_ps(mat_a_blk_elems[6], mat_b_col[0]); mat_b_rearr[7] = _mm256_mul_ps(mat_a_blk_elems[7], mat_b_col[0]); #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - 
(a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup 
+ cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 
cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = 
_mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[4], 
mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = 
_mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row14): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = 
c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #endif //end loop of cols //} //i2 += 
cs_b_offset[6]; i4 += 8; } //trsm solve k = 0; //for (i2 = 0; i2 < numCols_b; i2 += 8) //{ //i2 = i1 + r; /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A #if !GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] 
= _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ #endif //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[2] = 
_mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); //i += cs_l; #if GEMM_ACCUM_A //(Row0): already done #else mat_b_rearr[0] = _mm256_sub_ps(mat_b_col[0], mat_b_rearr[0]); #endif #if GEMM_ACCUM_A mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[1] = _mm256_sub_ps(mat_b_col[1], mat_b_rearr[1]); mat_b_rearr[2] = _mm256_sub_ps(mat_b_col[2], mat_b_rearr[2]); mat_b_rearr[3] = _mm256_sub_ps(mat_b_col[3], mat_b_rearr[3]); mat_b_rearr[4] = _mm256_sub_ps(mat_b_col[4], mat_b_rearr[4]); mat_b_rearr[5] = _mm256_sub_ps(mat_b_col[5], mat_b_rearr[5]); mat_b_rearr[6] = _mm256_sub_ps(mat_b_col[6], mat_b_rearr[6]); mat_b_rearr[7] = _mm256_sub_ps(mat_b_col[7], mat_b_rearr[7]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) 
mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[0])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[1])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[2])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[3])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[4])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[5])); //i += cs_l; //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A32 to A72 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[1])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup 
+ 2 + cs_l_offset[2])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[3])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[4])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[5])); //i += cs_l; //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A43 to A73 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[2])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[3])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[4])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[5])); //i += cs_l; //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A54 to A74 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[3])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup 
+ 4 + cs_l_offset[4])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[5])); //i += cs_l; //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A65 to A75 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 5 + cs_l_offset[4])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 5 + cs_l_offset[5])); //i += cs_l; //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A76 to register mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 6 + cs_l_offset[5])); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = 
_mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = 
_mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + i2, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)+i2), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + i2), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + i2), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + i2), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + i2), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + i2), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + i2), mat_b_col[7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; //} i += cs_b_offset[6]; i2 += cs_b_offset[6]; } } //numRows of A ///////////////////loop ends ///////////////////// } static void trsm_AutXB_block_allSmallSizedMatrices_alpha_unitDiag(float *ptr_l, float *ptr_b, int numRows_lb, int numCols_b, int rs_l, int rs_b, int cs_l, int cs_b, float alpha) { //float ones = 1.0; int i, i1, i2, i4, j, k, l, r; int cs_b_offset[7]; int cs_l_offset[7]; float *ptr_b_dup, *ptr_l_dup; //57 number of ymm(256 bits) registers used __m256 mat_b_col[8]; __m256 mat_b_rearr[8]; __m256 mat_a_blk_elems[8]; //__m256 mat_a_diag_inv[8]; //__m256 reciprocal_diags[2]; __m256 alphaReg; alphaReg = _mm256_broadcast_ss((float const *)&alpha); // ---> considering that the matrix size is multiple of 16 rows and 8 cols <--- // //L matrix offsets cs_l_offset[0] = (cs_l << 1); cs_l_offset[1] = cs_l + cs_l_offset[0]; cs_l_offset[2] = (cs_l << 2); cs_l_offset[3] = cs_l + cs_l_offset[2]; cs_l_offset[4] = cs_l_offset[0] + cs_l_offset[2]; cs_l_offset[5] = cs_l + 
cs_l_offset[4]; cs_l_offset[6] = (cs_l_offset[5] + cs_l); cs_b_offset[0] = (cs_b << 1); cs_b_offset[1] = cs_b + cs_b_offset[0]; cs_b_offset[2] = (cs_b << 2); cs_b_offset[3] = cs_b + cs_b_offset[2]; cs_b_offset[4] = cs_b_offset[0] + cs_b_offset[2]; cs_b_offset[5] = cs_b + cs_b_offset[4]; cs_b_offset[6] = (cs_b_offset[5] + cs_b); #if 0 //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3)); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 4)); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 5)); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 6)); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + 7)); //Broadcast A21 to A71 to registers mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 2)); mat_a_blk_elems[8] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 3)); mat_a_blk_elems[9] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 4)); mat_a_blk_elems[10] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 5)); mat_a_blk_elems[11] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 6)); mat_a_blk_elems[12] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l + 7)); //Broadcast A32 to A72 to registers mat_a_blk_elems[13] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 3)); mat_a_blk_elems[14] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 4)); mat_a_blk_elems[15] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 5)); mat_a_blk_elems[16] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 6)); mat_a_blk_elems[17] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0] + 7)); //Broadcast A43 to A73 to registers mat_a_blk_elems[18] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 4)); mat_a_blk_elems[19] = 
_mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 5)); mat_a_blk_elems[20] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 6)); mat_a_blk_elems[21] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1] + 7)); //Broadcast A54 to A74 to registers mat_a_blk_elems[22] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 5)); mat_a_blk_elems[23] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 6)); mat_a_blk_elems[24] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2] + 7)); //Broadcast A65 to A75 to registers mat_a_blk_elems[25] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 6)); mat_a_blk_elems[26] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3] + 7)); //Broadcast A76 to register mat_a_blk_elems[27] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4] + 7)); #endif /***************** first set of 8 rows of B processing starts *****************/ ptr_b_dup = ptr_b; i = 0; for (j = 0; j < numCols_b; j += 8) { /////////////////// Complete Upper 8x8 block trsm of B :- upper 8x8 block of B with upper 8x8 block of A //read 8x8 block of B into registers mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], 
mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = 
_mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], alphaReg); mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], alphaReg); mat_b_col[2] = _mm256_mul_ps(mat_b_col[2], alphaReg); mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], alphaReg); mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], alphaReg); mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], alphaReg); mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], alphaReg); mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], alphaReg); //(Row0) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[0])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[1])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[2])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[3])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[4])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l + cs_l_offset[5])); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_col[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_col[1]);//d = c - (a*b) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = 
_mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[0])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[1])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[2])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[3])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[4])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l + 1 + cs_l_offset[5])); //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_col[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_col[2]);//d = c - (a*b) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[1])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[2])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[3])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[4])); 
mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l + 2 + cs_l_offset[5])); //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_col[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_col[3]);//d = c - (a*b) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[2])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[3])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[4])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l + 3 + cs_l_offset[5])); //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_col[4] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_col[4]);//d = c - (a*b) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[3])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[4])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l + 4 + cs_l_offset[5])); //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_col[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_col[5]);//d = c - (a*b) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_col[6]);//d = c - (a*b) 
mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 5 + cs_l_offset[4])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l + 5 + cs_l_offset[5])); //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_col[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_col[6]);//d = c - (a*b) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_col[7]);//d = c - (a*b) mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l + 6 + cs_l_offset[5])); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_col[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_col[7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// /* transpose steps start */ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low 
elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_rearr[2] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); /* transpose steps end */ //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup, mat_b_rearr[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)), mat_b_rearr[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0]), mat_b_rearr[2]); _mm256_storeu_ps((float *)(ptr_b_dup + 
cs_b_offset[1]), mat_b_rearr[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2]), mat_b_rearr[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3]), mat_b_rearr[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4]), mat_b_rearr[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5]), mat_b_rearr[7]); i += cs_b_offset[6]; ptr_b_dup += cs_b_offset[6]; //i += 8; //ptr_b_dup += 8; } //c = 0; /***************** first set of 8 cols of B processing done *****************/ ptr_b_dup = ptr_b; i1 = 0; //Start loop for cols of B to be processed in size of blk_width for (j = 8; j < numRows_lb; j += 8)//m :- 8x8 block row { ptr_l += cs_l_offset[6]; //ptr_b += j; //ptr_b_dup += 8; ptr_b_dup += 8; i1 += 8; i = i1; i2 = 0; for (r = 0; r < numCols_b; r += GEMM_BLK_V1) { #if GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_col[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_col[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_col[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_col[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_col[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_col[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_col[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_col[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_rearr[0] = _mm256_unpacklo_ps(mat_b_col[0], mat_b_col[1]); mat_b_rearr[1] = _mm256_unpacklo_ps(mat_b_col[2], mat_b_col[3]); mat_b_rearr[2] = _mm256_unpacklo_ps(mat_b_col[4], mat_b_col[5]); mat_b_rearr[3] = _mm256_unpacklo_ps(mat_b_col[6], mat_b_col[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], 
mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_rearr[0] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_rearr[4] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_rearr[1] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_rearr[5] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); ////unpackhigh//// mat_b_col[0] = _mm256_unpackhi_ps(mat_b_col[0], mat_b_col[1]); mat_b_col[1] = _mm256_unpackhi_ps(mat_b_col[2], mat_b_col[3]); mat_b_col[2] = _mm256_unpackhi_ps(mat_b_col[4], mat_b_col[5]); mat_b_col[3] = _mm256_unpackhi_ps(mat_b_col[6], mat_b_col[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_rearr[2] = 
_mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_rearr[6] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_rearr[3] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_rearr[7] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); /* transpose steps end */ mat_b_rearr[0] = _mm256_mul_ps(mat_b_rearr[0], alphaReg); mat_b_rearr[1] = _mm256_mul_ps(mat_b_rearr[1], alphaReg); mat_b_rearr[2] = _mm256_mul_ps(mat_b_rearr[2], alphaReg); mat_b_rearr[3] = _mm256_mul_ps(mat_b_rearr[3], alphaReg); mat_b_rearr[4] = _mm256_mul_ps(mat_b_rearr[4], alphaReg); mat_b_rearr[5] = _mm256_mul_ps(mat_b_rearr[5], alphaReg); mat_b_rearr[6] = _mm256_mul_ps(mat_b_rearr[6], alphaReg); mat_b_rearr[7] = _mm256_mul_ps(mat_b_rearr[7], alphaReg); #endif //i = 0; ptr_l_dup = ptr_l; i4 = i2; for (l = 0; l < j; l += 8) // move across m { //for (k = 0; k < numCols_b; k += 8) // move across n for the same value of l (index of m) //{ /////////////////// Partial Lower 8x8 block trsm of B //Read current 8 cols of B columns from specified 8x8 current-block of B mat_a_blk_elems[0] = _mm256_loadu_ps((float const *)ptr_b + i4); mat_a_blk_elems[1] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b)); mat_a_blk_elems[2] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[0])); mat_a_blk_elems[3] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[1])); mat_a_blk_elems[4] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[2])); mat_a_blk_elems[5] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[3])); mat_a_blk_elems[6] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[4])); mat_a_blk_elems[7] = _mm256_loadu_ps((float const *)(ptr_b + i4 + cs_b_offset[5])); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_a_blk_elems[0], mat_a_blk_elems[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_a_blk_elems[2], mat_a_blk_elems[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_a_blk_elems[4], mat_a_blk_elems[5]); 
mat_b_col[3] = _mm256_unpacklo_ps(mat_a_blk_elems[6], mat_a_blk_elems[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_a_blk_elems[0] = _mm256_unpackhi_ps(mat_a_blk_elems[0], mat_a_blk_elems[1]); mat_a_blk_elems[1] = _mm256_unpackhi_ps(mat_a_blk_elems[2], mat_a_blk_elems[3]); mat_a_blk_elems[2] = _mm256_unpackhi_ps(mat_a_blk_elems[4], mat_a_blk_elems[5]); mat_a_blk_elems[3] = _mm256_unpackhi_ps(mat_a_blk_elems[6], mat_a_blk_elems[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_a_blk_elems[4] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0x44); mat_a_blk_elems[5] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0xEE); mat_a_blk_elems[6] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0x44); mat_a_blk_elems[7] = _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0xEE); #else mat_a_blk_elems[6] = _mm256_shuffle_ps(mat_a_blk_elems[0], mat_a_blk_elems[1], 0x4E); mat_a_blk_elems[7] 
= _mm256_shuffle_ps(mat_a_blk_elems[2], mat_a_blk_elems[3], 0x4E); mat_a_blk_elems[4] = _mm256_blend_ps(mat_a_blk_elems[0], mat_a_blk_elems[6], 0xCC); mat_a_blk_elems[5] = _mm256_blend_ps(mat_a_blk_elems[1], mat_a_blk_elems[6], 0x33); mat_a_blk_elems[6] = _mm256_blend_ps(mat_a_blk_elems[2], mat_a_blk_elems[7], 0xCC); mat_a_blk_elems[7] = _mm256_blend_ps(mat_a_blk_elems[3], mat_a_blk_elems[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_a_blk_elems[4], mat_a_blk_elems[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_a_blk_elems[4], mat_a_blk_elems[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_a_blk_elems[5], mat_a_blk_elems[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_a_blk_elems[5], mat_a_blk_elems[7], 0x31); /* transpose steps end */ //Broadcast A8,0 to A15,0 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); //i4 = k >> 3; ptr_l_dup++; #if GEMM_ACCUM_A //(Row8): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[0], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = 
_mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_mul_ps(mat_a_blk_elems[0], mat_b_col[0]); mat_b_rearr[1] = _mm256_mul_ps(mat_a_blk_elems[1], mat_b_col[0]); mat_b_rearr[2] = _mm256_mul_ps(mat_a_blk_elems[2], mat_b_col[0]); mat_b_rearr[3] = _mm256_mul_ps(mat_a_blk_elems[3], mat_b_col[0]); mat_b_rearr[4] = _mm256_mul_ps(mat_a_blk_elems[4], mat_b_col[0]); mat_b_rearr[5] = _mm256_mul_ps(mat_a_blk_elems[5], mat_b_col[0]); mat_b_rearr[6] = _mm256_mul_ps(mat_a_blk_elems[6], mat_b_col[0]); mat_b_rearr[7] = _mm256_mul_ps(mat_a_blk_elems[7], mat_b_col[0]); #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row9): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], 
mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[1], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[1], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[1], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,2 to A15,2 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); 
mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row10): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[2], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[2], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[2], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[2], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,3 to A15,3 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const 
*)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row11): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[3], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[3], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[3], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[3], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[3], 
mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[3], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,4 to A15,4 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row12): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = 
_mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[4], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[4], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[4], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[4], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[4], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[4], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,5 to A15,5 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row13): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - 
(a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[5], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[5], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[5], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[5], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[5], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[5], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[5], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,6 to A15,6 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row14): FMA operations of b2 with elements of 
indices from (2, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[6], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[6], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[6], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[6], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[6], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[6], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[6], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[6], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A8,7 to A15,7 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); 
mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[7] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); ptr_l_dup++; #if GEMM_ACCUM_A //(Row15): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[0] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[0] = _mm256_fmadd_ps(mat_a_blk_elems[0], mat_b_col[7], mat_b_rearr[0]);//d = c - (a*b) mat_b_rearr[1] = _mm256_fmadd_ps(mat_a_blk_elems[1], mat_b_col[7], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fmadd_ps(mat_a_blk_elems[2], mat_b_col[7], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fmadd_ps(mat_a_blk_elems[3], mat_b_col[7], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fmadd_ps(mat_a_blk_elems[4], mat_b_col[7], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fmadd_ps(mat_a_blk_elems[5], mat_b_col[7], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fmadd_ps(mat_a_blk_elems[6], mat_b_col[7], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = 
_mm256_fmadd_ps(mat_a_blk_elems[7], mat_b_col[7], mat_b_rearr[7]);//d = c - (a*b) #endif //end loop of cols //} //i2 += cs_b_offset[6]; i4 += 8; } //trsm solve k = 0; //for (i2 = 0; i2 < numCols_b; i2 += 8) //{ //i2 = i1 + r; /////////////////// Complete Lower 8x8 block trsm of B :- lower 8x8 block of B with lower right 8x8 block of A #if !GEMM_ACCUM_A //Read 8 cols of B columns of Block-to-be-solved mat_b_rearr[0] = _mm256_loadu_ps((float const *)ptr_b + i); mat_b_rearr[1] = _mm256_loadu_ps((float const *)(ptr_b + cs_b + i)); mat_b_rearr[2] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[0] + i)); mat_b_rearr[3] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[1] + i)); mat_b_rearr[4] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[2] + i)); mat_b_rearr[5] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[3] + i)); mat_b_rearr[6] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[4] + i)); mat_b_rearr[7] = _mm256_loadu_ps((float const *)(ptr_b + cs_b_offset[5] + i)); /* transpose steps start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = 
_mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ mat_b_col[0] = _mm256_mul_ps(mat_b_col[0], alphaReg); mat_b_col[1] = _mm256_mul_ps(mat_b_col[1], alphaReg); mat_b_col[2] = 
_mm256_mul_ps(mat_b_col[2], alphaReg); mat_b_col[3] = _mm256_mul_ps(mat_b_col[3], alphaReg); mat_b_col[4] = _mm256_mul_ps(mat_b_col[4], alphaReg); mat_b_col[5] = _mm256_mul_ps(mat_b_col[5], alphaReg); mat_b_col[6] = _mm256_mul_ps(mat_b_col[6], alphaReg); mat_b_col[7] = _mm256_mul_ps(mat_b_col[7], alphaReg); #endif //Broadcast A10 to A70 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l)); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[0])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[1])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[2])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[3])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[4])); mat_a_blk_elems[6] = _mm256_broadcast_ss((float const *)(ptr_l_dup + cs_l_offset[5])); //i += cs_l; #if GEMM_ACCUM_A //(Row0): already done #else mat_b_rearr[0] = _mm256_sub_ps(mat_b_col[0], mat_b_rearr[0]); #endif #if GEMM_ACCUM_A mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #else mat_b_rearr[1] = _mm256_sub_ps(mat_b_col[1], mat_b_rearr[1]); mat_b_rearr[2] = _mm256_sub_ps(mat_b_col[2], mat_b_rearr[2]); mat_b_rearr[3] = _mm256_sub_ps(mat_b_col[3], 
mat_b_rearr[3]); mat_b_rearr[4] = _mm256_sub_ps(mat_b_col[4], mat_b_rearr[4]); mat_b_rearr[5] = _mm256_sub_ps(mat_b_col[5], mat_b_rearr[5]); mat_b_rearr[6] = _mm256_sub_ps(mat_b_col[6], mat_b_rearr[6]); mat_b_rearr[7] = _mm256_sub_ps(mat_b_col[7], mat_b_rearr[7]); //(Row1): FMA operations of b1 with elements of indices from (1, 0) uptill (7, 0) mat_b_rearr[1] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[0], mat_b_rearr[1]);//d = c - (a*b) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[0], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[0], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[0], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[0], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[0], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[6], mat_b_rearr[0], mat_b_rearr[7]);//d = c - (a*b) #endif //Broadcast A21 to A71 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[0])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[1])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[2])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[3])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[4])); mat_a_blk_elems[5] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 1 + cs_l_offset[5])); //i += cs_l; //(Row2): FMA operations of b2 with elements of indices from (2, 0) uptill (7, 0) mat_b_rearr[2] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[1], mat_b_rearr[2]);//d = c - (a*b) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[1], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = 
_mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[1], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[1], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[1], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[5], mat_b_rearr[1], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A32 to A72 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[1])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[2])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[3])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[4])); mat_a_blk_elems[4] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 2 + cs_l_offset[5])); //i += cs_l; //(Row3): FMA operations of b3 with elements of indices from (3, 0) uptill (7, 0) mat_b_rearr[3] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[2], mat_b_rearr[3]);//d = c - (a*b) mat_b_rearr[4] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[2], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[2], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[2], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[4], mat_b_rearr[2], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A43 to A73 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[2])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[3])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[4])); mat_a_blk_elems[3] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 3 + cs_l_offset[5])); //i += cs_l; //(Row4): FMA operations of b4 with elements of indices from (4, 0) uptill (7, 0) mat_b_rearr[4] = 
_mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[3], mat_b_rearr[4]);//d = c - (a*b) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[3], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[3], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[3], mat_b_rearr[3], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A54 to A74 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[3])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[4])); mat_a_blk_elems[2] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 4 + cs_l_offset[5])); //i += cs_l; //(Row5): FMA operations of b5 with elements of indices from (5, 0) uptill (7, 0) mat_b_rearr[5] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[4], mat_b_rearr[5]);//d = c - (a*b) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[4], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[2], mat_b_rearr[4], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A65 to A75 to registers mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 5 + cs_l_offset[4])); mat_a_blk_elems[1] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 5 + cs_l_offset[5])); //i += cs_l; //(Row6): FMA operations of b6 with elements of indices from (6, 0) uptill (7, 0) mat_b_rearr[6] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[5], mat_b_rearr[6]);//d = c - (a*b) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[1], mat_b_rearr[5], mat_b_rearr[7]);//d = c - (a*b) //Broadcast A76 to register mat_a_blk_elems[0] = _mm256_broadcast_ss((float const *)(ptr_l_dup + 6 + cs_l_offset[5])); //(Row7): FMA operations of b7 with elements of index (7, 0) mat_b_rearr[7] = _mm256_fnmadd_ps(mat_a_blk_elems[0], mat_b_rearr[6], mat_b_rearr[7]);//d = c - (a*b) //////////////////////////////////////////////////////////////////////////////// /* transpose steps 
start */ ////unpacklow//// mat_b_col[0] = _mm256_unpacklo_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_col[1] = _mm256_unpacklo_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_col[2] = _mm256_unpacklo_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_col[3] = _mm256_unpacklo_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange low elements #if REARRANGE_SHFL == 1 mat_b_col[4] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x44); mat_b_col[5] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0xEE); mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x44); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0xEE); #else mat_b_col[6] = _mm256_shuffle_ps(mat_b_col[0], mat_b_col[1], 0x4E); mat_b_col[7] = _mm256_shuffle_ps(mat_b_col[2], mat_b_col[3], 0x4E); mat_b_col[4] = _mm256_blend_ps(mat_b_col[0], mat_b_col[6], 0xCC); mat_b_col[5] = _mm256_blend_ps(mat_b_col[1], mat_b_col[6], 0x33); mat_b_col[6] = _mm256_blend_ps(mat_b_col[2], mat_b_col[7], 0xCC); mat_b_col[7] = _mm256_blend_ps(mat_b_col[3], mat_b_col[7], 0x33); #endif //Merge rearranged low elements into complete rows mat_b_col[0] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x20); mat_b_col[4] = _mm256_permute2f128_ps(mat_b_col[4], mat_b_col[6], 0x31); mat_b_col[1] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x20); mat_b_col[5] = _mm256_permute2f128_ps(mat_b_col[5], mat_b_col[7], 0x31); ////unpackhigh//// mat_b_rearr[0] = _mm256_unpackhi_ps(mat_b_rearr[0], mat_b_rearr[1]); mat_b_rearr[1] = _mm256_unpackhi_ps(mat_b_rearr[2], mat_b_rearr[3]); mat_b_rearr[2] = _mm256_unpackhi_ps(mat_b_rearr[4], mat_b_rearr[5]); mat_b_rearr[3] = _mm256_unpackhi_ps(mat_b_rearr[6], mat_b_rearr[7]); //Rearrange high elements #if REARRANGE_SHFL == 1 mat_b_rearr[4] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x44); mat_b_rearr[5] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0xEE); mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x44); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], 
mat_b_rearr[3], 0xEE); #else mat_b_rearr[6] = _mm256_shuffle_ps(mat_b_rearr[0], mat_b_rearr[1], 0x4E); mat_b_rearr[7] = _mm256_shuffle_ps(mat_b_rearr[2], mat_b_rearr[3], 0x4E); mat_b_rearr[4] = _mm256_blend_ps(mat_b_rearr[0], mat_b_rearr[6], 0xCC); mat_b_rearr[5] = _mm256_blend_ps(mat_b_rearr[1], mat_b_rearr[6], 0x33); mat_b_rearr[6] = _mm256_blend_ps(mat_b_rearr[2], mat_b_rearr[7], 0xCC); mat_b_rearr[7] = _mm256_blend_ps(mat_b_rearr[3], mat_b_rearr[7], 0x33); #endif //Merge rearranged high elements into complete rows mat_b_col[2] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x20); mat_b_col[6] = _mm256_permute2f128_ps(mat_b_rearr[4], mat_b_rearr[6], 0x31); mat_b_col[3] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x20); mat_b_col[7] = _mm256_permute2f128_ps(mat_b_rearr[5], mat_b_rearr[7], 0x31); /* transpose steps end */ //Store the computed B columns _mm256_storeu_ps((float *)ptr_b_dup + i2, mat_b_col[0]); _mm256_storeu_ps((float *)(ptr_b_dup + (cs_b)+i2), mat_b_col[1]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[0] + i2), mat_b_col[2]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[1] + i2), mat_b_col[3]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[2] + i2), mat_b_col[4]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[3] + i2), mat_b_col[5]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[4] + i2), mat_b_col[6]); _mm256_storeu_ps((float *)(ptr_b_dup + cs_b_offset[5] + i2), mat_b_col[7]); //printf("writing B => m[%d], n[%d], [%f]\n", j, k, *(ptr_b_dup + k)); k++; //} i += cs_b_offset[6]; i2 += cs_b_offset[6]; } } //numRows of A ///////////////////loop ends ///////////////////// } #endif blis-0.6.1/kernels/zen/bli_kernels_zen.h000066400000000000000000000055241360743507500202440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ // -- level-1v -- // amaxv (intrinsics) AMAXV_KER_PROT( float, s, amaxv_zen_int ) AMAXV_KER_PROT( double, d, amaxv_zen_int ) // axpyv (intrinsics) AXPYV_KER_PROT( float, s, axpyv_zen_int ) AXPYV_KER_PROT( double, d, axpyv_zen_int ) // axpyv (intrinsics unrolled x10) AXPYV_KER_PROT( float, s, axpyv_zen_int10 ) AXPYV_KER_PROT( double, d, axpyv_zen_int10 ) // dotv (intrinsics) DOTV_KER_PROT( float, s, dotv_zen_int ) DOTV_KER_PROT( double, d, dotv_zen_int ) // dotv (intrinsics, unrolled x10) DOTV_KER_PROT( float, s, dotv_zen_int10 ) DOTV_KER_PROT( double, d, dotv_zen_int10 ) // dotxv (intrinsics) DOTXV_KER_PROT( float, s, dotxv_zen_int ) DOTXV_KER_PROT( double, d, dotxv_zen_int ) // scalv (intrinsics) SCALV_KER_PROT( float, s, scalv_zen_int ) SCALV_KER_PROT( double, d, scalv_zen_int ) // scalv (intrinsics unrolled x10) SCALV_KER_PROT( float, s, scalv_zen_int10 ) SCALV_KER_PROT( double, d, scalv_zen_int10 ) // -- level-1f -- // axpyf (intrinsics) AXPYF_KER_PROT( float, s, axpyf_zen_int_8 ) AXPYF_KER_PROT( double, d, axpyf_zen_int_8 ) // dotxf (intrinsics) DOTXF_KER_PROT( float, s, dotxf_zen_int_8 ) DOTXF_KER_PROT( double, d, dotxf_zen_int_8 ) blis-0.6.1/kernels/zen2/000077500000000000000000000000001360743507500150025ustar00rootroot00000000000000blis-0.6.1/kernels/zen2/.gitignore000066400000000000000000000001071360743507500167700ustar00rootroot00000000000000# Ignore everything in this directory * # Except this file !.gitignore blis-0.6.1/mpi_test/000077500000000000000000000000001360743507500143055ustar00rootroot00000000000000blis-0.6.1/mpi_test/Makefile000066400000000000000000000172041360743507500157510ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for standalone BLIS test drivers. # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all \ blis essl \ clean cleanx # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. # - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. 
ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := .. LIB_PATH = ../lib/$(CONFIG_NAME) INC_PATH = ../include/$(CONFIG_NAME) SHARE_PATH := .. endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. -include $(SHARE_PATH)/common.mk # # --- BLAS and LAPACK implementations ------------------------------------------ # # BLAS library path(s). This is where the BLAS libraries reside. BLAS_LIB_PATH := $(HOME)/flame/lib MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64/ ESSL_LIB_PATH := /soft/libraries/essl/current/lib64 # OpenBLAS OPENBLAS_LIB := $(BLAS_LIB_PATH)/libopenblas.a # ATLAS ATLAS_LIB := $(BLAS_LIB_PATH)/libf77blas.a \ $(BLAS_LIB_PATH)/libatlas.a # MKL MKL_LIB := -L$(MKL_LIB_PATH) \ -lmkl_sequential \ -lmkl_core \ -lmkl_intel_lp64 # ESSL # Note: ESSL is named differently for SMP and/or BG ESSL_LIB := $(ESSL_LIB_PATH)/libesslsmpbg.a \ -L$(IBM_MAIN_DIR)/xlsmp/bg/3.1/bglib64/ \ -L$(IBM_MAIN_DIR)/xlf/bg/14.1/bglib64/ \ -lxlsmp -lxlf90_r -lxlfmath -lxl # Accelerate MAC_LIB := -framework Accelerate # # --- General build definitions ------------------------------------------------ # TEST_SRC_PATH := . TEST_OBJ_PATH := . # Gather all local object files. TEST_OBJS := $(patsubst $(TEST_SRC_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(TEST_SRC_PATH)/*.c)) # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the "framework" CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add local header paths to CFLAGS CFLAGS += -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. 
#LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # # --- Targets/rules ------------------------------------------------------------ # # Complete list of possible targets when defining 'all': # # blis openblas atlas mkl mac essl # all: blis essl blis: test_gemm_blis.x \ test_hemm_blis.x \ test_herk_blis.x \ test_her2k_blis.x \ test_trmm_blis.x \ test_trsm_blis.x essl: test_gemm_essl.x \ test_hemm_essl.x \ test_herk_essl.x \ test_her2k_essl.x \ test_trmm_essl.x \ test_trsm_essl.x openblas: test_gemv_openblas.x \ test_ger_openblas.x \ test_hemv_openblas.x \ test_her_openblas.x \ test_her2_openblas.x \ test_trmv_openblas.x \ test_trsv_openblas.x \ \ test_gemm_openblas.x \ test_hemm_openblas.x \ test_herk_openblas.x \ test_her2k_openblas.x \ test_trmm_openblas.x \ test_trsm_openblas.x atlas: test_gemv_atlas.x \ test_ger_atlas.x \ test_hemv_atlas.x \ test_her_atlas.x \ test_her2_atlas.x \ test_trmv_atlas.x \ test_trsv_atlas.x \ \ test_gemm_atlas.x \ test_hemm_atlas.x \ test_herk_atlas.x \ test_her2k_atlas.x \ test_trmm_atlas.x \ test_trsm_atlas.x mkl: test_gemv_mkl.x \ test_ger_mkl.x \ test_hemv_mkl.x \ test_her_mkl.x \ test_her2_mkl.x \ test_trmv_mkl.x \ test_trsv_mkl.x \ \ test_gemm_mkl.x \ test_hemm_mkl.x \ test_herk_mkl.x \ test_her2k_mkl.x \ test_trmm_mkl.x \ test_trsm_mkl.x mac: test_gemv_mac.x \ test_ger_mac.x \ test_hemv_mac.x \ test_her_mac.x \ test_her2_mac.x \ test_trmv_mac.x \ test_trsv_mac.x \ \ test_gemm_mac.x \ test_hemm_mac.x \ test_herk_mac.x \ test_her2k_mac.x \ test_trmm_mac.x \ test_trsm_mac.x # --Object file rules -- $(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(CC) $(CFLAGS) -c $< -o $@ test_%_openblas.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"openblas\" -c $< -o $@ test_%_atlas.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"atlas\" -c $< -o $@ test_%_mkl.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"mkl\" -c $< -o $@ test_%_essl.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"essl\" -c $< -o $@ test_%_mac.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"mac\" -c $< -o $@ test_%_blis.o: test_%.c $(CC) 
$(CFLAGS) -DBLIS -c $< -o $@ # -- Executable file rules -- # NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS # on the link command line in case BLIS was configured with the BLAS # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. test_%_openblas.x: test_%_openblas.o $(LIBBLIS_LINK) $(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_atlas.x: test_%_atlas.o $(LIBBLIS_LINK) $(LINKER) $< $(ATLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_mkl.x: test_%_mkl.o $(LIBBLIS_LINK) $(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_essl.x: test_%_essl.o $(LIBBLIS_LINK) $(LINKER) $< $(ESSL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_mac.x: test_%_mac.o $(LIBBLIS_LINK) $(LINKER) $< $(MAC_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_blis.x: test_%_blis.o $(LIBBLIS_LINK) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.o *.x blis-0.6.1/mpi_test/test_gemm.c000066400000000000000000000145501360743507500164420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" #include // transa transb m n k alpha a lda b ldb beta c ldc //void dgemm_( char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); k_input = strtol( argv[3], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX; #endif for ( p = 
p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_b, k, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (0.9/1.0), 0.2, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_gemm( &alpha, //bli_gemm4m( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_real( dt_a ) ) { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); dcomplex* betap = bli_obj_buffer( &beta ); 
dcomplex* cp = bli_obj_buffer( &c ); zgemm_( &transa, //zgemm3m_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; } blis-0.6.1/mpi_test/test_hemm.c000066400000000000000000000152411360743507500164410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" #include // side uploa m n alpha a lda b ldb beta c ldc //void dsymm_( char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; //uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); 
else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uplo, &a ); // Randomize A, make it densely Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); /* bli_obj_toggle_uplo( &a ); bli_obj_inc_diag_offset( 1, &a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_offset( -1, &a ); bli_obj_toggle_uplo( &a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); bli_scalm( &BLIS_TWO, &a ); bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_hemm( side, //bli_hemm4m( side, &alpha, &a, &b, &beta, &c ); #else f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* 
ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsymm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; } blis-0.6.1/mpi_test/test_her2k.c000066400000000000000000000127621360743507500165330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" #include // uploa transa m k alpha a lda b ldb beta c ldc //void dsyr2k_( char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); k_input = strtol( argv[3], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; uplo = BLIS_LOWER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t 
)abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_b, m, k, 0, 0, &b ); bli_obj_create( dt_c, m, m, 0, 0, &c ); bli_obj_create( dt_c, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &c ); bli_obj_set_uplo( uplo, &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_her2k( &alpha, &a, &b, &beta, &c ); #else f77_char uploa = 'L'; f77_char transa = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyr2k_( &uploa, &transa, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_her2k_blis" ); #else printf( "data_her2k_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); 
bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; } blis-0.6.1/mpi_test/test_herk.c000066400000000000000000000122521360743507500164430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" #include // uploa transa m k alpha a lda beta c ldc //void dsyrk_( char*, char*, int*, int*, double*, double*, int*, double*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt_a, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); k_input = strtol( argv[3], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); dt_a = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; uplo = BLIS_LOWER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, k, 0, 0, &a ); bli_obj_create( dt_c, m, m, 0, 0, &c ); bli_obj_create( dt_c, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &c ); bli_obj_set_uplo( uplo, &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING 
); bli_herk( &alpha, &a, &beta, &c ); #else f77_char uploa = 'L'; f77_char transa = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyrk_( &uploa, &transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; } blis-0.6.1/mpi_test/test_trmm.c000066400000000000000000000144371360743507500165000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" #include // side uplo trans diag m n alpha a lda b ldb //void dtrmm_( char*, char*, char*, char*, int*, int*, double*, double*, int*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; 
//uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); /* bli_obj_toggle_uplo( &a ); bli_obj_inc_diag_offset( -1, &a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_offset( 1, &a ); bli_obj_toggle_uplo( &a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); bli_scalm( &BLIS_TWO, &a ); //bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trmm( side, //bli_trmm4m( side, &alpha, &a, &c ); #else f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = 
bli_obj_buffer( &a ); double* cp = bli_obj_buffer( &c ); dtrmm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trmm_blis" ); #else printf( "data_trmm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; } blis-0.6.1/mpi_test/test_trsm.c000066400000000000000000000163221360743507500165010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" #include // side uplo trans diag m n alpha a lda b ldb //void dtrsm_( char*, char*, char*, char*, int*, int*, double*, double*, int*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT; //dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX; #endif 
side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; //uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); //bli_obj_set_diag( BLIS_UNIT_DIAG, &a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); /* { obj_t a2; bli_obj_alias_to( &a, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_inc_diag_offset( 1, &a2 ); bli_setm( &BLIS_ZERO, &a2 ); bli_obj_inc_diag_offset( -2, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a2 ); bli_scalm( &BLIS_TWO, &a2 ); //bli_scalm( &BLIS_TWO, &a ); } */ bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_invertd( &a ); bli_printm( "a", &a, "%4.1f", "" ); bli_invertd( &a ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trsm( side, //bli_trsm4m( side, //bli_trsm3m( side, &alpha, &a, &c ); #else if ( bli_is_real( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa 
= 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float * alphap = bli_obj_buffer( &alpha ); float * ap = bli_obj_buffer( &a ); float * cp = bli_obj_buffer( &c ); strsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else // if ( bli_is_complex( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrsm_( &side, //ztrsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trsm_blis" ); #else printf( "data_trsm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; } blis-0.6.1/ref_kernels/000077500000000000000000000000001360743507500147605ustar00rootroot00000000000000blis-0.6.1/ref_kernels/1/000077500000000000000000000000001360743507500151205ustar00rootroot00000000000000blis-0.6.1/ref_kernels/1/bli_addv_ref.c000066400000000000000000000054471360743507500176760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like 
libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ ctype* restrict chi1 = x; \ ctype* restrict psi1 = y; \ \ if ( bli_is_conj( conjx ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ \ chi1 += incx; \ psi1 += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,adds)( *chi1, *psi1 ); \ \ chi1 += incx; \ psi1 += incy; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( addv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_amaxv_ref.c000066400000000000000000000115371360743507500200710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // Define BLAS-like interfaces with typed operands. // #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ dim_t* restrict i_max, \ cntx_t* restrict cntx \ ) \ { \ ctype_r* minus_one = PASTEMAC(chr,m1); \ dim_t* zero_i = PASTEMAC(i,0); \ \ ctype_r chi1_r; \ ctype_r chi1_i; \ ctype_r abs_chi1; \ ctype_r abs_chi1_max; \ dim_t i_max_l; \ \ /* If the vector length is zero, return early. This directly emulates the behavior of netlib BLAS's i?amax() routines. */ \ if ( bli_zero_dim1( n ) ) \ { \ PASTEMAC(i,copys)( *zero_i, *i_max ); \ return; \ } \ \ /* Initialize the index of the maximum absolute value to zero. */ \ PASTEMAC(i,copys)( *zero_i, i_max_l ); \ \ /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ \ PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \ \ if ( incx == 1 ) \ { \ ctype* chi1 = x; \ \ for ( dim_t i = 0; i < n; ++i ) \ { \ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. 
*/ \ PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ PASTEMAC(chr,set0s)( abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ i_max_l = i; \ } \ \ chi1 += 1; \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ ctype* chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ PASTEMAC(chr,set0s)( abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ i_max_l = i; \ } \ } \ } \ \ /* Store the final index to the output variable. */ \ PASTEMAC(i,copys)( i_max_l, *i_max ); \ } INSERT_GENTFUNCR_BASIC2( amaxv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_axpbyv_ref.c000066400000000000000000000142501360743507500202610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* If alpha is zero and beta is zero, set to zero. 
*/ \ \ ctype* zero = PASTEMAC(ch,0); \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,setv_ker_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \ \ setv_p \ ( \ BLIS_NO_CONJUGATE, \ n, \ zero, \ y, incy, \ cntx \ ); \ return; \ } \ else if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ /* If alpha is zero and beta is one, return. */ \ return; \ } \ else \ { \ /* If alpha is zero, scale by beta. */ \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,scalv_ker_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \ \ scalv_p \ ( \ BLIS_NO_CONJUGATE, \ n, \ beta, \ y, incy, \ cntx \ ); \ return; \ } \ \ } \ else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ { \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* If alpha is one and beta is zero, use copyv. */ \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,copyv_ker_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ copyv_p \ ( \ conjx, \ n, \ x, incx, \ y, incy, \ cntx \ ); \ return; \ } \ else if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ /* If alpha is one and beta is one, use addv. */ \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,addv_ker_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ \ addv_p \ ( \ conjx, \ n, \ x, incx, \ y, incy, \ cntx \ ); \ return; \ } \ else \ { \ /* If alpha is one and beta is something else, use xpbyv. */ \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,xpbyv_ker_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \ \ xpbyv_p \ ( \ conjx, \ n, \ x, incx, \ beta, \ y, incy, \ cntx \ ); \ return; \ } \ } \ else \ { \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* If alpha is something else and beta is zero, use scal2v. 
*/ \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,scal2v_ker_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \ \ scal2v_p \ ( \ conjx, \ n, \ alpha, \ x, incx, \ y, incy, \ cntx \ ); \ return; \ } \ else if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ /* If alpha is something else and beta is one, use axpyv. */ \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,axpyv_ker_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ axpyv_p \ ( \ conjx, \ n, \ alpha, \ x, incx, \ y, incy, \ cntx \ ); \ return; \ } \ } \ \ /* If execution reaches here, alpha and beta are both non-zero/non-unit. */ \ \ if ( bli_is_conj( conjx ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpbyjs)( *alpha, x[i], *beta, y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpbyjs)( *alpha, *x, *beta, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpbys)( *alpha, x[i], *beta, y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpbys)( *alpha, *x, *beta, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( axpbyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_axpyv_ref.c000066400000000000000000000122211360743507500201130ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #if 0 #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ /* If alpha is zero, return. */ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* If alpha is one, use addv. */ \ if ( PASTEMAC(ch,eq1)( *alpha ) ) \ { \ /* Query the context for the kernel function pointer. 
*/ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,addv_ker_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ \ addv_p \ ( \ conjx, \ n, \ x, incx, \ y, incy, \ cntx \ ); \ return; \ } \ \ ctype* restrict chi1 = x; \ ctype* restrict psi1 = y; \ \ if ( bli_is_conj( conjx ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ /*PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] );*/ \ psi1[i] = fma( *alpha, chi1[i], psi1[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ \ chi1 += incx; \ psi1 += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ /*PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] );*/ \ psi1[i] = fma( *alpha, chi1[i], psi1[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ \ chi1 += incx; \ psi1 += incy; \ } \ } \ } \ } //INSERT_GENTFUNC_BASIC2( axpyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) GENTFUNC( float, s, axpyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) GENTFUNC( double, d, axpyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #endif #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ /* If alpha is zero, return. */ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* If alpha is one, use addv. */ \ if ( PASTEMAC(ch,eq1)( *alpha ) ) \ { \ /* Query the context for the kernel function pointer. 
*/ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,addv_ker_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ \ addv_p \ ( \ conjx, \ n, \ x, incx, \ y, incy, \ cntx \ ); \ return; \ } \ \ if ( bli_is_conj( conjx ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpyjs)( *alpha, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpyjs)( *alpha, *x, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpys)( *alpha, x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpys)( *alpha, *x, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( axpyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_copyv_ref.c000066400000000000000000000053141360743507500201110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ if ( bli_is_conj( conjx ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,copyjs)( x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,copyjs)( *x, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,copys)( x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,copys)( *x, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( copyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_dotv_ref.c000066400000000000000000000063651360743507500177340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict rho, \ cntx_t* restrict cntx \ ) \ { \ ctype dotxy; \ \ if ( bli_zero_dim1( n ) ) \ { \ PASTEMAC(ch,set0s)( *rho ); \ return; \ } \ \ PASTEMAC(ch,set0s)( dotxy ); \ \ conj_t conjx_use = conjx; \ \ /* If y must be conjugated, we do so indirectly by first toggling the effective conjugation of x and then conjugating the resulting dot product. */ \ if ( bli_is_conj( conjy ) ) \ bli_toggle_conj( &conjx_use ); \ \ if ( bli_is_conj( conjx_use ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,dotjs)( x[i], y[i], dotxy ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,dotjs)( *x, *y, dotxy ); \ \ x += incx; \ y += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,dots)( x[i], y[i], dotxy ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,dots)( *x, *y, dotxy ); \ \ x += incx; \ y += incy; \ } \ } \ } \ \ if ( bli_is_conj( conjy ) ) \ PASTEMAC(ch,conjs)( dotxy ); \ \ PASTEMAC(ch,copys)( dotxy, *rho ); \ } INSERT_GENTFUNC_BASIC2( dotv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_dotxv_ref.c000066400000000000000000000070741360743507500201220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict beta, \ ctype* restrict rho, \ cntx_t* restrict cntx \ ) \ { \ ctype dotxy; \ \ /* If beta is zero, clear rho. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ PASTEMAC(ch,set0s)( *rho ); \ } \ else \ { \ PASTEMAC(ch,scals)( *beta, *rho ); \ } \ \ /* If the vectors are empty or if alpha is zero, return early. 
*/ \ if ( bli_zero_dim1( n ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ PASTEMAC(ch,set0s)( dotxy ); \ \ /* If y must be conjugated, we do so indirectly by first toggling the effective conjugation of x and then conjugating the resulting dot product. */ \ conj_t conjx_use = conjx; \ \ if ( bli_is_conj( conjy ) ) \ bli_toggle_conj( &conjx_use ); \ \ if ( bli_is_conj( conjx_use ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,dotjs)( x[i], y[i], dotxy ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,dotjs)( *x, *y, dotxy ); \ \ x += incx; \ y += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,dots)( x[i], y[i], dotxy ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,dots)( *x, *y, dotxy ); \ \ x += incx; \ y += incy; \ } \ } \ } \ \ if ( bli_is_conj( conjy ) ) \ PASTEMAC(ch,conjs)( dotxy ); \ \ PASTEMAC(ch,axpys)( *alpha, dotxy, *rho ); \ } INSERT_GENTFUNC_BASIC2( dotxv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_invertv_ref.c000066400000000000000000000043511360743507500204460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ if ( incx == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,inverts)( x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,inverts)( *x ); \ \ x += incx; \ } \ } \ } INSERT_GENTFUNC_BASIC2( invertv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_scal2v_ref.c000066400000000000000000000071561360743507500201510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"

/*
   Reference scal2v kernel, instantiated below for each basic datatype.

   Writes alpha times x (optionally conjugated, per conjx) into y over
   n elements, using the scal2s / scal2js element macros.

   Special cases, both dispatched to kernels queried from cntx:
     - alpha == 0: y is filled with zeros via the setv kernel.
     - alpha == 1: x is copied to y via the copyv kernel, with conjx
       forwarded so that a requested conjugation is still applied.

   Returns immediately when n is zero.
*/
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, opname, arch, suf ) \
\
void PASTEMAC3(ch,opname,arch,suf) \
     ( \
       conj_t           conjx, \
       dim_t            n, \
       ctype*  restrict alpha, \
       ctype*  restrict x, inc_t incx, \
       ctype*  restrict y, inc_t incy, \
       cntx_t* restrict cntx \
     ) \
{ \
	if ( bli_zero_dim1( n ) ) return; \
\
	if ( PASTEMAC(ch,eq0)( *alpha ) ) \
	{ \
		/* If alpha is zero, use setv. */ \
\
		ctype* zero = PASTEMAC(ch,0); \
\
		/* Query the context for the kernel function pointer. */ \
		const num_t             dt     = PASTEMAC(ch,type); \
		PASTECH(ch,setv_ker_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \
\
		setv_p \
		( \
		  BLIS_NO_CONJUGATE, \
		  n, \
		  zero, \
		  y, incy, \
		  cntx \
		); \
		return; \
	} \
	else if ( PASTEMAC(ch,eq1)( *alpha ) ) \
	{ \
		/* If alpha is one, use copyv. (This branch previously re-tested */ \
		/* eq0 and so could never be taken.) */ \
\
		/* Query the context for the kernel function pointer. */ \
		const num_t              dt      = PASTEMAC(ch,type); \
		PASTECH(ch,copyv_ker_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \
\
		/* conjx must be forwarded: with alpha == 1 the operation still */ \
		/* has to conjugate x when asked to. */ \
		copyv_p \
		( \
		  conjx, \
		  n, \
		  x, incx, \
		  y, incy, \
		  cntx \
		); \
		return; \
	} \
\
	if ( bli_is_conj( conjx ) ) \
	{ \
		if ( incx == 1 && incy == 1 ) \
		{ \
			PRAGMA_SIMD \
			for ( dim_t i = 0; i < n; ++i ) \
			{ \
				PASTEMAC(ch,scal2js)( *alpha, x[i], y[i] ); \
			} \
		} \
		else \
		{ \
			for ( dim_t i = 0; i < n; ++i ) \
			{ \
				PASTEMAC(ch,scal2js)( *alpha, *x, *y ); \
\
				x += incx; \
				y += incy; \
			} \
		} \
	} \
	else \
	{ \
		if ( incx == 1 && incy == 1 ) \
		{ \
			PRAGMA_SIMD \
			for ( dim_t i = 0; i < n; ++i ) \
			{ \
				PASTEMAC(ch,scal2s)( *alpha, x[i], y[i] ); \
			} \
		} \
		else \
		{ \
			for ( dim_t i = 0; i < n; ++i ) \
			{ \
				PASTEMAC(ch,scal2s)( *alpha, *x, *y ); \
\
				x += incx; \
				y += incy; \
			} \
		} \
	} \
}

INSERT_GENTFUNC_BASIC2( scal2v, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
blis-0.6.1/ref_kernels/1/bli_scalv_ref.c000066400000000000000000000056341360743507500200660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjalpha, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ /* If alpha is one, return. */ \ if ( PASTEMAC(ch,eq1)( *alpha ) ) return; \ \ /* If alpha is zero, use setv. */ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ ctype* zero = PASTEMAC(ch,0); \ \ /* Query the context for the kernel function pointer. 
*/ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,setv_ker_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \ \ setv_p \ ( \ BLIS_NO_CONJUGATE, \ n, \ zero, \ x, incx, \ cntx \ ); \ return; \ } \ \ ctype alpha_conj; \ \ PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ \ if ( incx == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,scals)( alpha_conj, x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,scals)( alpha_conj, *x ); \ \ x += incx; \ } \ } \ } INSERT_GENTFUNC_BASIC2( scalv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_setv_ref.c000066400000000000000000000053501360743507500177320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjalpha, \ dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ if ( incx == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,set0s)( x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,set0s)( *x ); \ \ x += incx; \ } \ } \ } \ else \ { \ ctype alpha_conj; \ \ PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ \ if ( incx == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,copys)( alpha_conj, x[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,copys)( alpha_conj, *x ); \ \ x += incx; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( setv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_subv_ref.c000066400000000000000000000053071360743507500177320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ if ( bli_is_conj( conjx ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,subjs)( x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,subjs)( *x, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,subs)( x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,subs)( *x, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( subv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_swapv_ref.c000066400000000000000000000044631360743507500201150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,swaps)( x[i], y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,swaps)( *x, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } INSERT_GENTFUNC_BASIC2( swapv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1/bli_xpbyv_ref.c000066400000000000000000000070421360743507500201210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ /* If beta is zero, use copyv. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,copyv_ker_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ copyv_p \ ( \ conjx, \ n, \ x, incx, \ y, incy, \ cntx \ ); \ return; \ } \ /* If alpha is one, use addv. */ \ else if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ /* Query the context for the kernel function pointer. 
*/ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,addv_ker_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ \ addv_p \ ( \ conjx, \ n, \ x, incx, \ y, incy, \ cntx \ ); \ return; \ } \ \ if ( bli_is_conj( conjx ) ) \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,xpbyjs)( x[i], *beta, y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,xpbyjs)( *x, *beta, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ else \ { \ if ( incx == 1 && incy == 1 ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,xpbys)( x[i], *beta, y[i] ); \ } \ } \ else \ { \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,xpbys)( *x, *beta, *y ); \ \ x += incx; \ y += incy; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( xpbyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1f/000077500000000000000000000000001360743507500152665ustar00rootroot00000000000000blis-0.6.1/ref_kernels/1f/bli_axpy2v_ref.c000066400000000000000000000075511360743507500203550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ ctype* restrict alphax, \ ctype* restrict alphay, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict z, inc_t incz, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( n ) ) return; \ \ if ( incz == 1 && incx == 1 && incy == 1 ) \ { \ ctype chic, psic; \ \ if ( bli_is_noconj( conjx ) ) \ { \ if ( bli_is_noconj( conjy ) ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpys)( *alphax, x[i], z[i] ); \ PASTEMAC(ch,axpys)( *alphay, y[i], z[i] ); \ } \ } \ else /* if ( bli_is_conj( conjy ) ) */ \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,axpys)( *alphax, x[i], z[i] ); \ PASTEMAC(ch,copyjs)( y[i], psic ); \ PASTEMAC(ch,axpys)( *alphay, psic, z[i] ); \ } \ } \ } \ else /* if ( bli_is_conj( conjx ) ) */ \ { \ if ( bli_is_noconj( conjy ) ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,copyjs)( x[i], chic ); \ PASTEMAC(ch,axpys)( *alphax, chic, z[i] ); \ PASTEMAC(ch,axpys)( *alphay, y[i], z[i] ); \ } \ } \ else /* if ( bli_is_conj( conjy ) ) */ \ { \ 
PRAGMA_SIMD \ for ( dim_t i = 0; i < n; ++i ) \ { \ PASTEMAC(ch,copyjs)( x[i], chic ); \ PASTEMAC(ch,axpys)( *alphax, chic, z[i] ); \ PASTEMAC(ch,copyjs)( y[i], psic ); \ PASTEMAC(ch,axpys)( *alphay, psic, z[i] ); \ } \ } \ } \ } \ else \ { \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,axpyv_ker_ft) kfp_av \ = \ bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ kfp_av \ ( \ conjx, \ n, \ alphax, \ x, incx, \ z, incz, \ cntx \ ); \ \ kfp_av \ ( \ conjy, \ n, \ alphay, \ y, incy, \ z, incz, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC2( axpy2v, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1f/bli_axpyf_ref.c000066400000000000000000000077701360743507500202560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf, ff ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( m ) ) return; \ \ if ( inca == 1 && incx == 1 && incy == 1 && b_n == ff ) \ { \ ctype ax[ ff ]; \ \ /* Scale x by alpha, storing to a temporary array ax. */ \ if ( bli_is_conj( conjx ) ) \ { \ PRAGMA_SIMD \ for ( dim_t j = 0; j < ff; ++j ) \ PASTEMAC(ch,scal2js)( *alpha, x[j], ax[j] ); \ } \ else \ { \ PRAGMA_SIMD \ for ( dim_t j = 0; j < ff; ++j ) \ PASTEMAC(ch,scal2s)( *alpha, x[j], ax[j] ); \ } \ \ /* Accumulate ff separate axpyv's into y. */ \ if ( bli_is_noconj( conja ) ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < ff; ++j ) \ { \ PASTEMAC(ch,axpys)( ax[j], a[i + j*lda], y[i] ); \ } \ } \ else \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < ff; ++j ) \ { \ PASTEMAC(ch,axpyjs)( ax[j], a[i + j*lda], y[i] ); \ } \ } \ } \ else \ { \ /* Query the context for the kernel function pointer. 
*/ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,axpyv_ker_ft) kfp_av \ = \ bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ for ( dim_t i = 0; i < b_n; ++i ) \ { \ ctype* restrict a1 = a + (0 )*inca + (i )*lda; \ ctype* restrict chi1 = x + (i )*incx; \ ctype* restrict y1 = y + (0 )*incy; \ \ ctype alpha_chi1; \ \ PASTEMAC(ch,copycjs)( conjx, *chi1, alpha_chi1 ); \ PASTEMAC(ch,scals)( *alpha, alpha_chi1 ); \ \ kfp_av \ ( \ conja, \ m, \ &alpha_chi1, \ a1, inca, \ y1, incy, \ cntx \ ); \ } \ } \ } //INSERT_GENTFUNC_BASIC2( axpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) GENTFUNC( float, s, axpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 8 ) GENTFUNC( double, d, axpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 8 ) GENTFUNC( scomplex, c, axpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 8 ) GENTFUNC( dcomplex, z, axpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 8 ) blis-0.6.1/ref_kernels/1f/bli_dotaxpyv_ref.c000066400000000000000000000105261360743507500207760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjxt, \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict rho, \ ctype* restrict z, inc_t incz, \ cntx_t* restrict cntx \ ) \ { \ if ( bli_zero_dim1( m ) ) return; \ \ if ( incz == 1 && incx == 1 && incy == 1 ) \ { \ if ( bli_is_noconj( conjx ) ) \ { \ conj_t conjxt_use = conjxt; \ ctype dotxy; \ \ PASTEMAC(ch,set0s)( dotxy ); \ \ if ( bli_is_conj( conjy ) ) \ bli_toggle_conj( &conjxt_use ); \ \ if ( bli_is_noconj( conjxt_use ) ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,dots)( x[i], y[i], dotxy ); \ PASTEMAC(ch,axpys)( *alpha, x[i], z[i] ); \ } \ } \ else /* bli_is_conj( conjxt_use ) ) */ \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,dotjs)( x[i], y[i], dotxy ); \ PASTEMAC(ch,axpys)( *alpha, x[i], z[i] ); \ } \ } \ \ if ( bli_is_conj( conjy ) ) \ PASTEMAC(ch,conjs)( dotxy ); \ \ PASTEMAC(ch,copys)( dotxy, *rho ); \ } \ else /* bli_is_conj( conjx ) ) */ \ { \ conj_t conjxt_use = conjxt; \ ctype dotxy; \ \ PASTEMAC(ch,set0s)( dotxy ); \ \ 
if ( bli_is_conj( conjy ) ) \ bli_toggle_conj( &conjxt_use ); \ \ if ( bli_is_noconj( conjxt_use ) ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,dots)( x[i], y[i], dotxy ); \ PASTEMAC(ch,axpyjs)( *alpha, x[i], z[i] ); \ } \ } \ else /* bli_is_conj( conjxt_use ) ) */ \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,dotjs)( x[i], y[i], dotxy ); \ PASTEMAC(ch,axpyjs)( *alpha, x[i], z[i] ); \ } \ } \ \ if ( bli_is_conj( conjy ) ) \ PASTEMAC(ch,conjs)( dotxy ); \ \ PASTEMAC(ch,copys)( dotxy, *rho ); \ } \ } \ else \ { \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,dotv_ker_ft) kfp_dv \ = \ bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTV_KER, cntx ); \ PASTECH(ch,axpyv_ker_ft) kfp_av \ = \ bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ kfp_dv \ ( \ conjxt, \ conjy, \ m, \ x, incx, \ y, incy, \ rho, \ cntx \ ); \ \ kfp_av \ ( \ conjx, \ m, \ alpha, \ x, incx, \ z, incz, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC2( dotaxpyv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1f/bli_dotxaxpyf_ref.c000066400000000000000000000136261360743507500211520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf, ff ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjat, \ conj_t conja, \ conj_t conjw, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict w, inc_t incw, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ ctype* restrict z, inc_t incz, \ cntx_t* restrict cntx \ ) \ { \ /* A is m x n. */ \ /* y = beta * y + alpha * A^T w; */ \ /* z = z + alpha * A x; */ \ \ if ( 1 && inca == 1 && incw == 1 && incx == 1 && \ incy == 1 && incz == 1 && b_n == ff ) \ { \ ctype r[ ff ]; \ ctype ax[ ff ]; \ \ /* If beta is zero, clear y. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,set0s)( y[i] ); \ } \ else \ { \ for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,scals)( *beta, y[i] ); \ } \ \ /* If the vectors are empty or if alpha is zero, return early. 
*/ \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Initialize r vector to 0. */ \ for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,set0s)( r[i] ); \ \ /* Scale x by alpha, storing to a temporary array ax. */ \ if ( bli_is_conj( conjx ) ) \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < ff; ++i ) \ PASTEMAC(ch,scal2js)( *alpha, x[i], ax[i] ); \ } \ else \ { \ PRAGMA_SIMD \ for ( dim_t i = 0; i < ff; ++i ) \ PASTEMAC(ch,scal2s)( *alpha, x[i], ax[i] ); \ } \ \ /* If a must be conjugated, we do so indirectly by first toggling the effective conjugation of w and then conjugating the resulting dot products. */ \ conj_t conjw_use = conjw; \ \ if ( bli_is_conj( conjat ) ) \ bli_toggle_conj( &conjw_use ); \ \ if ( bli_is_noconj( conjw_use ) ) \ { \ if ( bli_is_noconj( conja ) ) \ { \ PRAGMA_SIMD \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ PASTEMAC(ch,axpys)( a[p + i*lda], w[p], r[i] ); \ PASTEMAC(ch,axpys)( ax[i], a[p + i*lda], z[p] ); \ } \ } \ else \ { \ PRAGMA_SIMD \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ PASTEMAC(ch,axpys)( a[p + i*lda], w[p], r[i] ); \ PASTEMAC(ch,axpyjs)( ax[i], a[p + i*lda], z[p] ); \ } \ } \ } \ else \ { \ if ( bli_is_noconj( conja ) ) \ { \ PRAGMA_SIMD \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ PASTEMAC(ch,axpyjs)( a[p + i*lda], w[p], r[i] ); \ PASTEMAC(ch,axpys)( ax[i], a[p + i*lda], z[p] ); \ } \ } \ else \ { \ PRAGMA_SIMD \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ PASTEMAC(ch,axpyjs)( a[p + i*lda], w[p], r[i] ); \ PASTEMAC(ch,axpyjs)( ax[i], a[p + i*lda], z[p] ); \ } \ } \ } \ \ if ( bli_is_conj( conjat ) ) \ for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,conjs)( r[i] ); \ \ for ( dim_t i = 0; i < ff; ++i ) \ { \ PASTEMAC(ch,axpys)( *alpha, r[i], y[i] ); \ } \ } \ else \ { \ /* Query the context for the kernel function pointer. 
*/ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,dotxf_ker_ft) kfp_df \ = \ bli_cntx_get_l1f_ker_dt( dt, BLIS_DOTXF_KER, cntx ); \ PASTECH(ch,axpyf_ker_ft) kfp_af \ = \ bli_cntx_get_l1f_ker_dt( dt, BLIS_AXPYF_KER, cntx ); \ \ kfp_df \ ( \ conjat, \ conjw, \ m, \ b_n, \ alpha, \ a, inca, lda, \ w, incw, \ beta, \ y, incy, \ cntx \ ); \ \ kfp_af \ ( \ conja, \ conjx, \ m, \ b_n, \ alpha, \ a, inca, lda, \ x, incx, \ z, incz, \ cntx \ ); \ } \ } //INSERT_GENTFUNC_BASIC2( dotxaxpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) GENTFUNC( float, s, dotxaxpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4 ) GENTFUNC( double, d, dotxaxpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4 ) GENTFUNC( scomplex, c, dotxaxpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4 ) GENTFUNC( dcomplex, z, dotxaxpyf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4 ) blis-0.6.1/ref_kernels/1f/bli_dotxf_ref.c000066400000000000000000000107461360743507500202500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf, ff ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjat, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ cntx_t* restrict cntx \ ) \ { \ if ( inca == 1 && incx == 1 && incy == 1 && b_n == ff ) \ { \ ctype r[ ff ]; \ \ /* If beta is zero, clear y. Otherwise, scale by beta. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,set0s)( y[i] ); \ } \ else \ { \ for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,scals)( *beta, y[i] ); \ } \ \ /* If the vectors are empty or if alpha is zero, return early. */ \ if ( bli_zero_dim1( m ) || PASTEMAC(ch,eq0)( *alpha ) ) return; \ \ /* Initialize r vector to 0. */ \ for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,set0s)( r[i] ); \ \ /* If a must be conjugated, we do so indirectly by first toggling the effective conjugation of x and then conjugating the resulting dot products. 
*/ \ conj_t conjx_use = conjx; \ \ if ( bli_is_conj( conjat ) ) \ bli_toggle_conj( &conjx_use ); \ \ if ( bli_is_noconj( conjx_use ) ) \ { \ PRAGMA_SIMD \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ PASTEMAC(ch,axpys)( a[p + i*lda], x[p], r[i] ); \ } \ } \ else \ { \ PRAGMA_SIMD \ for ( dim_t p = 0; p < m; ++p ) \ for ( dim_t i = 0; i < ff; ++i ) \ { \ PASTEMAC(ch,axpyjs)( a[p + i*lda], x[p], r[i] ); \ } \ } \ \ if ( bli_is_conj( conjat ) ) \ for ( dim_t i = 0; i < ff; ++i ) PASTEMAC(ch,conjs)( r[i] ); \ \ for ( dim_t i = 0; i < ff; ++i ) \ { \ PASTEMAC(ch,axpys)( *alpha, r[i], y[i] ); \ } \ } \ else \ { \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,dotxv_ker_ft) kfp_dv \ = \ bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTXV_KER, cntx ); \ \ for ( dim_t i = 0; i < b_n; ++i ) \ { \ ctype* restrict a1 = a + (0 )*inca + (i )*lda; \ ctype* restrict x1 = x + (0 )*incx; \ ctype* restrict psi1 = y + (i )*incy; \ \ kfp_dv \ ( \ conjat, \ conjx, \ m, \ alpha, \ a1, inca, \ x1, incx, \ beta, \ psi1, \ cntx \ ); \ } \ } \ } //INSERT_GENTFUNC_BASIC2( dotxf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) GENTFUNC( float, s, dotxf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 6 ) GENTFUNC( double, d, dotxf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 6 ) GENTFUNC( scomplex, c, dotxf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 6 ) GENTFUNC( dcomplex, z, dotxf, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 6 ) blis-0.6.1/ref_kernels/1f/other/000077500000000000000000000000001360743507500164075ustar00rootroot00000000000000blis-0.6.1/ref_kernels/1f/other/bli_dotxaxpyf_ref_alt.c000066400000000000000000000066771360743507500231430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ conj_t conjat, \ conj_t conja, \ conj_t conjw, \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ ctype* restrict alpha, \ ctype* restrict a, inc_t inca, inc_t lda, \ ctype* restrict w, inc_t incw, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ) \ { \ ctype* a1; \ ctype* chi1; \ ctype* w1; \ ctype* psi1; \ ctype* z1; \ ctype conjx_chi1; \ ctype alpha_chi1; \ dim_t i; \ \ /* Query the context for the kernel function pointer. */ \ const num_t dt = PASTEMAC(ch,type); \ PASTECH(ch,dotxv_ft) kfp_dv = bli_cntx_get_l1v_ker_dt( dt, BLIS_DOTXV_KER, cntx ); \ PASTECH(ch,axpyv_ft) kfp_av = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ \ /* A is m x n. */ \ /* y = beta * y + alpha * A^T w; */ \ /* z = z + alpha * A x; */ \ for ( i = 0; i < b_n; ++i ) \ { \ a1 = a + (0 )*inca + (i )*lda; \ w1 = w + (0 )*incw; \ psi1 = y + (i )*incy; \ \ kfp_dv \ ( \ conjat, \ conjw, \ m, \ alpha, \ a1, inca, \ w1, incw, \ beta, \ psi1, \ cntx \ ); \ } \ \ for ( i = 0; i < b_n; ++i ) \ { \ a1 = a + (0 )*inca + (i )*lda; \ chi1 = x + (i )*incx; \ z1 = z + (0 )*incz; \ \ PASTEMAC(ch,copycjs)( conjx, *chi1, conjx_chi1 ); \ PASTEMAC(ch,scal2s)( *alpha, conjx_chi1, alpha_chi1 ); \ \ kfp_av \ ( \ conja, \ m, \ &alpha_chi1, \ a1, inca, \ z1, incz, \ cntx \ ); \ } \ } INSERT_GENTFUNC_BASIC0( dotxaxpyf_ref_var1 ) blis-0.6.1/ref_kernels/1m/000077500000000000000000000000001360743507500152755ustar00rootroot00000000000000blis-0.6.1/ref_kernels/1m/bli_packm_cxk_1er_ref.c000066400000000000000000002732721360743507500216470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ if ( cdim == mnr ) \ { \ if ( bli_is_1e_packed( schema ) ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype* restrict kappa_cast = ( ctype* )kappa; \ ctype* restrict alpha1_ri = ( ctype* )a; \ ctype* restrict pi1_ri = ( ctype* )p; \ ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( 
schema ) ) */ \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * ldp; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } \ } \ else /* if ( cdim < 
mnr ) */ \ { \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = cdim; \ const dim_t offn = 0; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_2xk_1er, 2, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ if ( cdim == mnr ) \ { \ if ( bli_is_1e_packed( schema ) ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype* restrict kappa_cast = ( ctype* )kappa; \ ctype* restrict alpha1_ri = ( ctype* )a; \ ctype* restrict pi1_ri = ( ctype* )p; \ ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k 
= n; k != 0; --k ) \ { \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * ldp; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( 
dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 
1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = cdim; \ const dim_t offn = 0; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_4xk_1er, 4, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ if ( cdim == mnr ) \ { \ if ( bli_is_1e_packed( schema ) ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype* restrict kappa_cast = ( ctype* )kappa; \ ctype* restrict alpha1_ri = ( ctype* )a; \ ctype* restrict pi1_ri = ( ctype* )p; \ ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 0*inca1), 
*(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal21es)( 
*kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * ldp; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), 
*(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( 
*kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = cdim; \ const dim_t offn = 0; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_6xk_1er, 6, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ if ( cdim == mnr ) \ { \ if ( bli_is_1e_packed( schema ) ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype* restrict kappa_cast = ( ctype* )kappa; \ ctype* restrict alpha1_ri = ( ctype* )a; \ ctype* restrict pi1_ri = ( ctype* )p; \ ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ 
PASTEMAC(ch,copyj1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, 
*(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * ldp; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ 
PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, 
*(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, 
inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = cdim; \ const dim_t offn = 0; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_8xk_1er, 8, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ if ( cdim == mnr ) \ { \ if ( bli_is_1e_packed( schema ) ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype* restrict kappa_cast = ( ctype* )kappa; \ ctype* restrict alpha1_ri = ( ctype* )a; \ ctype* restrict pi1_ri = ( ctype* )p; \ ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ 
PASTEMAC(ch,copyj1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal2j1es)( 
*kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * ldp; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ 
if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), 
*(pi1_i + 7) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ 
PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = cdim; \ const dim_t offn = 0; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_10xk_1er, 10, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void 
PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ if ( cdim == mnr ) \ { \ if ( bli_is_1e_packed( schema ) ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype* restrict kappa_cast = ( ctype* )kappa; \ ctype* restrict alpha1_ri = ( ctype* )a; \ ctype* restrict pi1_ri = ( ctype* )p; \ ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), 
*(pi1_ir + 2) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), 
*(pi1_ri +11), *(pi1_ir +11) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * ldp; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 
0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ 
PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), 
*(pi1_i +10) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ 
ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = cdim; \ const dim_t offn = 0; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_12xk_1er, 12, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ if ( cdim == mnr ) \ { \ if ( bli_is_1e_packed( schema ) ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype* restrict kappa_cast = ( ctype* )kappa; \ ctype* restrict alpha1_ri = ( ctype* )a; \ ctype* restrict pi1_ri = ( ctype* )p; \ ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ 
PASTEMAC(ch,copyj1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2j1es)( *kappa_cast, 
*(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal21es)( 
*kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * ldp; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ 
PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i 
+10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ 
PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, 
*(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = cdim; \ const dim_t offn = 0; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_14xk_1er, 14, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ if ( cdim == mnr ) \ { \ if ( bli_is_1e_packed( schema ) ) \ { \ const inc_t inca1 = inca; \ const inc_t lda1 = lda; \ const inc_t ldp1 = ldp; \ \ ctype* restrict kappa_cast = ( ctype* )kappa; \ ctype* restrict alpha1_ri = ( ctype* )a; \ ctype* restrict pi1_ri = ( ctype* )p; \ ctype* restrict pi1_ir = ( ctype* )p + ldp1/2; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 
2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \ PASTEMAC(ch,copyj1es)( *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ 
PASTEMAC(ch,copy1es)( *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \ PASTEMAC(ch,copy1es)( *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \ PASTEMAC(ch,scal2j1es)( 
*kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \ PASTEMAC(ch,scal2j1es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 0*inca1), *(pi1_ri + 0), *(pi1_ir + 0) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 1*inca1), *(pi1_ri + 1), *(pi1_ir + 1) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 2*inca1), *(pi1_ri + 2), *(pi1_ir + 2) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 3*inca1), *(pi1_ri + 3), *(pi1_ir + 3) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 4*inca1), *(pi1_ri + 4), *(pi1_ir + 4) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 5*inca1), *(pi1_ri + 5), *(pi1_ir + 5) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 6*inca1), *(pi1_ri + 6), *(pi1_ir + 6) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 7*inca1), *(pi1_ri + 7), *(pi1_ir + 7) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 8*inca1), *(pi1_ri + 8), *(pi1_ir + 8) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri + 9*inca1), *(pi1_ri + 9), *(pi1_ir + 9) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +10*inca1), *(pi1_ri +10), *(pi1_ir +10) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +11*inca1), *(pi1_ri +11), *(pi1_ir +11) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +12*inca1), *(pi1_ri +12), *(pi1_ir +12) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +13*inca1), *(pi1_ri +13), *(pi1_ir +13) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +14*inca1), *(pi1_ri +14), *(pi1_ir +14) ); \ PASTEMAC(ch,scal21es)( *kappa_cast, *(alpha1_ri +15*inca1), *(pi1_ri +15), *(pi1_ir +15) ); \ \ alpha1_ri += lda1; \ pi1_ri += ldp1; \ pi1_ir += ldp1; \ } \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema ) ) */ \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ const inc_t ldp2 = 2 * 
ldp; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + ldp; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r 
+15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( 
*kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += 
ldp2; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); 
\ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp2; \ pi1_i += ldp2; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal21ms_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = cdim; \ const dim_t offn = 0; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype* restrict zero = PASTEMAC(ch,0); \ const dim_t offm = 0; \ const dim_t offn = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ \ PASTEMAC(ch,set1ms_mxn) \ ( \ schema, \ offm, \ offn, \ m_edge, \ n_edge, \ zero, \ p, 1, ldp, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_16xk_1er, 16, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1m/bli_packm_cxk_3mis_ref.c000066400000000000000000002233121360743507500220210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ ctype_r* restrict pi1_rpi = ( ctype_r* )p + 2*is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; 
\ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ 
BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_2xk_3mis, 2, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ ctype_r* restrict pi1_rpi = ( ctype_r* 
)p + 2*is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ \ 
alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; 
\ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_4xk_3mis, 4, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ ctype_r* restrict pi1_rpi = ( ctype_r* )p + 2*is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ 
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, 
*(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ 
BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_6xk_3mis, 6, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ ctype_r* restrict pi1_rpi = ( ctype_r* 
)p + 2*is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 
6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), 
*(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge 
= n_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_8xk_3mis, 8, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ ctype_r* restrict pi1_rpi = ( ctype_r* )p + 2*is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 
2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,copyri3s)( 
*(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 
0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ 
m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_10xk_3mis, 10, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict 
pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ ctype_r* restrict pi1_rpi = ( ctype_r* )p + 2*is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 
1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ 
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, 
*(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, 
\ NULL \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_12xk_3mis, 12, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ ctype_r* restrict pi1_rpi = ( ctype_r* )p + 2*is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ 
PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 
2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), 
*(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i 
+ 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ 
ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_14xk_3mis, 14, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ 
ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ ctype_r* restrict pi1_rpi = ( ctype_r* )p + 2*is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i 
+12), *(pi1_rpi +12) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \ PASTEMAC(ch,copyjri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +12*inca2), *(alpha1_i 
+12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \ PASTEMAC(ch,copyri3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ 
PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \ PASTEMAC(ch,scal2jri3s)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0), *(pi1_rpi + 0) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1), *(pi1_rpi + 1) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2), *(pi1_rpi + 2) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3), *(pi1_rpi + 3) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4), *(pi1_rpi + 4) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5), *(pi1_rpi + 5) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6), *(pi1_rpi + 6) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, 
*(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7), *(pi1_rpi + 7) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8), *(pi1_rpi + 8) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9), *(pi1_rpi + 9) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10), *(pi1_rpi +10) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11), *(pi1_rpi +11) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12), *(pi1_rpi +12) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13), *(pi1_rpi +13) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14), *(pi1_rpi +14) ); \ PASTEMAC(ch,scal2ri3s)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15), *(pi1_rpi +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ pi1_rpi += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ri3s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (i )*1; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ 
BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } \ \ if ( n < n_max ) \ { \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ ctype_r* p_edge_rpi = ( ctype_r* )p + 2*is_p + (j )*ldp; \ \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_r, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_i, 1, ldp, \ cntx, \ NULL \ ); \ PASTEMAC2(chr,setm,BLIS_TAPI_EX_SUF) \ ( \ BLIS_NO_CONJUGATE, \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ m_edge, \ n_edge, \ zero_r, \ p_edge_rpi, 1, ldp, \ cntx, \ NULL \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_16xk_3mis, 16, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1m/bli_packm_cxk_4mi_ref.c000066400000000000000000001635521360743507500216500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 
1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + 
(j )*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_2xk_4mi, 2, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i 
+ 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t 
m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_4xk_4mi, 4, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) 
\ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) 
); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_6xk_4mi, 6, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict 
alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 
7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, 
*(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_8xk_4mi, 8, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict 
alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), 
*(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 
0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (j 
)*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_10xk_4mi, 10, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), 
*(pi1_i + 8) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r 
+ 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r 
+ 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_12xk_4mi, 12, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( 
ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i 
+11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, 
*(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 
1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ 
n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_14xk_4mi, 14, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t is_p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype_r* restrict kappa_r = ( ctype_r* )kappa; \ ctype_r* restrict kappa_i = ( ctype_r* )kappa + 1; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ ctype_r* restrict pi1_i = ( ctype_r* )p + is_p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 5*inca2), 
*(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \ PASTEMAC(ch,copyjris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyris)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ 
PASTEMAC(ch,copyris)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,copyris)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \ PASTEMAC(ch,copyris)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, 
*(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \ PASTEMAC(ch,scal2jris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0), *(pi1_i + 0) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1), *(pi1_i + 1) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2), *(pi1_i + 2) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3), *(pi1_i + 3) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4), *(pi1_i + 4) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5), *(pi1_i + 5) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6), *(pi1_i + 6) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), 
*(pi1_r + 7), *(pi1_i + 7) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8), *(pi1_i + 8) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9), *(pi1_i + 9) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10), *(pi1_i +10) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11), *(pi1_i +11) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12), *(pi1_i +12) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13), *(pi1_i +13) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14), *(pi1_i +14) ); \ PASTEMAC(ch,scal2ris)( *kappa_r, *kappa_i, *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15), *(pi1_i +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ pi1_i += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2ris_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, is_p \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - i; \ const dim_t n_edge = n_max; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (i )*1; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (i )*1; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - j; \ ctype_r* restrict p_edge_r = ( ctype_r* )p + (j )*ldp; \ ctype_r* restrict p_edge_i = ( ctype_r* )p + is_p + (j )*ldp; \ \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_r, 1, ldp \ ); \ PASTEMAC(chr,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge_i, 1, ldp \ ); \ 
} \ } INSERT_GENTFUNCCO_BASIC3( packm_16xk_4mi, 16, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1m/bli_packm_cxk_bb_ref.c000066400000000000000000000565021360743507500215360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" // -- 6xk, duplication factor 2 ------------------------------------------------ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ const dim_t dfac = 2; \ \ /* Handle the packing of B (column panel schemas) separately from packing of A (row panel schemas). */ \ if ( bli_is_col_packed( schema ) ) \ { \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 10) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 11) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else /* if ( bli_is_noconj( conja ) ) */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 5) ); \ 
PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 10) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 11) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else /* if ( bli_is_noconj( conja ) ) */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 6) ); \ 
PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 10) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 11) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2bbs_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, dfac, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*dfac; \ \ PASTEMAC(ch,set0bbs_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, dfac, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0bbs_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, dfac, ldp \ ); \ } \ } \ else /* if ( bli_is_row_packed( schema ) ) */ \ { \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else /* if ( bli_is_noconj( conja ) ) */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( 
*(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else /* if ( bli_is_noconj( conja ) ) */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ } INSERT_GENTFUNC_BASIC3( packm_6xk_bb2, 
6, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) // -- 6xk, duplication factor 4 ------------------------------------------------ #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ const dim_t dfac = 4; \ \ /* Handle the packing of B (column panel schemas) separately from packing of A (row panel schemas). */ \ if ( bli_is_col_packed( schema ) ) \ { \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 10) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 11) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 12) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 13) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 14) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 15) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 16) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 17) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 18) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 19) ); \ 
PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 20) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 21) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 22) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 23) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else /* if ( bli_is_noconj( conja ) ) */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 10) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 11) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 12) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 13) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 14) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 15) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 16) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 17) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 18) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 19) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 20) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 21) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 22) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 23) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, 
*(alpha1 + 0*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else /* if ( bli_is_noconj( conja ) ) */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 
0*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 10) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 11) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 12) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 13) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 14) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 15) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 16) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 17) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 18) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 19) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 20) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 21) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 22) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 23) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2bbs_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, dfac, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*dfac; \ \ PASTEMAC(ch,set0bbs_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, dfac, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge 
= n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0bbs_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, dfac, ldp \ ); \ } \ } \ else /* if ( bli_is_row_packed( schema ) ) */ \ { \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else /* if ( bli_is_noconj( conja ) ) */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else /* if ( !PASTEMAC(ch,eq1)( *kappa_cast ) ) */ \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else /* if ( bli_is_noconj( conja ) ) */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( 
*kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2s_mxn) \ ( \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ } INSERT_GENTFUNC_BASIC3( packm_6xk_bb4, 6, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1m/bli_packm_cxk_ref.c000066400000000000000000001543311360743507500210720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ dim_t n_iter = n / 4; \ dim_t n_left = n % 4; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( ; n_iter != 0; --n_iter ) \ { \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 0*lda), *(pi1 + 0 + 0*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 0*lda), *(pi1 + 1 + 0*ldp) ); \ \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 1*lda), *(pi1 + 
0 + 1*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 1*lda), *(pi1 + 1 + 1*ldp) ); \ \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 2*lda), *(pi1 + 0 + 2*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 2*lda), *(pi1 + 1 + 2*ldp) ); \ \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 3*lda), *(pi1 + 0 + 3*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 3*lda), *(pi1 + 1 + 3*ldp) ); \ \ alpha1 += 4*lda; \ pi1 += 4*ldp; \ } \ \ for ( ; n_left != 0; --n_left ) \ { \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_2xk, 2, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define 
GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ dim_t n_iter = n / 4; \ dim_t n_left = n % 4; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( ; n_iter != 0; --n_iter ) \ { \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 0*lda), *(pi1 + 0 + 0*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 0*lda), *(pi1 + 1 + 0*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca + 0*lda), *(pi1 + 2 + 0*ldp) ); \ \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 1*lda), *(pi1 + 0 + 1*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 1*lda), *(pi1 + 1 + 1*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca + 1*lda), *(pi1 + 2 + 1*ldp) ); \ \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 2*lda), *(pi1 + 0 + 2*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 2*lda), *(pi1 + 1 + 2*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca + 2*lda), *(pi1 + 2 + 2*ldp) ); \ \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 3*lda), *(pi1 + 0 + 3*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 3*lda), *(pi1 + 1 + 3*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca + 3*lda), *(pi1 + 2 + 3*ldp) ); \ \ alpha1 += 4*lda; \ pi1 += 4*ldp; \ } \ \ for ( ; n_left != 0; --n_left ) \ { \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC2(ch,ch,copys)( 
*(alpha1 + 2*inca), *(pi1 + 2) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_3xk, 3, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 
= p; \ \ dim_t n_iter = n / 2; \ dim_t n_left = n % 2; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( ; n_iter != 0; --n_iter ) \ { \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 0*lda), *(pi1 + 0 + 0*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 0*lda), *(pi1 + 1 + 0*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca + 0*lda), *(pi1 + 2 + 0*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca + 0*lda), *(pi1 + 3 + 0*ldp) ); \ \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca + 1*lda), *(pi1 + 0 + 1*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca + 1*lda), *(pi1 + 1 + 1*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca + 1*lda), *(pi1 + 2 + 1*ldp) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca + 1*lda), *(pi1 + 3 + 1*ldp) ); \ \ alpha1 += 2*lda; \ pi1 += 2*ldp; \ } \ \ for ( ; n_left != 0; --n_left ) \ { \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ 
PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_4xk, 4, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 
4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* 
restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_6xk, 6, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ dim_t n_iter = n / 2; \ dim_t n_left = n % 2; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( ; n_iter != 0; --n_iter ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca + 0*lda), *(pi1 + 0 + 0*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca + 0*lda), *(pi1 + 1 + 0*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca + 0*lda), *(pi1 + 2 + 0*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca + 0*lda), *(pi1 + 3 + 0*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca + 0*lda), *(pi1 + 4 + 0*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 
5*inca + 0*lda), *(pi1 + 5 + 0*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 6*inca + 0*lda), *(pi1 + 6 + 0*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 7*inca + 0*lda), *(pi1 + 7 + 0*ldp) ); \ \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca + 1*lda), *(pi1 + 0 + 1*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca + 1*lda), *(pi1 + 1 + 1*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca + 1*lda), *(pi1 + 2 + 1*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca + 1*lda), *(pi1 + 3 + 1*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca + 1*lda), *(pi1 + 4 + 1*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca + 1*lda), *(pi1 + 5 + 1*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 6*inca + 1*lda), *(pi1 + 6 + 1*ldp) ); \ PASTEMAC(ch,copys)( *(alpha1 + 7*inca + 1*lda), *(pi1 + 7 + 1*ldp) ); \ \ alpha1 += 2*lda; \ pi1 += 2*ldp; \ } \ \ for ( ; n_left != 0; --n_left ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ \ alpha1 
+= lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_8xk, 8, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ 
if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ 
PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_10xk, 10, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ 
void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 
0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ 
/* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_12xk, 12, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +13*inca), *(pi1 +13) ); \ \ alpha1 += lda; \ pi1 += ldp; 
\ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,copys)( *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,copys)( *(alpha1 +13*inca), *(pi1 +13) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \ \ alpha1 += lda; \ pi1 += ldp; \ 
} \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_14xk, 14, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void 
PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +13*inca), *(pi1 +13) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +14*inca), *(pi1 +14) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +15*inca), *(pi1 +15) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ 
PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,copys)( *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,copys)( *(alpha1 +13*inca), *(pi1 +13) ); \ PASTEMAC(ch,copys)( *(alpha1 +14*inca), *(pi1 +14) ); \ PASTEMAC(ch,copys)( *(alpha1 +15*inca), *(pi1 +15) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, 
*(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_16xk, 16, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t 
lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype* restrict pi1 = p; \ \ if ( cdim == mnr ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copyjs)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +13*inca), *(pi1 +13) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +14*inca), *(pi1 +14) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +15*inca), *(pi1 +15) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +16*inca), *(pi1 +16) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +17*inca), *(pi1 +17) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +18*inca), *(pi1 +18) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +19*inca), *(pi1 +19) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +20*inca), *(pi1 +20) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +21*inca), *(pi1 +21) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +22*inca), *(pi1 +22) ); \ PASTEMAC(ch,copyjs)( *(alpha1 +23*inca), *(pi1 +23) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \ 
PASTEMAC(ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,copys)( *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,copys)( *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,copys)( *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,copys)( *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,copys)( *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,copys)( *(alpha1 +13*inca), *(pi1 +13) ); \ PASTEMAC(ch,copys)( *(alpha1 +14*inca), *(pi1 +14) ); \ PASTEMAC(ch,copys)( *(alpha1 +15*inca), *(pi1 +15) ); \ PASTEMAC(ch,copys)( *(alpha1 +16*inca), *(pi1 +16) ); \ PASTEMAC(ch,copys)( *(alpha1 +17*inca), *(pi1 +17) ); \ PASTEMAC(ch,copys)( *(alpha1 +18*inca), *(pi1 +18) ); \ PASTEMAC(ch,copys)( *(alpha1 +19*inca), *(pi1 +19) ); \ PASTEMAC(ch,copys)( *(alpha1 +20*inca), *(pi1 +20) ); \ PASTEMAC(ch,copys)( *(alpha1 +21*inca), *(pi1 +21) ); \ PASTEMAC(ch,copys)( *(alpha1 +22*inca), *(pi1 +22) ); \ PASTEMAC(ch,copys)( *(alpha1 +23*inca), *(pi1 +23) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, 
*(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +13*inca), *(pi1 +13) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \ PASTEMAC(ch,scal2js)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 8*inca), *(pi1 + 8) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 + 9*inca), *(pi1 + 9) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +10*inca), *(pi1 +10) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +11*inca), *(pi1 +11) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +12*inca), *(pi1 +12) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, 
*(alpha1 +13*inca), *(pi1 +13) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +14*inca), *(pi1 +14) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +15*inca), *(pi1 +15) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +16*inca), *(pi1 +16) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +17*inca), *(pi1 +17) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +18*inca), *(pi1 +18) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +19*inca), *(pi1 +19) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +20*inca), *(pi1 +20) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +21*inca), *(pi1 +21) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +22*inca), *(pi1 +22) ); \ PASTEMAC(ch,scal2s)( *kappa_cast, *(alpha1 +23*inca), *(pi1 +23) ); \ \ alpha1 += lda; \ pi1 += ldp; \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC2(ch,scal2m,BLIS_TAPI_EX_SUF) \ ( \ 0, \ BLIS_NONUNIT_DIAG, \ BLIS_DENSE, \ ( trans_t )conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp, \ cntx, \ NULL \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNC_BASIC3( packm_24xk, 24, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1m/bli_packm_cxk_rih_ref.c000066400000000000000000002732021360743507500217330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ \ \ if ( cdim == mnr ) \ { \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ /* This works regardless of conja since we are only copying the real part. */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_r + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 1*inca2), *(pi1_r + 1) ); \ \ alpha1_r += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_i + 0*inca2), *(pi1_r + 0) 
); \ PASTEMAC(chr,copys)( *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ 
p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_2xk_rih, 2, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ \ \ if ( cdim == mnr ) \ { \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ /* This works regardless of conja since we are only copying the real part. 
*/ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_r + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 3*inca2), *(pi1_r + 3) ); \ \ alpha1_r += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( 
bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 
0) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_4xk_rih, 4, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ 
ctype_r* restrict pi1_r = ( ctype_r* )p; \ \ \ if ( cdim == mnr ) \ { \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ /* This works regardless of conja since we are only copying the real part. */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_r + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 5*inca2), *(pi1_r + 5) ); \ \ alpha1_r += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ 
PASTEMAC(chr,copys)( -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ \ alpha1 += lda; \ pi1_r += ldp; 
\ } \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( 
dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_6xk_rih, 6, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ \ \ if ( cdim == mnr ) \ { \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( 
*kappa_cast ) ) \ { \ /* This works regardless of conja since we are only copying the real part. */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_r + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 7*inca2), *(pi1_r + 7) ); \ \ alpha1_r += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ 
PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jios)( 
*kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 
1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ \ alpha1 += lda; \ pi1_r 
+= ldp; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_8xk_rih, 8, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ \ \ if ( cdim == mnr ) \ { \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ /* This works regardless of conja since we are only copying the real part. 
*/ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_r + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 9*inca2), *(pi1_r + 9) ); \ \ alpha1_r += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 4*inca), 
*(pi1_r + 4) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ 
else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( 
*(alpha1_r + 1*inca2), -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 8*inca2), -*(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 9*inca2), -*(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 
1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ 
const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_10xk_rih, 10, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ \ \ if ( cdim == mnr ) \ { \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ /* This works regardless of conja since we are only copying the real part. 
*/ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_r + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( *(alpha1_r +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( *(alpha1_r +11*inca2), *(pi1_r +11) ); \ \ alpha1_r += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ 
PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +11*inca2), *(pi1_r +11) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 1*inca2), *(pi1_r + 1) 
); \ PASTEMAC(chr,copys)( *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( *(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( *(alpha1_i +11*inca2), *(pi1_r +11) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ios)( 
*kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 8*inca2), -*(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 9*inca2), -*(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +10*inca2), -*(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +11*inca2), -*(alpha1_i +11*inca2), *(pi1_r +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; 
--k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 
+10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_12xk_rih, 12, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, 
suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ \ \ if ( cdim == mnr ) \ { \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ /* This works regardless of conja since we are only copying the real part. */ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_r + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( *(alpha1_r +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( *(alpha1_r +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,copys)( *(alpha1_r +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,copys)( *(alpha1_r +13*inca2), *(pi1_r +13) ); \ \ alpha1_r += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ 
PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } 
\ } \ else if ( bli_is_io_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +13*inca2), *(pi1_r +13) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( *(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( *(alpha1_i +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,copys)( *(alpha1_i +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,copys)( *(alpha1_i +13*inca2), 
*(pi1_r +13) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, 
*(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 8*inca2), -*(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 9*inca2), -*(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +10*inca2), -*(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +11*inca2), -*(alpha1_i +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +12*inca2), -*(alpha1_i +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +13*inca2), -*(alpha1_i +13*inca2), *(pi1_r +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 
1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ 
PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const 
dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_14xk_rih, 14, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, mnr, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ pack_t schema, \ dim_t cdim, \ dim_t n, \ dim_t n_max, \ void* restrict kappa, \ void* restrict a, inc_t inca, inc_t lda, \ void* restrict p, inc_t ldp, \ cntx_t* restrict cntx \ ) \ { \ const inc_t inca2 = 2 * inca; \ const inc_t lda2 = 2 * lda; \ \ ctype* kappa_cast = kappa; \ ctype* restrict alpha1 = a; \ ctype_r* restrict alpha1_r = ( ctype_r* )a; \ ctype_r* restrict alpha1_i = ( ctype_r* )a + 1; \ ctype_r* restrict pi1_r = ( ctype_r* )p; \ \ \ if ( cdim == mnr ) \ { \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ /* This works regardless of conja since we are only copying the real part. 
*/ \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_r + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( *(alpha1_r + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( *(alpha1_r +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( *(alpha1_r +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,copys)( *(alpha1_r +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,copys)( *(alpha1_r +13*inca2), *(pi1_r +13) ); \ PASTEMAC(chr,copys)( *(alpha1_r +14*inca2), *(pi1_r +14) ); \ PASTEMAC(chr,copys)( *(alpha1_r +15*inca2), *(pi1_r +15) ); \ \ alpha1_r += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ 
PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +14*inca), *(pi1_r +14) ); \ PASTEMAC(ch,scal2jros)( *kappa_cast, *(alpha1 +15*inca), *(pi1_r +15) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +14*inca), *(pi1_r +14) ); \ PASTEMAC(ch,scal2ros)( *kappa_cast, *(alpha1 +15*inca), *(pi1_r +15) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ 
PASTEMAC(chr,copys)( -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( -*(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +13*inca2), *(pi1_r +13) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +14*inca2), *(pi1_r +14) ); \ PASTEMAC(chr,copys)( -*(alpha1_i +15*inca2), *(pi1_r +15) ); \ \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,copys)( *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,copys)( *(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,copys)( *(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,copys)( *(alpha1_i +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,copys)( *(alpha1_i +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,copys)( *(alpha1_i +13*inca2), *(pi1_r +13) ); \ PASTEMAC(chr,copys)( *(alpha1_i +14*inca2), *(pi1_r +14) ); \ PASTEMAC(chr,copys)( *(alpha1_i +15*inca2), *(pi1_r +15) ); \ \ alpha1_i += lda2; \ pi1_r 
+= ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +14*inca), *(pi1_r +14) ); \ PASTEMAC(ch,scal2jios)( *kappa_cast, *(alpha1 +15*inca), *(pi1_r +15) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ 
PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +14*inca), *(pi1_r +14) ); \ PASTEMAC(ch,scal2ios)( *kappa_cast, *(alpha1 +15*inca), *(pi1_r +15) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), -*(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), -*(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), -*(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), -*(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), -*(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), -*(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), -*(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), -*(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 8*inca2), -*(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 9*inca2), -*(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +10*inca2), -*(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +11*inca2), -*(alpha1_i +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +12*inca2), -*(alpha1_i +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +13*inca2), -*(alpha1_i +13*inca2), 
*(pi1_r +13) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +14*inca2), -*(alpha1_i +14*inca2), *(pi1_r +14) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +15*inca2), -*(alpha1_i +15*inca2), *(pi1_r +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(chr,add3s)( *(alpha1_r + 0*inca2), *(alpha1_i + 0*inca2), *(pi1_r + 0) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 1*inca2), *(alpha1_i + 1*inca2), *(pi1_r + 1) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 2*inca2), *(alpha1_i + 2*inca2), *(pi1_r + 2) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 3*inca2), *(alpha1_i + 3*inca2), *(pi1_r + 3) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 4*inca2), *(alpha1_i + 4*inca2), *(pi1_r + 4) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 5*inca2), *(alpha1_i + 5*inca2), *(pi1_r + 5) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 6*inca2), *(alpha1_i + 6*inca2), *(pi1_r + 6) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 7*inca2), *(alpha1_i + 7*inca2), *(pi1_r + 7) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 8*inca2), *(alpha1_i + 8*inca2), *(pi1_r + 8) ); \ PASTEMAC(chr,add3s)( *(alpha1_r + 9*inca2), *(alpha1_i + 9*inca2), *(pi1_r + 9) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +10*inca2), *(alpha1_i +10*inca2), *(pi1_r +10) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +11*inca2), *(alpha1_i +11*inca2), *(pi1_r +11) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +12*inca2), *(alpha1_i +12*inca2), *(pi1_r +12) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +13*inca2), *(alpha1_i +13*inca2), *(pi1_r +13) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +14*inca2), *(alpha1_i +14*inca2), *(pi1_r +14) ); \ PASTEMAC(chr,add3s)( *(alpha1_r +15*inca2), *(alpha1_i +15*inca2), *(pi1_r +15) ); \ \ alpha1_r += lda2; \ alpha1_i += lda2; \ pi1_r += ldp; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conja ) ) \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2jrpis)( 
*kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +14*inca), *(pi1_r +14) ); \ PASTEMAC(ch,scal2jrpis)( *kappa_cast, *(alpha1 +15*inca), *(pi1_r +15) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ else \ { \ for ( dim_t k = n; k != 0; --k ) \ { \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 0*inca), *(pi1_r + 0) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 1*inca), *(pi1_r + 1) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 2*inca), *(pi1_r + 2) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 3*inca), *(pi1_r + 3) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 4*inca), *(pi1_r + 4) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 5*inca), *(pi1_r + 5) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 6*inca), *(pi1_r + 6) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 7*inca), *(pi1_r + 7) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 8*inca), *(pi1_r + 8) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 + 9*inca), *(pi1_r + 9) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +10*inca), *(pi1_r +10) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, 
*(alpha1 +11*inca), *(pi1_r +11) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +12*inca), *(pi1_r +12) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +13*inca), *(pi1_r +13) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +14*inca), *(pi1_r +14) ); \ PASTEMAC(ch,scal2rpis)( *kappa_cast, *(alpha1 +15*inca), *(pi1_r +15) ); \ \ alpha1 += lda; \ pi1_r += ldp; \ } \ } \ } \ } \ } \ else /* if ( cdim < mnr ) */ \ { \ PASTEMAC(ch,scal2rihs_mxn) \ ( \ schema, \ conja, \ cdim, \ n, \ kappa, \ a, inca, lda, \ p, 1, ldp \ ); \ \ /* if ( cdim < mnr ) */ \ { \ const dim_t i = cdim; \ const dim_t m_edge = mnr - cdim; \ const dim_t n_edge = n_max; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (i )*1; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } \ \ if ( n < n_max ) \ { \ const dim_t j = n; \ const dim_t m_edge = mnr; \ const dim_t n_edge = n_max - n; \ ctype* restrict p_cast = p; \ ctype* restrict p_edge = p_cast + (j )*ldp; \ \ PASTEMAC(ch,set0s_mxn) \ ( \ m_edge, \ n_edge, \ p_edge, 1, ldp \ ); \ } \ } INSERT_GENTFUNCCO_BASIC3( packm_16xk_rih, 16, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/1m/bli_unpackm_cxk_ref.c000066400000000000000000000754651360743507500214470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjp, \ dim_t n, \ void* restrict kappa, \ void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict pi1 = p; \ ctype* restrict alpha1 = a; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( 
*kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( unpackm_2xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjp, \ dim_t n, \ void* restrict kappa, \ void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict pi1 = p; \ ctype* restrict alpha1 = a; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 
0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( unpackm_4xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjp, \ dim_t n, \ void* restrict kappa, \ void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict pi1 = p; \ ctype* restrict alpha1 = a; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 
2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( unpackm_6xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjp, \ dim_t n, \ void* restrict kappa, \ void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict pi1 = p; \ ctype* restrict alpha1 = a; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 
0*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( unpackm_8xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef 
GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjp, \ dim_t n, \ void* restrict kappa, \ void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict pi1 = p; \ ctype* restrict alpha1 = a; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 
1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( unpackm_10xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjp, \ dim_t n, \ void* restrict kappa, \ void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict pi1 = p; \ ctype* restrict alpha1 = a; \ \ if ( 
PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ 
PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( unpackm_12xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjp, \ dim_t n, \ void* restrict kappa, \ 
void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict pi1 = p; \ ctype* restrict alpha1 = a; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ PASTEMAC2(ch,ch,copys)( 
*(pi1 + 12), *(alpha1 + 12*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 
+ 7*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( unpackm_14xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conjp, \ dim_t n, \ void* restrict kappa, \ void* restrict p, inc_t ldp, \ void* restrict a, inc_t inca, inc_t lda, \ cntx_t* restrict cntx \ ) \ { \ ctype* restrict kappa_cast = kappa; \ ctype* restrict pi1 = p; \ ctype* restrict alpha1 = a; \ \ if ( PASTEMAC(ch,eq1)( *kappa_cast ) ) \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 14), *(alpha1 + 14*inca) ); 
\ PASTEMAC2(ch,ch,copyjs)( *(pi1 + 15), *(alpha1 + 15*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 14), *(alpha1 + 14*inca) ); \ PASTEMAC2(ch,ch,copys)( *(pi1 + 15), *(alpha1 + 15*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ else \ { \ if ( bli_is_conj( conjp ) ) \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 9), *(alpha1 
+ 9*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \ PASTEMAC3(ch,ch,ch,scal2js)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ else \ { \ for ( ; n != 0; --n ) \ { \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \ PASTEMAC3(ch,ch,ch,scal2s)( *kappa_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \ \ pi1 += ldp; \ alpha1 += lda; \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( unpackm_16xk, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) 
blis-0.6.1/ref_kernels/3/000077500000000000000000000000001360743507500151225ustar00rootroot00000000000000blis-0.6.1/ref_kernels/3/bb/000077500000000000000000000000001360743507500155055ustar00rootroot00000000000000blis-0.6.1/ref_kernels/3/bb/bli_gemmbb_ref.c000066400000000000000000000106671360743507500205760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

#include "blis.h"

// An implementation that indexes through B with the assumption that all
// elements were broadcast (duplicated) by a factor of NP/NR.
//
// GENTFUNC below is a function template. Each instantiation defines one
// reference gemm microkernel, named by PASTEMAC3(ch,opname,arch,suf), that
// updates a single m x n tile of c, where m and n are the default MR and NR
// blocksizes queried from cntx:
//
//   ab := alpha * ( a * b );
//   c  := ab              if beta is zero (c is overwritten, never read),
//   c  := beta * c + ab   otherwise.
//
// Parameters:
//   k            number of rank-1 updates accumulated into ab.
//   alpha        scalar applied to the accumulated product.
//   a            each iteration reads elements 0..m-1, then a advances by
//                cs_a (the maximum MR blocksize from cntx).
//   b            each iteration reads element j at offset j*cs_b, where
//                cs_b = packnr / nr, then b advances by rs_b (the maximum
//                NR blocksize from cntx).
//   beta         scalar applied to c; see above.
//   c            output tile with row stride rs_c and column stride cs_c.
//   data         not referenced by this implementation.
//   cntx         context from which the blocksizes are queried.
//
// The accumulator ab is filled in column-major order (rs_ab = 1,
// cs_ab = mr).
//
// NOTE(review): ab holds BLIS_STACK_BUF_MAX_SIZE / sizeof( ctype ) elements
// and nothing in this function checks that m * n fits. Presumably the
// constant is sized for the largest MR x NR in use -- confirm before
// enlarging either blocksize.

#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, arch, suf ) \
\
void PASTEMAC3(ch,opname,arch,suf) \
     ( \
       dim_t k, \
       ctype* restrict alpha, \
       ctype* restrict a, \
       ctype* restrict b, \
       ctype* restrict beta, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t* restrict cntx \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \
	const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \
\
	const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); \
	const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \
\
	const dim_t m = mr; \
	const dim_t n = nr; \
\
	const inc_t cs_a = packmr; \
\
	const inc_t rs_b = packnr; \
\
	/* Assume that the degree of duplication is equal to packnr / nr. */ \
	const inc_t cs_b = packnr / nr; \
\
	ctype ab[ BLIS_STACK_BUF_MAX_SIZE \
	          / sizeof( ctype ) ] \
	          __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const inc_t rs_ab = 1; \
	const inc_t cs_ab = mr; \
\
	dim_t l, j, i; \
\
	ctype ai; \
	ctype bj; \
\
\
	/* Initialize the accumulator elements in ab to zero. */ \
	for ( i = 0; i < m * n; ++i ) \
	{ \
		PASTEMAC(ch,set0s)( *(ab + i) ); \
	} \
\
	/* Perform a series of k rank-1 updates into ab. */ \
	for ( l = 0; l < k; ++l ) \
	{ \
		ctype* restrict abij = ab; \
\
		/* In an optimized implementation, these two loops over MR and NR
		   are typically fully unrolled. */ \
		for ( j = 0; j < n; ++j ) \
		{ \
			bj = *(b + j*cs_b); \
\
			for ( i = 0; i < m; ++i ) \
			{ \
				ai = *(a + i); \
\
				PASTEMAC(ch,dots)( ai, bj, *abij ); \
\
				abij += rs_ab; \
			} \
		} \
\
		a += cs_a; \
		b += rs_b; \
	} \
\
	/* Scale the result in ab by alpha. */ \
	for ( i = 0; i < m * n; ++i ) \
	{ \
		PASTEMAC(ch,scals)( *alpha, *(ab + i) ); \
	} \
\
	/* If beta is zero, overwrite c with the scaled result in ab. Otherwise,
	   scale by beta and then add the scaled result in ab. */ \
	if ( PASTEMAC(ch,eq0)( *beta ) ) \
	{ \
		PASTEMAC(ch,copys_mxn)( m, \
		                        n, \
		                        ab, rs_ab, cs_ab, \
		                        c, rs_c, cs_c ); \
	} \
	else \
	{ \
		PASTEMAC(ch,xpbys_mxn)( m, \
		                        n, \
		                        ab, rs_ab, cs_ab, \
		                        beta, \
		                        c, rs_c, cs_c ); \
	} \
}

INSERT_GENTFUNC_BASIC2( gemmbb, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
blis-0.6.1/ref_kernels/3/bb/bli_gemmtrsmbb_ref.c000066400000000000000000000111531360743507500214730ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // An implementation that indexes through B with the assumption that all // elements were broadcast (duplicated) by a factor of NP/NR. #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf, trsmkerid ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ const inc_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const inc_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \ \ const inc_t rs_b = packnr; \ \ /* Assume that the degree of duplication is equal to packnr / nr. 
*/ \ const inc_t cs_b = packnr / nr; \ /* printf( "bli_gemmtrsmbb_ref(): cs_b = %d\n", (int)cs_b ); \ printf( "bli_gemmtrsmbb_ref(): k nr = %d %d\n", (int)k, (int)nr ); \ */ \ \ ctype* minus_one = PASTEMAC(ch,m1); \ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ PASTECH(ch,trsm_ukr_ft) \ trsm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt, trsmkerid, cntx ); \ \ /* PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b01", k, nr, \ (double*)bx1, rs_b, cs_b, "%5.2f", "" ); \ PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b11", mr, 2*nr, \ (double*)b11, rs_b, 1, "%5.2f", "" ); \ */ \ \ /* lower: b11 = alpha * b11 - a10 * b01; */ \ /* upper: b11 = alpha * b11 - a12 * b21; */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1x, \ bx1, \ alpha, \ b11, rs_b, cs_b, \ data, \ cntx \ ); \ /* PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b11 after gemm", mr, 2*nr, \ (double*)b11, rs_b, 1, "%5.2f", "" ); \ */ \ \ /* b11 = inv(a11) * b11; c11 = b11; */ \ trsm_ukr \ ( \ a11, \ b11, \ c11, rs_c, cs_c, \ data, \ cntx \ ); \ /* PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b11 after trsm", mr, 2*nr, \ (double*)b11, rs_b, 1, "%5.2f", "" ); \ */ \ \ /* Broadcast the elements of the updated b11 submatrix to their duplicated neighbors. */ \ PASTEMAC(ch,bcastbbs_mxn) \ ( \ mr, \ nr, \ b11, rs_b, cs_b \ ); \ \ /* PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b0111p_r after", k+3, 8, \ ( double* )b01, 2*PASTEMAC(ch,packnr), 2, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b0111p_i after", k+3, 8, \ ( double* )b01 + 1, 2*PASTEMAC(ch,packnr), 2, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC3( gemmtrsmbb_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_L_UKR ) INSERT_GENTFUNC_BASIC3( gemmtrsmbb_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_U_UKR ) blis-0.6.1/ref_kernels/3/bb/bli_trsmbb_ref.c000066400000000000000000000157201360743507500206310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // An implementation that indexes through B with the assumption that all // elements were broadcast (duplicated) by a factor of NP/NR. 
// Template for the lower-triangular reference trsm microkernel operating on
// a B block whose elements are stored with column stride cs_b = packnr / nr.
//
// Rows are solved in increasing order (i = 0 .. m-1). For each row i and
// column j:
//
//   rho     := sum over l < i of a(i,l) * b(l,j)
//   beta11c := ( b(i,j) - rho ) * (*alpha11)
//
// where *alpha11 is the diagonal entry of a which, per the note inside the
// loop, holds the INVERSE of the diagonal value. The result is written to
// c(i,j) and back to the element of b at offset i*rs_b + j*cs_b. That is
// the only element of b this function writes.
//
// Parameters:
//   a      triangular block read with rs_a = 1, cs_a = packmr.
//   b      right-hand side block, updated in place (rs_b = packnr).
//   c      output block with row stride rs_c and column stride cs_c.
//   data   not referenced by this implementation.
//   cntx   context from which MR, NR and their maximum values are queried.

#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, arch, suf ) \
\
void PASTEMAC3(ch,opname,arch,suf) \
     ( \
       ctype* restrict a, \
       ctype* restrict b, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t* restrict cntx \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \
	const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \
\
	const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); \
	const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \
\
	const dim_t m = mr; \
	const dim_t n = nr; \
\
	const inc_t rs_a = 1; \
	const inc_t cs_a = packmr; \
\
	const inc_t rs_b = packnr; \
\
	/* Assume that the degree of duplication is equal to packnr / nr. */ \
	const inc_t cs_b = packnr / nr; \
\
	dim_t iter, i, j, l; \
	dim_t n_behind; \
\
	for ( iter = 0; iter < m; ++iter ) \
	{ \
		i = iter; \
		n_behind = i; \
\
		ctype* restrict alpha11 = a + (i  )*rs_a + (i  )*cs_a; \
		ctype* restrict a10t = a + (i  )*rs_a + (0  )*cs_a; \
		ctype* restrict B0 = b + (0  )*rs_b + (0  )*cs_b; \
		ctype* restrict b1 = b + (i  )*rs_b + (0  )*cs_b; \
\
		/* b1 = b1 - a10t * B0; */ \
		/* b1 = b1 / alpha11; */ \
		for ( j = 0; j < n; ++j ) \
		{ \
			ctype* restrict b01 = B0 + (0  )*rs_b + (j  )*cs_b; \
			ctype* restrict beta11 = b1 + (0  )*rs_b + (j  )*cs_b; \
			ctype* restrict gamma11 = c + (i  )*rs_c + (j  )*cs_c; \
			ctype beta11c = *beta11; \
			ctype rho11; \
\
			/* beta11 = beta11 - a10t * b01; */ \
			PASTEMAC(ch,set0s)( rho11 ); \
			for ( l = 0; l < n_behind; ++l ) \
			{ \
				ctype* restrict alpha10 = a10t + (l  )*cs_a; \
				ctype* restrict beta01 = b01 + (l  )*rs_b; \
\
				PASTEMAC(ch,axpys)( *alpha10, *beta01, rho11 ); \
			} \
			PASTEMAC(ch,subs)( rho11, beta11c ); \
\
			/* beta11 = beta11 / alpha11; */ \
			/* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead
			   of alpha11, so we can multiply rather than divide. We store
			   the inverse of alpha11 intentionally to avoid expensive
			   division instructions within the micro-kernel. */ \
			PASTEMAC(ch,scals)( *alpha11, beta11c ); \
\
			/* Output final result to matrix c. */ \
			PASTEMAC(ch,copys)( beta11c, *gamma11 ); \
\
			/* Store the local value back to b11. */ \
			PASTEMAC(ch,copys)( beta11c, *beta11 ); \
		} \
	} \
}

INSERT_GENTFUNC_BASIC2( trsmbb_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )


// Template for the upper-triangular counterpart of the kernel above. Rows
// are solved in decreasing order (i = m-1 .. 0), and the dot product for
// row i runs over the n_behind = iter rows below it, starting at column
// i+1 of a and row i+1 of b. Everything else, including the stored-inverse
// diagonal and the single write-back into b, matches the lower-triangular
// template.

#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, arch, suf ) \
\
void PASTEMAC3(ch,opname,arch,suf) \
     ( \
       ctype* restrict a, \
       ctype* restrict b, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t* restrict cntx \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \
	const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \
\
	const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); \
	const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \
\
	const dim_t m = mr; \
	const dim_t n = nr; \
\
	const inc_t rs_a = 1; \
	const inc_t cs_a = packmr; \
\
	const inc_t rs_b = packnr; \
\
	/* Assume that the degree of duplication is equal to packnr / nr. */ \
	const inc_t cs_b = packnr / nr; \
\
	dim_t iter, i, j, l; \
	dim_t n_behind; \
\
	for ( iter = 0; iter < m; ++iter ) \
	{ \
		i = m - iter - 1; \
		n_behind = iter; \
\
		ctype* restrict alpha11 = a + (i  )*rs_a + (i  )*cs_a; \
		ctype* restrict a12t = a + (i  )*rs_a + (i+1)*cs_a; \
		ctype* restrict b1 = b + (i  )*rs_b + (0  )*cs_b; \
		ctype* restrict B2 = b + (i+1)*rs_b + (0  )*cs_b; \
\
		/* b1 = b1 - a12t * B2; */ \
		/* b1 = b1 / alpha11; */ \
		for ( j = 0; j < n; ++j ) \
		{ \
			ctype* restrict beta11 = b1 + (0  )*rs_b + (j  )*cs_b; \
			ctype* restrict b21 = B2 + (0  )*rs_b + (j  )*cs_b; \
			ctype* restrict gamma11 = c + (i  )*rs_c + (j  )*cs_c; \
			ctype beta11c = *beta11; \
			ctype rho11; \
\
			/* beta11 = beta11 - a12t * b21; */ \
			PASTEMAC(ch,set0s)( rho11 ); \
			for ( l = 0; l < n_behind; ++l ) \
			{ \
				ctype* restrict alpha12 = a12t + (l  )*cs_a; \
				ctype* restrict beta21 = b21 + (l  )*rs_b; \
\
				PASTEMAC(ch,axpys)( *alpha12, *beta21, rho11 ); \
			} \
			PASTEMAC(ch,subs)( rho11, beta11c ); \
\
			/* beta11 = beta11 / alpha11; */ \
			/* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead
			   of alpha11, so we can multiply rather than divide. We store
			   the inverse of alpha11 intentionally to avoid expensive
			   division instructions within the micro-kernel. */ \
			PASTEMAC(ch,scals)( *alpha11, beta11c ); \
\
			/* Output final result to matrix c. */ \
			PASTEMAC(ch,copys)( beta11c, *gamma11 ); \
\
			/* Store the local value back to b11. */ \
			PASTEMAC(ch,copys)( beta11c, *beta11 ); \
		} \
	} \
}

INSERT_GENTFUNC_BASIC2( trsmbb_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
blis-0.6.1/ref_kernels/3/bli_gemm_ref.c000066400000000000000000000164171360743507500177060ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #if 1 // An implementation that attempts to facilitate emission of vectorized // instructions via constant loop bounds + #pragma omp simd directives. 
// Template notes for this variant:
// - mr and nr are template arguments, i.e. compile-time constants supplied
//   by the four explicit GENTFUNC() instantiations below, so every loop
//   bound is constant.
// - The accumulator ab is stored by rows (rs_ab = nr, cs_ab = 1).
// - a advances by mr and b advances by nr per rank-1 update.
// - The result is ab := alpha * ( a * b ). Then c := ab when beta is zero
//   (c is overwritten, never read), otherwise c is updated from ab and beta
//   through xpbys. c is traversed by rows when cs_c == 1 and by columns
//   otherwise.
// - data and cntx are not referenced.
//
// NOTE(review): because cntx is never consulted, the panel strides are
// correct only when a and b were packed with leading dimensions equal to
// the hard-coded mr and nr. The disabled variant further below queries
// packmr/packnr from cntx instead -- confirm that no configuration pairs
// this kernel with different blocksizes.
//
// PRAGMA_SIMD is defined elsewhere; presumably it expands to the
// "#pragma omp simd" directive mentioned at the top of this file.

#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, arch, suf, mr, nr ) \
\
void PASTEMAC3(ch,opname,arch,suf) \
     ( \
       dim_t k, \
       ctype* restrict alpha, \
       ctype* restrict a, \
       ctype* restrict b, \
       ctype* restrict beta, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t* restrict cntx \
     ) \
{ \
	ctype ab[ BLIS_STACK_BUF_MAX_SIZE \
	          / sizeof( ctype ) ] \
	          __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const inc_t rs_ab = nr; \
	const inc_t cs_ab = 1; \
\
	const inc_t cs_a = mr; \
	const inc_t rs_b = nr; \
\
\
	/* Initialize the accumulator elements in ab to zero. */ \
	PRAGMA_SIMD \
	for ( dim_t i = 0; i < mr * nr; ++i ) \
	{ \
		PASTEMAC(ch,set0s)( ab[ i ] ); \
	} \
\
	/* Perform a series of k rank-1 updates into ab. */ \
	for ( dim_t l = 0; l < k; ++l ) \
	{ \
		for ( dim_t i = 0; i < mr; ++i ) \
		{ \
			PRAGMA_SIMD \
			for ( dim_t j = 0; j < nr; ++j ) \
			{ \
				PASTEMAC(ch,dots) \
				( \
				  a[ i ], \
				  b[ j ], \
				  ab[ i*rs_ab + j*cs_ab ] \
				); \
			} \
		} \
\
		a += cs_a; \
		b += rs_b; \
	} \
\
	/* Scale the result in ab by alpha. */ \
	PRAGMA_SIMD \
	for ( dim_t i = 0; i < mr * nr; ++i ) \
	{ \
		PASTEMAC(ch,scals)( *alpha, ab[ i ] ); \
	} \
\
	/* Output/accumulate intermediate result ab based on the storage
	   of c and the value of beta. */ \
	if ( cs_c == 1 ) \
	{ \
		/* C is row-stored. */ \
\
		if ( PASTEMAC(ch,eq0)( *beta ) ) \
		{ \
			for ( dim_t i = 0; i < mr; ++i ) \
			for ( dim_t j = 0; j < nr; ++j ) \
			PASTEMAC(ch,copys) \
			( \
			  ab[ i*rs_ab + j*cs_ab ], \
			  c [ i*rs_c + j*1 ] \
			); \
		} \
		else \
		{ \
			for ( dim_t i = 0; i < mr; ++i ) \
			for ( dim_t j = 0; j < nr; ++j ) \
			PASTEMAC(ch,xpbys) \
			( \
			  ab[ i*rs_ab + j*cs_ab ], \
			  *beta, \
			  c [ i*rs_c + j*1 ] \
			); \
		} \
	} \
	else \
	{ \
		/* C is column-stored or general-stored. */ \
\
		if ( PASTEMAC(ch,eq0)( *beta ) ) \
		{ \
			for ( dim_t j = 0; j < nr; ++j ) \
			for ( dim_t i = 0; i < mr; ++i ) \
			PASTEMAC(ch,copys) \
			( \
			  ab[ i*rs_ab + j*cs_ab ], \
			  c [ i*rs_c + j*cs_c ] \
			); \
		} \
		else \
		{ \
			for ( dim_t j = 0; j < nr; ++j ) \
			for ( dim_t i = 0; i < mr; ++i ) \
			PASTEMAC(ch,xpbys) \
			( \
			  ab[ i*rs_ab + j*cs_ab ], \
			  *beta, \
			  c [ i*rs_c + j*cs_c ] \
			); \
		} \
	} \
}

// Explicit instantiations; the last two arguments are mr and nr.
//INSERT_GENTFUNC_BASIC2( gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )
GENTFUNC( float, s, gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 16 )
GENTFUNC( double, d, gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 8 )
GENTFUNC( scomplex, c, gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 8 )
GENTFUNC( dcomplex, z, gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 4 )

#else

// An implementation that uses variable loop bounds (queried from the context)
// and makes no use of #pragma omp simd.
//
// In this variant m and n are the default MR and NR blocksizes from cntx,
// a advances by packmr and b by packnr (the maximum blocksizes), and the
// accumulator ab is filled in column-major order (rs_ab = 1, cs_ab = mr).

#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, arch, suf ) \
\
void PASTEMAC3(ch,opname,arch,suf) \
     ( \
       dim_t k, \
       ctype* restrict alpha, \
       ctype* restrict a, \
       ctype* restrict b, \
       ctype* restrict beta, \
       ctype* restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t* restrict cntx \
     ) \
{ \
	const num_t dt = PASTEMAC(ch,type); \
\
	const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \
	const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \
\
	const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); \
	const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \
\
	const dim_t m = mr; \
	const dim_t n = nr; \
\
	const inc_t cs_a = packmr; \
\
	const inc_t rs_b = packnr; \
\
	ctype ab[ BLIS_STACK_BUF_MAX_SIZE \
	          / sizeof( ctype ) ] \
	          __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const inc_t rs_ab = 1; \
	const inc_t cs_ab = mr; \
\
	dim_t l, j, i; \
\
	ctype ai; \
	ctype bj; \
\
\
	/* Initialize the accumulator elements in ab to zero. */ \
	for ( i = 0; i < m * n; ++i ) \
	{ \
		PASTEMAC(ch,set0s)( *(ab + i) ); \
	} \
\
	/* Perform a series of k rank-1 updates into ab. */ \
	for ( l = 0; l < k; ++l ) \
	{ \
		ctype* restrict abij = ab; \
\
		/* In an optimized implementation, these two loops over MR and NR
		   are typically fully unrolled. */ \
		for ( j = 0; j < n; ++j ) \
		{ \
			bj = *(b + j); \
\
			for ( i = 0; i < m; ++i ) \
			{ \
				ai = *(a + i); \
\
				PASTEMAC(ch,dots)( ai, bj, *abij ); \
\
				abij += rs_ab; \
			} \
		} \
\
		a += cs_a; \
		b += rs_b; \
	} \
\
	/* Scale the result in ab by alpha. */ \
	for ( i = 0; i < m * n; ++i ) \
	{ \
		PASTEMAC(ch,scals)( *alpha, *(ab + i) ); \
	} \
\
	/* If beta is zero, overwrite c with the scaled result in ab. Otherwise,
	   scale by beta and then add the scaled result in ab. */ \
	if ( PASTEMAC(ch,eq0)( *beta ) ) \
	{ \
		PASTEMAC(ch,copys_mxn)( m, \
		                        n, \
		                        ab, rs_ab, cs_ab, \
		                        c, rs_c, cs_c ); \
	} \
	else \
	{ \
		PASTEMAC(ch,xpbys_mxn)( m, \
		                        n, \
		                        ab, rs_ab, cs_ab, \
		                        beta, \
		                        c, rs_c, cs_c ); \
	} \
}

INSERT_GENTFUNC_BASIC2( gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )

#endif
blis-0.6.1/ref_kernels/3/bli_gemmsup_ref.c000066400000000000000000000362621360743507500204360ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // -- Row storage case --------------------------------------------------------- // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ /* NOTE: This microkernel can actually handle arbitrarily large values of m, n, and k. */ \ \ /* Traverse c by rows. */ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype* restrict ci = &c[ i*rs_c ]; \ ctype* restrict ai = &a[ i*rs_a ]; \ \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype* restrict cij = &ci[ j*cs_c ]; \ ctype* restrict bj = &b [ j*cs_b ]; \ ctype ab; \ \ PASTEMAC(ch,set0s)( ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ { \ ctype* restrict aij = &ai[ l*cs_a ]; \ ctype* restrict bij = &bj[ l*rs_b ]; \ \ PASTEMAC(ch,dots)( *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. 
Otherwise, scale by beta and accumulate ab to c. */ \ if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ } \ else if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ } \ else \ { \ PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( gemmsup_r, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) // // -- Column storage case ------------------------------------------------------ // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ /* NOTE: This microkernel can actually handle arbitrarily large values of m, n, and k. */ \ \ /* Traverse c by columns. */ \ for ( dim_t j = 0; j < n; ++j ) \ { \ ctype* restrict cj = &c[ j*cs_c ]; \ ctype* restrict bj = &b[ j*cs_b ]; \ \ for ( dim_t i = 0; i < m; ++i ) \ { \ ctype* restrict cij = &cj[ i*rs_c ]; \ ctype* restrict ai = &a [ i*rs_a ]; \ ctype ab; \ \ PASTEMAC(ch,set0s)( ab ); \ \ /* Perform a dot product to update the (i,j) element of c. */ \ for ( dim_t l = 0; l < k; ++l ) \ { \ ctype* restrict aij = &ai[ l*cs_a ]; \ ctype* restrict bij = &bj[ l*rs_b ]; \ \ PASTEMAC(ch,dots)( *aij, *bij, ab ); \ } \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. 
*/ \ if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ PASTEMAC(ch,axpys)( *alpha, ab, *cij ); \ } \ else if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ PASTEMAC(ch,scal2s)( *alpha, ab, *cij ); \ } \ else \ { \ PASTEMAC(ch,axpbys)( *alpha, ab, *beta, *cij ); \ } \ } \ } \ } INSERT_GENTFUNC_BASIC2( gemmsup_c, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) // // -- General storage case ----------------------------------------------------- // INSERT_GENTFUNC_BASIC2( gemmsup_g, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #if 0 // // -- Row storage case --------------------------------------------------------- // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const dim_t mn = m * n; \ \ ctype ab[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const inc_t rs_ab = n; \ const inc_t cs_ab = 1; \ \ \ /* Assumptions: m <= mr, n <= nr so that the temporary array ab is sufficiently large enough to hold the m x n microtile. The ability to handle m < mr and n < nr is being provided so that optimized ukernels can call one of these reference implementations for their edge cases, if they choose. When they do so, they will need to call the function directly, by its configuration-mangled name, since it will have been overwritten in the context when the optimized ukernel functions are registered. */ \ \ \ /* Initialize the accumulator elements in ab to zero. */ \ for ( dim_t i = 0; i < mn; ++i ) \ { \ PASTEMAC(ch,set0s)( ab[i] ); \ } \ \ /* Perform a series of k rank-1 updates into ab. */ \ for ( dim_t l = 0; l < k; ++l ) \ { \ /* Traverse ab by rows; assume cs_ab = 1. 
*/ \ for ( dim_t i = 0; i < m; ++i ) \ { \ for ( dim_t j = 0; j < n; ++j ) \ { \ PASTEMAC(ch,dots) \ ( \ a[ i*rs_a ], \ b[ j*cs_b ], \ ab[ i*rs_ab + j*cs_ab ] \ ); \ } \ } \ \ a += cs_a; \ b += rs_b; \ } \ \ /* Scale the result in ab by alpha. */ \ for ( dim_t i = 0; i < mn; ++i ) \ { \ PASTEMAC(ch,scals)( *alpha, ab[i] ); \ } \ \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. */ \ if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ /* Traverse ab and c by rows; assume cs_a = cs_a = 1. */ \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < n; ++j ) \ { \ PASTEMAC(ch,adds) \ ( \ ab[ i*rs_ab + j*1 ], \ c[ i*rs_c + j*1 ] \ ) \ } \ } \ else if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ \ /* Traverse ab and c by rows; assume cs_a = cs_a = 1. */ \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < n; ++j ) \ { \ PASTEMAC(ch,copys) \ ( \ ab[ i*rs_ab + j*1 ], \ c[ i*rs_c + j*1 ] \ ) \ } \ } \ else /* beta != 0 && beta != 1 */ \ { \ /* Traverse ab and c by rows; assume cs_a = cs_a = 1. 
*/ \ for ( dim_t i = 0; i < m; ++i ) \ for ( dim_t j = 0; j < n; ++j ) \ { \ PASTEMAC(ch,xpbys) \ ( \ ab[ i*rs_ab + j*1 ], \ *beta, \ c[ i*rs_c + j*1 ] \ ) \ } \ } \ } INSERT_GENTFUNC_BASIC2( gemmsup_r, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) // // -- Column storage case ------------------------------------------------------ // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const dim_t mn = m * n; \ \ ctype ab[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const inc_t rs_ab = 1; \ const inc_t cs_ab = m; \ \ \ /* Assumptions: m <= mr, n <= nr so that the temporary array ab is sufficiently large enough to hold the m x n microtile. The ability to handle m < mr and n < nr is being provided so that optimized ukernels can call one of these reference implementations for their edge cases, if they choose. When they do so, they will need to call the function directly, by its configuration-mangled name, since it will have been overwritten in the context when the optimized ukernel functions are registered. */ \ \ \ /* Initialize the accumulator elements in ab to zero. */ \ for ( dim_t i = 0; i < mn; ++i ) \ { \ PASTEMAC(ch,set0s)( ab[i] ); \ } \ \ /* Perform a series of k rank-1 updates into ab. */ \ for ( dim_t l = 0; l < k; ++l ) \ { \ /* Traverse ab by columns; assume rs_ab = 1. */ \ for ( dim_t j = 0; j < n; ++j ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,dots) \ ( \ a[ i*rs_a ], \ b[ j*cs_b ], \ ab[ i*rs_ab + j*cs_ab ] \ ); \ } \ } \ \ a += cs_a; \ b += rs_b; \ } \ \ /* Scale the result in ab by alpha. 
*/ \ for ( dim_t i = 0; i < mn; ++i ) \ { \ PASTEMAC(ch,scals)( *alpha, ab[i] ); \ } \ \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. */ \ if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ /* Traverse ab and c by columns; assume rs_a = rs_a = 1. */ \ for ( dim_t j = 0; j < n; ++j ) \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,adds) \ ( \ ab[ i*1 + j*cs_ab ], \ c[ i*1 + j*cs_c ] \ ) \ } \ } \ else if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* Traverse ab and c by columns; assume rs_a = rs_a = 1. */ \ for ( dim_t j = 0; j < n; ++j ) \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,copys) \ ( \ ab[ i*1 + j*cs_ab ], \ c[ i*1 + j*cs_c ] \ ) \ } \ } \ else /* beta != 0 && beta != 1 */ \ { \ /* Traverse ab and c by columns; assume rs_a = rs_a = 1. */ \ for ( dim_t j = 0; j < n; ++j ) \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,xpbys) \ ( \ ab[ i*1 + j*cs_ab ], \ *beta, \ c[ i*1 + j*cs_c ] \ ) \ } \ } \ } INSERT_GENTFUNC_BASIC2( gemmsup_c, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) // // -- General storage case ----------------------------------------------------- // #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ conj_t conja, \ conj_t conjb, \ dim_t m, \ dim_t n, \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, inc_t rs_a, inc_t cs_a, \ ctype* restrict b, inc_t rs_b, inc_t cs_b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const dim_t mn = m * n; \ \ ctype ab[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const inc_t rs_ab = 1; \ const inc_t cs_ab = m; \ \ \ /* Assumptions: m <= mr, n <= nr so that the temporary array ab is sufficiently large enough to hold the m x n microtile. 
The ability to handle m < mr and n < nr is being provided so that optimized ukernels can call one of these reference implementations for their edge cases, if they choose. When they do so, they will need to call the function directly, by its configuration-mangled name, since it will have been overwritten in the context when the optimized ukernel functions are registered. */ \ \ \ /* Initialize the accumulator elements in ab to zero. */ \ for ( dim_t i = 0; i < mn; ++i ) \ { \ PASTEMAC(ch,set0s)( ab[i] ); \ } \ \ /* Perform a series of k rank-1 updates into ab. */ \ for ( dim_t l = 0; l < k; ++l ) \ { \ /* General storage: doesn't matter how we traverse ab. */ \ for ( dim_t j = 0; j < n; ++j ) \ { \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,dots) \ ( \ a[ i*rs_a ], \ b[ j*cs_b ], \ ab[ i*rs_ab + j*cs_ab ] \ ); \ } \ } \ \ a += cs_a; \ b += rs_b; \ } \ \ /* Scale the result in ab by alpha. */ \ for ( dim_t i = 0; i < mn; ++i ) \ { \ PASTEMAC(ch,scals)( *alpha, ab[i] ); \ } \ \ \ /* If beta is one, add ab into c. If beta is zero, overwrite c with the result in ab. Otherwise, scale by beta and accumulate ab to c. */ \ if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ /* General storage: doesn't matter how we traverse ab and c. */ \ for ( dim_t j = 0; j < n; ++j ) \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,adds) \ ( \ ab[ i*rs_ab + j*cs_ab ], \ c[ i*rs_c + j*cs_c ] \ ) \ } \ } \ else if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ /* General storage: doesn't matter how we traverse ab and c. */ \ for ( dim_t j = 0; j < n; ++j ) \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,copys) \ ( \ ab[ i*rs_ab + j*cs_ab ], \ c[ i*rs_c + j*cs_c ] \ ) \ } \ } \ else /* beta != 0 && beta != 1 */ \ { \ /* General storage: doesn't matter how we traverse ab and c. 
*/ \ for ( dim_t j = 0; j < n; ++j ) \ for ( dim_t i = 0; i < m; ++i ) \ { \ PASTEMAC(ch,xpbys) \ ( \ ab[ i*rs_ab + j*cs_ab ], \ *beta, \ c[ i*rs_c + j*cs_c ] \ ) \ } \ } \ } INSERT_GENTFUNC_BASIC2( gemmsup_g, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #endif blis-0.6.1/ref_kernels/3/bli_gemmtrsm_ref.c000066400000000000000000000066321360743507500206120ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf, trsmkerid ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ ctype* minus_one = PASTEMAC(ch,m1); \ \ PASTECH(ch,gemm_ukr_ft) \ gemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ PASTECH(ch,trsm_ukr_ft) \ trsm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt, trsmkerid, cntx ); \ \ /* lower: b11 = alpha * b11 - a10 * b01; */ \ /* upper: b11 = alpha * b11 - a12 * b21; */ \ gemm_ukr \ ( \ k, \ minus_one, \ a1x, \ bx1, \ alpha, \ b11, rs_b, cs_b, \ data, \ cntx \ ); \ \ /* b11 = inv(a11) * b11; c11 = b11; */ \ trsm_ukr \ ( \ a11, \ b11, \ c11, rs_c, cs_c, \ data, \ cntx \ ); \ \ /* PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b0111p_r after", k+3, 8, \ ( double* )b01, 2*PASTEMAC(ch,packnr), 2, "%4.1f", "" ); \ PASTEMAC(d,fprintm)( stdout, "gemmtrsm_ukr: b0111p_i after", k+3, 8, \ ( double* )b01 + 1, 2*PASTEMAC(ch,packnr), 2, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNC_BASIC3( gemmtrsm_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_L_UKR ) INSERT_GENTFUNC_BASIC3( gemmtrsm_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_U_UKR ) blis-0.6.1/ref_kernels/3/bli_trsm_ref.c000066400000000000000000000252411360743507500177410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #if 0 // An implementation that attempts to facilitate emission of vectorized // instructions via constant loop bounds + #pragma omp simd directives. 
#undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf, mr, nr ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const inc_t rs_a = 1; \ const inc_t cs_a = mr; \ \ const inc_t rs_b = nr; \ const inc_t cs_b = 1; \ \ PRAGMA_SIMD \ for ( dim_t i = 0; i < mr; ++i ) \ { \ /* b1 = b1 - a10t * B0; */ \ /* b1 = b1 / alpha11; */ \ for ( dim_t j = 0; j < nr; ++j ) \ { \ ctype beta11c = b[i*rs_b + j*cs_b]; \ ctype rho11; \ \ /* beta11 = beta11 - a10t * b01; */ \ PASTEMAC(ch,set0s)( rho11 ); \ for ( dim_t l = 0; l < i; ++l ) \ { \ PASTEMAC(ch,axpys)( a[i*rs_a + l*cs_a], \ b[l*rs_b + j*cs_b], rho11 ); \ } \ PASTEMAC(ch,subs)( rho11, beta11c ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scals)( a[i*rs_a + i*cs_a], beta11c ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,copys)( beta11c, c[i*rs_c + j*cs_c] ); \ \ /* Store the local value back to b11. 
*/ \ PASTEMAC(ch,copys)( beta11c, b[i*rs_b + j*cs_b] ); \ } \ } \ } //INSERT_GENTFUNC_BASIC2( trsm_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) GENTFUNC( float, s, trsm_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 16 ) GENTFUNC( double, d, trsm_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 8 ) GENTFUNC( scomplex, c, trsm_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 8 ) GENTFUNC( dcomplex, z, trsm_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 4 ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf, mr, nr ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const inc_t rs_a = 1; \ const inc_t cs_a = mr; \ \ const inc_t rs_b = nr; \ const inc_t cs_b = 1; \ \ PRAGMA_SIMD \ for ( dim_t iter = 0; iter < mr; ++iter ) \ { \ dim_t i = mr - iter - 1; \ \ /* b1 = b1 - a12t * B2; */ \ /* b1 = b1 / alpha11; */ \ for ( dim_t j = 0; j < nr; ++j ) \ { \ ctype beta11c = b[i*rs_b + j*cs_b]; \ ctype rho11; \ \ /* beta11 = beta11 - a12t * b21; */ \ PASTEMAC(ch,set0s)( rho11 ); \ for ( dim_t l = 0; l < iter; ++l ) \ { \ PASTEMAC(ch,axpys)( a[i*rs_a + (i+1+l)*cs_a], \ b[(i+1+l)*rs_b + j*cs_b], rho11 ); \ } \ PASTEMAC(ch,subs)( rho11, beta11c ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scals)( a[i*rs_a + i*cs_a], beta11c ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,copys)( beta11c, c[i*rs_c + j*cs_c] ); \ \ /* Store the local value back to b11. 
*/ \ PASTEMAC(ch,copys)( beta11c, b[i*rs_b + j*cs_b] ); \ } \ } \ } //INSERT_GENTFUNC_BASIC2( trsm_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) GENTFUNC( float, s, trsm_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 16 ) GENTFUNC( double, d, trsm_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 8 ) GENTFUNC( scomplex, c, trsm_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 8 ) GENTFUNC( dcomplex, z, trsm_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 4 ) #else // An implementation that uses variable loop bounds (queried from the context) // and makes no use of #pragma omp simd. #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ \ const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ const inc_t rs_a = 1; \ const inc_t cs_a = packmr; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = iter; \ n_behind = i; \ \ ctype* restrict alpha11 = a + (i )*rs_a + (i )*cs_a; \ ctype* restrict a10t = a + (i )*rs_a + (0 )*cs_a; \ ctype* restrict B0 = b + (0 )*rs_b + (0 )*cs_b; \ ctype* restrict b1 = b + (i )*rs_b + (0 )*cs_b; \ \ /* b1 = b1 - a10t * B0; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype* restrict b01 = B0 + (0 )*rs_b + (j )*cs_b; \ ctype* restrict beta11 = b1 + (0 )*rs_b + (j )*cs_b; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype beta11c = *beta11; \ ctype rho11; \ \ /* beta11 = beta11 - a10t * b01; */ \ PASTEMAC(ch,set0s)( rho11 ); \ for ( l = 0; l < n_behind; 
++l ) \ { \ ctype* restrict alpha10 = a10t + (l )*cs_a; \ ctype* restrict beta01 = b01 + (l )*rs_b; \ \ PASTEMAC(ch,axpys)( *alpha10, *beta01, rho11 ); \ } \ PASTEMAC(ch,subs)( rho11, beta11c ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scals)( *alpha11, beta11c ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,copys)( beta11c, *gamma11 ); \ \ /* Store the local value back to b11. */ \ PASTEMAC(ch,copys)( beta11c, *beta11 ); \ } \ } \ } INSERT_GENTFUNC_BASIC2( trsm_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ \ const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ const inc_t rs_a = 1; \ const inc_t cs_a = packmr; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = m - iter - 1; \ n_behind = iter; \ \ ctype* restrict alpha11 = a + (i )*rs_a + (i )*cs_a; \ ctype* restrict a12t = a + (i )*rs_a + (i+1)*cs_a; \ ctype* restrict b1 = b + (i )*rs_b + (0 )*cs_b; \ ctype* restrict B2 = b + (i+1)*rs_b + (0 )*cs_b; \ \ /* b1 = b1 - a12t * B2; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype* restrict beta11 = b1 + (0 )*rs_b + (j )*cs_b; \ ctype* restrict b21 = B2 + 
(0 )*rs_b + (j )*cs_b; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype beta11c = *beta11; \ ctype rho11; \ \ /* beta11 = beta11 - a12t * b21; */ \ PASTEMAC(ch,set0s)( rho11 ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype* restrict alpha12 = a12t + (l )*cs_a; \ ctype* restrict beta21 = b21 + (l )*rs_b; \ \ PASTEMAC(ch,axpys)( *alpha12, *beta21, rho11 ); \ } \ PASTEMAC(ch,subs)( rho11, beta11c ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scals)( *alpha11, beta11c ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,copys)( beta11c, *gamma11 ); \ \ /* Store the local value back to b11. */ \ PASTEMAC(ch,copys)( beta11c, *beta11 ); \ } \ } \ } INSERT_GENTFUNC_BASIC2( trsm_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) #endif blis-0.6.1/ref_kernels/3/old/000077500000000000000000000000001360743507500157005ustar00rootroot00000000000000blis-0.6.1/ref_kernels/3/old/bli_gemm_simd_ref.c000066400000000000000000000121121360743507500214640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, arch, suf, mr, nr ) \ \ void PASTEMAC4(ch,opname,arch,_simd,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ ctype ab[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const inc_t rs_ab = nr; \ const inc_t cs_ab = 1; \ \ const inc_t cs_a = mr; \ const inc_t rs_b = nr; \ \ \ /* Initialize the accumulator elements in ab to zero. 
*/ \ PRAGMA_SIMD \ for ( dim_t i = 0; i < mr * nr; ++i ) \ { \ PASTEMAC(ch,set0s)( ab[ i ] ); \ } \ \ /* const dim_t pre = 16; \ dim_t k16; \ if ( k >= pre ) { k16 = k - pre; k = pre; } \ else { k16 = 0; } \ \ for ( dim_t l = 0; l < k16; ++l ) \ { \ for ( dim_t i = 0; i < mr; ++i ) \ { \ PRAGMA_SIMD \ for ( dim_t j = 0; j < nr; ++j ) \ { \ PASTEMAC(ch,dots) \ ( \ a[ i ], \ b[ j ], \ ab[ i*rs_ab + j*cs_ab ] \ ); \ } \ } \ \ a += cs_a; \ b += rs_b; \ } \ \ __builtin_prefetch( c + 0*cs_c, 1, 0 ); \ __builtin_prefetch( c + 1*cs_c, 1, 0 ); \ __builtin_prefetch( c + 2*cs_c, 1, 0 ); \ __builtin_prefetch( c + 3*cs_c, 1, 0 ); \ */ \ \ /* Perform a series of k rank-1 updates into ab. */ \ for ( dim_t l = 0; l < k; ++l ) \ { \ for ( dim_t i = 0; i < mr; ++i ) \ { \ PRAGMA_SIMD \ for ( dim_t j = 0; j < nr; ++j ) \ { \ PASTEMAC(ch,dots) \ ( \ a[ i ], \ b[ j ], \ ab[ i*rs_ab + j*cs_ab ] \ ); \ } \ } \ \ a += cs_a; \ b += rs_b; \ } \ \ /* Scale the result in ab by alpha. */ \ PRAGMA_SIMD \ for ( dim_t i = 0; i < mr * nr; ++i ) \ { \ PASTEMAC(ch,scals)( *alpha, ab[ i ] ); \ } \ \ /* Output/accumulate intermediate result ab based on the storage of c and the value of beta. */ \ if ( cs_c == 1 ) \ { \ /* C is row-stored. */ \ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ for ( dim_t i = 0; i < mr; ++i ) \ for ( dim_t j = 0; j < nr; ++j ) \ PASTEMAC(ch,copys) \ ( \ ab[ i*rs_ab + j*cs_ab ], \ c [ i*rs_c + j*1 ] \ ); \ } \ else \ { \ for ( dim_t i = 0; i < mr; ++i ) \ for ( dim_t j = 0; j < nr; ++j ) \ PASTEMAC(ch,xpbys) \ ( \ ab[ i*rs_ab + j*cs_ab ], \ *beta, \ c [ i*rs_c + j*1 ] \ ); \ } \ } \ else \ { \ /* C is column-stored or general-stored. 
*/ \ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ { \ for ( dim_t j = 0; j < nr; ++j ) \ for ( dim_t i = 0; i < mr; ++i ) \ PASTEMAC(ch,copys) \ ( \ ab[ i*rs_ab + j*cs_ab ], \ c [ i*rs_c + j*1 ] \ ); \ } \ else \ { \ for ( dim_t j = 0; j < nr; ++j ) \ for ( dim_t i = 0; i < mr; ++i ) \ PASTEMAC(ch,xpbys) \ ( \ ab[ i*rs_ab + j*cs_ab ], \ *beta, \ c [ i*rs_c + j*1 ] \ ); \ } \ } \ } //INSERT_GENTFUNC_BASIC2( gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) GENTFUNC( float, s, gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 16 ) GENTFUNC( double, d, gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 8 ) GENTFUNC( scomplex, c, gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 8 ) GENTFUNC( dcomplex, z, gemm, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, 4, 4 ) blis-0.6.1/ref_kernels/3/old/bli_gemm_unrl_ref.c000066400000000000000000000302671360743507500215230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // // -- dgemm -------------------------------------------------------------------- // #undef CH #define CH d #undef CTYPE #define CTYPE double #undef ZERO #define ZERO 0.0 #undef MR #define MR 4 #undef NR #define NR 8 //void PASTEMAC4(CH,gemm,BLIS_CNAME_INFIX,BLIS_REF_SUF,_4x8) void PASTEMAC6(CH,gemm,BLIS_CNAME_REF_SUFFIX,_,MR,x,NR) ( dim_t k, CTYPE* restrict alpha, CTYPE* restrict a, CTYPE* restrict b, CTYPE* restrict beta, CTYPE* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { const dim_t cs_a = MR; const dim_t rs_b = NR; CTYPE ab00 = ZERO, ab01 = ZERO, ab02 = ZERO, ab03 = ZERO; CTYPE ab10 = ZERO, ab11 = ZERO, ab12 = ZERO, ab13 = ZERO; CTYPE ab20 = ZERO, ab21 = ZERO, ab22 = ZERO, ab23 = ZERO; CTYPE ab30 = ZERO, ab31 = ZERO, ab32 = ZERO, ab33 = ZERO; CTYPE ab04 = ZERO, ab05 = ZERO, ab06 = ZERO, ab07 = ZERO; CTYPE ab14 = ZERO, ab15 = ZERO, ab16 = ZERO, ab17 = ZERO; CTYPE ab24 = ZERO, ab25 = ZERO, ab26 = ZERO, ab27 = ZERO; CTYPE ab34 = ZERO, ab35 = ZERO, ab36 = ZERO, ab37 = ZERO; // Perform a series of k rank-1 updates into ab. 
for ( ; k != 0; --k ) { const CTYPE a0 = a[0]; ab00 += a0*b[0]; ab01 += a0*b[1]; ab02 += a0*b[2]; ab03 += a0*b[3]; ab04 += a0*b[4]; ab05 += a0*b[5]; ab06 += a0*b[6]; ab07 += a0*b[7]; const CTYPE a1 = a[1]; ab10 += a1*b[0]; ab11 += a1*b[1]; ab12 += a1*b[2]; ab13 += a1*b[3]; ab14 += a1*b[4]; ab15 += a1*b[5]; ab16 += a1*b[6]; ab17 += a1*b[7]; const CTYPE a2 = a[2]; ab20 += a2*b[0]; ab21 += a2*b[1]; ab22 += a2*b[2]; ab23 += a2*b[3]; ab24 += a2*b[4]; ab25 += a2*b[5]; ab26 += a2*b[6]; ab27 += a2*b[7]; const CTYPE a3 = a[3]; ab30 += a3*b[0]; ab31 += a3*b[1]; ab32 += a3*b[2]; ab33 += a3*b[3]; ab34 += a3*b[4]; ab35 += a3*b[5]; ab36 += a3*b[6]; ab37 += a3*b[7]; a += cs_a; b += rs_b; } // Scale each element of ab by alpha. if ( !PASTEMAC(CH,eq1)( *alpha ) ) { const CTYPE alpha0 = *alpha; PASTEMAC(CH,scals)( alpha0, ab00 ); PASTEMAC(CH,scals)( alpha0, ab01 ); PASTEMAC(CH,scals)( alpha0, ab02 ); PASTEMAC(CH,scals)( alpha0, ab02 ); PASTEMAC(CH,scals)( alpha0, ab04 ); PASTEMAC(CH,scals)( alpha0, ab05 ); PASTEMAC(CH,scals)( alpha0, ab06 ); PASTEMAC(CH,scals)( alpha0, ab07 ); PASTEMAC(CH,scals)( alpha0, ab10 ); PASTEMAC(CH,scals)( alpha0, ab11 ); PASTEMAC(CH,scals)( alpha0, ab12 ); PASTEMAC(CH,scals)( alpha0, ab12 ); PASTEMAC(CH,scals)( alpha0, ab14 ); PASTEMAC(CH,scals)( alpha0, ab15 ); PASTEMAC(CH,scals)( alpha0, ab16 ); PASTEMAC(CH,scals)( alpha0, ab17 ); PASTEMAC(CH,scals)( alpha0, ab20 ); PASTEMAC(CH,scals)( alpha0, ab21 ); PASTEMAC(CH,scals)( alpha0, ab22 ); PASTEMAC(CH,scals)( alpha0, ab22 ); PASTEMAC(CH,scals)( alpha0, ab24 ); PASTEMAC(CH,scals)( alpha0, ab25 ); PASTEMAC(CH,scals)( alpha0, ab26 ); PASTEMAC(CH,scals)( alpha0, ab27 ); PASTEMAC(CH,scals)( alpha0, ab30 ); PASTEMAC(CH,scals)( alpha0, ab31 ); PASTEMAC(CH,scals)( alpha0, ab32 ); PASTEMAC(CH,scals)( alpha0, ab32 ); PASTEMAC(CH,scals)( alpha0, ab34 ); PASTEMAC(CH,scals)( alpha0, ab35 ); PASTEMAC(CH,scals)( alpha0, ab36 ); PASTEMAC(CH,scals)( alpha0, ab37 ); } // Output/accumulate intermediate result ab based on the 
storage // of c and the value of beta. if ( cs_c == 1 ) { // C is row-stored. if ( PASTEMAC(CH,eq0)( *beta ) ) { // beta == 0: // c := ab PASTEMAC(CH,copys)( ab00, c[ 0*rs_c + 0 ] ); PASTEMAC(CH,copys)( ab01, c[ 0*rs_c + 1 ] ); PASTEMAC(CH,copys)( ab02, c[ 0*rs_c + 2 ] ); PASTEMAC(CH,copys)( ab03, c[ 0*rs_c + 3 ] ); PASTEMAC(CH,copys)( ab04, c[ 0*rs_c + 4 ] ); PASTEMAC(CH,copys)( ab05, c[ 0*rs_c + 5 ] ); PASTEMAC(CH,copys)( ab06, c[ 0*rs_c + 6 ] ); PASTEMAC(CH,copys)( ab07, c[ 0*rs_c + 7 ] ); PASTEMAC(CH,copys)( ab10, c[ 1*rs_c + 0 ] ); PASTEMAC(CH,copys)( ab11, c[ 1*rs_c + 1 ] ); PASTEMAC(CH,copys)( ab12, c[ 1*rs_c + 2 ] ); PASTEMAC(CH,copys)( ab13, c[ 1*rs_c + 3 ] ); PASTEMAC(CH,copys)( ab14, c[ 1*rs_c + 4 ] ); PASTEMAC(CH,copys)( ab15, c[ 1*rs_c + 5 ] ); PASTEMAC(CH,copys)( ab16, c[ 1*rs_c + 6 ] ); PASTEMAC(CH,copys)( ab17, c[ 1*rs_c + 7 ] ); PASTEMAC(CH,copys)( ab20, c[ 2*rs_c + 0 ] ); PASTEMAC(CH,copys)( ab21, c[ 2*rs_c + 1 ] ); PASTEMAC(CH,copys)( ab22, c[ 2*rs_c + 2 ] ); PASTEMAC(CH,copys)( ab23, c[ 2*rs_c + 3 ] ); PASTEMAC(CH,copys)( ab24, c[ 2*rs_c + 4 ] ); PASTEMAC(CH,copys)( ab25, c[ 2*rs_c + 5 ] ); PASTEMAC(CH,copys)( ab26, c[ 2*rs_c + 6 ] ); PASTEMAC(CH,copys)( ab27, c[ 2*rs_c + 7 ] ); PASTEMAC(CH,copys)( ab30, c[ 3*rs_c + 0 ] ); PASTEMAC(CH,copys)( ab31, c[ 3*rs_c + 1 ] ); PASTEMAC(CH,copys)( ab32, c[ 3*rs_c + 2 ] ); PASTEMAC(CH,copys)( ab33, c[ 3*rs_c + 3 ] ); PASTEMAC(CH,copys)( ab34, c[ 3*rs_c + 4 ] ); PASTEMAC(CH,copys)( ab35, c[ 3*rs_c + 5 ] ); PASTEMAC(CH,copys)( ab36, c[ 3*rs_c + 6 ] ); PASTEMAC(CH,copys)( ab37, c[ 3*rs_c + 7 ] ); } else { const CTYPE beta0 = *beta; // beta != 0: // c := beta * c + ab PASTEMAC(CH,xpbys)( ab00, beta0, c[ 0*rs_c + 0 ] ); PASTEMAC(CH,xpbys)( ab01, beta0, c[ 0*rs_c + 1 ] ); PASTEMAC(CH,xpbys)( ab02, beta0, c[ 0*rs_c + 2 ] ); PASTEMAC(CH,xpbys)( ab03, beta0, c[ 0*rs_c + 3 ] ); PASTEMAC(CH,xpbys)( ab04, beta0, c[ 0*rs_c + 4 ] ); PASTEMAC(CH,xpbys)( ab05, beta0, c[ 0*rs_c + 5 ] ); PASTEMAC(CH,xpbys)( ab06, beta0, c[ 
0*rs_c + 6 ] ); PASTEMAC(CH,xpbys)( ab07, beta0, c[ 0*rs_c + 7 ] ); PASTEMAC(CH,xpbys)( ab10, beta0, c[ 1*rs_c + 0 ] ); PASTEMAC(CH,xpbys)( ab11, beta0, c[ 1*rs_c + 1 ] ); PASTEMAC(CH,xpbys)( ab12, beta0, c[ 1*rs_c + 2 ] ); PASTEMAC(CH,xpbys)( ab13, beta0, c[ 1*rs_c + 3 ] ); PASTEMAC(CH,xpbys)( ab14, beta0, c[ 1*rs_c + 4 ] ); PASTEMAC(CH,xpbys)( ab15, beta0, c[ 1*rs_c + 5 ] ); PASTEMAC(CH,xpbys)( ab16, beta0, c[ 1*rs_c + 6 ] ); PASTEMAC(CH,xpbys)( ab17, beta0, c[ 1*rs_c + 7 ] ); PASTEMAC(CH,xpbys)( ab20, beta0, c[ 2*rs_c + 0 ] ); PASTEMAC(CH,xpbys)( ab21, beta0, c[ 2*rs_c + 1 ] ); PASTEMAC(CH,xpbys)( ab22, beta0, c[ 2*rs_c + 2 ] ); PASTEMAC(CH,xpbys)( ab23, beta0, c[ 2*rs_c + 3 ] ); PASTEMAC(CH,xpbys)( ab24, beta0, c[ 2*rs_c + 4 ] ); PASTEMAC(CH,xpbys)( ab25, beta0, c[ 2*rs_c + 5 ] ); PASTEMAC(CH,xpbys)( ab26, beta0, c[ 2*rs_c + 6 ] ); PASTEMAC(CH,xpbys)( ab27, beta0, c[ 2*rs_c + 7 ] ); PASTEMAC(CH,xpbys)( ab30, beta0, c[ 3*rs_c + 0 ] ); PASTEMAC(CH,xpbys)( ab31, beta0, c[ 3*rs_c + 1 ] ); PASTEMAC(CH,xpbys)( ab32, beta0, c[ 3*rs_c + 2 ] ); PASTEMAC(CH,xpbys)( ab33, beta0, c[ 3*rs_c + 3 ] ); PASTEMAC(CH,xpbys)( ab34, beta0, c[ 3*rs_c + 4 ] ); PASTEMAC(CH,xpbys)( ab35, beta0, c[ 3*rs_c + 5 ] ); PASTEMAC(CH,xpbys)( ab36, beta0, c[ 3*rs_c + 6 ] ); PASTEMAC(CH,xpbys)( ab37, beta0, c[ 3*rs_c + 7 ] ); } } else { // C is general-stored (or column-stored). 
if ( PASTEMAC(CH,eq0)( *beta ) ) { // beta == 0: // c := ab PASTEMAC(CH,copys)( ab00, c[ 0*rs_c + 0*cs_c ] ); PASTEMAC(CH,copys)( ab01, c[ 0*rs_c + 1*cs_c ] ); PASTEMAC(CH,copys)( ab02, c[ 0*rs_c + 2*cs_c ] ); PASTEMAC(CH,copys)( ab03, c[ 0*rs_c + 3*cs_c ] ); PASTEMAC(CH,copys)( ab04, c[ 0*rs_c + 4*cs_c ] ); PASTEMAC(CH,copys)( ab05, c[ 0*rs_c + 5*cs_c ] ); PASTEMAC(CH,copys)( ab06, c[ 0*rs_c + 6*cs_c ] ); PASTEMAC(CH,copys)( ab07, c[ 0*rs_c + 7*cs_c ] ); PASTEMAC(CH,copys)( ab10, c[ 1*rs_c + 0*cs_c ] ); PASTEMAC(CH,copys)( ab11, c[ 1*rs_c + 1*cs_c ] ); PASTEMAC(CH,copys)( ab12, c[ 1*rs_c + 2*cs_c ] ); PASTEMAC(CH,copys)( ab13, c[ 1*rs_c + 3*cs_c ] ); PASTEMAC(CH,copys)( ab14, c[ 1*rs_c + 4*cs_c ] ); PASTEMAC(CH,copys)( ab15, c[ 1*rs_c + 5*cs_c ] ); PASTEMAC(CH,copys)( ab16, c[ 1*rs_c + 6*cs_c ] ); PASTEMAC(CH,copys)( ab17, c[ 1*rs_c + 7*cs_c ] ); PASTEMAC(CH,copys)( ab20, c[ 2*rs_c + 0*cs_c ] ); PASTEMAC(CH,copys)( ab21, c[ 2*rs_c + 1*cs_c ] ); PASTEMAC(CH,copys)( ab22, c[ 2*rs_c + 2*cs_c ] ); PASTEMAC(CH,copys)( ab23, c[ 2*rs_c + 3*cs_c ] ); PASTEMAC(CH,copys)( ab24, c[ 2*rs_c + 4*cs_c ] ); PASTEMAC(CH,copys)( ab25, c[ 2*rs_c + 5*cs_c ] ); PASTEMAC(CH,copys)( ab26, c[ 2*rs_c + 6*cs_c ] ); PASTEMAC(CH,copys)( ab27, c[ 2*rs_c + 7*cs_c ] ); PASTEMAC(CH,copys)( ab30, c[ 3*rs_c + 0*cs_c ] ); PASTEMAC(CH,copys)( ab31, c[ 3*rs_c + 1*cs_c ] ); PASTEMAC(CH,copys)( ab32, c[ 3*rs_c + 2*cs_c ] ); PASTEMAC(CH,copys)( ab33, c[ 3*rs_c + 3*cs_c ] ); PASTEMAC(CH,copys)( ab34, c[ 3*rs_c + 4*cs_c ] ); PASTEMAC(CH,copys)( ab35, c[ 3*rs_c + 5*cs_c ] ); PASTEMAC(CH,copys)( ab36, c[ 3*rs_c + 6*cs_c ] ); PASTEMAC(CH,copys)( ab37, c[ 3*rs_c + 7*cs_c ] ); } else { const CTYPE beta0 = *beta; // beta != 0: // c := beta * c + ab PASTEMAC(CH,xpbys)( ab00, beta0, c[ 0*rs_c + 0*cs_c ] ); PASTEMAC(CH,xpbys)( ab01, beta0, c[ 0*rs_c + 1*cs_c ] ); PASTEMAC(CH,xpbys)( ab02, beta0, c[ 0*rs_c + 2*cs_c ] ); PASTEMAC(CH,xpbys)( ab03, beta0, c[ 0*rs_c + 3*cs_c ] ); PASTEMAC(CH,xpbys)( ab04, beta0, c[ 
0*rs_c + 4*cs_c ] ); PASTEMAC(CH,xpbys)( ab05, beta0, c[ 0*rs_c + 5*cs_c ] ); PASTEMAC(CH,xpbys)( ab06, beta0, c[ 0*rs_c + 6*cs_c ] ); PASTEMAC(CH,xpbys)( ab07, beta0, c[ 0*rs_c + 7*cs_c ] ); PASTEMAC(CH,xpbys)( ab10, beta0, c[ 1*rs_c + 0*cs_c ] ); PASTEMAC(CH,xpbys)( ab11, beta0, c[ 1*rs_c + 1*cs_c ] ); PASTEMAC(CH,xpbys)( ab12, beta0, c[ 1*rs_c + 2*cs_c ] ); PASTEMAC(CH,xpbys)( ab13, beta0, c[ 1*rs_c + 3*cs_c ] ); PASTEMAC(CH,xpbys)( ab14, beta0, c[ 1*rs_c + 4*cs_c ] ); PASTEMAC(CH,xpbys)( ab15, beta0, c[ 1*rs_c + 5*cs_c ] ); PASTEMAC(CH,xpbys)( ab16, beta0, c[ 1*rs_c + 6*cs_c ] ); PASTEMAC(CH,xpbys)( ab17, beta0, c[ 1*rs_c + 7*cs_c ] ); PASTEMAC(CH,xpbys)( ab20, beta0, c[ 2*rs_c + 0*cs_c ] ); PASTEMAC(CH,xpbys)( ab21, beta0, c[ 2*rs_c + 1*cs_c ] ); PASTEMAC(CH,xpbys)( ab22, beta0, c[ 2*rs_c + 2*cs_c ] ); PASTEMAC(CH,xpbys)( ab23, beta0, c[ 2*rs_c + 3*cs_c ] ); PASTEMAC(CH,xpbys)( ab24, beta0, c[ 2*rs_c + 4*cs_c ] ); PASTEMAC(CH,xpbys)( ab25, beta0, c[ 2*rs_c + 5*cs_c ] ); PASTEMAC(CH,xpbys)( ab26, beta0, c[ 2*rs_c + 6*cs_c ] ); PASTEMAC(CH,xpbys)( ab27, beta0, c[ 2*rs_c + 7*cs_c ] ); PASTEMAC(CH,xpbys)( ab30, beta0, c[ 3*rs_c + 0*cs_c ] ); PASTEMAC(CH,xpbys)( ab31, beta0, c[ 3*rs_c + 1*cs_c ] ); PASTEMAC(CH,xpbys)( ab32, beta0, c[ 3*rs_c + 2*cs_c ] ); PASTEMAC(CH,xpbys)( ab33, beta0, c[ 3*rs_c + 3*cs_c ] ); PASTEMAC(CH,xpbys)( ab34, beta0, c[ 3*rs_c + 4*cs_c ] ); PASTEMAC(CH,xpbys)( ab35, beta0, c[ 3*rs_c + 5*cs_c ] ); PASTEMAC(CH,xpbys)( ab36, beta0, c[ 3*rs_c + 6*cs_c ] ); PASTEMAC(CH,xpbys)( ab37, beta0, c[ 3*rs_c + 7*cs_c ] ); } } } blis-0.6.1/ref_kernels/bli_cntx_ref.c000066400000000000000000001053441360743507500175710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" // -- Instantiate kernel prototypes for the current architecture --------------- // Define macros to construct the full symbol name from the operation name. 
// Name-construction helpers. Each pastes an operation name together with
// BLIS_CNAME_INFIX and a suffix macro. Every macro in this section is
// #undef'ed immediately before it is #define'd so that this file's definition
// replaces any definition that may already be in effect.
#undef  GENARNAME  // architecture, _ref (no bli_)
#define GENARNAME(opname)  PASTECH2(opname,BLIS_CNAME_INFIX,BLIS_REF_SUFFIX)
#undef  GENBARNAME // bli_, architecture, _ref
#define GENBARNAME(opname) PASTEMAC2(opname,BLIS_CNAME_INFIX,BLIS_REF_SUFFIX)
#undef  GENBAINAME // bli_, architecture, _ind
#define GENBAINAME(opname) PASTEMAC2(opname,BLIS_CNAME_INFIX,BLIS_IND_SUFFIX)

// -- Level-3 native micro-kernel prototype redefinitions ----------------------

// -- prototypes for completely generic level-3 microkernels --

// Each *_ukr_name macro is pointed at the GENARNAME()-constructed reference
// name before the API template header below is included.
#undef  gemm_ukr_name
#define gemm_ukr_name       GENARNAME(gemm)
#undef  gemmtrsm_l_ukr_name
#define gemmtrsm_l_ukr_name GENARNAME(gemmtrsm_l)
#undef  gemmtrsm_u_ukr_name
#define gemmtrsm_u_ukr_name GENARNAME(gemmtrsm_u)
#undef  trsm_l_ukr_name
#define trsm_l_ukr_name     GENARNAME(trsm_l)
#undef  trsm_u_ukr_name
#define trsm_u_ukr_name     GENARNAME(trsm_u)

// Instantiate prototypes for above functions via the native micro-kernel API
// template.
#include "bli_l3_ukr.h"

// -- Level-3 virtual micro-kernel prototype redefinitions ---------------------

// One group of names per induced method. Only 3m1, 4m1, and 1m define
// gemmtrsm/trsm names; 3mh, 4mh, and 4mb define a gemm name only.

// -- 3mh --

#undef  gemm3mh_ukr_name
#define gemm3mh_ukr_name       GENARNAME(gemm3mh)

// -- 3m1 --

#undef  gemm3m1_ukr_name
#define gemm3m1_ukr_name       GENARNAME(gemm3m1)
#undef  gemmtrsm3m1_l_ukr_name
#define gemmtrsm3m1_l_ukr_name GENARNAME(gemmtrsm3m1_l)
#undef  gemmtrsm3m1_u_ukr_name
#define gemmtrsm3m1_u_ukr_name GENARNAME(gemmtrsm3m1_u)
#undef  trsm3m1_l_ukr_name
#define trsm3m1_l_ukr_name     GENARNAME(trsm3m1_l)
#undef  trsm3m1_u_ukr_name
#define trsm3m1_u_ukr_name     GENARNAME(trsm3m1_u)

// -- 4mh --

#undef  gemm4mh_ukr_name
#define gemm4mh_ukr_name       GENARNAME(gemm4mh)

// -- 4mb --

#undef  gemm4mb_ukr_name
#define gemm4mb_ukr_name       GENARNAME(gemm4mb)

// -- 4m1 --

#undef  gemm4m1_ukr_name
#define gemm4m1_ukr_name       GENARNAME(gemm4m1)
#undef  gemmtrsm4m1_l_ukr_name
#define gemmtrsm4m1_l_ukr_name GENARNAME(gemmtrsm4m1_l)
#undef  gemmtrsm4m1_u_ukr_name
#define gemmtrsm4m1_u_ukr_name GENARNAME(gemmtrsm4m1_u)
#undef  trsm4m1_l_ukr_name
#define trsm4m1_l_ukr_name     GENARNAME(trsm4m1_l)
#undef  trsm4m1_u_ukr_name
#define trsm4m1_u_ukr_name     GENARNAME(trsm4m1_u)

// -- 1m --

#undef  gemm1m_ukr_name
#define gemm1m_ukr_name        GENARNAME(gemm1m)
#undef  gemmtrsm1m_l_ukr_name
#define gemmtrsm1m_l_ukr_name  GENARNAME(gemmtrsm1m_l)
#undef  gemmtrsm1m_u_ukr_name
#define gemmtrsm1m_u_ukr_name  GENARNAME(gemmtrsm1m_u)
#undef  trsm1m_l_ukr_name
#define trsm1m_l_ukr_name      GENARNAME(trsm1m_l)
#undef  trsm1m_u_ukr_name
#define trsm1m_u_ukr_name      GENARNAME(trsm1m_u)

// Instantiate prototypes for above functions via the virtual micro-kernel API
// template.
#include "bli_l3_ind_ukr.h"

// -- Level-3 small/unpacked micro-kernel prototype definitions ----------------

// NOTE: This results in redundant prototypes for gemmsup_r and gemmsup_c
// kernels, but since they will be identical the compiler won't complain.
// (The rv and rg names both map to gemmsup_r, and the cv and cg names both
// map to gemmsup_c; gx maps to gemmsup_g.)

#undef  gemmsup_rv_ukr_name
#define gemmsup_rv_ukr_name GENARNAME(gemmsup_r)
#undef  gemmsup_rg_ukr_name
#define gemmsup_rg_ukr_name GENARNAME(gemmsup_r)
#undef  gemmsup_cv_ukr_name
#define gemmsup_cv_ukr_name GENARNAME(gemmsup_c)
#undef  gemmsup_cg_ukr_name
#define gemmsup_cg_ukr_name GENARNAME(gemmsup_c)

#undef  gemmsup_gx_ukr_name
#define gemmsup_gx_ukr_name GENARNAME(gemmsup_g)

// Include the small/unpacked kernel API template.
#include "bli_l3_sup_ker.h"

// -- Level-1m (packm/unpackm) kernel prototype redefinitions ------------------

// Native packm kernels.
#undef  packm_2xk_ker_name
#define packm_2xk_ker_name  GENARNAME(packm_2xk)
#undef  packm_3xk_ker_name
#define packm_3xk_ker_name  GENARNAME(packm_3xk)
#undef  packm_4xk_ker_name
#define packm_4xk_ker_name  GENARNAME(packm_4xk)
#undef  packm_6xk_ker_name
#define packm_6xk_ker_name  GENARNAME(packm_6xk)
#undef  packm_8xk_ker_name
#define packm_8xk_ker_name  GENARNAME(packm_8xk)
#undef  packm_10xk_ker_name
#define packm_10xk_ker_name GENARNAME(packm_10xk)
#undef  packm_12xk_ker_name
#define packm_12xk_ker_name GENARNAME(packm_12xk)
#undef  packm_14xk_ker_name
#define packm_14xk_ker_name GENARNAME(packm_14xk)
#undef  packm_16xk_ker_name
#define packm_16xk_ker_name GENARNAME(packm_16xk)
#undef  packm_24xk_ker_name
#define packm_24xk_ker_name GENARNAME(packm_24xk)

// Native unpackm kernels.
#undef  unpackm_2xk_ker_name
#define unpackm_2xk_ker_name  GENARNAME(unpackm_2xk)
#undef  unpackm_4xk_ker_name
#define unpackm_4xk_ker_name  GENARNAME(unpackm_4xk)
#undef  unpackm_6xk_ker_name
#define unpackm_6xk_ker_name  GENARNAME(unpackm_6xk)
#undef  unpackm_8xk_ker_name
#define unpackm_8xk_ker_name  GENARNAME(unpackm_8xk)
#undef  unpackm_10xk_ker_name
#define unpackm_10xk_ker_name GENARNAME(unpackm_10xk)
#undef  unpackm_12xk_ker_name
#define unpackm_12xk_ker_name GENARNAME(unpackm_12xk)
#undef  unpackm_14xk_ker_name
#define unpackm_14xk_ker_name GENARNAME(unpackm_14xk)
#undef  unpackm_16xk_ker_name
#define unpackm_16xk_ker_name GENARNAME(unpackm_16xk)

// packm kernels with the _3mis suffix.
#undef  packm_2xk_3mis_ker_name
#define packm_2xk_3mis_ker_name  GENARNAME(packm_2xk_3mis)
#undef  packm_4xk_3mis_ker_name
#define packm_4xk_3mis_ker_name  GENARNAME(packm_4xk_3mis)
#undef  packm_6xk_3mis_ker_name
#define packm_6xk_3mis_ker_name  GENARNAME(packm_6xk_3mis)
#undef  packm_8xk_3mis_ker_name
#define packm_8xk_3mis_ker_name  GENARNAME(packm_8xk_3mis)
#undef  packm_10xk_3mis_ker_name
#define packm_10xk_3mis_ker_name GENARNAME(packm_10xk_3mis)
#undef  packm_12xk_3mis_ker_name
#define packm_12xk_3mis_ker_name GENARNAME(packm_12xk_3mis)
#undef  packm_14xk_3mis_ker_name
#define packm_14xk_3mis_ker_name GENARNAME(packm_14xk_3mis)
#undef  packm_16xk_3mis_ker_name
#define packm_16xk_3mis_ker_name GENARNAME(packm_16xk_3mis)

// packm kernels with the _4mi suffix.
#undef  packm_2xk_4mi_ker_name
#define packm_2xk_4mi_ker_name  GENARNAME(packm_2xk_4mi)
#undef  packm_3xk_4mi_ker_name
#define packm_3xk_4mi_ker_name  GENARNAME(packm_3xk_4mi)
#undef  packm_4xk_4mi_ker_name
#define packm_4xk_4mi_ker_name  GENARNAME(packm_4xk_4mi)
#undef  packm_6xk_4mi_ker_name
#define packm_6xk_4mi_ker_name  GENARNAME(packm_6xk_4mi)
#undef  packm_8xk_4mi_ker_name
#define packm_8xk_4mi_ker_name  GENARNAME(packm_8xk_4mi)
#undef  packm_10xk_4mi_ker_name
#define packm_10xk_4mi_ker_name GENARNAME(packm_10xk_4mi)
#undef  packm_12xk_4mi_ker_name
#define packm_12xk_4mi_ker_name GENARNAME(packm_12xk_4mi)
#undef  packm_14xk_4mi_ker_name
#define packm_14xk_4mi_ker_name GENARNAME(packm_14xk_4mi)
#undef  packm_16xk_4mi_ker_name
#define packm_16xk_4mi_ker_name GENARNAME(packm_16xk_4mi)

// packm kernels with the _rih suffix.
#undef  packm_2xk_rih_ker_name
#define packm_2xk_rih_ker_name  GENARNAME(packm_2xk_rih)
#undef  packm_4xk_rih_ker_name
#define packm_4xk_rih_ker_name  GENARNAME(packm_4xk_rih)
#undef  packm_6xk_rih_ker_name
#define packm_6xk_rih_ker_name  GENARNAME(packm_6xk_rih)
#undef  packm_8xk_rih_ker_name
#define packm_8xk_rih_ker_name  GENARNAME(packm_8xk_rih)
#undef  packm_10xk_rih_ker_name
#define packm_10xk_rih_ker_name GENARNAME(packm_10xk_rih)
#undef  packm_12xk_rih_ker_name
#define packm_12xk_rih_ker_name GENARNAME(packm_12xk_rih)
#undef  packm_14xk_rih_ker_name
#define packm_14xk_rih_ker_name GENARNAME(packm_14xk_rih)
#undef  packm_16xk_rih_ker_name
#define packm_16xk_rih_ker_name GENARNAME(packm_16xk_rih)

// packm kernels with the _1er suffix.
#undef  packm_2xk_1er_ker_name
#define packm_2xk_1er_ker_name  GENARNAME(packm_2xk_1er)
#undef  packm_4xk_1er_ker_name
#define packm_4xk_1er_ker_name  GENARNAME(packm_4xk_1er)
#undef  packm_6xk_1er_ker_name
#define packm_6xk_1er_ker_name  GENARNAME(packm_6xk_1er)
#undef  packm_8xk_1er_ker_name
#define packm_8xk_1er_ker_name  GENARNAME(packm_8xk_1er)
#undef  packm_10xk_1er_ker_name
#define packm_10xk_1er_ker_name GENARNAME(packm_10xk_1er)
#undef  packm_12xk_1er_ker_name
#define packm_12xk_1er_ker_name GENARNAME(packm_12xk_1er)
#undef  packm_14xk_1er_ker_name
#define packm_14xk_1er_ker_name GENARNAME(packm_14xk_1er)
#undef  packm_16xk_1er_ker_name
#define packm_16xk_1er_ker_name GENARNAME(packm_16xk_1er)

// Instantiate prototypes for above functions via the level-1m kernel API
// template.
#include "bli_l1m_ker.h"

// -- Level-1f kernel prototype redefinitions ----------------------------------

#undef  axpy2v_ker_name
#define axpy2v_ker_name     GENARNAME(axpy2v)
#undef  dotaxpyv_ker_name
#define dotaxpyv_ker_name   GENARNAME(dotaxpyv)
#undef  axpyf_ker_name
#define axpyf_ker_name      GENARNAME(axpyf)
#undef  dotxf_ker_name
#define dotxf_ker_name      GENARNAME(dotxf)
#undef  dotxaxpyf_ker_name
#define dotxaxpyf_ker_name  GENARNAME(dotxaxpyf)

// Instantiate prototypes for above functions via the level-1f kernel API
// template.
#include "bli_l1f_ker.h"

// -- Level-1v kernel prototype redefinitions ----------------------------------

// -- prototypes for completely generic level-1v kernels --

#undef  addv_ker_name
#define addv_ker_name     GENARNAME(addv)
#undef  amaxv_ker_name
#define amaxv_ker_name    GENARNAME(amaxv)
#undef  axpbyv_ker_name
#define axpbyv_ker_name   GENARNAME(axpbyv)
#undef  axpyv_ker_name
#define axpyv_ker_name    GENARNAME(axpyv)
#undef  copyv_ker_name
#define copyv_ker_name    GENARNAME(copyv)
#undef  dotv_ker_name
#define dotv_ker_name     GENARNAME(dotv)
#undef  dotxv_ker_name
#define dotxv_ker_name    GENARNAME(dotxv)
#undef  invertv_ker_name
#define invertv_ker_name  GENARNAME(invertv)
#undef  scalv_ker_name
#define scalv_ker_name    GENARNAME(scalv)
#undef  scal2v_ker_name
#define scal2v_ker_name   GENARNAME(scal2v)
#undef  setv_ker_name
#define setv_ker_name     GENARNAME(setv)
#undef  subv_ker_name
#define subv_ker_name     GENARNAME(subv)
#undef  swapv_ker_name
#define swapv_ker_name    GENARNAME(swapv)
#undef  xpbyv_ker_name
#define xpbyv_ker_name    GENARNAME(xpbyv)

// Instantiate prototypes for above functions via the level-1v kernel API
// template.
#include "bli_l1v_ker.h"

// -- Macros to help concisely instantiate bli_func_init() ---------------------

// gen_func_init_co(): call bli_func_init() on func_p, passing NULL for the
// first two function arguments and the c- and z-prefixed variants of opname
// for the last two.
#define gen_func_init_co( func_p, opname ) \
{ \
	bli_func_init( func_p, NULL,               NULL, \
	                       PASTEMAC(c,opname), PASTEMAC(z,opname) ); \
}

// gen_func_init(): call bli_func_init() on func_p with the s-, d-, c-, and
// z-prefixed variants of opname.
#define gen_func_init( func_p, opname ) \
{ \
	bli_func_init( func_p, PASTEMAC(s,opname), PASTEMAC(d,opname), \
	                       PASTEMAC(c,opname), PASTEMAC(z,opname) ); \
}

// gen_sup_func_init(): same as gen_func_init(), but applied to two func_t
// objects (func0_p and func1_p) with the same opname. Its only uses in this
// file are inside a disabled "#if 0" region.
#define gen_sup_func_init( func0_p, func1_p, opname ) \
{ \
	bli_func_init( func0_p, PASTEMAC(s,opname), PASTEMAC(d,opname), \
	                        PASTEMAC(c,opname), PASTEMAC(z,opname) ); \
	bli_func_init( func1_p, PASTEMAC(s,opname), PASTEMAC(d,opname), \
	                        PASTEMAC(c,opname), PASTEMAC(z,opname) ); \
}

// -----------------------------------------------------------------------------

// GENBARNAME(cntx_init)(): clear cntx and populate it for native (BLIS_NAT)
// execution with the default blocksizes, sup thresholds/handlers, and the
// reference kernels whose names were constructed via GENARNAME() above.
//
//   cntx - the context to clear and initialize; it is overwritten.
void GENBARNAME(cntx_init)
     (
       cntx_t* cntx
     )
{
	blksz_t  blkszs[ BLIS_NUM_BLKSZS ];
	blksz_t  thresh[ BLIS_NUM_THRESH ];
	func_t*  funcs;
	mbool_t* mbools;
	dim_t    i;
	void**   vfuncs;


	// -- Clear the context ----------------------------------------------------

	bli_cntx_clear( cntx );


	// -- Set blocksizes -------------------------------------------------------

	//                                          s     d     c     z
	bli_blksz_init_easy( &blkszs[ BLIS_KR ],     1,    1,    1,    1 );
	bli_blksz_init_easy( &blkszs[ BLIS_MR ],     4,    4,    4,    4 );
	bli_blksz_init_easy( &blkszs[ BLIS_NR ],    16,    8,    8,    4 );
	bli_blksz_init_easy( &blkszs[ BLIS_MC ],   256,  128,  128,   64 );
	bli_blksz_init_easy( &blkszs[ BLIS_KC ],   256,  256,  256,  256 );
	bli_blksz_init_easy( &blkszs[ BLIS_NC ],  4096, 4096, 4096, 4096 );
	bli_blksz_init_easy( &blkszs[ BLIS_M2 ],  1000, 1000, 1000, 1000 );
	bli_blksz_init_easy( &blkszs[ BLIS_N2 ],  1000, 1000, 1000, 1000 );
	bli_blksz_init_easy( &blkszs[ BLIS_AF ],     8,    8,    8,    8 );
	bli_blksz_init_easy( &blkszs[ BLIS_DF ],     6,    6,    6,    6 );
	bli_blksz_init_easy( &blkszs[ BLIS_XF ],     4,    4,    4,    4 );

	// Initialize the context with the default blocksize objects and their
	// multiples. The count (11) must match the number of
	// (id, blksz_t*, multiple-id) triples that follow.
	bli_cntx_set_blkszs
	(
	  BLIS_NAT, 11,
	  BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR,
	  BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR,
	  BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR,
	  BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR,
	  BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR,
	  BLIS_KR, &blkszs[ BLIS_KR ], BLIS_KR,
	  BLIS_M2, &blkszs[ BLIS_M2 ], BLIS_M2,
	  BLIS_N2, &blkszs[ BLIS_N2 ], BLIS_N2,
	  BLIS_AF, &blkszs[ BLIS_AF ], BLIS_AF,
	  BLIS_DF, &blkszs[ BLIS_DF ], BLIS_DF,
	  BLIS_XF, &blkszs[ BLIS_XF ], BLIS_XF,
	  cntx
	);


	// -- Set level-3 virtual micro-kernels ------------------------------------

	funcs = bli_cntx_l3_vir_ukrs_buf( cntx );

	// NOTE: We set the virtual micro-kernel slots to contain the addresses
	// of the native micro-kernels. In general, the ukernels in the virtual
	// ukernel slots are always called, and if the function called happens to
	// be a virtual micro-kernel, it will then know to find its native
	// ukernel in the native ukernel slots.
	gen_func_init( &funcs[ BLIS_GEMM_UKR ],       gemm_ukr_name       );
	gen_func_init( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm_l_ukr_name );
	gen_func_init( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm_u_ukr_name );
	gen_func_init( &funcs[ BLIS_TRSM_L_UKR ],     trsm_l_ukr_name     );
	gen_func_init( &funcs[ BLIS_TRSM_U_UKR ],     trsm_u_ukr_name     );


	// -- Set level-3 native micro-kernels and preferences ---------------------

	funcs  = bli_cntx_l3_nat_ukrs_buf( cntx );
	mbools = bli_cntx_l3_nat_ukrs_prefs_buf( cntx );

	gen_func_init( &funcs[ BLIS_GEMM_UKR ],       gemm_ukr_name       );
	gen_func_init( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm_l_ukr_name );
	gen_func_init( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm_u_ukr_name );
	gen_func_init( &funcs[ BLIS_TRSM_L_UKR ],     trsm_l_ukr_name     );
	gen_func_init( &funcs[ BLIS_TRSM_U_UKR ],     trsm_u_ukr_name     );

	// Per-datatype preference flags for each native micro-kernel slot.
	bli_mbool_init( &mbools[ BLIS_GEMM_UKR ],       TRUE,  TRUE,  TRUE,  TRUE  );
	bli_mbool_init( &mbools[ BLIS_GEMMTRSM_L_UKR ], FALSE, FALSE, FALSE, FALSE );
	bli_mbool_init( &mbools[ BLIS_GEMMTRSM_U_UKR ], FALSE, FALSE, FALSE, FALSE );
	bli_mbool_init( &mbools[ BLIS_TRSM_L_UKR ],     FALSE, FALSE, FALSE, FALSE );
	bli_mbool_init( &mbools[ BLIS_TRSM_U_UKR ],     FALSE, FALSE, FALSE, FALSE );


	// -- Set level-3 small/unpacked thresholds --------------------------------

	// NOTE: The default thresholds are set very low so that the sup framework
	// only activates for exceedingly small dimensions. If a sub-configuration
	// registers optimized sup kernels, then that sub-configuration should also
	// register new (probably larger) thresholds that are almost surely more
	// appropriate than these default values.
	//                                          s     d     c     z
	bli_blksz_init_easy( &thresh[ BLIS_MT ],     0,    0,    0,    0 );
	bli_blksz_init_easy( &thresh[ BLIS_NT ],     0,    0,    0,    0 );
	bli_blksz_init_easy( &thresh[ BLIS_KT ],     0,    0,    0,    0 );

	// Initialize the context with the default thresholds.
	bli_cntx_set_l3_sup_thresh
	(
	  3,
	  BLIS_MT, &thresh[ BLIS_MT ],
	  BLIS_NT, &thresh[ BLIS_NT ],
	  BLIS_KT, &thresh[ BLIS_KT ],
	  cntx
	);


	// -- Set level-3 small/unpacked handlers ----------------------------------

	vfuncs = bli_cntx_l3_sup_handlers_buf( cntx );

	// Initialize all of the function pointers to NULL.
	for ( i = 0; i < BLIS_NUM_LEVEL3_OPS; ++i ) vfuncs[ i ] = NULL;

	// The level-3 sup handlers are oapi-based, so we only set one slot per
	// operation.

	// Set the gemm slot to the default gemm sup handler.
	vfuncs[ BLIS_GEMM ] = bli_gemmsup_ref;


	// -- Set level-3 small/unpacked micro-kernels and preferences -------------

	funcs  = bli_cntx_l3_sup_kers_buf( cntx );
	mbools = bli_cntx_l3_sup_kers_prefs_buf( cntx );

#if 0
	// Adhere to the small/unpacked ukernel mappings:
	// - rv -> rrr, rcr
	// - rg -> rrc, rcc
	// - cv -> ccr, ccc
	// - cg -> crr, crc
	gen_sup_func_init( &funcs[ BLIS_RRR ],
	                   &funcs[ BLIS_RCR ], gemmsup_rv_ukr_name );
	gen_sup_func_init( &funcs[ BLIS_RRC ],
	                   &funcs[ BLIS_RCC ], gemmsup_rg_ukr_name );
	gen_sup_func_init( &funcs[ BLIS_CCR ],
	                   &funcs[ BLIS_CCC ], gemmsup_cv_ukr_name );
	gen_sup_func_init( &funcs[ BLIS_CRR ],
	                   &funcs[ BLIS_CRC ], gemmsup_cg_ukr_name );
#endif
	// Active mapping: every R** slot gets the rv name and every C** slot gets
	// the cv name (the rg/cg names are only referenced in the disabled region
	// above).
	gen_func_init( &funcs[ BLIS_RRR ], gemmsup_rv_ukr_name );
	gen_func_init( &funcs[ BLIS_RRC ], gemmsup_rv_ukr_name );
	gen_func_init( &funcs[ BLIS_RCR ], gemmsup_rv_ukr_name );
	gen_func_init( &funcs[ BLIS_RCC ], gemmsup_rv_ukr_name );

	gen_func_init( &funcs[ BLIS_CRR ], gemmsup_cv_ukr_name );
	gen_func_init( &funcs[ BLIS_CRC ], gemmsup_cv_ukr_name );
	gen_func_init( &funcs[ BLIS_CCR ], gemmsup_cv_ukr_name );
	gen_func_init( &funcs[ BLIS_CCC ], gemmsup_cv_ukr_name );

	// Register the general-stride/generic ukernel to the "catch-all" slot
	// associated with the BLIS_XXX enum value. This slot will be queried if
	// *any* operand is stored with general stride.
	gen_func_init( &funcs[ BLIS_XXX ], gemmsup_gx_ukr_name );

	// Set the l3 sup ukernel storage preferences.
	bli_mbool_init( &mbools[ BLIS_RRR ], TRUE,  TRUE,  TRUE,  TRUE  );
	bli_mbool_init( &mbools[ BLIS_RRC ], TRUE,  TRUE,  TRUE,  TRUE  );
	bli_mbool_init( &mbools[ BLIS_RCR ], TRUE,  TRUE,  TRUE,  TRUE  );
	bli_mbool_init( &mbools[ BLIS_RCC ], TRUE,  TRUE,  TRUE,  TRUE  );

	bli_mbool_init( &mbools[ BLIS_CRR ], FALSE, FALSE, FALSE, FALSE );
	bli_mbool_init( &mbools[ BLIS_CRC ], FALSE, FALSE, FALSE, FALSE );
	bli_mbool_init( &mbools[ BLIS_CCR ], FALSE, FALSE, FALSE, FALSE );
	bli_mbool_init( &mbools[ BLIS_CCC ], FALSE, FALSE, FALSE, FALSE );

	bli_mbool_init( &mbools[ BLIS_XXX ], FALSE, FALSE, FALSE, FALSE );


	// -- Set level-1f kernels -------------------------------------------------

	funcs = bli_cntx_l1f_kers_buf( cntx );

	gen_func_init( &funcs[ BLIS_AXPY2V_KER ],    axpy2v_ker_name    );
	gen_func_init( &funcs[ BLIS_DOTAXPYV_KER ],  dotaxpyv_ker_name  );
	gen_func_init( &funcs[ BLIS_AXPYF_KER ],     axpyf_ker_name     );
	gen_func_init( &funcs[ BLIS_DOTXF_KER ],     dotxf_ker_name     );
	gen_func_init( &funcs[ BLIS_DOTXAXPYF_KER ], dotxaxpyf_ker_name );


	// -- Set level-1v kernels -------------------------------------------------

	funcs = bli_cntx_l1v_kers_buf( cntx );

	gen_func_init( &funcs[ BLIS_ADDV_KER ],    addv_ker_name    );
	gen_func_init( &funcs[ BLIS_AMAXV_KER ],   amaxv_ker_name   );
	gen_func_init( &funcs[ BLIS_AXPBYV_KER ],  axpbyv_ker_name  );
	gen_func_init( &funcs[ BLIS_AXPYV_KER ],   axpyv_ker_name   );
	gen_func_init( &funcs[ BLIS_COPYV_KER ],   copyv_ker_name   );
	gen_func_init( &funcs[ BLIS_DOTV_KER ],    dotv_ker_name    );
	gen_func_init( &funcs[ BLIS_DOTXV_KER ],   dotxv_ker_name   );
	gen_func_init( &funcs[ BLIS_INVERTV_KER ], invertv_ker_name );
	gen_func_init( &funcs[ BLIS_SCALV_KER ],   scalv_ker_name   );
	gen_func_init( &funcs[ BLIS_SCAL2V_KER ],  scal2v_ker_name  );
	gen_func_init( &funcs[ BLIS_SETV_KER ],    setv_ker_name    );
	gen_func_init( &funcs[ BLIS_SUBV_KER ],    subv_ker_name    );
	gen_func_init( &funcs[ BLIS_SWAPV_KER ],   swapv_ker_name   );
	gen_func_init( &funcs[ BLIS_XPBYV_KER ],   xpbyv_ker_name   );


	// -- Set level-1m (packm/unpackm) kernels ---------------------------------

	funcs = bli_cntx_packm_kers_buf( cntx );

	// Initialize all packm kernel func_t entries to NULL.
	for ( i = BLIS_PACKM_0XK_KER; i <= BLIS_PACKM_31XK_KER; ++i )
	{
		bli_func_init_null( &funcs[ i ] );
	}

	// Only the panel dimensions listed here get a reference packm kernel;
	// every other slot keeps the null initialization from the loop above.
	gen_func_init( &funcs[ BLIS_PACKM_2XK_KER ],  packm_2xk_ker_name  );
	gen_func_init( &funcs[ BLIS_PACKM_3XK_KER ],  packm_3xk_ker_name  );
	gen_func_init( &funcs[ BLIS_PACKM_4XK_KER ],  packm_4xk_ker_name  );
	gen_func_init( &funcs[ BLIS_PACKM_6XK_KER ],  packm_6xk_ker_name  );
	gen_func_init( &funcs[ BLIS_PACKM_8XK_KER ],  packm_8xk_ker_name  );
	gen_func_init( &funcs[ BLIS_PACKM_10XK_KER ], packm_10xk_ker_name );
	gen_func_init( &funcs[ BLIS_PACKM_12XK_KER ], packm_12xk_ker_name );
	gen_func_init( &funcs[ BLIS_PACKM_14XK_KER ], packm_14xk_ker_name );
	gen_func_init( &funcs[ BLIS_PACKM_16XK_KER ], packm_16xk_ker_name );
	gen_func_init( &funcs[ BLIS_PACKM_24XK_KER ], packm_24xk_ker_name );

	funcs = bli_cntx_unpackm_kers_buf( cntx );

	// Initialize all unpackm kernel func_t entries to NULL.
	for ( i = BLIS_UNPACKM_0XK_KER; i <= BLIS_UNPACKM_31XK_KER; ++i )
	{
		bli_func_init_null( &funcs[ i ] );
	}

	gen_func_init( &funcs[ BLIS_UNPACKM_2XK_KER ],  unpackm_2xk_ker_name  );
	gen_func_init( &funcs[ BLIS_UNPACKM_4XK_KER ],  unpackm_4xk_ker_name  );
	gen_func_init( &funcs[ BLIS_UNPACKM_6XK_KER ],  unpackm_6xk_ker_name  );
	gen_func_init( &funcs[ BLIS_UNPACKM_8XK_KER ],  unpackm_8xk_ker_name  );
	gen_func_init( &funcs[ BLIS_UNPACKM_10XK_KER ], unpackm_10xk_ker_name );
	gen_func_init( &funcs[ BLIS_UNPACKM_12XK_KER ], unpackm_12xk_ker_name );
	gen_func_init( &funcs[ BLIS_UNPACKM_14XK_KER ], unpackm_14xk_ker_name );
	gen_func_init( &funcs[ BLIS_UNPACKM_16XK_KER ], unpackm_16xk_ker_name );


	// -- Set miscellaneous fields ---------------------------------------------

	bli_cntx_set_method( BLIS_NAT, cntx );

	bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS, cntx );
	bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS, cntx );
	bli_cntx_set_schema_c_panel( BLIS_NOT_PACKED,        cntx );

	//bli_cntx_set_anti_pref( FALSE, cntx );

	//bli_cntx_set_membrk( bli_membrk_query(), cntx );
}

// -----------------------------------------------------------------------------

// GENBAINAME(cntx_init)(): modify an already-populated context so that it
// uses the virtual micro-kernels, packm kernels, blocksize scalings, and pack
// schemas associated with the given induced method.
//
//   method - the induced method to configure cntx for.
//   dt     - datatype; only consulted in the BLIS_1M case, where it is passed
//            to the micro-kernel storage preference query.
//   cntx   - the context to modify in place.
void GENBAINAME(cntx_init)
     (
       ind_t   method,
       num_t   dt,
       cntx_t* cntx
     )
{
	func_t* funcs;
	dim_t   i;

	// This function is designed to modify a copy of an existing native
	// context to enable computation via an induced method for complex
	// domain level-3 operations. It is called by bli_gks_query_ind_cntx()
	// on a context after its contents are set by copying from the
	// architecture's native context.

	// -- Set induced method level-3 virtual micro-kernels ---------------------

	funcs = bli_cntx_l3_vir_ukrs_buf( cntx );

	// 3mh, 4mh, and 4mb do not support trsm. The trsm-related slots are
	// nulled here and re-populated below only by the methods that provide them.
	bli_func_init_null( &funcs[ BLIS_GEMMTRSM_L_UKR ] );
	bli_func_init_null( &funcs[ BLIS_GEMMTRSM_U_UKR ] );
	bli_func_init_null( &funcs[ BLIS_TRSM_L_UKR ] );
	bli_func_init_null( &funcs[ BLIS_TRSM_U_UKR ] );

	if ( method == BLIS_3MH )
	{
		gen_func_init_co( &funcs[ BLIS_GEMM_UKR ],       gemm3mh_ukr_name       );
	}
	else if ( method == BLIS_3M1 )
	{
		gen_func_init_co( &funcs[ BLIS_GEMM_UKR ],       gemm3m1_ukr_name       );
		gen_func_init_co( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm3m1_l_ukr_name );
		gen_func_init_co( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm3m1_u_ukr_name );
		gen_func_init_co( &funcs[ BLIS_TRSM_L_UKR ],     trsm3m1_l_ukr_name     );
		gen_func_init_co( &funcs[ BLIS_TRSM_U_UKR ],     trsm3m1_u_ukr_name     );
	}
	else if ( method == BLIS_4MH )
	{
		gen_func_init_co( &funcs[ BLIS_GEMM_UKR ],       gemm4mh_ukr_name       );
	}
	else if ( method == BLIS_4M1B )
	{
		gen_func_init_co( &funcs[ BLIS_GEMM_UKR ],       gemm4mb_ukr_name       );
	}
	else if ( method == BLIS_4M1A )
	{
		gen_func_init_co( &funcs[ BLIS_GEMM_UKR ],       gemm4m1_ukr_name       );
		gen_func_init_co( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm4m1_l_ukr_name );
		gen_func_init_co( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm4m1_u_ukr_name );
		gen_func_init_co( &funcs[ BLIS_TRSM_L_UKR ],     trsm4m1_l_ukr_name     );
		gen_func_init_co( &funcs[ BLIS_TRSM_U_UKR ],     trsm4m1_u_ukr_name     );
	}
	else if ( method == BLIS_1M )
	{
		gen_func_init_co( &funcs[ BLIS_GEMM_UKR ],       gemm1m_ukr_name       );
		gen_func_init_co( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm1m_l_ukr_name );
		gen_func_init_co( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm1m_u_ukr_name );
		gen_func_init_co( &funcs[ BLIS_TRSM_L_UKR ],     trsm1m_l_ukr_name     );
		gen_func_init_co( &funcs[ BLIS_TRSM_U_UKR ],     trsm1m_u_ukr_name     );
	}
	else // if ( method == BLIS_NAT )
	{
		// NOTE(review): this branch uses the complex-only initializer, which
		// passes NULL for the first two function arguments of bli_func_init()
		// (presumably the s and d slots). Presumably this function is never
		// invoked with BLIS_NAT on a context whose real-domain virtual slots
		// are still needed -- confirm against callers.
		gen_func_init_co( &funcs[ BLIS_GEMM_UKR ],       gemm_ukr_name       );
		gen_func_init_co( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm_l_ukr_name );
		gen_func_init_co( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm_u_ukr_name );
		gen_func_init_co( &funcs[ BLIS_TRSM_L_UKR ],     trsm_l_ukr_name     );
		gen_func_init_co( &funcs[ BLIS_TRSM_U_UKR ],     trsm_u_ukr_name     );
	}

	// For 1m, we employ an optimization which requires that we copy the native
	// real domain gemm ukernel function pointers to the corresponding real
	// domain slots in the virtual gemm ukernel func_t.
	if ( method == BLIS_1M )
	{
		func_t* gemm_nat_ukrs = bli_cntx_get_l3_nat_ukrs( BLIS_GEMM_UKR, cntx );
		func_t* gemm_vir_ukrs = bli_cntx_get_l3_vir_ukrs( BLIS_GEMM_UKR, cntx );

		bli_func_copy_dt( BLIS_FLOAT,  gemm_nat_ukrs, BLIS_FLOAT,  gemm_vir_ukrs );
		bli_func_copy_dt( BLIS_DOUBLE, gemm_nat_ukrs, BLIS_DOUBLE, gemm_vir_ukrs );
	}


	// -- Set induced method packm kernels -------------------------------------

	funcs = bli_cntx_packm_kers_buf( cntx );

	// Initialize all packm kernel func_t entries to NULL.
	for ( i = BLIS_PACKM_0XK_KER; i <= BLIS_PACKM_31XK_KER; ++i )
	{
		bli_func_init_null( &funcs[ i ] );
	}

	// Each induced method installs its own family of packm kernels (complex
	// slots only); the native fallback installs the plain packm kernels for
	// all four datatypes.
	if ( method == BLIS_3MH || method == BLIS_4MH )
	{
		gen_func_init_co( &funcs[ BLIS_PACKM_2XK_KER ],  packm_2xk_rih_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_4XK_KER ],  packm_4xk_rih_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_6XK_KER ],  packm_6xk_rih_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_8XK_KER ],  packm_8xk_rih_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_10XK_KER ], packm_10xk_rih_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_12XK_KER ], packm_12xk_rih_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_14XK_KER ], packm_14xk_rih_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_16XK_KER ], packm_16xk_rih_ker_name );
	}
	else if ( method == BLIS_3M1 )
	{
		gen_func_init_co( &funcs[ BLIS_PACKM_2XK_KER ],  packm_2xk_3mis_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_4XK_KER ],  packm_4xk_3mis_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_6XK_KER ],  packm_6xk_3mis_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_8XK_KER ],  packm_8xk_3mis_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_10XK_KER ], packm_10xk_3mis_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_12XK_KER ], packm_12xk_3mis_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_14XK_KER ], packm_14xk_3mis_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_16XK_KER ], packm_16xk_3mis_ker_name );
	}
	else if ( method == BLIS_4M1A || method == BLIS_4M1B )
	{
		gen_func_init_co( &funcs[ BLIS_PACKM_2XK_KER ],  packm_2xk_4mi_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_4XK_KER ],  packm_4xk_4mi_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_6XK_KER ],  packm_6xk_4mi_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_8XK_KER ],  packm_8xk_4mi_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_10XK_KER ], packm_10xk_4mi_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_12XK_KER ], packm_12xk_4mi_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_14XK_KER ], packm_14xk_4mi_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_16XK_KER ], packm_16xk_4mi_ker_name );
	}
	else if ( method == BLIS_1M )
	{
		gen_func_init_co( &funcs[ BLIS_PACKM_2XK_KER ],  packm_2xk_1er_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_4XK_KER ],  packm_4xk_1er_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_6XK_KER ],  packm_6xk_1er_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_8XK_KER ],  packm_8xk_1er_ker_name  );
		gen_func_init_co( &funcs[ BLIS_PACKM_10XK_KER ], packm_10xk_1er_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_12XK_KER ], packm_12xk_1er_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_14XK_KER ], packm_14xk_1er_ker_name );
		gen_func_init_co( &funcs[ BLIS_PACKM_16XK_KER ], packm_16xk_1er_ker_name );
	}
	else // if ( method == BLIS_NAT )
	{
		gen_func_init( &funcs[ BLIS_PACKM_2XK_KER ],  packm_2xk_ker_name  );
		gen_func_init( &funcs[ BLIS_PACKM_3XK_KER ],  packm_3xk_ker_name  );
		gen_func_init( &funcs[ BLIS_PACKM_4XK_KER ],  packm_4xk_ker_name  );
		gen_func_init( &funcs[ BLIS_PACKM_6XK_KER ],  packm_6xk_ker_name  );
		gen_func_init( &funcs[ BLIS_PACKM_8XK_KER ],  packm_8xk_ker_name  );
		gen_func_init( &funcs[ BLIS_PACKM_10XK_KER ], packm_10xk_ker_name );
		gen_func_init( &funcs[ BLIS_PACKM_12XK_KER ], packm_12xk_ker_name );
		gen_func_init( &funcs[ BLIS_PACKM_14XK_KER ], packm_14xk_ker_name );
		gen_func_init( &funcs[ BLIS_PACKM_16XK_KER ], packm_16xk_ker_name );
		gen_func_init( &funcs[ BLIS_PACKM_24XK_KER ], packm_24xk_ker_name );
	}


	// -- Set induced method cache and register blocksizes ---------------------

	// Modify the context with cache and register blocksizes (and multiples)
	// appropriate for the current induced method. Each call passes a count (6)
	// followed by that many (blocksize id, scalar, scalar) triples. The
	// scalars are double literals passed through a variadic argument list, so
	// they must remain floating-point constants.
	if ( method == BLIS_3MH )
	{
		bli_cntx_set_ind_blkszs
		(
		  method, 6,
		  BLIS_NC, 1.0, 1.0,
		  BLIS_KC, 1.0, 1.0,
		  BLIS_MC, 1.0, 1.0,
		  BLIS_NR, 1.0, 1.0,
		  BLIS_MR, 1.0, 1.0,
		  BLIS_KR, 1.0, 1.0,
		  cntx
		);
	}
	else if ( method == BLIS_3M1 )
	{
		bli_cntx_set_ind_blkszs
		(
		  method, 6,
		  BLIS_NC, 1.0, 1.0,
		  BLIS_KC, 3.0, 3.0,
		  BLIS_MC, 1.0, 1.0,
		  BLIS_NR, 1.0, 1.0,
		  BLIS_MR, 1.0, 1.0,
		  BLIS_KR, 1.0, 1.0,
		  cntx
		);
	}
	else if ( method == BLIS_4MH )
	{
		bli_cntx_set_ind_blkszs
		(
		  method, 6,
		  BLIS_NC, 1.0, 1.0,
		  BLIS_KC, 1.0, 1.0,
		  BLIS_MC, 1.0, 1.0,
		  BLIS_NR, 1.0, 1.0,
		  BLIS_MR, 1.0, 1.0,
		  BLIS_KR, 1.0, 1.0,
		  cntx
		);
	}
	else if ( method == BLIS_4M1B )
	{
		bli_cntx_set_ind_blkszs
		(
		  method, 6,
		  BLIS_NC, 2.0, 2.0,
		  BLIS_KC, 1.0, 1.0,
		  BLIS_MC, 2.0, 2.0,
		  BLIS_NR, 1.0, 1.0,
		  BLIS_MR, 1.0, 1.0,
		  BLIS_KR, 1.0, 1.0,
		  cntx
		);
	}
	else if ( method == BLIS_4M1A )
	{
		bli_cntx_set_ind_blkszs
		(
		  method, 6,
		  BLIS_NC, 1.0, 1.0,
		  BLIS_KC, 2.0, 2.0,
		  BLIS_MC, 1.0, 1.0,
		  BLIS_NR, 1.0, 1.0,
		  BLIS_MR, 1.0, 1.0,
		  BLIS_KR, 1.0, 1.0,
		  cntx
		);
	}
	else if ( method == BLIS_1M )
	{
		// Hard-coded to FALSE, so only the "!is_pb" schema branches below
		// are ever taken.
		const bool_t is_pb = FALSE;

		// We MUST set the induced method in the context prior to calling
		// bli_cntx_l3_vir_ukr_prefers_cols_dt() because that function queries
		// the induced method. It needs the induced method value in order
		// to determine whether to evaluate the "prefers column storage"
		// predicate using the storage preference of the kernel for dt, or
		// the storage preference of the kernel for the real projection of
		// dt. Failing to set the induced method here can lead to strange
		// undefined behavior at runtime if the native complex kernel's
		// storage preference happens to not equal that of the native real
		// kernel.
		bli_cntx_set_method( method, cntx );

		// Initialize the blocksizes according to the micro-kernel preference as
		// well as the algorithm.
		if ( bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ) )
		{
			// This branch is used for algorithms 1m_c_bp, 1m_r_pb.

			// Set the pack_t schemas for the c_bp or r_pb algorithms.
			if ( !is_pb )
			{
				bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_1E, cntx );
				bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_1R, cntx );
			}
			else // if ( is_pb )
			{
				bli_cntx_set_schema_b_panel( BLIS_PACKED_ROW_PANELS_1R, cntx );
				bli_cntx_set_schema_a_block( BLIS_PACKED_COL_PANELS_1E, cntx );
			}

			bli_cntx_set_ind_blkszs
			(
			  method, 6,
			  BLIS_NC, 1.0, 1.0,
			  BLIS_KC, 2.0, 2.0, // halve kc...
			  BLIS_MC, 2.0, 2.0, // halve mc...
			  BLIS_NR, 1.0, 1.0,
			  BLIS_MR, 2.0, 1.0, // ...and mr (but NOT packmr)
			  BLIS_KR, 1.0, 1.0,
			  cntx
			);
		}
		else // if ( bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, BLIS_GEMM_UKR, cntx ) )
		{
			// This branch is used for algorithms 1m_r_bp, 1m_c_pb.

			// Set the pack_t schemas for the r_bp or c_pb algorithms.
			if ( !is_pb )
			{
				bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_1R, cntx );
				bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_1E, cntx );
			}
			else // if ( is_pb )
			{
				bli_cntx_set_schema_b_panel( BLIS_PACKED_ROW_PANELS_1E, cntx );
				bli_cntx_set_schema_a_block( BLIS_PACKED_COL_PANELS_1R, cntx );
			}

			bli_cntx_set_ind_blkszs
			(
			  method, 6,
			  BLIS_NC, 2.0, 2.0, // halve nc...
			  BLIS_KC, 2.0, 2.0, // halve kc...
			  BLIS_MC, 1.0, 1.0,
			  BLIS_NR, 2.0, 1.0, // ...and nr (but NOT packnr)
			  BLIS_MR, 1.0, 1.0,
			  BLIS_KR, 1.0, 1.0,
			  cntx
			);
		}
	}
	else // if ( method == BLIS_NAT )
	{
		// No change in blocksizes needed for native execution.
	}


	// -- Set misc. other fields -----------------------------------------------

	// Pack schemas for the remaining methods. (The 1m schemas were already
	// set above, alongside its blocksizes.)
	if ( method == BLIS_3MH )
	{
		// Schemas vary with _stage().
	}
	else if ( method == BLIS_3M1 )
	{
		bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_3MI, cntx );
		bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_3MI, cntx );
	}
	else if ( method == BLIS_4MH )
	{
		// Schemas vary with _stage().
	}
	else if ( method == BLIS_4M1A || method == BLIS_4M1B )
	{
		bli_cntx_set_schema_a_block( BLIS_PACKED_ROW_PANELS_4MI, cntx );
		bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS_4MI, cntx );
	}
	else if ( method == BLIS_1M )
	{
		//const bool_t is_pb = FALSE;

		// Set the anti-preference field to TRUE when executing a panel-block
		// algorithm, and FALSE otherwise. This will cause higher-level generic
		// code to establish (if needed) disagreement between the storage of C and
		// the micro-kernel output preference so that the two will come back into
		// agreement in the panel-block macro-kernel (which is implemented in
		// terms of the block-panel macro-kernel with some induced transpositions).
		//bli_cntx_set_anti_pref( is_pb, cntx );
	}
	else // if ( method == BLIS_NAT )
	{
	}
}

blis-0.6.1/ref_kernels/ind/000077500000000000000000000000001360743507500155325ustar00rootroot00000000000000blis-0.6.1/ref_kernels/ind/bli_gemm1m_ref.c000066400000000000000000000204751360743507500205530ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"

/*
   GENTFUNCCO: template for the reference 1m virtual gemm micro-kernel.

   ctype is treated as a complex type whose component type is ctype_r: the
   body takes the real and imaginary parts of alpha and beta and reinterprets
   a, b, and c as arrays of ctype_r. The work is done by the native
   real-domain (dt_r) gemm micro-kernel queried from cntx, invoked with a
   k dimension of 2*k.

   Parameters of the generated function:
     k          - k dimension; doubled before being passed to the real kernel.
     alpha      - scalar; its imaginary part must be zero (checked below).
     a, b       - micro-panel operands, reinterpreted as ctype_r arrays.
     beta       - scalar applied to c.
     c          - output micro-tile, with strides rs_c/cs_c in units of ctype.
     data, cntx - forwarded to the real kernel; cntx is also queried for the
                  kernel address, its storage preference, and MR/NR.

   Instantiated at the bottom of this file via INSERT_GENTFUNCCO_BASIC2().
*/
#undef  GENTFUNCCO
#define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \
\
void PASTEMAC3(ch,opname,arch,suf) \
     ( \
       dim_t               k, \
       ctype*     restrict alpha, \
       ctype*     restrict a, \
       ctype*     restrict b, \
       ctype*     restrict beta, \
       ctype*     restrict c, inc_t rs_c, inc_t cs_c, \
       auxinfo_t* restrict data, \
       cntx_t*    restrict cntx  \
     ) \
{ \
	const num_t       dt        = PASTEMAC(ch,type); \
	const num_t       dt_r      = PASTEMAC(chr,type); \
\
	/* Native real-domain gemm micro-kernel and its storage preference. */ \
	PASTECH(chr,gemm_ukr_ft) \
	                  rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \
	const bool_t      col_pref  = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt_r, BLIS_GEMM_UKR, cntx ); \
	const bool_t      row_pref  = !col_pref; \
\
	/* MR and NR are queried for the complex datatype dt. */ \
	const dim_t       mr        = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \
	const dim_t       nr        = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \
\
	const dim_t       k2        = 2 * k; \
\
	/* Temporary micro-tile used when the result cannot be computed directly
	   into c (see the using_ct logic below). */ \
	ctype             ct[ BLIS_STACK_BUF_MAX_SIZE \
	                      / sizeof( ctype_r ) ] \
	                  __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	inc_t             rs_ct; \
	inc_t             cs_ct; \
\
	ctype_r* restrict a_r       = ( ctype_r* )a; \
\
	ctype_r* restrict b_r       = ( ctype_r* )b; \
\
	ctype_r* restrict zero_r    = PASTEMAC(chr,0); \
\
	ctype_r* restrict alpha_r   = &PASTEMAC(ch,real)( *alpha ); \
	ctype_r* restrict alpha_i   = &PASTEMAC(ch,imag)( *alpha ); \
\
	ctype_r* restrict beta_r    = &PASTEMAC(ch,real)( *beta ); \
	ctype_r* restrict beta_i    = &PASTEMAC(ch,imag)( *beta ); \
\
	ctype_r*          c_use; \
	inc_t             rs_c_use; \
	inc_t             cs_c_use; \
\
	bool_t            using_ct; \
\
/*
	PASTEMAC(chr,fprintm)( stdout, "gemm_ukr: a", mr, 2*k, \
	                       a_r, 1, mr, "%5.2f", "" ); \
	PASTEMAC(chr,fprintm)( stdout, "gemm_ukr: b", 2*k, 2*nr, \
	                       b_r, 2*nr, 1, "%5.2f", "" ); \
	PASTEMAC(chr,fprintm)( stdout, "gemm_ukr: c after", mr, 2*nr, \
	                       c_use, rs_c_use, cs_c_use, "%5.2f", "" ); \
*/ \
\
	/* SAFETY CHECK: The higher level implementation should never
	   allow an alpha with non-zero imaginary component to be passed
	   in, because it can't be applied properly using the 1m method.
	   If alpha is not real, then something is very wrong. */ \
	if ( !PASTEMAC(chr,eq0)( *alpha_i ) ) \
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \
\
\
	/* If beta has a non-zero imaginary component OR if c is stored with
	   general stride, then we compute the alpha*a*b product into temporary
	   storage and then accumulate that result into c afterwards. Note that
	   the other two cases concerning disagreement between the storage of C
	   and the output preference of the micro-kernel, should ONLY occur in
	   the context of trsm, whereby this virtual micro-kernel is called
	   directly from the trsm macro-kernel to update the micro-tile b11
	   that exists within the packed row-panel of B. Indeed that is the
	   reason those cases MUST be explicitly handled. */ \
	if      ( !PASTEMAC(chr,eq0)( *beta_i ) )                using_ct = TRUE; \
	else if ( bli_is_col_stored( rs_c, cs_c ) && row_pref ) using_ct = TRUE; \
	else if ( bli_is_row_stored( rs_c, cs_c ) && col_pref ) using_ct = TRUE; \
	else if ( bli_is_gen_stored( rs_c, cs_c ) )             using_ct = TRUE; \
	else                                                    using_ct = FALSE; \
\
\
	if ( using_ct ) \
	{ \
		/* In the atypical cases, we compute the result into temporary
		   workspace ct and then accumulate it back to c at the end. */ \
\
		/* Set the strides of ct based on the preference of the underlying
		   native real domain gemm micro-kernel. Note that we set the ct
		   strides in units of complex elements. */ \
		if ( col_pref ) { rs_ct = 1;  cs_ct = mr; } \
		else            { rs_ct = nr; cs_ct = 1;  } \
\
		c_use    = ( ctype_r* )ct; \
		rs_c_use = rs_ct; \
		cs_c_use = cs_ct; \
\
		/* Convert the strides from being in units of complex elements to
		   be in units of real elements. General storage need not be
		   handled here: the ct strides were set just above with unit
		   stride in one dimension. */ \
		if ( bli_is_col_stored( rs_c_use, cs_c_use ) ) cs_c_use *= 2; \
		else                                           rs_c_use *= 2; \
\
		/* The following gemm micro-kernel call implements the 1m method,
		   which induces a complex matrix multiplication by calling the
		   real matrix micro-kernel on micro-panels that have been packed
		   according to the 1e and 1r formats. */ \
\
		/* ct = alpha_r * a * b; (zero is passed as beta here; the caller's
		   beta is applied in the accumulation step below.) */ \
		rgemm_ukr \
		( \
		  k2, \
		  alpha_r, \
		  a_r, \
		  b_r, \
		  zero_r, \
		  c_use, rs_c_use, cs_c_use, \
		  data, \
		  cntx  \
		); \
\
		dim_t i, j; \
\
		/* Accumulate the final result in ct back to c. The three cases
		   are selected by whether beta equals one, equals zero, or is
		   anything else. */ \
		if ( PASTEMAC(ch,eq1)( *beta ) ) \
		{ \
			for ( j = 0; j < nr; ++j ) \
			for ( i = 0; i < mr; ++i ) \
			{ \
				PASTEMAC(ch,adds)( *(ct + i*rs_ct + j*cs_ct), \
				                   *(c  + i*rs_c  + j*cs_c ) ); \
			} \
		} \
		else if ( PASTEMAC(ch,eq0)( *beta ) ) \
		{ \
			for ( j = 0; j < nr; ++j ) \
			for ( i = 0; i < mr; ++i ) \
			{ \
				PASTEMAC(ch,copys)( *(ct + i*rs_ct + j*cs_ct), \
				                    *(c  + i*rs_c  + j*cs_c ) ); \
			} \
		} \
		else \
		{ \
			for ( j = 0; j < nr; ++j ) \
			for ( i = 0; i < mr; ++i ) \
			{ \
				PASTEMAC(ch,xpbys)( *(ct + i*rs_ct + j*cs_ct), \
				                    *beta, \
				                    *(c  + i*rs_c  + j*cs_c ) ); \
			} \
		} \
	} \
	else \
	{ \
		/* In the typical cases, we use the real part of beta and
		   accumulate directly into the output matrix c. */ \
\
		c_use    = ( ctype_r* )c; \
		rs_c_use = rs_c; \
		cs_c_use = cs_c; \
\
		/* Convert the strides from being in units of complex elements to
		   be in units of real elements. General storage need not be
		   handled here because a general-stride c is routed to the ct
		   branch above. */ \
		if ( bli_is_col_stored( rs_c_use, cs_c_use ) ) cs_c_use *= 2; \
		else                                           rs_c_use *= 2; \
\
		/* The following gemm micro-kernel call implements the 1m method,
		   which induces a complex matrix multiplication by calling the
		   real matrix micro-kernel on micro-panels that have been packed
		   according to the 1e and 1r formats. */ \
\
		/* c = beta_r * c + alpha_r * a * b; */ \
		rgemm_ukr \
		( \
		  k2, \
		  alpha_r, \
		  a_r, \
		  b_r, \
		  beta_r, \
		  c_use, rs_c_use, cs_c_use, \
		  data, \
		  cntx  \
		); \
	} \
}

INSERT_GENTFUNCCO_BASIC2( gemm1m, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX )

blis-0.6.1/ref_kernels/ind/bli_gemm3m1_ref.c000066400000000000000000000264601360743507500206360ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* GENTFUNCCO body for the gemm3m1 virtual micro-kernel: a complex-domain gemm micro-kernel (3m method) built on the native real-domain gemm micro-kernel queried from cntx. The real micro-kernel is called three times, on the sub-panel pairs (a_r, b_r), (a_i, b_i) and (a_rpi, b_rpi), which start at offsets 0, is_a/is_b and 2*is_a/2*is_b (in real elements, read from data) within a and b. The three real products are written to the local buffers ab_r, ab_i and ab_rpi and are then combined into c, with the form of the update chosen by the value of beta. alpha must have a zero imaginary part, otherwise bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ) is invoked. rs_c/cs_c are the strides of c in units of complex elements. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ /* Local buffers receiving the three real products. */ ctype_r ab_r[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ ctype_r ab_i[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ ctype_r ab_rpi[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ inc_t rs_ab; \ inc_t cs_ab; \ \ /* Offsets (in real elements) between consecutive sub-panels of a and of b. */ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data ); \ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + is_a; \ ctype_r* restrict a_rpi = ( ctype_r* )a + 2*is_a; \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + is_b; \ ctype_r* restrict b_rpi = ( ctype_r* )b + 2*is_b; \ \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ \ ctype_r* restrict alpha_r = &PASTEMAC(ch,real)( *alpha ); \ ctype_r* restrict alpha_i = &PASTEMAC(ch,imag)( *alpha ); \ \ const ctype_r beta_r = PASTEMAC(ch,real)( *beta ); \ const ctype_r beta_i = PASTEMAC(ch,imag)( *beta ); \ \ /* Save the caller-provided next a/b addresses; they are written back into data before the last micro-kernel call. */ void* a_next = bli_auxinfo_next_a( data ); \ void* b_next = bli_auxinfo_next_b( data ); \ \ dim_t n_iter; \ dim_t n_elem; \ \ inc_t incc, ldc; \ inc_t incab, ldab; \ \ dim_t i, j; \ \ \ /* SAFETY CHECK: The higher level implementation should never allow an alpha
with non-zero imaginary component to be passed in, because it can't be applied properly using the 3m method. If alpha is not real, then something is very wrong. */ \ if ( !PASTEMAC(chr,eq0)( *alpha_i ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ \ /* An optimization: Set local strides and loop bounds based on the strides of c, so that (a) the micro-kernel accesses the ab_r/ab_i/ab_rpi buffers the same way it would if it were updating c directly, and (b) c is updated contiguously. For c with general stride, we access those buffers the same way we would as if c were column-stored. */ \ if ( bli_is_row_stored( rs_c, cs_c ) ) \ { \ rs_ab = n; n_iter = m; incc = cs_c; \ cs_ab = 1; n_elem = n; ldc = rs_c; \ } \ else /* column-stored or general stride */ \ { \ rs_ab = 1; n_iter = n; incc = rs_c; \ cs_ab = m; n_elem = m; ldc = cs_c; \ } \ incab = 1; \ ldab = n_elem; \ \ \ /* The following gemm micro-kernel calls implement all "phases" of the 3m method: c = beta * c; c_r += + a_r * b_r - a_i * b_i; c_i += (a_r + a_i)(b_r + b_i) - a_r * b_r - a_i * b_i; NOTE: Scaling by alpha_r is not shown above, but is implemented below. */ \ \ \ /* Before each call, store in data the a/b sub-panels that the call after it will use (a_i/b_i here). */ bli_auxinfo_set_next_ab( a_i, b_i, data ); \ \ /* ab_r = alpha_r * a_r * b_r; */ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_r, \ b_r, \ zero_r, \ ab_r, rs_ab, cs_ab, \ data, \ cntx \ ); \ \ bli_auxinfo_set_next_ab( a_rpi, b_rpi, data ); \ \ /* ab_i = alpha_r * a_i * b_i; */ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_i, \ b_i, \ zero_r, \ ab_i, rs_ab, cs_ab, \ data, \ cntx \ ); \ \ /* Restore the next a/b addresses that were saved on entry. */ bli_auxinfo_set_next_ab( a_next, b_next, data ); \ \ /* ab_rpi = alpha_r * a_rpi * b_rpi; */ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_rpi, \ b_rpi, \ zero_r, \ ab_rpi, rs_ab, cs_ab, \ data, \ cntx \ ); \ \ \ /* How we accumulate the intermediate matrix products stored in ab_r, ab_i, and ab_rpi depends on the value of beta.
*/ \ if ( !PASTEMAC(chr,eq0)( beta_i ) ) \ { \ /* c = beta * c; c_r = c_r + ab_r - ab_i; c_i = c_i + ab_rpi - ab_r - ab_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r alphabeta11_r = *(ab_r + i*incab + j*ldab); \ const ctype_r alphabeta11_i = *(ab_i + i*incab + j*ldab); \ const ctype_r alphabeta11_rpi = *(ab_rpi + i*incab + j*ldab); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ ctype_r gamma11t_r; \ ctype_r gamma11t_i; \ \ PASTEMAC(ch,copyris)( alphabeta11_r, \ -alphabeta11_r, \ gamma11t_r, \ gamma11t_i ); \ \ PASTEMAC(ch,subris)( alphabeta11_i, \ alphabeta11_i, \ gamma11t_r, \ gamma11t_i ); \ \ PASTEMAC(chr,adds)( alphabeta11_rpi, \ gamma11t_i ); \ \ PASTEMAC(ch,xpbyris)( gamma11t_r, \ gamma11t_i, \ beta_r, \ beta_i, \ *gamma11_r, \ *gamma11_i ); \ } \ } \ else if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r + ab_r - ab_i; c_i = c_i + ab_rpi - ab_r - ab_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r alphabeta11_r = *(ab_r + i*incab + j*ldab); \ const ctype_r alphabeta11_i = *(ab_i + i*incab + j*ldab); \ const ctype_r alphabeta11_rpi = *(ab_rpi + i*incab + j*ldab); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ ctype_r gamma11t_r; \ ctype_r gamma11t_i; \ \ PASTEMAC(ch,copyris)( alphabeta11_r, \ -alphabeta11_r, \ gamma11t_r, \ gamma11t_i ); \ \ PASTEMAC(ch,subris)( alphabeta11_i, \ alphabeta11_i, \ gamma11t_r, \ gamma11t_i ); \ \ PASTEMAC(chr,adds)( alphabeta11_rpi, \ gamma11t_i ); \ \ PASTEMAC(ch,addris)( gamma11t_r, \ gamma11t_i, \ *gamma11_r, \ *gamma11_i ); \ } \ } \ else if ( !PASTEMAC(chr,eq0)( beta_r ) ) \ { \ /* c_r = beta_r * c_r + ab_r - ab_i; c_i = beta_r * c_i + ab_rpi - ab_r - ab_i; */ \ for ( j =
0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r alphabeta11_r = *(ab_r + i*incab + j*ldab); \ const ctype_r alphabeta11_i = *(ab_i + i*incab + j*ldab); \ const ctype_r alphabeta11_rpi = *(ab_rpi + i*incab + j*ldab); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ ctype_r gamma11t_r; \ ctype_r gamma11t_i; \ \ PASTEMAC(ch,copyris)( alphabeta11_r, \ -alphabeta11_r, \ gamma11t_r, \ gamma11t_i ); \ \ PASTEMAC(ch,subris)( alphabeta11_i, \ alphabeta11_i, \ gamma11t_r, \ gamma11t_i ); \ \ PASTEMAC(chr,adds)( alphabeta11_rpi, \ gamma11t_i ); \ \ PASTEMAC(chr,xpbys)( gamma11t_r, beta_r, *gamma11_r ); \ PASTEMAC(chr,xpbys)( gamma11t_i, beta_r, *gamma11_i ); \ } \ } \ else /* if ( PASTEMAC(chr,eq0)( beta_r ) ) */ \ { \ /* c_r = ab_r - ab_i; c_i = ab_rpi - ab_r - ab_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r alphabeta11_r = *(ab_r + i*incab + j*ldab); \ const ctype_r alphabeta11_i = *(ab_i + i*incab + j*ldab); \ const ctype_r alphabeta11_rpi = *(ab_rpi + i*incab + j*ldab); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ ctype_r gamma11t_r; \ ctype_r gamma11t_i; \ \ PASTEMAC(ch,copyris)( alphabeta11_r, \ -alphabeta11_r, \ gamma11t_r, \ gamma11t_i ); \ \ PASTEMAC(ch,subris)( alphabeta11_i, \ alphabeta11_i, \ gamma11t_r, \ gamma11t_i ); \ \ PASTEMAC(chr,adds)( alphabeta11_rpi, \ gamma11t_i ); \ \ PASTEMAC(ch,copyris)( gamma11t_r, \ gamma11t_i, \ *gamma11_r, \ *gamma11_i ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC2( gemm3m1, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/ind/bli_gemm3mh_ref.c000066400000000000000000000236021360743507500207200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like
libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* GENTFUNCCO body for the gemm3mh virtual micro-kernel: each invocation performs one phase of the 3m method. A single call to the native real-domain gemm micro-kernel (queried from cntx) computes ct = alpha_r * a * b into a local buffer; the pack schema returned by bli_auxinfo_schema_a( data ) then selects how ct is folded into c: for an ro-packed schema ct is added to c_r and subtracted from c_i, for an io-packed schema ct is subtracted from both c_r and c_i, and for any other schema (rpi) ct is added to c_i only. alpha must have a zero imaginary part, otherwise bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ) is invoked. rs_c/cs_c are the strides of c in units of complex elements. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ /* Local buffer receiving the real product of this phase. */ ctype_r ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ inc_t rs_ct; \ inc_t cs_ct; \ \ ctype_r* restrict a_cast = ( ctype_r* )a; \ \ ctype_r* restrict b_cast = ( ctype_r* )b; \ \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ \ ctype_r* restrict alpha_r = &PASTEMAC(ch,real)( *alpha ); \ ctype_r* restrict alpha_i = &PASTEMAC(ch,imag)( *alpha ); \ \ const ctype_r beta_r = PASTEMAC(ch,real)( *beta ); \ const ctype_r beta_i = PASTEMAC(ch,imag)( *beta ); \ \ /* The schema of A selects which accumulation branch runs after the micro-kernel call. */ const pack_t schema = bli_auxinfo_schema_a( data ); \ \ dim_t n_iter; \ dim_t n_elem; \ \ inc_t incc, ldc; \ inc_t incct, ldct; \ \ dim_t i, j; \ \ \ /* SAFETY CHECK: The higher level implementation should never allow an alpha with non-zero imaginary component to be passed in, because it can't be applied properly using the 3mh method. If alpha is not real, then something is very wrong. */ \ if ( !PASTEMAC(chr,eq0)( *alpha_i ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ \ /* An optimization: Set local strides and loop bounds based on the strides of c, so that (a) the micro-kernel accesses ct the same way it would if it were updating c directly, and (b) c is updated contiguously. For c with general stride, we access ct the same way we would as if it were column-stored.
*/ \ if ( bli_is_row_stored( rs_c, cs_c ) ) \ { \ rs_ct = n; n_iter = m; incc = cs_c; \ cs_ct = 1; n_elem = n; ldc = rs_c; \ } \ else /* column-stored or general stride */ \ { \ rs_ct = 1; n_iter = n; incc = rs_c; \ cs_ct = m; n_elem = m; ldc = cs_c; \ } \ incct = 1; \ ldct = n_elem; \ \ \ /* The following gemm micro-kernel call implements one "phase" of the 3m method: c = beta * c; c_r += + a_r * b_r - a_i * b_i; c_i += (a_r + a_i)(b_r + b_i) - a_r * b_r - a_i * b_i; NOTE: Scaling by alpha_r is not shown above, but is implemented below. */ \ \ \ /* ct = alpha_r * a * b; */ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_cast, \ b_cast, \ zero_r, \ ct, rs_ct, cs_ct, \ data, \ cntx \ ); \ \ /* PASTEMAC(chr,fprintm)( stdout, "gemm3mh_ukr: ct", 4, 4, ct, rs_ct, cs_ct, "%4.1f", "" );*/ \ \ /* How we accumulate the intermediate matrix product stored in ct depends on (a) the schemas of A and B (they are always the same), and (b) the value of beta. */ \ if ( bli_is_ro_packed( schema ) ) \ { \ if ( !PASTEMAC(chr,eq0)( beta_i ) ) \ { \ /* c = beta * c; c_r = c_r + ct; c_i = c_i - ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(ch,xpbyris)( gamma11t, \ -gamma11t, \ beta_r, \ beta_i, \ *gamma11_r, \ *gamma11_i ); \ } \ } \ else if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r + ct; c_i = c_i - ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,adds)( gamma11t, *gamma11_r ); \ PASTEMAC(chr,subs)( gamma11t, *gamma11_i ); \ } \ } \ else if (
!PASTEMAC(chr,eq0)( beta_r ) ) \ { \ /* c_r = beta_r * c_r + ct; c_i = beta_r * c_i - ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,xpbys)( gamma11t, beta_r, *gamma11_r ); \ PASTEMAC(chr,xpbys)( -gamma11t, beta_r, *gamma11_i ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = ct; c_i = -ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,copys)( gamma11t, *gamma11_r ); \ PASTEMAC(chr,copys)( -gamma11t, *gamma11_i ); \ } \ } \ } \ else if ( bli_is_io_packed( schema ) ) \ { \ /* NOTE(review): this branch only distinguishes beta_r == 1; beta_i is not consulted and every other beta_r takes the overwrite path below -- presumably the caller only passes a beta of 0 or 1 for this phase; confirm. */ if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r - ct; c_i = c_i - ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,subs)( gamma11t, *gamma11_r ); \ PASTEMAC(chr,subs)( gamma11t, *gamma11_i ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = -ct; c_i = -ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,copys)( -gamma11t, *gamma11_r ); \ PASTEMAC(chr,copys)( -gamma11t,
*gamma11_i ); \ } \ } \ } \ else /* if ( bli_is_rpi_packed( schema ) ) */ \ { \ /* NOTE(review): as in the io-packed branch, only beta_r == 1 is distinguished here; confirm that the caller restricts beta accordingly. */ if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r + 0; c_i = c_i + ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,adds)( gamma11t, *gamma11_i ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = 0; c_i = ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,set0s)( *gamma11_r ); \ PASTEMAC(chr,copys)( gamma11t, *gamma11_i ); \ } \ } \ } \ \ /*PASTEMAC(ch,fprintm)( stdout, "gemm3mh_ukr: c", 4, 4, c, rs_c, cs_c, "%4.1f", "" ); \ */ \ \ /*PASTEMAC(chr,fprintm)( stdout, "gemm3mh_ukr: b1", k, n, b_cast, n, 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemm3mh_ukr: a1", m, k, a_cast, 1, m, "%4.1f", "" );*/ \ } INSERT_GENTFUNCCO_BASIC2( gemm3mh, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/ind/bli_gemm4m1_ref.c000066400000000000000000000223501360743507500206310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* GENTFUNCCO body for the gemm4m1 virtual micro-kernel: a complex-domain gemm micro-kernel (4m method) built on the native real-domain gemm micro-kernel queried from cntx. The real micro-kernel is called four times on the real and imaginary sub-panels of a and b (the imaginary sub-panels start is_a/is_b real elements, read from data, after the real ones), producing ct_r = alpha_r * (a_r*b_r - a_i*b_i) and ct_i = alpha_r * (a_r*b_i + a_i*b_r) in local buffers, which are then combined into c with the form of the update chosen by the value of beta. alpha must have a zero imaginary part, otherwise bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ) is invoked. rs_c/cs_c are the strides of c in units of complex elements. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ /* Local buffers receiving the real and imaginary parts of the product. */ ctype_r ct_r[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ ctype_r ct_i[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ inc_t rs_ct; \ inc_t cs_ct; \ \ /* Offsets (in real elements) from the real to the imaginary sub-panel of a and of b. */ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data ); \ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + is_a; \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + is_b; \ \ ctype_r* restrict one_r = PASTEMAC(chr,1); \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ \ ctype_r* restrict alpha_r = &PASTEMAC(ch,real)( *alpha ); \ ctype_r* restrict alpha_i = &PASTEMAC(ch,imag)( *alpha ); \ \ /* Negated alpha_r, used for the -a_i * b_i term of ct_r. */ ctype_r m_alpha_r = -(*alpha_r); \ \ const ctype_r beta_r = PASTEMAC(ch,real)( *beta ); \ const ctype_r beta_i = PASTEMAC(ch,imag)( *beta ); \ \ /* Save the caller-provided next a/b addresses; they are written back into data before the last micro-kernel call. */ void* a_next = bli_auxinfo_next_a( data ); \ void* b_next = bli_auxinfo_next_b( data ); \ \ dim_t n_iter; \ dim_t n_elem; \ \ inc_t incc, ldc; \ inc_t incct, ldct; \ \ dim_t i, j; \ \ \ /* PASTEMAC(chr,fprintm)( stdout, "gemm4m1_ukr: ap_r", m, k, \ a_r, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemm4m1_ukr: ap_i", m, k, \ a_i, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \
PASTEMAC(chr,fprintm)( stdout, "gemm4m1_ukr: bp_r", k, n, \ b_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemm4m1_ukr: bp_i", k, n, \ b_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ \ \ /* SAFETY CHECK: The higher level implementation should never allow an alpha with non-zero imaginary component to be passed in, because it can't be applied properly using the 4m method. If alpha is not real, then something is very wrong. */ \ if ( !PASTEMAC(chr,eq0)( *alpha_i ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ \ /* An optimization: Set local strides and loop bounds based on the strides of c, so that (a) the micro-kernel accesses ct the same way it would if it were updating c directly, and (b) c is updated contiguously. For c with general stride, we access ct the same way we would as if it were column-stored. */ \ if ( bli_is_row_stored( rs_c, cs_c ) ) \ { \ rs_ct = n; n_iter = m; incc = cs_c; \ cs_ct = 1; n_elem = n; ldc = rs_c; \ } \ else /* column-stored or general stride */ \ { \ rs_ct = 1; n_iter = n; incc = rs_c; \ cs_ct = m; n_elem = m; ldc = cs_c; \ } \ incct = 1; \ ldct = n_elem; \ \ \ /* The following gemm micro-kernel calls implement all "phases" of the 4m method: c = beta * c; c_r += a_r * b_r - a_i * b_i; c_i += a_r * b_i + a_i * b_r; NOTE: Scaling by alpha_r is not shown above, but is implemented below.
*/ \ \ \ /* Before each call, store in data the a/b sub-panels that the call after it will use. */ bli_auxinfo_set_next_ab( a_r, b_i, data ); \ \ /* ct_r = alpha_r * a_r * b_r; */ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_r, \ b_r, \ zero_r, \ ct_r, rs_ct, cs_ct, \ data, \ cntx \ ); \ \ bli_auxinfo_set_next_ab( a_i, b_r, data ); \ \ /* ct_i = alpha_r * a_r * b_i; */ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_r, \ b_i, \ zero_r, \ ct_i, rs_ct, cs_ct, \ data, \ cntx \ ); \ \ bli_auxinfo_set_next_ab( a_i, b_i, data ); \ \ /* ct_i += alpha_r * a_i * b_r; */ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_i, \ b_r, \ one_r, \ ct_i, rs_ct, cs_ct, \ data, \ cntx \ ); \ \ /* Restore the next a/b addresses that were saved on entry. */ bli_auxinfo_set_next_ab( a_next, b_next, data ); \ \ /* ct_r += -alpha_r * a_i * b_i; */ \ rgemm_ukr \ ( \ k, \ &m_alpha_r, \ a_i, \ b_i, \ one_r, \ ct_r, rs_ct, cs_ct, \ data, \ cntx \ ); \ \ \ /* How we accumulate the intermediate matrix product stored in ct_r and ct_i depends on the value of beta. */ \ if ( !PASTEMAC(chr,eq0)( beta_i ) ) \ { \ /* c = beta * c + ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(ch,xpbyris)( gamma11t_r, \ gamma11t_i, \ beta_r, \ beta_i, \ *gamma11_r, \ *gamma11_i ); \ } \ } \ else if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r + ct_r; */ \ /* c_i = c_i + ct_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,adds)( gamma11t_r, *gamma11_r ); \ PASTEMAC(chr,adds)( gamma11t_i, *gamma11_i ); \ } \ } \ else if ( !PASTEMAC(chr,eq0)(
beta_r ) ) \ { \ /* c_r = beta_r * c_r + ct_r; */ \ /* c_i = beta_r * c_i + ct_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,xpbys)( gamma11t_r, beta_r, *gamma11_r ); \ PASTEMAC(chr,xpbys)( gamma11t_i, beta_r, *gamma11_i ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = ct_r; */ \ /* c_i = ct_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,copys)( gamma11t_r, *gamma11_r ); \ PASTEMAC(chr,copys)( gamma11t_i, *gamma11_i ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC2( gemm4m1, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/ind/bli_gemm4mb_ref.c000066400000000000000000000264211360743507500207150ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" /* GENTFUNCCO template for the gemm4mb reference micro-kernel; it is instantiated by the INSERT_GENTFUNCCO_BASIC2 line that follows the macro. The generated function receives complex-typed alpha, a, b, beta and c, but forms its products by calling the real-domain gemm micro-kernel (queried from cntx) on the a_r/a_i and b_r/b_i sub-panels, writing into the local buffers ct_r and ct_i, and then accumulating those buffers into c according to beta. Each call performs only one half of the 4mb computation: the pack schema of b read from the auxinfo object selects whether b_r (ro-packed) or b_i (otherwise) is used. An alpha with a non-zero imaginary part is reported through bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ). */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; /* the local tile is m x n, using the real-domain MR and NR queried above */ \ \ ctype_r ct_r[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ ctype_r ct_i[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); /* ct_r and ct_i each span BLIS_STACK_BUF_MAX_SIZE bytes of ctype_r elements */ \ inc_t rs_ct; \ inc_t cs_ct; \ \ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data );
\ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + is_a; /* a_i begins is_a real elements past a_r */ \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + is_b; /* b_i begins is_b real elements past b_r */ \ \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ \ ctype_r* restrict alpha_r = &PASTEMAC(ch,real)( *alpha ); \ ctype_r* restrict alpha_i = &PASTEMAC(ch,imag)( *alpha ); \ \ const ctype_r beta_r = PASTEMAC(ch,real)( *beta ); \ const ctype_r beta_i = PASTEMAC(ch,imag)( *beta ); \ \ ctype_r m_alpha_r = -PASTEMAC(ch,real)( *alpha ); /* negated real part of alpha, used as alpha for the a_i * b_i product */ \ \ const pack_t schema_b = bli_auxinfo_schema_b( data ); \ \ void* a_next = bli_auxinfo_next_a( data ); \ void* b_next = bli_auxinfo_next_b( data ); /* saved so they can be written back to data before the second micro-kernel call */ \ \ dim_t n_iter; \ dim_t n_elem; \ \ inc_t incc, ldc; \ inc_t incct, ldct; \ \ dim_t i, j; \ \ \ /* SAFETY CHECK: The higher level implementation should never allow an alpha with non-zero imaginary component to be passed in, because it can't be applied properly using the 4mb method. If alpha is not real, then something is very wrong. */ \ if ( !PASTEMAC(chr,eq0)( *alpha_i ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ \ /* An optimization: Set local strides and loop bounds based on the strides of c, so that (a) the micro-kernel accesses ct the same way it would if it were updating c directly, and (b) c is updated contiguously. For c with general stride, we access ct the same way we would if c were column-stored. */ \ if ( bli_is_row_stored( rs_c, cs_c ) ) \ { \ rs_ct = n; n_iter = m; incc = cs_c; \ cs_ct = 1; n_elem = n; ldc = rs_c; \ } \ else /* column-stored or general stride */ \ { \ rs_ct = 1; n_iter = n; incc = rs_c; \ cs_ct = m; n_elem = m; ldc = cs_c; \ } \ incct = 1; \ ldct = n_elem; /* ct is read back with unit increment and leading dimension n_elem */ \ \ \ \ if ( bli_is_ro_packed( schema_b ) ) \ { \ /* The following gemm micro-kernel calls implement the first half of the 4mb method (which uses b_r): c = beta * c; c_r += a_r * b_r; c_i += a_i * b_r; NOTE: Scaling by alpha_r is not shown above; it is applied by passing alpha_r as the alpha argument of each call below. The products are written to ct_r and ct_i with a beta of zero and are combined with c afterwards.
*/ \ \ bli_auxinfo_set_next_ab( a_i, b_r, data ); /* a_i and b_r are the operands of the second call below */ \ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_r, \ b_r, \ zero_r, \ ct_r, rs_ct, cs_ct, \ data, \ cntx \ ); /* ct_r = alpha_r * a_r * b_r */ \ \ bli_auxinfo_set_next_ab( a_next, b_next, data ); /* write back the values read from data on entry */ \ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_i, \ b_r, \ zero_r, \ ct_i, rs_ct, cs_ct, \ data, \ cntx \ ); /* ct_i = alpha_r * a_i * b_r */ \ } \ else /* if ( bli_is_io_packed( schema_b ) ) */ \ { \ /* The following gemm micro-kernel calls implement the second half of the 4mb method (which uses b_i): c_r += -a_i * b_i; c_i += a_r * b_i; NOTE: Scaling by alpha_r is not shown above; it is applied by passing alpha_r (or its negation, m_alpha_r) as the alpha argument of each call below. */ \ \ bli_auxinfo_set_next_ab( a_i, b_i, data ); /* a_i and b_i are the operands of the second call below */ \ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_r, \ b_i, \ zero_r, \ ct_i, rs_ct, cs_ct, \ data, \ cntx \ ); /* ct_i = alpha_r * a_r * b_i */ \ \ bli_auxinfo_set_next_ab( a_next, b_next, data ); /* write back the values read from data on entry */ \ \ rgemm_ukr \ ( \ k, \ &m_alpha_r, \ a_i, \ b_i, \ zero_r, \ ct_r, rs_ct, cs_ct, \ data, \ cntx \ ); /* ct_r = -alpha_r * a_i * b_i */ \ } \ \ \ \ /* How we accumulate the intermediate matrix product stored in ct_r and ct_i depends on (a) the schema of B, and (b) the value of beta.
*/ \ if ( bli_is_ro_packed( schema_b ) ) \ { \ if ( !PASTEMAC(chr,eq0)( beta_i ) ) \ { \ /* c = beta * c + ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(ch,xpbyris)( gamma11t_r, \ gamma11t_i, \ beta_r, \ beta_i, \ *gamma11_r, \ *gamma11_i ); \ } \ } \ else if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r + ct_r; */ \ /* c_i = c_i + ct_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,adds)( gamma11t_r, *gamma11_r ); \ PASTEMAC(chr,adds)( gamma11t_i, *gamma11_i ); \ } \ } \ else if ( !PASTEMAC(chr,eq0)( beta_r ) ) \ { \ /* c_r = beta_r * c_r + ct_r; */ \ /* c_i = beta_r * c_i + ct_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,xpbys)( gamma11t_r, beta_r, *gamma11_r ); \ PASTEMAC(chr,xpbys)( gamma11t_i, beta_r, *gamma11_i ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = ct_r; */ \ /* c_i = ct_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i =
*(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,copys)( gamma11t_r, *gamma11_r ); \ PASTEMAC(chr,copys)( gamma11t_i, *gamma11_i ); \ } \ } \ } \ else /* if ( bli_is_io_packed( schema_b ) ) */ \ { \ /* NOTE: If this branch executes, it means we are in the second half of the 4mb computation in which we multiply the b_i sub-panel by the entire block of A. Here, we know that beta will either be equal to one (for interior cases within gemm macro-kernel), or zero (for edge cases). */ \ \ if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r + ct_r; */ \ /* c_i = c_i + ct_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,adds)( gamma11t_r, *gamma11_r ); \ PASTEMAC(chr,adds)( gamma11t_i, *gamma11_i ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = ct_r; */ \ /* c_i = ct_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t_r = *(ct_r + i*incct + j*ldct); \ const ctype_r gamma11t_i = *(ct_i + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,copys)( gamma11t_r, *gamma11_r ); \ PASTEMAC(chr,copys)( gamma11t_i, *gamma11_i ); \ } \ } \ } \ \ /*PASTEMAC(chr,fprintm)( stdout, "gemm4mb_ukr: b1_r", k, n, b_r, n, 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemm4mb_ukr: b1_i", k, n, b_i, n, 1, "%4.1f", "" );*/ \ /*PASTEMAC(chr,fprintm)( stdout,
"gemm4mb_ukr: a1_r", m, k, a_r, 1, m, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemm4mb_ukr: a1_i", m, k, a_i, 1, m, "%4.1f", "" );*/ \ /*PASTEMAC(chr,fprintm)( stdout, "gemm4mb_ukr: ct_r", 8, 6, ct_r, rs_ct, cs_ct, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemm4mb_ukr: ct_i", 8, 6, ct_i, rs_ct, cs_ct, "%4.1f", "" );*/ \ } INSERT_GENTFUNCCO_BASIC2( gemm4mb, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/ind/bli_gemm4mh_ref.c000066400000000000000000000224471360743507500207270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" /* GENTFUNCCO template for the gemm4mh reference micro-kernel; it is instantiated by the INSERT_GENTFUNCCO_BASIC2 line that follows the macro. The generated function makes a single call to the real-domain gemm micro-kernel (queried from cntx) with a and b reinterpreted as real-typed pointers, storing ct = alpha_r * a * b in a local buffer. The pack schemas of a and b read from the auxinfo object then decide how ct is folded into c: ro/ro adds it to the real part, ro/io or io/ro adds it to the imaginary part, and io/io subtracts it from the real part, with beta applied as noted on each branch. An alpha with a non-zero imaginary part is reported through bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ). */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict beta, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; /* the local tile is m x n, using the real-domain MR and NR queried above */ \ \ ctype_r ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ inc_t rs_ct; \ inc_t cs_ct; \ \ ctype_r* restrict a_cast = ( ctype_r* )a; \ \ ctype_r* restrict b_cast = ( ctype_r* )b; \ \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ \ ctype_r* restrict alpha_r = &PASTEMAC(ch,real)( *alpha ); \ ctype_r* restrict alpha_i = &PASTEMAC(ch,imag)( *alpha ); \ \ const ctype_r beta_r = PASTEMAC(ch,real)( *beta ); \ const ctype_r beta_i = PASTEMAC(ch,imag)( *beta ); \ \ const pack_t schema_a = bli_auxinfo_schema_a( data ); \ const pack_t schema_b = bli_auxinfo_schema_b( data ); \ \ dim_t n_iter; \ dim_t n_elem; \ \ inc_t incc, ldc; \ inc_t incct, ldct; \ \ dim_t i, j; \ \ \ /* SAFETY CHECK: The higher level implementation should never allow an
alpha with non-zero imaginary component to be passed in, because it can't be applied properly using the 4mh method. If alpha is not real, then something is very wrong. */ \ if ( !PASTEMAC(chr,eq0)( *alpha_i ) ) \ bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ \ \ /* An optimization: Set local strides and loop bounds based on the strides of c, so that (a) the micro-kernel accesses ct the same way it would if it were updating c directly, and (b) c is updated contiguously. For c with general stride, we access ct the same way we would if c were column-stored. */ \ if ( bli_is_row_stored( rs_c, cs_c ) ) \ { \ rs_ct = n; n_iter = m; incc = cs_c; \ cs_ct = 1; n_elem = n; ldc = rs_c; \ } \ else /* column-stored or general stride */ \ { \ rs_ct = 1; n_iter = n; incc = rs_c; \ cs_ct = m; n_elem = m; ldc = cs_c; \ } \ incct = 1; \ ldct = n_elem; /* ct is read back with unit increment and leading dimension n_elem */ \ \ \ /* The following gemm micro-kernel call implements one "phase" of the 4m method: c = beta * c; c_r += a_r * b_r - a_i * b_i; c_i += a_r * b_i + a_i * b_r; NOTE: Scaling by alpha_r is not shown above; it is applied by passing alpha_r as the alpha argument of the call below. */ \ \ \ /* ct = alpha_r * a * b; */ \ rgemm_ukr \ ( \ k, \ alpha_r, \ a_cast, \ b_cast, \ zero_r, \ ct, rs_ct, cs_ct, \ data, \ cntx \ ); \ \ \ /* How we accumulate the intermediate matrix product stored in ct depends on (a) the schemas of A and B, and (b) the value of beta.
*/ \ if ( bli_is_ro_packed( schema_a ) && \ bli_is_ro_packed( schema_b ) ) \ { \ if ( !PASTEMAC(chr,eq0)( beta_i ) ) \ { \ /* c = beta * c; c_r = c_r + ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ \ PASTEMAC(ch,scals)( *beta, *gamma11 ); \ PASTEMAC(chr,adds)( gamma11t, *gamma11_r ); \ } \ } \ else if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r + ct; c_i = c_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ \ PASTEMAC(chr,adds)( gamma11t, *gamma11_r ); \ } \ } \ else if ( !PASTEMAC(chr,eq0)( beta_r ) ) \ { \ /* c_r = beta_r * c_r + ct; c_i = beta_r * c_i; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,xpbys)( gamma11t, beta_r, *gamma11_r ); \ PASTEMAC(chr,scals)( beta_r, *gamma11_i ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = ct; c_i = 0; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,copys)( gamma11t, *gamma11_r ); \ PASTEMAC(chr,set0s)( *gamma11_i ); \ } \ } \ } \ else if ( ( bli_is_ro_packed( schema_a ) && \ bli_is_io_packed( schema_b ) ) || \ ( bli_is_io_packed( schema_a ) && \ bli_is_ro_packed( schema_b ) )
\ ) \ { \ if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r + 0; c_i = c_i + ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,adds)( gamma11t, *gamma11_i ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = 0; c_i = ct; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,set0s)( *gamma11_r ); \ PASTEMAC(chr,copys)( gamma11t, *gamma11_i ); \ } \ } \ } \ else /* if ( bli_is_io_packed( schema_a ) && \ bli_is_io_packed( schema_b ) ) */ \ { \ if ( PASTEMAC(chr,eq1)( beta_r ) ) \ { \ /* c_r = c_r - ct; c_i = c_i + 0; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ \ PASTEMAC(chr,subs)( gamma11t, *gamma11_r ); \ } \ } \ else /* if PASTEMAC(chr,eq0)( beta_r ) */ \ { \ /* c_r = -ct; c_i = 0; */ \ for ( j = 0; j < n_iter; ++j ) \ for ( i = 0; i < n_elem; ++i ) \ { \ const ctype_r gamma11t = *(ct + i*incct + j*ldct); \ ctype* restrict gamma11 = c + i*incc + j*ldc ; \ ctype_r* restrict gamma11_r = &PASTEMAC(ch,real)( *gamma11 ); \ ctype_r* restrict gamma11_i = &PASTEMAC(ch,imag)( *gamma11 ); \ \ PASTEMAC(chr,copys)( -gamma11t, *gamma11_r ); \ PASTEMAC(chr,set0s)( *gamma11_i ); \ } \ } \ } \ } INSERT_GENTFUNCCO_BASIC2( gemm4mh, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/ind/bli_gemmtrsm1m_ref.c000066400000000000000000000175351360743507500214640ustar00rootroot00000000000000/* BLIS An
object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* GENTFUNCCO template for the gemmtrsm1m reference micro-kernels; the two INSERT_GENTFUNCCO_BASIC3 lines after the macro instantiate it as gemmtrsm1m_l and gemmtrsm1m_u, passing BLIS_TRSM_L_UKR and BLIS_TRSM_U_UKR as trsmkerid. The generated function (1) calls the real-domain gemm micro-kernel with k2 = 2 * k to compute bt = -1.0 * a1x * bx1 into a local buffer, (2) updates b11 as alpha * b11 + bt using the 1e or 1r layout selected by the pack schema of b read from cntx, and (3) calls the virtual trsm micro-kernel selected by trsmkerid on a11, b11 and c11. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf, trsmkerid ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ const num_t dt_r = PASTEMAC(chr,type); \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ PASTECH(ch,trsm_ukr_ft) \ ctrsm_vir_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, trsmkerid, cntx ); \ \ const bool_t col_pref = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); /* mr and nr are queried for the complex type dt */ \ \ const dim_t mr_r = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr_r = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); /* mr_r and nr_r are queried for the real type dt_r */ \ \ ctype bt[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ inc_t rs_bt; \ inc_t cs_bt; \ \ inc_t rs_bt_r; \ inc_t cs_bt_r; \ \ const dim_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \ \ const pack_t schema_b = bli_cntx_schema_b_panel( cntx ); \ \ const dim_t k2 = 2 * k; /* the real-domain micro-kernel is called with twice the k dimension */ \ \ ctype_r* restrict a1x_r = ( ctype_r* )a1x; \ \ ctype_r* restrict bx1_r = ( ctype_r* )bx1; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ ctype_r* restrict minus_one_r = PASTEMAC(chr,m1); \ \ const ctype_r alpha_r = PASTEMAC(ch,real)( *alpha ); \ const ctype_r alpha_i = PASTEMAC(ch,imag)( *alpha ); \ \ ctype_r* b_use; \ inc_t rs_b_use; \ inc_t cs_b_use; \ \ \ /* Handle alphas with non-zero imaginary components.
*/ \ /* NOTE: This branch should never execute because alphas with non-zero imaginary components should be applied during packing, and so the only alphas we should see here are those exclusively in the real domain, either because the value originally had no imaginary component (e.g. 4.0) or because a 1.0 was sent in as a placeholder since the alpha was applied during packing. */ \ if ( 0 ) /* the constant zero keeps the check below from ever executing */ \ if ( !PASTEMAC(chr,eq0)( alpha_i ) ) \ { \ bli_abort(); \ \ /* ctype_r* restrict one_r = PASTEMAC(chr,1); \ \ const inc_t ld_b = rs_b; \ \ PASTEMAC(ch,scal1ms_mxn)( schema_b, \ mr, \ nr, \ alpha, \ b11, rs_b, cs_b, ld_b ); \ \ alpha_r = *one_r; \ */ \ } \ \ \ { \ /* Set the strides for the temporary bt matrix based on the native real domain micro-kernel storage preferences. The rs_bt/cs_bt pair indexes bt in complex elements; the rs_bt_r/cs_bt_r pair is what is passed to the real-domain micro-kernel. */ \ if ( col_pref ) { rs_bt = 1; cs_bt = mr; \ rs_bt_r = 1; cs_bt_r = mr_r; } \ else { rs_bt = nr; cs_bt = 1; \ rs_bt_r = nr_r; cs_bt_r = 1; } \ \ b_use = ( ctype_r* )bt; \ rs_b_use = rs_bt_r; \ cs_b_use = cs_bt_r; \ } \ \ \ /* Since b11 is stored in the 1e or 1r schema, we cannot update it directly, and instead must compute the matrix product in a local temporary microtile and then accumulate it into b11 according to its schema.
*/ \ \ \ /* lower: bt = -1.0 * a10 * b01; upper: bt = -1.0 * a12 * b21; */ \ rgemm_ukr \ ( \ k2, \ minus_one_r, \ a1x_r, \ bx1_r, \ zero_r, \ b_use, rs_b_use, cs_b_use, \ data, \ cntx \ ); \ \ \ if ( bli_is_1e_packed( schema_b ) ) \ { \ const inc_t ld_b = rs_b; \ \ ctype* restrict b11_ri = ( ctype* )b11; \ ctype* restrict b11_ir = ( ctype* )b11 + ld_b/2; /* second copy of b11, ld_b/2 complex elements past the first */ \ \ dim_t i, j; \ \ /* b11 = alpha * b11 + bt; */ \ for ( j = 0; j < nr; ++j ) \ for ( i = 0; i < mr; ++i ) \ { \ ctype* restrict beta11t = bt + i*rs_bt + j*cs_bt; \ ctype_r* restrict beta11t_r = &PASTEMAC(ch,real)( *beta11t ); \ ctype_r* restrict beta11t_i = &PASTEMAC(ch,imag)( *beta11t ); \ ctype* restrict beta11_ri = b11_ri + i*rs_b + j*cs_b; \ ctype_r* restrict beta11_r = &PASTEMAC(ch,real)( *beta11_ri ); \ ctype_r* restrict beta11_i = &PASTEMAC(ch,imag)( *beta11_ri ); \ ctype* restrict beta11_ir = b11_ir + i*rs_b + j*cs_b; \ \ PASTEMAC3(ch,chr,ch,xpbyris) \ ( \ *beta11t_r, \ *beta11t_i, \ alpha_r, \ alpha_i, /* alpha_i not referenced */ \ *beta11_r, \ *beta11_i \ ); \ \ PASTEMAC(ch,sets)( -*beta11_i, \ *beta11_r, *beta11_ir ); /* NOTE(review): presumably stores ( -imag, real ) of the updated element into the second copy; confirm against the sets macro */ \ } \ } \ else /* if ( bli_is_1r_packed( schema_b ) ) */ \ { \ const inc_t ld_b = rs_b; \ const inc_t rs_b2 = 2 * rs_b; \ const inc_t cs_b2 = cs_b; \ \ ctype_r* restrict b11_r = ( ctype_r* )b11; \ ctype_r* restrict b11_i = ( ctype_r* )b11 + ld_b; /* imaginary parts begin ld_b real elements past the real parts */ \ \ dim_t i, j; \ \ /* b11 = alpha * b11 + bt; */ \ for ( j = 0; j < nr; ++j ) \ for ( i = 0; i < mr; ++i ) \ { \ ctype* restrict beta11t = bt + i*rs_bt + j*cs_bt; \ ctype_r* restrict beta11t_r = &PASTEMAC(ch,real)( *beta11t ); \ ctype_r* restrict beta11t_i = &PASTEMAC(ch,imag)( *beta11t ); \ ctype_r* restrict beta11_r = b11_r + i*rs_b2 + j*cs_b2; \ ctype_r* restrict beta11_i = b11_i + i*rs_b2 + j*cs_b2; \ \ PASTEMAC3(ch,chr,ch,xpbyris) \ ( \ *beta11t_r, \ *beta11t_i, \ alpha_r, \ alpha_i, /* alpha_i not referenced */ \ *beta11_r, \ *beta11_i \ ); \ } \ } \ \ \ /* b11 = inv(a11) * b11; c11 = b11; */ \ ctrsm_vir_ukr \ ( \ a11, \ b11, \ c11, rs_c,
cs_c, \ data, \ cntx \ ); \ } INSERT_GENTFUNCCO_BASIC3( gemmtrsm1m_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_L_UKR ) INSERT_GENTFUNCCO_BASIC3( gemmtrsm1m_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_U_UKR ) blis-0.6.1/ref_kernels/ind/bli_gemmtrsm3m1_ref.c000066400000000000000000000205161360743507500215400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/ #include "blis.h" /* GENTFUNCCO template for the gemmtrsm3m1 reference micro-kernels; the two INSERT_GENTFUNCCO_BASIC3 lines after the macro instantiate it as gemmtrsm3m1_l and gemmtrsm3m1_u, passing BLIS_TRSM_L_UKR and BLIS_TRSM_U_UKR as trsmkerid. The generated function makes three calls to the real-domain gemm micro-kernel: ab_r = a1x_r * bx1_r and ab_i = a1x_i * bx1_i are written to local buffers, and b11_i = alpha_r * b11_i - a1x_ri * bx1_ri is updated in place. A loop then combines ab_r and ab_i into b11_r and b11_i and rewrites b11_ri, after which the virtual trsm micro-kernel selected by trsmkerid is called on a11, b11 and c11. If alpha has a non-zero imaginary part, b11 is scaled by alpha up front and alpha_r is replaced by one. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf, trsmkerid ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ const num_t dt_r = PASTEMAC(chr,type); \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ PASTECH(ch,trsm_ukr_ft) \ ctrsm_vir_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, trsmkerid, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t packnr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ ctype_r ab_r[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ ctype_r ab_i[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype_r ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ const inc_t rs_ab = 1; \ const inc_t cs_ab = mr; /* ab_r and ab_i are always addressed with unit row stride and column stride mr */ \ \ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data ); \ \ ctype_r* restrict a1x_r = ( ctype_r* )a1x; \ ctype_r* restrict a1x_i = ( ctype_r* )a1x + is_a; \ ctype_r* restrict a1x_ri = ( ctype_r* )a1x + 2*is_a; /* the r, i and ri parts of a1x sit is_a real elements apart */ \ \ ctype_r* restrict bx1_r = ( ctype_r* )bx1; \ ctype_r* restrict bx1_i = ( ctype_r* )bx1 + is_b; \ ctype_r* restrict bx1_ri = ( ctype_r* )bx1 + 2*is_b; /* the r, i and ri parts of bx1 sit is_b real elements apart */ \ \ ctype_r* restrict b11_r = ( ctype_r* )b11; \ ctype_r* restrict b11_i = ( ctype_r* )b11 + is_b; \ ctype_r* restrict b11_ri = ( ctype_r* )b11 + 2*is_b; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ ctype_r* restrict one_r = PASTEMAC(chr,1); \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \ ctype_r* restrict minus_one_r = PASTEMAC(chr,m1); \ \ ctype_r alpha_r
= PASTEMAC(ch,real)( *alpha ); \ ctype_r alpha_i = PASTEMAC(ch,imag)( *alpha ); \ \ void* a_next = bli_auxinfo_next_a( data ); \ void* b_next = bli_auxinfo_next_b( data ); /* saved so they can be written back to data before the last gemm micro-kernel call */ \ \ dim_t i, j; \ \ \ /* Handle an alpha with a non-zero imaginary component. */ \ if ( !PASTEMAC(chr,eq0)( alpha_i ) ) \ { \ /* We can handle a non-zero imaginary component on alpha, but to do so we have to manually scale b and then use alpha == 1 for the micro-kernel calls. */ \ for ( i = 0; i < m; ++i ) \ for ( j = 0; j < n; ++j ) \ PASTEMAC(ch,scalris)( alpha_r, \ alpha_i, \ *(b11_r + i*rs_b + j*cs_b), \ *(b11_i + i*rs_b + j*cs_b) ); \ \ /* Use alpha.r == 1.0. */ \ alpha_r = *one_r; \ } \ \ \ /* lower: b11.r = alpha.r * b11.r - ( + a10.r * b01.r - a10.i * b01.i ); b11.i = alpha.r * b11.i - ( a10.ri * b01.ri - a10.r * b01.r - a10.i * b01.i ); upper: b11.r = alpha.r * b11.r - ( + a12.r * b21.r - a12.i * b21.i ); b11.i = alpha.r * b11.i - ( a12.ri * b21.ri - a12.r * b21.r - a12.i * b21.i ); */ \ \ bli_auxinfo_set_next_ab( a1x_i, bx1_i, data ); /* a1x_i and bx1_i are the operands of the second call below */ \ \ /* lower: ab.r = a10.r * b01.r; upper: ab.r = a12.r * b21.r; */ \ rgemm_ukr \ ( \ k, \ one_r, \ a1x_r, \ bx1_r, \ zero_r, \ ab_r, rs_ab, cs_ab, \ data, \ cntx \ ); \ \ bli_auxinfo_set_next_ab( a1x_ri, bx1_ri, data ); /* a1x_ri and bx1_ri are the operands of the third call below */ \ \ /* lower: ab.i = a10.i * b01.i; upper: ab.i = a12.i * b21.i; */ \ rgemm_ukr \ ( \ k, \ one_r, \ a1x_i, \ bx1_i, \ zero_r, \ ab_i, rs_ab, cs_ab, \ data, \ cntx \ ); \ \ bli_auxinfo_set_next_ab( a_next, b_next, data ); /* write back the values read from data on entry */ \ \ /* lower: b11.i = alpha.r * b11.i - a10.ri * b01.ri; upper: b11.i = alpha.r * b11.i - a12.ri * b21.ri; */ \ rgemm_ukr \ ( \ k, \ minus_one_r, \ a1x_ri, \ bx1_ri, \ &alpha_r, \ b11_i, rs_b, cs_b, \ data, \ cntx \ ); \ \ \ /* b11.r = alpha.r * b11.r - ab.r; b11.r = b11.r + ab.i; b11.i = b11.i + ab.r; b11.i = b11.i + ab.i; */ \ for ( i = 0; i < m; ++i ) \ for ( j = 0; j < n; ++j ) \ { \ ctype_r alphabeta_r = *(ab_r + i*rs_ab + j*cs_ab); \ ctype_r alphabeta_i = *(ab_i + i*rs_ab + j*cs_ab); \ ctype_r beta11_r = *(b11_r + i*rs_b
+ j*cs_b); \ ctype_r beta11_i = *(b11_i + i*rs_b + j*cs_b); \ \ PASTEMAC(chr,scals)( alpha_r, beta11_r ); \ \ PASTEMAC(chr,subs)( alphabeta_r, beta11_r ); \ PASTEMAC(chr,adds)( alphabeta_i, beta11_r ); \ PASTEMAC(chr,adds)( alphabeta_r, beta11_i ); \ PASTEMAC(chr,adds)( alphabeta_i, beta11_i ); \ \ /* Store the local values back to b11. */ \ PASTEMAC(ch,copyris)( beta11_r, \ beta11_i, \ *(b11_r + i*rs_b + j*cs_b), \ *(b11_i + i*rs_b + j*cs_b) ); \ \ /* Update the ri part of b11. */ \ PASTEMAC(chr,add3s)( beta11_r, \ beta11_i, \ *(b11_ri + i*rs_b + j*cs_b) ); \ } \ \ \ /* b11 = inv(a11) * b11; c11 = b11; */ \ ctrsm_vir_ukr \ ( \ a11, \ b11, \ c11, rs_c, cs_c, \ data, \ cntx \ ); \ \ \ /* PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b11_r after", m, n, \ b11_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b11_i after", m, n, \ b11_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ /* PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b01_r", k, n, \ b01_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b01_i", k, n, \ b01_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b11_r", m, n, \ b11_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm3m1_l_ukr: b11_i", m, n, \ b11_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNCCO_BASIC3( gemmtrsm3m1_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_L_UKR ) INSERT_GENTFUNCCO_BASIC3( gemmtrsm3m1_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_U_UKR ) blis-0.6.1/ref_kernels/ind/bli_gemmtrsm4m1_ref.c000066400000000000000000000172511360743507500215430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries.
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* GENTFUNCCO template for the reference gemmtrsm4m1 micro-kernels (instantiated after this macro as gemmtrsm4m1_l and gemmtrsm4m1_u). The generated function first updates the packed block b11 with four calls to the real-domain gemm micro-kernel, addressing the real and imaginary parts of a1x, bx1 and b11 separately, and then calls the virtual trsm micro-kernel selected by trsmkerid on a11, b11 and c11. k is forwarded as the k dimension of every rgemm_ukr call, alpha scales b11, data supplies is_a, is_b and the next-a and next-b addresses, and cntx supplies the micro-kernels and the MR and NR blocksizes. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf, trsmkerid ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ dim_t k, \ ctype* restrict alpha, \ ctype* restrict a1x, \ ctype* restrict a11, \ ctype* restrict bx1, \ ctype* restrict b11, \ ctype* restrict c11, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ const num_t dt_r = PASTEMAC(chr,type); \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ \ PASTECH(ch,trsm_ukr_ft) \ ctrsm_vir_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, trsmkerid, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t packnr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; /* b11 is traversed below as an m x n block */ \ \ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data ); \ \ ctype_r* restrict a1x_r = ( ctype_r* )a1x; \ ctype_r* restrict a1x_i = ( ctype_r* )a1x + is_a; /* imaginary part: is_a real elements past the real part */ \ \ ctype_r* restrict bx1_r = ( ctype_r* )bx1; \ ctype_r* restrict bx1_i = ( ctype_r* )bx1 + is_b; \ \ ctype_r* restrict b11_r = ( ctype_r* )b11; \ ctype_r* restrict b11_i = ( ctype_r* )b11 + is_b; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; /* b11 is addressed with row stride packnr and unit column stride */ \ \ ctype_r* restrict one_r = PASTEMAC(chr,1); \ ctype_r* restrict minus_one_r = PASTEMAC(chr,m1); \ \ /* A hack to avoid a 'restrict' warning triggered by passing in the same address (one_r) for both alpha and beta when calling the last of the four matrix products. We now use one_r for alpha and this new local variable, onel, for beta. (See issue #328.) 
*/ \ ctype_r onel; \ ctype_r* restrict onel_r = &onel; \ PASTEMAC(chr,set1s)( onel ); \ \ ctype_r alpha_r = PASTEMAC(ch,real)( *alpha ); \ ctype_r alpha_i = PASTEMAC(ch,imag)( *alpha ); \ \ void* a_next = bli_auxinfo_next_a( data ); \ void* b_next = bli_auxinfo_next_b( data ); /* saved so they can be written back to data before the last rgemm_ukr call */ \ \ dim_t i, j; \ \ /* printf( "gemmtrsm4m1_l_ukr: is_a = %lu is_b = %lu\n", is_a, is_b ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: a1x11p_r", m, k+m, \ a1x_r, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: a1x11p_i", m, k+m, \ a1x_i, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: bx111p_r", k+m, n, \ bx1_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: bx111p_i", k+m, n, \ bx1_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ \ /* Pre-scale b11 by alpha when alpha has a non-zero imaginary part. */ \ if ( !PASTEMAC(chr,eq0)( alpha_i ) ) \ { \ /* We can handle a non-zero imaginary component on alpha, but to do so we have to manually scale b and then use alpha == 1 for the micro-kernel calls. */ \ for ( i = 0; i < m; ++i ) \ for ( j = 0; j < n; ++j ) \ PASTEMAC(ch,scalris)( alpha_r, \ alpha_i, \ *(b11_r + i*rs_b + j*cs_b), \ *(b11_i + i*rs_b + j*cs_b) ); \ \ /* Use alpha.r == 1.0. 
*/ \ alpha_r = *one_r; \ } \ \ \ /* lower: b11.r = alpha.r * b11.r - ( a10.r * b01.r - a10.i * b01.i ); b11.i = alpha.r * b11.i - ( a10.r * b01.i + a10.i * b01.r ); upper: b11.r = alpha.r * b11.r - ( a12.r * b21.r - a12.i * b21.i ); b11.i = alpha.r * b11.i - ( a12.r * b21.i + a12.i * b21.r ); */ \ \ bli_auxinfo_set_next_ab( a1x_r, bx1_i, data ); /* records the a and b addresses used by the second rgemm_ukr call below (presumably a prefetch hint; confirm) */ \ \ /* lower: b11.r = alpha.r * b11.r - a10.r * b01.r; upper: b11.r = alpha.r * b11.r - a12.r * b21.r; */ \ rgemm_ukr \ ( \ k, \ minus_one_r, \ a1x_r, \ bx1_r, \ &alpha_r, \ b11_r, rs_b, cs_b, \ data, \ cntx \ ); \ \ bli_auxinfo_set_next_ab( a1x_i, bx1_r, data ); /* records the a and b addresses used by the third rgemm_ukr call below */ \ \ /* lower: b11.i = alpha.r * b11.i - a10.r * b01.i; upper: b11.i = alpha.r * b11.i - a12.r * b21.i; */ \ rgemm_ukr \ ( \ k, \ minus_one_r, \ a1x_r, \ bx1_i, \ &alpha_r, \ b11_i, rs_b, cs_b, \ data, \ cntx \ ); \ \ bli_auxinfo_set_next_ab( a1x_i, bx1_i, data ); /* records the a and b addresses used by the fourth rgemm_ukr call below */ \ \ /* lower: b11.i = 1.0 * b11.i - a10.i * b01.r; upper: b11.i = 1.0 * b11.i - a12.i * b21.r; */ \ rgemm_ukr \ ( \ k, \ minus_one_r, \ a1x_i, \ bx1_r, \ one_r, \ b11_i, rs_b, cs_b, \ data, \ cntx \ ); \ \ bli_auxinfo_set_next_ab( a_next, b_next, data ); /* writes back the next-a and next-b values read from data on entry */ \ \ /* lower: b11.r = 1.0 * b11.r + a10.i * b01.i; upper: b11.r = 1.0 * b11.r + a12.i * b21.i; */ \ rgemm_ukr \ ( \ k, \ one_r, \ a1x_i, \ bx1_i, \ onel_r, \ b11_r, rs_b, cs_b, \ data, \ cntx \ ); \ /* PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: bx111p_r post-gemm", k+m, n, \ bx1_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: bx111p_i post-gemm", k+m, n, \ bx1_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ \ /* b11 = inv(a11) * b11; c11 = b11; */ \ ctrsm_vir_ukr \ ( \ a11, \ b11, \ c11, rs_c, cs_c, \ data, \ cntx \ ); \ \ /* PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: bx111p_r after", k+m, n, \ bx1_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: bx111p_i after", k+m, n, \ bx1_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ } 
INSERT_GENTFUNCCO_BASIC3( gemmtrsm4m1_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_L_UKR ) INSERT_GENTFUNCCO_BASIC3( gemmtrsm4m1_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX, BLIS_TRSM_U_UKR ) blis-0.6.1/ref_kernels/ind/bli_trsm1m_ref.c000066400000000000000000000411241360743507500206050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* GENTFUNCCO template for the reference trsm1m_l micro-kernel. It solves for the m x n (MR x NR) block b one row at a time, from row 0 to row m-1: for each element it accumulates the product of row i of a with the rows of b already computed (a10t and B0), subtracts that from the element, multiplies by the diagonal entry of a (stored pre-inverted), and writes the result to both c and b. Two layouts are handled, selected by the schema from bli_cntx_schema_b_panel(): when it is 1e, a is read as separate real and imaginary parts and b holds complex elements plus a second copy at offset ld_b/2 that is written as (-imag, real); otherwise (1r) a holds complex elements and b is read and written as separate real and imaginary parts ld_b real elements apart. The data argument is not referenced in this body. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ \ const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ const inc_t rs_a = 1; \ const inc_t cs_a = packmr; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ const inc_t ld_a = cs_a; \ const inc_t ld_b = rs_b; \ \ const pack_t schema_b = bli_cntx_schema_b_panel( cntx ); \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ \ \ if ( bli_is_1e_packed( schema_b ) ) \ { \ const inc_t rs_a2 = 1 * rs_a; \ const inc_t cs_a2 = 2 * cs_a; \ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + ld_a; \ \ ctype* restrict b_ri = ( ctype* )b; \ ctype* restrict b_ir = ( ctype* )b + ld_b/2; /* second copy of each row, written below as (-imag, real) */ \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = iter; \ n_behind = i; \ \ ctype_r* restrict alpha11_r = a_r + (i )*rs_a2 + (i )*cs_a2; \ ctype_r* restrict alpha11_i = a_i + (i )*rs_a2 + (i )*cs_a2; \ ctype_r* restrict a10t_r = a_r + (i )*rs_a2 + (0 )*cs_a2; \ ctype_r* restrict a10t_i = a_i + (i )*rs_a2 + (0 )*cs_a2; \ ctype* restrict b1_ri = b_ri + (i )*rs_b + (0 )*cs_b; \ ctype* restrict b1_ir = b_ir + (i )*rs_b + (0 )*cs_b; \ ctype* restrict B0_ri = b_ri + (0 )*rs_b + (0 )*cs_b; \ \ /* b1 = b1 - a10t * B0; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype* restrict beta11_ri = b1_ri + (0 )*rs_b + (j )*cs_b; \ ctype* restrict beta11_ir = b1_ir + (0 )*rs_b + (j )*cs_b; \ ctype* restrict b01_ri = B0_ri + (0 )*rs_b + (j )*cs_b; \ ctype* restrict gamma11 
= c + (i )*rs_c + (j )*cs_c; \ ctype_r beta11c_r = PASTEMAC(ch,real)( *beta11_ri ); \ ctype_r beta11c_i = PASTEMAC(ch,imag)( *beta11_ri ); \ ctype_r rho11_r; \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a10t * b01; */ \ PASTEMAC(ch,set0ris)( rho11_r, \ rho11_i ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype_r* restrict alpha10_r = a10t_r + (l )*cs_a2; \ ctype_r* restrict alpha10_i = a10t_i + (l )*cs_a2; \ ctype* restrict beta01_ri = b01_ri + (l )*rs_b; \ ctype_r* restrict beta01_r = &PASTEMAC(ch,real)( *beta01_ri ); \ ctype_r* restrict beta01_i = &PASTEMAC(ch,imag)( *beta01_ri ); \ \ PASTEMAC(ch,axpyris)( *alpha10_r, \ *alpha10_i, \ *beta01_r, \ *beta01_i, \ rho11_r, \ rho11_i ); \ } \ PASTEMAC(ch,subris)( rho11_r, \ rho11_i, \ beta11c_r, \ beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scalris)( *alpha11_r, \ *alpha11_i, \ beta11c_r, \ beta11c_i ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,sets)( beta11c_r, beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. 
*/ \ PASTEMAC(ch,sets)( beta11c_r, beta11c_i, *beta11_ri ); \ PASTEMAC(ch,sets)( -beta11c_i, beta11c_r, *beta11_ir ); /* second copy receives (-imag, real) */ \ } \ } \ } \ else /* ( bli_is_1r_packed( schema_b ) ) */ \ { \ const inc_t rs_b2 = 2 * rs_b; \ const inc_t cs_b2 = 1 * cs_b; \ \ ctype* restrict a_ri = ( ctype* )a; \ /*ctype* restrict a_ir = ( ctype* )a + ld_a/2;*/ \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + ld_b; \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = iter; \ n_behind = i; \ \ ctype* restrict alpha11_ri = a_ri + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict alpha11_r = &PASTEMAC(ch,real)( *alpha11_ri ); \ ctype_r* restrict alpha11_i = &PASTEMAC(ch,imag)( *alpha11_ri ); \ ctype* restrict a10t_ri = a_ri + (i )*rs_a + (0 )*cs_a; \ ctype_r* restrict b1_r = b_r + (i )*rs_b2 + (0 )*cs_b2; \ ctype_r* restrict b1_i = b_i + (i )*rs_b2 + (0 )*cs_b2; \ ctype_r* restrict B0_r = b_r + (0 )*rs_b2 + (0 )*cs_b2; \ ctype_r* restrict B0_i = b_i + (0 )*rs_b2 + (0 )*cs_b2; \ \ /* b1 = b1 - a10t * B0; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype_r* restrict beta11_r = b1_r + (0 )*rs_b2 + (j )*cs_b2; \ ctype_r* restrict beta11_i = b1_i + (0 )*rs_b2 + (j )*cs_b2; \ ctype_r* restrict b01_r = B0_r + (0 )*rs_b2 + (j )*cs_b2; \ ctype_r* restrict b01_i = B0_i + (0 )*rs_b2 + (j )*cs_b2; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype_r beta11c_r = *beta11_r; \ ctype_r beta11c_i = *beta11_i; \ ctype_r rho11_r; \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a10t * b01; */ \ PASTEMAC(ch,set0ris)( rho11_r, \ rho11_i ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype* restrict alpha10_ri = a10t_ri + (l )*cs_a; \ ctype_r* restrict alpha10_r = &PASTEMAC(ch,real)( *alpha10_ri ); \ ctype_r* restrict alpha10_i = &PASTEMAC(ch,imag)( *alpha10_ri ); \ ctype_r* restrict beta01_r = b01_r + (l )*rs_b2; \ ctype_r* restrict beta01_i = b01_i + (l )*rs_b2; \ \ PASTEMAC(ch,axpyris)( *alpha10_r, \ *alpha10_i, \ *beta01_r, \ *beta01_i, \ rho11_r, \ rho11_i ); \ } \ 
PASTEMAC(ch,subris)( rho11_r, \ rho11_i, \ beta11c_r, \ beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scalris)( *alpha11_r, \ *alpha11_i, \ beta11c_r, \ beta11c_i ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,sets)( beta11c_r, \ beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. */ \ PASTEMAC(ch,copyris)( beta11c_r, \ beta11c_i, \ *beta11_r, \ *beta11_i ); \ } \ } \ } \ } INSERT_GENTFUNCCO_BASIC2( trsm1m_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) /* GENTFUNCCO template for the reference trsm1m_u micro-kernel. It handles the same two layouts (1e, otherwise 1r) as the template above, but visits the rows of the m x n block b from row m-1 down to row 0 (i = m - iter - 1): for each element it accumulates the product of the part of row i of a that follows the diagonal (a12t) with the rows of b after row i (B2), subtracts that from the element, multiplies by the pre-inverted diagonal entry of a, and writes the result to both c and b. The data argument is not referenced in this body. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt = PASTEMAC(ch,type); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ \ const inc_t packmr = bli_cntx_get_blksz_max_dt( dt, BLIS_MR, cntx ); \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ const inc_t rs_a = 1; \ const inc_t cs_a = packmr; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ const inc_t ld_a = cs_a; \ const inc_t ld_b = rs_b; \ \ const pack_t schema_b = bli_cntx_schema_b_panel( cntx ); \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ \ \ if ( bli_is_1e_packed( schema_b ) ) \ { \ const inc_t rs_a2 = 1 * rs_a; \ const inc_t cs_a2 = 2 * cs_a; \ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + ld_a; \ \ ctype* restrict b_ri = ( ctype* )b; \ ctype* restrict b_ir = ( ctype* )b + ld_b/2; \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = m - iter - 1; \ n_behind = 
iter; \ \ ctype_r* restrict alpha11_r = a_r + (i )*rs_a2 + (i )*cs_a2; \ ctype_r* restrict alpha11_i = a_i + (i )*rs_a2 + (i )*cs_a2; \ ctype_r* restrict a12t_r = a_r + (i )*rs_a2 + (i+1)*cs_a2; \ ctype_r* restrict a12t_i = a_i + (i )*rs_a2 + (i+1)*cs_a2; \ ctype* restrict b1_ri = b_ri + (i )*rs_b + (0 )*cs_b; \ ctype* restrict b1_ir = b_ir + (i )*rs_b + (0 )*cs_b; \ ctype* restrict B2_ri = b_ri + (i+1)*rs_b + (0 )*cs_b; \ \ /* b1 = b1 - a12t * B2; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype* restrict beta11_ri = b1_ri + (0 )*rs_b + (j )*cs_b; \ ctype* restrict beta11_ir = b1_ir + (0 )*rs_b + (j )*cs_b; \ ctype* restrict b21_ri = B2_ri + (0 )*rs_b + (j )*cs_b; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype_r beta11c_r = PASTEMAC(ch,real)( *beta11_ri ); \ ctype_r beta11c_i = PASTEMAC(ch,imag)( *beta11_ri ); \ ctype_r rho11_r; \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a12t * b21; */ \ PASTEMAC(ch,set0ris)( rho11_r, \ rho11_i ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype_r* restrict alpha12_r = a12t_r + (l )*cs_a2; \ ctype_r* restrict alpha12_i = a12t_i + (l )*cs_a2; \ ctype* restrict beta21_ri = b21_ri + (l )*rs_b; \ ctype_r* restrict beta21_r = &PASTEMAC(ch,real)( *beta21_ri ); \ ctype_r* restrict beta21_i = &PASTEMAC(ch,imag)( *beta21_ri ); \ \ PASTEMAC(ch,axpyris)( *alpha12_r, \ *alpha12_i, \ *beta21_r, \ *beta21_i, \ rho11_r, \ rho11_i ); \ } \ PASTEMAC(ch,subris)( rho11_r, \ rho11_i, \ beta11c_r, \ beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scalris)( *alpha11_r, \ *alpha11_i, \ beta11c_r, \ beta11c_i ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,sets)( beta11c_r, beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. 
*/ \ PASTEMAC(ch,sets)( beta11c_r, beta11c_i, *beta11_ri ); \ PASTEMAC(ch,sets)( -beta11c_i, beta11c_r, *beta11_ir ); /* second copy receives (-imag, real) */ \ } \ } \ } \ else /* if ( bli_is_1r_packed( schema_b ) ) */ \ { \ const inc_t rs_b2 = 2 * rs_b; \ const inc_t cs_b2 = 1 * cs_b; \ \ ctype* restrict a_ri = ( ctype* )a; \ /*ctype* restrict a_ir = ( ctype* )a + ld_a/2;*/ \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + ld_b; \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = m - iter - 1; \ n_behind = iter; \ \ ctype* restrict alpha11_ri = a_ri + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict alpha11_r = &PASTEMAC(ch,real)( *alpha11_ri ); \ ctype_r* restrict alpha11_i = &PASTEMAC(ch,imag)( *alpha11_ri ); \ ctype* restrict a12t_ri = a_ri + (i )*rs_a + (i+1)*cs_a; \ ctype_r* restrict b1_r = b_r + (i )*rs_b2 + (0 )*cs_b2; \ ctype_r* restrict b1_i = b_i + (i )*rs_b2 + (0 )*cs_b2; \ ctype_r* restrict B2_r = b_r + (i+1)*rs_b2 + (0 )*cs_b2; \ ctype_r* restrict B2_i = b_i + (i+1)*rs_b2 + (0 )*cs_b2; \ \ /* b1 = b1 - a12t * B2; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype_r* restrict beta11_r = b1_r + (0 )*rs_b2 + (j )*cs_b2; \ ctype_r* restrict beta11_i = b1_i + (0 )*rs_b2 + (j )*cs_b2; \ ctype_r* restrict b21_r = B2_r + (0 )*rs_b2 + (j )*cs_b2; \ ctype_r* restrict b21_i = B2_i + (0 )*rs_b2 + (j )*cs_b2; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype_r beta11c_r = *beta11_r; \ ctype_r beta11c_i = *beta11_i; \ ctype_r rho11_r; \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a12t * b21; */ \ PASTEMAC(ch,set0ris)( rho11_r, \ rho11_i ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype* restrict alpha12_ri = a12t_ri + (l )*cs_a; \ ctype_r* restrict alpha12_r = &PASTEMAC(ch,real)( *alpha12_ri ); \ ctype_r* restrict alpha12_i = &PASTEMAC(ch,imag)( *alpha12_ri ); \ ctype_r* restrict beta21_r = b21_r + (l )*rs_b2; \ ctype_r* restrict beta21_i = b21_i + (l )*rs_b2; \ \ PASTEMAC(ch,axpyris)( *alpha12_r, \ *alpha12_i, \ *beta21_r, \ *beta21_i, \ rho11_r, \ 
rho11_i ); \ } \ PASTEMAC(ch,subris)( rho11_r, \ rho11_i, \ beta11c_r, \ beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scalris)( *alpha11_r, \ *alpha11_i, \ beta11c_r, \ beta11c_i ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,sets)( beta11c_r, \ beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. */ \ PASTEMAC(ch,copyris)( beta11c_r, \ beta11c_i, \ *beta11_r, \ *beta11_i ); \ } \ } \ } \ } INSERT_GENTFUNCCO_BASIC2( trsm1m_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/ind/bli_trsm3m1_ref.c000066400000000000000000000246011360743507500206710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" /* GENTFUNCCO template for the reference trsm3m1_l micro-kernel. a and b are read as separate real and imaginary parts located is_a and is_b real elements apart (both offsets come from data); b also has a third part, b_ri, at offset 2*is_b. The m x n (MR x NR) block b is solved one row at a time from row 0 to row m-1: for each element the product of row i of a with the rows of b already computed (a10t and B0) is subtracted, the result is multiplied by the pre-inverted diagonal entry of a, and the value is written to c and to the real and imaginary parts of b, after which the third part is updated through add3s. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const inc_t packmr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_MR, cntx ); \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data ); \ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + is_a; \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + is_b; \ ctype_r* restrict b_ri = ( ctype_r* )b + 2*is_b; /* third part of b, updated at the end of each element's solve */ \ \ const inc_t rs_a = 1; \ const inc_t cs_a = packmr; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = iter; \ n_behind = i; \ \ ctype_r* restrict alpha11_r = a_r + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict alpha11_i = a_i + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict a10t_r = a_r + (i )*rs_a + (0 )*cs_a; \ ctype_r* restrict a10t_i = a_i + (i )*rs_a + (0 )*cs_a; \ ctype_r* restrict 
b1_r = b_r + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict b1_i = b_i + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict b1_ri = b_ri + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict B0_r = b_r + (0 )*rs_b + (0 )*cs_b; \ ctype_r* restrict B0_i = b_i + (0 )*rs_b + (0 )*cs_b; \ \ /* b1 = b1 - a10t * B0; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype_r* restrict beta11_r = b1_r + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict beta11_i = b1_i + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict beta11_ri = b1_ri + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict b01_r = B0_r + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict b01_i = B0_i + (0 )*rs_b + (j )*cs_b; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype_r beta11c_r = *beta11_r; \ ctype_r beta11c_i = *beta11_i; \ ctype_r rho11_r; \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a10t * b01; */ \ PASTEMAC(chr,set0s)( rho11_r ); \ PASTEMAC(chr,set0s)( rho11_i ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype_r* restrict alpha10_r = a10t_r + (l )*cs_a; \ ctype_r* restrict alpha10_i = a10t_i + (l )*cs_a; \ ctype_r* restrict beta01_r = b01_r + (l )*rs_b; \ ctype_r* restrict beta01_i = b01_i + (l )*rs_b; \ \ PASTEMAC(ch,axpyris)( *alpha10_r, \ *alpha10_i, \ *beta01_r, \ *beta01_i, \ rho11_r, \ rho11_i ); \ } \ PASTEMAC(ch,subris)( rho11_r, \ rho11_i, \ beta11c_r, \ beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scalris)( *alpha11_r, \ *alpha11_i, \ beta11c_r, \ beta11c_i ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,sets)( beta11c_r, \ beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. */ \ PASTEMAC(chr,copys)( beta11c_r, *beta11_r ); \ PASTEMAC(chr,copys)( beta11c_i, *beta11_i ); \ \ /* Update the ri part of the packed panel. 
*/ \ PASTEMAC(chr,add3s)( beta11c_r, \ beta11c_i, \ *beta11_ri ); /* NOTE(review): presumably stores beta11c_r + beta11c_i into the third part; confirm add3s semantics */ \ } \ } \ } INSERT_GENTFUNCCO_BASIC2( trsm3m1_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) /* GENTFUNCCO template for the reference trsm3m1_u micro-kernel. It uses the same three-part layout of b (b_r, b_i at offset is_b, b_ri at offset 2*is_b) and two-part layout of a, but visits the rows of the m x n block from row m-1 down to row 0 (i = m - iter - 1): for each element the product of the part of row i of a that follows the diagonal (a12t) with the rows of b after row i (B2) is subtracted, the result is multiplied by the pre-inverted diagonal entry of a, and the value is written to c and to all parts of b. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const inc_t packmr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_MR, cntx ); \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data ); \ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + is_a; \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + is_b; \ ctype_r* restrict b_ri = ( ctype_r* )b + 2*is_b; \ \ const inc_t rs_a = 1; \ const inc_t cs_a = packmr; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = m - iter - 1; \ n_behind = iter; \ \ ctype_r* restrict alpha11_r = a_r + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict alpha11_i = a_i + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict a12t_r = a_r + (i )*rs_a + (i+1)*cs_a; \ ctype_r* restrict a12t_i = a_i + (i )*rs_a + (i+1)*cs_a; \ ctype_r* restrict b1_r = b_r + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict b1_i = b_i + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict b1_ri = b_ri + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict B2_r = b_r + (i+1)*rs_b + (0 )*cs_b; \ ctype_r* restrict B2_i = b_i + (i+1)*rs_b + (0 )*cs_b; \ \ /* b1 = b1 - a12t * B2; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype_r* 
restrict beta11_r = b1_r + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict beta11_i = b1_i + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict beta11_ri = b1_ri + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict b21_r = B2_r + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict b21_i = B2_i + (0 )*rs_b + (j )*cs_b; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype_r beta11c_r = *beta11_r; \ ctype_r beta11c_i = *beta11_i; \ ctype_r rho11_r; \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a12t * b21; */ \ PASTEMAC(chr,set0s)( rho11_r ); \ PASTEMAC(chr,set0s)( rho11_i ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype_r* restrict alpha12_r = a12t_r + (l )*cs_a; \ ctype_r* restrict alpha12_i = a12t_i + (l )*cs_a; \ ctype_r* restrict beta21_r = b21_r + (l )*rs_b; \ ctype_r* restrict beta21_i = b21_i + (l )*rs_b; \ \ PASTEMAC(ch,axpyris)( *alpha12_r, \ *alpha12_i, \ *beta21_r, \ *beta21_i, \ rho11_r, \ rho11_i ); \ } \ PASTEMAC(ch,subris)( rho11_r, \ rho11_i, \ beta11c_r, \ beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scalris)( *alpha11_r, \ *alpha11_i, \ beta11c_r, \ beta11c_i ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,sets)( beta11c_r, \ beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. */ \ PASTEMAC(chr,copys)( beta11c_r, *beta11_r ); \ PASTEMAC(chr,copys)( beta11c_i, *beta11_i ); \ \ /* Update the ri part of the packed panel. */ \ PASTEMAC(chr,add3s)( beta11c_r, \ beta11c_i, \ *beta11_ri ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC2( trsm3m1_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/ref_kernels/ind/bli_trsm4m1_ref.c000066400000000000000000000247221360743507500206760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" /* GENTFUNCCO template for the reference trsm4m1_l micro-kernel. a and b are read as separate real and imaginary parts located is_a and is_b real elements apart (both offsets come from data). The m x n (MR x NR) block b is solved one row at a time from row 0 to row m-1: for each element the product of row i of a with the rows of b already computed (a10t and B0) is subtracted, the result is multiplied by the pre-inverted diagonal entry of a, and the value is written to c and back to the real and imaginary parts of b. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const inc_t packmr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_MR, cntx ); \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data ); \ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + is_a; \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + is_b; \ \ const inc_t rs_a = 1; \ const inc_t cs_a = packmr; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ \ /* PASTEMAC(chr,fprintm)( stdout, "trsm4m1_l_ukr: a11p_r", m, m, \ a_r, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "trsm4m1_l_ukr: a11p_i", m, m, \ a_i, 1, PASTEMAC(chr,packmr), "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "trsm4m1_l_ukr: b11p_r", m, n, \ b_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "trsm4m1_l_ukr: b11p_i", m, n, \ b_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = iter; \ n_behind = i; \ \ ctype_r* restrict alpha11_r = a_r + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict alpha11_i = a_i + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict a10t_r = a_r + (i )*rs_a + (0 )*cs_a; \ ctype_r* restrict a10t_i = a_i + (i )*rs_a + (0 )*cs_a; \ ctype_r* restrict b1_r = b_r + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict b1_i = b_i + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict 
B0_r = b_r + (0 )*rs_b + (0 )*cs_b; \ ctype_r* restrict B0_i = b_i + (0 )*rs_b + (0 )*cs_b; \ \ /* b1 = b1 - a10t * B0; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype_r* restrict beta11_r = b1_r + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict beta11_i = b1_i + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict b01_r = B0_r + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict b01_i = B0_i + (0 )*rs_b + (j )*cs_b; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype_r beta11c_r = *beta11_r; \ ctype_r beta11c_i = *beta11_i; \ ctype_r rho11_r; \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a10t * b01; */ \ PASTEMAC(chr,set0s)( rho11_r ); \ PASTEMAC(chr,set0s)( rho11_i ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype_r* restrict alpha10_r = a10t_r + (l )*cs_a; \ ctype_r* restrict alpha10_i = a10t_i + (l )*cs_a; \ ctype_r* restrict beta01_r = b01_r + (l )*rs_b; \ ctype_r* restrict beta01_i = b01_i + (l )*rs_b; \ \ PASTEMAC(ch,axpyris)( *alpha10_r, \ *alpha10_i, \ *beta01_r, \ *beta01_i, \ rho11_r, \ rho11_i ); \ } \ PASTEMAC(ch,subris)( rho11_r, \ rho11_i, \ beta11c_r, \ beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scalris)( *alpha11_r, \ *alpha11_i, \ beta11c_r, \ beta11c_i ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,sets)( beta11c_r, \ beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. 
*/ \ PASTEMAC(chr,copys)( beta11c_r, *beta11_r ); \ PASTEMAC(chr,copys)( beta11c_i, *beta11_i ); \ } \ } \ \ /* PASTEMAC(chr,fprintm)( stdout, "trsm4m1_l_ukr: b11p_r after", m, n, \ b_r, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ PASTEMAC(chr,fprintm)( stdout, "trsm4m1_l_ukr: b11p_i after", m, n, \ b_i, PASTEMAC(chr,packnr), 1, "%4.1f", "" ); \ */ \ } INSERT_GENTFUNCCO_BASIC2( trsm4m1_l, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) /* GENTFUNCCO template for the reference trsm4m1_u micro-kernel. It uses the same two-part (real, imaginary) layout of a and b, but visits the rows of the m x n block from row m-1 down to row 0 (i = m - iter - 1): for each element the product of the part of row i of a that follows the diagonal (a12t) with the rows of b after row i (B2) is subtracted, the result is multiplied by the pre-inverted diagonal entry of a, and the value is written to c and back to the real and imaginary parts of b. */ #undef GENTFUNCCO #define GENTFUNCCO( ctype, ctype_r, ch, chr, opname, arch, suf ) \ \ void PASTEMAC3(ch,opname,arch,suf) \ ( \ ctype* restrict a, \ ctype* restrict b, \ ctype* restrict c, inc_t rs_c, inc_t cs_c, \ auxinfo_t* restrict data, \ cntx_t* restrict cntx \ ) \ { \ const num_t dt_r = PASTEMAC(chr,type); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt_r, BLIS_NR, cntx ); \ \ const inc_t packmr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_MR, cntx ); \ const inc_t packnr = bli_cntx_get_blksz_max_dt( dt_r, BLIS_NR, cntx ); \ \ const dim_t m = mr; \ const dim_t n = nr; \ \ const inc_t is_a = bli_auxinfo_is_a( data ); \ const inc_t is_b = bli_auxinfo_is_b( data ); \ \ ctype_r* restrict a_r = ( ctype_r* )a; \ ctype_r* restrict a_i = ( ctype_r* )a + is_a; \ \ ctype_r* restrict b_r = ( ctype_r* )b; \ ctype_r* restrict b_i = ( ctype_r* )b + is_b; \ \ const inc_t rs_a = 1; \ const inc_t cs_a = packmr; \ \ const inc_t rs_b = packnr; \ const inc_t cs_b = 1; \ \ dim_t iter, i, j, l; \ dim_t n_behind; \ \ \ for ( iter = 0; iter < m; ++iter ) \ { \ i = m - iter - 1; \ n_behind = iter; \ \ ctype_r* restrict alpha11_r = a_r + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict alpha11_i = a_i + (i )*rs_a + (i )*cs_a; \ ctype_r* restrict a12t_r = a_r + (i )*rs_a + (i+1)*cs_a; \ ctype_r* restrict a12t_i = a_i + (i )*rs_a + (i+1)*cs_a; \ ctype_r* restrict b1_r = b_r + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict b1_i = b_i + (i )*rs_b + (0 )*cs_b; \ ctype_r* restrict B2_r = b_r + (i+1)*rs_b + (0 )*cs_b; 
\ ctype_r* restrict B2_i = b_i + (i+1)*rs_b + (0 )*cs_b; \ \ /* b1 = b1 - a12t * B2; */ \ /* b1 = b1 / alpha11; */ \ for ( j = 0; j < n; ++j ) \ { \ ctype_r* restrict beta11_r = b1_r + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict beta11_i = b1_i + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict b21_r = B2_r + (0 )*rs_b + (j )*cs_b; \ ctype_r* restrict b21_i = B2_i + (0 )*rs_b + (j )*cs_b; \ ctype* restrict gamma11 = c + (i )*rs_c + (j )*cs_c; \ ctype_r beta11c_r = *beta11_r; \ ctype_r beta11c_i = *beta11_i; \ ctype_r rho11_r; \ ctype_r rho11_i; \ \ /* beta11 = beta11 - a12t * b21; */ \ PASTEMAC(chr,set0s)( rho11_r ); \ PASTEMAC(chr,set0s)( rho11_i ); \ for ( l = 0; l < n_behind; ++l ) \ { \ ctype_r* restrict alpha12_r = a12t_r + (l )*cs_a; \ ctype_r* restrict alpha12_i = a12t_i + (l )*cs_a; \ ctype_r* restrict beta21_r = b21_r + (l )*rs_b; \ ctype_r* restrict beta21_i = b21_i + (l )*rs_b; \ \ PASTEMAC(ch,axpyris)( *alpha12_r, \ *alpha12_i, \ *beta21_r, \ *beta21_i, \ rho11_r, \ rho11_i ); \ } \ PASTEMAC(ch,subris)( rho11_r, \ rho11_i, \ beta11c_r, \ beta11c_i ); \ \ /* beta11 = beta11 / alpha11; */ \ /* NOTE: The INVERSE of alpha11 (1.0/alpha11) is stored instead of alpha11, so we can multiply rather than divide. We store the inverse of alpha11 intentionally to avoid expensive division instructions within the micro-kernel. */ \ PASTEMAC(ch,scalris)( *alpha11_r, \ *alpha11_i, \ beta11c_r, \ beta11c_i ); \ \ /* Output final result to matrix c. */ \ PASTEMAC(ch,sets)( beta11c_r, \ beta11c_i, *gamma11 ); \ \ /* Store the local values back to b11. 
*/ \ PASTEMAC(chr,copys)( beta11c_r, *beta11_r ); \ PASTEMAC(chr,copys)( beta11c_i, *beta11_i ); \ } \ } \ } INSERT_GENTFUNCCO_BASIC2( trsm4m1_u, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) blis-0.6.1/sandbox/000077500000000000000000000000001360743507500141175ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/000077500000000000000000000000001360743507500150555ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/base/000077500000000000000000000000001360743507500157675ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/base/blx_blksz.c000066400000000000000000000057751360743507500201430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" dim_t blx_determine_blocksize_f ( dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ) { num_t dt; blksz_t* bsize; dim_t b_alg, b_max; dim_t b_use; // Extract the execution datatype and use it to query the corresponding // blocksize and blocksize maximum values from the blksz_t object. dt = bli_obj_exec_dt( obj ); bsize = bli_cntx_get_blksz( bszid, cntx ); b_alg = bli_blksz_get_def( dt, bsize ); b_max = bli_blksz_get_max( dt, bsize ); b_use = blx_determine_blocksize_f_sub( i, dim, b_alg, b_max ); return b_use; } dim_t blx_determine_blocksize_f_sub ( dim_t i, dim_t dim, dim_t b_alg, dim_t b_max ) { dim_t b_now; dim_t dim_left_now; // We assume that this function is being called from an algorithm that // is moving "forward" (ie: top to bottom, left to right, top-left // to bottom-right). // Compute how much of the matrix dimension is left, including the // chunk that will correspond to the blocksize we are computing now. dim_left_now = dim - i; // If the dimension currently remaining is less than the maximum // blocksize, use it instead of the default blocksize b_alg. // Otherwise, use b_alg. if ( dim_left_now <= b_max ) { b_now = dim_left_now; } else { b_now = b_alg; } return b_now; } blis-0.6.1/sandbox/ref99/base/blx_blksz.h000066400000000000000000000045341360743507500201400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ dim_t blx_determine_blocksize ( dir_t direct, dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ); dim_t blx_determine_blocksize_f ( dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ); dim_t blx_determine_blocksize_b ( dim_t i, dim_t dim, obj_t* obj, bszid_t bszid, cntx_t* cntx ); dim_t blx_determine_blocksize_f_sub ( dim_t i, dim_t dim, dim_t b_alg, dim_t b_max ); dim_t blx_determine_blocksize_b_sub ( dim_t i, dim_t dim, dim_t b_alg, dim_t b_max ); blis-0.6.1/sandbox/ref99/blx_gemm.h000066400000000000000000000036561360743507500170320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // core gemm #include "blx_gemm_front.h" #include "blx_gemm_int.h" #include "blx_gemm_var.h" // base #include "blx_blksz.h" // cntl #include "blx_packm_cntl.h" #include "blx_gemm_cntl.h" #include "blx_l3_cntl_if.h" // thread #include "blx_gemm_thread.h" // packm #include "blx_l3_packm.h" blis-0.6.1/sandbox/ref99/blx_gemm_front.c000066400000000000000000000111361360743507500202250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" void blx_gemm_front ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { bli_init_once(); obj_t a_local; obj_t b_local; obj_t c_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemm_check( alpha, a, b, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); // If alpha is non-unit, typecast and apply it to the scalar attached // to B. if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( alpha, &b_local ); } // If beta is non-unit, typecast and apply it to the scalar attached // to C. if ( !bli_obj_equals( beta, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( beta, &c_local ); } // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. 
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_swap( &a_local, &b_local ); bli_obj_induce_trans( &a_local ); bli_obj_induce_trans( &b_local ); bli_obj_induce_trans( &c_local ); } { // A sort of hack for communicating the desired pach schemas for A and // B to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and // bli_l3_cntl_create_if()). This allows us to access the schemas from // the control tree, which hopefully reduces some confusion, // particularly in bli_packm_init(). if ( bli_cntx_method( cntx ) == BLIS_NAT ) { bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); } else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) { pack_t schema_a = bli_cntx_schema_a_block( cntx ); pack_t schema_b = bli_cntx_schema_b_panel( cntx ); bli_obj_set_pack_schema( schema_a, &a_local ); bli_obj_set_pack_schema( schema_b, &b_local ); } } // Parse and interpret the contents of the rntm_t object to properly // set the ways of parallelism for each loop, and then make any // additional modifications necessary for the current operation. bli_rntm_set_ways_for_op ( BLIS_GEMM, BLIS_LEFT, // ignored for gemm bli_obj_length( &c_local ), bli_obj_width( &c_local ), bli_obj_width( &a_local ), rntm ); // Invoke the internal back-end via the thread handler. blx_gemm_thread ( blx_gemm_int, BLIS_GEMM, // operation family id &a_local, &b_local, &c_local, cntx, rntm, cntl ); } blis-0.6.1/sandbox/ref99/blx_gemm_front.h000066400000000000000000000035141360743507500202330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void blx_gemm_front ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ); blis-0.6.1/sandbox/ref99/blx_gemm_int.c000066400000000000000000000047221360743507500176720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" void blx_gemm_int ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a_local; obj_t b_local; obj_t c_local; gemm_var_oft f; // Alias A, B, and C in case we need to update attached scalars. bli_obj_alias_to( a, &a_local ); bli_obj_alias_to( b, &b_local ); bli_obj_alias_to( c, &c_local ); // Create the next node in the thrinfo_t structure. bli_thrinfo_grow( rntm, cntl, thread ); // Extract the function pointer from the current control tree node. 
f = bli_cntl_var_func( cntl ); // Invoke the variant. f ( &a_local, &b_local, &c_local, cntx, rntm, cntl, thread ); } blis-0.6.1/sandbox/ref99/blx_gemm_int.h000066400000000000000000000034711360743507500176770ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void blx_gemm_int ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); blis-0.6.1/sandbox/ref99/cntl/000077500000000000000000000000001360743507500160155ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/cntl/blx_gemm_cntl.c000066400000000000000000000111261360743507500207740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #include "blix.h" cntl_t* blx_gemm_cntl_create ( opid_t family, pack_t schema_a, pack_t schema_b ) { return blx_gemmbp_cntl_create( family, schema_a, schema_b ); } // ----------------------------------------------------------------------------- cntl_t* blx_gemmbp_cntl_create ( opid_t family, pack_t schema_a, pack_t schema_b ) { void_fp macro_kernel_fp; void_fp packa_fp; void_fp packb_fp; macro_kernel_fp = blx_gemm_ker_var2; packa_fp = bli_packm_blk_var1; packb_fp = bli_packm_blk_var1; // Create two nodes for the macro-kernel. cntl_t* gemm_cntl_bu_ke = blx_gemm_cntl_create_node ( family, // the operation family BLIS_MR, // needed for bli_thrinfo_rgrow() NULL, // variant function pointer not used NULL // no sub-node; this is the leaf of the tree. ); cntl_t* gemm_cntl_bp_bu = blx_gemm_cntl_create_node ( family, BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow() macro_kernel_fp, gemm_cntl_bu_ke ); // Create a node for packing matrix A. cntl_t* gemm_cntl_packa = blx_packm_cntl_create_node ( blx_gemm_packa, // pack the left-hand operand packa_fp, BLIS_MR, BLIS_KR, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? schema_a, // normally BLIS_PACKED_ROW_PANELS BLIS_BUFFER_FOR_A_BLOCK, gemm_cntl_bp_bu ); // Create a node for partitioning the m dimension by MC. cntl_t* gemm_cntl_op_bp = blx_gemm_cntl_create_node ( family, BLIS_MC, blx_gemm_blk_var1, gemm_cntl_packa ); // Create a node for packing matrix B. cntl_t* gemm_cntl_packb = blx_packm_cntl_create_node ( blx_gemm_packb, // pack the right-hand operand packb_fp, BLIS_KR, BLIS_NR, FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? schema_b, // normally BLIS_PACKED_COL_PANELS BLIS_BUFFER_FOR_B_PANEL, gemm_cntl_op_bp ); // Create a node for partitioning the k dimension by KC. 
cntl_t* gemm_cntl_mm_op = blx_gemm_cntl_create_node ( family, BLIS_KC, blx_gemm_blk_var3, gemm_cntl_packb ); // Create a node for partitioning the n dimension by NC. cntl_t* gemm_cntl_vl_mm = blx_gemm_cntl_create_node ( family, BLIS_NC, blx_gemm_blk_var2, gemm_cntl_mm_op ); return gemm_cntl_vl_mm; } // ----------------------------------------------------------------------------- void blx_gemm_cntl_free ( cntl_t* cntl, thrinfo_t* thread ) { bli_cntl_free( cntl, thread ); } // ----------------------------------------------------------------------------- cntl_t* blx_gemm_cntl_create_node ( opid_t family, bszid_t bszid, void_fp var_func, cntl_t* sub_node ) { return bli_cntl_create_node( family, bszid, var_func, NULL, sub_node ); } blis-0.6.1/sandbox/ref99/cntl/blx_gemm_cntl.h000066400000000000000000000045131360743507500210030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ cntl_t* blx_gemm_cntl_create ( opid_t family, pack_t schema_a, pack_t schema_b ); // ----------------------------------------------------------------------------- cntl_t* blx_gemmbp_cntl_create ( opid_t family, pack_t schema_a, pack_t schema_b ); // ----------------------------------------------------------------------------- void blx_gemm_cntl_free ( cntl_t* cntl, thrinfo_t* thread ); // ----------------------------------------------------------------------------- cntl_t* blx_gemm_cntl_create_node ( opid_t family, bszid_t bszid, void_fp var_func, cntl_t* sub_node ); blis-0.6.1/sandbox/ref99/cntl/blx_l3_cntl_if.c000066400000000000000000000074611360743507500210520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" void blx_l3_cntl_create_if ( opid_t family, obj_t* a, obj_t* b, obj_t* c, cntl_t* cntl_orig, cntl_t** cntl_use ) { // This is part of a hack to support mixed domain in bli_gemm_front(). // Sometimes we need to specify a non-standard schema for A and B, and // we decided to transmit them via the schema field in the obj_t's // rather than pass them in as function parameters. Once the values // have been read, we immediately reset them back to their expected // values for unpacked objects. Notice that we do this even if the // caller passed in a custom control tree; that's because we still need // to reset the pack schema of a and b, which were modified by the // operation's _front() function. However, in order for this to work, // the level-3 thread entry function (or omp parallel region) must // alias thread-local copies of objects a and b. 
pack_t schema_a = bli_obj_pack_schema( a ); pack_t schema_b = bli_obj_pack_schema( b ); bli_obj_set_pack_schema( BLIS_NOT_PACKED, a ); bli_obj_set_pack_schema( BLIS_NOT_PACKED, b ); // If the control tree pointer is NULL, we construct a default // tree as a function of the operation family. if ( cntl_orig == NULL ) { *cntl_use = blx_gemm_cntl_create( family, schema_a, schema_b ); } else { // If the user provided a control tree, create a copy and use it // instead (so that threads can use its local tree as a place to // cache things like pack mem_t entries). *cntl_use = bli_cntl_copy( cntl_orig ); // Recursively set the family fields of the newly copied control tree // nodes. bli_cntl_mark_family( family, *cntl_use ); } } void blx_l3_cntl_free_if ( obj_t* a, obj_t* b, obj_t* c, cntl_t* cntl_orig, cntl_t* cntl_use, thrinfo_t* thread ) { // If the control tree pointer is NULL, a default tree would have // been created, so we now must free it. if ( cntl_orig == NULL ) { blx_gemm_cntl_free( cntl_use, thread ); } else { // If the user provided a control tree, free the copy of it that // was created. bli_cntl_free( cntl_use, thread ); } } blis-0.6.1/sandbox/ref99/cntl/blx_l3_cntl_if.h000066400000000000000000000037441360743507500210570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void blx_l3_cntl_create_if ( opid_t family, obj_t* a, obj_t* b, obj_t* c, cntl_t* cntl_orig, cntl_t** cntl_use ); void blx_l3_cntl_free_if ( obj_t* a, obj_t* b, obj_t* c, cntl_t* cntl_orig, cntl_t* cntl_use, thrinfo_t* thread ); blis-0.6.1/sandbox/ref99/cntl/blx_packm_cntl.c000066400000000000000000000057051360743507500211500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" cntl_t* blx_packm_cntl_create_node ( void_fp var_func, void_fp packm_var_func, bszid_t bmid_m, bszid_t bmid_n, bool_t does_invert_diag, bool_t rev_iter_if_upper, bool_t rev_iter_if_lower, pack_t pack_schema, packbuf_t pack_buf_type, cntl_t* sub_node ) { cntl_t* cntl; packm_params_t* params; // Allocate a packm_params_t struct. params = bli_malloc_intl( sizeof( packm_params_t ) ); // Initialize the packm_params_t struct. params->size = sizeof( packm_params_t ); params->var_func = packm_var_func; params->bmid_m = bmid_m; params->bmid_n = bmid_n; params->does_invert_diag = does_invert_diag; params->rev_iter_if_upper = rev_iter_if_upper; params->rev_iter_if_lower = rev_iter_if_lower; params->pack_schema = pack_schema; params->pack_buf_type = pack_buf_type; // It's important that we set the bszid field to BLIS_NO_PART to indicate // that no blocksize partitioning is performed. bli_cntl_free() will rely // on this information to know how to step through the thrinfo_t tree in // sync with the cntl_t tree. 
cntl = bli_cntl_create_node ( BLIS_NOID, BLIS_NO_PART, var_func, params, sub_node ); return cntl; } blis-0.6.1/sandbox/ref99/cntl/blx_packm_cntl.h000066400000000000000000000037551360743507500211600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ cntl_t* blx_packm_cntl_create_node ( void_fp var_func, void_fp packm_var_func, bszid_t bmid_m, bszid_t bmid_n, bool_t does_invert_diag, bool_t rev_iter_if_upper, bool_t rev_iter_if_lower, pack_t pack_schema, packbuf_t pack_buf_type, cntl_t* sub_node ); blis-0.6.1/sandbox/ref99/include/000077500000000000000000000000001360743507500165005ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/include/bli_sandbox.h000066400000000000000000000041021360743507500211320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of copyright holder(s) nor the names contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIS_SANDBOX_H #define BLIS_SANDBOX_H // NOTE: This header is the only header required to be present in the sandbox // implementation directory. // This header should contain (or #include) any definitions that must be // folded into blis.h. Typically, it will remain empty since any header // definitions specific to the sandbox implementation will not need to be // made available to applications (or the framework) during compilation. #endif blis-0.6.1/sandbox/ref99/include/blix.h000066400000000000000000000035741360743507500176200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of copyright holder(s) nor the names contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef BLIX_H #define BLIX_H // This header is #included by every file in the ref99 sandbox, and so here // we #include any headers that would define prototypes or types that are // needed by the ref99 sandbox source code. #include "blx_gemm.h" #endif blis-0.6.1/sandbox/ref99/oapi/000077500000000000000000000000001360743507500160055ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/oapi/bli_gemmnat.c000066400000000000000000000050561360743507500204350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name of copyright holder(s) nor the names contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" // Given the current architecture of BLIS sandboxes, bli_gemmnat() is the // entry point to any sandbox implementation. // NOTE: We must keep this function named bli_gemmnat() since this is the BLIS // API function for which we are providing an alternative implementation via // the sandbox. void bli_gemmnat ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, rntm_t* rntm ) { bli_init_once(); // Obtain a valid native context from the gks if necessary. if ( cntx == NULL ) cntx = bli_gks_query_cntx(); // Initialize a local runtime object if necessary. rntm_t rntm_l; if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } // Invoke the operation's front end. 
blx_gemm_front( alpha, a, b, beta, c, cntx, rntm, NULL ); } blis-0.6.1/sandbox/ref99/packm/000077500000000000000000000000001360743507500161505ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/packm/blx_l3_packm.c000066400000000000000000000131321360743507500206520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #include "blix.h" void blx_l3_packm ( obj_t* x, obj_t* x_pack, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { membrk_t* membrk; packbuf_t pack_buf_type; mem_t* cntl_mem_p; siz_t size_needed; // FGVZ: Not sure why we need this barrier, but we do. bli_thread_obarrier( thread ); // Every thread initializes x_pack and determines the size of memory // block needed (which gets embedded into the otherwise "blank" mem_t // entry in the control tree node). size_needed = bli_packm_init ( x, x_pack, cntx, cntl ); // If zero was returned, no memory needs to be allocated and so we can // return early. if ( size_needed == 0 ) return; // Query the memory broker from the context. membrk = bli_cntx_get_membrk( cntx ); // Query the pack buffer type from the control tree node. pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl ); // Query the address of the mem_t entry within the control tree node. cntl_mem_p = bli_cntl_pack_mem( cntl ); // Check the mem_t field in the control tree. If it is unallocated, then // we need to acquire a block from the memory broker and broadcast it to // all threads in the chief's thread group. if ( bli_mem_is_unalloc( cntl_mem_p ) ) { mem_t* local_mem_p; mem_t local_mem_s; if ( bli_thread_am_ochief( thread ) ) { // The chief thread acquires a block from the memory broker // and saves the associated mem_t entry to local_mem_s. bli_membrk_acquire_m ( membrk, size_needed, pack_buf_type, &local_mem_s ); } // Broadcast the address of the chief thread's local mem_t entry to // all threads. local_mem_p = bli_thread_obroadcast( thread, &local_mem_s ); // Save the contents of the chief thread's local mem_t entry to the // mem_t field in this thread's control tree node. 
*cntl_mem_p = *local_mem_p; } else // ( bli_mem_is_alloc( cntl_mem_p ) ) { mem_t* local_mem_p; mem_t local_mem_s; // If the mem_t entry in the control tree does NOT contain a NULL // buffer, then a block has already been acquired from the memory // broker and cached in the control tree. // As a sanity check, we should make sure that the mem_t object isn't // associated with a block that is too small compared to the size of // the packed matrix buffer that is needed, according to the return // value from packm_init(). siz_t cntl_mem_size = bli_mem_size( cntl_mem_p ); if ( cntl_mem_size < size_needed ) { if ( bli_thread_am_ochief( thread ) ) { // The chief thread releases the existing block associated with // the mem_t entry in the control tree, and then re-acquires a // new block, saving the associated mem_t entry to local_mem_s. bli_membrk_release( cntl_mem_p ); bli_membrk_acquire_m ( membrk, size_needed, pack_buf_type, &local_mem_s ); } // Broadcast the address of the chief thread's local mem_t entry to // all threads. local_mem_p = bli_thread_obroadcast( thread, &local_mem_s ); // Save the chief thread's local mem_t entry to the mem_t field in // this thread's control tree node. *cntl_mem_p = *local_mem_p; } else { // If the mem_t entry is already allocated and sufficiently large, // then we use it as-is. No action is needed, because all threads // will already have the cached values in their local control // trees' mem_t entries, currently pointed to by cntl_mem_p. bli_thread_obarrier( thread ); } } // Update the buffer address in x_pack to point to the buffer associated // with the mem_t entry acquired from the memory broker (now cached in // the control tree node). void* buf = bli_mem_buffer( cntl_mem_p ); bli_obj_set_buffer( buf, x_pack ); // Pack the contents of object x to object x_pack. bli_packm_int ( x, x_pack, cntx, cntl, thread ); // Barrier so that packing is done before computation. 
bli_thread_obarrier( thread ); } blis-0.6.1/sandbox/ref99/packm/blx_l3_packm.h000066400000000000000000000034541360743507500206650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void blx_l3_packm ( obj_t* x, obj_t* x_pack, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); blis-0.6.1/sandbox/ref99/thread/000077500000000000000000000000001360743507500163245ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/thread/blx_gemm_thread.c000066400000000000000000000116251360743507500216160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #include "blix.h" // This code is enabled only when multithreading is enabled via OpenMP. #ifdef BLIS_ENABLE_OPENMP void blx_gemm_thread ( gemmint_t func, opid_t family, obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { // Query the total number of threads from the context. dim_t n_threads = bli_rntm_num_threads( rntm ); // Allcoate a global communicator for the root thrinfo_t structures. thrcomm_t* gl_comm = bli_thrcomm_create( n_threads ); _Pragma( "omp parallel num_threads(n_threads)" ) { dim_t id = omp_get_thread_num(); obj_t a_t, b_t, c_t; cntl_t* cntl_use; thrinfo_t* thread; // Alias thread-local copies of A, B, and C. These will be the objects // we pass down the algorithmic function stack. Making thread-local // alaises IS ABSOLUTELY IMPORTANT and MUST BE DONE because each thread // will read the schemas from A and B and then reset the schemas to // their expected unpacked state (in blx_l3_cntl_create_if()). bli_obj_alias_to( a, &a_t ); bli_obj_alias_to( b, &b_t ); bli_obj_alias_to( c, &c_t ); // Create a default control tree for the operation, if needed. blx_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use ); // Create the root node of the current thread's thrinfo_t structure. bli_l3_thrinfo_create_root( id, gl_comm, rntm, cntl_use, &thread ); func ( &a_t, &b_t, &c_t, cntx, rntm, cntl_use, thread ); // Free the control tree, if one was created locally. blx_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread ); // Free the current thread's thrinfo_t structure. bli_l3_thrinfo_free( thread ); } // We shouldn't free the global communicator since it was already freed // by the global communicator's chief thread in bli_l3_thrinfo_free() // (called above). } #endif #ifdef BLIS_ENABLE_PTHREADS #error "Sandbox does not yet implement pthreads." #endif // This code is enabled only when multithreading is disabled. 
#ifndef BLIS_ENABLE_MULTITHREADING void blx_gemm_thread ( gemmint_t func, opid_t family, obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ) { // For sequential execution, we use only one thread. dim_t n_threads = 1; dim_t id = 0; // Allcoate a global communicator for the root thrinfo_t structures. thrcomm_t* gl_comm = bli_thrcomm_create( n_threads ); cntl_t* cntl_use; thrinfo_t* thread; // Create a default control tree for the operation, if needed. blx_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use ); // Create the root node of the thread's thrinfo_t structure. bli_l3_thrinfo_create_root( id, gl_comm, rntm, cntl_use, &thread ); func ( a, b, c, cntx, rntm, cntl_use, thread ); // Free the control tree, if one was created locally. blx_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread ); // Free the current thread's thrinfo_t structure. bli_l3_thrinfo_free( thread ); // We shouldn't free the global communicator since it was already freed // by the global communicator's chief thread in bli_l3_thrinfo_free() // (called above). } #endif blis-0.6.1/sandbox/ref99/thread/blx_gemm_thread.h000066400000000000000000000041071360743507500216200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // gemm internal function type typedef void (*gemmint_t) ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ); void blx_gemm_thread ( gemmint_t func, opid_t family, obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl ); blis-0.6.1/sandbox/ref99/vars/000077500000000000000000000000001360743507500160305ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/vars/blx_gemm_blk_var1.c000066400000000000000000000053071360743507500215540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" void blx_gemm_blk_var1 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a1, c1; dim_t i; dim_t b_alg; dim_t my_start, my_end; // Determine the current thread's subpartition range. bli_thread_range_mdim ( BLIS_FWD, thread, a, b, c, cntl, cntx, &my_start, &my_end ); // Partition along the m dimension. for ( i = my_start; i < my_end; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = blx_determine_blocksize_f( i, my_end, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and C1. bli_acquire_mpart_mdim( BLIS_FWD, BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_mpart_mdim( BLIS_FWD, BLIS_SUBPART1, i, b_alg, c, &c1 ); // Perform gemm subproblem. 
blx_gemm_int ( &a1, b, &c1, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } } blis-0.6.1/sandbox/ref99/vars/blx_gemm_blk_var2.c000066400000000000000000000053071360743507500215550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "blis.h" #include "blix.h" void blx_gemm_blk_var2 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t b1, c1; dim_t i; dim_t b_alg; dim_t my_start, my_end; // Determine the current thread's subpartition range. bli_thread_range_ndim ( BLIS_FWD, thread, a, b, c, cntl, cntx, &my_start, &my_end ); // Partition along the n dimension. for ( i = my_start; i < my_end; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = blx_determine_blocksize_f( i, my_end, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for B1 and C1. bli_acquire_mpart_ndim( BLIS_FWD, BLIS_SUBPART1, i, b_alg, b, &b1 ); bli_acquire_mpart_ndim( BLIS_FWD, BLIS_SUBPART1, i, b_alg, c, &c1 ); // Perform gemm subproblem. blx_gemm_int ( a, &b1, &c1, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } } blis-0.6.1/sandbox/ref99/vars/blx_gemm_blk_var3.c000066400000000000000000000060341360743507500215540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" void blx_gemm_blk_var3 ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a1, b1; dim_t i; dim_t b_alg; dim_t k_trans; // Query dimension in partitioning direction. k_trans = bli_obj_width_after_trans( a ); // Partition along the k dimension. for ( i = 0; i < k_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = blx_determine_blocksize_f( i, k_trans, c, bli_cntl_bszid( cntl ), cntx ); // Acquire partitions for A1 and B1. bli_acquire_mpart_ndim( BLIS_FWD, BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_mpart_mdim( BLIS_FWD, BLIS_SUBPART1, i, b_alg, b, &b1 ); // Perform gemm subproblem. blx_gemm_int ( &a1, &b1, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); bli_thread_obarrier( bli_thrinfo_sub_node( thread ) ); // This variant executes multiple rank-k updates. Therefore, if the // internal beta scalar on matrix C is non-zero, we must use it // only for the first iteration (and then BLIS_ONE for all others). // And since c is a locally aliased obj_t, we can simply overwrite // the internal beta scalar with BLIS_ONE once it has been used in // the first iteration. 
if ( i == 0 ) bli_obj_scalar_reset( c ); } } blis-0.6.1/sandbox/ref99/vars/blx_gemm_ker_var2.c000066400000000000000000000265151360743507500215720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" // Function pointer type for datatype-specific functions. 
typedef void (*gemm_fp)
     (
       pack_t  schema_a,                      // pack schema of A
       pack_t  schema_b,                      // pack schema of B
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, inc_t is_a,     // A: column stride, imaginary stride,
                  dim_t pd_a, inc_t ps_a,     //    panel dimension, panel stride
       void*   b, inc_t rs_b, inc_t is_b,     // B: row stride, imaginary stride,
                  dim_t pd_b, inc_t ps_b,     //    panel dimension, panel stride
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,     // C: row stride, column stride
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Function pointer array for datatype-specific functions.
// The table is indexed in blx_gemm_ker_var2() with the num_t value returned
// by bli_obj_exec_dt( c ).
// NOTE(review): the entries are listed in the order s, c, d, z; presumably
// this mirrors the numeric values of the num_t datatype ids -- confirm before
// reordering.
static gemm_fp ftypes[BLIS_NUM_FP_TYPES] =
{
    PASTECH2(blx_,s,gemm_ker_var2),
    PASTECH2(blx_,c,gemm_ker_var2),
    PASTECH2(blx_,d,gemm_ker_var2),
    PASTECH2(blx_,z,gemm_ker_var2)
};

//
// blx_gemm_ker_var2()
//
// Object-based front end of the gemm macrokernel. It reads the fields the
// typed implementation needs out of the obj_t operands (pack schemas,
// dimensions, buffers, strides, panel dimensions and panel strides), merges
// the scalars attached to a and b into a single alpha, takes beta from the
// scalar attached to c, and calls the typed function selected from ftypes[]
// by the execution datatype of c.
//
// The cntl argument is not referenced in this function; cntx, rntm and
// thread are passed through unchanged.
//
void blx_gemm_ker_var2
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	// Datatype used to pick the typed implementation.
	num_t     dt_exec   = bli_obj_exec_dt( c );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	// m and n come from C, k from the width of A.
	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	inc_t     is_a      = bli_obj_imag_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	inc_t     is_b      = bli_obj_imag_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	gemm_fp   f;

	// Detach and multiply the scalars attached to A and B.
	// The result is read from scalar_b below, i.e. bli_mulsc() is used
	// here with its second operand as the destination.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, is_a,
	          pd_a, ps_a,
	   buf_b, rs_b, is_b,
	          pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// GENTFUNC( ctype, ch, varname ) defines the typed macrokernel
// PASTECH2(blx_,ch,varname) -- presumably the name blx_<ch><varname>, which
// is how the entries of ftypes[] above are spelled.
//
// The generated function tiles C into MR x NR blocks (MR = pd_a, NR = pd_b),
// loops over the tiles assigned to the calling thread in the jr (n) and
// ir (m) loops, and calls the gemm micro-kernel queried from cntx once per
// tile. Tiles that are smaller than MR x NR are computed into a temporary
// buffer first and then merged into C.
//
// The parameters rntm, cs_a and rs_b are accepted but not referenced in the
// body (the PACKMR/PACKNR aliases that would use cs_a and rs_b are commented
// out).
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTECH2(blx_,ch,varname) \
     ( \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	/*const dim_t     PACKMR     = cs_a;*/ \
	/*const dim_t     PACKNR     = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well.
	   In the code below the strides are chosen from col_pref, i.e. from
	   what the context reports for the micro-kernel: (1, MR) when columns
	   are preferred and (NR, 1) otherwise.
	   NOTE(review): ct holds BLIS_STACK_BUF_MAX_SIZE / sizeof( ctype )
	   elements and is used as an MR x NR tile; presumably MR * NR never
	   exceeds that element count -- confirm. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           i, j; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	   None of these is checked at run time in this function.
	*/ \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. A non-zero remainder adds one extra (partial)
	   iteration to the corresponding count. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C.
	   A and B advance by their panel strides; C advances by MR rows or
	   NR columns at a time. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( is_a, &aux ); \
	bli_auxinfo_set_is_b( is_b, &aux ); \
\
	/* Save the desired output datatype (indicating no typecasting). */ \
	/*bli_auxinfo_set_dt_on_output( dt, &aux );*/ \
\
	/* The 'thread' argument points to the thrinfo_t node for the 2nd (jr)
	   loop around the microkernel. Here we query the thrinfo_t node for the
	   1st (ir) loop around the microkernel. */ \
	thrinfo_t* caucus    = bli_thrinfo_sub_node( thread ); \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
	dim_t ir_nt  = bli_thread_n_way( caucus ); \
	dim_t ir_tid = bli_thread_work_id( caucus ); \
\
	dim_t jr_start, jr_end; \
	dim_t ir_start, ir_end; \
	dim_t jr_inc,   ir_inc; \
\
	/* Determine the thread range and increment for the 2nd and 1st loops.
	   NOTE: The definition of bli_thread_range_jrir() will depend on whether
	   slab or round-robin partitioning was requested at configure-time. */ \
	bli_thread_range_jrir( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
	bli_thread_range_jrir( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = ir_start; i < ir_end; i += ir_inc ) \
		{ \
			ctype* restrict a2; \
\
			a1  = a_cast + i * rstep_a; \
			c11 = c1     + i * rstep_c; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* Compute the addresses of the next panels of A and B.
			   When bli_is_last_iter() flags the current ir iteration as
			   the last one, the next A panel wraps back to a_cast and the
			   next B panel moves on by jr_inc panels; if the jr loop is
			   flagged as being on its last iteration too, the next B
			   panel wraps back to b_cast. */ \
			a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); \
			if ( bli_is_last_iter( i, ir_end, ir_tid, ir_nt ) ) \
			{ \
				a2 = a_cast; \
				b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); \
				if ( bli_is_last_iter( j, jr_end, jr_tid, jr_nt ) ) \
					b2 = b_cast; \
			} \
\
			/* Save addresses of next panels of A and B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_next_a( a2, &aux ); \
			bli_auxinfo_set_next_b( b2, &aux ); \
\
			/* Handle interior and edge cases separately.
			   A full MR x NR tile is updated directly in C with beta.
			   A partial tile is computed into ct with a zero beta, and
			   only its m_cur x n_cur part is then merged into C. */ \
			if ( m_cur == MR && n_cur == NR ) \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  beta_cast, \
				  c11, rs_c, cs_c, \
				  &aux, \
				  cntx  \
				); \
			} \
			else \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  zero, \
				  ct, rs_ct, cs_ct, \
				  &aux, \
				  cntx  \
				); \
\
				/* Scale the bottom edge of C and add the result from above. */ \
				PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
				                        ct,  rs_ct, cs_ct, \
				                        beta_cast, \
				                        c11, rs_c,  cs_c ); \
			} \
		} \
	} \
\
/* Debug output, left disabled:
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \
*/ \
}

// The explicit per-type instantiations are compiled out; the functions are
// generated through INSERT_GENTFUNC_BASIC0 instead.
// NOTE(review): presumably INSERT_GENTFUNC_BASIC0 expands GENTFUNC for the
// same four types (s, d, c, z) that ftypes[] refers to -- confirm.
#if 0
GENTFUNC( float,    s, gemm_ker_var2 )
GENTFUNC( double,   d, gemm_ker_var2 )
GENTFUNC( scomplex, c, gemm_ker_var2 )
GENTFUNC( dcomplex, z, gemm_ker_var2 )
#else
INSERT_GENTFUNC_BASIC0( gemm_ker_var2 )
#endif
blis-0.6.1/sandbox/ref99/vars/blx_gemm_packab.c000066400000000000000000000053621360743507500212750ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "blix.h" void blx_gemm_packa ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t a_pack; // Pack matrix A according to the control tree node. blx_l3_packm ( a, &a_pack, cntx, rntm, cntl, thread ); // Proceed with execution using packed matrix A. blx_gemm_int ( &a_pack, b, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } // ----------------------------------------------------------------------------- void blx_gemm_packb ( obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx, rntm_t* rntm, cntl_t* cntl, thrinfo_t* thread ) { obj_t b_pack; // Pack matrix B according to the control tree node. blx_l3_packm ( b, &b_pack, cntx, rntm, cntl, thread ); // Proceed with execution using packed matrix B. blx_gemm_int ( a, &b_pack, c, cntx, rntm, bli_cntl_sub_node( cntl ), bli_thrinfo_sub_node( thread ) ); } blis-0.6.1/sandbox/ref99/vars/blx_gemm_var.h000066400000000000000000000054411360743507500206470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/


//
// Prototype object-based interfaces.
//
// GENPROT( opname ) declares one function named PASTECH(blx_,opname)
// (presumably blx_<opname>) that takes the three obj_t operands a, b and c
// followed by the context, runtime, control tree node and thread info
// pointers. All gemm variants listed below share this signature.
//

#undef  GENPROT
#define GENPROT( opname ) \
\
void PASTECH(blx_,opname) \
     ( \
       obj_t*  a, \
       obj_t*  b, \
       obj_t*  c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       cntl_t* cntl, \
       thrinfo_t* thread  \
     );

// Blocked variants.
GENPROT( gemm_blk_var1 )
GENPROT( gemm_blk_var2 )
GENPROT( gemm_blk_var3 )
// Packing steps.
GENPROT( gemm_packa )
GENPROT( gemm_packb )

// Macrokernel front end.
GENPROT( gemm_ker_var2 )

//
// Prototype BLAS-like interfaces with void pointer operands.
//
// GENTPROT( ctype, ch, varname ) declares one typed function named
// PASTECH2(blx_,ch,varname). The ctype argument does not appear in the
// declaration itself: alpha, a, b, beta and c are all passed as void*.
// NOTE(review): presumably INSERT_GENTPROT_BASIC0 expands GENTPROT once per
// supported datatype character -- confirm.
//

#undef  GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTECH2(blx_,ch,varname) \
     ( \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     );

INSERT_GENTPROT_BASIC0( gemm_ker_var2 )
blis-0.6.1/sandbox/ref99/vars/other/000077500000000000000000000000001360743507500171515ustar00rootroot00000000000000blis-0.6.1/sandbox/ref99/vars/other/blx_gemm_ker_var2rr.c000066400000000000000000000261401360743507500232510ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
#include "blix.h"

// Function pointer type for datatype-specific functions.
// This is the signature of the typed macrokernels that GENTFUNC generates
// further down and that blx_gemm_ker_var2rr() calls through ftypes[].
typedef void (*gemm_fp)
     (
       pack_t  schema_a,                      // pack schema of A
       pack_t  schema_b,                      // pack schema of B
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, inc_t is_a,     // A: column stride, imaginary stride,
                  dim_t pd_a, inc_t ps_a,     //    panel dimension, panel stride
       void*   b, inc_t rs_b, inc_t is_b,     // B: row stride, imaginary stride,
                  dim_t pd_b, inc_t ps_b,     //    panel dimension, panel stride
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,     // C: row stride, column stride
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Function pointer array for datatype-specific functions.
// The table is indexed in blx_gemm_ker_var2rr() with the num_t value
// returned by bli_obj_exec_dt( c ).
// NOTE(review): the entries are listed in the order s, c, d, z; presumably
// this mirrors the numeric values of the num_t datatype ids -- confirm before
// reordering.
static gemm_fp ftypes[BLIS_NUM_FP_TYPES] =
{
    PASTECH2(blx_,s,gemm_ker_var2rr),
    PASTECH2(blx_,c,gemm_ker_var2rr),
    PASTECH2(blx_,d,gemm_ker_var2rr),
    PASTECH2(blx_,z,gemm_ker_var2rr)
};

//
// blx_gemm_ker_var2rr()
//
// Object-based front end of the gemm macrokernel whose typed implementation
// obtains its loop ranges from bli_thread_range_jrir_rr(). It reads the
// fields the typed implementation needs out of the obj_t operands (pack
// schemas, dimensions, buffers, strides, panel dimensions and panel
// strides), merges the scalars attached to a and b into a single alpha,
// takes beta from the scalar attached to c, and calls the typed function
// selected from ftypes[] by the execution datatype of c.
//
// The cntl argument is not referenced in this function; cntx, rntm and
// thread are passed through unchanged.
//
void blx_gemm_ker_var2rr
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	// Datatype used to pick the typed implementation.
	num_t     dt_exec   = bli_obj_exec_dt( c );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	// m and n come from C, k from the width of A.
	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	inc_t     is_a      = bli_obj_imag_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	inc_t     is_b      = bli_obj_imag_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	gemm_fp   f;

	// Detach and multiply the scalars attached to A and B.
	// The result is read from scalar_b below, i.e. bli_mulsc() is used
	// here with its second operand as the destination.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, is_a,
	          pd_a, ps_a,
	   buf_b, rs_b, is_b,
	          pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// GENTFUNC( ctype, ch, varname ) defines the typed macrokernel
// PASTECH2(blx_,ch,varname) -- presumably the name blx_<ch><varname>, which
// is how the entries of ftypes[] above are spelled.
//
// The generated function tiles C into MR x NR blocks (MR = pd_a, NR = pd_b),
// loops over the tiles assigned to the calling thread in the jr (n) and
// ir (m) loops, and calls the gemm micro-kernel queried from cntx once per
// tile. Tiles that are smaller than MR x NR are computed into a temporary
// buffer first and then merged into C. Loop ranges and last-iteration tests
// use the _rr flavours of the threading helpers.
//
// The parameters rntm, cs_a and rs_b are accepted but not referenced in the
// body (the PACKMR/PACKNR aliases that would use cs_a and rs_b are commented
// out).
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTECH2(blx_,ch,varname) \
     ( \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread  \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	/*const dim_t     PACKMR     = cs_a;*/ \
	/*const dim_t     PACKNR     = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well.
	   In the code below the strides are chosen from col_pref, i.e. from
	   what the context reports for the micro-kernel: (1, MR) when columns
	   are preferred and (NR, 1) otherwise.
	   NOTE(review): ct holds BLIS_STACK_BUF_MAX_SIZE / sizeof( ctype )
	   elements and is used as an MR x NR tile; presumably MR * NR never
	   exceeds that element count -- confirm. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           i, j; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	   None of these is checked at run time in this function.
	*/ \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. A non-zero remainder adds one extra (partial)
	   iteration to the corresponding count. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C.
	   A and B advance by their panel strides; C advances by MR rows or
	   NR columns at a time. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( is_a, &aux ); \
	bli_auxinfo_set_is_b( is_b, &aux ); \
\
	/* The 'thread' argument points to the thrinfo_t node for the 2nd (jr)
	   loop around the microkernel. Here we query the thrinfo_t node for the
	   1st (ir) loop around the microkernel. */ \
	thrinfo_t* caucus    = bli_thrinfo_sub_node( thread ); \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
	dim_t ir_nt  = bli_thread_n_way( caucus ); \
	dim_t ir_tid = bli_thread_work_id( caucus ); \
\
	dim_t jr_start, jr_end; \
	dim_t ir_start, ir_end; \
	dim_t jr_inc,   ir_inc; \
\
	/* Determine the thread range and increment for each thrinfo_t node.
	   NOTE(review): the _rr suffix presumably selects round-robin
	   partitioning unconditionally -- confirm against the definition of
	   bli_thread_range_jrir_rr(). */ \
	bli_thread_range_jrir_rr( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
	bli_thread_range_jrir_rr( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = ir_start; i < ir_end; i += ir_inc ) \
		{ \
			ctype* restrict a2; \
\
			a1  = a_cast + i * rstep_a; \
			c11 = c1     + i * rstep_c; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* Compute the addresses of the next panels of A and B.
			   When bli_is_last_iter_rr() flags the current ir iteration
			   as the last one, the next A panel wraps back to a_cast and
			   the next B panel moves on by jr_inc panels; if the jr loop
			   is flagged as being on its last iteration too, the next B
			   panel wraps back to b_cast. */ \
			a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); \
			if ( bli_is_last_iter_rr( i, ir_end, ir_tid, ir_nt ) ) \
			{ \
				a2 = a_cast; \
				b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); \
				if ( bli_is_last_iter_rr( j, jr_end, jr_tid, jr_nt ) ) \
					b2 = b_cast; \
			} \
\
			/* Save addresses of next panels of A and B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_next_a( a2, &aux ); \
			bli_auxinfo_set_next_b( b2, &aux ); \
\
			/* Handle interior and edge cases separately.
			   A full MR x NR tile is updated directly in C with beta.
			   A partial tile is computed into ct with a zero beta, and
			   only its m_cur x n_cur part is then merged into C. */ \
			if ( m_cur == MR && n_cur == NR ) \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  beta_cast, \
				  c11, rs_c, cs_c, \
				  &aux, \
				  cntx  \
				); \
			} \
			else \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  zero, \
				  ct, rs_ct, cs_ct, \
				  &aux, \
				  cntx  \
				); \
\
				/* Scale the bottom edge of C and add the result from above. */ \
				PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
				                        ct,  rs_ct, cs_ct, \
				                        beta_cast, \
				                        c11, rs_c,  cs_c ); \
			} \
		} \
	} \
\
/* Debug output, left disabled (the labels still say gemm_ker_var2):
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \
*/ \
}

// The explicit per-type instantiations are compiled out; the functions are
// generated through INSERT_GENTFUNC_BASIC0 instead.
// NOTE(review): presumably INSERT_GENTFUNC_BASIC0 expands GENTFUNC for the
// same four types (s, d, c, z) that ftypes[] refers to -- confirm.
#if 0
GENTFUNC( float,    s, gemm_ker_var2rr )
GENTFUNC( double,   d, gemm_ker_var2rr )
GENTFUNC( scomplex, c, gemm_ker_var2rr )
GENTFUNC( dcomplex, z, gemm_ker_var2rr )
#else
INSERT_GENTFUNC_BASIC0( gemm_ker_var2rr )
#endif
blis-0.6.1/sandbox/ref99/vars/other/blx_gemm_ker_var2sl.c000066400000000000000000000261401360743507500232440ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
#include "blix.h"

// Function pointer type for datatype-specific functions.
// This is the signature of the typed functions stored in the ftypes[] table
// that follows and called by blx_gemm_ker_var2sl(). The operands alpha, a,
// b, beta and c are passed as untyped void pointers.
typedef void (*gemm_fp)
     (
       pack_t  schema_a,                      // pack schema of A
       pack_t  schema_b,                      // pack schema of B
       dim_t   m,
       dim_t   n,
       dim_t   k,
       void*   alpha,
       void*   a, inc_t cs_a, inc_t is_a,     // A: column stride, imaginary stride,
                  dim_t pd_a, inc_t ps_a,     //    panel dimension, panel stride
       void*   b, inc_t rs_b, inc_t is_b,     // B: row stride, imaginary stride,
                  dim_t pd_b, inc_t ps_b,     //    panel dimension, panel stride
       void*   beta,
       void*   c, inc_t rs_c, inc_t cs_c,     // C: row stride, column stride
       cntx_t* cntx,
       rntm_t* rntm,
       thrinfo_t* thread
     );

// Function pointer array for datatype-specific functions.
// The table is indexed with the execution datatype of C (see dt_exec
// below). NOTE(review): the s, c, d, z order presumably mirrors the numeric
// order of the floating-point num_t values -- confirm against blis.h before
// reordering the entries.
static gemm_fp ftypes[BLIS_NUM_FP_TYPES] =
{
	PASTECH2(blx_,s,gemm_ker_var2sl),
	PASTECH2(blx_,c,gemm_ker_var2sl),
	PASTECH2(blx_,d,gemm_ker_var2sl),
	PASTECH2(blx_,z,gemm_ker_var2sl)
};

//
// Object-based entry point for the "sl" macro-kernel variant.
//
// Unpacks dimensions, strides, pack schemas and panel information from the
// objects a, b and c, merges the scalars attached to a and b into a single
// alpha, and forwards everything to the typed kernel selected by the
// execution datatype of c.
//
// The cntl argument is accepted but not referenced in this function.
//
void blx_gemm_ker_var2sl
     (
       obj_t*  a,
       obj_t*  b,
       obj_t*  c,
       cntx_t* cntx,
       rntm_t* rntm,
       cntl_t* cntl,
       thrinfo_t* thread
     )
{
	num_t     dt_exec   = bli_obj_exec_dt( c );

	pack_t    schema_a  = bli_obj_pack_schema( a );
	pack_t    schema_b  = bli_obj_pack_schema( b );

	// m and n come from C; k is taken from the width of A.
	dim_t     m         = bli_obj_length( c );
	dim_t     n         = bli_obj_width( c );
	dim_t     k         = bli_obj_width( a );

	void*     buf_a     = bli_obj_buffer_at_off( a );
	inc_t     cs_a      = bli_obj_col_stride( a );
	inc_t     is_a      = bli_obj_imag_stride( a );
	dim_t     pd_a      = bli_obj_panel_dim( a );
	inc_t     ps_a      = bli_obj_panel_stride( a );

	void*     buf_b     = bli_obj_buffer_at_off( b );
	inc_t     rs_b      = bli_obj_row_stride( b );
	inc_t     is_b      = bli_obj_imag_stride( b );
	dim_t     pd_b      = bli_obj_panel_dim( b );
	inc_t     ps_b      = bli_obj_panel_stride( b );

	void*     buf_c     = bli_obj_buffer_at_off( c );
	inc_t     rs_c      = bli_obj_row_stride( c );
	inc_t     cs_c      = bli_obj_col_stride( c );

	obj_t     scalar_a;
	obj_t     scalar_b;

	void*     buf_alpha;
	void*     buf_beta;

	gemm_fp   f;

	// Detach and multiply the scalars attached to A and B.
	bli_obj_scalar_detach( a, &scalar_a );
	bli_obj_scalar_detach( b, &scalar_b );
	bli_mulsc( &scalar_a, &scalar_b );

	// Grab the addresses of the internal scalar buffers for the scalar
	// merged above and the scalar attached to C.
	// (alpha is read back from scalar_b, beta from c.)
	buf_alpha = bli_obj_internal_scalar_buffer( &scalar_b );
	buf_beta  = bli_obj_internal_scalar_buffer( c );

	// Index into the type combination array to extract the correct
	// function pointer.
	f = ftypes[dt_exec];

	// Invoke the function.
	f( schema_a,
	   schema_b,
	   m,
	   n,
	   k,
	   buf_alpha,
	   buf_a, cs_a, is_a,
	          pd_a, ps_a,
	   buf_b, rs_b, is_b,
	          pd_b, ps_b,
	   buf_beta,
	   buf_c, rs_c, cs_c,
	   cntx,
	   rntm,
	   thread );
}


//
// GENTFUNC( ctype, ch, varname ) expands to the typed kernel
// blx_<ch><varname>() whose signature matches gemm_fp above.
//
// The generated function:
//  - takes MR and NR from the panel dimensions pd_a and pd_b;
//  - fetches the gemm micro-kernel for the datatype from the context;
//  - obtains its jr (n dimension) and ir (m dimension) iteration ranges
//    from bli_thread_range_jrir_sl(), using 'thread' for the jr loop and
//    its sub-node for the ir loop;
//  - calls the micro-kernel directly on C for full MR x NR tiles;
//  - for partial tiles, computes into the stack buffer ct with a zero beta
//    and then folds the m_cur x n_cur corner into C via xpbys_mxn using the
//    caller's beta.
//
// The strides of ct are chosen from the micro-kernel's column/row
// preference (col_pref). The rntm parameter is not referenced in the body.
//
#undef  GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTECH2(blx_,ch,varname) \
     ( \
       pack_t  schema_a, \
       pack_t  schema_b, \
       dim_t   m, \
       dim_t   n, \
       dim_t   k, \
       void*   alpha, \
       void*   a, inc_t cs_a, inc_t is_a, \
                  dim_t pd_a, inc_t ps_a, \
       void*   b, inc_t rs_b, inc_t is_b, \
                  dim_t pd_b, inc_t ps_b, \
       void*   beta, \
       void*   c, inc_t rs_c, inc_t cs_c, \
       cntx_t* cntx, \
       rntm_t* rntm, \
       thrinfo_t* thread \
     ) \
{ \
	const num_t     dt         = PASTEMAC(ch,type); \
\
	/* Alias some constants to simpler names. */ \
	const dim_t     MR         = pd_a; \
	const dim_t     NR         = pd_b; \
	/*const dim_t     PACKMR     = cs_a;*/ \
	/*const dim_t     PACKNR     = rs_b;*/ \
\
	/* Query the context for the micro-kernel address and cast it to its
	   function pointer type. */ \
	PASTECH(ch,gemm_ukr_ft) \
	                gemm_ukr   = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
	/* Temporary C buffer for edge cases. Note that the strides of this
	   temporary buffer are set so that they match the storage of the
	   original C matrix. For example, if C is column-stored, ct will be
	   column-stored as well. */ \
	ctype           ct[ BLIS_STACK_BUF_MAX_SIZE \
	                    / sizeof( ctype ) ] \
	                    __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
	const bool_t    col_pref    = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
	const inc_t     rs_ct       = ( col_pref ? 1 : NR ); \
	const inc_t     cs_ct       = ( col_pref ? MR : 1 ); \
\
	ctype* restrict zero       = PASTEMAC(ch,0); \
	ctype* restrict a_cast     = a; \
	ctype* restrict b_cast     = b; \
	ctype* restrict c_cast     = c; \
	ctype* restrict alpha_cast = alpha; \
	ctype* restrict beta_cast  = beta; \
	ctype* restrict b1; \
	ctype* restrict c1; \
\
	dim_t           m_iter, m_left; \
	dim_t           n_iter, n_left; \
	dim_t           i, j; \
	dim_t           m_cur; \
	dim_t           n_cur; \
	inc_t           rstep_a; \
	inc_t           cstep_b; \
	inc_t           rstep_c, cstep_c; \
	auxinfo_t       aux; \
\
	/*
	   Assumptions/assertions:
	     rs_a == 1
	     cs_a == PACKMR
	     pd_a == MR
	     ps_a == stride to next micro-panel of A
	     rs_b == PACKNR
	     cs_b == 1
	     pd_b == NR
	     ps_b == stride to next micro-panel of B
	     rs_c == (no assumptions)
	     cs_c == (no assumptions)
	*/ \
\
	/* If any dimension is zero, return immediately. */ \
	if ( bli_zero_dim3( m, n, k ) ) return; \
\
	/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
	PASTEMAC(ch,set0s_mxn)( MR, NR, \
	                        ct, rs_ct, cs_ct ); \
\
	/* Compute number of primary and leftover components of the m and n
	   dimensions. */ \
	n_iter = n / NR; \
	n_left = n % NR; \
\
	m_iter = m / MR; \
	m_left = m % MR; \
\
	if ( n_left ) ++n_iter; \
	if ( m_left ) ++m_iter; \
\
	/* Determine some increments used to step through A, B, and C. */ \
	rstep_a = ps_a; \
\
	cstep_b = ps_b; \
\
	rstep_c = rs_c * MR; \
	cstep_c = cs_c * NR; \
\
	/* Save the pack schemas of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_schema_a( schema_a, &aux ); \
	bli_auxinfo_set_schema_b( schema_b, &aux ); \
\
	/* Save the imaginary stride of A and B to the auxinfo_t object. */ \
	bli_auxinfo_set_is_a( is_a, &aux ); \
	bli_auxinfo_set_is_b( is_b, &aux ); \
\
	/* The 'thread' argument points to the thrinfo_t node for the 2nd (jr)
	   loop around the microkernel. Here we query the thrinfo_t node for the
	   1st (ir) loop around the microkernel. */ \
	thrinfo_t* caucus    = bli_thrinfo_sub_node( thread ); \
\
	/* Query the number of threads and thread ids for each loop. */ \
	dim_t jr_nt  = bli_thread_n_way( thread ); \
	dim_t jr_tid = bli_thread_work_id( thread ); \
	dim_t ir_nt  = bli_thread_n_way( caucus ); \
	dim_t ir_tid = bli_thread_work_id( caucus ); \
\
	dim_t jr_start, jr_end; \
	dim_t ir_start, ir_end; \
	dim_t jr_inc,   ir_inc; \
\
	/* Determine the thread range and increment for each thrinfo_t node. */ \
	bli_thread_range_jrir_sl( thread, n_iter, 1, FALSE, &jr_start, &jr_end, &jr_inc ); \
	bli_thread_range_jrir_sl( caucus, m_iter, 1, FALSE, &ir_start, &ir_end, &ir_inc ); \
\
	/* Loop over the n dimension (NR columns at a time). */ \
	for ( j = jr_start; j < jr_end; j += jr_inc ) \
	{ \
		ctype* restrict a1; \
		ctype* restrict c11; \
		ctype* restrict b2; \
\
		b1 = b_cast + j * cstep_b; \
		c1 = c_cast + j * cstep_c; \
\
		n_cur = ( bli_is_not_edge_f( j, n_iter, n_left ) ? NR : n_left ); \
\
		/* Initialize our next panel of B to be the current panel of B. */ \
		b2 = b1; \
\
		/* Loop over the m dimension (MR rows at a time). */ \
		for ( i = ir_start; i < ir_end; i += ir_inc ) \
		{ \
			ctype* restrict a2; \
\
			a1  = a_cast + i * rstep_a; \
			c11 = c1     + i * rstep_c; \
\
			m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
			/* Compute the addresses of the next panels of A and B. */ \
			a2 = bli_gemm_get_next_a_upanel( a1, rstep_a, ir_inc ); \
			if ( bli_is_last_iter_sl( i, ir_end, ir_tid, ir_nt ) ) \
			{ \
				a2 = a_cast; \
				b2 = bli_gemm_get_next_b_upanel( b1, cstep_b, jr_inc ); \
				if ( bli_is_last_iter_sl( j, jr_end, jr_tid, jr_nt ) ) \
					b2 = b_cast; \
			} \
\
			/* Save addresses of next panels of A and B to the auxinfo_t
			   object. */ \
			bli_auxinfo_set_next_a( a2, &aux ); \
			bli_auxinfo_set_next_b( b2, &aux ); \
\
			/* Handle interior and edge cases separately. */ \
			if ( m_cur == MR && n_cur == NR ) \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  beta_cast, \
				  c11, rs_c, cs_c, \
				  &aux, \
				  cntx \
				); \
			} \
			else \
			{ \
				/* Invoke the gemm micro-kernel. */ \
				gemm_ukr \
				( \
				  k, \
				  alpha_cast, \
				  a1, \
				  b1, \
				  zero, \
				  ct, rs_ct, cs_ct, \
				  &aux, \
				  cntx \
				); \
\
				/* Scale the bottom edge of C and add the result from above. */ \
				PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
				                        ct,  rs_ct, cs_ct, \
				                        beta_cast, \
				                        c11, rs_c,  cs_c ); \
			} \
		} \
	} \
\
/*
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \
*/ \
}

#if 0
GENTFUNC( float,    s, gemm_ker_var2sl )
GENTFUNC( double,   d, gemm_ker_var2sl )
GENTFUNC( scomplex, c, gemm_ker_var2sl )
GENTFUNC( dcomplex, z, gemm_ker_var2sl )
#else
INSERT_GENTFUNC_BASIC0( gemm_ker_var2sl )
#endif

blis-0.6.1/so_version000066400000000000000000000000061360743507500145660ustar00rootroot000000000000003 0.0
blis-0.6.1/test/000077500000000000000000000000001360743507500134405ustar00rootroot00000000000000blis-0.6.1/test/1m4m/000077500000000000000000000000001360743507500142165ustar00rootroot00000000000000blis-0.6.1/test/1m4m/Makefile000066400000000000000000000436021360743507500156630ustar00rootroot00000000000000#!/bin/bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2018, Advanced Micro Devices, Inc.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

#
# Makefile
#
# Field G. Van Zee
#
# Makefile for standalone BLIS test drivers.
#

#
# --- Makefile PHONY target definitions ----------------------------------------
#

# Every alias target in this Makefile is listed here, not just all/clean.
# None of them names a file that a recipe creates, so without the .PHONY
# declaration a stray file or directory called e.g. "blis" or "eigen" would
# make the corresponding target appear up to date and silently do nothing.
.PHONY: all all-st all-1s all-2s \
        blis blis-st blis-1s blis-2s \
        blis-nat blis-nat-st blis-nat-1s blis-nat-2s \
        blis-1m blis-1m-st blis-1m-1s blis-1m-2s \
        blis-4m1a blis-4m1a-st blis-4m1a-1s blis-4m1a-2s \
        openblas openblas-st openblas-1s openblas-2s \
        eigen eigen-st eigen-1s eigen-2s \
        vendor vendor-st vendor-1s vendor-2s \
        mkl mkl-st mkl-1s mkl-2s \
        armpl armpl-st armpl-1s armpl-2s \
        clean cleanx

#
# --- Determine makefile fragment location -------------------------------------
#

# Comments:
# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given.
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in
#   the second case because CONFIG_NAME is not yet set.
ifneq ($(strip $(BLIS_INSTALL_PATH)),)
# Build against an installed copy of BLIS.
LIB_PATH   := $(BLIS_INSTALL_PATH)/lib
INC_PATH   := $(BLIS_INSTALL_PATH)/include/blis
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
else
# Build against the source tree two directories up.
DIST_PATH  := ../..
LIB_PATH    = ../../lib/$(CONFIG_NAME)
INC_PATH    = ../../include/$(CONFIG_NAME)
SHARE_PATH := ../..
endif

#
# --- Include common makefile definitions --------------------------------------
#

# Include the common makefile fragment.
# The leading '-' tells make not to fail if the fragment is missing.
# NOTE(review): CONFIG_NAME and get-user-cflags-for, used further down, are
# presumably provided by this fragment -- confirm against common.mk.
-include $(SHARE_PATH)/common.mk

#
# --- BLAS implementations -----------------------------------------------------
#

# BLAS library path(s). This is where the BLAS libraries reside.
HOME_LIB_PATH  := $(HOME)/flame/lib

# OpenBLAS
# (the *P_LIB variables are the ones linked into the 1s/2s binaries)
OPENBLAS_LIB   := $(HOME_LIB_PATH)/libopenblas.a
OPENBLASP_LIB  := $(HOME_LIB_PATH)/libopenblasp.a

# ATLAS
#ATLAS_LIB      := $(HOME_LIB_PATH)/libf77blas.a \
#                  $(HOME_LIB_PATH)/libatlas.a

# Eigen
EIGEN_INC      := $(HOME)/flame/eigen/include/eigen3
EIGEN_LIB      := $(HOME_LIB_PATH)/libeigen_blas_static.a
EIGENP_LIB     := $(EIGEN_LIB)

# MKL
MKL_LIB_PATH   := $(HOME)/intel/mkl/lib/intel64
MKL_LIB        := -L$(MKL_LIB_PATH) \
                  -lmkl_intel_lp64 \
                  -lmkl_core \
                  -lmkl_sequential \
                  -lpthread -lm -ldl
#MKLP_LIB       := -L$(MKL_LIB_PATH) \
#                  -lmkl_intel_thread \
#                  -lmkl_core \
#                  -lmkl_intel_ilp64 \
#                  -L$(ICC_LIB_PATH) \
#                  -liomp5
MKLP_LIB       := -L$(MKL_LIB_PATH) \
                  -lmkl_intel_lp64 \
                  -lmkl_core \
                  -lmkl_gnu_thread \
                  -lpthread -lm -ldl -fopenmp
                  #-L$(ICC_LIB_PATH) \
                  #-lgomp

# The "vendor" implementation is currently wired to MKL.
VENDOR_LIB     := $(MKL_LIB)
VENDORP_LIB    := $(MKLP_LIB)

#
# --- Problem size definitions -------------------------------------------------
#

# Each *_MAX value is also embedded in the object/binary file names, so the
# three values must stay distinct for the st/1s/2s builds to be told apart.

# Single core (single-threaded)
PS_BEGIN := 48
PS_MAX   := 2400
PS_INC   := 48

# Single-socket (multithreaded)
P1_BEGIN := 96
P1_MAX   := 4800
P1_INC   := 96

# Dual-socket (multithreaded)
P2_BEGIN := 144
P2_MAX   := 7200
P2_INC   := 144

#
# --- General build definitions ------------------------------------------------
#

TEST_SRC_PATH  := .
TEST_OBJ_PATH  := .

# Gather all local object files.
TEST_OBJS      := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \
                                    $(TEST_OBJ_PATH)/%.o, \
                                    $(wildcard $(TEST_SRC_PATH)/*.c)))

# Override the value of CINCFLAGS so that the value of CFLAGS returned by
# get-user-cflags-for() is not cluttered up with include paths needed only
# while building BLIS.
CINCFLAGS      := -I$(INC_PATH)

# Use the "framework" CFLAGS for the configuration family.
CFLAGS         := $(call get-user-cflags-for,$(CONFIG_NAME))

# Add local header paths to CFLAGS.
CFLAGS         += -I$(TEST_SRC_PATH)

# Locate the libblis library to which we will link.
#LIBBLIS_LINK   := $(LIB_PATH)/$(LIBBLIS_L)

# Define a set of CFLAGS for use with C++ and Eigen.
# (the C flags are reused, with -std=c99 swapped for -std=c++11)
CXXFLAGS       := $(subst -std=c99,-std=c++11,$(CFLAGS))
CXXFLAGS       += -I$(EIGEN_INC)

# Create a copy of CXXFLAGS without -fopenmp in order to disable multithreading.
CXXFLAGS_ST    := -march=native $(subst -fopenmp,,$(CXXFLAGS))
CXXFLAGS_MT    := -march=native $(CXXFLAGS)

# Which library?
BLI_DEF  := -DBLIS
BLA_DEF  := -DBLAS
EIG_DEF  := -DEIGEN

# Complex implementation type
# (-DIND=... picks the induced method that the test driver enables)
D3MHW    := -DIND=BLIS_3MH
D3M1     := -DIND=BLIS_3M1
D4MHW    := -DIND=BLIS_4MH
D4M1B    := -DIND=BLIS_4M1B
D4M1A    := -DIND=BLIS_4M1A
D1M      := -DIND=BLIS_1M
DNAT     := -DIND=BLIS_NAT

# Implementation string
# (-DSTR=... becomes part of the variable name the driver prints)
#STR_3MHW := -DSTR=\"3mhw\"
#STR_3M1  := -DSTR=\"3m1\"
#STR_4MHW := -DSTR=\"4mhw\"
#STR_4M1B := -DSTR=\"4m1b\"
STR_4M1A := -DSTR=\"4m1a_blis\"
STR_1M   := -DSTR=\"1m_blis\"
STR_NAT  := -DSTR=\"asm_blis\"
STR_OBL  := -DSTR=\"openblas\"
STR_EIG  := -DSTR=\"eigen\"
STR_VEN  := -DSTR=\"vendor\"

# Single or multithreaded string
STR_ST   := -DTHR_STR=\"st\"
STR_1S   := -DTHR_STR=\"1s\"
STR_2S   := -DTHR_STR=\"2s\"

# Problem size specification
PDEF_ST  := -DP_BEGIN=$(PS_BEGIN) -DP_INC=$(PS_INC) -DP_MAX=$(PS_MAX)
PDEF_1S  := -DP_BEGIN=$(P1_BEGIN) -DP_INC=$(P1_INC) -DP_MAX=$(P1_MAX)
PDEF_2S  := -DP_BEGIN=$(P2_BEGIN) -DP_INC=$(P2_INC) -DP_MAX=$(P2_MAX)

#
# --- Targets/rules ------------------------------------------------------------
#

# Alias targets. Note that 'all' builds the BLIS, OpenBLAS and vendor (mkl)
# drivers only; the Eigen drivers must be requested explicitly.
all:         all-st      all-1s      all-2s
blis:       blis-st     blis-1s     blis-2s
openblas: openblas-st openblas-1s openblas-2s
eigen:     eigen-st    eigen-1s    eigen-2s
vendor:   vendor-st   vendor-1s   vendor-2s
mkl:      vendor
armpl:    vendor

all-st: blis-st openblas-st mkl-st
all-1s: blis-1s openblas-1s mkl-1s
all-2s: blis-2s openblas-2s mkl-2s

blis-st: blis-nat-st blis-1m-st blis-4m1a-st
blis-1s: blis-nat-1s blis-1m-1s blis-4m1a-1s
blis-2s: blis-nat-2s blis-1m-2s blis-4m1a-2s

#blis-ind: blis-ind-st blis-ind-mt
blis-nat: blis-nat-st blis-nat-1s blis-nat-2s
blis-1m: blis-1m-st blis-1m-1s blis-1m-2s
blis-4m1a: blis-4m1a-st blis-4m1a-1s blis-4m1a-2s

# Define the datatypes, operations, and implementations.
# BIMPLS are compiled with $(CC); EIMPLS are compiled with $(CXX).
DTS    := s d c z
OPS    := gemm
BIMPLS := asm_blis 4m1a_blis 1m_blis openblas vendor
EIMPLS := eigen

# Define functions to construct object filenames from the datatypes and
# operations given an implementation. We define one function for single-
# threaded, single-socket, and dual-socket filenames.
# The resulting names have the form
#   test_<dt><op>_<P*_MAX>_<impl>_<st|1s|2s>.o
get-st-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(PS_MAX)_$(1)_st.o))
get-1s-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(P1_MAX)_$(1)_1s.o))
get-2s-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(P2_MAX)_$(1)_2s.o))

# Construct object and binary names for single-threaded, single-socket, and
# dual-socket files for BLIS, OpenBLAS, and a vendor library (e.g. MKL).
BLIS_1M_ST_OBJS := $(call get-st-objs,1m_blis)
BLIS_1M_ST_BINS := $(patsubst %.o,%.x,$(BLIS_1M_ST_OBJS))
BLIS_1M_1S_OBJS := $(call get-1s-objs,1m_blis)
BLIS_1M_1S_BINS := $(patsubst %.o,%.x,$(BLIS_1M_1S_OBJS))
BLIS_1M_2S_OBJS := $(call get-2s-objs,1m_blis)
BLIS_1M_2S_BINS := $(patsubst %.o,%.x,$(BLIS_1M_2S_OBJS))

BLIS_4M1A_ST_OBJS := $(call get-st-objs,4m1a_blis)
BLIS_4M1A_ST_BINS := $(patsubst %.o,%.x,$(BLIS_4M1A_ST_OBJS))
BLIS_4M1A_1S_OBJS := $(call get-1s-objs,4m1a_blis)
BLIS_4M1A_1S_BINS := $(patsubst %.o,%.x,$(BLIS_4M1A_1S_OBJS))
BLIS_4M1A_2S_OBJS := $(call get-2s-objs,4m1a_blis)
BLIS_4M1A_2S_BINS := $(patsubst %.o,%.x,$(BLIS_4M1A_2S_OBJS))

BLIS_NAT_ST_OBJS := $(call get-st-objs,asm_blis)
BLIS_NAT_ST_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_ST_OBJS))
BLIS_NAT_1S_OBJS := $(call get-1s-objs,asm_blis)
BLIS_NAT_1S_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_1S_OBJS))
BLIS_NAT_2S_OBJS := $(call get-2s-objs,asm_blis)
BLIS_NAT_2S_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_2S_OBJS))

OPENBLAS_ST_OBJS := $(call get-st-objs,openblas)
OPENBLAS_ST_BINS := $(patsubst %.o,%.x,$(OPENBLAS_ST_OBJS))
OPENBLAS_1S_OBJS := $(call get-1s-objs,openblas)
OPENBLAS_1S_BINS := $(patsubst %.o,%.x,$(OPENBLAS_1S_OBJS))
OPENBLAS_2S_OBJS := $(call get-2s-objs,openblas)
OPENBLAS_2S_BINS := $(patsubst %.o,%.x,$(OPENBLAS_2S_OBJS))

EIGEN_ST_OBJS := $(call get-st-objs,eigen)
EIGEN_ST_BINS := $(patsubst %.o,%.x,$(EIGEN_ST_OBJS))
EIGEN_1S_OBJS := $(call get-1s-objs,eigen)
EIGEN_1S_BINS := $(patsubst %.o,%.x,$(EIGEN_1S_OBJS))
EIGEN_2S_OBJS := $(call get-2s-objs,eigen)
EIGEN_2S_BINS := $(patsubst %.o,%.x,$(EIGEN_2S_OBJS))

VENDOR_ST_OBJS := $(call get-st-objs,vendor)
VENDOR_ST_BINS := $(patsubst %.o,%.x,$(VENDOR_ST_OBJS))
VENDOR_1S_OBJS := $(call get-1s-objs,vendor)
VENDOR_1S_BINS := $(patsubst %.o,%.x,$(VENDOR_1S_OBJS))
VENDOR_2S_OBJS := $(call get-2s-objs,vendor)
VENDOR_2S_BINS := $(patsubst %.o,%.x,$(VENDOR_2S_OBJS))

# Define some targets associated with the above object/binary files.
blis-nat-st: $(BLIS_NAT_ST_BINS)
blis-nat-1s: $(BLIS_NAT_1S_BINS)
blis-nat-2s: $(BLIS_NAT_2S_BINS)

blis-1m-st: $(BLIS_1M_ST_BINS)
blis-1m-1s: $(BLIS_1M_1S_BINS)
blis-1m-2s: $(BLIS_1M_2S_BINS)

blis-4m1a-st: $(BLIS_4M1A_ST_BINS)
blis-4m1a-1s: $(BLIS_4M1A_1S_BINS)
blis-4m1a-2s: $(BLIS_4M1A_2S_BINS)

openblas-st: $(OPENBLAS_ST_BINS)
openblas-1s: $(OPENBLAS_1S_BINS)
openblas-2s: $(OPENBLAS_2S_BINS)

eigen-st: $(EIGEN_ST_BINS)
eigen-1s: $(EIGEN_1S_BINS)
eigen-2s: $(EIGEN_2S_BINS)

vendor-st: $(VENDOR_ST_BINS)
vendor-1s: $(VENDOR_1S_BINS)
vendor-2s: $(VENDOR_2S_BINS)

# mkl-* and armpl-* are aliases for the vendor-* targets.
mkl-st: vendor-st
mkl-1s: vendor-1s
mkl-2s: vendor-2s

armpl-st: vendor-st
armpl-1s: vendor-1s
armpl-2s: vendor-2s

# Mark the object files as intermediate so that make will remove them
# automatically after building the binaries on which they depend.
.INTERMEDIATE: $(BLIS_NAT_ST_OBJS) $(BLIS_NAT_1S_OBJS) $(BLIS_NAT_2S_OBJS)
.INTERMEDIATE: $(BLIS_1M_ST_OBJS) $(BLIS_1M_1S_OBJS) $(BLIS_1M_2S_OBJS)
.INTERMEDIATE: $(BLIS_4M1A_ST_OBJS) $(BLIS_4M1A_1S_OBJS) $(BLIS_4M1A_2S_OBJS)
.INTERMEDIATE: $(OPENBLAS_ST_OBJS) $(OPENBLAS_1S_OBJS) $(OPENBLAS_2S_OBJS)
.INTERMEDIATE: $(EIGEN_ST_OBJS) $(EIGEN_1S_OBJS) $(EIGEN_2S_OBJS)
.INTERMEDIATE: $(VENDOR_ST_OBJS) $(VENDOR_1S_OBJS) $(VENDOR_2S_OBJS)

# --Object file rules --

#$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c
#	$(CC) $(CFLAGS) -c $< -o $@

# A function to return the datatype cpp macro def from the datatype
# character.
# $(1) is the datatype character. The tests use findstring (a substring
# match) and are tried in the order s, d, c; anything else falls through to
# the dcomplex definitions.
get-dt-cpp = $(strip \
             $(if $(findstring s,$(1)),-DDT=BLIS_FLOAT -DIS_FLOAT,\
             $(if $(findstring d,$(1)),-DDT=BLIS_DOUBLE -DIS_DOUBLE,\
             $(if $(findstring c,$(1)),-DDT=BLIS_SCOMPLEX -DIS_SCOMPLEX,\
                                       -DDT=BLIS_DCOMPLEX -DIS_DCOMPLEX))))

# A function to return the induced-method cpp macro def (-DIND=...) from the
# implementation name in $(1). Any name that contains neither 1m_blis nor
# 4m1a_blis yields -DIND=BLIS_NAT.
get-in-cpp = $(strip \
             $(if $(findstring 1m_blis,$(1)),-DIND=BLIS_1M,\
             $(if $(findstring 4m1a_blis,$(1)),-DIND=BLIS_4M1A,\
                                       -DIND=BLIS_NAT)))

# A function to return other cpp macros that help the test driver
# identify the implementation.
#get-bl-cpp = $(strip \
#             $(if $(findstring blis,$(1)),$(STR_NAT) $(BLI_DEF),\
#             $(if $(findstring openblas,$(1)),$(STR_OBL) $(BLA_DEF),\
#             $(if $(findstring eigen,$(1)),$(STR_EIG) $(EIG_DEF),\
#                                           $(STR_VEN) $(BLA_DEF)))))

# $(1) is the implementation name and $(2) the operation name. The result is
# the -DSTR=... label plus the API selector:
#  - the three *_blis names select $(BLI_DEF);
#  - openblas selects $(BLA_DEF);
#  - eigen selects $(EIG_DEF) only when the operation contains "gemm", and
#    $(BLA_DEF) for any other operation;
#  - everything else is labelled as the vendor library with $(BLA_DEF).
get-bl-cpp = $(strip \
             $(if $(findstring 1m_blis,$(1)),$(STR_1M) $(BLI_DEF),\
             $(if $(findstring 4m1a_blis,$(1)),$(STR_4M1A) $(BLI_DEF),\
             $(if $(findstring asm_blis,$(1)),$(STR_NAT) $(BLI_DEF),\
             $(if $(findstring openblas,$(1)),$(STR_OBL) $(BLA_DEF),\
             $(if $(and $(findstring eigen,$(1)),\
                        $(findstring gemm,$(2))),\
                                          $(STR_EIG) $(EIG_DEF),\
             $(if $(findstring eigen,$(1)),\
                                          $(STR_EIG) $(BLA_DEF),\
                                          $(STR_VEN) $(BLA_DEF))))))))


# Rules for BLIS and BLAS libraries.
# Rule templates for the C drivers. Each template is instantiated through
# $(eval $(call ...)) with
#   $(1) = datatype character, $(2) = operation name, $(3) = implementation.
# The source file is named through $(2), so the templates do not depend on
# the name of the loop variable at the call site. $$< and $$@ are doubled so
# that they survive the expansion done by $(call) and are interpreted only
# when the generated rule runs.
define make-st-rule
test_$(1)$(2)_$(PS_MAX)_$(3)_st.o: test_$(2).c Makefile
	$(CC) $(CFLAGS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(call get-in-cpp,$(3)) $(STR_ST) -c $$< -o $$@
endef

define make-1s-rule
test_$(1)$(2)_$(P1_MAX)_$(3)_1s.o: test_$(2).c Makefile
	$(CC) $(CFLAGS) $(PDEF_1S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(call get-in-cpp,$(3)) $(STR_1S) -c $$< -o $$@
endef

define make-2s-rule
test_$(1)$(2)_$(P2_MAX)_$(3)_2s.o: test_$(2).c Makefile
	$(CC) $(CFLAGS) $(PDEF_2S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(call get-in-cpp,$(3)) $(STR_2S) -c $$< -o $$@
endef

# Instantiate one object rule per (datatype, operation, implementation).
$(foreach dt,$(DTS), \
$(foreach op,$(OPS), \
$(foreach im,$(BIMPLS),$(eval $(call make-st-rule,$(dt),$(op),$(im))))))

$(foreach dt,$(DTS), \
$(foreach op,$(OPS), \
$(foreach im,$(BIMPLS),$(eval $(call make-1s-rule,$(dt),$(op),$(im))))))

$(foreach dt,$(DTS), \
$(foreach op,$(OPS), \
$(foreach im,$(BIMPLS),$(eval $(call make-2s-rule,$(dt),$(op),$(im))))))

# Rules for Eigen.
# Rule templates for the Eigen drivers, which are compiled as C++ and always
# use $(DNAT). Each template is instantiated through $(eval $(call ...)) with
#   $(1) = datatype character, $(2) = operation name, $(3) = implementation.
# The source file is named through $(2), so the templates do not depend on
# the name of the loop variable at the call site. The single-threaded
# variant uses CXXFLAGS_ST; the 1s/2s variants use CXXFLAGS_MT.
define make-eigst-rule
test_$(1)$(2)_$(PS_MAX)_$(3)_st.o: test_$(2).c Makefile
	$(CXX) $(CXXFLAGS_ST) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_ST) -c $$< -o $$@
endef

define make-eig1s-rule
test_$(1)$(2)_$(P1_MAX)_$(3)_1s.o: test_$(2).c Makefile
	$(CXX) $(CXXFLAGS_MT) $(PDEF_1S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_1S) -c $$< -o $$@
endef

define make-eig2s-rule
test_$(1)$(2)_$(P2_MAX)_$(3)_2s.o: test_$(2).c Makefile
	$(CXX) $(CXXFLAGS_MT) $(PDEF_2S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_2S) -c $$< -o $$@
endef

# Instantiate one object rule per (datatype, operation, implementation).
$(foreach dt,$(DTS), \
$(foreach op,$(OPS), \
$(foreach im,$(EIMPLS),$(eval $(call make-eigst-rule,$(dt),$(op),$(im))))))

$(foreach dt,$(DTS), \
$(foreach op,$(OPS), \
$(foreach im,$(EIMPLS),$(eval $(call make-eig1s-rule,$(dt),$(op),$(im))))))

$(foreach dt,$(DTS), \
$(foreach op,$(OPS), \
$(foreach im,$(EIMPLS),$(eval $(call make-eig2s-rule,$(dt),$(op),$(im))))))


# -- Executable file rules --

# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS
# on the link command line in case BLIS was configured with the BLAS
# compatibility layer. This prevents BLIS from inadvertently getting called
# for the BLAS routines we are trying to test with.
test_%_$(PS_MAX)_1m_blis_st.x: test_%_$(PS_MAX)_1m_blis_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_1m_blis_1s.x: test_%_$(P1_MAX)_1m_blis_1s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_1m_blis_2s.x: test_%_$(P2_MAX)_1m_blis_2s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(PS_MAX)_4m1a_blis_st.x: test_%_$(PS_MAX)_4m1a_blis_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_4m1a_blis_1s.x: test_%_$(P1_MAX)_4m1a_blis_1s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_4m1a_blis_2s.x: test_%_$(P2_MAX)_4m1a_blis_2s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(PS_MAX)_asm_blis_st.x: test_%_$(PS_MAX)_asm_blis_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_asm_blis_1s.x: test_%_$(P1_MAX)_asm_blis_1s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_asm_blis_2s.x: test_%_$(P2_MAX)_asm_blis_2s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(PS_MAX)_openblas_st.x: test_%_$(PS_MAX)_openblas_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_openblas_1s.x: test_%_$(P1_MAX)_openblas_1s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_openblas_2s.x: test_%_$(P2_MAX)_openblas_2s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(PS_MAX)_eigen_st.x: test_%_$(PS_MAX)_eigen_st.o $(LIBBLIS_LINK) $(CXX) $(strip $< $(EIGEN_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_eigen_1s.x: test_%_$(P1_MAX)_eigen_1s.o $(LIBBLIS_LINK) $(CXX) $(strip $< $(EIGENP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_eigen_2s.x: test_%_$(P2_MAX)_eigen_2s.o $(LIBBLIS_LINK) $(CXX) $(strip $< $(EIGENP_LIB) 
$(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(PS_MAX)_vendor_st.x: test_%_$(PS_MAX)_vendor_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(VENDOR_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_vendor_1s.x: test_%_$(P1_MAX)_vendor_1s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_vendor_2s.x: test_%_$(P2_MAX)_vendor_2s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.o *.x blis-0.6.1/test/1m4m/runme.sh000077500000000000000000000152161360743507500157100ustar00rootroot00000000000000#!/bin/bash # File pefixes. exec_root="test" out_root="output" delay=0.1 #sys="blis" #sys="stampede2" sys="lonestar5" #sys="ul252" #sys="ul264" # Bind threads to processors. #export OMP_PROC_BIND=true #export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 16 18 20 22 1 3 5 7 9 11 13 15 17 19 21 23" #export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103" if [ ${sys} = "blis" ]; then export GOMP_CPU_AFFINITY="0 1 2 3" threads="jc1ic1jr1_2400 jc2ic3jr2_6000 jc4ic3jr2_8000" elif [ ${sys} = "stampede2" ]; then echo "Need to set GOMP_CPU_AFFINITY." exit 1 threads="jc1ic1jr1_2400 jc4ic6jr1_6000 jc4ic12jr1_8000" elif [ ${sys} = "lonestar5" ]; then export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23" # A hack to use libiomp5 with gcc. 
#export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64" #threads="jc1ic1jr1_2400 # jc2ic3jr2_4800 # jc4ic3jr2_9600" threads="jc1ic1jr1_2400 jc4ic3jr2_7200" threads="jc4ic3jr2_7200" elif [ ${sys} = "ul252" ]; then export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51" threads="jc1ic1jr1_2400 jc2ic13jr1_6000 jc4ic13jr1_8000" elif [ ${sys} = "ul264" ]; then export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63" threads="jc1ic1jr1_2400 jc1ic8jr4_6000 jc2ic8jr4_8000" fi # Datatypes to test. test_dts="s d c z" # Operations to test. #test_ops="gemm hemm herk trmm trsm" test_ops="gemm" # Implementations to test. #impls="blis" #impls="other" #impls="eigen" impls="all" if [ "${impls}" = "blis" ]; then test_impls="asm_blis" elif [ "${impls}" = "eigen" ]; then test_impls="eigen" elif [ "${impls}" = "other" ]; then test_impls="openblas vendor" elif [ "${impls}" = "eigen" ]; then test_impls="eigen" else test_impls="openblas vendor asm_blis 4m1a_blis 1m_blis" #test_impls="openblas" #test_impls="asm_blis 4m1a_blis 1m_blis" #test_impls="asm_blis 1m_blis" fi # Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can # restore the value. GOMP_CPU_AFFINITYsave=${GOMP_CPU_AFFINITY} # First perform real test cases. for th in ${threads}; do # Start with one way of parallelism in each loop. We will now begin # parsing the 'th' variable to update one or more of these threading # parameters. 
jc_nt=1; pc_nt=1; ic_nt=1; jr_nt=1; ir_nt=1 # Strip everything before and after the underscore so that what remains # is the problem size and threading parameter string, respectively. psize=${th##*_}; thinfo=${th%%_*} # Identify each threading parameter and insert a space before it. thsep=$(echo -e ${thinfo} | sed -e "s/\([jip][cr]\)/ \1/g" ) nt=1 for loopnum in ${thsep}; do # Given the current string, which identifies a loop and the # number of ways of parallelism for that loop, strip out # the ways and loop separately to identify each. loop=$(echo -e ${loopnum} | sed -e "s/[0-9]//g" ) num=$(echo -e ${loopnum} | sed -e "s/[a-z]//g" ) # Construct a string that we can evaluate to set the number # of ways of parallelism for the current loop. loop_nt_eq_num="${loop}_nt=${num}" # Update the total number of threads. nt=$(expr ${nt} \* ${num}) # Evaluate the string to assign the ways to the variable. eval ${loop_nt_eq_num} done echo "Switching to: jc${jc_nt} pc${pc_nt} ic${ic_nt} jr${jr_nt} ir${ir_nt} (nt = ${nt}) p_max${psize}" for dt in ${test_dts}; do for im in ${test_impls}; do if [ "${dt}" = "s" -o "${dt}" = "d" ] && \ [ "${im}" = "1m_blis" -o "${im}" = "4m1a_blis" ]; then continue fi for op in ${test_ops}; do # Eigen does not support multithreading for hemm, herk, trmm, # or trsm. So if we're getting ready to execute an Eigen driver # for one of these operations and nt > 1, we skip this test. if [ "${im}" = "eigen" ] && \ [ "${op}" != "gemm" ] && \ [ "${nt}" != "1" ]; then continue; fi # Find the threading suffix by probing the executable. binname=$(ls ${exec_root}_${dt}${op}_${psize}_${im}_*.x) suf_ext=${binname##*_} suf=${suf_ext%%.*} #echo "found file: ${binname} with suffix ${suf}" # Set the number of threads according to th. if [ "${suf}" = "1s" ] || [ "${suf}" = "2s" ]; then # Set the threading parameters based on the implementation # that we are preparing to run. 
if [ "${im}" = "asm_blis" ]; then unset OMP_NUM_THREADS export BLIS_JC_NT=${jc_nt} export BLIS_PC_NT=${pc_nt} export BLIS_IC_NT=${ic_nt} export BLIS_JR_NT=${jr_nt} export BLIS_IR_NT=${ir_nt} elif [ "${im}" = "openblas" ]; then unset OMP_NUM_THREADS export OPENBLAS_NUM_THREADS=${nt} elif [ "${im}" = "eigen" ]; then export OMP_NUM_THREADS=${nt} elif [ "${im}" = "vendor" ]; then unset OMP_NUM_THREADS export MKL_NUM_THREADS=${nt} fi export nt_use=${nt} # Multithreaded OpenBLAS seems to have a problem running # properly if GOMP_CPU_AFFINITY is set. So we temporarily # unset it here if we are about to execute OpenBLAS, but # otherwise restore it. if [ ${im} = "openblas" ]; then unset GOMP_CPU_AFFINITY else export GOMP_CPU_AFFINITY="${GOMP_CPU_AFFINITYsave}" fi else export BLIS_JC_NT=1 export BLIS_PC_NT=1 export BLIS_IC_NT=1 export BLIS_JR_NT=1 export BLIS_IR_NT=1 export OMP_NUM_THREADS=1 export OPENBLAS_NUM_THREADS=1 export MKL_NUM_THREADS=1 export nt_use=1 fi # Construct the name of the test executable. exec_name="${exec_root}_${dt}${op}_${psize}_${im}_${suf}.x" # Construct the name of the output file. out_file="${out_root}_${suf}_${dt}${op}_${im}.m" #echo "Running (nt = ${nt_use}) ./${exec_name} > ${out_file}" echo "Running ./${exec_name} > ${out_file}" # Run executable. ./${exec_name} > ${out_file} sleep ${delay} done done done done blis-0.6.1/test/1m4m/test_gemm.c000066400000000000000000000301221360743507500163440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #ifdef EIGEN #define BLIS_DISABLE_BLAS_DEFS #include "blis.h" #include #include using namespace Eigen; #else #include "blis.h" #endif #define COL_STORAGE //#define ROW_STORAGE //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input, k_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; //bli_init(); bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; #if 1 p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; k_input = -1; #else p_begin = 40; p_max = 2000; p_inc = 40; m_input = -1; n_input = -1; k_input = -1; #endif // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 0 #ifdef BLIS if ( ind == BLIS_4M1A ) k_input = 128; else if ( ind == BLIS_1M ) k_input = 128; else k_input = 256; #else k_input = 192; #endif #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_max; p += p_inc ) for ( p = p_max; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); #ifdef COL_STORAGE bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); #else bli_obj_create( dt, m, k, k, 1, &a ); bli_obj_create( dt, k, n, n, 1, &b ); bli_obj_create( dt, m, n, n, 1, &c ); bli_obj_create( dt, m, n, n, 1, &c_save ); #endif bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif #ifdef EIGEN double alpha_r, alpha_i; bli_getsc( &alpha, &alpha_r, &alpha_i ); void* ap = bli_obj_buffer_at_off( &a ); void* bp = bli_obj_buffer_at_off( &b ); void* cp = bli_obj_buffer_at_off( &c ); #ifdef COL_STORAGE const int os_a = bli_obj_col_stride( &a ); const int os_b = bli_obj_col_stride( &b ); const int os_c = bli_obj_col_stride( &c ); #else const int os_a = bli_obj_row_stride( &a ); const int os_b = bli_obj_row_stride( &b ); const int os_c = bli_obj_row_stride( &c ); #endif Stride stride_a( os_a, 1 ); Stride stride_b( os_b, 1 ); Stride stride_c( os_c, 1 ); #ifdef COL_STORAGE #if defined(IS_FLOAT) typedef Matrix MatrixXf_; #elif defined (IS_DOUBLE) 
typedef Matrix MatrixXd_; #elif defined (IS_SCOMPLEX) typedef Matrix, Dynamic, Dynamic, ColMajor> MatrixXcf_; #elif defined (IS_DCOMPLEX) typedef Matrix, Dynamic, Dynamic, ColMajor> MatrixXcd_; #endif #else #if defined(IS_FLOAT) typedef Matrix MatrixXf_; #elif defined (IS_DOUBLE) typedef Matrix MatrixXd_; #elif defined (IS_SCOMPLEX) typedef Matrix, Dynamic, Dynamic, RowMajor> MatrixXcf_; #elif defined (IS_DCOMPLEX) typedef Matrix, Dynamic, Dynamic, RowMajor> MatrixXcd_; #endif #endif #if defined(IS_FLOAT) Map > A( ( float* )ap, m, k, stride_a ); Map > B( ( float* )bp, k, n, stride_b ); Map > C( ( float* )cp, m, n, stride_c ); #elif defined (IS_DOUBLE) Map > A( ( double* )ap, m, k, stride_a ); Map > B( ( double* )bp, k, n, stride_b ); Map > C( ( double* )cp, m, n, stride_c ); #elif defined (IS_SCOMPLEX) Map > A( ( std::complex* )ap, m, k, stride_a ); Map > B( ( std::complex* )bp, k, n, stride_b ); Map > C( ( std::complex* )cp, m, n, stride_c ); #elif defined (IS_DCOMPLEX) Map > A( ( std::complex* )ap, m, k, stride_a ); Map > B( ( std::complex* )bp, k, n, stride_b ); Map > C( ( std::complex* )cp, m, n, stride_c ); #endif #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #if defined(BLIS) bli_gemm( &alpha, &a, &b, &beta, &c ); #elif defined(EIGEN) C.noalias() += alpha_r * A * B; #else // if defined(BLAS) if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* bp = ( float* )bli_obj_buffer( &b ); float* betap = ( float* )bli_obj_buffer( &beta ); float* cp = ( float* 
)bli_obj_buffer( &c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* bp = ( double* )bli_obj_buffer( &b ); double* betap = ( double* )bli_obj_buffer( &beta ); double* cp = ( double* )bli_obj_buffer( &c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* bp = ( scomplex* )bli_obj_buffer( &b ); scomplex* betap = ( scomplex* )bli_obj_buffer( &beta ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* bp = ( dcomplex* )bli_obj_buffer( &b ); dcomplex* betap = ( dcomplex* )bli_obj_buffer( &beta ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); zgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } 
#endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, gflops ); //fflush( stdout ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/3/000077500000000000000000000000001360743507500136025ustar00rootroot00000000000000blis-0.6.1/test/3/Makefile000066400000000000000000000366671360743507500152640ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2018, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for standalone BLIS test drivers. # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all \ clean cleanx # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. # - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := ../.. LIB_PATH = ../../lib/$(CONFIG_NAME) INC_PATH = ../../include/$(CONFIG_NAME) SHARE_PATH := ../.. endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. -include $(SHARE_PATH)/common.mk # # --- BLAS implementations ----------------------------------------------------- # # BLAS library path(s). This is where the BLAS libraries reside. 
HOME_LIB_PATH := $(HOME)/flame/lib # OpenBLAS OPENBLAS_LIB := $(HOME_LIB_PATH)/libopenblas.a OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblasp.a # ATLAS #ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \ # $(HOME_LIB_PATH)/libatlas.a # Eigen EIGEN_INC := $(HOME)/flame/eigen/include/eigen3 EIGEN_LIB := $(HOME_LIB_PATH)/libeigen_blas_static.a EIGENP_LIB := $(EIGEN_LIB) # MKL MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64 MKL_LIB := -L$(MKL_LIB_PATH) \ -lmkl_intel_lp64 \ -lmkl_core \ -lmkl_sequential \ -lpthread -lm -ldl #MKLP_LIB := -L$(MKL_LIB_PATH) \ # -lmkl_intel_thread \ # -lmkl_core \ # -lmkl_intel_ilp64 \ # -L$(ICC_LIB_PATH) \ # -liomp5 MKLP_LIB := -L$(MKL_LIB_PATH) \ -lmkl_intel_lp64 \ -lmkl_core \ -lmkl_gnu_thread \ -lpthread -lm -ldl -fopenmp #-L$(ICC_LIB_PATH) \ #-lgomp VENDOR_LIB := $(MKL_LIB) VENDORP_LIB := $(MKLP_LIB) # # --- Problem size definitions ------------------------------------------------- # # Single core (single-threaded) PS_BEGIN := 48 PS_MAX := 2400 PS_INC := 48 # Single-socket (multithreaded) P1_BEGIN := 96 P1_MAX := 4800 P1_INC := 96 # Dual-socket (multithreaded) P2_BEGIN := 144 P2_MAX := 7200 P2_INC := 144 # # --- General build definitions ------------------------------------------------ # TEST_SRC_PATH := . TEST_OBJ_PATH := . # Gather all local object files. TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(TEST_SRC_PATH)/*.c))) # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the "framework" CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add local header paths to CFLAGS. CFLAGS += -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. #LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # Define a set of CFLAGS for use with C++ and Eigen. 
CXXFLAGS := $(subst -std=c99,-std=c++11,$(CFLAGS)) CXXFLAGS += -I$(EIGEN_INC) # Create a copy of CXXFLAGS without -fopenmp in order to disable multithreading. CXXFLAGS_ST := -march=native $(subst -fopenmp,,$(CXXFLAGS)) CXXFLAGS_MT := -march=native $(CXXFLAGS) # Which library? BLI_DEF := -DBLIS BLA_DEF := -DBLAS EIG_DEF := -DEIGEN # Complex implementation type D3MHW := -DIND=BLIS_3MH D3M1 := -DIND=BLIS_3M1 D4MHW := -DIND=BLIS_4MH D4M1B := -DIND=BLIS_4M1B D4M1A := -DIND=BLIS_4M1A D1M := -DIND=BLIS_1M DNAT := -DIND=BLIS_NAT # Implementation string #STR_3MHW := -DSTR=\"3mhw\" #STR_3M1 := -DSTR=\"3m1\" #STR_4MHW := -DSTR=\"4mhw\" #STR_4M1B := -DSTR=\"4m1b\" #STR_4M1A := -DSTR=\"4m1a\" #STR_1M := -DSTR=\"1m\" STR_NAT := -DSTR=\"asm_blis\" STR_OBL := -DSTR=\"openblas\" STR_EIG := -DSTR=\"eigen\" STR_VEN := -DSTR=\"vendor\" # Single or multithreaded string STR_ST := -DTHR_STR=\"st\" STR_1S := -DTHR_STR=\"1s\" STR_2S := -DTHR_STR=\"2s\" # Problem size specification PDEF_ST := -DP_BEGIN=$(PS_BEGIN) -DP_INC=$(PS_INC) -DP_MAX=$(PS_MAX) PDEF_1S := -DP_BEGIN=$(P1_BEGIN) -DP_INC=$(P1_INC) -DP_MAX=$(P1_MAX) PDEF_2S := -DP_BEGIN=$(P2_BEGIN) -DP_INC=$(P2_INC) -DP_MAX=$(P2_MAX) # # --- Targets/rules ------------------------------------------------------------ # all: all-st all-1s all-2s blis: blis-st blis-1s blis-2s openblas: openblas-st openblas-1s openblas-2s eigen: eigen-st eigen-1s eigen-2s vendor: vendor-st vendor-1s vendor-2s mkl: vendor armpl: vendor all-st: blis-st openblas-st mkl-st all-1s: blis-1s openblas-1s mkl-1s all-2s: blis-2s openblas-2s mkl-2s blis-st: blis-nat-st blis-1s: blis-nat-1s blis-2s: blis-nat-2s #blis-ind: blis-ind-st blis-ind-mt blis-nat: blis-nat-st blis-nat-1s blis-nat-2s # Define the datatypes, operations, and implementations. DTS := s d c z OPS := gemm hemm herk trmm trsm BIMPLS := asm_blis openblas vendor EIMPLS := eigen # Define functions to construct object filenames from the datatypes and # operations given an implementation. 
We define one function for single- # threaded, single-socket, and dual-socket filenames. get-st-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(PS_MAX)_$(1)_st.o)) get-1s-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(P1_MAX)_$(1)_1s.o)) get-2s-objs = $(foreach dt,$(DTS),$(foreach op,$(OPS),test_$(dt)$(op)_$(P2_MAX)_$(1)_2s.o)) # Construct object and binary names for single-threaded, single-socket, and # dual-socket files for BLIS, OpenBLAS, and a vendor library (e.g. MKL). BLIS_NAT_ST_OBJS := $(call get-st-objs,asm_blis) BLIS_NAT_ST_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_ST_OBJS)) BLIS_NAT_1S_OBJS := $(call get-1s-objs,asm_blis) BLIS_NAT_1S_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_1S_OBJS)) BLIS_NAT_2S_OBJS := $(call get-2s-objs,asm_blis) BLIS_NAT_2S_BINS := $(patsubst %.o,%.x,$(BLIS_NAT_2S_OBJS)) OPENBLAS_ST_OBJS := $(call get-st-objs,openblas) OPENBLAS_ST_BINS := $(patsubst %.o,%.x,$(OPENBLAS_ST_OBJS)) OPENBLAS_1S_OBJS := $(call get-1s-objs,openblas) OPENBLAS_1S_BINS := $(patsubst %.o,%.x,$(OPENBLAS_1S_OBJS)) OPENBLAS_2S_OBJS := $(call get-2s-objs,openblas) OPENBLAS_2S_BINS := $(patsubst %.o,%.x,$(OPENBLAS_2S_OBJS)) EIGEN_ST_OBJS := $(call get-st-objs,eigen) EIGEN_ST_BINS := $(patsubst %.o,%.x,$(EIGEN_ST_OBJS)) EIGEN_1S_OBJS := $(call get-1s-objs,eigen) EIGEN_1S_BINS := $(patsubst %.o,%.x,$(EIGEN_1S_OBJS)) EIGEN_2S_OBJS := $(call get-2s-objs,eigen) EIGEN_2S_BINS := $(patsubst %.o,%.x,$(EIGEN_2S_OBJS)) VENDOR_ST_OBJS := $(call get-st-objs,vendor) VENDOR_ST_BINS := $(patsubst %.o,%.x,$(VENDOR_ST_OBJS)) VENDOR_1S_OBJS := $(call get-1s-objs,vendor) VENDOR_1S_BINS := $(patsubst %.o,%.x,$(VENDOR_1S_OBJS)) VENDOR_2S_OBJS := $(call get-2s-objs,vendor) VENDOR_2S_BINS := $(patsubst %.o,%.x,$(VENDOR_2S_OBJS)) # Define some targets associated with the above object/binary files. 
blis-nat-st: $(BLIS_NAT_ST_BINS) blis-nat-1s: $(BLIS_NAT_1S_BINS) blis-nat-2s: $(BLIS_NAT_2S_BINS) openblas-st: $(OPENBLAS_ST_BINS) openblas-1s: $(OPENBLAS_1S_BINS) openblas-2s: $(OPENBLAS_2S_BINS) eigen-st: $(EIGEN_ST_BINS) eigen-1s: $(EIGEN_1S_BINS) eigen-2s: $(EIGEN_2S_BINS) vendor-st: $(VENDOR_ST_BINS) vendor-1s: $(VENDOR_1S_BINS) vendor-2s: $(VENDOR_2S_BINS) mkl-st: vendor-st mkl-1s: vendor-1s mkl-2s: vendor-2s armpl-st: vendor-st armpl-1s: vendor-1s armpl-2s: vendor-2s # Mark the object files as intermediate so that make will remove them # automatically after building the binaries on which they depend. .INTERMEDIATE: $(BLIS_NAT_ST_OBJS) $(BLIS_NAT_1S_OBJS) $(BLIS_NAT_2S_OBJS) .INTERMEDIATE: $(OPENBLAS_ST_OBJS) $(OPENBLAS_1S_OBJS) $(OPENBLAS_2S_OBJS) .INTERMEDIATE: $(EIGEN_ST_OBJS) $(EIGEN_1S_OBJS) $(EIGEN_2S_OBJS) .INTERMEDIATE: $(VENDOR_ST_OBJS) $(VENDOR_1S_OBJS) $(VENDOR_2S_OBJS) # --Object file rules -- #$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c # $(CC) $(CFLAGS) -c $< -o $@ # A function to return the datatype cpp macro def from the datatype # character. get-dt-cpp = $(strip \ $(if $(findstring s,$(1)),-DDT=BLIS_FLOAT -DIS_FLOAT,\ $(if $(findstring d,$(1)),-DDT=BLIS_DOUBLE -DIS_DOUBLE,\ $(if $(findstring c,$(1)),-DDT=BLIS_SCOMPLEX -DIS_SCOMPLEX,\ -DDT=BLIS_DCOMPLEX -DIS_DCOMPLEX)))) # A function to return other cpp macros that help the test driver # identify the implementation. #get-bl-cpp = $(strip \ # $(if $(findstring blis,$(1)),$(STR_NAT) $(BLI_DEF),\ # $(if $(findstring openblas,$(1)),$(STR_OBL) $(BLA_DEF),\ # $(if $(findstring eigen,$(1)),$(STR_EIG) $(EIG_DEF),\ # $(STR_VEN) $(BLA_DEF))))) get-bl-cpp = $(strip \ $(if $(findstring blis,$(1)),$(STR_NAT) $(BLI_DEF),\ $(if $(findstring openblas,$(1)),$(STR_OBL) $(BLA_DEF),\ $(if $(and $(findstring eigen,$(1)),\ $(findstring gemm,$(2))),\ $(STR_EIG) $(EIG_DEF),\ $(if $(findstring eigen,$(1)),\ $(STR_EIG) $(BLA_DEF),\ $(STR_VEN) $(BLA_DEF)))))) # Rules for BLIS and BLAS libraries. 
define make-st-rule test_$(1)$(2)_$(PS_MAX)_$(3)_st.o: test_$(op).c Makefile $(CC) $(CFLAGS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_ST) -c $$< -o $$@ endef define make-1s-rule test_$(1)$(2)_$(P1_MAX)_$(3)_1s.o: test_$(op).c Makefile $(CC) $(CFLAGS) $(PDEF_1S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_1S) -c $$< -o $$@ endef define make-2s-rule test_$(1)$(2)_$(P2_MAX)_$(3)_2s.o: test_$(op).c Makefile $(CC) $(CFLAGS) $(PDEF_2S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_2S) -c $$< -o $$@ endef $(foreach dt,$(DTS), \ $(foreach op,$(OPS), \ $(foreach im,$(BIMPLS),$(eval $(call make-st-rule,$(dt),$(op),$(im)))))) $(foreach dt,$(DTS), \ $(foreach op,$(OPS), \ $(foreach im,$(BIMPLS),$(eval $(call make-1s-rule,$(dt),$(op),$(im)))))) $(foreach dt,$(DTS), \ $(foreach op,$(OPS), \ $(foreach im,$(BIMPLS),$(eval $(call make-2s-rule,$(dt),$(op),$(im)))))) # Rules for Eigen. define make-eigst-rule test_$(1)$(2)_$(PS_MAX)_$(3)_st.o: test_$(op).c Makefile $(CXX) $(CXXFLAGS_ST) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_ST) -c $$< -o $$@ endef define make-eig1s-rule test_$(1)$(2)_$(P1_MAX)_$(3)_1s.o: test_$(op).c Makefile $(CXX) $(CXXFLAGS_MT) $(PDEF_1S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_1S) -c $$< -o $$@ endef define make-eig2s-rule test_$(1)$(2)_$(P2_MAX)_$(3)_2s.o: test_$(op).c Makefile $(CXX) $(CXXFLAGS_MT) $(PDEF_2S) $(call get-dt-cpp,$(1)) $(call get-bl-cpp,$(3),$(2)) $(DNAT) $(STR_2S) -c $$< -o $$@ endef $(foreach dt,$(DTS), \ $(foreach op,$(OPS), \ $(foreach im,$(EIMPLS),$(eval $(call make-eigst-rule,$(dt),$(op),$(im)))))) $(foreach dt,$(DTS), \ $(foreach op,$(OPS), \ $(foreach im,$(EIMPLS),$(eval $(call make-eig1s-rule,$(dt),$(op),$(im)))))) $(foreach dt,$(DTS), \ $(foreach op,$(OPS), \ $(foreach im,$(EIMPLS),$(eval $(call make-eig2s-rule,$(dt),$(op),$(im)))))) # -- Executable file rules -- # NOTE: For the BLAS test 
drivers, we place the BLAS libraries before BLIS # on the link command line in case BLIS was configured with the BLAS # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. test_%_$(PS_MAX)_asm_blis_st.x: test_%_$(PS_MAX)_asm_blis_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_asm_blis_1s.x: test_%_$(P1_MAX)_asm_blis_1s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_asm_blis_2s.x: test_%_$(P2_MAX)_asm_blis_2s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(PS_MAX)_openblas_st.x: test_%_$(PS_MAX)_openblas_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_openblas_1s.x: test_%_$(P1_MAX)_openblas_1s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_openblas_2s.x: test_%_$(P2_MAX)_openblas_2s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(PS_MAX)_eigen_st.x: test_%_$(PS_MAX)_eigen_st.o $(LIBBLIS_LINK) $(CXX) $(strip $< $(EIGEN_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_eigen_1s.x: test_%_$(P1_MAX)_eigen_1s.o $(LIBBLIS_LINK) $(CXX) $(strip $< $(EIGENP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_eigen_2s.x: test_%_$(P2_MAX)_eigen_2s.o $(LIBBLIS_LINK) $(CXX) $(strip $< $(EIGENP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(PS_MAX)_vendor_st.x: test_%_$(PS_MAX)_vendor_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(VENDOR_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P1_MAX)_vendor_1s.x: test_%_$(P1_MAX)_vendor_1s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) test_%_$(P2_MAX)_vendor_2s.x: test_%_$(P2_MAX)_vendor_2s.o $(LIBBLIS_LINK) $(CC) $(strip $< $(VENDORP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.o *.x 
blis-0.6.1/test/3/matlab/000077500000000000000000000000001360743507500150425ustar00rootroot00000000000000blis-0.6.1/test/3/matlab/gen_opnames.m000066400000000000000000000004101360743507500175060ustar00rootroot00000000000000function r_val = gen_opnames( ops, dts ) nops = size( ops, 1 ); ndts = size( dts, 2 ); i = 1; for id = 1:ndts dt = dts( id ); for io = 1:nops op = ops( io, : ); opnames( i, : ) = sprintf( '%c%s', dt, op ); i = i + 1; end end r_val = opnames; end blis-0.6.1/test/3/matlab/plot_l3_perf.m000066400000000000000000000143511360743507500176140ustar00rootroot00000000000000function r_val = plot_l3_perf( opname, ... data_blis, ... data_open, ... data_eige, ... data_vend, vend_str, ... nth, ... rows, cols, ... with_eigen, ... cfreq, ... dfps, ... theid ) if 1 ax1 = subplot( rows, cols, theid ); hold( ax1, 'on' ); end % Set line properties. color_blis = 'k'; lines_blis = '-'; markr_blis = ''; color_open = 'r'; lines_open = '--'; markr_open = 'o'; color_eige = 'm'; lines_eige = '-.'; markr_eige = 'x'; color_vend = 'b'; lines_vend = '-.'; markr_vend = '.'; % Compute the peak performance in terms of the number of double flops % executable per cycle and the clock rate. if opname(1) == 's' || opname(1) == 'c' flopspercycle = dfps * 2; else flopspercycle = dfps; end max_perf_core = (flopspercycle * cfreq) * 1; % Adjust title for real domain hemm and herk. title_opname = opname; if opname(1) == 's' || opname(1) == 'd' % if strcmp( extractAfter( opname, 1 ), 'hemm' ) || ... % strcmp( extractAfter( opname, 1 ), 'herk' ) % title_opname(2:3) = 'sy'; % end opname_u = opname; opname_u(1) = '_'; if strcmp( opname_u, '_hemm' ) || ... strcmp( opname_u, '_herk' ) title_opname(2:3) = 'sy'; end end % Print the title to a string. titlename = '%s'; titlename = sprintf( titlename, title_opname ); % Set the legend strings. 
blis_legend = sprintf( 'BLIS' ); open_legend = sprintf( 'OpenBLAS' ); eige_legend = sprintf( 'Eigen' ); %vend_legend = sprintf( 'MKL' ); %vend_legend = sprintf( 'ARMPL' ); vend_legend = vend_str; % Determine the final dimension. %n_points = size( data_blis, 1 ); %x_end = data_blis( n_points, 1 ); % Set axes range values. y_scale = 1.00; x_begin = 0; x_end = data_blis( size( data_blis, 1 ), 1 ); y_begin = 0; y_end = max_perf_core * y_scale; % Set axes names. xaxisname = ' m = n = k'; if nth == 1 yaxisname = 'GFLOPS'; else yaxisname = 'GFLOPS/core'; end %flopscol = 4; flopscol = size( data_blis, 2 ); msize = 5; if 1 fontsize = 13; else fontsize = 16; end linesize = 0.5; legend_loc = 'southeast'; % -------------------------------------------------------------------- x_axis( :, 1 ) = data_blis( :, 1 ); data_peak( 1, 1:2 ) = [ 0 max_perf_core ]; data_peak( 2, 1:2 ) = [ x_end max_perf_core ]; blis_ln = line( x_axis( :, 1 ), data_blis( :, flopscol ) / nth, ... 'Color',color_blis, 'LineStyle',lines_blis, ... 'LineWidth',linesize ); open_ln = line( x_axis( :, 1 ), data_open( :, flopscol ) / nth, ... 'Color',color_open, 'LineStyle',lines_open, ... 'LineWidth',linesize ); if data_eige(1,1) ~= -1 eige_ln = line( x_axis( :, 1 ), data_eige( :, flopscol ) / nth, ... 'Color',color_eige, 'LineStyle',lines_eige, ... 'LineWidth',linesize ); else eige_ln = line( nan, nan, ... 'Color',color_eige, 'LineStyle',lines_eige, ... 'LineWidth',linesize ); end vend_ln = line( x_axis( :, 1 ), data_vend( :, flopscol ) / nth, ... 'Color',color_vend, 'LineStyle',lines_vend, ... 
'LineWidth',linesize ); xlim( ax1, [x_begin x_end] ); ylim( ax1, [y_begin y_end] ); if 6000 <= x_end && x_end < 10000 x_tick2 = x_end - 2000; x_tick1 = x_tick2/2; xticks( ax1, [ x_tick1 x_tick2 ] ); elseif 4000 <= x_end && x_end < 6000 x_tick2 = x_end - 1000; x_tick1 = x_tick2/2; xticks( ax1, [ x_tick1 x_tick2 ] ); elseif 2000 <= x_end && x_end < 3000 x_tick2 = x_end - 400; x_tick1 = x_tick2/2; xticks( ax1, [ x_tick1 x_tick2 ] ); end if rows == 4 && cols == 5 if nth == 1 && theid == 3 if with_eigen == 1 leg = legend( [ blis_ln open_ln eige_ln vend_ln ], ... blis_legend, open_legend, eige_legend, vend_legend, ... 'Location', legend_loc ); else leg = legend( [ blis_ln open_ln vend_ln ], ... blis_legend, open_legend, vend_legend, ... 'Location', legend_loc ); end set( leg,'Box','off','Color','none','Units','inches','FontSize',fontsize-3 ); set( leg,'Position',[11.20 12.81 0.7 0.3 ] ); % (0,2br) %set( leg,'Position',[ 4.20 12.81 0.7 0.3 ] ); % (0,0br) elseif nth > 1 && theid == 4 if with_eigen == 1 leg = legend( [ blis_ln open_ln eige_ln vend_ln ], ... blis_legend, open_legend, eige_legend, vend_legend, ... 'Location', legend_loc ); else leg = legend( [ blis_ln open_ln vend_ln ], ... blis_legend, open_legend, vend_legend, ... 
'Location', legend_loc ); end set( leg,'Box','off','Color','none','Units','inches','FontSize',fontsize-3 ); %set( leg,'Position',[7.70 12.81 0.7 0.3 ] ); % (0,1br) %set( leg,'Position',[11.20 12.81 0.7 0.3 ] ); % (0,2br) set( leg,'Position',[10.47 14.17 0.7 0.3 ] ); % (0,2tl) end end %set( leg,'Position',[ 4.20 12.75 0.7 0.3 ] ); % (0,0br) %set( leg,'Position',[ 7.70 12.75 0.7 0.3 ] ); % (0,1br) %set( leg,'Position',[10.47 14.28 0.7 0.3 ] ); % (0,2tl) %set( leg,'Position',[11.20 12.75 0.7 0.3 ] ); % (0,2br) %set( leg,'Position',[13.95 14.28 0.7 0.3 ] ); % (0,3tl) %set( leg,'Position',[14.70 12.75 0.7 0.3 ] ); % (0,3br) %set( leg,'Position',[17.45 14.28 0.7 0.3 ] ); % (0,4tl) %set( leg,'Position',[18.22 12.75 0.7 0.3 ] ); % (0,4br) set( ax1,'FontSize',fontsize ); set( ax1,'TitleFontSizeMultiplier',1.0 ); % default is 1.1. box( ax1, 'on' ); titl = title( titlename ); set( titl, 'FontWeight', 'normal' ); % default font style is now 'bold'. tpos = get( titl, 'Position' ); % default is to align across whole figure, not box. %tpos(1) = tpos(1) + 100; tpos(1) = tpos(1) + 40; set( titl, 'Position', tpos ); % here we nudge it back to centered with box. if theid > (rows-1)*cols xlab = xlabel( ax1,xaxisname ); %tpos = get( xlab, 'Position' ) %tpos(2) = tpos(2) + 10; %set( xlab, 'Position', tpos ); end if mod(theid-1,cols) == 0 ylab = ylabel( ax1,yaxisname ); end %export_fig( filename, colorflag, '-pdf', '-m2', '-painters', '-transparent' ); %saveas( fig, filename_png ); %hold( ax1, 'off' ); r_val = 0; end blis-0.6.1/test/3/matlab/plot_panel_4x5.m000066400000000000000000000075301360743507500200620ustar00rootroot00000000000000function r_val = plot_panel_4x5( cfreq, ... dflopspercycle, ... nth, ... thr_str, ... dirpath, ... arch_str, ... vend_str, ... with_eigen ) %cfreq = 1.8; %dflopspercycle = 32; % Create filename "templates" for the files that contain the performance % results. 
filetemp_blis = '%s/output_%s_%s_asm_blis.m'; filetemp_open = '%s/output_%s_%s_openblas.m'; filetemp_eige = '%s/output_%s_%s_eigen.m'; filetemp_vend = '%s/output_%s_%s_vendor.m'; % Create a variable name "template" for the variables contained in the % files outlined above. vartemp = 'data_%s_%s_%s( :, : )'; % Define the datatypes and operations we will be plotting. dts = [ 's' 'd' 'c' 'z' ]; ops( 1, : ) = 'gemm'; ops( 2, : ) = 'hemm'; ops( 3, : ) = 'herk'; ops( 4, : ) = 'trmm'; ops( 5, : ) = 'trsm'; % Generate datatype-specific operation names from the set of operations % and datatypes. opnames = gen_opnames( ops, dts ); n_opnames = size( opnames, 1 ); fig = figure('Position', [100, 100, 2000, 1500]); orient( fig, 'portrait' ); set(gcf,'PaperUnits', 'inches'); if 1 == 1 % matlab set(gcf,'PaperSize', [11 15.0]); set(gcf,'PaperPosition', [0 0 11 15.0]); set(gcf,'PaperPositionMode','manual'); else % octave 4.x set(gcf,'PaperSize', [15 19.0]); set(gcf,'PaperPositionMode','auto'); end set(gcf,'PaperOrientation','landscape'); % Iterate over the list of datatype-specific operation names. for opi = 1:n_opnames %for opi = 1:1 % Grab the current datatype combination. opname = opnames( opi, : ); str = sprintf( 'Plotting %d: %s', opi, opname ); disp(str); % Construct filenames for the data files from templates. file_blis = sprintf( filetemp_blis, dirpath, thr_str, opname ); file_open = sprintf( filetemp_open, dirpath, thr_str, opname ); file_vend = sprintf( filetemp_vend, dirpath, thr_str, opname ); % Load the data files. %str = sprintf( ' Loading %s', file_blis ); disp(str); run( file_blis ) %str = sprintf( ' Loading %s', file_open ); disp(str); run( file_open ) %str = sprintf( ' Loading %s', file_vend ); disp(str); run( file_vend ) % Construct variable names for the variables in the data files. 
var_blis = sprintf( vartemp, thr_str, opname, 'asm_blis' ); var_open = sprintf( vartemp, thr_str, opname, 'openblas' ); var_vend = sprintf( vartemp, thr_str, opname, 'vendor' ); % Use eval() to instantiate the variable names constructed above, % copying each to a simplified name. data_blis = eval( var_blis ); % e.g. data_st_sgemm_asm_blis( :, : ); data_open = eval( var_open ); % e.g. data_st_sgemm_openblas( :, : ); data_vend = eval( var_vend ); % e.g. data_st_sgemm_vendor( :, : ); % Only read Eigen data in select cases. if with_eigen == 1 opname_u = opname; opname_u(1) = '_'; if nth == 1 || strcmp( opname_u, '_gemm' ) file_eige = sprintf( filetemp_eige, dirpath, thr_str, opname ); run( file_eige ) var_eige = sprintf( vartemp, thr_str, opname, 'eigen' ); data_eige = eval( var_eige ); % e.g. data_st_sgemm_eigen( :, : ); else data_eige(1,1) = -1; end else data_eige(1,1) = -1; end % Plot one result in an m x n grid of plots, via the subplot() % function. plot_l3_perf( opname, ... data_blis, ... data_open, ... data_eige, ... data_vend, vend_str, ... nth, ... 4, 5, ... with_eigen, ... cfreq, ... dflopspercycle, ... opi ); end % Construct the name of the file to which we will output the graph. outfile = sprintf( 'l3_perf_%s_nt%d.pdf', arch_str, nth ); % Output the graph to pdf format. 
%print(gcf, 'gemm_md','-fillpage','-dpdf'); print(gcf, outfile,'-bestfit','-dpdf'); end blis-0.6.1/test/3/matlab/runthese.m000066400000000000000000000047451360743507500170670ustar00rootroot00000000000000% tx2 plot_panel_4x5(2.20,8,1, 'st','../results/tx2/20190205/st', 'tx2', 'ARMPL'); close; clear all; plot_panel_4x5(2.20,8,28,'1s','../results/tx2/20190205/jc4ic7','tx2_jc4ic7','ARMPL'); close; clear all; plot_panel_4x5(2.20,8,56,'2s','../results/tx2/20190205/jc8ic7','tx2_jc8ic7','ARMPL'); close; clear all; % skx % pre-eigen: %plot_panel_4x5(2.00,32,1, 'st','../results/skx/20190306/st', 'skx', 'MKL'); close; clear all; %plot_panel_4x5(2.00,32,26,'1s','../results/skx/20190306/jc2ic13','skx_jc2ic13','MKL'); close; clear all; %plot_panel_4x5(2.00,32,52,'2s','../results/skx/20190306/jc4ic13','skx_jc4ic13','MKL'); close; clear all; % with eigen: plot_panel_4x5(2.00,32,1, 'st','../results/skx/merged20190306_0328/st', 'skx', 'MKL',1); close; clear all; plot_panel_4x5(2.00,32,26,'1s','../results/skx/merged20190306_0328/jc2ic13','skx_jc2ic13','MKL',1); close; clear all; plot_panel_4x5(2.00,32,52,'2s','../results/skx/merged20190306_0328/jc4ic13','skx_jc4ic13','MKL',1); close; clear all; % has % pre-eigen: %plot_panel_4x5(3.25,16,1, 'st','../results/has/20190206/st', 'has', 'MKL',1); close; clear all; %plot_panel_4x5(3.00,16,12,'1s','../results/has/20190206/jc2ic3jr2','has_jc2ic3jr2','MKL',1); close; clear all; %plot_panel_4x5(3.00,16,24,'2s','../results/has/20190206/jc4ic3jr2','has_jc4ic3jr2','MKL',1); close; clear all; % with eigen: plot_panel_4x5(3.25,16,1, 'st','../results/has/merged20190206_0328/st', 'has', 'MKL',1); close; clear all; plot_panel_4x5(3.00,16,12,'1s','../results/has/merged20190206_0328/jc2ic3jr2','has_jc2ic3jr2','MKL',1); close; clear all; plot_panel_4x5(3.00,16,24,'2s','../results/has/merged20190206_0328/jc4ic3jr2','has_jc4ic3jr2','MKL',1); close; clear all; % epyc % pre-eigen: %plot_panel_4x5(3.00,8,1, 'st','../results/epyc/merged201903_0619/st','epyc', 
'MKL'); close; clear all; %plot_panel_4x5(2.55,8,32,'1s','../results/epyc/merged201903_0619/jc1ic8jr4','epyc_jc1ic8jr4','MKL'); close; clear all; %plot_panel_4x5(2.55,8,64,'2s','../results/epyc/merged201903_0619/jc2ic8jr4','epyc_jc2ic8jr4','MKL'); close; clear all; % with eigen: plot_panel_4x5(3.00,8,1, 'st','../results/epyc/merged20190306_0319_0328/st', 'epyc', 'MKL',1); close; clear all; plot_panel_4x5(2.55,8,32,'1s','../results/epyc/merged20190306_0319_0328/jc1ic8jr4','epyc_jc1ic8jr4','MKL',1); close; clear all; plot_panel_4x5(2.55,8,64,'2s','../results/epyc/merged20190306_0319_0328/jc2ic8jr4','epyc_jc2ic8jr4','MKL',1); close; clear all; blis-0.6.1/test/3/runme.sh000077500000000000000000000144541360743507500152770ustar00rootroot00000000000000#!/bin/bash # File pefixes. exec_root="test" out_root="output" delay=0.1 sys="blis" #sys="stampede2" #sys="lonestar5" #sys="ul252" #sys="ul264" # Bind threads to processors. #export OMP_PROC_BIND=true #export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 16 18 20 22 1 3 5 7 9 11 13 15 17 19 21 23" #export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103" if [ ${sys} = "blis" ]; then export GOMP_CPU_AFFINITY="0 1 2 3" threads="jc1ic1jr1_2400 jc2ic3jr2_6000 jc4ic3jr2_8000" elif [ ${sys} = "stampede2" ]; then echo "Need to set GOMP_CPU_AFFINITY." exit 1 threads="jc1ic1jr1_2400 jc4ic6jr1_6000 jc4ic12jr1_8000" elif [ ${sys} = "lonestar5" ]; then export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23" # A hack to use libiomp5 with gcc. 
#export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64" threads="jc1ic1jr1_2400 jc2ic3jr2_6000 jc4ic3jr2_8000" elif [ ${sys} = "ul252" ]; then export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51" threads="jc1ic1jr1_2400 jc2ic13jr1_6000 jc4ic13jr1_8000" elif [ ${sys} = "ul264" ]; then export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63" threads="jc1ic1jr1_2400 jc1ic8jr4_6000 jc2ic8jr4_8000" fi # Datatypes to test. test_dts="d s z c" # Operations to test. test_ops="gemm hemm herk trmm trsm" # Implementations to test. impls="blis" #impls="other" #impls="eigen" #impls="all" if [ "${impls}" = "blis" ]; then test_impls="asm_blis" elif [ "${impls}" = "eigen" ]; then test_impls="eigen" elif [ "${impls}" = "other" ]; then test_impls="openblas vendor" elif [ "${impls}" = "eigen" ]; then test_impls="eigen" else test_impls="openblas asm_blis vendor" fi # Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can # restore the value. GOMP_CPU_AFFINITYsave=${GOMP_CPU_AFFINITY} # First perform real test cases. for th in ${threads}; do # Start with one way of parallelism in each loop. We will now begin # parsing the 'th' variable to update one or more of these threading # parameters. jc_nt=1; pc_nt=1; ic_nt=1; jr_nt=1; ir_nt=1 # Strip everything before and after the underscore so that what remains # is the problem size and threading parameter string, respectively. psize=${th##*_}; thinfo=${th%%_*} # Identify each threading parameter and insert a space before it. 
thsep=$(echo -e ${thinfo} | sed -e "s/\([jip][cr]\)/ \1/g" ) nt=1 for loopnum in ${thsep}; do # Given the current string, which identifies a loop and the # number of ways of parallelism for that loop, strip out # the ways and loop separately to identify each. loop=$(echo -e ${loopnum} | sed -e "s/[0-9]//g" ) num=$(echo -e ${loopnum} | sed -e "s/[a-z]//g" ) # Construct a string that we can evaluate to set the number # of ways of parallelism for the current loop. loop_nt_eq_num="${loop}_nt=${num}" # Update the total number of threads. nt=$(expr ${nt} \* ${num}) # Evaluate the string to assign the ways to the variable. eval ${loop_nt_eq_num} done echo "Switching to: jc${jc_nt} pc${pc_nt} ic${ic_nt} jr${jr_nt} ir${ir_nt} (nt = ${nt}) p_max${psize}" for dt in ${test_dts}; do for im in ${test_impls}; do for op in ${test_ops}; do # Eigen does not support multithreading for hemm, herk, trmm, # or trsm. So if we're getting ready to execute an Eigen driver # for one of these operations and nt > 1, we skip this test. if [ "${im}" = "eigen" ] && \ [ "${op}" != "gemm" ] && \ [ "${nt}" != "1" ]; then continue; fi # Find the threading suffix by probing the executable. binname=$(ls ${exec_root}_${dt}${op}_${psize}_${im}_*.x) suf_ext=${binname##*_} suf=${suf_ext%%.*} #echo "found file: ${binname} with suffix ${suf}" # Set the number of threads according to th. if [ "${suf}" = "1s" ] || [ "${suf}" = "2s" ]; then # Set the threading parameters based on the implementation # that we are preparing to run. 
if [ "${im}" = "asm_blis" ]; then unset OMP_NUM_THREADS export BLIS_JC_NT=${jc_nt} export BLIS_PC_NT=${pc_nt} export BLIS_IC_NT=${ic_nt} export BLIS_JR_NT=${jr_nt} export BLIS_IR_NT=${ir_nt} elif [ "${im}" = "openblas" ]; then unset OMP_NUM_THREADS export OPENBLAS_NUM_THREADS=${nt} elif [ "${im}" = "eigen" ]; then export OMP_NUM_THREADS=${nt} elif [ "${im}" = "vendor" ]; then unset OMP_NUM_THREADS export MKL_NUM_THREADS=${nt} fi export nt_use=${nt} # Multithreaded OpenBLAS seems to have a problem running # properly if GOMP_CPU_AFFINITY is set. So we temporarily # unset it here if we are about to execute OpenBLAS, but # otherwise restore it. if [ ${im} = "openblas" ]; then unset GOMP_CPU_AFFINITY else export GOMP_CPU_AFFINITY="${GOMP_CPU_AFFINITYsave}" fi else export BLIS_JC_NT=1 export BLIS_PC_NT=1 export BLIS_IC_NT=1 export BLIS_JR_NT=1 export BLIS_IR_NT=1 export OMP_NUM_THREADS=1 export OPENBLAS_NUM_THREADS=1 export MKL_NUM_THREADS=1 export nt_use=1 fi # Construct the name of the test executable. exec_name="${exec_root}_${dt}${op}_${psize}_${im}_${suf}.x" # Construct the name of the output file. out_file="${out_root}_${suf}_${dt}${op}_${im}.m" #echo "Running (nt = ${nt_use}) ./${exec_name} > ${out_file}" echo "Running ./${exec_name} > ${out_file}" # Run executable. ./${exec_name} > ${out_file} sleep ${delay} done done done done blis-0.6.1/test/3/test_gemm.c000066400000000000000000000276411360743507500157440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #ifdef EIGEN #define BLIS_DISABLE_BLAS_DEFS #include "blis.h" #include #include using namespace Eigen; #else #include "blis.h" #endif #define COL_STORAGE //#define ROW_STORAGE //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input, k_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; #if 1 p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; k_input = -1; #else p_begin = 40; p_max = 1000; p_inc = 40; m_input = -1; n_input = -1; k_input = -1; #endif // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_max; p += p_inc ) for ( p = p_max; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); #ifdef COL_STORAGE bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); #else bli_obj_create( dt, m, k, k, 1, &a ); bli_obj_create( dt, k, n, n, 1, &b ); bli_obj_create( dt, m, n, n, 1, &c ); bli_obj_create( dt, m, n, n, 1, &c_save ); #endif bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif #ifdef EIGEN double alpha_r, alpha_i; bli_getsc( &alpha, &alpha_r, &alpha_i ); void* ap = bli_obj_buffer_at_off( &a ); void* bp = bli_obj_buffer_at_off( &b ); void* cp = bli_obj_buffer_at_off( &c ); #ifdef COL_STORAGE const int os_a = bli_obj_col_stride( &a ); const int os_b = bli_obj_col_stride( &b ); const int os_c = bli_obj_col_stride( &c ); #else const int os_a = bli_obj_row_stride( &a ); const int os_b = bli_obj_row_stride( &b ); const int os_c = bli_obj_row_stride( &c ); #endif Stride stride_a( os_a, 1 ); Stride stride_b( os_b, 1 ); Stride stride_c( os_c, 1 ); #ifdef COL_STORAGE #if defined(IS_FLOAT) typedef Matrix MatrixXf_; #elif defined (IS_DOUBLE) 
typedef Matrix MatrixXd_; #elif defined (IS_SCOMPLEX) typedef Matrix, Dynamic, Dynamic, ColMajor> MatrixXcf_; #elif defined (IS_DCOMPLEX) typedef Matrix, Dynamic, Dynamic, ColMajor> MatrixXcd_; #endif #else #if defined(IS_FLOAT) typedef Matrix MatrixXf_; #elif defined (IS_DOUBLE) typedef Matrix MatrixXd_; #elif defined (IS_SCOMPLEX) typedef Matrix, Dynamic, Dynamic, RowMajor> MatrixXcf_; #elif defined (IS_DCOMPLEX) typedef Matrix, Dynamic, Dynamic, RowMajor> MatrixXcd_; #endif #endif #if defined(IS_FLOAT) Map > A( ( float* )ap, m, k, stride_a ); Map > B( ( float* )bp, k, n, stride_b ); Map > C( ( float* )cp, m, n, stride_c ); #elif defined (IS_DOUBLE) Map > A( ( double* )ap, m, k, stride_a ); Map > B( ( double* )bp, k, n, stride_b ); Map > C( ( double* )cp, m, n, stride_c ); #elif defined (IS_SCOMPLEX) Map > A( ( std::complex* )ap, m, k, stride_a ); Map > B( ( std::complex* )bp, k, n, stride_b ); Map > C( ( std::complex* )cp, m, n, stride_c ); #elif defined (IS_DCOMPLEX) Map > A( ( std::complex* )ap, m, k, stride_a ); Map > B( ( std::complex* )bp, k, n, stride_b ); Map > C( ( std::complex* )cp, m, n, stride_c ); #endif #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #if defined(BLIS) bli_gemm( &alpha, &a, &b, &beta, &c ); #elif defined(EIGEN) C.noalias() += alpha_r * A * B; #else // if defined(BLAS) if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* bp = ( float* )bli_obj_buffer( &b ); float* betap = ( float* )bli_obj_buffer( &beta ); float* cp = ( float* 
)bli_obj_buffer( &c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* bp = ( double* )bli_obj_buffer( &b ); double* betap = ( double* )bli_obj_buffer( &beta ); double* cp = ( double* )bli_obj_buffer( &c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* bp = ( scomplex* )bli_obj_buffer( &b ); scomplex* betap = ( scomplex* )bli_obj_buffer( &beta ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* bp = ( dcomplex* )bli_obj_buffer( &b ); dcomplex* betap = ( dcomplex* )bli_obj_buffer( &beta ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); zgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } 
#endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/3/test_hemm.c000066400000000000000000000223011360743507500157310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. 
if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; side = BLIS_LEFT; uploa = BLIS_LOWER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_max; p += p_inc ) for ( p = p_max; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uploa, &a ); // Make A densely Hermitian, and zero the unstored triangle to // ensure the implementation reads only from the stored region. 
bli_mkherm( &a ); bli_mktrim( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* bp = ( float* )bli_obj_buffer( &b ); float* betap = ( float* )bli_obj_buffer( &beta ); float* cp = ( float* )bli_obj_buffer( &c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* bp = ( double* )bli_obj_buffer( &b ); double* betap = ( double* )bli_obj_buffer( &beta ); double* cp = ( double* )bli_obj_buffer( &c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* bp = ( float* )bli_obj_buffer( &b ); float* 
betap = ( float* )bli_obj_buffer( &beta ); float* cp = ( float* )bli_obj_buffer( &c ); #else scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* bp = ( scomplex* )bli_obj_buffer( &b ); scomplex* betap = ( scomplex* )bli_obj_buffer( &beta ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* bp = ( double* )bli_obj_buffer( &b ); double* betap = ( double* )bli_obj_buffer( &beta ); double* cp = ( double* )bli_obj_buffer( &c ); #else dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* bp = ( dcomplex* )bli_obj_buffer( &b ); dcomplex* betap = ( dcomplex* )bli_obj_buffer( &beta ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } 
blis-0.6.1/test/3/test_herk.c000066400000000000000000000210621360743507500157370ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, k_input; ind_t ind; num_t dt, dt_real; char dt_ch; int r, n_repeats; uplo_t uploc; trans_t transa; f77_char f77_uploc; f77_char f77_transa; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; dt_real = bli_dt_proj_to_real( DT ); ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; k_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; uploc = BLIS_LOWER; transa = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%cherk_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_max; p += p_inc ) for ( p = p_max; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt_real, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_does_trans( transa ) ) bli_obj_create( dt, k, m, 0, 0, &a ); else bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); //bli_obj_create( dt, m, k, 2, 2*m, &a ); //bli_obj_create( dt, k, n, 2, 2*k, &b ); //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &c ); bli_obj_set_uplo( uploc, &c ); bli_obj_set_conjtrans( transa, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_herk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* betap = ( float* )bli_obj_buffer( &beta ); float* cp = ( float* )bli_obj_buffer( &c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = 
bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* betap = ( double* )bli_obj_buffer( &beta ); double* cp = ( double* )bli_obj_buffer( &c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* betap = ( float* )bli_obj_buffer( &beta ); float* cp = ( float* )bli_obj_buffer( &c ); #else float* alphap = ( float* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); float* betap = ( float* )bli_obj_buffer( &beta ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif cherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* betap = ( double* )bli_obj_buffer( &beta ); double* cp = ( double* )bli_obj_buffer( &c ); #else double* alphap = ( double* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); double* betap = ( double* )bli_obj_buffer( &beta ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif zherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / 
( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%cherk_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/3/test_trmm.c000066400000000000000000000206431360743507500157710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. 
if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; #if 0 side = BLIS_LEFT; #else side = BLIS_RIGHT; #endif #if 0 uploa = BLIS_LOWER; #else uploa = BLIS_UPPER; #endif transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_max; p += p_inc ) for ( p = p_max; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_randm( &a ); bli_mktrim( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trmm( side, &alpha, &a, &c ); #else if ( 
bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); strmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); dtrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); #else scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif ctrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); #else dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif ztrmm_( &f77_side, &f77_uploa, 
&f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/3/test_trsm.c000066400000000000000000000210621360743507500157730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. 
if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; #if 0 side = BLIS_LEFT; #else side = BLIS_RIGHT; #endif #if 0 uploa = BLIS_LOWER; #else uploa = BLIS_UPPER; #endif transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_max; p += p_inc ) for ( p = p_max; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, n, n, 1, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. 
bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trsm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); strsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); dtrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); #else scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif ctrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = 
bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); #else dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif ztrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/Makefile000066400000000000000000000220231360743507500150770ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # Copyright (C) 2017 - 2019, Advanced Micro Devices, Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. 
# - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for standalone BLIS test drivers. # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all \ blis openblas atlas mkl \ clean cleanx # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. # - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := .. LIB_PATH = ../lib/$(CONFIG_NAME) INC_PATH = ../include/$(CONFIG_NAME) SHARE_PATH := .. endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. 
-include $(SHARE_PATH)/common.mk # # --- BLAS and LAPACK implementations ------------------------------------------ # # BLIS library and header path. This is simply wherever it was installed. #BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib #BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis # BLIS library. #BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a # BLAS library path(s). This is where the BLAS libraries reside. BLAS_LIB_PATH := $(HOME)/flame/lib #MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64 #MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64 MKL_LIB_PATH := ${MKLROOT}/lib/intel64 #ESSL_LIB_PATH := $(HOME)/path/to/essl/changeme # OpenBLAS OPENBLAS_LIB := $(BLAS_LIB_PATH)/libopenblas.a # ATLAS ATLAS_LIB := $(BLAS_LIB_PATH)/libf77blas.a \ $(BLAS_LIB_PATH)/libatlas.a # MKL MKL_LIB := -L$(MKL_LIB_PATH) \ -lmkl_intel_lp64 \ -lmkl_core \ -lmkl_sequential \ -lpthread -lm -ldl # ESSL # Note: ESSL is named differently for SMP and/or BG #ESSL_TYPE := # This is the 32b library on POWER #ESSL_TYPE := 6464 # This is the 64b library on POWER #ESSL_TYPE := bg # This is the 32b single-threaded library on Blue Gene #ESSL_TYPE := smpbg # This is the 32b multi-threaded library on Blue Gene #ESSL_LIB := $(ESSL_LIB_PATH)/libessl$(ESSL_TYPE).a # Accelerate MAC_LIB := -framework Accelerate # # --- General build definitions ------------------------------------------------ # TEST_SRC_PATH := . TEST_OBJ_PATH := . # Gather all local object files. TEST_OBJS := $(patsubst $(TEST_SRC_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(TEST_SRC_PATH)/*.c)) # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add local header paths to CFLAGS CFLAGS += -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. 
#LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # # --- Targets/rules ------------------------------------------------------------ # # Complete list of possible targets when defining 'all': # # blis openblas atlas mkl mac essl # #all: blis openblas atlas mkl all: blis openblas mkl blis: \ test_dotv_blis.x \ test_axpyv_blis.x \ test_gemv_blis.x \ test_ger_blis.x \ test_hemv_blis.x \ test_her_blis.x \ test_her2_blis.x \ test_trmv_blis.x \ test_trsv_blis.x \ \ test_gemm_blis.x \ test_hemm_blis.x \ test_herk_blis.x \ test_her2k_blis.x \ test_trmm_blis.x \ test_trsm_blis.x openblas: \ test_dotv_openblas.x \ test_axpyv_openblas.x \ test_gemv_openblas.x \ test_ger_openblas.x \ test_hemv_openblas.x \ test_her_openblas.x \ test_her2_openblas.x \ test_trmv_openblas.x \ test_trsv_openblas.x \ \ test_gemm_openblas.x \ test_hemm_openblas.x \ test_herk_openblas.x \ test_her2k_openblas.x \ test_trmm_openblas.x \ test_trsm_openblas.x atlas: \ test_dotv_atlas.x \ test_axpyv_atlas.x \ test_gemv_atlas.x \ test_ger_atlas.x \ test_hemv_atlas.x \ test_her_atlas.x \ test_her2_atlas.x \ test_trmv_atlas.x \ test_trsv_atlas.x \ \ test_gemm_atlas.x \ test_hemm_atlas.x \ test_herk_atlas.x \ test_her2k_atlas.x \ test_trmm_atlas.x \ test_trsm_atlas.x mkl: test_dotv_mkl.x \ test_axpyv_mkl.x \ test_gemv_mkl.x \ test_ger_mkl.x \ test_hemv_mkl.x \ test_her_mkl.x \ test_her2_mkl.x \ test_trmv_mkl.x \ test_trsv_mkl.x \ \ test_gemm_mkl.x \ test_hemm_mkl.x \ test_herk_mkl.x \ test_her2k_mkl.x \ test_trmm_mkl.x \ test_trsm_mkl.x essl: test_dotv_essl.x \ test_axpyv_essl.x \ test_gemv_essl.x \ test_ger_essl.x \ test_hemv_essl.x \ test_her_essl.x \ test_her2_essl.x \ test_trmv_essl.x \ test_trsv_essl.x \ \ test_gemm_essl.x \ test_hemm_essl.x \ test_herk_essl.x \ test_her2k_essl.x \ test_trmm_essl.x \ test_trsm_essl.x mac: test_dotv_mac.x \ test_axpyv_mac.x \ test_gemv_mac.x \ test_ger_mac.x \ test_hemv_mac.x \ test_her_mac.x \ test_her2_mac.x \ test_trmv_mac.x \ test_trsv_mac.x \ \ test_gemm_mac.x \ 
test_hemm_mac.x \ test_herk_mac.x \ test_her2k_mac.x \ test_trmm_mac.x \ test_trsm_mac.x # --Object file rules -- $(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(CC) $(CFLAGS) -c $< -o $@ test_%_openblas.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"openblas\" -c $< -o $@ test_%_atlas.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"atlas\" -c $< -o $@ test_%_mkl.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"mkl\" -c $< -o $@ test_%_essl.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"essl\" -c $< -o $@ test_%_mac.o: test_%.c $(CC) $(CFLAGS) -DBLAS=\"mac\" -c $< -o $@ test_%_blis.o: test_%.c $(CC) $(CFLAGS) -DBLIS -c $< -o $@ # -- Executable file rules -- # NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS # on the link command line in case BLIS was configured with the BLAS # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. test_%_openblas.x: test_%_openblas.o $(LIBBLIS_LINK) $(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_atlas.x: test_%_atlas.o $(LIBBLIS_LINK) $(LINKER) $< $(ATLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_mkl.x: test_%_mkl.o $(LIBBLIS_LINK) $(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_essl.x: test_%_essl.o $(LIBBLIS_LINK) $(LINKER) $< $(ESSL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_mac.x: test_%_mac.o $(LIBBLIS_LINK) $(LINKER) $< $(MAC_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_blis.x: test_%_blis.o $(LIBBLIS_LINK) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.o *.x blis-0.6.1/test/exec_sizes/000077500000000000000000000000001360743507500156015ustar00rootroot00000000000000blis-0.6.1/test/exec_sizes/Makefile000066400000000000000000000274061360743507500172520ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for standalone BLIS test drivers. # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all \ blis openblas atlas mkl \ clean cleanx # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. 
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := ../.. LIB_PATH = ../../lib/$(CONFIG_NAME) INC_PATH = ../../include/$(CONFIG_NAME) SHARE_PATH := ../.. endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. -include $(SHARE_PATH)/common.mk # # --- BLAS and LAPACK implementations ------------------------------------------ # # BLIS library and header path. This is simply wherever it was installed. #BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib #BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis # BLIS library. #BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a # BLAS library path(s). This is where the BLAS libraries reside. BLAS_LIB_PATH := $(HOME)/flame/lib #MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64/ MKL_LIB_PATH := ${MKLROOT}/lib/intel64 # OpenBLAS, ATLAS, and MKL libraries. #BLAS_LIB := $(LIB_PATH)/libblas.a #BLAS_LIB := $(LIB_PATH)/libgoto.a #BLAS_LIB := $(LIB_PATH)/libgoto2.a OBLAS_LIB := $(BLAS_LIB_PATH)/libopenblas.a ABLAS_LIB := $(BLAS_LIB_PATH)/libf77blas.a \ $(BLAS_LIB_PATH)/libatlas.a #MBLAS_LIB := -L$(MKL_LIB_PATH) \ # -lmkl_sequential \ # -lmkl_core \ # -lmkl_intel_lp64 #MBLAS_LIB := -Wl,--start-group \ # $(MKL_LIB_PATH)/libmkl_sequential.a \ # $(MKL_LIB_PATH)/libmkl_core.a \ # $(MKL_LIB_PATH)/libmkl_intel_ilp64.a \ # -Wl,--end-group \ # -lpthread -lm MBLAS_LIB := -L$(MKL_LIB_PATH) \ -lmkl_intel_lp64 \ -lmkl_core \ -lmkl_sequential \ -lpthread -lm -ldl # # --- General build definitions ------------------------------------------------ # TEST_SRC_PATH := . TEST_OBJ_PATH := . ## Gather all local source files. 
TEST_SIZES_SRC := test_size.c # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add local header paths to CFLAGS CFLAGS += -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. #LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # # --- Targets/rules ------------------------------------------------------------ # MAKE_BLIS := yes MAKE_OPENBLAS := yes MAKE_MKL := yes MAKE_ATLAS := no MAKE_DCOMPLEX := yes ifeq ($(MAKE_BLIS),yes) TEST_BINS += test_blis01 \ test_blis02 \ test_blis03 \ test_blis04 \ test_blis05 \ test_blis06 ifeq ($(MAKE_DCOMPLEX),yes) TEST_BINS += test_blis07 \ test_blis08 \ test_blis09 \ test_blis10 \ test_blis11 \ test_blis12 endif endif ifeq ($(MAKE_OPENBLAS),yes) TEST_BINS += test_oblas01 \ test_oblas02 \ test_oblas03 \ test_oblas04 \ test_oblas05 \ test_oblas06 ifeq ($(MAKE_DCOMPLEX),yes) TEST_BINS += test_oblas07 \ test_oblas08 \ test_oblas09 \ test_oblas10 \ test_oblas11 \ test_oblas12 endif endif ifeq ($(MAKE_ATLAS),yes) TEST_BINS += test_ablas01 \ test_ablas02 \ test_ablas03 \ test_ablas04 \ test_ablas05 \ test_ablas06 ifeq ($(MAKE_DCOMPLEX),yes) TEST_BINS += test_ablas07 \ test_ablas08 \ test_ablas09 \ test_ablas10 \ test_ablas11 \ test_ablas12 endif endif ifeq ($(MAKE_MKL),yes) TEST_BINS += test_mblas01 \ test_mblas02 \ test_mblas03 \ test_mblas04 \ test_mblas05 \ test_mblas06 ifeq ($(MAKE_DCOMPLEX),yes) TEST_BINS += test_mblas07 \ test_mblas08 \ test_mblas09 \ test_mblas10 \ test_mblas11 \ test_mblas12 endif endif # --Object file rules -- all: $(TEST_BINS) $(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(CC) $(CFLAGS) -c $< -o $@ # -- Executable file rules -- # BLIS rules test_blis01: $(CC) $(CFLAGS) -DNBLIS=1 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis02: $(CC) $(CFLAGS) 
-DNBLIS=2 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis03: $(CC) $(CFLAGS) -DNBLIS=3 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis04: $(CC) $(CFLAGS) -DNBLIS=4 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis05: $(CC) $(CFLAGS) -DNBLIS=5 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis06: $(CC) $(CFLAGS) -DNBLIS=6 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis07: $(CC) $(CFLAGS) -DNBLIS=7 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis08: $(CC) $(CFLAGS) -DNBLIS=8 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis09: $(CC) $(CFLAGS) -DNBLIS=9 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis10: $(CC) $(CFLAGS) -DNBLIS=10 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis11: $(CC) $(CFLAGS) -DNBLIS=11 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_blis12: $(CC) $(CFLAGS) -DNBLIS=12 $(TEST_SIZES_SRC) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x # OpenBLAS rules test_oblas01: $(CC) $(CFLAGS) -DNBLAS=1 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas02: $(CC) $(CFLAGS) -DNBLAS=2 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas03: $(CC) $(CFLAGS) -DNBLAS=3 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas04: $(CC) $(CFLAGS) -DNBLAS=4 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas05: $(CC) $(CFLAGS) -DNBLAS=5 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas06: $(CC) $(CFLAGS) -DNBLAS=6 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas07: $(CC) $(CFLAGS) -DNBLAS=7 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas08: $(CC) $(CFLAGS) -DNBLAS=8 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas09: $(CC) $(CFLAGS) -DNBLAS=9 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas10: $(CC) $(CFLAGS) -DNBLAS=10 
$(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas11: $(CC) $(CFLAGS) -DNBLAS=11 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_oblas12: $(CC) $(CFLAGS) -DNBLAS=12 $(TEST_SIZES_SRC) $(OBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x # ATLAS BLAS rules test_ablas01: $(CC) $(CFLAGS) -DNBLAS=1 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas02: $(CC) $(CFLAGS) -DNBLAS=2 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas03: $(CC) $(CFLAGS) -DNBLAS=3 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas04: $(CC) $(CFLAGS) -DNBLAS=4 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas05: $(CC) $(CFLAGS) -DNBLAS=5 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas06: $(CC) $(CFLAGS) -DNBLAS=6 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas07: $(CC) $(CFLAGS) -DNBLAS=7 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas08: $(CC) $(CFLAGS) -DNBLAS=8 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas09: $(CC) $(CFLAGS) -DNBLAS=9 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas10: $(CC) $(CFLAGS) -DNBLAS=10 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas11: $(CC) $(CFLAGS) -DNBLAS=11 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_ablas12: $(CC) $(CFLAGS) -DNBLAS=12 $(TEST_SIZES_SRC) $(ABLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x # MKL BLAS rules test_mblas01: $(CC) $(CFLAGS) -DNBLAS=1 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas02: $(CC) $(CFLAGS) -DNBLAS=2 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas03: $(CC) $(CFLAGS) -DNBLAS=3 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas04: $(CC) $(CFLAGS) -DNBLAS=4 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) 
$(LDFLAGS) -o $@.x test_mblas05: $(CC) $(CFLAGS) -DNBLAS=5 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas06: $(CC) $(CFLAGS) -DNBLAS=6 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas07: $(CC) $(CFLAGS) -DNBLAS=7 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas08: $(CC) $(CFLAGS) -DNBLAS=8 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas09: $(CC) $(CFLAGS) -DNBLAS=9 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas10: $(CC) $(CFLAGS) -DNBLAS=10 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas11: $(CC) $(CFLAGS) -DNBLAS=11 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x test_mblas12: $(CC) $(CFLAGS) -DNBLAS=12 $(TEST_SIZES_SRC) $(MBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@.x # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.x blis-0.6.1/test/exec_sizes/grab_top_outputs.sh000077500000000000000000000044331360743507500215440ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # output_file="output_mem_sizes.txt" exec_prefix="test" exec_blis="blis1 blis2 blis3 blis4 blis5 blis6" exec_oblas="oblas1 oblas2 oblas3 oblas4 oblas5 oblas6" exec_ablas="ablas1 ablas2 ablas3 ablas4 ablas5 ablas6" exec_mblas="mblas1 mblas2 mblas3 mblas4 mblas5 mblas6" execs="${exec_blis} ${exec_oblas} ${exec_ablas} ${exec_mblas}" # Send column labels to the output file. top -n 1 -b | grep COMMAND >> ${output_file} for e in ${execs}; do exec_name="${exec_prefix}_${e}.x" echo "Capturing ${exec_name}..." 
./${exec_name} & sleep 1 top -n 1 -b | grep "${exec_prefix}" >> ${output_file} pkill "${exec_name}" > /dev/null done blis-0.6.1/test/exec_sizes/makefile.prev000066400000000000000000000116431360743507500202610ustar00rootroot00000000000000# # test directory makefile # MAKE_BLIS := yes MAKE_OPEN := yes MAKE_ATLAS := yes MAKE_MKL := yes BLIS_LIB := $(HOME)/blis/lib/libblis.a INC_PATH := $(HOME)/blis/include LIB_PATH := $(HOME)/flame/lib MKL_PATH := /opt/intel/mkl/10.2.2.025 OBLAS_LIB := $(LIB_PATH)/libopenblas.a ABLAS_LIB := $(LIB_PATH)/libf77blas.a \ $(LIB_PATH)/libatlas.a #BLAS_LIB := -L/opt/intel/mkl/10.2.2.025/lib/em64t/ \ # -lmkl_sequential -lmkl_core -lmkl_intel_lp64 MBLAS_LIB := $(MKL_PATH)/lib/em64t/libmkl_solver_lp64_sequential.a \ -Wl,--start-group \ $(MKL_PATH)/lib/em64t/libmkl_intel_lp64.a \ $(MKL_PATH)/lib/em64t/libmkl_sequential.a \ $(MKL_PATH)/lib/em64t/libmkl_core.a \ -Wl,--end-group \ -lpthread -lm CC := gcc CFLAGS := -I$(INC_PATH) \ -O2 -fomit-frame-pointer \ -std=c99 \ -Wall -Wno-comment #-pg #-g LINKER := $(CC) LDFLAGS := -L/usr/lib/gcc/x86_64-redhat-linux/4.1.2 -L/usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -lgfortranbegin -lgfortran -lm LDFLAGS += -lpthread TEST_SIZES_SRC := test_size.c TEST_BINS := ifeq ($(MAKE_BLIS),yes) TEST_BINS += test_blis1 \ test_blis2 \ test_blis3 \ test_blis4 \ test_blis5 \ test_blis6 endif ifeq ($(MAKE_OPEN),yes) TEST_BINS += test_oblas1 \ test_oblas2 \ test_oblas3 \ test_oblas4 \ test_oblas5 \ test_oblas6 endif ifeq ($(MAKE_ATLAS),yes) TEST_BINS += test_ablas1 \ test_ablas2 \ test_ablas3 \ test_ablas4 \ test_ablas5 \ test_ablas6 endif ifeq ($(MAKE_MKL),yes) TEST_BINS += test_mblas1 \ test_mblas2 \ test_mblas3 \ test_mblas4 \ test_mblas5 \ test_mblas6 endif %.o: %.c $(CC) $(CFLAGS) -c $< -o $@ all: $(TEST_BINS) # @echo "$(TEST_BINS)" # BLIS rules test_blis1: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLIS=1 $(TEST_SIZES_SRC) $(BLIS_LIB) $(LDFLAGS) -o $@.x test_blis2: $(BLIS_LIB) $(CC) 
$(CFLAGS) -DNBLIS=2 $(TEST_SIZES_SRC) $(BLIS_LIB) $(LDFLAGS) -o $@.x test_blis3: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLIS=3 $(TEST_SIZES_SRC) $(BLIS_LIB) $(LDFLAGS) -o $@.x test_blis4: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLIS=4 $(TEST_SIZES_SRC) $(BLIS_LIB) $(LDFLAGS) -o $@.x test_blis5: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLIS=5 $(TEST_SIZES_SRC) $(BLIS_LIB) $(LDFLAGS) -o $@.x test_blis6: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLIS=6 $(TEST_SIZES_SRC) $(BLIS_LIB) $(LDFLAGS) -o $@.x # OpenBLAS rules test_oblas1: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=1 $(TEST_SIZES_SRC) $(BLIS_LIB) $(OBLAS_LIB) $(LDFLAGS) -o $@.x test_oblas2: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=2 $(TEST_SIZES_SRC) $(BLIS_LIB) $(OBLAS_LIB) $(LDFLAGS) -o $@.x test_oblas3: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=3 $(TEST_SIZES_SRC) $(BLIS_LIB) $(OBLAS_LIB) $(LDFLAGS) -o $@.x test_oblas4: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=4 $(TEST_SIZES_SRC) $(BLIS_LIB) $(OBLAS_LIB) $(LDFLAGS) -o $@.x test_oblas5: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=5 $(TEST_SIZES_SRC) $(BLIS_LIB) $(OBLAS_LIB) $(LDFLAGS) -o $@.x test_oblas6: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=6 $(TEST_SIZES_SRC) $(BLIS_LIB) $(OBLAS_LIB) $(LDFLAGS) -o $@.x # ATLAS BLAS rules test_ablas1: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=1 $(TEST_SIZES_SRC) $(BLIS_LIB) $(ABLAS_LIB) $(LDFLAGS) -o $@.x test_ablas2: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=2 $(TEST_SIZES_SRC) $(BLIS_LIB) $(ABLAS_LIB) $(LDFLAGS) -o $@.x test_ablas3: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=3 $(TEST_SIZES_SRC) $(BLIS_LIB) $(ABLAS_LIB) $(LDFLAGS) -o $@.x test_ablas4: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=4 $(TEST_SIZES_SRC) $(BLIS_LIB) $(ABLAS_LIB) $(LDFLAGS) -o $@.x test_ablas5: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=5 $(TEST_SIZES_SRC) $(BLIS_LIB) $(ABLAS_LIB) $(LDFLAGS) -o $@.x test_ablas6: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=6 $(TEST_SIZES_SRC) $(BLIS_LIB) $(ABLAS_LIB) $(LDFLAGS) -o $@.x # MKL BLAS rules test_mblas1: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=1 $(TEST_SIZES_SRC) $(BLIS_LIB) $(MBLAS_LIB) $(LDFLAGS) -o $@.x test_mblas2: 
$(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=2 $(TEST_SIZES_SRC) $(BLIS_LIB) $(MBLAS_LIB) $(LDFLAGS) -o $@.x test_mblas3: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=3 $(TEST_SIZES_SRC) $(BLIS_LIB) $(MBLAS_LIB) $(LDFLAGS) -o $@.x test_mblas4: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=4 $(TEST_SIZES_SRC) $(BLIS_LIB) $(MBLAS_LIB) $(LDFLAGS) -o $@.x test_mblas5: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=5 $(TEST_SIZES_SRC) $(BLIS_LIB) $(MBLAS_LIB) $(LDFLAGS) -o $@.x test_mblas6: $(BLIS_LIB) $(CC) $(CFLAGS) -DNBLAS=6 $(TEST_SIZES_SRC) $(BLIS_LIB) $(MBLAS_LIB) $(LDFLAGS) -o $@.x clean: rm -f *.x blis-0.6.1/test/exec_sizes/test_size.c000066400000000000000000000333061360743507500177630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" /* void dgemm_ ( char* transa, char* transb, int* m, int* n, int* k, double* alpha, double* a, int* lda, double* b, int* ldb, double* beta, double* c, int* ldc ); void zhemm_ ( char* side, char* uplo, int* m, int* n, dcomplex* alpha, dcomplex* a, int* lda, dcomplex* b, int* ldb, dcomplex* beta, dcomplex* c, int* ldc ); void zherk_ ( char* uplo, char* transa, int* n, int* k, double* alpha, dcomplex* a, int* lda, double* beta, dcomplex* c, int* ldc ); void zher2k_( char* uplo, char* transa, int* n, int* k, dcomplex* alpha, dcomplex* a, int* lda, dcomplex* b, int* ldb, double* beta, dcomplex* c, int* ldc ); void dsymm_ ( char* side, char* uplo, int* m, int* n, double* alpha, double* a, int* lda, double* b, int* ldb, double* beta, double* c, int* ldc ); void dsyrk_ ( char* uplo, char* transa, int* n, int* k, double* alpha, double* a, int* lda, double* beta, double* c, int* ldc ); void dsyr2k_( char* uplo, char* transa, int* n, int* k, double* alpha, double* a, int* lda, double* b, int* ldb, double* beta, double* c, int* ldc ); void dtrmm_ ( char* side, char* uplo, char* transa, char* diag, int* m, int* n, double* alpha, double* a, int* lda, double* b, int* ldb ); void dtrsm_ ( char* side, char* uplo, char* transa, char* diag, int* m, int* n, double* alpha, double* a, int* lda, double* b, int* ldb ); */ int main( int argc, char** argv ) { obj_t a, b, c; obj_t x, y; obj_t alpha, beta; dim_t m; num_t dt_a, dt_b, dt_c; num_t dt_alpha, 
dt_beta; int ii; #ifdef NBLIS //bli_init(); #endif m = 4000; dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; { #ifdef NBLIS bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, 1, 0, 0, &x ); bli_obj_create( dt_a, m, 1, 0, 0, &y ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_b, m, m, 0, 0, &b ); bli_obj_create( dt_c, m, m, 0, 0, &c ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); #endif #ifdef NBLAS x.buffer = malloc( m * 1 * sizeof( double ) ); y.buffer = malloc( m * 1 * sizeof( double ) ); alpha.buffer = malloc( 1 * sizeof( double ) ); beta.buffer = malloc( 1 * sizeof( double ) ); a.buffer = malloc( m * m * sizeof( double ) ); bli_obj_set_dims( m, m, &a ); bli_obj_set_strides( m, 1, &a ); b.buffer = malloc( m * m * sizeof( double ) ); bli_obj_set_dims( m, m, &b ); bli_obj_set_strides( m, 1, &b ); c.buffer = malloc( m * m * sizeof( double ) ); bli_obj_set_dims( m, m, &c ); bli_obj_set_strides( m, 1, &c ); *((double*)alpha.buffer) = 2.0; *((double*)beta.buffer) = -1.0; #endif #ifdef NBLIS #if NBLIS >= 1 for ( ii = 0; ii < 2000000000; ++ii ) { bli_gemm( &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 2 { bli_hemm( BLIS_LEFT, &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 3 { bli_herk( &BLIS_ONE, &a, &BLIS_ONE, &c ); } #endif #if NBLIS >= 4 { bli_her2k( &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 5 { bli_trmm( BLIS_LEFT, &BLIS_ONE, &a, &c ); } #endif #if NBLIS >= 6 { bli_trsm( BLIS_LEFT, &BLIS_ONE, &a, &c ); } #endif #endif #ifdef NBLAS #if NBLAS >= 1 for ( ii = 0; ii < 2000000000; ++ii ) { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = 
bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 2 { f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsymm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 3 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyrk_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #if NBLAS >= 4 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyr2k_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 5 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = 
bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* cp = bli_obj_buffer( &c ); dtrmm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 6 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* cp = bli_obj_buffer( &c ); dtrsm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 7 { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 8 { f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zhemm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 9 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = 
bli_obj_width( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zherk_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #if NBLAS >= 10 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zher2k_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 11 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* cp = bli_obj_buffer( &c ); ztrmm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 12 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* cp = bli_obj_buffer( &c ); ztrsm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #endif #ifdef NBLIS bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); #endif #ifdef NBLAS free( x.buffer ); free( 
y.buffer ); free( alpha.buffer ); free( beta.buffer ); free( a.buffer ); free( b.buffer ); free( c.buffer ); #endif } #ifdef NBLIS //bli_finalize(); #endif return 0; } blis-0.6.1/test/mixeddt/000077500000000000000000000000001360743507500150765ustar00rootroot00000000000000blis-0.6.1/test/mixeddt/Makefile000066400000000000000000000244071360743507500165450ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for standalone BLIS test drivers. # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all all-st all-mt \ blis blis-st blis-mt \ blis-nat blis-nat-st blis-nat-mt \ openblas openblas-st openblas-mt \ mkl mkl-st mkl-mt \ blis-gemm-st blis-gemm-mt \ blis-gemm-nat-st blis-gemm-nat-mt \ openblas-gemm-st openblas-gemm-mt \ mkl-gemm-st mkl-gemm-mt \ clean cleanx # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. # - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := ../.. LIB_PATH = ../../lib/$(CONFIG_NAME) INC_PATH = ../../include/$(CONFIG_NAME) SHARE_PATH := ../.. endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. -include $(SHARE_PATH)/common.mk # # --- BLAS and LAPACK implementations ------------------------------------------ # # BLIS library and header path. This is simply wherever it was installed. #BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib #BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis # BLIS library. 
#BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a # BLAS library path(s). This is where the BLAS libraries reside. HOME_LIB_PATH := $(HOME)/flame/lib # # --- General build definitions ------------------------------------------------ # TEST_SRC_PATH := . TEST_OBJ_PATH := . # Gather all local object files. TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(TEST_SRC_PATH)/*.c))) # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-frame-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the "framework" CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add local header paths to CFLAGS. CFLAGS += -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. #LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # Which library? BLI_DEF := -DBLIS BLA_DEF := -DBLAS # Single or multithreaded string STR_ST := -DTHR_STR=\"st\" STR_MT := -DTHR_STR=\"mt\" # Problem size specification PDEF_ST := -DP_BEGIN=40 \ -DP_MAX=2000 \ -DP_INC=40 PDEF_MT := -DP_BEGIN=160 \ -DP_MAX=8000 \ -DP_INC=160 # Enumerate possible datatypes and computation precisions. dts := s d c z prs := s d # Various functions that help us construct the datatype combinations and then # extract the needed datatype strings and C preprocessor define flags. 
get-char-c = $(word 1,$(subst _, ,$(1))) get-char-a = $(word 2,$(subst _, ,$(1))) get-char-b = $(word 3,$(subst _, ,$(1))) get-char-x = $(word 4,$(subst _, ,$(1))) get-cstr = $(call get-char-c,$(1))$(call get-char-a,$(1))$(call get-char-b,$(1))$(call get-char-x,$(1)) get-cdef-a = $(strip $(subst s,-DDTA=BLIS_FLOAT, \ $(subst d,-DDTA=BLIS_DOUBLE, \ $(subst c,-DDTA=BLIS_SCOMPLEX, \ $(subst z,-DDTA=BLIS_DCOMPLEX,$(call get-char-a,$(1))))))) get-cdef-b = $(strip $(subst s,-DDTB=BLIS_FLOAT, \ $(subst d,-DDTB=BLIS_DOUBLE, \ $(subst c,-DDTB=BLIS_SCOMPLEX, \ $(subst z,-DDTB=BLIS_DCOMPLEX,$(call get-char-b,$(1))))))) get-cdef-c = $(strip $(subst s,-DDTC=BLIS_FLOAT, \ $(subst d,-DDTC=BLIS_DOUBLE, \ $(subst c,-DDTC=BLIS_SCOMPLEX, \ $(subst z,-DDTC=BLIS_DCOMPLEX,$(call get-char-c,$(1))))))) get-cdef-x = $(strip $(subst s,-DDTX=BLIS_FLOAT, \ $(subst d,-DDTX=BLIS_DOUBLE,$(call get-char-x,$(1))))) get-cdefs = $(call get-cdef-c,$(1)) $(call get-cdef-a,$(1)) $(call get-cdef-b,$(1)) $(call get-cdef-x,$(1)) # Define a function to return the appropriate -DSTR= and -D[BLIS|BLAS] flags. get-idefs = $(strip $(subst intern,-DSTR=\"$(1)\" -DBLIS, \ $(subst ad_hoc,-DSTR=\"$(1)\" -DBLAS, \ $(subst mkl,-DSTR=\"$(1)\" -DBLAS,$(1))))) # Enumerate all possible datatype combinations. DT_CODES := $(foreach dt0,$(dts),$(foreach dt1,$(dts),$(foreach dt2,$(dts),$(foreach pr,$(prs),$(dt0)_$(dt1)_$(dt2)_$(pr))))) # Build a list of the datatype strings. DT_COMBOS := $(foreach code,$(DT_CODES),$(call get-cstr,$(code))) # Build a list of BLIS, OpenBLAS, and MKL executables. 
INTERN_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_intern_st.o) INTERN_BINS_ST := $(patsubst %.o,%.x,$(INTERN_OBJS_ST)) AD_HOC_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_ad_hoc_st.o) AD_HOC_BINS_ST := $(patsubst %.o,%.x,$(AD_HOC_OBJS_ST)) INTERN_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_intern_mt.o) INTERN_BINS_MT := $(patsubst %.o,%.x,$(INTERN_OBJS_MT)) AD_HOC_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_ad_hoc_mt.o) AD_HOC_BINS_MT := $(patsubst %.o,%.x,$(AD_HOC_OBJS_MT)) # # --- Targets/rules ------------------------------------------------------------ # all: st st: intern-st ad_hoc-st mt: intern-mt ad_hoc-mt intern-st: $(INTERN_BINS_ST) ad_hoc-st: $(AD_HOC_BINS_ST) intern-mt: $(INTERN_BINS_MT) ad_hoc-mt: $(AD_HOC_BINS_MT) #blis: test_ssssgemm_asm_blis_st.x \ # test_sssdgemm_asm_blis_st.x \ # test_ssdsgemm_asm_blis_st.x \ # test_sdssgemm_asm_blis_st.x \ # test_dsssgemm_asm_blis_st.x \ # test_dddsgemm_asm_blis_st.x \ # test_ddddgemm_asm_blis_st.x # --Object file rules -- # Define the function that will be used to instantiate compilation rules # for the various implementations. define make-st-rule test_$(call get-cstr,$(1))gemm_$(2)_st.o: test_gemm.c Makefile ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(CFLAGS) $(PDEF_ST) $(call get-cdefs,$(1)) $(call get-idefs,$(2)) $(STR_ST) -c $$< -o $$@ else @echo "Compiling $$@" @$(CC) $(CFLAGS) $(PDEF_ST) $(call get-cdefs,$(1)) $(call get-idefs,$(2)) $(STR_ST) -c $$< -o $$@ endif endef define make-mt-rule test_$(call get-cstr,$(1))gemm_$(2)_mt.o: test_gemm.c Makefile ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(CFLAGS) $(PDEF_MT) $(call get-cdefs,$(1)) $(call get-idefs,$(2)) $(STR_MT) -c $$< -o $$@ else @echo "Compiling $$@" @$(CC) $(CFLAGS) $(PDEF_MT) $(call get-cdefs,$(1)) $(call get-idefs,$(2)) $(STR_MT) -c $$< -o $$@ endif endef # Define the implementations for which we will instantiate compilation rules. 
IMPLS := intern ad_hoc # Instantiate the rule function make-st-rule() and make-mt-rule for each # implementation in IMPLS and each of the datatype "codes" in DT_CODES. $(foreach impl,$(IMPLS), \ $(foreach code,$(DT_CODES),$(eval $(call make-st-rule,$(code),$(impl))))) $(foreach impl,$(IMPLS), \ $(foreach code,$(DT_CODES),$(eval $(call make-mt-rule,$(code),$(impl))))) # -- Executable file rules -- # NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS # on the link command line in case BLIS was configured with the BLAS # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. test_%_ad_hoc_st.x: test_%_ad_hoc_st.o $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ $(RM_F) $< else @@echo "Linking $@ to '$(LIBBLIS_LINK)'" @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ @$(RM_F) $< endif test_%_ad_hoc_mt.x: test_%_ad_hoc_mt.o $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ $(RM_F) $< else @@echo "Linking $@ to '$(LIBBLIS_LINK)'" @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ @$(RM_F) $< endif test_%_intern_st.x: test_%_intern_st.o $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ $(RM_F) $< else @@echo "Linking $@ to '$(LIBBLIS_LINK)'" @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ @$(RM_F) $< endif test_%_intern_mt.x: test_%_intern_mt.o $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ $(RM_F) $< else @@echo "Linking $@ to '$(LIBBLIS_LINK)'" @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ @$(RM_F) $< endif # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.o *.x cleanout: - $(RM_F) *.m 
blis-0.6.1/test/mixeddt/matlab/000077500000000000000000000000001360743507500163365ustar00rootroot00000000000000blis-0.6.1/test/mixeddt/matlab/gen_dt_combos.m000066400000000000000000000101531360743507500213160ustar00rootroot00000000000000function r_val = gen_dt_combos() dt_chars = [ 's' 'd' 'c' 'z' ]; pr_chars = [ 's' 'd' ]; if 0 i = 1; for dtc = dt_chars for dta = dt_chars for dtb = dt_chars for pr = pr_chars dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); i = i + 1; end end end end end %n_combos = size(temp,1); if 1 dt_combos( 1, : ) = 'ssss'; dt_combos( 2, : ) = 'ssds'; dt_combos( 3, : ) = 'sscs'; dt_combos( 4, : ) = 'sszs'; dt_combos( 5, : ) = 'sdss'; dt_combos( 6, : ) = 'sdds'; dt_combos( 7, : ) = 'sdcs'; dt_combos( 8, : ) = 'sdzs'; dt_combos( 9, : ) = 'sssd'; dt_combos( 10, : ) = 'ssdd'; dt_combos( 11, : ) = 'sscd'; dt_combos( 12, : ) = 'sszd'; dt_combos( 13, : ) = 'sdsd'; dt_combos( 14, : ) = 'sddd'; dt_combos( 15, : ) = 'sdcd'; dt_combos( 16, : ) = 'sdzd'; dt_combos( 17, : ) = 'scss'; dt_combos( 18, : ) = 'scds'; dt_combos( 19, : ) = 'sccs'; dt_combos( 20, : ) = 'sczs'; dt_combos( 21, : ) = 'szss'; dt_combos( 22, : ) = 'szds'; dt_combos( 23, : ) = 'szcs'; dt_combos( 24, : ) = 'szzs'; dt_combos( 25, : ) = 'scsd'; dt_combos( 26, : ) = 'scdd'; dt_combos( 27, : ) = 'sccd'; dt_combos( 28, : ) = 'sczd'; dt_combos( 29, : ) = 'szsd'; dt_combos( 30, : ) = 'szdd'; dt_combos( 31, : ) = 'szcd'; dt_combos( 32, : ) = 'szzd'; dt_combos( 33, : ) = 'dsss'; dt_combos( 34, : ) = 'dsds'; dt_combos( 35, : ) = 'dscs'; dt_combos( 36, : ) = 'dszs'; dt_combos( 37, : ) = 'ddss'; dt_combos( 38, : ) = 'ddds'; dt_combos( 39, : ) = 'ddcs'; dt_combos( 40, : ) = 'ddzs'; dt_combos( 41, : ) = 'dssd'; dt_combos( 42, : ) = 'dsdd'; dt_combos( 43, : ) = 'dscd'; dt_combos( 44, : ) = 'dszd'; dt_combos( 45, : ) = 'ddsd'; dt_combos( 46, : ) = 'dddd'; dt_combos( 47, : ) = 'ddcd'; dt_combos( 48, : ) = 'ddzd'; dt_combos( 49, : ) = 'dcss'; dt_combos( 50, : ) = 'dcds'; dt_combos( 51, : ) = 
'dccs'; dt_combos( 52, : ) = 'dczs'; dt_combos( 53, : ) = 'dzss'; dt_combos( 54, : ) = 'dzds'; dt_combos( 55, : ) = 'dzcs'; dt_combos( 56, : ) = 'dzzs'; dt_combos( 57, : ) = 'dcsd'; dt_combos( 58, : ) = 'dcdd'; dt_combos( 59, : ) = 'dccd'; dt_combos( 60, : ) = 'dczd'; dt_combos( 61, : ) = 'dzsd'; dt_combos( 62, : ) = 'dzdd'; dt_combos( 63, : ) = 'dzcd'; dt_combos( 64, : ) = 'dzzd'; dt_combos( 65, : ) = 'csss'; dt_combos( 66, : ) = 'csds'; dt_combos( 67, : ) = 'cscs'; dt_combos( 68, : ) = 'cszs'; dt_combos( 69, : ) = 'cdss'; dt_combos( 70, : ) = 'cdds'; dt_combos( 71, : ) = 'cdcs'; dt_combos( 72, : ) = 'cdzs'; dt_combos( 73, : ) = 'cssd'; dt_combos( 74, : ) = 'csdd'; dt_combos( 75, : ) = 'cscd'; dt_combos( 76, : ) = 'cszd'; dt_combos( 77, : ) = 'cdsd'; dt_combos( 78, : ) = 'cddd'; dt_combos( 79, : ) = 'cdcd'; dt_combos( 80, : ) = 'cdzd'; dt_combos( 81, : ) = 'ccss'; dt_combos( 82, : ) = 'ccds'; dt_combos( 83, : ) = 'cccs'; dt_combos( 84, : ) = 'cczs'; dt_combos( 85, : ) = 'czss'; dt_combos( 86, : ) = 'czds'; dt_combos( 87, : ) = 'czcs'; dt_combos( 88, : ) = 'czzs'; dt_combos( 89, : ) = 'ccsd'; dt_combos( 90, : ) = 'ccdd'; dt_combos( 91, : ) = 'cccd'; dt_combos( 92, : ) = 'cczd'; dt_combos( 93, : ) = 'czsd'; dt_combos( 94, : ) = 'czdd'; dt_combos( 95, : ) = 'czcd'; dt_combos( 96, : ) = 'czzd'; dt_combos( 97, : ) = 'zsss'; dt_combos( 98, : ) = 'zsds'; dt_combos( 99, : ) = 'zscs'; dt_combos( 100, : ) = 'zszs'; dt_combos( 101, : ) = 'zdss'; dt_combos( 102, : ) = 'zdds'; dt_combos( 103, : ) = 'zdcs'; dt_combos( 104, : ) = 'zdzs'; dt_combos( 105, : ) = 'zssd'; dt_combos( 106, : ) = 'zsdd'; dt_combos( 107, : ) = 'zscd'; dt_combos( 108, : ) = 'zszd'; dt_combos( 109, : ) = 'zdsd'; dt_combos( 110, : ) = 'zddd'; dt_combos( 111, : ) = 'zdcd'; dt_combos( 112, : ) = 'zdzd'; dt_combos( 113, : ) = 'zcss'; dt_combos( 114, : ) = 'zcds'; dt_combos( 115, : ) = 'zccs'; dt_combos( 116, : ) = 'zczs'; dt_combos( 117, : ) = 'zzss'; dt_combos( 118, : ) = 'zzds'; dt_combos( 119, : ) = 'zzcs'; 
dt_combos( 120, : ) = 'zzzs'; dt_combos( 121, : ) = 'zcsd'; dt_combos( 122, : ) = 'zcdd'; dt_combos( 123, : ) = 'zccd'; dt_combos( 124, : ) = 'zczd'; dt_combos( 125, : ) = 'zzsd'; dt_combos( 126, : ) = 'zzdd'; dt_combos( 127, : ) = 'zzcd'; dt_combos( 128, : ) = 'zzzd'; end r_val = dt_combos; end blis-0.6.1/test/mixeddt/matlab/gen_prec_combos.m000066400000000000000000000041171360743507500216430ustar00rootroot00000000000000function r_val = gen_prec_combos( mdcase ) dt_chars = [ 's' 'd' 'c' 'z' ]; pr_chars = [ 's' 'd' ]; dm_chars = [ 'r' 'c' ]; dmc = mdcase( 1 ); dma = mdcase( 2 ); dmb = mdcase( 3 ); if 0 pr_combos( 1, : ) = 'ssss'; pr_combos( 2, : ) = 'ssds'; pr_combos( 3, : ) = 'sdss'; pr_combos( 4, : ) = 'sdds'; pr_combos( 5, : ) = 'dsss'; pr_combos( 6, : ) = 'dsds'; pr_combos( 7, : ) = 'ddss'; pr_combos( 8, : ) = 'ddds'; pr_combos( 9, : ) = 'dddd'; pr_combos( 10, : ) = 'ddsd'; pr_combos( 11, : ) = 'dsdd'; pr_combos( 12, : ) = 'dssd'; pr_combos( 13, : ) = 'sddd'; pr_combos( 14, : ) = 'sdsd'; pr_combos( 15, : ) = 'ssdd'; pr_combos( 16, : ) = 'sssd'; end pr_combos( 1, : ) = 'ssss'; pr_combos( 2, : ) = 'ssds'; pr_combos( 3, : ) = 'dddd'; pr_combos( 4, : ) = 'ddsd'; pr_combos( 5, : ) = 'sdss'; pr_combos( 6, : ) = 'sdds'; pr_combos( 7, : ) = 'dsdd'; pr_combos( 8, : ) = 'dssd'; pr_combos( 9, : ) = 'dsss'; pr_combos( 10, : ) = 'dsds'; pr_combos( 11, : ) = 'sddd'; pr_combos( 12, : ) = 'sdsd'; pr_combos( 13, : ) = 'ddss'; pr_combos( 14, : ) = 'ddds'; pr_combos( 15, : ) = 'ssdd'; pr_combos( 16, : ) = 'sssd'; for i = 1:16 pr_combo = pr_combos( i, : ); %str = sprintf( '%s', pr_combo ); disp(str); prc = pr_combo( 1 ); pra = pr_combo( 2 ); prb = pr_combo( 3 ); pr = pr_combo( 4 ); dtc = prec_dom_to_dt( prc, dmc ); dta = prec_dom_to_dt( pra, dma ); dtb = prec_dom_to_dt( prb, dmb ); dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); end %if 0 %i = 1; %pr = 's'; %for prc = pr_chars % for pra = pr_chars % for prb = pr_chars % dtc = prec_dom_to_dt( prc, dmc ); % dta = 
prec_dom_to_dt( pra, dma ); % dtb = prec_dom_to_dt( prb, dmb ); % dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); % i = i + 1; % end % end %end % %pr = 'd'; %for prc = flip( pr_chars ) % for pra = flip( pr_chars ) % for prb = flip( pr_chars ) % dtc = prec_dom_to_dt( prc, dmc ); % dta = prec_dom_to_dt( pra, dma ); % dtb = prec_dom_to_dt( prb, dmb ); % dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); % i = i + 1; % end % end %end %end r_val = dt_combos; end blis-0.6.1/test/mixeddt/matlab/output/000077500000000000000000000000001360743507500176765ustar00rootroot00000000000000blis-0.6.1/test/mixeddt/matlab/output/.gitkeep000066400000000000000000000000001360743507500213150ustar00rootroot00000000000000blis-0.6.1/test/mixeddt/matlab/plot_all.m000066400000000000000000000030221360743507500203170ustar00rootroot00000000000000 % skx plot_dom_all(2.0,32,1,'../../skx/st', '../../skx/st_noxmem', '../../skx/st_merged', 'skx_t1');close all;clear all plot_dom_all(2.0,32,26,'../../skx/jc2ic13','../../skx/jc2ic13_noxmem','../../skx/jc2ic13_merged','skx_t26');close all;clear all plot_dom_all(2.0,32,52,'../../skx/jc4ic13','../../skx/jc4ic13_noxmem','../../skx/jc4ic13_merged','skx_t52');close all;clear all plot_dt_select(2.0,32,1,'../../skx/st', '../../skx/st_noxmem', '../../skx/st_merged', 'skx_t1');close all;clear all plot_dt_select(2.0,32,26,'../../skx/jc2ic13','../../skx/jc2ic13_noxmem','../../skx/jc2ic13_merged','skx_t26');close all;clear all plot_dt_select(2.0,32,52,'../../skx/jc4ic13','../../skx/jc4ic13_noxmem','../../skx/jc4ic13_merged','skx_t52');close all;clear all % tx2 plot_dom_all(2.2,8,1,'../../tx2/st', '../../tx2/st_noxmem', '../../tx2/st_merged', 'tx2_t1');close all;clear all plot_dom_all(2.2,8,28,'../../tx2/jc4ic7','../../tx2/jc4ic7_noxmem','../../tx2/jc4ic7_merged','tx2_t28');close all;clear all plot_dom_all(2.2,8,56,'../../tx2/jc8ic7','../../tx2/jc8ic7_noxmem','../../tx2/jc8ic7_merged','tx2_t56');close all;clear all 
plot_dt_select(2.2,8,1,'../../tx2/st', '../../tx2/st_noxmem', '../../tx2/st_merged', 'tx2_t1');close all;clear all plot_dt_select(2.2,8,28,'../../tx2/jc4ic7','../../tx2/jc4ic7_noxmem','../../tx2/jc4ic7_merged','tx2_t28');close all;clear all plot_dt_select(2.2,8,56,'../../tx2/jc8ic7','../../tx2/jc8ic7_noxmem','../../tx2/jc8ic7_merged','tx2_t56');close all;clear all blis-0.6.1/test/mixeddt/matlab/plot_dom_all.m000066400000000000000000000014211360743507500211570ustar00rootroot00000000000000function r_val = plot_dom_all( cfreq, ... dflopspercycle, ... nth, ... dirpath, ... dirpath_out, ... arch_str ) cases( 1, : ) = [ 'rrr' ]; cases( 2, : ) = [ 'rrc' ]; cases( 3, : ) = [ 'rcr' ]; cases( 4, : ) = [ 'rcc' ]; cases( 5, : ) = [ 'crr' ]; cases( 6, : ) = [ 'crc' ]; cases( 7, : ) = [ 'ccr' ]; cases( 8, : ) = [ 'ccc' ]; n_cases = size(cases,1); for i = 1:n_cases thecase = cases( i, : ); plot_dom_case( thecase, ... cfreq, ... dflopspercycle, ... nth, ... dirpath, ... dirpath_out, ... arch_str ); end r_val = 0; end blis-0.6.1/test/mixeddt/matlab/plot_dom_case.m000066400000000000000000000104351360743507500213270ustar00rootroot00000000000000function r_val = plot_dom_case( mdcase, ... cfreq, ... dflopspercycle, ... nth, ... dirpath, ... dirpath_out, ... arch_str ) % Create filename "templates" for the files that contain the performance % results. filetemp_intern = '%s/output_%s_%sgemm_intern.m'; filetemp_ad_hoc = '%s/output_%s_%sgemm_ad_hoc.m'; if nth == 1 thr_str = 'st'; else thr_str = 'mt'; end if 1 dt_combos = gen_prec_combos( mdcase ); else dt_combos( 1, : ) = [ 'ssss' ]; dt_combos( 2, : ) = [ 'sssd' ]; dt_combos( 3, : ) = [ 'ssds' ]; dt_combos( 4, : ) = [ 'sdss' ]; dt_combos( 5, : ) = [ 'dsss' ]; dt_combos( 6, : ) = [ 'ddds' ]; dt_combos( 7, : ) = [ 'dddd' ]; end n_combos = size(dt_combos,1); % Construct filenames for the "reference" (single real) data, then load % the data files, and finally save the results to different variable names. 
file_blis_sref = sprintf( filetemp_intern, dirpath, thr_str, 'ssss' ); run( file_blis_sref ) data_gemm_intern_sref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (double real) data, then load % the data files, and finally save the results to different variable names. file_blis_dref = sprintf( filetemp_intern, dirpath, thr_str, 'dddd' ); run( file_blis_dref ) data_gemm_intern_dref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (single complex) data, then load % the data files, and finally save the results to different variable names. file_blis_cref = sprintf( filetemp_intern, dirpath, thr_str, 'cccs' ); run( file_blis_cref ) data_gemm_intern_cref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (double complex) data, then load % the data files, and finally save the results to different variable names. file_blis_zref = sprintf( filetemp_intern, dirpath, thr_str, 'zzzd' ); run( file_blis_zref ) data_gemm_intern_zref( :, : ) = data_gemm_intern( :, : ); fig = figure; orient( fig, 'portrait' ); %set(gcf,'Position',[0 0 2000 900]); set(gcf,'PaperUnits', 'inches'); %set(gcf,'PaperSize', [16 12.4]); %set(gcf,'PaperPosition', [0 0 16 12.4]); set(gcf,'PaperSize', [14 11.0]); set(gcf,'PaperPosition', [0 0 14 11.0]); %set(gcf,'PaperPositionMode','auto'); set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','portrait'); for dti = 1:n_combos %for dti = 1:1 % Grab the current datatype combination. combo = dt_combos( dti, : ); %str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str); fprintf( '%d (%s) ', dti, combo ); if combo(4) == 's' data_gemm_ref( :, : ) = data_gemm_intern_sref( :, : ); refch = 's'; else %if combo(4) == 'd' data_gemm_ref( :, : ) = data_gemm_intern_dref( :, : ); refch = 'd'; end if ( combo(1) == 'c' || combo(1) == 'z' ) && ... ( combo(2) == 'c' || combo(2) == 'z' ) && ... 
( combo(3) == 'c' || combo(3) == 'z' ) if combo(4) == 's' data_gemm_ref( :, : ) = data_gemm_intern_cref( :, : ); refch = 'c'; else %if combo(4) == 'd' data_gemm_ref( :, : ) = data_gemm_intern_zref( :, : ); refch = 'z'; end end % Construct filenames for the data files from templates. file_intern = sprintf( filetemp_intern, dirpath, thr_str, combo ); file_ad_hoc = sprintf( filetemp_ad_hoc, dirpath, thr_str, combo ); % Load the data files. %str = sprintf( ' Loading %s', file_intern ); disp(str); run( file_intern ) %str = sprintf( ' Loading %s', file_ad_hoc ); disp(str); run( file_ad_hoc ) % Plot the result. plot_gemm_perf( combo, ... data_gemm_ref, ... data_gemm_intern, ... data_gemm_ad_hoc, ... refch, ... nth, ... 4, 4, ... cfreq, ... dflopspercycle, ... dti ); end fprintf( '\n' ); %if 0 %set(gcf,'Position',[0 0 2000 900]); %set(gcf,'PaperUnits', 'inches'); %set(gcf,'PaperSize', [48 22]); %set(gcf,'PaperPosition', [0 0 48 22]); %%set(gcf,'PaperPositionMode','auto'); %set(gcf,'PaperPositionMode','manual'); %set(gcf,'PaperOrientation','landscape'); %end outfile = sprintf( '%s/gemm_%s_%s', dirpath_out, mdcase, arch_str ); print(gcf, outfile,'-bestfit','-dpdf'); %print(gcf, 'gemm_md','-fillpage','-dpdf'); blis-0.6.1/test/mixeddt/matlab/plot_dt_all.m000066400000000000000000000117141360743507500210150ustar00rootroot00000000000000function r_val = plot_dt_all( is_mt ) if is_mt == 1 thr_str = 'mt'; else thr_str = 'st'; end if 1 dt_combos = gen_dt_combos(); else dt_combos( 1, : ) = [ 'ssss' ]; dt_combos( 2, : ) = [ 'sssd' ]; dt_combos( 3, : ) = [ 'ssds' ]; dt_combos( 4, : ) = [ 'sdss' ]; dt_combos( 5, : ) = [ 'dsss' ]; dt_combos( 6, : ) = [ 'ddds' ]; dt_combos( 7, : ) = [ 'dddd' ]; end n_combos = size(dt_combos,1); filetemp_blis = '../output_%s_%sgemm_asm_blis.m'; filetemp_open = '../output_%s_%sgemm_openblas.m'; % Construct filenames for the "reference" (single real) data, then load % the data files, and finally save the results to different variable names. 
file_blis_sref = sprintf( filetemp_blis, thr_str, 'ssss' ); file_open_sref = sprintf( filetemp_open, thr_str, 'ssss' ); %str = sprintf( ' Loading %s', file_blis_sref ); disp(str); run( file_blis_sref ) %str = sprintf( ' Loading %s', file_open_sref ); disp(str); run( file_open_sref ) data_gemm_asm_blis_sref( :, : ) = data_gemm_asm_blis( :, : ); data_gemm_openblas_sref( :, : ) = data_gemm_openblas( :, : ); % Construct filenames for the "reference" (double real) data, then load % the data files, and finally save the results to different variable names. file_blis_dref = sprintf( filetemp_blis, thr_str, 'dddd' ); file_open_dref = sprintf( filetemp_open, thr_str, 'dddd' ); %str = sprintf( ' Loading %s', file_blis_dref ); disp(str); run( file_blis_dref ) %str = sprintf( ' Loading %s', file_open_dref ); disp(str); run( file_open_dref ) data_gemm_asm_blis_dref( :, : ) = data_gemm_asm_blis( :, : ); data_gemm_openblas_dref( :, : ) = data_gemm_openblas( :, : ); % Construct filenames for the "reference" (single complex) data, then load % the data files, and finally save the results to different variable names. file_blis_cref = sprintf( filetemp_blis, thr_str, 'cccs' ); file_open_cref = sprintf( filetemp_open, thr_str, 'cccs' ); %str = sprintf( ' Loading %s', file_blis_cref ); disp(str); run( file_blis_cref ) %str = sprintf( ' Loading %s', file_open_cref ); disp(str); run( file_open_cref ) data_gemm_asm_blis_cref( :, : ) = data_gemm_asm_blis( :, : ); data_gemm_openblas_cref( :, : ) = data_gemm_openblas( :, : ); % Construct filenames for the "reference" (double complex) data, then load % the data files, and finally save the results to different variable names. 
file_blis_zref = sprintf( filetemp_blis, thr_str, 'zzzd' ); file_open_zref = sprintf( filetemp_open, thr_str, 'zzzd' ); %str = sprintf( ' Loading %s', file_blis_zref ); disp(str); run( file_blis_zref ) %str = sprintf( ' Loading %s', file_open_zref ); disp(str); run( file_open_zref ) data_gemm_asm_blis_zref( :, : ) = data_gemm_asm_blis( :, : ); data_gemm_openblas_zref( :, : ) = data_gemm_openblas( :, : ); fig = figure; orient( fig, 'landscape' ); set(gcf,'Position',[0 0 2000 900]); set(gcf,'PaperUnits', 'inches'); set(gcf,'PaperSize', [64 33]); set(gcf,'PaperPosition', [0 0 64 33]); %set(gcf,'PaperPositionMode','auto'); set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','landscape'); for dti = 1:n_combos %for dti = 1:1 % Grab the current datatype combination. combo = dt_combos( dti, : ); str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str); if combo(4) == 's' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_sref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_sref( :, : ); refch = 's'; else %if combo(4) == 'd' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_dref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_dref( :, : ); refch = 'd'; end if ( combo(1) == 'c' || combo(1) == 'z' ) && ... ( combo(2) == 'c' || combo(2) == 'z' ) && ... ( combo(3) == 'c' || combo(3) == 'z' ) if combo(4) == 's' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_cref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_cref( :, : ); refch = 'c'; else %if combo(4) == 'd' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_zref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_zref( :, : ); refch = 'z'; end end % Construct filenames for the data files from templates. file_blis = sprintf( filetemp_blis, thr_str, combo ); file_open = sprintf( filetemp_open, thr_str, combo ); % Load the data files. 
%str = sprintf( ' Loading %s', file_blis ); disp(str); run( file_blis ) %str = sprintf( ' Loading %s', file_open ); disp(str); run( file_open ) % Plot the result. plot_gemm_perf( combo, ... data_gemm_asm_blis, ... data_gemm_asm_blis_ref, ... data_gemm_openblas, ... data_gemm_openblas_ref, ... is_mt, refch, 8, 16, dti ); end if 0 set(gcf,'Position',[0 0 2000 900]); set(gcf,'PaperUnits', 'inches'); set(gcf,'PaperSize', [48 22]); set(gcf,'PaperPosition', [0 0 48 22]); %set(gcf,'PaperPositionMode','auto'); set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','landscape'); end print(gcf, 'output/gemm_md','-bestfit','-dpdf'); %print(gcf, 'gemm_md','-fillpage','-dpdf'); blis-0.6.1/test/mixeddt/matlab/plot_dt_select.m000066400000000000000000000114651360743507500215270ustar00rootroot00000000000000function r_val = plot_dt_select( dom, is_mt ) if is_mt == 1 thr_str = 'mt'; else thr_str = 'st'; end if dom == 'r' dt_combos( 1, : ) = [ 'dsss' ]; dt_combos( 2, : ) = [ 'sddd' ]; dt_combos( 3, : ) = [ 'sdds' ]; dt_combos( 4, : ) = [ 'dssd' ]; dt_combos( 5, : ) = [ 'ddds' ]; dt_combos( 6, : ) = [ 'sssd' ]; else dt_combos( 1, : ) = [ 'csss' ]; dt_combos( 2, : ) = [ 'zddd' ]; dt_combos( 3, : ) = [ 'ccss' ]; dt_combos( 4, : ) = [ 'zzdd' ]; dt_combos( 5, : ) = [ 'cscs' ]; dt_combos( 6, : ) = [ 'zdzd' ]; end n_combos = size(dt_combos,1); filetemp_blis = '../output_%s_%sgemm_asm_blis.m'; filetemp_open = '../output_%s_%sgemm_openblas.m'; % Construct filenames for the "reference" (single real) data, then load % the data files, and finally save the results to different variable names. 
file_blis_sref = sprintf( filetemp_blis, thr_str, 'ssss' ); file_open_sref = sprintf( filetemp_open, thr_str, 'ssss' ); run( file_blis_sref ) run( file_open_sref ) data_gemm_asm_blis_sref( :, : ) = data_gemm_asm_blis( :, : ); data_gemm_openblas_sref( :, : ) = data_gemm_openblas( :, : ); % Construct filenames for the "reference" (double real) data, then load % the data files, and finally save the results to different variable names. file_blis_dref = sprintf( filetemp_blis, thr_str, 'dddd' ); file_open_dref = sprintf( filetemp_open, thr_str, 'dddd' ); run( file_blis_dref ) run( file_open_dref ) data_gemm_asm_blis_dref( :, : ) = data_gemm_asm_blis( :, : ); data_gemm_openblas_dref( :, : ) = data_gemm_openblas( :, : ); % Construct filenames for the "reference" (single complex) data, then load % the data files, and finally save the results to different variable names. file_blis_cref = sprintf( filetemp_blis, thr_str, 'cccs' ); file_open_cref = sprintf( filetemp_open, thr_str, 'cccs' ); run( file_blis_cref ) run( file_open_cref ) data_gemm_asm_blis_cref( :, : ) = data_gemm_asm_blis( :, : ); data_gemm_openblas_cref( :, : ) = data_gemm_openblas( :, : ); % Construct filenames for the "reference" (double complex) data, then load % the data files, and finally save the results to different variable names. 
file_blis_zref = sprintf( filetemp_blis, thr_str, 'zzzd' ); file_open_zref = sprintf( filetemp_open, thr_str, 'zzzd' ); run( file_blis_zref ) run( file_open_zref ) data_gemm_asm_blis_zref( :, : ) = data_gemm_asm_blis( :, : ); data_gemm_openblas_zref( :, : ) = data_gemm_openblas( :, : ); %fig = figure; fig = figure('Position', [100, 100, 1024, 1300]); orient( fig, 'portrait' ); %set(gcf,'Position',[0 0 2000 900]); set(gcf,'PaperUnits', 'inches'); %set(gcf,'PaperSize', [16 12.4]); %set(gcf,'PaperPosition', [0 0 16 12.4]); set(gcf,'PaperSize', [9 11.0]); set(gcf,'PaperPosition', [0 0 9 11.0]); %set(gcf,'PaperPositionMode','auto'); set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','portrait'); for dti = 1:n_combos %for dti = 1:1 % Grab the current datatype combination. combo = dt_combos( dti, : ); str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str); if combo(4) == 's' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_sref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_sref( :, : ); refch = 's'; else %if combo(4) == 'd' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_dref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_dref( :, : ); refch = 'd'; end if ( combo(1) == 'c' || combo(1) == 'z' ) && ... ( combo(2) == 'c' || combo(2) == 'z' ) && ... ( combo(3) == 'c' || combo(3) == 'z' ) if combo(4) == 's' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_cref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_cref( :, : ); refch = 'c'; else %if combo(4) == 'd' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_zref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_zref( :, : ); refch = 'z'; end end % Construct filenames for the data files from templates. file_blis = sprintf( filetemp_blis, thr_str, combo ); file_open = sprintf( filetemp_open, thr_str, combo ); % Load the data files. 
%str = sprintf( ' Loading %s', file_blis ); disp(str); run( file_blis ) %str = sprintf( ' Loading %s', file_open ); disp(str); run( file_open ) % Plot the result. plot_gemm_perf( combo, ... data_gemm_asm_blis, ... data_gemm_asm_blis_ref, ... data_gemm_openblas, ... data_gemm_openblas_ref, ... is_mt, refch, 3, 2, dti ); end %if 0 %set(gcf,'Position',[0 0 2000 900]); %set(gcf,'PaperUnits', 'inches'); %set(gcf,'PaperSize', [48 22]); %set(gcf,'PaperPosition', [0 0 48 22]); %%set(gcf,'PaperPositionMode','auto'); %set(gcf,'PaperPositionMode','manual'); %set(gcf,'PaperOrientation','landscape'); %end outfile = sprintf( 'output/gemm_select_%c', dom ); print(gcf, outfile,'-bestfit','-dpdf'); %print(gcf, 'gemm_md','-fillpage','-dpdf'); blis-0.6.1/test/mixeddt/matlab/plot_gemm_perf.m000066400000000000000000000107731360743507500215230ustar00rootroot00000000000000function r_val = plot_gemm_perf( dt_str, ... data_ref, ... data_intern, ... data_ad_hoc, ... refch, ... nth, ... rows, cols, ... cfreq, ... dfps, ... theid ) if 1 ax1 = subplot( rows, cols, theid ); hold( ax1, 'on' ); end % Set line properties. color_ref = 'b'; lines_ref = ':'; markr_ref = ''; color_intern = 'b'; lines_intern = '-'; markr_intern = ''; color_ad_hoc = 'k'; lines_ad_hoc = '-.'; markr_ad_hoc = ''; % Compute the peak performance in terms of the number of double flops % executable per cycle and the clock rate. if dt_str(4) == 's' flopspercycle = dfps * 2; else flopspercycle = dfps; end max_perf_core = (flopspercycle * cfreq) * 1; % Print the title to a string. titlename = '%s'; titlename = sprintf( titlename, dt_str ); % Set the legend strings. if refch == 's' ref_legend = sprintf( 'Ref (sgemm)' ); elseif refch == 'd' ref_legend = sprintf( 'Ref (dgemm)' ); elseif refch == 'c' ref_legend = sprintf( 'Ref (cgemm)' ); elseif refch == 'z' ref_legend = sprintf( 'Ref (zgemm)' ); end intern_legend = sprintf( 'Internal' ); ad_hoc_legend = sprintf( 'Ad-hoc' ); % Set axes range values. 
y_scale = 1.00; x_begin = 0; x_end = data_ref( size( data_ref, 1 ), 1 ); y_begin = 0; y_end = max_perf_core * y_scale; % Set axes names. xaxisname = ' m = n = k'; if nth == 1 yaxisname = 'GFLOPS'; else yaxisname = 'GFLOPS/core'; end flopscol = 4; msize = 5; if 1 fontsize = 13; else fontsize = 16; end linesize = 0.5; legend_loc = 'southeast'; % -------------------------------------------------------------------- x_axis( :, 1 ) = data_intern( :, 1 ); data_peak( 1, 1:2 ) = [ 0 max_perf_core ]; data_peak( 2, 1:2 ) = [ x_end max_perf_core ]; ref = line( x_axis( :, 1 ), data_ref( :, flopscol ) / nth, ... 'Color',color_ref, 'LineStyle',lines_ref, ... 'LineWidth',linesize ); intern = line( x_axis( :, 1 ), data_intern( :, flopscol ) / nth, ... 'Color',color_intern, 'LineStyle',lines_intern, ... 'LineWidth',linesize ); ad_hoc = line( x_axis( :, 1 ), data_ad_hoc( :, flopscol ) / nth, ... 'Color',color_ad_hoc, 'LineStyle',lines_ad_hoc, ... 'LineWidth',linesize ); xlim( ax1, [x_begin x_end] ); ylim( ax1, [y_begin y_end] ); if rows == 8 && cols == 16 refs_legend = sprintf( 'Ref [sc]gemm' ); refd_legend = sprintf( 'Ref [dz]gemm' ); if theid == 1 leg = legend( ... [ ... ref ... intern ... ad_hoc ... ], ... refs_legend, ... intern_legend, ... ad_hoc_legend, ... 'Location', 'best' ); %'Location', legend_loc ); set( leg,'Box','off' ); set( leg,'Color','none' ); set( leg,'FontSize',fontsize-2 ); set( leg,'Units','inches' ); elseif theid == 9 leg = legend( ... [ ... ref ... intern ... ad_hoc ... ], ... refd_legend, ... intern_legend, ... ad_hoc_legend, ... 'Location', 'best' ); %'Location', legend_loc ); set( leg,'Box','off' ); set( leg,'Color','none' ); set( leg,'FontSize',fontsize-2 ); set( leg,'Units','inches' ); end elseif rows == 4 && cols == 4 if theid == 2 || theid == 4 leg = legend( ... [ ... ref ... intern ... ad_hoc ... ], ... ref_legend, ... intern_legend, ... ad_hoc_legend, ... 
'Location', legend_loc ); %'Location', 'best' ); set( leg,'Box','off' ); set( leg,'Color','none' ); set( leg,'FontSize',fontsize-2 ); set( leg,'Units','inches' ); if theid == 2 set( leg,'Position',[2.31 3.52 0.7 0.3 ] ); elseif theid == 4 set( leg,'Position',[4.80 3.52 0.7 0.3 ] ); end %set( leg,'Position',[1.03 3.46 0.7 0.3 ] ); end end set( ax1,'FontSize',fontsize ); set( ax1,'TitleFontSizeMultiplier',1.0 ); % default is 1.1. box( ax1, 'on' ); titl = title( titlename ); set( titl, 'FontWeight', 'normal' ); % default font style is now 'bold'. tpos = get( titl, 'Position' ); % default is to align across whole figure, not box. %tpos(1) = tpos(1) + 100; tpos(1) = tpos(1) + 40; set( titl, 'Position', tpos ); % here we nudge it back to centered with box. if theid > (rows-1)*cols xlab = xlabel( ax1,xaxisname ); %tpos = get( xlab, 'Position' ) %tpos(2) = tpos(2) + 10; %set( xlab, 'Position', tpos ); end if mod(theid-1,cols) == 0 ylab = ylabel( ax1,yaxisname ); end r_val = 0; end blis-0.6.1/test/mixeddt/matlab/prec_dom_to_dt.m000066400000000000000000000002641360743507500214770ustar00rootroot00000000000000function r_val = prec_dom_to_dt( pc, dc ) if dc == 'r' if pc == 's' r_val = 's'; else r_val = 'd'; end else if pc == 's' r_val = 'c'; else r_val = 'z'; end end end blis-0.6.1/test/mixeddt/matlab/testrand.m000066400000000000000000000017201360743507500203400ustar00rootroot00000000000000fig1 = figure(1); clf; %orient(fig1,'landscape') orient(gcf,'landscape') for i = 1:128 subplot(8,16,i); xx = 400:400:2000; aa = rand(size(xx)); plot(xx,aa); end % broken. if 0 set(gcf, 'PaperUnits', 'inches'); set(gcf, 'PaperSize', [60 36]); set(fig1,'PaperUnits','normalized'); set(fig1,'PaperPosition', [0 0 1 1]); print(fig1, 'testrand', '-dpdf'); end if 0 % works okay. 
set(gcf,'PaperUnits', 'inches'); set(gcf,'PaperSize', [72 36]); set(gcf,'PaperPositionMode','auto'); set(gcf,'PaperOrientation','landscape'); set(gcf,'Position',[50 50 4000 1800]); print(gcf, 'testrand','-bestfit','-dpdf'); end if 1 % works better? set(gcf,'Position',[0 0 2000 900]); set(gcf,'PaperUnits', 'inches'); set(gcf,'PaperSize', [48 22]); set(gcf,'PaperPosition', [0 0 48 22]); %set(gcf,'PaperPositionMode','auto'); set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','landscape'); print(gcf, 'testrand','-bestfit','-dpdf'); end blis-0.6.1/test/mixeddt/matlab/wawoxmem/000077500000000000000000000000001360743507500202025ustar00rootroot00000000000000blis-0.6.1/test/mixeddt/matlab/wawoxmem/dt_to_dom.m000066400000000000000000000002241360743507500223260ustar00rootroot00000000000000function r_val = dt_to_dom( dt ) dom = 'rrr'; for ch = 1:3 if dt(ch) == 'c' || dt(ch) == 'z' dom(ch) = 'c'; end end r_val = dom; end blis-0.6.1/test/mixeddt/matlab/wawoxmem/gen_prec_combos.m000066400000000000000000000041171360743507500235070ustar00rootroot00000000000000function r_val = gen_prec_combos( mdcase ) dt_chars = [ 's' 'd' 'c' 'z' ]; pr_chars = [ 's' 'd' ]; dm_chars = [ 'r' 'c' ]; dmc = mdcase( 1 ); dma = mdcase( 2 ); dmb = mdcase( 3 ); if 0 pr_combos( 1, : ) = 'ssss'; pr_combos( 2, : ) = 'ssds'; pr_combos( 3, : ) = 'sdss'; pr_combos( 4, : ) = 'sdds'; pr_combos( 5, : ) = 'dsss'; pr_combos( 6, : ) = 'dsds'; pr_combos( 7, : ) = 'ddss'; pr_combos( 8, : ) = 'ddds'; pr_combos( 9, : ) = 'dddd'; pr_combos( 10, : ) = 'ddsd'; pr_combos( 11, : ) = 'dsdd'; pr_combos( 12, : ) = 'dssd'; pr_combos( 13, : ) = 'sddd'; pr_combos( 14, : ) = 'sdsd'; pr_combos( 15, : ) = 'ssdd'; pr_combos( 16, : ) = 'sssd'; end pr_combos( 1, : ) = 'ssss'; pr_combos( 2, : ) = 'ssds'; pr_combos( 3, : ) = 'dddd'; pr_combos( 4, : ) = 'ddsd'; pr_combos( 5, : ) = 'sdss'; pr_combos( 6, : ) = 'sdds'; pr_combos( 7, : ) = 'dsdd'; pr_combos( 8, : ) = 'dssd'; pr_combos( 9, : ) = 'dsss'; pr_combos( 10, : ) = 'dsds'; 
pr_combos( 11, : ) = 'sddd'; pr_combos( 12, : ) = 'sdsd'; pr_combos( 13, : ) = 'ddss'; pr_combos( 14, : ) = 'ddds'; pr_combos( 15, : ) = 'ssdd'; pr_combos( 16, : ) = 'sssd'; for i = 1:16 pr_combo = pr_combos( i, : ); %str = sprintf( '%s', pr_combo ); disp(str); prc = pr_combo( 1 ); pra = pr_combo( 2 ); prb = pr_combo( 3 ); pr = pr_combo( 4 ); dtc = prec_dom_to_dt( prc, dmc ); dta = prec_dom_to_dt( pra, dma ); dtb = prec_dom_to_dt( prb, dmb ); dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); end %if 0 %i = 1; %pr = 's'; %for prc = pr_chars % for pra = pr_chars % for prb = pr_chars % dtc = prec_dom_to_dt( prc, dmc ); % dta = prec_dom_to_dt( pra, dma ); % dtb = prec_dom_to_dt( prb, dmb ); % dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); % i = i + 1; % end % end %end % %pr = 'd'; %for prc = flip( pr_chars ) % for pra = flip( pr_chars ) % for prb = flip( pr_chars ) % dtc = prec_dom_to_dt( prc, dmc ); % dta = prec_dom_to_dt( pra, dma ); % dtb = prec_dom_to_dt( prb, dmb ); % dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); % i = i + 1; % end % end %end %end r_val = dt_combos; end blis-0.6.1/test/mixeddt/matlab/wawoxmem/plot_dom_all.m000066400000000000000000000015441360743507500230310ustar00rootroot00000000000000function r_val = plot_dom_all( cfreq, ... dflopspercycle, ... nth, ... dirpath_w, ... dirpath_wo, ... dirpath_out, ... arch_str ) cases( 1, : ) = [ 'rrr' ]; cases( 2, : ) = [ 'rrc' ]; cases( 3, : ) = [ 'rcr' ]; cases( 4, : ) = [ 'rcc' ]; cases( 5, : ) = [ 'crr' ]; cases( 6, : ) = [ 'crc' ]; cases( 7, : ) = [ 'ccr' ]; cases( 8, : ) = [ 'ccc' ]; n_cases = size(cases,1); for i = 1:n_cases thecase = cases( i, : ); plot_dom_case( thecase, ... cfreq, ... dflopspercycle, ... nth, ... dirpath_w, ... dirpath_wo, ... dirpath_out, ... 
arch_str ); end r_val = 0; end blis-0.6.1/test/mixeddt/matlab/wawoxmem/plot_dom_case.m000066400000000000000000000113011360743507500231640ustar00rootroot00000000000000function r_val = plot_dom_case( mdcase, ... cfreq, ... dflopspercycle, ... nth, ... dirpath_w, ... dirpath_wo, ... dirpath_out, ... arch_str ) % Create filename "templates" for the files that contain the performance % results. filetemp_intern = '%s/output_%s_%sgemm_intern.m'; filetemp_ad_hoc = '%s/output_%s_%sgemm_ad_hoc.m'; if nth == 1 thr_str = 'st'; else thr_str = 'mt'; end if 1 dt_combos = gen_prec_combos( mdcase ); else dt_combos( 1, : ) = [ 'ssss' ]; dt_combos( 2, : ) = [ 'sssd' ]; dt_combos( 3, : ) = [ 'ssds' ]; dt_combos( 4, : ) = [ 'sdss' ]; dt_combos( 5, : ) = [ 'dsss' ]; dt_combos( 6, : ) = [ 'ddds' ]; dt_combos( 7, : ) = [ 'dddd' ]; end n_combos = size(dt_combos,1); % Construct filenames for the "reference" (single real) data, then load % the data files, and finally save the results to different variable names. file_blis_sref = sprintf( filetemp_intern, dirpath_w, thr_str, 'ssss' ); run( file_blis_sref ) data_gemm_intern_sref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (double real) data, then load % the data files, and finally save the results to different variable names. file_blis_dref = sprintf( filetemp_intern, dirpath_w, thr_str, 'dddd' ); run( file_blis_dref ) data_gemm_intern_dref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (single complex) data, then load % the data files, and finally save the results to different variable names. file_blis_cref = sprintf( filetemp_intern, dirpath_w, thr_str, 'cccs' ); run( file_blis_cref ) data_gemm_intern_cref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (double complex) data, then load % the data files, and finally save the results to different variable names. 
file_blis_zref = sprintf( filetemp_intern, dirpath_w, thr_str, 'zzzd' ); run( file_blis_zref ) data_gemm_intern_zref( :, : ) = data_gemm_intern( :, : ); fig = figure; orient( fig, 'portrait' ); %set(gcf,'Position',[0 0 2000 900]); set(gcf,'PaperUnits', 'inches'); %set(gcf,'PaperSize', [16 12.4]); %set(gcf,'PaperPosition', [0 0 16 12.4]); set(gcf,'PaperSize', [14 11.0]); set(gcf,'PaperPosition', [0 0 14 11.0]); %set(gcf,'PaperPositionMode','auto'); set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','portrait'); fprintf( 'Plotting... ' ); for dti = 1:n_combos %for dti = 1:1 % Grab the current datatype combination. combo = dt_combos( dti, : ); %str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str); fprintf( '%d (%s) ', dti, combo ); if combo(4) == 's' data_gemm_ref( :, : ) = data_gemm_intern_sref( :, : ); refch = 's'; else %if combo(4) == 'd' data_gemm_ref( :, : ) = data_gemm_intern_dref( :, : ); refch = 'd'; end if ( combo(1) == 'c' || combo(1) == 'z' ) && ... ( combo(2) == 'c' || combo(2) == 'z' ) && ... ( combo(3) == 'c' || combo(3) == 'z' ) if combo(4) == 's' data_gemm_ref( :, : ) = data_gemm_intern_cref( :, : ); refch = 'c'; else %if combo(4) == 'd' data_gemm_ref( :, : ) = data_gemm_intern_zref( :, : ); refch = 'z'; end end % Construct filenames for the data files from templates. file_intern_w = sprintf( filetemp_intern, dirpath_w, thr_str, combo ); file_intern_wo = sprintf( filetemp_intern, dirpath_wo, thr_str, combo ); file_ad_hoc = sprintf( filetemp_ad_hoc, dirpath_w, thr_str, combo ); % Load the data files. %str = sprintf( ' Loading %s', file_intern_w ); disp(str); run( file_intern_w ) data_gemm_intern_w( :, : ) = data_gemm_intern( :, : ); %str = sprintf( ' Loading %s', file_intern_wo ); disp(str); run( file_intern_wo ) data_gemm_intern_wo( :, : ) = data_gemm_intern( :, : ); %str = sprintf( ' Loading %s', file_ad_hoc ); disp(str); run( file_ad_hoc ) % Plot the result. plot_gemm_perf( combo, ... data_gemm_ref, ... data_gemm_intern_w, ... 
data_gemm_intern_wo, ... data_gemm_ad_hoc, ... refch, ... nth, ... 4, 4, ... cfreq, ... dflopspercycle, ... dti ); end fprintf( '\n' ); %if 0 %set(gcf,'Position',[0 0 2000 900]); %set(gcf,'PaperUnits', 'inches'); %set(gcf,'PaperSize', [48 22]); %set(gcf,'PaperPosition', [0 0 48 22]); %%set(gcf,'PaperPositionMode','auto'); %set(gcf,'PaperPositionMode','manual'); %set(gcf,'PaperOrientation','landscape'); %end outfile = sprintf( '%s/gemm_%s_%s', dirpath_out, mdcase, arch_str ); print(gcf, outfile,'-bestfit','-dpdf'); %print(gcf, 'gemm_md','-fillpage','-dpdf'); blis-0.6.1/test/mixeddt/matlab/wawoxmem/plot_dt_select.m000066400000000000000000000114711360743507500233700ustar00rootroot00000000000000function r_val = plot_dt_select( cfreq, ... dflopspercycle, ... nth, ... dirpath_w, ... dirpath_wo, ... dirpath_out, ... arch_str ) % Create filename "templates" for the files that contain the performance % results. filetemp_intern = '%s/output_%s_%sgemm_intern.m'; filetemp_ad_hoc = '%s/output_%s_%sgemm_ad_hoc.m'; if nth == 1 thr_str = 'st'; else thr_str = 'mt'; end dt_combos( 1, : ) = [ 'sdds' ]; dt_combos( 2, : ) = [ 'ccss' ]; dt_combos( 3, : ) = [ 'dssd' ]; dt_combos( 4, : ) = [ 'zzdd' ]; dt_combos( 5, : ) = [ 'ddds' ]; dt_combos( 6, : ) = [ 'cscs' ]; dt_combos( 7, : ) = [ 'sssd' ]; dt_combos( 8, : ) = [ 'zdzd' ]; dt_combos( 9, : ) = [ 'dsss' ]; dt_combos( 10, : ) = [ 'csss' ]; dt_combos( 11, : ) = [ 'sddd' ]; dt_combos( 12, : ) = [ 'zddd' ]; n_combos = size(dt_combos,1); % Construct filenames for the "reference" (single real) data, then load % the data files, and finally save the results to different variable names. file_blis_sref = sprintf( filetemp_intern, dirpath_w, thr_str, 'ssss' ); run( file_blis_sref ) data_gemm_intern_sref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (double real) data, then load % the data files, and finally save the results to different variable names. 
file_blis_dref = sprintf( filetemp_intern, dirpath_w, thr_str, 'dddd' ); run( file_blis_dref ) data_gemm_intern_dref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (single complex) data, then load % the data files, and finally save the results to different variable names. file_blis_cref = sprintf( filetemp_intern, dirpath_w, thr_str, 'cccs' ); run( file_blis_cref ) data_gemm_intern_cref( :, : ) = data_gemm_intern( :, : ); % Construct filenames for the "reference" (double complex) data, then load % the data files, and finally save the results to different variable names. file_blis_zref = sprintf( filetemp_intern, dirpath_w, thr_str, 'zzzd' ); run( file_blis_zref ) data_gemm_intern_zref( :, : ) = data_gemm_intern( :, : ); %fig = figure; fig = figure('Position', [100, 100, 1000, 600]); orient( fig, 'portrait' ); %set(gcf,'Position',[0 0 2000 900]); set(gcf,'PaperUnits', 'inches'); %set(gcf,'PaperSize', [16 12.4]); %set(gcf,'PaperPosition', [0 0 16 12.4]); set(gcf,'PaperSize', [14 9.5]); set(gcf,'PaperPosition', [0 0 14 9.5]); %set(gcf,'PaperPositionMode','auto'); set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','portrait'); fprintf( 'Plotting... ' ); for dti = 1:n_combos %for dti = 1:1 % Grab the current datatype combination. combo = dt_combos( dti, : ); %str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str); fprintf( '%d (%s) ', dti, combo ); if combo(4) == 's' data_gemm_ref( :, : ) = data_gemm_intern_sref( :, : ); refch = 's'; else %if combo(4) == 'd' data_gemm_ref( :, : ) = data_gemm_intern_dref( :, : ); refch = 'd'; end if ( combo(1) == 'c' || combo(1) == 'z' ) && ... ( combo(2) == 'c' || combo(2) == 'z' ) && ... ( combo(3) == 'c' || combo(3) == 'z' ) if combo(4) == 's' data_gemm_ref( :, : ) = data_gemm_intern_cref( :, : ); refch = 'c'; else %if combo(4) == 'd' data_gemm_ref( :, : ) = data_gemm_intern_zref( :, : ); refch = 'z'; end end % Construct filenames for the data files from templates. 
file_intern_w = sprintf( filetemp_intern, dirpath_w, thr_str, combo ); file_intern_wo = sprintf( filetemp_intern, dirpath_wo, thr_str, combo ); file_ad_hoc = sprintf( filetemp_ad_hoc, dirpath_w, thr_str, combo ); % Load the data files. %str = sprintf( ' Loading %s', file_intern_w ); disp(str); run( file_intern_w ) data_gemm_intern_w( :, : ) = data_gemm_intern( :, : ); %str = sprintf( ' Loading %s', file_intern_wo ); disp(str); run( file_intern_wo ) data_gemm_intern_wo( :, : ) = data_gemm_intern( :, : ); %str = sprintf( ' Loading %s', file_ad_hoc ); disp(str); run( file_ad_hoc ) % Plot the result. plot_gemm_perf( combo, ... data_gemm_ref, ... data_gemm_intern_w, ... data_gemm_intern_wo, ... data_gemm_ad_hoc, ... refch, ... nth, ... 3, 4, ... cfreq, ... dflopspercycle, ... dti ); end fprintf( '\n' ); %if 0 %set(gcf,'Position',[0 0 2000 900]); %set(gcf,'PaperUnits', 'inches'); %set(gcf,'PaperSize', [48 22]); %set(gcf,'PaperPosition', [0 0 48 22]); %%set(gcf,'PaperPositionMode','auto'); %set(gcf,'PaperPositionMode','manual'); %set(gcf,'PaperOrientation','landscape'); %end outfile = sprintf( '%s/gemm_select_%s', dirpath_out, arch_str ); print(gcf, outfile,'-bestfit','-dpdf'); %print(gcf, 'gemm_md','-fillpage','-dpdf'); blis-0.6.1/test/mixeddt/matlab/wawoxmem/plot_gemm_perf.m000066400000000000000000000122741360743507500233650ustar00rootroot00000000000000function r_val = plot_gemm_perf( dt_str, ... data_ref, ... data_intern_w, ... data_intern_wo, ... data_ad_hoc, ... refch, ... nth, ... rows, cols, ... cfreq, ... dfps, ... theid ) if 1 ax1 = subplot( rows, cols, theid ); hold( ax1, 'on' ); end % Set line properties. color_ref = 'b'; lines_ref = ':'; markr_ref = ''; color_intern_w = 'r'; lines_intern_w = '-'; markr_intern_w = ''; color_intern_wo = 'b'; lines_intern_wo = '--'; markr_intern_wo = '.'; color_ad_hoc = 'k'; lines_ad_hoc = '-.'; markr_ad_hoc = ''; % Compute the peak performance in terms of the number of double flops % executable per cycle and the clock rate. 
if dt_str(4) == 's' flopspercycle = dfps * 2; else flopspercycle = dfps; end max_perf_core = (flopspercycle * cfreq) * 1; % Print the title to a string. %titlename = '%sgemm'; titlename = '%s'; titlename = sprintf( titlename, dt_str ); % Set the legend strings. if refch == 's' ref_legend = sprintf( 'Ref (sgemm)' ); elseif refch == 'd' ref_legend = sprintf( 'Ref (dgemm)' ); elseif refch == 'c' ref_legend = sprintf( 'Ref (cgemm)' ); elseif refch == 'z' ref_legend = sprintf( 'Ref (zgemm)' ); end internw_legend = sprintf( 'Intern (+xm)' ); internwo_legend = sprintf( 'Intern (-xm)' ); ad_hoc_legend = sprintf( 'Ad-hoc' ); % Set axes range values. y_scale = 1.00; x_begin = 0; x_end = data_ref( size( data_ref, 1 ), 1 ); y_begin = 0; y_end = max_perf_core * y_scale; % Set axes names. xaxisname = ' m = n = k'; if nth == 1 yaxisname = 'GFLOPS'; else yaxisname = 'GFLOPS/core'; end flopscol = 4; msize = 5; if 1 fontsize = 13; else fontsize = 16; end linesize = 0.5; legend_loc = 'southeast'; % -------------------------------------------------------------------- x_axis( :, 1 ) = data_intern_w( :, 1 ); data_peak( 1, 1:2 ) = [ 0 max_perf_core ]; data_peak( 2, 1:2 ) = [ x_end max_perf_core ]; ref = line( x_axis( :, 1 ), data_ref( :, flopscol ) / nth, ... 'Color',color_ref, 'LineStyle',lines_ref, ... 'LineWidth',linesize ); if ( uses_xmem( dt_str ) ) intern_w = line( x_axis( :, 1 ), data_intern_w( :, flopscol ) / nth, ... 'Color',color_intern_w, 'LineStyle',lines_intern_w, ... 'LineWidth',linesize ); else %set( intern_w, 'visible', 'off' ); intern_w = line( nan, nan, ... 'Color',color_intern_w, 'LineStyle',lines_intern_w, ... 'LineWidth',linesize ); end intern_wo = line( x_axis( :, 1 ), data_intern_wo( :, flopscol ) / nth, ... 'Color',color_intern_wo, 'LineStyle',lines_intern_wo, ... 'LineWidth',linesize ); ad_hoc = line( x_axis( :, 1 ), data_ad_hoc( :, flopscol ) / nth, ... 'Color',color_ad_hoc, 'LineStyle',lines_ad_hoc, ... 
'LineWidth',linesize ); xlim( ax1, [x_begin x_end] ); ylim( ax1, [y_begin y_end] ); if x_end == 6000 x_tick2 = x_end - 1000; x_tick1 = x_tick2/2; xticks( ax1, [ x_tick1 x_tick2 ] ); elseif x_end == 2000 x_tick2 = x_end - 400; x_tick1 = x_tick2/2; xticks( ax1, [ x_tick1 x_tick2 ] ); end % full domain case if rows == 4 && cols == 4 if theid == 2 || theid == 4 leg = legend( ... [ ... ref ... intern_w ... intern_wo ... ad_hoc ... ], ... ref_legend, ... internw_legend, ... internwo_legend, ... ad_hoc_legend, ... 'Location', legend_loc ); %'Location', 'best' ); set( leg,'Box','off' ); set( leg,'Color','none' ); set( leg,'FontSize',fontsize-2 ); set( leg,'Units','inches' ); if theid == 2 set( leg,'Position',[2.31 3.52 0.7 0.3 ] ); elseif theid == 4 set( leg,'Position',[4.80 3.52 0.7 0.3 ] ); end end % select graphs elseif rows == 3 && cols == 4 if theid == 2 || theid == 4 leg = legend( ... [ ... ref ... intern_w ... intern_wo ... ad_hoc ... ], ... ref_legend, ... internw_legend, ... internwo_legend, ... ad_hoc_legend, ... 'Location', legend_loc ); %'Location', 'best' ); set( leg,'Box','off' ); set( leg,'Color','none' ); set( leg,'FontSize',fontsize-2 ); set( leg,'Units','inches' ); if theid == 2 set( leg,'Position',[4.38 4.78 0.7 0.3 ] ); elseif theid == 4 set( leg,'Position',[8.82 4.78 0.7 0.3 ] ); end end end set( ax1,'FontSize',fontsize ); set( ax1,'TitleFontSizeMultiplier',1.0 ); % default is 1.1. box( ax1, 'on' ); titl = title( titlename ); set( titl, 'FontWeight', 'normal' ); % default font style is now 'bold'. tpos = get( titl, 'Position' ); % default is to align across whole figure, not box. %tpos(1) = tpos(1) + 100; tpos(1) = tpos(1) + 40; set( titl, 'Position', tpos ); % here we nudge it back to centered with box. 
if theid > (rows-1)*cols xlab = xlabel( ax1,xaxisname ); %tpos = get( xlab, 'Position' ) %tpos(2) = tpos(2) + 10; %set( xlab, 'Position', tpos ); end if mod(theid-1,cols) == 0 ylab = ylabel( ax1,yaxisname ); end r_val = 0; end blis-0.6.1/test/mixeddt/matlab/wawoxmem/prec_dom_to_dt.m000066400000000000000000000003011360743507500233330ustar00rootroot00000000000000function r_val = prec_dom_to_dt( pc, dc ) if dc == 'r' if pc == 's' r_val = 's'; else r_val = 'd'; end else if pc == 's' r_val = 'c'; else r_val = 'z'; end end end blis-0.6.1/test/mixeddt/matlab/wawoxmem/uses_xmem.m000066400000000000000000000006231360743507500223660ustar00rootroot00000000000000function r_val = uses_xmem( dt_str ) a = dt_str(1); b = dt_str(4); a_prec = 'd'; b_prec = 'd'; if ( a == 's' || a == 'c' ) a_prec = 's'; end if ( b == 's' || b == 'c' ) b_prec = 's'; end dom_str = dt_to_dom( dt_str ); r_val = 0; if ( a_prec ~= b_prec ) r_val = 1; elseif ( strcmp( dom_str, 'crr' ) ) r_val = 1; elseif ( strcmp( dom_str, 'crc' ) ) r_val = 1; end end blis-0.6.1/test/mixeddt/runme.sh000077500000000000000000000107231360743507500165660ustar00rootroot00000000000000#!/bin/bash # File pefixes. exec_root="test" out_root="output" sys="blis" #sys="stampede2" #sys="lonestar5" #sys="ul252" sys="tx2" # Bind threads to processors. #export OMP_PROC_BIND=true #export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 1 3 5 7 9 11 13 15" #export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7" #export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7" #export GOMP_CPU_AFFINITY="0 2 4 6 1 3 5 7" #export GOMP_CPU_AFFINITY="0 4 1 5 2 6 3 7" #export GOMP_CPU_AFFINITY="0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29 32 33 36 37 40 41 44 45" #export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 16 18 20 22 1 3 5 7 9 11 13 15 17 19 21 23" #export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23" export GOMP_CPU_AFFINITY="0 1 2 3" # Modify LD_LIBRARY_PATH. 
if [ ${sys} = "blis" ]; then export LD_LIBRARY_PATH="$LD_LIBRARY_PATH" export GOMP_CPU_AFFINITY="0 1 2 3" jc_nt=1 # 5th loop ic_nt=4 # 3rd loop jr_nt=1 # 2nd loop ir_nt=1 # 1st loop nt=4 elif [ ${sys} = "stampede2" ]; then echo "Need to set GOMP_CPU_AFFINITY." exit 1 jc_nt=4 # 5th loop ic_nt=12 # 3rd loop jr_nt=1 # 2nd loop ir_nt=1 # 1st loop nt=48 elif [ ${sys} = "lonestar5" ]; then echo "Need to set GOMP_CPU_AFFINITY." exit 1 # A hack to use libiomp5 with gcc. export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64" jc_nt=2 # 5th loop ic_nt=12 # 3rd loop jr_nt=1 # 2nd loop ir_nt=1 # 1st loop nt=24 elif [ ${sys} = "ul252" ]; then export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/field/intel/mkl/lib/intel64" #export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103" export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51" #jc_nt=4 # 5th loop jc_nt=2 # 5th loop ic_nt=13 # 3rd loop jr_nt=1 # 2nd loop ir_nt=1 # 1st loop #nt=52 nt=26 elif [ ${sys} = "tx2" ]; then export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55" jc_nt=8 # 5th loop ic_nt=7 # 3rd loop jr_nt=1 # 2nd loop ir_nt=1 # 1st loop nt=56 fi # Save a copy of GOMP_CPU_AFFINITY so that if we have to unset it, we can # restore the value. GOMP_CPU_AFFINITYsave=${GOMP_CPU_AFFINITY} # Datatypes to test. #dts="s d c z" # Threadedness to test. threads="mt" #threads="st" # Implementations to test. 
test_impls="ad_hoc intern" # Operations to test. l3_ops="gemm" test_ops="${l3_ops}" # Define the list of datatype chars and precision chars. dt_chars="s d c z" pr_chars="s d" # Construct the datatype combination strings. dt_combos="" for dtc in ${dt_chars}; do for dta in ${dt_chars}; do for dtb in ${dt_chars}; do for pre in ${pr_chars}; do dt_combos="${dt_combos} ${dtc}${dta}${dtb}${pre}" done done done done # Overrides, in case something goes wrong for a subset of tests. #test_impls="ad_hoc" #dt_combos="ssss sssd ssds sdss dsss ddds dddd" # Now perform complex test cases. for th in ${threads}; do for dt in ${dt_combos}; do for im in ${test_impls}; do for op in ${test_ops}; do # Set the number of threads according to th. if [ ${th} = "mt" ]; then export BLIS_JC_NT=${jc_nt} export BLIS_IC_NT=${ic_nt} export BLIS_JR_NT=${jr_nt} export BLIS_IR_NT=${ir_nt} export OMP_NUM_THREADS=${nt} export OPENBLAS_NUM_THREADS=${nt} # Unset GOMP_CPU_AFFINITY for OpenBLAS. if [ ${im} = "openblas" ]; then unset GOMP_CPU_AFFINITY else export GOMP_CPU_AFFINITY=${GOMP_CPU_AFFINITYsave} fi else export BLIS_JC_NT=1 export BLIS_IC_NT=1 export BLIS_JR_NT=1 export BLIS_IR_NT=1 export OMP_NUM_THREADS=1 export OPENBLAS_NUM_THREADS=1 fi # Construct the name of the test executable. exec_name="${exec_root}_${dt}${op}_${im}_${th}.x" # Construct the name of the output file. out_file="${out_root}_${th}_${dt}${op}_${im}.m" echo "Running (nt = ${OMP_NUM_THREADS}) ./${exec_name} > ${out_file}" # Run executable. ./${exec_name} > ${out_file} #sleep 1 done done done done blis-0.6.1/test/mixeddt/test_gemm.c000066400000000000000000000374661360743507500172460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" void blas_gemm_md( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); void blas_gemm( trans_t transa, trans_t transb, num_t dt, obj_t* ao, obj_t* alpha, obj_t* bo, obj_t* beta, obj_t* co ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t* alphao; obj_t* betao; dim_t m, n, k; dim_t p; int r; double dtime; double dtime_save; double gflops; double flopsmul; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); int n_repeats = 3; num_t dta = DTA; num_t dtb = DTB; num_t dtc = DTC; num_t dtx = DTX; const bool_t a_real = bli_is_real( dta ); const bool_t b_real = bli_is_real( dtb ); const bool_t c_real = bli_is_real( dtc ); const bool_t a_complex = bli_is_complex( dta ); const bool_t b_complex = bli_is_complex( dtb ); const bool_t c_complex = bli_is_complex( dtc ); // Extract the precision component of the computation datatype. prec_t comp_prec = bli_dt_prec( dtx ); dim_t p_begin = P_BEGIN; dim_t p_max = P_MAX; dim_t p_inc = P_INC; int m_input = -1; int n_input = -1; int k_input = -1; #if 0 k_input = 256; #endif #if 0 char dta_ch, dtb_ch, dtc_ch, dtx_ch; // Choose the char corresponding to the requested datatype. 
if ( bli_is_float( dta ) ) dta_ch = 's'; else if ( bli_is_double( dta ) ) dta_ch = 'd'; else if ( bli_is_scomplex( dta ) ) dta_ch = 'c'; else dta_ch = 'z'; if ( bli_is_float( dtb ) ) dtb_ch = 's'; else if ( bli_is_double( dtb ) ) dtb_ch = 'd'; else if ( bli_is_scomplex( dtb ) ) dtb_ch = 'c'; else dtb_ch = 'z'; if ( bli_is_float( dtc ) ) dtc_ch = 's'; else if ( bli_is_double( dtc ) ) dtc_ch = 'd'; else if ( bli_is_scomplex( dtc ) ) dtc_ch = 'c'; else dtc_ch = 'z'; if ( bli_is_float( dtx ) ) dtx_ch = 's'; else dtx_ch = 'd'; ( void )dta_ch; ( void )dtb_ch; ( void )dtc_ch; ( void )dtx_ch; #endif trans_t transa = BLIS_NO_TRANSPOSE; trans_t transb = BLIS_NO_TRANSPOSE; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; //printf( "data_%s_%c%c%c%cgemm_%s", THR_STR, dtc_ch, dta_ch, dtb_ch, dtx_ch, STR ); printf( "data_gemm_%s", STR ); printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0 ); // Adjust the flops scaling based on which domain case is being executed. 
if ( c_real && a_real && b_real ) flopsmul = 2.0; else if ( c_real && a_real && b_complex ) flopsmul = 2.0; else if ( c_real && a_complex && b_real ) flopsmul = 2.0; else if ( c_real && a_complex && b_complex ) flopsmul = 4.0; else if ( c_complex && a_real && b_real ) flopsmul = 2.0; else if ( c_complex && a_real && b_complex ) flopsmul = 4.0; else if ( c_complex && a_complex && b_real ) flopsmul = 4.0; else if ( c_complex && a_complex && b_complex ) flopsmul = 8.0; //for ( p = p_begin; p <= p_max; p += p_inc ) for ( p = p_max; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dta, m, k, 0, 0, &a ); bli_obj_create( dtb, k, n, 0, 0, &b ); bli_obj_create( dtc, m, n, 0, 0, &c ); bli_obj_create( dtc, m, n, 0, 0, &c_save ); bli_obj_set_comp_prec( comp_prec, &c ); alphao = &BLIS_ONE; betao = &BLIS_ONE; bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm ( alphao, &a, &b, betao, &c ); #else blas_gemm_md ( alphao, &a, &b, betao, &c ); #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( flopsmul * m * k * n ) / ( dtime_save * 1.0e9 ); //printf( "data_%s_%c%c%c%cgemm_%s", THR_STR, dtc_ch, dta_ch, dtb_ch, dtx_ch, STR ); printf( "data_gemm_%s", STR ); printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, gflops ); 
bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } void blas_gemm_md( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { trans_t transa = bli_obj_conjtrans_status( a ); trans_t transb = bli_obj_conjtrans_status( b ); prec_t comp_prec = bli_obj_comp_prec( c ); if ( bli_obj_dt( a ) == bli_obj_dt( b ) && bli_obj_dt( b ) == bli_obj_dt( c ) && //bli_obj_dt( c ) == ( num_t )comp_prec ) bli_obj_prec( c ) == comp_prec ) { blas_gemm( transa, transb, bli_obj_dt( c ), alpha, a, b, beta, c ); return; } num_t dtc = bli_obj_dt( c ); num_t dta = bli_obj_dt( a ); num_t dtb = bli_obj_dt( b ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width_after_trans( a ); obj_t* ao = a; obj_t* bo = b; obj_t* co = c; num_t targ_dt_c, targ_dt_a, targ_dt_b; dom_t targ_dom_c, targ_dom_a, targ_dom_b; num_t dt_comp; dom_t comp_dom; obj_t at, bt, ct; obj_t ar, cr; bool_t needacc; bool_t force_proj_a = FALSE; bool_t force_proj_b = FALSE; if ( bli_is_real( dtc ) && bli_is_real( dta ) && bli_is_real( dtb ) ) { // rrr comp_dom = BLIS_REAL; targ_dom_c = BLIS_REAL; targ_dom_a = BLIS_REAL; targ_dom_b = BLIS_REAL; needacc = FALSE; } else if ( bli_is_real( dtc ) && bli_is_real( dta ) && bli_is_complex( dtb ) ) { // rrc comp_dom = BLIS_REAL; targ_dom_c = BLIS_REAL; targ_dom_a = BLIS_REAL; targ_dom_b = BLIS_REAL; needacc = FALSE; force_proj_b = TRUE; } else if ( bli_is_real( dtc ) && bli_is_complex( dta ) && bli_is_real( dtb ) ) { // rcr comp_dom = BLIS_REAL; targ_dom_c = BLIS_REAL; targ_dom_a = BLIS_REAL; targ_dom_b = BLIS_REAL; needacc = FALSE; force_proj_a = TRUE; } else if ( bli_is_real( dtc ) && bli_is_complex( dta ) && bli_is_complex( dtb ) ) { // rcc comp_dom = BLIS_COMPLEX; targ_dom_c = BLIS_COMPLEX; targ_dom_a = BLIS_COMPLEX; targ_dom_b = BLIS_COMPLEX; needacc = TRUE; } else if ( bli_is_complex( dtc ) && bli_is_real( dta ) && bli_is_real( dtb ) ) { // crr comp_dom = BLIS_REAL; targ_dom_c = 
BLIS_REAL; targ_dom_a = BLIS_REAL; targ_dom_b = BLIS_REAL; needacc = TRUE; } else if ( bli_is_complex( dtc ) && bli_is_real( dta ) && bli_is_complex( dtb ) ) { // crc comp_dom = BLIS_COMPLEX; targ_dom_c = BLIS_COMPLEX; targ_dom_a = BLIS_COMPLEX; targ_dom_b = BLIS_COMPLEX; needacc = FALSE; force_proj_a = TRUE; } else if ( bli_is_complex( dtc ) && bli_is_complex( dta ) && bli_is_real( dtb ) ) { // ccr comp_dom = BLIS_REAL; targ_dom_c = BLIS_COMPLEX; targ_dom_a = BLIS_COMPLEX; targ_dom_b = BLIS_REAL; needacc = FALSE; } else if ( bli_is_complex( dtc ) && bli_is_complex( dta ) && bli_is_complex( dtb ) ) { // ccc comp_dom = BLIS_COMPLEX; targ_dom_c = BLIS_COMPLEX; targ_dom_a = BLIS_COMPLEX; targ_dom_b = BLIS_COMPLEX; needacc = FALSE; } else { comp_dom = BLIS_REAL; targ_dom_c = BLIS_REAL; targ_dom_a = BLIS_REAL; targ_dom_b = BLIS_REAL; needacc = FALSE; } // ---------------------------------------------------------------------------- // Merge the computation domain with the computation precision. dt_comp = comp_dom | comp_prec; targ_dt_a = targ_dom_a | comp_prec; targ_dt_b = targ_dom_b | comp_prec; targ_dt_c = targ_dom_c | comp_prec; // Copy-cast A, if needed. if ( bli_dt_prec( dta ) != comp_prec || force_proj_a ) { bli_obj_create( targ_dt_a, m, k, 0, 0, &at ); bli_castm( ao, &at ); ao = &at; } // Copy-cast B, if needed. if ( bli_dt_prec( dtb ) != comp_prec || force_proj_b ) { bli_obj_create( targ_dt_b, k, n, 0, 0, &bt ); bli_castm( bo, &bt ); bo = &bt; } if ( bli_dt_prec( dtc ) != comp_prec ) { needacc = TRUE; } // Copy-cast C, if needed. 
if ( needacc ) { //bli_obj_create( dt_comp, m, n, 0, 0, &ct ); bli_obj_create( targ_dt_c, m, n, 0, 0, &ct ); bli_castm( c, &ct ); co = &ct; } // ---------------------------------------------------------------------------- if ( bli_is_real( dtc ) && bli_is_real( dta ) && bli_is_real( dtb ) ) { } else if ( bli_is_real( dtc ) && bli_is_real( dta ) && bli_is_complex( dtb ) ) { } else if ( bli_is_real( dtc ) && bli_is_complex( dta ) && bli_is_real( dtb ) ) { } else if ( bli_is_real( dtc ) && bli_is_complex( dta ) && bli_is_complex( dtb ) ) { } else if ( bli_is_complex( dtc ) && bli_is_real( dta ) && bli_is_real( dtb ) ) { } else if ( bli_is_complex( dtc ) && bli_is_real( dta ) && bli_is_complex( dtb ) ) { } else if ( bli_is_complex( dtc ) && bli_is_complex( dta ) && bli_is_real( dtb ) ) { inc_t rsa = bli_obj_row_stride( ao ); inc_t csa = bli_obj_col_stride( ao ); inc_t ma = bli_obj_length( ao ); inc_t na = bli_obj_width( ao ); siz_t ela = bli_obj_elem_size( ao ); num_t dtap = bli_obj_dt_proj_to_real( ao ); bli_obj_alias_to( ao, &ar ); ao = &ar; bli_obj_set_strides( rsa, 2*csa, ao ); bli_obj_set_dims( 2*ma, na, ao ); bli_obj_set_dt( dtap, ao ); bli_obj_set_elem_size( ela/2, ao ); inc_t rsc = bli_obj_row_stride( co ); inc_t csc = bli_obj_col_stride( co ); inc_t mc = bli_obj_length( co ); inc_t nc = bli_obj_width( co ); siz_t elc = bli_obj_elem_size( co ); num_t dtcp = bli_obj_dt_proj_to_real( co ); bli_obj_alias_to( co, &cr ); co = &cr; bli_obj_set_strides( rsc, 2*csc, co ); bli_obj_set_dims( 2*mc, nc, co ); bli_obj_set_dt( dtcp, co ); bli_obj_set_elem_size( elc/2, co ); } else if ( bli_is_complex( dtc ) && bli_is_complex( dta ) && bli_is_complex( dtb ) ) { } else { } // ---------------------------------------------------------------------------- // Call the BLAS. blas_gemm( transa, transb, dt_comp, alpha, ao, bo, beta, co ); // Accumulate back to C, if needed. 
if ( needacc ) { bli_castm( &ct, c ); } if ( bli_dt_prec( dta ) != comp_prec || force_proj_a ) { bli_obj_free( &at ); } if ( bli_dt_prec( dtb ) != comp_prec || force_proj_b ) { bli_obj_free( &bt ); } if ( needacc ) { bli_obj_free( &ct ); } } void blas_gemm( trans_t transa, trans_t transb, num_t dt, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { char f77_transa = 'N'; char f77_transb = 'N'; //bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); //bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer_for_1x1( dt, alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer_for_1x1( dt, beta ); float* cp = bli_obj_buffer( c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer_for_1x1( dt, alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer_for_1x1( dt, beta ); double* cp = bli_obj_buffer( c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer_for_1x1( dt, alpha ); scomplex* ap = 
bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer_for_1x1( dt, beta ); scomplex* cp = bli_obj_buffer( c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer_for_1x1( dt, alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer_for_1x1( dt, beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } } blis-0.6.1/test/runme.sh000077500000000000000000000011751360743507500151310ustar00rootroot00000000000000#!/bin/bash exec_root="test" out_root="output" #out_root="output_square" # Operations to test. l2_ops="gemv ger hemv her her2 trmv trsv" l3_ops="gemm hemm herk her2k trmm trsm" test_ops="${l2_ops} ${l3_ops}" # Implementations to test. #test_impls="openblas mkl blis" test_impls="blis" for im in ${test_impls}; do for op in ${test_ops}; do # Construct the name of the test executable. exec_name="${exec_root}_${op}_${im}.x" # Construct the name of the output file. out_file="${out_root}_${op}_${im}.m" echo "Running ${exec_name} > ${out_file}" # Run executable. ./${exec_name} > ${out_file} sleep 1 done done blis-0.6.1/test/studies/000077500000000000000000000000001360743507500151205ustar00rootroot00000000000000blis-0.6.1/test/studies/skx/000077500000000000000000000000001360743507500157255ustar00rootroot00000000000000blis-0.6.1/test/studies/skx/Makefile000066400000000000000000000504721360743507500173750ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for standalone BLIS test drivers. 
# # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all \ blis-gemm-st openblas-gemm-st mkl-gemm-st acml-gemm-st \ blis-gemm-mt openblas-gemm-mt mkl-gemm-mt acml-gemm-mt \ blis-syrk-st openblas-syrk-st mkl-syrk-st armpl-syrk-st \ blis-syrk-mt openblas-syrk-mt mkl-syrk-mt armpl-syrk-mt \ blis-hemm-st openblas-hemm-st mkl-hemm-st armpl-hemm-st \ blis-hemm-mt openblas-hemm-mt mkl-hemm-mt armpl-hemm-mt \ blis-trmm-st openblas-trmm-st mkl-trmm-st armpl-trmm-st \ blis-trmm-mt openblas-trmm-mt mkl-trmm-mt armpl-trmm-mt \ clean cleanx # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. # - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := ../../.. LIB_PATH = ../../../lib/$(CONFIG_NAME) INC_PATH = ../../../include/$(CONFIG_NAME) SHARE_PATH := ../../.. endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. -include $(SHARE_PATH)/common.mk # # --- BLAS and LAPACK implementations ------------------------------------------ # # BLIS library and header path. This is simply wherever it was installed. #BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis # BLIS library. #BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a # BLAS library path(s). This is where the BLAS libraries reside. 
HOME_LIB_PATH := $(HOME)/OpenBLAS/lib #MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64 #MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64 MKL_LIB_PATH := ${MKLROOT}/lib/intel64 ARMPL_LIB_PATH := /opt/arm/armpl-18.3.0_Cortex-A57_Ubuntu-16.04_gcc_7.1.0_aarch64-linux/lib #ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64 ACML_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_int64/lib ACMLP_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_mp_int64/lib # OpenBLAS OPENBLAS_LIB := $(HOME_LIB_PATH)/libopenblas.a OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblas.a # ATLAS ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \ $(HOME_LIB_PATH)/libatlas.a # For ARMPL ARMPL_LIB := $(ARMPL_LIB_PATH)/libarmpl_lp64.a ARMPLP_LIB := $(ARMPL_LIB_PATH)/libarmpl_lp64_mp.a # MKL MKL_LIB := -L$(MKL_LIB_PATH) \ -lmkl_intel_lp64 \ -lmkl_core \ -lmkl_sequential \ -lpthread -lm -ldl #MKLP_LIB := -L$(MKL_LIB_PATH) \ # -lmkl_intel_thread \ # -lmkl_core \ # -lmkl_intel_ilp64 \ # -L$(ICC_LIB_PATH) \ # -liomp5 MKLP_LIB := -L$(MKL_LIB_PATH) \ -lmkl_intel_lp64 \ -lmkl_core \ -lmkl_gnu_thread \ -lpthread -lm -ldl -fopenmp #-L$(ICC_LIB_PATH) \ #-lgomp # ACML ACML_LIB := -L$(ACML_LIB_PATH) \ -lgfortran -lm -lrt -ldl -lacml ACMLP_LIB := -L$(ACMLP_LIB_PATH) \ -lgfortran -lm -lrt -ldl -lacml_mp # # --- General build definitions ------------------------------------------------ # TEST_SRC_PATH := . TEST_OBJ_PATH := . # Gather all local object files. TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(TEST_SRC_PATH)/*.c))) # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the "framework" CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add local header paths to CFLAGS. 
CFLAGS += -g -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. #LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # Datatype DT_S := -DDT=BLIS_FLOAT DT_D := -DDT=BLIS_DOUBLE DT_C := -DDT=BLIS_SCOMPLEX DT_Z := -DDT=BLIS_DCOMPLEX # Which library? BLI_DEF := -DBLIS BLA_DEF := -DBLAS # Complex implementation type D3MHW := -DIND=BLIS_3MH D3M1 := -DIND=BLIS_3M1 D4MHW := -DIND=BLIS_4MH D4M1B := -DIND=BLIS_4M1B D4M1A := -DIND=BLIS_4M1A D1M := -DIND=BLIS_1M DNAT := -DIND=BLIS_NAT # Implementation string STR_3MHW := -DSTR=\"3mhw\" STR_3M1 := -DSTR=\"3m1\" STR_4MHW := -DSTR=\"4mhw\" STR_4M1B := -DSTR=\"4m1b\" STR_4M1A := -DSTR=\"4m1a\" STR_1M := -DSTR=\"1m\" STR_NAT := -DSTR=\"asm\" STR_OBL := -DSTR=\"openblas\" STR_MKL := -DSTR=\"mkl\" STR_ACML := -DSTR=\"acml\" STR_ARMPL:= -DSTR=\"armpl\" # Single or multithreaded string STR_ST := -DTHR_STR=\"st\" STR_MT := -DTHR_STR=\"mt\" # Problem size specification PDEF_ST := -DP_BEGIN=40 \ -DP_END=2000 \ -DP_INC=40 PDEF_MT := -DP_BEGIN=200 \ -DP_END=5000 \ -DP_INC=200 # # --- Targets/rules ------------------------------------------------------------ # all-st: blis-st openblas-st mkl-st all-mt: blis-mt openblas-mt mkl-mt blis-st: blis-gemm-st blis-syrk-st blis-hemm-st blis-trmm-st blis-mt: blis-gemm-mt blis-syrk-mt blis-hemm-mt blis-trmm-mt openblas-st: openblas-gemm-st openblas-syrk-st openblas-hemm-st openblas-trmm-st openblas-mt: openblas-gemm-mt openblas-syrk-mt openblas-hemm-mt openblas-trmm-mt mkl-st: mkl-gemm-st mkl-syrk-st mkl-hemm-st mkl-trmm-st mkl-mt: mkl-gemm-mt mkl-syrk-mt mkl-hemm-mt mkl-trmm-mt armpl-st: armpl-gemm-st armpl-syrk-st armpl-hemm-st armpl-trmm-st armpl-mt: armpl-gemm-mt armpl-syrk-mt armpl-hemm-mt armpl-trmm-mt blis-gemm-st: \ test_sgemm_asm_blis_st.x \ test_dgemm_asm_blis_st.x \ \ test_cgemm_1m_blis_st.x \ test_zgemm_1m_blis_st.x \ test_cgemm_asm_blis_st.x \ test_zgemm_asm_blis_st.x blis-syrk-st: \ test_ssyrk_asm_blis_st.x \ test_dsyrk_asm_blis_st.x \ test_csyrk_1m_blis_st.x \ 
test_zsyrk_1m_blis_st.x blis-syrk-mt: \ test_ssyrk_asm_blis_mt.x \ test_dsyrk_asm_blis_mt.x \ test_csyrk_1m_blis_mt.x \ test_zsyrk_1m_blis_mt.x blis-hemm-st: \ test_shemm_asm_blis_st.x \ test_dhemm_asm_blis_st.x \ test_chemm_1m_blis_st.x \ test_zhemm_1m_blis_st.x blis-hemm-mt: \ test_shemm_asm_blis_mt.x \ test_dhemm_asm_blis_mt.x \ test_chemm_1m_blis_mt.x \ test_zhemm_1m_blis_mt.x blis-trmm-st: \ test_strmm_asm_blis_st.x \ test_dtrmm_asm_blis_st.x \ test_ctrmm_1m_blis_st.x \ test_ztrmm_1m_blis_st.x blis-trmm-mt: \ test_strmm_asm_blis_mt.x \ test_dtrmm_asm_blis_mt.x \ test_ctrmm_1m_blis_mt.x \ test_ztrmm_1m_blis_mt.x blis-gemm-mt: \ test_sgemm_asm_blis_mt.x \ test_dgemm_asm_blis_mt.x \ \ test_cgemm_1m_blis_mt.x \ test_zgemm_1m_blis_mt.x \ test_cgemm_asm_blis_mt.x \ test_zgemm_asm_blis_mt.x openblas-gemm-st: \ test_sgemm_openblas_st.x \ test_dgemm_openblas_st.x \ test_cgemm_openblas_st.x \ test_zgemm_openblas_st.x openblas-gemm-mt: \ test_sgemm_openblas_mt.x \ test_dgemm_openblas_mt.x \ test_cgemm_openblas_mt.x \ test_zgemm_openblas_mt.x openblas-syrk-st: \ test_ssyrk_openblas_st.x \ test_dsyrk_openblas_st.x \ test_csyrk_openblas_st.x \ test_zsyrk_openblas_st.x openblas-syrk-mt: \ test_ssyrk_openblas_mt.x \ test_dsyrk_openblas_mt.x \ test_csyrk_openblas_mt.x \ test_zsyrk_openblas_mt.x openblas-hemm-st: \ test_shemm_openblas_st.x \ test_dhemm_openblas_st.x \ test_chemm_openblas_st.x \ test_zhemm_openblas_st.x openblas-hemm-mt: \ test_shemm_openblas_mt.x \ test_dhemm_openblas_mt.x \ test_chemm_openblas_mt.x \ test_zhemm_openblas_mt.x openblas-trmm-st: \ test_strmm_openblas_st.x \ test_dtrmm_openblas_st.x \ test_ctrmm_openblas_st.x \ test_ztrmm_openblas_st.x openblas-trmm-mt: \ test_strmm_openblas_mt.x \ test_dtrmm_openblas_mt.x \ test_ctrmm_openblas_mt.x \ test_ztrmm_openblas_mt.x mkl-gemm-st: \ test_sgemm_mkl_st.x \ test_dgemm_mkl_st.x \ test_cgemm_mkl_st.x \ test_zgemm_mkl_st.x mkl-gemm-mt: \ test_sgemm_mkl_mt.x \ test_dgemm_mkl_mt.x \ test_cgemm_mkl_mt.x \ 
test_zgemm_mkl_mt.x mkl-syrk-st: \ test_ssyrk_mkl_st.x \ test_dsyrk_mkl_st.x \ test_csyrk_mkl_st.x \ test_zsyrk_mkl_st.x mkl-syrk-mt: \ test_ssyrk_mkl_mt.x \ test_dsyrk_mkl_mt.x \ test_csyrk_mkl_mt.x \ test_zsyrk_mkl_mt.x mkl-hemm-st: \ test_shemm_mkl_st.x \ test_dhemm_mkl_st.x \ test_chemm_mkl_st.x \ test_zhemm_mkl_st.x mkl-hemm-mt: \ test_shemm_mkl_mt.x \ test_dhemm_mkl_mt.x \ test_chemm_mkl_mt.x \ test_zhemm_mkl_mt.x mkl-trmm-st: \ test_strmm_mkl_st.x \ test_dtrmm_mkl_st.x \ test_ctrmm_mkl_st.x \ test_ztrmm_mkl_st.x mkl-trmm-mt: \ test_strmm_mkl_mt.x \ test_dtrmm_mkl_mt.x \ test_ctrmm_mkl_mt.x \ test_ztrmm_mkl_mt.x armpl-gemm-st: \ test_sgemm_armpl_st.x \ test_dgemm_armpl_st.x \ test_cgemm_armpl_st.x \ test_zgemm_armpl_st.x armpl-gemm-mt: \ test_sgemm_armpl_mt.x \ test_dgemm_armpl_mt.x \ test_cgemm_armpl_mt.x \ test_zgemm_armpl_mt.x armpl-syrk-st: \ test_ssyrk_armpl_st.x \ test_dsyrk_armpl_st.x \ test_csyrk_armpl_st.x \ test_zsyrk_armpl_st.x armpl-syrk-mt: \ test_ssyrk_armpl_mt.x \ test_dsyrk_armpl_mt.x \ test_csyrk_armpl_mt.x \ test_zsyrk_armpl_mt.x armpl-hemm-st: \ test_shemm_armpl_st.x \ test_dhemm_armpl_st.x \ test_chemm_armpl_st.x \ test_zhemm_armpl_st.x armpl-hemm-mt: \ test_shemm_armpl_mt.x \ test_dhemm_armpl_mt.x \ test_chemm_armpl_mt.x \ test_zhemm_armpl_mt.x armpl-trmm-st: \ test_strmm_armpl_st.x \ test_dtrmm_armpl_st.x \ test_ctrmm_armpl_st.x \ test_ztrmm_armpl_st.x armpl-trmm-mt: \ test_strmm_armpl_mt.x \ test_dtrmm_armpl_mt.x \ test_ctrmm_armpl_mt.x \ test_ztrmm_armpl_mt.x # --Object file rules -- $(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(CC) $(CFLAGS) -c $< -o $@ # blis 3mhw test_z%_3mhw_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_ST) -c $< -o $@ test_c%_3mhw_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_ST) -c $< -o $@ test_z%_3mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@ 
test_c%_3mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@ # blis 3m1 test_z%_3m1_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@ test_c%_3m1_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@ test_z%_3m1_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@ test_c%_3m1_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@ # blis 4mhw test_z%_4mhw_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@ test_c%_4mhw_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@ test_z%_4mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@ test_c%_4mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@ # blis 4m1b test_z%_4m1b_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_ST) -c $< -o $@ test_c%_4m1b_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_ST) -c $< -o $@ test_z%_4m1b_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@ test_c%_4m1b_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@ # blis 4m1a test_z%_4m1a_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@ test_c%_4m1a_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@ test_z%_4m1a_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@ 
test_c%_4m1a_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@ # blis 1m test_z%_1m_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_ST) -c $< -o $@ test_c%_1m_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_ST) -c $< -o $@ test_z%_1m_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_MT) -c $< -o $@ test_c%_1m_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_MT) -c $< -o $@ # blis asm test_d%_asm_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@ test_s%_asm_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@ test_z%_asm_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@ test_c%_asm_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@ test_d%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@ test_s%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@ test_z%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@ test_c%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@ # openblas test_d%_openblas_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@ test_s%_openblas_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@ test_z%_openblas_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@ test_c%_openblas_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) 
$(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@ test_d%_openblas_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@ test_s%_openblas_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@ test_z%_openblas_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@ test_c%_openblas_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@ # mkl test_d%_mkl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@ test_s%_mkl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@ test_z%_mkl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@ test_c%_mkl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@ test_d%_mkl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@ test_s%_mkl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@ test_z%_mkl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@ test_c%_mkl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@ # armpl test_d%_armpl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@ test_s%_armpl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@ test_z%_armpl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@ test_c%_armpl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@ test_d%_armpl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) 
$(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@ test_s%_armpl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@ test_z%_armpl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@ test_c%_armpl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@ # mkl # -- Executable file rules -- # NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS # on the link command line in case BLIS was configured with the BLAS # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK) $(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK) $(LINKER) $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_mkl_st.x: test_%_mkl_st.o $(LIBBLIS_LINK) $(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_mkl_mt.x: test_%_mkl_mt.o $(LIBBLIS_LINK) $(LINKER) $< $(MKLP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_armpl_st.x: test_%_armpl_st.o $(LIBBLIS_LINK) $(LINKER) $< $(ARMPL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_armpl_mt.x: test_%_armpl_mt.o $(LIBBLIS_LINK) $(LINKER) $< $(ARMPL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_blis_st.x: test_%_blis_st.o $(LIBBLIS_LINK) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_blis_mt.x: test_%_blis_mt.o $(LIBBLIS_LINK) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.o *.x blis-0.6.1/test/studies/skx/plot_gemm_mt_perf.m000066400000000000000000000076731360743507500216170ustar00rootroot00000000000000addpath(pathname) output_mt_sgemm_asm_blis output_mt_dgemm_asm_blis output_mt_cgemm_1m_blis output_mt_zgemm_1m_blis output_mt_sgemm_openblas output_mt_dgemm_openblas output_mt_cgemm_openblas output_mt_zgemm_openblas 
output_mt_sgemm_mkl output_mt_dgemm_mkl output_mt_cgemm_mkl output_mt_zgemm_mkl % SGEMM multi threaded axes1 = subplot(4, 4, 1); hold(axes1,'on'); plot(data_mt_sgemm_asm_blis(:,1), data_mt_sgemm_asm_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_sgemm_openblas(:,1), data_mt_sgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_sgemm_mkl(:,1), data_mt_sgemm_mkl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SGEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores] ) % DGEMM multi threaded axes1 = subplot(4, 4, 5); hold(axes1,'on'); plot(data_mt_dgemm_asm_blis(:,1), data_mt_dgemm_asm_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_dgemm_openblas(:,1), data_mt_dgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_dgemm_mkl(:,1), data_mt_dgemm_mkl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DGEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'bemt'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) % CGEMM multi threaded axes1 = subplot(4, 4, 9); hold(axes1,'on'); 
plot(data_mt_cgemm_1m_blis(:,1), data_mt_cgemm_1m_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_cgemm_openblas(:,1), data_mt_cgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_cgemm_mkl(:,1), data_mt_cgemm_mkl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CGEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % ZGEMM multi threaded axes1 = subplot(4, 4, 13); hold(axes1,'on'); plot(data_mt_zgemm_1m_blis(:,1), data_mt_zgemm_1m_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_zgemm_openblas(:,1), data_mt_zgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_zgemm_mkl(:,1), data_mt_zgemm_mkl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZGEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); legend({'BLIS', 'OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) clear *gemm* rmpath(pathname) blis-0.6.1/test/studies/skx/plot_gemm_st_perf.m000066400000000000000000000076321360743507500216200ustar00rootroot00000000000000addpath(pathname) output_st_sgemm_asm_blis output_st_dgemm_asm_blis 
output_st_cgemm_1m_blis output_st_zgemm_1m_blis output_st_sgemm_openblas output_st_dgemm_openblas output_st_cgemm_openblas output_st_zgemm_openblas output_st_sgemm_mkl output_st_dgemm_mkl output_st_cgemm_mkl output_st_zgemm_mkl % SGEMM Single threaded axes1 = subplot(4, 4, 1); hold(axes1,'on'); plot(data_st_sgemm_asm_blis(:,1), data_st_sgemm_asm_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_sgemm_openblas(:,1), data_st_sgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_sgemm_mkl(:,1), data_st_sgemm_mkl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SGEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % DGEMM Single threaded axes1 = subplot(4, 4, 5); hold(axes1,'on'); plot(data_st_dgemm_asm_blis(:,1), data_st_dgemm_asm_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dgemm_openblas(:,1), data_st_dgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_dgemm_mkl(:,1), data_st_dgemm_mkl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DGEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % 
extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) % CGEMM Single threaded axes1 = subplot(4, 4, 9); hold(axes1,'on'); plot(data_st_cgemm_1m_blis(:,1), data_st_cgemm_1m_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_cgemm_openblas(:,1), data_st_cgemm_openblas(:,4),'--', 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_cgemm_mkl(:,1), data_st_cgemm_mkl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CGEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % ZGEMM Single threaded axes1 = subplot(4, 4, 13); hold(axes1,'on'); plot(data_st_zgemm_1m_blis(:,1), data_st_zgemm_1m_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_zgemm_openblas(:,1), data_st_zgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_zgemm_mkl(:,1), data_st_zgemm_mkl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZGEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) clear *gemm* rmpath(pathname) 
blis-0.6.1/test/studies/skx/plot_hemm_mt_perf.m000066400000000000000000000077011360743507500216100ustar00rootroot00000000000000addpath(pathname) output_mt_shemm_asm_blis output_mt_dhemm_asm_blis output_mt_chemm_1m_blis output_mt_zhemm_1m_blis output_mt_shemm_openblas output_mt_dhemm_openblas output_mt_chemm_openblas output_mt_zhemm_openblas output_mt_shemm_mkl output_mt_dhemm_mkl output_mt_chemm_mkl output_mt_zhemm_mkl % SSYMM multi threaded axes1 = subplot(4, 4, 3); hold(axes1,'on'); plot(data_mt_shemm_asm_blis(:,1), data_mt_shemm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_shemm_openblas(:,1), data_mt_shemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_shemm_mkl(:,1), data_mt_shemm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SSYMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % DSYMM multi threaded axes1 = subplot(4, 4, 7); hold(axes1,'on'); plot(data_mt_dhemm_asm_blis(:,1), data_mt_dhemm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_dhemm_openblas(:,1), data_mt_dhemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_dhemm_mkl(:,1), data_mt_dhemm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DSYMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 
'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'bemt'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) % CHEMM multi threaded axes1 = subplot(4, 4, 11); hold(axes1,'on'); plot(data_mt_chemm_1m_blis(:,1), data_mt_chemm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_chemm_openblas(:,1), data_mt_chemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_chemm_mkl(:,1), data_mt_chemm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CHEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % ZHEMM multi threaded axes1 = subplot(4, 4, 15); hold(axes1,'on'); plot(data_mt_zhemm_1m_blis(:,1), data_mt_zhemm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_zhemm_openblas(:,1), data_mt_zhemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_zhemm_mkl(:,1), data_mt_zhemm_mkl(:,3),'--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZHEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 
'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) clear *hemm* rmpath(pathname) blis-0.6.1/test/studies/skx/plot_hemm_st_perf.m000066400000000000000000000076511360743507500216220ustar00rootroot00000000000000addpath(pathname) output_st_shemm_asm_blis output_st_dhemm_asm_blis output_st_chemm_1m_blis output_st_zhemm_1m_blis output_st_shemm_openblas output_st_dhemm_openblas output_st_chemm_openblas output_st_zhemm_openblas output_st_shemm_mkl output_st_dhemm_mkl output_st_chemm_mkl output_st_zhemm_mkl % SSYMM Single threaded axes1 = subplot(4, 4, 3); hold(axes1,'on'); plot(data_st_shemm_asm_blis(:,1), data_st_shemm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_shemm_openblas(:,1), data_st_shemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_shemm_mkl(:,1), data_st_shemm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SSYMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % DSYMM Single threaded axes1 = subplot(4, 4, 7); hold(axes1,'on'); plot(data_st_dhemm_asm_blis(:,1), data_st_dhemm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dhemm_openblas(:,1), data_st_dhemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_dhemm_mkl(:,1), data_st_dhemm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DSYMM 
(single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) % CHEMM Single threaded axes1 = subplot(4, 4, 11); hold(axes1,'on'); plot(data_st_chemm_1m_blis(:,1), data_st_chemm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_chemm_openblas(:,1), data_st_chemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_chemm_mkl(:,1), data_st_chemm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CHEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % ZHEMM Single threaded axes1 = subplot(4, 4, 15); hold(axes1,'on'); plot(data_st_zhemm_1m_blis(:,1), data_st_zhemm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_zhemm_openblas(:,1), data_st_zhemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_zhemm_mkl(:,1), data_st_zhemm_mkl(:,3),'--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZHEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 
'Helvetica Neue'); % legend({'BLIS', 'BLIS (AVX2)', 'OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) clear *hemm* rmpath(pathname) blis-0.6.1/test/studies/skx/plot_skx_perf.m000066400000000000000000000011441360743507500207620ustar00rootroot00000000000000fontsize = 6; numcores = 4; freq = 3.5; sflopspercycle = 64; dflopspercycle = 32; speak = sflopspercycle*freq; dpeak = dflopspercycle*freq; xmax_mt = 5000; fig1 = figure(1); clf(fig1) % pathname = './20180711/'; plot_gemm_st_perf plot_syrk_st_perf plot_hemm_st_perf plot_trmm_st_perf fig1.PaperPositionMode = 'auto'; orient(fig1,'landscape') print(fig1, 'skx-st', '-dpdf','-fillpage') % fig1 = figure(2); % clf; % % plot_gemm_mt_perf % plot_syrk_mt_perf % plot_hemm_mt_perf % plot_trmm_mt_perf % % fig1.PaperPositionMode = 'auto'; % orient(fig1,'landscape') % print(fig1, 'A57-mt', '-dpdf','-fillpage') blis-0.6.1/test/studies/skx/plot_syrk_mt_perf.m000066400000000000000000000134101360743507500216440ustar00rootroot00000000000000addpath(pathname) output_mt_ssyrk_asm_blis output_mt_dsyrk_asm_blis output_mt_csyrk_1m_blis output_mt_zsyrk_1m_blis output_mt_ssyrk_openblas output_mt_dsyrk_openblas output_mt_csyrk_openblas output_mt_zsyrk_openblas output_mt_ssyrk_mkl output_mt_dsyrk_mkl output_mt_csyrk_mkl output_mt_zsyrk_mkl plot_lower=0; if(plot_lower) output_mt_ssyrk_asm_blis output_mt_dsyrk_asm_blis output_mt_csyrk_1m_blis output_mt_zsyrk_1m_blis output_mt_ssyrk_openblas output_mt_dsyrk_openblas output_mt_csyrk_openblas output_mt_zsyrk_openblas output_mt_ssyrk_mkl output_mt_dsyrk_mkl output_mt_csyrk_mkl output_mt_zsyrk_mkl end % SSYRK multi threaded axes1 = subplot(4, 4, 2); hold(axes1,'on'); plot(data_mt_ssyrk_asm_blis(:,1), data_mt_ssyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_ssyrk_openblas(:,1), data_mt_ssyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); 
plot(data_mt_ssyrk_mkl(:,1), data_mt_ssyrk_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); if(plot_lower) plot(data_mt_ssyrk_l_asm_blis(:,1), data_mt_ssyrk_l_asm_blis(:,3), '-.','LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_ssyrk_l_openblas(:,1), data_mt_ssyrk_l_openblas(:,3), '-.', 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_ssyrk_l_mkl(:,1), data_mt_ssyrk_l_mkl(:,3), '-.', 'LineWidth', 1.25,'Color', [1 0 0]); end ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SSYRK (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % DSYRK multi threaded axes1 = subplot(4, 4, 6); hold(axes1,'on'); plot(data_mt_dsyrk_asm_blis(:,1), data_mt_dsyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_dsyrk_openblas(:,1), data_mt_dsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_dsyrk_mkl(:,1), data_mt_dsyrk_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); if(plot_lower) plot(data_mt_dsyrk_l_asm_blis(:,1), data_mt_dsyrk_l_asm_blis(:,3), '-.', 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_dsyrk_l_openblas(:,1), data_mt_dsyrk_l_openblas(:,3), '-.', 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_dsyrk_l_mkl(:,1), data_mt_dsyrk_l_mkl(:,3), '-.', 'LineWidth', 1.25,'Color', [1 0 0]); end ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DSYRK (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 
'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'bemt'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) % CSYRK multi threaded axes1 = subplot(4, 4, 10); hold(axes1,'on'); plot(data_mt_csyrk_1m_blis(:,1), data_mt_csyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_csyrk_openblas(:,1), data_mt_csyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_csyrk_mkl(:,1), data_mt_csyrk_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); if(plot_lower) plot(data_mt_csyrk_l_1m_blis(:,1), data_mt_csyrk_l_1m_blis(:,3),'-.', 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_csyrk_l_openblas(:,1), data_mt_csyrk_l_openblas(:,3), '-.', 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_csyrk_l_mkl(:,1), data_mt_csyrk_l_mkl(:,3), '-.', 'LineWidth', 1.25,'Color', [1 0 0]); end ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CSYRK (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % ZSYRK multi threaded axes1 = subplot(4, 4, 14); hold(axes1,'on'); plot(data_mt_zsyrk_1m_blis(:,1), data_mt_zsyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_zsyrk_openblas(:,1), data_mt_zsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_zsyrk_mkl(:,1), data_mt_zsyrk_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); if(plot_lower) plot(data_mt_zsyrk_l_1m_blis(:,1), data_mt_zsyrk_l_1m_blis(:,3), '-.', 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_zsyrk_l_openblas(:,1), data_mt_zsyrk_l_openblas(:,3), '-.', 
'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_zsyrk_l_mkl(:,1), data_mt_zsyrk_l_mkl(:,3), '-.', 'LineWidth', 1.25,'Color', [1 0 0]); end ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZSYRK (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); legend({'BLIS (Upper)', 'OpenBLAS (Upper)', 'ARMPL (Upper)', 'BLIS (Lower)', 'OpenBLAS (Lower) ', 'ARMPL (Lower)'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) clear *syrk* rmpath(pathname) blis-0.6.1/test/studies/skx/plot_syrk_st_perf.m000066400000000000000000000125001360743507500216510ustar00rootroot00000000000000addpath(pathname) output_st_ssyrk_asm_blis output_st_dsyrk_asm_blis output_st_csyrk_1m_blis output_st_zsyrk_1m_blis output_st_ssyrk_openblas output_st_dsyrk_openblas output_st_csyrk_openblas output_st_zsyrk_openblas output_st_ssyrk_mkl output_st_dsyrk_mkl output_st_csyrk_mkl output_st_zsyrk_mkl plot_lower = 0; % SSYRK Single threaded axes1 = subplot(4, 4, 2); hold(axes1,'on'); plot(data_st_ssyrk_asm_blis(:,1), data_st_ssyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_ssyrk_openblas(:,1), data_st_ssyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_ssyrk_mkl(:,1), data_st_ssyrk_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); if(plot_lower) plot(data_st_ssyrk_l_asm_blis(:,1), data_st_ssyrk_l_asm_blis(:,3), '-.','LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_ssyrk_l_openblas(:,1), data_st_ssyrk_l_openblas(:,3), '-.', 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_ssyrk_l_mkl(:,1), data_st_ssyrk_l_mkl(:,3), '-.', 'LineWidth', 1.25,'Color', [1 
0 0]); end ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SSYRK (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % DSYRK single threaded axes1 = subplot(4, 4, 6); hold(axes1,'on'); plot(data_st_dsyrk_asm_blis(:,1), data_st_dsyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dsyrk_openblas(:,1), data_st_dsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_dsyrk_mkl(:,1), data_st_dsyrk_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); if(plot_lower) plot(data_st_dsyrk_l_asm_blis(:,1), data_st_dsyrk_l_asm_blis(:,3), '-.', 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dsyrk_l_openblas(:,1), data_st_dsyrk_l_openblas(:,3), '-.', 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_dsyrk_l_mkl(:,1), data_st_dsyrk_l_mkl(:,3), '-.', 'LineWidth', 1.25,'Color', [1 0 0]); end ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DSYRK (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) % CSYRK single threaded axes1 = subplot(4, 4, 10); hold(axes1,'on'); plot(data_st_csyrk_1m_blis(:,1), data_st_csyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); 
plot(data_st_csyrk_openblas(:,1), data_st_csyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_csyrk_mkl(:,1), data_st_csyrk_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); if(plot_lower) plot(data_st_csyrk_l_1m_blis(:,1), data_st_csyrk_l_1m_blis(:,3),'-.', 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_csyrk_l_openblas(:,1), data_st_csyrk_l_openblas(:,3), '-.', 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_csyrk_l_mkl(:,1), data_st_csyrk_l_mkl(:,3), '-.', 'LineWidth', 1.25,'Color', [1 0 0]); end ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CSYRK (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % ZSYRK single threaded axes1 = subplot(4, 4, 14); hold(axes1,'on'); plot(data_st_zsyrk_1m_blis(:,1), data_st_zsyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_zsyrk_openblas(:,1), data_st_zsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_zsyrk_mkl(:,1), data_st_zsyrk_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); if(plot_lower) plot(data_st_zsyrk_l_1m_blis(:,1), data_st_zsyrk_l_1m_blis(:,3), '-.', 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_zsyrk_l_openblas(:,1), data_st_zsyrk_l_openblas(:,3), '-.', 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_zsyrk_l_mkl(:,1), data_st_zsyrk_l_mkl(:,3), '-.', 'LineWidth', 1.25,'Color', [1 0 0]); end ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZSYRK (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 
'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); % legend({'BLIS', 'BLIS (AVX2)','OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) clear *syrk* rmpath(pathname) blis-0.6.1/test/studies/skx/plot_trmm_mt_perf.m000066400000000000000000000076771360743507500216550ustar00rootroot00000000000000addpath(pathname) output_mt_strmm_asm_blis output_mt_dtrmm_asm_blis output_mt_ctrmm_1m_blis output_mt_ztrmm_1m_blis output_mt_strmm_openblas output_mt_dtrmm_openblas output_mt_ctrmm_openblas output_mt_ztrmm_openblas output_mt_strmm_mkl output_mt_dtrmm_mkl output_mt_ctrmm_mkl output_mt_ztrmm_mkl % mtRMM multi threaded axes1 = subplot(4, 4, 4); hold(axes1,'on'); plot(data_mt_strmm_asm_blis(:,1), data_mt_strmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_strmm_openblas(:,1), data_mt_strmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_strmm_mkl(:,1), data_mt_strmm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('STRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % DTRMM multi threaded axes1 = subplot(4, 4, 8); hold(axes1,'on'); plot(data_mt_dtrmm_asm_blis(:,1), data_mt_dtrmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_dtrmm_openblas(:,1), data_mt_dtrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_dtrmm_mkl(:,1), data_mt_dtrmm_mkl(:,3), '--', 'LineWidth', 
1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'bemt'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) % CTRMM multi threaded axes1 = subplot(4, 4, 12); hold(axes1,'on'); plot(data_mt_ctrmm_1m_blis(:,1), data_mt_ctrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_ctrmm_openblas(:,1), data_mt_ctrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_ctrmm_mkl(:,1), data_mt_ctrmm_mkl(:,3),'--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % ZTRMM multi threaded axes1 = subplot(4, 4, 16); hold(axes1,'on'); plot(data_mt_ztrmm_1m_blis(:,1), data_mt_ztrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_mt_ztrmm_openblas(:,1), data_mt_ztrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_mt_ztrmm_mkl(:,1), data_mt_ztrmm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', 
fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) clear *trmm* rmpath(pathname) blis-0.6.1/test/studies/skx/plot_trmm_st_perf.m000066400000000000000000000076441360743507500216550ustar00rootroot00000000000000addpath(pathname) output_st_strmm_asm_blis output_st_dtrmm_asm_blis output_st_ctrmm_1m_blis output_st_ztrmm_1m_blis output_st_strmm_openblas output_st_dtrmm_openblas output_st_ctrmm_openblas output_st_ztrmm_openblas output_st_strmm_mkl output_st_dtrmm_mkl output_st_ctrmm_mkl output_st_ztrmm_mkl % STRMM Single threaded axes1 = subplot(4, 4, 4); hold(axes1,'on'); plot(data_st_strmm_asm_blis(:,1), data_st_strmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_strmm_openblas(:,1), data_st_strmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_strmm_mkl(:,1), data_st_strmm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('STRMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % DTRMM Single threaded axes1 = subplot(4, 4, 8); hold(axes1,'on'); plot(data_st_dtrmm_asm_blis(:,1), data_st_dtrmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dtrmm_openblas(:,1), 
data_st_dtrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_dtrmm_mkl(:,1), data_st_dtrmm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DTRMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) % CTRMM Single threaded axes1 = subplot(4, 4, 12); hold(axes1,'on'); plot(data_st_ctrmm_1m_blis(:,1), data_st_ctrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_ctrmm_openblas(:,1), data_st_ctrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_ctrmm_mkl(:,1), data_st_ctrmm_mkl(:,3),'--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CTRMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % ZTRMM Single threaded axes1 = subplot(4, 4, 16); hold(axes1,'on'); plot(data_st_ztrmm_1m_blis(:,1), data_st_ztrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_ztrmm_openblas(:,1), data_st_ztrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); plot(data_st_ztrmm_mkl(:,1), data_st_ztrmm_mkl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 0]); ylabel( 'GFLOPS', 
'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZTRMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); legend({'BLIS', 'BLIS (AVX2)','OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) clear *trmm* rmpath(pathname) blis-0.6.1/test/studies/skx/runme.sh000077500000000000000000000224441360743507500174200ustar00rootroot00000000000000
#!/bin/bash
#
# runme.sh -- driver for the SKX level-3 performance study.
#
# For every (core count, datatype, implementation, operation) combination
# selected below, runs
#     ./test_<dt><op>_<impl>_<st|mt>.x
# from the current directory and redirects its stdout to
#     <YYYYMMDD>/<subdir>/output_<st|mt>_<dt><op>_<impl>.m
# where <subdir> is 1socket, 2sockets or st. Real-domain cases are run
# first, complex-domain cases second.
#
# A failing or missing benchmark does not abort the sweep; it is reported
# on stderr and the remaining combinations still run.

# Print a diagnostic on stderr.
warn() {
	printf 'runme.sh: %s\n' "$*" >&2
}

# File prefixes.
exec_root="test"
out_root="output"

# All results go below a directory named after today's date.
out_rootdir=$(date +%Y%m%d)
if ! mkdir -p "${out_rootdir}"; then
	warn "cannot create output directory '${out_rootdir}'"
	exit 1
fi

#sys="blis"
#sys="stampede"
#sys="stampede2"
#sys="lonestar"
#sys="wahlberg"
#sys="arm-softiron"
sys="skx"

# Bind threads to processors.
#export OMP_PROC_BIND=true
#export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 1 3 5 7 9 11 13 15"
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7"
#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7"
#export GOMP_CPU_AFFINITY="0 2 4 6 1 3 5 7"
#export GOMP_CPU_AFFINITY="0 4 1 5 2 6 3 7"
#export GOMP_CPU_AFFINITY="0 1 4 5 8 9 12 13 16 17 20 21 24 25 28 29 32 33 36 37 40 41 44 45"
#export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 16 18 20 22 1 3 5 7 9 11 13 15 17 19 21 23"
# The CPU list is kept in one place because it is re-exported inside the
# test loops after having been unset for some implementations.
cpu_affinity="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39"
export GOMP_CPU_AFFINITY="${cpu_affinity}"

# Modify LD_LIBRARY_PATH.
case "${sys}" in
	blis)
		export LD_LIBRARY_PATH="$LD_LIBRARY_PATH"
		;;
	stampede)
		# A hack to use libiomp5 with gcc.
		export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64"
		;;
	stampede2)
		:
		;;
	lonestar)
		# A hack to use libiomp5 with gcc.
		export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/16.0.1.150/compilers_and_libraries_2016.1.150/linux/compiler/lib/intel64"
		;;
	wahlberg)
		export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/flame/lib/acml/5.3.1/gfortran64_int64/lib"
		export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HOME/flame/lib/acml/5.3.1/gfortran64_mp_int64/lib"
		;;
esac

# Threading scheme to use when multithreading.
# NOTE(review): the test loops below only read the skx variables
# (jc_1_nt .. nt_1 and jc_2_nt .. nt_2). The values set for the other
# systems are kept for reference but are not consumed by this script.
case "${sys}" in
	blis)
		jc_nt=1 # 5th loop
		ic_nt=4 # 3rd loop
		jr_nt=1 # 2nd loop
		ir_nt=1 # 1st loop
		nt=4
		;;
	stampede)
		jc_nt=2 # 5th loop
		ic_nt=8 # 3rd loop
		jr_nt=1 # 2nd loop
		ir_nt=1 # 1st loop
		nt=16
		;;
	lonestar)
		jc_nt=2 # 5th loop
		ic_nt=12 # 3rd loop
		jr_nt=1 # 2nd loop
		ir_nt=1 # 1st loop
		nt=24
		;;
	wahlberg)
		jc_nt=1 # 5th loop
		ic_nt=2 # 3rd loop
		jr_nt=2 # 2nd loop
		ir_nt=1 # 1st loop
		nt=4
		;;
	arm-softiron)
		#jc_nt=1 # 5th loop
		#ic_nt=2 # 3rd loop
		#jr_nt=2 # 2nd loop
		#ir_nt=1 # 1st loop
		nt=4
		;;
	stampede2)
		jc_nt=2 # 5th loop
		ic_nt=1 # 3rd loop
		jr_nt=10 # 2nd loop
		ir_nt=1 # 1st loop
		nt=20
		;;
	skx)
		# One socket (20 cores).
		jc_1_nt=1 # 5th loop
		ic_1_nt=20 # 3rd loop
		jr_1_nt=1 # 2nd loop
		ir_1_nt=1 # 1st loop
		nt_1=20
		# Two sockets (40 cores).
		jc_2_nt=2 # 5th loop
		ic_2_nt=20 # 3rd loop
		jr_2_nt=1 # 2nd loop
		ir_2_nt=1 # 1st loop
		nt_2=40
		;;
esac

# Threadedness to test.
# NOTE(review): threads/threads_r are not read anywhere in this script;
# whether a run is "st" or "mt" is derived from the core counts below.
#threads="mt"
#threads_r="mt"
threads="st mt"
threads_r="st mt"

# Datatypes to test.
dts="c z "
#dts="c z"
#dts_r="s d"
dts_r="s d"

# Operations to test.
l3_ops="gemm syrk hemm trmm"
#l3_ops="gemm"
test_ops="${l3_ops}"
test_ops_r="${l3_ops}"

# Complex domain implementations to test.
case "${sys}" in
	blis)
		#test_impls="openblas mkl 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis"
		test_impls="openblas 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
		;;
	stampede)
		test_impls="openblas mkl asm_blis 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
		#test_impls="openblas mkl asm_blis"
		;;
	stampede2)
		test_impls="openblas mkl 1m_blis"
		#test_impls="1m_blis"
		;;
	lonestar)
		test_impls="asm_blis 4mhw_blis 4m1a_blis 1m_blis 3m1_blis"
		#test_impls="1m_blis 3m1_blis"
		#test_impls="4m1a_blis"
		#test_impls="mkl"
		#test_impls="openblas mkl asm_blis"
		;;
	wahlberg)
		# The longer list used to be assigned and then immediately
		# overwritten; it is kept here as a commented-out alternative.
		#test_impls="openblas acml asm_blis 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis"
		test_impls="openblas acml asm_blis"
		;;
	arm-softiron)
		test_impls="openblas 1m_blis mkl"
		;;
	skx)
		test_impls="openblas 1m_blis mkl"
		;;
esac

# Real domain implementations to test.
test_impls_r="openblas mkl asm_blis"

# Core counts to test for the real (cores_r) and complex (cores) suites.
cores_r="20 40"
cores="20 40"

#######################################
# Run one suite of benchmarks.
# Globals:
#   exec_root, out_root, out_rootdir, cpu_affinity (read)
#   jc_1_nt ic_1_nt jr_1_nt ir_1_nt nt_1 (read, 20-core runs)
#   jc_2_nt ic_2_nt jr_2_nt ir_2_nt nt_2 (read, 40-core runs)
#   BLIS_JC_NT BLIS_IC_NT BLIS_JR_NT BLIS_IR_NT BLIS_NUM_THREADS
#   OMP_NUM_THREADS GOMP_CPU_AFFINITY (exported / unset)
# Arguments:
#   $1 - space-separated core counts (20, 40, or a value <= 1 for "st")
#   $2 - space-separated datatype characters
#   $3 - space-separated implementation names
#   $4 - space-separated operation names
# Outputs:
#   One "Running ..." line per benchmark on stdout, warnings on stderr,
#   and one output file per benchmark.
#######################################
run_tests() {
	local cores_list=$1
	local dts_list=$2
	local impls_list=$3
	local ops_list=$4
	local nc dt im op th out_dir exec_name out_file

	# The lists are space-separated words, so the unquoted expansions in
	# the for-statements below rely on word splitting on purpose.
	for nc in ${cores_list}; do

		# Everything in this if/else depends only on the core count, so
		# it is set up once per core count rather than once per run.
		if [ "${nc}" -gt 1 ]; then
			th="mt"
			# Do not let a single-threaded setting from an earlier
			# iteration (or the caller's environment) linger next to
			# the explicit per-loop thread counts.
			unset BLIS_NUM_THREADS
			if [ "${nc}" -eq 20 ]; then
				export BLIS_JC_NT=${jc_1_nt}
				export BLIS_IC_NT=${ic_1_nt}
				export BLIS_JR_NT=${jr_1_nt}
				export BLIS_IR_NT=${ir_1_nt}
				export OMP_NUM_THREADS=${nt_1}
				out_dir="${out_rootdir}/1socket"
			elif [ "${nc}" -eq 40 ]; then
				export BLIS_JC_NT=${jc_2_nt}
				export BLIS_IC_NT=${ic_2_nt}
				export BLIS_JR_NT=${jr_2_nt}
				export BLIS_IR_NT=${ir_2_nt}
				export OMP_NUM_THREADS=${nt_2}
				out_dir="${out_rootdir}/2sockets"
			else
				# No threading scheme is defined for this count; running
				# anyway would silently reuse the previous settings and
				# write into the previous output directory.
				warn "no threading scheme for ${nc} cores; skipping"
				continue
			fi
		else
			th="st"
			# Drop per-loop thread counts left over from a previous
			# multithreaded iteration.
			unset BLIS_JC_NT BLIS_IC_NT BLIS_JR_NT BLIS_IR_NT
			export BLIS_NUM_THREADS=1
			export OMP_NUM_THREADS=1
			out_dir="${out_rootdir}/st"
		fi

		# Created here so that each suite works on its own, regardless
		# of which suite ran before it.
		if ! mkdir -p "${out_dir}"; then
			warn "cannot create output directory '${out_dir}'; skipping ${nc} cores"
			continue
		fi

		for dt in ${dts_list}; do
			for im in ${impls_list}; do

				if [ "${th}" = "mt" ]; then
					# NOTE(review): the original comment here read
					# "Unset GOMP_CPU_AFFINITY for MKL when using
					# mkl_intel_thread", but the test has always been
					# on "openblas". The code is left as it was;
					# confirm which implementation was intended.
					if [ "${im}" = "openblas" ]; then
						unset GOMP_CPU_AFFINITY
					else
						export GOMP_CPU_AFFINITY="${cpu_affinity}"
					fi
				fi

				for op in ${ops_list}; do

					# Construct the name of the test executable.
					exec_name="${exec_root}_${dt}${op}_${im}_${th}.x"

					# Construct the name of the output file.
					out_file="${out_dir}/${out_root}_${th}_${dt}${op}_${im}.m"

					# Check before redirecting: the redirection alone
					# would already create (or truncate) the output file.
					if [ ! -x "./${exec_name}" ]; then
						warn "./${exec_name} not found or not executable; skipping"
						continue
					fi

					echo "Running (nt = ${OMP_NUM_THREADS}) ./${exec_name} > ${out_file}"

					# Run executable.
					if ! "./${exec_name}" > "${out_file}"; then
						warn "./${exec_name} exited with an error; ${out_file} may be incomplete"
					fi

					sleep 1

				done
			done
		done
	done
}

# First perform real test cases.
run_tests "${cores_r}" "${dts_r}" "${test_impls_r}" "${test_ops_r}"

# Now perform complex test cases.
run_tests "${cores}" "${dts}" "${test_impls}" "${test_ops}"
blis-0.6.1/test/studies/skx/test_gemm.c000066400000000000000000000220101360743507500200500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" void zgemm3m_( f77_char*, f77_char*, f77_int*, f77_int*, f77_int*, dcomplex*, dcomplex*, f77_int*, dcomplex*, f77_int*, dcomplex*, dcomplex*, f77_int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; k_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. 
k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, k, 2, 2*m, &a ); //bli_obj_create( dt, k, n, 2, 2*k, &b ); //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = 
bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* bp = bli_obj_buffer( &b ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = 
bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zgemm_( &f77_transa, //zgemm3m_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/studies/skx/test_hemm.c000066400000000000000000000210761360743507500200640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" void zgemm3m_( f77_char*, f77_char*, f77_int*, f77_int*, f77_int*, dcomplex*, dcomplex*, f77_int*, dcomplex*, f77_int*, dcomplex*, dcomplex*, f77_int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. 
cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; side = BLIS_LEFT; uploa = BLIS_LOWER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if (bli_is_left(side)) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uploa, &a ); bli_mkherm( &a ); bli_mktrim( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); 
bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* bp = bli_obj_buffer( &b ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); 
dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left(side) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/studies/skx/test_syrk.c000066400000000000000000000176141360743507500201310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; trans_t transa; uplo_t uploc; f77_char f77_transa; f77_char f77_uploc; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; k_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. 
if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; uploc = BLIS_LOWER; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_uplo ( uploc, &f77_uploc ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%csyrk_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%csyrk_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); //bli_obj_create( dt, m, k, 2, 2*m, &a ); //bli_obj_create( dt, k, n, 2, 2*k, &b ); //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_obj_set_struc( BLIS_SYMMETRIC, &c ); bli_obj_set_uplo ( uploc, &c ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_syrk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = 
bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); csyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m * k ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%csyrk_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%csyrk_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) 
= [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/studies/skx/test_trmm.c000066400000000000000000000176341360743507500201220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; side = BLIS_LEFT; uploa = BLIS_LOWER; transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trmm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* cp = bli_obj_buffer( &c ); strmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = 
bli_obj_buffer( &a ); double* cp = bli_obj_buffer( &c ); dtrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* cp = bli_obj_buffer( &c ); ztrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left(side) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/studies/thunderx2/000077500000000000000000000000001360743507500170435ustar00rootroot00000000000000blis-0.6.1/test/studies/thunderx2/Makefile000066400000000000000000000476461360743507500205240ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance 
BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for standalone BLIS test drivers. 
#
#
# --- Makefile PHONY target definitions ----------------------------------------
#

# Every target named here is a label for a group of test executables rather
# than a file to be built, so it is declared phony: a stray file or directory
# with the same name must not make the target look up to date. The list
# covers the aggregate targets (all-*, <impl>-st, <impl>-mt) as well as the
# per-operation groups, including the armpl-gemm-* groups that armpl-st and
# armpl-mt depend on. The acml-gemm-* entries are kept from the previous
# list.
.PHONY: all all-st all-mt \
        blis-st blis-mt \
        openblas-st openblas-mt \
        mkl-st mkl-mt \
        armpl-st armpl-mt \
        blis-gemm-st openblas-gemm-st mkl-gemm-st acml-gemm-st armpl-gemm-st \
        blis-gemm-mt openblas-gemm-mt mkl-gemm-mt acml-gemm-mt armpl-gemm-mt \
        blis-syrk-st openblas-syrk-st mkl-syrk-st armpl-syrk-st \
        blis-syrk-mt openblas-syrk-mt mkl-syrk-mt armpl-syrk-mt \
        blis-hemm-st openblas-hemm-st mkl-hemm-st armpl-hemm-st \
        blis-hemm-mt openblas-hemm-mt mkl-hemm-mt armpl-hemm-mt \
        blis-trmm-st openblas-trmm-st mkl-trmm-st armpl-trmm-st \
        blis-trmm-mt openblas-trmm-mt mkl-trmm-mt armpl-trmm-mt \
        clean cleanx

# Comments:
# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given.
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in
#   the second case because CONFIG_NAME is not yet set.
ifneq ($(strip $(BLIS_INSTALL_PATH)),)
# Build against an installed BLIS.
LIB_PATH   := $(BLIS_INSTALL_PATH)/lib
INC_PATH   := $(BLIS_INSTALL_PATH)/include/blis
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
else
# Build against the source tree three directories up.
DIST_PATH  := ../../..
LIB_PATH    = ../../../lib/$(CONFIG_NAME)
INC_PATH    = ../../../include/$(CONFIG_NAME)
SHARE_PATH := ../../..
endif

#
# --- Include common makefile definitions --------------------------------------
#

# Include the common makefile fragment.
# (The leading '-' tells make not to fail if the fragment is missing.)
-include $(SHARE_PATH)/common.mk

#
# --- BLAS and LAPACK implementations ------------------------------------------
#

# BLIS library and header path. This is simply wherever it was installed.
#BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis

# BLIS library.
#BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a

# BLAS library path(s). This is where the BLAS libraries reside.
# Machine-specific locations of the reference BLAS implementations. Edit
# these to match the host before building the non-BLIS drivers.
# NOTE(review): the ACML paths are derived from HOME_LIB_PATH, which itself
# points inside an OpenBLAS directory -- confirm that is intended.
# NOTE(review): ARMPL_LIB_PATH ends in '/', so the ARMPL_LIB values below
# contain a doubled slash.
HOME_LIB_PATH   := $(HOME)/OpenBLAS/lib
MKL_LIB_PATH    := ${MKLROOT}/lib/intel64
ARMPL_LIB_PATH  := /opt/arm/armpl-18.4.0_ThunderX2CN99_Ubuntu-16.04_gcc_7.1.0_aarch64-linux/lib/
ACML_LIB_PATH   := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_int64/lib
ACMLP_LIB_PATH  := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_mp_int64/lib

# For each library, the plain variable is the link line for the sequential
# build and the variable with a trailing 'P' is the one for the threaded
# build.

# OpenBLAS
# Both variables name the same archive.
OPENBLAS_LIB    := $(HOME_LIB_PATH)/libopenblas.a
OPENBLASP_LIB   := $(HOME_LIB_PATH)/libopenblas.a

# ATLAS
ATLAS_LIB       := $(HOME_LIB_PATH)/libf77blas.a \
                   $(HOME_LIB_PATH)/libatlas.a

# For ARMPL
ARMPL_LIB       := $(ARMPL_LIB_PATH)/libarmpl_lp64.a
ARMPLP_LIB      := $(ARMPL_LIB_PATH)/libarmpl_lp64_mp.a

# MKL
MKL_LIB         := -L$(MKL_LIB_PATH) \
                   -lmkl_intel_lp64 \
                   -lmkl_core \
                   -lmkl_sequential \
                   -lpthread -lm -ldl
# Alternative threaded MKL link line (Intel threading layer), disabled.
#MKLP_LIB        := -L$(MKL_LIB_PATH) \
#                   -lmkl_intel_thread \
#                   -lmkl_core \
#                   -lmkl_intel_ilp64 \
#                   -L$(ICC_LIB_PATH) \
#                   -liomp5
# Threaded MKL link line in use: GNU threading layer plus -fopenmp.
MKLP_LIB        := -L$(MKL_LIB_PATH) \
                   -lmkl_intel_lp64 \
                   -lmkl_core \
                   -lmkl_gnu_thread \
                   -lpthread -lm -ldl -fopenmp
                   #-L$(ICC_LIB_PATH) \
                   #-lgomp

# ACML
ACML_LIB        := -L$(ACML_LIB_PATH) \
                   -lgfortran -lm -lrt -ldl -lacml
ACMLP_LIB       := -L$(ACMLP_LIB_PATH) \
                   -lgfortran -lm -lrt -ldl -lacml_mp

#
# --- General build definitions ------------------------------------------------
#

# Sources are read from, and objects written to, the current directory.
TEST_SRC_PATH  := .
TEST_OBJ_PATH  := .

# Gather all local object files.
# One .o name per .c file found in TEST_SRC_PATH, sorted.
TEST_OBJS      := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \
                                    $(TEST_OBJ_PATH)/%.o, \
                                    $(wildcard $(TEST_SRC_PATH)/*.c)))

# Use the "framework" CFLAGS for the configuration family.
# (get-frame-cflags-for is presumably defined by the common.mk fragment
# included earlier -- it is not defined in this file's visible portion.)
CFLAGS         := $(call get-frame-cflags-for,$(CONFIG_NAME))

# Add local header paths to CFLAGS.
CFLAGS         += -g -I$(TEST_SRC_PATH)

# Locate the libblis library to which we will link.
#LIBBLIS_LINK   := $(LIB_PATH)/$(LIBBLIS_L)

# Datatype
# Each value defines the DT preprocessor macro for one datatype.
DT_S     := -DDT=BLIS_FLOAT
DT_D     := -DDT=BLIS_DOUBLE
DT_C     := -DDT=BLIS_SCOMPLEX
DT_Z     := -DDT=BLIS_DCOMPLEX

# Which library?
BLI_DEF := -DBLIS BLA_DEF := -DBLAS # Complex implementation type D3MHW := -DIND=BLIS_3MH D3M1 := -DIND=BLIS_3M1 D4MHW := -DIND=BLIS_4MH D4M1B := -DIND=BLIS_4M1B D4M1A := -DIND=BLIS_4M1A D1M := -DIND=BLIS_1M DNAT := -DIND=BLIS_NAT # Implementation string STR_3MHW := -DSTR=\"3mhw\" STR_3M1 := -DSTR=\"3m1\" STR_4MHW := -DSTR=\"4mhw\" STR_4M1B := -DSTR=\"4m1b\" STR_4M1A := -DSTR=\"4m1a\" STR_1M := -DSTR=\"1m\" STR_NAT := -DSTR=\"asm\" STR_OBL := -DSTR=\"openblas\" STR_MKL := -DSTR=\"mkl\" STR_ACML := -DSTR=\"acml\" STR_ARMPL:= -DSTR=\"armpl\" # Single or multithreaded string STR_ST := -DTHR_STR=\"st\" STR_MT := -DTHR_STR=\"mt\" # Problem size specification PDEF_ST := -DP_BEGIN=40 \ -DP_END=2000 \ -DP_INC=40 PDEF_MT := -DP_BEGIN=200 \ -DP_END=10000 \ -DP_INC=200 # # --- Targets/rules ------------------------------------------------------------ # all-st: blis-st openblas-st armpl-st all-mt: blis-mt openblas-mt armpl-mt blis-st: blis-gemm-st blis-syrk-st blis-hemm-st blis-trmm-st blis-mt: blis-gemm-mt blis-syrk-mt blis-hemm-mt blis-trmm-mt openblas-st: openblas-gemm-st openblas-syrk-st openblas-hemm-st openblas-trmm-st openblas-mt: openblas-gemm-mt openblas-syrk-mt openblas-hemm-mt openblas-trmm-mt mkl-st: mkl-gemm-st mkl-syrk-st mkl-hemm-st mkl-trmm-st mkl-mt: mkl-gemm-mt mkl-syrk-mt mkl-hemm-mt mkl-trmm-mt armpl-st: armpl-gemm-st armpl-syrk-st armpl-hemm-st armpl-trmm-st armpl-mt: armpl-gemm-mt armpl-syrk-mt armpl-hemm-mt armpl-trmm-mt blis-gemm-st: \ test_sgemm_asm_blis_st.x \ test_dgemm_asm_blis_st.x \ \ test_cgemm_1m_blis_st.x \ test_zgemm_1m_blis_st.x \ test_cgemm_asm_blis_st.x \ test_zgemm_asm_blis_st.x blis-syrk-st: \ test_ssyrk_asm_blis_st.x \ test_dsyrk_asm_blis_st.x \ test_csyrk_1m_blis_st.x \ test_zsyrk_1m_blis_st.x blis-syrk-mt: \ test_ssyrk_asm_blis_mt.x \ test_dsyrk_asm_blis_mt.x \ test_csyrk_1m_blis_mt.x \ test_zsyrk_1m_blis_mt.x blis-hemm-st: \ test_shemm_asm_blis_st.x \ test_dhemm_asm_blis_st.x \ test_chemm_1m_blis_st.x \ test_zhemm_1m_blis_st.x 
blis-hemm-mt: \ test_shemm_asm_blis_mt.x \ test_dhemm_asm_blis_mt.x \ test_chemm_1m_blis_mt.x \ test_zhemm_1m_blis_mt.x blis-trmm-st: \ test_strmm_asm_blis_st.x \ test_dtrmm_asm_blis_st.x \ test_ctrmm_1m_blis_st.x \ test_ztrmm_1m_blis_st.x blis-trmm-mt: \ test_strmm_asm_blis_mt.x \ test_dtrmm_asm_blis_mt.x \ test_ctrmm_1m_blis_mt.x \ test_ztrmm_1m_blis_mt.x blis-gemm-mt: \ test_sgemm_asm_blis_mt.x \ test_dgemm_asm_blis_mt.x \ \ test_cgemm_1m_blis_mt.x \ test_zgemm_1m_blis_mt.x \ test_cgemm_asm_blis_mt.x \ test_zgemm_asm_blis_mt.x openblas-gemm-st: \ test_sgemm_openblas_st.x \ test_dgemm_openblas_st.x \ test_cgemm_openblas_st.x \ test_zgemm_openblas_st.x openblas-gemm-mt: \ test_sgemm_openblas_mt.x \ test_dgemm_openblas_mt.x \ test_cgemm_openblas_mt.x \ test_zgemm_openblas_mt.x openblas-syrk-st: \ test_ssyrk_openblas_st.x \ test_dsyrk_openblas_st.x \ test_csyrk_openblas_st.x \ test_zsyrk_openblas_st.x openblas-syrk-mt: \ test_ssyrk_openblas_mt.x \ test_dsyrk_openblas_mt.x \ test_csyrk_openblas_mt.x \ test_zsyrk_openblas_mt.x openblas-hemm-st: \ test_shemm_openblas_st.x \ test_dhemm_openblas_st.x \ test_chemm_openblas_st.x \ test_zhemm_openblas_st.x openblas-hemm-mt: \ test_shemm_openblas_mt.x \ test_dhemm_openblas_mt.x \ test_chemm_openblas_mt.x \ test_zhemm_openblas_mt.x openblas-trmm-st: \ test_strmm_openblas_st.x \ test_dtrmm_openblas_st.x \ test_ctrmm_openblas_st.x \ test_ztrmm_openblas_st.x openblas-trmm-mt: \ test_strmm_openblas_mt.x \ test_dtrmm_openblas_mt.x \ test_ctrmm_openblas_mt.x \ test_ztrmm_openblas_mt.x mkl-gemm-st: \ test_sgemm_mkl_st.x \ test_dgemm_mkl_st.x \ test_cgemm_mkl_st.x \ test_zgemm_mkl_st.x mkl-gemm-mt: \ test_sgemm_mkl_mt.x \ test_dgemm_mkl_mt.x \ test_cgemm_mkl_mt.x \ test_zgemm_mkl_mt.x mkl-syrk-st: \ test_ssyrk_mkl_st.x \ test_dsyrk_mkl_st.x \ test_csyrk_mkl_st.x \ test_zsyrk_mkl_st.x mkl-syrk-mt: \ test_ssyrk_mkl_mt.x \ test_dsyrk_mkl_mt.x \ test_csyrk_mkl_mt.x \ test_zsyrk_mkl_mt.x mkl-hemm-st: \ test_shemm_mkl_st.x \ 
test_dhemm_mkl_st.x \ test_chemm_mkl_st.x \ test_zhemm_mkl_st.x mkl-hemm-mt: \ test_shemm_mkl_mt.x \ test_dhemm_mkl_mt.x \ test_chemm_mkl_mt.x \ test_zhemm_mkl_mt.x mkl-trmm-st: \ test_strmm_mkl_st.x \ test_dtrmm_mkl_st.x \ test_ctrmm_mkl_st.x \ test_ztrmm_mkl_st.x mkl-trmm-mt: \ test_strmm_mkl_mt.x \ test_dtrmm_mkl_mt.x \ test_ctrmm_mkl_mt.x \ test_ztrmm_mkl_mt.x armpl-gemm-st: \ test_sgemm_armpl_st.x \ test_dgemm_armpl_st.x \ test_cgemm_armpl_st.x \ test_zgemm_armpl_st.x armpl-gemm-mt: \ test_sgemm_armpl_mt.x \ test_dgemm_armpl_mt.x \ test_cgemm_armpl_mt.x \ test_zgemm_armpl_mt.x armpl-syrk-st: \ test_ssyrk_armpl_st.x \ test_dsyrk_armpl_st.x \ test_csyrk_armpl_st.x \ test_zsyrk_armpl_st.x armpl-syrk-mt: \ test_ssyrk_armpl_mt.x \ test_dsyrk_armpl_mt.x \ test_csyrk_armpl_mt.x \ test_zsyrk_armpl_mt.x armpl-hemm-st: \ test_shemm_armpl_st.x \ test_dhemm_armpl_st.x \ test_chemm_armpl_st.x \ test_zhemm_armpl_st.x armpl-hemm-mt: \ test_shemm_armpl_mt.x \ test_dhemm_armpl_mt.x \ test_chemm_armpl_mt.x \ test_zhemm_armpl_mt.x armpl-trmm-st: \ test_strmm_armpl_st.x \ test_dtrmm_armpl_st.x \ test_ctrmm_armpl_st.x \ test_ztrmm_armpl_st.x armpl-trmm-mt: \ test_strmm_armpl_mt.x \ test_dtrmm_armpl_mt.x \ test_ctrmm_armpl_mt.x \ test_ztrmm_armpl_mt.x # --Object file rules -- $(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(CC) $(CFLAGS) -c $< -o $@ # blis 3mhw test_z%_3mhw_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_ST) -c $< -o $@ test_c%_3mhw_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_ST) -c $< -o $@ test_z%_3mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@ test_c%_3mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3MHW) $(STR_3MHW) $(STR_MT) -c $< -o $@ # blis 3m1 test_z%_3m1_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@ 
test_c%_3m1_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_ST) -c $< -o $@ test_z%_3m1_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@ test_c%_3m1_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D3M1) $(STR_3M1) $(STR_MT) -c $< -o $@ # blis 4mhw test_z%_4mhw_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@ test_c%_4mhw_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_ST) -c $< -o $@ test_z%_4mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@ test_c%_4mhw_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4MHW) $(STR_4MHW) $(STR_MT) -c $< -o $@ # blis 4m1b test_z%_4m1b_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_ST) -c $< -o $@ test_c%_4m1b_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_ST) -c $< -o $@ test_z%_4m1b_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@ test_c%_4m1b_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1B) $(STR_4M1B) $(STR_MT) -c $< -o $@ # blis 4m1a test_z%_4m1a_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@ test_c%_4m1a_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_ST) -c $< -o $@ test_z%_4m1a_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@ test_c%_4m1a_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D4M1A) $(STR_4M1A) $(STR_MT) -c $< -o $@ # blis 1m test_z%_1m_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_ST) -c $< -o $@ 
test_c%_1m_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_ST) -c $< -o $@ test_z%_1m_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_MT) -c $< -o $@ test_c%_1m_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(D1M) $(STR_1M) $(STR_MT) -c $< -o $@ # blis asm test_d%_asm_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@ test_s%_asm_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@ test_z%_asm_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@ test_c%_asm_blis_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_ST) -c $< -o $@ test_d%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@ test_s%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@ test_z%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@ test_c%_asm_blis_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLI_DEF) $(DNAT) $(STR_NAT) $(STR_MT) -c $< -o $@ # openblas test_d%_openblas_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@ test_s%_openblas_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@ test_z%_openblas_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@ test_c%_openblas_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_ST) -c $< -o $@ test_d%_openblas_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@ test_s%_openblas_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) 
$(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@ test_z%_openblas_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@ test_c%_openblas_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_OBL) $(STR_MT) -c $< -o $@ # mkl test_d%_mkl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@ test_s%_mkl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@ test_z%_mkl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@ test_c%_mkl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_ST) -c $< -o $@ test_d%_mkl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@ test_s%_mkl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@ test_z%_mkl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@ test_c%_mkl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_MKL) $(STR_MT) -c $< -o $@ # armpl test_d%_armpl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@ test_s%_armpl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@ test_z%_armpl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_Z) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@ test_c%_armpl_st.o: test_%.c $(CC) $(CFLAGS) $(PDEF_ST) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_ST) -c $< -o $@ test_d%_armpl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@ test_s%_armpl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_S) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@ test_z%_armpl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_Z) $(BLA_DEF) $(DNAT) 
$(STR_ARMPL) $(STR_MT) -c $< -o $@ test_c%_armpl_mt.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_C) $(BLA_DEF) $(DNAT) $(STR_ARMPL) $(STR_MT) -c $< -o $@ # mkl # -- Executable file rules -- # NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS # on the link command line in case BLIS was configured with the BLAS # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK) $(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK) $(LINKER) $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_mkl_st.x: test_%_mkl_st.o $(LIBBLIS_LINK) $(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_mkl_mt.x: test_%_mkl_mt.o $(LIBBLIS_LINK) $(LINKER) $< $(MKLP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_armpl_st.x: test_%_armpl_st.o $(LIBBLIS_LINK) $(LINKER) $< $(ARMPL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_armpl_mt.x: test_%_armpl_mt.o $(LIBBLIS_LINK) $(LINKER) $< $(ARMPLP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_blis_st.x: test_%_blis_st.o $(LIBBLIS_LINK) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ test_%_blis_mt.x: test_%_blis_mt.o $(LIBBLIS_LINK) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.o *.x blis-0.6.1/test/studies/thunderx2/plot_gemm_mt_perf.m000066400000000000000000000112421360743507500227200ustar00rootroot00000000000000axes1 = subplot(4, 4, 1); hold(axes1,'on'); axes2 = subplot(4, 4, 5); hold(axes2,'on'); axes3 = subplot(4, 4, 9); hold(axes3,'on'); axes4 = subplot(4, 4, 13); hold(axes4,'on'); addpath(pathname_blis) if(plot_s) % SGEMM multi threaded axes(axes1); output_mt_sgemm_asm_blis plot(data_mt_sgemm_asm_blis(:,1), data_mt_sgemm_asm_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); end % DGEMM multi threaded if(plot_d) axes(axes2); output_mt_dgemm_asm_blis 
plot(data_mt_dgemm_asm_blis(:,1), data_mt_dgemm_asm_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); end % CGEMM multi threaded if(plot_c) axes(axes3); output_mt_cgemm_1m_blis plot(data_mt_cgemm_1m_blis(:,1), data_mt_cgemm_1m_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); end % ZGEMM multi threaded if(plot_z) axes(axes4); output_mt_zgemm_1m_blis plot(data_mt_zgemm_1m_blis(:,1), data_mt_zgemm_1m_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); end clear *gemm* rmpath(pathname_blis) % OpenBLAS addpath(pathname_openblas) if(plot_s) axes(axes1); output_mt_sgemm_openblas plot(data_mt_sgemm_openblas(:,1), data_mt_sgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_d) axes(axes2); output_mt_dgemm_openblas plot(data_mt_dgemm_openblas(:,1), data_mt_dgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_c) axes(axes3); output_mt_cgemm_openblas plot(data_mt_cgemm_openblas(:,1), data_mt_cgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_z) axes(axes4); output_mt_zgemm_openblas plot(data_mt_zgemm_openblas(:,1), data_mt_zgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); end clear *gemm* rmpath(pathname_openblas) % ARMPL addpath(pathname_armpl) if(plot_s) axes(axes1); output_mt_sgemm_armpl plot(data_mt_sgemm_armpl(:,1), data_mt_sgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_d) axes(axes2); output_mt_dgemm_armpl plot(data_mt_dgemm_armpl(:,1), data_mt_dgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_c) axes(axes3); output_mt_cgemm_armpl plot(data_mt_cgemm_armpl(:,1), data_mt_cgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_z) axes(axes4); output_mt_zgemm_armpl plot(data_mt_zgemm_armpl(:,1), data_mt_zgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end clear *gemm* rmpath(pathname_armpl) axes(axes1); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', 
fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SGEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores] ) axes(axes2); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DGEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes2,'on'); set(axes2,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) axes(axes3); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CGEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes3,'on'); set(axes3,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) axes(axes4); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZGEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes4,'on'); set(axes4,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 
'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) blis-0.6.1/test/studies/thunderx2/plot_gemm_st_perf.m000066400000000000000000000101251360743507500227250ustar00rootroot00000000000000addpath(pathname) output_st_sgemm_asm_blis output_st_dgemm_asm_blis output_st_cgemm_1m_blis output_st_zgemm_1m_blis output_st_sgemm_openblas output_st_dgemm_openblas output_st_cgemm_openblas output_st_zgemm_openblas % SGEMM Single threaded axes1 = subplot(4, 4, 1); hold(axes1,'on'); plot(data_st_sgemm_asm_blis(:,1), data_st_sgemm_asm_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_sgemm_openblas(:,1), data_st_sgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SGEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % DGEMM Single threaded axes1 = subplot(4, 4, 5); hold(axes1,'on'); plot(data_st_dgemm_asm_blis(:,1), data_st_dgemm_asm_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dgemm_openblas(:,1), data_st_dgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DGEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 
'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) % CGEMM Single threaded axes1 = subplot(4, 4, 9); hold(axes1,'on'); plot(data_st_cgemm_1m_blis(:,1), data_st_cgemm_1m_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_cgemm_openblas(:,1), data_st_cgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CGEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % ZGEMM Single threaded axes1 = subplot(4, 4, 13); hold(axes1,'on'); plot(data_st_zgemm_1m_blis(:,1), data_st_zgemm_1m_blis(:,4), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_zgemm_openblas(:,1), data_st_zgemm_openblas(:,4), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZGEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) clear *gemm* rmpath(pathname) addpath(pathname_armpl) output_st_sgemm_armpl output_st_dgemm_armpl output_st_cgemm_armpl output_st_zgemm_armpl % SGEMM Single threaded subplot(4, 4, 1); plot(data_st_sgemm_armpl(:,1), data_st_sgemm_armpl(:,4), '--', 'LineWidth', 
1.25,'Color', [1 0 1]); subplot(4, 4, 5); plot(data_st_dgemm_armpl(:,1), data_st_dgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 9); plot(data_st_cgemm_armpl(:,1), data_st_cgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 13); plot(data_st_zgemm_armpl(:,1), data_st_zgemm_armpl(:,4), '--', 'LineWidth', 1.25,'Color', [1 0 1]); clear *gemm* rmpath(pathname_armpl) blis-0.6.1/test/studies/thunderx2/plot_hemm_mt_perf.m000066400000000000000000000112161360743507500227220ustar00rootroot00000000000000axes3 = subplot(4, 4, 3); hold(axes3,'on'); axes7 = subplot(4, 4, 7); hold(axes7,'on'); axes11 = subplot(4, 4, 11); hold(axes11,'on'); axes15 = subplot(4, 4, 15); hold(axes15,'on'); addpath(pathname_blis) if(plot_s) axes(axes3); output_mt_shemm_asm_blis plot(data_mt_shemm_asm_blis(:,1), data_mt_shemm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_d) axes(axes7); output_mt_dhemm_asm_blis plot(data_mt_dhemm_asm_blis(:,1), data_mt_dhemm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_c) axes(axes11); output_mt_chemm_1m_blis plot(data_mt_chemm_1m_blis(:,1), data_mt_chemm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_z) axes(axes15); output_mt_zhemm_1m_blis plot(data_mt_zhemm_1m_blis(:,1), data_mt_zhemm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end clear *hemm* rmpath(pathname_blis) addpath(pathname_openblas) if(plot_s) axes(axes3); output_mt_shemm_openblas plot(data_mt_shemm_openblas(:,1), data_mt_shemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_d) axes(axes7); output_mt_dhemm_openblas plot(data_mt_dhemm_openblas(:,1), data_mt_dhemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_c) axes(axes11); output_mt_chemm_openblas plot(data_mt_chemm_openblas(:,1), data_mt_chemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_z) axes(axes15); output_mt_zhemm_openblas plot(data_mt_zhemm_openblas(:,1), data_mt_zhemm_openblas(:,3), 
'LineWidth', 1.25,'Color', [0 1 0]); end clear *hemm* rmpath(pathname_openblas) addpath(pathname_armpl); if(plot_s) axes(axes3); output_mt_shemm_armpl plot(data_mt_shemm_armpl(:,1), data_mt_shemm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_d) axes(axes7); output_mt_dhemm_armpl plot(data_mt_dhemm_armpl(:,1), data_mt_dhemm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_c) axes(axes11); output_mt_chemm_armpl plot(data_mt_chemm_armpl(:,1), data_mt_chemm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_z) axes(axes15); output_mt_zhemm_armpl plot(data_mt_zhemm_armpl(:,1), data_mt_zhemm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end clear *hemm* rmpath(pathname_armpl) % SSYMM multi threaded axes(axes3); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SSYMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes3,'on'); set(axes3,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) axes(axes7); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DSYMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes7,'on'); set(axes7,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) % CHEMM multi threaded axes(axes11); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 
'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CHEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes11,'on'); set(axes11,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % ZHEMM multi threaded axes(axes15); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZHEMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes15,'on'); set(axes15,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); % legend({'BLIS', 'BLIS (AVX2)', 'OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) blis-0.6.1/test/studies/thunderx2/plot_hemm_st_perf.m000066400000000000000000000077201360743507500227350ustar00rootroot00000000000000addpath(pathname) output_st_shemm_asm_blis output_st_dhemm_asm_blis output_st_chemm_1m_blis output_st_zhemm_1m_blis output_st_shemm_openblas output_st_dhemm_openblas output_st_chemm_openblas output_st_zhemm_openblas % SSYMM Single threaded axes1 = subplot(4, 4, 3); hold(axes1,'on'); plot(data_st_shemm_asm_blis(:,1), data_st_shemm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_shemm_openblas(:,1), data_st_shemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SSYMM (single-threaded)','FontSize', fontsize, 
'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % DSYMM Single threaded axes1 = subplot(4, 4, 7); hold(axes1,'on'); plot(data_st_dhemm_asm_blis(:,1), data_st_dhemm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dhemm_openblas(:,1), data_st_dhemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DSYMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) % CHEMM Single threaded axes1 = subplot(4, 4, 11); hold(axes1,'on'); plot(data_st_chemm_1m_blis(:,1), data_st_chemm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_chemm_openblas(:,1), data_st_chemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CHEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % ZHEMM Single threaded axes1 = subplot(4, 4, 15); hold(axes1,'on'); plot(data_st_zhemm_1m_blis(:,1), data_st_zhemm_1m_blis(:,3), 'LineWidth', 
1.25,'Color', [0 0 1]); plot(data_st_zhemm_openblas(:,1), data_st_zhemm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZHEMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) clear *hemm* rmpath(pathname) addpath(pathname_armpl) output_st_shemm_armpl output_st_dhemm_armpl output_st_chemm_armpl output_st_zhemm_armpl % Shemm Single threaded subplot(4, 4, 3); plot(data_st_shemm_armpl(:,1), data_st_shemm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 7); plot(data_st_dhemm_armpl(:,1), data_st_dhemm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 11); plot(data_st_chemm_armpl(:,1), data_st_chemm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 15); plot(data_st_zhemm_armpl(:,1), data_st_zhemm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); clear *hemm* rmpath(pathname_armpl) blis-0.6.1/test/studies/thunderx2/plot_syrk_mt_perf.m000066400000000000000000000106101360743507500227610ustar00rootroot00000000000000 axes2 = subplot(4, 4, 2); hold(axes2,'on'); axes6 = subplot(4, 4, 6); hold(axes6,'on'); axes10 = subplot(4, 4, 10); hold(axes10,'on'); axes14 = subplot(4, 4, 14); hold(axes14,'on'); addpath(pathname_blis) if(plot_s) axes(axes2); output_mt_ssyrk_asm_blis plot(data_mt_ssyrk_asm_blis(:,1), data_mt_ssyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_d) axes(axes6); output_mt_dsyrk_asm_blis plot(data_mt_dsyrk_asm_blis(:,1), data_mt_dsyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_c) axes(axes10); output_mt_csyrk_1m_blis 
plot(data_mt_csyrk_1m_blis(:,1), data_mt_csyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_z) axes(axes14); output_mt_zsyrk_1m_blis plot(data_mt_zsyrk_1m_blis(:,1), data_mt_zsyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end clear *syrk* rmpath(pathname_blis) % OpenBLAS addpath(pathname_openblas) if(plot_s) axes(axes2); output_mt_ssyrk_openblas plot(data_mt_ssyrk_openblas(:,1), data_mt_ssyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_d) axes(axes6); output_mt_dsyrk_openblas plot(data_mt_dsyrk_openblas(:,1), data_mt_dsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_c) axes(axes10); output_mt_csyrk_openblas plot(data_mt_csyrk_openblas(:,1), data_mt_csyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_z) axes(axes14); output_mt_zsyrk_openblas plot(data_mt_zsyrk_openblas(:,1), data_mt_zsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end clear *syrk* rmpath(pathname_openblas) % ARMPL addpath(pathname_armpl) if(plot_s) axes(axes2); output_mt_ssyrk_armpl plot(data_mt_ssyrk_armpl(:,1), data_mt_ssyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_d) axes(axes6); output_mt_dsyrk_armpl plot(data_mt_dsyrk_armpl(:,1), data_mt_dsyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_c) axes(axes10); output_mt_csyrk_armpl plot(data_mt_csyrk_armpl(:,1), data_mt_csyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_z) axes(axes14); output_mt_zsyrk_armpl plot(data_mt_zsyrk_armpl(:,1), data_mt_zsyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end clear *syrk* rmpath(pathname_armpl) axes(axes2); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SSYRK (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes2,'on'); 
set(axes2,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % DSYRK multi threaded axes(axes6); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DSYRK (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes6,'on'); set(axes6,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) % CSYRK multi threaded axes(axes10); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CSYRK (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes10,'on'); set(axes10,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) % ZSYRK multi threaded axes(axes14); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZSYRK (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes14,'on'); set(axes14,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); % legend({'BLIS', 'BLIS (AVX2)','OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) 
blis-0.6.1/test/studies/thunderx2/plot_syrk_st_perf.m000066400000000000000000000101661360743507500227750ustar00rootroot00000000000000addpath(pathname) output_st_ssyrk_asm_blis output_st_dsyrk_asm_blis output_st_csyrk_1m_blis output_st_zsyrk_1m_blis output_st_ssyrk_openblas output_st_dsyrk_openblas output_st_csyrk_openblas output_st_zsyrk_openblas plot_lower = 0; % SSYRK Single threaded axes1 = subplot(4, 4, 2); hold(axes1,'on'); plot(data_st_ssyrk_asm_blis(:,1), data_st_ssyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_ssyrk_openblas(:,1), data_st_ssyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('SSYRK (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % DSYRK single threaded axes1 = subplot(4, 4, 6); hold(axes1,'on'); plot(data_st_dsyrk_asm_blis(:,1), data_st_dsyrk_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dsyrk_openblas(:,1), data_st_dsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DSYRK (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) % CSYRK 
single threaded axes1 = subplot(4, 4, 10); hold(axes1,'on'); plot(data_st_csyrk_1m_blis(:,1), data_st_csyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_csyrk_openblas(:,1), data_st_csyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CSYRK (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % ZSYRK single threaded axes1 = subplot(4, 4, 14); hold(axes1,'on'); plot(data_st_zsyrk_1m_blis(:,1), data_st_zsyrk_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_zsyrk_openblas(:,1), data_st_zsyrk_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZSYRK (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); % legend({'BLIS', 'BLIS (AVX2)','OpenBLAS', 'MKL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) clear *syrk* rmpath(pathname) addpath(pathname_armpl) output_st_ssyrk_armpl output_st_dsyrk_armpl output_st_csyrk_armpl output_st_zsyrk_armpl % Ssyrk Single threaded subplot(4, 4, 2); plot(data_st_ssyrk_armpl(:,1), data_st_ssyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 6); plot(data_st_dsyrk_armpl(:,1), data_st_dsyrk_armpl(:,3), '--', 
'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 10); plot(data_st_csyrk_armpl(:,1), data_st_csyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 14); plot(data_st_zsyrk_armpl(:,1), data_st_zsyrk_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); clear *syrk* rmpath(pathname_armpl) blis-0.6.1/test/studies/thunderx2/plot_thunderx2_perf.m000066400000000000000000000040611360743507500232170ustar00rootroot00000000000000plot_st = 1; plot_1s = 1; plot_2s = 1; plot_s = 1; plot_d = 1; plot_c = 1; plot_z = 1; plot_armpl = 1; fontsize = 6; freq = 2; sflopspercycle = 16; dflopspercycle = 8; speak = sflopspercycle*freq; dpeak = dflopspercycle*freq; xmax_mt = 5000; if(plot_st) numcores = 1; fig1 = figure(1); clf(fig1) % pathname = './20180824/'; pathname_armpl = './20180829/'; plot_gemm_st_perf plot_syrk_st_perf plot_hemm_st_perf plot_trmm_st_perf %fig1.PaperPositionMode = 'auto'; orient(fig1,'landscape') set(fig1,'PaperUnits','normalized'); set(fig1,'PaperPosition', [0 0 1 1]); print(fig1, 'thunderx2-st-20180829', '-dpdf') clear pathname pathname_armpl end if (plot_1s) fig1 = figure(2); clf; numcores = 28; pathname_blis = './20180830/1socket'; pathname_armpl = './20180830/1socket'; pathname_openblas = './20180830/1socket'; %JC = 2, IC = 14 plot_gemm_mt_perf plot_syrk_mt_perf plot_hemm_mt_perf plot_trmm_mt_perf %fig1.PaperPositionMode = 'auto'; orient(fig1,'landscape') set(fig1,'PaperUnits','normalized'); set(fig1,'PaperPosition', [0 0 1 1]); print(fig1, 'thunderx2-mt-28cores-20180830', '-dpdf') end if(plot_2s) numcores = 56; %JC = 4, IC = 14 fig1 = figure(3); clf; plot_gemm = 1; plot_syrk = 1; plot_hemm = 1; plot_trmm = 1; plot_s = 1; plot_d = 1; plot_c = 1; plot_z = 1; pathname_blis = './20180830/2sockets'; pathname_openblas = './20180830/2sockets'; pathname_armpl = './20180830/2sockets'; if(plot_gemm) plot_gemm_mt_perf end if(plot_syrk) plot_syrk_mt_perf end if(plot_hemm) plot_hemm_mt_perf end if(plot_trmm) plot_trmm_mt_perf end 
%fig1.PaperPositionMode = 'auto'; orient(fig1,'landscape') set(fig1,'PaperUnits','normalized'); set(fig1,'PaperPosition', [0 0 1 1]); print(fig1, 'thunderx2-mt-56cores-20180830', '-dpdf') endblis-0.6.1/test/studies/thunderx2/plot_trmm_mt_perf.m000066400000000000000000000114711360743507500227560ustar00rootroot00000000000000axes4 = subplot(4, 4, 4); hold(axes4,'on'); axes8 = subplot(4, 4, 8); hold(axes8,'on'); axes12 = subplot(4, 4, 12); hold(axes12,'on'); axes16 = subplot(4, 4, 16); hold(axes16,'on'); addpath(pathname_blis) if(plot_s) axes(axes4); output_mt_strmm_asm_blis plot(data_mt_strmm_asm_blis(:,1), data_mt_strmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_d) axes(axes8); output_mt_dtrmm_asm_blis plot(data_mt_dtrmm_asm_blis(:,1), data_mt_dtrmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_c) axes(axes12); output_mt_ctrmm_1m_blis plot(data_mt_ctrmm_1m_blis(:,1), data_mt_ctrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end if(plot_z) axes(axes16); output_mt_ztrmm_1m_blis plot(data_mt_ztrmm_1m_blis(:,1), data_mt_ztrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); end clear *trmm* rmpath(pathname_blis) addpath(pathname_openblas) if(plot_s) axes(axes4); output_mt_strmm_openblas plot(data_mt_strmm_openblas(:,1), data_mt_strmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_d) axes(axes8); output_mt_dtrmm_openblas plot(data_mt_dtrmm_openblas(:,1), data_mt_dtrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_c) axes(axes12); output_mt_ctrmm_openblas plot(data_mt_ctrmm_openblas(:,1), data_mt_ctrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end if(plot_z) axes(axes16); output_mt_ztrmm_openblas plot(data_mt_ztrmm_openblas(:,1), data_mt_ztrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); end clear *trmm* rmpath(pathname_openblas) if(plot_armpl) addpath(pathname_armpl) if(plot_s) axes(axes4); output_mt_strmm_armpl plot(data_mt_strmm_armpl(:,1), data_mt_strmm_armpl(:,3), '--', 
'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_d) axes(axes8); output_mt_dtrmm_armpl plot(data_mt_dtrmm_armpl(:,1), data_mt_dtrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_c) axes(axes12); output_mt_ctrmm_armpl plot(data_mt_ctrmm_armpl(:,1), data_mt_ctrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end if(plot_z) axes(axes16); output_mt_ztrmm_armpl plot(data_mt_ztrmm_armpl(:,1), data_mt_ztrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); end clear *trmm* rmpath(pathname_armpl) end axes(axes4); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('STRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes4,'on'); set(axes4,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) axes(axes8); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes8,'on'); set(axes8,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) axes(axes12); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); 
box(axes12,'on'); set(axes12,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 speak*numcores ] ) axes(axes16); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZTRMM (multi-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes16,'on'); set(axes16,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); legend({'BLIS','OpenBLAS'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); v = axis; % extract the current ranges axis( [ 0 xmax_mt 0 dpeak*numcores ] ) legend({'BLIS','OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); blis-0.6.1/test/studies/thunderx2/plot_trmm_st_perf.m000066400000000000000000000101261360743507500227600ustar00rootroot00000000000000addpath(pathname) output_st_strmm_asm_blis output_st_dtrmm_asm_blis output_st_ctrmm_1m_blis output_st_ztrmm_1m_blis output_st_strmm_openblas output_st_dtrmm_openblas output_st_ctrmm_openblas output_st_ztrmm_openblas % STRMM Single threaded axes1 = subplot(4, 4, 4); hold(axes1,'on'); plot(data_st_strmm_asm_blis(:,1), data_st_strmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_strmm_openblas(:,1), data_st_strmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('STRMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the 
current ranges axis( [ 0 v(2) 0 speak ] ) % DTRMM Single threaded axes1 = subplot(4, 4, 8); hold(axes1,'on'); plot(data_st_dtrmm_asm_blis(:,1), data_st_dtrmm_asm_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_dtrmm_openblas(:,1), data_st_dtrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('DTRMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %legend({'BLIS', 'OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'best'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) % CTRMM Single threaded axes1 = subplot(4, 4, 12); hold(axes1,'on'); plot(data_st_ctrmm_1m_blis(:,1), data_st_ctrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_ctrmm_openblas(:,1), data_st_ctrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); %xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('CTRMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 speak ] ) % ZTRMM Single threaded axes1 = subplot(4, 4, 16); hold(axes1,'on'); plot(data_st_ztrmm_1m_blis(:,1), data_st_ztrmm_1m_blis(:,3), 'LineWidth', 1.25,'Color', [0 0 1]); plot(data_st_ztrmm_openblas(:,1), data_st_ztrmm_openblas(:,3), 'LineWidth', 1.25,'Color', [0 1 0]); ylabel( 'GFLOPS', 'FontSize', fontsize, 'FontWeight', 'bold', 
'FontName', 'Helvetica Neue'); xlabel( 'matrix dimension m=n=k', 'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue' ); title('ZTRMM (single-threaded)','FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); box(axes1,'on'); set(axes1,'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue'); v = axis; % extract the current ranges axis( [ 0 v(2) 0 dpeak ] ) clear *trmm* rmpath(pathname) addpath(pathname_armpl) output_st_strmm_armpl output_st_dtrmm_armpl output_st_ctrmm_armpl output_st_ztrmm_armpl % Strmm Single threaded subplot(4, 4, 4); plot(data_st_strmm_armpl(:,1), data_st_strmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 8); plot(data_st_dtrmm_armpl(:,1), data_st_dtrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 12); plot(data_st_ctrmm_armpl(:,1), data_st_ctrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); subplot(4, 4, 16); plot(data_st_ztrmm_armpl(:,1), data_st_ztrmm_armpl(:,3), '--', 'LineWidth', 1.25,'Color', [1 0 1]); legend({'BLIS','OpenBLAS', 'ARMPL'},'FontSize', fontsize, 'FontWeight', 'bold', 'FontName', 'Helvetica Neue', 'Location', 'South'); clear *trmm* rmpath(pathname_armpl) blis-0.6.1/test/studies/thunderx2/runme.sh000077500000000000000000000131351360743507500205330ustar00rootroot00000000000000#!/bin/bash # File pefixes. exec_root="test" out_root="output" out_rootdir=$(date +%Y%m%d) #out_rootdir=20180830 mkdir -p $out_rootdir sys="thunderx2" # Bind threads to processors. 
#export OMP_PROC_BIND=true #export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55" unset GOMP_CPU_AFFINITY # Threading scheme to use when multithreading if [ ${sys} = "blis" ]; then jc_nt=1 # 5th loop ic_nt=4 # 3rd loop jr_nt=1 # 2nd loop ir_nt=1 # 1st loop nt=4 elif [ ${sys} = "thunderx2" ]; then jc_1_nt=2 # 5th loop ic_1_nt=14 # 3rd loop jr_1_nt=1 # 2nd loop ir_1_nt=1 # 1st loop nt_1=28 jc_2_nt=4 # 5th loop ic_2_nt=14 # 3rd loop jr_2_nt=1 # 2nd loop ir_2_nt=1 # 1st loop nt_2=56 fi # Threadedness to test. #threads="mt1 mt2" #threads_r="mt" #threads="st" #threads_r="st" # Datatypes to test. dts="c z" dts_r="s d" # Operations to test. #l3_ops="gemm syrk hemm trmm" l3_ops="gemm" test_ops="${l3_ops}" test_ops_r="${l3_ops}" # Complex domain implementations to test. if [ ${sys} = "blis" ]; then #test_impls="openblas mkl 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis" test_impls="openblas 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis 1m_blis" elif [ ${sys} = "thunderx2" ]; then #test_impls="openblas" #test_impls="armpl" #test_impls="1m_blis armpl" test_impls="openblas armpl 1m_blis" fi # Real domain implementations to test. test_impls_r="openblas armpl asm_blis" #test_impls_r="openblas" #test_impls_r="asm_blis" #test_impls_r="armpl" cores_r="1 28 56" cores="1 28 56" # First perform real test cases. for nc in ${cores_r}; do for dt in ${dts_r}; do for im in ${test_impls_r}; do for op in ${test_ops_r}; do # Set the number of threads according to th. if [ ${nc} -gt 1 ]; then # Unset GOMP_CPU_AFFINITY for MKL when using mkl_intel_thread. 
if [ ${im} = "openblas" ]; then unset GOMP_CPU_AFFINITY elif [ ${im} = "armpl" ]; then unset GOMP_CPU_AFFINITY else export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55" fi if [ ${nc} -eq 28 ]; then export BLIS_JC_NT=${jc_1_nt} export BLIS_IC_NT=${ic_1_nt} export BLIS_JR_NT=${jr_1_nt} export BLIS_IR_NT=${ir_1_nt} export OMP_NUM_THREADS=${nt_1} out_dir="${out_rootdir}/1socket" mkdir -p $out_rootdir/1socket elif [ ${nc} -eq 56 ]; then export BLIS_JC_NT=${jc_2_nt} export BLIS_IC_NT=${ic_2_nt} export BLIS_JR_NT=${jr_2_nt} export BLIS_IR_NT=${ir_2_nt} export OMP_NUM_THREADS=${nt_2} out_dir="${out_rootdir}/2sockets" mkdir -p $out_rootdir/2sockets fi th="mt" else export BLIS_JC_NT=1 export BLIS_IC_NT=1 export BLIS_JR_NT=1 export BLIS_IR_NT=1 export OMP_NUM_THREADS=1 out_dir="${out_rootdir}/st" mkdir -p $out_rootdir/st th="st" fi # Construct the name of the test executable. exec_name="${exec_root}_${dt}${op}_${im}_${th}.x" # Construct the name of the output file. out_file="${out_dir}/${out_root}_${th}_${dt}${op}_${im}.m" echo "Running (nt = ${OMP_NUM_THREADS}) ./${exec_name} > ${out_file}" # Run executable. ./${exec_name} > ${out_file} sleep 1 done done done done # Now perform complex test cases. for nc in ${cores}; do for dt in ${dts}; do for im in ${test_impls}; do for op in ${test_ops}; do # Set the number of threads according to th. if [ ${nc} -gt 1 ]; then # Unset GOMP_CPU_AFFINITY for MKL when using mkl_intel_thread. 
if [ ${im} = "openblas" ]; then unset GOMP_CPU_AFFINITY elif [ ${im} = "armpl" ]; then unset GOMP_CPU_AFFINITY else export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55" fi if [ ${nc} -eq 28 ]; then export BLIS_JC_NT=${jc_1_nt} export BLIS_IC_NT=${ic_1_nt} export BLIS_JR_NT=${jr_1_nt} export BLIS_IR_NT=${ir_1_nt} export OMP_NUM_THREADS=${nt_1} out_dir="${out_rootdir}/1socket" elif [ ${nc} -eq 56 ]; then export BLIS_JC_NT=${jc_2_nt} export BLIS_IC_NT=${ic_2_nt} export BLIS_JR_NT=${jr_2_nt} export BLIS_IR_NT=${ir_2_nt} export OMP_NUM_THREADS=${nt_2} out_dir="${out_rootdir}/2sockets" fi th="mt" else export BLIS_JC_NT=1 export BLIS_IC_NT=1 export BLIS_JR_NT=1 export BLIS_IR_NT=1 export OMP_NUM_THREADS=1 out_dir="${out_rootdir}/st" th="st" fi # Construct the name of the test executable. exec_name="${exec_root}_${dt}${op}_${im}_${th}.x" # Construct the name of the output file. out_file="${out_dir}/${out_root}_${th}_${dt}${op}_${im}.m" echo "Running (nt = ${OMP_NUM_THREADS}) ./${exec_name} > ${out_file}" # Run executable. ./${exec_name} > ${out_file} sleep 1 done done done done blis-0.6.1/test/studies/thunderx2/test_gemm.c000066400000000000000000000217751360743507500212070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" void zgemm3m_( f77_char*, f77_char*, f77_int*, f77_int*, f77_int*, dcomplex*, dcomplex*, f77_int*, dcomplex*, f77_int*, dcomplex*, dcomplex*, f77_int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; k_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. 
cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, k, 2, 2*m, &a ); //bli_obj_create( dt, k, n, 2, 2*k, &b ); //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif 
dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* bp = bli_obj_buffer( &b ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { 
f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zgemm_( &f77_transa, //zgemm3m_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%cgemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; } blis-0.6.1/test/studies/thunderx2/test_hemm.c000066400000000000000000000210701360743507500211740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" void zgemm3m_( f77_char*, f77_char*, f77_int*, f77_int*, f77_int*, dcomplex*, dcomplex*, f77_int*, dcomplex*, f77_int*, dcomplex*, dcomplex*, f77_int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. 
cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; side = BLIS_LEFT; uploa = BLIS_LOWER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if (bli_is_left(side)) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uploa, &a ); bli_mkherm( &a ); bli_mktrim( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); 
bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* bp = bli_obj_buffer( &b ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); 
dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left(side) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/studies/thunderx2/test_syrk.c000066400000000000000000000176061360743507500212500ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; trans_t transa; uplo_t uploc; f77_char f77_transa; f77_char f77_uploc; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; k_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. 
if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; transa = BLIS_NO_TRANSPOSE; uploc = BLIS_LOWER; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_uplo ( uploc, &f77_uploc ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%csyrk_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%csyrk_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); //bli_obj_create( dt, m, k, 2, 2*m, &a ); //bli_obj_create( dt, k, n, 2, 2*k, &b ); //bli_obj_create( dt, m, n, 2, 2*m, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_obj_set_struc( BLIS_SYMMETRIC, &c ); bli_obj_set_uplo ( uploc, &c ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_syrk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = 
bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); csyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m * k ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%csyrk_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%csyrk_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) 
= [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/studies/thunderx2/test_trmm.c000066400000000000000000000176261360743507500212410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_end = P_END; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; side = BLIS_LEFT; uploa = BLIS_LOWER; transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #ifdef BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trmm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* cp = bli_obj_buffer( &c ); strmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = 
bli_obj_buffer( &a ); double* cp = bli_obj_buffer( &c ); dtrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* cp = bli_obj_buffer( &c ); ztrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left(side) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR ); #else printf( "data_%s_%ctrmm_%s", THR_STR, dt_ch, STR ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/sup/000077500000000000000000000000001360743507500142475ustar00rootroot00000000000000blis-0.6.1/test/sup/Makefile000066400000000000000000000411411360743507500157100ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
#
#  Copyright (C) 2014, The University of Texas at Austin
#  Copyright (C) 2019, Advanced Micro Devices, Inc.
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are
#  met:
#   - Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   - Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   - Neither the name(s) of the copyright holder(s) nor the names of its
#     contributors may be used to endorse or promote products derived
#     from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

#
# Makefile
#
# Field G. Van Zee
#
# Makefile for standalone BLIS test drivers.
#
# Every driver is built from test_gemm.c with a different set of -D flags.
# One object/binary is generated per combination of datatype, transposition,
# storage, shape and implementation; see make-st-rule below for the naming
# scheme.
#

#
# --- Makefile PHONY target definitions ----------------------------------------
#

# Every target below names a command or a group of binaries rather than a
# file, so it is declared phony. This keeps a stray file called, e.g., "st"
# or "eigen" from making the target look up to date.
.PHONY: all st blis \
        blissup blislpab eigen openblas blasfeo libxsmm vendor \
        blissup-st blislpab-st eigen-st openblas-st blasfeo-st \
        libxsmm-st vendor-st \
        clean cleanx

#
# --- Determine makefile fragment location -------------------------------------
#

# Comments:
# - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given.
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in
#   the second case because CONFIG_NAME is not yet set.
ifneq ($(strip $(BLIS_INSTALL_PATH)),)
LIB_PATH   := $(BLIS_INSTALL_PATH)/lib
INC_PATH   := $(BLIS_INSTALL_PATH)/include/blis
SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis
else
DIST_PATH  := ../..
LIB_PATH    = ../../lib/$(CONFIG_NAME)
INC_PATH    = ../../include/$(CONFIG_NAME)
SHARE_PATH := ../..
endif

#
# --- Include common makefile definitions --------------------------------------
#

# Include the common makefile fragment.
# NOTE: the leading '-' makes a missing common.mk a silent no-op, in which
# case names this file uses but does not define (e.g. CONFIG_NAME,
# LIBBLIS_L, RM_F, get-user-cflags-for) stay empty.
-include $(SHARE_PATH)/common.mk

#
# --- BLAS and LAPACK implementations ------------------------------------------
#

# BLIS library and header path. This is simply wherever it was installed.
#BLIS_LIB_PATH  := $(INSTALL_PREFIX)/lib
#BLIS_INC_PATH  := $(INSTALL_PREFIX)/include/blis

# BLIS library.
#BLIS_LIB       := $(BLIS_LIB_PATH)/libblis.a

# BLAS library path(s). This is where the BLAS libraries reside.
HOME_LIB_PATH  := $(HOME)/flame/lib
MKL_LIB_PATH   := $(HOME)/intel/mkl/lib/intel64

# netlib BLAS
NETLIB_LIB     := $(HOME_LIB_PATH)/libblas.a

# OpenBLAS
OPENBLAS_LIB   := $(HOME_LIB_PATH)/libopenblas.a
OPENBLASP_LIB  := $(HOME_LIB_PATH)/libopenblasp.a

# BLASFEO
BLASFEO_LIB    := $(HOME_LIB_PATH)/libblasfeo.a

# libxsmm
LIBXSMM_LIB    := $(HOME_LIB_PATH)/libxsmm.a -ldl \
                  $(NETLIB_LIB) -lgfortran

# ATLAS
ATLAS_LIB      := $(HOME_LIB_PATH)/libf77blas.a \
                  $(HOME_LIB_PATH)/libatlas.a

# Eigen
EIGEN_INC      := $(HOME)/flame/eigen/include/eigen3
EIGEN_LIB      := $(HOME_LIB_PATH)/libeigen_blas_static.a
EIGENP_LIB     := $(EIGEN_LIB)

# MKL
MKL_LIB        := -L$(MKL_LIB_PATH) \
                  -lmkl_intel_lp64 \
                  -lmkl_core \
                  -lmkl_sequential \
                  -lpthread -lm -ldl
MKLP_LIB       := -L$(MKL_LIB_PATH) \
                  -lmkl_intel_lp64 \
                  -lmkl_core \
                  -lmkl_gnu_thread \
                  -lpthread -lm -ldl -fopenmp
                  #-L$(ICC_LIB_PATH) \
                  #-lgomp

VENDOR_LIB     := $(MKL_LIB)
VENDORP_LIB    := $(MKLP_LIB)

#
# --- Problem size definitions -------------------------------------------------
#

# Single core
PS_BEGIN := 4
PS_MAX   := 800
PS_INC   := 4

# Multicore
P1_BEGIN := 120
P1_MAX   := 6000
P1_INC   := 120

#
# --- General build definitions ------------------------------------------------
#

TEST_SRC_PATH  := .
TEST_OBJ_PATH  := .

# Gather all local object files.
TEST_OBJS      := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \
                                    $(TEST_OBJ_PATH)/%.o, \
                                    $(wildcard $(TEST_SRC_PATH)/*.c)))

# Override the value of CINCFLAGS so that the value of CFLAGS returned by
# get-frame-cflags-for() is not cluttered up with include paths needed only
# while building BLIS.
CINCFLAGS      := -I$(INC_PATH)

# Use the "framework" CFLAGS for the configuration family.
# NOTE(review): the comments here mention the "framework" flags and
# get-frame-cflags-for(), but the function actually called is
# get-user-cflags-for -- confirm which one is intended.
CFLAGS         := $(call get-user-cflags-for,$(CONFIG_NAME))

# Add local header paths to CFLAGS.
CFLAGS         += -I$(TEST_SRC_PATH)

# Locate the libblis library to which we will link.
LIBBLIS_LINK   := $(LIB_PATH)/$(LIBBLIS_L)

# Define a set of CFLAGS for use with C++ and Eigen.
CXXFLAGS       := $(subst -std=c99,-std=c++11,$(CFLAGS))
CXXFLAGS       += -I$(EIGEN_INC)

# Create a copy of CXXFLAGS without -fopenmp in order to disable multithreading.
CXXFLAGS_ST    := -march=native $(subst -fopenmp,,$(CXXFLAGS))
CXXFLAGS_MT    := -march=native $(CXXFLAGS)

# Single or multithreaded string
STR_ST         := -DTHR_STR=\"st\"
STR_MT         := -DTHR_STR=\"mt\"

# Number of trials per problem size.
N_TRIALS       := -DN_TRIALS=3

# Problem size specification
PDEF_ST        := -DP_BEGIN=$(PS_BEGIN) \
                  -DP_MAX=$(PS_MAX) \
                  -DP_INC=$(PS_INC)

PDEF_MT        := -DP_BEGIN=$(P1_BEGIN) \
                  -DP_MAX=$(P1_MAX) \
                  -DP_INC=$(P1_INC)

# Run "make E=1 ..." to build the drivers with -DERROR_CHECK.
ifeq ($(E),1)
ERRCHK := -DERROR_CHECK
else
ERRCHK := -DNO_ERROR_CHECK
endif

# Enumerate possible datatypes and computation precisions.
#dts := s d c z
DTS := d

TRANS := n_n \
         n_t \
         t_n \
         t_t

# While BLIS supports all combinations of row and column storage for matrices
# C, A, and B, the alternatives mostly only support CBLAS APIs, which inherently
# support only "all row-storage" or "all column-storage". Thus, we disable the
# building of those other drivers so that compilation/linking completes sooner.
#STORS := r_r_r \
#         r_r_c \
#         r_c_r \
#         r_c_c \
#         c_r_r \
#         c_r_c \
#         c_c_r \
#         c_c_c
STORS := r_r_r \
         c_c_c

# Problem shapes: one letter each for m, n and k, where 'l' means the
# dimension varies with the problem size (-D?_DIM=-1, 'p' in file names) and
# any other letter means it is fixed to the matching SMS/SNS/SKS value.
SHAPES := l_l_s \
          l_s_l \
          s_l_l \
          s_s_l \
          s_l_s \
          l_s_s \
          l_l_l

SMS := 6
SNS := 8
SKS := 4

#
# --- Function definitions -----------------------------------------------------
#

# A function to strip the underscores from a list of strings.
stripu = $(subst _,,$(1))

# Various functions that help us construct the datatype combinations and then
# extract the needed datatype strings and C preprocessor define flags.
get-1of2 = $(word 1,$(subst _, ,$(1)))
get-2of2 = $(word 2,$(subst _, ,$(1)))

get-1of3 = $(word 1,$(subst _, ,$(1)))
get-2of3 = $(word 2,$(subst _, ,$(1)))
get-3of3 = $(word 3,$(subst _, ,$(1)))

# Datatype defs.
get-dt-cpp = $(strip \
             $(if $(findstring s,$(1)),-DDT=BLIS_FLOAT    -DIS_FLOAT,\
             $(if $(findstring d,$(1)),-DDT=BLIS_DOUBLE   -DIS_DOUBLE,\
             $(if $(findstring c,$(1)),-DDT=BLIS_SCOMPLEX -DIS_SCOMPLEX,\
                                       -DDT=BLIS_DCOMPLEX -DIS_DCOMPLEX))))

# Transpose defs.
get-tra-defs-a = $(strip $(subst n,-DTRANSA=BLIS_NO_TRANSPOSE -DA_NOTRANS, \
                         $(subst t,-DTRANSA=BLIS_TRANSPOSE -DA_TRANS,$(call get-1of2,$(1)))))
get-tra-defs-b = $(strip $(subst n,-DTRANSB=BLIS_NO_TRANSPOSE -DB_NOTRANS, \
                         $(subst t,-DTRANSB=BLIS_TRANSPOSE -DB_TRANS,$(call get-2of2,$(1)))))
get-tra-defs   = $(call get-tra-defs-a,$(1)) $(call get-tra-defs-b,$(1))

# Storage defs.
get-sto-uch-a = $(strip $(subst r,R, \
                        $(subst c,C,$(call get-1of3,$(1)))))
get-sto-uch-b = $(strip $(subst r,R, \
                        $(subst c,C,$(call get-2of3,$(1)))))
get-sto-uch-c = $(strip $(subst r,R, \
                        $(subst c,C,$(call get-3of3,$(1)))))
get-sto-defs  = $(strip \
                -DSTOR3=BLIS_$(call get-sto-uch-a,$(1))$(call get-sto-uch-b,$(1))$(call get-sto-uch-c,$(1)) \
                -DA_STOR_$(call get-sto-uch-a,$(1)) \
                -DB_STOR_$(call get-sto-uch-b,$(1)) \
                -DC_STOR_$(call get-sto-uch-c,$(1)))

# Dimension defs.
get-shape-defs-cm = $(if $(findstring l,$(1)),-DM_DIM=-1,-DM_DIM=$(2))
get-shape-defs-cn = $(if $(findstring l,$(1)),-DN_DIM=-1,-DN_DIM=$(2))
get-shape-defs-ck = $(if $(findstring l,$(1)),-DK_DIM=-1,-DK_DIM=$(2))
get-shape-defs-m  = $(call get-shape-defs-cm,$(call get-1of3,$(1)),$(2))
get-shape-defs-n  = $(call get-shape-defs-cn,$(call get-2of3,$(1)),$(2))
get-shape-defs-k  = $(call get-shape-defs-ck,$(call get-3of3,$(1)),$(2))

# arguments: 1: shape (w/ underscores) 2: smallm 3: smalln 4: smallk
get-shape-defs = $(strip $(call get-shape-defs-m,$(1),$(2)) \
                         $(call get-shape-defs-n,$(1),$(3)) \
                         $(call get-shape-defs-k,$(1),$(4)))

#$(error l_l_s 6 8 4 = $(call get-shape-defs,l_l_s,6,8,4))

# Shape-dimension string.
get-shape-str-ch = $(if $(findstring l,$(1)),p,$(2))
get-shape-str-m  = $(call get-shape-str-ch,$(call get-1of3,$(1)),$(2))
get-shape-str-n  = $(call get-shape-str-ch,$(call get-2of3,$(1)),$(2))
get-shape-str-k  = $(call get-shape-str-ch,$(call get-3of3,$(1)),$(2))

# arguments: 1: shape (w/ underscores) 2: smallm 3: smalln 4: smallk
get-shape-dim-str = m$(call get-shape-str-m,$(1),$(2))n$(call get-shape-str-n,$(1),$(3))k$(call get-shape-str-k,$(1),$(4))

# Implementation defs.
# Define a function to return the appropriate -DSTR= and -D[BLIS|BLAS] flags.
get-imp-defs = $(strip $(subst blissup,-DSTR=\"$(1)\" -DBLIS -DSUP, \
                       $(subst blislpab,-DSTR=\"$(1)\" -DBLIS, \
                       $(subst eigen,-DSTR=\"$(1)\" -DEIGEN, \
                       $(subst openblas,-DSTR=\"$(1)\" -DCBLAS, \
                       $(subst blasfeo,-DSTR=\"$(1)\" -DCBLAS, \
                       $(subst libxsmm,-DSTR=\"$(1)\" -DBLAS -DXSMM, \
                       $(subst vendor,-DSTR=\"$(1)\" -DCBLAS,$(1)))))))))

TRANS0 = $(call stripu,$(TRANS))
STORS0 = $(call stripu,$(STORS))

# Limit BLAS and Eigen to only using all row-stored, or all column-stored matrices.
# Also, limit libxsmm to using all column-stored matrices since it does not offer
# CBLAS interfaces.
BSTORS0 = rrr ccc
ESTORS0 = rrr ccc
XSTORS0 = ccc

#
# --- Object and binary file definitions ---------------------------------------
#

# arguments: 1: datatypes 2: transposes 3: storages (both w/o underscores)
#            4: shapes 5: smallm list 6: smalln list 7: smallk list
#            8: implementation name
get-st-objs = $(foreach dt,$(1),$(foreach tr,$(2),$(foreach st,$(3),$(foreach sh,$(4),$(foreach sm,$(5),$(foreach sn,$(6),$(foreach sk,$(7),test_$(dt)gemm_$(tr)_$(st)_$(call get-shape-dim-str,$(sh),$(sm),$(sn),$(sk))_$(8)_st.o)))))))

# Build a list of object files and binaries for each single-threaded
# implementation using the get-st-objs() function defined above.
BLISSUP_ST_OBJS  := $(call get-st-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blissup)
BLISSUP_ST_BINS  := $(patsubst %.o,%.x,$(BLISSUP_ST_OBJS))

BLISLPAB_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(STORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blislpab)
BLISLPAB_ST_BINS := $(patsubst %.o,%.x,$(BLISLPAB_ST_OBJS))

EIGEN_ST_OBJS    := $(call get-st-objs,$(DTS),$(TRANS0),$(ESTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),eigen)
EIGEN_ST_BINS    := $(patsubst %.o,%.x,$(EIGEN_ST_OBJS))

OPENBLAS_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),openblas)
OPENBLAS_ST_BINS := $(patsubst %.o,%.x,$(OPENBLAS_ST_OBJS))

BLASFEO_ST_OBJS  := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blasfeo)
BLASFEO_ST_BINS  := $(patsubst %.o,%.x,$(BLASFEO_ST_OBJS))

LIBXSMM_ST_OBJS  := $(call get-st-objs,$(DTS),$(TRANS0),$(XSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),libxsmm)
LIBXSMM_ST_BINS  := $(patsubst %.o,%.x,$(LIBXSMM_ST_OBJS))

VENDOR_ST_OBJS   := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),vendor)
VENDOR_ST_BINS   := $(patsubst %.o,%.x,$(VENDOR_ST_OBJS))

#$(error "objs = $(EIGEN_ST_BINS)" )

# Mark the object files as intermediate so that make will remove them
# automatically after building the binaries on which they depend.
.INTERMEDIATE: $(BLISSUP_ST_OBJS) \
               $(BLISLPAB_ST_OBJS) \
               $(EIGEN_ST_OBJS) \
               $(OPENBLAS_ST_OBJS) \
               $(BLASFEO_ST_OBJS) \
               $(LIBXSMM_ST_OBJS) \
               $(VENDOR_ST_OBJS)

#
# --- Targets/rules ------------------------------------------------------------
#

all: st

blissup:  blissup-st
blislpab: blislpab-st
eigen:    eigen-st
openblas: openblas-st
blasfeo:  blasfeo-st
libxsmm:  libxsmm-st
vendor:   vendor-st

st: blissup-st blislpab-st \
    eigen-st openblas-st blasfeo-st libxsmm-st vendor-st
blis: blissup-st blislpab-st

blissup-st:  $(BLISSUP_ST_BINS)
blislpab-st: $(BLISLPAB_ST_BINS)
eigen-st:    $(EIGEN_ST_BINS)
openblas-st: $(OPENBLAS_ST_BINS)
blasfeo-st:  $(BLASFEO_ST_BINS)
libxsmm-st:  $(LIBXSMM_ST_BINS)
vendor-st:   $(VENDOR_ST_BINS)

# -- Object file rules --

# Define the implementations for which we will instantiate compilation rules.
BIMPLS := blissup blislpab openblas blasfeo libxsmm vendor
EIMPLS := eigen

# File name layout; the numbers are the argument positions of the rule
# functions below (arguments 4-7 together form the shape/dimension string):
#
#   test_<1>gemm_<2>_<3>_<4,5,6,7>_<8>_st.x
#   test_dgemm_nn_rrr_mpn6kp_blissup_st.x

# Define the function that will be used to instantiate compilation rules
# for the various implementations.
define make-st-rule
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_st.o: test_gemm.c Makefile
	$(CC) $(CFLAGS) $(ERRCHK) $(N_TRIALS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_ST) -c $$< -o $$@
endef

# Instantiate the rule function make-st-rule() for each BLIS/BLAS/CBLAS
# implementation.
$(foreach dt,$(DTS), \
$(foreach tr,$(TRANS), \
$(foreach st,$(STORS), \
$(foreach sh,$(SHAPES), \
$(foreach sm,$(SMS), \
$(foreach sn,$(SNS), \
$(foreach sk,$(SKS), \
$(foreach impl,$(BIMPLS), \
$(eval $(call make-st-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))

# Define the function that will be used to instantiate compilation rules
# for the Eigen implementation (same as make-st-rule, but compiled as C++).
define make-eigst-rule
test_$(1)gemm_$(call stripu,$(2))_$(call stripu,$(3))_$(call get-shape-dim-str,$(4),$(5),$(6),$(7))_$(8)_st.o: test_gemm.c Makefile
	$(CXX) $(CXXFLAGS_ST) $(ERRCHK) $(N_TRIALS) $(PDEF_ST) $(call get-dt-cpp,$(1)) $(call get-tra-defs,$(2)) $(call get-sto-defs,$(3)) $(call get-shape-defs,$(4),$(5),$(6),$(7)) $(call get-imp-defs,$(8)) $(STR_ST) -c $$< -o $$@
endef

# Instantiate the rule function make-eigst-rule() for each Eigen implementation.
$(foreach dt,$(DTS), \
$(foreach tr,$(TRANS), \
$(foreach st,$(STORS), \
$(foreach sh,$(SHAPES), \
$(foreach sm,$(SMS), \
$(foreach sn,$(SNS), \
$(foreach sk,$(SKS), \
$(foreach impl,$(EIMPLS), \
$(eval $(call make-eigst-rule,$(dt),$(tr),$(st),$(sh),$(sm),$(sn),$(sk),$(impl)))))))))))

# -- Executable file rules --

# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS
# on the link command line in case BLIS was configured with the BLAS
# compatibility layer. This prevents BLIS from inadvertently getting called
# for the BLAS routines we are trying to test with.

test_%_blissup_st.x: test_%_blissup_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_blislpab_st.x: test_%_blislpab_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_eigen_st.x: test_%_eigen_st.o $(LIBBLIS_LINK)
	$(CXX) $(strip $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_blasfeo_st.x: test_%_blasfeo_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(BLASFEO_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_libxsmm_st.x: test_%_libxsmm_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(LIBXSMM_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

test_%_vendor_st.x: test_%_vendor_st.o $(LIBBLIS_LINK)
	$(CC) $(strip $< $(VENDOR_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@)

# -- Clean rules --

clean: cleanx

cleanx:
	- $(RM_F) *.x *.o

blis-0.6.1/test/sup/octave/000077500000000000000000000000001360743507500155305ustar00rootroot00000000000000blis-0.6.1/test/sup/octave/gen_opsupnames.m000066400000000000000000000020711360743507500207310ustar00rootroot00000000000000function [ r_val1, r_val2 ] = gen_opsupnames( ops, stor, smalldims ) nops = size( ops, 1 ); smallm = smalldims( 1 ); smalln = smalldims( 2 ); smallk = smalldims( 3 ); i = 1; for io = 1:nops op = ops( io, : ); opsupnames( i+0, : ) = sprintf( '%s_%s_m%dnpkp', op, stor, smallm ); opsupnames( i+1, : ) = sprintf( '%s_%s_mpn%dkp', op, stor, smalln ); opsupnames( i+2, : ) = sprintf( '%s_%s_mpnpk%d', op, stor, smallk ); opsupnames( i+3, : ) = sprintf( '%s_%s_mpn%dk%d', op, stor, smalln, smallk ); opsupnames( i+4, : ) = sprintf( '%s_%s_m%dnpk%d', op, stor, smallm, smallk ); opsupnames( i+5, : ) = sprintf( '%s_%s_m%dn%dkp', op, stor, smallm, smalln ); opsupnames( i+6, : ) = sprintf( '%s_%s_mpnpkp', op, stor ); opnames( i+0, : ) = sprintf( '%s', op ); opnames( i+1, : ) = sprintf( '%s', op ); opnames( i+2, : ) = sprintf( '%s', op ); opnames( i+3, : ) = sprintf( '%s', op ); opnames( i+4, : ) = 
sprintf( '%s', op ); opnames( i+5, : ) = sprintf( '%s', op ); opnames( i+6, : ) = sprintf( '%s', op ); i = i + 7; end r_val1 = opsupnames; r_val2 = opnames; end blis-0.6.1/test/sup/octave/plot_l3sup_perf.m000066400000000000000000000225101360743507500210260ustar00rootroot00000000000000function r_val = plot_l3sup_perf( opname, ... data_blissup, ... data_blislpab, ... data_eigen, ... data_open, ... data_bfeo, ... data_xsmm, ... data_vend, vend_str, ... nth, ... rows, cols, ... cfreq, ... dfps, ... theid, impl ) %if ... %mod(theid-1,cols) == 2 || ... % ... %mod(theid-1,cols) == 3 || ... % ... %mod(theid-1,cols) == 4 || ... % 0 == 1 ... %theid >= 19 % show_plot = 0; %else show_plot = 1; %end %legend_plot_id = 11; legend_plot_id = 1*cols + 1*5; if 1 ax1 = subplot( rows, cols, theid ); hold( ax1, 'on' ); end % Set line properties. color_blissup = 'k'; lines_blissup = '-'; markr_blissup = ''; color_blislpab = 'k'; lines_blislpab = ':'; markr_blislpab = ''; color_eigen = 'm'; lines_eigen = '-.'; markr_eigen = 'o'; color_open = 'r'; lines_open = '--'; markr_open = 'o'; color_bfeo = 'c'; lines_bfeo = '-'; markr_bfeo = 'o'; color_xsmm = 'g'; lines_xsmm = '-'; markr_xsmm = 'o'; color_vend = 'b'; lines_vend = '-.'; markr_vend = '.'; % Compute the peak performance in terms of the number of double flops % executable per cycle and the clock rate. if opname(1) == 's' || opname(1) == 'c' flopspercycle = dfps * 2; else flopspercycle = dfps; end max_perf_core = (flopspercycle * cfreq) * 1; % Escape underscores in the title. title_opname = strrep( opname, '_', '\_' ); % Print the title to a string. titlename = '%s'; titlename = sprintf( titlename, title_opname ); % Set the legend strings. 
blissup_legend = sprintf( 'BLIS sup' ); blislpab_legend = sprintf( 'BLIS conv' ); eigen_legend = sprintf( 'Eigen' ); open_legend = sprintf( 'OpenBLAS' ); bfeo_legend = sprintf( 'BLASFEO' ); xsmm_legend = sprintf( 'libxsmm' ); %vend_legend = sprintf( 'MKL' ); %vend_legend = sprintf( 'ARMPL' ); vend_legend = vend_str; % Set axes range values. y_scale = 1.00; x_begin = 0; %x_end is set below. y_begin = 0; y_end = max_perf_core * y_scale; % Set axes names. if nth == 1 yaxisname = 'GFLOPS'; else yaxisname = 'GFLOPS/core'; end %flopscol = 4; flopscol = size( data_blissup, 2 ); msize = 5; if 1 fontsize = 11; else fontsize = 16; end linesize = 0.5; legend_loc = 'southeast'; % -------------------------------------------------------------------- % Automatically detect a column with the increasing problem size. % Then set the maximum x-axis value. for psize_col = 1:3 if data_blissup( 1, psize_col ) ~= data_blissup( 2, psize_col ) break; end end x_axis( :, 1 ) = data_blissup( :, psize_col ); % Compute the number of data points we have in the x-axis. Note that % we only use half the data points for the m = n = k column of graphs. if mod(theid-1,cols) == 6 np = size( data_blissup, 1 ) / 2; else np = size( data_blissup, 1 ); end has_xsmm = 1; if data_xsmm( 1, flopscol ) == 0.0 has_xsmm = 0; end % Grab the last x-axis value. x_end = data_blissup( np, psize_col ); %data_peak( 1, 1:2 ) = [ 0 max_perf_core ]; %data_peak( 2, 1:2 ) = [ x_end max_perf_core ]; if show_plot == 1 blissup_ln = line( x_axis( 1:np, 1 ), data_blissup( 1:np, flopscol ) / nth, ... 'Color',color_blissup, 'LineStyle',lines_blissup, ... 'LineWidth',linesize ); blislpab_ln = line( x_axis( 1:np, 1 ), data_blislpab( 1:np, flopscol ) / nth, ... 'Color',color_blislpab, 'LineStyle',lines_blislpab, ... 'LineWidth',linesize ); eigen_ln = line( x_axis( 1:np, 1 ), data_eigen( 1:np, flopscol ) / nth, ... 'Color',color_eigen, 'LineStyle',lines_eigen, ... 
'LineWidth',linesize ); open_ln = line( x_axis( 1:np, 1 ), data_open( 1:np, flopscol ) / nth, ... 'Color',color_open, 'LineStyle',lines_open, ... 'LineWidth',linesize ); bfeo_ln = line( x_axis( 1:np, 1 ), data_bfeo( 1:np, flopscol ) / nth, ... 'Color',color_bfeo, 'LineStyle',lines_bfeo, ... 'LineWidth',linesize ); if has_xsmm == 1 xsmm_ln = line( x_axis( 1:np, 1 ), data_xsmm( 1:np, flopscol ) / nth, ... 'Color',color_xsmm, 'LineStyle',lines_xsmm, ... 'LineWidth',linesize ); else xsmm_ln = line( nan, nan, ... 'Color',color_xsmm, 'LineStyle',lines_xsmm, ... 'LineWidth',linesize ); end vend_ln = line( x_axis( 1:np, 1 ), data_vend( 1:np, flopscol ) / nth, ... 'Color',color_vend, 'LineStyle',lines_vend, ... 'LineWidth',linesize ); else if theid == legend_plot_id blissup_ln = line( nan, nan, ... 'Color',color_blissup, 'LineStyle',lines_blissup, ... 'LineWidth',linesize ); blislpab_ln = line( nan, nan, ... 'Color',color_blislpab, 'LineStyle',lines_blislpab, ... 'LineWidth',linesize ); eigen_ln = line( nan, nan, ... 'Color',color_eigen, 'LineStyle',lines_eigen, ... 'LineWidth',linesize ); open_ln = line( nan, nan, ... 'Color',color_open, 'LineStyle',lines_open, ... 'LineWidth',linesize ); bfeo_ln = line( nan, nan, ... 'Color',color_bfeo, 'LineStyle',lines_bfeo, ... 'LineWidth',linesize ); xsmm_ln = line( nan, nan, ... 'Color',color_xsmm, 'LineStyle',lines_xsmm, ... 'LineWidth',linesize ); vend_ln = line( nan, nan, ... 'Color',color_vend, 'LineStyle',lines_vend, ... 
'LineWidth',linesize ); end end xlim( ax1, [x_begin x_end] ); ylim( ax1, [y_begin y_end] ); if 6000 <= x_end && x_end < 10000 x_tick2 = x_end - 2000; x_tick1 = x_tick2/2; xticks( ax1, [ x_tick1 x_tick2 ] ); elseif 4000 <= x_end && x_end < 6000 x_tick2 = x_end - 1000; x_tick1 = x_tick2/2; xticks( ax1, [ x_tick1 x_tick2 ] ); elseif 2000 <= x_end && x_end < 3000 x_tick2 = x_end - 400; x_tick1 = x_tick2/2; xticks( ax1, [ x_tick1 x_tick2 ] ); elseif 500 <= x_end && x_end < 1000 x_tick3 = x_end*(3/4); x_tick2 = x_end*(2/4); x_tick1 = x_end*(1/4); xticks( ax1, [ x_tick1 x_tick2 x_tick3 ] ); end if show_plot == 1 || theid == legend_plot_id if nth == 1 && theid == legend_plot_id if has_xsmm == 1 leg = legend( ... [ ... blissup_ln ... blislpab_ln ... eigen_ln ... open_ln ... bfeo_ln ... xsmm_ln ... vend_ln ... ], ... blissup_legend, ... blislpab_legend, ... eigen_legend, ... open_legend, ... bfeo_legend, ... xsmm_legend, ... vend_legend, ... 'Location', legend_loc ); set( leg,'Box','off' ); set( leg,'Color','none' ); set( leg,'Units','inches' ); if impl == 'octave' set( leg,'FontSize',fontsize ); set( leg,'Position',[11.92 6.54 1.15 0.7 ] ); % (1,4tl) else set( leg,'FontSize',fontsize-3 ); set( leg,'Position',[18.20 10.20 1.15 0.7 ] ); % (1,4tl) end else leg = legend( ... [ ... blissup_ln ... blislpab_ln ... eigen_ln ... open_ln ... bfeo_ln ... vend_ln ... ], ... blissup_legend, ... blislpab_legend, ... eigen_legend, ... open_legend, ... bfeo_legend, ... vend_legend, ... 
'Location', legend_loc ); set( leg,'Box','off' ); set( leg,'Color','none' ); set( leg,'Units','inches' ); if impl == 'octave' set( leg,'FontSize',fontsize ); set( leg,'Position',[11.92 6.54 1.15 0.7 ] ); % (1,4tl) else set( leg,'FontSize',fontsize-1 ); set( leg,'Position',[18.24 10.15 1.15 0.7 ] ); % (1,4tl) end end set( leg,'Box','off' ); set( leg,'Color','none' ); set( leg,'Units','inches' ); % xpos ypos %set( leg,'Position',[11.32 6.36 1.15 0.7 ] ); % (1,4tl) elseif nth > 1 && theid == legend_plot_id end end set( ax1,'FontSize',fontsize ); set( ax1,'TitleFontSizeMultiplier',1.0 ); % default is 1.1. box( ax1, 'on' ); titl = title( titlename ); set( titl, 'FontWeight', 'normal' ); % default font style is now 'bold'. if impl == 'octave' tpos = get( titl, 'Position' ); % default is to align across whole figure, not box. tpos(1) = tpos(1) + -40; set( titl, 'Position', tpos ); % here we nudge it back to centered with box. end if theid > (rows-1)*cols %xlab = xlabel( ax1,xaxisname ); %tpos = get( xlab, 'Position' ) %tpos(2) = tpos(2) + 10; %set( xlab, 'Position', tpos ); if theid == rows*cols - 6 xlab = xlabel( ax1, 'm = 6; n = k' ); elseif theid == rows*cols - 5 xlab = xlabel( ax1, 'n = 8; m = k' ); elseif theid == rows*cols - 4 xlab = xlabel( ax1, 'k = 4; m = n' ); elseif theid == rows*cols - 3 xlab = xlabel( ax1, 'm; n = 8, k = 4' ); elseif theid == rows*cols - 2 xlab = xlabel( ax1, 'n; m = 6, k = 4' ); elseif theid == rows*cols - 1 xlab = xlabel( ax1, 'k; m = 6, n = 8' ); elseif theid == rows*cols - 0 xlab = xlabel( ax1, 'm = n = k' ); end end if mod(theid-1,cols) == 0 ylab = ylabel( ax1,yaxisname ); end %export_fig( filename, colorflag, '-pdf', '-m2', '-painters', '-transparent' ); %saveas( fig, filename_png ); %hold( ax1, 'off' ); r_val = 0; end blis-0.6.1/test/sup/octave/plot_panel_trxsh.m000066400000000000000000000134171360743507500213010ustar00rootroot00000000000000function r_val = plot_panel_trxsh ... ( ... cfreq, ... dflopspercycle, ... nth, ... thr_str, ... 
dt_ch, ... stor_str, ... smalldims, ... dirpath, ... arch_str, ... vend_str, ... impl ... ) %cfreq = 1.8; %dflopspercycle = 32; % Create filename "templates" for the files that contain the performance % results. filetemp_blissup = '%s/output_%s_%s_blissup.m'; filetemp_blislpab = '%s/output_%s_%s_blislpab.m'; filetemp_eigen = '%s/output_%s_%s_eigen.m'; filetemp_open = '%s/output_%s_%s_openblas.m'; filetemp_bfeo = '%s/output_%s_%s_blasfeo.m'; filetemp_xsmm = '%s/output_%s_%s_libxsmm.m'; filetemp_vend = '%s/output_%s_%s_vendor.m'; % Create a variable name "template" for the variables contained in the % files outlined above. vartemp = 'data_%s_%s_%s( :, : )'; % Define the datatypes and operations we will be plotting. oproot = sprintf( '%cgemm', dt_ch ); ops( 1, : ) = sprintf( '%s_nn', oproot ); ops( 2, : ) = sprintf( '%s_nt', oproot ); ops( 3, : ) = sprintf( '%s_tn', oproot ); ops( 4, : ) = sprintf( '%s_tt', oproot ); % Generate datatype-specific operation names from the set of operations % and datatypes. [ opsupnames, opnames ] = gen_opsupnames( ops, stor_str, smalldims ); n_opsupnames = size( opsupnames, 1 ); %opsupnames %opnames %return if 1 == 1 %fig = figure('Position', [100, 100, 2400, 1500]); fig = figure('Position', [100, 100, 2800, 1500]); orient( fig, 'portrait' ); set(gcf,'PaperUnits', 'inches'); if impl == 'matlab' set(gcf,'PaperSize', [11.5 20.4]); set(gcf,'PaperPosition', [0 0 11.5 20.4]); set(gcf,'PaperPositionMode','manual'); else % impl == 'octave' % octave 4.x set(gcf,'PaperSize', [10 17.5]); set(gcf,'PaperPositionMode','auto'); end set(gcf,'PaperOrientation','landscape'); end % Iterate over the list of datatype-specific operation names. for opi = 1:n_opsupnames %for opi = 1:1 % Grab the current datatype combination. opsupname = opsupnames( opi, : ); opname = opnames( opi, : ); str = sprintf( 'Plotting %2d: %s', opi, opsupname ); disp(str); % Construct filenames for the data files from templates. 
file_blissup = sprintf( filetemp_blissup, dirpath, thr_str, opsupname ); file_blislpab = sprintf( filetemp_blislpab, dirpath, thr_str, opsupname ); file_eigen = sprintf( filetemp_eigen, dirpath, thr_str, opsupname ); file_open = sprintf( filetemp_open, dirpath, thr_str, opsupname ); file_bfeo = sprintf( filetemp_bfeo, dirpath, thr_str, opsupname ); file_vend = sprintf( filetemp_vend, dirpath, thr_str, opsupname ); % Load the data files. %str = sprintf( ' Loading %s', file_blissup ); disp(str); run( file_blissup ) run( file_blislpab ) run( file_eigen ) run( file_open ) run( file_bfeo ) run( file_vend ) % Construct variable names for the variables in the data files. var_blissup = sprintf( vartemp, thr_str, opname, 'blissup' ); var_blislpab = sprintf( vartemp, thr_str, opname, 'blislpab' ); var_eigen = sprintf( vartemp, thr_str, opname, 'eigen' ); var_open = sprintf( vartemp, thr_str, opname, 'openblas' ); var_bfeo = sprintf( vartemp, thr_str, opname, 'blasfeo' ); var_vend = sprintf( vartemp, thr_str, opname, 'vendor' ); % Use eval() to instantiate the variable names constructed above, % copying each to a simplified name. data_blissup = eval( var_blissup ); % e.g. data_st_dgemm_blissup( :, : ); data_blislpab = eval( var_blislpab ); % e.g. data_st_dgemm_blislpab( :, : ); data_eigen = eval( var_eigen ); % e.g. data_st_dgemm_eigen( :, : ); data_open = eval( var_open ); % e.g. data_st_dgemm_openblas( :, : ); data_bfeo = eval( var_bfeo ); % e.g. data_st_dgemm_blasfeo( :, : ); data_vend = eval( var_vend ); % e.g. data_st_dgemm_vendor( :, : ); if stor_str == 'ccc' % Only read xsmm data for the column storage case, since that's the % only format that libxsmm supports. file_xsmm = sprintf( filetemp_xsmm, dirpath, thr_str, opsupname ); run( file_xsmm ) var_xsmm = sprintf( vartemp, thr_str, opname, 'libxsmm' ); data_xsmm = eval( var_xsmm ); % e.g. data_st_dgemm_libxsmm( :, : ); else % Set the data variable to zeros using the same dimensions as the other % variables. 
data_xsmm = zeros( size( data_blissup, 1 ), ... size( data_blissup, 2 ) ); end %str = sprintf( ' Reading %s', var_blissup ); disp(str); %str = sprintf( ' Reading %s', var_blislpab ); disp(str); %str = sprintf( ' Reading %s', var_eigen ); disp(str); %str = sprintf( ' Reading %s', var_open ); disp(str); %str = sprintf( ' Reading %s', var_bfeo ); disp(str); %str = sprintf( ' Reading %s', var_xsmm ); disp(str); %str = sprintf( ' Reading %s', var_vend ); disp(str); % Plot one result in an m x n grid of plots, via the subplot() % function. if 1 == 1 plot_l3sup_perf( opsupname, ... data_blissup, ... data_blislpab, ... data_eigen, ... data_open, ... data_bfeo, ... data_xsmm, ... data_vend, vend_str, ... nth, ... 4, 7, ... cfreq, ... dflopspercycle, ... opi, impl ); clear data_st_*gemm_*; clear data_blissup; clear data_blislpab; clear data_eigen; clear data_open; clear data_bfeo; clear data_xsmm; clear data_vend; end end % Construct the name of the file to which we will output the graph. outfile = sprintf( 'l3sup_%s_%s_%s_nt%d.pdf', oproot, stor_str, arch_str, nth ); % Output the graph to pdf format. 
%print(gcf, 'gemm_md','-fillpage','-dpdf'); %print(gcf, outfile,'-bestfit','-dpdf'); if impl == 'octave' print(gcf, outfile); else % if impl == 'matlab' print(gcf, outfile,'-bestfit','-dpdf'); end blis-0.6.1/test/sup/octave/runthese.m000066400000000000000000000015231360743507500175440ustar00rootroot00000000000000 % haswell plot_panel_trxsh(3.25,16,1,'st','d','ccc',[ 6 8 4 ],'../results/haswell/20190823/4_800_4_mt201','has','MKL','matlab'); close; clear all; plot_panel_trxsh(3.25,16,1,'st','d','rrr',[ 6 8 4 ],'../results/haswell/20190823/4_800_4_mt201','has','MKL','matlab'); close; clear all; % kabylake plot_panel_trxsh(3.80,16,1,'st','d','rrr',[ 6 8 4 ],'../results/kabylake/20190823/4_800_4_mt201','kbl','MKL','matlab'); close; clear all; plot_panel_trxsh(3.80,16,1,'st','d','ccc',[ 6 8 4 ],'../results/kabylake/20190823/4_800_4_mt201','kbl','MKL','matlab'); close; clear all; % epyc plot_panel_trxsh(3.00, 8,1,'st','d','rrr',[ 6 8 4 ],'../results/epyc/20190826/4_800_4_mt256','epyc','MKL','matlab'); close; clear all; plot_panel_trxsh(3.00, 8,1,'st','d','ccc',[ 6 8 4 ],'../results/epyc/20190826/4_800_4_mt256','epyc','MKL','matlab'); close; clear all; blis-0.6.1/test/sup/runme.sh000077500000000000000000000053001360743507500157320ustar00rootroot00000000000000#!/bin/bash # File pefixes. exec_root="test" out_root="output" # Placeholder until we add multithreading. nt=1 # Delay between test cases. delay=0.02 # Threadedness to test. threads="st" # Datatypes to test. #dts="d s" dts="d" # Operations to test. ops="gemm" # Transpose combintions to test. trans="nn nt tn tt" # Storage combinations to test. #stors="rrr rrc rcr rcc crr crc ccr ccc" stors="rrr ccc" # Problem shapes to test. shapes="sll lsl lls lss sls ssl lll" # FGVZ: figure out how to probe what's in the directory and # execute everything that's there? sms="6" sns="8" sks="4" # Implementations to test. 
impls="vendor blissup blislpab openblas eigen libxsmm blasfeo" #impls="vendor" #impls="blissup" #impls="blislpab" #impls="openblas" #impls="eigen" #impls="libxsmm" #impls="blasfeo" # Example: test_dgemm_nn_rrc_m6npkp_blissup_st.x for th in ${threads}; do for dt in ${dts}; do for op in ${ops}; do for tr in ${trans}; do for st in ${stors}; do for sh in ${shapes}; do for sm in ${sms}; do for sn in ${sns}; do for sk in ${sks}; do for im in ${impls}; do # Limit execution of non-BLIS implementations to # rrr/ccc storage cases. if [ "${im:0:4}" != "blis" ] && \ [ "${st}" != "rrr" ] && \ [ "${st}" != "ccc" ]; then continue; fi # Further limit execution of libxsmm to # ccc storage cases. if [ "${im:0:7}" = "libxsmm" ] && \ [ "${st}" != "ccc" ]; then continue; fi # Extract the shape chars for m, n, k. chm=${sh:0:1} chn=${sh:1:1} chk=${sh:2:1} # Construct the shape substring (e.g. m6npkp) shstr="" if [ ${chm} = "s" ]; then shstr="${shstr}m${sm}" else shstr="${shstr}mp" fi if [ ${chn} = "s" ]; then shstr="${shstr}n${sn}" else shstr="${shstr}np" fi if [ ${chk} = "s" ]; then shstr="${shstr}k${sk}" else shstr="${shstr}kp" fi # Ex: test_dgemm_nn_rrc_m6npkp_blissup_st.x # Construct the name of the test executable. exec_name="${exec_root}_${dt}${op}_${tr}_${st}_${shstr}_${im}_${th}.x" # Construct the name of the output file. out_file="${out_root}_${th}_${dt}${op}_${tr}_${st}_${shstr}_${im}.m" echo "Running (nt = ${nt}) ./${exec_name} > ${out_file}" # Run executable. ./${exec_name} > ${out_file} sleep ${delay} done done done done done done done done done done blis-0.6.1/test/sup/test_gemm.c000066400000000000000000000420121360743507500163760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #ifdef EIGEN #define BLIS_DISABLE_BLAS_DEFS #include "blis.h" #include //#include using namespace Eigen; #else #include "blis.h" #endif //#define PRINT int main( int argc, char** argv ) { bli_init(); #ifndef ERROR_CHECK bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); #endif dim_t n_trials = N_TRIALS; num_t dt = DT; #if 1 dim_t p_begin = P_BEGIN; dim_t p_max = P_MAX; dim_t p_inc = P_INC; #else dim_t p_begin = 4; dim_t p_max = 40; dim_t p_inc = 4; #endif #if 1 dim_t m_input = M_DIM; dim_t n_input = N_DIM; dim_t k_input = K_DIM; #else p_begin = p_inc = 32; dim_t m_input = 6; dim_t n_input = -1; dim_t k_input = -1; #endif #if 1 trans_t transa = TRANSA; trans_t transb = TRANSB; #else trans_t transa = BLIS_NO_TRANSPOSE; trans_t transb = BLIS_NO_TRANSPOSE; #endif #if 1 stor3_t sc = STOR3; #else stor3_t sc = BLIS_RRR; #endif inc_t rs_c, cs_c; inc_t rs_a, cs_a; inc_t rs_b, cs_b; if ( sc == BLIS_RRR ) { rs_c = cs_c = -1; rs_a = cs_a = -1; rs_b = cs_b = -1; } else if ( sc == BLIS_RRC ) { rs_c = cs_c = -1; rs_a = cs_a = -1; rs_b = cs_b = 0; } else if ( sc == BLIS_RCR ) { rs_c = cs_c = -1; rs_a = cs_a = 0; rs_b = cs_b = -1; } else if ( sc == BLIS_RCC ) { rs_c = cs_c = -1; rs_a = cs_a = 0; rs_b = cs_b = 0; } else if ( sc == BLIS_CRR ) { rs_c = cs_c = 0; rs_a = cs_a = -1; rs_b = cs_b = -1; } else if ( sc == BLIS_CRC ) { rs_c = cs_c = 0; rs_a = cs_a = -1; rs_b = cs_b = 0; } else if ( sc == BLIS_CCR ) { rs_c = cs_c = 0; rs_a = cs_a = 0; rs_b = cs_b = -1; } else if ( sc == BLIS_CCC ) { rs_c = cs_c = 0; rs_a = cs_a = 0; rs_b = cs_b = 0; } else { bli_abort(); } f77_int cbla_storage; if ( sc == BLIS_RRR ) cbla_storage = CblasRowMajor; else if ( sc == BLIS_CCC ) cbla_storage = CblasColMajor; else cbla_storage = -1; ( void )cbla_storage; char dt_ch; // Choose the char corresponding to the requested datatype. 
if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; f77_char f77_transa; f77_char f77_transb; char transal, transbl; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); transal = tolower( f77_transa ); transbl = tolower( f77_transb ); f77_int cbla_transa = ( transal == 'n' ? CblasNoTrans : CblasTrans ); f77_int cbla_transb = ( transbl == 'n' ? CblasNoTrans : CblasTrans ); ( void )cbla_transa; ( void )cbla_transb; dim_t p; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%cgemm_%c%c_%s", THR_STR, dt_ch, transal, transbl, STR ); printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_max; p += p_inc ) for ( p = p_max; p_begin <= p; p -= p_inc ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p / ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, n, rs_c, cs_c, &c ); bli_obj_create( dt, m, n, rs_c, cs_c, &c_save ); if ( bli_does_notrans( transa ) ) bli_obj_create( dt, m, k, rs_a, cs_a, &a ); else bli_obj_create( dt, k, m, rs_a, cs_a, &a ); if ( bli_does_notrans( transb ) ) bli_obj_create( dt, k, n, rs_b, cs_b, &b ); else bli_obj_create( dt, n, k, rs_b, cs_b, &b ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_setsc( 
(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); #ifdef EIGEN double alpha_r, alpha_i; bli_getsc( &alpha, &alpha_r, &alpha_i ); void* ap = bli_obj_buffer_at_off( &a ); void* bp = bli_obj_buffer_at_off( &b ); void* cp = bli_obj_buffer_at_off( &c ); const int os_a = ( bli_obj_is_col_stored( &a ) ? bli_obj_col_stride( &a ) : bli_obj_row_stride( &a ) ); const int os_b = ( bli_obj_is_col_stored( &b ) ? bli_obj_col_stride( &b ) : bli_obj_row_stride( &b ) ); const int os_c = ( bli_obj_is_col_stored( &c ) ? bli_obj_col_stride( &c ) : bli_obj_row_stride( &c ) ); Stride stride_a( os_a, 1 ); Stride stride_b( os_b, 1 ); Stride stride_c( os_c, 1 ); #if defined(IS_FLOAT) #elif defined (IS_DOUBLE) #ifdef A_STOR_R typedef Matrix MatrixXd_A; #else typedef Matrix MatrixXd_A; #endif #ifdef B_STOR_R typedef Matrix MatrixXd_B; #else typedef Matrix MatrixXd_B; #endif #ifdef C_STOR_R typedef Matrix MatrixXd_C; #else typedef Matrix MatrixXd_C; #endif #ifdef A_NOTRANS // A is not transposed Map > A( ( double* )ap, m, k, stride_a ); #else // A is transposed Map > A( ( double* )ap, k, m, stride_a ); #endif #ifdef B_NOTRANS // B is not transposed Map > B( ( double* )bp, k, n, stride_b ); #else // B is transposed Map > B( ( double* )bp, n, k, stride_b ); #endif Map > C( ( double* )cp, m, n, stride_c ); #endif #endif double dtime_save = DBL_MAX; for ( dim_t r = 0; r < n_trials; ++r ) { bli_copym( &c_save, &c ); double dtime = bli_clock(); #ifdef EIGEN #ifdef A_NOTRANS #ifdef B_NOTRANS C.noalias() += alpha_r * A * B; #else // B_TRANS C.noalias() += alpha_r * A * B.transpose(); #endif #else // A_TRANS #ifdef B_NOTRANS C.noalias() += alpha_r * A.transpose() * B; #else // B_TRANS C.noalias() += alpha_r * A.transpose() * B.transpose(); #endif #endif #endif #ifdef BLIS #ifdef SUP // Allow sup. bli_gemm( &alpha, &a, &b, &beta, &c ); #else // Disable sup and use the expert interface. 
rntm_t rntm = BLIS_RNTM_INITIALIZER; bli_rntm_disable_l3_sup( &rntm ); bli_gemm_ex( &alpha, &a, &b, &beta, &c, NULL, &rntm ); #endif #endif #ifdef BLAS if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* bp = ( float* )bli_obj_buffer( &b ); float* betap = ( float* )bli_obj_buffer( &beta ); float* cp = ( float* )bli_obj_buffer( &c ); #ifdef XSMM libxsmm_sgemm( &f77_transa, #else sgemm_( &f77_transa, #endif &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* bp = ( double* )bli_obj_buffer( &b ); double* betap = ( double* )bli_obj_buffer( &beta ); double* cp = ( double* )bli_obj_buffer( &c ); #ifdef XSMM libxsmm_dgemm( &f77_transa, #else dgemm_( &f77_transa, #endif &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* bp = ( scomplex* )bli_obj_buffer( &b ); scomplex* betap = ( scomplex* )bli_obj_buffer( &beta ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); 
#ifdef XSMM libxsmm_cgemm( &f77_transa, #else cgemm_( &f77_transa, #endif &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* bp = ( dcomplex* )bli_obj_buffer( &b ); dcomplex* betap = ( dcomplex* )bli_obj_buffer( &beta ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #ifdef XSMM libxsmm_zgemm( &f77_transa, #else zgemm_( &f77_transa, #endif &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef CBLAS if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); #ifdef C_STOR_R f77_int lda = bli_obj_row_stride( &a ); f77_int ldb = bli_obj_row_stride( &b ); f77_int ldc = bli_obj_row_stride( &c ); #else f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); #endif float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); cblas_sgemm( cbla_storage, cbla_transa, cbla_transb, mm, nn, kk, *alphap, ap, lda, bp, ldb, *betap, cp, ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); #ifdef C_STOR_R f77_int lda = bli_obj_row_stride( &a ); f77_int ldb = bli_obj_row_stride( &b ); f77_int ldc = bli_obj_row_stride( &c ); #else f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); #endif double* alphap 
= bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); cblas_dgemm( cbla_storage, cbla_transa, cbla_transb, mm, nn, kk, *alphap, ap, lda, bp, ldb, *betap, cp, ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); #ifdef C_STOR_R f77_int lda = bli_obj_row_stride( &a ); f77_int ldb = bli_obj_row_stride( &b ); f77_int ldc = bli_obj_row_stride( &c ); #else f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); #endif scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* bp = bli_obj_buffer( &b ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); cblas_cgemm( cbla_storage, cbla_transa, cbla_transb, mm, nn, kk, alphap, ap, lda, bp, ldb, betap, cp, ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); #ifdef C_STOR_R f77_int lda = bli_obj_row_stride( &a ); f77_int ldb = bli_obj_row_stride( &b ); f77_int ldc = bli_obj_row_stride( &c ); #else f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); #endif dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); cblas_zgemm( cbla_storage, cbla_transa, cbla_transb, mm, nn, kk, alphap, ap, lda, bp, ldb, betap, cp, ldc ); } #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } double gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%cgemm_%c%c_%s", THR_STR, dt_ch, transal, transbl, STR ); printf( 
"( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, ( unsigned long )k, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_axpyv.c000066400000000000000000000117431360743507500160200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" // n alpha x incx y incy //void daxpyv_( int*, double*, double*, int*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t x, y; obj_t y_save; obj_t alpha; dim_t n; dim_t p; dim_t p_begin, p_end, p_inc; int n_input; num_t dt_x, dt_y; num_t dt_alpha; int r, n_repeats; num_t dt; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 4000; p_inc = 40; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; n_input = 15; #endif #if 1 dt = BLIS_FLOAT; //dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif dt_x = dt_y = dt_alpha = dt; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_axpyv_blis" ); #else printf( "data_axpyv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, n, 1, 0, 0, &x ); bli_obj_create( dt_y, n, 1, 0, 0, &y ); bli_obj_create( dt_y, n, 1, 0, 0, &y_save ); bli_randm( &x ); bli_randm( &y ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &y, &y_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &y_save, &y ); dtime = bli_clock(); #ifdef PRINT bli_printm( "alpha", &alpha, "%4.1f", "" ); bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); #endif #ifdef BLIS bli_axpyv( &alpha, &x, &y ); #else if ( bli_is_float( dt ) ) { f77_int nn = bli_obj_length( &x ); f77_int incx = bli_obj_vector_inc( &x ); f77_int incy = bli_obj_vector_inc( &y ); float* alphap = bli_obj_buffer( &alpha ); float* xp = bli_obj_buffer( &x ); float* yp = bli_obj_buffer( &y ); saxpy_( &nn, alphap, xp, &incx, yp, &incy ); } else if ( bli_is_double( dt ) ) { f77_int nn = bli_obj_length( &x ); f77_int incx = bli_obj_vector_inc( &x ); f77_int incy = bli_obj_vector_inc( &y ); double* alphap = bli_obj_buffer( &alpha ); double* xp = bli_obj_buffer( &x ); double* yp = bli_obj_buffer( &y ); daxpy_( &nn, alphap, xp, &incx, yp, &incy ); } #endif #ifdef PRINT bli_printm( "y after", &y, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_axpyv_blis" ); #else printf( "data_axpyv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )n, gflops ); bli_obj_free( &alpha 
); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } bli_finalize(); return 0; } blis-0.6.1/test/test_dotv.c000066400000000000000000000113431360743507500156210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" // res n x incx y incy //double res = ddotv_( int*, double*, int*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t x, y; obj_t res; dim_t n; dim_t p; dim_t p_begin, p_end, p_inc; int n_input; num_t dt_x, dt_y, dt_res; int r, n_repeats; num_t dt; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 10; #ifndef PRINT p_begin = 40; p_end = 4000; p_inc = 40; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; n_input = 16; #endif #if 1 dt = BLIS_FLOAT; //dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif dt_x = dt_y = dt_res = dt; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_dotv_blis" ); #else printf( "data_dotv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_x, n, 1, 0, 0, &x ); bli_obj_create( dt_y, n, 1, 0, 0, &y ); bli_obj_create( dt_res, 1, 1, 0, 0, &res ); bli_randm( &x ); bli_randm( &y ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); #endif #ifdef BLIS bli_dotv( &x, &y, &res); #else if ( bli_is_float( dt ) ) { f77_int nn = bli_obj_length( &x ); f77_int incx = bli_obj_vector_inc( &x ); f77_int incy = bli_obj_vector_inc( &y ); float* xp = bli_obj_buffer( &x ); float* yp = bli_obj_buffer( &y ); float* resp = bli_obj_buffer( &res ); *resp = sdot_( &nn, xp, &incx, yp, &incy ); } else if ( bli_is_double( dt ) ) { f77_int nn = bli_obj_length( &x ); f77_int incx = bli_obj_vector_inc( &x ); f77_int incy = bli_obj_vector_inc( &y ); 
double* xp = bli_obj_buffer( &x ); double* yp = bli_obj_buffer( &y ); double* resp = bli_obj_buffer( &res ); *resp = ddot_( &nn, xp, &incx, yp, &incy ); } #endif #ifdef PRINT bli_printm( "res after", &res, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_dotv_blis" ); #else printf( "data_dotv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )n, gflops ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &res ); } bli_finalize(); return 0; } blis-0.6.1/test/test_gemm.c000066400000000000000000000177001360743507500155750ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input, k_input; num_t dt; int r, n_repeats; trans_t transa; trans_t transb; f77_char f77_transa; f77_char f77_transb; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 5; k_input = 6; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif transa = BLIS_NO_TRANSPOSE; transb = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_trans( transb, &f77_transb ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); bli_setsc( (0.9/1.0), 0.2, &alpha ); bli_setsc( -(1.1/1.0), 0.3, &beta ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_gemm( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); sgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); 
f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* bp = bli_obj_buffer( &b ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); cgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zgemm_( &f77_transa, &f77_transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_gemm_blis" ); #else printf( "data_gemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", ( 
unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_gemv.c000066400000000000000000000123061360743507500156030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" // transa m n alpha a lda x incx beta y incy //void dgemv_( char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, x, y; obj_t y_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_x, dt_y; num_t dt_alpha, dt_beta; int r, n_repeats; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_a = dt_x = dt_y = dt_alpha = dt_beta = BLIS_DOUBLE; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_gemv_blis" ); #else printf( "data_gemv_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, n, 0, 0, &a ); bli_obj_create( dt_x, n, 1, 0, 0, &x ); bli_obj_create( dt_y, m, 1, 0, 0, &y ); bli_obj_create( dt_y, m, 1, 0, 0, &y_save ); bli_randm( &a ); bli_randm( &x ); bli_randm( &y ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &y, &y_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &y_save, &y ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); #endif 
#ifdef BLIS bli_gemv( &alpha, &a, &x, &beta, &y ); #else f77_char transa = 'N'; f77_int mm = bli_obj_length( &a ); f77_int nn = bli_obj_width( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int incx = bli_obj_vector_inc( &x ); f77_int incy = bli_obj_vector_inc( &y ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* xp = bli_obj_buffer( &x ); double* betap = bli_obj_buffer( &beta ); double* yp = bli_obj_buffer( &y ); dgemv_( &transa, &mm, &nn, alphap, ap, &lda, xp, &incx, betap, yp, &incy ); #endif #ifdef PRINT bli_printm( "y after", &y, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_gemv_blis" ); #else printf( "data_gemv_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_ger.c000066400000000000000000000116111360743507500154200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" // m n alpha x incx y incy a lda //void dger_( int*, int*, double*, double*, int*, double*, int*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, x, y; obj_t a_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_x, dt_y; num_t dt_alpha; int r, n_repeats; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_ger_blis" ); #else printf( "data_ger_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_y, n, 1, 0, 0, &y ); bli_obj_create( dt_a, m, n, 0, 0, &a ); bli_obj_create( dt_a, m, n, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &y ); bli_randm( &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS bli_ger( &alpha, &x, &y, &a ); #else f77_int mm = bli_obj_length( &a ); f77_int nn = bli_obj_width( &a ); f77_int incx = bli_obj_vector_inc( &x ); f77_int incy = bli_obj_vector_inc( &y ); f77_int lda = bli_obj_col_stride( &a ); double* alphap = bli_obj_buffer( &alpha ); double* xp = bli_obj_buffer( &x ); double* yp = bli_obj_buffer( &y ); double* ap = bli_obj_buffer( &a ); dger_( &mm, &nn, alphap, xp, &incx, yp, &incy, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * n ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_ger_blis" ); #else printf( "data_ger_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( 
&y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_hemm.c000066400000000000000000000201611360743507500155710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 4; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uploa = BLIS_LOWER; //uploa = BLIS_UPPER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, make it densely 
Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); /* bli_obj_toggle_uplo( &a ); bli_obj_inc_diag_offset( 1, &a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_offset( -1, &a ); bli_obj_toggle_uplo( &a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); bli_scalm( &BLIS_TWO, &a ); bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); 
f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* bp = bli_obj_buffer( &b ); scomplex* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); dcomplex* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_hemv.c000066400000000000000000000123501360743507500156030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" // uploa m alpha a lda x incx beta y incy //void dsymv_( char*, int*, double*, double*, int*, double*, int*, double*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, x, y; obj_t y_save; obj_t alpha, beta; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x, dt_y; num_t dt_alpha, dt_beta; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 6; #endif #if 1 dt_a = dt_x = dt_y = dt_alpha = dt_beta = BLIS_DOUBLE; #else dt_a = dt_x = dt_y = dt_alpha = dt_beta = BLIS_DCOMPLEX; #endif uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_hemv_blis" ); #else printf( "data_hemv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_y, m, 1, 0, 0, &y ); bli_obj_create( dt_y, m, 1, 0, 0, &y_save ); bli_randm( &a ); bli_randm( &x ); bli_randm( &y ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); //bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( uplo, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &y, &y_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &y_save, &y ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( 
"x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); #endif #ifdef BLIS //bli_obj_toggle_conj( &a ); //bli_obj_toggle_conj( &x ); //bli_symv( &alpha, bli_hemv( &alpha, &a, &x, &beta, &y ); #else f77_char uploa = 'L'; f77_int mm = bli_obj_length( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int incx = bli_obj_vector_inc( &x ); f77_int incy = bli_obj_vector_inc( &y ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* xp = bli_obj_buffer( &x ); double* betap = bli_obj_buffer( &beta ); double* yp = bli_obj_buffer( &y ); dsymv_( &uploa, &mm, alphap, ap, &lda, xp, &incx, betap, yp, &incy ); #endif #ifdef PRINT bli_printm( "y after", &y, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_hemv_blis" ); #else printf( "data_hemv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_her.c000066400000000000000000000115021360743507500154200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" // uplo m alpha x incx a lda //void dsyr_( char*, int*, double*, double*, int*, double*, int* ); //void zher_( char*, int*, double*, dcomplex*, int*, dcomplex*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, x; obj_t a_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 6; #endif #if 1 dt_alpha = dt_x = dt_a = BLIS_DOUBLE; #else dt_alpha = dt_x = dt_a = BLIS_DCOMPLEX; #endif uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_her_blis" ); #else printf( "data_her_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_a, m, m, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &a ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); //bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( uplo, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS //bli_obj_toggle_conj( &x ); //bli_syr( &alpha, bli_her( &alpha, &x, &a ); #else f77_char uplo = 'L'; f77_int mm = bli_obj_length( &a ); f77_int incx = bli_obj_vector_inc( &x ); f77_int lda = bli_obj_col_stride( &a ); double* alphap = bli_obj_buffer( &alpha ); double* xp = bli_obj_buffer( &x ); double* ap = bli_obj_buffer( &a ); /* dcomplex* xp = bli_obj_buffer( x ); dcomplex* ap = bli_obj_buffer( &a ); */ dsyr_( &uplo, //zher_( &uplo, &mm, alphap, xp, &incx, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_her_blis" ); #else printf( "data_her_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &a ); bli_obj_free( &a_save ); } //bli_finalize(); return 0; } 
blis-0.6.1/test/test_her2.c000066400000000000000000000117571360743507500155160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" // uplo m alpha x incx y incy a lda //void dsyr2_( char*, int*, double*, double*, int*, double*, int*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, x, y; obj_t a_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x, dt_y; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 6; #endif #if 1 dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE; #else dt_alpha = dt_x = dt_y = dt_a = BLIS_DCOMPLEX; #endif uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_her2_blis" ); #else printf( "data_her2_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_y, m, 1, 0, 0, &y ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_a, m, m, 0, 0, &a_save ); bli_randm( &x ); bli_randm( &y ); bli_randm( &a ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); //bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( uplo, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &a, &a_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &a_save, &a ); dtime = bli_clock(); #ifdef PRINT bli_printm( "x", &x, "%4.1f", "" ); bli_printm( "y", &y, "%4.1f", "" ); bli_printm( "a", &a, "%4.1f", "" ); #endif #ifdef BLIS //bli_obj_toggle_conj( &x ); //bli_obj_toggle_conj( &y ); 
//bli_syr2( &alpha, bli_her2( &alpha, &x, &y, &a ); #else f77_char uplo = 'L'; f77_int mm = bli_obj_length( &a ); f77_int incx = bli_obj_vector_inc( &x ); f77_int incy = bli_obj_vector_inc( &y ); f77_int lda = bli_obj_col_stride( &a ); double* alphap = bli_obj_buffer( &alpha ); double* xp = bli_obj_buffer( &x ); double* yp = bli_obj_buffer( &y ); double* ap = bli_obj_buffer( &a ); dsyr2_( &uplo, &mm, alphap, xp, &incx, yp, &incy, ap, &lda ); #endif #ifdef PRINT bli_printm( "a after", &a, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_her2_blis" ); #else printf( "data_her2_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_her2k.c000066400000000000000000000173741360743507500156720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt; int r, n_repeats; uplo_t uploc; trans_t transa; f77_char f77_uploc; f77_char f77_transa; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 3; k_input = 1; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif uploc = BLIS_LOWER; //uploc = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_her2k_blis" ); #else printf( "data_her2k_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_does_trans( transa ) ) { bli_obj_create( dt, k, m, 0, 0, &a ); bli_obj_create( dt, k, m, 0, 0, &b ); } else { bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, k, 0, 0, &b ); } bli_obj_create( dt, m, m, 0, 0, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &c ); bli_obj_set_uplo( uploc, &c ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transa, &b ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_her2k( &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); ssyr2k_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( 
bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyr2k_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* bp = bli_obj_buffer( &b ); float* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); cher2k_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zher2k_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 2.0 * m * k * m ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_her2k_blis" ); #else printf( "data_her2k_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( 
unsigned long )k, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_herk.c000066400000000000000000000160701360743507500156000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha, beta; dim_t m, k; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, k_input; num_t dt; int r, n_repeats; uplo_t uploc; trans_t transa; f77_char f77_uploc; f77_char f77_transa; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; k_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 3; k_input = 1; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif uploc = BLIS_LOWER; //uploc = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( k_input < 0 ) k = p * ( dim_t )abs(k_input); else k = ( dim_t ) k_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_does_trans( transa ) ) bli_obj_create( dt, k, m, 0, 0, &a ); else bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, m, m, 0, 0, &c ); bli_obj_create( dt, m, m, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &c ); bli_obj_set_uplo( uploc, &c ); bli_obj_set_conjtrans( transa, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( 
-(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_herk( &alpha, &a, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* betap = bli_obj_buffer( &beta ); float* cp = bli_obj_buffer( &c ); ssyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsyrk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); float* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); cherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width_after_trans( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); double* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zherk_( &f77_uploc, &f77_transa, &mm, &kk, alphap, ap, 
&lda, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_herk_blis" ); #else printf( "data_herk_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )k, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_trmm.c000066400000000000000000000166151360743507500156330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 4; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uploa = BLIS_LOWER; //uploa = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_trmm_blis" ); #else printf( "data_trmm_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); // Randomize A, make it densely Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. 
bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trmm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* cp = bli_obj_buffer( &c ); strmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* cp = bli_obj_buffer( &c ); dtrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* cp = bli_obj_buffer( &c ); ztrmm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, 
"%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trmm_blis" ); #else printf( "data_trmm_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_trmv.c000066400000000000000000000112251360743507500156340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" // uploa trans, diag, m a lda x incx //void dtrmv_( char*, char*, char*, int*, double*, int*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, x; obj_t x_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_a = dt_x = BLIS_DOUBLE; uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_trmv_blis" ); #else printf( "data_trmv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_x, m, 1, 0, 0, &x_save ); bli_randm( &a ); bli_randm( &x ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_copym( &x, &x_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &x_save, &x ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "x", &x, "%4.1f", "" ); #endif #ifdef BLIS bli_trmv( &BLIS_ONE, &a, &x ); #else f77_char uploa = 'L'; f77_char transa = 'N'; f77_char diaga = 'N'; f77_int mm = bli_obj_length( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int incx = bli_obj_vector_inc( &x ); double* ap = bli_obj_buffer( &a ); double* xp = bli_obj_buffer( &x ); dtrmv_( &uploa, &transa, &diaga, &mm, ap, &lda, xp, &incx ); #endif #ifdef PRINT bli_printm( "x after", &x, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_trmv_blis" ); #else printf( "data_trmv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &x_save ); } //bli_finalize(); return 0; } 
blis-0.6.1/test/test_trsm.c000066400000000000000000000167511360743507500156420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 4; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uploa = BLIS_LOWER; //uploa = BLIS_UPPER; transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_trsm_blis" ); #else printf( "data_trsm_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); // Randomize A and zero the unstored triangle to ensure the // implementation reads only from the stored region. bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. 
bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_copym( &c, &c_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_invertd( &a ); bli_printm( "a", &a, "%4.1f", "" ); bli_invertd( &a ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trsm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); float* ap = bli_obj_buffer( &a ); float* cp = bli_obj_buffer( &c ); strsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* cp = bli_obj_buffer( &c ); dtrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); dcomplex* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); dcomplex* cp = bli_obj_buffer( &c ); ztrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c 
after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trsm_blis" ); #else printf( "data_trsm_%s", BLAS ); #endif printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/test_trsv.c000066400000000000000000000116251360743507500156460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include "blis.h" // uploa trans, diag, m a lda x incx //void dtrsv_( char*, char*, char*, int*, double*, int*, double*, int* ); //#define PRINT int main( int argc, char** argv ) { obj_t a, x; obj_t x_save; obj_t alpha; dim_t m; dim_t p; dim_t p_begin, p_end, p_inc; int m_input; num_t dt_a, dt_x; num_t dt_alpha; int r, n_repeats; uplo_t uplo; double dtime; double dtime_save; double gflops; //bli_init(); n_repeats = 3; #ifndef PRINT p_begin = 40; p_end = 2000; p_inc = 40; m_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 15; n_input = 15; #endif dt_alpha = dt_a = dt_x = BLIS_DOUBLE; uplo = BLIS_LOWER; // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. 
for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; #ifdef BLIS printf( "data_trsv_blis" ); #else printf( "data_trv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )0, 0.0 ); //for ( p = p_begin; p <= p_end; p += p_inc ) for ( p = p_end; p_begin <= p; p -= p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_x, m, 1, 0, 0, &x ); bli_obj_create( dt_x, m, 1, 0, 0, &x_save ); bli_randm( &a ); bli_randm( &x ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); // Randomize A and zero the unstored triangle to ensure the // implementation reads only from the stored region. bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. 
bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (1.0/1.0), 0.0, &alpha ); bli_copym( &x, &x_save ); dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &x_save, &x ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "x", &x, "%4.1f", "" ); #endif #ifdef BLIS bli_trsv( &BLIS_ONE, &a, &x ); #else f77_char uploa = 'L'; f77_char transa = 'N'; f77_char diaga = 'N'; f77_int mm = bli_obj_length( &a ); f77_int lda = bli_obj_col_stride( &a ); f77_int incx = bli_obj_vector_inc( &x ); double* ap = bli_obj_buffer( &a ); double* xp = bli_obj_buffer( &x ); dtrsv_( &uploa, &transa, &diaga, &mm, ap, &lda, xp, &incx ); #endif #ifdef PRINT bli_printm( "x after", &x, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } gflops = ( 1.0 * m * m ) / ( dtime_save * 1.0e9 ); #ifdef BLIS printf( "data_trsv_blis" ); #else printf( "data_trsv_%s", BLAS ); #endif printf( "( %2lu, 1:2 ) = [ %4lu %7.2f ];\n", ( unsigned long )(p - p_begin)/p_inc + 1, ( unsigned long )m, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &x_save ); } //bli_finalize(); return 0; } blis-0.6.1/test/thread_ranges/000077500000000000000000000000001360743507500162465ustar00rootroot00000000000000blis-0.6.1/test/thread_ranges/Makefile000066400000000000000000000107271360743507500177150ustar00rootroot00000000000000#!/bin/bash # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for standalone BLIS test drivers. # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all \ test-ranges \ clean cleanx # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. # - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := ../.. LIB_PATH = ../../lib/$(CONFIG_NAME) INC_PATH = ../../include/$(CONFIG_NAME) SHARE_PATH := ../.. 
endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. -include $(SHARE_PATH)/common.mk # # --- General build definitions ------------------------------------------------ # TEST_SRC_PATH := . TEST_OBJ_PATH := . # Gather all local object files. TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(TEST_SRC_PATH)/*.c))) # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add installed and local header paths to CFLAGS CFLAGS += -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. #LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # Datatype DT_S := -DDT=BLIS_FLOAT DT_D := -DDT=BLIS_DOUBLE DT_C := -DDT=BLIS_SCOMPLEX DT_Z := -DDT=BLIS_DCOMPLEX # Problem size specification PDEF_MT := -DP_BEGIN=400 \ -DP_END=8000 \ -DP_INC=400 # # --- Targets/rules ------------------------------------------------------------ # all: test-ranges test-ranges: \ test_ranges.x # --Object file rules -- $(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(CC) $(CFLAGS) -c $< -o $@ # blis asm test_%.o: test_%.c $(CC) $(CFLAGS) $(PDEF_MT) $(DT_D) -c $< -o $@ # -- Executable file rules -- # NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS # on the link command line in case BLIS was configured with the BLAS # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. 
test_ranges.x: test_ranges.o $(LIBBLIS_LINK) $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ # -- Clean rules -- clean: cleanx cleanx: - $(RM_F) *.o *.x blis-0.6.1/test/thread_ranges/test_ranges.c000066400000000000000000000243361360743507500207400ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name of The University of Texas nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include "blis.h" //#define PRINT int main( int argc, char** argv ) { //bli_init(); #if 0 obj_t a, b, c; obj_t aa, bb, cc; dim_t m, n, k; num_t dt; uplo_t uploa, uplob, uploc; { dt = BLIS_DOUBLE; m = 6; k = 6; n = 6; bli_obj_create( dt, m, k, 0, 0, &a ); bli_obj_create( dt, k, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); uploa = BLIS_UPPER; uploa = BLIS_LOWER; bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_diag_offset( -2, &a ); uplob = BLIS_UPPER; uplob = BLIS_LOWER; bli_obj_set_struc( BLIS_TRIANGULAR, &b ); bli_obj_set_uplo( uplob, &b ); bli_obj_set_diag_offset( -2, &b ); uploc = BLIS_UPPER; //uploc = BLIS_LOWER; //uploc = BLIS_ZEROS; //uploc = BLIS_DENSE; bli_obj_set_struc( BLIS_HERMITIAN, &c ); //bli_obj_set_struc( BLIS_TRIANGULAR, &c ); bli_obj_set_uplo( uploc, &c ); bli_obj_set_diag_offset( 1, &c ); bli_obj_alias_to( &a, &aa ); (void)aa; bli_obj_alias_to( &b, &bb ); (void)bb; bli_obj_alias_to( &c, &cc ); (void)cc; bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); //bli_mkherm( &a ); //bli_mktrim( &a ); bli_prune_unref_mparts( &cc, BLIS_M, &aa, BLIS_N ); bli_printm( "c orig", &c, "%4.1f", "" ); bli_printm( "c alias", &cc, "%4.1f", "" ); bli_printm( "a orig", &a, "%4.1f", "" ); bli_printm( "a alias", &aa, "%4.1f", "" ); //bli_obj_print( "a struct", &a ); } #endif dim_t p_begin, p_max, p_inc; gint_t m_input, n_input; char uploa_ch; doff_t diagoffa; dim_t bf; dim_t n_way; char part_dim_ch; bool_t go_fwd; char out_ch; obj_t a; blksz_t bfs; thrinfo_t thrinfo; dim_t m, n; uplo_t uploa; bool_t part_m_dim, part_n_dim; bool_t go_bwd; dim_t p; num_t dt; dim_t start, end; dim_t width; siz_t area; gint_t t_begin, t_stop, t_inc; dim_t t; if ( argc == 13 ) { sscanf( argv[1], "%u", &p_begin ); sscanf( argv[2], "%u", &p_max ); sscanf( argv[3], "%u", &p_inc ); sscanf( argv[4], "%d", &m_input ); sscanf( argv[5], "%d", &n_input ); sscanf( argv[6], "%c", &uploa_ch ); sscanf( argv[7], "%d", &diagoffa ); sscanf( argv[8], 
"%u", &bf ); sscanf( argv[9], "%u", &n_way ); sscanf( argv[10], "%c", &part_dim_ch ); sscanf( argv[11], "%u", &go_fwd ); sscanf( argv[12], "%c", &out_ch ); } else { printf( "\n" ); printf( " %s\n", argv[0] ); printf( "\n" ); printf( " Simulate the dimension ranges assigned to threads when\n" ); printf( " partitioning a matrix for parallelism in BLIS.\n" ); printf( "\n" ); printf( " Usage:\n" ); printf( "\n" ); printf( " %s p_beg p_max p_inc m n uplo doff bf n_way part_dim go_fwd out\n", argv[0] ); printf( "\n" ); printf( " p_beg: the first problem size p to test.\n" ); printf( " p_max: the maximum problem size p to test.\n" ); printf( " p_inc: the increase in problem size p between tests.\n" ); printf( " m: the m dimension:\n" ); printf( " n: the n dimension:\n" ); printf( " if m,n = -1: bind m,n to problem size p.\n" ); printf( " if m,n = 0: bind m,n to p_max.\n" ); printf( " if m,n > 0: hold m,n = c constant for all p.\n" ); printf( " uplo: the uplo field of the matrix being partitioned:\n" ); printf( " 'l': lower-stored (BLIS_LOWER)\n" ); printf( " 'u': upper-stored (BLIS_UPPER)\n" ); printf( " 'd': densely-stored (BLIS_DENSE)\n" ); printf( " doff: the diagonal offset of the matrix being partitioned.\n" ); printf( " bf: the simulated blocking factor. all thread ranges must\n" ); printf( " be a multiple of bf, except for the range that contains\n" ); printf( " the edge case (if one exists). the blocking factor\n" ); printf( " would typically correspond to a register blocksize.\n" ); printf( " n_way: the number of ways of parallelism for which we are\n" ); printf( " partitioning (i.e.: the number of threads, or thread\n" ); printf( " groups).\n" ); printf( " part_dim: the dimension to partition:\n" ); printf( " 'm': partition the m dimension.\n" ); printf( " 'n': partition the n dimension.\n" ); printf( " go_fwd: the direction to partition:\n" ); printf( " '1': forward, e.g. 
left-to-right (part_dim = 'm') or\n" ); printf( " top-to-bottom (part_dim = 'n')\n" ); printf( " '0': backward, e.g. right-to-left (part_dim = 'm') or\n" ); printf( " bottom-to-top (part_dim = 'n')\n" ); printf( " NOTE: reversing the direction does not change the\n" ); printf( " subpartitions' widths, but it does change which end of\n" ); printf( " the index range receives the edge case, if it exists.\n" ); printf( " out: the type of output per thread-column:\n" ); printf( " 'w': the width (and area) of the thread's subpartition\n" ); printf( " 'r': the actual ranges of the thread's subpartition\n" ); printf( " where the start and end points of each range are\n" ); printf( " inclusive and exclusive, respectively.\n" ); printf( "\n" ); exit(1); } if ( m_input == 0 ) m_input = p_max; if ( n_input == 0 ) n_input = p_max; if ( part_dim_ch == 'm' ) { part_m_dim = TRUE; part_n_dim = FALSE; } else { part_m_dim = FALSE; part_n_dim = TRUE; } go_bwd = !go_fwd; if ( uploa_ch == 'l' ) uploa = BLIS_LOWER; else if ( uploa_ch == 'u' ) uploa = BLIS_UPPER; else uploa = BLIS_DENSE; if ( part_n_dim ) { if ( bli_is_upper( uploa ) ) { t_begin = n_way-1; t_stop = -1; t_inc = -1; } else /* if lower or dense */ { t_begin = 0; t_stop = n_way; t_inc = 1; } } else // if ( part_m_dim ) { if ( bli_is_lower( uploa ) ) { t_begin = n_way-1; t_stop = -1; t_inc = -1; } else /* if upper or dense */ { t_begin = 0; t_stop = n_way; t_inc = 1; } } printf( "\n" ); printf( " part: %3s doff: %3d bf: %3d output: %s\n", ( part_n_dim ? ( go_fwd ? "l2r" : "r2l" ) : ( go_fwd ? "t2b" : "b2t" ) ), ( int )diagoffa, ( int )bf, ( out_ch == 'w' ? "width(area)" : "ranges" ) ); printf( " uplo: %3c nt: %3u\n", uploa_ch, ( unsigned )n_way ); printf( "\n" ); printf( " " ); for ( t = t_begin; t != t_stop; t += t_inc ) { if ( part_n_dim ) { if ( t == t_begin ) printf( "left... " ); else if ( t == t_stop-t_inc ) printf( " ...right" ); else printf( " " ); } else // if ( part_m_dim ) { if ( t == t_begin ) printf( "top... 
" ); else if ( t == t_stop-t_inc ) printf( " ...bottom" ); else printf( " " ); } } printf( "\n" ); printf( "%4c x %4c ", 'm', 'n' ); for ( t = t_begin; t != t_stop; t += t_inc ) { printf( "%9s %u ", "thread", ( unsigned )t ); } printf( "\n" ); printf( "-------------" ); for ( t = t_begin; t != t_stop; t += t_inc ) { printf( "-------------" ); } printf( "\n" ); for ( p = p_begin; p <= p_max; p += p_inc ) { if ( m_input < 0 ) m = ( dim_t )p; else m = ( dim_t )m_input; if ( n_input < 0 ) n = ( dim_t )p; else n = ( dim_t )n_input; dt = BLIS_DOUBLE; bli_obj_create( dt, m, n, 0, 0, &a ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_diag_offset( diagoffa, &a ); bli_randm( &a ); bli_blksz_init_easy( &bfs, bf, bf, bf, bf ); printf( "%4u x %4u ", ( unsigned )m, ( unsigned )n ); for ( t = t_begin; t != t_stop; t += t_inc ) { thrinfo.n_way = n_way; thrinfo.work_id = t; if ( part_n_dim && go_fwd ) area = bli_thread_range_weighted_l2r( &thrinfo, &a, &bfs, &start, &end ); else if ( part_n_dim && go_bwd ) area = bli_thread_range_weighted_r2l( &thrinfo, &a, &bfs, &start, &end ); else if ( part_m_dim && go_fwd ) area = bli_thread_range_weighted_t2b( &thrinfo, &a, &bfs, &start, &end ); else // ( part_m_dim && go_bwd ) area = bli_thread_range_weighted_b2t( &thrinfo, &a, &bfs, &start, &end ); width = end - start; if ( out_ch == 'w' ) printf( "%4u(%6u) ", ( unsigned )width, ( unsigned )area ); else printf( "[%4u,%4u) ", ( unsigned )start, ( unsigned )end ); } printf( "\n" ); bli_obj_free( &a ); } //bli_finalize(); return 0; } blis-0.6.1/testsuite/000077500000000000000000000000001360743507500145125ustar00rootroot00000000000000blis-0.6.1/testsuite/Makefile000066400000000000000000000130731360743507500161560ustar00rootroot00000000000000# # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2014, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # # # Makefile # # Field G. Van Zee # # Makefile for BLIS testsuite. # # # --- Makefile PHONY target definitions ---------------------------------------- # .PHONY: all bin clean \ check-env check-env-mk check-env-fragments check-env-make-defs \ run run-amd64 run-x86 run-arm # # --- Determine makefile fragment location ------------------------------------- # # Comments: # - DIST_PATH is assumed to not exist if BLIS_INSTALL_PATH is given. 
# - We must use recursively expanded assignment for LIB_PATH and INC_PATH in # the second case because CONFIG_NAME is not yet set. ifneq ($(strip $(BLIS_INSTALL_PATH)),) LIB_PATH := $(BLIS_INSTALL_PATH)/lib INC_PATH := $(BLIS_INSTALL_PATH)/include/blis SHARE_PATH := $(BLIS_INSTALL_PATH)/share/blis else DIST_PATH := .. LIB_PATH = ../lib/$(CONFIG_NAME) INC_PATH = ../include/$(CONFIG_NAME) SHARE_PATH := .. endif # # --- Include common makefile definitions -------------------------------------- # # Include the common makefile fragment. -include $(SHARE_PATH)/common.mk # # --- General build definitions ------------------------------------------------ # TEST_SRC_PATH := src TEST_OBJ_PATH := obj # Gather all local object files. TEST_OBJS := $(sort $(patsubst $(TEST_SRC_PATH)/%.c, \ $(TEST_OBJ_PATH)/%.o, \ $(wildcard $(TEST_SRC_PATH)/*.c))) # Override the value of CINCFLAGS so that the value of CFLAGS returned by # get-user-cflags-for() is not cluttered up with include paths needed only # while building BLIS. CINCFLAGS := -I$(INC_PATH) # Use the "framework" CFLAGS for the configuration family. CFLAGS := $(call get-user-cflags-for,$(CONFIG_NAME)) # Add local header paths to CFLAGS CFLAGS += -I$(TEST_SRC_PATH) # Locate the libblis library to which we will link. #LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) # Binary executable name. TESTSUITE_BIN := test_libblis.x # # --- Targets/rules ------------------------------------------------------------ # # --- Primary targets --- all: check-env bin bin: check-env $(TESTSUITE_BIN) # --- Environment check rules --- check-env: check-env-make-defs check-env-fragments check-env-config-mk check-env-config-mk: ifeq ($(CONFIG_MK_PRESENT),no) $(error Cannot proceed: config.mk not detected! Run configure first) endif check-env-make-defs: check-env-fragments ifeq ($(MAKE_DEFS_MK_PRESENT),no) $(error Cannot proceed: make_defs.mk not detected! 
Invalid configuration) endif # --Object file rules -- $(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(CC) $(CFLAGS) -c $< -o $@ else @echo "Compiling $@" @$(CC) $(CFLAGS) -c $< -o $@ endif # -- Executable file rules -- $(TESTSUITE_BIN): $(TEST_OBJS) $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) $(LINKER) $(TEST_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ else @echo "Linking $@ against '$(LIBBLIS_LINK) $(LDFLAGS)'" @$(LINKER) $(TEST_OBJS) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ endif # -- Test run/check rules -- run: $(TESTSUITE_BIN) ifeq ($(ENABLE_VERBOSE),yes) ./$(TESTSUITE_BIN) > $(TESTSUITE_OUT_FILE) else @echo "Running $(TESTSUITE_BIN) with output redirected to '$(TESTSUITE_OUT_FILE)'" @./$(TESTSUITE_BIN) > $(TESTSUITE_OUT_FILE) endif run-fast: $(TESTSUITE_BIN) ifeq ($(ENABLE_VERBOSE),yes) ./$(TESTSUITE_BIN) -g $(TESTSUITE_FAST_GEN) -o $(TESTSUITE_FAST_OPS) > $(TESTSUITE_OUT_FILE) else @echo "Running $(TESTSUITE_BIN) (fast) with output redirected to '$(TESTSUITE_OUT_FILE)'" @./$(TESTSUITE_BIN) -g $(TESTSUITE_FAST_GEN) -o $(TESTSUITE_FAST_OPS) > $(TESTSUITE_OUT_FILE) endif check: run ifeq ($(ENABLE_VERBOSE),yes) - $(TESTSUITE_CHECK) $(TESTSUITE_OUT_FILE) else @- $(TESTSUITE_CHECK) $(TESTSUITE_OUT_FILE) endif check-fast: run-fast ifeq ($(ENABLE_VERBOSE),yes) - $(TESTSUITE_CHECK) $(TESTSUITE_OUT_FILE) else @- $(TESTSUITE_CHECK) $(TESTSUITE_OUT_FILE) endif # -- Clean rules -- clean: - $(RM_F) $(TEST_OBJS) $(TESTSUITE_BIN) blis-0.6.1/testsuite/check-blistest.sh000077500000000000000000000042111360743507500177530ustar00rootroot00000000000000#!/bin/sh # # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # script_name=${0##*/} ansi_red="\033[0;31m" ansi_green="\033[0;32m" ansi_normal="\033[0m" passmsg="All BLIS tests passed!" failmsg0="At least one BLIS test failed. :(" failmsg1="Please see output.testsuite for details." grep -q FAILURE $1 if [ $? 
-eq 0 ]; then printf "${ansi_red}""${script_name}: ${failmsg0}""${ansi_normal}\n" printf "${ansi_red}""${script_name}: ${failmsg1}""${ansi_normal}\n" exit 1 else printf "${ansi_green}""${script_name}: ${passmsg}""${ansi_normal}\n" exit 0 fi blis-0.6.1/testsuite/input.general000066400000000000000000000044101360743507500172070ustar00rootroot00000000000000# ---------------------------------------------------------------------- # # input.general # BLIS test suite # # This file contains input values that control how BLIS operations are # tested. Comments explain the purpose of each parameter as well as # accepted values. # 1 # Number of repeats per experiment (best result is reported) rc # Matrix storage scheme(s) to test: # 'c' = col-major storage; 'g' = general stride storage; # 'r' = row-major storage cj # Vector storage scheme(s) to test: # 'c' = colvec / unit stride; 'j' = colvec / non-unit stride; # 'r' = rowvec / unit stride; 'i' = rowvec / non-unit stride 0 # Test all combinations of storage schemes? 1 # Perform all tests with alignment? # '0' = do NOT align buffers/ldims; '1' = align buffers/ldims 0 # Randomize vectors and matrices using: # '0' = real values on [-1,1]; # '1' = powers of 2 in narrow precision range 32 # General stride spacing (for cases when testing general stride) sdcz # Datatype(s) to test: # 's' = single real; 'c' = single complex; # 'd' = double real; 'z' = double complex 0 # Test gemm with mixed-domain operands? 0 # Test gemm with mixed-precision operands? 
100 # Problem size: first to test 500 # Problem size: maximum to test 100 # Problem size: increment between experiments # Complex level-3 implementations to test: 0 # 3mh ('1' = enable; '0' = disable) 0 # 3m1 ('1' = enable; '0' = disable) 0 # 4mh ('1' = enable; '0' = disable) 0 # 4m1b ('1' = enable; '0' = disable) 0 # 4m1a ('1' = enable; '0' = disable) 1 # 1m ('1' = enable; '0' = disable) 1 # native ('1' = enable; '0' = disable) 1 # Simulate application-level threading: # '1' = disable / use one testsuite thread; # 'n' = enable and use n testsuite threads 1 # Error-checking level: # '0' = disable error checking; '1' = full error checking i # Reaction to test failure: # 'i' = ignore; 's' = sleep() and continue; 'a' = abort 0 # Output results in matlab/octave format? ('1' = yes; '0' = no) 0 # Output results to stdout AND files? ('1' = yes; '0' = no) blis-0.6.1/testsuite/input.general.fast000066400000000000000000000044151360743507500201500ustar00rootroot00000000000000# ---------------------------------------------------------------------- # # input.general.fast # BLIS test suite # # This file contains input values that control how BLIS operations are # tested. Comments explain the purpose of each parameter as well as # accepted values. # 1 # Number of repeats per experiment (best result is reported) rc # Matrix storage scheme(s) to test: # 'c' = col-major storage; 'g' = general stride storage; # 'r' = row-major storage cj # Vector storage scheme(s) to test: # 'c' = colvec / unit stride; 'j' = colvec / non-unit stride; # 'r' = rowvec / unit stride; 'i' = rowvec / non-unit stride 0 # Test all combinations of storage schemes? 1 # Perform all tests with alignment? 
# '0' = do NOT align buffers/ldims; '1' = align buffers/ldims 0 # Randomize vectors and matrices using: # '0' = real values on [-1,1]; # '1' = powers of 2 in narrow precision range 32 # General stride spacing (for cases when testing general stride) sdcz # Datatype(s) to test: # 's' = single real; 'c' = single complex; # 'd' = double real; 'z' = double complex 0 # Test gemm with mixed-domain operands? 0 # Test gemm with mixed-precision operands? 100 # Problem size: first to test 100 # Problem size: maximum to test 100 # Problem size: increment between experiments # Complex level-3 implementations to test: 0 # 3mh ('1' = enable; '0' = disable) 0 # 3m1 ('1' = enable; '0' = disable) 0 # 4mh ('1' = enable; '0' = disable) 0 # 4m1b ('1' = enable; '0' = disable) 0 # 4m1a ('1' = enable; '0' = disable) 0 # 1m ('1' = enable; '0' = disable) 1 # native ('1' = enable; '0' = disable) 1 # Simulate application-level threading: # '1' = disable / use one testsuite thread; # 'n' = enable and use n testsuite threads 1 # Error-checking level: # '0' = disable error checking; '1' = full error checking i # Reaction to test failure: # 'i' = ignore; 's' = sleep() and continue; 'a' = abort 0 # Output results in matlab/octave format? ('1' = yes; '0' = no) 0 # Output results to stdout AND files? ('1' = yes; '0' = no) blis-0.6.1/testsuite/input.general.mixed000066400000000000000000000044101360743507500203140ustar00rootroot00000000000000# ---------------------------------------------------------------------- # # input.general # BLIS test suite # # This file contains input values that control how BLIS operations are # tested. Comments explain the purpose of each parameter as well as # accepted values. 
# 1 # Number of repeats per experiment (best result is reported) rc # Matrix storage scheme(s) to test: # 'c' = col-major storage; 'g' = general stride storage; # 'r' = row-major storage cj # Vector storage scheme(s) to test: # 'c' = colvec / unit stride; 'j' = colvec / non-unit stride; # 'r' = rowvec / unit stride; 'i' = rowvec / non-unit stride 0 # Test all combinations of storage schemes? 1 # Perform all tests with alignment? # '0' = do NOT align buffers/ldims; '1' = align buffers/ldims 0 # Randomize vectors and matrices using: # '0' = real values on [-1,1]; # '1' = powers of 2 in narrow precision range 32 # General stride spacing (for cases when testing general stride) sdcz # Datatype(s) to test: # 's' = single real; 'c' = single complex; # 'd' = double real; 'z' = double complex 1 # Test gemm with mixed-domain operands? 1 # Test gemm with mixed-precision operands? 100 # Problem size: first to test 500 # Problem size: maximum to test 100 # Problem size: increment between experiments # Complex level-3 implementations to test: 0 # 3mh ('1' = enable; '0' = disable) 0 # 3m1 ('1' = enable; '0' = disable) 0 # 4mh ('1' = enable; '0' = disable) 0 # 4m1b ('1' = enable; '0' = disable) 0 # 4m1a ('1' = enable; '0' = disable) 1 # 1m ('1' = enable; '0' = disable) 1 # native ('1' = enable; '0' = disable) 1 # Simulate application-level threading: # '1' = disable / use one testsuite thread; # 'n' = enable and use n testsuite threads 1 # Error-checking level: # '0' = disable error checking; '1' = full error checking i # Reaction to test failure: # 'i' = ignore; 's' = sleep() and continue; 'a' = abort 0 # Output results in matlab/octave format? ('1' = yes; '0' = no) 0 # Output results to stdout AND files? 
('1' = yes; '0' = no) blis-0.6.1/testsuite/input.general.salt000066400000000000000000000044101360743507500201510ustar00rootroot00000000000000# ---------------------------------------------------------------------- # # input.general # BLIS test suite # # This file contains input values that control how BLIS operations are # tested. Comments explain the purpose of each parameter as well as # accepted values. # 1 # Number of repeats per experiment (best result is reported) rc # Matrix storage scheme(s) to test: # 'c' = col-major storage; 'g' = general stride storage; # 'r' = row-major storage cj # Vector storage scheme(s) to test: # 'c' = colvec / unit stride; 'j' = colvec / non-unit stride; # 'r' = rowvec / unit stride; 'i' = rowvec / non-unit stride 0 # Test all combinations of storage schemes? 1 # Perform all tests with alignment? # '0' = do NOT align buffers/ldims; '1' = align buffers/ldims 0 # Randomize vectors and matrices using: # '0' = real values on [-1,1]; # '1' = powers of 2 in narrow precision range 32 # General stride spacing (for cases when testing general stride) sdcz # Datatype(s) to test: # 's' = single real; 'c' = single complex; # 'd' = double real; 'z' = double complex 0 # Test gemm with mixed-domain operands? 0 # Test gemm with mixed-precision operands? 
100 # Problem size: first to test 100 # Problem size: maximum to test 100 # Problem size: increment between experiments # Complex level-3 implementations to test: 0 # 3mh ('1' = enable; '0' = disable) 0 # 3m1 ('1' = enable; '0' = disable) 0 # 4mh ('1' = enable; '0' = disable) 0 # 4m1b ('1' = enable; '0' = disable) 0 # 4m1a ('1' = enable; '0' = disable) 1 # 1m ('1' = enable; '0' = disable) 1 # native ('1' = enable; '0' = disable) 4 # Simulate application-level threading: # '1' = disable / use one testsuite thread; # 'n' = enable and use n testsuite threads 1 # Error-checking level: # '0' = disable error checking; '1' = full error checking i # Reaction to test failure: # 'i' = ignore; 's' = sleep() and continue; 'a' = abort 0 # Output results in matlab/octave format? ('1' = yes; '0' = no) 0 # Output results to stdout AND files? ('1' = yes; '0' = no) blis-0.6.1/testsuite/input.operations000066400000000000000000000220451360743507500177610ustar00rootroot00000000000000# -------------------------------------------------------------------------- # # input.operations # BLIS test suite # # This file contains input values that control which BLIS operations are # tested as well as how those test runs are parameterized. We will now # describe how each section or line type may be edited. # # ENABLING/DISABLING ENTIRE SECTIONS # The values in the "Section overrides" section allow you to disable # all operations in a given "level". Enabling a level here by itself # does not enable every operation in that level; it simply means that # the individual switches for each operation (in that level) determine # whether or not the tests are executed. Use 1 to enable a section, or # 0 to disable. # # ENABLING/DISABLING INDIVIDUAL OPERATION TESTS # Given that an operation's section override switch is set to 1 # (enabled), whether or not that operation will get tested is # determined by its local switch. 
For example, if the level-1v section # override is set to 1, and there is a 1 on the line marked "addv", # then the addv operation will be tested. Similarly, a 0 would cause # addv to not be tested. # # ENABLING ONLY SELECT OPERATIONS # If you would like to enable just a few (or even just one) operation # without adjusting any section overrides (or individual operation # switches), change the desired operation switch(es) to 2. This will # cause any operation that is not set to 2 to be disabled, regardless # of section override values. For example, setting the axpyv and gemv # operation switches to 2 will cause the test suite to test ONLY axpyv # and gemv, even if all other sections and operations are set to 1. # NOTE: As long as there is at least on operation switch set to 2, no # other operations will be tested. When you are done testing your # select operations, you should revert the operation switch(es) back # to 1. # # CHANGING PROBLEM SIZE/SHAPES TESTED # The problem sizes tested by an operation are determined by the # dimension specifiers on the line marked "dimensions: ". # If, for example, contains two dimension labels (e.g. # "m n"), then the line should begin with two dimension specifiers. # Dimension specifiers of -1 cause the corresponding dimension to be # bound to the problem size, which is determined by values set in # input.general. Positive values cause the corresponding dimension to # be fixed to that value and held constant. # # Examples of dimension specifiers (where the dimensions are m and n): # # -1 -1 Dimensions m and n grow with problem size (resulting in # square matrices). # -1 150 Dimension m grows with problem size and n is fixed at # 150. # -1 -2 Dimension m grows with problem size and n grows # proportional to half the problem size. # # CHANGING PARAMTER COMBINATIONS TESTED # The parameter combinations tested by an operation are determined by # the parameter specifier characters on the line marked "parameters: # ". 
If, for example, contains two # parameter labels (e.g. "transa conjx"), then the line should contain # two parameter specifier characters. The '?' specifier character # serves as a wildcard--it causes all possible values of that parameter # to be tested. A character such as 'n' or 't' causes only that value # to be tested. # # Examples of parameter specifiers (where the parameters are transa # and conjx): # # ?? All combinations of the transa and conjx parameters are # tested: nn, nc, tn, tc, cn, cc, hn, hc. # ?n conjx is fixed to "no conjugate" but transa is allowed # to vary: nn, tn, cn, hn. # hc Only the case where transa is "Hermitian-transpose" and # conjx is "conjugate" is tested. # # Here is a full list of the parameter types used by the various BLIS # operations along with their possible character encodings: # # side: l,r left, right # uplo: l,u lower, upper # trans: n,t,c,h no transpose, transpose, conjugate, Hermitian- # transpose (i.e. conjugate-transpose) # conj: n,c no conjugate, conjugate # diag: n,u non-unit diagonal, unit diagonal # # --- Section overrides ---------------------------------------------------- 1 # Utility 1 # Level-1v kernels 1 # Level-1m 1 # Level-1f kernels 1 # Level-2 1 # Level-3 micro-kernels 1 # Level-3 # --- Utility -------------------------------------------------------------- 1 # randv -1 # dimensions: m 1 # randm -1 -1 # dimensions: m n # --- Level-1v ------------------------------------------------------------- 1 # addv -1 # dimensions: m ? # parameters: conjx 1 # amaxv -1 # dimensions: m 1 # axpbyv -1 # dimensions: m ? # parameters: conjx 1 # axpyv -1 # dimensions: m ? # parameters: conjx 1 # copyv -1 # dimensions: m ? # parameters: conjx 1 # dotv -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotxv -1 # dimensions: m ?? # parameters: conjx conjy 1 # normfv -1 # dimensions: m 1 # scalv -1 # dimensions: m ? # parameters: conjbeta 1 # scal2v -1 # dimensions: m ? 
# parameters: conjx 1 # setv -1 # dimensions: m 1 # subv -1 # dimensions: m ? # parameters: conjx 1 # xpbyv -1 # dimensions: m ? # parameters: conjx # --- Level-1m ------------------------------------------------------------- 1 # addm -1 -2 # dimensions: m n ? # parameters: transa 1 # axpym -1 -1 # dimensions: m n ? # parameters: transa 1 # copym -1 -2 # dimensions: m n ? # parameters: transa 1 # normfm -1 -2 # dimensions: m n 1 # scalm -1 -2 # dimensions: m n ? # parameters: conjbeta 1 # scal2m -1 -2 # dimensions: m n ? # parameters: transa 1 # setm -1 -2 # dimensions: m n 1 # subm -1 -2 # dimensions: m n ? # parameters: transa 1 # xpbym -1 -1 # dimensions: m n ? # parameters: transa # --- Level-1f kernels ----------------------------------------------------- 1 # axpy2v -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotaxpyv -1 # dimensions: m ??? # parameters: conjxt conjx conjy 1 # axpyf -1 # dimensions: m ?? # parameters: conja conjx 1 # dotxf -1 # dimensions: m ?? # parameters: conjat conjx 1 # dotxaxpyf -1 # dimensions: m ???? # parameters: conjat conja conjw conjx # --- Level-2 -------------------------------------------------------------- 1 # gemv -1 -2 # dimensions: m n ?? # parameters: transa conjx 1 # ger -1 -2 # dimensions: m n ?? # parameters: conjx conjy 1 # hemv -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # her -1 # dimensions: m ?? # parameters: uploc conjx 1 # her2 -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # symv -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # syr -1 # dimensions: m ?? # parameters: uploc conjx 1 # syr2 -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # trmv -1 # dimensions: m ??? # parameters: uploa transa diaga 1 # trsv -1 # dimensions: m ??? # parameters: uploa transa diaga # --- Level-3 micro-kernels ------------------------------------------------ 1 # gemm -1 # dimensions: k 1 # trsm ? # parameters: uploa 1 # gemmtrsm -1 # dimensions: k ? 
# parameters: uploa # --- Level-3 -------------------------------------------------------------- 1 # gemm -1 -1 -1 # dimensions: m n k ?? # parameters: transa transb 1 # hemm -1 -1 # dimensions: m n ???? # parameters: side uploa conja transb 1 # herk -1 -1 # dimensions: m k ?? # parameters: uploc transa 1 # her2k -1 -1 # dimensions: m k ??? # parameters: uploc transa transb 1 # symm -1 -1 # dimensions: m n ???? # parameters: side uploa conja transb 1 # syrk -1 -1 # dimensions: m k ?? # parameters: uploc transa 1 # syr2k -1 -1 # dimensions: m k ??? # parameters: uploc transa transb 1 # trmm -1 -1 # dimensions: m n ???? # parameters: side uploa transa diaga 1 # trmm3 -1 -1 # dimensions: m n ????n # parameters: side uploa transa diaga transb 1 # trsm -1 -1 # dimensions: m n ???? # parameters: side uploa transa diaga blis-0.6.1/testsuite/input.operations.fast000066400000000000000000000220451360743507500207150ustar00rootroot00000000000000# -------------------------------------------------------------------------- # # input.operations # BLIS test suite # # This file contains input values that control which BLIS operations are # tested as well as how those test runs are parameterized. We will now # describe how each section or line type may be edited. # # ENABLING/DISABLING ENTIRE SECTIONS # The values in the "Section overrides" section allow you to disable # all operations in a given "level". Enabling a level here by itself # does not enable every operation in that level; it simply means that # the individual switches for each operation (in that level) determine # whether or not the tests are executed. Use 1 to enable a section, or # 0 to disable. # # ENABLING/DISABLING INDIVIDUAL OPERATION TESTS # Given that an operation's section override switch is set to 1 # (enabled), whether or not that operation will get tested is # determined by its local switch. 
For example, if the level-1v section # override is set to 1, and there is a 1 on the line marked "addv", # then the addv operation will be tested. Similarly, a 0 would cause # addv to not be tested. # # ENABLING ONLY SELECT OPERATIONS # If you would like to enable just a few (or even just one) operation # without adjusting any section overrides (or individual operation # switches), change the desired operation switch(es) to 2. This will # cause any operation that is not set to 2 to be disabled, regardless # of section override values. For example, setting the axpyv and gemv # operation switches to 2 will cause the test suite to test ONLY axpyv # and gemv, even if all other sections and operations are set to 1. # NOTE: As long as there is at least on operation switch set to 2, no # other operations will be tested. When you are done testing your # select operations, you should revert the operation switch(es) back # to 1. # # CHANGING PROBLEM SIZE/SHAPES TESTED # The problem sizes tested by an operation are determined by the # dimension specifiers on the line marked "dimensions: ". # If, for example, contains two dimension labels (e.g. # "m n"), then the line should begin with two dimension specifiers. # Dimension specifiers of -1 cause the corresponding dimension to be # bound to the problem size, which is determined by values set in # input.general. Positive values cause the corresponding dimension to # be fixed to that value and held constant. # # Examples of dimension specifiers (where the dimensions are m and n): # # -1 -1 Dimensions m and n grow with problem size (resulting in # square matrices). # -1 150 Dimension m grows with problem size and n is fixed at # 150. # -1 -2 Dimension m grows with problem size and n grows # proportional to half the problem size. # # CHANGING PARAMTER COMBINATIONS TESTED # The parameter combinations tested by an operation are determined by # the parameter specifier characters on the line marked "parameters: # ". 
If, for example, contains two # parameter labels (e.g. "transa conjx"), then the line should contain # two parameter specifier characters. The '?' specifier character # serves as a wildcard--it causes all possible values of that parameter # to be tested. A character such as 'n' or 't' causes only that value # to be tested. # # Examples of parameter specifiers (where the parameters are transa # and conjx): # # ?? All combinations of the transa and conjx parameters are # tested: nn, nc, tn, tc, cn, cc, hn, hc. # ?n conjx is fixed to "no conjugate" but transa is allowed # to vary: nn, tn, cn, hn. # hc Only the case where transa is "Hermitian-transpose" and # conjx is "conjugate" is tested. # # Here is a full list of the parameter types used by the various BLIS # operations along with their possible character encodings: # # side: l,r left, right # uplo: l,u lower, upper # trans: n,t,c,h no transpose, transpose, conjugate, Hermitian- # transpose (i.e. conjugate-transpose) # conj: n,c no conjugate, conjugate # diag: n,u non-unit diagonal, unit diagonal # # --- Section overrides ---------------------------------------------------- 1 # Utility 1 # Level-1v kernels 1 # Level-1m 1 # Level-1f kernels 1 # Level-2 1 # Level-3 micro-kernels 1 # Level-3 # --- Utility -------------------------------------------------------------- 1 # randv -1 # dimensions: m 1 # randm -1 -1 # dimensions: m n # --- Level-1v ------------------------------------------------------------- 1 # addv -1 # dimensions: m ? # parameters: conjx 1 # amaxv -1 # dimensions: m 1 # axpbyv -1 # dimensions: m ? # parameters: conjx 1 # axpyv -1 # dimensions: m ? # parameters: conjx 1 # copyv -1 # dimensions: m ? # parameters: conjx 1 # dotv -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotxv -1 # dimensions: m ?? # parameters: conjx conjy 1 # normfv -1 # dimensions: m 1 # scalv -1 # dimensions: m ? # parameters: conjbeta 1 # scal2v -1 # dimensions: m ? 
# parameters: conjx 1 # setv -1 # dimensions: m 1 # subv -1 # dimensions: m ? # parameters: conjx 1 # xpbyv -1 # dimensions: m ? # parameters: conjx # --- Level-1m ------------------------------------------------------------- 1 # addm -1 -2 # dimensions: m n ? # parameters: transa 1 # axpym -1 -1 # dimensions: m n ? # parameters: transa 1 # copym -1 -2 # dimensions: m n ? # parameters: transa 1 # normfm -1 -2 # dimensions: m n 1 # scalm -1 -2 # dimensions: m n ? # parameters: conjbeta 1 # scal2m -1 -2 # dimensions: m n ? # parameters: transa 1 # setm -1 -2 # dimensions: m n 1 # subm -1 -2 # dimensions: m n ? # parameters: transa 1 # xpbym -1 -1 # dimensions: m n ? # parameters: transa # --- Level-1f kernels ----------------------------------------------------- 1 # axpy2v -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotaxpyv -1 # dimensions: m ??? # parameters: conjxt conjx conjy 1 # axpyf -1 # dimensions: m ?? # parameters: conja conjx 1 # dotxf -1 # dimensions: m ?? # parameters: conjat conjx 1 # dotxaxpyf -1 # dimensions: m ???? # parameters: conjat conja conjw conjx # --- Level-2 -------------------------------------------------------------- 1 # gemv -1 -2 # dimensions: m n ?? # parameters: transa conjx 1 # ger -1 -2 # dimensions: m n ?? # parameters: conjx conjy 1 # hemv -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # her -1 # dimensions: m ?? # parameters: uploc conjx 1 # her2 -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # symv -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # syr -1 # dimensions: m ?? # parameters: uploc conjx 1 # syr2 -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # trmv -1 # dimensions: m ??? # parameters: uploa transa diaga 1 # trsv -1 # dimensions: m ??? # parameters: uploa transa diaga # --- Level-3 micro-kernels ------------------------------------------------ 1 # gemm -1 # dimensions: k 1 # trsm ? # parameters: uploa 1 # gemmtrsm -1 # dimensions: k ? 
# parameters: uploa # --- Level-3 -------------------------------------------------------------- 1 # gemm -1 -1 -1 # dimensions: m n k nn # parameters: transa transb 1 # hemm -1 -1 # dimensions: m n ??nn # parameters: side uploa conja transb 1 # herk -1 -1 # dimensions: m k ?n # parameters: uploc transa 1 # her2k -1 -1 # dimensions: m k ?nn # parameters: uploc transa transb 1 # symm -1 -1 # dimensions: m n ??nn # parameters: side uploa conja transb 1 # syrk -1 -1 # dimensions: m k ?n # parameters: uploc transa 1 # syr2k -1 -1 # dimensions: m k ?nn # parameters: uploc transa transb 1 # trmm -1 -1 # dimensions: m n ??n? # parameters: side uploa transa diaga 0 # trmm3 -1 -1 # dimensions: m n ??n?n # parameters: side uploa transa diaga transb 1 # trsm -1 -1 # dimensions: m n ??n? # parameters: side uploa transa diaga blis-0.6.1/testsuite/input.operations.mixed000066400000000000000000000220451360743507500210660ustar00rootroot00000000000000# -------------------------------------------------------------------------- # # input.operations # BLIS test suite # # This file contains input values that control which BLIS operations are # tested as well as how those test runs are parameterized. We will now # describe how each section or line type may be edited. # # ENABLING/DISABLING ENTIRE SECTIONS # The values in the "Section overrides" section allow you to disable # all operations in a given "level". Enabling a level here by itself # does not enable every operation in that level; it simply means that # the individual switches for each operation (in that level) determine # whether or not the tests are executed. Use 1 to enable a section, or # 0 to disable. # # ENABLING/DISABLING INDIVIDUAL OPERATION TESTS # Given that an operation's section override switch is set to 1 # (enabled), whether or not that operation will get tested is # determined by its local switch. 
For example, if the level-1v section # override is set to 1, and there is a 1 on the line marked "addv", # then the addv operation will be tested. Similarly, a 0 would cause # addv to not be tested. # # ENABLING ONLY SELECT OPERATIONS # If you would like to enable just a few (or even just one) operation # without adjusting any section overrides (or individual operation # switches), change the desired operation switch(es) to 2. This will # cause any operation that is not set to 2 to be disabled, regardless # of section override values. For example, setting the axpyv and gemv # operation switches to 2 will cause the test suite to test ONLY axpyv # and gemv, even if all other sections and operations are set to 1. # NOTE: As long as there is at least one operation switch set to 2, no # other operations will be tested. When you are done testing your # select operations, you should revert the operation switch(es) back # to 1. # # CHANGING PROBLEM SIZE/SHAPES TESTED # The problem sizes tested by an operation are determined by the # dimension specifiers on the line marked "dimensions: <spec_labels>". # If, for example, <spec_labels> contains two dimension labels (e.g. # "m n"), then the line should begin with two dimension specifiers. # Dimension specifiers of -1 cause the corresponding dimension to be # bound to the problem size, which is determined by values set in # input.general. Positive values cause the corresponding dimension to # be fixed to that value and held constant. # # Examples of dimension specifiers (where the dimensions are m and n): # # -1 -1 Dimensions m and n grow with problem size (resulting in # square matrices). # -1 150 Dimension m grows with problem size and n is fixed at # 150. # -1 -2 Dimension m grows with problem size and n grows # proportional to half the problem size. # # CHANGING PARAMETER COMBINATIONS TESTED # The parameter combinations tested by an operation are determined by # the parameter specifier characters on the line marked "parameters: # <param_labels>".
If, for example, <param_labels> contains two # parameter labels (e.g. "transa conjx"), then the line should contain # two parameter specifier characters. The '?' specifier character # serves as a wildcard--it causes all possible values of that parameter # to be tested. A character such as 'n' or 't' causes only that value # to be tested. # # Examples of parameter specifiers (where the parameters are transa # and conjx): # # ?? All combinations of the transa and conjx parameters are # tested: nn, nc, tn, tc, cn, cc, hn, hc. # ?n conjx is fixed to "no conjugate" but transa is allowed # to vary: nn, tn, cn, hn. # hc Only the case where transa is "Hermitian-transpose" and # conjx is "conjugate" is tested. # # Here is a full list of the parameter types used by the various BLIS # operations along with their possible character encodings: # # side: l,r left, right # uplo: l,u lower, upper # trans: n,t,c,h no transpose, transpose, conjugate, Hermitian- # transpose (i.e. conjugate-transpose) # conj: n,c no conjugate, conjugate # diag: n,u non-unit diagonal, unit diagonal # # --- Section overrides ---------------------------------------------------- 1 # Utility 1 # Level-1v kernels 1 # Level-1m 1 # Level-1f kernels 1 # Level-2 1 # Level-3 micro-kernels 1 # Level-3 # --- Utility -------------------------------------------------------------- 1 # randv -1 # dimensions: m 1 # randm -1 -1 # dimensions: m n # --- Level-1v ------------------------------------------------------------- 1 # addv -1 # dimensions: m ? # parameters: conjx 1 # amaxv -1 # dimensions: m 1 # axpbyv -1 # dimensions: m ? # parameters: conjx 1 # axpyv -1 # dimensions: m ? # parameters: conjx 1 # copyv -1 # dimensions: m ? # parameters: conjx 1 # dotv -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotxv -1 # dimensions: m ?? # parameters: conjx conjy 1 # normfv -1 # dimensions: m 1 # scalv -1 # dimensions: m ? # parameters: conjbeta 1 # scal2v -1 # dimensions: m ?
# parameters: conjx 1 # setv -1 # dimensions: m 1 # subv -1 # dimensions: m ? # parameters: conjx 1 # xpbyv -1 # dimensions: m ? # parameters: conjx # --- Level-1m ------------------------------------------------------------- 1 # addm -1 -2 # dimensions: m n ? # parameters: transa 1 # axpym -1 -1 # dimensions: m n ? # parameters: transa 1 # copym -1 -2 # dimensions: m n ? # parameters: transa 1 # normfm -1 -2 # dimensions: m n 1 # scalm -1 -2 # dimensions: m n ? # parameters: conjbeta 1 # scal2m -1 -2 # dimensions: m n ? # parameters: transa 1 # setm -1 -2 # dimensions: m n 1 # subm -1 -2 # dimensions: m n ? # parameters: transa 1 # xpbym -1 -1 # dimensions: m n ? # parameters: transa # --- Level-1f kernels ----------------------------------------------------- 1 # axpy2v -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotaxpyv -1 # dimensions: m ??? # parameters: conjxt conjx conjy 1 # axpyf -1 # dimensions: m ?? # parameters: conja conjx 1 # dotxf -1 # dimensions: m ?? # parameters: conjat conjx 1 # dotxaxpyf -1 # dimensions: m ???? # parameters: conjat conja conjw conjx # --- Level-2 -------------------------------------------------------------- 1 # gemv -1 -2 # dimensions: m n ?? # parameters: transa conjx 1 # ger -1 -2 # dimensions: m n ?? # parameters: conjx conjy 1 # hemv -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # her -1 # dimensions: m ?? # parameters: uploc conjx 1 # her2 -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # symv -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # syr -1 # dimensions: m ?? # parameters: uploc conjx 1 # syr2 -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # trmv -1 # dimensions: m ??? # parameters: uploa transa diaga 1 # trsv -1 # dimensions: m ??? # parameters: uploa transa diaga # --- Level-3 micro-kernels ------------------------------------------------ 1 # gemm -1 # dimensions: k 1 # trsm ? # parameters: uploa 1 # gemmtrsm -1 # dimensions: k ? 
# parameters: uploa # --- Level-3 -------------------------------------------------------------- 2 # gemm -1 -1 -1 # dimensions: m n k nn # parameters: transa transb 1 # hemm -1 -1 # dimensions: m n ???? # parameters: side uploa conja transb 1 # herk -1 -1 # dimensions: m k ?? # parameters: uploc transa 1 # her2k -1 -1 # dimensions: m k ??? # parameters: uploc transa transb 1 # symm -1 -1 # dimensions: m n ???? # parameters: side uploa conja transb 1 # syrk -1 -1 # dimensions: m k ?? # parameters: uploc transa 1 # syr2k -1 -1 # dimensions: m k ??? # parameters: uploc transa transb 1 # trmm -1 -1 # dimensions: m n ???? # parameters: side uploa transa diaga 1 # trmm3 -1 -1 # dimensions: m n ????n # parameters: side uploa transa diaga transb 1 # trsm -1 -1 # dimensions: m n ???? # parameters: side uploa transa diaga blis-0.6.1/testsuite/input.operations.salt000066400000000000000000000220451360743507500207230ustar00rootroot00000000000000# -------------------------------------------------------------------------- # # input.operations # BLIS test suite # # This file contains input values that control which BLIS operations are # tested as well as how those test runs are parameterized. We will now # describe how each section or line type may be edited. # # ENABLING/DISABLING ENTIRE SECTIONS # The values in the "Section overrides" section allow you to disable # all operations in a given "level". Enabling a level here by itself # does not enable every operation in that level; it simply means that # the individual switches for each operation (in that level) determine # whether or not the tests are executed. Use 1 to enable a section, or # 0 to disable. # # ENABLING/DISABLING INDIVIDUAL OPERATION TESTS # Given that an operation's section override switch is set to 1 # (enabled), whether or not that operation will get tested is # determined by its local switch. 
For example, if the level-1v section # override is set to 1, and there is a 1 on the line marked "addv", # then the addv operation will be tested. Similarly, a 0 would cause # addv to not be tested. # # ENABLING ONLY SELECT OPERATIONS # If you would like to enable just a few (or even just one) operation # without adjusting any section overrides (or individual operation # switches), change the desired operation switch(es) to 2. This will # cause any operation that is not set to 2 to be disabled, regardless # of section override values. For example, setting the axpyv and gemv # operation switches to 2 will cause the test suite to test ONLY axpyv # and gemv, even if all other sections and operations are set to 1. # NOTE: As long as there is at least one operation switch set to 2, no # other operations will be tested. When you are done testing your # select operations, you should revert the operation switch(es) back # to 1. # # CHANGING PROBLEM SIZE/SHAPES TESTED # The problem sizes tested by an operation are determined by the # dimension specifiers on the line marked "dimensions: <spec_labels>". # If, for example, <spec_labels> contains two dimension labels (e.g. # "m n"), then the line should begin with two dimension specifiers. # Dimension specifiers of -1 cause the corresponding dimension to be # bound to the problem size, which is determined by values set in # input.general. Positive values cause the corresponding dimension to # be fixed to that value and held constant. # # Examples of dimension specifiers (where the dimensions are m and n): # # -1 -1 Dimensions m and n grow with problem size (resulting in # square matrices). # -1 150 Dimension m grows with problem size and n is fixed at # 150. # -1 -2 Dimension m grows with problem size and n grows # proportional to half the problem size. # # CHANGING PARAMETER COMBINATIONS TESTED # The parameter combinations tested by an operation are determined by # the parameter specifier characters on the line marked "parameters: # <param_labels>".
If, for example, <param_labels> contains two # parameter labels (e.g. "transa conjx"), then the line should contain # two parameter specifier characters. The '?' specifier character # serves as a wildcard--it causes all possible values of that parameter # to be tested. A character such as 'n' or 't' causes only that value # to be tested. # # Examples of parameter specifiers (where the parameters are transa # and conjx): # # ?? All combinations of the transa and conjx parameters are # tested: nn, nc, tn, tc, cn, cc, hn, hc. # ?n conjx is fixed to "no conjugate" but transa is allowed # to vary: nn, tn, cn, hn. # hc Only the case where transa is "Hermitian-transpose" and # conjx is "conjugate" is tested. # # Here is a full list of the parameter types used by the various BLIS # operations along with their possible character encodings: # # side: l,r left, right # uplo: l,u lower, upper # trans: n,t,c,h no transpose, transpose, conjugate, Hermitian- # transpose (i.e. conjugate-transpose) # conj: n,c no conjugate, conjugate # diag: n,u non-unit diagonal, unit diagonal # # --- Section overrides ---------------------------------------------------- 1 # Utility 1 # Level-1v kernels 1 # Level-1m 1 # Level-1f kernels 1 # Level-2 1 # Level-3 micro-kernels 1 # Level-3 # --- Utility -------------------------------------------------------------- 1 # randv -1 # dimensions: m 1 # randm -1 -1 # dimensions: m n # --- Level-1v ------------------------------------------------------------- 1 # addv -1 # dimensions: m ? # parameters: conjx 1 # amaxv -1 # dimensions: m 1 # axpbyv -1 # dimensions: m ? # parameters: conjx 1 # axpyv -1 # dimensions: m ? # parameters: conjx 1 # copyv -1 # dimensions: m ? # parameters: conjx 1 # dotv -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotxv -1 # dimensions: m ?? # parameters: conjx conjy 1 # normfv -1 # dimensions: m 1 # scalv -1 # dimensions: m ? # parameters: conjbeta 1 # scal2v -1 # dimensions: m ?
# parameters: conjx 1 # setv -1 # dimensions: m 1 # subv -1 # dimensions: m ? # parameters: conjx 1 # xpbyv -1 # dimensions: m ? # parameters: conjx # --- Level-1m ------------------------------------------------------------- 1 # addm -1 -2 # dimensions: m n ? # parameters: transa 1 # axpym -1 -1 # dimensions: m n ? # parameters: transa 1 # copym -1 -2 # dimensions: m n ? # parameters: transa 1 # normfm -1 -2 # dimensions: m n 1 # scalm -1 -2 # dimensions: m n ? # parameters: conjbeta 1 # scal2m -1 -2 # dimensions: m n ? # parameters: transa 1 # setm -1 -2 # dimensions: m n 1 # subm -1 -2 # dimensions: m n ? # parameters: transa 1 # xpbym -1 -1 # dimensions: m n ? # parameters: transa # --- Level-1f kernels ----------------------------------------------------- 1 # axpy2v -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotaxpyv -1 # dimensions: m ??? # parameters: conjxt conjx conjy 1 # axpyf -1 # dimensions: m ?? # parameters: conja conjx 1 # dotxf -1 # dimensions: m ?? # parameters: conjat conjx 1 # dotxaxpyf -1 # dimensions: m ???? # parameters: conjat conja conjw conjx # --- Level-2 -------------------------------------------------------------- 1 # gemv -1 -2 # dimensions: m n ?? # parameters: transa conjx 1 # ger -1 -2 # dimensions: m n ?? # parameters: conjx conjy 1 # hemv -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # her -1 # dimensions: m ?? # parameters: uploc conjx 1 # her2 -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # symv -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # syr -1 # dimensions: m ?? # parameters: uploc conjx 1 # syr2 -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # trmv -1 # dimensions: m ??? # parameters: uploa transa diaga 1 # trsv -1 # dimensions: m ??? # parameters: uploa transa diaga # --- Level-3 micro-kernels ------------------------------------------------ 1 # gemm -1 # dimensions: k 1 # trsm ? # parameters: uploa 1 # gemmtrsm -1 # dimensions: k ? 
# parameters: uploa # --- Level-3 -------------------------------------------------------------- 1 # gemm -1 -1 -1 # dimensions: m n k nn # parameters: transa transb 1 # hemm -1 -1 # dimensions: m n ??nn # parameters: side uploa conja transb 1 # herk -1 -1 # dimensions: m k ?n # parameters: uploc transa 1 # her2k -1 -1 # dimensions: m k ?nn # parameters: uploc transa transb 1 # symm -1 -1 # dimensions: m n ??nn # parameters: side uploa conja transb 1 # syrk -1 -1 # dimensions: m k ?n # parameters: uploc transa 1 # syr2k -1 -1 # dimensions: m k ?nn # parameters: uploc transa transb 1 # trmm -1 -1 # dimensions: m n ??n? # parameters: side uploa transa diaga 0 # trmm3 -1 -1 # dimensions: m n ??n?n # parameters: side uploa transa diaga transb 1 # trsm -1 -1 # dimensions: m n ??n? # parameters: side uploa transa diaga blis-0.6.1/testsuite/obj/000077500000000000000000000000001360743507500152645ustar00rootroot00000000000000blis-0.6.1/testsuite/obj/.gitkeep000066400000000000000000000000001360743507500167030ustar00rootroot00000000000000blis-0.6.1/testsuite/old/000077500000000000000000000000001360743507500152705ustar00rootroot00000000000000blis-0.6.1/testsuite/old/jobscripts/000077500000000000000000000000001360743507500174525ustar00rootroot00000000000000blis-0.6.1/testsuite/old/jobscripts/cfig.out000066400000000000000000000136321360743507500211200ustar00rootroot00000000000000configure: detected Linux kernel version 4.14.0-115.6.1.el7a.ppc64le. configure: python interpeter search list is: python python3 python2. configure: using 'python' python interpreter. configure: found python version 2.7.5 (maj: 2, min: 7, rev: 5). configure: python 2.7.5 appears to be supported. configure: C compiler search list is: gcc clang cc. configure: using 'gcc' C compiler. configure: C++ compiler search list is: g++ clang++ c++. configure: using 'g++' C++ compiler (for sandbox only). configure: found gcc version 8.2.0 (maj: 8, min: 2, rev: 0). 
configure: checking for blacklisted configurations due to gcc 8.2.0. configure: found assembler ('as') version 2.27 (maj: 2, min: 27, rev: ). configure: checking for blacklisted configurations due to as 2.27. configure: warning: assembler ('as' 2.27) does not support 'bulldozer'; adding to blacklist. configure: warning: assembler ('as' 2.27) does not support 'sandybridge'; adding to blacklist. configure: warning: assembler ('as' 2.27) does not support 'haswell'; adding to blacklist. configure: warning: assembler ('as' 2.27) does not support 'piledriver'; adding to blacklist. configure: warning: assembler ('as' 2.27) does not support 'steamroller'; adding to blacklist. configure: warning: assembler ('as' 2.27) does not support 'excavator'; adding to blacklist. configure: warning: assembler ('as' 2.27) does not support 'skx'; adding to blacklist. configure: warning: assembler ('as' 2.27) does not support 'knl'; adding to blacklist. configure: configuration blacklist: configure: bulldozer sandybridge haswell piledriver steamroller excavator skx knl configure: reading configuration registry...done. configure: determining default version string. configure: found '.git' directory; assuming git clone. configure: executing: git describe --tags. configure: git returned an error: 'Unknown option: -C usage: git [--version] [--help] [-c name=value] [--exec-path[=]] [--html-path] [--man-path] [--info-path] [-p|--paginate|--no-pager] [--no-replace-objects] [--bare] [--git-dir=] [--work-tree=] [--namespace=] []'. configure: using string from unmodified version file. configure: starting configuration of BLIS 0.6.0. configure: configuring with official version string. configure: found shared library .so version '2.0.0'. configure: .so major version: 2 configure: .so minor.build version: 0.0 configure: manual configuration requested; configuring with 'power9'. configure: checking configuration against contents of 'config_registry'. configure: configuration 'power9' is registered. 
configure: 'power9' is defined as having the following sub-configurations: configure: power9 configure: which collectively require the following kernels: configure: power9 configure: checking sub-configurations: configure: 'power9' is registered...and exists. configure: checking sub-configurations' requisite kernels: configure: 'power9' kernels...exist. configure: no install prefix option given; defaulting to '/usr/local'. configure: no install exec_prefix option given; defaulting to PREFIX. configure: no install libdir option given; defaulting to EXECPREFIX/lib. configure: no install includedir option given; defaulting to PREFIX/include. configure: no install sharedir option given; defaulting to PREFIX/share. configure: final installation directories: configure: prefix: /usr/local configure: exec_prefix: ${prefix} configure: libdir: ${exec_prefix}/lib configure: includedir: ${prefix}/include configure: sharedir: ${prefix}/share configure: NOTE: the variables above can be overridden when running make. configure: no preset CFLAGS detected. configure: no preset LDFLAGS detected. configure: debug symbols disabled. configure: disabling verbose make output. (enable with 'make V=1'.) configure: disabling ARG_MAX hack. configure: building BLIS as both static and shared libraries. configure: exporting only public symbols within shared library. configure: threading is disabled. configure: requesting slab threading in jr and ir loops. configure: internal memory pools for packing blocks are enabled. configure: internal memory pools for small blocks are enabled. configure: memory tracing output is disabled. configure: libmemkind not found; disabling. configure: compiler appears to not support #pragma omp simd. configure: the BLAS compatibility layer is enabled. configure: the CBLAS compatibility layer is disabled. configure: mixed datatype support is enabled. configure: mixed datatype optimizations requiring extra memory are enabled. 
configure: small matrix handling is enabled. configure: the BLIS API integer size is automatically determined. configure: the BLAS/CBLAS API integer size is 32-bit. configure: configuring for conventional gemm implementation. configure: creating ./config.mk from ./build/config.mk.in configure: creating ./bli_config.h from ./build/bli_config.h.in configure: creating ./obj/power9 configure: creating ./obj/power9/config/power9 configure: creating ./obj/power9/kernels/power9 configure: creating ./obj/power9/ref_kernels/power9 configure: creating ./obj/power9/frame configure: creating ./obj/power9/blastest configure: creating ./obj/power9/testsuite configure: creating ./lib/power9 configure: creating ./include/power9 configure: mirroring ./config/power9 to ./obj/power9/config/power9 configure: mirroring ./kernels/power9 to ./obj/power9/kernels/power9 configure: mirroring ./ref_kernels to ./obj/power9/ref_kernels configure: mirroring ./ref_kernels to ./obj/power9/ref_kernels/power9 configure: mirroring ./frame to ./obj/power9/frame configure: creating makefile fragments in ./obj/power9/config/power9 configure: creating makefile fragments in ./obj/power9/kernels/power9 configure: creating makefile fragments in ./obj/power9/ref_kernels configure: creating makefile fragments in ./obj/power9/frame configure: configured to build within top-level directory of source distribution. 
CONFIGURE DONE blis-0.6.1/testsuite/old/jobscripts/cfig.sh000077500000000000000000000001001360743507500207100ustar00rootroot00000000000000#!/bin/bash cd ~/blis ./configure power9 echo "CONFIGURE DONE" blis-0.6.1/testsuite/old/jobscripts/jb-cfig.sh000066400000000000000000000007221360743507500213100ustar00rootroot00000000000000#!/bin/bash # execute in the general partition #SBATCH --partition=general # execute with a single task #SBATCH --ntasks=1 # maximum time is 30 minutes #SBATCH --time=00:30:00 # job name is blis #SBATCH --job-name=blis # send email for status updates #SBATCH --mail-type=ALL,TIME_LIMIT #SBATCH --mail-user=ntukanov # change default output file name #SBATCH --output=cfig.out # load environment module load gcc/8.2 # application execution srun cfig.sh blis-0.6.1/testsuite/old/jobscripts/jb-mk.sh000066400000000000000000000007161360743507500210120ustar00rootroot00000000000000#!/bin/bash # execute in the general partition #SBATCH --partition=general # execute with a single task #SBATCH --ntasks=1 # maximum time is 30 minutes #SBATCH --time=00:30:00 # job name is blis #SBATCH --job-name=blis # send email for status updates #SBATCH --mail-type=ALL,TIME_LIMIT #SBATCH --mail-user=ntukanov # change default output file name #SBATCH --output=mk.out # load environment module load gcc/8.2 # application execution srun mk.sh blis-0.6.1/testsuite/old/jobscripts/jb-runtest.sh000066400000000000000000000007301360743507500221030ustar00rootroot00000000000000#!/bin/bash # execute in the general partition #SBATCH --partition=general # execute with a single task #SBATCH --ntasks=1 # maximum time is 30 minutes #SBATCH --time=00:30:00 # job name is blis #SBATCH --job-name=blis # send email for status updates #SBATCH --mail-type=ALL,TIME_LIMIT #SBATCH --mail-user=ntukanov # change default output file name #SBATCH --output=runtest.out # load environment module load gcc/8.2 # application execution srun runtest.sh
blis-0.6.1/testsuite/old/jobscripts/mk.out000066400000000000000000000007071360743507500206160ustar00rootroot00000000000000Removing flattened header files from include/power9 Removing object files from ./obj/power9 srun: Job step aborted: Waiting up to 32 seconds for job step to finish. srun: got SIGCONT slurmstepd: error: *** JOB 1155 ON lookout00 CANCELLED AT 2019-06-10T17:29:07 *** srun: forcing job termination slurmstepd: error: *** STEP 1155.0 ON lookout00 CANCELLED AT 2019-06-10T17:29:07 *** make: *** [cleanlib] Terminated srun: error: lookout00: task 0: Terminated blis-0.6.1/testsuite/old/jobscripts/mk.sh000077500000000000000000000000711360743507500204160ustar00rootroot00000000000000#!/bin/bash cd ~/blis make clean make echo "MAKE DONE" blis-0.6.1/testsuite/old/jobscripts/runtest.sh000077500000000000000000000002021360743507500215070ustar00rootroot00000000000000#!/bin/bash cd ~/blis/testsuite rm -rf test_libblis.out make clean make -j ./test_libblis.x > test_libblis.out echo "TEST DONE" blis-0.6.1/testsuite/src/000077500000000000000000000000001360743507500153015ustar00rootroot00000000000000blis-0.6.1/testsuite/src/test_addm.c000066400000000000000000000203021360743507500174060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "addm"; static char* o_types = "mm"; // x y static char* p_types = "h"; // transx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_addm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_addm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_addm_impl ( iface_t iface, obj_t* x, obj_t* y ); void libblis_test_addm_check ( test_params_t* params, obj_t* alpha, obj_t* beta, obj_t* x, obj_t* y, double* resid ); void libblis_test_addm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_setm( tdata, params, &(op->ops->setm) ); libblis_test_normfm( tdata, params, &(op->ops->normfm) ); } void libblis_test_addm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_addm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_addm_experiment ); } } // Run one addm experiment: fill x with alpha and y with beta, time a single call to the addm implementation, and report performance and residual. void libblis_test_addm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; trans_t transx; obj_t alpha, beta; obj_t x, y; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants.
bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, sc_str[0], m, n, &x ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[1], m, n, &y ); // Initialize alpha and beta. bli_setsc( -1.0, -1.0, &alpha ); bli_setsc( 3.0, 3.0, &beta ); // Set x and y to alpha and beta, respectively. bli_setm( &alpha, &x ); bli_setm( &beta, &y ); // Apply the parameters. bli_obj_set_conjtrans( transx, &x ); // Disable repeats since bli_copym() is not yet tested. //for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_addm_impl( iface, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_addm_check( params, &alpha, &beta, &x, &y, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); } // Invoke the addm implementation selected by iface; any interface other than BLIS_TEST_SEQ_FRONT_END is reported as an error. void libblis_test_addm_impl ( iface_t iface, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_addm( x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } // Compute the residual for addm from the norm of y and the value expected from alpha, beta, m, and n. void libblis_test_addm_check ( test_params_t* params, obj_t* alpha, obj_t* beta, obj_t* x, obj_t* y, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_length( y ); dim_t n = bli_obj_width( y ); conj_t conjx = bli_obj_conj_status( x ); obj_t aplusb; obj_t alpha_conj; obj_t norm_r, m_r, n_r, temp_r; double junk; // // Pre-conditions: // - x is set to alpha. // - y_orig is set to beta.
// Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := y_orig + conjx(x) // // is functioning correctly if // // normfm(y) - sqrt( absqsc( beta + conjx(alpha) ) * m * n ) // // is negligible. // bli_obj_scalar_init_detached( dt, &aplusb ); bli_obj_scalar_init_detached( dt_real, &temp_r ); bli_obj_scalar_init_detached( dt_real, &norm_r ); bli_obj_scalar_init_detached( dt_real, &m_r ); bli_obj_scalar_init_detached( dt_real, &n_r ); bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_normfm( y, &norm_r ); bli_copysc( beta, &aplusb ); bli_addsc( &alpha_conj, &aplusb ); bli_setsc( ( double )m, 0.0, &m_r ); bli_setsc( ( double )n, 0.0, &n_r ); bli_absqsc( &aplusb, &temp_r ); bli_mulsc( &m_r, &temp_r ); bli_mulsc( &n_r, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); bli_subsc( &temp_r, &norm_r ); bli_getsc( &norm_r, resid, &junk ); } blis-0.6.1/testsuite/src/test_addm.h000066400000000000000000000034771360743507500174310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_addm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_addv.c000066400000000000000000000176321360743507500174330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "addv"; static char* o_types = "vv"; // x y static char* p_types = "c"; // conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_addv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_addv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_addv_impl ( iface_t iface, obj_t* x, obj_t* y ); void libblis_test_addv_check ( test_params_t* params, obj_t* alpha, obj_t* beta, obj_t* x, obj_t* y, double* resid ); void libblis_test_addv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); } void libblis_test_addv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_addv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_addv_experiment ); } } void libblis_test_addv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx; obj_t alpha, beta; obj_t x, y; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. 
bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); // Initialize alpha and beta. bli_setsc( -1.0, -1.0, &alpha ); bli_setsc( 3.0, 3.0, &beta ); // Set x and y to alpha and beta, respectively. bli_setv( &alpha, &x ); bli_setv( &beta, &y ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); // Disable repeats since bli_copyv() is not yet tested. //for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_addv_impl( iface, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_addv_check( params, &alpha, &beta, &x, &y, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); } void libblis_test_addv_impl ( iface_t iface, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_addv( x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_addv_check ( test_params_t* params, obj_t* alpha, obj_t* beta, obj_t* x, obj_t* y, double* resid ) { num_t dt = bli_obj_dt( x ); num_t dt_real = bli_obj_dt_proj_to_real( x ); dim_t m = bli_obj_vector_dim( x ); conj_t conjx = bli_obj_conj_status( x ); obj_t aplusb; obj_t alpha_conj; obj_t norm_r, m_r, temp_r; double junk; // // Pre-conditions: // - x is set to alpha. // - y_orig is set to beta. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // y := y_orig + conjx(x) // // is functioning correctly if // // normfv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m ) // // is negligible. // bli_obj_scalar_init_detached( dt, &aplusb ); bli_obj_scalar_init_detached( dt_real, &temp_r ); bli_obj_scalar_init_detached( dt_real, &norm_r ); bli_obj_scalar_init_detached( dt_real, &m_r ); bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_normfv( y, &norm_r ); bli_copysc( beta, &aplusb ); bli_addsc( &alpha_conj, &aplusb ); bli_setsc( ( double )m, 0.0, &m_r ); bli_absqsc( &aplusb, &temp_r ); bli_mulsc( &m_r, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); bli_subsc( &temp_r, &norm_r ); bli_getsc( &norm_r, resid, &junk ); } blis-0.6.1/testsuite/src/test_addv.h000066400000000000000000000034771360743507500174420ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Entry point of the addv test. As defined in test_addv.c, it returns immediately when the test has already been run or when libblis_test_op_is_disabled() or libblis_test_l1v_is_disabled() reports it disabled; otherwise it first runs the setv and normfv tests it depends on and then the addv test driver. */ void libblis_test_addv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_amaxv.c000066400000000000000000000271321360743507500176250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "amaxv"; static char* o_types = "v"; // x static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_amaxv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_amaxv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_amaxv_impl ( iface_t iface, obj_t* x, obj_t* index ); void libblis_test_amaxv_check ( test_params_t* params, obj_t* x, obj_t* index, double* resid ); void bli_amaxv_test ( obj_t* x, obj_t* index ); void libblis_test_amaxv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); } void libblis_test_amaxv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. 
if ( TRUE ) libblis_test_amaxv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_amaxv_experiment ); } } void libblis_test_amaxv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; obj_t x; obj_t index; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. // Create test scalars. bli_obj_scalar_init_detached( BLIS_INT, &index ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); // Randomize x. libblis_test_vobj_randomize( params, FALSE, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_amaxv_impl( iface, &x, &index ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_amaxv_check( params, &x, &index, resid ); // Zero out performance and residual if input vector is empty. libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); } void libblis_test_amaxv_impl ( iface_t iface, obj_t* x, obj_t* index ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_amaxv( x, index ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_amaxv_check ( test_params_t* params, obj_t* x, obj_t* index, double* resid ) { obj_t index_test; obj_t chi_i; obj_t chi_i_test; dim_t i; dim_t i_test; double i_d, junk; double i_d_test; // // Pre-conditions: // - x is randomized. // // Under these conditions, we assume that the implementation for // // index := amaxv( x ) // // is functioning correctly if // // x[ index ] = max( x ) // // where max() is implemented via the bli_?amaxv_test() function. // // The following two calls have already been made by the caller. That // is, the index object has already been created and the library's // amaxv implementation has already been tested. //bli_obj_scalar_init_detached( BLIS_INT, &index ); //bli_amaxv( x, &index ); bli_getsc( index, &i_d, &junk ); i = i_d; bli_acquire_vi( i, x, &chi_i ); bli_obj_scalar_init_detached( BLIS_INT, &index_test ); bli_amaxv_test( x, &index_test ); bli_getsc( &index_test, &i_d_test, &junk ); i_test = i_d_test; bli_acquire_vi( i_test, x, &chi_i_test ); // Verify that the values referenced by index and index_test are equal. if ( bli_obj_equals( &chi_i, &chi_i_test ) ) *resid = 0.0; else *resid = 1.0; } // ----------------------------------------------------------------------------- // // Prototype BLAS-like interfaces with typed operands for a local amaxv test // operation // #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ ( \ dim_t n, \ ctype* restrict x, inc_t incx, \ dim_t* restrict index \ ); \ INSERT_GENTPROT_BASIC0( amaxv_test ) // // Prototype function pointer query interface. 
// #undef GENPROT #define GENPROT( tname, opname ) \ \ PASTECH(tname,_vft) \ PASTEMAC(opname,_qfp)( num_t dt ); GENPROT( amaxv, amaxv_test ) // // Define function pointer query interfaces. // #undef GENFRONT #define GENFRONT( tname, opname ) \ \ GENARRAY_FPA( PASTECH(tname,_vft), \ opname ); \ \ PASTECH(tname,_vft) \ PASTEMAC(opname,_qfp)( num_t dt ) \ { \ return PASTECH(opname,_fpa)[ dt ]; \ } GENFRONT( amaxv, amaxv_test ) // // Define object-based interface for a local amaxv test operation. // #undef GENFRONT #define GENFRONT( tname, opname ) \ \ void PASTEMAC0(opname) \ ( \ obj_t* x, \ obj_t* index \ ) \ { \ num_t dt = bli_obj_dt( x ); \ \ dim_t n = bli_obj_vector_dim( x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \ inc_t incx = bli_obj_vector_inc( x ); \ \ void* buf_index = bli_obj_buffer_at_off( index ); \ \ /* FGVZ: Disabling this code since bli_amaxv_check() is supposed to be a non-public API function, and therefore unavailable unless all symbols are scheduled to be exported at configure-time (which is not currently the default behavior). if ( bli_error_checking_is_enabled() ) \ bli_amaxv_check( x, index ); \ */ \ \ /* Query a type-specific function pointer, except one that uses void* for function arguments instead of typed pointers. */ \ PASTECH(tname,_vft) f = \ PASTEMAC(opname,_qfp)( dt ); \ \ f \ ( \ n, \ buf_x, incx, \ buf_index \ ); \ } GENFRONT( amaxv, amaxv_test ) // // Define BLAS-like interfaces with typed operands for a local amaxv test // operation. // NOTE: This is based on a simplified version of the bli_?amaxv_ref() // reference kernel. // #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ ctype* x, inc_t incx, \ dim_t* index \ ) \ { \ ctype_r* minus_one = PASTEMAC(chr,m1); \ dim_t* zero_i = PASTEMAC(i,0); \ \ ctype_r chi1_r; \ ctype_r chi1_i; \ ctype_r abs_chi1; \ ctype_r abs_chi1_max; \ dim_t index_l; \ dim_t i; \ \ /* If the vector length is zero, return early. 
This directly emulates the behavior of netlib BLAS's i?amax() routines. */ \ if ( bli_zero_dim1( n ) ) \ { \ PASTEMAC(i,copys)( *zero_i, *index ); \ return; \ } \ \ /* Initialize the index of the maximum absolute value to zero. */ \ PASTEMAC(i,copys)( *zero_i, index_l ); \ \ /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ \ PASTEMAC(chr,copys)( *minus_one, abs_chi1_max ); \ \ { \ for ( i = 0; i < n; ++i ) \ { \ ctype* chi1 = x + (i )*incx; \ \ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(ch,chr,gets)( *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ PASTEMAC(chr,abval2s)( chi1_r, chi1_r ); \ PASTEMAC(chr,abval2s)( chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. */ \ PASTEMAC(chr,set0s)( abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_r, abs_chi1 ); \ PASTEMAC(chr,adds)( chi1_i, abs_chi1 ); \ \ /* If the absolute value of the current element exceeds that of the previous largest, save it and its index. If NaN is encountered, then treat it the same as if it were a valid value that was smaller than any previously seen. This behavior mimics that of LAPACK's ?lange(). */ \ if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ abs_chi1_max = abs_chi1; \ index_l = i; \ } \ } \ } \ \ /* Store the final index to the output variable. */ \ PASTEMAC(i,copys)( index_l, *index ); \ } INSERT_GENTFUNCR_BASIC0( amaxv_test ) blis-0.6.1/testsuite/src/test_amaxv.h000066400000000000000000000035001360743507500176230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Entry point of the amaxv test. As defined in test_amaxv.c, it returns immediately when the test has already been run or when libblis_test_op_is_disabled() or libblis_test_l1v_is_disabled() reports it disabled; otherwise it first runs the randv test it depends on and then the amaxv test driver. */ void libblis_test_amaxv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_axpbyv.c000066400000000000000000000213251360743507500200200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "axpbyv"; static char* o_types = "vv"; // x y static char* p_types = "c"; // conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_axpbyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_axpbyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_axpbyv_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* beta, obj_t* y ); void libblis_test_axpbyv_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_axpbyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_addv( tdata, params, &(op->ops->addv) ); libblis_test_axpyv( tdata, params, &(op->ops->axpyv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_scal2v( tdata, params, &(op->ops->scal2v) ); libblis_test_xpbyv( tdata, params, &(op->ops->xpbyv) ); } void libblis_test_axpbyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpbyv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_axpbyv_experiment ); } } void libblis_test_axpbyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx; obj_t alpha, beta, x, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save ); // Set alpha. //bli_setsc( sqrt(2.0)/2.0, sqrt(2.0)/2.0, &alpha ); //bli_copysc( &BLIS_TWO, &alpha ); if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &alpha ); else bli_setsc( 0.0, -2.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); // Randomize x and y, and save y. libblis_test_vobj_randomize( params, FALSE, &x ); libblis_test_vobj_randomize( params, FALSE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. 
for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &y_save, &y ); time = bli_clock(); libblis_test_axpbyv_impl( iface, &alpha, &x, &beta, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 3.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 14.0 / 3.0; // Perform checks. libblis_test_axpbyv_check( params, &alpha, &x, &beta, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_axpbyv_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* beta, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_axpbyv( alpha, x, beta, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_axpbyv_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); obj_t x_temp, y_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is randomized. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * conjx(x) // // is functioning correctly if // // normfv( y - ( beta * y_orig + alpha * conjx(x) ) ) // // is negligible. 
// bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); bli_scalv( alpha, &x_temp ); bli_scalv( beta, &y_temp ); bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y ); bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); } blis-0.6.1/testsuite/src/test_axpbyv.h000066400000000000000000000035011360743507500200210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Entry point of the axpbyv test. As defined in test_axpbyv.c, it returns immediately when the test has already been run or when libblis_test_op_is_disabled() or libblis_test_l1v_is_disabled() reports it disabled; otherwise it first runs the tests it depends on (randv, normfv, addv, axpyv, subv, copyv, scalv, scal2v, xpbyv) and then the axpbyv test driver. */ void libblis_test_axpbyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_axpy2v.c000066400000000000000000000224111360743507500177350ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "axpy2v"; static char* o_types = "vvv"; // x y z static char* p_types = "cc"; // conjx conjy static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_axpy2v_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_axpy2v_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_axpy2v_impl ( iface_t iface, obj_t* alpha1, obj_t* alpha2, obj_t* x, obj_t* y, obj_t* z, cntx_t* cntx ); void libblis_test_axpy2v_check ( test_params_t* params, obj_t* alpha1, obj_t* alpha2, obj_t* x, obj_t* y, obj_t* z, obj_t* z_orig, double* resid ); void libblis_test_axpy2v_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_addv( tdata, params, &(op->ops->addv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); } void libblis_test_axpy2v ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return 
early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpy2v_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_axpy2v_experiment ); } } void libblis_test_axpy2v_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx, conjy; obj_t alpha1, alpha2, x, y, z; obj_t z_save; cntx_t* cntx; // Query a context. cntx = bli_gks_query_cntx(); // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha1 ); bli_obj_scalar_init_detached( datatype, &alpha2 ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &z ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &z_save ); // Set alpha. 
if ( bli_obj_is_real( &z ) ) { bli_setsc( -1.0, 0.0, &alpha1 ); bli_setsc( -0.9, 0.0, &alpha2 ); } else { bli_setsc( 0.0, -1.0, &alpha1 ); bli_setsc( 0.0, -0.9, &alpha2 ); } // Randomize x and y, and save y. libblis_test_vobj_randomize( params, TRUE, &x ); libblis_test_vobj_randomize( params, TRUE, &y ); libblis_test_vobj_randomize( params, TRUE, &z ); bli_copyv( &z, &z_save ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); bli_obj_set_conj( conjy, &y ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &z_save, &z ); time = bli_clock(); libblis_test_axpy2v_impl( iface, &alpha1, &alpha2, &x, &y, &z, cntx ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &z ) ) *perf *= 4.0; // Perform checks. libblis_test_axpy2v_check( params, &alpha1, &alpha2, &x, &y, &z, &z_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &z, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &z ); bli_obj_free( &z_save ); } void libblis_test_axpy2v_impl ( iface_t iface, obj_t* alpha1, obj_t* alpha2, obj_t* x, obj_t* y, obj_t* z, cntx_t* cntx ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_axpy2v_ex( alpha1, alpha2, x, y, z, cntx, NULL ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_axpy2v_check ( test_params_t* params, obj_t* alpha1, obj_t* alpha2, obj_t* x, obj_t* y, obj_t* z, obj_t* z_orig, double* resid ) { num_t dt = bli_obj_dt( z ); num_t dt_real = bli_obj_dt_proj_to_real( z ); dim_t m = bli_obj_vector_dim( z ); obj_t x_temp, y_temp, z_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y is randomized. // - z_orig is randomized. 
// Note: // - alpha1, alpha2 should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // z := z_orig + alpha1 * conjx(x) + alpha2 * conjy(y) // // is functioning correctly if // // normfv( z - v ) // // is negligible, where v contains z as computed by two calls to axpyv. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); bli_obj_create( dt, m, 1, 0, 0, &z_temp ); bli_copyv( x, &x_temp ); bli_copyv( y, &y_temp ); bli_copyv( z_orig, &z_temp ); bli_scalv( alpha1, &x_temp ); bli_scalv( alpha2, &y_temp ); bli_addv( &x_temp, &z_temp ); bli_addv( &y_temp, &z_temp ); bli_subv( &z_temp, z ); bli_normfv( z, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); bli_obj_free( &z_temp ); } blis-0.6.1/testsuite/src/test_axpy2v.h000066400000000000000000000035011360743507500177410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_axpy2v ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_axpyf.c000066400000000000000000000225001360743507500176320ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "axpyf"; static char* o_types = "mvv"; // A x y static char* p_types = "cc"; // conja conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_axpyf_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_axpyf_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_axpyf_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* y, cntx_t* cntx ); void libblis_test_axpyf_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_axpyf_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_axpyv( tdata, params, &(op->ops->axpyv) ); } void libblis_test_axpyf ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpyf_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_axpyf_experiment ); } } void libblis_test_axpyf_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, b_n; conj_t conja, conjx; obj_t alpha, a, x, y; obj_t y_save; cntx_t* cntx; // Query a context. 
cntx = bli_gks_query_cntx(); // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Query the operation's fusing factor for the current datatype. b_n = bli_cntx_get_blksz_def_dt( datatype, BLIS_AF, cntx ); // Store the fusing factor so that the driver can retrieve the value // later when printing results. op->dim_aux[0] = b_n; // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conja ); bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, b_n, &a ); libblis_test_vobj_create( params, datatype, sc_str[1], b_n, &x ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &y_save ); // Set alpha. if ( bli_obj_is_real( &y ) ) { bli_setsc( -1.0, 0.0, &alpha ); } else { bli_setsc( 0.0, -1.0, &alpha ); } // Randomize A, x, and y, and save y. libblis_test_mobj_randomize( params, FALSE, &a ); libblis_test_vobj_randomize( params, FALSE, &x ); libblis_test_vobj_randomize( params, FALSE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. bli_obj_set_conj( conja, &a ); bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &y_save, &y ); time = bli_clock(); libblis_test_axpyf_impl( iface, &alpha, &a, &x, &y, cntx ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. 
libblis_test_axpyf_check( params, &alpha, &a, &x, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_axpyf_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* y, cntx_t* cntx ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_axpyf_ex( alpha, a, x, y, cntx, NULL ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_axpyf_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); dim_t b_n = bli_obj_width( a ); dim_t i; obj_t a1, chi1, v; obj_t alpha_chi1; obj_t norm; double junk; // // Pre-conditions: // - a is randomized. // - x is randomized. // - y is randomized. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := y_orig + alpha * conja(A) * conjx(x) // // is functioning correctly if // // normfv( y - v ) // // is negligible, where v contains y as computed by repeated calls to // axpyv. 
// bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_scalar_init_detached( dt, &alpha_chi1 ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_copyv( y_orig, &v ); for ( i = 0; i < b_n; ++i ) { bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, x, &chi1 ); bli_copysc( &chi1, &alpha_chi1 ); bli_mulsc( alpha, &alpha_chi1 ); bli_axpyv( &alpha_chi1, &a1, &v ); } bli_subv( y, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); } blis-0.6.1/testsuite/src/test_axpyf.h000066400000000000000000000035001360743507500176360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_axpyf ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_axpym.c000066400000000000000000000207121360743507500176440ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "axpym"; static char* o_types = "mm"; // x y static char* p_types = "h"; // transx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_axpym_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_axpym_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_axpym_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y ); void libblis_test_axpym_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* y_save, double* resid ); void libblis_test_axpym_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfm( tdata, params, &(op->ops->normfm) ); libblis_test_addm( tdata, params, &(op->ops->addm) ); libblis_test_subm( tdata, params, &(op->ops->subm) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); } void libblis_test_axpym ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. 
if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpym_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_axpym_experiment ); } } void libblis_test_axpym_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; trans_t transx; obj_t alpha, x, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, sc_str[0], m, n, &x ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y_save ); // Set alpha. if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &alpha ); else bli_setsc( 0.0, -2.0, &alpha ); // Randomize and save y. libblis_test_mobj_randomize( params, FALSE, &x ); libblis_test_mobj_randomize( params, FALSE, &y ); bli_copym( &y, &y_save ); // Apply the parameters. 
bli_obj_set_conjtrans( transx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &y_save, &y ); time = bli_clock(); libblis_test_axpym_impl( iface, &alpha, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_axpym_check( params, &alpha, &x, &y, &y_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_axpym_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_axpym( alpha, x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_axpym_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_length( y ); dim_t n = bli_obj_width( y ); obj_t x_temp, y_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is randomized. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := y_orig + alpha * conjx(x) // // is functioning correctly if // // normfm( y - ( y_orig + alpha * conjx(x) ) ) // // is negligible. 
// bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, n, 0, 0, &x_temp ); bli_obj_create( dt, m, n, 0, 0, &y_temp ); bli_copym( x, &x_temp ); bli_copym( y_orig, &y_temp ); bli_scalm( alpha, &x_temp ); bli_addm( &x_temp, &y_temp ); bli_subm( &y_temp, y ); bli_normfm( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); } blis-0.6.1/testsuite/src/test_axpym.h000066400000000000000000000035001360743507500176450ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_axpym ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_axpyv.c000066400000000000000000000204221360743507500176530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "axpyv"; static char* o_types = "vv"; // x y static char* p_types = "c"; // conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_axpyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_axpyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_axpyv_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y ); void libblis_test_axpyv_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_axpyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_addv( tdata, params, &(op->ops->addv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); } void libblis_test_axpyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. 
if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpyv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_axpyv_experiment ); } } void libblis_test_axpyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx; obj_t alpha, x, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save ); // Set alpha. //bli_setsc( sqrt(2.0)/2.0, sqrt(2.0)/2.0, &alpha ); //bli_copysc( &BLIS_TWO, &alpha ); if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &alpha ); else bli_setsc( 0.0, -2.0, &alpha ); // Randomize x and y, and save y. libblis_test_vobj_randomize( params, FALSE, &x ); libblis_test_vobj_randomize( params, FALSE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. 
bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &y_save, &y ); time = bli_clock(); libblis_test_axpyv_impl( iface, &alpha, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_axpyv_check( params, &alpha, &x, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_axpyv_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_axpyv( alpha, x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_axpyv_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); obj_t x_temp, y_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is randomized. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := y_orig + alpha * conjx(x) // // is functioning correctly if // // normfv( y - ( y_orig + alpha * conjx(x) ) ) // // is negligible. 
// bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); bli_scalv( alpha, &x_temp ); bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y ); bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); } blis-0.6.1/testsuite/src/test_axpyv.h000066400000000000000000000035001360743507500176560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_axpyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_copym.c000066400000000000000000000157441360743507500176460ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "copym"; static char* o_types = "mm"; // x y static char* p_types = "h"; // transx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_copym_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_copym_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_copym_impl ( iface_t iface, obj_t* x, obj_t* y ); void libblis_test_copym_check ( test_params_t* params, obj_t* x, obj_t* y, double* resid ); void libblis_test_copym_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_subm( tdata, params, &(op->ops->subm) ); libblis_test_normfm( tdata, params, &(op->ops->normfm) ); } void libblis_test_copym ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. 
if ( TRUE ) libblis_test_copym_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_copym_experiment ); } } void libblis_test_copym_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; trans_t transx; obj_t x, y; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, sc_str[0], m, n, &x ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[1], m, n, &y ); // Randomize x and set y to one. libblis_test_mobj_randomize( params, FALSE, &x ); bli_setm( &BLIS_ONE, &y ); // Apply the parameters. bli_obj_set_conjtrans( transx, &x ); // Disable repeats since bli_copym() is not yet tested. //for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_copym_impl( iface, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_copym_check( params, &x, &y, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); bli_obj_free( &y ); } void libblis_test_copym_impl ( iface_t iface, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_copym( x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_copym_check ( test_params_t* params, obj_t* x, obj_t* y, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( x ); obj_t norm_y_r; double junk; // // Pre-conditions: // - x is randomized. // // Under these conditions, we assume that the implementation for // // y := conjx(x) // // is functioning correctly if // // normfm( y - conjx(x) ) // // is negligible. // bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_subm( x, y ); bli_normfm( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); } blis-0.6.1/testsuite/src/test_copym.h000066400000000000000000000035001360743507500176360ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_copym ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_copyv.c000066400000000000000000000154731360743507500176560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "copyv"; static char* o_types = "vv"; // x y static char* p_types = "c"; // conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_copyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_copyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_copyv_impl ( iface_t iface, obj_t* x, obj_t* y ); void libblis_test_copyv_check ( test_params_t* params, obj_t* x, obj_t* y, double* resid ); void libblis_test_copyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); } void libblis_test_copyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. 
if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_copyv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_copyv_experiment ); } } void libblis_test_copyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx; obj_t x, y; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); // Randomize x and set y to one. libblis_test_vobj_randomize( params, FALSE, &x ); bli_setv( &BLIS_ONE, &y ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); // Disable repeats since bli_copyv() is not yet tested. //for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_copyv_impl( iface, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_copyv_check( params, &x, &y, resid ); // Zero out performance and residual if output vector is empty. 
libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); } void libblis_test_copyv_impl ( iface_t iface, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_copyv( x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_copyv_check ( test_params_t* params, obj_t* x, obj_t* y, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( x ); obj_t norm_y_r; double junk; // // Pre-conditions: // - x is randomized. // // Under these conditions, we assume that the implementation for // // y := conjx(x) // // is functioning correctly if // // normfv( y - conjx(x) ) // // is negligible. // bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_subv( x, y ); bli_normfv( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); } blis-0.6.1/testsuite/src/test_copyv.h000066400000000000000000000035001360743507500176470ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_copyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_dotaxpyv.c000066400000000000000000000240531360743507500203660ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "dotaxpyv"; static char* o_types = "vvv"; // x y z static char* p_types = "ccc"; // conjxt conjx conjy static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_dotaxpyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_dotaxpyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_dotaxpyv_impl ( iface_t iface, obj_t* alpha, obj_t* xt, obj_t* x, obj_t* y, obj_t* rho, obj_t* z, cntx_t* cntx ); void libblis_test_dotaxpyv_check ( test_params_t* params, obj_t* alpha, obj_t* xt, obj_t* x, obj_t* y, obj_t* rho, obj_t* z, obj_t* z_orig, double* resid ); void libblis_test_dotaxpyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_dotv( tdata, params, &(op->ops->dotv) ); libblis_test_axpyv( tdata, params, &(op->ops->axpyv) ); } void libblis_test_dotaxpyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotaxpyv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_dotaxpyv_experiment ); } } void libblis_test_dotaxpyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjxt, conjx, conjy; conj_t conjconjxty; obj_t alpha, xt, x, y, rho, z; obj_t z_save; cntx_t* cntx; // Query a context. cntx = bli_gks_query_cntx(); // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjxt ); bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &rho ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &z ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &z_save ); // Set alpha. if ( bli_obj_is_real( &z ) ) { bli_setsc( -0.8, 0.0, &alpha ); } else { bli_setsc( 0.7, -0.1, &alpha ); } // Randomize x and z, and save z. libblis_test_vobj_randomize( params, TRUE, &x ); libblis_test_vobj_randomize( params, TRUE, &z ); bli_copyv( &z, &z_save ); // Create an alias to x for xt. (Note that it doesn't actually need to be // transposed.) 
bli_obj_alias_to( &x, &xt ); // Determine whether to make a copy of x with or without conjugation. // // conjx conjy ~conjx^conjy y is initialized as // n n c y = conj(x) // n c n y = x // c n n y = x // c c c y = conj(x) // conjconjxty = bli_apply_conj( conjxt, conjy ); conjconjxty = bli_conj_toggled( conjconjxty ); bli_obj_set_conj( conjconjxty, &xt ); bli_copyv( &xt, &y ); // Apply the parameters. bli_obj_set_conj( conjxt, &xt ); bli_obj_set_conj( conjx, &x ); bli_obj_set_conj( conjy, &y ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copysc( &BLIS_MINUS_ONE, &rho ); bli_copyv( &z_save, &z ); time = bli_clock(); libblis_test_dotaxpyv_impl( iface, &alpha, &xt, &x, &y, &rho, &z, cntx ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &z ) ) *perf *= 4.0; // Perform checks. libblis_test_dotaxpyv_check( params, &alpha, &xt, &x, &y, &rho, &z, &z_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &z, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &z ); bli_obj_free( &z_save ); } void libblis_test_dotaxpyv_impl ( iface_t iface, obj_t* alpha, obj_t* xt, obj_t* x, obj_t* y, obj_t* rho, obj_t* z, cntx_t* cntx ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_dotaxpyv_ex( alpha, xt, x, y, rho, z, cntx, NULL ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_dotaxpyv_check ( test_params_t* params, obj_t* alpha, obj_t* xt, obj_t* x, obj_t* y, obj_t* rho, obj_t* z, obj_t* z_orig, double* resid ) { num_t dt = bli_obj_dt( z ); num_t dt_real = bli_obj_dt_proj_to_real( z ); dim_t m = bli_obj_vector_dim( z ); obj_t rho_temp; obj_t z_temp; obj_t norm_z; double resid1, resid2; double junk; // // Pre-conditions: // - x is randomized. // - y is randomized. // - z_orig is randomized. // - xt is an alias to x. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // rho := conjxt(x^T) conjy(y) // z := z_orig + alpha * conjx(x) // // is functioning correctly if // // ( rho - rho_temp ) // // and // // normfv( z - z_temp ) // // are negligible, where rho_temp and z_temp contain rho and z as // computed by dotv and axpyv, respectively. 
// bli_obj_scalar_init_detached( dt, &rho_temp ); bli_obj_scalar_init_detached( dt_real, &norm_z ); bli_obj_create( dt, m, 1, 0, 0, &z_temp ); bli_copyv( z_orig, &z_temp ); bli_dotv( xt, y, &rho_temp ); bli_axpyv( alpha, x, &z_temp ); bli_subsc( rho, &rho_temp ); bli_getsc( &rho_temp, &resid1, &junk ); bli_subv( &z_temp, z ); bli_normfv( z, &norm_z ); bli_getsc( &norm_z, &resid2, &junk ); *resid = bli_fmaxabs( resid1, resid2 ); bli_obj_free( &z_temp ); } blis-0.6.1/testsuite/src/test_dotaxpyv.h000066400000000000000000000035031360743507500203700ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_dotaxpyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_dotv.c000066400000000000000000000200471360743507500174630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "dotv"; static char* o_types = "vv"; // x y static char* p_types = "cc"; // conjx conjy static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_dotv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_dotv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_dotv_impl ( iface_t iface, obj_t* x, obj_t* y, obj_t* rho ); void libblis_test_dotv_check ( test_params_t* params, obj_t* x, obj_t* y, obj_t* rho, double* resid ); void libblis_test_dotv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); } void libblis_test_dotv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. 
if ( TRUE ) libblis_test_dotv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_dotv_experiment ); } } void libblis_test_dotv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx, conjy, conjconjxy; obj_t x, y, rho; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &rho ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); // Randomize x. libblis_test_vobj_randomize( params, TRUE, &x ); // Determine whether to make a copy of x with or without conjugation. // // conjx conjy ~conjx^conjy y is initialized as // n n c y = conj(x) // n c n y = x // c n n y = x // c c c y = conj(x) // conjconjxy = bli_apply_conj( conjx, conjy ); conjconjxy = bli_conj_toggled( conjconjxy ); bli_obj_set_conj( conjconjxy, &x ); bli_copyv( &x, &y ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); bli_obj_set_conj( conjy, &y ); // Repeat the experiment n_repeats times and record results. 
for ( i = 0; i < n_repeats; ++i ) { bli_copysc( &BLIS_MINUS_ONE, &rho ); time = bli_clock(); libblis_test_dotv_impl( iface, &x, &y, &rho ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_dotv_check( params, &x, &y, &rho, resid ); // Zero out performance and residual if output scalar is empty. libblis_test_check_empty_problem( &rho, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); } void libblis_test_dotv_impl ( iface_t iface, obj_t* x, obj_t* y, obj_t* rho ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_dotv( x, y, rho ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_dotv_check ( test_params_t* params, obj_t* x, obj_t* y, obj_t* rho, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( y ); obj_t rho_r, rho_i; obj_t norm_x, norm_xy; double zero; double junk; // // Pre-conditions: // - x is randomized. // - y is equal to conj(conjx(conjy(x))). // // Under these conditions, we assume that the implementation for // // rho := conjx(x^T) conjy(y) // // is functioning correctly if // // sqrtsc( rho.real ) - normfv( x ) // // and // // rho.imag // // are negligible. 
// bli_obj_scalar_init_detached( dt_real, &rho_r ); bli_obj_scalar_init_detached( dt_real, &rho_i ); bli_obj_scalar_init_detached( dt_real, &norm_x ); bli_obj_scalar_init_detached( dt_real, &norm_xy ); bli_normfv( x, &norm_x ); bli_unzipsc( rho, &rho_r, &rho_i ); bli_sqrtsc( &rho_r, &norm_xy ); bli_subsc( &norm_x, &norm_xy ); bli_getsc( &norm_xy, resid, &junk ); bli_getsc( &rho_i, &zero, &junk ); *resid = bli_fmaxabs( *resid, zero ); } blis-0.6.1/testsuite/src/test_dotv.h000066400000000000000000000034771360743507500175000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_dotv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_dotxaxpyf.c000066400000000000000000000273321360743507500205410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "dotxaxpyf"; static char* o_types = "mvvvv"; // A w x y z static char* p_types = "cccc"; // conjat conja conjw conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_dotxaxpyf_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_dotxaxpyf_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_dotxaxpyf_impl ( iface_t iface, obj_t* alpha, obj_t* at, obj_t* a, obj_t* w, obj_t* x, obj_t* beta, obj_t* y, obj_t* z, cntx_t* cntx ); void libblis_test_dotxaxpyf_check ( test_params_t* params, obj_t* alpha, obj_t* at, obj_t* a, obj_t* w, obj_t* x, obj_t* beta, obj_t* y, obj_t* z, obj_t* y_orig, obj_t* z_orig, double* resid ); void libblis_test_dotxaxpyf_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_axpyv( tdata, params, 
&(op->ops->axpyv) ); libblis_test_dotxv( tdata, params, &(op->ops->dotxv) ); } void libblis_test_dotxaxpyf ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotxaxpyf_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_dotxaxpyf_experiment ); } } void libblis_test_dotxaxpyf_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, b_n; conj_t conjat, conja, conjw, conjx; obj_t alpha, at, a, w, x, beta, y, z; obj_t y_save, z_save; cntx_t* cntx; // Query a context. cntx = bli_gks_query_cntx(); // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Query the operation's fusing factor for the current datatype. b_n = bli_cntx_get_blksz_def_dt( datatype, BLIS_XF, cntx ); // Store the fusing factor so that the driver can retrieve the value // later when printing results. op->dim_aux[0] = b_n; // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjat ); bli_param_map_char_to_blis_conj( pc_str[1], &conja ); bli_param_map_char_to_blis_conj( pc_str[2], &conjw ); bli_param_map_char_to_blis_conj( pc_str[3], &conjx ); // Create test scalars. 
bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, b_n, &a ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &w ); libblis_test_vobj_create( params, datatype, sc_str[2], b_n, &x ); libblis_test_vobj_create( params, datatype, sc_str[3], b_n, &y ); libblis_test_vobj_create( params, datatype, sc_str[3], b_n, &y_save ); libblis_test_vobj_create( params, datatype, sc_str[4], m, &z ); libblis_test_vobj_create( params, datatype, sc_str[4], m, &z_save ); // Set alpha. if ( bli_obj_is_real( &y ) ) { bli_setsc( 1.2, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } else { bli_setsc( 1.2, 0.1, &alpha ); bli_setsc( -1.0, -0.1, &beta ); } // Randomize A, w, x, y, and z, and save y and z. libblis_test_mobj_randomize( params, FALSE, &a ); libblis_test_vobj_randomize( params, FALSE, &w ); libblis_test_vobj_randomize( params, FALSE, &x ); libblis_test_vobj_randomize( params, FALSE, &y ); libblis_test_vobj_randomize( params, FALSE, &z ); bli_copyv( &y, &y_save ); bli_copyv( &z, &z_save ); // Create an alias to a for at. (Note that it should NOT actually be // marked for transposition since the transposition is part of the dotxf // subproblem.) bli_obj_alias_to( &a, &at ); // Apply the parameters. bli_obj_set_conj( conjat, &at ); bli_obj_set_conj( conja, &a ); bli_obj_set_conj( conjw, &w ); bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &y_save, &y ); bli_copyv( &z_save, &z ); time = bli_clock(); libblis_test_dotxaxpyf_impl( iface, &alpha, &at, &a, &w, &x, &beta, &y, &z, cntx ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. 
*perf = ( 2.0 * m * b_n + 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_dotxaxpyf_check( params, &alpha, &at, &a, &w, &x, &beta, &y, &z, &y_save, &z_save, resid ); // Zero out performance and residual if either output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); libblis_test_check_empty_problem( &z, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &w ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &z ); bli_obj_free( &y_save ); bli_obj_free( &z_save ); } void libblis_test_dotxaxpyf_impl ( iface_t iface, obj_t* alpha, obj_t* at, obj_t* a, obj_t* w, obj_t* x, obj_t* beta, obj_t* y, obj_t* z, cntx_t* cntx ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_dotxaxpyf_ex( alpha, at, a, w, x, beta, y, z, cntx, NULL ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_dotxaxpyf_check ( test_params_t* params, obj_t* alpha, obj_t* at, obj_t* a, obj_t* w, obj_t* x, obj_t* beta, obj_t* y, obj_t* z, obj_t* y_orig, obj_t* z_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( z ); dim_t b_n = bli_obj_vector_dim( y ); dim_t i; obj_t a1, chi1, psi1, v, q; obj_t alpha_chi1; obj_t norm; double resid1, resid2; double junk; // // Pre-conditions: // - a is randomized. // - w is randomized. // - x is randomized. // - y is randomized. // - z is randomized. // - at is an alias to a. // Note: // - alpha and beta should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * conjat(A^T) * conjw(w) // z := z_orig + alpha * conja(A) * conjx(x) // // is functioning correctly if // // normfv( y - v ) // // and // // normfv( z - q ) // // are negligible, where v and q contain y and z as computed by repeated // calls to dotxv and axpyv, respectively. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_scalar_init_detached( dt, &alpha_chi1 ); bli_obj_create( dt, b_n, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &q ); bli_copyv( y_orig, &v ); bli_copyv( z_orig, &q ); // v := beta * v + alpha * conjat(at) * conjw(w) for ( i = 0; i < b_n; ++i ) { bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, at, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, &v, &psi1 ); bli_dotxv( alpha, &a1, w, beta, &psi1 ); } // q := q + alpha * conja(a) * conjx(x) for ( i = 0; i < b_n; ++i ) { bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, x, &chi1 ); bli_copysc( &chi1, &alpha_chi1 ); bli_mulsc( alpha, &alpha_chi1 ); bli_axpyv( &alpha_chi1, &a1, &q ); } bli_subv( y, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, &resid1, &junk ); bli_subv( z, &q ); bli_normfv( &q, &norm ); bli_getsc( &norm, &resid2, &junk ); *resid = bli_fmaxabs( resid1, resid2 ); bli_obj_free( &v ); bli_obj_free( &q ); } blis-0.6.1/testsuite/src/test_dotxaxpyf.h000066400000000000000000000035041360743507500205410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_dotxaxpyf ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_dotxf.c000066400000000000000000000226361360743507500176410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "dotxf"; static char* o_types = "mvv"; // A x y static char* p_types = "cc"; // conjat conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 5e-04, 5e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-12, 1e-13 }, // warn, pass for d { 1e-12, 1e-13 } }; // warn, pass for z // Local prototypes. 
void libblis_test_dotxf_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_dotxf_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_dotxf_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx ); void libblis_test_dotxf_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_dotxf_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_dotxv( tdata, params, &(op->ops->dotxv) ); } void libblis_test_dotxf ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotxf_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_dotxf_experiment ); } } void libblis_test_dotxf_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, b_n; conj_t conjat, conjx; obj_t alpha, a, x, beta, y; obj_t y_save; cntx_t* cntx; // Query a context. cntx = bli_gks_query_cntx(); // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Query the operation's fusing factor for the current datatype. b_n = bli_cntx_get_blksz_def_dt( datatype, BLIS_DF, cntx ); // Store the fusing factor so that the driver can retrieve the value // later when printing results. op->dim_aux[0] = b_n; // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjat ); bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, b_n, &a ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[2], b_n, &y ); libblis_test_vobj_create( params, datatype, sc_str[2], b_n, &y_save ); // Set alpha. if ( bli_obj_is_real( &y ) ) { bli_setsc( 1.2, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } else { bli_setsc( 1.2, 0.1, &alpha ); bli_setsc( -1.0, -0.1, &beta ); } // Randomize A, x, and y, and save y. 
libblis_test_mobj_randomize( params, FALSE, &a ); libblis_test_vobj_randomize( params, FALSE, &x ); libblis_test_vobj_randomize( params, FALSE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. bli_obj_set_conj( conjat, &a ); bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &y_save, &y ); time = bli_clock(); libblis_test_dotxf_impl( iface, &alpha, &a, &x, &beta, &y, cntx ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_dotxf_check( params, &alpha, &a, &x, &beta, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_dotxf_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, cntx_t* cntx ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_dotxf_ex( alpha, a, x, beta, y, cntx, NULL ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_dotxf_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t b_n = bli_obj_vector_dim( y ); dim_t i; obj_t a1, psi1, v; obj_t norm; double junk; // // Pre-conditions: // - a is randomized. // - x is randomized. // - y is randomized. // Note: // - alpha and beta should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * conjat(A^T) * conjx(x) // // is functioning correctly if // // normfv( y - v ) // // is negligible, where v contains y as computed by repeated calls to // dotxv. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, b_n, 1, 0, 0, &v ); bli_copyv( y_orig, &v ); for ( i = 0; i < b_n; ++i ) { bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, &v, &psi1 ); bli_dotxv( alpha, &a1, x, beta, &psi1 ); } bli_subv( y, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); } blis-0.6.1/testsuite/src/test_dotxf.h000066400000000000000000000035001360743507500176330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_dotxf ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_dotxv.c000066400000000000000000000221341360743507500176520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "dotxv"; static char* o_types = "vv"; // x y static char* p_types = "cc"; // conjx conjy static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_dotxv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_dotxv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_dotxv_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y, obj_t* beta, obj_t* rho ); void libblis_test_dotxv_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* beta, obj_t* rho, obj_t* rho_orig, double* resid ); void libblis_test_dotxv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); } void libblis_test_dotxv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. 
if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotxv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_dotxv_experiment ); } } void libblis_test_dotxv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx, conjy, conjconjxy; obj_t alpha, x, y, beta, rho; obj_t rho_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); bli_obj_scalar_init_detached( datatype, &rho ); bli_obj_scalar_init_detached( datatype, &rho_save ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); // Initialize alpha, beta, and rho. bli_copysc( &BLIS_ONE, &alpha ); bli_copysc( &BLIS_ZERO, &beta ); bli_copysc( &BLIS_MINUS_ONE, &rho ); bli_copysc( &rho, &rho_save ); // Randomize x. libblis_test_vobj_randomize( params, TRUE, &x ); // Determine whether to make a copy of x with or without conjugation. 
// // conjx conjy ~conjx^conjy y is initialized as // n n c y = conj(x) // n c n y = x // c n n y = x // c c c y = conj(x) // conjconjxy = bli_apply_conj( conjx, conjy ); conjconjxy = bli_conj_toggled( conjconjxy ); bli_obj_set_conj( conjconjxy, &x ); bli_copyv( &x, &y ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); bli_obj_set_conj( conjy, &y ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copysc( &rho_save, &rho ); time = bli_clock(); libblis_test_dotxv_impl( iface, &alpha, &x, &y, &beta, &rho ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_dotxv_check( params, &alpha, &x, &y, &beta, &rho, &rho_save, resid ); // Zero out performance and residual if output scalar is empty. libblis_test_check_empty_problem( &rho, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); } void libblis_test_dotxv_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y, obj_t* beta, obj_t* rho ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_dotxv( alpha, x, y, beta, rho ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_dotxv_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* beta, obj_t* rho, obj_t* rho_orig, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( y ); obj_t rho_r, rho_i; obj_t norm_x_r, norm_xy_r; obj_t temp_r; double zero; double junk; // // Pre-conditions: // - x is randomized. // - y is equal to conjx(conjy(x)). // - alpha must be real-valued. // - beta must be zero. // Note: // - We forgo fully exercising beta scaling in order to simplify the // test. 
// // Under these conditions, we assume that the implementation for // // rho := beta * rho_orig + alpha * conjx(x^T) conjy(y) // // is functioning correctly if // // sqrtsc( rho.real ) - sqrtsc( alpha ) * normfv( x ) // // and // // rho.imag // // are negligible. // bli_obj_scalar_init_detached( dt_real, &rho_r ); bli_obj_scalar_init_detached( dt_real, &rho_i ); bli_obj_scalar_init_detached( dt_real, &norm_x_r ); bli_obj_scalar_init_detached( dt_real, &norm_xy_r ); bli_obj_scalar_init_detached( dt_real, &temp_r ); bli_copysc( alpha, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); bli_normfv( x, &norm_x_r ); bli_mulsc( &temp_r, &norm_x_r ); bli_unzipsc( rho, &rho_r, &rho_i ); bli_sqrtsc( &rho_r, &norm_xy_r ); bli_subsc( &norm_x_r, &norm_xy_r ); bli_getsc( &norm_xy_r, resid, &junk ); bli_getsc( &rho_i, &zero, &junk ); *resid = bli_fmaxabs( *resid, zero ); } blis-0.6.1/testsuite/src/test_dotxv.h000066400000000000000000000035001360743507500176530ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_dotxv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_gemm.c000066400000000000000000000460301360743507500174340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "gemm"; static char* o_types = "mmm"; // a b c static char* p_types = "hh"; // transa transb static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_gemm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_gemm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_gemm_md ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_gemm_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); void libblis_test_gemm_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ); void libblis_test_gemm_md_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ); double libblis_test_gemm_flops ( obj_t* a, obj_t* b, obj_t* c ); void libblis_test_gemm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_gemm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_gemm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_gemm_experiment ); } } void libblis_test_gemm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n, k; trans_t transa; trans_t transb; obj_t alpha, a, b, beta, c; obj_t c_save; // Use a different function to handle mixed datatypes. if ( params->mixed_domain || params->mixed_precision ) { libblis_test_gemm_md( params, op, iface, dc_str, pc_str, sc_str, p_cur, perf, resid ); return; } // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_trans( pc_str[0], &transa ); bli_param_map_char_to_blis_trans( pc_str[1], &transb ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transa, sc_str[1], m, k, &a ); libblis_test_mobj_create( params, datatype, transb, sc_str[2], k, n, &b ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c_save ); // Set alpha and beta. 
if ( bli_obj_is_real( &c ) ) { bli_setsc( 1.2, 0.0, &alpha ); bli_setsc( 0.9, 0.0, &beta ); } else { bli_setsc( 1.2, 0.8, &alpha ); bli_setsc( 0.9, 1.0, &beta ); } // Randomize A, B, and C, and save C. libblis_test_mobj_randomize( params, TRUE, &a ); libblis_test_mobj_randomize( params, TRUE, &b ); libblis_test_mobj_randomize( params, TRUE, &c ); bli_copym( &c, &c_save ); //bli_setm( &BLIS_ONE, &a ); //bli_setsc( 1.0, 0.0, &alpha ); //bli_setsc( 0.0, 0.0, &beta ); // Apply the parameters. bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &c_save, &c ); time = bli_clock(); libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &c ) ) *perf *= 4.0; // Perform checks. libblis_test_gemm_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &c, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } void libblis_test_gemm_md ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t dt_a, dt_b, dt_c; num_t dt_complex; dim_t m, n, k; trans_t transa; trans_t transb; obj_t alpha, a, b, beta, c; obj_t c_save; // Decode the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &dt_c ); bli_param_map_char_to_blis_dt( dc_str[1], &dt_a ); bli_param_map_char_to_blis_dt( dc_str[2], &dt_b ); // Project one of the datatypes (it doesn't matter which) to the // complex domain. dt_complex = bli_dt_proj_to_complex( dt_c ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); k = libblis_test_get_dim_from_prob_size( op->dim_spec[2], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_trans( pc_str[0], &transa ); bli_param_map_char_to_blis_trans( pc_str[1], &transb ); // Create test scalars. bli_obj_scalar_init_detached( dt_complex, &alpha ); bli_obj_scalar_init_detached( dt_complex, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, dt_a, transa, sc_str[1], m, k, &a ); libblis_test_mobj_create( params, dt_b, transb, sc_str[2], k, n, &b ); libblis_test_mobj_create( params, dt_c, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c ); libblis_test_mobj_create( params, dt_c, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c_save ); // For mixed-precision, set the computation precision of C. if ( params->mixed_precision ) { num_t dt_comp; prec_t comp_prec; // The computation precision is encoded in the computation datatype, // which appears as an additional char in dc_str. bli_param_map_char_to_blis_dt( dc_str[3], &dt_comp ); // Extract the precision from the computation datatype. comp_prec = bli_dt_prec( dt_comp ); // Set the computation precision of C. bli_obj_set_comp_prec( comp_prec, &c ); } // Set alpha and beta. { bli_setsc( 2.0, 0.0, &alpha ); bli_setsc( 1.2, 0.5, &beta ); //bli_setsc( 1.0, 0.0, &alpha ); //bli_setsc( 1.0, 0.0, &beta ); } // Randomize A, B, and C, and save C. 
libblis_test_mobj_randomize( params, TRUE, &a ); libblis_test_mobj_randomize( params, TRUE, &b ); libblis_test_mobj_randomize( params, TRUE, &c ); bli_copym( &c, &c_save ); // Apply the parameters. bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &c_save, &c ); time = bli_clock(); libblis_test_gemm_impl( iface, &alpha, &a, &b, &beta, &c ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. //*perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; //if ( bli_obj_is_complex( &c ) ) *perf *= 4.0; *perf = libblis_test_gemm_flops( &a, &b, &c ) / time_min / FLOPS_PER_UNIT_PERF; // Perform checks. libblis_test_gemm_md_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &c, perf, resid ); // Free the test objects. 
bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } void libblis_test_gemm_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: #if 0 //bli_printm( "alpha", alpha, "%5.2f", "" ); //bli_printm( "beta", beta, "%5.2f", "" ); bli_printm( "a", a, "%5.2f", "" ); bli_printm( "b", b, "%5.2f", "" ); bli_printm( "c", c, "%5.2f", "" ); #endif //if ( bli_obj_length( b ) == 16 && // bli_obj_stor3_from_strides( c, a, b ) == BLIS_CRR ) //bli_printm( "c before", c, "%6.3f", "" ); bli_gemm( alpha, a, b, beta, c ); #if 0 if ( bli_obj_length( c ) == 12 && bli_obj_stor3_from_strides( c, a, b ) == BLIS_RRR ) bli_printm( "c after", c, "%6.3f", "" ); #endif break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_gemm_md_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( c ); num_t dt_comp = bli_obj_dt_proj_to_complex( c ); num_t dt; dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width_after_trans( a ); obj_t norm; obj_t t, v, w, z; double junk; // Compute our reference checksum in the real domain if all operands // are real, and in the complex domain otherwise. Also implicit in this // is that we use the storage precision of C to determine the precision // in which we perform the reference checksum. if ( bli_obj_is_real( a ) && bli_obj_is_real( b ) && bli_obj_is_real( c ) ) dt = dt_real; else dt = dt_comp; // This function works in a manner similar to that of the function // libblis_test_gemm_check(), except that we project a, b, and c into // the complex domain (regardless of their storage datatype), and then // proceed with the checking accordingly. 
obj_t a2, b2, c2, c0; bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); libblis_test_vobj_randomize( params, TRUE, &t ); // We need to zero out the imaginary part of t in order for our // checks to work in all cases. Otherwise, the imaginary parts // could affect intermediate products, depending on the order that // they are executed. bli_setiv( &BLIS_ZERO, &t ); // Create complex equivalents of a, b, c_orig, and c. bli_obj_create( dt, m, k, 0, 0, &a2 ); bli_obj_create( dt, k, n, 0, 0, &b2 ); bli_obj_create( dt, m, n, 0, 0, &c2 ); bli_obj_create( dt, m, n, 0, 0, &c0 ); // Cast a, b, c_orig, and c into the datatype of our temporary objects. bli_castm( a, &a2 ); bli_castm( b, &b2 ); bli_castm( c_orig, &c2 ); bli_castm( c, &c0 ); bli_gemv( &BLIS_ONE, &c0, &t, &BLIS_ZERO, &v ); #if 0 if ( bli_obj_is_scomplex( c ) && bli_obj_is_float( a ) && bli_obj_is_float( b ) ) { bli_printm( "test_gemm.c: a", a, "%7.3f", "" ); bli_printm( "test_gemm.c: b", b, "%7.3f", "" ); bli_printm( "test_gemm.c: c orig", c_orig, "%7.3f", "" ); bli_printm( "test_gemm.c: c computed", c, "%7.3f", "" ); } #endif #if 0 bli_gemm( alpha, &a2, &b2, beta, &c2 ); bli_gemv( &BLIS_ONE, &c2, &t, &BLIS_ZERO, &z ); if ( bli_obj_is_real( c ) ) bli_setiv( &BLIS_ZERO, &z ); #else bli_gemv( &BLIS_ONE, &b2, &t, &BLIS_ZERO, &w ); bli_gemv( alpha, &a2, &w, &BLIS_ZERO, &z ); bli_gemv( beta, &c2, &t, &BLIS_ONE, &z ); if ( bli_obj_is_real( c ) ) bli_setiv( &BLIS_ZERO, &z ); #endif bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); bli_obj_free( &a2 ); bli_obj_free( &b2 ); bli_obj_free( &c2 ); bli_obj_free( &c0 ); } void libblis_test_gemm_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = 
bli_obj_dt( c ); num_t dt_real = bli_obj_dt_proj_to_real( c ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width_after_trans( a ); obj_t norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a is randomized. // - b is randomized. // - c_orig is randomized. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * transa(A) * transb(B) // // is functioning correctly if // // normfv( v - z ) // // is negligible, where // // v = C * t // z = ( beta * C_orig + alpha * transa(A) * transb(B) ) * t // = beta * C_orig * t + alpha * transa(A) * transb(B) * t // = beta * C_orig * t + alpha * transa(A) * w // = beta * C_orig * t + z // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w ); bli_gemv( alpha, a, &w, &BLIS_ZERO, &z ); bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); } double libblis_test_gemm_flops ( obj_t* a, obj_t* b, obj_t* c ) { bool_t a_is_real = bli_obj_is_real( a ); bool_t a_is_complex = bli_obj_is_complex( a ); bool_t b_is_real = bli_obj_is_real( b ); bool_t b_is_complex = bli_obj_is_complex( b ); bool_t c_is_real = bli_obj_is_real( c ); bool_t c_is_complex = bli_obj_is_complex( c ); double m = ( double )bli_obj_length( c ); double n = ( double )bli_obj_width( c ); double k = ( double )bli_obj_width( a ); double flops; if ( ( c_is_complex && a_is_complex && b_is_complex ) ) { flops = 8.0 * 
m * n * k; } else if ( ( c_is_complex && a_is_complex && b_is_real ) || ( c_is_complex && a_is_real && b_is_complex ) || ( c_is_real && a_is_complex && b_is_complex ) ) { flops = 4.0 * m * n * k; } else { flops = 2.0 * m * n * k; } return flops; } blis-0.6.1/testsuite/src/test_gemm.h000066400000000000000000000034771360743507500174510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void libblis_test_gemm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_gemm_ukr.c000066400000000000000000000315271360743507500203220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. 
static char* op_str = "gemm_ukr"; static char* o_types = "m"; // c static char* p_types = ""; static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_gemm_ukr_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_gemm_ukr_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_gemm_ukr_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx ); void libblis_test_gemm_ukr_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ); void libblis_test_gemm_ukr_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_gemm_ukr ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l3ukr_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_gemm_ukr_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_UKERNEL, op_str, p_types, o_types, thresh, libblis_test_gemm_ukr_experiment ); } } void libblis_test_gemm_ukr_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n, k; inc_t ldap, ldbp; char sc_a = 'c'; char sc_b = 'r'; obj_t alpha, a, b, beta, c; obj_t ap, bp; obj_t c_save; cntx_t* cntx; // Query a context. cntx = bli_gks_query_cntx(); // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. k = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Fix m and n to MR and NR, respectively. m = bli_cntx_get_blksz_def_dt( datatype, BLIS_MR, cntx ); n = bli_cntx_get_blksz_def_dt( datatype, BLIS_NR, cntx ); // Also query PACKMR and PACKNR as the leading dimensions to ap and bp, // respectively. ldap = bli_cntx_get_blksz_max_dt( datatype, BLIS_MR, cntx ); ldbp = bli_cntx_get_blksz_max_dt( datatype, BLIS_NR, cntx ); // Store the register blocksizes so that the driver can retrieve the // values later when printing results. op->dim_aux[0] = m; op->dim_aux[1] = n; // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands. libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_a, m, k, &a ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_b, k, n, &b ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c_save ); // Set alpha and beta. 
if ( bli_obj_is_real( &c ) ) { bli_setsc( 1.2, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); //bli_setsc( 0.0, 0.0, &beta ); } else { bli_setsc( 1.2, 0.8, &alpha ); bli_setsc( -1.0, 0.5, &beta ); } // Randomize A, B, and C, and save C. libblis_test_mobj_randomize( params, TRUE, &a ); libblis_test_mobj_randomize( params, TRUE, &b ); libblis_test_mobj_randomize( params, TRUE, &c ); bli_copym( &c, &c_save ); #if 0 // Create pack objects for a and b, and pack them to ap and bp, // respectively. cntl_t* cntl_a = libblis_test_pobj_create ( BLIS_MR, BLIS_KR, BLIS_NO_INVERT_DIAG, BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK, &a, &ap, cntx ); cntl_t* cntl_b = libblis_test_pobj_create ( BLIS_KR, BLIS_NR, BLIS_NO_INVERT_DIAG, BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL, &b, &bp, cntx ); #endif // Create the packed objects. Use packmr and packnr as the leading // dimensions of ap and bp, respectively. Note that we use the ldims // instead of the matrix dimensions for allocation purposes here. // This is a little hacky and was prompted when trying to support // configurations such as power9 that employ duplication/broadcasting // of elements in one of the packed matrix objects. Thankfully, packm // doesn't care about those dimensions and instead relies on // information taken from the source object. Thus, this is merely // about coaxing bli_obj_create() in allocating enough space for our // purposes. bli_obj_create( datatype, ldap, k, 1, ldap, &ap ); bli_obj_create( datatype, k, ldbp, ldbp, 1, &bp ); // Set up the objects for packing. Calling packm_init_pack() does everything // except checkout a memory pool block and save its address to the obj_t's. // However, it does overwrite the buffer field of packed object with that of // the source object (as a side-effect of bli_obj_alias_to(); that buffer // field would normally be overwritten yet again by the address from the // memory pool block). 
So, we have to save the buffer address that was // allocated so we can re-store it to the object afterward. void* buf_ap = bli_obj_buffer( &ap ); void* buf_bp = bli_obj_buffer( &bp ); bli_packm_init_pack( BLIS_NO_INVERT_DIAG, BLIS_PACKED_ROW_PANELS, BLIS_PACK_FWD_IF_UPPER, BLIS_PACK_FWD_IF_LOWER, BLIS_MR, BLIS_KR, &a, &ap, cntx ); bli_packm_init_pack( BLIS_NO_INVERT_DIAG, BLIS_PACKED_COL_PANELS, BLIS_PACK_FWD_IF_UPPER, BLIS_PACK_FWD_IF_LOWER, BLIS_KR, BLIS_NR, &b, &bp, cntx ); bli_obj_set_buffer( buf_ap, &ap ); bli_obj_set_buffer( buf_bp, &bp ); // Pack the data from the source objects. bli_packm_blk_var1( &a, &ap, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED ); bli_packm_blk_var1( &b, &bp, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &c_save, &c ); time = bli_clock(); libblis_test_gemm_ukr_impl( iface, &alpha, &ap, &bp, &beta, &c, cntx ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &c ) ) *perf *= 4.0; // Perform checks. libblis_test_gemm_ukr_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &c, perf, resid ); #if 0 // Free the control tree nodes and release their cached mem_t entries // back to the memory broker. bli_cntl_free( cntl_a, &BLIS_PACKM_SINGLE_THREADED ); bli_cntl_free( cntl_b, &BLIS_PACKM_SINGLE_THREADED ); #endif // Free the packed objects. bli_obj_free( &ap ); bli_obj_free( &bp ); // Free the test objects. 
bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } void libblis_test_gemm_ukr_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx ) { switch ( iface ) { case BLIS_TEST_SEQ_UKERNEL: bli_gemm_ukernel( alpha, a, b, beta, c, cntx ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_gemm_ukr_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = bli_obj_dt( c ); num_t dt_real = bli_obj_dt_proj_to_real( c ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); dim_t k = bli_obj_width( a ); obj_t norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a is randomized. // - b is randomized. // - c_orig is randomized. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * A * B // // is functioning correctly if // // normfv( v - z ) // // is negligible, where // // v = C * t // z = ( beta * C_orig + alpha * A * B ) * t // = beta * C_orig * t + alpha * A * B * t // = beta * C_orig * t + alpha * A * w // = beta * C_orig * t + z // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w ); bli_gemv( alpha, a, &w, &BLIS_ZERO, &z ); bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); } 
blis-0.6.1/testsuite/src/test_gemm_ukr.h000066400000000000000000000035031360743507500203200ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void libblis_test_gemm_ukr ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_gemmtrsm_ukr.c000066400000000000000000000450231360743507500212240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. 
// Operation name plus the operand-type and parameter-type strings that are
// handed to libblis_test_op_driver() further below.
static char*     op_str                    = "gemmtrsm_ukr";
static char*     o_types                   = "m"; // c11
static char*     p_types                   = "u"; // uploa
// Residual thresholds, one { warn, pass } pair per floating-point datatype.
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_gemmtrsm_ukr_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     );

void libblis_test_gemmtrsm_ukr_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     );

void libblis_test_gemmtrsm_ukr_impl
     (
       iface_t iface,
       side_t  side,
       obj_t*  alpha,
       obj_t*  a1x,
       obj_t*  a11,
       obj_t*  bx1,
       obj_t*  b11,
       obj_t*  c11,
       cntx_t* cntx
     );

void libblis_test_gemmtrsm_ukr_check
     (
       test_params_t* params,
       side_t         side,
       obj_t*         alpha,
       obj_t*         a1x,
       obj_t*         a11,
       obj_t*         bx1,
       obj_t*         b11,
       obj_t*         c11,
       obj_t*         c11_save,
       double*        resid
     );

void bli_gemmtrsm_ukr_make_subparts
     (
       dim_t  k,
       obj_t* a,
       obj_t* b,
       obj_t* a1x,
       obj_t* a11,
       obj_t* bx1,
       obj_t* b11
     );


//
// Run the tests of every operation that the gemmtrsm_ukr experiment and its
// check rely on, so those operations are exercised before this one.
//
void libblis_test_gemmtrsm_ukr_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{
	libblis_test_randv( tdata, params, &(op->ops->randv) );
	libblis_test_randm( tdata, params, &(op->ops->randm) );
	libblis_test_setv( tdata, params, &(op->ops->setv) );
	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
	libblis_test_subv( tdata, params, &(op->ops->subv) );
	libblis_test_scalv( tdata, params, &(op->ops->scalv) );
	libblis_test_copym( tdata, params, &(op->ops->copym) );
	libblis_test_scalm( tdata, params, &(op->ops->scalm) );
	libblis_test_gemv( tdata, params, &(op->ops->gemv) );
	libblis_test_trsv( tdata, params, &(op->ops->trsv) );
}


//
// Entry point for the gemmtrsm_ukr test.
//
// Returns without doing anything when the op has already been run or when
// either the op itself or the level-3 micro-kernel group is disabled.
// Otherwise the dependencies are run first and the op driver is then invoked
// with the BLIS_TEST_SEQ_UKERNEL interface and the experiment callback.
//
void libblis_test_gemmtrsm_ukr
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{

	// Return early if this test has already been done.
	if ( libblis_test_op_is_done( op ) ) return;

	// Return early if operation is disabled.
	if ( libblis_test_op_is_disabled( op ) ||
	     libblis_test_l3ukr_is_disabled( op ) ) return;

	// Call dependencies first.
	if ( TRUE ) libblis_test_gemmtrsm_ukr_deps( tdata, params, op );

	// Execute the test driver for each implementation requested.
	//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_UKERNEL,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_gemmtrsm_ukr_experiment );
	}
}


// Import the register blocksizes used by the micro-kernel(s).
// NOTE(review): none of these three externs is referenced by the functions
// in this file section; confirm whether they are still needed.
extern blksz_t* gemm_mr;
extern blksz_t* gemm_nr;
extern blksz_t* gemm_kr;

//
// Run one gemmtrsm micro-kernel experiment.
//
// Arguments:
// - params:  test parameters; supplies n_repeats and is forwarded to the
//            object-creation/randomization helpers.
// - op:      the test operation; dim_spec[0] yields k, and dim_aux[0..1]
//            are overwritten with m and n.
// - iface:   interface selector forwarded to the _impl() wrapper.
// - dc_str:  datatype string; only its first character is used.
// - pc_str:  parameter string; its first character selects uploa.
// - sc_str:  storage string; its first character is used for c11/c11_save.
// - p_cur:   current problem size, mapped to k.
// - perf:    output; performance estimate of the fastest repeat.
// - resid:   output; residual written by the _check() function.
//
void libblis_test_gemmtrsm_ukr_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	num_t        datatype;

	dim_t        m, n, k;
	inc_t        ldap, ldbp;

	// Storage characters used when creating a_big and b, respectively.
	char         sc_a = 'c';
	char         sc_b = 'r';

	// Only the left-side case is exercised (see the _check() function).
	side_t       side = BLIS_LEFT;
	uplo_t       uploa;

	obj_t        alpha;
	obj_t        a_big, a, b;
	obj_t        b11, c11;
	obj_t        ap, bp;
	obj_t        a1xp, a11p, bx1p, b11p;
	obj_t        c11_save;

	cntx_t*      cntx;


	// Query a context.
	cntx = bli_gks_query_cntx();

	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to actual dimensions.
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Fix m and n to MR and NR, respectively.
	m = bli_cntx_get_blksz_def_dt( datatype, BLIS_MR, cntx );
	n = bli_cntx_get_blksz_def_dt( datatype, BLIS_NR, cntx );

	// Also query PACKMR and PACKNR as the leading dimensions to ap and bp,
	// respectively. (These are read as the "max" blocksize values of
	// BLIS_MR and BLIS_NR.)
	ldap = bli_cntx_get_blksz_max_dt( datatype, BLIS_MR, cntx );
	ldbp = bli_cntx_get_blksz_max_dt( datatype, BLIS_NR, cntx );

	// Store the register blocksizes so that the driver can retrieve the
	// values later when printing results.
	op->dim_aux[0] = m;
	op->dim_aux[1] = n;

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	// a_big is (k+m) x (k+m), b is (k+m) x n, c11 and c11_save are m x n.
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_a,      k+m, k+m, &a_big );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_b,      k+m, n,   &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m,   n,   &c11 );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m,   n,   &c11_save );

	// Set alpha.
	// NOTE(review): both branches currently assign the same value
	// (2.0 + 0.0i), so the real/complex distinction has no effect here.
	if ( bli_obj_is_real( &b ) )
	{
		bli_setsc(  2.0,  0.0, &alpha );
	}
	else
	{
		bli_setsc(  2.0,  0.0, &alpha );
	}

	// Set the structure, uplo, and diagonal offset properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, &a_big );
	bli_obj_set_uplo( uploa, &a_big );

	// Randomize A and make it densely triangular.
	libblis_test_mobj_randomize( params, TRUE, &a_big );
	libblis_test_mobj_load_diag( params, &a_big );

	// Randomize B. (The TRUE argument presumably requests normalization;
	// confirm against libblis_test_mobj_randomize(). B itself is not saved
	// here; its B11 block is copied into c11/c11_save below.)
	libblis_test_mobj_randomize( params, TRUE, &b );

	// Locate A1x/A11 (lower) or Ax1/A11 (upper), and then locate the
	// corresponding B11 block of B. In the lower case the m-row partition
	// starts at row offset k; in the upper case it starts at row offset 0.
	if ( bli_obj_is_lower( &a_big ) )
	{
		bli_acquire_mpart_t2b( BLIS_SUBPART1, k, m, &a_big, &a );
		bli_acquire_mpart_t2b( BLIS_SUBPART1, k, m, &b, &b11 );
	}
	else
	{
		bli_acquire_mpart_t2b( BLIS_SUBPART1, 0, m, &a_big, &a );
		bli_acquire_mpart_t2b( BLIS_SUBPART1, 0, m, &b, &b11 );
	}

	// Copy B11 to C11, and save.
	bli_copym( &b11, &c11 );
	bli_copym( &c11, &c11_save );

#if 0
	// Create pack objects for a and b, and pack them to ap and bp,
	// respectively.
	cntl_t* cntl_a = libblis_test_pobj_create
	(
	  BLIS_MR,
	  BLIS_MR,
	  BLIS_INVERT_DIAG,
	  BLIS_PACKED_ROW_PANELS,
	  BLIS_BUFFER_FOR_A_BLOCK,
	  &a, &ap,
	  &cntx
	);
	cntl_t* cntl_b = libblis_test_pobj_create
	(
	  BLIS_MR,
	  BLIS_NR,
	  BLIS_NO_INVERT_DIAG,
	  BLIS_PACKED_COL_PANELS,
	  BLIS_BUFFER_FOR_B_PANEL,
	  &b, &bp,
	  &cntx
	);
#endif

	// Create the packed objects. Use packmr and packnr as the leading
	// dimensions of ap and bp, respectively. Note that we use the ldims
	// instead of the matrix dimensions for allocation purposes here.
	// This is a little hacky and was prompted when trying to support
	// configurations such as power9 that employ duplication/broadcasting
	// of elements in one of the packed matrix objects. Thankfully, packm
	// doesn't care about those dimensions and instead relies on
	// information taken from the source object. Thus, this is merely
	// about coaxing bli_obj_create() in allocating enough space for our
	// purposes.
	bli_obj_create( datatype, ldap, k+m, 1, ldap, &ap );
	bli_obj_create( datatype, k+m, ldbp, ldbp, 1, &bp );

	// We overwrite the m dimension of ap and n dimension of bp with
	// m and n, respectively, so that these objects contain the correct
	// logical dimensions. Recall that ldap and ldbp were used only to
	// induce bli_obj_create() to allocate sufficient memory for the
	// duplication in rare instances where the subconfig uses a gemm
	// ukernel that duplicates elements in one of the operands.
	bli_obj_set_length( m, &ap );
	bli_obj_set_width( n, &bp );

	// Set up the objects for packing. Calling packm_init_pack() does everything
	// except checkout a memory pool block and save its address to the obj_t's.
	// However, it does overwrite the buffer field of packed object with that of
	// the source object (as a side-effect of bli_obj_alias_to(); that buffer
	// field would normally be overwritten yet again by the address from the
	// memory pool block). So, we have to save the buffer address that was
	// allocated so we can re-store it to the object afterward.
	void* buf_ap = bli_obj_buffer( &ap );
	void* buf_bp = bli_obj_buffer( &bp );
	// A is packed with BLIS_INVERT_DIAG; B with BLIS_NO_INVERT_DIAG.
	bli_packm_init_pack
	(
	  BLIS_INVERT_DIAG,
	  BLIS_PACKED_ROW_PANELS,
	  BLIS_PACK_FWD_IF_UPPER,
	  BLIS_PACK_FWD_IF_LOWER,
	  BLIS_MR,
	  BLIS_KR,
	  &a,
	  &ap,
	  cntx
	);
	bli_packm_init_pack
	(
	  BLIS_NO_INVERT_DIAG,
	  BLIS_PACKED_COL_PANELS,
	  BLIS_PACK_FWD_IF_UPPER,
	  BLIS_PACK_FWD_IF_LOWER,
	  BLIS_KR,
	  BLIS_NR,
	  &b,
	  &bp,
	  cntx
	);
	bli_obj_set_buffer( buf_ap, &ap );
	bli_obj_set_buffer( buf_bp, &bp );

	// Set the diagonal offset of ap.
	if ( bli_is_lower( uploa ) ) { bli_obj_set_diag_offset( k, &ap ); }
	else                         { bli_obj_set_diag_offset( 0, &ap ); }

	// Set the uplo field of ap since the default for packed objects is
	// BLIS_DENSE, and the _make_subparts() routine needs this information
	// to know how to initialize the subpartitions.
	bli_obj_set_uplo( uploa, &ap );

	// Pack the data from the source objects.
	bli_packm_blk_var1( &a, &ap, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED );
	bli_packm_blk_var1( &b, &bp, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED );

	// Create subpartitions from the a and b panels.
	bli_gemmtrsm_ukr_make_subparts( k, &ap, &bp,
	                                &a1xp, &a11p, &bx1p, &b11p );

	// Set the uplo field of a11p since the default for packed objects is
	// BLIS_DENSE, and the _ukernel() wrapper needs this information to
	// know which set of micro-kernels (lower or upper) to choose from.
	bli_obj_set_uplo( uploa, &a11p );

#if 0
bli_printm( "a", &a, "%5.2f", "" );
bli_printm( "ap", &ap, "%5.2f", "" );
#endif

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		bli_copym( &c11_save, &c11 );

		// Re-pack (restore) the contents of b to bp.
		//bli_packm_blk_var1( &b, &bp, &cntx, cntl_b, &BLIS_PACKM_SINGLE_THREADED );
		bli_packm_blk_var1( &b, &bp, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED );

		// Only the micro-kernel call itself lies between the two clock
		// reads; the restore and re-pack above are outside the timed region.
		time = bli_clock();

		libblis_test_gemmtrsm_ukr_impl( iface, side, &alpha,
		                                &a1xp, &a11p, &bx1p, &b11p, &c11, cntx );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * m * n * k + 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &b ) ) *perf *= 4.0;

	// A hack to support subconfigs such as power9, which duplicate/broadcast
	// more than one stored element per logical element in the packed copy of
	// B. We assume that the ratio ldbp/n gives us the duplication factor used
	// within B while the ratio ldap/m gives us the duplication factor used
	// within A (not entirely a safe assumption, though I think it holds for
	// all gemm ukernels currently supported within BLIS). This duplication
	// factor must be used as the column stride of B (or the row stride of A)
	// in order for the bli_gemv() operation (called within the
	// libblis_test_gemmtrsm_ukr_check()) to operate properly.
	if ( ldbp / n > 1 )
	{
		const dim_t bfac = ldbp / n;
		bli_obj_set_col_stride( bfac, &b11p );
		bli_obj_set_col_stride( bfac, &bx1p );
	}
	if ( ldap / m > 1 )
	{
		// Despite its name, bfac here holds the factor for A (ldap / m).
		const dim_t bfac = ldap / m;
		bli_obj_set_row_stride( bfac, &a11p );
		bli_obj_set_row_stride( bfac, &a1xp );
	}

	// Perform checks.
	libblis_test_gemmtrsm_ukr_check( params, side, &alpha,
	                                 &a1xp, &a11p, &bx1p, &b11p, &c11, &c11_save, resid );

	// Zero out performance and residual if output matrix is empty.
	//libblis_test_check_empty_problem( &c11, perf, resid );

#if 0
	// Free the control tree nodes and release their cached mem_t entries
	// back to the memory broker.
	bli_cntl_free( cntl_a, &BLIS_PACKM_SINGLE_THREADED );
	bli_cntl_free( cntl_b, &BLIS_PACKM_SINGLE_THREADED );
#endif

	// Free the packed objects.
	bli_obj_free( &ap );
	bli_obj_free( &bp );

	// Free the test objects.
	bli_obj_free( &a_big );
	bli_obj_free( &b );
	bli_obj_free( &c11 );
	bli_obj_free( &c11_save );
}


//
// Invoke the gemmtrsm micro-kernel wrapper for the requested test interface.
//
// Only BLIS_TEST_SEQ_UKERNEL is handled; it forwards the operands and the
// context to bli_gemmtrsm_ukernel(). Note that side is accepted but not
// passed on. Any other iface value is reported through
// libblis_test_printf_error().
//
void libblis_test_gemmtrsm_ukr_impl
     (
       iface_t iface,
       side_t  side,
       obj_t*  alpha,
       obj_t*  a1x,
       obj_t*  a11,
       obj_t*  bx1,
       obj_t*  b11,
       obj_t*  c11,
       cntx_t* cntx
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_UKERNEL:
		bli_gemmtrsm_ukernel( alpha, a1x, a11, bx1, b11, c11, cntx );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}


//
// Compute a residual for the gemmtrsm micro-kernel result and store it in
// *resid.
//
// Arguments:
// - params:   test parameters, passed through to the vector randomizer.
// - side:     must be BLIS_LEFT; any other value reaches
//             bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ).
// - alpha, a1x, a11, bx1: the operands that were given to the micro-kernel.
// - b11:      the packed B11 block as updated by the micro-kernel.
// - c11:      not referenced by this function.
// - c11_orig: the contents of B11/C11 before the micro-kernel was applied.
// - resid:    output; receives the norm computed via bli_normfv() below.
//
// Side effect: a11 is modified in place by bli_invertd().
//
void libblis_test_gemmtrsm_ukr_check
     (
       test_params_t* params,
       side_t         side,
       obj_t*         alpha,
       obj_t*         a1x,
       obj_t*         a11,
       obj_t*         bx1,
       obj_t*         b11,
       obj_t*         c11,
       obj_t*         c11_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( b11 );
	num_t  dt_real = bli_obj_dt_proj_to_real( b11 );

	dim_t  m       = bli_obj_length( b11 );
	dim_t  n       = bli_obj_width( b11 );
	dim_t  k       = bli_obj_width( a1x );

	obj_t  norm;
	obj_t  t, v, w, z;

	// Receives the second output of bli_getsc(); never read afterwards.
	double junk;

	//
	// Pre-conditions:
	// - a1x, a11, bx1, c11_orig are randomized; a11 is triangular.
	// - contents of b11 == contents of c11.
	// - side == BLIS_LEFT.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   B := inv(A11) * ( alpha * B11 - A1x * Bx1 )       (side = left)
	//
	// is functioning correctly if
	//
	//   normfv( v - z )
	//
	// is negligible, where
	//
	//   v = B11 * t
	//
	//   z = ( inv(A11) * ( alpha * B11_orig - A1x * Bx1 ) ) * t
	//     = inv(A11) * ( alpha * B11_orig * t - A1x * Bx1 * t )
	//     = inv(A11) * ( alpha * B11_orig * t - A1x * w )
	//

	bli_obj_scalar_init_detached( dt_real, &norm );

	if ( bli_is_left( side ) )
	{
		// Work vectors: t has length n, w has length k, v and z have
		// length m.
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, k, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}
	else // else if ( bli_is_right( side ) )
	{
		// BLIS does not currently support right-side micro-kernels.
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	libblis_test_vobj_randomize( params, TRUE, &t );

	// v = B11 * t
	bli_gemv( &BLIS_ONE, b11, &t, &BLIS_ZERO, &v );

#if 0
bli_printm( "a11", a11, "%5.2f", "" );
#endif

	// Restore the diagonal of a11 to its original, un-inverted state
	// (needed for trsv).
	bli_invertd( a11 );

	if ( bli_is_left( side ) )
	{
		// w = Bx1 * t
		// z = alpha * C11_orig * t
		// z = z - A1x * w
		// z = inv(A11) * z   (triangular solve)
		bli_gemv( &BLIS_ONE, bx1, &t, &BLIS_ZERO, &w );
		bli_gemv( alpha, c11_orig, &t, &BLIS_ZERO, &z );
		bli_gemv( &BLIS_MINUS_ONE, a1x, &w, &BLIS_ONE, &z );
		bli_trsv( &BLIS_ONE, a11, &z );
	}
	else // else if ( bli_is_right( side ) )
	{
		// BLIS does not currently support right-side micro-kernels.
		bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
	}

	// Form the difference in v and take its norm (see normfv( v - z ) above).
	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}


//
// Carve the packed panels a and b into the four sub-operands used by the
// gemmtrsm micro-kernel: a1x (mr x k), a11 (mr x mr), bx1 (k x nr) and
// b11 (mr x nr), where mr is the length of a and nr is the width of b.
//
// Offsets applied (column offsets within a, row offsets within b):
// - a is lower: a1x/bx1 at offset 0,  a11/b11 at offset k.
// - otherwise:  a1x/bx1 at offset mr, a11/b11 at offset 0.
//
// a must already have its uplo field set, since that is what selects the
// branch below.
//
void bli_gemmtrsm_ukr_make_subparts
     (
       dim_t  k,
       obj_t* a,
       obj_t* b,
       obj_t* a1x,
       obj_t* a11,
       obj_t* bx1,
       obj_t* b11
     )
{
	dim_t mr = bli_obj_length( a );
	dim_t nr = bli_obj_width( b );

	dim_t off_a1x, off_a11;
	dim_t off_bx1, off_b11;

	if ( bli_obj_is_lower( a ) )
	{
		off_a1x = 0;
		off_a11 = k;
		off_bx1 = 0;
		off_b11 = k;
	}
	else
	{
		off_a1x = mr;
		off_a11 = 0;
		off_bx1 = mr;
		off_b11 = 0;
	}

	bli_obj_init_subpart_from( a, a1x );
	bli_obj_set_dims( mr, k, a1x );
	bli_obj_inc_offs( 0, off_a1x, a1x );

	bli_obj_init_subpart_from( a, a11 );
	bli_obj_set_dims( mr, mr, a11 );
	bli_obj_inc_offs( 0, off_a11, a11 );

	bli_obj_init_subpart_from( b, bx1 );
	bli_obj_set_dims( k, nr, bx1 );
	bli_obj_inc_offs( off_bx1, 0, bx1 );

	bli_obj_init_subpart_from( b, b11 );
	bli_obj_set_dims( mr, nr, b11 );
	bli_obj_inc_offs( off_b11, 0, b11 );

	// Mark a1x as having general structure (which overwrites the triangular
	// property it inherited from a).
	bli_obj_set_struc( BLIS_GENERAL, a1x );

	// Set the diagonal offset of a11 to 0 (which overwrites the diagonal
	// offset value it inherited from a).
	bli_obj_set_diag_offset( 0, a11 );
}

// NOTE: the next line is tar (ustar) member-header residue for the following
// archive member; it is not C source and is reproduced verbatim.
blis-0.6.1/testsuite/src/test_gemmtrsm_ukr.h000066400000000000000000000035071360743507500212320ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/ void libblis_test_gemmtrsm_ukr ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_gemv.c000066400000000000000000000234651360743507500174540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. 
static char* op_str = "gemv"; static char* o_types = "mvv"; // a x y static char* p_types = "hc"; // transa conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_gemv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_gemv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_gemv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ); void libblis_test_gemv_check ( test_params_t* params, obj_t* kappa, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_gemv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); } void libblis_test_gemv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_gemv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_gemv_experiment ); } } void libblis_test_gemv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; trans_t transa; conj_t conjx; obj_t kappa; obj_t alpha, a, x, beta, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_trans( pc_str[0], &transa ); bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &kappa ); bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transa, sc_str[0], m, n, &a ); libblis_test_vobj_create( params, datatype, sc_str[1], n, &x ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &y_save ); // Set alpha and beta. if ( bli_obj_is_real( &y ) ) { bli_setsc( 2.0, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } else { bli_setsc( 1.0, 2.0, &alpha ); bli_setsc( 1.0, -1.0, &beta ); } // Initialize diagonal of matrix A. bli_setsc( 2.0, -1.0, &kappa ); bli_setm( &BLIS_ZERO, &a ); bli_setd( &kappa, &a ); // Randomize x and y, and save y. 
libblis_test_vobj_randomize( params, TRUE, &x ); libblis_test_vobj_randomize( params, TRUE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &y_save, &y ); time = bli_clock(); libblis_test_gemv_impl( iface, &alpha, &a, &x, &beta, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_gemv_check( params, &kappa, &alpha, &a, &x, &beta, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_gemv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_gemv( alpha, a, x, beta, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_gemv_check ( test_params_t* params, obj_t* kappa, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); conj_t conja = bli_obj_conj_status( a ); dim_t n_x = bli_obj_vector_dim( x ); dim_t m_y = bli_obj_vector_dim( y ); dim_t min_m_n = bli_min( m_y, n_x ); obj_t x_temp, y_temp; obj_t kappac, norm; obj_t xT_temp, yT_temp, yT; double junk; // // Pre-conditions: // - a is initialized to kappa along the diagonal. // - x is randomized. // - y_orig is randomized. // Note: // - alpha, beta, and kappa should have non-zero imaginary components in // the complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * transa(A) * conjx(x) // // is functioning correctly if // // normfv( y - z ) // // is negligible, where // // z = beta * y_orig + alpha * conja(kappa) * x // bli_obj_scalar_init_detached_copy_of( dt, conja, kappa, &kappac ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n_x, 1, 0, 0, &x_temp ); bli_obj_create( dt, m_y, 1, 0, 0, &y_temp ); bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, &x_temp, &xT_temp ); bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, &y_temp, &yT_temp ); bli_acquire_vpart_f2b( BLIS_SUBPART1, 0, min_m_n, y, &yT ); bli_scalv( &kappac, &xT_temp ); bli_scalv( beta, &yT_temp ); bli_axpyv( alpha, &xT_temp, &yT_temp ); bli_subv( &yT_temp, &yT ); bli_normfv( &yT, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); } blis-0.6.1/testsuite/src/test_gemv.h000066400000000000000000000034771360743507500174620ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_gemv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_ger.c000066400000000000000000000222671360743507500172720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
#include "test_libblis.h"


// Static variables.
// Operation name plus the operand-type and parameter-type strings that are
// handed to libblis_test_op_driver() by libblis_test_ger().
static char*     op_str                    = "ger";
static char*     o_types                   = "vvm"; // x y a
static char*     p_types                   = "cc";  // conjx conjy
// Residual thresholds, one { warn, pass } pair per floating-point datatype.
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_ger_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_ger_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_ger_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ); void libblis_test_ger_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, obj_t* a_orig, double* resid ); void libblis_test_ger_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scal2v( tdata, params, &(op->ops->scal2v) ); libblis_test_dotv( tdata, params, &(op->ops->dotv) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_ger ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_ger_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_ger_experiment ); } } void libblis_test_ger_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; conj_t conjx, conjy; obj_t alpha, x, y, a; obj_t a_save; // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], n, &y ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, n, &a ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, n, &a_save ); // Set alpha. if ( bli_obj_is_real( &a ) ) bli_setsc( -1.0, 1.0, &alpha ); else bli_setsc( -1.0, 1.0, &alpha ); // Randomize x and y. libblis_test_vobj_randomize( params, TRUE, &x ); libblis_test_vobj_randomize( params, TRUE, &y ); // Initialize A to identity and save. bli_setm( &BLIS_ZERO, &a ); bli_setd( &BLIS_ONE, &a ); bli_copym( &a, &a_save ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); bli_obj_set_conj( conjy, &y ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &a_save, &a ); time = bli_clock(); libblis_test_ger_impl( iface, &alpha, &x, &y, &a ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &a ) ) *perf *= 4.0; // Perform checks. libblis_test_ger_check( params, &alpha, &x, &y, &a, &a_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &a, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } void libblis_test_ger_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_ger( alpha, x, y, a ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_ger_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, obj_t* a_orig, double* resid ) { num_t dt = bli_obj_dt( a ); num_t dt_real = bli_obj_dt_proj_to_real( a ); dim_t m_a = bli_obj_length( a ); dim_t n_a = bli_obj_width( a ); obj_t t, v, w; obj_t rho, norm; double junk; // // Pre-conditions: // - x is randomized. // - y is randomized. // - a is identity. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // A := A_orig + alpha * conjx(x) * conjy(y) // // is functioning correctly if // // normfv( v - w ) // // is negligible, where // // v = A * t // w = ( A_orig + alpha * conjx(x) * conjy(y)^T ) * t // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t // = A_orig * t + alpha * conjx(x) * rho // = A_orig * t + w // bli_obj_scalar_init_detached( dt, &rho ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); bli_obj_create( dt, m_a, 1, 0, 0, &w ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v ); bli_dotv( y, &t, &rho ); bli_mulsc( alpha, &rho ); bli_scal2v( &rho, x, &w ); bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w ); bli_subv( &w, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); } blis-0.6.1/testsuite/src/test_ger.h000066400000000000000000000034761360743507500173000ustar00rootroot00000000000000/* BLIS An object-based framework for developing 
high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_ger ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_hemm.c000066400000000000000000000264611360743507500174430ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
 - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
 - Neither the name(s) of the copyright holder(s) nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
#include "test_libblis.h"


// Static variables.

// Name of the operation; passed to libblis_test_op_driver().
static char*     op_str                    = "hemm";
// One character per operand, listed in the order a, b, c.
static char*     o_types                   = "mmm";  // a b c
// One character per parameter; the experiment maps pc_str[0..3] to
// side, uploa, conja and transb respectively.
static char*     p_types                   = "such"; // side uploa conja transb
// One { warn, pass } threshold pair per floating-point datatype.
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_hemm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_hemm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_hemm_impl ( iface_t iface, side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); void libblis_test_hemm_check ( test_params_t* params, side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ); void libblis_test_hemm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); libblis_test_hemv( tdata, params, &(op->ops->hemv) ); } void libblis_test_hemm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_hemm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_hemm_experiment ); } } void libblis_test_hemm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; dim_t mn_side; side_t side; uplo_t uploa; conj_t conja; trans_t transb; obj_t alpha, a, b, beta, c; obj_t c_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_side( pc_str[0], &side ); bli_param_map_char_to_blis_uplo( pc_str[1], &uploa ); bli_param_map_char_to_blis_conj( pc_str[2], &conja ); bli_param_map_char_to_blis_trans( pc_str[3], &transb ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, &mn_side ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[1], mn_side, mn_side, &a ); libblis_test_mobj_create( params, datatype, transb, sc_str[2], m, n, &b ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c_save ); // Set alpha and beta. if ( bli_obj_is_real( &c ) ) { bli_setsc( 1.2, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } else { bli_setsc( 1.2, 0.8, &alpha ); bli_setsc( -1.0, 1.0, &beta ); } // Set the structure and uplo properties of A. 
bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, make it densely Hermitian, and zero the unstored triangle // to ensure the implementation reads only from the stored region. libblis_test_mobj_randomize( params, TRUE, &a ); bli_mkherm( &a ); bli_mktrim( &a ); // Randomize B and C, and save C. libblis_test_mobj_randomize( params, TRUE, &b ); libblis_test_mobj_randomize( params, TRUE, &c ); bli_copym( &c, &c_save ); // Apply the remaining parameters. bli_obj_set_conj( conja, &a ); bli_obj_set_conjtrans( transb, &b ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &c_save, &c ); time = bli_clock(); libblis_test_hemm_impl( iface, side, &alpha, &a, &b, &beta, &c ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &c ) ) *perf *= 4.0; // Perform checks. libblis_test_hemm_check( params, side, &alpha, &a, &b, &beta, &c, &c_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &c, perf, resid ); // Free the test objects. 
bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } void libblis_test_hemm_impl ( iface_t iface, side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_hemm( side, alpha, a, b, beta, c ); //bli_hemm4m( side, alpha, a, b, beta, c ); //bli_hemm3m( side, alpha, a, b, beta, c ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_hemm_check ( test_params_t* params, side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = bli_obj_dt( c ); num_t dt_real = bli_obj_dt_proj_to_real( c ); dim_t m = bli_obj_length( c ); dim_t n = bli_obj_width( c ); obj_t norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a is randomized and Hermitian. // - b is randomized. // - c_orig is randomized. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * conja(A) * transb(B) (side = left) // C := beta * C_orig + alpha * transb(B) * conja(A) (side = right) // // is functioning correctly if // // normfv( v - z ) // // is negligible, where // // v = C * t // // z = ( beta * C_orig + alpha * conja(A) * transb(B) ) * t (side = left) // = beta * C_orig * t + alpha * conja(A) * transb(B) * t // = beta * C_orig * t + alpha * conja(A) * w // = beta * C_orig * t + z // // z = ( beta * C_orig + alpha * transb(B) * conja(A) ) * t (side = right) // = beta * C_orig * t + alpha * transb(B) * conja(A) * t // = beta * C_orig * t + alpha * transb(B) * w // = beta * C_orig * t + z bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } else // else if ( bli_is_right( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, n, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); if ( bli_is_left( side ) ) { bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w ); bli_hemv( alpha, a, &w, &BLIS_ZERO, &z ); } else // else if ( bli_is_right( side ) ) { bli_hemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &w ); bli_gemv( alpha, b, &w, &BLIS_ZERO, &z ); } bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); } blis-0.6.1/testsuite/src/test_hemm.h000066400000000000000000000034771360743507500174520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. 
Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_hemm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_hemv.c000066400000000000000000000226151360743507500174510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
 - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
 - Neither the name(s) of the copyright holder(s) nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
#include "test_libblis.h"


// Static variables.

// Name of the operation; passed to libblis_test_op_driver().
static char*     op_str                    = "hemv";
// One character per operand, listed in the order a, x, y.
static char*     o_types                   = "mvv"; // a x y
// One character per parameter; the experiment maps pc_str[0..2] to
// uploa, conja and conjx respectively.
static char*     p_types                   = "ucc"; // uploa conja conjx
// One { warn, pass } threshold pair per floating-point datatype.
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_hemv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_hemv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_hemv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ); void libblis_test_hemv_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_hemv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_hemv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_hemv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_hemv_experiment ); } } void libblis_test_hemv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; uplo_t uploa; conj_t conja; conj_t conjx; obj_t alpha, a, x, beta, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); bli_param_map_char_to_blis_conj( pc_str[1], &conja ); bli_param_map_char_to_blis_conj( pc_str[2], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &a ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &y_save ); // Set alpha and beta. if ( bli_obj_is_real( &y ) ) { bli_setsc( 1.0, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } else { bli_setsc( 0.5, 0.5, &alpha ); bli_setsc( -0.5, 0.5, &beta ); } // Set the structure and uplo properties of A. bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, make it densely Hermitian, and zero the unstored triangle // to ensure the implementation reads only from the stored region. 
libblis_test_mobj_randomize( params, TRUE, &a ); bli_mkherm( &a ); bli_mktrim( &a ); // Randomize x and y, and save y. libblis_test_vobj_randomize( params, TRUE, &x ); libblis_test_vobj_randomize( params, TRUE, &y ); bli_copyv( &y, &y_save ); // Apply the remaining parameters. bli_obj_set_conj( conja, &a ); bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &y_save, &y ); time = bli_clock(); libblis_test_hemv_impl( iface, &alpha, &a, &x, &beta, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_hemv_check( params, &alpha, &a, &x, &beta, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_hemv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_hemv( alpha, a, x, beta, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_hemv_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); obj_t v; obj_t norm; double junk; // // Pre-conditions: // - a is randomized and Hermitian. // - x is randomized. // - y_orig is randomized. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * conja(A) * conjx(x) // // is functioning correctly if // // normfv( y - v ) // // is negligible, where // // v = beta * y_orig + alpha * conja(A_dense) * x // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_copyv( y_orig, &v ); bli_mkherm( a ); bli_obj_set_struc( BLIS_GENERAL, a ); bli_obj_set_uplo( BLIS_DENSE, a ); bli_gemv( alpha, a, x, beta, &v ); bli_subv( &v, y ); bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); } blis-0.6.1/testsuite/src/test_hemv.h000066400000000000000000000034771360743507500174630ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_hemv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_her.c000066400000000000000000000226521360743507500172710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
#include "test_libblis.h"


// Static variables.

// Name of the operation; passed to libblis_test_op_driver().
static char*     op_str                    = "her";
// One character per operand, listed in the order x, a.
static char*     o_types                   = "vm";  // x a
// One character per parameter; the experiment maps pc_str[0] to uploa and
// pc_str[1] to conjx.
static char*     p_types                   = "uc";  // uploa conjx
// One { warn, pass } threshold pair per floating-point datatype.
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_her_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     );

void libblis_test_her_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     );

void libblis_test_her_impl
     (
       iface_t   iface,
       obj_t*    alpha,
       obj_t*    x,
       obj_t*    a
     );

void libblis_test_her_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         x,
       obj_t*         a,
       obj_t*         a_orig,
       double*        resid
     );


//
// Invoke the test entry points of the operations listed below (randv,
// randm, normfv, subv, copym, scal2v, dotv, gemv), in that order, before
// her is tested.
//
void libblis_test_her_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{
	libblis_test_randv( tdata, params, &(op->ops->randv) );
	libblis_test_randm( tdata, params, &(op->ops->randm) );
	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
	libblis_test_subv( tdata, params, &(op->ops->subv) );
	libblis_test_copym( tdata, params, &(op->ops->copym) );
	libblis_test_scal2v( tdata, params, &(op->ops->scal2v) );
	libblis_test_dotv( tdata, params, &(op->ops->dotv) );
	libblis_test_gemv( tdata, params, &(op->ops->gemv) );
}


//
// Entry point of the her test. Does nothing if the test was already run or
// is disabled; otherwise runs the dependency tests and then hands
// libblis_test_her_experiment to the generic test driver.
//
void libblis_test_her
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{

	// Return early if this test has already been done.
	if ( libblis_test_op_is_done( op ) ) return;

	// Return early if operation is disabled.
	if ( libblis_test_op_is_disabled( op ) ||
	     libblis_test_l2_is_disabled( op ) ) return;

	// Call dependencies first.
	if ( TRUE ) libblis_test_her_deps( tdata, params, op );

	// Execute the test driver for each implementation requested.
	//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_FRONT_END,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_her_experiment );
	}
}


//
// Run one her experiment and report its performance and residual.
//
//   dc_str  datatype characters; only dc_str[0] is used.
//   pc_str  parameter characters: [0] uploa, [1] conjx.
//   sc_str  storage characters: [0] -> x, [1] -> a (and a_save).
//   p_cur   current problem size, mapped through op->dim_spec[0].
//   perf    out: performance estimate of the fastest repeat.
//   resid   out: residual computed by libblis_test_her_check().
//
void libblis_test_her_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	num_t        datatype;

	dim_t        m;

	uplo_t       uploa;
	conj_t       conjx;

	obj_t        alpha, x, a;
	obj_t        a_save;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype,
	                          sc_str[0], m,    &x );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a_save );

	// Set alpha. The imaginary part is zero for every datatype.
	//bli_copysc( &BLIS_MINUS_ONE, &alpha );
	bli_setsc( -1.0, 0.0, &alpha );

	// Randomize x.
	libblis_test_vobj_randomize( params, TRUE, &x );

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_HERMITIAN, &a );
	bli_obj_set_uplo( uploa, &a );

	// Randomize A, make it densely Hermitian, and zero the unstored triangle
	// to ensure the implementation reads only from the stored region.
	libblis_test_mobj_randomize( params, TRUE, &a );
	bli_mkherm( &a );
	bli_mktrim( &a );

	// Give the saved copy the same structure and uplo properties as A,
	// then copy A into it.
	bli_obj_set_struc( BLIS_HERMITIAN, &a_save );
	bli_obj_set_uplo( uploa, &a_save );
	bli_copym( &a, &a_save );

	// Apply the remaining parameters.
	bli_obj_set_conj( conjx, &x );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Start every repeat from the saved, unmodified A.
		bli_copym( &a_save, &a );

		time = bli_clock();

		libblis_test_her_impl( iface, &alpha, &x, &a );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &a ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_her_check( params, &alpha, &x, &a, &a_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &a, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
	bli_obj_free( &a );
	bli_obj_free( &a_save );
}


//
// Dispatch to the her implementation selected by iface. Only
// BLIS_TEST_SEQ_FRONT_END is supported; any other value is reported through
// libblis_test_printf_error().
//
void libblis_test_her_impl
     (
       iface_t   iface,
       obj_t*    alpha,
       obj_t*    x,
       obj_t*    a
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_FRONT_END:
		bli_her( alpha, x, a );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}



void libblis_test_her_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         x,
       obj_t*         a,
       obj_t*         a_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( a );
	num_t  dt_real = bli_obj_dt_proj_to_real( a );

	dim_t  m_a     = bli_obj_length( a );

	obj_t  xh, t, v, w;
	obj_t  rho, norm;

	double junk;

	//
	// Pre-conditions:
	// - x is randomized.
	// - a is randomized and Hermitian.
	// Note:
	// - alpha must be real-valued.
// // Under these conditions, we assume that the implementation for // // A := A_orig + alpha * conjx(x) * conjx(x)^H // // is functioning correctly if // // normfv( v - w ) // // is negligible, where // // v = A * t // w = ( A_orig + alpha * conjx(x) * conjx(x)^H ) * t // = A_orig * t + alpha * conjx(x) * conjx(x)^H * t // = A_orig * t + alpha * conjx(x) * rho // = A_orig * t + w // bli_mkherm( a ); bli_mkherm( a_orig ); bli_obj_set_struc( BLIS_GENERAL, a ); bli_obj_set_struc( BLIS_GENERAL, a_orig ); bli_obj_set_uplo( BLIS_DENSE, a ); bli_obj_set_uplo( BLIS_DENSE, a_orig ); bli_obj_scalar_init_detached( dt, &rho ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); bli_obj_create( dt, m_a, 1, 0, 0, &w ); bli_obj_alias_with_conj( BLIS_CONJUGATE, x, &xh ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v ); bli_dotv( &xh, &t, &rho ); bli_mulsc( alpha, &rho ); bli_scal2v( &rho, x, &w ); bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w ); bli_subv( &w, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); } blis-0.6.1/testsuite/src/test_her.h000066400000000000000000000034761360743507500173010ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_her ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_her2.c000066400000000000000000000247151360743507500173550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "her2"; static char* o_types = "vvm"; // x y a static char* p_types = "ucc"; // uploa conjx conjy static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_her2_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_her2_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_her2_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ); void libblis_test_her2_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, obj_t* a_orig, double* resid ); void libblis_test_her2_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scal2v( tdata, params, &(op->ops->scal2v) ); libblis_test_dotv( tdata, params, &(op->ops->dotv) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_her2 ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_her2_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_her2_experiment ); } } void libblis_test_her2_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; uplo_t uploa; conj_t conjx, conjy; obj_t alpha, x, y, a; obj_t a_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, m, &a ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, m, &a_save ); // Set alpha. //bli_copysc( &BLIS_MINUS_ONE, &alpha ); bli_setsc( -1.0, 1.0, &alpha ); // Randomize x and y. libblis_test_vobj_randomize( params, TRUE, &x ); libblis_test_vobj_randomize( params, TRUE, &y ); // Set the structure and uplo properties of A. bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, make it densely Hermitian, and zero the unstored triangle // to ensure the implementation is reads only from the stored region. 
libblis_test_mobj_randomize( params, TRUE, &a ); bli_mkherm( &a ); bli_mktrim( &a ); // Save A and set its structure and uplo properties. bli_obj_set_struc( BLIS_HERMITIAN, &a_save ); bli_obj_set_uplo( uploa, &a_save ); bli_copym( &a, &a_save ); // Apply the remaining parameters. bli_obj_set_conj( conjx, &x ); bli_obj_set_conj( conjy, &y ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &a_save, &a ); time = bli_clock(); libblis_test_her2_impl( iface, &alpha, &x, &y, &a ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &a ) ) *perf *= 4.0; // Perform checks. libblis_test_her2_check( params, &alpha, &x, &y, &a, &a_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &a, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } void libblis_test_her2_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_her2( alpha, x, y, a ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_her2_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, obj_t* a_orig, double* resid ) { num_t dt = bli_obj_dt( a ); num_t dt_real = bli_obj_dt_proj_to_real( a ); dim_t m_a = bli_obj_length( a ); obj_t xh, yh, alphac; obj_t t, v, w1, w2; obj_t rho, norm; double junk; // // Pre-conditions: // - x is randomized. // - y is randomized. // - a is randomized and Hermitian. 
// // Under these conditions, we assume that the implementation for // // A := A_orig + alpha * conjx(x) * conjy(y)^H + conj(alpha) * conjy(y) * conjx(x)^H // // is functioning correctly if // // normfv( v - w ) // // is negligible, where // // v = A * t // w = ( A_orig + alpha * conjx(x) * conjy(y)^H + conj(alpha) * conjy(y) * conjx(x)^H ) * t // = A_orig * t + alpha * conjx(x) * conjy(y)^H * t + conj(alpha) * conjy(y) * conjx(x)^H * t // = A_orig * t + alpha * conjx(x) * conjy(y)^H * t + conj(alpha) * conjy(y) * rho // = A_orig * t + alpha * conjx(x) * conjy(y)^H * t + w1 // = A_orig * t + alpha * conjx(x) * rho + w1 // = A_orig * t + w2 + w1 // bli_mkherm( a ); bli_mkherm( a_orig ); bli_obj_set_struc( BLIS_GENERAL, a ); bli_obj_set_struc( BLIS_GENERAL, a_orig ); bli_obj_set_uplo( BLIS_DENSE, a ); bli_obj_set_uplo( BLIS_DENSE, a_orig ); bli_obj_scalar_init_detached( dt, &rho ); bli_obj_scalar_init_detached( dt, &alphac ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); bli_obj_create( dt, m_a, 1, 0, 0, &w1 ); bli_obj_create( dt, m_a, 1, 0, 0, &w2 ); bli_obj_alias_with_conj( BLIS_CONJUGATE, x, &xh ); bli_obj_alias_with_conj( BLIS_CONJUGATE, y, &yh ); bli_obj_alias_with_conj( BLIS_CONJUGATE, alpha, &alphac ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v ); bli_dotv( &xh, &t, &rho ); bli_mulsc( &alphac, &rho ); bli_scal2v( &rho, y, &w1 ); bli_dotv( &yh, &t, &rho ); bli_mulsc( alpha, &rho ); bli_scal2v( &rho, x, &w2 ); bli_addv( &w2, &w1 ); bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w1 ); bli_subv( &w1, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w1 ); bli_obj_free( &w2 ); } blis-0.6.1/testsuite/src/test_her2.h000066400000000000000000000034771360743507500173640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like 
libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_her2 ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_her2k.c000066400000000000000000000263361360743507500175310ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "her2k"; static char* o_types = "mmm"; // a b c static char* p_types = "uhh"; // uploc transa transb static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_her2k_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_her2k_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_her2k_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); void libblis_test_her2k_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ); void libblis_test_her2k_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); libblis_test_hemv( tdata, params, &(op->ops->hemv) ); } void libblis_test_her2k ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_her2k_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_her2k_experiment ); } } void libblis_test_her2k_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, k; uplo_t uploc; trans_t transa, transb; obj_t alpha, a, b, beta, c; obj_t c_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploc ); bli_param_map_char_to_blis_trans( pc_str[1], &transa ); bli_param_map_char_to_blis_trans( pc_str[2], &transb ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transa, sc_str[1], m, k, &a ); libblis_test_mobj_create( params, datatype, transb, sc_str[2], m, k, &b ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &c ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &c_save ); // Set alpha and beta. if ( bli_obj_is_real( &c ) ) { bli_setsc( 0.8, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } else { // For her2k, alpha may be complex, but beta must be real-valued // (in order to preserve the Hermitian structure of C). bli_setsc( 0.8, 0.5, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } // Randomize A and B. 
libblis_test_mobj_randomize( params, TRUE, &a ); libblis_test_mobj_randomize( params, TRUE, &b ); // Set the structure and uplo properties of C. bli_obj_set_struc( BLIS_HERMITIAN, &c ); bli_obj_set_uplo( uploc, &c ); // Randomize A, make it densely Hermitian, and zero the unstored triangle // to ensure the implementation is reads only from the stored region. libblis_test_mobj_randomize( params, TRUE, &c ); bli_mkherm( &c ); bli_mktrim( &c ); // Save C and set its structure and uplo properties. bli_obj_set_struc( BLIS_HERMITIAN, &c_save ); bli_obj_set_uplo( uploc, &c_save ); bli_copym( &c, &c_save ); // Apply the remaining parameters. bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &c_save, &c ); time = bli_clock(); libblis_test_her2k_impl( iface, &alpha, &a, &b, &beta, &c ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &c ) ) *perf *= 4.0; // Perform checks. libblis_test_her2k_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &c, perf, resid ); // Free the test objects. 
bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } void libblis_test_her2k_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_her2k( alpha, a, b, beta, c ); //bli_her2k4m( alpha, a, b, beta, c ); //bli_her2k3m( alpha, a, b, beta, c ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_her2k_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = bli_obj_dt( c ); num_t dt_real = bli_obj_dt_proj_to_real( c ); dim_t m = bli_obj_length( c ); dim_t k = bli_obj_width_after_trans( a ); obj_t alphac, ah, bh; obj_t norm; obj_t t, v, w1, w2, z; double junk; // // Pre-conditions: // - a is randomized. // - b is randomized. // - c_orig is randomized and Hermitian. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // - beta must be real-valued. 
// // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * transa(A) * transb(B)^H + conj(alpha) * transb(B) * transa(A)^H // // is functioning correctly if // // normfv( v - z ) // // is negligible, where // // v = C * t // z = ( beta * C_orig + alpha * transa(A) * transb(B)^H + conj(alpha) * transb(B) * transa(A)^H ) * t // = beta * C_orig * t + alpha * transa(A) * transb(B)^H * t + conj(alpha) * transb(B) * transa(A)^H * t // = beta * C_orig * t + alpha * transa(A) * transb(B)^H * t + conj(alpha) * transb(B) * w2 // = beta * C_orig * t + alpha * transa(A) * w1 + conj(alpha) * transb(B) * w2 // = beta * C_orig * t + alpha * transa(A) * w1 + z // = beta * C_orig * t + z // bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, a, &ah ); bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, b, &bh ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_scalar_init_detached_copy_of( dt, BLIS_CONJUGATE, alpha, &alphac ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w1 ); bli_obj_create( dt, k, 1, 0, 0, &w2 ); bli_obj_create( dt, m, 1, 0, 0, &z ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_hemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); bli_gemv( &BLIS_ONE, &ah, &t, &BLIS_ZERO, &w2 ); bli_gemv( &BLIS_ONE, &bh, &t, &BLIS_ZERO, &w1 ); bli_gemv( alpha, a, &w1, &BLIS_ZERO, &z ); bli_gemv( &alphac, b, &w2, &BLIS_ONE, &z ); bli_hemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w1 ); bli_obj_free( &w2 ); bli_obj_free( &z ); } blis-0.6.1/testsuite/src/test_her2k.h000066400000000000000000000035001360743507500175220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_her2k ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_herk.c000066400000000000000000000240531360743507500174410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"
#include "test_libblis.h"


// Static variables.
static char*     op_str                    = "herk";
static char*     o_types                   = "mm";  // a c
static char*     p_types                   = "uh";  // uploc transa
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_herk_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     );

void libblis_test_herk_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     );

void libblis_test_herk_impl
     (
       iface_t   iface,
       obj_t*    alpha,
       obj_t*    a,
       obj_t*    beta,
       obj_t*    c
     );

void libblis_test_herk_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         beta,
       obj_t*         c,
       obj_t*         c_orig,
       double*        resid
     );



// Runs the tests for every operation that the herk experiment and its
// correctness check call, so that those are exercised before herk itself.
void libblis_test_herk_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{
	libblis_test_randv( tdata, params, &(op->ops->randv) );
	libblis_test_randm( tdata, params, &(op->ops->randm) );
	libblis_test_setv( tdata, params, &(op->ops->setv) );
	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
	libblis_test_subv( tdata, params, &(op->ops->subv) );
	libblis_test_scalv( tdata, params, &(op->ops->scalv) );
	libblis_test_copym( tdata, params, &(op->ops->copym) );
	libblis_test_scalm( tdata, params, &(op->ops->scalm) );
	libblis_test_gemv( tdata, params, &(op->ops->gemv) );
	libblis_test_hemv( tdata, params, &(op->ops->hemv) );
}



// Entry point of the herk test module: skips the test when it has already
// run or is disabled, runs the dependencies, then hands the experiment
// callback to the generic operation driver.
void libblis_test_herk
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{

	// Return early if this test has already been done.
	if ( libblis_test_op_is_done( op ) ) return;

	// Return early if operation is disabled.
	if ( libblis_test_op_is_disabled( op ) ||
	     libblis_test_l3_is_disabled( op ) ) return;

	// Call dependencies first.
	libblis_test_herk_deps( tdata, params, op );

	// Execute the test driver for each implementation requested.
	//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_FRONT_END,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_herk_experiment );
	}
}



// Runs one herk experiment for the given datatype (dc_str), parameter
// (pc_str) and storage (sc_str) combination at problem size p_cur. The best
// observed performance is written to *perf and the residual computed by
// libblis_test_herk_check() to *resid.
void libblis_test_herk_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	num_t        datatype;

	dim_t        m, k;

	uplo_t       uploc;
	trans_t      transa;

	obj_t        alpha, a, beta, c;
	obj_t        c_save;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploc );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[1], m, k, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &c_save );

	// Set alpha and beta. For herk, alpha and beta must both be real-valued,
	// even in the complex case (in order to preserve the Hermitian structure
	// of C), so the same values are used for every datatype.
	bli_setsc(  1.2,  0.0, &alpha );
	bli_setsc( -1.0,  0.0, &beta );

	// Randomize A.
	libblis_test_mobj_randomize( params, TRUE, &a );

	// Set the structure and uplo properties of C.
	bli_obj_set_struc( BLIS_HERMITIAN, &c );
	bli_obj_set_uplo( uploc, &c );

	// Randomize C, make it densely Hermitian, and zero the unstored triangle
	// to ensure the implementation reads only from the stored region.
	libblis_test_mobj_randomize( params, TRUE, &c );
	bli_mkherm( &c );
	bli_mktrim( &c );

	// Save C and set its structure and uplo properties.
	bli_obj_set_struc( BLIS_HERMITIAN, &c_save );
	bli_obj_set_uplo( uploc, &c_save );
	bli_copym( &c, &c_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, &a );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore the original C so that every repeat starts from the same
		// input.
		bli_copym( &c_save, &c );

		time = bli_clock();

		libblis_test_herk_impl( iface, &alpha, &a, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &c ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_herk_check( params, &alpha, &a, &beta, &c, &c_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}



// Dispatches to the herk implementation selected by iface. Only the
// sequential front-end is supported; any other value is reported as an error.
void libblis_test_herk_impl
     (
       iface_t   iface,
       obj_t*    alpha,
       obj_t*    a,
       obj_t*    beta,
       obj_t*    c
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_FRONT_END:
		bli_herk( alpha, a, beta, c );
		//bli_herk4m( alpha, a, beta, c );
		//bli_herk3m( alpha, a, beta, c );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}



// Computes the residual of a herk result by applying both the computed C and
// the reference expression to a random vector t and taking the Frobenius
// norm of the difference. The norm is stored in *resid.
void libblis_test_herk_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         beta,
       obj_t*         c,
       obj_t*         c_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( c );
	num_t  dt_real = bli_obj_dt_proj_to_real( c );

	dim_t  m       = bli_obj_length( c );
	dim_t  k       = bli_obj_width_after_trans( a );

	obj_t  ah;
	obj_t  norm;
	obj_t  t, v, w, z;

	double junk;

	//
	// Pre-conditions:
	// - a is randomized.
	// - c_orig is randomized and Hermitian.
	// Note:
	// - alpha and beta must be real-valued.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   C := beta * C_orig + alpha * transa(A) * transa(A)^H
	//
	// is functioning correctly if
	//
	//   normfv( v - z )
	//
	// is negligible, where
	//
	//   v = C * t
	//   z = ( beta * C_orig + alpha * transa(A) * transa(A)^H ) * t
	//     = beta * C_orig * t + alpha * transa(A) * transa(A)^H * t
	//     = beta * C_orig * t + alpha * transa(A) * w
	//     = beta * C_orig * t + z
	//
	// (In the last line, the z on the right-hand side is the intermediate
	// value alpha * transa(A) * w, which is accumulated into in place.)
	//

	bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, a, &ah );

	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, m, 1, 0, 0, &t );
	bli_obj_create( dt, m, 1, 0, 0, &v );
	bli_obj_create( dt, k, 1, 0, 0, &w );
	bli_obj_create( dt, m, 1, 0, 0, &z );

	libblis_test_vobj_randomize( params, TRUE, &t );

	// v = C * t
	bli_hemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );

	// w = transa(A)^H * t;  z = alpha * transa(A) * w;  z += beta * C_orig * t
	bli_gemv( &BLIS_ONE, &ah, &t, &BLIS_ZERO, &w );
	bli_gemv( alpha, a, &w, &BLIS_ZERO, &z );
	bli_hemv( beta, c_orig, &t, &BLIS_ONE, &z );

	// resid = normfv( v - z )
	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}

blis-0.6.1/testsuite/src/test_herk.h000066400000000000000000000034771360743507500174550ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_herk ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_libblis.c000066400000000000000000003661101360743507500201330ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Global variables. char libblis_test_binary_name[ MAX_BINARY_NAME_LENGTH + 1 ]; char libblis_test_parameters_filename[ MAX_FILENAME_LENGTH + 1 ]; char libblis_test_operations_filename[ MAX_FILENAME_LENGTH + 1 ]; char libblis_test_pass_string[ MAX_PASS_STRING_LENGTH + 1 ]; char libblis_test_warn_string[ MAX_PASS_STRING_LENGTH + 1 ]; char libblis_test_fail_string[ MAX_PASS_STRING_LENGTH + 1 ]; char libblis_test_store_chars[ NUM_OPERAND_TYPES ][ MAX_STORE_VALS_PER_TYPE + 1 ]; char libblis_test_param_chars[ NUM_PARAM_TYPES ][ MAX_PARAM_VALS_PER_TYPE + 1 ]; char libblis_test_sp_chars[ 2 + 1 ] = "sc"; char libblis_test_dp_chars[ 2 + 1 ] = "dz"; char libblis_test_rd_chars[ 2 + 1 ] = "sd"; char libblis_test_cd_chars[ 2 + 1 ] = "cz"; char libblis_test_dt_chars[ 4 + 1 ] = "sdcz"; int main( int argc, char** argv ) { test_params_t params; test_ops_t ops; // Initialize libblis. //bli_init(); // Initialize some strings. libblis_test_init_strings(); // Parse the command line parameters. 
libblis_test_parse_command_line( argc, argv ); // Read the global parameters file. libblis_test_read_params_file( libblis_test_parameters_filename, ¶ms ); // Read the operations parameter file. libblis_test_read_ops_file( libblis_test_operations_filename, &ops ); // Walk through all test modules. //libblis_test_all_ops( ¶ms, &ops ); libblis_test_thread_decorator( ¶ms, &ops ); // Finalize libblis. bli_finalize(); // Return peacefully. return 0; } #if 0 typedef struct thread_data { test_params_t* params; test_ops_t* ops; unsigned int nt; unsigned int id; unsigned int xc; //pthread_mutex_t* mutex; pthread_barrier_t* barrier; } thread_data_t; #endif void* libblis_test_thread_entry( void* tdata_void ) { thread_data_t* tdata = tdata_void; test_params_t* params = tdata->params; test_ops_t* ops = tdata->ops; // Walk through all test modules. libblis_test_all_ops( tdata, params, ops ); return NULL; } void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops ) { // Query the total number of threads to simulate. size_t nt = ( size_t )params->n_app_threads; // Allocate an array of pthread objects and auxiliary data structs to pass // to the thread entry functions. #ifdef BLIS_ENABLE_MEM_TRACING printf( "libblis_test_thread_decorator(): " ); #endif bli_pthread_t* pthread = bli_malloc_user( sizeof( bli_pthread_t ) * nt ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "libblis_test_thread_decorator(): " ); #endif thread_data_t* tdata = bli_malloc_user( sizeof( thread_data_t ) * nt ); // Allocate a mutex for the threads to share. //bli_pthread_mutex_t* mutex = bli_malloc_user( sizeof( bli_pthread_mutex_t ) ); // Allocate a barrier for the threads to share. #ifdef BLIS_ENABLE_MEM_TRACING printf( "libblis_test_thread_decorator(): " ); #endif bli_pthread_barrier_t* barrier = bli_malloc_user( sizeof( bli_pthread_barrier_t ) ); // Initialize the mutex. //bli_pthread_mutex_init( mutex, NULL ); // Initialize the barrier for nt threads. 
bli_pthread_barrier_init( barrier, NULL, nt ); // NOTE: We must iterate backwards so that the chief thread (thread id 0) // can spawn all other threads before proceeding with its own computation. // ALSO: Since we need to let the counter go negative, id must be a signed // integer here. for ( signed int id = nt - 1; 0 <= id; id-- ) { tdata[id].params = params; tdata[id].ops = ops; tdata[id].nt = nt; tdata[id].id = id; tdata[id].xc = 0; //tdata[id].mutex = mutex; tdata[id].barrier = barrier; // Spawn additional threads for ids greater than 1. if ( id != 0 ) bli_pthread_create( &pthread[id], NULL, libblis_test_thread_entry, &tdata[id] ); else libblis_test_thread_entry( ( void* )(&tdata[0]) ); } // Thread 0 waits for additional threads to finish. for ( unsigned int id = 1; id < nt; id++ ) { bli_pthread_join( pthread[id], NULL ); } // Destroy the mutex. //bli_pthread_mutex_destroy( mutex ); // Destroy the barrier. bli_pthread_barrier_destroy( barrier ); // Free the pthread-related memory. #ifdef BLIS_ENABLE_MEM_TRACING printf( "libblis_test_thread_decorator(): " ); #endif bli_free_user( pthread ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "libblis_test_thread_decorator(): " ); #endif bli_free_user( tdata ); #ifdef BLIS_ENABLE_MEM_TRACING printf( "libblis_test_thread_decorator(): " ); #endif //bli_free_user( mutex ); bli_free_user( barrier ); } void libblis_test_all_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ) { // Test the utility operations. libblis_test_utility_ops( tdata, params, ops ); // Test the level-1v operations. libblis_test_level1v_ops( tdata, params, ops ); // Test the level-1m operations. libblis_test_level1m_ops( tdata, params, ops ); // Test the level-1f operations. libblis_test_level1f_ops( tdata, params, ops ); // Test the level-2 operations. libblis_test_level2_ops( tdata, params, ops ); // Test the level-3 micro-kernels. libblis_test_level3_ukrs( tdata, params, ops ); // Test the level-3 operations. 
libblis_test_level3_ops( tdata, params, ops ); } void libblis_test_utility_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ) { libblis_test_randv( tdata, params, &(ops->randv) ); libblis_test_randm( tdata, params, &(ops->randm) ); } void libblis_test_level1v_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ) { libblis_test_addv( tdata, params, &(ops->addv) ); libblis_test_amaxv( tdata, params, &(ops->amaxv) ); libblis_test_axpbyv( tdata, params, &(ops->axpbyv) ); libblis_test_axpyv( tdata, params, &(ops->axpyv) ); libblis_test_copyv( tdata, params, &(ops->copyv) ); libblis_test_dotv( tdata, params, &(ops->dotv) ); libblis_test_dotxv( tdata, params, &(ops->dotxv) ); libblis_test_normfv( tdata, params, &(ops->normfv) ); libblis_test_scalv( tdata, params, &(ops->scalv) ); libblis_test_scal2v( tdata, params, &(ops->scal2v) ); libblis_test_setv( tdata, params, &(ops->setv) ); libblis_test_subv( tdata, params, &(ops->subv) ); libblis_test_xpbyv( tdata, params, &(ops->xpbyv) ); } void libblis_test_level1m_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ) { libblis_test_addm( tdata, params, &(ops->addm) ); libblis_test_axpym( tdata, params, &(ops->axpym) ); libblis_test_copym( tdata, params, &(ops->copym) ); libblis_test_normfm( tdata, params, &(ops->normfm) ); libblis_test_scalm( tdata, params, &(ops->scalm) ); libblis_test_scal2m( tdata, params, &(ops->scal2m) ); libblis_test_setm( tdata, params, &(ops->setm) ); libblis_test_subm( tdata, params, &(ops->subm) ); libblis_test_xpbym( tdata, params, &(ops->xpbym) ); } void libblis_test_level1f_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ) { libblis_test_axpy2v( tdata, params, &(ops->axpy2v) ); libblis_test_dotaxpyv( tdata, params, &(ops->dotaxpyv) ); libblis_test_axpyf( tdata, params, &(ops->axpyf) ); libblis_test_dotxf( tdata, params, &(ops->dotxf) ); libblis_test_dotxaxpyf( tdata, params, &(ops->dotxaxpyf) ); } void libblis_test_level2_ops( 
thread_data_t* tdata, test_params_t* params, test_ops_t* ops ) { libblis_test_gemv( tdata, params, &(ops->gemv) ); libblis_test_ger( tdata, params, &(ops->ger) ); libblis_test_hemv( tdata, params, &(ops->hemv) ); libblis_test_her( tdata, params, &(ops->her) ); libblis_test_her2( tdata, params, &(ops->her2) ); libblis_test_symv( tdata, params, &(ops->symv) ); libblis_test_syr( tdata, params, &(ops->syr) ); libblis_test_syr2( tdata, params, &(ops->syr2) ); libblis_test_trmv( tdata, params, &(ops->trmv) ); libblis_test_trsv( tdata, params, &(ops->trsv) ); } void libblis_test_level3_ukrs( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ) { libblis_test_gemm_ukr( tdata, params, &(ops->gemm_ukr) ); libblis_test_trsm_ukr( tdata, params, &(ops->trsm_ukr) ); libblis_test_gemmtrsm_ukr( tdata, params, &(ops->gemmtrsm_ukr) ); } void libblis_test_level3_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ) { libblis_test_gemm( tdata, params, &(ops->gemm) ); libblis_test_hemm( tdata, params, &(ops->hemm) ); libblis_test_herk( tdata, params, &(ops->herk) ); libblis_test_her2k( tdata, params, &(ops->her2k) ); libblis_test_symm( tdata, params, &(ops->symm) ); libblis_test_syrk( tdata, params, &(ops->syrk) ); libblis_test_syr2k( tdata, params, &(ops->syr2k) ); libblis_test_trmm( tdata, params, &(ops->trmm) ); libblis_test_trmm3( tdata, params, &(ops->trmm3) ); libblis_test_trsm( tdata, params, &(ops->trsm) ); } void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) { FILE* input_stream; // Attempt to open input file corresponding to input_filename as // read-only/binary. input_stream = fopen( input_filename, "rb" ); libblis_test_fopen_check_stream( input_filename, input_stream ); // Initialize the individual override field to FALSE. ops->indiv_over = FALSE; // Begin reading operations input file. 
// Section overrides libblis_test_read_section_override( ops, input_stream, &(ops->util_over) ); libblis_test_read_section_override( ops, input_stream, &(ops->l1v_over) ); libblis_test_read_section_override( ops, input_stream, &(ops->l1m_over) ); libblis_test_read_section_override( ops, input_stream, &(ops->l1f_over) ); libblis_test_read_section_override( ops, input_stream, &(ops->l2_over) ); libblis_test_read_section_override( ops, input_stream, &(ops->l3ukr_over) ); libblis_test_read_section_override( ops, input_stream, &(ops->l3_over) ); // dimensions n_param operation // Utility operations libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->randv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 0, &(ops->randm) ); // Level-1v libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->addv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->amaxv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpbyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->copyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->dotv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->dotxv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->normfv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->scalv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->scal2v) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->setv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->subv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, 
BLIS_TEST_DIMS_M, 1, &(ops->xpbyv) ); // Level-1m libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->addm) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->axpym) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->copym) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 0, &(ops->normfm) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->scalm) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->scal2m) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 0, &(ops->setm) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->subm) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->xpbym) ); // Level-1f libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->axpy2v) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 3, &(ops->dotaxpyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MF, 2, &(ops->axpyf) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MF, 2, &(ops->dotxf) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MF, 4, &(ops->dotxaxpyf) ); // Level-2 libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 2, &(ops->gemv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 2, &(ops->ger) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 3, &(ops->hemv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->her) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 3, &(ops->her2) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 3, &(ops->symv) ); 
libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->syr) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 3, &(ops->syr2) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 3, &(ops->trmv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 3, &(ops->trsv) ); // Level-3 micro-kernels libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_K, 0, &(ops->gemm_ukr) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_NO_DIMS, 1, &(ops->trsm_ukr) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_K, 1, &(ops->gemmtrsm_ukr) ); // Level-3 libblis_test_read_op_info( ops, input_stream, BLIS_GEMM, BLIS_TEST_DIMS_MNK, 2, &(ops->gemm) ); libblis_test_read_op_info( ops, input_stream, BLIS_HEMM, BLIS_TEST_DIMS_MN, 4, &(ops->hemm) ); libblis_test_read_op_info( ops, input_stream, BLIS_HERK, BLIS_TEST_DIMS_MK, 2, &(ops->herk) ); libblis_test_read_op_info( ops, input_stream, BLIS_HER2K, BLIS_TEST_DIMS_MK, 3, &(ops->her2k) ); libblis_test_read_op_info( ops, input_stream, BLIS_SYMM, BLIS_TEST_DIMS_MN, 4, &(ops->symm) ); libblis_test_read_op_info( ops, input_stream, BLIS_SYRK, BLIS_TEST_DIMS_MK, 2, &(ops->syrk) ); libblis_test_read_op_info( ops, input_stream, BLIS_SYR2K, BLIS_TEST_DIMS_MK, 3, &(ops->syr2k) ); libblis_test_read_op_info( ops, input_stream, BLIS_TRMM, BLIS_TEST_DIMS_MN, 4, &(ops->trmm) ); libblis_test_read_op_info( ops, input_stream, BLIS_TRMM3, BLIS_TEST_DIMS_MN, 5, &(ops->trmm3) ); libblis_test_read_op_info( ops, input_stream, BLIS_TRSM, BLIS_TEST_DIMS_MN, 4, &(ops->trsm) ); // Output the section overrides. libblis_test_output_section_overrides( stdout, ops ); // Close the file. 
fclose( input_stream ); } void libblis_test_read_params_file( char* input_filename, test_params_t* params ) { FILE* input_stream; char buffer[ INPUT_BUFFER_SIZE ]; char temp[ INPUT_BUFFER_SIZE ]; int i; // Attempt to open input file corresponding to input_filename as // read-only/binary. input_stream = fopen( input_filename, "rb" ); libblis_test_fopen_check_stream( input_filename, input_stream ); // Read the number of repeats. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->n_repeats) ); // Read the matrix storage schemes to test. We should have at most three: // 'r' for row-major, 'c' for column-major, and 'g' for general strides. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%s ", temp ); params->n_mstorage = strlen( temp ); if ( params->n_mstorage > MAX_NUM_MSTORAGE ) { libblis_test_printf_error( "Detected too many matrix storage schemes (%u) in input file.\n", params->n_mstorage ); } strcpy( params->storage[ BLIS_TEST_MATRIX_OPERAND ], temp ); // Read the vector storage schemes to test. We should have at most four: // 'r' for row vectors with unit stride, 'c' for column vectors with unit // stride, 'i' for row vectors with non-unit stride, and 'j' for column // vectors with non-unit stride. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%s ", temp ); params->n_vstorage = strlen( temp ); if ( params->n_vstorage > MAX_NUM_VSTORAGE ) { libblis_test_printf_error( "Detected too many vector storage schemes (%u) in input file.\n", params->n_vstorage ); } strcpy( params->storage[ BLIS_TEST_VECTOR_OPERAND ], temp ); // Read whether to mix all storage combinations. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->mix_all_storage) ); // Read whether to perform all tests with aligned addresses and ldims. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->alignment) ); // Read the randomization method. 
libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->rand_method) ); if ( params->rand_method != BLIS_TEST_RAND_REAL_VALUES && params->rand_method != BLIS_TEST_RAND_NARROW_POW2 ) { libblis_test_printf_error( "Invalid randomization method (%u) in input file.\n", params->rand_method ); } // Read the general stride "spacing". libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->gs_spacing) ); // Overwrite the existing storage character arrays with the sets provided. strcpy( libblis_test_store_chars[BLIS_TEST_MATRIX_OPERAND], params->storage[BLIS_TEST_MATRIX_OPERAND] ); strcpy( libblis_test_store_chars[BLIS_TEST_VECTOR_OPERAND], params->storage[BLIS_TEST_VECTOR_OPERAND] ); // Read the datatypes to test. We should have at most four: 's', 'd', 'c', // and 'z'. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%s ", temp ); params->n_datatypes = strlen( temp ); if ( params->n_datatypes > MAX_NUM_DATATYPES ) { libblis_test_printf_error( "Detected too many datatype requests (%u) in input file.\n", params->n_datatypes ); } for( i = 0; i < params->n_datatypes; ++i ) { //if ( temp[i] == 's' ) params->datatype[i] = BLIS_FLOAT; //else if ( temp[i] == 'd' ) params->datatype[i] = BLIS_DOUBLE; //else if ( temp[i] == 'c' ) params->datatype[i] = BLIS_SCOMPLEX; //else if ( temp[i] == 'z' ) params->datatype[i] = BLIS_DCOMPLEX; // Map the char in temp[i] to the corresponding num_t value. bli_param_map_char_to_blis_dt( temp[i], &(params->datatype[i]) ); params->datatype_char[i] = temp[i]; } // Read whether to test gemm with mixed-domain operands. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->mixed_domain) ); // Read whether to test gemm with mixed-precision operands. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->mixed_precision) ); // Read the initial problem size to test. 
libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->p_first) ); // Read the maximum problem size to test. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->p_max) ); // Read the problem size increment to test. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->p_inc) ); // Read whether to enable 3mh. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->ind_enable[ BLIS_3MH ]) ); // Read whether to enable 3m1. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->ind_enable[ BLIS_3M1 ]) ); // Read whether to enable 4mh. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->ind_enable[ BLIS_4MH ]) ); // Read whether to enable 4m1b (4mb). libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->ind_enable[ BLIS_4M1B ]) ); // Read whether to enable 4m1a (4m1). libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->ind_enable[ BLIS_4M1A ]) ); // Read whether to enable 1m. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->ind_enable[ BLIS_1M ]) ); // Read whether to native (complex) execution. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->ind_enable[ BLIS_NAT ]) ); // Read whether to simulate application-level threading. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->n_app_threads) ); // Silently interpret non-positive numbers the same as 1. if ( params->n_app_threads < 1 ) params->n_app_threads = 1; // Disable induced methods when simulating more than one application // threads. 
if ( params->n_app_threads > 1 ) { if ( params->ind_enable[ BLIS_3MH ] || params->ind_enable[ BLIS_3M1 ] || params->ind_enable[ BLIS_4MH ] || params->ind_enable[ BLIS_4M1B ] || params->ind_enable[ BLIS_4M1A ] || params->ind_enable[ BLIS_1M ] ) { // Due to an inherent race condition in the way induced methods // are enabled and disabled at runtime, all induced methods must be // disabled when simulating multiple application threads. libblis_test_printf_infoc( "simulating multiple application threads; disabling induced methods.\n" ); params->ind_enable[ BLIS_3MH ] = 0; params->ind_enable[ BLIS_3M1 ] = 0; params->ind_enable[ BLIS_4MH ] = 0; params->ind_enable[ BLIS_4M1B ] = 0; params->ind_enable[ BLIS_4M1A ] = 0; params->ind_enable[ BLIS_1M ] = 0; } } // Read the requested error-checking level. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->error_checking_level) ); // Read the requested course of action if a test fails. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%c ", &(params->reaction_to_failure) ); if ( params->reaction_to_failure != ON_FAILURE_IGNORE_CHAR && params->reaction_to_failure != ON_FAILURE_SLEEP_CHAR && params->reaction_to_failure != ON_FAILURE_ABORT_CHAR ) { libblis_test_printf_error( "Invalid reaction-to-failure character code (%c) in input file.\n", params->reaction_to_failure ); } // Read whether to output in matlab format. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->output_matlab_format) ); // Read whether to output to files in addition to stdout. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%u ", &(params->output_files) ); // Close the file. fclose( input_stream ); // Output the parameter struct. 
libblis_test_output_params_struct( stdout, params ); } void libblis_test_read_section_override( test_ops_t* ops, FILE* input_stream, int* override ) { char buffer[ INPUT_BUFFER_SIZE ]; // Read the line for the section override switch. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%d ", override ); } void libblis_test_read_op_info( test_ops_t* ops, FILE* input_stream, opid_t opid, dimset_t dimset, unsigned int n_params, test_op_t* op ) { char buffer[ INPUT_BUFFER_SIZE ]; char temp[ INPUT_BUFFER_SIZE ]; int i, p; // Initialize the operation type field. op->opid = opid; // Read the line for the overall operation switch. libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%d ", &(op->op_switch) ); // Check the op_switch for the individual override value. if ( op->op_switch == ENABLE_ONLY ) { ops->indiv_over = TRUE; } op->n_dims = libblis_test_get_n_dims_from_dimset( dimset ); op->dimset = dimset; if ( op->n_dims > MAX_NUM_DIMENSIONS ) { libblis_test_printf_error( "Detected too many dimensions (%u) in input file to store.\n", op->n_dims ); } //printf( "n_dims = %u\n", op->n_dims ); // If there is at least one dimension for the current operation, read the // dimension specifications, which encode the actual dimensions or the // dimension ratios for each dimension. if ( op->n_dims > 0 ) { libblis_test_read_next_line( buffer, input_stream ); for ( i = 0, p = 0; i < op->n_dims; ++i ) { //printf( "buffer[p]: %s\n", &buffer[p] ); // Advance until we hit non-whitespace (ie: the next number). for ( ; isspace( buffer[p] ); ++p ) ; //printf( "buffer[p] after: %s\n", &buffer[p] ); sscanf( &buffer[p], "%d", &(op->dim_spec[i]) ); //printf( "dim[%d] = %d\n", i, op->dim_spec[i] ); // Advance until we hit whitespace (ie: the space before the next number). for ( ; !isspace( buffer[p] ); ++p ) ; } } // If there is at least one parameter for the current operation, read the // parameter chars, which encode which parameter combinations to test. 
if ( n_params > 0 ) { libblis_test_read_next_line( buffer, input_stream ); sscanf( buffer, "%s ", temp ); op->n_params = strlen( temp ); if ( op->n_params > MAX_NUM_PARAMETERS ) { libblis_test_printf_error( "Detected too many parameters (%u) in input file.\n", op->n_params ); } if ( op->n_params != n_params ) { libblis_test_printf_error( "Number of parameters specified by caller does not match length of parameter string in input file. strlen( temp ) = %u; n_params = %u\n", op->n_params, n_params ); } strcpy( op->params, temp ); } else { op->n_params = 0; strcpy( op->params, "" ); } // Initialize the "test done" switch. op->test_done = FALSE; // Initialize the parent pointer. op->ops = ops; } void libblis_test_output_section_overrides( FILE* os, test_ops_t* ops ) { libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- Section overrides ---\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "Utility operations %d\n", ops->util_over ); libblis_test_fprintf_c( os, "Level-1v operations %d\n", ops->l1v_over ); libblis_test_fprintf_c( os, "Level-1m operations %d\n", ops->l1m_over ); libblis_test_fprintf_c( os, "Level-1f operations %d\n", ops->l1f_over ); libblis_test_fprintf_c( os, "Level-2 operations %d\n", ops->l2_over ); libblis_test_fprintf_c( os, "Level-3 micro-kernels %d\n", ops->l3ukr_over ); libblis_test_fprintf_c( os, "Level-3 operations %d\n", ops->l3_over ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf( os, "\n" ); } void libblis_test_output_params_struct( FILE* os, test_params_t* params ) { int i; //char int_type_size_str[8]; gint_t int_type_size; ind_t im; cntx_t* cntx; cntx_t* cntx_c; cntx_t* cntx_z; // If bli_info_get_int_type_size() returns 32 or 64, the size is forced. // Otherwise, the size is chosen automatically. We query the result of // that automatic choice via sizeof(gint_t). 
if ( bli_info_get_int_type_size() == 32 || bli_info_get_int_type_size() == 64 ) int_type_size = bli_info_get_int_type_size(); else int_type_size = sizeof(gint_t) * 8; char impl_str[16]; char jrir_str[16]; // Describe the threading implementation. if ( bli_info_get_enable_openmp() ) sprintf( impl_str, "openmp" ); else if ( bli_info_get_enable_pthreads() ) sprintf( impl_str, "pthreads" ); else /* threading disabled */ sprintf( impl_str, "disabled" ); // Describe the status of jrir thread partitioning. if ( bli_info_get_thread_part_jrir_slab() ) sprintf( jrir_str, "slab" ); else /*bli_info_get_thread_part_jrir_rr()*/ sprintf( jrir_str, "round-robin" ); char nt_str[16]; char jc_nt_str[16]; char pc_nt_str[16]; char ic_nt_str[16]; char jr_nt_str[16]; char ir_nt_str[16]; // Query the number of ways of parallelism per loop (and overall) and // convert these values into strings, with "unset" being used if the // value returned was -1 (indicating the environment variable was unset). dim_t nt = bli_thread_get_num_threads(); dim_t jc_nt = bli_thread_get_jc_nt(); dim_t pc_nt = bli_thread_get_pc_nt(); dim_t ic_nt = bli_thread_get_ic_nt(); dim_t jr_nt = bli_thread_get_jr_nt(); dim_t ir_nt = bli_thread_get_ir_nt(); if ( nt == -1 ) sprintf( nt_str, "unset" ); else sprintf( nt_str, "%d", ( int ) nt ); if ( jc_nt == -1 ) sprintf( jc_nt_str, "unset" ); else sprintf( jc_nt_str, "%d", ( int )jc_nt ); if ( pc_nt == -1 ) sprintf( pc_nt_str, "unset" ); else sprintf( pc_nt_str, "%d", ( int )pc_nt ); if ( ic_nt == -1 ) sprintf( ic_nt_str, "unset" ); else sprintf( ic_nt_str, "%d", ( int )ic_nt ); if ( jr_nt == -1 ) sprintf( jr_nt_str, "unset" ); else sprintf( jr_nt_str, "%d", ( int )jr_nt ); if ( ir_nt == -1 ) sprintf( ir_nt_str, "unset" ); else sprintf( ir_nt_str, "%d", ( int )ir_nt ); // Set up rntm_t objects for each of the four families: // gemm, herk, trmm, trsm. 
rntm_t gemm, herk, trmm_l, trmm_r, trsm_l, trsm_r; dim_t m = 1000, n = 1000, k = 1000; bli_rntm_init_from_global( &gemm ); bli_rntm_init_from_global( &herk ); bli_rntm_init_from_global( &trmm_l ); bli_rntm_init_from_global( &trmm_r ); bli_rntm_init_from_global( &trsm_l ); bli_rntm_init_from_global( &trsm_r ); bli_rntm_set_ways_for_op( BLIS_GEMM, BLIS_LEFT, m, n, k, &gemm ); bli_rntm_set_ways_for_op( BLIS_HERK, BLIS_LEFT, m, n, k, &herk ); bli_rntm_set_ways_for_op( BLIS_TRMM, BLIS_LEFT, m, n, k, &trmm_l ); bli_rntm_set_ways_for_op( BLIS_TRMM, BLIS_RIGHT, m, n, k, &trmm_r ); bli_rntm_set_ways_for_op( BLIS_TRSM, BLIS_LEFT, m, n, k, &trsm_l ); bli_rntm_set_ways_for_op( BLIS_TRSM, BLIS_RIGHT, m, n, k, &trsm_r ); // Output some system parameters. libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS library info -------------------------------------\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "version string %s\n", bli_info_get_version_str() ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS configuration info ---\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "active sub-configuration %s\n", bli_arch_string( bli_arch_query_id() ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "BLIS integer type size (bits) %d\n", ( int )int_type_size ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "Assumed max # of SIMD regs %d\n", ( int )bli_info_get_simd_num_registers() ); libblis_test_fprintf_c( os, "SIMD size (bytes) %d\n", ( int )bli_info_get_simd_size() ); libblis_test_fprintf_c( os, "SIMD alignment (bytes) %d\n", ( int )bli_info_get_simd_align_size() ); libblis_test_fprintf_c( os, "Max stack buffer size (bytes) %d\n", ( int )bli_info_get_stack_buf_max_size() ); libblis_test_fprintf_c( os, "Page size (bytes) %d\n", ( int )bli_info_get_page_size() ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "memory pools\n" ); 
libblis_test_fprintf_c( os, " enabled for packing blocks? %d\n", ( int )bli_info_get_enable_pba_pools() ); libblis_test_fprintf_c( os, " enabled for small blocks? %d\n", ( int )bli_info_get_enable_sba_pools() ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "memory alignment (bytes) \n" ); libblis_test_fprintf_c( os, " stack address %d\n", ( int )bli_info_get_stack_buf_align_size() ); libblis_test_fprintf_c( os, " obj_t address %d\n", ( int )bli_info_get_heap_addr_align_size() ); libblis_test_fprintf_c( os, " obj_t stride %d\n", ( int )bli_info_get_heap_stride_align_size() ); libblis_test_fprintf_c( os, " pool block addr A (+offset) %d (+%d)\n", ( int )bli_info_get_pool_addr_align_size_a(), ( int )bli_info_get_pool_addr_offset_size_a() ); libblis_test_fprintf_c( os, " pool block addr B (+offset) %d (+%d)\n", ( int )bli_info_get_pool_addr_align_size_b(), ( int )bli_info_get_pool_addr_offset_size_b() ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "BLAS/CBLAS compatibility layers \n" ); libblis_test_fprintf_c( os, " BLAS API enabled? %d\n", ( int )bli_info_get_enable_blas() ); libblis_test_fprintf_c( os, " CBLAS API enabled? %d\n", ( int )bli_info_get_enable_cblas() ); libblis_test_fprintf_c( os, " integer type size (bits) %d\n", ( int )bli_info_get_blas_int_type_size() ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "libmemkind \n" ); libblis_test_fprintf_c( os, " enabled? %d\n", ( int )bli_info_get_enable_memkind() ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "gemm sandbox \n" ); libblis_test_fprintf_c( os, " enabled? 
%d\n", ( int )bli_info_get_enable_sandbox() ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "floating-point types s d c z \n" ); libblis_test_fprintf_c( os, " sizes (bytes) %7u %7u %7u %7u\n", sizeof(float), sizeof(double), sizeof(scomplex), sizeof(dcomplex) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS parallelization info ---\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "multithreading %s\n", impl_str ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "thread auto-factorization \n" ); libblis_test_fprintf_c( os, " m dim thread ratio %d\n", ( int )BLIS_THREAD_RATIO_M ); libblis_test_fprintf_c( os, " n dim thread ratio %d\n", ( int )BLIS_THREAD_RATIO_N ); libblis_test_fprintf_c( os, " jr max threads %d\n", ( int )BLIS_THREAD_MAX_JR ); libblis_test_fprintf_c( os, " ir max threads %d\n", ( int )BLIS_THREAD_MAX_IR ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "ways of parallelism nt jc pc ic jr ir\n" ); libblis_test_fprintf_c( os, " environment %5s %5s %5s %5s %5s %5s\n", nt_str, jc_nt_str, pc_nt_str, ic_nt_str, jr_nt_str, ir_nt_str ); libblis_test_fprintf_c( os, " gemm (m,n,k=1000) %5d %5d %5d %5d %5d\n", ( int )bli_rntm_jc_ways( &gemm ), ( int )bli_rntm_pc_ways( &gemm ), ( int )bli_rntm_ic_ways( &gemm ), ( int )bli_rntm_jr_ways( &gemm ), ( int )bli_rntm_ir_ways( &gemm ) ); libblis_test_fprintf_c( os, " herk (m,k=1000) %5d %5d %5d %5d %5d\n", ( int )bli_rntm_jc_ways( &herk ), ( int )bli_rntm_pc_ways( &herk ), ( int )bli_rntm_ic_ways( &herk ), ( int )bli_rntm_jr_ways( &herk ), ( int )bli_rntm_ir_ways( &herk ) ); libblis_test_fprintf_c( os, " trmm_l (m,n=1000) %5d %5d %5d %5d %5d\n", ( int )bli_rntm_jc_ways( &trmm_l ), ( int )bli_rntm_pc_ways( &trmm_l ), ( int )bli_rntm_ic_ways( &trmm_l ), ( int )bli_rntm_jr_ways( &trmm_l ), ( int )bli_rntm_ir_ways( &trmm_l ) ); libblis_test_fprintf_c( os, " trmm_r (m,n=1000) %5d %5d %5d 
%5d %5d\n", ( int )bli_rntm_jc_ways( &trmm_r ), ( int )bli_rntm_pc_ways( &trmm_r ), ( int )bli_rntm_ic_ways( &trmm_r ), ( int )bli_rntm_jr_ways( &trmm_r ), ( int )bli_rntm_ir_ways( &trmm_r ) ); libblis_test_fprintf_c( os, " trsm_l (m,n=1000) %5d %5d %5d %5d %5d\n", ( int )bli_rntm_jc_ways( &trsm_l ), ( int )bli_rntm_pc_ways( &trsm_l ), ( int )bli_rntm_ic_ways( &trsm_l ), ( int )bli_rntm_jr_ways( &trsm_l ), ( int )bli_rntm_ir_ways( &trsm_l ) ); libblis_test_fprintf_c( os, " trsm_r (m,n=1000) %5d %5d %5d %5d %5d\n", ( int )bli_rntm_jc_ways( &trsm_r ), ( int )bli_rntm_pc_ways( &trsm_r ), ( int )bli_rntm_ic_ways( &trsm_r ), ( int )bli_rntm_jr_ways( &trsm_r ), ( int )bli_rntm_ir_ways( &trsm_r ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "thread partitioning \n" ); //libblis_test_fprintf_c( os, " jc/ic loops %s\n", "slab" ); libblis_test_fprintf_c( os, " jr/ir loops %s\n", jrir_str ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS default implementations ---\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "level-3 implementations s d c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s %7s %7s\n", bli_info_get_gemm_impl_string( BLIS_FLOAT ), bli_info_get_gemm_impl_string( BLIS_DOUBLE ), bli_info_get_gemm_impl_string( BLIS_SCOMPLEX ), bli_info_get_gemm_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " hemm %7s %7s %7s %7s\n", bli_info_get_hemm_impl_string( BLIS_FLOAT ), bli_info_get_hemm_impl_string( BLIS_DOUBLE ), bli_info_get_hemm_impl_string( BLIS_SCOMPLEX ), bli_info_get_hemm_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " herk %7s %7s %7s %7s\n", bli_info_get_herk_impl_string( BLIS_FLOAT ), bli_info_get_herk_impl_string( BLIS_DOUBLE ), bli_info_get_herk_impl_string( BLIS_SCOMPLEX ), bli_info_get_herk_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " her2k %7s %7s %7s %7s\n", bli_info_get_her2k_impl_string( BLIS_FLOAT ), 
bli_info_get_her2k_impl_string( BLIS_DOUBLE ), bli_info_get_her2k_impl_string( BLIS_SCOMPLEX ), bli_info_get_her2k_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " symm %7s %7s %7s %7s\n", bli_info_get_symm_impl_string( BLIS_FLOAT ), bli_info_get_symm_impl_string( BLIS_DOUBLE ), bli_info_get_symm_impl_string( BLIS_SCOMPLEX ), bli_info_get_symm_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " syrk %7s %7s %7s %7s\n", bli_info_get_syrk_impl_string( BLIS_FLOAT ), bli_info_get_syrk_impl_string( BLIS_DOUBLE ), bli_info_get_syrk_impl_string( BLIS_SCOMPLEX ), bli_info_get_syrk_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " syr2k %7s %7s %7s %7s\n", bli_info_get_syr2k_impl_string( BLIS_FLOAT ), bli_info_get_syr2k_impl_string( BLIS_DOUBLE ), bli_info_get_syr2k_impl_string( BLIS_SCOMPLEX ), bli_info_get_syr2k_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " trmm %7s %7s %7s %7s\n", bli_info_get_trmm_impl_string( BLIS_FLOAT ), bli_info_get_trmm_impl_string( BLIS_DOUBLE ), bli_info_get_trmm_impl_string( BLIS_SCOMPLEX ), bli_info_get_trmm_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " trmm3 %7s %7s %7s %7s\n", bli_info_get_trmm3_impl_string( BLIS_FLOAT ), bli_info_get_trmm3_impl_string( BLIS_DOUBLE ), bli_info_get_trmm3_impl_string( BLIS_SCOMPLEX ), bli_info_get_trmm3_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " trsm %7s %7s %7s %7s\n", bli_info_get_trsm_impl_string( BLIS_FLOAT ), bli_info_get_trsm_impl_string( BLIS_DOUBLE ), bli_info_get_trsm_impl_string( BLIS_SCOMPLEX ), bli_info_get_trsm_impl_string( BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, "\n" ); //bli_ind_disable_all(); bli_ind_oper_enable_only( BLIS_GEMM, BLIS_NAT, BLIS_SCOMPLEX ); bli_ind_oper_enable_only( BLIS_GEMM, BLIS_NAT, BLIS_DCOMPLEX ); libblis_test_fprintf_c( os, "--- BLIS native implementation info ---\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, "complex 
implementation %7s %7s\n", bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX ), bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, "\n" ); // Query a native context. cntx = bli_gks_query_nat_cntx(); libblis_test_fprintf_c( os, "level-3 blocksizes s d c z \n" ); libblis_test_fprintf_c( os, " mc %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_MC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_MC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MC, cntx ) ); libblis_test_fprintf_c( os, " kc %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_KC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_KC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_KC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_KC, cntx ) ); libblis_test_fprintf_c( os, " nc %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_NC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_NC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_NC, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NC, cntx ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mc maximum %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_FLOAT, BLIS_MC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DOUBLE, BLIS_MC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_MC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MC, cntx ) ); libblis_test_fprintf_c( os, " kc maximum %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_FLOAT, BLIS_KC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DOUBLE, BLIS_KC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_KC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_KC, cntx ) ); libblis_test_fprintf_c( os, " nc maximum %7d %7d %7d %7d\n", 
( int )bli_cntx_get_blksz_max_dt( BLIS_FLOAT, BLIS_NC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DOUBLE, BLIS_NC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NC, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NC, cntx ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mr %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_MR, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_MR, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MR, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MR, cntx ) ); libblis_test_fprintf_c( os, " nr %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_NR, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_NR, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_NR, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NR, cntx ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mr packdim %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_FLOAT, BLIS_MR, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DOUBLE, BLIS_MR, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_MR, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MR, cntx ) ); libblis_test_fprintf_c( os, " nr packdim %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_FLOAT, BLIS_NR, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DOUBLE, BLIS_NR, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NR, cntx ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NR, cntx ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "micro-kernel types s d c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s %7s %7s\n", bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), bli_info_get_gemm_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) 
); libblis_test_fprintf_c( os, " gemmtrsm_l %7s %7s %7s %7s\n", bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), bli_info_get_gemmtrsm_l_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " gemmtrsm_u %7s %7s %7s %7s\n", bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), bli_info_get_gemmtrsm_u_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " trsm_l %7s %7s %7s %7s\n", bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), bli_info_get_trsm_l_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " trsm_u %7s %7s %7s %7s\n", bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_FLOAT ), bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_DOUBLE ), bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_SCOMPLEX ), bli_info_get_trsm_u_ukr_impl_string( BLIS_NAT, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS induced implementation info ---\n" ); libblis_test_fprintf_c( os, "\n" ); for ( im = 0; im < BLIS_NAT; ++im ) { if ( params->ind_enable[ im ] == 0 ) continue; bli_ind_oper_enable_only( BLIS_GEMM, im, BLIS_SCOMPLEX ); bli_ind_oper_enable_only( BLIS_GEMM, im, BLIS_DCOMPLEX ); //libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, " c z \n" ); libblis_test_fprintf_c( os, "complex implementation %7s %7s\n", bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_SCOMPLEX ), bli_ind_oper_get_avail_impl_string( BLIS_GEMM, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, "\n" ); // Query a native 
context. cntx_c = bli_gks_query_ind_cntx( im, BLIS_SCOMPLEX ); cntx_z = bli_gks_query_ind_cntx( im, BLIS_DCOMPLEX ); libblis_test_fprintf_c( os, "level-3 blocksizes c z \n" ); libblis_test_fprintf_c( os, " mc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MC, cntx_c ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_z ) ); libblis_test_fprintf_c( os, " kc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_KC, cntx_c ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_z ) ); libblis_test_fprintf_c( os, " nc %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_NC, cntx_c ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_z ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_MC, cntx_c ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MC, cntx_z ) ); libblis_test_fprintf_c( os, " kc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_KC, cntx_c ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_KC, cntx_z ) ); libblis_test_fprintf_c( os, " nc maximum %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NC, cntx_c ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NC, cntx_z ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mr %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_MR, cntx_c ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_z ) ); libblis_test_fprintf_c( os, " nr %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_NR, cntx_c ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_z ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, " mr packdim %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_MR, cntx_c ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_MR, cntx_z ) ); libblis_test_fprintf_c( os, " nr 
packdim %7d %7d\n", ( int )bli_cntx_get_blksz_max_dt( BLIS_SCOMPLEX, BLIS_NR, cntx_c ), ( int )bli_cntx_get_blksz_max_dt( BLIS_DCOMPLEX, BLIS_NR, cntx_z ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "micro-kernel types c z\n" ); libblis_test_fprintf_c( os, " gemm %7s %7s\n", bli_info_get_gemm_ukr_impl_string( im, BLIS_SCOMPLEX ), bli_info_get_gemm_ukr_impl_string( im, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " gemmtrsm_l %7s %7s\n", bli_info_get_gemmtrsm_l_ukr_impl_string( im, BLIS_SCOMPLEX ), bli_info_get_gemmtrsm_l_ukr_impl_string( im, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " gemmtrsm_u %7s %7s\n", bli_info_get_gemmtrsm_u_ukr_impl_string( im, BLIS_SCOMPLEX ), bli_info_get_gemmtrsm_u_ukr_impl_string( im, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " trsm_l %7s %7s\n", bli_info_get_trsm_l_ukr_impl_string( im, BLIS_SCOMPLEX ), bli_info_get_trsm_l_ukr_impl_string( im, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, " trsm_u %7s %7s\n", bli_info_get_trsm_u_ukr_impl_string( im, BLIS_SCOMPLEX ), bli_info_get_trsm_u_ukr_impl_string( im, BLIS_DCOMPLEX ) ); libblis_test_fprintf_c( os, "\n" ); } bli_ind_disable_all(); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS misc. 
other info ---\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "level-2 cache blocksizes s d c z \n" ); libblis_test_fprintf_c( os, " m dimension %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_M2, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_M2, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_M2, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_M2, cntx ) ); libblis_test_fprintf_c( os, " n dimension %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_N2, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_N2, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_N2, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_N2, cntx ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "level-1f fusing factors s d c z \n" ); libblis_test_fprintf_c( os, " axpyf %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_AF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_AF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_AF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_AF, cntx ) ); libblis_test_fprintf_c( os, " dotxf %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_DF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_DF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_DF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_DF, cntx ) ); libblis_test_fprintf_c( os, " dotxaxpyf %7d %7d %7d %7d\n", ( int )bli_cntx_get_blksz_def_dt( BLIS_FLOAT, BLIS_XF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DOUBLE, BLIS_XF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_SCOMPLEX, BLIS_XF, cntx ), ( int )bli_cntx_get_blksz_def_dt( BLIS_DCOMPLEX, BLIS_XF, cntx ) ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf( os, "\n" ); // Output the contents of the param struct. 
libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "--- BLIS test suite parameters ----------------------------\n" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf_c( os, "num repeats per experiment %u\n", params->n_repeats ); libblis_test_fprintf_c( os, "num matrix storage schemes %u\n", params->n_mstorage ); libblis_test_fprintf_c( os, "storage[ matrix ] %s\n", params->storage[ BLIS_TEST_MATRIX_OPERAND ] ); libblis_test_fprintf_c( os, "num vector storage schemes %u\n", params->n_vstorage ); libblis_test_fprintf_c( os, "storage[ vector ] %s\n", params->storage[ BLIS_TEST_VECTOR_OPERAND ] ); libblis_test_fprintf_c( os, "mix all storage schemes? %u\n", params->mix_all_storage ); libblis_test_fprintf_c( os, "test with aligned memory? %u\n", params->alignment ); libblis_test_fprintf_c( os, "randomization method %u\n", params->rand_method ); libblis_test_fprintf_c( os, "general stride spacing %u\n", params->gs_spacing ); libblis_test_fprintf_c( os, "num datatypes %u\n", params->n_datatypes ); libblis_test_fprintf_c( os, "datatype[0] %d (%c)\n", params->datatype[0], params->datatype_char[0] ); for( i = 1; i < params->n_datatypes; ++i ) libblis_test_fprintf_c( os, " [%d] %d (%c)\n", i, params->datatype[i], params->datatype_char[i] ); libblis_test_fprintf_c( os, "mix domains for gemm? %u\n", params->mixed_domain ); libblis_test_fprintf_c( os, "mix precisions for gemm? %u\n", params->mixed_precision ); libblis_test_fprintf_c( os, "problem size: first to test %u\n", params->p_first ); libblis_test_fprintf_c( os, "problem size: max to test %u\n", params->p_max ); libblis_test_fprintf_c( os, "problem size increment %u\n", params->p_inc ); libblis_test_fprintf_c( os, "complex implementations \n" ); libblis_test_fprintf_c( os, " 3mh? %u\n", params->ind_enable[ BLIS_3MH ] ); libblis_test_fprintf_c( os, " 3m1? %u\n", params->ind_enable[ BLIS_3M1 ] ); libblis_test_fprintf_c( os, " 4mh? 
%u\n", params->ind_enable[ BLIS_4MH ] ); libblis_test_fprintf_c( os, " 4m1b (4mb)? %u\n", params->ind_enable[ BLIS_4M1B ] ); libblis_test_fprintf_c( os, " 4m1a (4m1)? %u\n", params->ind_enable[ BLIS_4M1A ] ); libblis_test_fprintf_c( os, " 1m? %u\n", params->ind_enable[ BLIS_1M ] ); libblis_test_fprintf_c( os, " native? %u\n", params->ind_enable[ BLIS_NAT ] ); libblis_test_fprintf_c( os, "simulated app-level threads %u\n", params->n_app_threads ); libblis_test_fprintf_c( os, "error-checking level %u\n", params->error_checking_level ); libblis_test_fprintf_c( os, "reaction to failure %c\n", params->reaction_to_failure ); libblis_test_fprintf_c( os, "output in matlab format? %u\n", params->output_matlab_format ); libblis_test_fprintf_c( os, "output to stdout AND files? %u\n", params->output_files ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf( os, "\n" ); #ifndef BLIS_ENABLE_GEMM_MD // Notify the user if mixed domain or mixed precision was requested. if ( params->mixed_domain || params->mixed_precision ) { libblis_test_printf_error( "mixed domain and/or mixed precision testing requested, but building against BLIS without mixed datatype support.\n" ); } #endif // If mixed domain or mixed precision was requested, we disable all // induced methods except 1m and native execution. 
if ( params->mixed_domain || params->mixed_precision ) { ind_t im; for ( im = BLIS_IND_FIRST; im < BLIS_IND_LAST+1; ++im ) { if ( im != BLIS_1M && im != BLIS_NAT ) params->ind_enable[ im ] = 0; } } } void libblis_test_output_op_struct( FILE* os, test_op_t* op, char* op_str ) { dimset_t dimset = op->dimset; if ( dimset == BLIS_TEST_DIMS_MNK ) { libblis_test_fprintf_c( os, "%s m n k %d %d %d\n", op_str, op->dim_spec[0], op->dim_spec[1], op->dim_spec[2] ); } else if ( dimset == BLIS_TEST_DIMS_MN ) { libblis_test_fprintf_c( os, "%s m n %d %d\n", op_str, op->dim_spec[0], op->dim_spec[1] ); } else if ( dimset == BLIS_TEST_DIMS_MK ) { libblis_test_fprintf_c( os, "%s m k %d %d\n", op_str, op->dim_spec[0], op->dim_spec[1] ); } else if ( dimset == BLIS_TEST_DIMS_M || dimset == BLIS_TEST_DIMS_MF ) { libblis_test_fprintf_c( os, "%s m %d\n", op_str, op->dim_spec[0] ); } else if ( dimset == BLIS_TEST_DIMS_K ) { libblis_test_fprintf_c( os, "%s k %d\n", op_str, op->dim_spec[0] ); } else if ( dimset == BLIS_TEST_NO_DIMS ) { // Do nothing. } else { libblis_test_printf_error( "Invalid dimension combination.\n" ); } if ( op->n_params > 0 ) libblis_test_fprintf_c( os, "%s operand params %s\n", op_str, op->params ); else libblis_test_fprintf_c( os, "%s operand params %s\n", op_str, "(none)" ); libblis_test_fprintf_c( os, "\n" ); libblis_test_fprintf( os, "\n" ); } char* libblis_test_get_string_for_result( double resid, num_t dt, thresh_t* thresh ) { char* r_val; // Before checking against the thresholds, make sure the residual is // neither NaN nor Inf. (Note that bli_isnan() and bli_isinf() are // both simply wrappers to the isnan() and isinf() macros defined // defined in math.h.) if ( bli_isnan( resid ) || bli_isinf( resid ) ) { r_val = libblis_test_fail_string; } else { // Check the result against the thresholds. 
if ( resid > thresh[dt].failwarn ) r_val = libblis_test_fail_string; else if ( resid > thresh[dt].warnpass ) r_val = libblis_test_warn_string; else r_val = libblis_test_pass_string; } return r_val; } param_t libblis_test_get_param_type_for_char( char p_type ) { param_t r_val; if ( p_type == 's' ) r_val = BLIS_TEST_PARAM_SIDE; else if ( p_type == 'u' ) r_val = BLIS_TEST_PARAM_UPLO; else if ( p_type == 'e' ) r_val = BLIS_TEST_PARAM_UPLODE; else if ( p_type == 'h' ) r_val = BLIS_TEST_PARAM_TRANS; else if ( p_type == 'c' ) r_val = BLIS_TEST_PARAM_CONJ; else if ( p_type == 'd' ) r_val = BLIS_TEST_PARAM_DIAG; else { r_val = 0; libblis_test_printf_error( "Invalid parameter character.\n" ); } return r_val; } operand_t libblis_test_get_operand_type_for_char( char o_type ) { operand_t r_val; if ( o_type == 'm' ) r_val = BLIS_TEST_MATRIX_OPERAND; else if ( o_type == 'v' ) r_val = BLIS_TEST_VECTOR_OPERAND; else { r_val = 0; libblis_test_printf_error( "Invalid operand character.\n" ); } return r_val; } unsigned int libblis_test_get_n_dims_from_dimset( dimset_t dimset ) { unsigned int n_dims; if ( dimset == BLIS_TEST_DIMS_MNK ) n_dims = 3; else if ( dimset == BLIS_TEST_DIMS_MN ) n_dims = 2; else if ( dimset == BLIS_TEST_DIMS_MK ) n_dims = 2; else if ( dimset == BLIS_TEST_DIMS_M ) n_dims = 1; else if ( dimset == BLIS_TEST_DIMS_MF ) n_dims = 1; else if ( dimset == BLIS_TEST_DIMS_K ) n_dims = 1; else if ( dimset == BLIS_TEST_NO_DIMS ) n_dims = 0; else { n_dims = 0; libblis_test_printf_error( "Invalid dimension combination.\n" ); } return n_dims; } unsigned int libblis_test_get_n_dims_from_string( char* dims_str ) { unsigned int n_dims; char* cp; cp = dims_str; for ( n_dims = 0; *cp != '\0'; ++n_dims ) { //printf( "n_dims = %u\n", n_dims ); while ( isspace( *cp ) ) { //printf( "current char: _%c_", *cp ); ++cp; } while ( isdigit( *cp ) ) { //printf( "current char: _%c_", *cp ); ++cp; } } //printf( "n_dims finally = %u\n", n_dims ); return n_dims; } dim_t 
libblis_test_get_dim_from_prob_size( int dim_spec, unsigned int p_size ) { dim_t dim; if ( dim_spec < 0 ) dim = p_size / bli_abs(dim_spec); else dim = dim_spec; return dim; } void libblis_test_fill_param_strings( char* p_spec_str, char** chars_for_param, unsigned int n_params, unsigned int n_param_combos, char** pc_str ) { unsigned int pci, pi, i; unsigned int* counter; unsigned int* n_vals_for_param; // Allocate an array that will store the number of parameter values // for each parameter. n_vals_for_param = ( unsigned int* ) malloc( n_params * sizeof( unsigned int ) ); // Fill n_vals_for_param[i] with the number of parameter values (chars) // in chars_for_param[i] (this is simply the string length). for ( i = 0; i < n_params; ++i ) { if ( p_spec_str[i] == '?' ) n_vals_for_param[i] = strlen( chars_for_param[i] ); else n_vals_for_param[i] = 1; } // Allocate an array with one digit per parameter. We will use // this array to keep track of our progress as we canonically move // though all possible parameter combinations. counter = ( unsigned int* ) malloc( n_params * sizeof( unsigned int ) ); // Initialize all values in c to zero. for ( i = 0; i < n_params; ++i ) counter[i] = 0; for ( pci = 0; pci < n_param_combos; ++pci ) { // Iterate backwards through each parameter string we create, since we // want to form (for example, if the parameters are transa and conjx: // (1) nn, (2) nc, (3) cn, (4) cc, (5) tn, (6) tc, (7) hn, (8) hc. for ( i = 0, pi = n_params - 1; i < n_params; --pi, ++i ) { // If the current parameter character, p_spec_str[pi] is fixed (ie: if // it is not '?'), then just copy it into the parameter combination // string. Otherwise, map the current integer value in c to the // corresponding character in char_for_param[pi]. if ( p_spec_str[pi] != '?' ) pc_str[pci][pi] = p_spec_str[pi]; else pc_str[pci][pi] = chars_for_param[ pi ][ counter[pi] ]; } // Terminate the current parameter combination string. 
pc_str[pci][n_params] = '\0'; // Only try to increment/carryover if this is NOT the last param // combo. if ( pci < n_param_combos - 1 ) { // Increment the least-most significant counter. counter[ n_params - 1 ]++; // Perform "carryover" if needed. carryover( &counter[ n_params - 1 ], &n_vals_for_param[ n_params - 1 ], n_params ); } } // Free the temporary arrays. free( counter ); // Free the array holding the number of parameter values for each // parameter. free( n_vals_for_param ); } void carryover( unsigned int* c, unsigned int* n_vals_for_param, unsigned int n_params ) { if ( n_params == 1 ) return; else { if ( *c == *n_vals_for_param ) { *c = 0; *(c-1) += 1; carryover( c-1, n_vals_for_param-1, n_params-1 ); } } } void libblis_test_op_driver ( thread_data_t* tdata, test_params_t* params, test_op_t* op, iface_t iface, char* op_str, char* p_types, char* o_types, thresh_t* thresh, void (*f_exp) (test_params_t*, // params struct test_op_t*, // op struct iface_t, // iface char*, // datatype (current datatype) char*, // pc_str (current param string) char*, // sc_str (current storage string) unsigned int, // p_cur (current problem size) double*, // perf double* ) // residual ) { unsigned int n_mstorage = params->n_mstorage; unsigned int n_vstorage = params->n_vstorage; unsigned int n_datatypes = params->n_datatypes; unsigned int p_first = params->p_first; unsigned int p_max = params->p_max; unsigned int p_inc = params->p_inc; unsigned int mix_all_storage = params->mix_all_storage; unsigned int mixed_domain = params->mixed_domain; unsigned int mixed_precision = params->mixed_precision; unsigned int reaction_to_failure = params->reaction_to_failure; num_t datatype; num_t dt_check; char dt_char; char* p_spec_str; unsigned int n_params; char** chars_for_param; unsigned int n_param_combos; char** pc_str; char s_spec_str[ MAX_NUM_OPERANDS + 1 ]; unsigned int n_operands; unsigned int n_operandsp1; char** chars_for_storage; unsigned int n_store_combos; char** sc_str; char 
d_spec_str[ MAX_NUM_OPERANDS + 1 ]; char** chars_for_spdt; char** chars_for_dpdt; unsigned int n_spdt_combos; unsigned int n_dpdt_combos; unsigned int n_dt_combos; char** dc_str; char** chars_for_dt; char** chars_for_rddt; char** chars_for_cddt; unsigned int n_rddt_combos; unsigned int n_cddt_combos; unsigned int p_cur, pi; unsigned int indi, pci, sci, dci, i, j, o; unsigned int is_mixed_dt; double perf, resid; char* pass_str; char* ind_str; char blank_str[32]; char funcname_str[64]; char dims_str[64]; char label_str[128]; unsigned int n_spaces; unsigned int n_dims_print; FILE* output_stream = NULL; // These arrays are malloc()'ed in select branches. Here, we set // them to NULL so they can be unconditionally free()'ed at the // end of the function. chars_for_rddt = NULL; chars_for_cddt = NULL; chars_for_spdt = NULL; chars_for_dpdt = NULL; // If output to files was requested, attempt to open a file stream. if ( params->output_files ) libblis_test_fopen_ofile( op_str, iface, &output_stream ); // Set the error-checking level according to what was specified in the // input file. if ( params->error_checking_level == 0 ) bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); else bli_error_checking_level_set( BLIS_FULL_ERROR_CHECKING ); // Obtain the parameter specification (filter) string. p_spec_str = op->params; // Figure out how many parameters we have. n_params = strlen( p_types ); if ( strlen( p_types ) != strlen( p_spec_str) ) { libblis_test_printf_error( "Parameter specification string from input file does not match length of p_types string.\n" ); } // Allocate an array that stores pointers to the sets of possible parameter // chars for each parameter. chars_for_param = ( char** ) malloc( n_params * sizeof( char* ) ); // Set the values in chars_for_param to the appropriate string addresses. 
for ( i = 0; i < n_params; ++i ) { param_t param_type = libblis_test_get_param_type_for_char( p_types[i] ); chars_for_param[i] = libblis_test_param_chars[ param_type ]; } // Compute the total number of parameter combinations to test (which is // simply the product of the string lengths of chars_for_param[i]. n_param_combos = libblis_test_count_combos( n_params, p_spec_str, chars_for_param ); // Allocate an array of parameter combination strings, one for each // parameter combination that needs to be tested. pc_str = ( char** ) malloc( n_param_combos * sizeof( char* ) ); for ( i = 0; i < n_param_combos; ++i ) pc_str[i] = ( char* ) malloc( ( n_params + 1 ) * sizeof( char ) ); // Fill the parameter combination strings in pc_str with the parameter // combinations called for by the parameter string p_spec_str. libblis_test_fill_param_strings( p_spec_str, chars_for_param, n_params, n_param_combos, pc_str ); // Figure out how many operands we have. n_operands = strlen( o_types ); // If we are testing a micro-kernel, unconditionally disable the // "mix all storage" option. if ( iface == BLIS_TEST_SEQ_UKERNEL ) mix_all_storage = DISABLE; // Enumerate all combinations of storage schemes requested. if ( mix_all_storage ) { // Fill storage specification string with wildcard chars. for ( i = 0; i < n_operands; ++i ) s_spec_str[i] = '?'; s_spec_str[i] = '\0'; // Allocate an array that stores pointers to the sets of possible // storage chars for each operand. chars_for_storage = ( char** ) malloc( n_operands * sizeof( char* ) ); // Set the values in chars_for_storage to the address of the string // that holds the storage chars. for ( i = 0; i < n_operands; ++i ) { operand_t operand_type = libblis_test_get_operand_type_for_char( o_types[i] ); chars_for_storage[i] = libblis_test_store_chars[ operand_type ]; } // Compute the total number of storage combinations to test (which is // simply the product of the string lengths of chars_for_storage[i]. 
n_store_combos = libblis_test_count_combos( n_operands, s_spec_str, chars_for_storage ); // Allocate an array of storage combination strings, one for each // storage combination that needs to be tested. sc_str = ( char** ) malloc( n_store_combos * sizeof( char* ) ); for ( sci = 0; sci < n_store_combos; ++sci ) sc_str[sci] = ( char* ) malloc( ( n_operands + 1 ) * sizeof( char ) ); // Fill the storage combination strings in sc_str with the storage // combinations called for by the storage string p_spec_str. libblis_test_fill_param_strings( s_spec_str, chars_for_storage, n_operands, n_store_combos, sc_str ); } else // if ( !mix_all_storage ) { // Only run combinations where all operands of either type (matrices // or vectors) are stored in one storage scheme or another (no mixing // of schemes within the same operand type). unsigned int n_mat_operands = 0; unsigned int n_vec_operands = 0; for ( o = 0; o < n_operands; ++o ) { operand_t operand_type = libblis_test_get_operand_type_for_char( o_types[o] ); if ( operand_type == BLIS_TEST_MATRIX_OPERAND ) ++n_mat_operands; else if ( operand_type == BLIS_TEST_VECTOR_OPERAND ) ++n_vec_operands; } // We compute the total number of storage combinations based on whether // the current operation has only matrix operands, only vector operands, // or both. 
if ( n_vec_operands == 0 ) { n_store_combos = n_mstorage; n_vstorage = 1; } else if ( n_mat_operands == 0 ) { n_store_combos = n_vstorage; n_mstorage = 1; } else { n_store_combos = n_mstorage * n_vstorage; } sc_str = ( char** ) malloc( n_store_combos * sizeof( char* ) ); for ( j = 0; j < n_mstorage; ++j ) { for ( i = 0; i < n_vstorage; ++i ) { sci = j*n_vstorage + i; sc_str[ sci ] = ( char* ) malloc( ( n_operands + 1 ) * sizeof( char ) ); for ( o = 0; o < n_operands; ++o ) { unsigned int ij; operand_t operand_type = libblis_test_get_operand_type_for_char( o_types[o] ); if ( operand_type == BLIS_TEST_MATRIX_OPERAND ) ij = j; else ij = i; sc_str[sci][o] = params->storage[ operand_type ][ij]; } sc_str[sci][n_operands] = '\0'; } } } // Enumerate all combinations of datatype domains requested, but only // for the gemm operation. if ( !mixed_domain && mixed_precision && op->opid == BLIS_GEMM ) { is_mixed_dt = TRUE; // Increment the number of operands by one to account for the // computation precision (or computation datatype, as we will encode // it in the char string). n_operandsp1 = n_operands + 1; unsigned int has_rd = libblis_test_dt_str_has_rd_char( params ); unsigned int has_cd = libblis_test_dt_str_has_cd_char( params ); // Fill datatype specification string with wildcard chars. for ( i = 0; i < n_operandsp1; ++i ) d_spec_str[i] = '?'; d_spec_str[i] = '\0'; // Allocate an array that stores pointers to the sets of possible // datatype chars for each operand. chars_for_rddt = ( char** ) malloc( n_operandsp1 * sizeof( char* ) ); chars_for_cddt = ( char** ) malloc( n_operandsp1 * sizeof( char* ) ); // Set the values in chars_for_rddt/cddt to the address of the string // that holds the datatype chars. for ( i = 0; i < n_operandsp1; ++i ) { chars_for_rddt[i] = libblis_test_rd_chars; chars_for_cddt[i] = libblis_test_cd_chars; } // Set the last set of chars in chars_for_cddt to the real domain // charset. This is because the last char will be the computation // precision. 
chars_for_cddt[i-1] = libblis_test_rd_chars; // Compute the total number of datatype combinations to test (which is // simply the product of the string lengths of chars_for_spdt/dpdt[i]). // NOTE: We skip inspecting/branching off of the d_spec_str chars since // we know they are all '?'. n_rddt_combos = 0; n_cddt_combos = 0; if ( has_rd ) n_rddt_combos = libblis_test_count_combos( n_operandsp1, d_spec_str, chars_for_rddt ); if ( has_cd ) n_cddt_combos = libblis_test_count_combos( n_operandsp1, d_spec_str, chars_for_cddt ); // Add real and complex domain combinations. n_dt_combos = n_rddt_combos + n_cddt_combos; // Allocate an array of datatype combination strings, one for each // datatype combination that needs to be tested. dc_str = ( char** ) malloc( n_dt_combos * sizeof( char* ) ); for ( dci = 0; dci < n_dt_combos; ++dci ) dc_str[dci] = ( char* ) malloc( ( n_operandsp1 + 1 ) * sizeof( char ) ); char** dc_str_p = dc_str; // Fill the datatype combination strings in dc_str with the datatype // combinations implied by chars_for_rddt/cddt. if ( has_rd ) { libblis_test_fill_param_strings( d_spec_str, chars_for_rddt, n_operandsp1, n_rddt_combos, dc_str_p ); dc_str_p += n_rddt_combos; } if ( has_cd ) { libblis_test_fill_param_strings( d_spec_str, chars_for_cddt, n_operandsp1, n_cddt_combos, dc_str_p ); dc_str_p += n_cddt_combos; } #if 0 printf( "n_rddt_combos = %d\n", n_rddt_combos ); printf( "n_cddt_combos = %d\n", n_cddt_combos ); printf( "n_dt_combos = %d\n\n", n_dt_combos ); for ( dci = 0; dci < n_dt_combos; ++dci ) printf( "dc_str[%2d] = %s\n", dci, dc_str[dci] ); bli_abort(); #endif } else if ( mixed_domain && !mixed_precision && op->opid == BLIS_GEMM ) { is_mixed_dt = TRUE; // Increment the number of operands by one to account for the // computation precision (or computation datatype, as we will encode // it in the char string). 
n_operandsp1 = n_operands + 1; unsigned int has_sp = libblis_test_dt_str_has_sp_char( params ); unsigned int has_dp = libblis_test_dt_str_has_dp_char( params ); // Fill datatype specification string with wildcard chars. for ( i = 0; i < n_operands; ++i ) d_spec_str[i] = '?'; d_spec_str[i] = '\0'; // Allocate an array that stores pointers to the sets of possible // datatype chars for each operand (plus the computation precision // char). chars_for_spdt = ( char** ) malloc( n_operands * sizeof( char* ) ); chars_for_dpdt = ( char** ) malloc( n_operands * sizeof( char* ) ); // Set the values in chars_for_spdt/dpdt to the address of the string // that holds the datatype chars. for ( i = 0; i < n_operands; ++i ) { chars_for_spdt[i] = libblis_test_sp_chars; chars_for_dpdt[i] = libblis_test_dp_chars; } // Compute the total number of datatype combinations to test (which is // simply the product of the string lengths of chars_for_spdt/dpdt[i]). // NOTE: We skip inspecting/branching off of the d_spec_str chars since // we know they are all '?'. n_spdt_combos = 0; n_dpdt_combos = 0; if ( has_sp ) n_spdt_combos = libblis_test_count_combos( n_operands, d_spec_str, chars_for_spdt ); if ( has_dp ) n_dpdt_combos = libblis_test_count_combos( n_operands, d_spec_str, chars_for_dpdt ); // Add single- and double-precision combinations. n_dt_combos = n_spdt_combos + n_dpdt_combos; // Allocate an array of datatype combination strings, one for each // datatype combination that needs to be tested. dc_str = ( char** ) malloc( n_dt_combos * sizeof( char* ) ); for ( dci = 0; dci < n_dt_combos; ++dci ) dc_str[dci] = ( char* ) malloc( ( n_operandsp1 + 1 ) * sizeof( char ) ); char** dc_str_p = dc_str; // Fill the datatype combination strings in dc_str with the datatype // combinations implied by chars_for_spdt/dpdt. 
if ( has_sp ) { libblis_test_fill_param_strings( d_spec_str, chars_for_spdt, n_operands, n_spdt_combos, dc_str_p ); dc_str_p += n_spdt_combos; } if ( has_dp ) { libblis_test_fill_param_strings( d_spec_str, chars_for_dpdt, n_operands, n_dpdt_combos, dc_str_p ); dc_str_p += n_dpdt_combos; } // Manually set the computation char to the real projection of the // first char of each combination. int prec_i = n_operands; for ( i = 0; i < n_dt_combos; ++i ) { dc_str[i][prec_i] = libblis_test_proj_dtchar_to_precchar( dc_str[i][0] ); dc_str[i][prec_i+1] = '\0'; } #if 0 printf( "n_spdt_combos = %d\n", n_spdt_combos ); printf( "n_dpdt_combos = %d\n", n_dpdt_combos ); printf( "n_dt_combos = %d\n\n", n_dt_combos ); for ( dci = 0; dci < n_dt_combos; ++dci ) printf( "dc_str[%2d] = %s\n", dci, dc_str[dci] ); bli_abort(); #endif } else if ( mixed_domain && mixed_precision && op->opid == BLIS_GEMM ) { is_mixed_dt = TRUE; // Increment the number of operands by one to account for the // computation precision (or computation datatype, as we will encode // it in the char string). n_operandsp1 = n_operands + 1; // Fill datatype specification string with wildcard chars. for ( i = 0; i < n_operandsp1; ++i ) d_spec_str[i] = '?'; d_spec_str[i] = '\0'; // Allocate an array that stores pointers to the sets of possible // datatype chars for each operand. chars_for_dt = ( char** ) malloc( n_operandsp1 * sizeof( char* ) ); // Set the values in chars_for_rddt/cddt to the address of the string // that holds the datatype chars. for ( i = 0; i < n_operandsp1; ++i ) { chars_for_dt[i] = libblis_test_dt_chars; } // Set the last set of chars in chars_for_dt to the real domain // charset. This is because the last char will be the computation // precision, with the computation domain implied by the operands' // storage datatypes. chars_for_dt[i-1] = libblis_test_rd_chars; // Compute the total number of datatype combinations to test (which is // simply the product of the string lengths of chars_for_dt[i]). 
// NOTE: We skip inspecting/branching off of the d_spec_str chars since // we know they are all '?'. n_dt_combos = libblis_test_count_combos( n_operandsp1, d_spec_str, chars_for_dt ); // Allocate an array of datatype combination strings, one for each // datatype combination that needs to be tested. dc_str = ( char** ) malloc( n_dt_combos * sizeof( char* ) ); for ( dci = 0; dci < n_dt_combos; ++dci ) dc_str[dci] = ( char* ) malloc( ( n_operandsp1 + 1 ) * sizeof( char ) ); // Fill the datatype combination strings in dc_str with the datatype // combinations implied by chars_for_rddt/cddt. libblis_test_fill_param_strings( d_spec_str, chars_for_dt, n_operandsp1, n_dt_combos, dc_str ); #if 0 printf( "n_dt_combos = %d\n\n", n_dt_combos ); for ( dci = 0; dci < n_dt_combos; ++dci ) printf( "dc_str[%3d] = %s\n", dci, dc_str[dci] ); bli_abort(); #endif } else // ( ( !mixed_domain && !mixed_precision ) || op->opid != BLIS_GEMM ) { is_mixed_dt = FALSE; // Increment the number of operands by one to account for the // computation precision (or computation datatype, as we will encode // it in the char string). n_operandsp1 = n_operands + 1; // Since we are not mixing domains, we only consider n_datatype // datatype combinations, where each combination is actually // homogeneous (e.g. "sss", "ddd", etc., if n_operands == 3). n_dt_combos = n_datatypes; // Allocate an array of datatype combination strings, one for each // datatype specified. dc_str = ( char** ) malloc( n_dt_combos * sizeof( char* ) ); for ( dci = 0; dci < n_dt_combos; ++dci ) dc_str[dci] = ( char* ) malloc( ( n_operandsp1 + 1 ) * sizeof( char ) ); // Fill each datatype combination string with the same dt char for // each operand in the current operation. for ( dci = 0; dci < n_dt_combos; ++dci ) { dt_char = params->datatype_char[dci]; for ( i = 0; i < n_operands; ++i ) dc_str[dci][i] = dt_char; // Encode the computation precision as the last char. 
dc_str[dci][i] = libblis_test_proj_dtchar_to_precchar( dc_str[dci][0] ); dc_str[dci][i+1] = '\0'; } #if 0 printf( "n_dt_combos = %d\n\n", n_dt_combos ); for ( dci = 0; dci < n_dt_combos; ++dci ) printf( "dc_str[%3d] = %s\n", dci, dc_str[dci] ); bli_abort(); #endif } // These statements should only be executed by one thread. if ( tdata->id == 0 ) { // Output a heading and the contents of the op struct. libblis_test_fprintf_c( stdout, "--- %s ---\n", op_str ); libblis_test_fprintf_c( stdout, "\n" ); libblis_test_output_op_struct( stdout, op, op_str ); // Also output to a matlab file if requested (and successfully opened). if ( output_stream ) { // For file output, we also include the contents of the global // parameter struct. We do this per operation so that the parameters // are included in each file, whereas we only output it once to // stdout (at the end of libblis_test_read_parameter_file()). libblis_test_output_params_struct( output_stream, params ); libblis_test_fprintf_c( output_stream, "--- %s ---\n", op_str ); libblis_test_fprintf_c( output_stream, "\n" ); libblis_test_output_op_struct( output_stream, op, op_str ); } } // Loop over the requested storage schemes. for ( sci = 0; sci < n_store_combos; ++sci ) { // Loop over the requested datatypes. for ( dci = 0; dci < n_dt_combos; ++dci ) //for ( dci = 14; dci < 15; ++dci ) //for ( dci = 6; dci < 7; dci += 1 ) //for ( dci = 12; dci < 13; ++dci ) //for ( dci = 4; dci < 5; ++dci ) //for ( dci = 8; dci < 9; ++dci ) //for ( dci = 0; dci < 1; ++dci ) { // We need a datatype to use for induced method related things // as well as to decide which set of residual thresholds to use. // We must choose the first operand's dt char since that's the // only operand we know is guaranteed to exist. 
bli_param_map_char_to_blis_dt( dc_str[dci][0], &datatype ); dt_check = datatype; int has_sp = libblis_test_dt_str_has_sp_char_str( n_operandsp1, dc_str[dci] ); int has_dp = libblis_test_dt_str_has_dp_char_str( n_operandsp1, dc_str[dci] ); int has_samep = (has_sp && !has_dp ) || (has_dp && !has_sp ); // Notice that we use n_operands here instead of // n_operandsp1 since we only want to chars for the // storage datatypes of the matrix operands, not the // computation precision char. int has_cd_only = !libblis_test_dt_str_has_rd_char_str( n_operands, dc_str[dci] ); if ( has_sp ) { // If any of the operands are single precision, ensure that // dt_check is also single precision so that the residual is // compared to datatype-appropriate thresholds. dt_check = bli_dt_proj_to_single_prec( datatype ); } // Build a commented column label string. libblis_test_build_col_labels_string( params, op, label_str ); // These statements should only be executed by one thread. if ( tdata->id == 0 ) { // Output the column label string. libblis_test_fprintf( stdout, "%s\n", label_str ); // Also output to a matlab file if requested (and successfully // opened). if ( output_stream ) libblis_test_fprintf( output_stream, "%s\n", label_str ); } // Start by assuming we will only test native execution. ind_t ind_first = BLIS_NAT; dim_t ind_last = BLIS_NAT; // If the operation is level-3, and all operand domains are complex, // then we iterate over all induced methods. if ( bli_opid_is_level3( op->opid ) && has_cd_only ) ind_first = 0; // Loop over induced methods (or just BLIS_NAT). for ( indi = ind_first; indi <= ind_last; ++indi ) { // If the current datatype is real, OR if the current // induced method is implemented (for the operation // being tested) AND it was requested, then we enable // ONLY that method and proceed. Otherwise, we skip the // current method and go to the next method. 
if ( bli_is_real( datatype ) ) { ; } else if ( bli_ind_oper_is_impl( op->opid, indi ) && params->ind_enable[ indi ] == 1 ) { // If the current induced method is 1m, make sure that // we only proceed for gemm where all operands are stored // in the complex domain. (This prevents 1m from being // executed on mixed-datatype combinations that contain // real domain datatypes.) if ( indi == BLIS_1M ) { if ( op->opid == BLIS_GEMM && has_cd_only ) { ; } else if ( has_samep && has_cd_only ) { ; } else { continue; } } else { ; } } else { continue; } bli_ind_oper_enable_only( op->opid, indi, datatype ); // Query the implementation string associated with the // current operation and datatype. If the operation is // not level-3, we will always get back the native string. ind_str = bli_ind_oper_get_avail_impl_string( op->opid, datatype ); // Loop over the requested parameter combinations. for ( pci = 0; pci < n_param_combos; ++pci ) { // Loop over the requested problem sizes. for ( p_cur = p_first, pi = 1; p_cur <= p_max; p_cur += p_inc, ++pi ) { // Skip this experiment (for this problem size) according to // to the counter, number of threads, and thread id. if ( tdata->xc % tdata->nt != tdata->id ) { tdata->xc++; continue; } // Call the given experiment function. perf and resid will // contain the resulting performance and residual values, // respectively. f_exp( params, op, iface, dc_str[dci], pc_str[pci], sc_str[sci], p_cur, &perf, &resid ); // Remove the sign of the residual, if there is one. resid = bli_fabs( resid ); if ( resid == -0.0 ) resid = 0.0; // Query the string corresponding to the residual's // position relative to the thresholds. pass_str = libblis_test_get_string_for_result( resid, dt_check, thresh ); // Build a string unique to the operation, datatype combo, // parameter combo, and storage combo being tested. 
libblis_test_build_function_string( BLIS_FILEDATA_PREFIX_STR, op->opid, indi, ind_str, op_str, is_mixed_dt, dc_str[dci], n_param_combos, pc_str[pci], sc_str[sci], funcname_str ); // Compute the number of spaces we have left to fill given // length of our operation's name. n_spaces = MAX_FUNC_STRING_LENGTH - strlen( funcname_str ); fill_string_with_n_spaces( blank_str, n_spaces ); // Print all dimensions to a single string. libblis_test_build_dims_string( op, p_cur, dims_str ); // Count the number of dimensions that were printed to the string. n_dims_print = libblis_test_get_n_dims_from_string( dims_str ); // Output the results of the test. Use matlab format if requested. // NOTE: Here we use fprintf() over libblis_test_fprintf() so // that on POSIX systems the output is not intermingled. If we // used libblis_test_fprintf(), we would need to enclose this // conditional with the acquisition of a mutex shared among all // threads to prevent intermingled output. if ( params->output_matlab_format ) { fprintf( stdout, "%s%s( %3u, 1:%u ) = [%s %7.2lf %8.2le ]; %c %s\n", funcname_str, blank_str, pi, n_dims_print + 2, dims_str, perf, resid, OUTPUT_COMMENT_CHAR, pass_str ); // Also output to a file if requested (and successfully // opened). if ( output_stream ) fprintf( output_stream, "%s%s( %3u, 1:%u ) = [%s %7.2lf %8.2le ]; %c %s\n", funcname_str, blank_str, pi, n_dims_print + 2, dims_str, perf, resid, OUTPUT_COMMENT_CHAR, pass_str ); } else { fprintf( stdout, "%s%s %s %7.2lf %8.2le %s\n", funcname_str, blank_str, dims_str, perf, resid, pass_str ); // Also output to a file if requested (and successfully // opened). if ( output_stream ) fprintf( output_stream, "%s%s %s %7.2lf %8.2le %s\n", funcname_str, blank_str, dims_str, perf, resid, pass_str ); } // If we need to check whether to do something on failure, // do so now. 
if ( reaction_to_failure == ON_FAILURE_SLEEP_CHAR ) { if ( strstr( pass_str, BLIS_TEST_FAIL_STRING ) == pass_str ) libblis_test_sleep(); } else if ( reaction_to_failure == ON_FAILURE_ABORT_CHAR ) { if ( strstr( pass_str, BLIS_TEST_FAIL_STRING ) == pass_str ) libblis_test_abort(); } // Increment the experiment counter (regardless of whether // the thread executed or skipped the current experiment). tdata->xc += 1; } } } // Wait for all other threads so that the output stays organized. bli_pthread_barrier_wait( tdata->barrier ); // These statements should only be executed by one thread. if ( tdata->id == 0 ) { libblis_test_fprintf( stdout, "\n" ); if ( output_stream ) libblis_test_fprintf( output_stream, "\n" ); } } } // Free the array that stored pointers to the sets of possible parameter // chars for each parameter. free( chars_for_param ); // Free the parameter combination strings and then the master pointer. for ( pci = 0; pci < n_param_combos; ++pci ) free( pc_str[pci] ); free( pc_str ); // Free the storage combination strings and then the master pointer. for ( sci = 0; sci < n_store_combos; ++sci ) free( sc_str[sci] ); free( sc_str ); // Free some auxiliary arrays used by the mixed-domain/mixed-precision // datatype-handling logic. free( chars_for_rddt ); free( chars_for_cddt ); free( chars_for_spdt ); free( chars_for_dpdt ); // Free the datatype combination strings and then the master pointer. for ( dci = 0; dci < n_dt_combos; ++dci ) free( dc_str[dci] ); free( dc_str ); // If the file was opened (successfully), close the output stream. if ( output_stream ) libblis_test_fclose_ofile( output_stream ); // Mark this operation as done. 
op->test_done = TRUE; } void libblis_test_build_function_string ( char* prefix_str, opid_t opid, ind_t method, char* ind_str, char* op_str, unsigned int is_mixed_dt, char* dc_str, unsigned int n_param_combos, char* pc_str, char* sc_str, char* funcname_str ) { // We only print the full datatype combination string if is_mixed_dt // is set and the operation is gemm. Otherwise, we print only // the first char (since they are all the same). if ( is_mixed_dt == TRUE && opid == BLIS_GEMM ) sprintf( funcname_str, "%s_%s%s", prefix_str, dc_str, op_str ); else sprintf( funcname_str, "%s_%c%s", prefix_str, dc_str[0], op_str ); // If the method is non-native (ie: induced), append a string // identifying the induced method. if ( method != BLIS_NAT ) sprintf( &funcname_str[strlen(funcname_str)], "%s", ind_str ); // We check the string length of pc_str in case the user is running an // operation that has parameters (and thus generally more than one // parameter combination), but has fixed all parameters in the input // file, which would result in n_param_combos to equal one. This way, // the function string contains the parameter string associated with // the parameters that were fixed. if ( n_param_combos > 1 || strlen(pc_str) > 0 ) sprintf( &funcname_str[strlen(funcname_str)], "_%s_%s", pc_str, sc_str ); else sprintf( &funcname_str[strlen(funcname_str)], "_%s", sc_str ); if ( strlen( funcname_str ) > MAX_FUNC_STRING_LENGTH ) libblis_test_printf_error( "Function name string length (%d) exceeds maximum (%d).\n", strlen( funcname_str ), MAX_FUNC_STRING_LENGTH ); } void libblis_test_build_dims_string( test_op_t* op, dim_t p_cur, char* dims_str ) { unsigned int i; // For level-1f experiments with fusing factors, we grab the fusing // factor from the op struct. We do something similar for micro-kernel // calls. 
if ( op->dimset == BLIS_TEST_DIMS_MF ) { //sprintf( &dims_str[strlen(dims_str)], " %5u %5u", sprintf( dims_str, " %5u %5u", ( unsigned int ) libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ), ( unsigned int ) op->dim_aux[0] ); } else if ( op->dimset == BLIS_TEST_DIMS_K ) { //sprintf( &dims_str[strlen(dims_str)], " %5u %5u %5u", sprintf( dims_str, " %5u %5u %5u", ( unsigned int ) op->dim_aux[0], ( unsigned int ) op->dim_aux[1], ( unsigned int ) libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ) ); } else if ( op->dimset == BLIS_TEST_NO_DIMS ) { //sprintf( &dims_str[strlen(dims_str)], " %5u %5u", sprintf( dims_str, " %5u %5u", ( unsigned int ) op->dim_aux[0], ( unsigned int ) op->dim_aux[1] ); } else // For all other operations, we just use the dim_spec[] values // and the current problem size. { // Initialize the string as empty. sprintf( dims_str, "%s", "" ); // Print all dimensions to a single string. for ( i = 0; i < op->n_dims; ++i ) { sprintf( &dims_str[strlen(dims_str)], " %5u", ( unsigned int ) libblis_test_get_dim_from_prob_size( op->dim_spec[i], p_cur ) ); } } } // % dtoper_params_storage m n k gflops resid result void libblis_test_build_col_labels_string( test_params_t* params, test_op_t* op, char* l_str ) { unsigned int n_spaces; char blank_str[64]; strcpy( l_str, "" ); if ( op->n_params > 0 ) { sprintf( &l_str[strlen(l_str)], "%c %s_%s", OUTPUT_COMMENT_CHAR, BLIS_FILEDATA_PREFIX_STR, "

__" ); } else // ( n_params == 0 ) { sprintf( &l_str[strlen(l_str)], "%c %s_%s", OUTPUT_COMMENT_CHAR, BLIS_FILEDATA_PREFIX_STR, "
_ " ); } if ( params->output_matlab_format ) n_spaces = 11; else n_spaces = 1; fill_string_with_n_spaces( blank_str, n_spaces ); sprintf( &l_str[strlen(l_str)], "%s", blank_str ); if ( op->dimset == BLIS_TEST_DIMS_MNK || op->dimset == BLIS_TEST_DIMS_MN || op->dimset == BLIS_TEST_DIMS_MK || op->dimset == BLIS_TEST_DIMS_M || op->dimset == BLIS_TEST_DIMS_K || op->dimset == BLIS_TEST_DIMS_MF || op->dimset == BLIS_TEST_NO_DIMS ) sprintf( &l_str[strlen(l_str)], " %5s", "m" ); if ( op->dimset == BLIS_TEST_DIMS_MNK || op->dimset == BLIS_TEST_DIMS_MN || op->dimset == BLIS_TEST_DIMS_K || op->dimset == BLIS_TEST_DIMS_MF || op->dimset == BLIS_TEST_NO_DIMS ) sprintf( &l_str[strlen(l_str)], " %5s", "n" ); if ( op->dimset == BLIS_TEST_DIMS_MNK || op->dimset == BLIS_TEST_DIMS_MK || op->dimset == BLIS_TEST_DIMS_K ) sprintf( &l_str[strlen(l_str)], " %5s", "k" ); sprintf( &l_str[strlen(l_str)], "%s", " gflops resid result" ); } void libblis_test_build_filename_string( char* prefix_str, char* op_str, char* funcname_str ) { sprintf( funcname_str, "%s_%s.m", prefix_str, op_str ); } void fill_string_with_n_spaces( char* str, unsigned int n_spaces ) { unsigned int i; // Initialze to empty string in case n_spaces == 0. sprintf( str, "%s", "" ); for ( i = 0; i < n_spaces; ++i ) sprintf( &str[i], " " ); } void libblis_test_mobj_create( test_params_t* params, num_t dt, trans_t trans, char storage, dim_t m, dim_t n, obj_t* a ) { dim_t gs = params->gs_spacing; bool_t alignment = params->alignment; siz_t elem_size = bli_dt_size( dt ); dim_t m_trans = m; dim_t n_trans = n; dim_t rs = 1; // Initialization avoids a compiler warning. dim_t cs = 1; // Initialization avoids a compiler warning. // Apply the trans parameter to the dimensions (if needed). bli_set_dims_with_trans( trans, m, n, &m_trans, &n_trans ); // Compute unaligned strides according to the storage case encoded in // the storage char, and then align the leading dimension if alignment // was requested. 
if ( storage == 'c' ) { rs = 1; cs = m_trans; if ( alignment ) cs = bli_align_dim_to_size( cs, elem_size, BLIS_HEAP_STRIDE_ALIGN_SIZE ); } else if ( storage == 'r' ) { rs = n_trans; cs = 1; if ( alignment ) rs = bli_align_dim_to_size( rs, elem_size, BLIS_HEAP_STRIDE_ALIGN_SIZE ); } else if ( storage == 'g' ) { // We apply (arbitrarily) a column tilt, instead of a row tilt, to // all general stride cases. rs = gs; cs = gs * m_trans; if ( alignment ) cs = bli_align_dim_to_size( cs, elem_size, BLIS_HEAP_STRIDE_ALIGN_SIZE ); } else { libblis_test_printf_error( "Invalid storage character: %c\n", storage ); } // Create the object using the dimensions and strides computed above. bli_obj_create( dt, m_trans, n_trans, rs, cs, a ); } #if 0 cntl_t* libblis_test_pobj_create( bszid_t bmult_id_m, bszid_t bmult_id_n, invdiag_t inv_diag, pack_t pack_schema, packbuf_t pack_buf, obj_t* a, obj_t* p, cntx_t* cntx ) { bool_t does_inv_diag; rntm_t rntm; if ( inv_diag == BLIS_NO_INVERT_DIAG ) does_inv_diag = FALSE; else does_inv_diag = TRUE; // Create a control tree node for the packing operation. cntl_t* cntl = bli_packm_cntl_create_node ( NULL, // we don't need the small block allocator from the runtime. NULL, // func ptr is not referenced b/c we don't call via l3 _int(). bli_packm_blk_var1, bmult_id_m, bmult_id_n, does_inv_diag, FALSE, FALSE, pack_schema, pack_buf, NULL // no child node needed ); // Initialize a local-to-BLIS rntm_t. This is simply so we have something // to pass into bli_l3_packm(). The function doesn't (currently) use the // runtime object, and even if it did, one with default values would work // fine here. bli_rntm_init( &rntm ); // Pack the contents of A to P. bli_l3_packm( a, p, cntx, &rntm, cntl, &BLIS_PACKM_SINGLE_THREADED ); // Return the control tree pointer so the caller can free the cntl_t and its // mem_t entry later on. 
return cntl; } #endif void libblis_test_vobj_create( test_params_t* params, num_t dt, char storage, dim_t m, obj_t* x ) { dim_t gs = params->gs_spacing; // Column vector (unit stride) if ( storage == 'c' ) bli_obj_create( dt, m, 1, 1, m, x ); // Row vector (unit stride) else if ( storage == 'r' ) bli_obj_create( dt, 1, m, m, 1, x ); // Column vector (non-unit stride) else if ( storage == 'j' ) bli_obj_create( dt, m, 1, gs, gs*m, x ); // Row vector (non-unit stride) else if ( storage == 'i' ) bli_obj_create( dt, 1, m, gs*m, gs, x ); else { libblis_test_printf_error( "Invalid storage character: %c\n", storage ); } } void libblis_test_vobj_randomize( test_params_t* params, bool_t normalize, obj_t* x ) { if ( params->rand_method == BLIS_TEST_RAND_REAL_VALUES ) bli_randv( x ); else // if ( params->rand_method == BLIS_TEST_RAND_NARROW_POW2 ) bli_randnv( x ); if ( normalize ) { num_t dt = bli_obj_dt( x ); num_t dt_r = bli_obj_dt_proj_to_real( x ); obj_t kappa; obj_t kappa_r; bli_obj_scalar_init_detached( dt, &kappa ); bli_obj_scalar_init_detached( dt_r, &kappa_r ); // Normalize vector elements. The following code ensures that we // always invert-scale by whole power of two. bli_normfv( x, &kappa_r ); libblis_test_ceil_pow2( &kappa_r ); bli_copysc( &kappa_r, &kappa ); bli_invertsc( &kappa ); bli_scalv( &kappa, x ); } } void libblis_test_mobj_randomize( test_params_t* params, bool_t normalize, obj_t* a ) { if ( params->rand_method == BLIS_TEST_RAND_REAL_VALUES ) bli_randm( a ); else // if ( params->rand_method == BLIS_TEST_RAND_NARROW_POW2 ) bli_randnm( a ); if ( normalize ) { #if 0 num_t dt = bli_obj_dt( a ); dim_t max_m_n = bli_obj_max_dim( a ); obj_t kappa; bli_obj_scalar_init_detached( dt, &kappa ); // Normalize vector elements by maximum matrix dimension. 
bli_setsc( 1.0/( double )max_m_n, 0.0, &kappa ); bli_scalm( &kappa, a ); #endif num_t dt = bli_obj_dt( a ); num_t dt_r = bli_obj_dt_proj_to_real( a ); obj_t kappa; obj_t kappa_r; bli_obj_scalar_init_detached( dt, &kappa ); bli_obj_scalar_init_detached( dt_r, &kappa_r ); // Normalize matrix elements. bli_norm1m( a, &kappa_r ); libblis_test_ceil_pow2( &kappa_r ); bli_copysc( &kappa_r, &kappa ); bli_invertsc( &kappa ); bli_scalm( &kappa, a ); } } void libblis_test_ceil_pow2( obj_t* alpha ) { double alpha_r; double alpha_i; bli_getsc( alpha, &alpha_r, &alpha_i ); alpha_r = pow( 2.0, ceil( log2( alpha_r ) ) ); bli_setsc( alpha_r, alpha_i, alpha ); } void libblis_test_mobj_load_diag( test_params_t* params, obj_t* a ) { // We assume that all elements of a were intialized on interval [-1,1]. // Load the diagonal by 2.0. bli_shiftd( &BLIS_TWO, a ); } void libblis_test_init_strings( void ) { strcpy( libblis_test_pass_string, BLIS_TEST_PASS_STRING ); strcpy( libblis_test_warn_string, BLIS_TEST_WARN_STRING ); strcpy( libblis_test_fail_string, BLIS_TEST_FAIL_STRING ); strcpy( libblis_test_param_chars[BLIS_TEST_PARAM_SIDE], BLIS_TEST_PARAM_SIDE_CHARS ); strcpy( libblis_test_param_chars[BLIS_TEST_PARAM_UPLO], BLIS_TEST_PARAM_UPLO_CHARS ); strcpy( libblis_test_param_chars[BLIS_TEST_PARAM_UPLODE], BLIS_TEST_PARAM_UPLODE_CHARS ); strcpy( libblis_test_param_chars[BLIS_TEST_PARAM_TRANS], BLIS_TEST_PARAM_TRANS_CHARS ); strcpy( libblis_test_param_chars[BLIS_TEST_PARAM_CONJ], BLIS_TEST_PARAM_CONJ_CHARS ); strcpy( libblis_test_param_chars[BLIS_TEST_PARAM_DIAG], BLIS_TEST_PARAM_DIAG_CHARS ); strcpy( libblis_test_store_chars[BLIS_TEST_MATRIX_OPERAND], BLIS_TEST_MSTORE_CHARS ); strcpy( libblis_test_store_chars[BLIS_TEST_VECTOR_OPERAND], BLIS_TEST_VSTORE_CHARS ); } void libblis_test_sleep( void ) { int i; libblis_test_printf_infoc( "Resuming in " ); for ( i = SECONDS_TO_SLEEP; i > 0; --i ) { libblis_test_printf_info( "%d ", i ); bli_sleep(1); } libblis_test_printf_info( "\n" ); } void 
libblis_test_abort( void ) { abort(); } void libblis_test_fopen_ofile( char* op_str, iface_t iface, FILE** output_stream ) { char filename_str[ MAX_FILENAME_LENGTH ]; if ( iface == BLIS_TEST_MT_FRONT_END ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); // Construct a filename string for the current operation. libblis_test_build_filename_string( BLIS_FILE_PREFIX_STR, op_str, filename_str ); // Open the output file (overwriting a previous instance, if it exists) // for writing (in binary mode). *output_stream = fopen( filename_str, "wb" ); // Check the output stream and report an error if something went wrong. libblis_test_fopen_check_stream( filename_str, *output_stream ); } void libblis_test_fclose_ofile( FILE* output_stream ) { fclose( output_stream ); } void libblis_test_fopen_check_stream( char* filename_str, FILE* stream ) { // Check for success. if ( stream == NULL ) { libblis_test_printf_error( "Failed to open file %s. Check existence (if file is being read), permissions (if file is being overwritten), and/or storage limit.\n", filename_str ); } } void libblis_test_read_next_line( char* buffer, FILE* input_stream ) { char temp[ INPUT_BUFFER_SIZE ]; // We want to read at least one line, so we use a do-while loop. do { // Read the next line into a temporary buffer and check success. if ( fgets( temp, INPUT_BUFFER_SIZE-1, input_stream ) == NULL ) { if ( feof( input_stream ) ) libblis_test_printf_error( "Error reading input file: encountered unexpected EOF." ); else libblis_test_printf_error( "Error (non-EOF) reading input file." ); } } // We continue to read lines into buffer until the line is neither // commented nor blank. while ( temp[0] == INPUT_COMMENT_CHAR || temp[0] == '\n' || temp[0] == ' ' || temp[0] == '\t' ); // Save the string in temp, up to first white space character, into buffer. 
//sscanf( temp, "%s ", buffer ); strcpy( buffer, temp ); //printf( "libblis_test_read_next_line() read: %s\n", buffer ); } void libblis_test_fprintf( FILE* output_stream, char* message, ... ) { va_list args; // Initialize variable argument environment. va_start( args, message ); // Parse the received message and print its components. libblis_test_parse_message( output_stream, message, args ); // Shutdown variable argument environment and clean up stack. va_end( args ); // Flush the output stream. fflush( output_stream ); } void libblis_test_fprintf_c( FILE* output_stream, char* message, ... ) { va_list args; fprintf( output_stream, "%c ", OUTPUT_COMMENT_CHAR ); // Initialize variable argument environment. va_start( args, message ); // Parse the received message and print its components. libblis_test_parse_message( output_stream, message, args ); // Shutdown variable argument environment and clean up stack. va_end( args ); // Flush the output stream. fflush( output_stream ); } void libblis_test_printf_info( char* message, ... ) { FILE* output_stream = stdout; va_list args; // Initialize variable argument environment. va_start( args, message ); // Parse the received message and print its components. libblis_test_parse_message( output_stream, message, args ); // Shutdown variable argument environment and clean up stack. va_end( args ); // Flush the output stream. fflush( output_stream ); } void libblis_test_printf_infoc( char* message, ... ) { FILE* output_stream = stdout; va_list args; fprintf( output_stream, "%c ", OUTPUT_COMMENT_CHAR ); // Initialize variable argument environment. va_start( args, message ); // Parse the received message and print its components. libblis_test_parse_message( output_stream, message, args ); // Shutdown variable argument environment and clean up stack. va_end( args ); // Flush the output stream. fflush( output_stream ); } void libblis_test_printf_error( char* message, ... 
) { FILE* output_stream = stderr; va_list args; fprintf( output_stream, "%s: *** error ***: ", libblis_test_binary_name ); // Initialize variable argument environment. va_start( args, message ); // Parse the received message and print its components. libblis_test_parse_message( output_stream, message, args ); // Shutdown variable argument environment and clean up stack. va_end( args ); // Flush the output stream. fflush( output_stream ); // Exit. exit(1); } void libblis_test_parse_message( FILE* output_stream, char* message, va_list args ) { int c, cf; char format_spec[8]; unsigned int the_uint; int the_int; double the_double; char* the_string; char the_char; // Begin looping over message to insert variables wherever there are // format specifiers. for ( c = 0; message[c] != '\0'; ) { if ( message[c] != '%' ) { fprintf( output_stream, "%c", message[c] ); c += 1; } else if ( message[c] == '%' && message[c+1] == '%' ) // handle escaped '%' chars. { fprintf( output_stream, "%c", message[c] ); c += 2; } else { // Save the format string if there is one. format_spec[0] = '%'; for ( c += 1, cf = 1; strchr( "udefsc", message[c] ) == NULL; ++c, ++cf ) { format_spec[cf] = message[c]; } // Add the final type specifier, and null-terminate the string. format_spec[cf] = message[c]; format_spec[cf+1] = '\0'; // Switch based on type, since we can't predict what will // va_args() will return. 
switch ( message[c] ) { case 'u': the_uint = va_arg( args, unsigned int ); fprintf( output_stream, format_spec, the_uint ); break; case 'd': the_int = va_arg( args, int ); fprintf( output_stream, format_spec, the_int ); break; case 'e': the_double = va_arg( args, double ); fprintf( output_stream, format_spec, the_double ); break; case 'f': the_double = va_arg( args, double ); fprintf( output_stream, format_spec, the_double ); break; case 's': the_string = va_arg( args, char* ); //fprintf( output_stream, "%s", the_string ); fprintf( output_stream, format_spec, the_string ); break; case 'c': the_char = va_arg( args, int ); fprintf( output_stream, "%c", the_char ); break; } // Move to next character past type specifier. c += 1; } } } void libblis_test_parse_command_line( int argc, char** argv ) { bool_t gave_option_g = FALSE; bool_t gave_option_o = FALSE; int opt; char opt_ch; getopt_t state; // Copy the binary name to a global string so we can use it later. strncpy( libblis_test_binary_name, argv[0], MAX_BINARY_NAME_LENGTH ); // Initialize the state for running bli_getopt(). Here, 0 is the // initial value for opterr, which suppresses error messages. bli_getopt_init_state( 0, &state ); // Process all option arguments until we get a -1, which means we're done. while( (opt = bli_getopt( argc, argv, "g:o:", &state )) != -1 ) { // Explicitly typecast opt, which is an int, to a char. (Failing to // typecast resulted in at least one user-reported problem whereby // opt was being filled with garbage.) 
opt_ch = ( char )opt; switch( opt_ch ) { case 'g': libblis_test_printf_infoc( "detected -g option; using \"%s\" for parameters filename.\n", state.optarg ); strncpy( libblis_test_parameters_filename, state.optarg, MAX_FILENAME_LENGTH ); gave_option_g = TRUE; break; case 'o': libblis_test_printf_infoc( "detected -o option; using \"%s\" for operations filename.\n", state.optarg ); strncpy( libblis_test_operations_filename, state.optarg, MAX_FILENAME_LENGTH ); gave_option_o = TRUE; break; case '?': libblis_test_printf_error( "unexpected option '%c' given or missing option argument\n", state.optopt ); break; default: libblis_test_printf_error( "unexpected option chararcter returned from getopt: %c\n", opt_ch ); } } if ( gave_option_g == FALSE ) { libblis_test_printf_infoc( "no -g option given; defaulting to \"%s\" for parameters filename.\n", PARAMETERS_FILENAME ); // Copy default parameters filename into its global string. strncpy( libblis_test_parameters_filename, PARAMETERS_FILENAME, MAX_FILENAME_LENGTH ); } if ( gave_option_o == FALSE ) { libblis_test_printf_infoc( "no -o option given; defaulting to \"%s\" for operations filename.\n", OPERATIONS_FILENAME ); // Copy default operations filename into its global string. strncpy( libblis_test_operations_filename, OPERATIONS_FILENAME, MAX_FILENAME_LENGTH ); } // If there are still arguments remaining after getopt() processing is // complete, print an error. if ( state.optind < argc ) { libblis_test_printf_error( "Encountered unexpected non-option argument: %s\n", argv[ state.optind ] ); } } void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ) { if ( bli_obj_has_zero_dim( c ) ) { *perf = 0.0; *resid = 0.0; } } int libblis_test_op_is_disabled( test_op_t* op ) { int r_val; // If there was at least one individual override, then an op test is // disabled if it is NOT equal to ENABLE_ONLY. If there were no // individual overrides, then an op test is disabled if it is equal // to DISABLE. 
if ( op->ops->indiv_over == TRUE ) { if ( op->op_switch != ENABLE_ONLY ) r_val = TRUE; else r_val = FALSE; } else // if ( op->ops->indiv_over == FALSE ) { if ( op->op_switch == DISABLE ) r_val = TRUE; else r_val = FALSE; } return r_val; } int libblis_test_op_is_done( test_op_t* op ) { return op->test_done; } int libblis_test_util_is_disabled( test_op_t* op ) { if ( op->ops->util_over == DISABLE ) return TRUE; else return FALSE; } int libblis_test_l1v_is_disabled( test_op_t* op ) { if ( op->ops->l1v_over == DISABLE ) return TRUE; else return FALSE; } int libblis_test_l1m_is_disabled( test_op_t* op ) { if ( op->ops->l1m_over == DISABLE ) return TRUE; else return FALSE; } int libblis_test_l1f_is_disabled( test_op_t* op ) { if ( op->ops->l1f_over == DISABLE ) return TRUE; else return FALSE; } int libblis_test_l2_is_disabled( test_op_t* op ) { if ( op->ops->l2_over == DISABLE ) return TRUE; else return FALSE; } int libblis_test_l3ukr_is_disabled( test_op_t* op ) { if ( op->ops->l3ukr_over == DISABLE ) return TRUE; else return FALSE; } int libblis_test_l3_is_disabled( test_op_t* op ) { if ( op->ops->l3_over == DISABLE ) return TRUE; else return FALSE; } // --- int libblis_test_dt_str_has_sp_char( test_params_t* params ) { return libblis_test_dt_str_has_sp_char_str( params->n_datatypes, params->datatype_char ); } int libblis_test_dt_str_has_sp_char_str( int n, char* str ) { for ( int i = 0; i < n; ++i ) { if ( str[i] == 's' || str[i] == 'c' ) return TRUE; } return FALSE; } int libblis_test_dt_str_has_dp_char( test_params_t* params ) { return libblis_test_dt_str_has_dp_char_str( params->n_datatypes, params->datatype_char ); } int libblis_test_dt_str_has_dp_char_str( int n, char* str ) { for ( int i = 0; i < n; ++i ) { if ( str[i] == 'd' || str[i] == 'z' ) return TRUE; } return FALSE; } // --- int libblis_test_dt_str_has_rd_char( test_params_t* params ) { return libblis_test_dt_str_has_rd_char_str( params->n_datatypes, params->datatype_char ); } int 
libblis_test_dt_str_has_rd_char_str( int n, char* str ) { for ( int i = 0; i < n; ++i ) { if ( str[i] == 's' || str[i] == 'd' ) return TRUE; } return FALSE; } int libblis_test_dt_str_has_cd_char( test_params_t* params ) { return libblis_test_dt_str_has_cd_char_str( params->n_datatypes, params->datatype_char ); } int libblis_test_dt_str_has_cd_char_str( int n, char* str ) { for ( int i = 0; i < n; ++i ) { if ( str[i] == 'c' || str[i] == 'z' ) return TRUE; } return FALSE; } // --- unsigned int libblis_test_count_combos ( unsigned int n_operands, char* spec_str, char** char_sets ) { unsigned int n_combos = 1; for ( int i = 0; i < n_operands; ++i ) { if ( spec_str[i] == '?' ) n_combos *= strlen( char_sets[i] ); } return n_combos; } char libblis_test_proj_dtchar_to_precchar( char dt_char ) { char r_val = dt_char; if ( r_val == 'c' ) r_val = 's'; else if ( r_val == 'z' ) r_val = 'd'; return r_val; } blis-0.6.1/testsuite/src/test_libblis.h000066400000000000000000000403421360743507500201340ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ // // --- System headers ---------------------------------------------------------- // // For va_* functions. #include // For string manipulation functions. #include // For other string manipulation functions (e.g. isspace()). #include // For POSIX stuff. 
#ifndef _MSC_VER #include #endif // // --- Constants and types ----------------------------------------------------- // #define PARAMETERS_FILENAME "input.general" #define OPERATIONS_FILENAME "input.operations" #define INPUT_COMMENT_CHAR '#' #define OUTPUT_COMMENT_CHAR '%' #define BLIS_FILE_PREFIX_STR "libblis_test" #define BLIS_FILEDATA_PREFIX_STR "blis" #define INPUT_BUFFER_SIZE 256 #define MAX_FILENAME_LENGTH 1000 #define MAX_BINARY_NAME_LENGTH 256 #define MAX_FUNC_STRING_LENGTH 26 #define FLOPS_PER_UNIT_PERF 1e9 #define MAX_NUM_MSTORAGE 4 #define MAX_NUM_VSTORAGE 5 #define MAX_NUM_DATATYPES 4 #define MAX_NUM_PARAMETERS 7 #define MAX_NUM_DIMENSIONS 3 #define MAX_NUM_OPERANDS 5 #define MAX_PASS_STRING_LENGTH 32 #define BLIS_TEST_FAIL_STRING "FAILURE" #define BLIS_TEST_WARN_STRING "MARGINAL" #define BLIS_TEST_PASS_STRING "PASS" #define ON_FAILURE_IGNORE_CHAR 'i' #define ON_FAILURE_SLEEP_CHAR 's' #define ON_FAILURE_ABORT_CHAR 'a' #define SECONDS_TO_SLEEP 3 #define DISABLE 0 #define ENABLE 1 #define ENABLE_ONLY 2 #define MAX_PARAM_VALS_PER_TYPE 4 #define BLIS_TEST_PARAM_SIDE_CHARS "lr" #define BLIS_TEST_PARAM_UPLO_CHARS "lu" #define BLIS_TEST_PARAM_UPLODE_CHARS "dlu" #define BLIS_TEST_PARAM_TRANS_CHARS "ncth" #define BLIS_TEST_PARAM_CONJ_CHARS "nc" #define BLIS_TEST_PARAM_DIAG_CHARS "nu" #define NUM_PARAM_TYPES 6 typedef enum { BLIS_TEST_PARAM_SIDE = 0, BLIS_TEST_PARAM_UPLO = 1, BLIS_TEST_PARAM_UPLODE = 2, BLIS_TEST_PARAM_TRANS = 3, BLIS_TEST_PARAM_CONJ = 4, BLIS_TEST_PARAM_DIAG = 5, } param_t; #define MAX_STORE_VALS_PER_TYPE 4 #define BLIS_TEST_MSTORE_CHARS "crg" #define BLIS_TEST_VSTORE_CHARS "crji" #define NUM_OPERAND_TYPES 2 typedef enum { BLIS_TEST_MATRIX_OPERAND = 0, BLIS_TEST_VECTOR_OPERAND = 1, } operand_t; typedef enum { BLIS_TEST_DIMS_MNK = 0, BLIS_TEST_DIMS_MN = 1, BLIS_TEST_DIMS_MK = 2, BLIS_TEST_DIMS_M = 3, BLIS_TEST_DIMS_MF = 4, BLIS_TEST_DIMS_K = 5, BLIS_TEST_NO_DIMS = 6, } dimset_t; typedef enum { BLIS_TEST_SEQ_UKERNEL = 0, BLIS_TEST_SEQ_FRONT_END = 
1, BLIS_TEST_MT_FRONT_END = 2, } iface_t; typedef enum { BLIS_TEST_RAND_REAL_VALUES = 0, BLIS_TEST_RAND_NARROW_POW2 = 1, } rand_t; typedef struct { unsigned int n_repeats; unsigned int n_mstorage; unsigned int n_vstorage; char storage[ NUM_OPERAND_TYPES ][ MAX_NUM_MSTORAGE + 1 ]; unsigned int mix_all_storage; unsigned int alignment; unsigned int rand_method; unsigned int gs_spacing; unsigned int n_datatypes; char datatype_char[ MAX_NUM_DATATYPES + 1 ]; num_t datatype[ MAX_NUM_DATATYPES + 1 ]; unsigned int mixed_domain; unsigned int mixed_precision; unsigned int p_first; unsigned int p_max; unsigned int p_inc; unsigned int ind_enable[ BLIS_NUM_IND_METHODS ]; unsigned int n_app_threads; char reaction_to_failure; unsigned int output_matlab_format; unsigned int output_files; unsigned int error_checking_level; } test_params_t; typedef struct { // parent test_ops_t struct struct test_ops_s* ops; opid_t opid; int op_switch; #if 0 int front_seq; #endif unsigned int n_dims; dimset_t dimset; int dim_spec[ MAX_NUM_DIMENSIONS ]; int dim_aux[ MAX_NUM_DIMENSIONS ]; unsigned int n_params; char params[ MAX_NUM_PARAMETERS ]; bool_t test_done; } test_op_t; typedef struct test_ops_s { // individual override int indiv_over; // section overrides int util_over; int l1v_over; int l1m_over; int l1f_over; int l2_over; int l3ukr_over; int l3_over; // util test_op_t randv; test_op_t randm; // level-1v test_op_t addv; test_op_t amaxv; test_op_t axpbyv; test_op_t axpyv; test_op_t copyv; test_op_t dotv; test_op_t dotxv; test_op_t normfv; test_op_t scalv; test_op_t scal2v; test_op_t setv; test_op_t subv; test_op_t xpbyv; // level-1m test_op_t addm; test_op_t axpym; test_op_t copym; test_op_t normfm; test_op_t scalm; test_op_t scal2m; test_op_t setm; test_op_t subm; test_op_t xpbym; // level-1f test_op_t axpy2v; test_op_t dotaxpyv; test_op_t axpyf; test_op_t dotxf; test_op_t dotxaxpyf; // level-2 test_op_t gemv; test_op_t ger; test_op_t hemv; test_op_t her; test_op_t her2; test_op_t symv; 
test_op_t syr; test_op_t syr2; test_op_t trmv; test_op_t trsv; // level-3 micro-kernels test_op_t gemm_ukr; test_op_t trsm_ukr; test_op_t gemmtrsm_ukr; // level-3 test_op_t gemm; test_op_t hemm; test_op_t herk; test_op_t her2k; test_op_t symm; test_op_t syrk; test_op_t syr2k; test_op_t trmm; test_op_t trmm3; test_op_t trsm; } test_ops_t; typedef struct { double failwarn; double warnpass; } thresh_t; typedef struct thread_data { test_params_t* params; test_ops_t* ops; unsigned int nt; unsigned int id; unsigned int xc; //bli_pthread_mutex_t* mutex; bli_pthread_barrier_t* barrier; } thread_data_t; // // --- Prototypes -------------------------------------------------------------- // void* libblis_test_thread_entry( void* tdata_void ); void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops ); void libblis_test_all_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ); void libblis_test_utility_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ); void libblis_test_level1m_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ); void libblis_test_level1v_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ); void libblis_test_level1f_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ); void libblis_test_level2_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ); void libblis_test_level3_ukrs( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ); void libblis_test_level3_ops( thread_data_t* tdata, test_params_t* params, test_ops_t* ops ); void libblis_test_read_params_file( char* input_filename, test_params_t* params ); void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ); void libblis_test_read_section_override( test_ops_t* ops, FILE* input_stream, int* override ); void libblis_test_read_op_info( test_ops_t* ops, FILE* input_stream, opid_t opid, dimset_t dimset, unsigned int n_params, test_op_t* op ); // --- Struct output --- void 
libblis_test_output_section_overrides( FILE* os, test_ops_t* ops ); void libblis_test_output_params_struct( FILE* os, test_params_t* params ); void libblis_test_output_op_struct( FILE* os, test_op_t* op, char* op_str ); // --- Mapping --- char* libblis_test_get_string_for_result( double residual, num_t dt, thresh_t* thresh ); param_t libblis_test_get_param_type_for_char( char p_type ); operand_t libblis_test_get_operand_type_for_char( char o_type ); unsigned int libblis_test_get_n_dims_from_dimset( dimset_t dimset ); unsigned int libblis_test_get_n_dims_from_string( char* dims_str ); dim_t libblis_test_get_dim_from_prob_size( int dim_spec, unsigned int p_size ); // --- Parameter/storage string generation --- void libblis_test_fill_param_strings( char* p_str, char** chars_for_param, unsigned int n_params, unsigned int n_param_combos, char** pc_str ); void carryover( unsigned int* c, unsigned int* n_vals_for_param, unsigned int n_params ); // --- Operation driver --- void libblis_test_op_driver ( thread_data_t* tdata, test_params_t* params, test_op_t* op, iface_t iface, char* op_str, char* p_types, char* o_types, thresh_t* thresh, void (*f_exp) (test_params_t*, // params struct test_op_t*, // op struct iface_t, // iface char*, // dc_str (current datatype string) char*, // pc_str (current param string) char*, // sc_str (current storage string) unsigned int, // p_cur (current problem size) double*, // perf double*) // residual ); // --- Generate experiment string labels --- void libblis_test_build_function_string ( char* prefix_str, opid_t opid, ind_t method, char* ind_str, char* op_str, unsigned int is_mixed_dt, char* dc_str, unsigned int n_param_combos, char* pc_str, char* sc_str, char* funcname_str ); void libblis_test_build_dims_string( test_op_t* op, dim_t p_cur, char* dims_str ); void libblis_test_build_filename_string( char* prefix_str, char* op_str, char* funcname_str ); void libblis_test_build_col_labels_string( test_params_t* params, test_op_t* op, char* 
l_str ); void fill_string_with_n_spaces( char* str, unsigned int n_spaces ); // --- Create object --- void libblis_test_mobj_create( test_params_t* params, num_t dt, trans_t trans, char storage, dim_t m, dim_t n, obj_t* a ); cntl_t* libblis_test_pobj_create( bszid_t bmult_id_m, bszid_t bmult_id_n, invdiag_t inv_diag, pack_t pack_schema, packbuf_t pack_buf, obj_t* a, obj_t* p, cntx_t* cntx ); void libblis_test_vobj_create( test_params_t* params, num_t dt, char storage, dim_t m, obj_t* x ); // --- Randomize/initialize object --- void libblis_test_vobj_randomize( test_params_t* params, bool_t normalize, obj_t* x ); void libblis_test_mobj_randomize( test_params_t* params, bool_t normalize, obj_t* a ); void libblis_test_mobj_load_diag( test_params_t* params, obj_t* a ); void libblis_test_ceil_pow2( obj_t* alpha ); // --- Global string initialization --- void libblis_test_init_strings( void ); // --- System wrappers --- void libblis_test_sleep( void ); void libblis_test_abort( void ); // --- File I/O wrappers --- void libblis_test_fopen_ofile( char* op_str, iface_t iface, FILE** output_stream ); void libblis_test_fclose_ofile( FILE* output_stream ); void libblis_test_fopen_check_stream( char* filename_str, FILE* stream ); void libblis_test_read_next_line( char* buffer, FILE* input_stream ); // --- Custom fprintf-related --- void libblis_test_fprintf( FILE* output_stream, char* message, ... ); void libblis_test_fprintf_c( FILE* output_stream, char* message, ... ); void libblis_test_printf_info( char* message, ... ); void libblis_test_printf_infoc( char* message, ... ); void libblis_test_printf_error( char* message, ... 
); void libblis_test_parse_message( FILE* output_stream, char* message, va_list args ); void libblis_test_parse_command_line( int argc, char** argv ); // --- Miscellaneous --- void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); int libblis_test_op_is_disabled( test_op_t* op ); int libblis_test_op_is_done( test_op_t* op ); int libblis_test_util_is_disabled( test_op_t* op ); int libblis_test_l1v_is_disabled( test_op_t* op ); int libblis_test_l1m_is_disabled( test_op_t* op ); int libblis_test_l1f_is_disabled( test_op_t* op ); int libblis_test_l2_is_disabled( test_op_t* op ); int libblis_test_l3ukr_is_disabled( test_op_t* op ); int libblis_test_l3_is_disabled( test_op_t* op ); int libblis_test_dt_str_has_sp_char( test_params_t* params ); int libblis_test_dt_str_has_sp_char_str( int n, char* str ); int libblis_test_dt_str_has_dp_char( test_params_t* params ); int libblis_test_dt_str_has_dp_char_str( int n, char* str ); int libblis_test_dt_str_has_rd_char( test_params_t* params ); int libblis_test_dt_str_has_rd_char_str( int n, char* str ); int libblis_test_dt_str_has_cd_char( test_params_t* params ); int libblis_test_dt_str_has_cd_char_str( int n, char* str ); unsigned int libblis_test_count_combos ( unsigned int n_operands, char* spec_str, char** char_sets ); char libblis_test_proj_dtchar_to_precchar( char dt_char ); // // --- Test module headers ----------------------------------------------------- // // Utility operations #include "test_randv.h" #include "test_randm.h" // Level-1v #include "test_addv.h" #include "test_amaxv.h" #include "test_axpbyv.h" #include "test_axpyv.h" #include "test_copyv.h" #include "test_dotv.h" #include "test_dotxv.h" #include "test_normfv.h" #include "test_scalv.h" #include "test_scal2v.h" #include "test_setv.h" #include "test_subv.h" #include "test_xpbyv.h" // Level-1m #include "test_addm.h" #include "test_axpym.h" #include "test_copym.h" #include "test_normfm.h" #include "test_scalm.h" #include "test_scal2m.h" 
#include "test_setm.h" #include "test_subm.h" #include "test_xpbym.h" // Level-1f kernels #include "test_axpy2v.h" #include "test_dotaxpyv.h" #include "test_axpyf.h" #include "test_dotxf.h" #include "test_dotxaxpyf.h" // Level-2 #include "test_gemv.h" #include "test_ger.h" #include "test_hemv.h" #include "test_her.h" #include "test_her2.h" #include "test_symv.h" #include "test_syr.h" #include "test_syr2.h" #include "test_trmv.h" #include "test_trsv.h" // Level-3 micro-kernels #include "test_gemm_ukr.h" #include "test_trsm_ukr.h" #include "test_gemmtrsm_ukr.h" // Level-3 #include "test_gemm.h" #include "test_hemm.h" #include "test_herk.h" #include "test_her2k.h" #include "test_symm.h" #include "test_syrk.h" #include "test_syr2k.h" #include "test_trmm.h" #include "test_trmm3.h" #include "test_trsm.h" blis-0.6.1/testsuite/src/test_normfm.c000066400000000000000000000171061360743507500200070ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "normfm"; static char* o_types = "m"; // x static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_normfm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_normfm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_normfm_impl ( iface_t iface, obj_t* x, obj_t* norm ); void libblis_test_normfm_check ( test_params_t* params, obj_t* beta, obj_t* x, obj_t* norm, double* resid ); void libblis_test_normfm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_setm( tdata, params, &(op->ops->setm) ); } void libblis_test_normfm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. 
if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_normfm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_normfm_experiment ); } } void libblis_test_normfm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; num_t dt_real; dim_t m, n; obj_t beta, norm; obj_t x; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Compute the real projection of the chosen datatype. dt_real = bli_dt_proj_to_real( datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. // Create test scalars. bli_obj_scalar_init_detached( datatype, &beta ); bli_obj_scalar_init_detached( dt_real, &norm ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &x ); // Initialize beta to 2 - 2i. bli_setsc( 2.0, -2.0, &beta ); // Set all elements of x to beta. bli_setm( &beta, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_normfm_impl( iface, &x, &norm ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. 
libblis_test_normfm_check( params, &beta, &x, &norm, resid ); // Zero out performance and residual if input matrix is empty. libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. bli_obj_free( &x ); } void libblis_test_normfm_impl ( iface_t iface, obj_t* x, obj_t* norm ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_normfm( x, norm ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_normfm_check ( test_params_t* params, obj_t* beta, obj_t* x, obj_t* norm, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( x ); dim_t m = bli_obj_length( x ); dim_t n = bli_obj_width( x ); obj_t m_r, n_r, temp_r; double junk; // // Pre-conditions: // - x is set to beta. // Note: // - beta should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // norm := normfm( x ) // // is functioning correctly if // // norm = sqrt( absqsc( beta ) * m * n ) // // where m and n are the dimensions of x. // bli_obj_scalar_init_detached( dt_real, &temp_r ); bli_obj_scalar_init_detached( dt_real, &m_r ); bli_obj_scalar_init_detached( dt_real, &n_r ); bli_setsc( ( double )m, 0.0, &m_r ); bli_setsc( ( double )n, 0.0, &n_r ); bli_absqsc( beta, &temp_r ); bli_mulsc( &m_r, &temp_r ); bli_mulsc( &n_r, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); bli_subsc( &temp_r, norm ); bli_getsc( norm, resid, &junk ); } blis-0.6.1/testsuite/src/test_normfm.h000066400000000000000000000035011360743507500200060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_normfm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_normfv.c000066400000000000000000000164431360743507500200230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "normfv"; static char* o_types = "v"; // x static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_normfv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_normfv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_normfv_impl ( iface_t iface, obj_t* x, obj_t* norm ); void libblis_test_normfv_check ( test_params_t* params, obj_t* beta, obj_t* x, obj_t* norm, double* resid ); void libblis_test_normfv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_setv( tdata, params, &(op->ops->setv) ); } void libblis_test_normfv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_normfv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_normfv_experiment ); } } void libblis_test_normfv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; num_t dt_real; dim_t m; obj_t beta, norm; obj_t x; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Compute the real projection of the chosen datatype. dt_real = bli_dt_proj_to_real( datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. 
// Create test scalars. bli_obj_scalar_init_detached( datatype, &beta ); bli_obj_scalar_init_detached( dt_real, &norm ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); // Initialize beta to 2 - 2i. bli_setsc( 2.0, -2.0, &beta ); // Set all elements of x to beta. bli_setv( &beta, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_normfv_impl( iface, &x, &norm ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_normfv_check( params, &beta, &x, &norm, resid ); // Zero out performance and residual if input vector is empty. libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. bli_obj_free( &x ); } void libblis_test_normfv_impl ( iface_t iface, obj_t* x, obj_t* norm ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_normfv( x, norm ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_normfv_check ( test_params_t* params, obj_t* beta, obj_t* x, obj_t* norm, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( x ); dim_t m = bli_obj_vector_dim( x ); obj_t m_r, temp_r; double junk; // // Pre-conditions: // - x is set to beta. // Note: // - beta should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // norm := normfv( x ) // // is functioning correctly if // // norm = sqrt( absqsc( beta ) * m ) // // where m is the length of x. 
// bli_obj_scalar_init_detached( dt_real, &temp_r ); bli_obj_scalar_init_detached( dt_real, &m_r ); bli_setsc( ( double )m, 0.0, &m_r ); bli_absqsc( beta, &temp_r ); bli_mulsc( &m_r, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); bli_subsc( &temp_r, norm ); bli_getsc( norm, resid, &junk ); } blis-0.6.1/testsuite/src/test_normfv.h000066400000000000000000000035011360743507500200170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_normfv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_randm.c000066400000000000000000000214571360743507500176160ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "randm"; static char* o_types = "m"; // a static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_randm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_randm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_randm_impl ( iface_t iface, obj_t* x ); void libblis_test_randm_check ( test_params_t* params, obj_t* x, double* resid ); void libblis_test_randm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // No dependencies. } void libblis_test_randm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_util_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_randm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_randm_experiment ); } } void libblis_test_randm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; char x_store; obj_t x; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. // Extract the storage character for each operand. x_store = sc_str[0]; // Create the test objects. libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, x_store, m, n, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_randm_impl( iface, &x ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. // For randm(), we don't return a meaningful residual/diff, since we can't // really say for sure what is "random" and what is not, so instead we // manually perform some checks that will fail under some scenarios whic // we consider to be likely. libblis_test_randm_check( params, &x, resid ); // Zero out performance and residual if input matrix is empty. libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); } void libblis_test_randm_impl ( iface_t iface, obj_t* x ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_randm( x ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_randm_check ( test_params_t* params, obj_t* x, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( x ); dim_t m_x = bli_obj_length( x ); dim_t n_x = bli_obj_width( x ); obj_t sum; // // The two most likely ways that randm would fail is if all elements // were zero, or if all elements were greater than or equal to one. // We check both of these conditions by computing the sum of the // absolute values of the elements of x. // *resid = 0.0; bli_obj_scalar_init_detached( dt_real, &sum ); bli_absumm( x, &sum ); if ( bli_is_float( dt_real ) ) { float* sum_x = bli_obj_buffer_at_off( &sum ); if ( *sum_x == *bli_d0 ) *resid = 1.0; else if ( *sum_x >= 2.0 * m_x * n_x ) *resid = 2.0; } else // if ( bli_is_double( dt_real ) ) { double* sum_x = bli_obj_buffer_at_off( &sum ); if ( *sum_x == *bli_d0 ) *resid = 1.0; else if ( *sum_x >= 2.0 * m_x * n_x ) *resid = 2.0; } } #define FUNCPTR_T absumm_fp typedef void (*FUNCPTR_T)( dim_t m, dim_t n, void* x, inc_t rs_x, inc_t cs_x, void* sum_x ); static FUNCPTR_T GENARRAY(ftypes,absumm); void bli_absumm ( obj_t* x, obj_t* sum_x ) { num_t dt = bli_obj_dt( x ); dim_t m = bli_obj_length( x ); dim_t n = bli_obj_width( x ); void* buf_x = bli_obj_buffer_at_off( x ); inc_t rs_x = bli_obj_row_stride( x ); inc_t cs_x = bli_obj_col_stride( x ); void* buf_sum_x = bli_obj_buffer_at_off( sum_x ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. f = ftypes[dt]; // Invoke the function. 
f( m, n, buf_x, rs_x, cs_x, buf_sum_x ); } #undef GENTFUNCR #define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname)( \ dim_t m, \ dim_t n, \ void* x, inc_t rs_x, inc_t cs_x, \ void* sum_x \ ) \ { \ ctype* x_cast = x; \ ctype_r* sum_x_cast = sum_x; \ ctype_r abs_chi1; \ ctype_r sum; \ dim_t i, j; \ \ PASTEMAC(chr,set0s)( sum ); \ \ for ( j = 0; j < n; j++ ) \ { \ for ( i = 0; i < m; i++ ) \ { \ ctype* chi1 = x_cast + (i )*rs_x + (j )*cs_x; \ \ PASTEMAC2(ch,chr,abval2s)( *chi1, abs_chi1 ); \ PASTEMAC2(chr,chr,adds)( abs_chi1, sum ); \ } \ } \ \ PASTEMAC2(chr,chr,copys)( sum, *sum_x_cast ); \ } INSERT_GENTFUNCR_BASIC0( absumm ) blis-0.6.1/testsuite/src/test_randm.h000066400000000000000000000043461360743507500176210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_randm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void bli_absumm ( obj_t* x, obj_t* sum_x ); #undef GENTPROTR #define GENTPROTR( ctype, ctype_r, ch, chr, varname ) \ \ void PASTEMAC(ch,varname)( \ dim_t m, \ dim_t n, \ void* x, inc_t rs_x, inc_t cs_x, \ void* sum_x \ ); INSERT_GENTPROTR_BASIC0( absumm ) blis-0.6.1/testsuite/src/test_randv.c000066400000000000000000000157511360743507500176270ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "randv"; static char* o_types = "v"; // x static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_randv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_randv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_randv_impl ( iface_t iface, obj_t* x ); void libblis_test_randv_check ( test_params_t* params, obj_t* x, double* resid ); void libblis_test_randv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // No dependencies. } void libblis_test_randv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. 
if ( libblis_test_op_is_disabled( op ) || libblis_test_util_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_randv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_randv_experiment ); } } void libblis_test_randv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; char x_store; obj_t x; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. // Extract the storage character for each operand. x_store = sc_str[0]; // Create the test objects. libblis_test_vobj_create( params, datatype, x_store, m, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_randv_impl( iface, &x ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. // For randv(), we don't return a meaningful residual/diff, since we can't // really say for sure what is "random" and what is not, so instead we // manually perform some checks that will fail under some scenarios whic // we consider to be likely. libblis_test_randv_check( params, &x, resid ); // Zero out performance and residual if output vector is empty. 
libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. bli_obj_free( &x ); } void libblis_test_randv_impl ( iface_t iface, obj_t* x ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_randv( x ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_randv_check ( test_params_t* params, obj_t* x, double* resid ) { num_t dt_real = bli_obj_dt_proj_to_real( x ); dim_t m_x = bli_obj_vector_dim( x ); obj_t sum; *resid = 0.0; // // The two most likely ways that randv would fail is if all elements // were zero, or if all elements were greater than or equal to one. // We check both of these conditions by computing the sum of the // absolute values of the elements of x. // bli_obj_scalar_init_detached( dt_real, &sum ); bli_norm1v( x, &sum ); if ( bli_is_float( dt_real ) ) { float* sum_x = bli_obj_buffer_at_off( &sum ); if ( *sum_x == *bli_d0 ) *resid = 1.0; else if ( *sum_x >= 2.0 * m_x ) *resid = 2.0; } else // if ( bli_is_double( dt_real ) ) { double* sum_x = bli_obj_buffer_at_off( &sum ); if ( *sum_x == *bli_d0 ) *resid = 1.0; else if ( *sum_x >= 2.0 * m_x ) *resid = 2.0; } } blis-0.6.1/testsuite/src/test_randv.h000066400000000000000000000035001360743507500176210ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_randv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_scal2m.c000066400000000000000000000203321360743507500176650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "scal2m"; static char* o_types = "mm"; // x y static char* p_types = "h"; // conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_scal2m_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_scal2m_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_scal2m_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y ); void libblis_test_scal2m_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* y_save, double* resid ); void libblis_test_scal2m_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfm( tdata, params, &(op->ops->normfm) ); libblis_test_subm( tdata, params, &(op->ops->subm) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); } void libblis_test_scal2m ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_scal2m_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_scal2m_experiment ); } } void libblis_test_scal2m_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; trans_t transx; obj_t alpha, x, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, sc_str[0], m, n, &x ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y_save ); // Set alpha. if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &alpha ); else bli_setsc( 0.0, -2.0, &alpha ); // Randomize and save y. libblis_test_mobj_randomize( params, FALSE, &x ); bli_setm( &BLIS_ONE, &y ); bli_copym( &y, &y_save ); // Apply the parameters. bli_obj_set_conjtrans( transx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &y_save, &y ); time = bli_clock(); libblis_test_scal2m_impl( iface, &alpha, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_scal2m_check( params, &alpha, &x, &y, &y_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_scal2m_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_scal2m( alpha, x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_scal2m_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_length( y ); dim_t n = bli_obj_width( y ); obj_t x_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is set to one. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := alpha * conjx(x) // // is functioning correctly if // // normfm( y - alpha * conjx(x) ) // // is negligible. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, n, 0, 0, &x_temp ); bli_copym( x, &x_temp ); bli_scalm( alpha, &x_temp ); bli_subm( &x_temp, y ); bli_normfm( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); } blis-0.6.1/testsuite/src/test_scal2m.h000066400000000000000000000035011360743507500176710ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_scal2m ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_scal2v.c000066400000000000000000000201031360743507500176720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "scal2v"; static char* o_types = "vv"; // x y static char* p_types = "c"; // conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_scal2v_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_scal2v_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_scal2v_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y ); void libblis_test_scal2v_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_scal2v_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); } void libblis_test_scal2v ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_scal2v_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_scal2v_experiment ); } } void libblis_test_scal2v_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx; obj_t alpha, x, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save ); // Set alpha. //bli_setsc( sqrt(2.0)/2.0, sqrt(2.0)/2.0, &alpha ); //bli_copysc( &BLIS_TWO, &alpha ); if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &alpha ); else bli_setsc( 0.0, -2.0, &alpha ); // Randomize x and y, and save y. libblis_test_vobj_randomize( params, FALSE, &x ); libblis_test_vobj_randomize( params, FALSE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &y_save, &y ); time = bli_clock(); libblis_test_scal2v_impl( iface, &alpha, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_scal2v_check( params, &alpha, &x, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_scal2v_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_scal2v( alpha, x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_scal2v_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); obj_t x_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is set to one. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := alpha * conjx(x) // // is functioning correctly if // // normfv( y - alpha * conjx(x) ) // // is negligible. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_copyv( x, &x_temp ); bli_scalv( alpha, &x_temp ); bli_subv( &x_temp, y ); bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); } blis-0.6.1/testsuite/src/test_scal2v.h000066400000000000000000000035011360743507500177020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_scal2v ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_scalm.c000066400000000000000000000177431360743507500176170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "scalm"; static char* o_types = "m"; // x static char* p_types = "c"; // conjbeta static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_scalm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_scalm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_scalm_impl ( iface_t iface, obj_t* beta, obj_t* y ); void libblis_test_scalm_check ( test_params_t* params, obj_t* beta, obj_t* y, obj_t* y_save, double* resid ); void libblis_test_scalm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfm( tdata, params, &(op->ops->normfm) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); } void libblis_test_scalm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_scalm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_scalm_experiment ); } } void libblis_test_scalm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; conj_t conjbeta; obj_t beta, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. 
m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y_save ); // Set beta to 0 + i. //bli_setsc( 0.0, 1.0, &beta ); if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &beta ); else bli_setsc( 0.0, -2.0, &beta ); // Randomize and save y. libblis_test_mobj_randomize( params, FALSE, &y ); bli_copym( &y, &y_save ); // Apply the parameters. bli_obj_set_conj( conjbeta, &beta ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &y_save, &y ); time = bli_clock(); libblis_test_scalm_impl( iface, &beta, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 6.0; // Perform checks. libblis_test_scalm_check( params, &beta, &y, &y_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. 
bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_scalm_impl ( iface_t iface, obj_t* beta, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_scalm( beta, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_scalm_check ( test_params_t* params, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_length( y ); dim_t n = bli_obj_width( y ); obj_t norm_y_r; obj_t nbeta; obj_t y2; double junk; // // Pre-conditions: // - y_orig is randomized. // Note: // - beta should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := conjbeta(beta) * y_orig // // is functioning correctly if // // normfm( y + -conjbeta(beta) * y_orig ) // // is negligible. // bli_obj_create( dt, m, n, 0, 0, &y2 ); bli_copym( y_orig, &y2 ); bli_obj_scalar_init_detached( dt, &nbeta ); bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_copysc( beta, &nbeta ); bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); bli_scalm( &nbeta, &y2 ); bli_addm( &y2, y ); bli_normfm( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); bli_obj_free( &y2 ); } blis-0.6.1/testsuite/src/test_scalm.h000066400000000000000000000035001360743507500176060ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_scalm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_scalv.c000066400000000000000000000174611360743507500176250ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "scalv"; static char* o_types = "v"; // y static char* p_types = "c"; // conjbeta static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_scalv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_scalv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_scalv_impl ( iface_t iface, obj_t* beta, obj_t* y ); void libblis_test_scalv_check ( test_params_t* params, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_scalv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_addv( tdata, params, &(op->ops->addv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); } void libblis_test_scalv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_scalv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_scalv_experiment ); } } void libblis_test_scalv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjbeta; obj_t beta, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. 
m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[0], m, &y_save ); // Set beta. if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &beta ); else bli_setsc( 0.0, -2.0, &beta ); // Randomize and save y. libblis_test_vobj_randomize( params, FALSE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. bli_obj_set_conj( conjbeta, &beta ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &y_save, &y ); time = bli_clock(); libblis_test_scalv_impl( iface, &beta, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 6.0; // Perform checks. libblis_test_scalv_check( params, &beta, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_scalv_impl ( iface_t iface, obj_t* beta, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_scalv( beta, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_scalv_check ( test_params_t* params, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); obj_t norm_y_r; obj_t nbeta; obj_t y2; double junk; // // Pre-conditions: // - y_orig is randomized. 
// Note: // - beta should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := conjbeta(beta) * y_orig // // is functioning correctly if // // normfv( y + -conjbeta(beta) * y_orig ) // // is negligible. // bli_obj_create( dt, m, 1, 0, 0, &y2 ); bli_copyv( y_orig, &y2 ); bli_obj_scalar_init_detached( dt, &nbeta ); bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_copysc( beta, &nbeta ); bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); bli_scalv( &nbeta, &y2 ); bli_addv( &y2, y ); bli_normfv( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); bli_obj_free( &y2 ); } blis-0.6.1/testsuite/src/test_scalv.h000066400000000000000000000035001360743507500176170ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_scalv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_setm.c000066400000000000000000000200551360743507500174560ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "setm"; static char* o_types = "m"; // x static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_setm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_setm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_setm_impl ( iface_t iface, obj_t* beta, obj_t* x ); void libblis_test_setm_check ( test_params_t* params, obj_t* beta, obj_t* x, double* resid ); void libblis_test_setm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randm) ); } void libblis_test_setm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_setm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_setm_experiment ); } } void libblis_test_setm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; obj_t beta; obj_t x; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. // Create test scalars. bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &x ); // Initialize beta to unit. bli_copysc( &BLIS_ONE, &beta ); // Randomize x. libblis_test_mobj_randomize( params, FALSE, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_setm_impl( iface, &beta, &x ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_setm_check( params, &beta, &x, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); } void libblis_test_setm_impl ( iface_t iface, obj_t* beta, obj_t* x ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_setm( beta, x ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_setm_check ( test_params_t* params, obj_t* beta, obj_t* x, double* resid ) { num_t dt_x = bli_obj_dt( x ); dim_t m_x = bli_obj_length( x ); dim_t n_x = bli_obj_width( x ); inc_t rs_x = bli_obj_row_stride( x ); inc_t cs_x = bli_obj_col_stride( x ); void* buf_x = bli_obj_buffer_at_off( x ); void* buf_beta = bli_obj_buffer_for_1x1( dt_x, beta ); dim_t i, j; *resid = 0.0; // // The easiest way to check that setm was successful is to confirm // that each element of x is equal to beta. // if ( bli_obj_is_float( x ) ) { float* beta_cast = buf_beta; float* buf_x_cast = buf_x; float* chi1; for ( j = 0; j < n_x; ++j ) { for ( i = 0; i < m_x; ++i ) { chi1 = buf_x_cast + (i )*rs_x + (j )*cs_x; if ( !bli_seq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } } } } else if ( bli_obj_is_double( x ) ) { double* beta_cast = buf_beta; double* buf_x_cast = buf_x; double* chi1; for ( j = 0; j < n_x; ++j ) { for ( i = 0; i < m_x; ++i ) { chi1 = buf_x_cast + (i )*rs_x + (j )*cs_x; if ( !bli_deq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } } } } else if ( bli_obj_is_scomplex( x ) ) { scomplex* beta_cast = buf_beta; scomplex* buf_x_cast = buf_x; scomplex* chi1; for ( j = 0; j < n_x; ++j ) { for ( i = 0; i < m_x; ++i ) { chi1 = buf_x_cast + (i )*rs_x + (j )*cs_x; if ( !bli_ceq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } } } } else // if ( bli_obj_is_dcomplex( x ) ) { dcomplex* beta_cast = buf_beta; dcomplex* buf_x_cast = buf_x; dcomplex* chi1; for ( j = 0; j < n_x; ++j ) { for ( i = 0; i < m_x; ++i ) { chi1 = buf_x_cast + (i )*rs_x + (j )*cs_x; if ( !bli_zeq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } } } } } 
blis-0.6.1/testsuite/src/test_setm.h000066400000000000000000000034771360743507500174740ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ void libblis_test_setm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_setv.c000066400000000000000000000167731360743507500175030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. 
static char* op_str = "setv"; static char* o_types = "v"; // x static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_setv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_setv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_setv_impl ( iface_t iface, obj_t* beta, obj_t* x ); void libblis_test_setv_check ( test_params_t* params, obj_t* beta, obj_t* x, double* resid ); void libblis_test_setv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); } void libblis_test_setv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_setv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_setv_experiment ); } } void libblis_test_setv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; obj_t beta; obj_t x; // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. // Create test scalars. bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); // Initialize beta to unit. bli_copysc( &BLIS_ONE, &beta ); // Randomize x. libblis_test_vobj_randomize( params, FALSE, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_setv_impl( iface, &beta, &x ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_setv_check( params, &beta, &x, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. bli_obj_free( &x ); } void libblis_test_setv_impl ( iface_t iface, obj_t* beta, obj_t* x ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_setv( beta, x ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_setv_check ( test_params_t* params, obj_t* beta, obj_t* x, double* resid ) { num_t dt_x = bli_obj_dt( x ); dim_t m_x = bli_obj_vector_dim( x ); inc_t inc_x = bli_obj_vector_inc( x ); void* buf_x = bli_obj_buffer_at_off( x ); void* buf_beta = bli_obj_buffer_for_1x1( dt_x, beta ); dim_t i; *resid = 0.0; // // The easiest way to check that setv was successful is to confirm // that each element of x is equal to beta. 
// if ( bli_obj_is_float( x ) ) { float* chi1 = buf_x; float* beta_cast = buf_beta; for ( i = 0; i < m_x; ++i ) { if ( !bli_seq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } chi1 += inc_x; } } else if ( bli_obj_is_double( x ) ) { double* chi1 = buf_x; double* beta_cast = buf_beta; for ( i = 0; i < m_x; ++i ) { if ( !bli_deq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } chi1 += inc_x; } } else if ( bli_obj_is_scomplex( x ) ) { scomplex* chi1 = buf_x; scomplex* beta_cast = buf_beta; for ( i = 0; i < m_x; ++i ) { if ( !bli_ceq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } chi1 += inc_x; } } else // if ( bli_obj_is_dcomplex( x ) ) { dcomplex* chi1 = buf_x; dcomplex* beta_cast = buf_beta; for ( i = 0; i < m_x; ++i ) { if ( !bli_zeq( *chi1, *beta_cast ) ) { *resid = 1.0; return; } chi1 += inc_x; } } } blis-0.6.1/testsuite/src/test_setv.h000066400000000000000000000034771360743507500175050ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_setv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_subm.c000066400000000000000000000203071360743507500174540ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "subm"; static char* o_types = "mm"; // x y static char* p_types = "h"; // transx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_subm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_subm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_subm_impl ( iface_t iface, obj_t* x, obj_t* y ); void libblis_test_subm_check ( test_params_t* params, obj_t* alpha, obj_t* beta, obj_t* x, obj_t* y, double* resid ); void libblis_test_subm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_setm( tdata, params, &(op->ops->setm) ); libblis_test_normfm( tdata, params, &(op->ops->normfm) ); } void libblis_test_subm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. 
if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_subm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_subm_experiment ); } } void libblis_test_subm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; trans_t transx; obj_t alpha, beta; obj_t x, y; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, sc_str[0], m, n, &x ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[1], m, n, &y ); // Initialize alpha and beta. bli_setsc( 1.0, 1.0, &alpha ); bli_setsc( 3.0, 3.0, &beta ); // Randomize x. bli_setm( &alpha, &x ); bli_setm( &beta, &y ); // Apply the parameters. bli_obj_set_conjtrans( transx, &x ); // Disable repeats since bli_copym() is not yet tested. 
//for ( i = 0; i < n_repeats; ++i ) { time = bli_clock(); libblis_test_subm_impl( iface, &x, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 2.0; // Perform checks. libblis_test_subm_check( params, &alpha, &beta, &x, &y, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); } void libblis_test_subm_impl ( iface_t iface, obj_t* x, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_subm( x, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_subm_check ( test_params_t* params, obj_t* alpha, obj_t* beta, obj_t* x, obj_t* y, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_length( y ); dim_t n = bli_obj_width( y ); conj_t conjx = bli_obj_conj_status( x ); obj_t aminusb; obj_t alpha_conj; obj_t norm_r, m_r, n_r, temp_r; double junk; // // Pre-conditions: // - x is set to alpha. // - y_orig is set to beta. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := y_orig - conjx(x) // // is functioning correctly if // // normfm(y) - sqrt( absqsc( beta - conjx(alpha) ) * m * n ) // // is negligible. 
// bli_obj_scalar_init_detached( dt, &aminusb ); bli_obj_scalar_init_detached( dt_real, &temp_r ); bli_obj_scalar_init_detached( dt_real, &norm_r ); bli_obj_scalar_init_detached( dt_real, &m_r ); bli_obj_scalar_init_detached( dt_real, &n_r ); bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_normfm( y, &norm_r ); bli_copysc( beta, &aminusb ); bli_subsc( &alpha_conj, &aminusb ); bli_setsc( ( double )m, 0.0, &m_r ); bli_setsc( ( double )n, 0.0, &n_r ); bli_absqsc( &aminusb, &temp_r ); bli_mulsc( &m_r, &temp_r ); bli_mulsc( &n_r, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); bli_subsc( &temp_r, &norm_r ); bli_getsc( &norm_r, resid, &junk ); } blis-0.6.1/testsuite/src/test_subm.h000066400000000000000000000034771360743507500174720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_subm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_subv.c000066400000000000000000000176341360743507500174760ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
   DAMAGE.

*/

#include "blis.h"
#include "test_libblis.h"


// Static variables.
// These four tables are handed to libblis_test_op_driver() by
// libblis_test_subv() below.
static char*     op_str                    = "subv";
static char*     o_types                   = "vv";  // x y
static char*     p_types                   = "c";   // conjx
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_subv_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     );

void libblis_test_subv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     );

void libblis_test_subv_impl
     (
       iface_t   iface,
       obj_t*    x,
       obj_t*    y
     );

void libblis_test_subv_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         beta,
       obj_t*         x,
       obj_t*         y,
       double*        resid
     );



// Runs the tests of the operations that the subv test itself relies on
// (setv and normfv) so that they are exercised before subv is.
void libblis_test_subv_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{
	libblis_test_setv( tdata, params, &(op->ops->setv) );
	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
}



// Entry point of the subv test. Does nothing if the test was already run or
// is disabled; otherwise runs the dependency tests and then hands
// libblis_test_subv_experiment() to the generic operation driver.
void libblis_test_subv
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{

	// Return early if this test has already been done.
	if ( libblis_test_op_is_done( op ) ) return;

	// Return early if operation is disabled.
	if ( libblis_test_op_is_disabled( op ) ||
	     libblis_test_l1v_is_disabled( op ) ) return;

	// Call dependencies first.
	if ( TRUE ) libblis_test_subv_deps( tdata, params, op );

	// Execute the test driver for each implementation requested.
	//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_FRONT_END,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_subv_experiment );
	}
}



// Runs one subv experiment for a single datatype/parameter/storage
// combination and problem size.
//   dc_str[0]  selects the datatype.
//   pc_str[0]  selects the conjugation applied to x.
//   sc_str[0], sc_str[1]  select the storage of x and y.
//   p_cur      is mapped to the vector length via op->dim_spec[0].
//   perf, resid  receive the performance estimate and the residual.
void libblis_test_subv_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	double       time_min  = DBL_MAX;
	double       time;

	num_t        datatype;

	dim_t        m;

	conj_t       conjx;

	obj_t        alpha, beta;
	obj_t        x, y;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_conj( pc_str[0], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
	libblis_test_vobj_create( params, datatype, sc_str[1], m, &y );

	// Initialize alpha and beta.
	bli_setsc( 1.0, 1.0, &alpha );
	bli_setsc( 3.0, 3.0, &beta );

	// Set x and y to alpha and beta, respectively.
	bli_setv( &alpha, &x );
	bli_setv( &beta,  &y );

	// Apply the parameters.
	bli_obj_set_conj( conjx, &x );

	// Disable repeats since bli_copyv() is not yet tested.
	// (Without a copy, y cannot be restored between repeats, so the
	// operation is timed exactly once.)
	//for ( i = 0; i < n_repeats; ++i )
	{
		time = bli_clock();

		libblis_test_subv_impl( iface, &x, &y );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &x ) ) *perf *= 2.0;

	// Perform checks.
	libblis_test_subv_check( params, &alpha, &beta, &x, &y, resid );

	// Zero out performance and residual if output vector is empty.
	libblis_test_check_empty_problem( &y, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
	bli_obj_free( &y );
}



// Invokes the subv implementation selected by iface. Only the sequential
// front-end interface is handled; any other value reports an error.
void libblis_test_subv_impl
     (
       iface_t   iface,
       obj_t*    x,
       obj_t*    y
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_FRONT_END:
		bli_subv( x, y );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}



// Computes the residual for a subv experiment and stores its real part in
// *resid. alpha and beta are the scalars that x and y were filled with
// before the operation was applied.
void libblis_test_subv_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         beta,
       obj_t*         x,
       obj_t*         y,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( x );
	num_t  dt_real = bli_obj_dt_proj_to_real( x );
	dim_t  m       = bli_obj_vector_dim( x );

	conj_t conjx   = bli_obj_conj_status( x );

	obj_t  aminusb;
	obj_t  alpha_conj;
	obj_t  norm_r, m_r, temp_r;

	double junk;

	//
	// Pre-conditions:
	// - x is set to alpha.
	// - y_orig is set to beta.
	// Note:
	// - alpha and beta should have non-zero imaginary components in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   y := y_orig - conjx(x)
	//
	// is functioning correctly if
	//
	//   normfv(y) - sqrt( absqsc( beta - conjx(alpha) ) * m )
	//
	// is negligible.
	//

	bli_obj_scalar_init_detached( dt,      &aminusb );
	bli_obj_scalar_init_detached( dt_real, &temp_r );
	bli_obj_scalar_init_detached( dt_real, &norm_r );
	bli_obj_scalar_init_detached( dt_real, &m_r );

	bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj );

	bli_normfv( y, &norm_r );

	// Despite its name, aminusb starts out as a copy of beta and then has
	// alpha_conj subtracted from it; per the derivation above it is meant
	// to hold beta - conjx(alpha).
	bli_copysc( beta, &aminusb );
	bli_subsc( &alpha_conj, &aminusb );

	bli_setsc( ( double )m, 0.0, &m_r );

	bli_absqsc( &aminusb, &temp_r );
	bli_mulsc( &m_r, &temp_r );
	bli_sqrtsc( &temp_r, &temp_r );
	bli_subsc( &temp_r, &norm_r );

	// The imaginary part of the result is discarded.
	bli_getsc( &norm_r, resid, &junk );
}

blis-0.6.1/testsuite/src/test_subv.h000066400000000000000000000034771360743507500175030ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_subv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_symm.c000066400000000000000000000264141360743507500175000ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
#include "test_libblis.h"


// Static variables.
// op_str, p_types, o_types and thresh are passed to libblis_test_op_driver()
// by the symm test entry point. Each thresh row holds the { warn, pass }
// residual thresholds for one datatype, in the order s, c, d, z.
static char*     op_str                    = "symm";
static char*     o_types                   = "mmm";  // a b c
static char*     p_types                   = "such"; // side uploa conja transb
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_symm_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     );

void libblis_test_symm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     );

void libblis_test_symm_impl
     (
       iface_t   iface,
       side_t    side,
       obj_t*    alpha,
       obj_t*    a,
       obj_t*    b,
       obj_t*    beta,
       obj_t*    c
     );

void libblis_test_symm_check
     (
       test_params_t* params,
       side_t         side,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         b,
       obj_t*         beta,
       obj_t*         c,
       obj_t*         c_orig,
       double*        resid
     );



// Runs the tests of the operations that the symm test relies on, so that
// they are exercised before symm is.
void libblis_test_symm_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{
	libblis_test_randv( tdata, params, &(op->ops->randv) );
	libblis_test_randm( tdata, params, &(op->ops->randm) );
	libblis_test_setv( tdata, params, &(op->ops->setv) );
	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
	libblis_test_subv( tdata, params, &(op->ops->subv) );
	libblis_test_scalv( tdata, params, &(op->ops->scalv) );
	libblis_test_copym( tdata, params, &(op->ops->copym) );
	libblis_test_scalm( tdata, params, &(op->ops->scalm) );
	libblis_test_gemv( tdata, params, &(op->ops->gemv) );
	libblis_test_symv( tdata, params, &(op->ops->symv) );
}



// Entry point of the symm test. Does nothing if the test was already run or
// is disabled; otherwise runs the dependency tests and then hands
// libblis_test_symm_experiment() to the generic operation driver.
void libblis_test_symm
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{

	// Return early if this test has already been done.
	if ( libblis_test_op_is_done( op ) ) return;

	// Return early if operation is disabled.
	if ( libblis_test_op_is_disabled( op ) ||
	     libblis_test_l3_is_disabled( op ) ) return;

	// Call dependencies first.
	if ( TRUE ) libblis_test_symm_deps( tdata, params, op );

	// Execute the test driver for each implementation requested.
	//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_FRONT_END,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_symm_experiment );
	}
}



// Runs one symm experiment for a single datatype/parameter/storage
// combination and problem size.
//   dc_str[0]     selects the datatype.
//   pc_str[0..3]  select side, uploa, conja and transb.
//   sc_str[0]     selects the storage of c, sc_str[1] that of a, and
//                 sc_str[2] that of b.
//   p_cur         is mapped to m and n via op->dim_spec[0] and [1].
//   perf, resid   receive the performance estimate and the residual.
void libblis_test_symm_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	num_t        datatype;

	dim_t        m, n;
	dim_t        mn_side;

	side_t       side;
	uplo_t       uploa;
	conj_t       conja;
	trans_t      transb;

	obj_t        alpha, a, b, beta, c;
	obj_t        c_save;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_side( pc_str[0], &side );
	bli_param_map_char_to_blis_uplo( pc_str[1], &uploa );
	bli_param_map_char_to_blis_conj( pc_str[2], &conja );
	bli_param_map_char_to_blis_trans( pc_str[3], &transb );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	// A is square, with its order (mn_side) derived from side, m and n.
	bli_set_dim_with_side( side, m, n, &mn_side );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], mn_side, mn_side, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[2], m,       n,       &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m,       n,       &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m,       n,       &c_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( &c ) )
	{
		bli_setsc(  0.8,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  0.8,  0.6, &alpha );
		bli_setsc( -1.0,  1.0, &beta );
	}

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_SYMMETRIC, &a );
	bli_obj_set_uplo( uploa, &a );

	// Randomize A, make it densely symmetric, and zero the unstored triangle
	// to ensure the implementation reads only from the stored region.
	libblis_test_mobj_randomize( params, TRUE, &a );
	bli_mksymm( &a );
	bli_mktrim( &a );

	// Randomize B and C, and save C.
	libblis_test_mobj_randomize( params, TRUE, &b );
	libblis_test_mobj_randomize( params, TRUE, &c );
	bli_copym( &c, &c_save );

	// Apply the remaining parameters.
	bli_obj_set_conj( conja, &a );
	bli_obj_set_conjtrans( transb, &b );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore C so that every repeat starts from the same input.
		bli_copym( &c_save, &c );

		time = bli_clock();

		libblis_test_symm_impl( iface, side, &alpha, &a, &b, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &c ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_symm_check( params, side, &alpha, &a, &b, &beta, &c, &c_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}



// Invokes the symm implementation selected by iface. Only the sequential
// front-end interface is handled; any other value reports an error.
void libblis_test_symm_impl
     (
       iface_t   iface,
       side_t    side,
       obj_t*    alpha,
       obj_t*    a,
       obj_t*    b,
       obj_t*    beta,
       obj_t*    c
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_FRONT_END:
		bli_symm( side, alpha, a, b, beta, c );
		//bli_symm4m( side, alpha, a, b, beta, c );
		//bli_symm3m( side, alpha, a, b, beta, c );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}



// Computes the residual for a symm experiment and stores its real part in
// *resid. c is the computed result and c_orig the saved input C. The check
// compares the two sides after multiplying by a random vector t, using
// gemv and symv.
void libblis_test_symm_check
     (
       test_params_t* params,
       side_t         side,
       obj_t*         alpha,
       obj_t*         a,
       obj_t*         b,
       obj_t*         beta,
       obj_t*         c,
       obj_t*         c_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( c );
	num_t  dt_real = bli_obj_dt_proj_to_real( c );

	dim_t  m       = bli_obj_length( c );
	dim_t  n       = bli_obj_width( c );

	obj_t  norm;
	obj_t  t, v, w, z;

	double junk;

	//
	// Pre-conditions:
	// - a is randomized and symmetric.
	// - b is randomized.
	// - c_orig is randomized.
	// Note:
	// - alpha and beta should have non-zero imaginary components in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   C := beta * C_orig + alpha * conja(A) * transb(B)    (side = left)
	//   C := beta * C_orig + alpha * transb(B) * conja(A)    (side = right)
	//
	// is functioning correctly if
	//
	//   normfv( v - z )
	//
	// is negligible, where
	//
	//   v = C * t
	//
	//   z = ( beta * C_orig + alpha * conja(A) * transb(B) ) * t     (side = left)
	//     = beta * C_orig * t + alpha * conja(A) * transb(B) * t
	//     = beta * C_orig * t + alpha * conja(A) * w
	//     = beta * C_orig * t + z
	//
	//   z = ( beta * C_orig + alpha * transb(B) * conja(A) ) * t     (side = right)
	//     = beta * C_orig * t + alpha * transb(B) * conja(A) * t
	//     = beta * C_orig * t + alpha * transb(B) * w
	//     = beta * C_orig * t + z

	bli_obj_scalar_init_detached( dt_real, &norm );

	// The two branches differ only in the length of w: m when A is applied
	// on the left (w = transb(B) * t), n when it is applied on the right
	// (w = conja(A) * t).
	if ( bli_is_left( side ) )
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, m, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}
	else // else if ( bli_is_right( side ) )
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, n, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}

	libblis_test_vobj_randomize( params, TRUE, &t );

	bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );

	if ( bli_is_left( side ) )
	{
		bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w );
		bli_symv( alpha, a, &w, &BLIS_ZERO, &z );
	}
	else
	{
		bli_symv( &BLIS_ONE, a, &t, &BLIS_ZERO, &w );
		bli_gemv( alpha, b, &w, &BLIS_ZERO, &z );
	}

	bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z );

	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	// The imaginary part of the result is discarded.
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}

blis-0.6.1/testsuite/src/test_symm.h000066400000000000000000000034771360743507500175110ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_symm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_symv.c000066400000000000000000000226151360743507500175100ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    - Neither the name(s) of the copyright holder(s) nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

*/

#include "blis.h"
#include "test_libblis.h"


// Static variables.
// op_str, p_types, o_types and thresh are passed to libblis_test_op_driver()
// by the symv test entry point. Each thresh row holds the { warn, pass }
// residual thresholds for one datatype, in the order s, c, d, z.
static char*     op_str                    = "symv";
static char*     o_types                   = "mvv";  // a x y
static char*     p_types                   = "ucc";  // uploa conja conjx
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_symv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_symv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_symv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ); void libblis_test_symv_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_symv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_symv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_symv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_symv_experiment ); } } void libblis_test_symv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; uplo_t uploa; conj_t conja; conj_t conjx; obj_t alpha, a, x, beta, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); bli_param_map_char_to_blis_conj( pc_str[1], &conja ); bli_param_map_char_to_blis_conj( pc_str[2], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &a ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[2], m, &y_save ); // Set alpha and beta. if ( bli_obj_is_real( &y ) ) { bli_setsc( 1.0, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } else { bli_setsc( 0.5, 0.5, &alpha ); bli_setsc( -0.5, 0.5, &beta ); } // Set the structure and uplo properties of A. bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, make it densely symmetric, and zero the unstored triangle // to ensure the implementation reads only from the stored region. 
libblis_test_mobj_randomize( params, TRUE, &a ); bli_mksymm( &a ); bli_mktrim( &a ); // Randomize x and y, and save y. libblis_test_vobj_randomize( params, TRUE, &x ); libblis_test_vobj_randomize( params, TRUE, &y ); bli_copyv( &y, &y_save ); // Apply the remaining parameters. bli_obj_set_conj( conja, &a ); bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &y_save, &y ); time = bli_clock(); libblis_test_symv_impl( iface, &alpha, &a, &x, &beta, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_symv_check( params, &alpha, &a, &x, &beta, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_symv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_symv( alpha, a, x, beta, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_symv_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); obj_t v; obj_t norm; double junk; // // Pre-conditions: // - a is randomized and symmetric. // - x is randomized. // - y_orig is randomized. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // y := beta * y_orig + alpha * conja(A) * conjx(x) // // is functioning correctly if // // normfv( y - v ) // // is negligible, where // // v = beta * y_orig + alpha * conja(A_dense) * x // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_copyv( y_orig, &v ); bli_mksymm( a ); bli_obj_set_struc( BLIS_GENERAL, a ); bli_obj_set_uplo( BLIS_DENSE, a ); bli_gemv( alpha, a, x, beta, &v ); bli_subv( &v, y ); bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); } blis-0.6.1/testsuite/src/test_symv.h000066400000000000000000000034771360743507500175220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_symv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_syr.c000066400000000000000000000227021360743507500173240ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
   DAMAGE.

*/

#include "blis.h"
#include "test_libblis.h"


// Static variables.
// These four tables are handed to libblis_test_op_driver() by
// libblis_test_syr() below.
static char*     op_str                    = "syr";
static char*     o_types                   = "vm";  // x a
static char*     p_types                   = "uc";  // uploa conjx
static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                               { 1e-04, 1e-05 },   // warn, pass for c
                                               { 1e-13, 1e-14 },   // warn, pass for d
                                               { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_syr_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     );

void libblis_test_syr_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     );

void libblis_test_syr_impl
     (
       iface_t   iface,
       obj_t*    alpha,
       obj_t*    x,
       obj_t*    a
     );

void libblis_test_syr_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         x,
       obj_t*         a,
       obj_t*         a_orig,
       double*        resid
     );



// Runs the tests of the operations that the syr test relies on, so that
// they are exercised before syr is.
void libblis_test_syr_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{
	libblis_test_randv( tdata, params, &(op->ops->randv) );
	libblis_test_randm( tdata, params, &(op->ops->randm) );
	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
	libblis_test_subv( tdata, params, &(op->ops->subv) );
	libblis_test_copym( tdata, params, &(op->ops->copym) );
	libblis_test_scal2v( tdata, params, &(op->ops->scal2v) );
	libblis_test_dotv( tdata, params, &(op->ops->dotv) );
	libblis_test_gemv( tdata, params, &(op->ops->gemv) );
}



// Entry point of the syr test. Does nothing if the test was already run or
// is disabled; otherwise runs the dependency tests and then hands
// libblis_test_syr_experiment() to the generic operation driver.
void libblis_test_syr
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t*     op
     )
{

	// Return early if this test has already been done.
	if ( libblis_test_op_is_done( op ) ) return;

	// Return early if operation is disabled.
	if ( libblis_test_op_is_disabled( op ) ||
	     libblis_test_l2_is_disabled( op ) ) return;

	// Call dependencies first.
	if ( TRUE ) libblis_test_syr_deps( tdata, params, op );

	// Execute the test driver for each implementation requested.
	//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_FRONT_END,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_syr_experiment );
	}
}



// Runs one syr experiment for a single datatype/parameter/storage
// combination and problem size.
//   dc_str[0]     selects the datatype.
//   pc_str[0..1]  select uploa and conjx.
//   sc_str[0..1]  select the storage of x and a.
//   p_cur         is mapped to the dimension m via op->dim_spec[0].
//   perf, resid   receive the performance estimate and the residual.
void libblis_test_syr_experiment
     (
       test_params_t* params,
       test_op_t*     op,
       iface_t        iface,
       char*          dc_str,
       char*          pc_str,
       char*          sc_str,
       unsigned int   p_cur,
       double*        perf,
       double*        resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	double       time_min  = DBL_MAX;
	double       time;

	num_t        datatype;

	dim_t        m;

	uplo_t       uploa;
	conj_t       conjx;

	obj_t        alpha, x, a;
	obj_t        a_save;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to an actual dimension.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
	bli_param_map_char_to_blis_conj( pc_str[1], &conjx );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	libblis_test_vobj_create( params, datatype,
	                          sc_str[0], m,    &x );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], m, m, &a_save );

	// Set alpha.
	//bli_copysc( &BLIS_MINUS_ONE, &alpha );
	bli_setsc( -1.0, 0.5, &alpha );

	// Randomize x.
	libblis_test_vobj_randomize( params, TRUE, &x );

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_SYMMETRIC, &a );
	bli_obj_set_uplo( uploa, &a );

	// Randomize A, make it densely symmetric, and zero the unstored triangle
	// to ensure the implementation reads only from the stored region.
	libblis_test_mobj_randomize( params, TRUE, &a );
	bli_mksymm( &a );
	bli_mktrim( &a );

	// Save A, giving the copy the same structure and uplo properties.
	bli_obj_set_struc( BLIS_SYMMETRIC, &a_save );
	bli_obj_set_uplo( uploa, &a_save );
	bli_copym( &a, &a_save );

	// Apply the remaining parameters.
	bli_obj_set_conj( conjx, &x );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore A so that every repeat starts from the same input.
		bli_copym( &a_save, &a );

		time = bli_clock();

		libblis_test_syr_impl( iface, &alpha, &x, &a );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	*perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &a ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_syr_check( params, &alpha, &x, &a, &a_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &a, perf, resid );

	// Free the test objects.
	bli_obj_free( &x );
	bli_obj_free( &a );
	bli_obj_free( &a_save );
}



// Invokes the syr implementation selected by iface. Only the sequential
// front-end interface is handled; any other value reports an error.
void libblis_test_syr_impl
     (
       iface_t   iface,
       obj_t*    alpha,
       obj_t*    x,
       obj_t*    a
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_FRONT_END:
		bli_syr( alpha, x, a );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}



// Computes the residual for a syr experiment and stores its real part in
// *resid. a is the computed result and a_orig the saved input A.
// Side effects: both a and a_orig are made densely symmetric and re-marked
// as general, dense matrices.
void libblis_test_syr_check
     (
       test_params_t* params,
       obj_t*         alpha,
       obj_t*         x,
       obj_t*         a,
       obj_t*         a_orig,
       double*        resid
     )
{
	num_t  dt      = bli_obj_dt( a );
	num_t  dt_real = bli_obj_dt_proj_to_real( a );

	dim_t  m_a     = bli_obj_length( a );

	obj_t  xt, t, v, w;
	obj_t  rho, norm;

	double junk;

	//
	// Pre-conditions:
	// - x is randomized.
	// - a is randomized and symmetric.
	// Note:
	// - alpha should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   A := A_orig + alpha * conjx(x) * conjx(x)^T
	//
	// is functioning correctly if
	//
	//   normfv( v - w )
	//
	// is negligible, where
	//
	//   v = A * t
	//   w = ( A_orig + alpha * conjx(x) * conjx(x)^T ) * t
	//     =   A_orig * t + alpha * conjx(x) * conjx(x)^T * t
	//     =   A_orig * t + alpha * conjx(x) * rho
	//     =   A_orig * t + w
	//

	// Expand both matrices to dense general form so that gemv can be used
	// to apply them to t.
	bli_mksymm( a );
	bli_mksymm( a_orig );
	bli_obj_set_struc( BLIS_GENERAL, a );
	bli_obj_set_struc( BLIS_GENERAL, a_orig );
	bli_obj_set_uplo( BLIS_DENSE, a );
	bli_obj_set_uplo( BLIS_DENSE, a_orig );

	bli_obj_scalar_init_detached( dt,      &rho );
	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, m_a, 1, 0, 0, &t );
	bli_obj_create( dt, m_a, 1, 0, 0, &v );
	bli_obj_create( dt, m_a, 1, 0, 0, &w );

	// xt aliases x; it is not freed below.
	bli_obj_alias_to( x, &xt );

	libblis_test_vobj_randomize( params, TRUE, &t );

	bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v );

	bli_dotv( &xt, &t, &rho );
	bli_mulsc( alpha, &rho );
	bli_scal2v( &rho, x, &w );
	bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w );

	bli_subv( &w, &v );
	bli_normfv( &v, &norm );
	// The imaginary part of the result is discarded.
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
}

blis-0.6.1/testsuite/src/test_syr.h000066400000000000000000000034761360743507500173400ustar00rootroot00000000000000/*

   BLIS
   An object-based framework for developing high-performance BLAS-like
   libraries.

   Copyright (C) 2014, The University of Texas at Austin
   Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:
    - Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    - Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_syr ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_syr2.c000066400000000000000000000245501360743507500174110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
- Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"
#include "test_libblis.h"

// Static variables.
// All four are handed to libblis_test_op_driver() by libblis_test_syr2().

// Name of the operation under test.
static char* op_str = "syr2";
// One character per operand, in the order listed on the right
// (presumably 'v' = vector and 'm' = matrix -- confirm against the driver).
static char* o_types = "vvm"; // x y a
// One character per operation parameter, in the order listed on the right.
static char* p_types = "ucc"; // uploa conjx conjy
// Residual thresholds: one { warn, pass } pair per floating-point datatype.
static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                              { 1e-04, 1e-05 },   // warn, pass for c
                                              { 1e-13, 1e-14 },   // warn, pass for d
                                              { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_syr2_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_syr2_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_syr2_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ); void libblis_test_syr2_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, obj_t* a_orig, double* resid ); void libblis_test_syr2_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scal2v( tdata, params, &(op->ops->scal2v) ); libblis_test_dotv( tdata, params, &(op->ops->dotv) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_syr2 ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_syr2_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_syr2_experiment ); } } void libblis_test_syr2_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; uplo_t uploa; conj_t conjx, conjy; obj_t alpha, x, y, a; obj_t a_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, m, &a ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, m, &a_save ); // Set alpha. //bli_copysc( &BLIS_MINUS_ONE, &alpha ); bli_setsc( -1.0, 1.0, &alpha ); // Randomize x and y. libblis_test_vobj_randomize( params, TRUE, &x ); libblis_test_vobj_randomize( params, TRUE, &y ); // Set the structure and uplo properties of A. bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, make it densely symmetric, and zero the unstored triangle // to ensure the implementation is reads only from the stored region. 
libblis_test_mobj_randomize( params, TRUE, &a ); bli_mksymm( &a ); bli_mktrim( &a ); bli_obj_set_struc( BLIS_SYMMETRIC, &a_save ); bli_obj_set_uplo( uploa, &a_save ); bli_copym( &a, &a_save ); // Apply the remaining parameters. bli_obj_set_conj( conjx, &x ); bli_obj_set_conj( conjy, &y ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &a_save, &a ); time = bli_clock(); libblis_test_syr2_impl( iface, &alpha, &x, &y, &a ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &a ) ) *perf *= 4.0; // Perform checks. libblis_test_syr2_check( params, &alpha, &x, &y, &a, &a_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &a, perf, resid ); // Free the test objects. bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &a ); bli_obj_free( &a_save ); } void libblis_test_syr2_impl ( iface_t iface, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_syr2( alpha, x, y, a ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_syr2_check ( test_params_t* params, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, obj_t* a_orig, double* resid ) { num_t dt = bli_obj_dt( a ); num_t dt_real = bli_obj_dt_proj_to_real( a ); dim_t m_a = bli_obj_length( a ); obj_t xt, yt; obj_t t, v, w1, w2; obj_t rho, norm; double junk; // // Pre-conditions: // - x is randomized. // - y is randomized. // - a is randomized and symmetric. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // A := A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T // // is functioning correctly if // // normfv( v - w ) // // is negligible, where // // v = A * t // w = ( A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T ) * t // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + alpha * conjy(y) * conjx(x)^T * t // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + alpha * conjy(y) * rho // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + w1 // = A_orig * t + alpha * conjx(x) * rho + w1 // = A_orig * t + w2 + w1 // bli_mksymm( a ); bli_mksymm( a_orig ); bli_obj_set_struc( BLIS_GENERAL, a ); bli_obj_set_struc( BLIS_GENERAL, a_orig ); bli_obj_set_uplo( BLIS_DENSE, a ); bli_obj_set_uplo( BLIS_DENSE, a_orig ); bli_obj_scalar_init_detached( dt, &rho ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); bli_obj_create( dt, m_a, 1, 0, 0, &w1 ); bli_obj_create( dt, m_a, 1, 0, 0, &w2 ); bli_obj_alias_to( x, &xt ); bli_obj_alias_to( y, &yt ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v ); bli_dotv( &xt, &t, &rho ); bli_mulsc( alpha, &rho ); bli_scal2v( &rho, y, &w1 ); bli_dotv( &yt, &t, &rho ); bli_mulsc( alpha, &rho ); bli_scal2v( &rho, x, &w2 ); bli_addv( &w2, &w1 ); bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w1 ); bli_subv( &w1, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w1 ); bli_obj_free( &w2 ); } blis-0.6.1/testsuite/src/test_syr2.h000066400000000000000000000034771360743507500174230ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_syr2 ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_syr2k.c000066400000000000000000000261121360743507500175600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"
#include "test_libblis.h"

// Static variables.
// All four are handed to libblis_test_op_driver() by libblis_test_syr2k().

// Name of the operation under test.
static char* op_str = "syr2k";
// One character per operand, in the order listed on the right
// (presumably 'm' = matrix -- confirm against the driver).
static char* o_types = "mmm"; // a b c
// One character per operation parameter, in the order listed on the right.
static char* p_types = "uhh"; // uploc transa transb
// Residual thresholds: one { warn, pass } pair per floating-point datatype.
static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                              { 1e-04, 1e-05 },   // warn, pass for c
                                              { 1e-13, 1e-14 },   // warn, pass for d
                                              { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_syr2k_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_syr2k_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_syr2k_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ); void libblis_test_syr2k_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ); void libblis_test_syr2k_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); libblis_test_symv( tdata, params, &(op->ops->symv) ); } void libblis_test_syr2k ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_syr2k_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_syr2k_experiment ); } } void libblis_test_syr2k_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, k; uplo_t uploc; trans_t transa, transb; obj_t alpha, a, b, beta, c; obj_t c_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploc ); bli_param_map_char_to_blis_trans( pc_str[1], &transa ); bli_param_map_char_to_blis_trans( pc_str[2], &transb ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transa, sc_str[1], m, k, &a ); libblis_test_mobj_create( params, datatype, transb, sc_str[2], m, k, &b ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &c ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &c_save ); // Set alpha and beta. if ( bli_obj_is_real( &c ) ) { bli_setsc( 0.8, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); } else { // For syr2k, both alpha and beta may be complex since, unlike her2k, // C is symmetric in both the real and complex cases. bli_setsc( 0.8, 0.5, &alpha ); bli_setsc( -1.0, 0.5, &beta ); } // Randomize A and B. 
libblis_test_mobj_randomize( params, TRUE, &a ); libblis_test_mobj_randomize( params, TRUE, &b ); // Set the structure and uplo properties of C. bli_obj_set_struc( BLIS_SYMMETRIC, &c ); bli_obj_set_uplo( uploc, &c ); // Randomize A, make it densely symmetric, and zero the unstored triangle // to ensure the implementation is reads only from the stored region. libblis_test_mobj_randomize( params, TRUE, &c ); bli_mksymm( &c ); bli_mktrim( &c ); // Save C and set its structure and uplo properties. bli_obj_set_struc( BLIS_SYMMETRIC, &c_save ); bli_obj_set_uplo( uploc, &c_save ); bli_copym( &c, &c_save ); // Apply the remaining parameters. bli_obj_set_conjtrans( transa, &a ); bli_obj_set_conjtrans( transb, &b ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &c_save, &c ); time = bli_clock(); libblis_test_syr2k_impl( iface, &alpha, &a, &b, &beta, &c ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &c ) ) *perf *= 4.0; // Perform checks. libblis_test_syr2k_check( params, &alpha, &a, &b, &beta, &c, &c_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &c, perf, resid ); // Free the test objects. 
bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } void libblis_test_syr2k_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_syr2k( alpha, a, b, beta, c ); //bli_syr2k4m( alpha, a, b, beta, c ); //bli_syr2k3m( alpha, a, b, beta, c ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_syr2k_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ) { num_t dt = bli_obj_dt( c ); num_t dt_real = bli_obj_dt_proj_to_real( c ); dim_t m = bli_obj_length( c ); dim_t k = bli_obj_width_after_trans( a ); obj_t at, bt; obj_t norm; obj_t t, v, w1, w2, z; double junk; // // Pre-conditions: // - a is randomized. // - b is randomized. // - c_orig is randomized and symmetric. // Note: // - alpha and beta should have non-zero imaginary components in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // C := beta * C_orig + alpha * transa(A) * transb(B)^T + alpha * transb(B) * transa(A)^T // // is functioning correctly if // // normfv( v - z ) // // is negligible, where // // v = C * t // z = ( beta * C_orig + alpha * transa(A) * transb(B)^T + alpha * transb(B) * transa(A)^T ) * t // = beta * C_orig * t + alpha * transa(A) * transb(B)^T * t + alpha * transb(B) * transa(A)^T * t // = beta * C_orig * t + alpha * transa(A) * transb(B)^T * t + alpha * transb(B) * w2 // = beta * C_orig * t + alpha * transa(A) * w1 + alpha * transb(B) * w2 // = beta * C_orig * t + alpha * transa(A) * w1 + z // = beta * C_orig * t + z // bli_obj_alias_with_trans( BLIS_TRANSPOSE, a, &at ); bli_obj_alias_with_trans( BLIS_TRANSPOSE, b, &bt ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, k, 1, 0, 0, &w1 ); bli_obj_create( dt, k, 1, 0, 0, &w2 ); bli_obj_create( dt, m, 1, 0, 0, &z ); libblis_test_vobj_randomize( params, TRUE, &t ); bli_symv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v ); bli_gemv( &BLIS_ONE, &at, &t, &BLIS_ZERO, &w2 ); bli_gemv( &BLIS_ONE, &bt, &t, &BLIS_ZERO, &w1 ); bli_gemv( alpha, a, &w1, &BLIS_ZERO, &z ); bli_gemv( alpha, b, &w2, &BLIS_ONE, &z ); bli_symv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w1 ); bli_obj_free( &w2 ); bli_obj_free( &z ); } blis-0.6.1/testsuite/src/test_syr2k.h000066400000000000000000000035001360743507500175610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_syr2k ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_syrk.c000066400000000000000000000242011360743507500174730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

#include "blis.h"
#include "test_libblis.h"

// Static variables.
// All four are handed to libblis_test_op_driver() by libblis_test_syrk().

// Name of the operation under test.
static char* op_str = "syrk";
// One character per operand, in the order listed on the right
// (presumably 'm' = matrix -- confirm against the driver).
static char* o_types = "mm"; // a c
// One character per operation parameter, in the order listed on the right.
static char* p_types = "uh"; // uploc transa
// Residual thresholds: one { warn, pass } pair per floating-point datatype.
static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
                                              { 1e-04, 1e-05 },   // warn, pass for c
                                              { 1e-13, 1e-14 },   // warn, pass for d
                                              { 1e-13, 1e-14 } }; // warn, pass for z

// Local prototypes.
void libblis_test_syrk_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_syrk_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_syrk_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c ); void libblis_test_syrk_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* beta, obj_t* c, obj_t* c_orig, double* resid ); void libblis_test_syrk_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); libblis_test_symv( tdata, params, &(op->ops->symv) ); } void libblis_test_syrk ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_syrk_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_FRONT_END,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_syrk_experiment );
	}
}

// Runs one syrk experiment and reports its performance and residual.
//
// Parameters:
// - params: test parameters; n_repeats is read from it, and it is forwarded
//           to the object creation, randomization, and check helpers.
// - op:     operation descriptor; dim_spec[0] and dim_spec[1] are mapped to
//           the dimensions m and k.
// - iface:  interface selector forwarded to libblis_test_syrk_impl().
// - dc_str: datatype string; only dc_str[0] is consulted.
// - pc_str: parameter string; pc_str[0] selects the uplo of C and pc_str[1]
//           selects the trans of A.
// - sc_str: storage string; sc_str[0] is used for C (and its saved copy)
//           and sc_str[1] for A.
// - p_cur:  value passed, together with each dimension specifier, to
//           libblis_test_get_dim_from_prob_size().
// - perf:   output; performance estimate based on the fastest repeat.
// - resid:  output; residual computed by libblis_test_syrk_check().
void libblis_test_syrk_experiment
     (
       test_params_t* params,
       test_op_t* op,
       iface_t iface,
       char* dc_str,
       char* pc_str,
       char* sc_str,
       unsigned int p_cur,
       double* perf,
       double* resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	// Start from the largest double so the first measured time always wins.
	double time_min = DBL_MAX;
	double time;

	num_t datatype;

	dim_t m, k;

	uplo_t uploc;
	trans_t transa;

	obj_t alpha, a, beta, c;
	obj_t c_save;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_uplo( pc_str[0], &uploc );
	bli_param_map_char_to_blis_trans( pc_str[1], &transa );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[1], m, k, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, m, &c_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( &c ) )
	{
		bli_setsc(  1.2,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		// For syrk, both alpha and beta may be complex since, unlike herk,
		// C is symmetric in both the real and complex cases.
		bli_setsc(  1.2,  0.5, &alpha );
		bli_setsc( -1.0,  0.5, &beta );
	}

	// Randomize A.
	libblis_test_mobj_randomize( params, TRUE, &a );

	// Set the structure and uplo properties of C.
	bli_obj_set_struc( BLIS_SYMMETRIC, &c );
	bli_obj_set_uplo( uploc, &c );

	// Randomize C, make it densely symmetric, and zero the unstored triangle
	// to ensure the implementation reads only from the stored region.
	libblis_test_mobj_randomize( params, TRUE, &c );
	bli_mksymm( &c );
	bli_mktrim( &c );

	// Save C and set its structure and uplo properties.
	bli_obj_set_struc( BLIS_SYMMETRIC, &c_save );
	bli_obj_set_uplo( uploc, &c_save );
	bli_copym( &c, &c_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, &a );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore C from the saved copy so every repeat starts from the
		// same input; the copy happens before the clock is read.
		bli_copym( &c_save, &c );

		time = bli_clock();

		libblis_test_syrk_impl( iface, &alpha, &a, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	// The estimate is scaled by four when C is complex.
	*perf = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &c ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_syrk_check( params, &alpha, &a, &beta, &c, &c_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}

// Invokes the syrk implementation selected by iface.
//
// Only BLIS_TEST_SEQ_FRONT_END is handled, which calls bli_syrk(). Any other
// value is reported through libblis_test_printf_error().
void libblis_test_syrk_impl
     (
       iface_t iface,
       obj_t* alpha,
       obj_t* a,
       obj_t* beta,
       obj_t* c
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_FRONT_END:
		bli_syrk( alpha, a, beta, c );
		//bli_syrk4m( alpha, a, beta, c );
		//bli_syrk3m( alpha, a, beta, c );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}

// Computes the residual for a completed syrk and stores it in *resid.
//
// Parameters:
// - params: test parameters, forwarded to libblis_test_vobj_randomize().
// - alpha, beta: the scalars that were passed to the implementation.
// - a:      the A operand (with its trans property already applied).
// - c:      the matrix produced by the implementation.
// - c_orig: the contents of C before the operation.
// - resid:  output; first value returned by bli_getsc() for the norm.
void libblis_test_syrk_check
     (
       test_params_t* params,
       obj_t* alpha,
       obj_t* a,
       obj_t* beta,
       obj_t* c,
       obj_t* c_orig,
       double* resid
     )
{
	num_t dt = bli_obj_dt( c );
	num_t dt_real = bli_obj_dt_proj_to_real( c );

	dim_t m = bli_obj_length( c );
	dim_t k = bli_obj_width_after_trans( a );

	obj_t at;
	obj_t norm;
	obj_t t, v, w, z;

	// Receives the second output of bli_getsc(); its value is not used.
	double junk;

	//
	// Pre-conditions:
	// - a is randomized.
	// - c_orig is randomized and symmetric.
	// Note:
	// - alpha and beta should have non-zero imaginary components in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   C := beta * C_orig + alpha * transa(A) * transa(A)^T
	//
	// is functioning correctly if
	//
	//   normfv( v - z )
	//
	// is negligible, where
	//
	//   v = C * t
	//   z = ( beta * C_orig + alpha * transa(A) * transa(A)^T ) * t
	//     = beta * C_orig * t + alpha * transa(A) * transa(A)^T * t
	//     = beta * C_orig * t + alpha * transa(A) * w
	//     = beta * C_orig * t + z
	//

	// at aliases a with an additional transposition applied.
	bli_obj_alias_with_trans( BLIS_TRANSPOSE, a, &at );

	bli_obj_scalar_init_detached( dt_real, &norm );

	bli_obj_create( dt, m, 1, 0, 0, &t );
	bli_obj_create( dt, m, 1, 0, 0, &v );
	bli_obj_create( dt, k, 1, 0, 0, &w );
	bli_obj_create( dt, m, 1, 0, 0, &z );

	libblis_test_vobj_randomize( params, TRUE, &t );

	// v = C * t
	bli_symv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );

	// w = transa(A)^T * t, then z = alpha * transa(A) * w,
	// then z = beta * C_orig * t + z.
	bli_gemv( &BLIS_ONE, &at, &t, &BLIS_ZERO, &w );
	bli_gemv( alpha, a, &w, &BLIS_ZERO, &z );
	bli_symv( beta, c_orig, &t, &BLIS_ONE, &z );

	// v = v - z, and the residual is the norm of that difference.
	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}
blis-0.6.1/testsuite/src/test_syrk.h000066400000000000000000000034771360743507500175140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_syrk ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_trmm.c000066400000000000000000000244531360743507500174730ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "trmm"; static char* o_types = "mm"; // a b static char* p_types = "suhd"; // side uploa transa diaga static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_trmm_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t* op
     );

void libblis_test_trmm_experiment
     (
       test_params_t* params,
       test_op_t* op,
       iface_t iface,
       char* dc_str,
       char* pc_str,
       char* sc_str,
       unsigned int p_cur,
       double* perf,
       double* resid
     );

void libblis_test_trmm_impl
     (
       iface_t iface,
       side_t side,
       obj_t* alpha,
       obj_t* a,
       obj_t* b
     );

void libblis_test_trmm_check
     (
       test_params_t* params,
       side_t side,
       obj_t* alpha,
       obj_t* a,
       obj_t* b,
       obj_t* b_orig,
       double* resid
     );

// Runs the tests of the operations listed below, each with its own entry
// in op->ops. It is called by libblis_test_trmm() before the trmm driver.
void libblis_test_trmm_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t* op
     )
{
	libblis_test_randv( tdata, params, &(op->ops->randv) );
	libblis_test_randm( tdata, params, &(op->ops->randm) );
	libblis_test_setv( tdata, params, &(op->ops->setv) );
	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
	libblis_test_subv( tdata, params, &(op->ops->subv) );
	libblis_test_scalv( tdata, params, &(op->ops->scalv) );
	libblis_test_copym( tdata, params, &(op->ops->copym) );
	libblis_test_scalm( tdata, params, &(op->ops->scalm) );
	libblis_test_gemv( tdata, params, &(op->ops->gemv) );
	libblis_test_trmv( tdata, params, &(op->ops->trmv) );
}

// Entry point of the trmm test.
//
// Returns without doing anything when libblis_test_op_is_done(),
// libblis_test_op_is_disabled(), or libblis_test_l3_is_disabled() reports
// true for op. Otherwise it runs the dependencies and then hands
// libblis_test_trmm_experiment to libblis_test_op_driver().
void libblis_test_trmm
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t* op
     )
{

	// Return early if this test has already been done.
	if ( libblis_test_op_is_done( op ) ) return;

	// Return early if operation is disabled.
	if ( libblis_test_op_is_disabled( op ) ||
	     libblis_test_l3_is_disabled( op ) ) return;

	// Call dependencies first.
	if ( TRUE ) libblis_test_trmm_deps( tdata, params, op );

	// Execute the test driver for each implementation requested.
	//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_FRONT_END,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_trmm_experiment );
	}
}

// Runs one trmm experiment and reports its performance and residual.
//
// Parameters:
// - params: test parameters; n_repeats is read from it, and it is forwarded
//           to the object creation, randomization, and check helpers.
// - op:     operation descriptor; dim_spec[0] and dim_spec[1] are mapped to
//           the dimensions m and n.
// - iface:  interface selector forwarded to libblis_test_trmm_impl().
// - dc_str: datatype string; only dc_str[0] is consulted.
// - pc_str: parameter string; characters 0..3 select side, uplo of A,
//           trans of A, and diag of A, in that order.
// - sc_str: storage string; sc_str[0] is used for B (and its saved copy)
//           and sc_str[1] for A.
// - p_cur:  value passed, together with each dimension specifier, to
//           libblis_test_get_dim_from_prob_size().
// - perf:   output; performance estimate based on the fastest repeat.
// - resid:  output; residual computed by libblis_test_trmm_check().
void libblis_test_trmm_experiment
     (
       test_params_t* params,
       test_op_t* op,
       iface_t iface,
       char* dc_str,
       char* pc_str,
       char* sc_str,
       unsigned int p_cur,
       double* perf,
       double* resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	// Start from the largest double so the first measured time always wins.
	double time_min = DBL_MAX;
	double time;

	num_t datatype;

	dim_t m, n;

	// Order of the square matrix A; set by bli_set_dim_with_side() from
	// side, m, and n.
	dim_t mn_side;

	side_t side;
	uplo_t uploa;
	trans_t transa;
	diag_t diaga;

	obj_t alpha, a, b;
	obj_t b_save;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_side( pc_str[0], &side );
	bli_param_map_char_to_blis_uplo( pc_str[1], &uploa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transa );
	bli_param_map_char_to_blis_diag( pc_str[3], &diaga );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );

	// Create test operands (vectors and/or matrices).
	bli_set_dim_with_side( side, m, n, &mn_side );
	libblis_test_mobj_create( params, datatype, transa,
	                          sc_str[1], mn_side, mn_side, &a );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, n, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, n, &b_save );

	// Set alpha (this operation has no beta).
	if ( bli_obj_is_real( &b ) )
	{
		bli_setsc(  0.8,  0.0, &alpha );
	}
	else
	{
		bli_setsc(  0.8,  0.5, &alpha );
	}

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, &a );
	bli_obj_set_uplo( uploa, &a );

	// Randomize A, make it densely triangular.
	libblis_test_mobj_randomize( params, TRUE, &a );
	bli_mktrim( &a );

	// Randomize B and save B.
	libblis_test_mobj_randomize( params, TRUE, &b );
	bli_copym( &b, &b_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, &a );
	bli_obj_set_diag( diaga, &a );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore B from the saved copy so every repeat starts from the
		// same input; the copy happens before the clock is read.
		bli_copym( &b_save, &b );

		time = bli_clock();

		libblis_test_trmm_impl( iface, side, &alpha, &a, &b );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	// The estimate is scaled by four when B is complex.
	*perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &b ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_trmm_check( params, side, &alpha, &a, &b, &b_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &b, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &b_save );
}

// Invokes the trmm implementation selected by iface.
//
// Only BLIS_TEST_SEQ_FRONT_END is handled, which calls bli_trmm(). Any other
// value is reported through libblis_test_printf_error().
void libblis_test_trmm_impl
     (
       iface_t iface,
       side_t side,
       obj_t* alpha,
       obj_t* a,
       obj_t* b
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_FRONT_END:
		bli_trmm( side, alpha, a, b );
		//bli_trmm4m( side, alpha, a, b );
		//bli_trmm3m( side, alpha, a, b );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}

// Computes the residual for a completed trmm and stores it in *resid.
//
// Parameters:
// - params: test parameters, forwarded to libblis_test_vobj_randomize().
// - side:   side on which A was applied.
// - alpha:  the scalar that was passed to the implementation.
// - a:      the triangular operand (with its properties already applied).
// - b:      the matrix produced by the implementation.
// - b_orig: the contents of B before the operation.
// - resid:  output; first value returned by bli_getsc() for the norm.
void libblis_test_trmm_check
     (
       test_params_t* params,
       side_t side,
       obj_t* alpha,
       obj_t* a,
       obj_t* b,
       obj_t* b_orig,
       double* resid
     )
{
	num_t dt = bli_obj_dt( b );
	num_t dt_real = bli_obj_dt_proj_to_real( b );

	dim_t m = bli_obj_length( b );
	dim_t n = bli_obj_width( b );

	obj_t norm;
	obj_t t, v, w, z;

	// Receives the second output of bli_getsc(); its value is not used.
	double junk;

	//
	// Pre-conditions:
	// - a is randomized and triangular.
	// - b_orig is randomized.
	// Note:
	// - alpha should have a non-zero imaginary component in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   B := alpha * transa(A) * B_orig    (side = left)
	//   B := alpha * B_orig * transa(A)    (side = right)
	//
	// is functioning correctly if
	//
	//   normfv( v - z )
	//
	// is negligible, where
	//
	//   v = B * t
	//
	//   z = ( alpha * transa(A) * B_orig ) * t    (side = left)
	//     = alpha * transa(A) * B_orig * t
	//     = alpha * transa(A) * w                 (w = B_orig * t)
	//
	//   z = ( alpha * B_orig * transa(A) ) * t    (side = right)
	//     = alpha * B_orig * transa(A) * t
	//     = alpha * B_orig * w                    (w = transa(A) * t)

	bli_obj_scalar_init_detached( dt_real, &norm );

	// The two branches differ only in the length of w (m vs. n).
	if ( bli_is_left( side ) )
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, m, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}
	else // side is not left, i.e. the right-side case
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, n, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}

	libblis_test_vobj_randomize( params, TRUE, &t );

	// v = B * t
	bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &v );

	if ( bli_is_left( side ) )
	{
		// w = B_orig * t; w = alpha * transa(A) * w; z = w
		bli_gemv( &BLIS_ONE, b_orig, &t, &BLIS_ZERO, &w );
		bli_trmv( alpha, a, &w );
		bli_copyv( &w, &z );
	}
	else
	{
		// w = t; w = transa(A) * w; z = alpha * B_orig * w
		bli_copyv( &t, &w );
		bli_trmv( &BLIS_ONE, a, &w );
		bli_gemv( alpha, b_orig, &w, &BLIS_ZERO, &z );
	}

	// v = v - z, and the residual is the norm of that difference.
	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}
blis-0.6.1/testsuite/src/test_trmm.h000066400000000000000000000034771360743507500175030ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_trmm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_trmm3.c000066400000000000000000000264751360743507500175640ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "trmm3"; static char* o_types = "mmm"; // a b c static char* p_types = "suhdh"; // side uploa transa diaga transb static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_trmm3_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t* op
     );

void libblis_test_trmm3_experiment
     (
       test_params_t* params,
       test_op_t* op,
       iface_t iface,
       char* dc_str,
       char* pc_str,
       char* sc_str,
       unsigned int p_cur,
       double* perf,
       double* resid
     );

void libblis_test_trmm3_impl
     (
       iface_t iface,
       side_t side,
       obj_t* alpha,
       obj_t* a,
       obj_t* b,
       obj_t* beta,
       obj_t* c
     );

void libblis_test_trmm3_check
     (
       test_params_t* params,
       side_t side,
       obj_t* alpha,
       obj_t* a,
       obj_t* b,
       obj_t* beta,
       obj_t* c,
       obj_t* c_orig,
       double* resid
     );

// Runs the tests of the operations listed below, each with its own entry
// in op->ops. It is called by libblis_test_trmm3() before the trmm3 driver.
void libblis_test_trmm3_deps
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t* op
     )
{
	libblis_test_randv( tdata, params, &(op->ops->randv) );
	libblis_test_randm( tdata, params, &(op->ops->randm) );
	libblis_test_setv( tdata, params, &(op->ops->setv) );
	libblis_test_normfv( tdata, params, &(op->ops->normfv) );
	libblis_test_subv( tdata, params, &(op->ops->subv) );
	libblis_test_scalv( tdata, params, &(op->ops->scalv) );
	libblis_test_copym( tdata, params, &(op->ops->copym) );
	libblis_test_scalm( tdata, params, &(op->ops->scalm) );
	libblis_test_gemv( tdata, params, &(op->ops->gemv) );
	libblis_test_trmv( tdata, params, &(op->ops->trmv) );
}

// Entry point of the trmm3 test.
//
// Returns without doing anything when libblis_test_op_is_done(),
// libblis_test_op_is_disabled(), or libblis_test_l3_is_disabled() reports
// true for op. Otherwise it runs the dependencies and then hands
// libblis_test_trmm3_experiment to libblis_test_op_driver().
void libblis_test_trmm3
     (
       thread_data_t* tdata,
       test_params_t* params,
       test_op_t* op
     )
{

	// Return early if this test has already been done.
	if ( libblis_test_op_is_done( op ) ) return;

	// Return early if operation is disabled.
	if ( libblis_test_op_is_disabled( op ) ||
	     libblis_test_l3_is_disabled( op ) ) return;

	// Call dependencies first.
	if ( TRUE ) libblis_test_trmm3_deps( tdata, params, op );

	// Execute the test driver for each implementation requested.
	//if ( op->front_seq == ENABLE )
	{
		libblis_test_op_driver( tdata,
		                        params,
		                        op,
		                        BLIS_TEST_SEQ_FRONT_END,
		                        op_str,
		                        p_types,
		                        o_types,
		                        thresh,
		                        libblis_test_trmm3_experiment );
	}
}

// Runs one trmm3 experiment and reports its performance and residual.
//
// Parameters:
// - params: test parameters; n_repeats is read from it, and it is forwarded
//           to the object creation, randomization, and check helpers.
// - op:     operation descriptor; dim_spec[0] and dim_spec[1] are mapped to
//           the dimensions m and n.
// - iface:  interface selector forwarded to libblis_test_trmm3_impl().
// - dc_str: datatype string; only dc_str[0] is consulted.
// - pc_str: parameter string; characters 0..4 select side, uplo of A,
//           trans of A, diag of A, and trans of B, in that order.
// - sc_str: storage string; sc_str[0] is used for C (and its saved copy),
//           sc_str[1] for A, and sc_str[2] for B.
// - p_cur:  value passed, together with each dimension specifier, to
//           libblis_test_get_dim_from_prob_size().
// - perf:   output; performance estimate based on the fastest repeat.
// - resid:  output; residual computed by libblis_test_trmm3_check().
void libblis_test_trmm3_experiment
     (
       test_params_t* params,
       test_op_t* op,
       iface_t iface,
       char* dc_str,
       char* pc_str,
       char* sc_str,
       unsigned int p_cur,
       double* perf,
       double* resid
     )
{
	unsigned int n_repeats = params->n_repeats;
	unsigned int i;

	// Start from the largest double so the first measured time always wins.
	double time_min = DBL_MAX;
	double time;

	num_t datatype;

	dim_t m, n;

	// Order of the square matrix A; set by bli_set_dim_with_side() from
	// side, m, and n.
	dim_t mn_side;

	side_t side;
	uplo_t uploa;
	trans_t transa;
	diag_t diaga;
	trans_t transb;

	obj_t alpha, a, b, beta, c;
	obj_t c_save;


	// Use the datatype of the first char in the datatype combination string.
	bli_param_map_char_to_blis_dt( dc_str[0], &datatype );

	// Map the dimension specifier to actual dimensions.
	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
	n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );

	// Map parameter characters to BLIS constants.
	bli_param_map_char_to_blis_side( pc_str[0], &side );
	bli_param_map_char_to_blis_uplo( pc_str[1], &uploa );
	bli_param_map_char_to_blis_trans( pc_str[2], &transa );
	bli_param_map_char_to_blis_diag( pc_str[3], &diaga );
	bli_param_map_char_to_blis_trans( pc_str[4], &transb );

	// Create test scalars.
	bli_obj_scalar_init_detached( datatype, &alpha );
	bli_obj_scalar_init_detached( datatype, &beta );

	// Create test operands (vectors and/or matrices).
	bli_set_dim_with_side( side, m, n, &mn_side );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[1], mn_side, mn_side, &a );
	libblis_test_mobj_create( params, datatype, transb,
	                          sc_str[2], m, n, &b );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, n, &c );
	libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
	                          sc_str[0], m, n, &c_save );

	// Set alpha and beta.
	if ( bli_obj_is_real( &c ) )
	{
		bli_setsc(  0.8,  0.0, &alpha );
		bli_setsc( -1.0,  0.0, &beta );
	}
	else
	{
		bli_setsc(  0.8,  0.6, &alpha );
		bli_setsc( -1.0,  0.5, &beta );
	}

	// Set the structure and uplo properties of A.
	bli_obj_set_struc( BLIS_TRIANGULAR, &a );
	bli_obj_set_uplo( uploa, &a );

	// Randomize A, make it densely triangular.
	libblis_test_mobj_randomize( params, TRUE, &a );
	bli_mktrim( &a );

	// Randomize B and C, and save C.
	libblis_test_mobj_randomize( params, TRUE, &b );
	libblis_test_mobj_randomize( params, TRUE, &c );
	bli_copym( &c, &c_save );

	// Apply the remaining parameters.
	bli_obj_set_conjtrans( transa, &a );
	bli_obj_set_diag( diaga, &a );
	bli_obj_set_conjtrans( transb, &b );

	// Repeat the experiment n_repeats times and record results.
	for ( i = 0; i < n_repeats; ++i )
	{
		// Restore C from the saved copy so every repeat starts from the
		// same input; the copy happens before the clock is read.
		bli_copym( &c_save, &c );

		time = bli_clock();

		libblis_test_trmm3_impl( iface, side, &alpha, &a, &b, &beta, &c );

		time_min = bli_clock_min_diff( time_min, time );
	}

	// Estimate the performance of the best experiment repeat.
	// The estimate is scaled by four when C is complex.
	*perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
	if ( bli_obj_is_complex( &c ) ) *perf *= 4.0;

	// Perform checks.
	libblis_test_trmm3_check( params, side, &alpha, &a, &b, &beta, &c, &c_save, resid );

	// Zero out performance and residual if output matrix is empty.
	libblis_test_check_empty_problem( &c, perf, resid );

	// Free the test objects.
	bli_obj_free( &a );
	bli_obj_free( &b );
	bli_obj_free( &c );
	bli_obj_free( &c_save );
}

// Invokes the trmm3 implementation selected by iface.
//
// Only BLIS_TEST_SEQ_FRONT_END is handled, which calls bli_trmm3(). Any
// other value is reported through libblis_test_printf_error().
void libblis_test_trmm3_impl
     (
       iface_t iface,
       side_t side,
       obj_t* alpha,
       obj_t* a,
       obj_t* b,
       obj_t* beta,
       obj_t* c
     )
{
	switch ( iface )
	{
		case BLIS_TEST_SEQ_FRONT_END:
		bli_trmm3( side, alpha, a, b, beta, c );
		//bli_trmm34m( side, alpha, a, b, beta, c );
		//bli_trmm33m( side, alpha, a, b, beta, c );
		break;

		default:
		libblis_test_printf_error( "Invalid interface type.\n" );
	}
}

// Computes the residual for a completed trmm3 and stores it in *resid.
//
// Parameters:
// - params: test parameters, forwarded to libblis_test_vobj_randomize().
// - side:   side on which A was applied.
// - alpha, beta: the scalars that were passed to the implementation.
// - a:      the triangular operand (with its properties already applied).
// - b:      the general operand (with its trans property already applied).
// - c:      the matrix produced by the implementation.
// - c_orig: the contents of C before the operation.
// - resid:  output; first value returned by bli_getsc() for the norm.
void libblis_test_trmm3_check
     (
       test_params_t* params,
       side_t side,
       obj_t* alpha,
       obj_t* a,
       obj_t* b,
       obj_t* beta,
       obj_t* c,
       obj_t* c_orig,
       double* resid
     )
{
	num_t dt = bli_obj_dt( c );
	num_t dt_real = bli_obj_dt_proj_to_real( c );

	dim_t m = bli_obj_length( c );
	dim_t n = bli_obj_width( c );

	obj_t norm;
	obj_t t, v, w, z;

	// Receives the second output of bli_getsc(); its value is not used.
	double junk;

	//
	// Pre-conditions:
	// - a is randomized and triangular.
	// - b is randomized.
	// - c_orig is randomized.
	// Note:
	// - alpha and beta should have non-zero imaginary components in the
	//   complex cases in order to more fully exercise the implementation.
	//
	// Under these conditions, we assume that the implementation for
	//
	//   C := beta * C_orig + alpha * transa(A) * transb(B)    (side = left)
	//   C := beta * C_orig + alpha * transb(B) * transa(A)    (side = right)
	//
	// is functioning correctly if
	//
	//   normfv( v - z )
	//
	// is negligible, where
	//
	//   v = C * t
	//
	//   z = ( beta * C_orig + alpha * transa(A) * transb(B) ) * t    (side = left)
	//     = beta * C_orig * t + alpha * transa(A) * transb(B) * t
	//     = beta * C_orig * t + alpha * transa(A) * w
	//     = beta * C_orig * t + z
	//
	//   z = ( beta * C_orig + alpha * transb(B) * transa(A) ) * t    (side = right)
	//     = beta * C_orig * t + alpha * transb(B) * transa(A) * t
	//     = beta * C_orig * t + alpha * transb(B) * w
	//     = beta * C_orig * t + z

	bli_obj_scalar_init_detached( dt_real, &norm );

	// The two branches differ only in the length of w (m vs. n).
	if ( bli_is_left( side ) )
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, m, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}
	else // side is not left, i.e. the right-side case
	{
		bli_obj_create( dt, n, 1, 0, 0, &t );
		bli_obj_create( dt, m, 1, 0, 0, &v );
		bli_obj_create( dt, n, 1, 0, 0, &w );
		bli_obj_create( dt, m, 1, 0, 0, &z );
	}

	libblis_test_vobj_randomize( params, TRUE, &t );

	// v = C * t
	bli_gemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );

	if ( bli_is_left( side ) )
	{
		// w = transb(B) * t; w = alpha * transa(A) * w; z = w
		bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &w );
		bli_trmv( alpha, a, &w );
		bli_copyv( &w, &z );
	}
	else
	{
		// w = t; w = transa(A) * w; z = alpha * transb(B) * w
		bli_copyv( &t, &w );
		bli_trmv( &BLIS_ONE, a, &w );
		bli_gemv( alpha, b, &w, &BLIS_ZERO, &z );
	}

	// z = beta * C_orig * t + z
	bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z );

	// v = v - z, and the residual is the norm of that difference.
	bli_subv( &z, &v );
	bli_normfv( &v, &norm );
	bli_getsc( &norm, resid, &junk );

	bli_obj_free( &t );
	bli_obj_free( &v );
	bli_obj_free( &w );
	bli_obj_free( &z );
}
blis-0.6.1/testsuite/src/test_trmm3.h000066400000000000000000000035001360743507500175510ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_trmm3 ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_trmv.c000066400000000000000000000221211360743507500174720ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "trmv"; static char* o_types = "mv"; // a x static char* p_types = "uhd"; // uploa transa diaga static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_trmv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_trmv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_trmv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x ); void libblis_test_trmv_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* x_orig, double* resid ); void libblis_test_trmv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_trmv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trmv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_trmv_experiment ); } } void libblis_test_trmv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; uplo_t uploa; trans_t transa; diag_t diaga; obj_t alpha, a, x; obj_t x_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); bli_param_map_char_to_blis_trans( pc_str[1], &transa ); bli_param_map_char_to_blis_diag( pc_str[2], &diaga ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &a ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &x_save ); // Set alpha. if ( bli_obj_is_real( &x ) ) bli_setsc( -1.0, 0.0, &alpha ); else bli_setsc( -0.5, 0.5, &alpha ); // Set the structure and uplo properties of A. bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, make it densely triangular. libblis_test_mobj_randomize( params, TRUE, &a ); //libblis_test_mobj_load_diag( params, &a ); bli_mktrim( &a ); // Randomize x and save. libblis_test_vobj_randomize( params, TRUE, &x ); bli_copyv( &x, &x_save ); // Apply the remaining parameters. 
bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &x_save, &x ); time = bli_clock(); libblis_test_trmv_impl( iface, &alpha, &a, &x ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 4.0; // Perform checks. libblis_test_trmv_check( params, &alpha, &a, &x, &x_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &x_save ); } void libblis_test_trmv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_trmv( alpha, a, x ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_trmv_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* x_orig, double* resid ) { num_t dt = bli_obj_dt( x ); num_t dt_real = bli_obj_dt_proj_to_real( x ); dim_t m = bli_obj_vector_dim( x ); uplo_t uploa = bli_obj_uplo( a ); trans_t transa = bli_obj_conjtrans_status( a ); obj_t a_local, y; obj_t norm; double junk; // // Pre-conditions: // - a is randomized and triangular. // - x is randomized. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // x := alpha * transa(A) * x_orig // // is functioning correctly if // // normfv( y - x ) // // is negligible, where // // y = alpha * conja(A_dense) * x_orig // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &y ); bli_obj_create( dt, m, m, 0, 0, &a_local ); bli_obj_set_struc( BLIS_TRIANGULAR, &a_local ); bli_obj_set_uplo( uploa, &a_local ); bli_obj_toggle_uplo_if_trans( transa, &a_local ); bli_copym( a, &a_local ); bli_mktrim( &a_local ); bli_obj_set_struc( BLIS_GENERAL, &a_local ); bli_obj_set_uplo( BLIS_DENSE, &a_local ); bli_gemv( alpha, &a_local, x_orig, &BLIS_ZERO, &y ); bli_subv( x, &y ); bli_normfv( &y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &y ); bli_obj_free( &a_local ); } blis-0.6.1/testsuite/src/test_trmv.h000066400000000000000000000034771360743507500175140ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_trmv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_trsm.c000066400000000000000000000247601360743507500175020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "trsm"; static char* o_types = "mm"; // a b static char* p_types = "suhd"; // side uploa transa diaga static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_trsm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_trsm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_trsm_impl ( iface_t iface, side_t side, obj_t* alpha, obj_t* a, obj_t* b ); void libblis_test_trsm_check ( test_params_t* params, side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* b_orig, double* resid ); void libblis_test_trsm_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); libblis_test_trsv( tdata, params, &(op->ops->trsv) ); } void libblis_test_trsm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trsm_deps( tdata, params, op ); // Execute the test driver for each implementation requested. 
//if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_trsm_experiment ); } } void libblis_test_trsm_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; dim_t mn_side; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; obj_t alpha, a, b; obj_t b_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_side( pc_str[0], &side ); bli_param_map_char_to_blis_uplo( pc_str[1], &uploa ); bli_param_map_char_to_blis_trans( pc_str[2], &transa ); bli_param_map_char_to_blis_diag( pc_str[3], &diaga ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, &mn_side ); libblis_test_mobj_create( params, datatype, transa, sc_str[1], mn_side, mn_side, &a ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &b ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &b_save ); // Set alpha. if ( bli_obj_is_real( &b ) ) { bli_setsc( 2.0, 0.0, &alpha ); } else { bli_setsc( 2.0, 0.0, &alpha ); } // Set the structure and uplo properties of A. bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, load the diagonal, make it densely triangular. 
libblis_test_mobj_randomize( params, TRUE, &a ); libblis_test_mobj_load_diag( params, &a ); bli_mktrim( &a ); // Randomize B and save B. libblis_test_mobj_randomize( params, TRUE, &b ); bli_copym( &b, &b_save ); // Apply the remaining parameters. bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &b_save, &b ); time = bli_clock(); libblis_test_trsm_impl( iface, side, &alpha, &a, &b ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &b ) ) *perf *= 4.0; // Perform checks. libblis_test_trsm_check( params, side, &alpha, &a, &b, &b_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &b, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &b_save ); } void libblis_test_trsm_impl ( iface_t iface, side_t side, obj_t* alpha, obj_t* a, obj_t* b ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: #if 0 bli_printm( "a", a, "%5.2f", "" ); bli_printm( "b", b, "%5.2f", "" ); //bli_printm( "alpha", alpha, "%5.2f", "" ); #endif bli_trsm( side, alpha, a, b ); #if 0 bli_printm( "b after", b, "%5.2f", "" ); #endif break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_trsm_check ( test_params_t* params, side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* b_orig, double* resid ) { num_t dt = bli_obj_dt( b ); num_t dt_real = bli_obj_dt_proj_to_real( b ); dim_t m = bli_obj_length( b ); dim_t n = bli_obj_width( b ); obj_t norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a is randomized and triangular. // - b_orig is randomized. 
// Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // B := alpha * inv(transa(A)) * B_orig (side = left) // B := alpha * B_orig * inv(transa(A)) (side = right) // // is functioning correctly if // // normfv( v - z ) // // is negligible, where // // v = B * t // // z = ( alpha * inv(transa(A)) * B ) * t (side = left) // = alpha * inv(transa(A)) * B * t // = alpha * inv(transa(A)) * w // // z = ( alpha * B * inv(transa(A)) ) * t (side = right) // = alpha * B * tinv(ransa(A)) * t // = alpha * B * w bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } else // else if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, n, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &v ); if ( bli_is_left( side ) ) { bli_gemv( alpha, b_orig, &t, &BLIS_ZERO, &w ); bli_trsv( &BLIS_ONE, a, &w ); bli_copyv( &w, &z ); } else { bli_copyv( &t, &w ); bli_trsv( &BLIS_ONE, a, &w ); bli_gemv( alpha, b_orig, &w, &BLIS_ZERO, &z ); } bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); } blis-0.6.1/testsuite/src/test_trsm.h000066400000000000000000000034771360743507500175110ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_trsm ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_trsm_ukr.c000066400000000000000000000335641360743507500203650ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "trsm_ukr"; static char* o_types = "m"; // c static char* p_types = "u"; // uploa static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_trsm_ukr_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_trsm_ukr_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_trsm_ukr_impl ( iface_t iface, side_t side, obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx ); void libblis_test_trsm_ukr_check ( test_params_t* params, side_t side, obj_t* a, obj_t* b, obj_t* b_orig, double* resid ); void libblis_test_trsm_ukr_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_setv( tdata, params, &(op->ops->setv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); libblis_test_trsv( tdata, params, &(op->ops->trsv) ); } void libblis_test_trsm_ukr ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l3ukr_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trsm_ukr_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_UKERNEL, op_str, p_types, o_types, thresh, libblis_test_trsm_ukr_experiment ); } } // Import the register blocksizes used by the micro-kernel(s). 
extern blksz_t* gemm_mr; extern blksz_t* gemm_nr; extern blksz_t* gemm_kr; void libblis_test_trsm_ukr_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; inc_t ldap, ldbp; char sc_a = 'c'; char sc_b = 'r'; side_t side = BLIS_LEFT; uplo_t uploa; obj_t a, b, c; obj_t ap, bp; obj_t c_save; cntx_t* cntx; // Query a context. cntx = bli_gks_query_cntx(); // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Fix m and n to MR and NR, respectively. m = bli_cntx_get_blksz_def_dt( datatype, BLIS_MR, cntx ); n = bli_cntx_get_blksz_def_dt( datatype, BLIS_NR, cntx ); // Also query PACKMR and PACKNR as the leading dimensions to ap and bp, // respectively. ldap = bli_cntx_get_blksz_max_dt( datatype, BLIS_MR, cntx ); ldbp = bli_cntx_get_blksz_max_dt( datatype, BLIS_NR, cntx ); // Store the register blocksizes so that the driver can retrieve the // values later when printing results. op->dim_aux[0] = m; op->dim_aux[1] = n; // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); // Create test scalars. // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_a, m, m, &a ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_b, m, n, &b ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &c_save ); // Set the structure, uplo, and diagonal offset properties of A. bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_diag_offset( 0, &a ); // Randomize A, make it densely triangular. 
libblis_test_mobj_randomize( params, TRUE, &a ); libblis_test_mobj_load_diag( params, &a ); bli_mktrim( &a ); // Randomize B. libblis_test_mobj_randomize( params, TRUE, &b ); // Randomize C and save C. libblis_test_mobj_randomize( params, TRUE, &c ); bli_copym( &c, &c_save ); #if 0 // Create pack objects for a and b, and pack them to ap and bp, // respectively. cntl_t* cntl_a = libblis_test_pobj_create ( BLIS_MR, BLIS_MR, BLIS_INVERT_DIAG, BLIS_PACKED_ROW_PANELS, BLIS_BUFFER_FOR_A_BLOCK, &a, &ap, cntx ); cntl_t* cntl_b = libblis_test_pobj_create ( BLIS_MR, BLIS_NR, BLIS_NO_INVERT_DIAG, BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL, &b, &bp, cntx ); #endif // Create the packed objects. Use packmr and packnr as the leading // dimensions of ap and bp, respectively. Note that we use the ldims // instead of the matrix dimensions for allocation purposes here. // This is a little hacky and was prompted when trying to support // configurations such as power9 that employ duplication/broadcasting // of elements in one of the packed matrix objects. Thankfully, packm // doesn't care about those dimensions and instead relies on // information taken from the source object. Thus, this is merely // about coaxing bli_obj_create() in allocating enough space for our // purposes. bli_obj_create( datatype, ldap, m, 1, ldap, &ap ); bli_obj_create( datatype, m, ldbp, ldbp, 1, &bp ); // Set up the objects for packing. Calling packm_init_pack() does everything // except checkout a memory pool block and save its address to the obj_t's. // However, it does overwrite the buffer field of packed object with that of // the source object (as a side-effect of bli_obj_alias_to(); that buffer // field would normally be overwritten yet again by the address from the // memory pool block). So, we have to save the buffer address that was // allocated so we can re-store it to the object afterward. 
void* buf_ap = bli_obj_buffer( &ap ); void* buf_bp = bli_obj_buffer( &bp ); bli_packm_init_pack( BLIS_INVERT_DIAG, BLIS_PACKED_ROW_PANELS, BLIS_PACK_FWD_IF_UPPER, BLIS_PACK_FWD_IF_LOWER, BLIS_MR, BLIS_KR, &a, &ap, cntx ); bli_packm_init_pack( BLIS_NO_INVERT_DIAG, BLIS_PACKED_COL_PANELS, BLIS_PACK_FWD_IF_UPPER, BLIS_PACK_FWD_IF_LOWER, BLIS_KR, BLIS_NR, &b, &bp, cntx ); bli_obj_set_buffer( buf_ap, &ap ); bli_obj_set_buffer( buf_bp, &bp ); // Set the diagonal offset of ap. bli_obj_set_diag_offset( 0, &ap ); // Set the uplo field of ap since the default for packed objects is // BLIS_DENSE, and the _ukernel() wrapper needs this information to // know which set of micro-kernels (lower or upper) to choose from. bli_obj_set_uplo( uploa, &ap ); // Pack the data from the source objects. bli_packm_blk_var1( &a, &ap, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED ); bli_packm_blk_var1( &b, &bp, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED ); #if 0 bli_printm( "a", &a, "%5.2f", "" ); bli_printm( "ap", &ap, "%5.2f", "" ); #endif // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { // Re-pack the contents of b to bp. //bli_packm_blk_var1( &b, &bp, cntx, cntl_b, &BLIS_PACKM_SINGLE_THREADED ); bli_packm_blk_var1( &b, &bp, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED ); bli_copym( &c_save, &c ); time = bli_clock(); libblis_test_trsm_ukr_impl( iface, side, &ap, &bp, &c, cntx ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &b ) ) *perf *= 4.0; // Perform checks. libblis_test_trsm_ukr_check( params, side, &ap, &c, &b, resid ); // Zero out performance and residual if output matrix is empty. //libblis_test_check_empty_problem( &c, perf, resid ); #if 0 // Free the control tree nodes and release their cached mem_t entries // back to the memory broker. 
bli_cntl_free( NULL, cntl_a, &BLIS_PACKM_SINGLE_THREADED ); bli_cntl_free( NULL, cntl_b, &BLIS_PACKM_SINGLE_THREADED ); #endif // Free the packed objects. bli_obj_free( &ap ); bli_obj_free( &bp ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } void libblis_test_trsm_ukr_impl ( iface_t iface, side_t side, obj_t* a, obj_t* b, obj_t* c, cntx_t* cntx ) { switch ( iface ) { case BLIS_TEST_SEQ_UKERNEL: bli_trsm_ukernel( a, b, c, cntx ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_trsm_ukr_check ( test_params_t* params, side_t side, obj_t* a, obj_t* b, obj_t* b_orig, double* resid ) { num_t dt = bli_obj_dt( b ); num_t dt_real = bli_obj_dt_proj_to_real( b ); dim_t m = bli_obj_length( b ); dim_t n = bli_obj_width( b ); obj_t norm; obj_t t, v, w, z; double junk; // // Pre-conditions: // - a is randomized and triangular. // - b_orig is randomized. // // Under these conditions, we assume that the implementation for // // B := inv(transa(A)) * B_orig (side = left) // B := B_orig * inv(transa(A)) (side = right) // // is functioning correctly if // // normfv( v - z ) // // is negligible, where // // v = B * t // // z = ( inv(transa(A)) * B ) * t (side = left) // = inv(transa(A)) * B * t // = inv(transa(A)) * w // // z = ( B * inv(transa(A)) ) * t (side = right) // = B * tinv(ransa(A)) * t // = B * w bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } else // else if ( bli_is_left( side ) ) { bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); bli_obj_create( dt, n, 1, 0, 0, &w ); bli_obj_create( dt, m, 1, 0, 0, &z ); } libblis_test_vobj_randomize( params, TRUE, &t ); bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &v ); #if 0 bli_printm( "a11", a, "%5.2f", "" ); #endif // 
Restore the diagonal of a11 to its original, un-inverted state // (needed for trsv). bli_invertd( a ); if ( bli_is_left( side ) ) { bli_gemv( &BLIS_ONE, b_orig, &t, &BLIS_ZERO, &w ); bli_trsv( &BLIS_ONE, a, &w ); bli_copyv( &w, &z ); } else { bli_copyv( &t, &w ); bli_trsv( &BLIS_ONE, a, &w ); bli_gemv( &BLIS_ONE, b_orig, &w, &BLIS_ZERO, &z ); } bli_subv( &z, &v ); bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); bli_obj_free( &v ); bli_obj_free( &w ); bli_obj_free( &z ); } blis-0.6.1/testsuite/src/test_trsm_ukr.h000066400000000000000000000035031360743507500203600ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_trsm_ukr ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_trsv.c000066400000000000000000000224021360743507500175020ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "trsv"; static char* o_types = "mv"; // a x static char* p_types = "uhd"; // uploa transa diaga static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. void libblis_test_trsv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_trsv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_trsv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x ); void libblis_test_trsv_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* x_orig, double* resid ); void libblis_test_trsv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); libblis_test_scalv( tdata, params, &(op->ops->scalv) ); libblis_test_gemv( tdata, params, &(op->ops->gemv) ); } void libblis_test_trsv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test 
has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trsv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_trsv_experiment ); } } void libblis_test_trsv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; uplo_t uploa; trans_t transa; diag_t diaga; obj_t alpha, a, x; obj_t x_save; // Use the datatype of the first char in the datatype combination string. bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); bli_param_map_char_to_blis_trans( pc_str[1], &transa ); bli_param_map_char_to_blis_diag( pc_str[2], &diaga ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, m, &a ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &x_save ); // Set alpha. if ( bli_obj_is_real( &x ) ) bli_setsc( 2.0, 0.0, &alpha ); else bli_setsc( 2.0, -1.0, &alpha ); // Set the structure and uplo properties of A. 
bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); // Randomize A, load the diagonal, make it densely triangular. libblis_test_mobj_randomize( params, TRUE, &a ); libblis_test_mobj_load_diag( params, &a ); bli_mktrim( &a ); // Randomize x and save. libblis_test_vobj_randomize( params, TRUE, &x ); bli_copyv( &x, &x_save ); // Apply the remaining parameters. bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &x_save, &x ); time = bli_clock(); libblis_test_trsv_impl( iface, &alpha, &a, &x ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &x ) ) *perf *= 4.0; // Perform checks. libblis_test_trsv_check( params, &alpha, &a, &x, &x_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &x, perf, resid ); // Free the test objects. bli_obj_free( &a ); bli_obj_free( &x ); bli_obj_free( &x_save ); } void libblis_test_trsv_impl ( iface_t iface, obj_t* alpha, obj_t* a, obj_t* x ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_trsv( alpha, a, x ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_trsv_check ( test_params_t* params, obj_t* alpha, obj_t* a, obj_t* x, obj_t* x_orig, double* resid ) { num_t dt = bli_obj_dt( x ); num_t dt_real = bli_obj_dt_proj_to_real( x ); dim_t m = bli_obj_vector_dim( x ); uplo_t uploa = bli_obj_uplo( a ); trans_t transa = bli_obj_conjtrans_status( a ); obj_t alpha_inv; obj_t a_local, y; obj_t norm; double junk; // // Pre-conditions: // - a is randomized and triangular. // - x is randomized. // Note: // - alpha should have a non-zero imaginary component in the // complex cases in order to more fully exercise the implementation. 
// // Under these conditions, we assume that the implementation for // // x := alpha * inv(transa(A)) * x_orig // // is functioning correctly if // // normfv( y - x_orig ) // // is negligible, where // // y = inv(alpha) * transa(A_dense) * x // bli_obj_scalar_init_detached( dt, &alpha_inv ); bli_obj_scalar_init_detached( dt_real, &norm ); bli_copysc( &BLIS_ONE, &alpha_inv ); bli_divsc( alpha, &alpha_inv ); bli_obj_create( dt, m, 1, 0, 0, &y ); bli_obj_create( dt, m, m, 0, 0, &a_local ); bli_obj_set_struc( BLIS_TRIANGULAR, &a_local ); bli_obj_set_uplo( uploa, &a_local ); bli_obj_toggle_uplo_if_trans( transa, &a_local ); bli_copym( a, &a_local ); bli_mktrim( &a_local ); bli_obj_set_struc( BLIS_GENERAL, &a_local ); bli_obj_set_uplo( BLIS_DENSE, &a_local ); bli_gemv( &alpha_inv, &a_local, x, &BLIS_ZERO, &y ); bli_subv( x_orig, &y ); bli_normfv( &y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &y ); bli_obj_free( &a_local ); } blis-0.6.1/testsuite/src/test_trsv.h000066400000000000000000000034771360743507500175220ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_trsv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_xpbym.c000066400000000000000000000206001360743507500176410ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "xpbym"; static char* o_types = "mm"; // x y static char* p_types = "h"; // transx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_xpbym_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_xpbym_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_xpbym_impl ( iface_t iface, obj_t* x, obj_t* beta, obj_t* y ); void libblis_test_xpbym_check ( test_params_t* params, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_save, double* resid ); void libblis_test_xpbym_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randm( tdata, params, &(op->ops->randm) ); libblis_test_normfm( tdata, params, &(op->ops->normfm) ); libblis_test_addm( tdata, params, &(op->ops->addm) ); libblis_test_subm( tdata, params, &(op->ops->subm) ); libblis_test_copym( tdata, params, &(op->ops->copym) ); libblis_test_scalm( tdata, params, &(op->ops->scalm) ); } void libblis_test_xpbym ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_xpbym_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_xpbym_experiment ); } } void libblis_test_xpbym_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m, n; trans_t transx; obj_t x, beta, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to actual dimensions. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, sc_str[0], m, n, &x ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y ); libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y_save ); // Set beta. if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &beta ); else bli_setsc( 0.0, -2.0, &beta ); // Randomize and save y. libblis_test_mobj_randomize( params, FALSE, &x ); libblis_test_mobj_randomize( params, FALSE, &y ); bli_copym( &y, &y_save ); // Apply the parameters. bli_obj_set_conjtrans( transx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copym( &y_save, &y ); time = bli_clock(); libblis_test_xpbym_impl( iface, &x, &beta, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_xpbym_check( params, &x, &beta, &y, &y_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_xpbym_impl ( iface_t iface, obj_t* x, obj_t* beta, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_xpbym( x, beta, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_xpbym_check ( test_params_t* params, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_length( y ); dim_t n = bli_obj_width( y ); obj_t x_temp, y_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is randomized. // Note: // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := beta * y_orig + conjx(x) // // is functioning correctly if // // normfm( y - ( beta * y_orig + conjx(x) ) ) // // is negligible. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, n, 0, 0, &x_temp ); bli_obj_create( dt, m, n, 0, 0, &y_temp ); bli_copym( x, &x_temp ); bli_copym( y_orig, &y_temp ); bli_scalm( beta, &y_temp ); bli_addm( &x_temp, &y_temp ); bli_subm( &y_temp, y ); bli_normfm( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); } blis-0.6.1/testsuite/src/test_xpbym.h000066400000000000000000000034051360743507500176520ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2018, The University of Texas at Austin Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_xpbym ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/testsuite/src/test_xpbyv.c000066400000000000000000000201571360743507500176610ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "blis.h" #include "test_libblis.h" // Static variables. static char* op_str = "xpbyv"; static char* o_types = "vv"; // x y static char* p_types = "c"; // conjx static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
void libblis_test_xpbyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); void libblis_test_xpbyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ); void libblis_test_xpbyv_impl ( iface_t iface, obj_t* x, obj_t* beta, obj_t* y ); void libblis_test_xpbyv_check ( test_params_t* params, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ); void libblis_test_xpbyv_deps ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { libblis_test_randv( tdata, params, &(op->ops->randv) ); libblis_test_normfv( tdata, params, &(op->ops->normfv) ); libblis_test_addv( tdata, params, &(op->ops->addv) ); libblis_test_subv( tdata, params, &(op->ops->subv) ); libblis_test_copyv( tdata, params, &(op->ops->copyv) ); } void libblis_test_xpbyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_xpbyv_deps( tdata, params, op ); // Execute the test driver for each implementation requested. //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( tdata, params, op, BLIS_TEST_SEQ_FRONT_END, op_str, p_types, o_types, thresh, libblis_test_xpbyv_experiment ); } } void libblis_test_xpbyv_experiment ( test_params_t* params, test_op_t* op, iface_t iface, char* dc_str, char* pc_str, char* sc_str, unsigned int p_cur, double* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; unsigned int i; double time_min = DBL_MAX; double time; num_t datatype; dim_t m; conj_t conjx; obj_t beta, x, y; obj_t y_save; // Use the datatype of the first char in the datatype combination string. 
bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); // Map the dimension specifier to an actual dimension. m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save ); // Set beta. if ( bli_obj_is_real( &y ) ) bli_setsc( -2.0, 0.0, &beta ); else bli_setsc( 0.0, -2.0, &beta ); // Randomize x and y, and save y. libblis_test_vobj_randomize( params, FALSE, &x ); libblis_test_vobj_randomize( params, FALSE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. bli_obj_set_conj( conjx, &x ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { bli_copyv( &y_save, &y ); time = bli_clock(); libblis_test_xpbyv_impl( iface, &x, &beta, &y ); time_min = bli_clock_min_diff( time_min, time ); } // Estimate the performance of the best experiment repeat. *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; if ( bli_obj_is_complex( &y ) ) *perf *= 4.0; // Perform checks. libblis_test_xpbyv_check( params, &x, &beta, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); // Free the test objects. 
bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &y_save ); } void libblis_test_xpbyv_impl ( iface_t iface, obj_t* x, obj_t* beta, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: bli_xpbyv( x, beta, y ); break; default: libblis_test_printf_error( "Invalid interface type.\n" ); } } void libblis_test_xpbyv_check ( test_params_t* params, obj_t* x, obj_t* beta, obj_t* y, obj_t* y_orig, double* resid ) { num_t dt = bli_obj_dt( y ); num_t dt_real = bli_obj_dt_proj_to_real( y ); dim_t m = bli_obj_vector_dim( y ); obj_t x_temp, y_temp; obj_t norm; double junk; // // Pre-conditions: // - x is randomized. // - y_orig is randomized. // Note: // - beta should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // // y := beta * y_orig + conjx(x) // // is functioning correctly if // // normfv( y - ( beta * y_orig + conjx(x) ) ) // // is negligible. // bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); bli_scalv( beta, &y_temp ); bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y ); bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); bli_obj_free( &y_temp ); } blis-0.6.1/testsuite/src/test_xpbyv.h000066400000000000000000000035001360743507500176570ustar00rootroot00000000000000/* BLIS An object-based framework for developing high-performance BLAS-like libraries. Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
- Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - Neither the name(s) of the copyright holder(s) nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ void libblis_test_xpbyv ( thread_data_t* tdata, test_params_t* params, test_op_t* op ); blis-0.6.1/travis/000077500000000000000000000000001360743507500137715ustar00rootroot00000000000000blis-0.6.1/travis/cpuid/000077500000000000000000000000001360743507500150755ustar00rootroot00000000000000blis-0.6.1/travis/cpuid/excavator.def000066400000000000000000000103061360743507500175510ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # CPU: AMD A12-8870, 4000 MHz # 00000000 ******** => 0000000D 68747541 444D4163 69746E65 00000001 ******** => 00660F51 00040800 7ED8320B 178BFBFF 00000002 ******** => 00000000 00000000 00000000 00000000 00000003 ******** => 00000000 00000000 00000000 00000000 00000005 ******** => 00000040 00000040 00000003 00000000 00000006 ******** => 00000004 00000000 00000001 00000000 00000007 ******** => 00000000 000001A9 00000000 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 0000000A ******** => 00000000 00000000 00000000 00000000 0000000C ******** => 00000000 00000000 00000000 00000000 0000000D 00000000 => 00000007 00000340 000003C0 40000000 0000000D 00000001 => 00000001 00000000 00000000 00000000 0000000D 00000002 => 00000100 00000240 00000000 00000000 0000000D 0000003E => 00000080 00000340 00000000 00000000 80000000 ******** => 8000001E 68747541 444D4163 69746E65 80000001 ******** => 00660F51 20000000 2FABBFFF 2FD3FBFF 80000002 ******** => 20444D41 204F5250 2D323141 30373838 80000003 ******** => 2C375220 20323120 504D4F43 20455455 80000004 ******** => 45524F43 43342053 2047382B 00202020 80000005 ******** => FF40FF18 FF40FF30 20080140 60030140 80000006 ******** => 64006400 64004200 04008140 00000000 80000007 ******** => 00000000 00000005 00000400 000037D9 80000008 ******** => 00003030 00000000 00004003 00000000 80000009 ******** => 00000000 00000000 00000000 00000000 8000000A ******** => 00000001 00008000 00000000 0001BCFF 8000000B ******** => 00000000 00000000 00000000 00000000 8000000C ******** => 00000000 00000000 00000000 00000000 8000000D ******** => 00000000 00000000 00000000 00000000 8000000E ******** => 00000000 00000000 00000000 00000000 8000000F ******** => 00000000 00000000 00000000 00000000 80000010 ******** => 00000000 00000000 00000000 00000000 80000011 ******** => 00000000 00000000 00000000 00000000 80000012 ******** => 00000000 00000000 00000000 00000000 80000013 ******** => 
00000000 00000000 00000000 00000000 80000014 ******** => 00000000 00000000 00000000 00000000 80000015 ******** => 00000000 00000000 00000000 00000000 80000016 ******** => 00000000 00000000 00000000 00000000 80000017 ******** => 00000000 00000000 00000000 00000000 80000018 ******** => 00000000 00000000 00000000 00000000 80000019 ******** => F040F018 64006400 00000000 00000000 8000001A ******** => 00000003 00000000 00000000 00000000 8000001B ******** => 000007FF 00000000 00000000 00000000 8000001C ******** => 00000000 80032013 00010200 E000000F 8000001E ******** => 00000010 00000100 00000000 00000000 blis-0.6.1/travis/cpuid/haswell.def000066400000000000000000000067521360743507500172260ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # CPU: Intel Xeon E5-2660 v3, 2600 MHz # 00000000 ******** => 0000000F 756E6547 6C65746E 49656E69 00000001 ******** => 000306F2 00200800 7FFEFBFF BFEBFBFF 00000002 ******** => 76036301 00F0B5FF 00000000 00C10000 00000003 ******** => 00000000 00000000 00000000 00000000 00000004 00000000 => 3C004121 01C0003F 0000003F 00000000 00000004 00000001 => 3C004122 01C0003F 0000003F 00000000 00000004 00000002 => 3C004143 01C0003F 000001FF 00000000 00000004 00000003 => 3C07C163 04C0003F 00004FFF 00000006 00000005 ******** => 00000040 00000040 00000003 00002120 00000006 ******** => 00000075 00000002 00000009 00000000 00000007 ******** => 00000000 000037AB 00000000 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000001 00000000 00000000 00000000 0000000A ******** => 07300403 00000000 00000000 00000603 0000000B 00000000 => 00000001 00000002 00000100 00000000 0000000B 00000001 => 00000005 00000014 00000201 00000000 0000000C ******** => 00000000 00000000 00000000 00000000 0000000D 00000000 => 00000007 00000340 00000340 00000000 0000000D 00000001 => 00000001 00000000 00000000 00000000 0000000D 00000002 => 00000100 00000240 00000000 00000000 0000000E ******** => 00000000 00000000 00000000 00000000 0000000F 00000000 => 00000000 00000027 00000000 00000002 0000000F 00000001 => 00000000 0000A000 00000027 00000001 80000000 ******** => 80000008 00000000 00000000 00000000 80000001 ******** => 00000000 00000000 00000021 2C100000 
80000002 ******** => 65746E49 2952286C 6F655820 2952286E 80000003 ******** => 55504320 2D354520 30363632 20337620 80000004 ******** => 2E322040 48473036 0000007A 00000000 80000005 ******** => 00000000 00000000 00000000 00000000 80000006 ******** => 00000000 00000000 01006040 00000000 80000007 ******** => 00000000 00000000 00000000 00000100 80000008 ******** => 0000302E 00000000 00000000 00000000 blis-0.6.1/travis/cpuid/penryn.def000066400000000000000000000054001360743507500170670ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # CPU: Intel Xeon X5550, 2666 MHz # 00000000 ******** => 0000000B 756E6547 6C65746E 49656E69 00000001 ******** => 000106A2 00100800 00BCE3BD BFEBFBFF 00000002 ******** => 55035A01 00F0B2E4 00000000 09CA212C 00000003 ******** => 00000000 00000000 00000000 00000000 00000005 ******** => 00000040 00000040 00000003 00021120 00000006 ******** => 00000003 00000002 00000001 00000000 00000007 ******** => 00000000 00000000 00000000 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 0000000A ******** => 07300403 00000000 00000000 00000603 80000000 ******** => 80000008 00000000 00000000 00000000 80000001 ******** => 00000000 00000000 00000001 28100000 80000002 ******** => 756E6547 20656E69 65746E49 2952286C 80000003 ******** => 55504320 20202020 20202020 40202020 80000004 ******** => 30303020 20402030 37362E32 007A4847 80000005 ******** => 00000000 00000000 00000000 00000000 80000006 ******** => 00000000 00000000 01006040 00000000 80000007 ******** => 00000000 00000000 00000000 00000100 80000008 ******** => 00003028 00000000 00000000 00000000 blis-0.6.1/travis/cpuid/piledriver.def000066400000000000000000000106531360743507500177270ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # CPU: AMD A10-6800K, 4300 MHz # 00000000 ******** => 0000000D 68747541 444D4163 69746E65 00000001 ******** => 00610F31 00040800 3E98320B 178BFBFF 00000002 ******** => 00000000 00000000 00000000 00000000 00000003 ******** => 00000000 00000000 00000000 00000000 00000005 ******** => 00000040 00000040 00000003 00000000 00000006 ******** => 00000000 00000000 00000001 00000000 00000007 ******** => 00000000 00000008 00000000 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 0000000A ******** => 00000000 00000000 00000000 00000000 0000000C ******** => 00000000 00000000 00000000 00000000 0000000D 00000000 => 00000007 00000340 000003C0 40000000 0000000D 00000001 => 00000000 00000000 00000000 00000000 0000000D 00000002 => 00000100 00000240 00000000 00000000 0000000D 0000003E => 00000080 00000340 00000000 00000000 80000000 ******** => 8000001E 68747541 444D4163 69746E65 80000001 ******** => 00610F31 20000000 01EBBFFF 2FD3FBFF 80000002 ******** => 20444D41 2D303141 30303836 5041204B 80000003 ******** => 69772055 52206874 6F656461 6D74286E 80000004 ******** => 44482029 61724720 63696870 00202073 80000005 ******** => FF40FF18 FF40FF30 10040140 40020140 80000006 ******** => 64006400 64004200 08008140 00000000 80000007 ******** => 00000000 00000000 00000000 000007D9 80000008 ******** => 00003030 00000000 00004003 00000000 80000009 ******** => 00000000 00000000 00000000 00000000 8000000A ******** => 00000001 00010000 00000000 00001CFF 8000000B ******** => 00000000 00000000 00000000 00000000 8000000C ******** => 00000000 00000000 00000000 00000000 8000000D ******** => 00000000 00000000 00000000 00000000 8000000E ******** => 00000000 00000000 00000000 00000000 8000000F ******** => 00000000 00000000 00000000 00000000 80000010 ******** => 00000000 00000000 00000000 00000000 80000011 ******** => 00000000 00000000 00000000 00000000 80000012 ******** => 00000000 00000000 00000000 00000000 80000013 ******** => 
00000000 00000000 00000000 00000000 80000014 ******** => 00000000 00000000 00000000 00000000 80000015 ******** => 00000000 00000000 00000000 00000000 80000016 ******** => 00000000 00000000 00000000 00000000 80000017 ******** => 00000000 00000000 00000000 00000000 80000018 ******** => 00000000 00000000 00000000 00000000 80000019 ******** => F040F018 64006400 00000000 00000000 8000001A ******** => 00000003 00000000 00000000 00000000 8000001B ******** => 000000FF 00000000 00000000 00000000 8000001C ******** => 00000000 80032013 00010200 8000000F 8000001D 00000001 => 00000121 00C0003F 0000003F 00000000 8000001D 00000002 => 00004122 0040003F 000001FF 00000000 8000001D 00000003 => 00004143 03C0003F 000007FF 00000001 8000001E ******** => 00000010 00000100 00000000 00000000 8FFFFFFF ******** => 00000000 00000000 00000000 00000000 blis-0.6.1/travis/cpuid/sandybridge.def000066400000000000000000000056601360743507500200570ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # CPU: Intel Xeon E3-1230 v2, 3700 MHz # 00000000 ******** => 0000000D 756E6547 6C65746E 49656E69 00000001 ******** => 000306A9 00100800 7FBAE3FF BFEBFBFF 00000002 ******** => 76035A01 00F0B2FF 00000000 00CA0000 00000003 ******** => 00000000 00000000 00000000 00000000 00000005 ******** => 00000040 00000040 00000003 00001120 00000006 ******** => 00000077 00000002 00000009 00000000 00000007 ******** => 00000000 00000281 00000000 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 0000000A ******** => 07300403 00000000 00000000 00000603 0000000C ******** => 00000000 00000000 00000000 00000000 0000000D 00000000 => 00000007 00000240 00000340 00000000 0000000D 00000001 => 00000100 00000240 00000000 00000000 80000000 ******** => 80000008 00000000 00000000 00000000 80000001 ******** => 00000000 00000000 00000001 28100000 80000002 ******** => 20202020 6E492020 286C6574 58202952 80000003 ******** => 286E6F65 43202952 45205550 32312D33 80000004 ******** => 56203033 20402032 30332E33 007A4847 80000005 ******** => 00000000 00000000 00000000 00000000 80000006 ******** => 00000000 00000000 01006040 00000000 80000007 ******** => 00000000 
00000000 00000000 00000100 80000008 ******** => 00003024 00000000 00000000 00000000 blis-0.6.1/travis/cpuid/skx.def000066400000000000000000000106661360743507500163730ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # CPU: Intel Xeon Platinum 8180, 2500 MHz # 00000000 ******** => 00000016 756E6547 6C65746E 49656E69 00000001 ******** => 00050654 00400800 7FFEFBFF BFEBFBFF 00000002 ******** => 76036301 00F0B5FF 00000000 00C30000 00000003 ******** => 00000000 00000000 00000000 00000000 00000004 00000000 => 7C004121 01C0003F 0000003F 00000000 00000004 00000001 => 7C004122 01C0003F 0000003F 00000000 00000004 00000002 => 7C004143 03C0003F 000003FF 00000000 00000004 00000003 => 7C0FC163 0280003F 0000DFFF 00000004 00000005 ******** => 00000040 00000040 00000003 00002020 00000006 ******** => 00000EF7 00000002 00000009 00000000 00000007 ******** => 00000000 D39FFFFB 00000008 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 0000000A ******** => 07300404 00000000 00000000 00000603 0000000B 00000000 => 00000001 00000002 00000100 00000000 0000000B 00000001 => 00000006 00000038 00000201 00000000 0000000C ******** => 00000000 00000000 00000000 00000000 0000000D 00000000 => 000002FF 00000A80 00000A88 00000000 0000000D 00000001 => 0000000F 00000A00 00000100 00000000 0000000D 00000002 => 00000100 00000240 00000000 00000000 0000000D 00000003 => 00000040 000003C0 00000000 00000000 0000000D 00000004 => 00000040 00000400 00000000 00000000 0000000D 00000005 => 00000040 00000440 00000000 00000000 0000000D 00000006 => 00000200 00000480 00000000 00000000 0000000D 00000007 => 00000400 00000680 00000000 00000000 0000000D 00000008 => 00000080 00000000 00000001 00000000 0000000D 00000009 => 00000008 00000A80 00000000 00000000 0000000E ******** => 00000000 00000000 00000000 00000000 0000000F 00000000 => 00000000 000000DF 00000000 00000002 0000000F 00000001 => 00000000 0001C000 000000DF 00000007 00000010 00000000 => 00000000 0000000A 00000000 00000000 00000010 00000001 => 0000000A 00000600 00000004 0000000F 00000011 ******** => 00000000 00000000 00000000 00000000 00000012 00000000 => 00000000 00000000 00000000 00000000 00000012 
00000001 => 00000000 00000000 00000000 00000000 00000013 ******** => 00000000 00000000 00000000 00000000 00000014 00000000 => 00000001 0000000F 00000007 00000000 00000014 00000001 => 02490002 003F3FFF 00000000 00000000 00000015 ******** => 00000002 000000C8 00000000 00000000 00000016 ******** => 000009C4 00000ED8 00000064 00000000 80000000 ******** => 80000008 00000000 00000000 00000000 80000001 ******** => 00000000 00000000 00000121 2C100000 80000002 ******** => 65746E49 2952286C 6F655820 2952286E 80000003 ******** => 616C5020 756E6974 3138206D 43203038 80000004 ******** => 40205550 352E3220 7A484730 00000000 80000005 ******** => 00000000 00000000 00000000 00000000 80000006 ******** => 00000000 00000000 01006040 00000000 80000007 ******** => 00000000 00000000 00000000 00000100 80000008 ******** => 0000302E 00000000 00000000 00000000 blis-0.6.1/travis/cpuid/skx1.def000066400000000000000000000106521360743507500164470ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # CPU: Intel Xeon Bronze 3106 # 00000000 ******** => 00000016 756E6547 6C65746E 49656E69 00000001 ******** => 00050654 00100800 7FFEFBFF BFEBFBFF 00000002 ******** => 76036301 00F0B6FF 00000000 00C30000 00000003 ******** => 00000000 00000000 00000000 00000000 00000004 00000000 => 1C004121 01C0003F 0000003F 00000000 00000004 00000001 => 1C004122 01C0003F 0000003F 00000000 00000004 00000002 => 1C004143 03C0003F 000003FF 00000000 00000004 00000003 => 1C03C163 0280003F 00002FFF 00000004 00000005 ******** => 00000040 00000040 00000003 00002020 00000006 ******** => 00000EF5 00000002 00000009 00000000 00000007 ******** => 00000000 D39FFFFB 00000008 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 0000000A ******** => 07300804 00000000 00000000 00000603 0000000B 00000000 => 00000001 00000001 00000100 00000000 0000000B 00000001 => 00000004 00000006 00000201 00000000 0000000C ******** => 00000000 00000000 00000000 00000000 0000000D 00000000 => 000002FF 00000340 00000A88 00000000 0000000D 00000001 => 0000000F 00000340 00000100 00000000 0000000D 00000002 => 00000100 00000240 00000000 00000000 0000000D 00000003 => 00000040 000003C0 
00000000 00000000 0000000D 00000004 => 00000040 00000400 00000000 00000000 0000000D 00000005 => 00000040 00000440 00000000 00000000 0000000D 00000006 => 00000200 00000480 00000000 00000000 0000000D 00000007 => 00000400 00000680 00000000 00000000 0000000D 00000008 => 00000080 00000000 00000001 00000000 0000000D 00000009 => 00000008 00000A80 00000000 00000000 0000000E ******** => 00000000 00000000 00000000 00000000 0000000F 00000000 => 00000000 0000002F 00000000 00000002 0000000F 00000001 => 00000000 00006000 0000002F 00000007 00000010 00000000 => 00000000 0000000A 00000000 00000000 00000010 00000001 => 0000000A 00000600 00000004 0000000F 00000011 ******** => 00000000 00000000 00000000 00000000 00000012 00000000 => 00000000 00000000 00000000 00000000 00000012 00000001 => 00000000 00000000 00000000 00000000 00000013 ******** => 00000000 00000000 00000000 00000000 00000014 00000000 => 00000001 0000000F 00000007 00000000 00000014 00000001 => 02490002 003F3FFF 00000000 00000000 00000015 ******** => 00000002 00000088 00000000 00000000 00000016 ******** => 000006A4 000006A4 00000064 00000000 80000000 ******** => 80000008 00000000 00000000 00000000 80000001 ******** => 00000000 00000000 00000121 2C100000 80000002 ******** => 65746E49 2952286C 6F655820 2952286E 80000003 ******** => 6F724220 20657A6E 34303133 55504320 80000004 ******** => 31204020 4730372E 00007A48 00000000 80000005 ******** => 00000000 00000000 00000000 00000000 80000006 ******** => 00000000 00000000 01006040 00000000 80000007 ******** => 00000000 00000000 00000000 00000100 80000008 ******** => 0000302E 00000000 00000000 00000000 blis-0.6.1/travis/cpuid/steamroller.def000066400000000000000000000104711360743507500201110ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. 
# # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # CPU: AMD A10-7850K, 4000 MHz # 00000000 ******** => 0000000D 68747541 444D4163 69746E65 00000001 ******** => 00630F01 00040800 3E98320B 178BFBFF 00000002 ******** => 00000000 00000000 00000000 00000000 00000003 ******** => 00000000 00000000 00000000 00000000 00000005 ******** => 00000040 00000040 00000003 00000000 00000006 ******** => 00000000 00000000 00000001 00000000 00000007 ******** => 00000000 00000009 00000000 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 0000000A ******** => 00000000 00000000 00000000 00000000 0000000C ******** => 00000000 00000000 00000000 00000000 0000000D 00000000 => 00000007 00000340 000003C0 40000000 0000000D 00000001 => 00000100 00000240 00000000 00000000 80000000 ******** => 8000001E 68747541 444D4163 69746E65 80000001 ******** => 00630F01 10000000 0FEBBFFF 2FD3FBFF 80000002 ******** => 20444D41 2D303141 30353837 5041204B 80000003 ******** => 69772055 52206874 6F656461 4D54286E 80000004 ******** => 37522029 61724720 63696870 00202073 80000005 ******** => FF40FF18 FF40FF30 10040140 60030140 80000006 ******** => 64006400 64004200 08008140 00000000 80000007 ******** => 00000000 00000001 00000000 000027D9 80000008 ******** => 00003030 00000000 00004003 00000000 80000009 ******** => 00000000 00000000 00000000 00000000 8000000A ******** => 00000001 00010000 00000000 00001CFF 8000000B ******** => 00000000 00000000 00000000 00000000 8000000C ******** => 00000000 00000000 00000000 00000000 8000000D ******** => 00000000 00000000 00000000 00000000 8000000E ******** => 00000000 00000000 00000000 00000000 8000000F ******** => 00000000 00000000 00000000 00000000 80000010 ******** => 00000000 00000000 00000000 00000000 80000011 ******** => 00000000 00000000 00000000 00000000 80000012 ******** => 00000000 00000000 00000000 00000000 80000013 ******** => 00000000 00000000 00000000 00000000 80000014 ******** => 00000000 00000000 00000000 00000000 80000015 ******** => 
00000000 00000000 00000000 00000000 80000016 ******** => 00000000 00000000 00000000 00000000 80000017 ******** => 00000000 00000000 00000000 00000000 80000018 ******** => 00000000 00000000 00000000 00000000 80000019 ******** => F040F018 64006400 00000000 00000000 8000001A ******** => 00000003 00000000 00000000 00000000 8000001B ******** => 000001FF 00000000 00000000 00000000 8000001C ******** => 00000000 80032013 00010200 E000000F 8000001D 00000000 => 00000121 00C0003F 0000003F 00000000 8000001D 00000001 => 00004122 0080003F 000001FF 00000000 8000001D 00000002 => 00004143 03C0003F 000007FF 00000001 8000001E ******** => 00000010 00000100 00000000 00000000 8FFFFFFF ******** => 00000000 00000000 00000000 00000000 blis-0.6.1/travis/cpuid/zen.def000066400000000000000000000107451360743507500163600ustar00rootroot00000000000000# # BLIS # An object-based framework for developing high-performance BLAS-like # libraries. # # Copyright (C) 2018, The University of Texas at Austin # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # - Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # - Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # - Neither the name(s) of the copyright holder(s) nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # CPU: AMD EPYC 7551P, 3000 MHz # 00000000 ******** => 0000000D 68747541 444D4163 69746E65 00000001 ******** => 00800F12 00400800 7ED8320B 178BFBFF 00000002 ******** => 00000000 00000000 00000000 00000000 00000003 ******** => 00000000 00000000 00000000 00000000 00000005 ******** => 00000040 00000040 00000003 00000011 00000006 ******** => 00000004 00000000 00000001 00000000 00000007 ******** => 00000000 209C01A9 00000000 00000000 00000008 ******** => 00000000 00000000 00000000 00000000 00000009 ******** => 00000000 00000000 00000000 00000000 0000000A ******** => 00000000 00000000 00000000 00000000 0000000C ******** => 00000000 00000000 00000000 00000000 0000000D 00000000 => 00000007 00000340 00000340 00000000 0000000D 00000001 => 0000000F 00000340 00000000 00000000 0000000D 00000002 => 00000100 00000240 00000000 00000000 80000000 ******** => 8000001F 68747541 444D4163 69746E65 80000001 ******** => 00800F12 40000000 35C233FF 2FD3FBFF 80000002 ******** => 20444D41 43595045 35353720 33205031 80000003 ******** => 6F432D32 50206572 65636F72 726F7373 80000004 ******** => 20202020 20202020 20202020 00202020 80000005 ******** => FF40FF40 FF40FF40 20080140 40040140 80000006 ******** => 36006400 56006400 02006140 0200C140 80000007 ******** => 00000000 0000001B 00000000 00006799 80000008 ******** => 00003030 00000007 0000603F 00000000 80000009 ******** => 00000000 00000000 00000000 00000000 8000000A ******** => 00000001 00008000 00000000 0001BCFF 8000000B 
******** => 00000000 00000000 00000000 00000000 8000000C ******** => 00000000 00000000 00000000 00000000 8000000D ******** => 00000000 00000000 00000000 00000000 8000000E ******** => 00000000 00000000 00000000 00000000 8000000F ******** => 00000000 00000000 00000000 00000000 80000010 ******** => 00000000 00000000 00000000 00000000 80000011 ******** => 00000000 00000000 00000000 00000000 80000012 ******** => 00000000 00000000 00000000 00000000 80000013 ******** => 00000000 00000000 00000000 00000000 80000014 ******** => 00000000 00000000 00000000 00000000 80000015 ******** => 00000000 00000000 00000000 00000000 80000016 ******** => 00000000 00000000 00000000 00000000 80000017 ******** => 00000000 00000000 00000000 00000000 80000018 ******** => 00000000 00000000 00000000 00000000 80000019 ******** => F040F040 00000000 00000000 00000000 8000001A ******** => 00000003 00000000 00000000 00000000 8000001B ******** => 000003FF 00000000 00000000 00000000 8000001C ******** => 00000000 00000000 00000000 00000000 8000001D 00000000 => 00004121 01C0003F 0000003F 00000000 8000001D 00000001 => 00004122 00C0003F 000000FF 00000000 8000001D 00000002 => 00004143 01C0003F 000003FF 00000002 8000001D 00000003 => 0001C163 03C0003F 00001FFF 00000001 8000001E ******** => 00000000 00000100 00000300 00000000 8000001F ******** => 0000000F 0000016F 0000000F 00000001 8FFFFFFF ******** => 00000000 00000000 00000000 00000000 blis-0.6.1/travis/do_sde.sh000077500000000000000000000031021360743507500155610ustar00rootroot00000000000000#!/bin/bash set -e set -x SDE_VERSION=sde-external-8.16.0-2018-01-30-lin SDE_TARBALL=$SDE_VERSION.tar.bz2 SDE=$SDE_VERSION/sde64 curl --verbose --form accept_license=1 --form form_id=intel_licensed_dls_step_1 \ --output /dev/null --cookie-jar jar.txt \ --location https://software.intel.com/protected-download/267266/144917 curl --verbose --cookie jar.txt --output $SDE_TARBALL \ https://software.intel.com/system/files/managed/2a/1a/$SDE_TARBALL tar xvf $SDE_TARBALL make -j2 
testsuite-bin cp $DIST_PATH/testsuite/input.general.fast input.general cp $DIST_PATH/testsuite/input.operations.fast input.operations TMP=`ldd ./test_libblis.x | grep ld | sed 's/^.*=> //'` LD_SO=${TMP%% *} TMP=`ldd ./test_libblis.x | grep libc | sed 's/^.*=> //'` LIBC_SO=${TMP%% *} TMP=`ldd ./test_libblis.x | grep libm | sed 's/^.*=> //'` LIBM_SO=${TMP%% *} for LIB in $LD_SO $LIBC_SO $LIBM_SO; do $DIST_PATH/travis/patch-ld-so.py $LIB .tmp chmod a+x .tmp sudo mv .tmp $LIB done for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do if [ "$ARCH" = "knl" ]; then $SDE -knl -- ./test_libblis.x > output.testsuite else $SDE -cpuid_in $DIST_PATH/travis/cpuid/$ARCH.def -- ./test_libblis.x > output.testsuite fi $DIST_PATH/testsuite/check-blistest.sh ./output.testsuite TMP=`grep "active sub-configuration" output.testsuite` CONFIG=${TMP##* } if [ "$CONFIG" != "$ARCH" ]; then echo "Wrong configuration chosen:" echo " Expected: $ARCH" echo " Got: $CONFIG" exit 1 fi done blis-0.6.1/travis/do_testsuite.sh000077500000000000000000000007631360743507500170510ustar00rootroot00000000000000#!/bin/bash set -e set -x export BLIS_JC_NT=1 export BLIS_IC_NT=2 export BLIS_JR_NT=1 export BLIS_IR_NT=1 if [ "$TEST" = "FAST" ]; then make testblis-fast elif [ "$TEST" = "MD" ]; then make testblis-md elif [ "$TEST" = "SALT" ]; then # Disable multithreading within BLIS. 
export BLIS_JC_NT=1 BLIS_IC_NT=1 BLIS_JR_NT=1 BLIS_IR_NT=1 make testblis-salt else make testblis fi $DIST_PATH/testsuite/check-blistest.sh ./output.testsuite make testblas $DIST_PATH/blastest/check-blastest.sh blis-0.6.1/travis/patch-ld-so.py000077500000000000000000000005721360743507500164650ustar00rootroot00000000000000#!/usr/bin/env python # # Patch ld.so to disable runtime CPUID detection # Taken from https://stackoverflow.com/a/44483482 # import re import sys infile, outfile = sys.argv[1:] d = open(infile, 'rb').read() # Match CPUID(eax=0), "xor eax,eax" followed closely by "cpuid" o = re.sub(b'(\x31\xc0.{0,32})\x0f\xa2', b'\\1\x66\x90', d) #assert d != o open(outfile, 'wb').write(o) blis-0.6.1/version000066400000000000000000000000061360743507500140650ustar00rootroot000000000000000.6.1